summaryrefslogtreecommitdiff
path: root/schema.sql
diff options
context:
space:
mode:
Diffstat (limited to 'schema.sql')
-rw-r--r--schema.sql47
1 files changed, 32 insertions, 15 deletions
diff --git a/schema.sql b/schema.sql
index 35f82ca..99ae7e5 100644
--- a/schema.sql
+++ b/schema.sql
@@ -1,34 +1,47 @@
-CREATE TABLE package ( -- binary Debian packages.
+CREATE TABLE package ( -- binary Debian packages
id INTEGER PRIMARY KEY,
- name TEXT UNIQUE, -- binary package name
+ name TEXT UNIQUE NOT NULL,
+ -- binary package name
version TEXT, -- Debian version
architecture TEXT,
source TEXT); -- name of the source package it was built from
+
CREATE TABLE content ( -- a file contained in a binary package
id INTEGER PRIMARY KEY,
pid INTEGER NOT NULL REFERENCES package(id) ON DELETE CASCADE,
- -- which package the file is contained in
- filename TEXT,
- size INTEGER);
+ -- which package the file is contained in
+ filename TEXT NOT NULL,
+ size INTEGER NOT NULL);
+
CREATE TABLE function ( -- hash functions
id INTEGER PRIMARY KEY,
name TEXT UNIQUE NOT NULL,
- eqclass INTEGER); -- hash values of different hash functions are comparable if they share an eqclass
+ eqclass INTEGER);
+ -- hash values of different hash functions are comparable if they share
+ -- an eqclass
+
INSERT INTO function (id, name, eqclass) VALUES
(1, 'sha512', 1),
(2, 'gzip_sha512', 1),
+ -- decompress a gzip file, then hash
(3, 'png_sha512', 2),
+ -- decompress a PNG file, hash RGBA image contents + dimension
(4, 'gif_sha512', 2);
+ -- decompress a GIF file, hash RGBA image contents + dimension
+
CREATE TABLE hash ( -- hash values of files in binary packages
cid INTEGER NOT NULL REFERENCES content(id) ON DELETE CASCADE,
- -- which file has been hashed
+ -- which file has been hashed
fid INTEGER NOT NULL REFERENCES function(id),
- -- using which function
- hash TEXT); -- textual hash value
+ -- using which function
+ hash TEXT NOT NULL); -- textual hash value
+
CREATE TABLE dependency ( -- binary package dependencies
pid INTEGER NOT NULL REFERENCES package(id) ON DELETE CASCADE,
- -- the package carries a Depends: header
- required TEXT); -- the name of a package that is dependet upon without alternative
+ -- the package that carries a Depends: header
+ required TEXT NOT NULL);
+ -- the name of a package that is depended upon without alternative
+
CREATE INDEX content_package_size_index ON content (pid, size);
CREATE INDEX hash_cid_index ON hash (cid);
CREATE INDEX hash_hash_index ON hash (hash);
@@ -48,15 +61,19 @@ CREATE TABLE sharing (
pid2 INTEGER NOT NULL REFERENCES package(id) ON DELETE CASCADE,
fid1 INTEGER NOT NULL REFERENCES function(id),
fid2 INTEGER NOT NULL REFERENCES function(id),
- files INTEGER,
- size INTEGER);
+ files INTEGER NOT NULL,
+ size INTEGER NOT NULL);
CREATE INDEX sharing_insert_index ON sharing (pid1, pid2, fid1, fid2);
+
-- The duplicate table caches all files that have any non-unique hash value.
+-- It is used in webapp.py to speed up one query, but could be dropped
+-- otherwise.
CREATE TABLE duplicate (
cid INTEGER PRIMARY KEY REFERENCES content(id) ON DELETE CASCADE);
+
-- The issue table contains auxillary notices per file. For example, when a
-- filename ends in ".gz", but is not gzip decompressible (has no gzip_sha512
-- hash), it is recorded here.
CREATE TABLE issue (
- cid INTEGER REFERENCES content(id) ON DELETE CASCADE,
- issue TEXT);
+ cid INTEGER NOT NULL REFERENCES content(id) ON DELETE CASCADE,
+ issue TEXT NOT NULL); -- a human readable comment on the file