diff options
-rw-r--r-- | schema.sql | 47 |
1 files changed, 32 insertions, 15 deletions
@@ -1,34 +1,47 @@ -CREATE TABLE package ( -- binary Debian packages. +CREATE TABLE package ( -- binary Debian packages id INTEGER PRIMARY KEY, - name TEXT UNIQUE, -- binary package name + name TEXT UNIQUE NOT NULL, + -- binary package name version TEXT, -- Debian version architecture TEXT, source TEXT); -- name of the source package it was built from + CREATE TABLE content ( -- a file contained in a binary package id INTEGER PRIMARY KEY, pid INTEGER NOT NULL REFERENCES package(id) ON DELETE CASCADE, - -- which package the file is contained in - filename TEXT, - size INTEGER); + -- which package the file is contained in + filename TEXT NOT NULL, + size INTEGER NOT NULL); + CREATE TABLE function ( -- hash functions id INTEGER PRIMARY KEY, name TEXT UNIQUE NOT NULL, - eqclass INTEGER); -- hash values of different hash functions are comparable if they share an eqclass + eqclass INTEGER); + -- hash values of different hash functions are comparable if they share + -- an eqclass + INSERT INTO function (id, name, eqclass) VALUES (1, 'sha512', 1), (2, 'gzip_sha512', 1), + -- decompress a gzip file, then hash (3, 'png_sha512', 2), + -- decompress a PNG file, hash RGBA image contents + dimension (4, 'gif_sha512', 2); + -- decompress a GIF file, hash RGBA image contents + dimension + CREATE TABLE hash ( -- hash values of files in binary packages cid INTEGER NOT NULL REFERENCES content(id) ON DELETE CASCADE, - -- which file has been hashed + -- which file has been hashed fid INTEGER NOT NULL REFERENCES function(id), - -- using which function - hash TEXT); -- textual hash value + -- using which function + hash TEXT NOT NULL); -- textual hash value + CREATE TABLE dependency ( -- binary package dependencies pid INTEGER NOT NULL REFERENCES package(id) ON DELETE CASCADE, - -- the package carries a Depends: header - required TEXT); -- the name of a package that is dependet upon without alternative + -- the package that carries a Depends: header + required TEXT NOT NULL); + -- the name of a package that is depended upon without alternative + CREATE INDEX content_package_size_index ON content (pid, size); CREATE INDEX hash_cid_index ON hash (cid); CREATE INDEX hash_hash_index ON hash (hash); @@ -48,15 +61,19 @@ CREATE TABLE sharing ( pid2 INTEGER NOT NULL REFERENCES package(id) ON DELETE CASCADE, fid1 INTEGER NOT NULL REFERENCES function(id), fid2 INTEGER NOT NULL REFERENCES function(id), - files INTEGER, - size INTEGER); + files INTEGER NOT NULL, + size INTEGER NOT NULL); CREATE INDEX sharing_insert_index ON sharing (pid1, pid2, fid1, fid2); + -- The duplicate table caches all files that have any non-unique hash value. +-- It is used in webapp.py to speed up one query, but could be dropped +-- otherwise. CREATE TABLE duplicate ( cid INTEGER PRIMARY KEY REFERENCES content(id) ON DELETE CASCADE); + -- The issue table contains auxillary notices per file. For example, when a -- filename ends in ".gz", but is not gzip decompressible (has no gzip_sha512 -- hash), it is recorded here. CREATE TABLE issue ( - cid INTEGER REFERENCES content(id) ON DELETE CASCADE, - issue TEXT); + cid INTEGER NOT NULL REFERENCES content(id) ON DELETE CASCADE, + issue TEXT NOT NULL); -- a human readable comment on the file |