summaryrefslogtreecommitdiff
path: root/schema.sql
diff options
context:
space:
mode:
authorHelmut Grohne <helmut@subdivi.de>2013-07-10 16:16:45 +0200
committerHelmut Grohne <helmut@subdivi.de>2013-07-10 16:16:45 +0200
commit14020d53fd4853aa72f159885edad1dcbfce2ee7 (patch)
tree3d1182588e2a9f73941f88dd80bd4b10b04aca53 /schema.sql
parentada4f94466bf3eddc192cf22c8ecefc9cd5f0ea3 (diff)
downloaddebian-dedup-14020d53fd4853aa72f159885edad1dcbfce2ee7.tar.gz
schema: reference package table by integer key
One approach to improving performance is to reduce the database size. A package name takes up 15 bytes on average, whereas a package number takes up two bytes. Multiply that difference by the number of references and the savings should be noticeable. A small test set shows a reduction of 10%.
Diffstat (limited to 'schema.sql')
-rw-r--r-- schema.sql 12
1 files changed, 6 insertions, 6 deletions
diff --git a/schema.sql b/schema.sql
index 94f0317..b839a51 100644
--- a/schema.sql
+++ b/schema.sql
@@ -1,11 +1,11 @@
-CREATE TABLE package (package TEXT PRIMARY KEY, version TEXT, architecture TEXT, source TEXT);
-CREATE TABLE content (id INTEGER PRIMARY KEY, package TEXT, filename TEXT, size INTEGER, FOREIGN KEY (package) REFERENCES package(package) ON DELETE CASCADE);
+CREATE TABLE package (id INTEGER PRIMARY KEY, name TEXT UNIQUE, version TEXT, architecture TEXT, source TEXT);
+CREATE TABLE content (id INTEGER PRIMARY KEY, pid INTEGER, filename TEXT, size INTEGER, FOREIGN KEY (pid) REFERENCES package(id) ON DELETE CASCADE);
CREATE TABLE hash (cid INTEGER, function TEXT, hash TEXT, FOREIGN KEY (cid) REFERENCES content(id) ON DELETE CASCADE);
-CREATE TABLE dependency (package TEXT, required TEXT, FOREIGN KEY (package) REFERENCES package(package) ON DELETE CASCADE);
-CREATE INDEX content_package_index ON content (package);
+CREATE TABLE dependency (pid INTEGER, required TEXT, FOREIGN KEY (pid) REFERENCES package(id) ON DELETE CASCADE);
+CREATE INDEX content_package_index ON content (pid);
CREATE INDEX hash_cid_index ON hash (cid);
CREATE INDEX hash_hash_index ON hash (hash);
-CREATE TABLE sharing (package1 TEXT, package2 TEXT, func1 TEXT, func2 TEXT, files INTEGER, size INTEGER, FOREIGN KEY (package1) REFERENCES package(package) ON DELETE CASCADE, FOREIGN KEY (package2) REFERENCES package(package) ON DELETE CASCADE);
-CREATE INDEX sharing_insert_index ON sharing (package1, package2, func1, func2);
+CREATE TABLE sharing (pid1 INTEGER, pid2 INTEGER, func1 TEXT, func2 TEXT, files INTEGER, size INTEGER, FOREIGN KEY (pid1) REFERENCES package(id) ON DELETE CASCADE, FOREIGN KEY (pid2) REFERENCES package(id) ON DELETE CASCADE);
+CREATE INDEX sharing_insert_index ON sharing (pid1, pid2, func1, func2);
CREATE TABLE duplicate (cid INTEGER PRIMARY KEY, FOREIGN KEY (cid) REFERENCES content(id) ON DELETE CASCADE);