summaryrefslogtreecommitdiff
path: root/schema.sql
diff options
context:
space:
mode:
author Helmut Grohne <helmut@subdivi.de> 2013-04-24 20:56:46 +0200
committer Helmut Grohne <helmut@subdivi.de> 2013-04-24 21:00:20 +0200
commit 94eb867119af05639691ec7990dcf2d6a956dd86 (patch)
tree 6f33e5f2badf1b19182c718f46614869047516cb /schema.sql
parent d2b83735a4810cec7bf7c0dd6fb521498f104435 (diff)
download debian-dedup-94eb867119af05639691ec7990dcf2d6a956dd86.tar.gz
implement the /compare/pkg1/pkg2 page differently
The original version had two major drawbacks: 1) The SQL query used would cause a btree sort, so the time spent waiting for the first output was rather long. 2) For packages with many equal files, the output would grow as O(n^2). Thanks to the suggestions by Christine Grohne and Klaus Aehlig. The approach now groups the files in package1 by their main hash value (sha512). It also now does manually some of the work that SQL was designed to do. To speed up page generation, a new caching table was added that identifies which files have corresponding shared files.
Diffstat (limited to 'schema.sql')
-rw-r--r-- schema.sql 1
1 files changed, 1 insertions, 0 deletions
diff --git a/schema.sql b/schema.sql
index a67c807..e942c7b 100644
--- a/schema.sql
+++ b/schema.sql
@@ -9,3 +9,4 @@ CREATE INDEX hash_hash_index ON hash (hash);
CREATE TABLE sharing (package1 TEXT, package2 TEXT, func1 TEXT, func2 TEXT, files INTEGER, size INTEGER, FOREIGN KEY (package1) REFERENCES package(package) ON DELETE CASCADE, FOREIGN KEY (package2) REFERENCES package(package) ON DELETE CASCADE);
CREATE INDEX sharing_insert_index ON sharing (package1, package2, func1, func2);
CREATE INDEX sharing_package_index ON sharing (package1);
+CREATE TABLE duplicate (cid INTEGER PRIMARY KEY, FOREIGN KEY (cid) REFERENCES content(id) ON DELETE CASCADE);