summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- schema.sql 4
-rwxr-xr-x webapp.py 21
2 files changed, 6 insertions, 19 deletions
diff --git a/schema.sql b/schema.sql
index ddc6ccd..2ab7ca7 100644
--- a/schema.sql
+++ b/schema.sql
@@ -1,7 +1,7 @@
CREATE TABLE package (id INTEGER PRIMARY KEY, name TEXT UNIQUE, version TEXT, architecture TEXT, source TEXT);
CREATE TABLE content (id INTEGER PRIMARY KEY, pid INTEGER, filename TEXT, size INTEGER, FOREIGN KEY (pid) REFERENCES package(id) ON DELETE CASCADE);
-CREATE TABLE function (id INTEGER PRIMARY KEY, name TEXT UNIQUE NOT NULL);
-INSERT INTO function (name) VALUES ("sha512"), ("gzip_sha512"), ("png_sha512"), ("gif_sha512");
+CREATE TABLE function (id INTEGER PRIMARY KEY, name TEXT UNIQUE NOT NULL, eqclass INTEGER);
+INSERT INTO function (name, eqclass) VALUES ("sha512", 1), ("gzip_sha512", 1), ("png_sha512", 2), ("gif_sha512", 2);
CREATE TABLE hash (cid INTEGER, fid INTEGER NOT NULL, hash TEXT, FOREIGN KEY (cid) REFERENCES content(id) ON DELETE CASCADE, FOREIGN KEY (fid) REFERENCES function(id));
CREATE TABLE dependency (pid INTEGER, required TEXT, FOREIGN KEY (pid) REFERENCES package(id) ON DELETE CASCADE);
CREATE INDEX content_package_size_index ON content (pid, size);
diff --git a/webapp.py b/webapp.py
index 260268a..f202c2e 100755
--- a/webapp.py
+++ b/webapp.py
@@ -12,16 +12,6 @@ from werkzeug.wsgi import SharedDataMiddleware
from dedup.utils import fetchiter
-hash_functions = [
- ("sha512", "sha512"),
- ("png_sha512", "png_sha512"),
- ("png_sha512", "gif_sha512"),
- ("gif_sha512", "png_sha512"),
- ("gif_sha512", "gif_sha512"),
- ("gzip_sha512", "gzip_sha512"),
- ("sha512", "gzip_sha512"),
- ("gzip_sha512", "sha512")]
-
jinjaenv = jinja2.Environment(loader=jinja2.PackageLoader("dedup", "templates"))
def format_size(size):
@@ -135,11 +125,9 @@ class Application(object):
def cached_sharedstats(self, pid):
cur = self.db.cursor()
sharedstats = {}
- cur.execute("SELECT pid2, package.name, f1.name, f2.name, files, size FROM sharing JOIN package ON sharing.pid2 = package.id JOIN function AS f1 ON sharing.fid1 = f1.id JOIN function AS f2 ON sharing.fid2 = f2.id WHERE pid1 = ?;",
+ cur.execute("SELECT pid2, package.name, f1.name, f2.name, files, size FROM sharing JOIN package ON sharing.pid2 = package.id JOIN function AS f1 ON sharing.fid1 = f1.id JOIN function AS f2 ON sharing.fid2 = f2.id WHERE pid1 = ? AND f1.eqclass = f2.eqclass;",
(pid,))
for pid2, package2, func1, func2, files, size in fetchiter(cur):
- if (func1, func2) not in hash_functions:
- continue
curstats = sharedstats.setdefault(
function_combination(func1, func2), list())
if pid2 == pid:
@@ -218,12 +206,11 @@ class Application(object):
def show_hash(self, function, hashvalue):
cur = self.db.cursor()
- cur.execute("SELECT package.name, content.filename, content.size, function.name FROM hash JOIN content ON hash.cid = content.id JOIN package ON content.pid = package.id JOIN function ON hash.fid = function.id WHERE hash = ?;",
- (hashvalue,))
+ cur.execute("SELECT package.name, content.filename, content.size, f2.name FROM hash JOIN content ON hash.cid = content.id JOIN package ON content.pid = package.id JOIN function AS f2 ON hash.fid = f2.id JOIN function AS f1 ON f2.eqclass = f1.eqclass WHERE f1.name = ? AND hash = ?;",
+ (function, hashvalue,))
entries = [dict(package=package, filename=filename, size=size,
function=otherfunc)
- for package, filename, size, otherfunc in fetchiter(cur)
- if (function, otherfunc) in hash_functions]
+ for package, filename, size, otherfunc in fetchiter(cur)]
if not entries:
raise NotFound()
params = dict(function=function, hashvalue=hashvalue, entries=entries,