diff options
-rw-r--r-- | schema.sql | 4 | ||||
-rwxr-xr-x | webapp.py | 21 |
2 files changed, 6 insertions, 19 deletions
@@ -1,7 +1,7 @@ CREATE TABLE package (id INTEGER PRIMARY KEY, name TEXT UNIQUE, version TEXT, architecture TEXT, source TEXT); CREATE TABLE content (id INTEGER PRIMARY KEY, pid INTEGER, filename TEXT, size INTEGER, FOREIGN KEY (pid) REFERENCES package(id) ON DELETE CASCADE); -CREATE TABLE function (id INTEGER PRIMARY KEY, name TEXT UNIQUE NOT NULL); -INSERT INTO function (name) VALUES ("sha512"), ("gzip_sha512"), ("png_sha512"), ("gif_sha512"); +CREATE TABLE function (id INTEGER PRIMARY KEY, name TEXT UNIQUE NOT NULL, eqclass INTEGER); +INSERT INTO function (name, eqclass) VALUES ("sha512", 1), ("gzip_sha512", 1), ("png_sha512", 2), ("gif_sha512", 2); CREATE TABLE hash (cid INTEGER, fid INTEGER NOT NULL, hash TEXT, FOREIGN KEY (cid) REFERENCES content(id) ON DELETE CASCADE, FOREIGN KEY (fid) REFERENCES function(id)); CREATE TABLE dependency (pid INTEGER, required TEXT, FOREIGN KEY (pid) REFERENCES package(id) ON DELETE CASCADE); CREATE INDEX content_package_size_index ON content (pid, size); @@ -12,16 +12,6 @@ from werkzeug.wsgi import SharedDataMiddleware from dedup.utils import fetchiter -hash_functions = [ - ("sha512", "sha512"), - ("png_sha512", "png_sha512"), - ("png_sha512", "gif_sha512"), - ("gif_sha512", "png_sha512"), - ("gif_sha512", "gif_sha512"), - ("gzip_sha512", "gzip_sha512"), - ("sha512", "gzip_sha512"), - ("gzip_sha512", "sha512")] - jinjaenv = jinja2.Environment(loader=jinja2.PackageLoader("dedup", "templates")) def format_size(size): @@ -135,11 +125,9 @@ class Application(object): def cached_sharedstats(self, pid): cur = self.db.cursor() sharedstats = {} - cur.execute("SELECT pid2, package.name, f1.name, f2.name, files, size FROM sharing JOIN package ON sharing.pid2 = package.id JOIN function AS f1 ON sharing.fid1 = f1.id JOIN function AS f2 ON sharing.fid2 = f2.id WHERE pid1 = ?;", + cur.execute("SELECT pid2, package.name, f1.name, f2.name, files, size FROM sharing JOIN package ON sharing.pid2 = package.id JOIN function AS f1 ON sharing.fid1 = f1.id JOIN function AS f2 ON sharing.fid2 = f2.id WHERE pid1 = ? AND f1.eqclass = f2.eqclass;", (pid,)) for pid2, package2, func1, func2, files, size in fetchiter(cur): - if (func1, func2) not in hash_functions: - continue curstats = sharedstats.setdefault( function_combination(func1, func2), list()) if pid2 == pid: @@ -218,12 +206,11 @@ class Application(object): def show_hash(self, function, hashvalue): cur = self.db.cursor() - cur.execute("SELECT package.name, content.filename, content.size, function.name FROM hash JOIN content ON hash.cid = content.id JOIN package ON content.pid = package.id JOIN function ON hash.fid = function.id WHERE hash = ?;", - (hashvalue,)) + cur.execute("SELECT package.name, content.filename, content.size, f2.name FROM hash JOIN content ON hash.cid = content.id JOIN package ON content.pid = package.id JOIN function AS f2 ON hash.fid = f2.id JOIN function AS f1 ON f2.eqclass = f1.eqclass WHERE f1.name = ? AND hash = ?;", + (function, hashvalue,)) entries = [dict(package=package, filename=filename, size=size, function=otherfunc) - for package, filename, size, otherfunc in fetchiter(cur) - if (function, otherfunc) in hash_functions] + for package, filename, size, otherfunc in fetchiter(cur)] if not entries: raise NotFound() params = dict(function=function, hashvalue=hashvalue, entries=entries, |