summaryrefslogtreecommitdiff
path: root/webapp.py
diff options
context:
space:
mode:
authorHelmut Grohne <helmut@subdivi.de>2013-08-02 15:21:56 +0200
committerHelmut Grohne <helmut@subdivi.de>2013-08-02 15:21:56 +0200
commit7528af6d22d3967be9727f6e2d88dfcbf0f78ce9 (patch)
tree40f16ad103c89240cc68f6eaa35d278af42fb9ae /webapp.py
parent2712edb550968ce7ec8cd9800241d7944666631a (diff)
downloaddebian-dedup-7528af6d22d3967be9727f6e2d88dfcbf0f78ce9.tar.gz
model comparability as an equivalence relation
webapp has had a relation hash_functions, that modeled "comparable functions". Images should not be compares to other files, since it makes no sense to store them as the RGBA stream, that is being hashed. This comparability property resembles an equivalence relation. So the function table gains a column eqclass. Each class is represented by a number and functions are statically assigned to these classes. Now the filtering happens in SQL instead of Python.
Diffstat (limited to 'webapp.py')
-rwxr-xr-xwebapp.py21
1 files changed, 4 insertions, 17 deletions
diff --git a/webapp.py b/webapp.py
index 260268a..f202c2e 100755
--- a/webapp.py
+++ b/webapp.py
@@ -12,16 +12,6 @@ from werkzeug.wsgi import SharedDataMiddleware
from dedup.utils import fetchiter
-hash_functions = [
- ("sha512", "sha512"),
- ("png_sha512", "png_sha512"),
- ("png_sha512", "gif_sha512"),
- ("gif_sha512", "png_sha512"),
- ("gif_sha512", "gif_sha512"),
- ("gzip_sha512", "gzip_sha512"),
- ("sha512", "gzip_sha512"),
- ("gzip_sha512", "sha512")]
-
jinjaenv = jinja2.Environment(loader=jinja2.PackageLoader("dedup", "templates"))
def format_size(size):
@@ -135,11 +125,9 @@ class Application(object):
def cached_sharedstats(self, pid):
cur = self.db.cursor()
sharedstats = {}
- cur.execute("SELECT pid2, package.name, f1.name, f2.name, files, size FROM sharing JOIN package ON sharing.pid2 = package.id JOIN function AS f1 ON sharing.fid1 = f1.id JOIN function AS f2 ON sharing.fid2 = f2.id WHERE pid1 = ?;",
+ cur.execute("SELECT pid2, package.name, f1.name, f2.name, files, size FROM sharing JOIN package ON sharing.pid2 = package.id JOIN function AS f1 ON sharing.fid1 = f1.id JOIN function AS f2 ON sharing.fid2 = f2.id WHERE pid1 = ? AND f1.eqclass = f2.eqclass;",
(pid,))
for pid2, package2, func1, func2, files, size in fetchiter(cur):
- if (func1, func2) not in hash_functions:
- continue
curstats = sharedstats.setdefault(
function_combination(func1, func2), list())
if pid2 == pid:
@@ -218,12 +206,11 @@ class Application(object):
def show_hash(self, function, hashvalue):
cur = self.db.cursor()
- cur.execute("SELECT package.name, content.filename, content.size, function.name FROM hash JOIN content ON hash.cid = content.id JOIN package ON content.pid = package.id JOIN function ON hash.fid = function.id WHERE hash = ?;",
- (hashvalue,))
+ cur.execute("SELECT package.name, content.filename, content.size, f2.name FROM hash JOIN content ON hash.cid = content.id JOIN package ON content.pid = package.id JOIN function AS f2 ON hash.fid = f2.id JOIN function AS f1 ON f2.eqclass = f1.eqclass WHERE f1.name = ? AND hash = ?;",
+ (function, hashvalue,))
entries = [dict(package=package, filename=filename, size=size,
function=otherfunc)
- for package, filename, size, otherfunc in fetchiter(cur)
- if (function, otherfunc) in hash_functions]
+ for package, filename, size, otherfunc in fetchiter(cur)]
if not entries:
raise NotFound()
params = dict(function=function, hashvalue=hashvalue, entries=entries,