diff options
author | Helmut Grohne <helmut@subdivi.de> | 2013-07-03 21:19:13 +0200 |
---|---|---|
committer | Helmut Grohne <helmut@subdivi.de> | 2013-07-03 21:19:13 +0200 |
commit | 56d048320a358b2c417cdb2211b3640394a182e9 (patch) | |
tree | ef5c52619ff18c4f3391b4eb19301999de4c66e7 /webapp.py | |
parent | f2bd48d342518c11ec7deaeee5b437ac524514de (diff) | |
download | debian-dedup-56d048320a358b2c417cdb2211b3640394a182e9.tar.gz |
store hash values as sqlite BLOB
Hash values were previously stored hex encoded, so storing the raw bytes
should cut the space consumed by hashes in half. A first benchmark
indicates that the savings in database size are on the order of 30%.
Diffstat (limited to 'webapp.py')
-rwxr-xr-x | webapp.py | 12 |
1 file changed, 9 insertions, 3 deletions
@@ -1,5 +1,6 @@ #!/usr/bin/python +import binascii import datetime import os.path import sqlite3 @@ -210,7 +211,7 @@ def generate_shared(rows): funcdict = dict() entry = dict(filename1=filename1, filename2=filename2, size1=size1, size2=size2, functions=funcdict) - funcdict[funccomb] = hashvalue + funcdict[funccomb] = binascii.b2a_hex(hashvalue) if entry: yield entry @@ -317,6 +318,7 @@ class Application(object): files.clear() cursize = size + hashvalue = binascii.b2a_hex(hashvalue) if hashvalue in files: files[hashvalue]["filenames"].add(filename) continue @@ -329,7 +331,7 @@ class Application(object): (cid, package2)) for func1, hashvalue, func2, filename in fetchiter(cur2): entry["matches"].setdefault(filename, {})[func1, func2] = \ - hashvalue + binascii.b2a_hex(hashvalue) cur2.close() cur.close() @@ -351,9 +353,13 @@ class Application(object): return html_response(detail_template.stream(params)) def show_hash(self, function, hashvalue): + try: + bhash = buffer(binascii.a2b_hex(hashvalue)) + except TypeError: + raise NotFound() cur = self.db.cursor() cur.execute("SELECT content.package, content.filename, content.size, hash.function FROM content JOIN hash ON content.id = hash.cid WHERE hash = ?;", - (hashvalue,)) + (bhash,)) entries = [dict(package=package, filename=filename, size=size, function=otherfunc) for package, filename, size, otherfunc in fetchiter(cur) |