summaryrefslogtreecommitdiff
path: root/webapp.py
diff options
context:
space:
mode:
authorHelmut Grohne <helmut@subdivi.de>2013-07-23 23:23:41 +0200
committerHelmut Grohne <helmut@subdivi.de>2013-07-23 23:23:41 +0200
commiteaba84e444c77495a5654b600c599646b8aa1aed (patch)
treeff6bc8bb15de0c3669e2a6a6ad159b39dd638594 /webapp.py
parent6206dea43941560a29c9a1105ae3055740ab80aa (diff)
downloaddebian-dedup-eaba84e444c77495a5654b600c599646b8aa1aed.tar.gz
schema: identify hash values by an integerhashid
This one is a bit more complex, than the other transformations, because the new hashvalue table has to be cleaned with a trigger. During a test import the -wal file exploded. The resulting db is similar in size to the original.
Diffstat (limited to 'webapp.py')
-rwxr-xr-xwebapp.py6
1 files changed, 3 insertions, 3 deletions
diff --git a/webapp.py b/webapp.py
index 9e23128..bcea2f9 100755
--- a/webapp.py
+++ b/webapp.py
@@ -305,7 +305,7 @@ class Application(object):
from hash function pairs to hash values.
"""
cur = self.db.cursor()
- cur.execute("SELECT id, filename, size, hash FROM content JOIN hash ON content.id = hash.cid JOIN duplicate ON content.id = duplicate.cid WHERE pid = ? AND function = 'sha512' ORDER BY size DESC;",
+ cur.execute("SELECT content.id, content.filename, content.size, hashvalue.hash FROM content JOIN hash ON content.id = hash.cid JOIN duplicate ON content.id = duplicate.cid JOIN hashvalue ON hash.hid = hashvalue.id WHERE pid = ? AND function = 'sha512' ORDER BY size DESC;",
(pid1,))
cursize = -1
files = dict()
@@ -326,7 +326,7 @@ class Application(object):
files[hashvalue] = entry
cur2 = self.db.cursor()
- cur2.execute("SELECT ha.function, ha.hash, hb.function, filename FROM hash AS ha JOIN hash AS hb ON ha.hash = hb.hash JOIN content ON hb.cid = content.id WHERE ha.cid = ? AND pid = ?;",
+ cur2.execute("SELECT ha.function, hashvalue.hash, hb.function, filename FROM hash AS ha JOIN hash AS hb ON ha.hid = hb.hid JOIN content ON hb.cid = content.id JOIN hashvalue ON ha.hid = hashvalue.id WHERE ha.cid = ? AND content.pid = ?;",
(cid, pid2))
for func1, hashvalue, func2, filename in fetchiter(cur2):
entry["matches"].setdefault(filename, {})[func1, func2] = \
@@ -353,7 +353,7 @@ class Application(object):
def show_hash(self, function, hashvalue):
cur = self.db.cursor()
- cur.execute("SELECT package.name, content.filename, content.size, hash.function FROM hash JOIN content ON hash.cid = content.id JOIN package ON content.pid = package.id WHERE hash = ?;",
+ cur.execute("SELECT package.name, content.filename, content.size, hash.function FROM hash JOIN content ON hash.cid = content.id JOIN package ON content.pid = package.id JOIN hashvalue ON hash.hid = hashvalue.id WHERE hashvalue.hash = ?;",
(hashvalue,))
entries = [dict(package=package, filename=filename, size=size,
function=otherfunc)