From eaba84e444c77495a5654b600c599646b8aa1aed Mon Sep 17 00:00:00 2001 From: Helmut Grohne Date: Tue, 23 Jul 2013 23:23:41 +0200 Subject: schema: identify hash values by an integer This one is a bit more complex, than the other transformations, because the new hashvalue table has to be cleaned with a trigger. During a test import the -wal file exploded. The resulting db is similar in size to the original. --- webapp.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'webapp.py') diff --git a/webapp.py b/webapp.py index 9e23128..bcea2f9 100755 --- a/webapp.py +++ b/webapp.py @@ -305,7 +305,7 @@ class Application(object): from hash function pairs to hash values. """ cur = self.db.cursor() - cur.execute("SELECT id, filename, size, hash FROM content JOIN hash ON content.id = hash.cid JOIN duplicate ON content.id = duplicate.cid WHERE pid = ? AND function = 'sha512' ORDER BY size DESC;", + cur.execute("SELECT content.id, content.filename, content.size, hashvalue.hash FROM content JOIN hash ON content.id = hash.cid JOIN duplicate ON content.id = duplicate.cid JOIN hashvalue ON hash.hid = hashvalue.id WHERE pid = ? AND function = 'sha512' ORDER BY size DESC;", (pid1,)) cursize = -1 files = dict() @@ -326,7 +326,7 @@ class Application(object): files[hashvalue] = entry cur2 = self.db.cursor() - cur2.execute("SELECT ha.function, ha.hash, hb.function, filename FROM hash AS ha JOIN hash AS hb ON ha.hash = hb.hash JOIN content ON hb.cid = content.id WHERE ha.cid = ? AND pid = ?;", + cur2.execute("SELECT ha.function, hashvalue.hash, hb.function, filename FROM hash AS ha JOIN hash AS hb ON ha.hid = hb.hid JOIN content ON hb.cid = content.id JOIN hashvalue ON ha.hid = hashvalue.id WHERE ha.cid = ? AND content.pid = ?;", (cid, pid2)) for func1, hashvalue, func2, filename in fetchiter(cur2): entry["matches"].setdefault(filename, {})[func1, func2] = \ @@ -353,7 +353,7 @@ class Application(object): def show_hash(self, function, hashvalue): cur = self.db.cursor() - cur.execute("SELECT package.name, content.filename, content.size, hash.function FROM hash JOIN content ON hash.cid = content.id JOIN package ON content.pid = package.id WHERE hash = ?;", + cur.execute("SELECT package.name, content.filename, content.size, hash.function FROM hash JOIN content ON hash.cid = content.id JOIN package ON content.pid = package.id JOIN hashvalue ON hash.hid = hashvalue.id WHERE hashvalue.hash = ?;", (hashvalue,)) entries = [dict(package=package, filename=filename, size=size, function=otherfunc) -- cgit v1.2.3