diff options
author | Helmut Grohne <helmut@subdivi.de> | 2013-07-23 23:23:41 +0200 |
---|---|---|
committer | Helmut Grohne <helmut@subdivi.de> | 2013-07-23 23:23:41 +0200 |
commit | eaba84e444c77495a5654b600c599646b8aa1aed (patch) | |
tree | ff6bc8bb15de0c3669e2a6a6ad159b39dd638594 /webapp.py | |
parent | 6206dea43941560a29c9a1105ae3055740ab80aa (diff) | |
download | debian-dedup-eaba84e444c77495a5654b600c599646b8aa1aed.tar.gz |
schema: identify hash values by an integerhashid
This one is a bit more complex, than the other transformations, because
the new hashvalue table has to be cleaned with a trigger. During a test
import the -wal file exploded. The resulting db is similar in size to
the original.
Diffstat (limited to 'webapp.py')
-rwxr-xr-x | webapp.py | 6 |
1 files changed, 3 insertions, 3 deletions
@@ -305,7 +305,7 @@ class Application(object): from hash function pairs to hash values. """ cur = self.db.cursor() - cur.execute("SELECT id, filename, size, hash FROM content JOIN hash ON content.id = hash.cid JOIN duplicate ON content.id = duplicate.cid WHERE pid = ? AND function = 'sha512' ORDER BY size DESC;", + cur.execute("SELECT content.id, content.filename, content.size, hashvalue.hash FROM content JOIN hash ON content.id = hash.cid JOIN duplicate ON content.id = duplicate.cid JOIN hashvalue ON hash.hid = hashvalue.id WHERE pid = ? AND function = 'sha512' ORDER BY size DESC;", (pid1,)) cursize = -1 files = dict() @@ -326,7 +326,7 @@ class Application(object): files[hashvalue] = entry cur2 = self.db.cursor() - cur2.execute("SELECT ha.function, ha.hash, hb.function, filename FROM hash AS ha JOIN hash AS hb ON ha.hash = hb.hash JOIN content ON hb.cid = content.id WHERE ha.cid = ? AND pid = ?;", + cur2.execute("SELECT ha.function, hashvalue.hash, hb.function, filename FROM hash AS ha JOIN hash AS hb ON ha.hid = hb.hid JOIN content ON hb.cid = content.id JOIN hashvalue ON ha.hid = hashvalue.id WHERE ha.cid = ? AND content.pid = ?;", (cid, pid2)) for func1, hashvalue, func2, filename in fetchiter(cur2): entry["matches"].setdefault(filename, {})[func1, func2] = \ @@ -353,7 +353,7 @@ class Application(object): def show_hash(self, function, hashvalue): cur = self.db.cursor() - cur.execute("SELECT package.name, content.filename, content.size, hash.function FROM hash JOIN content ON hash.cid = content.id JOIN package ON content.pid = package.id WHERE hash = ?;", + cur.execute("SELECT package.name, content.filename, content.size, hash.function FROM hash JOIN content ON hash.cid = content.id JOIN package ON content.pid = package.id JOIN hashvalue ON hash.hid = hashvalue.id WHERE hashvalue.hash = ?;", (hashvalue,)) entries = [dict(package=package, filename=filename, size=size, function=otherfunc) |