split the content table into a content table and a hash table

In the old content table, the (package, filename, size) triple was repeated for every hash function applied to a file. The new schema expresses that each file has exactly one size but may have multiple hashes.
author: Helmut Grohne <helmut@subdivi.de> 2013-03-09 18:43:47 +0100
committer: Helmut Grohne <helmut@subdivi.de> 2013-03-09 18:43:47 +0100
commit: 5c0dcba3229b8c3e0faf42cf3e07cb82ee1369cd (patch)
tree: 08f4b4cd1aae470eb6bae21bd0a4859ecd91aee9 /update_sharing.py
parent: 423ceee0d0befc8755a9ae915d15e8d415d98159 (diff)
download: debian-dedup-5c0dcba3229b8c3e0faf42cf3e07cb82ee1369cd.tar.gz
1 file changed, 2 insertions, 2 deletions
diff --git a/update_sharing.py b/update_sharing.py
index 2ed532b..b45e40b 100755
--- a/update_sharing.py
+++ b/update_sharing.py
@@ -43,9 +43,9 @@ def main():
     cur.execute("PRAGMA foreign_keys = ON;")
     cur.execute("DELETE FROM sharing;")
     readcur = db.cursor()
-    readcur.execute("SELECT hash FROM content GROUP BY hash HAVING count(*) > 1;")
+    readcur.execute("SELECT hash FROM hash GROUP BY hash HAVING count(*) > 1;")
     for hashvalue, in fetchiter(readcur):
-        cur.execute("SELECT package, filename, size, function FROM content WHERE hash = ?;",
+        cur.execute("SELECT content.package, content.filename, content.size, hash.function FROM hash JOIN content ON hash.cid = content.id WHERE hash = ?;",
                     (hashvalue,))
         rows = cur.fetchall()
         print("processing hash %s with %d entries" % (hashvalue, len(rows)))
author	Helmut Grohne <helmut@subdivi.de>	2013-03-09 18:43:47 +0100
committer	Helmut Grohne <helmut@subdivi.de>	2013-03-09 18:43:47 +0100
commit	5c0dcba3229b8c3e0faf42cf3e07cb82ee1369cd (patch)
tree	08f4b4cd1aae470eb6bae21bd0a4859ecd91aee9 /update_sharing.py
parent	423ceee0d0befc8755a9ae915d15e8d415d98159 (diff)
download	debian-dedup-5c0dcba3229b8c3e0faf42cf3e07cb82ee1369cd.tar.gz