schema: identify hash values by an integer hash id

This one is a bit more complex than the other transformations, because the new hashvalue table has to be cleaned with a trigger. During a test import, the -wal file exploded in size. The resulting db is similar in size to the original.
author: Helmut Grohne <helmut@subdivi.de> 2013-07-23 23:23:41 +0200
committer: Helmut Grohne <helmut@subdivi.de> 2013-07-23 23:23:41 +0200
commit: eaba84e444c77495a5654b600c599646b8aa1aed (patch)
tree: ff6bc8bb15de0c3669e2a6a6ad159b39dd638594 /update_sharing.py
parent: 6206dea43941560a29c9a1105ae3055740ab80aa (diff)
download: debian-dedup-eaba84e444c77495a5654b600c599646b8aa1aed.tar.gz
1 files changed, 5 insertions, 5 deletions
diff --git a/update_sharing.py b/update_sharing.py
index 55e8096..bca0213 100755
--- a/update_sharing.py
+++ b/update_sharing.py
@@ -44,12 +44,12 @@ def main():
     cur.execute("DELETE FROM sharing;")
     cur.execute("DELETE FROM duplicate;")
     readcur = db.cursor()
-    readcur.execute("SELECT hash FROM hash GROUP BY hash HAVING count(*) > 1;")
-    for hashvalue, in fetchiter(readcur):
-        cur.execute("SELECT content.pid, content.id, content.filename, content.size, hash.function FROM hash JOIN content ON hash.cid = content.id WHERE hash = ?;",
-                    (hashvalue,))
+    readcur.execute("SELECT hid FROM hash GROUP BY hid HAVING count(*) > 1;")
+    for hid, in fetchiter(readcur):
+        cur.execute("SELECT content.pid, content.id, content.filename, content.size, hash.function FROM hash JOIN content ON hash.cid = content.id WHERE hid = ?;",
+                    (hid,))
         rows = cur.fetchall()
-        print("processing hash %s with %d entries" % (hashvalue, len(rows)))
+        print("processing hash %d with %d entries" % (hid, len(rows)))
         pkgdict = compute_pkgdict(rows)
         cur.executemany("INSERT OR IGNORE INTO duplicate (cid) VALUES (?);",
                         [(row[1],) for row in rows])
author	Helmut Grohne <helmut@subdivi.de>	2013-07-23 23:23:41 +0200
committer	Helmut Grohne <helmut@subdivi.de>	2013-07-23 23:23:41 +0200
commit	eaba84e444c77495a5654b600c599646b8aa1aed (patch)
tree	ff6bc8bb15de0c3669e2a6a6ad159b39dd638594 /update_sharing.py
parent	6206dea43941560a29c9a1105ae3055740ab80aa (diff)
download	debian-dedup-eaba84e444c77495a5654b600c599646b8aa1aed.tar.gz