store hash values as sqlite BLOB

Hash values were previously stored hex-encoded, so storing the raw bytes as a BLOB should cut the space consumed by hashes in half. A first benchmark indicates that the savings in overall database size are on the order of 30%.
author: Helmut Grohne <helmut@subdivi.de> 2013-07-03 21:19:13 +0200
committer: Helmut Grohne <helmut@subdivi.de> 2013-07-03 21:19:13 +0200
commit: 56d048320a358b2c417cdb2211b3640394a182e9 (patch)
tree: ef5c52619ff18c4f3391b4eb19301999de4c66e7 /update_sharing.py
parent: f2bd48d342518c11ec7deaeee5b437ac524514de (diff)
download: debian-dedup-56d048320a358b2c417cdb2211b3640394a182e9.tar.gz
1 file changed, 2 insertions, 1 deletion
diff --git a/update_sharing.py b/update_sharing.py
index d2b357b..1ea3d28 100755
--- a/update_sharing.py
+++ b/update_sharing.py
@@ -1,5 +1,6 @@
 #!/usr/bin/python
 
+import binascii
 import sqlite3
 
 from dedup.utils import fetchiter
@@ -49,7 +50,7 @@ def main():
         cur.execute("SELECT content.package, content.id, content.filename, content.size, hash.function FROM hash JOIN content ON hash.cid = content.id WHERE hash = ?;",
                     (hashvalue,))
         rows = cur.fetchall()
-        print("processing hash %s with %d entries" % (hashvalue, len(rows)))
+        print("processing hash %s with %d entries" % (binascii.b2a_hex(hashvalue), len(rows)))
         pkgdict = compute_pkgdict(rows)
         cur.executemany("INSERT OR IGNORE INTO duplicate (cid) VALUES (?);",
                         [(row[1],) for row in rows])
author	Helmut Grohne <helmut@subdivi.de>	2013-07-03 21:19:13 +0200
committer	Helmut Grohne <helmut@subdivi.de>	2013-07-03 21:19:13 +0200
commit	56d048320a358b2c417cdb2211b3640394a182e9 (patch)
tree	ef5c52619ff18c4f3391b4eb19301999de4c66e7 /update_sharing.py
parent	f2bd48d342518c11ec7deaeee5b437ac524514de (diff)
download	debian-dedup-56d048320a358b2c417cdb2211b3640394a182e9.tar.gz