diff options
author | Helmut Grohne <helmut@subdivi.de> | 2013-07-03 21:19:13 +0200 |
---|---|---|
committer | Helmut Grohne <helmut@subdivi.de> | 2013-07-03 21:19:13 +0200 |
commit | 56d048320a358b2c417cdb2211b3640394a182e9 (patch) | |
tree | ef5c52619ff18c4f3391b4eb19301999de4c66e7 /readyaml.py | |
parent | f2bd48d342518c11ec7deaeee5b437ac524514de (diff) | |
download | debian-dedup-56d048320a358b2c417cdb2211b3640394a182e9.tar.gz |
store hash values as sqlite BLOB
They were previously hex encoded, so this should cut the space consumed
by hashes in half. A first benchmark indicates that the savings in
database size are on the order of 30%.
Diffstat (limited to 'readyaml.py')
-rwxr-xr-x | readyaml.py | 3 |
1 file changed, 2 insertions, 1 deletion
diff --git a/readyaml.py b/readyaml.py
index e2f3bb3..1a7206d 100755
--- a/readyaml.py
+++ b/readyaml.py
@@ -2,6 +2,7 @@
 """This tool reads a yaml file as generated by importpkg.py on stdin and
 updates the database with the contents."""
 
+import binascii
 import sqlite3
 import sys
 
@@ -39,7 +40,7 @@ def readyaml(db, stream):
                     (package, entry["name"], entry["size"]))
         cid = cur.lastrowid
         cur.executemany("INSERT INTO hash (cid, function, hash) VALUES (?, ?, ?);",
-                        ((cid, func, hexhash)
+                        ((cid, func, buffer(binascii.a2b_hex(hexhash)))
                          for func, hexhash in entry["hashes"].items()))
     raise ValueError("missing commit block")
 