summary | refs | log | tree | commit | diff
path: root/readyaml.py
diff options
context:
space:
mode:
author: Helmut Grohne <helmut@subdivi.de> 2013-07-23 18:53:55 +0200
committer: Helmut Grohne <helmut@subdivi.de> 2013-07-23 18:53:55 +0200
commit: 6f88561d726327c90f83b8aad1db26abbd4cdf1e (patch)
tree: 49d04fed10475183190cbe9ce536947958e9a749 /readyaml.py
parent: 6206dea43941560a29c9a1105ae3055740ab80aa (diff)
download: debian-dedup-6f88561d726327c90f83b8aad1db26abbd4cdf1e.tar.gz
schema: reference hash functions by integer key
This already worked quite well for package.id. On a test data set of 5% size this transformation reduces the database size by about 4%.
Diffstat (limited to 'readyaml.py')
-rwxr-xr-x readyaml.py 2
1 files changed, 1 insertions, 1 deletions
diff --git a/readyaml.py b/readyaml.py
index bb8ac54..f4d6ead 100755
--- a/readyaml.py
+++ b/readyaml.py
@@ -45,7 +45,7 @@ def readyaml(db, stream):
cur.execute("INSERT INTO content (pid, filename, size) VALUES (?, ?, ?);",
(pid, entry["name"], entry["size"]))
cid = cur.lastrowid
- cur.executemany("INSERT INTO hash (cid, function, hash) VALUES (?, ?, ?);",
+ cur.executemany("INSERT INTO hash (cid, fid, hash) VALUES (?, (SELECT id FROM function WHERE name = ?), ?);",
((cid, func, hexhash)
for func, hexhash in entry["hashes"].items()))
raise ValueError("missing commit block")