summary | refs | log | tree | commit | diff
path: root/readyaml.py
diff options
context:
space:
mode:
author: Helmut Grohne <helmut@subdivi.de> 2013-07-26 15:04:02 +0200
committer: Helmut Grohne <helmut@subdivi.de> 2013-07-26 15:04:02 +0200
commit: dc378a18d50142baceaef4c2a416cb5a40f84861 (patch)
tree: 975967733a6f7f726618843df11acf766537f9e0 /readyaml.py
parent: 9b653583711c59d96c45af43ff8ee9534500adb6 (diff)
parent: 32f406706c0a2a21b11656e5c56ff203e0ee3799 (diff)
download: debian-dedup-dc378a18d50142baceaef4c2a416cb5a40f84861.tar.gz
Merge branch functionid
Actual savings on the full data set are around 7%. Conflicts: README
Diffstat (limited to 'readyaml.py')
-rwxr-xr-x readyaml.py 6
1 file changed, 4 insertions, 2 deletions
diff --git a/readyaml.py b/readyaml.py
index bb8ac54..21b1ca1 100755
--- a/readyaml.py
+++ b/readyaml.py
@@ -25,6 +25,8 @@ def readyaml(db, stream):
pid = None
cur.execute("BEGIN;")
+ cur.execute("SELECT name, id FROM function;")
+ funcmapping = dict(cur.fetchall())
if pid is not None:
cur.execute("DELETE FROM content WHERE pid = ?;", (pid,))
cur.execute("DELETE FROM dependency WHERE pid = ?;", (pid,))
@@ -45,8 +47,8 @@ def readyaml(db, stream):
cur.execute("INSERT INTO content (pid, filename, size) VALUES (?, ?, ?);",
(pid, entry["name"], entry["size"]))
cid = cur.lastrowid
- cur.executemany("INSERT INTO hash (cid, function, hash) VALUES (?, ?, ?);",
- ((cid, func, hexhash)
+ cur.executemany("INSERT INTO hash (cid, fid, hash) VALUES (?, ?, ?);",
+ ((cid, funcmapping[func], hexhash)
for func, hexhash in entry["hashes"].items()))
raise ValueError("missing commit block")