summaryrefslogtreecommitdiff
path: root/readyaml.py
diff options
context:
space:
mode:
authorHelmut Grohne <helmut@subdivi.de>2013-07-23 23:23:41 +0200
committerHelmut Grohne <helmut@subdivi.de>2013-07-23 23:23:41 +0200
commiteaba84e444c77495a5654b600c599646b8aa1aed (patch)
treeff6bc8bb15de0c3669e2a6a6ad159b39dd638594 /readyaml.py
parent6206dea43941560a29c9a1105ae3055740ab80aa (diff)
downloaddebian-dedup-eaba84e444c77495a5654b600c599646b8aa1aed.tar.gz
schema: identify hash values by an integer hashid
This one is a bit more complex than the other transformations, because the new hashvalue table has to be cleaned with a trigger. During a test import the -wal file exploded. The resulting db is similar in size to the original.
Diffstat (limited to 'readyaml.py')
-rwxr-xr-xreadyaml.py13
1 files changed, 10 insertions, 3 deletions
diff --git a/readyaml.py b/readyaml.py
index bb8ac54..007ed96 100755
--- a/readyaml.py
+++ b/readyaml.py
@@ -45,9 +45,16 @@ def readyaml(db, stream):
cur.execute("INSERT INTO content (pid, filename, size) VALUES (?, ?, ?);",
(pid, entry["name"], entry["size"]))
cid = cur.lastrowid
- cur.executemany("INSERT INTO hash (cid, function, hash) VALUES (?, ?, ?);",
- ((cid, func, hexhash)
- for func, hexhash in entry["hashes"].items()))
+ for func, hexhash in entry["hashes"].items():
+ cur.execute("SELECT id FROM hashvalue WHERE hash = ?;", (hexhash,))
+ row = cur.fetchone()
+ if row:
+ hid = row[0]
+ else:
+ cur.execute("INSERT INTO hashvalue (hash) VALUES (?);", (hexhash,))
+ hid = cur.lastrowid
+ cur.execute("INSERT INTO hash (cid, function, hid) VALUES (?, ?, ?);",
+ (cid, func, hid))
raise ValueError("missing commit block")
def main():