diff options
author | Helmut Grohne <helmut@subdivi.de> | 2013-07-23 23:23:41 +0200 |
---|---|---|
committer | Helmut Grohne <helmut@subdivi.de> | 2013-07-23 23:23:41 +0200 |
commit | eaba84e444c77495a5654b600c599646b8aa1aed (patch) | |
tree | ff6bc8bb15de0c3669e2a6a6ad159b39dd638594 /readyaml.py | |
parent | 6206dea43941560a29c9a1105ae3055740ab80aa (diff) | |
download | debian-dedup-eaba84e444c77495a5654b600c599646b8aa1aed.tar.gz |
schema: identify hash values by an integer hash id
This one is a bit more complex than the other transformations, because
the new hashvalue table has to be cleaned with a trigger. During a test
import the -wal file exploded. The resulting db is similar in size to
the original.
Diffstat (limited to 'readyaml.py')
-rwxr-xr-x | readyaml.py | 13 |
1 files changed, 10 insertions, 3 deletions
diff --git a/readyaml.py b/readyaml.py index bb8ac54..007ed96 100755 --- a/readyaml.py +++ b/readyaml.py @@ -45,9 +45,16 @@ def readyaml(db, stream): cur.execute("INSERT INTO content (pid, filename, size) VALUES (?, ?, ?);", (pid, entry["name"], entry["size"])) cid = cur.lastrowid - cur.executemany("INSERT INTO hash (cid, function, hash) VALUES (?, ?, ?);", - ((cid, func, hexhash) - for func, hexhash in entry["hashes"].items())) + for func, hexhash in entry["hashes"].items(): + cur.execute("SELECT id FROM hashvalue WHERE hash = ?;", (hexhash,)) + row = cur.fetchone() + if row: + hid = row[0] + else: + cur.execute("INSERT INTO hashvalue (hash) VALUES (?);", (hexhash,)) + hid = cur.lastrowid + cur.execute("INSERT INTO hash (cid, function, hid) VALUES (?, ?, ?);", + (cid, func, hid)) raise ValueError("missing commit block") def main(): |