From eaba84e444c77495a5654b600c599646b8aa1aed Mon Sep 17 00:00:00 2001 From: Helmut Grohne Date: Tue, 23 Jul 2013 23:23:41 +0200 Subject: schema: identify hash values by an integer This one is a bit more complex than the other transformations, because the new hashvalue table has to be cleaned with a trigger. During a test import the -wal file exploded. The resulting db is similar in size to the original. --- update_sharing.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'update_sharing.py') diff --git a/update_sharing.py b/update_sharing.py index 55e8096..bca0213 100755 --- a/update_sharing.py +++ b/update_sharing.py @@ -44,12 +44,12 @@ def main(): cur.execute("DELETE FROM sharing;") cur.execute("DELETE FROM duplicate;") readcur = db.cursor() - readcur.execute("SELECT hash FROM hash GROUP BY hash HAVING count(*) > 1;") - for hashvalue, in fetchiter(readcur): - cur.execute("SELECT content.pid, content.id, content.filename, content.size, hash.function FROM hash JOIN content ON hash.cid = content.id WHERE hash = ?;", - (hashvalue,)) + readcur.execute("SELECT hid FROM hash GROUP BY hid HAVING count(*) > 1;") + for hid, in fetchiter(readcur): + cur.execute("SELECT content.pid, content.id, content.filename, content.size, hash.function FROM hash JOIN content ON hash.cid = content.id WHERE hid = ?;", + (hid,)) rows = cur.fetchall() - print("processing hash %s with %d entries" % (hashvalue, len(rows))) + print("processing hash %d with %d entries" % (hid, len(rows))) pkgdict = compute_pkgdict(rows) cur.executemany("INSERT OR IGNORE INTO duplicate (cid) VALUES (?);", [(row[1],) for row in rows]) -- cgit v1.2.3