From 523fb675368d180ab9c5a16b6f48a4d29693c8ae Mon Sep 17 00:00:00 2001 From: Helmut Grohne Date: Sun, 23 Jun 2013 13:01:56 +0200 Subject: port update_sharing.py to sqlalchemy --- update_sharing.py | 49 +++++++++++++++++++++++-------------------------- 1 file changed, 23 insertions(+), 26 deletions(-) (limited to 'update_sharing.py') diff --git a/update_sharing.py b/update_sharing.py index d2b357b..bbd19e5 100755 --- a/update_sharing.py +++ b/update_sharing.py @@ -1,16 +1,16 @@ #!/usr/bin/python -import sqlite3 +import sqlalchemy from dedup.utils import fetchiter -def add_values(cursor, insert_key, files, size): - cursor.execute("UPDATE sharing SET files = files + ?, size = size + ? WHERE package1 = ? AND package2 = ? AND func1 = ? AND func2 = ?;", - (files, size) + insert_key) - if cursor.rowcount > 0: +def add_values(conn, insert_key, files, size): + rows = conn.execute("UPDATE sharing SET files = files + ?, size = size + ? WHERE package1 = ? AND package2 = ? AND func1 = ? AND func2 = ?;", + (files, size) + insert_key) + if rows.rowcount > 0: return - cursor.execute("INSERT INTO sharing (package1, package2, func1, func2, files, size) VALUES (?, ?, ?, ?, ?, ?);", - insert_key + (files, size)) + conn.execute("INSERT INTO sharing (package1, package2, func1, func2, files, size) VALUES (?, ?, ?, ?, ?, ?);", + insert_key + (files, size)) def compute_pkgdict(rows): pkgdict = dict() @@ -19,7 +19,7 @@ def compute_pkgdict(rows): funcdict.setdefault(function, []).append((size, filename)) return pkgdict -def process_pkgdict(cursor, pkgdict): +def process_pkgdict(conn, pkgdict): for package1, funcdict1 in pkgdict.items(): for function1, files in funcdict1.items(): numfiles = len(files) @@ -35,26 +35,23 @@ def process_pkgdict(cursor, pkgdict): pkgsize = size for function2 in funcdict2.keys(): insert_key = (package1, package2, function1, function2) - add_values(cursor, insert_key, pkgnumfiles, pkgsize) + add_values(conn, insert_key, pkgnumfiles, pkgsize) def main(): - db = sqlite3.connect("test.sqlite3") - cur = db.cursor() - cur.execute("PRAGMA foreign_keys = ON;") - cur.execute("DELETE FROM sharing;") - cur.execute("DELETE FROM duplicate;") - readcur = db.cursor() - readcur.execute("SELECT hash FROM hash GROUP BY hash HAVING count(*) > 1;") - for hashvalue, in fetchiter(readcur): - cur.execute("SELECT content.package, content.id, content.filename, content.size, hash.function FROM hash JOIN content ON hash.cid = content.id WHERE hash = ?;", - (hashvalue,)) - rows = cur.fetchall() - print("processing hash %s with %d entries" % (hashvalue, len(rows))) - pkgdict = compute_pkgdict(rows) - cur.executemany("INSERT OR IGNORE INTO duplicate (cid) VALUES (?);", - [(row[1],) for row in rows]) - process_pkgdict(cur, pkgdict) - db.commit() + db = sqlalchemy.create_engine("sqlite:///test.sqlite3") + with db.begin() as conn: + conn.execute("PRAGMA foreign_keys = ON;") + conn.execute("DELETE FROM sharing;") + conn.execute("DELETE FROM duplicate;") + readcur = conn.execute("SELECT hash FROM hash GROUP BY hash HAVING count(*) > 1;") + for hashvalue, in fetchiter(readcur): + rows = conn.execute("SELECT content.package, content.id, content.filename, content.size, hash.function FROM hash JOIN content ON hash.cid = content.id WHERE hash = ?;", + (hashvalue,)).fetchall() + print("processing hash %s with %d entries" % (hashvalue, len(rows))) + pkgdict = compute_pkgdict(rows) + conn.execute("INSERT OR IGNORE INTO duplicate (cid) VALUES (?);", + *[(row[1],) for row in rows]) + process_pkgdict(conn, pkgdict) if __name__ == "__main__": main() -- cgit v1.2.3