summaryrefslogtreecommitdiff
path: root/update_sharing.py
diff options
context:
space:
mode:
Diffstat (limited to 'update_sharing.py')
-rwxr-xr-xupdate_sharing.py49
1 files changed, 23 insertions, 26 deletions
diff --git a/update_sharing.py b/update_sharing.py
index d2b357b..bbd19e5 100755
--- a/update_sharing.py
+++ b/update_sharing.py
@@ -1,16 +1,16 @@
#!/usr/bin/python
-import sqlite3
+import sqlalchemy
from dedup.utils import fetchiter
-def add_values(cursor, insert_key, files, size):
- cursor.execute("UPDATE sharing SET files = files + ?, size = size + ? WHERE package1 = ? AND package2 = ? AND func1 = ? AND func2 = ?;",
- (files, size) + insert_key)
- if cursor.rowcount > 0:
+def add_values(conn, insert_key, files, size):
+ rows = conn.execute("UPDATE sharing SET files = files + ?, size = size + ? WHERE package1 = ? AND package2 = ? AND func1 = ? AND func2 = ?;",
+ (files, size) + insert_key)
+ if rows.rowcount > 0:
return
- cursor.execute("INSERT INTO sharing (package1, package2, func1, func2, files, size) VALUES (?, ?, ?, ?, ?, ?);",
- insert_key + (files, size))
+ conn.execute("INSERT INTO sharing (package1, package2, func1, func2, files, size) VALUES (?, ?, ?, ?, ?, ?);",
+ insert_key + (files, size))
def compute_pkgdict(rows):
pkgdict = dict()
@@ -19,7 +19,7 @@ def compute_pkgdict(rows):
funcdict.setdefault(function, []).append((size, filename))
return pkgdict
-def process_pkgdict(cursor, pkgdict):
+def process_pkgdict(conn, pkgdict):
for package1, funcdict1 in pkgdict.items():
for function1, files in funcdict1.items():
numfiles = len(files)
@@ -35,26 +35,23 @@ def process_pkgdict(cursor, pkgdict):
pkgsize = size
for function2 in funcdict2.keys():
insert_key = (package1, package2, function1, function2)
- add_values(cursor, insert_key, pkgnumfiles, pkgsize)
+ add_values(conn, insert_key, pkgnumfiles, pkgsize)
def main():
- db = sqlite3.connect("test.sqlite3")
- cur = db.cursor()
- cur.execute("PRAGMA foreign_keys = ON;")
- cur.execute("DELETE FROM sharing;")
- cur.execute("DELETE FROM duplicate;")
- readcur = db.cursor()
- readcur.execute("SELECT hash FROM hash GROUP BY hash HAVING count(*) > 1;")
- for hashvalue, in fetchiter(readcur):
- cur.execute("SELECT content.package, content.id, content.filename, content.size, hash.function FROM hash JOIN content ON hash.cid = content.id WHERE hash = ?;",
- (hashvalue,))
- rows = cur.fetchall()
- print("processing hash %s with %d entries" % (hashvalue, len(rows)))
- pkgdict = compute_pkgdict(rows)
- cur.executemany("INSERT OR IGNORE INTO duplicate (cid) VALUES (?);",
- [(row[1],) for row in rows])
- process_pkgdict(cur, pkgdict)
- db.commit()
+ db = sqlalchemy.create_engine("sqlite:///test.sqlite3")
+ with db.begin() as conn:
+ conn.execute("PRAGMA foreign_keys = ON;")
+ conn.execute("DELETE FROM sharing;")
+ conn.execute("DELETE FROM duplicate;")
+ readcur = conn.execute("SELECT hash FROM hash GROUP BY hash HAVING count(*) > 1;")
+ for hashvalue, in fetchiter(readcur):
+ rows = conn.execute("SELECT content.package, content.id, content.filename, content.size, hash.function FROM hash JOIN content ON hash.cid = content.id WHERE hash = ?;",
+ (hashvalue,)).fetchall()
+ print("processing hash %s with %d entries" % (hashvalue, len(rows)))
+ pkgdict = compute_pkgdict(rows)
+ conn.execute("INSERT OR IGNORE INTO duplicate (cid) VALUES (?);",
+ *[(row[1],) for row in rows])
+ process_pkgdict(conn, pkgdict)
if __name__ == "__main__":
main()