summaryrefslogtreecommitdiff
path: root/update_sharing.py
diff options
context:
space:
mode:
Diffstat (limited to 'update_sharing.py')
-rwxr-xr-xupdate_sharing.py23
1 files changed, 13 insertions, 10 deletions
diff --git a/update_sharing.py b/update_sharing.py
index 0995114..caca445 100755
--- a/update_sharing.py
+++ b/update_sharing.py
@@ -5,12 +5,15 @@ import sqlalchemy
from dedup.utils import fetchiter, enable_sqlite_foreign_keys
def add_values(conn, insert_key, files, size):
- rows = conn.execute("UPDATE sharing SET files = files + ?, size = size + ? WHERE package1 = ? AND package2 = ? AND func1 = ? AND func2 = ?;",
- (files, size) + insert_key)
+ params = dict(files=files, size=size, package1=insert_key[0],
+ package2=insert_key[1], func1=insert_key[2],
+ func2=insert_key[3])
+ rows = conn.execute("UPDATE sharing SET files = files + :files, size = size + :size WHERE package1 = :package1 AND package2 = :package2 AND func1 = :func1 AND func2 = :func2;",
+ **params)
if rows.rowcount > 0:
return
- conn.execute("INSERT INTO sharing (package1, package2, func1, func2, files, size) VALUES (?, ?, ?, ?, ?, ?);",
- insert_key + (files, size))
+ conn.execute("INSERT INTO sharing (package1, package2, func1, func2, files, size) VALUES (:package1, :package2, :func1, :func2, :files, :size);",
+ **params)
def compute_pkgdict(rows):
pkgdict = dict()
@@ -45,17 +48,17 @@ def main():
conn.execute("DELETE FROM duplicate;")
readcur = conn.execute("SELECT hash FROM hash GROUP BY hash HAVING count(*) > 1;")
for hashvalue, in fetchiter(readcur):
- rows = conn.execute("SELECT content.package, content.id, content.filename, content.size, hash.function FROM hash JOIN content ON hash.cid = content.id WHERE hash = ?;",
- (hashvalue,)).fetchall()
+ rows = conn.execute("SELECT content.package, content.id, content.filename, content.size, hash.function FROM hash JOIN content ON hash.cid = content.id WHERE hash = :hashvalue;",
+ hashvalue=hashvalue).fetchall()
print("processing hash %s with %d entries" % (hashvalue, len(rows)))
pkgdict = compute_pkgdict(rows)
for row in rows:
cid = row[1]
- already = conn.scalar("SELECT cid FROM duplicate WHERE cid = ?;",
- (cid,))
+ already = conn.scalar("SELECT cid FROM duplicate WHERE cid = :cid;",
+ cid=cid)
if not already:
- conn.execute("INSERT INTO duplicate (cid) VALUES (?);",
- (cid,))
+ conn.execute("INSERT INTO duplicate (cid) VALUES (:cid);",
+ cid=cid)
process_pkgdict(conn, pkgdict)
if __name__ == "__main__":