diff options
Diffstat (limited to 'update_sharing.py')
-rwxr-xr-x | update_sharing.py | 33 |
1 file changed, 27 insertions, 6 deletions
diff --git a/update_sharing.py b/update_sharing.py index 78e6171..3a86268 100755 --- a/update_sharing.py +++ b/update_sharing.py @@ -2,10 +2,17 @@ import argparse import sqlite3 +import typing from dedup.utils import fetchiter -def add_values(cursor, insert_key, files, size): + +def add_values( + cursor: sqlite3.Cursor, + insert_key: typing.Tuple[int, int, int, int], + files: int, + size: int, +) -> None: cursor.execute("UPDATE sharing SET files = files + ?, size = size + ? WHERE pid1 = ? AND pid2 = ? AND fid1 = ? AND fid2 = ?;", (files, size) + insert_key) if cursor.rowcount > 0: @@ -13,14 +20,25 @@ def add_values(cursor, insert_key, files, size): cursor.execute("INSERT INTO sharing (pid1, pid2, fid1, fid2, files, size) VALUES (?, ?, ?, ?, ?, ?);", insert_key + (files, size)) -def compute_pkgdict(rows): - pkgdict = dict() + +def compute_pkgdict( + rows: typing.Iterable[typing.Tuple[int, typing.Any, str, int, int]] +) -> typing.Dict[int, typing.Dict[int, typing.List[typing.Tuple[int, str]]]]: + pkgdict: typing.Dict[ + int, typing.Dict[int, typing.List[typing.Tuple[int, str]]] + ] = {} for pid, _, filename, size, fid in rows: funcdict = pkgdict.setdefault(pid, {}) funcdict.setdefault(fid, []).append((size, filename)) return pkgdict -def process_pkgdict(cursor, pkgdict): + +def process_pkgdict( + cursor: sqlite3.Cursor, + pkgdict: typing.Dict[ + int, typing.Dict[int, typing.List[typing.Tuple[int, str]]] + ], +) -> None: for pid1, funcdict1 in pkgdict.items(): for fid1, files in funcdict1.items(): numfiles = len(files) @@ -38,7 +56,8 @@ def process_pkgdict(cursor, pkgdict): insert_key = (pid1, pid2, fid1, fid2) add_values(cursor, insert_key, pkgnumfiles, pkgsize) -def main(db): + +def main(db: sqlite3.Connection) -> None: cur = db.cursor() cur.execute("PRAGMA foreign_keys = ON;") cur.execute("DELETE FROM sharing;") @@ -49,7 +68,9 @@ def main(db): for hashvalue, in fetchiter(readcur): cur.execute("SELECT function.eqclass, content.pid, content.id, content.filename, 
content.size, hash.fid FROM hash JOIN content ON hash.cid = content.id JOIN function ON hash.fid = function.id AND function.eqclass IS NOT NULL WHERE hash = ?;", (hashvalue,)) - rowdict = dict() + rowdict: typing.Dict[ + int, typing.List[typing.Tuple[int, int, str, int, int]] + ] = {} for row in cur.fetchall(): rowdict.setdefault(row[0], []).append(row[1:]) for eqclass, rows in rowdict.items(): |