summaryrefslogtreecommitdiff
path: root/importpkg.py
diff options
context:
space:
mode:
Diffstat (limited to 'importpkg.py')
-rwxr-xr-ximportpkg.py13
1 files changed, 9 insertions, 4 deletions
diff --git a/importpkg.py b/importpkg.py
index aae9a7f..5d6a58c 100755
--- a/importpkg.py
+++ b/importpkg.py
@@ -102,10 +102,12 @@ def get_hashes(tar):
continue
hasher = MultiHash(sha512_nontrivial(), gziphash(), imagehash())
hasher = hash_file(hasher, tar.extractfile(elem))
+ hashes = {}
for hashobj in hasher.hashes:
hashvalue = hashobj.hexdigest()
if hashvalue:
- yield (elem.name, elem.size, hashobj.name, hashvalue)
+ hashes[hashobj.name] = hashvalue
+ yield (elem.name, elem.size, hashes)
def process_package(db, filelike):
cur = db.cursor()
@@ -169,14 +171,17 @@ def process_package(db, filelike):
continue
if state != "control_file":
raise ValueError("missing control file")
- for name, size, function, hexhash in get_hashes(tf):
+ for name, size, hashes in get_hashes(tf):
try:
name = name.decode("utf8")
except UnicodeDecodeError:
print("warning: skipping filename with encoding error")
continue # skip files with non-utf8 encoding for now
- cur.execute("INSERT INTO content (package, filename, size, function, hash) VALUES (?, ?, ?, ?, ?);",
- (package, name, size, function, hexhash))
+ cur.execute("INSERT INTO content (package, filename, size) VALUES (?, ?, ?);",
+ (package, name, size))
+ cid = cur.lastrowid
+ cur.executemany("INSERT INTO hash (cid, function, hash) VALUES (?, ?, ?);",
+ ((cid, func, hexhash) for func, hexhash in hashes.items()))
db.commit()
return
raise ValueError("data.tar not found")