summary refs log tree commit diff
path: root/importpkg.py
diff options
context:
space:
mode:
Diffstat (limited to 'importpkg.py')
-rwxr-xr-x importpkg.py 45
1 files changed, 38 insertions, 7 deletions
diff --git a/importpkg.py b/importpkg.py
index 56e03ae..182ca01 100755
--- a/importpkg.py
+++ b/importpkg.py
@@ -6,6 +6,7 @@ document contains package metadata. Then a document is emitted for each file.
And finally a document consisting of the string "commit" is emitted."""
import hashlib
+import optparse
import sys
import tarfile
import zlib
@@ -15,9 +16,10 @@ import lzma
import yaml
from dedup.arreader import ArReader
-from dedup.hashing import HashBlacklist, DecompressedHash, SuppressingHash, hash_file
+from dedup.hashing import HashBlacklist, DecompressedHash, SuppressingHash, \
+ HashedStream, hash_file
from dedup.compression import GzipDecompressor, DecompressedStream
-from dedup.image import ImageHash
+from dedup.image import GIFHash, PNGHash
class MultiHash(object):
def __init__(self, *hashes):
@@ -42,17 +44,24 @@ def gziphash():
hashobj.name = "gzip_sha512"
return HashBlacklist(hashobj, boring_sha512_hashes)
-def imagehash():
- hashobj = ImageHash(hashlib.sha512())
+def pnghash():
+ """Build the hash object that is reported under the name "png_sha512".
+
+ A PNGHash wrapping a fresh hashlib.sha512() is itself wrapped in a
+ SuppressingHash that is handed the tuple (ValueError,).
+ NOTE(review): presumably SuppressingHash swallows those exceptions for
+ input that is not a valid PNG image -- confirm in dedup.hashing.
+ """
+ hashobj = PNGHash(hashlib.sha512())
hashobj = SuppressingHash(hashobj, (ValueError,))
- hashobj.name = "image_sha512"
+ hashobj.name = "png_sha512"
+ return hashobj
+
+def gifhash():
+ """Build the hash object that is reported under the name "gif_sha512".
+
+ A GIFHash wrapping a fresh hashlib.sha512() is itself wrapped in a
+ SuppressingHash that is handed the tuple (ValueError,).
+ NOTE(review): presumably SuppressingHash swallows those exceptions for
+ input that is not a valid GIF image -- confirm in dedup.hashing.
+ """
+ hashobj = GIFHash(hashlib.sha512())
+ hashobj = SuppressingHash(hashobj, (ValueError,))
+ hashobj.name = "gif_sha512"
return hashobj
def get_hashes(tar):
for elem in tar:
if not elem.isreg(): # excludes hard links as well
continue
- hasher = MultiHash(sha512_nontrivial(), gziphash(), imagehash())
+ hasher = MultiHash(sha512_nontrivial(), gziphash(), pnghash(),
+ gifhash())
hasher = hash_file(hasher, tar.extractfile(elem))
hashes = {}
for hashobj in hasher.hashes:
@@ -107,6 +116,8 @@ def process_package(filelike):
elif name == "data.tar.xz":
zf = DecompressedStream(af, lzma.LZMADecompressor())
tf = tarfile.open(fileobj=zf, mode="r|")
+ elif name == "data.tar":
+ tf = tarfile.open(fileobj=af, mode="r|")
else:
continue
if state != "control_file":
@@ -121,8 +132,28 @@ def process_package(filelike):
yield "commit"
break
+def process_package_with_hash(filelike, sha256hash):
+    """Yield the documents of process_package(), checking a digest at the end.
+
+    filelike is read through a HashedStream built on hashlib.sha256() and
+    that wrapper is handed to process_package().  Every document other
+    than "commit" is passed through unchanged.  When "commit" arrives, the
+    rest of the wrapped stream is read to its end and the wrapper's hex
+    digest is compared with sha256hash.  On a mismatch
+    ValueError("hash sum mismatch") is raised and "commit" is not yielded;
+    otherwise "commit" is yielded and the generator finishes.
+    """
+    hashed_input = HashedStream(filelike, hashlib.sha256())
+    for document in process_package(hashed_input):
+        if document != "commit":
+            yield document
+            continue
+        # Consume whatever process_package() left unread.
+        # NOTE(review): presumably so the digest covers the whole input --
+        # confirm against HashedStream in dedup.hashing.
+        while hashed_input.read(4096):
+            pass
+        if hashed_input.hexdigest() != sha256hash:
+            raise ValueError("hash sum mismatch")
+        yield document
+        return
+
def main():
-    yaml.safe_dump_all(process_package(sys.stdin), sys.stdout)
+    """Dump the documents produced from sys.stdin to sys.stdout as YAML.
+
+    With -H/--hash the input goes through process_package_with_hash(),
+    which compares it against the given SHA-256 hex digest; without the
+    option process_package() is used directly.  Either way the resulting
+    generator is written with yaml.safe_dump_all().
+    """
+    parser = optparse.OptionParser()
+    # Help text fixed: it previously read "stdin hash given sha256 hash".
+    parser.add_option("-H", "--hash", action="store",
+                      help="verify that stdin has the given sha256 hash")
+    options, args = parser.parse_args()
+    if options.hash:
+        gen = process_package_with_hash(sys.stdin, options.hash)
+    else:
+        gen = process_package(sys.stdin)
+    yaml.safe_dump_all(gen, sys.stdout)
+# Run main() only when this file is executed as a script, not on import.
if __name__ == "__main__":
main()