diff options
Diffstat (limited to 'importpkg.py')
-rwxr-xr-x | importpkg.py | 45 |
1 files changed, 38 insertions, 7 deletions
diff --git a/importpkg.py b/importpkg.py
index 56e03ae..182ca01 100755
--- a/importpkg.py
+++ b/importpkg.py
@@ -6,6 +6,7 @@ document contains package metadata. Then a document is emitted for each file.
 And finally a document consisting of the string "commit" is emitted."""
 
 import hashlib
+import optparse
 import sys
 import tarfile
 import zlib
@@ -15,9 +16,10 @@ import lzma
 import yaml
 
 from dedup.arreader import ArReader
-from dedup.hashing import HashBlacklist, DecompressedHash, SuppressingHash, hash_file
+from dedup.hashing import HashBlacklist, DecompressedHash, SuppressingHash, \
+        HashedStream, hash_file
 from dedup.compression import GzipDecompressor, DecompressedStream
-from dedup.image import ImageHash
+from dedup.image import GIFHash, PNGHash
 
 class MultiHash(object):
     def __init__(self, *hashes):
@@ -42,17 +44,24 @@ def gziphash():
     hashobj.name = "gzip_sha512"
     return HashBlacklist(hashobj, boring_sha512_hashes)
 
-def imagehash():
-    hashobj = ImageHash(hashlib.sha512())
+def pnghash():
+    hashobj = PNGHash(hashlib.sha512())
     hashobj = SuppressingHash(hashobj, (ValueError,))
-    hashobj.name = "image_sha512"
+    hashobj.name = "png_sha512"
+    return hashobj
+
+def gifhash():
+    hashobj = GIFHash(hashlib.sha512())
+    hashobj = SuppressingHash(hashobj, (ValueError,))
+    hashobj.name = "gif_sha512"
     return hashobj
 
 def get_hashes(tar):
     for elem in tar:
         if not elem.isreg(): # excludes hard links as well
             continue
-        hasher = MultiHash(sha512_nontrivial(), gziphash(), imagehash())
+        hasher = MultiHash(sha512_nontrivial(), gziphash(), pnghash(),
+                           gifhash())
         hasher = hash_file(hasher, tar.extractfile(elem))
         hashes = {}
         for hashobj in hasher.hashes:
@@ -107,6 +116,8 @@ def process_package(filelike):
         elif name == "data.tar.xz":
             zf = DecompressedStream(af, lzma.LZMADecompressor())
             tf = tarfile.open(fileobj=zf, mode="r|")
+        elif name == "data.tar":
+            tf = tarfile.open(fileobj=af, mode="r|")
         else:
             continue
         if state != "control_file":
@@ -121,8 +132,28 @@ def process_package(filelike):
         yield "commit"
         break
 
+def process_package_with_hash(filelike, sha256hash):
+    hstream = HashedStream(filelike, hashlib.sha256())
+    for elem in process_package(hstream):
+        if elem == "commit":
+            while hstream.read(4096):
+                pass
+            if hstream.hexdigest() != sha256hash:
+                raise ValueError("hash sum mismatch")
+            yield elem
+            break
+        yield elem
+
 def main():
-    yaml.safe_dump_all(process_package(sys.stdin), sys.stdout)
+    parser = optparse.OptionParser()
+    parser.add_option("-H", "--hash", action="store",
+                      help="verify that stdin hash given sha256 hash")
+    options, args = parser.parse_args()
+    if options.hash:
+        gen = process_package_with_hash(sys.stdin, options.hash)
+    else:
+        gen = process_package(sys.stdin)
+    yaml.safe_dump_all(gen, sys.stdout)
 
 if __name__ == "__main__":
     main()