diff options
author | Helmut Grohne <helmut@subdivi.de> | 2013-08-02 08:40:49 +0200 |
---|---|---|
committer | Helmut Grohne <helmut@subdivi.de> | 2013-08-02 08:40:49 +0200 |
commit | cb3708825bf7ea32314040575cef35980dad0cd8 (patch) | |
tree | 31575a8525dc90ba6904268d94f47e1604bf0557 /importpkg.py | |
parent | a4bbbb6e664e605634cb3f9e0564c7e4a93697be (diff) | |
parent | 2712edb550968ce7ec8cd9800241d7944666631a (diff) | |
download | debian-dedup-cb3708825bf7ea32314040575cef35980dad0cd8.tar.gz |
Merge branch master into sqlalchemy
This makes the sqlalchemy branch schema-compatible with master again.
The biggest change on master was the introduction of the function table.
It caused most of the conflicts. Note that webapp had one conflict that
git did not detect: the selection of issues in show_package needed to be
converted to sqlalchemy.
Conflicts:
README
update_sharing.py
webapp.py
Diffstat (limited to 'importpkg.py')
-rwxr-xr-x | importpkg.py | 45 |
1 file changed, 38 insertions, 7 deletions
diff --git a/importpkg.py b/importpkg.py index 56e03ae..182ca01 100755 --- a/importpkg.py +++ b/importpkg.py @@ -6,6 +6,7 @@ document contains package metadata. Then a document is emitted for each file. And finally a document consisting of the string "commit" is emitted.""" import hashlib +import optparse import sys import tarfile import zlib @@ -15,9 +16,10 @@ import lzma import yaml from dedup.arreader import ArReader -from dedup.hashing import HashBlacklist, DecompressedHash, SuppressingHash, hash_file +from dedup.hashing import HashBlacklist, DecompressedHash, SuppressingHash, \ + HashedStream, hash_file from dedup.compression import GzipDecompressor, DecompressedStream -from dedup.image import ImageHash +from dedup.image import GIFHash, PNGHash class MultiHash(object): def __init__(self, *hashes): @@ -42,17 +44,24 @@ def gziphash(): hashobj.name = "gzip_sha512" return HashBlacklist(hashobj, boring_sha512_hashes) -def imagehash(): - hashobj = ImageHash(hashlib.sha512()) +def pnghash(): + hashobj = PNGHash(hashlib.sha512()) hashobj = SuppressingHash(hashobj, (ValueError,)) - hashobj.name = "image_sha512" + hashobj.name = "png_sha512" + return hashobj + +def gifhash(): + hashobj = GIFHash(hashlib.sha512()) + hashobj = SuppressingHash(hashobj, (ValueError,)) + hashobj.name = "gif_sha512" return hashobj def get_hashes(tar): for elem in tar: if not elem.isreg(): # excludes hard links as well continue - hasher = MultiHash(sha512_nontrivial(), gziphash(), imagehash()) + hasher = MultiHash(sha512_nontrivial(), gziphash(), pnghash(), + gifhash()) hasher = hash_file(hasher, tar.extractfile(elem)) hashes = {} for hashobj in hasher.hashes: @@ -107,6 +116,8 @@ def process_package(filelike): elif name == "data.tar.xz": zf = DecompressedStream(af, lzma.LZMADecompressor()) tf = tarfile.open(fileobj=zf, mode="r|") + elif name == "data.tar": + tf = tarfile.open(fileobj=af, mode="r|") else: continue if state != "control_file": @@ -121,8 +132,28 @@ def process_package(filelike): 
yield "commit" break +def process_package_with_hash(filelike, sha256hash): + hstream = HashedStream(filelike, hashlib.sha256()) + for elem in process_package(hstream): + if elem == "commit": + while hstream.read(4096): + pass + if hstream.hexdigest() != sha256hash: + raise ValueError("hash sum mismatch") + yield elem + break + yield elem + def main(): - yaml.safe_dump_all(process_package(sys.stdin), sys.stdout) + parser = optparse.OptionParser() + parser.add_option("-H", "--hash", action="store", + help="verify that stdin hash given sha256 hash") + options, args = parser.parse_args() + if options.hash: + gen = process_package_with_hash(sys.stdin, options.hash) + else: + gen = process_package(sys.stdin) + yaml.safe_dump_all(gen, sys.stdout) if __name__ == "__main__": main() |