diff options
Diffstat (limited to 'importpkg.py')
-rwxr-xr-x | importpkg.py | 55 |
1 files changed, 9 insertions, 46 deletions
diff --git a/importpkg.py b/importpkg.py index 1334dd6..54f6181 100755 --- a/importpkg.py +++ b/importpkg.py @@ -11,24 +11,16 @@ import sys import tarfile import zlib -from debian import deb822 import lzma import yaml from dedup.arreader import ArReader +from dedup.debpkg import process_control, get_tar_hashes from dedup.hashing import HashBlacklist, DecompressedHash, SuppressingHash, \ - HashedStream, hash_file + HashedStream from dedup.compression import GzipDecompressor, DecompressedStream from dedup.image import GIFHash, PNGHash -class MultiHash(object): - def __init__(self, *hashes): - self.hashes = hashes - - def update(self, data): - for hasher in self.hashes: - hasher.update(data) - boring_sha512_hashes = set(( # "" "cf83e1357eefb8bdf1542850d66d8007d620e4050b5715dc83f4a921d36ce9ce47d0d13c5d85f2b0ff8318d2877eec2f63b931bd47417a81a538327af927da3e", @@ -57,37 +49,7 @@ def gifhash(): hashobj.name = "gif_sha512" return hashobj -def get_hashes(tar): - for elem in tar: - if not elem.isreg(): # excludes hard links as well - continue - hasher = MultiHash(sha512_nontrivial(), gziphash(), pnghash(), - gifhash()) - hasher = hash_file(hasher, tar.extractfile(elem)) - hashes = {} - for hashobj in hasher.hashes: - hashvalue = hashobj.hexdigest() - if hashvalue: - hashes[hashobj.name] = hashvalue - yield (elem.name, elem.size, hashes) - -def process_control(control_contents): - control = deb822.Packages(control_contents) - package = control["package"].encode("ascii") - try: - source = control["source"].encode("ascii").split()[0] - except KeyError: - source = package - version = control["version"].encode("ascii") - architecture = control["architecture"].encode("ascii") - - depends = set(dep[0]["name"].encode("ascii") - for dep in control.relations.get("depends", ()) - if len(dep) == 1) - return dict(package=package, source=source, version=version, - architecture=architecture, depends=depends) - -def process_package(filelike): +def process_package(filelike, hash_functions): af = ArReader(filelike) af.read_magic() state = "start" @@ -123,7 +85,7 @@ def process_package(filelike): continue if state != "control_file": raise ValueError("missing control file") - for name, size, hashes in get_hashes(tf): + for name, size, hashes in get_tar_hashes(tf, hash_functions): try: name = name.decode("utf8") except UnicodeDecodeError: @@ -133,9 +95,9 @@ def process_package(filelike): yield "commit" break -def process_package_with_hash(filelike, sha256hash): +def process_package_with_hash(filelike, hash_functions, sha256hash): hstream = HashedStream(filelike, hashlib.sha256()) - for elem in process_package(hstream): + for elem in process_package(hstream, hash_functions): if elem == "commit": while hstream.read(4096): pass @@ -150,10 +112,11 @@ def main(): parser.add_option("-H", "--hash", action="store", help="verify that stdin hash given sha256 hash") options, args = parser.parse_args() + hash_functions = [sha512_nontrivial, gziphash, pnghash, gifhash] if options.hash: - gen = process_package_with_hash(sys.stdin, options.hash) + gen = process_package_with_hash(sys.stdin, hash_functions, options.hash) else: - gen = process_package(sys.stdin) + gen = process_package(sys.stdin, hash_functions) yaml.safe_dump_all(gen, sys.stdout) if __name__ == "__main__": |