diff options
Diffstat (limited to 'importpkg.py')
-rwxr-xr-x | importpkg.py | 58 |
1 files changed, 33 insertions, 25 deletions
diff --git a/importpkg.py b/importpkg.py index f72cf03..0798f13 100755 --- a/importpkg.py +++ b/importpkg.py @@ -14,8 +14,7 @@ import zlib import lzma import yaml -from dedup.arreader import ArReader -from dedup.debpkg import process_control, get_tar_hashes +from dedup.debpkg import DebExtractor, process_control, get_tar_hashes from dedup.hashing import DecompressedHash, SuppressingHash, HashedStream, \ HashBlacklistContent from dedup.compression import GzipDecompressor, DecompressedStream @@ -54,42 +53,46 @@ def decompress_tar(filelike, extension): return tarfile.open(fileobj=filelike, mode="r|" + extension[1:].decode("ascii")) -def process_package(filelike, hash_functions, callback): - af = ArReader(filelike) - af.read_magic() - state = "start" - while True: - try: - name = af.read_entry() - except EOFError: - raise ValueError("data.tar not found") +class ProcessingFinished(Exception): + pass + +class ImportpkgExtractor(DebExtractor): + def __init__(self, hash_functions, callback): + self.state = "start" + self.hash_functions = hash_functions + self.callback = callback + + def handle_ar_member(self, name, filelike): if name.startswith(b"control.tar"): - if state != "start": + if self.state != "start": raise ValueError("unexpected control.tar") - state = "control" - tf = decompress_tar(af, name[11:]) + self.state = "control" + tf = decompress_tar(filelike, name[11:]) for elem in tf: if elem.name not in ("./control", "control"): continue - if state != "control": + if self.state != "control": raise ValueError("duplicate control file") - state = "control_file" - callback(process_control(tf.extractfile(elem).read())) + self.state = "control_file" + self.callback(process_control(tf.extractfile(elem).read())) break - continue elif name.startswith(b"data.tar"): - if state != "control_file": + if self.state != "control_file": raise ValueError("missing control file") - state = "data" - tf = decompress_tar(af, name[8:]) - for name, size, hashes in get_tar_hashes(tf, hash_functions): + self.state = "data" + tf = decompress_tar(filelike, name[8:]) + for name, size, hashes in get_tar_hashes(tf, self.hash_functions): try: name = name.decode("utf8") except UnicodeDecodeError: print("warning: skipping filename with encoding error") continue # skip files with non-utf8 encoding for now - callback(dict(name=name, size=size, hashes=hashes)) - break + self.callback(dict(name=name, size=size, hashes=hashes)) + raise ProcessingFinished() + + def handle_ar_end(self): + if self.state != "data": + raise ValueError("data.tar not found") def main(): parser = optparse.OptionParser() @@ -105,7 +108,12 @@ def main(): dumper.open() if options.hash: stdin = HashedStream(stdin, hashlib.sha256()) - process_package(stdin, hash_functions, dumper.represent) + try: + ImportpkgExtractor(hash_functions, dumper.represent).process(stdin) + except ProcessingFinished: + pass + else: + raise RuntimeError("unexpected termination of extractor") if options.hash: stdin.validate(options.hash) dumper.represent("commit") |