summary refs log tree commit diff
path: root/importpkg.py
diff options
context:
space:
mode:
Diffstat (limited to 'importpkg.py')
-rwxr-xr-x importpkg.py 58
1 files changed, 33 insertions, 25 deletions
diff --git a/importpkg.py b/importpkg.py
index f72cf03..0798f13 100755
--- a/importpkg.py
+++ b/importpkg.py
@@ -14,8 +14,7 @@ import zlib
import lzma
import yaml
-from dedup.arreader import ArReader
-from dedup.debpkg import process_control, get_tar_hashes
+from dedup.debpkg import DebExtractor, process_control, get_tar_hashes
from dedup.hashing import DecompressedHash, SuppressingHash, HashedStream, \
HashBlacklistContent
from dedup.compression import GzipDecompressor, DecompressedStream
@@ -54,42 +53,46 @@ def decompress_tar(filelike, extension):
return tarfile.open(fileobj=filelike,
mode="r|" + extension[1:].decode("ascii"))
-def process_package(filelike, hash_functions, callback):
- af = ArReader(filelike)
- af.read_magic()
- state = "start"
- while True:
- try:
- name = af.read_entry()
- except EOFError:
- raise ValueError("data.tar not found")
+class ProcessingFinished(Exception):
+ pass
+
+class ImportpkgExtractor(DebExtractor):
+ def __init__(self, hash_functions, callback):
+ self.state = "start"
+ self.hash_functions = hash_functions
+ self.callback = callback
+
+ def handle_ar_member(self, name, filelike):
if name.startswith(b"control.tar"):
- if state != "start":
+ if self.state != "start":
raise ValueError("unexpected control.tar")
- state = "control"
- tf = decompress_tar(af, name[11:])
+ self.state = "control"
+ tf = decompress_tar(filelike, name[11:])
for elem in tf:
if elem.name not in ("./control", "control"):
continue
- if state != "control":
+ if self.state != "control":
raise ValueError("duplicate control file")
- state = "control_file"
- callback(process_control(tf.extractfile(elem).read()))
+ self.state = "control_file"
+ self.callback(process_control(tf.extractfile(elem).read()))
break
- continue
elif name.startswith(b"data.tar"):
- if state != "control_file":
+ if self.state != "control_file":
raise ValueError("missing control file")
- state = "data"
- tf = decompress_tar(af, name[8:])
- for name, size, hashes in get_tar_hashes(tf, hash_functions):
+ self.state = "data"
+ tf = decompress_tar(filelike, name[8:])
+ for name, size, hashes in get_tar_hashes(tf, self.hash_functions):
try:
name = name.decode("utf8")
except UnicodeDecodeError:
print("warning: skipping filename with encoding error")
continue # skip files with non-utf8 encoding for now
- callback(dict(name=name, size=size, hashes=hashes))
- break
+ self.callback(dict(name=name, size=size, hashes=hashes))
+ raise ProcessingFinished()
+
+ def handle_ar_end(self):
+ if self.state != "data":
+ raise ValueError("data.tar not found")
def main():
parser = optparse.OptionParser()
@@ -105,7 +108,12 @@ def main():
dumper.open()
if options.hash:
stdin = HashedStream(stdin, hashlib.sha256())
- process_package(stdin, hash_functions, dumper.represent)
+ try:
+ ImportpkgExtractor(hash_functions, dumper.represent).process(stdin)
+ except ProcessingFinished:
+ pass
+ else:
+ raise RuntimeError("unexpected termination of extractor")
if options.hash:
stdin.validate(options.hash)
dumper.represent("commit")