diff options
author | Guillem Jover <guillem@debian.org> | 2014-05-07 21:06:38 +0200 |
---|---|---|
committer | Helmut Grohne <helmut@subdivi.de> | 2014-05-11 15:29:11 +0200 |
commit | ddaa08f7a63a1fedf4c1f2804873199dd5182142 (patch) | |
tree | 7e2739619a562f9313141700d9bac6661cd99f26 | |
parent | cb3900603b79731891adbe44a1a1b3eb19f16cad (diff) | |
download | debian-dedup-ddaa08f7a63a1fedf4c1f2804873199dd5182142.tar.gz |
importpkg: add support for control.tar and control.tar.xz
dpkg has supported those since version 1.17.6.
Signed-off-by: Guillem Jover <guillem@debian.org>
-rw-r--r-- | dedup/debpkg.py | 2 | ||||
-rwxr-xr-x | importpkg.py | 61 |
2 files changed, 39 insertions, 24 deletions
diff --git a/dedup/debpkg.py b/dedup/debpkg.py index 2d67135..dbee849 100644 --- a/dedup/debpkg.py +++ b/dedup/debpkg.py @@ -3,7 +3,7 @@ from debian import deb822 from dedup.hashing import hash_file def process_control(control_contents): - """Parses the contents of a control file from a control.tar.gz of a Debian + """Parses the contents of a control file from a control.tar of a Debian package and returns a dictionary containing the fields relevant to dedup. @type control_contents: bytes @rtype: {str: object} diff --git a/importpkg.py b/importpkg.py index aeccda5..7482c4f 100755 --- a/importpkg.py +++ b/importpkg.py @@ -54,41 +54,56 @@ def process_package(filelike, hash_functions): except EOFError: raise ValueError("data.tar not found") if name == "control.tar.gz": - if state != "start": - raise ValueError("unexpected control.tar.gz") - state = "control" + new_state = "control" tf = tarfile.open(fileobj=af, mode="r|gz") - for elem in tf: - if elem.name != "./control": - continue - if state != "control": - raise ValueError("duplicate control file") - state = "control_file" - yield process_control(tf.extractfile(elem).read()) - break - continue + elif name == "control.tar.xz": + new_state = "control" + zf = DecompressedStream(af, lzma.LZMADecompressor()) + tf = tarfile.open(fileobj=zf, mode="r|") + elif name == "control.tar": + new_state = "control" + tf = tarfile.open(fileobj=af, mode="r|") elif name == "data.tar.gz": + new_state = "data" tf = tarfile.open(fileobj=af, mode="r|gz") elif name == "data.tar.bz2": + new_state = "data" tf = tarfile.open(fileobj=af, mode="r|bz2") elif name == "data.tar.xz": + new_state = "data" zf = DecompressedStream(af, lzma.LZMADecompressor()) tf = tarfile.open(fileobj=zf, mode="r|") elif name == "data.tar": + new_state = "data" tf = tarfile.open(fileobj=af, mode="r|") else: continue - if state != "control_file": - raise ValueError("missing control file") - for name, size, hashes in get_tar_hashes(tf, hash_functions): - try: - name = 
name.decode("utf8") - except UnicodeDecodeError: - print("warning: skipping filename with encoding error") - continue # skip files with non-utf8 encoding for now - yield dict(name=name, size=size, hashes=hashes) - yield "commit" - break + if new_state == "control": + if state != "start": + raise ValueError("unexpected control.tar") + state = new_state + for elem in tf: + if elem.name != "./control": + continue + if state != "control": + raise ValueError("duplicate control file") + state = "control_file" + yield process_control(tf.extractfile(elem).read()) + break + continue + elif new_state == "data": + if state != "control_file": + raise ValueError("missing control file") + state = new_state + for name, size, hashes in get_tar_hashes(tf, hash_functions): + try: + name = name.decode("utf8") + except UnicodeDecodeError: + print("warning: skipping filename with encoding error") + continue # skip files with non-utf8 encoding for now + yield dict(name=name, size=size, hashes=hashes) + yield "commit" + break def process_package_with_hash(filelike, hash_functions, sha256hash): hstream = HashedStream(filelike, hashlib.sha256()) |