summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Guillem Jover <guillem@debian.org> 2014-05-07 21:06:38 +0200
committer Helmut Grohne <helmut@subdivi.de> 2014-05-11 15:29:11 +0200
commit ddaa08f7a63a1fedf4c1f2804873199dd5182142 (patch)
tree 7e2739619a562f9313141700d9bac6661cd99f26
parent cb3900603b79731891adbe44a1a1b3eb19f16cad (diff)
download debian-dedup-ddaa08f7a63a1fedf4c1f2804873199dd5182142.tar.gz
importpkg: add support for control.tar and control.tar.xz
dpkg has supported these since 1.17.6.

Signed-off-by: Guillem Jover <guillem@debian.org>
-rw-r--r-- dedup/debpkg.py 2
-rwxr-xr-x importpkg.py 61
2 files changed, 39 insertions, 24 deletions
diff --git a/dedup/debpkg.py b/dedup/debpkg.py
index 2d67135..dbee849 100644
--- a/dedup/debpkg.py
+++ b/dedup/debpkg.py
@@ -3,7 +3,7 @@ from debian import deb822
from dedup.hashing import hash_file
def process_control(control_contents):
- """Parses the contents of a control file from a control.tar.gz of a Debian
+ """Parses the contents of a control file from a control.tar of a Debian
package and returns a dictionary containing the fields relevant to dedup.
@type control_contents: bytes
@rtype: {str: object}
diff --git a/importpkg.py b/importpkg.py
index aeccda5..7482c4f 100755
--- a/importpkg.py
+++ b/importpkg.py
@@ -54,41 +54,56 @@ def process_package(filelike, hash_functions):
except EOFError:
raise ValueError("data.tar not found")
if name == "control.tar.gz":
- if state != "start":
- raise ValueError("unexpected control.tar.gz")
- state = "control"
+ new_state = "control"
tf = tarfile.open(fileobj=af, mode="r|gz")
- for elem in tf:
- if elem.name != "./control":
- continue
- if state != "control":
- raise ValueError("duplicate control file")
- state = "control_file"
- yield process_control(tf.extractfile(elem).read())
- break
- continue
+ elif name == "control.tar.xz":
+ new_state = "control"
+ zf = DecompressedStream(af, lzma.LZMADecompressor())
+ tf = tarfile.open(fileobj=zf, mode="r|")
+ elif name == "control.tar":
+ new_state = "control"
+ tf = tarfile.open(fileobj=af, mode="r|")
elif name == "data.tar.gz":
+ new_state = "data"
tf = tarfile.open(fileobj=af, mode="r|gz")
elif name == "data.tar.bz2":
+ new_state = "data"
tf = tarfile.open(fileobj=af, mode="r|bz2")
elif name == "data.tar.xz":
+ new_state = "data"
zf = DecompressedStream(af, lzma.LZMADecompressor())
tf = tarfile.open(fileobj=zf, mode="r|")
elif name == "data.tar":
+ new_state = "data"
tf = tarfile.open(fileobj=af, mode="r|")
else:
continue
- if state != "control_file":
- raise ValueError("missing control file")
- for name, size, hashes in get_tar_hashes(tf, hash_functions):
- try:
- name = name.decode("utf8")
- except UnicodeDecodeError:
- print("warning: skipping filename with encoding error")
- continue # skip files with non-utf8 encoding for now
- yield dict(name=name, size=size, hashes=hashes)
- yield "commit"
- break
+ if new_state == "control":
+ if state != "start":
+ raise ValueError("unexpected control.tar")
+ state = new_state
+ for elem in tf:
+ if elem.name != "./control":
+ continue
+ if state != "control":
+ raise ValueError("duplicate control file")
+ state = "control_file"
+ yield process_control(tf.extractfile(elem).read())
+ break
+ continue
+ elif new_state == "data":
+ if state != "control_file":
+ raise ValueError("missing control file")
+ state = new_state
+ for name, size, hashes in get_tar_hashes(tf, hash_functions):
+ try:
+ name = name.decode("utf8")
+ except UnicodeDecodeError:
+ print("warning: skipping filename with encoding error")
+ continue # skip files with non-utf8 encoding for now
+ yield dict(name=name, size=size, hashes=hashes)
+ yield "commit"
+ break
def process_package_with_hash(filelike, hash_functions, sha256hash):
hstream = HashedStream(filelike, hashlib.sha256())