diff options
-rw-r--r-- | dedup/compression.py | 32 | ||||
-rwxr-xr-x | importpkg.py | 13 |
2 files changed, 35 insertions, 10 deletions
diff --git a/dedup/compression.py b/dedup/compression.py index 52917e3..5df6613 100644 --- a/dedup/compression.py +++ b/dedup/compression.py @@ -1,7 +1,10 @@ +import bz2 import struct import sys import zlib +import lzma + crc32_type = "L" if sys.version_info.major >= 3 else "l" class GzipDecompressor(object): @@ -151,3 +154,32 @@ class DecompressedStream(object): self.decompressor = None self.buff = b"" self.closed = True + +decompressors = { + '.gz': GzipDecompressor, + '.bz2': bz2.BZ2Decompressor, + '.lzma': lzma.LZMADecompressor, + '.xz': lzma.LZMADecompressor, +} + +def decompress(filelike, extension): + """Decompress a stream according to its extension. + @param filelike: is a read-only byte-stream. It must support read(size) and + close(). + @param extension: permitted values are "", ".gz", ".bz2", ".lzma", and + ".xz" + @type extension: str + @returns: a read-only byte-stream with the decompressed contents of the + original filelike. It supports read(size) and close(). If the + original supports seek(pos) and tell(), then it also supports + those. 
+ @raises ValueError: on unknown extensions + """ + if not extension: + return filelike + try: + decompressor = decompressors[extension] + except KeyError: + raise ValueError("unknown compression format with extension %r" % + extension) + return DecompressedStream(filelike, decompressor()) diff --git a/importpkg.py b/importpkg.py index 01ec87a..dac4bb1 100755 --- a/importpkg.py +++ b/importpkg.py @@ -11,13 +11,12 @@ import sys import tarfile import zlib -import lzma import yaml from dedup.debpkg import DebExtractor, process_control, get_tar_hashes from dedup.hashing import DecompressedHash, SuppressingHash, HashedStream, \ HashBlacklistContent -from dedup.compression import GzipDecompressor, DecompressedStream +from dedup.compression import GzipDecompressor, decompress from dedup.image import GIFHash, PNGHash boring_content = set(("", "\n")) @@ -44,14 +43,8 @@ def gifhash(): return hashobj def decompress_tar(filelike, extension): - if extension in (b".lzma", b".xz"): - filelike = DecompressedStream(filelike, lzma.LZMADecompressor()) - extension = b"" - if extension not in (b"", b".gz", b".bz2"): - raise ValueError("unknown compression format with extension %r" % - extension) - return tarfile.open(fileobj=filelike, - mode="r|" + extension[1:].decode("ascii")) + filelike = decompress(filelike, extension.decode("ascii")) + return tarfile.open(fileobj=filelike, mode="r|") class ProcessingFinished(Exception): pass |