summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--dedup/compression.py32
-rwxr-xr-ximportpkg.py13
2 files changed, 35 insertions, 10 deletions
diff --git a/dedup/compression.py b/dedup/compression.py
index 52917e3..5df6613 100644
--- a/dedup/compression.py
+++ b/dedup/compression.py
@@ -1,7 +1,10 @@
+import bz2
import struct
import sys
import zlib
+import lzma
+
crc32_type = "L" if sys.version_info.major >= 3 else "l"
class GzipDecompressor(object):
@@ -151,3 +154,32 @@ class DecompressedStream(object):
self.decompressor = None
self.buff = b""
self.closed = True
+
+decompressors = {
+ '.gz': GzipDecompressor,
+ '.bz2': bz2.BZ2Decompressor,
+ '.lzma': lzma.LZMADecompressor,
+ '.xz': lzma.LZMADecompressor,
+}
+
+def decompress(filelike, extension):
+ """Decompress a stream according to its extension.
+ @param filelike: is a read-only byte-stream. It must support read(size) and
+ close().
+ @param extension: permitted values are "", ".gz", ".bz2", ".lzma", and
+ ".xz"
+ @type extension: str
+ @returns: a read-only byte-stream with the decompressed contents of the
+ original filelike. It supports read(size) and close(). If the
+ original supports seek(pos) and tell(), then it also supports
+ those.
+ @raises ValueError: on unknown extensions
+ """
+ if not extension:
+ return filelike
+ try:
+ decompressor = decompressors[extension]
+ except KeyError:
+ raise ValueError("unknown compression format with extension %r" %
+ extension)
+ return DecompressedStream(filelike, decompressor())
diff --git a/importpkg.py b/importpkg.py
index 01ec87a..dac4bb1 100755
--- a/importpkg.py
+++ b/importpkg.py
@@ -11,13 +11,12 @@ import sys
import tarfile
import zlib
-import lzma
import yaml
from dedup.debpkg import DebExtractor, process_control, get_tar_hashes
from dedup.hashing import DecompressedHash, SuppressingHash, HashedStream, \
HashBlacklistContent
-from dedup.compression import GzipDecompressor, DecompressedStream
+from dedup.compression import GzipDecompressor, decompress
from dedup.image import GIFHash, PNGHash
boring_content = set(("", "\n"))
@@ -44,14 +43,8 @@ def gifhash():
return hashobj
def decompress_tar(filelike, extension):
- if extension in (b".lzma", b".xz"):
- filelike = DecompressedStream(filelike, lzma.LZMADecompressor())
- extension = b""
- if extension not in (b"", b".gz", b".bz2"):
- raise ValueError("unknown compression format with extension %r" %
- extension)
- return tarfile.open(fileobj=filelike,
- mode="r|" + extension[1:].decode("ascii"))
+ filelike = decompress(filelike, extension.decode("ascii"))
+ return tarfile.open(fileobj=filelike, mode="r|")
class ProcessingFinished(Exception):
pass