summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorHelmut Grohne <helmut@subdivi.de>2014-02-19 14:19:56 +0100
committerHelmut Grohne <helmut@subdivi.de>2014-02-19 14:19:56 +0100
commitd467a2a4e85d4b6f09bd2e3dc70466bfcc45a577 (patch)
treec82e409a3e9b104b8d20569cfe72e895e8b8d128
parent17597b5e828f9bbc9b0159102b173c284c23a140 (diff)
downloaddebian-dedup-d467a2a4e85d4b6f09bd2e3dc70466bfcc45a577.tar.gz
GzipDecompressor: don't treat checksum as garbage trailer
-rw-r--r--dedup/compression.py15
1 files changed, 15 insertions, 0 deletions
diff --git a/dedup/compression.py b/dedup/compression.py
index 869c49f..4ce258c 100644
--- a/dedup/compression.py
+++ b/dedup/compression.py
@@ -5,8 +5,11 @@ class GzipDecompressor(object):
"""An interface to gzip which is similar to bz2.BZ2Decompressor and
lzma.LZMADecompressor."""
def __init__(self):
+ self.sawheader = False
self.inbuffer = b""
self.decompressor = None
+ self.crc = 0
+ self.size = 0
def decompress(self, data):
"""
@@ -16,6 +19,8 @@ class GzipDecompressor(object):
while True:
if self.decompressor:
data = self.decompressor.decompress(data)
+ self.crc = zlib.crc32(data, self.crc)
+ self.size += len(data)
unused_data = self.decompressor.unused_data
if not unused_data:
return data
@@ -45,13 +50,20 @@ class GzipDecompressor(object):
return b""
data = self.inbuffer[skip:]
self.inbuffer = b""
+ self.sawheader = True
self.decompressor = zlib.decompressobj(-zlib.MAX_WBITS)
@property
def unused_data(self):
if self.decompressor:
return self.decompressor.unused_data
+ elif not self.sawheader:
+ return self.inbuffer
else:
+ expect = struct.pack("<ll", self.crc, self.size)
+ if self.inbuffer.startswith(expect) and \
+ self.inbuffer[len(expect):].replace("\0", "") == "":
+ return b""
return self.inbuffer
def flush(self):
@@ -67,6 +79,9 @@ class GzipDecompressor(object):
new.inbuffer = self.inbuffer
if self.decompressor:
new.decompressor = self.decompressor.copy()
+ new.sawheader = self.sawheader
+ new.crc = self.crc
+ new.size = self.size
return new
class DecompressedStream(object):