summary refs log tree commit diff
path: root/dedup/compression.py
diff options
context:
space:
mode:
author Helmut Grohne <helmut@subdivi.de> 2014-07-22 08:56:42 +0200
committer Helmut Grohne <helmut@subdivi.de> 2014-07-22 08:56:42 +0200
commit 04597f25729740406775a3dff528c9774c84efd5 (patch)
tree fe905fc94afbdcfad60d5aaf88886a1f10f92a8c /dedup/compression.py
parent ba9ae116e0bbb25e2df327ba48c82472ccfa2690 (diff)
parent d48c3c208ee6ba54225b3eb68ce5c9f3c894bfa4 (diff)
download debian-dedup-04597f25729740406775a3dff528c9774c84efd5.tar.gz
Merge branch master into multiarch
Resolve accumulated conflicts. In particular webapp.py gained a few non-trivial ones, such as changes in InternalRedirect or usage of contextlib.closing. Conflicts: schema.sql webapp.py
Diffstat (limited to 'dedup/compression.py')
-rw-r--r-- dedup/compression.py 15
1 files changed, 15 insertions, 0 deletions
diff --git a/dedup/compression.py b/dedup/compression.py
index 869c49f..4ce258c 100644
--- a/dedup/compression.py
+++ b/dedup/compression.py
@@ -5,8 +5,11 @@ class GzipDecompressor(object):
"""An interface to gzip which is similar to bz2.BZ2Decompressor and
lzma.LZMADecompressor."""
def __init__(self):
# Initialise an empty decompressor state; nothing has been consumed yet.
# sawheader is switched to True at the point where the raw-deflate zlib
# object is created, so it distinguishes "header not parsed yet" from
# "header parsed, no active zlib object".
+ self.sawheader = False
# Buffered input bytes; sliced once the header length (skip) is known.
self.inbuffer = b""
# zlib decompression object; None until one is created after the header.
self.decompressor = None
# Running CRC-32 and byte count of the decompressed output. Both are
# updated after each zlib decompress call and later packed by unused_data
# to recognise the trailer that follows the compressed data.
+ self.crc = 0
+ self.size = 0
def decompress(self, data):
"""
@@ -16,6 +19,8 @@ class GzipDecompressor(object):
while True:
if self.decompressor:
data = self.decompressor.decompress(data)
+ self.crc = zlib.crc32(data, self.crc)
+ self.size += len(data)
unused_data = self.decompressor.unused_data
if not unused_data:
return data
@@ -45,13 +50,20 @@ class GzipDecompressor(object):
return b""
data = self.inbuffer[skip:]
self.inbuffer = b""
+ self.sawheader = True
self.decompressor = zlib.decompressobj(-zlib.MAX_WBITS)
@property
def unused_data(self):
    """Return the buffered input that does not belong to the gzip stream.

    * While a zlib decompressor is active, its own ``unused_data`` is
      returned.
    * Before the gzip header has been seen, the buffered input is
      returned unchanged.
    * Otherwise (header seen, no active decompressor -- presumably the
      deflate stream has ended) the buffered input is compared against
      the expected gzip trailer: CRC-32 followed by the uncompressed
      size, both little-endian unsigned 32-bit.  If the buffer starts
      with that trailer and is followed only by NUL padding, ``b""`` is
      returned; otherwise the buffer is returned unchanged.
    """
    if self.decompressor:
        return self.decompressor.unused_data
    if not self.sawheader:
        return self.inbuffer
    # The trailer fields are unsigned 32-bit and the size is stored modulo
    # 2**32.  Masking keeps struct.pack from raising on outputs of 2 GiB or
    # more and yields the same bytes whether zlib.crc32 returned a signed
    # (Python 2) or unsigned (Python 3) value.
    expect = struct.pack("<LL", self.crc & 0xffffffff,
                         self.size & 0xffffffff)
    if self.inbuffer.startswith(expect):
        padding = self.inbuffer[len(expect):]
        # Use bytes literals: the buffer is bytes, and mixing in text
        # literals raises TypeError on Python 3.
        if padding.replace(b"\0", b"") == b"":
            return b""
    return self.inbuffer
def flush(self):
@@ -67,6 +79,9 @@ class GzipDecompressor(object):
new.inbuffer = self.inbuffer
if self.decompressor:
new.decompressor = self.decompressor.copy()
+ new.sawheader = self.sawheader
+ new.crc = self.crc
+ new.size = self.size
return new
class DecompressedStream(object):