summaryrefslogtreecommitdiff
path: root/dedup/compression.py
diff options
context:
space:
mode:
authorHelmut Grohne <helmut@subdivi.de>2014-03-08 12:39:32 +0100
committerHelmut Grohne <helmut@subdivi.de>2014-03-08 12:39:32 +0100
commitc6a30cefff55cd247a47fa0a2d4f819592e1202b (patch)
tree58b6ff52bc6827782c2973f1ce976e245ce5f34c /dedup/compression.py
parent751f19ec1107c9059ae4834e4b757741ebee6cbd (diff)
parentbb0aea9971bc79d8787d8f034022d0ca803fcab3 (diff)
downloaddebian-dedup-c6a30cefff55cd247a47fa0a2d4f819592e1202b.tar.gz
Merge branch 'master' into sqlalchemy
In the meantime, the master branch evolved quite a bit and the schema changed again (eqclass added to the function table). The main reason for the merge is to resolve the large number of conflicts once, so that development of the sqlalchemy branch can continue and still benefit from changes in the master branch, such as schema compatibility and the adapted indent level in the web app due to the use of contextlib.closing, which resembles sqlalchemy's "with db.begin() as conn:". Conflicts: autoimport.py dedup/utils.py readyaml.py update_sharing.py webapp.py
Diffstat (limited to 'dedup/compression.py')
-rw-r--r--dedup/compression.py15
1 files changed, 15 insertions, 0 deletions
diff --git a/dedup/compression.py b/dedup/compression.py
index 869c49f..4ce258c 100644
--- a/dedup/compression.py
+++ b/dedup/compression.py
@@ -5,8 +5,11 @@ class GzipDecompressor(object):
"""An interface to gzip which is similar to bz2.BZ2Decompressor and
lzma.LZMADecompressor."""
def __init__(self):
+ self.sawheader = False
self.inbuffer = b""
self.decompressor = None
+ self.crc = 0
+ self.size = 0
def decompress(self, data):
"""
@@ -16,6 +19,8 @@ class GzipDecompressor(object):
while True:
if self.decompressor:
data = self.decompressor.decompress(data)
+ self.crc = zlib.crc32(data, self.crc)
+ self.size += len(data)
unused_data = self.decompressor.unused_data
if not unused_data:
return data
@@ -45,13 +50,20 @@ class GzipDecompressor(object):
return b""
data = self.inbuffer[skip:]
self.inbuffer = b""
+ self.sawheader = True
self.decompressor = zlib.decompressobj(-zlib.MAX_WBITS)
@property
def unused_data(self):
if self.decompressor:
return self.decompressor.unused_data
+ elif not self.sawheader:
+ return self.inbuffer
else:
+ expect = struct.pack("<ll", self.crc, self.size)
+ if self.inbuffer.startswith(expect) and \
+ self.inbuffer[len(expect):].replace("\0", "") == "":
+ return b""
return self.inbuffer
def flush(self):
@@ -67,6 +79,9 @@ class GzipDecompressor(object):
new.inbuffer = self.inbuffer
if self.decompressor:
new.decompressor = self.decompressor.copy()
+ new.sawheader = self.sawheader
+ new.crc = self.crc
+ new.size = self.size
return new
class DecompressedStream(object):