diff options
author | Helmut Grohne <helmut@subdivi.de> | 2014-03-08 12:39:32 +0100 |
---|---|---|
committer | Helmut Grohne <helmut@subdivi.de> | 2014-03-08 12:39:32 +0100 |
commit | c6a30cefff55cd247a47fa0a2d4f819592e1202b (patch) | |
tree | 58b6ff52bc6827782c2973f1ce976e245ce5f34c /dedup/compression.py | |
parent | 751f19ec1107c9059ae4834e4b757741ebee6cbd (diff) | |
parent | bb0aea9971bc79d8787d8f034022d0ca803fcab3 (diff) | |
download | debian-dedup-c6a30cefff55cd247a47fa0a2d4f819592e1202b.tar.gz |
Merge branch 'master' into sqlalchemy
In the mean time, the master branch evolved quite a bit and the schema
changed again (eqclass added to function table). The main reason for the
merge is to resolve the large amounts of conflicts once, so development
of the sqlalchemy branch can continue and still benefit from changes in
the master branch such as schema compatibility, adapting the indent
level in web app due to the use of contextlib.closing which resembles
sqlalchemy's "with db.begin() as conn:".
Conflicts:
autoimport.py
dedup/utils.py
readyaml.py
update_sharing.py
webapp.py
Diffstat (limited to 'dedup/compression.py')
-rw-r--r-- | dedup/compression.py | 15 |
1 files changed, 15 insertions, 0 deletions
diff --git a/dedup/compression.py b/dedup/compression.py index 869c49f..4ce258c 100644 --- a/dedup/compression.py +++ b/dedup/compression.py @@ -5,8 +5,11 @@ class GzipDecompressor(object): """An interface to gzip which is similar to bz2.BZ2Decompressor and lzma.LZMADecompressor.""" def __init__(self): + self.sawheader = False self.inbuffer = b"" self.decompressor = None + self.crc = 0 + self.size = 0 def decompress(self, data): """ @@ -16,6 +19,8 @@ class GzipDecompressor(object): while True: if self.decompressor: data = self.decompressor.decompress(data) + self.crc = zlib.crc32(data, self.crc) + self.size += len(data) unused_data = self.decompressor.unused_data if not unused_data: return data @@ -45,13 +50,20 @@ class GzipDecompressor(object): return b"" data = self.inbuffer[skip:] self.inbuffer = b"" + self.sawheader = True self.decompressor = zlib.decompressobj(-zlib.MAX_WBITS) @property def unused_data(self): if self.decompressor: return self.decompressor.unused_data + elif not self.sawheader: + return self.inbuffer else: + expect = struct.pack("<ll", self.crc, self.size) + if self.inbuffer.startswith(expect) and \ + self.inbuffer[len(expect):].replace("\0", "") == "": + return b"" return self.inbuffer def flush(self): @@ -67,6 +79,9 @@ class GzipDecompressor(object): new.inbuffer = self.inbuffer if self.decompressor: new.decompressor = self.decompressor.copy() + new.sawheader = self.sawheader + new.crc = self.crc + new.size = self.size return new class DecompressedStream(object): |