From d467a2a4e85d4b6f09bd2e3dc70466bfcc45a577 Mon Sep 17 00:00:00 2001
From: Helmut Grohne <helmut@subdivi.de>
Date: Wed, 19 Feb 2014 14:19:56 +0100
Subject: GzipDecompressor: don't treat checksum as garbage trailer

---
 dedup/compression.py | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

(limited to 'dedup/compression.py')

diff --git a/dedup/compression.py b/dedup/compression.py
index 869c49f..4ce258c 100644
--- a/dedup/compression.py
+++ b/dedup/compression.py
@@ -5,8 +5,11 @@ class GzipDecompressor(object):
     """An interface to gzip which is similar to bz2.BZ2Decompressor and
     lzma.LZMADecompressor."""
     def __init__(self):
+        self.sawheader = False  # True once the header was skipped
         self.inbuffer = b""
         self.decompressor = None
+        self.crc = 0  # running zlib.crc32 of the decompressed output
+        self.size = 0  # total length of the decompressed output so far
 
     def decompress(self, data):
         """
@@ -16,6 +19,8 @@ class GzipDecompressor(object):
         while True:
             if self.decompressor:
                 data = self.decompressor.decompress(data)
+                self.crc = zlib.crc32(data, self.crc)
+                self.size += len(data)
                 unused_data = self.decompressor.unused_data
                 if not unused_data:
                     return data
@@ -45,13 +50,20 @@ class GzipDecompressor(object):
                 return b""
             data = self.inbuffer[skip:]
             self.inbuffer = b""
+            self.sawheader = True
             self.decompressor = zlib.decompressobj(-zlib.MAX_WBITS)
 
     @property
     def unused_data(self):
         if self.decompressor:
             return self.decompressor.unused_data
+        elif not self.sawheader:
+            return self.inbuffer
         else:
+            expect = struct.pack("<LL", self.crc % 2**32, self.size % 2**32)
+            if self.inbuffer.startswith(expect) and \
+                    self.inbuffer[len(expect):].replace(b"\0", b"") == b"":
+                return b""  # only the CRC32/ISIZE trailer and NUL padding
             return self.inbuffer
 
     def flush(self):
@@ -67,6 +79,9 @@ class GzipDecompressor(object):
         new.inbuffer = self.inbuffer
         if self.decompressor:
             new.decompressor = self.decompressor.copy()
+        new.sawheader = self.sawheader
+        new.crc = self.crc
+        new.size = self.size
         return new
 
 class DecompressedStream(object):
-- 
cgit v1.2.3