diff options
author | Helmut Grohne <helmut@subdivi.de> | 2014-02-21 22:05:30 +0100 |
---|---|---|
committer | Helmut Grohne <helmut@subdivi.de> | 2014-02-21 22:05:30 +0100 |
commit | f07c4642083e19c90a180b7bc407d54f341a1909 (patch) | |
tree | 13a14f3d70b854bce140e5fdd752ba45d14d4938 | |
parent | 7389e4b00f6add611e8d6b318654056097d6d546 (diff) | |
download | debian-dedup-f07c4642083e19c90a180b7bc407d54f341a1909.tar.gz |
add a "decompressor" that collapses consecutive spaces
-rw-r--r-- | dedup/compression.py | 25 |
1 files changed, 25 insertions, 0 deletions
# Reconstructed from the one-line unified diff of dedup/compression.py
# (index 4ce258c..f0349cf): the "+" markers are dropped and only the complete
# addition, SpaceCompressor, is kept.  The GzipDecompressor and
# DecompressedStream hunks contain context fragments only and are not
# reproduced here.
import re
import struct
import zlib


class SpaceCompressor(object):
    """Not actually a compressor: it squeezes whitespace.

    Each run of whitespace characters is replaced by a single space, and
    the squeezing carries across chunk boundaries.  The object offers
    decompress(), flush(), copy() and unused_data so that it can be driven
    through the same calls as the other decompressor objects in this module
    (DecompressedStream calls decompress() and flush() on its decompressor).
    """
    # Shared, precompiled pattern matching one or more whitespace characters.
    spacerc = re.compile(r"\s+")
    # Never populated: this filter always consumes everything it is given.
    unused_data = ""

    def __init__(self):
        # True when the most recently emitted non-empty output ended in a
        # space, so a leading space in the next chunk must be dropped.
        self.lastspace = False

    def decompress(self, data):
        """Return data with every whitespace run collapsed to one space.

        If the previous output already ended in a space, a leading space of
        this chunk is removed so that no two consecutive spaces are emitted
        across the chunk boundary.

        @param data: the next chunk of text
        @returns: the squeezed chunk (possibly empty)
        """
        if not data:
            # An empty chunk says nothing about spacing.  Keep the remembered
            # state; resetting it here would let "a ", "", " b" produce two
            # consecutive spaces.
            return data
        data = self.spacerc.sub(" ", data)
        # Record the trailing state before the leading space is stripped: a
        # chunk consisting of a single space must still count as "ends in
        # space" even though it contributes no output.
        endsinspace = data.endswith(" ")
        if self.lastspace and data.startswith(" "):
            data = data[1:]
        self.lastspace = endsinspace
        return data

    def flush(self):
        """Return the remaining buffered output, which is always empty."""
        return ""

    def copy(self):
        """Return an independent SpaceCompressor with the same state."""
        new = SpaceCompressor()
        new.lastspace = self.lastspace
        return new