class SpaceCompressor(object):
    """Not actually a compressor. It squeezes whitespace.

    Every run of whitespace characters is replaced with a single space.
    Input is fed chunk by chunk through decompress(); the object remembers
    whether the output emitted so far ends with a space, so a whitespace
    run that straddles a chunk boundary still produces exactly one space.

    The method names (decompress, flush, copy) and the unused_data
    attribute mirror a decompressor-style object so that the class can be
    used where such an object is expected.
    """

    # Matches one or more consecutive whitespace characters.
    spacerc = re.compile(r"\s+")
    # Never assigned anywhere else in this class, so it is always empty.
    unused_data = ""

    def __init__(self):
        # True when the output produced so far ends with a space.
        self.lastspace = False

    def decompress(self, data):
        """Return data with each whitespace run collapsed to one space.

        If the previously emitted output ended with a space and this chunk
        starts with whitespace, the leading space is dropped so that the
        concatenated output never contains two consecutive spaces.
        """
        if not data:
            # An empty chunk emits nothing, so it must not touch lastspace.
            # Recomputing the flag from "" would reset it to False and let
            # a doubled space through on the next chunk.
            return data
        data = self.spacerc.sub(" ", data)
        # Record the trailing state before a leading space is removed: a
        # chunk consisting only of whitespace shrinks to "" below, yet the
        # overall output still ends with a space.
        ends_with_space = data.endswith(" ")
        if self.lastspace and data.startswith(" "):
            data = data[1:]
        self.lastspace = ends_with_space
        return data

    def flush(self):
        """Return the empty string; no output is ever held back."""
        return ""

    def copy(self):
        """Return a new SpaceCompressor carrying the same lastspace state."""
        new = SpaceCompressor()
        new.lastspace = self.lastspace
        return new