summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorHelmut Grohne <helmut@subdivi.de>2014-02-21 22:05:30 +0100
committerHelmut Grohne <helmut@subdivi.de>2014-02-21 22:05:30 +0100
commitf07c4642083e19c90a180b7bc407d54f341a1909 (patch)
tree13a14f3d70b854bce140e5fdd752ba45d14d4938
parent7389e4b00f6add611e8d6b318654056097d6d546 (diff)
downloaddebian-dedup-f07c4642083e19c90a180b7bc407d54f341a1909.tar.gz
add a "decompressor" that collapses consecutive spaces
-rw-r--r--dedup/compression.py25
1 files changed, 25 insertions, 0 deletions
diff --git a/dedup/compression.py b/dedup/compression.py
index 4ce258c..f0349cf 100644
--- a/dedup/compression.py
+++ b/dedup/compression.py
@@ -1,5 +1,6 @@
import struct
import zlib
+import re
class GzipDecompressor(object):
"""An interface to gzip which is similar to bz2.BZ2Decompressor and
@@ -116,3 +117,27 @@ class DecompressedStream(object):
self.buff += self.decompressor.decompress(data)
else:
self.buff += self.decompressor.flush()
+
class SpaceCompressor(object):
    """Not actually a compressor. It squeezes whitespace.

    Every run of whitespace characters (anything matched by ``\\s``) is
    replaced with a single space character. The input may be fed in
    arbitrary chunks via repeated decompress() calls; a whitespace run that
    straddles a chunk boundary is still collapsed to one space.

    The class exposes decompress(), flush(), copy() and unused_data,
    presumably so it can be used wherever this module's other decompressor
    objects are used -- verify against callers.
    """
    # Compiled once at class level and shared by all instances.
    spacerc = re.compile(r"\s+")
    # Always empty: this object never leaves any input unconsumed.
    unused_data = ""

    def __init__(self):
        # True when the most recently emitted non-empty chunk ended in a
        # space, i.e. the next chunk must not start with another one.
        self.lastspace = False

    def decompress(self, data):
        """Return data with each whitespace run collapsed to one space.

        A leading space is dropped when the previous non-empty chunk already
        ended in whitespace, so the concatenated output never contains two
        consecutive spaces.
        """
        if not data:
            # An empty chunk carries no information about trailing
            # whitespace. It must not reset self.lastspace, otherwise
            # feeding "a ", "" and " b" would produce "a  b".
            return data
        data = self.spacerc.sub(" ", data)
        # Record this before stripping: a chunk consisting only of
        # whitespace becomes empty below but still ends the output in a
        # (previously emitted) space.
        endswithspace = data[-1:] == " "
        if self.lastspace and data[0:1] == " ":
            data = data[1:]
        self.lastspace = endswithspace
        return data

    def flush(self):
        """Return the remaining output, which is always empty.

        Nothing is buffered between decompress() calls.
        """
        return ""

    def copy(self):
        """Return an independent SpaceCompressor with the same state."""
        new = SpaceCompressor()
        new.lastspace = self.lastspace
        return new