summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--dedup/compression.py25
1 files changed, 25 insertions, 0 deletions
diff --git a/dedup/compression.py b/dedup/compression.py
index 4ce258c..f0349cf 100644
--- a/dedup/compression.py
+++ b/dedup/compression.py
@@ -1,5 +1,6 @@
import struct
import zlib
+import re
class GzipDecompressor(object):
"""An interface to gzip which is similar to bz2.BZ2Decompressor and
@@ -116,3 +117,27 @@ class DecompressedStream(object):
self.buff += self.decompressor.decompress(data)
else:
self.buff += self.decompressor.flush()
+
class SpaceCompressor(object):
    """Squeeze every run of whitespace in a chunked text stream to one space.

    Not actually a compressor. It only offers the ``decompress`` / ``flush`` /
    ``copy`` methods and the ``unused_data`` attribute, and it remembers
    whether the previous output ended in a space so that a whitespace run
    split across two chunks is still reduced to a single space.
    """

    # Matches any run of one or more whitespace characters.
    spacerc = re.compile(r"\s+")
    # Input is always consumed completely, so this is never anything but empty.
    unused_data = ""

    def __init__(self):
        # True when the most recently emitted output ended with a space.
        self.lastspace = False

    def decompress(self, data):
        """Return *data* with each whitespace run replaced by a single space.

        A leading space is dropped when the previously emitted output already
        ended with one, so feeding the stream in pieces gives the same result
        as feeding it in one go.

        :param data: next chunk of text; may be empty.
        :returns: the squeezed chunk (possibly empty).
        """
        if not data:
            # An empty chunk emits nothing, so it must not reset the
            # trailing-space state carried over from the previous chunk.
            return data
        data = self.spacerc.sub(" ", data)
        # Record the trailing space before a leading one is possibly removed:
        # a chunk that is a lone " " still counts as ending in a space.
        newlastspace = data[-1:] == " "
        if self.lastspace and data[0:1] == " ":
            data = data[1:]
        self.lastspace = newlastspace
        return data

    def flush(self):
        """Return the pending output, which is always empty (nothing is buffered)."""
        return ""

    def copy(self):
        """Return a new SpaceCompressor carrying the same trailing-space state."""
        new = SpaceCompressor()
        new.lastspace = self.lastspace
        return new