summary refs log tree commit diff
path: root/dedup
diff options
context:
space:
mode:
Diffstat (limited to 'dedup')
-rw-r--r-- dedup/compression.py 40
1 files changed, 36 insertions, 4 deletions
diff --git a/dedup/compression.py b/dedup/compression.py
index 4fd7320..52917e3 100644
--- a/dedup/compression.py
+++ b/dedup/compression.py
@@ -88,8 +88,8 @@ class GzipDecompressor(object):
return new
class DecompressedStream(object):
- """Turn a readable file-like into a decompressed file-like. Te only part
- of being file-like consists of the read(size) method in both cases."""
+ """Turn a readable file-like into a decompressed file-like. It supports
+ read(optional length), tell, seek(forward only) and close."""
blocksize = 65536
def __init__(self, fileobj, decompressor):
@@ -102,20 +102,52 @@ class DecompressedStream(object):
self.fileobj = fileobj
self.decompressor = decompressor
self.buff = b""
+ self.pos = 0
+ self.closed = False
def read(self, length=None):
+ assert not self.closed
data = True
while True:
if length is not None and len(self.buff) >= length:
ret = self.buff[:length]
self.buff = self.buff[length:]
- return ret
+ break
elif not data: # read EOF in last iteration
ret = self.buff
self.buff = b""
- return ret
+ break
data = self.fileobj.read(self.blocksize)
if data:
self.buff += self.decompressor.decompress(data)
else:
self.buff += self.decompressor.flush()
+ self.pos += len(ret)
+ return ret
+
+ def tell(self):
+ assert not self.closed
+ return self.pos
+
+ def seek(self, pos):
+ """Forward seeks by absolute position only."""
+ assert not self.closed
+ if pos < self.pos:
+ raise ValueError("negative seek not allowed on decompressed stream")
+ while True:
+ left = pos - self.pos
+ # Reading self.buff entirely avoids string concatenation.
+ size = len(self.buff) or self.blocksize
+ if left > size:
+ self.read(size)
+ else:
+ self.read(left)
+ return
+
+ def close(self):
+ if not self.closed:
+ self.fileobj.close()
+ self.fileobj = None
+ self.decompressor = None
+ self.buff = b""
+ self.closed = True