summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Helmut Grohne <helmut@subdivi.de> 2016-04-28 20:28:11 +0200
committer Helmut Grohne <helmut@subdivi.de> 2016-04-28 20:28:11 +0200
commit a24b9125ae91cb26e56bac3752b6e38e1dbf264e (patch)
tree a27f0e48dbf877b460d8d735a65f2ed183113594
parent cf5e9f8c9714361042147889ab4b24225c4d07a0 (diff)
download debian-dedup-a24b9125ae91cb26e56bac3752b6e38e1dbf264e.tar.gz
extend functionality of DecompressedStream
It now supports:
 * tell()
 * seek(absolute_position), forward only
 * close()
 * closed

This is sufficient for putting it as a fileobj into tarfile.TarFile. By doing so we can decouple decompression from tar processing, which eases papering over the Python 2.x vs Python 3.x differences.
-rw-r--r-- dedup/compression.py 40
1 files changed, 36 insertions, 4 deletions
diff --git a/dedup/compression.py b/dedup/compression.py
index 4fd7320..52917e3 100644
--- a/dedup/compression.py
+++ b/dedup/compression.py
@@ -88,8 +88,8 @@ class GzipDecompressor(object):
return new
class DecompressedStream(object):
- """Turn a readable file-like into a decompressed file-like. Te only part
- of being file-like consists of the read(size) method in both cases."""
+ """Turn a readable file-like into a decompressed file-like. It supports
+ read(optional length), tell, seek(forward only) and close."""
blocksize = 65536
def __init__(self, fileobj, decompressor):
@@ -102,20 +102,52 @@ class DecompressedStream(object):
self.fileobj = fileobj
self.decompressor = decompressor
self.buff = b""
+ self.pos = 0
+ self.closed = False
def read(self, length=None):
+ assert not self.closed
data = True
while True:
if length is not None and len(self.buff) >= length:
ret = self.buff[:length]
self.buff = self.buff[length:]
- return ret
+ break
elif not data: # read EOF in last iteration
ret = self.buff
self.buff = b""
- return ret
+ break
data = self.fileobj.read(self.blocksize)
if data:
self.buff += self.decompressor.decompress(data)
else:
self.buff += self.decompressor.flush()
+ self.pos += len(ret)
+ return ret
+
+ def tell(self):
+ assert not self.closed
+ return self.pos
+
+ def seek(self, pos):
+ """Forward seeks by absolute position only."""
+ assert not self.closed
+ if pos < self.pos:
+ raise ValueError("negative seek not allowed on decompressed stream")
+ while True:
+ left = pos - self.pos
+ # Reading self.buff entirely avoids string concatenation.
+ size = len(self.buff) or self.blocksize
+ if left > size:
+ self.read(size)
+ else:
+ self.read(left)
+ return
+
+ def close(self):
+ if not self.closed:
+ self.fileobj.close()
+ self.fileobj = None
+ self.decompressor = None
+ self.buff = b""
+ self.closed = True