From a24b9125ae91cb26e56bac3752b6e38e1dbf264e Mon Sep 17 00:00:00 2001
From: Helmut Grohne
Date: Thu, 28 Apr 2016 20:28:11 +0200
Subject: extend functionality of DecompressedStream

It now supports:
 * tell()
 * seek(absolute_position), forward only
 * close()
 * closed

This is sufficient for putting it as a fileobj into tarfile.TarFile. By
doing so we can decouple decompression from tar processing, which eases
papering over the Python 2.x vs Python 3.x differences.
---
 dedup/compression.py | 40 ++++++++++++++++++++++++++++++++++++----
 1 file changed, 36 insertions(+), 4 deletions(-)

diff --git a/dedup/compression.py b/dedup/compression.py
index 4fd7320..52917e3 100644
--- a/dedup/compression.py
+++ b/dedup/compression.py
@@ -88,8 +88,8 @@ class GzipDecompressor(object):
         return new
 
 class DecompressedStream(object):
-    """Turn a readable file-like into a decompressed file-like. Te only part
-    of being file-like consists of the read(size) method in both cases."""
+    """Turn a readable file-like into a decompressed file-like. It supports
+    read(optional length), tell, seek(forward only) and close."""
     blocksize = 65536
 
     def __init__(self, fileobj, decompressor):
@@ -102,20 +102,52 @@ class DecompressedStream(object):
         self.fileobj = fileobj
         self.decompressor = decompressor
         self.buff = b""
+        self.pos = 0
+        self.closed = False
 
     def read(self, length=None):
+        assert not self.closed
         data = True
         while True:
             if length is not None and len(self.buff) >= length:
                 ret = self.buff[:length]
                 self.buff = self.buff[length:]
-                return ret
+                break
             elif not data: # read EOF in last iteration
                 ret = self.buff
                 self.buff = b""
-                return ret
+                break
             data = self.fileobj.read(self.blocksize)
             if data:
                 self.buff += self.decompressor.decompress(data)
             else:
                 self.buff += self.decompressor.flush()
+        self.pos += len(ret)
+        return ret
+
+    def tell(self):
+        assert not self.closed
+        return self.pos
+
+    def seek(self, pos):
+        """Forward seeks by absolute position only."""
+        assert not self.closed
+        if pos < self.pos:
+            raise ValueError("negative seek not allowed on decompressed stream")
+        while True:
+            left = pos - self.pos
+            # Reading self.buff entirely avoids string concatenation.
+            size = len(self.buff) or self.blocksize
+            if left > size:
+                self.read(size)
+            else:
+                self.read(left)
+                return
+
+    def close(self):
+        if not self.closed:
+            self.fileobj.close()
+            self.fileobj = None
+            self.decompressor = None
+            self.buff = b""
+            self.closed = True
-- 
cgit v1.2.3