summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Helmut Grohne <helmut@subdivi.de> 2021-12-30 17:52:38 +0100
committer: Helmut Grohne <helmut@subdivi.de> 2021-12-30 17:52:38 +0100
commit: 0b4882ecf657d70dd3236dcf176e083bf08dccdd (patch)
tree: c688aa1964a111c75f7035fa243df16030ba3ba1
parent: 35c22db308a91e82ed4a5f6a9c937c186d81d810 (diff)
download: debian-dedup-0b4882ecf657d70dd3236dcf176e083bf08dccdd.tar.gz
DecompressedStream: improve performance
When the decompression ratio is huge, we may be faced with a large (multiple megabytes) bytes object. Slicing that object incurs a copy each time, so consuming it in small reads becomes O(n^2), while appending to and trimming a bytearray is much faster.
-rw-r--r-- dedup/compression.py 8
1 files changed, 4 insertions, 4 deletions
diff --git a/dedup/compression.py b/dedup/compression.py
index 6d361ac..da6e9a0 100644
--- a/dedup/compression.py
+++ b/dedup/compression.py
@@ -101,7 +101,7 @@ class DecompressedStream:
"""
self.fileobj = fileobj
self.decompressor = decompressor
- self.buff = b""
+ self.buff = bytearray()
self.pos = 0
def _fill_buff_until(self, predicate):
@@ -116,8 +116,8 @@ class DecompressedStream:
break
def _read_from_buff(self, length):
- ret = self.buff[:length]
- self.buff = self.buff[length:]
+ ret = bytes(self.buff[:length])
+ self.buff[:length] = b""
self.pos += length
return ret
@@ -164,7 +164,7 @@ class DecompressedStream:
self.fileobj.close()
self.fileobj = None
self.decompressor = None
- self.buff = b""
+ self.buff = bytearray()
decompressors = {
'.gz': GzipDecompressor,