author     Helmut Grohne <helmut@subdivi.de>    2021-12-30 17:52:38 +0100
committer  Helmut Grohne <helmut@subdivi.de>    2021-12-30 17:52:38 +0100
commit     0b4882ecf657d70dd3236dcf176e083bf08dccdd (patch)
tree       c688aa1964a111c75f7035fa243df16030ba3ba1
parent     35c22db308a91e82ed4a5f6a9c937c186d81d810 (diff)
download   debian-dedup-0b4882ecf657d70dd3236dcf176e083bf08dccdd.tar.gz
DecompressedStream: improve performance
When the decompression ratio is huge, we may be faced with a large
(multiple megabytes) bytes object. Slicing that object incurs a copy, so
repeated reads become O(n^2), while appending to and trimming a bytearray
is much faster.
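To illustrate the rationale, here is a minimal, self-contained sketch (not part of
the repository) contrasting the two buffering strategies; the chunk size, read size,
round count, and function names are illustrative assumptions, not the dedup code.

    import timeit

    CHUNK = 1 << 16   # 64 KiB appended per iteration (made-up figure)
    READ = 1 << 12    # 4 KiB consumed per read (made-up figure)
    ROUNDS = 200

    def slice_bytes():
        # Pre-patch style: buff is an immutable bytes object.
        buff = b""
        out = 0
        for _ in range(ROUNDS):
            buff += b"x" * CHUNK      # decompressor hands us another chunk
            ret = buff[:READ]         # copy the prefix the caller asked for
            buff = buff[READ:]        # copy the growing remainder -> O(n^2) overall
            out += len(ret)
        return out

    def trim_bytearray():
        # Post-patch style: buff is a mutable bytearray trimmed in place.
        buff = bytearray()
        out = 0
        for _ in range(ROUNDS):
            buff += b"x" * CHUNK      # in-place extend
            ret = bytes(buff[:READ])  # callers still receive bytes
            buff[:READ] = b""         # drop the consumed prefix; CPython avoids
                                      # copying the tail when trimming the front
            out += len(ret)
        return out

    if __name__ == "__main__":
        print("bytes slicing:  %.3fs" % timeit.timeit(slice_bytes, number=3))
        print("bytearray trim: %.3fs" % timeit.timeit(trim_bytearray, number=3))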
-rw-r--r--  dedup/compression.py | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/dedup/compression.py b/dedup/compression.py
index 6d361ac..da6e9a0 100644
--- a/dedup/compression.py
+++ b/dedup/compression.py
@@ -101,7 +101,7 @@ class DecompressedStream:
         """
         self.fileobj = fileobj
         self.decompressor = decompressor
-        self.buff = b""
+        self.buff = bytearray()
         self.pos = 0
 
     def _fill_buff_until(self, predicate):
@@ -116,8 +116,8 @@ class DecompressedStream:
                 break
 
     def _read_from_buff(self, length):
-        ret = self.buff[:length]
-        self.buff = self.buff[length:]
+        ret = bytes(self.buff[:length])
+        self.buff[:length] = b""
         self.pos += length
         return ret
 
@@ -164,7 +164,7 @@ class DecompressedStream:
         self.fileobj.close()
         self.fileobj = None
         self.decompressor = None
-        self.buff = b""
+        self.buff = bytearray()
 
 decompressors = {
     '.gz': GzipDecompressor,
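For reference, a tiny standalone model of the changed read pattern (assumed helper
names, not the actual DecompressedStream class) showing that the rewritten code
returns the same bytes while consuming its buffer in place:

    def read_old(buff, length):
        # pre-patch pattern: buff is bytes; both lines copy
        ret = buff[:length]
        buff = buff[length:]
        return ret, buff

    def read_new(buff, length):
        # post-patch pattern: buff is a bytearray mutated in place
        ret = bytes(buff[:length])   # callers still receive bytes
        buff[:length] = b""          # drop the consumed prefix without rebinding
        return ret, buff

    old_ret, old_rest = read_old(b"hello world", 5)
    new_buff = bytearray(b"hello world")
    new_ret, _ = read_new(new_buff, 5)
    assert old_ret == new_ret == b"hello"
    assert old_rest == bytes(new_buff) == b" world"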