From 27b95909f061ae3ecb3ba1b8d46adfef98ca5e6f Mon Sep 17 00:00:00 2001 From: Helmut Grohne Date: Sun, 16 Feb 2020 08:21:20 +0100 Subject: drop support for Python 2.x --- dedup/compression.py | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) (limited to 'dedup/compression.py') diff --git a/dedup/compression.py b/dedup/compression.py index 8d1912b..161eda2 100644 --- a/dedup/compression.py +++ b/dedup/compression.py @@ -1,13 +1,10 @@ import bz2 import struct -import sys import zlib import lzma -crc32_type = "L" if sys.version_info.major >= 3 else "l" - -class GzipDecompressor(object): +class GzipDecompressor: """An interface to gzip which is similar to bz2.BZ2Decompressor and lzma.LZMADecompressor.""" def __init__(self): @@ -66,7 +63,7 @@ class GzipDecompressor(object): elif not self.sawheader: return self.inbuffer else: - expect = struct.pack("<" + crc32_type + "L", self.crc, self.size) + expect = struct.pack(" Date: Wed, 29 Dec 2021 12:00:26 +0100 Subject: DecompressedStream: eliminate redundant closed field --- dedup/compression.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'dedup/compression.py') diff --git a/dedup/compression.py b/dedup/compression.py index 161eda2..ea921c4 100644 --- a/dedup/compression.py +++ b/dedup/compression.py @@ -103,10 +103,9 @@ class DecompressedStream: self.decompressor = decompressor self.buff = b"" self.pos = 0 - self.closed = False def _fill_buff_until(self, predicate): - assert not self.closed + assert self.fileobj is not None data = True while True: if predicate(self.buff) or not data: @@ -143,12 +142,12 @@ class DecompressedStream: return iter(self.readline, b'') def tell(self): - assert not self.closed + assert self.fileobj is not None return self.pos def seek(self, pos): """Forward seeks by absolute position only.""" - assert not self.closed + assert self.fileobj is not None if pos < self.pos: raise ValueError("negative seek not allowed on decompressed stream") while True: 
@@ -162,12 +161,11 @@ class DecompressedStream: return def close(self): - if not self.closed: + if self.fileobj is not None: self.fileobj.close() self.fileobj = None self.decompressor = None self.buff = b"" - self.closed = True decompressors = { '.gz': GzipDecompressor, -- cgit v1.2.3 From 775bdde52ad5375773c0635e4ce52f74cb820525 Mon Sep 17 00:00:00 2001 From: Helmut Grohne Date: Wed, 29 Dec 2021 13:43:48 +0100 Subject: DecompressedStream: avoid mixing types for variable data The local variable data can be bool or bytes. That's inconvenient for static type checkers. Avoid doing so. --- dedup/compression.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'dedup/compression.py') diff --git a/dedup/compression.py b/dedup/compression.py index ea921c4..9cd63e5 100644 --- a/dedup/compression.py +++ b/dedup/compression.py @@ -106,15 +106,13 @@ class DecompressedStream: def _fill_buff_until(self, predicate): assert self.fileobj is not None - data = True - while True: - if predicate(self.buff) or not data: - return + while not predicate(self.buff): data = self.fileobj.read(self.blocksize) if data: self.buff += self.decompressor.decompress(data) elif hasattr(self.decompressor, "flush"): self.buff += self.decompressor.flush() + break def _read_from_buff(self, length): ret = self.buff[:length] -- cgit v1.2.3 From 35c22db308a91e82ed4a5f6a9c937c186d81d810 Mon Sep 17 00:00:00 2001 From: Helmut Grohne Date: Wed, 29 Dec 2021 22:14:50 +0100 Subject: DecompressedStream: fix endless loop Fixes: 775bdde52ad5 ("DecompressedStream: avoid mixing types for variable data") --- dedup/compression.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'dedup/compression.py') diff --git a/dedup/compression.py b/dedup/compression.py index 9cd63e5..6d361ac 100644 --- a/dedup/compression.py +++ b/dedup/compression.py @@ -110,8 +110,9 @@ class DecompressedStream: data = self.fileobj.read(self.blocksize) if data: self.buff += 
self.decompressor.decompress(data) - elif hasattr(self.decompressor, "flush"): - self.buff += self.decompressor.flush() + else: + if hasattr(self.decompressor, "flush"): + self.buff += self.decompressor.flush() break def _read_from_buff(self, length): -- cgit v1.2.3 From 0b4882ecf657d70dd3236dcf176e083bf08dccdd Mon Sep 17 00:00:00 2001 From: Helmut Grohne Date: Thu, 30 Dec 2021 17:52:38 +0100 Subject: DecompressedStream: improve performance When the decompression ratio is huge, we may be faced with a large (multiple megabytes) bytes object. Slicing that object incurs a copy, so repeatedly trimming it becomes O(n^2), while appending to and trimming a bytearray is much faster. --- dedup/compression.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'dedup/compression.py') diff --git a/dedup/compression.py b/dedup/compression.py index 6d361ac..da6e9a0 100644 --- a/dedup/compression.py +++ b/dedup/compression.py @@ -101,7 +101,7 @@ class DecompressedStream: """ self.fileobj = fileobj self.decompressor = decompressor - self.buff = b"" + self.buff = bytearray() self.pos = 0 def _fill_buff_until(self, predicate): @@ -116,8 +116,8 @@ class DecompressedStream: break def _read_from_buff(self, length): - ret = self.buff[:length] - self.buff = self.buff[length:] + ret = bytes(self.buff[:length]) + self.buff[:length] = b"" self.pos += length return ret @@ -164,7 +164,7 @@ class DecompressedStream: self.fileobj.close() self.fileobj = None self.decompressor = None - self.buff = b"" + self.buff = bytearray() decompressors = { '.gz': GzipDecompressor, -- cgit v1.2.3