diff options
Diffstat (limited to 'dedup/debpkg.py')
-rw-r--r-- | dedup/debpkg.py | 50 |
1 files changed, 26 insertions, 24 deletions
diff --git a/dedup/debpkg.py b/dedup/debpkg.py index de00e60..0d1b7da 100644 --- a/dedup/debpkg.py +++ b/dedup/debpkg.py @@ -1,26 +1,29 @@ import tarfile +import typing import arpy from debian import deb822 from dedup.compression import decompress -from dedup.hashing import hash_file +from dedup.hashing import HashlibLike, hash_file class MultiHash: - def __init__(self, *hashes): + def __init__(self, *hashes: HashlibLike): self.hashes = hashes - def update(self, data): + def update(self, data: bytes) -> None: for hasher in self.hashes: hasher.update(data) -def get_tar_hashes(tar, hash_functions): + +def get_tar_hashes( + tar: tarfile.TarFile, + hash_functions: typing.Sequence[typing.Callable[[], HashlibLike]], +) -> typing.Iterator[typing.Tuple[str, int, typing.Dict[str, str]]]: """Given a TarFile read all regular files and compute all of the given hash functions on each file. - @type tar: tarfile.TarFile @param hash_functions: a sequence of parameter-less functions each creating a new hashlib-like object - @rtype: gen((str, int, {str: str}} @returns: an iterable of (filename, filesize, hashes) tuples where hashes is a dict mapping hash function names to hash values """ @@ -29,7 +32,9 @@ def get_tar_hashes(tar, hash_functions): if not elem.isreg(): # excludes hard links as well continue hasher = MultiHash(*[func() for func in hash_functions]) - hash_file(hasher, tar.extractfile(elem)) + extracted = tar.extractfile(elem) + assert extracted is not None + hash_file(hasher, extracted) hashes = {} for hashobj in hasher.hashes: hashvalue = hashobj.hexdigest() @@ -37,17 +42,18 @@ def get_tar_hashes(tar, hash_functions): hashes[hashobj.name] = hashvalue yield (elem.name, elem.size, hashes) -def opentar(filelike): + +def opentar(filelike: typing.BinaryIO) -> tarfile.TarFile: return tarfile.open(fileobj=filelike, mode="r|", encoding="utf8", errors="surrogateescape") class DebExtractor: "Base class for extracting desired features from a Debian package." - def __init__(self): + def __init__(self) -> None: self.arstate = "start" - def process(self, filelike): + def process(self, filelike: typing.BinaryIO) -> None: """Process a Debian package. @param filelike: is a file-like object containing the contents of the Debian packge and can be read once without seeks. @@ -89,22 +95,20 @@ class DebExtractor: else: assert self.arstate == "data" - def handle_ar_end(self): + def handle_ar_end(self) -> None: "Handle the end of the ar archive of the Debian package." if self.arstate != "data": raise ValueError("data.tar not found") - def handle_debversion(self, version): + def handle_debversion(self, version: bytes) -> None: """Handle the debian-binary member of the Debian package. - @type version: bytes @param version: The full contents of the ar member. """ - def handle_control_tar(self, tarfileobj): + def handle_control_tar(self, tarfileobj: tarfile.TarFile) -> None: """Handle the control.tar member of the Debian package. If you replace this method, none of handle_control_member, handle_control_info or handle_control_end are called. - @type tarfileobj: tarfile.TarFile @param tarfile: is opened for streaming reads """ controlseen = False @@ -113,7 +117,9 @@ class DebExtractor: name = elem.name if name.startswith("./"): name = name[2:] - content = tarfileobj.extractfile(elem).read() + extracted = tarfileobj.extractfile(elem) + assert extracted is not None + content = extracted.read() self.handle_control_member(name, content) if name == "control": self.handle_control_info(deb822.Packages(content)) @@ -125,24 +131,20 @@ class DebExtractor: raise ValueError("control missing from control.tar") self.handle_control_end() - def handle_control_member(self, name, content): + def handle_control_member(self, name: str, content: bytes) -> None: """Handle a file member of the control.tar member of the Debian package. - @type name: str @param name: is the plain member name - @type content: bytes """ - def handle_control_info(self, info): + def handle_control_info(self, info: deb822.Packages) -> None: """Handle the control member of the control.tar member of the Debian package. - @type info: deb822.Packages """ - def handle_control_end(self): + def handle_control_end(self) -> None: "Handle the end of the control.tar member of the Debian package." - def handle_data_tar(self, tarfileobj): + def handle_data_tar(self, tarfileobj: tarfile.TarFile) -> None: """Handle the data.tar member of the Debian package. - @type tarfileobj: tarfile.TarFile @param tarfile: is opened for streaming reads """ |