diff options
author | Helmut Grohne <helmut@subdivi.de> | 2021-12-31 15:45:33 +0100 |
---|---|---|
committer | Helmut Grohne <helmut@subdivi.de> | 2021-12-31 15:45:33 +0100 |
commit | f3ea68482e6c01053cb202573d953e8a2e89529f (patch) | |
tree | 4c08f6e5a99bbe5131c0949e7f97cc44cf4a2cbd /importpkg.py | |
parent | f2eda3ba74e5bc5613e84381ebd8bfd343e1c8cc (diff) | |
parent | 5b359b10053cbade539246eec26e86b44793ca40 (diff) | |
download | debian-dedup-f3ea68482e6c01053cb202573d953e8a2e89529f.tar.gz |
Merge branch master into branch multiarchhints
Among other things, this drops Python 2.x support.
Diffstat (limited to 'importpkg.py')
-rwxr-xr-x | importpkg.py | 38 |
1 files changed, 13 insertions, 25 deletions
```diff
diff --git a/importpkg.py b/importpkg.py
index ce4a446..7bca70b 100755
--- a/importpkg.py
+++ b/importpkg.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python
+#!/usr/bin/python3
 """This tool reads a Debian package from stdin and emits a yaml stream on
 stdout. It does not access a database. Therefore it can be run in parallel and
 on multiple machines. The generated yaml contains multiple documents. The first
@@ -8,42 +8,33 @@ And finally a document consisting of the string "commit" is emitted."""
 import argparse
 import hashlib
 import sys
+import urllib.request
 import zlib
-try:
-    from urllib.request import urlopen
-except ImportError:
-    from urllib2 import urlopen
 
 import yaml
 
-from dedup.debpkg import DebExtractor, decodetarname, get_tar_hashes
+from dedup.debpkg import DebExtractor, get_tar_hashes
 from dedup.hashing import DecompressedHash, SuppressingHash, HashedStream, \
         HashBlacklistContent
 from dedup.compression import GzipDecompressor
 from dedup.image import GIFHash, PNGHash
 
-boring_content = set(("", "\n"))
+boring_content = set((b"", b"\n"))
 
 def sha512_nontrivial():
     return HashBlacklistContent(hashlib.sha512(), boring_content)
 
 def gziphash():
-    hashobj = DecompressedHash(GzipDecompressor(), hashlib.sha512())
+    hashobj = hashlib.sha512()
+    hashobj = DecompressedHash(GzipDecompressor(), hashobj, "gzip_sha512")
     hashobj = SuppressingHash(hashobj, (ValueError, zlib.error))
-    hashobj.name = "gzip_sha512"
     return HashBlacklistContent(hashobj, boring_content)
 
 def pnghash():
-    hashobj = PNGHash(hashlib.sha512())
-    hashobj = SuppressingHash(hashobj, (ValueError,))
-    hashobj.name = "png_sha512"
-    return hashobj
+    return SuppressingHash(PNGHash(hashlib.sha512()), (ValueError,))
 
 def gifhash():
-    hashobj = GIFHash(hashlib.sha512())
-    hashobj = SuppressingHash(hashobj, (ValueError,))
-    hashobj.name = "gif_sha512"
-    return hashobj
+    return SuppressingHash(GIFHash(hashlib.sha512()), (ValueError,))
 
 class ProcessingFinished(Exception):
     pass
@@ -63,7 +54,7 @@ class ImportpkgExtractor(DebExtractor):
         # deb822 currently returns :any dependencies raw. see #670679
         deprelations = info.relations.get("depends", []) + \
                        info.relations.get("pre-depends", [])
-        depends = set(dep[0]["name"].split(u':', 1)[0]
+        depends = set(dep[0]["name"].split(':', 1)[0]
                       for dep in deprelations if len(dep) == 1)
         self.callback(dict(package=info["package"], source=source,
                            version=info["version"],
@@ -73,22 +64,19 @@ class ImportpkgExtractor(DebExtractor):
         for name, size, hashes in get_tar_hashes(tarfileobj,
                                                  self.hash_functions):
             try:
-                name = decodetarname(name)
-            except UnicodeDecodeError:
+                name.encode("utf8", "strict")
+            except UnicodeEncodeError:
                 print("warning: skipping filename with encoding error")
                 continue # skip files with non-utf8 encoding for now
             self.callback(dict(name=name, size=size, hashes=hashes))
         raise ProcessingFinished()
 
 def main():
-    try:
-        stdin = sys.stdin.buffer
-    except AttributeError: # python2
-        stdin = sys.stdin
     parser = argparse.ArgumentParser()
     parser.add_argument("-H", "--hash", action="store",
                         help="verify that stdin hash given sha256 hash")
-    parser.add_argument("input", nargs='?', default=stdin, type=urlopen,
+    parser.add_argument("input", nargs='?', default=sys.stdin.buffer,
+                        type=urllib.request.urlopen,
                         help="read from this location instead of stdin")
     args = parser.parse_args()
     dumper = yaml.SafeDumper(sys.stdout)
```