diff options
Diffstat (limited to 'importpkg.py')
-rwxr-xr-x | importpkg.py | 22 |
1 files changed, 8 insertions, 14 deletions
diff --git a/importpkg.py b/importpkg.py index ce4a446..4693401 100755 --- a/importpkg.py +++ b/importpkg.py @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/python3 """This tool reads a Debian package from stdin and emits a yaml stream on stdout. It does not access a database. Therefore it can be run in parallel and on multiple machines. The generated yaml contains multiple documents. The first @@ -8,15 +8,12 @@ And finally a document consisting of the string "commit" is emitted.""" import argparse import hashlib import sys +import urllib.request import zlib -try: - from urllib.request import urlopen -except ImportError: - from urllib2 import urlopen import yaml -from dedup.debpkg import DebExtractor, decodetarname, get_tar_hashes +from dedup.debpkg import DebExtractor, get_tar_hashes from dedup.hashing import DecompressedHash, SuppressingHash, HashedStream, \ HashBlacklistContent from dedup.compression import GzipDecompressor @@ -63,7 +60,7 @@ class ImportpkgExtractor(DebExtractor): # deb822 currently returns :any dependencies raw. see #670679 deprelations = info.relations.get("depends", []) + \ info.relations.get("pre-depends", []) - depends = set(dep[0]["name"].split(u':', 1)[0] + depends = set(dep[0]["name"].split(':', 1)[0] for dep in deprelations if len(dep) == 1) self.callback(dict(package=info["package"], source=source, version=info["version"], @@ -73,22 +70,19 @@ class ImportpkgExtractor(DebExtractor): for name, size, hashes in get_tar_hashes(tarfileobj, self.hash_functions): try: - name = decodetarname(name) - except UnicodeDecodeError: + name.encode("utf8", "strict") + except UnicodeEncodeError: print("warning: skipping filename with encoding error") continue # skip files with non-utf8 encoding for now self.callback(dict(name=name, size=size, hashes=hashes)) raise ProcessingFinished() def main(): - try: - stdin = sys.stdin.buffer - except AttributeError: # python2 - stdin = sys.stdin parser = argparse.ArgumentParser() parser.add_argument("-H", "--hash", action="store", help="verify that stdin hash given sha256 hash") - parser.add_argument("input", nargs='?', default=stdin, type=urlopen, + parser.add_argument("input", nargs='?', default=sys.stdin.buffer, + type=urllib.request.urlopen, help="read from this location instead of stdin") args = parser.parse_args() dumper = yaml.SafeDumper(sys.stdout) |