summary refs log tree commit diff
path: root/importpkg.py
diff options
context:
space:
mode:
Diffstat (limited to 'importpkg.py')
-rwxr-xr-x importpkg.py 38
1 files changed, 13 insertions, 25 deletions
diff --git a/importpkg.py b/importpkg.py
index ce4a446..7bca70b 100755
--- a/importpkg.py
+++ b/importpkg.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python
+#!/usr/bin/python3
"""This tool reads a Debian package from stdin and emits a yaml stream on
stdout. It does not access a database. Therefore it can be run in parallel and
on multiple machines. The generated yaml contains multiple documents. The first
@@ -8,42 +8,33 @@ And finally a document consisting of the string "commit" is emitted."""
import argparse
import hashlib
import sys
+import urllib.request
import zlib
-try:
- from urllib.request import urlopen
-except ImportError:
- from urllib2 import urlopen
import yaml
-from dedup.debpkg import DebExtractor, decodetarname, get_tar_hashes
+from dedup.debpkg import DebExtractor, get_tar_hashes
from dedup.hashing import DecompressedHash, SuppressingHash, HashedStream, \
HashBlacklistContent
from dedup.compression import GzipDecompressor
from dedup.image import GIFHash, PNGHash
-boring_content = set(("", "\n"))
+boring_content = set((b"", b"\n"))
def sha512_nontrivial():
return HashBlacklistContent(hashlib.sha512(), boring_content)
def gziphash():
- hashobj = DecompressedHash(GzipDecompressor(), hashlib.sha512())
+ hashobj = hashlib.sha512()
+ hashobj = DecompressedHash(GzipDecompressor(), hashobj, "gzip_sha512")
hashobj = SuppressingHash(hashobj, (ValueError, zlib.error))
- hashobj.name = "gzip_sha512"
return HashBlacklistContent(hashobj, boring_content)
def pnghash():
- hashobj = PNGHash(hashlib.sha512())
- hashobj = SuppressingHash(hashobj, (ValueError,))
- hashobj.name = "png_sha512"
- return hashobj
+ return SuppressingHash(PNGHash(hashlib.sha512()), (ValueError,))
def gifhash():
- hashobj = GIFHash(hashlib.sha512())
- hashobj = SuppressingHash(hashobj, (ValueError,))
- hashobj.name = "gif_sha512"
- return hashobj
+ return SuppressingHash(GIFHash(hashlib.sha512()), (ValueError,))
class ProcessingFinished(Exception):
pass
@@ -63,7 +54,7 @@ class ImportpkgExtractor(DebExtractor):
# deb822 currently returns :any dependencies raw. see #670679
deprelations = info.relations.get("depends", []) + \
info.relations.get("pre-depends", [])
- depends = set(dep[0]["name"].split(u':', 1)[0]
+ depends = set(dep[0]["name"].split(':', 1)[0]
for dep in deprelations if len(dep) == 1)
self.callback(dict(package=info["package"], source=source,
version=info["version"],
@@ -73,22 +64,19 @@ class ImportpkgExtractor(DebExtractor):
for name, size, hashes in get_tar_hashes(tarfileobj,
self.hash_functions):
try:
- name = decodetarname(name)
- except UnicodeDecodeError:
+ name.encode("utf8", "strict")
+ except UnicodeEncodeError:
print("warning: skipping filename with encoding error")
continue # skip files with non-utf8 encoding for now
self.callback(dict(name=name, size=size, hashes=hashes))
raise ProcessingFinished()
def main():
- try:
- stdin = sys.stdin.buffer
- except AttributeError: # python2
- stdin = sys.stdin
parser = argparse.ArgumentParser()
parser.add_argument("-H", "--hash", action="store",
help="verify that stdin hash given sha256 hash")
- parser.add_argument("input", nargs='?', default=stdin, type=urlopen,
+ parser.add_argument("input", nargs='?', default=sys.stdin.buffer,
+ type=urllib.request.urlopen,
help="read from this location instead of stdin")
args = parser.parse_args()
dumper = yaml.SafeDumper(sys.stdout)