diff options
author | Helmut Grohne <helmut@subdivi.de> | 2016-04-28 21:35:42 +0200 |
---|---|---|
committer | Helmut Grohne <helmut@subdivi.de> | 2016-04-28 21:35:42 +0200 |
commit | 8e326cadab47896f77666b0a4c7f434cdefc83da (patch) | |
tree | 7691ac50e247be2c1db9e70f4379cb3165d9fef5 | |
parent | 9ad30297a535a8a8f9bdc945288b02d75a379cb6 (diff) | |
download | debian-dedup-8e326cadab47896f77666b0a4c7f434cdefc83da.tar.gz |
support Python 3.x in importpkg
In Python 2.x, TarInfo.name is a bytes object. In Python 3.x,
TarInfo.name is always a unicode (str) object. To avoid importpkg crashing
with an exception, we direct the Python 3.x decoding to use the
surrogateescape error handler. Thus decoding the name boils down to
checking whether it contains surrogates.
-rwxr-xr-x | importpkg.py | 31 |
1 files changed, 27 insertions, 4 deletions
diff --git a/importpkg.py b/importpkg.py
index dac4bb1..e8cc2fa 100755
--- a/importpkg.py
+++ b/importpkg.py
@@ -42,9 +42,32 @@ def gifhash():
     hashobj.name = "gif_sha512"
     return hashobj
 
-def decompress_tar(filelike, extension):
-    filelike = decompress(filelike, extension.decode("ascii"))
-    return tarfile.open(fileobj=filelike, mode="r|")
+if sys.version_info.major >= 3:
+    def decompress_tar(filelike, extension):
+        filelike = decompress(filelike, extension.decode("ascii"))
+        return tarfile.open(fileobj=filelike, mode="r|")
+
+    def decodetarname(name):
+        """Decoded name of a tarinfo.
+        @raises UnicodeDecodeError:
+        """
+        try:
+            name.encode("utf8", "strict")
+        except UnicodeEncodeError as e:
+            if e.reason == "surrogates not allowed":
+                name.encode("utf8", "surrogateescape").decode("utf8", "strict")
+        return name
+else:
+    def decompress_tar(filelike, extension):
+        filelike = decompress(filelike, extension.decode("ascii"))
+        return tarfile.open(fileobj=filelike, mode="r|", encoding="utf8",
+                            errors="surrogateescape")
+
+    def decodetarname(name):
+        """Decoded name of a tarinfo.
+        @raises UnicodeDecodeError:
+        """
+        return name.decode("utf8")
 
 class ProcessingFinished(Exception):
     pass
@@ -77,7 +100,7 @@ class ImportpkgExtractor(DebExtractor):
             tf = decompress_tar(filelike, name[8:])
             for name, size, hashes in get_tar_hashes(tf, self.hash_functions):
                 try:
-                    name = name.decode("utf8")
+                    name = decodetarname(name)
                 except UnicodeDecodeError:
                     print("warning: skipping filename with encoding error")
                     continue # skip files with non-utf8 encoding for now