diff options
author | Helmut Grohne <helmut@subdivi.de> | 2016-04-28 21:35:42 +0200 |
---|---|---|
committer | Helmut Grohne <helmut@subdivi.de> | 2016-04-28 21:35:42 +0200 |
commit | 8e326cadab47896f77666b0a4c7f434cdefc83da (patch) | |
tree | 7691ac50e247be2c1db9e70f4379cb3165d9fef5 /importpkg.py | |
parent | 9ad30297a535a8a8f9bdc945288b02d75a379cb6 (diff) | |
download | debian-dedup-8e326cadab47896f77666b0a4c7f434cdefc83da.tar.gz |
support Python 3.x in importpkg
In Python 2.x, TarInfo.name is a bytes object. In Python 3.x,
TarInfo.name is always a unicode (str) object. To keep importpkg from
crashing with an exception, we direct the Python 3.x decoding to use the
surrogateescape error handler. Thus decoding the name boils down to
checking whether it contains surrogates.
Diffstat (limited to 'importpkg.py')
-rwxr-xr-x | importpkg.py | 31 |
1 files changed, 27 insertions, 4 deletions
diff --git a/importpkg.py b/importpkg.py index dac4bb1..e8cc2fa 100755 --- a/importpkg.py +++ b/importpkg.py @@ -42,9 +42,32 @@ def gifhash(): hashobj.name = "gif_sha512" return hashobj -def decompress_tar(filelike, extension): - filelike = decompress(filelike, extension.decode("ascii")) - return tarfile.open(fileobj=filelike, mode="r|") +if sys.version_info.major >= 3: + def decompress_tar(filelike, extension): + filelike = decompress(filelike, extension.decode("ascii")) + return tarfile.open(fileobj=filelike, mode="r|") + + def decodetarname(name): + """Decoded name of a tarinfo. + @raises UnicodeDecodeError: + """ + try: + name.encode("utf8", "strict") + except UnicodeEncodeError as e: + if e.reason == "surrogates not allowed": + name.encode("utf8", "surrogateescape").decode("utf8", "strict") + return name +else: + def decompress_tar(filelike, extension): + filelike = decompress(filelike, extension.decode("ascii")) + return tarfile.open(fileobj=filelike, mode="r|", encoding="utf8", + errors="surrogateescape") + + def decodetarname(name): + """Decoded name of a tarinfo. + @raises UnicodeDecodeError: + """ + return name.decode("utf8") class ProcessingFinished(Exception): pass @@ -77,7 +100,7 @@ class ImportpkgExtractor(DebExtractor): tf = decompress_tar(filelike, name[8:]) for name, size, hashes in get_tar_hashes(tf, self.hash_functions): try: - name = name.decode("utf8") + name = decodetarname(name) except UnicodeDecodeError: print("warning: skipping filename with encoding error") continue # skip files with non-utf8 encoding for now |