diff options
Diffstat (limited to 'importpkg.py')
-rwxr-xr-x | importpkg.py | 31 |
1 files changed, 27 insertions, 4 deletions
diff --git a/importpkg.py b/importpkg.py
index dac4bb1..e8cc2fa 100755
--- a/importpkg.py
+++ b/importpkg.py
@@ -42,9 +42,32 @@ def gifhash():
     hashobj.name = "gif_sha512"
     return hashobj
 
-def decompress_tar(filelike, extension):
-    filelike = decompress(filelike, extension.decode("ascii"))
-    return tarfile.open(fileobj=filelike, mode="r|")
+if sys.version_info.major >= 3:
+    def decompress_tar(filelike, extension):
+        filelike = decompress(filelike, extension.decode("ascii"))
+        return tarfile.open(fileobj=filelike, mode="r|")
+
+    def decodetarname(name):
+        """Decoded name of a tarinfo.
+        @raises UnicodeDecodeError:
+        """
+        try:
+            name.encode("utf8", "strict")
+        except UnicodeEncodeError as e:
+            if e.reason == "surrogates not allowed":
+                name.encode("utf8", "surrogateescape").decode("utf8", "strict")
+        return name
+else:
+    def decompress_tar(filelike, extension):
+        filelike = decompress(filelike, extension.decode("ascii"))
+        return tarfile.open(fileobj=filelike, mode="r|", encoding="utf8",
+                            errors="surrogateescape")
+
+    def decodetarname(name):
+        """Decoded name of a tarinfo.
+        @raises UnicodeDecodeError:
+        """
+        return name.decode("utf8")
 
 class ProcessingFinished(Exception):
     pass
@@ -77,7 +100,7 @@ class ImportpkgExtractor(DebExtractor):
             tf = decompress_tar(filelike, name[8:])
             for name, size, hashes in get_tar_hashes(tf, self.hash_functions):
                 try:
-                    name = name.decode("utf8")
+                    name = decodetarname(name)
                 except UnicodeDecodeError:
                     print("warning: skipping filename with encoding error")
                     continue # skip files with non-utf8 encoding for now