summary refs log tree commit diff
diff options
context:
space:
mode:
-rwxr-xr-x importpkg.py 31
1 files changed, 27 insertions, 4 deletions
diff --git a/importpkg.py b/importpkg.py
index dac4bb1..e8cc2fa 100755
--- a/importpkg.py
+++ b/importpkg.py
@@ -42,9 +42,32 @@ def gifhash():
hashobj.name = "gif_sha512"
return hashobj
-def decompress_tar(filelike, extension):
- filelike = decompress(filelike, extension.decode("ascii"))
- return tarfile.open(fileobj=filelike, mode="r|")
+if sys.version_info.major >= 3:  # names are handled as str in this branch
+    def decompress_tar(filelike, extension):
+        filelike = decompress(filelike, extension.decode("ascii"))
+        return tarfile.open(fileobj=filelike, mode="r|", encoding="utf8",
+                            errors="surrogateescape")
+
+    def decodetarname(name):
+        """Return the tarinfo name unchanged if it encodes as strict UTF-8.
+        @raises UnicodeDecodeError: the name carries surrogate-escaped bytes
+        """
+        try:
+            name.encode("utf8", "strict")
+        except UnicodeEncodeError as e:
+            if e.reason == "surrogates not allowed":
+                name.encode("utf8", "surrogateescape").decode("utf8", "strict")
+        return name
+else:  # names are handled as byte strings in this branch
+    def decompress_tar(filelike, extension):
+        filelike = decompress(filelike, extension.decode("ascii"))
+        return tarfile.open(fileobj=filelike, mode="r|")
+
+    def decodetarname(name):
+        """Return the tarinfo name decoded from UTF-8 bytes.
+        @raises UnicodeDecodeError: the name is not valid UTF-8
+        """
+        return name.decode("utf8")
class ProcessingFinished(Exception):
pass
@@ -77,7 +100,7 @@ class ImportpkgExtractor(DebExtractor):
tf = decompress_tar(filelike, name[8:])
for name, size, hashes in get_tar_hashes(tf, self.hash_functions):
try:
- name = name.decode("utf8")
+ name = decodetarname(name)
except UnicodeDecodeError:
print("warning: skipping filename with encoding error")
continue # skip files with non-utf8 encoding for now