summaryrefslogtreecommitdiff
path: root/importpkg.py
diff options
context:
space:
mode:
authorHelmut Grohne <helmut@subdivi.de>2016-04-28 21:35:42 +0200
committerHelmut Grohne <helmut@subdivi.de>2016-04-28 21:35:42 +0200
commit8e326cadab47896f77666b0a4c7f434cdefc83da (patch)
tree7691ac50e247be2c1db9e70f4379cb3165d9fef5 /importpkg.py
parent9ad30297a535a8a8f9bdc945288b02d75a379cb6 (diff)
downloaddebian-dedup-8e326cadab47896f77666b0a4c7f434cdefc83da.tar.gz
support Python 3.x in importpkg
In Python 2.x, TarInfo.name is a bytes object. In Python 3.x, TarInfo.name is always a unicode (str) object. To avoid importpkg crashing with an exception, we direct the Python 3.x decoding to use the surrogateescape error handler. Thus decoding the name boils down to checking whether it contains surrogates.
Diffstat (limited to 'importpkg.py')
-rwxr-xr-ximportpkg.py31
1 files changed, 27 insertions, 4 deletions
diff --git a/importpkg.py b/importpkg.py
index dac4bb1..e8cc2fa 100755
--- a/importpkg.py
+++ b/importpkg.py
@@ -42,9 +42,32 @@ def gifhash():
hashobj.name = "gif_sha512"
return hashobj
-def decompress_tar(filelike, extension):
- filelike = decompress(filelike, extension.decode("ascii"))
- return tarfile.open(fileobj=filelike, mode="r|")
+if sys.version_info.major >= 3:
+    def decompress_tar(filelike, extension):
+        filelike = decompress(filelike, extension.decode("ascii"))
+        return tarfile.open(fileobj=filelike, mode="r|", encoding="utf8",
+                            errors="surrogateescape")  # non-utf8 names yield surrogates
+
+    def decodetarname(name):
+        """Check a tarinfo name for surrogate escapes and return it unchanged.
+        @raises UnicodeDecodeError: if the escaped bytes are not valid utf8
+        """
+        try:
+            name.encode("utf8", "strict")
+        except UnicodeEncodeError as e:
+            if e.reason == "surrogates not allowed":
+                name.encode("utf8", "surrogateescape").decode("utf8", "strict")
+        return name
+else:
+    def decompress_tar(filelike, extension):
+        filelike = decompress(filelike, extension.decode("ascii"))
+        return tarfile.open(fileobj=filelike, mode="r|")  # Python 2.x names stay bytes
+
+    def decodetarname(name):
+        """Decoded name of a tarinfo.
+        @raises UnicodeDecodeError:
+        """
+        return name.decode("utf8")
class ProcessingFinished(Exception):
pass
@@ -77,7 +100,7 @@ class ImportpkgExtractor(DebExtractor):
tf = decompress_tar(filelike, name[8:])
for name, size, hashes in get_tar_hashes(tf, self.hash_functions):
try:
- name = name.decode("utf8")
+ name = decodetarname(name)
except UnicodeDecodeError:
print("warning: skipping filename with encoding error")
continue # skip files with non-utf8 encoding for now