diff options
author | Helmut Grohne <helmut@subdivi.de> | 2014-07-22 21:16:14 +0200 |
---|---|---|
committer | Helmut Grohne <helmut@subdivi.de> | 2014-07-22 21:16:14 +0200 |
commit | 95125e250b9e4a3023a6b769c372990530d1aa87 (patch) | |
tree | e53f5110c0177fe190c8cbcebf6e9a24c2e28439 /importpkg.py | |
parent | d48c3c208ee6ba54225b3eb68ce5c9f3c894bfa4 (diff) | |
download | debian-dedup-binaryfilename.tar.gz |
store filenames as blobs (branch: binaryfilename)
To facilitate this, importpkg.py decodes filenames using iso-8859-1.
While this is not the encoding used for most filenames, decoding will
never fail. Conversely, readyaml.py encodes to iso-8859-1 to undo the
above effect.
webapp.py cannot currently cope with the above change and is broken by
this commit.
Diffstat (limited to 'importpkg.py')
-rwxr-xr-x | importpkg.py | 8 |
1 files changed, 3 insertions, 5 deletions
diff --git a/importpkg.py b/importpkg.py
index 06d9da4..8f2390c 100755
--- a/importpkg.py
+++ b/importpkg.py
@@ -82,11 +82,9 @@ def process_package(filelike, hash_functions):
                 state = "data"
                 tf = decompress_tar(af, name[8:])
                 for name, size, hashes in get_tar_hashes(tf, hash_functions):
-                    try:
-                        name = name.decode("utf8")
-                    except UnicodeDecodeError:
-                        print("warning: skipping filename with encoding error")
-                        continue # skip files with non-utf8 encoding for now
+                    # filenames are not actually iso-8859-1, but this decode
+                    # cannot cause UnicodeDecodeError
+                    name = name.decode("iso-8859-1")
                     yield dict(name=name, size=size, hashes=hashes)
                 yield "commit"
                 break