From 420ad71f69c46c40f322957a0c93f17f5618c9e2 Mon Sep 17 00:00:00 2001 From: Helmut Grohne Date: Sat, 23 Feb 2013 09:53:33 +0100 Subject: importpkg: ignore filenames with encoding errors --- importpkg.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/importpkg.py b/importpkg.py index 89020b9..5901b57 100755 --- a/importpkg.py +++ b/importpkg.py @@ -167,8 +167,13 @@ def process_package(db, filelike): if state != "control_file": raise ValueError("missing control file") for name, size, function, hexhash in get_hashes(tf): + try: + name = name.decode("utf8") + except UnicodeDecodeError: + print("warning: skipping filename with encoding error") + continue # skip files with non-utf8 encoding for now cur.execute("INSERT INTO content (package, filename, size, function, hash) VALUES (?, ?, ?, ?, ?);", - (package, name.decode("utf8"), size, function, hexhash)) + (package, name, size, function, hexhash)) db.commit() return raise ValueError("data.tar not found") -- cgit v1.2.3