From 95125e250b9e4a3023a6b769c372990530d1aa87 Mon Sep 17 00:00:00 2001 From: Helmut Grohne Date: Tue, 22 Jul 2014 21:16:14 +0200 Subject: store filenames as blobs To facilitate this, importpkg.py decodes filenames using iso-8859-1. While this is not the encoding used for most filenames, decoding will never fail. Conversely, readyaml.py encodes to iso-8859-1 to undo the above effect. webapp.py cannot currently cope with the above change and is broken by this commit. --- readyaml.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'readyaml.py') diff --git a/readyaml.py b/readyaml.py index 2ef9a3b..3e5ba87 100755 --- a/readyaml.py +++ b/readyaml.py @@ -45,8 +45,11 @@ def readyaml(db, stream): db.commit() return + # iso-8859-1 is used as a safe representation of binary data within + # utf-8. + filename = buffer(entry["name"].encode("iso-8859-1")) cur.execute("INSERT INTO content (pid, filename, size) VALUES (?, ?, ?);", - (pid, entry["name"], entry["size"])) + (pid, filename, entry["size"])) cid = cur.lastrowid cur.executemany("INSERT INTO hash (cid, fid, hash) VALUES (?, ?, ?);", ((cid, funcmapping[func], hexhash) -- cgit v1.2.3