summaryrefslogtreecommitdiff
path: root/readyaml.py
diff options
context:
space:
mode:
author: Helmut Grohne <helmut@subdivi.de> 2014-07-22 21:16:14 +0200
committer: Helmut Grohne <helmut@subdivi.de> 2014-07-22 21:16:14 +0200
commit: 95125e250b9e4a3023a6b769c372990530d1aa87 (patch)
tree: e53f5110c0177fe190c8cbcebf6e9a24c2e28439 /readyaml.py
parent: d48c3c208ee6ba54225b3eb68ce5c9f3c894bfa4 (diff)
download: debian-dedup-binaryfilename.tar.gz
store filenames as blobs (branch: binaryfilename)
To facilitate this, importpkg.py decodes filenames using iso-8859-1. While this is not the encoding used for most filenames, decoding will never fail. Conversely, readyaml.py encodes to iso-8859-1 to undo the above effect. webapp.py cannot currently cope with the above change and is broken by this commit.
Diffstat (limited to 'readyaml.py')
-rwxr-xr-x readyaml.py 5
1 files changed, 4 insertions, 1 deletions
diff --git a/readyaml.py b/readyaml.py
index 2ef9a3b..3e5ba87 100755
--- a/readyaml.py
+++ b/readyaml.py
@@ -45,8 +45,11 @@ def readyaml(db, stream):
db.commit()
return
+ # iso-8859-1 is used as a safe representation of binary data within
+ # utf-8.
+ filename = buffer(entry["name"].encode("iso-8859-1"))
cur.execute("INSERT INTO content (pid, filename, size) VALUES (?, ?, ?);",
- (pid, entry["name"], entry["size"]))
+ (pid, filename, entry["size"]))
cid = cur.lastrowid
cur.executemany("INSERT INTO hash (cid, fid, hash) VALUES (?, ?, ?);",
((cid, funcmapping[func], hexhash)