diff options
author | Helmut Grohne <helmut@subdivi.de> | 2013-06-23 12:00:36 +0200 |
---|---|---|
committer | Helmut Grohne <helmut@subdivi.de> | 2013-06-23 12:00:36 +0200 |
commit | f2bd48d342518c11ec7deaeee5b437ac524514de (patch) | |
tree | 938e533ece907a0382eb2ec02c33cc755a724b12 /readyaml.py | |
parent | 14116126ce7d1e1d48743bbed78599805af7747f (diff) | |
parent | f652c17f242fb743a167041521e9618039ae7296 (diff) | |
download | debian-dedup-f2bd48d342518c11ec7deaeee5b437ac524514de.tar.gz |
Merge branch yamlimport
+ Way faster on multiple cores.
+ More reliable, because HTTP connections do not time out when the
database blocks.
- Way slower on a single core with a contended I/O path. No clue why.
update_sharing.py still makes up the bulk of the processing time.
Diffstat (limited to 'readyaml.py')
-rwxr-xr-x | readyaml.py | 51 |
1 files changed, 51 insertions, 0 deletions
#!/usr/bin/python
# Contents of readyaml.py as added by this commit (new file, mode 100755).
"""This tool reads a yaml file as generated by importpkg.py on stdin and
updates the database with the contents."""

import sqlite3
import sys

from debian.debian_support import version_compare
import yaml


def readyaml(db, stream):
    """Replace one package's rows in the database from a YAML stream.

    The stream is parsed as a sequence of YAML documents:

    * The first document is a mapping providing the keys "package",
      "version", "architecture", "source" and "depends" (an iterable of
      dependency names).
    * Each following document is a mapping with the keys "name", "size"
      and "hashes" (a mapping from hash function name to hash value)
      describing one content entry.
    * A document equal to the string "commit" ends the package. The
      transaction is committed at that point and any later documents are
      left unread.

    If the database already holds a version of the package for which
    version_compare(stored, incoming) is greater than zero, nothing is
    changed and the function returns immediately.

    @param db: an open sqlite3 connection
    @param stream: a file-like object (or string) containing the YAML
    @raises ValueError: if the stream ends without a "commit" document; the
        transaction is rolled back, so the database stays untouched
    """
    cur = db.cursor()
    # Issued before BEGIN on purpose: SQLite ignores this pragma while a
    # transaction is open.
    cur.execute("PRAGMA foreign_keys = ON;")
    gen = yaml.safe_load_all(stream)
    metadata = next(gen)
    package = metadata["package"]
    cur.execute("SELECT version FROM package WHERE package = ?;",
                (package,))
    row = cur.fetchone()
    if row and version_compare(row[0], metadata["version"]) > 0:
        return

    cur.execute("BEGIN;")
    try:
        cur.execute("DELETE FROM content WHERE package = ?;",
                    (package,))
        cur.execute("INSERT OR REPLACE INTO package (package, version, architecture, source) VALUES (?, ?, ?, ?);",
                    (package, metadata["version"], metadata["architecture"],
                     metadata["source"]))
        cur.execute("DELETE FROM dependency WHERE package = ?;",
                    (package,))
        cur.executemany("INSERT INTO dependency (package, required) VALUES (?, ?);",
                        ((package, dep) for dep in metadata["depends"]))
        for entry in gen:
            if entry == "commit":
                db.commit()
                return

            cur.execute("INSERT INTO content (package, filename, size) VALUES (?, ?, ?);",
                        (package, entry["name"], entry["size"]))
            # The hash rows refer to the content row inserted just above.
            cid = cur.lastrowid
            cur.executemany("INSERT INTO hash (cid, function, hash) VALUES (?, ?, ?);",
                            ((cid, func, hexhash)
                             for func, hexhash in entry["hashes"].items()))
        raise ValueError("missing commit block")
    except BaseException:
        # Never leave a half-replaced package pending on the connection: a
        # caller that keeps using it and commits later would otherwise
        # persist the partial update.
        db.rollback()
        raise


def main():
    """Import the YAML read from stdin into the test.sqlite3 database."""
    db = sqlite3.connect("test.sqlite3")
    try:
        readyaml(db, sys.stdin)
    finally:
        # Release the database file even when the import fails.
        db.close()


if __name__ == "__main__":
    main()