summaryrefslogtreecommitdiff
path: root/readyaml.py
diff options
context:
space:
mode:
authorHelmut Grohne <helmut@subdivi.de>2013-06-23 12:00:36 +0200
committerHelmut Grohne <helmut@subdivi.de>2013-06-23 12:00:36 +0200
commitf2bd48d342518c11ec7deaeee5b437ac524514de (patch)
tree938e533ece907a0382eb2ec02c33cc755a724b12 /readyaml.py
parent14116126ce7d1e1d48743bbed78599805af7747f (diff)
parentf652c17f242fb743a167041521e9618039ae7296 (diff)
downloaddebian-dedup-f2bd48d342518c11ec7deaeee5b437ac524514de.tar.gz
Merge branch yamlimport
+ Way faster on multiple cores. + More reliable, because HTTP connections do not time out when the db blocks. - Way slower on a single core with a contended I/O path. No clue why. Still, update_sharing.py makes up the bulk of the processing time.
Diffstat (limited to 'readyaml.py')
-rwxr-xr-xreadyaml.py51
1 files changed, 51 insertions, 0 deletions
diff --git a/readyaml.py b/readyaml.py
new file mode 100755
index 0000000..e2f3bb3
--- /dev/null
+++ b/readyaml.py
@@ -0,0 +1,51 @@
+#!/usr/bin/python
+"""This tool reads a yaml file as generated by importpkg.py on stdin and
+updates the database with the contents."""
+
+import sqlite3
+import sys
+
+from debian.debian_support import version_compare
+import yaml
+
+def readyaml(db, stream):
+    """Import one package description from a yaml stream into the database.
+
+    The stream is parsed with yaml.safe_load_all. Its first document is a
+    mapping with the keys "package", "version", "architecture", "source" and
+    "depends". Every following document is a mapping with the keys "name",
+    "size" and "hashes" (hash function name -> hash value) and describes one
+    content entry. The stream has to end with the plain string document
+    "commit"; only then is the import committed.
+
+    Nothing is written when version_compare ranks the version already stored
+    for the package above the version found in the stream.
+
+    db is an open sqlite3 connection and stream is anything accepted by
+    yaml.safe_load_all.
+
+    Raises ValueError if the stream ends without the "commit" document. If
+    anything fails after the transaction has been opened, the transaction is
+    rolled back before the exception propagates, so the connection is never
+    left inside a half-applied import.
+    """
+    cur = db.cursor()
+    cur.execute("PRAGMA foreign_keys = ON;")
+    gen = yaml.safe_load_all(stream)
+    metadata = next(gen)
+    package = metadata["package"]
+    cur.execute("SELECT version FROM package WHERE package = ?;",
+                (package,))
+    row = cur.fetchone()
+    # Keep the stored data if it belongs to a newer version of the package.
+    if row and version_compare(row[0], metadata["version"]) > 0:
+        return
+
+    cur.execute("BEGIN;")
+    committed = False
+    try:
+        # Drop what was recorded for this package before inserting the new
+        # data. NOTE(review): rows in "hash" presumably disappear through an
+        # ON DELETE CASCADE on cid - confirm against the schema.
+        cur.execute("DELETE FROM content WHERE package = ?;",
+                    (package,))
+        cur.execute("INSERT OR REPLACE INTO package (package, version, architecture, source) VALUES (?, ?, ?, ?);",
+                    (package, metadata["version"], metadata["architecture"],
+                     metadata["source"]))
+        cur.execute("DELETE FROM dependency WHERE package = ?;",
+                    (package,))
+        cur.executemany("INSERT INTO dependency (package, required) VALUES (?, ?);",
+                        ((package, dep) for dep in metadata["depends"]))
+        for entry in gen:
+            if entry == "commit":
+                db.commit()
+                committed = True
+                return
+
+            cur.execute("INSERT INTO content (package, filename, size) VALUES (?, ?, ?);",
+                        (package, entry["name"], entry["size"]))
+            # The id of the content row just inserted is stored as cid in
+            # each of its hash rows.
+            cid = cur.lastrowid
+            cur.executemany("INSERT INTO hash (cid, function, hash) VALUES (?, ?, ?);",
+                            ((cid, func, hexhash)
+                             for func, hexhash in entry["hashes"].items()))
+        raise ValueError("missing commit block")
+    finally:
+        # Any exit that did not commit (malformed entry, database error,
+        # missing commit block) must not leave the transaction open.
+        if not committed:
+            db.rollback()
+
+def main():
+    """Import the yaml document read from stdin into "test.sqlite3"."""
+    db = sqlite3.connect("test.sqlite3")
+    try:
+        readyaml(db, sys.stdin)
+    finally:
+        # Release the database file explicitly, also when the import fails.
+        db.close()
+
+# Run the import only when executed as a script, not when imported.
+if __name__ == "__main__":
+ main()