summaryrefslogtreecommitdiff
path: root/autoimport.py
diff options
context:
space:
mode:
Diffstat (limited to 'autoimport.py')
-rwxr-xr-xautoimport.py131
1 files changed, 104 insertions, 27 deletions
diff --git a/autoimport.py b/autoimport.py
index 453a839..f30b36a 100755
--- a/autoimport.py
+++ b/autoimport.py
@@ -1,48 +1,125 @@
#!/usr/bin/python
+"""This scrip takes a directory or a http base url to a mirror and imports all
+packages contained. It has rather strong assumptions on the working directory.
+"""
import gzip
import io
+import multiprocessing
+import optparse
+import os
import sqlite3
import subprocess
-import sys
import urllib
+import concurrent.futures
from debian import deb822
from debian.debian_support import version_compare
+def process_http(pkgs, url):
+ pkglist = urllib.urlopen(url + "/dists/sid/main/binary-amd64/Packages.gz").read()
+ pkglist = gzip.GzipFile(fileobj=io.BytesIO(pkglist)).read()
+ pkglist = io.BytesIO(pkglist)
+ pkglist = deb822.Packages.iter_paragraphs(pkglist)
+ for pkg in pkglist:
+ name = pkg["Package"]
+ if name in pkgs and \
+ version_compare(pkgs[name]["version"], pkg["Version"]) > 0:
+ continue
+ pkgs[name] = dict(version=pkg["Version"],
+ filename="%s/%s" % (url, pkg["Filename"]))
+
+def process_dir(pkgs, d):
+ for entry in os.listdir(d):
+ if not entry.endswith(".deb"):
+ continue
+ parts = entry.split("_")
+ if len(parts) != 3:
+ continue
+ name, version, _ = parts
+ version = urllib.unquote(version)
+ if name in pkgs and version_compare(pkgs[name]["version"], version) > 0:
+ continue
+ pkgs[name] = dict(version=version, filename=os.path.join(d, entry))
+
+def process_pkg(name, filename):
+ print("importing %s" % filename)
+ if filename.startswith("http://"):
+ with open(os.path.join("tmp", name), "w") as outp:
+ dl = subprocess.Popen(["curl", "-s", filename],
+ stdout=subprocess.PIPE, close_fds=True)
+ imp = subprocess.Popen(["python", "importpkg.py"], stdin=dl.stdout,
+ stdout=outp, close_fds=True)
+ if imp.wait():
+ raise ValueError("importpkg failed")
+ if dl.wait():
+ raise ValueError("curl failed")
+ else:
+ with open(filename) as inp:
+ with open(os.path.join("tmp", name), "w") as outp:
+ subprocess.check_call(["python", "importpkg.py"], stdin=inp,
+ stdout=outp, close_fds=True)
+ print("preprocessed %s" % name)
+
def main():
- urlbase = sys.argv[1]
+ parser = optparse.OptionParser()
+ parser.add_option("-n", "--new", action="store_true",
+ help="avoid reimporting same versions")
+ parser.add_option("-p", "--prune", action="store_true",
+ help="prune packages old packages")
+ options, args = parser.parse_args()
+ subprocess.check_call(["mkdir", "-p", "tmp"])
db = sqlite3.connect("test.sqlite3")
cur = db.cursor()
cur.execute("PRAGMA foreign_keys = ON;")
+ e = concurrent.futures.ThreadPoolExecutor(multiprocessing.cpu_count())
+ pkgs = {}
+ for d in args:
+ print("processing %s" % d)
+ if d.startswith("http://"):
+ process_http(pkgs, d)
+ else:
+ process_dir(pkgs, d)
+
+ print("reading database")
cur.execute("SELECT package, version FROM package;")
knownpkgs = dict((row[0], row[1]) for row in cur.fetchall())
+ distpkgs = set(pkgs.keys())
+ if options.new:
+ for name in distpkgs:
+ if name in knownpkgs and version_compare(pkgs[name]["version"],
+ knownpkgs[name]) <= 0:
+ del pkgs[name]
+ knownpkgs = set(knownpkgs)
- pkglist = urllib.urlopen(urlbase + "/dists/sid/main/binary-amd64/Packages.gz").read()
- pkglist = gzip.GzipFile(fileobj=io.BytesIO(pkglist)).read()
- distpkgs = set()
- for pkg in deb822.Packages.iter_paragraphs(io.BytesIO(pkglist)):
- name = pkg["Package"]
- distpkgs.add(name)
- if name in knownpkgs and \
- version_compare(pkg["Version"], knownpkgs[name]) <= 0:
- continue
- pkgurl = "%s/%s" % (urlbase, pkg["Filename"])
- print("importing %s" % name)
- dl = subprocess.Popen(["curl", "-s", pkgurl], stdout=subprocess.PIPE)
- imp = subprocess.Popen("./importpkg.py", stdin=dl.stdout)
- if imp.wait():
- print("import failed")
- if dl.wait():
- print("curl failed")
-
- delpkgs = set(knownpkgs) - distpkgs
- print("clearing packages %s" % " ".join(delpkgs))
- cur.executemany("DELETE FROM package WHERE package = ?;",
- ((pkg,) for pkg in delpkgs))
- # Tables content, dependency and sharing will also be pruned
- # due to ON DELETE CASCADE clauses.
- db.commit()
+ with e:
+ fs = {}
+ for name, pkg in pkgs.items():
+ fs[e.submit(process_pkg, name, pkg["filename"])] = name
+
+ for f in concurrent.futures.as_completed(fs.keys()):
+ name = fs[f]
+ if f.exception():
+ print("%s failed to import: %r" % (name, f.exception()))
+ continue
+ inf = os.path.join("tmp", name)
+ print("sqlimporting %s" % name)
+ with open(inf) as inp:
+ try:
+ subprocess.check_call(["python", "readyaml.py"], stdin=inp)
+ except subprocess.CalledProcessError:
+ print("%s failed sql" % name)
+ else:
+ os.unlink(inf)
+
+ if options.prune:
+ delpkgs = knownpkgs - distpkgs
+ print("clearing packages %s" % " ".join(delpkgs))
+ cur.executemany("DELETE FROM package WHERE package = ?;",
+ ((pkg,) for pkg in delpkgs))
+ # Tables content, dependency and sharing will also be pruned
+ # due to ON DELETE CASCADE clauses.
+ db.commit()
if __name__ == "__main__":
main()