diff options
Diffstat (limited to 'autoimport.py')
-rwxr-xr-x | autoimport.py | 85 |
1 files changed, 39 insertions, 46 deletions
diff --git a/autoimport.py b/autoimport.py index c00dc4d..0f518c6 100755 --- a/autoimport.py +++ b/autoimport.py @@ -1,61 +1,52 @@ -#!/usr/bin/python +#!/usr/bin/python3 """This script takes a directory or a http base url to a mirror and imports all packages contained. It has rather strong assumptions on the working directory. """ import argparse -import contextlib import errno import multiprocessing -import os +import pathlib import sqlite3 import subprocess import sys import tempfile -try: - from urllib.parse import unquote -except ImportError: - from urllib import unquote - +import urllib.parse import concurrent.futures -from debian import deb822 from debian.debian_support import version_compare -from dedup.utils import open_compressed_mirror_url +from dedup.utils import iterate_packages from readyaml import readyaml def process_http(pkgs, url, addhash=True): - url += "/dists/sid/main/binary-amd64/Packages" - with contextlib.closing(open_compressed_mirror_url(url)) as pkglist: - for pkg in deb822.Packages.iter_paragraphs(pkglist): - name = pkg["Package"] - if name in pkgs and \ - version_compare(pkgs[name]["version"], pkg["Version"]) > 0: - continue - inst = dict(version=pkg["Version"], - filename="%s/%s" % (url, pkg["Filename"])) - if addhash: - inst["sha256hash"] = pkg["SHA256"] - pkgs[name] = inst + for pkg in iterate_packages(url, "amd64"): + name = pkg["Package"] + if name in pkgs and \ + version_compare(pkgs[name]["version"], pkg["Version"]) > 0: + continue + inst = dict(version=pkg["Version"], + filename="%s/%s" % (url, pkg["Filename"])) + if addhash: + inst["sha256hash"] = pkg["SHA256"] + pkgs[name] = inst def process_file(pkgs, filename): - base = os.path.basename(filename) - if not base.endswith(".deb"): + if filename.suffix != ".deb": raise ValueError("filename does not end in .deb") - parts = base.split("_") + parts = filename.name.split("_") if len(parts) != 3: raise ValueError("filename not in form name_version_arch.deb") name, version, _ = parts - 
version = unquote(version) + version = urllib.parse.unquote(version) if name in pkgs and version_compare(pkgs[name]["version"], version) > 0: return - pkgs[name] = dict(version=version, filename=filename) + pkgs[name] = dict(version=version, filename=str(filename)) def process_dir(pkgs, d): - for entry in os.listdir(d): + for entry in d.iterdir(): try: - process_file(pkgs, os.path.join(d, entry)) + process_file(pkgs, entry) except ValueError: pass @@ -67,11 +58,11 @@ def process_pkg(name, pkgdict, outpath): importcmd.extend(["-H", pkgdict["sha256hash"]]) if filename.startswith(("http://", "https://", "ftp://", "file://")): importcmd.append(filename) - with open(outpath, "w") as outp: + with outpath.open("w") as outp: subprocess.check_call(importcmd, stdout=outp, close_fds=True) else: with open(filename) as inp: - with open(outpath, "w") as outp: + with outpath.open("w") as outp: subprocess.check_call(importcmd, stdin=inp, stdout=outp, close_fds=True) print("preprocessed %s" % name) @@ -90,7 +81,7 @@ def main(): parser.add_argument("files", nargs='+', help="files or directories or repository urls") args = parser.parse_args() - tmpdir = tempfile.mkdtemp(prefix="debian-dedup") + tmpdir = pathlib.Path(tempfile.mkdtemp(prefix="debian-dedup")) db = sqlite3.connect(args.database) cur = db.cursor() cur.execute("PRAGMA foreign_keys = ON;") @@ -100,42 +91,44 @@ def main(): print("processing %s" % d) if d.startswith(("http://", "https://", "ftp://", "file://")): process_http(pkgs, d, not args.noverify) - elif os.path.isdir(d): - process_dir(pkgs, d) else: - process_file(pkgs, d) + dp = pathlib.Path(d) + if dp.is_dir(): + process_dir(pkgs, dp) + else: + process_file(pkgs, dp) print("reading database") cur.execute("SELECT name, version FROM package;") - knownpkgs = dict((row[0], row[1]) for row in cur.fetchall()) + knownpkgvers = dict((row[0], row[1]) for row in cur.fetchall()) distpkgs = set(pkgs.keys()) if args.new: for name in distpkgs: - if name in knownpkgs and 
version_compare(pkgs[name]["version"], - knownpkgs[name]) <= 0: + if name in knownpkgvers and \ + version_compare(pkgs[name]["version"], knownpkgvers[name]) <= 0: del pkgs[name] - knownpkgs = set(knownpkgs) + knownpkgs = set(knownpkgvers) + del knownpkgvers with e: fs = {} for name, pkg in pkgs.items(): - outpath = os.path.join(tmpdir, name) - fs[e.submit(process_pkg, name, pkg, outpath)] = name + fs[e.submit(process_pkg, name, pkg, tmpdir / name)] = name for f in concurrent.futures.as_completed(fs.keys()): name = fs[f] if f.exception(): print("%s failed to import: %r" % (name, f.exception())) continue - inf = os.path.join(tmpdir, name) + inf = tmpdir / name print("sqlimporting %s" % name) - with open(inf) as inp: + with inf.open() as inp: try: readyaml(db, inp) except Exception as exc: print("%s failed sql with exception %r" % (name, exc)) else: - os.unlink(inf) + inf.unlink() if args.prune: delpkgs = knownpkgs - distpkgs @@ -146,12 +139,12 @@ def main(): # due to ON DELETE CASCADE clauses. db.commit() try: - os.rmdir(tmpdir) + tmpdir.rmdir() except OSError as err: if err.errno != errno.ENOTEMPTY: raise print("keeping temporary directory %s due to failed packages %s" % - (tmpdir, " ".join(os.listdir(tmpdir)))) + (tmpdir, " ".join(map(str, tmpdir.iterdir())))) if __name__ == "__main__": main() |