initial checkin
author     Helmut Grohne <helmut@subdivi.de>
           Sun, 3 Feb 2019 11:42:23 +0000 (12:42 +0100)
committer  Helmut Grohne <helmut@subdivi.de>
           Sun, 3 Feb 2019 11:42:23 +0000 (12:42 +0100)
build.py [new file with mode: 0755]
build.sh [new file with mode: 0755]
common.py [new file with mode: 0644]
depcheck.py [new file with mode: 0755]
schema.sql [new file with mode: 0644]

diff --git a/build.py b/build.py
new file mode 100755 (executable)
index 0000000..c0def3b
--- /dev/null
+++ b/build.py
@@ -0,0 +1,66 @@
+#!/usr/bin/python3
+
+import collections
+import contextlib
+import datetime
+import lzma
+import os.path
+import sqlite3
+import subprocess
+
+from common import decompress_stream, yield_lines
+
+def yield_chunks(filelike, chunksize=65536):
+    while True:
+        data = filelike.read(chunksize)
+        if not data:
+            break
+        yield data
+
+def scan_log_status(filelike):
+    it = yield_chunks(filelike)
+    it = decompress_stream(it, lzma.LZMADecompressor())
+    it = yield_lines(it)
+    last_lines = collections.deque(it, 25)
+    status = [l.split(b':', 1)[1].strip()
+              for l in last_lines if l.startswith(b"Status:")]
+    if status:
+        return status[0].decode("ascii")
+    return "unknown"
+
+def do_build(source, version, architecture):
+    now = datetime.datetime.utcnow()
+    logtarget = "%s_%s_%s_%s.log.xz" % (source, version, architecture,
+                                        now.strftime("%Y%m%d%H%M%S"))
+    cmdline = ["ssh", "gcc131", "sh", "/dev/stdin", architecture,
+               "%s_%s" % (source, version)]
+    with open(os.path.join("logs", logtarget), "w+b") as output:
+        with open("build.sh", "rb") as instructions:
+            code = subprocess.call(cmdline, stdin=instructions, stdout=output)
+        output.seek(0)
+        status = scan_log_status(output)
+        print("status %s code %d" % (status, code))
+    return (now, code == 0, logtarget, status == "given-back")
+
+def main():
+    db = sqlite3.connect("db", detect_types=sqlite3.PARSE_DECLTYPES)
+    with contextlib.closing(db.cursor()) as cur:
+        cur.execute("SELECT source, version, depstate.architecture FROM depstate JOIN depcheck ON depstate.architecture = depcheck.architecture WHERE satisfiable = 1 AND giveback = 0 ORDER BY random() LIMIT 1;")
+        row = cur.fetchone()
+    if row is None:
+        print("nothing satisfiable to build")
+        return
+    source, version, architecture = row
+    print("building %s_%s for %s" % (source, version, architecture))
+    timestamp, success, filename, giveback = do_build(source, version, architecture)
+    with contextlib.closing(db.cursor()) as cur:
+        cur.execute("INSERT INTO builds (source, version, architecture, success, starttime, filename) VALUES (?, ?, ?, ?, ?, ?);",
+                    (source, version, architecture, success, timestamp,
+                     filename))
+        if giveback:
+            cur.execute("UPDATE depcheck SET giveback = 1 WHERE architecture = ?;",
+                        (architecture,))
+    db.commit()
+
+if __name__ == "__main__":
+    main()
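
build.py's log handling is fully streamed: yield_chunks feeds an xz
decompressor, yield_lines rechunks the result into lines, and a 25-line deque
tails the log for sbuild's "Status:" trailer. A minimal sketch of that
pipeline, assuming build.py and common.py are importable, with a made-up log:

    import io
    import lzma

    from build import scan_log_status

    # Synthetic sbuild log: noise followed by the trailer sbuild emits.
    log = b"lots of build output\n" * 1000 + b"Status: given-back\n"
    print(scan_log_status(io.BytesIO(lzma.compress(log))))  # given-back
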
diff --git a/build.sh b/build.sh
new file mode 100755 (executable)
index 0000000..a81f359
--- /dev/null
+++ b/build.sh
@@ -0,0 +1,52 @@
+#!/bin/sh
+set -u
+set -e
+die() {
+       echo "error: $*" 1>&2
+       exit 254
+}
+test "$#" = 2 || die "usage: $0 <architecture> <package>"
+HOST_ARCH="$1"
+PACKAGE="$2"
+if ! dpkg-architecture "-a$HOST_ARCH" >/dev/null 2>&1; then
+       die "$HOST_ARCH is not a valid architecture"
+fi
+BUILDTMP=$(mktemp -d) || die "failed to create temporary directory"
+cleanup() {
+       rm -R -f "$BUILDTMP"
+}
+trap cleanup EXIT
+cd "$BUILDTMP" || die "failed to cd to tmpdir"
+export SBUILD_CONFIG="$BUILDTMP/sbuild.conf"
+cat >"$SBUILD_CONFIG" <<'EOF'
+$build_arch_any = 1;
+$build_arch_all = 0;
+$build_source = 0;
+$distribution = "unstable";
+$build_profiles = "cross nocheck";
+$manual_depends = ["libc-dev", "libstdc++-dev"];
+$dpkg_buildpackage_user_options = ["--jobs-try=1"];
+$bd_uninstallable_explainer = "apt";
+$source_only_changes = 0;
+$apt_update = 1;
+$apt_distupgrade = 1;
+$lintian_opts = ["-T", "binary-from-other-architecture"];
+#$lintian_require_success = 1;
+$run_lintian = 1;
+$run_autopkgtest = 0;
+$run_piuparts = 0;
+$sbuild_mode = "user";
+1;
+EOF
+RET=0
+sbuild "--host=$HOST_ARCH" "$PACKAGE" >/dev/null || RET=$?
+for f in *.build; do
+       test -L "$f" && continue
+       test -f "$f" || continue
+       # poor substitute for missing $lintian_require_success.
+       if tail -n20 "$f" | grep -q "^Lintian: fail$"; then
+               RET=1
+       fi
+       xz -9c "$f"
+done
+exit "$RET"
diff --git a/common.py b/common.py
new file mode 100644 (file)
index 0000000..bb8b7a4
--- /dev/null
+++ b/common.py
@@ -0,0 +1,23 @@
+def yield_lines(iterable):
+    """Converts an arbitrary bytes iterable into an iterable that yields whole
+    lines. The final byte of each returned value (except possibly the last one)
+    is a newline or carriage return character. The concatenation of the input
+    iterable equals the concatenation of the output iterable."""
+    buff = b""
+    for data in iterable:
+        buff += data
+        parts = buff.splitlines(True)
+        buff = parts.pop()
+        yield from parts
+    if buff:
+        yield buff
+
+def decompress_stream(iterable, decompressor):
+    """Decompress an iterable of bytes using the given decompressor into
+    another (decompressed) iterable of bytes. The decompressor can be a
+    bz2.BZ2Decompressor or lzma.LZMADecompressor instance."""
+    for data in iterable:
+        data = decompressor.decompress(data)
+        yield data
+    if hasattr(decompressor, "flush"):
+        yield decompressor.flush()
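
Both helpers promise that concatenating their output reproduces the input, so
the contract can be property-checked with arbitrary chunk sizes; a quick
sketch:

    import bz2

    from common import decompress_stream, yield_lines

    raw = b"first line\nsecond line\r\nno trailing newline"
    chunks = [raw[i:i + 7] for i in range(0, len(raw), 7)]
    assert b"".join(yield_lines(chunks)) == raw

    compressed = bz2.compress(raw)
    pieces = [compressed[i:i + 5] for i in range(0, len(compressed), 5)]
    assert b"".join(decompress_stream(pieces, bz2.BZ2Decompressor())) == raw
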
diff --git a/depcheck.py b/depcheck.py
new file mode 100755 (executable)
index 0000000..f1c1a1f
--- /dev/null
+++ b/depcheck.py
@@ -0,0 +1,445 @@
+#!/usr/bin/python3
+
+import collections
+import contextlib
+import datetime
+import hashlib
+import itertools
+import lzma
+import os.path
+import sqlite3
+import subprocess
+import tempfile
+import yaml
+
+import apt_pkg
+apt_pkg.init()
+version_compare = apt_pkg.version_compare
+import requests
+
+from common import decompress_stream, yield_lines
+
+BUILD_ARCH = "amd64"
+MIRROR = "http://proxy:3142/debian"
+PROFILES = frozenset(("cross", "nocheck"))
+
+CPUEntry = collections.namedtuple('CPUEntry',
+                                  'debcpu gnucpu regex bits endianness')
+
+TupleEntry = collections.namedtuple('TupleEntry',
+                                    'abi libc os cpu')
+
+class Architectures:
+    @staticmethod
+    def read_table(filename):
+        with open(filename) as f:
+            for line in f:
+                if not line.startswith("#"):
+                    yield line.split()
+
+    def __init__(self, cputable="/usr/share/dpkg/cputable",
+                 tupletable="/usr/share/dpkg/tupletable",
+                 abitable="/usr/share/dpkg/abitable"):
+        self.cputable = {}
+        self.tupletable = {}
+        self.abitable = {}
+        self.read_cputable(cputable)
+        self.read_tupletable(tupletable)
+        self.read_abitable(abitable)
+
+    def read_cputable(self, cputable):
+        self.cputable.clear()
+        for values in self.read_table(cputable):
+            values[3] = int(values[3])  # bits
+            entry = CPUEntry(*values)
+            self.cputable[entry.debcpu] = entry
+
+    def read_tupletable(self, tupletable):
+        self.tupletable.clear()
+        for debtuple, debarch in self.read_table(tupletable):
+            if '<cpu>' in debtuple:
+                for cpu in self.cputable:
+                    entry = TupleEntry(*debtuple.replace("<cpu>", cpu)
+                                       .split("-"))
+                    self.tupletable[debarch.replace("<cpu>", cpu)] = entry
+            else:
+                self.tupletable[debarch] = TupleEntry(*debtuple.split("-"))
+
+    def read_abitable(self, abitable):
+        self.abitable.clear()
+        for arch, bits in self.read_table(abitable):
+            bits = int(bits)
+            self.abitable[arch] = bits
+
+    def match(self, arch, pattern):
+        parts = pattern.split("-")
+        if not "any" in parts:
+            return pattern == arch
+        while len(parts) < 4:
+            parts.insert(0, "any")
+        entry = self.tupletable[arch]
+        return all(parts[i] in (entry[i], "any") for i in range(4))
+
+    def getendianness(self, arch):
+        return self.cputable[self.tupletable[arch].cpu].endianness
+
+architectures = Architectures()
+arch_match = architectures.match
+
+def call_dose_builddebcheck(arguments):
+    """
+    @type arguments: [str]
+    @param arguments: command line arguments to dose-builddebcheck
+    @returns: an iterable over loaded yaml documents. The first document
+              is the header, all other documents are per-package.
+    @raises subprocess.CalledProcessError: if dose errors out
+    """
+    cmd = ["dose-builddebcheck"]
+    cmd.extend(arguments)
+
+    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE)
+
+    lines = []
+    for line in proc.stdout:
+        if line.startswith(b'  '):
+            lines.append(line)
+        elif line == b' -\n':
+            yield yaml.load(b"".join(lines), Loader=yaml.CBaseLoader)
+            lines = []
+    proc.stdout.close()
+    if lines:
+        yield yaml.load(b"".join(lines), Loader=yaml.CBaseLoader)
+    if proc.wait() not in (0, 1):
+        raise subprocess.CalledProcessError(proc.returncode, cmd)
+
+def parse_deb822(iterable):
+    """Parse an iterable of bytes into an iterable of str-dicts."""
+    mapping = {}
+    key = None
+    value = None
+    for line in yield_lines(iterable):
+        line = line.decode("utf8")
+        if line == "\n":
+            if key is not None:
+                mapping[key] = value.strip()
+                key = None
+            yield mapping
+            mapping = {}
+        elif key and line.startswith((" ", "\t")):
+            value += line
+        else:
+            if key is not None:
+                mapping[key] = value.strip()
+            try:
+                key, value = line.split(":", 1)
+            except ValueError:
+                raise ValueError("invalid input line %r" % line)
+    if key is not None:
+        mapping[key] = value.strip()
+    if mapping:
+        yield mapping
+
+def serialize_deb822(dct):
+    """Serialize a byte-dict into a single bytes object."""
+    return "".join(map("%s: %s\n".__mod__, dct.items())) + "\n"
+
+class HashSumMismatch(Exception):
+    pass
+
+def hash_check(iterable, hashobj, expected_digest):
+    """Wraps an iterable that yields bytes. It doesn't modify the sequence,
+    but on the final element it verifies that the concatenation of bytes
+    yields an expected digest value. Upon failure, the final next() results in
+    a HashSumMismatch rather than StopIteration.
+    """
+    for data in iterable:
+        hashobj.update(data)
+        yield data
+    if hashobj.hexdigest() != expected_digest:
+        raise HashSumMismatch()
+
+def parse_date(s):
+    return datetime.datetime.strptime(s, "%a, %d %b %Y %H:%M:%S %Z")
+
+class GPGV:
+    def __init__(self, files=("/etc/apt/trusted.gpg",),
+                 partsdir="/etc/apt/trusted.gpg.d"):
+        candidates = list(files)
+        candidates.extend(os.path.join(partsdir, e)
+                          for e in os.listdir(partsdir))
+        self.keyrings = list(filter(lambda f: os.access(f, os.R_OK),
+                                    candidates))
+
+    def verify(self, content):
+        cmdline = ["gpgv", "--quiet", "--weak-digest", "SHA1", "--output", "-"]
+        for keyring in self.keyrings:
+            cmdline.extend(("--keyring", keyring))
+        proc = subprocess.Popen(cmdline, stdin=subprocess.PIPE,
+                                stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+        stdout, _ = proc.communicate(content)
+        if proc.wait() != 0:
+            raise ValueError("signature verififcation failed")
+        return stdout
+
+class DebianMirror:
+    hashfunc = "SHA256"
+    def __init__(self, uri, dist="sid"):
+        self.uri = uri
+        self.dist = dist
+        self.releasetime = None
+        self.byhash = None
+        self.files = {}
+
+    @staticmethod
+    def get_all_keyrings():
+        yield "/etc/apt/trusted.gpg"
+        partsdir = "/etc/apt/trusted.gpg.d"
+        try:
+            for e in os.listdir(partsdir):
+                yield os.path.join(partsdir, e)
+        except FileNotFoundError:
+            pass
+
+    @staticmethod
+    def get_keyrings():
+        return filter(lambda f: os.access(f, os.R_OK),
+                      DebianMirror.get_all_keyrings())
+
+    def get_uri(self, filename):
+        return "%s/dists/%s/%s" % (self.uri, self.dist, filename)
+
+    def fetch_release(self):
+        resp = requests.get(self.get_uri("InRelease"))
+        resp.raise_for_status()
+        return GPGV().verify(resp.content)
+
+    def parse_release(self, content):
+        info, = list(parse_deb822([content]))
+        self.releasetime = parse_date(info["Date"])
+        valid_until = parse_date(info["Valid-Until"])
+        now = datetime.datetime.utcnow()
+        if self.releasetime > now:
+            raise ValueError("release file generated in future")
+        if valid_until < now:
+            raise ValueError("release signature expired")
+        self.byhash = info.pop("Acquire-By-Hash", "no") == "yes"
+        self.files = {}
+        for line in info[self.hashfunc].splitlines():
+            parts = line.split()
+            if not parts:
+                continue
+            if len(parts) != 3:
+                raise ValueError("invalid %s line %r" % (self.hashfunc, line))
+            self.files[parts[2]] = parts[0]
+
+    def update_release(self):
+        self.parse_release(self.fetch_release())
+
+    def fetch_list(self, listname):
+        if listname + ".xz" in self.files:
+            listname += ".xz"
+            wrapper = lambda i: decompress_stream(i, lzma.LZMADecompressor())
+        else:
+            wrapper = lambda i: i
+        hashvalue = self.files[listname]
+        if self.byhash:
+            listname = "%s/by-hash/%s/%s" % (os.path.dirname(listname),
+                                             self.hashfunc, hashvalue)
+        with requests.get(self.get_uri(listname), stream=True) as resp:
+            resp.raise_for_status()
+            it = resp.iter_content(65536)
+            it = hash_check(it, hashlib.new(self.hashfunc), hashvalue)
+            yield from wrapper(it)
+
+    def fetch_sources(self, component="main"):
+        return self.fetch_list("%s/source/Sources" % component)
+
+    def fetch_binaries(self, architecture, component="main"):
+        return self.fetch_list("%s/binary-%s/Packages" %
+                               (component, architecture))
+
+binfields = frozenset((
+    "Architecture",
+    "Breaks",
+    "Conflicts",
+    "Depends",
+    "Essential",
+    "Multi-Arch",
+    "Package",
+    "Pre-Depends",
+    "Provides",
+    "Version",
+))
+
+srcdepfields = frozenset((
+    "Build-Conflicts",
+    "Build-Conflicts-Arch",
+    "Build-Depends",
+    "Build-Depends-Arch",
+))
+srcfields = srcdepfields.union((
+    "Architecture",
+    "Package",
+    "Version",
+))
+
+bad_foreign_packages = frozenset((
+    "flex-old", # cannot execute /usr/bin/flex
+    "icmake", # cannot execute /usr/bin/icmake, build system
+    "jam", # cannot execute /usr/bin/jam, build system
+    "libtool-bin", # #836123
+    "python2.7-minimal", # fails postinst
+    "python3.6-minimal", # fails postinst
+    "python3.7-minimal", # fails postinst
+    "swi-prolog-nox", # fails postinst
+    "xrdp", # fails postinst
+    "libgvc6", # fails postinst
+))
+
+def strip_dict(dct, keepfields):
+    keys = set(dct.keys())
+    keys.difference_update(keepfields)
+    for k in keys:
+        del dct[k]
+
+def strip_alternatives(dct, fields):
+    for f in fields:
+        try:
+            value = dct[f]
+        except KeyError:
+            continue
+        dct[f] = ",".join(dep.split("|", 1)[0]
+                          for dep in value.split(","))
+
+def latest_versions(pkgs):
+    packages = {}
+    for p in pkgs:
+        name = p["Package"]
+        try:
+            if version_compare(packages[name]["Version"], p["Version"]) > 0:
+                continue
+        except KeyError:
+            pass
+        packages[name] = p
+    return (p for p in packages.values()
+            if "Package" in p and not "Negative-Entry" in p)
+
+def make_binary_list_build(mirror, arch):
+    for p in parse_deb822(mirror.fetch_binaries(BUILD_ARCH)):
+        if p["Package"].startswith("crossbuild-essential-"):
+            if p["Package"] != "crossbuild-essential-" + arch:
+                continue
+            p["Depends"] += ", libc-dev:%s, libstdc++-dev:%s" % (arch, arch)
+        strip_dict(p, binfields)
+        yield p
+
+def make_binary_list_host(mirror, arch):
+    for p in parse_deb822(mirror.fetch_binaries(arch)):
+        if p["Architecture"] == "all":
+            continue
+        if p.get("Multi-Arch") == "foreign":
+            continue
+        if p.get("Essential") == "yes":
+            continue
+        if p["Package"] in bad_foreign_packages:
+            continue
+        strip_dict(p, binfields)
+        yield p
+
+def make_binary_list(mirror, arch):
+    return itertools.chain(make_binary_list_build(mirror, arch),
+                           make_binary_list_host(mirror, arch))
+
+def make_source_list(mirror, arch):
+    for p in parse_deb822(mirror.fetch_sources()):
+        if p.get("Extra-Source-Only") == "yes":
+            continue
+        if any(arch_match(arch, pattern)
+               for pattern in p["Architecture"].split()):
+            strip_dict(p, srcfields)
+            strip_alternatives(p, srcdepfields)
+            yield p
+        else:
+            # dummy entry preventing older matching versions
+            yield {"Package": p["Package"], "Version": p["Version"],
+                   "Negative-Entry": "yes"}
+
+def check_bdsat(mirror, arch):
+    cmd = [
+        "--deb-native-arch=" + BUILD_ARCH,
+        "--deb-host-arch=" + arch,
+        "--deb-drop-b-d-indep",
+        "--deb-profiles=" + ",".join(PROFILES),
+        "--successes",
+        "--failures",
+        "--explain",
+        "--explain-minimal",
+        "--deb-emulate-sbuild",
+    ]
+
+    with tempfile.NamedTemporaryFile("w", encoding="utf8") as bintmp, \
+            tempfile.NamedTemporaryFile("w", encoding="utf8") as srctmp:
+        for p in make_binary_list(mirror, arch):
+            bintmp.write(serialize_deb822(p))
+        bintmp.flush()
+        cmd.append(bintmp.name)
+
+        for p in latest_versions(make_source_list(mirror, arch)):
+            srctmp.write(serialize_deb822(p))
+        srctmp.flush()
+        cmd.append(srctmp.name)
+
+        dose_result = call_dose_builddebcheck(cmd)
+        next(dose_result) # skip header
+        for d in dose_result:
+            if d["status"] == "ok":
+                yield (d["package"], d["version"], True, None)
+            else:
+                r = d["reasons"][0]
+                if "missing" in r:
+                    reason = "missing %s" % r["missing"]["pkg"]["unsat-dependency"].split()[0].split(":", 1)[0]
+                elif "conflict" in r:
+                    r = r["conflict"]["pkg1"]["unsat-conflict"]
+                    reason = "skew " if ' (!= ' in r else "conflict "
+                    reason += r.split()[0].split(':', 1)[0]
+                else:
+                    assert False
+                yield (d["package"], d["version"], False, reason)
+
+def update_depcheck(mirror, db, architecture):
+    now = datetime.datetime.utcnow()
+    mirror.update_release()
+    state = {}
+    for source, version, satisfiable, reason in check_bdsat(mirror, architecture):
+        state[source] = (version, satisfiable, reason)
+    with contextlib.closing(db.cursor()) as cur:
+        cur.execute("BEGIN;")
+        cur.execute("SELECT source, version, satisfiable, reason FROM depstate WHERE architecture = ?;",
+                    (architecture,))
+        for source, version, satisfiable, reason in list(cur.fetchall()):
+            if state.get(source) == (version, satisfiable, reason):
+                del state[source]
+            else:
+                cur.execute("DELETE FROM depstate WHERE source = ? AND version = ? AND architecture = ?;",
+                            (source, version, architecture))
+        cur.executemany("INSERT INTO depstate (source, architecture, version, satisfiable, reason) VALUES (?, ?, ?, ?, ?);",
+                        ((source, architecture, version, satisfiable, reason)
+                         for source, (version, satisfiable, reason) in state.items()))
+        cur.execute("UPDATE depcheck SET releasetime = ?, updatetime = ?, giveback = 0 WHERE architecture = ?",
+                    (mirror.releasetime, now, architecture))
+    db.commit()
+
+def main():
+    mirror = DebianMirror(MIRROR)
+    mirror.update_release()
+    db = sqlite3.connect("db", detect_types=sqlite3.PARSE_DECLTYPES)
+    cur = db.cursor()
+    cur.execute("SELECT architecture, releasetime, updatetime, giveback FROM depcheck;")
+    lastupdate = datetime.datetime.utcnow() - datetime.timedelta(hours=6)
+    for architecture, releasetime, updatetime, giveback in list(cur.fetchall()):
+        if giveback or updatetime < lastupdate or releasetime < mirror.releasetime:
+            print("update %s" % architecture)
+            update_depcheck(mirror, db, architecture)
+
+if __name__ == "__main__":
+    main()
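
The deb822 helpers above fold continuation lines into plain str-dicts and
serialize them back for dose. A round-trip sketch with a made-up stanza
(importing depcheck pulls in apt_pkg, yaml, requests, and dpkg's architecture
tables):

    from depcheck import parse_deb822, serialize_deb822

    stanza = (b"Package: hello\n"
              b"Version: 2.10-2\n"
              b"Build-Depends: debhelper-compat (= 11),\n"
              b" gettext\n"
              b"\n")
    parsed, = parse_deb822([stanza])
    print(parsed["Build-Depends"])    # continuation folded into the value
    print(serialize_deb822(parsed), end="")
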
diff --git a/schema.sql b/schema.sql
new file mode 100644 (file)
index 0000000..9a19739
--- /dev/null
+++ b/schema.sql
@@ -0,0 +1,30 @@
+CREATE TABLE depstate (
+       source TEXT NOT NULL,
+       version TEXT NOT NULL,
+       architecture TEXT NOT NULL,
+       satisfiable BOOLEAN NOT NULL CHECK (satisfiable in (0, 1)),
+       reason TEXT,
+       UNIQUE (architecture, source, version));
+
+CREATE TABLE depcheck (
+       architecture TEXT NOT NULL UNIQUE,
+       releasetime TIMESTAMP NOT NULL,
+       updatetime TIMESTAMP NOT NULL,
+       giveback BOOLEAN NOT NULL CHECK (giveback in (0, 1)));
+INSERT INTO depcheck (architecture, releasetime, updatetime, giveback) VALUES
+       ("arm64", "2000-01-01 00:00:00", "2000-01-01 00:00:00", 1),
+       ("armel", "2000-01-01 00:00:00", "2000-01-01 00:00:00", 1),
+       ("armhf", "2000-01-01 00:00:00", "2000-01-01 00:00:00", 1),
+       ("mips", "2000-01-01 00:00:00", "2000-01-01 00:00:00", 1),
+       ("mips64el", "2000-01-01 00:00:00", "2000-01-01 00:00:00", 1),
+       ("mipsel", "2000-01-01 00:00:00", "2000-01-01 00:00:00", 1),
+       ("ppc64el", "2000-01-01 00:00:00", "2000-01-01 00:00:00", 1),
+       ("s390x", "2000-01-01 00:00:00", "2000-01-01 00:00:00", 1);
+
+CREATE TABLE builds (
+       source TEXT NOT NULL,
+       version TEXT NOT NULL,
+       architecture TEXT NOT NULL,
+       success BOOLEAN NOT NULL CHECK (success in (0, 1)),
+       starttime TIMESTAMP NOT NULL,
+       filename TEXT NOT NULL);
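
Neither build.py nor depcheck.py creates the database, so the schema must be
loaded once by hand, e.g. "sqlite3 db < schema.sql", or equivalently from
Python:

    import sqlite3

    # Creates the "db" file that both scripts open in the working directory.
    db = sqlite3.connect("db")
    with open("schema.sql") as f:
        db.executescript(f.read())
    db.close()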