diff options
-rwxr-xr-x | build.py | 62 | ||||
-rwxr-xr-x | build.sh | 52 | ||||
-rw-r--r-- | common.py | 23 | ||||
-rwxr-xr-x | depcheck.py | 445 | ||||
-rw-r--r-- | schema.sql | 30 |
5 files changed, 612 insertions, 0 deletions
diff --git a/build.py b/build.py new file mode 100755 index 0000000..c0def3b --- /dev/null +++ b/build.py @@ -0,0 +1,62 @@ +#!/usr/bin/python3 + +import collections +import contextlib +import datetime +import lzma +import os.path +import sqlite3 +import subprocess + +from common import decompress_stream, yield_lines + +def yield_chunks(filelike, chunksize=65536): + while True: + data = filelike.read(chunksize) + if not data: + break + yield data + +def scan_log_status(filelike): + it = yield_chunks(filelike) + it = decompress_stream(it, lzma.LZMADecompressor()) + it = yield_lines(it) + last_lines = collections.deque(it, 25) + status = [l.split(b':', 1)[1].strip() + for l in last_lines if l.startswith(b"Status:")] + if status: + return status[0].decode("ascii") + return "unknown" + +def do_build(source, version, architecture): + now = datetime.datetime.utcnow() + logtarget = "%s_%s_%s_%s.log.xz" % (source, version, architecture, + now.strftime("%Y%m%d%H%M%S")) + cmdline = ["ssh", "gcc131", "sh", "/dev/stdin", architecture, + "%s_%s" % (source, version)] + with open(os.path.join("logs", logtarget), "w+b") as output: + with open("build.sh", "rb") as instructions: + code = subprocess.call(cmdline, stdin=instructions, stdout=output) + output.seek(0) + status = scan_log_status(output) + print("status %s code %d" % (status, code)) + return (now, code == 0, logtarget, status == "given-back") + +def main(): + db = sqlite3.connect("db", detect_types=sqlite3.PARSE_DECLTYPES) + with contextlib.closing(db.cursor()) as cur: + cur.execute("SELECT source, version, depstate.architecture FROM depstate JOIN depcheck ON depstate.architecture = depcheck.architecture WHERE satisfiable = 1 AND giveback = 0 ORDER BY random() LIMIT 1;") + source, version, architecture = cur.fetchone() + print("building %s_%s for %s" % (source, version, architecture)) + timestamp, success, filename, giveback = do_build(source, version, architecture) + with contextlib.closing(db.cursor()) as cur: + 
cur.execute("INSERT INTO builds (source, version, architecture, success, starttime, filename) VALUES (?, ?, ?, ?, ?, ?);", + (source, version, architecture, success, timestamp, + filename)) + if giveback: + cur.execute("UPDATE depcheck SET giveback = 1 WHERE architecture = ?;", + (architecture,)) + db.commit() + +if __name__ == "__main__": + main() diff --git a/build.sh b/build.sh new file mode 100755 index 0000000..a81f359 --- /dev/null +++ b/build.sh @@ -0,0 +1,52 @@ +#!/bin/sh +set -u +set -e +die() { + echo "error: $*" 1>&2 + exit 254 +} +test "$#" = 2 || die "usage: $0 <architecture> <package>" +HOST_ARCH="$1" +PACKAGE="$2" +if ! dpkg-architecture "-a$HOST_ARCH" >/dev/null 2>&1; then + die "$HOST_ARCH is not a valid architecture" +fi +BUILDTMP=$(mktemp -d) || die "failed to create temporary directory" +cleanup() { + rm -R -f "$BUILDTMP" +} +trap cleanup EXIT +cd "$BUILDTMP" || die "failed to cd to tmpdir" +export SBUILD_CONFIG="$BUILDTMP/sbuild.conf" +cat >"$SBUILD_CONFIG" <<'EOF' +$build_arch_any = 1; +$build_arch_all = 0; +$build_source = 0; +$distribution = "unstable"; +$build_profiles = "cross nocheck"; +$manual_depends = ["libc-dev", "libstdc++-dev"]; +$dpkg_buildpackage_user_options = ["--jobs-try=1"]; +$bd_uninstallable_explainer = "apt"; +$source_only_changes = 0; +$apt_update = 1; +$apt_distupgrade = 1; +$lintian_opts = ["-T", "binary-from-other-architecture"]; +#$lintian_require_success = 1; +$run_lintian = 1; +$run_autopkgtest = 0; +$run_piuparts = 0; +$sbuild_mode = "user"; +1; +EOF +RET=0 +sbuild "--host=$HOST_ARCH" "$PACKAGE" >/dev/null || RET=$? +for f in *.build; do + test -L "$f" && continue + test -f "$f" || continue + # poor substitute for missing $lintian_require_success. 
+ if tail -n20 "$f" | grep -q "^Lintian: fail$"; then + RET=1 + fi + xz -9c "$f" +done +exit "$RET" diff --git a/common.py b/common.py new file mode 100644 index 0000000..bb8b7a4 --- /dev/null +++ b/common.py @@ -0,0 +1,23 @@ +def yield_lines(iterable): + """Converts an arbitrary bytes iterable into an iterable that yields whole + lines. The final byte of each returned value (except possibly the last one) + is a newline or carriage return character. The concatenation of the input + iterable equals the concatenation of the output iterable.""" + buff = b"" + for data in iterable: + buff += data + parts = buff.splitlines(True) + buff = parts.pop() + yield from parts + if buff: + yield buff + +def decompress_stream(iterable, decompressor): + """Decompress an iterable of bytes using the given decompressor into + another (decompressed) iterable of bytes. The decompressor can be a + bz2.BZ2Decompressor or lzma.LZMADecompressor instance.""" + for data in iterable: + data = decompressor.decompress(data) + yield data + if hasattr(decompressor, "flush"): + yield decompressor.flush() diff --git a/depcheck.py b/depcheck.py new file mode 100755 index 0000000..f1c1a1f --- /dev/null +++ b/depcheck.py @@ -0,0 +1,445 @@ +#!/usr/bin/python3 + +import collections +import contextlib +import datetime +import hashlib +import itertools +import lzma +import os.path +import sqlite3 +import subprocess +import tempfile +import yaml + +import apt_pkg +apt_pkg.init() +version_compare = apt_pkg.version_compare +import requests + +from common import decompress_stream, yield_lines + +BUILD_ARCH = "amd64" +MIRROR = "http://proxy:3142/debian" +PROFILES = frozenset(("cross", "nocheck")) + +CPUEntry = collections.namedtuple('CPUEntry', + 'debcpu gnucpu regex bits endianness') + +TupleEntry = collections.namedtuple('TupleEntry', + 'abi libc os cpu') + +class Architectures: + @staticmethod + def read_table(filename): + with open(filename) as f: + for line in f: + if not line.startswith("#"): + yield 
line.split() + + def __init__(self, cputable="/usr/share/dpkg/cputable", + tupletable="/usr/share/dpkg/tupletable", + abitable="/usr/share/dpkg/abitable"): + self.cputable = {} + self.tupletable = {} + self.abitable = {} + self.read_cputable(cputable) + self.read_tupletable(tupletable) + self.read_abitable(abitable) + + def read_cputable(self, cputable): + self.cputable.clear() + for values in self.read_table(cputable): + values[3] = int(values[3]) # bits + entry = CPUEntry(*values) + self.cputable[entry.debcpu] = entry + + def read_tupletable(self, tupletable): + self.tupletable.clear() + for debtuple, debarch in self.read_table(tupletable): + if '<cpu>' in debtuple: + for cpu in self.cputable: + entry = TupleEntry(*debtuple.replace("<cpu>", cpu) + .split("-")) + self.tupletable[debarch.replace("<cpu>", cpu)] = entry + else: + self.tupletable[debarch] = TupleEntry(*debtuple.split("-")) + + def read_abitable(self, abitable): + self.abitable.clear() + for arch, bits in self.read_table(abitable): + bits = int(bits) + self.abitable[arch] = bits + + def match(self, arch, pattern): + parts = pattern.split("-") + if not "any" in parts: + return pattern == arch + while len(parts) < 4: + parts.insert(0, "any") + entry = self.tupletable[arch] + return all(parts[i] in (entry[i], "any") for i in range(4)) + + def getendianness(self, arch): + return self.cputable[self.tupletable[arch].cpu].endianness + +architectures = Architectures() +arch_match = architectures.match + +def call_dose_builddebcheck(arguments): + """ + @type arguments: [str] + @param arguments: command line arguments to dose-builddebcheck + @returns: an iterable over loaded yaml documents. The first document + is the header, all other documents are per-package. 
+ @raises subprocess.CalledProcessError: if dose errors out + """ + cmd = ["dose-builddebcheck"] + cmd.extend(arguments) + + proc = subprocess.Popen(cmd, stdout=subprocess.PIPE) + + lines = [] + for line in proc.stdout: + if line.startswith(b' '): + lines.append(line) + elif line == b' -\n': + yield yaml.load(b"".join(lines), Loader=yaml.CBaseLoader) + lines = [] + proc.stdout.close() + if lines: + yield yaml.load(b"".join(lines), Loader=yaml.CSafeLoader) + if proc.wait() not in (0, 1): + raise subprocess.CalledProcessError(proc.returncode, cmd) + +def parse_deb822(iterable): + """Parse an iterable of bytes into an iterable of str-dicts.""" + mapping = {} + key = None + value = None + for line in yield_lines(iterable): + line = line.decode("utf8") + if line == "\n": + if key is not None: + mapping[key] = value.strip() + key = None + yield mapping + mapping = {} + elif key and line.startswith((" ", "\t")): + value += line + else: + if key is not None: + mapping[key] = value.strip() + try: + key, value = line.split(":", 1) + except ValueError: + raise ValueError("invalid input line %r" % line) + if key is not None: + mapping[key] = value.strip() + if mapping: + yield mapping + +def serialize_deb822(dct): + """Serialize a str-dict into a single str object.""" + return "".join(map("%s: %s\n".__mod__, dct.items())) + "\n" + +class HashSumMismatch(Exception): + pass + +def hash_check(iterable, hashobj, expected_digest): + """Wraps an iterable that yields bytes. It doesn't modify the sequence, + but on the final element it verifies that the concatenation of bytes + yields an expected digest value. Upon failure, the final next() results in + a HashSumMismatch rather than StopIteration. 
+ """ + for data in iterable: + hashobj.update(data) + yield data + if hashobj.hexdigest() != expected_digest: + raise HashSumMismatch() + +def parse_date(s): + return datetime.datetime.strptime(s, "%a, %d %b %Y %H:%M:%S %Z") + +class GPGV: + def __init__(self, files=("/etc/apt/trusted.gpg",), + partsdir="/etc/apt/trusted.gpg.d"): + candidates = list(files) + candidates.extend(os.path.join(partsdir, e) + for e in os.listdir(partsdir)) + self.keyrings = list(filter(lambda f: os.access(f, os.R_OK), + candidates)) + + def verify(self, content): + cmdline = ["gpgv", "--quiet", "--weak-digest", "SHA1", "--output", "-"] + for keyring in self.keyrings: + cmdline.extend(("--keyring", keyring)) + proc = subprocess.Popen(cmdline, stdin=subprocess.PIPE, + stdout=subprocess.PIPE, stderr=subprocess.PIPE) + stdout, _ = proc.communicate(content) + if proc.wait() != 0: + raise ValueError("signature verififcation failed") + return stdout + +class DebianMirror: + hashfunc = "SHA256" + def __init__(self, uri, dist="sid"): + self.uri = uri + self.dist = dist + self.releasetime = None + self.byhash = None + self.files = {} + + @staticmethod + def get_all_keyrings(): + yield "/etc/apt/trusted.gpg" + partsdir = "/etc/apt/trusted.gpg.d" + try: + for e in os.listdir(partsdir): + yield os.path.join(partsdir, e) + except FileNotFoundError: + pass + + @staticmethod + def get_keyrings(): + return filter(lambda f: os.access(f, os.R_OK), + DebianMirror.get_all_keyrings()) + + def get_uri(self, filename): + return "%s/dists/%s/%s" % (self.uri, self.dist, filename) + + def fetch_release(self): + resp = requests.get(self.get_uri("InRelease")) + resp.raise_for_status() + return GPGV().verify(resp.content) + + def parse_release(self, content): + info, = list(parse_deb822([content])) + self.releasetime = parse_date(info["Date"]) + valid_until = parse_date(info["Valid-Until"]) + now = datetime.datetime.utcnow() + if self.releasetime > now: + raise ValueError("release file generated in future") + if 
valid_until < now: + raise ValueError("release signature expired") + self.byhash = info.pop("Acquire-By-Hash", "no") == "yes" + self.files = {} + for line in info[self.hashfunc].splitlines(): + parts = line.split() + if not parts: + continue + if len(parts) != 3: + raise ValueError("invalid %s line %r" % (self.hashfunc, line)) + self.files[parts[2]] = parts[0] + + def update_release(self): + self.parse_release(self.fetch_release()) + + def fetch_list(self, listname): + if listname + ".xz" in self.files: + listname += ".xz" + wrapper = lambda i: decompress_stream(i, lzma.LZMADecompressor()) + else: + wrapper = lambda i: i + hashvalue = self.files[listname] + if self.byhash: + listname = "%s/by-hash/%s/%s" % (os.path.dirname(listname), + self.hashfunc, hashvalue) + with requests.get(self.get_uri(listname), stream=True) as resp: + resp.raise_for_status() + it = resp.iter_content(65536) + it = hash_check(it, hashlib.new(self.hashfunc), hashvalue) + yield from wrapper(it) + + def fetch_sources(self, component="main"): + return self.fetch_list("%s/source/Sources" % component) + + def fetch_binaries(self, architecture, component="main"): + return self.fetch_list("%s/binary-%s/Packages" % + (component, architecture)) + +binfields = frozenset(( + "Architecture", + "Breaks", + "Conflicts", + "Depends", + "Essential", + "Multi-Arch", + "Package", + "Pre-Depends", + "Provides", + "Version", +)) + +srcdepfields = frozenset(( + "Build-Conflicts", + "Build-Conflicts-Arch", + "Build-Depends", + "Build-Depends-Arch", +)) +srcfields = srcdepfields.union(( + "Architecture", + "Package", + "Version", +)) + +bad_foreign_packages = frozenset(( + "flex-old", # cannot execute /usr/bin/flex + "icmake", # cannot execute /usr/bin/icmake, build system + "jam", # cannot execute /usr/bin/jam, build system + "libtool-bin", # #836123 + "python2.7-minimal", # fails postinst + "python3.6-minimal", # fails postinst + "python3.7-minimal", # fails postinst + "swi-prolog-nox", # fails postinst + 
"xrdp", # fails postinst + "libgvc6", # fails postinst +)) + +def strip_dict(dct, keepfields): + keys = set(dct.keys()) + keys.difference_update(keepfields) + for k in keys: + del dct[k] + +def strip_alternatvies(dct, fields): + for f in fields: + try: + value = dct[f] + except KeyError: + continue + dct[f] = ",".join(dep.split("|", 1)[0] + for dep in value.split(",")) + +def latest_versions(pkgs): + packages = {} + for p in pkgs: + name = p["Package"] + try: + if version_compare(packages[name]["Version"], p["Version"]) > 0: + continue + except KeyError: + pass + packages[name] = p + return (p for p in packages.values() + if "Package" in p and not "Negative-Entry" in p) + +def make_binary_list_build(mirror, arch): + for p in parse_deb822(mirror.fetch_binaries(BUILD_ARCH)): + if p["Package"].startswith("crossbuild-essential-"): + if p["Package"] != "crossbuild-essential-" + arch: + continue + p["Depends"] += ", libc-dev:%s, libstdc++-dev:%s" % (arch, arch) + strip_dict(p, binfields) + yield p + +def make_binary_list_host(mirror, arch): + for p in parse_deb822(mirror.fetch_binaries(arch)): + if p["Architecture"] == "all": + continue + if p.get("Multi-Arch") == "foreign": + continue + if p.get("Essential") == "yes": + continue + if p["Package"] in bad_foreign_packages: + continue + strip_dict(p, binfields) + yield p + +def make_binary_list(mirror, arch): + return itertools.chain(make_binary_list_build(mirror, arch), + make_binary_list_host(mirror, arch)) + +def make_source_list(mirror, arch): + for p in parse_deb822(mirror.fetch_sources()): + if p.get("Extra-Source-Only") == "yes": + continue + if any(arch_match(arch, pattern) + for pattern in p["Architecture"].split()): + strip_dict(p, srcfields) + strip_alternatvies(p, srcdepfields) + yield p + else: + # dummy entry preventing older matching versions + yield {"Package": p["Package"], "Version": p["Version"], + "Negative-Entry": "yes"} + +def check_bdsat(mirror, arch): + cmd = [ + "--deb-native-arch=" + BUILD_ARCH, + 
"--deb-host-arch=" + arch, + "--deb-drop-b-d-indep", + "--deb-profiles=" + ",".join(PROFILES), + "--successes", + "--failures", + "--explain", + "--explain-minimal", + "--deb-emulate-sbuild", + ] + + with tempfile.NamedTemporaryFile("w", encoding="utf8") as bintmp, \ + tempfile.NamedTemporaryFile("w", encoding="utf8") as srctmp: + for p in make_binary_list(mirror, arch): + bintmp.write(serialize_deb822(p)) + bintmp.flush() + cmd.append(bintmp.name) + + for p in latest_versions(make_source_list(mirror, arch)): + srctmp.write(serialize_deb822(p)) + srctmp.flush() + cmd.append(srctmp.name) + + dose_result = call_dose_builddebcheck(cmd) + next(dose_result) # skip header + for d in dose_result: + if d["status"] == "ok": + yield (d["package"], d["version"], True, None) + else: + r = d["reasons"][0] + if "missing" in r: + reason = "missing %s" % r["missing"]["pkg"]["unsat-dependency"].split()[0].split(":", 1)[0] + elif "conflict" in r: + r = r["conflict"]["pkg1"]["unsat-conflict"] + reason = "skew " if ' (!= ' in r else "conflict " + reason += r.split()[0].split(':', 1)[0] + else: + assert False + yield (d["package"], d["version"], False, reason) + +def update_depcheck(mirror, db, architecture): + now = datetime.datetime.utcnow() + mirror.update_release() + state = {} + for source, version, satisfiable, reason in check_bdsat(mirror, architecture): + state[source] = (version, satisfiable, reason) + with contextlib.closing(db.cursor()) as cur: + cur.execute("BEGIN;") + cur.execute("SELECT source, version, satisfiable, reason FROM depstate WHERE architecture = ?;", + (architecture,)) + for source, version, satisfiable, reason in list(cur.fetchall()): + if state.get(source) == (version, satisfiable, reason): + del state[source] + else: + cur.execute("DELETE FROM depstate WHERE source = ? AND version = ? 
AND architecture = ?;", + (source, version, architecture)) + cur.executemany("INSERT INTO depstate (source, architecture, version, satisfiable, reason) VALUES (?, ?, ?, ?, ?);", + ((source, architecture, version, satisfiable, reason) + for source, (version, satisfiable, reason) in state.items())) + cur.execute("UPDATE depcheck SET releasetime = ?, updatetime = ?, giveback = 0 WHERE architecture = ?", + (mirror.releasetime, now, architecture)) + db.commit() + +def main(): + mirror = DebianMirror(MIRROR) + mirror.update_release() + db = sqlite3.connect("db", detect_types=sqlite3.PARSE_DECLTYPES) + cur = db.cursor() + cur.execute("SELECT architecture, releasetime, updatetime, giveback FROM depcheck;") + lastupdate = datetime.datetime.utcnow() - datetime.timedelta(hours=6) + for architecture, releasetime, updatetime, giveback in list(cur.fetchall()): + if giveback or updatetime < lastupdate or releasetime < mirror.releasetime: + print("update %s" % architecture) + update_depcheck(mirror, db, architecture) + +if __name__ == "__main__": + main() diff --git a/schema.sql b/schema.sql new file mode 100644 index 0000000..9a19739 --- /dev/null +++ b/schema.sql @@ -0,0 +1,30 @@ +CREATE TABLE depstate ( + source TEXT NOT NULL, + version TEXT NOT NULL, + architecture TEXT NOT NULL, + satisfiable BOOLEAN NOT NULL CHECK (satisfiable in (0, 1)), + reason TEXT, + UNIQUE (architecture, source, version)); + +CREATE TABLE depcheck ( + architecture TEXT NOT NULL UNIQUE, + releasetime TIMESTAMP NOT NULL, + updatetime TIMESTAMP NOT NULL, + giveback BOOLEAN NOT NULL CHECK (giveback in (0, 1))); +INSERT INTO depcheck (architecture, releasetime, updatetime, giveback) VALUES + ("arm64", "2000-01-01 00:00:00", "2000-01-01 00:00:00", 1), + ("armel", "2000-01-01 00:00:00", "2000-01-01 00:00:00", 1), + ("armhf", "2000-01-01 00:00:00", "2000-01-01 00:00:00", 1), + ("mips", "2000-01-01 00:00:00", "2000-01-01 00:00:00", 1), + ("mips64el", "2000-01-01 00:00:00", "2000-01-01 00:00:00", 1), + 
("mipsel", "2000-01-01 00:00:00", "2000-01-01 00:00:00", 1), + ("ppc64el", "2000-01-01 00:00:00", "2000-01-01 00:00:00", 1), + ("s390x", "2000-01-01 00:00:00", "2000-01-01 00:00:00", 1); + +CREATE TABLE builds ( + source TEXT NOT NULL, + version TEXT NOT NULL, + architecture TEXT NOT NULL, + success BOOLEAN NOT NULL CHECK (success in (0, 1)), + starttime TIMESTAMP NOT NULL, + filename TEXT NOT NULL); |