add build architecture to schema of building table
[~helmut/crossqa.git] / depcheck.py
1 #!/usr/bin/python3
2 # SPDX-License-Identifier: GPL-2.0+
3
4 import argparse
5 import collections
6 import contextlib
7 import datetime
8 import functools
9 import hashlib
10 import itertools
11 import lzma
12 import multiprocessing
13 import os.path
14 import sqlite3
15 import subprocess
16 import tempfile
17 import yaml
18
19 import apt_pkg
20 apt_pkg.init()
21 version_compare = apt_pkg.version_compare
22 import requests
23
24 from common import decompress_stream, yield_lines
25
# The native (build) architecture used for every satisfiability check.
BUILD_ARCH = "amd64"
# Build profiles passed to dose-builddebcheck for cross building.
PROFILES = frozenset(("cross", "nocheck"))
28
# One row of dpkg's cputable: debian cpu name, GNU cpu name, a regex for
# matching GNU cpu names, pointer width in bits, and endianness.
CPUEntry = collections.namedtuple('CPUEntry',
                                  'debcpu gnucpu regex bits endianness')

# One debian architecture tuple: abi, libc, os and cpu components.
TupleEntry = collections.namedtuple('TupleEntry',
                                    'abi libc os cpu')
34
class Architectures:
    """In-memory copy of dpkg's architecture tables with pattern matching."""

    @staticmethod
    def read_table(filename):
        """Yield the whitespace-split fields of each non-comment line."""
        with open(filename) as f:
            for line in f:
                if not line.startswith("#"):
                    yield line.split()

    def __init__(self, cputable="/usr/share/dpkg/cputable",
                 tupletable="/usr/share/dpkg/tupletable",
                 abitable="/usr/share/dpkg/abitable"):
        self.cputable = {}    # debian cpu name -> CPUEntry
        self.tupletable = {}  # debian architecture -> TupleEntry
        self.abitable = {}    # debian architecture -> bits
        self.read_cputable(cputable)
        self.read_tupletable(tupletable)
        self.read_abitable(abitable)

    def read_cputable(self, cputable):
        """(Re)load the cpu table mapping debian cpu names to CPUEntry."""
        self.cputable.clear()
        for values in self.read_table(cputable):
            values[3] = int(values[3])  # bits column is numeric
            entry = CPUEntry(*values)
            self.cputable[entry.debcpu] = entry

    def read_tupletable(self, tupletable):
        """(Re)load the mapping from debian architecture to TupleEntry.

        Must run after read_cputable, because <cpu> wildcard rows are
        expanded once per known cpu.
        """
        self.tupletable.clear()
        for debtuple, debarch in self.read_table(tupletable):
            if '<cpu>' in debtuple:
                # wildcard row: expand to one entry per known cpu
                for cpu in self.cputable:
                    entry = TupleEntry(*debtuple.replace("<cpu>", cpu)
                                       .split("-"))
                    self.tupletable[debarch.replace("<cpu>", cpu)] = entry
            else:
                self.tupletable[debarch] = TupleEntry(*debtuple.split("-"))

    def read_abitable(self, abitable):
        """(Re)load the abi table mapping architectures to their bit width."""
        self.abitable.clear()
        for arch, bits in self.read_table(abitable):
            self.abitable[arch] = int(bits)

    def match(self, arch, pattern):
        """Check whether debian architecture arch matches pattern.

        A pattern without "any" components must be an exact match;
        otherwise it is padded to a full abi-libc-os-cpu tuple and
        compared component-wise against the tuple for arch.
        """
        parts = pattern.split("-")
        if "any" not in parts:
            return pattern == arch
        while len(parts) < 4:
            parts.insert(0, "any")
        entry = self.tupletable[arch]
        return all(parts[i] in (entry[i], "any") for i in range(4))

    def getendianness(self, arch):
        """Return the endianness of the cpu underlying debian arch."""
        return self.cputable[self.tupletable[arch].cpu].endianness
88
# Module-level singleton; arch_match is the entry point used elsewhere.
architectures = Architectures()
arch_match = architectures.match
91
def call_dose_builddebcheck(arguments):
    """
    @type arguments: [str]
    @param arguments: command line arguments to dose-builddebcheck
    @returns: an iterable over loaded yaml documents. The first document
              is the header, all other documents are per-package.
    @raises subprocess.CalledProcessError: if dose errors out
    """
    cmd = ["dose-builddebcheck"]
    cmd.extend(arguments)

    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE)

    lines = []
    for line in proc.stdout:
        # dose emits one yaml document per package, indented two spaces
        # and terminated by a " -" line.
        if line.startswith(b'  '):
            lines.append(line)
        elif line == b' -\n':
            yield yaml.load(b"".join(lines), Loader=yaml.CBaseLoader)
            lines = []
    proc.stdout.close()
    if lines:
        # Use the same loader as above so a trailing document gets
        # identical (string-typed) scalars; previously this used
        # CSafeLoader, which would coerce values like versions to
        # non-string types.
        yield yaml.load(b"".join(lines), Loader=yaml.CBaseLoader)
    # dose exits 1 for unsatisfiable packages, which is not an error here
    if proc.wait() not in (0, 1):
        raise subprocess.CalledProcessError(proc.returncode, cmd)
117
def parse_deb822(iterable):
    """Parse an iterable of bytes into an iterable of str-dicts.

    Each yielded dict maps field names to stripped values.  Paragraphs
    are separated by blank lines; continuation lines begin with a space
    or tab and are appended to the preceding field.
    """
    paragraph = {}
    key = None
    value = None
    for raw in yield_lines(iterable):
        line = raw.decode("utf8")
        if line == "\n":
            # paragraph separator: commit the pending field, emit
            if key is not None:
                paragraph[key] = value.strip()
                key = None
            yield paragraph
            paragraph = {}
        elif key and line.startswith((" ", "\t")):
            # continuation line extends the field being accumulated
            value += line
        else:
            if key is not None:
                paragraph[key] = value.strip()
            key, sep, value = line.partition(":")
            if not sep:
                raise ValueError("invalid input line %r" % line)
    # flush a trailing paragraph not terminated by a blank line
    if key is not None:
        paragraph[key] = value.strip()
    if paragraph:
        yield paragraph
144
def serialize_deb822(dct):
    """Render a str-dict as one deb822 paragraph string ending in a
    blank line."""
    fields = ["%s: %s\n" % (name, val) for name, val in dct.items()]
    return "".join(fields) + "\n"
148
class HashSumMismatch(Exception):
    """Raised when downloaded data does not match its expected digest."""
    pass
151
def hash_check(iterable, hashobj, expected_digest):
    """Pass bytes chunks through unchanged while feeding them to hashobj.

    After the last chunk, the accumulated hex digest is compared with
    expected_digest; on mismatch the final next() raises HashSumMismatch
    instead of StopIteration.
    """
    for chunk in iterable:
        hashobj.update(chunk)
        yield chunk
    if hashobj.hexdigest() != expected_digest:
        raise HashSumMismatch()
163
def parse_date(s):
    """Parse an RFC 822 style date (as found in Release files) into a
    naive datetime object."""
    fmt = "%a, %d %b %Y %H:%M:%S %Z"
    return datetime.datetime.strptime(s, fmt)
166
class GPGV:
    """Verify OpenPGP-signed data with gpgv using the apt keyrings."""

    def __init__(self, files=("/etc/apt/trusted.gpg",),
                 partsdir="/etc/apt/trusted.gpg.d"):
        """Collect the readable keyrings from files and partsdir.

        A missing partsdir simply contributes no keyrings (matching
        DebianMirror.get_all_keyrings) instead of raising.
        """
        candidates = list(files)
        try:
            candidates.extend(os.path.join(partsdir, e)
                              for e in os.listdir(partsdir))
        except FileNotFoundError:
            pass
        self.keyrings = [f for f in candidates if os.access(f, os.R_OK)]

    def verify(self, content):
        """Check the signature on content and return the signed payload.

        @raises ValueError: if gpgv rejects the signature
        """
        cmdline = ["gpgv", "--quiet", "--weak-digest", "SHA1", "--output", "-"]
        for keyring in self.keyrings:
            cmdline.extend(("--keyring", keyring))
        proc = subprocess.Popen(cmdline, stdin=subprocess.PIPE,
                                stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        stdout, _ = proc.communicate(content)
        if proc.wait() != 0:
            raise ValueError("signature verification failed")
        return stdout
186
class DebianMirror:
    """Access the dists tree of a debian mirror and validate its indices."""

    hashfunc = "SHA256"

    def __init__(self, uri, dist="sid"):
        self.uri = uri
        self.dist = dist
        self.releasetime = None  # Date field of the last parsed Release
        self.byhash = None       # whether indices can be fetched by-hash
        self.files = {}          # index path -> expected hex digest

    @staticmethod
    def get_all_keyrings():
        """Yield every candidate apt keyring path, readable or not."""
        yield "/etc/apt/trusted.gpg"
        partsdir = "/etc/apt/trusted.gpg.d"
        try:
            for entry in os.listdir(partsdir):
                yield os.path.join(partsdir, entry)
        except FileNotFoundError:
            pass

    @staticmethod
    def get_keyrings():
        """Yield only the apt keyrings that are actually readable."""
        return (path for path in DebianMirror.get_all_keyrings()
                if os.access(path, os.R_OK))

    def get_uri(self, filename):
        """Build the full URI of a file below this mirror's dists tree."""
        return "{}/dists/{}/{}".format(self.uri, self.dist, filename)

    def fetch_release(self):
        """Download InRelease and return its signature-verified payload."""
        resp = requests.get(self.get_uri("InRelease"))
        resp.raise_for_status()
        return GPGV().verify(resp.content)

    def parse_release(self, content):
        """Populate releasetime, byhash and files from Release content.

        @raises ValueError: if the Release timestamps are implausible
        """
        info, = list(parse_deb822([content]))
        self.releasetime = parse_date(info["Date"])
        valid_until = parse_date(info["Valid-Until"])
        now = datetime.datetime.utcnow()
        if now < self.releasetime:
            raise ValueError("release file generated in future")
        if valid_until < now:
            raise ValueError("release signature expired")
        self.byhash = info.pop("Acquire-By-Hash", "no") == "yes"
        self.files = {}
        for line in info[self.hashfunc].splitlines():
            fields = line.split()
            if not fields:
                continue
            if len(fields) != 3:
                raise ValueError("invalid %s line %r" % (self.hashfunc, line))
            # fields are: digest, size, path
            self.files[fields[2]] = fields[0]

    def update_release(self):
        """Fetch the current InRelease file and parse it."""
        self.parse_release(self.fetch_release())

    def fetch_list(self, listname):
        """Download an index, preferring the xz variant, verify its digest
        and yield the (decompressed) content as chunks of bytes."""
        compressed = listname + ".xz"
        use_xz = compressed in self.files
        if use_xz:
            listname = compressed
        hashvalue = self.files[listname]
        fetchname = listname
        if self.byhash:
            fetchname = "%s/by-hash/%s/%s" % (os.path.dirname(listname),
                                              self.hashfunc, hashvalue)
        with contextlib.closing(requests.get(self.get_uri(fetchname),
                                             stream=True)) as resp:
            resp.raise_for_status()
            chunks = hash_check(resp.iter_content(65536),
                                hashlib.new(self.hashfunc), hashvalue)
            if use_xz:
                chunks = decompress_stream(chunks, lzma.LZMADecompressor())
            yield from chunks

    def fetch_sources(self, component="main"):
        """Return an iterator over the Sources index for component."""
        return self.fetch_list("%s/source/Sources" % component)

    def fetch_binaries(self, architecture, component="main"):
        """Return an iterator over the Packages index for architecture."""
        return self.fetch_list("%s/binary-%s/Packages" %
                               (component, architecture))
264
# Binary package fields retained when feeding Packages entries to dose.
binfields = frozenset((
    "Architecture",
    "Breaks",
    "Conflicts",
    "Depends",
    "Essential",
    "Multi-Arch",
    "Package",
    "Pre-Depends",
    "Provides",
    "Version",
))

# Source package dependency fields; these get their alternatives stripped.
srcdepfields = frozenset((
    "Build-Conflicts",
    "Build-Conflicts-Arch",
    "Build-Depends",
    "Build-Depends-Arch",
))
# All source package fields retained when feeding Sources entries to dose.
srcfields = srcdepfields.union((
    "Architecture",
    "Package",
    "Version",
))

# Host-architecture packages that must never be installed as cross
# dependencies, typically because their maintainer scripts cannot run.
bad_foreign_packages = frozenset((
    "flex-old", # cannot execute /usr/bin/flex
    "icmake", # cannot execute /usr/bin/icmake, build system
    "jam", # cannot execute /usr/bin/jam, build system
    "libtool-bin", # #836123
    "python2.7-minimal", # fails postinst
    "python3.6-minimal", # fails postinst
    "python3.7-minimal", # fails postinst
    "python3.8-minimal", # fails postinst
    "python3.9-minimal", # fails postinst
    "swi-prolog-nox", # fails postinst
    "xrdp", # fails postinst
    "libgvc6", # fails postinst
))
304
def strip_dict(dct, keepfields):
    """Delete every key of dct that is not listed in keepfields, in place."""
    for key in list(dct):
        if key not in keepfields:
            del dct[key]
310
def strip_alternatvies(dct, fields):
    """Reduce each listed dependency field to its first alternative, in
    place; fields absent from dct are skipped.

    NOTE(review): the name is misspelled ("alternatvies") but kept,
    since callers use it.
    """
    for field in fields:
        if field not in dct:
            continue
        deps = dct[field].split(",")
        dct[field] = ",".join(dep.split("|", 1)[0] for dep in deps)
319
def latest_versions(pkgs):
    """Reduce pkgs to the newest entry per package name.

    Returns a generator over the surviving entries, excluding negative
    dummy entries.
    """
    newest = {}
    for pkg in pkgs:
        name = pkg["Package"]
        other = newest.get(name)
        if other is not None and \
           version_compare(other["Version"], pkg["Version"]) > 0:
            continue
        newest[name] = pkg
    return (p for p in newest.values()
            if "Package" in p and "Negative-Entry" not in p)
332
def make_binary_list_build(mirror, buildarch, hostarch):
    """Yield stripped build-architecture Packages entries.

    crossbuild-essential packages for other host architectures are
    dropped; the one for hostarch gains explicit libc/libstdc++ -dev
    dependencies on the host architecture.
    """
    prefix = "crossbuild-essential-"
    for pkg in parse_deb822(mirror.fetch_binaries(buildarch)):
        name = pkg["Package"]
        if name.startswith(prefix):
            if name != prefix + hostarch:
                continue
            pkg["Depends"] += ", libc-dev:%s, libstdc++-dev:%s" % \
                    (hostarch, hostarch)
        strip_dict(pkg, binfields)
        yield pkg
342
def make_binary_list_host(mirror, hostarch):
    """Yield stripped host-architecture Packages entries that are
    plausible cross build dependencies.

    Arch:all, Multi-Arch:foreign, Essential and known-bad packages are
    excluded, since they are either satisfied from the build
    architecture or cannot be installed as foreign packages.
    """
    for pkg in parse_deb822(mirror.fetch_binaries(hostarch)):
        excluded = (pkg["Architecture"] == "all"
                    or pkg.get("Multi-Arch") == "foreign"
                    or pkg.get("Essential") == "yes"
                    or pkg["Package"] in bad_foreign_packages)
        if excluded:
            continue
        strip_dict(pkg, binfields)
        yield pkg
355
def make_binary_list(mirror, buildarch, hostarch):
    """Yield the combined build- and host-architecture package lists."""
    yield from make_binary_list_build(mirror, buildarch, hostarch)
    yield from make_binary_list_host(mirror, hostarch)
359
def make_source_list(mirror, hostarch):
    """Yield stripped Sources entries buildable on hostarch.

    Sources not matching hostarch yield a negative dummy entry instead,
    so that latest_versions does not fall back to an older matching
    version of the same package.
    """
    for src in parse_deb822(mirror.fetch_sources()):
        if src.get("Extra-Source-Only") == "yes":
            continue
        patterns = src["Architecture"].split()
        if any(arch_match(hostarch, pattern) for pattern in patterns):
            strip_dict(src, srcfields)
            strip_alternatvies(src, srcdepfields)
            yield src
        else:
            # dummy entry preventing older matching versions
            yield {"Package": src["Package"], "Version": src["Version"],
                   "Negative-Entry": "yes"}
373
def check_bdsat(mirror, buildarch, hostarch):
    """Run dose-builddebcheck for cross compiling from buildarch to
    hostarch and summarize its verdicts.

    @returns: a dict mapping source package names to (version, reason)
              pairs; reason is None when the build dependencies are
              satisfiable, otherwise a short textual explanation.
    """
    cmd = [
        "--deb-native-arch=" + buildarch,
        "--deb-host-arch=" + hostarch,
        "--deb-drop-b-d-indep",
        "--deb-profiles=" + ",".join(PROFILES),
        "--successes",
        "--failures",
        "--explain",
        "--explain-minimal",
        "--deb-emulate-sbuild",
    ]

    result = {}
    with tempfile.NamedTemporaryFile("w", encoding="utf8") as bintmp, \
            tempfile.NamedTemporaryFile("w", encoding="utf8") as srctmp:
        # dose takes the combined binary package list ...
        for p in make_binary_list(mirror, buildarch, hostarch):
            bintmp.write(serialize_deb822(p))
        bintmp.flush()
        cmd.append(bintmp.name)

        # ... and the deduplicated source package list as file arguments
        for p in latest_versions(make_source_list(mirror, hostarch)):
            srctmp.write(serialize_deb822(p))
        srctmp.flush()
        cmd.append(srctmp.name)

        dose_result = call_dose_builddebcheck(cmd)
        next(dose_result) # skip header
        for d in dose_result:
            reason = None
            if d["status"] != "ok":
                # condense the first dose reason into a short summary
                r = d["reasons"][0]
                if "missing" in r:
                    reason = "missing %s" % r["missing"]["pkg"]["unsat-dependency"].split()[0].split(":", 1)[0]
                elif "conflict" in r:
                    r = r["conflict"]["pkg1"]["unsat-conflict"]
                    # a "(!= ...)" conflict means mismatching versions (skew)
                    reason = "skew " if ' (!= ' in r else "conflict "
                    reason += r.split()[0].split(':', 1)[0]
                else:
                    assert False
            result[d["package"]] = (d["version"], reason)
    return result
416
417
def update_depcheck(mirror, db, updatetime, architecture, state):
    """Store the dependency check results for one architecture.

    state maps source package names to (version, reason) pairs, where
    reason is None for satisfiable packages.  Unchanged depstate rows
    are kept, changed ones are deleted and re-inserted, and the
    depcheck bookkeeping row is refreshed.  Note: state is consumed
    (mutated) in the process.
    """
    with contextlib.closing(db.cursor()) as cur:
        cur.execute("BEGIN;")
        cur.execute("SELECT source, version, satisfiable, reason FROM depstate WHERE architecture = ?;",
                    (architecture,))
        for source, version, satisfiable, reason in list(cur.fetchall()):
            # drop unchanged entries from state so they are not re-inserted
            if satisfiable == (reason is None) and \
               state.get(source) == (version, reason):
                del state[source]
            else:
                cur.execute("DELETE FROM depstate WHERE source = ? AND version = ? AND architecture = ?;",
                            (source, version, architecture))
        cur.executemany("INSERT INTO depstate (source, architecture, version, satisfiable, reason) VALUES (?, ?, ?, ?, ?);",
                        ((source, architecture, version, reason is None,
                          reason)
                         for source, (version, reason) in state.items()))
        cur.execute("UPDATE depcheck SET releasetime = ?, updatetime = ?, giveback = 0 WHERE architecture = ?",
                    (mirror.releasetime, updatetime, architecture))
    db.commit()
437
438
def main_docheck(mirror, architecture):
    """Pool worker: dependency-check one host architecture.

    Returns the architecture paired with the check_bdsat result dict.
    """
    state = check_bdsat(mirror, BUILD_ARCH, architecture)
    return (architecture, state)
441
442
class SequentialPool:
    """Sequential variant of multiprocessing.Pool for debugging."""

    def __enter__(self):
        return self

    def __exit__(self, *exc_info):
        pass

    def close(self):
        # no worker processes to shut down
        pass

    def join(self):
        # no worker processes to wait for
        pass

    def imap_unordered(self, func, iterable):
        """Run func over iterable inline, like Pool.imap_unordered."""
        return map(func, iterable)
454
def main():
    """Re-check build dependency satisfiability for every architecture
    whose stored state predates the mirror's current Release file."""
    argp = argparse.ArgumentParser()
    argp.add_argument('-m', '--mirror',
                      default='http://deb.debian.org/debian',
                      help="debian mirror to use")
    argp.add_argument('-p', '--parallel', action="store_true",
                      help="enable parallel checking")
    args = argp.parse_args()
    mirror = DebianMirror(args.mirror)
    mirror.update_release()
    db = sqlite3.connect("db")
    cur = db.cursor()
    # only architectures with data older than the current Release are stale
    cur.execute("SELECT architecture FROM depcheck WHERE releasetime < ?;",
                (mirror.releasetime,))
    archs = set(row[0] for row in cur.fetchall())
    if not archs:
        return
    print("checking %s" % " ".join(sorted(archs)))
    now = datetime.datetime.utcnow().replace(microsecond=0)
    with multiprocessing.Pool() if args.parallel else SequentialPool() as pool:
        docheck = functools.partial(main_docheck, mirror)
        try:
            # results arrive in completion order; commit each as it lands
            for architecture, state in pool.imap_unordered(docheck, archs):
                print("update %s" % architecture)
                update_depcheck(mirror, db, now, architecture, state)
        finally:
            pool.close()
            pool.join()
483
# Script entry point: run the dependency checks and update the database.
if __name__ == "__main__":
    main()