#!/usr/bin/python3
# SPDX-License-Identifier: GPL-2.0+

import argparse
import collections
import contextlib
import datetime
import functools
import hashlib
import itertools
import lzma
import multiprocessing
import os.path
import sqlite3
import subprocess
import tempfile
import yaml

import apt_pkg
apt_pkg.init()
version_compare = apt_pkg.version_compare
import requests

from common import decompress_stream, yield_lines

BUILD_ARCH = "amd64"
PROFILES = frozenset(("cross", "nocheck"))

CPUEntry = collections.namedtuple('CPUEntry',
                                  'debcpu gnucpu regex bits endianness')

TupleEntry = collections.namedtuple('TupleEntry',
                                    'abi libc os cpu')

class Architectures:
    """Lookup helper for dpkg's architecture tables (cputable, tupletable,
    abitable), including wildcard matching of architecture patterns."""

    @staticmethod
    def read_table(filename):
        with open(filename) as f:
            for line in f:
                if not line.startswith("#"):
                    yield line.split()

    def __init__(self, cputable="/usr/share/dpkg/cputable",
                 tupletable="/usr/share/dpkg/tupletable",
                 abitable="/usr/share/dpkg/abitable"):
        self.cputable = {}
        self.tupletable = {}
        self.abitable = {}
        self.read_cputable(cputable)
        self.read_tupletable(tupletable)
        self.read_abitable(abitable)

    def read_cputable(self, cputable):
        self.cputable.clear()
        for values in self.read_table(cputable):
            values[3] = int(values[3])  # bits
            entry = CPUEntry(*values)
            self.cputable[entry.debcpu] = entry

    def read_tupletable(self, tupletable):
        self.tupletable.clear()
        for debtuple, debarch in self.read_table(tupletable):
            if '<cpu>' in debtuple:
                for cpu in self.cputable:
                    entry = TupleEntry(*debtuple.replace("<cpu>", cpu)
                                       .split("-"))
                    self.tupletable[debarch.replace("<cpu>", cpu)] = entry
            else:
                self.tupletable[debarch] = TupleEntry(*debtuple.split("-"))

    def read_abitable(self, abitable):
        self.abitable.clear()
        for arch, bits in self.read_table(abitable):
            bits = int(bits)
            self.abitable[arch] = bits

    def match(self, arch, pattern):
        parts = pattern.split("-")
        if "any" not in parts:
            return pattern == arch
        while len(parts) < 4:
            parts.insert(0, "any")
        entry = self.tupletable[arch]
        return all(parts[i] in (entry[i], "any") for i in range(4))

    def getendianness(self, arch):
        return self.cputable[self.tupletable[arch].cpu].endianness

architectures = Architectures()
arch_match = architectures.match

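# Illustrative matches, assuming stock dpkg tables (a sketch, not executed):
#     arch_match("amd64", "any")             -> True
#     arch_match("amd64", "linux-any")       -> True  (short patterns are
#                                                      padded with "any")
#     arch_match("amd64", "musl-linux-any")  -> False (libc gnu != musl)
#     arch_match("amd64", "arm64")           -> False (no wildcard, exact
#                                                      string comparison)
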
def call_dose_builddebcheck(arguments):
    """
    @type arguments: [str]
    @param arguments: command line arguments to dose-builddebcheck
    @returns: an iterable over loaded yaml documents. The first document
              is the header, all other documents are per-package.
    @raises subprocess.CalledProcessError: if dose errors out
    """
    cmd = ["dose-builddebcheck"]
    cmd.extend(arguments)

    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE)

    lines = []
    for line in proc.stdout:
        if line.startswith(b'  '):
            lines.append(line)
        elif line == b' -\n':
            yield yaml.load(b"".join(lines), Loader=yaml.CBaseLoader)
            lines = []
    proc.stdout.close()
    if lines:
        # use the same loader as above so all scalars stay plain str
        yield yaml.load(b"".join(lines), Loader=yaml.CBaseLoader)
    if proc.wait() not in (0, 1):
        raise subprocess.CalledProcessError(proc.returncode, cmd)

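# Hedged usage sketch; requires dose-builddebcheck installed and the two
# (hypothetical) list files shown as trailing arguments:
#     docs = call_dose_builddebcheck(["--deb-native-arch=amd64",
#                                     "Packages", "Sources"])
#     next(docs)  # header document describing the run
#     for report in docs:
#         print(report["package"], report["status"])
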
def parse_deb822(iterable):
    """Parse an iterable of bytes into an iterable of str-dicts."""
    mapping = {}
    key = None
    value = None
    for line in yield_lines(iterable):
        line = line.decode("utf8")
        if line == "\n":
            if key is not None:
                mapping[key] = value.strip()
                key = None
            yield mapping
            mapping = {}
        elif key and line.startswith((" ", "\t")):
            value += line
        else:
            if key is not None:
                mapping[key] = value.strip()
            try:
                key, value = line.split(":", 1)
            except ValueError:
                raise ValueError("invalid input line %r" % line)
    if key is not None:
        mapping[key] = value.strip()
    if mapping:
        yield mapping

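# Minimal illustration: continuation lines are folded into the previous
# field, a blank line ends a stanza:
#     list(parse_deb822([b"Package: foo\nDepends: bar,\n baz\n\nPackage: qux\n"]))
#     -> [{'Package': 'foo', 'Depends': 'bar,\n baz'}, {'Package': 'qux'}]
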
def serialize_deb822(dct):
    """Serialize a str-dict into a single str ending in a blank line."""
    return "".join(map("%s: %s\n".__mod__, dct.items())) + "\n"

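# Example: serialize_deb822({"Package": "foo", "Version": "1.0"}) returns
# 'Package: foo\nVersion: 1.0\n\n', i.e. one stanza terminated by a blank
# line, ready to be concatenated into a Packages/Sources style file.
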
class HashSumMismatch(Exception):
    pass

def hash_check(iterable, hashobj, expected_digest):
    """Wraps an iterable that yields bytes. It doesn't modify the sequence,
    but on the final element it verifies that the concatenation of bytes
    yields an expected digest value. Upon failure, the final next() results in
    a HashSumMismatch rather than StopIteration.
    """
    for data in iterable:
        hashobj.update(data)
        yield data
    if hashobj.hexdigest() != expected_digest:
        raise HashSumMismatch()

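# Example: the wrapper passes data through unchanged and only checks the
# digest once the inner iterable is exhausted:
#     good = hashlib.sha256(b"abc").hexdigest()
#     b"".join(hash_check([b"ab", b"c"], hashlib.sha256(), good))   # b"abc"
#     b"".join(hash_check([b"ab"], hashlib.sha256(), good))  # HashSumMismatch
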
def parse_date(s):
    return datetime.datetime.strptime(s, "%a, %d %b %Y %H:%M:%S %Z")

class GPGV:
    def __init__(self, files=("/etc/apt/trusted.gpg",),
                 partsdir="/etc/apt/trusted.gpg.d"):
        candidates = list(files)
        candidates.extend(os.path.join(partsdir, e)
                          for e in os.listdir(partsdir))
        self.keyrings = list(filter(lambda f: os.access(f, os.R_OK),
                                    candidates))

    def verify(self, content):
        cmdline = ["gpgv", "--quiet", "--weak-digest", "SHA1", "--output", "-"]
        for keyring in self.keyrings:
            cmdline.extend(("--keyring", keyring))
        proc = subprocess.Popen(cmdline, stdin=subprocess.PIPE,
                                stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        stdout, _ = proc.communicate(content)
        if proc.wait() != 0:
            raise ValueError("signature verification failed")
        return stdout

class DebianMirror:
    hashfunc = "SHA256"
    def __init__(self, uri, dist="sid"):
        self.uri = uri
        self.dist = dist
        self.releasetime = None
        self.byhash = None
        self.files = {}

    @staticmethod
    def get_all_keyrings():
        yield "/etc/apt/trusted.gpg"
        partsdir = "/etc/apt/trusted.gpg.d"
        try:
            for e in os.listdir(partsdir):
                yield os.path.join(partsdir, e)
        except FileNotFoundError:
            pass

    @staticmethod
    def get_keyrings():
        return filter(lambda f: os.access(f, os.R_OK),
                      DebianMirror.get_all_keyrings())

    def get_uri(self, filename):
        return "%s/dists/%s/%s" % (self.uri, self.dist, filename)

    def fetch_release(self):
        resp = requests.get(self.get_uri("InRelease"))
        resp.raise_for_status()
        return GPGV().verify(resp.content)

    def parse_release(self, content):
        info, = list(parse_deb822([content]))
        self.releasetime = parse_date(info["Date"])
        valid_until = parse_date(info["Valid-Until"])
        now = datetime.datetime.utcnow()
        if self.releasetime > now:
            raise ValueError("release file generated in future")
        if valid_until < now:
            raise ValueError("release file expired")
        self.byhash = info.pop("Acquire-By-Hash", "no") == "yes"
        self.files = {}
        for line in info[self.hashfunc].splitlines():
            parts = line.split()
            if not parts:
                continue
            if len(parts) != 3:
                raise ValueError("invalid %s line %r" % (self.hashfunc, line))
            self.files[parts[2]] = parts[0]

    def update_release(self):
        self.parse_release(self.fetch_release())

    def fetch_list(self, listname):
        if listname + ".xz" in self.files:
            listname += ".xz"
            wrapper = lambda i: decompress_stream(i, lzma.LZMADecompressor())
        else:
            wrapper = lambda i: i
        hashvalue = self.files[listname]
        if self.byhash:
            listname = "%s/by-hash/%s/%s" % (os.path.dirname(listname),
                                             self.hashfunc, hashvalue)
        with contextlib.closing(requests.get(self.get_uri(listname),
                                             stream=True)) as resp:
            resp.raise_for_status()
            it = resp.iter_content(65536)
            it = hash_check(it, hashlib.new(self.hashfunc), hashvalue)
            yield from wrapper(it)

    def fetch_sources(self, component="main"):
        return self.fetch_list("%s/source/Sources" % component)

    def fetch_binaries(self, architecture, component="main"):
        return self.fetch_list("%s/binary-%s/Packages" %
                               (component, architecture))

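# Hedged usage sketch (performs network requests; mirrors what main() and
# the list builders below do):
#     mirror = DebianMirror("http://deb.debian.org/debian")
#     mirror.update_release()  # fetch InRelease, verify signature and dates
#     for stanza in parse_deb822(mirror.fetch_binaries("amd64")):
#         ...  # hash-checked, transparently xz-decompressed Packages stanzas
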
binfields = frozenset((
    "Architecture",
    "Breaks",
    "Conflicts",
    "Depends",
    "Essential",
    "Multi-Arch",
    "Package",
    "Pre-Depends",
    "Provides",
    "Version",
))

srcdepfields = frozenset((
    "Build-Conflicts",
    "Build-Conflicts-Arch",
    "Build-Depends",
    "Build-Depends-Arch",
))
srcfields = srcdepfields.union((
    "Architecture",
    "Package",
    "Version",
))

bad_foreign_packages = frozenset((
    "flex-old",  # cannot execute /usr/bin/flex
    "icmake",  # cannot execute /usr/bin/icmake, build system
    "jam",  # cannot execute /usr/bin/jam, build system
    "libtool-bin",  # #836123
    "python2.7-minimal",  # fails postinst
    "python3.6-minimal",  # fails postinst
    "python3.7-minimal",  # fails postinst
    "python3.8-minimal",  # fails postinst
    "python3.9-minimal",  # fails postinst
    "swi-prolog-nox",  # fails postinst
    "xrdp",  # fails postinst
    "libgvc6",  # fails postinst
))

def strip_dict(dct, keepfields):
    keys = set(dct.keys())
    keys.difference_update(keepfields)
    for k in keys:
        del dct[k]

def strip_alternatives(dct, fields):
    for f in fields:
        try:
            value = dct[f]
        except KeyError:
            continue
        dct[f] = ",".join(dep.split("|", 1)[0]
                          for dep in value.split(","))

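# Example: only the first alternative of each dependency is kept, so
# {"Build-Depends": "debhelper | dh, gcc"} becomes
# {"Build-Depends": "debhelper , gcc"} (surrounding whitespace is left as-is).
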
def latest_versions(pkgs):
    packages = {}
    for p in pkgs:
        name = p["Package"]
        try:
            if version_compare(packages[name]["Version"], p["Version"]) > 0:
                continue
        except KeyError:
            pass
        packages[name] = p
    return (p for p in packages.values()
            if "Package" in p and "Negative-Entry" not in p)

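# Example: given stanzas for foo 1.0 and foo 2.0, only the 2.0 stanza
# survives. The "Negative-Entry" dummies emitted by make_source_list below
# take part in the version comparison (masking older real entries) but are
# filtered from the result.
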
def make_binary_list_build(mirror, arch):
    for p in parse_deb822(mirror.fetch_binaries(BUILD_ARCH)):
        if p["Package"].startswith("crossbuild-essential-"):
            if p["Package"] != "crossbuild-essential-" + arch:
                continue
            p["Depends"] += ", libc-dev:%s, libstdc++-dev:%s" % (arch, arch)
        strip_dict(p, binfields)
        yield p

def make_binary_list_host(mirror, arch):
    for p in parse_deb822(mirror.fetch_binaries(arch)):
        if p["Architecture"] == "all":
            continue
        if p.get("Multi-Arch") == "foreign":
            continue
        if p.get("Essential") == "yes":
            continue
        if p["Package"] in bad_foreign_packages:
            continue
        strip_dict(p, binfields)
        yield p

def make_binary_list(mirror, arch):
    return itertools.chain(make_binary_list_build(mirror, arch),
                           make_binary_list_host(mirror, arch))

def make_source_list(mirror, arch):
    for p in parse_deb822(mirror.fetch_sources()):
        if p.get("Extra-Source-Only") == "yes":
            continue
        if any(arch_match(arch, pattern)
               for pattern in p["Architecture"].split()):
            strip_dict(p, srcfields)
            strip_alternatives(p, srcdepfields)
            yield p
        else:
            # dummy entry preventing older matching versions
            yield {"Package": p["Package"], "Version": p["Version"],
                   "Negative-Entry": "yes"}

def check_bdsat(mirror, arch):
    cmd = [
        "--deb-native-arch=" + BUILD_ARCH,
        "--deb-host-arch=" + arch,
        "--deb-drop-b-d-indep",
        "--deb-profiles=" + ",".join(PROFILES),
        "--successes",
        "--failures",
        "--explain",
        "--explain-minimal",
        "--deb-emulate-sbuild",
    ]

    result = {}
    with tempfile.NamedTemporaryFile("w", encoding="utf8") as bintmp, \
            tempfile.NamedTemporaryFile("w", encoding="utf8") as srctmp:
        for p in make_binary_list(mirror, arch):
            bintmp.write(serialize_deb822(p))
        bintmp.flush()
        cmd.append(bintmp.name)

        for p in latest_versions(make_source_list(mirror, arch)):
            srctmp.write(serialize_deb822(p))
        srctmp.flush()
        cmd.append(srctmp.name)

        dose_result = call_dose_builddebcheck(cmd)
        next(dose_result)  # skip header
        for d in dose_result:
            reason = None
            if d["status"] != "ok":
                r = d["reasons"][0]
                if "missing" in r:
                    dep = r["missing"]["pkg"]["unsat-dependency"]
                    reason = "missing %s" % dep.split()[0].split(":", 1)[0]
                elif "conflict" in r:
                    r = r["conflict"]["pkg1"]["unsat-conflict"]
                    reason = "skew " if ' (!= ' in r else "conflict "
                    reason += r.split()[0].split(':', 1)[0]
                else:
                    assert False
            result[d["package"]] = (d["version"], reason)
    return result


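# Hedged sketch of a dose report document as consumed above; the structure
# is inferred from the field accesses in check_bdsat, not from dose
# documentation, and the values are made up:
#     {"package": "foo", "version": "1.0-1", "status": "broken",
#      "reasons": [{"missing": {"pkg":
#          {"unsat-dependency": "libbar-dev:mips64el (>= 1.2)"}}}]}
# would be recorded as reason "missing libbar-dev".
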
def update_depcheck(mirror, db, updatetime, architecture, state):
    with contextlib.closing(db.cursor()) as cur:
        cur.execute("BEGIN;")
        cur.execute("SELECT source, version, satisfiable, reason "
                    "FROM depstate WHERE architecture = ?;",
                    (architecture,))
        for source, version, satisfiable, reason in list(cur.fetchall()):
            if satisfiable == (reason is None) and \
               state.get(source) == (version, reason):
                del state[source]
            else:
                cur.execute("DELETE FROM depstate WHERE source = ? "
                            "AND version = ? AND architecture = ?;",
                            (source, version, architecture))
        cur.executemany("INSERT INTO depstate (source, architecture, version, "
                        "satisfiable, reason) VALUES (?, ?, ?, ?, ?);",
                        ((source, architecture, version, reason is None,
                          reason)
                         for source, (version, reason) in state.items()))
        cur.execute("UPDATE depcheck SET releasetime = ?, updatetime = ?, "
                    "giveback = 0 WHERE architecture = ?;",
                    (mirror.releasetime, updatetime, architecture))
    db.commit()

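# Sketch of the sqlite schema implied by the queries above and in main();
# reconstructed, so the real schema may differ in types and extra columns:
#     CREATE TABLE depstate (source TEXT, version TEXT, architecture TEXT,
#                            satisfiable BOOLEAN, reason TEXT);
#     CREATE TABLE depcheck (architecture TEXT PRIMARY KEY,
#                            releasetime TIMESTAMP, updatetime TIMESTAMP,
#                            giveback BOOLEAN);
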

def main_docheck(mirror, architecture):
    return (architecture, check_bdsat(mirror, architecture))


class SequentialPool:
    """Sequential variant of multiprocessing.Pool for debugging."""
    def __enter__(self):
        return self
    def __exit__(self, *args):
        pass
    def close(self):
        pass
    def join(self):
        pass
    imap_unordered = map

def main():
    argp = argparse.ArgumentParser()
    argp.add_argument('-m', '--mirror',
                      default='http://deb.debian.org/debian',
                      help="debian mirror to use")
    argp.add_argument('-p', '--parallel', action="store_true",
                      help="enable parallel checking")
    args = argp.parse_args()
    mirror = DebianMirror(args.mirror)
    mirror.update_release()
    db = sqlite3.connect("db")
    cur = db.cursor()
    cur.execute("SELECT architecture FROM depcheck WHERE releasetime < ?;",
                (mirror.releasetime,))
    archs = set(row[0] for row in cur.fetchall())
    if not archs:
        return
    print("checking %s" % " ".join(sorted(archs)))
    now = datetime.datetime.utcnow().replace(microsecond=0)
    with multiprocessing.Pool() if args.parallel else SequentialPool() as pool:
        docheck = functools.partial(main_docheck, mirror)
        try:
            for architecture, state in pool.imap_unordered(docheck, archs):
                print("update %s" % architecture)
                update_depcheck(mirror, db, now, architecture, state)
        finally:
            pool.close()
            pool.join()

if __name__ == "__main__":
    main()