[~helmut/crossqa.git] / depcheck.py
#!/usr/bin/python3
# SPDX-License-Identifier: GPL-2.0+

import argparse
import collections
import contextlib
import datetime
import functools
import hashlib
import itertools
import lzma
import multiprocessing
import os.path
import sqlite3
import subprocess
import tempfile
import yaml

import apt_pkg
apt_pkg.init()
version_compare = apt_pkg.version_compare
import requests

from common import decompress_stream, yield_lines

PROFILES = frozenset(("cross", "nocheck"))

CPUEntry = collections.namedtuple('CPUEntry',
                                   'debcpu gnucpu regex bits endianness')

TupleEntry = collections.namedtuple('TupleEntry',
                                     'abi libc os cpu')

class Architectures:
    @staticmethod
    def read_table(filename):
        with open(filename) as f:
            for line in f:
                if not line.startswith("#"):
                    yield line.split()

    def __init__(self, cputable="/usr/share/dpkg/cputable",
                 tupletable="/usr/share/dpkg/tupletable",
                 abitable="/usr/share/dpkg/abitable"):
        self.cputable = {}
        self.tupletable = {}
        self.abitable = {}
        self.read_cputable(cputable)
        self.read_tupletable(tupletable)
        self.read_abitable(abitable)

    def read_cputable(self, cputable):
        self.cputable.clear()
        for values in self.read_table(cputable):
            values[3] = int(values[3])  # bits
            entry = CPUEntry(*values)
            self.cputable[entry.debcpu] = entry

    def read_tupletable(self, tupletable):
        self.tupletable.clear()
        for debtuple, debarch in self.read_table(tupletable):
            if '<cpu>' in debtuple:
                for cpu in self.cputable:
                    entry = TupleEntry(*debtuple.replace("<cpu>", cpu)
                                       .split("-"))
                    self.tupletable[debarch.replace("<cpu>", cpu)] = entry
            else:
                self.tupletable[debarch] = TupleEntry(*debtuple.split("-"))

    def read_abitable(self, abitable):
        self.abitable.clear()
        for arch, bits in self.read_table(abitable):
            self.abitable[arch] = int(bits)

    def match(self, arch, pattern):
        parts = pattern.split("-")
        if "any" not in parts:
            return pattern == arch
        while len(parts) < 4:
            parts.insert(0, "any")
        entry = self.tupletable[arch]
        return all(parts[i] in (entry[i], "any") for i in range(4))

    def getendianness(self, arch):
        return self.cputable[self.tupletable[arch].cpu].endianness

architectures = Architectures()
arch_match = architectures.match

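# Example for arch_match (illustrative; assumes the standard dpkg tables are
# installed): wildcard patterns are padded with "any" and matched against the
# Debian architecture tuple, so arch_match("amd64", "linux-any") is True,
# while arch_match("amd64", "any-arm64") is False.
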
def call_dose_builddebcheck(arguments):
    """
    @type arguments: [str]
    @param arguments: command line arguments to dose-builddebcheck
    @returns: an iterable over loaded yaml documents. The first document
              is the header, all other documents are per-package.
    @raises subprocess.CalledProcessError: if dose errors out
    """
    cmd = ["dose-builddebcheck"]
    cmd.extend(arguments)

    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE)

    lines = []
    for line in proc.stdout:
        if line.startswith(b'  '):
            lines.append(line)
        elif line == b' -\n':
            yield yaml.load(b"".join(lines), Loader=yaml.CBaseLoader)
            lines = []
    proc.stdout.close()
    if lines:
        yield yaml.load(b"".join(lines), Loader=yaml.CBaseLoader)
    if proc.wait() not in (0, 1):
        raise subprocess.CalledProcessError(proc.returncode, cmd)

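# Example for call_dose_builddebcheck (illustrative sketch; requires
# dose-builddebcheck to be installed; packages_file and sources_file are
# hypothetical paths to Packages/Sources dumps):
#
#   docs = call_dose_builddebcheck(["--deb-native-arch=amd64",
#                                   packages_file, sources_file])
#   header = next(docs)           # first document is dose's summary header
#   for pkg in docs:              # remaining documents are per-package
#       print(pkg["package"], pkg["status"])
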
def parse_deb822(iterable):
    """Parse an iterable of bytes into an iterable of str-dicts."""
    mapping = {}
    key = None
    value = None
    for line in yield_lines(iterable):
        line = line.decode("utf8")
        if line == "\n":
            if key is not None:
                mapping[key] = value.strip()
                key = None
            yield mapping
            mapping = {}
        elif key and line.startswith((" ", "\t")):
            value += line
        else:
            if key is not None:
                mapping[key] = value.strip()
            try:
                key, value = line.split(":", 1)
            except ValueError:
                raise ValueError("invalid input line %r" % line)
    if key is not None:
        mapping[key] = value.strip()
    if mapping:
        yield mapping

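# Example for parse_deb822 (illustrative; assumes yield_lines splits the byte
# stream into newline-terminated lines): a folded field keeps its continuation
# lines, and each empty line ends one paragraph.
#
#   list(parse_deb822([b"Package: hello\nDepends: libc6,\n libgcc-s1\n\n"]))
#   == [{"Package": "hello", "Depends": "libc6,\n libgcc-s1"}]
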
def serialize_deb822(dct):
    """Serialize a str-dict into a single str."""
    return "".join(map("%s: %s\n".__mod__, dct.items())) + "\n"

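# Example for serialize_deb822 (illustrative): fields are emitted in dict
# order and the paragraph is terminated by an empty line, e.g.
# serialize_deb822({"Package": "hello", "Version": "1.0"}) ==
# "Package: hello\nVersion: 1.0\n\n".
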
class HashSumMismatch(Exception):
    pass

def hash_check(iterable, hashobj, expected_digest):
    """Wraps an iterable that yields bytes. It doesn't modify the sequence,
    but on the final element it verifies that the concatenation of bytes
    yields an expected digest value. Upon failure, the final next() results in
    a HashSumMismatch rather than StopIteration.
    """
    for data in iterable:
        hashobj.update(data)
        yield data
    if hashobj.hexdigest() != expected_digest:
        raise HashSumMismatch()

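# Example for hash_check (illustrative): the wrapped chunks pass through
# unchanged; HashSumMismatch is only raised after the last chunk if the
# digest of the whole stream differs from the expected value.
#
#   expected = hashlib.sha256(b"hello").hexdigest()
#   data = b"".join(hash_check([b"he", b"llo"], hashlib.sha256(), expected))
#   assert data == b"hello"
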
def parse_date(s):
    return datetime.datetime.strptime(s, "%a, %d %b %Y %H:%M:%S %Z")

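# Example for parse_date (illustrative): it parses the timestamp format used
# in Release files, e.g. parse_date("Sat, 01 Jan 2022 00:00:00 UTC") ==
# datetime.datetime(2022, 1, 1, 0, 0).
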
class GPGV:
    def __init__(self, files=("/etc/apt/trusted.gpg",),
                 partsdir="/etc/apt/trusted.gpg.d"):
        candidates = list(files)
        candidates.extend(os.path.join(partsdir, e)
                          for e in os.listdir(partsdir))
        self.keyrings = list(filter(lambda f: os.access(f, os.R_OK),
                                    candidates))

    def verify(self, content):
        cmdline = ["gpgv", "--quiet", "--weak-digest", "SHA1", "--output", "-"]
        for keyring in self.keyrings:
            cmdline.extend(("--keyring", keyring))
        proc = subprocess.Popen(cmdline, stdin=subprocess.PIPE,
                                stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        stdout, _ = proc.communicate(content)
        if proc.wait() != 0:
            raise ValueError("signature verification failed")
        return stdout

class DebianMirror:
    hashfunc = "SHA256"
    def __init__(self, uri, dist="sid"):
        self.uri = uri
        self.dist = dist
        self.releasetime = None
        self.byhash = None
        self.files = {}

    def get_uri(self, filename):
        return "%s/dists/%s/%s" % (self.uri, self.dist, filename)

    def fetch_release(self):
        resp = requests.get(self.get_uri("InRelease"))
        resp.raise_for_status()
        return GPGV().verify(resp.content)

    def parse_release(self, content):
        info, = list(parse_deb822([content]))
        self.releasetime = parse_date(info["Date"])
        valid_until = parse_date(info["Valid-Until"])
        now = datetime.datetime.utcnow()
        if self.releasetime > now:
            raise ValueError("release file generated in future")
        if valid_until < now:
            raise ValueError("release signature expired")
        self.byhash = info.pop("Acquire-By-Hash", "no") == "yes"
        self.files = {}
        for line in info[self.hashfunc].splitlines():
            parts = line.split()
            if not parts:
                continue
            if len(parts) != 3:
                raise ValueError("invalid %s line %r" % (self.hashfunc, line))
            self.files[parts[2]] = parts[0]

    def update_release(self):
        self.parse_release(self.fetch_release())

    def fetch_list(self, listname):
        if listname + ".xz" in self.files:
            listname += ".xz"
            wrapper = lambda i: decompress_stream(i, lzma.LZMADecompressor())
        else:
            wrapper = lambda i: i
        hashvalue = self.files[listname]
        if self.byhash:
            listname = "%s/by-hash/%s/%s" % (os.path.dirname(listname),
                                             self.hashfunc, hashvalue)
        with contextlib.closing(requests.get(self.get_uri(listname),
                                             stream=True)) as resp:
            resp.raise_for_status()
            it = resp.iter_content(65536)
            it = hash_check(it, hashlib.new(self.hashfunc), hashvalue)
            yield from wrapper(it)

    def fetch_sources(self, component="main"):
        return self.fetch_list("%s/source/Sources" % component)

    def fetch_binaries(self, architecture, component="main"):
        return self.fetch_list("%s/binary-%s/Packages" %
                               (component, architecture))

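# Example for DebianMirror (illustrative sketch; requires network access and
# a readable apt keyring for InRelease verification):
#
#   mirror = DebianMirror("http://deb.debian.org/debian")
#   mirror.update_release()
#   for src in parse_deb822(mirror.fetch_sources()):
#       print(src["Package"], src["Version"])
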
binfields = frozenset((
    "Architecture",
    "Breaks",
    "Conflicts",
    "Depends",
    "Essential",
    "Multi-Arch",
    "Package",
    "Pre-Depends",
    "Provides",
    "Version",
))

srcdepfields = frozenset((
    "Build-Conflicts",
    "Build-Conflicts-Arch",
    "Build-Depends",
    "Build-Depends-Arch",
))
srcfields = srcdepfields.union((
    "Architecture",
    "Package",
    "Version",
))

bad_foreign_packages = frozenset((
    "flex-old", # cannot execute /usr/bin/flex
    "icmake", # cannot execute /usr/bin/icmake, build system
    "jam", # cannot execute /usr/bin/jam, build system
    "libtool-bin", # #836123
    "python2.7-minimal", # fails postinst
    "python3.6-minimal", # fails postinst
    "python3.7-minimal", # fails postinst
    "python3.8-minimal", # fails postinst
    "python3.9-minimal", # fails postinst
    "swi-prolog-nox", # fails postinst
    "xrdp", # fails postinst
    "libgvc6", # fails postinst
))

def strip_dict(dct, keepfields):
    keys = set(dct.keys())
    keys.difference_update(keepfields)
    for k in keys:
        del dct[k]

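# Example for strip_dict (illustrative): the dict is mutated in place and
# only the listed fields survive.
#
#   d = {"Package": "hello", "Homepage": "https://example.org"}
#   strip_dict(d, {"Package"})
#   assert d == {"Package": "hello"}
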
def strip_alternatives(dct, fields):
    for f in fields:
        try:
            value = dct[f]
        except KeyError:
            continue
        dct[f] = ",".join(dep.split("|", 1)[0]
                          for dep in value.split(","))

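# Example for strip_alternatives (illustrative): only the first alternative
# of each dependency is kept; surrounding whitespace is preserved.
#
#   d = {"Build-Depends": "debhelper (>= 13) | debhelper-compat, gcc"}
#   strip_alternatives(d, ["Build-Depends"])
#   assert d == {"Build-Depends": "debhelper (>= 13) , gcc"}
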
def latest_versions(pkgs):
    packages = {}
    for p in pkgs:
        name = p["Package"]
        try:
            if version_compare(packages[name]["Version"], p["Version"]) > 0:
                continue
        except KeyError:
            pass
        packages[name] = p
    return (p for p in packages.values()
            if "Package" in p and "Negative-Entry" not in p)

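# Example for latest_versions (illustrative): among several stanzas for the
# same package only the highest version (per apt_pkg.version_compare) is kept.
#
#   pkgs = [{"Package": "hello", "Version": "2.10-2"},
#           {"Package": "hello", "Version": "2.10-3"}]
#   assert [p["Version"] for p in latest_versions(pkgs)] == ["2.10-3"]
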
def make_binary_list_build(mirror, buildarch, hostarch):
    for p in parse_deb822(mirror.fetch_binaries(buildarch)):
        if p["Package"].startswith("crossbuild-essential-"):
            if p["Package"] != "crossbuild-essential-" + hostarch:
                continue
            p["Depends"] += ", libc-dev:%s, libstdc++-dev:%s" % \
                    (hostarch, hostarch)
        strip_dict(p, binfields)
        yield p

def make_binary_list_host(mirror, hostarch):
    for p in parse_deb822(mirror.fetch_binaries(hostarch)):
        if p["Architecture"] == "all":
            continue
        if p.get("Multi-Arch") == "foreign":
            continue
        if p.get("Essential") == "yes":
            continue
        if p["Package"] in bad_foreign_packages:
            continue
        strip_dict(p, binfields)
        yield p

def make_binary_list(mirror, buildarch, hostarch):
    return itertools.chain(make_binary_list_build(mirror, buildarch, hostarch),
                           make_binary_list_host(mirror, hostarch))

def make_source_list(mirror, hostarch):
    for p in parse_deb822(mirror.fetch_sources()):
        if p.get("Extra-Source-Only") == "yes":
            continue
        if any(arch_match(hostarch, pattern)
               for pattern in p["Architecture"].split()):
            strip_dict(p, srcfields)
            strip_alternatives(p, srcdepfields)
            yield p
        else:
            # dummy entry preventing older matching versions
            yield {"Package": p["Package"], "Version": p["Version"],
                   "Negative-Entry": "yes"}

def check_bdsat(mirror, buildarch, hostarch):
    cmd = [
        "--deb-native-arch=" + buildarch,
        "--deb-host-arch=" + hostarch,
        "--deb-drop-b-d-indep",
        "--deb-profiles=" + ",".join(PROFILES),
        "--successes",
        "--failures",
        "--explain",
        "--explain-minimal",
        "--deb-emulate-sbuild",
    ]

    result = {}
    with tempfile.NamedTemporaryFile("w", encoding="utf8") as bintmp, \
            tempfile.NamedTemporaryFile("w", encoding="utf8") as srctmp:
        for p in make_binary_list(mirror, buildarch, hostarch):
            bintmp.write(serialize_deb822(p))
        bintmp.flush()
        cmd.append(bintmp.name)

        for p in latest_versions(make_source_list(mirror, hostarch)):
            srctmp.write(serialize_deb822(p))
        srctmp.flush()
        cmd.append(srctmp.name)

        dose_result = call_dose_builddebcheck(cmd)
        next(dose_result) # skip header
        for d in dose_result:
            reason = None
            if d["status"] != "ok":
                r = d["reasons"][0]
                if "missing" in r:
                    reason = "missing %s" % r["missing"]["pkg"]["unsat-dependency"].split()[0].split(":", 1)[0]
                elif "conflict" in r:
                    r = r["conflict"]["pkg1"]["unsat-conflict"]
                    reason = "skew " if ' (!= ' in r else "conflict "
                    reason += r.split()[0].split(':', 1)[0]
                else:
                    assert False
            result[d["package"]] = (d["version"], reason)
    return result

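# check_bdsat returns one entry per source package: result[source] ==
# (version, None) when the cross build dependencies are satisfiable, and
# (version, reason) with a short "missing ..."/"conflict ..."/"skew ..."
# string otherwise.
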

def update_depcheck(mirror, db, updatetime, buildarch, hostarch, state):
    with contextlib.closing(db.cursor()) as cur:
        cur.execute("BEGIN;")
        cur.execute("""
            SELECT source, version, satisfiable, reason FROM depstate
                WHERE buildarch = ? AND hostarch = ?;""",
                    (buildarch, hostarch,))
        for source, version, satisfiable, reason in list(cur.fetchall()):
            if satisfiable == (reason is None) and \
               state.get(source) == (version, reason):
                del state[source]
            else:
                cur.execute("""
                    DELETE FROM depstate
                        WHERE source = ? AND version = ? AND buildarch = ?
                            AND hostarch = ?;""",
                            (source, version, buildarch, hostarch))
        cur.executemany("""
            INSERT INTO depstate (source, buildarch, hostarch, version,
                                  satisfiable, reason)
                VALUES (?, ?, ?, ?, ?, ?);""",
                        ((source, buildarch, hostarch, version, reason is None,
                          reason)
                         for source, (version, reason) in state.items()))
        cur.execute("""
            UPDATE depcheck SET releasetime = ?, updatetime = ?, giveback = 0
                WHERE buildarch = ? AND hostarch = ?""",
                    (mirror.releasetime, updatetime, buildarch, hostarch))
    db.commit()


def main_docheck(mirror, archpair):
    return (*archpair, check_bdsat(mirror, *archpair))


class SequentialPool:
    """Sequential variant of multiprocessing.Pool for debugging."""
    def __enter__(self):
        return self
    def __exit__(self, *args):
        pass
    def close(self):
        pass
    def join(self):
        pass
    imap_unordered = map

def main():
    argp = argparse.ArgumentParser()
    argp.add_argument('-m', '--mirror',
                      default='http://deb.debian.org/debian',
                      help="debian mirror to use")
    argp.add_argument('-p', '--parallel', action="store_true",
                      help="enable parallel checking")
    args = argp.parse_args()
    mirror = DebianMirror(args.mirror)
    mirror.update_release()
    db = sqlite3.connect("db")
    cur = db.cursor()
    cur.execute("""
        SELECT buildarch, hostarch FROM depcheck WHERE releasetime < ?;""",
                (mirror.releasetime,))
    archpairs = set(cur.fetchall())
    if not archpairs:
        return
    print("checking %s" %
          ", ".join(sorted(map("%s -> %s".__mod__, archpairs))))
    now = datetime.datetime.utcnow().replace(microsecond=0)
    with multiprocessing.Pool() if args.parallel else SequentialPool() as pool:
        docheck = functools.partial(main_docheck, mirror)
        try:
            for buildarch, hostarch, state in pool.imap_unordered(docheck,
                                                                  archpairs):
                print("update %s -> %s" % (buildarch, hostarch))
                update_depcheck(mirror, db, now, buildarch, hostarch, state)
        finally:
            pool.close()
            pool.join()

if __name__ == "__main__":
    main()