webapp.py: improve parameter validation for /schedule
[~helmut/crossqa.git] / depcheck.py
1 #!/usr/bin/python3
2 # SPDX-License-Identifier: GPL-2.0+
3
4 import argparse
5 import collections
6 import contextlib
7 import datetime
8 import functools
9 import hashlib
10 import itertools
11 import lzma
12 import multiprocessing
13 import os.path
14 import sqlite3
15 import subprocess
16 import tempfile
17 import yaml
18
19 import apt_pkg
20 apt_pkg.init()
21 version_compare = apt_pkg.version_compare
22 import requests
23
24 from common import decompress_stream, yield_lines
25
# build profiles activated during dependency checking
PROFILES = frozenset(("cross", "nocheck"))

# one parsed row of dpkg's cputable
CPUEntry = collections.namedtuple('CPUEntry',
                                  'debcpu gnucpu regex bits endianness')

# a Debian architecture tuple split into its four components
TupleEntry = collections.namedtuple('TupleEntry',
                                    'abi libc os cpu')
33
class Architectures:
    """Lookup helpers built from dpkg's architecture tables, including
    Debian architecture wildcard matching."""

    @staticmethod
    def read_table(filename):
        """Yield the whitespace-separated fields of each non-comment line."""
        with open(filename) as f:
            for line in f:
                if not line.startswith("#"):
                    yield line.split()

    def __init__(self, cputable="/usr/share/dpkg/cputable",
                 tupletable="/usr/share/dpkg/tupletable",
                 abitable="/usr/share/dpkg/abitable"):
        self.cputable = {}    # debcpu -> CPUEntry
        self.tupletable = {}  # debarch -> TupleEntry
        self.abitable = {}    # debarch -> bits (int)
        self.read_cputable(cputable)
        self.read_tupletable(tupletable)
        self.read_abitable(abitable)

    def read_cputable(self, cputable):
        """(Re)load the CPU table from the given file."""
        self.cputable.clear()
        for values in self.read_table(cputable):
            values[3] = int(values[3])  # bits
            entry = CPUEntry(*values)
            self.cputable[entry.debcpu] = entry

    def read_tupletable(self, tupletable):
        """(Re)load the architecture tuple table, expanding <cpu>
        placeholders using the previously loaded cputable."""
        self.tupletable.clear()
        for debtuple, debarch in self.read_table(tupletable):
            if '<cpu>' in debtuple:
                for cpu in self.cputable:
                    entry = TupleEntry(*debtuple.replace("<cpu>", cpu)
                                       .split("-"))
                    self.tupletable[debarch.replace("<cpu>", cpu)] = entry
            else:
                self.tupletable[debarch] = TupleEntry(*debtuple.split("-"))

    def read_abitable(self, abitable):
        """(Re)load the ABI bits table from the given file."""
        self.abitable.clear()
        for arch, bits in self.read_table(abitable):
            bits = int(bits)
            self.abitable[arch] = bits

    def match(self, arch, pattern):
        """Check whether Debian architecture arch matches pattern.

        pattern may contain "any" components, e.g. "linux-any" or plain
        "any"; without a wildcard it must equal arch exactly.
        """
        parts = pattern.split("-")
        if "any" not in parts:
            return pattern == arch
        # left-pad the pattern with "any" up to the full abi-libc-os-cpu tuple
        while len(parts) < 4:
            parts.insert(0, "any")
        entry = self.tupletable[arch]
        return all(parts[i] in (entry[i], "any") for i in range(4))

    def getendianness(self, arch):
        """Return the endianness recorded in the cputable for arch."""
        return self.cputable[self.tupletable[arch].cpu].endianness
87
# module-level singleton built from the dpkg tables of the local system
architectures = Architectures()
arch_match = architectures.match  # shorthand used by make_source_list
90
def call_dose_builddebcheck(arguments):
    """
    @type arguments: [str]
    @param arguments: command line arguments to dose-builddebcheck
    @returns: an iterable over loaded yaml documents. The first document
              is the header, all other documents are per-package.
    @raises subprocess.CalledProcessError: if dose errors out
    """
    cmd = ["dose-builddebcheck"]
    cmd.extend(arguments)

    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE)

    lines = []
    for line in proc.stdout:
        # dose emits each document as indented lines terminated by " -\n"
        if line.startswith(b'  '):
            lines.append(line)
        elif line == b' -\n':
            yield yaml.load(b"".join(lines), Loader=yaml.CBaseLoader)
            lines = []
    proc.stdout.close()
    if lines:
        # Parse a trailing document with the same loader as all others.
        # CBaseLoader keeps every scalar a string, so e.g. a version
        # "1.0" is not accidentally turned into a float.
        yield yaml.load(b"".join(lines), Loader=yaml.CBaseLoader)
    # dose exits 1 when it reports failures; only other codes are errors
    if proc.wait() not in (0, 1):
        raise subprocess.CalledProcessError(proc.returncode, cmd)
116
def parse_deb822(iterable):
    """Parse an iterable of bytes into an iterable of str-dicts.

    Each yielded dict represents one paragraph (stanza) of RFC822-style
    control data; continuation lines (starting with space or tab) are
    folded into the preceding field's value.
    @raises ValueError: on a line that is neither a field, a
                        continuation, nor a paragraph separator
    """
    mapping = {}
    key = None    # field name currently being accumulated, if any
    value = None  # accumulated (unstripped) value for that field
    for line in yield_lines(iterable):
        line = line.decode("utf8")
        if line == "\n":
            # paragraph separator: flush the pending field, emit the stanza
            if key is not None:
                mapping[key] = value.strip()
                key = None
            yield mapping
            mapping = {}
        elif key and line.startswith((" ", "\t")):
            # continuation line extends the current field verbatim
            value += line
        else:
            # a new "Key: value" line; flush the previous field first
            if key is not None:
                mapping[key] = value.strip()
            try:
                key, value = line.split(":", 1)
            except ValueError:
                raise ValueError("invalid input line %r" % line)
    # the input may end without a trailing blank line
    if key is not None:
        mapping[key] = value.strip()
    if mapping:
        yield mapping
143
def serialize_deb822(dct):
    """Serialize a str-dict into a single deb822 paragraph string.

    Each key/value pair becomes a "Key: value" line and the paragraph
    is terminated by a blank line; the inverse of parse_deb822 for one
    stanza.
    """
    return "".join(map("%s: %s\n".__mod__, dct.items())) + "\n"
147
class HashSumMismatch(Exception):
    """Raised by hash_check when the hashed data does not match the
    expected digest."""
    pass
150
def hash_check(iterable, hashobj, expected_digest):
    """Pass the byte chunks of *iterable* through unchanged while feeding
    them into *hashobj*.  After the last chunk, the accumulated hex digest
    is compared against *expected_digest*; on mismatch the final next()
    raises HashSumMismatch instead of StopIteration.
    """
    for chunk in iterable:
        hashobj.update(chunk)
        yield chunk
    if expected_digest != hashobj.hexdigest():
        raise HashSumMismatch()
162
def parse_date(s):
    """Parse an RFC822-style timestamp as used in Release files into a
    naive datetime object."""
    fmt = "%a, %d %b %Y %H:%M:%S %Z"
    return datetime.datetime.strptime(s, fmt)
165
class GPGV:
    """Wrapper around the gpgv tool for verifying inline-signed content
    (InRelease style) against the apt trusted keyrings."""
    def __init__(self, files=("/etc/apt/trusted.gpg",),
                 partsdir="/etc/apt/trusted.gpg.d"):
        """Collect all readable keyrings from files and partsdir."""
        candidates = list(files)
        candidates.extend(os.path.join(partsdir, e)
                          for e in os.listdir(partsdir))
        self.keyrings = list(filter(lambda f: os.access(f, os.R_OK),
                                    candidates))

    def verify(self, content):
        """Verify the signature on content and return the signed payload.
        @type content: bytes
        @rtype: bytes
        @raises ValueError: if gpgv rejects the signature
        """
        cmdline = ["gpgv", "--quiet", "--weak-digest", "SHA1", "--output", "-"]
        for keyring in self.keyrings:
            cmdline.extend(("--keyring", keyring))
        proc = subprocess.Popen(cmdline, stdin=subprocess.PIPE,
                                stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        stdout, _ = proc.communicate(content)
        if proc.wait() != 0:
            # fixed typo in the original message ("verififcation")
            raise ValueError("signature verification failed")
        return stdout
185
class DebianMirror:
    """Access to the index files of a Debian mirror over HTTP, verifying
    the InRelease signature and the per-file hash sums."""
    # checksum field of the Release file used to verify index files
    hashfunc = "SHA256"
    def __init__(self, uri, dist="sid"):
        # base URI of the mirror, e.g. http://deb.debian.org/debian
        self.uri = uri
        # distribution below dists/ to operate on
        self.dist = dist
        # Date field of the Release file; filled in by parse_release
        self.releasetime = None
        # whether the mirror supports acquire-by-hash; set by parse_release
        self.byhash = None
        # maps index filename -> expected hex digest; set by parse_release
        self.files = {}

    def get_uri(self, filename):
        """Construct the URI of filename below the dists directory."""
        return "%s/dists/%s/%s" % (self.uri, self.dist, filename)

    def fetch_release(self):
        """Download the InRelease file and return its signature-verified
        payload as bytes.
        @raises ValueError: if signature verification fails
        """
        resp = requests.get(self.get_uri("InRelease"))
        resp.raise_for_status()
        return GPGV().verify(resp.content)

    def parse_release(self, content):
        """Update releasetime, byhash and files from the given verified
        Release file contents.
        @raises ValueError: on implausible timestamps or malformed hash lines
        """
        info, = list(parse_deb822([content]))
        self.releasetime = parse_date(info["Date"])
        valid_until = parse_date(info["Valid-Until"])
        now = datetime.datetime.utcnow()
        if self.releasetime > now:
            raise ValueError("release file generated in future")
        if valid_until < now:
            raise ValueError("release signature expired")
        self.byhash = info.pop("Acquire-By-Hash", "no") == "yes"
        self.files = {}
        # each hash line: <hexdigest> <size> <filename>
        for line in info[self.hashfunc].splitlines():
            parts = line.split()
            if not parts:
                continue
            if len(parts) != 3:
                raise ValueError("invalid %s line %r" % (self.hashfunc, line))
            self.files[parts[2]] = parts[0]

    def update_release(self):
        """Fetch and parse the InRelease file, refreshing cached state."""
        self.parse_release(self.fetch_release())

    def fetch_list(self, listname):
        """Stream the decompressed contents of the given index file as
        bytes chunks, verifying its hash sum on the fly.
        @raises HashSumMismatch: if the downloaded data does not match
        """
        # prefer the xz-compressed variant when the Release file lists one
        if listname + ".xz" in self.files:
            listname += ".xz"
            wrapper = lambda i: decompress_stream(i, lzma.LZMADecompressor())
        else:
            wrapper = lambda i: i
        hashvalue = self.files[listname]
        if self.byhash:
            # fetch via the content-addressed by-hash path
            listname = "%s/by-hash/%s/%s" % (os.path.dirname(listname),
                                             self.hashfunc, hashvalue)
        with contextlib.closing(requests.get(self.get_uri(listname),
                                             stream=True)) as resp:
            resp.raise_for_status()
            it = resp.iter_content(65536)
            it = hash_check(it, hashlib.new(self.hashfunc), hashvalue)
            yield from wrapper(it)

    def fetch_sources(self, component="main"):
        """Stream the Sources index of the given component."""
        return self.fetch_list("%s/source/Sources" % component)

    def fetch_binaries(self, architecture, component="main"):
        """Stream the Packages index of the given architecture/component."""
        return self.fetch_list("%s/binary-%s/Packages" %
                               (component, architecture))
248
# binary package fields retained when writing dose-builddebcheck input
binfields = frozenset((
    "Architecture",
    "Breaks",
    "Conflicts",
    "Depends",
    "Essential",
    "Multi-Arch",
    "Package",
    "Pre-Depends",
    "Provides",
    "Version",
))

# source package dependency fields; these get their alternatives stripped
srcdepfields = frozenset((
    "Build-Conflicts",
    "Build-Conflicts-Arch",
    "Build-Depends",
    "Build-Depends-Arch",
))
# source package fields retained when writing dose-builddebcheck input
srcfields = srcdepfields.union((
    "Architecture",
    "Package",
    "Version",
))

# packages excluded from the host architecture package list because they
# are known not to work when installed for a foreign architecture
bad_foreign_packages = frozenset((
    "flex-old", # cannot execute /usr/bin/flex
    "icmake", # cannot execute /usr/bin/icmake, build system
    "jam", # cannot execute /usr/bin/jam, build system
    "libtool-bin", # #836123
    "python2.7-minimal", # fails postinst
    "python3.6-minimal", # fails postinst
    "python3.7-minimal", # fails postinst
    "python3.8-minimal", # fails postinst
    "python3.9-minimal", # fails postinst
    "swi-prolog-nox", # fails postinst
    "xrdp", # fails postinst
    "libgvc6", # fails postinst
))
288
def strip_dict(dct, keepfields):
    """Delete every key of dct that is not listed in keepfields, in place."""
    for unwanted in set(dct) - set(keepfields):
        del dct[unwanted]
294
def strip_alternatvies(dct, fields):
    """Reduce each dependency in the given fields of dct to its first
    alternative, modifying dct in place.  Fields absent from dct are
    skipped.  (The function name is a historical typo kept for
    compatibility with existing callers.)
    """
    for field in fields:
        if field not in dct:
            continue
        deps = dct[field].split(",")
        dct[field] = ",".join(dep.split("|", 1)[0] for dep in deps)
303
def latest_versions(pkgs):
    """Of all entries sharing a package name, keep only the one with the
    highest version.

    @param pkgs: iterable of deb822-style dicts carrying at least
                 "Package" and "Version"
    @returns: generator over the surviving dicts; negative dummy entries
              (see make_source_list) are filtered out at the end
    """
    packages = {}
    for p in pkgs:
        name = p["Package"]
        try:
            if version_compare(packages[name]["Version"], p["Version"]) > 0:
                continue  # we already hold a newer version of this package
        except KeyError:
            pass  # first occurrence of this package name
        packages[name] = p
    return (p for p in packages.values()
            if "Package" in p and "Negative-Entry" not in p)
316
def make_binary_list_build(mirror, buildarch, hostarch):
    """Yield build architecture binary packages for the dose input.

    crossbuild-essential metapackages for architectures other than
    hostarch are skipped; the one for hostarch additionally gains
    dependencies on the host architecture toolchain libraries.
    """
    for p in parse_deb822(mirror.fetch_binaries(buildarch)):
        if p["Package"].startswith("crossbuild-essential-"):
            if p["Package"] != "crossbuild-essential-" + hostarch:
                continue
            extra = "libc-dev:%s, libstdc++-dev:%s" % (hostarch, hostarch)
            # avoid a KeyError / leading comma if Depends happens to be absent
            if "Depends" in p:
                p["Depends"] += ", " + extra
            else:
                p["Depends"] = extra
        strip_dict(p, binfields)
        yield p
326
def make_binary_list_host(mirror, hostarch):
    """Yield host architecture binary packages for the dose input,
    excluding those that need not or must not be considered:
    Architecture: all and Multi-Arch: foreign packages are satisfied
    from the build architecture, Essential: yes packages are always
    present, and known-bad foreign packages are skipped."""
    for pkg in parse_deb822(mirror.fetch_binaries(hostarch)):
        unwanted = (pkg["Architecture"] == "all"
                    or pkg.get("Multi-Arch") == "foreign"
                    or pkg.get("Essential") == "yes"
                    or pkg["Package"] in bad_foreign_packages)
        if unwanted:
            continue
        strip_dict(pkg, binfields)
        yield pkg
339
def make_binary_list(mirror, buildarch, hostarch):
    """Yield all binary packages relevant for cross building from
    buildarch to hostarch: first the build architecture packages,
    then the filtered host architecture ones."""
    yield from make_binary_list_build(mirror, buildarch, hostarch)
    yield from make_binary_list_host(mirror, hostarch)
343
def make_source_list(mirror, hostarch):
    """Yield source package entries for the dose input.

    Sources whose architecture list does not match hostarch are emitted
    as negative dummy entries so that latest_versions can use them to
    shadow older, matching versions of the same source.
    """
    for src in parse_deb822(mirror.fetch_sources()):
        if src.get("Extra-Source-Only") == "yes":
            continue
        buildable = any(arch_match(hostarch, pattern)
                        for pattern in src["Architecture"].split())
        if buildable:
            strip_dict(src, srcfields)
            strip_alternatvies(src, srcdepfields)
            yield src
        else:
            # dummy entry preventing older matching versions
            yield {"Package": src["Package"], "Version": src["Version"],
                   "Negative-Entry": "yes"}
357
def check_bdsat(mirror, buildarch, hostarch):
    """Check cross build dependency satisfiability of all source packages
    when building on buildarch for hostarch.
    @returns: dict mapping source package name to a (version, reason)
              pair; reason is None when the package is satisfiable
    """
    cmd = [
        "--deb-native-arch=" + buildarch,
        "--deb-host-arch=" + hostarch,
        "--deb-drop-b-d-indep",
        "--deb-profiles=" + ",".join(PROFILES),
        "--successes",
        "--failures",
        "--explain",
        "--explain-minimal",
        "--deb-emulate-sbuild",
    ]

    result = {}
    # dose reads the package universe from files, so spool the generated
    # binary and source lists into temporary files first
    with tempfile.NamedTemporaryFile("w", encoding="utf8") as bintmp, \
            tempfile.NamedTemporaryFile("w", encoding="utf8") as srctmp:
        for p in make_binary_list(mirror, buildarch, hostarch):
            bintmp.write(serialize_deb822(p))
        bintmp.flush()
        cmd.append(bintmp.name)

        for p in latest_versions(make_source_list(mirror, hostarch)):
            srctmp.write(serialize_deb822(p))
        srctmp.flush()
        cmd.append(srctmp.name)

        dose_result = call_dose_builddebcheck(cmd)
        next(dose_result) # skip header
        for d in dose_result:
            reason = None
            if d["status"] != "ok":
                # only the first reported reason is summarized
                r = d["reasons"][0]
                if "missing" in r:
                    # keep only the bare package name of the unmet dependency
                    reason = "missing %s" % r["missing"]["pkg"]["unsat-dependency"].split()[0].split(":", 1)[0]
                elif "conflict" in r:
                    r = r["conflict"]["pkg1"]["unsat-conflict"]
                    # a conflict with a != version constraint indicates skew
                    reason = "skew " if ' (!= ' in r else "conflict "
                    reason += r.split()[0].split(':', 1)[0]
                else:
                    assert False
            result[d["package"]] = (d["version"], reason)
    return result
400
401
def update_depcheck(mirror, db, updatetime, buildarch, hostarch, state):
    """Synchronize the depstate table with a freshly computed result and
    mark the corresponding depcheck row as updated.
    @param updatetime: timestamp recorded on the depcheck row
    @param state: dict mapping source package to (version, reason) as
                  returned by check_bdsat; entries that are already
                  correct in the database are removed from it here
    """
    with contextlib.closing(db.cursor()) as cur:
        cur.execute("BEGIN;")
        cur.execute("""
            SELECT source, version, satisfiable, reason FROM depstate
                WHERE buildarch = ? AND hostarch = ?;""",
                    (buildarch, hostarch,))
        for source, version, satisfiable, reason in list(cur.fetchall()):
            if satisfiable == (reason is None) and \
               state.get(source) == (version, reason):
                # row already matches the new state: no reinsert needed
                del state[source]
            else:
                # stale row; delete it, the current state is inserted below
                cur.execute("""
                    DELETE FROM depstate
                        WHERE source = ? AND version = ? AND buildarch = ?
                            AND hostarch = ?;""",
                            (source, version, buildarch, hostarch))
        cur.executemany("""
            INSERT INTO depstate (source, buildarch, hostarch, version,
                                  satisfiable, reason)
                VALUES (?, ?, ?, ?, ?, ?);""",
                        ((source, buildarch, hostarch, version, reason is None,
                          reason)
                         for source, (version, reason) in state.items()))
        cur.execute("""
            UPDATE depcheck SET releasetime = ?, updatetime = ?, giveback = 0
                WHERE buildarch = ? AND hostarch = ?""",
                    (mirror.releasetime, updatetime, buildarch, hostarch))
    db.commit()
431
432
def main_docheck(mirror, archpair):
    """Run check_bdsat for one (buildarch, hostarch) pair and return a
    (buildarch, hostarch, state) triple; pool-friendly wrapper."""
    buildarch, hostarch = archpair
    return (buildarch, hostarch, check_bdsat(mirror, buildarch, hostarch))
435
436
class SequentialPool:
    """Sequential variant of multiprocessing.Pool for debugging."""

    # plain builtin map provides the same call shape as Pool.imap_unordered
    imap_unordered = map

    def __enter__(self):
        return self

    def __exit__(self, *exc_info):
        pass

    def close(self):
        pass

    def join(self):
        pass
448
def main():
    """Recheck dependency satisfiability for every architecture pair whose
    stored depcheck result predates the mirror's current release."""
    argp = argparse.ArgumentParser()
    argp.add_argument('-m', '--mirror',
                      default='http://deb.debian.org/debian',
                      help="debian mirror to use")
    argp.add_argument('-p', '--parallel', action="store_true",
                      help="enable parallel checking")
    args = argp.parse_args()
    mirror = DebianMirror(args.mirror)
    mirror.update_release()
    db = sqlite3.connect("db")
    cur = db.cursor()
    # only pairs not yet checked against the current release need work
    cur.execute("""
        SELECT buildarch, hostarch FROM depcheck WHERE releasetime < ?;""",
                (mirror.releasetime,))
    archpairs = set(cur.fetchall())
    if not archpairs:
        return
    print("checking %s" %
          ", ".join(sorted(map("%s -> %s".__mod__, archpairs))))
    now = datetime.datetime.utcnow().replace(microsecond=0)
    with multiprocessing.Pool() if args.parallel else SequentialPool() as pool:
        docheck = functools.partial(main_docheck, mirror)
        try:
            # results are committed per pair as they arrive, so partial
            # progress survives an interrupted run
            for buildarch, hostarch, state in pool.imap_unordered(docheck,
                                                                  archpairs):
                print("update %s -> %s" % (buildarch, hostarch))
                update_depcheck(mirror, db, now, buildarch, hostarch, state)
        finally:
            pool.close()
            pool.join()
480
# script entry point
if __name__ == "__main__":
    main()