depcheck: ignore givebacks
[~helmut/crossqa.git] / depcheck.py
1 #!/usr/bin/python3
2 # SPDX-License-Identifier: GPL-2.0+
3
4 import argparse
5 import collections
6 import contextlib
7 import datetime
8 import functools
9 import hashlib
10 import itertools
11 import lzma
12 import multiprocessing
13 import os.path
14 import sqlite3
15 import subprocess
16 import tempfile
17 import yaml
18
import apt_pkg
apt_pkg.init()  # initialize apt_pkg before using version_compare below
# module-level alias for the frequently used Debian version comparison
version_compare = apt_pkg.version_compare
import requests
23
24 from common import decompress_stream, yield_lines
25
# Architecture on which the builds run (dose --deb-native-arch).
BUILD_ARCH = "amd64"
# Build profiles activated for dependency resolution (dose --deb-profiles).
PROFILES = frozenset(("cross", "nocheck"))

# One parsed row of dpkg's cputable.
CPUEntry = collections.namedtuple('CPUEntry',
                                  'debcpu gnucpu regex bits endianness')

# One parsed row of dpkg's tupletable: a debian architecture tuple.
TupleEntry = collections.namedtuple('TupleEntry',
                                    'abi libc os cpu')
34
class Architectures:
    """Parser for dpkg's architecture tables, able to match architecture
    names against dpkg architecture wildcard patterns."""

    @staticmethod
    def read_table(filename):
        """Yield the whitespace-separated fields of every non-comment
        line of the given dpkg table file."""
        with open(filename) as f:
            for line in f:
                if not line.startswith("#"):
                    yield line.split()

    def __init__(self, cputable="/usr/share/dpkg/cputable",
                 tupletable="/usr/share/dpkg/tupletable",
                 abitable="/usr/share/dpkg/abitable"):
        self.cputable = {}    # debcpu -> CPUEntry
        self.tupletable = {}  # debarch -> TupleEntry
        self.abitable = {}    # debarch -> bits (int)
        self.read_cputable(cputable)
        self.read_tupletable(tupletable)
        self.read_abitable(abitable)

    def read_cputable(self, cputable):
        """(Re)load the mapping from debian cpu names to CPUEntry."""
        self.cputable.clear()
        for values in self.read_table(cputable):
            values[3] = int(values[3])  # bits column is numeric
            entry = CPUEntry(*values)
            self.cputable[entry.debcpu] = entry

    def read_tupletable(self, tupletable):
        """(Re)load the mapping from debian architecture names to
        TupleEntry, expanding <cpu> template rows once per known cpu.
        Requires read_cputable to have run first."""
        self.tupletable.clear()
        for debtuple, debarch in self.read_table(tupletable):
            if '<cpu>' in debtuple:
                for cpu in self.cputable:
                    entry = TupleEntry(*debtuple.replace("<cpu>", cpu)
                                       .split("-"))
                    self.tupletable[debarch.replace("<cpu>", cpu)] = entry
            else:
                self.tupletable[debarch] = TupleEntry(*debtuple.split("-"))

    def read_abitable(self, abitable):
        """(Re)load the mapping from architecture to bits."""
        self.abitable.clear()
        for arch, bits in self.read_table(abitable):
            bits = int(bits)
            self.abitable[arch] = bits

    def match(self, arch, pattern):
        """Check whether debian architecture arch matches the dpkg
        architecture wildcard pattern (e.g. "any", "linux-any",
        "any-amd64")."""
        parts = pattern.split("-")
        if "any" not in parts:
            # no wildcard component: only an exact name can match
            return pattern == arch
        # left-pad with "any" up to the full abi-libc-os-cpu form
        while len(parts) < 4:
            parts.insert(0, "any")
        entry = self.tupletable[arch]
        return all(parts[i] in (entry[i], "any") for i in range(4))

    def getendianness(self, arch):
        """Return the endianness of arch as recorded in the cpu table."""
        return self.cputable[self.tupletable[arch].cpu].endianness
88
# Shared instance; the dpkg tables are read once at import time.
architectures = Architectures()
# Convenience alias used when filtering source architectures.
arch_match = architectures.match
91
def call_dose_builddebcheck(arguments):
    """
    @type arguments: [str]
    @param arguments: command line arguments to dose-builddebcheck
    @returns: an iterable over loaded yaml documents. The first document
              is the header, all other documents are per-package.
    @raises subprocess.CalledProcessError: if dose errors out
    """
    cmd = ["dose-builddebcheck"]
    cmd.extend(arguments)

    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE)

    lines = []
    for line in proc.stdout:
        if line.startswith(b'  '):
            lines.append(line)
        elif line == b' -\n':
            # CBaseLoader keeps every scalar a string, so version numbers
            # and statuses are never coerced to int/float/bool.
            yield yaml.load(b"".join(lines), Loader=yaml.CBaseLoader)
            lines = []
    proc.stdout.close()
    if lines:
        # Use the same loader as above; this trailing document previously
        # went through CSafeLoader, which would have produced different
        # scalar types than the per-package documents.
        yield yaml.load(b"".join(lines), Loader=yaml.CBaseLoader)
    # dose exits 1 for "some packages are broken"; that is still success.
    if proc.wait() not in (0, 1):
        raise subprocess.CalledProcessError(proc.returncode, cmd)
117
def parse_deb822(iterable):
    """Parse an iterable of bytes into an iterable of str-dicts."""
    current = {}
    field = None
    text = None
    for raw in yield_lines(iterable):
        decoded = raw.decode("utf8")
        if decoded == "\n":
            # paragraph separator: flush the pending field, emit paragraph
            if field is not None:
                current[field] = text.strip()
                field = None
            yield current
            current = {}
        elif field and decoded.startswith((" ", "\t")):
            # continuation line of the field currently being collected
            text += decoded
        else:
            # new "Key: value" line; store the previous field first
            if field is not None:
                current[field] = text.strip()
            try:
                field, text = decoded.split(":", 1)
            except ValueError:
                raise ValueError("invalid input line %r" % decoded)
    # input ended without a trailing blank line: emit what we have
    if field is not None:
        current[field] = text.strip()
    if current:
        yield current
144
def serialize_deb822(dct):
    """Serialize a str-dict into a single str holding one deb822
    paragraph, terminated by a blank line."""
    return "".join(map("%s: %s\n".__mod__, dct.items())) + "\n"
148
class HashSumMismatch(Exception):
    """Raised when downloaded data does not match its expected digest."""
    pass
151
def hash_check(iterable, hashobj, expected_digest):
    """Pass the bytes chunks of iterable through unchanged while feeding
    them into hashobj. After the input is exhausted, compare the resulting
    hex digest with expected_digest; on mismatch the final next() raises
    HashSumMismatch instead of StopIteration.
    """
    for chunk in iterable:
        hashobj.update(chunk)
        yield chunk
    if expected_digest != hashobj.hexdigest():
        raise HashSumMismatch()
163
def parse_date(s):
    """Convert an RFC 2822-style date string, as found in Release files,
    into a datetime object."""
    fmt = "%a, %d %b %Y %H:%M:%S %Z"
    return datetime.datetime.strptime(s, fmt)
166
class GPGV:
    """Verify OpenPGP-signed data using gpgv and the apt keyrings."""

    def __init__(self, files=("/etc/apt/trusted.gpg",),
                 partsdir="/etc/apt/trusted.gpg.d"):
        candidates = list(files)
        try:
            candidates.extend(os.path.join(partsdir, e)
                              for e in os.listdir(partsdir))
        except FileNotFoundError:
            # no parts directory on this system; use the explicit files only
            pass
        # keep only keyrings that actually exist and are readable
        self.keyrings = list(filter(lambda f: os.access(f, os.R_OK),
                                    candidates))

    def verify(self, content):
        """Check the signature on content and return the signed payload.

        @type content: bytes
        @rtype: bytes
        @raises ValueError: if gpgv rejects the signature
        """
        cmdline = ["gpgv", "--quiet", "--weak-digest", "SHA1", "--output", "-"]
        for keyring in self.keyrings:
            cmdline.extend(("--keyring", keyring))
        proc = subprocess.Popen(cmdline, stdin=subprocess.PIPE,
                                stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        stdout, _ = proc.communicate(content)
        if proc.wait() != 0:
            raise ValueError("signature verification failed")
        return stdout
186
class DebianMirror:
    """Access a Debian-style mirror: fetch and authenticate the InRelease
    file, then download hash-verified index files (Sources/Packages)."""
    # hash from the Release file used to verify all fetched indices
    hashfunc = "SHA256"

    def __init__(self, uri, dist="sid"):
        self.uri = uri
        self.dist = dist
        # the following three are populated by parse_release:
        self.releasetime = None  # datetime parsed from the Date field
        self.byhash = None       # whether acquire-by-hash is supported
        self.files = {}          # index filename -> expected hex digest

    @staticmethod
    def get_all_keyrings():
        """Yield candidate apt keyring paths, existing or not."""
        yield "/etc/apt/trusted.gpg"
        partsdir = "/etc/apt/trusted.gpg.d"
        try:
            for e in os.listdir(partsdir):
                yield os.path.join(partsdir, e)
        except FileNotFoundError:
            pass

    @staticmethod
    def get_keyrings():
        """Yield the readable subset of get_all_keyrings()."""
        return filter(lambda f: os.access(f, os.R_OK),
                      DebianMirror.get_all_keyrings())

    def get_uri(self, filename):
        """Build the full URI of a file below dists/<dist>/."""
        return "%s/dists/%s/%s" % (self.uri, self.dist, filename)

    def fetch_release(self):
        """Download InRelease and return its signature-checked payload."""
        resp = requests.get(self.get_uri("InRelease"))
        resp.raise_for_status()
        return GPGV().verify(resp.content)

    def parse_release(self, content):
        """Parse a (verified) Release payload, populating releasetime,
        byhash and files.

        @raises ValueError: on timestamps in the future/past or on a
                            malformed checksum section
        """
        info, = list(parse_deb822([content]))
        self.releasetime = parse_date(info["Date"])
        valid_until = parse_date(info["Valid-Until"])
        now = datetime.datetime.utcnow()
        # sanity-check the signed timestamps against our clock
        if self.releasetime > now:
            raise ValueError("release file generated in future")
        if valid_until < now:
            raise ValueError("release signature expired")
        self.byhash = info.pop("Acquire-By-Hash", "no") == "yes"
        self.files = {}
        # checksum section: one "<digest> <size> <filename>" per line
        for line in info[self.hashfunc].splitlines():
            parts = line.split()
            if not parts:
                continue
            if len(parts) != 3:
                raise ValueError("invalid %s line %r" % (self.hashfunc, line))
            self.files[parts[2]] = parts[0]

    def update_release(self):
        """Fetch and parse the current InRelease file."""
        self.parse_release(self.fetch_release())

    def fetch_list(self, listname):
        """Stream the decompressed content of an index file, verifying
        its digest against the Release file; prefers the .xz variant.

        @raises HashSumMismatch: when the download fails verification
        """
        if listname + ".xz" in self.files:
            listname += ".xz"
            wrapper = lambda i: decompress_stream(i, lzma.LZMADecompressor())
        else:
            wrapper = lambda i: i
        hashvalue = self.files[listname]
        if self.byhash:
            # fetch via the content-addressed path for consistency
            listname = "%s/by-hash/%s/%s" % (os.path.dirname(listname),
                                             self.hashfunc, hashvalue)
        with contextlib.closing(requests.get(self.get_uri(listname),
                                             stream=True)) as resp:
            resp.raise_for_status()
            it = resp.iter_content(65536)
            # hash the compressed bytes (that is what Release lists),
            # then decompress downstream of the check
            it = hash_check(it, hashlib.new(self.hashfunc), hashvalue)
            yield from wrapper(it)

    def fetch_sources(self, component="main"):
        """Stream the Sources index of the given component."""
        return self.fetch_list("%s/source/Sources" % component)

    def fetch_binaries(self, architecture, component="main"):
        """Stream the Packages index for architecture in component."""
        return self.fetch_list("%s/binary-%s/Packages" %
                               (component, architecture))
264
# Packages fields kept when generating dose-builddebcheck input.
binfields = frozenset((
    "Architecture",
    "Breaks",
    "Conflicts",
    "Depends",
    "Essential",
    "Multi-Arch",
    "Package",
    "Pre-Depends",
    "Provides",
    "Version",
))

# Sources dependency fields whose alternatives get stripped.
srcdepfields = frozenset((
    "Build-Conflicts",
    "Build-Conflicts-Arch",
    "Build-Depends",
    "Build-Depends-Arch",
))
# All Sources fields kept when generating dose-builddebcheck input.
srcfields = srcdepfields.union((
    "Architecture",
    "Package",
    "Version",
))

# Binary packages excluded from the host-architecture package list;
# per-package reasons inline.
bad_foreign_packages = frozenset((
    "flex-old", # cannot execute /usr/bin/flex
    "icmake", # cannot execute /usr/bin/icmake, build system
    "jam", # cannot execute /usr/bin/jam, build system
    "libtool-bin", # #836123
    "python2.7-minimal", # fails postinst
    "python3.6-minimal", # fails postinst
    "python3.7-minimal", # fails postinst
    "swi-prolog-nox", # fails postinst
    "xrdp", # fails postinst
    "libgvc6", # fails postinst
))
302
def strip_dict(dct, keepfields):
    """Remove, in place, every key of dct not listed in keepfields."""
    for unwanted in set(dct).difference(keepfields):
        del dct[unwanted]
308
def strip_alternatives(dct, fields):
    """For each of the given dependency fields present in dct, keep only
    the first alternative of every comma-separated dependency, dropping
    everything after a "|". Purely textual: surrounding whitespace of
    the kept alternative is preserved."""
    for f in fields:
        try:
            value = dct[f]
        except KeyError:
            continue
        dct[f] = ",".join(dep.split("|", 1)[0]
                          for dep in value.split(","))

# Backward-compatible alias preserving the historical misspelled name
# still used by callers.
strip_alternatvies = strip_alternatives
317
def latest_versions(pkgs):
    """Collapse pkgs to at most one entry per package name, keeping the
    entry with the greatest version, then drop negative dummy entries."""
    newest = {}
    for entry in pkgs:
        name = entry["Package"]
        try:
            if version_compare(newest[name]["Version"], entry["Version"]) > 0:
                continue  # a newer version is already recorded
        except KeyError:
            pass  # first occurrence of this package name
        newest[name] = entry
    return (entry for entry in newest.values()
            if "Package" in entry and "Negative-Entry" not in entry)
330
def make_binary_list_build(mirror, arch):
    """Yield stripped Packages entries for the build architecture.
    Of the crossbuild-essential packages, only the one for arch is kept
    and it gains extra host-architecture library dependencies."""
    prefix = "crossbuild-essential-"
    wanted = prefix + arch
    for pkg in parse_deb822(mirror.fetch_binaries(BUILD_ARCH)):
        name = pkg["Package"]
        if name.startswith(prefix):
            if name != wanted:
                continue
            pkg["Depends"] += ", libc-dev:%s, libstdc++-dev:%s" % (arch, arch)
        strip_dict(pkg, binfields)
        yield pkg
339
def make_binary_list_host(mirror, arch):
    """Yield stripped Packages entries for the host architecture,
    skipping entries unsuitable as foreign packages in a cross build."""
    for pkg in parse_deb822(mirror.fetch_binaries(arch)):
        unsuitable = (pkg["Architecture"] == "all"
                      or pkg.get("Multi-Arch") == "foreign"
                      or pkg.get("Essential") == "yes"
                      or pkg["Package"] in bad_foreign_packages)
        if unsuitable:
            continue
        strip_dict(pkg, binfields)
        yield pkg
352
def make_binary_list(mirror, arch):
    """Combine build- and host-architecture package entries."""
    yield from make_binary_list_build(mirror, arch)
    yield from make_binary_list_host(mirror, arch)
356
def make_source_list(mirror, arch):
    """Yield stripped Sources entries relevant for arch. Sources that do
    not build on arch become dummy entries so they still shadow older
    versions whose architecture list would match."""
    for src in parse_deb822(mirror.fetch_sources()):
        if src.get("Extra-Source-Only") == "yes":
            continue
        patterns = src["Architecture"].split()
        if any(arch_match(arch, pat) for pat in patterns):
            strip_dict(src, srcfields)
            strip_alternatvies(src, srcdepfields)
            yield src
        else:
            # dummy entry preventing older matching versions
            yield {"Package": src["Package"], "Version": src["Version"],
                   "Negative-Entry": "yes"}
370
def check_bdsat(mirror, arch):
    """Run dose-builddebcheck for cross building on arch and summarize
    the per-package results.

    @returns: a dict mapping source package name to a pair (version,
              reason) where reason is None for satisfiable packages and
              a short explanation string otherwise.
    @raises subprocess.CalledProcessError: if dose errors out
    """
    cmd = [
        "--deb-native-arch=" + BUILD_ARCH,
        "--deb-host-arch=" + arch,
        "--deb-drop-b-d-indep",
        "--deb-profiles=" + ",".join(PROFILES),
        "--successes",
        "--failures",
        "--explain",
        "--explain-minimal",
        "--deb-emulate-sbuild",
    ]

    result = {}
    with tempfile.NamedTemporaryFile("w", encoding="utf8") as bintmp, \
            tempfile.NamedTemporaryFile("w", encoding="utf8") as srctmp:
        for p in make_binary_list(mirror, arch):
            bintmp.write(serialize_deb822(p))
        bintmp.flush()
        cmd.append(bintmp.name)

        for p in latest_versions(make_source_list(mirror, arch)):
            srctmp.write(serialize_deb822(p))
        srctmp.flush()
        cmd.append(srctmp.name)

        dose_result = call_dose_builddebcheck(cmd)
        next(dose_result)  # skip header document
        for d in dose_result:
            reason = None
            if d["status"] != "ok":
                # only the first reported reason is summarized
                r = d["reasons"][0]
                if "missing" in r:
                    dep = r["missing"]["pkg"]["unsat-dependency"]
                    # keep the package name: strip version constraint
                    # and architecture qualifier
                    reason = "missing %s" % dep.split()[0].split(":", 1)[0]
                elif "conflict" in r:
                    r = r["conflict"]["pkg1"]["unsat-conflict"]
                    # a (!= version) conflict indicates a version skew
                    reason = "skew " if ' (!= ' in r else "conflict "
                    reason += r.split()[0].split(':', 1)[0]
                else:
                    # dose reports either missing or conflict; fail loudly
                    # (a bare assert would vanish under python -O)
                    raise AssertionError("unknown dose reason: %r" % r)
            result[d["package"]] = (d["version"], reason)
    return result
413
414
def update_depcheck(mirror, db, updatetime, architecture, state):
    """Synchronize the depstate table for architecture with state and
    mark the architecture as checked in the depcheck table.

    @param db: sqlite3 connection
    @param state: dict mapping source name to (version, reason) as
                  returned by check_bdsat; NOTE: mutated (unchanged
                  entries are deleted from it) during the update
    """
    with contextlib.closing(db.cursor()) as cur:
        cur.execute("BEGIN;")
        cur.execute("SELECT source, version, satisfiable, reason FROM depstate WHERE architecture = ?;",
                    (architecture,))
        for source, version, satisfiable, reason in list(cur.fetchall()):
            # Rows already matching the new state are dropped from state
            # so they are neither deleted nor reinserted below.
            if satisfiable == (reason is None) and \
               state.get(source) == (version, reason):
                del state[source]
            else:
                cur.execute("DELETE FROM depstate WHERE source = ? AND version = ? AND architecture = ?;",
                            (source, version, architecture))
        # Insert everything that remained in state (new or changed rows).
        cur.executemany("INSERT INTO depstate (source, architecture, version, satisfiable, reason) VALUES (?, ?, ?, ?, ?);",
                        ((source, architecture, version, reason is None,
                          reason)
                         for source, (version, reason) in state.items()))
        # Record the release we checked against and clear the giveback flag.
        cur.execute("UPDATE depcheck SET releasetime = ?, updatetime = ?, giveback = 0 WHERE architecture = ?",
                    (mirror.releasetime, updatetime, architecture))
    db.commit()
434
435
def main_docheck(mirror, architecture):
    """Pool worker: compute the dependency state for one architecture."""
    state = check_bdsat(mirror, architecture)
    return (architecture, state)
438
439
class SequentialPool:
    """In-process stand-in for multiprocessing.Pool, useful when
    debugging: jobs run sequentially via the built-in map."""

    def __enter__(self):
        return self

    def __exit__(self, *exc_info):
        return None

    def close(self):
        return None

    def join(self):
        return None

    # the built-in map already provides lazy sequential iteration
    imap_unordered = map
451
def main():
    """Recheck every architecture whose stored release is older than the
    mirror's current one and persist the results."""
    parser = argparse.ArgumentParser()
    parser.add_argument('-m', '--mirror',
                        default='http://deb.debian.org/debian',
                        help="debian mirror to use")
    parser.add_argument('-p', '--parallel', action="store_true",
                        help="enable parallel checking")
    options = parser.parse_args()
    mirror = DebianMirror(options.mirror)
    mirror.update_release()
    db = sqlite3.connect("db")
    cur = db.cursor()
    cur.execute("SELECT architecture FROM depcheck WHERE releasetime < ?;",
                (mirror.releasetime,))
    archs = {row[0] for row in cur.fetchall()}
    if not archs:
        return
    print("checking %s" % " ".join(sorted(archs)))
    now = datetime.datetime.utcnow().replace(microsecond=0)
    pool = multiprocessing.Pool() if options.parallel else SequentialPool()
    with pool:
        docheck = functools.partial(main_docheck, mirror)
        try:
            for architecture, state in pool.imap_unordered(docheck, archs):
                print("update %s" % architecture)
                update_depcheck(mirror, db, now, architecture, state)
        finally:
            pool.close()
            pool.join()
480
# Run only when executed as a script, not on import.
if __name__ == "__main__":
    main()