#!/usr/bin/python3

import argparse
import collections
import contextlib
import datetime
import functools
import hashlib
import itertools
import lzma
import multiprocessing
import os.path
import sqlite3
import subprocess
import tempfile
import yaml

import apt_pkg
apt_pkg.init()
version_compare = apt_pkg.version_compare
import requests

from common import decompress_stream, yield_lines

BUILD_ARCH = "amd64"
PROFILES = frozenset(("cross", "nocheck"))

CPUEntry = collections.namedtuple('CPUEntry',
                                  'debcpu gnucpu regex bits endianness')

TupleEntry = collections.namedtuple('TupleEntry',
                                    'abi libc os cpu')

class Architectures:
    @staticmethod
    def read_table(filename):
        with open(filename) as f:
            for line in f:
                if not line.startswith("#"):
                    yield line.split()

    def __init__(self, cputable="/usr/share/dpkg/cputable",
                 tupletable="/usr/share/dpkg/tupletable",
                 abitable="/usr/share/dpkg/abitable"):
        self.cputable = {}
        self.tupletable = {}
        self.abitable = {}
        self.read_cputable(cputable)
        self.read_tupletable(tupletable)
        self.read_abitable(abitable)

    def read_cputable(self, cputable):
        self.cputable.clear()
        for values in self.read_table(cputable):
            values[3] = int(values[3])  # bits
            entry = CPUEntry(*values)
            self.cputable[entry.debcpu] = entry

    def read_tupletable(self, tupletable):
        self.tupletable.clear()
        for debtuple, debarch in self.read_table(tupletable):
            if '<cpu>' in debtuple:
                for cpu in self.cputable:
                    entry = TupleEntry(*debtuple.replace("<cpu>", cpu)
                                       .split("-"))
                    self.tupletable[debarch.replace("<cpu>", cpu)] = entry
            else:
                self.tupletable[debarch] = TupleEntry(*debtuple.split("-"))

    def read_abitable(self, abitable):
        self.abitable.clear()
        for arch, bits in self.read_table(abitable):
            bits = int(bits)
            self.abitable[arch] = bits

    def match(self, arch, pattern):
        parts = pattern.split("-")
        if "any" not in parts:
            return pattern == arch
        while len(parts) < 4:
            parts.insert(0, "any")
        entry = self.tupletable[arch]
        return all(parts[i] in (entry[i], "any") for i in range(4))

    def getendianness(self, arch):
        return self.cputable[self.tupletable[arch].cpu].endianness

architectures = Architectures()
arch_match = architectures.match

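# Illustrative sketch (a hypothetical helper, never called anywhere): how
# dpkg-style architecture wildcards resolve via Architectures.match. It
# assumes the dpkg tables loaded above were readable.
def _demo_arch_match():
    assert arch_match("amd64", "amd64")        # no wildcard: exact match
    assert arch_match("amd64", "any-amd64")    # abi/libc/os wildcarded
    assert arch_match("armhf", "linux-any")    # os component must match
    assert not arch_match("hurd-i386", "linux-any")
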
def call_dose_builddebcheck(arguments):
    """
    @type arguments: [str]
    @param arguments: command line arguments to dose-builddebcheck
    @returns: an iterable over loaded yaml documents. The first document
              is the header, all other documents are per-package.
    @raises subprocess.CalledProcessError: if dose errors out
    """
    cmd = ["dose-builddebcheck"]
    cmd.extend(arguments)

    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE)

    lines = []
    for line in proc.stdout:
        if line.startswith(b'  '):
            lines.append(line)
        elif line == b' -\n':
            yield yaml.load(b"".join(lines), Loader=yaml.CBaseLoader)
            lines = []
    proc.stdout.close()
    if lines:
        yield yaml.load(b"".join(lines), Loader=yaml.CBaseLoader)
    if proc.wait() not in (0, 1):
        raise subprocess.CalledProcessError(proc.returncode, cmd)

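# Illustrative sketch (a hypothetical helper, never called anywhere): the
# documents yielded after the header roughly look like {"package": ...,
# "version": ..., "status": "ok" or "broken", "reasons": [...]}; check_bdsat
# below consumes them for real.
def _demo_dose_consumption(arguments):
    documents = call_dose_builddebcheck(arguments)
    next(documents)  # the first document is the header
    for d in documents:
        print(d["package"], d["status"])
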
def parse_deb822(iterable):
    """Parse an iterable of bytes into an iterable of str-dicts."""
    mapping = {}
    key = None
    value = None
    for line in yield_lines(iterable):
        line = line.decode("utf8")
        if line == "\n":
            if key is not None:
                mapping[key] = value.strip()
                key = None
            yield mapping
            mapping = {}
        elif key and line.startswith((" ", "\t")):
            value += line
        else:
            if key is not None:
                mapping[key] = value.strip()
            try:
                key, value = line.split(":", 1)
            except ValueError:
                raise ValueError("invalid input line %r" % line)
    if key is not None:
        mapping[key] = value.strip()
    if mapping:
        yield mapping

def serialize_deb822(dct):
    """Serialize a str-dict into a single deb822 stanza string."""
    return "".join(map("%s: %s\n".__mod__, dct.items())) + "\n"

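# Minimal roundtrip sketch (a hypothetical helper, never called anywhere),
# assuming a single well-formed stanza with one continuation line:
def _demo_deb822_roundtrip():
    raw = b"Package: hello\nVersion: 2.10-2\nDepends: libc6,\n dpkg\n\n"
    stanza, = parse_deb822([raw])
    assert stanza["Package"] == "hello"
    assert stanza["Depends"] == "libc6,\n dpkg"  # continuation kept verbatim
    print(serialize_deb822(stanza), end="")
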
class HashSumMismatch(Exception):
    pass

def hash_check(iterable, hashobj, expected_digest):
    """Wraps an iterable that yields bytes. It doesn't modify the sequence,
    but on the final element it verifies that the concatenation of bytes
    yields an expected digest value. Upon failure, the final next() results in
    a HashSumMismatch rather than StopIteration.
    """
    for data in iterable:
        hashobj.update(data)
        yield data
    if hashobj.hexdigest() != expected_digest:
        raise HashSumMismatch()

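# Minimal sketch (a hypothetical helper, never called anywhere): chunks pass
# through unchanged and the digest is only checked once the stream is
# exhausted.
def _demo_hash_check():
    digest = hashlib.sha256(b"foobar").hexdigest()
    chunks = hash_check([b"foo", b"bar"], hashlib.sha256(), digest)
    assert b"".join(chunks) == b"foobar"
    try:
        list(hash_check([b"foo", b"bar"], hashlib.sha256(), "0" * 64))
    except HashSumMismatch:
        pass  # expected: a wrong digest surfaces only at the end
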
def parse_date(s):
    return datetime.datetime.strptime(s, "%a, %d %b %Y %H:%M:%S %Z")

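# Hypothetical example: Release files use RFC 822 style dates, so e.g.
# parse_date("Sat, 07 Oct 2017 10:32:29 UTC") yields the naive
# datetime.datetime(2017, 10, 7, 10, 32, 29).
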
class GPGV:
    def __init__(self, files=("/etc/apt/trusted.gpg",),
                 partsdir="/etc/apt/trusted.gpg.d"):
        candidates = list(files)
        candidates.extend(os.path.join(partsdir, e)
                          for e in os.listdir(partsdir))
        self.keyrings = list(filter(lambda f: os.access(f, os.R_OK),
                                    candidates))

    def verify(self, content):
        cmdline = ["gpgv", "--quiet", "--weak-digest", "SHA1", "--output", "-"]
        for keyring in self.keyrings:
            cmdline.extend(("--keyring", keyring))
        proc = subprocess.Popen(cmdline, stdin=subprocess.PIPE,
                                stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        stdout, _ = proc.communicate(content)
        if proc.wait() != 0:
            raise ValueError("signature verification failed")
        return stdout

class DebianMirror:
    hashfunc = "SHA256"

    def __init__(self, uri, dist="sid"):
        self.uri = uri
        self.dist = dist
        self.releasetime = None
        self.byhash = None
        self.files = {}

    @staticmethod
    def get_all_keyrings():
        yield "/etc/apt/trusted.gpg"
        partsdir = "/etc/apt/trusted.gpg.d"
        try:
            for e in os.listdir(partsdir):
                yield os.path.join(partsdir, e)
        except FileNotFoundError:
            pass

    @staticmethod
    def get_keyrings():
        return filter(lambda f: os.access(f, os.R_OK),
                      DebianMirror.get_all_keyrings())

    def get_uri(self, filename):
        return "%s/dists/%s/%s" % (self.uri, self.dist, filename)

    def fetch_release(self):
        resp = requests.get(self.get_uri("InRelease"))
        resp.raise_for_status()
        return GPGV().verify(resp.content)

    def parse_release(self, content):
        info, = list(parse_deb822([content]))
        self.releasetime = parse_date(info["Date"])
        valid_until = parse_date(info["Valid-Until"])
        now = datetime.datetime.utcnow()
        if self.releasetime > now:
            raise ValueError("release file generated in future")
        if valid_until < now:
            raise ValueError("release signature expired")
        self.byhash = info.pop("Acquire-By-Hash", "no") == "yes"
        self.files = {}
        for line in info[self.hashfunc].splitlines():
            parts = line.split()
            if not parts:
                continue
            if len(parts) != 3:
                raise ValueError("invalid %s line %r" % (self.hashfunc, line))
            self.files[parts[2]] = parts[0]

    def update_release(self):
        self.parse_release(self.fetch_release())

    def fetch_list(self, listname):
        if listname + ".xz" in self.files:
            listname += ".xz"
            wrapper = lambda i: decompress_stream(i, lzma.LZMADecompressor())
        else:
            wrapper = lambda i: i
        hashvalue = self.files[listname]
        if self.byhash:
            listname = "%s/by-hash/%s/%s" % (os.path.dirname(listname),
                                             self.hashfunc, hashvalue)
        with contextlib.closing(requests.get(self.get_uri(listname),
                                             stream=True)) as resp:
            resp.raise_for_status()
            it = resp.iter_content(65536)
            it = hash_check(it, hashlib.new(self.hashfunc), hashvalue)
            yield from wrapper(it)

    def fetch_sources(self, component="main"):
        return self.fetch_list("%s/source/Sources" % component)

    def fetch_binaries(self, architecture, component="main"):
        return self.fetch_list("%s/binary-%s/Packages" %
                               (component, architecture))

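# Illustrative sketch (a hypothetical helper, never called anywhere; needs
# network access and readable apt keyrings): validate InRelease, then stream
# one Packages index, mirroring what main() does below.
def _demo_mirror_fetch():
    mirror = DebianMirror("http://deb.debian.org/debian")
    mirror.update_release()
    for stanza in parse_deb822(mirror.fetch_binaries("amd64")):
        print(stanza["Package"])
        break
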
binfields = frozenset((
    "Architecture",
    "Breaks",
    "Conflicts",
    "Depends",
    "Essential",
    "Multi-Arch",
    "Package",
    "Pre-Depends",
    "Provides",
    "Version",
))

srcdepfields = frozenset((
    "Build-Conflicts",
    "Build-Conflicts-Arch",
    "Build-Depends",
    "Build-Depends-Arch",
))
srcfields = srcdepfields.union((
    "Architecture",
    "Package",
    "Version",
))

bad_foreign_packages = frozenset((
    "flex-old", # cannot execute /usr/bin/flex
    "icmake", # cannot execute /usr/bin/icmake, build system
    "jam", # cannot execute /usr/bin/jam, build system
    "libtool-bin", # #836123
    "python2.7-minimal", # fails postinst
    "python3.6-minimal", # fails postinst
    "python3.7-minimal", # fails postinst
    "swi-prolog-nox", # fails postinst
    "xrdp", # fails postinst
    "libgvc6", # fails postinst
))

def strip_dict(dct, keepfields):
    keys = set(dct.keys())
    keys.difference_update(keepfields)
    for k in keys:
        del dct[k]

def strip_alternatives(dct, fields):
    for f in fields:
        try:
            value = dct[f]
        except KeyError:
            continue
        dct[f] = ",".join(dep.split("|", 1)[0]
                          for dep in value.split(","))

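# Minimal sketch (a hypothetical helper, never called anywhere): only the
# first alternative of each dependency survives; whitespace around the cut
# "|" is left as-is.
def _demo_strip_alternatives():
    src = {"Build-Depends": "debhelper (>= 10), gcc | clang, flex"}
    strip_alternatives(src, ["Build-Depends"])
    assert src["Build-Depends"] == "debhelper (>= 10), gcc , flex"
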
def latest_versions(pkgs):
    packages = {}
    for p in pkgs:
        name = p["Package"]
        try:
            if version_compare(packages[name]["Version"], p["Version"]) > 0:
                continue
        except KeyError:
            pass
        packages[name] = p
    return (p for p in packages.values()
            if "Package" in p and "Negative-Entry" not in p)

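# Minimal sketch (a hypothetical helper, never called anywhere): only the
# newest version of each package survives, and Negative-Entry stanzas
# (emitted by make_source_list below) are dropped from the result while
# still shadowing older versions of the same package.
def _demo_latest_versions():
    entries = [
        {"Package": "hello", "Version": "2.9-1"},
        {"Package": "hello", "Version": "2.10-2"},
        {"Package": "bye", "Version": "1.0-1", "Negative-Entry": "yes"},
    ]
    assert [p["Version"] for p in latest_versions(entries)] == ["2.10-2"]
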
def make_binary_list_build(mirror, arch):
    for p in parse_deb822(mirror.fetch_binaries(BUILD_ARCH)):
        if p["Package"].startswith("crossbuild-essential-"):
            if p["Package"] != "crossbuild-essential-" + arch:
                continue
            p["Depends"] += ", libc-dev:%s, libstdc++-dev:%s" % (arch, arch)
        strip_dict(p, binfields)
        yield p

def make_binary_list_host(mirror, arch):
    for p in parse_deb822(mirror.fetch_binaries(arch)):
        if p["Architecture"] == "all":
            continue
        if p.get("Multi-Arch") == "foreign":
            continue
        if p.get("Essential") == "yes":
            continue
        if p["Package"] in bad_foreign_packages:
            continue
        strip_dict(p, binfields)
        yield p

def make_binary_list(mirror, arch):
    return itertools.chain(make_binary_list_build(mirror, arch),
                           make_binary_list_host(mirror, arch))

def make_source_list(mirror, arch):
    for p in parse_deb822(mirror.fetch_sources()):
        if p.get("Extra-Source-Only") == "yes":
            continue
        if any(arch_match(arch, pattern)
               for pattern in p["Architecture"].split()):
            strip_dict(p, srcfields)
            strip_alternatives(p, srcdepfields)
            yield p
        else:
            # dummy entry preventing older matching versions from being
            # picked up by latest_versions
            yield {"Package": p["Package"], "Version": p["Version"],
                   "Negative-Entry": "yes"}

def check_bdsat(mirror, arch):
    cmd = [
        "--deb-native-arch=" + BUILD_ARCH,
        "--deb-host-arch=" + arch,
        "--deb-drop-b-d-indep",
        "--deb-profiles=" + ",".join(PROFILES),
        "--successes",
        "--failures",
        "--explain",
        "--explain-minimal",
        "--deb-emulate-sbuild",
    ]

    result = {}
    with tempfile.NamedTemporaryFile("w", encoding="utf8") as bintmp, \
            tempfile.NamedTemporaryFile("w", encoding="utf8") as srctmp:
        for p in make_binary_list(mirror, arch):
            bintmp.write(serialize_deb822(p))
        bintmp.flush()
        cmd.append(bintmp.name)

        for p in latest_versions(make_source_list(mirror, arch)):
            srctmp.write(serialize_deb822(p))
        srctmp.flush()
        cmd.append(srctmp.name)

        dose_result = call_dose_builddebcheck(cmd)
        next(dose_result)  # skip header
        for d in dose_result:
            reason = None
            if d["status"] != "ok":
                # Condense dose's first reason into a short classification:
                # the offending package name prefixed with "missing",
                # "conflict", or "skew" (a conflict between differing
                # versions of the same package).
                r = d["reasons"][0]
                if "missing" in r:
                    dep = r["missing"]["pkg"]["unsat-dependency"]
                    reason = "missing %s" % dep.split()[0].split(":", 1)[0]
                elif "conflict" in r:
                    r = r["conflict"]["pkg1"]["unsat-conflict"]
                    reason = "skew " if ' (!= ' in r else "conflict "
                    reason += r.split()[0].split(':', 1)[0]
                else:
                    assert False
            result[d["package"]] = (d["version"], reason)
    return result


def update_depcheck(mirror, db, updatetime, architecture, state):
    with contextlib.closing(db.cursor()) as cur:
        cur.execute("BEGIN;")
        cur.execute("SELECT source, version, satisfiable, reason FROM depstate WHERE architecture = ?;",
                    (architecture,))
        for source, version, satisfiable, reason in list(cur.fetchall()):
            if satisfiable == (reason is None) and \
                    state.get(source) == (version, reason):
                # Row is still accurate; drop it from state so it is neither
                # deleted nor reinserted.
                del state[source]
            else:
                cur.execute("DELETE FROM depstate WHERE source = ? AND version = ? AND architecture = ?;",
                            (source, version, architecture))
        cur.executemany("INSERT INTO depstate (source, architecture, version, satisfiable, reason) VALUES (?, ?, ?, ?, ?);",
                        ((source, architecture, version, reason is None,
                          reason)
                         for source, (version, reason) in state.items()))
        cur.execute("UPDATE depcheck SET releasetime = ?, updatetime = ?, giveback = 0 WHERE architecture = ?",
                    (mirror.releasetime, updatetime, architecture))
    db.commit()


def main_docheck(mirror, architecture):
    return (architecture, check_bdsat(mirror, architecture))


def main():
    argp = argparse.ArgumentParser()
    argp.add_argument('-m', '--mirror',
                      default='http://deb.debian.org/debian',
                      help="debian mirror to use")
    argp.add_argument('-p', '--parallel', action="store_true",
                      help="enable parallel checking")
    args = argp.parse_args()
    mirror = DebianMirror(args.mirror)
    mirror.update_release()
    db = sqlite3.connect("db")
    cur = db.cursor()
    cur.execute("""SELECT architecture FROM depcheck
                       WHERE giveback = 1 OR releasetime < ?;""",
                (mirror.releasetime,))
    archs = set(row[0] for row in cur.fetchall())
    if not archs:
        return
    print("checking %s" % " ".join(sorted(archs)))
    now = datetime.datetime.utcnow().replace(microsecond=0)
    mapper = multiprocessing.Pool().imap_unordered if args.parallel else map
    for architecture, state in mapper(functools.partial(main_docheck, mirror),
                                      archs):
        print("update %s" % architecture)
        update_depcheck(mirror, db, now, architecture, state)

if __name__ == "__main__":
    main()