depcheck.py: move the filter into sql
[~helmut/crossqa.git] / depcheck.py
1 #!/usr/bin/python3
2
3 import argparse
4 import collections
5 import contextlib
6 import datetime
7 import hashlib
8 import itertools
9 import lzma
10 import os.path
11 import sqlite3
12 import subprocess
13 import tempfile
14 import yaml
15
16 import apt_pkg
17 apt_pkg.init()
18 version_compare = apt_pkg.version_compare
19 import requests
20
21 from common import decompress_stream, yield_lines
22
BUILD_ARCH = "amd64"  # architecture on which builds are performed
PROFILES = frozenset(("cross", "nocheck"))  # build profiles passed to dose

# One row of dpkg's cputable (see /usr/share/dpkg/cputable).
CPUEntry = collections.namedtuple('CPUEntry',
                                  'debcpu gnucpu regex bits endianness')

# One Debian architecture tuple, the four components of a dpkg tupletable row.
TupleEntry = collections.namedtuple('TupleEntry',
                                    'abi libc os cpu')
31
class Architectures:
    """Parsed copies of dpkg's architecture tables plus wildcard matching."""

    @staticmethod
    def read_table(filename):
        """Yield the whitespace-split fields of each non-comment line."""
        with open(filename) as table:
            for row in table:
                if row.startswith("#"):
                    continue
                yield row.split()

    def __init__(self, cputable="/usr/share/dpkg/cputable",
                 tupletable="/usr/share/dpkg/tupletable",
                 abitable="/usr/share/dpkg/abitable"):
        self.cputable = {}
        self.tupletable = {}
        self.abitable = {}
        self.read_cputable(cputable)
        self.read_tupletable(tupletable)
        self.read_abitable(abitable)

    def read_cputable(self, cputable):
        """(Re)load the debcpu -> CPUEntry mapping from a cputable file."""
        self.cputable.clear()
        for row in self.read_table(cputable):
            row[3] = int(row[3])  # the bits column is numeric
            cpu = CPUEntry(*row)
            self.cputable[cpu.debcpu] = cpu

    def read_tupletable(self, tupletable):
        """(Re)load the debarch -> TupleEntry mapping, expanding <cpu>."""
        self.tupletable.clear()
        for debtuple, debarch in self.read_table(tupletable):
            if '<cpu>' not in debtuple:
                self.tupletable[debarch] = TupleEntry(*debtuple.split("-"))
                continue
            # a <cpu> row stands for one row per known cpu
            for cpu in self.cputable:
                expanded = debtuple.replace("<cpu>", cpu)
                self.tupletable[debarch.replace("<cpu>", cpu)] = \
                    TupleEntry(*expanded.split("-"))

    def read_abitable(self, abitable):
        """(Re)load the architecture -> bits mapping from an abitable file."""
        self.abitable.clear()
        for arch, bits in self.read_table(abitable):
            self.abitable[arch] = int(bits)

    def match(self, arch, pattern):
        """Tell whether a Debian architecture matches a wildcard pattern
        such as "any", "linux-any" or "any-amd64"."""
        parts = pattern.split("-")
        if "any" not in parts:
            return pattern == arch  # plain names match literally
        # pad with leading "any" components to the full abi-libc-os-cpu form
        parts = ["any"] * (4 - len(parts)) + parts
        entry = self.tupletable[arch]
        return all(part in (component, "any")
                   for part, component in zip(parts, entry))
85
# Shared instance built from the system dpkg tables; arch_match is the
# architecture wildcard matcher used throughout this module.
architectures = Architectures()
arch_match = architectures.match
88
def call_dose_builddebcheck(arguments):
    """
    @type arguments: [str]
    @param arguments: command line arguments to dose-builddebcheck
    @returns: an iterable over loaded yaml documents. The first document
              is the header, all other documents are per-package.
    @raises subprocess.CalledProcessError: if dose errors out
    """
    cmd = ["dose-builddebcheck"]
    cmd.extend(arguments)

    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE)

    lines = []
    for line in proc.stdout:
        if line.startswith(b'  '):
            lines.append(line)
        elif line == b' -\n':
            # end of one yaml document: parse and emit it
            # (use CSafeLoader consistently; this branch previously used
            # CBaseLoader while the tail branch used CSafeLoader)
            yield yaml.load(b"".join(lines), Loader=yaml.CSafeLoader)
            lines = []
    proc.stdout.close()
    if lines:
        yield yaml.load(b"".join(lines), Loader=yaml.CSafeLoader)
    # dose exits 1 for "some packages uninstallable", which is not an error
    if proc.wait() not in (0, 1):
        raise subprocess.CalledProcessError(proc.returncode, cmd)
114
def parse_deb822(iterable):
    """Parse an iterable of bytes into an iterable of str-dicts."""
    paragraph = {}
    field = None   # name of the field currently being accumulated
    text = None    # its accumulated (possibly multi-line) value
    for raw in yield_lines(iterable):
        decoded = raw.decode("utf8")
        if decoded == "\n":
            # blank line terminates a paragraph
            if field is not None:
                paragraph[field] = text.strip()
                field = None
            yield paragraph
            paragraph = {}
        elif field and decoded.startswith((" ", "\t")):
            # continuation line extends the current field
            text += decoded
        else:
            if field is not None:
                paragraph[field] = text.strip()
            field, sep, text = decoded.partition(":")
            if not sep:
                raise ValueError("invalid input line %r" % decoded)
    if field is not None:
        paragraph[field] = text.strip()
    if paragraph:
        yield paragraph
141
def serialize_deb822(dct):
    """Render a str-dict as one deb822 paragraph, returned as a str."""
    fields = "".join("%s: %s\n" % item for item in dct.items())
    return fields + "\n"
145
class HashSumMismatch(Exception):
    """Raised by hash_check when the stream's digest differs from the
    expected value."""
    pass
148
def hash_check(iterable, hashobj, expected_digest):
    """Wraps an iterable that yields bytes. It doesn't modify the sequence,
    but on the final element it verifies that the concatenation of bytes
    yields an expected digest value. Upon failure, the final next() results in
    a HashSumMismatch rather than StopIteration.
    """
    for chunk in iterable:
        hashobj.update(chunk)
        yield chunk
    if hashobj.hexdigest() == expected_digest:
        return
    raise HashSumMismatch()
160
def parse_date(s):
    """Parse an RFC 2822-style timestamp as found in Release files."""
    fmt = "%a, %d %b %Y %H:%M:%S %Z"
    return datetime.datetime.strptime(s, fmt)
163
class GPGV:
    """Thin wrapper around gpgv(1) using the system apt keyrings."""

    def __init__(self, files=("/etc/apt/trusted.gpg",),
                 partsdir="/etc/apt/trusted.gpg.d"):
        """Collect the readable keyrings from files and partsdir.

        A missing partsdir is tolerated, consistent with
        DebianMirror.get_all_keyrings.
        """
        candidates = list(files)
        try:
            candidates.extend(os.path.join(partsdir, e)
                              for e in os.listdir(partsdir))
        except FileNotFoundError:
            pass  # no parts directory -> only the explicit files
        self.keyrings = [f for f in candidates if os.access(f, os.R_OK)]

    def verify(self, content):
        """Verify an inline-signed message and return the signed payload.

        @raises ValueError: if gpgv rejects the signature
        """
        cmdline = ["gpgv", "--quiet", "--weak-digest", "SHA1", "--output", "-"]
        for keyring in self.keyrings:
            cmdline.extend(("--keyring", keyring))
        proc = subprocess.Popen(cmdline, stdin=subprocess.PIPE,
                                stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        stdout, _ = proc.communicate(content)
        if proc.wait() != 0:
            # typo "verififcation" fixed
            raise ValueError("signature verification failed")
        return stdout
183
class DebianMirror:
    """Access to one dist of a Debian mirror: downloads and verifies the
    InRelease file and streams hash-checked Packages/Sources indices."""
    hashfunc = "SHA256"  # digest used to verify downloaded index files
    def __init__(self, uri, dist="sid"):
        # uri: mirror base URI without trailing slash; dist: distribution name
        self.uri = uri
        self.dist = dist
        self.releasetime = None  # Date field of the last parsed release
        self.byhash = None  # whether the mirror advertises Acquire-By-Hash
        self.files = {}  # index filename -> expected hex digest

    @staticmethod
    def get_all_keyrings():
        """Yield candidate apt keyring paths, readable or not."""
        yield "/etc/apt/trusted.gpg"
        partsdir = "/etc/apt/trusted.gpg.d"
        try:
            for e in os.listdir(partsdir):
                yield os.path.join(partsdir, e)
        except FileNotFoundError:
            pass  # tolerate a missing trusted.gpg.d

    @staticmethod
    def get_keyrings():
        """Return an iterator over the readable apt keyring paths."""
        return filter(lambda f: os.access(f, os.R_OK),
                      DebianMirror.get_all_keyrings())

    def get_uri(self, filename):
        """Return the full URI of filename below this dist's directory."""
        return "%s/dists/%s/%s" % (self.uri, self.dist, filename)

    def fetch_release(self):
        """Download InRelease and return its gpg-verified payload bytes.

        @raises ValueError: if signature verification fails
        """
        resp = requests.get(self.get_uri("InRelease"))
        resp.raise_for_status()
        return GPGV().verify(resp.content)

    def parse_release(self, content):
        """Parse a verified release file, recording its timestamp, by-hash
        support and the expected digest of every index file.

        @raises ValueError: on future-dated, expired or malformed data
        """
        info, = list(parse_deb822([content]))
        self.releasetime = parse_date(info["Date"])
        valid_until = parse_date(info["Valid-Until"])
        # parse_date yields naive datetimes, compared against naive utcnow
        now = datetime.datetime.utcnow()
        if self.releasetime > now:
            raise ValueError("release file generated in future")
        if valid_until < now:
            raise ValueError("release signature expired")
        self.byhash = info.pop("Acquire-By-Hash", "no") == "yes"
        self.files = {}
        for line in info[self.hashfunc].splitlines():
            parts = line.split()
            if not parts:
                continue
            if len(parts) != 3:
                raise ValueError("invalid %s line %r" % (self.hashfunc, line))
            # fields are: digest, size, filename — keep digest keyed by name
            self.files[parts[2]] = parts[0]

    def update_release(self):
        """Fetch and parse the current InRelease file."""
        self.parse_release(self.fetch_release())

    def fetch_list(self, listname):
        """Stream the decompressed content of one index file while
        verifying its digest on the fly.

        The digest is computed over the downloaded (possibly compressed)
        bytes, so hash_check wraps the stream before decompression.
        """
        if listname + ".xz" in self.files:
            listname += ".xz"
            wrapper = lambda i: decompress_stream(i, lzma.LZMADecompressor())
        else:
            wrapper = lambda i: i
        hashvalue = self.files[listname]
        if self.byhash:
            # content-addressed path: dists/.../by-hash/SHA256/<digest>
            listname = "%s/by-hash/%s/%s" % (os.path.dirname(listname),
                                             self.hashfunc, hashvalue)
        with contextlib.closing(requests.get(self.get_uri(listname),
                                             stream=True)) as resp:
            resp.raise_for_status()
            it = resp.iter_content(65536)
            it = hash_check(it, hashlib.new(self.hashfunc), hashvalue)
            yield from wrapper(it)

    def fetch_sources(self, component="main"):
        """Stream the Sources index of the given component."""
        return self.fetch_list("%s/source/Sources" % component)

    def fetch_binaries(self, architecture, component="main"):
        """Stream the Packages index for one architecture and component."""
        return self.fetch_list("%s/binary-%s/Packages" %
                               (component, architecture))
261
# Binary package fields retained in the dose-builddebcheck input.
binfields = frozenset((
    "Architecture",
    "Breaks",
    "Conflicts",
    "Depends",
    "Essential",
    "Multi-Arch",
    "Package",
    "Pre-Depends",
    "Provides",
    "Version",
))

# Source dependency fields whose alternatives get stripped.
srcdepfields = frozenset((
    "Build-Conflicts",
    "Build-Conflicts-Arch",
    "Build-Depends",
    "Build-Depends-Arch",
))
# All source package fields retained in the dose-builddebcheck input.
srcfields = srcdepfields.union((
    "Architecture",
    "Package",
    "Version",
))

# Packages excluded from the host architecture package list; the inline
# comments give the reason each one breaks cross builds.
bad_foreign_packages = frozenset((
    "flex-old", # cannot execute /usr/bin/flex
    "icmake", # cannot execute /usr/bin/icmake, build system
    "jam", # cannot execute /usr/bin/jam, build system
    "libtool-bin", # #836123
    "python2.7-minimal", # fails postinst
    "python3.6-minimal", # fails postinst
    "python3.7-minimal", # fails postinst
    "swi-prolog-nox", # fails postinst
    "xrdp", # fails postinst
    "libgvc6", # fails postinst
))
299
def strip_dict(dct, keepfields):
    """Delete every key of dct not listed in keepfields (in place)."""
    for key in [k for k in dct if k not in keepfields]:
        del dct[key]
305
def strip_alternatvies(dct, fields):  # sic: misspelled name kept for callers
    """Reduce each comma-separated dependency field to its first
    alternative, dropping everything after any "|" (in place)."""
    for field in fields:
        value = dct.get(field)
        if value is None:
            continue  # field absent in this paragraph
        dct[field] = ",".join(item.split("|", 1)[0]
                              for item in value.split(","))
314
def latest_versions(pkgs):
    """Keep only the highest version of each package and drop negative
    placeholder entries; returns a generator over the survivors."""
    best = {}
    for pkg in pkgs:
        name = pkg["Package"]
        known = best.get(name)
        # replace on first sight or whenever the known version is not newer
        if known is None or version_compare(known["Version"],
                                            pkg["Version"]) <= 0:
            best[name] = pkg
    return (p for p in best.values()
            if "Package" in p and "Negative-Entry" not in p)
327
def make_binary_list_build(mirror, arch):
    """Yield stripped build-architecture packages, keeping only the
    crossbuild-essential package for arch and widening its Depends."""
    prefix = "crossbuild-essential-"
    for pkg in parse_deb822(mirror.fetch_binaries(BUILD_ARCH)):
        name = pkg["Package"]
        if name.startswith(prefix):
            if name != prefix + arch:
                continue  # crossbuild-essential for some other architecture
            pkg["Depends"] += ", libc-dev:%s, libstdc++-dev:%s" % (arch, arch)
        strip_dict(pkg, binfields)
        yield pkg
336
def make_binary_list_host(mirror, arch):
    """Yield stripped host-architecture packages, skipping those that
    cannot usefully be installed for a foreign architecture."""
    for pkg in parse_deb822(mirror.fetch_binaries(arch)):
        unusable = (pkg["Architecture"] == "all"
                    or pkg.get("Multi-Arch") == "foreign"
                    or pkg.get("Essential") == "yes"
                    or pkg["Package"] in bad_foreign_packages)
        if unusable:
            continue
        strip_dict(pkg, binfields)
        yield pkg
349
def make_binary_list(mirror, arch):
    """Yield the build-architecture packages followed by the
    host-architecture packages."""
    yield from make_binary_list_build(mirror, arch)
    yield from make_binary_list_host(mirror, arch)
353
def make_source_list(mirror, arch):
    """Yield stripped source packages buildable on arch; unbuildable ones
    become negative placeholder entries masking older versions."""
    for src in parse_deb822(mirror.fetch_sources()):
        if src.get("Extra-Source-Only") == "yes":
            continue
        buildable = any(arch_match(arch, pattern)
                        for pattern in src["Architecture"].split())
        if not buildable:
            # dummy entry preventing older matching versions
            yield {"Package": src["Package"], "Version": src["Version"],
                   "Negative-Entry": "yes"}
            continue
        strip_dict(src, srcfields)
        strip_alternatvies(src, srcdepfields)
        yield src
367
def check_bdsat(mirror, arch):
    """Run dose-builddebcheck for cross building on arch and yield
    (source, version, reason) triples; reason is None when the build
    dependencies are satisfiable, otherwise a short explanation.
    """
    cmd = [
        "--deb-native-arch=" + BUILD_ARCH,
        "--deb-host-arch=" + arch,
        "--deb-drop-b-d-indep",
        "--deb-profiles=" + ",".join(PROFILES),
        "--successes",
        "--failures",
        "--explain",
        "--explain-minimal",
        "--deb-emulate-sbuild",
    ]

    with tempfile.NamedTemporaryFile("w", encoding="utf8") as bintmp, \
            tempfile.NamedTemporaryFile("w", encoding="utf8") as srctmp:
        for p in make_binary_list(mirror, arch):
            bintmp.write(serialize_deb822(p))
        bintmp.flush()
        cmd.append(bintmp.name)

        for p in latest_versions(make_source_list(mirror, arch)):
            srctmp.write(serialize_deb822(p))
        srctmp.flush()
        cmd.append(srctmp.name)

        dose_result = call_dose_builddebcheck(cmd)
        next(dose_result) # skip header
        for d in dose_result:
            if d["status"] == "ok":
                yield (d["package"], d["version"], None)
                continue
            r = d["reasons"][0]
            if "missing" in r:
                # name of the first unsatisfiable dependency, without
                # version constraint or architecture qualifier
                dep = r["missing"]["pkg"]["unsat-dependency"]
                reason = "missing %s" % dep.split()[0].split(":", 1)[0]
            elif "conflict" in r:
                conflict = r["conflict"]["pkg1"]["unsat-conflict"]
                # a "!=" constraint indicates a version skew between
                # architectures rather than a real conflict
                kind = "skew " if ' (!= ' in conflict else "conflict "
                reason = kind + conflict.split()[0].split(':', 1)[0]
            else:
                # was a bare `assert False`: stripped under -O and silent;
                # raise explicitly with the offending reason instead
                raise AssertionError("unexpected dose reason %r" % r)
            yield (d["package"], d["version"], reason)
409
def update_depcheck(mirror, db, architecture):
    """Recompute build dependency satisfiability for one architecture and
    reconcile the depstate table with the new result in one transaction."""
    now = datetime.datetime.utcnow()
    mirror.update_release()
    state = {}
    # source -> (version, reason); reason None means satisfiable
    for source, version, reason in check_bdsat(mirror, architecture):
        state[source] = (version, reason)
    with contextlib.closing(db.cursor()) as cur:
        cur.execute("BEGIN;")
        cur.execute("SELECT source, version, satisfiable, reason FROM depstate WHERE architecture = ?;",
                    (architecture,))
        for source, version, satisfiable, reason in list(cur.fetchall()):
            # unchanged rows are removed from state so they are neither
            # deleted nor re-inserted; everything else is deleted first
            if satisfiable == (reason is None) and \
               state.get(source) == (version, reason):
                del state[source]
            else:
                cur.execute("DELETE FROM depstate WHERE source = ? AND version = ? AND architecture = ?;",
                            (source, version, architecture))
        # insert whatever remains in state (new or changed entries)
        cur.executemany("INSERT INTO depstate (source, architecture, version, satisfiable, reason) VALUES (?, ?, ?, ?, ?);",
                        ((source, architecture, version, reason is None,
                          reason)
                         for source, (version, reason) in state.items()))
        # record the run and clear the giveback flag for this architecture
        cur.execute("UPDATE depcheck SET releasetime = ?, updatetime = ?, giveback = 0 WHERE architecture = ?",
                    (mirror.releasetime, now, architecture))
    db.commit()
434
def main():
    """Update dependency state for every architecture that was given
    back, has not been updated recently, or is behind the mirror."""
    argp = argparse.ArgumentParser()
    argp.add_argument('-m', '--mirror',
                      default='http://deb.debian.org/debian',
                      help="debian mirror to use")
    args = argp.parse_args()
    mirror = DebianMirror(args.mirror)
    mirror.update_release()
    db = sqlite3.connect("db")
    cur = db.cursor()
    # architectures updated before this point in time count as stale;
    # the variable was previously computed but unused while the query
    # recomputed the same expression inline
    lastupdate = datetime.datetime.utcnow() - datetime.timedelta(hours=6)
    cur.execute("""
        SELECT architecture FROM depcheck
            WHERE giveback = 1 OR updatetime < ? OR releasetime < ?;""",
                (lastupdate, mirror.releasetime))
    for architecture, in list(cur.fetchall()):
        print("update %s" % architecture)
        update_depcheck(mirror, db, architecture)
454
# run as a script; importing the module has no side effects beyond setup
if __name__ == "__main__":
    main()