move yield_chunks to common.py
[~helmut/crossqa.git] / depcheck.py
1 #!/usr/bin/python3
2
3 import collections
4 import contextlib
5 import datetime
6 import hashlib
7 import itertools
8 import lzma
9 import os.path
10 import sqlite3
11 import subprocess
12 import tempfile
13 import yaml
14
15 import apt_pkg
16 apt_pkg.init()
17 version_compare = apt_pkg.version_compare
18 import requests
19
20 from common import decompress_stream, yield_lines
21
# Architecture used as the build (native) architecture for all checks.
BUILD_ARCH = "amd64"
# Base URL of the Debian mirror to fetch indices from
# (presumably a local caching proxy — TODO confirm).
MIRROR = "http://proxy:3142/debian"
# Build profiles passed to dose-builddebcheck.
PROFILES = frozenset(("cross", "nocheck"))

# One row of dpkg's cputable: Debian cpu name, GNU cpu name, a regex for
# config.guess output, word size in bits, and endianness.
CPUEntry = collections.namedtuple('CPUEntry',
                                  'debcpu gnucpu regex bits endianness')

# One row of dpkg's tupletable: the four components of a Debian arch tuple.
TupleEntry = collections.namedtuple('TupleEntry',
                                    'abi libc os cpu')
31
class Architectures:
    """Lookup tables derived from dpkg's architecture table files."""

    @staticmethod
    def read_table(filename):
        """Yield the whitespace-separated fields of each non-comment line."""
        with open(filename) as table:
            for row in table:
                if row.startswith("#"):
                    continue
                yield row.split()

    def __init__(self, cputable="/usr/share/dpkg/cputable",
                 tupletable="/usr/share/dpkg/tupletable",
                 abitable="/usr/share/dpkg/abitable"):
        self.cputable = {}
        self.tupletable = {}
        self.abitable = {}
        self.read_cputable(cputable)
        self.read_tupletable(tupletable)
        self.read_abitable(abitable)

    def read_cputable(self, cputable):
        """(Re)load the mapping from Debian cpu name to CPUEntry."""
        self.cputable.clear()
        for fields in self.read_table(cputable):
            fields[3] = int(fields[3])  # the bits column is numeric
            record = CPUEntry(*fields)
            self.cputable[record.debcpu] = record

    def read_tupletable(self, tupletable):
        """(Re)load the mapping from Debian architecture name to TupleEntry."""
        self.tupletable.clear()
        for debtuple, debarch in self.read_table(tupletable):
            if '<cpu>' not in debtuple:
                self.tupletable[debarch] = TupleEntry(*debtuple.split("-"))
                continue
            # Wildcard rows expand to one entry per cpu from the cputable.
            for cpu in self.cputable:
                record = TupleEntry(*debtuple.replace("<cpu>", cpu)
                                    .split("-"))
                self.tupletable[debarch.replace("<cpu>", cpu)] = record

    def read_abitable(self, abitable):
        """(Re)load the mapping from architecture name to ABI bits."""
        self.abitable.clear()
        for arch, bits in self.read_table(abitable):
            self.abitable[arch] = int(bits)

    def match(self, arch, pattern):
        """Tell whether the architecture wildcard `pattern` covers `arch`."""
        parts = pattern.split("-")
        if "any" not in parts:
            # No wildcard component: only an exact name match counts.
            return pattern == arch
        # Left-pad with "any" up to the full four tuple components.
        parts = ["any"] * (4 - len(parts)) + parts
        entry = self.tupletable[arch]
        return all(parts[i] in (entry[i], "any") for i in range(4))

    def getendianness(self, arch):
        """Return the endianness of the cpu underlying `arch`."""
        return self.cputable[self.tupletable[arch].cpu].endianness
85
# Shared instance of the dpkg architecture tables (loaded at import time)
# and a shortcut for the frequently used wildcard matcher.
architectures = Architectures()
arch_match = architectures.match
88
def call_dose_builddebcheck(arguments):
    """
    @type arguments: [str]
    @param arguments: command line arguments to dose-builddebcheck
    @returns: an iterable over loaded yaml documents. The first document
              is the header, all other documents are per-package.
    @raises subprocess.CalledProcessError: if dose errors out
    """
    cmd = ["dose-builddebcheck"]
    cmd.extend(arguments)

    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE)

    lines = []
    for line in proc.stdout:
        if line.startswith(b'  '):
            lines.append(line)
        elif line == b' -\n':
            # Document separator: everything gathered so far forms one
            # complete yaml document.  Use the same loader as for the
            # final document below — the original mixed CBaseLoader and
            # CSafeLoader, which resolve scalar types differently.
            yield yaml.load(b"".join(lines), Loader=yaml.CSafeLoader)
            lines = []
    proc.stdout.close()
    if lines:
        yield yaml.load(b"".join(lines), Loader=yaml.CSafeLoader)
    # dose exits 1 when some packages are unsatisfiable; anything beyond
    # 0 or 1 signals a real failure.
    if proc.wait() not in (0, 1):
        raise subprocess.CalledProcessError(proc.returncode, cmd)
114
def parse_deb822(iterable):
    """Parse an iterable of bytes into an iterable of str-dicts."""
    paragraph = {}
    field = None
    content = None
    for raw in yield_lines(iterable):
        text = raw.decode("utf8")
        if text == "\n":
            # Paragraph separator: finish the pending field and emit.
            if field is not None:
                paragraph[field] = content.strip()
                field = None
            yield paragraph
            paragraph = {}
        elif field and text.startswith((" ", "\t")):
            # Continuation line belonging to the current field.
            content += text
        else:
            if field is not None:
                paragraph[field] = content.strip()
            try:
                field, content = text.split(":", 1)
            except ValueError:
                raise ValueError("invalid input line %r" % text)
    # Flush a trailing paragraph that was not blank-line terminated.
    if field is not None:
        paragraph[field] = content.strip()
    if paragraph:
        yield paragraph
141
def serialize_deb822(dct):
    """Render a str-dict as one deb822 paragraph string, including the
    terminating blank line."""
    fields = ["%s: %s\n" % item for item in dct.items()]
    return "".join(fields) + "\n"
145
class HashSumMismatch(Exception):
    """Raised when downloaded data does not match its expected digest."""
148
def hash_check(iterable, hashobj, expected_digest):
    """Pass the byte chunks of `iterable` through unchanged while feeding
    them into `hashobj`.  Once the input is exhausted the accumulated
    digest is compared against `expected_digest`; on disagreement the
    terminating StopIteration becomes a HashSumMismatch instead.
    """
    for chunk in iterable:
        hashobj.update(chunk)
        yield chunk
    if hashobj.hexdigest() != expected_digest:
        raise HashSumMismatch()
160
def parse_date(s):
    """Parse an RFC 822 style timestamp as found in Release files."""
    date_format = "%a, %d %b %Y %H:%M:%S %Z"
    return datetime.datetime.strptime(s, date_format)
163
class GPGV:
    """Thin wrapper around gpgv(1) for verifying inline-signed data."""

    def __init__(self, files=("/etc/apt/trusted.gpg",),
                 partsdir="/etc/apt/trusted.gpg.d"):
        """Collect the readable keyrings from `files` and `partsdir`."""
        candidates = list(files)
        try:
            candidates.extend(os.path.join(partsdir, e)
                              for e in os.listdir(partsdir))
        except FileNotFoundError:
            # Match DebianMirror.get_all_keyrings: a missing parts
            # directory simply contributes no keyrings.
            pass
        # Only keep keyrings that gpgv will actually be able to open.
        self.keyrings = list(filter(lambda f: os.access(f, os.R_OK),
                                    candidates))

    def verify(self, content):
        """Verify inline-signed `content` bytes and return the payload.
        @raises ValueError: if gpgv rejects the signature
        """
        cmdline = ["gpgv", "--quiet", "--weak-digest", "SHA1", "--output", "-"]
        for keyring in self.keyrings:
            cmdline.extend(("--keyring", keyring))
        proc = subprocess.Popen(cmdline, stdin=subprocess.PIPE,
                                stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        stdout, _ = proc.communicate(content)
        if proc.wait() != 0:
            # Fixed typo in the original message ("verififcation").
            raise ValueError("signature verification failed")
        return stdout
183
class DebianMirror:
    """State and download helpers for one dist of a Debian mirror."""
    # Algorithm used to verify index files; must be both a Release file
    # field name and a hashlib algorithm name.
    hashfunc = "SHA256"
    def __init__(self, uri, dist="sid"):
        self.uri = uri
        self.dist = dist
        self.releasetime = None  # Date of the last parsed Release file
        self.byhash = None  # whether the archive supports by-hash lookup
        self.files = {}  # index file name -> expected hex digest

    @staticmethod
    def get_all_keyrings():
        """Yield candidate apt keyring paths, readable or not."""
        yield "/etc/apt/trusted.gpg"
        partsdir = "/etc/apt/trusted.gpg.d"
        try:
            for e in os.listdir(partsdir):
                yield os.path.join(partsdir, e)
        except FileNotFoundError:
            # A missing parts directory simply contributes no keyrings.
            pass

    @staticmethod
    def get_keyrings():
        """Return an iterator over the readable apt keyring paths."""
        return filter(lambda f: os.access(f, os.R_OK),
                      DebianMirror.get_all_keyrings())

    def get_uri(self, filename):
        """Construct the full URI of a file below this dist's directory."""
        return "%s/dists/%s/%s" % (self.uri, self.dist, filename)

    def fetch_release(self):
        """Download InRelease and return its gpg-verified payload bytes."""
        resp = requests.get(self.get_uri("InRelease"))
        resp.raise_for_status()
        return GPGV().verify(resp.content)

    def parse_release(self, content):
        """Populate releasetime, byhash and files from Release content.

        @raises ValueError: on implausible Date/Valid-Until fields or a
            malformed hash listing
        """
        info, = list(parse_deb822([content]))
        self.releasetime = parse_date(info["Date"])
        valid_until = parse_date(info["Valid-Until"])
        now = datetime.datetime.utcnow()
        # Sanity-check the validity window against the local clock.
        if self.releasetime > now:
            raise ValueError("release file generated in future")
        if valid_until < now:
            raise ValueError("release signature expired")
        self.byhash = info.pop("Acquire-By-Hash", "no") == "yes"
        self.files = {}
        # Each line has the form "<digest> <size> <filename>".
        for line in info[self.hashfunc].splitlines():
            parts = line.split()
            if not parts:
                continue
            if len(parts) != 3:
                raise ValueError("invalid %s line %r" % (self.hashfunc, line))
            self.files[parts[2]] = parts[0]

    def update_release(self):
        """Fetch and parse the current InRelease file."""
        self.parse_release(self.fetch_release())

    def fetch_list(self, listname):
        """Stream the decompressed, digest-verified content of an index
        file, preferring the xz-compressed variant when the Release file
        lists one."""
        if listname + ".xz" in self.files:
            listname += ".xz"
            wrapper = lambda i: decompress_stream(i, lzma.LZMADecompressor())
        else:
            wrapper = lambda i: i
        # Look up the digest before listname is possibly rewritten to its
        # by-hash location below.
        hashvalue = self.files[listname]
        if self.byhash:
            listname = "%s/by-hash/%s/%s" % (os.path.dirname(listname),
                                             self.hashfunc, hashvalue)
        with requests.get(self.get_uri(listname), stream=True) as resp:
            resp.raise_for_status()
            it = resp.iter_content(65536)
            # Verify while streaming; hash_check raises HashSumMismatch
            # at end of stream if the digest disagrees.
            it = hash_check(it, hashlib.new(self.hashfunc), hashvalue)
            yield from wrapper(it)

    def fetch_sources(self, component="main"):
        """Stream the Sources index of the given component."""
        return self.fetch_list("%s/source/Sources" % component)

    def fetch_binaries(self, architecture, component="main"):
        """Stream the Packages index of one architecture and component."""
        return self.fetch_list("%s/binary-%s/Packages" %
                               (component, architecture))
260
# Binary package fields relevant for dependency resolution; all other
# fields are stripped to keep the temporary Packages files small.
binfields = frozenset((
    "Architecture",
    "Breaks",
    "Conflicts",
    "Depends",
    "Essential",
    "Multi-Arch",
    "Package",
    "Pre-Depends",
    "Provides",
    "Version",
))

# Source package fields that carry build dependency relationships.
srcdepfields = frozenset((
    "Build-Conflicts",
    "Build-Conflicts-Arch",
    "Build-Depends",
    "Build-Depends-Arch",
))
# All source package fields that are kept for the dependency check.
srcfields = srcdepfields.union((
    "Architecture",
    "Package",
    "Version",
))

# Packages excluded from the host architecture package list because they
# are known to break when installed as foreign-architecture packages.
bad_foreign_packages = frozenset((
    "flex-old", # cannot execute /usr/bin/flex
    "icmake", # cannot execute /usr/bin/icmake, build system
    "jam", # cannot execute /usr/bin/jam, build system
    "libtool-bin", # #836123
    "python2.7-minimal", # fails postinst
    "python3.6-minimal", # fails postinst
    "python3.7-minimal", # fails postinst
    "swi-prolog-nox", # fails postinst
    "xrdp", # fails postinst
    "libgvc6", # fails postinst
))
298
def strip_dict(dct, keepfields):
    """Remove, in place, every key of dct not listed in keepfields."""
    for field in list(dct):
        if field not in keepfields:
            del dct[field]
304
def strip_alternatvies(dct, fields):
    """Drop all but the first alternative from each dependency field.

    NOTE(review): the name is misspelled ("alternatvies"), but callers
    rely on this spelling, so it is kept for compatibility.
    """
    for field in fields:
        if field not in dct:
            continue
        deps = dct[field].split(",")
        dct[field] = ",".join(dep.split("|", 1)[0] for dep in deps)
313
def latest_versions(pkgs):
    """Reduce pkgs to one entry per package name — the one carrying the
    highest version — and drop dummy "Negative-Entry" records from the
    result."""
    newest = {}
    for entry in pkgs:
        name = entry["Package"]
        try:
            if version_compare(newest[name]["Version"],
                               entry["Version"]) > 0:
                continue  # a strictly newer version is already recorded
        except KeyError:
            pass  # first occurrence of this package name
        newest[name] = entry
    return (entry for entry in newest.values()
            if "Package" in entry and "Negative-Entry" not in entry)
326
def make_binary_list_build(mirror, arch):
    """Yield stripped Packages entries for the build architecture.

    Of the crossbuild-essential-* packages only the one for `arch` is
    kept, and host-architecture toolchain libraries are appended to its
    dependencies."""
    for pkg in parse_deb822(mirror.fetch_binaries(BUILD_ARCH)):
        name = pkg["Package"]
        if name.startswith("crossbuild-essential-"):
            if name != "crossbuild-essential-" + arch:
                continue
            pkg["Depends"] += ", libc-dev:%s, libstdc++-dev:%s" % (arch, arch)
        strip_dict(pkg, binfields)
        yield pkg
335
def make_binary_list_host(mirror, arch):
    """Yield stripped Packages entries of the host architecture, skipping
    those that cannot or must not be installed as foreign packages."""
    for pkg in parse_deb822(mirror.fetch_binaries(arch)):
        unwanted = (pkg["Architecture"] == "all"
                    or pkg.get("Multi-Arch") == "foreign"
                    or pkg.get("Essential") == "yes"
                    or pkg["Package"] in bad_foreign_packages)
        if unwanted:
            continue
        strip_dict(pkg, binfields)
        yield pkg
348
def make_binary_list(mirror, arch):
    """Combine the build- and host-architecture package entry streams."""
    streams = (make_binary_list_build(mirror, arch),
               make_binary_list_host(mirror, arch))
    return itertools.chain.from_iterable(streams)
352
def make_source_list(mirror, arch):
    """Yield stripped Sources entries buildable for `arch`.

    Non-matching sources yield a dummy "Negative-Entry" record instead,
    so that older matching versions cannot win in latest_versions()."""
    for src in parse_deb822(mirror.fetch_sources()):
        if src.get("Extra-Source-Only") == "yes":
            continue
        patterns = src["Architecture"].split()
        if any(arch_match(arch, pattern) for pattern in patterns):
            strip_dict(src, srcfields)
            strip_alternatvies(src, srcdepfields)
            yield src
        else:
            # dummy entry preventing older matching versions
            yield {"Package": src["Package"], "Version": src["Version"],
                   "Negative-Entry": "yes"}
366
def check_bdsat(mirror, arch):
    """Run dose-builddebcheck for cross compiling from BUILD_ARCH to arch
    and yield (source, version, satisfiable, reason) tuples, where reason
    is None for satisfiable packages and a short classification string
    otherwise."""
    cmd = [
        "--deb-native-arch=" + BUILD_ARCH,
        "--deb-host-arch=" + arch,
        "--deb-drop-b-d-indep",
        "--deb-profiles=" + ",".join(PROFILES),
        "--successes",
        "--failures",
        "--explain",
        "--explain-minimal",
        "--deb-emulate-sbuild",
    ]

    # Dump the available binary packages and the sources to check into
    # temporary files that dose reads.
    with tempfile.NamedTemporaryFile("w", encoding="utf8") as bintmp, \
            tempfile.NamedTemporaryFile("w", encoding="utf8") as srctmp:
        for p in make_binary_list(mirror, arch):
            bintmp.write(serialize_deb822(p))
        bintmp.flush()
        cmd.append(bintmp.name)

        for p in latest_versions(make_source_list(mirror, arch)):
            srctmp.write(serialize_deb822(p))
        srctmp.flush()
        cmd.append(srctmp.name)

        dose_result = call_dose_builddebcheck(cmd)
        next(dose_result) # skip header
        for d in dose_result:
            if d["status"] == "ok":
                yield (d["package"], d["version"], True, None)
            else:
                # Summarize only the first dose reason: either a missing
                # dependency or a conflict on some package.
                r = d["reasons"][0]
                if "missing" in r:
                    reason = "missing %s" % r["missing"]["pkg"]["unsat-dependency"].split()[0].split(":", 1)[0]
                elif "conflict" in r:
                    r = r["conflict"]["pkg1"]["unsat-conflict"]
                    # A conflict of the form "pkg (!= version)" indicates
                    # a multiarch version skew rather than a real conflict.
                    reason = "skew " if ' (!= ' in r else "conflict "
                    reason += r.split()[0].split(':', 1)[0]
                else:
                    assert False
                yield (d["package"], d["version"], False, reason)
408
def update_depcheck(mirror, db, architecture):
    """Recompute dependency satisfiability for one architecture and sync
    the result into the depstate table, then refresh the bookkeeping row
    in depcheck."""
    now = datetime.datetime.utcnow()
    mirror.update_release()
    # Map each source package to its freshly computed result.
    state = {}
    for source, version, satisfiable, reason in check_bdsat(mirror, architecture):
        state[source] = (version, satisfiable, reason)
    with contextlib.closing(db.cursor()) as cur:
        cur.execute("BEGIN;")
        cur.execute("SELECT source, version, satisfiable, reason FROM depstate WHERE architecture = ?;",
                    (architecture,))
        # Diff against the stored state: unchanged rows are removed from
        # `state`, outdated rows are deleted from the table.
        for source, version, satisfiable, reason in list(cur.fetchall()):
            if state.get(source) == (version, satisfiable, reason):
                del state[source]
            else:
                cur.execute("DELETE FROM depstate WHERE source = ? AND version = ? AND architecture = ?;",
                            (source, version, architecture))
        # Whatever remains in `state` is new or changed and gets inserted.
        cur.executemany("INSERT INTO depstate (source, architecture, version, satisfiable, reason) VALUES (?, ?, ?, ?, ?);",
                        ((source, architecture, version, satisfiable, reason)
                         for source, (version, satisfiable, reason) in state.items()))
        cur.execute("UPDATE depcheck SET releasetime = ?, updatetime = ?, giveback = 0 WHERE architecture = ?",
                    (mirror.releasetime, now, architecture))
    db.commit()
431
def main():
    """Update dependency check results for every architecture whose data
    is given back, older than six hours, or predates the current mirror
    Release file."""
    mirror = DebianMirror(MIRROR)
    mirror.update_release()
    db = sqlite3.connect("db", detect_types=sqlite3.PARSE_DECLTYPES)
    cur = db.cursor()
    cur.execute("SELECT architecture, releasetime, updatetime, giveback FROM depcheck;")
    lastupdate = datetime.datetime.utcnow() - datetime.timedelta(hours=6)
    for architecture, releasetime, updatetime, giveback in list(cur.fetchall()):
        if giveback or updatetime < lastupdate or releasetime < mirror.releasetime:
            print("update %s" % architecture)
            update_depcheck(mirror, db, architecture)

if __name__ == "__main__":
    main()