"""Helpers for iterating database cursors and Debian mirror package lists."""

import contextlib
import errno
import urllib.error
import urllib.request

import debian.deb822

from dedup.compression import decompress


def fetchiter(cursor):
    """Iterate over all remaining rows of a cursor in fetchmany() batches.

    @param cursor: an object whose fetchmany() returns a (possibly empty)
        sequence of rows
    @returns: a generator yielding the rows one by one; it ends as soon as
        fetchmany() returns an empty batch
    """
    while True:
        rows = cursor.fetchmany()
        if not rows:
            return
        yield from rows


def open_compressed_mirror_url(url, extensions=(".xz", ".gz", "")):
    """Fetch the given url. Try appending each of the given compression
    schemes and move on in case it doesn't exist. Decompress the resulting
    stream on the fly.

    @param url: the url without any compression extension
    @param extensions: the suffixes to try, in order of preference; each one
        is appended to url and also passed to decompress()
    @returns: a file-like with the decompressed contents
    @raises OSError: with errno ENOENT if none of the candidates exists
    @raises urllib.error.URLError: for any failure other than "not found"
    """
    for ext in extensions:
        try:
            handle = urllib.request.urlopen(url + ext)
        except urllib.error.HTTPError as error:
            if error.code != 404:
                raise
            # The HTTPError wraps the server response. As it is swallowed
            # here, close it explicitly instead of leaving the connection
            # open until garbage collection.
            if error.fp is not None:
                error.close()
        except urllib.error.URLError as error:
            # Non-HTTP schemes report a missing resource via an OSError-like
            # reason carrying ENOENT. Anything else is a real failure.
            if not hasattr(error.reason, "errno"):
                raise
            if error.reason.errno != errno.ENOENT:
                raise
        else:
            try:
                return decompress(handle, ext)
            except Exception:
                # Do not leak the open response if the decompressor cannot
                # be set up for this extension.
                handle.close()
                raise
    raise OSError(errno.ENOENT, "No such file or directory")


def iterate_packages(mirror, architecture, distribution="sid", section="main"):
    """Download the relevant binary package list and generate
    debian.deb822.Packages objects per listed package.

    @param mirror: base url of the mirror, without a trailing slash
    @param architecture: architecture name used in the binary-* path component
    @param distribution: distribution name below dists/
    @param section: archive section below the distribution
    @returns: a generator of debian.deb822.Packages objects; the download is
        closed once the generator is exhausted or closed
    """
    url = "%s/dists/%s/%s/binary-%s/Packages" % \
        (mirror, distribution, section, architecture)
    with contextlib.closing(open_compressed_mirror_url(url)) as pkglist:
        yield from debian.deb822.Packages.iter_paragraphs(pkglist)