import contextlib
import errno
import sqlite3
import typing
import urllib.error
import urllib.request

import debian.deb822

from dedup.compression import decompress


def fetchiter(cursor: sqlite3.Cursor) -> typing.Iterator[typing.Any]:
    """Yield all remaining rows of the given cursor, fetching them in
    batches via fetchmany."""
    rows = cursor.fetchmany()
    while rows:
        yield from rows
        rows = cursor.fetchmany()


def open_compressed_mirror_url(
    url: str, extensions: typing.Iterable[str] = (".xz", ".gz", "")
) -> typing.BinaryIO:
    """Fetch the given url, trying each of the given compression extensions
    in turn and skipping those that do not exist on the mirror. Decompress
    the resulting stream on the fly.
    @returns: a file-like object with the decompressed contents
    """
    for ext in extensions:
        try:
            handle = urllib.request.urlopen(url + ext)
        except urllib.error.HTTPError as error:
            # Anything other than "not found" is a real failure.
            if error.code != 404:
                raise
        except urllib.error.URLError as error:
            # Tolerate a missing file (e.g. file:// mirrors), re-raise the rest.
            if not hasattr(error.reason, "errno"):
                raise
            if error.reason.errno != errno.ENOENT:
                raise
        else:
            return decompress(handle, ext)
    raise OSError(errno.ENOENT, "No such file or directory")


def iterate_packages(
    mirror: str,
    architecture: str,
    distribution: str = "sid",
    section: str = "main",
) -> typing.Iterator[debian.deb822.Packages]:
    """Download the relevant binary package list and generate a
    debian.deb822.Packages object per listed package."""
    url = "%s/dists/%s/%s/binary-%s/Packages" % \
        (mirror, distribution, section, architecture)
    with contextlib.closing(open_compressed_mirror_url(url)) as pkglist:
        yield from debian.deb822.Packages.iter_paragraphs(pkglist)
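

# A minimal usage sketch, assuming network access to a Debian mirror. The
# mirror URL and architecture below are illustrative choices, not part of
# this module's API.
if __name__ == "__main__":
    mirror = "https://deb.debian.org/debian"
    for pkg in iterate_packages(mirror, "amd64"):
        # Each pkg is a debian.deb822.Packages paragraph; print a few fields.
        print(pkg["Package"], pkg["Version"])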