# dedup/utils.py
import contextlib
import errno
import sqlite3
import typing
import urllib.error
import urllib.request

import debian.deb822

from dedup.compression import decompress


def fetchiter(cursor: sqlite3.Cursor) -> typing.Iterator[typing.Any]:
    """Yield the remaining rows of the given cursor, retrieving them in
    fetchmany-sized batches rather than all at once."""
    rows = cursor.fetchmany()
    while rows:
        yield from rows
        rows = cursor.fetchmany()

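# A usage sketch (not part of this module): fetchiter streams rows in
# fetchmany-sized batches instead of materializing fetchall(); the database
# path and table layout below are hypothetical.
#
#     conn = sqlite3.connect("dedup.sqlite3")
#     cur = conn.cursor()
#     cur.execute("SELECT package, filename, size FROM content")
#     for package, filename, size in fetchiter(cur):
#         print(package, filename, size)
#     conn.close()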

def open_compressed_mirror_url(
    url: str, extensions: typing.Iterable[str] = (".xz", ".gz", "")
) -> typing.BinaryIO:
    """Fetch the given url. Try appending each of the given compression
    schemes and move on in case it doesn't exist. Decompress the resulting
    stream on the fly.
    @returns: a file-like with the decompressed contents
    """
    for ext in extensions:
        try:
            handle = urllib.request.urlopen(url + ext)
        except urllib.error.HTTPError as error:
            # A 404 only means this particular extension is not offered by
            # the mirror; any other HTTP error is propagated.
            if error.code != 404:
                raise
        except urllib.error.URLError as error:
            # Non-HTTP mirrors (e.g. file:// URLs) report a missing file as
            # a URLError wrapping an OSError; treat ENOENT like a 404 and
            # re-raise everything else.
            if not hasattr(error.reason, "errno"):
                raise
            if error.reason.errno != errno.ENOENT:
                raise
        else:
            return decompress(handle, ext)
    raise OSError(errno.ENOENT, "No such file or directory")

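# A usage sketch (mirror URL purely illustrative): the returned handle is
# already decompressed, so it can be read like any other binary stream.
#
#     url = "https://deb.debian.org/debian/dists/sid/main/source/Sources"
#     with contextlib.closing(open_compressed_mirror_url(url)) as f:
#         first_line = f.readline()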

def iterate_packages(
    mirror: str,
    architecture: str,
    distribution: str = "sid",
    section: str = "main",
) -> typing.Iterator[debian.deb822.Packages]:
    """Download the relevant binary package list and generate
    debian.deb822.Packages objects per listed package."""
    url = "%s/dists/%s/%s/binary-%s/Packages" % \
            (mirror, distribution, section, architecture)
    with contextlib.closing(open_compressed_mirror_url(url)) as pkglist:
        yield from debian.deb822.Packages.iter_paragraphs(pkglist)