diff options
author | Helmut Grohne <helmut@subdivi.de> | 2016-05-25 19:27:35 +0200 |
---|---|---|
committer | Helmut Grohne <helmut@subdivi.de> | 2016-05-25 19:27:35 +0200 |
commit | 420804c25797add9689bacbd3db138a92a623c8e (patch) | |
tree | 5394acb6f3270a0b656fb4b0b5152f2f154835cc | |
parent | 89378e07e16c071efed82753cc16fbe2b9fd1598 (diff) | |
download | debian-dedup-420804c25797add9689bacbd3db138a92a623c8e.tar.gz |
autoimport: improve fetching package lists
Move the fetching part into dedup.utils. Instead of hard-coding the
gzip-compressed copy, try xz, gz and plain in that order. Also take care
to actually close the connection.
-rwxr-xr-x | autoimport.py | 32 | ||||
-rw-r--r-- | dedup/utils.py | 33 |
2 files changed, 47 insertions, 18 deletions
def process_http(pkgs, url, addhash=True):
    """Fetch the sid main/binary-amd64 package list from the given mirror
    and merge it into pkgs, keeping only the newest version of each package.

    @param pkgs: dict mapping package names to dicts with keys "version",
        "filename" and (when addhash is true) "sharing"; updated in place
    @param url: base url of the mirror, without a trailing slash
    @param addhash: whether to record the SHA256 hash under "sharing"
    """
    # Keep the mirror base url intact: it is reused below to construct
    # the pool file urls.  Mutating url here (url += ...) would prepend
    # the dists/.../Packages path to every Filename entry.
    pkglist_url = url + "/dists/sid/main/binary-amd64/Packages"
    with contextlib.closing(open_compressed_mirror_url(pkglist_url)) as pkglist:
        for pkg in deb822.Packages.iter_paragraphs(pkglist):
            name = pkg["Package"]
            # Skip entries older than what we already recorded.
            if name in pkgs and \
                    version_compare(pkgs[name]["version"], pkg["Version"]) > 0:
                continue
            inst = dict(version=pkg["Version"],
                        filename="%s/%s" % (url, pkg["Filename"]))
            if addhash:
                inst["sharing"] = pkg["SHA256"]
            pkgs[name] = inst
def open_compressed_mirror_url(url, extensions=(u".xz", u".gz", u"")):
    """Fetch the given url, appending each of the given compression
    extensions in turn and skipping those that do not exist.  The
    response stream is decompressed on the fly.
    @returns: a file-like object with the decompressed contents
    @raises OSError: with errno.ENOENT when no candidate url exists
    """
    for suffix in extensions:
        candidate = url + suffix
        try:
            raw = urlopen(candidate)
        except HTTPError as err:
            # A 404 merely means this compression variant is absent;
            # move on to the next extension.
            if err.code != 404:
                raise
        except URLError as err:
            # file:// urls report a missing file via the wrapped OSError's
            # errno.  Anything without an ENOENT errno is a real failure.
            if getattr(err.reason, "errno", None) != errno.ENOENT:
                raise
        else:
            return decompress(raw, suffix)
    raise OSError(errno.ENOENT, "No such file or directory")