summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorHelmut Grohne <helmut@subdivi.de>2016-05-25 19:27:35 +0200
committerHelmut Grohne <helmut@subdivi.de>2016-05-25 19:27:35 +0200
commit420804c25797add9689bacbd3db138a92a623c8e (patch)
tree5394acb6f3270a0b656fb4b0b5152f2f154835cc
parent89378e07e16c071efed82753cc16fbe2b9fd1598 (diff)
downloaddebian-dedup-420804c25797add9689bacbd3db138a92a623c8e.tar.gz
autoimport: improve fetching package lists
Move the fetching part into dedup.utils. Instead of hard-coding the gzip-compressed copy, try xz, gz, and plain in that order. Also take care to actually close the connection.
-rwxr-xr-xautoimport.py32
-rw-r--r--dedup/utils.py33
2 files changed, 47 insertions, 18 deletions
diff --git a/autoimport.py b/autoimport.py
index 9c416c5..5fb0659 100755
--- a/autoimport.py
+++ b/autoimport.py
@@ -4,6 +4,7 @@ packages contained. It has rather strong assumptions on the working directory.
"""
import argparse
+import contextlib
import errno
import multiprocessing
import os
@@ -15,33 +16,28 @@ try:
from urllib.parse import unquote
except ImportError:
from urllib import unquote
-try:
- from urllib.request import urlopen
-except ImportError:
- from urllib2 import urlopen
import concurrent.futures
from debian import deb822
from debian.debian_support import version_compare
-from dedup.compression import decompress
+from dedup.utils import open_compressed_mirror_url
from readyaml import readyaml
def process_http(pkgs, url, addhash=True):
- pkglist = urlopen(url + "/dists/sid/main/binary-amd64/Packages.gz")
- pkglist = decompress(pkglist, ".gz")
- pkglist = deb822.Packages.iter_paragraphs(pkglist)
- for pkg in pkglist:
- name = pkg["Package"]
- if name in pkgs and \
- version_compare(pkgs[name]["version"], pkg["Version"]) > 0:
- continue
- inst = dict(version=pkg["Version"],
- filename="%s/%s" % (url, pkg["Filename"]))
- if addhash:
- inst["sharing"] = pkg["SHA256"]
- pkgs[name] = inst
+ url += "/dists/sid/main/binary-amd64/Packages"
+ with contextlib.closing(open_compressed_mirror_url(url)) as pkglist:
+ for pkg in deb822.Packages.iter_paragraphs(pkglist):
+ name = pkg["Package"]
+ if name in pkgs and \
+ version_compare(pkgs[name]["version"], pkg["Version"]) > 0:
+ continue
+ inst = dict(version=pkg["Version"],
+ filename="%s/%s" % (url, pkg["Filename"]))
+ if addhash:
+ inst["sharing"] = pkg["SHA256"]
+ pkgs[name] = inst
def process_file(pkgs, filename):
base = os.path.basename(filename)
diff --git a/dedup/utils.py b/dedup/utils.py
index 6fb233b..dab6653 100644
--- a/dedup/utils.py
+++ b/dedup/utils.py
@@ -1,5 +1,17 @@
+import errno
+try:
+ from urllib.error import URLError, HTTPError
+except ImportError:
+ from urllib2 import URLError, HTTPError
+try:
+ from urllib.request import urlopen
+except ImportError:
+ from urllib2 import urlopen
+
from debian.debian_support import version_compare
+from dedup.compression import decompress
+
def fetchiter(cursor):
rows = cursor.fetchmany()
while rows:
@@ -10,3 +22,24 @@ def fetchiter(cursor):
def sql_add_version_compare(db):
db.create_collation("debian_version", version_compare)
db.create_function("debian_version_compare", 2, version_compare)
+
+def open_compressed_mirror_url(url, extensions=(u".xz", u".gz", u"")):
+ """Fetch the given url. Try appending each of the given compression
+ schemes and move on in case it doesn't exist. Decompress the resulting
+ stream on the fly.
+ @returns: a file-like with the decompressed contents
+ """
+ for ext in extensions:
+ try:
+ handle = urlopen(url + ext)
+ except HTTPError as error:
+ if error.code != 404:
+ raise
+ except URLError as error:
+ if not hasattr(error.reason, "errno"):
+ raise
+ if error.reason.errno != errno.ENOENT:
+ raise
+ else:
+ return decompress(handle, ext)
+ raise OSError(errno.ENOENT, "No such file or directory")