diff options
author | Helmut Grohne <helmut@subdivi.de> | 2016-05-22 23:21:16 +0200 |
---|---|---|
committer | Helmut Grohne <helmut@subdivi.de> | 2016-05-22 23:21:16 +0200 |
commit | b1466bc8ac804d3ba90f671ec8431c147a397e4f (patch) | |
tree | e4015e1e5f493a707982798efe9a58391c5f73cd | |
parent | d7fd1020648d9b95b59bc4cc053f8dfa4e4b9032 (diff) | |
download | debian-dedup-b1466bc8ac804d3ba90f671ec8431c147a397e4f.tar.gz |
autoimport: stream package list and use generic decompressor
* Streaming means that we do not need to hold the entire package list
in memory (but the pkgs dict will become large anyway).
* The decompress utility makes it easy to switch to, e.g., xz, which is
the only compression format for the dbgsym suites.
-rwxr-xr-x | autoimport.py | 9 |
1 file changed, 4 insertions, 5 deletions
diff --git a/autoimport.py b/autoimport.py index bbe5029..94358b6 100755 --- a/autoimport.py +++ b/autoimport.py @@ -4,9 +4,7 @@ packages contained. It has rather strong assumptions on the working directory. """ import argparse -import gzip import errno -import io import multiprocessing import os import sqlite3 @@ -26,12 +24,13 @@ import concurrent.futures from debian import deb822 from debian.debian_support import version_compare +from dedup.compression import decompress + from readyaml import readyaml def process_http(pkgs, url): - pkglist = urlopen(url + "/dists/sid/main/binary-amd64/Packages.gz").read() - pkglist = gzip.GzipFile(fileobj=io.BytesIO(pkglist)).read() - pkglist = io.BytesIO(pkglist) + pkglist = urlopen(url + "/dists/sid/main/binary-amd64/Packages.gz") + pkglist = decompress(pkglist, ".gz") pkglist = deb822.Packages.iter_paragraphs(pkglist) for pkg in pkglist: name = pkg["Package"] |