From b1466bc8ac804d3ba90f671ec8431c147a397e4f Mon Sep 17 00:00:00 2001 From: Helmut Grohne Date: Sun, 22 May 2016 23:21:16 +0200 Subject: autoimport: stream package list and use generic decompressor * Streaming means that we do not need to hold the entire package list in memory (but the pkgs dict will become large anyway). * The decompress utility makes it easy to switch to e.g. xz, which is the only compression format for the dbgsym suites. --- autoimport.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/autoimport.py b/autoimport.py index bbe5029..94358b6 100755 --- a/autoimport.py +++ b/autoimport.py @@ -4,9 +4,7 @@ packages contained. It has rather strong assumptions on the working directory. """ import argparse -import gzip import errno -import io import multiprocessing import os import sqlite3 @@ -26,12 +24,13 @@ import concurrent.futures from debian import deb822 from debian.debian_support import version_compare +from dedup.compression import decompress + from readyaml import readyaml def process_http(pkgs, url): - pkglist = urlopen(url + "/dists/sid/main/binary-amd64/Packages.gz").read() - pkglist = gzip.GzipFile(fileobj=io.BytesIO(pkglist)).read() - pkglist = io.BytesIO(pkglist) + pkglist = urlopen(url + "/dists/sid/main/binary-amd64/Packages.gz") + pkglist = decompress(pkglist, ".gz") pkglist = deb822.Packages.iter_paragraphs(pkglist) for pkg in pkglist: name = pkg["Package"] -- cgit v1.2.3