summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Helmut Grohne <helmut@subdivi.de> 2016-05-22 23:21:16 +0200
committer: Helmut Grohne <helmut@subdivi.de> 2016-05-22 23:21:16 +0200
commit: b1466bc8ac804d3ba90f671ec8431c147a397e4f (patch)
tree: e4015e1e5f493a707982798efe9a58391c5f73cd
parent: d7fd1020648d9b95b59bc4cc053f8dfa4e4b9032 (diff)
download: debian-dedup-b1466bc8ac804d3ba90f671ec8431c147a397e4f.tar.gz
autoimport: stream package list and use generic decompressor
* Streaming means that we do not need to hold the entire package list in memory (but the pkgs dict will become large anyway).
* The decompress utility allows easily switching to e.g. xz, which is the only compression format for the dbgsym suites.
-rwxr-xr-x autoimport.py 9
1 files changed, 4 insertions, 5 deletions
diff --git a/autoimport.py b/autoimport.py
index bbe5029..94358b6 100755
--- a/autoimport.py
+++ b/autoimport.py
@@ -4,9 +4,7 @@ packages contained. It has rather strong assumptions on the working directory.
"""
import argparse
-import gzip
import errno
-import io
import multiprocessing
import os
import sqlite3
@@ -26,12 +24,13 @@ import concurrent.futures
from debian import deb822
from debian.debian_support import version_compare
+from dedup.compression import decompress
+
from readyaml import readyaml
def process_http(pkgs, url):
- pkglist = urlopen(url + "/dists/sid/main/binary-amd64/Packages.gz").read()
- pkglist = gzip.GzipFile(fileobj=io.BytesIO(pkglist)).read()
- pkglist = io.BytesIO(pkglist)
+ pkglist = urlopen(url + "/dists/sid/main/binary-amd64/Packages.gz")
+ pkglist = decompress(pkglist, ".gz")
pkglist = deb822.Packages.iter_paragraphs(pkglist)
for pkg in pkglist:
name = pkg["Package"]