summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Helmut Grohne <helmut@subdivi.de> 2016-05-23 21:49:43 +0200
committer: Helmut Grohne <helmut@subdivi.de> 2016-05-23 21:49:43 +0200
commit: bf1824d49d9fd2c19b4258184005822eacf41666 (patch)
tree: 79137ba1c7f81df86bc0b317ed8a588b34286f7b
parent: e686b05346892a1e2700f429136d46d8fff81f26 (diff)
download: debian-dedup-bf1824d49d9fd2c19b4258184005822eacf41666.tar.gz
move dedup.debpkg.process_control back into importpkg
After all, it isn't that generic. It knows what information is necessary for running dedup. Thus it really belongs to the extractor subclass. By building on handle_control_info, not that much parsing logic is left in the extractor subclass.
-rw-r--r-- dedup/debpkg.py 22
-rwxr-xr-x importpkg.py 23
2 files changed, 14 insertions, 31 deletions
diff --git a/dedup/debpkg.py b/dedup/debpkg.py
index c64f3c0..3a30b3e 100644
--- a/dedup/debpkg.py
+++ b/dedup/debpkg.py
@@ -7,28 +7,6 @@ from dedup.arreader import ArReader
from dedup.compression import decompress
from dedup.hashing import hash_file
-def process_control(control_contents):
- """Parses the contents of a control file from a control.tar of a Debian
- package and returns a dictionary containing the fields relevant to dedup.
- @type control_contents: bytes
- @rtype: {str: object}
- """
- control = deb822.Packages(control_contents)
- package = control["package"]
- try:
- source = control["source"].split()[0]
- except KeyError:
- source = package
- version = control["version"]
- architecture = control["architecture"]
- # deb822 currently returns :any dependencies raw. see #670679
- deprelations = control.relations.get("depends", []) + \
- control.relations.get("pre-depends", [])
- depends = set(dep[0]["name"].split(u':', 1)[0]
- for dep in deprelations if len(dep) == 1)
- return dict(package=package, source=source, version=version,
- architecture=architecture, depends=depends)
-
class MultiHash(object):
def __init__(self, *hashes):
self.hashes = hashes
diff --git a/importpkg.py b/importpkg.py
index 92c474e..b01fad3 100755
--- a/importpkg.py
+++ b/importpkg.py
@@ -16,8 +16,7 @@ except ImportError:
import yaml
-from dedup.debpkg import DebExtractor, decodetarname, get_tar_hashes, \
- process_control
+from dedup.debpkg import DebExtractor, decodetarname, get_tar_hashes
from dedup.hashing import DecompressedHash, SuppressingHash, HashedStream, \
HashBlacklistContent
from dedup.compression import GzipDecompressor
@@ -56,13 +55,19 @@ class ImportpkgExtractor(DebExtractor):
DebExtractor.__init__(self)
self.callback = callback
- def handle_control_tar(self, tarfileobj):
- for elem in tarfileobj:
- if elem.name not in ("./control", "control"):
- continue
- self.callback(process_control(tarfileobj.extractfile(elem).read()))
- return
- raise ValueError("missing control file")
+ def handle_control_info(self, info):
+ try:
+ source = info["source"].split()[0]
+ except KeyError:
+ source = info["package"]
+ # deb822 currently returns :any dependencies raw. see #670679
+ deprelations = info.relations.get("depends", []) + \
+ info.relations.get("pre-depends", [])
+ depends = set(dep[0]["name"].split(u':', 1)[0]
+ for dep in deprelations if len(dep) == 1)
+ self.callback(dict(package=info["package"], source=source,
+ version=info["version"],
+ architecture=info["architecture"], depends=depends))
def handle_data_tar(self, tarfileobj):
for name, size, hashes in get_tar_hashes(tarfileobj,