diff options
author | Helmut Grohne <helmut@subdivi.de> | 2016-05-23 21:49:43 +0200 |
---|---|---|
committer | Helmut Grohne <helmut@subdivi.de> | 2016-05-23 21:49:43 +0200 |
commit | bf1824d49d9fd2c19b4258184005822eacf41666 (patch) | |
tree | 79137ba1c7f81df86bc0b317ed8a588b34286f7b | |
parent | e686b05346892a1e2700f429136d46d8fff81f26 (diff) | |
download | debian-dedup-bf1824d49d9fd2c19b4258184005822eacf41666.tar.gz |
move dedup.debpkg.process_control back into importpkg
After all, it isn't that generic. It knows what information is necessary
for running dedup. Thus it really belongs to the extractor subclass.
By building on handle_control_info, not that much parsing logic is left
in the extractor subclass.
-rw-r--r-- | dedup/debpkg.py | 22 | ||||
-rwxr-xr-x | importpkg.py | 23 |
2 files changed, 14 insertions, 31 deletions
```diff
diff --git a/dedup/debpkg.py b/dedup/debpkg.py
index c64f3c0..3a30b3e 100644
--- a/dedup/debpkg.py
+++ b/dedup/debpkg.py
@@ -7,28 +7,6 @@ from dedup.arreader import ArReader
 from dedup.compression import decompress
 from dedup.hashing import hash_file
 
-def process_control(control_contents):
-    """Parses the contents of a control file from a control.tar of a Debian
-    package and returns a dictionary containing the fields relevant to dedup.
-    @type control_contents: bytes
-    @rtype: {str: object}
-    """
-    control = deb822.Packages(control_contents)
-    package = control["package"]
-    try:
-        source = control["source"].split()[0]
-    except KeyError:
-        source = package
-    version = control["version"]
-    architecture = control["architecture"]
-    # deb822 currently returns :any dependencies raw. see #670679
-    deprelations = control.relations.get("depends", []) + \
-            control.relations.get("pre-depends", [])
-    depends = set(dep[0]["name"].split(u':', 1)[0]
-                  for dep in deprelations if len(dep) == 1)
-    return dict(package=package, source=source, version=version,
-                architecture=architecture, depends=depends)
-
 class MultiHash(object):
     def __init__(self, *hashes):
         self.hashes = hashes
diff --git a/importpkg.py b/importpkg.py
index 92c474e..b01fad3 100755
--- a/importpkg.py
+++ b/importpkg.py
@@ -16,8 +16,7 @@ except ImportError:
 
 import yaml
 
-from dedup.debpkg import DebExtractor, decodetarname, get_tar_hashes, \
-        process_control
+from dedup.debpkg import DebExtractor, decodetarname, get_tar_hashes
 from dedup.hashing import DecompressedHash, SuppressingHash, HashedStream, \
         HashBlacklistContent
 from dedup.compression import GzipDecompressor
@@ -56,13 +55,19 @@ class ImportpkgExtractor(DebExtractor):
         DebExtractor.__init__(self)
         self.callback = callback
 
-    def handle_control_tar(self, tarfileobj):
-        for elem in tarfileobj:
-            if elem.name not in ("./control", "control"):
-                continue
-            self.callback(process_control(tarfileobj.extractfile(elem).read()))
-            return
-        raise ValueError("missing control file")
+    def handle_control_info(self, info):
+        try:
+            source = info["source"].split()[0]
+        except KeyError:
+            source = info["package"]
+        # deb822 currently returns :any dependencies raw. see #670679
+        deprelations = info.relations.get("depends", []) + \
+                info.relations.get("pre-depends", [])
+        depends = set(dep[0]["name"].split(u':', 1)[0]
+                      for dep in deprelations if len(dep) == 1)
+        self.callback(dict(package=info["package"], source=source,
+                           version=info["version"],
+                           architecture=info["architecture"], depends=depends))
 
     def handle_data_tar(self, tarfileobj):
         for name, size, hashes in get_tar_hashes(tarfileobj,
```