summary refs log tree commit diff
path: root/dedup/debpkg.py
diff options
context:
space:
mode:
author Helmut Grohne <helmut@subdivi.de> 2014-02-25 07:17:39 +0100
committer Helmut Grohne <helmut@subdivi.de> 2014-02-25 07:17:39 +0100
commit c04c6a6c6a3412593e77cf31eb5ceefc46f87783 (patch)
tree 2ed77c18114d969d4beec843021d529cf321ea75 /dedup/debpkg.py
parent f29676904602fa9b0e0cf51ab0e7345ba28939db (diff)
download debian-dedup-c04c6a6c6a3412593e77cf31eb5ceefc46f87783.tar.gz
record package metadata that describes co-installability
Specifically, all entries in the Conflicts header (and, per the diff below, the Replaces header) are saved in the conflict table; all entries in the Provides header are saved in the provide table (to cover conflicts with virtual packages); and packages using dpkg-divert in preinst get a magic "_dpkg-divert" entry in their conflict table. With this metadata, it should be possible to compute undeclared file conflicts.
Diffstat (limited to 'dedup/debpkg.py')
-rw-r--r-- dedup/debpkg.py 26
1 file changed, 23 insertions, 3 deletions
diff --git a/dedup/debpkg.py b/dedup/debpkg.py
index 2d67135..875a34d 100644
--- a/dedup/debpkg.py
+++ b/dedup/debpkg.py
@@ -2,6 +2,17 @@ from debian import deb822
from dedup.hashing import hash_file
+def pkgname_from_dict(entry):
+ """Given an entry dictionary obtained from deb822, return the package
+ name.
+ @rtype: bytes
+ """
+ name = entry[u"name"]
+ # deb822 currently returns :any dependencies raw. see #670679
+ name = name.split(u':', 1)[0]
+ return name.encode("ascii")
+
+
def process_control(control_contents):
"""Parses the contents of a control file from a control.tar.gz of a Debian
package and returns a dictionary containing the fields relevant to dedup.
@@ -16,12 +27,21 @@ def process_control(control_contents):
source = package
version = control["version"].encode("ascii")
architecture = control["architecture"].encode("ascii")
- # deb822 currently returns :any dependencies raw. see #670679
- depends = set(dep[0]["name"].split(u':', 1)[0].encode("ascii")
+ depends = set(pkgname_from_dict(dep[0])
for dep in control.relations.get("depends", ())
if len(dep) == 1)
+ conflicts = set(pkgname_from_dict(ent)
+ for group in control.relations.get("conflicts", ())
+ for ent in group)
+ conflicts.update(set(pkgname_from_dict(ent)
+ for group in control.relations.get("replaces", ())
+ for ent in group))
+ provides = set(pkgname_from_dict(ent)
+ for group in control.relations.get("provides", ())
+ for ent in group)
return dict(package=package, source=source, version=version,
- architecture=architecture, depends=depends)
+ architecture=architecture, depends=depends,
+ conflicts=conflicts, provides=provides)
class MultiHash(object):
def __init__(self, *hashes):