summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Helmut Grohne <helmut@subdivi.de>, 2014-02-25 07:17:39 +0100
committer: Helmut Grohne <helmut@subdivi.de>, 2014-02-25 07:17:39 +0100
commit: c04c6a6c6a3412593e77cf31eb5ceefc46f87783 (patch)
tree: 2ed77c18114d969d4beec843021d529cf321ea75
parent: f29676904602fa9b0e0cf51ab0e7345ba28939db (diff)
download: debian-dedup-c04c6a6c6a3412593e77cf31eb5ceefc46f87783.tar.gz
record package metadata that describes co-installability
Specifically, all entries in the Conflicts header are saved in the conflict table, and all entries in the Provides header are saved in the provide table (to cover conflicts with virtual packages). Packages that use dpkg-divert in their preinst script get a magic "_dpkg-divert" entry in their conflict table. Note that the change below also adds the entries of the Replaces header to the conflict set. With this metadata it should be possible to compute undeclared file conflicts.
-rw-r--r-- dedup/debpkg.py 26
-rwxr-xr-x importpkg.py 14
-rwxr-xr-x readyaml.py 6
-rw-r--r-- schema.sql 2
4 files changed, 43 insertions, 5 deletions
diff --git a/dedup/debpkg.py b/dedup/debpkg.py
index 2d67135..875a34d 100644
--- a/dedup/debpkg.py
+++ b/dedup/debpkg.py
@@ -2,6 +2,17 @@ from debian import deb822
from dedup.hashing import hash_file
+def pkgname_from_dict(entry):
+ """Given an entry dictionary obtained from deb822, return the package
+ name.
+ @rtype: bytes
+ """
+ name = entry[u"name"]
+ # deb822 currently returns :any dependencies raw. see #670679
+ name = name.split(u':', 1)[0]
+ return name.encode("ascii")
+
+
def process_control(control_contents):
"""Parses the contents of a control file from a control.tar.gz of a Debian
package and returns a dictionary containing the fields relevant to dedup.
@@ -16,12 +27,21 @@ def process_control(control_contents):
source = package
version = control["version"].encode("ascii")
architecture = control["architecture"].encode("ascii")
- # deb822 currently returns :any dependencies raw. see #670679
- depends = set(dep[0]["name"].split(u':', 1)[0].encode("ascii")
+ depends = set(pkgname_from_dict(dep[0])
for dep in control.relations.get("depends", ())
if len(dep) == 1)
+ conflicts = set(pkgname_from_dict(ent)
+ for group in control.relations.get("conflicts", ())
+ for ent in group)
+ conflicts.update(set(pkgname_from_dict(ent)
+ for group in control.relations.get("replaces", ())
+ for ent in group))
+ provides = set(pkgname_from_dict(ent)
+ for group in control.relations.get("provides", ())
+ for ent in group)
return dict(package=package, source=source, version=version,
- architecture=architecture, depends=depends)
+ architecture=architecture, depends=depends,
+ conflicts=conflicts, provides=provides)
class MultiHash(object):
def __init__(self, *hashes):
diff --git a/importpkg.py b/importpkg.py
index aeccda5..f3868ff 100755
--- a/importpkg.py
+++ b/importpkg.py
@@ -58,14 +58,24 @@ def process_package(filelike, hash_functions):
raise ValueError("unexpected control.tar.gz")
state = "control"
tf = tarfile.open(fileobj=af, mode="r|gz")
+ meta = None
+ use_dpkg_divert = False
for elem in tf:
+ if elem.name == "./preinst":
+ if "dpkg-divert" in tf.extractfile(elem).read():
+ use_dpkg_divert = True
+ continue
if elem.name != "./control":
continue
if state != "control":
raise ValueError("duplicate control file")
state = "control_file"
- yield process_control(tf.extractfile(elem).read())
- break
+ meta = process_control(tf.extractfile(elem).read())
+ if meta is None:
+ raise ValueError("control file not found")
+ if use_dpkg_divert:
+ meta["conflicts"].add("_dpkg-divert")
+ yield meta
continue
elif name == "data.tar.gz":
tf = tarfile.open(fileobj=af, mode="r|gz")
diff --git a/readyaml.py b/readyaml.py
index 2ef9a3b..50603b1 100755
--- a/readyaml.py
+++ b/readyaml.py
@@ -31,6 +31,8 @@ def readyaml(db, stream):
if pid is not None:
cur.execute("DELETE FROM content WHERE pid = ?;", (pid,))
cur.execute("DELETE FROM dependency WHERE pid = ?;", (pid,))
+ cur.execute("DELETE FROM conflict WHERE pid = ?;", (pid,))
+ cur.execute("DELETE FROM provide WHERE pid = ?;", (pid,))
cur.execute("UPDATE package SET version = ?, architecture = ?, source = ? WHERE id = ?;",
(metadata["version"], metadata["architecture"], metadata["source"], pid))
else:
@@ -40,6 +42,10 @@ def readyaml(db, stream):
pid = cur.lastrowid
cur.executemany("INSERT INTO dependency (pid, required) VALUES (?, ?);",
((pid, dep) for dep in metadata["depends"]))
+ cur.executemany("INSERT INTO conflict (pid, conflicting) VALUES (?, ?);",
+ ((pid, conflict) for conflict in metadata["conflicts"]))
+ cur.executemany("INSERT INTO provide (pid, provided) VALUES (?, ?);",
+ ((pid, provided) for provided in metadata["provides"]))
for entry in gen:
if entry == "commit":
db.commit()
diff --git a/schema.sql b/schema.sql
index 2ab7ca7..eeaf3b5 100644
--- a/schema.sql
+++ b/schema.sql
@@ -4,6 +4,8 @@ CREATE TABLE function (id INTEGER PRIMARY KEY, name TEXT UNIQUE NOT NULL, eqclas
INSERT INTO function (name, eqclass) VALUES ("sha512", 1), ("gzip_sha512", 1), ("png_sha512", 2), ("gif_sha512", 2);
CREATE TABLE hash (cid INTEGER, fid INTEGER NOT NULL, hash TEXT, FOREIGN KEY (cid) REFERENCES content(id) ON DELETE CASCADE, FOREIGN KEY (fid) REFERENCES function(id));
CREATE TABLE dependency (pid INTEGER, required TEXT, FOREIGN KEY (pid) REFERENCES package(id) ON DELETE CASCADE);
+CREATE TABLE conflict (pid INTEGER, conflicting TEXT, FOREIGN KEY (pid) REFERENCES package(id) ON DELETE CASCADE);
+CREATE TABLE provide (pid INTEGER, provided TEXT, FOREIGN KEY (pid) REFERENCES package(id) ON DELETE CASCADE);
CREATE INDEX content_package_size_index ON content (pid, size);
CREATE INDEX hash_cid_index ON hash (cid);
CREATE INDEX hash_hash_index ON hash (hash);