summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- dedup/debpkg.py 26
-rwxr-xr-x importpkg.py 14
-rwxr-xr-x readyaml.py 6
-rw-r--r-- schema.sql 2
4 files changed, 43 insertions, 5 deletions
diff --git a/dedup/debpkg.py b/dedup/debpkg.py
index 2d67135..875a34d 100644
--- a/dedup/debpkg.py
+++ b/dedup/debpkg.py
@@ -2,6 +2,17 @@ from debian import deb822
from dedup.hashing import hash_file
+def pkgname_from_dict(entry):
+ """Given an entry dictionary obtained from deb822, return the package
+ name.
+ @rtype: bytes
+ """
+ name = entry[u"name"]
+ # deb822 currently returns :any dependencies raw. see #670679
+ name = name.split(u':', 1)[0]
+ return name.encode("ascii")
+
+
def process_control(control_contents):
"""Parses the contents of a control file from a control.tar.gz of a Debian
package and returns a dictionary containing the fields relevant to dedup.
@@ -16,12 +27,21 @@ def process_control(control_contents):
source = package
version = control["version"].encode("ascii")
architecture = control["architecture"].encode("ascii")
- # deb822 currently returns :any dependencies raw. see #670679
- depends = set(dep[0]["name"].split(u':', 1)[0].encode("ascii")
+ depends = set(pkgname_from_dict(dep[0])
for dep in control.relations.get("depends", ())
if len(dep) == 1)
+ conflicts = set(pkgname_from_dict(ent)
+ for group in control.relations.get("conflicts", ())
+ for ent in group)
+ conflicts.update(set(pkgname_from_dict(ent)
+ for group in control.relations.get("replaces", ())
+ for ent in group))
+ provides = set(pkgname_from_dict(ent)
+ for group in control.relations.get("provides", ())
+ for ent in group)
return dict(package=package, source=source, version=version,
- architecture=architecture, depends=depends)
+ architecture=architecture, depends=depends,
+ conflicts=conflicts, provides=provides)
class MultiHash(object):
def __init__(self, *hashes):
diff --git a/importpkg.py b/importpkg.py
index aeccda5..f3868ff 100755
--- a/importpkg.py
+++ b/importpkg.py
@@ -58,14 +58,24 @@ def process_package(filelike, hash_functions):
raise ValueError("unexpected control.tar.gz")
state = "control"
tf = tarfile.open(fileobj=af, mode="r|gz")
+ meta = None
+ use_dpkg_divert = False
for elem in tf:
+ if elem.name == "./preinst":
+ if "dpkg-divert" in tf.extractfile(elem).read():
+ use_dpkg_divert = True
+ continue
if elem.name != "./control":
continue
if state != "control":
raise ValueError("duplicate control file")
state = "control_file"
- yield process_control(tf.extractfile(elem).read())
- break
+ meta = process_control(tf.extractfile(elem).read())
+ if meta is None:
+ raise ValueError("control file not found")
+ if use_dpkg_divert:
+ meta["conflicts"].add("_dpkg-divert")
+ yield meta
continue
elif name == "data.tar.gz":
tf = tarfile.open(fileobj=af, mode="r|gz")
diff --git a/readyaml.py b/readyaml.py
index 2ef9a3b..50603b1 100755
--- a/readyaml.py
+++ b/readyaml.py
@@ -31,6 +31,8 @@ def readyaml(db, stream):
if pid is not None:
cur.execute("DELETE FROM content WHERE pid = ?;", (pid,))
cur.execute("DELETE FROM dependency WHERE pid = ?;", (pid,))
+ cur.execute("DELETE FROM conflict WHERE pid = ?;", (pid,))
+ cur.execute("DELETE FROM provide WHERE pid = ?;", (pid,))
cur.execute("UPDATE package SET version = ?, architecture = ?, source = ? WHERE id = ?;",
(metadata["version"], metadata["architecture"], metadata["source"], pid))
else:
@@ -40,6 +42,10 @@ def readyaml(db, stream):
pid = cur.lastrowid
cur.executemany("INSERT INTO dependency (pid, required) VALUES (?, ?);",
((pid, dep) for dep in metadata["depends"]))
+ cur.executemany("INSERT INTO conflict (pid, conflicting) VALUES (?, ?);",
+ ((pid, conflict) for conflict in metadata["conflicts"]))
+ cur.executemany("INSERT INTO provide (pid, provided) VALUES (?, ?);",
+ ((pid, provided) for provided in metadata["provides"]))
for entry in gen:
if entry == "commit":
db.commit()
diff --git a/schema.sql b/schema.sql
index 2ab7ca7..eeaf3b5 100644
--- a/schema.sql
+++ b/schema.sql
@@ -4,6 +4,8 @@ CREATE TABLE function (id INTEGER PRIMARY KEY, name TEXT UNIQUE NOT NULL, eqclas
INSERT INTO function (name, eqclass) VALUES ("sha512", 1), ("gzip_sha512", 1), ("png_sha512", 2), ("gif_sha512", 2);
CREATE TABLE hash (cid INTEGER, fid INTEGER NOT NULL, hash TEXT, FOREIGN KEY (cid) REFERENCES content(id) ON DELETE CASCADE, FOREIGN KEY (fid) REFERENCES function(id));
CREATE TABLE dependency (pid INTEGER, required TEXT, FOREIGN KEY (pid) REFERENCES package(id) ON DELETE CASCADE);
+CREATE TABLE conflict (pid INTEGER, conflicting TEXT, FOREIGN KEY (pid) REFERENCES package(id) ON DELETE CASCADE);
+CREATE TABLE provide (pid INTEGER, provided TEXT, FOREIGN KEY (pid) REFERENCES package(id) ON DELETE CASCADE);
CREATE INDEX content_package_size_index ON content (pid, size);
CREATE INDEX hash_cid_index ON hash (cid);
CREATE INDEX hash_hash_index ON hash (hash);