diff options
-rw-r--r-- | dedup/debpkg.py | 26 | ||||
-rwxr-xr-x | importpkg.py | 14 | ||||
-rwxr-xr-x | readyaml.py | 6 | ||||
-rw-r--r-- | schema.sql | 2 |
4 files changed, 43 insertions, 5 deletions
diff --git a/dedup/debpkg.py b/dedup/debpkg.py index 2d67135..875a34d 100644 --- a/dedup/debpkg.py +++ b/dedup/debpkg.py @@ -2,6 +2,17 @@ from debian import deb822 from dedup.hashing import hash_file +def pkgname_from_dict(entry): + """Given an entry dictionary obtained from deb822, return the package + name. + @rtype: bytes + """ + name = entry[u"name"] + # deb822 currently returns :any dependencies raw. see #670679 + name = name.split(u':', 1)[0] + return name.encode("ascii") + + def process_control(control_contents): """Parses the contents of a control file from a control.tar.gz of a Debian package and returns a dictionary containing the fields relevant to dedup. @@ -16,12 +27,21 @@ def process_control(control_contents): source = package version = control["version"].encode("ascii") architecture = control["architecture"].encode("ascii") - # deb822 currently returns :any dependencies raw. see #670679 - depends = set(dep[0]["name"].split(u':', 1)[0].encode("ascii") + depends = set(pkgname_from_dict(dep[0]) for dep in control.relations.get("depends", ()) if len(dep) == 1) + conflicts = set(pkgname_from_dict(ent) + for group in control.relations.get("conflicts", ()) + for ent in group) + conflicts.update(set(pkgname_from_dict(ent) + for group in control.relations.get("replaces", ()) + for ent in group)) + provides = set(pkgname_from_dict(ent) + for group in control.relations.get("provides", ()) + for ent in group) return dict(package=package, source=source, version=version, - architecture=architecture, depends=depends) + architecture=architecture, depends=depends, + conflicts=conflicts, provides=provides) class MultiHash(object): def __init__(self, *hashes): diff --git a/importpkg.py b/importpkg.py index aeccda5..f3868ff 100755 --- a/importpkg.py +++ b/importpkg.py @@ -58,14 +58,24 @@ def process_package(filelike, hash_functions): raise ValueError("unexpected control.tar.gz") state = "control" tf = tarfile.open(fileobj=af, mode="r|gz") + meta = None + use_dpkg_divert = False for elem in tf: + if elem.name == "./preinst": + if "dpkg-divert" in tf.extractfile(elem).read(): + use_dpkg_divert = True + continue if elem.name != "./control": continue if state != "control": raise ValueError("duplicate control file") state = "control_file" - yield process_control(tf.extractfile(elem).read()) - break + meta = process_control(tf.extractfile(elem).read()) + if meta is None: + raise ValueError("control file not found") + if use_dpkg_divert: + meta["conflicts"].add("_dpkg-divert") + yield meta continue elif name == "data.tar.gz": tf = tarfile.open(fileobj=af, mode="r|gz") diff --git a/readyaml.py b/readyaml.py index 2ef9a3b..50603b1 100755 --- a/readyaml.py +++ b/readyaml.py @@ -31,6 +31,8 @@ def readyaml(db, stream): if pid is not None: cur.execute("DELETE FROM content WHERE pid = ?;", (pid,)) cur.execute("DELETE FROM dependency WHERE pid = ?;", (pid,)) + cur.execute("DELETE FROM conflict WHERE pid = ?;", (pid,)) + cur.execute("DELETE FROM provide WHERE pid = ?;", (pid,)) cur.execute("UPDATE package SET version = ?, architecture = ?, source = ? WHERE id = ?;", (metadata["version"], metadata["architecture"], metadata["source"], pid)) else: @@ -40,6 +42,10 @@ def readyaml(db, stream): pid = cur.lastrowid cur.executemany("INSERT INTO dependency (pid, required) VALUES (?, ?);", ((pid, dep) for dep in metadata["depends"])) + cur.executemany("INSERT INTO conflict (pid, conflicting) VALUES (?, ?);", + ((pid, conflict) for conflict in metadata["conflicts"])) + cur.executemany("INSERT INTO provide (pid, provided) VALUES (?, ?);", + ((pid, provided) for provided in metadata["provides"])) for entry in gen: if entry == "commit": db.commit() @@ -4,6 +4,8 @@ CREATE TABLE function (id INTEGER PRIMARY KEY, name TEXT UNIQUE NOT NULL, eqclas INSERT INTO function (name, eqclass) VALUES ("sha512", 1), ("gzip_sha512", 1), ("png_sha512", 2), ("gif_sha512", 2); CREATE TABLE hash (cid INTEGER, fid INTEGER NOT NULL, hash TEXT, FOREIGN KEY (cid) REFERENCES content(id) ON DELETE CASCADE, FOREIGN KEY (fid) REFERENCES function(id)); CREATE TABLE dependency (pid INTEGER, required TEXT, FOREIGN KEY (pid) REFERENCES package(id) ON DELETE CASCADE); +CREATE TABLE conflict (pid INTEGER, conflicting TEXT, FOREIGN KEY (pid) REFERENCES package(id) ON DELETE CASCADE); +CREATE TABLE provide (pid INTEGER, provided TEXT, FOREIGN KEY (pid) REFERENCES package(id) ON DELETE CASCADE); CREATE INDEX content_package_size_index ON content (pid, size); CREATE INDEX hash_cid_index ON hash (cid); CREATE INDEX hash_hash_index ON hash (hash); |