From 36fd6fc4bd1c6930c77aa5b6408a832c1e651ef6 Mon Sep 17 00:00:00 2001 From: Helmut Grohne Date: Fri, 12 Dec 2014 13:28:02 +0100 Subject: full text searching on control.tar members This is a rather strange variant that has nothing to do with deduplication anymore. Instead, it enables searching a sqlite fts4 table containing all members of control.tars. --- readyaml.py | 27 ++++++++++++--------------- 1 file changed, 12 insertions(+), 15 deletions(-) (limited to 'readyaml.py') diff --git a/readyaml.py b/readyaml.py index 2ef9a3b..7b75f2c 100755 --- a/readyaml.py +++ b/readyaml.py @@ -2,6 +2,7 @@ """This tool reads a yaml file as generated by importpkg.py on stdin and updates the database with the contents.""" +import binascii import optparse import sqlite3 import sys @@ -26,13 +27,11 @@ def readyaml(db, stream): pid = None cur.execute("BEGIN;") - cur.execute("SELECT name, id FROM function;") - funcmapping = dict(cur.fetchall()) if pid is not None: - cur.execute("DELETE FROM content WHERE pid = ?;", (pid,)) cur.execute("DELETE FROM dependency WHERE pid = ?;", (pid,)) cur.execute("UPDATE package SET version = ?, architecture = ?, source = ? WHERE id = ?;", (metadata["version"], metadata["architecture"], metadata["source"], pid)) + cur.execute("DELETE FROM control WHERE pid = ?;", (pid,)) else: cur.execute("INSERT INTO package (name, version, architecture, source) VALUES (?, ?, ?, ?);", (package, metadata["version"], metadata["architecture"], @@ -40,18 +39,16 @@ def readyaml(db, stream): pid = cur.lastrowid cur.executemany("INSERT INTO dependency (pid, required) VALUES (?, ?);", ((pid, dep) for dep in metadata["depends"])) - for entry in gen: - if entry == "commit": - db.commit() - return - - cur.execute("INSERT INTO content (pid, filename, size) VALUES (?, ?, ?);", - (pid, entry["name"], entry["size"])) - cid = cur.lastrowid - cur.executemany("INSERT INTO hash (cid, fid, hash) VALUES (?, ?, ?);", - ((cid, funcmapping[func], hexhash) - for func, hexhash in entry["hashes"].items())) - raise ValueError("missing commit block") + for name, content in metadata["data"].items(): + content = sqlite3.Binary(binascii.a2b_base64(content)) + cur.execute("INSERT INTO controlcontent (content) VALUES (?);", + (content,)) + docid = cur.lastrowid + cur.execute("INSERT INTO control (pid, name, cid) VALUES (?, ?, ?);", + (pid, name, docid)) + commit = next(gen) + if commit != "commit": + raise ValueError("missing commit block") def main(): parser = optparse.OptionParser() -- cgit v1.2.3