From 14020d53fd4853aa72f159885edad1dcbfce2ee7 Mon Sep 17 00:00:00 2001 From: Helmut Grohne Date: Wed, 10 Jul 2013 16:16:45 +0200 Subject: schema: reference package table by integer key One approach to improve performance is to reduce the database size. A package name takes up 15 bytes on average. A package number takes up two bytes. Multiply that difference by the number of references and it should be noticeable. A small test set shows a reduction of 10%. --- autoimport.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'autoimport.py') diff --git a/autoimport.py b/autoimport.py index d326d61..694ffeb 100755 --- a/autoimport.py +++ b/autoimport.py @@ -93,7 +93,7 @@ def main(): process_file(pkgs, d) print("reading database") - cur.execute("SELECT package, version FROM package;") + cur.execute("SELECT name, version FROM package;") knownpkgs = dict((row[0], row[1]) for row in cur.fetchall()) distpkgs = set(pkgs.keys()) if options.new: @@ -126,7 +126,7 @@ def main(): if options.prune: delpkgs = knownpkgs - distpkgs print("clearing packages %s" % " ".join(delpkgs)) - cur.executemany("DELETE FROM package WHERE package = ?;", + cur.executemany("DELETE FROM package WHERE name = ?;", ((pkg,) for pkg in delpkgs)) # Tables content, dependency and sharing will also be pruned # due to ON DELETE CASCADE clauses. -- cgit v1.2.3