From 14020d53fd4853aa72f159885edad1dcbfce2ee7 Mon Sep 17 00:00:00 2001
From: Helmut Grohne <helmut@subdivi.de>
Date: Wed, 10 Jul 2013 16:16:45 +0200
Subject: schema: reference package table by integer key

One approach to improve performance is to reduce the database size. A
package name takes up 15 bytes on average. A number of a package takes
up two bytes. Multiply that difference by the number of references and
it should be noticeable. A small test set shows a reduction by 10%.
---
 autoimport.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'autoimport.py')

diff --git a/autoimport.py b/autoimport.py
index d326d61..694ffeb 100755
--- a/autoimport.py
+++ b/autoimport.py
@@ -93,7 +93,7 @@ def main():
             process_file(pkgs, d)
 
     print("reading database")
-    cur.execute("SELECT package, version FROM package;")
+    cur.execute("SELECT name, version FROM package;")
     knownpkgs = dict((row[0], row[1]) for row in cur.fetchall())
     distpkgs = set(pkgs.keys())
     if options.new:
@@ -126,7 +126,7 @@ def main():
     if options.prune:
         delpkgs = knownpkgs - distpkgs
         print("clearing packages %s" % " ".join(delpkgs))
-        cur.executemany("DELETE FROM package WHERE package = ?;",
+        cur.executemany("DELETE FROM package WHERE name = ?;",
                         ((pkg,) for pkg in delpkgs))
         # Tables content, dependency and sharing will also be pruned
         # due to ON DELETE CASCADE clauses.
-- 
cgit v1.2.3