diff options
author | Helmut Grohne <helmut@subdivi.de> | 2013-07-10 16:16:45 +0200 |
---|---|---|
committer | Helmut Grohne <helmut@subdivi.de> | 2013-07-10 16:16:45 +0200 |
commit | 14020d53fd4853aa72f159885edad1dcbfce2ee7 (patch) | |
tree | 3d1182588e2a9f73941f88dd80bd4b10b04aca53 /autoimport.py | |
parent | ada4f94466bf3eddc192cf22c8ecefc9cd5f0ea3 (diff) | |
download | debian-dedup-14020d53fd4853aa72f159885edad1dcbfce2ee7.tar.gz |
schema: reference package table by integer key
One approach to improve performance is to reduce the database size. A
package name takes up 15 bytes on average. A package number takes
up two bytes. Multiply that difference by the number of references and
it should be noticeable. A small test set shows a reduction of 10%.
Diffstat (limited to 'autoimport.py')
-rwxr-xr-x | autoimport.py | 4 |
1 files changed, 2 insertions, 2 deletions
diff --git a/autoimport.py b/autoimport.py index d326d61..694ffeb 100755 --- a/autoimport.py +++ b/autoimport.py @@ -93,7 +93,7 @@ def main(): process_file(pkgs, d) print("reading database") - cur.execute("SELECT package, version FROM package;") + cur.execute("SELECT name, version FROM package;") knownpkgs = dict((row[0], row[1]) for row in cur.fetchall()) distpkgs = set(pkgs.keys()) if options.new: @@ -126,7 +126,7 @@ def main(): if options.prune: delpkgs = knownpkgs - distpkgs print("clearing packages %s" % " ".join(delpkgs)) - cur.executemany("DELETE FROM package WHERE package = ?;", + cur.executemany("DELETE FROM package WHERE name = ?;", ((pkg,) for pkg in delpkgs)) # Tables content, dependency and sharing will also be pruned # due to ON DELETE CASCADE clauses. |