summaryrefslogtreecommitdiff
path: root/autoimport.py
diff options
context:
space:
mode:
author: Helmut Grohne <helmut@subdivi.de> 2013-07-10 16:16:45 +0200
committer: Helmut Grohne <helmut@subdivi.de> 2013-07-10 16:16:45 +0200
commit: 14020d53fd4853aa72f159885edad1dcbfce2ee7 (patch)
tree: 3d1182588e2a9f73941f88dd80bd4b10b04aca53 /autoimport.py
parent: ada4f94466bf3eddc192cf22c8ecefc9cd5f0ea3 (diff)
download: debian-dedup-14020d53fd4853aa72f159885edad1dcbfce2ee7.tar.gz
schema: reference package table by integer key
One approach to improving performance is to reduce the database size. A package name takes up 15 bytes on average, whereas a package number takes up two bytes. Multiply that difference by the number of references and the saving should be noticeable. A small test set shows a reduction of 10%.
Diffstat (limited to 'autoimport.py')
-rwxr-xr-x autoimport.py 4
1 files changed, 2 insertions, 2 deletions
diff --git a/autoimport.py b/autoimport.py
index d326d61..694ffeb 100755
--- a/autoimport.py
+++ b/autoimport.py
@@ -93,7 +93,7 @@ def main():
process_file(pkgs, d)
print("reading database")
- cur.execute("SELECT package, version FROM package;")
+ cur.execute("SELECT name, version FROM package;")
knownpkgs = dict((row[0], row[1]) for row in cur.fetchall())
distpkgs = set(pkgs.keys())
if options.new:
@@ -126,7 +126,7 @@ def main():
if options.prune:
delpkgs = knownpkgs - distpkgs
print("clearing packages %s" % " ".join(delpkgs))
- cur.executemany("DELETE FROM package WHERE package = ?;",
+ cur.executemany("DELETE FROM package WHERE name = ?;",
((pkg,) for pkg in delpkgs))
# Tables content, dependency and sharing will also be pruned
# due to ON DELETE CASCADE clauses.