summary | refs | log | tree | commit | diff
path: root/webapp.py
diff options
context:
space:
mode:
author: Helmut Grohne <helmut@subdivi.de> 2013-03-09 18:43:47 +0100
committer: Helmut Grohne <helmut@subdivi.de> 2013-03-09 18:43:47 +0100
commit: 5c0dcba3229b8c3e0faf42cf3e07cb82ee1369cd (patch)
tree: 08f4b4cd1aae470eb6bae21bd0a4859ecd91aee9 /webapp.py
parent: 423ceee0d0befc8755a9ae915d15e8d415d98159 (diff)
download: debian-dedup-5c0dcba3229b8c3e0faf42cf3e07cb82ee1369cd.tar.gz
split content table to a hash table
In the old content table, the (package, filename, size) tuple would be the same for multiple hash functions, so it was stored once per hash function. Now the schema represents that each file has precisely one size, but multiple hashes.
Diffstat (limited to 'webapp.py')
-rwxr-xr-x webapp.py 6
1 files changed, 3 insertions, 3 deletions
diff --git a/webapp.py b/webapp.py
index f80b3da..1da987b 100755
--- a/webapp.py
+++ b/webapp.py
@@ -279,13 +279,13 @@ class Application(object):
if package1 == package2:
details1 = details2 = self.get_details(package1)
- cur.execute("SELECT a.filename, a.size, a.function, b.filename, b.size, b.function, a.hash FROM content AS a JOIN content AS b ON a.hash = b.hash WHERE a.package = ? AND b.package = ? AND a.filename != b.filename ORDER BY a.size DESC, a.filename, b.filename;",
+ cur.execute("SELECT a.filename, a.size, ha.function, b.filename, b.size, hb.function, ha.hash FROM content AS a JOIN hash AS ha ON a.id = ha.cid JOIN hash AS hb ON ha.hash = hb.hash JOIN content AS b ON b.id = hb.cid WHERE a.package = ? AND b.package = ? AND a.filename != b.filename ORDER BY a.size DESC, a.filename, b.filename;",
(package1, package1))
else:
details1 = self.get_details(package1)
details2 = self.get_details(package2)
- cur.execute("SELECT a.filename, a.size, a.function, b.filename, b.size, b.function, a.hash FROM content AS a JOIN content AS b ON a.hash = b.hash WHERE a.package = ? AND b.package = ? ORDER BY a.size DESC, a.filename, b.filename;",
+ cur.execute("SELECT a.filename, a.size, ha.function, b.filename, b.size, hb.function, ha.hash FROM content AS a JOIN hash AS ha ON a.id = ha.cid JOIN hash AS hb ON ha.hash = hb.hash JOIN content AS b ON b.id = hb.cid WHERE a.package = ? AND b.package = ? ORDER BY a.size DESC, a.filename, b.filename;",
(package1, package2))
shared = generate_shared(fetchiter(cur))
# The cursor will be in use until the template is fully rendered.
@@ -297,7 +297,7 @@ class Application(object):
def show_hash(self, function, hashvalue):
cur = self.db.cursor()
- cur.execute("SELECT package, filename, size, function FROM content WHERE hash = ?;",
+ cur.execute("SELECT content.package, content.filename, content.size, hash.function FROM content JOIN hash ON content.id = hash.cid WHERE hash = ?;",
(hashvalue,))
entries = [dict(package=package, filename=filename, size=size,
function=otherfunc)