summaryrefslogtreecommitdiff
path: root/webapp.py
diff options
context:
space:
mode:
authorHelmut Grohne <helmut@subdivi.de>2013-07-17 16:27:08 +0200
committerHelmut Grohne <helmut@subdivi.de>2013-07-17 16:27:08 +0200
commited3e611cfc54b8c916e919701070bfd5c6770610 (patch)
treefe06694d5a2212c87a0d149eccb0f4cbb889a5cd /webapp.py
parenta03daac99a237babcd874748d19fc0f809a1dc60 (diff)
parent6205c89b1e289f04dcea1e6e32fafa6357abf063 (diff)
downloaddebian-dedup-ed3e611cfc54b8c916e919701070bfd5c6770610.tar.gz
Merge branch master into sqlalchemy
This basically pulls the packageid branch into sqlalchemy. The merge was complex because many SQL statements diverged. The merge brings us one step closer to supporting PostgreSQL, because an "INSERT OR REPLACE" was removed from readyaml.py in the packageid branch. Conflicts: update_sharing.py, webapp.py
Diffstat (limited to 'webapp.py')
-rwxr-xr-xwebapp.py61
1 files changed, 31 insertions, 30 deletions
diff --git a/webapp.py b/webapp.py
index 9883caf..2ed84bb 100755
--- a/webapp.py
+++ b/webapp.py
@@ -248,39 +248,40 @@ class Application(object):
def get_details(self, package):
with self.db.begin() as conn:
- row = conn.execute("SELECT version, architecture FROM package WHERE package = :package;",
- package=package).fetchone()
+ row = conn.execute("SELECT id, version, architecture FROM package WHERE name = :name;",
+ name=package).fetchone()
if not row:
raise NotFound()
- version, architecture = row
- row = conn.execute("SELECT count(filename), sum(size) FROM content WHERE package = :package;",
- package=package).fetchone()
+ pid, version, architecture = row
+ row = conn.execute("SELECT count(filename), sum(size) FROM content WHERE pid = :pid;",
+ pid=pid).fetchone()
num_files, total_size = row
if total_size is None:
total_size = 0
- return dict(package=package,
+ return dict(pid=pid,
+ package=package,
version=version,
architecture=architecture,
num_files=num_files,
total_size=total_size)
- def get_dependencies(self, package):
+ def get_dependencies(self, pid):
with self.db.begin() as conn:
- cur = conn.execute("SELECT required FROM dependency WHERE package = :package;",
- package=package)
+ cur = conn.execute("SELECT required FROM dependency WHERE pid = :pid;",
+ pid=pid)
return set(row[0] for row in fetchiter(cur))
- def cached_sharedstats(self, package):
+ def cached_sharedstats(self, pid):
sharedstats = {}
with self.db.begin() as conn:
- cur = conn.execute("SELECT package2, func1, func2, files, size FROM sharing WHERE package1 = :package;",
- package=package)
- for package2, func1, func2, files, size in fetchiter(cur):
+ cur = conn.execute("SELECT pid2, package.name, func1, func2, files, size FROM sharing JOIN package ON sharing.pid2 = package.id WHERE pid1 = :pid;",
+ pid=pid)
+ for pid2, package2, func1, func2, files, size in fetchiter(cur):
if (func1, func2) not in hash_functions:
continue
curstats = sharedstats.setdefault(
function_combination(func1, func2), list())
- if package2 == package:
+ if pid2 == pid:
package2 = None
curstats.append(dict(package=package2, duplicate=files,
savable=size))
@@ -288,27 +289,27 @@ class Application(object):
def show_package(self, package):
params = self.get_details(package)
- params["dependencies"] = self.get_dependencies(package)
- params["shared"] = self.cached_sharedstats(package)
+ params["dependencies"] = self.get_dependencies(params["pid"])
+ params["shared"] = self.cached_sharedstats(params["pid"])
params["urlroot"] = ".."
return html_response(package_template.render(params))
- def compute_comparison(self, package1, package2):
+ def compute_comparison(self, pid1, pid2):
"""Compute a sequence of comparison objects ordery by the size of the
object in the first package. Each element of the sequence is a dict
defining the following keys:
- * filenames: A set of filenames in package1 all referring to the
- same object.
+ * filenames: A set of filenames in package 1 (pid1) all referring to
+ the same object.
* size: Size of the object in bytes.
- * matches: A mapping from filenames in package2 to a mapping from
- hash function pairs to hash values.
+ * matches: A mapping from filenames in package 2 (pid2) to a mapping
+ from hash function pairs to hash values.
"""
with self.db.begin() as conn:
- cur = conn.execute("SELECT id, filename, size, hash FROM content JOIN hash ON content.id = hash.cid JOIN duplicate ON content.id = duplicate.cid WHERE package = :package AND function = 'sha512' ORDER BY size DESC;",
- package=package1)
+ cur = conn.execute("SELECT id, filename, size, hash FROM content JOIN hash ON content.id = hash.cid JOIN duplicate ON content.id = duplicate.cid WHERE pid = :pid AND function = 'sha512' ORDER BY size DESC;",
+ pid=pid1)
cursize = -1
files = dict()
- minmatch = 2 if package1 == package2 else 1
+ minmatch = 2 if pid1 == pid2 else 1
for cid, filename, size, hashvalue in fetchiter(cur):
if cursize != size:
for entry in files.values():
@@ -324,8 +325,8 @@ class Application(object):
entry = dict(filenames=set((filename,)), size=size, matches={})
files[hashvalue] = entry
- cur = conn.execute("SELECT ha.function, ha.hash, hb.function, filename FROM hash AS ha JOIN hash AS hb ON ha.hash = hb.hash JOIN content ON hb.cid = content.id WHERE ha.cid = :cid AND package = :package;",
- cid=cid, package=package2)
+ cur = conn.execute("SELECT ha.function, ha.hash, hb.function, filename FROM hash AS ha JOIN hash AS hb ON ha.hash = hb.hash JOIN content ON hb.cid = content.id WHERE ha.cid = :cid AND pid = :pid;",
+ cid=cid, pid=pid2)
for func1, hashvalue, func2, filename in fetchiter(cur):
entry["matches"].setdefault(filename, {})[func1, func2] = \
hashvalue
@@ -339,7 +340,7 @@ class Application(object):
if package1 != package2:
details2 = self.get_details(package2)
- shared = self.compute_comparison(package1, package2)
+ shared = self.compute_comparison(details1["pid"], details2["pid"])
params = dict(
details1=details1,
details2=details2,
@@ -349,7 +350,7 @@ class Application(object):
def show_hash(self, function, hashvalue):
with self.db.begin() as conn:
- cur = conn.execute("SELECT content.package, content.filename, content.size, hash.function FROM content JOIN hash ON content.id = hash.cid WHERE hash = :hashvalue;",
+ cur = conn.execute("SELECT package.name, content.filename, content.size, hash.function FROM content JOIN hash ON hash.cid = content.id JOIN package ON content.pid = package.id WHERE hash = :hashvalue;",
hashvalue=hashvalue)
entries = [dict(package=package, filename=filename, size=size,
function=otherfunc)
@@ -363,12 +364,12 @@ class Application(object):
def show_source(self, package):
with self.db.begin() as conn:
- cur = conn.execute("SELECT package FROM package WHERE source = :source;",
+ cur = conn.execute("SELECT name FROM package WHERE source = :source;",
source=package)
binpkgs = dict.fromkeys(pkg for pkg, in fetchiter(cur))
if not binpkgs:
raise NotFound
- cur = conn.execute("SELECT package.package, sharing.package2, sharing.func1, sharing.func2, sharing.files, sharing.size FROM package JOIN sharing ON package.package = sharing.package1 WHERE package.source = :source;",
+ cur = conn.execute("SELECT p1.name, p2.name, sharing.func1, sharing.func2, sharing.files, sharing.size FROM sharing JOIN package AS p1 ON sharing.pid1 = p1.id JOIN package AS p2 ON sharing.pid2 = p2.id WHERE p1.source = :source;",
source=package)
for binary, otherbin, func1, func2, files, size in fetchiter(cur):
entry = dict(package=otherbin,