From 947358442ce048038a2e1a0bcd02399a9be38786 Mon Sep 17 00:00:00 2001 From: Helmut Grohne Date: Fri, 22 Feb 2013 14:12:33 +0100 Subject: webapp: support matching sha512 against gzip_sha512 This covers only the /binary page. The comparison may still be empty. --- webapp.py | 39 ++++++++++++++++++++++++--------------- 1 file changed, 24 insertions(+), 15 deletions(-) (limited to 'webapp.py') diff --git a/webapp.py b/webapp.py index df8cba7..10e526f 100755 --- a/webapp.py +++ b/webapp.py @@ -8,6 +8,12 @@ from werkzeug.exceptions import HTTPException, NotFound from werkzeug.routing import Map, Rule, RequestRedirect from werkzeug.wrappers import Request, Response +hash_functions = [ + ("sha512", "sha512"), + ("gzip_sha512", "gzip_sha512"), + ("sha512", "gzip_sha512"), + ("gzip_sha512", "sha512")] + jinjaenv = jinja2.Environment(loader=jinja2.FileSystemLoader(".")) def format_size(size): @@ -40,7 +46,7 @@ package_template = jinjaenv.from_string(

Total size: {{ total_size|format_size }}

{%- if shared -%} {%- for function, sharing in shared.items() -%} -

sharing with respect to {{ function }}

+

sharing with respect to {{ function|e }}

{%- for entry in sharing|sort(attribute="savable", reverse=true) -%} @@ -63,7 +69,7 @@ detail_template = jinjaenv.from_string(
<tr><th>package</th><th>files shared</th><th>data shared</th></tr>
{%- for entry in shared|sort(attribute="size", reverse=true) -%} + {%- for function, hashvalue in entry.functions.items() %}{{ function|e }} {% endfor %} {%- endfor -%}
<tr><th>size</th><th>filename in {{ details1.package|e }}</th><th>filename in {{ details2.package|e }}</th><th>hash functions</th></tr>
{{ entry.size|format_size }}{{ entry.filename1 }}{{ entry.filename2 }} - {%- for function, hashvalue in entry.functions.items() %}{{ function|e }} {% endfor %}
{%- endif -%} @@ -150,27 +156,30 @@ class Application(object): params = self.get_details(package) params["dependencies"] = self.get_dependencies(package) - shared = dict() - self.cur.execute("SELECT a.filename, a.function, a.hash, a.size, b.package FROM content AS a JOIN content AS b ON a.function = b.function AND a.hash = b.hash WHERE a.package = ? AND (a.filename != b.filename OR b.package != ?);", - (package, package)) - for afile, function, hashval, size, bpkg in self.cur.fetchall(): - pkgdict = shared.setdefault(function, dict()) - hashdict = pkgdict.setdefault(bpkg, dict()) - fileset = hashdict.setdefault(hashval, (size, set()))[1] - fileset.add(afile) sharedstats = {} - if shared: - for function, sharing in shared.items(): - sharedstats[function] = list() + for func1, func2 in hash_functions: + self.cur.execute("SELECT a.filename, a.hash, a.size, b.package FROM content AS a JOIN content AS b ON a.hash = b.hash WHERE a.package = ? AND a.function = ? AND b.function = ? AND (a.filename != b.filename OR b.package != ?);", + (package, func1, func2, package)) + sharing = dict() + for afile, hashval, size, bpkg in self.cur.fetchall(): + hashdict = sharing.setdefault(bpkg, dict()) + fileset = hashdict.setdefault(hashval, (size, set()))[1] + fileset.add(afile) + if sharing: + curstats = list() + if func1 == func2: + sharedstats[func1] = curstats + else: + sharedstats["%s -> %s" % (func1, func2)] = curstats mapping = sharing.pop(package, dict()) if mapping: duplicate = sum(len(files) for _, files in mapping.values()) savable = sum(size * (len(files) - 1) for size, files in mapping.values()) - sharedstats[function].append(dict(package=None, duplicate=duplicate, savable=savable)) + curstats.append(dict(package=None, duplicate=duplicate, savable=savable)) for pkg, mapping in sharing.items(): duplicate = sum(len(files) for _, files in mapping.values()) savable = sum(size * len(files) for size, files in mapping.values()) - sharedstats[function].append(dict(package=pkg, 
duplicate=duplicate, savable=savable)) + curstats.append(dict(package=pkg, duplicate=duplicate, savable=savable)) params["shared"] = sharedstats return Response(package_template.render(**params).encode("utf8"), -- cgit v1.2.3 From 3c3e94bf5026d27d9307a825bae77dba968a1d9f Mon Sep 17 00:00:00 2001 From: Helmut Grohne Date: Mon, 25 Feb 2013 09:55:35 +0100 Subject: webapp: complete cross hash support --- webapp.py | 46 +++++++++++++++++++++++++++++----------------- 1 file changed, 29 insertions(+), 17 deletions(-) (limited to 'webapp.py') diff --git a/webapp.py b/webapp.py index 10e526f..06aa5d6 100755 --- a/webapp.py +++ b/webapp.py @@ -66,10 +66,13 @@ detail_template = jinjaenv.from_string( {% block content %}

{{ details1.package|e }} <-> {{ details2.package|e }}

{%- if shared -%} - - {%- for entry in shared|sort(attribute="size", reverse=true) -%} - +
<tr><th>size</th><th>filename in {{ details1.package|e }}</th><th>filename in {{ details2.package|e }}</th><th>hash functions</th></tr>
{{ entry.size|format_size }}{{ entry.filename1 }}{{ entry.filename2 }} - {%- for function, hashvalue in entry.functions.items() %}{{ function|e }} {% endfor %}
+ + {%- for entry in shared|sort(attribute="size1", reverse=true) -%} + + {%- endfor -%}
{{ details1.package|e }}{{ details2.package|e }}
<tr><th>size</th><th>filename</th><th>hash functions</th><th>size</th><th>filename</th><th>hash functions</th></tr>
{{ entry.size1|format_size }}{{ entry.filename1 }} + {%- for funccomb, hashvalue in entry.functions.items() %}{{ funccomb[0]|e }} {% endfor %}{{ entry.size2|format_size }}{{ entry.filename2 }} + {%- for funccomb, hashvalue in entry.functions.items() %}{{ funccomb[1]|e }} {% endfor %}
{%- endif -%} @@ -80,10 +83,11 @@ hash_template = jinjaenv.from_string( {% block title %}information on {{ function|e }} hash {{ hashvalue|e }}{% endblock %} {% block content %}

{{ function|e }} {{ hashvalue|e }}

- +
<tr><th>package</th><th>filename</th><th>size</th></tr>
{%- for entry in entries -%} - + + {%- endfor -%}
<tr><th>package</th><th>filename</th><th>size</th><th>different function</th></tr>
{{ entry.package|e }}{{ entry.filename|e }}{{ entry.size|format_size }}
{{ entry.filename|e }}{{ entry.size|format_size }}{% if function != entry.function %}{{ entry.function|e }}{% endif %}
{% endblock %}""") @@ -189,21 +193,27 @@ class Application(object): if package1 == package2: details1 = details2 = self.get_details(package1) - self.cur.execute("SELECT a.filename, b.filename, a.size, a.function, a.hash FROM content AS a JOIN content AS b ON a.function = b.function AND a.hash = b.hash WHERE a.package = ? AND b.package = ? AND a.filename != b.filename;", + self.cur.execute("SELECT a.filename, a.size, a.function, b.filename, b.size, b.function, a.hash FROM content AS a JOIN content AS b ON a.hash = b.hash WHERE a.package = ? AND b.package = ? AND a.filename != b.filename;", (package1, package1)) else: details1 = self.get_details(package1) details2 = self.get_details(package2) - self.cur.execute("SELECT a.filename, b.filename, a.size, a.function, a.hash FROM content AS a JOIN content AS b ON a.function = b.function AND a.hash = b.hash WHERE a.package = ? AND b.package = ?;", + self.cur.execute("SELECT a.filename, a.size, a.function, b.filename, b.size, b.function, a.hash FROM content AS a JOIN content AS b ON a.hash = b.hash WHERE a.package = ? 
AND b.package = ?;", (package1, package2)) shared = dict() - for filename1, filename2, size, function, hashvalue in self.cur.fetchall(): - shared.setdefault((filename1, filename2, size), dict())[function] = hashvalue - shared = [dict(filename1=filename1, filename2=filename2, size=size, - functions=functions) - for (filename1, filename2, size), functions in shared.items()] + for filename1, size1, func1, filename2, size2, func2, hashvalue in self.cur.fetchall(): + funccomb = (func1, func2) + if funccomb not in hash_functions: + continue + funcdict = shared.setdefault((filename1, filename2), + (size1, size2, dict()))[2] + funcdict[(func1, func2)] = hashvalue + shared = [dict(filename1=filename1, filename2=filename2, size1=size1, + size2=size2, functions=functions) + for (filename1, filename2), (size1, size2, functions) + in shared.items()] params = dict( details1=details1, details2=details2, @@ -212,10 +222,12 @@ class Application(object): content_type="text/html") def show_hash(self, function, hashvalue): - self.cur.execute("SELECT package, filename, size FROM content WHERE function = ? AND hash = ?;", - (function, hashvalue)) - entries = [dict(package=package, filename=filename, size=size) - for package, filename, size in self.cur.fetchall()] + self.cur.execute("SELECT package, filename, size, function FROM content WHERE hash = ?;", + (hashvalue,)) + entries = [dict(package=package, filename=filename, size=size, + function=otherfunc) + for package, filename, size, otherfunc in self.cur.fetchall() + if (function, otherfunc) in hash_functions] if not entries: raise NotFound() params = dict(function=function, hashvalue=hashvalue, entries=entries) -- cgit v1.2.3