summaryrefslogtreecommitdiff
path: root/webapp.py
diff options
context:
space:
mode:
author: Helmut Grohne <helmut@subdivi.de> 2013-08-02 08:40:49 +0200
committer: Helmut Grohne <helmut@subdivi.de> 2013-08-02 08:40:49 +0200
commit: cb3708825bf7ea32314040575cef35980dad0cd8 (patch)
tree: 31575a8525dc90ba6904268d94f47e1604bf0557 /webapp.py
parent: a4bbbb6e664e605634cb3f9e0564c7e4a93697be (diff)
parent: 2712edb550968ce7ec8cd9800241d7944666631a (diff)
download: debian-dedup-cb3708825bf7ea32314040575cef35980dad0cd8.tar.gz
Merge branch master into sqlalchemy
This makes the sqlalchemy branch schema-compatible with master again. The biggest change on master was the introduction of the function table, which caused most of the conflicts. Note that webapp.py had one conflict that git did not detect: the selection of issues in show_package needed conversion to sqlalchemy. Conflicts: README, update_sharing.py, webapp.py
Diffstat (limited to 'webapp.py')
-rwxr-xr-x webapp.py 187
1 files changed, 25 insertions, 162 deletions
diff --git a/webapp.py b/webapp.py
index ea25536..bbc45e1 100755
--- a/webapp.py
+++ b/webapp.py
@@ -1,7 +1,6 @@
#!/usr/bin/python
import datetime
-import os.path
from wsgiref.simple_server import make_server
import jinja2
@@ -15,12 +14,15 @@ from dedup.utils import fetchiter
hash_functions = [
("sha512", "sha512"),
- ("image_sha512", "image_sha512"),
+ ("png_sha512", "png_sha512"),
+ ("png_sha512", "gif_sha512"),
+ ("gif_sha512", "png_sha512"),
+ ("gif_sha512", "gif_sha512"),
("gzip_sha512", "gzip_sha512"),
("sha512", "gzip_sha512"),
("gzip_sha512", "sha512")]
-jinjaenv = jinja2.Environment(loader=jinja2.FileSystemLoader("."))
+jinjaenv = jinja2.Environment(loader=jinja2.PackageLoader("dedup", "templates"))
def format_size(size):
size = float(size)
@@ -45,137 +47,11 @@ def function_combination(function1, function2):
jinjaenv.filters["filesizeformat"] = format_size
base_template = jinjaenv.get_template("base.html")
-
-package_template = jinjaenv.from_string(
-"""{% extends "base.html" %}
-{% block title %}duplication of {{ package|e }}{% endblock %}
-{% block content %}<h1>{{ package|e }}</h1>
-<p>Version: {{ version|e }}</p>
-<p>Architecture: {{ architecture|e }}</p>
-<p>Number of files: {{ num_files }}</p>
-<p>Total size: {{ total_size|filesizeformat }}</p>
-{%- if shared -%}
- {%- for function, sharing in shared.items() -%}
- <h3>sharing with respect to {{ function|e }}</h3>
- <table border='1'><tr><th>package</th><th>files shared</th><th>data shared</th></tr>
- {%- for entry in sharing|sort(attribute="savable", reverse=true) -%}
- <tr><td{% if not entry.package or entry.package in dependencies %} class="dependency"{% endif %}>
- {%- if entry.package %}<a href="{{ entry.package|e }}"><span class="binary-package">{{ entry.package|e }}</span></a>{% else %}self{% endif %}
- <a href="../compare/{{ package|e }}/{{ entry.package|default(package, true)|e }}">compare</a></td>
- <td>{{ entry.duplicate }} ({{ (100 * entry.duplicate / num_files)|int }}%)</td>
- <td>{{ entry.savable|filesizeformat }} ({{ (100 * entry.savable / total_size)|int }}%)</td></tr>
- {%- endfor -%}
- </table>
- {%- endfor -%}
-<p>Note: Packages with yellow background are required to be installed when this package is installed.</p>
-{%- endif -%}
-{% endblock %}""")
-
-detail_template = jinjaenv.from_string(
-"""{% extends "base.html" %}
-{% block title %}sharing between {{ details1.package|e }} and {{ details2.package|e }}{% endblock%}
-{% block content %}
-<h1><a href="../../binary/{{ details1.package|e }}">{{ details1.package|e }}</a> &lt;-&gt; <a href="../../binary/{{ details2.package|e }}">{{ details2.package|e }}</a></h1>
-<p>Version of {{ details1.package|e }}: {{ details1.version|e }}</p>
-<p>Architecture of {{ details1.package|e }}: {{ details1.architecture|e }}</p>
-{%- if details1.package != details2.package -%}
-<p>Version of {{ details2.package|e }}: {{ details2.version|e }}</p>
-<p>Architecture of {{ details2.package|e }}: {{ details2.architecture|e }}</p>
-{%- endif -%}
-<table border='1'><tr><th colspan="2">{{ details1.package|e }}</th><th colspan="2">{{ details2.package|e }}</th></tr>
-<tr><th>size</th><th>filename</th><th>hash functions</th><th>filename</th></tr>
-{%- for entry in shared -%}
- <tr><td{% if entry.matches|length > 1 %} rowspan={{ entry.matches|length }}{% endif %}>{{ entry.size|filesizeformat }}</td><td{% if entry.matches|length > 1 %} rowspan={{ entry.matches|length }}{% endif %}>
- {%- for filename in entry.filenames %}<span class="filename">{{ filename|e }}</span>{% endfor -%}</td><td>
- {% for filename, match in entry.matches.items() -%}
- {% if not loop.first %}<tr><td>{% endif -%}
- {%- for funccomb, hashvalue in match.items() -%}
- <a href="../../hash/{{ funccomb[0]|e }}/{{ hashvalue|e }}">{{ funccomb[0]|e }}</a>
- {%- if funccomb[0] != funccomb[1] %} -&gt; <a href="../../hash/{{ funccomb[1]|e }}/{{ hashvalue|e }}">{{ funccomb[1]|e }}</a>{% endif %}
- {%- if not loop.last %}, {% endif %}
- {%- endfor -%}
- </td><td><span class="filename">{{ filename|e }}</span></td></tr>
- {%- endfor -%}
-{%- endfor -%}
-</table>
-{% endblock %}""")
-
-hash_template = jinjaenv.from_string(
-"""{% extends "base.html" %}
-{% block title %}information on {{ function|e }} hash {{ hashvalue|e }}{% endblock %}
-{% block content %}
-<h1>{{ function|e }} {{ hashvalue|e }}</h1>
-<table border='1'><tr><th>package</th><th>filename</th><th>size</th><th>different function</th></tr>
-{%- for entry in entries -%}
- <tr><td><a href="../../binary/{{ entry.package|e }}"><span class="binary-package">{{ entry.package|e }}</span></a></td>
- <td><span class="filename">{{ entry.filename|e }}</span></td><td>{{ entry.size|filesizeformat }}</td>
- <td>{% if function != entry.function %}{{ entry.function|e }}{% endif %}</td></tr>
-{%- endfor -%}
-</table>
-{% endblock %}""")
-
-index_template = jinjaenv.from_string(
-"""{% extends "base.html" %}
-{% block title %}Debian duplication detector{% endblock %}
-{% block header %}
- <script type="text/javascript">
- function getLinkTarget() {
- var pkg = document.getElementById("pkg_name").value;
- if(pkg) {
- return "/binary/"+pkg;
- }
- return '#';
- }
- function processData() {
- var link = document.getElementById("perma_link");
- link.href = getLinkTarget();
- link.text = location.href + getLinkTarget();
- }
- window.onload = function() {
- document.getElementById('pkg_name').onkeyup = processData;
- document.getElementById("pkg_form").onsubmit = function () {
- location.href = getLinkTarget();
- return false;
- }
- processData();
- document.getElementById("form_div").style.display = '';
- }
- </script>
-{% endblock %}
-{% block content %}
-<h1>Debian duplication detector</h1>
-<ul>
-<li>To inspect a particlar binary package, go to <pre>binary/&lt;packagename&gt;</pre> Example: <a href="binary/git">binary/git</a>
- <div style="display:none" id="form_div"><fieldset>
- <legend>Inspect package</legend>
- <noscript><b>This form is disfunctional when javascript is not enabled</b></noscript>
- Enter binary package to inspect - Note: Non-existing packages will result in <b>404</b>-Errors
- <form id="pkg_form">
- <label for="pkg_name">Name: <input type="text" size="30" name="pkg_name" id="pkg_name">
- <input type="submit" value="Go"> Permanent Link: <a id="perma_link" href="#"></a>
- </form>
- </fieldset></div></li>
-<li>To inspect a combination of binary packages go to <pre>compare/&lt;firstpackage&gt;/&lt;secondpackage&gt;</pre> Example: <a href="compare/git/git">compare/git/git</a></li>
-<li>To discover package shipping a particular file go to <pre>hash/sha512/&lt;hashvalue&gt;</pre> Example: <a href="hash/sha512/7633623b66b5e686bb94dd96a7cdb5a7e5ee00e87004fab416a5610d59c62badaf512a2e26e34e2455b7ed6b76690d2cd47464836d7d85d78b51d50f7e933d5c">hash/sha512/7633623b66b5e686bb94dd96a7cdb5a7e5ee00e87004fab416a5610d59c62badaf512a2e26e34e2455b7ed6b76690d2cd47464836d7d85d78b51d50f7e933d5c</a></li>
-</ul>
-{% endblock %}""")
-
-source_template = jinjaenv.from_string(
-"""{% extends "base.html" %}
-{% block title %}overview of {{ source|e }}{% endblock %}
-{% block content %}
-<h1>overview of {{ source|e }}</h1>
-<table border='1'><tr><th>binary from {{ source|e }}</th><th>savable</th><th>other package</th></tr>
-{% for package, sharing in packages.items() %}
- <tr><td><a href="../binary/{{ package|e }}"><span class="binary-package">{{ package|e }}</span></a></td><td>
- {%- if sharing -%}
- {{ sharing.savable|filesizeformat }}</td><td><a href="../binary/{{ sharing.package|e }}"><span class="binary-package">{{ sharing.package|e }}</span></a> <a href="../compare/{{ package|e }}/{{ sharing.package|e }}">compare</a>
- {%- else -%}</td><td>{%- endif -%}
- </td></tr>
-{% endfor %}
-</table>
-<p>Note: Not all sharing listed here. Click on binary packages with non-zero savable to see more.</p>
-{% endblock %}""")
+package_template = jinjaenv.get_template("binary.html")
+detail_template = jinjaenv.get_template("compare.html")
+hash_template = jinjaenv.get_template("hash.html")
+index_template = jinjaenv.get_template("index.html")
+source_template = jinjaenv.get_template("source.html")
def encode_and_buffer(iterator):
buff = b""
@@ -193,27 +69,6 @@ def html_response(unicode_iterator, max_age=24 * 60 * 60):
resp.expires = datetime.datetime.now() + datetime.timedelta(seconds=max_age)
return resp
-def generate_shared(rows):
- """internal helper from show_detail"""
- entry = None
- for filename1, size1, func1, filename2, size2, func2, hashvalue in rows:
- funccomb = (func1, func2)
- if funccomb not in hash_functions:
- continue
- if entry and (entry["filename1"] != filename1 or
- entry["filename2"] != filename2):
- yield entry
- entry = None
- if entry:
- funcdict = entry["functions"]
- else:
- funcdict = dict()
- entry = dict(filename1=filename1, filename2=filename2, size1=size1,
- size2=size2, functions=funcdict)
- funcdict[funccomb] = hashvalue
- if entry:
- yield entry
-
class Application(object):
def __init__(self, db):
self.db = db
@@ -235,6 +90,11 @@ class Application(object):
elif endpoint == "detail":
return self.show_detail(args["package1"], args["package2"])
elif endpoint == "hash":
+ if args["function"] == "image_sha512":
+ # backwards compatibility
+ raise RequestRedirect("%s/hash/png_sha512/%s" %
+ (request.environ["SCRIPT_NAME"],
+ args["hashvalue"]))
return self.show_hash(args["function"], args["hashvalue"])
elif endpoint == "index":
if not request.environ["PATH_INFO"]:
@@ -274,7 +134,7 @@ class Application(object):
def cached_sharedstats(self, pid):
sharedstats = {}
with self.db.begin() as conn:
- cur = conn.execute(sqlalchemy.text("SELECT pid2, package.name, func1, func2, files, size FROM sharing JOIN package ON sharing.pid2 = package.id WHERE pid1 = :pid;"),
+ cur = conn.execute(sqlalchemy.text("SELECT pid2, package.name, f1.name, f2.name, files, size FROM sharing JOIN package ON sharing.pid2 = package.id JOIN function AS f1 ON sharing.fid1 = f1.id JOIN function AS f2 ON sharing.fid2 = f2.id WHERE pid1 = :pid;"),
pid=pid)
for pid2, package2, func1, func2, files, size in fetchiter(cur):
if (func1, func2) not in hash_functions:
@@ -292,6 +152,10 @@ class Application(object):
params["dependencies"] = self.get_dependencies(params["pid"])
params["shared"] = self.cached_sharedstats(params["pid"])
params["urlroot"] = ".."
+ with self.db.begin() as conn:
+ cur = conn.execute(sqlalchemy.text("SELECT content.filename, issue.issue FROM content JOIN issue ON content.id = issue.cid WHERE content.pid = :pid;"),
+ pid=params["pid"])
+ params["issues"] = dict(cur.fetchall())
return html_response(package_template.render(params))
def compute_comparison(self, pid1, pid2):
@@ -305,7 +169,7 @@ class Application(object):
from hash function pairs to hash values.
"""
with self.db.begin() as conn:
- cur = conn.execute(sqlalchemy.text("SELECT id, filename, size, hash FROM content JOIN hash ON content.id = hash.cid JOIN duplicate ON content.id = duplicate.cid WHERE pid = :pid AND function = 'sha512' ORDER BY size DESC;"),
+ cur = conn.execute(sqlalchemy.text("SELECT content.id, content.filename, content.size, hash.hash FROM content JOIN hash ON content.id = hash.cid JOIN duplicate ON content.id = duplicate.cid JOIN function ON hash.fid = function.id WHERE pid = :pid AND function.name = 'sha512' ORDER BY size DESC;"),
pid=pid1)
cursize = -1
files = dict()
@@ -325,7 +189,7 @@ class Application(object):
entry = dict(filenames=set((filename,)), size=size, matches={})
files[hashvalue] = entry
- cur = conn.execute(sqlalchemy.text("SELECT ha.function, ha.hash, hb.function, filename FROM hash AS ha JOIN hash AS hb ON ha.hash = hb.hash JOIN content ON hb.cid = content.id WHERE ha.cid = :cid AND pid = :pid;"),
+ cur = conn.execute(sqlalchemy.text("SELECT fa.name, ha.hash, fb.name, filename FROM hash AS ha JOIN hash AS hb ON ha.hash = hb.hash JOIN content ON hb.cid = content.id JOIN function AS fa ON ha.fid = fa.id JOIN function AS fb ON hb.fid = fb.id WHERE ha.cid = :cid AND pid = :pid;"),
cid=cid, pid=pid2)
for func1, hashvalue, func2, filename in fetchiter(cur):
entry["matches"].setdefault(filename, {})[func1, func2] = \
@@ -350,7 +214,7 @@ class Application(object):
def show_hash(self, function, hashvalue):
with self.db.begin() as conn:
- cur = conn.execute(sqlalchemy.text("SELECT package.name, content.filename, content.size, hash.function FROM content JOIN hash ON hash.cid = content.id JOIN package ON content.pid = package.id WHERE hash = :hashvalue;"),
+ cur = conn.execute(sqlalchemy.text("SELECT package.name, content.filename, content.size, function.name FROM hash JOIN content ON hash.cid = content.id JOIN package ON content.pid = package.id JOIN function ON hash.fid = function.id WHERE hash = :hashvalue;"),
hashvalue=hashvalue)
entries = [dict(package=package, filename=filename, size=size,
function=otherfunc)
@@ -369,7 +233,7 @@ class Application(object):
binpkgs = dict.fromkeys(pkg for pkg, in fetchiter(cur))
if not binpkgs:
raise NotFound
- cur = conn.execute(sqlalchemy.text("SELECT p1.name, p2.name, sharing.func1, sharing.func2, sharing.files, sharing.size FROM sharing JOIN package AS p1 ON sharing.pid1 = p1.id JOIN package AS p2 ON sharing.pid2 = p2.id WHERE p1.source = :source;"),
+ cur = conn.execute(sqlalchemy.text("SELECT p1.name, p2.name, f1.name, f2.name, sharing.files, sharing.size FROM sharing JOIN package AS p1 ON sharing.pid1 = p1.id JOIN package AS p2 ON sharing.pid2 = p2.id JOIN function AS f1 ON sharing.fid1 = f1.id JOIN function AS f2 ON sharing.fid2 = f2.id WHERE p1.source = :source;"),
source=package)
for binary, otherbin, func1, func2, files, size in fetchiter(cur):
entry = dict(package=otherbin,
@@ -384,8 +248,7 @@ class Application(object):
def main():
db = sqlalchemy.create_engine("sqlite:///test.sqlite3")
app = Application(db)
- staticdir = os.path.join(os.path.dirname(__file__), "static")
- app = SharedDataMiddleware(app, {"/": staticdir})
+ app = SharedDataMiddleware(app, {"/": ("dedup", "static")})
make_server("0.0.0.0", 8800, app).serve_forever()
if __name__ == "__main__":