diff options
Diffstat (limited to 'webapp.py')
-rwxr-xr-x | webapp.py | 187 |
1 files changed, 25 insertions, 162 deletions
@@ -1,7 +1,6 @@ #!/usr/bin/python import datetime -import os.path from wsgiref.simple_server import make_server import jinja2 @@ -15,12 +14,15 @@ from dedup.utils import fetchiter hash_functions = [ ("sha512", "sha512"), - ("image_sha512", "image_sha512"), + ("png_sha512", "png_sha512"), + ("png_sha512", "gif_sha512"), + ("gif_sha512", "png_sha512"), + ("gif_sha512", "gif_sha512"), ("gzip_sha512", "gzip_sha512"), ("sha512", "gzip_sha512"), ("gzip_sha512", "sha512")] -jinjaenv = jinja2.Environment(loader=jinja2.FileSystemLoader(".")) +jinjaenv = jinja2.Environment(loader=jinja2.PackageLoader("dedup", "templates")) def format_size(size): size = float(size) @@ -45,137 +47,11 @@ def function_combination(function1, function2): jinjaenv.filters["filesizeformat"] = format_size base_template = jinjaenv.get_template("base.html") - -package_template = jinjaenv.from_string( -"""{% extends "base.html" %} -{% block title %}duplication of {{ package|e }}{% endblock %} -{% block content %}<h1>{{ package|e }}</h1> -<p>Version: {{ version|e }}</p> -<p>Architecture: {{ architecture|e }}</p> -<p>Number of files: {{ num_files }}</p> -<p>Total size: {{ total_size|filesizeformat }}</p> -{%- if shared -%} - {%- for function, sharing in shared.items() -%} - <h3>sharing with respect to {{ function|e }}</h3> - <table border='1'><tr><th>package</th><th>files shared</th><th>data shared</th></tr> - {%- for entry in sharing|sort(attribute="savable", reverse=true) -%} - <tr><td{% if not entry.package or entry.package in dependencies %} class="dependency"{% endif %}> - {%- if entry.package %}<a href="{{ entry.package|e }}"><span class="binary-package">{{ entry.package|e }}</span></a>{% else %}self{% endif %} - <a href="../compare/{{ package|e }}/{{ entry.package|default(package, true)|e }}">compare</a></td> - <td>{{ entry.duplicate }} ({{ (100 * entry.duplicate / num_files)|int }}%)</td> - <td>{{ entry.savable|filesizeformat }} ({{ (100 * entry.savable / total_size)|int }}%)</td></tr> - {%- endfor -%} - </table> - {%- endfor -%} -<p>Note: Packages with yellow background are required to be installed when this package is installed.</p> -{%- endif -%} -{% endblock %}""") - -detail_template = jinjaenv.from_string( -"""{% extends "base.html" %} -{% block title %}sharing between {{ details1.package|e }} and {{ details2.package|e }}{% endblock%} -{% block content %} -<h1><a href="../../binary/{{ details1.package|e }}">{{ details1.package|e }}</a> <-> <a href="../../binary/{{ details2.package|e }}">{{ details2.package|e }}</a></h1> -<p>Version of {{ details1.package|e }}: {{ details1.version|e }}</p> -<p>Architecture of {{ details1.package|e }}: {{ details1.architecture|e }}</p> -{%- if details1.package != details2.package -%} -<p>Version of {{ details2.package|e }}: {{ details2.version|e }}</p> -<p>Architecture of {{ details2.package|e }}: {{ details2.architecture|e }}</p> -{%- endif -%} -<table border='1'><tr><th colspan="2">{{ details1.package|e }}</th><th colspan="2">{{ details2.package|e }}</th></tr> -<tr><th>size</th><th>filename</th><th>hash functions</th><th>filename</th></tr> -{%- for entry in shared -%} - <tr><td{% if entry.matches|length > 1 %} rowspan={{ entry.matches|length }}{% endif %}>{{ entry.size|filesizeformat }}</td><td{% if entry.matches|length > 1 %} rowspan={{ entry.matches|length }}{% endif %}> - {%- for filename in entry.filenames %}<span class="filename">{{ filename|e }}</span>{% endfor -%}</td><td> - {% for filename, match in entry.matches.items() -%} - {% if not loop.first %}<tr><td>{% endif -%} - {%- for funccomb, hashvalue in match.items() -%} - <a href="../../hash/{{ funccomb[0]|e }}/{{ hashvalue|e }}">{{ funccomb[0]|e }}</a> - {%- if funccomb[0] != funccomb[1] %} -> <a href="../../hash/{{ funccomb[1]|e }}/{{ hashvalue|e }}">{{ funccomb[1]|e }}</a>{% endif %} - {%- if not loop.last %}, {% endif %} - {%- endfor -%} - </td><td><span class="filename">{{ filename|e }}</span></td></tr> - {%- endfor -%} -{%- endfor -%} -</table> -{% endblock %}""") - -hash_template = jinjaenv.from_string( -"""{% extends "base.html" %} -{% block title %}information on {{ function|e }} hash {{ hashvalue|e }}{% endblock %} -{% block content %} -<h1>{{ function|e }} {{ hashvalue|e }}</h1> -<table border='1'><tr><th>package</th><th>filename</th><th>size</th><th>different function</th></tr> -{%- for entry in entries -%} - <tr><td><a href="../../binary/{{ entry.package|e }}"><span class="binary-package">{{ entry.package|e }}</span></a></td> - <td><span class="filename">{{ entry.filename|e }}</span></td><td>{{ entry.size|filesizeformat }}</td> - <td>{% if function != entry.function %}{{ entry.function|e }}{% endif %}</td></tr> -{%- endfor -%} -</table> -{% endblock %}""") - -index_template = jinjaenv.from_string( -"""{% extends "base.html" %} -{% block title %}Debian duplication detector{% endblock %} -{% block header %} - <script type="text/javascript"> - function getLinkTarget() { - var pkg = document.getElementById("pkg_name").value; - if(pkg) { - return "/binary/"+pkg; - } - return '#'; - } - function processData() { - var link = document.getElementById("perma_link"); - link.href = getLinkTarget(); - link.text = location.href + getLinkTarget(); - } - window.onload = function() { - document.getElementById('pkg_name').onkeyup = processData; - document.getElementById("pkg_form").onsubmit = function () { - location.href = getLinkTarget(); - return false; - } - processData(); - document.getElementById("form_div").style.display = ''; - } - </script> -{% endblock %} -{% block content %} -<h1>Debian duplication detector</h1> -<ul> -<li>To inspect a particlar binary package, go to <pre>binary/<packagename></pre> Example: <a href="binary/git">binary/git</a> - <div style="display:none" id="form_div"><fieldset> - <legend>Inspect package</legend> - <noscript><b>This form is disfunctional when javascript is not enabled</b></noscript> - Enter binary package to inspect - Note: Non-existing packages will result in <b>404</b>-Errors - <form id="pkg_form"> - <label for="pkg_name">Name: <input type="text" size="30" name="pkg_name" id="pkg_name"> - <input type="submit" value="Go"> Permanent Link: <a id="perma_link" href="#"></a> - </form> - </fieldset></div></li> -<li>To inspect a combination of binary packages go to <pre>compare/<firstpackage>/<secondpackage></pre> Example: <a href="compare/git/git">compare/git/git</a></li> -<li>To discover package shipping a particular file go to <pre>hash/sha512/<hashvalue></pre> Example: <a href="hash/sha512/7633623b66b5e686bb94dd96a7cdb5a7e5ee00e87004fab416a5610d59c62badaf512a2e26e34e2455b7ed6b76690d2cd47464836d7d85d78b51d50f7e933d5c">hash/sha512/7633623b66b5e686bb94dd96a7cdb5a7e5ee00e87004fab416a5610d59c62badaf512a2e26e34e2455b7ed6b76690d2cd47464836d7d85d78b51d50f7e933d5c</a></li> -</ul> -{% endblock %}""") - -source_template = jinjaenv.from_string( -"""{% extends "base.html" %} -{% block title %}overview of {{ source|e }}{% endblock %} -{% block content %} -<h1>overview of {{ source|e }}</h1> -<table border='1'><tr><th>binary from {{ source|e }}</th><th>savable</th><th>other package</th></tr> -{% for package, sharing in packages.items() %} - <tr><td><a href="../binary/{{ package|e }}"><span class="binary-package">{{ package|e }}</span></a></td><td> - {%- if sharing -%} - {{ sharing.savable|filesizeformat }}</td><td><a href="../binary/{{ sharing.package|e }}"><span class="binary-package">{{ sharing.package|e }}</span></a> <a href="../compare/{{ package|e }}/{{ sharing.package|e }}">compare</a> - {%- else -%}</td><td>{%- endif -%} - </td></tr> -{% endfor %} -</table> -<p>Note: Not all sharing listed here. Click on binary packages with non-zero savable to see more.</p> -{% endblock %}""") +package_template = jinjaenv.get_template("binary.html") +detail_template = jinjaenv.get_template("compare.html") +hash_template = jinjaenv.get_template("hash.html") +index_template = jinjaenv.get_template("index.html") +source_template = jinjaenv.get_template("source.html") def encode_and_buffer(iterator): buff = b"" @@ -193,27 +69,6 @@ def html_response(unicode_iterator, max_age=24 * 60 * 60): resp.expires = datetime.datetime.now() + datetime.timedelta(seconds=max_age) return resp -def generate_shared(rows): - """internal helper from show_detail""" - entry = None - for filename1, size1, func1, filename2, size2, func2, hashvalue in rows: - funccomb = (func1, func2) - if funccomb not in hash_functions: - continue - if entry and (entry["filename1"] != filename1 or - entry["filename2"] != filename2): - yield entry - entry = None - if entry: - funcdict = entry["functions"] - else: - funcdict = dict() - entry = dict(filename1=filename1, filename2=filename2, size1=size1, - size2=size2, functions=funcdict) - funcdict[funccomb] = hashvalue - if entry: - yield entry - class Application(object): def __init__(self, db): self.db = db @@ -235,6 +90,11 @@ class Application(object): elif endpoint == "detail": return self.show_detail(args["package1"], args["package2"]) elif endpoint == "hash": + if args["function"] == "image_sha512": + # backwards compatibility + raise RequestRedirect("%s/hash/png_sha512/%s" % + (request.environ["SCRIPT_NAME"], + args["hashvalue"])) return self.show_hash(args["function"], args["hashvalue"]) elif endpoint == "index": if not request.environ["PATH_INFO"]: @@ -274,7 +134,7 @@ class Application(object): def cached_sharedstats(self, pid): sharedstats = {} with self.db.begin() as conn: - cur = conn.execute(sqlalchemy.text("SELECT pid2, package.name, func1, func2, files, size FROM sharing JOIN package ON sharing.pid2 = package.id WHERE pid1 = :pid;"), + cur = conn.execute(sqlalchemy.text("SELECT pid2, package.name, f1.name, f2.name, files, size FROM sharing JOIN package ON sharing.pid2 = package.id JOIN function AS f1 ON sharing.fid1 = f1.id JOIN function AS f2 ON sharing.fid2 = f2.id WHERE pid1 = :pid;"), pid=pid) for pid2, package2, func1, func2, files, size in fetchiter(cur): if (func1, func2) not in hash_functions: @@ -292,6 +152,10 @@ class Application(object): params["dependencies"] = self.get_dependencies(params["pid"]) params["shared"] = self.cached_sharedstats(params["pid"]) params["urlroot"] = ".." + with self.db.begin() as conn: + cur = conn.execute(sqlalchemy.text("SELECT content.filename, issue.issue FROM content JOIN issue ON content.id = issue.cid WHERE content.pid = :pid;"), + pid=params["pid"]) + params["issues"] = dict(cur.fetchall()) return html_response(package_template.render(params)) def compute_comparison(self, pid1, pid2): @@ -305,7 +169,7 @@ class Application(object): from hash function pairs to hash values. """ with self.db.begin() as conn: - cur = conn.execute(sqlalchemy.text("SELECT id, filename, size, hash FROM content JOIN hash ON content.id = hash.cid JOIN duplicate ON content.id = duplicate.cid WHERE pid = :pid AND function = 'sha512' ORDER BY size DESC;"), + cur = conn.execute(sqlalchemy.text("SELECT content.id, content.filename, content.size, hash.hash FROM content JOIN hash ON content.id = hash.cid JOIN duplicate ON content.id = duplicate.cid JOIN function ON hash.fid = function.id WHERE pid = :pid AND function.name = 'sha512' ORDER BY size DESC;"), pid=pid1) cursize = -1 files = dict() @@ -325,7 +189,7 @@ class Application(object): entry = dict(filenames=set((filename,)), size=size, matches={}) files[hashvalue] = entry - cur = conn.execute(sqlalchemy.text("SELECT ha.function, ha.hash, hb.function, filename FROM hash AS ha JOIN hash AS hb ON ha.hash = hb.hash JOIN content ON hb.cid = content.id WHERE ha.cid = :cid AND pid = :pid;"), + cur = conn.execute(sqlalchemy.text("SELECT fa.name, ha.hash, fb.name, filename FROM hash AS ha JOIN hash AS hb ON ha.hash = hb.hash JOIN content ON hb.cid = content.id JOIN function AS fa ON ha.fid = fa.id JOIN function AS fb ON hb.fid = fb.id WHERE ha.cid = :cid AND pid = :pid;"), cid=cid, pid=pid2) for func1, hashvalue, func2, filename in fetchiter(cur): entry["matches"].setdefault(filename, {})[func1, func2] = \ @@ -350,7 +214,7 @@ class Application(object): def show_hash(self, function, hashvalue): with self.db.begin() as conn: - cur = conn.execute(sqlalchemy.text("SELECT package.name, content.filename, content.size, hash.function FROM content JOIN hash ON hash.cid = content.id JOIN package ON content.pid = package.id WHERE hash = :hashvalue;"), + cur = conn.execute(sqlalchemy.text("SELECT package.name, content.filename, content.size, function.name FROM hash JOIN content ON hash.cid = content.id JOIN package ON content.pid = package.id JOIN function ON hash.fid = function.id WHERE hash = :hashvalue;"), hashvalue=hashvalue) entries = [dict(package=package, filename=filename, size=size, function=otherfunc) @@ -369,7 +233,7 @@ class Application(object): binpkgs = dict.fromkeys(pkg for pkg, in fetchiter(cur)) if not binpkgs: raise NotFound - cur = conn.execute(sqlalchemy.text("SELECT p1.name, p2.name, sharing.func1, sharing.func2, sharing.files, sharing.size FROM sharing JOIN package AS p1 ON sharing.pid1 = p1.id JOIN package AS p2 ON sharing.pid2 = p2.id WHERE p1.source = :source;"), + cur = conn.execute(sqlalchemy.text("SELECT p1.name, p2.name, f1.name, f2.name, sharing.files, sharing.size FROM sharing JOIN package AS p1 ON sharing.pid1 = p1.id JOIN package AS p2 ON sharing.pid2 = p2.id JOIN function AS f1 ON sharing.fid1 = f1.id JOIN function AS f2 ON sharing.fid2 = f2.id WHERE p1.source = :source;"), source=package) for binary, otherbin, func1, func2, files, size in fetchiter(cur): entry = dict(package=otherbin, @@ -384,8 +248,7 @@ class Application(object): def main(): db = sqlalchemy.create_engine("sqlite:///test.sqlite3") app = Application(db) - staticdir = os.path.join(os.path.dirname(__file__), "static") - app = SharedDataMiddleware(app, {"/": staticdir}) + app = SharedDataMiddleware(app, {"/": ("dedup", "static")}) make_server("0.0.0.0", 8800, app).serve_forever() if __name__ == "__main__": |