summary refs log tree commit diff
path: root/webapp.py
diff options
context:
space:
mode:
Diffstat (limited to 'webapp.py')
-rwxr-xr-x webapp.py 187
1 files changed, 25 insertions, 162 deletions
diff --git a/webapp.py b/webapp.py
index ea25536..bbc45e1 100755
--- a/webapp.py
+++ b/webapp.py
@@ -1,7 +1,6 @@
#!/usr/bin/python
import datetime
-import os.path
from wsgiref.simple_server import make_server
import jinja2
@@ -15,12 +14,15 @@ from dedup.utils import fetchiter
hash_functions = [
("sha512", "sha512"),
- ("image_sha512", "image_sha512"),
+ ("png_sha512", "png_sha512"),
+ ("png_sha512", "gif_sha512"),
+ ("gif_sha512", "png_sha512"),
+ ("gif_sha512", "gif_sha512"),
("gzip_sha512", "gzip_sha512"),
("sha512", "gzip_sha512"),
("gzip_sha512", "sha512")]
-jinjaenv = jinja2.Environment(loader=jinja2.FileSystemLoader("."))
+jinjaenv = jinja2.Environment(loader=jinja2.PackageLoader("dedup", "templates"))
def format_size(size):
size = float(size)
@@ -45,137 +47,11 @@ def function_combination(function1, function2):
jinjaenv.filters["filesizeformat"] = format_size
base_template = jinjaenv.get_template("base.html")
-
-package_template = jinjaenv.from_string(
-"""{% extends "base.html" %}
-{% block title %}duplication of {{ package|e }}{% endblock %}
-{% block content %}<h1>{{ package|e }}</h1>
-<p>Version: {{ version|e }}</p>
-<p>Architecture: {{ architecture|e }}</p>
-<p>Number of files: {{ num_files }}</p>
-<p>Total size: {{ total_size|filesizeformat }}</p>
-{%- if shared -%}
- {%- for function, sharing in shared.items() -%}
- <h3>sharing with respect to {{ function|e }}</h3>
- <table border='1'><tr><th>package</th><th>files shared</th><th>data shared</th></tr>
- {%- for entry in sharing|sort(attribute="savable", reverse=true) -%}
- <tr><td{% if not entry.package or entry.package in dependencies %} class="dependency"{% endif %}>
- {%- if entry.package %}<a href="{{ entry.package|e }}"><span class="binary-package">{{ entry.package|e }}</span></a>{% else %}self{% endif %}
- <a href="../compare/{{ package|e }}/{{ entry.package|default(package, true)|e }}">compare</a></td>
- <td>{{ entry.duplicate }} ({{ (100 * entry.duplicate / num_files)|int }}%)</td>
- <td>{{ entry.savable|filesizeformat }} ({{ (100 * entry.savable / total_size)|int }}%)</td></tr>
- {%- endfor -%}
- </table>
- {%- endfor -%}
-<p>Note: Packages with yellow background are required to be installed when this package is installed.</p>
-{%- endif -%}
-{% endblock %}""")
-
-detail_template = jinjaenv.from_string(
-"""{% extends "base.html" %}
-{% block title %}sharing between {{ details1.package|e }} and {{ details2.package|e }}{% endblock%}
-{% block content %}
-<h1><a href="../../binary/{{ details1.package|e }}">{{ details1.package|e }}</a> &lt;-&gt; <a href="../../binary/{{ details2.package|e }}">{{ details2.package|e }}</a></h1>
-<p>Version of {{ details1.package|e }}: {{ details1.version|e }}</p>
-<p>Architecture of {{ details1.package|e }}: {{ details1.architecture|e }}</p>
-{%- if details1.package != details2.package -%}
-<p>Version of {{ details2.package|e }}: {{ details2.version|e }}</p>
-<p>Architecture of {{ details2.package|e }}: {{ details2.architecture|e }}</p>
-{%- endif -%}
-<table border='1'><tr><th colspan="2">{{ details1.package|e }}</th><th colspan="2">{{ details2.package|e }}</th></tr>
-<tr><th>size</th><th>filename</th><th>hash functions</th><th>filename</th></tr>
-{%- for entry in shared -%}
- <tr><td{% if entry.matches|length > 1 %} rowspan={{ entry.matches|length }}{% endif %}>{{ entry.size|filesizeformat }}</td><td{% if entry.matches|length > 1 %} rowspan={{ entry.matches|length }}{% endif %}>
- {%- for filename in entry.filenames %}<span class="filename">{{ filename|e }}</span>{% endfor -%}</td><td>
- {% for filename, match in entry.matches.items() -%}
- {% if not loop.first %}<tr><td>{% endif -%}
- {%- for funccomb, hashvalue in match.items() -%}
- <a href="../../hash/{{ funccomb[0]|e }}/{{ hashvalue|e }}">{{ funccomb[0]|e }}</a>
- {%- if funccomb[0] != funccomb[1] %} -&gt; <a href="../../hash/{{ funccomb[1]|e }}/{{ hashvalue|e }}">{{ funccomb[1]|e }}</a>{% endif %}
- {%- if not loop.last %}, {% endif %}
- {%- endfor -%}
- </td><td><span class="filename">{{ filename|e }}</span></td></tr>
- {%- endfor -%}
-{%- endfor -%}
-</table>
-{% endblock %}""")
-
-hash_template = jinjaenv.from_string(
-"""{% extends "base.html" %}
-{% block title %}information on {{ function|e }} hash {{ hashvalue|e }}{% endblock %}
-{% block content %}
-<h1>{{ function|e }} {{ hashvalue|e }}</h1>
-<table border='1'><tr><th>package</th><th>filename</th><th>size</th><th>different function</th></tr>
-{%- for entry in entries -%}
- <tr><td><a href="../../binary/{{ entry.package|e }}"><span class="binary-package">{{ entry.package|e }}</span></a></td>
- <td><span class="filename">{{ entry.filename|e }}</span></td><td>{{ entry.size|filesizeformat }}</td>
- <td>{% if function != entry.function %}{{ entry.function|e }}{% endif %}</td></tr>
-{%- endfor -%}
-</table>
-{% endblock %}""")
-
-index_template = jinjaenv.from_string(
-"""{% extends "base.html" %}
-{% block title %}Debian duplication detector{% endblock %}
-{% block header %}
- <script type="text/javascript">
- function getLinkTarget() {
- var pkg = document.getElementById("pkg_name").value;
- if(pkg) {
- return "/binary/"+pkg;
- }
- return '#';
- }
- function processData() {
- var link = document.getElementById("perma_link");
- link.href = getLinkTarget();
- link.text = location.href + getLinkTarget();
- }
- window.onload = function() {
- document.getElementById('pkg_name').onkeyup = processData;
- document.getElementById("pkg_form").onsubmit = function () {
- location.href = getLinkTarget();
- return false;
- }
- processData();
- document.getElementById("form_div").style.display = '';
- }
- </script>
-{% endblock %}
-{% block content %}
-<h1>Debian duplication detector</h1>
-<ul>
-<li>To inspect a particlar binary package, go to <pre>binary/&lt;packagename&gt;</pre> Example: <a href="binary/git">binary/git</a>
- <div style="display:none" id="form_div"><fieldset>
- <legend>Inspect package</legend>
- <noscript><b>This form is disfunctional when javascript is not enabled</b></noscript>
- Enter binary package to inspect - Note: Non-existing packages will result in <b>404</b>-Errors
- <form id="pkg_form">
- <label for="pkg_name">Name: <input type="text" size="30" name="pkg_name" id="pkg_name">
- <input type="submit" value="Go"> Permanent Link: <a id="perma_link" href="#"></a>
- </form>
- </fieldset></div></li>
-<li>To inspect a combination of binary packages go to <pre>compare/&lt;firstpackage&gt;/&lt;secondpackage&gt;</pre> Example: <a href="compare/git/git">compare/git/git</a></li>
-<li>To discover package shipping a particular file go to <pre>hash/sha512/&lt;hashvalue&gt;</pre> Example: <a href="hash/sha512/7633623b66b5e686bb94dd96a7cdb5a7e5ee00e87004fab416a5610d59c62badaf512a2e26e34e2455b7ed6b76690d2cd47464836d7d85d78b51d50f7e933d5c">hash/sha512/7633623b66b5e686bb94dd96a7cdb5a7e5ee00e87004fab416a5610d59c62badaf512a2e26e34e2455b7ed6b76690d2cd47464836d7d85d78b51d50f7e933d5c</a></li>
-</ul>
-{% endblock %}""")
-
-source_template = jinjaenv.from_string(
-"""{% extends "base.html" %}
-{% block title %}overview of {{ source|e }}{% endblock %}
-{% block content %}
-<h1>overview of {{ source|e }}</h1>
-<table border='1'><tr><th>binary from {{ source|e }}</th><th>savable</th><th>other package</th></tr>
-{% for package, sharing in packages.items() %}
- <tr><td><a href="../binary/{{ package|e }}"><span class="binary-package">{{ package|e }}</span></a></td><td>
- {%- if sharing -%}
- {{ sharing.savable|filesizeformat }}</td><td><a href="../binary/{{ sharing.package|e }}"><span class="binary-package">{{ sharing.package|e }}</span></a> <a href="../compare/{{ package|e }}/{{ sharing.package|e }}">compare</a>
- {%- else -%}</td><td>{%- endif -%}
- </td></tr>
-{% endfor %}
-</table>
-<p>Note: Not all sharing listed here. Click on binary packages with non-zero savable to see more.</p>
-{% endblock %}""")
+package_template = jinjaenv.get_template("binary.html")
+detail_template = jinjaenv.get_template("compare.html")
+hash_template = jinjaenv.get_template("hash.html")
+index_template = jinjaenv.get_template("index.html")
+source_template = jinjaenv.get_template("source.html")
def encode_and_buffer(iterator):
buff = b""
@@ -193,27 +69,6 @@ def html_response(unicode_iterator, max_age=24 * 60 * 60):
resp.expires = datetime.datetime.now() + datetime.timedelta(seconds=max_age)
return resp
-def generate_shared(rows):
- """internal helper from show_detail"""
- entry = None
- for filename1, size1, func1, filename2, size2, func2, hashvalue in rows:
- funccomb = (func1, func2)
- if funccomb not in hash_functions:
- continue
- if entry and (entry["filename1"] != filename1 or
- entry["filename2"] != filename2):
- yield entry
- entry = None
- if entry:
- funcdict = entry["functions"]
- else:
- funcdict = dict()
- entry = dict(filename1=filename1, filename2=filename2, size1=size1,
- size2=size2, functions=funcdict)
- funcdict[funccomb] = hashvalue
- if entry:
- yield entry
-
class Application(object):
def __init__(self, db):
self.db = db
@@ -235,6 +90,11 @@ class Application(object):
elif endpoint == "detail":
return self.show_detail(args["package1"], args["package2"])
elif endpoint == "hash":
+ if args["function"] == "image_sha512":
+ # backwards compatibility
+ raise RequestRedirect("%s/hash/png_sha512/%s" %
+ (request.environ["SCRIPT_NAME"],
+ args["hashvalue"]))
return self.show_hash(args["function"], args["hashvalue"])
elif endpoint == "index":
if not request.environ["PATH_INFO"]:
@@ -274,7 +134,7 @@ class Application(object):
def cached_sharedstats(self, pid):
sharedstats = {}
with self.db.begin() as conn:
- cur = conn.execute(sqlalchemy.text("SELECT pid2, package.name, func1, func2, files, size FROM sharing JOIN package ON sharing.pid2 = package.id WHERE pid1 = :pid;"),
+ cur = conn.execute(sqlalchemy.text("SELECT pid2, package.name, f1.name, f2.name, files, size FROM sharing JOIN package ON sharing.pid2 = package.id JOIN function AS f1 ON sharing.fid1 = f1.id JOIN function AS f2 ON sharing.fid2 = f2.id WHERE pid1 = :pid;"),
pid=pid)
for pid2, package2, func1, func2, files, size in fetchiter(cur):
if (func1, func2) not in hash_functions:
@@ -292,6 +152,10 @@ class Application(object):
params["dependencies"] = self.get_dependencies(params["pid"])
params["shared"] = self.cached_sharedstats(params["pid"])
params["urlroot"] = ".."
+ with self.db.begin() as conn:
+ cur = conn.execute(sqlalchemy.text("SELECT content.filename, issue.issue FROM content JOIN issue ON content.id = issue.cid WHERE content.pid = :pid;"),
+ pid=params["pid"])
+ params["issues"] = dict(cur.fetchall())
return html_response(package_template.render(params))
def compute_comparison(self, pid1, pid2):
@@ -305,7 +169,7 @@ class Application(object):
from hash function pairs to hash values.
"""
with self.db.begin() as conn:
- cur = conn.execute(sqlalchemy.text("SELECT id, filename, size, hash FROM content JOIN hash ON content.id = hash.cid JOIN duplicate ON content.id = duplicate.cid WHERE pid = :pid AND function = 'sha512' ORDER BY size DESC;"),
+ cur = conn.execute(sqlalchemy.text("SELECT content.id, content.filename, content.size, hash.hash FROM content JOIN hash ON content.id = hash.cid JOIN duplicate ON content.id = duplicate.cid JOIN function ON hash.fid = function.id WHERE pid = :pid AND function.name = 'sha512' ORDER BY size DESC;"),
pid=pid1)
cursize = -1
files = dict()
@@ -325,7 +189,7 @@ class Application(object):
entry = dict(filenames=set((filename,)), size=size, matches={})
files[hashvalue] = entry
- cur = conn.execute(sqlalchemy.text("SELECT ha.function, ha.hash, hb.function, filename FROM hash AS ha JOIN hash AS hb ON ha.hash = hb.hash JOIN content ON hb.cid = content.id WHERE ha.cid = :cid AND pid = :pid;"),
+ cur = conn.execute(sqlalchemy.text("SELECT fa.name, ha.hash, fb.name, filename FROM hash AS ha JOIN hash AS hb ON ha.hash = hb.hash JOIN content ON hb.cid = content.id JOIN function AS fa ON ha.fid = fa.id JOIN function AS fb ON hb.fid = fb.id WHERE ha.cid = :cid AND pid = :pid;"),
cid=cid, pid=pid2)
for func1, hashvalue, func2, filename in fetchiter(cur):
entry["matches"].setdefault(filename, {})[func1, func2] = \
@@ -350,7 +214,7 @@ class Application(object):
def show_hash(self, function, hashvalue):
with self.db.begin() as conn:
- cur = conn.execute(sqlalchemy.text("SELECT package.name, content.filename, content.size, hash.function FROM content JOIN hash ON hash.cid = content.id JOIN package ON content.pid = package.id WHERE hash = :hashvalue;"),
+ cur = conn.execute(sqlalchemy.text("SELECT package.name, content.filename, content.size, function.name FROM hash JOIN content ON hash.cid = content.id JOIN package ON content.pid = package.id JOIN function ON hash.fid = function.id WHERE hash = :hashvalue;"),
hashvalue=hashvalue)
entries = [dict(package=package, filename=filename, size=size,
function=otherfunc)
@@ -369,7 +233,7 @@ class Application(object):
binpkgs = dict.fromkeys(pkg for pkg, in fetchiter(cur))
if not binpkgs:
raise NotFound
- cur = conn.execute(sqlalchemy.text("SELECT p1.name, p2.name, sharing.func1, sharing.func2, sharing.files, sharing.size FROM sharing JOIN package AS p1 ON sharing.pid1 = p1.id JOIN package AS p2 ON sharing.pid2 = p2.id WHERE p1.source = :source;"),
+ cur = conn.execute(sqlalchemy.text("SELECT p1.name, p2.name, f1.name, f2.name, sharing.files, sharing.size FROM sharing JOIN package AS p1 ON sharing.pid1 = p1.id JOIN package AS p2 ON sharing.pid2 = p2.id JOIN function AS f1 ON sharing.fid1 = f1.id JOIN function AS f2 ON sharing.fid2 = f2.id WHERE p1.source = :source;"),
source=package)
for binary, otherbin, func1, func2, files, size in fetchiter(cur):
entry = dict(package=otherbin,
@@ -384,8 +248,7 @@ class Application(object):
def main():
db = sqlalchemy.create_engine("sqlite:///test.sqlite3")
app = Application(db)
- staticdir = os.path.join(os.path.dirname(__file__), "static")
- app = SharedDataMiddleware(app, {"/": staticdir})
+ app = SharedDataMiddleware(app, {"/": ("dedup", "static")})
make_server("0.0.0.0", 8800, app).serve_forever()
if __name__ == "__main__":