diff options
author | Helmut Grohne <helmut@subdivi.de> | 2021-12-31 15:45:33 +0100 |
---|---|---|
committer | Helmut Grohne <helmut@subdivi.de> | 2021-12-31 15:45:33 +0100 |
commit | f3ea68482e6c01053cb202573d953e8a2e89529f (patch) | |
tree | 4c08f6e5a99bbe5131c0949e7f97cc44cf4a2cbd /webapp.py | |
parent | f2eda3ba74e5bc5613e84381ebd8bfd343e1c8cc (diff) | |
parent | 5b359b10053cbade539246eec26e86b44793ca40 (diff) | |
download | debian-dedup-f3ea68482e6c01053cb202573d953e8a2e89529f.tar.gz |
Merge branch master into branch multiarchhints
Among other things, this drops Python 2.x support.
Diffstat (limited to 'webapp.py')
-rwxr-xr-x | webapp.py | 125 |
1 file changed, 65 insertions, 60 deletions
@@ -1,8 +1,9 @@ -#!/usr/bin/python +#!/usr/bin/python3 import argparse import contextlib import datetime +import io import sqlite3 from wsgiref.simple_server import make_server @@ -11,25 +12,28 @@ from werkzeug.exceptions import HTTPException, NotFound from werkzeug.routing import Map, Rule from werkzeug.utils import redirect from werkzeug.wrappers import Request, Response -from werkzeug.wsgi import SharedDataMiddleware +try: + from werkzeug.middleware.shared_data import SharedDataMiddleware +except ImportError: + from werkzeug.wsgi import SharedDataMiddleware from dedup.utils import fetchiter jinjaenv = jinja2.Environment(loader=jinja2.PackageLoader("dedup", "templates")) def format_size(size): - size = float(size) + sizef = float(size) fmt = "%d B" - if size >= 1024: - size /= 1024 + if sizef >= 1024: + sizef /= 1024 fmt = "%.1f KB" - if size >= 1024: - size /= 1024 + if sizef >= 1024: + sizef /= 1024 fmt = "%.1f MB" - if size >= 1024: - size /= 1024 + if sizef >= 1024: + sizef /= 1024 fmt = "%.1f GB" - return fmt % size + return fmt % sizef def function_combination(function1, function2): if function1 == function2: @@ -46,15 +50,16 @@ hash_template = jinjaenv.get_template("hash.html") index_template = jinjaenv.get_template("index.html") source_template = jinjaenv.get_template("source.html") -def encode_and_buffer(iterator): - buff = b"" - for elem in iterator: - buff += elem.encode("utf8") - if len(buff) >= 2048: - yield buff - buff = b"" - if buff: - yield buff +def encode_and_buffer(stream): + stream.enable_buffering(16) + buff = io.BytesIO() + for elem in stream: + buff.write(elem.encode("utf8")) + if buff.tell() >= 2048: + yield buff.getvalue() + buff = io.BytesIO() + if buff.tell() > 0: + yield buff.getvalue() def html_response(unicode_iterator, max_age=24 * 60 * 60): resp = Response(encode_and_buffer(unicode_iterator), mimetype="text/html") @@ -68,7 +73,7 @@ class InternalRedirect(Exception): self.target = target self.code = code -class Application(object): 
+class Application: def __init__(self, db): self.db = db self.routingmap = Map([ @@ -79,6 +84,9 @@ class Application(object): Rule("/source/<package>", methods=("GET",), endpoint="source"), ]) + def cursor(self): + return contextlib.closing(self.db.cursor()) + @Request.application def __call__(self, request): mapadapter = self.routingmap.bind_to_environ(request.environ) @@ -97,7 +105,7 @@ class Application(object): elif endpoint == "index": if not request.environ["PATH_INFO"]: raise InternalRedirect("/") - return html_response(index_template.render(dict(urlroot=""))) + return html_response(index_template.stream(dict(urlroot=""))) elif endpoint == "source": return self.show_source(args["package"]) raise NotFound() @@ -107,7 +115,7 @@ class Application(object): return e def get_details(self, package): - with contextlib.closing(self.db.cursor()) as cur: + with self.cursor() as cur: cur.execute("SELECT id, version, architecture FROM package WHERE name = ?;", (package,)) row = cur.fetchone() @@ -127,14 +135,14 @@ class Application(object): return details def get_dependencies(self, pid): - with contextlib.closing(self.db.cursor()) as cur: + with self.cursor() as cur: cur.execute("SELECT required FROM dependency WHERE pid = ?;", (pid,)) return set(row[0] for row in fetchiter(cur)) def cached_sharedstats(self, pid): sharedstats = {} - with contextlib.closing(self.db.cursor()) as cur: + with self.cursor() as cur: cur.execute("SELECT pid2, package.name, f1.name, f2.name, files, size FROM sharing JOIN package ON sharing.pid2 = package.id JOIN function AS f1 ON sharing.fid1 = f1.id JOIN function AS f2 ON sharing.fid2 = f2.id WHERE pid1 = ? AND f1.eqclass = f2.eqclass;", (pid,)) for pid2, package2, func1, func2, files, size in fetchiter(cur): @@ -151,12 +159,11 @@ class Application(object): params["dependencies"] = self.get_dependencies(params["pid"]) params["shared"] = self.cached_sharedstats(params["pid"]) params["urlroot"] = ".." 
- cur = self.db.cursor() - cur.execute("SELECT content.filename, issue.issue FROM content JOIN issue ON content.id = issue.cid WHERE content.pid = ?;", - (params["pid"],)) - params["issues"] = dict(cur.fetchall()) - cur.close() - return html_response(package_template.render(params)) + with self.cursor() as cur: + cur.execute("SELECT content.filename, issue.issue FROM content JOIN issue ON content.id = issue.cid WHERE content.pid = ?;", + (params["pid"],)) + params["issues"] = dict(cur.fetchall()) + return html_response(package_template.stream(params)) def compute_comparison(self, pid1, pid2): """Compute a sequence of comparison objects ordered by the size of the @@ -168,35 +175,33 @@ class Application(object): * matches: A mapping from filenames in package 2 (pid2) to a mapping from hash function pairs to hash values. """ - cur = self.db.cursor() - cur.execute("SELECT content.id, content.filename, content.size, hash.hash FROM content JOIN hash ON content.id = hash.cid JOIN duplicate ON content.id = duplicate.cid JOIN function ON hash.fid = function.id WHERE pid = ? AND function.name = 'sha512' ORDER BY size DESC;", - (pid1,)) - cursize = -1 - files = dict() - minmatch = 2 if pid1 == pid2 else 1 - cur2 = self.db.cursor() - for cid, filename, size, hashvalue in fetchiter(cur): - if cursize != size: - for entry in files.values(): - if len(entry["matches"]) >= minmatch: - yield entry - files.clear() - cursize = size + with self.cursor() as cur, self.cursor() as cur2: + cur.execute("SELECT content.id, content.filename, content.size, hash.hash FROM content JOIN hash ON content.id = hash.cid JOIN duplicate ON content.id = duplicate.cid JOIN function ON hash.fid = function.id WHERE pid = ? 
AND function.name = 'sha512' ORDER BY size DESC;", + (pid1,)) + cursize = -1 + files = dict() + minmatch = 2 if pid1 == pid2 else 1 + cur2 = self.db.cursor() + for cid, filename, size, hashvalue in fetchiter(cur): + if cursize != size: + for entry in files.values(): + if len(entry["matches"]) >= minmatch: + yield entry + files.clear() + cursize = size - if hashvalue in files: - files[hashvalue]["filenames"].add(filename) - continue + if hashvalue in files: + files[hashvalue]["filenames"].add(filename) + continue - entry = dict(filenames=set((filename,)), size=size, matches={}) - files[hashvalue] = entry + entry = dict(filenames=set((filename,)), size=size, matches={}) + files[hashvalue] = entry - cur2.execute("SELECT fa.name, ha.hash, fb.name, filename FROM hash AS ha JOIN hash AS hb ON ha.hash = hb.hash JOIN content ON hb.cid = content.id JOIN function AS fa ON ha.fid = fa.id JOIN function AS fb ON hb.fid = fb.id WHERE ha.cid = ? AND pid = ? AND fa.eqclass = fb.eqclass;", - (cid, pid2)) - for func1, hashvalue, func2, filename in fetchiter(cur2): - entry["matches"].setdefault(filename, {})[func1, func2] = \ - hashvalue - cur2.close() - cur.close() + cur2.execute("SELECT fa.name, ha.hash, fb.name, filename FROM hash AS ha JOIN hash AS hb ON ha.hash = hb.hash JOIN content ON hb.cid = content.id JOIN function AS fa ON ha.fid = fa.id JOIN function AS fb ON hb.fid = fb.id WHERE ha.cid = ? AND pid = ? 
AND fa.eqclass = fb.eqclass;", + (cid, pid2)) + for func1, hashvalue, func2, filename in fetchiter(cur2): + entry["matches"].setdefault(filename, {})[func1, func2] = \ + hashvalue for entry in files.values(): if len(entry["matches"]) >= minmatch: @@ -216,7 +221,7 @@ class Application(object): return html_response(detail_template.stream(params)) def show_hash(self, function, hashvalue): - with contextlib.closing(self.db.cursor()) as cur: + with self.cursor() as cur: cur.execute("SELECT package.name, content.filename, content.size, f2.name FROM hash JOIN content ON hash.cid = content.id JOIN package ON content.pid = package.id JOIN function AS f2 ON hash.fid = f2.id JOIN function AS f1 ON f2.eqclass = f1.eqclass WHERE f1.name = ? AND hash = ?;", (function, hashvalue,)) entries = [dict(package=package, filename=filename, size=size, @@ -234,10 +239,10 @@ class Application(object): raise NotFound() params = dict(function=function, hashvalue=hashvalue, entries=entries, urlroot="../..") - return html_response(hash_template.render(params)) + return html_response(hash_template.stream(params)) def show_source(self, package): - with contextlib.closing(self.db.cursor()) as cur: + with self.cursor() as cur: cur.execute("SELECT name FROM package WHERE source = ?;", (package,)) binpkgs = dict.fromkeys(pkg for pkg, in fetchiter(cur)) @@ -253,7 +258,7 @@ class Application(object): if not (oldentry and oldentry["savable"] >= size): binpkgs[binary] = entry params = dict(source=package, packages=binpkgs, urlroot="..") - return html_response(source_template.render(params)) + return html_response(source_template.stream(params)) def main(): parser = argparse.ArgumentParser() |