summary refs log tree commit diff
path: root/webapp.py
diff options
context:
space:
mode:
author Helmut Grohne <helmut@subdivi.de> 2021-12-31 15:45:33 +0100
committer Helmut Grohne <helmut@subdivi.de> 2021-12-31 15:45:33 +0100
commit f3ea68482e6c01053cb202573d953e8a2e89529f (patch)
tree 4c08f6e5a99bbe5131c0949e7f97cc44cf4a2cbd /webapp.py
parent f2eda3ba74e5bc5613e84381ebd8bfd343e1c8cc (diff)
parent 5b359b10053cbade539246eec26e86b44793ca40 (diff)
download debian-dedup-f3ea68482e6c01053cb202573d953e8a2e89529f.tar.gz
Merge branch master into branch multiarchhints
Among other things, this drops Python 2.x support.
Diffstat (limited to 'webapp.py')
-rwxr-xr-x webapp.py 125
1 files changed, 65 insertions, 60 deletions
diff --git a/webapp.py b/webapp.py
index f9e667e..162a5a4 100755
--- a/webapp.py
+++ b/webapp.py
@@ -1,8 +1,9 @@
-#!/usr/bin/python
+#!/usr/bin/python3
import argparse
import contextlib
import datetime
+import io
import sqlite3
from wsgiref.simple_server import make_server
@@ -11,25 +12,28 @@ from werkzeug.exceptions import HTTPException, NotFound
from werkzeug.routing import Map, Rule
from werkzeug.utils import redirect
from werkzeug.wrappers import Request, Response
-from werkzeug.wsgi import SharedDataMiddleware
+try:
+ from werkzeug.middleware.shared_data import SharedDataMiddleware
+except ImportError:
+ from werkzeug.wsgi import SharedDataMiddleware
from dedup.utils import fetchiter
jinjaenv = jinja2.Environment(loader=jinja2.PackageLoader("dedup", "templates"))
def format_size(size):
- size = float(size)
+ sizef = float(size)
fmt = "%d B"
- if size >= 1024:
- size /= 1024
+ if sizef >= 1024:
+ sizef /= 1024
fmt = "%.1f KB"
- if size >= 1024:
- size /= 1024
+ if sizef >= 1024:
+ sizef /= 1024
fmt = "%.1f MB"
- if size >= 1024:
- size /= 1024
+ if sizef >= 1024:
+ sizef /= 1024
fmt = "%.1f GB"
- return fmt % size
+ return fmt % sizef
def function_combination(function1, function2):
if function1 == function2:
@@ -46,15 +50,16 @@ hash_template = jinjaenv.get_template("hash.html")
index_template = jinjaenv.get_template("index.html")
source_template = jinjaenv.get_template("source.html")
-def encode_and_buffer(iterator):
- buff = b""
- for elem in iterator:
- buff += elem.encode("utf8")
- if len(buff) >= 2048:
- yield buff
- buff = b""
- if buff:
- yield buff
+def encode_and_buffer(stream):
+ stream.enable_buffering(16)
+ buff = io.BytesIO()
+ for elem in stream:
+ buff.write(elem.encode("utf8"))
+ if buff.tell() >= 2048:
+ yield buff.getvalue()
+ buff = io.BytesIO()
+ if buff.tell() > 0:
+ yield buff.getvalue()
def html_response(unicode_iterator, max_age=24 * 60 * 60):
resp = Response(encode_and_buffer(unicode_iterator), mimetype="text/html")
@@ -68,7 +73,7 @@ class InternalRedirect(Exception):
self.target = target
self.code = code
-class Application(object):
+class Application:
def __init__(self, db):
self.db = db
self.routingmap = Map([
@@ -79,6 +84,9 @@ class Application(object):
Rule("/source/<package>", methods=("GET",), endpoint="source"),
])
+ def cursor(self):
+ return contextlib.closing(self.db.cursor())
+
@Request.application
def __call__(self, request):
mapadapter = self.routingmap.bind_to_environ(request.environ)
@@ -97,7 +105,7 @@ class Application(object):
elif endpoint == "index":
if not request.environ["PATH_INFO"]:
raise InternalRedirect("/")
- return html_response(index_template.render(dict(urlroot="")))
+ return html_response(index_template.stream(dict(urlroot="")))
elif endpoint == "source":
return self.show_source(args["package"])
raise NotFound()
@@ -107,7 +115,7 @@ class Application(object):
return e
def get_details(self, package):
- with contextlib.closing(self.db.cursor()) as cur:
+ with self.cursor() as cur:
cur.execute("SELECT id, version, architecture FROM package WHERE name = ?;",
(package,))
row = cur.fetchone()
@@ -127,14 +135,14 @@ class Application(object):
return details
def get_dependencies(self, pid):
- with contextlib.closing(self.db.cursor()) as cur:
+ with self.cursor() as cur:
cur.execute("SELECT required FROM dependency WHERE pid = ?;",
(pid,))
return set(row[0] for row in fetchiter(cur))
def cached_sharedstats(self, pid):
sharedstats = {}
- with contextlib.closing(self.db.cursor()) as cur:
+ with self.cursor() as cur:
cur.execute("SELECT pid2, package.name, f1.name, f2.name, files, size FROM sharing JOIN package ON sharing.pid2 = package.id JOIN function AS f1 ON sharing.fid1 = f1.id JOIN function AS f2 ON sharing.fid2 = f2.id WHERE pid1 = ? AND f1.eqclass = f2.eqclass;",
(pid,))
for pid2, package2, func1, func2, files, size in fetchiter(cur):
@@ -151,12 +159,11 @@ class Application(object):
params["dependencies"] = self.get_dependencies(params["pid"])
params["shared"] = self.cached_sharedstats(params["pid"])
params["urlroot"] = ".."
- cur = self.db.cursor()
- cur.execute("SELECT content.filename, issue.issue FROM content JOIN issue ON content.id = issue.cid WHERE content.pid = ?;",
- (params["pid"],))
- params["issues"] = dict(cur.fetchall())
- cur.close()
- return html_response(package_template.render(params))
+ with self.cursor() as cur:
+ cur.execute("SELECT content.filename, issue.issue FROM content JOIN issue ON content.id = issue.cid WHERE content.pid = ?;",
+ (params["pid"],))
+ params["issues"] = dict(cur.fetchall())
+ return html_response(package_template.stream(params))
def compute_comparison(self, pid1, pid2):
"""Compute a sequence of comparison objects ordered by the size of the
@@ -168,35 +175,33 @@ class Application(object):
* matches: A mapping from filenames in package 2 (pid2) to a mapping
from hash function pairs to hash values.
"""
- cur = self.db.cursor()
- cur.execute("SELECT content.id, content.filename, content.size, hash.hash FROM content JOIN hash ON content.id = hash.cid JOIN duplicate ON content.id = duplicate.cid JOIN function ON hash.fid = function.id WHERE pid = ? AND function.name = 'sha512' ORDER BY size DESC;",
- (pid1,))
- cursize = -1
- files = dict()
- minmatch = 2 if pid1 == pid2 else 1
- cur2 = self.db.cursor()
- for cid, filename, size, hashvalue in fetchiter(cur):
- if cursize != size:
- for entry in files.values():
- if len(entry["matches"]) >= minmatch:
- yield entry
- files.clear()
- cursize = size
+ with self.cursor() as cur, self.cursor() as cur2:
+ cur.execute("SELECT content.id, content.filename, content.size, hash.hash FROM content JOIN hash ON content.id = hash.cid JOIN duplicate ON content.id = duplicate.cid JOIN function ON hash.fid = function.id WHERE pid = ? AND function.name = 'sha512' ORDER BY size DESC;",
+ (pid1,))
+ cursize = -1
+ files = dict()
+ minmatch = 2 if pid1 == pid2 else 1
+ cur2 = self.db.cursor()
+ for cid, filename, size, hashvalue in fetchiter(cur):
+ if cursize != size:
+ for entry in files.values():
+ if len(entry["matches"]) >= minmatch:
+ yield entry
+ files.clear()
+ cursize = size
- if hashvalue in files:
- files[hashvalue]["filenames"].add(filename)
- continue
+ if hashvalue in files:
+ files[hashvalue]["filenames"].add(filename)
+ continue
- entry = dict(filenames=set((filename,)), size=size, matches={})
- files[hashvalue] = entry
+ entry = dict(filenames=set((filename,)), size=size, matches={})
+ files[hashvalue] = entry
- cur2.execute("SELECT fa.name, ha.hash, fb.name, filename FROM hash AS ha JOIN hash AS hb ON ha.hash = hb.hash JOIN content ON hb.cid = content.id JOIN function AS fa ON ha.fid = fa.id JOIN function AS fb ON hb.fid = fb.id WHERE ha.cid = ? AND pid = ? AND fa.eqclass = fb.eqclass;",
- (cid, pid2))
- for func1, hashvalue, func2, filename in fetchiter(cur2):
- entry["matches"].setdefault(filename, {})[func1, func2] = \
- hashvalue
- cur2.close()
- cur.close()
+ cur2.execute("SELECT fa.name, ha.hash, fb.name, filename FROM hash AS ha JOIN hash AS hb ON ha.hash = hb.hash JOIN content ON hb.cid = content.id JOIN function AS fa ON ha.fid = fa.id JOIN function AS fb ON hb.fid = fb.id WHERE ha.cid = ? AND pid = ? AND fa.eqclass = fb.eqclass;",
+ (cid, pid2))
+ for func1, hashvalue, func2, filename in fetchiter(cur2):
+ entry["matches"].setdefault(filename, {})[func1, func2] = \
+ hashvalue
for entry in files.values():
if len(entry["matches"]) >= minmatch:
@@ -216,7 +221,7 @@ class Application(object):
return html_response(detail_template.stream(params))
def show_hash(self, function, hashvalue):
- with contextlib.closing(self.db.cursor()) as cur:
+ with self.cursor() as cur:
cur.execute("SELECT package.name, content.filename, content.size, f2.name FROM hash JOIN content ON hash.cid = content.id JOIN package ON content.pid = package.id JOIN function AS f2 ON hash.fid = f2.id JOIN function AS f1 ON f2.eqclass = f1.eqclass WHERE f1.name = ? AND hash = ?;",
(function, hashvalue,))
entries = [dict(package=package, filename=filename, size=size,
@@ -234,10 +239,10 @@ class Application(object):
raise NotFound()
params = dict(function=function, hashvalue=hashvalue, entries=entries,
urlroot="../..")
- return html_response(hash_template.render(params))
+ return html_response(hash_template.stream(params))
def show_source(self, package):
- with contextlib.closing(self.db.cursor()) as cur:
+ with self.cursor() as cur:
cur.execute("SELECT name FROM package WHERE source = ?;",
(package,))
binpkgs = dict.fromkeys(pkg for pkg, in fetchiter(cur))
@@ -253,7 +258,7 @@ class Application(object):
if not (oldentry and oldentry["savable"] >= size):
binpkgs[binary] = entry
params = dict(source=package, packages=binpkgs, urlroot="..")
- return html_response(source_template.render(params))
+ return html_response(source_template.stream(params))
def main():
parser = argparse.ArgumentParser()