Merge branch master into branch multiarchhints

Among other things, this drops Python 2.x support.
author: Helmut Grohne <helmut@subdivi.de> 2021-12-31 15:45:33 +0100
committer: Helmut Grohne <helmut@subdivi.de> 2021-12-31 15:45:33 +0100
commit: f3ea68482e6c01053cb202573d953e8a2e89529f (patch)
tree: 4c08f6e5a99bbe5131c0949e7f97cc44cf4a2cbd /webapp.py
parent: f2eda3ba74e5bc5613e84381ebd8bfd343e1c8cc (diff)
parent: 5b359b10053cbade539246eec26e86b44793ca40 (diff)
download: debian-dedup-f3ea68482e6c01053cb202573d953e8a2e89529f.tar.gz
1 file changed, 65 insertions, 60 deletions
diff --git a/webapp.py b/webapp.py
index f9e667e..162a5a4 100755
--- a/webapp.py
+++ b/webapp.py
@@ -1,8 +1,9 @@
-#!/usr/bin/python
+#!/usr/bin/python3
 
 import argparse
 import contextlib
 import datetime
+import io
 import sqlite3
 from wsgiref.simple_server import make_server
 
@@ -11,25 +12,28 @@ from werkzeug.exceptions import HTTPException, NotFound
 from werkzeug.routing import Map, Rule
 from werkzeug.utils import redirect
 from werkzeug.wrappers import Request, Response
-from werkzeug.wsgi import SharedDataMiddleware
+try:
+    from werkzeug.middleware.shared_data import SharedDataMiddleware
+except ImportError:
+    from werkzeug.wsgi import SharedDataMiddleware
 
 from dedup.utils import fetchiter
 
 jinjaenv = jinja2.Environment(loader=jinja2.PackageLoader("dedup", "templates"))
 
 def format_size(size):
-    size = float(size)
+    sizef = float(size)
     fmt = "%d B"
-    if size >= 1024:
-        size /= 1024
+    if sizef >= 1024:
+        sizef /= 1024
         fmt = "%.1f KB"
-    if size >= 1024:
-        size /= 1024
+    if sizef >= 1024:
+        sizef /= 1024
         fmt = "%.1f MB"
-    if size >= 1024:
-        size /= 1024
+    if sizef >= 1024:
+        sizef /= 1024
         fmt = "%.1f GB"
-    return fmt % size
+    return fmt % sizef
 
 def function_combination(function1, function2):
     if function1 == function2:
@@ -46,15 +50,16 @@ hash_template = jinjaenv.get_template("hash.html")
 index_template = jinjaenv.get_template("index.html")
 source_template = jinjaenv.get_template("source.html")
 
-def encode_and_buffer(iterator):
-    buff = b""
-    for elem in iterator:
-        buff += elem.encode("utf8")
-        if len(buff) >= 2048:
-            yield buff
-            buff = b""
-    if buff:
-        yield buff
+def encode_and_buffer(stream):
+    stream.enable_buffering(16)
+    buff = io.BytesIO()
+    for elem in stream:
+        buff.write(elem.encode("utf8"))
+        if buff.tell() >= 2048:
+            yield buff.getvalue()
+            buff = io.BytesIO()
+    if buff.tell() > 0:
+        yield buff.getvalue()
 
 def html_response(unicode_iterator, max_age=24 * 60 * 60):
     resp = Response(encode_and_buffer(unicode_iterator), mimetype="text/html")
@@ -68,7 +73,7 @@ class InternalRedirect(Exception):
         self.target = target
         self.code = code
 
-class Application(object):
+class Application:
     def __init__(self, db):
         self.db = db
         self.routingmap = Map([
@@ -79,6 +84,9 @@ class Application(object):
             Rule("/source/<package>", methods=("GET",), endpoint="source"),
         ])
 
+    def cursor(self):
+        return contextlib.closing(self.db.cursor())
+
     @Request.application
     def __call__(self, request):
         mapadapter = self.routingmap.bind_to_environ(request.environ)
@@ -97,7 +105,7 @@ class Application(object):
             elif endpoint == "index":
                 if not request.environ["PATH_INFO"]:
                     raise InternalRedirect("/")
-                return html_response(index_template.render(dict(urlroot="")))
+                return html_response(index_template.stream(dict(urlroot="")))
             elif endpoint == "source":
                 return self.show_source(args["package"])
             raise NotFound()
@@ -107,7 +115,7 @@ class Application(object):
             return e
 
     def get_details(self, package):
-        with contextlib.closing(self.db.cursor()) as cur:
+        with self.cursor() as cur:
             cur.execute("SELECT id, version, architecture FROM package WHERE name = ?;",
                         (package,))
             row = cur.fetchone()
@@ -127,14 +135,14 @@ class Application(object):
         return details
 
     def get_dependencies(self, pid):
-        with contextlib.closing(self.db.cursor()) as cur:
+        with self.cursor() as cur:
             cur.execute("SELECT required FROM dependency WHERE pid = ?;",
                         (pid,))
             return set(row[0] for row in fetchiter(cur))
 
     def cached_sharedstats(self, pid):
         sharedstats = {}
-        with contextlib.closing(self.db.cursor()) as cur:
+        with self.cursor() as cur:
             cur.execute("SELECT pid2, package.name, f1.name, f2.name, files, size FROM sharing JOIN package ON sharing.pid2 = package.id JOIN function AS f1 ON sharing.fid1 = f1.id JOIN function AS f2 ON sharing.fid2 = f2.id WHERE pid1 = ? AND f1.eqclass = f2.eqclass;",
                         (pid,))
             for pid2, package2, func1, func2, files, size in fetchiter(cur):
@@ -151,12 +159,11 @@ class Application(object):
         params["dependencies"] = self.get_dependencies(params["pid"])
         params["shared"] = self.cached_sharedstats(params["pid"])
         params["urlroot"] = ".."
-        cur = self.db.cursor()
-        cur.execute("SELECT content.filename, issue.issue FROM content JOIN issue ON content.id = issue.cid WHERE content.pid = ?;",
-                    (params["pid"],))
-        params["issues"] = dict(cur.fetchall())
-        cur.close()
-        return html_response(package_template.render(params))
+        with self.cursor() as cur:
+            cur.execute("SELECT content.filename, issue.issue FROM content JOIN issue ON content.id = issue.cid WHERE content.pid = ?;",
+                        (params["pid"],))
+            params["issues"] = dict(cur.fetchall())
+        return html_response(package_template.stream(params))
 
     def compute_comparison(self, pid1, pid2):
         """Compute a sequence of comparison objects ordered by the size of the
@@ -168,35 +175,33 @@ class Application(object):
          * matches: A mapping from filenames in package 2 (pid2) to a mapping
            from hash function pairs to hash values.
         """
-        cur = self.db.cursor()
-        cur.execute("SELECT content.id, content.filename, content.size, hash.hash FROM content JOIN hash ON content.id = hash.cid JOIN duplicate ON content.id = duplicate.cid JOIN function ON hash.fid = function.id WHERE pid = ? AND function.name = 'sha512' ORDER BY size DESC;",
-                    (pid1,))
-        cursize = -1
-        files = dict()
-        minmatch = 2 if pid1 == pid2 else 1
-        cur2 = self.db.cursor()
-        for cid, filename, size, hashvalue in fetchiter(cur):
-            if cursize != size:
-                for entry in files.values():
-                    if len(entry["matches"]) >= minmatch:
-                        yield entry
-                files.clear()
-                cursize = size
+        with self.cursor() as cur, self.cursor() as cur2:
+            cur.execute("SELECT content.id, content.filename, content.size, hash.hash FROM content JOIN hash ON content.id = hash.cid JOIN duplicate ON content.id = duplicate.cid JOIN function ON hash.fid = function.id WHERE pid = ? AND function.name = 'sha512' ORDER BY size DESC;",
+                        (pid1,))
+            cursize = -1
+            files = dict()
+            minmatch = 2 if pid1 == pid2 else 1
+            cur2 = self.db.cursor()
+            for cid, filename, size, hashvalue in fetchiter(cur):
+                if cursize != size:
+                    for entry in files.values():
+                        if len(entry["matches"]) >= minmatch:
+                            yield entry
+                    files.clear()
+                    cursize = size
 
-            if hashvalue in files:
-                files[hashvalue]["filenames"].add(filename)
-                continue
+                if hashvalue in files:
+                    files[hashvalue]["filenames"].add(filename)
+                    continue
 
-            entry = dict(filenames=set((filename,)), size=size, matches={})
-            files[hashvalue] = entry
+                entry = dict(filenames=set((filename,)), size=size, matches={})
+                files[hashvalue] = entry
 
-            cur2.execute("SELECT fa.name, ha.hash, fb.name, filename FROM hash AS ha JOIN hash AS hb ON ha.hash = hb.hash JOIN content ON hb.cid = content.id JOIN function AS fa ON ha.fid = fa.id JOIN function AS fb ON hb.fid = fb.id WHERE ha.cid = ? AND pid = ? AND fa.eqclass = fb.eqclass;",
-                         (cid, pid2))
-            for func1, hashvalue, func2, filename in fetchiter(cur2):
-                entry["matches"].setdefault(filename, {})[func1, func2] = \
-                        hashvalue
-        cur2.close()
-        cur.close()
+                cur2.execute("SELECT fa.name, ha.hash, fb.name, filename FROM hash AS ha JOIN hash AS hb ON ha.hash = hb.hash JOIN content ON hb.cid = content.id JOIN function AS fa ON ha.fid = fa.id JOIN function AS fb ON hb.fid = fb.id WHERE ha.cid = ? AND pid = ? AND fa.eqclass = fb.eqclass;",
+                             (cid, pid2))
+                for func1, hashvalue, func2, filename in fetchiter(cur2):
+                    entry["matches"].setdefault(filename, {})[func1, func2] = \
+                            hashvalue
 
         for entry in files.values():
             if len(entry["matches"]) >= minmatch:
@@ -216,7 +221,7 @@ class Application(object):
         return html_response(detail_template.stream(params))
 
     def show_hash(self, function, hashvalue):
-        with contextlib.closing(self.db.cursor()) as cur:
+        with self.cursor() as cur:
             cur.execute("SELECT package.name, content.filename, content.size, f2.name FROM hash JOIN content ON hash.cid = content.id JOIN package ON content.pid = package.id JOIN function AS f2 ON hash.fid = f2.id JOIN function AS f1 ON f2.eqclass = f1.eqclass WHERE f1.name = ? AND hash = ?;",
                         (function, hashvalue,))
             entries = [dict(package=package, filename=filename, size=size,
@@ -234,10 +239,10 @@ class Application(object):
                 raise NotFound()
         params = dict(function=function, hashvalue=hashvalue, entries=entries,
                       urlroot="../..")
-        return html_response(hash_template.render(params))
+        return html_response(hash_template.stream(params))
 
     def show_source(self, package):
-        with contextlib.closing(self.db.cursor()) as cur:
+        with self.cursor() as cur:
             cur.execute("SELECT name FROM package WHERE source = ?;",
                         (package,))
             binpkgs = dict.fromkeys(pkg for pkg, in fetchiter(cur))
@@ -253,7 +258,7 @@ class Application(object):
                 if not (oldentry and oldentry["savable"] >= size):
                     binpkgs[binary] = entry
         params = dict(source=package, packages=binpkgs, urlroot="..")
-        return html_response(source_template.render(params))
+        return html_response(source_template.stream(params))
 
 def main():
     parser = argparse.ArgumentParser()
author	Helmut Grohne <helmut@subdivi.de>	2021-12-31 15:45:33 +0100
committer	Helmut Grohne <helmut@subdivi.de>	2021-12-31 15:45:33 +0100
commit	f3ea68482e6c01053cb202573d953e8a2e89529f (patch)
tree	4c08f6e5a99bbe5131c0949e7f97cc44cf4a2cbd /webapp.py
parent	f2eda3ba74e5bc5613e84381ebd8bfd343e1c8cc (diff)
parent	5b359b10053cbade539246eec26e86b44793ca40 (diff)
download	debian-dedup-f3ea68482e6c01053cb202573d953e8a2e89529f.tar.gz