From 6206dea43941560a29c9a1105ae3055740ab80aa Mon Sep 17 00:00:00 2001
From: Helmut Grohne <helmut@subdivi.de>
Date: Mon, 22 Jul 2013 12:03:35 +0200
Subject: schema: extend content_package_index

We can avoid a b-tree sort in the package comparison of the web app if
the package index also provides the size.
---
 schema.sql | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/schema.sql b/schema.sql
index b839a51..e9e0bcc 100644
--- a/schema.sql
+++ b/schema.sql
@@ -2,7 +2,7 @@ CREATE TABLE package (id INTEGER PRIMARY KEY, name TEXT UNIQUE, version TEXT, ar
 CREATE TABLE content (id INTEGER PRIMARY KEY, pid INTEGER, filename TEXT, size INTEGER, FOREIGN KEY (pid) REFERENCES package(id) ON DELETE CASCADE);
 CREATE TABLE hash (cid INTEGER, function TEXT, hash TEXT, FOREIGN KEY (cid) REFERENCES content(id) ON DELETE CASCADE);
 CREATE TABLE dependency (pid INTEGER, required TEXT, FOREIGN KEY (pid) REFERENCES package(id) ON DELETE CASCADE);
-CREATE INDEX content_package_index ON content (pid);
+CREATE INDEX content_package_size_index ON content (pid, size);
 CREATE INDEX hash_cid_index ON hash (cid);
 CREATE INDEX hash_hash_index ON hash (hash);
 
-- 
cgit v1.2.3


From 6f88561d726327c90f83b8aad1db26abbd4cdf1e Mon Sep 17 00:00:00 2001
From: Helmut Grohne <helmut@subdivi.de>
Date: Tue, 23 Jul 2013 18:53:55 +0200
Subject: schema: reference hash functions by integer key

This already worked quite well for package.id. On a test data set of 5%
size this transformation reduces the database size by about 4%.
---
 readyaml.py       |  2 +-
 schema.sql        | 14 +++++++++++---
 update_sharing.py | 16 ++++++++--------
 webapp.py         | 10 +++++-----
 4 files changed, 25 insertions(+), 17 deletions(-)

diff --git a/readyaml.py b/readyaml.py
index bb8ac54..f4d6ead 100755
--- a/readyaml.py
+++ b/readyaml.py
@@ -45,7 +45,7 @@ def readyaml(db, stream):
         cur.execute("INSERT INTO content (pid, filename, size) VALUES (?, ?, ?);",
                     (pid, entry["name"], entry["size"]))
         cid = cur.lastrowid
-        cur.executemany("INSERT INTO hash (cid, function, hash) VALUES (?, ?, ?);",
+        cur.executemany("INSERT INTO hash (cid, fid, hash) VALUES (?, (SELECT id FROM function WHERE name = ?), ?);",
                         ((cid, func, hexhash)
                          for func, hexhash in entry["hashes"].items()))
     raise ValueError("missing commit block")
diff --git a/schema.sql b/schema.sql
index e9e0bcc..cb6a2c5 100644
--- a/schema.sql
+++ b/schema.sql
@@ -1,11 +1,19 @@
 CREATE TABLE package (id INTEGER PRIMARY KEY, name TEXT UNIQUE, version TEXT, architecture TEXT, source TEXT);
 CREATE TABLE content (id INTEGER PRIMARY KEY, pid INTEGER, filename TEXT, size INTEGER, FOREIGN KEY (pid) REFERENCES package(id) ON DELETE CASCADE);
-CREATE TABLE hash (cid INTEGER, function TEXT, hash TEXT, FOREIGN KEY (cid) REFERENCES content(id) ON DELETE CASCADE);
+CREATE TABLE function (id INTEGER PRIMARY KEY, name TEXT UNIQUE NOT NULL);
+INSERT INTO function (name) VALUES ("sha512"), ("gzip_sha512"), ("image_sha512");
+CREATE TABLE hash (cid INTEGER, fid INTEGER NOT NULL, hash TEXT, FOREIGN KEY (cid) REFERENCES content(id) ON DELETE CASCADE, FOREIGN KEY (fid) REFERENCES function(id));
 CREATE TABLE dependency (pid INTEGER, required TEXT, FOREIGN KEY (pid) REFERENCES package(id) ON DELETE CASCADE);
 CREATE INDEX content_package_size_index ON content (pid, size);
 CREATE INDEX hash_cid_index ON hash (cid);
 CREATE INDEX hash_hash_index ON hash (hash);
 
-CREATE TABLE sharing (pid1 INTEGER, pid2 INTEGER, func1 TEXT, func2 TEXT, files INTEGER, size INTEGER, FOREIGN KEY (pid1) REFERENCES package(id) ON DELETE CASCADE, FOREIGN KEY (pid2) REFERENCES package(id) ON DELETE CASCADE);
-CREATE INDEX sharing_insert_index ON sharing (pid1, pid2, func1, func2);
+CREATE TABLE sharing (
+	pid1 INTEGER NOT NULL REFERENCES package(id) ON DELETE CASCADE,
+	pid2 INTEGER NOT NULL REFERENCES package(id) ON DELETE CASCADE,
+	fid1 INTEGER NOT NULL REFERENCES function(id),
+	fid2 INTEGER NOT NULL REFERENCES function(id),
+	files INTEGER,
+	size INTEGER);
+CREATE INDEX sharing_insert_index ON sharing (pid1, pid2, fid1, fid2);
 CREATE TABLE duplicate (cid INTEGER PRIMARY KEY, FOREIGN KEY (cid) REFERENCES content(id) ON DELETE CASCADE);
diff --git a/update_sharing.py b/update_sharing.py
index 55e8096..e1a2d68 100755
--- a/update_sharing.py
+++ b/update_sharing.py
@@ -5,23 +5,23 @@ import sqlite3
 from dedup.utils import fetchiter
 
 def add_values(cursor, insert_key, files, size):
-    cursor.execute("UPDATE sharing SET files = files + ?, size = size + ? WHERE pid1 = ? AND pid2 = ? AND func1 = ? AND func2 = ?;",
+    cursor.execute("UPDATE sharing SET files = files + ?, size = size + ? WHERE pid1 = ? AND pid2 = ? AND fid1 = ? AND fid2 = ?;",
                    (files, size) + insert_key)
     if cursor.rowcount > 0:
         return
-    cursor.execute("INSERT INTO sharing (pid1, pid2, func1, func2, files, size) VALUES (?, ?, ?, ?, ?, ?);",
+    cursor.execute("INSERT INTO sharing (pid1, pid2, fid1, fid2, files, size) VALUES (?, ?, ?, ?, ?, ?);",
                    insert_key + (files, size))
 
 def compute_pkgdict(rows):
     pkgdict = dict()
-    for pid, _, filename, size, function in rows:
+    for pid, _, filename, size, fid in rows:
         funcdict = pkgdict.setdefault(pid, {})
-        funcdict.setdefault(function, []).append((size, filename))
+        funcdict.setdefault(fid, []).append((size, filename))
     return pkgdict
 
 def process_pkgdict(cursor, pkgdict):
     for pid1, funcdict1 in pkgdict.items():
-        for function1, files in funcdict1.items():
+        for fid1, files in funcdict1.items():
             numfiles = len(files)
             size = sum(entry[0] for entry in files)
             for pid2, funcdict2 in pkgdict.items():
@@ -33,8 +33,8 @@ def process_pkgdict(cursor, pkgdict):
                 else:
                     pkgnumfiles = numfiles
                     pkgsize = size
-                for function2 in funcdict2.keys():
-                    insert_key = (pid1, pid2, function1, function2)
+                for fid2 in funcdict2.keys():
+                    insert_key = (pid1, pid2, fid1, fid2)
                     add_values(cursor, insert_key, pkgnumfiles, pkgsize)
 
 def main():
@@ -46,7 +46,7 @@ def main():
     readcur = db.cursor()
     readcur.execute("SELECT hash FROM hash GROUP BY hash HAVING count(*) > 1;")
     for hashvalue, in fetchiter(readcur):
-        cur.execute("SELECT content.pid, content.id, content.filename, content.size, hash.function FROM hash JOIN content ON hash.cid = content.id WHERE hash = ?;",
+        cur.execute("SELECT content.pid, content.id, content.filename, content.size, hash.fid FROM hash JOIN content ON hash.cid = content.id WHERE hash = ?;",
                     (hashvalue,))
         rows = cur.fetchall()
         print("processing hash %s with %d entries" % (hashvalue, len(rows)))
diff --git a/webapp.py b/webapp.py
index 9e23128..d42e932 100755
--- a/webapp.py
+++ b/webapp.py
@@ -275,7 +275,7 @@ class Application(object):
     def cached_sharedstats(self, pid):
         cur = self.db.cursor()
         sharedstats = {}
-        cur.execute("SELECT pid2, package.name, func1, func2, files, size FROM sharing JOIN package ON sharing.pid2 = package.id WHERE pid1 = ?;",
+        cur.execute("SELECT pid2, package.name, f1.name, f2.name, files, size FROM sharing JOIN package ON sharing.pid2 = package.id JOIN function AS f1 ON sharing.fid1 = f1.id JOIN function AS f2 ON sharing.fid2 = f2.id WHERE pid1 = ?;",
                     (pid,))
         for pid2, package2, func1, func2, files, size in fetchiter(cur):
             if (func1, func2) not in hash_functions:
@@ -305,7 +305,7 @@ class Application(object):
            from hash function pairs to hash values.
         """
         cur = self.db.cursor()
-        cur.execute("SELECT id, filename, size, hash FROM content JOIN hash ON content.id = hash.cid JOIN duplicate ON content.id = duplicate.cid WHERE pid = ? AND function = 'sha512' ORDER BY size DESC;",
+        cur.execute("SELECT content.id, content.filename, content.size, hash.hash FROM content JOIN hash ON content.id = hash.cid JOIN duplicate ON content.id = duplicate.cid JOIN function ON hash.fid = function.id WHERE pid = ? AND function.name = 'sha512' ORDER BY size DESC;",
                     (pid1,))
         cursize = -1
         files = dict()
@@ -326,7 +326,7 @@ class Application(object):
             files[hashvalue] = entry
 
             cur2 = self.db.cursor()
-            cur2.execute("SELECT ha.function, ha.hash, hb.function, filename FROM hash AS ha JOIN hash AS hb ON ha.hash = hb.hash JOIN content ON hb.cid = content.id WHERE ha.cid = ? AND pid = ?;",
+            cur2.execute("SELECT fa.name, ha.hash, fb.name, filename FROM hash AS ha JOIN hash AS hb ON ha.hash = hb.hash JOIN content ON hb.cid = content.id JOIN function AS fa ON ha.fid = fa.id JOIN function AS fb ON hb.fid = fb.id WHERE ha.cid = ? AND pid = ?;",
                          (cid, pid2))
             for func1, hashvalue, func2, filename in fetchiter(cur2):
                 entry["matches"].setdefault(filename, {})[func1, func2] = \
@@ -353,7 +353,7 @@ class Application(object):
 
     def show_hash(self, function, hashvalue):
         cur = self.db.cursor()
-        cur.execute("SELECT package.name, content.filename, content.size, hash.function FROM hash JOIN content ON hash.cid = content.id JOIN package ON content.pid = package.id WHERE hash = ?;",
+        cur.execute("SELECT package.name, content.filename, content.size, function.name FROM hash JOIN content ON hash.cid = content.id JOIN package ON content.pid = package.id JOIN function ON hash.fid = function.id WHERE hash = ?;",
                     (hashvalue,))
         entries = [dict(package=package, filename=filename, size=size,
                         function=otherfunc)
@@ -372,7 +372,7 @@ class Application(object):
         binpkgs = dict.fromkeys(pkg for pkg, in fetchiter(cur))
         if not binpkgs:
             raise NotFound
-        cur.execute("SELECT p1.name, p2.name, sharing.func1, sharing.func2, sharing.files, sharing.size FROM sharing JOIN package AS p1 ON sharing.pid1 = p1.id JOIN package AS p2 ON sharing.pid2 = p2.id WHERE p1.source = ?;",
+        cur.execute("SELECT p1.name, p2.name, f1.name, f2.name, sharing.files, sharing.size FROM sharing JOIN package AS p1 ON sharing.pid1 = p1.id JOIN package AS p2 ON sharing.pid2 = p2.id JOIN function AS f1 ON sharing.fid1 = f1.id JOIN function AS f2 ON sharing.fid2 = f2.id WHERE p1.source = ?;",
                     (package,))
         for binary, otherbin, func1, func2, files, size in fetchiter(cur):
             entry = dict(package=otherbin,
-- 
cgit v1.2.3


From 2f797c9b90f05eadf4bb13f4a9c1f029925d9275 Mon Sep 17 00:00:00 2001
From: Helmut Grohne <helmut@subdivi.de>
Date: Tue, 23 Jul 2013 21:54:41 +0200
Subject: adapt queries in README to new schema

---
 README | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/README b/README
index 44b086a..b0e06f3 100644
--- a/README
+++ b/README
@@ -43,12 +43,12 @@ Finding the 100 largest files shared with multiple packages.
 Finding those top 100 files that save most space when being reduced to only
 one copy in the archive.
 
-    SELECT hash, sum(size)-min(size), count(*), count(distinct pid) FROM content JOIN hash ON content.id = hash.cid WHERE hash.function = "sha512" GROUP BY hash ORDER BY sum(size)-min(size) DESC LIMIT 100;
+    SELECT hash, sum(size)-min(size), count(*), count(distinct pid) FROM content JOIN hash ON content.id = hash.cid JOIN function ON hash.fid = function.id WHERE function.name = "sha512" GROUP BY hash ORDER BY sum(size)-min(size) DESC LIMIT 100;
 
 Finding PNG images that do not carry a .png file extension.
 
-    SELECT package.name, content.filename, content.size FROM content JOIN hash ON content.id = hash.cid JOIN package ON content.pid = package.id WHERE function = "image_sha512" AND filename NOT LIKE "%.png";
+    SELECT package.name, content.filename, content.size FROM content JOIN hash ON content.id = hash.cid JOIN package ON content.pid = package.id JOIN function ON hash.fid = function.id WHERE function.name = "image_sha512" AND filename NOT LIKE "%.png";
 
 Finding .gz files which either are not gziped or contain errors.
 
-    SELECT package.name, content.filename FROM content JOIN package ON content.pid = package.id WHERE filename LIKE "%.gz" AND (SELECT count(*) FROM hash WHERE hash.cid = content.id AND hash.function = "gzip_sha512") = 0;
+    SELECT package.name, content.filename FROM content JOIN package ON content.pid = package.id WHERE filename LIKE "%.gz" AND (SELECT count(*) FROM hash JOIN function ON hash.fid = function.id WHERE hash.cid = content.id AND function.name = "gzip_sha512") = 0;
-- 
cgit v1.2.3


From da33f7f323fa9431809e4d93a546e3b234d50406 Mon Sep 17 00:00:00 2001
From: Helmut Grohne <helmut@subdivi.de>
Date: Tue, 23 Jul 2013 23:26:28 +0200
Subject: webapp: remove unused function

---
 webapp.py | 21 ---------------------
 1 file changed, 21 deletions(-)

diff --git a/webapp.py b/webapp.py
index 9e23128..6def0b6 100755
--- a/webapp.py
+++ b/webapp.py
@@ -193,27 +193,6 @@ def html_response(unicode_iterator, max_age=24 * 60 * 60):
     resp.expires = datetime.datetime.now() + datetime.timedelta(seconds=max_age)
     return resp
 
-def generate_shared(rows):
-    """internal helper from show_detail"""
-    entry = None
-    for filename1, size1, func1, filename2, size2, func2, hashvalue in rows:
-        funccomb = (func1, func2)
-        if funccomb not in hash_functions:
-            continue
-        if entry and (entry["filename1"] != filename1 or
-                      entry["filename2"] != filename2):
-            yield entry
-            entry = None
-        if entry:
-            funcdict = entry["functions"]
-        else:
-            funcdict = dict()
-            entry = dict(filename1=filename1, filename2=filename2, size1=size1,
-                         size2=size2, functions=funcdict)
-        funcdict[funccomb] = hashvalue
-    if entry:
-        yield entry
-
 class Application(object):
     def __init__(self, db):
         self.db = db
-- 
cgit v1.2.3


From 1e50900862fe8887755597d85483dbc845ccb5e3 Mon Sep 17 00:00:00 2001
From: Helmut Grohne <helmut@subdivi.de>
Date: Tue, 23 Jul 2013 23:26:52 +0200
Subject: README: fix typo in query

---
 README | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README b/README
index 44b086a..a023d0a 100644
--- a/README
+++ b/README
@@ -38,7 +38,7 @@ SQL database by hand. Here are some example queries.
 
 Finding the 100 largest files shared with multiple packages.
 
-    SELECT pa.name, a.filename, pb.name, b.filename, a.size FROM content AS a JOIN hash AS ha ON a.id = ha.cid JOIN hash AS hb ON ha.hash = hb.hash JOIN content AS b ON b.id = hb.cid JOIN package AS pa ON b.pid = pa.id JOIN package AS pb ON b.pid = pb.id WHERE (a.pid != b.pid OR a.filename != b.filename) ORDER BY a.size DESC LIMIT 100;
+    SELECT pa.name, a.filename, pb.name, b.filename, a.size FROM content AS a JOIN hash AS ha ON a.id = ha.cid JOIN hash AS hb ON ha.hash = hb.hash JOIN content AS b ON b.id = hb.cid JOIN package AS pa ON a.pid = pa.id JOIN package AS pb ON b.pid = pb.id WHERE (a.pid != b.pid OR a.filename != b.filename) ORDER BY a.size DESC LIMIT 100;
 
 Finding those top 100 files that save most space when being reduced to only
 one copy in the archive.
-- 
cgit v1.2.3


From 9e37415086e64d8f623f8236af83a767648611bc Mon Sep 17 00:00:00 2001
From: Helmut Grohne <helmut@subdivi.de>
Date: Tue, 23 Jul 2013 23:32:00 +0200
Subject: webapp: make html for index valid

---
 webapp.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/webapp.py b/webapp.py
index 6def0b6..b5e0c63 100755
--- a/webapp.py
+++ b/webapp.py
@@ -151,7 +151,7 @@ index_template = jinjaenv.from_string(
             <noscript><b>This form is disfunctional when javascript is not enabled</b></noscript>
             Enter binary package to inspect - Note: Non-existing packages will result in <b>404</b>-Errors
             <form id="pkg_form">
-                <label for="pkg_name">Name: <input type="text" size="30" name="pkg_name" id="pkg_name">
+                <label for="pkg_name">Name: </label><input type="text" size="30" name="pkg_name" id="pkg_name">
                 <input type="submit" value="Go"> Permanent Link: <a id="perma_link" href="#"></a>
             </form>
     </fieldset></div></li>
-- 
cgit v1.2.3


From 32f406706c0a2a21b11656e5c56ff203e0ee3799 Mon Sep 17 00:00:00 2001
From: Helmut Grohne <helmut@subdivi.de>
Date: Wed, 24 Jul 2013 07:20:19 +0200
Subject: readyaml: cache the whole function table

This should reduce the query bandwidth to the rdbms.
---
 readyaml.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/readyaml.py b/readyaml.py
index f4d6ead..21b1ca1 100755
--- a/readyaml.py
+++ b/readyaml.py
@@ -25,6 +25,8 @@ def readyaml(db, stream):
         pid = None
 
     cur.execute("BEGIN;")
+    cur.execute("SELECT name, id FROM function;")
+    funcmapping = dict(cur.fetchall())
     if pid is not None:
         cur.execute("DELETE FROM content WHERE pid = ?;", (pid,))
         cur.execute("DELETE FROM dependency WHERE pid = ?;", (pid,))
@@ -45,8 +47,8 @@ def readyaml(db, stream):
         cur.execute("INSERT INTO content (pid, filename, size) VALUES (?, ?, ?);",
                     (pid, entry["name"], entry["size"]))
         cid = cur.lastrowid
-        cur.executemany("INSERT INTO hash (cid, fid, hash) VALUES (?, (SELECT id FROM function WHERE name = ?), ?);",
-                        ((cid, func, hexhash)
+        cur.executemany("INSERT INTO hash (cid, fid, hash) VALUES (?, ?, ?);",
+                        ((cid, funcmapping[func], hexhash)
                          for func, hexhash in entry["hashes"].items()))
     raise ValueError("missing commit block")
 
-- 
cgit v1.2.3


From 796eeb217e449234b777512451c5b668837c9118 Mon Sep 17 00:00:00 2001
From: Helmut Grohne <helmut@subdivi.de>
Date: Thu, 25 Jul 2013 12:48:45 +0200
Subject: README: foo.PNG is also a valid png name

---
 README | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README b/README
index a023d0a..c749a86 100644
--- a/README
+++ b/README
@@ -47,7 +47,7 @@ one copy in the archive.
 
 Finding PNG images that do not carry a .png file extension.
 
-    SELECT package.name, content.filename, content.size FROM content JOIN hash ON content.id = hash.cid JOIN package ON content.pid = package.id WHERE function = "image_sha512" AND filename NOT LIKE "%.png";
+    SELECT package.name, content.filename, content.size FROM content JOIN hash ON content.id = hash.cid JOIN package ON content.pid = package.id WHERE function = "image_sha512" AND lower(filename) NOT LIKE "%.png";
 
 Finding .gz files which either are not gziped or contain errors.
 
-- 
cgit v1.2.3


From 9b653583711c59d96c45af43ff8ee9534500adb6 Mon Sep 17 00:00:00 2001
From: Helmut Grohne <helmut@subdivi.de>
Date: Thu, 25 Jul 2013 13:28:19 +0200
Subject: display "issues" with files in package view

Currently these are invalid .gz files and PNG files not named .png.
---
 schema.sql        |  1 +
 update_sharing.py |  3 +++
 webapp.py         | 13 +++++++++++++
 3 files changed, 17 insertions(+)

diff --git a/schema.sql b/schema.sql
index e9e0bcc..8a94882 100644
--- a/schema.sql
+++ b/schema.sql
@@ -9,3 +9,4 @@ CREATE INDEX hash_hash_index ON hash (hash);
 CREATE TABLE sharing (pid1 INTEGER, pid2 INTEGER, func1 TEXT, func2 TEXT, files INTEGER, size INTEGER, FOREIGN KEY (pid1) REFERENCES package(id) ON DELETE CASCADE, FOREIGN KEY (pid2) REFERENCES package(id) ON DELETE CASCADE);
 CREATE INDEX sharing_insert_index ON sharing (pid1, pid2, func1, func2);
 CREATE TABLE duplicate (cid INTEGER PRIMARY KEY, FOREIGN KEY (cid) REFERENCES content(id) ON DELETE CASCADE);
+CREATE TABLE issue (cid INTEGER REFERENCES content(id) ON DELETE CASCADE, issue TEXT);
diff --git a/update_sharing.py b/update_sharing.py
index 55e8096..62a3ab5 100755
--- a/update_sharing.py
+++ b/update_sharing.py
@@ -43,6 +43,7 @@ def main():
     cur.execute("PRAGMA foreign_keys = ON;")
     cur.execute("DELETE FROM sharing;")
     cur.execute("DELETE FROM duplicate;")
+    cur.execute("DELETE FROM issue;")
     readcur = db.cursor()
     readcur.execute("SELECT hash FROM hash GROUP BY hash HAVING count(*) > 1;")
     for hashvalue, in fetchiter(readcur):
@@ -54,6 +55,8 @@ def main():
         cur.executemany("INSERT OR IGNORE INTO duplicate (cid) VALUES (?);",
                         [(row[1],) for row in rows])
         process_pkgdict(cur, pkgdict)
+    cur.execute("INSERT INTO issue (cid, issue) SELECT content.id, 'file named something.gz is not a valid gzip file' FROM content WHERE content.filename LIKE '%.gz' AND NOT EXISTS (SELECT 1 FROM hash WHERE hash.cid = content.id AND hash.function = 'gzip_sha512');")
+    cur.execute("INSERT INTO issue (cid, issue) SELECT content.id, 'png image not named something.png' FROM content JOIN hash ON content.id = hash.cid WHERE function = 'image_sha512' AND lower(filename) NOT LIKE '%.png';")
     db.commit()
 
 if __name__ == "__main__":
diff --git a/webapp.py b/webapp.py
index b5e0c63..c442ebe 100755
--- a/webapp.py
+++ b/webapp.py
@@ -69,6 +69,14 @@ package_template = jinjaenv.from_string(
     {%- endfor -%}
 <p>Note: Packages with yellow background are required to be installed when this package is installed.</p>
 {%- endif -%}
+{%- if issues -%}
+    <h3>issues with particular files</h3>
+    <table border='1'><tr><th>filename</th><th>issue</th></tr>
+    {%- for filename, issue in issues|dictsort(true) -%}
+        <tr><td><span class="filename">{{ filename|e }}</span></td><td>{{ issue|e }}</td></tr>
+    {%- endfor -%}
+    </table>
+{%- endif -%}
 {% endblock %}""")
 
 detail_template = jinjaenv.from_string(
@@ -271,6 +279,11 @@ class Application(object):
         params["dependencies"] = self.get_dependencies(params["pid"])
         params["shared"] = self.cached_sharedstats(params["pid"])
         params["urlroot"] = ".."
+        cur = self.db.cursor()
+        cur.execute("SELECT content.filename, issue.issue FROM content JOIN issue ON content.id = issue.cid WHERE content.pid = ?;",
+                    (params["pid"],))
+        params["issues"] = dict(cur.fetchall())
+        cur.close()
         return html_response(package_template.render(params))
 
     def compute_comparison(self, pid1, pid2):
-- 
cgit v1.2.3


From 03e7e27b440917081369e797e09de975912cb68c Mon Sep 17 00:00:00 2001
From: Helmut Grohne <helmut@subdivi.de>
Date: Fri, 26 Jul 2013 21:53:11 +0200
Subject: verify package hashes when importing via http

---
 autoimport.py    | 19 ++++++++++++-------
 dedup/hashing.py | 19 +++++++++++++++++++
 importpkg.py     | 26 ++++++++++++++++++++++++--
 3 files changed, 55 insertions(+), 9 deletions(-)

diff --git a/autoimport.py b/autoimport.py
index 694ffeb..481a3f8 100755
--- a/autoimport.py
+++ b/autoimport.py
@@ -29,7 +29,8 @@ def process_http(pkgs, url):
                 version_compare(pkgs[name]["version"], pkg["Version"]) > 0:
             continue
         pkgs[name] = dict(version=pkg["Version"],
-                          filename="%s/%s" % (url, pkg["Filename"]))
+                          filename="%s/%s" % (url, pkg["Filename"]),
+                          sha256hash=pkg["SHA256"])
 
 def process_file(pkgs, filename):
     base = os.path.basename(filename)
@@ -51,14 +52,18 @@ def process_dir(pkgs, d):
         except ValueError:
             pass
 
-def process_pkg(name, filename):
+def process_pkg(name, pkgdict):
+    filename = pkgdict["filename"]
     print("importing %s" % filename)
+    importcmd = ["python", "importpkg.py"]
+    if "sha256hash" in pkgdict:
+        importcmd.extend(["-H", pkgdict["sha256hash"]])
     if filename.startswith("http://"):
         with open(os.path.join("tmp", name), "w") as outp:
             dl = subprocess.Popen(["curl", "-s", filename],
                                   stdout=subprocess.PIPE, close_fds=True)
-            imp = subprocess.Popen(["python", "importpkg.py"], stdin=dl.stdout,
-                                   stdout=outp, close_fds=True)
+            imp = subprocess.Popen(importcmd, stdin=dl.stdout, stdout=outp,
+                                   close_fds=True)
             if imp.wait():
                 raise ValueError("importpkg failed")
             if dl.wait():
@@ -66,8 +71,8 @@ def process_pkg(name, filename):
     else:
         with open(filename) as inp:
             with open(os.path.join("tmp", name), "w") as outp:
-                subprocess.check_call(["python", "importpkg.py"], stdin=inp,
-                                      stdout=outp, close_fds=True)
+                subprocess.check_call(importcmd, stdin=inp, stdout=outp,
+                                      close_fds=True)
     print("preprocessed %s" % name)
 
 def main():
@@ -106,7 +111,7 @@ def main():
     with e:
         fs = {}
         for name, pkg in pkgs.items():
-            fs[e.submit(process_pkg, name, pkg["filename"])] = name
+            fs[e.submit(process_pkg, name, pkg)] = name
 
         for f in concurrent.futures.as_completed(fs.keys()):
             name = fs[f]
diff --git a/dedup/hashing.py b/dedup/hashing.py
index 1283c7e..002eda8 100644
--- a/dedup/hashing.py
+++ b/dedup/hashing.py
@@ -106,3 +106,22 @@ def hash_file(hashobj, filelike, blocksize=65536):
         hashobj.update(data)
         data = filelike.read(blocksize)
     return hashobj
+
+class HashedStream(object):
+    """A file-like object, that supports sequential reading and hashes the
+    contents on the fly."""
+    def __init__(self, filelike, hashobj):
+        """
+        @param filelike: a file-like object, that must support the read method
+        @param hashobj: a hashlib-like object providing update and hexdigest
+        """
+        self.filelike = filelike
+        self.hashobj = hashobj
+
+    def read(self, length):
+        data = self.filelike.read(length)
+        self.hashobj.update(data)
+        return data
+
+    def hexdigest(self):
+        return self.hashobj.hexdigest()
diff --git a/importpkg.py b/importpkg.py
index 56e03ae..2f38f5c 100755
--- a/importpkg.py
+++ b/importpkg.py
@@ -6,6 +6,7 @@ document contains package metadata. Then a document is emitted for each file.
 And finally a document consisting of the string "commit" is emitted."""
 
 import hashlib
+import optparse
 import sys
 import tarfile
 import zlib
@@ -15,7 +16,8 @@ import lzma
 import yaml
 
 from dedup.arreader import ArReader
-from dedup.hashing import HashBlacklist, DecompressedHash, SuppressingHash, hash_file
+from dedup.hashing import HashBlacklist, DecompressedHash, SuppressingHash, \
+    HashedStream, hash_file
 from dedup.compression import GzipDecompressor, DecompressedStream
 from dedup.image import ImageHash
 
@@ -121,8 +123,28 @@ def process_package(filelike):
         yield "commit"
         break
 
+def process_package_with_hash(filelike, sha256hash):
+    hstream = HashedStream(filelike, hashlib.sha256())
+    for elem in process_package(hstream):
+        if elem == "commit":
+            while hstream.read(4096):
+                pass
+            if hstream.hexdigest() != sha256hash:
+                raise ValueError("hash sum mismatch")
+            yield elem
+            break
+        yield elem
+
 def main():
-    yaml.safe_dump_all(process_package(sys.stdin), sys.stdout)
+    parser = optparse.OptionParser()
+    parser.add_option("-H", "--hash", action="store",
+                      help="verify that stdin has the given sha256 hash")
+    options, args = parser.parse_args()
+    if options.hash:
+        gen = process_package_with_hash(sys.stdin, options.hash)
+    else:
+        gen = process_package(sys.stdin)
+    yaml.safe_dump_all(gen, sys.stdout)
 
 if __name__ == "__main__":
     main()
-- 
cgit v1.2.3


From 0c27c95a9c55b82b2c7e5e90b885c87578e895d0 Mon Sep 17 00:00:00 2001
From: Helmut Grohne <helmut@subdivi.de>
Date: Sat, 27 Jul 2013 09:32:03 +0200
Subject: move templates to dedup package

They cluttered webapp.py and now vim can give proper highlighting for
the templates.
---
 README                       |   2 +-
 base.html                    |  22 -------
 dedup/templates/base.html    |  22 +++++++
 dedup/templates/binary.html  |  31 +++++++++
 dedup/templates/compare.html |  27 ++++++++
 dedup/templates/hash.html    |  12 ++++
 dedup/templates/index.html   |  44 +++++++++++++
 dedup/templates/source.html  |  15 +++++
 webapp.py                    | 146 ++-----------------------------------------
 9 files changed, 158 insertions(+), 163 deletions(-)
 delete mode 100644 base.html
 create mode 100644 dedup/templates/base.html
 create mode 100644 dedup/templates/binary.html
 create mode 100644 dedup/templates/compare.html
 create mode 100644 dedup/templates/hash.html
 create mode 100644 dedup/templates/index.html
 create mode 100644 dedup/templates/source.html

diff --git a/README b/README
index ef0ae48..a84807a 100644
--- a/README
+++ b/README
@@ -1,7 +1,7 @@
 Required packages
 -----------------
 
-    aptitude install python python-debian python-lzma curl python-jinja2 python-werkzeug sqlite3 python-imaging python-yaml python-concurrent.futures
+    aptitude install python python-debian python-lzma curl python-jinja2 python-werkzeug sqlite3 python-imaging python-yaml python-concurrent.futures python-pkg-resources
 
 Create a database
 -----------------
diff --git a/base.html b/base.html
deleted file mode 100644
index 4e49d47..0000000
--- a/base.html
+++ /dev/null
@@ -1,22 +0,0 @@
-<!DOCTYPE html>
-<html>
-	<head>
-		<title>{% block title %}{% endblock %}</title>
-		<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
-		<link  rel="stylesheet" type="text/css" href="{{ urlroot|e }}/style.css">
-		<link rel="icon" type="image/vnd.microsoft.icon" href="{{ urlroot|e }}/favicon.ico">
-		{% block header %}{% endblock %}
-	</head>
-	<body>
-{% block content %}
-{% endblock %}
-<hr>
-<h4>Details about this service</h4>
-<ul>
-	<li>More information: <a href="http://wiki.debian.org/dedup.debian.net">see wiki</a></li>
-	<li>Maintainer: Helmut Grohne &lt;helmut@subdivi.de&gt;</li>
-	<li>Source: git://murkel.subdivi.de/~helmut/debian-dedup.git</li>
-	<li>Bugs reports / Feedback / Patches: to the maintainer</li>
-</ul>
-	</body>
-</html>
diff --git a/dedup/templates/base.html b/dedup/templates/base.html
new file mode 100644
index 0000000..4e49d47
--- /dev/null
+++ b/dedup/templates/base.html
@@ -0,0 +1,22 @@
+<!DOCTYPE html>
+<html>
+	<head>
+		<title>{% block title %}{% endblock %}</title>
+		<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
+		<link  rel="stylesheet" type="text/css" href="{{ urlroot|e }}/style.css">
+		<link rel="icon" type="image/vnd.microsoft.icon" href="{{ urlroot|e }}/favicon.ico">
+		{% block header %}{% endblock %}
+	</head>
+	<body>
+{% block content %}
+{% endblock %}
+<hr>
+<h4>Details about this service</h4>
+<ul>
+	<li>More information: <a href="http://wiki.debian.org/dedup.debian.net">see wiki</a></li>
+	<li>Maintainer: Helmut Grohne &lt;helmut@subdivi.de&gt;</li>
+	<li>Source: git://murkel.subdivi.de/~helmut/debian-dedup.git</li>
+	<li>Bugs reports / Feedback / Patches: to the maintainer</li>
+</ul>
+	</body>
+</html>
diff --git a/dedup/templates/binary.html b/dedup/templates/binary.html
new file mode 100644
index 0000000..59c910c
--- /dev/null
+++ b/dedup/templates/binary.html
@@ -0,0 +1,31 @@
+{% extends "base.html" %}
+{% block title %}duplication of {{ package|e }}{% endblock %}
+{% block content %}<h1>{{ package|e }}</h1>
+<p>Version: {{ version|e }}</p>
+<p>Architecture: {{ architecture|e }}</p>
+<p>Number of files: {{ num_files }}</p>
+<p>Total size: {{ total_size|filesizeformat }}</p>
+{%- if shared -%}
+    {%- for function, sharing in shared.items() -%}
+        <h3>sharing with respect to {{ function|e }}</h3>
+        <table border='1'><tr><th>package</th><th>files shared</th><th>data shared</th></tr>
+        {%- for entry in sharing|sort(attribute="savable", reverse=true) -%}
+            <tr><td{% if not entry.package or entry.package in dependencies %} class="dependency"{% endif %}>
+                {%- if entry.package %}<a href="{{ entry.package|e }}"><span class="binary-package">{{ entry.package|e }}</span></a>{% else %}self{% endif -%}
+                <a href="../compare/{{ package|e }}/{{ entry.package|default(package, true)|e }}">compare</a></td>
+            <td>{{ entry.duplicate }} ({{ (100 * entry.duplicate / num_files)|int }}%)</td>
+            <td>{{ entry.savable|filesizeformat }} ({{ (100 * entry.savable / total_size)|int }}%)</td></tr>
+        {%- endfor -%}
+        </table>
+    {%- endfor -%}
+<p>Note: Packages with yellow background are required to be installed when this package is installed.</p>
+{%- endif -%}
+{%- if issues -%}
+    <h3>issues with particular files</h3>
+    <table border='1'><tr><th>filename</th><th>issue</th></tr>
+    {%- for filename, issue in issues|dictsort(true) -%}
+        <tr><td><span class="filename">{{ filename|e }}</span></td><td>{{ issue|e }}</td></tr>
+    {%- endfor -%}
+    </table>
+{%- endif -%}
+{% endblock %}
diff --git a/dedup/templates/compare.html b/dedup/templates/compare.html
new file mode 100644
index 0000000..f78e80f
--- /dev/null
+++ b/dedup/templates/compare.html
@@ -0,0 +1,27 @@
+{% extends "base.html" %}
+{% block title %}sharing between {{ details1.package|e }} and {{ details2.package|e }}{% endblock%}
+{% block content %}
+<h1><a href="../../binary/{{ details1.package|e }}">{{ details1.package|e }}</a> &lt;-&gt; <a href="../../binary/{{ details2.package|e }}">{{ details2.package|e }}</a></h1>
+<p>Version of {{ details1.package|e }}: {{ details1.version|e }}</p>
+<p>Architecture of {{ details1.package|e }}: {{ details1.architecture|e }}</p>
+{%- if details1.package != details2.package -%}
+<p>Version of {{ details2.package|e }}: {{ details2.version|e }}</p>
+<p>Architecture of {{ details2.package|e }}: {{ details2.architecture|e }}</p>
+{%- endif -%}
+<table border='1'><tr><th colspan="2">{{ details1.package|e }}</th><th colspan="2">{{ details2.package|e }}</th></tr>
+<tr><th>size</th><th>filename</th><th>hash functions</th><th>filename</th></tr>
+{%- for entry in shared -%}
+    <tr><td{% if entry.matches|length > 1 %} rowspan={{ entry.matches|length }}{% endif %}>{{ entry.size|filesizeformat }}</td><td{% if entry.matches|length > 1 %} rowspan={{ entry.matches|length }}{% endif %}>
+    {%- for filename in entry.filenames %}<span class="filename">{{ filename|e }}</span>{% endfor -%}</td><td>
+    {% for filename, match in entry.matches.items() -%}
+        {% if not loop.first %}<tr><td>{% endif -%}
+        {%- for funccomb, hashvalue in match.items() -%}
+            <a href="../../hash/{{ funccomb[0]|e }}/{{ hashvalue|e }}">{{ funccomb[0]|e }}</a>
+            {%- if funccomb[0] != funccomb[1] %} -&gt; <a href="../../hash/{{ funccomb[1]|e }}/{{ hashvalue|e }}">{{ funccomb[1]|e }}</a>{% endif %}
+            {%- if not loop.last %}, {% endif %}
+        {%- endfor -%}
+        </td><td><span class="filename">{{ filename|e }}</span></td></tr>
+    {%- endfor -%}
+{%- endfor -%}
+</table>
+{% endblock %}
diff --git a/dedup/templates/hash.html b/dedup/templates/hash.html
new file mode 100644
index 0000000..7141f96
--- /dev/null
+++ b/dedup/templates/hash.html
@@ -0,0 +1,12 @@
+{% extends "base.html" %}
+{% block title %}information on {{ function|e }} hash {{ hashvalue|e }}{% endblock %}
+{% block content %}
+<h1>{{ function|e }} {{ hashvalue|e }}</h1>
+<table border='1'><tr><th>package</th><th>filename</th><th>size</th><th>different function</th></tr>
+{%- for entry in entries -%}
+    <tr><td><a href="../../binary/{{ entry.package|e }}"><span class="binary-package">{{ entry.package|e }}</span></a></td>
+    <td><span class="filename">{{ entry.filename|e }}</span></td><td>{{ entry.size|filesizeformat }}</td>
+    <td>{% if function != entry.function %}{{ entry.function|e }}{% endif %}</td></tr>
+{%- endfor -%}
+</table>
+{% endblock %}
diff --git a/dedup/templates/index.html b/dedup/templates/index.html
new file mode 100644
index 0000000..7c9000f
--- /dev/null
+++ b/dedup/templates/index.html
@@ -0,0 +1,44 @@
+{% extends "base.html" %}
+{% block title %}Debian duplication detector{% endblock %}
+{% block header %}
+    <script type="text/javascript">
+        function getLinkTarget() {
+            var pkg = document.getElementById("pkg_name").value;
+            if(pkg) {
+                return "/binary/"+pkg;
+            }
+            return '#';
+        }
+        function processData() {
+            var link = document.getElementById("perma_link");
+            link.href = getLinkTarget();
+            link.text = location.href + getLinkTarget();
+        }
+        window.onload = function() {
+            document.getElementById('pkg_name').onkeyup = processData;
+            document.getElementById("pkg_form").onsubmit = function () {
+                location.href = getLinkTarget();
+                return false;
+            }
+            processData();
+            document.getElementById("form_div").style.display = '';
+        }
+    </script>
+{% endblock %}
+{% block content %}
+<h1>Debian duplication detector</h1>
+<ul>
+<li>To inspect a particular binary package, go to <pre>binary/&lt;packagename&gt;</pre> Example: <a href="binary/git">binary/git</a>
+    <div style="display:none" id="form_div"><fieldset>
+            <legend>Inspect package</legend>
+            <noscript><b>This form is dysfunctional when javascript is not enabled</b></noscript>
+            Enter binary package to inspect - Note: Non-existing packages will result in <b>404</b>-Errors
+            <form id="pkg_form">
+                <label for="pkg_name">Name: </label><input type="text" size="30" name="pkg_name" id="pkg_name">
+                <input type="submit" value="Go"> Permanent Link: <a id="perma_link" href="#"></a>
+            </form>
+    </fieldset></div></li>
+<li>To inspect a combination of binary packages go to <pre>compare/&lt;firstpackage&gt;/&lt;secondpackage&gt;</pre> Example: <a href="compare/git/git">compare/git/git</a></li>
+<li>To discover package shipping a particular file go to <pre>hash/sha512/&lt;hashvalue&gt;</pre> Example: <a href="hash/sha512/7633623b66b5e686bb94dd96a7cdb5a7e5ee00e87004fab416a5610d59c62badaf512a2e26e34e2455b7ed6b76690d2cd47464836d7d85d78b51d50f7e933d5c">hash/sha512/7633623b66b5e686bb94dd96a7cdb5a7e5ee00e87004fab416a5610d59c62badaf512a2e26e34e2455b7ed6b76690d2cd47464836d7d85d78b51d50f7e933d5c</a></li>
+</ul>
+{% endblock %}
diff --git a/dedup/templates/source.html b/dedup/templates/source.html
new file mode 100644
index 0000000..fc679b0
--- /dev/null
+++ b/dedup/templates/source.html
@@ -0,0 +1,15 @@
+{% extends "base.html" %}
+{% block title %}overview of {{ source|e }}{% endblock %}
+{% block content %}
+<h1>overview of {{ source|e }}</h1>
+<table border='1'><tr><th>binary from {{ source|e }}</th><th>savable</th><th>other package</th></tr>
+{%- for package, sharing in packages.items() -%}
+    <tr><td><a href="../binary/{{ package|e }}"><span class="binary-package">{{ package|e }}</span></a></td><td>
+    {%- if sharing -%}
+        {{ sharing.savable|filesizeformat }}</td><td><a href="../binary/{{ sharing.package|e }}"><span class="binary-package">{{ sharing.package|e }}</span></a> <a href="../compare/{{ package|e }}/{{ sharing.package|e }}">compare</a>
+    {%- else -%}</td><td>{%- endif -%}
+    </td></tr>
+{%- endfor -%}
+</table>
+<p>Note: Not all sharing listed here. Click on binary packages with non-zero savable to see more.</p>
+{% endblock %}
diff --git a/webapp.py b/webapp.py
index c080d41..e180087 100755
--- a/webapp.py
+++ b/webapp.py
@@ -20,7 +20,7 @@ hash_functions = [
         ("sha512", "gzip_sha512"),
         ("gzip_sha512", "sha512")]
 
-jinjaenv = jinja2.Environment(loader=jinja2.FileSystemLoader("."))
+jinjaenv = jinja2.Environment(loader=jinja2.PackageLoader("dedup", "templates"))
 
 def format_size(size):
     size = float(size)
@@ -45,145 +45,11 @@ def function_combination(function1, function2):
 jinjaenv.filters["filesizeformat"] = format_size
 
 base_template = jinjaenv.get_template("base.html")
-
-package_template = jinjaenv.from_string(
-"""{% extends "base.html" %}
-{% block title %}duplication of {{ package|e }}{% endblock %}
-{% block content %}<h1>{{ package|e }}</h1>
-<p>Version: {{ version|e }}</p>
-<p>Architecture: {{ architecture|e }}</p>
-<p>Number of files: {{ num_files }}</p>
-<p>Total size: {{ total_size|filesizeformat }}</p>
-{%- if shared -%}
-    {%- for function, sharing in shared.items() -%}
-        <h3>sharing with respect to {{ function|e }}</h3>
-        <table border='1'><tr><th>package</th><th>files shared</th><th>data shared</th></tr>
-        {%- for entry in sharing|sort(attribute="savable", reverse=true) -%}
-            <tr><td{% if not entry.package or entry.package in dependencies %} class="dependency"{% endif %}>
-                {%- if entry.package %}<a href="{{ entry.package|e }}"><span class="binary-package">{{ entry.package|e }}</span></a>{% else %}self{% endif %}
-                <a href="../compare/{{ package|e }}/{{ entry.package|default(package, true)|e }}">compare</a></td>
-            <td>{{ entry.duplicate }} ({{ (100 * entry.duplicate / num_files)|int }}%)</td>
-            <td>{{ entry.savable|filesizeformat }} ({{ (100 * entry.savable / total_size)|int }}%)</td></tr>
-        {%- endfor -%}
-        </table>
-    {%- endfor -%}
-<p>Note: Packages with yellow background are required to be installed when this package is installed.</p>
-{%- endif -%}
-{%- if issues -%}
-    <h3>issues with particular files</h3>
-    <table border='1'><tr><th>filename</th><th>issue</th></tr>
-    {%- for filename, issue in issues|dictsort(true) -%}
-        <tr><td><span class="filename">{{ filename|e }}</span></td><td>{{ issue|e }}</td></tr>
-    {%- endfor -%}
-    </table>
-{%- endif -%}
-{% endblock %}""")
-
-detail_template = jinjaenv.from_string(
-"""{% extends "base.html" %}
-{% block title %}sharing between {{ details1.package|e }} and {{ details2.package|e }}{% endblock%}
-{% block content %}
-<h1><a href="../../binary/{{ details1.package|e }}">{{ details1.package|e }}</a> &lt;-&gt; <a href="../../binary/{{ details2.package|e }}">{{ details2.package|e }}</a></h1>
-<p>Version of {{ details1.package|e }}: {{ details1.version|e }}</p>
-<p>Architecture of {{ details1.package|e }}: {{ details1.architecture|e }}</p>
-{%- if details1.package != details2.package -%}
-<p>Version of {{ details2.package|e }}: {{ details2.version|e }}</p>
-<p>Architecture of {{ details2.package|e }}: {{ details2.architecture|e }}</p>
-{%- endif -%}
-<table border='1'><tr><th colspan="2">{{ details1.package|e }}</th><th colspan="2">{{ details2.package|e }}</th></tr>
-<tr><th>size</th><th>filename</th><th>hash functions</th><th>filename</th></tr>
-{%- for entry in shared -%}
-    <tr><td{% if entry.matches|length > 1 %} rowspan={{ entry.matches|length }}{% endif %}>{{ entry.size|filesizeformat }}</td><td{% if entry.matches|length > 1 %} rowspan={{ entry.matches|length }}{% endif %}>
-    {%- for filename in entry.filenames %}<span class="filename">{{ filename|e }}</span>{% endfor -%}</td><td>
-    {% for filename, match in entry.matches.items() -%}
-        {% if not loop.first %}<tr><td>{% endif -%}
-        {%- for funccomb, hashvalue in match.items() -%}
-            <a href="../../hash/{{ funccomb[0]|e }}/{{ hashvalue|e }}">{{ funccomb[0]|e }}</a>
-            {%- if funccomb[0] != funccomb[1] %} -&gt; <a href="../../hash/{{ funccomb[1]|e }}/{{ hashvalue|e }}">{{ funccomb[1]|e }}</a>{% endif %}
-            {%- if not loop.last %}, {% endif %}
-        {%- endfor -%}
-        </td><td><span class="filename">{{ filename|e }}</span></td></tr>
-    {%- endfor -%}
-{%- endfor -%}
-</table>
-{% endblock %}""")
-
-hash_template = jinjaenv.from_string(
-"""{% extends "base.html" %}
-{% block title %}information on {{ function|e }} hash {{ hashvalue|e }}{% endblock %}
-{% block content %}
-<h1>{{ function|e }} {{ hashvalue|e }}</h1>
-<table border='1'><tr><th>package</th><th>filename</th><th>size</th><th>different function</th></tr>
-{%- for entry in entries -%}
-    <tr><td><a href="../../binary/{{ entry.package|e }}"><span class="binary-package">{{ entry.package|e }}</span></a></td>
-    <td><span class="filename">{{ entry.filename|e }}</span></td><td>{{ entry.size|filesizeformat }}</td>
-    <td>{% if function != entry.function %}{{ entry.function|e }}{% endif %}</td></tr>
-{%- endfor -%}
-</table>
-{% endblock %}""")
-
-index_template = jinjaenv.from_string(
-"""{% extends "base.html" %}
-{% block title %}Debian duplication detector{% endblock %}
-{% block header %}
-    <script type="text/javascript">
-        function getLinkTarget() {
-            var pkg = document.getElementById("pkg_name").value;
-            if(pkg) {
-                return "/binary/"+pkg;
-            }
-            return '#';
-        }
-        function processData() {
-            var link = document.getElementById("perma_link");
-            link.href = getLinkTarget();
-            link.text = location.href + getLinkTarget();
-        }
-        window.onload = function() {
-            document.getElementById('pkg_name').onkeyup = processData;
-            document.getElementById("pkg_form").onsubmit = function () {
-                location.href = getLinkTarget();
-                return false;
-            }
-            processData();
-            document.getElementById("form_div").style.display = '';
-        }
-    </script>
-{% endblock %}
-{% block content %}
-<h1>Debian duplication detector</h1>
-<ul>
-<li>To inspect a particlar binary package, go to <pre>binary/&lt;packagename&gt;</pre> Example: <a href="binary/git">binary/git</a>
-    <div style="display:none" id="form_div"><fieldset>
-            <legend>Inspect package</legend>
-            <noscript><b>This form is disfunctional when javascript is not enabled</b></noscript>
-            Enter binary package to inspect - Note: Non-existing packages will result in <b>404</b>-Errors
-            <form id="pkg_form">
-                <label for="pkg_name">Name: </label><input type="text" size="30" name="pkg_name" id="pkg_name">
-                <input type="submit" value="Go"> Permanent Link: <a id="perma_link" href="#"></a>
-            </form>
-    </fieldset></div></li>
-<li>To inspect a combination of binary packages go to <pre>compare/&lt;firstpackage&gt;/&lt;secondpackage&gt;</pre> Example: <a href="compare/git/git">compare/git/git</a></li>
-<li>To discover package shipping a particular file go to <pre>hash/sha512/&lt;hashvalue&gt;</pre> Example: <a href="hash/sha512/7633623b66b5e686bb94dd96a7cdb5a7e5ee00e87004fab416a5610d59c62badaf512a2e26e34e2455b7ed6b76690d2cd47464836d7d85d78b51d50f7e933d5c">hash/sha512/7633623b66b5e686bb94dd96a7cdb5a7e5ee00e87004fab416a5610d59c62badaf512a2e26e34e2455b7ed6b76690d2cd47464836d7d85d78b51d50f7e933d5c</a></li>
-</ul>
-{% endblock %}""")
-
-source_template = jinjaenv.from_string(
-"""{% extends "base.html" %}
-{% block title %}overview of {{ source|e }}{% endblock %}
-{% block content %}
-<h1>overview of {{ source|e }}</h1>
-<table border='1'><tr><th>binary from {{ source|e }}</th><th>savable</th><th>other package</th></tr>
-{% for package, sharing in packages.items() %}
-    <tr><td><a href="../binary/{{ package|e }}"><span class="binary-package">{{ package|e }}</span></a></td><td>
-    {%- if sharing -%}
-        {{ sharing.savable|filesizeformat }}</td><td><a href="../binary/{{ sharing.package|e }}"><span class="binary-package">{{ sharing.package|e }}</span></a> <a href="../compare/{{ package|e }}/{{ sharing.package|e }}">compare</a>
-    {%- else -%}</td><td>{%- endif -%}
-    </td></tr>
-{% endfor %}
-</table>
-<p>Note: Not all sharing listed here. Click on binary packages with non-zero savable to see more.</p>
-{% endblock %}""")
+package_template = jinjaenv.get_template("binary.html")
+detail_template = jinjaenv.get_template("compare.html")
+hash_template = jinjaenv.get_template("hash.html")
+index_template = jinjaenv.get_template("index.html")
+source_template = jinjaenv.get_template("source.html")
 
 def encode_and_buffer(iterator):
     buff = b""
-- 
cgit v1.2.3


From e88f5f74cebed92c42543ce0682a8a49075d859b Mon Sep 17 00:00:00 2001
From: Helmut Grohne <helmut@subdivi.de>
Date: Sat, 27 Jul 2013 09:39:14 +0200
Subject: also move the static directory into the dedup package

---
 dedup/static/favicon.ico | Bin 0 -> 4286 bytes
 dedup/static/style.css   |  12 ++++++++++++
 static/favicon.ico       | Bin 4286 -> 0 bytes
 static/style.css         |  12 ------------
 webapp.py                |   4 +---
 5 files changed, 13 insertions(+), 15 deletions(-)
 create mode 100644 dedup/static/favicon.ico
 create mode 100644 dedup/static/style.css
 delete mode 100644 static/favicon.ico
 delete mode 100644 static/style.css

diff --git a/dedup/static/favicon.ico b/dedup/static/favicon.ico
new file mode 100644
index 0000000..5039835
Binary files /dev/null and b/dedup/static/favicon.ico differ
diff --git a/dedup/static/style.css b/dedup/static/style.css
new file mode 100644
index 0000000..531ef9d
--- /dev/null
+++ b/dedup/static/style.css
@@ -0,0 +1,12 @@
+.dependency {
+	background-color: yellow;
+}
+td {
+	vertical-align: top;
+}
+.filename {
+	display: block;
+}
+.filename:hover {
+	background-color: #eee;
+}
diff --git a/static/favicon.ico b/static/favicon.ico
deleted file mode 100644
index 5039835..0000000
Binary files a/static/favicon.ico and /dev/null differ
diff --git a/static/style.css b/static/style.css
deleted file mode 100644
index 531ef9d..0000000
--- a/static/style.css
+++ /dev/null
@@ -1,12 +0,0 @@
-.dependency {
-	background-color: yellow;
-}
-td {
-	vertical-align: top;
-}
-.filename {
-	display: block;
-}
-.filename:hover {
-	background-color: #eee;
-}
diff --git a/webapp.py b/webapp.py
index e180087..6c6f5b4 100755
--- a/webapp.py
+++ b/webapp.py
@@ -1,7 +1,6 @@
 #!/usr/bin/python
 
 import datetime
-import os.path
 import sqlite3
 from wsgiref.simple_server import make_server
 
@@ -244,8 +243,7 @@ class Application(object):
 
 def main():
     app = Application(sqlite3.connect("test.sqlite3"))
-    staticdir = os.path.join(os.path.dirname(__file__), "static")
-    app = SharedDataMiddleware(app, {"/": staticdir})
+    app = SharedDataMiddleware(app, {"/": ("dedup", "static")})
     make_server("0.0.0.0", 8800, app).serve_forever()
 
 if __name__ == "__main__":
-- 
cgit v1.2.3


From 11c6897331d4df7704217d6718c0ad57dc567529 Mon Sep 17 00:00:00 2001
From: Helmut Grohne <helmut@subdivi.de>
Date: Mon, 29 Jul 2013 21:44:56 +0200
Subject: importpkg.py: support uncompressed data.tar

---
 importpkg.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/importpkg.py b/importpkg.py
index 2f38f5c..02d4936 100755
--- a/importpkg.py
+++ b/importpkg.py
@@ -109,6 +109,8 @@ def process_package(filelike):
         elif name == "data.tar.xz":
             zf = DecompressedStream(af, lzma.LZMADecompressor())
             tf = tarfile.open(fileobj=zf, mode="r|")
+        elif name == "data.tar":
+            tf = tarfile.open(fileobj=af, mode="r|")
         else:
             continue
         if state != "control_file":
-- 
cgit v1.2.3


From 5f8c7c8bd0b7c9f9e2c70adf966369910733ba2e Mon Sep 17 00:00:00 2001
From: Helmut Grohne <helmut@subdivi.de>
Date: Tue, 30 Jul 2013 15:52:22 +0200
Subject: fix update_sharing to work after functionid merge

---
 update_sharing.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/update_sharing.py b/update_sharing.py
index 4669759..910662e 100755
--- a/update_sharing.py
+++ b/update_sharing.py
@@ -55,8 +55,8 @@ def main():
         cur.executemany("INSERT OR IGNORE INTO duplicate (cid) VALUES (?);",
                         [(row[1],) for row in rows])
         process_pkgdict(cur, pkgdict)
-    cur.execute("INSERT INTO issue (cid, issue) SELECT content.id, 'file named something.gz is not a valid gzip file' FROM content WHERE content.filename LIKE '%.gz' AND NOT EXISTS (SELECT 1 FROM hash WHERE hash.cid = content.id AND hash.function = 'gzip_sha512');")
-    cur.execute("INSERT INTO issue (cid, issue) SELECT content.id, 'png image not named something.png' FROM content JOIN hash ON content.id = hash.cid WHERE function = 'image_sha512' AND lower(filename) NOT LIKE '%.png';")
+    cur.execute("INSERT INTO issue (cid, issue) SELECT content.id, 'file named something.gz is not a valid gzip file' FROM content WHERE content.filename LIKE '%.gz' AND NOT EXISTS (SELECT 1 FROM hash JOIN function ON hash.fid = function.id WHERE hash.cid = content.id AND function.name = 'gzip_sha512');")
+    cur.execute("INSERT INTO issue (cid, issue) SELECT content.id, 'png image not named something.png' FROM content JOIN hash ON content.id = hash.cid JOIN function ON hash.fid = function.id WHERE function.name = 'image_sha512' AND lower(filename) NOT LIKE '%.png';")
     db.commit()
 
 if __name__ == "__main__":
-- 
cgit v1.2.3


From 6cbe2522ad85f51a8c2dbbdde0c8779ecb14ea45 Mon Sep 17 00:00:00 2001
From: Helmut Grohne <helmut@subdivi.de>
Date: Tue, 30 Jul 2013 16:03:16 +0200
Subject: templates: wiki.d.o redirects to https now

---
 dedup/templates/base.html | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dedup/templates/base.html b/dedup/templates/base.html
index 4e49d47..62f4087 100644
--- a/dedup/templates/base.html
+++ b/dedup/templates/base.html
@@ -13,7 +13,7 @@
 <hr>
 <h4>Details about this service</h4>
 <ul>
-	<li>More information: <a href="http://wiki.debian.org/dedup.debian.net">see wiki</a></li>
+	<li>More information: <a href="https://wiki.debian.org/dedup.debian.net">see wiki</a></li>
 	<li>Maintainer: Helmut Grohne &lt;helmut@subdivi.de&gt;</li>
 	<li>Source: git://murkel.subdivi.de/~helmut/debian-dedup.git</li>
 	<li>Bugs reports / Feedback / Patches: to the maintainer</li>
-- 
cgit v1.2.3


From d3f68ad766b1c33867c2c504b0f5e6d9bb7cbf03 Mon Sep 17 00:00:00 2001
From: Helmut Grohne <helmut@subdivi.de>
Date: Tue, 30 Jul 2013 18:15:56 +0200
Subject: templates/binary: space between package and compare

---
 dedup/templates/binary.html | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dedup/templates/binary.html b/dedup/templates/binary.html
index 59c910c..69eceef 100644
--- a/dedup/templates/binary.html
+++ b/dedup/templates/binary.html
@@ -11,7 +11,7 @@
         <table border='1'><tr><th>package</th><th>files shared</th><th>data shared</th></tr>
         {%- for entry in sharing|sort(attribute="savable", reverse=true) -%}
             <tr><td{% if not entry.package or entry.package in dependencies %} class="dependency"{% endif %}>
-                {%- if entry.package %}<a href="{{ entry.package|e }}"><span class="binary-package">{{ entry.package|e }}</span></a>{% else %}self{% endif -%}
+                {%- if entry.package %}<a href="{{ entry.package|e }}"><span class="binary-package">{{ entry.package|e }}</span></a>{% else %}self{% endif %}
                 <a href="../compare/{{ package|e }}/{{ entry.package|default(package, true)|e }}">compare</a></td>
             <td>{{ entry.duplicate }} ({{ (100 * entry.duplicate / num_files)|int }}%)</td>
             <td>{{ entry.savable|filesizeformat }} ({{ (100 * entry.savable / total_size)|int }}%)</td></tr>
-- 
cgit v1.2.3


From 2712edb550968ce7ec8cd9800241d7944666631a Mon Sep 17 00:00:00 2001
From: Helmut Grohne <helmut@subdivi.de>
Date: Thu, 1 Aug 2013 23:06:26 +0200
Subject: support hashing gif images

 * Rename "image_sha512" to "png_sha512".
 * dedup.image.ImageHash is now a base class for image hashes such as
   PNGHash and GIFHash.
 * Enable both hashes in importpkg.
 * Fix README.
 * Add new hash combinations to webapp.
 * Add "gif file not named *.gif" to issues in update_sharing.
 * Add redirect for "image_sha512" to webapp for backwards
   compatibility.
---
 README            |  2 +-
 dedup/image.py    | 67 +++++++++++++++++++++++++++++++++++++------------------
 importpkg.py      | 17 +++++++++-----
 schema.sql        |  2 +-
 update_sharing.py |  3 ++-
 webapp.py         | 10 ++++++++-
 6 files changed, 70 insertions(+), 31 deletions(-)

diff --git a/README b/README
index a84807a..bf4da52 100644
--- a/README
+++ b/README
@@ -47,7 +47,7 @@ one copy in the archive.
 
 Finding PNG images that do not carry a .png file extension.
 
-    SELECT package.name, content.filename, content.size FROM content JOIN hash ON content.id = hash.cid JOIN package ON content.pid = package.id JOIN function ON hash.fid = function.id WHERE function.name = "image_sha512" AND lower(filename) NOT LIKE "%.png";
+    SELECT package.name, content.filename, content.size FROM content JOIN hash ON content.id = hash.cid JOIN package ON content.pid = package.id JOIN function ON hash.fid = function.id WHERE function.name = "png_sha512" AND lower(filename) NOT LIKE "%.png";
 
 Finding .gz files which either are not gziped or contain errors.
 
diff --git a/dedup/image.py b/dedup/image.py
index 1148890..c1f2de0 100644
--- a/dedup/image.py
+++ b/dedup/image.py
@@ -4,9 +4,10 @@ import struct
 import PIL.Image
 
 class ImageHash(object):
-    """A hash on the contents of an image. This disregards mode, depth and meta
-    information. Note that due to limitations in PIL and the image format
-    (interlacing) the full contents are stored and decoded in hexdigest."""
+    """A hash on the contents of an image data type supported by PIL. This
+    disregards mode, depth and meta information. Note that due to limitations
+    in PIL and the image format (interlacing) the full contents are stored and
+    decoded in hexdigest."""
     maxsize = 1024 * 1024 * 32
     # max memory usage is about 5 * maxpixels in bytes
     maxpixels = 1024 * 1024 * 32
@@ -19,33 +20,25 @@ class ImageHash(object):
         self.imagedetected = False
         self.content = io.BytesIO()
 
+    def detect(self):
+        raise NotImplementedError
+
     def update(self, data):
         self.content.write(data)
         if self.content.tell() > self.maxsize:
             raise ValueError("maximum image size exceeded")
-        if self.imagedetected:
-            return
-        if self.content.tell() < 33: # header + IHDR
-            return
-        curvalue = self.content.getvalue()
-        if curvalue.startswith(b"\x89PNG\r\n\x1a\n\0\0\0\x0dIHDR"):
-            width, height = struct.unpack(">II", curvalue[16:24])
-            if width * height > self.maxpixels:
-                raise ValueError("maximum image pixels exceeded")
-            self.imagedetected = True
-            return
-        raise ValueError("not a png image")
+        if not self.imagedetected:
+            self.imagedetected = self.detect()
 
     def copy(self):
-        new = ImageHash()
-        new.hashobj = self.hashobj.copy()
+        new = self.__class__(self.hashobj.copy())
         new.imagedetected = self.imagedetected
         new.content = io.BytesIO(self.content.getvalue())
         return new
 
     def hexdigest(self):
         if not self.imagedetected:
-            raise ValueError("not a png image")
+            raise ValueError("not a image")
         hashobj = self.hashobj.copy()
         pos = self.content.tell()
         try:
@@ -53,7 +46,7 @@ class ImageHash(object):
             try:
                 img = PIL.Image.open(self.content)
             except IOError:
-                raise ValueError("broken png header")
+                raise ValueError("broken header")
             width, height = img.size
             pack = lambda elem: struct.pack("BBBB", *elem)
             # special casing easy modes reduces memory usage
@@ -64,13 +57,43 @@ class ImageHash(object):
             elif img.mode != "RGBA":
                 try:
                     img = img.convert("RGBA")
-                except (SyntaxError, IndexError, IOError): # crazy stuff from PIL
-                    raise ValueError("error reading png image")
+                except (SyntaxError, IndexError, IOError):
+                    # crazy stuff from PIL
+                    raise ValueError("error reading image")
             try:
                 for elem in img.getdata():
                     hashobj.update(pack(elem))
             except (SyntaxError, IndexError, IOError): # crazy stuff from PIL
-                raise ValueError("error reading png image")
+                raise ValueError("error reading image")
         finally:
             self.content.seek(pos)
         return "%s%8.8x%8.8x" % (hashobj.hexdigest(), width, height)
+
+
+class PNGHash(ImageHash):
+    """A hash on the contents of a PNG image."""
+
+    def detect(self):
+        if self.content.tell() < 33: # 8 byte signature + 25 byte IHDR chunk; too short to decide yet
+            return False
+        curvalue = self.content.getvalue()
+        if curvalue.startswith(b"\x89PNG\r\n\x1a\n\0\0\0\x0dIHDR"): # signature, then IHDR length (13) and type
+            width, height = struct.unpack(">II", curvalue[16:24]) # big-endian 32-bit width, height
+            if width * height > self.maxpixels:
+                raise ValueError("maximum image pixels exceeded")
+            return True
+        raise ValueError("not a png image") # enough data seen, but the prefix does not match
+
+class GIFHash(ImageHash):
+    """A hash on the contents of the first frame of a GIF image."""
+
+    def detect(self):
+        if self.content.tell() < 10: # 6 byte magic + 4 byte logical dimension; too short to decide yet
+            return False
+        curvalue = self.content.getvalue()
+        if curvalue.startswith((b"GIF87a", b"GIF89a")): # both bytes: content is a BytesIO
+            width, height = struct.unpack("<HH", curvalue[6:10]) # little-endian 16-bit width, height
+            if width * height > self.maxpixels:
+                raise ValueError("maximum image pixels exceeded")
+            return True
+        raise ValueError("not a gif image")
diff --git a/importpkg.py b/importpkg.py
index 02d4936..182ca01 100755
--- a/importpkg.py
+++ b/importpkg.py
@@ -19,7 +19,7 @@ from dedup.arreader import ArReader
 from dedup.hashing import HashBlacklist, DecompressedHash, SuppressingHash, \
     HashedStream, hash_file
 from dedup.compression import GzipDecompressor, DecompressedStream
-from dedup.image import ImageHash
+from dedup.image import GIFHash, PNGHash
 
 class MultiHash(object):
     def __init__(self, *hashes):
@@ -44,17 +44,24 @@ def gziphash():
     hashobj.name = "gzip_sha512"
     return HashBlacklist(hashobj, boring_sha512_hashes)
 
-def imagehash():
-    hashobj = ImageHash(hashlib.sha512())
+def pnghash():
+    hashobj = PNGHash(hashlib.sha512())
     hashobj = SuppressingHash(hashobj, (ValueError,))
-    hashobj.name = "image_sha512"
+    hashobj.name = "png_sha512"
+    return hashobj
+
+def gifhash():
+    hashobj = GIFHash(hashlib.sha512())
+    hashobj = SuppressingHash(hashobj, (ValueError,))
+    hashobj.name = "gif_sha512"
     return hashobj
 
 def get_hashes(tar):
     for elem in tar:
         if not elem.isreg(): # excludes hard links as well
             continue
-        hasher = MultiHash(sha512_nontrivial(), gziphash(), imagehash())
+        hasher = MultiHash(sha512_nontrivial(), gziphash(), pnghash(),
+                           gifhash())
         hasher = hash_file(hasher, tar.extractfile(elem))
         hashes = {}
         for hashobj in hasher.hashes:
diff --git a/schema.sql b/schema.sql
index 13a65aa..ddc6ccd 100644
--- a/schema.sql
+++ b/schema.sql
@@ -1,7 +1,7 @@
 CREATE TABLE package (id INTEGER PRIMARY KEY, name TEXT UNIQUE, version TEXT, architecture TEXT, source TEXT);
 CREATE TABLE content (id INTEGER PRIMARY KEY, pid INTEGER, filename TEXT, size INTEGER, FOREIGN KEY (pid) REFERENCES package(id) ON DELETE CASCADE);
 CREATE TABLE function (id INTEGER PRIMARY KEY, name TEXT UNIQUE NOT NULL);
-INSERT INTO function (name) VALUES ("sha512"), ("gzip_sha512"), ("image_sha512");
+INSERT INTO function (name) VALUES ('sha512'), ('gzip_sha512'), ('png_sha512'), ('gif_sha512'); -- single quotes: double-quoted tokens are identifiers in SQL
 CREATE TABLE hash (cid INTEGER, fid INTEGER NOT NULL, hash TEXT, FOREIGN KEY (cid) REFERENCES content(id) ON DELETE CASCADE, FOREIGN KEY (fid) REFERENCES function(id));
 CREATE TABLE dependency (pid INTEGER, required TEXT, FOREIGN KEY (pid) REFERENCES package(id) ON DELETE CASCADE);
 CREATE INDEX content_package_size_index ON content (pid, size);
diff --git a/update_sharing.py b/update_sharing.py
index 910662e..5ec6c7b 100755
--- a/update_sharing.py
+++ b/update_sharing.py
@@ -56,7 +56,8 @@ def main():
                         [(row[1],) for row in rows])
         process_pkgdict(cur, pkgdict)
     cur.execute("INSERT INTO issue (cid, issue) SELECT content.id, 'file named something.gz is not a valid gzip file' FROM content WHERE content.filename LIKE '%.gz' AND NOT EXISTS (SELECT 1 FROM hash JOIN function ON hash.fid = function.id WHERE hash.cid = content.id AND function.name = 'gzip_sha512');")
-    cur.execute("INSERT INTO issue (cid, issue) SELECT content.id, 'png image not named something.png' FROM content JOIN hash ON content.id = hash.cid JOIN function ON hash.fid = function.id WHERE function.name = 'image_sha512' AND lower(filename) NOT LIKE '%.png';")
+    cur.execute("INSERT INTO issue (cid, issue) SELECT content.id, 'png image not named something.png' FROM content JOIN hash ON content.id = hash.cid JOIN function ON hash.fid = function.id WHERE function.name = 'png_sha512' AND lower(filename) NOT LIKE '%.png';")
+    cur.execute("INSERT INTO issue (cid, issue) SELECT content.id, 'gif image not named something.gif' FROM content JOIN hash ON content.id = hash.cid JOIN function ON hash.fid = function.id WHERE function.name = 'gif_sha512' AND lower(filename) NOT LIKE '%.gif';")
     db.commit()
 
 if __name__ == "__main__":
diff --git a/webapp.py b/webapp.py
index 6c6f5b4..260268a 100755
--- a/webapp.py
+++ b/webapp.py
@@ -14,7 +14,10 @@ from dedup.utils import fetchiter
 
 hash_functions = [
         ("sha512", "sha512"),
-        ("image_sha512", "image_sha512"),
+        ("png_sha512", "png_sha512"),  # NOTE(review): pairs presumably name the hash function on each side of a comparison; confirm
+        ("png_sha512", "gif_sha512"),  # png hash paired with gif hash
+        ("gif_sha512", "png_sha512"),  # the same pairing in the opposite order
+        ("gif_sha512", "gif_sha512"),
         ("gzip_sha512", "gzip_sha512"),
         ("sha512", "gzip_sha512"),
         ("gzip_sha512", "sha512")]
@@ -87,6 +90,11 @@ class Application(object):
             elif endpoint == "detail":
                 return self.show_detail(args["package1"], args["package2"])
             elif endpoint == "hash":
+                if args["function"] == "image_sha512":
+                    # backwards compatibility: image_sha512 was renamed to png_sha512
+                    raise RequestRedirect("%s/hash/png_sha512/%s" %
+                                          (request.environ["SCRIPT_NAME"],
+                                           args["hashvalue"]))
                 return self.show_hash(args["function"], args["hashvalue"])
             elif endpoint == "index":
                 if not request.environ["PATH_INFO"]:
-- 
cgit v1.2.3