-rw-r--r--  README             |   2
-rwxr-xr-x  update_sharing.py  |  49
-rwxr-xr-x  webapp.py          | 166
3 files changed, 105 insertions(+), 112 deletions(-)
diff --git a/README b/README
index a5ce9d7..d4c950c 100644
--- a/README
+++ b/README
@@ -1,7 +1,7 @@
Required packages
-----------------
- aptitude install python python-debian python-lzma curl python-jinja2 python-werkzeug sqlite3 python-imaging python-yaml python-concurrent.futures
+ aptitude install python python-debian python-lzma curl python-jinja2 python-werkzeug sqlite3 python-imaging python-yaml python-concurrent.futures python-sqlalchemy
Create a database
-----------------
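The only functional change to the README is the new python-sqlalchemy dependency. Assuming the Debian setup the README already describes, a quick import check confirms the package is usable, e.g.:

    python -c 'import sqlalchemy; print(sqlalchemy.__version__)'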
diff --git a/update_sharing.py b/update_sharing.py
index d2b357b..bbd19e5 100755
--- a/update_sharing.py
+++ b/update_sharing.py
@@ -1,16 +1,16 @@
#!/usr/bin/python
-import sqlite3
+import sqlalchemy
from dedup.utils import fetchiter
-def add_values(cursor, insert_key, files, size):
- cursor.execute("UPDATE sharing SET files = files + ?, size = size + ? WHERE package1 = ? AND package2 = ? AND func1 = ? AND func2 = ?;",
- (files, size) + insert_key)
- if cursor.rowcount > 0:
+def add_values(conn, insert_key, files, size):
+ rows = conn.execute("UPDATE sharing SET files = files + ?, size = size + ? WHERE package1 = ? AND package2 = ? AND func1 = ? AND func2 = ?;",
+ (files, size) + insert_key)
+ if rows.rowcount > 0:
return
- cursor.execute("INSERT INTO sharing (package1, package2, func1, func2, files, size) VALUES (?, ?, ?, ?, ?, ?);",
- insert_key + (files, size))
+ conn.execute("INSERT INTO sharing (package1, package2, func1, func2, files, size) VALUES (?, ?, ?, ?, ?, ?);",
+ insert_key + (files, size))
def compute_pkgdict(rows):
pkgdict = dict()
@@ -19,7 +19,7 @@ def compute_pkgdict(rows):
funcdict.setdefault(function, []).append((size, filename))
return pkgdict
-def process_pkgdict(cursor, pkgdict):
+def process_pkgdict(conn, pkgdict):
for package1, funcdict1 in pkgdict.items():
for function1, files in funcdict1.items():
numfiles = len(files)
@@ -35,26 +35,23 @@ def process_pkgdict(cursor, pkgdict):
pkgsize = size
for function2 in funcdict2.keys():
insert_key = (package1, package2, function1, function2)
- add_values(cursor, insert_key, pkgnumfiles, pkgsize)
+ add_values(conn, insert_key, pkgnumfiles, pkgsize)
def main():
- db = sqlite3.connect("test.sqlite3")
- cur = db.cursor()
- cur.execute("PRAGMA foreign_keys = ON;")
- cur.execute("DELETE FROM sharing;")
- cur.execute("DELETE FROM duplicate;")
- readcur = db.cursor()
- readcur.execute("SELECT hash FROM hash GROUP BY hash HAVING count(*) > 1;")
- for hashvalue, in fetchiter(readcur):
- cur.execute("SELECT content.package, content.id, content.filename, content.size, hash.function FROM hash JOIN content ON hash.cid = content.id WHERE hash = ?;",
- (hashvalue,))
- rows = cur.fetchall()
- print("processing hash %s with %d entries" % (hashvalue, len(rows)))
- pkgdict = compute_pkgdict(rows)
- cur.executemany("INSERT OR IGNORE INTO duplicate (cid) VALUES (?);",
- [(row[1],) for row in rows])
- process_pkgdict(cur, pkgdict)
- db.commit()
+ db = sqlalchemy.create_engine("sqlite:///test.sqlite3")
+ with db.begin() as conn:
+ conn.execute("PRAGMA foreign_keys = ON;")
+ conn.execute("DELETE FROM sharing;")
+ conn.execute("DELETE FROM duplicate;")
+ readcur = conn.execute("SELECT hash FROM hash GROUP BY hash HAVING count(*) > 1;")
+ for hashvalue, in fetchiter(readcur):
+ rows = conn.execute("SELECT content.package, content.id, content.filename, content.size, hash.function FROM hash JOIN content ON hash.cid = content.id WHERE hash = ?;",
+ (hashvalue,)).fetchall()
+ print("processing hash %s with %d entries" % (hashvalue, len(rows)))
+ pkgdict = compute_pkgdict(rows)
+ conn.execute("INSERT OR IGNORE INTO duplicate (cid) VALUES (?);",
+ *[(row[1],) for row in rows])
+ process_pkgdict(conn, pkgdict)
if __name__ == "__main__":
main()
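For context on the rewritten main(): the explicit db.commit() disappears because all statements now run inside an Engine.begin() block, which opens a transaction, commits it when the with-block exits normally, and rolls it back if an exception escapes. A minimal sketch of that pattern, assuming the old string-accepting execute() API used throughout this diff (newer SQLAlchemy releases require wrapping raw SQL in sqlalchemy.text()):

    import sqlalchemy

    db = sqlalchemy.create_engine("sqlite:///test.sqlite3")
    with db.begin() as conn:                  # transaction starts here
        conn.execute("DELETE FROM sharing;")
        conn.execute("DELETE FROM duplicate;")
        # ... further statements run in the same transaction ...
    # leaving the block commits; an exception inside it rolls back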
diff --git a/webapp.py b/webapp.py
index 86d14f0..fdf8704 100755
--- a/webapp.py
+++ b/webapp.py
@@ -2,10 +2,10 @@
import datetime
import os.path
-import sqlite3
from wsgiref.simple_server import make_server
import jinja2
+import sqlalchemy
from werkzeug.exceptions import HTTPException, NotFound
from werkzeug.routing import Map, Rule, RequestRedirect
from werkzeug.wrappers import Request, Response
@@ -247,43 +247,41 @@ class Application(object):
return e
def get_details(self, package):
- cur = self.db.cursor()
- cur.execute("SELECT version, architecture FROM package WHERE package = ?;",
- (package,))
- row = cur.fetchone()
- if not row:
- raise NotFound()
- version, architecture = row
- details = dict(package=package,
- version=version,
- architecture=architecture)
- cur.execute("SELECT count(filename), sum(size) FROM content WHERE package = ?;",
- (package,))
- num_files, total_size = cur.fetchone()
- if total_size is None:
- total_size = 0
- details.update(dict(num_files=num_files, total_size=total_size))
- return details
+ with self.db.begin() as conn:
+ row = conn.execute("SELECT version, architecture FROM package WHERE package = ?;",
+ (package,)).fetchone()
+ if not row:
+ raise NotFound()
+ version, architecture = row
+ row = conn.execute("SELECT count(filename), sum(size) FROM content WHERE package = ?;",
+ (package,)).fetchone()
+ num_files, total_size = row
+ return dict(package=package,
+ version=version,
+ architecture=architecture,
+ num_files=num_files,
+ total_size=total_size or 0)  # total_size may be None
def get_dependencies(self, package):
- cur = self.db.cursor()
- cur.execute("SELECT required FROM dependency WHERE package = ?;",
- (package,))
- return set(row[0] for row in fetchiter(cur))
+ with self.db.begin() as conn:
+ cur = conn.execute("SELECT required FROM dependency WHERE package = ?;",
+ (package,))
+ return set(row[0] for row in fetchiter(cur))
def cached_sharedstats(self, package):
- cur = self.db.cursor()
sharedstats = {}
- cur.execute("SELECT package2, func1, func2, files, size FROM sharing WHERE package1 = ?;",
- (package,))
- for package2, func1, func2, files, size in fetchiter(cur):
- if (func1, func2) not in hash_functions:
- continue
- curstats = sharedstats.setdefault(
- function_combination(func1, func2), list())
- if package2 == package:
- package2 = None
- curstats.append(dict(package=package2, duplicate=files, savable=size))
+ with self.db.begin() as conn:
+ cur = conn.execute("SELECT package2, func1, func2, files, size FROM sharing WHERE package1 = ?;",
+ (package,))
+ for package2, func1, func2, files, size in fetchiter(cur):
+ if (func1, func2) not in hash_functions:
+ continue
+ curstats = sharedstats.setdefault(
+ function_combination(func1, func2), list())
+ if package2 == package:
+ package2 = None
+ curstats.append(dict(package=package2, duplicate=files,
+ savable=size))
return sharedstats
def show_package(self, package):
@@ -303,35 +301,32 @@ class Application(object):
* matches: A mapping from filenames in package2 to a mapping from
hash function pairs to hash values.
"""
- cur = self.db.cursor()
- cur.execute("SELECT id, filename, size, hash FROM content JOIN hash ON content.id = hash.cid JOIN duplicate ON content.id = duplicate.cid WHERE package = ? AND function = 'sha512' ORDER BY size DESC;",
- (package1,))
- cursize = -1
- files = dict()
- minmatch = 2 if package1 == package2 else 1
- for cid, filename, size, hashvalue in fetchiter(cur):
- if cursize != size:
- for entry in files.values():
- if len(entry["matches"]) >= minmatch:
- yield entry
- files.clear()
- cursize = size
-
- if hashvalue in files:
- files[hashvalue]["filenames"].add(filename)
- continue
-
- entry = dict(filenames=set((filename,)), size=size, matches={})
- files[hashvalue] = entry
-
- cur2 = self.db.cursor()
- cur2.execute("SELECT ha.function, ha.hash, hb.function, filename FROM hash AS ha JOIN hash AS hb ON ha.hash = hb.hash JOIN content ON hb.cid = content.id WHERE ha.cid = ? AND package = ?;",
- (cid, package2))
- for func1, hashvalue, func2, filename in fetchiter(cur2):
- entry["matches"].setdefault(filename, {})[func1, func2] = \
- hashvalue
- cur2.close()
- cur.close()
+ with self.db.begin() as conn:
+ cur = conn.execute("SELECT id, filename, size, hash FROM content JOIN hash ON content.id = hash.cid JOIN duplicate ON content.id = duplicate.cid WHERE package = ? AND function = 'sha512' ORDER BY size DESC;",
+ (package1,))
+ cursize = -1
+ files = dict()
+ minmatch = 2 if package1 == package2 else 1
+ for cid, filename, size, hashvalue in fetchiter(cur):
+ if cursize != size:
+ for entry in files.values():
+ if len(entry["matches"]) >= minmatch:
+ yield entry
+ files.clear()
+ cursize = size
+
+ if hashvalue in files:
+ files[hashvalue]["filenames"].add(filename)
+ continue
+
+ entry = dict(filenames=set((filename,)), size=size, matches={})
+ files[hashvalue] = entry
+
+ cur = conn.execute("SELECT ha.function, ha.hash, hb.function, filename FROM hash AS ha JOIN hash AS hb ON ha.hash = hb.hash JOIN content ON hb.cid = content.id WHERE ha.cid = ? AND package = ?;",
+ (cid, package2))
+ for func1, hashvalue, func2, filename in fetchiter(cur):
+ entry["matches"].setdefault(filename, {})[func1, func2] = \
+ hashvalue
for entry in files.values():
if len(entry["matches"]) >= minmatch:
@@ -351,13 +346,13 @@ class Application(object):
return html_response(detail_template.stream(params))
def show_hash(self, function, hashvalue):
- cur = self.db.cursor()
- cur.execute("SELECT content.package, content.filename, content.size, hash.function FROM content JOIN hash ON content.id = hash.cid WHERE hash = ?;",
- (hashvalue,))
- entries = [dict(package=package, filename=filename, size=size,
- function=otherfunc)
- for package, filename, size, otherfunc in fetchiter(cur)
- if (function, otherfunc) in hash_functions]
+ with self.db.begin() as conn:
+ cur = conn.execute("SELECT content.package, content.filename, content.size, hash.function FROM content JOIN hash ON content.id = hash.cid WHERE hash = ?;",
+ (hashvalue,))
+ entries = [dict(package=package, filename=filename, size=size,
+ function=otherfunc)
+ for package, filename, size, otherfunc in fetchiter(cur)
+ if (function, otherfunc) in hash_functions]
if not entries:
raise NotFound()
params = dict(function=function, hashvalue=hashvalue, entries=entries,
@@ -365,26 +360,27 @@ class Application(object):
return html_response(hash_template.render(params))
def show_source(self, package):
- cur = self.db.cursor()
- cur.execute("SELECT package FROM package WHERE source = ?;",
- (package,))
- binpkgs = dict.fromkeys(pkg for pkg, in fetchiter(cur))
- if not binpkgs:
- raise NotFound
- cur.execute("SELECT package.package, sharing.package2, sharing.func1, sharing.func2, sharing.files, sharing.size FROM package JOIN sharing ON package.package = sharing.package1 WHERE package.source = ?;",
- (package,))
- for binary, otherbin, func1, func2, files, size in fetchiter(cur):
- entry = dict(package=otherbin,
- funccomb=function_combination(func1, func2),
- duplicate=files, savable=size)
- oldentry = binpkgs.get(binary)
- if not (oldentry and oldentry["savable"] >= size):
- binpkgs[binary] = entry
+ with self.db.begin() as conn:
+ cur = conn.execute("SELECT package FROM package WHERE source = ?;",
+ (package,))
+ binpkgs = dict.fromkeys(pkg for pkg, in fetchiter(cur))
+ if not binpkgs:
+ raise NotFound()
+ cur = conn.execute("SELECT package.package, sharing.package2, sharing.func1, sharing.func2, sharing.files, sharing.size FROM package JOIN sharing ON package.package = sharing.package1 WHERE package.source = ?;",
+ (package,))
+ for binary, otherbin, func1, func2, files, size in fetchiter(cur):
+ entry = dict(package=otherbin,
+ funccomb=function_combination(func1, func2),
+ duplicate=files, savable=size)
+ oldentry = binpkgs.get(binary)
+ if not (oldentry and oldentry["savable"] >= size):
+ binpkgs[binary] = entry
params = dict(source=package, packages=binpkgs, urlroot="..")
return html_response(source_template.render(params))
def main():
- app = Application(sqlite3.connect("test.sqlite3"))
+ db = sqlalchemy.create_engine("sqlite:///test.sqlite3")
+ app = Application(db)
staticdir = os.path.join(os.path.dirname(__file__), "static")
app = SharedDataMiddleware(app, {"/": staticdir})
make_server("0.0.0.0", 8800, app).serve_forever()
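The webapp changes all follow the same pattern: each request handler opens a short-lived transaction with self.db.begin(), passes raw qmark-style SQL through to the sqlite3 driver, and uses the result object's fetchone()/fetchall() or plain iteration instead of a dedicated cursor. A small, hypothetical illustration of that pattern under the same old-SQLAlchemy assumption (the package name "bash" is only an example value):

    import sqlalchemy

    engine = sqlalchemy.create_engine("sqlite:///test.sqlite3")
    with engine.begin() as conn:
        # result rows unpack like tuples, just as sqlite3 rows did before
        row = conn.execute("SELECT version, architecture FROM package WHERE package = ?;",
                           ("bash",)).fetchone()
        if row is not None:
            version, architecture = row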