Merge branch master into sqlalchemy

This makes the sqlalchemy branch schema-compatible with master again. The biggest change on master was the introduction of the function table, which caused most of the conflicts. Note that webapp.py had one conflict that git did not detect: the selection of issues in show_package needed conversion to sqlalchemy. Conflicts: README, update_sharing.py, webapp.py
author: Helmut Grohne <helmut@subdivi.de> 2013-08-02 08:40:49 +0200
committer: Helmut Grohne <helmut@subdivi.de> 2013-08-02 08:40:49 +0200
commit: cb3708825bf7ea32314040575cef35980dad0cd8 (patch)
tree: 31575a8525dc90ba6904268d94f47e1604bf0557 /README
parent: a4bbbb6e664e605634cb3f9e0564c7e4a93697be (diff)
parent: 2712edb550968ce7ec8cd9800241d7944666631a (diff)
download: debian-dedup-cb3708825bf7ea32314040575cef35980dad0cd8.tar.gz
1 file changed, 5 insertions, 5 deletions
diff --git a/README b/README
index c3ffcb3..768d204 100644
--- a/README
+++ b/README
@@ -1,7 +1,7 @@
 Required packages
 -----------------
 
-    aptitude install python python-debian python-lzma curl python-jinja2 python-werkzeug sqlite3 python-imaging python-yaml python-concurrent.futures python-sqlalchemy
+    aptitude install python python-debian python-lzma curl python-jinja2 python-werkzeug sqlite3 python-imaging python-yaml python-concurrent.futures python-pkg-resources python-sqlalchemy
 
 Create a database
 -----------------
@@ -38,17 +38,17 @@ SQL database by hand. Here are some example queries.
 
 Finding the 100 largest files shared with multiple packages.
 
-    SELECT pa.name, a.filename, pb.name, b.filename, a.size FROM content AS a JOIN hash AS ha ON a.id = ha.cid JOIN hash AS hb ON ha.hash = hb.hash JOIN content AS b ON b.id = hb.cid JOIN package AS pa ON b.pid = pa.id JOIN package AS pb ON b.pid = pb.id WHERE (a.pid != b.pid OR a.filename != b.filename) ORDER BY a.size DESC LIMIT 100;
+    SELECT pa.name, a.filename, pb.name, b.filename, a.size FROM content AS a JOIN hash AS ha ON a.id = ha.cid JOIN hash AS hb ON ha.hash = hb.hash JOIN content AS b ON b.id = hb.cid JOIN package AS pa ON a.pid = pa.id JOIN package AS pb ON b.pid = pb.id WHERE (a.pid != b.pid OR a.filename != b.filename) ORDER BY a.size DESC LIMIT 100;
 
 Finding those top 100 files that save most space when being reduced to only
 one copy in the archive.
 
-    SELECT hash, sum(size)-min(size), count(*), count(distinct pid) FROM content JOIN hash ON content.id = hash.cid WHERE hash.function = "sha512" GROUP BY hash ORDER BY sum(size)-min(size) DESC LIMIT 100;
+    SELECT hash, sum(size)-min(size), count(*), count(distinct pid) FROM content JOIN hash ON content.id = hash.cid JOIN function ON hash.fid = function.id WHERE function.name = "sha512" GROUP BY hash ORDER BY sum(size)-min(size) DESC LIMIT 100;
 
 Finding PNG images that do not carry a .png file extension.
 
-    SELECT package.name, content.filename, content.size FROM content JOIN hash ON content.id = hash.cid JOIN package ON content.pid = package.id WHERE function = "image_sha512" AND filename NOT LIKE "%.png";
+    SELECT package.name, content.filename, content.size FROM content JOIN hash ON content.id = hash.cid JOIN package ON content.pid = package.id JOIN function ON hash.fid = function.id WHERE function.name = "png_sha512" AND lower(filename) NOT LIKE "%.png";
 
 Finding .gz files which either are not gzipped or contain errors.
 
-    SELECT package.name, content.filename FROM content JOIN package ON content.pid = package.id WHERE filename LIKE "%.gz" AND (SELECT count(*) FROM hash WHERE hash.cid = content.id AND hash.function = "gzip_sha512") = 0;
+    SELECT package.name, content.filename FROM content JOIN package ON content.pid = package.id WHERE filename LIKE "%.gz" AND (SELECT count(*) FROM hash JOIN function ON hash.fid = function.id WHERE hash.cid = content.id AND function.name = "gzip_sha512") = 0;
author	Helmut Grohne <helmut@subdivi.de>	2013-08-02 08:40:49 +0200
committer	Helmut Grohne <helmut@subdivi.de>	2013-08-02 08:40:49 +0200
commit	cb3708825bf7ea32314040575cef35980dad0cd8 (patch)
tree	31575a8525dc90ba6904268d94f47e1604bf0557 /README
parent	a4bbbb6e664e605634cb3f9e0564c7e4a93697be (diff)
parent	2712edb550968ce7ec8cd9800241d7944666631a (diff)
download	debian-dedup-cb3708825bf7ea32314040575cef35980dad0cd8.tar.gz