diff options
author | Helmut Grohne <helmut@subdivi.de> | 2013-07-26 15:04:02 +0200 |
---|---|---|
committer | Helmut Grohne <helmut@subdivi.de> | 2013-07-26 15:04:02 +0200 |
commit | dc378a18d50142baceaef4c2a416cb5a40f84861 (patch) | |
tree | 975967733a6f7f726618843df11acf766537f9e0 /README | |
parent | 9b653583711c59d96c45af43ff8ee9534500adb6 (diff) | |
parent | 32f406706c0a2a21b11656e5c56ff203e0ee3799 (diff) | |
download | debian-dedup-dc378a18d50142baceaef4c2a416cb5a40f84861.tar.gz |
Merge branch functionid
Actual savings on the full data set are around 7%.
Conflicts:
README
Diffstat (limited to 'README')
-rw-r--r-- | README | 6 |
1 files changed, 3 insertions, 3 deletions
@@ -43,12 +43,12 @@ Finding the 100 largest files shared with multiple packages.
 Finding those top 100 files that save most space when being reduced to only one copy in the archive.
- SELECT hash, sum(size)-min(size), count(*), count(distinct pid) FROM content JOIN hash ON content.id = hash.cid WHERE hash.function = "sha512" GROUP BY hash ORDER BY sum(size)-min(size) DESC LIMIT 100;
+ SELECT hash, sum(size)-min(size), count(*), count(distinct pid) FROM content JOIN hash ON content.id = hash.cid JOIN function ON hash.fid = function.id WHERE function.name = "sha512" GROUP BY hash ORDER BY sum(size)-min(size) DESC LIMIT 100;
 Finding PNG images that do not carry a .png file extension.
- SELECT package.name, content.filename, content.size FROM content JOIN hash ON content.id = hash.cid JOIN package ON content.pid = package.id WHERE function = "image_sha512" AND lower(filename) NOT LIKE "%.png";
+ SELECT package.name, content.filename, content.size FROM content JOIN hash ON content.id = hash.cid JOIN package ON content.pid = package.id JOIN function ON hash.fid = function.id WHERE function.name = "image_sha512" AND lower(filename) NOT LIKE "%.png";
 Finding .gz files which either are not gzipped or contain errors.
- SELECT package.name, content.filename FROM content JOIN package ON content.pid = package.id WHERE filename LIKE "%.gz" AND (SELECT count(*) FROM hash WHERE hash.cid = content.id AND hash.function = "gzip_sha512") = 0;
+ SELECT package.name, content.filename FROM content JOIN package ON content.pid = package.id WHERE filename LIKE "%.gz" AND (SELECT count(*) FROM hash JOIN function ON hash.fid = function.id WHERE hash.cid = content.id AND function.name = "gzip_sha512") = 0;