diff options
-rw-r--r-- | dedup/hashing.py | 2 |
-rw-r--r-- | dedup/image.py | 2 |
-rw-r--r-- | dedup/templates/index.html | 2 |
-rwxr-xr-x | importpkg.py | 4 |
-rwxr-xr-x | webapp.py | 4 |
5 files changed, 7 insertions, 7 deletions
diff --git a/dedup/hashing.py b/dedup/hashing.py index 70f6268..a8a46c7 100644 --- a/dedup/hashing.py +++ b/dedup/hashing.py @@ -115,7 +115,7 @@ class DecompressedHash(object): class SuppressingHash(object): """A hash that silences exceptions from the update and hexdigest methods of - a hashlib-like object. If an exception has occured, hexdigest always + a hashlib-like object. If an exception has occurred, hexdigest always returns None.""" def __init__(self, hashobj, exceptions=()): """ diff --git a/dedup/image.py b/dedup/image.py index c1f2de0..ef17989 100644 --- a/dedup/image.py +++ b/dedup/image.py @@ -4,7 +4,7 @@ import struct import PIL.Image class ImageHash(object): - """A hash on the contents of an image datat type supported by PIL. This + """A hash on the contents of an image data type supported by PIL. This disregards mode, depth and meta information. Note that due to limitations in PIL and the image format (interlacing) the full contents are stored and decoded in hexdigest.""" diff --git a/dedup/templates/index.html b/dedup/templates/index.html index 7c9000f..169027e 100644 --- a/dedup/templates/index.html +++ b/dedup/templates/index.html @@ -28,7 +28,7 @@ {% block content %} <h1>Debian duplication detector</h1> <ul> -<li>To inspect a particlar binary package, go to <pre>binary/<packagename></pre> Example: <a href="binary/git">binary/git</a> +<li>To inspect a particular binary package, go to <pre>binary/<packagename></pre> Example: <a href="binary/git">binary/git</a> <div style="display:none" id="form_div"><fieldset> <legend>Inspect package</legend> <noscript><b>This form is dysfunctional when javascript is not enabled</b></noscript> diff --git a/importpkg.py b/importpkg.py index cb16f97..aeccda5 100755 --- a/importpkg.py +++ b/importpkg.py @@ -1,7 +1,7 @@ #!/usr/bin/python -"""This tool reads a debian package from stdin and emits a yaml stream on +"""This tool reads a Debian package from stdin and emits a yaml stream on stdout. 
It does not access a database. Therefore it can be run in parallel and -on multiple machines. The generated yaml conatins multiple documents. The first +on multiple machines. The generated yaml contains multiple documents. The first document contains package metadata. Then a document is emitted for each file. And finally a document consisting of the string "commit" is emitted.""" @@ -151,7 +151,7 @@ class Application(object): return html_response(package_template.render(params)) def compute_comparison(self, pid1, pid2): - """Compute a sequence of comparison objects ordery by the size of the + """Compute a sequence of comparison objects ordered by the size of the object in the first package. Each element of the sequence is a dict defining the following keys: * filenames: A set of filenames in package 1 (pid1) all referring to @@ -182,7 +182,7 @@ class Application(object): entry = dict(filenames=set((filename,)), size=size, matches={}) files[hashvalue] = entry - cur2.execute("SELECT fa.name, ha.hash, fb.name, filename FROM hash AS ha JOIN hash AS hb ON ha.hash = hb.hash JOIN content ON hb.cid = content.id JOIN function AS fa ON ha.fid = fa.id JOIN function AS fb ON hb.fid = fb.id WHERE ha.cid = ? AND pid = ?;", + cur2.execute("SELECT fa.name, ha.hash, fb.name, filename FROM hash AS ha JOIN hash AS hb ON ha.hash = hb.hash JOIN content ON hb.cid = content.id JOIN function AS fa ON ha.fid = fa.id JOIN function AS fb ON hb.fid = fb.id WHERE ha.cid = ? AND pid = ? AND fa.eqclass = fb.eqclass;", (cid, pid2)) for func1, hashvalue, func2, filename in fetchiter(cur2): entry["matches"].setdefault(filename, {})[func1, func2] = \ |