diff options
author | Helmut Grohne <helmut@subdivi.de> | 2013-03-08 16:33:37 +0100 |
---|---|---|
committer | Helmut Grohne <helmut@subdivi.de> | 2013-03-08 16:33:37 +0100 |
commit | d017cd116e3666a8a0e22cd2c0b40af45514fa0c (patch) | |
tree | c5f2dd65ae5edc4413fa32a0d6b4084090036ecd | |
parent | 0e690a1f5e32d1e16ad27dd96cb43b78d5d36fb4 (diff) | |
download | debian-dedup-d017cd116e3666a8a0e22cd2c0b40af45514fa0c.tar.gz |
importpkg: support ssdeep hash
-rw-r--r-- | README | 2 | ||||
-rwxr-xr-x | importpkg.py | 23 |
2 files changed, 22 insertions, 3 deletions
```diff
@@ -3,6 +3,8 @@ Required packages
 
     aptitude install python python-debian python-lzma curl python-jinja2 python-werkzeug sqlite3 python-imaging
 
+Optional: https://pypi.python.org/pypi/ssdeep
+
 Create a database
 -----------------
 The database name is currently hardcoded as `test.sqlite3`. So copy the SQL
diff --git a/importpkg.py b/importpkg.py
index d63b85e..84cdad1 100755
--- a/importpkg.py
+++ b/importpkg.py
@@ -11,7 +11,12 @@ from debian.debian_support import version_compare
 from debian import deb822
 import lzma
 
-from dedup.hashing import HashBlacklist, DecompressedHash, SuppressingHash, hash_file
+try:
+    import ssdeep
+except ImportError:
+    ssdeep = None
+
+from dedup.hashing import HashBlacklist, DecompressedHash, SuppressingHash, StoredHash, hash_file
 from dedup.compression import GzipDecompressor, DecompressedStream
 from dedup.image import ImageHash
 
@@ -96,12 +101,24 @@ def imagehash():
     hashobj.name = "image_sha512"
     return hashobj
 
+if ssdeep is None:
+    def multihash():
+        return MultiHash(sha512_nontrivial(), gziphash(), imagehash())
+else:
+    def ssdeephash():
+        hashobj = StoredHash(lambda bytesio: ssdeep.hash(bytesio.getvalue()))
+        hashobj.name = "ssdeep"
+        return hashobj
+
+    def multihash():
+        return MultiHash(sha512_nontrivial(), gziphash(), imagehash(),
+                         ssdeephash())
+
 def get_hashes(tar):
     for elem in tar:
         if not elem.isreg(): # excludes hard links as well
             continue
-        hasher = MultiHash(sha512_nontrivial(), gziphash(), imagehash())
-        hasher = hash_file(hasher, tar.extractfile(elem))
+        hasher = hash_file(multihash(), tar.extractfile(elem))
         for hashobj in hasher.hashes:
             hashvalue = hashobj.hexdigest()
             if hashvalue:
```