From d017cd116e3666a8a0e22cd2c0b40af45514fa0c Mon Sep 17 00:00:00 2001 From: Helmut Grohne Date: Fri, 8 Mar 2013 16:33:37 +0100 Subject: importpkg: support ssdeep hash --- README | 2 ++ importpkg.py | 23 ++++++++++++++++++++--- 2 files changed, 22 insertions(+), 3 deletions(-) diff --git a/README b/README index bc1b715..4e2083c 100644 --- a/README +++ b/README @@ -3,6 +3,8 @@ Required packages aptitude install python python-debian python-lzma curl python-jinja2 python-werkzeug sqlite3 python-imaging + Optional: https://pypi.python.org/pypi/ssdeep + Create a database ----------------- The database name is currently hardcoded as `test.sqlite3`. So copy the SQL diff --git a/importpkg.py b/importpkg.py index d63b85e..84cdad1 100755 --- a/importpkg.py +++ b/importpkg.py @@ -11,7 +11,12 @@ from debian.debian_support import version_compare from debian import deb822 import lzma -from dedup.hashing import HashBlacklist, DecompressedHash, SuppressingHash, hash_file +try: + import ssdeep +except ImportError: + ssdeep = None + +from dedup.hashing import HashBlacklist, DecompressedHash, SuppressingHash, StoredHash, hash_file from dedup.compression import GzipDecompressor, DecompressedStream from dedup.image import ImageHash @@ -96,12 +101,24 @@ def imagehash(): hashobj.name = "image_sha512" return hashobj +if ssdeep is None: + def multihash(): + return MultiHash(sha512_nontrivial(), gziphash(), imagehash()) +else: + def ssdeephash(): + hashobj = StoredHash(lambda bytesio: ssdeep.hash(bytesio.getvalue())) + hashobj.name = "ssdeep" + return hashobj + + def multihash(): + return MultiHash(sha512_nontrivial(), gziphash(), imagehash(), + ssdeephash()) + def get_hashes(tar): for elem in tar: if not elem.isreg(): # excludes hard links as well continue - hasher = MultiHash(sha512_nontrivial(), gziphash(), imagehash()) - hasher = hash_file(hasher, tar.extractfile(elem)) + hasher = hash_file(multihash(), tar.extractfile(elem)) for hashobj in hasher.hashes: hashvalue = hashobj.hexdigest() if hashvalue: -- cgit v1.2.3