diff options
-rw-r--r-- | README | 2 | ||||
-rwxr-xr-x | importpkg.py | 23 |
2 files changed, 22 insertions, 3 deletions
@@ -3,6 +3,8 @@ Required packages
 
     aptitude install python python-debian python-lzma curl python-jinja2 python-werkzeug sqlite3 python-imaging
 
+    Optional: https://pypi.python.org/pypi/ssdeep
+
 Create a database
 -----------------
 The database name is currently hardcoded as `test.sqlite3`. So copy the SQL
diff --git a/importpkg.py b/importpkg.py
index d63b85e..84cdad1 100755
--- a/importpkg.py
+++ b/importpkg.py
@@ -11,7 +11,12 @@ from debian.debian_support import version_compare
 from debian import deb822
 import lzma
 
-from dedup.hashing import HashBlacklist, DecompressedHash, SuppressingHash, hash_file
+try:
+    import ssdeep
+except ImportError:
+    ssdeep = None
+
+from dedup.hashing import HashBlacklist, DecompressedHash, SuppressingHash, StoredHash, hash_file
 from dedup.compression import GzipDecompressor, DecompressedStream
 from dedup.image import ImageHash
 
@@ -96,12 +101,24 @@ def imagehash():
     hashobj.name = "image_sha512"
     return hashobj
 
+if ssdeep is None:
+    def multihash():
+        return MultiHash(sha512_nontrivial(), gziphash(), imagehash())
+else:
+    def ssdeephash():
+        hashobj = StoredHash(lambda bytesio: ssdeep.hash(bytesio.getvalue()))
+        hashobj.name = "ssdeep"
+        return hashobj
+
+    def multihash():
+        return MultiHash(sha512_nontrivial(), gziphash(), imagehash(),
+                         ssdeephash())
+
 def get_hashes(tar):
     for elem in tar:
         if not elem.isreg(): # excludes hard links as well
             continue
-        hasher = MultiHash(sha512_nontrivial(), gziphash(), imagehash())
-        hasher = hash_file(hasher, tar.extractfile(elem))
+        hasher = hash_file(multihash(), tar.extractfile(elem))
         for hashobj in hasher.hashes:
             hashvalue = hashobj.hexdigest()
             if hashvalue: