summary | refs | log | tree | commit | diff
path: root/pyfuzzy.pyx
diff options
context:
space:
mode:
author Helmut Grohne <helmut@subdivi.de> 2013-03-25 13:00:48 +0100
committer Helmut Grohne <helmut@subdivi.de> 2013-03-25 13:00:48 +0100
commit a8671a9c2ebce7d958fb1cd26a0fab7969d6902b (patch)
tree 970128189ac9073f4769f50cb24c795964434b0a /pyfuzzy.pyx
parent b7dad638d2eaa4d02ac8fbbdefa540a9473d6f80 (diff)
download ssdeep-a8671a9c2ebce7d958fb1cd26a0fab7969d6902b.tar.gz
implement variants of the hashes
FUZZY_FLAG_ELIMSEQ: The comparison operation runs eliminate_sequence on both hashes before actually comparing them. With this flag, that step is performed at hash generation time instead. Suggested by Niels Thykier. FUZZY_FLAG_NOTRUNC: By default, the second part of the hash is truncated to SPAMSUM_LENGTH/2. When comparing two hashes with different block sizes, this truncation can result in a larger edit distance and therefore in false negatives. With this flag, the second part is not truncated.
Diffstat (limited to 'pyfuzzy.pyx')
-rw-r--r-- pyfuzzy.pyx 11
1 files changed, 8 insertions, 3 deletions
diff --git a/pyfuzzy.pyx b/pyfuzzy.pyx
index 2a18786..e9d870e 100644
--- a/pyfuzzy.pyx
+++ b/pyfuzzy.pyx
@@ -21,10 +21,13 @@ import os
cdef extern from "fuzzy.h":
struct fuzzy_state
cdef enum:
+ FUZZY_FLAG_ELIMSEQ
+ FUZZY_FLAG_NOTRUNC
FUZZY_MAX_RESULT
cdef extern fuzzy_state *fuzzy_new() nogil
cdef extern int fuzzy_update(fuzzy_state *, unsigned char *, size_t) nogil
- cdef extern int fuzzy_digest(fuzzy_state *, char *) nogil
+ cdef extern int fuzzy_digest(fuzzy_state *, char *,
+ unsigned int flags) nogil
cdef extern void fuzzy_free(fuzzy_state *) nogil
class FuzzyError(Exception):
@@ -56,11 +59,13 @@ cdef class FuzzyHash:
self.state = NULL
raise FuzzyError(libc.errno.errno)
- def digest(self):
+ def digest(self, elimseq=False, notrunc=False):
if self.state == NULL:
raise FuzzyError(libc.errno.EINVAL)
cdef char result[FUZZY_MAX_RESULT]
- if fuzzy_digest(self.state, result) != 0:
+ flags = (FUZZY_FLAG_ELIMSEQ if elimseq else 0) | \
+ (FUZZY_FLAG_NOTRUNC if notrunc else 0)
+ if fuzzy_digest(self.state, result, flags) != 0:
raise FuzzyError(libc.errno.errno)
return str(result)