diff options
author | Helmut Grohne <helmut@subdivi.de> | 2013-03-25 13:00:48 +0100 |
---|---|---|
committer | Helmut Grohne <helmut@subdivi.de> | 2013-03-25 13:00:48 +0100 |
commit | a8671a9c2ebce7d958fb1cd26a0fab7969d6902b (patch) | |
tree | 970128189ac9073f4769f50cb24c795964434b0a /fuzzy.h | |
parent | b7dad638d2eaa4d02ac8fbbdefa540a9473d6f80 (diff) | |
download | ssdeep-a8671a9c2ebce7d958fb1cd26a0fab7969d6902b.tar.gz |
implement variants of the hashes
FUZZY_FLAG_ELIMSEQ: The comparison operation runs eliminate_sequence
on both hashes before actually comparing them. This step can
be moved to hash generation time using this flag. Suggested by Niels
Thykier.
FUZZY_FLAG_NOTRUNC: The second part of the hash is truncated to
SPAMSUM_LENGTH/2 by default. When comparing two hashes with
different block sizes this can result in a larger edit distance and
therefore false negatives. This flag disables the truncation.
Diffstat (limited to 'fuzzy.h')
-rw-r--r-- | fuzzy.h | 17 |
1 files changed, 15 insertions, 2 deletions
@@ -30,6 +30,17 @@ extern "C" { #ifndef FUZZY_H #define FUZZY_H +/** + * @brief fuzzy_digest flag indicating to eliminate sequences of more than + * three identical characters + */ +#define FUZZY_FLAG_ELIMSEQ 0x1u +/** + * @brief fuzzy_digest flag indicating not to truncate the second part to + * SPAMSUM_LENGTH/2 characters. + */ +#define FUZZY_FLAG_NOTRUNC 0x2u + struct fuzzy_state; /** @@ -60,10 +71,12 @@ extern int fuzzy_update(struct fuzzy_state *state, const unsigned char *buffer, * concatenation of the data previously fed using fuzzy_update. * @param result Where the fuzzy hash is stored. This variable * must be allocated to hold at least FUZZY_MAX_RESULT bytes. + * @param flags is a bitwise or of FUZZY_FLAG_* macros. The absence of flags is + * represented by a zero. * @return zero on success, non-zero on error */ extern int fuzzy_digest(const struct fuzzy_state *state, - /*@out@*/ char *result); + /*@out@*/ char *result, unsigned int flags); /** * @brief Dispose a fuzzy state. @@ -136,7 +149,7 @@ extern int fuzzy_hash_filename(const char *filename, /*@out@*/ char * result); /** The longest possible length for a fuzzy hash signature * (without the filename) */ -#define FUZZY_MAX_RESULT (SPAMSUM_LENGTH + (SPAMSUM_LENGTH/2 + 20)) +#define FUZZY_MAX_RESULT (2 * SPAMSUM_LENGTH + 20) #ifdef __cplusplus } |