summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- fuzzy.c 60
1 files changed, 39 insertions, 21 deletions
diff --git a/fuzzy.c b/fuzzy.c
index 040038e..ed1e212 100644
--- a/fuzzy.c
+++ b/fuzzy.c
@@ -100,12 +100,25 @@ struct blockhash_context {
uint32_t h, halfh;
char digest[SPAMSUM_LENGTH];
unsigned int dlen;
- /*@null@*/ struct blockhash_context *next;
+ /*@null@*/ /*@only@*/ struct blockhash_context *next;
};
+static int blockhash_fork(struct blockhash_context *bh) { /* Append a freshly allocated context after bh, seeded with bh's hash state; returns 0 on success, -1 if malloc fails. */
+ struct blockhash_context *nbh;
+ if(NULL == (nbh = malloc(sizeof(struct blockhash_context))))
+ return -1; /* Allocation failed; bh has not been modified. */
+ nbh->h = bh->h; /* The new context continues from bh's current hash values. */
+ nbh->halfh = bh->halfh;
+ nbh->dlen = 0; /* No digest characters recorded yet; digest[] itself is left unset. */
+ nbh->next = NULL;
+ assert(NULL == bh->next); /* bh must not already have a successor, or the link below would overwrite it. */
+ bh->next = nbh;
+ return 0;
+}
+
struct ssdeep_context {
unsigned int start_blocksize;
- struct blockhash_context *blockhashes;
+ /*@only@*/ struct blockhash_context *blockhashes; /* Head of the blockhash list; ssdeep_try_reduce_blockhash() frees and replaces it. */
size_t total_size;
struct roll_state roll;
};
@@ -124,6 +137,25 @@ static int ssdeep_init(/*@out@*/ struct ssdeep_context *self) {
return 0;
}
+static void ssdeep_try_reduce_blockhash(struct ssdeep_context *self) { /* Drop the head of self->blockhashes and double start_blocksize, unless one of the checks below shows the head is still needed. */
+ struct blockhash_context *bh; /* Successor of the current list head, if any. */
+ if(NULL == (bh = self->blockhashes->next)) /* NOTE(review): self->blockhashes is dereferenced unchecked; assumes the list is never empty - confirm. */
+ /* Cannot remove last hash. */
+ return;
+ if((size_t)self->start_blocksize * SPAMSUM_LENGTH >= self->total_size)
+ /* Initial blocksize estimate would select this or a smaller
+ * blocksize. */
+ return;
+ if(bh->dlen < SPAMSUM_LENGTH / 2)
+ /* Estimate adjustment would select this blocksize. */
+ return;
+ /* At this point we are clearly no longer interested in the
+ * start_blocksize. Get rid of it. */
+ self->start_blocksize *= 2;
+ free(self->blockhashes); /* bh was read from the head before this free, so it is still valid. */
+ self->blockhashes = bh; /* The former second element becomes the new head. */
+}
+
static const char *b64 =
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
@@ -157,17 +189,10 @@ static int ssdeep_engine_step(struct ssdeep_context *self, unsigned char c) {
/* We have hit a reset point. We now emit hashes which are
* based on all characters in the piece of the message between
* the last reset point and this one */
- if(unlikely(0 == bh->dlen)) { /* Can only happen 30 times. */
+ if(unlikely(0 == bh->dlen)) /* Can only happen 30 times. */
/* First step for this blocksize. Clone next. */
- assert(NULL == bh->next);
- bh->next = malloc(sizeof(
- struct blockhash_context));
- if(NULL == bh->next)
+ if(blockhash_fork(bh) < 0)
return -1;
- bh->next->h = bh->h;
- bh->next->dlen = 0;
- bh->next->next = NULL;
- }
if(bh->dlen < SPAMSUM_LENGTH - 1) {
/* We can have a problem with the tail overflowing. The
* easiest way to cope with this is to only reset the
@@ -179,17 +204,10 @@ static int ssdeep_engine_step(struct ssdeep_context *self, unsigned char c) {
bh->h = HASH_INIT;
if(bh->dlen < SPAMSUM_LENGTH / 2)
bh->halfh = HASH_INIT;
- } else if(NULL != bh->next &&
- (size_t)self->start_blocksize * SPAMSUM_LENGTH <
- self->total_size &&
- bh->next->dlen >= SPAMSUM_LENGTH / 2 &&
- bh == self->blockhashes) {
- /* Operating on the currently smallest blocksize and
- * the next blocksize is already large enough. */
- self->start_blocksize *= 2;
+ } else {
+ /* The reduction might free bh. */
bh = bh->next;
- free(self->blockhashes);
- self->blockhashes = bh;
+ ssdeep_try_reduce_blockhash(self);
continue;
}
bh = bh->next;