diff options
-rw-r--r-- | fuzzy.c | 60 |
1 files changed, 39 insertions, 21 deletions
@@ -100,12 +100,25 @@ struct blockhash_context { uint32_t h, halfh; char digest[SPAMSUM_LENGTH]; unsigned int dlen; - /*@null@*/ struct blockhash_context *next; + /*@null@*/ /*@only@*/ struct blockhash_context *next; /* following list entry; NULL at the tail */ }; +static int blockhash_fork(struct blockhash_context *bh) { /* Append a freshly malloc'ed context after bh: h and halfh are copied from bh, dlen starts at 0, next is NULL, and digest is left uninitialized. bh->next must be NULL on entry (asserted). Returns 0 on success, -1 if malloc fails (bh is then unchanged). */ + struct blockhash_context *nbh; + if(NULL == (nbh = malloc(sizeof(struct blockhash_context)))) + return -1; + nbh->h = bh->h; + nbh->halfh = bh->halfh; + nbh->dlen = 0; + nbh->next = NULL; + assert(NULL == bh->next); + bh->next = nbh; + return 0; +} + struct ssdeep_context { unsigned int start_blocksize; - struct blockhash_context *blockhashes; + /*@only@*/ struct blockhash_context *blockhashes; /* head of the blockhash list */ size_t total_size; struct roll_state roll; }; @@ -124,6 +137,25 @@ static int ssdeep_init(/*@out@*/ struct ssdeep_context *self) { return 0; } +static void ssdeep_try_reduce_blockhash(struct ssdeep_context *self) { /* Drop the head of self->blockhashes and double start_blocksize, but only when all three checks below pass; otherwise do nothing. Dereferences self->blockhashes without a NULL check and frees the old head, so callers must not hold a pointer to it afterwards. */ + struct blockhash_context *bh; + if(NULL == (bh = self->blockhashes->next)) + /* Cannot remove last hash. */ + return; + if((size_t)self->start_blocksize * SPAMSUM_LENGTH >= self->total_size) + /* Initial blocksize estimate would select this or a smaller + * blocksize. */ + return; + if(bh->dlen < SPAMSUM_LENGTH / 2) + /* Estimate adjustment would select this blocksize. */ + return; + /* At this point we are clearly no longer interested in the + * start_blocksize. Get rid of it. */ + self->start_blocksize *= 2; + free(self->blockhashes); + self->blockhashes = bh; +} + static const char *b64 = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; @@ -157,17 +189,10 @@ static int ssdeep_engine_step(struct ssdeep_context *self, unsigned char c) { /* We have hit a reset point. We now emit hashes which are * based on all characters in the piece of the message between * the last reset point and this one */ - if(unlikely(0 == bh->dlen)) { /* Can only happen 30 times. */ + if(unlikely(0 == bh->dlen)) /* Can only happen 30 times. */ /* First step for this blocksize. Clone next. 
*/ - assert(NULL == bh->next); - bh->next = malloc(sizeof( - struct blockhash_context)); - if(NULL == bh->next) + if(blockhash_fork(bh) < 0) /* blockhash_fork returns -1 only when malloc fails */ return -1; - bh->next->h = bh->h; - bh->next->dlen = 0; - bh->next->next = NULL; - } if(bh->dlen < SPAMSUM_LENGTH - 1) { /* We can have a problem with the tail overflowing. The * easiest way to cope with this is to only reset the @@ -179,17 +204,10 @@ static int ssdeep_engine_step(struct ssdeep_context *self, unsigned char c) { bh->h = HASH_INIT; if(bh->dlen < SPAMSUM_LENGTH / 2) bh->halfh = HASH_INIT; - } else if(NULL != bh->next && - (size_t)self->start_blocksize * SPAMSUM_LENGTH < - self->total_size && - bh->next->dlen >= SPAMSUM_LENGTH / 2 && - bh == self->blockhashes) { - /* Operating on the currently smallest blocksize and - * the next blocksize is already large enough. */ - self->start_blocksize *= 2; + } else { + /* The reduction might free bh. */ bh = bh->next; - free(self->blockhashes); - self->blockhashes = bh; + ssdeep_try_reduce_blockhash(self); continue; } bh = bh->next; |