teach splint about f{seek,tell}o
[~helmut/ssdeep.git] / fuzzy.c
diff --git a/fuzzy.c b/fuzzy.c
index 7b2b608..32b19ad 100644 (file)
--- a/fuzzy.c
+++ b/fuzzy.c
@@ -100,12 +100,25 @@ struct blockhash_context {
        uint32_t h, halfh;
        char digest[SPAMSUM_LENGTH];
        unsigned int dlen;
-       /*@null@*/ struct blockhash_context *next;
+       /*@null@*/ /*@only@*/ struct blockhash_context *next;
 };
 
+static int blockhash_fork(struct blockhash_context *bh) { /* Append a new context after bh. */
+       struct blockhash_context *nbh; /* Returns 0 on success, -1 if malloc fails. */
+       if(NULL == (nbh = malloc(sizeof(struct blockhash_context))))
+               return -1; /* Nothing was linked; bh is unchanged. */
+       nbh->h = bh->h; /* The new context inherits both running hashes of bh. */
+       nbh->halfh = bh->halfh;
+       nbh->dlen = 0; /* Empty digest; digest[] itself is not written here. */
+       nbh->next = NULL;
+       assert(NULL == bh->next); /* bh must not already have a successor. */
+       bh->next = nbh;
+       return 0;
+}
+
 struct ssdeep_context {
        unsigned int start_blocksize;
-       struct blockhash_context *blockhashes;
+       /*@only@*/ struct blockhash_context *blockhashes; /* Head of the chain linked via ->next; splint: sole owning reference. */
        size_t total_size;
        struct roll_state roll;
 };
@@ -124,6 +137,25 @@ static int ssdeep_init(/*@out@*/ struct ssdeep_context *self) {
        return 0;
 }
 
+static void ssdeep_try_reduce_blockhash(struct ssdeep_context *self) { /* Free the head blockhash unless a check below keeps it. */
+       struct blockhash_context *bh; /* Successor of the head; self->blockhashes is dereferenced unchecked. */
+       if(NULL == (bh = self->blockhashes->next))
+               /* The head has no successor: cannot remove the last hash. */
+               return;
+       if((size_t)self->start_blocksize * SPAMSUM_LENGTH >= self->total_size)
+               /* Initial blocksize estimate would select this or a smaller
+                * blocksize, so the head is still needed. */
+               return;
+       if(bh->dlen < SPAMSUM_LENGTH / 2)
+               /* Successor digest is under half length: estimate adjustment would select this blocksize. */
+               return;
+       /* At this point we are clearly no longer interested in the
+        * start_blocksize. Double it and make the successor the new head. */
+       self->start_blocksize *= 2;
+       free(self->blockhashes); /* Releases the old head only; bh stays valid. */
+       self->blockhashes = bh;
+}
+
 static const char *b64 =
        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
 
@@ -157,17 +189,10 @@ static int ssdeep_engine_step(struct ssdeep_context *self, unsigned char c) {
                /* We have hit a reset point. We now emit hashes which are
                 * based on all characters in the piece of the message between
                 * the last reset point and this one */
-               if(unlikely(0 == bh->dlen)) /* Can only happen 30 times. */
+               if(unlikely(0 == bh->dlen)) /* Can only happen 30 times. */
                        /* First step for this blocksize. Clone next. */
-                       assert(NULL == bh->next);
-                       bh->next = malloc(sizeof(
-                                       struct blockhash_context));
-                       if(NULL == bh->next)
+                       if(blockhash_fork(bh) < 0)
                                return -1;
-                       bh->next->h = bh->h;
-                       bh->next->dlen = 0;
-                       bh->next->next = NULL;
-               }
                if(bh->dlen < SPAMSUM_LENGTH - 1) {
                        /* We can have a problem with the tail overflowing. The
                         * easiest way to cope with this is to only reset the
@@ -179,17 +204,10 @@ static int ssdeep_engine_step(struct ssdeep_context *self, unsigned char c) {
                        bh->h = HASH_INIT;
                        if(bh->dlen < SPAMSUM_LENGTH / 2)
                                bh->halfh = HASH_INIT;
-               } else if(NULL != bh->next &&
-                               (size_t)self->start_blocksize * SPAMSUM_LENGTH <
-                                       self->total_size &&
-                               bh->next->dlen >= SPAMSUM_LENGTH / 2 &&
-                               bh == self->blockhashes) {
-                       /* Operating on the currently smallest blocksize and
-                        * the next blocksize is already large enough. */
-                       self->start_blocksize *= 2;
+               } else {
+                       /* Advance bh first: the reduction might free the context it points to. */
                        bh = bh->next;
-                       free(self->blockhashes);
-                       self->blockhashes = bh;
+                       ssdeep_try_reduce_blockhash(self);
                        continue;
                }
                bh = bh->next;
@@ -325,6 +343,8 @@ errout:
 
 #ifdef S_SPLINT_S
 typedef size_t off_t;
+int fseeko(FILE *, off_t, int); /* Declared here only for splint runs (S_SPLINT_S). */
+off_t ftello(FILE *); /* Likewise: lets splint check ftello() calls. */
 #endif
 
 int fuzzy_hash_file(FILE *handle, /*@out@*/ char *result) {
@@ -345,7 +365,7 @@ int fuzzy_hash_filename(const char *filename, /*@out@*/ char *result) {
        FILE *handle = fopen(filename, "rb");
        if(NULL == handle)
                return -1;
-       status = fuzzy_hash_file(handle, result);
+       status = fuzzy_hash_stream(handle, result);
        /* We cannot do anything about an fclose failure. */
        (void)fclose(handle);
        return status;