diff options
Diffstat (limited to 'fuzzy.c')
-rw-r--r-- | fuzzy.c | 85 |
1 file changed, 63 insertions, 22 deletions
@@ -213,14 +213,31 @@ int fuzzy_update(struct fuzzy_state *self, const unsigned char *buffer, return 0; } +static int memcpy_eliminate_sequences(char *dst, const char *src, + int n) { + const char *srcend = src + n; + assert(n >= 0); + if(src < srcend) *dst++ = *src++; + if(src < srcend) *dst++ = *src++; + if(src < srcend) *dst++ = *src++; + while(src < srcend) + if(*src == dst[-1] && *src == dst[-2] && *src == dst[-3]) { + ++src; + --n; + } else + *dst++ = *src++; + return n; +} + #ifdef S_SPLINT_S extern const int EOVERFLOW; #endif -int fuzzy_digest(const struct fuzzy_state *self, /*@out@*/ char *result) { +int fuzzy_digest(const struct fuzzy_state *self, /*@out@*/ char *result, + unsigned int flags) { unsigned int bi = self->bhstart; uint32_t h = roll_sum(&self->roll); - int i, remain = FUZZY_MAX_RESULT - 1; + int i, remain = FUZZY_MAX_RESULT - 1; /* Exclude terminating '\0'. */ /* Verify that our elimination was not overeager. */ assert(bi == 0 || (size_t)SSDEEP_BS(bi) / 2 * SPAMSUM_LENGTH < self->total_size); @@ -249,36 +266,60 @@ int fuzzy_digest(const struct fuzzy_state *self, /*@out@*/ char *result) { remain -= i; result += i; i = (int)self->bh[bi].dlen; - if(i > remain) - i = remain; - memcpy(result, self->bh[bi].digest, (size_t)i); + assert(i <= remain); + if((flags & FUZZY_FLAG_ELIMSEQ) != 0) + i = memcpy_eliminate_sequences(result, self->bh[bi].digest, i); + else + memcpy(result, self->bh[bi].digest, (size_t)i); result += i; remain -= i; - if(remain > 0 && h != 0) { - *result++ = b64[self->bh[bi].h % 64]; - --remain; - } - if(remain > 0) { - *result++ = ':'; - --remain; + if(h != 0) { + assert(remain > 0); + *result = b64[self->bh[bi].h % 64]; + if((flags & FUZZY_FLAG_ELIMSEQ) == 0 || i < 3 || + *result != result[-1] || + *result != result[-2] || + *result != result[-3]) { + ++result; + --remain; + } } + assert(remain > 0); + *result++ = ':'; + --remain; if(bi < self->bhend - 1) { ++bi; i = (int)self->bh[bi].dlen; - if(i > SPAMSUM_LENGTH / 2 - 1) + 
if((flags & FUZZY_FLAG_NOTRUNC) == 0 && + i > SPAMSUM_LENGTH / 2 - 1) i = SPAMSUM_LENGTH / 2 - 1; - if(i > remain) - i = remain; - memcpy(result, self->bh[bi].digest, (size_t)i); + assert(i <= remain); + if((flags & FUZZY_FLAG_ELIMSEQ) != 0) + i = memcpy_eliminate_sequences(result, + self->bh[bi].digest, i); + else + memcpy(result, self->bh[bi].digest, (size_t)i); result += i; remain -= i; - if(remain > 0 && h != 0) { - *result++ = b64[self->bh[bi].halfh % 64]; - --remain; + if(h != 0) { + assert(remain > 0); + h = (flags & FUZZY_FLAG_NOTRUNC) != 0 ? self->bh[bi].h : + self->bh[bi].halfh; + *result = b64[h % 64]; + if((flags & FUZZY_FLAG_ELIMSEQ) == 0 || i < 3 || + *result != result[-1] || + *result != result[-2] || + *result != result[-3]) { + ++result; + --remain; + } } - } else if(remain > 0 && h != 0) { + } else if(h != 0) { assert(self->bh[bi].dlen == 0); + assert(remain > 0); *result++ = b64[self->bh[bi].h % 64]; + /* No need to bother with FUZZY_FLAG_ELIMSEQ, because this + * digest has length 1. */ --remain; } *result = '\0'; @@ -297,7 +338,7 @@ int fuzzy_hash_buf(const unsigned char *buf, uint32_t buf_len, return -1; if(fuzzy_update(ctx, buf, buf_len) < 0) goto out; - if(fuzzy_digest(ctx, result) < 0) + if(fuzzy_digest(ctx, result, 0) < 0) goto out; ret = 0; out: @@ -321,7 +362,7 @@ int fuzzy_hash_stream(FILE *handle, /*@out@*/ char *result) { } if(ferror(handle) != 0) goto out; - if(fuzzy_digest(ctx, result) < 0) + if(fuzzy_digest(ctx, result, 0) < 0) goto out; ret = 0; out: |