summary refs log tree commit diff
path: root/fuzzy.c
diff options
context:
space:
mode:
Diffstat (limited to 'fuzzy.c')
-rw-r--r-- fuzzy.c 85
1 files changed, 63 insertions, 22 deletions
diff --git a/fuzzy.c b/fuzzy.c
index 6123341..f1dcdcf 100644
--- a/fuzzy.c
+++ b/fuzzy.c
@@ -213,14 +213,31 @@ int fuzzy_update(struct fuzzy_state *self, const unsigned char *buffer,
return 0;
}
+static int memcpy_eliminate_sequences(char *dst, const char *src, /* Copy src to dst, capping runs of one byte at three. */
+ int n) { /* n: number of bytes available at src; returns the number of bytes stored in dst. */
+ const char *srcend = src + n; /* One past the last source byte. */
+ assert(n >= 0);
+ if(src < srcend) *dst++ = *src++; /* The first (up to) three bytes are copied unconditionally... */
+ if(src < srcend) *dst++ = *src++;
+ if(src < srcend) *dst++ = *src++; /* ...so dst[-3] below always refers to a byte written here. */
+ while(src < srcend)
+ if(*src == dst[-1] && *src == dst[-2] && *src == dst[-3]) { /* *src would be the fourth equal byte in a row. */
+ ++src; /* Drop it... */
+ --n; /* ...and shrink the reported length accordingly. */
+ } else
+ *dst++ = *src++;
+ return n; /* Input length minus the dropped bytes, i.e. bytes written to dst. */
+}
+
#ifdef S_SPLINT_S
extern const int EOVERFLOW;
#endif
-int fuzzy_digest(const struct fuzzy_state *self, /*@out@*/ char *result) {
+int fuzzy_digest(const struct fuzzy_state *self, /*@out@*/ char *result,
+ unsigned int flags) {
unsigned int bi = self->bhstart;
uint32_t h = roll_sum(&self->roll);
- int i, remain = FUZZY_MAX_RESULT - 1;
+ int i, remain = FUZZY_MAX_RESULT - 1; /* Exclude terminating '\0'. */
/* Verify that our elimination was not overeager. */
assert(bi == 0 || (size_t)SSDEEP_BS(bi) / 2 * SPAMSUM_LENGTH <
self->total_size);
@@ -249,36 +266,60 @@ int fuzzy_digest(const struct fuzzy_state *self, /*@out@*/ char *result) {
remain -= i;
result += i;
i = (int)self->bh[bi].dlen;
- if(i > remain)
- i = remain;
- memcpy(result, self->bh[bi].digest, (size_t)i);
+ assert(i <= remain);
+ if((flags & FUZZY_FLAG_ELIMSEQ) != 0)
+ i = memcpy_eliminate_sequences(result, self->bh[bi].digest, i);
+ else
+ memcpy(result, self->bh[bi].digest, (size_t)i);
result += i;
remain -= i;
- if(remain > 0 && h != 0) {
- *result++ = b64[self->bh[bi].h % 64];
- --remain;
- }
- if(remain > 0) {
- *result++ = ':';
- --remain;
+ if(h != 0) {
+ assert(remain > 0);
+ *result = b64[self->bh[bi].h % 64];
+ if((flags & FUZZY_FLAG_ELIMSEQ) == 0 || i < 3 ||
+ *result != result[-1] ||
+ *result != result[-2] ||
+ *result != result[-3]) {
+ ++result;
+ --remain;
+ }
}
+ assert(remain > 0);
+ *result++ = ':';
+ --remain;
if(bi < self->bhend - 1) {
++bi;
i = (int)self->bh[bi].dlen;
- if(i > SPAMSUM_LENGTH / 2 - 1)
+ if((flags & FUZZY_FLAG_NOTRUNC) == 0 &&
+ i > SPAMSUM_LENGTH / 2 - 1)
i = SPAMSUM_LENGTH / 2 - 1;
- if(i > remain)
- i = remain;
- memcpy(result, self->bh[bi].digest, (size_t)i);
+ assert(i <= remain);
+ if((flags & FUZZY_FLAG_ELIMSEQ) != 0)
+ i = memcpy_eliminate_sequences(result,
+ self->bh[bi].digest, i);
+ else
+ memcpy(result, self->bh[bi].digest, (size_t)i);
result += i;
remain -= i;
- if(remain > 0 && h != 0) {
- *result++ = b64[self->bh[bi].halfh % 64];
- --remain;
+ if(h != 0) {
+ assert(remain > 0);
+ h = (flags & FUZZY_FLAG_NOTRUNC) != 0 ? self->bh[bi].h :
+ self->bh[bi].halfh;
+ *result = b64[h % 64];
+ if((flags & FUZZY_FLAG_ELIMSEQ) == 0 || i < 3 ||
+ *result != result[-1] ||
+ *result != result[-2] ||
+ *result != result[-3]) {
+ ++result;
+ --remain;
+ }
}
- } else if(remain > 0 && h != 0) {
+ } else if(h != 0) {
assert(self->bh[bi].dlen == 0);
+ assert(remain > 0);
*result++ = b64[self->bh[bi].h % 64];
+ /* No need to bother with FUZZY_FLAG_ELIMSEQ, because this
+ * digest has length 1. */
--remain;
}
*result = '\0';
@@ -297,7 +338,7 @@ int fuzzy_hash_buf(const unsigned char *buf, uint32_t buf_len,
return -1;
if(fuzzy_update(ctx, buf, buf_len) < 0)
goto out;
- if(fuzzy_digest(ctx, result) < 0)
+ if(fuzzy_digest(ctx, result, 0) < 0)
goto out;
ret = 0;
out:
@@ -321,7 +362,7 @@ int fuzzy_hash_stream(FILE *handle, /*@out@*/ char *result) {
}
if(ferror(handle) != 0)
goto out;
- if(fuzzy_digest(ctx, result) < 0)
+ if(fuzzy_digest(ctx, result, 0) < 0)
goto out;
ret = 0;
out: