# dedup/filters.py, as introduced by commit
# 0e690a1f5e32d1e16ad27dd96cb43b78d5d36fb4 (Helmut Grohne, 2013-03-08,
# "generalize ImageHash to StoredHash").
import struct


class PNGFilter:
    """Skips non-critical chunks in a PNG file.

    The filter is fed arbitrary slices of a PNG byte stream via filter().
    It verifies and drops the 8-byte file signature, then passes through
    every chunk (length, type, payload and CRC) whose type starts with an
    uppercase letter and drops all other chunks.
    """
    magic = b"\x89PNG\r\n\x1a\n"

    def __init__(self):
        # Bytes received but not yet consumed (partial magic or chunk header).
        self.inbuffer = b""
        # Whether the chunk currently being consumed is passed through.
        self.critchunk = False
        # Bytes still missing from the current chunk; None until the file
        # signature has been seen, 0 when the next chunk header is expected.
        self.chunkleft = None

    def filter(self, data):
        """Consume data and return the bytes of critical chunks found so far.

        @param data: next slice of the PNG byte stream (may be any length)
        @returns: bytes of the critical chunks completed or continued by
            this slice; b"" if nothing can be emitted yet
        @raises ValueError: if the stream does not start with the PNG magic
        """
        self.inbuffer += data
        if self.chunkleft is None:
            if len(self.inbuffer) < len(self.magic):
                return b""
            if not self.inbuffer.startswith(self.magic):
                raise ValueError("PNG file magic not found")
            self.inbuffer = self.inbuffer[len(self.magic):]
            self.chunkleft = 0
        parts = []
        while True:
            if self.chunkleft == 0:
                # A new chunk starts here; wait until its header is complete.
                if len(self.inbuffer) < 8:
                    break
                length, chunktype = struct.unpack(">I4s", self.inbuffer[:8])
                self.chunkleft = length + 12  # len, type, crc
                # Slice instead of indexing: on Python 3 chunktype[0] is an
                # int and has no isupper() method.
                self.critchunk = chunktype[:1].isupper()
            n = min(self.chunkleft, len(self.inbuffer))
            if self.critchunk:
                parts.append(self.inbuffer[:n])
            self.inbuffer = self.inbuffer[n:]
            self.chunkleft -= n
            if self.chunkleft:
                # The buffer ran dry in the middle of a chunk.
                break
        return b"".join(parts)

    def flush(self):
        """Return any bytes still buffered and empty the buffer.

        The remaining state (chunkleft, critchunk) is left untouched.
        """
        ret = self.inbuffer
        self.inbuffer = b""
        return ret

    def copy(self):
        """Return an independent filter that is in the same state as self."""
        # Use the runtime class so that subclasses copy to their own type.
        new = self.__class__()
        new.inbuffer = self.inbuffer
        new.critchunk = self.critchunk
        new.chunkleft = self.chunkleft
        return new