summaryrefslogtreecommitdiff
path: root/dedup/filters.py
diff options
context:
space:
mode:
authorHelmut Grohne <helmut@subdivi.de>2013-03-08 16:33:09 +0100
committerHelmut Grohne <helmut@subdivi.de>2013-03-08 16:33:09 +0100
commit0e690a1f5e32d1e16ad27dd96cb43b78d5d36fb4 (patch)
treead0e188b62806ecc7d2d806f1f8958b0a1fd8bbf /dedup/filters.py
parent5b5cf7f2629c3a6c78f6057ff1e8476ff001409f (diff)
downloaddebian-dedup-0e690a1f5e32d1e16ad27dd96cb43b78d5d36fb4.tar.gz
generalize ImageHash to StoredHash
Diffstat (limited to 'dedup/filters.py')
-rw-r--r--dedup/filters.py50
1 files changed, 50 insertions, 0 deletions
diff --git a/dedup/filters.py b/dedup/filters.py
new file mode 100644
index 0000000..c5b3251
--- /dev/null
+++ b/dedup/filters.py
@@ -0,0 +1,50 @@
+import struct
+
class PNGFilter:
    """Streaming filter that drops non-critical chunks from a PNG file.

    Feed the file in arbitrarily sized pieces through :meth:`filter`; each
    call returns the bytes of the critical chunks that could be completed
    or continued with the data seen so far.  The 8-byte PNG signature is
    validated and consumed, it is not part of the output.

    Attributes:
        inbuffer: input bytes received but not yet consumed.
        critchunk: whether the chunk currently being processed is critical.
        chunkleft: bytes remaining in the current chunk (length, type,
            payload and CRC), 0 between chunks, or None while the file
            signature has not been seen yet.
    """

    magic = b"\x89PNG\r\n\x1a\n"

    def __init__(self):
        self.inbuffer = b""
        self.critchunk = False
        self.chunkleft = None

    def filter(self, data):
        """Consume *data* and return the critical-chunk bytes it yields.

        Args:
            data: the next piece of the PNG byte stream (may be empty).

        Returns:
            The bytes (length, type, payload, CRC) of critical chunks
            covered by the input so far; b"" if nothing can be emitted yet.

        Raises:
            ValueError: if the stream does not start with the PNG signature.
        """
        self.inbuffer += data
        if self.chunkleft is None:
            # Still waiting for the complete file signature.
            if len(self.inbuffer) < len(self.magic):
                return b""
            if not self.inbuffer.startswith(self.magic):
                raise ValueError("PNG file magic not found")
            self.inbuffer = self.inbuffer[len(self.magic):]
            self.chunkleft = 0
        kept = []
        while True:
            if self.chunkleft == 0:
                # At a chunk boundary: need the 8-byte length+type header.
                if len(self.inbuffer) < 8:
                    break
                self.chunkleft, chunktype = struct.unpack(">I4s",
                                                          self.inbuffer[:8])
                self.chunkleft += 12  # len, type, crc
                # Slice instead of indexing: on Python 3 indexing bytes
                # gives an int, which has no isupper().  An uppercase
                # first letter marks a critical chunk.
                self.critchunk = chunktype[:1].isupper()
            n = min(self.chunkleft, len(self.inbuffer))
            if self.critchunk:
                kept.append(self.inbuffer[:n])
            self.inbuffer = self.inbuffer[n:]
            self.chunkleft -= n
            if self.chunkleft:
                # Current chunk is incomplete; the buffer is now empty.
                break
        return b"".join(kept)

    def flush(self):
        """Return any still-buffered input unchanged and clear the buffer.

        After :meth:`filter` the buffer only ever holds an incomplete file
        signature or an incomplete chunk header.
        """
        ret = self.inbuffer
        self.inbuffer = b""
        return ret

    def copy(self):
        """Return a new filter of the same class with the same state."""
        new = type(self)()
        new.inbuffer = self.inbuffer
        new.critchunk = self.critchunk
        new.chunkleft = self.chunkleft
        return new