1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
|
import struct
import PIL.Image
from . import hashing
def detectpng(content, maxpixels=1024 * 1024 * 32):
    """Check whether the given leading bytes look like an acceptable PNG.

    @param content: the first bytes of the candidate file
    @param maxpixels: largest permitted value of width * height
    @returns: None when fewer than 33 bytes are available (the decision is
        deferred), False when the bytes do not start with the PNG signature
        followed by an IHDR chunk header or when the image has more than
        maxpixels pixels, True otherwise
    """
    # 8 byte file signature, then the IHDR chunk header: a big-endian
    # length of 13 followed by the chunk type.
    expected_prefix = b"\x89PNG\r\n\x1a\n\0\0\0\x0dIHDR"
    if len(content) < 33:
        return None  # defer decision
    if content.startswith(expected_prefix):
        # Width and height are two big-endian 32 bit unsigned integers
        # located directly after the 16 byte prefix.
        columns, rows = struct.unpack_from(">II", content, 16)
        return columns * rows <= maxpixels
    return False
class ImageHash(hashing.StoredHash):
    """A hash on the contents of an image. This disregards mode, depth and meta
    information. Note that due to limitations in PIL and the image format
    (interlacing) the full contents are stored and decoded in hexdigest."""

    # handed to StoredHash as its third constructor argument
    maxsize = 1024 * 1024 * 32
    # max memory usage is about 5 * maxpixels in bytes
    maxpixels = 1024 * 1024 * 32

    def __init__(self, hashobj):
        """
        @param hashobj: a hashlib-like object
        """
        # Assigned before the base initializer runs so that the bound
        # callbacks passed to it never see an instance lacking hashobj.
        self.hashobj = hashobj
        hashing.StoredHash.__init__(self, self.computehash, self.detect,
                                    self.maxsize)

    def detect(self, bytesio):
        """Decide whether the stored content is a PNG image worth hashing.

        @param bytesio: a file-like object; its next 33 bytes are inspected
        @returns: the result of detectpng: True, False or None (undecided)
        """
        return detectpng(bytesio.read(33), self.maxpixels)

    def computehash(self, bytesio):
        """Hash the decoded pixels of the image as RGBA quadruples.

        @param bytesio: a file-like object containing the image
            (NOTE(review): assumed to be the stored content positioned
            where PIL can open it -- confirm against StoredHash)
        @returns: the hex digest followed by the width and the height, each
            formatted as 8 hexadecimal digits
        """
        # Work on a copy, leaving the hash object given to __init__ untouched.
        hashobj = self.hashobj.copy()
        # Read from the argument rather than from instance state.
        img = PIL.Image.open(bytesio)
        width, height = img.size
        # Compile the format once instead of parsing it for every pixel.
        pack_rgba = struct.Struct("BBBB").pack

        # special casing easy modes reduces memory usage
        if img.mode == "L":
            def pack(elem):
                # grey value replicated to all colour channels, opaque
                return pack_rgba(elem, elem, elem, 255)
        elif img.mode == "RGB":
            def pack(elem):
                # append an opaque alpha channel
                return pack_rgba(*(elem + (255,)))
        else:
            if img.mode != "RGBA":
                img = img.convert("RGBA")

            def pack(elem):
                return pack_rgba(*elem)

        for elem in img.getdata():
            hashobj.update(pack(elem))
        return "%s%8.8x%8.8x" % (hashobj.hexdigest(), width, height)
|