diff options
author | Helmut Grohne <helmut@subdivi.de> | 2013-08-02 08:40:49 +0200 |
---|---|---|
committer | Helmut Grohne <helmut@subdivi.de> | 2013-08-02 08:40:49 +0200 |
commit | cb3708825bf7ea32314040575cef35980dad0cd8 (patch) | |
tree | 31575a8525dc90ba6904268d94f47e1604bf0557 /dedup/image.py | |
parent | a4bbbb6e664e605634cb3f9e0564c7e4a93697be (diff) | |
parent | 2712edb550968ce7ec8cd9800241d7944666631a (diff) | |
download | debian-dedup-cb3708825bf7ea32314040575cef35980dad0cd8.tar.gz |
Merge branch master into sqlalchemy
This makes the sqlalchemy branch schema-compatible with master again.
The biggest change on master was the introduction of the function table.
It caused most of the conflicts. Note that webapp had one conflict not
detected by git: The selecting of issues in show_package needed
sqlalchemy conversion.
Conflicts:
README
update_sharing.py
webapp.py
Diffstat (limited to 'dedup/image.py')
-rw-r--r-- | dedup/image.py | 67 |
1 files changed, 45 insertions, 22 deletions
diff --git a/dedup/image.py b/dedup/image.py index 1148890..c1f2de0 100644 --- a/dedup/image.py +++ b/dedup/image.py @@ -4,9 +4,10 @@ import struct import PIL.Image class ImageHash(object): - """A hash on the contents of an image. This disregards mode, depth and meta - information. Note that due to limitations in PIL and the image format - (interlacing) the full contents are stored and decoded in hexdigest.""" + """A hash on the contents of an image datat type supported by PIL. This + disregards mode, depth and meta information. Note that due to limitations + in PIL and the image format (interlacing) the full contents are stored and + decoded in hexdigest.""" maxsize = 1024 * 1024 * 32 # max memory usage is about 5 * maxpixels in bytes maxpixels = 1024 * 1024 * 32 @@ -19,33 +20,25 @@ class ImageHash(object): self.imagedetected = False self.content = io.BytesIO() + def detect(self): + raise NotImplementedError + def update(self, data): self.content.write(data) if self.content.tell() > self.maxsize: raise ValueError("maximum image size exceeded") - if self.imagedetected: - return - if self.content.tell() < 33: # header + IHDR - return - curvalue = self.content.getvalue() - if curvalue.startswith(b"\x89PNG\r\n\x1a\n\0\0\0\x0dIHDR"): - width, height = struct.unpack(">II", curvalue[16:24]) - if width * height > self.maxpixels: - raise ValueError("maximum image pixels exceeded") - self.imagedetected = True - return - raise ValueError("not a png image") + if not self.imagedetected: + self.imagedetected = self.detect() def copy(self): - new = ImageHash() - new.hashobj = self.hashobj.copy() + new = self.__class__(self.hashobj.copy()) new.imagedetected = self.imagedetected new.content = io.BytesIO(self.content.getvalue()) return new def hexdigest(self): if not self.imagedetected: - raise ValueError("not a png image") + raise ValueError("not a image") hashobj = self.hashobj.copy() pos = self.content.tell() try: @@ -53,7 +46,7 @@ class ImageHash(object): try: img = PIL.Image.open(self.content) except IOError: - raise ValueError("broken png header") + raise ValueError("broken header") width, height = img.size pack = lambda elem: struct.pack("BBBB", *elem) # special casing easy modes reduces memory usage @@ -64,13 +57,43 @@ class ImageHash(object): elif img.mode != "RGBA": try: img = img.convert("RGBA") - except (SyntaxError, IndexError, IOError): # crazy stuff from PIL - raise ValueError("error reading png image") + except (SyntaxError, IndexError, IOError): + # crazy stuff from PIL + raise ValueError("error reading image") try: for elem in img.getdata(): hashobj.update(pack(elem)) except (SyntaxError, IndexError, IOError): # crazy stuff from PIL - raise ValueError("error reading png image") + raise ValueError("error reading image") finally: self.content.seek(pos) return "%s%8.8x%8.8x" % (hashobj.hexdigest(), width, height) + + +class PNGHash(ImageHash): + """A hash on the contents of a PNG image.""" + + def detect(self): + if self.content.tell() < 33: # header + IHDR + return False + curvalue = self.content.getvalue() + if curvalue.startswith(b"\x89PNG\r\n\x1a\n\0\0\0\x0dIHDR"): + width, height = struct.unpack(">II", curvalue[16:24]) + if width * height > self.maxpixels: + raise ValueError("maximum image pixels exceeded") + return True + raise ValueError("not a png image") + +class GIFHash(ImageHash): + """A hash on the contents of the first frame of a GIF image.""" + + def detect(self): + if self.content.tell() < 10: # magic + logical dimension + return False + curvalue = self.content.getvalue() + if curvalue.startswith((b"GIF87a", "GIF89a")): + width, height = struct.unpack("<HH", curvalue[6:10]) + if width * height > self.maxpixels: + raise ValueError("maximum image pixels exceeded") + return True + raise ValueError("not a png image") |