blob: a6d09bacacef64b451b6076bedd91ba2382b2406 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
|
"""A very strange "hash" that uses the magic module (python3-magic) to guess
the file type."""
import typing
import magic
# The magic bindings changed API a few times: use the module-level
# from_buffer() when it exists, otherwise fall back to the
# none_magic.buffer() entry point.
if hasattr(magic, "from_buffer"):
    _magic_identify = magic.from_buffer
else:
    _magic_identify = magic.none_magic.buffer
class FileDigester:
    """A hashlib-like class to guess a filetype using the magic module.

    Data passed to ``update`` is accumulated until at least
    ``FILE_BYTES_MAX`` bytes have been collected.  At that point the
    identification is computed once and cached, the buffer is released, and
    any further data is ignored.
    """

    FILE_BYTES_MAX = 1024 * 1024  # copied from file source

    def __init__(self) -> None:
        # Bytes collected so far; becomes None once the identification has
        # been cached and the data is no longer needed.
        self.buff: typing.Optional[bytes] = b""
        # Cached identification; stays None until FILE_BYTES_MAX bytes have
        # been collected.
        self.identification: typing.Optional[str] = None

    def _compute_identification(self) -> str:
        """Identify the currently buffered bytes.

        Returns a fixed placeholder message when the identification cannot
        be decoded as UTF-8.
        """
        assert self.buff is not None
        try:
            return _magic_identify(self.buff)
        except UnicodeDecodeError:
            return "magic identification is not valid UTF-8"

    def update(self, buff: bytes) -> None:
        """Feed more data; a no-op once the identification is cached."""
        # Compare against None rather than relying on truthiness: an empty
        # identification string is still a cached result, and the buffer has
        # already been dropped by then.
        if self.identification is not None:
            return
        assert self.buff is not None
        self.buff += buff
        if len(self.buff) >= self.FILE_BYTES_MAX:
            self.identification = self._compute_identification()
            # The data is not needed any more; release the memory.
            self.buff = None

    def identify(self) -> str:
        """Return the guessed file magic identification.

        The cached value is returned when there is one; otherwise the
        identification is computed from the bytes buffered so far and is not
        cached, because more data may still arrive.
        """
        if self.identification is not None:
            return self.identification
        return self._compute_identification()

    def hexdigest(self) -> str:
        """Compatibility with hashlib. An alias of identify. Doesn't return
        hex."""
        return self.identify()

    def copy(self) -> "FileDigester":
        """Return a new digester with the same buffered data and cached
        identification as this one."""
        new = FileDigester()
        # Sharing the bytes object is safe: update() rebinds the attribute to
        # a new object instead of mutating the shared one.
        new.buff = self.buff
        new.identification = self.identification
        return new
|