"""A very strange "hash" that uses the magic module (python3-magic) to guess the file type.""" import typing import magic # It changed API a few times... try: _magic_identify = magic.from_buffer except AttributeError: _magic_identify = magic.none_magic.buffer class FileDigester: """A hashlib-like class to guess a filetype using the magic module.""" FILE_BYTES_MAX = 1024 * 1024 # copied from file source def __init__(self) -> None: self.buff: typing.Optional[bytes] = b"" self.identification: typing.Optional[str] = None def _compute_identification(self) -> str: assert self.buff is not None try: return _magic_identify(self.buff) except UnicodeDecodeError: return "magic identification is not valid UTF-8" def update(self, buff: bytes) -> None: if self.identification: return assert self.buff is not None self.buff += buff if len(self.buff) >= self.FILE_BYTES_MAX: self.identification = self._compute_identification() self.buff = None def identify(self) -> str: """Return the guessed file magic identification.""" if self.identification: return self.identification return self._compute_identification() def hexdigest(self) -> str: """Compatibility with hashlib. An alias of identify. Doesn't return hex.""" return self.identify() def copy(self) -> "FileDigester": new = FileDigester() new.buff = self.buff new.identification = self.identification return new