diff options
author | Helmut Grohne <helmut@subdivi.de> | 2021-12-31 15:45:33 +0100 |
---|---|---|
committer | Helmut Grohne <helmut@subdivi.de> | 2021-12-31 15:45:33 +0100 |
commit | f3ea68482e6c01053cb202573d953e8a2e89529f (patch) | |
tree | 4c08f6e5a99bbe5131c0949e7f97cc44cf4a2cbd /dedup/filemagic.py | |
parent | f2eda3ba74e5bc5613e84381ebd8bfd343e1c8cc (diff) | |
parent | 5b359b10053cbade539246eec26e86b44793ca40 (diff) | |
download | debian-dedup-f3ea68482e6c01053cb202573d953e8a2e89529f.tar.gz |
Merge branch master into branch multiarchhints
Among other things, this drops Python 2.x support.
Diffstat (limited to 'dedup/filemagic.py')
-rw-r--r-- | dedup/filemagic.py | 49 |
1 files changed, 49 insertions, 0 deletions
# diff --git a/dedup/filemagic.py b/dedup/filemagic.py
# new file mode 100644, index 0000000..b71c276
"""A very strange "hash" that uses the magic module (python3-magic) to guess
the file type."""

import magic

# The magic module changed its API a few times: prefer the module-level
# from_buffer function and fall back to the buffer method of the none_magic
# object when from_buffer does not exist.
try:
    _magic_identify = magic.from_buffer
except AttributeError:
    _magic_identify = magic.none_magic.buffer


class FileDigester:
    """A hashlib-like class to guess a filetype using the magic module.

    Data is fed in through update(). Once at least FILE_BYTES_MAX bytes have
    been collected, the identification is computed once, stored in
    self.identification, and the buffer is dropped (self.buff becomes None).
    Further data is then ignored.
    """
    FILE_BYTES_MAX = 1024 * 1024  # copied from file source

    def __init__(self):
        # Bytes collected so far; None once the identification is final.
        self.buff = b""
        # Stored result of the identification; only meaningful once
        # self.buff is None.
        self.identification = None

    def _compute_identification(self):
        """Run the magic identification on the currently buffered bytes.

        Returns whatever the magic backend returns, or a fixed placeholder
        string when the backend raises UnicodeDecodeError.
        """
        try:
            return _magic_identify(self.buff)
        except UnicodeDecodeError:
            return "magic identification is not valid UTF-8"

    def update(self, buff):
        """Append buff to the collected data unless identification is final.

        When the collected data reaches FILE_BYTES_MAX bytes, the
        identification is computed and the buffer is released.
        """
        # A dropped buffer (not a truthy identification) marks completion, so
        # an empty or None result from the backend cannot lead to a
        # "None += bytes" TypeError on the next call.
        if self.buff is None:
            return
        self.buff += buff
        if len(self.buff) >= self.FILE_BYTES_MAX:
            self.identification = self._compute_identification()
            self.buff = None

    def identify(self):
        """Return the guessed file magic identification.

        If the identification is already final, the stored value is returned;
        otherwise it is computed from the data collected so far without
        storing it, so later update() calls can still change the result.
        """
        if self.buff is None:
            return self.identification
        return self._compute_identification()

    def hexdigest(self):
        """Compatibility with hashlib. An alias of identify. Doesn't return
        hex."""
        return self.identify()

    def copy(self):
        """Return a new FileDigester with the same buffer and identification."""
        new = FileDigester()
        # bytes objects are immutable, so sharing the reference is safe.
        new.buff = self.buff
        new.identification = self.identification
        return new