From 4542d84439bbc6bd8f3151a9cb61d0ee85cd910e Mon Sep 17 00:00:00 2001 From: Helmut Grohne Date: Sun, 25 Oct 2020 10:20:34 +0100 Subject: externalize ar parsing to arpy --- dedup/arreader.py | 79 ------------------------------------------------------- 1 file changed, 79 deletions(-) delete mode 100644 dedup/arreader.py (limited to 'dedup/arreader.py') diff --git a/dedup/arreader.py b/dedup/arreader.py deleted file mode 100644 index 8b14ff9..0000000 --- a/dedup/arreader.py +++ /dev/null @@ -1,79 +0,0 @@ -import struct - -class ArReader: - """Streaming AR file reader. After constructing an object, you usually - call read_magic once. Then you call read_entry in a loop and use the - ArReader object as file-like only providing read() to read the respective - file contents until you get EOFError from read_entry. - """ - global_magic = b"!\n" - file_magic = b"`\n" - - def __init__(self, fileobj): - """ - @param fileobj: a file-like object providing nothing but read(length) - """ - self.fileobj = fileobj - self.remaining = None - self.padding = 0 - - def read_magic(self): - """Consume the AR magic marker at the beginning of an AR file. You - must not call any other method before calling this method. - @raises ValueError: if the magic is not found - """ - data = self.fileobj.read(len(self.global_magic)) - if data != self.global_magic: - raise ValueError("ar global header not found") - self.remaining = 0 - - def read_entry(self): - """Read the next file header, return the filename and record the - length of the next file, so that the read method can be used to - exhaustively read the current file. - @rtype: bytes - @returns: the name of the next file - @raises ValueError: if the data format is wrong - @raises EOFError: when the end f the stream is reached - """ - self.skip_current_entry() - if self.padding: - if self.fileobj.read(1) != b'\n': - raise ValueError("missing ar padding") - self.padding = 0 - file_header = self.fileobj.read(60) - if not file_header: - raise EOFError("end of archive found") - parts = struct.unpack("16s 12s 6s 6s 8s 10s 2s", file_header) - parts = [p.rstrip(b"/ ") for p in parts] - if parts.pop() != self.file_magic: - raise ValueError("ar file header not found") - self.remaining = int(parts[5]) - self.padding = self.remaining % 2 - return parts[0] # name - - def skip_current_entry(self): - """Skip the remainder of the current file. This method must not be - called before calling read_entry. - @raises ValueError: if the archive appears truncated - """ - while self.remaining: - data = self.fileobj.read(min(4096, self.remaining)) - if not data: - raise ValueError("archive truncated") - self.remaining -= len(data) - - def read(self, length=None): - """ - @type length: int or None - @param length: number of bytes to read from the current file - @rtype: bytes - @returns: length or fewer bytes from the current file - """ - if length is None: - length = self.remaining - else: - length = min(self.remaining, length) - data = self.fileobj.read(length) - self.remaining -= len(data) - return data -- cgit v1.2.3