diff options
author | Helmut Grohne <helmut@subdivi.de> | 2013-03-12 08:24:49 +0100 |
---|---|---|
committer | Helmut Grohne <helmut@subdivi.de> | 2013-03-12 08:27:08 +0100 |
commit | f3152b91239d1ecc9462921a75c20e530bade9e2 (patch) | |
tree | d5ef1ab6f0f2d5baede5fd0dcdb6b74bddc027a6 /dedup | |
parent | 5b5cf7f2629c3a6c78f6057ff1e8476ff001409f (diff) | |
download | debian-dedup-f3152b91239d1ecc9462921a75c20e530bade9e2.tar.gz |
move ArReader from importpkg to dedup.arreader
Also document it.
Diffstat (limited to 'dedup')
-rw-r--r-- | dedup/arreader.py | 79 |
1 files changed, 79 insertions, 0 deletions
# From: diff --git a/dedup/arreader.py b/dedup/arreader.py
# (new file mode 100644, index 0000000..e48a0f8)
import struct


class ArReader(object):
    """Streaming AR file reader. After constructing an object, you usually
    call read_magic once. Then you call read_entry in a loop and use the
    ArReader object as file-like only providing read() to read the respective
    file contents until you get EOFError from read_entry.
    """
    global_magic = b"!<arch>\n"
    file_magic = b"`\n"
    # Fixed-size entry header. The first field is the file name, the sixth
    # is the decimal file size and the last one must equal file_magic. The
    # remaining fields are unpacked but not interpreted by this class.
    header_format = "16s 12s 6s 6s 8s 10s 2s"
    header_size = struct.calcsize(header_format)

    def __init__(self, fileobj):
        """
        @param fileobj: a file-like object providing nothing but read(length)
        """
        self.fileobj = fileobj
        # Bytes left unread in the current entry; None until read_magic ran.
        self.remaining = None
        # 1 if a padding byte follows the current entry, 0 otherwise.
        self.padding = 0

    def read_magic(self):
        """Consume the AR magic marker at the beginning of an AR file. You
        must not call any other method before calling this method.
        @raises ValueError: if the magic is not found
        """
        data = self.fileobj.read(len(self.global_magic))
        if data != self.global_magic:
            raise ValueError("ar global header not found")
        self.remaining = 0

    def read_entry(self):
        """Read the next file header, return the filename and record the
        length of the next file, so that the read method can be used to
        exhaustively read the current file. Any unread data of the previous
        file is skipped first.
        @rtype: bytes
        @returns: the name of the next file
        @raises ValueError: if the data format is wrong
        @raises EOFError: when the end of the stream is reached
        """
        self.skip_current_entry()
        if self.padding:
            # Entries with an odd size are followed by one newline byte.
            if self.fileobj.read(1) != b'\n':
                raise ValueError("missing ar padding")
            self.padding = 0
        file_header = self.fileobj.read(self.header_size)
        if not file_header:
            raise EOFError("end of archive found")
        if len(file_header) != self.header_size:
            # Report a short header as a format error instead of leaking
            # struct.error to the caller.
            raise ValueError("ar file header truncated")
        fields = struct.unpack(self.header_format, file_header)
        # The fields are bytes, so the padding to strip must be bytes too.
        parts = [field.rstrip(b" ") for field in fields]
        if parts.pop() != self.file_magic:
            raise ValueError("ar file header not found")
        size = int(parts[5])
        if size < 0:
            # A negative size would turn into read(-n) on the stream.
            raise ValueError("negative ar entry size")
        self.remaining = size
        self.padding = size % 2
        return parts[0]  # name

    def skip_current_entry(self):
        """Skip the remainder of the current file. This method must not be
        called before calling read_magic.
        @raises ValueError: if the archive appears truncated
        """
        while self.remaining:
            data = self.fileobj.read(min(4096, self.remaining))
            if not data:
                raise ValueError("archive truncated")
            self.remaining -= len(data)

    def read(self, length=None):
        """Read from the current file without crossing into the next entry.
        @type length: int or None
        @param length: number of bytes to read from the current file. None
            or a negative value means everything that is left of it.
        @rtype: bytes
        @returns: length or fewer bytes from the current file
        """
        if length is None or length < 0:
            length = self.remaining
        else:
            length = min(self.remaining, length)
        data = self.fileobj.read(length)
        self.remaining -= len(data)
        return data