author    Helmut Grohne <helmut@subdivi.de>  2013-03-12 08:24:49 +0100
committer Helmut Grohne <helmut@subdivi.de>  2013-03-12 08:27:08 +0100
commit    f3152b91239d1ecc9462921a75c20e530bade9e2 (patch)
tree      d5ef1ab6f0f2d5baede5fd0dcdb6b74bddc027a6 /importpkg.py
parent    5b5cf7f2629c3a6c78f6057ff1e8476ff001409f (diff)
download  debian-dedup-f3152b91239d1ecc9462921a75c20e530bade9e2.tar.gz
move ArReader from importpkg to dedup.arreader
Also document it.
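
For reference, a minimal usage sketch of the relocated class, driven from its new home in dedup.arreader. It relies only on the methods visible in the code removed below; the file name "sample.deb" is a hypothetical placeholder.

    from dedup.arreader import ArReader

    # Hypothetical sketch: iterate over the members of an ar archive (e.g. a
    # .deb) using the ArReader API shown in the removed code below.
    with open("sample.deb", "rb") as pkg:
        ar = ArReader(pkg)
        ar.read_magic()                 # verify the "!<arch>\n" global header
        while True:
            try:
                name = ar.read_entry()  # member name, e.g. "data.tar.gz"
            except EOFError:
                break                   # end of archive reached
            data = ar.read()            # consume the remaining bytes of this member
            print(name, len(data))
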
Diffstat (limited to 'importpkg.py')
-rwxr-xr-x  importpkg.py  54
1 file changed, 1 insertion(+), 53 deletions(-)
diff --git a/importpkg.py b/importpkg.py
index d63b85e..c6ce7f9 100755
--- a/importpkg.py
+++ b/importpkg.py
@@ -2,7 +2,6 @@
import hashlib
import sqlite3
-import struct
import sys
import tarfile
import zlib
@@ -11,62 +10,11 @@ from debian.debian_support import version_compare
from debian import deb822
import lzma
+from dedup.arreader import ArReader
from dedup.hashing import HashBlacklist, DecompressedHash, SuppressingHash, hash_file
from dedup.compression import GzipDecompressor, DecompressedStream
from dedup.image import ImageHash
-class ArReader(object):
-    global_magic = b"!<arch>\n"
-    file_magic = b"`\n"
-
-    def __init__(self, fileobj):
-        self.fileobj = fileobj
-        self.remaining = None
-        self.padding = 0
-
-    def skip(self, length):
-        while length:
-            data = self.fileobj.read(min(4096, length))
-            if not data:
-                raise ValueError("archive truncated")
-            length -= len(data)
-
-    def read_magic(self):
-        data = self.fileobj.read(len(self.global_magic))
-        if data != self.global_magic:
-            raise ValueError("ar global header not found")
-        self.remaining = 0
-
-    def read_entry(self):
-        self.skip_current_entry()
-        if self.padding:
-            if self.fileobj.read(1) != '\n':
-                raise ValueError("missing ar padding")
-            self.padding = 0
-        file_header = self.fileobj.read(60)
-        if not file_header:
-            raise EOFError("end of archive found")
-        parts = struct.unpack("16s 12s 6s 6s 8s 10s 2s", file_header)
-        parts = [p.rstrip(" ") for p in parts]
-        if parts.pop() != self.file_magic:
-            raise ValueError("ar file header not found")
-        self.remaining = int(parts[5])
-        self.padding = self.remaining % 2
-        return parts[0] # name
-
-    def skip_current_entry(self):
-        self.skip(self.remaining)
-        self.remaining = 0
-
-    def read(self, length=None):
-        if length is None:
-            length = self.remaining
-        else:
-            length = min(self.remaining, length)
-        data = self.fileobj.read(length)
-        self.remaining -= len(data)
-        return data
-
class MultiHash(object):
    def __init__(self, *hashes):
        self.hashes = hashes