diff options
Diffstat (limited to 'dedup')
-rw-r--r-- | dedup/debpkg.py | 31 |
1 files changed, 31 insertions, 0 deletions
diff --git a/dedup/debpkg.py b/dedup/debpkg.py index 8f2121b..04773de 100644 --- a/dedup/debpkg.py +++ b/dedup/debpkg.py @@ -1,5 +1,6 @@ from debian import deb822 +from dedup.arreader import ArReader from dedup.hashing import hash_file def process_control(control_contents): @@ -53,3 +54,33 @@ def get_tar_hashes(tar, hash_functions): if hashvalue: hashes[hashobj.name] = hashvalue yield (elem.name, elem.size, hashes) + +class DebExtractor(object): + "Base class for extracting desired features from a Debian package." + + def process(self, filelike): + """Process a Debian package. + @param filelike: is a file-like object containing the contents of the + Debian package and can be read once without seeks. + """ + af = ArReader(filelike) + af.read_magic() + while True: + try: + name = af.read_entry() + except EOFError: + break + else: + self.handle_ar_member(name, af) + self.handle_ar_end() + + def handle_ar_member(self, name, filelike): + """Handle an ar archive member of the Debian package. + @type name: bytes + @param name: is the name of the member + @param filelike: is a file-like object containing the contents of the + member and can be read once without seeks. + """ + + def handle_ar_end(self): + "Handle the end of the ar archive of the Debian package." |