summary | refs | log | tree | commit | diff
path: root/dedup
diff options
context:
space:
mode:
Diffstat (limited to 'dedup')
-rw-r--r-- dedup/debpkg.py 31
1 files changed, 31 insertions, 0 deletions
diff --git a/dedup/debpkg.py b/dedup/debpkg.py
index 8f2121b..04773de 100644
--- a/dedup/debpkg.py
+++ b/dedup/debpkg.py
@@ -1,5 +1,6 @@
from debian import deb822
+from dedup.arreader import ArReader
from dedup.hashing import hash_file
def process_control(control_contents):
@@ -53,3 +54,33 @@ def get_tar_hashes(tar, hash_functions):
if hashvalue:
hashes[hashobj.name] = hashvalue
yield (elem.name, elem.size, hashes)
+
+class DebExtractor(object):
+ "Base class for extracting desired features from a Debian package."
+
+ def process(self, filelike):
+ """Process a Debian package.
+ @param filelike: is a file-like object containing the contents of the
+ Debian package and can be read once without seeks.
+ """
+ af = ArReader(filelike)
+ af.read_magic()
+ while True:
+ try:
+ name = af.read_entry()
+ except EOFError:
+ break
+ else:
+ self.handle_ar_member(name, af)
+ self.handle_ar_end()
+
+ def handle_ar_member(self, name, filelike):
+ """Handle an ar archive member of the Debian package.
+ @type name: bytes
+ @param name: is the name of the member
+ @param filelike: is a file-like object containing the contents of the
+ member and can be read once without seeks.
+ """
+
+ def handle_ar_end(self):
+ "Handle the end of the ar archive of the Debian package."