From e686b05346892a1e2700f429136d46d8fff81f26 Mon Sep 17 00:00:00 2001
From: Helmut Grohne
Date: Mon, 23 May 2016 21:48:15 +0200
Subject: DebExtractor: implement parsing of control.tar

---
 dedup/debpkg.py | 35 +++++++++++++++++++++++++++++++++++
 1 file changed, 35 insertions(+)

(limited to 'dedup')

diff --git a/dedup/debpkg.py b/dedup/debpkg.py
index 337e385..c64f3c0 100644
--- a/dedup/debpkg.py
+++ b/dedup/debpkg.py
@@ -155,9 +155,44 @@ class DebExtractor(object):
 
     def handle_control_tar(self, tarfileobj):
         """Handle the control.tar member of the Debian package.
+        If you replace this method, none of handle_control_member,
+        handle_control_info or handle_control_end are called.
         @type tarfileobj: tarfile.TarFile
         @param tarfile: is opened for streaming reads
         """
+        controlseen = False
+        for elem in tarfileobj:
+            if elem.isreg():
+                name = elem.name
+                if name.startswith("./"):
+                    name = name[2:]
+                content = tarfileobj.extractfile(elem).read()
+                self.handle_control_member(name, content)
+                if name == "control":
+                    self.handle_control_info(deb822.Packages(content))
+                    controlseen = True
+            elif not (elem.isdir() and elem.name == "."):
+                raise ValueError("invalid non-file %r found in control.tar" %
+                                 elem.name)
+        if not controlseen:
+            raise ValueError("control missing from control.tar")
+        self.handle_control_end()
+
+    def handle_control_member(self, name, content):
+        """Handle a file member of the control.tar member of the Debian package.
+        @type name: str
+        @param name: is the plain member name
+        @type content: bytes
+        """
+
+    def handle_control_info(self, info):
+        """Handle the control member of the control.tar member of the Debian
+        package.
+        @type info: deb822.Packages
+        """
+
+    def handle_control_end(self):
+        "Handle the end of the control.tar member of the Debian package."
 
     def handle_data_tar(self, tarfileobj):
         """Handle the data.tar member of the Debian package.
-- 
cgit v1.2.3