From c45a83883625ea5226bace08e72f45782e31c29f Mon Sep 17 00:00:00 2001 From: Helmut Grohne Date: Thu, 9 May 2024 12:09:55 +0200 Subject: add linuxnamespaces.tarutils.XAttrTarFile This is a mixin subclass for TarFile that enables it to store Linux extended attributes as PAX headers in the SCHILY.xattr.* format when creating an archive and to restore them when extracting one. As a consequence, this enables us to process tar archives containing file system capabilities. --- examples/chroottar.py | 14 +++++---- linuxnamespaces/tarutils.py | 75 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 83 insertions(+), 6 deletions(-) diff --git a/examples/chroottar.py b/examples/chroottar.py index f43add4..30017e5 100755 --- a/examples/chroottar.py +++ b/examples/chroottar.py @@ -20,6 +20,12 @@ import linuxnamespaces import linuxnamespaces.tarutils +class TarFile( + linuxnamespaces.tarutils.ZstdTarFile, linuxnamespaces.tarutils.XAttrTarFile +): + pass + + def main() -> None: parser = argparse.ArgumentParser() parser.add_argument( @@ -49,9 +55,7 @@ def main() -> None: parentsock.close() # Once we drop privileges via setreuid and friends, we may become # unable to open basetar or to chdir to tdir, so do those early. - with linuxnamespaces.tarutils.ZstdTarFile.open( - args.basetar, "r:*" - ) as tarf: + with TarFile.open(args.basetar, "r:*") as tarf: os.chdir(tdir) linuxnamespaces.unshare( linuxnamespaces.CloneFlags.NEWUSER @@ -112,9 +116,7 @@ def main() -> None: if args.save and ret == 0: tmptar = f"{args.basetar}.new" try: - with linuxnamespaces.tarutils.ZstdTarFile.open( - tmptar, "x:" + comptype - ) as tout: + with TarFile.open(tmptar, "x:" + comptype) as tout: tout.add(tdir, ".") os.rename(tmptar, args.basetar) except: diff --git a/linuxnamespaces/tarutils.py b/linuxnamespaces/tarutils.py index c7a065c..facb537 100644 --- a/linuxnamespaces/tarutils.py +++ b/linuxnamespaces/tarutils.py @@ -5,12 +5,17 @@ """Extensions to the tarfile module. * ZstdTarFile extends TarFile to deal with zstd-compressed archives. 
* get_comptype guesses the compression used for an open TarFile. + * XAttrTarFile extends TarFile to map extended attributes to PAX headers. """ +import os import tarfile import typing +TarPath = str | bytes | os.PathLike[str] | os.PathLike[bytes] + + class ZstdTarFile(tarfile.TarFile): """Subclass of tarfile.TarFile that can read zstd compressed archives.""" @@ -75,3 +80,73 @@ def get_comptype(tarobj: tarfile.TarFile) -> str: except KeyError: # pylint: disable=raise-missing-from # no value in chaining raise ValueError(f"cannot guess comptype for module {compmodule}") + + +class XAttrTarFile(tarfile.TarFile): + """A subclass to tarfile.TarFile that adds support for extended attributes + via SCHILY.xattr.* PAX headers to extraction and creation of archives. It + can be used as a mixin class with others as it does not add any state. + """ + + def extract( + self, + member: tarfile.TarInfo | str, + path: TarPath = "", + set_attrs: bool = True, + **kwargs: typing.Any, + ) -> None: + """Refer to tarfile.TarFile.extract. In addition, SCHILY.xattr.* PAX + headers are examined and applied as extended attributes if set_attrs is + true-ish. + """ + if not set_attrs: + super().extract(member, path, False, **kwargs) + return + + # We also need the tarinfo, so mimic the start of the built-in extract. + if isinstance(member, str): + tarinfo = self.getmember(member) + else: + tarinfo = member + + super().extract(tarinfo, path, True, **kwargs) + + # mypy is unhappy about the next line, but we have the same code in + # TarFile.extract and if it bails here, it also bails there. 
+ path = os.path.join(path, tarinfo.name) # type: ignore + + for attr, value in tarinfo.pax_headers.items(): + if not attr.startswith("SCHILY.xattr."): + continue + attr = attr.removeprefix("SCHILY.xattr.") + os.setxattr( + path, + attr, + value.encode(self.encoding or "utf8", "surrogateescape"), + follow_symlinks=False, + ) + + def gettarinfo( + self, + name: TarPath | None = None, + arcname: str | None = None, + fileobj: typing.IO[bytes] | None = None, + ) -> tarfile.TarInfo: + tarinfo = super().gettarinfo(name, arcname, fileobj) + path: int | TarPath + if fileobj is not None: + path = fileobj.fileno() + elif name is not None: + path = name + else: + raise ValueError("gettarinfo requires a name or fileobj") + dereference = True if self.dereference is None else self.dereference + for attr in os.listxattr(path, follow_symlinks=dereference): + key = "SCHILY.xattr." + attr + value = os.getxattr( + path, attr, follow_symlinks=dereference + ).decode(self.encoding or "utf8", "surrogateescape") + # TarInfo.pax_headers is designated as (read-only) Mapping, but it + # really is a writable dict. + tarinfo.pax_headers[key] = value # type: ignore[index] + return tarinfo -- cgit v1.2.3