summaryrefslogtreecommitdiff
path: root/examples/cgroup.py
blob: 9a01d0d7681286b1d0f468dbd7339e4ac870ab09 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
#!/usr/bin/python3
# Copyright 2024 Helmut Grohne <helmut@subdivi.de>
# SPDX-License-Identifier: GPL-3

"""Unshare a cgroup (and user) namespace such that the entire cgroup hierarchy
(inside the namespace) becomes writeable to the user.
"""

import asyncio
import os
import pathlib
import sys

try:
    import ravel
except ImportError:
    ravel = None

# When this script lives in a directory named "examples", prepend that
# directory's parent to sys.path. This must happen before the
# linuxnamespaces import below; presumably it makes the in-tree package
# importable without installing it.
if __file__.split("/")[-2:-1] == ["examples"]:
    sys.path.insert(0, "/".join(__file__.split("/")[:-2]))

import linuxnamespaces


def get_cgroup(pid: int = -1) -> pathlib.PurePath:
    """Look up the cgroup that the given pid or the running process belongs
    to.

    The path is taken from the cgroup v2 entry ("0::<path>") of the
    /proc/<pid>/cgroup file. On hosts that also mount cgroup v1 hierarchies
    that file contains one line per hierarchy, so the lines are examined
    individually. If no v2 entry is present, the path of the first entry is
    returned.

    A non-positive pid selects the running process. Raises OSError if the
    file cannot be read and IndexError if its first entry cannot be parsed.
    """
    procfile = pathlib.Path(
        f"/proc/{pid}/cgroup" if pid > 0 else "/proc/self/cgroup"
    )
    # Each entry has the form "hierarchy-ID:controller-list:cgroup-path".
    # Split on "\n" only, since the path itself may contain other whitespace.
    entries = procfile.read_text().split("\n")
    for entry in entries:
        if entry.startswith("0::"):
            return pathlib.PurePath(entry[3:])
    # No v2 entry: fall back to the first line. The path may contain ":", so
    # limit the number of splits.
    return pathlib.PurePath(entries[0].split(":", 2)[2])


async def start_transient_unit_with_ravel(pid: int) -> None:
    """Call the StartTransientUnit dbus method on the user manager for the
    given pid.

    The pid is placed into a new scope unit named cgroup-<pid>.scope. The
    function then waits up to 60 seconds for the matching JobRemoved signal.
    It terminates the process via sys.exit(1) if the wait times out or if the
    job result is anything other than "done".
    """
    bus = await ravel.session_bus_async()
    # Results of jobs that were removed before we learned our own job path.
    jobs_removed = {}
    scope_job = None
    systemd_path = "/org/freedesktop/systemd1"
    systemd_iface = "org.freedesktop.systemd1.Manager"
    scope_created = asyncio.get_running_loop().create_future()

    @ravel.signal(name="JobRemoved", in_signature="uoss")
    def handle_job_removed(_1, path, _2, result):
        if scope_job is None:
            # StartTransientUnit has not returned yet, so we cannot tell
            # whether this is our job. Remember the result for later.
            jobs_removed[path] = result
        elif path == scope_job and not scope_created.done():
            # Guard against a repeated signal or an already cancelled future.
            scope_created.set_result(result)

    # Subscribe before starting the unit so that the signal cannot be missed.
    bus.listen_signal(
        systemd_path, False, systemd_iface, "JobRemoved", handle_job_removed
    )
    try:
        scope_job = (
            bus["org.freedesktop.systemd1"][systemd_path]
            .get_interface(systemd_iface)
            .StartTransientUnit(
                f"cgroup-{pid}.scope", "fail", [("PIDs", ("au", [pid]))], []
            )
        )[0]
        if scope_job in jobs_removed:
            result = jobs_removed[scope_job]
        else:
            try:
                result = await asyncio.wait_for(scope_created, 60)
            # asyncio.TimeoutError only became an alias of the builtin
            # TimeoutError in Python 3.11; this spelling works on all
            # versions.
            except asyncio.TimeoutError:
                print("Error: timed out waiting for StartTransientUnit")
                sys.exit(1)
    finally:
        # Always drop the subscription, including on error paths.
        bus.unlisten_signal(
            systemd_path, False, systemd_iface, "JobRemoved",
            handle_job_removed,
        )
    if result != "done":
        print("Error: StartTransientUnit failed: " + result)
        sys.exit(1)


def main() -> None:
    """Ensure the current cgroup is writeable, unshare namespaces and run a
    shell.

    If the process' cgroup directory below /sys/fs/cgroup is not writeable,
    a scope unit is created via dbus when ravel is available. Otherwise the
    script re-executes itself below "systemd-run --user --scope". Afterwards
    user, mount and cgroup namespaces are unshared with the current uid and
    gid mapped to themselves, linuxnamespaces.populate_sys is called and the
    process is replaced with the user's shell ($SHELL, or /bin/sh if unset).
    """
    mycgroup = get_cgroup()
    if not os.access(
        pathlib.Path("/sys/fs/cgroup") / mycgroup.relative_to("/"),
        os.W_OK,
    ):
        # For some shells - notably from graphical desktop environments, the
        # hierarchy is immediately writeable. For others, we may create a
        # scope unit.
        if ravel is not None:
            # asyncio.run creates (and closes) its own event loop.
            # asyncio.get_event_loop() without a running loop is deprecated.
            asyncio.run(start_transient_unit_with_ravel(os.getpid()))
            mycgroup = get_cgroup()
        else:
            # Re-execute ourselves via systemd-run.
            if (
                mycgroup.name.startswith("run-")
                and mycgroup.name.endswith(".scope")
            ):
                # We already are inside a systemd-run scope. Re-executing
                # again would loop forever.
                print(
                    "Error: We're running in a .scope cgroup, but it is not writeable. Giving up."
                )
                sys.exit(1)
            try:
                os.execvp(
                    "systemd-run",
                    ["systemd-run", "--user", "--scope"] + sys.argv,
                )
            except OSError:
                # execvp never returns on success and raises on failure.
                print("Error: Failed to re-execute myself inside systemd-run.")
                sys.exit(1)
    linuxnamespaces.unshare_user_idmap(
        [linuxnamespaces.IDMapping(os.getuid(), os.getuid(), 1)],
        [linuxnamespaces.IDMapping(os.getgid(), os.getgid(), 1)],
        linuxnamespaces.CloneFlags.NEWUSER
        | linuxnamespaces.CloneFlags.NEWNS
        | linuxnamespaces.CloneFlags.NEWCGROUP,
    )
    linuxnamespaces.populate_sys("/", "/", mycgroup)
    # SHELL may be unset (e.g. in minimal environments); fall back to a
    # POSIX shell instead of failing with a KeyError.
    shell = os.environ.get("SHELL", "/bin/sh")
    os.execlp(shell, shell)


# Only run main() when executed as a script, not when imported.
if __name__ == "__main__":
    main()