diff options
55 files changed, 1985 insertions, 791 deletions
| diff --git a/configure.ac b/configure.ac index d9ab3624dd..10e42c07be 100644 --- a/configure.ac +++ b/configure.ac @@ -39,9 +39,14 @@ AM_SILENT_RULES([yes])  AC_CANONICAL_HOST  AC_DEFINE_UNQUOTED([CANONICAL_HOST], "$host", [Canonical host string.]) -AC_CHECK_TOOLS([AR], [gcc-ar ar], [:]) -AC_CHECK_TOOLS([NM], [gcc-nm nm], [:]) -AC_CHECK_TOOLS([RANLIB], [gcc-ranlib ranlib], [:]) +AC_PROG_CC_C99 + +AX_COMPILER_VENDOR +AS_IF([test "x$ax_cv_c_compiler_vendor" = "xgnu"], [ +      AC_CHECK_TOOLS([AR], [gcc-ar ar], [:]) +      AC_CHECK_TOOLS([NM], [gcc-nm nm], [:]) +      AC_CHECK_TOOLS([RANLIB], [gcc-ranlib ranlib], [:]) +])  LT_PREREQ(2.2)  LT_INIT([disable-static]) @@ -87,8 +92,6 @@ AC_PROG_SED  AC_PROG_GREP  AC_PROG_AWK -AC_PROG_CC_C99 -  AC_PATH_PROG([M4], [m4])  AC_PATH_PROG([XSLTPROC], [xsltproc]) diff --git a/hwdb/70-mouse.hwdb b/hwdb/70-mouse.hwdb index d198591010..781a7ec240 100644 --- a/hwdb/70-mouse.hwdb +++ b/hwdb/70-mouse.hwdb @@ -124,6 +124,10 @@ mouse:usb:v04f2p0963:name:Chicony 2.4G Multimedia Wireless Kit:  # Dell  ########################################## +# Dell MUAR DEL7 +mouse:usb:v413cp3012:name:Dell Dell USB Optical Mouse: + MOUSE_DPI=400@166 +  # Dell USB Laser Mouse  mouse:usb:v046dpc063:name:DELL DELL USB Laser Mouse:   MOUSE_DPI=1000@125 @@ -301,6 +305,8 @@ mouse:usb:v046dpc065:name:Logitech USB Laser Mouse:  mouse:usb:v046dpc510:name:Logitech USB Receiver:   MOUSE_DPI=1000@125 +# Logitech V220 Cordless Optical Mouse +mouse:usb:v046dpc51b:name:Logitech USB Receiver:  # Logitech Performance MX  mouse:usb:v046dp101a:name:Logitech Performance MX:  # Logitech MX Master @@ -376,6 +382,8 @@ mouse:usb:v045ep0040:name:Microsoft Microsoft 3-Button Mouse with IntelliEye(TM)  mouse:usb:v045ep0745:name:Microsoft Microsoft® 2.4GHz Transceiver v6.0:   MOUSE_DPI=800@142 +# Microsoft Wireless Mobile Mouse 4000 +mouse:usb:v045ep0745:name:Microsoft Microsoft® Nano Transceiver v2.0:  # Microsoft Sculpt Ergonomic Mouse  mouse:usb:v045ep07a5:name:Microsoft Microsoft® 2.4GHz Transceiver v9.0:   MOUSE_DPI=1000@142 diff --git a/m4/ax_compiler_vendor.m4 b/m4/ax_compiler_vendor.m4 new file mode 100644 index 0000000000..39ca3c0f33 --- /dev/null +++ b/m4/ax_compiler_vendor.m4 @@ -0,0 +1,87 @@ +# =========================================================================== +#    http://www.gnu.org/software/autoconf-archive/ax_compiler_vendor.html +# =========================================================================== +# +# SYNOPSIS +# +#   AX_COMPILER_VENDOR +# +# DESCRIPTION +# +#   Determine the vendor of the C/C++ compiler, e.g., gnu, intel, ibm, sun, +#   hp, borland, comeau, dec, cray, kai, lcc, metrowerks, sgi, microsoft, +#   watcom, etc. The vendor is returned in the cache variable +#   $ax_cv_c_compiler_vendor for C and $ax_cv_cxx_compiler_vendor for C++. +# +# LICENSE +# +#   Copyright (c) 2008 Steven G. Johnson <stevenj@alum.mit.edu> +#   Copyright (c) 2008 Matteo Frigo +# +#   This program is free software: you can redistribute it and/or modify it +#   under the terms of the GNU General Public License as published by the +#   Free Software Foundation, either version 3 of the License, or (at your +#   option) any later version. +# +#   This program is distributed in the hope that it will be useful, but +#   WITHOUT ANY WARRANTY; without even the implied warranty of +#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General +#   Public License for more details. +# +#   You should have received a copy of the GNU General Public License along +#   with this program. If not, see <http://www.gnu.org/licenses/>. +# +#   As a special exception, the respective Autoconf Macro's copyright owner +#   gives unlimited permission to copy, distribute and modify the configure +#   scripts that are the output of Autoconf when processing the Macro. You +#   need not follow the terms of the GNU General Public License when using +#   or distributing such scripts, even though portions of the text of the +#   Macro appear in them. The GNU General Public License (GPL) does govern +#   all other use of the material that constitutes the Autoconf Macro. +# +#   This special exception to the GPL applies to versions of the Autoconf +#   Macro released by the Autoconf Archive. When you make and distribute a +#   modified version of the Autoconf Macro, you may extend this special +#   exception to the GPL to apply to your modified version as well. + +#serial 15 + +AC_DEFUN([AX_COMPILER_VENDOR], +[AC_CACHE_CHECK([for _AC_LANG compiler vendor], ax_cv_[]_AC_LANG_ABBREV[]_compiler_vendor, +  dnl Please add if possible support to ax_compiler_version.m4 +  [# note: don't check for gcc first since some other compilers define __GNUC__ +  vendors="intel:     __ICC,__ECC,__INTEL_COMPILER +           ibm:       __xlc__,__xlC__,__IBMC__,__IBMCPP__ +           pathscale: __PATHCC__,__PATHSCALE__ +           clang:     __clang__ +           cray:      _CRAYC +           fujitsu:   __FUJITSU +           gnu:       __GNUC__ +           sun:       __SUNPRO_C,__SUNPRO_CC +           hp:        __HP_cc,__HP_aCC +           dec:       __DECC,__DECCXX,__DECC_VER,__DECCXX_VER +           borland:   __BORLANDC__,__CODEGEARC__,__TURBOC__ +           comeau:    __COMO__ +           kai:       __KCC +           lcc:       __LCC__ +           sgi:       __sgi,sgi +           microsoft: _MSC_VER +           metrowerks: __MWERKS__ +           watcom:    __WATCOMC__ +           portland:  __PGI +	   tcc:       __TINYC__ +           unknown:   UNKNOWN" +  for ventest in $vendors; do +    case $ventest in +      *:) vendor=$ventest; continue ;; +      *)  vencpp="defined("`echo $ventest | sed 's/,/) || defined(/g'`")" ;; +    esac +    AC_COMPILE_IFELSE([AC_LANG_PROGRAM(,[ +      #if !($vencpp) +        thisisanerror; +      #endif +    ])], [break]) +  done +  ax_cv_[]_AC_LANG_ABBREV[]_compiler_vendor=`echo $vendor | cut -d: -f1` + ]) +]) diff --git a/man/systemd-detect-virt.xml b/man/systemd-detect-virt.xml index 40755a24d0..9ea9141d4d 100644 --- a/man/systemd-detect-virt.xml +++ b/man/systemd-detect-virt.xml @@ -88,7 +88,7 @@          </thead>          <tbody>            <row> -      <entry morerows="8">VM</entry> +      <entry morerows="9">VM</entry>        <entry><varname>qemu</varname></entry>        <entry>QEMU software virtualization</entry>            </row> @@ -134,6 +134,11 @@            </row>            <row> +      <entry><varname>parallels</varname></entry> +      <entry>Parallels Desktop, Parallels Server</entry> +          </row> + +          <row>        <entry morerows="5">container</entry>        <entry><varname>openvz</varname></entry>        <entry>OpenVZ/Virtuozzo</entry> diff --git a/man/systemd-path.xml b/man/systemd-path.xml index dfc75ee0ff..4f790d2cda 100644 --- a/man/systemd-path.xml +++ b/man/systemd-path.xml @@ -64,9 +64,9 @@      <para>When invoked without arguments a list of known paths and      their current values is shown. When at least one argument is -    passed the path with this is name is queried and its value shown. +    passed the path with this name is queried and its value shown.      The variables whose name begins with <literal>search-</literal> -    don't refer to individual paths, but instead a to a list of +    don't refer to individual paths, but instead to a list of      colon-separated search paths, in their order of precedence.</para>    </refsect1> diff --git a/man/systemd-run.xml b/man/systemd-run.xml index 80db148702..b220e0dce1 100644 --- a/man/systemd-run.xml +++ b/man/systemd-run.xml @@ -113,6 +113,13 @@      <variablelist>        <varlistentry> +        <term><option>--no-ask-password</option></term> + +        <listitem><para>Do not query the user for authentication for +        privileged operations.</para></listitem> +      </varlistentry> + +      <varlistentry>          <term><option>--scope</option></term>          <listitem> diff --git a/shell-completion/bash/systemd-run b/shell-completion/bash/systemd-run index 63c831b8f1..a948677516 100644 --- a/shell-completion/bash/systemd-run +++ b/shell-completion/bash/systemd-run @@ -36,7 +36,7 @@ _systemd_run() {                  -r --remain-after-exit --send-sighup -H --host -M --machine --service-type                  --on-active --on-boot --on-startup --on-unit-active --on-unit-inactive                  --on-calendar --timer-property -t --pty -q --quiet --no-block -                --uid --gid --nice --setenv -p --property' +                --uid --gid --nice --setenv -p --property --no-ask-password'      local mode=--system      local i diff --git a/src/basic/cgroup-util.c b/src/basic/cgroup-util.c index 6b3162a35f..218de0b376 100644 --- a/src/basic/cgroup-util.c +++ b/src/basic/cgroup-util.c @@ -29,7 +29,6 @@  #include <sys/types.h>  #include <ftw.h> -#include "cgroup-util.h"  #include "set.h"  #include "macro.h"  #include "util.h" @@ -41,6 +40,7 @@  #include "special.h"  #include "mkdir.h"  #include "login-util.h" +#include "cgroup-util.h"  int cg_enumerate_processes(const char *controller, const char *path, FILE **_f) {          _cleanup_free_ char *fs = NULL; @@ -113,7 +113,7 @@ int cg_read_subgroup(DIR *d, char **fn) {          assert(d);          assert(fn); -        FOREACH_DIRENT(de, d, return -errno) { +        FOREACH_DIRENT_ALL(de, d, return -errno) {                  char *b;                  if (de->d_type != DT_DIR) @@ -197,7 +197,7 @@ int cg_kill(const char *controller, const char *path, int sig, bool sigcont, boo                                          ret = -errno;                          } else {                                  if (sigcont && sig != SIGKILL) -                                        kill(pid, SIGCONT); +                                        (void) kill(pid, SIGCONT);                                  if (ret == 0)                                          ret = 1; @@ -233,7 +233,7 @@ int cg_kill(const char *controller, const char *path, int sig, bool sigcont, boo  int cg_kill_recursive(const char *controller, const char *path, int sig, bool sigcont, bool ignore_self, bool rem, Set *s) {          _cleanup_set_free_ Set *allocated_set = NULL;          _cleanup_closedir_ DIR *d = NULL; -        int r, ret = 0; +        int r, ret;          char *fn;          assert(path); @@ -264,7 +264,7 @@ int cg_kill_recursive(const char *controller, const char *path, int sig, bool si                          return -ENOMEM;                  r = cg_kill_recursive(controller, p, sig, sigcont, ignore_self, rem, s); -                if (ret >= 0 && r != 0) +                if (r != 0 && ret >= 0)                          ret = r;          } @@ -321,6 +321,14 @@ int cg_migrate(const char *cfrom, const char *pfrom, const char *cto, const char                          if (set_get(s, LONG_TO_PTR(pid)) == LONG_TO_PTR(pid))                                  continue; +                        /* Ignore kernel threads. Since they can only +                         * exist in the root cgroup, we only check for +                         * them there. */ +                        if (cfrom && +                            (isempty(pfrom) || path_equal(pfrom, "/")) && +                            is_kernel_thread(pid) > 0) +                                continue; +                          r = cg_attach(cto, pto, pid);                          if (r < 0) {                                  if (ret >= 0 && r != -ESRCH) @@ -382,12 +390,8 @@ int cg_migrate_recursive(                  p = strjoin(pfrom, "/", fn, NULL);                  free(fn); -                if (!p) { -                        if (ret >= 0) -                                return -ENOMEM; - -                        return ret; -                } +                if (!p) +                        return -ENOMEM;                  r = cg_migrate_recursive(cfrom, p, cto, pto, ignore_self, rem);                  if (r != 0 && ret >= 0) @@ -428,114 +432,180 @@ int cg_migrate_recursive_fallback(                  /* This didn't work? Then let's try all prefixes of the destination */                  PATH_FOREACH_PREFIX(prefix, pto) { -                        r = cg_migrate_recursive(cfrom, pfrom, cto, prefix, ignore_self, rem); -                        if (r >= 0) -                                break; +                        int q; + +                        q = cg_migrate_recursive(cfrom, pfrom, cto, prefix, ignore_self, rem); +                        if (q >= 0) +                                return q;                  }          } -        return 0; +        return r;  } -static const char *normalize_controller(const char *controller) { +static const char *controller_to_dirname(const char *controller) { +        const char *e;          assert(controller); -        if (startswith(controller, "name=")) -                return controller + 5; -        else -                return controller; +        /* Converts a controller name to the directory name below +         * /sys/fs/cgroup/ we want to mount it to. Effectively, this +         * just cuts off the name= prefixed used for named +         * hierarchies, if it is specified. */ + +        e = startswith(controller, "name="); +        if (e) +                return e; + +        return controller;  } -static int join_path(const char *controller, const char *path, const char *suffix, char **fs) { +static int join_path_legacy(const char *controller_dn, const char *path, const char *suffix, char **fs) {          char *t = NULL; -        if (!isempty(controller)) { -                if (!isempty(path) && !isempty(suffix)) -                        t = strjoin("/sys/fs/cgroup/", controller, "/", path, "/", suffix, NULL); -                else if (!isempty(path)) -                        t = strjoin("/sys/fs/cgroup/", controller, "/", path, NULL); -                else if (!isempty(suffix)) -                        t = strjoin("/sys/fs/cgroup/", controller, "/", suffix, NULL); -                else -                        t = strappend("/sys/fs/cgroup/", controller); -        } else { -                if (!isempty(path) && !isempty(suffix)) -                        t = strjoin(path, "/", suffix, NULL); -                else if (!isempty(path)) -                        t = strdup(path); -                else -                        return -EINVAL; -        } +        assert(fs); +        assert(controller_dn); + +        if (isempty(path) && isempty(suffix)) +                t = strappend("/sys/fs/cgroup/", controller_dn); +        else if (isempty(path)) +                t = strjoin("/sys/fs/cgroup/", controller_dn, "/", suffix, NULL); +        else if (isempty(suffix)) +                t = strjoin("/sys/fs/cgroup/", controller_dn, "/", path, NULL); +        else +                t = strjoin("/sys/fs/cgroup/", controller_dn, "/", path, "/", suffix, NULL); +        if (!t) +                return -ENOMEM; + +        *fs = t; +        return 0; +} + +static int join_path_unified(const char *path, const char *suffix, char **fs) { +        char *t; + +        assert(fs); +        if (isempty(path) && isempty(suffix)) +                t = strdup("/sys/fs/cgroup"); +        else if (isempty(path)) +                t = strappend("/sys/fs/cgroup/", suffix); +        else if (isempty(suffix)) +                t = strappend("/sys/fs/cgroup/", path); +        else +                t = strjoin("/sys/fs/cgroup/", path, "/", suffix, NULL);          if (!t)                  return -ENOMEM; -        *fs = path_kill_slashes(t); +        *fs = t;          return 0;  }  int cg_get_path(const char *controller, const char *path, const char *suffix, char **fs) { -        const char *p; -        static thread_local bool good = false; +        int unified, r;          assert(fs); -        if (controller && !cg_controller_is_valid(controller)) +        if (!controller) { +                char *t; + +                /* If no controller is specified, we assume only the +                 * path below the controller matters */ + +                if (!path && !suffix) +                        return -EINVAL; + +                if (isempty(suffix)) +                        t = strdup(path); +                else if (isempty(path)) +                        t = strdup(suffix); +                else +                        t = strjoin(path, "/", suffix, NULL); +                if (!t) +                        return -ENOMEM; + +                *fs = path_kill_slashes(t); +                return 0; +        } + +        if (!cg_controller_is_valid(controller))                  return -EINVAL; -        if (_unlikely_(!good)) { -                int r; +        unified = cg_unified(); +        if (unified < 0) +                return unified; -                r = path_is_mount_point("/sys/fs/cgroup", 0); -                if (r < 0) -                        return r; -                if (r == 0) -                        return -ENOENT; +        if (unified > 0) +                r = join_path_unified(path, suffix, fs); +        else { +                const char *dn; -                /* Cache this to save a few stat()s */ -                good = true; +                if (controller) +                        dn = controller_to_dirname(controller); +                else +                        dn = NULL; + +                r = join_path_legacy(dn, path, suffix, fs);          } -        p = controller ? normalize_controller(controller) : NULL; +        if (r < 0) +                return r; -        return join_path(p, path, suffix, fs); +        path_kill_slashes(*fs); +        return 0;  } -static int check_hierarchy(const char *p) { -        const char *cc; +static int controller_is_accessible(const char *controller) { +        int unified; -        assert(p); +        assert(controller); -        if (!filename_is_valid(p)) -                return 0; +        /* Checks whether a specific controller is accessible, +         * i.e. its hierarchy mounted. In the unified hierarchy all +         * controllers are considered accessible, except for the named +         * hierarchies */ -        /* Check if this controller actually really exists */ -        cc = strjoina("/sys/fs/cgroup/", p); -        if (laccess(cc, F_OK) < 0) -                return -errno; +        if (!cg_controller_is_valid(controller)) +                return -EINVAL; + +        unified = cg_unified(); +        if (unified < 0) +                return unified; +        if (unified > 0) { +                /* We don't support named hierarchies if we are using +                 * the unified hierarchy. */ + +                if (streq(controller, SYSTEMD_CGROUP_CONTROLLER)) +                        return 0; + +                if (startswith(controller, "name=")) +                        return -EOPNOTSUPP; + +        } else { +                const char *cc, *dn; + +                dn = controller_to_dirname(controller); +                cc = strjoina("/sys/fs/cgroup/", dn); + +                if (laccess(cc, F_OK) < 0) +                        return -errno; +        }          return 0;  }  int cg_get_path_and_check(const char *controller, const char *path, const char *suffix, char **fs) { -        const char *p;          int r; +        assert(controller);          assert(fs); -        if (!cg_controller_is_valid(controller)) -                return -EINVAL; - -        /* Normalize the controller syntax */ -        p = normalize_controller(controller); - -        /* Check if this controller actually really exists */ -        r = check_hierarchy(p); +        /* Check if the specified controller is actually accessible */ +        r = controller_is_accessible(controller);          if (r < 0)                  return r; -        return join_path(p, path, suffix, fs); +        return cg_get_path(controller, path, suffix, fs);  }  static int trim_cb(const char *path, const struct stat *sb, int typeflag, struct FTW *ftwbuf) { @@ -549,7 +619,7 @@ static int trim_cb(const char *path, const struct stat *sb, int typeflag, struct          if (ftwbuf->level < 1)                  return 0; -        rmdir(path); +        (void) rmdir(path);          return 0;  } @@ -564,8 +634,14 @@ int cg_trim(const char *controller, const char *path, bool delete_root) {                  return r;          errno = 0; -        if (nftw(fs, trim_cb, 64, FTW_DEPTH|FTW_MOUNT|FTW_PHYS) != 0) -                r = errno ? -errno : -EIO; +        if (nftw(fs, trim_cb, 64, FTW_DEPTH|FTW_MOUNT|FTW_PHYS) != 0) { +                if (errno == ENOENT) +                        r = 0; +                else if (errno != 0) +                        r = -errno; +                else +                        r = -EIO; +        }          if (delete_root) {                  if (rmdir(fs) < 0 && errno != ENOENT) @@ -575,20 +651,6 @@ int cg_trim(const char *controller, const char *path, bool delete_root) {          return r;  } -int cg_delete(const char *controller, const char *path) { -        _cleanup_free_ char *parent = NULL; -        int r; - -        assert(path); - -        r = path_get_parent(path, &parent); -        if (r < 0) -                return r; - -        r = cg_migrate_recursive(controller, path, controller, parent, false, true); -        return r == -ENOENT ? 0 : r; -} -  int cg_create(const char *controller, const char *path) {          _cleanup_free_ char *fs = NULL;          int r; @@ -664,13 +726,15 @@ int cg_attach_fallback(const char *controller, const char *path, pid_t pid) {                   * the destination */                  PATH_FOREACH_PREFIX(prefix, path) { -                        r = cg_attach(controller, prefix, pid); -                        if (r >= 0) -                                break; +                        int q; + +                        q = cg_attach(controller, prefix, pid); +                        if (q >= 0) +                                return q;                  }          } -        return 0; +        return r;  }  int cg_set_group_access( @@ -683,7 +747,8 @@ int cg_set_group_access(          _cleanup_free_ char *fs = NULL;          int r; -        assert(path); +        if (mode == MODE_INVALID && uid == UID_INVALID && gid == GID_INVALID) +                return 0;          if (mode != MODE_INVALID)                  mode &= 0777; @@ -703,7 +768,7 @@ int cg_set_task_access(                  gid_t gid) {          _cleanup_free_ char *fs = NULL, *procs = NULL; -        int r; +        int r, unified;          assert(path); @@ -721,77 +786,88 @@ int cg_set_task_access(          if (r < 0)                  return r; +        unified = cg_unified(); +        if (unified < 0) +                return unified; +        if (unified) +                return 0; +          /* Compatibility, Always keep values for "tasks" in sync with           * "cgroup.procs" */ -        r = cg_get_path(controller, path, "tasks", &procs); -        if (r < 0) -                return r; +        if (cg_get_path(controller, path, "tasks", &procs) >= 0) +                (void) chmod_and_chown(procs, mode, uid, gid); -        return chmod_and_chown(procs, mode, uid, gid); +        return 0;  }  int cg_pid_get_path(const char *controller, pid_t pid, char **path) {          _cleanup_fclose_ FILE *f = NULL;          char line[LINE_MAX];          const char *fs; -        size_t cs; +        size_t cs = 0; +        int unified;          assert(path);          assert(pid >= 0); -        if (controller) { -                if (!cg_controller_is_valid(controller)) -                        return -EINVAL; +        unified = cg_unified(); +        if (unified < 0) +                return unified; +        if (unified == 0) { +                if (controller) { +                        if (!cg_controller_is_valid(controller)) +                                return -EINVAL; +                } else +                        controller = SYSTEMD_CGROUP_CONTROLLER; -                controller = normalize_controller(controller); -        } else -                controller = SYSTEMD_CGROUP_CONTROLLER; +                cs = strlen(controller); +        }          fs = procfs_file_alloca(pid, "cgroup"); -          f = fopen(fs, "re");          if (!f)                  return errno == ENOENT ? -ESRCH : -errno; -        cs = strlen(controller); -          FOREACH_LINE(line, f, return -errno) { -                char *l, *p, *e; -                size_t k; -                const char *word, *state; -                bool found = false; +                char *e, *p;                  truncate_nl(line); -                l = strchr(line, ':'); -                if (!l) -                        continue; - -                l++; -                e = strchr(l, ':'); -                if (!e) -                        continue; +                if (unified) { +                        e = startswith(line, "0:"); +                        if (!e) +                                continue; -                *e = 0; +                        e = strchr(e, ':'); +                        if (!e) +                                continue; +                } else { +                        char *l; +                        size_t k; +                        const char *word, *state; +                        bool found = false; + +                        l = strchr(line, ':'); +                        if (!l) +                                continue; -                FOREACH_WORD_SEPARATOR(word, k, l, ",", state) { +                        l++; +                        e = strchr(l, ':'); +                        if (!e) +                                continue; -                        if (k == cs && memcmp(word, controller, cs) == 0) { -                                found = true; -                                break; +                        *e = 0; +                        FOREACH_WORD_SEPARATOR(word, k, l, ",", state) { +                                if (k == cs && memcmp(word, controller, cs) == 0) { +                                        found = true; +                                        break; +                                }                          } -                        if (k == 5 + cs && -                            memcmp(word, "name=", 5) == 0 && -                            memcmp(word+5, controller, cs) == 0) { -                                found = true; -                                break; -                        } +                        if (!found) +                                continue;                  } -                if (!found) -                        continue; -                  p = strdup(e + 1);                  if (!p)                          return -ENOMEM; @@ -805,11 +881,17 @@ int cg_pid_get_path(const char *controller, pid_t pid, char **path) {  int cg_install_release_agent(const char *controller, const char *agent) {          _cleanup_free_ char *fs = NULL, *contents = NULL; -        char *sc; -        int r; +        const char *sc; +        int r, unified;          assert(agent); +        unified = cg_unified(); +        if (unified < 0) +                return unified; +        if (unified) /* doesn't apply to unified hierarchy */ +                return -EOPNOTSUPP; +          r = cg_get_path(controller, NULL, "release_agent", &fs);          if (r < 0)                  return r; @@ -819,7 +901,7 @@ int cg_install_release_agent(const char *controller, const char *agent) {                  return r;          sc = strstrip(contents); -        if (sc[0] == 0) { +        if (isempty(sc)) {                  r = write_string_file(fs, agent, 0);                  if (r < 0)                          return r; @@ -853,7 +935,13 @@ int cg_install_release_agent(const char *controller, const char *agent) {  int cg_uninstall_release_agent(const char *controller) {          _cleanup_free_ char *fs = NULL; -        int r; +        int r, unified; + +        unified = cg_unified(); +        if (unified < 0) +                return unified; +        if (unified) /* Doesn't apply to unified hierarchy */ +                return -EOPNOTSUPP;          r = cg_get_path(controller, NULL, "notify_on_release", &fs);          if (r < 0) @@ -876,73 +964,90 @@ int cg_uninstall_release_agent(const char *controller) {          return 0;  } -int cg_is_empty(const char *controller, const char *path, bool ignore_self) { +int cg_is_empty(const char *controller, const char *path) {          _cleanup_fclose_ FILE *f = NULL; -        pid_t pid = 0, self_pid; -        bool found = false; +        pid_t pid;          int r;          assert(path);          r = cg_enumerate_processes(controller, path, &f); +        if (r == -ENOENT) +                return 1;          if (r < 0) -                return r == -ENOENT ? 1 : r; - -        self_pid = getpid(); - -        while ((r = cg_read_pid(f, &pid)) > 0) { - -                if (ignore_self && pid == self_pid) -                        continue; - -                found = true; -                break; -        } +                return r; +        r = cg_read_pid(f, &pid);          if (r < 0)                  return r; -        return !found; +        return r == 0;  } -int cg_is_empty_recursive(const char *controller, const char *path, bool ignore_self) { -        _cleanup_closedir_ DIR *d = NULL; -        char *fn; -        int r; +int cg_is_empty_recursive(const char *controller, const char *path) { +        int unified, r;          assert(path); -        r = cg_is_empty(controller, path, ignore_self); -        if (r <= 0) -                return r; +        /* The root cgroup is always populated */ +        if (controller && (isempty(path) || path_equal(path, "/"))) +                return false; -        r = cg_enumerate_subgroups(controller, path, &d); -        if (r < 0) -                return r == -ENOENT ? 1 : r; +        unified = cg_unified(); +        if (unified < 0) +                return unified; -        while ((r = cg_read_subgroup(d, &fn)) > 0) { -                _cleanup_free_ char *p = NULL; +        if (unified > 0) { +                _cleanup_free_ char *populated = NULL, *t = NULL; -                p = strjoin(path, "/", fn, NULL); -                free(fn); -                if (!p) -                        return -ENOMEM; +                /* On the unified hierarchy we can check empty state +                 * via the "cgroup.populated" attribute. */ + +                r = cg_get_path(controller, path, "cgroup.populated", &populated); +                if (r < 0) +                        return r; -                r = cg_is_empty_recursive(controller, p, ignore_self); +                r = read_one_line_file(populated, &t); +                if (r < 0) +                        return r; + +                return streq(t, "0"); +        } else { +                _cleanup_closedir_ DIR *d = NULL; +                char *fn; + +                r = cg_is_empty(controller, path);                  if (r <= 0)                          return r; -        } -        if (r < 0) -                return r; +                r = cg_enumerate_subgroups(controller, path, &d); +                if (r == -ENOENT) +                        return 1; +                if (r < 0) +                        return r; -        return 1; +                while ((r = cg_read_subgroup(d, &fn)) > 0) { +                        _cleanup_free_ char *p = NULL; + +                        p = strjoin(path, "/", fn, NULL); +                        free(fn); +                        if (!p) +                                return -ENOMEM; + +                        r = cg_is_empty_recursive(controller, p); +                        if (r <= 0) +                                return r; +                } +                if (r < 0) +                        return r; + +                return true; +        }  }  int cg_split_spec(const char *spec, char **controller, char **path) { -        const char *e;          char *t = NULL, *u = NULL; -        _cleanup_free_ char *v = NULL; +        const char *e;          assert(spec); @@ -970,7 +1075,7 @@ int cg_split_spec(const char *spec, char **controller, char **path) {                          return -EINVAL;                  if (controller) { -                        t = strdup(normalize_controller(spec)); +                        t = strdup(spec);                          if (!t)                                  return -ENOMEM; @@ -983,10 +1088,7 @@ int cg_split_spec(const char *spec, char **controller, char **path) {                  return 0;          } -        v = strndup(spec, e-spec); -        if (!v) -                return -ENOMEM; -        t = strdup(normalize_controller(v)); +        t = strndup(spec, e-spec);          if (!t)                  return -ENOMEM;          if (!cg_controller_is_valid(t)) { @@ -994,13 +1096,9 @@ int cg_split_spec(const char *spec, char **controller, char **path) {                  return -EINVAL;          } -        if (streq(e+1, "")) { -                u = strdup("/"); -                if (!u) { -                        free(t); -                        return -ENOMEM; -                } -        } else { +        if (isempty(e+1)) +                u = NULL; +        else {                  u = strdup(e+1);                  if (!u) {                          free(t); @@ -1054,7 +1152,7 @@ int cg_mangle_path(const char *path, char **result) {          if (r < 0)                  return r; -        return cg_get_path(c ? c : SYSTEMD_CGROUP_CONTROLLER, p ? p : "/", NULL, result); +        return cg_get_path(c ?: SYSTEMD_CGROUP_CONTROLLER, p ?: "/", NULL, result);  }  int cg_get_root_path(char **path) { @@ -1067,7 +1165,11 @@ int cg_get_root_path(char **path) {          if (r < 0)                  return r; -        e = endswith(p, "/" SPECIAL_SYSTEM_SLICE); +        e = endswith(p, "/" SPECIAL_INIT_SCOPE); +        if (!e) +                e = endswith(p, "/" SPECIAL_SYSTEM_SLICE); /* legacy */ +        if (!e) +                e = endswith(p, "/system"); /* even more legacy */          if (e)                  *e = 0; @@ -1095,7 +1197,7 @@ int cg_shift_path(const char *cgroup, const char *root, const char **shifted) {          }          p = path_startswith(cgroup, root); -        if (p) +        if (p && p > cgroup)                  *shifted = p - 1;          else                  *shifted = cgroup; @@ -1359,17 +1461,15 @@ int cg_pid_get_user_unit(pid_t pid, char **unit) {  }  int cg_path_get_machine_name(const char *path, char **machine) { -        _cleanup_free_ char *u = NULL, *sl = NULL; +        _cleanup_free_ char *u = NULL; +        const char *sl;          int r;          r = cg_path_get_unit(path, &u);          if (r < 0)                  return r; -        sl = strjoin("/run/systemd/machines/unit:", u, NULL); -        if (!sl) -                return -ENOMEM; - +        sl = strjoina("/run/systemd/machines/unit:", u);          return readlink_malloc(sl, machine);  } @@ -1562,31 +1662,38 @@ char *cg_escape(const char *p) {              p[0] == '.' ||              streq(p, "notify_on_release") ||              streq(p, "release_agent") || -            streq(p, "tasks")) +            streq(p, "tasks") || +            startswith(p, "cgroup."))                  need_prefix = true;          else {                  const char *dot;                  dot = strrchr(p, '.');                  if (dot) { +                        CGroupController c; +                        size_t l = dot - p; -                        if (dot - p == 6 && memcmp(p, "cgroup", 6) == 0) -                                need_prefix = true; -                        else { -                                char *n; +                        for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) { +                                const char *n; + +                                n = cgroup_controller_to_string(c); -                                n = strndupa(p, dot - p); +                                if (l != strlen(n)) +                                        continue; -                                if (check_hierarchy(n) >= 0) -                                        need_prefix = true; +                                if (memcmp(p, n, l) != 0) +                                        continue; + +                                need_prefix = true; +                                break;                          }                  }          }          if (need_prefix)                  return strappend("_", p); -        else -                return strdup(p); + +        return strdup(p);  }  char *cg_unescape(const char *p) { @@ -1719,17 +1826,9 @@ int cg_get_attribute(const char *controller, const char *path, const char *attri          return read_one_line_file(p, ret);  } -static const char mask_names[] = -        "cpu\0" -        "cpuacct\0" -        "blkio\0" -        "memory\0" -        "devices\0"; - -int cg_create_everywhere(CGroupControllerMask supported, CGroupControllerMask mask, const char *path) { -        CGroupControllerMask bit = 1; -        const char *n; -        int r; +int cg_create_everywhere(CGroupMask supported, CGroupMask mask, const char *path) { +        CGroupController c; +        int r, unified;          /* This one will create a cgroup in our private tree, but also           * duplicate it in the trees specified in mask, and remove it @@ -1740,49 +1839,63 @@ int cg_create_everywhere(CGroupControllerMask supported, CGroupControllerMask ma          if (r < 0)                  return r; -        /* Then, do the same in the other hierarchies */ -        NULSTR_FOREACH(n, mask_names) { +        /* If we are in the unified hierarchy, we are done now */ +        unified = cg_unified(); +        if (unified < 0) +                return unified; +        if (unified > 0) +                return 0; + +        /* Otherwise, do the same in the other hierarchies */ +        for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) { +                CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c); +                const char *n; + +                n = cgroup_controller_to_string(c); +                  if (mask & bit) -                        cg_create(n, path); +                        (void) cg_create(n, path);                  else if (supported & bit) -                        cg_trim(n, path, true); - -                bit <<= 1; +                        (void) cg_trim(n, path, true);          }          return 0;  } -int cg_attach_everywhere(CGroupControllerMask supported, const char *path, pid_t pid, cg_migrate_callback_t path_callback, void *userdata) { -        CGroupControllerMask bit = 1; -        const char *n; -        int r; +int cg_attach_everywhere(CGroupMask supported, const char *path, pid_t pid, cg_migrate_callback_t path_callback, void *userdata) { +        CGroupController c; +        int r, unified;          r = cg_attach(SYSTEMD_CGROUP_CONTROLLER, path, pid);          if (r < 0)                  return r; -        NULSTR_FOREACH(n, mask_names) { +        unified = cg_unified(); +        if (unified < 0) +                return unified; +        if (unified > 0) +                return 0; -                if (supported & bit) { -                        const char *p = NULL; +        for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) { +                CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c); +                const char *p = NULL; -                        if (path_callback) -                                p = path_callback(bit, userdata); +                if (!(supported & bit)) +                        continue; -                        if (!p) -                                p = path; +                if (path_callback) +                        p = path_callback(bit, userdata); -                        cg_attach_fallback(n, p, pid); -                } +                if (!p) +                        p = path; -                bit <<= 1; +                (void) cg_attach_fallback(cgroup_controller_to_string(c), p, pid);          }          return 0;  } -int cg_attach_many_everywhere(CGroupControllerMask supported, const char *path, Set* pids, cg_migrate_callback_t path_callback, void *userdata) { +int cg_attach_many_everywhere(CGroupMask supported, const char *path, Set* pids, cg_migrate_callback_t path_callback, void *userdata) {          Iterator i;          void *pidp;          int r = 0; @@ -1792,17 +1905,16 @@ int cg_attach_many_everywhere(CGroupControllerMask supported, const char *path,                  int q;                  q = cg_attach_everywhere(supported, path, pid, path_callback, userdata); -                if (q < 0) +                if (q < 0 && r >= 0)                          r = q;          }          return r;  } -int cg_migrate_everywhere(CGroupControllerMask supported, const char *from, const char *to, cg_migrate_callback_t to_callback, void *userdata) { -        CGroupControllerMask bit = 1; -        const char *n; -        int r; +int cg_migrate_everywhere(CGroupMask supported, const char *from, const char *to, cg_migrate_callback_t to_callback, void *userdata) { +         CGroupController c; +        int r, unified;          if (!path_equal(from, to))  {                  r = cg_migrate_recursive(SYSTEMD_CGROUP_CONTROLLER, from, SYSTEMD_CGROUP_CONTROLLER, to, false, true); @@ -1810,56 +1922,119 @@ int cg_migrate_everywhere(CGroupControllerMask supported, const char *from, cons                          return r;          } -        NULSTR_FOREACH(n, mask_names) { -                if (supported & bit) { -                        const char *p = NULL; +        unified = cg_unified(); +        if (unified < 0) +                return unified; +        if (unified > 0) +                return r; -                        if (to_callback) -                                p = to_callback(bit, userdata); +        for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) { +                CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c); +                const char *p = NULL; -                        if (!p) -                                p = to; +                if (!(supported & bit)) +                        continue; -                        cg_migrate_recursive_fallback(SYSTEMD_CGROUP_CONTROLLER, to, n, p, false, false); -                } +                if (to_callback) +                        p = to_callback(bit, userdata); -                bit <<= 1; +                if (!p) +                        p = to; + +                (void) cg_migrate_recursive_fallback(SYSTEMD_CGROUP_CONTROLLER, to, cgroup_controller_to_string(c), p, false, false);          }          return 0;  } -int cg_trim_everywhere(CGroupControllerMask supported, const char *path, bool delete_root) { -        CGroupControllerMask bit = 1; -        const char *n; -        int r; +int cg_trim_everywhere(CGroupMask supported, const char *path, bool delete_root) { +        CGroupController c; +        int r, unified;          r = cg_trim(SYSTEMD_CGROUP_CONTROLLER, path, delete_root);          if (r < 0)                  return r; -        NULSTR_FOREACH(n, mask_names) { -                if (supported & bit) -                        cg_trim(n, path, delete_root); +        unified = cg_unified(); +        if (unified < 0) +                return unified; +        if (unified > 0) +                return r; + +        for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) { +                CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c); + +                if (!(supported & bit)) +                        continue; -                bit <<= 1; +                (void) cg_trim(cgroup_controller_to_string(c), path, delete_root);          }          return 0;  } -CGroupControllerMask cg_mask_supported(void) { -        CGroupControllerMask bit = 1, mask = 0; -        const char *n; +int cg_mask_supported(CGroupMask *ret) { +        CGroupMask mask = 0; +        int r, unified; + +        /* Determines the mask of supported cgroup controllers. Only +         * includes controllers we can make sense of and that are +         * actually accessible. */ + +        unified = cg_unified(); +        if (unified < 0) +                return unified; +        if (unified > 0) { +                _cleanup_free_ char *controllers = NULL; +                const char *c; + +                /* In the unified hierarchy we can read the supported +                 * and accessible controllers from a the top-level +                 * cgroup attribute */ + +                r = read_one_line_file("/sys/fs/cgroup/cgroup.controllers", &controllers); +                if (r < 0) +                        return r; + +                c = controllers; +                for (;;) { +                        _cleanup_free_ char *n = NULL; +                        CGroupController v; -        NULSTR_FOREACH(n, mask_names) { -                if (check_hierarchy(n) >= 0) -                        mask |= bit; +                        r = extract_first_word(&c, &n, NULL, 0); +                        if (r < 0) +                                return r; +                        if (r == 0) +                                break; + +                        v = cgroup_controller_from_string(n); +                        if (v < 0) +                                continue; + +                        mask |= CGROUP_CONTROLLER_TO_MASK(v); +                } + +                /* Currently, we only support the memory controller in +                 * the unified hierarchy, mask everything else off. */ +                mask &= CGROUP_MASK_MEMORY; + +        } else { +                CGroupController c; -                bit <<= 1; +                /* In the legacy hierarchy, we check whether which +                 * hierarchies are mounted. */ + +                for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) { +                        const char *n; + +                        n = cgroup_controller_to_string(c); +                        if (controller_is_accessible(n) >= 0) +                                mask |= CGROUP_CONTROLLER_TO_MASK(c); +                }          } -        return mask; +        *ret = mask; +        return 0;  }  int cg_kernel_controllers(Set *controllers) { @@ -1869,6 +2044,11 @@ int cg_kernel_controllers(Set *controllers) {          assert(controllers); +        /* Determines the full list of kernel-known controllers. Might +         * include controllers we don't actually support, arbitrary +         * named hierarchies and controllers that aren't currently +         * accessible (because not mounted). */ +          f = fopen("/proc/cgroups", "re");          if (!f) {                  if (errno == ENOENT) @@ -1889,7 +2069,7 @@ int cg_kernel_controllers(Set *controllers) {                          if (feof(f))                                  break; -                        if (ferror(f) && errno) +                        if (ferror(f) && errno != 0)                                  return -errno;                          return -EBADMSG; @@ -1900,7 +2080,7 @@ int cg_kernel_controllers(Set *controllers) {                          continue;                  } -                if (!filename_is_valid(controller)) { +                if (!cg_controller_is_valid(controller)) {                          free(controller);                          return -EBADMSG;                  } @@ -1912,3 +2092,122 @@ int cg_kernel_controllers(Set *controllers) {          return 0;  } + +static thread_local int unified_cache = -1; + +int cg_unified(void) { +        struct statfs fs; + +        /* Checks if we support the unified hierarchy. Returns an +         * error when the cgroup hierarchies aren't mounted yet or we +         * have any other trouble determining if the unified hierarchy +         * is supported. */ + +        if (unified_cache >= 0) +                return unified_cache; + +        if (statfs("/sys/fs/cgroup/", &fs) < 0) +                return -errno; + +        if (F_TYPE_EQUAL(fs.f_type, CGROUP_SUPER_MAGIC)) +                unified_cache = true; +        else if (F_TYPE_EQUAL(fs.f_type, TMPFS_MAGIC)) +                unified_cache = false; +        else +                return -ENOEXEC; + +        return unified_cache; +} + +void cg_unified_flush(void) { +        unified_cache = -1; +} + +int cg_enable_everywhere(CGroupMask supported, CGroupMask mask, const char *p) { +        _cleanup_free_ char *fs = NULL; +        CGroupController c; +        int r, unified; + +        assert(p); + +        if (supported == 0) +                return 0; + +        unified = cg_unified(); +        if (unified < 0) +                return unified; +        if (!unified) /* on the legacy hiearchy there's no joining of controllers defined */ +                return 0; + +        r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, p, "cgroup.subtree_control", &fs); +        if (r < 0) +                return r; + +        for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) { +                CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c); +                const char *n; + +                if (!(supported & bit)) +                        continue; + +                n = cgroup_controller_to_string(c); +                { +                        char s[1 + strlen(n) + 1]; + +                        s[0] = mask & bit ? '+' : '-'; +                        strcpy(s + 1, n); + +                        r = write_string_file(fs, s, 0); +                        if (r < 0) +                                log_warning_errno(r, "Failed to enable controller %s for %s (%s): %m", n, p, fs); +                } +        } + +        return 0; +} + +bool cg_is_unified_wanted(void) { +        static thread_local int wanted = -1; +        int r, unified; + +        /* If the hierarchy is already mounted, then follow whatever +         * was chosen for it. */ +        unified = cg_unified(); +        if (unified >= 0) +                return unified; + +        /* Otherwise, let's see what the kernel command line has to +         * say. Since checking that is expensive, let's cache the +         * result. */ +        if (wanted >= 0) +                return wanted; + +        r = get_proc_cmdline_key("systemd.unified_cgroup_hierarchy", NULL); +        if (r > 0) +                return (wanted = true); +        else { +                _cleanup_free_ char *value = NULL; + +                r = get_proc_cmdline_key("systemd.unified_cgroup_hierarchy=", &value); +                if (r < 0) +                        return false; +                if (r == 0) +                        return (wanted = false); + +                return (wanted = parse_boolean(value) > 0); +        } +} + +bool cg_is_legacy_wanted(void) { +        return !cg_is_unified_wanted(); +} + +static const char *cgroup_controller_table[_CGROUP_CONTROLLER_MAX] = { +        [CGROUP_CONTROLLER_CPU] = "cpu", +        [CGROUP_CONTROLLER_CPUACCT] = "cpuacct", +        [CGROUP_CONTROLLER_BLKIO] = "blkio", +        [CGROUP_CONTROLLER_MEMORY] = "memory", +        [CGROUP_CONTROLLER_DEVICE] = "device", +}; + +DEFINE_STRING_TABLE_LOOKUP(cgroup_controller, CGroupController); diff --git a/src/basic/cgroup-util.h b/src/basic/cgroup-util.h index fd72e9e5c5..6fd6d80590 100644 --- a/src/basic/cgroup-util.h +++ b/src/basic/cgroup-util.h @@ -28,15 +28,28 @@  #include "set.h"  #include "def.h" +/* An enum of well known cgroup controllers */ +typedef enum CGroupController { +        CGROUP_CONTROLLER_CPU, +        CGROUP_CONTROLLER_CPUACCT, +        CGROUP_CONTROLLER_BLKIO, +        CGROUP_CONTROLLER_MEMORY, +        CGROUP_CONTROLLER_DEVICE, +        _CGROUP_CONTROLLER_MAX, +        _CGROUP_CONTROLLER_INVALID = -1, +} CGroupController; + +#define CGROUP_CONTROLLER_TO_MASK(c) (1 << (c)) +  /* A bit mask of well known cgroup controllers */ -typedef enum CGroupControllerMask { -        CGROUP_CPU = 1, -        CGROUP_CPUACCT = 2, -        CGROUP_BLKIO = 4, -        CGROUP_MEMORY = 8, -        CGROUP_DEVICE = 16, -        _CGROUP_CONTROLLER_MASK_ALL = 31 -} CGroupControllerMask; +typedef enum CGroupMask { +        CGROUP_MASK_CPU = CGROUP_CONTROLLER_TO_MASK(CGROUP_CONTROLLER_CPU), +        CGROUP_MASK_CPUACCT = CGROUP_CONTROLLER_TO_MASK(CGROUP_CONTROLLER_CPUACCT), +        CGROUP_MASK_BLKIO = CGROUP_CONTROLLER_TO_MASK(CGROUP_CONTROLLER_BLKIO), +        CGROUP_MASK_MEMORY = CGROUP_CONTROLLER_TO_MASK(CGROUP_CONTROLLER_MEMORY), +        CGROUP_MASK_DEVICE = CGROUP_CONTROLLER_TO_MASK(CGROUP_CONTROLLER_DEVICE), +        _CGROUP_MASK_ALL = CGROUP_CONTROLLER_TO_MASK(_CGROUP_CONTROLLER_MAX) - 1 +} CGroupMask;  /*   * General rules: @@ -77,7 +90,6 @@ int cg_pid_get_path(const char *controller, pid_t pid, char **path);  int cg_trim(const char *controller, const char *path, bool delete_root);  int cg_rmdir(const char *controller, const char *path); -int cg_delete(const char *controller, const char *path);  int cg_create(const char *controller, const char *path);  int cg_attach(const char *controller, const char *path, pid_t pid); @@ -93,8 +105,8 @@ int cg_set_task_access(const char *controller, const char *path, mode_t mode, ui  int cg_install_release_agent(const char *controller, const char *agent);  int cg_uninstall_release_agent(const char *controller); -int cg_is_empty(const char *controller, const char *path, bool ignore_self); -int cg_is_empty_recursive(const char *controller, const char *path, bool ignore_self); +int cg_is_empty(const char *controller, const char *path); +int cg_is_empty_recursive(const char *controller, const char *path);  int cg_get_root_path(char **path); @@ -126,14 +138,24 @@ bool cg_controller_is_valid(const char *p);  int cg_slice_to_path(const char *unit, char **ret); -typedef const char* (*cg_migrate_callback_t)(CGroupControllerMask mask, void *userdata); +typedef const char* (*cg_migrate_callback_t)(CGroupMask mask, void *userdata); -int cg_create_everywhere(CGroupControllerMask supported, CGroupControllerMask mask, const char *path); -int cg_attach_everywhere(CGroupControllerMask supported, const char *path, pid_t pid, cg_migrate_callback_t callback, void *userdata); -int cg_attach_many_everywhere(CGroupControllerMask supported, const char *path, Set* pids, cg_migrate_callback_t callback, void *userdata); -int cg_migrate_everywhere(CGroupControllerMask supported, const char *from, const char *to, cg_migrate_callback_t callback, void *userdata); -int cg_trim_everywhere(CGroupControllerMask supported, const char *path, bool delete_root); +int cg_create_everywhere(CGroupMask supported, CGroupMask mask, const char *path); +int cg_attach_everywhere(CGroupMask supported, const char *path, pid_t pid, cg_migrate_callback_t callback, void *userdata); +int cg_attach_many_everywhere(CGroupMask supported, const char *path, Set* pids, cg_migrate_callback_t callback, void *userdata); +int cg_migrate_everywhere(CGroupMask supported, const char *from, const char *to, cg_migrate_callback_t callback, void *userdata); +int cg_trim_everywhere(CGroupMask supported, const char *path, bool delete_root); +int cg_enable_everywhere(CGroupMask supported, CGroupMask mask, const char *p); -CGroupControllerMask cg_mask_supported(void); +int cg_mask_supported(CGroupMask *ret);  int cg_kernel_controllers(Set *controllers); + +int cg_unified(void); +void cg_unified_flush(void); + +bool cg_is_unified_wanted(void); +bool cg_is_legacy_wanted(void); + +const char* cgroup_controller_to_string(CGroupController c) _const_; +CGroupController cgroup_controller_from_string(const char *s) _pure_; diff --git a/src/basic/def.h b/src/basic/def.h index 5aaba1fe87..7c4161eb72 100644 --- a/src/basic/def.h +++ b/src/basic/def.h @@ -35,7 +35,7 @@   * the watchdog pings will keep the loop busy. */  #define DEFAULT_EXIT_USEC (30*USEC_PER_SEC) -#define SYSTEMD_CGROUP_CONTROLLER "systemd" +#define SYSTEMD_CGROUP_CONTROLLER "name=systemd"  #define SIGNALS_CRASH_HANDLER SIGSEGV,SIGILL,SIGFPE,SIGBUS,SIGQUIT,SIGABRT  #define SIGNALS_IGNORE SIGPIPE diff --git a/src/basic/missing.h b/src/basic/missing.h index 34ab0254dd..dc1f244d4c 100644 --- a/src/basic/missing.h +++ b/src/basic/missing.h @@ -492,6 +492,14 @@ struct btrfs_ioctl_quota_ctl_args {  #define BTRFS_SUPER_MAGIC 0x9123683E  #endif +#ifndef CGROUP_SUPER_MAGIC +#define CGROUP_SUPER_MAGIC 0x27e0eb +#endif + +#ifndef TMPFS_MAGIC +#define TMPFS_MAGIC 0x01021994 +#endif +  #ifndef MS_MOVE  #define MS_MOVE 8192  #endif diff --git a/src/basic/selinux-util.c b/src/basic/selinux-util.c index 7c58985cd2..a39a0f775a 100644 --- a/src/basic/selinux-util.c +++ b/src/basic/selinux-util.c @@ -199,11 +199,11 @@ int mac_selinux_get_create_label_from_exe(const char *exe, char **label) {          if (!mac_selinux_use())                  return -EOPNOTSUPP; -        r = getcon(&mycon); +        r = getcon_raw(&mycon);          if (r < 0)                  return -errno; -        r = getfilecon(exe, &fcon); +        r = getfilecon_raw(exe, &fcon);          if (r < 0)                  return -errno; @@ -225,7 +225,7 @@ int mac_selinux_get_our_label(char **label) {          if (!mac_selinux_use())                  return -EOPNOTSUPP; -        r = getcon(label); +        r = getcon_raw(label);          if (r < 0)                  return -errno;  #endif @@ -249,7 +249,7 @@ int mac_selinux_get_child_mls_label(int socket_fd, const char *exe, const char *          if (!mac_selinux_use())                  return -EOPNOTSUPP; -        r = getcon(&mycon); +        r = getcon_raw(&mycon);          if (r < 0)                  return -errno; @@ -260,7 +260,7 @@ int mac_selinux_get_child_mls_label(int socket_fd, const char *exe, const char *          if (!exec_label) {                  /* If there is no context set for next exec let's use context                     of target executable */ -                r = getfilecon(exe, &fcon); +                r = getfilecon_raw(exe, &fcon);                  if (r < 0)                          return -errno;          } diff --git a/src/basic/set.h b/src/basic/set.h index 51e40d3a6c..4554ef2d49 100644 --- a/src/basic/set.h +++ b/src/basic/set.h @@ -28,12 +28,14 @@ Set *internal_set_new(const struct hash_ops *hash_ops  HASHMAP_DEBUG_PARAMS);  #define set_new(ops) internal_set_new(ops  HASHMAP_DEBUG_SRC_ARGS) -static inline void set_free(Set *s) { +static inline Set *set_free(Set *s) {          internal_hashmap_free(HASHMAP_BASE(s)); +        return NULL;  } -static inline void set_free_free(Set *s) { +static inline Set *set_free_free(Set *s) {          internal_hashmap_free_free(HASHMAP_BASE(s)); +        return NULL;  }  /* no set_free_free_free */ diff --git a/src/basic/special.h b/src/basic/special.h index e51310eb6d..f30458f25a 100644 --- a/src/basic/special.h +++ b/src/basic/special.h @@ -115,3 +115,6 @@  #define SPECIAL_USER_SLICE "user.slice"  #define SPECIAL_MACHINE_SLICE "machine.slice"  #define SPECIAL_ROOT_SLICE "-.slice" + +/* The scope unit systemd itself lives in. */ +#define SPECIAL_INIT_SCOPE "init.scope" diff --git a/src/basic/terminal-util.c b/src/basic/terminal-util.c index cf55263bbf..c5ef5ab0d1 100644 --- a/src/basic/terminal-util.c +++ b/src/basic/terminal-util.c @@ -1074,3 +1074,22 @@ int get_ctty(pid_t pid, dev_t *_devnr, char **r) {          return 0;  } + +int ptsname_namespace(int pty, char **ret) { +        int no = -1, r; + +        /* Like ptsname(), but doesn't assume that the path is +         * accessible in the local namespace. */ + +        r = ioctl(pty, TIOCGPTN, &no); +        if (r < 0) +                return -errno; + +        if (no < 0) +                return -EIO; + +        if (asprintf(ret, "/dev/pts/%i", no) < 0) +                return -ENOMEM; + +        return 0; +} diff --git a/src/basic/terminal-util.h b/src/basic/terminal-util.h index 188714f228..b9a3809a6c 100644 --- a/src/basic/terminal-util.h +++ b/src/basic/terminal-util.h @@ -107,3 +107,5 @@ int get_ctty(pid_t, dev_t *_devnr, char **r);  int getttyname_malloc(int fd, char **r);  int getttyname_harder(int fd, char **r); + +int ptsname_namespace(int pty, char **ret); diff --git a/src/basic/time-util.c b/src/basic/time-util.c index afc6a6eb24..531931f6e1 100644 --- a/src/basic/time-util.c +++ b/src/basic/time-util.c @@ -1046,7 +1046,7 @@ clockid_t clock_boottime_or_monotonic(void) {          return clock;  } -int get_timezone(char **timezone) { +int get_timezone(char **tz) {          _cleanup_free_ char *t = NULL;          const char *e;          char *z; @@ -1069,6 +1069,6 @@ int get_timezone(char **timezone) {          if (!z)                  return -ENOMEM; -        *timezone = z; +        *tz = z;          return 0;  } diff --git a/src/basic/util.c b/src/basic/util.c index 737f2a221c..f01f5f237b 100644 --- a/src/basic/util.c +++ b/src/basic/util.c @@ -6095,6 +6095,9 @@ int openpt_in_namespace(pid_t pid, int flags) {                  if (master < 0)                          _exit(EXIT_FAILURE); +                if (unlockpt(master) < 0) +                        _exit(EXIT_FAILURE); +                  cmsg = CMSG_FIRSTHDR(&mh);                  cmsg->cmsg_level = SOL_SOCKET;                  cmsg->cmsg_type = SCM_RIGHTS; diff --git a/src/basic/util.h b/src/basic/util.h index 1484ef58e5..ff7a00e928 100644 --- a/src/basic/util.h +++ b/src/basic/util.h @@ -363,6 +363,9 @@ int fd_is_temporary_fs(int fd);  int pipe_eof(int fd); +DEFINE_TRIVIAL_CLEANUP_FUNC(cpu_set_t*, CPU_FREE); +#define _cleanup_cpu_free_ _cleanup_(CPU_FREEp) +  cpu_set_t* cpu_set_malloc(unsigned *ncpus);  #define xsprintf(buf, fmt, ...) assert_se((size_t) snprintf(buf, ELEMENTSOF(buf), fmt, __VA_ARGS__) < ELEMENTSOF(buf)) diff --git a/src/basic/virt.c b/src/basic/virt.c index a8d26716a1..4a4bebd528 100644 --- a/src/basic/virt.c +++ b/src/basic/virt.c @@ -156,7 +156,8 @@ static int detect_vm_dmi(const char **_id) {                  "VMW\0"                   "vmware\0"                  "innotek GmbH\0"          "oracle\0"                  "Xen\0"                   "xen\0" -                "Bochs\0"                 "bochs\0"; +                "Bochs\0"                 "bochs\0" +                "Parallels\0"             "parallels\0";          unsigned i;          for (i = 0; i < ELEMENTSOF(dmi_vendors); i++) { @@ -244,8 +245,9 @@ int detect_vm(const char **id) {          r = detect_vm_dmi(&_id);          /* kvm with and without Virtualbox */ +        /* Parallels exports KVMKVMKVM leaf */          if (streq_ptr(_id_cpuid, "kvm")) { -                if (r > 0 && streq(_id, "oracle")) +                if (r > 0 && (streq(_id, "oracle") || streq(_id, "parallels")))                          goto finish;                  _id = _id_cpuid; diff --git a/src/cgls/cgls.c b/src/cgls/cgls.c index a8d910d532..4fb642e7b3 100644 --- a/src/cgls/cgls.c +++ b/src/cgls/cgls.c @@ -225,7 +225,10 @@ int main(int argc, char *argv[]) {                                  } else                                          path = root; -                                printf("Controller %s; control group %s:\n", controller, path); +                                if (cg_unified() > 0) +                                        printf("Control group %s:\n", path); +                                else +                                        printf("Controller %s; control group %s:\n", controller, path);                                  fflush(stdout);                                  q = show_cgroup(controller, path, NULL, 0, arg_kernel_threads, output_flags); diff --git a/src/cgtop/cgtop.c b/src/cgtop/cgtop.c index 06a43d15e4..1c94bea31a 100644 --- a/src/cgtop/cgtop.c +++ b/src/cgtop/cgtop.c @@ -175,7 +175,7 @@ static int process(                  if (g->n_tasks > 0)                          g->n_tasks_valid = true; -        } else if (streq(controller, "cpuacct")) { +        } else if (streq(controller, "cpuacct") && cg_unified() <= 0) {                  _cleanup_free_ char *p = NULL, *v = NULL;                  uint64_t new_usage;                  nsec_t timestamp; @@ -217,7 +217,10 @@ static int process(          } else if (streq(controller, "memory")) {                  _cleanup_free_ char *p = NULL, *v = NULL; -                r = cg_get_path(controller, path, "memory.usage_in_bytes", &p); +                if (cg_unified() <= 0) +                        r = cg_get_path(controller, path, "memory.usage_in_bytes", &p); +                else +                        r = cg_get_path(controller, path, "memory.current", &p);                  if (r < 0)                          return r; @@ -234,7 +237,7 @@ static int process(                  if (g->memory > 0)                          g->memory_valid = true; -        } else if (streq(controller, "blkio")) { +        } else if (streq(controller, "blkio") && cg_unified() <= 0) {                  _cleanup_fclose_ FILE *f = NULL;                  _cleanup_free_ char *p = NULL;                  uint64_t wr = 0, rd = 0; @@ -560,15 +563,17 @@ static void display(Hashmap *a) {                  path_columns = maxtpath;          for (j = 0; j < n; j++) { -                _cleanup_free_ char *p = NULL; +                _cleanup_free_ char *ellipsized = NULL; +                const char *path;                  if (on_tty() && j + 5 > rows)                          break;                  g = array[j]; -                p = ellipsize(g->path, path_columns, 33); -                printf("%-*s", path_columns, p ?: g->path); +                path = isempty(g->path) ? "/" : g->path; +                ellipsized = ellipsize(path, path_columns, 33); +                printf("%-*s", path_columns, ellipsized ?: path);                  if (g->n_tasks_valid)                          printf(" %7u", g->n_tasks); diff --git a/src/core/cgroup.c b/src/core/cgroup.c index c26807ba2b..a70b4d33ae 100644 --- a/src/core/cgroup.c +++ b/src/core/cgroup.c @@ -283,7 +283,7 @@ fail:          return -errno;  } -void cgroup_context_apply(CGroupContext *c, CGroupControllerMask mask, const char *path, ManagerState state) { +void cgroup_context_apply(CGroupContext *c, CGroupMask mask, const char *path, ManagerState state) {          bool is_root;          int r; @@ -304,7 +304,7 @@ void cgroup_context_apply(CGroupContext *c, CGroupControllerMask mask, const cha           * cgroup trees (assuming we are running in a container then),           * and missing cgroups, i.e. EROFS and ENOENT. */ -        if ((mask & CGROUP_CPU) && !is_root) { +        if ((mask & CGROUP_MASK_CPU) && !is_root) {                  char buf[MAX(DECIMAL_STR_MAX(unsigned long), DECIMAL_STR_MAX(usec_t)) + 1];                  sprintf(buf, "%lu\n", @@ -331,7 +331,7 @@ void cgroup_context_apply(CGroupContext *c, CGroupControllerMask mask, const cha                                         "Failed to set cpu.cfs_quota_us on %s: %m", path);          } -        if (mask & CGROUP_BLKIO) { +        if (mask & CGROUP_MASK_BLKIO) {                  char buf[MAX3(DECIMAL_STR_MAX(unsigned long)+1,                                DECIMAL_STR_MAX(dev_t)*2+2+DECIMAL_STR_MAX(unsigned long)*1,                                DECIMAL_STR_MAX(dev_t)*2+2+DECIMAL_STR_MAX(uint64_t)+1)]; @@ -381,21 +381,30 @@ void cgroup_context_apply(CGroupContext *c, CGroupControllerMask mask, const cha                  }          } -        if ((mask & CGROUP_MEMORY) && !is_root) { +        if ((mask & CGROUP_MASK_MEMORY) && !is_root) {                  if (c->memory_limit != (uint64_t) -1) {                          char buf[DECIMAL_STR_MAX(uint64_t) + 1];                          sprintf(buf, "%" PRIu64 "\n", c->memory_limit); -                        r = cg_set_attribute("memory", path, "memory.limit_in_bytes", buf); -                } else -                        r = cg_set_attribute("memory", path, "memory.limit_in_bytes", "-1"); + +                        if (cg_unified() <= 0) +                                r = cg_set_attribute("memory", path, "memory.limit_in_bytes", buf); +                        else +                                r = cg_set_attribute("memory", path, "memory.max", buf); + +                } else { +                        if (cg_unified() <= 0) +                                r = cg_set_attribute("memory", path, "memory.limit_in_bytes", "-1"); +                        else +                                r = cg_set_attribute("memory", path, "memory.max", "max"); +                }                  if (r < 0)                          log_full_errno(IN_SET(r, -ENOENT, -EROFS) ? LOG_DEBUG : LOG_WARNING, r, -                                       "Failed to set memory.limit_in_bytes on %s: %m", path); +                                       "Failed to set memory.limit_in_bytes/memory.max on %s: %m", path);          } -        if ((mask & CGROUP_DEVICE) && !is_root) { +        if ((mask & CGROUP_MASK_DEVICE) && !is_root) {                  CGroupDeviceAllow *a;                  /* Changing the devices list of a populated cgroup @@ -459,8 +468,8 @@ void cgroup_context_apply(CGroupContext *c, CGroupControllerMask mask, const cha          }  } -CGroupControllerMask cgroup_context_get_mask(CGroupContext *c) { -        CGroupControllerMask mask = 0; +CGroupMask cgroup_context_get_mask(CGroupContext *c) { +        CGroupMask mask = 0;          /* Figure out which controllers we need */ @@ -468,29 +477,31 @@ CGroupControllerMask cgroup_context_get_mask(CGroupContext *c) {              c->cpu_shares != (unsigned long) -1 ||              c->startup_cpu_shares != (unsigned long) -1 ||              c->cpu_quota_per_sec_usec != USEC_INFINITY) -                mask |= CGROUP_CPUACCT | CGROUP_CPU; +                mask |= CGROUP_MASK_CPUACCT | CGROUP_MASK_CPU;          if (c->blockio_accounting ||              c->blockio_weight != (unsigned long) -1 ||              c->startup_blockio_weight != (unsigned long) -1 ||              c->blockio_device_weights ||              c->blockio_device_bandwidths) -                mask |= CGROUP_BLKIO; +                mask |= CGROUP_MASK_BLKIO;          if (c->memory_accounting ||              c->memory_limit != (uint64_t) -1) -                mask |= CGROUP_MEMORY; +                mask |= CGROUP_MASK_MEMORY;          if (c->device_allow ||              c->device_policy != CGROUP_AUTO) -                mask |= CGROUP_DEVICE; +                mask |= CGROUP_MASK_DEVICE;          return mask;  } -CGroupControllerMask unit_get_cgroup_mask(Unit *u) { +CGroupMask unit_get_own_mask(Unit *u) {          CGroupContext *c; +        /* Returns the mask of controllers the unit needs for itself */ +          c = unit_get_cgroup_context(u);          if (!c)                  return 0; @@ -505,15 +516,18 @@ CGroupControllerMask unit_get_cgroup_mask(Unit *u) {                  e = unit_get_exec_context(u);                  if (!e || exec_context_maintains_privileges(e)) -                        return _CGROUP_CONTROLLER_MASK_ALL; +                        return _CGROUP_MASK_ALL;          }          return cgroup_context_get_mask(c);  } -CGroupControllerMask unit_get_members_mask(Unit *u) { +CGroupMask unit_get_members_mask(Unit *u) {          assert(u); +        /* Returns the mask of controllers all of the unit's children +         * require, merged */ +          if (u->cgroup_members_mask_valid)                  return u->cgroup_members_mask; @@ -532,7 +546,7 @@ CGroupControllerMask unit_get_members_mask(Unit *u) {                                  continue;                          u->cgroup_members_mask |= -                                unit_get_cgroup_mask(member) | +                                unit_get_own_mask(member) |                                  unit_get_members_mask(member);                  }          } @@ -541,19 +555,52 @@ CGroupControllerMask unit_get_members_mask(Unit *u) {          return u->cgroup_members_mask;  } -CGroupControllerMask unit_get_siblings_mask(Unit *u) { +CGroupMask unit_get_siblings_mask(Unit *u) {          assert(u); +        /* Returns the mask of controllers all of the unit's siblings +         * require, i.e. the members mask of the unit's parent slice +         * if there is one. */ +          if (UNIT_ISSET(u->slice))                  return unit_get_members_mask(UNIT_DEREF(u->slice)); -        return unit_get_cgroup_mask(u) | unit_get_members_mask(u); +        return unit_get_own_mask(u) | unit_get_members_mask(u);  } -CGroupControllerMask unit_get_target_mask(Unit *u) { -        CGroupControllerMask mask; +CGroupMask unit_get_subtree_mask(Unit *u) { + +        /* Returns the mask of this subtree, meaning of the group +         * itself and its children. */ + +        return unit_get_own_mask(u) | unit_get_members_mask(u); +} -        mask = unit_get_cgroup_mask(u) | unit_get_members_mask(u) | unit_get_siblings_mask(u); +CGroupMask unit_get_target_mask(Unit *u) { +        CGroupMask mask; + +        /* This returns the cgroup mask of all controllers to enable +         * for a specific cgroup, i.e. everything it needs itself, +         * plus all that its children need, plus all that its siblings +         * need. This is primarily useful on the legacy cgroup +         * hierarchy, where we need to duplicate each cgroup in each +         * hierarchy that shall be enabled for it. */ + +        mask = unit_get_own_mask(u) | unit_get_members_mask(u) | unit_get_siblings_mask(u); +        mask &= u->manager->cgroup_supported; + +        return mask; +} + +CGroupMask unit_get_enable_mask(Unit *u) { +        CGroupMask mask; + +        /* This returns the cgroup mask of all controllers to enable +         * for the children of a specific cgroup. This is primarily +         * useful for the unified cgroup hierarchy, where each cgroup +         * controls which controllers are enabled for its children. */ + +        mask = unit_get_members_mask(u);          mask &= u->manager->cgroup_supported;          return mask; @@ -562,13 +609,13 @@ CGroupControllerMask unit_get_target_mask(Unit *u) {  /* Recurse from a unit up through its containing slices, propagating   * mask bits upward. A unit is also member of itself. */  void unit_update_cgroup_members_masks(Unit *u) { -        CGroupControllerMask m; +        CGroupMask m;          bool more;          assert(u);          /* Calculate subtree mask */ -        m = unit_get_cgroup_mask(u) | unit_get_members_mask(u); +        m = unit_get_subtree_mask(u);          /* See if anything changed from the previous invocation. If           * not, we're done. */ @@ -608,7 +655,7 @@ void unit_update_cgroup_members_masks(Unit *u) {          }  } -static const char *migrate_callback(CGroupControllerMask mask, void *userdata) { +static const char *migrate_callback(CGroupMask mask, void *userdata) {          Unit *u = userdata;          assert(mask != 0); @@ -626,7 +673,115 @@ static const char *migrate_callback(CGroupControllerMask mask, void *userdata) {          return NULL;  } -static int unit_create_cgroups(Unit *u, CGroupControllerMask mask) { +char *unit_default_cgroup_path(Unit *u) { +        _cleanup_free_ char *escaped = NULL, *slice = NULL; +        int r; + +        assert(u); + +        if (unit_has_name(u, SPECIAL_ROOT_SLICE)) +                return strdup(u->manager->cgroup_root); + +        if (UNIT_ISSET(u->slice) && !unit_has_name(UNIT_DEREF(u->slice), SPECIAL_ROOT_SLICE)) { +                r = cg_slice_to_path(UNIT_DEREF(u->slice)->id, &slice); +                if (r < 0) +                        return NULL; +        } + +        escaped = cg_escape(u->id); +        if (!escaped) +                return NULL; + +        if (slice) +                return strjoin(u->manager->cgroup_root, "/", slice, "/", escaped, NULL); +        else +                return strjoin(u->manager->cgroup_root, "/", escaped, NULL); +} + +int unit_set_cgroup_path(Unit *u, const char *path) { +        _cleanup_free_ char *p = NULL; +        int r; + +        assert(u); + +        if (path) { +                p = strdup(path); +                if (!p) +                        return -ENOMEM; +        } else +                p = NULL; + +        if (streq_ptr(u->cgroup_path, p)) +                return 0; + +        if (p) { +                r = hashmap_put(u->manager->cgroup_unit, p, u); +                if (r < 0) +                        return r; +        } + +        unit_release_cgroup(u); + +        u->cgroup_path = p; +        p = NULL; + +        return 1; +} + +int unit_watch_cgroup(Unit *u) { +        _cleanup_free_ char *populated = NULL; +        int r; + +        assert(u); + +        if (!u->cgroup_path) +                return 0; + +        if (u->cgroup_inotify_wd >= 0) +                return 0; + +        /* Only applies to the unified hierarchy */ +        r = cg_unified(); +        if (r < 0) +                return log_unit_error_errno(u, r, "Failed detect wether the unified hierarchy is used: %m"); +        if (r == 0) +                return 0; + +        /* Don't watch the root slice, it's pointless. */ +        if (unit_has_name(u, SPECIAL_ROOT_SLICE)) +                return 0; + +        r = hashmap_ensure_allocated(&u->manager->cgroup_inotify_wd_unit, &trivial_hash_ops); +        if (r < 0) +                return log_oom(); + +        r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, "cgroup.populated", &populated); +        if (r < 0) +                return log_oom(); + +        u->cgroup_inotify_wd = inotify_add_watch(u->manager->cgroup_inotify_fd, populated, IN_MODIFY); +        if (u->cgroup_inotify_wd < 0) { + +                if (errno == ENOENT) /* If the directory is already +                                      * gone we don't need to track +                                      * it, so this is not an error */ +                        return 0; + +                return log_unit_error_errno(u, errno, "Failed to add inotify watch descriptor for control group %s: %m", u->cgroup_path); +        } + +        r = hashmap_put(u->manager->cgroup_inotify_wd_unit, INT_TO_PTR(u->cgroup_inotify_wd), u); +        if (r < 0) +                return log_unit_error_errno(u, r, "Failed to add inotify watch descriptor to hash map: %m"); + +        return 0; +} + +static int unit_create_cgroup( +                Unit *u, +                CGroupMask target_mask, +                CGroupMask enable_mask) { +          CGroupContext *c;          int r; @@ -643,25 +798,29 @@ static int unit_create_cgroups(Unit *u, CGroupControllerMask mask) {                  if (!path)                          return log_oom(); -                r = hashmap_put(u->manager->cgroup_unit, path, u); -                if (r < 0) { -                        log_error(r == -EEXIST ? "cgroup %s exists already: %s" : "hashmap_put failed for %s: %s", path, strerror(-r)); -                        return r; -                } -                if (r > 0) { -                        u->cgroup_path = path; -                        path = NULL; -                } +                r = unit_set_cgroup_path(u, path); +                if (r == -EEXIST) +                        return log_unit_error_errno(u, r, "Control group %s exists already.", path); +                if (r < 0) +                        return log_unit_error_errno(u, r, "Failed to set unit's control group path to %s: %m", path);          }          /* First, create our own group */ -        r = cg_create_everywhere(u->manager->cgroup_supported, mask, u->cgroup_path); +        r = cg_create_everywhere(u->manager->cgroup_supported, target_mask, u->cgroup_path);          if (r < 0) -                return log_error_errno(r, "Failed to create cgroup %s: %m", u->cgroup_path); +                return log_unit_error_errno(u, r, "Failed to create cgroup %s: %m", u->cgroup_path); + +        /* Start watching it */ +        (void) unit_watch_cgroup(u); + +        /* Enable all controllers we need */ +        r = cg_enable_everywhere(u->manager->cgroup_supported, enable_mask, u->cgroup_path); +        if (r < 0) +                log_unit_warning_errno(u, r, "Failed to enable controllers on cgroup %s, ignoring: %m", u->cgroup_path);          /* Keep track that this is now realized */          u->cgroup_realized = true; -        u->cgroup_realized_mask = mask; +        u->cgroup_realized_mask = target_mask;          if (u->type != UNIT_SLICE && !c->delegate) { @@ -670,7 +829,7 @@ static int unit_create_cgroups(Unit *u, CGroupControllerMask mask) {                   * for slice and delegation units. */                  r = cg_migrate_everywhere(u->manager->cgroup_supported, u->cgroup_path, u->cgroup_path, migrate_callback, u);                  if (r < 0) -                        log_warning_errno(r, "Failed to migrate cgroup from to %s: %m", u->cgroup_path); +                        log_unit_warning_errno(u, r, "Failed to migrate cgroup from to %s, ignoring: %m", u->cgroup_path);          }          return 0; @@ -691,10 +850,10 @@ int unit_attach_pids_to_cgroup(Unit *u) {          return 0;  } -static bool unit_has_mask_realized(Unit *u, CGroupControllerMask mask) { +static bool unit_has_mask_realized(Unit *u, CGroupMask target_mask) {          assert(u); -        return u->cgroup_realized && u->cgroup_realized_mask == mask; +        return u->cgroup_realized && u->cgroup_realized_mask == target_mask;  }  /* Check if necessary controllers and attributes for a unit are in place. @@ -704,7 +863,7 @@ static bool unit_has_mask_realized(Unit *u, CGroupControllerMask mask) {   *   * Returns 0 on success and < 0 on failure. */  static int unit_realize_cgroup_now(Unit *u, ManagerState state) { -        CGroupControllerMask mask; +        CGroupMask target_mask, enable_mask;          int r;          assert(u); @@ -714,9 +873,8 @@ static int unit_realize_cgroup_now(Unit *u, ManagerState state) {                  u->in_cgroup_queue = false;          } -        mask = unit_get_target_mask(u); - -        if (unit_has_mask_realized(u, mask)) +        target_mask = unit_get_target_mask(u); +        if (unit_has_mask_realized(u, target_mask))                  return 0;          /* First, realize parents */ @@ -727,12 +885,13 @@ static int unit_realize_cgroup_now(Unit *u, ManagerState state) {          }          /* And then do the real work */ -        r = unit_create_cgroups(u, mask); +        enable_mask = unit_get_enable_mask(u); +        r = unit_create_cgroup(u, target_mask, enable_mask);          if (r < 0)                  return r;          /* Finally, apply the necessary attributes. */ -        cgroup_context_apply(unit_get_cgroup_context(u), mask, u->cgroup_path, state); +        cgroup_context_apply(unit_get_cgroup_context(u), target_mask, u->cgroup_path, state);          return 0;  } @@ -759,7 +918,7 @@ unsigned manager_dispatch_cgroup_queue(Manager *m) {                  r = unit_realize_cgroup_now(i, state);                  if (r < 0) -                        log_warning_errno(r, "Failed to realize cgroups for queued unit %s: %m", i->id); +                        log_warning_errno(r, "Failed to realize cgroups for queued unit %s, ignoring: %m", i->id);                  n++;          } @@ -829,39 +988,67 @@ int unit_realize_cgroup(Unit *u) {          return unit_realize_cgroup_now(u, manager_state(u->manager));  } -void unit_destroy_cgroup_if_empty(Unit *u) { +void unit_release_cgroup(Unit *u) { +        assert(u); + +        /* Forgets all cgroup details for this cgroup */ + +        if (u->cgroup_path) { +                (void) hashmap_remove(u->manager->cgroup_unit, u->cgroup_path); +                u->cgroup_path = mfree(u->cgroup_path); +        } + +        if (u->cgroup_inotify_wd >= 0) { +                if (inotify_rm_watch(u->manager->cgroup_inotify_fd, u->cgroup_inotify_wd) < 0) +                        log_unit_debug_errno(u, errno, "Failed to remove cgroup inotify watch %i for %s, ignoring", u->cgroup_inotify_wd, u->id); + +                (void) hashmap_remove(u->manager->cgroup_inotify_wd_unit, INT_TO_PTR(u->cgroup_inotify_wd)); +                u->cgroup_inotify_wd = -1; +        } +} + +void unit_prune_cgroup(Unit *u) {          int r; +        bool is_root_slice;          assert(u); +        /* Removes the cgroup, if empty and possible, and stops watching it. */ +          if (!u->cgroup_path)                  return; -        r = cg_trim_everywhere(u->manager->cgroup_supported, u->cgroup_path, !unit_has_name(u, SPECIAL_ROOT_SLICE)); +        is_root_slice = unit_has_name(u, SPECIAL_ROOT_SLICE); + +        r = cg_trim_everywhere(u->manager->cgroup_supported, u->cgroup_path, !is_root_slice);          if (r < 0) { -                log_debug_errno(r, "Failed to destroy cgroup %s: %m", u->cgroup_path); +                log_debug_errno(r, "Failed to destroy cgroup %s, ignoring: %m", u->cgroup_path);                  return;          } -        hashmap_remove(u->manager->cgroup_unit, u->cgroup_path); +        if (is_root_slice) +                return; + +        unit_release_cgroup(u); -        free(u->cgroup_path); -        u->cgroup_path = NULL;          u->cgroup_realized = false;          u->cgroup_realized_mask = 0;  } -pid_t unit_search_main_pid(Unit *u) { +int unit_search_main_pid(Unit *u, pid_t *ret) {          _cleanup_fclose_ FILE *f = NULL;          pid_t pid = 0, npid, mypid; +        int r;          assert(u); +        assert(ret);          if (!u->cgroup_path) -                return 0; +                return -ENXIO; -        if (cg_enumerate_processes(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, &f) < 0) -                return 0; +        r = cg_enumerate_processes(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, &f); +        if (r < 0) +                return r;          mypid = getpid();          while (cg_read_pid(f, &npid) > 0)  { @@ -874,90 +1061,274 @@ pid_t unit_search_main_pid(Unit *u) {                  if (get_parent_of_pid(npid, &ppid) >= 0 && ppid != mypid)                          continue; -                if (pid != 0) { +                if (pid != 0)                          /* Dang, there's more than one daemonized PID                          in this group, so we don't know what process                          is the main process. */ -                        pid = 0; -                        break; -                } + +                        return -ENODATA;                  pid = npid;          } -        return pid; +        *ret = pid; +        return 0; +} + +static int unit_watch_pids_in_path(Unit *u, const char *path) { +       _cleanup_closedir_ DIR *d = NULL; +        _cleanup_fclose_ FILE *f = NULL; +        int ret = 0, r; + +        assert(u); +        assert(path); + +        r = cg_enumerate_processes(SYSTEMD_CGROUP_CONTROLLER, path, &f); +        if (r < 0) +                ret = r; +        else { +                pid_t pid; + +                while ((r = cg_read_pid(f, &pid)) > 0) { +                        r = unit_watch_pid(u, pid); +                        if (r < 0 && ret >= 0) +                                ret = r; +                } + +                if (r < 0 && ret >= 0) +                        ret = r; +        } + +        r = cg_enumerate_subgroups(SYSTEMD_CGROUP_CONTROLLER, path, &d); +        if (r < 0) { +                if (ret >= 0) +                        ret = r; +        } else { +                char *fn; + +                while ((r = cg_read_subgroup(d, &fn)) > 0) { +                        _cleanup_free_ char *p = NULL; + +                        p = strjoin(path, "/", fn, NULL); +                        free(fn); + +                        if (!p) +                                return -ENOMEM; + +                        r = unit_watch_pids_in_path(u, p); +                        if (r < 0 && ret >= 0) +                                ret = r; +                } + +                if (r < 0 && ret >= 0) +                        ret = r; +        } + +        return ret; +} + +int unit_watch_all_pids(Unit *u) { +        assert(u); + +        /* Adds all PIDs from our cgroup to the set of PIDs we +         * watch. This is a fallback logic for cases where we do not +         * get reliable cgroup empty notifications: we try to use +         * SIGCHLD as replacement. */ + +        if (!u->cgroup_path) +                return -ENOENT; + +        if (cg_unified() > 0) /* On unified we can use proper notifications */ +                return 0; + +        return unit_watch_pids_in_path(u, u->cgroup_path); +} + +int unit_notify_cgroup_empty(Unit *u) { +        int r; + +        assert(u); + +        if (!u->cgroup_path) +                return 0; + +        r = cg_is_empty_recursive(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path); +        if (r <= 0) +                return r; + +        unit_add_to_gc_queue(u); + +        if (UNIT_VTABLE(u)->notify_cgroup_empty) +                UNIT_VTABLE(u)->notify_cgroup_empty(u); + +        return 0; +} + +static int on_cgroup_inotify_event(sd_event_source *s, int fd, uint32_t revents, void *userdata) { +        Manager *m = userdata; + +        assert(s); +        assert(fd >= 0); +        assert(m); + +        for (;;) { +                union inotify_event_buffer buffer; +                struct inotify_event *e; +                ssize_t l; + +                l = read(fd, &buffer, sizeof(buffer)); +                if (l < 0) { +                        if (errno == EINTR || errno == EAGAIN) +                                return 0; + +                        return log_error_errno(errno, "Failed to read control group inotify events: %m"); +                } + +                FOREACH_INOTIFY_EVENT(e, buffer, l) { +                        Unit *u; + +                        if (e->wd < 0) +                                /* Queue overflow has no watch descriptor */ +                                continue; + +                        if (e->mask & IN_IGNORED) +                                /* The watch was just removed */ +                                continue; + +                        u = hashmap_get(m->cgroup_inotify_wd_unit, INT_TO_PTR(e->wd)); +                        if (!u) /* Not that inotify might deliver +                                 * events for a watch even after it +                                 * was removed, because it was queued +                                 * before the removal. Let's ignore +                                 * this here safely. */ +                                continue; + +                        (void) unit_notify_cgroup_empty(u); +                } +        }  }  int manager_setup_cgroup(Manager *m) {          _cleanup_free_ char *path = NULL; -        int r; +        CGroupController c; +        int r, unified; +        char *e;          assert(m);          /* 1. Determine hierarchy */ -        free(m->cgroup_root); -        m->cgroup_root = NULL; - +        m->cgroup_root = mfree(m->cgroup_root);          r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, 0, &m->cgroup_root);          if (r < 0)                  return log_error_errno(r, "Cannot determine cgroup we are running in: %m"); -        /* LEGACY: Already in /system.slice? If so, let's cut this -         * off. This is to support live upgrades from older systemd -         * versions where PID 1 was moved there. */ -        if (m->running_as == MANAGER_SYSTEM) { -                char *e; +        /* Chop off the init scope, if we are already located in it */ +        e = endswith(m->cgroup_root, "/" SPECIAL_INIT_SCOPE); +        /* LEGACY: Also chop off the system slice if we are in +         * it. This is to support live upgrades from older systemd +         * versions where PID 1 was moved there. Also see +         * cg_get_root_path(). */ +        if (!e && m->running_as == MANAGER_SYSTEM) {                  e = endswith(m->cgroup_root, "/" SPECIAL_SYSTEM_SLICE);                  if (!e) -                        e = endswith(m->cgroup_root, "/system"); -                if (e) -                        *e = 0; +                        e = endswith(m->cgroup_root, "/system"); /* even more legacy */          } +        if (e) +                *e = 0;          /* And make sure to store away the root value without trailing           * slash, even for the root dir, so that we can easily prepend           * it everywhere. */ -        if (streq(m->cgroup_root, "/")) -                m->cgroup_root[0] = 0; +        while ((e = endswith(m->cgroup_root, "/"))) +                *e = 0;          /* 2. Show data */          r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, m->cgroup_root, NULL, &path);          if (r < 0)                  return log_error_errno(r, "Cannot find cgroup mount point: %m"); -        log_debug("Using cgroup controller " SYSTEMD_CGROUP_CONTROLLER ". File system hierarchy is at %s.", path); +        unified = cg_unified(); +        if (unified < 0) +                return log_error_errno(r, "Couldn't determine if we are running in the unified hierarchy: %m"); +        if (unified > 0) +                log_debug("Unified cgroup hierarchy is located at %s.", path); +        else +                log_debug("Using cgroup controller " SYSTEMD_CGROUP_CONTROLLER ". File system hierarchy is at %s.", path); +          if (!m->test_run) { +                const char *scope_path;                  /* 3. Install agent */ -                if (m->running_as == MANAGER_SYSTEM) { +                if (unified) { + +                        /* In the unified hierarchy we can can get +                         * cgroup empty notifications via inotify. */ + +                        m->cgroup_inotify_event_source = sd_event_source_unref(m->cgroup_inotify_event_source); +                        safe_close(m->cgroup_inotify_fd); + +                        m->cgroup_inotify_fd = inotify_init1(IN_NONBLOCK|IN_CLOEXEC); +                        if (m->cgroup_inotify_fd < 0) +                                return log_error_errno(errno, "Failed to create control group inotify object: %m"); + +                        r = sd_event_add_io(m->event, &m->cgroup_inotify_event_source, m->cgroup_inotify_fd, EPOLLIN, on_cgroup_inotify_event, m); +                        if (r < 0) +                                return log_error_errno(r, "Failed to watch control group inotify object: %m"); + +                        r = sd_event_source_set_priority(m->cgroup_inotify_event_source, SD_EVENT_PRIORITY_IDLE - 5); +                        if (r < 0) +                                return log_error_errno(r, "Failed to set priority of inotify event source: %m"); + +                        (void) sd_event_source_set_description(m->cgroup_inotify_event_source, "cgroup-inotify"); + +                } else if (m->running_as == MANAGER_SYSTEM) { + +                        /* On the legacy hierarchy we only get +                         * notifications via cgroup agents. (Which +                         * isn't really reliable, since it does not +                         * generate events when control groups with +                         * children run empty. */ +                          r = cg_install_release_agent(SYSTEMD_CGROUP_CONTROLLER, SYSTEMD_CGROUP_AGENT_PATH);                          if (r < 0)                                  log_warning_errno(r, "Failed to install release agent, ignoring: %m");                          else if (r > 0)                                  log_debug("Installed release agent."); -                        else +                        else if (r == 0)                                  log_debug("Release agent already installed.");                  } -                /* 4. Make sure we are in the root cgroup */ -                r = cg_create_and_attach(SYSTEMD_CGROUP_CONTROLLER, m->cgroup_root, 0); +                /* 4. Make sure we are in the special "init.scope" unit in the root slice. */ +                scope_path = strjoina(m->cgroup_root, "/" SPECIAL_INIT_SCOPE); +                r = cg_create_and_attach(SYSTEMD_CGROUP_CONTROLLER, scope_path, 0);                  if (r < 0) -                        return log_error_errno(r, "Failed to create root cgroup hierarchy: %m"); +                        return log_error_errno(r, "Failed to create %s control group: %m", scope_path); + +                /* also, move all other userspace processes remaining +                 * in the root cgroup into that scope. */ +                r = cg_migrate(SYSTEMD_CGROUP_CONTROLLER, m->cgroup_root, SYSTEMD_CGROUP_CONTROLLER, scope_path, false); +                if (r < 0) +                        log_warning_errno(r, "Couldn't move remaining userspace processes, ignoring: %m");                  /* 5. And pin it, so that it cannot be unmounted */                  safe_close(m->pin_cgroupfs_fd); -                  m->pin_cgroupfs_fd = open(path, O_RDONLY|O_CLOEXEC|O_DIRECTORY|O_NOCTTY|O_NONBLOCK);                  if (m->pin_cgroupfs_fd < 0)                          return log_error_errno(errno, "Failed to open pin file: %m");                  /* 6.  Always enable hierarchical support if it exists... */ -                cg_set_attribute("memory", "/", "memory.use_hierarchy", "1"); +                if (!unified) +                        (void) cg_set_attribute("memory", "/", "memory.use_hierarchy", "1");          }          /* 7. Figure out which controllers are supported */ -        m->cgroup_supported = cg_mask_supported(); +        r = cg_mask_supported(&m->cgroup_supported); +        if (r < 0) +                return log_error_errno(r, "Failed to determine supported controllers: %m"); + +        for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) +                log_debug("Controller '%s' supported: %s", cgroup_controller_to_string(c), yes_no(m->cgroup_supported & c));          return 0;  } @@ -968,12 +1339,16 @@ void manager_shutdown_cgroup(Manager *m, bool delete) {          /* We can't really delete the group, since we are in it. But           * let's trim it. */          if (delete && m->cgroup_root) -                cg_trim(SYSTEMD_CGROUP_CONTROLLER, m->cgroup_root, false); +                (void) cg_trim(SYSTEMD_CGROUP_CONTROLLER, m->cgroup_root, false); + +        m->cgroup_inotify_wd_unit = hashmap_free(m->cgroup_inotify_wd_unit); + +        m->cgroup_inotify_event_source = sd_event_source_unref(m->cgroup_inotify_event_source); +        m->cgroup_inotify_fd = safe_close(m->cgroup_inotify_fd);          m->pin_cgroupfs_fd = safe_close(m->pin_cgroupfs_fd); -        free(m->cgroup_root); -        m->cgroup_root = NULL; +        m->cgroup_root = mfree(m->cgroup_root);  }  Unit* manager_get_unit_by_cgroup(Manager *m, const char *cgroup) { @@ -992,8 +1367,8 @@ Unit* manager_get_unit_by_cgroup(Manager *m, const char *cgroup) {                  char *e;                  e = strrchr(p, '/'); -                if (e == p || !e) -                        return NULL; +                if (!e || e == p) +                        return hashmap_get(m->cgroup_unit, SPECIAL_ROOT_SLICE);                  *e = 0; @@ -1005,13 +1380,25 @@ Unit* manager_get_unit_by_cgroup(Manager *m, const char *cgroup) {  Unit *manager_get_unit_by_pid(Manager *m, pid_t pid) {          _cleanup_free_ char *cgroup = NULL; +        Unit *u;          int r;          assert(m); -        if (pid <= 1) +        if (pid <= 0)                  return NULL; +        if (pid == 1) +                return hashmap_get(m->units, SPECIAL_INIT_SCOPE); + +        u = hashmap_get(m->watch_pids1, LONG_TO_PTR(pid)); +        if (u) +                return u; + +        u = hashmap_get(m->watch_pids2, LONG_TO_PTR(pid)); +        if (u) +                return u; +          r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, pid, &cgroup);          if (r < 0)                  return NULL; @@ -1021,7 +1408,6 @@ Unit *manager_get_unit_by_pid(Manager *m, pid_t pid) {  int manager_notify_cgroup_empty(Manager *m, const char *cgroup) {          Unit *u; -        int r;          assert(m);          assert(cgroup); @@ -1030,15 +1416,7 @@ int manager_notify_cgroup_empty(Manager *m, const char *cgroup) {          if (!u)                  return 0; -        r = cg_is_empty_recursive(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, true); -        if (r <= 0) -                return r; - -        if (UNIT_VTABLE(u)->notify_cgroup_empty) -                UNIT_VTABLE(u)->notify_cgroup_empty(u); - -        unit_add_to_gc_queue(u); -        return 0; +        return unit_notify_cgroup_empty(u);  }  int unit_get_memory_current(Unit *u, uint64_t *ret) { @@ -1051,10 +1429,13 @@ int unit_get_memory_current(Unit *u, uint64_t *ret) {          if (!u->cgroup_path)                  return -ENODATA; -        if ((u->cgroup_realized_mask & CGROUP_MEMORY) == 0) +        if ((u->cgroup_realized_mask & CGROUP_MASK_MEMORY) == 0)                  return -ENODATA; -        r = cg_get_attribute("memory", u->cgroup_path, "memory.usage_in_bytes", &v); +        if (cg_unified() <= 0) +                r = cg_get_attribute("memory", u->cgroup_path, "memory.usage_in_bytes", &v); +        else +                r = cg_get_attribute("memory", u->cgroup_path, "memory.current", &v);          if (r == -ENOENT)                  return -ENODATA;          if (r < 0) @@ -1074,7 +1455,7 @@ static int unit_get_cpu_usage_raw(Unit *u, nsec_t *ret) {          if (!u->cgroup_path)                  return -ENODATA; -        if ((u->cgroup_realized_mask & CGROUP_CPUACCT) == 0) +        if ((u->cgroup_realized_mask & CGROUP_MASK_CPUACCT) == 0)                  return -ENODATA;          r = cg_get_attribute("cpuacct", u->cgroup_path, "cpuacct.usage", &v); @@ -1124,6 +1505,18 @@ int unit_reset_cpu_usage(Unit *u) {          return 0;  } +bool unit_cgroup_delegate(Unit *u) { +        CGroupContext *c; + +        assert(u); + +        c = unit_get_cgroup_context(u); +        if (!c) +                return false; + +        return c->delegate; +} +  static const char* const cgroup_device_policy_table[_CGROUP_DEVICE_POLICY_MAX] = {          [CGROUP_AUTO] = "auto",          [CGROUP_CLOSED] = "closed", diff --git a/src/core/cgroup.h b/src/core/cgroup.h index 869ddae8c4..1ce21f43f2 100644 --- a/src/core/cgroup.h +++ b/src/core/cgroup.h @@ -96,22 +96,32 @@ struct CGroupContext {  void cgroup_context_init(CGroupContext *c);  void cgroup_context_done(CGroupContext *c);  void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix); -void cgroup_context_apply(CGroupContext *c, CGroupControllerMask mask, const char *path, ManagerState state); +void cgroup_context_apply(CGroupContext *c, CGroupMask mask, const char *path, ManagerState state); -CGroupControllerMask cgroup_context_get_mask(CGroupContext *c); +CGroupMask cgroup_context_get_mask(CGroupContext *c);  void cgroup_context_free_device_allow(CGroupContext *c, CGroupDeviceAllow *a);  void cgroup_context_free_blockio_device_weight(CGroupContext *c, CGroupBlockIODeviceWeight *w);  void cgroup_context_free_blockio_device_bandwidth(CGroupContext *c, CGroupBlockIODeviceBandwidth *b); -CGroupControllerMask unit_get_cgroup_mask(Unit *u); -CGroupControllerMask unit_get_siblings_mask(Unit *u); -CGroupControllerMask unit_get_members_mask(Unit *u); -CGroupControllerMask unit_get_target_mask(Unit *u); +CGroupMask unit_get_own_mask(Unit *u); +CGroupMask unit_get_siblings_mask(Unit *u); +CGroupMask unit_get_members_mask(Unit *u); +CGroupMask unit_get_subtree_mask(Unit *u); + +CGroupMask unit_get_target_mask(Unit *u); +CGroupMask unit_get_enable_mask(Unit *u);  void unit_update_cgroup_members_masks(Unit *u); + +char *unit_default_cgroup_path(Unit *u); +int unit_set_cgroup_path(Unit *u, const char *path); +  int unit_realize_cgroup(Unit *u); -void unit_destroy_cgroup_if_empty(Unit *u); +void unit_release_cgroup(Unit *u); +void unit_prune_cgroup(Unit *u); +int unit_watch_cgroup(Unit *u); +  int unit_attach_pids_to_cgroup(Unit *u);  int manager_setup_cgroup(Manager *m); @@ -122,13 +132,17 @@ unsigned manager_dispatch_cgroup_queue(Manager *m);  Unit *manager_get_unit_by_cgroup(Manager *m, const char *cgroup);  Unit* manager_get_unit_by_pid(Manager *m, pid_t pid); -pid_t unit_search_main_pid(Unit *u); - -int manager_notify_cgroup_empty(Manager *m, const char *group); +int unit_search_main_pid(Unit *u, pid_t *ret); +int unit_watch_all_pids(Unit *u);  int unit_get_memory_current(Unit *u, uint64_t *ret);  int unit_get_cpu_usage(Unit *u, nsec_t *ret);  int unit_reset_cpu_usage(Unit *u); +bool unit_cgroup_delegate(Unit *u); + +int unit_notify_cgroup_empty(Unit *u); +int manager_notify_cgroup_empty(Manager *m, const char *group); +  const char* cgroup_device_policy_to_string(CGroupDevicePolicy i) _const_;  CGroupDevicePolicy cgroup_device_policy_from_string(const char *s) _pure_; diff --git a/src/core/dbus-cgroup.c b/src/core/dbus-cgroup.c index 9dcc51f240..e8fd44e294 100644 --- a/src/core/dbus-cgroup.c +++ b/src/core/dbus-cgroup.c @@ -228,7 +228,7 @@ int bus_cgroup_set_property(                  if (mode != UNIT_CHECK) {                          c->cpu_accounting = b; -                        u->cgroup_realized_mask &= ~CGROUP_CPUACCT; +                        u->cgroup_realized_mask &= ~CGROUP_MASK_CPUACCT;                          unit_write_drop_in_private(u, mode, name, b ? "CPUAccounting=yes" : "CPUAccounting=no");                  } @@ -252,7 +252,7 @@ int bus_cgroup_set_property(                  if (mode != UNIT_CHECK) {                          c->cpu_shares = ul; -                        u->cgroup_realized_mask &= ~CGROUP_CPU; +                        u->cgroup_realized_mask &= ~CGROUP_MASK_CPU;                          unit_write_drop_in_private_format(u, mode, name, "CPUShares=%lu", ul);                  } @@ -276,7 +276,7 @@ int bus_cgroup_set_property(                  if (mode != UNIT_CHECK) {                          c->startup_cpu_shares = ul; -                        u->cgroup_realized_mask &= ~CGROUP_CPU; +                        u->cgroup_realized_mask &= ~CGROUP_MASK_CPU;                          unit_write_drop_in_private_format(u, mode, name, "StartupCPUShares=%lu", ul);                  } @@ -294,7 +294,7 @@ int bus_cgroup_set_property(                  if (mode != UNIT_CHECK) {                          c->cpu_quota_per_sec_usec = u64; -                        u->cgroup_realized_mask &= ~CGROUP_CPU; +                        u->cgroup_realized_mask &= ~CGROUP_MASK_CPU;                          unit_write_drop_in_private_format(u, mode, "CPUQuota", "CPUQuota=%0.f%%", (double) (c->cpu_quota_per_sec_usec / 10000));                  } @@ -309,7 +309,7 @@ int bus_cgroup_set_property(                  if (mode != UNIT_CHECK) {                          c->blockio_accounting = b; -                        u->cgroup_realized_mask &= ~CGROUP_BLKIO; +                        u->cgroup_realized_mask &= ~CGROUP_MASK_BLKIO;                          unit_write_drop_in_private(u, mode, name, b ? "BlockIOAccounting=yes" : "BlockIOAccounting=no");                  } @@ -333,7 +333,7 @@ int bus_cgroup_set_property(                  if (mode != UNIT_CHECK) {                          c->blockio_weight = ul; -                        u->cgroup_realized_mask &= ~CGROUP_BLKIO; +                        u->cgroup_realized_mask &= ~CGROUP_MASK_BLKIO;                          unit_write_drop_in_private_format(u, mode, name, "BlockIOWeight=%lu", ul);                  } @@ -357,7 +357,7 @@ int bus_cgroup_set_property(                  if (mode != UNIT_CHECK) {                          c->startup_blockio_weight = ul; -                        u->cgroup_realized_mask &= ~CGROUP_BLKIO; +                        u->cgroup_realized_mask &= ~CGROUP_MASK_BLKIO;                          unit_write_drop_in_private_format(u, mode, name, "StartupBlockIOWeight=%lu", ul);                  } @@ -427,7 +427,7 @@ int bus_cgroup_set_property(                                                  cgroup_context_free_blockio_device_bandwidth(c, a);                          } -                        u->cgroup_realized_mask &= ~CGROUP_BLKIO; +                        u->cgroup_realized_mask &= ~CGROUP_MASK_BLKIO;                          f = open_memstream(&buf, &size);                          if (!f) @@ -510,7 +510,7 @@ int bus_cgroup_set_property(                                          cgroup_context_free_blockio_device_weight(c, c->blockio_device_weights);                          } -                        u->cgroup_realized_mask &= ~CGROUP_BLKIO; +                        u->cgroup_realized_mask &= ~CGROUP_MASK_BLKIO;                          f = open_memstream(&buf, &size);                          if (!f) @@ -535,7 +535,7 @@ int bus_cgroup_set_property(                  if (mode != UNIT_CHECK) {                          c->memory_accounting = b; -                        u->cgroup_realized_mask &= ~CGROUP_MEMORY; +                        u->cgroup_realized_mask &= ~CGROUP_MASK_MEMORY;                          unit_write_drop_in_private(u, mode, name, b ? "MemoryAccounting=yes" : "MemoryAccounting=no");                  } @@ -550,7 +550,7 @@ int bus_cgroup_set_property(                  if (mode != UNIT_CHECK) {                          c->memory_limit = limit; -                        u->cgroup_realized_mask &= ~CGROUP_MEMORY; +                        u->cgroup_realized_mask &= ~CGROUP_MASK_MEMORY;                          unit_write_drop_in_private_format(u, mode, name, "%s=%" PRIu64, name, limit);                  } @@ -572,7 +572,7 @@ int bus_cgroup_set_property(                          char *buf;                          c->device_policy = p; -                        u->cgroup_realized_mask &= ~CGROUP_DEVICE; +                        u->cgroup_realized_mask &= ~CGROUP_MASK_DEVICE;                          buf = strjoina("DevicePolicy=", policy);                          unit_write_drop_in_private(u, mode, name, buf); @@ -651,7 +651,7 @@ int bus_cgroup_set_property(                                          cgroup_context_free_device_allow(c, c->device_allow);                          } -                        u->cgroup_realized_mask &= ~CGROUP_DEVICE; +                        u->cgroup_realized_mask &= ~CGROUP_MASK_DEVICE;                          f = open_memstream(&buf, &size);                          if (!f) diff --git a/src/core/dbus-unit.c b/src/core/dbus-unit.c index 1e6291e762..31016b6c4a 100644 --- a/src/core/dbus-unit.c +++ b/src/core/dbus-unit.c @@ -25,6 +25,7 @@  #include "cgroup-util.h"  #include "strv.h"  #include "bus-common-errors.h" +#include "special.h"  #include "dbus.h"  #include "dbus-unit.h" @@ -973,6 +974,8 @@ static int bus_unit_set_transient_property(                          return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "The slice property is only available for units with control groups.");                  if (u->type == UNIT_SLICE)                          return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Slice may not be set for slice units."); +                if (unit_has_name(u, SPECIAL_INIT_SCOPE)) +                        return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Cannot set slice for init.scope");                  r = sd_bus_message_read(message, "s", &s);                  if (r < 0) diff --git a/src/core/execute.h b/src/core/execute.h index 8d14fe23d0..a750246a89 100644 --- a/src/core/execute.h +++ b/src/core/execute.h @@ -214,7 +214,7 @@ struct ExecParameters {          bool apply_tty_stdin;          bool confirm_spawn;          bool selinux_context_net; -        CGroupControllerMask cgroup_supported; +        CGroupMask cgroup_supported;          const char *cgroup_path;          bool cgroup_delegate;          const char *runtime_prefix; diff --git a/src/core/main.c b/src/core/main.c index e232be88c0..4cd2b08c38 100644 --- a/src/core/main.c +++ b/src/core/main.c @@ -433,25 +433,28 @@ static int config_parse_cpu_affinity2(                  void *data,                  void *userdata) { -        const char *word, *state; -        size_t l; -        cpu_set_t *c = NULL; +        const char *whole_rvalue = rvalue; +        _cleanup_cpu_free_ cpu_set_t *c = NULL;          unsigned ncpus = 0;          assert(filename);          assert(lvalue);          assert(rvalue); -        FOREACH_WORD_QUOTED(word, l, rvalue, state) { -                char *t; -                int r; +        for (;;) { +                _cleanup_free_ char *word = NULL;                  unsigned cpu; +                int r; -                if (!(t = strndup(word, l))) -                        return log_oom(); +                r = extract_first_word(&rvalue, &word, WHITESPACE, EXTRACT_QUOTES); +                if (r < 0) { +                        log_syntax(unit, LOG_ERR, filename, line, r, "Invalid value for %s: %s", lvalue, whole_rvalue); +                        return r; +                } +                if (r == 0) +                        break; -                r = safe_atou(t, &cpu); -                free(t); +                r = safe_atou(word, &cpu);                  if (!c)                          if (!(c = cpu_set_malloc(&ncpus))) @@ -460,23 +463,19 @@ static int config_parse_cpu_affinity2(                  if (r < 0 || cpu >= ncpus) {                          log_syntax(unit, LOG_ERR, filename, line, -r,                                     "Failed to parse CPU affinity '%s'", rvalue); -                        CPU_FREE(c);                          return -EBADMSG;                  }                  CPU_SET_S(cpu, CPU_ALLOC_SIZE(ncpus), c);          } -        if (!isempty(state)) +        if (!isempty(rvalue))                  log_syntax(unit, LOG_ERR, filename, line, EINVAL,                             "Trailing garbage, ignoring."); -        if (c) { +        if (c)                  if (sched_setaffinity(0, CPU_ALLOC_SIZE(ncpus), c) < 0)                          log_warning("Failed to set CPU affinity: %m"); -                CPU_FREE(c); -        } -          return 0;  } @@ -538,9 +537,8 @@ static int config_parse_join_controllers(const char *unit,                                           void *data,                                           void *userdata) { +        const char *whole_rvalue = rvalue;          unsigned n = 0; -        const char *word, *state; -        size_t length;          assert(filename);          assert(lvalue); @@ -548,16 +546,22 @@ static int config_parse_join_controllers(const char *unit,          free_join_controllers(); -        FOREACH_WORD_QUOTED(word, length, rvalue, state) { -                char *s, **l; - -                s = strndup(word, length); -                if (!s) -                        return log_oom(); +        for (;;) { +                _cleanup_free_ char *word = NULL; +                char **l; +                int r; -                l = strv_split(s, ","); -                free(s); +                r = extract_first_word(&rvalue, &word, WHITESPACE, EXTRACT_QUOTES); +                if (r < 0) { +                        log_syntax(unit, LOG_ERR, filename, line, r, "Invalid value for %s: %s", lvalue, whole_rvalue); +                        return r; +                } +                if (r == 0) +                        break; +                l = strv_split(word, ","); +                if (!l) +                        log_oom();                  strv_uniq(l);                  if (strv_length(l) <= 1) { @@ -617,7 +621,7 @@ static int config_parse_join_controllers(const char *unit,                          arg_join_controllers = t;                  }          } -        if (!isempty(state)) +        if (!isempty(rvalue))                  log_syntax(unit, LOG_ERR, filename, line, EINVAL,                             "Trailing garbage, ignoring."); @@ -2043,7 +2047,7 @@ finish:                   * kernel; at this point, we will not listen to the                   * signals anyway */                  if (detect_container(NULL) <= 0) -                        cg_uninstall_release_agent(SYSTEMD_CGROUP_CONTROLLER); +                        (void) cg_uninstall_release_agent(SYSTEMD_CGROUP_CONTROLLER);                  execve(SYSTEMD_SHUTDOWN_BINARY_PATH, (char **) command_line, env_block);                  log_error_errno(errno, "Failed to execute shutdown binary, %s: %m", diff --git a/src/core/manager.c b/src/core/manager.c index ede2a9910d..c3327e37f5 100644 --- a/src/core/manager.c +++ b/src/core/manager.c @@ -568,11 +568,14 @@ int manager_new(ManagerRunningAs running_as, bool test_run, Manager **_m) {          m->idle_pipe[0] = m->idle_pipe[1] = m->idle_pipe[2] = m->idle_pipe[3] = -1; -        m->pin_cgroupfs_fd = m->notify_fd = m->signal_fd = m->time_change_fd = m->dev_autofs_fd = m->private_listen_fd = m->kdbus_fd = m->utab_inotify_fd = -1; +        m->pin_cgroupfs_fd = m->notify_fd = m->signal_fd = m->time_change_fd = +                m->dev_autofs_fd = m->private_listen_fd = m->kdbus_fd = m->utab_inotify_fd = +                m->cgroup_inotify_fd = -1;          m->current_job_id = 1; /* start as id #1, so that we can leave #0 around as "null-like" value */          m->ask_password_inotify_fd = -1;          m->have_ask_password = -EINVAL; /* we don't know */ +        m->first_boot = -1;          m->test_run = test_run; @@ -2721,7 +2724,7 @@ void manager_check_finished(Manager *m) {          SET_FOREACH(u, m->startup_units, i)                  if (u->cgroup_path) -                        cgroup_context_apply(unit_get_cgroup_context(u), unit_get_cgroup_mask(u), u->cgroup_path, manager_state(m)); +                        cgroup_context_apply(unit_get_cgroup_context(u), unit_get_own_mask(u), u->cgroup_path, manager_state(m));  }  static int create_generator_dir(Manager *m, char **generator, const char *name) { @@ -2998,12 +3001,14 @@ void manager_set_first_boot(Manager *m, bool b) {          if (m->running_as != MANAGER_SYSTEM)                  return; -        m->first_boot = b; +        if (m->first_boot != (int) b) { +                if (b) +                        (void) touch("/run/systemd/first-boot"); +                else +                        (void) unlink("/run/systemd/first-boot"); +        } -        if (m->first_boot) -                touch("/run/systemd/first-boot"); -        else -                unlink("/run/systemd/first-boot"); +        m->first_boot = b;  }  void manager_status_printf(Manager *m, StatusType type, const char *status, const char *format, ...) { diff --git a/src/core/manager.h b/src/core/manager.h index 1e01f2bdef..9956cb7700 100644 --- a/src/core/manager.h +++ b/src/core/manager.h @@ -215,16 +215,22 @@ struct Manager {          /* Data specific to the cgroup subsystem */          Hashmap *cgroup_unit; -        CGroupControllerMask cgroup_supported; +        CGroupMask cgroup_supported;          char *cgroup_root; -        int gc_marker; -        unsigned n_in_gc_queue; +        /* Notifications from cgroups, when the unified hierarchy is +         * used is done via inotify. */ +        int cgroup_inotify_fd; +        sd_event_source *cgroup_inotify_event_source; +        Hashmap *cgroup_inotify_wd_unit;          /* Make sure the user cannot accidentally unmount our cgroup           * file system */          int pin_cgroupfs_fd; +        int gc_marker; +        unsigned n_in_gc_queue; +          /* Flags */          ManagerRunningAs running_as;          ManagerExitCode exit_code:5; @@ -233,7 +239,6 @@ struct Manager {          bool dispatching_dbus_queue:1;          bool taint_usr:1; -        bool first_boot:1;          bool test_run:1; @@ -295,6 +300,8 @@ struct Manager {          const char *unit_log_field;          const char *unit_log_format_string; + +        int first_boot;  };  int manager_new(ManagerRunningAs running_as, bool test_run, Manager **m); diff --git a/src/core/mount-setup.c b/src/core/mount-setup.c index 1782d40720..c6f3569915 100644 --- a/src/core/mount-setup.c +++ b/src/core/mount-setup.c @@ -93,12 +93,14 @@ static const MountPoint mount_table[] = {  #endif          { "tmpfs",       "/run",                      "tmpfs",      "mode=755",                MS_NOSUID|MS_NODEV|MS_STRICTATIME,            NULL,          MNT_FATAL|MNT_IN_CONTAINER }, +        { "cgroup",      "/sys/fs/cgroup",            "cgroup",     "__DEVEL__sane_behavior",  MS_NOSUID|MS_NOEXEC|MS_NODEV, +          cg_is_unified_wanted, MNT_FATAL|MNT_IN_CONTAINER },          { "tmpfs",       "/sys/fs/cgroup",            "tmpfs",      "mode=755",                MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME, -          NULL,          MNT_FATAL|MNT_IN_CONTAINER }, +          cg_is_legacy_wanted, MNT_FATAL|MNT_IN_CONTAINER },          { "cgroup",      "/sys/fs/cgroup/systemd",    "cgroup",     "none,name=systemd,xattr", MS_NOSUID|MS_NOEXEC|MS_NODEV, -          NULL,          MNT_IN_CONTAINER           }, +          cg_is_legacy_wanted, MNT_IN_CONTAINER           },          { "cgroup",      "/sys/fs/cgroup/systemd",    "cgroup",     "none,name=systemd",       MS_NOSUID|MS_NOEXEC|MS_NODEV, -          NULL,          MNT_FATAL|MNT_IN_CONTAINER }, +          cg_is_legacy_wanted, MNT_FATAL|MNT_IN_CONTAINER },          { "pstore",      "/sys/fs/pstore",            "pstore",     NULL,                      MS_NOSUID|MS_NOEXEC|MS_NODEV,            NULL,          MNT_NONE                   },  #ifdef ENABLE_EFI @@ -217,6 +219,9 @@ int mount_cgroup_controllers(char ***join_controllers) {          _cleanup_set_free_free_ Set *controllers = NULL;          int r; +        if (!cg_is_legacy_wanted()) +                return 0; +          /* Mount all available cgroup controllers that are built into the kernel. */          controllers = set_new(&string_hash_ops); diff --git a/src/core/scope.c b/src/core/scope.c index c594ab5294..44cd324f58 100644 --- a/src/core/scope.c +++ b/src/core/scope.c @@ -22,12 +22,13 @@  #include <errno.h>  #include <unistd.h> -#include "unit.h" -#include "scope.h"  #include "log.h" -#include "dbus-scope.h" +#include "strv.h"  #include "special.h"  #include "unit-name.h" +#include "unit.h" +#include "scope.h" +#include "dbus-scope.h"  #include "load-dropin.h"  static const UnitActiveState state_translation_table[_SCOPE_STATE_MAX] = { @@ -136,7 +137,9 @@ static int scope_verify(Scope *s) {          if (UNIT(s)->load_state != UNIT_LOADED)                  return 0; -        if (set_isempty(UNIT(s)->pids) && UNIT(s)->manager->n_reloading <= 0) { +        if (set_isempty(UNIT(s)->pids) && +            !manager_is_reloading_or_reexecuting(UNIT(s)->manager) <= 0 && +            !unit_has_name(UNIT(s), SPECIAL_INIT_SCOPE)) {                  log_unit_error(UNIT(s), "Scope has no PIDs. Refusing.");                  return -EINVAL;          } @@ -151,7 +154,7 @@ static int scope_load(Unit *u) {          assert(s);          assert(u->load_state == UNIT_STUB); -        if (!u->transient && UNIT(s)->manager->n_reloading <= 0) +        if (!u->transient && !manager_is_reloading_or_reexecuting(u->manager))                  return -ENOENT;          u->load_state = UNIT_LOADED; @@ -279,6 +282,9 @@ static int scope_start(Unit *u) {          assert(s); +        if (unit_has_name(u, SPECIAL_INIT_SCOPE)) +                return -EPERM; +          if (s->state == SCOPE_FAILED)                  return -EPERM; @@ -289,7 +295,7 @@ static int scope_start(Unit *u) {          assert(s->state == SCOPE_DEAD); -        if (!u->transient && UNIT(s)->manager->n_reloading <= 0) +        if (!u->transient && !manager_is_reloading_or_reexecuting(u->manager))                  return -ENOENT;          (void) unit_realize_cgroup(u); @@ -396,7 +402,7 @@ static bool scope_check_gc(Unit *u) {          if (u->cgroup_path) {                  int r; -                r = cg_is_empty_recursive(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, true); +                r = cg_is_empty_recursive(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path);                  if (r <= 0)                          return true;          } @@ -464,6 +470,9 @@ static int scope_dispatch_timer(sd_event_source *source, usec_t usec, void *user  int scope_abandon(Scope *s) {          assert(s); +        if (unit_has_name(UNIT(s), SPECIAL_INIT_SCOPE)) +                return -EPERM; +          if (!IN_SET(s->state, SCOPE_RUNNING, SCOPE_ABANDONED))                  return -ESTALE; @@ -499,6 +508,48 @@ _pure_ static const char *scope_sub_state_to_string(Unit *u) {          return scope_state_to_string(SCOPE(u)->state);  } +static int scope_enumerate(Manager *m) { +        Unit *u; +        int r; + +        assert(m); + +        /* Let's unconditionally add the "init.scope" special unit +         * that encapsulates PID 1. Note that PID 1 already is in the +         * cgroup for this, we hence just need to allocate the object +         * for it and that's it. */ + +        u = manager_get_unit(m, SPECIAL_INIT_SCOPE); +        if (!u) { +                u = unit_new(m, sizeof(Scope)); +                if (!u) +                        return log_oom(); + +                r = unit_add_name(u, SPECIAL_INIT_SCOPE); +                if (r < 0)  { +                        unit_free(u); +                        return log_error_errno(r, "Failed to add init.scope name"); +                } +        } + +        u->transient = true; +        u->default_dependencies = false; +        u->no_gc = true; +        SCOPE(u)->deserialized_state = SCOPE_RUNNING; +        SCOPE(u)->kill_context.kill_signal = SIGRTMIN+14; + +        /* Prettify things, if we can. */ +        if (!u->description) +                u->description = strdup("System and Service Manager"); +        if (!u->documentation) +                (void) strv_extend(&u->documentation, "man:systemd(1)"); + +        unit_add_to_load_queue(u); +        unit_add_to_dbus_queue(u); + +        return 0; +} +  static const char* const scope_state_table[_SCOPE_STATE_MAX] = {          [SCOPE_DEAD] = "dead",          [SCOPE_RUNNING] = "running", @@ -565,5 +616,7 @@ const UnitVTable scope_vtable = {          .bus_set_property = bus_scope_set_property,          .bus_commit_properties = bus_scope_commit_properties, -        .can_transient = true +        .can_transient = true, + +        .enumerate = scope_enumerate,  }; diff --git a/src/core/selinux-access.c b/src/core/selinux-access.c index f920c2e2cd..40ca0c6166 100644 --- a/src/core/selinux-access.c +++ b/src/core/selinux-access.c @@ -246,7 +246,7 @@ int mac_selinux_generic_access_check(          if (path) {                  /* Get the file context of the unit file */ -                r = getfilecon(path, &fcon); +                r = getfilecon_raw(path, &fcon);                  if (r < 0) {                          r = sd_bus_error_setf(error, SD_BUS_ERROR_ACCESS_DENIED, "Failed to get file context on %s.", path);                          goto finish; @@ -254,7 +254,7 @@ int mac_selinux_generic_access_check(                  tclass = "service";          } else { -                r = getcon(&fcon); +                r = getcon_raw(&fcon);                  if (r < 0) {                          r = sd_bus_error_setf(error, SD_BUS_ERROR_ACCESS_DENIED, "Failed to get current context.");                          goto finish; diff --git a/src/core/service.c b/src/core/service.c index 3c4232417d..292fe50de8 100644 --- a/src/core/service.c +++ b/src/core/service.c @@ -767,7 +767,7 @@ static int service_load_pid_file(Service *s, bool may_warn) {  }  static int service_search_main_pid(Service *s) { -        pid_t pid; +        pid_t pid = 0;          int r;          assert(s); @@ -782,9 +782,9 @@ static int service_search_main_pid(Service *s) {          assert(s->main_pid <= 0); -        pid = unit_search_main_pid(UNIT(s)); -        if (pid <= 0) -                return -ENOENT; +        r = unit_search_main_pid(UNIT(s), &pid); +        if (r < 0) +                return r;          log_unit_debug(UNIT(s), "Main PID guessed: "PID_FMT, pid);          r = service_set_main_pid(s, pid); @@ -860,7 +860,7 @@ static void service_set_state(Service *s, ServiceState state) {          /* For the inactive states unit_notify() will trim the cgroup,           * but for exit we have to do that ourselves... */          if (state == SERVICE_EXITED && UNIT(s)->manager->n_reloading <= 0) -                unit_destroy_cgroup_if_empty(UNIT(s)); +                unit_prune_cgroup(UNIT(s));          /* For remain_after_exit services, let's see if we can "release" the           * hold on the console, since unit_notify() only does that in case of @@ -1269,7 +1269,7 @@ static int cgroup_good(Service *s) {          if (!UNIT(s)->cgroup_path)                  return 0; -        r = cg_is_empty_recursive(SYSTEMD_CGROUP_CONTROLLER, UNIT(s)->cgroup_path, true); +        r = cg_is_empty_recursive(SYSTEMD_CGROUP_CONTROLLER, UNIT(s)->cgroup_path);          if (r < 0)                  return r; @@ -1520,18 +1520,33 @@ fail:          service_enter_signal(s, SERVICE_STOP_SIGTERM, SERVICE_FAILURE_RESOURCES);  } +static bool service_good(Service *s) { +        int main_pid_ok; +        assert(s); + +        if (s->type == SERVICE_DBUS && !s->bus_name_good) +                return false; + +        main_pid_ok = main_pid_good(s); +        if (main_pid_ok > 0) /* It's alive */ +                return true; +        if (main_pid_ok == 0) /* It's dead */ +                return false; + +        /* OK, we don't know anything about the main PID, maybe +         * because there is none. Let's check the control group +         * instead. */ + +        return cgroup_good(s) != 0; +} +  static void service_enter_running(Service *s, ServiceResult f) { -        int main_pid_ok, cgroup_ok;          assert(s);          if (f != SERVICE_SUCCESS)                  s->result = f; -        main_pid_ok = main_pid_good(s); -        cgroup_ok = cgroup_good(s); - -        if ((main_pid_ok > 0 || (main_pid_ok < 0 && cgroup_ok != 0)) && -            (s->bus_name_good || s->type != SERVICE_DBUS)) { +        if (service_good(s)) {                  /* If there are any queued up sd_notify()                   * notifications, process them now */ @@ -2629,7 +2644,7 @@ static void service_sigchld_event(Unit *u, pid_t pid, int code, int status) {                                                  break;                                          }                                  } else -                                        service_search_main_pid(s); +                                        (void) service_search_main_pid(s);                                  service_enter_start_post(s);                                  break; @@ -2651,7 +2666,7 @@ static void service_sigchld_event(Unit *u, pid_t pid, int code, int status) {                                                  break;                                          }                                  } else -                                        service_search_main_pid(s); +                                        (void) service_search_main_pid(s);                                  service_enter_running(s, SERVICE_SUCCESS);                                  break; @@ -2659,7 +2674,7 @@ static void service_sigchld_event(Unit *u, pid_t pid, int code, int status) {                          case SERVICE_RELOAD:                                  if (f == SERVICE_SUCCESS) {                                          service_load_pid_file(s, true); -                                        service_search_main_pid(s); +                                        (void) service_search_main_pid(s);                                  }                                  s->reload_result = f; diff --git a/src/core/slice.c b/src/core/slice.c index 7442d23391..b414462066 100644 --- a/src/core/slice.c +++ b/src/core/slice.c @@ -21,12 +21,13 @@  #include <errno.h> -#include "unit.h" -#include "slice.h"  #include "log.h" -#include "dbus-slice.h" +#include "strv.h"  #include "special.h"  #include "unit-name.h" +#include "unit.h" +#include "slice.h" +#include "dbus-slice.h"  static const UnitActiveState state_translation_table[_SLICE_STATE_MAX] = {          [SLICE_DEAD] = UNIT_INACTIVE, @@ -252,6 +253,40 @@ _pure_ static const char *slice_sub_state_to_string(Unit *u) {          return slice_state_to_string(SLICE(u)->state);  } +static int slice_enumerate(Manager *m) { +        Unit *u; +        int r; + +        assert(m); + +        u = manager_get_unit(m, SPECIAL_ROOT_SLICE); +        if (!u) { +                u = unit_new(m, sizeof(Slice)); +                if (!u) +                        return log_oom(); + +                r = unit_add_name(u, SPECIAL_ROOT_SLICE); +                if (r < 0) { +                        unit_free(u); +                        return log_error_errno(r, "Failed to add -.slice name"); +                } +        } + +        u->default_dependencies = false; +        u->no_gc = true; +        SLICE(u)->deserialized_state = SLICE_ACTIVE; + +        if (!u->description) +                u->description = strdup("Root Slice"); +        if (!u->documentation) +                (void) strv_extend(&u->documentation, "man:systemd.special(7)"); + +        unit_add_to_load_queue(u); +        unit_add_to_dbus_queue(u); + +        return 0; +} +  static const char* const slice_state_table[_SLICE_STATE_MAX] = {          [SLICE_DEAD] = "dead",          [SLICE_ACTIVE] = "active" @@ -293,6 +328,8 @@ const UnitVTable slice_vtable = {          .bus_set_property = bus_slice_set_property,          .bus_commit_properties = bus_slice_commit_properties, +        .enumerate = slice_enumerate, +          .status_message_formats = {                  .finished_start_job = {                          [JOB_DONE]       = "Created slice %s.", diff --git a/src/core/unit.c b/src/core/unit.c index 5f602bdf5f..8c07c6140d 100644 --- a/src/core/unit.c +++ b/src/core/unit.c @@ -28,27 +28,28 @@  #include "sd-id128.h"  #include "sd-messages.h"  #include "set.h" -#include "unit.h"  #include "macro.h"  #include "strv.h"  #include "path-util.h" -#include "load-fragment.h" -#include "load-dropin.h"  #include "log.h" -#include "unit-name.h" -#include "dbus-unit.h" -#include "special.h"  #include "cgroup-util.h"  #include "missing.h"  #include "mkdir.h"  #include "fileio-label.h" -#include "bus-common-errors.h" -#include "dbus.h" -#include "execute.h" -#include "dropin.h"  #include "formats-util.h"  #include "process-util.h" +#include "virt.h" +#include "bus-common-errors.h"  #include "bus-util.h" +#include "dropin.h" +#include "unit-name.h" +#include "special.h" +#include "unit.h" +#include "load-fragment.h" +#include "load-dropin.h" +#include "dbus.h" +#include "dbus-unit.h" +#include "execute.h"  const UnitVTable * const unit_vtable[_UNIT_TYPE_MAX] = {          [UNIT_SERVICE] = &service_vtable, @@ -90,6 +91,7 @@ Unit *unit_new(Manager *m, size_t size) {          u->unit_file_state = _UNIT_FILE_STATE_INVALID;          u->unit_file_preset = -1;          u->on_failure_job_mode = JOB_REPLACE; +        u->cgroup_inotify_wd = -1;          RATELIMIT_INIT(u->auto_stop_ratelimit, 10 * USEC_PER_SEC, 16); @@ -524,10 +526,7 @@ void unit_free(Unit *u) {          if (u->in_cgroup_queue)                  LIST_REMOVE(cgroup_queue, u->manager->cgroup_queue, u); -        if (u->cgroup_path) { -                hashmap_remove(u->manager->cgroup_unit, u->cgroup_path); -                free(u->cgroup_path); -        } +        unit_release_cgroup(u);          manager_update_failed_units(u->manager, u, false);          set_remove(u->manager->startup_units, u); @@ -1800,7 +1799,7 @@ void unit_notify(Unit *u, UnitActiveState os, UnitActiveState ns, bool reload_su          /* Make sure the cgroup is always removed when we become inactive */          if (UNIT_IS_INACTIVE_OR_FAILED(ns)) -                unit_destroy_cgroup_if_empty(u); +                unit_prune_cgroup(u);          /* Note that this doesn't apply to RemainAfterExit services exiting           * successfully, since there's no change of state in that case. Which is @@ -2016,9 +2015,9 @@ void unit_unwatch_pid(Unit *u, pid_t pid) {          assert(u);          assert(pid >= 1); -        hashmap_remove_value(u->manager->watch_pids1, LONG_TO_PTR(pid), u); -        hashmap_remove_value(u->manager->watch_pids2, LONG_TO_PTR(pid), u); -        set_remove(u->pids, LONG_TO_PTR(pid)); +        (void) hashmap_remove_value(u->manager->watch_pids1, LONG_TO_PTR(pid), u); +        (void) hashmap_remove_value(u->manager->watch_pids2, LONG_TO_PTR(pid), u); +        (void) set_remove(u->pids, LONG_TO_PTR(pid));  }  void unit_unwatch_all_pids(Unit *u) { @@ -2027,70 +2026,7 @@ void unit_unwatch_all_pids(Unit *u) {          while (!set_isempty(u->pids))                  unit_unwatch_pid(u, PTR_TO_LONG(set_first(u->pids))); -        set_free(u->pids); -        u->pids = NULL; -} - -static int unit_watch_pids_in_path(Unit *u, const char *path) { -        _cleanup_closedir_ DIR *d = NULL; -        _cleanup_fclose_ FILE *f = NULL; -        int ret = 0, r; - -        assert(u); -        assert(path); - -        /* Adds all PIDs from a specific cgroup path to the set of PIDs we watch. */ - -        r = cg_enumerate_processes(SYSTEMD_CGROUP_CONTROLLER, path, &f); -        if (r >= 0) { -                pid_t pid; - -                while ((r = cg_read_pid(f, &pid)) > 0) { -                        r = unit_watch_pid(u, pid); -                        if (r < 0 && ret >= 0) -                                ret = r; -                } -                if (r < 0 && ret >= 0) -                        ret = r; - -        } else if (ret >= 0) -                ret = r; - -        r = cg_enumerate_subgroups(SYSTEMD_CGROUP_CONTROLLER, path, &d); -        if (r >= 0) { -                char *fn; - -                while ((r = cg_read_subgroup(d, &fn)) > 0) { -                        _cleanup_free_ char *p = NULL; - -                        p = strjoin(path, "/", fn, NULL); -                        free(fn); - -                        if (!p) -                                return -ENOMEM; - -                        r = unit_watch_pids_in_path(u, p); -                        if (r < 0 && ret >= 0) -                                ret = r; -                } -                if (r < 0 && ret >= 0) -                        ret = r; - -        } else if (ret >= 0) -                ret = r; - -        return ret; -} - -int unit_watch_all_pids(Unit *u) { -        assert(u); - -        /* Adds all PIDs from our cgroup to the set of PIDs we watch */ - -        if (!u->cgroup_path) -                return -ENOENT; - -        return unit_watch_pids_in_path(u, u->cgroup_path); +        u->pids = set_free(u->pids);  }  void unit_tidy_watch_pids(Unit *u, pid_t except1, pid_t except2) { @@ -2399,31 +2335,6 @@ char *unit_dbus_path(Unit *u) {          return unit_dbus_path_from_name(u->id);  } -char *unit_default_cgroup_path(Unit *u) { -        _cleanup_free_ char *escaped = NULL, *slice = NULL; -        int r; - -        assert(u); - -        if (unit_has_name(u, SPECIAL_ROOT_SLICE)) -                return strdup(u->manager->cgroup_root); - -        if (UNIT_ISSET(u->slice) && !unit_has_name(UNIT_DEREF(u->slice), SPECIAL_ROOT_SLICE)) { -                r = cg_slice_to_path(UNIT_DEREF(u->slice)->id, &slice); -                if (r < 0) -                        return NULL; -        } - -        escaped = cg_escape(u->id); -        if (!escaped) -                return NULL; - -        if (slice) -                return strjoin(u->manager->cgroup_root, "/", slice, "/", escaped, NULL); -        else -                return strjoin(u->manager->cgroup_root, "/", escaped, NULL); -} -  int unit_set_slice(Unit *u, Unit *slice) {          assert(u);          assert(slice); @@ -2440,9 +2351,16 @@ int unit_set_slice(Unit *u, Unit *slice) {          if (u->type == UNIT_SLICE)                  return -EINVAL; +        if (unit_active_state(u) != UNIT_INACTIVE) +                return -EBUSY; +          if (slice->type != UNIT_SLICE)                  return -EINVAL; +        if (unit_has_name(u, SPECIAL_INIT_SCOPE) && +            !unit_has_name(slice, SPECIAL_ROOT_SLICE)) +                return -EPERM; +          if (UNIT_DEREF(u->slice) == slice)                  return 0; @@ -2491,7 +2409,7 @@ int unit_set_default_slice(Unit *u) {                  slice_name = b;          } else                  slice_name = -                        u->manager->running_as == MANAGER_SYSTEM +                        u->manager->running_as == MANAGER_SYSTEM && !unit_has_name(u, SPECIAL_INIT_SCOPE)                          ? SPECIAL_SYSTEM_SLICE                          : SPECIAL_ROOT_SLICE; @@ -2700,40 +2618,6 @@ void unit_serialize_item(Unit *u, FILE *f, const char *key, const char *value) {          fprintf(f, "%s=%s\n", key, value);  } -static int unit_set_cgroup_path(Unit *u, const char *path) { -        _cleanup_free_ char *p = NULL; -        int r; - -        assert(u); - -        if (path) { -                p = strdup(path); -                if (!p) -                        return -ENOMEM; -        } else -                p = NULL; - -        if (streq_ptr(u->cgroup_path, p)) -                return 0; - -        if (p) { -                r = hashmap_put(u->manager->cgroup_unit, p, u); -                if (r < 0) -                        return r; -        } - -        if (u->cgroup_path) { -                log_unit_debug(u, "Changing cgroup path from %s to %s.", u->cgroup_path, strna(p)); -                hashmap_remove(u->manager->cgroup_unit, u->cgroup_path); -                free(u->cgroup_path); -        } - -        u->cgroup_path = p; -        p = NULL; - -        return 0; -} -  int unit_deserialize(Unit *u, FILE *f, FDSet *fds) {          ExecRuntime **rt = NULL;          size_t offset; @@ -2864,6 +2748,8 @@ int unit_deserialize(Unit *u, FILE *f, FDSet *fds) {                          if (r < 0)                                  log_unit_debug_errno(u, r, "Failed to set cgroup path %s, ignoring: %m", v); +                        (void) unit_watch_cgroup(u); +                          continue;                  } else if (streq(l, "cgroup-realized")) {                          int b; @@ -3168,7 +3054,7 @@ int unit_kill_common(                  if (!pid_set)                          return -ENOMEM; -                q = cg_kill_recursive(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, signo, false, true, false, pid_set); +                q = cg_kill_recursive(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, signo, false, false, false, pid_set);                  if (q < 0 && q != -EAGAIN && q != -ESRCH && q != -ENOENT)                          r = q;          } @@ -3524,7 +3410,8 @@ int unit_kill_context(                  pid_t control_pid,                  bool main_pid_alien) { -        int sig, wait_for_exit = false, r; +        bool wait_for_exit = false; +        int sig, r;          assert(u);          assert(c); @@ -3553,13 +3440,13 @@ int unit_kill_context(                          _cleanup_free_ char *comm = NULL;                          get_process_comm(main_pid, &comm); -                        log_unit_warning_errno(u, r, "Failed to kill main process " PID_FMT " (%s): %m", main_pid, strna(comm)); +                        log_unit_warning_errno(u, r, "Failed to kill main process " PID_FMT " (%s), ignoring: %m", main_pid, strna(comm));                  } else {                          if (!main_pid_alien)                                  wait_for_exit = true; -                        if (c->send_sighup && k != KILL_KILL) -                                kill(main_pid, SIGHUP); +                        if (c->send_sighup && k == KILL_TERMINATE) +                                (void) kill(main_pid, SIGHUP);                  }          } @@ -3570,16 +3457,17 @@ int unit_kill_context(                          _cleanup_free_ char *comm = NULL;                          get_process_comm(control_pid, &comm); -                        log_unit_warning_errno(u, r, "Failed to kill control process " PID_FMT " (%s): %m", control_pid, strna(comm)); +                        log_unit_warning_errno(u, r, "Failed to kill control process " PID_FMT " (%s), ignoring: %m", control_pid, strna(comm));                  } else {                          wait_for_exit = true; -                        if (c->send_sighup && k != KILL_KILL) -                                kill(control_pid, SIGHUP); +                        if (c->send_sighup && k == KILL_TERMINATE) +                                (void) kill(control_pid, SIGHUP);                  }          } -        if ((c->kill_mode == KILL_CONTROL_GROUP || (c->kill_mode == KILL_MIXED && k == KILL_KILL)) && u->cgroup_path) { +        if (u->cgroup_path && +            (c->kill_mode == KILL_CONTROL_GROUP || (c->kill_mode == KILL_MIXED && k == KILL_KILL))) {                  _cleanup_set_free_ Set *pid_set = NULL;                  /* Exclude the main/control pids from being killed via the cgroup */ @@ -3587,21 +3475,30 @@ int unit_kill_context(                  if (!pid_set)                          return -ENOMEM; -                r = cg_kill_recursive(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, sig, true, true, false, pid_set); +                r = cg_kill_recursive(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, sig, true, k != KILL_TERMINATE, false, pid_set);                  if (r < 0) {                          if (r != -EAGAIN && r != -ESRCH && r != -ENOENT) -                                log_unit_warning_errno(u, r, "Failed to kill control group: %m"); -                } else if (r > 0) { +                                log_unit_warning_errno(u, r, "Failed to kill control group %s, ignoring: %m", u->cgroup_path); -                        /* FIXME: For now, we will not wait for the -                         * cgroup members to die, simply because -                         * cgroup notification is unreliable. It -                         * doesn't work at all in containers, and -                         * outside of containers it can be confused -                         * easily by leaving directories in the -                         * cgroup. */ +                } else if (r > 0) { -                        /* wait_for_exit = true; */ +                        /* FIXME: For now, on the legacy hierarchy, we +                         * will not wait for the cgroup members to die +                         * if we are running in a container or if this +                         * is a delegation unit, simply because cgroup +                         * notification is unreliable in these +                         * cases. It doesn't work at all in +                         * containers, and outside of containers it +                         * can be confused easily by left-over +                         * directories in the cgroup -- which however +                         * should not exist in non-delegated units. On +                         * the unified hierarchy that's different, +                         * there we get proper events. Hence rely on +                         * them.*/ + +                        if  (cg_unified() > 0 || +                             (detect_container(NULL) == 0 && !unit_cgroup_delegate(u))) +                                wait_for_exit = true;                          if (c->send_sighup && k != KILL_KILL) {                                  set_free(pid_set); diff --git a/src/core/unit.h b/src/core/unit.h index bc26653247..3c7684411b 100644 --- a/src/core/unit.h +++ b/src/core/unit.h @@ -184,9 +184,10 @@ struct Unit {          /* Counterparts in the cgroup filesystem */          char *cgroup_path; -        CGroupControllerMask cgroup_realized_mask; -        CGroupControllerMask cgroup_subtree_mask; -        CGroupControllerMask cgroup_members_mask; +        CGroupMask cgroup_realized_mask; +        CGroupMask cgroup_subtree_mask; +        CGroupMask cgroup_members_mask; +        int cgroup_inotify_wd;          /* How to start OnFailure units */          JobMode on_failure_job_mode; @@ -522,7 +523,6 @@ void unit_notify(Unit *u, UnitActiveState os, UnitActiveState ns, bool reload_su  int unit_watch_pid(Unit *u, pid_t pid);  void unit_unwatch_pid(Unit *u, pid_t pid); -int unit_watch_all_pids(Unit *u);  void unit_unwatch_all_pids(Unit *u);  void unit_tidy_watch_pids(Unit *u, pid_t except1, pid_t except2); @@ -567,8 +567,6 @@ bool unit_active_or_pending(Unit *u);  int unit_add_default_target_dependency(Unit *u, Unit *target); -char *unit_default_cgroup_path(Unit *u); -  void unit_start_on_failure(Unit *u);  void unit_trigger_notify(Unit *u); diff --git a/src/libsystemd-network/sd-dhcp-lease.c b/src/libsystemd-network/sd-dhcp-lease.c index 6551e7c94c..aa07846693 100644 --- a/src/libsystemd-network/sd-dhcp-lease.c +++ b/src/libsystemd-network/sd-dhcp-lease.c @@ -1121,13 +1121,13 @@ int dhcp_lease_set_client_id(sd_dhcp_lease *lease, const void *client_id, size_t          return 0;  } -int sd_dhcp_lease_get_timezone(sd_dhcp_lease *lease, const char **timezone) { +int sd_dhcp_lease_get_timezone(sd_dhcp_lease *lease, const char **tz) {          assert_return(lease, -EINVAL); -        assert_return(timezone, -EINVAL); +        assert_return(tz, -EINVAL);          if (!lease->timezone)                  return -ENODATA; -        *timezone = lease->timezone; +        *tz = lease->timezone;          return 0;  } diff --git a/src/libsystemd-network/sd-dhcp-server.c b/src/libsystemd-network/sd-dhcp-server.c index 7a8b298b51..1f167485e3 100644 --- a/src/libsystemd-network/sd-dhcp-server.c +++ b/src/libsystemd-network/sd-dhcp-server.c @@ -1062,16 +1062,16 @@ int sd_dhcp_server_forcerenew(sd_dhcp_server *server) {          return r;  } -int sd_dhcp_server_set_timezone(sd_dhcp_server *server, const char *timezone) { +int sd_dhcp_server_set_timezone(sd_dhcp_server *server, const char *tz) {          int r;          assert_return(server, -EINVAL); -        assert_return(timezone_is_valid(timezone), -EINVAL); +        assert_return(timezone_is_valid(tz), -EINVAL); -        if (streq_ptr(timezone, server->timezone)) +        if (streq_ptr(tz, server->timezone))                  return 0; -        r = free_and_strdup(&server->timezone, timezone); +        r = free_and_strdup(&server->timezone, tz);          if (r < 0)                  return r; diff --git a/src/libsystemd/sd-bus/bus-container.c b/src/libsystemd/sd-bus/bus-container.c index 101e4af18d..56dc086ae2 100644 --- a/src/libsystemd/sd-bus/bus-container.c +++ b/src/libsystemd/sd-bus/bus-container.c @@ -125,15 +125,22 @@ int bus_container_connect_kernel(sd_bus *b) {                  struct cmsghdr cmsghdr;                  uint8_t buf[CMSG_SPACE(sizeof(int))];          } control = {}; +        int error_buf = 0; +        struct iovec iov = { +                .iov_base = &error_buf, +                .iov_len = sizeof(error_buf), +        };          struct msghdr mh = {                  .msg_control = &control,                  .msg_controllen = sizeof(control), +                .msg_iov = &iov, +                .msg_iovlen = 1,          };          struct cmsghdr *cmsg;          pid_t child;          siginfo_t si; -        int r; -        _cleanup_close_ int fd = -1; +        int r, fd = -1; +        ssize_t n;          assert(b);          assert(b->input_fd < 0); @@ -178,10 +185,13 @@ int bus_container_connect_kernel(sd_bus *b) {                          _exit(EXIT_FAILURE);                  if (grandchild == 0) { -                          fd = open(b->kernel, O_RDWR|O_NOCTTY|O_CLOEXEC); -                        if (fd < 0) +                        if (fd < 0) { +                                /* Try to send error up */ +                                error_buf = errno; +                                (void) write(pair[1], &error_buf, sizeof(error_buf));                                  _exit(EXIT_FAILURE); +                        }                          cmsg = CMSG_FIRSTHDR(&mh);                          cmsg->cmsg_level = SOL_SOCKET; @@ -213,20 +223,17 @@ int bus_container_connect_kernel(sd_bus *b) {          if (r < 0)                  return r; -        if (si.si_code != CLD_EXITED) -                return -EIO; - -        if (si.si_status != EXIT_SUCCESS) -                return -EIO; - -        if (recvmsg(pair[0], &mh, MSG_NOSIGNAL|MSG_CMSG_CLOEXEC) < 0) +        n = recvmsg(pair[0], &mh, MSG_NOSIGNAL|MSG_CMSG_CLOEXEC); +        if (n < 0)                  return -errno; -        CMSG_FOREACH(cmsg, &mh) +        CMSG_FOREACH(cmsg, &mh) {                  if (cmsg->cmsg_level == SOL_SOCKET && cmsg->cmsg_type == SCM_RIGHTS) {                          int *fds;                          unsigned n_fds; +                        assert(fd < 0); +                          fds = (int*) CMSG_DATA(cmsg);                          n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int); @@ -237,9 +244,18 @@ int bus_container_connect_kernel(sd_bus *b) {                          fd = fds[0];                  } +        } + +        /* If there's an fd passed, we are good. */ +        if (fd >= 0) { +                b->input_fd = b->output_fd = fd; +                return bus_kernel_take_fd(b); +        } -        b->input_fd = b->output_fd = fd; -        fd = -1; +        /* If there's an error passed, use it */ +        if (n == sizeof(error_buf) && error_buf > 0) +                return -error_buf; -        return bus_kernel_take_fd(b); +        /* Otherwise, we have no clue */ +        return -EIO;  } diff --git a/src/libsystemd/sd-bus/sd-bus.c b/src/libsystemd/sd-bus/sd-bus.c index 5285278d92..4ed508427e 100644 --- a/src/libsystemd/sd-bus/sd-bus.c +++ b/src/libsystemd/sd-bus/sd-bus.c @@ -1025,19 +1025,30 @@ static int bus_start_address(sd_bus *b) {                  if (b->exec_path)                          r = bus_socket_exec(b); +                  else if ((b->nspid > 0 || b->machine) && b->kernel) {                          r = bus_container_connect_kernel(b);                          if (r < 0 && !IN_SET(r, -ENOENT, -ESOCKTNOSUPPORT))                                  container_kdbus_available = true; -                } else if (!container_kdbus_available && (b->nspid > 0 || b->machine) && b->sockaddr.sa.sa_family != AF_UNSPEC) -                        r = bus_container_connect_socket(b); -                else if (b->kernel) { + +                } else if ((b->nspid > 0 || b->machine) && b->sockaddr.sa.sa_family != AF_UNSPEC) { +                        if (!container_kdbus_available) +                                r = bus_container_connect_socket(b); +                        else +                                skipped = true; + +                } else if (b->kernel) {                          r = bus_kernel_connect(b);                          if (r < 0 && !IN_SET(r, -ENOENT, -ESOCKTNOSUPPORT))                                  kdbus_available = true; -                } else if (!kdbus_available && b->sockaddr.sa.sa_family != AF_UNSPEC) -                        r = bus_socket_connect(b); -                else + +                } else if (b->sockaddr.sa.sa_family != AF_UNSPEC) { +                        if (!kdbus_available) +                                r = bus_socket_connect(b); +                        else +                                skipped = true; + +                } else                          skipped = true;                  if (!skipped) { diff --git a/src/libsystemd/sd-login/sd-login.c b/src/libsystemd/sd-login/sd-login.c index 0eadc8c747..7d6a4b78cf 100644 --- a/src/libsystemd/sd-login/sd-login.c +++ b/src/libsystemd/sd-login/sd-login.c @@ -237,11 +237,13 @@ _public_ int sd_uid_get_display(uid_t uid, char **session) {                  return r;          r = parse_env_file(p, NEWLINE, "DISPLAY", &s, NULL); +        if (r == -ENOENT) +                return -ENXIO;          if (r < 0)                  return r;          if (isempty(s)) -                return -ENOENT; +                return -ENXIO;          *session = s;          s = NULL; @@ -465,7 +467,7 @@ static int session_get_string(const char *session, const char *field, char **val                  return r;          if (isempty(s)) -                return -ENOENT; +                return -ENXIO;          *value = s;          s = NULL; diff --git a/src/libsystemd/sd-login/test-login.c b/src/libsystemd/sd-login/test-login.c index 05affa442d..ddea7ffa14 100644 --- a/src/libsystemd/sd-login/test-login.c +++ b/src/libsystemd/sd-login/test-login.c @@ -33,7 +33,7 @@ static void test_login(void) {          _cleanup_free_ char *pp = NULL, *qq = NULL;          int r, k;          uid_t u, u2; -        char *seat, *type, *class, *display, *remote_user, *remote_host; +        char *seat, *type, *class, *display, *remote_user, *remote_host, *display_session;          char *session;          char *state;          char *session2; @@ -50,6 +50,12 @@ static void test_login(void) {          assert_se(sd_pid_get_owner_uid(0, &u2) == 0);          printf("user = "UID_FMT"\n", u2); +        display_session = NULL; +        r = sd_uid_get_display(u2, &display_session); +        assert_se(r >= 0 || r == -ENXIO); +        printf("user's display session = %s\n", strna(display_session)); +        free(display_session); +          assert_se(socketpair(AF_UNIX, SOCK_STREAM, 0, pair) == 0);          sd_peer_get_session(pair[0], &pp);          sd_peer_get_session(pair[1], &qq); @@ -100,16 +106,22 @@ static void test_login(void) {          printf("class = %s\n", class);          free(class); -        assert_se(sd_session_get_display(session, &display) >= 0); -        printf("display = %s\n", display); +        display = NULL; +        r = sd_session_get_display(session, &display); +        assert_se(r >= 0 || r == -ENXIO); +        printf("display = %s\n", strna(display));          free(display); -        assert_se(sd_session_get_remote_user(session, &remote_user) >= 0); -        printf("remote_user = %s\n", remote_user); +        remote_user = NULL; +        r = sd_session_get_remote_user(session, &remote_user); +        assert_se(r >= 0 || r == -ENXIO); +        printf("remote_user = %s\n", strna(remote_user));          free(remote_user); -        assert_se(sd_session_get_remote_host(session, &remote_host) >= 0); -        printf("remote_host = %s\n", remote_host); +        remote_host = NULL; +        r = sd_session_get_remote_host(session, &remote_host); +        assert_se(r >= 0 || r == -ENXIO); +        printf("remote_host = %s\n", strna(remote_host));          free(remote_host);          assert_se(sd_session_get_seat(session, &seat) >= 0); diff --git a/src/login/loginctl.c b/src/login/loginctl.c index 5fa98e069f..a7e64071cf 100644 --- a/src/login/loginctl.c +++ b/src/login/loginctl.c @@ -263,7 +263,7 @@ static int show_unit_cgroup(sd_bus *bus, const char *interface, const char *unit          if (isempty(cgroup))                  return 0; -        if (cg_is_empty_recursive(SYSTEMD_CGROUP_CONTROLLER, cgroup, false) != 0 && leader <= 0) +        if (cg_is_empty_recursive(SYSTEMD_CGROUP_CONTROLLER, cgroup) != 0 && leader <= 0)                  return 0;          c = columns(); diff --git a/src/machine/machine-dbus.c b/src/machine/machine-dbus.c index af2b8eff06..a63b9785af 100644 --- a/src/machine/machine-dbus.c +++ b/src/machine/machine-dbus.c @@ -45,6 +45,7 @@  #include "formats-util.h"  #include "process-util.h"  #include "env-util.h" +#include "terminal-util.h"  static int property_get_id(                  sd_bus *bus, @@ -500,7 +501,7 @@ int bus_machine_method_open_pty(sd_bus_message *message, void *userdata, sd_bus_          if (master < 0)                  return master; -        r = ptsname_malloc(master, &pty_name); +        r = ptsname_namespace(master, &pty_name);          if (r < 0)                  return r; @@ -589,7 +590,7 @@ int bus_machine_method_open_login(sd_bus_message *message, void *userdata, sd_bu          if (master < 0)                  return master; -        r = ptsname_malloc(master, &pty_name); +        r = ptsname_namespace(master, &pty_name);          if (r < 0)                  return r; @@ -597,9 +598,6 @@ int bus_machine_method_open_login(sd_bus_message *message, void *userdata, sd_bu          if (!p)                  return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "PTS name %s is invalid", pty_name); -        if (unlockpt(master) < 0) -                return -errno; -          r = container_bus_new(m, &allocated_bus);          if (r < 0)                  return r; @@ -690,7 +688,7 @@ int bus_machine_method_open_shell(sd_bus_message *message, void *userdata, sd_bu          if (master < 0)                  return master; -        r = ptsname_malloc(master, &pty_name); +        r = ptsname_namespace(master, &pty_name);          if (r < 0)                  return r; @@ -701,9 +699,6 @@ int bus_machine_method_open_shell(sd_bus_message *message, void *userdata, sd_bu          utmp_id = path_startswith(pty_name, "/dev/");          assert(utmp_id); -        if (unlockpt(master) < 0) -                return -errno; -          r = container_bus_new(m, &allocated_bus);          if (r < 0)                  return r; diff --git a/src/machine/machinectl.c b/src/machine/machinectl.c index 8bd0ed756b..bb8c5ac64b 100644 --- a/src/machine/machinectl.c +++ b/src/machine/machinectl.c @@ -375,7 +375,7 @@ static int show_unit_cgroup(sd_bus *bus, const char *unit, pid_t leader) {          if (r < 0)                  return bus_log_parse_error(r); -        if (cg_is_empty_recursive(SYSTEMD_CGROUP_CONTROLLER, cgroup, false) != 0 && leader <= 0) +        if (cg_is_empty_recursive(SYSTEMD_CGROUP_CONTROLLER, cgroup) != 0 && leader <= 0)                  return 0;          c = columns(); diff --git a/src/network/networkctl.c b/src/network/networkctl.c index 2281d4b718..786579def0 100644 --- a/src/network/networkctl.c +++ b/src/network/networkctl.c @@ -497,7 +497,7 @@ static int link_status_one(                  sd_hwdb *hwdb,                  const char *name) {          _cleanup_strv_free_ char **dns = NULL, **ntp = NULL, **domains = NULL; -        _cleanup_free_ char *setup_state = NULL, *operational_state = NULL, *timezone = NULL; +        _cleanup_free_ char *setup_state = NULL, *operational_state = NULL, *tz = NULL;          _cleanup_netlink_message_unref_ sd_netlink_message *req = NULL, *reply = NULL;          _cleanup_device_unref_ sd_device *d = NULL;          char devid[2 + DECIMAL_STR_MAX(int)]; @@ -662,9 +662,9 @@ static int link_status_one(          if (!strv_isempty(carrier_bound_by))                  dump_list("Carrier Bound By: ", carrier_bound_by); -        (void) sd_network_link_get_timezone(ifindex, &timezone); -        if (timezone) -                printf("       Time Zone: %s", timezone); +        (void) sd_network_link_get_timezone(ifindex, &tz); +        if (tz) +                printf("       Time Zone: %s", tz);          return 0;  } diff --git a/src/network/networkd-link.c b/src/network/networkd-link.c index 979f3115f6..1dc9db0fca 100644 --- a/src/network/networkd-link.c +++ b/src/network/networkd-link.c @@ -967,14 +967,14 @@ static int set_timezone_handler(sd_bus_message *m, void *userdata, sd_bus_error          return 1;  } -int link_set_timezone(Link *link, const char *timezone) { +int link_set_timezone(Link *link, const char *tz) {          int r;          assert(link);          assert(link->manager); -        assert(timezone); +        assert(tz); -        log_link_debug(link, "Setting system timezone: '%s'", timezone); +        log_link_debug(link, "Setting system timezone: '%s'", tz);          if (!link->manager->bus) {                  log_link_info(link, "Not connected to system bus, ignoring timezone."); @@ -991,7 +991,7 @@ int link_set_timezone(Link *link, const char *timezone) {                          set_timezone_handler,                          link,                          "sb", -                        timezone, +                        tz,                          false);          if (r < 0)                  return log_link_error_errno(link, r, "Could not set timezone: %m"); diff --git a/src/network/networkd-network.c b/src/network/networkd-network.c index 2a77242013..ee14401982 100644 --- a/src/network/networkd-network.c +++ b/src/network/networkd-network.c @@ -786,7 +786,7 @@ int config_parse_timezone(                  void *data,                  void *userdata) { -        char **timezone = data, *tz = NULL; +        char **datap = data, *tz = NULL;          int r;          assert(filename); @@ -803,8 +803,8 @@ int config_parse_timezone(                  return 0;          } -        free(*timezone); -        *timezone = tz; +        free(*datap); +        *datap = tz;          return 0;  } diff --git a/src/nspawn/nspawn.c b/src/nspawn/nspawn.c index 8039847a72..a56960506c 100644 --- a/src/nspawn/nspawn.c +++ b/src/nspawn/nspawn.c @@ -204,6 +204,7 @@ static char **arg_property = NULL;  static uid_t arg_uid_shift = UID_INVALID, arg_uid_range = 0x10000U;  static bool arg_userns = false;  static int arg_kill_signal = 0; +static bool arg_unified_cgroup_hierarchy = false;  static void help(void) {          printf("%s [OPTIONS...] [PATH] [ARGUMENTS...]\n\n" @@ -385,6 +386,30 @@ static int set_sanitized_path(char **b, const char *path) {          return 0;  } +static int detect_unified_cgroup_hierarchy(void) { +        const char *e; +        int r; + +        /* Allow the user to control whether the unified hierarchy is used */ +        e = getenv("UNIFIED_CGROUP_HIERARCHY"); +        if (e) { +                r = parse_boolean(e); +                if (r < 0) +                        return log_error_errno(r, "Failed to parse $UNIFIED_CGROUP_HIERARCHY."); + +                arg_unified_cgroup_hierarchy = r; +                return 0; +        } + +        /* Otherwise inherit the default from the host system */ +        r = cg_unified(); +        if (r < 0) +                return log_error_errno(r, "Failed to determine whether the unified cgroups hierarchy is used: %m"); + +        arg_unified_cgroup_hierarchy = r; +        return 0; +} +  static int parse_argv(int argc, char *argv[]) {          enum { @@ -1037,6 +1062,10 @@ static int parse_argv(int argc, char *argv[]) {          if (arg_boot && arg_kill_signal <= 0)                  arg_kill_signal = SIGRTMIN+3; +        r = detect_unified_cgroup_hierarchy(); +        if (r < 0) +                return r; +          return 1;  } @@ -1095,7 +1124,6 @@ static int mount_all(const char *dest, bool userns) {                  { "/proc/sys", "/proc/sys",      NULL,     NULL,        MS_BIND,                                                   true,  true  },   /* Bind mount first */                  { NULL,        "/proc/sys",      NULL,     NULL,        MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT, true,  true  },   /* Then, make it r/o */                  { "sysfs",     "/sys",           "sysfs",  NULL,        MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV,                    true,  false }, -                { "tmpfs",     "/sys/fs/cgroup", "tmpfs",  "mode=755",  MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME,               true,  false },                  { "tmpfs",     "/dev",           "tmpfs",  "mode=755",  MS_NOSUID|MS_STRICTATIME,                                  true,  false },                  { "tmpfs",     "/dev/shm",       "tmpfs",  "mode=1777", MS_NOSUID|MS_NODEV|MS_STRICTATIME,                         true,  false },                  { "tmpfs",     "/run",           "tmpfs",  "mode=755",  MS_NOSUID|MS_NODEV|MS_STRICTATIME,                         true,  false }, @@ -1381,7 +1409,7 @@ static int mount_custom(const char *dest) {          return 0;  } -static int mount_cgroup_hierarchy(const char *dest, const char *controller, const char *hierarchy, bool read_only) { +static int mount_legacy_cgroup_hierarchy(const char *dest, const char *controller, const char *hierarchy, bool read_only) {          char *to;          int r; @@ -1409,11 +1437,31 @@ static int mount_cgroup_hierarchy(const char *dest, const char *controller, cons          return 1;  } -static int mount_cgroup(const char *dest) { +static int mount_legacy_cgroups(const char *dest) {          _cleanup_set_free_free_ Set *controllers = NULL;          const char *cgroup_root;          int r; +        cgroup_root = prefix_roota(dest, "/sys/fs/cgroup"); + +        /* Mount a tmpfs to /sys/fs/cgroup if it's not mounted there yet. */ +        r = path_is_mount_point(cgroup_root, AT_SYMLINK_FOLLOW); +        if (r < 0) +                return log_error_errno(r, "Failed to determine if /sys/fs/cgroup is already mounted: %m"); +        if (r == 0) { +                _cleanup_free_ char *options = NULL; + +                r = tmpfs_patch_options("mode=755", &options); +                if (r < 0) +                        return log_oom(); + +                if (mount("tmpfs", cgroup_root, "tmpfs", MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME, options) < 0) +                        return log_error_errno(errno, "Failed to mount /sys/fs/cgroup: %m"); +        } + +        if (cg_unified() > 0) +                goto skip_controllers; +          controllers = set_new(&string_hash_ops);          if (!controllers)                  return log_oom(); @@ -1437,7 +1485,7 @@ static int mount_cgroup(const char *dest) {                  if (r == -EINVAL) {                          /* Not a symbolic link, but directly a single cgroup hierarchy */ -                        r = mount_cgroup_hierarchy(dest, controller, controller, true); +                        r = mount_legacy_cgroup_hierarchy(dest, controller, controller, true);                          if (r < 0)                                  return r; @@ -1457,7 +1505,7 @@ static int mount_cgroup(const char *dest) {                                  continue;                          } -                        r = mount_cgroup_hierarchy(dest, combined, combined, true); +                        r = mount_legacy_cgroup_hierarchy(dest, combined, combined, true);                          if (r < 0)                                  return r; @@ -1471,17 +1519,52 @@ static int mount_cgroup(const char *dest) {                  }          } -        r = mount_cgroup_hierarchy(dest, "name=systemd,xattr", "systemd", false); +skip_controllers: +        r = mount_legacy_cgroup_hierarchy(dest, "none,name=systemd,xattr", "systemd", false);          if (r < 0)                  return r; -        cgroup_root = prefix_roota(dest, "/sys/fs/cgroup");          if (mount(NULL, cgroup_root, NULL, MS_REMOUNT|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME|MS_RDONLY, "mode=755") < 0)                  return log_error_errno(errno, "Failed to remount %s read-only: %m", cgroup_root);          return 0;  } +static int mount_unified_cgroups(const char *dest) { +        const char *p; +        int r; + +        assert(dest); + +        p = strjoina(dest, "/sys/fs/cgroup"); + +        r = path_is_mount_point(p, AT_SYMLINK_FOLLOW); +        if (r < 0) +                return log_error_errno(r, "Failed to determine if %s is mounted already: %m", p); +        if (r > 0) { +                p = strjoina(dest, "/sys/fs/cgroup/cgroup.procs"); +                if (access(p, F_OK) >= 0) +                        return 0; +                if (errno != ENOENT) +                        return log_error_errno(errno, "Failed to determine if mount point %s contains the unified cgroup hierarchy: %m", p); + +                log_error("%s is already mounted but not a unified cgroup hierarchy. Refusing.", p); +                return -EINVAL; +        } + +        if (mount("cgroup", p, "cgroup", MS_NOSUID|MS_NOEXEC|MS_NODEV, "__DEVEL__sane_behavior") < 0) +                return log_error_errno(errno, "Failed to mount unified cgroup hierarchy to %s: %m", p); + +        return 0; +} + +static int mount_cgroups(const char *dest) { +        if (arg_unified_cgroup_hierarchy) +                return mount_unified_cgroups(dest); +        else +                return mount_legacy_cgroups(dest); +} +  static int mount_systemd_cgroup_writable(const char *dest) {          _cleanup_free_ char *own_cgroup_path = NULL;          const char *systemd_root, *systemd_own; @@ -1493,13 +1576,23 @@ static int mount_systemd_cgroup_writable(const char *dest) {          if (r < 0)                  return log_error_errno(r, "Failed to determine our own cgroup path: %m"); +        /* If we are living in the top-level, then there's nothing to do... */ +        if (path_equal(own_cgroup_path, "/")) +                return 0; + +        if (arg_unified_cgroup_hierarchy) { +                systemd_own = strjoina(dest, "/sys/fs/cgroup", own_cgroup_path); +                systemd_root = prefix_roota(dest, "/sys/fs/cgroup"); +        } else { +                systemd_own = strjoina(dest, "/sys/fs/cgroup/systemd", own_cgroup_path); +                systemd_root = prefix_roota(dest, "/sys/fs/cgroup/systemd"); +        } +          /* Make our own cgroup a (writable) bind mount */ -        systemd_own = strjoina(dest, "/sys/fs/cgroup/systemd", own_cgroup_path);          if (mount(systemd_own, systemd_own,  NULL, MS_BIND, NULL) < 0)                  return log_error_errno(errno, "Failed to turn %s into a bind mount: %m", own_cgroup_path);          /* And then remount the systemd cgroup root read-only */ -        systemd_root = prefix_roota(dest, "/sys/fs/cgroup/systemd");          if (mount(NULL, systemd_root, NULL, MS_BIND|MS_REMOUNT|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_RDONLY, NULL) < 0)                  return log_error_errno(errno, "Failed to mount cgroup root read-only: %m"); @@ -4187,6 +4280,8 @@ static int inner_child(          assert(directory);          assert(kmsg_socket >= 0); +        cg_unified_flush(); +          if (arg_userns) {                  /* Tell the parent, that it now can write the UID map. */                  (void) barrier_place(barrier); /* #1 */ @@ -4368,6 +4463,8 @@ static int outer_child(          assert(pid_socket >= 0);          assert(kmsg_socket >= 0); +        cg_unified_flush(); +          if (prctl(PR_SET_PDEATHSIG, SIGKILL) < 0)                  return log_error_errno(errno, "PR_SET_PDEATHSIG failed: %m"); @@ -4484,7 +4581,7 @@ static int outer_child(          if (r < 0)                  return r; -        r = mount_cgroup(directory); +        r = mount_cgroups(directory);          if (r < 0)                  return r; @@ -4499,7 +4596,6 @@ static int outer_child(                          NULL);          if (pid < 0)                  return log_error_errno(errno, "Failed to fork inner child: %m"); -          if (pid == 0) {                  pid_socket = safe_close(pid_socket);                  uid_shift_socket = safe_close(uid_shift_socket); @@ -4567,9 +4663,112 @@ static int chown_cgroup(pid_t pid) {          if (fd < 0)                  return log_error_errno(errno, "Failed to open %s: %m", fs); -        FOREACH_STRING(fn, ".", "tasks", "notify_on_release", "cgroup.procs", "cgroup.clone_children") +        FOREACH_STRING(fn, +                       ".", +                       "tasks", +                       "notify_on_release", +                       "cgroup.procs", +                       "cgroup.clone_children", +                       "cgroup.controllers", +                       "cgroup.subtree_control", +                       "cgroup.populated")                  if (fchownat(fd, fn, arg_uid_shift, arg_uid_shift, 0) < 0) -                        log_warning_errno(errno, "Failed to chown() cgroup file %s, ignoring: %m", fn); +                        log_full_errno(errno == ENOENT ? LOG_DEBUG :  LOG_WARNING, errno, +                                       "Failed to chown() cgroup file %s, ignoring: %m", fn); + +        return 0; +} + +static int sync_cgroup(pid_t pid) { +        _cleanup_free_ char *cgroup = NULL; +        char tree[] = "/tmp/unifiedXXXXXX", pid_string[DECIMAL_STR_MAX(pid) + 1]; +        bool undo_mount = false; +        const char *fn; +        int unified, r; + +        unified = cg_unified(); +        if (unified < 0) +                return log_error_errno(unified, "Failed to determine whether the unified hierachy is used: %m"); + +        if ((unified > 0) == arg_unified_cgroup_hierarchy) +                return 0; + +        /* When the host uses the legacy cgroup setup, but the +         * container shall use the unified hierarchy, let's make sure +         * we copy the path from the name=systemd hierarchy into the +         * unified hierarchy. Similar for the reverse situation. */ + +        r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, pid, &cgroup); +        if (r < 0) +                return log_error_errno(r, "Failed to get control group of " PID_FMT ": %m", pid); + +        /* In order to access the unified hierarchy we need to mount it */ +        if (!mkdtemp(tree)) +                return log_error_errno(errno, "Failed to generate temporary mount point for unified hierarchy: %m"); + +        if (unified) +                r = mount("cgroup", tree, "cgroup", MS_NOSUID|MS_NOEXEC|MS_NODEV, "none,name=systemd,xattr"); +        else +                r = mount("cgroup", tree, "cgroup", MS_NOSUID|MS_NOEXEC|MS_NODEV, "__DEVEL__sane_behavior"); +        if (r < 0) { +                r = log_error_errno(errno, "Failed to mount unified hierarchy: %m"); +                goto finish; +        } + +        undo_mount = true; + +        fn = strjoina(tree, cgroup, "/cgroup.procs"); +        (void) mkdir_parents(fn, 0755); + +        sprintf(pid_string, PID_FMT, pid); +        r = write_string_file(fn, pid_string, 0); +        if (r < 0) +                log_error_errno(r, "Failed to move process: %m"); + +finish: +        if (undo_mount) +                (void) umount(tree); + +        (void) rmdir(tree); +        return r; +} + +static int create_subcgroup(pid_t pid) { +        _cleanup_free_ char *cgroup = NULL; +        const char *child; +        int unified, r; + +        /* In the unified hierarchy inner nodes may only only contain +         * subgroups, but not processes. Hence, if we running in the +         * unified hierarchy and the container does the same, and we +         * did not create a scope unit for the container move us and +         * the container into two separate subcgroups. */ + +        if (!arg_keep_unit) +                return 0; + +        if (!arg_unified_cgroup_hierarchy) +                return 0; + +        unified = cg_unified(); +        if (unified < 0) +                return log_error_errno(unified, "Failed to determine whether the unified hierachy is used: %m"); +        if (unified == 0) +                return 0; + +        r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, 0, &cgroup); +        if (r < 0) +                return log_error_errno(r, "Failed to get our control group: %m"); + +        child = strjoina(cgroup, "/payload"); +        r = cg_create_and_attach(SYSTEMD_CGROUP_CONTROLLER, child, pid); +        if (r < 0) +                return log_error_errno(r, "Failed to create %s subcgroup: %m", child); + +        child = strjoina(cgroup, "/supervisor"); +        r = cg_create_and_attach(SYSTEMD_CGROUP_CONTROLLER, child, 0); +        if (r < 0) +                return log_error_errno(r, "Failed to create %s subcgroup: %m", child);          return 0;  } @@ -4976,6 +5175,14 @@ int main(int argc, char *argv[]) {                  if (r < 0)                          goto finish; +                r = sync_cgroup(pid); +                if (r < 0) +                        goto finish; + +                r = create_subcgroup(pid); +                if (r < 0) +                        goto finish; +                  r = chown_cgroup(pid);                  if (r < 0)                          goto finish; diff --git a/src/run/run.c b/src/run/run.c index 3dd97022de..657c6fcaf1 100644 --- a/src/run/run.c +++ b/src/run/run.c @@ -36,7 +36,9 @@  #include "ptyfwd.h"  #include "formats-util.h"  #include "signal-util.h" +#include "spawn-polkit-agent.h" +static bool arg_ask_password = true;  static bool arg_scope = false;  static bool arg_remain_after_exit = false;  static bool arg_no_block = false; @@ -64,6 +66,18 @@ static char *arg_on_calendar = NULL;  static char **arg_timer_property = NULL;  static bool arg_quiet = false; +static void polkit_agent_open_if_enabled(void) { + +        /* Open the polkit agent as a child process if necessary */ +        if (!arg_ask_password) +                return; + +        if (arg_transport != BUS_TRANSPORT_LOCAL) +                return; + +        polkit_agent_open(); +} +  static void help(void) {          printf("%s [OPTIONS...] {COMMAND} [ARGS...]\n\n"                 "Run the specified command in a transient scope or service or timer\n" @@ -71,6 +85,7 @@ static void help(void) {                 "specified with --unit option then command can be omitted.\n\n"                 "  -h --help                       Show this help\n"                 "     --version                    Show package version\n" +               "     --no-ask-password            Do not prompt for password\n"                 "     --user                       Run as user unit\n"                 "  -H --host=[USER@]HOST           Operate on remote host\n"                 "  -M --machine=CONTAINER          Operate on local container\n" @@ -108,6 +123,7 @@ static int parse_argv(int argc, char *argv[]) {          enum {                  ARG_VERSION = 0x100, +                ARG_NO_ASK_PASSWORD,                  ARG_USER,                  ARG_SYSTEM,                  ARG_SCOPE, @@ -160,6 +176,7 @@ static int parse_argv(int argc, char *argv[]) {                  { "on-calendar",       required_argument, NULL, ARG_ON_CALENDAR      },                  { "timer-property",    required_argument, NULL, ARG_TIMER_PROPERTY   },                  { "no-block",          no_argument,       NULL, ARG_NO_BLOCK         }, +                { "no-ask-password",   no_argument,       NULL, ARG_NO_ASK_PASSWORD },                  {},          }; @@ -177,6 +194,10 @@ static int parse_argv(int argc, char *argv[]) {                          help();                          return 0; +                case ARG_NO_ASK_PASSWORD: +                        arg_ask_password = false; +                        break; +                  case ARG_VERSION:                          puts(PACKAGE_STRING);                          puts(SYSTEMD_FEATURES); @@ -681,6 +702,9 @@ static int start_transient_service(                          if (r < 0)                                  return log_error_errno(r, "Failed to determine tty name: %m"); +                        if (unlockpt(master) < 0) +                                return log_error_errno(errno, "Failed to unlock tty: %m"); +                  } else if (arg_transport == BUS_TRANSPORT_MACHINE) {                          _cleanup_bus_unref_ sd_bus *system_bus = NULL;                          const char *s; @@ -717,9 +741,6 @@ static int start_transient_service(                                  return log_oom();                  } else                          assert_not_reached("Can't allocate tty via ssh"); - -                if (unlockpt(master) < 0) -                        return log_error_errno(errno, "Failed to unlock tty: %m");          }          if (!arg_no_block) { @@ -745,6 +766,10 @@ static int start_transient_service(          if (r < 0)                  return bus_log_create_error(r); +        r = sd_bus_message_set_allow_interactive_authorization(m, arg_ask_password); +        if (r < 0) +                return bus_log_create_error(r); +          /* Name and mode */          r = sd_bus_message_append(m, "ss", service, "fail");          if (r < 0) @@ -768,6 +793,8 @@ static int start_transient_service(          if (r < 0)                  return bus_log_create_error(r); +        polkit_agent_open_if_enabled(); +          r = sd_bus_call(bus, m, 0, &error, &reply);          if (r < 0) {                  log_error("Failed to start transient service unit: %s", bus_error_message(&error, -r)); @@ -860,6 +887,10 @@ static int start_transient_scope(          if (r < 0)                  return bus_log_create_error(r); +        r = sd_bus_message_set_allow_interactive_authorization(m, arg_ask_password); +        if (r < 0) +                return bus_log_create_error(r); +          /* Name and Mode */          r = sd_bus_message_append(m, "ss", scope, "fail");          if (r < 0) @@ -883,6 +914,8 @@ static int start_transient_scope(          if (r < 0)                  return bus_log_create_error(r); +        polkit_agent_open_if_enabled(); +          r = sd_bus_call(bus, m, 0, &error, &reply);          if (r < 0) {                  log_error("Failed to start transient scope unit: %s", bus_error_message(&error, -r)); @@ -1025,6 +1058,10 @@ static int start_transient_timer(          if (r < 0)                  return bus_log_create_error(r); +        r = sd_bus_message_set_allow_interactive_authorization(m, arg_ask_password); +        if (r < 0) +                return bus_log_create_error(r); +          /* Name and Mode */          r = sd_bus_message_append(m, "ss", timer, "fail");          if (r < 0) @@ -1077,6 +1114,8 @@ static int start_transient_timer(          if (r < 0)                  return bus_log_create_error(r); +        polkit_agent_open_if_enabled(); +          r = sd_bus_call(bus, m, 0, &error, &reply);          if (r < 0) {                  log_error("Failed to start transient timer unit: %s", bus_error_message(&error, -r)); diff --git a/src/shared/cgroup-show.c b/src/shared/cgroup-show.c index 3abccdb49a..31b4f6c684 100644 --- a/src/shared/cgroup-show.c +++ b/src/shared/cgroup-show.c @@ -152,7 +152,7 @@ int show_cgroup_by_path(const char *path, const char *prefix, unsigned n_columns                  if (!k)                          return -ENOMEM; -                if (!(flags & OUTPUT_SHOW_ALL) && cg_is_empty_recursive(NULL, k, false) > 0) +                if (!(flags & OUTPUT_SHOW_ALL) && cg_is_empty_recursive(NULL, k) > 0)                          continue;                  if (!shown_pids) { diff --git a/src/systemctl/systemctl.c b/src/systemctl/systemctl.c index 3cb5f61868..8d80aae182 100644 --- a/src/systemctl/systemctl.c +++ b/src/systemctl/systemctl.c @@ -3557,7 +3557,7 @@ static void print_status_info(          if (i->control_group &&              (i->main_pid > 0 || i->control_pid > 0 || -             ((arg_transport != BUS_TRANSPORT_LOCAL && arg_transport != BUS_TRANSPORT_MACHINE) || cg_is_empty_recursive(SYSTEMD_CGROUP_CONTROLLER, i->control_group, false) == 0))) { +             ((arg_transport != BUS_TRANSPORT_LOCAL && arg_transport != BUS_TRANSPORT_MACHINE) || cg_is_empty_recursive(SYSTEMD_CGROUP_CONTROLLER, i->control_group) == 0))) {                  unsigned c;                  printf("   CGroup: %s\n", i->control_group); diff --git a/src/test/test-cgroup-mask.c b/src/test/test-cgroup-mask.c index 72f874d8a9..de6c421b82 100644 --- a/src/test/test-cgroup-mask.c +++ b/src/test/test-cgroup-mask.c @@ -61,36 +61,36 @@ static int test_cgroup_mask(void) {          root = UNIT_DEREF(parent->slice);          /* Verify per-unit cgroups settings. */ -        assert_se(unit_get_cgroup_mask(son) == (CGROUP_CPU | CGROUP_CPUACCT)); -        assert_se(unit_get_cgroup_mask(daughter) == 0); -        assert_se(unit_get_cgroup_mask(grandchild) == 0); -        assert_se(unit_get_cgroup_mask(parent_deep) == CGROUP_MEMORY); -        assert_se(unit_get_cgroup_mask(parent) == CGROUP_BLKIO); -        assert_se(unit_get_cgroup_mask(root) == 0); +        assert_se(unit_get_own_mask(son) == (CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT)); +        assert_se(unit_get_own_mask(daughter) == 0); +        assert_se(unit_get_own_mask(grandchild) == 0); +        assert_se(unit_get_own_mask(parent_deep) == CGROUP_MASK_MEMORY); +        assert_se(unit_get_own_mask(parent) == CGROUP_MASK_BLKIO); +        assert_se(unit_get_own_mask(root) == 0);          /* Verify aggregation of member masks */          assert_se(unit_get_members_mask(son) == 0);          assert_se(unit_get_members_mask(daughter) == 0);          assert_se(unit_get_members_mask(grandchild) == 0);          assert_se(unit_get_members_mask(parent_deep) == 0); -        assert_se(unit_get_members_mask(parent) == (CGROUP_CPU | CGROUP_CPUACCT | CGROUP_MEMORY)); -        assert_se(unit_get_members_mask(root) == (CGROUP_CPU | CGROUP_CPUACCT | CGROUP_BLKIO | CGROUP_MEMORY)); +        assert_se(unit_get_members_mask(parent) == (CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT | CGROUP_MASK_MEMORY)); +        assert_se(unit_get_members_mask(root) == (CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT | CGROUP_MASK_BLKIO | CGROUP_MASK_MEMORY));          /* Verify aggregation of sibling masks. */ -        assert_se(unit_get_siblings_mask(son) == (CGROUP_CPU | CGROUP_CPUACCT | CGROUP_MEMORY)); -        assert_se(unit_get_siblings_mask(daughter) == (CGROUP_CPU | CGROUP_CPUACCT | CGROUP_MEMORY)); +        assert_se(unit_get_siblings_mask(son) == (CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT | CGROUP_MASK_MEMORY)); +        assert_se(unit_get_siblings_mask(daughter) == (CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT | CGROUP_MASK_MEMORY));          assert_se(unit_get_siblings_mask(grandchild) == 0); -        assert_se(unit_get_siblings_mask(parent_deep) == (CGROUP_CPU | CGROUP_CPUACCT | CGROUP_MEMORY)); -        assert_se(unit_get_siblings_mask(parent) == (CGROUP_CPU | CGROUP_CPUACCT | CGROUP_BLKIO | CGROUP_MEMORY)); -        assert_se(unit_get_siblings_mask(root) == (CGROUP_CPU | CGROUP_CPUACCT | CGROUP_BLKIO | CGROUP_MEMORY)); +        assert_se(unit_get_siblings_mask(parent_deep) == (CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT | CGROUP_MASK_MEMORY)); +        assert_se(unit_get_siblings_mask(parent) == (CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT | CGROUP_MASK_BLKIO | CGROUP_MASK_MEMORY)); +        assert_se(unit_get_siblings_mask(root) == (CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT | CGROUP_MASK_BLKIO | CGROUP_MASK_MEMORY));          /* Verify aggregation of target masks. */ -        assert_se(unit_get_target_mask(son) == ((CGROUP_CPU | CGROUP_CPUACCT | CGROUP_MEMORY) & m->cgroup_supported)); -        assert_se(unit_get_target_mask(daughter) == ((CGROUP_CPU | CGROUP_CPUACCT | CGROUP_MEMORY) & m->cgroup_supported)); +        assert_se(unit_get_target_mask(son) == ((CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT | CGROUP_MASK_MEMORY) & m->cgroup_supported)); +        assert_se(unit_get_target_mask(daughter) == ((CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT | CGROUP_MASK_MEMORY) & m->cgroup_supported));          assert_se(unit_get_target_mask(grandchild) == 0); -        assert_se(unit_get_target_mask(parent_deep) == ((CGROUP_CPU | CGROUP_CPUACCT | CGROUP_MEMORY) & m->cgroup_supported)); -        assert_se(unit_get_target_mask(parent) == ((CGROUP_CPU | CGROUP_CPUACCT | CGROUP_BLKIO | CGROUP_MEMORY) & m->cgroup_supported)); -        assert_se(unit_get_target_mask(root) == ((CGROUP_CPU | CGROUP_CPUACCT | CGROUP_BLKIO | CGROUP_MEMORY) & m->cgroup_supported)); +        assert_se(unit_get_target_mask(parent_deep) == ((CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT | CGROUP_MASK_MEMORY) & m->cgroup_supported)); +        assert_se(unit_get_target_mask(parent) == ((CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT | CGROUP_MASK_BLKIO | CGROUP_MASK_MEMORY) & m->cgroup_supported)); +        assert_se(unit_get_target_mask(root) == ((CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT | CGROUP_MASK_BLKIO | CGROUP_MASK_MEMORY) & m->cgroup_supported));          manager_free(m); diff --git a/src/test/test-cgroup.c b/src/test/test-cgroup.c index 4be69a408d..37b1c3554a 100644 --- a/src/test/test-cgroup.c +++ b/src/test/test-cgroup.c @@ -56,26 +56,26 @@ int main(int argc, char*argv[]) {          assert_se(path_equal(path, "/sys/fs/cgroup/systemd/test-b/test-d"));          free(path); -        assert_se(cg_is_empty(SYSTEMD_CGROUP_CONTROLLER, "/test-a", false) > 0); -        assert_se(cg_is_empty(SYSTEMD_CGROUP_CONTROLLER, "/test-b", false) > 0); -        assert_se(cg_is_empty_recursive(SYSTEMD_CGROUP_CONTROLLER, "/test-a", false) > 0); -        assert_se(cg_is_empty_recursive(SYSTEMD_CGROUP_CONTROLLER, "/test-b", false) == 0); +        assert_se(cg_is_empty(SYSTEMD_CGROUP_CONTROLLER, "/test-a") > 0); +        assert_se(cg_is_empty(SYSTEMD_CGROUP_CONTROLLER, "/test-b") > 0); +        assert_se(cg_is_empty_recursive(SYSTEMD_CGROUP_CONTROLLER, "/test-a") > 0); +        assert_se(cg_is_empty_recursive(SYSTEMD_CGROUP_CONTROLLER, "/test-b") == 0);          assert_se(cg_kill_recursive(SYSTEMD_CGROUP_CONTROLLER, "/test-a", 0, false, false, false, NULL) == 0);          assert_se(cg_kill_recursive(SYSTEMD_CGROUP_CONTROLLER, "/test-b", 0, false, false, false, NULL) > 0);          assert_se(cg_migrate_recursive(SYSTEMD_CGROUP_CONTROLLER, "/test-b", SYSTEMD_CGROUP_CONTROLLER, "/test-a", false, false) > 0); -        assert_se(cg_is_empty_recursive(SYSTEMD_CGROUP_CONTROLLER, "/test-a", false) == 0); -        assert_se(cg_is_empty_recursive(SYSTEMD_CGROUP_CONTROLLER, "/test-b", false) > 0); +        assert_se(cg_is_empty_recursive(SYSTEMD_CGROUP_CONTROLLER, "/test-a") == 0); +        assert_se(cg_is_empty_recursive(SYSTEMD_CGROUP_CONTROLLER, "/test-b") > 0);          assert_se(cg_kill_recursive(SYSTEMD_CGROUP_CONTROLLER, "/test-a", 0, false, false, false, NULL) > 0);          assert_se(cg_kill_recursive(SYSTEMD_CGROUP_CONTROLLER, "/test-b", 0, false, false, false, NULL) == 0);          cg_trim(SYSTEMD_CGROUP_CONTROLLER, "/", false); -        assert_se(cg_delete(SYSTEMD_CGROUP_CONTROLLER, "/test-b") < 0); -        assert_se(cg_delete(SYSTEMD_CGROUP_CONTROLLER, "/test-a") >= 0); +        assert_se(cg_rmdir(SYSTEMD_CGROUP_CONTROLLER, "/test-b") < 0); +        assert_se(cg_rmdir(SYSTEMD_CGROUP_CONTROLLER, "/test-a") >= 0);          assert_se(cg_split_spec("foobar:/", &c, &p) == 0);          assert_se(streq(c, "foobar")); | 
