From 5659958529d16f082a24d0c5b68699570b3eace3 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Fri, 29 Apr 2016 19:14:52 +0200 Subject: machined: run clone operation asynchronously in the background Cloning an image can be slow, if the image is not on a btrfs subvolume, hence let's make sure we do this asynchronously in a child process, so that machined isn't blocked as long as we process the client request. This adds a new, generic "Operation" object to machined, that is used to track these kind of background processes. This is inspired by the MachineOperation object that already exists to make copy operations asynchronous. A later patch will rework the MachineOperation logic to use the generic Operation instead. --- Makefile.am | 4 +- src/machine/image-dbus.c | 40 ++++++++++++++-- src/machine/machined.c | 8 ++++ src/machine/machined.h | 4 ++ src/machine/operation.c | 118 +++++++++++++++++++++++++++++++++++++++++++++++ src/machine/operation.h | 45 ++++++++++++++++++ 6 files changed, 213 insertions(+), 6 deletions(-) create mode 100644 src/machine/operation.c create mode 100644 src/machine/operation.h diff --git a/Makefile.am b/Makefile.am index b323de55c6..8ff9eeb5a5 100644 --- a/Makefile.am +++ b/Makefile.am @@ -4942,7 +4942,9 @@ libmachine_core_la_SOURCES = \ src/machine/machine-dbus.c \ src/machine/machine-dbus.h \ src/machine/image-dbus.c \ - src/machine/image-dbus.h + src/machine/image-dbus.h \ + src/machine/operation.c \ + src/machine/operation.h libmachine_core_la_LIBADD = \ libshared.la diff --git a/src/machine/image-dbus.c b/src/machine/image-dbus.c index b764bc43a0..ca38f61dd3 100644 --- a/src/machine/image-dbus.c +++ b/src/machine/image-dbus.c @@ -20,9 +20,11 @@ #include "alloc-util.h" #include "bus-label.h" #include "bus-util.h" +#include "fd-util.h" #include "image-dbus.h" #include "io-util.h" #include "machine-image.h" +#include "process-util.h" #include "strv.h" #include "user-util.h" @@ -107,13 +109,19 @@ int bus_image_method_clone( void *userdata, sd_bus_error *error) { + _cleanup_close_pair_ int errno_pipe_fd[2] = { -1, -1 }; Image *image = userdata; Manager *m = image->userdata; const char *new_name; int r, read_only; + pid_t child; assert(message); assert(image); + assert(m); + + if (m->n_operations >= OPERATIONS_MAX) + return sd_bus_error_setf(error, SD_BUS_ERROR_LIMITS_EXCEEDED, "Too many ongoing operations."); r = sd_bus_message_read(message, "sb", &new_name, &read_only); if (r < 0) @@ -136,13 +144,35 @@ int bus_image_method_clone( if (r == 0) return 1; /* Will call us back */ - r = image_clone(image, new_name, read_only); - if (r == -EOPNOTSUPP) - return sd_bus_reply_method_errnof(message, r, "Image cloning is currently only supported on btrfs file systems."); - if (r < 0) + if (pipe2(errno_pipe_fd, O_CLOEXEC|O_NONBLOCK) < 0) + return sd_bus_error_set_errnof(error, errno, "Failed to create pipe: %m"); + + child = fork(); + if (child < 0) + return sd_bus_error_set_errnof(error, errno, "Failed to fork(): %m"); + if (child == 0) { + errno_pipe_fd[0] = safe_close(errno_pipe_fd[0]); + + r = image_clone(image, new_name, read_only); + if (r < 0) { + (void) write(errno_pipe_fd[1], &r, sizeof(r)); + _exit(EXIT_FAILURE); + } + + _exit(EXIT_SUCCESS); + } + + errno_pipe_fd[1] = safe_close(errno_pipe_fd[1]); + + r = operation_new(m, child, message, errno_pipe_fd[0]); + if (r < 0) { + (void) sigkill_wait(&child); return r; + } - return sd_bus_reply_method_return(message, NULL); + errno_pipe_fd[0] = -1; + + return 1; } int bus_image_method_mark_read_only( diff --git a/src/machine/machined.c b/src/machine/machined.c index f2c1966a6b..f7ceb5e603 100644 --- a/src/machine/machined.c +++ b/src/machine/machined.c @@ -70,6 +70,11 @@ void manager_free(Manager *m) { assert(m); + while (m->operations) + operation_free(m->operations); + + assert(m->n_operations == 0); + while ((machine = hashmap_first(m->machines))) machine_free(machine); @@ -336,6 +341,9 @@ int manager_startup(Manager *m) { static bool check_idle(void *userdata) { Manager *m = userdata; + if (m->operations) + return false; + manager_gc(m, true); return hashmap_isempty(m->machines); diff --git a/src/machine/machined.h b/src/machine/machined.h index e7d7dfdceb..7b9b148044 100644 --- a/src/machine/machined.h +++ b/src/machine/machined.h @@ -32,6 +32,7 @@ typedef struct Manager Manager; #include "image-dbus.h" #include "machine-dbus.h" #include "machine.h" +#include "operation.h" struct Manager { sd_event *event; @@ -49,6 +50,9 @@ struct Manager { LIST_HEAD(Machine, machine_gc_queue); Machine *host_machine; + + LIST_HEAD(Operation, operations); + unsigned n_operations; }; Manager *manager_new(void); diff --git a/src/machine/operation.c b/src/machine/operation.c new file mode 100644 index 0000000000..53e996b48f --- /dev/null +++ b/src/machine/operation.c @@ -0,0 +1,118 @@ +/*** + This file is part of systemd. + + Copyright 2016 Lennart Poettering + + systemd is free software; you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation; either version 2.1 of the License, or + (at your option) any later version. + + systemd is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with systemd; If not, see . +***/ + +#include "alloc-util.h" +#include "fd-util.h" +#include "operation.h" +#include "process-util.h" + +static int operation_done(sd_event_source *s, const siginfo_t *si, void *userdata) { + _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL; + Operation *o = userdata; + int r; + + assert(o); + assert(si); + + log_debug("Operating " PID_FMT " is now complete with with code=%s status=%i", + o->pid, + sigchld_code_to_string(si->si_code), si->si_status); + + o->pid = 0; + + if (si->si_code != CLD_EXITED) { + r = sd_bus_error_setf(&error, SD_BUS_ERROR_FAILED, "Child died abnormally."); + goto fail; + } + + if (si->si_status != EXIT_SUCCESS) { + if (read(o->errno_fd, &r, sizeof(r)) == sizeof(r)) + r = sd_bus_error_set_errnof(&error, r, "%m"); + else + r = sd_bus_error_setf(&error, SD_BUS_ERROR_FAILED, "Child failed."); + + goto fail; + } + + r = sd_bus_reply_method_return(o->message, NULL); + if (r < 0) + log_error_errno(r, "Failed to reply to message: %m"); + + operation_free(o); + return 0; + +fail: + r = sd_bus_reply_method_error(o->message, &error); + if (r < 0) + log_error_errno(r, "Failed to reply to message: %m"); + + operation_free(o); + return 0; +} + +int operation_new(Manager *m, pid_t child, sd_bus_message *message, int errno_fd) { + Operation *o; + int r; + + o = new0(Operation, 1); + if (!o) + return -ENOMEM; + + r = sd_event_add_child(m->event, &o->event_source, child, WEXITED, operation_done, o); + if (r < 0) { + free(o); + return r; + } + + o->pid = child; + o->message = sd_bus_message_ref(message); + o->errno_fd = errno_fd; + + LIST_PREPEND(operations, m->operations, o); + m->n_operations++; + o->manager = m; + + log_debug("Started new operation " PID_FMT ".", child); + + /* At this point we took ownership of both the child and the errno file descriptor! */ + + return 0; +} + +Operation *operation_free(Operation *o) { + if (!o) + return NULL; + + sd_event_source_unref(o->event_source); + + safe_close(o->errno_fd); + + if (o->pid > 1) + (void) sigkill_wait(&o->pid); + + sd_bus_message_unref(o->message); + + if (o->manager) { + LIST_REMOVE(operations, o->manager->operations, o); + o->manager->n_operations--; + } + + free(o); + return NULL; +} diff --git a/src/machine/operation.h b/src/machine/operation.h new file mode 100644 index 0000000000..9d4c3afe45 --- /dev/null +++ b/src/machine/operation.h @@ -0,0 +1,45 @@ +#pragma once + +/*** + This file is part of systemd. + + Copyright 2016 Lennart Poettering + + systemd is free software; you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation; either version 2.1 of the License, or + (at your option) any later version. + + systemd is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with systemd; If not, see . +***/ + +#include + +#include "sd-bus.h" +#include "sd-event.h" + +#include "list.h" + +typedef struct Operation Operation; + +#include "machined.h" + +#define OPERATIONS_MAX 64 + +struct Operation { + Manager *manager; + pid_t pid; + sd_bus_message *message; + int errno_fd; + sd_event_source *event_source; + LIST_FIELDS(Operation, operations); +}; + +int operation_new(Manager *m, pid_t child, sd_bus_message *message, int errno_fd); +Operation *operation_free(Operation *o); -- cgit v1.2.3-54-g00ecf From 89c9030d319e118fa324fa5a1302ba53180b05b6 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Fri, 29 Apr 2016 19:23:23 +0200 Subject: util: rework sigkill_wait() to not require pid_t pointer Let's make sigkill_wait() take a normal pid_t, and add sigkill_waitp() that takes a pointer (which is useful for usage in _cleanup_), following the usual logic we have for this. --- src/basic/process-util.c | 12 +++++++++--- src/basic/process-util.h | 4 ++-- src/import/pull-common.c | 2 +- src/machine/image-dbus.c | 2 +- src/machine/operation.c | 2 +- 5 files changed, 14 insertions(+), 8 deletions(-) diff --git a/src/basic/process-util.c b/src/basic/process-util.c index f2cea01979..4a7367cc92 100644 --- a/src/basic/process-util.c +++ b/src/basic/process-util.c @@ -528,14 +528,20 @@ int wait_for_terminate_and_warn(const char *name, pid_t pid, bool check_exit_cod return -EPROTO; } -void sigkill_wait(pid_t *pid) { +void sigkill_wait(pid_t pid) { + assert(pid > 1); + + if (kill(pid, SIGKILL) > 0) + (void) wait_for_terminate(pid, NULL); +} + +void sigkill_waitp(pid_t *pid) { if (!pid) return; if (*pid <= 1) return; - if (kill(*pid, SIGKILL) > 0) - (void) wait_for_terminate(*pid, NULL); + sigkill_wait(*pid); } int kill_and_sigcont(pid_t pid, int sig) { diff --git a/src/basic/process-util.h b/src/basic/process-util.h index ffd4bcb0ff..9f75088796 100644 --- a/src/basic/process-util.h +++ b/src/basic/process-util.h @@ -58,8 +58,8 @@ int get_process_ppid(pid_t pid, pid_t *ppid); int wait_for_terminate(pid_t pid, siginfo_t *status); int wait_for_terminate_and_warn(const char *name, pid_t pid, bool check_exit_code); -void sigkill_wait(pid_t *pid); -#define _cleanup_sigkill_wait_ _cleanup_(sigkill_wait) +void sigkill_wait(pid_t pid); +void sigkill_waitp(pid_t *pid); int kill_and_sigcont(pid_t pid, int sig); diff --git a/src/import/pull-common.c b/src/import/pull-common.c index d301d4d79e..dc4e4667a9 100644 --- a/src/import/pull-common.c +++ b/src/import/pull-common.c @@ -330,7 +330,7 @@ int pull_verify(PullJob *main_job, _cleanup_close_ int sig_file = -1; const char *p, *line; char sig_file_path[] = "/tmp/sigXXXXXX", gpg_home[] = "/tmp/gpghomeXXXXXX"; - _cleanup_sigkill_wait_ pid_t pid = 0; + _cleanup_(sigkill_waitp) pid_t pid = 0; bool gpg_home_created = false; int r; diff --git a/src/machine/image-dbus.c b/src/machine/image-dbus.c index ca38f61dd3..db0ed03b69 100644 --- a/src/machine/image-dbus.c +++ b/src/machine/image-dbus.c @@ -166,7 +166,7 @@ int bus_image_method_clone( r = operation_new(m, child, message, errno_pipe_fd[0]); if (r < 0) { - (void) sigkill_wait(&child); + (void) sigkill_wait(child); return r; } diff --git a/src/machine/operation.c b/src/machine/operation.c index 53e996b48f..e8564c29f7 100644 --- a/src/machine/operation.c +++ b/src/machine/operation.c @@ -104,7 +104,7 @@ Operation *operation_free(Operation *o) { safe_close(o->errno_fd); if (o->pid > 1) - (void) sigkill_wait(&o->pid); + (void) sigkill_wait(o->pid); sd_bus_message_unref(o->message); -- cgit v1.2.3-54-g00ecf From 8120ee28b8d5d316d9ded9240bcccc9edb41ee06 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Fri, 29 Apr 2016 19:26:54 +0200 Subject: image: enable btrfs quotas on the clone destination, not the source --- src/shared/machine-image.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/shared/machine-image.c b/src/shared/machine-image.c index bebfc40efe..042ccc071c 100644 --- a/src/shared/machine-image.c +++ b/src/shared/machine-image.c @@ -607,9 +607,9 @@ int image_clone(Image *i, const char *new_name, bool read_only) { r = btrfs_subvol_snapshot(i->path, new_path, (read_only ? BTRFS_SNAPSHOT_READ_ONLY : 0) | BTRFS_SNAPSHOT_FALLBACK_COPY | BTRFS_SNAPSHOT_RECURSIVE | BTRFS_SNAPSHOT_QUOTA); - /* Enable "subtree" quotas for the copy, if we didn't - * copy any quota from the source. */ - (void) btrfs_subvol_auto_qgroup(i->path, 0, true); + /* Enable "subtree" quotas for the copy, if we didn't copy any quota from the source. */ + if (r >= 0) + (void) btrfs_subvol_auto_qgroup(new_path, 0, true); break; -- cgit v1.2.3-54-g00ecf From b498c53d80faf7ebd0df5888a48793889ee421e4 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Fri, 29 Apr 2016 19:42:07 +0200 Subject: copy: return the right error when we can't open a file --- src/basic/copy.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/basic/copy.c b/src/basic/copy.c index 3001234a01..c2baef6d22 100644 --- a/src/basic/copy.c +++ b/src/basic/copy.c @@ -305,6 +305,8 @@ static int fd_copy_directory( fdf = openat(df, from, O_RDONLY|O_DIRECTORY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW); else fdf = fcntl(df, F_DUPFD_CLOEXEC, 3); + if (fdf < 0) + return -errno; d = fdopendir(fdf); if (!d) -- cgit v1.2.3-54-g00ecf From 3b8483c0a3c7e2ba32c2f9b8ba36082bee3e0536 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Fri, 29 Apr 2016 19:44:59 +0200 Subject: copy: adjust directory times after writing to the directory When recursively copying a directory tree, fix up the file times after having created all contents in it, so that our changes don't end up altering any of the directory times. --- src/basic/copy.c | 33 ++++++++++++++++----------------- 1 file changed, 16 insertions(+), 17 deletions(-) diff --git a/src/basic/copy.c b/src/basic/copy.c index c2baef6d22..79b9a0e1a0 100644 --- a/src/basic/copy.c +++ b/src/basic/copy.c @@ -327,22 +327,6 @@ static int fd_copy_directory( r = 0; - if (created) { - struct timespec ut[2] = { - st->st_atim, - st->st_mtim - }; - - if (fchown(fdt, st->st_uid, st->st_gid) < 0) - r = -errno; - - if (fchmod(fdt, st->st_mode & 07777) < 0) - r = -errno; - - (void) futimens(fdt, ut); - (void) copy_xattr(dirfd(d), fdt); - } - FOREACH_DIRENT_ALL(de, d, return -errno) { struct stat buf; int q; @@ -378,6 +362,22 @@ static int fd_copy_directory( r = q; } + if (created) { + struct timespec ut[2] = { + st->st_atim, + st->st_mtim + }; + + if (fchown(fdt, st->st_uid, st->st_gid) < 0) + r = -errno; + + if (fchmod(fdt, st->st_mode & 07777) < 0) + r = -errno; + + (void) copy_xattr(dirfd(d), fdt); + (void) futimens(fdt, ut); + } + return r; } @@ -409,7 +409,6 @@ int copy_tree(const char *from, const char *to, bool merge) { } int copy_directory_fd(int dirfd, const char *to, bool merge) { - struct stat st; assert(dirfd >= 0); -- cgit v1.2.3-54-g00ecf From a67d68b84801dccbbc03010c679138bb9e4f91ac Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Fri, 29 Apr 2016 20:05:44 +0200 Subject: tree-wide: fix invocations of chattr_path() chattr_path() takes two bitmasks, and no booleans. Fix the various invocations to do this properly. --- src/journal/journal-file.c | 2 +- src/shared/machine-image.c | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/journal/journal-file.c b/src/journal/journal-file.c index c9ce5c73be..ec50333c2c 100644 --- a/src/journal/journal-file.c +++ b/src/journal/journal-file.c @@ -3293,7 +3293,7 @@ int journal_file_open_reliably( /* btrfs doesn't cope well with our write pattern and * fragments heavily. Let's defrag all files we rotate */ - (void) chattr_path(p, false, FS_NOCOW_FL); + (void) chattr_path(p, 0, FS_NOCOW_FL); (void) btrfs_defrag(p); log_warning_errno(r, "File %s corrupted or uncleanly shut down, renaming and replacing.", fname); diff --git a/src/shared/machine-image.c b/src/shared/machine-image.c index 042ccc071c..eb8f6ee438 100644 --- a/src/shared/machine-image.c +++ b/src/shared/machine-image.c @@ -423,7 +423,7 @@ int image_remove(Image *i) { case IMAGE_DIRECTORY: /* Allow deletion of read-only directories */ - (void) chattr_path(i->path, false, FS_IMMUTABLE_FL); + (void) chattr_path(i->path, 0, FS_IMMUTABLE_FL); r = rm_rf(i->path, REMOVE_ROOT|REMOVE_PHYSICAL|REMOVE_SUBVOLUME); if (r < 0) return r; @@ -505,7 +505,7 @@ int image_rename(Image *i, const char *new_name) { (void) read_attr_path(i->path, &file_attr); if (file_attr & FS_IMMUTABLE_FL) - (void) chattr_path(i->path, false, FS_IMMUTABLE_FL); + (void) chattr_path(i->path, 0, FS_IMMUTABLE_FL); /* fall through */ @@ -538,7 +538,7 @@ int image_rename(Image *i, const char *new_name) { /* Restore the immutable bit, if it was set before */ if (file_attr & FS_IMMUTABLE_FL) - (void) chattr_path(new_path, true, FS_IMMUTABLE_FL); + (void) chattr_path(new_path, FS_IMMUTABLE_FL, FS_IMMUTABLE_FL); free(i->path); i->path = new_path; @@ -670,7 +670,7 @@ int image_read_only(Image *i, bool b) { a read-only subvolume, but at least something, and we can read the value back.*/ - r = chattr_path(i->path, b, FS_IMMUTABLE_FL); + r = chattr_path(i->path, b ? FS_IMMUTABLE_FL : 0, FS_IMMUTABLE_FL); if (r < 0) return r; -- cgit v1.2.3-54-g00ecf From 9a50e3caab82f8406ecfac6048ac8e2ce98b0ab8 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Fri, 29 Apr 2016 20:06:20 +0200 Subject: machined: support non-btrfs file systems with "machinectl clone" Fall back to a normal copy operation when the backing file system isn't btrfs, and hence doesn't support cheap snapshotting. Of course, this will be slow, but given that the execution is asynchronous now, this should be OK. Fixes: #1308 --- src/basic/copy.c | 15 +++++++++++++++ src/basic/copy.h | 1 + src/shared/machine-image.c | 14 +++++++++++--- 3 files changed, 27 insertions(+), 3 deletions(-) diff --git a/src/basic/copy.c b/src/basic/copy.c index 79b9a0e1a0..c3586728d0 100644 --- a/src/basic/copy.c +++ b/src/basic/copy.c @@ -423,6 +423,21 @@ int copy_directory_fd(int dirfd, const char *to, bool merge) { return fd_copy_directory(dirfd, NULL, &st, AT_FDCWD, to, st.st_dev, merge); } +int copy_directory(const char *from, const char *to, bool merge) { + struct stat st; + + assert(from); + assert(to); + + if (lstat(from, &st) < 0) + return -errno; + + if (!S_ISDIR(st.st_mode)) + return -ENOTDIR; + + return fd_copy_directory(AT_FDCWD, from, &st, AT_FDCWD, to, st.st_dev, merge); +} + int copy_file_fd(const char *from, int fdt, bool try_reflink) { _cleanup_close_ int fdf = -1; int r; diff --git a/src/basic/copy.h b/src/basic/copy.h index 3e5eb52506..b5d08ebafe 100644 --- a/src/basic/copy.h +++ b/src/basic/copy.h @@ -30,6 +30,7 @@ int copy_file_atomic(const char *from, const char *to, mode_t mode, bool replace int copy_tree(const char *from, const char *to, bool merge); int copy_tree_at(int fdf, const char *from, int fdt, const char *to, bool merge); int copy_directory_fd(int dirfd, const char *to, bool merge); +int copy_directory(const char *from, const char *to, bool merge); int copy_bytes(int fdf, int fdt, uint64_t max_bytes, bool try_reflink); int copy_times(int fdf, int fdt); int copy_xattr(int fdf, int fdt); diff --git a/src/shared/machine-image.c b/src/shared/machine-image.c index eb8f6ee438..66f58ecd92 100644 --- a/src/shared/machine-image.c +++ b/src/shared/machine-image.c @@ -603,12 +603,20 @@ int image_clone(Image *i, const char *new_name, bool read_only) { case IMAGE_SUBVOLUME: case IMAGE_DIRECTORY: + /* If we can we'll always try to create a new btrfs subvolume here, even if the source is a plain + * directory.*/ + new_path = strjoina("/var/lib/machines/", new_name); r = btrfs_subvol_snapshot(i->path, new_path, (read_only ? BTRFS_SNAPSHOT_READ_ONLY : 0) | BTRFS_SNAPSHOT_FALLBACK_COPY | BTRFS_SNAPSHOT_RECURSIVE | BTRFS_SNAPSHOT_QUOTA); - - /* Enable "subtree" quotas for the copy, if we didn't copy any quota from the source. */ - if (r >= 0) + if (r == -EOPNOTSUPP) { + /* No btrfs snapshots supported, create a normal directory then. */ + + r = copy_directory(i->path, new_path, false); + if (r >= 0) + (void) chattr_path(new_path, read_only ? FS_IMMUTABLE_FL : 0, FS_IMMUTABLE_FL); + } else if (r >= 0) + /* Enable "subtree" quotas for the copy, if we didn't copy any quota from the source. */ (void) btrfs_subvol_auto_qgroup(new_path, 0, true); break; -- cgit v1.2.3-54-g00ecf From 5d2036b5f3506bd0ff07042aee8d69c26db32298 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Fri, 29 Apr 2016 20:17:55 +0200 Subject: machined: also make image removal operation asynchronous If we remove a directory image (i.e. not a btrfs snapshot) then things might get quite expensive, hence run this asynchronous in a forked off process, too. --- src/machine/image-dbus.c | 35 ++++++++++++++++++++++++++++++++--- 1 file changed, 32 insertions(+), 3 deletions(-) diff --git a/src/machine/image-dbus.c b/src/machine/image-dbus.c index db0ed03b69..e07edae6ef 100644 --- a/src/machine/image-dbus.c +++ b/src/machine/image-dbus.c @@ -35,13 +35,18 @@ int bus_image_method_remove( void *userdata, sd_bus_error *error) { + _cleanup_close_pair_ int errno_pipe_fd[2] = { -1, -1 }; Image *image = userdata; Manager *m = image->userdata; + pid_t child; int r; assert(message); assert(image); + if (m->n_operations >= OPERATIONS_MAX) + return sd_bus_error_setf(error, SD_BUS_ERROR_LIMITS_EXCEEDED, "Too many ongoing operations."); + r = bus_verify_polkit_async( message, CAP_SYS_ADMIN, @@ -56,11 +61,35 @@ int bus_image_method_remove( if (r == 0) return 1; /* Will call us back */ - r = image_remove(image); - if (r < 0) + if (pipe2(errno_pipe_fd, O_CLOEXEC|O_NONBLOCK) < 0) + return sd_bus_error_set_errnof(error, errno, "Failed to create pipe: %m"); + + child = fork(); + if (child < 0) + return sd_bus_error_set_errnof(error, errno, "Failed to fork(): %m"); + if (child == 0) { + errno_pipe_fd[0] = safe_close(errno_pipe_fd[0]); + + r = image_remove(image); + if (r < 0) { + (void) write(errno_pipe_fd[1], &r, sizeof(r)); + _exit(EXIT_FAILURE); + } + + _exit(EXIT_SUCCESS); + } + + errno_pipe_fd[1] = safe_close(errno_pipe_fd[1]); + + r = operation_new(m, child, message, errno_pipe_fd[0]); + if (r < 0) { + (void) sigkill_wait(child); return r; + } - return sd_bus_reply_method_return(message, NULL); + errno_pipe_fd[0] = -1; + + return 1; } int bus_image_method_rename( -- cgit v1.2.3-54-g00ecf From 795c5d31affeb3b3edbf673940cc0f16afebc7a8 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Fri, 29 Apr 2016 20:32:56 +0200 Subject: machined: rework copy-from/copy-to operation to use generic Operation object With this all potentially slow operations are done out-of-process, asynchronously, using the same "Operation" object. --- src/machine/image-dbus.c | 4 +-- src/machine/machine-dbus.c | 66 +++++----------------------------------------- src/machine/machine.c | 24 +---------------- src/machine/machine.h | 20 +++----------- src/machine/operation.c | 23 ++++++++++++---- src/machine/operation.h | 4 ++- 6 files changed, 33 insertions(+), 108 deletions(-) diff --git a/src/machine/image-dbus.c b/src/machine/image-dbus.c index e07edae6ef..0eed9b81bb 100644 --- a/src/machine/image-dbus.c +++ b/src/machine/image-dbus.c @@ -81,7 +81,7 @@ int bus_image_method_remove( errno_pipe_fd[1] = safe_close(errno_pipe_fd[1]); - r = operation_new(m, child, message, errno_pipe_fd[0]); + r = operation_new(m, NULL, child, message, errno_pipe_fd[0]); if (r < 0) { (void) sigkill_wait(child); return r; @@ -193,7 +193,7 @@ int bus_image_method_clone( errno_pipe_fd[1] = safe_close(errno_pipe_fd[1]); - r = operation_new(m, child, message, errno_pipe_fd[0]); + r = operation_new(m, NULL, child, message, errno_pipe_fd[0]); if (r < 0) { (void) sigkill_wait(child); return r; diff --git a/src/machine/machine-dbus.c b/src/machine/machine-dbus.c index 5121bfdd18..7b9aa66d63 100644 --- a/src/machine/machine-dbus.c +++ b/src/machine/machine-dbus.c @@ -1085,52 +1085,11 @@ finish: return r; } -static int machine_operation_done(sd_event_source *s, const siginfo_t *si, void *userdata) { - _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL; - MachineOperation *o = userdata; - int r; - - assert(o); - assert(si); - - o->pid = 0; - - if (si->si_code != CLD_EXITED) { - r = sd_bus_error_setf(&error, SD_BUS_ERROR_FAILED, "Child died abnormally."); - goto fail; - } - - if (si->si_status != EXIT_SUCCESS) { - if (read(o->errno_fd, &r, sizeof(r)) == sizeof(r)) - r = sd_bus_error_set_errnof(&error, r, "%m"); - else - r = sd_bus_error_setf(&error, SD_BUS_ERROR_FAILED, "Child failed."); - - goto fail; - } - - r = sd_bus_reply_method_return(o->message, NULL); - if (r < 0) - log_error_errno(r, "Failed to reply to message: %m"); - - machine_operation_unref(o); - return 0; - -fail: - r = sd_bus_reply_method_error(o->message, &error); - if (r < 0) - log_error_errno(r, "Failed to reply to message: %m"); - - machine_operation_unref(o); - return 0; -} - int bus_machine_method_copy(sd_bus_message *message, void *userdata, sd_bus_error *error) { const char *src, *dest, *host_path, *container_path, *host_basename, *host_dirname, *container_basename, *container_dirname; _cleanup_close_pair_ int errno_pipe_fd[2] = { -1, -1 }; _cleanup_close_ int hostfd = -1; Machine *m = userdata; - MachineOperation *o; bool copy_from; pid_t child; char *t; @@ -1139,7 +1098,7 @@ int bus_machine_method_copy(sd_bus_message *message, void *userdata, sd_bus_erro assert(message); assert(m); - if (m->n_operations >= MACHINE_OPERATIONS_MAX) + if (m->manager->n_operations >= OPERATIONS_MAX) return sd_bus_error_setf(error, SD_BUS_ERROR_LIMITS_EXCEEDED, "Too many ongoing copies."); if (m->class != MACHINE_CONTAINER) @@ -1249,27 +1208,14 @@ int bus_machine_method_copy(sd_bus_message *message, void *userdata, sd_bus_erro errno_pipe_fd[1] = safe_close(errno_pipe_fd[1]); - /* Copying might take a while, hence install a watch the - * child, and return */ + /* Copying might take a while, hence install a watch on the child, and return */ - o = new0(MachineOperation, 1); - if (!o) - return log_oom(); - - o->pid = child; - o->message = sd_bus_message_ref(message); - o->errno_fd = errno_pipe_fd[0]; - errno_pipe_fd[0] = -1; - - r = sd_event_add_child(m->manager->event, &o->event_source, child, WEXITED, machine_operation_done, o); + r = operation_new(m->manager, m, child, message, errno_pipe_fd[0]); if (r < 0) { - machine_operation_unref(o); - return log_oom(); + (void) sigkill_wait(child); + return r; } - - LIST_PREPEND(operations, m->operations, o); - m->n_operations++; - o->machine = m; + errno_pipe_fd[0] = -1; return 1; } diff --git a/src/machine/machine.c b/src/machine/machine.c index 7d4270a8ff..c1fae57084 100644 --- a/src/machine/machine.c +++ b/src/machine/machine.c @@ -89,7 +89,7 @@ void machine_free(Machine *m) { assert(m); while (m->operations) - machine_operation_unref(m->operations); + operation_free(m->operations); if (m->in_gc_queue) LIST_REMOVE(gc_queue, m->manager->machine_gc_queue, m); @@ -596,28 +596,6 @@ int machine_open_terminal(Machine *m, const char *path, int mode) { } } -MachineOperation *machine_operation_unref(MachineOperation *o) { - if (!o) - return NULL; - - sd_event_source_unref(o->event_source); - - safe_close(o->errno_fd); - - if (o->pid > 1) - (void) kill(o->pid, SIGKILL); - - sd_bus_message_unref(o->message); - - if (o->machine) { - LIST_REMOVE(operations, o->machine->operations, o); - o->machine->n_operations--; - } - - free(o); - return NULL; -} - void machine_release_unit(Machine *m) { assert(m); diff --git a/src/machine/machine.h b/src/machine/machine.h index 1d8cc5911a..e5d75361a9 100644 --- a/src/machine/machine.h +++ b/src/machine/machine.h @@ -20,11 +20,11 @@ ***/ typedef struct Machine Machine; -typedef struct MachineOperation MachineOperation; typedef enum KillWho KillWho; #include "list.h" #include "machined.h" +#include "operation.h" typedef enum MachineState { MACHINE_OPENING, /* Machine is being registered */ @@ -49,17 +49,6 @@ enum KillWho { _KILL_WHO_INVALID = -1 }; -#define MACHINE_OPERATIONS_MAX 64 - -struct MachineOperation { - Machine *machine; - pid_t pid; - sd_bus_message *message; - int errno_fd; - sd_event_source *event_source; - LIST_FIELDS(MachineOperation, operations); -}; - struct Machine { Manager *manager; @@ -88,10 +77,9 @@ struct Machine { int *netif; unsigned n_netif; - LIST_FIELDS(Machine, gc_queue); + LIST_HEAD(Operation, operations); - MachineOperation *operations; - unsigned n_operations; + LIST_FIELDS(Machine, gc_queue); }; Machine* machine_new(Manager *manager, MachineClass class, const char *name); @@ -109,8 +97,6 @@ void machine_release_unit(Machine *m); MachineState machine_get_state(Machine *u); -MachineOperation *machine_operation_unref(MachineOperation *o); - const char* machine_class_to_string(MachineClass t) _const_; MachineClass machine_class_from_string(const char *s) _pure_; diff --git a/src/machine/operation.c b/src/machine/operation.c index e8564c29f7..e6ddc41a55 100644 --- a/src/machine/operation.c +++ b/src/machine/operation.c @@ -66,15 +66,20 @@ fail: return 0; } -int operation_new(Manager *m, pid_t child, sd_bus_message *message, int errno_fd) { +int operation_new(Manager *manager, Machine *machine, pid_t child, sd_bus_message *message, int errno_fd) { Operation *o; int r; + assert(manager); + assert(child > 1); + assert(message); + assert(errno_fd >= 0); + o = new0(Operation, 1); if (!o) return -ENOMEM; - r = sd_event_add_child(m->event, &o->event_source, child, WEXITED, operation_done, o); + r = sd_event_add_child(manager->event, &o->event_source, child, WEXITED, operation_done, o); if (r < 0) { free(o); return r; @@ -84,9 +89,14 @@ int operation_new(Manager *m, pid_t child, sd_bus_message *message, int errno_fd o->message = sd_bus_message_ref(message); o->errno_fd = errno_fd; - LIST_PREPEND(operations, m->operations, o); - m->n_operations++; - o->manager = m; + LIST_PREPEND(operations, manager->operations, o); + manager->n_operations++; + o->manager = manager; + + if (machine) { + LIST_PREPEND(operations_by_machine, machine->operations, o); + o->machine = machine; + } log_debug("Started new operation " PID_FMT ".", child); @@ -113,6 +123,9 @@ Operation *operation_free(Operation *o) { o->manager->n_operations--; } + if (o->machine) + LIST_REMOVE(operations_by_machine, o->machine->operations, o); + free(o); return NULL; } diff --git a/src/machine/operation.h b/src/machine/operation.h index 9d4c3afe45..7ca47bc3af 100644 --- a/src/machine/operation.h +++ b/src/machine/operation.h @@ -34,12 +34,14 @@ typedef struct Operation Operation; struct Operation { Manager *manager; + Machine *machine; pid_t pid; sd_bus_message *message; int errno_fd; sd_event_source *event_source; LIST_FIELDS(Operation, operations); + LIST_FIELDS(Operation, operations_by_machine); }; -int operation_new(Manager *m, pid_t child, sd_bus_message *message, int errno_fd); +int operation_new(Manager *manager, Machine *machine, pid_t child, sd_bus_message *message, int errno_fd); Operation *operation_free(Operation *o); -- cgit v1.2.3-54-g00ecf From 3d87174db439fea28e956f06a69785415bbf5826 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Fri, 29 Apr 2016 20:57:39 +0200 Subject: machinectl: since clone/remove/copy verbs are possibly slow, turn off bus call timeout By default we timeout all bus calls, but if we know that these bus calls might be slow, let's explicitly turn the timeouts off. --- src/machine/machinectl.c | 69 ++++++++++++++++++++++++++++++++---------------- 1 file changed, 46 insertions(+), 23 deletions(-) diff --git a/src/machine/machinectl.c b/src/machine/machinectl.c index 5a68c4ceb2..1165ab5afa 100644 --- a/src/machine/machinectl.c +++ b/src/machine/machinectl.c @@ -1076,6 +1076,7 @@ static int terminate_machine(int argc, char *argv[], void *userdata) { static int copy_files(int argc, char *argv[], void *userdata) { _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL; + _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL; _cleanup_free_ char *abs_host_path = NULL; char *dest, *host_path, *container_path; sd_bus *bus = userdata; @@ -1099,18 +1100,27 @@ static int copy_files(int argc, char *argv[], void *userdata) { host_path = abs_host_path; } - r = sd_bus_call_method( + r = sd_bus_message_new_method_call( bus, + &m, "org.freedesktop.machine1", "/org/freedesktop/machine1", "org.freedesktop.machine1.Manager", - copy_from ? "CopyFromMachine" : "CopyToMachine", - &error, - NULL, + copy_from ? "CopyFromMachine" : "CopyToMachine"); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_append( + m, "sss", argv[1], copy_from ? container_path : host_path, copy_from ? host_path : container_path); + if (r < 0) + return bus_log_create_error(r); + + /* This is a slow operation, hence turn off any method call timeouts */ + r = sd_bus_call(bus, m, USEC_INFINITY, &error, NULL); if (r < 0) return log_error_errno(r, "Failed to copy: %s", bus_error_message(&error, r)); @@ -1393,7 +1403,6 @@ static int shell_machine(int argc, char *argv[], void *userdata) { } static int remove_image(int argc, char *argv[], void *userdata) { - _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL; sd_bus *bus = userdata; int r, i; @@ -1402,19 +1411,27 @@ static int remove_image(int argc, char *argv[], void *userdata) { polkit_agent_open_if_enabled(); for (i = 1; i < argc; i++) { - r = sd_bus_call_method( + _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL; + _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL; + + r = sd_bus_message_new_method_call( bus, + &m, "org.freedesktop.machine1", "/org/freedesktop/machine1", "org.freedesktop.machine1.Manager", - "RemoveImage", - &error, - NULL, - "s", argv[i]); - if (r < 0) { - log_error("Could not remove image: %s", bus_error_message(&error, -r)); - return r; - } + "RemoveImage"); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_append(m, "s", argv[i]); + if (r < 0) + return bus_log_create_error(r); + + /* This is a slow operation, hence turn off any method call timeouts */ + r = sd_bus_call(bus, m, USEC_INFINITY, &error, NULL); + if (r < 0) + return log_error_errno(r, "Could not remove image: %s", bus_error_message(&error, r)); } return 0; @@ -1446,24 +1463,30 @@ static int rename_image(int argc, char *argv[], void *userdata) { static int clone_image(int argc, char *argv[], void *userdata) { _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL; + _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL; sd_bus *bus = userdata; int r; polkit_agent_open_if_enabled(); - r = sd_bus_call_method( + r = sd_bus_message_new_method_call( bus, + &m, "org.freedesktop.machine1", "/org/freedesktop/machine1", "org.freedesktop.machine1.Manager", - "CloneImage", - &error, - NULL, - "ssb", argv[1], argv[2], arg_read_only); - if (r < 0) { - log_error("Could not clone image: %s", bus_error_message(&error, -r)); - return r; - } + "CloneImage"); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_append(m, "ssb", argv[1], argv[2], arg_read_only); + if (r < 0) + return bus_log_create_error(r); + + /* This is a slow operation, hence turn off any method call timeouts */ + r = sd_bus_call(bus, m, USEC_INFINITY, &error, NULL); + if (r < 0) + return log_error_errno(r, "Could not clone image: %s", bus_error_message(&error, r)); return 0; } -- cgit v1.2.3-54-g00ecf From 3e8a82dbd0455a3ef71095bbb0de4880d6163596 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Fri, 29 Apr 2016 20:35:14 +0200 Subject: update TODO --- TODO | 2 -- 1 file changed, 2 deletions(-) diff --git a/TODO b/TODO index 57d731b46d..a930583f85 100644 --- a/TODO +++ b/TODO @@ -35,8 +35,6 @@ Features: * journalctl: make sure -f ends when the container indicated by -M terminates -* make "machinectl clone" properly async, and add fallback for non-tmpfs - * rework fopen_temporary() to make use of open_tmpfile_linkable() (problem: the kernel doesn't support linkat() that replaces existing files, currently) -- cgit v1.2.3-54-g00ecf From 10c6258e326f6fcc749747334de9fc1672296719 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Mon, 2 May 2016 11:14:13 +0200 Subject: minor CODING_STYLE clarification --- CODING_STYLE | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/CODING_STYLE b/CODING_STYLE index c2b2e56d5d..b689355c9a 100644 --- a/CODING_STYLE +++ b/CODING_STYLE @@ -240,6 +240,11 @@ unlink("/foo/bar/baz"); + Don't cast function calls to (void) that return no error + conditions. Specifically, the various xyz_unref() calls that return a NULL + object shouldn't be cast to (void), since not using the return value does not + hide any errors. + - Don't invoke exit(), ever. It is not replacement for proper error handling. Please escalate errors up your call chain, and use normal "return" to exit from the main function of a process. If you -- cgit v1.2.3-54-g00ecf From d13febb1e0d17ce11cfa904085c91e98d336f476 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Mon, 2 May 2016 11:14:46 +0200 Subject: man: slightly extend the machinectl clone documentation --- man/machinectl.xml | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/man/machinectl.xml b/man/machinectl.xml index 43a3b98840..4b7f9a0391 100644 --- a/man/machinectl.xml +++ b/man/machinectl.xml @@ -589,13 +589,11 @@ clone NAME NAME - Clones a container or VM image. The - arguments specify the name of the image to clone and the name - of the newly cloned image. Note that plain directory container - images are cloned into subvolume images with this command. - Note that cloning a container or VM image is optimized for - btrfs file systems, and might not be efficient on others, due - to file system limitations. + Clones a container or VM image. The arguments specify the name of the image to clone and the + name of the newly cloned image. Note that plain directory container images are cloned into btrfs subvolume + images with this command, if the underlying file system supports this. Note that cloning a container or VM + image is optimized for btrfs file systems, and might not be efficient on others, due to file system + limitations. Note that this command leaves host name, machine ID and all other settings that could identify the instance -- cgit v1.2.3-54-g00ecf