summaryrefslogtreecommitdiff
path: root/src/shared
diff options
context:
space:
mode:
Diffstat (limited to 'src/shared')
-rw-r--r--src/shared/pty.c640
-rw-r--r--src/shared/pty.h77
2 files changed, 717 insertions, 0 deletions
diff --git a/src/shared/pty.c b/src/shared/pty.c
new file mode 100644
index 0000000000..11d76f825f
--- /dev/null
+++ b/src/shared/pty.c
@@ -0,0 +1,640 @@
+/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
+
+/***
+ This file is part of systemd.
+
+ Copyright 2014 David Herrmann <dh.herrmann@gmail.com>
+
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+
+ systemd is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+/*
+ * PTY
+ * A PTY object represents a single PTY connection between a master and a
+ * child. The child process is fork()ed so the caller controls what program
+ * will be run.
+ *
+ * Programs like /bin/login tend to perform a vhangup() on their TTY
+ * before running the login procedure. This also causes the pty master
+ * to get an EPOLLHUP event as long as no client has the TTY opened.
+ * This means we cannot use the TTY connection as a reliable way to track
+ * the client. Instead, we _must_ rely on the PID of the client to track
+ * them.
+ * However, this has the side effect that if the client forks and the
+ * parent exits, we lose them and restart the client. But this seems to
+ * be the expected behavior so we implement it here.
+ *
+ * Unfortunately, epoll always polls for EPOLLHUP so as long as the
+ * vhangup() is ongoing, we will _always_ get EPOLLHUP and cannot sleep.
+ * This gets worse if the client closes the TTY but doesn't exit.
+ * Therefore, the fd must be edge-triggered in the epoll-set so we
+ * only get the events once they change.
+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <pty.h>
+#include <signal.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/epoll.h>
+#include <sys/eventfd.h>
+#include <sys/ioctl.h>
+#include <sys/types.h>
+#include <sys/uio.h>
+#include <sys/wait.h>
+#include <termios.h>
+#include <unistd.h>
+
+#include "barrier.h"
+#include "macro.h"
+#include "pty.h"
+#include "ring.h"
+#include "util.h"
+
+/* Size in bytes of the per-object input buffer (Pty.in_buf). */
+#define PTY_BUFSIZE 16384
+
+/* Which side of the fork a Pty object belongs to; stored in Pty.role. */
+enum {
+ /* first enumerator (0); presumably what new0() leaves in a fresh Pty */
+ PTY_ROLE_UNKNOWN,
+ /* assigned by pty_make_parent() */
+ PTY_ROLE_PARENT,
+ /* assigned by pty_make_child() */
+ PTY_ROLE_CHILD,
+};
+
+/* State of a single PTY connection; the type is opaque to users of pty.h. */
+struct Pty {
+ /* reference count; set to 1 by pty_new(), object is freed when it drops to 0 */
+ unsigned long ref;
+ /* barrier used by pty_fork() to synchronize parent and child */
+ Barrier barrier;
+ /* PTY fd: master from posix_openpt(), replaced by the slave in pty_make_child() */
+ int fd;
+ /* child PID in the parent, own PID in the child; reset to 0 by pty_child_fn() */
+ pid_t child;
+ /* sd-event I/O source watching @fd, installed by pty_attach_event() */
+ sd_event_source *fd_source;
+ /* sd-event child source watching @child, installed by pty_attach_event() */
+ sd_event_source *child_source;
+
+ /* scratch buffer that pty_dispatch_read() reads into before dispatching PTY_DATA */
+ char in_buf[PTY_BUFSIZE];
+ /* data queued by pty_write() and flushed by pty_dispatch_write() */
+ Ring out_buf;
+
+ /* user callback and its opaque argument, set by pty_attach_event() */
+ pty_event_t event_fn;
+ void *event_fn_userdata;
+
+ /* set when the I/O events must be re-armed in pty_fd_prepare_fn() */
+ bool needs_requeue : 1;
+ /* one of the PTY_ROLE_* values */
+ unsigned int role : 2;
+};
+
+/*
+ * Allocate a new Pty object with a single reference and open a non-blocking,
+ * close-on-exec PTY master for it.
+ *
+ * On success the object is stored in @out and 0 is returned. On failure a
+ * negative errno-style code is returned and @out is left untouched; the
+ * partially set up object is released by the _pty_unref_ cleanup.
+ */
+int pty_new(Pty **out) {
+        _pty_unref_ Pty *pty = NULL;
+        int r;
+
+        assert_return(out, -EINVAL);
+
+        pty = new0(Pty, 1);
+        if (!pty)
+                return -ENOMEM;
+
+        pty->ref = 1;
+
+        /* a failed posix_openpt() leaves fd negative, i.e. "not open" */
+        pty->fd = posix_openpt(O_RDWR | O_NOCTTY | O_CLOEXEC | O_NONBLOCK);
+        if (pty->fd < 0)
+                return -errno;
+
+        /*
+         * Normally the slave node already belongs to the uid/gid that called
+         * posix_openpt(), which makes grantpt() a no-op. Only when devpts is
+         * mounted with a fixed uid/gid does grantpt() have real work to do,
+         * and then it needs root (or a pt_chown helper) to chown() the node.
+         * Hardly anyone runs such a setup, but we call grantpt() anyway to
+         * give it a chance. Callers that need different access rights should
+         * adjust them after this function returns. That is not a race worth
+         * worrying about: races against your own UID do not matter since an
+         * attacker could ptrace you anyway, and the slave is still locked at
+         * this point.
+         */
+        if (grantpt(pty->fd) < 0)
+                return -errno;
+
+        r = barrier_init(&pty->barrier);
+        if (r < 0)
+                return r;
+
+        /* hand ownership to the caller and disarm the cleanup handler */
+        *out = pty;
+        pty = NULL;
+        return 0;
+}
+
+/*
+ * Take an additional reference on @pty. Returns @pty, or NULL if @pty is NULL
+ * or its reference count is already below 1.
+ */
+Pty *pty_ref(Pty *pty) {
+        if (pty && pty->ref >= 1) {
+                pty->ref++;
+                return pty;
+        }
+
+        return NULL;
+}
+
+/*
+ * Drop one reference on @pty. When the last reference is gone the fd is
+ * closed, the event sources are released and the object is freed.
+ * Always returns NULL so callers can write "p = pty_unref(p);".
+ */
+Pty *pty_unref(Pty *pty) {
+        if (!pty || pty->ref < 1)
+                return NULL;
+
+        pty->ref--;
+        if (pty->ref > 0)
+                return NULL;
+
+        /* last reference dropped: tear everything down */
+        pty_close(pty);
+        pty->child_source = sd_event_source_unref(pty->child_source);
+        barrier_destroy(&pty->barrier);
+        ring_clear(&pty->out_buf);
+        free(pty);
+
+        return NULL;
+}
+
+/* Return a pointer to the barrier embedded in @pty; @pty must not be NULL. */
+Barrier *pty_get_barrier(Pty *pty) {
+        Barrier *barrier;
+
+        assert(pty);
+
+        barrier = &pty->barrier;
+        return barrier;
+}
+
+/* True if @pty is non-NULL and has not been assigned a role yet. */
+bool pty_is_unknown(Pty *pty) {
+        if (!pty)
+                return false;
+
+        return pty->role == PTY_ROLE_UNKNOWN;
+}
+
+/* True if @pty is non-NULL and represents the parent side. */
+bool pty_is_parent(Pty *pty) {
+        if (!pty)
+                return false;
+
+        return pty->role == PTY_ROLE_PARENT;
+}
+
+/* True if @pty is non-NULL and represents the child side. */
+bool pty_is_child(Pty *pty) {
+        if (!pty)
+                return false;
+
+        return pty->role == PTY_ROLE_CHILD;
+}
+
+/* True if @pty is the parent side and still has a positive child PID recorded. */
+bool pty_has_child(Pty *pty) {
+        if (!pty_is_parent(pty))
+                return false;
+
+        return pty->child > 0;
+}
+
+/* Return the recorded child PID, or -ECHILD if pty_has_child() is false. */
+pid_t pty_get_child(Pty *pty) {
+        if (pty_has_child(pty))
+                return pty->child;
+
+        return -ECHILD;
+}
+
+/* True if @pty is non-NULL and currently holds a valid (non-negative) fd. */
+bool pty_is_open(Pty *pty) {
+        if (!pty)
+                return false;
+
+        return pty->fd >= 0;
+}
+
+/*
+ * Return the fd held by @pty. Yields -EINVAL if @pty is NULL and -EPIPE if
+ * the fd has already been closed.
+ */
+int pty_get_fd(Pty *pty) {
+        assert_return(pty, -EINVAL);
+
+        if (!pty_is_open(pty))
+                return -EPIPE;
+
+        return pty->fd;
+}
+
+/*
+ * Turn @pty into the child side of the connection.
+ *
+ * Looks up the slave device belonging to the master fd, opens it, closes the
+ * master and stores the slave fd in its place. The calling process is
+ * recorded as the child and the barrier is switched to the child role.
+ *
+ * Returns 0 on success, -EINVAL if @pty is NULL, -EALREADY if a role was
+ * already assigned, or a negative errno-style code if the slave could not be
+ * resolved or opened. On failure @pty is left unmodified.
+ */
+int pty_make_child(Pty *pty) {
+        char slave_name[1024];
+        int r, fd;
+
+        assert_return(pty, -EINVAL);
+        assert_return(pty_is_unknown(pty), -EALREADY);
+
+        /*
+         * ptsname_r() reports failure by returning a positive error number,
+         * not a negative value, so testing "r < 0" would miss every error
+         * and let us open() an uninitialized name. Treat any non-zero result
+         * as failure; fall back to errno for libcs that return -1 instead.
+         */
+        r = ptsname_r(pty->fd, slave_name, sizeof(slave_name));
+        if (r != 0)
+                return r > 0 ? -r : -errno;
+
+        fd = open(slave_name, O_RDWR | O_CLOEXEC | O_NOCTTY);
+        if (fd < 0)
+                return -errno;
+
+        /* the child only talks to the slave; drop the inherited master */
+        safe_close(pty->fd);
+        pty->fd = fd;
+        pty->child = getpid();
+        pty->role = PTY_ROLE_CHILD;
+        barrier_set_role(&pty->barrier, BARRIER_CHILD);
+
+        return 0;
+}
+
+/*
+ * Turn @pty into the parent side of the connection and remember @child as
+ * the PID of the process on the other end.
+ *
+ * Returns 0 on success, -EINVAL if @pty is NULL, or -EALREADY if a role was
+ * already assigned.
+ */
+int pty_make_parent(Pty *pty, pid_t child) {
+        assert_return(pty, -EINVAL);
+        assert_return(pty_is_unknown(pty), -EALREADY);
+
+        pty->role = PTY_ROLE_PARENT;
+        pty->child = child;
+
+        return 0;
+}
+
+/*
+ * Unlock the slave side via unlockpt() so it can be opened. Only valid while
+ * @pty still holds the master, i.e. with unknown or parent role.
+ *
+ * Returns 0 on success, -EINVAL for a NULL or child-side @pty, -ENODEV if the
+ * fd is closed, or the negative errno of unlockpt().
+ */
+int pty_unlock(Pty *pty) {
+        assert_return(pty, -EINVAL);
+        assert_return(pty_is_unknown(pty) || pty_is_parent(pty), -EINVAL);
+        assert_return(pty_is_open(pty), -ENODEV);
+
+        if (unlockpt(pty->fd) < 0)
+                return -errno;
+
+        return 0;
+}
+
+/*
+ * Prepare the calling (child) process to run on the PTY slave held by @pty:
+ * reset signal state, start a new session, make the slave the controlling
+ * terminal, adjust its termios settings and install it as stdin, stdout and
+ * stderr.
+ *
+ * Returns 0 on success, -EINVAL if @pty is NULL or not the child side,
+ * -EALREADY if the fd is not open, or a negative errno-style code from the
+ * first step that fails. On success pty->fd is no longer valid.
+ */
+int pty_setup_child(Pty *pty) {
+ struct termios attr;
+ pid_t pid;
+ int r;
+
+ assert_return(pty, -EINVAL);
+ assert_return(pty_is_child(pty), -EINVAL);
+ assert_return(pty_is_open(pty), -EALREADY);
+
+ /* NOTE(review): presumably installs an empty signal mask (no signals listed before the -1 terminator); confirm against sigprocmask_many() */
+ r = sigprocmask_many(SIG_SETMASK, -1);
+ if (r < 0)
+ return r;
+
+ r = reset_all_signal_handlers();
+ if (r < 0)
+ return r;
+
+ /* EPERM is tolerated: setsid() fails that way if we already lead a process group */
+ pid = setsid();
+ if (pid < 0 && errno != EPERM)
+ return -errno;
+
+ /* acquire the slave as controlling terminal of the session */
+ r = ioctl(pty->fd, TIOCSCTTY, 0);
+ if (r < 0)
+ return -errno;
+
+ r = tcgetattr(pty->fd, &attr);
+ if (r < 0)
+ return -errno;
+
+ /* erase character should be normal backspace (octal 010, ^H), PLEASEEE! */
+ attr.c_cc[VERASE] = 010;
+ /* always set UTF8 flag */
+ attr.c_iflag |= IUTF8;
+
+ r = tcsetattr(pty->fd, TCSANOW, &attr);
+ if (r < 0)
+ return -errno;
+
+ /* dup2() returns the target fd on success, so any other value is an error */
+ if (dup2(pty->fd, STDIN_FILENO) != STDIN_FILENO ||
+ dup2(pty->fd, STDOUT_FILENO) != STDOUT_FILENO ||
+ dup2(pty->fd, STDERR_FILENO) != STDERR_FILENO)
+ return -errno;
+
+ /* only close FD if it's not a std-fd; either way the object forgets it */
+ pty->fd = (pty->fd > 2) ? safe_close(pty->fd) : -1;
+
+ return 0;
+}
+
+/*
+ * Close the fd of @pty and drop the I/O event source watching it. Does
+ * nothing if @pty is NULL or already closed.
+ */
+void pty_close(Pty *pty) {
+        if (pty_is_open(pty)) {
+                pty->fd_source = sd_event_source_unref(pty->fd_source);
+                pty->fd = safe_close(pty->fd);
+        }
+}
+
+/*
+ * Read pending input from the pty and hand it to the event handler as
+ * PTY_DATA. Returns a negative error code on failure, 0 once the input queue
+ * is drained (read() reported EAGAIN), and 1 if both read attempts were used
+ * up, in which case a requeue is requested because data may still be pending.
+ */
+static int pty_dispatch_read(Pty *pty) {
+        unsigned int attempt;
+        ssize_t n;
+        int r;
+
+        /*
+         * The fd is edge-triggered, so in principle we have to read until the
+         * queue is empty. A fast writer could keep us busy forever, though,
+         * so we limit ourselves to two reads per dispatch and ask to be
+         * rescheduled if we did not hit EAGAIN by then.
+         */
+
+        for (attempt = 0; attempt < 2; ++attempt) {
+                n = read(pty->fd, pty->in_buf, sizeof(pty->in_buf) - 1);
+
+                if (n > 0) {
+                        /* keep the buffer zero-terminated for debugging safety */
+                        pty->in_buf[n] = 0;
+                        r = pty->event_fn(pty, pty->event_fn_userdata, PTY_DATA, pty->in_buf, n);
+                        if (r < 0)
+                                return r;
+                } else if (n < 0 && errno != EINTR) {
+                        if (errno == EAGAIN)
+                                return 0;
+
+                        return -errno;
+                }
+
+                /* empty read or EINTR: simply move on to the next attempt */
+        }
+
+        /* attempts exhausted; make sure we get dispatched again */
+        pty->needs_requeue = true;
+
+        return 1;
+}
+
+/*
+ * Flush queued output from the ring buffer to the pty. Returns a negative
+ * error code on failure, 0 if the output queue is empty afterwards, and 1 if
+ * data is still queued (either the pty reported EAGAIN or both write attempts
+ * were used up).
+ */
+static int pty_dispatch_write(Pty *pty) {
+        struct iovec vec[2];
+        unsigned int attempt;
+        ssize_t written;
+        size_t n_vec;
+
+        /*
+         * Mirrors pty_dispatch_read(): being edge-triggered we would have to
+         * write until the queue is empty or the kernel says EAGAIN. We only
+         * try twice and request a requeue if there is still data afterwards.
+         */
+
+        for (attempt = 0; attempt < 2; ++attempt) {
+                n_vec = ring_peek(&pty->out_buf, vec);
+                if (n_vec < 1)
+                        return 0;
+
+                written = writev(pty->fd, vec, (int)n_vec);
+
+                if (written > 0)
+                        ring_pull(&pty->out_buf, (size_t)written);
+                else if (written < 0 && errno != EINTR)
+                        return (errno == EAGAIN) ? 1 : -errno;
+
+                /* zero-length write or EINTR: simply move on to the next attempt */
+        }
+
+        if (ring_get_size(&pty->out_buf) < 1)
+                return 0;
+
+        /* still data left, make sure we're queued again */
+        pty->needs_requeue = true;
+        return 1;
+}
+
+/*
+ * sd-event I/O callback for the pty fd (registered in pty_attach_event()).
+ * Flushes pending output, drains pending input and, once the input queue is
+ * empty, reports a hangup or I/O error to the user as PTY_HUP after closing
+ * the fd. Returns 0, or the negative value returned by the PTY_HUP handler.
+ */
+static int pty_fd_fn(sd_event_source *source, int fd, uint32_t revents, void *userdata) {
+ Pty *pty = userdata;
+ int r_hup = 0, r_write = 0, r_read = 0, r;
+
+ /*
+ * Whenever we encounter I/O errors, we have to make sure to drain the
+ * input queue first, before we handle any HUP. A child might send us
+ * a message and immediately close the queue. We must not handle the
+ * HUP first or we lose data.
+ * Therefore, if we read a message successfully, we always return
+ * success and wait for the next event-loop iteration. Furthermore,
+ * whenever there is a write-error, we must try reading from the input
+ * queue even if EPOLLIN is not set. The input might have arrived in
+ * between epoll_wait() and write(). Therefore, write-errors are only
+ * ever handled if the input-queue is empty. In all other cases they
+ * are ignored until either reading fails or the input queue is empty.
+ */
+
+ /* remember the hangup/error, but do not act on it before reading */
+ if (revents & (EPOLLHUP | EPOLLERR))
+ r_hup = -EPIPE;
+
+ if (revents & EPOLLOUT)
+ r_write = pty_dispatch_write(pty);
+
+ /* Awesome! Kernel signals HUP without IN but queues are not empty.. */
+ if ((revents & EPOLLIN) || r_hup < 0 || r_write < 0) {
+ r_read = pty_dispatch_read(pty);
+ if (r_read > 0)
+ return 0; /* still data left to fetch next round */
+ }
+
+ /* input is drained (or reading failed): now any pending error may be reported */
+ if (r_hup < 0 || r_write < 0 || r_read < 0) {
+ /* PTY closed and input-queue drained */
+ pty_close(pty);
+ r = pty->event_fn(pty, pty->event_fn_userdata, PTY_HUP, NULL, 0);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+/*
+ * sd-event prepare callback for the pty fd source. If a requeue was
+ * requested, the I/O event mask is set again so that pending events are
+ * delivered once more. Always returns 0.
+ */
+static int pty_fd_prepare_fn(sd_event_source *source, void *userdata) {
+        Pty *pty = userdata;
+
+        if (!pty->needs_requeue)
+                return 0;
+
+        /*
+         * The source is edge-triggered. needs_requeue is raised whenever we
+         * could not process everything in one go, or when fresh output was
+         * queued. Setting the very same io-events *again* before going to
+         * sleep makes sd-event, which knows we are edge-triggered, pass the
+         * call on to the kernel even though nothing changed. The kernel then
+         * re-evaluates the fd and puts us back on the ready queue if an event
+         * is pending. The flag stays set if the call fails, so we retry on
+         * the next iteration.
+         */
+        if (sd_event_source_set_io_events(source, EPOLLHUP | EPOLLERR | EPOLLIN | EPOLLOUT | EPOLLET) >= 0)
+                pty->needs_requeue = false;
+
+        return 0;
+}
+
+/*
+ * sd-event child callback (registered in pty_attach_event()). Forgets the
+ * child PID and forwards the siginfo to the user as PTY_CHILD. Returns 0, or
+ * the negative value returned by the user callback.
+ */
+static int pty_child_fn(sd_event_source *source, const siginfo_t *si, void *userdata) {
+        Pty *pty = userdata;
+        int r;
+
+        /* from now on pty_has_child() reports false */
+        pty->child = 0;
+
+        r = pty->event_fn(pty, pty->event_fn_userdata, PTY_CHILD, si, sizeof(*si));
+
+        return r < 0 ? r : 0;
+}
+
+/*
+ * Hook the parent-side @pty into the event loop @event and install @event_fn
+ * (called with @event_fn_userdata) as its event handler. Any previous
+ * attachment is dropped first. An I/O source is added if the fd is open and
+ * a child source is added if a child PID is recorded.
+ *
+ * Returns 0 on success, -EINVAL on invalid arguments or if @pty is not the
+ * parent side, or the negative error of the failing sd-event call, in which
+ * case @pty is left detached.
+ */
+int pty_attach_event(Pty *pty, sd_event *event, pty_event_t event_fn, void *event_fn_userdata) {
+        int r;
+
+        assert_return(pty, -EINVAL);
+        assert_return(event, -EINVAL);
+        assert_return(event_fn, -EINVAL);
+        assert_return(pty_is_parent(pty), -EINVAL);
+
+        /* start from a clean slate */
+        pty_detach_event(pty);
+
+        if (pty_is_open(pty)) {
+                r = sd_event_add_io(event, &pty->fd_source, pty->fd,
+                                    EPOLLHUP | EPOLLERR | EPOLLIN | EPOLLOUT | EPOLLET,
+                                    pty_fd_fn, pty);
+                if (r >= 0)
+                        r = sd_event_source_set_prepare(pty->fd_source, pty_fd_prepare_fn);
+                if (r < 0)
+                        goto fail;
+        }
+
+        if (pty_has_child(pty)) {
+                r = sd_event_add_child(event, &pty->child_source, pty->child,
+                                       WEXITED, pty_child_fn, pty);
+                if (r < 0)
+                        goto fail;
+        }
+
+        pty->event_fn_userdata = event_fn_userdata;
+        pty->event_fn = event_fn;
+
+        return 0;
+
+fail:
+        /* roll back whatever was registered so far */
+        pty_detach_event(pty);
+        return r;
+}
+
+/*
+ * Remove @pty from its event loop: release both event sources and forget the
+ * event handler. Does nothing if @pty is NULL.
+ */
+void pty_detach_event(Pty *pty) {
+        if (pty) {
+                pty->child_source = sd_event_source_unref(pty->child_source);
+                pty->fd_source = sd_event_source_unref(pty->fd_source);
+                pty->event_fn_userdata = NULL;
+                pty->event_fn = NULL;
+        }
+}
+
+/*
+ * Queue @size bytes from @buf for writing to the pty. The data is only
+ * appended to the output ring buffer here; the actual write happens from the
+ * event loop.
+ *
+ * Returns 0 on success (including @size == 0), -EINVAL if @pty is NULL,
+ * -ENODEV if @pty is closed or not the parent side, or the negative error
+ * returned by ring_push().
+ */
+int pty_write(Pty *pty, const void *buf, size_t size) {
+        bool queue_was_empty;
+        int r;
+
+        assert_return(pty, -EINVAL);
+        assert_return(pty_is_open(pty), -ENODEV);
+        assert_return(pty_is_parent(pty), -ENODEV);
+
+        if (size == 0)
+                return 0;
+
+        /*
+         * Append @buf[0..@size] to the output ring. If the ring already held
+         * data, an EPOLLOUT wakeup is outstanding and nothing else is needed.
+         * If it was empty, the edge-triggered fd would never signal EPOLLOUT
+         * on its own, so we request a requeue to get the write started.
+         */
+
+        queue_was_empty = ring_get_size(&pty->out_buf) < 1;
+
+        r = ring_push(&pty->out_buf, buf, size);
+        if (r < 0)
+                return r;
+
+        if (queue_was_empty)
+                pty->needs_requeue = true;
+
+        return 0;
+}
+
+/*
+ * Send signal @sig through the pty using the TIOCSIG ioctl on the fd.
+ *
+ * Returns 0 on success, -EINVAL if @pty is NULL, -ENODEV if @pty is closed or
+ * not the parent side, or the negative errno of the ioctl.
+ */
+int pty_signal(Pty *pty, int sig) {
+        assert_return(pty, -EINVAL);
+        assert_return(pty_is_open(pty), -ENODEV);
+        assert_return(pty_is_parent(pty), -ENODEV);
+
+        if (ioctl(pty->fd, TIOCSIG, sig) < 0)
+                return -errno;
+
+        return 0;
+}
+
+/*
+ * Set the terminal window size of the pty to @term_width columns and
+ * @term_height rows; the pixel dimensions are left at zero.
+ *
+ * Returns 0 on success, -EINVAL if @pty is NULL, -ENODEV if @pty is closed or
+ * not the parent side, or the negative errno of the ioctl.
+ */
+int pty_resize(Pty *pty, unsigned short term_width, unsigned short term_height) {
+        struct winsize ws = {
+                .ws_row = term_height,
+                .ws_col = term_width,
+        };
+
+        assert_return(pty, -EINVAL);
+        assert_return(pty_is_open(pty), -ENODEV);
+        assert_return(pty_is_parent(pty), -ENODEV);
+
+        /*
+         * The foreground process group of the pty slave receives SIGWINCH as
+         * a result. We get one as well, but have no use for it.
+         */
+        if (ioctl(pty->fd, TIOCSWINSZ, &ws) < 0)
+                return -errno;
+
+        return 0;
+}
+
+/*
+ * Create a PTY, fork, and set up both sides of the connection.
+ *
+ * In the child, the Pty is switched to the child role and the slave becomes
+ * the controlling terminal and stdin/stdout/stderr; if that fails the child
+ * calls _exit(). In the parent, the Pty is switched to the parent role, the
+ * window size is set to @initial_term_width x @initial_term_height and, if
+ * @event is given, @event_fn is attached via pty_attach_event(). Both sides
+ * then synchronize on the barrier before returning.
+ *
+ * @event and @event_fn must either both be set or both be NULL.
+ *
+ * Returns like fork(): 0 in the child and the child PID in the parent, with
+ * the respective Pty object stored in @out. On failure a negative errno-style
+ * code is returned (in the parent only) and @out is left untouched.
+ */
+pid_t pty_fork(Pty **out, sd_event *event, pty_event_t event_fn, void *event_fn_userdata, unsigned short initial_term_width, unsigned short initial_term_height) {
+ /* released automatically on every early return unless handed to *out */
+ _pty_unref_ Pty *pty = NULL;
+ int r;
+ pid_t pid;
+
+ assert_return(out, -EINVAL);
+ assert_return((event && event_fn) || (!event && !event_fn), -EINVAL);
+
+ r = pty_new(&pty);
+ if (r < 0)
+ return r;
+
+ /* unlock before forking so the child can open the slave */
+ r = pty_unlock(pty);
+ if (r < 0)
+ return r;
+
+ pid = fork();
+ if (pid < 0)
+ return -errno;
+
+ if (pid == 0) {
+ /* child */
+
+ /* errors cannot be returned to the caller here; exit with the positive error code instead */
+ r = pty_make_child(pty);
+ if (r < 0)
+ _exit(-r);
+
+ r = pty_setup_child(pty);
+ if (r < 0)
+ _exit(-r);
+
+ /* sync with parent */
+ if (!barrier_place_and_sync(&pty->barrier))
+ _exit(1);
+
+ /* fallthrough and return the child's PTY object */
+ } else {
+ /* parent */
+
+ r = pty_make_parent(pty, pid);
+ if (r < 0)
+ goto parent_error;
+
+ r = pty_resize(pty, initial_term_width, initial_term_height);
+ if (r < 0)
+ goto parent_error;
+
+ if (event) {
+ r = pty_attach_event(pty, event, event_fn, event_fn_userdata);
+ if (r < 0)
+ goto parent_error;
+ }
+
+ /* sync with child */
+ if (!barrier_place_and_sync(&pty->barrier)) {
+ r = -ECHILD;
+ goto parent_error;
+ }
+
+ /* fallthrough and return the parent's PTY object */
+ }
+
+ /* success: hand the object to the caller and disarm the cleanup */
+ *out = pty;
+ pty = NULL;
+ return pid;
+
+parent_error:
+ /* abort the barrier, then reap the child (blocking) so no zombie is left */
+ barrier_abort(&pty->barrier);
+ waitpid(pty->child, NULL, 0);
+ pty->child = 0;
+ return r;
+}
diff --git a/src/shared/pty.h b/src/shared/pty.h
new file mode 100644
index 0000000000..a87ceb58ca
--- /dev/null
+++ b/src/shared/pty.h
@@ -0,0 +1,77 @@
+/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
+
+#pragma once
+
+/***
+ This file is part of systemd.
+
+ Copyright 2014 David Herrmann <dh.herrmann@gmail.com>
+
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+
+ systemd is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include <errno.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "barrier.h"
+#include "macro.h"
+#include "sd-event.h"
+#include "util.h"
+
+typedef struct Pty Pty;
+
+/* Event types passed as @event to a pty_event_t callback. */
+enum {
+ /* child event from the event loop; @ptr is a const siginfo_t*, @size its size */
+ PTY_CHILD,
+ /* pty hung up or failed and was closed; @ptr is NULL, @size is 0 */
+ PTY_HUP,
+ /* input was read from the pty; @ptr points to @size bytes of data */
+ PTY_DATA,
+};
+
+/*
+ * Event callback installed via pty_attach_event() or pty_fork(). @userdata is
+ * the pointer given at attach time; @event is one of PTY_CHILD, PTY_HUP or
+ * PTY_DATA, which also determines the meaning of @ptr and @size. A negative
+ * return value is passed on as the result of the event-loop callback.
+ */
+typedef int (*pty_event_t) (Pty *pty, void *userdata, unsigned int event, const void *ptr, size_t size);
+
+/* Object lifetime: pty_new() returns an object holding one reference; pty_unref() always returns NULL. */
+int pty_new(Pty **out);
+Pty *pty_ref(Pty *pty);
+Pty *pty_unref(Pty *pty);
+
+/* Variable annotation that calls pty_unref() on scope exit (presumably via the cleanup attribute behind _cleanup_). */
+#define _pty_unref_ _cleanup_(pty_unrefp)
+DEFINE_TRIVIAL_CLEANUP_FUNC(Pty*, pty_unref);
+
+/* Barrier embedded in the object; used by pty_fork() to synchronize parent and child. */
+Barrier *pty_get_barrier(Pty *pty);
+
+/* Role queries; all of them return false for a NULL object. */
+bool pty_is_unknown(Pty *pty);
+bool pty_is_parent(Pty *pty);
+bool pty_is_child(Pty *pty);
+bool pty_has_child(Pty *pty);
+/* Child PID, or -ECHILD if pty_has_child() is false. */
+pid_t pty_get_child(Pty *pty);
+
+/* File-descriptor state; pty_get_fd() returns -EPIPE once the fd is closed. */
+bool pty_is_open(Pty *pty);
+int pty_get_fd(Pty *pty);
+
+/* Manual setup steps; pty_fork() performs all of them in the right order. */
+int pty_make_child(Pty *pty);
+int pty_make_parent(Pty *pty, pid_t child);
+int pty_unlock(Pty *pty);
+int pty_setup_child(Pty *pty);
+void pty_close(Pty *pty);
+
+/* Event-loop integration (parent side only). */
+int pty_attach_event(Pty *pty, sd_event *event, pty_event_t event_fn, void *event_fn_userdata);
+void pty_detach_event(Pty *pty);
+
+/* Parent-side operations on an open pty; all return 0 or a negative errno-style code. */
+int pty_write(Pty *pty, const void *buf, size_t size);
+int pty_signal(Pty *pty, int sig);
+int pty_resize(Pty *pty, unsigned short term_width, unsigned short term_height);
+
+/* Create a pty and fork; returns 0 in the child, the child PID in the parent, negative on error. */
+pid_t pty_fork(Pty **out, sd_event *event, pty_event_t event_fn, void *event_fn_userdata, unsigned short initial_term_width, unsigned short initial_term_height);