/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/

/***
  This file is part of systemd.

  Copyright 2014 David Herrmann <dh.herrmann@gmail.com>

  systemd is free software; you can redistribute it and/or modify it
  under the terms of the GNU Lesser General Public License as published by
  the Free Software Foundation; either version 2.1 of the License, or
  (at your option) any later version.

  systemd is distributed in the hope that it will be useful, but
  WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  Lesser General Public License for more details.

  You should have received a copy of the GNU Lesser General Public License
  along with systemd; If not, see <http://www.gnu.org/licenses/>.
***/

/*
 * PTY
 * A PTY object represents a single PTY connection between a master and a
 * child. The child process is fork()ed, so the caller controls what program
 * will be run.
 *
 * Programs like /bin/login tend to perform a vhangup() on their TTY
 * before running the login procedure. This also causes the pty master
 * to get an EPOLLHUP event as long as no client has the TTY opened.
 * This means we cannot use the TTY connection as a reliable way to track
 * the client. Instead, we _must_ rely on the PID of the client to track
 * them.
 * However, this has the side effect that if the client forks and the
 * parent exits, we lose them and restart the client. But this seems to
 * be the expected behavior, so we implement it here.
 *
 * Unfortunately, epoll always polls for EPOLLHUP, so as long as the
 * vhangup() is ongoing, we will _always_ get EPOLLHUP and cannot sleep.
 * This gets worse if the client closes the TTY but doesn't exit.
 * Therefore, the fd must be edge-triggered in the epoll-set so we
 * only get the events once they change.
 */

#include <errno.h>
#include <fcntl.h>
#include <limits.h>
#include <linux/ioctl.h>
#include <signal.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/epoll.h>
#include <sys/eventfd.h>
#include <sys/ioctl.h>
#include <sys/types.h>
#include <sys/uio.h>
#include <sys/wait.h>
#include <termios.h>
#include <unistd.h>

#include "barrier.h"
#include "macro.h"
#include "pty.h"
#include "ring.h"
#include "util.h"

#define PTY_BUFSIZE 4096

enum {
        PTY_ROLE_UNKNOWN,
        PTY_ROLE_PARENT,
        PTY_ROLE_CHILD,
};

struct Pty {
        unsigned long ref;
        Barrier barrier;
        int fd;
        pid_t child;
        sd_event_source *fd_source;
        sd_event_source *child_source;

        char in_buf[PTY_BUFSIZE];
        Ring out_buf;

        pty_event_t event_fn;
        void *event_fn_userdata;

        bool needs_requeue : 1;
        unsigned int role : 2;
};
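/*
 * pty_new() - Allocate a new, unconnected Pty object
 *
 * Opens the pty master via posix_openpt() and sets up the internal barrier.
 * The returned object has a reference count of 1 and no role assigned yet;
 * use pty_fork(), or pty_make_parent()/pty_make_child(), to assign one.
 */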
int pty_new(Pty **out) {
        _pty_unref_ Pty *pty = NULL;
        int r;

        assert_return(out, -EINVAL);

        pty = new0(Pty, 1);
        if (!pty)
                return -ENOMEM;

        pty->ref = 1;
        pty->fd = -1;
        pty->barrier = (Barrier) BARRIER_NULL;

        pty->fd = posix_openpt(O_RDWR | O_NOCTTY | O_CLOEXEC | O_NONBLOCK);
        if (pty->fd < 0)
                return -errno;

        /*
         * The slave-node is initialized to uid/gid of the caller of
         * posix_openpt(). This is skipped only if devpts is mounted with a
         * fixed uid/gid. In that case, grantpt() can overwrite these, but then
         * you have to be root to use chown() (or a pt_chown helper has to be
         * present). In those cases grantpt() really does something, otherwise
         * it's a no-op. We call grantpt() here to try supporting those cases,
         * even though no-one uses that, I guess. If you need other
         * access-rights, set them yourself after this call returns (no, this
         * is not racy; it looks racy, but races regarding your own UID are
         * never important as an attacker could ptrace you; and the slave-pty
         * is also still locked).
         */
        r = grantpt(pty->fd);
        if (r < 0)
                return -errno;

        r = barrier_create(&pty->barrier);
        if (r < 0)
                return r;

        *out = pty;
        pty = NULL;
        return 0;
}

Pty *pty_ref(Pty *pty) {
        if (!pty || pty->ref < 1)
                return NULL;

        ++pty->ref;
        return pty;
}

Pty *pty_unref(Pty *pty) {
        if (!pty || pty->ref < 1 || --pty->ref > 0)
                return NULL;

        pty_close(pty);
        pty->child_source = sd_event_source_unref(pty->child_source);
        barrier_destroy(&pty->barrier);
        ring_clear(&pty->out_buf);
        free(pty);

        return NULL;
}

Barrier *pty_get_barrier(Pty *pty) {
        assert(pty);
        return &pty->barrier;
}

bool pty_is_unknown(Pty *pty) {
        return pty && pty->role == PTY_ROLE_UNKNOWN;
}

bool pty_is_parent(Pty *pty) {
        return pty && pty->role == PTY_ROLE_PARENT;
}

bool pty_is_child(Pty *pty) {
        return pty && pty->role == PTY_ROLE_CHILD;
}

bool pty_has_child(Pty *pty) {
        return pty_is_parent(pty) && pty->child > 0;
}

pid_t pty_get_child(Pty *pty) {
        return pty_has_child(pty) ? pty->child : -ECHILD;
}

bool pty_is_open(Pty *pty) {
        return pty && pty->fd >= 0;
}

int pty_get_fd(Pty *pty) {
        assert_return(pty, -EINVAL);

        return pty_is_open(pty) ? pty->fd : -EPIPE;
}

int pty_make_child(Pty *pty) {
        _cleanup_free_ char *slave_name = NULL;
        int r, fd;

        assert_return(pty, -EINVAL);
        assert_return(pty_is_unknown(pty), -EALREADY);

        r = ptsname_malloc(pty->fd, &slave_name);
        if (r < 0)
                return r;

        fd = open(slave_name, O_RDWR | O_CLOEXEC | O_NOCTTY);
        if (fd < 0)
                return -errno;

        safe_close(pty->fd);
        pty->fd = fd;
        pty->child = getpid();
        pty->role = PTY_ROLE_CHILD;
        barrier_set_role(&pty->barrier, BARRIER_CHILD);

        return 0;
}

int pty_make_parent(Pty *pty, pid_t child) {
        assert_return(pty, -EINVAL);
        assert_return(pty_is_unknown(pty), -EALREADY);

        pty->child = child;
        pty->role = PTY_ROLE_PARENT;

        return 0;
}

int pty_unlock(Pty *pty) {
        assert_return(pty, -EINVAL);
        assert_return(pty_is_unknown(pty) || pty_is_parent(pty), -EINVAL);
        assert_return(pty_is_open(pty), -ENODEV);

        return unlockpt(pty->fd) < 0 ? -errno : 0;
}
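/*
 * pty_setup_child() - Prepare the child side of the PTY
 *
 * Must be called in the forked child after pty_make_child(). It resets the
 * signal mask and handlers, starts a new session, makes the pty slave the
 * controlling TTY, enables the UTF-8 flag, and redirects stdin/stdout/stderr
 * to the slave.
 */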
int pty_setup_child(Pty *pty) {
        struct termios attr;
        pid_t pid;
        int r;

        assert_return(pty, -EINVAL);
        assert_return(pty_is_child(pty), -EINVAL);
        assert_return(pty_is_open(pty), -EALREADY);

        r = sigprocmask_many(SIG_SETMASK, -1);
        if (r < 0)
                return r;

        r = reset_all_signal_handlers();
        if (r < 0)
                return r;

        pid = setsid();
        if (pid < 0 && errno != EPERM)
                return -errno;

        r = ioctl(pty->fd, TIOCSCTTY, 0);
        if (r < 0)
                return -errno;

        r = tcgetattr(pty->fd, &attr);
        if (r < 0)
                return -errno;

        /* erase character should be normal backspace, PLEASEEE! */
        attr.c_cc[VERASE] = 010;
        /* always set UTF8 flag */
        attr.c_iflag |= IUTF8;

        r = tcsetattr(pty->fd, TCSANOW, &attr);
        if (r < 0)
                return -errno;

        if (dup2(pty->fd, STDIN_FILENO) != STDIN_FILENO ||
            dup2(pty->fd, STDOUT_FILENO) != STDOUT_FILENO ||
            dup2(pty->fd, STDERR_FILENO) != STDERR_FILENO)
                return -errno;

        /* only close FD if it's not a std-fd */
        pty->fd = (pty->fd > 2) ? safe_close(pty->fd) : -1;

        return 0;
}

void pty_close(Pty *pty) {
        if (!pty_is_open(pty))
                return;

        pty->fd_source = sd_event_source_unref(pty->fd_source);
        pty->fd = safe_close(pty->fd);
}

/*
 * Drain the input queue and dispatch data via the event handler. Returns <0 on
 * error, 0 if the queue is empty, and 1 if we couldn't empty the input queue
 * fast enough and there's still data left.
 */
static int pty_dispatch_read(Pty *pty) {
        unsigned int i;
        ssize_t len;
        int r;

        /*
         * We're edge-triggered, which means we need to read the whole queue.
         * This, however, might cause us to stall if the writer is faster than
         * we are. Therefore, try reading as much as 8 times (32KiB) and only
         * bail out then.
         */

        for (i = 0; i < 8; ++i) {
                len = read(pty->fd, pty->in_buf, sizeof(pty->in_buf) - 1);
                if (len < 0) {
                        if (errno == EINTR)
                                continue;

                        return (errno == EAGAIN) ? 0 : -errno;
                } else if (len == 0) {
                        continue;
                }

                /* set terminating zero for debugging safety */
                pty->in_buf[len] = 0;

                r = pty->event_fn(pty, pty->event_fn_userdata, PTY_DATA,
                                  pty->in_buf, len);
                if (r < 0)
                        return r;
        }

        /* still data left, make sure we're queued again */
        pty->needs_requeue = true;

        return 1;
}

/*
 * Drain the output queue by writing data to the pty. Returns <0 on error, 0 if
 * the output queue is empty now, and 1 if we couldn't empty the output queue
 * fast enough and there's still data left.
 */
static int pty_dispatch_write(Pty *pty) {
        struct iovec vec[2];
        unsigned int i;
        ssize_t len;
        size_t num;

        /*
         * Same as pty_dispatch_read(): we're edge-triggered, so we need to
         * call write() until either all data is written or it returns EAGAIN.
         * We call it twice and if it still writes successfully, we reschedule.
         */

        for (i = 0; i < 2; ++i) {
                num = ring_peek(&pty->out_buf, vec);
                if (num < 1)
                        return 0;

                len = writev(pty->fd, vec, (int)num);
                if (len < 0) {
                        if (errno == EINTR)
                                continue;

                        return (errno == EAGAIN) ? 1 : -errno;
                } else if (len == 0) {
                        continue;
                }

                ring_pull(&pty->out_buf, (size_t)len);
        }

        /* still data left, make sure we're queued again */
        if (ring_get_size(&pty->out_buf) > 0) {
                pty->needs_requeue = true;
                return 1;
        }

        return 0;
}

static int pty_fd_fn(sd_event_source *source, int fd, uint32_t revents, void *userdata) {
        Pty *pty = userdata;
        int r_hup = 0, r_write = 0, r_read = 0, r;

        /*
         * Whenever we encounter I/O errors, we have to make sure to drain the
         * input queue first, before we handle any HUP. A child might send us
         * a message and immediately close the queue. We must not handle the
         * HUP first or we lose data.
         * Therefore, if we read a message successfully, we always return
         * success and wait for the next event-loop iteration. Furthermore,
         * whenever there is a write-error, we must try reading from the input
         * queue even if EPOLLIN is not set. The input might have arrived in
         * between epoll_wait() and write(). Therefore, write-errors are only
         * ever handled if the input-queue is empty. In all other cases they
         * are ignored until either reading fails or the input queue is empty.
         */

        if (revents & (EPOLLHUP | EPOLLERR))
                r_hup = -EPIPE;

        if (revents & EPOLLOUT)
                r_write = pty_dispatch_write(pty);

        /* Awesome! The kernel signals HUP without IN but the queues are not empty... */
        if ((revents & EPOLLIN) || r_hup < 0 || r_write < 0) {
                r_read = pty_dispatch_read(pty);
                if (r_read > 0)
                        return 0; /* still data left to fetch next round */
        }

        if (r_hup < 0 || r_write < 0 || r_read < 0) {
                /* PTY closed and input-queue drained */
                pty_close(pty);
                r = pty->event_fn(pty, pty->event_fn_userdata, PTY_HUP,
                                  NULL, 0);
                if (r < 0)
                        return r;
        }

        return 0;
}

static int pty_fd_prepare_fn(sd_event_source *source, void *userdata) {
        Pty *pty = userdata;
        int r;

        if (pty->needs_requeue) {
                /*
                 * We're edge-triggered. In case we couldn't handle all events
                 * or in case new write-data is queued, we set needs_requeue.
                 * Before going to sleep, we set the io-events *again*. sd-event
                 * notices that we're edge-triggered and forwards the call to
                 * the kernel even if the events didn't change. The kernel will
                 * check the events and re-queue us on the ready queue in case
                 * an event is pending.
                 */
                r = sd_event_source_set_io_events(source, EPOLLHUP | EPOLLERR | EPOLLIN | EPOLLOUT | EPOLLET);
                if (r >= 0)
                        pty->needs_requeue = false;
        }

        return 0;
}

static int pty_child_fn(sd_event_source *source, const siginfo_t *si, void *userdata) {
        Pty *pty = userdata;
        int r;

        pty->child = 0;

        r = pty->event_fn(pty, pty->event_fn_userdata, PTY_CHILD,
                          si, sizeof(*si));
        if (r < 0)
                return r;

        return 0;
}
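/*
 * pty_attach_event() - Hook the parent side of the PTY into an event loop
 *
 * Registers an edge-triggered io source for the pty master and, if a child is
 * known, a child watcher on the given sd_event loop. @event_fn is then called
 * with PTY_DATA, PTY_HUP and PTY_CHILD events as they occur.
 */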
int pty_attach_event(Pty *pty, sd_event *event, pty_event_t event_fn, void *event_fn_userdata) {
        int r;

        assert_return(pty, -EINVAL);
        assert_return(event, -EINVAL);
        assert_return(event_fn, -EINVAL);
        assert_return(pty_is_parent(pty), -EINVAL);

        pty_detach_event(pty);

        if (pty_is_open(pty)) {
                r = sd_event_add_io(event,
                                    &pty->fd_source,
                                    pty->fd,
                                    EPOLLHUP | EPOLLERR | EPOLLIN | EPOLLOUT | EPOLLET,
                                    pty_fd_fn,
                                    pty);
                if (r < 0)
                        goto error;

                r = sd_event_source_set_prepare(pty->fd_source, pty_fd_prepare_fn);
                if (r < 0)
                        goto error;
        }

        if (pty_has_child(pty)) {
                r = sd_event_add_child(event,
                                       &pty->child_source,
                                       pty->child,
                                       WEXITED,
                                       pty_child_fn,
                                       pty);
                if (r < 0)
                        goto error;
        }

        pty->event_fn = event_fn;
        pty->event_fn_userdata = event_fn_userdata;

        return 0;

error:
        pty_detach_event(pty);
        return r;
}

void pty_detach_event(Pty *pty) {
        if (!pty)
                return;

        pty->child_source = sd_event_source_unref(pty->child_source);
        pty->fd_source = sd_event_source_unref(pty->fd_source);
        pty->event_fn = NULL;
        pty->event_fn_userdata = NULL;
}

int pty_write(Pty *pty, const void *buf, size_t size) {
        bool was_empty;
        int r;

        assert_return(pty, -EINVAL);
        assert_return(pty_is_open(pty), -ENODEV);
        assert_return(pty_is_parent(pty), -ENODEV);

        if (size < 1)
                return 0;

        /*
         * Push @buf[0..@size] into the output ring-buffer. In case the
         * ring-buffer wasn't empty beforehand, we're already waiting for
         * EPOLLOUT and we're done. If it was empty, we have to re-queue the
         * FD for EPOLLOUT as we're edge-triggered and wouldn't get any new
         * EPOLLOUT event.
         */

        was_empty = ring_get_size(&pty->out_buf) < 1;

        r = ring_push(&pty->out_buf, buf, size);
        if (r < 0)
                return r;

        if (was_empty)
                pty->needs_requeue = true;

        return 0;
}

int pty_signal(Pty *pty, int sig) {
        assert_return(pty, -EINVAL);
        assert_return(pty_is_open(pty), -ENODEV);
        assert_return(pty_is_parent(pty), -ENODEV);

        return ioctl(pty->fd, TIOCSIG, sig) < 0 ? -errno : 0;
}

int pty_resize(Pty *pty, unsigned short term_width, unsigned short term_height) {
        struct winsize ws;

        assert_return(pty, -EINVAL);
        assert_return(pty_is_open(pty), -ENODEV);
        assert_return(pty_is_parent(pty), -ENODEV);

        zero(ws);
        ws.ws_col = term_width;
        ws.ws_row = term_height;

        /*
         * This will send SIGWINCH to the pty slave foreground process group.
         * We will also get one, but we don't need it.
         */
        return ioctl(pty->fd, TIOCSWINSZ, &ws) < 0 ? -errno : 0;
}
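/*
 * pty_fork() - Create a PTY object and fork a child connected to it
 *
 * In the parent, this returns the child's PID and stores the parent-side PTY
 * object in @out (optionally attached to @event). In the child, it returns 0
 * and stores the child-side object, with the pty slave set up as the
 * controlling TTY and as stdin/stdout/stderr. Parent and child synchronize
 * via the internal barrier before the call returns in either process.
 */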
pid_t pty_fork(Pty **out, sd_event *event, pty_event_t event_fn, void *event_fn_userdata,
               unsigned short initial_term_width, unsigned short initial_term_height) {
        _pty_unref_ Pty *pty = NULL;
        int r;
        pid_t pid;

        assert_return(out, -EINVAL);
        assert_return((event && event_fn) || (!event && !event_fn), -EINVAL);

        r = pty_new(&pty);
        if (r < 0)
                return r;

        r = pty_unlock(pty);
        if (r < 0)
                return r;

        pid = fork();
        if (pid < 0)
                return -errno;

        if (pid == 0) {
                /* child */

                r = pty_make_child(pty);
                if (r < 0)
                        _exit(-r);

                r = pty_setup_child(pty);
                if (r < 0)
                        _exit(-r);

                /* sync with parent */
                if (!barrier_place_and_sync(&pty->barrier))
                        _exit(1);

                /* fallthrough and return the child's PTY object */
        } else {
                /* parent */

                r = pty_make_parent(pty, pid);
                if (r < 0)
                        goto parent_error;

                r = pty_resize(pty, initial_term_width, initial_term_height);
                if (r < 0)
                        goto parent_error;

                if (event) {
                        r = pty_attach_event(pty, event, event_fn, event_fn_userdata);
                        if (r < 0)
                                goto parent_error;
                }

                /* sync with child */
                if (!barrier_place_and_sync(&pty->barrier)) {
                        r = -ECHILD;
                        goto parent_error;
                }

                /* fallthrough and return the parent's PTY object */
        }

        *out = pty;
        pty = NULL;
        return pid;

parent_error:
        barrier_abort(&pty->barrier);
        waitpid(pty->child, NULL, 0);
        pty->child = 0;
        return r;
}
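/*
 * Example usage (illustrative only, not compiled as part of this file): a
 * minimal sketch of driving a shell through pty_fork() with an sd-event loop.
 * The callback signature is assumed to match the pty_event_t invocations made
 * above (event number plus data pointer and size); the names "on_pty_event"
 * and "run_shell" are hypothetical, and cleanup is omitted for brevity.
 *
 *      static int on_pty_event(Pty *pty, void *userdata, unsigned int event,
 *                              const void *ptr, size_t size) {
 *              switch (event) {
 *              case PTY_DATA:
 *                      // forward child output to our own stdout
 *                      (void) write(STDOUT_FILENO, ptr, size);
 *                      return 0;
 *              case PTY_HUP:
 *              case PTY_CHILD:
 *                      // child hung up or exited; stop the event loop
 *                      return sd_event_exit(userdata, 0);
 *              default:
 *                      return 0;
 *              }
 *      }
 *
 *      static int run_shell(void) {
 *              _pty_unref_ Pty *pty = NULL;
 *              sd_event *event = NULL;
 *              pid_t pid;
 *              int r;
 *
 *              r = sd_event_default(&event);
 *              if (r < 0)
 *                      return r;
 *
 *              pid = pty_fork(&pty, event, on_pty_event, event, 80, 24);
 *              if (pid < 0)
 *                      return pid;
 *              if (pid == 0) {
 *                      // child: the pty slave is now stdin/stdout/stderr
 *                      execl("/bin/sh", "sh", (char*) NULL);
 *                      _exit(1);
 *              }
 *
 *              // parent: queue some input for the child's TTY
 *              r = pty_write(pty, "echo hello\n", 11);
 *              if (r < 0)
 *                      return r;
 *
 *              return sd_event_loop(event);
 *      }
 */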