diff options
Diffstat (limited to 'src/systemd-bootchart/store.c')
-rw-r--r-- | src/systemd-bootchart/store.c | 555 |
1 files changed, 555 insertions, 0 deletions
diff --git a/src/systemd-bootchart/store.c b/src/systemd-bootchart/store.c new file mode 100644 index 0000000000..42cb8043ce --- /dev/null +++ b/src/systemd-bootchart/store.c @@ -0,0 +1,555 @@ +/*** + This file is part of systemd. + + Copyright (C) 2009-2013 Intel Corporation + + Authors: + Auke Kok <auke-jan.h.kok@intel.com> + + systemd is free software; you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation; either version 2.1 of the License, or + (at your option) any later version. + + systemd is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with systemd; If not, see <http://www.gnu.org/licenses/>. + ***/ + +#include <dirent.h> +#include <fcntl.h> +#include <limits.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> +#include <unistd.h> + +#include "alloc-util.h" +#include "bootchart.h" +#include "cgroup-util.h" +#include "dirent-util.h" +#include "fd-util.h" +#include "fileio.h" +#include "parse-util.h" +#include "store.h" +#include "string-util.h" +#include "strxcpyx.h" +#include "time-util.h" +#include "util.h" + +/* + * Alloc a static 4k buffer for stdio - primarily used to increase + * PSS buffering from the default 1k stdin buffer to reduce + * read() overhead. + */ +static char smaps_buf[4096]; +static int skip = 0; + +double gettime_ns(void) { + struct timespec n; + + clock_gettime(CLOCK_MONOTONIC, &n); + + return (n.tv_sec + (n.tv_nsec / (double) NSEC_PER_SEC)); +} + +static char *bufgetline(char *buf) { + char *c; + + if (!buf) + return NULL; + + c = strchr(buf, '\n'); + if (c) + c++; + + return c; +} + +static int pid_cmdline_strscpy(int procfd, char *buffer, size_t buf_len, int pid) { + char filename[PATH_MAX]; + _cleanup_close_ int fd = -1; + ssize_t n; + + sprintf(filename, "%d/cmdline", pid); + fd = openat(procfd, filename, O_RDONLY|O_CLOEXEC); + if (fd < 0) + return -errno; + + n = read(fd, buffer, buf_len-1); + if (n > 0) { + int i; + for (i = 0; i < n; i++) + if (buffer[i] == '\0') + buffer[i] = ' '; + buffer[n] = '\0'; + } + + return 0; +} + +int log_sample(DIR *proc, + int sample, + struct ps_struct *ps_first, + struct list_sample_data **ptr, + int *pscount, + int *cpus) { + + static int vmstat = -1; + _cleanup_free_ char *buf_schedstat = NULL; + char buf[4096]; + char key[256]; + char val[256]; + char rt[256]; + char wt[256]; + char *m; + int r; + int c; + int p; + int mod; + static int e_fd = -1; + ssize_t s; + ssize_t n; + struct dirent *ent; + int fd; + struct list_sample_data *sampledata; + struct ps_sched_struct *ps_prev = NULL; + int procfd; + int taskfd = -1; + + sampledata = *ptr; + + procfd = dirfd(proc); + if (procfd < 0) + return -errno; + + if (vmstat < 0) { + /* block stuff */ + vmstat = openat(procfd, "vmstat", O_RDONLY|O_CLOEXEC); + if (vmstat < 0) + return log_error_errno(errno, "Failed to open /proc/vmstat: %m"); + } + + n = pread(vmstat, buf, sizeof(buf) - 1, 0); + if (n <= 0) { + vmstat = safe_close(vmstat); + if (n < 0) + return -errno; + return -ENODATA; + } + + buf[n] = '\0'; + + m = buf; + while (m) { + if (sscanf(m, "%s %s", key, val) < 2) + goto vmstat_next; + if (streq(key, "pgpgin")) + sampledata->blockstat.bi = atoi(val); + if (streq(key, "pgpgout")) { + sampledata->blockstat.bo = atoi(val); + break; + } +vmstat_next: + m = bufgetline(m); + if (!m) + break; + } + + /* Parse "/proc/schedstat" for overall CPU utilization */ + r = read_full_file("/proc/schedstat", &buf_schedstat, NULL); + if (r < 0) + return log_error_errno(r, "Unable to read schedstat: %m"); + + m = buf_schedstat; + while (m) { + if (sscanf(m, "%s %*s %*s %*s %*s %*s %*s %s %s", key, rt, wt) < 3) + goto schedstat_next; + + if (strstr(key, "cpu")) { + r = safe_atoi((const char*)(key+3), &c); + if (r < 0 || c > MAXCPUS -1) + /* Oops, we only have room for MAXCPUS data */ + break; + sampledata->runtime[c] = atoll(rt); + sampledata->waittime[c] = atoll(wt); + + if (c == *cpus) + *cpus = c + 1; + } +schedstat_next: + m = bufgetline(m); + if (!m) + break; + } + + if (arg_entropy) { + if (e_fd < 0) { + e_fd = openat(procfd, "sys/kernel/random/entropy_avail", O_RDONLY|O_CLOEXEC); + if (e_fd < 0) + return log_error_errno(errno, "Failed to open /proc/sys/kernel/random/entropy_avail: %m"); + } + + n = pread(e_fd, buf, sizeof(buf) - 1, 0); + if (n <= 0) { + e_fd = safe_close(e_fd); + } else { + buf[n] = '\0'; + sampledata->entropy_avail = atoi(buf); + } + } + + while ((ent = readdir(proc)) != NULL) { + char filename[PATH_MAX]; + int pid; + struct ps_struct *ps; + + if ((ent->d_name[0] < '0') || (ent->d_name[0] > '9')) + continue; + + pid = atoi(ent->d_name); + + if (pid >= MAXPIDS) + continue; + + ps = ps_first; + while (ps->next_ps) { + ps = ps->next_ps; + if (ps->pid == pid) + break; + } + + /* end of our LL? then append a new record */ + if (ps->pid != pid) { + _cleanup_fclose_ FILE *st = NULL; + char t[32]; + struct ps_struct *parent; + + ps->next_ps = new0(struct ps_struct, 1); + if (!ps->next_ps) + return log_oom(); + + ps = ps->next_ps; + ps->pid = pid; + ps->sched = -1; + ps->schedstat = -1; + + ps->sample = new0(struct ps_sched_struct, 1); + if (!ps->sample) + return log_oom(); + + ps->sample->sampledata = sampledata; + + (*pscount)++; + + /* mark our first sample */ + ps->first = ps->last = ps->sample; + ps->sample->runtime = atoll(rt); + ps->sample->waittime = atoll(wt); + + /* get name, start time */ + if (ps->sched < 0) { + sprintf(filename, "%d/sched", pid); + ps->sched = openat(procfd, filename, O_RDONLY|O_CLOEXEC); + if (ps->sched < 0) + continue; + } + + s = pread(ps->sched, buf, sizeof(buf) - 1, 0); + if (s <= 0) { + ps->sched = safe_close(ps->sched); + continue; + } + buf[s] = '\0'; + + if (!sscanf(buf, "%s %*s %*s", key)) + continue; + + strscpy(ps->name, sizeof(ps->name), key); + + /* cmdline */ + if (arg_show_cmdline) + pid_cmdline_strscpy(procfd, ps->name, sizeof(ps->name), pid); + + /* discard line 2 */ + m = bufgetline(buf); + if (!m) + continue; + + m = bufgetline(m); + if (!m) + continue; + + if (!sscanf(m, "%*s %*s %s", t)) + continue; + + r = safe_atod(t, &ps->starttime); + if (r < 0) + continue; + + ps->starttime /= 1000.0; + + if (arg_show_cgroup) + /* if this fails, that's OK */ + cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, + ps->pid, &ps->cgroup); + + /* ppid */ + sprintf(filename, "%d/stat", pid); + fd = openat(procfd, filename, O_RDONLY|O_CLOEXEC); + if (fd < 0) + continue; + + st = fdopen(fd, "re"); + if (!st) { + close(fd); + continue; + } + + if (!fscanf(st, "%*s %*s %*s %i", &p)) + continue; + + ps->ppid = p; + + /* + * setup child pointers + * + * these are used to paint the tree coherently later + * each parent has a LL of children, and a LL of siblings + */ + if (pid == 1) + continue; /* nothing to do for init atm */ + + /* kthreadd has ppid=0, which breaks our tree ordering */ + if (ps->ppid == 0) + ps->ppid = 1; + + parent = ps_first; + while ((parent->next_ps && parent->pid != ps->ppid)) + parent = parent->next_ps; + + if (parent->pid != ps->ppid) { + /* orphan */ + ps->ppid = 1; + parent = ps_first->next_ps; + } + + ps->parent = parent; + + if (!parent->children) { + /* it's the first child */ + parent->children = ps; + } else { + /* walk all children and append */ + struct ps_struct *children; + children = parent->children; + while (children->next) + children = children->next; + + children->next = ps; + } + } + + /* else -> found pid, append data in ps */ + + /* below here is all continuous logging parts - we get here on every + * iteration */ + + /* rt, wt */ + if (ps->schedstat < 0) { + sprintf(filename, "%d/schedstat", pid); + ps->schedstat = openat(procfd, filename, O_RDONLY|O_CLOEXEC); + if (ps->schedstat < 0) + continue; + } + + s = pread(ps->schedstat, buf, sizeof(buf) - 1, 0); + if (s <= 0) { + /* clean up our file descriptors - assume that the process exited */ + close(ps->schedstat); + ps->schedstat = -1; + ps->sched = safe_close(ps->sched); + continue; + } + + buf[s] = '\0'; + + if (!sscanf(buf, "%s %s %*s", rt, wt)) + continue; + + ps->sample->next = new0(struct ps_sched_struct, 1); + if (!ps->sample->next) + return log_oom(); + + ps->sample->next->prev = ps->sample; + ps->sample = ps->sample->next; + ps->last = ps->sample; + ps->sample->runtime = atoll(rt); + ps->sample->waittime = atoll(wt); + ps->sample->sampledata = sampledata; + ps->sample->ps_new = ps; + if (ps_prev) + ps_prev->cross = ps->sample; + + ps_prev = ps->sample; + ps->total = (ps->last->runtime - ps->first->runtime) + / 1000000000.0; + + /* Take into account CPU runtime/waittime spent in non-main threads of the process + * by parsing "/proc/[pid]/task/[tid]/schedstat" for all [tid] != [pid] + * See https://github.com/systemd/systemd/issues/139 + */ + + /* Browse directory "/proc/[pid]/task" to know the thread ids of process [pid] */ + snprintf(filename, sizeof(filename), PID_FMT "/task", pid); + taskfd = openat(procfd, filename, O_RDONLY|O_DIRECTORY|O_CLOEXEC); + if (taskfd >= 0) { + _cleanup_closedir_ DIR *taskdir = NULL; + + taskdir = fdopendir(taskfd); + if (!taskdir) { + safe_close(taskfd); + return -errno; + } + FOREACH_DIRENT(ent, taskdir, break) { + int tid = -1; + _cleanup_close_ int tid_schedstat = -1; + long long delta_rt; + long long delta_wt; + + if ((ent->d_name[0] < '0') || (ent->d_name[0] > '9')) + continue; + + /* Skip main thread as it was already accounted */ + r = safe_atoi(ent->d_name, &tid); + if (r < 0 || tid == pid) + continue; + + /* Parse "/proc/[pid]/task/[tid]/schedstat" */ + snprintf(filename, sizeof(filename), PID_FMT "/schedstat", tid); + tid_schedstat = openat(taskfd, filename, O_RDONLY|O_CLOEXEC); + + if (tid_schedstat == -1) + continue; + + s = pread(tid_schedstat, buf, sizeof(buf) - 1, 0); + if (s <= 0) + continue; + buf[s] = '\0'; + + if (!sscanf(buf, "%s %s %*s", rt, wt)) + continue; + + r = safe_atolli(rt, &delta_rt); + if (r < 0) + continue; + r = safe_atolli(rt, &delta_wt); + if (r < 0) + continue; + ps->sample->runtime += delta_rt; + ps->sample->waittime += delta_wt; + } + } + + if (!arg_pss) + goto catch_rename; + + /* Pss */ + if (!ps->smaps) { + sprintf(filename, "%d/smaps", pid); + fd = openat(procfd, filename, O_RDONLY|O_CLOEXEC); + if (fd < 0) + continue; + ps->smaps = fdopen(fd, "re"); + if (!ps->smaps) { + close(fd); + continue; + } + setvbuf(ps->smaps, smaps_buf, _IOFBF, sizeof(smaps_buf)); + } else { + rewind(ps->smaps); + } + + /* test to see if we need to skip another field */ + if (skip == 0) { + if (fgets(buf, sizeof(buf), ps->smaps) == NULL) { + continue; + } + if (fread(buf, 1, 28 * 15, ps->smaps) != (28 * 15)) { + continue; + } + if (buf[392] == 'V') { + skip = 2; + } + else { + skip = 1; + } + rewind(ps->smaps); + } + + while (1) { + int pss_kb; + + /* skip one line, this contains the object mapped. */ + if (fgets(buf, sizeof(buf), ps->smaps) == NULL) { + break; + } + /* then there's a 28 char 14 line block */ + if (fread(buf, 1, 28 * 14, ps->smaps) != 28 * 14) { + break; + } + pss_kb = atoi(&buf[61]); + ps->sample->pss += pss_kb; + + /* skip one more line if this is a newer kernel */ + if (skip == 2) { + if (fgets(buf, sizeof(buf), ps->smaps) == NULL) + break; + } + } + + if (ps->sample->pss > ps->pss_max) + ps->pss_max = ps->sample->pss; + +catch_rename: + /* catch process rename, try to randomize time */ + mod = (arg_hz < 4.0) ? 4.0 : (arg_hz / 4.0); + if (((sample - ps->pid) + pid) % (int)(mod) == 0) { + + /* re-fetch name */ + /* get name, start time */ + if (ps->sched < 0) { + sprintf(filename, "%d/sched", pid); + ps->sched = openat(procfd, filename, O_RDONLY|O_CLOEXEC); + if (ps->sched < 0) + continue; + } + + s = pread(ps->sched, buf, sizeof(buf) - 1, 0); + if (s <= 0) { + /* clean up file descriptors */ + ps->sched = safe_close(ps->sched); + ps->schedstat = safe_close(ps->schedstat); + continue; + } + + buf[s] = '\0'; + + if (!sscanf(buf, "%s %*s %*s", key)) + continue; + + strscpy(ps->name, sizeof(ps->name), key); + + /* cmdline */ + if (arg_show_cmdline) + pid_cmdline_strscpy(procfd, ps->name, sizeof(ps->name), pid); + } + } + + return 0; +} |