diff options
Diffstat (limited to 'fs/proc')
-rw-r--r-- | fs/proc/Kconfig | 10 | ||||
-rw-r--r-- | fs/proc/array.c | 12 | ||||
-rw-r--r-- | fs/proc/base.c | 231 | ||||
-rw-r--r-- | fs/proc/inode.c | 9 | ||||
-rw-r--r-- | fs/proc/kcore.c | 4 | ||||
-rw-r--r-- | fs/proc/namespaces.c | 4 | ||||
-rw-r--r-- | fs/proc/nommu.c | 7 | ||||
-rw-r--r-- | fs/proc/self.c | 24 | ||||
-rw-r--r-- | fs/proc/task_mmu.c | 11 | ||||
-rw-r--r-- | fs/proc/task_nommu.c | 7 | ||||
-rw-r--r-- | fs/proc/thread_self.c | 22 |
11 files changed, 270 insertions, 71 deletions
diff --git a/fs/proc/Kconfig b/fs/proc/Kconfig index 2183fcf41..1ade1206b 100644 --- a/fs/proc/Kconfig +++ b/fs/proc/Kconfig @@ -71,3 +71,13 @@ config PROC_PAGE_MONITOR /proc/pid/smaps, /proc/pid/clear_refs, /proc/pid/pagemap, /proc/kpagecount, and /proc/kpageflags. Disabling these interfaces will reduce the size of the kernel by approximately 4kb. + +config PROC_CHILDREN + bool "Include /proc/<pid>/task/<tid>/children file" + default n + help + Provides a fast way to retrieve first level children pids of a task. See + <file:Documentation/filesystems/proc.txt> for more information. + + Say Y if you are running any user-space software which takes benefit from + this interface. For example, rkt is such a piece of software. diff --git a/fs/proc/array.c b/fs/proc/array.c index fd02a9ebf..ce065cf31 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -126,6 +126,14 @@ static inline const char *get_task_state(struct task_struct *tsk) { unsigned int state = (tsk->state | tsk->exit_state) & TASK_REPORT; + /* + * Parked tasks do not run; they sit in __kthread_parkme(). + * Without this check, we would report them as running, which is + * clearly wrong, so we report them as sleeping instead. + */ + if (tsk->state == TASK_PARKED) + state = TASK_INTERRUPTIBLE; + BUILD_BUG_ON(1 + ilog2(TASK_REPORT) != ARRAY_SIZE(task_state_array)-1); return task_state_array[fls(state)]; @@ -569,7 +577,7 @@ int proc_pid_statm(struct seq_file *m, struct pid_namespace *ns, return 0; } -#ifdef CONFIG_CHECKPOINT_RESTORE +#ifdef CONFIG_PROC_CHILDREN static struct pid * get_children_pid(struct inode *inode, struct pid *pid_prev, loff_t pos) { @@ -692,4 +700,4 @@ const struct file_operations proc_tid_children_operations = { .llseek = seq_lseek, .release = children_seq_release, }; -#endif /* CONFIG_CHECKPOINT_RESTORE */ +#endif /* CONFIG_PROC_CHILDREN */ diff --git a/fs/proc/base.c b/fs/proc/base.c index c439a9dcc..aa50d1ac2 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -196,18 +196,210 @@ static int proc_root_link(struct dentry *dentry, struct path *path) return result; } -static int proc_pid_cmdline(struct seq_file *m, struct pid_namespace *ns, - struct pid *pid, struct task_struct *task) +static ssize_t proc_pid_cmdline_read(struct file *file, char __user *buf, + size_t _count, loff_t *pos) { + struct task_struct *tsk; + struct mm_struct *mm; + char *page; + unsigned long count = _count; + unsigned long arg_start, arg_end, env_start, env_end; + unsigned long len1, len2, len; + unsigned long p; + char c; + ssize_t rv; + + BUG_ON(*pos < 0); + + tsk = get_proc_task(file_inode(file)); + if (!tsk) + return -ESRCH; + mm = get_task_mm(tsk); + put_task_struct(tsk); + if (!mm) + return 0; + /* Check if process spawned far enough to have cmdline. */ + if (!mm->env_end) { + rv = 0; + goto out_mmput; + } + + page = (char *)__get_free_page(GFP_TEMPORARY); + if (!page) { + rv = -ENOMEM; + goto out_mmput; + } + + down_read(&mm->mmap_sem); + arg_start = mm->arg_start; + arg_end = mm->arg_end; + env_start = mm->env_start; + env_end = mm->env_end; + up_read(&mm->mmap_sem); + + BUG_ON(arg_start > arg_end); + BUG_ON(env_start > env_end); + + len1 = arg_end - arg_start; + len2 = env_end - env_start; + + /* Empty ARGV. */ + if (len1 == 0) { + rv = 0; + goto out_free_page; + } /* - * Rely on struct seq_operations::show() being called once - * per internal buffer allocation. See single_open(), traverse(). + * Inherently racy -- command line shares address space + * with code and data. */ - BUG_ON(m->size < PAGE_SIZE); - m->count += get_cmdline(task, m->buf, PAGE_SIZE); - return 0; + rv = access_remote_vm(mm, arg_end - 1, &c, 1, 0); + if (rv <= 0) + goto out_free_page; + + rv = 0; + + if (c == '\0') { + /* Command line (set of strings) occupies whole ARGV. */ + if (len1 <= *pos) + goto out_free_page; + + p = arg_start + *pos; + len = len1 - *pos; + while (count > 0 && len > 0) { + unsigned int _count; + int nr_read; + + _count = min3(count, len, PAGE_SIZE); + nr_read = access_remote_vm(mm, p, page, _count, 0); + if (nr_read < 0) + rv = nr_read; + if (nr_read <= 0) + goto out_free_page; + + if (copy_to_user(buf, page, nr_read)) { + rv = -EFAULT; + goto out_free_page; + } + + p += nr_read; + len -= nr_read; + buf += nr_read; + count -= nr_read; + rv += nr_read; + } + } else { + /* + * Command line (1 string) occupies ARGV and maybe + * extends into ENVP. + */ + if (len1 + len2 <= *pos) + goto skip_argv_envp; + if (len1 <= *pos) + goto skip_argv; + + p = arg_start + *pos; + len = len1 - *pos; + while (count > 0 && len > 0) { + unsigned int _count, l; + int nr_read; + bool final; + + _count = min3(count, len, PAGE_SIZE); + nr_read = access_remote_vm(mm, p, page, _count, 0); + if (nr_read < 0) + rv = nr_read; + if (nr_read <= 0) + goto out_free_page; + + /* + * Command line can be shorter than whole ARGV + * even if last "marker" byte says it is not. + */ + final = false; + l = strnlen(page, nr_read); + if (l < nr_read) { + nr_read = l; + final = true; + } + + if (copy_to_user(buf, page, nr_read)) { + rv = -EFAULT; + goto out_free_page; + } + + p += nr_read; + len -= nr_read; + buf += nr_read; + count -= nr_read; + rv += nr_read; + + if (final) + goto out_free_page; + } +skip_argv: + /* + * Command line (1 string) occupies ARGV and + * extends into ENVP. + */ + if (len1 <= *pos) { + p = env_start + *pos - len1; + len = len1 + len2 - *pos; + } else { + p = env_start; + len = len2; + } + while (count > 0 && len > 0) { + unsigned int _count, l; + int nr_read; + bool final; + + _count = min3(count, len, PAGE_SIZE); + nr_read = access_remote_vm(mm, p, page, _count, 0); + if (nr_read < 0) + rv = nr_read; + if (nr_read <= 0) + goto out_free_page; + + /* Find EOS. */ + final = false; + l = strnlen(page, nr_read); + if (l < nr_read) { + nr_read = l; + final = true; + } + + if (copy_to_user(buf, page, nr_read)) { + rv = -EFAULT; + goto out_free_page; + } + + p += nr_read; + len -= nr_read; + buf += nr_read; + count -= nr_read; + rv += nr_read; + + if (final) + goto out_free_page; + } +skip_argv_envp: + ; + } + +out_free_page: + free_page((unsigned long)page); +out_mmput: + mmput(mm); + if (rv > 0) + *pos += rv; + return rv; } +static const struct file_operations proc_pid_cmdline_ops = { + .read = proc_pid_cmdline_read, + .llseek = generic_file_llseek, +}; + static int proc_pid_auxv(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *task) { @@ -304,15 +496,18 @@ static int proc_pid_stack(struct seq_file *m, struct pid_namespace *ns, } #endif -#ifdef CONFIG_SCHEDSTATS +#ifdef CONFIG_SCHED_INFO /* * Provides /proc/PID/schedstat */ static int proc_pid_schedstat(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *task) { - seq_printf(m, "%llu %llu %lu\n", - (unsigned long long)tsk_seruntime(task), + if (unlikely(!sched_info_on())) + seq_printf(m, "0 0 0\n"); + else + seq_printf(m, "%llu %llu %lu\n", + (unsigned long long)task->se.sum_exec_runtime, (unsigned long long)task->sched_info.run_delay, task->sched_info.pcount); @@ -1380,7 +1575,7 @@ static int proc_exe_link(struct dentry *dentry, struct path *exe_path) return -ENOENT; } -static void *proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd) +static const char *proc_pid_follow_link(struct dentry *dentry, void **cookie) { struct inode *inode = d_inode(dentry); struct path path; @@ -1394,7 +1589,7 @@ static void *proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd) if (error) goto out; - nd_jump_link(nd, &path); + nd_jump_link(&path); return NULL; out: return ERR_PTR(error); @@ -1744,7 +1939,7 @@ static int proc_map_files_get_link(struct dentry *dentry, struct path *path) down_read(&mm->mmap_sem); vma = find_exact_vma(mm, vm_start, vm_end); if (vma && vma->vm_file) { - *path = vma_pr_or_file(vma)->f_path; + *path = vma->vm_file->f_path; path_get(path); rc = 0; } @@ -2572,7 +2767,7 @@ static const struct pid_entry tgid_base_stuff[] = { #ifdef CONFIG_HAVE_ARCH_TRACEHOOK ONE("syscall", S_IRUSR, proc_pid_syscall), #endif - ONE("cmdline", S_IRUGO, proc_pid_cmdline), + REG("cmdline", S_IRUGO, proc_pid_cmdline_ops), ONE("stat", S_IRUGO, proc_tgid_stat), ONE("statm", S_IRUGO, proc_pid_statm), REG("maps", S_IRUGO, proc_pid_maps_operations), @@ -2600,7 +2795,7 @@ static const struct pid_entry tgid_base_stuff[] = { #ifdef CONFIG_STACKTRACE ONE("stack", S_IRUSR, proc_pid_stack), #endif -#ifdef CONFIG_SCHEDSTATS +#ifdef CONFIG_SCHED_INFO ONE("schedstat", S_IRUGO, proc_pid_schedstat), #endif #ifdef CONFIG_LATENCYTOP @@ -2918,11 +3113,11 @@ static const struct pid_entry tid_base_stuff[] = { #ifdef CONFIG_HAVE_ARCH_TRACEHOOK ONE("syscall", S_IRUSR, proc_pid_syscall), #endif - ONE("cmdline", S_IRUGO, proc_pid_cmdline), + REG("cmdline", S_IRUGO, proc_pid_cmdline_ops), ONE("stat", S_IRUGO, proc_tid_stat), ONE("statm", S_IRUGO, proc_pid_statm), REG("maps", S_IRUGO, proc_tid_maps_operations), -#ifdef CONFIG_CHECKPOINT_RESTORE +#ifdef CONFIG_PROC_CHILDREN REG("children", S_IRUGO, proc_tid_children_operations), #endif #ifdef CONFIG_NUMA @@ -2948,7 +3143,7 @@ static const struct pid_entry tid_base_stuff[] = { #ifdef CONFIG_STACKTRACE ONE("stack", S_IRUSR, proc_pid_stack), #endif -#ifdef CONFIG_SCHEDSTATS +#ifdef CONFIG_SCHED_INFO ONE("schedstat", S_IRUGO, proc_pid_schedstat), #endif #ifdef CONFIG_LATENCYTOP diff --git a/fs/proc/inode.c b/fs/proc/inode.c index e3eb55246..bd95b9fde 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -23,7 +23,6 @@ #include <linux/slab.h> #include <linux/mount.h> #include <linux/magic.h> -#include <linux/namei.h> #include <asm/uaccess.h> @@ -394,16 +393,16 @@ static const struct file_operations proc_reg_file_ops_no_compat = { }; #endif -static void *proc_follow_link(struct dentry *dentry, struct nameidata *nd) +static const char *proc_follow_link(struct dentry *dentry, void **cookie) { struct proc_dir_entry *pde = PDE(d_inode(dentry)); if (unlikely(!use_pde(pde))) return ERR_PTR(-EINVAL); - nd_set_link(nd, pde->data); - return pde; + *cookie = pde; + return pde->data; } -static void proc_put_link(struct dentry *dentry, struct nameidata *nd, void *p) +static void proc_put_link(struct inode *unused, void *p) { unuse_pde(p); } diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c index 91a4e6426..92e6726f6 100644 --- a/fs/proc/kcore.c +++ b/fs/proc/kcore.c @@ -92,7 +92,7 @@ static size_t get_kcore_size(int *nphdr, size_t *elf_buflen) roundup(sizeof(CORE_STR), 4)) + roundup(sizeof(struct elf_prstatus), 4) + roundup(sizeof(struct elf_prpsinfo), 4) + - roundup(sizeof(struct task_struct), 4); + roundup(arch_task_struct_size, 4); *elf_buflen = PAGE_ALIGN(*elf_buflen); return size + *elf_buflen; } @@ -415,7 +415,7 @@ static void elf_kcore_store_hdr(char *bufp, int nphdr, int dataoff) /* set up the task structure */ notes[2].name = CORE_STR; notes[2].type = NT_TASKSTRUCT; - notes[2].datasz = sizeof(struct task_struct); + notes[2].datasz = arch_task_struct_size; notes[2].data = current; nhdr->p_filesz += notesize(¬es[2]); diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c index e512642db..f6e8354b8 100644 --- a/fs/proc/namespaces.c +++ b/fs/proc/namespaces.c @@ -30,7 +30,7 @@ static const struct proc_ns_operations *ns_entries[] = { &mntns_operations, }; -static void *proc_ns_follow_link(struct dentry *dentry, struct nameidata *nd) +static const char *proc_ns_follow_link(struct dentry *dentry, void **cookie) { struct inode *inode = d_inode(dentry); const struct proc_ns_operations *ns_ops = PROC_I(inode)->ns_ops; @@ -45,7 +45,7 @@ static void *proc_ns_follow_link(struct dentry *dentry, struct nameidata *nd) if (ptrace_may_access(task, PTRACE_MODE_READ)) { error = ns_get_path(&ns_path, task, ns_ops); if (!error) - nd_jump_link(nd, &ns_path); + nd_jump_link(&ns_path); } put_task_struct(task); return error; diff --git a/fs/proc/nommu.c b/fs/proc/nommu.c index 139718132..f8595e8b5 100644 --- a/fs/proc/nommu.c +++ b/fs/proc/nommu.c @@ -45,10 +45,7 @@ static int nommu_region_show(struct seq_file *m, struct vm_region *region) file = region->vm_file; if (file) { - struct inode *inode; - - file = vmr_pr_or_file(region); - inode = file_inode(file); + struct inode *inode = file_inode(region->vm_file); dev = inode->i_sb->s_dev; ino = inode->i_ino; } @@ -67,7 +64,7 @@ static int nommu_region_show(struct seq_file *m, struct vm_region *region) if (file) { seq_pad(m, ' '); - seq_path(m, &file->f_path, ""); + seq_file_path(m, file, ""); } seq_putc(m, '\n'); diff --git a/fs/proc/self.c b/fs/proc/self.c index 6195b4a7c..113b8d061 100644 --- a/fs/proc/self.c +++ b/fs/proc/self.c @@ -1,5 +1,4 @@ #include <linux/sched.h> -#include <linux/namei.h> #include <linux/slab.h> #include <linux/pid_namespace.h> #include "internal.h" @@ -19,21 +18,20 @@ static int proc_self_readlink(struct dentry *dentry, char __user *buffer, return readlink_copy(buffer, buflen, tmp); } -static void *proc_self_follow_link(struct dentry *dentry, struct nameidata *nd) +static const char *proc_self_follow_link(struct dentry *dentry, void **cookie) { struct pid_namespace *ns = dentry->d_sb->s_fs_info; pid_t tgid = task_tgid_nr_ns(current, ns); - char *name = ERR_PTR(-ENOENT); - if (tgid) { - /* 11 for max length of signed int in decimal + NULL term */ - name = kmalloc(12, GFP_KERNEL); - if (!name) - name = ERR_PTR(-ENOMEM); - else - sprintf(name, "%d", tgid); - } - nd_set_link(nd, name); - return NULL; + char *name; + + if (!tgid) + return ERR_PTR(-ENOENT); + /* 11 for max length of signed int in decimal + NULL term */ + name = kmalloc(12, GFP_KERNEL); + if (!name) + return ERR_PTR(-ENOMEM); + sprintf(name, "%d", tgid); + return *cookie = name; } static const struct inode_operations proc_self_inode_operations = { diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 9afa35d04..ca1e09188 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -279,10 +279,7 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid) const char *name = NULL; if (file) { - struct inode *inode; - - file = vma_pr_or_file(vma); - inode = file_inode(file); + struct inode *inode = file_inode(vma->vm_file); dev = inode->i_sb->s_dev; ino = inode->i_ino; pgoff = ((loff_t)vma->vm_pgoff) << PAGE_SHIFT; @@ -313,7 +310,7 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid) */ if (file) { seq_pad(m, ' '); - seq_path(m, &file->f_path, "\n"); + seq_file_path(m, file, "\n"); goto done; } @@ -1482,7 +1479,7 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid) struct proc_maps_private *proc_priv = &numa_priv->proc_maps; struct vm_area_struct *vma = v; struct numa_maps *md = &numa_priv->md; - struct file *file = vma_pr_or_file(vma); + struct file *file = vma->vm_file; struct mm_struct *mm = vma->vm_mm; struct mm_walk walk = { .hugetlb_entry = gather_hugetlb_stats, @@ -1512,7 +1509,7 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid) if (file) { seq_puts(m, " file="); - seq_path(m, &file->f_path, "\n\t= "); + seq_file_path(m, file, "\n\t= "); } else if (vma->vm_start <= mm->brk && vma->vm_end >= mm->start_brk) { seq_puts(m, " heap"); } else { diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c index 174020784..e0d64c92e 100644 --- a/fs/proc/task_nommu.c +++ b/fs/proc/task_nommu.c @@ -160,10 +160,7 @@ static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma, file = vma->vm_file; if (file) { - struct inode *inode; - - file = vma_pr_or_file(vma); - inode = file_inode(file); + struct inode *inode = file_inode(vma->vm_file); dev = inode->i_sb->s_dev; ino = inode->i_ino; pgoff = (loff_t)vma->vm_pgoff << PAGE_SHIFT; @@ -183,7 +180,7 @@ static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma, if (file) { seq_pad(m, ' '); - seq_path(m, &file->f_path, ""); + seq_file_path(m, file, ""); } else if (mm) { pid_t tid = pid_of_stack(priv, vma, is_pid); diff --git a/fs/proc/thread_self.c b/fs/proc/thread_self.c index a8371993b..947b0f4fd 100644 --- a/fs/proc/thread_self.c +++ b/fs/proc/thread_self.c @@ -1,5 +1,4 @@ #include <linux/sched.h> -#include <linux/namei.h> #include <linux/slab.h> #include <linux/pid_namespace.h> #include "internal.h" @@ -20,21 +19,20 @@ static int proc_thread_self_readlink(struct dentry *dentry, char __user *buffer, return readlink_copy(buffer, buflen, tmp); } -static void *proc_thread_self_follow_link(struct dentry *dentry, struct nameidata *nd) +static const char *proc_thread_self_follow_link(struct dentry *dentry, void **cookie) { struct pid_namespace *ns = dentry->d_sb->s_fs_info; pid_t tgid = task_tgid_nr_ns(current, ns); pid_t pid = task_pid_nr_ns(current, ns); - char *name = ERR_PTR(-ENOENT); - if (pid) { - name = kmalloc(PROC_NUMBUF + 6 + PROC_NUMBUF, GFP_KERNEL); - if (!name) - name = ERR_PTR(-ENOMEM); - else - sprintf(name, "%d/task/%d", tgid, pid); - } - nd_set_link(nd, name); - return NULL; + char *name; + + if (!pid) + return ERR_PTR(-ENOENT); + name = kmalloc(PROC_NUMBUF + 6 + PROC_NUMBUF, GFP_KERNEL); + if (!name) + return ERR_PTR(-ENOMEM); + sprintf(name, "%d/task/%d", tgid, pid); + return *cookie = name; } static const struct inode_operations proc_thread_self_inode_operations = { |