diff options
Diffstat (limited to 'kernel/trace')
-rw-r--r-- | kernel/trace/blktrace.c | 12 | ||||
-rw-r--r-- | kernel/trace/bpf_trace.c | 16 | ||||
-rw-r--r-- | kernel/trace/ftrace.c | 449 | ||||
-rw-r--r-- | kernel/trace/ring_buffer.c | 57 | ||||
-rw-r--r-- | kernel/trace/trace.h | 6 | ||||
-rw-r--r-- | kernel/trace/trace_event_perf.c | 2 | ||||
-rw-r--r-- | kernel/trace/trace_events.c | 28 | ||||
-rw-r--r-- | kernel/trace/trace_events_trigger.c | 25 | ||||
-rw-r--r-- | kernel/trace/trace_stack.c | 6 |
9 files changed, 313 insertions, 288 deletions
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index a990824c8..2aeb6ffc0 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -349,16 +349,10 @@ static ssize_t blk_msg_write(struct file *filp, const char __user *buffer, if (count >= BLK_TN_MAX_MSG) return -EINVAL; - msg = kmalloc(count + 1, GFP_KERNEL); - if (msg == NULL) - return -ENOMEM; - - if (copy_from_user(msg, buffer, count)) { - kfree(msg); - return -EFAULT; - } + msg = memdup_user_nul(buffer, count); + if (IS_ERR(msg)) + return PTR_ERR(msg); - msg[count] = '\0'; bt = filp->private_data; __trace_note_message(bt, "%s", msg); kfree(msg); diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 4228fd368..326a75e88 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -191,14 +191,17 @@ static u64 bpf_perf_event_read(u64 r1, u64 index, u64 r3, u64 r4, u64 r5) struct bpf_map *map = (struct bpf_map *) (unsigned long) r1; struct bpf_array *array = container_of(map, struct bpf_array, map); struct perf_event *event; + struct file *file; if (unlikely(index >= array->map.max_entries)) return -E2BIG; - event = (struct perf_event *)array->ptrs[index]; - if (!event) + file = (struct file *)array->ptrs[index]; + if (unlikely(!file)) return -ENOENT; + event = file->private_data; + /* make sure event is local and doesn't have pmu::count */ if (event->oncpu != smp_processor_id() || event->pmu->count) @@ -228,6 +231,7 @@ static u64 bpf_perf_event_output(u64 r1, u64 r2, u64 index, u64 r4, u64 size) void *data = (void *) (long) r4; struct perf_sample_data sample_data; struct perf_event *event; + struct file *file; struct perf_raw_record raw = { .size = size, .data = data, @@ -236,10 +240,12 @@ static u64 bpf_perf_event_output(u64 r1, u64 r2, u64 index, u64 r4, u64 size) if (unlikely(index >= array->map.max_entries)) return -E2BIG; - event = (struct perf_event *)array->ptrs[index]; - if (unlikely(!event)) + file = (struct file *)array->ptrs[index]; + if (unlikely(!file)) return -ENOENT; + event = file->private_data; + if (unlikely(event->attr.type != PERF_TYPE_SOFTWARE || event->attr.config != PERF_COUNT_SW_BPF_OUTPUT)) return -EINVAL; @@ -316,7 +322,7 @@ static bool kprobe_prog_is_valid_access(int off, int size, enum bpf_access_type return true; } -static struct bpf_verifier_ops kprobe_prog_ops = { +static const struct bpf_verifier_ops kprobe_prog_ops = { .get_func_proto = kprobe_prog_func_proto, .is_valid_access = kprobe_prog_is_valid_access, }; diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 3f743b147..57a6eea84 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -62,8 +62,6 @@ #define FTRACE_HASH_DEFAULT_BITS 10 #define FTRACE_HASH_MAX_BITS 12 -#define FL_GLOBAL_CONTROL_MASK (FTRACE_OPS_FL_CONTROL) - #ifdef CONFIG_DYNAMIC_FTRACE #define INIT_OPS_HASH(opsname) \ .func_hash = &opsname.local_hash, \ @@ -113,14 +111,9 @@ static int ftrace_disabled __read_mostly; static DEFINE_MUTEX(ftrace_lock); -static struct ftrace_ops *ftrace_control_list __read_mostly = &ftrace_list_end; static struct ftrace_ops *ftrace_ops_list __read_mostly = &ftrace_list_end; ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub; static struct ftrace_ops global_ops; -static struct ftrace_ops control_ops; - -static void ftrace_ops_recurs_func(unsigned long ip, unsigned long parent_ip, - struct ftrace_ops *op, struct pt_regs *regs); #if ARCH_SUPPORTS_FTRACE_OPS static void ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, @@ -203,7 +196,7 @@ void clear_ftrace_function(void) ftrace_trace_function = ftrace_stub; } -static void control_ops_disable_all(struct ftrace_ops *ops) +static void per_cpu_ops_disable_all(struct ftrace_ops *ops) { int cpu; @@ -211,16 +204,19 @@ static void control_ops_disable_all(struct ftrace_ops *ops) *per_cpu_ptr(ops->disabled, cpu) = 1; } -static int control_ops_alloc(struct ftrace_ops *ops) +static int per_cpu_ops_alloc(struct ftrace_ops *ops) { int __percpu *disabled; + if (WARN_ON_ONCE(!(ops->flags & FTRACE_OPS_FL_PER_CPU))) + return -EINVAL; + disabled = alloc_percpu(int); if (!disabled) return -ENOMEM; ops->disabled = disabled; - control_ops_disable_all(ops); + per_cpu_ops_disable_all(ops); return 0; } @@ -256,10 +252,11 @@ static inline void update_function_graph_func(void) { } static ftrace_func_t ftrace_ops_get_list_func(struct ftrace_ops *ops) { /* - * If this is a dynamic ops or we force list func, + * If this is a dynamic, RCU, or per CPU ops, or we force list func, * then it needs to call the list anyway. */ - if (ops->flags & FTRACE_OPS_FL_DYNAMIC || FTRACE_FORCE_LIST_FUNC) + if (ops->flags & (FTRACE_OPS_FL_DYNAMIC | FTRACE_OPS_FL_PER_CPU | + FTRACE_OPS_FL_RCU) || FTRACE_FORCE_LIST_FUNC) return ftrace_ops_list_func; return ftrace_ops_get_func(ops); @@ -383,26 +380,6 @@ static int remove_ftrace_ops(struct ftrace_ops **list, struct ftrace_ops *ops) return 0; } -static void add_ftrace_list_ops(struct ftrace_ops **list, - struct ftrace_ops *main_ops, - struct ftrace_ops *ops) -{ - int first = *list == &ftrace_list_end; - add_ftrace_ops(list, ops); - if (first) - add_ftrace_ops(&ftrace_ops_list, main_ops); -} - -static int remove_ftrace_list_ops(struct ftrace_ops **list, - struct ftrace_ops *main_ops, - struct ftrace_ops *ops) -{ - int ret = remove_ftrace_ops(list, ops); - if (!ret && *list == &ftrace_list_end) - ret = remove_ftrace_ops(&ftrace_ops_list, main_ops); - return ret; -} - static void ftrace_update_trampoline(struct ftrace_ops *ops); static int __register_ftrace_function(struct ftrace_ops *ops) @@ -430,14 +407,12 @@ static int __register_ftrace_function(struct ftrace_ops *ops) if (!core_kernel_data((unsigned long)ops)) ops->flags |= FTRACE_OPS_FL_DYNAMIC; - if (ops->flags & FTRACE_OPS_FL_CONTROL) { - if (control_ops_alloc(ops)) + if (ops->flags & FTRACE_OPS_FL_PER_CPU) { + if (per_cpu_ops_alloc(ops)) return -ENOMEM; - add_ftrace_list_ops(&ftrace_control_list, &control_ops, ops); - /* The control_ops needs the trampoline update */ - ops = &control_ops; - } else - add_ftrace_ops(&ftrace_ops_list, ops); + } + + add_ftrace_ops(&ftrace_ops_list, ops); /* Always save the function, and reset at unregistering */ ops->saved_func = ops->func; @@ -460,11 +435,7 @@ static int __unregister_ftrace_function(struct ftrace_ops *ops) if (WARN_ON(!(ops->flags & FTRACE_OPS_FL_ENABLED))) return -EBUSY; - if (ops->flags & FTRACE_OPS_FL_CONTROL) { - ret = remove_ftrace_list_ops(&ftrace_control_list, - &control_ops, ops); - } else - ret = remove_ftrace_ops(&ftrace_ops_list, ops); + ret = remove_ftrace_ops(&ftrace_ops_list, ops); if (ret < 0) return ret; @@ -1687,6 +1658,9 @@ static void __ftrace_hash_rec_update(struct ftrace_ops *ops, int in_hash = 0; int match = 0; + if (rec->flags & FTRACE_FL_DISABLED) + continue; + if (all) { /* * Only the filter_hash affects all records. @@ -1940,7 +1914,7 @@ static int ftrace_hash_ipmodify_update(struct ftrace_ops *ops, return __ftrace_hash_update_ipmodify(ops, old_hash, new_hash); } -static void print_ip_ins(const char *fmt, unsigned char *p) +static void print_ip_ins(const char *fmt, const unsigned char *p) { int i; @@ -1952,6 +1926,31 @@ static void print_ip_ins(const char *fmt, unsigned char *p) static struct ftrace_ops * ftrace_find_tramp_ops_any(struct dyn_ftrace *rec); +static struct ftrace_ops * +ftrace_find_tramp_ops_next(struct dyn_ftrace *rec, struct ftrace_ops *ops); + +enum ftrace_bug_type ftrace_bug_type; +const void *ftrace_expected; + +static void print_bug_type(void) +{ + switch (ftrace_bug_type) { + case FTRACE_BUG_UNKNOWN: + break; + case FTRACE_BUG_INIT: + pr_info("Initializing ftrace call sites\n"); + break; + case FTRACE_BUG_NOP: + pr_info("Setting ftrace call site to NOP\n"); + break; + case FTRACE_BUG_CALL: + pr_info("Setting ftrace call site to call ftrace function\n"); + break; + case FTRACE_BUG_UPDATE: + pr_info("Updating ftrace call site to call a different ftrace function\n"); + break; + } +} /** * ftrace_bug - report and shutdown function tracer @@ -1979,8 +1978,12 @@ void ftrace_bug(int failed, struct dyn_ftrace *rec) FTRACE_WARN_ON_ONCE(1); pr_info("ftrace failed to modify "); print_ip_sym(ip); - print_ip_ins(" actual: ", (unsigned char *)ip); + print_ip_ins(" actual: ", (unsigned char *)ip); pr_cont("\n"); + if (ftrace_expected) { + print_ip_ins(" expected: ", ftrace_expected); + pr_cont("\n"); + } break; case -EPERM: FTRACE_WARN_ON_ONCE(1); @@ -1992,6 +1995,7 @@ void ftrace_bug(int failed, struct dyn_ftrace *rec) pr_info("ftrace faulted on unknown error "); print_ip_sym(ip); } + print_bug_type(); if (rec) { struct ftrace_ops *ops = NULL; @@ -2000,15 +2004,19 @@ void ftrace_bug(int failed, struct dyn_ftrace *rec) rec->flags & FTRACE_FL_REGS ? " R" : " "); if (rec->flags & FTRACE_FL_TRAMP_EN) { ops = ftrace_find_tramp_ops_any(rec); - if (ops) - pr_cont("\ttramp: %pS", - (void *)ops->trampoline); - else + if (ops) { + do { + pr_cont("\ttramp: %pS (%pS)", + (void *)ops->trampoline, + (void *)ops->func); + ops = ftrace_find_tramp_ops_next(rec, ops); + } while (ops); + } else pr_cont("\ttramp: ERROR!"); } ip = ftrace_get_addr_curr(rec); - pr_cont(" expected tramp: %lx\n", ip); + pr_cont("\n expected tramp: %lx\n", ip); } } @@ -2016,6 +2024,11 @@ static int ftrace_check_record(struct dyn_ftrace *rec, int enable, int update) { unsigned long flag = 0UL; + ftrace_bug_type = FTRACE_BUG_UNKNOWN; + + if (rec->flags & FTRACE_FL_DISABLED) + return FTRACE_UPDATE_IGNORE; + /* * If we are updating calls: * @@ -2077,9 +2090,12 @@ static int ftrace_check_record(struct dyn_ftrace *rec, int enable, int update) * from the save regs, to a non-save regs function or * vice versa, or from a trampoline call. */ - if (flag & FTRACE_FL_ENABLED) + if (flag & FTRACE_FL_ENABLED) { + ftrace_bug_type = FTRACE_BUG_CALL; return FTRACE_UPDATE_MAKE_CALL; + } + ftrace_bug_type = FTRACE_BUG_UPDATE; return FTRACE_UPDATE_MODIFY_CALL; } @@ -2096,6 +2112,7 @@ static int ftrace_check_record(struct dyn_ftrace *rec, int enable, int update) FTRACE_FL_REGS_EN); } + ftrace_bug_type = FTRACE_BUG_NOP; return FTRACE_UPDATE_MAKE_NOP; } @@ -2145,6 +2162,24 @@ ftrace_find_tramp_ops_any(struct dyn_ftrace *rec) } static struct ftrace_ops * +ftrace_find_tramp_ops_next(struct dyn_ftrace *rec, + struct ftrace_ops *op) +{ + unsigned long ip = rec->ip; + + while_for_each_ftrace_op(op) { + + if (!op->trampoline) + continue; + + if (hash_contains_ip(ip, op->func_hash)) + return op; + } + + return NULL; +} + +static struct ftrace_ops * ftrace_find_tramp_ops_curr(struct dyn_ftrace *rec) { struct ftrace_ops *op; @@ -2307,17 +2342,22 @@ __ftrace_replace_code(struct dyn_ftrace *rec, int enable) ret = ftrace_update_record(rec, enable); + ftrace_bug_type = FTRACE_BUG_UNKNOWN; + switch (ret) { case FTRACE_UPDATE_IGNORE: return 0; case FTRACE_UPDATE_MAKE_CALL: + ftrace_bug_type = FTRACE_BUG_CALL; return ftrace_make_call(rec, ftrace_addr); case FTRACE_UPDATE_MAKE_NOP: + ftrace_bug_type = FTRACE_BUG_NOP; return ftrace_make_nop(NULL, rec, ftrace_old_addr); case FTRACE_UPDATE_MODIFY_CALL: + ftrace_bug_type = FTRACE_BUG_UPDATE; return ftrace_modify_call(rec, ftrace_old_addr, ftrace_addr); } @@ -2425,6 +2465,7 @@ ftrace_code_disable(struct module *mod, struct dyn_ftrace *rec) ret = ftrace_make_nop(mod, rec, MCOUNT_ADDR); if (ret) { + ftrace_bug_type = FTRACE_BUG_INIT; ftrace_bug(ret, rec); return 0; } @@ -2566,7 +2607,7 @@ void __weak arch_ftrace_trampoline_free(struct ftrace_ops *ops) { } -static void control_ops_free(struct ftrace_ops *ops) +static void per_cpu_ops_free(struct ftrace_ops *ops) { free_percpu(ops->disabled); } @@ -2667,13 +2708,13 @@ static int ftrace_shutdown(struct ftrace_ops *ops, int command) if (!command || !ftrace_enabled) { /* - * If these are control ops, they still need their + * If these are per_cpu ops, they still need their * per_cpu field freed. Since, function tracing is * not currently active, we can just free them * without synchronizing all CPUs. */ - if (ops->flags & FTRACE_OPS_FL_CONTROL) - control_ops_free(ops); + if (ops->flags & FTRACE_OPS_FL_PER_CPU) + per_cpu_ops_free(ops); return 0; } @@ -2714,7 +2755,7 @@ static int ftrace_shutdown(struct ftrace_ops *ops, int command) /* * Dynamic ops may be freed, we must make sure that all * callers are done before leaving this function. - * The same goes for freeing the per_cpu data of the control + * The same goes for freeing the per_cpu data of the per_cpu * ops. * * Again, normal synchronize_sched() is not good enough. @@ -2725,13 +2766,13 @@ static int ftrace_shutdown(struct ftrace_ops *ops, int command) * infrastructure to do the synchronization, thus we must do it * ourselves. */ - if (ops->flags & (FTRACE_OPS_FL_DYNAMIC | FTRACE_OPS_FL_CONTROL)) { + if (ops->flags & (FTRACE_OPS_FL_DYNAMIC | FTRACE_OPS_FL_PER_CPU)) { schedule_on_each_cpu(ftrace_sync); arch_ftrace_trampoline_free(ops); - if (ops->flags & FTRACE_OPS_FL_CONTROL) - control_ops_free(ops); + if (ops->flags & FTRACE_OPS_FL_PER_CPU) + per_cpu_ops_free(ops); } return 0; @@ -2798,9 +2839,9 @@ ops_references_rec(struct ftrace_ops *ops, struct dyn_ftrace *rec) if (!(ops->flags & FTRACE_OPS_FL_ENABLED)) return 0; - /* If ops traces all mods, we already accounted for it */ + /* If ops traces all then it includes this function */ if (ops_traces_mod(ops)) - return 0; + return 1; /* The function must be in the filter */ if (!ftrace_hash_empty(ops->func_hash->filter_hash) && @@ -2814,64 +2855,41 @@ ops_references_rec(struct ftrace_ops *ops, struct dyn_ftrace *rec) return 1; } -static int referenced_filters(struct dyn_ftrace *rec) -{ - struct ftrace_ops *ops; - int cnt = 0; - - for (ops = ftrace_ops_list; ops != &ftrace_list_end; ops = ops->next) { - if (ops_references_rec(ops, rec)) - cnt++; - } - - return cnt; -} - static int ftrace_update_code(struct module *mod, struct ftrace_page *new_pgs) { struct ftrace_page *pg; struct dyn_ftrace *p; cycle_t start, stop; unsigned long update_cnt = 0; - unsigned long ref = 0; - bool test = false; + unsigned long rec_flags = 0; int i; + start = ftrace_now(raw_smp_processor_id()); + /* - * When adding a module, we need to check if tracers are - * currently enabled and if they are set to trace all functions. - * If they are, we need to enable the module functions as well - * as update the reference counts for those function records. + * When a module is loaded, this function is called to convert + * the calls to mcount in its text to nops, and also to create + * an entry in the ftrace data. Now, if ftrace is activated + * after this call, but before the module sets its text to + * read-only, the modification of enabling ftrace can fail if + * the read-only is done while ftrace is converting the calls. + * To prevent this, the module's records are set as disabled + * and will be enabled after the call to set the module's text + * to read-only. */ - if (mod) { - struct ftrace_ops *ops; - - for (ops = ftrace_ops_list; - ops != &ftrace_list_end; ops = ops->next) { - if (ops->flags & FTRACE_OPS_FL_ENABLED) { - if (ops_traces_mod(ops)) - ref++; - else - test = true; - } - } - } - - start = ftrace_now(raw_smp_processor_id()); + if (mod) + rec_flags |= FTRACE_FL_DISABLED; for (pg = new_pgs; pg; pg = pg->next) { for (i = 0; i < pg->index; i++) { - int cnt = ref; /* If something went wrong, bail without enabling anything */ if (unlikely(ftrace_disabled)) return -1; p = &pg->records[i]; - if (test) - cnt += referenced_filters(p); - p->flags = cnt; + p->flags = rec_flags; /* * Do the initial record conversion from mcount jump @@ -2881,21 +2899,6 @@ static int ftrace_update_code(struct module *mod, struct ftrace_page *new_pgs) break; update_cnt++; - - /* - * If the tracing is enabled, go ahead and enable the record. - * - * The reason not to enable the record immediatelly is the - * inherent check of ftrace_make_nop/ftrace_make_call for - * correct previous instructions. Making first the NOP - * conversion puts the module to the correct state, thus - * passing the ftrace_make_call check. - */ - if (ftrace_start_up && cnt) { - int failed = __ftrace_replace_code(p, 1); - if (failed) - ftrace_bug(failed, p); - } } } @@ -3258,7 +3261,7 @@ static int t_show(struct seq_file *m, void *v) seq_printf(m, "%ps", (void *)rec->ip); if (iter->flags & FTRACE_ITER_ENABLED) { - struct ftrace_ops *ops = NULL; + struct ftrace_ops *ops; seq_printf(m, " (%ld)%s%s", ftrace_rec_count(rec), @@ -3266,14 +3269,19 @@ static int t_show(struct seq_file *m, void *v) rec->flags & FTRACE_FL_IPMODIFY ? " I" : " "); if (rec->flags & FTRACE_FL_TRAMP_EN) { ops = ftrace_find_tramp_ops_any(rec); - if (ops) - seq_printf(m, "\ttramp: %pS", - (void *)ops->trampoline); - else + if (ops) { + do { + seq_printf(m, "\ttramp: %pS (%pS)", + (void *)ops->trampoline, + (void *)ops->func); + add_trampoline_func(m, ops, rec); + ops = ftrace_find_tramp_ops_next(rec, ops); + } while (ops); + } else seq_puts(m, "\ttramp: ERROR!"); - + } else { + add_trampoline_func(m, NULL, rec); } - add_trampoline_func(m, ops, rec); } seq_putc(m, '\n'); @@ -4898,6 +4906,19 @@ static int ftrace_process_locs(struct module *mod, #define next_to_ftrace_page(p) container_of(p, struct ftrace_page, next) +static int referenced_filters(struct dyn_ftrace *rec) +{ + struct ftrace_ops *ops; + int cnt = 0; + + for (ops = ftrace_ops_list; ops != &ftrace_list_end; ops = ops->next) { + if (ops_references_rec(ops, rec)) + cnt++; + } + + return cnt; +} + void ftrace_release_mod(struct module *mod) { struct dyn_ftrace *rec; @@ -4940,44 +4961,85 @@ void ftrace_release_mod(struct module *mod) mutex_unlock(&ftrace_lock); } -static void ftrace_init_module(struct module *mod, - unsigned long *start, unsigned long *end) +void ftrace_module_enable(struct module *mod) { - if (ftrace_disabled || start == end) - return; - ftrace_process_locs(mod, start, end); -} + struct dyn_ftrace *rec; + struct ftrace_page *pg; -void ftrace_module_init(struct module *mod) -{ - ftrace_init_module(mod, mod->ftrace_callsites, - mod->ftrace_callsites + - mod->num_ftrace_callsites); -} + mutex_lock(&ftrace_lock); -static int ftrace_module_notify_exit(struct notifier_block *self, - unsigned long val, void *data) -{ - struct module *mod = data; + if (ftrace_disabled) + goto out_unlock; - if (val == MODULE_STATE_GOING) - ftrace_release_mod(mod); + /* + * If the tracing is enabled, go ahead and enable the record. + * + * The reason not to enable the record immediatelly is the + * inherent check of ftrace_make_nop/ftrace_make_call for + * correct previous instructions. Making first the NOP + * conversion puts the module to the correct state, thus + * passing the ftrace_make_call check. + * + * We also delay this to after the module code already set the + * text to read-only, as we now need to set it back to read-write + * so that we can modify the text. + */ + if (ftrace_start_up) + ftrace_arch_code_modify_prepare(); - return 0; + do_for_each_ftrace_rec(pg, rec) { + int cnt; + /* + * do_for_each_ftrace_rec() is a double loop. + * module text shares the pg. If a record is + * not part of this module, then skip this pg, + * which the "break" will do. + */ + if (!within_module_core(rec->ip, mod)) + break; + + cnt = 0; + + /* + * When adding a module, we need to check if tracers are + * currently enabled and if they are, and can trace this record, + * we need to enable the module functions as well as update the + * reference counts for those function records. + */ + if (ftrace_start_up) + cnt += referenced_filters(rec); + + /* This clears FTRACE_FL_DISABLED */ + rec->flags = cnt; + + if (ftrace_start_up && cnt) { + int failed = __ftrace_replace_code(rec, 1); + if (failed) { + ftrace_bug(failed, rec); + goto out_loop; + } + } + + } while_for_each_ftrace_rec(); + + out_loop: + if (ftrace_start_up) + ftrace_arch_code_modify_post_process(); + + out_unlock: + mutex_unlock(&ftrace_lock); } -#else -static int ftrace_module_notify_exit(struct notifier_block *self, - unsigned long val, void *data) + +void ftrace_module_init(struct module *mod) { - return 0; + if (ftrace_disabled || !mod->num_ftrace_callsites) + return; + + ftrace_process_locs(mod, mod->ftrace_callsites, + mod->ftrace_callsites + mod->num_ftrace_callsites); } #endif /* CONFIG_MODULES */ -struct notifier_block ftrace_module_exit_nb = { - .notifier_call = ftrace_module_notify_exit, - .priority = INT_MIN, /* Run after anything that can remove kprobes */ -}; - void __init ftrace_init(void) { extern unsigned long __start_mcount_loc[]; @@ -5006,10 +5068,6 @@ void __init ftrace_init(void) __start_mcount_loc, __stop_mcount_loc); - ret = register_module_notifier(&ftrace_module_exit_nb); - if (ret) - pr_warning("Failed to register trace ftrace module exit notifier\n"); - set_ftrace_early_filters(); return; @@ -5116,44 +5174,6 @@ void ftrace_reset_array_ops(struct trace_array *tr) tr->ops->func = ftrace_stub; } -static void -ftrace_ops_control_func(unsigned long ip, unsigned long parent_ip, - struct ftrace_ops *op, struct pt_regs *regs) -{ - if (unlikely(trace_recursion_test(TRACE_CONTROL_BIT))) - return; - - /* - * Some of the ops may be dynamically allocated, - * they must be freed after a synchronize_sched(). - */ - preempt_disable_notrace(); - trace_recursion_set(TRACE_CONTROL_BIT); - - /* - * Control funcs (perf) uses RCU. Only trace if - * RCU is currently active. - */ - if (!rcu_is_watching()) - goto out; - - do_for_each_ftrace_op(op, ftrace_control_list) { - if (!(op->flags & FTRACE_OPS_FL_STUB) && - !ftrace_function_local_disabled(op) && - ftrace_ops_test(op, ip, regs)) - op->func(ip, parent_ip, op, regs); - } while_for_each_ftrace_op(op); - out: - trace_recursion_clear(TRACE_CONTROL_BIT); - preempt_enable_notrace(); -} - -static struct ftrace_ops control_ops = { - .func = ftrace_ops_control_func, - .flags = FTRACE_OPS_FL_RECURSION_SAFE | FTRACE_OPS_FL_INITIALIZED, - INIT_OPS_HASH(control_ops) -}; - static inline void __ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *ignored, struct pt_regs *regs) @@ -5170,8 +5190,22 @@ __ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, * they must be freed after a synchronize_sched(). */ preempt_disable_notrace(); + do_for_each_ftrace_op(op, ftrace_ops_list) { - if (ftrace_ops_test(op, ip, regs)) { + /* + * Check the following for each ops before calling their func: + * if RCU flag is set, then rcu_is_watching() must be true + * if PER_CPU is set, then ftrace_function_local_disable() + * must be false + * Otherwise test if the ip matches the ops filter + * + * If any of the above fails then the op->func() is not executed. + */ + if ((!(op->flags & FTRACE_OPS_FL_RCU) || rcu_is_watching()) && + (!(op->flags & FTRACE_OPS_FL_PER_CPU) || + !ftrace_function_local_disabled(op)) && + ftrace_ops_test(op, ip, regs)) { + if (FTRACE_WARN_ON(!op->func)) { pr_warn("op=%p %pS\n", op, op); goto out; @@ -5195,7 +5229,7 @@ out: * being NULL, or CONFIG_DYNAMIC_FTRACE_WITH_REGS. * Note, CONFIG_DYNAMIC_FTRACE_WITH_REGS expects a full regs to be saved. * An architecture can pass partial regs with ftrace_ops and still - * set the ARCH_SUPPORT_FTARCE_OPS. + * set the ARCH_SUPPORTS_FTRACE_OPS. */ #if ARCH_SUPPORTS_FTRACE_OPS static void ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, @@ -5212,20 +5246,29 @@ static void ftrace_ops_no_ops(unsigned long ip, unsigned long parent_ip) /* * If there's only one function registered but it does not support - * recursion, this function will be called by the mcount trampoline. - * This function will handle recursion protection. + * recursion, needs RCU protection and/or requires per cpu handling, then + * this function will be called by the mcount trampoline. */ -static void ftrace_ops_recurs_func(unsigned long ip, unsigned long parent_ip, +static void ftrace_ops_assist_func(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *op, struct pt_regs *regs) { int bit; + if ((op->flags & FTRACE_OPS_FL_RCU) && !rcu_is_watching()) + return; + bit = trace_test_and_set_recursion(TRACE_LIST_START, TRACE_LIST_MAX); if (bit < 0) return; - op->func(ip, parent_ip, op, regs); + preempt_disable_notrace(); + if (!(op->flags & FTRACE_OPS_FL_PER_CPU) || + !ftrace_function_local_disabled(op)) { + op->func(ip, parent_ip, op, regs); + } + + preempt_enable_notrace(); trace_clear_recursion(bit); } @@ -5243,12 +5286,12 @@ static void ftrace_ops_recurs_func(unsigned long ip, unsigned long parent_ip, ftrace_func_t ftrace_ops_get_func(struct ftrace_ops *ops) { /* - * If the func handles its own recursion, call it directly. - * Otherwise call the recursion protected function that - * will call the ftrace ops function. + * If the function does not handle recursion, needs to be RCU safe, + * or does per cpu logic, then we need to call the assist handler. */ - if (!(ops->flags & FTRACE_OPS_FL_RECURSION_SAFE)) - return ftrace_ops_recurs_func; + if (!(ops->flags & FTRACE_OPS_FL_RECURSION_SAFE) || + ops->flags & (FTRACE_OPS_FL_RCU | FTRACE_OPS_FL_PER_CPU)) + return ftrace_ops_assist_func; return ops->func; } diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 9c6045a27..95181e368 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -1001,17 +1001,13 @@ static int rb_head_page_replace(struct buffer_page *old, /* * rb_tail_page_update - move the tail page forward - * - * Returns 1 if moved tail page, 0 if someone else did. */ -static int rb_tail_page_update(struct ring_buffer_per_cpu *cpu_buffer, +static void rb_tail_page_update(struct ring_buffer_per_cpu *cpu_buffer, struct buffer_page *tail_page, struct buffer_page *next_page) { - struct buffer_page *old_tail; unsigned long old_entries; unsigned long old_write; - int ret = 0; /* * The tail page now needs to be moved forward. @@ -1036,7 +1032,7 @@ static int rb_tail_page_update(struct ring_buffer_per_cpu *cpu_buffer, * it is, then it is up to us to update the tail * pointer. */ - if (tail_page == cpu_buffer->tail_page) { + if (tail_page == READ_ONCE(cpu_buffer->tail_page)) { /* Zero the write counter */ unsigned long val = old_write & ~RB_WRITE_MASK; unsigned long eval = old_entries & ~RB_WRITE_MASK; @@ -1061,14 +1057,9 @@ static int rb_tail_page_update(struct ring_buffer_per_cpu *cpu_buffer, */ local_set(&next_page->page->commit, 0); - old_tail = cmpxchg(&cpu_buffer->tail_page, - tail_page, next_page); - - if (old_tail == tail_page) - ret = 1; + /* Again, either we update tail_page or an interrupt does */ + (void)cmpxchg(&cpu_buffer->tail_page, tail_page, next_page); } - - return ret; } static int rb_check_bpage(struct ring_buffer_per_cpu *cpu_buffer, @@ -2036,12 +2027,15 @@ rb_handle_head_page(struct ring_buffer_per_cpu *cpu_buffer, * the tail page would have moved. */ if (ret == RB_PAGE_NORMAL) { + struct buffer_page *buffer_tail_page; + + buffer_tail_page = READ_ONCE(cpu_buffer->tail_page); /* * If the tail had moved passed next, then we need * to reset the pointer. */ - if (cpu_buffer->tail_page != tail_page && - cpu_buffer->tail_page != next_page) + if (buffer_tail_page != tail_page && + buffer_tail_page != next_page) rb_head_page_set_normal(cpu_buffer, new_head, next_page, RB_PAGE_HEAD); @@ -2135,6 +2129,8 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer, local_sub(length, &tail_page->write); } +static inline void rb_end_commit(struct ring_buffer_per_cpu *cpu_buffer); + /* * This is the slow path, force gcc not to inline it. */ @@ -2147,7 +2143,6 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer, struct ring_buffer *buffer = cpu_buffer->buffer; struct buffer_page *next_page; int ret; - u64 ts; next_page = tail_page; @@ -2221,20 +2216,17 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer, } } - ret = rb_tail_page_update(cpu_buffer, tail_page, next_page); - if (ret) { - /* - * Nested commits always have zero deltas, so - * just reread the time stamp - */ - ts = rb_time_stamp(buffer); - next_page->page->time_stamp = ts; - } + rb_tail_page_update(cpu_buffer, tail_page, next_page); out_again: rb_reset_tail(cpu_buffer, tail, info); + /* Commit what we have for now. */ + rb_end_commit(cpu_buffer); + /* rb_end_commit() decs committing */ + local_inc(&cpu_buffer->committing); + /* fail and let the caller try again */ return ERR_PTR(-EAGAIN); @@ -2362,7 +2354,7 @@ rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer, addr = (unsigned long)event; addr &= PAGE_MASK; - bpage = cpu_buffer->tail_page; + bpage = READ_ONCE(cpu_buffer->tail_page); if (bpage->page == (void *)addr && rb_page_write(bpage) == old_index) { unsigned long write_mask = @@ -2410,7 +2402,7 @@ rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer) again: max_count = cpu_buffer->nr_pages * 100; - while (cpu_buffer->commit_page != cpu_buffer->tail_page) { + while (cpu_buffer->commit_page != READ_ONCE(cpu_buffer->tail_page)) { if (RB_WARN_ON(cpu_buffer, !(--max_count))) return; if (RB_WARN_ON(cpu_buffer, @@ -2419,8 +2411,10 @@ rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer) local_set(&cpu_buffer->commit_page->page->commit, rb_page_write(cpu_buffer->commit_page)); rb_inc_page(cpu_buffer, &cpu_buffer->commit_page); - cpu_buffer->write_stamp = - cpu_buffer->commit_page->page->time_stamp; + /* Only update the write stamp if the page has an event */ + if (rb_page_write(cpu_buffer->commit_page)) + cpu_buffer->write_stamp = + cpu_buffer->commit_page->page->time_stamp; /* add barrier to keep gcc from optimizing too much */ barrier(); } @@ -2443,7 +2437,7 @@ rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer) * and pushed the tail page forward, we will be left with * a dangling commit that will never go forward. */ - if (unlikely(cpu_buffer->commit_page != cpu_buffer->tail_page)) + if (unlikely(cpu_buffer->commit_page != READ_ONCE(cpu_buffer->tail_page))) goto again; } @@ -2699,7 +2693,8 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, if (unlikely(info->add_timestamp)) info->length += RB_LEN_TIME_EXTEND; - tail_page = info->tail_page = cpu_buffer->tail_page; + /* Don't let the compiler play games with cpu_buffer->tail_page */ + tail_page = info->tail_page = READ_ONCE(cpu_buffer->tail_page); write = local_add_return(info->length, &tail_page->write); /* set write to only the index of the write */ diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 919d9d076..8414fa40b 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -363,8 +363,8 @@ struct trace_option_dentry { * @name: the name chosen to select it on the available_tracers file * @init: called when one switches to this tracer (echo name > current_tracer) * @reset: called when one switches to another tracer - * @start: called when tracing is unpaused (echo 1 > tracing_enabled) - * @stop: called when tracing is paused (echo 0 > tracing_enabled) + * @start: called when tracing is unpaused (echo 1 > tracing_on) + * @stop: called when tracing is paused (echo 0 > tracing_on) * @update_thresh: called when tracing_thresh is updated * @open: called when the trace file is opened * @pipe_open: called when the trace_pipe file is opened @@ -467,8 +467,6 @@ enum { TRACE_INTERNAL_IRQ_BIT, TRACE_INTERNAL_SIRQ_BIT, - TRACE_CONTROL_BIT, - TRACE_BRANCH_BIT, /* * Abuse of the trace_recursion. diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c index cc9f7a931..00df25fd8 100644 --- a/kernel/trace/trace_event_perf.c +++ b/kernel/trace/trace_event_perf.c @@ -334,7 +334,7 @@ static int perf_ftrace_function_register(struct perf_event *event) { struct ftrace_ops *ops = &event->ftrace_ops; - ops->flags |= FTRACE_OPS_FL_CONTROL; + ops->flags |= FTRACE_OPS_FL_PER_CPU | FTRACE_OPS_FL_RCU; ops->func = perf_ftrace_function_call; return register_ftrace_function(ops); } diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index d202d991e..05ddc0820 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -1343,15 +1343,9 @@ event_filter_write(struct file *filp, const char __user *ubuf, size_t cnt, if (cnt >= PAGE_SIZE) return -EINVAL; - buf = (char *)__get_free_page(GFP_TEMPORARY); - if (!buf) - return -ENOMEM; - - if (copy_from_user(buf, ubuf, cnt)) { - free_page((unsigned long) buf); - return -EFAULT; - } - buf[cnt] = '\0'; + buf = memdup_user_nul(ubuf, cnt); + if (IS_ERR(buf)) + return PTR_ERR(buf); mutex_lock(&event_mutex); file = event_file_data(filp); @@ -1359,7 +1353,7 @@ event_filter_write(struct file *filp, const char __user *ubuf, size_t cnt, err = apply_event_filter(file, buf); mutex_unlock(&event_mutex); - free_page((unsigned long) buf); + kfree(buf); if (err < 0) return err; @@ -1510,18 +1504,12 @@ subsystem_filter_write(struct file *filp, const char __user *ubuf, size_t cnt, if (cnt >= PAGE_SIZE) return -EINVAL; - buf = (char *)__get_free_page(GFP_TEMPORARY); - if (!buf) - return -ENOMEM; - - if (copy_from_user(buf, ubuf, cnt)) { - free_page((unsigned long) buf); - return -EFAULT; - } - buf[cnt] = '\0'; + buf = memdup_user_nul(ubuf, cnt); + if (IS_ERR(buf)) + return PTR_ERR(buf); err = apply_subsystem_event_filter(dir, buf); - free_page((unsigned long) buf); + kfree(buf); if (err < 0) return err; diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c index 42a4009fd..b38f617b6 100644 --- a/kernel/trace/trace_events_trigger.c +++ b/kernel/trace/trace_events_trigger.c @@ -237,28 +237,23 @@ static ssize_t event_trigger_regex_write(struct file *file, if (cnt >= PAGE_SIZE) return -EINVAL; - buf = (char *)__get_free_page(GFP_TEMPORARY); - if (!buf) - return -ENOMEM; + buf = memdup_user_nul(ubuf, cnt); + if (IS_ERR(buf)) + return PTR_ERR(buf); - if (copy_from_user(buf, ubuf, cnt)) { - free_page((unsigned long)buf); - return -EFAULT; - } - buf[cnt] = '\0'; strim(buf); mutex_lock(&event_mutex); event_file = event_file_data(file); if (unlikely(!event_file)) { mutex_unlock(&event_mutex); - free_page((unsigned long)buf); + kfree(buf); return -ENODEV; } ret = trigger_process_regex(event_file, buf); mutex_unlock(&event_mutex); - free_page((unsigned long)buf); + kfree(buf); if (ret < 0) goto out; @@ -543,11 +538,12 @@ static int register_trigger(char *glob, struct event_trigger_ops *ops, list_add_rcu(&data->list, &file->triggers); ret++; + update_cond_flag(file); if (trace_event_trigger_enable_disable(file, 1) < 0) { list_del_rcu(&data->list); + update_cond_flag(file); ret--; } - update_cond_flag(file); out: return ret; } @@ -575,8 +571,8 @@ static void unregister_trigger(char *glob, struct event_trigger_ops *ops, if (data->cmd_ops->trigger_type == test->cmd_ops->trigger_type) { unregistered = true; list_del_rcu(&data->list); - update_cond_flag(file); trace_event_trigger_enable_disable(file, 0); + update_cond_flag(file); break; } } @@ -1319,11 +1315,12 @@ static int event_enable_register_trigger(char *glob, list_add_rcu(&data->list, &file->triggers); ret++; + update_cond_flag(file); if (trace_event_trigger_enable_disable(file, 1) < 0) { list_del_rcu(&data->list); + update_cond_flag(file); ret--; } - update_cond_flag(file); out: return ret; } @@ -1344,8 +1341,8 @@ static void event_enable_unregister_trigger(char *glob, (enable_data->file == test_enable_data->file)) { unregistered = true; list_del_rcu(&data->list); - update_cond_flag(file); trace_event_trigger_enable_disable(file, 0); + update_cond_flag(file); break; } } diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c index 202df6cff..2a1abbaca 100644 --- a/kernel/trace/trace_stack.c +++ b/kernel/trace/trace_stack.c @@ -156,7 +156,11 @@ check_stack(unsigned long ip, unsigned long *stack) for (; p < top && i < stack_trace_max.nr_entries; p++) { if (stack_dump_trace[i] == ULONG_MAX) break; - if (*p == stack_dump_trace[i]) { + /* + * The READ_ONCE_NOCHECK is used to let KASAN know that + * this is not a stack-out-of-bounds error. + */ + if ((READ_ONCE_NOCHECK(*p)) == stack_dump_trace[i]) { stack_dump_trace[x] = stack_dump_trace[i++]; this_size = stack_trace_index[x++] = (top - p) * sizeof(unsigned long); |