summary | refs | log | tree | commit | diff
path: root/kernel/events/core.c
diff options
context:
space:
mode:
author: André Fabian Silva Delgado <emulatorman@parabola.nu> 2016-01-20 14:01:31 -0300
committer: André Fabian Silva Delgado <emulatorman@parabola.nu> 2016-01-20 14:01:31 -0300
commit: b4b7ff4b08e691656c9d77c758fc355833128ac0 (patch)
tree: 82fcb00e6b918026dc9f2d1f05ed8eee83874cc0 /kernel/events/core.c
parent: 35acfa0fc609f2a2cd95cef4a6a9c3a5c38f1778 (diff)
Linux-libre 4.4-gnu (tag: pck-4.4-gnu)
Diffstat (limited to 'kernel/events/core.c')
-rw-r--r-- kernel/events/core.c 393
1 file changed, 258 insertions, 135 deletions
diff --git a/kernel/events/core.c b/kernel/events/core.c
index b11756f9b..cfc227ccf 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -3,7 +3,7 @@
*
* Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de>
* Copyright (C) 2008-2011 Red Hat, Inc., Ingo Molnar
- * Copyright (C) 2008-2011 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
+ * Copyright (C) 2008-2011 Red Hat, Inc., Peter Zijlstra
* Copyright © 2009 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
*
* For licensing details see kernel-base/COPYING
@@ -196,7 +196,7 @@ static int perf_sample_period_ns __read_mostly = DEFAULT_SAMPLE_PERIOD_NS;
static int perf_sample_allowed_ns __read_mostly =
DEFAULT_SAMPLE_PERIOD_NS * DEFAULT_CPU_TIME_MAX_PERCENT / 100;
-void update_perf_cpu_limits(void)
+static void update_perf_cpu_limits(void)
{
u64 tmp = perf_sample_period_ns;
@@ -435,7 +435,7 @@ static inline void update_cgrp_time_from_event(struct perf_event *event)
if (!is_cgroup_event(event))
return;
- cgrp = perf_cgroup_from_task(current);
+ cgrp = perf_cgroup_from_task(current, event->ctx);
/*
* Do not update time when cgroup is not active
*/
@@ -458,7 +458,7 @@ perf_cgroup_set_timestamp(struct task_struct *task,
if (!task || !ctx->nr_cgroups)
return;
- cgrp = perf_cgroup_from_task(task);
+ cgrp = perf_cgroup_from_task(task, ctx);
info = this_cpu_ptr(cgrp->info);
info->timestamp = ctx->timestamp;
}
@@ -472,7 +472,7 @@ perf_cgroup_set_timestamp(struct task_struct *task,
* mode SWOUT : schedule out everything
* mode SWIN : schedule in based on cgroup for next
*/
-void perf_cgroup_switch(struct task_struct *task, int mode)
+static void perf_cgroup_switch(struct task_struct *task, int mode)
{
struct perf_cpu_context *cpuctx;
struct pmu *pmu;
@@ -489,7 +489,6 @@ void perf_cgroup_switch(struct task_struct *task, int mode)
* we reschedule only in the presence of cgroup
* constrained events.
*/
- rcu_read_lock();
list_for_each_entry_rcu(pmu, &pmus, entry) {
cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
@@ -522,8 +521,10 @@ void perf_cgroup_switch(struct task_struct *task, int mode)
* set cgrp before ctxsw in to allow
* event_filter_match() to not have to pass
* task around
+ * we pass the cpuctx->ctx to perf_cgroup_from_task()
+ * because cgroup events are only per-cpu
*/
- cpuctx->cgrp = perf_cgroup_from_task(task);
+ cpuctx->cgrp = perf_cgroup_from_task(task, &cpuctx->ctx);
cpu_ctx_sched_in(cpuctx, EVENT_ALL, task);
}
perf_pmu_enable(cpuctx->ctx.pmu);
@@ -531,8 +532,6 @@ void perf_cgroup_switch(struct task_struct *task, int mode)
}
}
- rcu_read_unlock();
-
local_irq_restore(flags);
}
@@ -542,17 +541,20 @@ static inline void perf_cgroup_sched_out(struct task_struct *task,
struct perf_cgroup *cgrp1;
struct perf_cgroup *cgrp2 = NULL;
+ rcu_read_lock();
/*
* we come here when we know perf_cgroup_events > 0
+ * we do not need to pass the ctx here because we know
+ * we are holding the rcu lock
*/
- cgrp1 = perf_cgroup_from_task(task);
+ cgrp1 = perf_cgroup_from_task(task, NULL);
/*
* next is NULL when called from perf_event_enable_on_exec()
* that will systematically cause a cgroup_switch()
*/
if (next)
- cgrp2 = perf_cgroup_from_task(next);
+ cgrp2 = perf_cgroup_from_task(next, NULL);
/*
* only schedule out current cgroup events if we know
@@ -561,6 +563,8 @@ static inline void perf_cgroup_sched_out(struct task_struct *task,
*/
if (cgrp1 != cgrp2)
perf_cgroup_switch(task, PERF_CGROUP_SWOUT);
+
+ rcu_read_unlock();
}
static inline void perf_cgroup_sched_in(struct task_struct *prev,
@@ -569,13 +573,16 @@ static inline void perf_cgroup_sched_in(struct task_struct *prev,
struct perf_cgroup *cgrp1;
struct perf_cgroup *cgrp2 = NULL;
+ rcu_read_lock();
/*
* we come here when we know perf_cgroup_events > 0
+ * we do not need to pass the ctx here because we know
+ * we are holding the rcu lock
*/
- cgrp1 = perf_cgroup_from_task(task);
+ cgrp1 = perf_cgroup_from_task(task, NULL);
/* prev can never be NULL */
- cgrp2 = perf_cgroup_from_task(prev);
+ cgrp2 = perf_cgroup_from_task(prev, NULL);
/*
* only need to schedule in cgroup events if we are changing
@@ -584,6 +591,8 @@ static inline void perf_cgroup_sched_in(struct task_struct *prev,
*/
if (cgrp1 != cgrp2)
perf_cgroup_switch(task, PERF_CGROUP_SWIN);
+
+ rcu_read_unlock();
}
static inline int perf_cgroup_connect(int fd, struct perf_event *event,
@@ -1050,13 +1059,13 @@ retry:
/*
* One of the few rules of preemptible RCU is that one cannot do
* rcu_read_unlock() while holding a scheduler (or nested) lock when
- * part of the read side critical section was preemptible -- see
+ * part of the read side critical section was irqs-enabled -- see
* rcu_read_unlock_special().
*
* Since ctx->lock nests under rq->lock we must ensure the entire read
- * side critical section is non-preemptible.
+ * side critical section has interrupts disabled.
*/
- preempt_disable();
+ local_irq_save(*flags);
rcu_read_lock();
ctx = rcu_dereference(task->perf_event_ctxp[ctxn]);
if (ctx) {
@@ -1070,21 +1079,22 @@ retry:
* if so. If we locked the right context, then it
* can't get swapped on us any more.
*/
- raw_spin_lock_irqsave(&ctx->lock, *flags);
+ raw_spin_lock(&ctx->lock);
if (ctx != rcu_dereference(task->perf_event_ctxp[ctxn])) {
- raw_spin_unlock_irqrestore(&ctx->lock, *flags);
+ raw_spin_unlock(&ctx->lock);
rcu_read_unlock();
- preempt_enable();
+ local_irq_restore(*flags);
goto retry;
}
if (!atomic_inc_not_zero(&ctx->refcount)) {
- raw_spin_unlock_irqrestore(&ctx->lock, *flags);
+ raw_spin_unlock(&ctx->lock);
ctx = NULL;
}
}
rcu_read_unlock();
- preempt_enable();
+ if (!ctx)
+ local_irq_restore(*flags);
return ctx;
}
@@ -1939,7 +1949,7 @@ group_sched_in(struct perf_event *group_event,
if (group_event->state == PERF_EVENT_STATE_OFF)
return 0;
- pmu->start_txn(pmu);
+ pmu->start_txn(pmu, PERF_PMU_TXN_ADD);
if (event_sched_in(group_event, cpuctx, ctx)) {
pmu->cancel_txn(pmu);
@@ -3144,15 +3154,16 @@ static int event_enable_on_exec(struct perf_event *event,
* Enable all of a task's events that have been marked enable-on-exec.
* This expects task == current.
*/
-static void perf_event_enable_on_exec(struct perf_event_context *ctx)
+static void perf_event_enable_on_exec(int ctxn)
{
- struct perf_event_context *clone_ctx = NULL;
+ struct perf_event_context *ctx, *clone_ctx = NULL;
struct perf_event *event;
unsigned long flags;
int enabled = 0;
int ret;
local_irq_save(flags);
+ ctx = current->perf_event_ctxp[ctxn];
if (!ctx || !ctx->nr_events)
goto out;
@@ -3195,28 +3206,30 @@ out:
void perf_event_exec(void)
{
- struct perf_event_context *ctx;
int ctxn;
rcu_read_lock();
- for_each_task_context_nr(ctxn) {
- ctx = current->perf_event_ctxp[ctxn];
- if (!ctx)
- continue;
-
- perf_event_enable_on_exec(ctx);
- }
+ for_each_task_context_nr(ctxn)
+ perf_event_enable_on_exec(ctxn);
rcu_read_unlock();
}
+struct perf_read_data {
+ struct perf_event *event;
+ bool group;
+ int ret;
+};
+
/*
* Cross CPU call to read the hardware event
*/
static void __perf_event_read(void *info)
{
- struct perf_event *event = info;
+ struct perf_read_data *data = info;
+ struct perf_event *sub, *event = data->event;
struct perf_event_context *ctx = event->ctx;
struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
+ struct pmu *pmu = event->pmu;
/*
* If this is a task context, we need to check whether it is
@@ -3233,9 +3246,35 @@ static void __perf_event_read(void *info)
update_context_time(ctx);
update_cgrp_time_from_event(event);
}
+
update_event_times(event);
- if (event->state == PERF_EVENT_STATE_ACTIVE)
- event->pmu->read(event);
+ if (event->state != PERF_EVENT_STATE_ACTIVE)
+ goto unlock;
+
+ if (!data->group) {
+ pmu->read(event);
+ data->ret = 0;
+ goto unlock;
+ }
+
+ pmu->start_txn(pmu, PERF_PMU_TXN_READ);
+
+ pmu->read(event);
+
+ list_for_each_entry(sub, &event->sibling_list, group_entry) {
+ update_event_times(sub);
+ if (sub->state == PERF_EVENT_STATE_ACTIVE) {
+ /*
+ * Use sibling's PMU rather than @event's since
+ * sibling could be on different (eg: software) PMU.
+ */
+ sub->pmu->read(sub);
+ }
+ }
+
+ data->ret = pmu->commit_txn(pmu);
+
+unlock:
raw_spin_unlock(&ctx->lock);
}
@@ -3300,15 +3339,23 @@ u64 perf_event_read_local(struct perf_event *event)
return val;
}
-static u64 perf_event_read(struct perf_event *event)
+static int perf_event_read(struct perf_event *event, bool group)
{
+ int ret = 0;
+
/*
* If event is enabled and currently active on a CPU, update the
* value in the event structure:
*/
if (event->state == PERF_EVENT_STATE_ACTIVE) {
+ struct perf_read_data data = {
+ .event = event,
+ .group = group,
+ .ret = 0,
+ };
smp_call_function_single(event->oncpu,
- __perf_event_read, event, 1);
+ __perf_event_read, &data, 1);
+ ret = data.ret;
} else if (event->state == PERF_EVENT_STATE_INACTIVE) {
struct perf_event_context *ctx = event->ctx;
unsigned long flags;
@@ -3323,11 +3370,14 @@ static u64 perf_event_read(struct perf_event *event)
update_context_time(ctx);
update_cgrp_time_from_event(event);
}
- update_event_times(event);
+ if (group)
+ update_group_times(event);
+ else
+ update_event_times(event);
raw_spin_unlock_irqrestore(&ctx->lock, flags);
}
- return perf_event_count(event);
+ return ret;
}
/*
@@ -3769,7 +3819,7 @@ static void put_event(struct perf_event *event)
* see the comment there.
*
* 2) there is a lock-inversion with mmap_sem through
- * perf_event_read_group(), which takes faults while
+ * perf_read_group(), which takes faults while
* holding ctx->mutex, however this is called after
* the last filedesc died, so there is no possibility
* to trigger the AB-BA case.
@@ -3843,14 +3893,18 @@ u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running)
*running = 0;
mutex_lock(&event->child_mutex);
- total += perf_event_read(event);
+
+ (void)perf_event_read(event, false);
+ total += perf_event_count(event);
+
*enabled += event->total_time_enabled +
atomic64_read(&event->child_total_time_enabled);
*running += event->total_time_running +
atomic64_read(&event->child_total_time_running);
list_for_each_entry(child, &event->child_list, child_list) {
- total += perf_event_read(child);
+ (void)perf_event_read(child, false);
+ total += perf_event_count(child);
*enabled += child->total_time_enabled;
*running += child->total_time_running;
}
@@ -3860,55 +3914,95 @@ u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running)
}
EXPORT_SYMBOL_GPL(perf_event_read_value);
-static int perf_event_read_group(struct perf_event *event,
- u64 read_format, char __user *buf)
+static int __perf_read_group_add(struct perf_event *leader,
+ u64 read_format, u64 *values)
{
- struct perf_event *leader = event->group_leader, *sub;
- struct perf_event_context *ctx = leader->ctx;
- int n = 0, size = 0, ret;
- u64 count, enabled, running;
- u64 values[5];
+ struct perf_event *sub;
+ int n = 1; /* skip @nr */
+ int ret;
- lockdep_assert_held(&ctx->mutex);
+ ret = perf_event_read(leader, true);
+ if (ret)
+ return ret;
- count = perf_event_read_value(leader, &enabled, &running);
+ /*
+ * Since we co-schedule groups, {enabled,running} times of siblings
+ * will be identical to those of the leader, so we only publish one
+ * set.
+ */
+ if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) {
+ values[n++] += leader->total_time_enabled +
+ atomic64_read(&leader->child_total_time_enabled);
+ }
- values[n++] = 1 + leader->nr_siblings;
- if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
- values[n++] = enabled;
- if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
- values[n++] = running;
- values[n++] = count;
+ if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) {
+ values[n++] += leader->total_time_running +
+ atomic64_read(&leader->child_total_time_running);
+ }
+
+ /*
+ * Write {count,id} tuples for every sibling.
+ */
+ values[n++] += perf_event_count(leader);
if (read_format & PERF_FORMAT_ID)
values[n++] = primary_event_id(leader);
- size = n * sizeof(u64);
+ list_for_each_entry(sub, &leader->sibling_list, group_entry) {
+ values[n++] += perf_event_count(sub);
+ if (read_format & PERF_FORMAT_ID)
+ values[n++] = primary_event_id(sub);
+ }
- if (copy_to_user(buf, values, size))
- return -EFAULT;
+ return 0;
+}
- ret = size;
+static int perf_read_group(struct perf_event *event,
+ u64 read_format, char __user *buf)
+{
+ struct perf_event *leader = event->group_leader, *child;
+ struct perf_event_context *ctx = leader->ctx;
+ int ret;
+ u64 *values;
- list_for_each_entry(sub, &leader->sibling_list, group_entry) {
- n = 0;
+ lockdep_assert_held(&ctx->mutex);
- values[n++] = perf_event_read_value(sub, &enabled, &running);
- if (read_format & PERF_FORMAT_ID)
- values[n++] = primary_event_id(sub);
+ values = kzalloc(event->read_size, GFP_KERNEL);
+ if (!values)
+ return -ENOMEM;
- size = n * sizeof(u64);
+ values[0] = 1 + leader->nr_siblings;
- if (copy_to_user(buf + ret, values, size)) {
- return -EFAULT;
- }
+ /*
+ * By locking the child_mutex of the leader we effectively
+ * lock the child list of all siblings.. XXX explain how.
+ */
+ mutex_lock(&leader->child_mutex);
+
+ ret = __perf_read_group_add(leader, read_format, values);
+ if (ret)
+ goto unlock;
- ret += size;
+ list_for_each_entry(child, &leader->child_list, child_list) {
+ ret = __perf_read_group_add(child, read_format, values);
+ if (ret)
+ goto unlock;
}
+ mutex_unlock(&leader->child_mutex);
+
+ ret = event->read_size;
+ if (copy_to_user(buf, values, event->read_size))
+ ret = -EFAULT;
+ goto out;
+
+unlock:
+ mutex_unlock(&leader->child_mutex);
+out:
+ kfree(values);
return ret;
}
-static int perf_event_read_one(struct perf_event *event,
+static int perf_read_one(struct perf_event *event,
u64 read_format, char __user *buf)
{
u64 enabled, running;
@@ -3946,7 +4040,7 @@ static bool is_event_hup(struct perf_event *event)
* Read the performance event - simple non blocking version for now
*/
static ssize_t
-perf_read_hw(struct perf_event *event, char __user *buf, size_t count)
+__perf_read(struct perf_event *event, char __user *buf, size_t count)
{
u64 read_format = event->attr.read_format;
int ret;
@@ -3964,9 +4058,9 @@ perf_read_hw(struct perf_event *event, char __user *buf, size_t count)
WARN_ON_ONCE(event->ctx->parent_ctx);
if (read_format & PERF_FORMAT_GROUP)
- ret = perf_event_read_group(event, read_format, buf);
+ ret = perf_read_group(event, read_format, buf);
else
- ret = perf_event_read_one(event, read_format, buf);
+ ret = perf_read_one(event, read_format, buf);
return ret;
}
@@ -3979,7 +4073,7 @@ perf_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
int ret;
ctx = perf_event_ctx_lock(event);
- ret = perf_read_hw(event, buf, count);
+ ret = __perf_read(event, buf, count);
perf_event_ctx_unlock(event, ctx);
return ret;
@@ -4010,7 +4104,7 @@ static unsigned int perf_poll(struct file *file, poll_table *wait)
static void _perf_event_reset(struct perf_event *event)
{
- (void)perf_event_read(event);
+ (void)perf_event_read(event, false);
local64_set(&event->count, 0);
perf_event_update_userpage(event);
}
@@ -4126,7 +4220,14 @@ retry:
goto retry;
}
- __perf_event_period(&pe);
+ if (event->attr.freq) {
+ event->attr.sample_freq = value;
+ } else {
+ event->attr.sample_period = value;
+ event->hw.sample_period = value;
+ }
+
+ local64_set(&event->hw.period_left, 0);
raw_spin_unlock_irq(&ctx->lock);
return 0;
@@ -5286,9 +5387,15 @@ void perf_output_sample(struct perf_output_handle *handle,
if (sample_type & PERF_SAMPLE_RAW) {
if (data->raw) {
- perf_output_put(handle, data->raw->size);
- __output_copy(handle, data->raw->data,
- data->raw->size);
+ u32 raw_size = data->raw->size;
+ u32 real_size = round_up(raw_size + sizeof(u32),
+ sizeof(u64)) - sizeof(u32);
+ u64 zero = 0;
+
+ perf_output_put(handle, real_size);
+ __output_copy(handle, data->raw->data, raw_size);
+ if (real_size - raw_size)
+ __output_copy(handle, &zero, real_size - raw_size);
} else {
struct {
u32 size;
@@ -5420,8 +5527,7 @@ void perf_prepare_sample(struct perf_event_header *header,
else
size += sizeof(u32);
- WARN_ON_ONCE(size & (sizeof(u64)-1));
- header->size += size;
+ header->size += round_up(size, sizeof(u64));
}
if (sample_type & PERF_SAMPLE_BRANCH_STACK) {
@@ -5572,6 +5678,17 @@ perf_event_aux_ctx(struct perf_event_context *ctx,
}
static void
+perf_event_aux_task_ctx(perf_event_aux_output_cb output, void *data,
+ struct perf_event_context *task_ctx)
+{
+ rcu_read_lock();
+ preempt_disable();
+ perf_event_aux_ctx(task_ctx, output, data);
+ preempt_enable();
+ rcu_read_unlock();
+}
+
+static void
perf_event_aux(perf_event_aux_output_cb output, void *data,
struct perf_event_context *task_ctx)
{
@@ -5580,14 +5697,23 @@ perf_event_aux(perf_event_aux_output_cb output, void *data,
struct pmu *pmu;
int ctxn;
+ /*
+ * If we have task_ctx != NULL we only notify
+ * the task context itself. The task_ctx is set
+ * only for EXIT events before releasing task
+ * context.
+ */
+ if (task_ctx) {
+ perf_event_aux_task_ctx(output, data, task_ctx);
+ return;
+ }
+
rcu_read_lock();
list_for_each_entry_rcu(pmu, &pmus, entry) {
cpuctx = get_cpu_ptr(pmu->pmu_cpu_context);
if (cpuctx->unique_pmu != pmu)
goto next;
perf_event_aux_ctx(&cpuctx->ctx, output, data);
- if (task_ctx)
- goto next;
ctxn = pmu->task_ctx_nr;
if (ctxn < 0)
goto next;
@@ -5597,12 +5723,6 @@ perf_event_aux(perf_event_aux_output_cb output, void *data,
next:
put_cpu_ptr(pmu->pmu_cpu_context);
}
-
- if (task_ctx) {
- preempt_disable();
- perf_event_aux_ctx(task_ctx, output, data);
- preempt_enable();
- }
rcu_read_unlock();
}
@@ -6368,9 +6488,6 @@ struct swevent_htable {
/* Recursion avoidance in each contexts */
int recursion[PERF_NR_CONTEXTS];
-
- /* Keeps track of cpu being initialized/exited */
- bool online;
};
static DEFINE_PER_CPU(struct swevent_htable, swevent_htable);
@@ -6628,14 +6745,8 @@ static int perf_swevent_add(struct perf_event *event, int flags)
hwc->state = !(flags & PERF_EF_START);
head = find_swevent_head(swhash, event);
- if (!head) {
- /*
- * We can race with cpu hotplug code. Do not
- * WARN if the cpu just got unplugged.
- */
- WARN_ON_ONCE(swhash->online);
+ if (WARN_ON_ONCE(!head))
return -EINVAL;
- }
hlist_add_head_rcu(&event->hlist_entry, head);
perf_event_update_userpage(event);
@@ -6703,7 +6814,6 @@ static int swevent_hlist_get_cpu(struct perf_event *event, int cpu)
int err = 0;
mutex_lock(&swhash->hlist_mutex);
-
if (!swevent_hlist_deref(swhash) && cpu_online(cpu)) {
struct swevent_hlist *hlist;
@@ -6819,6 +6929,10 @@ static int perf_tp_filter_match(struct perf_event *event,
{
void *record = data->raw->data;
+ /* only top level events have filters set */
+ if (event->parent)
+ event = event->parent;
+
if (likely(!event->filter) || filter_match_preds(event->filter, record))
return 1;
return 0;
@@ -7292,24 +7406,49 @@ static void perf_pmu_nop_void(struct pmu *pmu)
{
}
+static void perf_pmu_nop_txn(struct pmu *pmu, unsigned int flags)
+{
+}
+
static int perf_pmu_nop_int(struct pmu *pmu)
{
return 0;
}
-static void perf_pmu_start_txn(struct pmu *pmu)
+static DEFINE_PER_CPU(unsigned int, nop_txn_flags);
+
+static void perf_pmu_start_txn(struct pmu *pmu, unsigned int flags)
{
+ __this_cpu_write(nop_txn_flags, flags);
+
+ if (flags & ~PERF_PMU_TXN_ADD)
+ return;
+
perf_pmu_disable(pmu);
}
static int perf_pmu_commit_txn(struct pmu *pmu)
{
+ unsigned int flags = __this_cpu_read(nop_txn_flags);
+
+ __this_cpu_write(nop_txn_flags, 0);
+
+ if (flags & ~PERF_PMU_TXN_ADD)
+ return 0;
+
perf_pmu_enable(pmu);
return 0;
}
static void perf_pmu_cancel_txn(struct pmu *pmu)
{
+ unsigned int flags = __this_cpu_read(nop_txn_flags);
+
+ __this_cpu_write(nop_txn_flags, 0);
+
+ if (flags & ~PERF_PMU_TXN_ADD)
+ return;
+
perf_pmu_enable(pmu);
}
@@ -7548,7 +7687,7 @@ got_cpu_context:
pmu->commit_txn = perf_pmu_commit_txn;
pmu->cancel_txn = perf_pmu_cancel_txn;
} else {
- pmu->start_txn = perf_pmu_nop_void;
+ pmu->start_txn = perf_pmu_nop_txn;
pmu->commit_txn = perf_pmu_nop_int;
pmu->cancel_txn = perf_pmu_nop_void;
}
@@ -7636,7 +7775,7 @@ static int perf_try_init_event(struct pmu *pmu, struct perf_event *event)
return ret;
}
-struct pmu *perf_init_event(struct perf_event *event)
+static struct pmu *perf_init_event(struct perf_event *event)
{
struct pmu *pmu = NULL;
int idx;
@@ -8663,10 +8802,8 @@ static void perf_event_exit_task_context(struct task_struct *child, int ctxn)
struct perf_event_context *child_ctx, *clone_ctx = NULL;
unsigned long flags;
- if (likely(!child->perf_event_ctxp[ctxn])) {
- perf_event_task(child, NULL, 0);
+ if (likely(!child->perf_event_ctxp[ctxn]))
return;
- }
local_irq_save(flags);
/*
@@ -8750,6 +8887,14 @@ void perf_event_exit_task(struct task_struct *child)
for_each_task_context_nr(ctxn)
perf_event_exit_task_context(child, ctxn);
+
+ /*
+ * The perf_event_exit_task_context calls perf_event_task
+ * with child's task_ctx, which generates EXIT events for
+ * child contexts and sets child->perf_event_ctxp[] to NULL.
+ * At this point we need to send EXIT events to cpu contexts.
+ */
+ perf_event_task(child, NULL, 0);
}
static void perf_free_event(struct perf_event *event,
@@ -9131,7 +9276,6 @@ static void perf_event_init_cpu(int cpu)
struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu);
mutex_lock(&swhash->hlist_mutex);
- swhash->online = true;
if (swhash->hlist_refcount > 0) {
struct swevent_hlist *hlist;
@@ -9173,14 +9317,7 @@ static void perf_event_exit_cpu_context(int cpu)
static void perf_event_exit_cpu(int cpu)
{
- struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu);
-
perf_event_exit_cpu_context(cpu);
-
- mutex_lock(&swhash->hlist_mutex);
- swhash->online = false;
- swevent_hlist_release(swhash);
- mutex_unlock(&swhash->hlist_mutex);
}
#else
static inline void perf_event_exit_cpu(int cpu) { }
@@ -9328,38 +9465,24 @@ static void perf_cgroup_css_free(struct cgroup_subsys_state *css)
static int __perf_cgroup_move(void *info)
{
struct task_struct *task = info;
+ rcu_read_lock();
perf_cgroup_switch(task, PERF_CGROUP_SWOUT | PERF_CGROUP_SWIN);
+ rcu_read_unlock();
return 0;
}
-static void perf_cgroup_attach(struct cgroup_subsys_state *css,
- struct cgroup_taskset *tset)
+static void perf_cgroup_attach(struct cgroup_taskset *tset)
{
struct task_struct *task;
+ struct cgroup_subsys_state *css;
- cgroup_taskset_for_each(task, tset)
+ cgroup_taskset_for_each(task, css, tset)
task_function_call(task, __perf_cgroup_move, task);
}
-static void perf_cgroup_exit(struct cgroup_subsys_state *css,
- struct cgroup_subsys_state *old_css,
- struct task_struct *task)
-{
- /*
- * cgroup_exit() is called in the copy_process() failure path.
- * Ignore this case since the task hasn't ran yet, this avoids
- * trying to poke a half freed task state from generic code.
- */
- if (!(task->flags & PF_EXITING))
- return;
-
- task_function_call(task, __perf_cgroup_move, task);
-}
-
struct cgroup_subsys perf_event_cgrp_subsys = {
.css_alloc = perf_cgroup_css_alloc,
.css_free = perf_cgroup_css_free,
- .exit = perf_cgroup_exit,
.attach = perf_cgroup_attach,
};
#endif /* CONFIG_CGROUP_PERF */