diff options
author | André Fabian Silva Delgado <emulatorman@parabola.nu> | 2015-12-15 14:52:16 -0300 |
---|---|---|
committer | André Fabian Silva Delgado <emulatorman@parabola.nu> | 2015-12-15 14:52:16 -0300 |
commit | 8d91c1e411f55d7ea91b1183a2e9f8088fb4d5be (patch) | |
tree | e9891aa6c295060d065adffd610c4f49ecf884f3 /arch/arc/kernel/perf_event.c | |
parent | a71852147516bc1cb5b0b3cbd13639bfd4022dc8 (diff) |
Linux-libre 4.3.2-gnu
Diffstat (limited to 'arch/arc/kernel/perf_event.c')
-rw-r--r-- | arch/arc/kernel/perf_event.c | 275 |
1 files changed, 243 insertions, 32 deletions
diff --git a/arch/arc/kernel/perf_event.c b/arch/arc/kernel/perf_event.c index 1287388c2..0c08bb1ce 100644 --- a/arch/arc/kernel/perf_event.c +++ b/arch/arc/kernel/perf_event.c @@ -1,7 +1,7 @@ /* * Linux performance counter support for ARC700 series * - * Copyright (C) 2013 Synopsys, Inc. (www.synopsys.com) + * Copyright (C) 2013-2015 Synopsys, Inc. (www.synopsys.com) * * This code is inspired by the perf support of various other architectures. * @@ -11,6 +11,7 @@ * */ #include <linux/errno.h> +#include <linux/interrupt.h> #include <linux/module.h> #include <linux/of.h> #include <linux/perf_event.h> @@ -20,12 +21,25 @@ struct arc_pmu { struct pmu pmu; - int counter_size; /* in bits */ + unsigned int irq; int n_counters; - unsigned long used_mask[BITS_TO_LONGS(ARC_PMU_MAX_HWEVENTS)]; + u64 max_period; int ev_hw_idx[PERF_COUNT_ARC_HW_MAX]; }; +struct arc_pmu_cpu { + /* + * A 1 bit for an index indicates that the counter is being used for + * an event. A 0 means that the counter can be used. + */ + unsigned long used_mask[BITS_TO_LONGS(ARC_PERF_MAX_COUNTERS)]; + + /* + * The events that are active on the PMU for the given index. + */ + struct perf_event *act_counter[ARC_PERF_MAX_COUNTERS]; +}; + struct arc_callchain_trace { int depth; void *perf_stuff; @@ -65,6 +79,7 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs) } static struct arc_pmu *arc_pmu; +static DEFINE_PER_CPU(struct arc_pmu_cpu, arc_pmu_cpu); /* read counter #idx; note that counter# != event# on ARC! */ static uint64_t arc_pmu_read_counter(int idx) @@ -88,18 +103,15 @@ static uint64_t arc_pmu_read_counter(int idx) static void arc_perf_event_update(struct perf_event *event, struct hw_perf_event *hwc, int idx) { - uint64_t prev_raw_count, new_raw_count; - int64_t delta; - - do { - prev_raw_count = local64_read(&hwc->prev_count); - new_raw_count = arc_pmu_read_counter(idx); - } while (local64_cmpxchg(&hwc->prev_count, prev_raw_count, - new_raw_count) != prev_raw_count); - - delta = (new_raw_count - prev_raw_count) & - ((1ULL << arc_pmu->counter_size) - 1ULL); + uint64_t prev_raw_count = local64_read(&hwc->prev_count); + uint64_t new_raw_count = arc_pmu_read_counter(idx); + int64_t delta = new_raw_count - prev_raw_count; + /* + * We don't afaraid of hwc->prev_count changing beneath our feet + * because there's no way for us to re-enter this function anytime. + */ + local64_set(&hwc->prev_count, new_raw_count); local64_add(delta, &event->count); local64_sub(delta, &hwc->period_left); } @@ -142,22 +154,41 @@ static int arc_pmu_event_init(struct perf_event *event) struct hw_perf_event *hwc = &event->hw; int ret; + if (!is_sampling_event(event)) { + hwc->sample_period = arc_pmu->max_period; + hwc->last_period = hwc->sample_period; + local64_set(&hwc->period_left, hwc->sample_period); + } + + hwc->config = 0; + + if (is_isa_arcv2()) { + /* "exclude user" means "count only kernel" */ + if (event->attr.exclude_user) + hwc->config |= ARC_REG_PCT_CONFIG_KERN; + + /* "exclude kernel" means "count only user" */ + if (event->attr.exclude_kernel) + hwc->config |= ARC_REG_PCT_CONFIG_USER; + } + switch (event->attr.type) { case PERF_TYPE_HARDWARE: if (event->attr.config >= PERF_COUNT_HW_MAX) return -ENOENT; if (arc_pmu->ev_hw_idx[event->attr.config] < 0) return -ENOENT; - hwc->config = arc_pmu->ev_hw_idx[event->attr.config]; + hwc->config |= arc_pmu->ev_hw_idx[event->attr.config]; pr_debug("init event %d with h/w %d \'%s\'\n", (int) event->attr.config, (int) hwc->config, arc_pmu_ev_hw_map[event->attr.config]); return 0; + case PERF_TYPE_HW_CACHE: ret = arc_pmu_cache_event(event->attr.config); if (ret < 0) return ret; - hwc->config = arc_pmu->ev_hw_idx[ret]; + hwc->config |= arc_pmu->ev_hw_idx[ret]; return 0; default: return -ENOENT; @@ -180,6 +211,47 @@ static void arc_pmu_disable(struct pmu *pmu) write_aux_reg(ARC_REG_PCT_CONTROL, (tmp & 0xffff0000) | 0x0); } +static int arc_pmu_event_set_period(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + s64 left = local64_read(&hwc->period_left); + s64 period = hwc->sample_period; + int idx = hwc->idx; + int overflow = 0; + u64 value; + + if (unlikely(left <= -period)) { + /* left underflowed by more than period. */ + left = period; + local64_set(&hwc->period_left, left); + hwc->last_period = period; + overflow = 1; + } else if (unlikely(left <= 0)) { + /* left underflowed by less than period. */ + left += period; + local64_set(&hwc->period_left, left); + hwc->last_period = period; + overflow = 1; + } + + if (left > arc_pmu->max_period) + left = arc_pmu->max_period; + + value = arc_pmu->max_period - left; + local64_set(&hwc->prev_count, value); + + /* Select counter */ + write_aux_reg(ARC_REG_PCT_INDEX, idx); + + /* Write value */ + write_aux_reg(ARC_REG_PCT_COUNTL, (u32)value); + write_aux_reg(ARC_REG_PCT_COUNTH, (value >> 32)); + + perf_event_update_userpage(event); + + return overflow; +} + /* * Assigns hardware counter to hardware condition. * Note that there is no separate start/stop mechanism; @@ -194,13 +266,20 @@ static void arc_pmu_start(struct perf_event *event, int flags) return; if (flags & PERF_EF_RELOAD) - WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE)); + WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE)); + + hwc->state = 0; - event->hw.state = 0; + arc_pmu_event_set_period(event); + + /* Enable interrupt for this counter */ + if (is_sampling_event(event)) + write_aux_reg(ARC_REG_PCT_INT_CTRL, + read_aux_reg(ARC_REG_PCT_INT_CTRL) | (1 << idx)); /* enable ARC pmu here */ - write_aux_reg(ARC_REG_PCT_INDEX, idx); - write_aux_reg(ARC_REG_PCT_CONFIG, hwc->config); + write_aux_reg(ARC_REG_PCT_INDEX, idx); /* counter # */ + write_aux_reg(ARC_REG_PCT_CONFIG, hwc->config); /* condition */ } static void arc_pmu_stop(struct perf_event *event, int flags) @@ -208,6 +287,17 @@ static void arc_pmu_stop(struct perf_event *event, int flags) struct hw_perf_event *hwc = &event->hw; int idx = hwc->idx; + /* Disable interrupt for this counter */ + if (is_sampling_event(event)) { + /* + * Reset interrupt flag by writing of 1. This is required + * to make sure pending interrupt was not left. + */ + write_aux_reg(ARC_REG_PCT_INT_ACT, 1 << idx); + write_aux_reg(ARC_REG_PCT_INT_CTRL, + read_aux_reg(ARC_REG_PCT_INT_CTRL) & ~(1 << idx)); + } + if (!(event->hw.state & PERF_HES_STOPPED)) { /* stop ARC pmu here */ write_aux_reg(ARC_REG_PCT_INDEX, idx); @@ -227,8 +317,12 @@ static void arc_pmu_stop(struct perf_event *event, int flags) static void arc_pmu_del(struct perf_event *event, int flags) { + struct arc_pmu_cpu *pmu_cpu = this_cpu_ptr(&arc_pmu_cpu); + arc_pmu_stop(event, PERF_EF_UPDATE); - __clear_bit(event->hw.idx, arc_pmu->used_mask); + __clear_bit(event->hw.idx, pmu_cpu->used_mask); + + pmu_cpu->act_counter[event->hw.idx] = 0; perf_event_update_userpage(event); } @@ -236,20 +330,31 @@ static void arc_pmu_del(struct perf_event *event, int flags) /* allocate hardware counter and optionally start counting */ static int arc_pmu_add(struct perf_event *event, int flags) { + struct arc_pmu_cpu *pmu_cpu = this_cpu_ptr(&arc_pmu_cpu); struct hw_perf_event *hwc = &event->hw; int idx = hwc->idx; - if (__test_and_set_bit(idx, arc_pmu->used_mask)) { - idx = find_first_zero_bit(arc_pmu->used_mask, + if (__test_and_set_bit(idx, pmu_cpu->used_mask)) { + idx = find_first_zero_bit(pmu_cpu->used_mask, arc_pmu->n_counters); if (idx == arc_pmu->n_counters) return -EAGAIN; - __set_bit(idx, arc_pmu->used_mask); + __set_bit(idx, pmu_cpu->used_mask); hwc->idx = idx; } write_aux_reg(ARC_REG_PCT_INDEX, idx); + + pmu_cpu->act_counter[idx] = event; + + if (is_sampling_event(event)) { + /* Mimic full counter overflow as other arches do */ + write_aux_reg(ARC_REG_PCT_INT_CNTL, (u32)arc_pmu->max_period); + write_aux_reg(ARC_REG_PCT_INT_CNTH, + (arc_pmu->max_period >> 32)); + } + write_aux_reg(ARC_REG_PCT_CONFIG, 0); write_aux_reg(ARC_REG_PCT_COUNTL, 0); write_aux_reg(ARC_REG_PCT_COUNTH, 0); @@ -264,11 +369,82 @@ static int arc_pmu_add(struct perf_event *event, int flags) return 0; } +#ifdef CONFIG_ISA_ARCV2 +static irqreturn_t arc_pmu_intr(int irq, void *dev) +{ + struct perf_sample_data data; + struct arc_pmu_cpu *pmu_cpu = this_cpu_ptr(&arc_pmu_cpu); + struct pt_regs *regs; + int active_ints; + int idx; + + arc_pmu_disable(&arc_pmu->pmu); + + active_ints = read_aux_reg(ARC_REG_PCT_INT_ACT); + + regs = get_irq_regs(); + + for (idx = 0; idx < arc_pmu->n_counters; idx++) { + struct perf_event *event = pmu_cpu->act_counter[idx]; + struct hw_perf_event *hwc; + + if (!(active_ints & (1 << idx))) + continue; + + /* Reset interrupt flag by writing of 1 */ + write_aux_reg(ARC_REG_PCT_INT_ACT, 1 << idx); + + /* + * On reset of "interrupt active" bit corresponding + * "interrupt enable" bit gets automatically reset as well. + * Now we need to re-enable interrupt for the counter. + */ + write_aux_reg(ARC_REG_PCT_INT_CTRL, + read_aux_reg(ARC_REG_PCT_INT_CTRL) | (1 << idx)); + + hwc = &event->hw; + + WARN_ON_ONCE(hwc->idx != idx); + + arc_perf_event_update(event, &event->hw, event->hw.idx); + perf_sample_data_init(&data, 0, hwc->last_period); + if (!arc_pmu_event_set_period(event)) + continue; + + if (perf_event_overflow(event, &data, regs)) + arc_pmu_stop(event, 0); + } + + arc_pmu_enable(&arc_pmu->pmu); + + return IRQ_HANDLED; +} +#else + +static irqreturn_t arc_pmu_intr(int irq, void *dev) +{ + return IRQ_NONE; +} + +#endif /* CONFIG_ISA_ARCV2 */ + +void arc_cpu_pmu_irq_init(void) +{ + struct arc_pmu_cpu *pmu_cpu = this_cpu_ptr(&arc_pmu_cpu); + + arc_request_percpu_irq(arc_pmu->irq, smp_processor_id(), arc_pmu_intr, + "ARC perf counters", pmu_cpu); + + /* Clear all pending interrupt flags */ + write_aux_reg(ARC_REG_PCT_INT_ACT, 0xffffffff); +} + static int arc_pmu_device_probe(struct platform_device *pdev) { struct arc_reg_pct_build pct_bcr; struct arc_reg_cc_build cc_bcr; - int i, j; + int i, j, has_interrupts; + int counter_size; /* in bits */ union cc_name { struct { @@ -284,7 +460,7 @@ static int arc_pmu_device_probe(struct platform_device *pdev) pr_err("This core does not have performance counters!\n"); return -ENODEV; } - BUG_ON(pct_bcr.c > ARC_PMU_MAX_HWEVENTS); + BUG_ON(pct_bcr.c > ARC_PERF_MAX_COUNTERS); READ_BCR(ARC_REG_CC_BUILD, cc_bcr); BUG_ON(!cc_bcr.v); /* Counters exist but No countable conditions ? */ @@ -293,11 +469,16 @@ static int arc_pmu_device_probe(struct platform_device *pdev) if (!arc_pmu) return -ENOMEM; + has_interrupts = is_isa_arcv2() ? pct_bcr.i : 0; + arc_pmu->n_counters = pct_bcr.c; - arc_pmu->counter_size = 32 + (pct_bcr.s << 4); + counter_size = 32 + (pct_bcr.s << 4); - pr_info("ARC perf\t: %d counters (%d bits), %d countable conditions\n", - arc_pmu->n_counters, arc_pmu->counter_size, cc_bcr.c); + arc_pmu->max_period = (1ULL << counter_size) / 2 - 1ULL; + + pr_info("ARC perf\t: %d counters (%d bits), %d conditions%s\n", + arc_pmu->n_counters, counter_size, cc_bcr.c, + has_interrupts ? ", [overflow IRQ support]":""); cc_name.str[8] = 0; for (i = 0; i < PERF_COUNT_ARC_HW_MAX; i++) @@ -332,8 +513,37 @@ static int arc_pmu_device_probe(struct platform_device *pdev) .read = arc_pmu_read, }; - /* ARC 700 PMU does not support sampling events */ - arc_pmu->pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT; + if (has_interrupts) { + int irq = platform_get_irq(pdev, 0); + unsigned long flags; + + if (irq < 0) { + pr_err("Cannot get IRQ number for the platform\n"); + return -ENODEV; + } + + arc_pmu->irq = irq; + + /* + * arc_cpu_pmu_irq_init() needs to be called on all cores for + * their respective local PMU. + * However we use opencoded on_each_cpu() to ensure it is called + * on core0 first, so that arc_request_percpu_irq() sets up + * AUTOEN etc. Otherwise enable_percpu_irq() fails to enable + * perf IRQ on non master cores. + * see arc_request_percpu_irq() + */ + preempt_disable(); + local_irq_save(flags); + arc_cpu_pmu_irq_init(); + local_irq_restore(flags); + smp_call_function((smp_call_func_t)arc_cpu_pmu_irq_init, 0, 1); + preempt_enable(); + + /* Clean all pending interrupt flags */ + write_aux_reg(ARC_REG_PCT_INT_ACT, 0xffffffff); + } else + arc_pmu->pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT; return perf_pmu_register(&arc_pmu->pmu, pdev->name, PERF_TYPE_RAW); } @@ -341,6 +551,7 @@ static int arc_pmu_device_probe(struct platform_device *pdev) #ifdef CONFIG_OF static const struct of_device_id arc_pmu_match[] = { { .compatible = "snps,arc700-pct" }, + { .compatible = "snps,archs-pct" }, {}, }; MODULE_DEVICE_TABLE(of, arc_pmu_match); @@ -348,7 +559,7 @@ MODULE_DEVICE_TABLE(of, arc_pmu_match); static struct platform_driver arc_pmu_driver = { .driver = { - .name = "arc700-pct", + .name = "arc-pct", .of_match_table = of_match_ptr(arc_pmu_match), }, .probe = arc_pmu_device_probe, |