summaryrefslogtreecommitdiff
path: root/arch/x86/events
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/events')
-rw-r--r--arch/x86/events/Makefile14
-rw-r--r--arch/x86/events/amd/core.c746
-rw-r--r--arch/x86/events/amd/ibs.c1026
-rw-r--r--arch/x86/events/amd/iommu.c505
-rw-r--r--arch/x86/events/amd/iommu.h40
-rw-r--r--arch/x86/events/amd/power.c353
-rw-r--r--arch/x86/events/amd/uncore.c603
-rw-r--r--arch/x86/events/core.c2442
-rw-r--r--arch/x86/events/intel/bts.c544
-rw-r--r--arch/x86/events/intel/core.c3799
-rw-r--r--arch/x86/events/intel/cqm.c1789
-rw-r--r--arch/x86/events/intel/cstate.c694
-rw-r--r--arch/x86/events/intel/ds.c1413
-rw-r--r--arch/x86/events/intel/knc.c321
-rw-r--r--arch/x86/events/intel/lbr.c1064
-rw-r--r--arch/x86/events/intel/p4.c1376
-rw-r--r--arch/x86/events/intel/p6.c279
-rw-r--r--arch/x86/events/intel/pt.c1243
-rw-r--r--arch/x86/events/intel/pt.h119
-rw-r--r--arch/x86/events/intel/rapl.c770
-rw-r--r--arch/x86/events/intel/uncore.c1412
-rw-r--r--arch/x86/events/intel/uncore.h378
-rw-r--r--arch/x86/events/intel/uncore_nhmex.c1227
-rw-r--r--arch/x86/events/intel/uncore_snb.c731
-rw-r--r--arch/x86/events/intel/uncore_snbep.c3139
-rw-r--r--arch/x86/events/msr.c241
-rw-r--r--arch/x86/events/perf_event.h965
27 files changed, 27233 insertions, 0 deletions
diff --git a/arch/x86/events/Makefile b/arch/x86/events/Makefile
new file mode 100644
index 000000000..f59618a39
--- /dev/null
+++ b/arch/x86/events/Makefile
@@ -0,0 +1,14 @@
+obj-y += core.o
+
+obj-$(CONFIG_CPU_SUP_AMD) += amd/core.o amd/uncore.o
+obj-$(CONFIG_PERF_EVENTS_AMD_POWER) += amd/power.o
+obj-$(CONFIG_X86_LOCAL_APIC) += amd/ibs.o msr.o
+ifdef CONFIG_AMD_IOMMU
+obj-$(CONFIG_CPU_SUP_AMD) += amd/iommu.o
+endif
+obj-$(CONFIG_CPU_SUP_INTEL) += intel/core.o intel/bts.o intel/cqm.o
+obj-$(CONFIG_CPU_SUP_INTEL) += intel/cstate.o intel/ds.o intel/knc.o
+obj-$(CONFIG_CPU_SUP_INTEL) += intel/lbr.o intel/p4.o intel/p6.o intel/pt.o
+obj-$(CONFIG_CPU_SUP_INTEL) += intel/rapl.o msr.o
+obj-$(CONFIG_PERF_EVENTS_INTEL_UNCORE) += intel/uncore.o intel/uncore_nhmex.o
+obj-$(CONFIG_PERF_EVENTS_INTEL_UNCORE) += intel/uncore_snb.o intel/uncore_snbep.o
diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c
new file mode 100644
index 000000000..bd3e8421b
--- /dev/null
+++ b/arch/x86/events/amd/core.c
@@ -0,0 +1,746 @@
+#include <linux/perf_event.h>
+#include <linux/export.h>
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <asm/apicdef.h>
+
+#include "../perf_event.h"
+
+static __initconst const u64 amd_hw_cache_event_ids
+ [PERF_COUNT_HW_CACHE_MAX]
+ [PERF_COUNT_HW_CACHE_OP_MAX]
+ [PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+ [ C(L1D) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */
+ [ C(RESULT_MISS) ] = 0x0141, /* Data Cache Misses */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = 0,
+ [ C(RESULT_MISS) ] = 0,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = 0x0267, /* Data Prefetcher :attempts */
+ [ C(RESULT_MISS) ] = 0x0167, /* Data Prefetcher :cancelled */
+ },
+ },
+ [ C(L1I ) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x0080, /* Instruction cache fetches */
+ [ C(RESULT_MISS) ] = 0x0081, /* Instruction cache misses */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = 0x014B, /* Prefetch Instructions :Load */
+ [ C(RESULT_MISS) ] = 0,
+ },
+ },
+ [ C(LL ) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x037D, /* Requests to L2 Cache :IC+DC */
+ [ C(RESULT_MISS) ] = 0x037E, /* L2 Cache Misses : IC+DC */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = 0x017F, /* L2 Fill/Writeback */
+ [ C(RESULT_MISS) ] = 0,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = 0,
+ [ C(RESULT_MISS) ] = 0,
+ },
+ },
+ [ C(DTLB) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */
+ [ C(RESULT_MISS) ] = 0x0746, /* L1_DTLB_AND_L2_DLTB_MISS.ALL */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = 0,
+ [ C(RESULT_MISS) ] = 0,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = 0,
+ [ C(RESULT_MISS) ] = 0,
+ },
+ },
+ [ C(ITLB) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x0080, /* Instruction fecthes */
+ [ C(RESULT_MISS) ] = 0x0385, /* L1_ITLB_AND_L2_ITLB_MISS.ALL */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ },
+ [ C(BPU ) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x00c2, /* Retired Branch Instr. */
+ [ C(RESULT_MISS) ] = 0x00c3, /* Retired Mispredicted BI */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ },
+ [ C(NODE) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0xb8e9, /* CPU Request to Memory, l+r */
+ [ C(RESULT_MISS) ] = 0x98e9, /* CPU Request to Memory, r */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ },
+};
+
+/*
+ * AMD Performance Monitor K7 and later.
+ */
+static const u64 amd_perfmon_event_map[PERF_COUNT_HW_MAX] =
+{
+ [PERF_COUNT_HW_CPU_CYCLES] = 0x0076,
+ [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
+ [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0080,
+ [PERF_COUNT_HW_CACHE_MISSES] = 0x0081,
+ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c2,
+ [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c3,
+ [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x00d0, /* "Decoder empty" event */
+ [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x00d1, /* "Dispatch stalls" event */
+};
+
+static u64 amd_pmu_event_map(int hw_event)
+{
+ return amd_perfmon_event_map[hw_event];
+}
+
+/*
+ * Previously calculated offsets
+ */
+static unsigned int event_offsets[X86_PMC_IDX_MAX] __read_mostly;
+static unsigned int count_offsets[X86_PMC_IDX_MAX] __read_mostly;
+
+/*
+ * Legacy CPUs:
+ * 4 counters starting at 0xc0010000 each offset by 1
+ *
+ * CPUs with core performance counter extensions:
+ * 6 counters starting at 0xc0010200 each offset by 2
+ */
+static inline int amd_pmu_addr_offset(int index, bool eventsel)
+{
+ int offset;
+
+ if (!index)
+ return index;
+
+ if (eventsel)
+ offset = event_offsets[index];
+ else
+ offset = count_offsets[index];
+
+ if (offset)
+ return offset;
+
+ if (!boot_cpu_has(X86_FEATURE_PERFCTR_CORE))
+ offset = index;
+ else
+ offset = index << 1;
+
+ if (eventsel)
+ event_offsets[index] = offset;
+ else
+ count_offsets[index] = offset;
+
+ return offset;
+}
+
+static int amd_core_hw_config(struct perf_event *event)
+{
+ if (event->attr.exclude_host && event->attr.exclude_guest)
+ /*
+ * When HO == GO == 1 the hardware treats that as GO == HO == 0
+ * and will count in both modes. We don't want to count in that
+ * case so we emulate no-counting by setting US = OS = 0.
+ */
+ event->hw.config &= ~(ARCH_PERFMON_EVENTSEL_USR |
+ ARCH_PERFMON_EVENTSEL_OS);
+ else if (event->attr.exclude_host)
+ event->hw.config |= AMD64_EVENTSEL_GUESTONLY;
+ else if (event->attr.exclude_guest)
+ event->hw.config |= AMD64_EVENTSEL_HOSTONLY;
+
+ return 0;
+}
+
+/*
+ * AMD64 events are detected based on their event codes.
+ */
+static inline unsigned int amd_get_event_code(struct hw_perf_event *hwc)
+{
+ return ((hwc->config >> 24) & 0x0f00) | (hwc->config & 0x00ff);
+}
+
+static inline int amd_is_nb_event(struct hw_perf_event *hwc)
+{
+ return (hwc->config & 0xe0) == 0xe0;
+}
+
+static inline int amd_has_nb(struct cpu_hw_events *cpuc)
+{
+ struct amd_nb *nb = cpuc->amd_nb;
+
+ return nb && nb->nb_id != -1;
+}
+
+static int amd_pmu_hw_config(struct perf_event *event)
+{
+ int ret;
+
+ /* pass precise event sampling to ibs: */
+ if (event->attr.precise_ip && get_ibs_caps())
+ return -ENOENT;
+
+ if (has_branch_stack(event))
+ return -EOPNOTSUPP;
+
+ ret = x86_pmu_hw_config(event);
+ if (ret)
+ return ret;
+
+ if (event->attr.type == PERF_TYPE_RAW)
+ event->hw.config |= event->attr.config & AMD64_RAW_EVENT_MASK;
+
+ return amd_core_hw_config(event);
+}
+
+static void __amd_put_nb_event_constraints(struct cpu_hw_events *cpuc,
+ struct perf_event *event)
+{
+ struct amd_nb *nb = cpuc->amd_nb;
+ int i;
+
+ /*
+ * need to scan whole list because event may not have
+ * been assigned during scheduling
+ *
+ * no race condition possible because event can only
+ * be removed on one CPU at a time AND PMU is disabled
+ * when we come here
+ */
+ for (i = 0; i < x86_pmu.num_counters; i++) {
+ if (cmpxchg(nb->owners + i, event, NULL) == event)
+ break;
+ }
+}
+
+ /*
+ * AMD64 NorthBridge events need special treatment because
+ * counter access needs to be synchronized across all cores
+ * of a package. Refer to BKDG section 3.12
+ *
+ * NB events are events measuring L3 cache, Hypertransport
+ * traffic. They are identified by an event code >= 0xe00.
+ * They measure events on the NorthBride which is shared
+ * by all cores on a package. NB events are counted on a
+ * shared set of counters. When a NB event is programmed
+ * in a counter, the data actually comes from a shared
+ * counter. Thus, access to those counters needs to be
+ * synchronized.
+ *
+ * We implement the synchronization such that no two cores
+ * can be measuring NB events using the same counters. Thus,
+ * we maintain a per-NB allocation table. The available slot
+ * is propagated using the event_constraint structure.
+ *
+ * We provide only one choice for each NB event based on
+ * the fact that only NB events have restrictions. Consequently,
+ * if a counter is available, there is a guarantee the NB event
+ * will be assigned to it. If no slot is available, an empty
+ * constraint is returned and scheduling will eventually fail
+ * for this event.
+ *
+ * Note that all cores attached the same NB compete for the same
+ * counters to host NB events, this is why we use atomic ops. Some
+ * multi-chip CPUs may have more than one NB.
+ *
+ * Given that resources are allocated (cmpxchg), they must be
+ * eventually freed for others to use. This is accomplished by
+ * calling __amd_put_nb_event_constraints()
+ *
+ * Non NB events are not impacted by this restriction.
+ */
+static struct event_constraint *
+__amd_get_nb_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event,
+ struct event_constraint *c)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ struct amd_nb *nb = cpuc->amd_nb;
+ struct perf_event *old;
+ int idx, new = -1;
+
+ if (!c)
+ c = &unconstrained;
+
+ if (cpuc->is_fake)
+ return c;
+
+ /*
+ * detect if already present, if so reuse
+ *
+ * cannot merge with actual allocation
+ * because of possible holes
+ *
+ * event can already be present yet not assigned (in hwc->idx)
+ * because of successive calls to x86_schedule_events() from
+ * hw_perf_group_sched_in() without hw_perf_enable()
+ */
+ for_each_set_bit(idx, c->idxmsk, x86_pmu.num_counters) {
+ if (new == -1 || hwc->idx == idx)
+ /* assign free slot, prefer hwc->idx */
+ old = cmpxchg(nb->owners + idx, NULL, event);
+ else if (nb->owners[idx] == event)
+ /* event already present */
+ old = event;
+ else
+ continue;
+
+ if (old && old != event)
+ continue;
+
+ /* reassign to this slot */
+ if (new != -1)
+ cmpxchg(nb->owners + new, event, NULL);
+ new = idx;
+
+ /* already present, reuse */
+ if (old == event)
+ break;
+ }
+
+ if (new == -1)
+ return &emptyconstraint;
+
+ return &nb->event_constraints[new];
+}
+
+static struct amd_nb *amd_alloc_nb(int cpu)
+{
+ struct amd_nb *nb;
+ int i;
+
+ nb = kzalloc_node(sizeof(struct amd_nb), GFP_KERNEL, cpu_to_node(cpu));
+ if (!nb)
+ return NULL;
+
+ nb->nb_id = -1;
+
+ /*
+ * initialize all possible NB constraints
+ */
+ for (i = 0; i < x86_pmu.num_counters; i++) {
+ __set_bit(i, nb->event_constraints[i].idxmsk);
+ nb->event_constraints[i].weight = 1;
+ }
+ return nb;
+}
+
+static int amd_pmu_cpu_prepare(int cpu)
+{
+ struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
+
+ WARN_ON_ONCE(cpuc->amd_nb);
+
+ if (!x86_pmu.amd_nb_constraints)
+ return NOTIFY_OK;
+
+ cpuc->amd_nb = amd_alloc_nb(cpu);
+ if (!cpuc->amd_nb)
+ return NOTIFY_BAD;
+
+ return NOTIFY_OK;
+}
+
+static void amd_pmu_cpu_starting(int cpu)
+{
+ struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
+ void **onln = &cpuc->kfree_on_online[X86_PERF_KFREE_SHARED];
+ struct amd_nb *nb;
+ int i, nb_id;
+
+ cpuc->perf_ctr_virt_mask = AMD64_EVENTSEL_HOSTONLY;
+
+ if (!x86_pmu.amd_nb_constraints)
+ return;
+
+ nb_id = amd_get_nb_id(cpu);
+ WARN_ON_ONCE(nb_id == BAD_APICID);
+
+ for_each_online_cpu(i) {
+ nb = per_cpu(cpu_hw_events, i).amd_nb;
+ if (WARN_ON_ONCE(!nb))
+ continue;
+
+ if (nb->nb_id == nb_id) {
+ *onln = cpuc->amd_nb;
+ cpuc->amd_nb = nb;
+ break;
+ }
+ }
+
+ cpuc->amd_nb->nb_id = nb_id;
+ cpuc->amd_nb->refcnt++;
+}
+
+static void amd_pmu_cpu_dead(int cpu)
+{
+ struct cpu_hw_events *cpuhw;
+
+ if (!x86_pmu.amd_nb_constraints)
+ return;
+
+ cpuhw = &per_cpu(cpu_hw_events, cpu);
+
+ if (cpuhw->amd_nb) {
+ struct amd_nb *nb = cpuhw->amd_nb;
+
+ if (nb->nb_id == -1 || --nb->refcnt == 0)
+ kfree(nb);
+
+ cpuhw->amd_nb = NULL;
+ }
+}
+
+static struct event_constraint *
+amd_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
+ struct perf_event *event)
+{
+ /*
+ * if not NB event or no NB, then no constraints
+ */
+ if (!(amd_has_nb(cpuc) && amd_is_nb_event(&event->hw)))
+ return &unconstrained;
+
+ return __amd_get_nb_event_constraints(cpuc, event, NULL);
+}
+
+static void amd_put_event_constraints(struct cpu_hw_events *cpuc,
+ struct perf_event *event)
+{
+ if (amd_has_nb(cpuc) && amd_is_nb_event(&event->hw))
+ __amd_put_nb_event_constraints(cpuc, event);
+}
+
+PMU_FORMAT_ATTR(event, "config:0-7,32-35");
+PMU_FORMAT_ATTR(umask, "config:8-15" );
+PMU_FORMAT_ATTR(edge, "config:18" );
+PMU_FORMAT_ATTR(inv, "config:23" );
+PMU_FORMAT_ATTR(cmask, "config:24-31" );
+
+static struct attribute *amd_format_attr[] = {
+ &format_attr_event.attr,
+ &format_attr_umask.attr,
+ &format_attr_edge.attr,
+ &format_attr_inv.attr,
+ &format_attr_cmask.attr,
+ NULL,
+};
+
+/* AMD Family 15h */
+
+#define AMD_EVENT_TYPE_MASK 0x000000F0ULL
+
+#define AMD_EVENT_FP 0x00000000ULL ... 0x00000010ULL
+#define AMD_EVENT_LS 0x00000020ULL ... 0x00000030ULL
+#define AMD_EVENT_DC 0x00000040ULL ... 0x00000050ULL
+#define AMD_EVENT_CU 0x00000060ULL ... 0x00000070ULL
+#define AMD_EVENT_IC_DE 0x00000080ULL ... 0x00000090ULL
+#define AMD_EVENT_EX_LS 0x000000C0ULL
+#define AMD_EVENT_DE 0x000000D0ULL
+#define AMD_EVENT_NB 0x000000E0ULL ... 0x000000F0ULL
+
+/*
+ * AMD family 15h event code/PMC mappings:
+ *
+ * type = event_code & 0x0F0:
+ *
+ * 0x000 FP PERF_CTL[5:3]
+ * 0x010 FP PERF_CTL[5:3]
+ * 0x020 LS PERF_CTL[5:0]
+ * 0x030 LS PERF_CTL[5:0]
+ * 0x040 DC PERF_CTL[5:0]
+ * 0x050 DC PERF_CTL[5:0]
+ * 0x060 CU PERF_CTL[2:0]
+ * 0x070 CU PERF_CTL[2:0]
+ * 0x080 IC/DE PERF_CTL[2:0]
+ * 0x090 IC/DE PERF_CTL[2:0]
+ * 0x0A0 ---
+ * 0x0B0 ---
+ * 0x0C0 EX/LS PERF_CTL[5:0]
+ * 0x0D0 DE PERF_CTL[2:0]
+ * 0x0E0 NB NB_PERF_CTL[3:0]
+ * 0x0F0 NB NB_PERF_CTL[3:0]
+ *
+ * Exceptions:
+ *
+ * 0x000 FP PERF_CTL[3], PERF_CTL[5:3] (*)
+ * 0x003 FP PERF_CTL[3]
+ * 0x004 FP PERF_CTL[3], PERF_CTL[5:3] (*)
+ * 0x00B FP PERF_CTL[3]
+ * 0x00D FP PERF_CTL[3]
+ * 0x023 DE PERF_CTL[2:0]
+ * 0x02D LS PERF_CTL[3]
+ * 0x02E LS PERF_CTL[3,0]
+ * 0x031 LS PERF_CTL[2:0] (**)
+ * 0x043 CU PERF_CTL[2:0]
+ * 0x045 CU PERF_CTL[2:0]
+ * 0x046 CU PERF_CTL[2:0]
+ * 0x054 CU PERF_CTL[2:0]
+ * 0x055 CU PERF_CTL[2:0]
+ * 0x08F IC PERF_CTL[0]
+ * 0x187 DE PERF_CTL[0]
+ * 0x188 DE PERF_CTL[0]
+ * 0x0DB EX PERF_CTL[5:0]
+ * 0x0DC LS PERF_CTL[5:0]
+ * 0x0DD LS PERF_CTL[5:0]
+ * 0x0DE LS PERF_CTL[5:0]
+ * 0x0DF LS PERF_CTL[5:0]
+ * 0x1C0 EX PERF_CTL[5:3]
+ * 0x1D6 EX PERF_CTL[5:0]
+ * 0x1D8 EX PERF_CTL[5:0]
+ *
+ * (*) depending on the umask all FPU counters may be used
+ * (**) only one unitmask enabled at a time
+ */
+
+static struct event_constraint amd_f15_PMC0 = EVENT_CONSTRAINT(0, 0x01, 0);
+static struct event_constraint amd_f15_PMC20 = EVENT_CONSTRAINT(0, 0x07, 0);
+static struct event_constraint amd_f15_PMC3 = EVENT_CONSTRAINT(0, 0x08, 0);
+static struct event_constraint amd_f15_PMC30 = EVENT_CONSTRAINT_OVERLAP(0, 0x09, 0);
+static struct event_constraint amd_f15_PMC50 = EVENT_CONSTRAINT(0, 0x3F, 0);
+static struct event_constraint amd_f15_PMC53 = EVENT_CONSTRAINT(0, 0x38, 0);
+
+static struct event_constraint *
+amd_get_event_constraints_f15h(struct cpu_hw_events *cpuc, int idx,
+ struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ unsigned int event_code = amd_get_event_code(hwc);
+
+ switch (event_code & AMD_EVENT_TYPE_MASK) {
+ case AMD_EVENT_FP:
+ switch (event_code) {
+ case 0x000:
+ if (!(hwc->config & 0x0000F000ULL))
+ break;
+ if (!(hwc->config & 0x00000F00ULL))
+ break;
+ return &amd_f15_PMC3;
+ case 0x004:
+ if (hweight_long(hwc->config & ARCH_PERFMON_EVENTSEL_UMASK) <= 1)
+ break;
+ return &amd_f15_PMC3;
+ case 0x003:
+ case 0x00B:
+ case 0x00D:
+ return &amd_f15_PMC3;
+ }
+ return &amd_f15_PMC53;
+ case AMD_EVENT_LS:
+ case AMD_EVENT_DC:
+ case AMD_EVENT_EX_LS:
+ switch (event_code) {
+ case 0x023:
+ case 0x043:
+ case 0x045:
+ case 0x046:
+ case 0x054:
+ case 0x055:
+ return &amd_f15_PMC20;
+ case 0x02D:
+ return &amd_f15_PMC3;
+ case 0x02E:
+ return &amd_f15_PMC30;
+ case 0x031:
+ if (hweight_long(hwc->config & ARCH_PERFMON_EVENTSEL_UMASK) <= 1)
+ return &amd_f15_PMC20;
+ return &emptyconstraint;
+ case 0x1C0:
+ return &amd_f15_PMC53;
+ default:
+ return &amd_f15_PMC50;
+ }
+ case AMD_EVENT_CU:
+ case AMD_EVENT_IC_DE:
+ case AMD_EVENT_DE:
+ switch (event_code) {
+ case 0x08F:
+ case 0x187:
+ case 0x188:
+ return &amd_f15_PMC0;
+ case 0x0DB ... 0x0DF:
+ case 0x1D6:
+ case 0x1D8:
+ return &amd_f15_PMC50;
+ default:
+ return &amd_f15_PMC20;
+ }
+ case AMD_EVENT_NB:
+ /* moved to perf_event_amd_uncore.c */
+ return &emptyconstraint;
+ default:
+ return &emptyconstraint;
+ }
+}
+
+static ssize_t amd_event_sysfs_show(char *page, u64 config)
+{
+ u64 event = (config & ARCH_PERFMON_EVENTSEL_EVENT) |
+ (config & AMD64_EVENTSEL_EVENT) >> 24;
+
+ return x86_event_sysfs_show(page, config, event);
+}
+
+static __initconst const struct x86_pmu amd_pmu = {
+ .name = "AMD",
+ .handle_irq = x86_pmu_handle_irq,
+ .disable_all = x86_pmu_disable_all,
+ .enable_all = x86_pmu_enable_all,
+ .enable = x86_pmu_enable_event,
+ .disable = x86_pmu_disable_event,
+ .hw_config = amd_pmu_hw_config,
+ .schedule_events = x86_schedule_events,
+ .eventsel = MSR_K7_EVNTSEL0,
+ .perfctr = MSR_K7_PERFCTR0,
+ .addr_offset = amd_pmu_addr_offset,
+ .event_map = amd_pmu_event_map,
+ .max_events = ARRAY_SIZE(amd_perfmon_event_map),
+ .num_counters = AMD64_NUM_COUNTERS,
+ .cntval_bits = 48,
+ .cntval_mask = (1ULL << 48) - 1,
+ .apic = 1,
+ /* use highest bit to detect overflow */
+ .max_period = (1ULL << 47) - 1,
+ .get_event_constraints = amd_get_event_constraints,
+ .put_event_constraints = amd_put_event_constraints,
+
+ .format_attrs = amd_format_attr,
+ .events_sysfs_show = amd_event_sysfs_show,
+
+ .cpu_prepare = amd_pmu_cpu_prepare,
+ .cpu_starting = amd_pmu_cpu_starting,
+ .cpu_dead = amd_pmu_cpu_dead,
+
+ .amd_nb_constraints = 1,
+};
+
+static int __init amd_core_pmu_init(void)
+{
+ if (!boot_cpu_has(X86_FEATURE_PERFCTR_CORE))
+ return 0;
+
+ switch (boot_cpu_data.x86) {
+ case 0x15:
+ pr_cont("Fam15h ");
+ x86_pmu.get_event_constraints = amd_get_event_constraints_f15h;
+ break;
+
+ default:
+ pr_err("core perfctr but no constraints; unknown hardware!\n");
+ return -ENODEV;
+ }
+
+ /*
+ * If core performance counter extensions exists, we must use
+ * MSR_F15H_PERF_CTL/MSR_F15H_PERF_CTR msrs. See also
+ * amd_pmu_addr_offset().
+ */
+ x86_pmu.eventsel = MSR_F15H_PERF_CTL;
+ x86_pmu.perfctr = MSR_F15H_PERF_CTR;
+ x86_pmu.num_counters = AMD64_NUM_COUNTERS_CORE;
+ /*
+ * AMD Core perfctr has separate MSRs for the NB events, see
+ * the amd/uncore.c driver.
+ */
+ x86_pmu.amd_nb_constraints = 0;
+
+ pr_cont("core perfctr, ");
+ return 0;
+}
+
+__init int amd_pmu_init(void)
+{
+ int ret;
+
+ /* Performance-monitoring supported from K7 and later: */
+ if (boot_cpu_data.x86 < 6)
+ return -ENODEV;
+
+ x86_pmu = amd_pmu;
+
+ ret = amd_core_pmu_init();
+ if (ret)
+ return ret;
+
+ if (num_possible_cpus() == 1) {
+ /*
+ * No point in allocating data structures to serialize
+ * against other CPUs, when there is only the one CPU.
+ */
+ x86_pmu.amd_nb_constraints = 0;
+ }
+
+ /* Events are common for all AMDs */
+ memcpy(hw_cache_event_ids, amd_hw_cache_event_ids,
+ sizeof(hw_cache_event_ids));
+
+ return 0;
+}
+
+void amd_pmu_enable_virt(void)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+
+ cpuc->perf_ctr_virt_mask = 0;
+
+ /* Reload all events */
+ x86_pmu_disable_all();
+ x86_pmu_enable_all(0);
+}
+EXPORT_SYMBOL_GPL(amd_pmu_enable_virt);
+
+void amd_pmu_disable_virt(void)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+
+ /*
+ * We only mask out the Host-only bit so that host-only counting works
+ * when SVM is disabled. If someone sets up a guest-only counter when
+ * SVM is disabled the Guest-only bits still gets set and the counter
+ * will not count anything.
+ */
+ cpuc->perf_ctr_virt_mask = AMD64_EVENTSEL_HOSTONLY;
+
+ /* Reload all events */
+ x86_pmu_disable_all();
+ x86_pmu_enable_all(0);
+}
+EXPORT_SYMBOL_GPL(amd_pmu_disable_virt);
diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c
new file mode 100644
index 000000000..feb90f673
--- /dev/null
+++ b/arch/x86/events/amd/ibs.c
@@ -0,0 +1,1026 @@
+/*
+ * Performance events - AMD IBS
+ *
+ * Copyright (C) 2011 Advanced Micro Devices, Inc., Robert Richter
+ *
+ * For licencing details see kernel-base/COPYING
+ */
+
+#include <linux/perf_event.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/ptrace.h>
+#include <linux/syscore_ops.h>
+
+#include <asm/apic.h>
+
+#include "../perf_event.h"
+
+static u32 ibs_caps;
+
+#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD)
+
+#include <linux/kprobes.h>
+#include <linux/hardirq.h>
+
+#include <asm/nmi.h>
+
+#define IBS_FETCH_CONFIG_MASK (IBS_FETCH_RAND_EN | IBS_FETCH_MAX_CNT)
+#define IBS_OP_CONFIG_MASK IBS_OP_MAX_CNT
+
+
+/*
+ * IBS states:
+ *
+ * ENABLED; tracks the pmu::add(), pmu::del() state, when set the counter is taken
+ * and any further add()s must fail.
+ *
+ * STARTED/STOPPING/STOPPED; deal with pmu::start(), pmu::stop() state but are
+ * complicated by the fact that the IBS hardware can send late NMIs (ie. after
+ * we've cleared the EN bit).
+ *
+ * In order to consume these late NMIs we have the STOPPED state, any NMI that
+ * happens after we've cleared the EN state will clear this bit and report the
+ * NMI handled (this is fundamentally racy in the face or multiple NMI sources,
+ * someone else can consume our BIT and our NMI will go unhandled).
+ *
+ * And since we cannot set/clear this separate bit together with the EN bit,
+ * there are races; if we cleared STARTED early, an NMI could land in
+ * between clearing STARTED and clearing the EN bit (in fact multiple NMIs
+ * could happen if the period is small enough), and consume our STOPPED bit
+ * and trigger streams of unhandled NMIs.
+ *
+ * If, however, we clear STARTED late, an NMI can hit between clearing the
+ * EN bit and clearing STARTED, still see STARTED set and process the event.
+ * If this event will have the VALID bit clear, we bail properly, but this
+ * is not a given. With VALID set we can end up calling pmu::stop() again
+ * (the throttle logic) and trigger the WARNs in there.
+ *
+ * So what we do is set STOPPING before clearing EN to avoid the pmu::stop()
+ * nesting, and clear STARTED late, so that we have a well defined state over
+ * the clearing of the EN bit.
+ *
+ * XXX: we could probably be using !atomic bitops for all this.
+ */
+
+enum ibs_states {
+ IBS_ENABLED = 0,
+ IBS_STARTED = 1,
+ IBS_STOPPING = 2,
+ IBS_STOPPED = 3,
+
+ IBS_MAX_STATES,
+};
+
+struct cpu_perf_ibs {
+ struct perf_event *event;
+ unsigned long state[BITS_TO_LONGS(IBS_MAX_STATES)];
+};
+
+struct perf_ibs {
+ struct pmu pmu;
+ unsigned int msr;
+ u64 config_mask;
+ u64 cnt_mask;
+ u64 enable_mask;
+ u64 valid_mask;
+ u64 max_period;
+ unsigned long offset_mask[1];
+ int offset_max;
+ struct cpu_perf_ibs __percpu *pcpu;
+
+ struct attribute **format_attrs;
+ struct attribute_group format_group;
+ const struct attribute_group *attr_groups[2];
+
+ u64 (*get_count)(u64 config);
+};
+
+struct perf_ibs_data {
+ u32 size;
+ union {
+ u32 data[0]; /* data buffer starts here */
+ u32 caps;
+ };
+ u64 regs[MSR_AMD64_IBS_REG_COUNT_MAX];
+};
+
+static int
+perf_event_set_period(struct hw_perf_event *hwc, u64 min, u64 max, u64 *hw_period)
+{
+ s64 left = local64_read(&hwc->period_left);
+ s64 period = hwc->sample_period;
+ int overflow = 0;
+
+ /*
+ * If we are way outside a reasonable range then just skip forward:
+ */
+ if (unlikely(left <= -period)) {
+ left = period;
+ local64_set(&hwc->period_left, left);
+ hwc->last_period = period;
+ overflow = 1;
+ }
+
+ if (unlikely(left < (s64)min)) {
+ left += period;
+ local64_set(&hwc->period_left, left);
+ hwc->last_period = period;
+ overflow = 1;
+ }
+
+ /*
+ * If the hw period that triggers the sw overflow is too short
+ * we might hit the irq handler. This biases the results.
+ * Thus we shorten the next-to-last period and set the last
+ * period to the max period.
+ */
+ if (left > max) {
+ left -= max;
+ if (left > max)
+ left = max;
+ else if (left < min)
+ left = min;
+ }
+
+ *hw_period = (u64)left;
+
+ return overflow;
+}
+
+static int
+perf_event_try_update(struct perf_event *event, u64 new_raw_count, int width)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ int shift = 64 - width;
+ u64 prev_raw_count;
+ u64 delta;
+
+ /*
+ * Careful: an NMI might modify the previous event value.
+ *
+ * Our tactic to handle this is to first atomically read and
+ * exchange a new raw count - then add that new-prev delta
+ * count to the generic event atomically:
+ */
+ prev_raw_count = local64_read(&hwc->prev_count);
+ if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
+ new_raw_count) != prev_raw_count)
+ return 0;
+
+ /*
+ * Now we have the new raw value and have updated the prev
+ * timestamp already. We can now calculate the elapsed delta
+ * (event-)time and add that to the generic event.
+ *
+ * Careful, not all hw sign-extends above the physical width
+ * of the count.
+ */
+ delta = (new_raw_count << shift) - (prev_raw_count << shift);
+ delta >>= shift;
+
+ local64_add(delta, &event->count);
+ local64_sub(delta, &hwc->period_left);
+
+ return 1;
+}
+
+static struct perf_ibs perf_ibs_fetch;
+static struct perf_ibs perf_ibs_op;
+
+static struct perf_ibs *get_ibs_pmu(int type)
+{
+ if (perf_ibs_fetch.pmu.type == type)
+ return &perf_ibs_fetch;
+ if (perf_ibs_op.pmu.type == type)
+ return &perf_ibs_op;
+ return NULL;
+}
+
+/*
+ * Use IBS for precise event sampling:
+ *
+ * perf record -a -e cpu-cycles:p ... # use ibs op counting cycle count
+ * perf record -a -e r076:p ... # same as -e cpu-cycles:p
+ * perf record -a -e r0C1:p ... # use ibs op counting micro-ops
+ *
+ * IbsOpCntCtl (bit 19) of IBS Execution Control Register (IbsOpCtl,
+ * MSRC001_1033) is used to select either cycle or micro-ops counting
+ * mode.
+ *
+ * The rip of IBS samples has skid 0. Thus, IBS supports precise
+ * levels 1 and 2 and the PERF_EFLAGS_EXACT is set. In rare cases the
+ * rip is invalid when IBS was not able to record the rip correctly.
+ * We clear PERF_EFLAGS_EXACT and take the rip from pt_regs then.
+ *
+ */
+static int perf_ibs_precise_event(struct perf_event *event, u64 *config)
+{
+ switch (event->attr.precise_ip) {
+ case 0:
+ return -ENOENT;
+ case 1:
+ case 2:
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ switch (event->attr.type) {
+ case PERF_TYPE_HARDWARE:
+ switch (event->attr.config) {
+ case PERF_COUNT_HW_CPU_CYCLES:
+ *config = 0;
+ return 0;
+ }
+ break;
+ case PERF_TYPE_RAW:
+ switch (event->attr.config) {
+ case 0x0076:
+ *config = 0;
+ return 0;
+ case 0x00C1:
+ *config = IBS_OP_CNT_CTL;
+ return 0;
+ }
+ break;
+ default:
+ return -ENOENT;
+ }
+
+ return -EOPNOTSUPP;
+}
+
+static const struct perf_event_attr ibs_notsupp = {
+ .exclude_user = 1,
+ .exclude_kernel = 1,
+ .exclude_hv = 1,
+ .exclude_idle = 1,
+ .exclude_host = 1,
+ .exclude_guest = 1,
+};
+
+static int perf_ibs_init(struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ struct perf_ibs *perf_ibs;
+ u64 max_cnt, config;
+ int ret;
+
+ perf_ibs = get_ibs_pmu(event->attr.type);
+ if (perf_ibs) {
+ config = event->attr.config;
+ } else {
+ perf_ibs = &perf_ibs_op;
+ ret = perf_ibs_precise_event(event, &config);
+ if (ret)
+ return ret;
+ }
+
+ if (event->pmu != &perf_ibs->pmu)
+ return -ENOENT;
+
+ if (perf_flags(&event->attr) & perf_flags(&ibs_notsupp))
+ return -EINVAL;
+
+ if (config & ~perf_ibs->config_mask)
+ return -EINVAL;
+
+ if (hwc->sample_period) {
+ if (config & perf_ibs->cnt_mask)
+ /* raw max_cnt may not be set */
+ return -EINVAL;
+ if (!event->attr.sample_freq && hwc->sample_period & 0x0f)
+ /*
+ * lower 4 bits can not be set in ibs max cnt,
+ * but allowing it in case we adjust the
+ * sample period to set a frequency.
+ */
+ return -EINVAL;
+ hwc->sample_period &= ~0x0FULL;
+ if (!hwc->sample_period)
+ hwc->sample_period = 0x10;
+ } else {
+ max_cnt = config & perf_ibs->cnt_mask;
+ config &= ~perf_ibs->cnt_mask;
+ event->attr.sample_period = max_cnt << 4;
+ hwc->sample_period = event->attr.sample_period;
+ }
+
+ if (!hwc->sample_period)
+ return -EINVAL;
+
+ /*
+ * If we modify hwc->sample_period, we also need to update
+ * hwc->last_period and hwc->period_left.
+ */
+ hwc->last_period = hwc->sample_period;
+ local64_set(&hwc->period_left, hwc->sample_period);
+
+ hwc->config_base = perf_ibs->msr;
+ hwc->config = config;
+
+ return 0;
+}
+
+static int perf_ibs_set_period(struct perf_ibs *perf_ibs,
+ struct hw_perf_event *hwc, u64 *period)
+{
+ int overflow;
+
+ /* ignore lower 4 bits in min count: */
+ overflow = perf_event_set_period(hwc, 1<<4, perf_ibs->max_period, period);
+ local64_set(&hwc->prev_count, 0);
+
+ return overflow;
+}
+
+static u64 get_ibs_fetch_count(u64 config)
+{
+ return (config & IBS_FETCH_CNT) >> 12;
+}
+
+static u64 get_ibs_op_count(u64 config)
+{
+ u64 count = 0;
+
+ if (config & IBS_OP_VAL)
+ count += (config & IBS_OP_MAX_CNT) << 4; /* cnt rolled over */
+
+ if (ibs_caps & IBS_CAPS_RDWROPCNT)
+ count += (config & IBS_OP_CUR_CNT) >> 32;
+
+ return count;
+}
+
+static void
+perf_ibs_event_update(struct perf_ibs *perf_ibs, struct perf_event *event,
+ u64 *config)
+{
+ u64 count = perf_ibs->get_count(*config);
+
+ /*
+ * Set width to 64 since we do not overflow on max width but
+ * instead on max count. In perf_ibs_set_period() we clear
+ * prev count manually on overflow.
+ */
+ while (!perf_event_try_update(event, count, 64)) {
+ rdmsrl(event->hw.config_base, *config);
+ count = perf_ibs->get_count(*config);
+ }
+}
+
+static inline void perf_ibs_enable_event(struct perf_ibs *perf_ibs,
+ struct hw_perf_event *hwc, u64 config)
+{
+ wrmsrl(hwc->config_base, hwc->config | config | perf_ibs->enable_mask);
+}
+
+/*
+ * Erratum #420 Instruction-Based Sampling Engine May Generate
+ * Interrupt that Cannot Be Cleared:
+ *
+ * Must clear counter mask first, then clear the enable bit. See
+ * Revision Guide for AMD Family 10h Processors, Publication #41322.
+ */
+static inline void perf_ibs_disable_event(struct perf_ibs *perf_ibs,
+ struct hw_perf_event *hwc, u64 config)
+{
+ config &= ~perf_ibs->cnt_mask;
+ wrmsrl(hwc->config_base, config);
+ config &= ~perf_ibs->enable_mask;
+ wrmsrl(hwc->config_base, config);
+}
+
+/*
+ * We cannot restore the ibs pmu state, so we always needs to update
+ * the event while stopping it and then reset the state when starting
+ * again. Thus, ignoring PERF_EF_RELOAD and PERF_EF_UPDATE flags in
+ * perf_ibs_start()/perf_ibs_stop() and instead always do it.
+ */
+static void perf_ibs_start(struct perf_event *event, int flags)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
+ struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
+ u64 period;
+
+ if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED)))
+ return;
+
+ WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
+ hwc->state = 0;
+
+ perf_ibs_set_period(perf_ibs, hwc, &period);
+ /*
+ * Set STARTED before enabling the hardware, such that a subsequent NMI
+ * must observe it.
+ */
+ set_bit(IBS_STARTED, pcpu->state);
+ clear_bit(IBS_STOPPING, pcpu->state);
+ perf_ibs_enable_event(perf_ibs, hwc, period >> 4);
+
+ perf_event_update_userpage(event);
+}
+
+static void perf_ibs_stop(struct perf_event *event, int flags)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
+ struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
+ u64 config;
+ int stopping;
+
+ if (test_and_set_bit(IBS_STOPPING, pcpu->state))
+ return;
+
+ stopping = test_bit(IBS_STARTED, pcpu->state);
+
+ if (!stopping && (hwc->state & PERF_HES_UPTODATE))
+ return;
+
+ rdmsrl(hwc->config_base, config);
+
+ if (stopping) {
+ /*
+ * Set STOPPED before disabling the hardware, such that it
+ * must be visible to NMIs the moment we clear the EN bit,
+ * at which point we can generate an !VALID sample which
+ * we need to consume.
+ */
+ set_bit(IBS_STOPPED, pcpu->state);
+ perf_ibs_disable_event(perf_ibs, hwc, config);
+ /*
+ * Clear STARTED after disabling the hardware; if it were
+ * cleared before an NMI hitting after the clear but before
+ * clearing the EN bit might think it a spurious NMI and not
+ * handle it.
+ *
+ * Clearing it after, however, creates the problem of the NMI
+ * handler seeing STARTED but not having a valid sample.
+ */
+ clear_bit(IBS_STARTED, pcpu->state);
+ WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
+ hwc->state |= PERF_HES_STOPPED;
+ }
+
+ if (hwc->state & PERF_HES_UPTODATE)
+ return;
+
+ /*
+ * Clear valid bit to not count rollovers on update, rollovers
+ * are only updated in the irq handler.
+ */
+ config &= ~perf_ibs->valid_mask;
+
+ perf_ibs_event_update(perf_ibs, event, &config);
+ hwc->state |= PERF_HES_UPTODATE;
+}
+
+static int perf_ibs_add(struct perf_event *event, int flags)
+{
+ struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
+ struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
+
+ if (test_and_set_bit(IBS_ENABLED, pcpu->state))
+ return -ENOSPC;
+
+ event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
+
+ pcpu->event = event;
+
+ if (flags & PERF_EF_START)
+ perf_ibs_start(event, PERF_EF_RELOAD);
+
+ return 0;
+}
+
+static void perf_ibs_del(struct perf_event *event, int flags)
+{
+ struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
+ struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
+
+ if (!test_and_clear_bit(IBS_ENABLED, pcpu->state))
+ return;
+
+ perf_ibs_stop(event, PERF_EF_UPDATE);
+
+ pcpu->event = NULL;
+
+ perf_event_update_userpage(event);
+}
+
+static void perf_ibs_read(struct perf_event *event) { }
+
+PMU_FORMAT_ATTR(rand_en, "config:57");
+PMU_FORMAT_ATTR(cnt_ctl, "config:19");
+
+static struct attribute *ibs_fetch_format_attrs[] = {
+ &format_attr_rand_en.attr,
+ NULL,
+};
+
+static struct attribute *ibs_op_format_attrs[] = {
+ NULL, /* &format_attr_cnt_ctl.attr if IBS_CAPS_OPCNT */
+ NULL,
+};
+
+static struct perf_ibs perf_ibs_fetch = {
+ .pmu = {
+ .task_ctx_nr = perf_invalid_context,
+
+ .event_init = perf_ibs_init,
+ .add = perf_ibs_add,
+ .del = perf_ibs_del,
+ .start = perf_ibs_start,
+ .stop = perf_ibs_stop,
+ .read = perf_ibs_read,
+ },
+ .msr = MSR_AMD64_IBSFETCHCTL,
+ .config_mask = IBS_FETCH_CONFIG_MASK,
+ .cnt_mask = IBS_FETCH_MAX_CNT,
+ .enable_mask = IBS_FETCH_ENABLE,
+ .valid_mask = IBS_FETCH_VAL,
+ .max_period = IBS_FETCH_MAX_CNT << 4,
+ .offset_mask = { MSR_AMD64_IBSFETCH_REG_MASK },
+ .offset_max = MSR_AMD64_IBSFETCH_REG_COUNT,
+ .format_attrs = ibs_fetch_format_attrs,
+
+ .get_count = get_ibs_fetch_count,
+};
+
+static struct perf_ibs perf_ibs_op = {
+ .pmu = {
+ .task_ctx_nr = perf_invalid_context,
+
+ .event_init = perf_ibs_init,
+ .add = perf_ibs_add,
+ .del = perf_ibs_del,
+ .start = perf_ibs_start,
+ .stop = perf_ibs_stop,
+ .read = perf_ibs_read,
+ },
+ .msr = MSR_AMD64_IBSOPCTL,
+ .config_mask = IBS_OP_CONFIG_MASK,
+ .cnt_mask = IBS_OP_MAX_CNT,
+ .enable_mask = IBS_OP_ENABLE,
+ .valid_mask = IBS_OP_VAL,
+ .max_period = IBS_OP_MAX_CNT << 4,
+ .offset_mask = { MSR_AMD64_IBSOP_REG_MASK },
+ .offset_max = MSR_AMD64_IBSOP_REG_COUNT,
+ .format_attrs = ibs_op_format_attrs,
+
+ .get_count = get_ibs_op_count,
+};
+
+static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs)
+{
+ struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
+ struct perf_event *event = pcpu->event;
+ struct hw_perf_event *hwc = &event->hw;
+ struct perf_sample_data data;
+ struct perf_raw_record raw;
+ struct pt_regs regs;
+ struct perf_ibs_data ibs_data;
+ int offset, size, check_rip, offset_max, throttle = 0;
+ unsigned int msr;
+ u64 *buf, *config, period;
+
+ if (!test_bit(IBS_STARTED, pcpu->state)) {
+fail:
+ /*
+ * Catch spurious interrupts after stopping IBS: After
+ * disabling IBS there could be still incoming NMIs
+ * with samples that even have the valid bit cleared.
+ * Mark all this NMIs as handled.
+ */
+ if (test_and_clear_bit(IBS_STOPPED, pcpu->state))
+ return 1;
+
+ return 0;
+ }
+
+ msr = hwc->config_base;
+ buf = ibs_data.regs;
+ rdmsrl(msr, *buf);
+ if (!(*buf++ & perf_ibs->valid_mask))
+ goto fail;
+
+ config = &ibs_data.regs[0];
+ perf_ibs_event_update(perf_ibs, event, config);
+ perf_sample_data_init(&data, 0, hwc->last_period);
+ if (!perf_ibs_set_period(perf_ibs, hwc, &period))
+ goto out; /* no sw counter overflow */
+
+ ibs_data.caps = ibs_caps;
+ size = 1;
+ offset = 1;
+ check_rip = (perf_ibs == &perf_ibs_op && (ibs_caps & IBS_CAPS_RIPINVALIDCHK));
+ if (event->attr.sample_type & PERF_SAMPLE_RAW)
+ offset_max = perf_ibs->offset_max;
+ else if (check_rip)
+ offset_max = 2;
+ else
+ offset_max = 1;
+ do {
+ rdmsrl(msr + offset, *buf++);
+ size++;
+ offset = find_next_bit(perf_ibs->offset_mask,
+ perf_ibs->offset_max,
+ offset + 1);
+ } while (offset < offset_max);
+ if (event->attr.sample_type & PERF_SAMPLE_RAW) {
+ /*
+ * Read IbsBrTarget and IbsOpData4 separately
+ * depending on their availability.
+ * Can't add to offset_max as they are staggered
+ */
+ if (ibs_caps & IBS_CAPS_BRNTRGT) {
+ rdmsrl(MSR_AMD64_IBSBRTARGET, *buf++);
+ size++;
+ }
+ if (ibs_caps & IBS_CAPS_OPDATA4) {
+ rdmsrl(MSR_AMD64_IBSOPDATA4, *buf++);
+ size++;
+ }
+ }
+ ibs_data.size = sizeof(u64) * size;
+
+ regs = *iregs;
+ if (check_rip && (ibs_data.regs[2] & IBS_RIP_INVALID)) {
+ regs.flags &= ~PERF_EFLAGS_EXACT;
+ } else {
+ set_linear_ip(&regs, ibs_data.regs[1]);
+ regs.flags |= PERF_EFLAGS_EXACT;
+ }
+
+ if (event->attr.sample_type & PERF_SAMPLE_RAW) {
+ raw.size = sizeof(u32) + ibs_data.size;
+ raw.data = ibs_data.data;
+ data.raw = &raw;
+ }
+
+ throttle = perf_event_overflow(event, &data, &regs);
+out:
+ if (throttle)
+ perf_ibs_stop(event, 0);
+ else
+ perf_ibs_enable_event(perf_ibs, hwc, period >> 4);
+
+ perf_event_update_userpage(event);
+
+ return 1;
+}
+
+static int
+perf_ibs_nmi_handler(unsigned int cmd, struct pt_regs *regs)
+{
+ u64 stamp = sched_clock();
+ int handled = 0;
+
+ handled += perf_ibs_handle_irq(&perf_ibs_fetch, regs);
+ handled += perf_ibs_handle_irq(&perf_ibs_op, regs);
+
+ if (handled)
+ inc_irq_stat(apic_perf_irqs);
+
+ perf_sample_event_took(sched_clock() - stamp);
+
+ return handled;
+}
+NOKPROBE_SYMBOL(perf_ibs_nmi_handler);
+
+static __init int perf_ibs_pmu_init(struct perf_ibs *perf_ibs, char *name)
+{
+ struct cpu_perf_ibs __percpu *pcpu;
+ int ret;
+
+ pcpu = alloc_percpu(struct cpu_perf_ibs);
+ if (!pcpu)
+ return -ENOMEM;
+
+ perf_ibs->pcpu = pcpu;
+
+ /* register attributes */
+ if (perf_ibs->format_attrs[0]) {
+ memset(&perf_ibs->format_group, 0, sizeof(perf_ibs->format_group));
+ perf_ibs->format_group.name = "format";
+ perf_ibs->format_group.attrs = perf_ibs->format_attrs;
+
+ memset(&perf_ibs->attr_groups, 0, sizeof(perf_ibs->attr_groups));
+ perf_ibs->attr_groups[0] = &perf_ibs->format_group;
+ perf_ibs->pmu.attr_groups = perf_ibs->attr_groups;
+ }
+
+ ret = perf_pmu_register(&perf_ibs->pmu, name, -1);
+ if (ret) {
+ perf_ibs->pcpu = NULL;
+ free_percpu(pcpu);
+ }
+
+ return ret;
+}
+
+static __init int perf_event_ibs_init(void)
+{
+ struct attribute **attr = ibs_op_format_attrs;
+
+ if (!ibs_caps)
+ return -ENODEV; /* ibs not supported by the cpu */
+
+ perf_ibs_pmu_init(&perf_ibs_fetch, "ibs_fetch");
+
+ if (ibs_caps & IBS_CAPS_OPCNT) {
+ perf_ibs_op.config_mask |= IBS_OP_CNT_CTL;
+ *attr++ = &format_attr_cnt_ctl.attr;
+ }
+ perf_ibs_pmu_init(&perf_ibs_op, "ibs_op");
+
+ register_nmi_handler(NMI_LOCAL, perf_ibs_nmi_handler, 0, "perf_ibs");
+ pr_info("perf: AMD IBS detected (0x%08x)\n", ibs_caps);
+
+ return 0;
+}
+
+#else /* defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD) */
+
+static __init int perf_event_ibs_init(void) { return 0; }
+
+#endif
+
+/* IBS - apic initialization, for perf and oprofile */
+
+static __init u32 __get_ibs_caps(void)
+{
+ u32 caps;
+ unsigned int max_level;
+
+ if (!boot_cpu_has(X86_FEATURE_IBS))
+ return 0;
+
+ /* check IBS cpuid feature flags */
+ max_level = cpuid_eax(0x80000000);
+ if (max_level < IBS_CPUID_FEATURES)
+ return IBS_CAPS_DEFAULT;
+
+ caps = cpuid_eax(IBS_CPUID_FEATURES);
+ if (!(caps & IBS_CAPS_AVAIL))
+ /* cpuid flags not valid */
+ return IBS_CAPS_DEFAULT;
+
+ return caps;
+}
+
+u32 get_ibs_caps(void)
+{
+ return ibs_caps;
+}
+
+EXPORT_SYMBOL(get_ibs_caps);
+
+static inline int get_eilvt(int offset)
+{
+ return !setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_NMI, 1);
+}
+
+static inline int put_eilvt(int offset)
+{
+ return !setup_APIC_eilvt(offset, 0, 0, 1);
+}
+
+/*
+ * Check and reserve APIC extended interrupt LVT offset for IBS if available.
+ */
+static inline int ibs_eilvt_valid(void)
+{
+ int offset;
+ u64 val;
+ int valid = 0;
+
+ preempt_disable();
+
+ rdmsrl(MSR_AMD64_IBSCTL, val);
+ offset = val & IBSCTL_LVT_OFFSET_MASK;
+
+ if (!(val & IBSCTL_LVT_OFFSET_VALID)) {
+ pr_err(FW_BUG "cpu %d, invalid IBS interrupt offset %d (MSR%08X=0x%016llx)\n",
+ smp_processor_id(), offset, MSR_AMD64_IBSCTL, val);
+ goto out;
+ }
+
+ if (!get_eilvt(offset)) {
+ pr_err(FW_BUG "cpu %d, IBS interrupt offset %d not available (MSR%08X=0x%016llx)\n",
+ smp_processor_id(), offset, MSR_AMD64_IBSCTL, val);
+ goto out;
+ }
+
+ valid = 1;
+out:
+ preempt_enable();
+
+ return valid;
+}
+
+static int setup_ibs_ctl(int ibs_eilvt_off)
+{
+ struct pci_dev *cpu_cfg;
+ int nodes;
+ u32 value = 0;
+
+ nodes = 0;
+ cpu_cfg = NULL;
+ do {
+ cpu_cfg = pci_get_device(PCI_VENDOR_ID_AMD,
+ PCI_DEVICE_ID_AMD_10H_NB_MISC,
+ cpu_cfg);
+ if (!cpu_cfg)
+ break;
+ ++nodes;
+ pci_write_config_dword(cpu_cfg, IBSCTL, ibs_eilvt_off
+ | IBSCTL_LVT_OFFSET_VALID);
+ pci_read_config_dword(cpu_cfg, IBSCTL, &value);
+ if (value != (ibs_eilvt_off | IBSCTL_LVT_OFFSET_VALID)) {
+ pci_dev_put(cpu_cfg);
+ pr_debug("Failed to setup IBS LVT offset, IBSCTL = 0x%08x\n",
+ value);
+ return -EINVAL;
+ }
+ } while (1);
+
+ if (!nodes) {
+ pr_debug("No CPU node configured for IBS\n");
+ return -ENODEV;
+ }
+
+ return 0;
+}
+
+/*
+ * This runs only on the current cpu. We try to find an LVT offset and
+ * setup the local APIC. For this we must disable preemption. On
+ * success we initialize all nodes with this offset. This updates then
+ * the offset in the IBS_CTL per-node msr. The per-core APIC setup of
+ * the IBS interrupt vector is handled by perf_ibs_cpu_notifier that
+ * is using the new offset.
+ */
+static void force_ibs_eilvt_setup(void)
+{
+ int offset;
+ int ret;
+
+ preempt_disable();
+ /* find the next free available EILVT entry, skip offset 0 */
+ for (offset = 1; offset < APIC_EILVT_NR_MAX; offset++) {
+ if (get_eilvt(offset))
+ break;
+ }
+ preempt_enable();
+
+ if (offset == APIC_EILVT_NR_MAX) {
+ pr_debug("No EILVT entry available\n");
+ return;
+ }
+
+ ret = setup_ibs_ctl(offset);
+ if (ret)
+ goto out;
+
+ if (!ibs_eilvt_valid())
+ goto out;
+
+ pr_info("IBS: LVT offset %d assigned\n", offset);
+
+ return;
+out:
+ preempt_disable();
+ put_eilvt(offset);
+ preempt_enable();
+ return;
+}
+
+static void ibs_eilvt_setup(void)
+{
+ /*
+ * Force LVT offset assignment for family 10h: The offsets are
+ * not assigned by the BIOS for this family, so the OS is
+ * responsible for doing it. If the OS assignment fails, fall
+ * back to BIOS settings and try to setup this.
+ */
+ if (boot_cpu_data.x86 == 0x10)
+ force_ibs_eilvt_setup();
+}
+
+static inline int get_ibs_lvt_offset(void)
+{
+ u64 val;
+
+ rdmsrl(MSR_AMD64_IBSCTL, val);
+ if (!(val & IBSCTL_LVT_OFFSET_VALID))
+ return -EINVAL;
+
+ return val & IBSCTL_LVT_OFFSET_MASK;
+}
+
+static void setup_APIC_ibs(void *dummy)
+{
+ int offset;
+
+ offset = get_ibs_lvt_offset();
+ if (offset < 0)
+ goto failed;
+
+ if (!setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_NMI, 0))
+ return;
+failed:
+ pr_warn("perf: IBS APIC setup failed on cpu #%d\n",
+ smp_processor_id());
+}
+
+static void clear_APIC_ibs(void *dummy)
+{
+ int offset;
+
+ offset = get_ibs_lvt_offset();
+ if (offset >= 0)
+ setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_FIX, 1);
+}
+
+#ifdef CONFIG_PM
+
+static int perf_ibs_suspend(void)
+{
+ clear_APIC_ibs(NULL);
+ return 0;
+}
+
+static void perf_ibs_resume(void)
+{
+ ibs_eilvt_setup();
+ setup_APIC_ibs(NULL);
+}
+
+static struct syscore_ops perf_ibs_syscore_ops = {
+ .resume = perf_ibs_resume,
+ .suspend = perf_ibs_suspend,
+};
+
+static void perf_ibs_pm_init(void)
+{
+ register_syscore_ops(&perf_ibs_syscore_ops);
+}
+
+#else
+
+static inline void perf_ibs_pm_init(void) { }
+
+#endif
+
+static int
+perf_ibs_cpu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
+{
+ switch (action & ~CPU_TASKS_FROZEN) {
+ case CPU_STARTING:
+ setup_APIC_ibs(NULL);
+ break;
+ case CPU_DYING:
+ clear_APIC_ibs(NULL);
+ break;
+ default:
+ break;
+ }
+
+ return NOTIFY_OK;
+}
+
+static __init int amd_ibs_init(void)
+{
+ u32 caps;
+ int ret = -EINVAL;
+
+ caps = __get_ibs_caps();
+ if (!caps)
+ return -ENODEV; /* ibs not supported by the cpu */
+
+ ibs_eilvt_setup();
+
+ if (!ibs_eilvt_valid())
+ goto out;
+
+ perf_ibs_pm_init();
+ cpu_notifier_register_begin();
+ ibs_caps = caps;
+ /* make ibs_caps visible to other cpus: */
+ smp_mb();
+ smp_call_function(setup_APIC_ibs, NULL, 1);
+ __perf_cpu_notifier(perf_ibs_cpu_notifier);
+ cpu_notifier_register_done();
+
+ ret = perf_event_ibs_init();
+out:
+ if (ret)
+ pr_err("Failed to setup IBS, %d\n", ret);
+ return ret;
+}
+
+/* Since we need the pci subsystem to init ibs we can't do this earlier: */
+device_initcall(amd_ibs_init);
diff --git a/arch/x86/events/amd/iommu.c b/arch/x86/events/amd/iommu.c
new file mode 100644
index 000000000..6011a573d
--- /dev/null
+++ b/arch/x86/events/amd/iommu.c
@@ -0,0 +1,505 @@
+/*
+ * Copyright (C) 2013 Advanced Micro Devices, Inc.
+ *
+ * Author: Steven Kinney <Steven.Kinney@amd.com>
+ * Author: Suravee Suthikulpanit <Suraveee.Suthikulpanit@amd.com>
+ *
+ * Perf: amd_iommu - AMD IOMMU Performance Counter PMU implementation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/perf_event.h>
+#include <linux/module.h>
+#include <linux/cpumask.h>
+#include <linux/slab.h>
+
+#include "../perf_event.h"
+#include "iommu.h"
+
+#define COUNTER_SHIFT 16
+
+#define _GET_BANK(ev) ((u8)(ev->hw.extra_reg.reg >> 8))
+#define _GET_CNTR(ev) ((u8)(ev->hw.extra_reg.reg))
+
+/* iommu pmu config masks */
+#define _GET_CSOURCE(ev) ((ev->hw.config & 0xFFULL))
+#define _GET_DEVID(ev) ((ev->hw.config >> 8) & 0xFFFFULL)
+#define _GET_PASID(ev) ((ev->hw.config >> 24) & 0xFFFFULL)
+#define _GET_DOMID(ev) ((ev->hw.config >> 40) & 0xFFFFULL)
+#define _GET_DEVID_MASK(ev) ((ev->hw.extra_reg.config) & 0xFFFFULL)
+#define _GET_PASID_MASK(ev) ((ev->hw.extra_reg.config >> 16) & 0xFFFFULL)
+#define _GET_DOMID_MASK(ev) ((ev->hw.extra_reg.config >> 32) & 0xFFFFULL)
+
+static struct perf_amd_iommu __perf_iommu;
+
+struct perf_amd_iommu {
+ struct pmu pmu;
+ u8 max_banks;
+ u8 max_counters;
+ u64 cntr_assign_mask;
+ raw_spinlock_t lock;
+ const struct attribute_group *attr_groups[4];
+};
+
+#define format_group attr_groups[0]
+#define cpumask_group attr_groups[1]
+#define events_group attr_groups[2]
+#define null_group attr_groups[3]
+
+/*---------------------------------------------
+ * sysfs format attributes
+ *---------------------------------------------*/
+PMU_FORMAT_ATTR(csource, "config:0-7");
+PMU_FORMAT_ATTR(devid, "config:8-23");
+PMU_FORMAT_ATTR(pasid, "config:24-39");
+PMU_FORMAT_ATTR(domid, "config:40-55");
+PMU_FORMAT_ATTR(devid_mask, "config1:0-15");
+PMU_FORMAT_ATTR(pasid_mask, "config1:16-31");
+PMU_FORMAT_ATTR(domid_mask, "config1:32-47");
+
+static struct attribute *iommu_format_attrs[] = {
+ &format_attr_csource.attr,
+ &format_attr_devid.attr,
+ &format_attr_pasid.attr,
+ &format_attr_domid.attr,
+ &format_attr_devid_mask.attr,
+ &format_attr_pasid_mask.attr,
+ &format_attr_domid_mask.attr,
+ NULL,
+};
+
+static struct attribute_group amd_iommu_format_group = {
+ .name = "format",
+ .attrs = iommu_format_attrs,
+};
+
+/*---------------------------------------------
+ * sysfs events attributes
+ *---------------------------------------------*/
+struct amd_iommu_event_desc {
+ struct kobj_attribute attr;
+ const char *event;
+};
+
+static ssize_t _iommu_event_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct amd_iommu_event_desc *event =
+ container_of(attr, struct amd_iommu_event_desc, attr);
+ return sprintf(buf, "%s\n", event->event);
+}
+
+#define AMD_IOMMU_EVENT_DESC(_name, _event) \
+{ \
+ .attr = __ATTR(_name, 0444, _iommu_event_show, NULL), \
+ .event = _event, \
+}
+
+static struct amd_iommu_event_desc amd_iommu_v2_event_descs[] = {
+ AMD_IOMMU_EVENT_DESC(mem_pass_untrans, "csource=0x01"),
+ AMD_IOMMU_EVENT_DESC(mem_pass_pretrans, "csource=0x02"),
+ AMD_IOMMU_EVENT_DESC(mem_pass_excl, "csource=0x03"),
+ AMD_IOMMU_EVENT_DESC(mem_target_abort, "csource=0x04"),
+ AMD_IOMMU_EVENT_DESC(mem_trans_total, "csource=0x05"),
+ AMD_IOMMU_EVENT_DESC(mem_iommu_tlb_pte_hit, "csource=0x06"),
+ AMD_IOMMU_EVENT_DESC(mem_iommu_tlb_pte_mis, "csource=0x07"),
+ AMD_IOMMU_EVENT_DESC(mem_iommu_tlb_pde_hit, "csource=0x08"),
+ AMD_IOMMU_EVENT_DESC(mem_iommu_tlb_pde_mis, "csource=0x09"),
+ AMD_IOMMU_EVENT_DESC(mem_dte_hit, "csource=0x0a"),
+ AMD_IOMMU_EVENT_DESC(mem_dte_mis, "csource=0x0b"),
+ AMD_IOMMU_EVENT_DESC(page_tbl_read_tot, "csource=0x0c"),
+ AMD_IOMMU_EVENT_DESC(page_tbl_read_nst, "csource=0x0d"),
+ AMD_IOMMU_EVENT_DESC(page_tbl_read_gst, "csource=0x0e"),
+ AMD_IOMMU_EVENT_DESC(int_dte_hit, "csource=0x0f"),
+ AMD_IOMMU_EVENT_DESC(int_dte_mis, "csource=0x10"),
+ AMD_IOMMU_EVENT_DESC(cmd_processed, "csource=0x11"),
+ AMD_IOMMU_EVENT_DESC(cmd_processed_inv, "csource=0x12"),
+ AMD_IOMMU_EVENT_DESC(tlb_inv, "csource=0x13"),
+ AMD_IOMMU_EVENT_DESC(ign_rd_wr_mmio_1ff8h, "csource=0x14"),
+ AMD_IOMMU_EVENT_DESC(vapic_int_non_guest, "csource=0x15"),
+ AMD_IOMMU_EVENT_DESC(vapic_int_guest, "csource=0x16"),
+ AMD_IOMMU_EVENT_DESC(smi_recv, "csource=0x17"),
+ AMD_IOMMU_EVENT_DESC(smi_blk, "csource=0x18"),
+ { /* end: all zeroes */ },
+};
+
+/*---------------------------------------------
+ * sysfs cpumask attributes
+ *---------------------------------------------*/
+static cpumask_t iommu_cpumask;
+
+static ssize_t _iommu_cpumask_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ return cpumap_print_to_pagebuf(true, buf, &iommu_cpumask);
+}
+static DEVICE_ATTR(cpumask, S_IRUGO, _iommu_cpumask_show, NULL);
+
+static struct attribute *iommu_cpumask_attrs[] = {
+ &dev_attr_cpumask.attr,
+ NULL,
+};
+
+static struct attribute_group amd_iommu_cpumask_group = {
+ .attrs = iommu_cpumask_attrs,
+};
+
+/*---------------------------------------------*/
+
+static int get_next_avail_iommu_bnk_cntr(struct perf_amd_iommu *perf_iommu)
+{
+ unsigned long flags;
+ int shift, bank, cntr, retval;
+ int max_banks = perf_iommu->max_banks;
+ int max_cntrs = perf_iommu->max_counters;
+
+ raw_spin_lock_irqsave(&perf_iommu->lock, flags);
+
+ for (bank = 0, shift = 0; bank < max_banks; bank++) {
+ for (cntr = 0; cntr < max_cntrs; cntr++) {
+ shift = bank + (bank*3) + cntr;
+ if (perf_iommu->cntr_assign_mask & (1ULL<<shift)) {
+ continue;
+ } else {
+ perf_iommu->cntr_assign_mask |= (1ULL<<shift);
+ retval = ((u16)((u16)bank<<8) | (u8)(cntr));
+ goto out;
+ }
+ }
+ }
+ retval = -ENOSPC;
+out:
+ raw_spin_unlock_irqrestore(&perf_iommu->lock, flags);
+ return retval;
+}
+
+static int clear_avail_iommu_bnk_cntr(struct perf_amd_iommu *perf_iommu,
+ u8 bank, u8 cntr)
+{
+ unsigned long flags;
+ int max_banks, max_cntrs;
+ int shift = 0;
+
+ max_banks = perf_iommu->max_banks;
+ max_cntrs = perf_iommu->max_counters;
+
+ if ((bank > max_banks) || (cntr > max_cntrs))
+ return -EINVAL;
+
+ shift = bank + cntr + (bank*3);
+
+ raw_spin_lock_irqsave(&perf_iommu->lock, flags);
+ perf_iommu->cntr_assign_mask &= ~(1ULL<<shift);
+ raw_spin_unlock_irqrestore(&perf_iommu->lock, flags);
+
+ return 0;
+}
+
+static int perf_iommu_event_init(struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ struct perf_amd_iommu *perf_iommu;
+ u64 config, config1;
+
+ /* test the event attr type check for PMU enumeration */
+ if (event->attr.type != event->pmu->type)
+ return -ENOENT;
+
+ /*
+ * IOMMU counters are shared across all cores.
+ * Therefore, it does not support per-process mode.
+ * Also, it does not support event sampling mode.
+ */
+ if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK)
+ return -EINVAL;
+
+ /* IOMMU counters do not have usr/os/guest/host bits */
+ if (event->attr.exclude_user || event->attr.exclude_kernel ||
+ event->attr.exclude_host || event->attr.exclude_guest)
+ return -EINVAL;
+
+ if (event->cpu < 0)
+ return -EINVAL;
+
+ perf_iommu = &__perf_iommu;
+
+ if (event->pmu != &perf_iommu->pmu)
+ return -ENOENT;
+
+ if (perf_iommu) {
+ config = event->attr.config;
+ config1 = event->attr.config1;
+ } else {
+ return -EINVAL;
+ }
+
+ /* integrate with iommu base devid (0000), assume one iommu */
+ perf_iommu->max_banks =
+ amd_iommu_pc_get_max_banks(IOMMU_BASE_DEVID);
+ perf_iommu->max_counters =
+ amd_iommu_pc_get_max_counters(IOMMU_BASE_DEVID);
+ if ((perf_iommu->max_banks == 0) || (perf_iommu->max_counters == 0))
+ return -EINVAL;
+
+ /* update the hw_perf_event struct with the iommu config data */
+ hwc->config = config;
+ hwc->extra_reg.config = config1;
+
+ return 0;
+}
+
+static void perf_iommu_enable_event(struct perf_event *ev)
+{
+ u8 csource = _GET_CSOURCE(ev);
+ u16 devid = _GET_DEVID(ev);
+ u64 reg = 0ULL;
+
+ reg = csource;
+ amd_iommu_pc_get_set_reg_val(devid,
+ _GET_BANK(ev), _GET_CNTR(ev) ,
+ IOMMU_PC_COUNTER_SRC_REG, &reg, true);
+
+ reg = 0ULL | devid | (_GET_DEVID_MASK(ev) << 32);
+ if (reg)
+ reg |= (1UL << 31);
+ amd_iommu_pc_get_set_reg_val(devid,
+ _GET_BANK(ev), _GET_CNTR(ev) ,
+ IOMMU_PC_DEVID_MATCH_REG, &reg, true);
+
+ reg = 0ULL | _GET_PASID(ev) | (_GET_PASID_MASK(ev) << 32);
+ if (reg)
+ reg |= (1UL << 31);
+ amd_iommu_pc_get_set_reg_val(devid,
+ _GET_BANK(ev), _GET_CNTR(ev) ,
+ IOMMU_PC_PASID_MATCH_REG, &reg, true);
+
+ reg = 0ULL | _GET_DOMID(ev) | (_GET_DOMID_MASK(ev) << 32);
+ if (reg)
+ reg |= (1UL << 31);
+ amd_iommu_pc_get_set_reg_val(devid,
+ _GET_BANK(ev), _GET_CNTR(ev) ,
+ IOMMU_PC_DOMID_MATCH_REG, &reg, true);
+}
+
+static void perf_iommu_disable_event(struct perf_event *event)
+{
+ u64 reg = 0ULL;
+
+ amd_iommu_pc_get_set_reg_val(_GET_DEVID(event),
+ _GET_BANK(event), _GET_CNTR(event),
+ IOMMU_PC_COUNTER_SRC_REG, &reg, true);
+}
+
+static void perf_iommu_start(struct perf_event *event, int flags)
+{
+ struct hw_perf_event *hwc = &event->hw;
+
+ pr_debug("perf: amd_iommu:perf_iommu_start\n");
+ if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED)))
+ return;
+
+ WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
+ hwc->state = 0;
+
+ if (flags & PERF_EF_RELOAD) {
+ u64 prev_raw_count = local64_read(&hwc->prev_count);
+ amd_iommu_pc_get_set_reg_val(_GET_DEVID(event),
+ _GET_BANK(event), _GET_CNTR(event),
+ IOMMU_PC_COUNTER_REG, &prev_raw_count, true);
+ }
+
+ perf_iommu_enable_event(event);
+ perf_event_update_userpage(event);
+
+}
+
+static void perf_iommu_read(struct perf_event *event)
+{
+ u64 count = 0ULL;
+ u64 prev_raw_count = 0ULL;
+ u64 delta = 0ULL;
+ struct hw_perf_event *hwc = &event->hw;
+ pr_debug("perf: amd_iommu:perf_iommu_read\n");
+
+ amd_iommu_pc_get_set_reg_val(_GET_DEVID(event),
+ _GET_BANK(event), _GET_CNTR(event),
+ IOMMU_PC_COUNTER_REG, &count, false);
+
+ /* IOMMU pc counter register is only 48 bits */
+ count &= 0xFFFFFFFFFFFFULL;
+
+ prev_raw_count = local64_read(&hwc->prev_count);
+ if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
+ count) != prev_raw_count)
+ return;
+
+ /* Handling 48-bit counter overflowing */
+ delta = (count << COUNTER_SHIFT) - (prev_raw_count << COUNTER_SHIFT);
+ delta >>= COUNTER_SHIFT;
+ local64_add(delta, &event->count);
+
+}
+
+static void perf_iommu_stop(struct perf_event *event, int flags)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ u64 config;
+
+ pr_debug("perf: amd_iommu:perf_iommu_stop\n");
+
+ if (hwc->state & PERF_HES_UPTODATE)
+ return;
+
+ perf_iommu_disable_event(event);
+ WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
+ hwc->state |= PERF_HES_STOPPED;
+
+ if (hwc->state & PERF_HES_UPTODATE)
+ return;
+
+ config = hwc->config;
+ perf_iommu_read(event);
+ hwc->state |= PERF_HES_UPTODATE;
+}
+
+static int perf_iommu_add(struct perf_event *event, int flags)
+{
+ int retval;
+ struct perf_amd_iommu *perf_iommu =
+ container_of(event->pmu, struct perf_amd_iommu, pmu);
+
+ pr_debug("perf: amd_iommu:perf_iommu_add\n");
+ event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
+
+ /* request an iommu bank/counter */
+ retval = get_next_avail_iommu_bnk_cntr(perf_iommu);
+ if (retval != -ENOSPC)
+ event->hw.extra_reg.reg = (u16)retval;
+ else
+ return retval;
+
+ if (flags & PERF_EF_START)
+ perf_iommu_start(event, PERF_EF_RELOAD);
+
+ return 0;
+}
+
+static void perf_iommu_del(struct perf_event *event, int flags)
+{
+ struct perf_amd_iommu *perf_iommu =
+ container_of(event->pmu, struct perf_amd_iommu, pmu);
+
+ pr_debug("perf: amd_iommu:perf_iommu_del\n");
+ perf_iommu_stop(event, PERF_EF_UPDATE);
+
+ /* clear the assigned iommu bank/counter */
+ clear_avail_iommu_bnk_cntr(perf_iommu,
+ _GET_BANK(event),
+ _GET_CNTR(event));
+
+ perf_event_update_userpage(event);
+}
+
+static __init int _init_events_attrs(struct perf_amd_iommu *perf_iommu)
+{
+ struct attribute **attrs;
+ struct attribute_group *attr_group;
+ int i = 0, j;
+
+ while (amd_iommu_v2_event_descs[i].attr.attr.name)
+ i++;
+
+ attr_group = kzalloc(sizeof(struct attribute *)
+ * (i + 1) + sizeof(*attr_group), GFP_KERNEL);
+ if (!attr_group)
+ return -ENOMEM;
+
+ attrs = (struct attribute **)(attr_group + 1);
+ for (j = 0; j < i; j++)
+ attrs[j] = &amd_iommu_v2_event_descs[j].attr.attr;
+
+ attr_group->name = "events";
+ attr_group->attrs = attrs;
+ perf_iommu->events_group = attr_group;
+
+ return 0;
+}
+
+static __init void amd_iommu_pc_exit(void)
+{
+ if (__perf_iommu.events_group != NULL) {
+ kfree(__perf_iommu.events_group);
+ __perf_iommu.events_group = NULL;
+ }
+}
+
+static __init int _init_perf_amd_iommu(
+ struct perf_amd_iommu *perf_iommu, char *name)
+{
+ int ret;
+
+ raw_spin_lock_init(&perf_iommu->lock);
+
+ /* Init format attributes */
+ perf_iommu->format_group = &amd_iommu_format_group;
+
+ /* Init cpumask attributes to only core 0 */
+ cpumask_set_cpu(0, &iommu_cpumask);
+ perf_iommu->cpumask_group = &amd_iommu_cpumask_group;
+
+ /* Init events attributes */
+ if (_init_events_attrs(perf_iommu) != 0)
+ pr_err("perf: amd_iommu: Only support raw events.\n");
+
+ /* Init null attributes */
+ perf_iommu->null_group = NULL;
+ perf_iommu->pmu.attr_groups = perf_iommu->attr_groups;
+
+ ret = perf_pmu_register(&perf_iommu->pmu, name, -1);
+ if (ret) {
+ pr_err("perf: amd_iommu: Failed to initialized.\n");
+ amd_iommu_pc_exit();
+ } else {
+ pr_info("perf: amd_iommu: Detected. (%d banks, %d counters/bank)\n",
+ amd_iommu_pc_get_max_banks(IOMMU_BASE_DEVID),
+ amd_iommu_pc_get_max_counters(IOMMU_BASE_DEVID));
+ }
+
+ return ret;
+}
+
+static struct perf_amd_iommu __perf_iommu = {
+ .pmu = {
+ .task_ctx_nr = perf_invalid_context,
+ .event_init = perf_iommu_event_init,
+ .add = perf_iommu_add,
+ .del = perf_iommu_del,
+ .start = perf_iommu_start,
+ .stop = perf_iommu_stop,
+ .read = perf_iommu_read,
+ },
+ .max_banks = 0x00,
+ .max_counters = 0x00,
+ .cntr_assign_mask = 0ULL,
+ .format_group = NULL,
+ .cpumask_group = NULL,
+ .events_group = NULL,
+ .null_group = NULL,
+};
+
+static __init int amd_iommu_pc_init(void)
+{
+ /* Make sure the IOMMU PC resource is available */
+ if (!amd_iommu_pc_supported())
+ return -ENODEV;
+
+ _init_perf_amd_iommu(&__perf_iommu, "amd_iommu");
+
+ return 0;
+}
+
+device_initcall(amd_iommu_pc_init);
diff --git a/arch/x86/events/amd/iommu.h b/arch/x86/events/amd/iommu.h
new file mode 100644
index 000000000..845d17327
--- /dev/null
+++ b/arch/x86/events/amd/iommu.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (C) 2013 Advanced Micro Devices, Inc.
+ *
+ * Author: Steven Kinney <Steven.Kinney@amd.com>
+ * Author: Suravee Suthikulpanit <Suraveee.Suthikulpanit@amd.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef _PERF_EVENT_AMD_IOMMU_H_
+#define _PERF_EVENT_AMD_IOMMU_H_
+
+/* iommu pc mmio region register indexes */
+#define IOMMU_PC_COUNTER_REG 0x00
+#define IOMMU_PC_COUNTER_SRC_REG 0x08
+#define IOMMU_PC_PASID_MATCH_REG 0x10
+#define IOMMU_PC_DOMID_MATCH_REG 0x18
+#define IOMMU_PC_DEVID_MATCH_REG 0x20
+#define IOMMU_PC_COUNTER_REPORT_REG 0x28
+
+/* maximun specified bank/counters */
+#define PC_MAX_SPEC_BNKS 64
+#define PC_MAX_SPEC_CNTRS 16
+
+/* iommu pc reg masks*/
+#define IOMMU_BASE_DEVID 0x0000
+
+/* amd_iommu_init.c external support functions */
+extern bool amd_iommu_pc_supported(void);
+
+extern u8 amd_iommu_pc_get_max_banks(u16 devid);
+
+extern u8 amd_iommu_pc_get_max_counters(u16 devid);
+
+extern int amd_iommu_pc_get_set_reg_val(u16 devid, u8 bank, u8 cntr,
+ u8 fxn, u64 *value, bool is_write);
+
+#endif /*_PERF_EVENT_AMD_IOMMU_H_*/
diff --git a/arch/x86/events/amd/power.c b/arch/x86/events/amd/power.c
new file mode 100644
index 000000000..55a3529db
--- /dev/null
+++ b/arch/x86/events/amd/power.c
@@ -0,0 +1,353 @@
+/*
+ * Performance events - AMD Processor Power Reporting Mechanism
+ *
+ * Copyright (C) 2016 Advanced Micro Devices, Inc.
+ *
+ * Author: Huang Rui <ray.huang@amd.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/perf_event.h>
+#include <asm/cpu_device_id.h>
+#include "../perf_event.h"
+
+#define MSR_F15H_CU_PWR_ACCUMULATOR 0xc001007a
+#define MSR_F15H_CU_MAX_PWR_ACCUMULATOR 0xc001007b
+#define MSR_F15H_PTSC 0xc0010280
+
+/* Event code: LSB 8 bits, passed in attr->config any other bit is reserved. */
+#define AMD_POWER_EVENT_MASK 0xFFULL
+
+/*
+ * Accumulated power status counters.
+ */
+#define AMD_POWER_EVENTSEL_PKG 1
+
+/*
+ * The ratio of compute unit power accumulator sample period to the
+ * PTSC period.
+ */
+static unsigned int cpu_pwr_sample_ratio;
+
+/* Maximum accumulated power of a compute unit. */
+static u64 max_cu_acc_power;
+
+static struct pmu pmu_class;
+
+/*
+ * Accumulated power represents the sum of each compute unit's (CU) power
+ * consumption. On any core of each CU we read the total accumulated power from
+ * MSR_F15H_CU_PWR_ACCUMULATOR. cpu_mask represents CPU bit map of all cores
+ * which are picked to measure the power for the CUs they belong to.
+ */
+static cpumask_t cpu_mask;
+
+static void event_update(struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ u64 prev_pwr_acc, new_pwr_acc, prev_ptsc, new_ptsc;
+ u64 delta, tdelta;
+
+ prev_pwr_acc = hwc->pwr_acc;
+ prev_ptsc = hwc->ptsc;
+ rdmsrl(MSR_F15H_CU_PWR_ACCUMULATOR, new_pwr_acc);
+ rdmsrl(MSR_F15H_PTSC, new_ptsc);
+
+ /*
+ * Calculate the CU power consumption over a time period, the unit of
+ * final value (delta) is micro-Watts. Then add it to the event count.
+ */
+ if (new_pwr_acc < prev_pwr_acc) {
+ delta = max_cu_acc_power + new_pwr_acc;
+ delta -= prev_pwr_acc;
+ } else
+ delta = new_pwr_acc - prev_pwr_acc;
+
+ delta *= cpu_pwr_sample_ratio * 1000;
+ tdelta = new_ptsc - prev_ptsc;
+
+ do_div(delta, tdelta);
+ local64_add(delta, &event->count);
+}
+
+static void __pmu_event_start(struct perf_event *event)
+{
+ if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
+ return;
+
+ event->hw.state = 0;
+
+ rdmsrl(MSR_F15H_PTSC, event->hw.ptsc);
+ rdmsrl(MSR_F15H_CU_PWR_ACCUMULATOR, event->hw.pwr_acc);
+}
+
+static void pmu_event_start(struct perf_event *event, int mode)
+{
+ __pmu_event_start(event);
+}
+
+static void pmu_event_stop(struct perf_event *event, int mode)
+{
+ struct hw_perf_event *hwc = &event->hw;
+
+ /* Mark event as deactivated and stopped. */
+ if (!(hwc->state & PERF_HES_STOPPED))
+ hwc->state |= PERF_HES_STOPPED;
+
+ /* Check if software counter update is necessary. */
+ if ((mode & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
+ /*
+ * Drain the remaining delta count out of an event
+ * that we are disabling:
+ */
+ event_update(event);
+ hwc->state |= PERF_HES_UPTODATE;
+ }
+}
+
+static int pmu_event_add(struct perf_event *event, int mode)
+{
+ struct hw_perf_event *hwc = &event->hw;
+
+ hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
+
+ if (mode & PERF_EF_START)
+ __pmu_event_start(event);
+
+ return 0;
+}
+
+static void pmu_event_del(struct perf_event *event, int flags)
+{
+ pmu_event_stop(event, PERF_EF_UPDATE);
+}
+
+static int pmu_event_init(struct perf_event *event)
+{
+ u64 cfg = event->attr.config & AMD_POWER_EVENT_MASK;
+
+ /* Only look at AMD power events. */
+ if (event->attr.type != pmu_class.type)
+ return -ENOENT;
+
+ /* Unsupported modes and filters. */
+ if (event->attr.exclude_user ||
+ event->attr.exclude_kernel ||
+ event->attr.exclude_hv ||
+ event->attr.exclude_idle ||
+ event->attr.exclude_host ||
+ event->attr.exclude_guest ||
+ /* no sampling */
+ event->attr.sample_period)
+ return -EINVAL;
+
+ if (cfg != AMD_POWER_EVENTSEL_PKG)
+ return -EINVAL;
+
+ return 0;
+}
+
+static void pmu_event_read(struct perf_event *event)
+{
+ event_update(event);
+}
+
+static ssize_t
+get_attr_cpumask(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ return cpumap_print_to_pagebuf(true, buf, &cpu_mask);
+}
+
+static DEVICE_ATTR(cpumask, S_IRUGO, get_attr_cpumask, NULL);
+
+static struct attribute *pmu_attrs[] = {
+ &dev_attr_cpumask.attr,
+ NULL,
+};
+
+static struct attribute_group pmu_attr_group = {
+ .attrs = pmu_attrs,
+};
+
+/*
+ * Currently it only supports to report the power of each
+ * processor/package.
+ */
+EVENT_ATTR_STR(power-pkg, power_pkg, "event=0x01");
+
+EVENT_ATTR_STR(power-pkg.unit, power_pkg_unit, "mWatts");
+
+/* Convert the count from micro-Watts to milli-Watts. */
+EVENT_ATTR_STR(power-pkg.scale, power_pkg_scale, "1.000000e-3");
+
+static struct attribute *events_attr[] = {
+ EVENT_PTR(power_pkg),
+ EVENT_PTR(power_pkg_unit),
+ EVENT_PTR(power_pkg_scale),
+ NULL,
+};
+
+static struct attribute_group pmu_events_group = {
+ .name = "events",
+ .attrs = events_attr,
+};
+
+PMU_FORMAT_ATTR(event, "config:0-7");
+
+static struct attribute *formats_attr[] = {
+ &format_attr_event.attr,
+ NULL,
+};
+
+static struct attribute_group pmu_format_group = {
+ .name = "format",
+ .attrs = formats_attr,
+};
+
+static const struct attribute_group *attr_groups[] = {
+ &pmu_attr_group,
+ &pmu_format_group,
+ &pmu_events_group,
+ NULL,
+};
+
+static struct pmu pmu_class = {
+ .attr_groups = attr_groups,
+ /* system-wide only */
+ .task_ctx_nr = perf_invalid_context,
+ .event_init = pmu_event_init,
+ .add = pmu_event_add,
+ .del = pmu_event_del,
+ .start = pmu_event_start,
+ .stop = pmu_event_stop,
+ .read = pmu_event_read,
+};
+
+static void power_cpu_exit(int cpu)
+{
+ int target;
+
+ if (!cpumask_test_and_clear_cpu(cpu, &cpu_mask))
+ return;
+
+ /*
+ * Find a new CPU on the same compute unit, if was set in cpumask
+ * and still some CPUs on compute unit. Then migrate event and
+ * context to new CPU.
+ */
+ target = cpumask_any_but(topology_sibling_cpumask(cpu), cpu);
+ if (target < nr_cpumask_bits) {
+ cpumask_set_cpu(target, &cpu_mask);
+ perf_pmu_migrate_context(&pmu_class, cpu, target);
+ }
+}
+
+static void power_cpu_init(int cpu)
+{
+ int target;
+
+ /*
+ * 1) If any CPU is set at cpu_mask in the same compute unit, do
+ * nothing.
+ * 2) If no CPU is set at cpu_mask in the same compute unit,
+ * set current STARTING CPU.
+ *
+ * Note: if there is a CPU aside of the new one already in the
+ * sibling mask, then it is also in cpu_mask.
+ */
+ target = cpumask_any_but(topology_sibling_cpumask(cpu), cpu);
+ if (target >= nr_cpumask_bits)
+ cpumask_set_cpu(cpu, &cpu_mask);
+}
+
+static int
+power_cpu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
+{
+ unsigned int cpu = (long)hcpu;
+
+ switch (action & ~CPU_TASKS_FROZEN) {
+ case CPU_DOWN_FAILED:
+ case CPU_STARTING:
+ power_cpu_init(cpu);
+ break;
+ case CPU_DOWN_PREPARE:
+ power_cpu_exit(cpu);
+ break;
+ default:
+ break;
+ }
+
+ return NOTIFY_OK;
+}
+
+static struct notifier_block power_cpu_notifier_nb = {
+ .notifier_call = power_cpu_notifier,
+ .priority = CPU_PRI_PERF,
+};
+
+static const struct x86_cpu_id cpu_match[] = {
+ { .vendor = X86_VENDOR_AMD, .family = 0x15 },
+ {},
+};
+
+static int __init amd_power_pmu_init(void)
+{
+ int cpu, target, ret;
+
+ if (!x86_match_cpu(cpu_match))
+ return 0;
+
+ if (!boot_cpu_has(X86_FEATURE_ACC_POWER))
+ return -ENODEV;
+
+ cpu_pwr_sample_ratio = cpuid_ecx(0x80000007);
+
+ if (rdmsrl_safe(MSR_F15H_CU_MAX_PWR_ACCUMULATOR, &max_cu_acc_power)) {
+ pr_err("Failed to read max compute unit power accumulator MSR\n");
+ return -ENODEV;
+ }
+
+ cpu_notifier_register_begin();
+
+ /* Choose one online core of each compute unit. */
+ for_each_online_cpu(cpu) {
+ target = cpumask_first(topology_sibling_cpumask(cpu));
+ if (!cpumask_test_cpu(target, &cpu_mask))
+ cpumask_set_cpu(target, &cpu_mask);
+ }
+
+ ret = perf_pmu_register(&pmu_class, "power", -1);
+ if (WARN_ON(ret)) {
+ pr_warn("AMD Power PMU registration failed\n");
+ goto out;
+ }
+
+ __register_cpu_notifier(&power_cpu_notifier_nb);
+
+ pr_info("AMD Power PMU detected\n");
+
+out:
+ cpu_notifier_register_done();
+
+ return ret;
+}
+module_init(amd_power_pmu_init);
+
+static void __exit amd_power_pmu_exit(void)
+{
+ cpu_notifier_register_begin();
+ __unregister_cpu_notifier(&power_cpu_notifier_nb);
+ cpu_notifier_register_done();
+
+ perf_pmu_unregister(&pmu_class);
+}
+module_exit(amd_power_pmu_exit);
+
+MODULE_AUTHOR("Huang Rui <ray.huang@amd.com>");
+MODULE_DESCRIPTION("AMD Processor Power Reporting Mechanism");
+MODULE_LICENSE("GPL v2");
diff --git a/arch/x86/events/amd/uncore.c b/arch/x86/events/amd/uncore.c
new file mode 100644
index 000000000..3db9569e6
--- /dev/null
+++ b/arch/x86/events/amd/uncore.c
@@ -0,0 +1,603 @@
+/*
+ * Copyright (C) 2013 Advanced Micro Devices, Inc.
+ *
+ * Author: Jacob Shin <jacob.shin@amd.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/perf_event.h>
+#include <linux/percpu.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/init.h>
+#include <linux/cpu.h>
+#include <linux/cpumask.h>
+
+#include <asm/cpufeature.h>
+#include <asm/perf_event.h>
+#include <asm/msr.h>
+
+#define NUM_COUNTERS_NB 4
+#define NUM_COUNTERS_L2 4
+#define MAX_COUNTERS NUM_COUNTERS_NB
+
+#define RDPMC_BASE_NB 6
+#define RDPMC_BASE_L2 10
+
+#define COUNTER_SHIFT 16
+
+struct amd_uncore {
+ int id;
+ int refcnt;
+ int cpu;
+ int num_counters;
+ int rdpmc_base;
+ u32 msr_base;
+ cpumask_t *active_mask;
+ struct pmu *pmu;
+ struct perf_event *events[MAX_COUNTERS];
+ struct amd_uncore *free_when_cpu_online;
+};
+
+static struct amd_uncore * __percpu *amd_uncore_nb;
+static struct amd_uncore * __percpu *amd_uncore_l2;
+
+static struct pmu amd_nb_pmu;
+static struct pmu amd_l2_pmu;
+
+static cpumask_t amd_nb_active_mask;
+static cpumask_t amd_l2_active_mask;
+
+static bool is_nb_event(struct perf_event *event)
+{
+ return event->pmu->type == amd_nb_pmu.type;
+}
+
+static bool is_l2_event(struct perf_event *event)
+{
+ return event->pmu->type == amd_l2_pmu.type;
+}
+
+static struct amd_uncore *event_to_amd_uncore(struct perf_event *event)
+{
+ if (is_nb_event(event) && amd_uncore_nb)
+ return *per_cpu_ptr(amd_uncore_nb, event->cpu);
+ else if (is_l2_event(event) && amd_uncore_l2)
+ return *per_cpu_ptr(amd_uncore_l2, event->cpu);
+
+ return NULL;
+}
+
+static void amd_uncore_read(struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ u64 prev, new;
+ s64 delta;
+
+ /*
+ * since we do not enable counter overflow interrupts,
+ * we do not have to worry about prev_count changing on us
+ */
+
+ prev = local64_read(&hwc->prev_count);
+ rdpmcl(hwc->event_base_rdpmc, new);
+ local64_set(&hwc->prev_count, new);
+ delta = (new << COUNTER_SHIFT) - (prev << COUNTER_SHIFT);
+ delta >>= COUNTER_SHIFT;
+ local64_add(delta, &event->count);
+}
+
+static void amd_uncore_start(struct perf_event *event, int flags)
+{
+ struct hw_perf_event *hwc = &event->hw;
+
+ if (flags & PERF_EF_RELOAD)
+ wrmsrl(hwc->event_base, (u64)local64_read(&hwc->prev_count));
+
+ hwc->state = 0;
+ wrmsrl(hwc->config_base, (hwc->config | ARCH_PERFMON_EVENTSEL_ENABLE));
+ perf_event_update_userpage(event);
+}
+
+static void amd_uncore_stop(struct perf_event *event, int flags)
+{
+ struct hw_perf_event *hwc = &event->hw;
+
+ wrmsrl(hwc->config_base, hwc->config);
+ hwc->state |= PERF_HES_STOPPED;
+
+ if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
+ amd_uncore_read(event);
+ hwc->state |= PERF_HES_UPTODATE;
+ }
+}
+
+static int amd_uncore_add(struct perf_event *event, int flags)
+{
+ int i;
+ struct amd_uncore *uncore = event_to_amd_uncore(event);
+ struct hw_perf_event *hwc = &event->hw;
+
+ /* are we already assigned? */
+ if (hwc->idx != -1 && uncore->events[hwc->idx] == event)
+ goto out;
+
+ for (i = 0; i < uncore->num_counters; i++) {
+ if (uncore->events[i] == event) {
+ hwc->idx = i;
+ goto out;
+ }
+ }
+
+ /* if not, take the first available counter */
+ hwc->idx = -1;
+ for (i = 0; i < uncore->num_counters; i++) {
+ if (cmpxchg(&uncore->events[i], NULL, event) == NULL) {
+ hwc->idx = i;
+ break;
+ }
+ }
+
+out:
+ if (hwc->idx == -1)
+ return -EBUSY;
+
+ hwc->config_base = uncore->msr_base + (2 * hwc->idx);
+ hwc->event_base = uncore->msr_base + 1 + (2 * hwc->idx);
+ hwc->event_base_rdpmc = uncore->rdpmc_base + hwc->idx;
+ hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
+
+ if (flags & PERF_EF_START)
+ amd_uncore_start(event, PERF_EF_RELOAD);
+
+ return 0;
+}
+
+static void amd_uncore_del(struct perf_event *event, int flags)
+{
+ int i;
+ struct amd_uncore *uncore = event_to_amd_uncore(event);
+ struct hw_perf_event *hwc = &event->hw;
+
+ amd_uncore_stop(event, PERF_EF_UPDATE);
+
+ for (i = 0; i < uncore->num_counters; i++) {
+ if (cmpxchg(&uncore->events[i], event, NULL) == event)
+ break;
+ }
+
+ hwc->idx = -1;
+}
+
+static int amd_uncore_event_init(struct perf_event *event)
+{
+ struct amd_uncore *uncore;
+ struct hw_perf_event *hwc = &event->hw;
+
+ if (event->attr.type != event->pmu->type)
+ return -ENOENT;
+
+ /*
+ * NB and L2 counters (MSRs) are shared across all cores that share the
+ * same NB / L2 cache. Interrupts can be directed to a single target
+ * core, however, event counts generated by processes running on other
+ * cores cannot be masked out. So we do not support sampling and
+ * per-thread events.
+ */
+ if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK)
+ return -EINVAL;
+
+ /* NB and L2 counters do not have usr/os/guest/host bits */
+ if (event->attr.exclude_user || event->attr.exclude_kernel ||
+ event->attr.exclude_host || event->attr.exclude_guest)
+ return -EINVAL;
+
+ /* and we do not enable counter overflow interrupts */
+ hwc->config = event->attr.config & AMD64_RAW_EVENT_MASK_NB;
+ hwc->idx = -1;
+
+ if (event->cpu < 0)
+ return -EINVAL;
+
+ uncore = event_to_amd_uncore(event);
+ if (!uncore)
+ return -ENODEV;
+
+ /*
+ * since request can come in to any of the shared cores, we will remap
+ * to a single common cpu.
+ */
+ event->cpu = uncore->cpu;
+
+ return 0;
+}
+
+static ssize_t amd_uncore_attr_show_cpumask(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ cpumask_t *active_mask;
+ struct pmu *pmu = dev_get_drvdata(dev);
+
+ if (pmu->type == amd_nb_pmu.type)
+ active_mask = &amd_nb_active_mask;
+ else if (pmu->type == amd_l2_pmu.type)
+ active_mask = &amd_l2_active_mask;
+ else
+ return 0;
+
+ return cpumap_print_to_pagebuf(true, buf, active_mask);
+}
+static DEVICE_ATTR(cpumask, S_IRUGO, amd_uncore_attr_show_cpumask, NULL);
+
+static struct attribute *amd_uncore_attrs[] = {
+ &dev_attr_cpumask.attr,
+ NULL,
+};
+
+static struct attribute_group amd_uncore_attr_group = {
+ .attrs = amd_uncore_attrs,
+};
+
+PMU_FORMAT_ATTR(event, "config:0-7,32-35");
+PMU_FORMAT_ATTR(umask, "config:8-15");
+
+static struct attribute *amd_uncore_format_attr[] = {
+ &format_attr_event.attr,
+ &format_attr_umask.attr,
+ NULL,
+};
+
+static struct attribute_group amd_uncore_format_group = {
+ .name = "format",
+ .attrs = amd_uncore_format_attr,
+};
+
+static const struct attribute_group *amd_uncore_attr_groups[] = {
+ &amd_uncore_attr_group,
+ &amd_uncore_format_group,
+ NULL,
+};
+
+static struct pmu amd_nb_pmu = {
+ .attr_groups = amd_uncore_attr_groups,
+ .name = "amd_nb",
+ .event_init = amd_uncore_event_init,
+ .add = amd_uncore_add,
+ .del = amd_uncore_del,
+ .start = amd_uncore_start,
+ .stop = amd_uncore_stop,
+ .read = amd_uncore_read,
+};
+
+static struct pmu amd_l2_pmu = {
+ .attr_groups = amd_uncore_attr_groups,
+ .name = "amd_l2",
+ .event_init = amd_uncore_event_init,
+ .add = amd_uncore_add,
+ .del = amd_uncore_del,
+ .start = amd_uncore_start,
+ .stop = amd_uncore_stop,
+ .read = amd_uncore_read,
+};
+
+static struct amd_uncore *amd_uncore_alloc(unsigned int cpu)
+{
+ return kzalloc_node(sizeof(struct amd_uncore), GFP_KERNEL,
+ cpu_to_node(cpu));
+}
+
+static int amd_uncore_cpu_up_prepare(unsigned int cpu)
+{
+ struct amd_uncore *uncore_nb = NULL, *uncore_l2;
+
+ if (amd_uncore_nb) {
+ uncore_nb = amd_uncore_alloc(cpu);
+ if (!uncore_nb)
+ goto fail;
+ uncore_nb->cpu = cpu;
+ uncore_nb->num_counters = NUM_COUNTERS_NB;
+ uncore_nb->rdpmc_base = RDPMC_BASE_NB;
+ uncore_nb->msr_base = MSR_F15H_NB_PERF_CTL;
+ uncore_nb->active_mask = &amd_nb_active_mask;
+ uncore_nb->pmu = &amd_nb_pmu;
+ *per_cpu_ptr(amd_uncore_nb, cpu) = uncore_nb;
+ }
+
+ if (amd_uncore_l2) {
+ uncore_l2 = amd_uncore_alloc(cpu);
+ if (!uncore_l2)
+ goto fail;
+ uncore_l2->cpu = cpu;
+ uncore_l2->num_counters = NUM_COUNTERS_L2;
+ uncore_l2->rdpmc_base = RDPMC_BASE_L2;
+ uncore_l2->msr_base = MSR_F16H_L2I_PERF_CTL;
+ uncore_l2->active_mask = &amd_l2_active_mask;
+ uncore_l2->pmu = &amd_l2_pmu;
+ *per_cpu_ptr(amd_uncore_l2, cpu) = uncore_l2;
+ }
+
+ return 0;
+
+fail:
+ if (amd_uncore_nb)
+ *per_cpu_ptr(amd_uncore_nb, cpu) = NULL;
+ kfree(uncore_nb);
+ return -ENOMEM;
+}
+
+static struct amd_uncore *
+amd_uncore_find_online_sibling(struct amd_uncore *this,
+ struct amd_uncore * __percpu *uncores)
+{
+ unsigned int cpu;
+ struct amd_uncore *that;
+
+ for_each_online_cpu(cpu) {
+ that = *per_cpu_ptr(uncores, cpu);
+
+ if (!that)
+ continue;
+
+ if (this == that)
+ continue;
+
+ if (this->id == that->id) {
+ that->free_when_cpu_online = this;
+ this = that;
+ break;
+ }
+ }
+
+ this->refcnt++;
+ return this;
+}
+
+static void amd_uncore_cpu_starting(unsigned int cpu)
+{
+ unsigned int eax, ebx, ecx, edx;
+ struct amd_uncore *uncore;
+
+ if (amd_uncore_nb) {
+ uncore = *per_cpu_ptr(amd_uncore_nb, cpu);
+ cpuid(0x8000001e, &eax, &ebx, &ecx, &edx);
+ uncore->id = ecx & 0xff;
+
+ uncore = amd_uncore_find_online_sibling(uncore, amd_uncore_nb);
+ *per_cpu_ptr(amd_uncore_nb, cpu) = uncore;
+ }
+
+ if (amd_uncore_l2) {
+ unsigned int apicid = cpu_data(cpu).apicid;
+ unsigned int nshared;
+
+ uncore = *per_cpu_ptr(amd_uncore_l2, cpu);
+ cpuid_count(0x8000001d, 2, &eax, &ebx, &ecx, &edx);
+ nshared = ((eax >> 14) & 0xfff) + 1;
+ uncore->id = apicid - (apicid % nshared);
+
+ uncore = amd_uncore_find_online_sibling(uncore, amd_uncore_l2);
+ *per_cpu_ptr(amd_uncore_l2, cpu) = uncore;
+ }
+}
+
+static void uncore_online(unsigned int cpu,
+ struct amd_uncore * __percpu *uncores)
+{
+ struct amd_uncore *uncore = *per_cpu_ptr(uncores, cpu);
+
+ kfree(uncore->free_when_cpu_online);
+ uncore->free_when_cpu_online = NULL;
+
+ if (cpu == uncore->cpu)
+ cpumask_set_cpu(cpu, uncore->active_mask);
+}
+
+static void amd_uncore_cpu_online(unsigned int cpu)
+{
+ if (amd_uncore_nb)
+ uncore_online(cpu, amd_uncore_nb);
+
+ if (amd_uncore_l2)
+ uncore_online(cpu, amd_uncore_l2);
+}
+
+static void uncore_down_prepare(unsigned int cpu,
+ struct amd_uncore * __percpu *uncores)
+{
+ unsigned int i;
+ struct amd_uncore *this = *per_cpu_ptr(uncores, cpu);
+
+ if (this->cpu != cpu)
+ return;
+
+ /* this cpu is going down, migrate to a shared sibling if possible */
+ for_each_online_cpu(i) {
+ struct amd_uncore *that = *per_cpu_ptr(uncores, i);
+
+ if (cpu == i)
+ continue;
+
+ if (this == that) {
+ perf_pmu_migrate_context(this->pmu, cpu, i);
+ cpumask_clear_cpu(cpu, that->active_mask);
+ cpumask_set_cpu(i, that->active_mask);
+ that->cpu = i;
+ break;
+ }
+ }
+}
+
+static void amd_uncore_cpu_down_prepare(unsigned int cpu)
+{
+ if (amd_uncore_nb)
+ uncore_down_prepare(cpu, amd_uncore_nb);
+
+ if (amd_uncore_l2)
+ uncore_down_prepare(cpu, amd_uncore_l2);
+}
+
+static void uncore_dead(unsigned int cpu, struct amd_uncore * __percpu *uncores)
+{
+ struct amd_uncore *uncore = *per_cpu_ptr(uncores, cpu);
+
+ if (cpu == uncore->cpu)
+ cpumask_clear_cpu(cpu, uncore->active_mask);
+
+ if (!--uncore->refcnt)
+ kfree(uncore);
+ *per_cpu_ptr(uncores, cpu) = NULL;
+}
+
+static void amd_uncore_cpu_dead(unsigned int cpu)
+{
+ if (amd_uncore_nb)
+ uncore_dead(cpu, amd_uncore_nb);
+
+ if (amd_uncore_l2)
+ uncore_dead(cpu, amd_uncore_l2);
+}
+
+static int
+amd_uncore_cpu_notifier(struct notifier_block *self, unsigned long action,
+ void *hcpu)
+{
+ unsigned int cpu = (long)hcpu;
+
+ switch (action & ~CPU_TASKS_FROZEN) {
+ case CPU_UP_PREPARE:
+ if (amd_uncore_cpu_up_prepare(cpu))
+ return notifier_from_errno(-ENOMEM);
+ break;
+
+ case CPU_STARTING:
+ amd_uncore_cpu_starting(cpu);
+ break;
+
+ case CPU_ONLINE:
+ amd_uncore_cpu_online(cpu);
+ break;
+
+ case CPU_DOWN_PREPARE:
+ amd_uncore_cpu_down_prepare(cpu);
+ break;
+
+ case CPU_UP_CANCELED:
+ case CPU_DEAD:
+ amd_uncore_cpu_dead(cpu);
+ break;
+
+ default:
+ break;
+ }
+
+ return NOTIFY_OK;
+}
+
+static struct notifier_block amd_uncore_cpu_notifier_block = {
+ .notifier_call = amd_uncore_cpu_notifier,
+ .priority = CPU_PRI_PERF + 1,
+};
+
+static void __init init_cpu_already_online(void *dummy)
+{
+ unsigned int cpu = smp_processor_id();
+
+ amd_uncore_cpu_starting(cpu);
+ amd_uncore_cpu_online(cpu);
+}
+
+static void cleanup_cpu_online(void *dummy)
+{
+ unsigned int cpu = smp_processor_id();
+
+ amd_uncore_cpu_dead(cpu);
+}
+
+static int __init amd_uncore_init(void)
+{
+ unsigned int cpu, cpu2;
+ int ret = -ENODEV;
+
+ if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
+ goto fail_nodev;
+
+ if (!boot_cpu_has(X86_FEATURE_TOPOEXT))
+ goto fail_nodev;
+
+ if (boot_cpu_has(X86_FEATURE_PERFCTR_NB)) {
+ amd_uncore_nb = alloc_percpu(struct amd_uncore *);
+ if (!amd_uncore_nb) {
+ ret = -ENOMEM;
+ goto fail_nb;
+ }
+ ret = perf_pmu_register(&amd_nb_pmu, amd_nb_pmu.name, -1);
+ if (ret)
+ goto fail_nb;
+
+ pr_info("perf: AMD NB counters detected\n");
+ ret = 0;
+ }
+
+ if (boot_cpu_has(X86_FEATURE_PERFCTR_L2)) {
+ amd_uncore_l2 = alloc_percpu(struct amd_uncore *);
+ if (!amd_uncore_l2) {
+ ret = -ENOMEM;
+ goto fail_l2;
+ }
+ ret = perf_pmu_register(&amd_l2_pmu, amd_l2_pmu.name, -1);
+ if (ret)
+ goto fail_l2;
+
+ pr_info("perf: AMD L2I counters detected\n");
+ ret = 0;
+ }
+
+ if (ret)
+ goto fail_nodev;
+
+ cpu_notifier_register_begin();
+
+ /* init cpus already online before registering for hotplug notifier */
+ for_each_online_cpu(cpu) {
+ ret = amd_uncore_cpu_up_prepare(cpu);
+ if (ret)
+ goto fail_online;
+ smp_call_function_single(cpu, init_cpu_already_online, NULL, 1);
+ }
+
+ __register_cpu_notifier(&amd_uncore_cpu_notifier_block);
+ cpu_notifier_register_done();
+
+ return 0;
+
+
+fail_online:
+ for_each_online_cpu(cpu2) {
+ if (cpu2 == cpu)
+ break;
+ smp_call_function_single(cpu, cleanup_cpu_online, NULL, 1);
+ }
+ cpu_notifier_register_done();
+
+ /* amd_uncore_nb/l2 should have been freed by cleanup_cpu_online */
+ amd_uncore_nb = amd_uncore_l2 = NULL;
+
+ if (boot_cpu_has(X86_FEATURE_PERFCTR_L2))
+ perf_pmu_unregister(&amd_l2_pmu);
+fail_l2:
+ if (boot_cpu_has(X86_FEATURE_PERFCTR_NB))
+ perf_pmu_unregister(&amd_nb_pmu);
+ if (amd_uncore_l2)
+ free_percpu(amd_uncore_l2);
+fail_nb:
+ if (amd_uncore_nb)
+ free_percpu(amd_uncore_nb);
+
+fail_nodev:
+ return ret;
+}
+device_initcall(amd_uncore_init);
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
new file mode 100644
index 000000000..041e442a3
--- /dev/null
+++ b/arch/x86/events/core.c
@@ -0,0 +1,2442 @@
+/*
+ * Performance events x86 architecture code
+ *
+ * Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de>
+ * Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar
+ * Copyright (C) 2009 Jaswinder Singh Rajput
+ * Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter
+ * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra
+ * Copyright (C) 2009 Intel Corporation, <markus.t.metzger@intel.com>
+ * Copyright (C) 2009 Google, Inc., Stephane Eranian
+ *
+ * For licencing details see kernel-base/COPYING
+ */
+
+#include <linux/perf_event.h>
+#include <linux/capability.h>
+#include <linux/notifier.h>
+#include <linux/hardirq.h>
+#include <linux/kprobes.h>
+#include <linux/module.h>
+#include <linux/kdebug.h>
+#include <linux/sched.h>
+#include <linux/uaccess.h>
+#include <linux/slab.h>
+#include <linux/cpu.h>
+#include <linux/bitops.h>
+#include <linux/device.h>
+
+#include <asm/apic.h>
+#include <asm/stacktrace.h>
+#include <asm/nmi.h>
+#include <asm/smp.h>
+#include <asm/alternative.h>
+#include <asm/mmu_context.h>
+#include <asm/tlbflush.h>
+#include <asm/timer.h>
+#include <asm/desc.h>
+#include <asm/ldt.h>
+
+#include "perf_event.h"
+
+struct x86_pmu x86_pmu __read_mostly;
+
+DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = {
+ .enabled = 1,
+};
+
+struct static_key rdpmc_always_available = STATIC_KEY_INIT_FALSE;
+
+u64 __read_mostly hw_cache_event_ids
+ [PERF_COUNT_HW_CACHE_MAX]
+ [PERF_COUNT_HW_CACHE_OP_MAX]
+ [PERF_COUNT_HW_CACHE_RESULT_MAX];
+u64 __read_mostly hw_cache_extra_regs
+ [PERF_COUNT_HW_CACHE_MAX]
+ [PERF_COUNT_HW_CACHE_OP_MAX]
+ [PERF_COUNT_HW_CACHE_RESULT_MAX];
+
+/*
+ * Propagate event elapsed time into the generic event.
+ * Can only be executed on the CPU where the event is active.
+ * Returns the delta events processed.
+ */
+u64 x86_perf_event_update(struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ int shift = 64 - x86_pmu.cntval_bits;
+ u64 prev_raw_count, new_raw_count;
+ int idx = hwc->idx;
+ s64 delta;
+
+ if (idx == INTEL_PMC_IDX_FIXED_BTS)
+ return 0;
+
+ /*
+ * Careful: an NMI might modify the previous event value.
+ *
+ * Our tactic to handle this is to first atomically read and
+ * exchange a new raw count - then add that new-prev delta
+ * count to the generic event atomically:
+ */
+again:
+ prev_raw_count = local64_read(&hwc->prev_count);
+ rdpmcl(hwc->event_base_rdpmc, new_raw_count);
+
+ if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
+ new_raw_count) != prev_raw_count)
+ goto again;
+
+ /*
+ * Now we have the new raw value and have updated the prev
+ * timestamp already. We can now calculate the elapsed delta
+ * (event-)time and add that to the generic event.
+ *
+ * Careful, not all hw sign-extends above the physical width
+ * of the count.
+ */
+ delta = (new_raw_count << shift) - (prev_raw_count << shift);
+ delta >>= shift;
+
+ local64_add(delta, &event->count);
+ local64_sub(delta, &hwc->period_left);
+
+ return new_raw_count;
+}
+
+/*
+ * Find and validate any extra registers to set up.
+ */
+static int x86_pmu_extra_regs(u64 config, struct perf_event *event)
+{
+ struct hw_perf_event_extra *reg;
+ struct extra_reg *er;
+
+ reg = &event->hw.extra_reg;
+
+ if (!x86_pmu.extra_regs)
+ return 0;
+
+ for (er = x86_pmu.extra_regs; er->msr; er++) {
+ if (er->event != (config & er->config_mask))
+ continue;
+ if (event->attr.config1 & ~er->valid_mask)
+ return -EINVAL;
+ /* Check if the extra msrs can be safely accessed*/
+ if (!er->extra_msr_access)
+ return -ENXIO;
+
+ reg->idx = er->idx;
+ reg->config = event->attr.config1;
+ reg->reg = er->msr;
+ break;
+ }
+ return 0;
+}
+
+static atomic_t active_events;
+static atomic_t pmc_refcount;
+static DEFINE_MUTEX(pmc_reserve_mutex);
+
+#ifdef CONFIG_X86_LOCAL_APIC
+
+static bool reserve_pmc_hardware(void)
+{
+ int i;
+
+ for (i = 0; i < x86_pmu.num_counters; i++) {
+ if (!reserve_perfctr_nmi(x86_pmu_event_addr(i)))
+ goto perfctr_fail;
+ }
+
+ for (i = 0; i < x86_pmu.num_counters; i++) {
+ if (!reserve_evntsel_nmi(x86_pmu_config_addr(i)))
+ goto eventsel_fail;
+ }
+
+ return true;
+
+eventsel_fail:
+ for (i--; i >= 0; i--)
+ release_evntsel_nmi(x86_pmu_config_addr(i));
+
+ i = x86_pmu.num_counters;
+
+perfctr_fail:
+ for (i--; i >= 0; i--)
+ release_perfctr_nmi(x86_pmu_event_addr(i));
+
+ return false;
+}
+
+static void release_pmc_hardware(void)
+{
+ int i;
+
+ for (i = 0; i < x86_pmu.num_counters; i++) {
+ release_perfctr_nmi(x86_pmu_event_addr(i));
+ release_evntsel_nmi(x86_pmu_config_addr(i));
+ }
+}
+
+#else
+
+static bool reserve_pmc_hardware(void) { return true; }
+static void release_pmc_hardware(void) {}
+
+#endif
+
+static bool check_hw_exists(void)
+{
+ u64 val, val_fail, val_new= ~0;
+ int i, reg, reg_fail, ret = 0;
+ int bios_fail = 0;
+ int reg_safe = -1;
+
+ /*
+ * Check to see if the BIOS enabled any of the counters, if so
+ * complain and bail.
+ */
+ for (i = 0; i < x86_pmu.num_counters; i++) {
+ reg = x86_pmu_config_addr(i);
+ ret = rdmsrl_safe(reg, &val);
+ if (ret)
+ goto msr_fail;
+ if (val & ARCH_PERFMON_EVENTSEL_ENABLE) {
+ bios_fail = 1;
+ val_fail = val;
+ reg_fail = reg;
+ } else {
+ reg_safe = i;
+ }
+ }
+
+ if (x86_pmu.num_counters_fixed) {
+ reg = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
+ ret = rdmsrl_safe(reg, &val);
+ if (ret)
+ goto msr_fail;
+ for (i = 0; i < x86_pmu.num_counters_fixed; i++) {
+ if (val & (0x03 << i*4)) {
+ bios_fail = 1;
+ val_fail = val;
+ reg_fail = reg;
+ }
+ }
+ }
+
+ /*
+ * If all the counters are enabled, the below test will always
+ * fail. The tools will also become useless in this scenario.
+ * Just fail and disable the hardware counters.
+ */
+
+ if (reg_safe == -1) {
+ reg = reg_safe;
+ goto msr_fail;
+ }
+
+ /*
+ * Read the current value, change it and read it back to see if it
+ * matches, this is needed to detect certain hardware emulators
+ * (qemu/kvm) that don't trap on the MSR access and always return 0s.
+ */
+ reg = x86_pmu_event_addr(reg_safe);
+ if (rdmsrl_safe(reg, &val))
+ goto msr_fail;
+ val ^= 0xffffUL;
+ ret = wrmsrl_safe(reg, val);
+ ret |= rdmsrl_safe(reg, &val_new);
+ if (ret || val != val_new)
+ goto msr_fail;
+
+ /*
+ * We still allow the PMU driver to operate:
+ */
+ if (bios_fail) {
+ pr_cont("Broken BIOS detected, complain to your hardware vendor.\n");
+ pr_err(FW_BUG "the BIOS has corrupted hw-PMU resources (MSR %x is %Lx)\n",
+ reg_fail, val_fail);
+ }
+
+ return true;
+
+msr_fail:
+ pr_cont("Broken PMU hardware detected, using software events only.\n");
+ pr_info("%sFailed to access perfctr msr (MSR %x is %Lx)\n",
+ boot_cpu_has(X86_FEATURE_HYPERVISOR) ? KERN_INFO : KERN_ERR,
+ reg, val_new);
+
+ return false;
+}
+
+static void hw_perf_event_destroy(struct perf_event *event)
+{
+ x86_release_hardware();
+ atomic_dec(&active_events);
+}
+
+void hw_perf_lbr_event_destroy(struct perf_event *event)
+{
+ hw_perf_event_destroy(event);
+
+ /* undo the lbr/bts event accounting */
+ x86_del_exclusive(x86_lbr_exclusive_lbr);
+}
+
+static inline int x86_pmu_initialized(void)
+{
+ return x86_pmu.handle_irq != NULL;
+}
+
+static inline int
+set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event *event)
+{
+ struct perf_event_attr *attr = &event->attr;
+ unsigned int cache_type, cache_op, cache_result;
+ u64 config, val;
+
+ config = attr->config;
+
+ cache_type = (config >> 0) & 0xff;
+ if (cache_type >= PERF_COUNT_HW_CACHE_MAX)
+ return -EINVAL;
+
+ cache_op = (config >> 8) & 0xff;
+ if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX)
+ return -EINVAL;
+
+ cache_result = (config >> 16) & 0xff;
+ if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
+ return -EINVAL;
+
+ val = hw_cache_event_ids[cache_type][cache_op][cache_result];
+
+ if (val == 0)
+ return -ENOENT;
+
+ if (val == -1)
+ return -EINVAL;
+
+ hwc->config |= val;
+ attr->config1 = hw_cache_extra_regs[cache_type][cache_op][cache_result];
+ return x86_pmu_extra_regs(val, event);
+}
+
+int x86_reserve_hardware(void)
+{
+ int err = 0;
+
+ if (!atomic_inc_not_zero(&pmc_refcount)) {
+ mutex_lock(&pmc_reserve_mutex);
+ if (atomic_read(&pmc_refcount) == 0) {
+ if (!reserve_pmc_hardware())
+ err = -EBUSY;
+ else
+ reserve_ds_buffers();
+ }
+ if (!err)
+ atomic_inc(&pmc_refcount);
+ mutex_unlock(&pmc_reserve_mutex);
+ }
+
+ return err;
+}
+
+void x86_release_hardware(void)
+{
+ if (atomic_dec_and_mutex_lock(&pmc_refcount, &pmc_reserve_mutex)) {
+ release_pmc_hardware();
+ release_ds_buffers();
+ mutex_unlock(&pmc_reserve_mutex);
+ }
+}
+
+/*
+ * Check if we can create event of a certain type (that no conflicting events
+ * are present).
+ */
+int x86_add_exclusive(unsigned int what)
+{
+ int i;
+
+ if (!atomic_inc_not_zero(&x86_pmu.lbr_exclusive[what])) {
+ mutex_lock(&pmc_reserve_mutex);
+ for (i = 0; i < ARRAY_SIZE(x86_pmu.lbr_exclusive); i++) {
+ if (i != what && atomic_read(&x86_pmu.lbr_exclusive[i]))
+ goto fail_unlock;
+ }
+ atomic_inc(&x86_pmu.lbr_exclusive[what]);
+ mutex_unlock(&pmc_reserve_mutex);
+ }
+
+ atomic_inc(&active_events);
+ return 0;
+
+fail_unlock:
+ mutex_unlock(&pmc_reserve_mutex);
+ return -EBUSY;
+}
+
+void x86_del_exclusive(unsigned int what)
+{
+ atomic_dec(&x86_pmu.lbr_exclusive[what]);
+ atomic_dec(&active_events);
+}
+
+int x86_setup_perfctr(struct perf_event *event)
+{
+ struct perf_event_attr *attr = &event->attr;
+ struct hw_perf_event *hwc = &event->hw;
+ u64 config;
+
+ if (!is_sampling_event(event)) {
+ hwc->sample_period = x86_pmu.max_period;
+ hwc->last_period = hwc->sample_period;
+ local64_set(&hwc->period_left, hwc->sample_period);
+ }
+
+ if (attr->type == PERF_TYPE_RAW)
+ return x86_pmu_extra_regs(event->attr.config, event);
+
+ if (attr->type == PERF_TYPE_HW_CACHE)
+ return set_ext_hw_attr(hwc, event);
+
+ if (attr->config >= x86_pmu.max_events)
+ return -EINVAL;
+
+ /*
+ * The generic map:
+ */
+ config = x86_pmu.event_map(attr->config);
+
+ if (config == 0)
+ return -ENOENT;
+
+ if (config == -1LL)
+ return -EINVAL;
+
+ /*
+ * Branch tracing:
+ */
+ if (attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS &&
+ !attr->freq && hwc->sample_period == 1) {
+ /* BTS is not supported by this architecture. */
+ if (!x86_pmu.bts_active)
+ return -EOPNOTSUPP;
+
+ /* BTS is currently only allowed for user-mode. */
+ if (!attr->exclude_kernel)
+ return -EOPNOTSUPP;
+
+ /* disallow bts if conflicting events are present */
+ if (x86_add_exclusive(x86_lbr_exclusive_lbr))
+ return -EBUSY;
+
+ event->destroy = hw_perf_lbr_event_destroy;
+ }
+
+ hwc->config |= config;
+
+ return 0;
+}
+
+/*
+ * check that branch_sample_type is compatible with
+ * settings needed for precise_ip > 1 which implies
+ * using the LBR to capture ALL taken branches at the
+ * priv levels of the measurement
+ */
+static inline int precise_br_compat(struct perf_event *event)
+{
+ u64 m = event->attr.branch_sample_type;
+ u64 b = 0;
+
+ /* must capture all branches */
+ if (!(m & PERF_SAMPLE_BRANCH_ANY))
+ return 0;
+
+ m &= PERF_SAMPLE_BRANCH_KERNEL | PERF_SAMPLE_BRANCH_USER;
+
+ if (!event->attr.exclude_user)
+ b |= PERF_SAMPLE_BRANCH_USER;
+
+ if (!event->attr.exclude_kernel)
+ b |= PERF_SAMPLE_BRANCH_KERNEL;
+
+ /*
+ * ignore PERF_SAMPLE_BRANCH_HV, not supported on x86
+ */
+
+ return m == b;
+}
+
+int x86_pmu_hw_config(struct perf_event *event)
+{
+ if (event->attr.precise_ip) {
+ int precise = 0;
+
+ /* Support for constant skid */
+ if (x86_pmu.pebs_active && !x86_pmu.pebs_broken) {
+ precise++;
+
+ /* Support for IP fixup */
+ if (x86_pmu.lbr_nr || x86_pmu.intel_cap.pebs_format >= 2)
+ precise++;
+
+ if (x86_pmu.pebs_prec_dist)
+ precise++;
+ }
+
+ if (event->attr.precise_ip > precise)
+ return -EOPNOTSUPP;
+ }
+ /*
+ * check that PEBS LBR correction does not conflict with
+ * whatever the user is asking with attr->branch_sample_type
+ */
+ if (event->attr.precise_ip > 1 && x86_pmu.intel_cap.pebs_format < 2) {
+ u64 *br_type = &event->attr.branch_sample_type;
+
+ if (has_branch_stack(event)) {
+ if (!precise_br_compat(event))
+ return -EOPNOTSUPP;
+
+ /* branch_sample_type is compatible */
+
+ } else {
+ /*
+ * user did not specify branch_sample_type
+ *
+ * For PEBS fixups, we capture all
+ * the branches at the priv level of the
+ * event.
+ */
+ *br_type = PERF_SAMPLE_BRANCH_ANY;
+
+ if (!event->attr.exclude_user)
+ *br_type |= PERF_SAMPLE_BRANCH_USER;
+
+ if (!event->attr.exclude_kernel)
+ *br_type |= PERF_SAMPLE_BRANCH_KERNEL;
+ }
+ }
+
+ if (event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_CALL_STACK)
+ event->attach_state |= PERF_ATTACH_TASK_DATA;
+
+ /*
+ * Generate PMC IRQs:
+ * (keep 'enabled' bit clear for now)
+ */
+ event->hw.config = ARCH_PERFMON_EVENTSEL_INT;
+
+ /*
+ * Count user and OS events unless requested not to
+ */
+ if (!event->attr.exclude_user)
+ event->hw.config |= ARCH_PERFMON_EVENTSEL_USR;
+ if (!event->attr.exclude_kernel)
+ event->hw.config |= ARCH_PERFMON_EVENTSEL_OS;
+
+ if (event->attr.type == PERF_TYPE_RAW)
+ event->hw.config |= event->attr.config & X86_RAW_EVENT_MASK;
+
+ if (event->attr.sample_period && x86_pmu.limit_period) {
+ if (x86_pmu.limit_period(event, event->attr.sample_period) >
+ event->attr.sample_period)
+ return -EINVAL;
+ }
+
+ return x86_setup_perfctr(event);
+}
+
+/*
+ * Setup the hardware configuration for a given attr_type
+ */
+static int __x86_pmu_event_init(struct perf_event *event)
+{
+ int err;
+
+ if (!x86_pmu_initialized())
+ return -ENODEV;
+
+ err = x86_reserve_hardware();
+ if (err)
+ return err;
+
+ atomic_inc(&active_events);
+ event->destroy = hw_perf_event_destroy;
+
+ event->hw.idx = -1;
+ event->hw.last_cpu = -1;
+ event->hw.last_tag = ~0ULL;
+
+ /* mark unused */
+ event->hw.extra_reg.idx = EXTRA_REG_NONE;
+ event->hw.branch_reg.idx = EXTRA_REG_NONE;
+
+ return x86_pmu.hw_config(event);
+}
+
+void x86_pmu_disable_all(void)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ int idx;
+
+ for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+ u64 val;
+
+ if (!test_bit(idx, cpuc->active_mask))
+ continue;
+ rdmsrl(x86_pmu_config_addr(idx), val);
+ if (!(val & ARCH_PERFMON_EVENTSEL_ENABLE))
+ continue;
+ val &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
+ wrmsrl(x86_pmu_config_addr(idx), val);
+ }
+}
+
+/*
+ * There may be PMI landing after enabled=0. The PMI hitting could be before or
+ * after disable_all.
+ *
+ * If PMI hits before disable_all, the PMU will be disabled in the NMI handler.
+ * It will not be re-enabled in the NMI handler again, because enabled=0. After
+ * handling the NMI, disable_all will be called, which will not change the
+ * state either. If PMI hits after disable_all, the PMU is already disabled
+ * before entering NMI handler. The NMI handler will not change the state
+ * either.
+ *
+ * So either situation is harmless.
+ */
+static void x86_pmu_disable(struct pmu *pmu)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+
+ if (!x86_pmu_initialized())
+ return;
+
+ if (!cpuc->enabled)
+ return;
+
+ cpuc->n_added = 0;
+ cpuc->enabled = 0;
+ barrier();
+
+ x86_pmu.disable_all();
+}
+
+void x86_pmu_enable_all(int added)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ int idx;
+
+ for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+ struct hw_perf_event *hwc = &cpuc->events[idx]->hw;
+
+ if (!test_bit(idx, cpuc->active_mask))
+ continue;
+
+ __x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE);
+ }
+}
+
+static struct pmu pmu;
+
+static inline int is_x86_event(struct perf_event *event)
+{
+ return event->pmu == &pmu;
+}
+
+/*
+ * Event scheduler state:
+ *
+ * Assign events iterating over all events and counters, beginning
+ * with events with least weights first. Keep the current iterator
+ * state in struct sched_state.
+ */
+struct sched_state {
+ int weight;
+ int event; /* event index */
+ int counter; /* counter index */
+ int unassigned; /* number of events to be assigned left */
+ int nr_gp; /* number of GP counters used */
+ unsigned long used[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
+};
+
+/* Total max is X86_PMC_IDX_MAX, but we are O(n!) limited */
+#define SCHED_STATES_MAX 2
+
+struct perf_sched {
+ int max_weight;
+ int max_events;
+ int max_gp;
+ int saved_states;
+ struct event_constraint **constraints;
+ struct sched_state state;
+ struct sched_state saved[SCHED_STATES_MAX];
+};
+
+/*
+ * Initialize interator that runs through all events and counters.
+ */
+static void perf_sched_init(struct perf_sched *sched, struct event_constraint **constraints,
+ int num, int wmin, int wmax, int gpmax)
+{
+ int idx;
+
+ memset(sched, 0, sizeof(*sched));
+ sched->max_events = num;
+ sched->max_weight = wmax;
+ sched->max_gp = gpmax;
+ sched->constraints = constraints;
+
+ for (idx = 0; idx < num; idx++) {
+ if (constraints[idx]->weight == wmin)
+ break;
+ }
+
+ sched->state.event = idx; /* start with min weight */
+ sched->state.weight = wmin;
+ sched->state.unassigned = num;
+}
+
+static void perf_sched_save_state(struct perf_sched *sched)
+{
+ if (WARN_ON_ONCE(sched->saved_states >= SCHED_STATES_MAX))
+ return;
+
+ sched->saved[sched->saved_states] = sched->state;
+ sched->saved_states++;
+}
+
+static bool perf_sched_restore_state(struct perf_sched *sched)
+{
+ if (!sched->saved_states)
+ return false;
+
+ sched->saved_states--;
+ sched->state = sched->saved[sched->saved_states];
+
+ /* continue with next counter: */
+ clear_bit(sched->state.counter++, sched->state.used);
+
+ return true;
+}
+
+/*
+ * Select a counter for the current event to schedule. Return true on
+ * success.
+ */
+static bool __perf_sched_find_counter(struct perf_sched *sched)
+{
+ struct event_constraint *c;
+ int idx;
+
+ if (!sched->state.unassigned)
+ return false;
+
+ if (sched->state.event >= sched->max_events)
+ return false;
+
+ c = sched->constraints[sched->state.event];
+ /* Prefer fixed purpose counters */
+ if (c->idxmsk64 & (~0ULL << INTEL_PMC_IDX_FIXED)) {
+ idx = INTEL_PMC_IDX_FIXED;
+ for_each_set_bit_from(idx, c->idxmsk, X86_PMC_IDX_MAX) {
+ if (!__test_and_set_bit(idx, sched->state.used))
+ goto done;
+ }
+ }
+
+ /* Grab the first unused counter starting with idx */
+ idx = sched->state.counter;
+ for_each_set_bit_from(idx, c->idxmsk, INTEL_PMC_IDX_FIXED) {
+ if (!__test_and_set_bit(idx, sched->state.used)) {
+ if (sched->state.nr_gp++ >= sched->max_gp)
+ return false;
+
+ goto done;
+ }
+ }
+
+ return false;
+
+done:
+ sched->state.counter = idx;
+
+ if (c->overlap)
+ perf_sched_save_state(sched);
+
+ return true;
+}
+
+static bool perf_sched_find_counter(struct perf_sched *sched)
+{
+ while (!__perf_sched_find_counter(sched)) {
+ if (!perf_sched_restore_state(sched))
+ return false;
+ }
+
+ return true;
+}
+
+/*
+ * Go through all unassigned events and find the next one to schedule.
+ * Take events with the least weight first. Return true on success.
+ */
+static bool perf_sched_next_event(struct perf_sched *sched)
+{
+ struct event_constraint *c;
+
+ if (!sched->state.unassigned || !--sched->state.unassigned)
+ return false;
+
+ do {
+ /* next event */
+ sched->state.event++;
+ if (sched->state.event >= sched->max_events) {
+ /* next weight */
+ sched->state.event = 0;
+ sched->state.weight++;
+ if (sched->state.weight > sched->max_weight)
+ return false;
+ }
+ c = sched->constraints[sched->state.event];
+ } while (c->weight != sched->state.weight);
+
+ sched->state.counter = 0; /* start with first counter */
+
+ return true;
+}
+
+/*
+ * Assign a counter for each event.
+ */
+int perf_assign_events(struct event_constraint **constraints, int n,
+ int wmin, int wmax, int gpmax, int *assign)
+{
+ struct perf_sched sched;
+
+ perf_sched_init(&sched, constraints, n, wmin, wmax, gpmax);
+
+ do {
+ if (!perf_sched_find_counter(&sched))
+ break; /* failed */
+ if (assign)
+ assign[sched.state.event] = sched.state.counter;
+ } while (perf_sched_next_event(&sched));
+
+ return sched.state.unassigned;
+}
+EXPORT_SYMBOL_GPL(perf_assign_events);
+
+int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
+{
+ struct event_constraint *c;
+ unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
+ struct perf_event *e;
+ int i, wmin, wmax, unsched = 0;
+ struct hw_perf_event *hwc;
+
+ bitmap_zero(used_mask, X86_PMC_IDX_MAX);
+
+ if (x86_pmu.start_scheduling)
+ x86_pmu.start_scheduling(cpuc);
+
+ for (i = 0, wmin = X86_PMC_IDX_MAX, wmax = 0; i < n; i++) {
+ cpuc->event_constraint[i] = NULL;
+ c = x86_pmu.get_event_constraints(cpuc, i, cpuc->event_list[i]);
+ cpuc->event_constraint[i] = c;
+
+ wmin = min(wmin, c->weight);
+ wmax = max(wmax, c->weight);
+ }
+
+ /*
+ * fastpath, try to reuse previous register
+ */
+ for (i = 0; i < n; i++) {
+ hwc = &cpuc->event_list[i]->hw;
+ c = cpuc->event_constraint[i];
+
+ /* never assigned */
+ if (hwc->idx == -1)
+ break;
+
+ /* constraint still honored */
+ if (!test_bit(hwc->idx, c->idxmsk))
+ break;
+
+ /* not already used */
+ if (test_bit(hwc->idx, used_mask))
+ break;
+
+ __set_bit(hwc->idx, used_mask);
+ if (assign)
+ assign[i] = hwc->idx;
+ }
+
+ /* slow path */
+ if (i != n) {
+ int gpmax = x86_pmu.num_counters;
+
+ /*
+ * Do not allow scheduling of more than half the available
+ * generic counters.
+ *
+ * This helps avoid counter starvation of sibling thread by
+ * ensuring at most half the counters cannot be in exclusive
+ * mode. There is no designated counters for the limits. Any
+ * N/2 counters can be used. This helps with events with
+ * specific counter constraints.
+ */
+ if (is_ht_workaround_enabled() && !cpuc->is_fake &&
+ READ_ONCE(cpuc->excl_cntrs->exclusive_present))
+ gpmax /= 2;
+
+ unsched = perf_assign_events(cpuc->event_constraint, n, wmin,
+ wmax, gpmax, assign);
+ }
+
+ /*
+ * In case of success (unsched = 0), mark events as committed,
+ * so we do not put_constraint() in case new events are added
+ * and fail to be scheduled
+ *
+ * We invoke the lower level commit callback to lock the resource
+ *
+ * We do not need to do all of this in case we are called to
+ * validate an event group (assign == NULL)
+ */
+ if (!unsched && assign) {
+ for (i = 0; i < n; i++) {
+ e = cpuc->event_list[i];
+ e->hw.flags |= PERF_X86_EVENT_COMMITTED;
+ if (x86_pmu.commit_scheduling)
+ x86_pmu.commit_scheduling(cpuc, i, assign[i]);
+ }
+ } else {
+ for (i = 0; i < n; i++) {
+ e = cpuc->event_list[i];
+ /*
+ * do not put_constraint() on comitted events,
+ * because they are good to go
+ */
+ if ((e->hw.flags & PERF_X86_EVENT_COMMITTED))
+ continue;
+
+ /*
+ * release events that failed scheduling
+ */
+ if (x86_pmu.put_event_constraints)
+ x86_pmu.put_event_constraints(cpuc, e);
+ }
+ }
+
+ if (x86_pmu.stop_scheduling)
+ x86_pmu.stop_scheduling(cpuc);
+
+ return unsched ? -EINVAL : 0;
+}
+
+/*
+ * dogrp: true if must collect siblings events (group)
+ * returns total number of events and error code
+ */
+static int collect_events(struct cpu_hw_events *cpuc, struct perf_event *leader, bool dogrp)
+{
+ struct perf_event *event;
+ int n, max_count;
+
+ max_count = x86_pmu.num_counters + x86_pmu.num_counters_fixed;
+
+ /* current number of events already accepted */
+ n = cpuc->n_events;
+
+ if (is_x86_event(leader)) {
+ if (n >= max_count)
+ return -EINVAL;
+ cpuc->event_list[n] = leader;
+ n++;
+ }
+ if (!dogrp)
+ return n;
+
+ list_for_each_entry(event, &leader->sibling_list, group_entry) {
+ if (!is_x86_event(event) ||
+ event->state <= PERF_EVENT_STATE_OFF)
+ continue;
+
+ if (n >= max_count)
+ return -EINVAL;
+
+ cpuc->event_list[n] = event;
+ n++;
+ }
+ return n;
+}
+
+static inline void x86_assign_hw_event(struct perf_event *event,
+ struct cpu_hw_events *cpuc, int i)
+{
+ struct hw_perf_event *hwc = &event->hw;
+
+ hwc->idx = cpuc->assign[i];
+ hwc->last_cpu = smp_processor_id();
+ hwc->last_tag = ++cpuc->tags[i];
+
+ if (hwc->idx == INTEL_PMC_IDX_FIXED_BTS) {
+ hwc->config_base = 0;
+ hwc->event_base = 0;
+ } else if (hwc->idx >= INTEL_PMC_IDX_FIXED) {
+ hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
+ hwc->event_base = MSR_ARCH_PERFMON_FIXED_CTR0 + (hwc->idx - INTEL_PMC_IDX_FIXED);
+ hwc->event_base_rdpmc = (hwc->idx - INTEL_PMC_IDX_FIXED) | 1<<30;
+ } else {
+ hwc->config_base = x86_pmu_config_addr(hwc->idx);
+ hwc->event_base = x86_pmu_event_addr(hwc->idx);
+ hwc->event_base_rdpmc = x86_pmu_rdpmc_index(hwc->idx);
+ }
+}
+
+static inline int match_prev_assignment(struct hw_perf_event *hwc,
+ struct cpu_hw_events *cpuc,
+ int i)
+{
+ return hwc->idx == cpuc->assign[i] &&
+ hwc->last_cpu == smp_processor_id() &&
+ hwc->last_tag == cpuc->tags[i];
+}
+
+static void x86_pmu_start(struct perf_event *event, int flags);
+
+static void x86_pmu_enable(struct pmu *pmu)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ struct perf_event *event;
+ struct hw_perf_event *hwc;
+ int i, added = cpuc->n_added;
+
+ if (!x86_pmu_initialized())
+ return;
+
+ if (cpuc->enabled)
+ return;
+
+ if (cpuc->n_added) {
+ int n_running = cpuc->n_events - cpuc->n_added;
+ /*
+ * apply assignment obtained either from
+ * hw_perf_group_sched_in() or x86_pmu_enable()
+ *
+ * step1: save events moving to new counters
+ */
+ for (i = 0; i < n_running; i++) {
+ event = cpuc->event_list[i];
+ hwc = &event->hw;
+
+ /*
+ * we can avoid reprogramming counter if:
+ * - assigned same counter as last time
+ * - running on same CPU as last time
+ * - no other event has used the counter since
+ */
+ if (hwc->idx == -1 ||
+ match_prev_assignment(hwc, cpuc, i))
+ continue;
+
+ /*
+ * Ensure we don't accidentally enable a stopped
+ * counter simply because we rescheduled.
+ */
+ if (hwc->state & PERF_HES_STOPPED)
+ hwc->state |= PERF_HES_ARCH;
+
+ x86_pmu_stop(event, PERF_EF_UPDATE);
+ }
+
+ /*
+ * step2: reprogram moved events into new counters
+ */
+ for (i = 0; i < cpuc->n_events; i++) {
+ event = cpuc->event_list[i];
+ hwc = &event->hw;
+
+ if (!match_prev_assignment(hwc, cpuc, i))
+ x86_assign_hw_event(event, cpuc, i);
+ else if (i < n_running)
+ continue;
+
+ if (hwc->state & PERF_HES_ARCH)
+ continue;
+
+ x86_pmu_start(event, PERF_EF_RELOAD);
+ }
+ cpuc->n_added = 0;
+ perf_events_lapic_init();
+ }
+
+ cpuc->enabled = 1;
+ barrier();
+
+ x86_pmu.enable_all(added);
+}
+
+static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left);
+
+/*
+ * Set the next IRQ period, based on the hwc->period_left value.
+ * To be called with the event disabled in hw:
+ */
+int x86_perf_event_set_period(struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ s64 left = local64_read(&hwc->period_left);
+ s64 period = hwc->sample_period;
+ int ret = 0, idx = hwc->idx;
+
+ if (idx == INTEL_PMC_IDX_FIXED_BTS)
+ return 0;
+
+ /*
+ * If we are way outside a reasonable range then just skip forward:
+ */
+ if (unlikely(left <= -period)) {
+ left = period;
+ local64_set(&hwc->period_left, left);
+ hwc->last_period = period;
+ ret = 1;
+ }
+
+ if (unlikely(left <= 0)) {
+ left += period;
+ local64_set(&hwc->period_left, left);
+ hwc->last_period = period;
+ ret = 1;
+ }
+ /*
+ * Quirk: certain CPUs dont like it if just 1 hw_event is left:
+ */
+ if (unlikely(left < 2))
+ left = 2;
+
+ if (left > x86_pmu.max_period)
+ left = x86_pmu.max_period;
+
+ if (x86_pmu.limit_period)
+ left = x86_pmu.limit_period(event, left);
+
+ per_cpu(pmc_prev_left[idx], smp_processor_id()) = left;
+
+ if (!(hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) ||
+ local64_read(&hwc->prev_count) != (u64)-left) {
+ /*
+ * The hw event starts counting from this event offset,
+ * mark it to be able to extra future deltas:
+ */
+ local64_set(&hwc->prev_count, (u64)-left);
+
+ wrmsrl(hwc->event_base, (u64)(-left) & x86_pmu.cntval_mask);
+ }
+
+ /*
+ * Due to erratum on certan cpu we need
+ * a second write to be sure the register
+ * is updated properly
+ */
+ if (x86_pmu.perfctr_second_write) {
+ wrmsrl(hwc->event_base,
+ (u64)(-left) & x86_pmu.cntval_mask);
+ }
+
+ perf_event_update_userpage(event);
+
+ return ret;
+}
+
+void x86_pmu_enable_event(struct perf_event *event)
+{
+ if (__this_cpu_read(cpu_hw_events.enabled))
+ __x86_pmu_enable_event(&event->hw,
+ ARCH_PERFMON_EVENTSEL_ENABLE);
+}
+
+/*
+ * Add a single event to the PMU.
+ *
+ * The event is added to the group of enabled events
+ * but only if it can be scehduled with existing events.
+ */
+static int x86_pmu_add(struct perf_event *event, int flags)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ struct hw_perf_event *hwc;
+ int assign[X86_PMC_IDX_MAX];
+ int n, n0, ret;
+
+ hwc = &event->hw;
+
+ n0 = cpuc->n_events;
+ ret = n = collect_events(cpuc, event, false);
+ if (ret < 0)
+ goto out;
+
+ hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
+ if (!(flags & PERF_EF_START))
+ hwc->state |= PERF_HES_ARCH;
+
+ /*
+ * If group events scheduling transaction was started,
+ * skip the schedulability test here, it will be performed
+ * at commit time (->commit_txn) as a whole.
+ */
+ if (cpuc->txn_flags & PERF_PMU_TXN_ADD)
+ goto done_collect;
+
+ ret = x86_pmu.schedule_events(cpuc, n, assign);
+ if (ret)
+ goto out;
+ /*
+ * copy new assignment, now we know it is possible
+ * will be used by hw_perf_enable()
+ */
+ memcpy(cpuc->assign, assign, n*sizeof(int));
+
+done_collect:
+ /*
+ * Commit the collect_events() state. See x86_pmu_del() and
+ * x86_pmu_*_txn().
+ */
+ cpuc->n_events = n;
+ cpuc->n_added += n - n0;
+ cpuc->n_txn += n - n0;
+
+ ret = 0;
+out:
+ return ret;
+}
+
+static void x86_pmu_start(struct perf_event *event, int flags)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ int idx = event->hw.idx;
+
+ if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
+ return;
+
+ if (WARN_ON_ONCE(idx == -1))
+ return;
+
+ if (flags & PERF_EF_RELOAD) {
+ WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
+ x86_perf_event_set_period(event);
+ }
+
+ event->hw.state = 0;
+
+ cpuc->events[idx] = event;
+ __set_bit(idx, cpuc->active_mask);
+ __set_bit(idx, cpuc->running);
+ x86_pmu.enable(event);
+ perf_event_update_userpage(event);
+}
+
+void perf_event_print_debug(void)
+{
+ u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left, fixed;
+ u64 pebs, debugctl;
+ struct cpu_hw_events *cpuc;
+ unsigned long flags;
+ int cpu, idx;
+
+ if (!x86_pmu.num_counters)
+ return;
+
+ local_irq_save(flags);
+
+ cpu = smp_processor_id();
+ cpuc = &per_cpu(cpu_hw_events, cpu);
+
+ if (x86_pmu.version >= 2) {
+ rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl);
+ rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
+ rdmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, overflow);
+ rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, fixed);
+
+ pr_info("\n");
+ pr_info("CPU#%d: ctrl: %016llx\n", cpu, ctrl);
+ pr_info("CPU#%d: status: %016llx\n", cpu, status);
+ pr_info("CPU#%d: overflow: %016llx\n", cpu, overflow);
+ pr_info("CPU#%d: fixed: %016llx\n", cpu, fixed);
+ if (x86_pmu.pebs_constraints) {
+ rdmsrl(MSR_IA32_PEBS_ENABLE, pebs);
+ pr_info("CPU#%d: pebs: %016llx\n", cpu, pebs);
+ }
+ if (x86_pmu.lbr_nr) {
+ rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
+ pr_info("CPU#%d: debugctl: %016llx\n", cpu, debugctl);
+ }
+ }
+ pr_info("CPU#%d: active: %016llx\n", cpu, *(u64 *)cpuc->active_mask);
+
+ for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+ rdmsrl(x86_pmu_config_addr(idx), pmc_ctrl);
+ rdmsrl(x86_pmu_event_addr(idx), pmc_count);
+
+ prev_left = per_cpu(pmc_prev_left[idx], cpu);
+
+ pr_info("CPU#%d: gen-PMC%d ctrl: %016llx\n",
+ cpu, idx, pmc_ctrl);
+ pr_info("CPU#%d: gen-PMC%d count: %016llx\n",
+ cpu, idx, pmc_count);
+ pr_info("CPU#%d: gen-PMC%d left: %016llx\n",
+ cpu, idx, prev_left);
+ }
+ for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) {
+ rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, pmc_count);
+
+ pr_info("CPU#%d: fixed-PMC%d count: %016llx\n",
+ cpu, idx, pmc_count);
+ }
+ local_irq_restore(flags);
+}
+
+void x86_pmu_stop(struct perf_event *event, int flags)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ struct hw_perf_event *hwc = &event->hw;
+
+ if (__test_and_clear_bit(hwc->idx, cpuc->active_mask)) {
+ x86_pmu.disable(event);
+ cpuc->events[hwc->idx] = NULL;
+ WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
+ hwc->state |= PERF_HES_STOPPED;
+ }
+
+ if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
+ /*
+ * Drain the remaining delta count out of a event
+ * that we are disabling:
+ */
+ x86_perf_event_update(event);
+ hwc->state |= PERF_HES_UPTODATE;
+ }
+}
+
+static void x86_pmu_del(struct perf_event *event, int flags)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ int i;
+
+ /*
+ * event is descheduled
+ */
+ event->hw.flags &= ~PERF_X86_EVENT_COMMITTED;
+
+ /*
+ * If we're called during a txn, we don't need to do anything.
+ * The events never got scheduled and ->cancel_txn will truncate
+ * the event_list.
+ *
+ * XXX assumes any ->del() called during a TXN will only be on
+ * an event added during that same TXN.
+ */
+ if (cpuc->txn_flags & PERF_PMU_TXN_ADD)
+ return;
+
+ /*
+ * Not a TXN, therefore cleanup properly.
+ */
+ x86_pmu_stop(event, PERF_EF_UPDATE);
+
+ for (i = 0; i < cpuc->n_events; i++) {
+ if (event == cpuc->event_list[i])
+ break;
+ }
+
+ if (WARN_ON_ONCE(i == cpuc->n_events)) /* called ->del() without ->add() ? */
+ return;
+
+ /* If we have a newly added event; make sure to decrease n_added. */
+ if (i >= cpuc->n_events - cpuc->n_added)
+ --cpuc->n_added;
+
+ if (x86_pmu.put_event_constraints)
+ x86_pmu.put_event_constraints(cpuc, event);
+
+ /* Delete the array entry. */
+ while (++i < cpuc->n_events) {
+ cpuc->event_list[i-1] = cpuc->event_list[i];
+ cpuc->event_constraint[i-1] = cpuc->event_constraint[i];
+ }
+ --cpuc->n_events;
+
+ perf_event_update_userpage(event);
+}
+
+int x86_pmu_handle_irq(struct pt_regs *regs)
+{
+ struct perf_sample_data data;
+ struct cpu_hw_events *cpuc;
+ struct perf_event *event;
+ int idx, handled = 0;
+ u64 val;
+
+ cpuc = this_cpu_ptr(&cpu_hw_events);
+
+ /*
+ * Some chipsets need to unmask the LVTPC in a particular spot
+ * inside the nmi handler. As a result, the unmasking was pushed
+ * into all the nmi handlers.
+ *
+ * This generic handler doesn't seem to have any issues where the
+ * unmasking occurs so it was left at the top.
+ */
+ apic_write(APIC_LVTPC, APIC_DM_NMI);
+
+ for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+ if (!test_bit(idx, cpuc->active_mask)) {
+ /*
+ * Though we deactivated the counter some cpus
+ * might still deliver spurious interrupts still
+ * in flight. Catch them:
+ */
+ if (__test_and_clear_bit(idx, cpuc->running))
+ handled++;
+ continue;
+ }
+
+ event = cpuc->events[idx];
+
+ val = x86_perf_event_update(event);
+ if (val & (1ULL << (x86_pmu.cntval_bits - 1)))
+ continue;
+
+ /*
+ * event overflow
+ */
+ handled++;
+ perf_sample_data_init(&data, 0, event->hw.last_period);
+
+ if (!x86_perf_event_set_period(event))
+ continue;
+
+ if (perf_event_overflow(event, &data, regs))
+ x86_pmu_stop(event, 0);
+ }
+
+ if (handled)
+ inc_irq_stat(apic_perf_irqs);
+
+ return handled;
+}
+
+void perf_events_lapic_init(void)
+{
+ if (!x86_pmu.apic || !x86_pmu_initialized())
+ return;
+
+ /*
+ * Always use NMI for PMU
+ */
+ apic_write(APIC_LVTPC, APIC_DM_NMI);
+}
+
+static int
+perf_event_nmi_handler(unsigned int cmd, struct pt_regs *regs)
+{
+ u64 start_clock;
+ u64 finish_clock;
+ int ret;
+
+ /*
+ * All PMUs/events that share this PMI handler should make sure to
+ * increment active_events for their events.
+ */
+ if (!atomic_read(&active_events))
+ return NMI_DONE;
+
+ start_clock = sched_clock();
+ ret = x86_pmu.handle_irq(regs);
+ finish_clock = sched_clock();
+
+ perf_sample_event_took(finish_clock - start_clock);
+
+ return ret;
+}
+NOKPROBE_SYMBOL(perf_event_nmi_handler);
+
+struct event_constraint emptyconstraint;
+struct event_constraint unconstrained;
+
+static int
+x86_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
+{
+ unsigned int cpu = (long)hcpu;
+ struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
+ int i, ret = NOTIFY_OK;
+
+ switch (action & ~CPU_TASKS_FROZEN) {
+ case CPU_UP_PREPARE:
+ for (i = 0 ; i < X86_PERF_KFREE_MAX; i++)
+ cpuc->kfree_on_online[i] = NULL;
+ if (x86_pmu.cpu_prepare)
+ ret = x86_pmu.cpu_prepare(cpu);
+ break;
+
+ case CPU_STARTING:
+ if (x86_pmu.cpu_starting)
+ x86_pmu.cpu_starting(cpu);
+ break;
+
+ case CPU_ONLINE:
+ for (i = 0 ; i < X86_PERF_KFREE_MAX; i++) {
+ kfree(cpuc->kfree_on_online[i]);
+ cpuc->kfree_on_online[i] = NULL;
+ }
+ break;
+
+ case CPU_DYING:
+ if (x86_pmu.cpu_dying)
+ x86_pmu.cpu_dying(cpu);
+ break;
+
+ case CPU_UP_CANCELED:
+ case CPU_DEAD:
+ if (x86_pmu.cpu_dead)
+ x86_pmu.cpu_dead(cpu);
+ break;
+
+ default:
+ break;
+ }
+
+ return ret;
+}
+
+static void __init pmu_check_apic(void)
+{
+ if (cpu_has_apic)
+ return;
+
+ x86_pmu.apic = 0;
+ pr_info("no APIC, boot with the \"lapic\" boot parameter to force-enable it.\n");
+ pr_info("no hardware sampling interrupt available.\n");
+
+ /*
+ * If we have a PMU initialized but no APIC
+ * interrupts, we cannot sample hardware
+ * events (user-space has to fall back and
+ * sample via a hrtimer based software event):
+ */
+ pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT;
+
+}
+
+static struct attribute_group x86_pmu_format_group = {
+ .name = "format",
+ .attrs = NULL,
+};
+
+/*
+ * Remove all undefined events (x86_pmu.event_map(id) == 0)
+ * out of events_attr attributes.
+ */
+static void __init filter_events(struct attribute **attrs)
+{
+ struct device_attribute *d;
+ struct perf_pmu_events_attr *pmu_attr;
+ int offset = 0;
+ int i, j;
+
+ for (i = 0; attrs[i]; i++) {
+ d = (struct device_attribute *)attrs[i];
+ pmu_attr = container_of(d, struct perf_pmu_events_attr, attr);
+ /* str trumps id */
+ if (pmu_attr->event_str)
+ continue;
+ if (x86_pmu.event_map(i + offset))
+ continue;
+
+ for (j = i; attrs[j]; j++)
+ attrs[j] = attrs[j + 1];
+
+ /* Check the shifted attr. */
+ i--;
+
+ /*
+ * event_map() is index based, the attrs array is organized
+ * by increasing event index. If we shift the events, then
+ * we need to compensate for the event_map(), otherwise
+ * we are looking up the wrong event in the map
+ */
+ offset++;
+ }
+}
+
+/* Merge two pointer arrays */
+__init struct attribute **merge_attr(struct attribute **a, struct attribute **b)
+{
+ struct attribute **new;
+ int j, i;
+
+ for (j = 0; a[j]; j++)
+ ;
+ for (i = 0; b[i]; i++)
+ j++;
+ j++;
+
+ new = kmalloc(sizeof(struct attribute *) * j, GFP_KERNEL);
+ if (!new)
+ return NULL;
+
+ j = 0;
+ for (i = 0; a[i]; i++)
+ new[j++] = a[i];
+ for (i = 0; b[i]; i++)
+ new[j++] = b[i];
+ new[j] = NULL;
+
+ return new;
+}
+
+ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr, char *page)
+{
+ struct perf_pmu_events_attr *pmu_attr = \
+ container_of(attr, struct perf_pmu_events_attr, attr);
+ u64 config = x86_pmu.event_map(pmu_attr->id);
+
+ /* string trumps id */
+ if (pmu_attr->event_str)
+ return sprintf(page, "%s", pmu_attr->event_str);
+
+ return x86_pmu.events_sysfs_show(page, config);
+}
+EXPORT_SYMBOL_GPL(events_sysfs_show);
+
+EVENT_ATTR(cpu-cycles, CPU_CYCLES );
+EVENT_ATTR(instructions, INSTRUCTIONS );
+EVENT_ATTR(cache-references, CACHE_REFERENCES );
+EVENT_ATTR(cache-misses, CACHE_MISSES );
+EVENT_ATTR(branch-instructions, BRANCH_INSTRUCTIONS );
+EVENT_ATTR(branch-misses, BRANCH_MISSES );
+EVENT_ATTR(bus-cycles, BUS_CYCLES );
+EVENT_ATTR(stalled-cycles-frontend, STALLED_CYCLES_FRONTEND );
+EVENT_ATTR(stalled-cycles-backend, STALLED_CYCLES_BACKEND );
+EVENT_ATTR(ref-cycles, REF_CPU_CYCLES );
+
+static struct attribute *empty_attrs;
+
+static struct attribute *events_attr[] = {
+ EVENT_PTR(CPU_CYCLES),
+ EVENT_PTR(INSTRUCTIONS),
+ EVENT_PTR(CACHE_REFERENCES),
+ EVENT_PTR(CACHE_MISSES),
+ EVENT_PTR(BRANCH_INSTRUCTIONS),
+ EVENT_PTR(BRANCH_MISSES),
+ EVENT_PTR(BUS_CYCLES),
+ EVENT_PTR(STALLED_CYCLES_FRONTEND),
+ EVENT_PTR(STALLED_CYCLES_BACKEND),
+ EVENT_PTR(REF_CPU_CYCLES),
+ NULL,
+};
+
+static struct attribute_group x86_pmu_events_group = {
+ .name = "events",
+ .attrs = events_attr,
+};
+
+ssize_t x86_event_sysfs_show(char *page, u64 config, u64 event)
+{
+ u64 umask = (config & ARCH_PERFMON_EVENTSEL_UMASK) >> 8;
+ u64 cmask = (config & ARCH_PERFMON_EVENTSEL_CMASK) >> 24;
+ bool edge = (config & ARCH_PERFMON_EVENTSEL_EDGE);
+ bool pc = (config & ARCH_PERFMON_EVENTSEL_PIN_CONTROL);
+ bool any = (config & ARCH_PERFMON_EVENTSEL_ANY);
+ bool inv = (config & ARCH_PERFMON_EVENTSEL_INV);
+ ssize_t ret;
+
+ /*
+ * We have whole page size to spend and just little data
+ * to write, so we can safely use sprintf.
+ */
+ ret = sprintf(page, "event=0x%02llx", event);
+
+ if (umask)
+ ret += sprintf(page + ret, ",umask=0x%02llx", umask);
+
+ if (edge)
+ ret += sprintf(page + ret, ",edge");
+
+ if (pc)
+ ret += sprintf(page + ret, ",pc");
+
+ if (any)
+ ret += sprintf(page + ret, ",any");
+
+ if (inv)
+ ret += sprintf(page + ret, ",inv");
+
+ if (cmask)
+ ret += sprintf(page + ret, ",cmask=0x%02llx", cmask);
+
+ ret += sprintf(page + ret, "\n");
+
+ return ret;
+}
+
+static int __init init_hw_perf_events(void)
+{
+ struct x86_pmu_quirk *quirk;
+ int err;
+
+ pr_info("Performance Events: ");
+
+ switch (boot_cpu_data.x86_vendor) {
+ case X86_VENDOR_INTEL:
+ err = intel_pmu_init();
+ break;
+ case X86_VENDOR_AMD:
+ err = amd_pmu_init();
+ break;
+ default:
+ err = -ENOTSUPP;
+ }
+ if (err != 0) {
+ pr_cont("no PMU driver, software events only.\n");
+ return 0;
+ }
+
+ pmu_check_apic();
+
+ /* sanity check that the hardware exists or is emulated */
+ if (!check_hw_exists())
+ return 0;
+
+ pr_cont("%s PMU driver.\n", x86_pmu.name);
+
+ x86_pmu.attr_rdpmc = 1; /* enable userspace RDPMC usage by default */
+
+ for (quirk = x86_pmu.quirks; quirk; quirk = quirk->next)
+ quirk->func();
+
+ if (!x86_pmu.intel_ctrl)
+ x86_pmu.intel_ctrl = (1 << x86_pmu.num_counters) - 1;
+
+ perf_events_lapic_init();
+ register_nmi_handler(NMI_LOCAL, perf_event_nmi_handler, 0, "PMI");
+
+ unconstrained = (struct event_constraint)
+ __EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_counters) - 1,
+ 0, x86_pmu.num_counters, 0, 0);
+
+ x86_pmu_format_group.attrs = x86_pmu.format_attrs;
+
+ if (x86_pmu.event_attrs)
+ x86_pmu_events_group.attrs = x86_pmu.event_attrs;
+
+ if (!x86_pmu.events_sysfs_show)
+ x86_pmu_events_group.attrs = &empty_attrs;
+ else
+ filter_events(x86_pmu_events_group.attrs);
+
+ if (x86_pmu.cpu_events) {
+ struct attribute **tmp;
+
+ tmp = merge_attr(x86_pmu_events_group.attrs, x86_pmu.cpu_events);
+ if (!WARN_ON(!tmp))
+ x86_pmu_events_group.attrs = tmp;
+ }
+
+ pr_info("... version: %d\n", x86_pmu.version);
+ pr_info("... bit width: %d\n", x86_pmu.cntval_bits);
+ pr_info("... generic registers: %d\n", x86_pmu.num_counters);
+ pr_info("... value mask: %016Lx\n", x86_pmu.cntval_mask);
+ pr_info("... max period: %016Lx\n", x86_pmu.max_period);
+ pr_info("... fixed-purpose events: %d\n", x86_pmu.num_counters_fixed);
+ pr_info("... event mask: %016Lx\n", x86_pmu.intel_ctrl);
+
+ perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW);
+ perf_cpu_notifier(x86_pmu_notifier);
+
+ return 0;
+}
+early_initcall(init_hw_perf_events);
+
+static inline void x86_pmu_read(struct perf_event *event)
+{
+ x86_perf_event_update(event);
+}
+
+/*
+ * Start group events scheduling transaction
+ * Set the flag to make pmu::enable() not perform the
+ * schedulability test, it will be performed at commit time
+ *
+ * We only support PERF_PMU_TXN_ADD transactions. Save the
+ * transaction flags but otherwise ignore non-PERF_PMU_TXN_ADD
+ * transactions.
+ */
+static void x86_pmu_start_txn(struct pmu *pmu, unsigned int txn_flags)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+
+ WARN_ON_ONCE(cpuc->txn_flags); /* txn already in flight */
+
+ cpuc->txn_flags = txn_flags;
+ if (txn_flags & ~PERF_PMU_TXN_ADD)
+ return;
+
+ perf_pmu_disable(pmu);
+ __this_cpu_write(cpu_hw_events.n_txn, 0);
+}
+
+/*
+ * Stop group events scheduling transaction
+ * Clear the flag and pmu::enable() will perform the
+ * schedulability test.
+ */
+static void x86_pmu_cancel_txn(struct pmu *pmu)
+{
+ unsigned int txn_flags;
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+
+ WARN_ON_ONCE(!cpuc->txn_flags); /* no txn in flight */
+
+ txn_flags = cpuc->txn_flags;
+ cpuc->txn_flags = 0;
+ if (txn_flags & ~PERF_PMU_TXN_ADD)
+ return;
+
+ /*
+ * Truncate collected array by the number of events added in this
+ * transaction. See x86_pmu_add() and x86_pmu_*_txn().
+ */
+ __this_cpu_sub(cpu_hw_events.n_added, __this_cpu_read(cpu_hw_events.n_txn));
+ __this_cpu_sub(cpu_hw_events.n_events, __this_cpu_read(cpu_hw_events.n_txn));
+ perf_pmu_enable(pmu);
+}
+
+/*
+ * Commit group events scheduling transaction
+ * Perform the group schedulability test as a whole
+ * Return 0 if success
+ *
+ * Does not cancel the transaction on failure; expects the caller to do this.
+ */
+static int x86_pmu_commit_txn(struct pmu *pmu)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ int assign[X86_PMC_IDX_MAX];
+ int n, ret;
+
+ WARN_ON_ONCE(!cpuc->txn_flags); /* no txn in flight */
+
+ if (cpuc->txn_flags & ~PERF_PMU_TXN_ADD) {
+ cpuc->txn_flags = 0;
+ return 0;
+ }
+
+ n = cpuc->n_events;
+
+ if (!x86_pmu_initialized())
+ return -EAGAIN;
+
+ ret = x86_pmu.schedule_events(cpuc, n, assign);
+ if (ret)
+ return ret;
+
+ /*
+ * copy new assignment, now we know it is possible
+ * will be used by hw_perf_enable()
+ */
+ memcpy(cpuc->assign, assign, n*sizeof(int));
+
+ cpuc->txn_flags = 0;
+ perf_pmu_enable(pmu);
+ return 0;
+}
+/*
+ * a fake_cpuc is used to validate event groups. Due to
+ * the extra reg logic, we need to also allocate a fake
+ * per_core and per_cpu structure. Otherwise, group events
+ * using extra reg may conflict without the kernel being
+ * able to catch this when the last event gets added to
+ * the group.
+ */
+static void free_fake_cpuc(struct cpu_hw_events *cpuc)
+{
+ kfree(cpuc->shared_regs);
+ kfree(cpuc);
+}
+
+static struct cpu_hw_events *allocate_fake_cpuc(void)
+{
+ struct cpu_hw_events *cpuc;
+ int cpu = raw_smp_processor_id();
+
+ cpuc = kzalloc(sizeof(*cpuc), GFP_KERNEL);
+ if (!cpuc)
+ return ERR_PTR(-ENOMEM);
+
+ /* only needed, if we have extra_regs */
+ if (x86_pmu.extra_regs) {
+ cpuc->shared_regs = allocate_shared_regs(cpu);
+ if (!cpuc->shared_regs)
+ goto error;
+ }
+ cpuc->is_fake = 1;
+ return cpuc;
+error:
+ free_fake_cpuc(cpuc);
+ return ERR_PTR(-ENOMEM);
+}
+
+/*
+ * validate that we can schedule this event
+ */
+static int validate_event(struct perf_event *event)
+{
+ struct cpu_hw_events *fake_cpuc;
+ struct event_constraint *c;
+ int ret = 0;
+
+ fake_cpuc = allocate_fake_cpuc();
+ if (IS_ERR(fake_cpuc))
+ return PTR_ERR(fake_cpuc);
+
+ c = x86_pmu.get_event_constraints(fake_cpuc, -1, event);
+
+ if (!c || !c->weight)
+ ret = -EINVAL;
+
+ if (x86_pmu.put_event_constraints)
+ x86_pmu.put_event_constraints(fake_cpuc, event);
+
+ free_fake_cpuc(fake_cpuc);
+
+ return ret;
+}
+
+/*
+ * validate a single event group
+ *
+ * validation include:
+ * - check events are compatible which each other
+ * - events do not compete for the same counter
+ * - number of events <= number of counters
+ *
+ * validation ensures the group can be loaded onto the
+ * PMU if it was the only group available.
+ */
+static int validate_group(struct perf_event *event)
+{
+ struct perf_event *leader = event->group_leader;
+ struct cpu_hw_events *fake_cpuc;
+ int ret = -EINVAL, n;
+
+ fake_cpuc = allocate_fake_cpuc();
+ if (IS_ERR(fake_cpuc))
+ return PTR_ERR(fake_cpuc);
+ /*
+ * the event is not yet connected with its
+ * siblings therefore we must first collect
+ * existing siblings, then add the new event
+ * before we can simulate the scheduling
+ */
+ n = collect_events(fake_cpuc, leader, true);
+ if (n < 0)
+ goto out;
+
+ fake_cpuc->n_events = n;
+ n = collect_events(fake_cpuc, event, false);
+ if (n < 0)
+ goto out;
+
+ fake_cpuc->n_events = n;
+
+ ret = x86_pmu.schedule_events(fake_cpuc, n, NULL);
+
+out:
+ free_fake_cpuc(fake_cpuc);
+ return ret;
+}
+
+static int x86_pmu_event_init(struct perf_event *event)
+{
+ struct pmu *tmp;
+ int err;
+
+ switch (event->attr.type) {
+ case PERF_TYPE_RAW:
+ case PERF_TYPE_HARDWARE:
+ case PERF_TYPE_HW_CACHE:
+ break;
+
+ default:
+ return -ENOENT;
+ }
+
+ err = __x86_pmu_event_init(event);
+ if (!err) {
+ /*
+ * we temporarily connect event to its pmu
+ * such that validate_group() can classify
+ * it as an x86 event using is_x86_event()
+ */
+ tmp = event->pmu;
+ event->pmu = &pmu;
+
+ if (event->group_leader != event)
+ err = validate_group(event);
+ else
+ err = validate_event(event);
+
+ event->pmu = tmp;
+ }
+ if (err) {
+ if (event->destroy)
+ event->destroy(event);
+ }
+
+ if (ACCESS_ONCE(x86_pmu.attr_rdpmc))
+ event->hw.flags |= PERF_X86_EVENT_RDPMC_ALLOWED;
+
+ return err;
+}
+
+static void refresh_pce(void *ignored)
+{
+ if (current->mm)
+ load_mm_cr4(current->mm);
+}
+
+static void x86_pmu_event_mapped(struct perf_event *event)
+{
+ if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED))
+ return;
+
+ if (atomic_inc_return(&current->mm->context.perf_rdpmc_allowed) == 1)
+ on_each_cpu_mask(mm_cpumask(current->mm), refresh_pce, NULL, 1);
+}
+
+static void x86_pmu_event_unmapped(struct perf_event *event)
+{
+ if (!current->mm)
+ return;
+
+ if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED))
+ return;
+
+ if (atomic_dec_and_test(&current->mm->context.perf_rdpmc_allowed))
+ on_each_cpu_mask(mm_cpumask(current->mm), refresh_pce, NULL, 1);
+}
+
+static int x86_pmu_event_idx(struct perf_event *event)
+{
+ int idx = event->hw.idx;
+
+ if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED))
+ return 0;
+
+ if (x86_pmu.num_counters_fixed && idx >= INTEL_PMC_IDX_FIXED) {
+ idx -= INTEL_PMC_IDX_FIXED;
+ idx |= 1 << 30;
+ }
+
+ return idx + 1;
+}
+
+static ssize_t get_attr_rdpmc(struct device *cdev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ return snprintf(buf, 40, "%d\n", x86_pmu.attr_rdpmc);
+}
+
+static ssize_t set_attr_rdpmc(struct device *cdev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ unsigned long val;
+ ssize_t ret;
+
+ ret = kstrtoul(buf, 0, &val);
+ if (ret)
+ return ret;
+
+ if (val > 2)
+ return -EINVAL;
+
+ if (x86_pmu.attr_rdpmc_broken)
+ return -ENOTSUPP;
+
+ if ((val == 2) != (x86_pmu.attr_rdpmc == 2)) {
+ /*
+ * Changing into or out of always available, aka
+ * perf-event-bypassing mode. This path is extremely slow,
+ * but only root can trigger it, so it's okay.
+ */
+ if (val == 2)
+ static_key_slow_inc(&rdpmc_always_available);
+ else
+ static_key_slow_dec(&rdpmc_always_available);
+ on_each_cpu(refresh_pce, NULL, 1);
+ }
+
+ x86_pmu.attr_rdpmc = val;
+
+ return count;
+}
+
+static DEVICE_ATTR(rdpmc, S_IRUSR | S_IWUSR, get_attr_rdpmc, set_attr_rdpmc);
+
+static struct attribute *x86_pmu_attrs[] = {
+ &dev_attr_rdpmc.attr,
+ NULL,
+};
+
+static struct attribute_group x86_pmu_attr_group = {
+ .attrs = x86_pmu_attrs,
+};
+
+static const struct attribute_group *x86_pmu_attr_groups[] = {
+ &x86_pmu_attr_group,
+ &x86_pmu_format_group,
+ &x86_pmu_events_group,
+ NULL,
+};
+
+static void x86_pmu_sched_task(struct perf_event_context *ctx, bool sched_in)
+{
+ if (x86_pmu.sched_task)
+ x86_pmu.sched_task(ctx, sched_in);
+}
+
+void perf_check_microcode(void)
+{
+ if (x86_pmu.check_microcode)
+ x86_pmu.check_microcode();
+}
+EXPORT_SYMBOL_GPL(perf_check_microcode);
+
+static struct pmu pmu = {
+ .pmu_enable = x86_pmu_enable,
+ .pmu_disable = x86_pmu_disable,
+
+ .attr_groups = x86_pmu_attr_groups,
+
+ .event_init = x86_pmu_event_init,
+
+ .event_mapped = x86_pmu_event_mapped,
+ .event_unmapped = x86_pmu_event_unmapped,
+
+ .add = x86_pmu_add,
+ .del = x86_pmu_del,
+ .start = x86_pmu_start,
+ .stop = x86_pmu_stop,
+ .read = x86_pmu_read,
+
+ .start_txn = x86_pmu_start_txn,
+ .cancel_txn = x86_pmu_cancel_txn,
+ .commit_txn = x86_pmu_commit_txn,
+
+ .event_idx = x86_pmu_event_idx,
+ .sched_task = x86_pmu_sched_task,
+ .task_ctx_size = sizeof(struct x86_perf_task_context),
+};
+
+void arch_perf_update_userpage(struct perf_event *event,
+ struct perf_event_mmap_page *userpg, u64 now)
+{
+ struct cyc2ns_data *data;
+
+ userpg->cap_user_time = 0;
+ userpg->cap_user_time_zero = 0;
+ userpg->cap_user_rdpmc =
+ !!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED);
+ userpg->pmc_width = x86_pmu.cntval_bits;
+
+ if (!sched_clock_stable())
+ return;
+
+ data = cyc2ns_read_begin();
+
+ /*
+ * Internal timekeeping for enabled/running/stopped times
+ * is always in the local_clock domain.
+ */
+ userpg->cap_user_time = 1;
+ userpg->time_mult = data->cyc2ns_mul;
+ userpg->time_shift = data->cyc2ns_shift;
+ userpg->time_offset = data->cyc2ns_offset - now;
+
+ /*
+ * cap_user_time_zero doesn't make sense when we're using a different
+ * time base for the records.
+ */
+ if (event->clock == &local_clock) {
+ userpg->cap_user_time_zero = 1;
+ userpg->time_zero = data->cyc2ns_offset;
+ }
+
+ cyc2ns_read_end(data);
+}
+
+/*
+ * callchain support
+ */
+
+static int backtrace_stack(void *data, char *name)
+{
+ return 0;
+}
+
+static int backtrace_address(void *data, unsigned long addr, int reliable)
+{
+ struct perf_callchain_entry *entry = data;
+
+ return perf_callchain_store(entry, addr);
+}
+
+static const struct stacktrace_ops backtrace_ops = {
+ .stack = backtrace_stack,
+ .address = backtrace_address,
+ .walk_stack = print_context_stack_bp,
+};
+
+void
+perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs)
+{
+ if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
+ /* TODO: We don't support guest os callchain now */
+ return;
+ }
+
+ perf_callchain_store(entry, regs->ip);
+
+ dump_trace(NULL, regs, NULL, 0, &backtrace_ops, entry);
+}
+
+static inline int
+valid_user_frame(const void __user *fp, unsigned long size)
+{
+ return (__range_not_ok(fp, size, TASK_SIZE) == 0);
+}
+
+static unsigned long get_segment_base(unsigned int segment)
+{
+ struct desc_struct *desc;
+ int idx = segment >> 3;
+
+ if ((segment & SEGMENT_TI_MASK) == SEGMENT_LDT) {
+#ifdef CONFIG_MODIFY_LDT_SYSCALL
+ struct ldt_struct *ldt;
+
+ if (idx > LDT_ENTRIES)
+ return 0;
+
+ /* IRQs are off, so this synchronizes with smp_store_release */
+ ldt = lockless_dereference(current->active_mm->context.ldt);
+ if (!ldt || idx > ldt->size)
+ return 0;
+
+ desc = &ldt->entries[idx];
+#else
+ return 0;
+#endif
+ } else {
+ if (idx > GDT_ENTRIES)
+ return 0;
+
+ desc = raw_cpu_ptr(gdt_page.gdt) + idx;
+ }
+
+ return get_desc_base(desc);
+}
+
+#ifdef CONFIG_IA32_EMULATION
+
+#include <asm/compat.h>
+
+static inline int
+perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry)
+{
+ /* 32-bit process in 64-bit kernel. */
+ unsigned long ss_base, cs_base;
+ struct stack_frame_ia32 frame;
+ const void __user *fp;
+
+ if (!test_thread_flag(TIF_IA32))
+ return 0;
+
+ cs_base = get_segment_base(regs->cs);
+ ss_base = get_segment_base(regs->ss);
+
+ fp = compat_ptr(ss_base + regs->bp);
+ pagefault_disable();
+ while (entry->nr < PERF_MAX_STACK_DEPTH) {
+ unsigned long bytes;
+ frame.next_frame = 0;
+ frame.return_address = 0;
+
+ if (!access_ok(VERIFY_READ, fp, 8))
+ break;
+
+ bytes = __copy_from_user_nmi(&frame.next_frame, fp, 4);
+ if (bytes != 0)
+ break;
+ bytes = __copy_from_user_nmi(&frame.return_address, fp+4, 4);
+ if (bytes != 0)
+ break;
+
+ if (!valid_user_frame(fp, sizeof(frame)))
+ break;
+
+ perf_callchain_store(entry, cs_base + frame.return_address);
+ fp = compat_ptr(ss_base + frame.next_frame);
+ }
+ pagefault_enable();
+ return 1;
+}
+#else
+static inline int
+perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry)
+{
+ return 0;
+}
+#endif
+
+void
+perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
+{
+ struct stack_frame frame;
+ const void __user *fp;
+
+ if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
+ /* TODO: We don't support guest os callchain now */
+ return;
+ }
+
+ /*
+ * We don't know what to do with VM86 stacks.. ignore them for now.
+ */
+ if (regs->flags & (X86_VM_MASK | PERF_EFLAGS_VM))
+ return;
+
+ fp = (void __user *)regs->bp;
+
+ perf_callchain_store(entry, regs->ip);
+
+ if (!current->mm)
+ return;
+
+ if (perf_callchain_user32(regs, entry))
+ return;
+
+ pagefault_disable();
+ while (entry->nr < PERF_MAX_STACK_DEPTH) {
+ unsigned long bytes;
+ frame.next_frame = NULL;
+ frame.return_address = 0;
+
+ if (!access_ok(VERIFY_READ, fp, 16))
+ break;
+
+ bytes = __copy_from_user_nmi(&frame.next_frame, fp, 8);
+ if (bytes != 0)
+ break;
+ bytes = __copy_from_user_nmi(&frame.return_address, fp+8, 8);
+ if (bytes != 0)
+ break;
+
+ if (!valid_user_frame(fp, sizeof(frame)))
+ break;
+
+ perf_callchain_store(entry, frame.return_address);
+ fp = (void __user *)frame.next_frame;
+ }
+ pagefault_enable();
+}
+
+/*
+ * Deal with code segment offsets for the various execution modes:
+ *
+ * VM86 - the good olde 16 bit days, where the linear address is
+ * 20 bits and we use regs->ip + 0x10 * regs->cs.
+ *
+ * IA32 - Where we need to look at GDT/LDT segment descriptor tables
+ * to figure out what the 32bit base address is.
+ *
+ * X32 - has TIF_X32 set, but is running in x86_64
+ *
+ * X86_64 - CS,DS,SS,ES are all zero based.
+ */
+static unsigned long code_segment_base(struct pt_regs *regs)
+{
+ /*
+ * For IA32 we look at the GDT/LDT segment base to convert the
+ * effective IP to a linear address.
+ */
+
+#ifdef CONFIG_X86_32
+ /*
+ * If we are in VM86 mode, add the segment offset to convert to a
+ * linear address.
+ */
+ if (regs->flags & X86_VM_MASK)
+ return 0x10 * regs->cs;
+
+ if (user_mode(regs) && regs->cs != __USER_CS)
+ return get_segment_base(regs->cs);
+#else
+ if (user_mode(regs) && !user_64bit_mode(regs) &&
+ regs->cs != __USER32_CS)
+ return get_segment_base(regs->cs);
+#endif
+ return 0;
+}
+
+unsigned long perf_instruction_pointer(struct pt_regs *regs)
+{
+ if (perf_guest_cbs && perf_guest_cbs->is_in_guest())
+ return perf_guest_cbs->get_guest_ip();
+
+ return regs->ip + code_segment_base(regs);
+}
+
+unsigned long perf_misc_flags(struct pt_regs *regs)
+{
+ int misc = 0;
+
+ if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
+ if (perf_guest_cbs->is_user_mode())
+ misc |= PERF_RECORD_MISC_GUEST_USER;
+ else
+ misc |= PERF_RECORD_MISC_GUEST_KERNEL;
+ } else {
+ if (user_mode(regs))
+ misc |= PERF_RECORD_MISC_USER;
+ else
+ misc |= PERF_RECORD_MISC_KERNEL;
+ }
+
+ if (regs->flags & PERF_EFLAGS_EXACT)
+ misc |= PERF_RECORD_MISC_EXACT_IP;
+
+ return misc;
+}
+
+void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap)
+{
+ cap->version = x86_pmu.version;
+ cap->num_counters_gp = x86_pmu.num_counters;
+ cap->num_counters_fixed = x86_pmu.num_counters_fixed;
+ cap->bit_width_gp = x86_pmu.cntval_bits;
+ cap->bit_width_fixed = x86_pmu.cntval_bits;
+ cap->events_mask = (unsigned int)x86_pmu.events_maskl;
+ cap->events_mask_len = x86_pmu.events_mask_len;
+}
+EXPORT_SYMBOL_GPL(perf_get_x86_pmu_capability);
diff --git a/arch/x86/events/intel/bts.c b/arch/x86/events/intel/bts.c
new file mode 100644
index 000000000..b99dc9258
--- /dev/null
+++ b/arch/x86/events/intel/bts.c
@@ -0,0 +1,544 @@
+/*
+ * BTS PMU driver for perf
+ * Copyright (c) 2013-2014, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ */
+
+#undef DEBUG
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/bitops.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/debugfs.h>
+#include <linux/device.h>
+#include <linux/coredump.h>
+
+#include <asm-generic/sizes.h>
+#include <asm/perf_event.h>
+
+#include "../perf_event.h"
+
+struct bts_ctx {
+ struct perf_output_handle handle;
+ struct debug_store ds_back;
+ int started;
+};
+
+static DEFINE_PER_CPU(struct bts_ctx, bts_ctx);
+
+#define BTS_RECORD_SIZE 24
+#define BTS_SAFETY_MARGIN 4080
+
+struct bts_phys {
+ struct page *page;
+ unsigned long size;
+ unsigned long offset;
+ unsigned long displacement;
+};
+
+struct bts_buffer {
+ size_t real_size; /* multiple of BTS_RECORD_SIZE */
+ unsigned int nr_pages;
+ unsigned int nr_bufs;
+ unsigned int cur_buf;
+ bool snapshot;
+ local_t data_size;
+ local_t lost;
+ local_t head;
+ unsigned long end;
+ void **data_pages;
+ struct bts_phys buf[0];
+};
+
+struct pmu bts_pmu;
+
+static size_t buf_size(struct page *page)
+{
+ return 1 << (PAGE_SHIFT + page_private(page));
+}
+
+static void *
+bts_buffer_setup_aux(int cpu, void **pages, int nr_pages, bool overwrite)
+{
+ struct bts_buffer *buf;
+ struct page *page;
+ int node = (cpu == -1) ? cpu : cpu_to_node(cpu);
+ unsigned long offset;
+ size_t size = nr_pages << PAGE_SHIFT;
+ int pg, nbuf, pad;
+
+ /* count all the high order buffers */
+ for (pg = 0, nbuf = 0; pg < nr_pages;) {
+ page = virt_to_page(pages[pg]);
+ if (WARN_ON_ONCE(!PagePrivate(page) && nr_pages > 1))
+ return NULL;
+ pg += 1 << page_private(page);
+ nbuf++;
+ }
+
+ /*
+ * to avoid interrupts in overwrite mode, only allow one physical
+ */
+ if (overwrite && nbuf > 1)
+ return NULL;
+
+ buf = kzalloc_node(offsetof(struct bts_buffer, buf[nbuf]), GFP_KERNEL, node);
+ if (!buf)
+ return NULL;
+
+ buf->nr_pages = nr_pages;
+ buf->nr_bufs = nbuf;
+ buf->snapshot = overwrite;
+ buf->data_pages = pages;
+ buf->real_size = size - size % BTS_RECORD_SIZE;
+
+ for (pg = 0, nbuf = 0, offset = 0, pad = 0; nbuf < buf->nr_bufs; nbuf++) {
+ unsigned int __nr_pages;
+
+ page = virt_to_page(pages[pg]);
+ __nr_pages = PagePrivate(page) ? 1 << page_private(page) : 1;
+ buf->buf[nbuf].page = page;
+ buf->buf[nbuf].offset = offset;
+ buf->buf[nbuf].displacement = (pad ? BTS_RECORD_SIZE - pad : 0);
+ buf->buf[nbuf].size = buf_size(page) - buf->buf[nbuf].displacement;
+ pad = buf->buf[nbuf].size % BTS_RECORD_SIZE;
+ buf->buf[nbuf].size -= pad;
+
+ pg += __nr_pages;
+ offset += __nr_pages << PAGE_SHIFT;
+ }
+
+ return buf;
+}
+
+static void bts_buffer_free_aux(void *data)
+{
+ kfree(data);
+}
+
+static unsigned long bts_buffer_offset(struct bts_buffer *buf, unsigned int idx)
+{
+ return buf->buf[idx].offset + buf->buf[idx].displacement;
+}
+
+static void
+bts_config_buffer(struct bts_buffer *buf)
+{
+ int cpu = raw_smp_processor_id();
+ struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
+ struct bts_phys *phys = &buf->buf[buf->cur_buf];
+ unsigned long index, thresh = 0, end = phys->size;
+ struct page *page = phys->page;
+
+ index = local_read(&buf->head);
+
+ if (!buf->snapshot) {
+ if (buf->end < phys->offset + buf_size(page))
+ end = buf->end - phys->offset - phys->displacement;
+
+ index -= phys->offset + phys->displacement;
+
+ if (end - index > BTS_SAFETY_MARGIN)
+ thresh = end - BTS_SAFETY_MARGIN;
+ else if (end - index > BTS_RECORD_SIZE)
+ thresh = end - BTS_RECORD_SIZE;
+ else
+ thresh = end;
+ }
+
+ ds->bts_buffer_base = (u64)(long)page_address(page) + phys->displacement;
+ ds->bts_index = ds->bts_buffer_base + index;
+ ds->bts_absolute_maximum = ds->bts_buffer_base + end;
+ ds->bts_interrupt_threshold = !buf->snapshot
+ ? ds->bts_buffer_base + thresh
+ : ds->bts_absolute_maximum + BTS_RECORD_SIZE;
+}
+
+static void bts_buffer_pad_out(struct bts_phys *phys, unsigned long head)
+{
+ unsigned long index = head - phys->offset;
+
+ memset(page_address(phys->page) + index, 0, phys->size - index);
+}
+
+static bool bts_buffer_is_full(struct bts_buffer *buf, struct bts_ctx *bts)
+{
+ if (buf->snapshot)
+ return false;
+
+ if (local_read(&buf->data_size) >= bts->handle.size ||
+ bts->handle.size - local_read(&buf->data_size) < BTS_RECORD_SIZE)
+ return true;
+
+ return false;
+}
+
+static void bts_update(struct bts_ctx *bts)
+{
+ int cpu = raw_smp_processor_id();
+ struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
+ struct bts_buffer *buf = perf_get_aux(&bts->handle);
+ unsigned long index = ds->bts_index - ds->bts_buffer_base, old, head;
+
+ if (!buf)
+ return;
+
+ head = index + bts_buffer_offset(buf, buf->cur_buf);
+ old = local_xchg(&buf->head, head);
+
+ if (!buf->snapshot) {
+ if (old == head)
+ return;
+
+ if (ds->bts_index >= ds->bts_absolute_maximum)
+ local_inc(&buf->lost);
+
+ /*
+ * old and head are always in the same physical buffer, so we
+ * can subtract them to get the data size.
+ */
+ local_add(head - old, &buf->data_size);
+ } else {
+ local_set(&buf->data_size, head);
+ }
+}
+
+static void __bts_event_start(struct perf_event *event)
+{
+ struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
+ struct bts_buffer *buf = perf_get_aux(&bts->handle);
+ u64 config = 0;
+
+ if (!buf || bts_buffer_is_full(buf, bts))
+ return;
+
+ event->hw.itrace_started = 1;
+ event->hw.state = 0;
+
+ if (!buf->snapshot)
+ config |= ARCH_PERFMON_EVENTSEL_INT;
+ if (!event->attr.exclude_kernel)
+ config |= ARCH_PERFMON_EVENTSEL_OS;
+ if (!event->attr.exclude_user)
+ config |= ARCH_PERFMON_EVENTSEL_USR;
+
+ bts_config_buffer(buf);
+
+ /*
+ * local barrier to make sure that ds configuration made it
+ * before we enable BTS
+ */
+ wmb();
+
+ intel_pmu_enable_bts(config);
+}
+
+static void bts_event_start(struct perf_event *event, int flags)
+{
+ struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
+
+ __bts_event_start(event);
+
+ /* PMI handler: this counter is running and likely generating PMIs */
+ ACCESS_ONCE(bts->started) = 1;
+}
+
+static void __bts_event_stop(struct perf_event *event)
+{
+ /*
+ * No extra synchronization is mandated by the documentation to have
+ * BTS data stores globally visible.
+ */
+ intel_pmu_disable_bts();
+
+ if (event->hw.state & PERF_HES_STOPPED)
+ return;
+
+ ACCESS_ONCE(event->hw.state) |= PERF_HES_STOPPED;
+}
+
+static void bts_event_stop(struct perf_event *event, int flags)
+{
+ struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
+
+ /* PMI handler: don't restart this counter */
+ ACCESS_ONCE(bts->started) = 0;
+
+ __bts_event_stop(event);
+
+ if (flags & PERF_EF_UPDATE)
+ bts_update(bts);
+}
+
+void intel_bts_enable_local(void)
+{
+ struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
+
+ if (bts->handle.event && bts->started)
+ __bts_event_start(bts->handle.event);
+}
+
+void intel_bts_disable_local(void)
+{
+ struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
+
+ if (bts->handle.event)
+ __bts_event_stop(bts->handle.event);
+}
+
+static int
+bts_buffer_reset(struct bts_buffer *buf, struct perf_output_handle *handle)
+{
+ unsigned long head, space, next_space, pad, gap, skip, wakeup;
+ unsigned int next_buf;
+ struct bts_phys *phys, *next_phys;
+ int ret;
+
+ if (buf->snapshot)
+ return 0;
+
+ head = handle->head & ((buf->nr_pages << PAGE_SHIFT) - 1);
+ if (WARN_ON_ONCE(head != local_read(&buf->head)))
+ return -EINVAL;
+
+ phys = &buf->buf[buf->cur_buf];
+ space = phys->offset + phys->displacement + phys->size - head;
+ pad = space;
+ if (space > handle->size) {
+ space = handle->size;
+ space -= space % BTS_RECORD_SIZE;
+ }
+ if (space <= BTS_SAFETY_MARGIN) {
+ /* See if next phys buffer has more space */
+ next_buf = buf->cur_buf + 1;
+ if (next_buf >= buf->nr_bufs)
+ next_buf = 0;
+ next_phys = &buf->buf[next_buf];
+ gap = buf_size(phys->page) - phys->displacement - phys->size +
+ next_phys->displacement;
+ skip = pad + gap;
+ if (handle->size >= skip) {
+ next_space = next_phys->size;
+ if (next_space + skip > handle->size) {
+ next_space = handle->size - skip;
+ next_space -= next_space % BTS_RECORD_SIZE;
+ }
+ if (next_space > space || !space) {
+ if (pad)
+ bts_buffer_pad_out(phys, head);
+ ret = perf_aux_output_skip(handle, skip);
+ if (ret)
+ return ret;
+ /* Advance to next phys buffer */
+ phys = next_phys;
+ space = next_space;
+ head = phys->offset + phys->displacement;
+ /*
+ * After this, cur_buf and head won't match ds
+ * anymore, so we must not be racing with
+ * bts_update().
+ */
+ buf->cur_buf = next_buf;
+ local_set(&buf->head, head);
+ }
+ }
+ }
+
+ /* Don't go far beyond wakeup watermark */
+ wakeup = BTS_SAFETY_MARGIN + BTS_RECORD_SIZE + handle->wakeup -
+ handle->head;
+ if (space > wakeup) {
+ space = wakeup;
+ space -= space % BTS_RECORD_SIZE;
+ }
+
+ buf->end = head + space;
+
+ /*
+ * If we have no space, the lost notification would have been sent when
+ * we hit absolute_maximum - see bts_update()
+ */
+ if (!space)
+ return -ENOSPC;
+
+ return 0;
+}
+
+int intel_bts_interrupt(void)
+{
+ struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
+ struct perf_event *event = bts->handle.event;
+ struct bts_buffer *buf;
+ s64 old_head;
+ int err;
+
+ if (!event || !bts->started)
+ return 0;
+
+ buf = perf_get_aux(&bts->handle);
+ /*
+ * Skip snapshot counters: they don't use the interrupt, but
+ * there's no other way of telling, because the pointer will
+ * keep moving
+ */
+ if (!buf || buf->snapshot)
+ return 0;
+
+ old_head = local_read(&buf->head);
+ bts_update(bts);
+
+ /* no new data */
+ if (old_head == local_read(&buf->head))
+ return 0;
+
+ perf_aux_output_end(&bts->handle, local_xchg(&buf->data_size, 0),
+ !!local_xchg(&buf->lost, 0));
+
+ buf = perf_aux_output_begin(&bts->handle, event);
+ if (!buf)
+ return 1;
+
+ err = bts_buffer_reset(buf, &bts->handle);
+ if (err)
+ perf_aux_output_end(&bts->handle, 0, false);
+
+ return 1;
+}
+
+static void bts_event_del(struct perf_event *event, int mode)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
+ struct bts_buffer *buf = perf_get_aux(&bts->handle);
+
+ bts_event_stop(event, PERF_EF_UPDATE);
+
+ if (buf) {
+ if (buf->snapshot)
+ bts->handle.head =
+ local_xchg(&buf->data_size,
+ buf->nr_pages << PAGE_SHIFT);
+ perf_aux_output_end(&bts->handle, local_xchg(&buf->data_size, 0),
+ !!local_xchg(&buf->lost, 0));
+ }
+
+ cpuc->ds->bts_index = bts->ds_back.bts_buffer_base;
+ cpuc->ds->bts_buffer_base = bts->ds_back.bts_buffer_base;
+ cpuc->ds->bts_absolute_maximum = bts->ds_back.bts_absolute_maximum;
+ cpuc->ds->bts_interrupt_threshold = bts->ds_back.bts_interrupt_threshold;
+}
+
+static int bts_event_add(struct perf_event *event, int mode)
+{
+ struct bts_buffer *buf;
+ struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ struct hw_perf_event *hwc = &event->hw;
+ int ret = -EBUSY;
+
+ event->hw.state = PERF_HES_STOPPED;
+
+ if (test_bit(INTEL_PMC_IDX_FIXED_BTS, cpuc->active_mask))
+ return -EBUSY;
+
+ if (bts->handle.event)
+ return -EBUSY;
+
+ buf = perf_aux_output_begin(&bts->handle, event);
+ if (!buf)
+ return -EINVAL;
+
+ ret = bts_buffer_reset(buf, &bts->handle);
+ if (ret) {
+ perf_aux_output_end(&bts->handle, 0, false);
+ return ret;
+ }
+
+ bts->ds_back.bts_buffer_base = cpuc->ds->bts_buffer_base;
+ bts->ds_back.bts_absolute_maximum = cpuc->ds->bts_absolute_maximum;
+ bts->ds_back.bts_interrupt_threshold = cpuc->ds->bts_interrupt_threshold;
+
+ if (mode & PERF_EF_START) {
+ bts_event_start(event, 0);
+ if (hwc->state & PERF_HES_STOPPED) {
+ bts_event_del(event, 0);
+ return -EBUSY;
+ }
+ }
+
+ return 0;
+}
+
+static void bts_event_destroy(struct perf_event *event)
+{
+ x86_release_hardware();
+ x86_del_exclusive(x86_lbr_exclusive_bts);
+}
+
+static int bts_event_init(struct perf_event *event)
+{
+ int ret;
+
+ if (event->attr.type != bts_pmu.type)
+ return -ENOENT;
+
+ if (x86_add_exclusive(x86_lbr_exclusive_bts))
+ return -EBUSY;
+
+ /*
+ * BTS leaks kernel addresses even when CPL0 tracing is
+ * disabled, so disallow intel_bts driver for unprivileged
+ * users on paranoid systems since it provides trace data
+ * to the user in a zero-copy fashion.
+ *
+ * Note that the default paranoia setting permits unprivileged
+ * users to profile the kernel.
+ */
+ if (event->attr.exclude_kernel && perf_paranoid_kernel() &&
+ !capable(CAP_SYS_ADMIN))
+ return -EACCES;
+
+ ret = x86_reserve_hardware();
+ if (ret) {
+ x86_del_exclusive(x86_lbr_exclusive_bts);
+ return ret;
+ }
+
+ event->destroy = bts_event_destroy;
+
+ return 0;
+}
+
+static void bts_event_read(struct perf_event *event)
+{
+}
+
+static __init int bts_init(void)
+{
+ if (!boot_cpu_has(X86_FEATURE_DTES64) || !x86_pmu.bts)
+ return -ENODEV;
+
+ bts_pmu.capabilities = PERF_PMU_CAP_AUX_NO_SG | PERF_PMU_CAP_ITRACE;
+ bts_pmu.task_ctx_nr = perf_sw_context;
+ bts_pmu.event_init = bts_event_init;
+ bts_pmu.add = bts_event_add;
+ bts_pmu.del = bts_event_del;
+ bts_pmu.start = bts_event_start;
+ bts_pmu.stop = bts_event_stop;
+ bts_pmu.read = bts_event_read;
+ bts_pmu.setup_aux = bts_buffer_setup_aux;
+ bts_pmu.free_aux = bts_buffer_free_aux;
+
+ return perf_pmu_register(&bts_pmu, "intel_bts", -1);
+}
+arch_initcall(bts_init);
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
new file mode 100644
index 000000000..5210eaa4a
--- /dev/null
+++ b/arch/x86/events/intel/core.c
@@ -0,0 +1,3799 @@
+/*
+ * Per core/cpu state
+ *
+ * Used to coordinate shared registers between HT threads or
+ * among events on a single PMU.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/stddef.h>
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/export.h>
+#include <linux/nmi.h>
+
+#include <asm/cpufeature.h>
+#include <asm/hardirq.h>
+#include <asm/apic.h>
+
+#include "../perf_event.h"
+
+/*
+ * Intel PerfMon, used on Core and later.
+ */
+static u64 intel_perfmon_event_map[PERF_COUNT_HW_MAX] __read_mostly =
+{
+ [PERF_COUNT_HW_CPU_CYCLES] = 0x003c,
+ [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
+ [PERF_COUNT_HW_CACHE_REFERENCES] = 0x4f2e,
+ [PERF_COUNT_HW_CACHE_MISSES] = 0x412e,
+ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4,
+ [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5,
+ [PERF_COUNT_HW_BUS_CYCLES] = 0x013c,
+ [PERF_COUNT_HW_REF_CPU_CYCLES] = 0x0300, /* pseudo-encoding */
+};
+
+static struct event_constraint intel_core_event_constraints[] __read_mostly =
+{
+ INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */
+ INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */
+ INTEL_EVENT_CONSTRAINT(0x13, 0x2), /* DIV */
+ INTEL_EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */
+ INTEL_EVENT_CONSTRAINT(0x19, 0x2), /* DELAYED_BYPASS */
+ INTEL_EVENT_CONSTRAINT(0xc1, 0x1), /* FP_COMP_INSTR_RET */
+ EVENT_CONSTRAINT_END
+};
+
+static struct event_constraint intel_core2_event_constraints[] __read_mostly =
+{
+ FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
+ FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
+ FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
+ INTEL_EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */
+ INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */
+ INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */
+ INTEL_EVENT_CONSTRAINT(0x13, 0x2), /* DIV */
+ INTEL_EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */
+ INTEL_EVENT_CONSTRAINT(0x18, 0x1), /* IDLE_DURING_DIV */
+ INTEL_EVENT_CONSTRAINT(0x19, 0x2), /* DELAYED_BYPASS */
+ INTEL_EVENT_CONSTRAINT(0xa1, 0x1), /* RS_UOPS_DISPATCH_CYCLES */
+ INTEL_EVENT_CONSTRAINT(0xc9, 0x1), /* ITLB_MISS_RETIRED (T30-9) */
+ INTEL_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED */
+ EVENT_CONSTRAINT_END
+};
+
+static struct event_constraint intel_nehalem_event_constraints[] __read_mostly =
+{
+ FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
+ FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
+ FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
+ INTEL_EVENT_CONSTRAINT(0x40, 0x3), /* L1D_CACHE_LD */
+ INTEL_EVENT_CONSTRAINT(0x41, 0x3), /* L1D_CACHE_ST */
+ INTEL_EVENT_CONSTRAINT(0x42, 0x3), /* L1D_CACHE_LOCK */
+ INTEL_EVENT_CONSTRAINT(0x43, 0x3), /* L1D_ALL_REF */
+ INTEL_EVENT_CONSTRAINT(0x48, 0x3), /* L1D_PEND_MISS */
+ INTEL_EVENT_CONSTRAINT(0x4e, 0x3), /* L1D_PREFETCH */
+ INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */
+ INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */
+ EVENT_CONSTRAINT_END
+};
+
+static struct extra_reg intel_nehalem_extra_regs[] __read_mostly =
+{
+ /* must define OFFCORE_RSP_X first, see intel_fixup_er() */
+ INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0xffff, RSP_0),
+ INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x100b),
+ EVENT_EXTRA_END
+};
+
+static struct event_constraint intel_westmere_event_constraints[] __read_mostly =
+{
+ FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
+ FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
+ FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
+ INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */
+ INTEL_EVENT_CONSTRAINT(0x60, 0x1), /* OFFCORE_REQUESTS_OUTSTANDING */
+ INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */
+ INTEL_EVENT_CONSTRAINT(0xb3, 0x1), /* SNOOPQ_REQUEST_OUTSTANDING */
+ EVENT_CONSTRAINT_END
+};
+
+static struct event_constraint intel_snb_event_constraints[] __read_mostly =
+{
+ FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
+ FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
+ FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
+ INTEL_UEVENT_CONSTRAINT(0x04a3, 0xf), /* CYCLE_ACTIVITY.CYCLES_NO_DISPATCH */
+ INTEL_UEVENT_CONSTRAINT(0x05a3, 0xf), /* CYCLE_ACTIVITY.STALLS_L2_PENDING */
+ INTEL_UEVENT_CONSTRAINT(0x02a3, 0x4), /* CYCLE_ACTIVITY.CYCLES_L1D_PENDING */
+ INTEL_UEVENT_CONSTRAINT(0x06a3, 0x4), /* CYCLE_ACTIVITY.STALLS_L1D_PENDING */
+ INTEL_EVENT_CONSTRAINT(0x48, 0x4), /* L1D_PEND_MISS.PENDING */
+ INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */
+ INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */
+ INTEL_UEVENT_CONSTRAINT(0x04a3, 0xf), /* CYCLE_ACTIVITY.CYCLES_NO_DISPATCH */
+ INTEL_UEVENT_CONSTRAINT(0x02a3, 0x4), /* CYCLE_ACTIVITY.CYCLES_L1D_PENDING */
+
+ INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf), /* MEM_UOPS_RETIRED.* */
+ INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */
+ INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
+ INTEL_EXCLEVT_CONSTRAINT(0xd3, 0xf), /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */
+
+ EVENT_CONSTRAINT_END
+};
+
+static struct event_constraint intel_ivb_event_constraints[] __read_mostly =
+{
+ FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
+ FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
+ FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
+ INTEL_UEVENT_CONSTRAINT(0x0148, 0x4), /* L1D_PEND_MISS.PENDING */
+ INTEL_UEVENT_CONSTRAINT(0x0279, 0xf), /* IDQ.EMTPY */
+ INTEL_UEVENT_CONSTRAINT(0x019c, 0xf), /* IDQ_UOPS_NOT_DELIVERED.CORE */
+ INTEL_UEVENT_CONSTRAINT(0x02a3, 0xf), /* CYCLE_ACTIVITY.CYCLES_LDM_PENDING */
+ INTEL_UEVENT_CONSTRAINT(0x04a3, 0xf), /* CYCLE_ACTIVITY.CYCLES_NO_EXECUTE */
+ INTEL_UEVENT_CONSTRAINT(0x05a3, 0xf), /* CYCLE_ACTIVITY.STALLS_L2_PENDING */
+ INTEL_UEVENT_CONSTRAINT(0x06a3, 0xf), /* CYCLE_ACTIVITY.STALLS_LDM_PENDING */
+ INTEL_UEVENT_CONSTRAINT(0x08a3, 0x4), /* CYCLE_ACTIVITY.CYCLES_L1D_PENDING */
+ INTEL_UEVENT_CONSTRAINT(0x0ca3, 0x4), /* CYCLE_ACTIVITY.STALLS_L1D_PENDING */
+ INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */
+
+ INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf), /* MEM_UOPS_RETIRED.* */
+ INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */
+ INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
+ INTEL_EXCLEVT_CONSTRAINT(0xd3, 0xf), /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */
+
+ EVENT_CONSTRAINT_END
+};
+
+static struct extra_reg intel_westmere_extra_regs[] __read_mostly =
+{
+ /* must define OFFCORE_RSP_X first, see intel_fixup_er() */
+ INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0xffff, RSP_0),
+ INTEL_UEVENT_EXTRA_REG(0x01bb, MSR_OFFCORE_RSP_1, 0xffff, RSP_1),
+ INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x100b),
+ EVENT_EXTRA_END
+};
+
+static struct event_constraint intel_v1_event_constraints[] __read_mostly =
+{
+ EVENT_CONSTRAINT_END
+};
+
+static struct event_constraint intel_gen_event_constraints[] __read_mostly =
+{
+ FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
+ FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
+ FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
+ EVENT_CONSTRAINT_END
+};
+
+static struct event_constraint intel_slm_event_constraints[] __read_mostly =
+{
+ FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
+ FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
+ FIXED_EVENT_CONSTRAINT(0x0300, 2), /* pseudo CPU_CLK_UNHALTED.REF */
+ EVENT_CONSTRAINT_END
+};
+
+struct event_constraint intel_skl_event_constraints[] = {
+ FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
+ FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
+ FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
+ INTEL_UEVENT_CONSTRAINT(0x1c0, 0x2), /* INST_RETIRED.PREC_DIST */
+ EVENT_CONSTRAINT_END
+};
+
+static struct extra_reg intel_knl_extra_regs[] __read_mostly = {
+ INTEL_UEVENT_EXTRA_REG(0x01b7,
+ MSR_OFFCORE_RSP_0, 0x7f9ffbffffull, RSP_0),
+ INTEL_UEVENT_EXTRA_REG(0x02b7,
+ MSR_OFFCORE_RSP_1, 0x3f9ffbffffull, RSP_1),
+ EVENT_EXTRA_END
+};
+
+static struct extra_reg intel_snb_extra_regs[] __read_mostly = {
+ /* must define OFFCORE_RSP_X first, see intel_fixup_er() */
+ INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x3f807f8fffull, RSP_0),
+ INTEL_UEVENT_EXTRA_REG(0x01bb, MSR_OFFCORE_RSP_1, 0x3f807f8fffull, RSP_1),
+ INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd),
+ EVENT_EXTRA_END
+};
+
+static struct extra_reg intel_snbep_extra_regs[] __read_mostly = {
+ /* must define OFFCORE_RSP_X first, see intel_fixup_er() */
+ INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x3fffff8fffull, RSP_0),
+ INTEL_UEVENT_EXTRA_REG(0x01bb, MSR_OFFCORE_RSP_1, 0x3fffff8fffull, RSP_1),
+ INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd),
+ EVENT_EXTRA_END
+};
+
+static struct extra_reg intel_skl_extra_regs[] __read_mostly = {
+ INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x3fffff8fffull, RSP_0),
+ INTEL_UEVENT_EXTRA_REG(0x01bb, MSR_OFFCORE_RSP_1, 0x3fffff8fffull, RSP_1),
+ INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd),
+ /*
+ * Note the low 8 bits eventsel code is not a continuous field, containing
+ * some #GPing bits. These are masked out.
+ */
+ INTEL_UEVENT_EXTRA_REG(0x01c6, MSR_PEBS_FRONTEND, 0x7fff17, FE),
+ EVENT_EXTRA_END
+};
+
+EVENT_ATTR_STR(mem-loads, mem_ld_nhm, "event=0x0b,umask=0x10,ldlat=3");
+EVENT_ATTR_STR(mem-loads, mem_ld_snb, "event=0xcd,umask=0x1,ldlat=3");
+EVENT_ATTR_STR(mem-stores, mem_st_snb, "event=0xcd,umask=0x2");
+
+struct attribute *nhm_events_attrs[] = {
+ EVENT_PTR(mem_ld_nhm),
+ NULL,
+};
+
+struct attribute *snb_events_attrs[] = {
+ EVENT_PTR(mem_ld_snb),
+ EVENT_PTR(mem_st_snb),
+ NULL,
+};
+
+static struct event_constraint intel_hsw_event_constraints[] = {
+ FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
+ FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
+ FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
+ INTEL_UEVENT_CONSTRAINT(0x148, 0x4), /* L1D_PEND_MISS.PENDING */
+ INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */
+ INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */
+ /* CYCLE_ACTIVITY.CYCLES_L1D_PENDING */
+ INTEL_UEVENT_CONSTRAINT(0x08a3, 0x4),
+ /* CYCLE_ACTIVITY.STALLS_L1D_PENDING */
+ INTEL_UEVENT_CONSTRAINT(0x0ca3, 0x4),
+ /* CYCLE_ACTIVITY.CYCLES_NO_EXECUTE */
+ INTEL_UEVENT_CONSTRAINT(0x04a3, 0xf),
+
+ INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf), /* MEM_UOPS_RETIRED.* */
+ INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */
+ INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
+ INTEL_EXCLEVT_CONSTRAINT(0xd3, 0xf), /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */
+
+ EVENT_CONSTRAINT_END
+};
+
+struct event_constraint intel_bdw_event_constraints[] = {
+ FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
+ FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
+ FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
+ INTEL_UEVENT_CONSTRAINT(0x148, 0x4), /* L1D_PEND_MISS.PENDING */
+ INTEL_UBIT_EVENT_CONSTRAINT(0x8a3, 0x4), /* CYCLE_ACTIVITY.CYCLES_L1D_MISS */
+ EVENT_CONSTRAINT_END
+};
+
+static u64 intel_pmu_event_map(int hw_event)
+{
+ return intel_perfmon_event_map[hw_event];
+}
+
+/*
+ * Notes on the events:
+ * - data reads do not include code reads (comparable to earlier tables)
+ * - data counts include speculative execution (except L1 write, dtlb, bpu)
+ * - remote node access includes remote memory, remote cache, remote mmio.
+ * - prefetches are not included in the counts.
+ * - icache miss does not include decoded icache
+ */
+
+#define SKL_DEMAND_DATA_RD BIT_ULL(0)
+#define SKL_DEMAND_RFO BIT_ULL(1)
+#define SKL_ANY_RESPONSE BIT_ULL(16)
+#define SKL_SUPPLIER_NONE BIT_ULL(17)
+#define SKL_L3_MISS_LOCAL_DRAM BIT_ULL(26)
+#define SKL_L3_MISS_REMOTE_HOP0_DRAM BIT_ULL(27)
+#define SKL_L3_MISS_REMOTE_HOP1_DRAM BIT_ULL(28)
+#define SKL_L3_MISS_REMOTE_HOP2P_DRAM BIT_ULL(29)
+#define SKL_L3_MISS (SKL_L3_MISS_LOCAL_DRAM| \
+ SKL_L3_MISS_REMOTE_HOP0_DRAM| \
+ SKL_L3_MISS_REMOTE_HOP1_DRAM| \
+ SKL_L3_MISS_REMOTE_HOP2P_DRAM)
+#define SKL_SPL_HIT BIT_ULL(30)
+#define SKL_SNOOP_NONE BIT_ULL(31)
+#define SKL_SNOOP_NOT_NEEDED BIT_ULL(32)
+#define SKL_SNOOP_MISS BIT_ULL(33)
+#define SKL_SNOOP_HIT_NO_FWD BIT_ULL(34)
+#define SKL_SNOOP_HIT_WITH_FWD BIT_ULL(35)
+#define SKL_SNOOP_HITM BIT_ULL(36)
+#define SKL_SNOOP_NON_DRAM BIT_ULL(37)
+#define SKL_ANY_SNOOP (SKL_SPL_HIT|SKL_SNOOP_NONE| \
+ SKL_SNOOP_NOT_NEEDED|SKL_SNOOP_MISS| \
+ SKL_SNOOP_HIT_NO_FWD|SKL_SNOOP_HIT_WITH_FWD| \
+ SKL_SNOOP_HITM|SKL_SNOOP_NON_DRAM)
+#define SKL_DEMAND_READ SKL_DEMAND_DATA_RD
+#define SKL_SNOOP_DRAM (SKL_SNOOP_NONE| \
+ SKL_SNOOP_NOT_NEEDED|SKL_SNOOP_MISS| \
+ SKL_SNOOP_HIT_NO_FWD|SKL_SNOOP_HIT_WITH_FWD| \
+ SKL_SNOOP_HITM|SKL_SPL_HIT)
+#define SKL_DEMAND_WRITE SKL_DEMAND_RFO
+#define SKL_LLC_ACCESS SKL_ANY_RESPONSE
+#define SKL_L3_MISS_REMOTE (SKL_L3_MISS_REMOTE_HOP0_DRAM| \
+ SKL_L3_MISS_REMOTE_HOP1_DRAM| \
+ SKL_L3_MISS_REMOTE_HOP2P_DRAM)
+
+static __initconst const u64 skl_hw_cache_event_ids
+ [PERF_COUNT_HW_CACHE_MAX]
+ [PERF_COUNT_HW_CACHE_OP_MAX]
+ [PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+ [ C(L1D ) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x81d0, /* MEM_INST_RETIRED.ALL_LOADS */
+ [ C(RESULT_MISS) ] = 0x151, /* L1D.REPLACEMENT */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = 0x82d0, /* MEM_INST_RETIRED.ALL_STORES */
+ [ C(RESULT_MISS) ] = 0x0,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = 0x0,
+ [ C(RESULT_MISS) ] = 0x0,
+ },
+ },
+ [ C(L1I ) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x0,
+ [ C(RESULT_MISS) ] = 0x283, /* ICACHE_64B.MISS */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = 0x0,
+ [ C(RESULT_MISS) ] = 0x0,
+ },
+ },
+ [ C(LL ) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x1b7, /* OFFCORE_RESPONSE */
+ [ C(RESULT_MISS) ] = 0x1b7, /* OFFCORE_RESPONSE */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = 0x1b7, /* OFFCORE_RESPONSE */
+ [ C(RESULT_MISS) ] = 0x1b7, /* OFFCORE_RESPONSE */
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = 0x0,
+ [ C(RESULT_MISS) ] = 0x0,
+ },
+ },
+ [ C(DTLB) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x81d0, /* MEM_INST_RETIRED.ALL_LOADS */
+ [ C(RESULT_MISS) ] = 0x608, /* DTLB_LOAD_MISSES.WALK_COMPLETED */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = 0x82d0, /* MEM_INST_RETIRED.ALL_STORES */
+ [ C(RESULT_MISS) ] = 0x649, /* DTLB_STORE_MISSES.WALK_COMPLETED */
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = 0x0,
+ [ C(RESULT_MISS) ] = 0x0,
+ },
+ },
+ [ C(ITLB) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x2085, /* ITLB_MISSES.STLB_HIT */
+ [ C(RESULT_MISS) ] = 0xe85, /* ITLB_MISSES.WALK_COMPLETED */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ },
+ [ C(BPU ) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0xc4, /* BR_INST_RETIRED.ALL_BRANCHES */
+ [ C(RESULT_MISS) ] = 0xc5, /* BR_MISP_RETIRED.ALL_BRANCHES */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ },
+ [ C(NODE) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x1b7, /* OFFCORE_RESPONSE */
+ [ C(RESULT_MISS) ] = 0x1b7, /* OFFCORE_RESPONSE */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = 0x1b7, /* OFFCORE_RESPONSE */
+ [ C(RESULT_MISS) ] = 0x1b7, /* OFFCORE_RESPONSE */
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = 0x0,
+ [ C(RESULT_MISS) ] = 0x0,
+ },
+ },
+};
+
+static __initconst const u64 skl_hw_cache_extra_regs
+ [PERF_COUNT_HW_CACHE_MAX]
+ [PERF_COUNT_HW_CACHE_OP_MAX]
+ [PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+ [ C(LL ) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = SKL_DEMAND_READ|
+ SKL_LLC_ACCESS|SKL_ANY_SNOOP,
+ [ C(RESULT_MISS) ] = SKL_DEMAND_READ|
+ SKL_L3_MISS|SKL_ANY_SNOOP|
+ SKL_SUPPLIER_NONE,
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = SKL_DEMAND_WRITE|
+ SKL_LLC_ACCESS|SKL_ANY_SNOOP,
+ [ C(RESULT_MISS) ] = SKL_DEMAND_WRITE|
+ SKL_L3_MISS|SKL_ANY_SNOOP|
+ SKL_SUPPLIER_NONE,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = 0x0,
+ [ C(RESULT_MISS) ] = 0x0,
+ },
+ },
+ [ C(NODE) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = SKL_DEMAND_READ|
+ SKL_L3_MISS_LOCAL_DRAM|SKL_SNOOP_DRAM,
+ [ C(RESULT_MISS) ] = SKL_DEMAND_READ|
+ SKL_L3_MISS_REMOTE|SKL_SNOOP_DRAM,
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = SKL_DEMAND_WRITE|
+ SKL_L3_MISS_LOCAL_DRAM|SKL_SNOOP_DRAM,
+ [ C(RESULT_MISS) ] = SKL_DEMAND_WRITE|
+ SKL_L3_MISS_REMOTE|SKL_SNOOP_DRAM,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = 0x0,
+ [ C(RESULT_MISS) ] = 0x0,
+ },
+ },
+};
+
+#define SNB_DMND_DATA_RD (1ULL << 0)
+#define SNB_DMND_RFO (1ULL << 1)
+#define SNB_DMND_IFETCH (1ULL << 2)
+#define SNB_DMND_WB (1ULL << 3)
+#define SNB_PF_DATA_RD (1ULL << 4)
+#define SNB_PF_RFO (1ULL << 5)
+#define SNB_PF_IFETCH (1ULL << 6)
+#define SNB_LLC_DATA_RD (1ULL << 7)
+#define SNB_LLC_RFO (1ULL << 8)
+#define SNB_LLC_IFETCH (1ULL << 9)
+#define SNB_BUS_LOCKS (1ULL << 10)
+#define SNB_STRM_ST (1ULL << 11)
+#define SNB_OTHER (1ULL << 15)
+#define SNB_RESP_ANY (1ULL << 16)
+#define SNB_NO_SUPP (1ULL << 17)
+#define SNB_LLC_HITM (1ULL << 18)
+#define SNB_LLC_HITE (1ULL << 19)
+#define SNB_LLC_HITS (1ULL << 20)
+#define SNB_LLC_HITF (1ULL << 21)
+#define SNB_LOCAL (1ULL << 22)
+#define SNB_REMOTE (0xffULL << 23)
+#define SNB_SNP_NONE (1ULL << 31)
+#define SNB_SNP_NOT_NEEDED (1ULL << 32)
+#define SNB_SNP_MISS (1ULL << 33)
+#define SNB_NO_FWD (1ULL << 34)
+#define SNB_SNP_FWD (1ULL << 35)
+#define SNB_HITM (1ULL << 36)
+#define SNB_NON_DRAM (1ULL << 37)
+
+#define SNB_DMND_READ (SNB_DMND_DATA_RD|SNB_LLC_DATA_RD)
+#define SNB_DMND_WRITE (SNB_DMND_RFO|SNB_LLC_RFO)
+#define SNB_DMND_PREFETCH (SNB_PF_DATA_RD|SNB_PF_RFO)
+
+#define SNB_SNP_ANY (SNB_SNP_NONE|SNB_SNP_NOT_NEEDED| \
+ SNB_SNP_MISS|SNB_NO_FWD|SNB_SNP_FWD| \
+ SNB_HITM)
+
+#define SNB_DRAM_ANY (SNB_LOCAL|SNB_REMOTE|SNB_SNP_ANY)
+#define SNB_DRAM_REMOTE (SNB_REMOTE|SNB_SNP_ANY)
+
+#define SNB_L3_ACCESS SNB_RESP_ANY
+#define SNB_L3_MISS (SNB_DRAM_ANY|SNB_NON_DRAM)
+
+static __initconst const u64 snb_hw_cache_extra_regs
+ [PERF_COUNT_HW_CACHE_MAX]
+ [PERF_COUNT_HW_CACHE_OP_MAX]
+ [PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+ [ C(LL ) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = SNB_DMND_READ|SNB_L3_ACCESS,
+ [ C(RESULT_MISS) ] = SNB_DMND_READ|SNB_L3_MISS,
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = SNB_DMND_WRITE|SNB_L3_ACCESS,
+ [ C(RESULT_MISS) ] = SNB_DMND_WRITE|SNB_L3_MISS,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = SNB_DMND_PREFETCH|SNB_L3_ACCESS,
+ [ C(RESULT_MISS) ] = SNB_DMND_PREFETCH|SNB_L3_MISS,
+ },
+ },
+ [ C(NODE) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = SNB_DMND_READ|SNB_DRAM_ANY,
+ [ C(RESULT_MISS) ] = SNB_DMND_READ|SNB_DRAM_REMOTE,
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = SNB_DMND_WRITE|SNB_DRAM_ANY,
+ [ C(RESULT_MISS) ] = SNB_DMND_WRITE|SNB_DRAM_REMOTE,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = SNB_DMND_PREFETCH|SNB_DRAM_ANY,
+ [ C(RESULT_MISS) ] = SNB_DMND_PREFETCH|SNB_DRAM_REMOTE,
+ },
+ },
+};
+
+static __initconst const u64 snb_hw_cache_event_ids
+ [PERF_COUNT_HW_CACHE_MAX]
+ [PERF_COUNT_HW_CACHE_OP_MAX]
+ [PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+ [ C(L1D) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0xf1d0, /* MEM_UOP_RETIRED.LOADS */
+ [ C(RESULT_MISS) ] = 0x0151, /* L1D.REPLACEMENT */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = 0xf2d0, /* MEM_UOP_RETIRED.STORES */
+ [ C(RESULT_MISS) ] = 0x0851, /* L1D.ALL_M_REPLACEMENT */
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = 0x0,
+ [ C(RESULT_MISS) ] = 0x024e, /* HW_PRE_REQ.DL1_MISS */
+ },
+ },
+ [ C(L1I ) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x0,
+ [ C(RESULT_MISS) ] = 0x0280, /* ICACHE.MISSES */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = 0x0,
+ [ C(RESULT_MISS) ] = 0x0,
+ },
+ },
+ [ C(LL ) ] = {
+ [ C(OP_READ) ] = {
+ /* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */
+ [ C(RESULT_ACCESS) ] = 0x01b7,
+ /* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */
+ [ C(RESULT_MISS) ] = 0x01b7,
+ },
+ [ C(OP_WRITE) ] = {
+ /* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */
+ [ C(RESULT_ACCESS) ] = 0x01b7,
+ /* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */
+ [ C(RESULT_MISS) ] = 0x01b7,
+ },
+ [ C(OP_PREFETCH) ] = {
+ /* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */
+ [ C(RESULT_ACCESS) ] = 0x01b7,
+ /* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */
+ [ C(RESULT_MISS) ] = 0x01b7,
+ },
+ },
+ [ C(DTLB) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x81d0, /* MEM_UOP_RETIRED.ALL_LOADS */
+ [ C(RESULT_MISS) ] = 0x0108, /* DTLB_LOAD_MISSES.CAUSES_A_WALK */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = 0x82d0, /* MEM_UOP_RETIRED.ALL_STORES */
+ [ C(RESULT_MISS) ] = 0x0149, /* DTLB_STORE_MISSES.MISS_CAUSES_A_WALK */
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = 0x0,
+ [ C(RESULT_MISS) ] = 0x0,
+ },
+ },
+ [ C(ITLB) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x1085, /* ITLB_MISSES.STLB_HIT */
+ [ C(RESULT_MISS) ] = 0x0185, /* ITLB_MISSES.CAUSES_A_WALK */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ },
+ [ C(BPU ) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */
+ [ C(RESULT_MISS) ] = 0x00c5, /* BR_MISP_RETIRED.ALL_BRANCHES */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ },
+ [ C(NODE) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x01b7,
+ [ C(RESULT_MISS) ] = 0x01b7,
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = 0x01b7,
+ [ C(RESULT_MISS) ] = 0x01b7,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = 0x01b7,
+ [ C(RESULT_MISS) ] = 0x01b7,
+ },
+ },
+
+};
+
+/*
+ * Notes on the events:
+ * - data reads do not include code reads (comparable to earlier tables)
+ * - data counts include speculative execution (except L1 write, dtlb, bpu)
+ * - remote node access includes remote memory, remote cache, remote mmio.
+ * - prefetches are not included in the counts because they are not
+ * reliably counted.
+ */
+
+#define HSW_DEMAND_DATA_RD BIT_ULL(0)
+#define HSW_DEMAND_RFO BIT_ULL(1)
+#define HSW_ANY_RESPONSE BIT_ULL(16)
+#define HSW_SUPPLIER_NONE BIT_ULL(17)
+#define HSW_L3_MISS_LOCAL_DRAM BIT_ULL(22)
+#define HSW_L3_MISS_REMOTE_HOP0 BIT_ULL(27)
+#define HSW_L3_MISS_REMOTE_HOP1 BIT_ULL(28)
+#define HSW_L3_MISS_REMOTE_HOP2P BIT_ULL(29)
+#define HSW_L3_MISS (HSW_L3_MISS_LOCAL_DRAM| \
+ HSW_L3_MISS_REMOTE_HOP0|HSW_L3_MISS_REMOTE_HOP1| \
+ HSW_L3_MISS_REMOTE_HOP2P)
+#define HSW_SNOOP_NONE BIT_ULL(31)
+#define HSW_SNOOP_NOT_NEEDED BIT_ULL(32)
+#define HSW_SNOOP_MISS BIT_ULL(33)
+#define HSW_SNOOP_HIT_NO_FWD BIT_ULL(34)
+#define HSW_SNOOP_HIT_WITH_FWD BIT_ULL(35)
+#define HSW_SNOOP_HITM BIT_ULL(36)
+#define HSW_SNOOP_NON_DRAM BIT_ULL(37)
+#define HSW_ANY_SNOOP (HSW_SNOOP_NONE| \
+ HSW_SNOOP_NOT_NEEDED|HSW_SNOOP_MISS| \
+ HSW_SNOOP_HIT_NO_FWD|HSW_SNOOP_HIT_WITH_FWD| \
+ HSW_SNOOP_HITM|HSW_SNOOP_NON_DRAM)
+#define HSW_SNOOP_DRAM (HSW_ANY_SNOOP & ~HSW_SNOOP_NON_DRAM)
+#define HSW_DEMAND_READ HSW_DEMAND_DATA_RD
+#define HSW_DEMAND_WRITE HSW_DEMAND_RFO
+#define HSW_L3_MISS_REMOTE (HSW_L3_MISS_REMOTE_HOP0|\
+ HSW_L3_MISS_REMOTE_HOP1|HSW_L3_MISS_REMOTE_HOP2P)
+#define HSW_LLC_ACCESS HSW_ANY_RESPONSE
+
+#define BDW_L3_MISS_LOCAL BIT(26)
+#define BDW_L3_MISS (BDW_L3_MISS_LOCAL| \
+ HSW_L3_MISS_REMOTE_HOP0|HSW_L3_MISS_REMOTE_HOP1| \
+ HSW_L3_MISS_REMOTE_HOP2P)
+
+
+static __initconst const u64 hsw_hw_cache_event_ids
+ [PERF_COUNT_HW_CACHE_MAX]
+ [PERF_COUNT_HW_CACHE_OP_MAX]
+ [PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+ [ C(L1D ) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x81d0, /* MEM_UOPS_RETIRED.ALL_LOADS */
+ [ C(RESULT_MISS) ] = 0x151, /* L1D.REPLACEMENT */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = 0x82d0, /* MEM_UOPS_RETIRED.ALL_STORES */
+ [ C(RESULT_MISS) ] = 0x0,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = 0x0,
+ [ C(RESULT_MISS) ] = 0x0,
+ },
+ },
+ [ C(L1I ) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x0,
+ [ C(RESULT_MISS) ] = 0x280, /* ICACHE.MISSES */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = 0x0,
+ [ C(RESULT_MISS) ] = 0x0,
+ },
+ },
+ [ C(LL ) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x1b7, /* OFFCORE_RESPONSE */
+ [ C(RESULT_MISS) ] = 0x1b7, /* OFFCORE_RESPONSE */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = 0x1b7, /* OFFCORE_RESPONSE */
+ [ C(RESULT_MISS) ] = 0x1b7, /* OFFCORE_RESPONSE */
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = 0x0,
+ [ C(RESULT_MISS) ] = 0x0,
+ },
+ },
+ [ C(DTLB) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x81d0, /* MEM_UOPS_RETIRED.ALL_LOADS */
+ [ C(RESULT_MISS) ] = 0x108, /* DTLB_LOAD_MISSES.MISS_CAUSES_A_WALK */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = 0x82d0, /* MEM_UOPS_RETIRED.ALL_STORES */
+ [ C(RESULT_MISS) ] = 0x149, /* DTLB_STORE_MISSES.MISS_CAUSES_A_WALK */
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = 0x0,
+ [ C(RESULT_MISS) ] = 0x0,
+ },
+ },
+ [ C(ITLB) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x6085, /* ITLB_MISSES.STLB_HIT */
+ [ C(RESULT_MISS) ] = 0x185, /* ITLB_MISSES.MISS_CAUSES_A_WALK */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ },
+ [ C(BPU ) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0xc4, /* BR_INST_RETIRED.ALL_BRANCHES */
+ [ C(RESULT_MISS) ] = 0xc5, /* BR_MISP_RETIRED.ALL_BRANCHES */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ },
+ [ C(NODE) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x1b7, /* OFFCORE_RESPONSE */
+ [ C(RESULT_MISS) ] = 0x1b7, /* OFFCORE_RESPONSE */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = 0x1b7, /* OFFCORE_RESPONSE */
+ [ C(RESULT_MISS) ] = 0x1b7, /* OFFCORE_RESPONSE */
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = 0x0,
+ [ C(RESULT_MISS) ] = 0x0,
+ },
+ },
+};
+
+static __initconst const u64 hsw_hw_cache_extra_regs
+ [PERF_COUNT_HW_CACHE_MAX]
+ [PERF_COUNT_HW_CACHE_OP_MAX]
+ [PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+ [ C(LL ) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = HSW_DEMAND_READ|
+ HSW_LLC_ACCESS,
+ [ C(RESULT_MISS) ] = HSW_DEMAND_READ|
+ HSW_L3_MISS|HSW_ANY_SNOOP,
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = HSW_DEMAND_WRITE|
+ HSW_LLC_ACCESS,
+ [ C(RESULT_MISS) ] = HSW_DEMAND_WRITE|
+ HSW_L3_MISS|HSW_ANY_SNOOP,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = 0x0,
+ [ C(RESULT_MISS) ] = 0x0,
+ },
+ },
+ [ C(NODE) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = HSW_DEMAND_READ|
+ HSW_L3_MISS_LOCAL_DRAM|
+ HSW_SNOOP_DRAM,
+ [ C(RESULT_MISS) ] = HSW_DEMAND_READ|
+ HSW_L3_MISS_REMOTE|
+ HSW_SNOOP_DRAM,
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = HSW_DEMAND_WRITE|
+ HSW_L3_MISS_LOCAL_DRAM|
+ HSW_SNOOP_DRAM,
+ [ C(RESULT_MISS) ] = HSW_DEMAND_WRITE|
+ HSW_L3_MISS_REMOTE|
+ HSW_SNOOP_DRAM,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = 0x0,
+ [ C(RESULT_MISS) ] = 0x0,
+ },
+ },
+};
+
+static __initconst const u64 westmere_hw_cache_event_ids
+ [PERF_COUNT_HW_CACHE_MAX]
+ [PERF_COUNT_HW_CACHE_OP_MAX]
+ [PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+ [ C(L1D) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS */
+ [ C(RESULT_MISS) ] = 0x0151, /* L1D.REPL */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETURED.STORES */
+ [ C(RESULT_MISS) ] = 0x0251, /* L1D.M_REPL */
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS */
+ [ C(RESULT_MISS) ] = 0x024e, /* L1D_PREFETCH.MISS */
+ },
+ },
+ [ C(L1I ) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */
+ [ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = 0x0,
+ [ C(RESULT_MISS) ] = 0x0,
+ },
+ },
+ [ C(LL ) ] = {
+ [ C(OP_READ) ] = {
+ /* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */
+ [ C(RESULT_ACCESS) ] = 0x01b7,
+ /* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */
+ [ C(RESULT_MISS) ] = 0x01b7,
+ },
+ /*
+ * Use RFO, not WRITEBACK, because a write miss would typically occur
+ * on RFO.
+ */
+ [ C(OP_WRITE) ] = {
+ /* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */
+ [ C(RESULT_ACCESS) ] = 0x01b7,
+ /* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */
+ [ C(RESULT_MISS) ] = 0x01b7,
+ },
+ [ C(OP_PREFETCH) ] = {
+ /* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */
+ [ C(RESULT_ACCESS) ] = 0x01b7,
+ /* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */
+ [ C(RESULT_MISS) ] = 0x01b7,
+ },
+ },
+ [ C(DTLB) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS */
+ [ C(RESULT_MISS) ] = 0x0108, /* DTLB_LOAD_MISSES.ANY */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETURED.STORES */
+ [ C(RESULT_MISS) ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS */
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = 0x0,
+ [ C(RESULT_MISS) ] = 0x0,
+ },
+ },
+ [ C(ITLB) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P */
+ [ C(RESULT_MISS) ] = 0x0185, /* ITLB_MISSES.ANY */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ },
+ [ C(BPU ) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */
+ [ C(RESULT_MISS) ] = 0x03e8, /* BPU_CLEARS.ANY */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ },
+ [ C(NODE) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x01b7,
+ [ C(RESULT_MISS) ] = 0x01b7,
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = 0x01b7,
+ [ C(RESULT_MISS) ] = 0x01b7,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = 0x01b7,
+ [ C(RESULT_MISS) ] = 0x01b7,
+ },
+ },
+};
+
+/*
+ * Nehalem/Westmere MSR_OFFCORE_RESPONSE bits;
+ * See IA32 SDM Vol 3B 30.6.1.3
+ */
+
+#define NHM_DMND_DATA_RD (1 << 0)
+#define NHM_DMND_RFO (1 << 1)
+#define NHM_DMND_IFETCH (1 << 2)
+#define NHM_DMND_WB (1 << 3)
+#define NHM_PF_DATA_RD (1 << 4)
+#define NHM_PF_DATA_RFO (1 << 5)
+#define NHM_PF_IFETCH (1 << 6)
+#define NHM_OFFCORE_OTHER (1 << 7)
+#define NHM_UNCORE_HIT (1 << 8)
+#define NHM_OTHER_CORE_HIT_SNP (1 << 9)
+#define NHM_OTHER_CORE_HITM (1 << 10)
+ /* reserved */
+#define NHM_REMOTE_CACHE_FWD (1 << 12)
+#define NHM_REMOTE_DRAM (1 << 13)
+#define NHM_LOCAL_DRAM (1 << 14)
+#define NHM_NON_DRAM (1 << 15)
+
+#define NHM_LOCAL (NHM_LOCAL_DRAM|NHM_REMOTE_CACHE_FWD)
+#define NHM_REMOTE (NHM_REMOTE_DRAM)
+
+#define NHM_DMND_READ (NHM_DMND_DATA_RD)
+#define NHM_DMND_WRITE (NHM_DMND_RFO|NHM_DMND_WB)
+#define NHM_DMND_PREFETCH (NHM_PF_DATA_RD|NHM_PF_DATA_RFO)
+
+#define NHM_L3_HIT (NHM_UNCORE_HIT|NHM_OTHER_CORE_HIT_SNP|NHM_OTHER_CORE_HITM)
+#define NHM_L3_MISS (NHM_NON_DRAM|NHM_LOCAL_DRAM|NHM_REMOTE_DRAM|NHM_REMOTE_CACHE_FWD)
+#define NHM_L3_ACCESS (NHM_L3_HIT|NHM_L3_MISS)
+
+static __initconst const u64 nehalem_hw_cache_extra_regs
+ [PERF_COUNT_HW_CACHE_MAX]
+ [PERF_COUNT_HW_CACHE_OP_MAX]
+ [PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+ [ C(LL ) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = NHM_DMND_READ|NHM_L3_ACCESS,
+ [ C(RESULT_MISS) ] = NHM_DMND_READ|NHM_L3_MISS,
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = NHM_DMND_WRITE|NHM_L3_ACCESS,
+ [ C(RESULT_MISS) ] = NHM_DMND_WRITE|NHM_L3_MISS,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = NHM_DMND_PREFETCH|NHM_L3_ACCESS,
+ [ C(RESULT_MISS) ] = NHM_DMND_PREFETCH|NHM_L3_MISS,
+ },
+ },
+ [ C(NODE) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = NHM_DMND_READ|NHM_LOCAL|NHM_REMOTE,
+ [ C(RESULT_MISS) ] = NHM_DMND_READ|NHM_REMOTE,
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = NHM_DMND_WRITE|NHM_LOCAL|NHM_REMOTE,
+ [ C(RESULT_MISS) ] = NHM_DMND_WRITE|NHM_REMOTE,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = NHM_DMND_PREFETCH|NHM_LOCAL|NHM_REMOTE,
+ [ C(RESULT_MISS) ] = NHM_DMND_PREFETCH|NHM_REMOTE,
+ },
+ },
+};
+
+static __initconst const u64 nehalem_hw_cache_event_ids
+ [PERF_COUNT_HW_CACHE_MAX]
+ [PERF_COUNT_HW_CACHE_OP_MAX]
+ [PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+ [ C(L1D) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS */
+ [ C(RESULT_MISS) ] = 0x0151, /* L1D.REPL */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETURED.STORES */
+ [ C(RESULT_MISS) ] = 0x0251, /* L1D.M_REPL */
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS */
+ [ C(RESULT_MISS) ] = 0x024e, /* L1D_PREFETCH.MISS */
+ },
+ },
+ [ C(L1I ) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */
+ [ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = 0x0,
+ [ C(RESULT_MISS) ] = 0x0,
+ },
+ },
+ [ C(LL ) ] = {
+ [ C(OP_READ) ] = {
+ /* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */
+ [ C(RESULT_ACCESS) ] = 0x01b7,
+ /* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */
+ [ C(RESULT_MISS) ] = 0x01b7,
+ },
+ /*
+ * Use RFO, not WRITEBACK, because a write miss would typically occur
+ * on RFO.
+ */
+ [ C(OP_WRITE) ] = {
+ /* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */
+ [ C(RESULT_ACCESS) ] = 0x01b7,
+ /* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */
+ [ C(RESULT_MISS) ] = 0x01b7,
+ },
+ [ C(OP_PREFETCH) ] = {
+ /* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */
+ [ C(RESULT_ACCESS) ] = 0x01b7,
+ /* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */
+ [ C(RESULT_MISS) ] = 0x01b7,
+ },
+ },
+ [ C(DTLB) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI (alias) */
+ [ C(RESULT_MISS) ] = 0x0108, /* DTLB_LOAD_MISSES.ANY */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI (alias) */
+ [ C(RESULT_MISS) ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS */
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = 0x0,
+ [ C(RESULT_MISS) ] = 0x0,
+ },
+ },
+ [ C(ITLB) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P */
+ [ C(RESULT_MISS) ] = 0x20c8, /* ITLB_MISS_RETIRED */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ },
+ [ C(BPU ) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */
+ [ C(RESULT_MISS) ] = 0x03e8, /* BPU_CLEARS.ANY */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ },
+ [ C(NODE) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x01b7,
+ [ C(RESULT_MISS) ] = 0x01b7,
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = 0x01b7,
+ [ C(RESULT_MISS) ] = 0x01b7,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = 0x01b7,
+ [ C(RESULT_MISS) ] = 0x01b7,
+ },
+ },
+};
+
+static __initconst const u64 core2_hw_cache_event_ids
+ [PERF_COUNT_HW_CACHE_MAX]
+ [PERF_COUNT_HW_CACHE_OP_MAX]
+ [PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+ [ C(L1D) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI */
+ [ C(RESULT_MISS) ] = 0x0140, /* L1D_CACHE_LD.I_STATE */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI */
+ [ C(RESULT_MISS) ] = 0x0141, /* L1D_CACHE_ST.I_STATE */
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = 0x104e, /* L1D_PREFETCH.REQUESTS */
+ [ C(RESULT_MISS) ] = 0,
+ },
+ },
+ [ C(L1I ) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x0080, /* L1I.READS */
+ [ C(RESULT_MISS) ] = 0x0081, /* L1I.MISSES */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = 0,
+ [ C(RESULT_MISS) ] = 0,
+ },
+ },
+ [ C(LL ) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI */
+ [ C(RESULT_MISS) ] = 0x4129, /* L2_LD.ISTATE */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI */
+ [ C(RESULT_MISS) ] = 0x412A, /* L2_ST.ISTATE */
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = 0,
+ [ C(RESULT_MISS) ] = 0,
+ },
+ },
+ [ C(DTLB) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI (alias) */
+ [ C(RESULT_MISS) ] = 0x0208, /* DTLB_MISSES.MISS_LD */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI (alias) */
+ [ C(RESULT_MISS) ] = 0x0808, /* DTLB_MISSES.MISS_ST */
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = 0,
+ [ C(RESULT_MISS) ] = 0,
+ },
+ },
+ [ C(ITLB) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */
+ [ C(RESULT_MISS) ] = 0x1282, /* ITLBMISSES */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ },
+ [ C(BPU ) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY */
+ [ C(RESULT_MISS) ] = 0x00c5, /* BP_INST_RETIRED.MISPRED */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ },
+};
+
+static __initconst const u64 atom_hw_cache_event_ids
+ [PERF_COUNT_HW_CACHE_MAX]
+ [PERF_COUNT_HW_CACHE_OP_MAX]
+ [PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+ [ C(L1D) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE.LD */
+ [ C(RESULT_MISS) ] = 0,
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE.ST */
+ [ C(RESULT_MISS) ] = 0,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = 0x0,
+ [ C(RESULT_MISS) ] = 0,
+ },
+ },
+ [ C(L1I ) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */
+ [ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = 0,
+ [ C(RESULT_MISS) ] = 0,
+ },
+ },
+ [ C(LL ) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI */
+ [ C(RESULT_MISS) ] = 0x4129, /* L2_LD.ISTATE */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI */
+ [ C(RESULT_MISS) ] = 0x412A, /* L2_ST.ISTATE */
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = 0,
+ [ C(RESULT_MISS) ] = 0,
+ },
+ },
+ [ C(DTLB) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE_LD.MESI (alias) */
+ [ C(RESULT_MISS) ] = 0x0508, /* DTLB_MISSES.MISS_LD */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE_ST.MESI (alias) */
+ [ C(RESULT_MISS) ] = 0x0608, /* DTLB_MISSES.MISS_ST */
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = 0,
+ [ C(RESULT_MISS) ] = 0,
+ },
+ },
+ [ C(ITLB) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */
+ [ C(RESULT_MISS) ] = 0x0282, /* ITLB.MISSES */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ },
+ [ C(BPU ) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY */
+ [ C(RESULT_MISS) ] = 0x00c5, /* BP_INST_RETIRED.MISPRED */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ },
+};
+
+static struct extra_reg intel_slm_extra_regs[] __read_mostly =
+{
+ /* must define OFFCORE_RSP_X first, see intel_fixup_er() */
+ INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x768005ffffull, RSP_0),
+ INTEL_UEVENT_EXTRA_REG(0x02b7, MSR_OFFCORE_RSP_1, 0x368005ffffull, RSP_1),
+ EVENT_EXTRA_END
+};
+
+#define SLM_DMND_READ SNB_DMND_DATA_RD
+#define SLM_DMND_WRITE SNB_DMND_RFO
+#define SLM_DMND_PREFETCH (SNB_PF_DATA_RD|SNB_PF_RFO)
+
+#define SLM_SNP_ANY (SNB_SNP_NONE|SNB_SNP_MISS|SNB_NO_FWD|SNB_HITM)
+#define SLM_LLC_ACCESS SNB_RESP_ANY
+#define SLM_LLC_MISS (SLM_SNP_ANY|SNB_NON_DRAM)
+
+static __initconst const u64 slm_hw_cache_extra_regs
+ [PERF_COUNT_HW_CACHE_MAX]
+ [PERF_COUNT_HW_CACHE_OP_MAX]
+ [PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+ [ C(LL ) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = SLM_DMND_READ|SLM_LLC_ACCESS,
+ [ C(RESULT_MISS) ] = 0,
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = SLM_DMND_WRITE|SLM_LLC_ACCESS,
+ [ C(RESULT_MISS) ] = SLM_DMND_WRITE|SLM_LLC_MISS,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = SLM_DMND_PREFETCH|SLM_LLC_ACCESS,
+ [ C(RESULT_MISS) ] = SLM_DMND_PREFETCH|SLM_LLC_MISS,
+ },
+ },
+};
+
+static __initconst const u64 slm_hw_cache_event_ids
+ [PERF_COUNT_HW_CACHE_MAX]
+ [PERF_COUNT_HW_CACHE_OP_MAX]
+ [PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+ [ C(L1D) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0,
+ [ C(RESULT_MISS) ] = 0x0104, /* LD_DCU_MISS */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = 0,
+ [ C(RESULT_MISS) ] = 0,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = 0,
+ [ C(RESULT_MISS) ] = 0,
+ },
+ },
+ [ C(L1I ) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x0380, /* ICACHE.ACCESSES */
+ [ C(RESULT_MISS) ] = 0x0280, /* ICACGE.MISSES */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = 0,
+ [ C(RESULT_MISS) ] = 0,
+ },
+ },
+ [ C(LL ) ] = {
+ [ C(OP_READ) ] = {
+ /* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */
+ [ C(RESULT_ACCESS) ] = 0x01b7,
+ [ C(RESULT_MISS) ] = 0,
+ },
+ [ C(OP_WRITE) ] = {
+ /* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */
+ [ C(RESULT_ACCESS) ] = 0x01b7,
+ /* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */
+ [ C(RESULT_MISS) ] = 0x01b7,
+ },
+ [ C(OP_PREFETCH) ] = {
+ /* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */
+ [ C(RESULT_ACCESS) ] = 0x01b7,
+ /* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */
+ [ C(RESULT_MISS) ] = 0x01b7,
+ },
+ },
+ [ C(DTLB) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0,
+ [ C(RESULT_MISS) ] = 0x0804, /* LD_DTLB_MISS */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = 0,
+ [ C(RESULT_MISS) ] = 0,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = 0,
+ [ C(RESULT_MISS) ] = 0,
+ },
+ },
+ [ C(ITLB) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */
+ [ C(RESULT_MISS) ] = 0x40205, /* PAGE_WALKS.I_SIDE_WALKS */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ },
+ [ C(BPU ) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY */
+ [ C(RESULT_MISS) ] = 0x00c5, /* BP_INST_RETIRED.MISPRED */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ },
+};
+
+#define KNL_OT_L2_HITE BIT_ULL(19) /* Other Tile L2 Hit */
+#define KNL_OT_L2_HITF BIT_ULL(20) /* Other Tile L2 Hit */
+#define KNL_MCDRAM_LOCAL BIT_ULL(21)
+#define KNL_MCDRAM_FAR BIT_ULL(22)
+#define KNL_DDR_LOCAL BIT_ULL(23)
+#define KNL_DDR_FAR BIT_ULL(24)
+#define KNL_DRAM_ANY (KNL_MCDRAM_LOCAL | KNL_MCDRAM_FAR | \
+ KNL_DDR_LOCAL | KNL_DDR_FAR)
+#define KNL_L2_READ SLM_DMND_READ
+#define KNL_L2_WRITE SLM_DMND_WRITE
+#define KNL_L2_PREFETCH SLM_DMND_PREFETCH
+#define KNL_L2_ACCESS SLM_LLC_ACCESS
+#define KNL_L2_MISS (KNL_OT_L2_HITE | KNL_OT_L2_HITF | \
+ KNL_DRAM_ANY | SNB_SNP_ANY | \
+ SNB_NON_DRAM)
+
+static __initconst const u64 knl_hw_cache_extra_regs
+ [PERF_COUNT_HW_CACHE_MAX]
+ [PERF_COUNT_HW_CACHE_OP_MAX]
+ [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+ [C(LL)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = KNL_L2_READ | KNL_L2_ACCESS,
+ [C(RESULT_MISS)] = 0,
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = KNL_L2_WRITE | KNL_L2_ACCESS,
+ [C(RESULT_MISS)] = KNL_L2_WRITE | KNL_L2_MISS,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = KNL_L2_PREFETCH | KNL_L2_ACCESS,
+ [C(RESULT_MISS)] = KNL_L2_PREFETCH | KNL_L2_MISS,
+ },
+ },
+};
+
+/*
+ * Used from PMIs where the LBRs are already disabled.
+ *
+ * This function could be called consecutively. It is required to remain in
+ * disabled state if called consecutively.
+ *
+ * During consecutive calls, the same disable value will be written to related
+ * registers, so the PMU state remains unchanged. hw.state in
+ * intel_bts_disable_local will remain PERF_HES_STOPPED too in consecutive
+ * calls.
+ */
+static void __intel_pmu_disable_all(void)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+
+ wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
+
+ if (test_bit(INTEL_PMC_IDX_FIXED_BTS, cpuc->active_mask))
+ intel_pmu_disable_bts();
+ else
+ intel_bts_disable_local();
+
+ intel_pmu_pebs_disable_all();
+}
+
+static void intel_pmu_disable_all(void)
+{
+ __intel_pmu_disable_all();
+ intel_pmu_lbr_disable_all();
+}
+
+static void __intel_pmu_enable_all(int added, bool pmi)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+
+ intel_pmu_pebs_enable_all();
+ intel_pmu_lbr_enable_all(pmi);
+ wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL,
+ x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_guest_mask);
+
+ if (test_bit(INTEL_PMC_IDX_FIXED_BTS, cpuc->active_mask)) {
+ struct perf_event *event =
+ cpuc->events[INTEL_PMC_IDX_FIXED_BTS];
+
+ if (WARN_ON_ONCE(!event))
+ return;
+
+ intel_pmu_enable_bts(event->hw.config);
+ } else
+ intel_bts_enable_local();
+}
+
+static void intel_pmu_enable_all(int added)
+{
+ __intel_pmu_enable_all(added, false);
+}
+
+/*
+ * Workaround for:
+ * Intel Errata AAK100 (model 26)
+ * Intel Errata AAP53 (model 30)
+ * Intel Errata BD53 (model 44)
+ *
+ * The official story:
+ * These chips need to be 'reset' when adding counters by programming the
+ * magic three (non-counting) events 0x4300B5, 0x4300D2, and 0x4300B1 either
+ * in sequence on the same PMC or on different PMCs.
+ *
+ * In practise it appears some of these events do in fact count, and
+ * we need to programm all 4 events.
+ */
+static void intel_pmu_nhm_workaround(void)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ static const unsigned long nhm_magic[4] = {
+ 0x4300B5,
+ 0x4300D2,
+ 0x4300B1,
+ 0x4300B1
+ };
+ struct perf_event *event;
+ int i;
+
+ /*
+ * The Errata requires below steps:
+ * 1) Clear MSR_IA32_PEBS_ENABLE and MSR_CORE_PERF_GLOBAL_CTRL;
+ * 2) Configure 4 PERFEVTSELx with the magic events and clear
+ * the corresponding PMCx;
+ * 3) set bit0~bit3 of MSR_CORE_PERF_GLOBAL_CTRL;
+ * 4) Clear MSR_CORE_PERF_GLOBAL_CTRL;
+ * 5) Clear 4 pairs of ERFEVTSELx and PMCx;
+ */
+
+ /*
+ * The real steps we choose are a little different from above.
+ * A) To reduce MSR operations, we don't run step 1) as they
+ * are already cleared before this function is called;
+ * B) Call x86_perf_event_update to save PMCx before configuring
+ * PERFEVTSELx with magic number;
+ * C) With step 5), we do clear only when the PERFEVTSELx is
+ * not used currently.
+ * D) Call x86_perf_event_set_period to restore PMCx;
+ */
+
+ /* We always operate 4 pairs of PERF Counters */
+ for (i = 0; i < 4; i++) {
+ event = cpuc->events[i];
+ if (event)
+ x86_perf_event_update(event);
+ }
+
+ for (i = 0; i < 4; i++) {
+ wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + i, nhm_magic[i]);
+ wrmsrl(MSR_ARCH_PERFMON_PERFCTR0 + i, 0x0);
+ }
+
+ wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0xf);
+ wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0x0);
+
+ for (i = 0; i < 4; i++) {
+ event = cpuc->events[i];
+
+ if (event) {
+ x86_perf_event_set_period(event);
+ __x86_pmu_enable_event(&event->hw,
+ ARCH_PERFMON_EVENTSEL_ENABLE);
+ } else
+ wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + i, 0x0);
+ }
+}
+
+static void intel_pmu_nhm_enable_all(int added)
+{
+ if (added)
+ intel_pmu_nhm_workaround();
+ intel_pmu_enable_all(added);
+}
+
+static inline u64 intel_pmu_get_status(void)
+{
+ u64 status;
+
+ rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
+
+ return status;
+}
+
+static inline void intel_pmu_ack_status(u64 ack)
+{
+ wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack);
+}
+
+static void intel_pmu_disable_fixed(struct hw_perf_event *hwc)
+{
+ int idx = hwc->idx - INTEL_PMC_IDX_FIXED;
+ u64 ctrl_val, mask;
+
+ mask = 0xfULL << (idx * 4);
+
+ rdmsrl(hwc->config_base, ctrl_val);
+ ctrl_val &= ~mask;
+ wrmsrl(hwc->config_base, ctrl_val);
+}
+
+static inline bool event_is_checkpointed(struct perf_event *event)
+{
+ return (event->hw.config & HSW_IN_TX_CHECKPOINTED) != 0;
+}
+
+static void intel_pmu_disable_event(struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+
+ if (unlikely(hwc->idx == INTEL_PMC_IDX_FIXED_BTS)) {
+ intel_pmu_disable_bts();
+ intel_pmu_drain_bts_buffer();
+ return;
+ }
+
+ cpuc->intel_ctrl_guest_mask &= ~(1ull << hwc->idx);
+ cpuc->intel_ctrl_host_mask &= ~(1ull << hwc->idx);
+ cpuc->intel_cp_status &= ~(1ull << hwc->idx);
+
+ /*
+ * must disable before any actual event
+ * because any event may be combined with LBR
+ */
+ if (needs_branch_stack(event))
+ intel_pmu_lbr_disable(event);
+
+ if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
+ intel_pmu_disable_fixed(hwc);
+ return;
+ }
+
+ x86_pmu_disable_event(event);
+
+ if (unlikely(event->attr.precise_ip))
+ intel_pmu_pebs_disable(event);
+}
+
+static void intel_pmu_enable_fixed(struct hw_perf_event *hwc)
+{
+ int idx = hwc->idx - INTEL_PMC_IDX_FIXED;
+ u64 ctrl_val, bits, mask;
+
+ /*
+ * Enable IRQ generation (0x8),
+ * and enable ring-3 counting (0x2) and ring-0 counting (0x1)
+ * if requested:
+ */
+ bits = 0x8ULL;
+ if (hwc->config & ARCH_PERFMON_EVENTSEL_USR)
+ bits |= 0x2;
+ if (hwc->config & ARCH_PERFMON_EVENTSEL_OS)
+ bits |= 0x1;
+
+ /*
+ * ANY bit is supported in v3 and up
+ */
+ if (x86_pmu.version > 2 && hwc->config & ARCH_PERFMON_EVENTSEL_ANY)
+ bits |= 0x4;
+
+ bits <<= (idx * 4);
+ mask = 0xfULL << (idx * 4);
+
+ rdmsrl(hwc->config_base, ctrl_val);
+ ctrl_val &= ~mask;
+ ctrl_val |= bits;
+ wrmsrl(hwc->config_base, ctrl_val);
+}
+
+static void intel_pmu_enable_event(struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+
+ if (unlikely(hwc->idx == INTEL_PMC_IDX_FIXED_BTS)) {
+ if (!__this_cpu_read(cpu_hw_events.enabled))
+ return;
+
+ intel_pmu_enable_bts(hwc->config);
+ return;
+ }
+ /*
+ * must enabled before any actual event
+ * because any event may be combined with LBR
+ */
+ if (needs_branch_stack(event))
+ intel_pmu_lbr_enable(event);
+
+ if (event->attr.exclude_host)
+ cpuc->intel_ctrl_guest_mask |= (1ull << hwc->idx);
+ if (event->attr.exclude_guest)
+ cpuc->intel_ctrl_host_mask |= (1ull << hwc->idx);
+
+ if (unlikely(event_is_checkpointed(event)))
+ cpuc->intel_cp_status |= (1ull << hwc->idx);
+
+ if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
+ intel_pmu_enable_fixed(hwc);
+ return;
+ }
+
+ if (unlikely(event->attr.precise_ip))
+ intel_pmu_pebs_enable(event);
+
+ __x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE);
+}
+
+/*
+ * Save and restart an expired event. Called by NMI contexts,
+ * so it has to be careful about preempting normal event ops:
+ */
+int intel_pmu_save_and_restart(struct perf_event *event)
+{
+ x86_perf_event_update(event);
+ /*
+ * For a checkpointed counter always reset back to 0. This
+ * avoids a situation where the counter overflows, aborts the
+ * transaction and is then set back to shortly before the
+ * overflow, and overflows and aborts again.
+ */
+ if (unlikely(event_is_checkpointed(event))) {
+ /* No race with NMIs because the counter should not be armed */
+ wrmsrl(event->hw.event_base, 0);
+ local64_set(&event->hw.prev_count, 0);
+ }
+ return x86_perf_event_set_period(event);
+}
+
+static void intel_pmu_reset(void)
+{
+ struct debug_store *ds = __this_cpu_read(cpu_hw_events.ds);
+ unsigned long flags;
+ int idx;
+
+ if (!x86_pmu.num_counters)
+ return;
+
+ local_irq_save(flags);
+
+ pr_info("clearing PMU state on CPU#%d\n", smp_processor_id());
+
+ for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+ wrmsrl_safe(x86_pmu_config_addr(idx), 0ull);
+ wrmsrl_safe(x86_pmu_event_addr(idx), 0ull);
+ }
+ for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++)
+ wrmsrl_safe(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull);
+
+ if (ds)
+ ds->bts_index = ds->bts_buffer_base;
+
+ /* Ack all overflows and disable fixed counters */
+ if (x86_pmu.version >= 2) {
+ intel_pmu_ack_status(intel_pmu_get_status());
+ wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
+ }
+
+ /* Reset LBRs and LBR freezing */
+ if (x86_pmu.lbr_nr) {
+ update_debugctlmsr(get_debugctlmsr() &
+ ~(DEBUGCTLMSR_FREEZE_LBRS_ON_PMI|DEBUGCTLMSR_LBR));
+ }
+
+ local_irq_restore(flags);
+}
+
+/*
+ * This handler is triggered by the local APIC, so the APIC IRQ handling
+ * rules apply:
+ */
+static int intel_pmu_handle_irq(struct pt_regs *regs)
+{
+ struct perf_sample_data data;
+ struct cpu_hw_events *cpuc;
+ int bit, loops;
+ u64 status;
+ int handled;
+
+ cpuc = this_cpu_ptr(&cpu_hw_events);
+
+ /*
+ * No known reason to not always do late ACK,
+ * but just in case do it opt-in.
+ */
+ if (!x86_pmu.late_ack)
+ apic_write(APIC_LVTPC, APIC_DM_NMI);
+ __intel_pmu_disable_all();
+ handled = intel_pmu_drain_bts_buffer();
+ handled += intel_bts_interrupt();
+ status = intel_pmu_get_status();
+ if (!status)
+ goto done;
+
+ loops = 0;
+again:
+ intel_pmu_lbr_read();
+ intel_pmu_ack_status(status);
+ if (++loops > 100) {
+ static bool warned = false;
+ if (!warned) {
+ WARN(1, "perfevents: irq loop stuck!\n");
+ perf_event_print_debug();
+ warned = true;
+ }
+ intel_pmu_reset();
+ goto done;
+ }
+
+ inc_irq_stat(apic_perf_irqs);
+
+
+ /*
+ * Ignore a range of extra bits in status that do not indicate
+ * overflow by themselves.
+ */
+ status &= ~(GLOBAL_STATUS_COND_CHG |
+ GLOBAL_STATUS_ASIF |
+ GLOBAL_STATUS_LBRS_FROZEN);
+ if (!status)
+ goto done;
+
+ /*
+ * PEBS overflow sets bit 62 in the global status register
+ */
+ if (__test_and_clear_bit(62, (unsigned long *)&status)) {
+ handled++;
+ x86_pmu.drain_pebs(regs);
+ /*
+ * There are cases where, even though, the PEBS ovfl bit is set
+ * in GLOBAL_OVF_STATUS, the PEBS events may also have their
+ * overflow bits set for their counters. We must clear them
+ * here because they have been processed as exact samples in
+ * the drain_pebs() routine. They must not be processed again
+ * in the for_each_bit_set() loop for regular samples below.
+ */
+ status &= ~cpuc->pebs_enabled;
+ status &= x86_pmu.intel_ctrl | GLOBAL_STATUS_TRACE_TOPAPMI;
+ }
+
+ /*
+ * Intel PT
+ */
+ if (__test_and_clear_bit(55, (unsigned long *)&status)) {
+ handled++;
+ intel_pt_interrupt();
+ }
+
+ /*
+ * Checkpointed counters can lead to 'spurious' PMIs because the
+ * rollback caused by the PMI will have cleared the overflow status
+ * bit. Therefore always force probe these counters.
+ */
+ status |= cpuc->intel_cp_status;
+
+ for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
+ struct perf_event *event = cpuc->events[bit];
+
+ handled++;
+
+ if (!test_bit(bit, cpuc->active_mask))
+ continue;
+
+ if (!intel_pmu_save_and_restart(event))
+ continue;
+
+ perf_sample_data_init(&data, 0, event->hw.last_period);
+
+ if (has_branch_stack(event))
+ data.br_stack = &cpuc->lbr_stack;
+
+ if (perf_event_overflow(event, &data, regs))
+ x86_pmu_stop(event, 0);
+ }
+
+ /*
+ * Repeat if there is more work to be done:
+ */
+ status = intel_pmu_get_status();
+ if (status)
+ goto again;
+
+done:
+ /* Only restore PMU state when it's active. See x86_pmu_disable(). */
+ if (cpuc->enabled)
+ __intel_pmu_enable_all(0, true);
+
+ /*
+ * Only unmask the NMI after the overflow counters
+ * have been reset. This avoids spurious NMIs on
+ * Haswell CPUs.
+ */
+ if (x86_pmu.late_ack)
+ apic_write(APIC_LVTPC, APIC_DM_NMI);
+ return handled;
+}
+
+static struct event_constraint *
+intel_bts_constraints(struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ unsigned int hw_event, bts_event;
+
+ if (event->attr.freq)
+ return NULL;
+
+ hw_event = hwc->config & INTEL_ARCH_EVENT_MASK;
+ bts_event = x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS);
+
+ if (unlikely(hw_event == bts_event && hwc->sample_period == 1))
+ return &bts_constraint;
+
+ return NULL;
+}
+
+static int intel_alt_er(int idx, u64 config)
+{
+ int alt_idx = idx;
+
+ if (!(x86_pmu.flags & PMU_FL_HAS_RSP_1))
+ return idx;
+
+ if (idx == EXTRA_REG_RSP_0)
+ alt_idx = EXTRA_REG_RSP_1;
+
+ if (idx == EXTRA_REG_RSP_1)
+ alt_idx = EXTRA_REG_RSP_0;
+
+ if (config & ~x86_pmu.extra_regs[alt_idx].valid_mask)
+ return idx;
+
+ return alt_idx;
+}
+
+static void intel_fixup_er(struct perf_event *event, int idx)
+{
+ event->hw.extra_reg.idx = idx;
+
+ if (idx == EXTRA_REG_RSP_0) {
+ event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
+ event->hw.config |= x86_pmu.extra_regs[EXTRA_REG_RSP_0].event;
+ event->hw.extra_reg.reg = MSR_OFFCORE_RSP_0;
+ } else if (idx == EXTRA_REG_RSP_1) {
+ event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
+ event->hw.config |= x86_pmu.extra_regs[EXTRA_REG_RSP_1].event;
+ event->hw.extra_reg.reg = MSR_OFFCORE_RSP_1;
+ }
+}
+
+/*
+ * manage allocation of shared extra msr for certain events
+ *
+ * sharing can be:
+ * per-cpu: to be shared between the various events on a single PMU
+ * per-core: per-cpu + shared by HT threads
+ */
+static struct event_constraint *
+__intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc,
+ struct perf_event *event,
+ struct hw_perf_event_extra *reg)
+{
+ struct event_constraint *c = &emptyconstraint;
+ struct er_account *era;
+ unsigned long flags;
+ int idx = reg->idx;
+
+ /*
+ * reg->alloc can be set due to existing state, so for fake cpuc we
+ * need to ignore this, otherwise we might fail to allocate proper fake
+ * state for this extra reg constraint. Also see the comment below.
+ */
+ if (reg->alloc && !cpuc->is_fake)
+ return NULL; /* call x86_get_event_constraint() */
+
+again:
+ era = &cpuc->shared_regs->regs[idx];
+ /*
+ * we use spin_lock_irqsave() to avoid lockdep issues when
+ * passing a fake cpuc
+ */
+ raw_spin_lock_irqsave(&era->lock, flags);
+
+ if (!atomic_read(&era->ref) || era->config == reg->config) {
+
+ /*
+ * If its a fake cpuc -- as per validate_{group,event}() we
+ * shouldn't touch event state and we can avoid doing so
+ * since both will only call get_event_constraints() once
+ * on each event, this avoids the need for reg->alloc.
+ *
+ * Not doing the ER fixup will only result in era->reg being
+ * wrong, but since we won't actually try and program hardware
+ * this isn't a problem either.
+ */
+ if (!cpuc->is_fake) {
+ if (idx != reg->idx)
+ intel_fixup_er(event, idx);
+
+ /*
+ * x86_schedule_events() can call get_event_constraints()
+ * multiple times on events in the case of incremental
+ * scheduling(). reg->alloc ensures we only do the ER
+ * allocation once.
+ */
+ reg->alloc = 1;
+ }
+
+ /* lock in msr value */
+ era->config = reg->config;
+ era->reg = reg->reg;
+
+ /* one more user */
+ atomic_inc(&era->ref);
+
+ /*
+ * need to call x86_get_event_constraint()
+ * to check if associated event has constraints
+ */
+ c = NULL;
+ } else {
+ idx = intel_alt_er(idx, reg->config);
+ if (idx != reg->idx) {
+ raw_spin_unlock_irqrestore(&era->lock, flags);
+ goto again;
+ }
+ }
+ raw_spin_unlock_irqrestore(&era->lock, flags);
+
+ return c;
+}
+
+static void
+__intel_shared_reg_put_constraints(struct cpu_hw_events *cpuc,
+ struct hw_perf_event_extra *reg)
+{
+ struct er_account *era;
+
+ /*
+ * Only put constraint if extra reg was actually allocated. Also takes
+ * care of event which do not use an extra shared reg.
+ *
+ * Also, if this is a fake cpuc we shouldn't touch any event state
+ * (reg->alloc) and we don't care about leaving inconsistent cpuc state
+ * either since it'll be thrown out.
+ */
+ if (!reg->alloc || cpuc->is_fake)
+ return;
+
+ era = &cpuc->shared_regs->regs[reg->idx];
+
+ /* one fewer user */
+ atomic_dec(&era->ref);
+
+ /* allocate again next time */
+ reg->alloc = 0;
+}
+
+static struct event_constraint *
+intel_shared_regs_constraints(struct cpu_hw_events *cpuc,
+ struct perf_event *event)
+{
+ struct event_constraint *c = NULL, *d;
+ struct hw_perf_event_extra *xreg, *breg;
+
+ xreg = &event->hw.extra_reg;
+ if (xreg->idx != EXTRA_REG_NONE) {
+ c = __intel_shared_reg_get_constraints(cpuc, event, xreg);
+ if (c == &emptyconstraint)
+ return c;
+ }
+ breg = &event->hw.branch_reg;
+ if (breg->idx != EXTRA_REG_NONE) {
+ d = __intel_shared_reg_get_constraints(cpuc, event, breg);
+ if (d == &emptyconstraint) {
+ __intel_shared_reg_put_constraints(cpuc, xreg);
+ c = d;
+ }
+ }
+ return c;
+}
+
+struct event_constraint *
+x86_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
+ struct perf_event *event)
+{
+ struct event_constraint *c;
+
+ if (x86_pmu.event_constraints) {
+ for_each_event_constraint(c, x86_pmu.event_constraints) {
+ if ((event->hw.config & c->cmask) == c->code) {
+ event->hw.flags |= c->flags;
+ return c;
+ }
+ }
+ }
+
+ return &unconstrained;
+}
+
+static struct event_constraint *
+__intel_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
+ struct perf_event *event)
+{
+ struct event_constraint *c;
+
+ c = intel_bts_constraints(event);
+ if (c)
+ return c;
+
+ c = intel_shared_regs_constraints(cpuc, event);
+ if (c)
+ return c;
+
+ c = intel_pebs_constraints(event);
+ if (c)
+ return c;
+
+ return x86_get_event_constraints(cpuc, idx, event);
+}
+
+static void
+intel_start_scheduling(struct cpu_hw_events *cpuc)
+{
+ struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
+ struct intel_excl_states *xl;
+ int tid = cpuc->excl_thread_id;
+
+ /*
+ * nothing needed if in group validation mode
+ */
+ if (cpuc->is_fake || !is_ht_workaround_enabled())
+ return;
+
+ /*
+ * no exclusion needed
+ */
+ if (WARN_ON_ONCE(!excl_cntrs))
+ return;
+
+ xl = &excl_cntrs->states[tid];
+
+ xl->sched_started = true;
+ /*
+ * lock shared state until we are done scheduling
+ * in stop_event_scheduling()
+ * makes scheduling appear as a transaction
+ */
+ raw_spin_lock(&excl_cntrs->lock);
+}
+
+static void intel_commit_scheduling(struct cpu_hw_events *cpuc, int idx, int cntr)
+{
+ struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
+ struct event_constraint *c = cpuc->event_constraint[idx];
+ struct intel_excl_states *xl;
+ int tid = cpuc->excl_thread_id;
+
+ if (cpuc->is_fake || !is_ht_workaround_enabled())
+ return;
+
+ if (WARN_ON_ONCE(!excl_cntrs))
+ return;
+
+ if (!(c->flags & PERF_X86_EVENT_DYNAMIC))
+ return;
+
+ xl = &excl_cntrs->states[tid];
+
+ lockdep_assert_held(&excl_cntrs->lock);
+
+ if (c->flags & PERF_X86_EVENT_EXCL)
+ xl->state[cntr] = INTEL_EXCL_EXCLUSIVE;
+ else
+ xl->state[cntr] = INTEL_EXCL_SHARED;
+}
+
+static void
+intel_stop_scheduling(struct cpu_hw_events *cpuc)
+{
+ struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
+ struct intel_excl_states *xl;
+ int tid = cpuc->excl_thread_id;
+
+ /*
+ * nothing needed if in group validation mode
+ */
+ if (cpuc->is_fake || !is_ht_workaround_enabled())
+ return;
+ /*
+ * no exclusion needed
+ */
+ if (WARN_ON_ONCE(!excl_cntrs))
+ return;
+
+ xl = &excl_cntrs->states[tid];
+
+ xl->sched_started = false;
+ /*
+ * release shared state lock (acquired in intel_start_scheduling())
+ */
+ raw_spin_unlock(&excl_cntrs->lock);
+}
+
+static struct event_constraint *
+intel_get_excl_constraints(struct cpu_hw_events *cpuc, struct perf_event *event,
+ int idx, struct event_constraint *c)
+{
+ struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
+ struct intel_excl_states *xlo;
+ int tid = cpuc->excl_thread_id;
+ int is_excl, i;
+
+ /*
+ * validating a group does not require
+ * enforcing cross-thread exclusion
+ */
+ if (cpuc->is_fake || !is_ht_workaround_enabled())
+ return c;
+
+ /*
+ * no exclusion needed
+ */
+ if (WARN_ON_ONCE(!excl_cntrs))
+ return c;
+
+ /*
+ * because we modify the constraint, we need
+ * to make a copy. Static constraints come
+ * from static const tables.
+ *
+ * only needed when constraint has not yet
+ * been cloned (marked dynamic)
+ */
+ if (!(c->flags & PERF_X86_EVENT_DYNAMIC)) {
+ struct event_constraint *cx;
+
+ /*
+ * grab pre-allocated constraint entry
+ */
+ cx = &cpuc->constraint_list[idx];
+
+ /*
+ * initialize dynamic constraint
+ * with static constraint
+ */
+ *cx = *c;
+
+ /*
+ * mark constraint as dynamic, so we
+ * can free it later on
+ */
+ cx->flags |= PERF_X86_EVENT_DYNAMIC;
+ c = cx;
+ }
+
+ /*
+ * From here on, the constraint is dynamic.
+ * Either it was just allocated above, or it
+ * was allocated during a earlier invocation
+ * of this function
+ */
+
+ /*
+ * state of sibling HT
+ */
+ xlo = &excl_cntrs->states[tid ^ 1];
+
+ /*
+ * event requires exclusive counter access
+ * across HT threads
+ */
+ is_excl = c->flags & PERF_X86_EVENT_EXCL;
+ if (is_excl && !(event->hw.flags & PERF_X86_EVENT_EXCL_ACCT)) {
+ event->hw.flags |= PERF_X86_EVENT_EXCL_ACCT;
+ if (!cpuc->n_excl++)
+ WRITE_ONCE(excl_cntrs->has_exclusive[tid], 1);
+ }
+
+ /*
+ * Modify static constraint with current dynamic
+ * state of thread
+ *
+ * EXCLUSIVE: sibling counter measuring exclusive event
+ * SHARED : sibling counter measuring non-exclusive event
+ * UNUSED : sibling counter unused
+ */
+ for_each_set_bit(i, c->idxmsk, X86_PMC_IDX_MAX) {
+ /*
+ * exclusive event in sibling counter
+ * our corresponding counter cannot be used
+ * regardless of our event
+ */
+ if (xlo->state[i] == INTEL_EXCL_EXCLUSIVE)
+ __clear_bit(i, c->idxmsk);
+ /*
+ * if measuring an exclusive event, sibling
+ * measuring non-exclusive, then counter cannot
+ * be used
+ */
+ if (is_excl && xlo->state[i] == INTEL_EXCL_SHARED)
+ __clear_bit(i, c->idxmsk);
+ }
+
+ /*
+ * recompute actual bit weight for scheduling algorithm
+ */
+ c->weight = hweight64(c->idxmsk64);
+
+ /*
+ * if we return an empty mask, then switch
+ * back to static empty constraint to avoid
+ * the cost of freeing later on
+ */
+ if (c->weight == 0)
+ c = &emptyconstraint;
+
+ return c;
+}
+
+static struct event_constraint *
+intel_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
+ struct perf_event *event)
+{
+ struct event_constraint *c1 = NULL;
+ struct event_constraint *c2;
+
+ if (idx >= 0) /* fake does < 0 */
+ c1 = cpuc->event_constraint[idx];
+
+ /*
+ * first time only
+ * - static constraint: no change across incremental scheduling calls
+ * - dynamic constraint: handled by intel_get_excl_constraints()
+ */
+ c2 = __intel_get_event_constraints(cpuc, idx, event);
+ if (c1 && (c1->flags & PERF_X86_EVENT_DYNAMIC)) {
+ bitmap_copy(c1->idxmsk, c2->idxmsk, X86_PMC_IDX_MAX);
+ c1->weight = c2->weight;
+ c2 = c1;
+ }
+
+ if (cpuc->excl_cntrs)
+ return intel_get_excl_constraints(cpuc, event, idx, c2);
+
+ return c2;
+}
+
+static void intel_put_excl_constraints(struct cpu_hw_events *cpuc,
+ struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
+ int tid = cpuc->excl_thread_id;
+ struct intel_excl_states *xl;
+
+ /*
+ * nothing needed if in group validation mode
+ */
+ if (cpuc->is_fake)
+ return;
+
+ if (WARN_ON_ONCE(!excl_cntrs))
+ return;
+
+ if (hwc->flags & PERF_X86_EVENT_EXCL_ACCT) {
+ hwc->flags &= ~PERF_X86_EVENT_EXCL_ACCT;
+ if (!--cpuc->n_excl)
+ WRITE_ONCE(excl_cntrs->has_exclusive[tid], 0);
+ }
+
+ /*
+ * If event was actually assigned, then mark the counter state as
+ * unused now.
+ */
+ if (hwc->idx >= 0) {
+ xl = &excl_cntrs->states[tid];
+
+ /*
+ * put_constraint may be called from x86_schedule_events()
+ * which already has the lock held so here make locking
+ * conditional.
+ */
+ if (!xl->sched_started)
+ raw_spin_lock(&excl_cntrs->lock);
+
+ xl->state[hwc->idx] = INTEL_EXCL_UNUSED;
+
+ if (!xl->sched_started)
+ raw_spin_unlock(&excl_cntrs->lock);
+ }
+}
+
+static void
+intel_put_shared_regs_event_constraints(struct cpu_hw_events *cpuc,
+ struct perf_event *event)
+{
+ struct hw_perf_event_extra *reg;
+
+ reg = &event->hw.extra_reg;
+ if (reg->idx != EXTRA_REG_NONE)
+ __intel_shared_reg_put_constraints(cpuc, reg);
+
+ reg = &event->hw.branch_reg;
+ if (reg->idx != EXTRA_REG_NONE)
+ __intel_shared_reg_put_constraints(cpuc, reg);
+}
+
+static void intel_put_event_constraints(struct cpu_hw_events *cpuc,
+ struct perf_event *event)
+{
+ intel_put_shared_regs_event_constraints(cpuc, event);
+
+ /*
+ * is PMU has exclusive counter restrictions, then
+ * all events are subject to and must call the
+ * put_excl_constraints() routine
+ */
+ if (cpuc->excl_cntrs)
+ intel_put_excl_constraints(cpuc, event);
+}
+
+static void intel_pebs_aliases_core2(struct perf_event *event)
+{
+ if ((event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) {
+ /*
+ * Use an alternative encoding for CPU_CLK_UNHALTED.THREAD_P
+ * (0x003c) so that we can use it with PEBS.
+ *
+ * The regular CPU_CLK_UNHALTED.THREAD_P event (0x003c) isn't
+ * PEBS capable. However we can use INST_RETIRED.ANY_P
+ * (0x00c0), which is a PEBS capable event, to get the same
+ * count.
+ *
+ * INST_RETIRED.ANY_P counts the number of cycles that retires
+ * CNTMASK instructions. By setting CNTMASK to a value (16)
+ * larger than the maximum number of instructions that can be
+ * retired per cycle (4) and then inverting the condition, we
+ * count all cycles that retire 16 or less instructions, which
+ * is every cycle.
+ *
+ * Thereby we gain a PEBS capable cycle counter.
+ */
+ u64 alt_config = X86_CONFIG(.event=0xc0, .inv=1, .cmask=16);
+
+ alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK);
+ event->hw.config = alt_config;
+ }
+}
+
+static void intel_pebs_aliases_snb(struct perf_event *event)
+{
+ if ((event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) {
+ /*
+ * Use an alternative encoding for CPU_CLK_UNHALTED.THREAD_P
+ * (0x003c) so that we can use it with PEBS.
+ *
+ * The regular CPU_CLK_UNHALTED.THREAD_P event (0x003c) isn't
+ * PEBS capable. However we can use UOPS_RETIRED.ALL
+ * (0x01c2), which is a PEBS capable event, to get the same
+ * count.
+ *
+ * UOPS_RETIRED.ALL counts the number of cycles that retires
+ * CNTMASK micro-ops. By setting CNTMASK to a value (16)
+ * larger than the maximum number of micro-ops that can be
+ * retired per cycle (4) and then inverting the condition, we
+ * count all cycles that retire 16 or less micro-ops, which
+ * is every cycle.
+ *
+ * Thereby we gain a PEBS capable cycle counter.
+ */
+ u64 alt_config = X86_CONFIG(.event=0xc2, .umask=0x01, .inv=1, .cmask=16);
+
+ alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK);
+ event->hw.config = alt_config;
+ }
+}
+
+static void intel_pebs_aliases_precdist(struct perf_event *event)
+{
+ if ((event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) {
+ /*
+ * Use an alternative encoding for CPU_CLK_UNHALTED.THREAD_P
+ * (0x003c) so that we can use it with PEBS.
+ *
+ * The regular CPU_CLK_UNHALTED.THREAD_P event (0x003c) isn't
+ * PEBS capable. However we can use INST_RETIRED.PREC_DIST
+ * (0x01c0), which is a PEBS capable event, to get the same
+ * count.
+ *
+ * The PREC_DIST event has special support to minimize sample
+ * shadowing effects. One drawback is that it can be
+ * only programmed on counter 1, but that seems like an
+ * acceptable trade off.
+ */
+ u64 alt_config = X86_CONFIG(.event=0xc0, .umask=0x01, .inv=1, .cmask=16);
+
+ alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK);
+ event->hw.config = alt_config;
+ }
+}
+
+static void intel_pebs_aliases_ivb(struct perf_event *event)
+{
+ if (event->attr.precise_ip < 3)
+ return intel_pebs_aliases_snb(event);
+ return intel_pebs_aliases_precdist(event);
+}
+
+static void intel_pebs_aliases_skl(struct perf_event *event)
+{
+ if (event->attr.precise_ip < 3)
+ return intel_pebs_aliases_core2(event);
+ return intel_pebs_aliases_precdist(event);
+}
+
+static unsigned long intel_pmu_free_running_flags(struct perf_event *event)
+{
+ unsigned long flags = x86_pmu.free_running_flags;
+
+ if (event->attr.use_clockid)
+ flags &= ~PERF_SAMPLE_TIME;
+ return flags;
+}
+
+static int intel_pmu_hw_config(struct perf_event *event)
+{
+ int ret = x86_pmu_hw_config(event);
+
+ if (ret)
+ return ret;
+
+ if (event->attr.precise_ip) {
+ if (!event->attr.freq) {
+ event->hw.flags |= PERF_X86_EVENT_AUTO_RELOAD;
+ if (!(event->attr.sample_type &
+ ~intel_pmu_free_running_flags(event)))
+ event->hw.flags |= PERF_X86_EVENT_FREERUNNING;
+ }
+ if (x86_pmu.pebs_aliases)
+ x86_pmu.pebs_aliases(event);
+ }
+
+ if (needs_branch_stack(event)) {
+ ret = intel_pmu_setup_lbr_filter(event);
+ if (ret)
+ return ret;
+
+ /*
+ * BTS is set up earlier in this path, so don't account twice
+ */
+ if (!intel_pmu_has_bts(event)) {
+ /* disallow lbr if conflicting events are present */
+ if (x86_add_exclusive(x86_lbr_exclusive_lbr))
+ return -EBUSY;
+
+ event->destroy = hw_perf_lbr_event_destroy;
+ }
+ }
+
+ if (event->attr.type != PERF_TYPE_RAW)
+ return 0;
+
+ if (!(event->attr.config & ARCH_PERFMON_EVENTSEL_ANY))
+ return 0;
+
+ if (x86_pmu.version < 3)
+ return -EINVAL;
+
+ if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
+ return -EACCES;
+
+ event->hw.config |= ARCH_PERFMON_EVENTSEL_ANY;
+
+ return 0;
+}
+
+struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr)
+{
+ if (x86_pmu.guest_get_msrs)
+ return x86_pmu.guest_get_msrs(nr);
+ *nr = 0;
+ return NULL;
+}
+EXPORT_SYMBOL_GPL(perf_guest_get_msrs);
+
+static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ struct perf_guest_switch_msr *arr = cpuc->guest_switch_msrs;
+
+ arr[0].msr = MSR_CORE_PERF_GLOBAL_CTRL;
+ arr[0].host = x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_guest_mask;
+ arr[0].guest = x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_host_mask;
+ /*
+ * If PMU counter has PEBS enabled it is not enough to disable counter
+ * on a guest entry since PEBS memory write can overshoot guest entry
+ * and corrupt guest memory. Disabling PEBS solves the problem.
+ */
+ arr[1].msr = MSR_IA32_PEBS_ENABLE;
+ arr[1].host = cpuc->pebs_enabled;
+ arr[1].guest = 0;
+
+ *nr = 2;
+ return arr;
+}
+
+static struct perf_guest_switch_msr *core_guest_get_msrs(int *nr)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ struct perf_guest_switch_msr *arr = cpuc->guest_switch_msrs;
+ int idx;
+
+ for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+ struct perf_event *event = cpuc->events[idx];
+
+ arr[idx].msr = x86_pmu_config_addr(idx);
+ arr[idx].host = arr[idx].guest = 0;
+
+ if (!test_bit(idx, cpuc->active_mask))
+ continue;
+
+ arr[idx].host = arr[idx].guest =
+ event->hw.config | ARCH_PERFMON_EVENTSEL_ENABLE;
+
+ if (event->attr.exclude_host)
+ arr[idx].host &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
+ else if (event->attr.exclude_guest)
+ arr[idx].guest &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
+ }
+
+ *nr = x86_pmu.num_counters;
+ return arr;
+}
+
+static void core_pmu_enable_event(struct perf_event *event)
+{
+ if (!event->attr.exclude_host)
+ x86_pmu_enable_event(event);
+}
+
+static void core_pmu_enable_all(int added)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ int idx;
+
+ for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+ struct hw_perf_event *hwc = &cpuc->events[idx]->hw;
+
+ if (!test_bit(idx, cpuc->active_mask) ||
+ cpuc->events[idx]->attr.exclude_host)
+ continue;
+
+ __x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE);
+ }
+}
+
+static int hsw_hw_config(struct perf_event *event)
+{
+ int ret = intel_pmu_hw_config(event);
+
+ if (ret)
+ return ret;
+ if (!boot_cpu_has(X86_FEATURE_RTM) && !boot_cpu_has(X86_FEATURE_HLE))
+ return 0;
+ event->hw.config |= event->attr.config & (HSW_IN_TX|HSW_IN_TX_CHECKPOINTED);
+
+ /*
+ * IN_TX/IN_TX-CP filters are not supported by the Haswell PMU with
+ * PEBS or in ANY thread mode. Since the results are non-sensical forbid
+ * this combination.
+ */
+ if ((event->hw.config & (HSW_IN_TX|HSW_IN_TX_CHECKPOINTED)) &&
+ ((event->hw.config & ARCH_PERFMON_EVENTSEL_ANY) ||
+ event->attr.precise_ip > 0))
+ return -EOPNOTSUPP;
+
+ if (event_is_checkpointed(event)) {
+ /*
+ * Sampling of checkpointed events can cause situations where
+ * the CPU constantly aborts because of a overflow, which is
+ * then checkpointed back and ignored. Forbid checkpointing
+ * for sampling.
+ *
+ * But still allow a long sampling period, so that perf stat
+ * from KVM works.
+ */
+ if (event->attr.sample_period > 0 &&
+ event->attr.sample_period < 0x7fffffff)
+ return -EOPNOTSUPP;
+ }
+ return 0;
+}
+
+static struct event_constraint counter2_constraint =
+ EVENT_CONSTRAINT(0, 0x4, 0);
+
+static struct event_constraint *
+hsw_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
+ struct perf_event *event)
+{
+ struct event_constraint *c;
+
+ c = intel_get_event_constraints(cpuc, idx, event);
+
+ /* Handle special quirk on in_tx_checkpointed only in counter 2 */
+ if (event->hw.config & HSW_IN_TX_CHECKPOINTED) {
+ if (c->idxmsk64 & (1U << 2))
+ return &counter2_constraint;
+ return &emptyconstraint;
+ }
+
+ return c;
+}
+
+/*
+ * Broadwell:
+ *
+ * The INST_RETIRED.ALL period always needs to have lowest 6 bits cleared
+ * (BDM55) and it must not use a period smaller than 100 (BDM11). We combine
+ * the two to enforce a minimum period of 128 (the smallest value that has bits
+ * 0-5 cleared and >= 100).
+ *
+ * Because of how the code in x86_perf_event_set_period() works, the truncation
+ * of the lower 6 bits is 'harmless' as we'll occasionally add a longer period
+ * to make up for the 'lost' events due to carrying the 'error' in period_left.
+ *
+ * Therefore the effective (average) period matches the requested period,
+ * despite coarser hardware granularity.
+ */
+static unsigned bdw_limit_period(struct perf_event *event, unsigned left)
+{
+ if ((event->hw.config & INTEL_ARCH_EVENT_MASK) ==
+ X86_CONFIG(.event=0xc0, .umask=0x01)) {
+ if (left < 128)
+ left = 128;
+ left &= ~0x3fu;
+ }
+ return left;
+}
+
+PMU_FORMAT_ATTR(event, "config:0-7" );
+PMU_FORMAT_ATTR(umask, "config:8-15" );
+PMU_FORMAT_ATTR(edge, "config:18" );
+PMU_FORMAT_ATTR(pc, "config:19" );
+PMU_FORMAT_ATTR(any, "config:21" ); /* v3 + */
+PMU_FORMAT_ATTR(inv, "config:23" );
+PMU_FORMAT_ATTR(cmask, "config:24-31" );
+PMU_FORMAT_ATTR(in_tx, "config:32");
+PMU_FORMAT_ATTR(in_tx_cp, "config:33");
+
+static struct attribute *intel_arch_formats_attr[] = {
+ &format_attr_event.attr,
+ &format_attr_umask.attr,
+ &format_attr_edge.attr,
+ &format_attr_pc.attr,
+ &format_attr_inv.attr,
+ &format_attr_cmask.attr,
+ NULL,
+};
+
+ssize_t intel_event_sysfs_show(char *page, u64 config)
+{
+ u64 event = (config & ARCH_PERFMON_EVENTSEL_EVENT);
+
+ return x86_event_sysfs_show(page, config, event);
+}
+
+struct intel_shared_regs *allocate_shared_regs(int cpu)
+{
+ struct intel_shared_regs *regs;
+ int i;
+
+ regs = kzalloc_node(sizeof(struct intel_shared_regs),
+ GFP_KERNEL, cpu_to_node(cpu));
+ if (regs) {
+ /*
+ * initialize the locks to keep lockdep happy
+ */
+ for (i = 0; i < EXTRA_REG_MAX; i++)
+ raw_spin_lock_init(&regs->regs[i].lock);
+
+ regs->core_id = -1;
+ }
+ return regs;
+}
+
+static struct intel_excl_cntrs *allocate_excl_cntrs(int cpu)
+{
+ struct intel_excl_cntrs *c;
+
+ c = kzalloc_node(sizeof(struct intel_excl_cntrs),
+ GFP_KERNEL, cpu_to_node(cpu));
+ if (c) {
+ raw_spin_lock_init(&c->lock);
+ c->core_id = -1;
+ }
+ return c;
+}
+
+static int intel_pmu_cpu_prepare(int cpu)
+{
+ struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
+
+ if (x86_pmu.extra_regs || x86_pmu.lbr_sel_map) {
+ cpuc->shared_regs = allocate_shared_regs(cpu);
+ if (!cpuc->shared_regs)
+ goto err;
+ }
+
+ if (x86_pmu.flags & PMU_FL_EXCL_CNTRS) {
+ size_t sz = X86_PMC_IDX_MAX * sizeof(struct event_constraint);
+
+ cpuc->constraint_list = kzalloc(sz, GFP_KERNEL);
+ if (!cpuc->constraint_list)
+ goto err_shared_regs;
+
+ cpuc->excl_cntrs = allocate_excl_cntrs(cpu);
+ if (!cpuc->excl_cntrs)
+ goto err_constraint_list;
+
+ cpuc->excl_thread_id = 0;
+ }
+
+ return NOTIFY_OK;
+
+err_constraint_list:
+ kfree(cpuc->constraint_list);
+ cpuc->constraint_list = NULL;
+
+err_shared_regs:
+ kfree(cpuc->shared_regs);
+ cpuc->shared_regs = NULL;
+
+err:
+ return NOTIFY_BAD;
+}
+
+static void intel_pmu_cpu_starting(int cpu)
+{
+ struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
+ int core_id = topology_core_id(cpu);
+ int i;
+
+ init_debug_store_on_cpu(cpu);
+ /*
+ * Deal with CPUs that don't clear their LBRs on power-up.
+ */
+ intel_pmu_lbr_reset();
+
+ cpuc->lbr_sel = NULL;
+
+ if (!cpuc->shared_regs)
+ return;
+
+ if (!(x86_pmu.flags & PMU_FL_NO_HT_SHARING)) {
+ for_each_cpu(i, topology_sibling_cpumask(cpu)) {
+ struct intel_shared_regs *pc;
+
+ pc = per_cpu(cpu_hw_events, i).shared_regs;
+ if (pc && pc->core_id == core_id) {
+ cpuc->kfree_on_online[0] = cpuc->shared_regs;
+ cpuc->shared_regs = pc;
+ break;
+ }
+ }
+ cpuc->shared_regs->core_id = core_id;
+ cpuc->shared_regs->refcnt++;
+ }
+
+ if (x86_pmu.lbr_sel_map)
+ cpuc->lbr_sel = &cpuc->shared_regs->regs[EXTRA_REG_LBR];
+
+ if (x86_pmu.flags & PMU_FL_EXCL_CNTRS) {
+ for_each_cpu(i, topology_sibling_cpumask(cpu)) {
+ struct intel_excl_cntrs *c;
+
+ c = per_cpu(cpu_hw_events, i).excl_cntrs;
+ if (c && c->core_id == core_id) {
+ cpuc->kfree_on_online[1] = cpuc->excl_cntrs;
+ cpuc->excl_cntrs = c;
+ cpuc->excl_thread_id = 1;
+ break;
+ }
+ }
+ cpuc->excl_cntrs->core_id = core_id;
+ cpuc->excl_cntrs->refcnt++;
+ }
+}
+
+static void free_excl_cntrs(int cpu)
+{
+ struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
+ struct intel_excl_cntrs *c;
+
+ c = cpuc->excl_cntrs;
+ if (c) {
+ if (c->core_id == -1 || --c->refcnt == 0)
+ kfree(c);
+ cpuc->excl_cntrs = NULL;
+ kfree(cpuc->constraint_list);
+ cpuc->constraint_list = NULL;
+ }
+}
+
+static void intel_pmu_cpu_dying(int cpu)
+{
+ struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
+ struct intel_shared_regs *pc;
+
+ pc = cpuc->shared_regs;
+ if (pc) {
+ if (pc->core_id == -1 || --pc->refcnt == 0)
+ kfree(pc);
+ cpuc->shared_regs = NULL;
+ }
+
+ free_excl_cntrs(cpu);
+
+ fini_debug_store_on_cpu(cpu);
+}
+
+static void intel_pmu_sched_task(struct perf_event_context *ctx,
+ bool sched_in)
+{
+ if (x86_pmu.pebs_active)
+ intel_pmu_pebs_sched_task(ctx, sched_in);
+ if (x86_pmu.lbr_nr)
+ intel_pmu_lbr_sched_task(ctx, sched_in);
+}
+
+PMU_FORMAT_ATTR(offcore_rsp, "config1:0-63");
+
+PMU_FORMAT_ATTR(ldlat, "config1:0-15");
+
+PMU_FORMAT_ATTR(frontend, "config1:0-23");
+
+static struct attribute *intel_arch3_formats_attr[] = {
+ &format_attr_event.attr,
+ &format_attr_umask.attr,
+ &format_attr_edge.attr,
+ &format_attr_pc.attr,
+ &format_attr_any.attr,
+ &format_attr_inv.attr,
+ &format_attr_cmask.attr,
+ &format_attr_in_tx.attr,
+ &format_attr_in_tx_cp.attr,
+
+ &format_attr_offcore_rsp.attr, /* XXX do NHM/WSM + SNB breakout */
+ &format_attr_ldlat.attr, /* PEBS load latency */
+ NULL,
+};
+
+static struct attribute *skl_format_attr[] = {
+ &format_attr_frontend.attr,
+ NULL,
+};
+
+static __initconst const struct x86_pmu core_pmu = {
+ .name = "core",
+ .handle_irq = x86_pmu_handle_irq,
+ .disable_all = x86_pmu_disable_all,
+ .enable_all = core_pmu_enable_all,
+ .enable = core_pmu_enable_event,
+ .disable = x86_pmu_disable_event,
+ .hw_config = x86_pmu_hw_config,
+ .schedule_events = x86_schedule_events,
+ .eventsel = MSR_ARCH_PERFMON_EVENTSEL0,
+ .perfctr = MSR_ARCH_PERFMON_PERFCTR0,
+ .event_map = intel_pmu_event_map,
+ .max_events = ARRAY_SIZE(intel_perfmon_event_map),
+ .apic = 1,
+ .free_running_flags = PEBS_FREERUNNING_FLAGS,
+
+ /*
+ * Intel PMCs cannot be accessed sanely above 32-bit width,
+ * so we install an artificial 1<<31 period regardless of
+ * the generic event period:
+ */
+ .max_period = (1ULL<<31) - 1,
+ .get_event_constraints = intel_get_event_constraints,
+ .put_event_constraints = intel_put_event_constraints,
+ .event_constraints = intel_core_event_constraints,
+ .guest_get_msrs = core_guest_get_msrs,
+ .format_attrs = intel_arch_formats_attr,
+ .events_sysfs_show = intel_event_sysfs_show,
+
+ /*
+ * Virtual (or funny metal) CPU can define x86_pmu.extra_regs
+ * together with PMU version 1 and thus be using core_pmu with
+ * shared_regs. We need following callbacks here to allocate
+ * it properly.
+ */
+ .cpu_prepare = intel_pmu_cpu_prepare,
+ .cpu_starting = intel_pmu_cpu_starting,
+ .cpu_dying = intel_pmu_cpu_dying,
+};
+
+static __initconst const struct x86_pmu intel_pmu = {
+ .name = "Intel",
+ .handle_irq = intel_pmu_handle_irq,
+ .disable_all = intel_pmu_disable_all,
+ .enable_all = intel_pmu_enable_all,
+ .enable = intel_pmu_enable_event,
+ .disable = intel_pmu_disable_event,
+ .hw_config = intel_pmu_hw_config,
+ .schedule_events = x86_schedule_events,
+ .eventsel = MSR_ARCH_PERFMON_EVENTSEL0,
+ .perfctr = MSR_ARCH_PERFMON_PERFCTR0,
+ .event_map = intel_pmu_event_map,
+ .max_events = ARRAY_SIZE(intel_perfmon_event_map),
+ .apic = 1,
+ .free_running_flags = PEBS_FREERUNNING_FLAGS,
+ /*
+ * Intel PMCs cannot be accessed sanely above 32 bit width,
+ * so we install an artificial 1<<31 period regardless of
+ * the generic event period:
+ */
+ .max_period = (1ULL << 31) - 1,
+ .get_event_constraints = intel_get_event_constraints,
+ .put_event_constraints = intel_put_event_constraints,
+ .pebs_aliases = intel_pebs_aliases_core2,
+
+ .format_attrs = intel_arch3_formats_attr,
+ .events_sysfs_show = intel_event_sysfs_show,
+
+ .cpu_prepare = intel_pmu_cpu_prepare,
+ .cpu_starting = intel_pmu_cpu_starting,
+ .cpu_dying = intel_pmu_cpu_dying,
+ .guest_get_msrs = intel_guest_get_msrs,
+ .sched_task = intel_pmu_sched_task,
+};
+
+static __init void intel_clovertown_quirk(void)
+{
+ /*
+ * PEBS is unreliable due to:
+ *
+ * AJ67 - PEBS may experience CPL leaks
+ * AJ68 - PEBS PMI may be delayed by one event
+ * AJ69 - GLOBAL_STATUS[62] will only be set when DEBUGCTL[12]
+ * AJ106 - FREEZE_LBRS_ON_PMI doesn't work in combination with PEBS
+ *
+ * AJ67 could be worked around by restricting the OS/USR flags.
+ * AJ69 could be worked around by setting PMU_FREEZE_ON_PMI.
+ *
+ * AJ106 could possibly be worked around by not allowing LBR
+ * usage from PEBS, including the fixup.
+ * AJ68 could possibly be worked around by always programming
+ * a pebs_event_reset[0] value and coping with the lost events.
+ *
+ * But taken together it might just make sense to not enable PEBS on
+ * these chips.
+ */
+ pr_warn("PEBS disabled due to CPU errata\n");
+ x86_pmu.pebs = 0;
+ x86_pmu.pebs_constraints = NULL;
+}
+
+static int intel_snb_pebs_broken(int cpu)
+{
+ u32 rev = UINT_MAX; /* default to broken for unknown models */
+
+ switch (cpu_data(cpu).x86_model) {
+ case 42: /* SNB */
+ rev = 0x28;
+ break;
+
+ case 45: /* SNB-EP */
+ switch (cpu_data(cpu).x86_mask) {
+ case 6: rev = 0x618; break;
+ case 7: rev = 0x70c; break;
+ }
+ }
+
+ return (cpu_data(cpu).microcode < rev);
+}
+
+static void intel_snb_check_microcode(void)
+{
+ int pebs_broken = 0;
+ int cpu;
+
+ get_online_cpus();
+ for_each_online_cpu(cpu) {
+ if ((pebs_broken = intel_snb_pebs_broken(cpu)))
+ break;
+ }
+ put_online_cpus();
+
+ if (pebs_broken == x86_pmu.pebs_broken)
+ return;
+
+ /*
+ * Serialized by the microcode lock..
+ */
+ if (x86_pmu.pebs_broken) {
+ pr_info("PEBS enabled due to microcode update\n");
+ x86_pmu.pebs_broken = 0;
+ } else {
+ pr_info("PEBS disabled due to CPU errata, please upgrade microcode\n");
+ x86_pmu.pebs_broken = 1;
+ }
+}
+
+/*
+ * Under certain circumstances, access certain MSR may cause #GP.
+ * The function tests if the input MSR can be safely accessed.
+ */
+static bool check_msr(unsigned long msr, u64 mask)
+{
+ u64 val_old, val_new, val_tmp;
+
+ /*
+ * Read the current value, change it and read it back to see if it
+ * matches, this is needed to detect certain hardware emulators
+ * (qemu/kvm) that don't trap on the MSR access and always return 0s.
+ */
+ if (rdmsrl_safe(msr, &val_old))
+ return false;
+
+ /*
+ * Only change the bits which can be updated by wrmsrl.
+ */
+ val_tmp = val_old ^ mask;
+ if (wrmsrl_safe(msr, val_tmp) ||
+ rdmsrl_safe(msr, &val_new))
+ return false;
+
+ if (val_new != val_tmp)
+ return false;
+
+ /* Here it's sure that the MSR can be safely accessed.
+ * Restore the old value and return.
+ */
+ wrmsrl(msr, val_old);
+
+ return true;
+}
+
+static __init void intel_sandybridge_quirk(void)
+{
+ x86_pmu.check_microcode = intel_snb_check_microcode;
+ intel_snb_check_microcode();
+}
+
+static const struct { int id; char *name; } intel_arch_events_map[] __initconst = {
+ { PERF_COUNT_HW_CPU_CYCLES, "cpu cycles" },
+ { PERF_COUNT_HW_INSTRUCTIONS, "instructions" },
+ { PERF_COUNT_HW_BUS_CYCLES, "bus cycles" },
+ { PERF_COUNT_HW_CACHE_REFERENCES, "cache references" },
+ { PERF_COUNT_HW_CACHE_MISSES, "cache misses" },
+ { PERF_COUNT_HW_BRANCH_INSTRUCTIONS, "branch instructions" },
+ { PERF_COUNT_HW_BRANCH_MISSES, "branch misses" },
+};
+
+static __init void intel_arch_events_quirk(void)
+{
+ int bit;
+
+ /* disable event that reported as not presend by cpuid */
+ for_each_set_bit(bit, x86_pmu.events_mask, ARRAY_SIZE(intel_arch_events_map)) {
+ intel_perfmon_event_map[intel_arch_events_map[bit].id] = 0;
+ pr_warn("CPUID marked event: \'%s\' unavailable\n",
+ intel_arch_events_map[bit].name);
+ }
+}
+
+static __init void intel_nehalem_quirk(void)
+{
+ union cpuid10_ebx ebx;
+
+ ebx.full = x86_pmu.events_maskl;
+ if (ebx.split.no_branch_misses_retired) {
+ /*
+ * Erratum AAJ80 detected, we work it around by using
+ * the BR_MISP_EXEC.ANY event. This will over-count
+ * branch-misses, but it's still much better than the
+ * architectural event which is often completely bogus:
+ */
+ intel_perfmon_event_map[PERF_COUNT_HW_BRANCH_MISSES] = 0x7f89;
+ ebx.split.no_branch_misses_retired = 0;
+ x86_pmu.events_maskl = ebx.full;
+ pr_info("CPU erratum AAJ80 worked around\n");
+ }
+}
+
+/*
+ * enable software workaround for errata:
+ * SNB: BJ122
+ * IVB: BV98
+ * HSW: HSD29
+ *
+ * Only needed when HT is enabled. However detecting
+ * if HT is enabled is difficult (model specific). So instead,
+ * we enable the workaround in the early boot, and verify if
+ * it is needed in a later initcall phase once we have valid
+ * topology information to check if HT is actually enabled
+ */
+static __init void intel_ht_bug(void)
+{
+ x86_pmu.flags |= PMU_FL_EXCL_CNTRS | PMU_FL_EXCL_ENABLED;
+
+ x86_pmu.start_scheduling = intel_start_scheduling;
+ x86_pmu.commit_scheduling = intel_commit_scheduling;
+ x86_pmu.stop_scheduling = intel_stop_scheduling;
+}
+
+EVENT_ATTR_STR(mem-loads, mem_ld_hsw, "event=0xcd,umask=0x1,ldlat=3");
+EVENT_ATTR_STR(mem-stores, mem_st_hsw, "event=0xd0,umask=0x82")
+
+/* Haswell special events */
+EVENT_ATTR_STR(tx-start, tx_start, "event=0xc9,umask=0x1");
+EVENT_ATTR_STR(tx-commit, tx_commit, "event=0xc9,umask=0x2");
+EVENT_ATTR_STR(tx-abort, tx_abort, "event=0xc9,umask=0x4");
+EVENT_ATTR_STR(tx-capacity, tx_capacity, "event=0x54,umask=0x2");
+EVENT_ATTR_STR(tx-conflict, tx_conflict, "event=0x54,umask=0x1");
+EVENT_ATTR_STR(el-start, el_start, "event=0xc8,umask=0x1");
+EVENT_ATTR_STR(el-commit, el_commit, "event=0xc8,umask=0x2");
+EVENT_ATTR_STR(el-abort, el_abort, "event=0xc8,umask=0x4");
+EVENT_ATTR_STR(el-capacity, el_capacity, "event=0x54,umask=0x2");
+EVENT_ATTR_STR(el-conflict, el_conflict, "event=0x54,umask=0x1");
+EVENT_ATTR_STR(cycles-t, cycles_t, "event=0x3c,in_tx=1");
+EVENT_ATTR_STR(cycles-ct, cycles_ct, "event=0x3c,in_tx=1,in_tx_cp=1");
+
+static struct attribute *hsw_events_attrs[] = {
+ EVENT_PTR(tx_start),
+ EVENT_PTR(tx_commit),
+ EVENT_PTR(tx_abort),
+ EVENT_PTR(tx_capacity),
+ EVENT_PTR(tx_conflict),
+ EVENT_PTR(el_start),
+ EVENT_PTR(el_commit),
+ EVENT_PTR(el_abort),
+ EVENT_PTR(el_capacity),
+ EVENT_PTR(el_conflict),
+ EVENT_PTR(cycles_t),
+ EVENT_PTR(cycles_ct),
+ EVENT_PTR(mem_ld_hsw),
+ EVENT_PTR(mem_st_hsw),
+ NULL
+};
+
+__init int intel_pmu_init(void)
+{
+ union cpuid10_edx edx;
+ union cpuid10_eax eax;
+ union cpuid10_ebx ebx;
+ struct event_constraint *c;
+ unsigned int unused;
+ struct extra_reg *er;
+ int version, i;
+
+ if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
+ switch (boot_cpu_data.x86) {
+ case 0x6:
+ return p6_pmu_init();
+ case 0xb:
+ return knc_pmu_init();
+ case 0xf:
+ return p4_pmu_init();
+ }
+ return -ENODEV;
+ }
+
+ /*
+ * Check whether the Architectural PerfMon supports
+ * Branch Misses Retired hw_event or not.
+ */
+ cpuid(10, &eax.full, &ebx.full, &unused, &edx.full);
+ if (eax.split.mask_length < ARCH_PERFMON_EVENTS_COUNT)
+ return -ENODEV;
+
+ version = eax.split.version_id;
+ if (version < 2)
+ x86_pmu = core_pmu;
+ else
+ x86_pmu = intel_pmu;
+
+ x86_pmu.version = version;
+ x86_pmu.num_counters = eax.split.num_counters;
+ x86_pmu.cntval_bits = eax.split.bit_width;
+ x86_pmu.cntval_mask = (1ULL << eax.split.bit_width) - 1;
+
+ x86_pmu.events_maskl = ebx.full;
+ x86_pmu.events_mask_len = eax.split.mask_length;
+
+ x86_pmu.max_pebs_events = min_t(unsigned, MAX_PEBS_EVENTS, x86_pmu.num_counters);
+
+ /*
+ * Quirk: v2 perfmon does not report fixed-purpose events, so
+ * assume at least 3 events:
+ */
+ if (version > 1)
+ x86_pmu.num_counters_fixed = max((int)edx.split.num_counters_fixed, 3);
+
+ if (boot_cpu_has(X86_FEATURE_PDCM)) {
+ u64 capabilities;
+
+ rdmsrl(MSR_IA32_PERF_CAPABILITIES, capabilities);
+ x86_pmu.intel_cap.capabilities = capabilities;
+ }
+
+ intel_ds_init();
+
+ x86_add_quirk(intel_arch_events_quirk); /* Install first, so it runs last */
+
+ /*
+ * Install the hw-cache-events table:
+ */
+ switch (boot_cpu_data.x86_model) {
+ case 14: /* 65nm Core "Yonah" */
+ pr_cont("Core events, ");
+ break;
+
+ case 15: /* 65nm Core2 "Merom" */
+ x86_add_quirk(intel_clovertown_quirk);
+ case 22: /* 65nm Core2 "Merom-L" */
+ case 23: /* 45nm Core2 "Penryn" */
+ case 29: /* 45nm Core2 "Dunnington (MP) */
+ memcpy(hw_cache_event_ids, core2_hw_cache_event_ids,
+ sizeof(hw_cache_event_ids));
+
+ intel_pmu_lbr_init_core();
+
+ x86_pmu.event_constraints = intel_core2_event_constraints;
+ x86_pmu.pebs_constraints = intel_core2_pebs_event_constraints;
+ pr_cont("Core2 events, ");
+ break;
+
+ case 30: /* 45nm Nehalem */
+ case 26: /* 45nm Nehalem-EP */
+ case 46: /* 45nm Nehalem-EX */
+ memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids,
+ sizeof(hw_cache_event_ids));
+ memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs,
+ sizeof(hw_cache_extra_regs));
+
+ intel_pmu_lbr_init_nhm();
+
+ x86_pmu.event_constraints = intel_nehalem_event_constraints;
+ x86_pmu.pebs_constraints = intel_nehalem_pebs_event_constraints;
+ x86_pmu.enable_all = intel_pmu_nhm_enable_all;
+ x86_pmu.extra_regs = intel_nehalem_extra_regs;
+
+ x86_pmu.cpu_events = nhm_events_attrs;
+
+ /* UOPS_ISSUED.STALLED_CYCLES */
+ intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
+ X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
+ /* UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */
+ intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] =
+ X86_CONFIG(.event=0xb1, .umask=0x3f, .inv=1, .cmask=1);
+
+ intel_pmu_pebs_data_source_nhm();
+ x86_add_quirk(intel_nehalem_quirk);
+
+ pr_cont("Nehalem events, ");
+ break;
+
+ case 28: /* 45nm Atom "Pineview" */
+ case 38: /* 45nm Atom "Lincroft" */
+ case 39: /* 32nm Atom "Penwell" */
+ case 53: /* 32nm Atom "Cloverview" */
+ case 54: /* 32nm Atom "Cedarview" */
+ memcpy(hw_cache_event_ids, atom_hw_cache_event_ids,
+ sizeof(hw_cache_event_ids));
+
+ intel_pmu_lbr_init_atom();
+
+ x86_pmu.event_constraints = intel_gen_event_constraints;
+ x86_pmu.pebs_constraints = intel_atom_pebs_event_constraints;
+ x86_pmu.pebs_aliases = intel_pebs_aliases_core2;
+ pr_cont("Atom events, ");
+ break;
+
+ case 55: /* 22nm Atom "Silvermont" */
+ case 76: /* 14nm Atom "Airmont" */
+ case 77: /* 22nm Atom "Silvermont Avoton/Rangely" */
+ memcpy(hw_cache_event_ids, slm_hw_cache_event_ids,
+ sizeof(hw_cache_event_ids));
+ memcpy(hw_cache_extra_regs, slm_hw_cache_extra_regs,
+ sizeof(hw_cache_extra_regs));
+
+ intel_pmu_lbr_init_atom();
+
+ x86_pmu.event_constraints = intel_slm_event_constraints;
+ x86_pmu.pebs_constraints = intel_slm_pebs_event_constraints;
+ x86_pmu.extra_regs = intel_slm_extra_regs;
+ x86_pmu.flags |= PMU_FL_HAS_RSP_1;
+ pr_cont("Silvermont events, ");
+ break;
+
+ case 37: /* 32nm Westmere */
+ case 44: /* 32nm Westmere-EP */
+ case 47: /* 32nm Westmere-EX */
+ memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids,
+ sizeof(hw_cache_event_ids));
+ memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs,
+ sizeof(hw_cache_extra_regs));
+
+ intel_pmu_lbr_init_nhm();
+
+ x86_pmu.event_constraints = intel_westmere_event_constraints;
+ x86_pmu.enable_all = intel_pmu_nhm_enable_all;
+ x86_pmu.pebs_constraints = intel_westmere_pebs_event_constraints;
+ x86_pmu.extra_regs = intel_westmere_extra_regs;
+ x86_pmu.flags |= PMU_FL_HAS_RSP_1;
+
+ x86_pmu.cpu_events = nhm_events_attrs;
+
+ /* UOPS_ISSUED.STALLED_CYCLES */
+ intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
+ X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
+ /* UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */
+ intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] =
+ X86_CONFIG(.event=0xb1, .umask=0x3f, .inv=1, .cmask=1);
+
+ intel_pmu_pebs_data_source_nhm();
+ pr_cont("Westmere events, ");
+ break;
+
+ case 42: /* 32nm SandyBridge */
+ case 45: /* 32nm SandyBridge-E/EN/EP */
+ x86_add_quirk(intel_sandybridge_quirk);
+ x86_add_quirk(intel_ht_bug);
+ memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
+ sizeof(hw_cache_event_ids));
+ memcpy(hw_cache_extra_regs, snb_hw_cache_extra_regs,
+ sizeof(hw_cache_extra_regs));
+
+ intel_pmu_lbr_init_snb();
+
+ x86_pmu.event_constraints = intel_snb_event_constraints;
+ x86_pmu.pebs_constraints = intel_snb_pebs_event_constraints;
+ x86_pmu.pebs_aliases = intel_pebs_aliases_snb;
+ if (boot_cpu_data.x86_model == 45)
+ x86_pmu.extra_regs = intel_snbep_extra_regs;
+ else
+ x86_pmu.extra_regs = intel_snb_extra_regs;
+
+
+ /* all extra regs are per-cpu when HT is on */
+ x86_pmu.flags |= PMU_FL_HAS_RSP_1;
+ x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
+
+ x86_pmu.cpu_events = snb_events_attrs;
+
+ /* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */
+ intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
+ X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
+ /* UOPS_DISPATCHED.THREAD,c=1,i=1 to count stall cycles*/
+ intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] =
+ X86_CONFIG(.event=0xb1, .umask=0x01, .inv=1, .cmask=1);
+
+ pr_cont("SandyBridge events, ");
+ break;
+
+ case 58: /* 22nm IvyBridge */
+ case 62: /* 22nm IvyBridge-EP/EX */
+ x86_add_quirk(intel_ht_bug);
+ memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
+ sizeof(hw_cache_event_ids));
+ /* dTLB-load-misses on IVB is different than SNB */
+ hw_cache_event_ids[C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = 0x8108; /* DTLB_LOAD_MISSES.DEMAND_LD_MISS_CAUSES_A_WALK */
+
+ memcpy(hw_cache_extra_regs, snb_hw_cache_extra_regs,
+ sizeof(hw_cache_extra_regs));
+
+ intel_pmu_lbr_init_snb();
+
+ x86_pmu.event_constraints = intel_ivb_event_constraints;
+ x86_pmu.pebs_constraints = intel_ivb_pebs_event_constraints;
+ x86_pmu.pebs_aliases = intel_pebs_aliases_ivb;
+ x86_pmu.pebs_prec_dist = true;
+ if (boot_cpu_data.x86_model == 62)
+ x86_pmu.extra_regs = intel_snbep_extra_regs;
+ else
+ x86_pmu.extra_regs = intel_snb_extra_regs;
+ /* all extra regs are per-cpu when HT is on */
+ x86_pmu.flags |= PMU_FL_HAS_RSP_1;
+ x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
+
+ x86_pmu.cpu_events = snb_events_attrs;
+
+ /* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */
+ intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
+ X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
+
+ pr_cont("IvyBridge events, ");
+ break;
+
+
+ case 60: /* 22nm Haswell Core */
+ case 63: /* 22nm Haswell Server */
+ case 69: /* 22nm Haswell ULT */
+ case 70: /* 22nm Haswell + GT3e (Intel Iris Pro graphics) */
+ x86_add_quirk(intel_ht_bug);
+ x86_pmu.late_ack = true;
+ memcpy(hw_cache_event_ids, hsw_hw_cache_event_ids, sizeof(hw_cache_event_ids));
+ memcpy(hw_cache_extra_regs, hsw_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
+
+ intel_pmu_lbr_init_hsw();
+
+ x86_pmu.event_constraints = intel_hsw_event_constraints;
+ x86_pmu.pebs_constraints = intel_hsw_pebs_event_constraints;
+ x86_pmu.extra_regs = intel_snbep_extra_regs;
+ x86_pmu.pebs_aliases = intel_pebs_aliases_ivb;
+ x86_pmu.pebs_prec_dist = true;
+ /* all extra regs are per-cpu when HT is on */
+ x86_pmu.flags |= PMU_FL_HAS_RSP_1;
+ x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
+
+ x86_pmu.hw_config = hsw_hw_config;
+ x86_pmu.get_event_constraints = hsw_get_event_constraints;
+ x86_pmu.cpu_events = hsw_events_attrs;
+ x86_pmu.lbr_double_abort = true;
+ pr_cont("Haswell events, ");
+ break;
+
+ case 61: /* 14nm Broadwell Core-M */
+ case 86: /* 14nm Broadwell Xeon D */
+ case 71: /* 14nm Broadwell + GT3e (Intel Iris Pro graphics) */
+ case 79: /* 14nm Broadwell Server */
+ x86_pmu.late_ack = true;
+ memcpy(hw_cache_event_ids, hsw_hw_cache_event_ids, sizeof(hw_cache_event_ids));
+ memcpy(hw_cache_extra_regs, hsw_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
+
+ /* L3_MISS_LOCAL_DRAM is BIT(26) in Broadwell */
+ hw_cache_extra_regs[C(LL)][C(OP_READ)][C(RESULT_MISS)] = HSW_DEMAND_READ |
+ BDW_L3_MISS|HSW_SNOOP_DRAM;
+ hw_cache_extra_regs[C(LL)][C(OP_WRITE)][C(RESULT_MISS)] = HSW_DEMAND_WRITE|BDW_L3_MISS|
+ HSW_SNOOP_DRAM;
+ hw_cache_extra_regs[C(NODE)][C(OP_READ)][C(RESULT_ACCESS)] = HSW_DEMAND_READ|
+ BDW_L3_MISS_LOCAL|HSW_SNOOP_DRAM;
+ hw_cache_extra_regs[C(NODE)][C(OP_WRITE)][C(RESULT_ACCESS)] = HSW_DEMAND_WRITE|
+ BDW_L3_MISS_LOCAL|HSW_SNOOP_DRAM;
+
+ intel_pmu_lbr_init_hsw();
+
+ x86_pmu.event_constraints = intel_bdw_event_constraints;
+ x86_pmu.pebs_constraints = intel_bdw_pebs_event_constraints;
+ x86_pmu.extra_regs = intel_snbep_extra_regs;
+ x86_pmu.pebs_aliases = intel_pebs_aliases_ivb;
+ x86_pmu.pebs_prec_dist = true;
+ /* all extra regs are per-cpu when HT is on */
+ x86_pmu.flags |= PMU_FL_HAS_RSP_1;
+ x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
+
+ x86_pmu.hw_config = hsw_hw_config;
+ x86_pmu.get_event_constraints = hsw_get_event_constraints;
+ x86_pmu.cpu_events = hsw_events_attrs;
+ x86_pmu.limit_period = bdw_limit_period;
+ pr_cont("Broadwell events, ");
+ break;
+
+ case 87: /* Knights Landing Xeon Phi */
+ memcpy(hw_cache_event_ids,
+ slm_hw_cache_event_ids, sizeof(hw_cache_event_ids));
+ memcpy(hw_cache_extra_regs,
+ knl_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
+ intel_pmu_lbr_init_knl();
+
+ x86_pmu.event_constraints = intel_slm_event_constraints;
+ x86_pmu.pebs_constraints = intel_slm_pebs_event_constraints;
+ x86_pmu.extra_regs = intel_knl_extra_regs;
+
+ /* all extra regs are per-cpu when HT is on */
+ x86_pmu.flags |= PMU_FL_HAS_RSP_1;
+ x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
+
+ pr_cont("Knights Landing events, ");
+ break;
+
+ case 142: /* 14nm Kabylake Mobile */
+ case 158: /* 14nm Kabylake Desktop */
+ case 78: /* 14nm Skylake Mobile */
+ case 94: /* 14nm Skylake Desktop */
+ case 85: /* 14nm Skylake Server */
+ x86_pmu.late_ack = true;
+ memcpy(hw_cache_event_ids, skl_hw_cache_event_ids, sizeof(hw_cache_event_ids));
+ memcpy(hw_cache_extra_regs, skl_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
+ intel_pmu_lbr_init_skl();
+
+ x86_pmu.event_constraints = intel_skl_event_constraints;
+ x86_pmu.pebs_constraints = intel_skl_pebs_event_constraints;
+ x86_pmu.extra_regs = intel_skl_extra_regs;
+ x86_pmu.pebs_aliases = intel_pebs_aliases_skl;
+ x86_pmu.pebs_prec_dist = true;
+ /* all extra regs are per-cpu when HT is on */
+ x86_pmu.flags |= PMU_FL_HAS_RSP_1;
+ x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
+
+ x86_pmu.hw_config = hsw_hw_config;
+ x86_pmu.get_event_constraints = hsw_get_event_constraints;
+ x86_pmu.format_attrs = merge_attr(intel_arch3_formats_attr,
+ skl_format_attr);
+ WARN_ON(!x86_pmu.format_attrs);
+ x86_pmu.cpu_events = hsw_events_attrs;
+ pr_cont("Skylake events, ");
+ break;
+
+ default:
+ switch (x86_pmu.version) {
+ case 1:
+ x86_pmu.event_constraints = intel_v1_event_constraints;
+ pr_cont("generic architected perfmon v1, ");
+ break;
+ default:
+ /*
+ * default constraints for v2 and up
+ */
+ x86_pmu.event_constraints = intel_gen_event_constraints;
+ pr_cont("generic architected perfmon, ");
+ break;
+ }
+ }
+
+ if (x86_pmu.num_counters > INTEL_PMC_MAX_GENERIC) {
+ WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!",
+ x86_pmu.num_counters, INTEL_PMC_MAX_GENERIC);
+ x86_pmu.num_counters = INTEL_PMC_MAX_GENERIC;
+ }
+ x86_pmu.intel_ctrl = (1 << x86_pmu.num_counters) - 1;
+
+ if (x86_pmu.num_counters_fixed > INTEL_PMC_MAX_FIXED) {
+ WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!",
+ x86_pmu.num_counters_fixed, INTEL_PMC_MAX_FIXED);
+ x86_pmu.num_counters_fixed = INTEL_PMC_MAX_FIXED;
+ }
+
+ x86_pmu.intel_ctrl |=
+ ((1LL << x86_pmu.num_counters_fixed)-1) << INTEL_PMC_IDX_FIXED;
+
+ if (x86_pmu.event_constraints) {
+ /*
+ * event on fixed counter2 (REF_CYCLES) only works on this
+ * counter, so do not extend mask to generic counters
+ */
+ for_each_event_constraint(c, x86_pmu.event_constraints) {
+ if (c->cmask == FIXED_EVENT_FLAGS
+ && c->idxmsk64 != INTEL_PMC_MSK_FIXED_REF_CYCLES) {
+ c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1;
+ }
+ c->idxmsk64 &=
+ ~(~0ULL << (INTEL_PMC_IDX_FIXED + x86_pmu.num_counters_fixed));
+ c->weight = hweight64(c->idxmsk64);
+ }
+ }
+
+ /*
+ * Access LBR MSR may cause #GP under certain circumstances.
+ * E.g. KVM doesn't support LBR MSR
+ * Check all LBT MSR here.
+ * Disable LBR access if any LBR MSRs can not be accessed.
+ */
+ if (x86_pmu.lbr_nr && !check_msr(x86_pmu.lbr_tos, 0x3UL))
+ x86_pmu.lbr_nr = 0;
+ for (i = 0; i < x86_pmu.lbr_nr; i++) {
+ if (!(check_msr(x86_pmu.lbr_from + i, 0xffffUL) &&
+ check_msr(x86_pmu.lbr_to + i, 0xffffUL)))
+ x86_pmu.lbr_nr = 0;
+ }
+
+ /*
+ * Access extra MSR may cause #GP under certain circumstances.
+ * E.g. KVM doesn't support offcore event
+ * Check all extra_regs here.
+ */
+ if (x86_pmu.extra_regs) {
+ for (er = x86_pmu.extra_regs; er->msr; er++) {
+ er->extra_msr_access = check_msr(er->msr, 0x11UL);
+ /* Disable LBR select mapping */
+ if ((er->idx == EXTRA_REG_LBR) && !er->extra_msr_access)
+ x86_pmu.lbr_sel_map = NULL;
+ }
+ }
+
+ /* Support full width counters using alternative MSR range */
+ if (x86_pmu.intel_cap.full_width_write) {
+ x86_pmu.max_period = x86_pmu.cntval_mask;
+ x86_pmu.perfctr = MSR_IA32_PMC0;
+ pr_cont("full-width counters, ");
+ }
+
+ return 0;
+}
+
+/*
+ * HT bug: phase 2 init
+ * Called once we have valid topology information to check
+ * whether or not HT is enabled
+ * If HT is off, then we disable the workaround
+ */
+static __init int fixup_ht_bug(void)
+{
+ int cpu = smp_processor_id();
+ int w, c;
+ /*
+ * problem not present on this CPU model, nothing to do
+ */
+ if (!(x86_pmu.flags & PMU_FL_EXCL_ENABLED))
+ return 0;
+
+ w = cpumask_weight(topology_sibling_cpumask(cpu));
+ if (w > 1) {
+ pr_info("PMU erratum BJ122, BV98, HSD29 worked around, HT is on\n");
+ return 0;
+ }
+
+ if (lockup_detector_suspend() != 0) {
+ pr_debug("failed to disable PMU erratum BJ122, BV98, HSD29 workaround\n");
+ return 0;
+ }
+
+ x86_pmu.flags &= ~(PMU_FL_EXCL_CNTRS | PMU_FL_EXCL_ENABLED);
+
+ x86_pmu.start_scheduling = NULL;
+ x86_pmu.commit_scheduling = NULL;
+ x86_pmu.stop_scheduling = NULL;
+
+ lockup_detector_resume();
+
+ get_online_cpus();
+
+ for_each_online_cpu(c) {
+ free_excl_cntrs(c);
+ }
+
+ put_online_cpus();
+ pr_info("PMU erratum BJ122, BV98, HSD29 workaround disabled, HT off\n");
+ return 0;
+}
+subsys_initcall(fixup_ht_bug)
diff --git a/arch/x86/events/intel/cqm.c b/arch/x86/events/intel/cqm.c
new file mode 100644
index 000000000..7b5fd811e
--- /dev/null
+++ b/arch/x86/events/intel/cqm.c
@@ -0,0 +1,1789 @@
+/*
+ * Intel Cache Quality-of-Service Monitoring (CQM) support.
+ *
+ * Based very, very heavily on work by Peter Zijlstra.
+ */
+
+#include <linux/perf_event.h>
+#include <linux/slab.h>
+#include <asm/cpu_device_id.h>
+#include "../perf_event.h"
+
+#define MSR_IA32_PQR_ASSOC 0x0c8f
+#define MSR_IA32_QM_CTR 0x0c8e
+#define MSR_IA32_QM_EVTSEL 0x0c8d
+
+#define MBM_CNTR_WIDTH 24
+/*
+ * Guaranteed time in ms as per SDM where MBM counters will not overflow.
+ */
+#define MBM_CTR_OVERFLOW_TIME 1000
+
+static u32 cqm_max_rmid = -1;
+static unsigned int cqm_l3_scale; /* supposedly cacheline size */
+static bool cqm_enabled, mbm_enabled;
+unsigned int mbm_socket_max;
+
+/**
+ * struct intel_pqr_state - State cache for the PQR MSR
+ * @rmid: The cached Resource Monitoring ID
+ * @closid: The cached Class Of Service ID
+ * @rmid_usecnt: The usage counter for rmid
+ *
+ * The upper 32 bits of MSR_IA32_PQR_ASSOC contain closid and the
+ * lower 10 bits rmid. The update to MSR_IA32_PQR_ASSOC always
+ * contains both parts, so we need to cache them.
+ *
+ * The cache also helps to avoid pointless updates if the value does
+ * not change.
+ */
+struct intel_pqr_state {
+ u32 rmid;
+ u32 closid;
+ int rmid_usecnt;
+};
+
+/*
+ * The cached intel_pqr_state is strictly per CPU and can never be
+ * updated from a remote CPU. Both functions which modify the state
+ * (intel_cqm_event_start and intel_cqm_event_stop) are called with
+ * interrupts disabled, which is sufficient for the protection.
+ */
+static DEFINE_PER_CPU(struct intel_pqr_state, pqr_state);
+static struct hrtimer *mbm_timers;
+/**
+ * struct sample - mbm event's (local or total) data
+ * @total_bytes #bytes since we began monitoring
+ * @prev_msr previous value of MSR
+ */
+struct sample {
+ u64 total_bytes;
+ u64 prev_msr;
+};
+
+/*
+ * samples profiled for total memory bandwidth type events
+ */
+static struct sample *mbm_total;
+/*
+ * samples profiled for local memory bandwidth type events
+ */
+static struct sample *mbm_local;
+
+#define pkg_id topology_physical_package_id(smp_processor_id())
+/*
+ * rmid_2_index returns the index for the rmid in mbm_local/mbm_total array.
+ * mbm_total[] and mbm_local[] are linearly indexed by socket# * max number of
+ * rmids per socket, an example is given below
+ * RMID1 of Socket0: vrmid = 1
+ * RMID1 of Socket1: vrmid = 1 * (cqm_max_rmid + 1) + 1
+ * RMID1 of Socket2: vrmid = 2 * (cqm_max_rmid + 1) + 1
+ */
+#define rmid_2_index(rmid) ((pkg_id * (cqm_max_rmid + 1)) + rmid)
+/*
+ * Protects cache_cgroups and cqm_rmid_free_lru and cqm_rmid_limbo_lru.
+ * Also protects event->hw.cqm_rmid
+ *
+ * Hold either for stability, both for modification of ->hw.cqm_rmid.
+ */
+static DEFINE_MUTEX(cache_mutex);
+static DEFINE_RAW_SPINLOCK(cache_lock);
+
+/*
+ * Groups of events that have the same target(s), one RMID per group.
+ */
+static LIST_HEAD(cache_groups);
+
+/*
+ * Mask of CPUs for reading CQM values. We only need one per-socket.
+ */
+static cpumask_t cqm_cpumask;
+
+#define RMID_VAL_ERROR (1ULL << 63)
+#define RMID_VAL_UNAVAIL (1ULL << 62)
+
+/*
+ * Event IDs are used to program IA32_QM_EVTSEL before reading event
+ * counter from IA32_QM_CTR
+ */
+#define QOS_L3_OCCUP_EVENT_ID 0x01
+#define QOS_MBM_TOTAL_EVENT_ID 0x02
+#define QOS_MBM_LOCAL_EVENT_ID 0x03
+
+/*
+ * This is central to the rotation algorithm in __intel_cqm_rmid_rotate().
+ *
+ * This rmid is always free and is guaranteed to have an associated
+ * near-zero occupancy value, i.e. no cachelines are tagged with this
+ * RMID, once __intel_cqm_rmid_rotate() returns.
+ */
+static u32 intel_cqm_rotation_rmid;
+
+#define INVALID_RMID (-1)
+
+/*
+ * Is @rmid valid for programming the hardware?
+ *
+ * rmid 0 is reserved by the hardware for all non-monitored tasks, which
+ * means that we should never come across an rmid with that value.
+ * Likewise, an rmid value of -1 is used to indicate "no rmid currently
+ * assigned" and is used as part of the rotation code.
+ */
+static inline bool __rmid_valid(u32 rmid)
+{
+ if (!rmid || rmid == INVALID_RMID)
+ return false;
+
+ return true;
+}
+
+static u64 __rmid_read(u32 rmid)
+{
+ u64 val;
+
+ /*
+ * Ignore the SDM, this thing is _NOTHING_ like a regular perfcnt,
+ * it just says that to increase confusion.
+ */
+ wrmsr(MSR_IA32_QM_EVTSEL, QOS_L3_OCCUP_EVENT_ID, rmid);
+ rdmsrl(MSR_IA32_QM_CTR, val);
+
+ /*
+ * Aside from the ERROR and UNAVAIL bits, assume this thing returns
+ * the number of cachelines tagged with @rmid.
+ */
+ return val;
+}
+
+enum rmid_recycle_state {
+ RMID_YOUNG = 0,
+ RMID_AVAILABLE,
+ RMID_DIRTY,
+};
+
+struct cqm_rmid_entry {
+ u32 rmid;
+ enum rmid_recycle_state state;
+ struct list_head list;
+ unsigned long queue_time;
+};
+
+/*
+ * cqm_rmid_free_lru - A least recently used list of RMIDs.
+ *
+ * Oldest entry at the head, newest (most recently used) entry at the
+ * tail. This list is never traversed, it's only used to keep track of
+ * the lru order. That is, we only pick entries of the head or insert
+ * them on the tail.
+ *
+ * All entries on the list are 'free', and their RMIDs are not currently
+ * in use. To mark an RMID as in use, remove its entry from the lru
+ * list.
+ *
+ *
+ * cqm_rmid_limbo_lru - list of currently unused but (potentially) dirty RMIDs.
+ *
+ * This list is contains RMIDs that no one is currently using but that
+ * may have a non-zero occupancy value associated with them. The
+ * rotation worker moves RMIDs from the limbo list to the free list once
+ * the occupancy value drops below __intel_cqm_threshold.
+ *
+ * Both lists are protected by cache_mutex.
+ */
+static LIST_HEAD(cqm_rmid_free_lru);
+static LIST_HEAD(cqm_rmid_limbo_lru);
+
+/*
+ * We use a simple array of pointers so that we can lookup a struct
+ * cqm_rmid_entry in O(1). This alleviates the callers of __get_rmid()
+ * and __put_rmid() from having to worry about dealing with struct
+ * cqm_rmid_entry - they just deal with rmids, i.e. integers.
+ *
+ * Once this array is initialized it is read-only. No locks are required
+ * to access it.
+ *
+ * All entries for all RMIDs can be looked up in the this array at all
+ * times.
+ */
+static struct cqm_rmid_entry **cqm_rmid_ptrs;
+
+static inline struct cqm_rmid_entry *__rmid_entry(u32 rmid)
+{
+ struct cqm_rmid_entry *entry;
+
+ entry = cqm_rmid_ptrs[rmid];
+ WARN_ON(entry->rmid != rmid);
+
+ return entry;
+}
+
+/*
+ * Returns < 0 on fail.
+ *
+ * We expect to be called with cache_mutex held.
+ */
+static u32 __get_rmid(void)
+{
+ struct cqm_rmid_entry *entry;
+
+ lockdep_assert_held(&cache_mutex);
+
+ if (list_empty(&cqm_rmid_free_lru))
+ return INVALID_RMID;
+
+ entry = list_first_entry(&cqm_rmid_free_lru, struct cqm_rmid_entry, list);
+ list_del(&entry->list);
+
+ return entry->rmid;
+}
+
+static void __put_rmid(u32 rmid)
+{
+ struct cqm_rmid_entry *entry;
+
+ lockdep_assert_held(&cache_mutex);
+
+ WARN_ON(!__rmid_valid(rmid));
+ entry = __rmid_entry(rmid);
+
+ entry->queue_time = jiffies;
+ entry->state = RMID_YOUNG;
+
+ list_add_tail(&entry->list, &cqm_rmid_limbo_lru);
+}
+
+static void cqm_cleanup(void)
+{
+ int i;
+
+ if (!cqm_rmid_ptrs)
+ return;
+
+ for (i = 0; i < cqm_max_rmid; i++)
+ kfree(cqm_rmid_ptrs[i]);
+
+ kfree(cqm_rmid_ptrs);
+ cqm_rmid_ptrs = NULL;
+ cqm_enabled = false;
+}
+
+static int intel_cqm_setup_rmid_cache(void)
+{
+ struct cqm_rmid_entry *entry;
+ unsigned int nr_rmids;
+ int r = 0;
+
+ nr_rmids = cqm_max_rmid + 1;
+ cqm_rmid_ptrs = kzalloc(sizeof(struct cqm_rmid_entry *) *
+ nr_rmids, GFP_KERNEL);
+ if (!cqm_rmid_ptrs)
+ return -ENOMEM;
+
+ for (; r <= cqm_max_rmid; r++) {
+ struct cqm_rmid_entry *entry;
+
+ entry = kmalloc(sizeof(*entry), GFP_KERNEL);
+ if (!entry)
+ goto fail;
+
+ INIT_LIST_HEAD(&entry->list);
+ entry->rmid = r;
+ cqm_rmid_ptrs[r] = entry;
+
+ list_add_tail(&entry->list, &cqm_rmid_free_lru);
+ }
+
+ /*
+ * RMID 0 is special and is always allocated. It's used for all
+ * tasks that are not monitored.
+ */
+ entry = __rmid_entry(0);
+ list_del(&entry->list);
+
+ mutex_lock(&cache_mutex);
+ intel_cqm_rotation_rmid = __get_rmid();
+ mutex_unlock(&cache_mutex);
+
+ return 0;
+
+fail:
+ cqm_cleanup();
+ return -ENOMEM;
+}
+
+/*
+ * Determine if @a and @b measure the same set of tasks.
+ *
+ * If @a and @b measure the same set of tasks then we want to share a
+ * single RMID.
+ */
+static bool __match_event(struct perf_event *a, struct perf_event *b)
+{
+ /* Per-cpu and task events don't mix */
+ if ((a->attach_state & PERF_ATTACH_TASK) !=
+ (b->attach_state & PERF_ATTACH_TASK))
+ return false;
+
+#ifdef CONFIG_CGROUP_PERF
+ if (a->cgrp != b->cgrp)
+ return false;
+#endif
+
+ /* If not task event, we're machine wide */
+ if (!(b->attach_state & PERF_ATTACH_TASK))
+ return true;
+
+ /*
+ * Events that target same task are placed into the same cache group.
+ * Mark it as a multi event group, so that we update ->count
+ * for every event rather than just the group leader later.
+ */
+ if (a->hw.target == b->hw.target) {
+ b->hw.is_group_event = true;
+ return true;
+ }
+
+ /*
+ * Are we an inherited event?
+ */
+ if (b->parent == a)
+ return true;
+
+ return false;
+}
+
+#ifdef CONFIG_CGROUP_PERF
+static inline struct perf_cgroup *event_to_cgroup(struct perf_event *event)
+{
+ if (event->attach_state & PERF_ATTACH_TASK)
+ return perf_cgroup_from_task(event->hw.target, event->ctx);
+
+ return event->cgrp;
+}
+#endif
+
+/*
+ * Determine if @a's tasks intersect with @b's tasks
+ *
+ * There are combinations of events that we explicitly prohibit,
+ *
+ * PROHIBITS
+ * system-wide -> cgroup and task
+ * cgroup -> system-wide
+ * -> task in cgroup
+ * task -> system-wide
+ * -> task in cgroup
+ *
+ * Call this function before allocating an RMID.
+ */
+static bool __conflict_event(struct perf_event *a, struct perf_event *b)
+{
+#ifdef CONFIG_CGROUP_PERF
+ /*
+ * We can have any number of cgroups but only one system-wide
+ * event at a time.
+ */
+ if (a->cgrp && b->cgrp) {
+ struct perf_cgroup *ac = a->cgrp;
+ struct perf_cgroup *bc = b->cgrp;
+
+ /*
+ * This condition should have been caught in
+ * __match_event() and we should be sharing an RMID.
+ */
+ WARN_ON_ONCE(ac == bc);
+
+ if (cgroup_is_descendant(ac->css.cgroup, bc->css.cgroup) ||
+ cgroup_is_descendant(bc->css.cgroup, ac->css.cgroup))
+ return true;
+
+ return false;
+ }
+
+ if (a->cgrp || b->cgrp) {
+ struct perf_cgroup *ac, *bc;
+
+ /*
+ * cgroup and system-wide events are mutually exclusive
+ */
+ if ((a->cgrp && !(b->attach_state & PERF_ATTACH_TASK)) ||
+ (b->cgrp && !(a->attach_state & PERF_ATTACH_TASK)))
+ return true;
+
+ /*
+ * Ensure neither event is part of the other's cgroup
+ */
+ ac = event_to_cgroup(a);
+ bc = event_to_cgroup(b);
+ if (ac == bc)
+ return true;
+
+ /*
+ * Must have cgroup and non-intersecting task events.
+ */
+ if (!ac || !bc)
+ return false;
+
+ /*
+ * We have cgroup and task events, and the task belongs
+ * to a cgroup. Check for for overlap.
+ */
+ if (cgroup_is_descendant(ac->css.cgroup, bc->css.cgroup) ||
+ cgroup_is_descendant(bc->css.cgroup, ac->css.cgroup))
+ return true;
+
+ return false;
+ }
+#endif
+ /*
+ * If one of them is not a task, same story as above with cgroups.
+ */
+ if (!(a->attach_state & PERF_ATTACH_TASK) ||
+ !(b->attach_state & PERF_ATTACH_TASK))
+ return true;
+
+ /*
+ * Must be non-overlapping.
+ */
+ return false;
+}
+
+struct rmid_read {
+ u32 rmid;
+ u32 evt_type;
+ atomic64_t value;
+};
+
+static void __intel_cqm_event_count(void *info);
+static void init_mbm_sample(u32 rmid, u32 evt_type);
+static void __intel_mbm_event_count(void *info);
+
+static bool is_mbm_event(int e)
+{
+ return (e >= QOS_MBM_TOTAL_EVENT_ID && e <= QOS_MBM_LOCAL_EVENT_ID);
+}
+
+static void cqm_mask_call(struct rmid_read *rr)
+{
+ if (is_mbm_event(rr->evt_type))
+ on_each_cpu_mask(&cqm_cpumask, __intel_mbm_event_count, rr, 1);
+ else
+ on_each_cpu_mask(&cqm_cpumask, __intel_cqm_event_count, rr, 1);
+}
+
+/*
+ * Exchange the RMID of a group of events.
+ */
+static u32 intel_cqm_xchg_rmid(struct perf_event *group, u32 rmid)
+{
+ struct perf_event *event;
+ struct list_head *head = &group->hw.cqm_group_entry;
+ u32 old_rmid = group->hw.cqm_rmid;
+
+ lockdep_assert_held(&cache_mutex);
+
+ /*
+ * If our RMID is being deallocated, perform a read now.
+ */
+ if (__rmid_valid(old_rmid) && !__rmid_valid(rmid)) {
+ struct rmid_read rr = {
+ .rmid = old_rmid,
+ .evt_type = group->attr.config,
+ .value = ATOMIC64_INIT(0),
+ };
+
+ cqm_mask_call(&rr);
+ local64_set(&group->count, atomic64_read(&rr.value));
+ }
+
+ raw_spin_lock_irq(&cache_lock);
+
+ group->hw.cqm_rmid = rmid;
+ list_for_each_entry(event, head, hw.cqm_group_entry)
+ event->hw.cqm_rmid = rmid;
+
+ raw_spin_unlock_irq(&cache_lock);
+
+ /*
+ * If the allocation is for mbm, init the mbm stats.
+ * Need to check if each event in the group is mbm event
+ * because there could be multiple type of events in the same group.
+ */
+ if (__rmid_valid(rmid)) {
+ event = group;
+ if (is_mbm_event(event->attr.config))
+ init_mbm_sample(rmid, event->attr.config);
+
+ list_for_each_entry(event, head, hw.cqm_group_entry) {
+ if (is_mbm_event(event->attr.config))
+ init_mbm_sample(rmid, event->attr.config);
+ }
+ }
+
+ return old_rmid;
+}
+
+/*
+ * If we fail to assign a new RMID for intel_cqm_rotation_rmid because
+ * cachelines are still tagged with RMIDs in limbo, we progressively
+ * increment the threshold until we find an RMID in limbo with <=
+ * __intel_cqm_threshold lines tagged. This is designed to mitigate the
+ * problem where cachelines tagged with an RMID are not steadily being
+ * evicted.
+ *
+ * On successful rotations we decrease the threshold back towards zero.
+ *
+ * __intel_cqm_max_threshold provides an upper bound on the threshold,
+ * and is measured in bytes because it's exposed to userland.
+ */
+static unsigned int __intel_cqm_threshold;
+static unsigned int __intel_cqm_max_threshold;
+
+/*
+ * Test whether an RMID has a zero occupancy value on this cpu.
+ */
+static void intel_cqm_stable(void *arg)
+{
+ struct cqm_rmid_entry *entry;
+
+ list_for_each_entry(entry, &cqm_rmid_limbo_lru, list) {
+ if (entry->state != RMID_AVAILABLE)
+ break;
+
+ if (__rmid_read(entry->rmid) > __intel_cqm_threshold)
+ entry->state = RMID_DIRTY;
+ }
+}
+
+/*
+ * If we have group events waiting for an RMID that don't conflict with
+ * events already running, assign @rmid.
+ */
+static bool intel_cqm_sched_in_event(u32 rmid)
+{
+ struct perf_event *leader, *event;
+
+ lockdep_assert_held(&cache_mutex);
+
+ leader = list_first_entry(&cache_groups, struct perf_event,
+ hw.cqm_groups_entry);
+ event = leader;
+
+ list_for_each_entry_continue(event, &cache_groups,
+ hw.cqm_groups_entry) {
+ if (__rmid_valid(event->hw.cqm_rmid))
+ continue;
+
+ if (__conflict_event(event, leader))
+ continue;
+
+ intel_cqm_xchg_rmid(event, rmid);
+ return true;
+ }
+
+ return false;
+}
+
+/*
+ * Initially use this constant for both the limbo queue time and the
+ * rotation timer interval, pmu::hrtimer_interval_ms.
+ *
+ * They don't need to be the same, but the two are related since if you
+ * rotate faster than you recycle RMIDs, you may run out of available
+ * RMIDs.
+ */
+#define RMID_DEFAULT_QUEUE_TIME 250 /* ms */
+
+static unsigned int __rmid_queue_time_ms = RMID_DEFAULT_QUEUE_TIME;
+
+/*
+ * intel_cqm_rmid_stabilize - move RMIDs from limbo to free list
+ * @nr_available: number of freeable RMIDs on the limbo list
+ *
+ * Quiescent state; wait for all 'freed' RMIDs to become unused, i.e. no
+ * cachelines are tagged with those RMIDs. After this we can reuse them
+ * and know that the current set of active RMIDs is stable.
+ *
+ * Return %true or %false depending on whether stabilization needs to be
+ * reattempted.
+ *
+ * If we return %true then @nr_available is updated to indicate the
+ * number of RMIDs on the limbo list that have been queued for the
+ * minimum queue time (RMID_AVAILABLE), but whose data occupancy values
+ * are above __intel_cqm_threshold.
+ */
+static bool intel_cqm_rmid_stabilize(unsigned int *available)
+{
+ struct cqm_rmid_entry *entry, *tmp;
+
+ lockdep_assert_held(&cache_mutex);
+
+ *available = 0;
+ list_for_each_entry(entry, &cqm_rmid_limbo_lru, list) {
+ unsigned long min_queue_time;
+ unsigned long now = jiffies;
+
+ /*
+ * We hold RMIDs placed into limbo for a minimum queue
+ * time. Before the minimum queue time has elapsed we do
+ * not recycle RMIDs.
+ *
+ * The reasoning is that until a sufficient time has
+ * passed since we stopped using an RMID, any RMID
+ * placed onto the limbo list will likely still have
+ * data tagged in the cache, which means we'll probably
+ * fail to recycle it anyway.
+ *
+ * We can save ourselves an expensive IPI by skipping
+ * any RMIDs that have not been queued for the minimum
+ * time.
+ */
+ min_queue_time = entry->queue_time +
+ msecs_to_jiffies(__rmid_queue_time_ms);
+
+ if (time_after(min_queue_time, now))
+ break;
+
+ entry->state = RMID_AVAILABLE;
+ (*available)++;
+ }
+
+ /*
+ * Fast return if none of the RMIDs on the limbo list have been
+ * sitting on the queue for the minimum queue time.
+ */
+ if (!*available)
+ return false;
+
+ /*
+ * Test whether an RMID is free for each package.
+ */
+ on_each_cpu_mask(&cqm_cpumask, intel_cqm_stable, NULL, true);
+
+ list_for_each_entry_safe(entry, tmp, &cqm_rmid_limbo_lru, list) {
+ /*
+ * Exhausted all RMIDs that have waited min queue time.
+ */
+ if (entry->state == RMID_YOUNG)
+ break;
+
+ if (entry->state == RMID_DIRTY)
+ continue;
+
+ list_del(&entry->list); /* remove from limbo */
+
+ /*
+ * The rotation RMID gets priority if it's
+ * currently invalid. In which case, skip adding
+ * the RMID to the the free lru.
+ */
+ if (!__rmid_valid(intel_cqm_rotation_rmid)) {
+ intel_cqm_rotation_rmid = entry->rmid;
+ continue;
+ }
+
+ /*
+ * If we have groups waiting for RMIDs, hand
+ * them one now provided they don't conflict.
+ */
+ if (intel_cqm_sched_in_event(entry->rmid))
+ continue;
+
+ /*
+ * Otherwise place it onto the free list.
+ */
+ list_add_tail(&entry->list, &cqm_rmid_free_lru);
+ }
+
+
+ return __rmid_valid(intel_cqm_rotation_rmid);
+}
+
+/*
+ * Pick a victim group and move it to the tail of the group list.
+ * @next: The first group without an RMID
+ */
+static void __intel_cqm_pick_and_rotate(struct perf_event *next)
+{
+ struct perf_event *rotor;
+ u32 rmid;
+
+ lockdep_assert_held(&cache_mutex);
+
+ rotor = list_first_entry(&cache_groups, struct perf_event,
+ hw.cqm_groups_entry);
+
+ /*
+ * The group at the front of the list should always have a valid
+ * RMID. If it doesn't then no groups have RMIDs assigned and we
+ * don't need to rotate the list.
+ */
+ if (next == rotor)
+ return;
+
+ rmid = intel_cqm_xchg_rmid(rotor, INVALID_RMID);
+ __put_rmid(rmid);
+
+ list_rotate_left(&cache_groups);
+}
+
+/*
+ * Deallocate the RMIDs from any events that conflict with @event, and
+ * place them on the back of the group list.
+ */
+static void intel_cqm_sched_out_conflicting_events(struct perf_event *event)
+{
+ struct perf_event *group, *g;
+ u32 rmid;
+
+ lockdep_assert_held(&cache_mutex);
+
+ list_for_each_entry_safe(group, g, &cache_groups, hw.cqm_groups_entry) {
+ if (group == event)
+ continue;
+
+ rmid = group->hw.cqm_rmid;
+
+ /*
+ * Skip events that don't have a valid RMID.
+ */
+ if (!__rmid_valid(rmid))
+ continue;
+
+ /*
+ * No conflict? No problem! Leave the event alone.
+ */
+ if (!__conflict_event(group, event))
+ continue;
+
+ intel_cqm_xchg_rmid(group, INVALID_RMID);
+ __put_rmid(rmid);
+ }
+}
+
+/*
+ * Attempt to rotate the groups and assign new RMIDs.
+ *
+ * We rotate for two reasons,
+ * 1. To handle the scheduling of conflicting events
+ * 2. To recycle RMIDs
+ *
+ * Rotating RMIDs is complicated because the hardware doesn't give us
+ * any clues.
+ *
+ * There's problems with the hardware interface; when you change the
+ * task:RMID map cachelines retain their 'old' tags, giving a skewed
+ * picture. In order to work around this, we must always keep one free
+ * RMID - intel_cqm_rotation_rmid.
+ *
+ * Rotation works by taking away an RMID from a group (the old RMID),
+ * and assigning the free RMID to another group (the new RMID). We must
+ * then wait for the old RMID to not be used (no cachelines tagged).
+ * This ensure that all cachelines are tagged with 'active' RMIDs. At
+ * this point we can start reading values for the new RMID and treat the
+ * old RMID as the free RMID for the next rotation.
+ *
+ * Return %true or %false depending on whether we did any rotating.
+ */
+static bool __intel_cqm_rmid_rotate(void)
+{
+ struct perf_event *group, *start = NULL;
+ unsigned int threshold_limit;
+ unsigned int nr_needed = 0;
+ unsigned int nr_available;
+ bool rotated = false;
+
+ mutex_lock(&cache_mutex);
+
+again:
+ /*
+ * Fast path through this function if there are no groups and no
+ * RMIDs that need cleaning.
+ */
+ if (list_empty(&cache_groups) && list_empty(&cqm_rmid_limbo_lru))
+ goto out;
+
+ list_for_each_entry(group, &cache_groups, hw.cqm_groups_entry) {
+ if (!__rmid_valid(group->hw.cqm_rmid)) {
+ if (!start)
+ start = group;
+ nr_needed++;
+ }
+ }
+
+ /*
+ * We have some event groups, but they all have RMIDs assigned
+ * and no RMIDs need cleaning.
+ */
+ if (!nr_needed && list_empty(&cqm_rmid_limbo_lru))
+ goto out;
+
+ if (!nr_needed)
+ goto stabilize;
+
+ /*
+ * We have more event groups without RMIDs than available RMIDs,
+ * or we have event groups that conflict with the ones currently
+ * scheduled.
+ *
+ * We force deallocate the rmid of the group at the head of
+ * cache_groups. The first event group without an RMID then gets
+ * assigned intel_cqm_rotation_rmid. This ensures we always make
+ * forward progress.
+ *
+ * Rotate the cache_groups list so the previous head is now the
+ * tail.
+ */
+ __intel_cqm_pick_and_rotate(start);
+
+ /*
+ * If the rotation is going to succeed, reduce the threshold so
+ * that we don't needlessly reuse dirty RMIDs.
+ */
+ if (__rmid_valid(intel_cqm_rotation_rmid)) {
+ intel_cqm_xchg_rmid(start, intel_cqm_rotation_rmid);
+ intel_cqm_rotation_rmid = __get_rmid();
+
+ intel_cqm_sched_out_conflicting_events(start);
+
+ if (__intel_cqm_threshold)
+ __intel_cqm_threshold--;
+ }
+
+ rotated = true;
+
+stabilize:
+ /*
+ * We now need to stablize the RMID we freed above (if any) to
+ * ensure that the next time we rotate we have an RMID with zero
+ * occupancy value.
+ *
+ * Alternatively, if we didn't need to perform any rotation,
+ * we'll have a bunch of RMIDs in limbo that need stabilizing.
+ */
+ threshold_limit = __intel_cqm_max_threshold / cqm_l3_scale;
+
+ while (intel_cqm_rmid_stabilize(&nr_available) &&
+ __intel_cqm_threshold < threshold_limit) {
+ unsigned int steal_limit;
+
+ /*
+ * Don't spin if nobody is actively waiting for an RMID,
+ * the rotation worker will be kicked as soon as an
+ * event needs an RMID anyway.
+ */
+ if (!nr_needed)
+ break;
+
+ /* Allow max 25% of RMIDs to be in limbo. */
+ steal_limit = (cqm_max_rmid + 1) / 4;
+
+ /*
+ * We failed to stabilize any RMIDs so our rotation
+ * logic is now stuck. In order to make forward progress
+ * we have a few options:
+ *
+ * 1. rotate ("steal") another RMID
+ * 2. increase the threshold
+ * 3. do nothing
+ *
+ * We do both of 1. and 2. until we hit the steal limit.
+ *
+ * The steal limit prevents all RMIDs ending up on the
+ * limbo list. This can happen if every RMID has a
+ * non-zero occupancy above threshold_limit, and the
+ * occupancy values aren't dropping fast enough.
+ *
+ * Note that there is prioritisation at work here - we'd
+ * rather increase the number of RMIDs on the limbo list
+ * than increase the threshold, because increasing the
+ * threshold skews the event data (because we reuse
+ * dirty RMIDs) - threshold bumps are a last resort.
+ */
+ if (nr_available < steal_limit)
+ goto again;
+
+ __intel_cqm_threshold++;
+ }
+
+out:
+ mutex_unlock(&cache_mutex);
+ return rotated;
+}
+
+static void intel_cqm_rmid_rotate(struct work_struct *work);
+
+static DECLARE_DELAYED_WORK(intel_cqm_rmid_work, intel_cqm_rmid_rotate);
+
+static struct pmu intel_cqm_pmu;
+
+static void intel_cqm_rmid_rotate(struct work_struct *work)
+{
+ unsigned long delay;
+
+ __intel_cqm_rmid_rotate();
+
+ delay = msecs_to_jiffies(intel_cqm_pmu.hrtimer_interval_ms);
+ schedule_delayed_work(&intel_cqm_rmid_work, delay);
+}
+
+static u64 update_sample(unsigned int rmid, u32 evt_type, int first)
+{
+ struct sample *mbm_current;
+ u32 vrmid = rmid_2_index(rmid);
+ u64 val, bytes, shift;
+ u32 eventid;
+
+ if (evt_type == QOS_MBM_LOCAL_EVENT_ID) {
+ mbm_current = &mbm_local[vrmid];
+ eventid = QOS_MBM_LOCAL_EVENT_ID;
+ } else {
+ mbm_current = &mbm_total[vrmid];
+ eventid = QOS_MBM_TOTAL_EVENT_ID;
+ }
+
+ wrmsr(MSR_IA32_QM_EVTSEL, eventid, rmid);
+ rdmsrl(MSR_IA32_QM_CTR, val);
+ if (val & (RMID_VAL_ERROR | RMID_VAL_UNAVAIL))
+ return mbm_current->total_bytes;
+
+ if (first) {
+ mbm_current->prev_msr = val;
+ mbm_current->total_bytes = 0;
+ return mbm_current->total_bytes;
+ }
+
+ /*
+ * The h/w guarantees that counters will not overflow
+ * so long as we poll them at least once per second.
+ */
+ shift = 64 - MBM_CNTR_WIDTH;
+ bytes = (val << shift) - (mbm_current->prev_msr << shift);
+ bytes >>= shift;
+
+ bytes *= cqm_l3_scale;
+
+ mbm_current->total_bytes += bytes;
+ mbm_current->prev_msr = val;
+
+ return mbm_current->total_bytes;
+}
+
+static u64 rmid_read_mbm(unsigned int rmid, u32 evt_type)
+{
+ return update_sample(rmid, evt_type, 0);
+}
+
+static void __intel_mbm_event_init(void *info)
+{
+ struct rmid_read *rr = info;
+
+ update_sample(rr->rmid, rr->evt_type, 1);
+}
+
+static void init_mbm_sample(u32 rmid, u32 evt_type)
+{
+ struct rmid_read rr = {
+ .rmid = rmid,
+ .evt_type = evt_type,
+ .value = ATOMIC64_INIT(0),
+ };
+
+ /* on each socket, init sample */
+ on_each_cpu_mask(&cqm_cpumask, __intel_mbm_event_init, &rr, 1);
+}
+
+/*
+ * Find a group and setup RMID.
+ *
+ * If we're part of a group, we use the group's RMID.
+ */
+static void intel_cqm_setup_event(struct perf_event *event,
+ struct perf_event **group)
+{
+ struct perf_event *iter;
+ bool conflict = false;
+ u32 rmid;
+
+ event->hw.is_group_event = false;
+ list_for_each_entry(iter, &cache_groups, hw.cqm_groups_entry) {
+ rmid = iter->hw.cqm_rmid;
+
+ if (__match_event(iter, event)) {
+ /* All tasks in a group share an RMID */
+ event->hw.cqm_rmid = rmid;
+ *group = iter;
+ if (is_mbm_event(event->attr.config) && __rmid_valid(rmid))
+ init_mbm_sample(rmid, event->attr.config);
+ return;
+ }
+
+ /*
+ * We only care about conflicts for events that are
+ * actually scheduled in (and hence have a valid RMID).
+ */
+ if (__conflict_event(iter, event) && __rmid_valid(rmid))
+ conflict = true;
+ }
+
+ if (conflict)
+ rmid = INVALID_RMID;
+ else
+ rmid = __get_rmid();
+
+ if (is_mbm_event(event->attr.config) && __rmid_valid(rmid))
+ init_mbm_sample(rmid, event->attr.config);
+
+ event->hw.cqm_rmid = rmid;
+}
+
+static void intel_cqm_event_read(struct perf_event *event)
+{
+ unsigned long flags;
+ u32 rmid;
+ u64 val;
+
+ /*
+ * Task events are handled by intel_cqm_event_count().
+ */
+ if (event->cpu == -1)
+ return;
+
+ raw_spin_lock_irqsave(&cache_lock, flags);
+ rmid = event->hw.cqm_rmid;
+
+ if (!__rmid_valid(rmid))
+ goto out;
+
+ if (is_mbm_event(event->attr.config))
+ val = rmid_read_mbm(rmid, event->attr.config);
+ else
+ val = __rmid_read(rmid);
+
+ /*
+ * Ignore this reading on error states and do not update the value.
+ */
+ if (val & (RMID_VAL_ERROR | RMID_VAL_UNAVAIL))
+ goto out;
+
+ local64_set(&event->count, val);
+out:
+ raw_spin_unlock_irqrestore(&cache_lock, flags);
+}
+
+static void __intel_cqm_event_count(void *info)
+{
+ struct rmid_read *rr = info;
+ u64 val;
+
+ val = __rmid_read(rr->rmid);
+
+ if (val & (RMID_VAL_ERROR | RMID_VAL_UNAVAIL))
+ return;
+
+ atomic64_add(val, &rr->value);
+}
+
+static inline bool cqm_group_leader(struct perf_event *event)
+{
+ return !list_empty(&event->hw.cqm_groups_entry);
+}
+
+static void __intel_mbm_event_count(void *info)
+{
+ struct rmid_read *rr = info;
+ u64 val;
+
+ val = rmid_read_mbm(rr->rmid, rr->evt_type);
+ if (val & (RMID_VAL_ERROR | RMID_VAL_UNAVAIL))
+ return;
+ atomic64_add(val, &rr->value);
+}
+
+static enum hrtimer_restart mbm_hrtimer_handle(struct hrtimer *hrtimer)
+{
+ struct perf_event *iter, *iter1;
+ int ret = HRTIMER_RESTART;
+ struct list_head *head;
+ unsigned long flags;
+ u32 grp_rmid;
+
+ /*
+ * Need to cache_lock as the timer Event Select MSR reads
+ * can race with the mbm/cqm count() and mbm_init() reads.
+ */
+ raw_spin_lock_irqsave(&cache_lock, flags);
+
+ if (list_empty(&cache_groups)) {
+ ret = HRTIMER_NORESTART;
+ goto out;
+ }
+
+ list_for_each_entry(iter, &cache_groups, hw.cqm_groups_entry) {
+ grp_rmid = iter->hw.cqm_rmid;
+ if (!__rmid_valid(grp_rmid))
+ continue;
+ if (is_mbm_event(iter->attr.config))
+ update_sample(grp_rmid, iter->attr.config, 0);
+
+ head = &iter->hw.cqm_group_entry;
+ if (list_empty(head))
+ continue;
+ list_for_each_entry(iter1, head, hw.cqm_group_entry) {
+ if (!iter1->hw.is_group_event)
+ break;
+ if (is_mbm_event(iter1->attr.config))
+ update_sample(iter1->hw.cqm_rmid,
+ iter1->attr.config, 0);
+ }
+ }
+
+ hrtimer_forward_now(hrtimer, ms_to_ktime(MBM_CTR_OVERFLOW_TIME));
+out:
+ raw_spin_unlock_irqrestore(&cache_lock, flags);
+
+ return ret;
+}
+
+static void __mbm_start_timer(void *info)
+{
+ hrtimer_start(&mbm_timers[pkg_id], ms_to_ktime(MBM_CTR_OVERFLOW_TIME),
+ HRTIMER_MODE_REL_PINNED);
+}
+
+static void __mbm_stop_timer(void *info)
+{
+ hrtimer_cancel(&mbm_timers[pkg_id]);
+}
+
+static void mbm_start_timers(void)
+{
+ on_each_cpu_mask(&cqm_cpumask, __mbm_start_timer, NULL, 1);
+}
+
+static void mbm_stop_timers(void)
+{
+ on_each_cpu_mask(&cqm_cpumask, __mbm_stop_timer, NULL, 1);
+}
+
+static void mbm_hrtimer_init(void)
+{
+ struct hrtimer *hr;
+ int i;
+
+ for (i = 0; i < mbm_socket_max; i++) {
+ hr = &mbm_timers[i];
+ hrtimer_init(hr, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+ hr->function = mbm_hrtimer_handle;
+ }
+}
+
+static u64 intel_cqm_event_count(struct perf_event *event)
+{
+ unsigned long flags;
+ struct rmid_read rr = {
+ .evt_type = event->attr.config,
+ .value = ATOMIC64_INIT(0),
+ };
+
+ /*
+ * We only need to worry about task events. System-wide events
+ * are handled like usual, i.e. entirely with
+ * intel_cqm_event_read().
+ */
+ if (event->cpu != -1)
+ return __perf_event_count(event);
+
+ /*
+ * Only the group leader gets to report values except in case of
+ * multiple events in the same group, we still need to read the
+ * other events.This stops us
+ * reporting duplicate values to userspace, and gives us a clear
+ * rule for which task gets to report the values.
+ *
+ * Note that it is impossible to attribute these values to
+ * specific packages - we forfeit that ability when we create
+ * task events.
+ */
+ if (!cqm_group_leader(event) && !event->hw.is_group_event)
+ return 0;
+
+ /*
+ * Getting up-to-date values requires an SMP IPI which is not
+ * possible if we're being called in interrupt context. Return
+ * the cached values instead.
+ */
+ if (unlikely(in_interrupt()))
+ goto out;
+
+ /*
+ * Notice that we don't perform the reading of an RMID
+ * atomically, because we can't hold a spin lock across the
+ * IPIs.
+ *
+ * Speculatively perform the read, since @event might be
+ * assigned a different (possibly invalid) RMID while we're
+ * busying performing the IPI calls. It's therefore necessary to
+ * check @event's RMID afterwards, and if it has changed,
+ * discard the result of the read.
+ */
+ rr.rmid = ACCESS_ONCE(event->hw.cqm_rmid);
+
+ if (!__rmid_valid(rr.rmid))
+ goto out;
+
+ cqm_mask_call(&rr);
+
+ raw_spin_lock_irqsave(&cache_lock, flags);
+ if (event->hw.cqm_rmid == rr.rmid)
+ local64_set(&event->count, atomic64_read(&rr.value));
+ raw_spin_unlock_irqrestore(&cache_lock, flags);
+out:
+ return __perf_event_count(event);
+}
+
+static void intel_cqm_event_start(struct perf_event *event, int mode)
+{
+ struct intel_pqr_state *state = this_cpu_ptr(&pqr_state);
+ u32 rmid = event->hw.cqm_rmid;
+
+ if (!(event->hw.cqm_state & PERF_HES_STOPPED))
+ return;
+
+ event->hw.cqm_state &= ~PERF_HES_STOPPED;
+
+ if (state->rmid_usecnt++) {
+ if (!WARN_ON_ONCE(state->rmid != rmid))
+ return;
+ } else {
+ WARN_ON_ONCE(state->rmid);
+ }
+
+ state->rmid = rmid;
+ wrmsr(MSR_IA32_PQR_ASSOC, rmid, state->closid);
+}
+
+static void intel_cqm_event_stop(struct perf_event *event, int mode)
+{
+ struct intel_pqr_state *state = this_cpu_ptr(&pqr_state);
+
+ if (event->hw.cqm_state & PERF_HES_STOPPED)
+ return;
+
+ event->hw.cqm_state |= PERF_HES_STOPPED;
+
+ intel_cqm_event_read(event);
+
+ if (!--state->rmid_usecnt) {
+ state->rmid = 0;
+ wrmsr(MSR_IA32_PQR_ASSOC, 0, state->closid);
+ } else {
+ WARN_ON_ONCE(!state->rmid);
+ }
+}
+
+static int intel_cqm_event_add(struct perf_event *event, int mode)
+{
+ unsigned long flags;
+ u32 rmid;
+
+ raw_spin_lock_irqsave(&cache_lock, flags);
+
+ event->hw.cqm_state = PERF_HES_STOPPED;
+ rmid = event->hw.cqm_rmid;
+
+ if (__rmid_valid(rmid) && (mode & PERF_EF_START))
+ intel_cqm_event_start(event, mode);
+
+ raw_spin_unlock_irqrestore(&cache_lock, flags);
+
+ return 0;
+}
+
+static void intel_cqm_event_destroy(struct perf_event *event)
+{
+ struct perf_event *group_other = NULL;
+ unsigned long flags;
+
+ mutex_lock(&cache_mutex);
+ /*
+ * Hold the cache_lock as mbm timer handlers could be
+ * scanning the list of events.
+ */
+ raw_spin_lock_irqsave(&cache_lock, flags);
+
+ /*
+ * If there's another event in this group...
+ */
+ if (!list_empty(&event->hw.cqm_group_entry)) {
+ group_other = list_first_entry(&event->hw.cqm_group_entry,
+ struct perf_event,
+ hw.cqm_group_entry);
+ list_del(&event->hw.cqm_group_entry);
+ }
+
+ /*
+ * And we're the group leader..
+ */
+ if (cqm_group_leader(event)) {
+ /*
+ * If there was a group_other, make that leader, otherwise
+ * destroy the group and return the RMID.
+ */
+ if (group_other) {
+ list_replace(&event->hw.cqm_groups_entry,
+ &group_other->hw.cqm_groups_entry);
+ } else {
+ u32 rmid = event->hw.cqm_rmid;
+
+ if (__rmid_valid(rmid))
+ __put_rmid(rmid);
+ list_del(&event->hw.cqm_groups_entry);
+ }
+ }
+
+ raw_spin_unlock_irqrestore(&cache_lock, flags);
+
+ /*
+ * Stop the mbm overflow timers when the last event is destroyed.
+ */
+ if (mbm_enabled && list_empty(&cache_groups))
+ mbm_stop_timers();
+
+ mutex_unlock(&cache_mutex);
+}
+
+static int intel_cqm_event_init(struct perf_event *event)
+{
+ struct perf_event *group = NULL;
+ bool rotate = false;
+ unsigned long flags;
+
+ if (event->attr.type != intel_cqm_pmu.type)
+ return -ENOENT;
+
+ if ((event->attr.config < QOS_L3_OCCUP_EVENT_ID) ||
+ (event->attr.config > QOS_MBM_LOCAL_EVENT_ID))
+ return -EINVAL;
+
+ /* unsupported modes and filters */
+ if (event->attr.exclude_user ||
+ event->attr.exclude_kernel ||
+ event->attr.exclude_hv ||
+ event->attr.exclude_idle ||
+ event->attr.exclude_host ||
+ event->attr.exclude_guest ||
+ event->attr.sample_period) /* no sampling */
+ return -EINVAL;
+
+ INIT_LIST_HEAD(&event->hw.cqm_group_entry);
+ INIT_LIST_HEAD(&event->hw.cqm_groups_entry);
+
+ event->destroy = intel_cqm_event_destroy;
+
+ mutex_lock(&cache_mutex);
+
+ /*
+ * Start the mbm overflow timers when the first event is created.
+ */
+ if (mbm_enabled && list_empty(&cache_groups))
+ mbm_start_timers();
+
+ /* Will also set rmid */
+ intel_cqm_setup_event(event, &group);
+
+ /*
+ * Hold the cache_lock as mbm timer handlers be
+ * scanning the list of events.
+ */
+ raw_spin_lock_irqsave(&cache_lock, flags);
+
+ if (group) {
+ list_add_tail(&event->hw.cqm_group_entry,
+ &group->hw.cqm_group_entry);
+ } else {
+ list_add_tail(&event->hw.cqm_groups_entry,
+ &cache_groups);
+
+ /*
+ * All RMIDs are either in use or have recently been
+ * used. Kick the rotation worker to clean/free some.
+ *
+ * We only do this for the group leader, rather than for
+ * every event in a group to save on needless work.
+ */
+ if (!__rmid_valid(event->hw.cqm_rmid))
+ rotate = true;
+ }
+
+ raw_spin_unlock_irqrestore(&cache_lock, flags);
+ mutex_unlock(&cache_mutex);
+
+ if (rotate)
+ schedule_delayed_work(&intel_cqm_rmid_work, 0);
+
+ return 0;
+}
+
+EVENT_ATTR_STR(llc_occupancy, intel_cqm_llc, "event=0x01");
+EVENT_ATTR_STR(llc_occupancy.per-pkg, intel_cqm_llc_pkg, "1");
+EVENT_ATTR_STR(llc_occupancy.unit, intel_cqm_llc_unit, "Bytes");
+EVENT_ATTR_STR(llc_occupancy.scale, intel_cqm_llc_scale, NULL);
+EVENT_ATTR_STR(llc_occupancy.snapshot, intel_cqm_llc_snapshot, "1");
+
+EVENT_ATTR_STR(total_bytes, intel_cqm_total_bytes, "event=0x02");
+EVENT_ATTR_STR(total_bytes.per-pkg, intel_cqm_total_bytes_pkg, "1");
+EVENT_ATTR_STR(total_bytes.unit, intel_cqm_total_bytes_unit, "MB");
+EVENT_ATTR_STR(total_bytes.scale, intel_cqm_total_bytes_scale, "1e-6");
+
+EVENT_ATTR_STR(local_bytes, intel_cqm_local_bytes, "event=0x03");
+EVENT_ATTR_STR(local_bytes.per-pkg, intel_cqm_local_bytes_pkg, "1");
+EVENT_ATTR_STR(local_bytes.unit, intel_cqm_local_bytes_unit, "MB");
+EVENT_ATTR_STR(local_bytes.scale, intel_cqm_local_bytes_scale, "1e-6");
+
+static struct attribute *intel_cqm_events_attr[] = {
+ EVENT_PTR(intel_cqm_llc),
+ EVENT_PTR(intel_cqm_llc_pkg),
+ EVENT_PTR(intel_cqm_llc_unit),
+ EVENT_PTR(intel_cqm_llc_scale),
+ EVENT_PTR(intel_cqm_llc_snapshot),
+ NULL,
+};
+
+static struct attribute *intel_mbm_events_attr[] = {
+ EVENT_PTR(intel_cqm_total_bytes),
+ EVENT_PTR(intel_cqm_local_bytes),
+ EVENT_PTR(intel_cqm_total_bytes_pkg),
+ EVENT_PTR(intel_cqm_local_bytes_pkg),
+ EVENT_PTR(intel_cqm_total_bytes_unit),
+ EVENT_PTR(intel_cqm_local_bytes_unit),
+ EVENT_PTR(intel_cqm_total_bytes_scale),
+ EVENT_PTR(intel_cqm_local_bytes_scale),
+ NULL,
+};
+
+static struct attribute *intel_cmt_mbm_events_attr[] = {
+ EVENT_PTR(intel_cqm_llc),
+ EVENT_PTR(intel_cqm_total_bytes),
+ EVENT_PTR(intel_cqm_local_bytes),
+ EVENT_PTR(intel_cqm_llc_pkg),
+ EVENT_PTR(intel_cqm_total_bytes_pkg),
+ EVENT_PTR(intel_cqm_local_bytes_pkg),
+ EVENT_PTR(intel_cqm_llc_unit),
+ EVENT_PTR(intel_cqm_total_bytes_unit),
+ EVENT_PTR(intel_cqm_local_bytes_unit),
+ EVENT_PTR(intel_cqm_llc_scale),
+ EVENT_PTR(intel_cqm_total_bytes_scale),
+ EVENT_PTR(intel_cqm_local_bytes_scale),
+ EVENT_PTR(intel_cqm_llc_snapshot),
+ NULL,
+};
+
+static struct attribute_group intel_cqm_events_group = {
+ .name = "events",
+ .attrs = NULL,
+};
+
+PMU_FORMAT_ATTR(event, "config:0-7");
+static struct attribute *intel_cqm_formats_attr[] = {
+ &format_attr_event.attr,
+ NULL,
+};
+
+static struct attribute_group intel_cqm_format_group = {
+ .name = "format",
+ .attrs = intel_cqm_formats_attr,
+};
+
+static ssize_t
+max_recycle_threshold_show(struct device *dev, struct device_attribute *attr,
+ char *page)
+{
+ ssize_t rv;
+
+ mutex_lock(&cache_mutex);
+ rv = snprintf(page, PAGE_SIZE-1, "%u\n", __intel_cqm_max_threshold);
+ mutex_unlock(&cache_mutex);
+
+ return rv;
+}
+
+static ssize_t
+max_recycle_threshold_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ unsigned int bytes, cachelines;
+ int ret;
+
+ ret = kstrtouint(buf, 0, &bytes);
+ if (ret)
+ return ret;
+
+ mutex_lock(&cache_mutex);
+
+ __intel_cqm_max_threshold = bytes;
+ cachelines = bytes / cqm_l3_scale;
+
+ /*
+ * The new maximum takes effect immediately.
+ */
+ if (__intel_cqm_threshold > cachelines)
+ __intel_cqm_threshold = cachelines;
+
+ mutex_unlock(&cache_mutex);
+
+ return count;
+}
+
+static DEVICE_ATTR_RW(max_recycle_threshold);
+
+static struct attribute *intel_cqm_attrs[] = {
+ &dev_attr_max_recycle_threshold.attr,
+ NULL,
+};
+
+static const struct attribute_group intel_cqm_group = {
+ .attrs = intel_cqm_attrs,
+};
+
+static const struct attribute_group *intel_cqm_attr_groups[] = {
+ &intel_cqm_events_group,
+ &intel_cqm_format_group,
+ &intel_cqm_group,
+ NULL,
+};
+
+static struct pmu intel_cqm_pmu = {
+ .hrtimer_interval_ms = RMID_DEFAULT_QUEUE_TIME,
+ .attr_groups = intel_cqm_attr_groups,
+ .task_ctx_nr = perf_sw_context,
+ .event_init = intel_cqm_event_init,
+ .add = intel_cqm_event_add,
+ .del = intel_cqm_event_stop,
+ .start = intel_cqm_event_start,
+ .stop = intel_cqm_event_stop,
+ .read = intel_cqm_event_read,
+ .count = intel_cqm_event_count,
+};
+
+static inline void cqm_pick_event_reader(int cpu)
+{
+ int reader;
+
+ /* First online cpu in package becomes the reader */
+ reader = cpumask_any_and(&cqm_cpumask, topology_core_cpumask(cpu));
+ if (reader >= nr_cpu_ids)
+ cpumask_set_cpu(cpu, &cqm_cpumask);
+}
+
+static void intel_cqm_cpu_starting(unsigned int cpu)
+{
+ struct intel_pqr_state *state = &per_cpu(pqr_state, cpu);
+ struct cpuinfo_x86 *c = &cpu_data(cpu);
+
+ state->rmid = 0;
+ state->closid = 0;
+ state->rmid_usecnt = 0;
+
+ WARN_ON(c->x86_cache_max_rmid != cqm_max_rmid);
+ WARN_ON(c->x86_cache_occ_scale != cqm_l3_scale);
+}
+
+static void intel_cqm_cpu_exit(unsigned int cpu)
+{
+ int target;
+
+ /* Is @cpu the current cqm reader for this package ? */
+ if (!cpumask_test_and_clear_cpu(cpu, &cqm_cpumask))
+ return;
+
+ /* Find another online reader in this package */
+ target = cpumask_any_but(topology_core_cpumask(cpu), cpu);
+
+ if (target < nr_cpu_ids)
+ cpumask_set_cpu(target, &cqm_cpumask);
+}
+
+static int intel_cqm_cpu_notifier(struct notifier_block *nb,
+ unsigned long action, void *hcpu)
+{
+ unsigned int cpu = (unsigned long)hcpu;
+
+ switch (action & ~CPU_TASKS_FROZEN) {
+ case CPU_DOWN_PREPARE:
+ intel_cqm_cpu_exit(cpu);
+ break;
+ case CPU_STARTING:
+ intel_cqm_cpu_starting(cpu);
+ cqm_pick_event_reader(cpu);
+ break;
+ }
+
+ return NOTIFY_OK;
+}
+
+static const struct x86_cpu_id intel_cqm_match[] = {
+ { .vendor = X86_VENDOR_INTEL, .feature = X86_FEATURE_CQM_OCCUP_LLC },
+ {}
+};
+
+static void mbm_cleanup(void)
+{
+ if (!mbm_enabled)
+ return;
+
+ kfree(mbm_local);
+ kfree(mbm_total);
+ mbm_enabled = false;
+}
+
+static const struct x86_cpu_id intel_mbm_local_match[] = {
+ { .vendor = X86_VENDOR_INTEL, .feature = X86_FEATURE_CQM_MBM_LOCAL },
+ {}
+};
+
+static const struct x86_cpu_id intel_mbm_total_match[] = {
+ { .vendor = X86_VENDOR_INTEL, .feature = X86_FEATURE_CQM_MBM_TOTAL },
+ {}
+};
+
+static int intel_mbm_init(void)
+{
+ int ret = 0, array_size, maxid = cqm_max_rmid + 1;
+
+ mbm_socket_max = topology_max_packages();
+ array_size = sizeof(struct sample) * maxid * mbm_socket_max;
+ mbm_local = kmalloc(array_size, GFP_KERNEL);
+ if (!mbm_local)
+ return -ENOMEM;
+
+ mbm_total = kmalloc(array_size, GFP_KERNEL);
+ if (!mbm_total) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ array_size = sizeof(struct hrtimer) * mbm_socket_max;
+ mbm_timers = kmalloc(array_size, GFP_KERNEL);
+ if (!mbm_timers) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ mbm_hrtimer_init();
+
+out:
+ if (ret)
+ mbm_cleanup();
+
+ return ret;
+}
+
+static int __init intel_cqm_init(void)
+{
+ char *str = NULL, scale[20];
+ int i, cpu, ret;
+
+ if (x86_match_cpu(intel_cqm_match))
+ cqm_enabled = true;
+
+ if (x86_match_cpu(intel_mbm_local_match) &&
+ x86_match_cpu(intel_mbm_total_match))
+ mbm_enabled = true;
+
+ if (!cqm_enabled && !mbm_enabled)
+ return -ENODEV;
+
+ cqm_l3_scale = boot_cpu_data.x86_cache_occ_scale;
+
+ /*
+ * It's possible that not all resources support the same number
+ * of RMIDs. Instead of making scheduling much more complicated
+ * (where we have to match a task's RMID to a cpu that supports
+ * that many RMIDs) just find the minimum RMIDs supported across
+ * all cpus.
+ *
+ * Also, check that the scales match on all cpus.
+ */
+ cpu_notifier_register_begin();
+
+ for_each_online_cpu(cpu) {
+ struct cpuinfo_x86 *c = &cpu_data(cpu);
+
+ if (c->x86_cache_max_rmid < cqm_max_rmid)
+ cqm_max_rmid = c->x86_cache_max_rmid;
+
+ if (c->x86_cache_occ_scale != cqm_l3_scale) {
+ pr_err("Multiple LLC scale values, disabling\n");
+ ret = -EINVAL;
+ goto out;
+ }
+ }
+
+ /*
+ * A reasonable upper limit on the max threshold is the number
+ * of lines tagged per RMID if all RMIDs have the same number of
+ * lines tagged in the LLC.
+ *
+ * For a 35MB LLC and 56 RMIDs, this is ~1.8% of the LLC.
+ */
+ __intel_cqm_max_threshold =
+ boot_cpu_data.x86_cache_size * 1024 / (cqm_max_rmid + 1);
+
+ snprintf(scale, sizeof(scale), "%u", cqm_l3_scale);
+ str = kstrdup(scale, GFP_KERNEL);
+ if (!str) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ event_attr_intel_cqm_llc_scale.event_str = str;
+
+ ret = intel_cqm_setup_rmid_cache();
+ if (ret)
+ goto out;
+
+ for_each_online_cpu(i) {
+ intel_cqm_cpu_starting(i);
+ cqm_pick_event_reader(i);
+ }
+
+ if (mbm_enabled)
+ ret = intel_mbm_init();
+ if (ret && !cqm_enabled)
+ goto out;
+
+ if (cqm_enabled && mbm_enabled)
+ intel_cqm_events_group.attrs = intel_cmt_mbm_events_attr;
+ else if (!cqm_enabled && mbm_enabled)
+ intel_cqm_events_group.attrs = intel_mbm_events_attr;
+ else if (cqm_enabled && !mbm_enabled)
+ intel_cqm_events_group.attrs = intel_cqm_events_attr;
+
+ ret = perf_pmu_register(&intel_cqm_pmu, "intel_cqm", -1);
+ if (ret) {
+ pr_err("Intel CQM perf registration failed: %d\n", ret);
+ goto out;
+ }
+
+ if (cqm_enabled)
+ pr_info("Intel CQM monitoring enabled\n");
+ if (mbm_enabled)
+ pr_info("Intel MBM enabled\n");
+
+ /*
+ * Register the hot cpu notifier once we are sure cqm
+ * is enabled to avoid notifier leak.
+ */
+ __perf_cpu_notifier(intel_cqm_cpu_notifier);
+out:
+ cpu_notifier_register_done();
+ if (ret) {
+ kfree(str);
+ cqm_cleanup();
+ mbm_cleanup();
+ }
+
+ return ret;
+}
+device_initcall(intel_cqm_init);
diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c
new file mode 100644
index 000000000..7946c4231
--- /dev/null
+++ b/arch/x86/events/intel/cstate.c
@@ -0,0 +1,694 @@
+/*
+ * perf_event_intel_cstate.c: support cstate residency counters
+ *
+ * Copyright (C) 2015, Intel Corp.
+ * Author: Kan Liang (kan.liang@intel.com)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Library General Public License for more details.
+ *
+ */
+
+/*
+ * This file export cstate related free running (read-only) counters
+ * for perf. These counters may be use simultaneously by other tools,
+ * such as turbostat. However, it still make sense to implement them
+ * in perf. Because we can conveniently collect them together with
+ * other events, and allow to use them from tools without special MSR
+ * access code.
+ *
+ * The events only support system-wide mode counting. There is no
+ * sampling support because it is not supported by the hardware.
+ *
+ * According to counters' scope and category, two PMUs are registered
+ * with the perf_event core subsystem.
+ * - 'cstate_core': The counter is available for each physical core.
+ * The counters include CORE_C*_RESIDENCY.
+ * - 'cstate_pkg': The counter is available for each physical package.
+ * The counters include PKG_C*_RESIDENCY.
+ *
+ * All of these counters are specified in the Intel® 64 and IA-32
+ * Architectures Software Developer.s Manual Vol3b.
+ *
+ * Model specific counters:
+ * MSR_CORE_C1_RES: CORE C1 Residency Counter
+ * perf code: 0x00
+ * Available model: SLM,AMT
+ * Scope: Core (each processor core has a MSR)
+ * MSR_CORE_C3_RESIDENCY: CORE C3 Residency Counter
+ * perf code: 0x01
+ * Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL
+ * Scope: Core
+ * MSR_CORE_C6_RESIDENCY: CORE C6 Residency Counter
+ * perf code: 0x02
+ * Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW,SKL
+ * Scope: Core
+ * MSR_CORE_C7_RESIDENCY: CORE C7 Residency Counter
+ * perf code: 0x03
+ * Available model: SNB,IVB,HSW,BDW,SKL
+ * Scope: Core
+ * MSR_PKG_C2_RESIDENCY: Package C2 Residency Counter.
+ * perf code: 0x00
+ * Available model: SNB,IVB,HSW,BDW,SKL
+ * Scope: Package (physical package)
+ * MSR_PKG_C3_RESIDENCY: Package C3 Residency Counter.
+ * perf code: 0x01
+ * Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL
+ * Scope: Package (physical package)
+ * MSR_PKG_C6_RESIDENCY: Package C6 Residency Counter.
+ * perf code: 0x02
+ * Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW,SKL
+ * Scope: Package (physical package)
+ * MSR_PKG_C7_RESIDENCY: Package C7 Residency Counter.
+ * perf code: 0x03
+ * Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL
+ * Scope: Package (physical package)
+ * MSR_PKG_C8_RESIDENCY: Package C8 Residency Counter.
+ * perf code: 0x04
+ * Available model: HSW ULT only
+ * Scope: Package (physical package)
+ * MSR_PKG_C9_RESIDENCY: Package C9 Residency Counter.
+ * perf code: 0x05
+ * Available model: HSW ULT only
+ * Scope: Package (physical package)
+ * MSR_PKG_C10_RESIDENCY: Package C10 Residency Counter.
+ * perf code: 0x06
+ * Available model: HSW ULT only
+ * Scope: Package (physical package)
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/perf_event.h>
+#include <asm/cpu_device_id.h>
+#include "../perf_event.h"
+
+#define DEFINE_CSTATE_FORMAT_ATTR(_var, _name, _format) \
+static ssize_t __cstate_##_var##_show(struct kobject *kobj, \
+ struct kobj_attribute *attr, \
+ char *page) \
+{ \
+ BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE); \
+ return sprintf(page, _format "\n"); \
+} \
+static struct kobj_attribute format_attr_##_var = \
+ __ATTR(_name, 0444, __cstate_##_var##_show, NULL)
+
+static ssize_t cstate_get_attr_cpumask(struct device *dev,
+ struct device_attribute *attr,
+ char *buf);
+
+struct perf_cstate_msr {
+ u64 msr;
+ struct perf_pmu_events_attr *attr;
+ bool (*test)(int idx);
+};
+
+
+/* cstate_core PMU */
+
+static struct pmu cstate_core_pmu;
+static bool has_cstate_core;
+
+enum perf_cstate_core_id {
+ /*
+ * cstate_core events
+ */
+ PERF_CSTATE_CORE_C1_RES = 0,
+ PERF_CSTATE_CORE_C3_RES,
+ PERF_CSTATE_CORE_C6_RES,
+ PERF_CSTATE_CORE_C7_RES,
+
+ PERF_CSTATE_CORE_EVENT_MAX,
+};
+
+bool test_core(int idx)
+{
+ if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
+ boot_cpu_data.x86 != 6)
+ return false;
+
+ switch (boot_cpu_data.x86_model) {
+ case 30: /* 45nm Nehalem */
+ case 26: /* 45nm Nehalem-EP */
+ case 46: /* 45nm Nehalem-EX */
+
+ case 37: /* 32nm Westmere */
+ case 44: /* 32nm Westmere-EP */
+ case 47: /* 32nm Westmere-EX */
+ if (idx == PERF_CSTATE_CORE_C3_RES ||
+ idx == PERF_CSTATE_CORE_C6_RES)
+ return true;
+ break;
+ case 42: /* 32nm SandyBridge */
+ case 45: /* 32nm SandyBridge-E/EN/EP */
+
+ case 58: /* 22nm IvyBridge */
+ case 62: /* 22nm IvyBridge-EP/EX */
+
+ case 60: /* 22nm Haswell Core */
+ case 63: /* 22nm Haswell Server */
+ case 69: /* 22nm Haswell ULT */
+ case 70: /* 22nm Haswell + GT3e (Intel Iris Pro graphics) */
+
+ case 61: /* 14nm Broadwell Core-M */
+ case 86: /* 14nm Broadwell Xeon D */
+ case 71: /* 14nm Broadwell + GT3e (Intel Iris Pro graphics) */
+ case 79: /* 14nm Broadwell Server */
+
+ case 78: /* 14nm Skylake Mobile */
+ case 94: /* 14nm Skylake Desktop */
+ if (idx == PERF_CSTATE_CORE_C3_RES ||
+ idx == PERF_CSTATE_CORE_C6_RES ||
+ idx == PERF_CSTATE_CORE_C7_RES)
+ return true;
+ break;
+ case 55: /* 22nm Atom "Silvermont" */
+ case 77: /* 22nm Atom "Silvermont Avoton/Rangely" */
+ case 76: /* 14nm Atom "Airmont" */
+ if (idx == PERF_CSTATE_CORE_C1_RES ||
+ idx == PERF_CSTATE_CORE_C6_RES)
+ return true;
+ break;
+ }
+
+ return false;
+}
+
+PMU_EVENT_ATTR_STRING(c1-residency, evattr_cstate_core_c1, "event=0x00");
+PMU_EVENT_ATTR_STRING(c3-residency, evattr_cstate_core_c3, "event=0x01");
+PMU_EVENT_ATTR_STRING(c6-residency, evattr_cstate_core_c6, "event=0x02");
+PMU_EVENT_ATTR_STRING(c7-residency, evattr_cstate_core_c7, "event=0x03");
+
+static struct perf_cstate_msr core_msr[] = {
+ [PERF_CSTATE_CORE_C1_RES] = { MSR_CORE_C1_RES, &evattr_cstate_core_c1, test_core, },
+ [PERF_CSTATE_CORE_C3_RES] = { MSR_CORE_C3_RESIDENCY, &evattr_cstate_core_c3, test_core, },
+ [PERF_CSTATE_CORE_C6_RES] = { MSR_CORE_C6_RESIDENCY, &evattr_cstate_core_c6, test_core, },
+ [PERF_CSTATE_CORE_C7_RES] = { MSR_CORE_C7_RESIDENCY, &evattr_cstate_core_c7, test_core, },
+};
+
+static struct attribute *core_events_attrs[PERF_CSTATE_CORE_EVENT_MAX + 1] = {
+ NULL,
+};
+
+static struct attribute_group core_events_attr_group = {
+ .name = "events",
+ .attrs = core_events_attrs,
+};
+
+DEFINE_CSTATE_FORMAT_ATTR(core_event, event, "config:0-63");
+static struct attribute *core_format_attrs[] = {
+ &format_attr_core_event.attr,
+ NULL,
+};
+
+static struct attribute_group core_format_attr_group = {
+ .name = "format",
+ .attrs = core_format_attrs,
+};
+
+static cpumask_t cstate_core_cpu_mask;
+static DEVICE_ATTR(cpumask, S_IRUGO, cstate_get_attr_cpumask, NULL);
+
+static struct attribute *cstate_cpumask_attrs[] = {
+ &dev_attr_cpumask.attr,
+ NULL,
+};
+
+static struct attribute_group cpumask_attr_group = {
+ .attrs = cstate_cpumask_attrs,
+};
+
+static const struct attribute_group *core_attr_groups[] = {
+ &core_events_attr_group,
+ &core_format_attr_group,
+ &cpumask_attr_group,
+ NULL,
+};
+
+/* cstate_core PMU end */
+
+
+/* cstate_pkg PMU */
+
+static struct pmu cstate_pkg_pmu;
+static bool has_cstate_pkg;
+
+enum perf_cstate_pkg_id {
+ /*
+ * cstate_pkg events
+ */
+ PERF_CSTATE_PKG_C2_RES = 0,
+ PERF_CSTATE_PKG_C3_RES,
+ PERF_CSTATE_PKG_C6_RES,
+ PERF_CSTATE_PKG_C7_RES,
+ PERF_CSTATE_PKG_C8_RES,
+ PERF_CSTATE_PKG_C9_RES,
+ PERF_CSTATE_PKG_C10_RES,
+
+ PERF_CSTATE_PKG_EVENT_MAX,
+};
+
+bool test_pkg(int idx)
+{
+ if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
+ boot_cpu_data.x86 != 6)
+ return false;
+
+ switch (boot_cpu_data.x86_model) {
+ case 30: /* 45nm Nehalem */
+ case 26: /* 45nm Nehalem-EP */
+ case 46: /* 45nm Nehalem-EX */
+
+ case 37: /* 32nm Westmere */
+ case 44: /* 32nm Westmere-EP */
+ case 47: /* 32nm Westmere-EX */
+ if (idx == PERF_CSTATE_CORE_C3_RES ||
+ idx == PERF_CSTATE_CORE_C6_RES ||
+ idx == PERF_CSTATE_CORE_C7_RES)
+ return true;
+ break;
+ case 42: /* 32nm SandyBridge */
+ case 45: /* 32nm SandyBridge-E/EN/EP */
+
+ case 58: /* 22nm IvyBridge */
+ case 62: /* 22nm IvyBridge-EP/EX */
+
+ case 60: /* 22nm Haswell Core */
+ case 63: /* 22nm Haswell Server */
+ case 70: /* 22nm Haswell + GT3e (Intel Iris Pro graphics) */
+
+ case 61: /* 14nm Broadwell Core-M */
+ case 86: /* 14nm Broadwell Xeon D */
+ case 71: /* 14nm Broadwell + GT3e (Intel Iris Pro graphics) */
+ case 79: /* 14nm Broadwell Server */
+
+ case 78: /* 14nm Skylake Mobile */
+ case 94: /* 14nm Skylake Desktop */
+ if (idx == PERF_CSTATE_PKG_C2_RES ||
+ idx == PERF_CSTATE_PKG_C3_RES ||
+ idx == PERF_CSTATE_PKG_C6_RES ||
+ idx == PERF_CSTATE_PKG_C7_RES)
+ return true;
+ break;
+ case 55: /* 22nm Atom "Silvermont" */
+ case 77: /* 22nm Atom "Silvermont Avoton/Rangely" */
+ case 76: /* 14nm Atom "Airmont" */
+ if (idx == PERF_CSTATE_CORE_C6_RES)
+ return true;
+ break;
+ case 69: /* 22nm Haswell ULT */
+ if (idx == PERF_CSTATE_PKG_C2_RES ||
+ idx == PERF_CSTATE_PKG_C3_RES ||
+ idx == PERF_CSTATE_PKG_C6_RES ||
+ idx == PERF_CSTATE_PKG_C7_RES ||
+ idx == PERF_CSTATE_PKG_C8_RES ||
+ idx == PERF_CSTATE_PKG_C9_RES ||
+ idx == PERF_CSTATE_PKG_C10_RES)
+ return true;
+ break;
+ }
+
+ return false;
+}
+
+PMU_EVENT_ATTR_STRING(c2-residency, evattr_cstate_pkg_c2, "event=0x00");
+PMU_EVENT_ATTR_STRING(c3-residency, evattr_cstate_pkg_c3, "event=0x01");
+PMU_EVENT_ATTR_STRING(c6-residency, evattr_cstate_pkg_c6, "event=0x02");
+PMU_EVENT_ATTR_STRING(c7-residency, evattr_cstate_pkg_c7, "event=0x03");
+PMU_EVENT_ATTR_STRING(c8-residency, evattr_cstate_pkg_c8, "event=0x04");
+PMU_EVENT_ATTR_STRING(c9-residency, evattr_cstate_pkg_c9, "event=0x05");
+PMU_EVENT_ATTR_STRING(c10-residency, evattr_cstate_pkg_c10, "event=0x06");
+
+static struct perf_cstate_msr pkg_msr[] = {
+ [PERF_CSTATE_PKG_C2_RES] = { MSR_PKG_C2_RESIDENCY, &evattr_cstate_pkg_c2, test_pkg, },
+ [PERF_CSTATE_PKG_C3_RES] = { MSR_PKG_C3_RESIDENCY, &evattr_cstate_pkg_c3, test_pkg, },
+ [PERF_CSTATE_PKG_C6_RES] = { MSR_PKG_C6_RESIDENCY, &evattr_cstate_pkg_c6, test_pkg, },
+ [PERF_CSTATE_PKG_C7_RES] = { MSR_PKG_C7_RESIDENCY, &evattr_cstate_pkg_c7, test_pkg, },
+ [PERF_CSTATE_PKG_C8_RES] = { MSR_PKG_C8_RESIDENCY, &evattr_cstate_pkg_c8, test_pkg, },
+ [PERF_CSTATE_PKG_C9_RES] = { MSR_PKG_C9_RESIDENCY, &evattr_cstate_pkg_c9, test_pkg, },
+ [PERF_CSTATE_PKG_C10_RES] = { MSR_PKG_C10_RESIDENCY, &evattr_cstate_pkg_c10, test_pkg, },
+};
+
+static struct attribute *pkg_events_attrs[PERF_CSTATE_PKG_EVENT_MAX + 1] = {
+ NULL,
+};
+
+static struct attribute_group pkg_events_attr_group = {
+ .name = "events",
+ .attrs = pkg_events_attrs,
+};
+
+DEFINE_CSTATE_FORMAT_ATTR(pkg_event, event, "config:0-63");
+static struct attribute *pkg_format_attrs[] = {
+ &format_attr_pkg_event.attr,
+ NULL,
+};
+static struct attribute_group pkg_format_attr_group = {
+ .name = "format",
+ .attrs = pkg_format_attrs,
+};
+
+static cpumask_t cstate_pkg_cpu_mask;
+
+static const struct attribute_group *pkg_attr_groups[] = {
+ &pkg_events_attr_group,
+ &pkg_format_attr_group,
+ &cpumask_attr_group,
+ NULL,
+};
+
+/* cstate_pkg PMU end*/
+
+static ssize_t cstate_get_attr_cpumask(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct pmu *pmu = dev_get_drvdata(dev);
+
+ if (pmu == &cstate_core_pmu)
+ return cpumap_print_to_pagebuf(true, buf, &cstate_core_cpu_mask);
+ else if (pmu == &cstate_pkg_pmu)
+ return cpumap_print_to_pagebuf(true, buf, &cstate_pkg_cpu_mask);
+ else
+ return 0;
+}
+
+static int cstate_pmu_event_init(struct perf_event *event)
+{
+ u64 cfg = event->attr.config;
+ int ret = 0;
+
+ if (event->attr.type != event->pmu->type)
+ return -ENOENT;
+
+ /* unsupported modes and filters */
+ if (event->attr.exclude_user ||
+ event->attr.exclude_kernel ||
+ event->attr.exclude_hv ||
+ event->attr.exclude_idle ||
+ event->attr.exclude_host ||
+ event->attr.exclude_guest ||
+ event->attr.sample_period) /* no sampling */
+ return -EINVAL;
+
+ if (event->pmu == &cstate_core_pmu) {
+ if (cfg >= PERF_CSTATE_CORE_EVENT_MAX)
+ return -EINVAL;
+ if (!core_msr[cfg].attr)
+ return -EINVAL;
+ event->hw.event_base = core_msr[cfg].msr;
+ } else if (event->pmu == &cstate_pkg_pmu) {
+ if (cfg >= PERF_CSTATE_PKG_EVENT_MAX)
+ return -EINVAL;
+ if (!pkg_msr[cfg].attr)
+ return -EINVAL;
+ event->hw.event_base = pkg_msr[cfg].msr;
+ } else
+ return -ENOENT;
+
+ /* must be done before validate_group */
+ event->hw.config = cfg;
+ event->hw.idx = -1;
+
+ return ret;
+}
+
+static inline u64 cstate_pmu_read_counter(struct perf_event *event)
+{
+ u64 val;
+
+ rdmsrl(event->hw.event_base, val);
+ return val;
+}
+
+static void cstate_pmu_event_update(struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ u64 prev_raw_count, new_raw_count;
+
+again:
+ prev_raw_count = local64_read(&hwc->prev_count);
+ new_raw_count = cstate_pmu_read_counter(event);
+
+ if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
+ new_raw_count) != prev_raw_count)
+ goto again;
+
+ local64_add(new_raw_count - prev_raw_count, &event->count);
+}
+
+static void cstate_pmu_event_start(struct perf_event *event, int mode)
+{
+ local64_set(&event->hw.prev_count, cstate_pmu_read_counter(event));
+}
+
+static void cstate_pmu_event_stop(struct perf_event *event, int mode)
+{
+ cstate_pmu_event_update(event);
+}
+
+static void cstate_pmu_event_del(struct perf_event *event, int mode)
+{
+ cstate_pmu_event_stop(event, PERF_EF_UPDATE);
+}
+
+static int cstate_pmu_event_add(struct perf_event *event, int mode)
+{
+ if (mode & PERF_EF_START)
+ cstate_pmu_event_start(event, mode);
+
+ return 0;
+}
+
+static void cstate_cpu_exit(int cpu)
+{
+ int i, id, target;
+
+ /* cpu exit for cstate core */
+ if (has_cstate_core) {
+ id = topology_core_id(cpu);
+ target = -1;
+
+ for_each_online_cpu(i) {
+ if (i == cpu)
+ continue;
+ if (id == topology_core_id(i)) {
+ target = i;
+ break;
+ }
+ }
+ if (cpumask_test_and_clear_cpu(cpu, &cstate_core_cpu_mask) && target >= 0)
+ cpumask_set_cpu(target, &cstate_core_cpu_mask);
+ WARN_ON(cpumask_empty(&cstate_core_cpu_mask));
+ if (target >= 0)
+ perf_pmu_migrate_context(&cstate_core_pmu, cpu, target);
+ }
+
+ /* cpu exit for cstate pkg */
+ if (has_cstate_pkg) {
+ id = topology_physical_package_id(cpu);
+ target = -1;
+
+ for_each_online_cpu(i) {
+ if (i == cpu)
+ continue;
+ if (id == topology_physical_package_id(i)) {
+ target = i;
+ break;
+ }
+ }
+ if (cpumask_test_and_clear_cpu(cpu, &cstate_pkg_cpu_mask) && target >= 0)
+ cpumask_set_cpu(target, &cstate_pkg_cpu_mask);
+ WARN_ON(cpumask_empty(&cstate_pkg_cpu_mask));
+ if (target >= 0)
+ perf_pmu_migrate_context(&cstate_pkg_pmu, cpu, target);
+ }
+}
+
+static void cstate_cpu_init(int cpu)
+{
+ int i, id;
+
+ /* cpu init for cstate core */
+ if (has_cstate_core) {
+ id = topology_core_id(cpu);
+ for_each_cpu(i, &cstate_core_cpu_mask) {
+ if (id == topology_core_id(i))
+ break;
+ }
+ if (i >= nr_cpu_ids)
+ cpumask_set_cpu(cpu, &cstate_core_cpu_mask);
+ }
+
+ /* cpu init for cstate pkg */
+ if (has_cstate_pkg) {
+ id = topology_physical_package_id(cpu);
+ for_each_cpu(i, &cstate_pkg_cpu_mask) {
+ if (id == topology_physical_package_id(i))
+ break;
+ }
+ if (i >= nr_cpu_ids)
+ cpumask_set_cpu(cpu, &cstate_pkg_cpu_mask);
+ }
+}
+
+static int cstate_cpu_notifier(struct notifier_block *self,
+ unsigned long action, void *hcpu)
+{
+ unsigned int cpu = (long)hcpu;
+
+ switch (action & ~CPU_TASKS_FROZEN) {
+ case CPU_UP_PREPARE:
+ break;
+ case CPU_STARTING:
+ cstate_cpu_init(cpu);
+ break;
+ case CPU_UP_CANCELED:
+ case CPU_DYING:
+ break;
+ case CPU_ONLINE:
+ case CPU_DEAD:
+ break;
+ case CPU_DOWN_PREPARE:
+ cstate_cpu_exit(cpu);
+ break;
+ default:
+ break;
+ }
+
+ return NOTIFY_OK;
+}
+
+/*
+ * Probe the cstate events and insert the available one into sysfs attrs
+ * Return false if there is no available events.
+ */
+static bool cstate_probe_msr(struct perf_cstate_msr *msr,
+ struct attribute **events_attrs,
+ int max_event_nr)
+{
+ int i, j = 0;
+ u64 val;
+
+ /* Probe the cstate events. */
+ for (i = 0; i < max_event_nr; i++) {
+ if (!msr[i].test(i) || rdmsrl_safe(msr[i].msr, &val))
+ msr[i].attr = NULL;
+ }
+
+ /* List remaining events in the sysfs attrs. */
+ for (i = 0; i < max_event_nr; i++) {
+ if (msr[i].attr)
+ events_attrs[j++] = &msr[i].attr->attr.attr;
+ }
+ events_attrs[j] = NULL;
+
+ return (j > 0) ? true : false;
+}
+
+static int __init cstate_init(void)
+{
+ /* SLM has different MSR for PKG C6 */
+ switch (boot_cpu_data.x86_model) {
+ case 55:
+ case 76:
+ case 77:
+ pkg_msr[PERF_CSTATE_PKG_C6_RES].msr = MSR_PKG_C7_RESIDENCY;
+ }
+
+ if (cstate_probe_msr(core_msr, core_events_attrs, PERF_CSTATE_CORE_EVENT_MAX))
+ has_cstate_core = true;
+
+ if (cstate_probe_msr(pkg_msr, pkg_events_attrs, PERF_CSTATE_PKG_EVENT_MAX))
+ has_cstate_pkg = true;
+
+ return (has_cstate_core || has_cstate_pkg) ? 0 : -ENODEV;
+}
+
+static void __init cstate_cpumask_init(void)
+{
+ int cpu;
+
+ cpu_notifier_register_begin();
+
+ for_each_online_cpu(cpu)
+ cstate_cpu_init(cpu);
+
+ __perf_cpu_notifier(cstate_cpu_notifier);
+
+ cpu_notifier_register_done();
+}
+
+static struct pmu cstate_core_pmu = {
+ .attr_groups = core_attr_groups,
+ .name = "cstate_core",
+ .task_ctx_nr = perf_invalid_context,
+ .event_init = cstate_pmu_event_init,
+ .add = cstate_pmu_event_add, /* must have */
+ .del = cstate_pmu_event_del, /* must have */
+ .start = cstate_pmu_event_start,
+ .stop = cstate_pmu_event_stop,
+ .read = cstate_pmu_event_update,
+ .capabilities = PERF_PMU_CAP_NO_INTERRUPT,
+};
+
+static struct pmu cstate_pkg_pmu = {
+ .attr_groups = pkg_attr_groups,
+ .name = "cstate_pkg",
+ .task_ctx_nr = perf_invalid_context,
+ .event_init = cstate_pmu_event_init,
+ .add = cstate_pmu_event_add, /* must have */
+ .del = cstate_pmu_event_del, /* must have */
+ .start = cstate_pmu_event_start,
+ .stop = cstate_pmu_event_stop,
+ .read = cstate_pmu_event_update,
+ .capabilities = PERF_PMU_CAP_NO_INTERRUPT,
+};
+
+static void __init cstate_pmus_register(void)
+{
+ int err;
+
+ if (has_cstate_core) {
+ err = perf_pmu_register(&cstate_core_pmu, cstate_core_pmu.name, -1);
+ if (WARN_ON(err))
+ pr_info("Failed to register PMU %s error %d\n",
+ cstate_core_pmu.name, err);
+ }
+
+ if (has_cstate_pkg) {
+ err = perf_pmu_register(&cstate_pkg_pmu, cstate_pkg_pmu.name, -1);
+ if (WARN_ON(err))
+ pr_info("Failed to register PMU %s error %d\n",
+ cstate_pkg_pmu.name, err);
+ }
+}
+
+static int __init cstate_pmu_init(void)
+{
+ int err;
+
+ if (cpu_has_hypervisor)
+ return -ENODEV;
+
+ err = cstate_init();
+ if (err)
+ return err;
+
+ cstate_cpumask_init();
+
+ cstate_pmus_register();
+
+ return 0;
+}
+
+device_initcall(cstate_pmu_init);
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
new file mode 100644
index 000000000..8584b90d8
--- /dev/null
+++ b/arch/x86/events/intel/ds.c
@@ -0,0 +1,1413 @@
+#include <linux/bitops.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+
+#include <asm/perf_event.h>
+#include <asm/insn.h>
+
+#include "../perf_event.h"
+
+/* The size of a BTS record in bytes: */
+#define BTS_RECORD_SIZE 24
+
+#define BTS_BUFFER_SIZE (PAGE_SIZE << 4)
+#define PEBS_BUFFER_SIZE (PAGE_SIZE << 4)
+#define PEBS_FIXUP_SIZE PAGE_SIZE
+
+/*
+ * pebs_record_32 for p4 and core not supported
+
+struct pebs_record_32 {
+ u32 flags, ip;
+ u32 ax, bc, cx, dx;
+ u32 si, di, bp, sp;
+};
+
+ */
+
+union intel_x86_pebs_dse {
+ u64 val;
+ struct {
+ unsigned int ld_dse:4;
+ unsigned int ld_stlb_miss:1;
+ unsigned int ld_locked:1;
+ unsigned int ld_reserved:26;
+ };
+ struct {
+ unsigned int st_l1d_hit:1;
+ unsigned int st_reserved1:3;
+ unsigned int st_stlb_miss:1;
+ unsigned int st_locked:1;
+ unsigned int st_reserved2:26;
+ };
+};
+
+
+/*
+ * Map PEBS Load Latency Data Source encodings to generic
+ * memory data source information
+ */
+#define P(a, b) PERF_MEM_S(a, b)
+#define OP_LH (P(OP, LOAD) | P(LVL, HIT))
+#define SNOOP_NONE_MISS (P(SNOOP, NONE) | P(SNOOP, MISS))
+
+/* Version for Sandy Bridge and later */
+static u64 pebs_data_source[] = {
+ P(OP, LOAD) | P(LVL, MISS) | P(LVL, L3) | P(SNOOP, NA),/* 0x00:ukn L3 */
+ OP_LH | P(LVL, L1) | P(SNOOP, NONE), /* 0x01: L1 local */
+ OP_LH | P(LVL, LFB) | P(SNOOP, NONE), /* 0x02: LFB hit */
+ OP_LH | P(LVL, L2) | P(SNOOP, NONE), /* 0x03: L2 hit */
+ OP_LH | P(LVL, L3) | P(SNOOP, NONE), /* 0x04: L3 hit */
+ OP_LH | P(LVL, L3) | P(SNOOP, MISS), /* 0x05: L3 hit, snoop miss */
+ OP_LH | P(LVL, L3) | P(SNOOP, HIT), /* 0x06: L3 hit, snoop hit */
+ OP_LH | P(LVL, L3) | P(SNOOP, HITM), /* 0x07: L3 hit, snoop hitm */
+ OP_LH | P(LVL, REM_CCE1) | P(SNOOP, HIT), /* 0x08: L3 miss snoop hit */
+ OP_LH | P(LVL, REM_CCE1) | P(SNOOP, HITM), /* 0x09: L3 miss snoop hitm*/
+ OP_LH | P(LVL, LOC_RAM) | P(SNOOP, HIT), /* 0x0a: L3 miss, shared */
+ OP_LH | P(LVL, REM_RAM1) | P(SNOOP, HIT), /* 0x0b: L3 miss, shared */
+ OP_LH | P(LVL, LOC_RAM) | SNOOP_NONE_MISS,/* 0x0c: L3 miss, excl */
+ OP_LH | P(LVL, REM_RAM1) | SNOOP_NONE_MISS,/* 0x0d: L3 miss, excl */
+ OP_LH | P(LVL, IO) | P(SNOOP, NONE), /* 0x0e: I/O */
+ OP_LH | P(LVL, UNC) | P(SNOOP, NONE), /* 0x0f: uncached */
+};
+
+/* Patch up minor differences in the bits */
+void __init intel_pmu_pebs_data_source_nhm(void)
+{
+ pebs_data_source[0x05] = OP_LH | P(LVL, L3) | P(SNOOP, HIT);
+ pebs_data_source[0x06] = OP_LH | P(LVL, L3) | P(SNOOP, HITM);
+ pebs_data_source[0x07] = OP_LH | P(LVL, L3) | P(SNOOP, HITM);
+}
+
+static u64 precise_store_data(u64 status)
+{
+ union intel_x86_pebs_dse dse;
+ u64 val = P(OP, STORE) | P(SNOOP, NA) | P(LVL, L1) | P(TLB, L2);
+
+ dse.val = status;
+
+ /*
+ * bit 4: TLB access
+ * 1 = stored missed 2nd level TLB
+ *
+ * so it either hit the walker or the OS
+ * otherwise hit 2nd level TLB
+ */
+ if (dse.st_stlb_miss)
+ val |= P(TLB, MISS);
+ else
+ val |= P(TLB, HIT);
+
+ /*
+ * bit 0: hit L1 data cache
+ * if not set, then all we know is that
+ * it missed L1D
+ */
+ if (dse.st_l1d_hit)
+ val |= P(LVL, HIT);
+ else
+ val |= P(LVL, MISS);
+
+ /*
+ * bit 5: Locked prefix
+ */
+ if (dse.st_locked)
+ val |= P(LOCK, LOCKED);
+
+ return val;
+}
+
+static u64 precise_datala_hsw(struct perf_event *event, u64 status)
+{
+ union perf_mem_data_src dse;
+
+ dse.val = PERF_MEM_NA;
+
+ if (event->hw.flags & PERF_X86_EVENT_PEBS_ST_HSW)
+ dse.mem_op = PERF_MEM_OP_STORE;
+ else if (event->hw.flags & PERF_X86_EVENT_PEBS_LD_HSW)
+ dse.mem_op = PERF_MEM_OP_LOAD;
+
+ /*
+ * L1 info only valid for following events:
+ *
+ * MEM_UOPS_RETIRED.STLB_MISS_STORES
+ * MEM_UOPS_RETIRED.LOCK_STORES
+ * MEM_UOPS_RETIRED.SPLIT_STORES
+ * MEM_UOPS_RETIRED.ALL_STORES
+ */
+ if (event->hw.flags & PERF_X86_EVENT_PEBS_ST_HSW) {
+ if (status & 1)
+ dse.mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT;
+ else
+ dse.mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_MISS;
+ }
+ return dse.val;
+}
+
+static u64 load_latency_data(u64 status)
+{
+ union intel_x86_pebs_dse dse;
+ u64 val;
+ int model = boot_cpu_data.x86_model;
+ int fam = boot_cpu_data.x86;
+
+ dse.val = status;
+
+ /*
+ * use the mapping table for bit 0-3
+ */
+ val = pebs_data_source[dse.ld_dse];
+
+ /*
+ * Nehalem models do not support TLB, Lock infos
+ */
+ if (fam == 0x6 && (model == 26 || model == 30
+ || model == 31 || model == 46)) {
+ val |= P(TLB, NA) | P(LOCK, NA);
+ return val;
+ }
+ /*
+ * bit 4: TLB access
+ * 0 = did not miss 2nd level TLB
+ * 1 = missed 2nd level TLB
+ */
+ if (dse.ld_stlb_miss)
+ val |= P(TLB, MISS) | P(TLB, L2);
+ else
+ val |= P(TLB, HIT) | P(TLB, L1) | P(TLB, L2);
+
+ /*
+ * bit 5: locked prefix
+ */
+ if (dse.ld_locked)
+ val |= P(LOCK, LOCKED);
+
+ return val;
+}
+
+struct pebs_record_core {
+ u64 flags, ip;
+ u64 ax, bx, cx, dx;
+ u64 si, di, bp, sp;
+ u64 r8, r9, r10, r11;
+ u64 r12, r13, r14, r15;
+};
+
+struct pebs_record_nhm {
+ u64 flags, ip;
+ u64 ax, bx, cx, dx;
+ u64 si, di, bp, sp;
+ u64 r8, r9, r10, r11;
+ u64 r12, r13, r14, r15;
+ u64 status, dla, dse, lat;
+};
+
+/*
+ * Same as pebs_record_nhm, with two additional fields.
+ */
+struct pebs_record_hsw {
+ u64 flags, ip;
+ u64 ax, bx, cx, dx;
+ u64 si, di, bp, sp;
+ u64 r8, r9, r10, r11;
+ u64 r12, r13, r14, r15;
+ u64 status, dla, dse, lat;
+ u64 real_ip, tsx_tuning;
+};
+
+union hsw_tsx_tuning {
+ struct {
+ u32 cycles_last_block : 32,
+ hle_abort : 1,
+ rtm_abort : 1,
+ instruction_abort : 1,
+ non_instruction_abort : 1,
+ retry : 1,
+ data_conflict : 1,
+ capacity_writes : 1,
+ capacity_reads : 1;
+ };
+ u64 value;
+};
+
+#define PEBS_HSW_TSX_FLAGS 0xff00000000ULL
+
+/* Same as HSW, plus TSC */
+
+struct pebs_record_skl {
+ u64 flags, ip;
+ u64 ax, bx, cx, dx;
+ u64 si, di, bp, sp;
+ u64 r8, r9, r10, r11;
+ u64 r12, r13, r14, r15;
+ u64 status, dla, dse, lat;
+ u64 real_ip, tsx_tuning;
+ u64 tsc;
+};
+
+void init_debug_store_on_cpu(int cpu)
+{
+ struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
+
+ if (!ds)
+ return;
+
+ wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA,
+ (u32)((u64)(unsigned long)ds),
+ (u32)((u64)(unsigned long)ds >> 32));
+}
+
+void fini_debug_store_on_cpu(int cpu)
+{
+ if (!per_cpu(cpu_hw_events, cpu).ds)
+ return;
+
+ wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0);
+}
+
+static DEFINE_PER_CPU(void *, insn_buffer);
+
+static int alloc_pebs_buffer(int cpu)
+{
+ struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
+ int node = cpu_to_node(cpu);
+ int max;
+ void *buffer, *ibuffer;
+
+ if (!x86_pmu.pebs)
+ return 0;
+
+ buffer = kzalloc_node(x86_pmu.pebs_buffer_size, GFP_KERNEL, node);
+ if (unlikely(!buffer))
+ return -ENOMEM;
+
+ /*
+ * HSW+ already provides us the eventing ip; no need to allocate this
+ * buffer then.
+ */
+ if (x86_pmu.intel_cap.pebs_format < 2) {
+ ibuffer = kzalloc_node(PEBS_FIXUP_SIZE, GFP_KERNEL, node);
+ if (!ibuffer) {
+ kfree(buffer);
+ return -ENOMEM;
+ }
+ per_cpu(insn_buffer, cpu) = ibuffer;
+ }
+
+ max = x86_pmu.pebs_buffer_size / x86_pmu.pebs_record_size;
+
+ ds->pebs_buffer_base = (u64)(unsigned long)buffer;
+ ds->pebs_index = ds->pebs_buffer_base;
+ ds->pebs_absolute_maximum = ds->pebs_buffer_base +
+ max * x86_pmu.pebs_record_size;
+
+ return 0;
+}
+
+static void release_pebs_buffer(int cpu)
+{
+ struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
+
+ if (!ds || !x86_pmu.pebs)
+ return;
+
+ kfree(per_cpu(insn_buffer, cpu));
+ per_cpu(insn_buffer, cpu) = NULL;
+
+ kfree((void *)(unsigned long)ds->pebs_buffer_base);
+ ds->pebs_buffer_base = 0;
+}
+
+static int alloc_bts_buffer(int cpu)
+{
+ struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
+ int node = cpu_to_node(cpu);
+ int max, thresh;
+ void *buffer;
+
+ if (!x86_pmu.bts)
+ return 0;
+
+ buffer = kzalloc_node(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_NOWARN, node);
+ if (unlikely(!buffer)) {
+ WARN_ONCE(1, "%s: BTS buffer allocation failure\n", __func__);
+ return -ENOMEM;
+ }
+
+ max = BTS_BUFFER_SIZE / BTS_RECORD_SIZE;
+ thresh = max / 16;
+
+ ds->bts_buffer_base = (u64)(unsigned long)buffer;
+ ds->bts_index = ds->bts_buffer_base;
+ ds->bts_absolute_maximum = ds->bts_buffer_base +
+ max * BTS_RECORD_SIZE;
+ ds->bts_interrupt_threshold = ds->bts_absolute_maximum -
+ thresh * BTS_RECORD_SIZE;
+
+ return 0;
+}
+
+static void release_bts_buffer(int cpu)
+{
+ struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
+
+ if (!ds || !x86_pmu.bts)
+ return;
+
+ kfree((void *)(unsigned long)ds->bts_buffer_base);
+ ds->bts_buffer_base = 0;
+}
+
+static int alloc_ds_buffer(int cpu)
+{
+ int node = cpu_to_node(cpu);
+ struct debug_store *ds;
+
+ ds = kzalloc_node(sizeof(*ds), GFP_KERNEL, node);
+ if (unlikely(!ds))
+ return -ENOMEM;
+
+ per_cpu(cpu_hw_events, cpu).ds = ds;
+
+ return 0;
+}
+
+static void release_ds_buffer(int cpu)
+{
+ struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
+
+ if (!ds)
+ return;
+
+ per_cpu(cpu_hw_events, cpu).ds = NULL;
+ kfree(ds);
+}
+
+void release_ds_buffers(void)
+{
+ int cpu;
+
+ if (!x86_pmu.bts && !x86_pmu.pebs)
+ return;
+
+ get_online_cpus();
+ for_each_online_cpu(cpu)
+ fini_debug_store_on_cpu(cpu);
+
+ for_each_possible_cpu(cpu) {
+ release_pebs_buffer(cpu);
+ release_bts_buffer(cpu);
+ release_ds_buffer(cpu);
+ }
+ put_online_cpus();
+}
+
+void reserve_ds_buffers(void)
+{
+ int bts_err = 0, pebs_err = 0;
+ int cpu;
+
+ x86_pmu.bts_active = 0;
+ x86_pmu.pebs_active = 0;
+
+ if (!x86_pmu.bts && !x86_pmu.pebs)
+ return;
+
+ if (!x86_pmu.bts)
+ bts_err = 1;
+
+ if (!x86_pmu.pebs)
+ pebs_err = 1;
+
+ get_online_cpus();
+
+ for_each_possible_cpu(cpu) {
+ if (alloc_ds_buffer(cpu)) {
+ bts_err = 1;
+ pebs_err = 1;
+ }
+
+ if (!bts_err && alloc_bts_buffer(cpu))
+ bts_err = 1;
+
+ if (!pebs_err && alloc_pebs_buffer(cpu))
+ pebs_err = 1;
+
+ if (bts_err && pebs_err)
+ break;
+ }
+
+ if (bts_err) {
+ for_each_possible_cpu(cpu)
+ release_bts_buffer(cpu);
+ }
+
+ if (pebs_err) {
+ for_each_possible_cpu(cpu)
+ release_pebs_buffer(cpu);
+ }
+
+ if (bts_err && pebs_err) {
+ for_each_possible_cpu(cpu)
+ release_ds_buffer(cpu);
+ } else {
+ if (x86_pmu.bts && !bts_err)
+ x86_pmu.bts_active = 1;
+
+ if (x86_pmu.pebs && !pebs_err)
+ x86_pmu.pebs_active = 1;
+
+ for_each_online_cpu(cpu)
+ init_debug_store_on_cpu(cpu);
+ }
+
+ put_online_cpus();
+}
+
+/*
+ * BTS
+ */
+
+struct event_constraint bts_constraint =
+ EVENT_CONSTRAINT(0, 1ULL << INTEL_PMC_IDX_FIXED_BTS, 0);
+
+void intel_pmu_enable_bts(u64 config)
+{
+ unsigned long debugctlmsr;
+
+ debugctlmsr = get_debugctlmsr();
+
+ debugctlmsr |= DEBUGCTLMSR_TR;
+ debugctlmsr |= DEBUGCTLMSR_BTS;
+ if (config & ARCH_PERFMON_EVENTSEL_INT)
+ debugctlmsr |= DEBUGCTLMSR_BTINT;
+
+ if (!(config & ARCH_PERFMON_EVENTSEL_OS))
+ debugctlmsr |= DEBUGCTLMSR_BTS_OFF_OS;
+
+ if (!(config & ARCH_PERFMON_EVENTSEL_USR))
+ debugctlmsr |= DEBUGCTLMSR_BTS_OFF_USR;
+
+ update_debugctlmsr(debugctlmsr);
+}
+
+void intel_pmu_disable_bts(void)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ unsigned long debugctlmsr;
+
+ if (!cpuc->ds)
+ return;
+
+ debugctlmsr = get_debugctlmsr();
+
+ debugctlmsr &=
+ ~(DEBUGCTLMSR_TR | DEBUGCTLMSR_BTS | DEBUGCTLMSR_BTINT |
+ DEBUGCTLMSR_BTS_OFF_OS | DEBUGCTLMSR_BTS_OFF_USR);
+
+ update_debugctlmsr(debugctlmsr);
+}
+
+int intel_pmu_drain_bts_buffer(void)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ struct debug_store *ds = cpuc->ds;
+ struct bts_record {
+ u64 from;
+ u64 to;
+ u64 flags;
+ };
+ struct perf_event *event = cpuc->events[INTEL_PMC_IDX_FIXED_BTS];
+ struct bts_record *at, *base, *top;
+ struct perf_output_handle handle;
+ struct perf_event_header header;
+ struct perf_sample_data data;
+ unsigned long skip = 0;
+ struct pt_regs regs;
+
+ if (!event)
+ return 0;
+
+ if (!x86_pmu.bts_active)
+ return 0;
+
+ base = (struct bts_record *)(unsigned long)ds->bts_buffer_base;
+ top = (struct bts_record *)(unsigned long)ds->bts_index;
+
+ if (top <= base)
+ return 0;
+
+ memset(&regs, 0, sizeof(regs));
+
+ ds->bts_index = ds->bts_buffer_base;
+
+ perf_sample_data_init(&data, 0, event->hw.last_period);
+
+ /*
+ * BTS leaks kernel addresses in branches across the cpl boundary,
+ * such as traps or system calls, so unless the user is asking for
+ * kernel tracing (and right now it's not possible), we'd need to
+ * filter them out. But first we need to count how many of those we
+ * have in the current batch. This is an extra O(n) pass, however,
+ * it's much faster than the other one especially considering that
+ * n <= 2560 (BTS_BUFFER_SIZE / BTS_RECORD_SIZE * 15/16; see the
+ * alloc_bts_buffer()).
+ */
+ for (at = base; at < top; at++) {
+ /*
+ * Note that right now *this* BTS code only works if
+ * attr::exclude_kernel is set, but let's keep this extra
+ * check here in case that changes.
+ */
+ if (event->attr.exclude_kernel &&
+ (kernel_ip(at->from) || kernel_ip(at->to)))
+ skip++;
+ }
+
+ /*
+ * Prepare a generic sample, i.e. fill in the invariant fields.
+ * We will overwrite the from and to address before we output
+ * the sample.
+ */
+ rcu_read_lock();
+ perf_prepare_sample(&header, &data, event, &regs);
+
+ if (perf_output_begin(&handle, event, header.size *
+ (top - base - skip)))
+ goto unlock;
+
+ for (at = base; at < top; at++) {
+ /* Filter out any records that contain kernel addresses. */
+ if (event->attr.exclude_kernel &&
+ (kernel_ip(at->from) || kernel_ip(at->to)))
+ continue;
+
+ data.ip = at->from;
+ data.addr = at->to;
+
+ perf_output_sample(&handle, &header, &data, event);
+ }
+
+ perf_output_end(&handle);
+
+ /* There's new data available. */
+ event->hw.interrupts++;
+ event->pending_kill = POLL_IN;
+unlock:
+ rcu_read_unlock();
+ return 1;
+}
+
+static inline void intel_pmu_drain_pebs_buffer(void)
+{
+ struct pt_regs regs;
+
+ x86_pmu.drain_pebs(&regs);
+}
+
+void intel_pmu_pebs_sched_task(struct perf_event_context *ctx, bool sched_in)
+{
+ if (!sched_in)
+ intel_pmu_drain_pebs_buffer();
+}
+
+/*
+ * PEBS
+ */
+struct event_constraint intel_core2_pebs_event_constraints[] = {
+ INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY */
+ INTEL_FLAGS_UEVENT_CONSTRAINT(0xfec1, 0x1), /* X87_OPS_RETIRED.ANY */
+ INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c5, 0x1), /* BR_INST_RETIRED.MISPRED */
+ INTEL_FLAGS_UEVENT_CONSTRAINT(0x1fc7, 0x1), /* SIMD_INST_RETURED.ANY */
+ INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED.* */
+ /* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
+ INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x01),
+ EVENT_CONSTRAINT_END
+};
+
+struct event_constraint intel_atom_pebs_event_constraints[] = {
+ INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY */
+ INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c5, 0x1), /* MISPREDICTED_BRANCH_RETIRED */
+ INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED.* */
+ /* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
+ INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x01),
+ /* Allow all events as PEBS with no flags */
+ INTEL_ALL_EVENT_CONSTRAINT(0, 0x1),
+ EVENT_CONSTRAINT_END
+};
+
+struct event_constraint intel_slm_pebs_event_constraints[] = {
+ /* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
+ INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x1),
+ /* Allow all events as PEBS with no flags */
+ INTEL_ALL_EVENT_CONSTRAINT(0, 0x1),
+ EVENT_CONSTRAINT_END
+};
+
+struct event_constraint intel_nehalem_pebs_event_constraints[] = {
+ INTEL_PLD_CONSTRAINT(0x100b, 0xf), /* MEM_INST_RETIRED.* */
+ INTEL_FLAGS_EVENT_CONSTRAINT(0x0f, 0xf), /* MEM_UNCORE_RETIRED.* */
+ INTEL_FLAGS_UEVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */
+ INTEL_FLAGS_EVENT_CONSTRAINT(0xc0, 0xf), /* INST_RETIRED.ANY */
+ INTEL_EVENT_CONSTRAINT(0xc2, 0xf), /* UOPS_RETIRED.* */
+ INTEL_FLAGS_EVENT_CONSTRAINT(0xc4, 0xf), /* BR_INST_RETIRED.* */
+ INTEL_FLAGS_UEVENT_CONSTRAINT(0x02c5, 0xf), /* BR_MISP_RETIRED.NEAR_CALL */
+ INTEL_FLAGS_EVENT_CONSTRAINT(0xc7, 0xf), /* SSEX_UOPS_RETIRED.* */
+ INTEL_FLAGS_UEVENT_CONSTRAINT(0x20c8, 0xf), /* ITLB_MISS_RETIRED */
+ INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0xf), /* MEM_LOAD_RETIRED.* */
+ INTEL_FLAGS_EVENT_CONSTRAINT(0xf7, 0xf), /* FP_ASSIST.* */
+ /* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
+ INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x0f),
+ EVENT_CONSTRAINT_END
+};
+
+struct event_constraint intel_westmere_pebs_event_constraints[] = {
+ INTEL_PLD_CONSTRAINT(0x100b, 0xf), /* MEM_INST_RETIRED.* */
+ INTEL_FLAGS_EVENT_CONSTRAINT(0x0f, 0xf), /* MEM_UNCORE_RETIRED.* */
+ INTEL_FLAGS_UEVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */
+ INTEL_FLAGS_EVENT_CONSTRAINT(0xc0, 0xf), /* INSTR_RETIRED.* */
+ INTEL_EVENT_CONSTRAINT(0xc2, 0xf), /* UOPS_RETIRED.* */
+ INTEL_FLAGS_EVENT_CONSTRAINT(0xc4, 0xf), /* BR_INST_RETIRED.* */
+ INTEL_FLAGS_EVENT_CONSTRAINT(0xc5, 0xf), /* BR_MISP_RETIRED.* */
+ INTEL_FLAGS_EVENT_CONSTRAINT(0xc7, 0xf), /* SSEX_UOPS_RETIRED.* */
+ INTEL_FLAGS_UEVENT_CONSTRAINT(0x20c8, 0xf), /* ITLB_MISS_RETIRED */
+ INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0xf), /* MEM_LOAD_RETIRED.* */
+ INTEL_FLAGS_EVENT_CONSTRAINT(0xf7, 0xf), /* FP_ASSIST.* */
+ /* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
+ INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x0f),
+ EVENT_CONSTRAINT_END
+};
+
+struct event_constraint intel_snb_pebs_event_constraints[] = {
+ INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
+ INTEL_PLD_CONSTRAINT(0x01cd, 0x8), /* MEM_TRANS_RETIRED.LAT_ABOVE_THR */
+ INTEL_PST_CONSTRAINT(0x02cd, 0x8), /* MEM_TRANS_RETIRED.PRECISE_STORES */
+ /* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
+ INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c2, 0xf),
+ INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf), /* MEM_UOP_RETIRED.* */
+ INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */
+ INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
+ INTEL_EXCLEVT_CONSTRAINT(0xd3, 0xf), /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */
+ /* Allow all events as PEBS with no flags */
+ INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
+ EVENT_CONSTRAINT_END
+};
+
+struct event_constraint intel_ivb_pebs_event_constraints[] = {
+ INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
+ INTEL_PLD_CONSTRAINT(0x01cd, 0x8), /* MEM_TRANS_RETIRED.LAT_ABOVE_THR */
+ INTEL_PST_CONSTRAINT(0x02cd, 0x8), /* MEM_TRANS_RETIRED.PRECISE_STORES */
+ /* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
+ INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c2, 0xf),
+ /* INST_RETIRED.PREC_DIST, inv=1, cmask=16 (cycles:ppp). */
+ INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c0, 0x2),
+ INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf), /* MEM_UOP_RETIRED.* */
+ INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */
+ INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
+ INTEL_EXCLEVT_CONSTRAINT(0xd3, 0xf), /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */
+ /* Allow all events as PEBS with no flags */
+ INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
+ EVENT_CONSTRAINT_END
+};
+
+struct event_constraint intel_hsw_pebs_event_constraints[] = {
+ INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
+ INTEL_PLD_CONSTRAINT(0x01cd, 0xf), /* MEM_TRANS_RETIRED.* */
+ /* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
+ INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c2, 0xf),
+ /* INST_RETIRED.PREC_DIST, inv=1, cmask=16 (cycles:ppp). */
+ INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c0, 0x2),
+ INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_NA(0x01c2, 0xf), /* UOPS_RETIRED.ALL */
+ INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(0x11d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_LOADS */
+ INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(0x21d0, 0xf), /* MEM_UOPS_RETIRED.LOCK_LOADS */
+ INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(0x41d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_LOADS */
+ INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(0x81d0, 0xf), /* MEM_UOPS_RETIRED.ALL_LOADS */
+ INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XST(0x12d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_STORES */
+ INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XST(0x42d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_STORES */
+ INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XST(0x82d0, 0xf), /* MEM_UOPS_RETIRED.ALL_STORES */
+ INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_XLD(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */
+ INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_XLD(0xd2, 0xf), /* MEM_LOAD_UOPS_L3_HIT_RETIRED.* */
+ INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_XLD(0xd3, 0xf), /* MEM_LOAD_UOPS_L3_MISS_RETIRED.* */
+ /* Allow all events as PEBS with no flags */
+ INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
+ EVENT_CONSTRAINT_END
+};
+
+struct event_constraint intel_bdw_pebs_event_constraints[] = {
+ INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
+ INTEL_PLD_CONSTRAINT(0x01cd, 0xf), /* MEM_TRANS_RETIRED.* */
+ /* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
+ INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c2, 0xf),
+ /* INST_RETIRED.PREC_DIST, inv=1, cmask=16 (cycles:ppp). */
+ INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c0, 0x2),
+ INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_NA(0x01c2, 0xf), /* UOPS_RETIRED.ALL */
+ INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x11d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_LOADS */
+ INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x21d0, 0xf), /* MEM_UOPS_RETIRED.LOCK_LOADS */
+ INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x41d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_LOADS */
+ INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x81d0, 0xf), /* MEM_UOPS_RETIRED.ALL_LOADS */
+ INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x12d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_STORES */
+ INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x42d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_STORES */
+ INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x82d0, 0xf), /* MEM_UOPS_RETIRED.ALL_STORES */
+ INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */
+ INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd2, 0xf), /* MEM_LOAD_UOPS_L3_HIT_RETIRED.* */
+ INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd3, 0xf), /* MEM_LOAD_UOPS_L3_MISS_RETIRED.* */
+ /* Allow all events as PEBS with no flags */
+ INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
+ EVENT_CONSTRAINT_END
+};
+
+
+struct event_constraint intel_skl_pebs_event_constraints[] = {
+ INTEL_FLAGS_UEVENT_CONSTRAINT(0x1c0, 0x2), /* INST_RETIRED.PREC_DIST */
+ /* INST_RETIRED.PREC_DIST, inv=1, cmask=16 (cycles:ppp). */
+ INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c0, 0x2),
+ /* INST_RETIRED.TOTAL_CYCLES_PS (inv=1, cmask=16) (cycles:p). */
+ INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x0f),
+ INTEL_PLD_CONSTRAINT(0x1cd, 0xf), /* MEM_TRANS_RETIRED.* */
+ INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x11d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_LOADS */
+ INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x12d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_STORES */
+ INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x21d0, 0xf), /* MEM_INST_RETIRED.LOCK_LOADS */
+ INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x22d0, 0xf), /* MEM_INST_RETIRED.LOCK_STORES */
+ INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x41d0, 0xf), /* MEM_INST_RETIRED.SPLIT_LOADS */
+ INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x42d0, 0xf), /* MEM_INST_RETIRED.SPLIT_STORES */
+ INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x81d0, 0xf), /* MEM_INST_RETIRED.ALL_LOADS */
+ INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x82d0, 0xf), /* MEM_INST_RETIRED.ALL_STORES */
+ INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd1, 0xf), /* MEM_LOAD_RETIRED.* */
+ INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd2, 0xf), /* MEM_LOAD_L3_HIT_RETIRED.* */
+ INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd3, 0xf), /* MEM_LOAD_L3_MISS_RETIRED.* */
+ /* Allow all events as PEBS with no flags */
+ INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
+ EVENT_CONSTRAINT_END
+};
+
+struct event_constraint *intel_pebs_constraints(struct perf_event *event)
+{
+ struct event_constraint *c;
+
+ if (!event->attr.precise_ip)
+ return NULL;
+
+ if (x86_pmu.pebs_constraints) {
+ for_each_event_constraint(c, x86_pmu.pebs_constraints) {
+ if ((event->hw.config & c->cmask) == c->code) {
+ event->hw.flags |= c->flags;
+ return c;
+ }
+ }
+ }
+
+ return &emptyconstraint;
+}
+
+static inline bool pebs_is_enabled(struct cpu_hw_events *cpuc)
+{
+ return (cpuc->pebs_enabled & ((1ULL << MAX_PEBS_EVENTS) - 1));
+}
+
+void intel_pmu_pebs_enable(struct perf_event *event)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ struct hw_perf_event *hwc = &event->hw;
+ struct debug_store *ds = cpuc->ds;
+ bool first_pebs;
+ u64 threshold;
+
+ hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT;
+
+ first_pebs = !pebs_is_enabled(cpuc);
+ cpuc->pebs_enabled |= 1ULL << hwc->idx;
+
+ if (event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT)
+ cpuc->pebs_enabled |= 1ULL << (hwc->idx + 32);
+ else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST)
+ cpuc->pebs_enabled |= 1ULL << 63;
+
+ /*
+ * When the event is constrained enough we can use a larger
+ * threshold and run the event with less frequent PMI.
+ */
+ if (hwc->flags & PERF_X86_EVENT_FREERUNNING) {
+ threshold = ds->pebs_absolute_maximum -
+ x86_pmu.max_pebs_events * x86_pmu.pebs_record_size;
+
+ if (first_pebs)
+ perf_sched_cb_inc(event->ctx->pmu);
+ } else {
+ threshold = ds->pebs_buffer_base + x86_pmu.pebs_record_size;
+
+ /*
+ * If not all events can use larger buffer,
+ * roll back to threshold = 1
+ */
+ if (!first_pebs &&
+ (ds->pebs_interrupt_threshold > threshold))
+ perf_sched_cb_dec(event->ctx->pmu);
+ }
+
+ /* Use auto-reload if possible to save a MSR write in the PMI */
+ if (hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) {
+ ds->pebs_event_reset[hwc->idx] =
+ (u64)(-hwc->sample_period) & x86_pmu.cntval_mask;
+ }
+
+ if (first_pebs || ds->pebs_interrupt_threshold > threshold)
+ ds->pebs_interrupt_threshold = threshold;
+}
+
+void intel_pmu_pebs_disable(struct perf_event *event)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ struct hw_perf_event *hwc = &event->hw;
+ struct debug_store *ds = cpuc->ds;
+ bool large_pebs = ds->pebs_interrupt_threshold >
+ ds->pebs_buffer_base + x86_pmu.pebs_record_size;
+
+ if (large_pebs)
+ intel_pmu_drain_pebs_buffer();
+
+ cpuc->pebs_enabled &= ~(1ULL << hwc->idx);
+
+ if (event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT)
+ cpuc->pebs_enabled &= ~(1ULL << (hwc->idx + 32));
+ else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST)
+ cpuc->pebs_enabled &= ~(1ULL << 63);
+
+ if (large_pebs && !pebs_is_enabled(cpuc))
+ perf_sched_cb_dec(event->ctx->pmu);
+
+ if (cpuc->enabled)
+ wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
+
+ hwc->config |= ARCH_PERFMON_EVENTSEL_INT;
+}
+
+void intel_pmu_pebs_enable_all(void)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+
+ if (cpuc->pebs_enabled)
+ wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
+}
+
+void intel_pmu_pebs_disable_all(void)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+
+ if (cpuc->pebs_enabled)
+ wrmsrl(MSR_IA32_PEBS_ENABLE, 0);
+}
+
+static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ unsigned long from = cpuc->lbr_entries[0].from;
+ unsigned long old_to, to = cpuc->lbr_entries[0].to;
+ unsigned long ip = regs->ip;
+ int is_64bit = 0;
+ void *kaddr;
+ int size;
+
+ /*
+ * We don't need to fixup if the PEBS assist is fault like
+ */
+ if (!x86_pmu.intel_cap.pebs_trap)
+ return 1;
+
+ /*
+ * No LBR entry, no basic block, no rewinding
+ */
+ if (!cpuc->lbr_stack.nr || !from || !to)
+ return 0;
+
+ /*
+ * Basic blocks should never cross user/kernel boundaries
+ */
+ if (kernel_ip(ip) != kernel_ip(to))
+ return 0;
+
+ /*
+ * unsigned math, either ip is before the start (impossible) or
+ * the basic block is larger than 1 page (sanity)
+ */
+ if ((ip - to) > PEBS_FIXUP_SIZE)
+ return 0;
+
+ /*
+ * We sampled a branch insn, rewind using the LBR stack
+ */
+ if (ip == to) {
+ set_linear_ip(regs, from);
+ return 1;
+ }
+
+ size = ip - to;
+ if (!kernel_ip(ip)) {
+ int bytes;
+ u8 *buf = this_cpu_read(insn_buffer);
+
+ /* 'size' must fit our buffer, see above */
+ bytes = copy_from_user_nmi(buf, (void __user *)to, size);
+ if (bytes != 0)
+ return 0;
+
+ kaddr = buf;
+ } else {
+ kaddr = (void *)to;
+ }
+
+ do {
+ struct insn insn;
+
+ old_to = to;
+
+#ifdef CONFIG_X86_64
+ is_64bit = kernel_ip(to) || !test_thread_flag(TIF_IA32);
+#endif
+ insn_init(&insn, kaddr, size, is_64bit);
+ insn_get_length(&insn);
+ /*
+ * Make sure there was not a problem decoding the
+ * instruction and getting the length. This is
+ * doubly important because we have an infinite
+ * loop if insn.length=0.
+ */
+ if (!insn.length)
+ break;
+
+ to += insn.length;
+ kaddr += insn.length;
+ size -= insn.length;
+ } while (to < ip);
+
+ if (to == ip) {
+ set_linear_ip(regs, old_to);
+ return 1;
+ }
+
+ /*
+ * Even though we decoded the basic block, the instruction stream
+ * never matched the given IP, either the TO or the IP got corrupted.
+ */
+ return 0;
+}
+
+static inline u64 intel_hsw_weight(struct pebs_record_skl *pebs)
+{
+ if (pebs->tsx_tuning) {
+ union hsw_tsx_tuning tsx = { .value = pebs->tsx_tuning };
+ return tsx.cycles_last_block;
+ }
+ return 0;
+}
+
+static inline u64 intel_hsw_transaction(struct pebs_record_skl *pebs)
+{
+ u64 txn = (pebs->tsx_tuning & PEBS_HSW_TSX_FLAGS) >> 32;
+
+ /* For RTM XABORTs also log the abort code from AX */
+ if ((txn & PERF_TXN_TRANSACTION) && (pebs->ax & 1))
+ txn |= ((pebs->ax >> 24) & 0xff) << PERF_TXN_ABORT_SHIFT;
+ return txn;
+}
+
+static void setup_pebs_sample_data(struct perf_event *event,
+ struct pt_regs *iregs, void *__pebs,
+ struct perf_sample_data *data,
+ struct pt_regs *regs)
+{
+#define PERF_X86_EVENT_PEBS_HSW_PREC \
+ (PERF_X86_EVENT_PEBS_ST_HSW | \
+ PERF_X86_EVENT_PEBS_LD_HSW | \
+ PERF_X86_EVENT_PEBS_NA_HSW)
+ /*
+ * We cast to the biggest pebs_record but are careful not to
+ * unconditionally access the 'extra' entries.
+ */
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ struct pebs_record_skl *pebs = __pebs;
+ u64 sample_type;
+ int fll, fst, dsrc;
+ int fl = event->hw.flags;
+
+ if (pebs == NULL)
+ return;
+
+ sample_type = event->attr.sample_type;
+ dsrc = sample_type & PERF_SAMPLE_DATA_SRC;
+
+ fll = fl & PERF_X86_EVENT_PEBS_LDLAT;
+ fst = fl & (PERF_X86_EVENT_PEBS_ST | PERF_X86_EVENT_PEBS_HSW_PREC);
+
+ perf_sample_data_init(data, 0, event->hw.last_period);
+
+ data->period = event->hw.last_period;
+
+ /*
+ * Use latency for weight (only avail with PEBS-LL)
+ */
+ if (fll && (sample_type & PERF_SAMPLE_WEIGHT))
+ data->weight = pebs->lat;
+
+ /*
+ * data.data_src encodes the data source
+ */
+ if (dsrc) {
+ u64 val = PERF_MEM_NA;
+ if (fll)
+ val = load_latency_data(pebs->dse);
+ else if (fst && (fl & PERF_X86_EVENT_PEBS_HSW_PREC))
+ val = precise_datala_hsw(event, pebs->dse);
+ else if (fst)
+ val = precise_store_data(pebs->dse);
+ data->data_src.val = val;
+ }
+
+ /*
+ * We use the interrupt regs as a base because the PEBS record
+ * does not contain a full regs set, specifically it seems to
+ * lack segment descriptors, which get used by things like
+ * user_mode().
+ *
+ * In the simple case fix up only the IP and BP,SP regs, for
+ * PERF_SAMPLE_IP and PERF_SAMPLE_CALLCHAIN to function properly.
+ * A possible PERF_SAMPLE_REGS will have to transfer all regs.
+ */
+ *regs = *iregs;
+ regs->flags = pebs->flags;
+ set_linear_ip(regs, pebs->ip);
+ regs->bp = pebs->bp;
+ regs->sp = pebs->sp;
+
+ if (sample_type & PERF_SAMPLE_REGS_INTR) {
+ regs->ax = pebs->ax;
+ regs->bx = pebs->bx;
+ regs->cx = pebs->cx;
+ regs->dx = pebs->dx;
+ regs->si = pebs->si;
+ regs->di = pebs->di;
+ regs->bp = pebs->bp;
+ regs->sp = pebs->sp;
+
+ regs->flags = pebs->flags;
+#ifndef CONFIG_X86_32
+ regs->r8 = pebs->r8;
+ regs->r9 = pebs->r9;
+ regs->r10 = pebs->r10;
+ regs->r11 = pebs->r11;
+ regs->r12 = pebs->r12;
+ regs->r13 = pebs->r13;
+ regs->r14 = pebs->r14;
+ regs->r15 = pebs->r15;
+#endif
+ }
+
+ if (event->attr.precise_ip > 1 && x86_pmu.intel_cap.pebs_format >= 2) {
+ regs->ip = pebs->real_ip;
+ regs->flags |= PERF_EFLAGS_EXACT;
+ } else if (event->attr.precise_ip > 1 && intel_pmu_pebs_fixup_ip(regs))
+ regs->flags |= PERF_EFLAGS_EXACT;
+ else
+ regs->flags &= ~PERF_EFLAGS_EXACT;
+
+ if ((sample_type & PERF_SAMPLE_ADDR) &&
+ x86_pmu.intel_cap.pebs_format >= 1)
+ data->addr = pebs->dla;
+
+ if (x86_pmu.intel_cap.pebs_format >= 2) {
+ /* Only set the TSX weight when no memory weight. */
+ if ((sample_type & PERF_SAMPLE_WEIGHT) && !fll)
+ data->weight = intel_hsw_weight(pebs);
+
+ if (sample_type & PERF_SAMPLE_TRANSACTION)
+ data->txn = intel_hsw_transaction(pebs);
+ }
+
+ /*
+ * v3 supplies an accurate time stamp, so we use that
+ * for the time stamp.
+ *
+ * We can only do this for the default trace clock.
+ */
+ if (x86_pmu.intel_cap.pebs_format >= 3 &&
+ event->attr.use_clockid == 0)
+ data->time = native_sched_clock_from_tsc(pebs->tsc);
+
+ if (has_branch_stack(event))
+ data->br_stack = &cpuc->lbr_stack;
+}
+
+static inline void *
+get_next_pebs_record_by_bit(void *base, void *top, int bit)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ void *at;
+ u64 pebs_status;
+
+ /*
+ * fmt0 does not have a status bitfield (does not use
+ * perf_record_nhm format)
+ */
+ if (x86_pmu.intel_cap.pebs_format < 1)
+ return base;
+
+ if (base == NULL)
+ return NULL;
+
+ for (at = base; at < top; at += x86_pmu.pebs_record_size) {
+ struct pebs_record_nhm *p = at;
+
+ if (test_bit(bit, (unsigned long *)&p->status)) {
+ /* PEBS v3 has accurate status bits */
+ if (x86_pmu.intel_cap.pebs_format >= 3)
+ return at;
+
+ if (p->status == (1 << bit))
+ return at;
+
+ /* clear non-PEBS bit and re-check */
+ pebs_status = p->status & cpuc->pebs_enabled;
+ pebs_status &= (1ULL << MAX_PEBS_EVENTS) - 1;
+ if (pebs_status == (1 << bit))
+ return at;
+ }
+ }
+ return NULL;
+}
+
+static void __intel_pmu_pebs_event(struct perf_event *event,
+ struct pt_regs *iregs,
+ void *base, void *top,
+ int bit, int count)
+{
+ struct perf_sample_data data;
+ struct pt_regs regs;
+ void *at = get_next_pebs_record_by_bit(base, top, bit);
+
+ if (!intel_pmu_save_and_restart(event) &&
+ !(event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD))
+ return;
+
+ while (count > 1) {
+ setup_pebs_sample_data(event, iregs, at, &data, &regs);
+ perf_event_output(event, &data, &regs);
+ at += x86_pmu.pebs_record_size;
+ at = get_next_pebs_record_by_bit(at, top, bit);
+ count--;
+ }
+
+ setup_pebs_sample_data(event, iregs, at, &data, &regs);
+
+ /*
+ * All but the last records are processed.
+ * The last one is left to be able to call the overflow handler.
+ */
+ if (perf_event_overflow(event, &data, &regs)) {
+ x86_pmu_stop(event, 0);
+ return;
+ }
+
+}
+
+static void intel_pmu_drain_pebs_core(struct pt_regs *iregs)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ struct debug_store *ds = cpuc->ds;
+ struct perf_event *event = cpuc->events[0]; /* PMC0 only */
+ struct pebs_record_core *at, *top;
+ int n;
+
+ if (!x86_pmu.pebs_active)
+ return;
+
+ at = (struct pebs_record_core *)(unsigned long)ds->pebs_buffer_base;
+ top = (struct pebs_record_core *)(unsigned long)ds->pebs_index;
+
+ /*
+ * Whatever else happens, drain the thing
+ */
+ ds->pebs_index = ds->pebs_buffer_base;
+
+ if (!test_bit(0, cpuc->active_mask))
+ return;
+
+ WARN_ON_ONCE(!event);
+
+ if (!event->attr.precise_ip)
+ return;
+
+ n = top - at;
+ if (n <= 0)
+ return;
+
+ __intel_pmu_pebs_event(event, iregs, at, top, 0, n);
+}
+
+static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ struct debug_store *ds = cpuc->ds;
+ struct perf_event *event;
+ void *base, *at, *top;
+ short counts[MAX_PEBS_EVENTS] = {};
+ short error[MAX_PEBS_EVENTS] = {};
+ int bit, i;
+
+ if (!x86_pmu.pebs_active)
+ return;
+
+ base = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base;
+ top = (struct pebs_record_nhm *)(unsigned long)ds->pebs_index;
+
+ ds->pebs_index = ds->pebs_buffer_base;
+
+ if (unlikely(base >= top))
+ return;
+
+ for (at = base; at < top; at += x86_pmu.pebs_record_size) {
+ struct pebs_record_nhm *p = at;
+ u64 pebs_status;
+
+ /* PEBS v3 has accurate status bits */
+ if (x86_pmu.intel_cap.pebs_format >= 3) {
+ for_each_set_bit(bit, (unsigned long *)&p->status,
+ MAX_PEBS_EVENTS)
+ counts[bit]++;
+
+ continue;
+ }
+
+ pebs_status = p->status & cpuc->pebs_enabled;
+ pebs_status &= (1ULL << x86_pmu.max_pebs_events) - 1;
+
+ /*
+ * On some CPUs the PEBS status can be zero when PEBS is
+ * racing with clearing of GLOBAL_STATUS.
+ *
+ * Normally we would drop that record, but in the
+ * case when there is only a single active PEBS event
+ * we can assume it's for that event.
+ */
+ if (!pebs_status && cpuc->pebs_enabled &&
+ !(cpuc->pebs_enabled & (cpuc->pebs_enabled-1)))
+ pebs_status = cpuc->pebs_enabled;
+
+ bit = find_first_bit((unsigned long *)&pebs_status,
+ x86_pmu.max_pebs_events);
+ if (bit >= x86_pmu.max_pebs_events)
+ continue;
+
+ /*
+ * The PEBS hardware does not deal well with the situation
+ * when events happen near to each other and multiple bits
+ * are set. But it should happen rarely.
+ *
+ * If these events include one PEBS and multiple non-PEBS
+ * events, it doesn't impact PEBS record. The record will
+ * be handled normally. (slow path)
+ *
+ * If these events include two or more PEBS events, the
+ * records for the events can be collapsed into a single
+ * one, and it's not possible to reconstruct all events
+ * that caused the PEBS record. It's called collision.
+ * If collision happened, the record will be dropped.
+ */
+ if (p->status != (1ULL << bit)) {
+ for_each_set_bit(i, (unsigned long *)&pebs_status,
+ x86_pmu.max_pebs_events)
+ error[i]++;
+ continue;
+ }
+
+ counts[bit]++;
+ }
+
+ for (bit = 0; bit < x86_pmu.max_pebs_events; bit++) {
+ if ((counts[bit] == 0) && (error[bit] == 0))
+ continue;
+
+ event = cpuc->events[bit];
+ WARN_ON_ONCE(!event);
+ WARN_ON_ONCE(!event->attr.precise_ip);
+
+ /* log dropped samples number */
+ if (error[bit])
+ perf_log_lost_samples(event, error[bit]);
+
+ if (counts[bit]) {
+ __intel_pmu_pebs_event(event, iregs, base,
+ top, bit, counts[bit]);
+ }
+ }
+}
+
+/*
+ * BTS, PEBS probe and setup
+ */
+
+void __init intel_ds_init(void)
+{
+ /*
+ * No support for 32bit formats
+ */
+ if (!boot_cpu_has(X86_FEATURE_DTES64))
+ return;
+
+ x86_pmu.bts = boot_cpu_has(X86_FEATURE_BTS);
+ x86_pmu.pebs = boot_cpu_has(X86_FEATURE_PEBS);
+ x86_pmu.pebs_buffer_size = PEBS_BUFFER_SIZE;
+ if (x86_pmu.pebs) {
+ char pebs_type = x86_pmu.intel_cap.pebs_trap ? '+' : '-';
+ int format = x86_pmu.intel_cap.pebs_format;
+
+ switch (format) {
+ case 0:
+ pr_cont("PEBS fmt0%c, ", pebs_type);
+ x86_pmu.pebs_record_size = sizeof(struct pebs_record_core);
+ /*
+ * Using >PAGE_SIZE buffers makes the WRMSR to
+ * PERF_GLOBAL_CTRL in intel_pmu_enable_all()
+ * mysteriously hang on Core2.
+ *
+ * As a workaround, we don't do this.
+ */
+ x86_pmu.pebs_buffer_size = PAGE_SIZE;
+ x86_pmu.drain_pebs = intel_pmu_drain_pebs_core;
+ break;
+
+ case 1:
+ pr_cont("PEBS fmt1%c, ", pebs_type);
+ x86_pmu.pebs_record_size = sizeof(struct pebs_record_nhm);
+ x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
+ break;
+
+ case 2:
+ pr_cont("PEBS fmt2%c, ", pebs_type);
+ x86_pmu.pebs_record_size = sizeof(struct pebs_record_hsw);
+ x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
+ break;
+
+ case 3:
+ pr_cont("PEBS fmt3%c, ", pebs_type);
+ x86_pmu.pebs_record_size =
+ sizeof(struct pebs_record_skl);
+ x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
+ x86_pmu.free_running_flags |= PERF_SAMPLE_TIME;
+ break;
+
+ default:
+ pr_cont("no PEBS fmt%d%c, ", format, pebs_type);
+ x86_pmu.pebs = 0;
+ }
+ }
+}
+
+void perf_restore_debug_store(void)
+{
+ struct debug_store *ds = __this_cpu_read(cpu_hw_events.ds);
+
+ if (!x86_pmu.bts && !x86_pmu.pebs)
+ return;
+
+ wrmsrl(MSR_IA32_DS_AREA, (unsigned long)ds);
+}
diff --git a/arch/x86/events/intel/knc.c b/arch/x86/events/intel/knc.c
new file mode 100644
index 000000000..548d5f774
--- /dev/null
+++ b/arch/x86/events/intel/knc.c
@@ -0,0 +1,321 @@
+/* Driver for Intel Xeon Phi "Knights Corner" PMU */
+
+#include <linux/perf_event.h>
+#include <linux/types.h>
+
+#include <asm/hardirq.h>
+
+#include "../perf_event.h"
+
+static const u64 knc_perfmon_event_map[] =
+{
+ [PERF_COUNT_HW_CPU_CYCLES] = 0x002a,
+ [PERF_COUNT_HW_INSTRUCTIONS] = 0x0016,
+ [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0028,
+ [PERF_COUNT_HW_CACHE_MISSES] = 0x0029,
+ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x0012,
+ [PERF_COUNT_HW_BRANCH_MISSES] = 0x002b,
+};
+
+static const u64 __initconst knc_hw_cache_event_ids
+ [PERF_COUNT_HW_CACHE_MAX]
+ [PERF_COUNT_HW_CACHE_OP_MAX]
+ [PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+ [ C(L1D) ] = {
+ [ C(OP_READ) ] = {
+ /* On Xeon Phi event "0" is a valid DATA_READ */
+ /* (L1 Data Cache Reads) Instruction. */
+ /* We code this as ARCH_PERFMON_EVENTSEL_INT as this */
+ /* bit will always be set in x86_pmu_hw_config(). */
+ [ C(RESULT_ACCESS) ] = ARCH_PERFMON_EVENTSEL_INT,
+ /* DATA_READ */
+ [ C(RESULT_MISS) ] = 0x0003, /* DATA_READ_MISS */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = 0x0001, /* DATA_WRITE */
+ [ C(RESULT_MISS) ] = 0x0004, /* DATA_WRITE_MISS */
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = 0x0011, /* L1_DATA_PF1 */
+ [ C(RESULT_MISS) ] = 0x001c, /* L1_DATA_PF1_MISS */
+ },
+ },
+ [ C(L1I ) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x000c, /* CODE_READ */
+ [ C(RESULT_MISS) ] = 0x000e, /* CODE_CACHE_MISS */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = 0x0,
+ [ C(RESULT_MISS) ] = 0x0,
+ },
+ },
+ [ C(LL ) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0,
+ [ C(RESULT_MISS) ] = 0x10cb, /* L2_READ_MISS */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = 0x10cc, /* L2_WRITE_HIT */
+ [ C(RESULT_MISS) ] = 0,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = 0x10fc, /* L2_DATA_PF2 */
+ [ C(RESULT_MISS) ] = 0x10fe, /* L2_DATA_PF2_MISS */
+ },
+ },
+ [ C(DTLB) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = ARCH_PERFMON_EVENTSEL_INT,
+ /* DATA_READ */
+ /* see note on L1 OP_READ */
+ [ C(RESULT_MISS) ] = 0x0002, /* DATA_PAGE_WALK */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = 0x0001, /* DATA_WRITE */
+ [ C(RESULT_MISS) ] = 0x0002, /* DATA_PAGE_WALK */
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = 0x0,
+ [ C(RESULT_MISS) ] = 0x0,
+ },
+ },
+ [ C(ITLB) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x000c, /* CODE_READ */
+ [ C(RESULT_MISS) ] = 0x000d, /* CODE_PAGE_WALK */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ },
+ [ C(BPU ) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x0012, /* BRANCHES */
+ [ C(RESULT_MISS) ] = 0x002b, /* BRANCHES_MISPREDICTED */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ },
+};
+
+
+static u64 knc_pmu_event_map(int hw_event)
+{
+ return knc_perfmon_event_map[hw_event];
+}
+
+static struct event_constraint knc_event_constraints[] =
+{
+ INTEL_EVENT_CONSTRAINT(0xc3, 0x1), /* HWP_L2HIT */
+ INTEL_EVENT_CONSTRAINT(0xc4, 0x1), /* HWP_L2MISS */
+ INTEL_EVENT_CONSTRAINT(0xc8, 0x1), /* L2_READ_HIT_E */
+ INTEL_EVENT_CONSTRAINT(0xc9, 0x1), /* L2_READ_HIT_M */
+ INTEL_EVENT_CONSTRAINT(0xca, 0x1), /* L2_READ_HIT_S */
+ INTEL_EVENT_CONSTRAINT(0xcb, 0x1), /* L2_READ_MISS */
+ INTEL_EVENT_CONSTRAINT(0xcc, 0x1), /* L2_WRITE_HIT */
+ INTEL_EVENT_CONSTRAINT(0xce, 0x1), /* L2_STRONGLY_ORDERED_STREAMING_VSTORES_MISS */
+ INTEL_EVENT_CONSTRAINT(0xcf, 0x1), /* L2_WEAKLY_ORDERED_STREAMING_VSTORE_MISS */
+ INTEL_EVENT_CONSTRAINT(0xd7, 0x1), /* L2_VICTIM_REQ_WITH_DATA */
+ INTEL_EVENT_CONSTRAINT(0xe3, 0x1), /* SNP_HITM_BUNIT */
+ INTEL_EVENT_CONSTRAINT(0xe6, 0x1), /* SNP_HIT_L2 */
+ INTEL_EVENT_CONSTRAINT(0xe7, 0x1), /* SNP_HITM_L2 */
+ INTEL_EVENT_CONSTRAINT(0xf1, 0x1), /* L2_DATA_READ_MISS_CACHE_FILL */
+ INTEL_EVENT_CONSTRAINT(0xf2, 0x1), /* L2_DATA_WRITE_MISS_CACHE_FILL */
+ INTEL_EVENT_CONSTRAINT(0xf6, 0x1), /* L2_DATA_READ_MISS_MEM_FILL */
+ INTEL_EVENT_CONSTRAINT(0xf7, 0x1), /* L2_DATA_WRITE_MISS_MEM_FILL */
+ INTEL_EVENT_CONSTRAINT(0xfc, 0x1), /* L2_DATA_PF2 */
+ INTEL_EVENT_CONSTRAINT(0xfd, 0x1), /* L2_DATA_PF2_DROP */
+ INTEL_EVENT_CONSTRAINT(0xfe, 0x1), /* L2_DATA_PF2_MISS */
+ INTEL_EVENT_CONSTRAINT(0xff, 0x1), /* L2_DATA_HIT_INFLIGHT_PF2 */
+ EVENT_CONSTRAINT_END
+};
+
+#define MSR_KNC_IA32_PERF_GLOBAL_STATUS 0x0000002d
+#define MSR_KNC_IA32_PERF_GLOBAL_OVF_CONTROL 0x0000002e
+#define MSR_KNC_IA32_PERF_GLOBAL_CTRL 0x0000002f
+
+#define KNC_ENABLE_COUNTER0 0x00000001
+#define KNC_ENABLE_COUNTER1 0x00000002
+
+static void knc_pmu_disable_all(void)
+{
+ u64 val;
+
+ rdmsrl(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val);
+ val &= ~(KNC_ENABLE_COUNTER0|KNC_ENABLE_COUNTER1);
+ wrmsrl(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val);
+}
+
+static void knc_pmu_enable_all(int added)
+{
+ u64 val;
+
+ rdmsrl(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val);
+ val |= (KNC_ENABLE_COUNTER0|KNC_ENABLE_COUNTER1);
+ wrmsrl(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val);
+}
+
+static inline void
+knc_pmu_disable_event(struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ u64 val;
+
+ val = hwc->config;
+ val &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
+
+ (void)wrmsrl_safe(hwc->config_base + hwc->idx, val);
+}
+
+static void knc_pmu_enable_event(struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ u64 val;
+
+ val = hwc->config;
+ val |= ARCH_PERFMON_EVENTSEL_ENABLE;
+
+ (void)wrmsrl_safe(hwc->config_base + hwc->idx, val);
+}
+
+static inline u64 knc_pmu_get_status(void)
+{
+ u64 status;
+
+ rdmsrl(MSR_KNC_IA32_PERF_GLOBAL_STATUS, status);
+
+ return status;
+}
+
+static inline void knc_pmu_ack_status(u64 ack)
+{
+ wrmsrl(MSR_KNC_IA32_PERF_GLOBAL_OVF_CONTROL, ack);
+}
+
+static int knc_pmu_handle_irq(struct pt_regs *regs)
+{
+ struct perf_sample_data data;
+ struct cpu_hw_events *cpuc;
+ int handled = 0;
+ int bit, loops;
+ u64 status;
+
+ cpuc = this_cpu_ptr(&cpu_hw_events);
+
+ knc_pmu_disable_all();
+
+ status = knc_pmu_get_status();
+ if (!status) {
+ knc_pmu_enable_all(0);
+ return handled;
+ }
+
+ loops = 0;
+again:
+ knc_pmu_ack_status(status);
+ if (++loops > 100) {
+ WARN_ONCE(1, "perf: irq loop stuck!\n");
+ perf_event_print_debug();
+ goto done;
+ }
+
+ inc_irq_stat(apic_perf_irqs);
+
+ for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
+ struct perf_event *event = cpuc->events[bit];
+
+ handled++;
+
+ if (!test_bit(bit, cpuc->active_mask))
+ continue;
+
+ if (!intel_pmu_save_and_restart(event))
+ continue;
+
+ perf_sample_data_init(&data, 0, event->hw.last_period);
+
+ if (perf_event_overflow(event, &data, regs))
+ x86_pmu_stop(event, 0);
+ }
+
+ /*
+ * Repeat if there is more work to be done:
+ */
+ status = knc_pmu_get_status();
+ if (status)
+ goto again;
+
+done:
+ /* Only restore PMU state when it's active. See x86_pmu_disable(). */
+ if (cpuc->enabled)
+ knc_pmu_enable_all(0);
+
+ return handled;
+}
+
+
+PMU_FORMAT_ATTR(event, "config:0-7" );
+PMU_FORMAT_ATTR(umask, "config:8-15" );
+PMU_FORMAT_ATTR(edge, "config:18" );
+PMU_FORMAT_ATTR(inv, "config:23" );
+PMU_FORMAT_ATTR(cmask, "config:24-31" );
+
+static struct attribute *intel_knc_formats_attr[] = {
+ &format_attr_event.attr,
+ &format_attr_umask.attr,
+ &format_attr_edge.attr,
+ &format_attr_inv.attr,
+ &format_attr_cmask.attr,
+ NULL,
+};
+
+static const struct x86_pmu knc_pmu __initconst = {
+ .name = "knc",
+ .handle_irq = knc_pmu_handle_irq,
+ .disable_all = knc_pmu_disable_all,
+ .enable_all = knc_pmu_enable_all,
+ .enable = knc_pmu_enable_event,
+ .disable = knc_pmu_disable_event,
+ .hw_config = x86_pmu_hw_config,
+ .schedule_events = x86_schedule_events,
+ .eventsel = MSR_KNC_EVNTSEL0,
+ .perfctr = MSR_KNC_PERFCTR0,
+ .event_map = knc_pmu_event_map,
+ .max_events = ARRAY_SIZE(knc_perfmon_event_map),
+ .apic = 1,
+ .max_period = (1ULL << 39) - 1,
+ .version = 0,
+ .num_counters = 2,
+ .cntval_bits = 40,
+ .cntval_mask = (1ULL << 40) - 1,
+ .get_event_constraints = x86_get_event_constraints,
+ .event_constraints = knc_event_constraints,
+ .format_attrs = intel_knc_formats_attr,
+};
+
+__init int knc_pmu_init(void)
+{
+ x86_pmu = knc_pmu;
+
+ memcpy(hw_cache_event_ids, knc_hw_cache_event_ids,
+ sizeof(hw_cache_event_ids));
+
+ return 0;
+}
diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c
new file mode 100644
index 000000000..1ca5d1e7d
--- /dev/null
+++ b/arch/x86/events/intel/lbr.c
@@ -0,0 +1,1064 @@
+#include <linux/perf_event.h>
+#include <linux/types.h>
+
+#include <asm/perf_event.h>
+#include <asm/msr.h>
+#include <asm/insn.h>
+
+#include "../perf_event.h"
+
+enum {
+ LBR_FORMAT_32 = 0x00,
+ LBR_FORMAT_LIP = 0x01,
+ LBR_FORMAT_EIP = 0x02,
+ LBR_FORMAT_EIP_FLAGS = 0x03,
+ LBR_FORMAT_EIP_FLAGS2 = 0x04,
+ LBR_FORMAT_INFO = 0x05,
+ LBR_FORMAT_MAX_KNOWN = LBR_FORMAT_INFO,
+};
+
+static enum {
+ LBR_EIP_FLAGS = 1,
+ LBR_TSX = 2,
+} lbr_desc[LBR_FORMAT_MAX_KNOWN + 1] = {
+ [LBR_FORMAT_EIP_FLAGS] = LBR_EIP_FLAGS,
+ [LBR_FORMAT_EIP_FLAGS2] = LBR_EIP_FLAGS | LBR_TSX,
+};
+
+/*
+ * Intel LBR_SELECT bits
+ * Intel Vol3a, April 2011, Section 16.7 Table 16-10
+ *
+ * Hardware branch filter (not available on all CPUs)
+ */
+#define LBR_KERNEL_BIT 0 /* do not capture at ring0 */
+#define LBR_USER_BIT 1 /* do not capture at ring > 0 */
+#define LBR_JCC_BIT 2 /* do not capture conditional branches */
+#define LBR_REL_CALL_BIT 3 /* do not capture relative calls */
+#define LBR_IND_CALL_BIT 4 /* do not capture indirect calls */
+#define LBR_RETURN_BIT 5 /* do not capture near returns */
+#define LBR_IND_JMP_BIT 6 /* do not capture indirect jumps */
+#define LBR_REL_JMP_BIT 7 /* do not capture relative jumps */
+#define LBR_FAR_BIT 8 /* do not capture far branches */
+#define LBR_CALL_STACK_BIT 9 /* enable call stack */
+
+/*
+ * Following bit only exists in Linux; we mask it out before writing it to
+ * the actual MSR. But it helps the constraint perf code to understand
+ * that this is a separate configuration.
+ */
+#define LBR_NO_INFO_BIT 63 /* don't read LBR_INFO. */
+
+#define LBR_KERNEL (1 << LBR_KERNEL_BIT)
+#define LBR_USER (1 << LBR_USER_BIT)
+#define LBR_JCC (1 << LBR_JCC_BIT)
+#define LBR_REL_CALL (1 << LBR_REL_CALL_BIT)
+#define LBR_IND_CALL (1 << LBR_IND_CALL_BIT)
+#define LBR_RETURN (1 << LBR_RETURN_BIT)
+#define LBR_REL_JMP (1 << LBR_REL_JMP_BIT)
+#define LBR_IND_JMP (1 << LBR_IND_JMP_BIT)
+#define LBR_FAR (1 << LBR_FAR_BIT)
+#define LBR_CALL_STACK (1 << LBR_CALL_STACK_BIT)
+#define LBR_NO_INFO (1ULL << LBR_NO_INFO_BIT)
+
+#define LBR_PLM (LBR_KERNEL | LBR_USER)
+
+#define LBR_SEL_MASK 0x3ff /* valid bits in LBR_SELECT */
+#define LBR_NOT_SUPP -1 /* LBR filter not supported */
+#define LBR_IGN 0 /* ignored */
+
+#define LBR_ANY \
+ (LBR_JCC |\
+ LBR_REL_CALL |\
+ LBR_IND_CALL |\
+ LBR_RETURN |\
+ LBR_REL_JMP |\
+ LBR_IND_JMP |\
+ LBR_FAR)
+
+#define LBR_FROM_FLAG_MISPRED (1ULL << 63)
+#define LBR_FROM_FLAG_IN_TX (1ULL << 62)
+#define LBR_FROM_FLAG_ABORT (1ULL << 61)
+
+/*
+ * x86control flow change classification
+ * x86control flow changes include branches, interrupts, traps, faults
+ */
+enum {
+ X86_BR_NONE = 0, /* unknown */
+
+ X86_BR_USER = 1 << 0, /* branch target is user */
+ X86_BR_KERNEL = 1 << 1, /* branch target is kernel */
+
+ X86_BR_CALL = 1 << 2, /* call */
+ X86_BR_RET = 1 << 3, /* return */
+ X86_BR_SYSCALL = 1 << 4, /* syscall */
+ X86_BR_SYSRET = 1 << 5, /* syscall return */
+ X86_BR_INT = 1 << 6, /* sw interrupt */
+ X86_BR_IRET = 1 << 7, /* return from interrupt */
+ X86_BR_JCC = 1 << 8, /* conditional */
+ X86_BR_JMP = 1 << 9, /* jump */
+ X86_BR_IRQ = 1 << 10,/* hw interrupt or trap or fault */
+ X86_BR_IND_CALL = 1 << 11,/* indirect calls */
+ X86_BR_ABORT = 1 << 12,/* transaction abort */
+ X86_BR_IN_TX = 1 << 13,/* in transaction */
+ X86_BR_NO_TX = 1 << 14,/* not in transaction */
+ X86_BR_ZERO_CALL = 1 << 15,/* zero length call */
+ X86_BR_CALL_STACK = 1 << 16,/* call stack */
+ X86_BR_IND_JMP = 1 << 17,/* indirect jump */
+};
+
+#define X86_BR_PLM (X86_BR_USER | X86_BR_KERNEL)
+#define X86_BR_ANYTX (X86_BR_NO_TX | X86_BR_IN_TX)
+
+#define X86_BR_ANY \
+ (X86_BR_CALL |\
+ X86_BR_RET |\
+ X86_BR_SYSCALL |\
+ X86_BR_SYSRET |\
+ X86_BR_INT |\
+ X86_BR_IRET |\
+ X86_BR_JCC |\
+ X86_BR_JMP |\
+ X86_BR_IRQ |\
+ X86_BR_ABORT |\
+ X86_BR_IND_CALL |\
+ X86_BR_IND_JMP |\
+ X86_BR_ZERO_CALL)
+
+#define X86_BR_ALL (X86_BR_PLM | X86_BR_ANY)
+
+#define X86_BR_ANY_CALL \
+ (X86_BR_CALL |\
+ X86_BR_IND_CALL |\
+ X86_BR_ZERO_CALL |\
+ X86_BR_SYSCALL |\
+ X86_BR_IRQ |\
+ X86_BR_INT)
+
+static void intel_pmu_lbr_filter(struct cpu_hw_events *cpuc);
+
+/*
+ * We only support LBR implementations that have FREEZE_LBRS_ON_PMI
+ * otherwise it becomes near impossible to get a reliable stack.
+ */
+
+static void __intel_pmu_lbr_enable(bool pmi)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ u64 debugctl, lbr_select = 0, orig_debugctl;
+
+ /*
+ * No need to unfreeze manually, as v4 can do that as part
+ * of the GLOBAL_STATUS ack.
+ */
+ if (pmi && x86_pmu.version >= 4)
+ return;
+
+ /*
+ * No need to reprogram LBR_SELECT in a PMI, as it
+ * did not change.
+ */
+ if (cpuc->lbr_sel)
+ lbr_select = cpuc->lbr_sel->config & x86_pmu.lbr_sel_mask;
+ if (!pmi && cpuc->lbr_sel)
+ wrmsrl(MSR_LBR_SELECT, lbr_select);
+
+ rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
+ orig_debugctl = debugctl;
+ debugctl |= DEBUGCTLMSR_LBR;
+ /*
+ * LBR callstack does not work well with FREEZE_LBRS_ON_PMI.
+ * If FREEZE_LBRS_ON_PMI is set, PMI near call/return instructions
+ * may cause superfluous increase/decrease of LBR_TOS.
+ */
+ if (!(lbr_select & LBR_CALL_STACK))
+ debugctl |= DEBUGCTLMSR_FREEZE_LBRS_ON_PMI;
+ if (orig_debugctl != debugctl)
+ wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
+}
+
+static void __intel_pmu_lbr_disable(void)
+{
+ u64 debugctl;
+
+ rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
+ debugctl &= ~(DEBUGCTLMSR_LBR | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
+ wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
+}
+
+static void intel_pmu_lbr_reset_32(void)
+{
+ int i;
+
+ for (i = 0; i < x86_pmu.lbr_nr; i++)
+ wrmsrl(x86_pmu.lbr_from + i, 0);
+}
+
+static void intel_pmu_lbr_reset_64(void)
+{
+ int i;
+
+ for (i = 0; i < x86_pmu.lbr_nr; i++) {
+ wrmsrl(x86_pmu.lbr_from + i, 0);
+ wrmsrl(x86_pmu.lbr_to + i, 0);
+ if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO)
+ wrmsrl(MSR_LBR_INFO_0 + i, 0);
+ }
+}
+
+void intel_pmu_lbr_reset(void)
+{
+ if (!x86_pmu.lbr_nr)
+ return;
+
+ if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_32)
+ intel_pmu_lbr_reset_32();
+ else
+ intel_pmu_lbr_reset_64();
+}
+
+/*
+ * TOS = most recently recorded branch
+ */
+static inline u64 intel_pmu_lbr_tos(void)
+{
+ u64 tos;
+
+ rdmsrl(x86_pmu.lbr_tos, tos);
+ return tos;
+}
+
+enum {
+ LBR_NONE,
+ LBR_VALID,
+};
+
+static void __intel_pmu_lbr_restore(struct x86_perf_task_context *task_ctx)
+{
+ int i;
+ unsigned lbr_idx, mask;
+ u64 tos;
+
+ if (task_ctx->lbr_callstack_users == 0 ||
+ task_ctx->lbr_stack_state == LBR_NONE) {
+ intel_pmu_lbr_reset();
+ return;
+ }
+
+ mask = x86_pmu.lbr_nr - 1;
+ tos = task_ctx->tos;
+ for (i = 0; i < tos; i++) {
+ lbr_idx = (tos - i) & mask;
+ wrmsrl(x86_pmu.lbr_from + lbr_idx, task_ctx->lbr_from[i]);
+ wrmsrl(x86_pmu.lbr_to + lbr_idx, task_ctx->lbr_to[i]);
+ if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO)
+ wrmsrl(MSR_LBR_INFO_0 + lbr_idx, task_ctx->lbr_info[i]);
+ }
+ wrmsrl(x86_pmu.lbr_tos, tos);
+ task_ctx->lbr_stack_state = LBR_NONE;
+}
+
+static void __intel_pmu_lbr_save(struct x86_perf_task_context *task_ctx)
+{
+ int i;
+ unsigned lbr_idx, mask;
+ u64 tos;
+
+ if (task_ctx->lbr_callstack_users == 0) {
+ task_ctx->lbr_stack_state = LBR_NONE;
+ return;
+ }
+
+ mask = x86_pmu.lbr_nr - 1;
+ tos = intel_pmu_lbr_tos();
+ for (i = 0; i < tos; i++) {
+ lbr_idx = (tos - i) & mask;
+ rdmsrl(x86_pmu.lbr_from + lbr_idx, task_ctx->lbr_from[i]);
+ rdmsrl(x86_pmu.lbr_to + lbr_idx, task_ctx->lbr_to[i]);
+ if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO)
+ rdmsrl(MSR_LBR_INFO_0 + lbr_idx, task_ctx->lbr_info[i]);
+ }
+ task_ctx->tos = tos;
+ task_ctx->lbr_stack_state = LBR_VALID;
+}
+
+void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ struct x86_perf_task_context *task_ctx;
+
+ /*
+ * If LBR callstack feature is enabled and the stack was saved when
+ * the task was scheduled out, restore the stack. Otherwise flush
+ * the LBR stack.
+ */
+ task_ctx = ctx ? ctx->task_ctx_data : NULL;
+ if (task_ctx) {
+ if (sched_in) {
+ __intel_pmu_lbr_restore(task_ctx);
+ cpuc->lbr_context = ctx;
+ } else {
+ __intel_pmu_lbr_save(task_ctx);
+ }
+ return;
+ }
+
+ /*
+ * When sampling the branck stack in system-wide, it may be
+ * necessary to flush the stack on context switch. This happens
+ * when the branch stack does not tag its entries with the pid
+ * of the current task. Otherwise it becomes impossible to
+ * associate a branch entry with a task. This ambiguity is more
+ * likely to appear when the branch stack supports priv level
+ * filtering and the user sets it to monitor only at the user
+ * level (which could be a useful measurement in system-wide
+ * mode). In that case, the risk is high of having a branch
+ * stack with branch from multiple tasks.
+ */
+ if (sched_in) {
+ intel_pmu_lbr_reset();
+ cpuc->lbr_context = ctx;
+ }
+}
+
+static inline bool branch_user_callstack(unsigned br_sel)
+{
+ return (br_sel & X86_BR_USER) && (br_sel & X86_BR_CALL_STACK);
+}
+
+void intel_pmu_lbr_enable(struct perf_event *event)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ struct x86_perf_task_context *task_ctx;
+
+ if (!x86_pmu.lbr_nr)
+ return;
+
+ /*
+ * Reset the LBR stack if we changed task context to
+ * avoid data leaks.
+ */
+ if (event->ctx->task && cpuc->lbr_context != event->ctx) {
+ intel_pmu_lbr_reset();
+ cpuc->lbr_context = event->ctx;
+ }
+ cpuc->br_sel = event->hw.branch_reg.reg;
+
+ if (branch_user_callstack(cpuc->br_sel) && event->ctx &&
+ event->ctx->task_ctx_data) {
+ task_ctx = event->ctx->task_ctx_data;
+ task_ctx->lbr_callstack_users++;
+ }
+
+ cpuc->lbr_users++;
+ perf_sched_cb_inc(event->ctx->pmu);
+}
+
+void intel_pmu_lbr_disable(struct perf_event *event)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ struct x86_perf_task_context *task_ctx;
+
+ if (!x86_pmu.lbr_nr)
+ return;
+
+ if (branch_user_callstack(cpuc->br_sel) && event->ctx &&
+ event->ctx->task_ctx_data) {
+ task_ctx = event->ctx->task_ctx_data;
+ task_ctx->lbr_callstack_users--;
+ }
+
+ cpuc->lbr_users--;
+ WARN_ON_ONCE(cpuc->lbr_users < 0);
+ perf_sched_cb_dec(event->ctx->pmu);
+
+ if (cpuc->enabled && !cpuc->lbr_users) {
+ __intel_pmu_lbr_disable();
+ /* avoid stale pointer */
+ cpuc->lbr_context = NULL;
+ }
+}
+
+void intel_pmu_lbr_enable_all(bool pmi)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+
+ if (cpuc->lbr_users)
+ __intel_pmu_lbr_enable(pmi);
+}
+
+void intel_pmu_lbr_disable_all(void)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+
+ if (cpuc->lbr_users)
+ __intel_pmu_lbr_disable();
+}
+
+static void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc)
+{
+ unsigned long mask = x86_pmu.lbr_nr - 1;
+ u64 tos = intel_pmu_lbr_tos();
+ int i;
+
+ for (i = 0; i < x86_pmu.lbr_nr; i++) {
+ unsigned long lbr_idx = (tos - i) & mask;
+ union {
+ struct {
+ u32 from;
+ u32 to;
+ };
+ u64 lbr;
+ } msr_lastbranch;
+
+ rdmsrl(x86_pmu.lbr_from + lbr_idx, msr_lastbranch.lbr);
+
+ cpuc->lbr_entries[i].from = msr_lastbranch.from;
+ cpuc->lbr_entries[i].to = msr_lastbranch.to;
+ cpuc->lbr_entries[i].mispred = 0;
+ cpuc->lbr_entries[i].predicted = 0;
+ cpuc->lbr_entries[i].reserved = 0;
+ }
+ cpuc->lbr_stack.nr = i;
+}
+
+/*
+ * Due to lack of segmentation in Linux the effective address (offset)
+ * is the same as the linear address, allowing us to merge the LIP and EIP
+ * LBR formats.
+ */
+static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
+{
+ bool need_info = false;
+ unsigned long mask = x86_pmu.lbr_nr - 1;
+ int lbr_format = x86_pmu.intel_cap.lbr_format;
+ u64 tos = intel_pmu_lbr_tos();
+ int i;
+ int out = 0;
+ int num = x86_pmu.lbr_nr;
+
+ if (cpuc->lbr_sel) {
+ need_info = !(cpuc->lbr_sel->config & LBR_NO_INFO);
+ if (cpuc->lbr_sel->config & LBR_CALL_STACK)
+ num = tos;
+ }
+
+ for (i = 0; i < num; i++) {
+ unsigned long lbr_idx = (tos - i) & mask;
+ u64 from, to, mis = 0, pred = 0, in_tx = 0, abort = 0;
+ int skip = 0;
+ u16 cycles = 0;
+ int lbr_flags = lbr_desc[lbr_format];
+
+ rdmsrl(x86_pmu.lbr_from + lbr_idx, from);
+ rdmsrl(x86_pmu.lbr_to + lbr_idx, to);
+
+ if (lbr_format == LBR_FORMAT_INFO && need_info) {
+ u64 info;
+
+ rdmsrl(MSR_LBR_INFO_0 + lbr_idx, info);
+ mis = !!(info & LBR_INFO_MISPRED);
+ pred = !mis;
+ in_tx = !!(info & LBR_INFO_IN_TX);
+ abort = !!(info & LBR_INFO_ABORT);
+ cycles = (info & LBR_INFO_CYCLES);
+ }
+ if (lbr_flags & LBR_EIP_FLAGS) {
+ mis = !!(from & LBR_FROM_FLAG_MISPRED);
+ pred = !mis;
+ skip = 1;
+ }
+ if (lbr_flags & LBR_TSX) {
+ in_tx = !!(from & LBR_FROM_FLAG_IN_TX);
+ abort = !!(from & LBR_FROM_FLAG_ABORT);
+ skip = 3;
+ }
+ from = (u64)((((s64)from) << skip) >> skip);
+
+ /*
+ * Some CPUs report duplicated abort records,
+ * with the second entry not having an abort bit set.
+ * Skip them here. This loop runs backwards,
+ * so we need to undo the previous record.
+ * If the abort just happened outside the window
+ * the extra entry cannot be removed.
+ */
+ if (abort && x86_pmu.lbr_double_abort && out > 0)
+ out--;
+
+ cpuc->lbr_entries[out].from = from;
+ cpuc->lbr_entries[out].to = to;
+ cpuc->lbr_entries[out].mispred = mis;
+ cpuc->lbr_entries[out].predicted = pred;
+ cpuc->lbr_entries[out].in_tx = in_tx;
+ cpuc->lbr_entries[out].abort = abort;
+ cpuc->lbr_entries[out].cycles = cycles;
+ cpuc->lbr_entries[out].reserved = 0;
+ out++;
+ }
+ cpuc->lbr_stack.nr = out;
+}
+
+void intel_pmu_lbr_read(void)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+
+ if (!cpuc->lbr_users)
+ return;
+
+ if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_32)
+ intel_pmu_lbr_read_32(cpuc);
+ else
+ intel_pmu_lbr_read_64(cpuc);
+
+ intel_pmu_lbr_filter(cpuc);
+}
+
+/*
+ * SW filter is used:
+ * - in case there is no HW filter
+ * - in case the HW filter has errata or limitations
+ */
+static int intel_pmu_setup_sw_lbr_filter(struct perf_event *event)
+{
+ u64 br_type = event->attr.branch_sample_type;
+ int mask = 0;
+
+ if (br_type & PERF_SAMPLE_BRANCH_USER)
+ mask |= X86_BR_USER;
+
+ if (br_type & PERF_SAMPLE_BRANCH_KERNEL)
+ mask |= X86_BR_KERNEL;
+
+ /* we ignore BRANCH_HV here */
+
+ if (br_type & PERF_SAMPLE_BRANCH_ANY)
+ mask |= X86_BR_ANY;
+
+ if (br_type & PERF_SAMPLE_BRANCH_ANY_CALL)
+ mask |= X86_BR_ANY_CALL;
+
+ if (br_type & PERF_SAMPLE_BRANCH_ANY_RETURN)
+ mask |= X86_BR_RET | X86_BR_IRET | X86_BR_SYSRET;
+
+ if (br_type & PERF_SAMPLE_BRANCH_IND_CALL)
+ mask |= X86_BR_IND_CALL;
+
+ if (br_type & PERF_SAMPLE_BRANCH_ABORT_TX)
+ mask |= X86_BR_ABORT;
+
+ if (br_type & PERF_SAMPLE_BRANCH_IN_TX)
+ mask |= X86_BR_IN_TX;
+
+ if (br_type & PERF_SAMPLE_BRANCH_NO_TX)
+ mask |= X86_BR_NO_TX;
+
+ if (br_type & PERF_SAMPLE_BRANCH_COND)
+ mask |= X86_BR_JCC;
+
+ if (br_type & PERF_SAMPLE_BRANCH_CALL_STACK) {
+ if (!x86_pmu_has_lbr_callstack())
+ return -EOPNOTSUPP;
+ if (mask & ~(X86_BR_USER | X86_BR_KERNEL))
+ return -EINVAL;
+ mask |= X86_BR_CALL | X86_BR_IND_CALL | X86_BR_RET |
+ X86_BR_CALL_STACK;
+ }
+
+ if (br_type & PERF_SAMPLE_BRANCH_IND_JUMP)
+ mask |= X86_BR_IND_JMP;
+
+ if (br_type & PERF_SAMPLE_BRANCH_CALL)
+ mask |= X86_BR_CALL | X86_BR_ZERO_CALL;
+ /*
+ * stash actual user request into reg, it may
+ * be used by fixup code for some CPU
+ */
+ event->hw.branch_reg.reg = mask;
+ return 0;
+}
+
+/*
+ * setup the HW LBR filter
+ * Used only when available, may not be enough to disambiguate
+ * all branches, may need the help of the SW filter
+ */
+static int intel_pmu_setup_hw_lbr_filter(struct perf_event *event)
+{
+ struct hw_perf_event_extra *reg;
+ u64 br_type = event->attr.branch_sample_type;
+ u64 mask = 0, v;
+ int i;
+
+ for (i = 0; i < PERF_SAMPLE_BRANCH_MAX_SHIFT; i++) {
+ if (!(br_type & (1ULL << i)))
+ continue;
+
+ v = x86_pmu.lbr_sel_map[i];
+ if (v == LBR_NOT_SUPP)
+ return -EOPNOTSUPP;
+
+ if (v != LBR_IGN)
+ mask |= v;
+ }
+
+ reg = &event->hw.branch_reg;
+ reg->idx = EXTRA_REG_LBR;
+
+ /*
+ * The first 9 bits (LBR_SEL_MASK) in LBR_SELECT operate
+ * in suppress mode. So LBR_SELECT should be set to
+ * (~mask & LBR_SEL_MASK) | (mask & ~LBR_SEL_MASK)
+ * But the 10th bit LBR_CALL_STACK does not operate
+ * in suppress mode.
+ */
+ reg->config = mask ^ (x86_pmu.lbr_sel_mask & ~LBR_CALL_STACK);
+
+ if ((br_type & PERF_SAMPLE_BRANCH_NO_CYCLES) &&
+ (br_type & PERF_SAMPLE_BRANCH_NO_FLAGS) &&
+ (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO))
+ reg->config |= LBR_NO_INFO;
+
+ return 0;
+}
+
+int intel_pmu_setup_lbr_filter(struct perf_event *event)
+{
+ int ret = 0;
+
+ /*
+ * no LBR on this PMU
+ */
+ if (!x86_pmu.lbr_nr)
+ return -EOPNOTSUPP;
+
+ /*
+ * setup SW LBR filter
+ */
+ ret = intel_pmu_setup_sw_lbr_filter(event);
+ if (ret)
+ return ret;
+
+ /*
+ * setup HW LBR filter, if any
+ */
+ if (x86_pmu.lbr_sel_map)
+ ret = intel_pmu_setup_hw_lbr_filter(event);
+
+ return ret;
+}
+
+/*
+ * return the type of control flow change at address "from"
+ * instruction is not necessarily a branch (in case of interrupt).
+ *
+ * The branch type returned also includes the priv level of the
+ * target of the control flow change (X86_BR_USER, X86_BR_KERNEL).
+ *
+ * If a branch type is unknown OR the instruction cannot be
+ * decoded (e.g., text page not present), then X86_BR_NONE is
+ * returned.
+ */
+static int branch_type(unsigned long from, unsigned long to, int abort)
+{
+ struct insn insn;
+ void *addr;
+ int bytes_read, bytes_left;
+ int ret = X86_BR_NONE;
+ int ext, to_plm, from_plm;
+ u8 buf[MAX_INSN_SIZE];
+ int is64 = 0;
+
+ to_plm = kernel_ip(to) ? X86_BR_KERNEL : X86_BR_USER;
+ from_plm = kernel_ip(from) ? X86_BR_KERNEL : X86_BR_USER;
+
+ /*
+ * maybe zero if lbr did not fill up after a reset by the time
+ * we get a PMU interrupt
+ */
+ if (from == 0 || to == 0)
+ return X86_BR_NONE;
+
+ if (abort)
+ return X86_BR_ABORT | to_plm;
+
+ if (from_plm == X86_BR_USER) {
+ /*
+ * can happen if measuring at the user level only
+ * and we interrupt in a kernel thread, e.g., idle.
+ */
+ if (!current->mm)
+ return X86_BR_NONE;
+
+ /* may fail if text not present */
+ bytes_left = copy_from_user_nmi(buf, (void __user *)from,
+ MAX_INSN_SIZE);
+ bytes_read = MAX_INSN_SIZE - bytes_left;
+ if (!bytes_read)
+ return X86_BR_NONE;
+
+ addr = buf;
+ } else {
+ /*
+ * The LBR logs any address in the IP, even if the IP just
+ * faulted. This means userspace can control the from address.
+ * Ensure we don't blindy read any address by validating it is
+ * a known text address.
+ */
+ if (kernel_text_address(from)) {
+ addr = (void *)from;
+ /*
+ * Assume we can get the maximum possible size
+ * when grabbing kernel data. This is not
+ * _strictly_ true since we could possibly be
+ * executing up next to a memory hole, but
+ * it is very unlikely to be a problem.
+ */
+ bytes_read = MAX_INSN_SIZE;
+ } else {
+ return X86_BR_NONE;
+ }
+ }
+
+ /*
+ * decoder needs to know the ABI especially
+ * on 64-bit systems running 32-bit apps
+ */
+#ifdef CONFIG_X86_64
+ is64 = kernel_ip((unsigned long)addr) || !test_thread_flag(TIF_IA32);
+#endif
+ insn_init(&insn, addr, bytes_read, is64);
+ insn_get_opcode(&insn);
+ if (!insn.opcode.got)
+ return X86_BR_ABORT;
+
+ switch (insn.opcode.bytes[0]) {
+ case 0xf:
+ switch (insn.opcode.bytes[1]) {
+ case 0x05: /* syscall */
+ case 0x34: /* sysenter */
+ ret = X86_BR_SYSCALL;
+ break;
+ case 0x07: /* sysret */
+ case 0x35: /* sysexit */
+ ret = X86_BR_SYSRET;
+ break;
+ case 0x80 ... 0x8f: /* conditional */
+ ret = X86_BR_JCC;
+ break;
+ default:
+ ret = X86_BR_NONE;
+ }
+ break;
+ case 0x70 ... 0x7f: /* conditional */
+ ret = X86_BR_JCC;
+ break;
+ case 0xc2: /* near ret */
+ case 0xc3: /* near ret */
+ case 0xca: /* far ret */
+ case 0xcb: /* far ret */
+ ret = X86_BR_RET;
+ break;
+ case 0xcf: /* iret */
+ ret = X86_BR_IRET;
+ break;
+ case 0xcc ... 0xce: /* int */
+ ret = X86_BR_INT;
+ break;
+ case 0xe8: /* call near rel */
+ insn_get_immediate(&insn);
+ if (insn.immediate1.value == 0) {
+ /* zero length call */
+ ret = X86_BR_ZERO_CALL;
+ break;
+ }
+ case 0x9a: /* call far absolute */
+ ret = X86_BR_CALL;
+ break;
+ case 0xe0 ... 0xe3: /* loop jmp */
+ ret = X86_BR_JCC;
+ break;
+ case 0xe9 ... 0xeb: /* jmp */
+ ret = X86_BR_JMP;
+ break;
+ case 0xff: /* call near absolute, call far absolute ind */
+ insn_get_modrm(&insn);
+ ext = (insn.modrm.bytes[0] >> 3) & 0x7;
+ switch (ext) {
+ case 2: /* near ind call */
+ case 3: /* far ind call */
+ ret = X86_BR_IND_CALL;
+ break;
+ case 4:
+ case 5:
+ ret = X86_BR_IND_JMP;
+ break;
+ }
+ break;
+ default:
+ ret = X86_BR_NONE;
+ }
+ /*
+ * interrupts, traps, faults (and thus ring transition) may
+ * occur on any instructions. Thus, to classify them correctly,
+ * we need to first look at the from and to priv levels. If they
+ * are different and to is in the kernel, then it indicates
+ * a ring transition. If the from instruction is not a ring
+ * transition instr (syscall, systenter, int), then it means
+ * it was a irq, trap or fault.
+ *
+ * we have no way of detecting kernel to kernel faults.
+ */
+ if (from_plm == X86_BR_USER && to_plm == X86_BR_KERNEL
+ && ret != X86_BR_SYSCALL && ret != X86_BR_INT)
+ ret = X86_BR_IRQ;
+
+ /*
+ * branch priv level determined by target as
+ * is done by HW when LBR_SELECT is implemented
+ */
+ if (ret != X86_BR_NONE)
+ ret |= to_plm;
+
+ return ret;
+}
+
+/*
+ * implement actual branch filter based on user demand.
+ * Hardware may not exactly satisfy that request, thus
+ * we need to inspect opcodes. Mismatched branches are
+ * discarded. Therefore, the number of branches returned
+ * in PERF_SAMPLE_BRANCH_STACK sample may vary.
+ */
+static void
+intel_pmu_lbr_filter(struct cpu_hw_events *cpuc)
+{
+ u64 from, to;
+ int br_sel = cpuc->br_sel;
+ int i, j, type;
+ bool compress = false;
+
+ /* if sampling all branches, then nothing to filter */
+ if ((br_sel & X86_BR_ALL) == X86_BR_ALL)
+ return;
+
+ for (i = 0; i < cpuc->lbr_stack.nr; i++) {
+
+ from = cpuc->lbr_entries[i].from;
+ to = cpuc->lbr_entries[i].to;
+
+ type = branch_type(from, to, cpuc->lbr_entries[i].abort);
+ if (type != X86_BR_NONE && (br_sel & X86_BR_ANYTX)) {
+ if (cpuc->lbr_entries[i].in_tx)
+ type |= X86_BR_IN_TX;
+ else
+ type |= X86_BR_NO_TX;
+ }
+
+ /* if type does not correspond, then discard */
+ if (type == X86_BR_NONE || (br_sel & type) != type) {
+ cpuc->lbr_entries[i].from = 0;
+ compress = true;
+ }
+ }
+
+ if (!compress)
+ return;
+
+ /* remove all entries with from=0 */
+ for (i = 0; i < cpuc->lbr_stack.nr; ) {
+ if (!cpuc->lbr_entries[i].from) {
+ j = i;
+ while (++j < cpuc->lbr_stack.nr)
+ cpuc->lbr_entries[j-1] = cpuc->lbr_entries[j];
+ cpuc->lbr_stack.nr--;
+ if (!cpuc->lbr_entries[i].from)
+ continue;
+ }
+ i++;
+ }
+}
+
+/*
+ * Map interface branch filters onto LBR filters
+ */
+static const int nhm_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
+ [PERF_SAMPLE_BRANCH_ANY_SHIFT] = LBR_ANY,
+ [PERF_SAMPLE_BRANCH_USER_SHIFT] = LBR_USER,
+ [PERF_SAMPLE_BRANCH_KERNEL_SHIFT] = LBR_KERNEL,
+ [PERF_SAMPLE_BRANCH_HV_SHIFT] = LBR_IGN,
+ [PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT] = LBR_RETURN | LBR_REL_JMP
+ | LBR_IND_JMP | LBR_FAR,
+ /*
+ * NHM/WSM erratum: must include REL_JMP+IND_JMP to get CALL branches
+ */
+ [PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT] =
+ LBR_REL_CALL | LBR_IND_CALL | LBR_REL_JMP | LBR_IND_JMP | LBR_FAR,
+ /*
+ * NHM/WSM erratum: must include IND_JMP to capture IND_CALL
+ */
+ [PERF_SAMPLE_BRANCH_IND_CALL_SHIFT] = LBR_IND_CALL | LBR_IND_JMP,
+ [PERF_SAMPLE_BRANCH_COND_SHIFT] = LBR_JCC,
+ [PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT] = LBR_IND_JMP,
+};
+
+static const int snb_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
+ [PERF_SAMPLE_BRANCH_ANY_SHIFT] = LBR_ANY,
+ [PERF_SAMPLE_BRANCH_USER_SHIFT] = LBR_USER,
+ [PERF_SAMPLE_BRANCH_KERNEL_SHIFT] = LBR_KERNEL,
+ [PERF_SAMPLE_BRANCH_HV_SHIFT] = LBR_IGN,
+ [PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT] = LBR_RETURN | LBR_FAR,
+ [PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT] = LBR_REL_CALL | LBR_IND_CALL
+ | LBR_FAR,
+ [PERF_SAMPLE_BRANCH_IND_CALL_SHIFT] = LBR_IND_CALL,
+ [PERF_SAMPLE_BRANCH_COND_SHIFT] = LBR_JCC,
+ [PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT] = LBR_IND_JMP,
+ [PERF_SAMPLE_BRANCH_CALL_SHIFT] = LBR_REL_CALL,
+};
+
+static const int hsw_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
+ [PERF_SAMPLE_BRANCH_ANY_SHIFT] = LBR_ANY,
+ [PERF_SAMPLE_BRANCH_USER_SHIFT] = LBR_USER,
+ [PERF_SAMPLE_BRANCH_KERNEL_SHIFT] = LBR_KERNEL,
+ [PERF_SAMPLE_BRANCH_HV_SHIFT] = LBR_IGN,
+ [PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT] = LBR_RETURN | LBR_FAR,
+ [PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT] = LBR_REL_CALL | LBR_IND_CALL
+ | LBR_FAR,
+ [PERF_SAMPLE_BRANCH_IND_CALL_SHIFT] = LBR_IND_CALL,
+ [PERF_SAMPLE_BRANCH_COND_SHIFT] = LBR_JCC,
+ [PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT] = LBR_REL_CALL | LBR_IND_CALL
+ | LBR_RETURN | LBR_CALL_STACK,
+ [PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT] = LBR_IND_JMP,
+ [PERF_SAMPLE_BRANCH_CALL_SHIFT] = LBR_REL_CALL,
+};
+
+/* core */
+void __init intel_pmu_lbr_init_core(void)
+{
+ x86_pmu.lbr_nr = 4;
+ x86_pmu.lbr_tos = MSR_LBR_TOS;
+ x86_pmu.lbr_from = MSR_LBR_CORE_FROM;
+ x86_pmu.lbr_to = MSR_LBR_CORE_TO;
+
+ /*
+ * SW branch filter usage:
+ * - compensate for lack of HW filter
+ */
+ pr_cont("4-deep LBR, ");
+}
+
+/* nehalem/westmere */
+void __init intel_pmu_lbr_init_nhm(void)
+{
+ x86_pmu.lbr_nr = 16;
+ x86_pmu.lbr_tos = MSR_LBR_TOS;
+ x86_pmu.lbr_from = MSR_LBR_NHM_FROM;
+ x86_pmu.lbr_to = MSR_LBR_NHM_TO;
+
+ x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
+ x86_pmu.lbr_sel_map = nhm_lbr_sel_map;
+
+ /*
+ * SW branch filter usage:
+ * - workaround LBR_SEL errata (see above)
+ * - support syscall, sysret capture.
+ * That requires LBR_FAR but that means far
+ * jmp need to be filtered out
+ */
+ pr_cont("16-deep LBR, ");
+}
+
+/* sandy bridge */
+void __init intel_pmu_lbr_init_snb(void)
+{
+ x86_pmu.lbr_nr = 16;
+ x86_pmu.lbr_tos = MSR_LBR_TOS;
+ x86_pmu.lbr_from = MSR_LBR_NHM_FROM;
+ x86_pmu.lbr_to = MSR_LBR_NHM_TO;
+
+ x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
+ x86_pmu.lbr_sel_map = snb_lbr_sel_map;
+
+ /*
+ * SW branch filter usage:
+ * - support syscall, sysret capture.
+ * That requires LBR_FAR but that means far
+ * jmp need to be filtered out
+ */
+ pr_cont("16-deep LBR, ");
+}
+
+/* haswell */
+void intel_pmu_lbr_init_hsw(void)
+{
+ x86_pmu.lbr_nr = 16;
+ x86_pmu.lbr_tos = MSR_LBR_TOS;
+ x86_pmu.lbr_from = MSR_LBR_NHM_FROM;
+ x86_pmu.lbr_to = MSR_LBR_NHM_TO;
+
+ x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
+ x86_pmu.lbr_sel_map = hsw_lbr_sel_map;
+
+ pr_cont("16-deep LBR, ");
+}
+
+/* skylake */
+__init void intel_pmu_lbr_init_skl(void)
+{
+ x86_pmu.lbr_nr = 32;
+ x86_pmu.lbr_tos = MSR_LBR_TOS;
+ x86_pmu.lbr_from = MSR_LBR_NHM_FROM;
+ x86_pmu.lbr_to = MSR_LBR_NHM_TO;
+
+ x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
+ x86_pmu.lbr_sel_map = hsw_lbr_sel_map;
+
+ /*
+ * SW branch filter usage:
+ * - support syscall, sysret capture.
+ * That requires LBR_FAR but that means far
+ * jmp need to be filtered out
+ */
+ pr_cont("32-deep LBR, ");
+}
+
+/* atom */
+void __init intel_pmu_lbr_init_atom(void)
+{
+ /*
+ * only models starting at stepping 10 seems
+ * to have an operational LBR which can freeze
+ * on PMU interrupt
+ */
+ if (boot_cpu_data.x86_model == 28
+ && boot_cpu_data.x86_mask < 10) {
+ pr_cont("LBR disabled due to erratum");
+ return;
+ }
+
+ x86_pmu.lbr_nr = 8;
+ x86_pmu.lbr_tos = MSR_LBR_TOS;
+ x86_pmu.lbr_from = MSR_LBR_CORE_FROM;
+ x86_pmu.lbr_to = MSR_LBR_CORE_TO;
+
+ /*
+ * SW branch filter usage:
+ * - compensate for lack of HW filter
+ */
+ pr_cont("8-deep LBR, ");
+}
+
+/* Knights Landing */
+void intel_pmu_lbr_init_knl(void)
+{
+ x86_pmu.lbr_nr = 8;
+ x86_pmu.lbr_tos = MSR_LBR_TOS;
+ x86_pmu.lbr_from = MSR_LBR_NHM_FROM;
+ x86_pmu.lbr_to = MSR_LBR_NHM_TO;
+
+ x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
+ x86_pmu.lbr_sel_map = snb_lbr_sel_map;
+
+ pr_cont("8-deep LBR, ");
+}
diff --git a/arch/x86/events/intel/p4.c b/arch/x86/events/intel/p4.c
new file mode 100644
index 000000000..0a5ede187
--- /dev/null
+++ b/arch/x86/events/intel/p4.c
@@ -0,0 +1,1376 @@
+/*
+ * Netburst Performance Events (P4, old Xeon)
+ *
+ * Copyright (C) 2010 Parallels, Inc., Cyrill Gorcunov <gorcunov@openvz.org>
+ * Copyright (C) 2010 Intel Corporation, Lin Ming <ming.m.lin@intel.com>
+ *
+ * For licencing details see kernel-base/COPYING
+ */
+
+#include <linux/perf_event.h>
+
+#include <asm/perf_event_p4.h>
+#include <asm/hardirq.h>
+#include <asm/apic.h>
+
+#include "../perf_event.h"
+
+#define P4_CNTR_LIMIT 3
+/*
+ * array indices: 0,1 - HT threads, used with HT enabled cpu
+ */
+struct p4_event_bind {
+ unsigned int opcode; /* Event code and ESCR selector */
+ unsigned int escr_msr[2]; /* ESCR MSR for this event */
+ unsigned int escr_emask; /* valid ESCR EventMask bits */
+ unsigned int shared; /* event is shared across threads */
+ char cntr[2][P4_CNTR_LIMIT]; /* counter index (offset), -1 on abscence */
+};
+
+struct p4_pebs_bind {
+ unsigned int metric_pebs;
+ unsigned int metric_vert;
+};
+
+/* it sets P4_PEBS_ENABLE_UOP_TAG as well */
+#define P4_GEN_PEBS_BIND(name, pebs, vert) \
+ [P4_PEBS_METRIC__##name] = { \
+ .metric_pebs = pebs | P4_PEBS_ENABLE_UOP_TAG, \
+ .metric_vert = vert, \
+ }
+
+/*
+ * note we have P4_PEBS_ENABLE_UOP_TAG always set here
+ *
+ * it's needed for mapping P4_PEBS_CONFIG_METRIC_MASK bits of
+ * event configuration to find out which values are to be
+ * written into MSR_IA32_PEBS_ENABLE and MSR_P4_PEBS_MATRIX_VERT
+ * resgisters
+ */
+static struct p4_pebs_bind p4_pebs_bind_map[] = {
+ P4_GEN_PEBS_BIND(1stl_cache_load_miss_retired, 0x0000001, 0x0000001),
+ P4_GEN_PEBS_BIND(2ndl_cache_load_miss_retired, 0x0000002, 0x0000001),
+ P4_GEN_PEBS_BIND(dtlb_load_miss_retired, 0x0000004, 0x0000001),
+ P4_GEN_PEBS_BIND(dtlb_store_miss_retired, 0x0000004, 0x0000002),
+ P4_GEN_PEBS_BIND(dtlb_all_miss_retired, 0x0000004, 0x0000003),
+ P4_GEN_PEBS_BIND(tagged_mispred_branch, 0x0018000, 0x0000010),
+ P4_GEN_PEBS_BIND(mob_load_replay_retired, 0x0000200, 0x0000001),
+ P4_GEN_PEBS_BIND(split_load_retired, 0x0000400, 0x0000001),
+ P4_GEN_PEBS_BIND(split_store_retired, 0x0000400, 0x0000002),
+};
+
+/*
+ * Note that we don't use CCCR1 here, there is an
+ * exception for P4_BSQ_ALLOCATION but we just have
+ * no workaround
+ *
+ * consider this binding as resources which particular
+ * event may borrow, it doesn't contain EventMask,
+ * Tags and friends -- they are left to a caller
+ */
+static struct p4_event_bind p4_event_bind_map[] = {
+ [P4_EVENT_TC_DELIVER_MODE] = {
+ .opcode = P4_OPCODE(P4_EVENT_TC_DELIVER_MODE),
+ .escr_msr = { MSR_P4_TC_ESCR0, MSR_P4_TC_ESCR1 },
+ .escr_emask =
+ P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, DD) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, DB) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, DI) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, BD) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, BB) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, BI) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, ID),
+ .shared = 1,
+ .cntr = { {4, 5, -1}, {6, 7, -1} },
+ },
+ [P4_EVENT_BPU_FETCH_REQUEST] = {
+ .opcode = P4_OPCODE(P4_EVENT_BPU_FETCH_REQUEST),
+ .escr_msr = { MSR_P4_BPU_ESCR0, MSR_P4_BPU_ESCR1 },
+ .escr_emask =
+ P4_ESCR_EMASK_BIT(P4_EVENT_BPU_FETCH_REQUEST, TCMISS),
+ .cntr = { {0, -1, -1}, {2, -1, -1} },
+ },
+ [P4_EVENT_ITLB_REFERENCE] = {
+ .opcode = P4_OPCODE(P4_EVENT_ITLB_REFERENCE),
+ .escr_msr = { MSR_P4_ITLB_ESCR0, MSR_P4_ITLB_ESCR1 },
+ .escr_emask =
+ P4_ESCR_EMASK_BIT(P4_EVENT_ITLB_REFERENCE, HIT) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_ITLB_REFERENCE, MISS) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_ITLB_REFERENCE, HIT_UK),
+ .cntr = { {0, -1, -1}, {2, -1, -1} },
+ },
+ [P4_EVENT_MEMORY_CANCEL] = {
+ .opcode = P4_OPCODE(P4_EVENT_MEMORY_CANCEL),
+ .escr_msr = { MSR_P4_DAC_ESCR0, MSR_P4_DAC_ESCR1 },
+ .escr_emask =
+ P4_ESCR_EMASK_BIT(P4_EVENT_MEMORY_CANCEL, ST_RB_FULL) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_MEMORY_CANCEL, 64K_CONF),
+ .cntr = { {8, 9, -1}, {10, 11, -1} },
+ },
+ [P4_EVENT_MEMORY_COMPLETE] = {
+ .opcode = P4_OPCODE(P4_EVENT_MEMORY_COMPLETE),
+ .escr_msr = { MSR_P4_SAAT_ESCR0 , MSR_P4_SAAT_ESCR1 },
+ .escr_emask =
+ P4_ESCR_EMASK_BIT(P4_EVENT_MEMORY_COMPLETE, LSC) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_MEMORY_COMPLETE, SSC),
+ .cntr = { {8, 9, -1}, {10, 11, -1} },
+ },
+ [P4_EVENT_LOAD_PORT_REPLAY] = {
+ .opcode = P4_OPCODE(P4_EVENT_LOAD_PORT_REPLAY),
+ .escr_msr = { MSR_P4_SAAT_ESCR0, MSR_P4_SAAT_ESCR1 },
+ .escr_emask =
+ P4_ESCR_EMASK_BIT(P4_EVENT_LOAD_PORT_REPLAY, SPLIT_LD),
+ .cntr = { {8, 9, -1}, {10, 11, -1} },
+ },
+ [P4_EVENT_STORE_PORT_REPLAY] = {
+ .opcode = P4_OPCODE(P4_EVENT_STORE_PORT_REPLAY),
+ .escr_msr = { MSR_P4_SAAT_ESCR0 , MSR_P4_SAAT_ESCR1 },
+ .escr_emask =
+ P4_ESCR_EMASK_BIT(P4_EVENT_STORE_PORT_REPLAY, SPLIT_ST),
+ .cntr = { {8, 9, -1}, {10, 11, -1} },
+ },
+ [P4_EVENT_MOB_LOAD_REPLAY] = {
+ .opcode = P4_OPCODE(P4_EVENT_MOB_LOAD_REPLAY),
+ .escr_msr = { MSR_P4_MOB_ESCR0, MSR_P4_MOB_ESCR1 },
+ .escr_emask =
+ P4_ESCR_EMASK_BIT(P4_EVENT_MOB_LOAD_REPLAY, NO_STA) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_MOB_LOAD_REPLAY, NO_STD) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_MOB_LOAD_REPLAY, PARTIAL_DATA) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_MOB_LOAD_REPLAY, UNALGN_ADDR),
+ .cntr = { {0, -1, -1}, {2, -1, -1} },
+ },
+ [P4_EVENT_PAGE_WALK_TYPE] = {
+ .opcode = P4_OPCODE(P4_EVENT_PAGE_WALK_TYPE),
+ .escr_msr = { MSR_P4_PMH_ESCR0, MSR_P4_PMH_ESCR1 },
+ .escr_emask =
+ P4_ESCR_EMASK_BIT(P4_EVENT_PAGE_WALK_TYPE, DTMISS) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_PAGE_WALK_TYPE, ITMISS),
+ .shared = 1,
+ .cntr = { {0, -1, -1}, {2, -1, -1} },
+ },
+ [P4_EVENT_BSQ_CACHE_REFERENCE] = {
+ .opcode = P4_OPCODE(P4_EVENT_BSQ_CACHE_REFERENCE),
+ .escr_msr = { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR1 },
+ .escr_emask =
+ P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITS) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITE) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITM) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITS) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITE) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITM) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_MISS) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_MISS) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, WR_2ndL_MISS),
+ .cntr = { {0, -1, -1}, {2, -1, -1} },
+ },
+ [P4_EVENT_IOQ_ALLOCATION] = {
+ .opcode = P4_OPCODE(P4_EVENT_IOQ_ALLOCATION),
+ .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
+ .escr_emask =
+ P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, DEFAULT) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, ALL_READ) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, ALL_WRITE) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_UC) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_WC) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_WT) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_WP) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_WB) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, OWN) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, OTHER) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, PREFETCH),
+ .cntr = { {0, -1, -1}, {2, -1, -1} },
+ },
+ [P4_EVENT_IOQ_ACTIVE_ENTRIES] = { /* shared ESCR */
+ .opcode = P4_OPCODE(P4_EVENT_IOQ_ACTIVE_ENTRIES),
+ .escr_msr = { MSR_P4_FSB_ESCR1, MSR_P4_FSB_ESCR1 },
+ .escr_emask =
+ P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, DEFAULT) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, ALL_READ) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, ALL_WRITE) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_UC) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WC) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WT) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WP) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WB) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, OWN) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, OTHER) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, PREFETCH),
+ .cntr = { {2, -1, -1}, {3, -1, -1} },
+ },
+ [P4_EVENT_FSB_DATA_ACTIVITY] = {
+ .opcode = P4_OPCODE(P4_EVENT_FSB_DATA_ACTIVITY),
+ .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
+ .escr_emask =
+ P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_DRV) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_OWN) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_OTHER) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DBSY_DRV) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DBSY_OWN) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DBSY_OTHER),
+ .shared = 1,
+ .cntr = { {0, -1, -1}, {2, -1, -1} },
+ },
+ [P4_EVENT_BSQ_ALLOCATION] = { /* shared ESCR, broken CCCR1 */
+ .opcode = P4_OPCODE(P4_EVENT_BSQ_ALLOCATION),
+ .escr_msr = { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR0 },
+ .escr_emask =
+ P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_TYPE0) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_TYPE1) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_LEN0) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_LEN1) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_IO_TYPE) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_LOCK_TYPE) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_CACHE_TYPE) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_SPLIT_TYPE) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_DEM_TYPE) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_ORD_TYPE) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, MEM_TYPE0) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, MEM_TYPE1) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, MEM_TYPE2),
+ .cntr = { {0, -1, -1}, {1, -1, -1} },
+ },
+ [P4_EVENT_BSQ_ACTIVE_ENTRIES] = { /* shared ESCR */
+ .opcode = P4_OPCODE(P4_EVENT_BSQ_ACTIVE_ENTRIES),
+ .escr_msr = { MSR_P4_BSU_ESCR1 , MSR_P4_BSU_ESCR1 },
+ .escr_emask =
+ P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_TYPE0) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_TYPE1) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_LEN0) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_LEN1) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_IO_TYPE) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_LOCK_TYPE) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_CACHE_TYPE) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_SPLIT_TYPE) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_DEM_TYPE) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_ORD_TYPE) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, MEM_TYPE0) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, MEM_TYPE1) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, MEM_TYPE2),
+ .cntr = { {2, -1, -1}, {3, -1, -1} },
+ },
+ [P4_EVENT_SSE_INPUT_ASSIST] = {
+ .opcode = P4_OPCODE(P4_EVENT_SSE_INPUT_ASSIST),
+ .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
+ .escr_emask =
+ P4_ESCR_EMASK_BIT(P4_EVENT_SSE_INPUT_ASSIST, ALL),
+ .shared = 1,
+ .cntr = { {8, 9, -1}, {10, 11, -1} },
+ },
+ [P4_EVENT_PACKED_SP_UOP] = {
+ .opcode = P4_OPCODE(P4_EVENT_PACKED_SP_UOP),
+ .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
+ .escr_emask =
+ P4_ESCR_EMASK_BIT(P4_EVENT_PACKED_SP_UOP, ALL),
+ .shared = 1,
+ .cntr = { {8, 9, -1}, {10, 11, -1} },
+ },
+ [P4_EVENT_PACKED_DP_UOP] = {
+ .opcode = P4_OPCODE(P4_EVENT_PACKED_DP_UOP),
+ .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
+ .escr_emask =
+ P4_ESCR_EMASK_BIT(P4_EVENT_PACKED_DP_UOP, ALL),
+ .shared = 1,
+ .cntr = { {8, 9, -1}, {10, 11, -1} },
+ },
+ [P4_EVENT_SCALAR_SP_UOP] = {
+ .opcode = P4_OPCODE(P4_EVENT_SCALAR_SP_UOP),
+ .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
+ .escr_emask =
+ P4_ESCR_EMASK_BIT(P4_EVENT_SCALAR_SP_UOP, ALL),
+ .shared = 1,
+ .cntr = { {8, 9, -1}, {10, 11, -1} },
+ },
+ [P4_EVENT_SCALAR_DP_UOP] = {
+ .opcode = P4_OPCODE(P4_EVENT_SCALAR_DP_UOP),
+ .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
+ .escr_emask =
+ P4_ESCR_EMASK_BIT(P4_EVENT_SCALAR_DP_UOP, ALL),
+ .shared = 1,
+ .cntr = { {8, 9, -1}, {10, 11, -1} },
+ },
+ [P4_EVENT_64BIT_MMX_UOP] = {
+ .opcode = P4_OPCODE(P4_EVENT_64BIT_MMX_UOP),
+ .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
+ .escr_emask =
+ P4_ESCR_EMASK_BIT(P4_EVENT_64BIT_MMX_UOP, ALL),
+ .shared = 1,
+ .cntr = { {8, 9, -1}, {10, 11, -1} },
+ },
+ [P4_EVENT_128BIT_MMX_UOP] = {
+ .opcode = P4_OPCODE(P4_EVENT_128BIT_MMX_UOP),
+ .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
+ .escr_emask =
+ P4_ESCR_EMASK_BIT(P4_EVENT_128BIT_MMX_UOP, ALL),
+ .shared = 1,
+ .cntr = { {8, 9, -1}, {10, 11, -1} },
+ },
+ [P4_EVENT_X87_FP_UOP] = {
+ .opcode = P4_OPCODE(P4_EVENT_X87_FP_UOP),
+ .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
+ .escr_emask =
+ P4_ESCR_EMASK_BIT(P4_EVENT_X87_FP_UOP, ALL),
+ .shared = 1,
+ .cntr = { {8, 9, -1}, {10, 11, -1} },
+ },
+ [P4_EVENT_TC_MISC] = {
+ .opcode = P4_OPCODE(P4_EVENT_TC_MISC),
+ .escr_msr = { MSR_P4_TC_ESCR0, MSR_P4_TC_ESCR1 },
+ .escr_emask =
+ P4_ESCR_EMASK_BIT(P4_EVENT_TC_MISC, FLUSH),
+ .cntr = { {4, 5, -1}, {6, 7, -1} },
+ },
+ [P4_EVENT_GLOBAL_POWER_EVENTS] = {
+ .opcode = P4_OPCODE(P4_EVENT_GLOBAL_POWER_EVENTS),
+ .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
+ .escr_emask =
+ P4_ESCR_EMASK_BIT(P4_EVENT_GLOBAL_POWER_EVENTS, RUNNING),
+ .cntr = { {0, -1, -1}, {2, -1, -1} },
+ },
+ [P4_EVENT_TC_MS_XFER] = {
+ .opcode = P4_OPCODE(P4_EVENT_TC_MS_XFER),
+ .escr_msr = { MSR_P4_MS_ESCR0, MSR_P4_MS_ESCR1 },
+ .escr_emask =
+ P4_ESCR_EMASK_BIT(P4_EVENT_TC_MS_XFER, CISC),
+ .cntr = { {4, 5, -1}, {6, 7, -1} },
+ },
+ [P4_EVENT_UOP_QUEUE_WRITES] = {
+ .opcode = P4_OPCODE(P4_EVENT_UOP_QUEUE_WRITES),
+ .escr_msr = { MSR_P4_MS_ESCR0, MSR_P4_MS_ESCR1 },
+ .escr_emask =
+ P4_ESCR_EMASK_BIT(P4_EVENT_UOP_QUEUE_WRITES, FROM_TC_BUILD) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_UOP_QUEUE_WRITES, FROM_TC_DELIVER) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_UOP_QUEUE_WRITES, FROM_ROM),
+ .cntr = { {4, 5, -1}, {6, 7, -1} },
+ },
+ [P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE] = {
+ .opcode = P4_OPCODE(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE),
+ .escr_msr = { MSR_P4_TBPU_ESCR0 , MSR_P4_TBPU_ESCR0 },
+ .escr_emask =
+ P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, CONDITIONAL) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, CALL) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, RETURN) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, INDIRECT),
+ .cntr = { {4, 5, -1}, {6, 7, -1} },
+ },
+ [P4_EVENT_RETIRED_BRANCH_TYPE] = {
+ .opcode = P4_OPCODE(P4_EVENT_RETIRED_BRANCH_TYPE),
+ .escr_msr = { MSR_P4_TBPU_ESCR0 , MSR_P4_TBPU_ESCR1 },
+ .escr_emask =
+ P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, CONDITIONAL) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, CALL) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, RETURN) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, INDIRECT),
+ .cntr = { {4, 5, -1}, {6, 7, -1} },
+ },
+ [P4_EVENT_RESOURCE_STALL] = {
+ .opcode = P4_OPCODE(P4_EVENT_RESOURCE_STALL),
+ .escr_msr = { MSR_P4_ALF_ESCR0, MSR_P4_ALF_ESCR1 },
+ .escr_emask =
+ P4_ESCR_EMASK_BIT(P4_EVENT_RESOURCE_STALL, SBFULL),
+ .cntr = { {12, 13, 16}, {14, 15, 17} },
+ },
+ [P4_EVENT_WC_BUFFER] = {
+ .opcode = P4_OPCODE(P4_EVENT_WC_BUFFER),
+ .escr_msr = { MSR_P4_DAC_ESCR0, MSR_P4_DAC_ESCR1 },
+ .escr_emask =
+ P4_ESCR_EMASK_BIT(P4_EVENT_WC_BUFFER, WCB_EVICTS) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_WC_BUFFER, WCB_FULL_EVICTS),
+ .shared = 1,
+ .cntr = { {8, 9, -1}, {10, 11, -1} },
+ },
+ [P4_EVENT_B2B_CYCLES] = {
+ .opcode = P4_OPCODE(P4_EVENT_B2B_CYCLES),
+ .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
+ .escr_emask = 0,
+ .cntr = { {0, -1, -1}, {2, -1, -1} },
+ },
+ [P4_EVENT_BNR] = {
+ .opcode = P4_OPCODE(P4_EVENT_BNR),
+ .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
+ .escr_emask = 0,
+ .cntr = { {0, -1, -1}, {2, -1, -1} },
+ },
+ [P4_EVENT_SNOOP] = {
+ .opcode = P4_OPCODE(P4_EVENT_SNOOP),
+ .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
+ .escr_emask = 0,
+ .cntr = { {0, -1, -1}, {2, -1, -1} },
+ },
+ [P4_EVENT_RESPONSE] = {
+ .opcode = P4_OPCODE(P4_EVENT_RESPONSE),
+ .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
+ .escr_emask = 0,
+ .cntr = { {0, -1, -1}, {2, -1, -1} },
+ },
+ [P4_EVENT_FRONT_END_EVENT] = {
+ .opcode = P4_OPCODE(P4_EVENT_FRONT_END_EVENT),
+ .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
+ .escr_emask =
+ P4_ESCR_EMASK_BIT(P4_EVENT_FRONT_END_EVENT, NBOGUS) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_FRONT_END_EVENT, BOGUS),
+ .cntr = { {12, 13, 16}, {14, 15, 17} },
+ },
+ [P4_EVENT_EXECUTION_EVENT] = {
+ .opcode = P4_OPCODE(P4_EVENT_EXECUTION_EVENT),
+ .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
+ .escr_emask =
+ P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS0) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS1) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS2) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS3) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS0) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS1) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS2) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS3),
+ .cntr = { {12, 13, 16}, {14, 15, 17} },
+ },
+ [P4_EVENT_REPLAY_EVENT] = {
+ .opcode = P4_OPCODE(P4_EVENT_REPLAY_EVENT),
+ .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
+ .escr_emask =
+ P4_ESCR_EMASK_BIT(P4_EVENT_REPLAY_EVENT, NBOGUS) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_REPLAY_EVENT, BOGUS),
+ .cntr = { {12, 13, 16}, {14, 15, 17} },
+ },
+ [P4_EVENT_INSTR_RETIRED] = {
+ .opcode = P4_OPCODE(P4_EVENT_INSTR_RETIRED),
+ .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 },
+ .escr_emask =
+ P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, NBOGUSNTAG) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, NBOGUSTAG) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, BOGUSNTAG) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, BOGUSTAG),
+ .cntr = { {12, 13, 16}, {14, 15, 17} },
+ },
+ [P4_EVENT_UOPS_RETIRED] = {
+ .opcode = P4_OPCODE(P4_EVENT_UOPS_RETIRED),
+ .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 },
+ .escr_emask =
+ P4_ESCR_EMASK_BIT(P4_EVENT_UOPS_RETIRED, NBOGUS) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_UOPS_RETIRED, BOGUS),
+ .cntr = { {12, 13, 16}, {14, 15, 17} },
+ },
+ [P4_EVENT_UOP_TYPE] = {
+ .opcode = P4_OPCODE(P4_EVENT_UOP_TYPE),
+ .escr_msr = { MSR_P4_RAT_ESCR0, MSR_P4_RAT_ESCR1 },
+ .escr_emask =
+ P4_ESCR_EMASK_BIT(P4_EVENT_UOP_TYPE, TAGLOADS) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_UOP_TYPE, TAGSTORES),
+ .cntr = { {12, 13, 16}, {14, 15, 17} },
+ },
+ [P4_EVENT_BRANCH_RETIRED] = {
+ .opcode = P4_OPCODE(P4_EVENT_BRANCH_RETIRED),
+ .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
+ .escr_emask =
+ P4_ESCR_EMASK_BIT(P4_EVENT_BRANCH_RETIRED, MMNP) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_BRANCH_RETIRED, MMNM) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_BRANCH_RETIRED, MMTP) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_BRANCH_RETIRED, MMTM),
+ .cntr = { {12, 13, 16}, {14, 15, 17} },
+ },
+ [P4_EVENT_MISPRED_BRANCH_RETIRED] = {
+ .opcode = P4_OPCODE(P4_EVENT_MISPRED_BRANCH_RETIRED),
+ .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 },
+ .escr_emask =
+ P4_ESCR_EMASK_BIT(P4_EVENT_MISPRED_BRANCH_RETIRED, NBOGUS),
+ .cntr = { {12, 13, 16}, {14, 15, 17} },
+ },
+ [P4_EVENT_X87_ASSIST] = {
+ .opcode = P4_OPCODE(P4_EVENT_X87_ASSIST),
+ .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
+ .escr_emask =
+ P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, FPSU) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, FPSO) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, POAO) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, POAU) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, PREA),
+ .cntr = { {12, 13, 16}, {14, 15, 17} },
+ },
+ [P4_EVENT_MACHINE_CLEAR] = {
+ .opcode = P4_OPCODE(P4_EVENT_MACHINE_CLEAR),
+ .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
+ .escr_emask =
+ P4_ESCR_EMASK_BIT(P4_EVENT_MACHINE_CLEAR, CLEAR) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_MACHINE_CLEAR, MOCLEAR) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_MACHINE_CLEAR, SMCLEAR),
+ .cntr = { {12, 13, 16}, {14, 15, 17} },
+ },
+ [P4_EVENT_INSTR_COMPLETED] = {
+ .opcode = P4_OPCODE(P4_EVENT_INSTR_COMPLETED),
+ .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 },
+ .escr_emask =
+ P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_COMPLETED, NBOGUS) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_COMPLETED, BOGUS),
+ .cntr = { {12, 13, 16}, {14, 15, 17} },
+ },
+};
+
+#define P4_GEN_CACHE_EVENT(event, bit, metric) \
+ p4_config_pack_escr(P4_ESCR_EVENT(event) | \
+ P4_ESCR_EMASK_BIT(event, bit)) | \
+ p4_config_pack_cccr(metric | \
+ P4_CCCR_ESEL(P4_OPCODE_ESEL(P4_OPCODE(event))))
+
+static __initconst const u64 p4_hw_cache_event_ids
+ [PERF_COUNT_HW_CACHE_MAX]
+ [PERF_COUNT_HW_CACHE_OP_MAX]
+ [PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+ [ C(L1D ) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x0,
+ [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS,
+ P4_PEBS_METRIC__1stl_cache_load_miss_retired),
+ },
+ },
+ [ C(LL ) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x0,
+ [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS,
+ P4_PEBS_METRIC__2ndl_cache_load_miss_retired),
+ },
+},
+ [ C(DTLB) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x0,
+ [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS,
+ P4_PEBS_METRIC__dtlb_load_miss_retired),
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = 0x0,
+ [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS,
+ P4_PEBS_METRIC__dtlb_store_miss_retired),
+ },
+ },
+ [ C(ITLB) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_ITLB_REFERENCE, HIT,
+ P4_PEBS_METRIC__none),
+ [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_ITLB_REFERENCE, MISS,
+ P4_PEBS_METRIC__none),
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ },
+ [ C(NODE) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ },
+};
+
+/*
+ * Because of Netburst being quite restricted in how many
+ * identical events may run simultaneously, we introduce event aliases,
+ * ie the different events which have the same functionality but
+ * utilize non-intersected resources (ESCR/CCCR/counter registers).
+ *
+ * This allow us to relax restrictions a bit and run two or more
+ * identical events together.
+ *
+ * Never set any custom internal bits such as P4_CONFIG_HT,
+ * P4_CONFIG_ALIASABLE or bits for P4_PEBS_METRIC, they are
+ * either up to date automatically or not applicable at all.
+ */
+struct p4_event_alias {
+ u64 original;
+ u64 alternative;
+} p4_event_aliases[] = {
+ {
+ /*
+ * Non-halted cycles can be substituted with non-sleeping cycles (see
+ * Intel SDM Vol3b for details). We need this alias to be able
+ * to run nmi-watchdog and 'perf top' (or any other user space tool
+ * which is interested in running PERF_COUNT_HW_CPU_CYCLES)
+ * simultaneously.
+ */
+ .original =
+ p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_GLOBAL_POWER_EVENTS) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_GLOBAL_POWER_EVENTS, RUNNING)),
+ .alternative =
+ p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_EXECUTION_EVENT) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS0)|
+ P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS1)|
+ P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS2)|
+ P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS3)|
+ P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS0) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS1) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS2) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS3))|
+ p4_config_pack_cccr(P4_CCCR_THRESHOLD(15) | P4_CCCR_COMPLEMENT |
+ P4_CCCR_COMPARE),
+ },
+};
+
+static u64 p4_get_alias_event(u64 config)
+{
+ u64 config_match;
+ int i;
+
+ /*
+ * Only event with special mark is allowed,
+ * we're to be sure it didn't come as malformed
+ * RAW event.
+ */
+ if (!(config & P4_CONFIG_ALIASABLE))
+ return 0;
+
+ config_match = config & P4_CONFIG_EVENT_ALIAS_MASK;
+
+ for (i = 0; i < ARRAY_SIZE(p4_event_aliases); i++) {
+ if (config_match == p4_event_aliases[i].original) {
+ config_match = p4_event_aliases[i].alternative;
+ break;
+ } else if (config_match == p4_event_aliases[i].alternative) {
+ config_match = p4_event_aliases[i].original;
+ break;
+ }
+ }
+
+ if (i >= ARRAY_SIZE(p4_event_aliases))
+ return 0;
+
+ return config_match | (config & P4_CONFIG_EVENT_ALIAS_IMMUTABLE_BITS);
+}
+
+static u64 p4_general_events[PERF_COUNT_HW_MAX] = {
+ /* non-halted CPU clocks */
+ [PERF_COUNT_HW_CPU_CYCLES] =
+ p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_GLOBAL_POWER_EVENTS) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_GLOBAL_POWER_EVENTS, RUNNING)) |
+ P4_CONFIG_ALIASABLE,
+
+ /*
+ * retired instructions
+ * in a sake of simplicity we don't use the FSB tagging
+ */
+ [PERF_COUNT_HW_INSTRUCTIONS] =
+ p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_INSTR_RETIRED) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, NBOGUSNTAG) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, BOGUSNTAG)),
+
+ /* cache hits */
+ [PERF_COUNT_HW_CACHE_REFERENCES] =
+ p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_BSQ_CACHE_REFERENCE) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITS) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITE) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITM) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITS) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITE) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITM)),
+
+ /* cache misses */
+ [PERF_COUNT_HW_CACHE_MISSES] =
+ p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_BSQ_CACHE_REFERENCE) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_MISS) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_MISS) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, WR_2ndL_MISS)),
+
+ /* branch instructions retired */
+ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] =
+ p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_RETIRED_BRANCH_TYPE) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, CONDITIONAL) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, CALL) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, RETURN) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, INDIRECT)),
+
+ /* mispredicted branches retired */
+ [PERF_COUNT_HW_BRANCH_MISSES] =
+ p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_MISPRED_BRANCH_RETIRED) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_MISPRED_BRANCH_RETIRED, NBOGUS)),
+
+ /* bus ready clocks (cpu is driving #DRDY_DRV\#DRDY_OWN): */
+ [PERF_COUNT_HW_BUS_CYCLES] =
+ p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_FSB_DATA_ACTIVITY) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_DRV) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_OWN)) |
+ p4_config_pack_cccr(P4_CCCR_EDGE | P4_CCCR_COMPARE),
+};
+
+static struct p4_event_bind *p4_config_get_bind(u64 config)
+{
+ unsigned int evnt = p4_config_unpack_event(config);
+ struct p4_event_bind *bind = NULL;
+
+ if (evnt < ARRAY_SIZE(p4_event_bind_map))
+ bind = &p4_event_bind_map[evnt];
+
+ return bind;
+}
+
+static u64 p4_pmu_event_map(int hw_event)
+{
+ struct p4_event_bind *bind;
+ unsigned int esel;
+ u64 config;
+
+ config = p4_general_events[hw_event];
+ bind = p4_config_get_bind(config);
+ esel = P4_OPCODE_ESEL(bind->opcode);
+ config |= p4_config_pack_cccr(P4_CCCR_ESEL(esel));
+
+ return config;
+}
+
+/* check cpu model specifics */
+static bool p4_event_match_cpu_model(unsigned int event_idx)
+{
+ /* INSTR_COMPLETED event only exist for model 3, 4, 6 (Prescott) */
+ if (event_idx == P4_EVENT_INSTR_COMPLETED) {
+ if (boot_cpu_data.x86_model != 3 &&
+ boot_cpu_data.x86_model != 4 &&
+ boot_cpu_data.x86_model != 6)
+ return false;
+ }
+
+ /*
+ * For info
+ * - IQ_ESCR0, IQ_ESCR1 only for models 1 and 2
+ */
+
+ return true;
+}
+
+static int p4_validate_raw_event(struct perf_event *event)
+{
+ unsigned int v, emask;
+
+ /* User data may have out-of-bound event index */
+ v = p4_config_unpack_event(event->attr.config);
+ if (v >= ARRAY_SIZE(p4_event_bind_map))
+ return -EINVAL;
+
+ /* It may be unsupported: */
+ if (!p4_event_match_cpu_model(v))
+ return -EINVAL;
+
+ /*
+ * NOTE: P4_CCCR_THREAD_ANY has not the same meaning as
+ * in Architectural Performance Monitoring, it means not
+ * on _which_ logical cpu to count but rather _when_, ie it
+ * depends on logical cpu state -- count event if one cpu active,
+ * none, both or any, so we just allow user to pass any value
+ * desired.
+ *
+ * In turn we always set Tx_OS/Tx_USR bits bound to logical
+ * cpu without their propagation to another cpu
+ */
+
+ /*
+ * if an event is shared across the logical threads
+ * the user needs special permissions to be able to use it
+ */
+ if (p4_ht_active() && p4_event_bind_map[v].shared) {
+ if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
+ return -EACCES;
+ }
+
+ /* ESCR EventMask bits may be invalid */
+ emask = p4_config_unpack_escr(event->attr.config) & P4_ESCR_EVENTMASK_MASK;
+ if (emask & ~p4_event_bind_map[v].escr_emask)
+ return -EINVAL;
+
+ /*
+ * it may have some invalid PEBS bits
+ */
+ if (p4_config_pebs_has(event->attr.config, P4_PEBS_CONFIG_ENABLE))
+ return -EINVAL;
+
+ v = p4_config_unpack_metric(event->attr.config);
+ if (v >= ARRAY_SIZE(p4_pebs_bind_map))
+ return -EINVAL;
+
+ return 0;
+}
+
+static int p4_hw_config(struct perf_event *event)
+{
+ int cpu = get_cpu();
+ int rc = 0;
+ u32 escr, cccr;
+
+ /*
+ * the reason we use cpu that early is that: if we get scheduled
+ * first time on the same cpu -- we will not need swap thread
+ * specific flags in config (and will save some cpu cycles)
+ */
+
+ cccr = p4_default_cccr_conf(cpu);
+ escr = p4_default_escr_conf(cpu, event->attr.exclude_kernel,
+ event->attr.exclude_user);
+ event->hw.config = p4_config_pack_escr(escr) |
+ p4_config_pack_cccr(cccr);
+
+ if (p4_ht_active() && p4_ht_thread(cpu))
+ event->hw.config = p4_set_ht_bit(event->hw.config);
+
+ if (event->attr.type == PERF_TYPE_RAW) {
+ struct p4_event_bind *bind;
+ unsigned int esel;
+ /*
+ * Clear bits we reserve to be managed by kernel itself
+ * and never allowed from a user space
+ */
+ event->attr.config &= P4_CONFIG_MASK;
+
+ rc = p4_validate_raw_event(event);
+ if (rc)
+ goto out;
+
+ /*
+ * Note that for RAW events we allow user to use P4_CCCR_RESERVED
+ * bits since we keep additional info here (for cache events and etc)
+ */
+ event->hw.config |= event->attr.config;
+ bind = p4_config_get_bind(event->attr.config);
+ if (!bind) {
+ rc = -EINVAL;
+ goto out;
+ }
+ esel = P4_OPCODE_ESEL(bind->opcode);
+ event->hw.config |= p4_config_pack_cccr(P4_CCCR_ESEL(esel));
+ }
+
+ rc = x86_setup_perfctr(event);
+out:
+ put_cpu();
+ return rc;
+}
+
+static inline int p4_pmu_clear_cccr_ovf(struct hw_perf_event *hwc)
+{
+ u64 v;
+
+ /* an official way for overflow indication */
+ rdmsrl(hwc->config_base, v);
+ if (v & P4_CCCR_OVF) {
+ wrmsrl(hwc->config_base, v & ~P4_CCCR_OVF);
+ return 1;
+ }
+
+ /*
+ * In some circumstances the overflow might issue an NMI but did
+ * not set P4_CCCR_OVF bit. Because a counter holds a negative value
+ * we simply check for high bit being set, if it's cleared it means
+ * the counter has reached zero value and continued counting before
+ * real NMI signal was received:
+ */
+ rdmsrl(hwc->event_base, v);
+ if (!(v & ARCH_P4_UNFLAGGED_BIT))
+ return 1;
+
+ return 0;
+}
+
+static void p4_pmu_disable_pebs(void)
+{
+ /*
+ * FIXME
+ *
+ * It's still allowed that two threads setup same cache
+ * events so we can't simply clear metrics until we knew
+ * no one is depending on us, so we need kind of counter
+ * for "ReplayEvent" users.
+ *
+ * What is more complex -- RAW events, if user (for some
+ * reason) will pass some cache event metric with improper
+ * event opcode -- it's fine from hardware point of view
+ * but completely nonsense from "meaning" of such action.
+ *
+ * So at moment let leave metrics turned on forever -- it's
+ * ok for now but need to be revisited!
+ *
+ * (void)wrmsrl_safe(MSR_IA32_PEBS_ENABLE, 0);
+ * (void)wrmsrl_safe(MSR_P4_PEBS_MATRIX_VERT, 0);
+ */
+}
+
+static inline void p4_pmu_disable_event(struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+
+ /*
+ * If event gets disabled while counter is in overflowed
+ * state we need to clear P4_CCCR_OVF, otherwise interrupt get
+ * asserted again and again
+ */
+ (void)wrmsrl_safe(hwc->config_base,
+ p4_config_unpack_cccr(hwc->config) & ~P4_CCCR_ENABLE & ~P4_CCCR_OVF & ~P4_CCCR_RESERVED);
+}
+
+static void p4_pmu_disable_all(void)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ int idx;
+
+ for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+ struct perf_event *event = cpuc->events[idx];
+ if (!test_bit(idx, cpuc->active_mask))
+ continue;
+ p4_pmu_disable_event(event);
+ }
+
+ p4_pmu_disable_pebs();
+}
+
+/* configuration must be valid */
+static void p4_pmu_enable_pebs(u64 config)
+{
+ struct p4_pebs_bind *bind;
+ unsigned int idx;
+
+ BUILD_BUG_ON(P4_PEBS_METRIC__max > P4_PEBS_CONFIG_METRIC_MASK);
+
+ idx = p4_config_unpack_metric(config);
+ if (idx == P4_PEBS_METRIC__none)
+ return;
+
+ bind = &p4_pebs_bind_map[idx];
+
+ (void)wrmsrl_safe(MSR_IA32_PEBS_ENABLE, (u64)bind->metric_pebs);
+ (void)wrmsrl_safe(MSR_P4_PEBS_MATRIX_VERT, (u64)bind->metric_vert);
+}
+
+static void p4_pmu_enable_event(struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ int thread = p4_ht_config_thread(hwc->config);
+ u64 escr_conf = p4_config_unpack_escr(p4_clear_ht_bit(hwc->config));
+ unsigned int idx = p4_config_unpack_event(hwc->config);
+ struct p4_event_bind *bind;
+ u64 escr_addr, cccr;
+
+ bind = &p4_event_bind_map[idx];
+ escr_addr = bind->escr_msr[thread];
+
+ /*
+ * - we dont support cascaded counters yet
+ * - and counter 1 is broken (erratum)
+ */
+ WARN_ON_ONCE(p4_is_event_cascaded(hwc->config));
+ WARN_ON_ONCE(hwc->idx == 1);
+
+ /* we need a real Event value */
+ escr_conf &= ~P4_ESCR_EVENT_MASK;
+ escr_conf |= P4_ESCR_EVENT(P4_OPCODE_EVNT(bind->opcode));
+
+ cccr = p4_config_unpack_cccr(hwc->config);
+
+ /*
+ * it could be Cache event so we need to write metrics
+ * into additional MSRs
+ */
+ p4_pmu_enable_pebs(hwc->config);
+
+ (void)wrmsrl_safe(escr_addr, escr_conf);
+ (void)wrmsrl_safe(hwc->config_base,
+ (cccr & ~P4_CCCR_RESERVED) | P4_CCCR_ENABLE);
+}
+
+static void p4_pmu_enable_all(int added)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ int idx;
+
+ for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+ struct perf_event *event = cpuc->events[idx];
+ if (!test_bit(idx, cpuc->active_mask))
+ continue;
+ p4_pmu_enable_event(event);
+ }
+}
+
+static int p4_pmu_handle_irq(struct pt_regs *regs)
+{
+ struct perf_sample_data data;
+ struct cpu_hw_events *cpuc;
+ struct perf_event *event;
+ struct hw_perf_event *hwc;
+ int idx, handled = 0;
+ u64 val;
+
+ cpuc = this_cpu_ptr(&cpu_hw_events);
+
+ for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+ int overflow;
+
+ if (!test_bit(idx, cpuc->active_mask)) {
+ /* catch in-flight IRQs */
+ if (__test_and_clear_bit(idx, cpuc->running))
+ handled++;
+ continue;
+ }
+
+ event = cpuc->events[idx];
+ hwc = &event->hw;
+
+ WARN_ON_ONCE(hwc->idx != idx);
+
+ /* it might be unflagged overflow */
+ overflow = p4_pmu_clear_cccr_ovf(hwc);
+
+ val = x86_perf_event_update(event);
+ if (!overflow && (val & (1ULL << (x86_pmu.cntval_bits - 1))))
+ continue;
+
+ handled += overflow;
+
+ /* event overflow for sure */
+ perf_sample_data_init(&data, 0, hwc->last_period);
+
+ if (!x86_perf_event_set_period(event))
+ continue;
+
+
+ if (perf_event_overflow(event, &data, regs))
+ x86_pmu_stop(event, 0);
+ }
+
+ if (handled)
+ inc_irq_stat(apic_perf_irqs);
+
+ /*
+ * When dealing with the unmasking of the LVTPC on P4 perf hw, it has
+ * been observed that the OVF bit flag has to be cleared first _before_
+ * the LVTPC can be unmasked.
+ *
+ * The reason is the NMI line will continue to be asserted while the OVF
+ * bit is set. This causes a second NMI to generate if the LVTPC is
+ * unmasked before the OVF bit is cleared, leading to unknown NMI
+ * messages.
+ */
+ apic_write(APIC_LVTPC, APIC_DM_NMI);
+
+ return handled;
+}
+
+/*
+ * swap thread specific fields according to a thread
+ * we are going to run on
+ */
+static void p4_pmu_swap_config_ts(struct hw_perf_event *hwc, int cpu)
+{
+ u32 escr, cccr;
+
+ /*
+ * we either lucky and continue on same cpu or no HT support
+ */
+ if (!p4_should_swap_ts(hwc->config, cpu))
+ return;
+
+ /*
+ * the event is migrated from an another logical
+ * cpu, so we need to swap thread specific flags
+ */
+
+ escr = p4_config_unpack_escr(hwc->config);
+ cccr = p4_config_unpack_cccr(hwc->config);
+
+ if (p4_ht_thread(cpu)) {
+ cccr &= ~P4_CCCR_OVF_PMI_T0;
+ cccr |= P4_CCCR_OVF_PMI_T1;
+ if (escr & P4_ESCR_T0_OS) {
+ escr &= ~P4_ESCR_T0_OS;
+ escr |= P4_ESCR_T1_OS;
+ }
+ if (escr & P4_ESCR_T0_USR) {
+ escr &= ~P4_ESCR_T0_USR;
+ escr |= P4_ESCR_T1_USR;
+ }
+ hwc->config = p4_config_pack_escr(escr);
+ hwc->config |= p4_config_pack_cccr(cccr);
+ hwc->config |= P4_CONFIG_HT;
+ } else {
+ cccr &= ~P4_CCCR_OVF_PMI_T1;
+ cccr |= P4_CCCR_OVF_PMI_T0;
+ if (escr & P4_ESCR_T1_OS) {
+ escr &= ~P4_ESCR_T1_OS;
+ escr |= P4_ESCR_T0_OS;
+ }
+ if (escr & P4_ESCR_T1_USR) {
+ escr &= ~P4_ESCR_T1_USR;
+ escr |= P4_ESCR_T0_USR;
+ }
+ hwc->config = p4_config_pack_escr(escr);
+ hwc->config |= p4_config_pack_cccr(cccr);
+ hwc->config &= ~P4_CONFIG_HT;
+ }
+}
+
+/*
+ * ESCR address hashing is tricky, ESCRs are not sequential
+ * in memory but all starts from MSR_P4_BSU_ESCR0 (0x03a0) and
+ * the metric between any ESCRs is laid in range [0xa0,0xe1]
+ *
+ * so we make ~70% filled hashtable
+ */
+
+#define P4_ESCR_MSR_BASE 0x000003a0
+#define P4_ESCR_MSR_MAX 0x000003e1
+#define P4_ESCR_MSR_TABLE_SIZE (P4_ESCR_MSR_MAX - P4_ESCR_MSR_BASE + 1)
+#define P4_ESCR_MSR_IDX(msr) (msr - P4_ESCR_MSR_BASE)
+#define P4_ESCR_MSR_TABLE_ENTRY(msr) [P4_ESCR_MSR_IDX(msr)] = msr
+
+static const unsigned int p4_escr_table[P4_ESCR_MSR_TABLE_SIZE] = {
+ P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_ALF_ESCR0),
+ P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_ALF_ESCR1),
+ P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_BPU_ESCR0),
+ P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_BPU_ESCR1),
+ P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_BSU_ESCR0),
+ P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_BSU_ESCR1),
+ P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR0),
+ P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR1),
+ P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR2),
+ P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR3),
+ P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR4),
+ P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR5),
+ P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_DAC_ESCR0),
+ P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_DAC_ESCR1),
+ P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FIRM_ESCR0),
+ P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FIRM_ESCR1),
+ P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FLAME_ESCR0),
+ P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FLAME_ESCR1),
+ P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FSB_ESCR0),
+ P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FSB_ESCR1),
+ P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IQ_ESCR0),
+ P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IQ_ESCR1),
+ P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IS_ESCR0),
+ P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IS_ESCR1),
+ P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_ITLB_ESCR0),
+ P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_ITLB_ESCR1),
+ P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IX_ESCR0),
+ P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IX_ESCR1),
+ P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_MOB_ESCR0),
+ P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_MOB_ESCR1),
+ P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_MS_ESCR0),
+ P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_MS_ESCR1),
+ P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_PMH_ESCR0),
+ P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_PMH_ESCR1),
+ P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_RAT_ESCR0),
+ P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_RAT_ESCR1),
+ P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_SAAT_ESCR0),
+ P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_SAAT_ESCR1),
+ P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_SSU_ESCR0),
+ P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_SSU_ESCR1),
+ P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_TBPU_ESCR0),
+ P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_TBPU_ESCR1),
+ P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_TC_ESCR0),
+ P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_TC_ESCR1),
+ P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_U2L_ESCR0),
+ P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_U2L_ESCR1),
+};
+
+static int p4_get_escr_idx(unsigned int addr)
+{
+ unsigned int idx = P4_ESCR_MSR_IDX(addr);
+
+ if (unlikely(idx >= P4_ESCR_MSR_TABLE_SIZE ||
+ !p4_escr_table[idx] ||
+ p4_escr_table[idx] != addr)) {
+ WARN_ONCE(1, "P4 PMU: Wrong address passed: %x\n", addr);
+ return -1;
+ }
+
+ return idx;
+}
+
+static int p4_next_cntr(int thread, unsigned long *used_mask,
+ struct p4_event_bind *bind)
+{
+ int i, j;
+
+ for (i = 0; i < P4_CNTR_LIMIT; i++) {
+ j = bind->cntr[thread][i];
+ if (j != -1 && !test_bit(j, used_mask))
+ return j;
+ }
+
+ return -1;
+}
+
+static int p4_pmu_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
+{
+ unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
+ unsigned long escr_mask[BITS_TO_LONGS(P4_ESCR_MSR_TABLE_SIZE)];
+ int cpu = smp_processor_id();
+ struct hw_perf_event *hwc;
+ struct p4_event_bind *bind;
+ unsigned int i, thread, num;
+ int cntr_idx, escr_idx;
+ u64 config_alias;
+ int pass;
+
+ bitmap_zero(used_mask, X86_PMC_IDX_MAX);
+ bitmap_zero(escr_mask, P4_ESCR_MSR_TABLE_SIZE);
+
+ for (i = 0, num = n; i < n; i++, num--) {
+
+ hwc = &cpuc->event_list[i]->hw;
+ thread = p4_ht_thread(cpu);
+ pass = 0;
+
+again:
+ /*
+ * It's possible to hit a circular lock
+ * between original and alternative events
+ * if both are scheduled already.
+ */
+ if (pass > 2)
+ goto done;
+
+ bind = p4_config_get_bind(hwc->config);
+ escr_idx = p4_get_escr_idx(bind->escr_msr[thread]);
+ if (unlikely(escr_idx == -1))
+ goto done;
+
+ if (hwc->idx != -1 && !p4_should_swap_ts(hwc->config, cpu)) {
+ cntr_idx = hwc->idx;
+ if (assign)
+ assign[i] = hwc->idx;
+ goto reserve;
+ }
+
+ cntr_idx = p4_next_cntr(thread, used_mask, bind);
+ if (cntr_idx == -1 || test_bit(escr_idx, escr_mask)) {
+ /*
+ * Check whether an event alias is still available.
+ */
+ config_alias = p4_get_alias_event(hwc->config);
+ if (!config_alias)
+ goto done;
+ hwc->config = config_alias;
+ pass++;
+ goto again;
+ }
+ /*
+ * Perf does test runs to see if a whole group can be assigned
+ * together succesfully. There can be multiple rounds of this.
+ * Unfortunately, p4_pmu_swap_config_ts touches the hwc->config
+ * bits, such that the next round of group assignments will
+ * cause the above p4_should_swap_ts to pass instead of fail.
+ * This leads to counters exclusive to thread0 being used by
+ * thread1.
+ *
+ * Solve this with a cheap hack, reset the idx back to -1 to
+ * force a new lookup (p4_next_cntr) to get the right counter
+ * for the right thread.
+ *
+ * This probably doesn't comply with the general spirit of how
+ * perf wants to work, but P4 is special. :-(
+ */
+ if (p4_should_swap_ts(hwc->config, cpu))
+ hwc->idx = -1;
+ p4_pmu_swap_config_ts(hwc, cpu);
+ if (assign)
+ assign[i] = cntr_idx;
+reserve:
+ set_bit(cntr_idx, used_mask);
+ set_bit(escr_idx, escr_mask);
+ }
+
+done:
+ return num ? -EINVAL : 0;
+}
+
+PMU_FORMAT_ATTR(cccr, "config:0-31" );
+PMU_FORMAT_ATTR(escr, "config:32-62");
+PMU_FORMAT_ATTR(ht, "config:63" );
+
+static struct attribute *intel_p4_formats_attr[] = {
+ &format_attr_cccr.attr,
+ &format_attr_escr.attr,
+ &format_attr_ht.attr,
+ NULL,
+};
+
+static __initconst const struct x86_pmu p4_pmu = {
+ .name = "Netburst P4/Xeon",
+ .handle_irq = p4_pmu_handle_irq,
+ .disable_all = p4_pmu_disable_all,
+ .enable_all = p4_pmu_enable_all,
+ .enable = p4_pmu_enable_event,
+ .disable = p4_pmu_disable_event,
+ .eventsel = MSR_P4_BPU_CCCR0,
+ .perfctr = MSR_P4_BPU_PERFCTR0,
+ .event_map = p4_pmu_event_map,
+ .max_events = ARRAY_SIZE(p4_general_events),
+ .get_event_constraints = x86_get_event_constraints,
+ /*
+ * IF HT disabled we may need to use all
+ * ARCH_P4_MAX_CCCR counters simulaneously
+ * though leave it restricted at moment assuming
+ * HT is on
+ */
+ .num_counters = ARCH_P4_MAX_CCCR,
+ .apic = 1,
+ .cntval_bits = ARCH_P4_CNTRVAL_BITS,
+ .cntval_mask = ARCH_P4_CNTRVAL_MASK,
+ .max_period = (1ULL << (ARCH_P4_CNTRVAL_BITS - 1)) - 1,
+ .hw_config = p4_hw_config,
+ .schedule_events = p4_pmu_schedule_events,
+ /*
+ * This handles erratum N15 in intel doc 249199-029,
+ * the counter may not be updated correctly on write
+ * so we need a second write operation to do the trick
+ * (the official workaround didn't work)
+ *
+ * the former idea is taken from OProfile code
+ */
+ .perfctr_second_write = 1,
+
+ .format_attrs = intel_p4_formats_attr,
+};
+
+__init int p4_pmu_init(void)
+{
+ unsigned int low, high;
+ int i, reg;
+
+ /* If we get stripped -- indexing fails */
+ BUILD_BUG_ON(ARCH_P4_MAX_CCCR > INTEL_PMC_MAX_GENERIC);
+
+ rdmsr(MSR_IA32_MISC_ENABLE, low, high);
+ if (!(low & (1 << 7))) {
+ pr_cont("unsupported Netburst CPU model %d ",
+ boot_cpu_data.x86_model);
+ return -ENODEV;
+ }
+
+ memcpy(hw_cache_event_ids, p4_hw_cache_event_ids,
+ sizeof(hw_cache_event_ids));
+
+ pr_cont("Netburst events, ");
+
+ x86_pmu = p4_pmu;
+
+ /*
+ * Even though the counters are configured to interrupt a particular
+ * logical processor when an overflow happens, testing has shown that
+ * on kdump kernels (which uses a single cpu), thread1's counter
+ * continues to run and will report an NMI on thread0. Due to the
+ * overflow bug, this leads to a stream of unknown NMIs.
+ *
+ * Solve this by zero'ing out the registers to mimic a reset.
+ */
+ for (i = 0; i < x86_pmu.num_counters; i++) {
+ reg = x86_pmu_config_addr(i);
+ wrmsrl_safe(reg, 0ULL);
+ }
+
+ return 0;
+}
diff --git a/arch/x86/events/intel/p6.c b/arch/x86/events/intel/p6.c
new file mode 100644
index 000000000..1f5c47ab4
--- /dev/null
+++ b/arch/x86/events/intel/p6.c
@@ -0,0 +1,279 @@
+#include <linux/perf_event.h>
+#include <linux/types.h>
+
+#include "../perf_event.h"
+
+/*
+ * Not sure about some of these
+ */
+static const u64 p6_perfmon_event_map[] =
+{
+ [PERF_COUNT_HW_CPU_CYCLES] = 0x0079, /* CPU_CLK_UNHALTED */
+ [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, /* INST_RETIRED */
+ [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0f2e, /* L2_RQSTS:M:E:S:I */
+ [PERF_COUNT_HW_CACHE_MISSES] = 0x012e, /* L2_RQSTS:I */
+ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4, /* BR_INST_RETIRED */
+ [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5, /* BR_MISS_PRED_RETIRED */
+ [PERF_COUNT_HW_BUS_CYCLES] = 0x0062, /* BUS_DRDY_CLOCKS */
+ [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x00a2, /* RESOURCE_STALLS */
+
+};
+
+static const u64 __initconst p6_hw_cache_event_ids
+ [PERF_COUNT_HW_CACHE_MAX]
+ [PERF_COUNT_HW_CACHE_OP_MAX]
+ [PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+ [ C(L1D) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x0043, /* DATA_MEM_REFS */
+ [ C(RESULT_MISS) ] = 0x0045, /* DCU_LINES_IN */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = 0,
+ [ C(RESULT_MISS) ] = 0x0f29, /* L2_LD:M:E:S:I */
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = 0,
+ [ C(RESULT_MISS) ] = 0,
+ },
+ },
+ [ C(L1I ) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x0080, /* IFU_IFETCH */
+ [ C(RESULT_MISS) ] = 0x0f28, /* L2_IFETCH:M:E:S:I */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = 0,
+ [ C(RESULT_MISS) ] = 0,
+ },
+ },
+ [ C(LL ) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0,
+ [ C(RESULT_MISS) ] = 0,
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = 0,
+ [ C(RESULT_MISS) ] = 0x0025, /* L2_M_LINES_INM */
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = 0,
+ [ C(RESULT_MISS) ] = 0,
+ },
+ },
+ [ C(DTLB) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x0043, /* DATA_MEM_REFS */
+ [ C(RESULT_MISS) ] = 0,
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = 0,
+ [ C(RESULT_MISS) ] = 0,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = 0,
+ [ C(RESULT_MISS) ] = 0,
+ },
+ },
+ [ C(ITLB) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x0080, /* IFU_IFETCH */
+ [ C(RESULT_MISS) ] = 0x0085, /* ITLB_MISS */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ },
+ [ C(BPU ) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED */
+ [ C(RESULT_MISS) ] = 0x00c5, /* BR_MISS_PRED_RETIRED */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ },
+};
+
+static u64 p6_pmu_event_map(int hw_event)
+{
+ return p6_perfmon_event_map[hw_event];
+}
+
+/*
+ * Event setting that is specified not to count anything.
+ * We use this to effectively disable a counter.
+ *
+ * L2_RQSTS with 0 MESI unit mask.
+ */
+#define P6_NOP_EVENT 0x0000002EULL
+
+static struct event_constraint p6_event_constraints[] =
+{
+ INTEL_EVENT_CONSTRAINT(0xc1, 0x1), /* FLOPS */
+ INTEL_EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */
+ INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */
+ INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */
+ INTEL_EVENT_CONSTRAINT(0x13, 0x2), /* DIV */
+ INTEL_EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */
+ EVENT_CONSTRAINT_END
+};
+
+static void p6_pmu_disable_all(void)
+{
+ u64 val;
+
+ /* p6 only has one enable register */
+ rdmsrl(MSR_P6_EVNTSEL0, val);
+ val &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
+ wrmsrl(MSR_P6_EVNTSEL0, val);
+}
+
+static void p6_pmu_enable_all(int added)
+{
+ unsigned long val;
+
+ /* p6 only has one enable register */
+ rdmsrl(MSR_P6_EVNTSEL0, val);
+ val |= ARCH_PERFMON_EVENTSEL_ENABLE;
+ wrmsrl(MSR_P6_EVNTSEL0, val);
+}
+
+static inline void
+p6_pmu_disable_event(struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ u64 val = P6_NOP_EVENT;
+
+ (void)wrmsrl_safe(hwc->config_base, val);
+}
+
+static void p6_pmu_enable_event(struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ u64 val;
+
+ val = hwc->config;
+
+ /*
+ * p6 only has a global event enable, set on PerfEvtSel0
+ * We "disable" events by programming P6_NOP_EVENT
+ * and we rely on p6_pmu_enable_all() being called
+ * to actually enable the events.
+ */
+
+ (void)wrmsrl_safe(hwc->config_base, val);
+}
+
+PMU_FORMAT_ATTR(event, "config:0-7" );
+PMU_FORMAT_ATTR(umask, "config:8-15" );
+PMU_FORMAT_ATTR(edge, "config:18" );
+PMU_FORMAT_ATTR(pc, "config:19" );
+PMU_FORMAT_ATTR(inv, "config:23" );
+PMU_FORMAT_ATTR(cmask, "config:24-31" );
+
+static struct attribute *intel_p6_formats_attr[] = {
+ &format_attr_event.attr,
+ &format_attr_umask.attr,
+ &format_attr_edge.attr,
+ &format_attr_pc.attr,
+ &format_attr_inv.attr,
+ &format_attr_cmask.attr,
+ NULL,
+};
+
+static __initconst const struct x86_pmu p6_pmu = {
+ .name = "p6",
+ .handle_irq = x86_pmu_handle_irq,
+ .disable_all = p6_pmu_disable_all,
+ .enable_all = p6_pmu_enable_all,
+ .enable = p6_pmu_enable_event,
+ .disable = p6_pmu_disable_event,
+ .hw_config = x86_pmu_hw_config,
+ .schedule_events = x86_schedule_events,
+ .eventsel = MSR_P6_EVNTSEL0,
+ .perfctr = MSR_P6_PERFCTR0,
+ .event_map = p6_pmu_event_map,
+ .max_events = ARRAY_SIZE(p6_perfmon_event_map),
+ .apic = 1,
+ .max_period = (1ULL << 31) - 1,
+ .version = 0,
+ .num_counters = 2,
+ /*
+ * Events have 40 bits implemented. However they are designed such
+ * that bits [32-39] are sign extensions of bit 31. As such the
+ * effective width of a event for P6-like PMU is 32 bits only.
+ *
+ * See IA-32 Intel Architecture Software developer manual Vol 3B
+ */
+ .cntval_bits = 32,
+ .cntval_mask = (1ULL << 32) - 1,
+ .get_event_constraints = x86_get_event_constraints,
+ .event_constraints = p6_event_constraints,
+
+ .format_attrs = intel_p6_formats_attr,
+ .events_sysfs_show = intel_event_sysfs_show,
+
+};
+
+static __init void p6_pmu_rdpmc_quirk(void)
+{
+ if (boot_cpu_data.x86_mask < 9) {
+ /*
+ * PPro erratum 26; fixed in stepping 9 and above.
+ */
+ pr_warn("Userspace RDPMC support disabled due to a CPU erratum\n");
+ x86_pmu.attr_rdpmc_broken = 1;
+ x86_pmu.attr_rdpmc = 0;
+ }
+}
+
+__init int p6_pmu_init(void)
+{
+ x86_pmu = p6_pmu;
+
+ switch (boot_cpu_data.x86_model) {
+ case 1: /* Pentium Pro */
+ x86_add_quirk(p6_pmu_rdpmc_quirk);
+ break;
+
+ case 3: /* Pentium II - Klamath */
+ case 5: /* Pentium II - Deschutes */
+ case 6: /* Pentium II - Mendocino */
+ break;
+
+ case 7: /* Pentium III - Katmai */
+ case 8: /* Pentium III - Coppermine */
+ case 10: /* Pentium III Xeon */
+ case 11: /* Pentium III - Tualatin */
+ break;
+
+ case 9: /* Pentium M - Banias */
+ case 13: /* Pentium M - Dothan */
+ break;
+
+ default:
+ pr_cont("unsupported p6 CPU model %d ", boot_cpu_data.x86_model);
+ return -ENODEV;
+ }
+
+ memcpy(hw_cache_event_ids, p6_hw_cache_event_ids,
+ sizeof(hw_cache_event_ids));
+
+ return 0;
+}
diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c
new file mode 100644
index 000000000..7377814de
--- /dev/null
+++ b/arch/x86/events/intel/pt.c
@@ -0,0 +1,1243 @@
+/*
+ * Intel(R) Processor Trace PMU driver for perf
+ * Copyright (c) 2013-2014, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * Intel PT is specified in the Intel Architecture Instruction Set Extensions
+ * Programming Reference:
+ * http://software.intel.com/en-us/intel-isa-extensions
+ */
+
+#undef DEBUG
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/device.h>
+
+#include <asm/perf_event.h>
+#include <asm/insn.h>
+#include <asm/io.h>
+#include <asm/intel_pt.h>
+
+#include "../perf_event.h"
+#include "pt.h"
+
+static DEFINE_PER_CPU(struct pt, pt_ctx);
+
+static struct pt_pmu pt_pmu;
+
+enum cpuid_regs {
+ CR_EAX = 0,
+ CR_ECX,
+ CR_EDX,
+ CR_EBX
+};
+
+/*
+ * Capabilities of Intel PT hardware, such as number of address bits or
+ * supported output schemes, are cached and exported to userspace as "caps"
+ * attribute group of pt pmu device
+ * (/sys/bus/event_source/devices/intel_pt/caps/) so that userspace can store
+ * relevant bits together with intel_pt traces.
+ *
+ * These are necessary for both trace decoding (payloads_lip, contains address
+ * width encoded in IP-related packets), and event configuration (bitmasks with
+ * permitted values for certain bit fields).
+ */
+#define PT_CAP(_n, _l, _r, _m) \
+ [PT_CAP_ ## _n] = { .name = __stringify(_n), .leaf = _l, \
+ .reg = _r, .mask = _m }
+
+static struct pt_cap_desc {
+ const char *name;
+ u32 leaf;
+ u8 reg;
+ u32 mask;
+} pt_caps[] = {
+ PT_CAP(max_subleaf, 0, CR_EAX, 0xffffffff),
+ PT_CAP(cr3_filtering, 0, CR_EBX, BIT(0)),
+ PT_CAP(psb_cyc, 0, CR_EBX, BIT(1)),
+ PT_CAP(mtc, 0, CR_EBX, BIT(3)),
+ PT_CAP(topa_output, 0, CR_ECX, BIT(0)),
+ PT_CAP(topa_multiple_entries, 0, CR_ECX, BIT(1)),
+ PT_CAP(single_range_output, 0, CR_ECX, BIT(2)),
+ PT_CAP(payloads_lip, 0, CR_ECX, BIT(31)),
+ PT_CAP(mtc_periods, 1, CR_EAX, 0xffff0000),
+ PT_CAP(cycle_thresholds, 1, CR_EBX, 0xffff),
+ PT_CAP(psb_periods, 1, CR_EBX, 0xffff0000),
+};
+
+static u32 pt_cap_get(enum pt_capabilities cap)
+{
+ struct pt_cap_desc *cd = &pt_caps[cap];
+ u32 c = pt_pmu.caps[cd->leaf * PT_CPUID_REGS_NUM + cd->reg];
+ unsigned int shift = __ffs(cd->mask);
+
+ return (c & cd->mask) >> shift;
+}
+
+static ssize_t pt_cap_show(struct device *cdev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct dev_ext_attribute *ea =
+ container_of(attr, struct dev_ext_attribute, attr);
+ enum pt_capabilities cap = (long)ea->var;
+
+ return snprintf(buf, PAGE_SIZE, "%x\n", pt_cap_get(cap));
+}
+
+static struct attribute_group pt_cap_group = {
+ .name = "caps",
+};
+
+PMU_FORMAT_ATTR(cyc, "config:1" );
+PMU_FORMAT_ATTR(mtc, "config:9" );
+PMU_FORMAT_ATTR(tsc, "config:10" );
+PMU_FORMAT_ATTR(noretcomp, "config:11" );
+PMU_FORMAT_ATTR(mtc_period, "config:14-17" );
+PMU_FORMAT_ATTR(cyc_thresh, "config:19-22" );
+PMU_FORMAT_ATTR(psb_period, "config:24-27" );
+
+static struct attribute *pt_formats_attr[] = {
+ &format_attr_cyc.attr,
+ &format_attr_mtc.attr,
+ &format_attr_tsc.attr,
+ &format_attr_noretcomp.attr,
+ &format_attr_mtc_period.attr,
+ &format_attr_cyc_thresh.attr,
+ &format_attr_psb_period.attr,
+ NULL,
+};
+
+static struct attribute_group pt_format_group = {
+ .name = "format",
+ .attrs = pt_formats_attr,
+};
+
+static const struct attribute_group *pt_attr_groups[] = {
+ &pt_cap_group,
+ &pt_format_group,
+ NULL,
+};
+
+static int __init pt_pmu_hw_init(void)
+{
+ struct dev_ext_attribute *de_attrs;
+ struct attribute **attrs;
+ size_t size;
+ u64 reg;
+ int ret;
+ long i;
+
+ if (boot_cpu_has(X86_FEATURE_VMX)) {
+ /*
+ * Intel SDM, 36.5 "Tracing post-VMXON" says that
+ * "IA32_VMX_MISC[bit 14]" being 1 means PT can trace
+ * post-VMXON.
+ */
+ rdmsrl(MSR_IA32_VMX_MISC, reg);
+ if (reg & BIT(14))
+ pt_pmu.vmx = true;
+ }
+
+ attrs = NULL;
+
+ for (i = 0; i < PT_CPUID_LEAVES; i++) {
+ cpuid_count(20, i,
+ &pt_pmu.caps[CR_EAX + i*PT_CPUID_REGS_NUM],
+ &pt_pmu.caps[CR_EBX + i*PT_CPUID_REGS_NUM],
+ &pt_pmu.caps[CR_ECX + i*PT_CPUID_REGS_NUM],
+ &pt_pmu.caps[CR_EDX + i*PT_CPUID_REGS_NUM]);
+ }
+
+ ret = -ENOMEM;
+ size = sizeof(struct attribute *) * (ARRAY_SIZE(pt_caps)+1);
+ attrs = kzalloc(size, GFP_KERNEL);
+ if (!attrs)
+ goto fail;
+
+ size = sizeof(struct dev_ext_attribute) * (ARRAY_SIZE(pt_caps)+1);
+ de_attrs = kzalloc(size, GFP_KERNEL);
+ if (!de_attrs)
+ goto fail;
+
+ for (i = 0; i < ARRAY_SIZE(pt_caps); i++) {
+ struct dev_ext_attribute *de_attr = de_attrs + i;
+
+ de_attr->attr.attr.name = pt_caps[i].name;
+
+ sysfs_attr_init(&de_attr->attr.attr);
+
+ de_attr->attr.attr.mode = S_IRUGO;
+ de_attr->attr.show = pt_cap_show;
+ de_attr->var = (void *)i;
+
+ attrs[i] = &de_attr->attr.attr;
+ }
+
+ pt_cap_group.attrs = attrs;
+
+ return 0;
+
+fail:
+ kfree(attrs);
+
+ return ret;
+}
+
+#define RTIT_CTL_CYC_PSB (RTIT_CTL_CYCLEACC | \
+ RTIT_CTL_CYC_THRESH | \
+ RTIT_CTL_PSB_FREQ)
+
+#define RTIT_CTL_MTC (RTIT_CTL_MTC_EN | \
+ RTIT_CTL_MTC_RANGE)
+
+#define PT_CONFIG_MASK (RTIT_CTL_TSC_EN | \
+ RTIT_CTL_DISRETC | \
+ RTIT_CTL_CYC_PSB | \
+ RTIT_CTL_MTC)
+
+static bool pt_event_valid(struct perf_event *event)
+{
+ u64 config = event->attr.config;
+ u64 allowed, requested;
+
+ if ((config & PT_CONFIG_MASK) != config)
+ return false;
+
+ if (config & RTIT_CTL_CYC_PSB) {
+ if (!pt_cap_get(PT_CAP_psb_cyc))
+ return false;
+
+ allowed = pt_cap_get(PT_CAP_psb_periods);
+ requested = (config & RTIT_CTL_PSB_FREQ) >>
+ RTIT_CTL_PSB_FREQ_OFFSET;
+ if (requested && (!(allowed & BIT(requested))))
+ return false;
+
+ allowed = pt_cap_get(PT_CAP_cycle_thresholds);
+ requested = (config & RTIT_CTL_CYC_THRESH) >>
+ RTIT_CTL_CYC_THRESH_OFFSET;
+ if (requested && (!(allowed & BIT(requested))))
+ return false;
+ }
+
+ if (config & RTIT_CTL_MTC) {
+ /*
+ * In the unlikely case that CPUID lists valid mtc periods,
+ * but not the mtc capability, drop out here.
+ *
+ * Spec says that setting mtc period bits while mtc bit in
+ * CPUID is 0 will #GP, so better safe than sorry.
+ */
+ if (!pt_cap_get(PT_CAP_mtc))
+ return false;
+
+ allowed = pt_cap_get(PT_CAP_mtc_periods);
+ if (!allowed)
+ return false;
+
+ requested = (config & RTIT_CTL_MTC_RANGE) >>
+ RTIT_CTL_MTC_RANGE_OFFSET;
+
+ if (!(allowed & BIT(requested)))
+ return false;
+ }
+
+ return true;
+}
+
+/*
+ * PT configuration helpers
+ * These all are cpu affine and operate on a local PT
+ */
+
+static void pt_config(struct perf_event *event)
+{
+ u64 reg;
+
+ if (!event->hw.itrace_started) {
+ event->hw.itrace_started = 1;
+ wrmsrl(MSR_IA32_RTIT_STATUS, 0);
+ }
+
+ reg = RTIT_CTL_TOPA | RTIT_CTL_BRANCH_EN | RTIT_CTL_TRACEEN;
+
+ if (!event->attr.exclude_kernel)
+ reg |= RTIT_CTL_OS;
+ if (!event->attr.exclude_user)
+ reg |= RTIT_CTL_USR;
+
+ reg |= (event->attr.config & PT_CONFIG_MASK);
+
+ event->hw.config = reg;
+ wrmsrl(MSR_IA32_RTIT_CTL, reg);
+}
+
+static void pt_config_stop(struct perf_event *event)
+{
+ u64 ctl = READ_ONCE(event->hw.config);
+
+ /* may be already stopped by a PMI */
+ if (!(ctl & RTIT_CTL_TRACEEN))
+ return;
+
+ ctl &= ~RTIT_CTL_TRACEEN;
+ wrmsrl(MSR_IA32_RTIT_CTL, ctl);
+
+ WRITE_ONCE(event->hw.config, ctl);
+
+ /*
+ * A wrmsr that disables trace generation serializes other PT
+ * registers and causes all data packets to be written to memory,
+ * but a fence is required for the data to become globally visible.
+ *
+ * The below WMB, separating data store and aux_head store matches
+ * the consumer's RMB that separates aux_head load and data load.
+ */
+ wmb();
+}
+
+static void pt_config_buffer(void *buf, unsigned int topa_idx,
+ unsigned int output_off)
+{
+ u64 reg;
+
+ wrmsrl(MSR_IA32_RTIT_OUTPUT_BASE, virt_to_phys(buf));
+
+ reg = 0x7f | ((u64)topa_idx << 7) | ((u64)output_off << 32);
+
+ wrmsrl(MSR_IA32_RTIT_OUTPUT_MASK, reg);
+}
+
+/*
+ * Keep ToPA table-related metadata on the same page as the actual table,
+ * taking up a few words from the top
+ */
+
+#define TENTS_PER_PAGE (((PAGE_SIZE - 40) / sizeof(struct topa_entry)) - 1)
+
+/**
+ * struct topa - page-sized ToPA table with metadata at the top
+ * @table: actual ToPA table entries, as understood by PT hardware
+ * @list: linkage to struct pt_buffer's list of tables
+ * @phys: physical address of this page
+ * @offset: offset of the first entry in this table in the buffer
+ * @size: total size of all entries in this table
+ * @last: index of the last initialized entry in this table
+ */
+struct topa {
+ struct topa_entry table[TENTS_PER_PAGE];
+ struct list_head list;
+ u64 phys;
+ u64 offset;
+ size_t size;
+ int last;
+};
+
+/* make -1 stand for the last table entry */
+#define TOPA_ENTRY(t, i) ((i) == -1 ? &(t)->table[(t)->last] : &(t)->table[(i)])
+
+/**
+ * topa_alloc() - allocate page-sized ToPA table
+ * @cpu: CPU on which to allocate.
+ * @gfp: Allocation flags.
+ *
+ * Return: On success, return the pointer to ToPA table page.
+ */
+static struct topa *topa_alloc(int cpu, gfp_t gfp)
+{
+ int node = cpu_to_node(cpu);
+ struct topa *topa;
+ struct page *p;
+
+ p = alloc_pages_node(node, gfp | __GFP_ZERO, 0);
+ if (!p)
+ return NULL;
+
+ topa = page_address(p);
+ topa->last = 0;
+ topa->phys = page_to_phys(p);
+
+ /*
+ * In case of singe-entry ToPA, always put the self-referencing END
+ * link as the 2nd entry in the table
+ */
+ if (!pt_cap_get(PT_CAP_topa_multiple_entries)) {
+ TOPA_ENTRY(topa, 1)->base = topa->phys >> TOPA_SHIFT;
+ TOPA_ENTRY(topa, 1)->end = 1;
+ }
+
+ return topa;
+}
+
+/**
+ * topa_free() - free a page-sized ToPA table
+ * @topa: Table to deallocate.
+ */
+static void topa_free(struct topa *topa)
+{
+ free_page((unsigned long)topa);
+}
+
+/**
+ * topa_insert_table() - insert a ToPA table into a buffer
+ * @buf: PT buffer that's being extended.
+ * @topa: New topa table to be inserted.
+ *
+ * If it's the first table in this buffer, set up buffer's pointers
+ * accordingly; otherwise, add a END=1 link entry to @topa to the current
+ * "last" table and adjust the last table pointer to @topa.
+ */
+static void topa_insert_table(struct pt_buffer *buf, struct topa *topa)
+{
+ struct topa *last = buf->last;
+
+ list_add_tail(&topa->list, &buf->tables);
+
+ if (!buf->first) {
+ buf->first = buf->last = buf->cur = topa;
+ return;
+ }
+
+ topa->offset = last->offset + last->size;
+ buf->last = topa;
+
+ if (!pt_cap_get(PT_CAP_topa_multiple_entries))
+ return;
+
+ BUG_ON(last->last != TENTS_PER_PAGE - 1);
+
+ TOPA_ENTRY(last, -1)->base = topa->phys >> TOPA_SHIFT;
+ TOPA_ENTRY(last, -1)->end = 1;
+}
+
+/**
+ * topa_table_full() - check if a ToPA table is filled up
+ * @topa: ToPA table.
+ */
+static bool topa_table_full(struct topa *topa)
+{
+ /* single-entry ToPA is a special case */
+ if (!pt_cap_get(PT_CAP_topa_multiple_entries))
+ return !!topa->last;
+
+ return topa->last == TENTS_PER_PAGE - 1;
+}
+
+/**
+ * topa_insert_pages() - create a list of ToPA tables
+ * @buf: PT buffer being initialized.
+ * @gfp: Allocation flags.
+ *
+ * This initializes a list of ToPA tables with entries from
+ * the data_pages provided by rb_alloc_aux().
+ *
+ * Return: 0 on success or error code.
+ */
+static int topa_insert_pages(struct pt_buffer *buf, gfp_t gfp)
+{
+ struct topa *topa = buf->last;
+ int order = 0;
+ struct page *p;
+
+ p = virt_to_page(buf->data_pages[buf->nr_pages]);
+ if (PagePrivate(p))
+ order = page_private(p);
+
+ if (topa_table_full(topa)) {
+ topa = topa_alloc(buf->cpu, gfp);
+ if (!topa)
+ return -ENOMEM;
+
+ topa_insert_table(buf, topa);
+ }
+
+ TOPA_ENTRY(topa, -1)->base = page_to_phys(p) >> TOPA_SHIFT;
+ TOPA_ENTRY(topa, -1)->size = order;
+ if (!buf->snapshot && !pt_cap_get(PT_CAP_topa_multiple_entries)) {
+ TOPA_ENTRY(topa, -1)->intr = 1;
+ TOPA_ENTRY(topa, -1)->stop = 1;
+ }
+
+ topa->last++;
+ topa->size += sizes(order);
+
+ buf->nr_pages += 1ul << order;
+
+ return 0;
+}
+
+/**
+ * pt_topa_dump() - print ToPA tables and their entries
+ * @buf: PT buffer.
+ */
+static void pt_topa_dump(struct pt_buffer *buf)
+{
+ struct topa *topa;
+
+ list_for_each_entry(topa, &buf->tables, list) {
+ int i;
+
+ pr_debug("# table @%p (%016Lx), off %llx size %zx\n", topa->table,
+ topa->phys, topa->offset, topa->size);
+ for (i = 0; i < TENTS_PER_PAGE; i++) {
+ pr_debug("# entry @%p (%lx sz %u %c%c%c) raw=%16llx\n",
+ &topa->table[i],
+ (unsigned long)topa->table[i].base << TOPA_SHIFT,
+ sizes(topa->table[i].size),
+ topa->table[i].end ? 'E' : ' ',
+ topa->table[i].intr ? 'I' : ' ',
+ topa->table[i].stop ? 'S' : ' ',
+ *(u64 *)&topa->table[i]);
+ if ((pt_cap_get(PT_CAP_topa_multiple_entries) &&
+ topa->table[i].stop) ||
+ topa->table[i].end)
+ break;
+ }
+ }
+}
+
+/**
+ * pt_buffer_advance() - advance to the next output region
+ * @buf: PT buffer.
+ *
+ * Advance the current pointers in the buffer to the next ToPA entry.
+ */
+static void pt_buffer_advance(struct pt_buffer *buf)
+{
+ buf->output_off = 0;
+ buf->cur_idx++;
+
+ if (buf->cur_idx == buf->cur->last) {
+ if (buf->cur == buf->last)
+ buf->cur = buf->first;
+ else
+ buf->cur = list_entry(buf->cur->list.next, struct topa,
+ list);
+ buf->cur_idx = 0;
+ }
+}
+
+/**
+ * pt_update_head() - calculate current offsets and sizes
+ * @pt: Per-cpu pt context.
+ *
+ * Update buffer's current write pointer position and data size.
+ */
+static void pt_update_head(struct pt *pt)
+{
+ struct pt_buffer *buf = perf_get_aux(&pt->handle);
+ u64 topa_idx, base, old;
+
+ /* offset of the first region in this table from the beginning of buf */
+ base = buf->cur->offset + buf->output_off;
+
+ /* offset of the current output region within this table */
+ for (topa_idx = 0; topa_idx < buf->cur_idx; topa_idx++)
+ base += sizes(buf->cur->table[topa_idx].size);
+
+ if (buf->snapshot) {
+ local_set(&buf->data_size, base);
+ } else {
+ old = (local64_xchg(&buf->head, base) &
+ ((buf->nr_pages << PAGE_SHIFT) - 1));
+ if (base < old)
+ base += buf->nr_pages << PAGE_SHIFT;
+
+ local_add(base - old, &buf->data_size);
+ }
+}
+
+/**
+ * pt_buffer_region() - obtain current output region's address
+ * @buf: PT buffer.
+ */
+static void *pt_buffer_region(struct pt_buffer *buf)
+{
+ return phys_to_virt(buf->cur->table[buf->cur_idx].base << TOPA_SHIFT);
+}
+
+/**
+ * pt_buffer_region_size() - obtain current output region's size
+ * @buf: PT buffer.
+ */
+static size_t pt_buffer_region_size(struct pt_buffer *buf)
+{
+ return sizes(buf->cur->table[buf->cur_idx].size);
+}
+
+/**
+ * pt_handle_status() - take care of possible status conditions
+ * @pt: Per-cpu pt context.
+ */
+static void pt_handle_status(struct pt *pt)
+{
+ struct pt_buffer *buf = perf_get_aux(&pt->handle);
+ int advance = 0;
+ u64 status;
+
+ rdmsrl(MSR_IA32_RTIT_STATUS, status);
+
+ if (status & RTIT_STATUS_ERROR) {
+ pr_err_ratelimited("ToPA ERROR encountered, trying to recover\n");
+ pt_topa_dump(buf);
+ status &= ~RTIT_STATUS_ERROR;
+ }
+
+ if (status & RTIT_STATUS_STOPPED) {
+ status &= ~RTIT_STATUS_STOPPED;
+
+ /*
+ * On systems that only do single-entry ToPA, hitting STOP
+ * means we are already losing data; need to let the decoder
+ * know.
+ */
+ if (!pt_cap_get(PT_CAP_topa_multiple_entries) ||
+ buf->output_off == sizes(TOPA_ENTRY(buf->cur, buf->cur_idx)->size)) {
+ local_inc(&buf->lost);
+ advance++;
+ }
+ }
+
+ /*
+ * Also on single-entry ToPA implementations, interrupt will come
+ * before the output reaches its output region's boundary.
+ */
+ if (!pt_cap_get(PT_CAP_topa_multiple_entries) && !buf->snapshot &&
+ pt_buffer_region_size(buf) - buf->output_off <= TOPA_PMI_MARGIN) {
+ void *head = pt_buffer_region(buf);
+
+ /* everything within this margin needs to be zeroed out */
+ memset(head + buf->output_off, 0,
+ pt_buffer_region_size(buf) -
+ buf->output_off);
+ advance++;
+ }
+
+ if (advance)
+ pt_buffer_advance(buf);
+
+ wrmsrl(MSR_IA32_RTIT_STATUS, status);
+}
+
+/**
+ * pt_read_offset() - translate registers into buffer pointers
+ * @buf: PT buffer.
+ *
+ * Set buffer's output pointers from MSR values.
+ */
+static void pt_read_offset(struct pt_buffer *buf)
+{
+ u64 offset, base_topa;
+
+ rdmsrl(MSR_IA32_RTIT_OUTPUT_BASE, base_topa);
+ buf->cur = phys_to_virt(base_topa);
+
+ rdmsrl(MSR_IA32_RTIT_OUTPUT_MASK, offset);
+ /* offset within current output region */
+ buf->output_off = offset >> 32;
+ /* index of current output region within this table */
+ buf->cur_idx = (offset & 0xffffff80) >> 7;
+}
+
+/**
+ * pt_topa_next_entry() - obtain index of the first page in the next ToPA entry
+ * @buf: PT buffer.
+ * @pg: Page offset in the buffer.
+ *
+ * When advancing to the next output region (ToPA entry), given a page offset
+ * into the buffer, we need to find the offset of the first page in the next
+ * region.
+ */
+static unsigned int pt_topa_next_entry(struct pt_buffer *buf, unsigned int pg)
+{
+ struct topa_entry *te = buf->topa_index[pg];
+
+ /* one region */
+ if (buf->first == buf->last && buf->first->last == 1)
+ return pg;
+
+ do {
+ pg++;
+ pg &= buf->nr_pages - 1;
+ } while (buf->topa_index[pg] == te);
+
+ return pg;
+}
+
+/**
+ * pt_buffer_reset_markers() - place interrupt and stop bits in the buffer
+ * @buf: PT buffer.
+ * @handle: Current output handle.
+ *
+ * Place INT and STOP marks to prevent overwriting old data that the consumer
+ * hasn't yet collected and waking up the consumer after a certain fraction of
+ * the buffer has filled up. Only needed and sensible for non-snapshot counters.
+ *
+ * This obviously relies on buf::head to figure out buffer markers, so it has
+ * to be called after pt_buffer_reset_offsets() and before the hardware tracing
+ * is enabled.
+ */
+static int pt_buffer_reset_markers(struct pt_buffer *buf,
+ struct perf_output_handle *handle)
+
+{
+ unsigned long head = local64_read(&buf->head);
+ unsigned long idx, npages, wakeup;
+
+ /* can't stop in the middle of an output region */
+ if (buf->output_off + handle->size + 1 <
+ sizes(TOPA_ENTRY(buf->cur, buf->cur_idx)->size))
+ return -EINVAL;
+
+
+ /* single entry ToPA is handled by marking all regions STOP=1 INT=1 */
+ if (!pt_cap_get(PT_CAP_topa_multiple_entries))
+ return 0;
+
+ /* clear STOP and INT from current entry */
+ buf->topa_index[buf->stop_pos]->stop = 0;
+ buf->topa_index[buf->stop_pos]->intr = 0;
+ buf->topa_index[buf->intr_pos]->intr = 0;
+
+ /* how many pages till the STOP marker */
+ npages = handle->size >> PAGE_SHIFT;
+
+ /* if it's on a page boundary, fill up one more page */
+ if (!offset_in_page(head + handle->size + 1))
+ npages++;
+
+ idx = (head >> PAGE_SHIFT) + npages;
+ idx &= buf->nr_pages - 1;
+ buf->stop_pos = idx;
+
+ wakeup = handle->wakeup >> PAGE_SHIFT;
+
+ /* in the worst case, wake up the consumer one page before hard stop */
+ idx = (head >> PAGE_SHIFT) + npages - 1;
+ if (idx > wakeup)
+ idx = wakeup;
+
+ idx &= buf->nr_pages - 1;
+ buf->intr_pos = idx;
+
+ buf->topa_index[buf->stop_pos]->stop = 1;
+ buf->topa_index[buf->stop_pos]->intr = 1;
+ buf->topa_index[buf->intr_pos]->intr = 1;
+
+ return 0;
+}
+
+/**
+ * pt_buffer_setup_topa_index() - build topa_index[] table of regions
+ * @buf: PT buffer.
+ *
+ * topa_index[] references output regions indexed by offset into the
+ * buffer for purposes of quick reverse lookup.
+ */
+static void pt_buffer_setup_topa_index(struct pt_buffer *buf)
+{
+ struct topa *cur = buf->first, *prev = buf->last;
+ struct topa_entry *te_cur = TOPA_ENTRY(cur, 0),
+ *te_prev = TOPA_ENTRY(prev, prev->last - 1);
+ int pg = 0, idx = 0;
+
+ while (pg < buf->nr_pages) {
+ int tidx;
+
+ /* pages within one topa entry */
+ for (tidx = 0; tidx < 1 << te_cur->size; tidx++, pg++)
+ buf->topa_index[pg] = te_prev;
+
+ te_prev = te_cur;
+
+ if (idx == cur->last - 1) {
+ /* advance to next topa table */
+ idx = 0;
+ cur = list_entry(cur->list.next, struct topa, list);
+ } else {
+ idx++;
+ }
+ te_cur = TOPA_ENTRY(cur, idx);
+ }
+
+}
+
+/**
+ * pt_buffer_reset_offsets() - adjust buffer's write pointers from aux_head
+ * @buf: PT buffer.
+ * @head: Write pointer (aux_head) from AUX buffer.
+ *
+ * Find the ToPA table and entry corresponding to given @head and set buffer's
+ * "current" pointers accordingly. This is done after we have obtained the
+ * current aux_head position from a successful call to perf_aux_output_begin()
+ * to make sure the hardware is writing to the right place.
+ *
+ * This function modifies buf::{cur,cur_idx,output_off} that will be programmed
+ * into PT msrs when the tracing is enabled and buf::head and buf::data_size,
+ * which are used to determine INT and STOP markers' locations by a subsequent
+ * call to pt_buffer_reset_markers().
+ */
+static void pt_buffer_reset_offsets(struct pt_buffer *buf, unsigned long head)
+{
+ int pg;
+
+ if (buf->snapshot)
+ head &= (buf->nr_pages << PAGE_SHIFT) - 1;
+
+ pg = (head >> PAGE_SHIFT) & (buf->nr_pages - 1);
+ pg = pt_topa_next_entry(buf, pg);
+
+ buf->cur = (struct topa *)((unsigned long)buf->topa_index[pg] & PAGE_MASK);
+ buf->cur_idx = ((unsigned long)buf->topa_index[pg] -
+ (unsigned long)buf->cur) / sizeof(struct topa_entry);
+ buf->output_off = head & (sizes(buf->cur->table[buf->cur_idx].size) - 1);
+
+ local64_set(&buf->head, head);
+ local_set(&buf->data_size, 0);
+}
+
+/**
+ * pt_buffer_fini_topa() - deallocate ToPA structure of a buffer
+ * @buf: PT buffer.
+ */
+static void pt_buffer_fini_topa(struct pt_buffer *buf)
+{
+ struct topa *topa, *iter;
+
+ list_for_each_entry_safe(topa, iter, &buf->tables, list) {
+ /*
+ * right now, this is in free_aux() path only, so
+ * no need to unlink this table from the list
+ */
+ topa_free(topa);
+ }
+}
+
+/**
+ * pt_buffer_init_topa() - initialize ToPA table for pt buffer
+ * @buf: PT buffer.
+ * @size: Total size of all regions within this ToPA.
+ * @gfp: Allocation flags.
+ */
+static int pt_buffer_init_topa(struct pt_buffer *buf, unsigned long nr_pages,
+ gfp_t gfp)
+{
+ struct topa *topa;
+ int err;
+
+ topa = topa_alloc(buf->cpu, gfp);
+ if (!topa)
+ return -ENOMEM;
+
+ topa_insert_table(buf, topa);
+
+ while (buf->nr_pages < nr_pages) {
+ err = topa_insert_pages(buf, gfp);
+ if (err) {
+ pt_buffer_fini_topa(buf);
+ return -ENOMEM;
+ }
+ }
+
+ pt_buffer_setup_topa_index(buf);
+
+ /* link last table to the first one, unless we're double buffering */
+ if (pt_cap_get(PT_CAP_topa_multiple_entries)) {
+ TOPA_ENTRY(buf->last, -1)->base = buf->first->phys >> TOPA_SHIFT;
+ TOPA_ENTRY(buf->last, -1)->end = 1;
+ }
+
+ pt_topa_dump(buf);
+ return 0;
+}
+
+/**
+ * pt_buffer_setup_aux() - set up topa tables for a PT buffer
+ * @cpu: Cpu on which to allocate, -1 means current.
+ * @pages: Array of pointers to buffer pages passed from perf core.
+ * @nr_pages: Number of pages in the buffer.
+ * @snapshot: If this is a snapshot/overwrite counter.
+ *
+ * This is a pmu::setup_aux callback that sets up ToPA tables and all the
+ * bookkeeping for an AUX buffer.
+ *
+ * Return: Our private PT buffer structure.
+ */
+static void *
+pt_buffer_setup_aux(int cpu, void **pages, int nr_pages, bool snapshot)
+{
+ struct pt_buffer *buf;
+ int node, ret;
+
+ if (!nr_pages)
+ return NULL;
+
+ if (cpu == -1)
+ cpu = raw_smp_processor_id();
+ node = cpu_to_node(cpu);
+
+ buf = kzalloc_node(offsetof(struct pt_buffer, topa_index[nr_pages]),
+ GFP_KERNEL, node);
+ if (!buf)
+ return NULL;
+
+ buf->cpu = cpu;
+ buf->snapshot = snapshot;
+ buf->data_pages = pages;
+
+ INIT_LIST_HEAD(&buf->tables);
+
+ ret = pt_buffer_init_topa(buf, nr_pages, GFP_KERNEL);
+ if (ret) {
+ kfree(buf);
+ return NULL;
+ }
+
+ return buf;
+}
+
+/**
+ * pt_buffer_free_aux() - perf AUX deallocation path callback
+ * @data: PT buffer.
+ */
+static void pt_buffer_free_aux(void *data)
+{
+ struct pt_buffer *buf = data;
+
+ pt_buffer_fini_topa(buf);
+ kfree(buf);
+}
+
+/**
+ * pt_buffer_is_full() - check if the buffer is full
+ * @buf: PT buffer.
+ * @pt: Per-cpu pt handle.
+ *
+ * If the user hasn't read data from the output region that aux_head
+ * points to, the buffer is considered full: the user needs to read at
+ * least this region and update aux_tail to point past it.
+ */
+static bool pt_buffer_is_full(struct pt_buffer *buf, struct pt *pt)
+{
+ if (buf->snapshot)
+ return false;
+
+ if (local_read(&buf->data_size) >= pt->handle.size)
+ return true;
+
+ return false;
+}
+
+/**
+ * intel_pt_interrupt() - PT PMI handler
+ */
+void intel_pt_interrupt(void)
+{
+ struct pt *pt = this_cpu_ptr(&pt_ctx);
+ struct pt_buffer *buf;
+ struct perf_event *event = pt->handle.event;
+
+ /*
+ * There may be a dangling PT bit in the interrupt status register
+ * after PT has been disabled by pt_event_stop(). Make sure we don't
+ * do anything (particularly, re-enable) for this event here.
+ */
+ if (!ACCESS_ONCE(pt->handle_nmi))
+ return;
+
+ /*
+ * If VMX is on and PT does not support it, don't touch anything.
+ */
+ if (READ_ONCE(pt->vmx_on))
+ return;
+
+ if (!event)
+ return;
+
+ pt_config_stop(event);
+
+ buf = perf_get_aux(&pt->handle);
+ if (!buf)
+ return;
+
+ pt_read_offset(buf);
+
+ pt_handle_status(pt);
+
+ pt_update_head(pt);
+
+ perf_aux_output_end(&pt->handle, local_xchg(&buf->data_size, 0),
+ local_xchg(&buf->lost, 0));
+
+ if (!event->hw.state) {
+ int ret;
+
+ buf = perf_aux_output_begin(&pt->handle, event);
+ if (!buf) {
+ event->hw.state = PERF_HES_STOPPED;
+ return;
+ }
+
+ pt_buffer_reset_offsets(buf, pt->handle.head);
+ /* snapshot counters don't use PMI, so it's safe */
+ ret = pt_buffer_reset_markers(buf, &pt->handle);
+ if (ret) {
+ perf_aux_output_end(&pt->handle, 0, true);
+ return;
+ }
+
+ pt_config_buffer(buf->cur->table, buf->cur_idx,
+ buf->output_off);
+ pt_config(event);
+ }
+}
+
+void intel_pt_handle_vmx(int on)
+{
+ struct pt *pt = this_cpu_ptr(&pt_ctx);
+ struct perf_event *event;
+ unsigned long flags;
+
+ /* PT plays nice with VMX, do nothing */
+ if (pt_pmu.vmx)
+ return;
+
+ /*
+ * VMXON will clear RTIT_CTL.TraceEn; we need to make
+ * sure to not try to set it while VMX is on. Disable
+ * interrupts to avoid racing with pmu callbacks;
+ * concurrent PMI should be handled fine.
+ */
+ local_irq_save(flags);
+ WRITE_ONCE(pt->vmx_on, on);
+
+ if (on) {
+ /* prevent pt_config_stop() from writing RTIT_CTL */
+ event = pt->handle.event;
+ if (event)
+ event->hw.config = 0;
+ }
+ local_irq_restore(flags);
+}
+EXPORT_SYMBOL_GPL(intel_pt_handle_vmx);
+
+/*
+ * PMU callbacks
+ */
+
+static void pt_event_start(struct perf_event *event, int mode)
+{
+ struct pt *pt = this_cpu_ptr(&pt_ctx);
+ struct pt_buffer *buf = perf_get_aux(&pt->handle);
+
+ if (READ_ONCE(pt->vmx_on))
+ return;
+
+ if (!buf || pt_buffer_is_full(buf, pt)) {
+ event->hw.state = PERF_HES_STOPPED;
+ return;
+ }
+
+ ACCESS_ONCE(pt->handle_nmi) = 1;
+ event->hw.state = 0;
+
+ pt_config_buffer(buf->cur->table, buf->cur_idx,
+ buf->output_off);
+ pt_config(event);
+}
+
+static void pt_event_stop(struct perf_event *event, int mode)
+{
+ struct pt *pt = this_cpu_ptr(&pt_ctx);
+
+ /*
+ * Protect against the PMI racing with disabling wrmsr,
+ * see comment in intel_pt_interrupt().
+ */
+ ACCESS_ONCE(pt->handle_nmi) = 0;
+
+ pt_config_stop(event);
+
+ if (event->hw.state == PERF_HES_STOPPED)
+ return;
+
+ event->hw.state = PERF_HES_STOPPED;
+
+ if (mode & PERF_EF_UPDATE) {
+ struct pt_buffer *buf = perf_get_aux(&pt->handle);
+
+ if (!buf)
+ return;
+
+ if (WARN_ON_ONCE(pt->handle.event != event))
+ return;
+
+ pt_read_offset(buf);
+
+ pt_handle_status(pt);
+
+ pt_update_head(pt);
+ }
+}
+
+static void pt_event_del(struct perf_event *event, int mode)
+{
+ struct pt *pt = this_cpu_ptr(&pt_ctx);
+ struct pt_buffer *buf;
+
+ pt_event_stop(event, PERF_EF_UPDATE);
+
+ buf = perf_get_aux(&pt->handle);
+
+ if (buf) {
+ if (buf->snapshot)
+ pt->handle.head =
+ local_xchg(&buf->data_size,
+ buf->nr_pages << PAGE_SHIFT);
+ perf_aux_output_end(&pt->handle, local_xchg(&buf->data_size, 0),
+ local_xchg(&buf->lost, 0));
+ }
+}
+
+static int pt_event_add(struct perf_event *event, int mode)
+{
+ struct pt_buffer *buf;
+ struct pt *pt = this_cpu_ptr(&pt_ctx);
+ struct hw_perf_event *hwc = &event->hw;
+ int ret = -EBUSY;
+
+ if (pt->handle.event)
+ goto fail;
+
+ buf = perf_aux_output_begin(&pt->handle, event);
+ ret = -EINVAL;
+ if (!buf)
+ goto fail_stop;
+
+ pt_buffer_reset_offsets(buf, pt->handle.head);
+ if (!buf->snapshot) {
+ ret = pt_buffer_reset_markers(buf, &pt->handle);
+ if (ret)
+ goto fail_end_stop;
+ }
+
+ if (mode & PERF_EF_START) {
+ pt_event_start(event, 0);
+ ret = -EBUSY;
+ if (hwc->state == PERF_HES_STOPPED)
+ goto fail_end_stop;
+ } else {
+ hwc->state = PERF_HES_STOPPED;
+ }
+
+ return 0;
+
+fail_end_stop:
+ perf_aux_output_end(&pt->handle, 0, true);
+fail_stop:
+ hwc->state = PERF_HES_STOPPED;
+fail:
+ return ret;
+}
+
+static void pt_event_read(struct perf_event *event)
+{
+}
+
+static void pt_event_destroy(struct perf_event *event)
+{
+ x86_del_exclusive(x86_lbr_exclusive_pt);
+}
+
+static int pt_event_init(struct perf_event *event)
+{
+ if (event->attr.type != pt_pmu.pmu.type)
+ return -ENOENT;
+
+ if (!pt_event_valid(event))
+ return -EINVAL;
+
+ if (x86_add_exclusive(x86_lbr_exclusive_pt))
+ return -EBUSY;
+
+ event->destroy = pt_event_destroy;
+
+ return 0;
+}
+
+void cpu_emergency_stop_pt(void)
+{
+ struct pt *pt = this_cpu_ptr(&pt_ctx);
+
+ if (pt->handle.event)
+ pt_event_stop(pt->handle.event, PERF_EF_UPDATE);
+}
+
+static __init int pt_init(void)
+{
+ int ret, cpu, prior_warn = 0;
+
+ BUILD_BUG_ON(sizeof(struct topa) > PAGE_SIZE);
+
+ if (!test_cpu_cap(&boot_cpu_data, X86_FEATURE_INTEL_PT))
+ return -ENODEV;
+
+ get_online_cpus();
+ for_each_online_cpu(cpu) {
+ u64 ctl;
+
+ ret = rdmsrl_safe_on_cpu(cpu, MSR_IA32_RTIT_CTL, &ctl);
+ if (!ret && (ctl & RTIT_CTL_TRACEEN))
+ prior_warn++;
+ }
+ put_online_cpus();
+
+ if (prior_warn) {
+ x86_add_exclusive(x86_lbr_exclusive_pt);
+ pr_warn("PT is enabled at boot time, doing nothing\n");
+
+ return -EBUSY;
+ }
+
+ ret = pt_pmu_hw_init();
+ if (ret)
+ return ret;
+
+ if (!pt_cap_get(PT_CAP_topa_output)) {
+ pr_warn("ToPA output is not supported on this CPU\n");
+ return -ENODEV;
+ }
+
+ if (!pt_cap_get(PT_CAP_topa_multiple_entries))
+ pt_pmu.pmu.capabilities =
+ PERF_PMU_CAP_AUX_NO_SG | PERF_PMU_CAP_AUX_SW_DOUBLEBUF;
+
+ pt_pmu.pmu.capabilities |= PERF_PMU_CAP_EXCLUSIVE | PERF_PMU_CAP_ITRACE;
+ pt_pmu.pmu.attr_groups = pt_attr_groups;
+ pt_pmu.pmu.task_ctx_nr = perf_sw_context;
+ pt_pmu.pmu.event_init = pt_event_init;
+ pt_pmu.pmu.add = pt_event_add;
+ pt_pmu.pmu.del = pt_event_del;
+ pt_pmu.pmu.start = pt_event_start;
+ pt_pmu.pmu.stop = pt_event_stop;
+ pt_pmu.pmu.read = pt_event_read;
+ pt_pmu.pmu.setup_aux = pt_buffer_setup_aux;
+ pt_pmu.pmu.free_aux = pt_buffer_free_aux;
+ ret = perf_pmu_register(&pt_pmu.pmu, "intel_pt", -1);
+
+ return ret;
+}
+arch_initcall(pt_init);
diff --git a/arch/x86/events/intel/pt.h b/arch/x86/events/intel/pt.h
new file mode 100644
index 000000000..3abb5f5cc
--- /dev/null
+++ b/arch/x86/events/intel/pt.h
@@ -0,0 +1,119 @@
+/*
+ * Intel(R) Processor Trace PMU driver for perf
+ * Copyright (c) 2013-2014, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * Intel PT is specified in the Intel Architecture Instruction Set Extensions
+ * Programming Reference:
+ * http://software.intel.com/en-us/intel-isa-extensions
+ */
+
+#ifndef __INTEL_PT_H__
+#define __INTEL_PT_H__
+
+/*
+ * Single-entry ToPA: when this close to region boundary, switch
+ * buffers to avoid losing data.
+ */
+#define TOPA_PMI_MARGIN 512
+
+#define TOPA_SHIFT 12
+
+static inline unsigned int sizes(unsigned int tsz)
+{
+ return 1 << (tsz + TOPA_SHIFT);
+};
+
+struct topa_entry {
+ u64 end : 1;
+ u64 rsvd0 : 1;
+ u64 intr : 1;
+ u64 rsvd1 : 1;
+ u64 stop : 1;
+ u64 rsvd2 : 1;
+ u64 size : 4;
+ u64 rsvd3 : 2;
+ u64 base : 36;
+ u64 rsvd4 : 16;
+};
+
+#define PT_CPUID_LEAVES 2
+#define PT_CPUID_REGS_NUM 4 /* number of regsters (eax, ebx, ecx, edx) */
+
+enum pt_capabilities {
+ PT_CAP_max_subleaf = 0,
+ PT_CAP_cr3_filtering,
+ PT_CAP_psb_cyc,
+ PT_CAP_mtc,
+ PT_CAP_topa_output,
+ PT_CAP_topa_multiple_entries,
+ PT_CAP_single_range_output,
+ PT_CAP_payloads_lip,
+ PT_CAP_mtc_periods,
+ PT_CAP_cycle_thresholds,
+ PT_CAP_psb_periods,
+};
+
+struct pt_pmu {
+ struct pmu pmu;
+ u32 caps[PT_CPUID_REGS_NUM * PT_CPUID_LEAVES];
+ bool vmx;
+};
+
+/**
+ * struct pt_buffer - buffer configuration; one buffer per task_struct or
+ * cpu, depending on perf event configuration
+ * @cpu: cpu for per-cpu allocation
+ * @tables: list of ToPA tables in this buffer
+ * @first: shorthand for first topa table
+ * @last: shorthand for last topa table
+ * @cur: current topa table
+ * @nr_pages: buffer size in pages
+ * @cur_idx: current output region's index within @cur table
+ * @output_off: offset within the current output region
+ * @data_size: running total of the amount of data in this buffer
+ * @lost: if data was lost/truncated
+ * @head: logical write offset inside the buffer
+ * @snapshot: if this is for a snapshot/overwrite counter
+ * @stop_pos: STOP topa entry in the buffer
+ * @intr_pos: INT topa entry in the buffer
+ * @data_pages: array of pages from perf
+ * @topa_index: table of topa entries indexed by page offset
+ */
+struct pt_buffer {
+ int cpu;
+ struct list_head tables;
+ struct topa *first, *last, *cur;
+ unsigned int cur_idx;
+ size_t output_off;
+ unsigned long nr_pages;
+ local_t data_size;
+ local_t lost;
+ local64_t head;
+ bool snapshot;
+ unsigned long stop_pos, intr_pos;
+ void **data_pages;
+ struct topa_entry *topa_index[0];
+};
+
+/**
+ * struct pt - per-cpu pt context
+ * @handle: perf output handle
+ * @handle_nmi: do handle PT PMI on this cpu, there's an active event
+ * @vmx_on: 1 if VMX is ON on this cpu
+ */
+struct pt {
+ struct perf_output_handle handle;
+ int handle_nmi;
+ int vmx_on;
+};
+
+#endif /* __INTEL_PT_H__ */
diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c
new file mode 100644
index 000000000..1705c9d75
--- /dev/null
+++ b/arch/x86/events/intel/rapl.c
@@ -0,0 +1,770 @@
+/*
+ * perf_event_intel_rapl.c: support Intel RAPL energy consumption counters
+ * Copyright (C) 2013 Google, Inc., Stephane Eranian
+ *
+ * Intel RAPL interface is specified in the IA-32 Manual Vol3b
+ * section 14.7.1 (September 2013)
+ *
+ * RAPL provides more controls than just reporting energy consumption
+ * however here we only expose the 3 energy consumption free running
+ * counters (pp0, pkg, dram).
+ *
+ * Each of those counters increments in a power unit defined by the
+ * RAPL_POWER_UNIT MSR. On SandyBridge, this unit is 1/(2^16) Joules
+ * but it can vary.
+ *
+ * Counter to rapl events mappings:
+ *
+ * pp0 counter: consumption of all physical cores (power plane 0)
+ * event: rapl_energy_cores
+ * perf code: 0x1
+ *
+ * pkg counter: consumption of the whole processor package
+ * event: rapl_energy_pkg
+ * perf code: 0x2
+ *
+ * dram counter: consumption of the dram domain (servers only)
+ * event: rapl_energy_dram
+ * perf code: 0x3
+ *
+ * dram counter: consumption of the builtin-gpu domain (client only)
+ * event: rapl_energy_gpu
+ * perf code: 0x4
+ *
+ * We manage those counters as free running (read-only). They may be
+ * use simultaneously by other tools, such as turbostat.
+ *
+ * The events only support system-wide mode counting. There is no
+ * sampling support because it does not make sense and is not
+ * supported by the RAPL hardware.
+ *
+ * Because we want to avoid floating-point operations in the kernel,
+ * the events are all reported in fixed point arithmetic (32.32).
+ * Tools must adjust the counts to convert them to Watts using
+ * the duration of the measurement. Tools may use a function such as
+ * ldexp(raw_count, -32);
+ */
+
+#define pr_fmt(fmt) "RAPL PMU: " fmt
+
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/perf_event.h>
+#include <asm/cpu_device_id.h>
+#include "../perf_event.h"
+
+/*
+ * RAPL energy status counters
+ */
+#define RAPL_IDX_PP0_NRG_STAT 0 /* all cores */
+#define INTEL_RAPL_PP0 0x1 /* pseudo-encoding */
+#define RAPL_IDX_PKG_NRG_STAT 1 /* entire package */
+#define INTEL_RAPL_PKG 0x2 /* pseudo-encoding */
+#define RAPL_IDX_RAM_NRG_STAT 2 /* DRAM */
+#define INTEL_RAPL_RAM 0x3 /* pseudo-encoding */
+#define RAPL_IDX_PP1_NRG_STAT 3 /* gpu */
+#define INTEL_RAPL_PP1 0x4 /* pseudo-encoding */
+
+#define NR_RAPL_DOMAINS 0x4
+static const char *const rapl_domain_names[NR_RAPL_DOMAINS] __initconst = {
+ "pp0-core",
+ "package",
+ "dram",
+ "pp1-gpu",
+};
+
+/* Clients have PP0, PKG */
+#define RAPL_IDX_CLN (1<<RAPL_IDX_PP0_NRG_STAT|\
+ 1<<RAPL_IDX_PKG_NRG_STAT|\
+ 1<<RAPL_IDX_PP1_NRG_STAT)
+
+/* Servers have PP0, PKG, RAM */
+#define RAPL_IDX_SRV (1<<RAPL_IDX_PP0_NRG_STAT|\
+ 1<<RAPL_IDX_PKG_NRG_STAT|\
+ 1<<RAPL_IDX_RAM_NRG_STAT)
+
+/* Servers have PP0, PKG, RAM, PP1 */
+#define RAPL_IDX_HSW (1<<RAPL_IDX_PP0_NRG_STAT|\
+ 1<<RAPL_IDX_PKG_NRG_STAT|\
+ 1<<RAPL_IDX_RAM_NRG_STAT|\
+ 1<<RAPL_IDX_PP1_NRG_STAT)
+
+/* Knights Landing has PKG, RAM */
+#define RAPL_IDX_KNL (1<<RAPL_IDX_PKG_NRG_STAT|\
+ 1<<RAPL_IDX_RAM_NRG_STAT)
+
+/*
+ * event code: LSB 8 bits, passed in attr->config
+ * any other bit is reserved
+ */
+#define RAPL_EVENT_MASK 0xFFULL
+
+#define DEFINE_RAPL_FORMAT_ATTR(_var, _name, _format) \
+static ssize_t __rapl_##_var##_show(struct kobject *kobj, \
+ struct kobj_attribute *attr, \
+ char *page) \
+{ \
+ BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE); \
+ return sprintf(page, _format "\n"); \
+} \
+static struct kobj_attribute format_attr_##_var = \
+ __ATTR(_name, 0444, __rapl_##_var##_show, NULL)
+
+#define RAPL_CNTR_WIDTH 32
+
+#define RAPL_EVENT_ATTR_STR(_name, v, str) \
+static struct perf_pmu_events_attr event_attr_##v = { \
+ .attr = __ATTR(_name, 0444, perf_event_sysfs_show, NULL), \
+ .id = 0, \
+ .event_str = str, \
+};
+
+struct rapl_pmu {
+ raw_spinlock_t lock;
+ int n_active;
+ int cpu;
+ struct list_head active_list;
+ struct pmu *pmu;
+ ktime_t timer_interval;
+ struct hrtimer hrtimer;
+};
+
+struct rapl_pmus {
+ struct pmu pmu;
+ unsigned int maxpkg;
+ struct rapl_pmu *pmus[];
+};
+
+ /* 1/2^hw_unit Joule */
+static int rapl_hw_unit[NR_RAPL_DOMAINS] __read_mostly;
+static struct rapl_pmus *rapl_pmus;
+static cpumask_t rapl_cpu_mask;
+static unsigned int rapl_cntr_mask;
+static u64 rapl_timer_ms;
+
+static inline struct rapl_pmu *cpu_to_rapl_pmu(unsigned int cpu)
+{
+ return rapl_pmus->pmus[topology_logical_package_id(cpu)];
+}
+
+static inline u64 rapl_read_counter(struct perf_event *event)
+{
+ u64 raw;
+ rdmsrl(event->hw.event_base, raw);
+ return raw;
+}
+
+static inline u64 rapl_scale(u64 v, int cfg)
+{
+ if (cfg > NR_RAPL_DOMAINS) {
+ pr_warn("Invalid domain %d, failed to scale data\n", cfg);
+ return v;
+ }
+ /*
+ * scale delta to smallest unit (1/2^32)
+ * users must then scale back: count * 1/(1e9*2^32) to get Joules
+ * or use ldexp(count, -32).
+ * Watts = Joules/Time delta
+ */
+ return v << (32 - rapl_hw_unit[cfg - 1]);
+}
+
+static u64 rapl_event_update(struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ u64 prev_raw_count, new_raw_count;
+ s64 delta, sdelta;
+ int shift = RAPL_CNTR_WIDTH;
+
+again:
+ prev_raw_count = local64_read(&hwc->prev_count);
+ rdmsrl(event->hw.event_base, new_raw_count);
+
+ if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
+ new_raw_count) != prev_raw_count) {
+ cpu_relax();
+ goto again;
+ }
+
+ /*
+ * Now we have the new raw value and have updated the prev
+ * timestamp already. We can now calculate the elapsed delta
+ * (event-)time and add that to the generic event.
+ *
+ * Careful, not all hw sign-extends above the physical width
+ * of the count.
+ */
+ delta = (new_raw_count << shift) - (prev_raw_count << shift);
+ delta >>= shift;
+
+ sdelta = rapl_scale(delta, event->hw.config);
+
+ local64_add(sdelta, &event->count);
+
+ return new_raw_count;
+}
+
+static void rapl_start_hrtimer(struct rapl_pmu *pmu)
+{
+ hrtimer_start(&pmu->hrtimer, pmu->timer_interval,
+ HRTIMER_MODE_REL_PINNED);
+}
+
+static enum hrtimer_restart rapl_hrtimer_handle(struct hrtimer *hrtimer)
+{
+ struct rapl_pmu *pmu = container_of(hrtimer, struct rapl_pmu, hrtimer);
+ struct perf_event *event;
+ unsigned long flags;
+
+ if (!pmu->n_active)
+ return HRTIMER_NORESTART;
+
+ raw_spin_lock_irqsave(&pmu->lock, flags);
+
+ list_for_each_entry(event, &pmu->active_list, active_entry)
+ rapl_event_update(event);
+
+ raw_spin_unlock_irqrestore(&pmu->lock, flags);
+
+ hrtimer_forward_now(hrtimer, pmu->timer_interval);
+
+ return HRTIMER_RESTART;
+}
+
+static void rapl_hrtimer_init(struct rapl_pmu *pmu)
+{
+ struct hrtimer *hr = &pmu->hrtimer;
+
+ hrtimer_init(hr, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+ hr->function = rapl_hrtimer_handle;
+}
+
+static void __rapl_pmu_event_start(struct rapl_pmu *pmu,
+ struct perf_event *event)
+{
+ if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
+ return;
+
+ event->hw.state = 0;
+
+ list_add_tail(&event->active_entry, &pmu->active_list);
+
+ local64_set(&event->hw.prev_count, rapl_read_counter(event));
+
+ pmu->n_active++;
+ if (pmu->n_active == 1)
+ rapl_start_hrtimer(pmu);
+}
+
+static void rapl_pmu_event_start(struct perf_event *event, int mode)
+{
+ struct rapl_pmu *pmu = event->pmu_private;
+ unsigned long flags;
+
+ raw_spin_lock_irqsave(&pmu->lock, flags);
+ __rapl_pmu_event_start(pmu, event);
+ raw_spin_unlock_irqrestore(&pmu->lock, flags);
+}
+
+static void rapl_pmu_event_stop(struct perf_event *event, int mode)
+{
+ struct rapl_pmu *pmu = event->pmu_private;
+ struct hw_perf_event *hwc = &event->hw;
+ unsigned long flags;
+
+ raw_spin_lock_irqsave(&pmu->lock, flags);
+
+ /* mark event as deactivated and stopped */
+ if (!(hwc->state & PERF_HES_STOPPED)) {
+ WARN_ON_ONCE(pmu->n_active <= 0);
+ pmu->n_active--;
+ if (pmu->n_active == 0)
+ hrtimer_cancel(&pmu->hrtimer);
+
+ list_del(&event->active_entry);
+
+ WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
+ hwc->state |= PERF_HES_STOPPED;
+ }
+
+ /* check if update of sw counter is necessary */
+ if ((mode & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
+ /*
+ * Drain the remaining delta count out of a event
+ * that we are disabling:
+ */
+ rapl_event_update(event);
+ hwc->state |= PERF_HES_UPTODATE;
+ }
+
+ raw_spin_unlock_irqrestore(&pmu->lock, flags);
+}
+
+static int rapl_pmu_event_add(struct perf_event *event, int mode)
+{
+ struct rapl_pmu *pmu = event->pmu_private;
+ struct hw_perf_event *hwc = &event->hw;
+ unsigned long flags;
+
+ raw_spin_lock_irqsave(&pmu->lock, flags);
+
+ hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
+
+ if (mode & PERF_EF_START)
+ __rapl_pmu_event_start(pmu, event);
+
+ raw_spin_unlock_irqrestore(&pmu->lock, flags);
+
+ return 0;
+}
+
+static void rapl_pmu_event_del(struct perf_event *event, int flags)
+{
+ rapl_pmu_event_stop(event, PERF_EF_UPDATE);
+}
+
+static int rapl_pmu_event_init(struct perf_event *event)
+{
+ u64 cfg = event->attr.config & RAPL_EVENT_MASK;
+ int bit, msr, ret = 0;
+ struct rapl_pmu *pmu;
+
+ /* only look at RAPL events */
+ if (event->attr.type != rapl_pmus->pmu.type)
+ return -ENOENT;
+
+ /* check only supported bits are set */
+ if (event->attr.config & ~RAPL_EVENT_MASK)
+ return -EINVAL;
+
+ if (event->cpu < 0)
+ return -EINVAL;
+
+ /*
+ * check event is known (determines counter)
+ */
+ switch (cfg) {
+ case INTEL_RAPL_PP0:
+ bit = RAPL_IDX_PP0_NRG_STAT;
+ msr = MSR_PP0_ENERGY_STATUS;
+ break;
+ case INTEL_RAPL_PKG:
+ bit = RAPL_IDX_PKG_NRG_STAT;
+ msr = MSR_PKG_ENERGY_STATUS;
+ break;
+ case INTEL_RAPL_RAM:
+ bit = RAPL_IDX_RAM_NRG_STAT;
+ msr = MSR_DRAM_ENERGY_STATUS;
+ break;
+ case INTEL_RAPL_PP1:
+ bit = RAPL_IDX_PP1_NRG_STAT;
+ msr = MSR_PP1_ENERGY_STATUS;
+ break;
+ default:
+ return -EINVAL;
+ }
+ /* check event supported */
+ if (!(rapl_cntr_mask & (1 << bit)))
+ return -EINVAL;
+
+ /* unsupported modes and filters */
+ if (event->attr.exclude_user ||
+ event->attr.exclude_kernel ||
+ event->attr.exclude_hv ||
+ event->attr.exclude_idle ||
+ event->attr.exclude_host ||
+ event->attr.exclude_guest ||
+ event->attr.sample_period) /* no sampling */
+ return -EINVAL;
+
+ /* must be done before validate_group */
+ pmu = cpu_to_rapl_pmu(event->cpu);
+ event->cpu = pmu->cpu;
+ event->pmu_private = pmu;
+ event->hw.event_base = msr;
+ event->hw.config = cfg;
+ event->hw.idx = bit;
+
+ return ret;
+}
+
+static void rapl_pmu_event_read(struct perf_event *event)
+{
+ rapl_event_update(event);
+}
+
+static ssize_t rapl_get_attr_cpumask(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ return cpumap_print_to_pagebuf(true, buf, &rapl_cpu_mask);
+}
+
+static DEVICE_ATTR(cpumask, S_IRUGO, rapl_get_attr_cpumask, NULL);
+
+static struct attribute *rapl_pmu_attrs[] = {
+ &dev_attr_cpumask.attr,
+ NULL,
+};
+
+static struct attribute_group rapl_pmu_attr_group = {
+ .attrs = rapl_pmu_attrs,
+};
+
+RAPL_EVENT_ATTR_STR(energy-cores, rapl_cores, "event=0x01");
+RAPL_EVENT_ATTR_STR(energy-pkg , rapl_pkg, "event=0x02");
+RAPL_EVENT_ATTR_STR(energy-ram , rapl_ram, "event=0x03");
+RAPL_EVENT_ATTR_STR(energy-gpu , rapl_gpu, "event=0x04");
+
+RAPL_EVENT_ATTR_STR(energy-cores.unit, rapl_cores_unit, "Joules");
+RAPL_EVENT_ATTR_STR(energy-pkg.unit , rapl_pkg_unit, "Joules");
+RAPL_EVENT_ATTR_STR(energy-ram.unit , rapl_ram_unit, "Joules");
+RAPL_EVENT_ATTR_STR(energy-gpu.unit , rapl_gpu_unit, "Joules");
+
+/*
+ * we compute in 0.23 nJ increments regardless of MSR
+ */
+RAPL_EVENT_ATTR_STR(energy-cores.scale, rapl_cores_scale, "2.3283064365386962890625e-10");
+RAPL_EVENT_ATTR_STR(energy-pkg.scale, rapl_pkg_scale, "2.3283064365386962890625e-10");
+RAPL_EVENT_ATTR_STR(energy-ram.scale, rapl_ram_scale, "2.3283064365386962890625e-10");
+RAPL_EVENT_ATTR_STR(energy-gpu.scale, rapl_gpu_scale, "2.3283064365386962890625e-10");
+
+static struct attribute *rapl_events_srv_attr[] = {
+ EVENT_PTR(rapl_cores),
+ EVENT_PTR(rapl_pkg),
+ EVENT_PTR(rapl_ram),
+
+ EVENT_PTR(rapl_cores_unit),
+ EVENT_PTR(rapl_pkg_unit),
+ EVENT_PTR(rapl_ram_unit),
+
+ EVENT_PTR(rapl_cores_scale),
+ EVENT_PTR(rapl_pkg_scale),
+ EVENT_PTR(rapl_ram_scale),
+ NULL,
+};
+
+static struct attribute *rapl_events_cln_attr[] = {
+ EVENT_PTR(rapl_cores),
+ EVENT_PTR(rapl_pkg),
+ EVENT_PTR(rapl_gpu),
+
+ EVENT_PTR(rapl_cores_unit),
+ EVENT_PTR(rapl_pkg_unit),
+ EVENT_PTR(rapl_gpu_unit),
+
+ EVENT_PTR(rapl_cores_scale),
+ EVENT_PTR(rapl_pkg_scale),
+ EVENT_PTR(rapl_gpu_scale),
+ NULL,
+};
+
+static struct attribute *rapl_events_hsw_attr[] = {
+ EVENT_PTR(rapl_cores),
+ EVENT_PTR(rapl_pkg),
+ EVENT_PTR(rapl_gpu),
+ EVENT_PTR(rapl_ram),
+
+ EVENT_PTR(rapl_cores_unit),
+ EVENT_PTR(rapl_pkg_unit),
+ EVENT_PTR(rapl_gpu_unit),
+ EVENT_PTR(rapl_ram_unit),
+
+ EVENT_PTR(rapl_cores_scale),
+ EVENT_PTR(rapl_pkg_scale),
+ EVENT_PTR(rapl_gpu_scale),
+ EVENT_PTR(rapl_ram_scale),
+ NULL,
+};
+
+static struct attribute *rapl_events_knl_attr[] = {
+ EVENT_PTR(rapl_pkg),
+ EVENT_PTR(rapl_ram),
+
+ EVENT_PTR(rapl_pkg_unit),
+ EVENT_PTR(rapl_ram_unit),
+
+ EVENT_PTR(rapl_pkg_scale),
+ EVENT_PTR(rapl_ram_scale),
+ NULL,
+};
+
+static struct attribute_group rapl_pmu_events_group = {
+ .name = "events",
+ .attrs = NULL, /* patched at runtime */
+};
+
+DEFINE_RAPL_FORMAT_ATTR(event, event, "config:0-7");
+static struct attribute *rapl_formats_attr[] = {
+ &format_attr_event.attr,
+ NULL,
+};
+
+static struct attribute_group rapl_pmu_format_group = {
+ .name = "format",
+ .attrs = rapl_formats_attr,
+};
+
+const struct attribute_group *rapl_attr_groups[] = {
+ &rapl_pmu_attr_group,
+ &rapl_pmu_format_group,
+ &rapl_pmu_events_group,
+ NULL,
+};
+
+static void rapl_cpu_exit(int cpu)
+{
+ struct rapl_pmu *pmu = cpu_to_rapl_pmu(cpu);
+ int target;
+
+ /* Check if exiting cpu is used for collecting rapl events */
+ if (!cpumask_test_and_clear_cpu(cpu, &rapl_cpu_mask))
+ return;
+
+ pmu->cpu = -1;
+ /* Find a new cpu to collect rapl events */
+ target = cpumask_any_but(topology_core_cpumask(cpu), cpu);
+
+ /* Migrate rapl events to the new target */
+ if (target < nr_cpu_ids) {
+ cpumask_set_cpu(target, &rapl_cpu_mask);
+ pmu->cpu = target;
+ perf_pmu_migrate_context(pmu->pmu, cpu, target);
+ }
+}
+
+static void rapl_cpu_init(int cpu)
+{
+ struct rapl_pmu *pmu = cpu_to_rapl_pmu(cpu);
+ int target;
+
+ /*
+ * Check if there is an online cpu in the package which collects rapl
+ * events already.
+ */
+ target = cpumask_any_and(&rapl_cpu_mask, topology_core_cpumask(cpu));
+ if (target < nr_cpu_ids)
+ return;
+
+ cpumask_set_cpu(cpu, &rapl_cpu_mask);
+ pmu->cpu = cpu;
+}
+
+static int rapl_cpu_prepare(int cpu)
+{
+ struct rapl_pmu *pmu = cpu_to_rapl_pmu(cpu);
+
+ if (pmu)
+ return 0;
+
+ pmu = kzalloc_node(sizeof(*pmu), GFP_KERNEL, cpu_to_node(cpu));
+ if (!pmu)
+ return -ENOMEM;
+
+ raw_spin_lock_init(&pmu->lock);
+ INIT_LIST_HEAD(&pmu->active_list);
+ pmu->pmu = &rapl_pmus->pmu;
+ pmu->timer_interval = ms_to_ktime(rapl_timer_ms);
+ pmu->cpu = -1;
+ rapl_hrtimer_init(pmu);
+ rapl_pmus->pmus[topology_logical_package_id(cpu)] = pmu;
+ return 0;
+}
+
+static int rapl_cpu_notifier(struct notifier_block *self,
+ unsigned long action, void *hcpu)
+{
+ unsigned int cpu = (long)hcpu;
+
+ switch (action & ~CPU_TASKS_FROZEN) {
+ case CPU_UP_PREPARE:
+ rapl_cpu_prepare(cpu);
+ break;
+
+ case CPU_DOWN_FAILED:
+ case CPU_ONLINE:
+ rapl_cpu_init(cpu);
+ break;
+
+ case CPU_DOWN_PREPARE:
+ rapl_cpu_exit(cpu);
+ break;
+ }
+ return NOTIFY_OK;
+}
+
+static int rapl_check_hw_unit(bool apply_quirk)
+{
+ u64 msr_rapl_power_unit_bits;
+ int i;
+
+ /* protect rdmsrl() to handle virtualization */
+ if (rdmsrl_safe(MSR_RAPL_POWER_UNIT, &msr_rapl_power_unit_bits))
+ return -1;
+ for (i = 0; i < NR_RAPL_DOMAINS; i++)
+ rapl_hw_unit[i] = (msr_rapl_power_unit_bits >> 8) & 0x1FULL;
+
+ /*
+ * DRAM domain on HSW server and KNL has fixed energy unit which can be
+ * different than the unit from power unit MSR. See
+ * "Intel Xeon Processor E5-1600 and E5-2600 v3 Product Families, V2
+ * of 2. Datasheet, September 2014, Reference Number: 330784-001 "
+ */
+ if (apply_quirk)
+ rapl_hw_unit[RAPL_IDX_RAM_NRG_STAT] = 16;
+
+ /*
+ * Calculate the timer rate:
+ * Use reference of 200W for scaling the timeout to avoid counter
+ * overflows. 200W = 200 Joules/sec
+ * Divide interval by 2 to avoid lockstep (2 * 100)
+ * if hw unit is 32, then we use 2 ms 1/200/2
+ */
+ rapl_timer_ms = 2;
+ if (rapl_hw_unit[0] < 32) {
+ rapl_timer_ms = (1000 / (2 * 100));
+ rapl_timer_ms *= (1ULL << (32 - rapl_hw_unit[0] - 1));
+ }
+ return 0;
+}
+
+static void __init rapl_advertise(void)
+{
+ int i;
+
+ pr_info("API unit is 2^-32 Joules, %d fixed counters, %llu ms ovfl timer\n",
+ hweight32(rapl_cntr_mask), rapl_timer_ms);
+
+ for (i = 0; i < NR_RAPL_DOMAINS; i++) {
+ if (rapl_cntr_mask & (1 << i)) {
+ pr_info("hw unit of domain %s 2^-%d Joules\n",
+ rapl_domain_names[i], rapl_hw_unit[i]);
+ }
+ }
+}
+
+static int __init rapl_prepare_cpus(void)
+{
+ unsigned int cpu, pkg;
+ int ret;
+
+ for_each_online_cpu(cpu) {
+ pkg = topology_logical_package_id(cpu);
+ if (rapl_pmus->pmus[pkg])
+ continue;
+
+ ret = rapl_cpu_prepare(cpu);
+ if (ret)
+ return ret;
+ rapl_cpu_init(cpu);
+ }
+ return 0;
+}
+
+static void __init cleanup_rapl_pmus(void)
+{
+ int i;
+
+ for (i = 0; i < rapl_pmus->maxpkg; i++)
+ kfree(rapl_pmus->pmus + i);
+ kfree(rapl_pmus);
+}
+
+static int __init init_rapl_pmus(void)
+{
+ int maxpkg = topology_max_packages();
+ size_t size;
+
+ size = sizeof(*rapl_pmus) + maxpkg * sizeof(struct rapl_pmu *);
+ rapl_pmus = kzalloc(size, GFP_KERNEL);
+ if (!rapl_pmus)
+ return -ENOMEM;
+
+ rapl_pmus->maxpkg = maxpkg;
+ rapl_pmus->pmu.attr_groups = rapl_attr_groups;
+ rapl_pmus->pmu.task_ctx_nr = perf_invalid_context;
+ rapl_pmus->pmu.event_init = rapl_pmu_event_init;
+ rapl_pmus->pmu.add = rapl_pmu_event_add;
+ rapl_pmus->pmu.del = rapl_pmu_event_del;
+ rapl_pmus->pmu.start = rapl_pmu_event_start;
+ rapl_pmus->pmu.stop = rapl_pmu_event_stop;
+ rapl_pmus->pmu.read = rapl_pmu_event_read;
+ return 0;
+}
+
+static const struct x86_cpu_id rapl_cpu_match[] __initconst = {
+ [0] = { .vendor = X86_VENDOR_INTEL, .family = 6 },
+ [1] = {},
+};
+
+static int __init rapl_pmu_init(void)
+{
+ bool apply_quirk = false;
+ int ret;
+
+ if (!x86_match_cpu(rapl_cpu_match))
+ return -ENODEV;
+
+ switch (boot_cpu_data.x86_model) {
+ case 42: /* Sandy Bridge */
+ case 58: /* Ivy Bridge */
+ rapl_cntr_mask = RAPL_IDX_CLN;
+ rapl_pmu_events_group.attrs = rapl_events_cln_attr;
+ break;
+ case 63: /* Haswell-Server */
+ case 79: /* Broadwell-Server */
+ apply_quirk = true;
+ rapl_cntr_mask = RAPL_IDX_SRV;
+ rapl_pmu_events_group.attrs = rapl_events_srv_attr;
+ break;
+ case 60: /* Haswell */
+ case 69: /* Haswell-Celeron */
+ case 70: /* Haswell GT3e */
+ case 61: /* Broadwell */
+ case 71: /* Broadwell-H */
+ rapl_cntr_mask = RAPL_IDX_HSW;
+ rapl_pmu_events_group.attrs = rapl_events_hsw_attr;
+ break;
+ case 45: /* Sandy Bridge-EP */
+ case 62: /* IvyTown */
+ rapl_cntr_mask = RAPL_IDX_SRV;
+ rapl_pmu_events_group.attrs = rapl_events_srv_attr;
+ break;
+ case 87: /* Knights Landing */
+ apply_quirk = true;
+ rapl_cntr_mask = RAPL_IDX_KNL;
+ rapl_pmu_events_group.attrs = rapl_events_knl_attr;
+ break;
+ default:
+ return -ENODEV;
+ }
+
+ ret = rapl_check_hw_unit(apply_quirk);
+ if (ret)
+ return ret;
+
+ ret = init_rapl_pmus();
+ if (ret)
+ return ret;
+
+ cpu_notifier_register_begin();
+
+ ret = rapl_prepare_cpus();
+ if (ret)
+ goto out;
+
+ ret = perf_pmu_register(&rapl_pmus->pmu, "power", -1);
+ if (ret)
+ goto out;
+
+ __perf_cpu_notifier(rapl_cpu_notifier);
+ cpu_notifier_register_done();
+ rapl_advertise();
+ return 0;
+
+out:
+ pr_warn("Initialization failed (%d), disabled\n", ret);
+ cleanup_rapl_pmus();
+ cpu_notifier_register_done();
+ return ret;
+}
+device_initcall(rapl_pmu_init);
diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c
new file mode 100644
index 000000000..f921a1ed4
--- /dev/null
+++ b/arch/x86/events/intel/uncore.c
@@ -0,0 +1,1412 @@
+#include "uncore.h"
+
+static struct intel_uncore_type *empty_uncore[] = { NULL, };
+struct intel_uncore_type **uncore_msr_uncores = empty_uncore;
+struct intel_uncore_type **uncore_pci_uncores = empty_uncore;
+
+static bool pcidrv_registered;
+struct pci_driver *uncore_pci_driver;
+/* pci bus to socket mapping */
+DEFINE_RAW_SPINLOCK(pci2phy_map_lock);
+struct list_head pci2phy_map_head = LIST_HEAD_INIT(pci2phy_map_head);
+struct pci_extra_dev *uncore_extra_pci_dev;
+static int max_packages;
+
+/* mask of cpus that collect uncore events */
+static cpumask_t uncore_cpu_mask;
+
+/* constraint for the fixed counter */
+static struct event_constraint uncore_constraint_fixed =
+ EVENT_CONSTRAINT(~0ULL, 1 << UNCORE_PMC_IDX_FIXED, ~0ULL);
+struct event_constraint uncore_constraint_empty =
+ EVENT_CONSTRAINT(0, 0, 0);
+
+static int uncore_pcibus_to_physid(struct pci_bus *bus)
+{
+ struct pci2phy_map *map;
+ int phys_id = -1;
+
+ raw_spin_lock(&pci2phy_map_lock);
+ list_for_each_entry(map, &pci2phy_map_head, list) {
+ if (map->segment == pci_domain_nr(bus)) {
+ phys_id = map->pbus_to_physid[bus->number];
+ break;
+ }
+ }
+ raw_spin_unlock(&pci2phy_map_lock);
+
+ return phys_id;
+}
+
+static void uncore_free_pcibus_map(void)
+{
+ struct pci2phy_map *map, *tmp;
+
+ list_for_each_entry_safe(map, tmp, &pci2phy_map_head, list) {
+ list_del(&map->list);
+ kfree(map);
+ }
+}
+
+struct pci2phy_map *__find_pci2phy_map(int segment)
+{
+ struct pci2phy_map *map, *alloc = NULL;
+ int i;
+
+ lockdep_assert_held(&pci2phy_map_lock);
+
+lookup:
+ list_for_each_entry(map, &pci2phy_map_head, list) {
+ if (map->segment == segment)
+ goto end;
+ }
+
+ if (!alloc) {
+ raw_spin_unlock(&pci2phy_map_lock);
+ alloc = kmalloc(sizeof(struct pci2phy_map), GFP_KERNEL);
+ raw_spin_lock(&pci2phy_map_lock);
+
+ if (!alloc)
+ return NULL;
+
+ goto lookup;
+ }
+
+ map = alloc;
+ alloc = NULL;
+ map->segment = segment;
+ for (i = 0; i < 256; i++)
+ map->pbus_to_physid[i] = -1;
+ list_add_tail(&map->list, &pci2phy_map_head);
+
+end:
+ kfree(alloc);
+ return map;
+}
+
+ssize_t uncore_event_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct uncore_event_desc *event =
+ container_of(attr, struct uncore_event_desc, attr);
+ return sprintf(buf, "%s", event->config);
+}
+
+struct intel_uncore_box *uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu)
+{
+ return pmu->boxes[topology_logical_package_id(cpu)];
+}
+
+u64 uncore_msr_read_counter(struct intel_uncore_box *box, struct perf_event *event)
+{
+ u64 count;
+
+ rdmsrl(event->hw.event_base, count);
+
+ return count;
+}
+
+/*
+ * generic get constraint function for shared match/mask registers.
+ */
+struct event_constraint *
+uncore_get_constraint(struct intel_uncore_box *box, struct perf_event *event)
+{
+ struct intel_uncore_extra_reg *er;
+ struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
+ struct hw_perf_event_extra *reg2 = &event->hw.branch_reg;
+ unsigned long flags;
+ bool ok = false;
+
+ /*
+ * reg->alloc can be set due to existing state, so for fake box we
+ * need to ignore this, otherwise we might fail to allocate proper
+ * fake state for this extra reg constraint.
+ */
+ if (reg1->idx == EXTRA_REG_NONE ||
+ (!uncore_box_is_fake(box) && reg1->alloc))
+ return NULL;
+
+ er = &box->shared_regs[reg1->idx];
+ raw_spin_lock_irqsave(&er->lock, flags);
+ if (!atomic_read(&er->ref) ||
+ (er->config1 == reg1->config && er->config2 == reg2->config)) {
+ atomic_inc(&er->ref);
+ er->config1 = reg1->config;
+ er->config2 = reg2->config;
+ ok = true;
+ }
+ raw_spin_unlock_irqrestore(&er->lock, flags);
+
+ if (ok) {
+ if (!uncore_box_is_fake(box))
+ reg1->alloc = 1;
+ return NULL;
+ }
+
+ return &uncore_constraint_empty;
+}
+
+void uncore_put_constraint(struct intel_uncore_box *box, struct perf_event *event)
+{
+ struct intel_uncore_extra_reg *er;
+ struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
+
+ /*
+ * Only put constraint if extra reg was actually allocated. Also
+ * takes care of event which do not use an extra shared reg.
+ *
+ * Also, if this is a fake box we shouldn't touch any event state
+ * (reg->alloc) and we don't care about leaving inconsistent box
+ * state either since it will be thrown out.
+ */
+ if (uncore_box_is_fake(box) || !reg1->alloc)
+ return;
+
+ er = &box->shared_regs[reg1->idx];
+ atomic_dec(&er->ref);
+ reg1->alloc = 0;
+}
+
+u64 uncore_shared_reg_config(struct intel_uncore_box *box, int idx)
+{
+ struct intel_uncore_extra_reg *er;
+ unsigned long flags;
+ u64 config;
+
+ er = &box->shared_regs[idx];
+
+ raw_spin_lock_irqsave(&er->lock, flags);
+ config = er->config;
+ raw_spin_unlock_irqrestore(&er->lock, flags);
+
+ return config;
+}
+
+static void uncore_assign_hw_event(struct intel_uncore_box *box,
+ struct perf_event *event, int idx)
+{
+ struct hw_perf_event *hwc = &event->hw;
+
+ hwc->idx = idx;
+ hwc->last_tag = ++box->tags[idx];
+
+ if (hwc->idx == UNCORE_PMC_IDX_FIXED) {
+ hwc->event_base = uncore_fixed_ctr(box);
+ hwc->config_base = uncore_fixed_ctl(box);
+ return;
+ }
+
+ hwc->config_base = uncore_event_ctl(box, hwc->idx);
+ hwc->event_base = uncore_perf_ctr(box, hwc->idx);
+}
+
+void uncore_perf_event_update(struct intel_uncore_box *box, struct perf_event *event)
+{
+ u64 prev_count, new_count, delta;
+ int shift;
+
+ if (event->hw.idx >= UNCORE_PMC_IDX_FIXED)
+ shift = 64 - uncore_fixed_ctr_bits(box);
+ else
+ shift = 64 - uncore_perf_ctr_bits(box);
+
+ /* the hrtimer might modify the previous event value */
+again:
+ prev_count = local64_read(&event->hw.prev_count);
+ new_count = uncore_read_counter(box, event);
+ if (local64_xchg(&event->hw.prev_count, new_count) != prev_count)
+ goto again;
+
+ delta = (new_count << shift) - (prev_count << shift);
+ delta >>= shift;
+
+ local64_add(delta, &event->count);
+}
+
+/*
+ * The overflow interrupt is unavailable for SandyBridge-EP, is broken
+ * for SandyBridge. So we use hrtimer to periodically poll the counter
+ * to avoid overflow.
+ */
+static enum hrtimer_restart uncore_pmu_hrtimer(struct hrtimer *hrtimer)
+{
+ struct intel_uncore_box *box;
+ struct perf_event *event;
+ unsigned long flags;
+ int bit;
+
+ box = container_of(hrtimer, struct intel_uncore_box, hrtimer);
+ if (!box->n_active || box->cpu != smp_processor_id())
+ return HRTIMER_NORESTART;
+ /*
+ * disable local interrupt to prevent uncore_pmu_event_start/stop
+ * to interrupt the update process
+ */
+ local_irq_save(flags);
+
+ /*
+ * handle boxes with an active event list as opposed to active
+ * counters
+ */
+ list_for_each_entry(event, &box->active_list, active_entry) {
+ uncore_perf_event_update(box, event);
+ }
+
+ for_each_set_bit(bit, box->active_mask, UNCORE_PMC_IDX_MAX)
+ uncore_perf_event_update(box, box->events[bit]);
+
+ local_irq_restore(flags);
+
+ hrtimer_forward_now(hrtimer, ns_to_ktime(box->hrtimer_duration));
+ return HRTIMER_RESTART;
+}
+
+void uncore_pmu_start_hrtimer(struct intel_uncore_box *box)
+{
+ hrtimer_start(&box->hrtimer, ns_to_ktime(box->hrtimer_duration),
+ HRTIMER_MODE_REL_PINNED);
+}
+
+void uncore_pmu_cancel_hrtimer(struct intel_uncore_box *box)
+{
+ hrtimer_cancel(&box->hrtimer);
+}
+
+static void uncore_pmu_init_hrtimer(struct intel_uncore_box *box)
+{
+ hrtimer_init(&box->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+ box->hrtimer.function = uncore_pmu_hrtimer;
+}
+
+static struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type,
+ int node)
+{
+ int i, size, numshared = type->num_shared_regs ;
+ struct intel_uncore_box *box;
+
+ size = sizeof(*box) + numshared * sizeof(struct intel_uncore_extra_reg);
+
+ box = kzalloc_node(size, GFP_KERNEL, node);
+ if (!box)
+ return NULL;
+
+ for (i = 0; i < numshared; i++)
+ raw_spin_lock_init(&box->shared_regs[i].lock);
+
+ uncore_pmu_init_hrtimer(box);
+ box->cpu = -1;
+ box->pci_phys_id = -1;
+ box->pkgid = -1;
+
+ /* set default hrtimer timeout */
+ box->hrtimer_duration = UNCORE_PMU_HRTIMER_INTERVAL;
+
+ INIT_LIST_HEAD(&box->active_list);
+
+ return box;
+}
+
+/*
+ * Using uncore_pmu_event_init pmu event_init callback
+ * as a detection point for uncore events.
+ */
+static int uncore_pmu_event_init(struct perf_event *event);
+
+static bool is_uncore_event(struct perf_event *event)
+{
+ return event->pmu->event_init == uncore_pmu_event_init;
+}
+
+static int
+uncore_collect_events(struct intel_uncore_box *box, struct perf_event *leader,
+ bool dogrp)
+{
+ struct perf_event *event;
+ int n, max_count;
+
+ max_count = box->pmu->type->num_counters;
+ if (box->pmu->type->fixed_ctl)
+ max_count++;
+
+ if (box->n_events >= max_count)
+ return -EINVAL;
+
+ n = box->n_events;
+
+ if (is_uncore_event(leader)) {
+ box->event_list[n] = leader;
+ n++;
+ }
+
+ if (!dogrp)
+ return n;
+
+ list_for_each_entry(event, &leader->sibling_list, group_entry) {
+ if (!is_uncore_event(event) ||
+ event->state <= PERF_EVENT_STATE_OFF)
+ continue;
+
+ if (n >= max_count)
+ return -EINVAL;
+
+ box->event_list[n] = event;
+ n++;
+ }
+ return n;
+}
+
+static struct event_constraint *
+uncore_get_event_constraint(struct intel_uncore_box *box, struct perf_event *event)
+{
+ struct intel_uncore_type *type = box->pmu->type;
+ struct event_constraint *c;
+
+ if (type->ops->get_constraint) {
+ c = type->ops->get_constraint(box, event);
+ if (c)
+ return c;
+ }
+
+ if (event->attr.config == UNCORE_FIXED_EVENT)
+ return &uncore_constraint_fixed;
+
+ if (type->constraints) {
+ for_each_event_constraint(c, type->constraints) {
+ if ((event->hw.config & c->cmask) == c->code)
+ return c;
+ }
+ }
+
+ return &type->unconstrainted;
+}
+
+static void uncore_put_event_constraint(struct intel_uncore_box *box,
+ struct perf_event *event)
+{
+ if (box->pmu->type->ops->put_constraint)
+ box->pmu->type->ops->put_constraint(box, event);
+}
+
+static int uncore_assign_events(struct intel_uncore_box *box, int assign[], int n)
+{
+ unsigned long used_mask[BITS_TO_LONGS(UNCORE_PMC_IDX_MAX)];
+ struct event_constraint *c;
+ int i, wmin, wmax, ret = 0;
+ struct hw_perf_event *hwc;
+
+ bitmap_zero(used_mask, UNCORE_PMC_IDX_MAX);
+
+ for (i = 0, wmin = UNCORE_PMC_IDX_MAX, wmax = 0; i < n; i++) {
+ c = uncore_get_event_constraint(box, box->event_list[i]);
+ box->event_constraint[i] = c;
+ wmin = min(wmin, c->weight);
+ wmax = max(wmax, c->weight);
+ }
+
+ /* fastpath, try to reuse previous register */
+ for (i = 0; i < n; i++) {
+ hwc = &box->event_list[i]->hw;
+ c = box->event_constraint[i];
+
+ /* never assigned */
+ if (hwc->idx == -1)
+ break;
+
+ /* constraint still honored */
+ if (!test_bit(hwc->idx, c->idxmsk))
+ break;
+
+ /* not already used */
+ if (test_bit(hwc->idx, used_mask))
+ break;
+
+ __set_bit(hwc->idx, used_mask);
+ if (assign)
+ assign[i] = hwc->idx;
+ }
+ /* slow path */
+ if (i != n)
+ ret = perf_assign_events(box->event_constraint, n,
+ wmin, wmax, n, assign);
+
+ if (!assign || ret) {
+ for (i = 0; i < n; i++)
+ uncore_put_event_constraint(box, box->event_list[i]);
+ }
+ return ret ? -EINVAL : 0;
+}
+
+static void uncore_pmu_event_start(struct perf_event *event, int flags)
+{
+ struct intel_uncore_box *box = uncore_event_to_box(event);
+ int idx = event->hw.idx;
+
+ if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
+ return;
+
+ if (WARN_ON_ONCE(idx == -1 || idx >= UNCORE_PMC_IDX_MAX))
+ return;
+
+ event->hw.state = 0;
+ box->events[idx] = event;
+ box->n_active++;
+ __set_bit(idx, box->active_mask);
+
+ local64_set(&event->hw.prev_count, uncore_read_counter(box, event));
+ uncore_enable_event(box, event);
+
+ if (box->n_active == 1) {
+ uncore_enable_box(box);
+ uncore_pmu_start_hrtimer(box);
+ }
+}
+
+static void uncore_pmu_event_stop(struct perf_event *event, int flags)
+{
+ struct intel_uncore_box *box = uncore_event_to_box(event);
+ struct hw_perf_event *hwc = &event->hw;
+
+ if (__test_and_clear_bit(hwc->idx, box->active_mask)) {
+ uncore_disable_event(box, event);
+ box->n_active--;
+ box->events[hwc->idx] = NULL;
+ WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
+ hwc->state |= PERF_HES_STOPPED;
+
+ if (box->n_active == 0) {
+ uncore_disable_box(box);
+ uncore_pmu_cancel_hrtimer(box);
+ }
+ }
+
+ if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
+ /*
+ * Drain the remaining delta count out of a event
+ * that we are disabling:
+ */
+ uncore_perf_event_update(box, event);
+ hwc->state |= PERF_HES_UPTODATE;
+ }
+}
+
+static int uncore_pmu_event_add(struct perf_event *event, int flags)
+{
+ struct intel_uncore_box *box = uncore_event_to_box(event);
+ struct hw_perf_event *hwc = &event->hw;
+ int assign[UNCORE_PMC_IDX_MAX];
+ int i, n, ret;
+
+ if (!box)
+ return -ENODEV;
+
+ ret = n = uncore_collect_events(box, event, false);
+ if (ret < 0)
+ return ret;
+
+ hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
+ if (!(flags & PERF_EF_START))
+ hwc->state |= PERF_HES_ARCH;
+
+ ret = uncore_assign_events(box, assign, n);
+ if (ret)
+ return ret;
+
+ /* save events moving to new counters */
+ for (i = 0; i < box->n_events; i++) {
+ event = box->event_list[i];
+ hwc = &event->hw;
+
+ if (hwc->idx == assign[i] &&
+ hwc->last_tag == box->tags[assign[i]])
+ continue;
+ /*
+ * Ensure we don't accidentally enable a stopped
+ * counter simply because we rescheduled.
+ */
+ if (hwc->state & PERF_HES_STOPPED)
+ hwc->state |= PERF_HES_ARCH;
+
+ uncore_pmu_event_stop(event, PERF_EF_UPDATE);
+ }
+
+ /* reprogram moved events into new counters */
+ for (i = 0; i < n; i++) {
+ event = box->event_list[i];
+ hwc = &event->hw;
+
+ if (hwc->idx != assign[i] ||
+ hwc->last_tag != box->tags[assign[i]])
+ uncore_assign_hw_event(box, event, assign[i]);
+ else if (i < box->n_events)
+ continue;
+
+ if (hwc->state & PERF_HES_ARCH)
+ continue;
+
+ uncore_pmu_event_start(event, 0);
+ }
+ box->n_events = n;
+
+ return 0;
+}
+
+static void uncore_pmu_event_del(struct perf_event *event, int flags)
+{
+ struct intel_uncore_box *box = uncore_event_to_box(event);
+ int i;
+
+ uncore_pmu_event_stop(event, PERF_EF_UPDATE);
+
+ for (i = 0; i < box->n_events; i++) {
+ if (event == box->event_list[i]) {
+ uncore_put_event_constraint(box, event);
+
+ for (++i; i < box->n_events; i++)
+ box->event_list[i - 1] = box->event_list[i];
+
+ --box->n_events;
+ break;
+ }
+ }
+
+ event->hw.idx = -1;
+ event->hw.last_tag = ~0ULL;
+}
+
+void uncore_pmu_event_read(struct perf_event *event)
+{
+ struct intel_uncore_box *box = uncore_event_to_box(event);
+ uncore_perf_event_update(box, event);
+}
+
+/*
+ * validation ensures the group can be loaded onto the
+ * PMU if it was the only group available.
+ */
+static int uncore_validate_group(struct intel_uncore_pmu *pmu,
+ struct perf_event *event)
+{
+ struct perf_event *leader = event->group_leader;
+ struct intel_uncore_box *fake_box;
+ int ret = -EINVAL, n;
+
+ fake_box = uncore_alloc_box(pmu->type, NUMA_NO_NODE);
+ if (!fake_box)
+ return -ENOMEM;
+
+ fake_box->pmu = pmu;
+ /*
+ * the event is not yet connected with its
+ * siblings therefore we must first collect
+ * existing siblings, then add the new event
+ * before we can simulate the scheduling
+ */
+ n = uncore_collect_events(fake_box, leader, true);
+ if (n < 0)
+ goto out;
+
+ fake_box->n_events = n;
+ n = uncore_collect_events(fake_box, event, false);
+ if (n < 0)
+ goto out;
+
+ fake_box->n_events = n;
+
+ ret = uncore_assign_events(fake_box, NULL, n);
+out:
+ kfree(fake_box);
+ return ret;
+}
+
+static int uncore_pmu_event_init(struct perf_event *event)
+{
+ struct intel_uncore_pmu *pmu;
+ struct intel_uncore_box *box;
+ struct hw_perf_event *hwc = &event->hw;
+ int ret;
+
+ if (event->attr.type != event->pmu->type)
+ return -ENOENT;
+
+ pmu = uncore_event_to_pmu(event);
+ /* no device found for this pmu */
+ if (pmu->func_id < 0)
+ return -ENOENT;
+
+ /*
+ * Uncore PMU does measure at all privilege level all the time.
+ * So it doesn't make sense to specify any exclude bits.
+ */
+ if (event->attr.exclude_user || event->attr.exclude_kernel ||
+ event->attr.exclude_hv || event->attr.exclude_idle)
+ return -EINVAL;
+
+ /* Sampling not supported yet */
+ if (hwc->sample_period)
+ return -EINVAL;
+
+ /*
+ * Place all uncore events for a particular physical package
+ * onto a single cpu
+ */
+ if (event->cpu < 0)
+ return -EINVAL;
+ box = uncore_pmu_to_box(pmu, event->cpu);
+ if (!box || box->cpu < 0)
+ return -EINVAL;
+ event->cpu = box->cpu;
+ event->pmu_private = box;
+
+ event->hw.idx = -1;
+ event->hw.last_tag = ~0ULL;
+ event->hw.extra_reg.idx = EXTRA_REG_NONE;
+ event->hw.branch_reg.idx = EXTRA_REG_NONE;
+
+ if (event->attr.config == UNCORE_FIXED_EVENT) {
+ /* no fixed counter */
+ if (!pmu->type->fixed_ctl)
+ return -EINVAL;
+ /*
+ * if there is only one fixed counter, only the first pmu
+ * can access the fixed counter
+ */
+ if (pmu->type->single_fixed && pmu->pmu_idx > 0)
+ return -EINVAL;
+
+ /* fixed counters have event field hardcoded to zero */
+ hwc->config = 0ULL;
+ } else {
+ hwc->config = event->attr.config & pmu->type->event_mask;
+ if (pmu->type->ops->hw_config) {
+ ret = pmu->type->ops->hw_config(box, event);
+ if (ret)
+ return ret;
+ }
+ }
+
+ if (event->group_leader != event)
+ ret = uncore_validate_group(pmu, event);
+ else
+ ret = 0;
+
+ return ret;
+}
+
+static ssize_t uncore_get_attr_cpumask(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ return cpumap_print_to_pagebuf(true, buf, &uncore_cpu_mask);
+}
+
+static DEVICE_ATTR(cpumask, S_IRUGO, uncore_get_attr_cpumask, NULL);
+
+static struct attribute *uncore_pmu_attrs[] = {
+ &dev_attr_cpumask.attr,
+ NULL,
+};
+
+static struct attribute_group uncore_pmu_attr_group = {
+ .attrs = uncore_pmu_attrs,
+};
+
+static int uncore_pmu_register(struct intel_uncore_pmu *pmu)
+{
+ int ret;
+
+ if (!pmu->type->pmu) {
+ pmu->pmu = (struct pmu) {
+ .attr_groups = pmu->type->attr_groups,
+ .task_ctx_nr = perf_invalid_context,
+ .event_init = uncore_pmu_event_init,
+ .add = uncore_pmu_event_add,
+ .del = uncore_pmu_event_del,
+ .start = uncore_pmu_event_start,
+ .stop = uncore_pmu_event_stop,
+ .read = uncore_pmu_event_read,
+ };
+ } else {
+ pmu->pmu = *pmu->type->pmu;
+ pmu->pmu.attr_groups = pmu->type->attr_groups;
+ }
+
+ if (pmu->type->num_boxes == 1) {
+ if (strlen(pmu->type->name) > 0)
+ sprintf(pmu->name, "uncore_%s", pmu->type->name);
+ else
+ sprintf(pmu->name, "uncore");
+ } else {
+ sprintf(pmu->name, "uncore_%s_%d", pmu->type->name,
+ pmu->pmu_idx);
+ }
+
+ ret = perf_pmu_register(&pmu->pmu, pmu->name, -1);
+ if (!ret)
+ pmu->registered = true;
+ return ret;
+}
+
+static void uncore_pmu_unregister(struct intel_uncore_pmu *pmu)
+{
+ if (!pmu->registered)
+ return;
+ perf_pmu_unregister(&pmu->pmu);
+ pmu->registered = false;
+}
+
+static void __init __uncore_exit_boxes(struct intel_uncore_type *type, int cpu)
+{
+ struct intel_uncore_pmu *pmu = type->pmus;
+ struct intel_uncore_box *box;
+ int i, pkg;
+
+ if (pmu) {
+ pkg = topology_physical_package_id(cpu);
+ for (i = 0; i < type->num_boxes; i++, pmu++) {
+ box = pmu->boxes[pkg];
+ if (box)
+ uncore_box_exit(box);
+ }
+ }
+}
+
+static void __init uncore_exit_boxes(void *dummy)
+{
+ struct intel_uncore_type **types;
+
+ for (types = uncore_msr_uncores; *types; types++)
+ __uncore_exit_boxes(*types++, smp_processor_id());
+}
+
+static void uncore_free_boxes(struct intel_uncore_pmu *pmu)
+{
+ int pkg;
+
+ for (pkg = 0; pkg < max_packages; pkg++)
+ kfree(pmu->boxes[pkg]);
+ kfree(pmu->boxes);
+}
+
+static void __init uncore_type_exit(struct intel_uncore_type *type)
+{
+ struct intel_uncore_pmu *pmu = type->pmus;
+ int i;
+
+ if (pmu) {
+ for (i = 0; i < type->num_boxes; i++, pmu++) {
+ uncore_pmu_unregister(pmu);
+ uncore_free_boxes(pmu);
+ }
+ kfree(type->pmus);
+ type->pmus = NULL;
+ }
+ kfree(type->events_group);
+ type->events_group = NULL;
+}
+
+static void __init uncore_types_exit(struct intel_uncore_type **types)
+{
+ for (; *types; types++)
+ uncore_type_exit(*types);
+}
+
+static int __init uncore_type_init(struct intel_uncore_type *type, bool setid)
+{
+ struct intel_uncore_pmu *pmus;
+ struct attribute_group *attr_group;
+ struct attribute **attrs;
+ size_t size;
+ int i, j;
+
+ pmus = kzalloc(sizeof(*pmus) * type->num_boxes, GFP_KERNEL);
+ if (!pmus)
+ return -ENOMEM;
+
+ size = max_packages * sizeof(struct intel_uncore_box *);
+
+ for (i = 0; i < type->num_boxes; i++) {
+ pmus[i].func_id = setid ? i : -1;
+ pmus[i].pmu_idx = i;
+ pmus[i].type = type;
+ pmus[i].boxes = kzalloc(size, GFP_KERNEL);
+ if (!pmus[i].boxes)
+ return -ENOMEM;
+ }
+
+ type->pmus = pmus;
+ type->unconstrainted = (struct event_constraint)
+ __EVENT_CONSTRAINT(0, (1ULL << type->num_counters) - 1,
+ 0, type->num_counters, 0, 0);
+
+ if (type->event_descs) {
+ for (i = 0; type->event_descs[i].attr.attr.name; i++);
+
+ attr_group = kzalloc(sizeof(struct attribute *) * (i + 1) +
+ sizeof(*attr_group), GFP_KERNEL);
+ if (!attr_group)
+ return -ENOMEM;
+
+ attrs = (struct attribute **)(attr_group + 1);
+ attr_group->name = "events";
+ attr_group->attrs = attrs;
+
+ for (j = 0; j < i; j++)
+ attrs[j] = &type->event_descs[j].attr.attr;
+
+ type->events_group = attr_group;
+ }
+
+ type->pmu_group = &uncore_pmu_attr_group;
+ return 0;
+}
+
+static int __init
+uncore_types_init(struct intel_uncore_type **types, bool setid)
+{
+ int ret;
+
+ for (; *types; types++) {
+ ret = uncore_type_init(*types, setid);
+ if (ret)
+ return ret;
+ }
+ return 0;
+}
+
+/*
+ * add a pci uncore device
+ */
+static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+{
+ struct intel_uncore_type *type;
+ struct intel_uncore_pmu *pmu;
+ struct intel_uncore_box *box;
+ int phys_id, pkg, ret;
+
+ phys_id = uncore_pcibus_to_physid(pdev->bus);
+ if (phys_id < 0)
+ return -ENODEV;
+
+ pkg = topology_phys_to_logical_pkg(phys_id);
+ if (pkg < 0)
+ return -EINVAL;
+
+ if (UNCORE_PCI_DEV_TYPE(id->driver_data) == UNCORE_EXTRA_PCI_DEV) {
+ int idx = UNCORE_PCI_DEV_IDX(id->driver_data);
+
+ uncore_extra_pci_dev[pkg].dev[idx] = pdev;
+ pci_set_drvdata(pdev, NULL);
+ return 0;
+ }
+
+ type = uncore_pci_uncores[UNCORE_PCI_DEV_TYPE(id->driver_data)];
+ /*
+ * for performance monitoring unit with multiple boxes,
+ * each box has a different function id.
+ */
+ pmu = &type->pmus[UNCORE_PCI_DEV_IDX(id->driver_data)];
+ /* Knights Landing uses a common PCI device ID for multiple instances of
+ * an uncore PMU device type. There is only one entry per device type in
+ * the knl_uncore_pci_ids table inspite of multiple devices present for
+ * some device types. Hence PCI device idx would be 0 for all devices.
+ * So increment pmu pointer to point to an unused array element.
+ */
+ if (boot_cpu_data.x86_model == 87) {
+ while (pmu->func_id >= 0)
+ pmu++;
+ }
+
+ if (WARN_ON_ONCE(pmu->boxes[pkg] != NULL))
+ return -EINVAL;
+
+ box = uncore_alloc_box(type, NUMA_NO_NODE);
+ if (!box)
+ return -ENOMEM;
+
+ if (pmu->func_id < 0)
+ pmu->func_id = pdev->devfn;
+ else
+ WARN_ON_ONCE(pmu->func_id != pdev->devfn);
+
+ atomic_inc(&box->refcnt);
+ box->pci_phys_id = phys_id;
+ box->pkgid = pkg;
+ box->pci_dev = pdev;
+ box->pmu = pmu;
+ uncore_box_init(box);
+ pci_set_drvdata(pdev, box);
+
+ pmu->boxes[pkg] = box;
+ if (atomic_inc_return(&pmu->activeboxes) > 1)
+ return 0;
+
+ /* First active box registers the pmu */
+ ret = uncore_pmu_register(pmu);
+ if (ret) {
+ pci_set_drvdata(pdev, NULL);
+ pmu->boxes[pkg] = NULL;
+ uncore_box_exit(box);
+ kfree(box);
+ }
+ return ret;
+}
+
+static void uncore_pci_remove(struct pci_dev *pdev)
+{
+ struct intel_uncore_box *box = pci_get_drvdata(pdev);
+ struct intel_uncore_pmu *pmu;
+ int i, phys_id, pkg;
+
+ phys_id = uncore_pcibus_to_physid(pdev->bus);
+ pkg = topology_phys_to_logical_pkg(phys_id);
+
+ box = pci_get_drvdata(pdev);
+ if (!box) {
+ for (i = 0; i < UNCORE_EXTRA_PCI_DEV_MAX; i++) {
+ if (uncore_extra_pci_dev[pkg].dev[i] == pdev) {
+ uncore_extra_pci_dev[pkg].dev[i] = NULL;
+ break;
+ }
+ }
+ WARN_ON_ONCE(i >= UNCORE_EXTRA_PCI_DEV_MAX);
+ return;
+ }
+
+ pmu = box->pmu;
+ if (WARN_ON_ONCE(phys_id != box->pci_phys_id))
+ return;
+
+ pci_set_drvdata(pdev, NULL);
+ pmu->boxes[pkg] = NULL;
+ if (atomic_dec_return(&pmu->activeboxes) == 0)
+ uncore_pmu_unregister(pmu);
+ uncore_box_exit(box);
+ kfree(box);
+}
+
+static int __init uncore_pci_init(void)
+{
+ size_t size;
+ int ret;
+
+ switch (boot_cpu_data.x86_model) {
+ case 45: /* Sandy Bridge-EP */
+ ret = snbep_uncore_pci_init();
+ break;
+ case 62: /* Ivy Bridge-EP */
+ ret = ivbep_uncore_pci_init();
+ break;
+ case 63: /* Haswell-EP */
+ ret = hswep_uncore_pci_init();
+ break;
+ case 79: /* BDX-EP */
+ case 86: /* BDX-DE */
+ ret = bdx_uncore_pci_init();
+ break;
+ case 42: /* Sandy Bridge */
+ ret = snb_uncore_pci_init();
+ break;
+ case 58: /* Ivy Bridge */
+ ret = ivb_uncore_pci_init();
+ break;
+ case 60: /* Haswell */
+ case 69: /* Haswell Celeron */
+ ret = hsw_uncore_pci_init();
+ break;
+ case 61: /* Broadwell */
+ ret = bdw_uncore_pci_init();
+ break;
+ case 87: /* Knights Landing */
+ ret = knl_uncore_pci_init();
+ break;
+ case 94: /* SkyLake */
+ ret = skl_uncore_pci_init();
+ break;
+ default:
+ return -ENODEV;
+ }
+
+ if (ret)
+ return ret;
+
+ size = max_packages * sizeof(struct pci_extra_dev);
+ uncore_extra_pci_dev = kzalloc(size, GFP_KERNEL);
+ if (!uncore_extra_pci_dev) {
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ ret = uncore_types_init(uncore_pci_uncores, false);
+ if (ret)
+ goto errtype;
+
+ uncore_pci_driver->probe = uncore_pci_probe;
+ uncore_pci_driver->remove = uncore_pci_remove;
+
+ ret = pci_register_driver(uncore_pci_driver);
+ if (ret)
+ goto errtype;
+
+ pcidrv_registered = true;
+ return 0;
+
+errtype:
+ uncore_types_exit(uncore_pci_uncores);
+ kfree(uncore_extra_pci_dev);
+ uncore_extra_pci_dev = NULL;
+ uncore_free_pcibus_map();
+err:
+ uncore_pci_uncores = empty_uncore;
+ return ret;
+}
+
+static void __init uncore_pci_exit(void)
+{
+ if (pcidrv_registered) {
+ pcidrv_registered = false;
+ pci_unregister_driver(uncore_pci_driver);
+ uncore_types_exit(uncore_pci_uncores);
+ kfree(uncore_extra_pci_dev);
+ uncore_free_pcibus_map();
+ }
+}
+
+static void uncore_cpu_dying(int cpu)
+{
+ struct intel_uncore_type *type, **types = uncore_msr_uncores;
+ struct intel_uncore_pmu *pmu;
+ struct intel_uncore_box *box;
+ int i, pkg;
+
+ pkg = topology_logical_package_id(cpu);
+ for (; *types; types++) {
+ type = *types;
+ pmu = type->pmus;
+ for (i = 0; i < type->num_boxes; i++, pmu++) {
+ box = pmu->boxes[pkg];
+ if (box && atomic_dec_return(&box->refcnt) == 0)
+ uncore_box_exit(box);
+ }
+ }
+}
+
+static void uncore_cpu_starting(int cpu, bool init)
+{
+ struct intel_uncore_type *type, **types = uncore_msr_uncores;
+ struct intel_uncore_pmu *pmu;
+ struct intel_uncore_box *box;
+ int i, pkg, ncpus = 1;
+
+ if (init) {
+ /*
+ * On init we get the number of online cpus in the package
+ * and set refcount for all of them.
+ */
+ ncpus = cpumask_weight(topology_core_cpumask(cpu));
+ }
+
+ pkg = topology_logical_package_id(cpu);
+ for (; *types; types++) {
+ type = *types;
+ pmu = type->pmus;
+ for (i = 0; i < type->num_boxes; i++, pmu++) {
+ box = pmu->boxes[pkg];
+ if (!box)
+ continue;
+ /* The first cpu on a package activates the box */
+ if (atomic_add_return(ncpus, &box->refcnt) == ncpus)
+ uncore_box_init(box);
+ }
+ }
+}
+
+static int uncore_cpu_prepare(int cpu)
+{
+ struct intel_uncore_type *type, **types = uncore_msr_uncores;
+ struct intel_uncore_pmu *pmu;
+ struct intel_uncore_box *box;
+ int i, pkg;
+
+ pkg = topology_logical_package_id(cpu);
+ for (; *types; types++) {
+ type = *types;
+ pmu = type->pmus;
+ for (i = 0; i < type->num_boxes; i++, pmu++) {
+ if (pmu->boxes[pkg])
+ continue;
+ /* First cpu of a package allocates the box */
+ box = uncore_alloc_box(type, cpu_to_node(cpu));
+ if (!box)
+ return -ENOMEM;
+ box->pmu = pmu;
+ box->pkgid = pkg;
+ pmu->boxes[pkg] = box;
+ }
+ }
+ return 0;
+}
+
+static void uncore_change_type_ctx(struct intel_uncore_type *type, int old_cpu,
+ int new_cpu)
+{
+ struct intel_uncore_pmu *pmu = type->pmus;
+ struct intel_uncore_box *box;
+ int i, pkg;
+
+ pkg = topology_logical_package_id(old_cpu < 0 ? new_cpu : old_cpu);
+ for (i = 0; i < type->num_boxes; i++, pmu++) {
+ box = pmu->boxes[pkg];
+ if (!box)
+ continue;
+
+ if (old_cpu < 0) {
+ WARN_ON_ONCE(box->cpu != -1);
+ box->cpu = new_cpu;
+ continue;
+ }
+
+ WARN_ON_ONCE(box->cpu != old_cpu);
+ box->cpu = -1;
+ if (new_cpu < 0)
+ continue;
+
+ uncore_pmu_cancel_hrtimer(box);
+ perf_pmu_migrate_context(&pmu->pmu, old_cpu, new_cpu);
+ box->cpu = new_cpu;
+ }
+}
+
+static void uncore_change_context(struct intel_uncore_type **uncores,
+ int old_cpu, int new_cpu)
+{
+ for (; *uncores; uncores++)
+ uncore_change_type_ctx(*uncores, old_cpu, new_cpu);
+}
+
+static void uncore_event_exit_cpu(int cpu)
+{
+ int target;
+
+ /* Check if exiting cpu is used for collecting uncore events */
+ if (!cpumask_test_and_clear_cpu(cpu, &uncore_cpu_mask))
+ return;
+
+ /* Find a new cpu to collect uncore events */
+ target = cpumask_any_but(topology_core_cpumask(cpu), cpu);
+
+ /* Migrate uncore events to the new target */
+ if (target < nr_cpu_ids)
+ cpumask_set_cpu(target, &uncore_cpu_mask);
+ else
+ target = -1;
+
+ uncore_change_context(uncore_msr_uncores, cpu, target);
+ uncore_change_context(uncore_pci_uncores, cpu, target);
+}
+
+static void uncore_event_init_cpu(int cpu)
+{
+ int target;
+
+ /*
+ * Check if there is an online cpu in the package
+ * which collects uncore events already.
+ */
+ target = cpumask_any_and(&uncore_cpu_mask, topology_core_cpumask(cpu));
+ if (target < nr_cpu_ids)
+ return;
+
+ cpumask_set_cpu(cpu, &uncore_cpu_mask);
+
+ uncore_change_context(uncore_msr_uncores, -1, cpu);
+ uncore_change_context(uncore_pci_uncores, -1, cpu);
+}
+
+static int uncore_cpu_notifier(struct notifier_block *self,
+ unsigned long action, void *hcpu)
+{
+ unsigned int cpu = (long)hcpu;
+
+ switch (action & ~CPU_TASKS_FROZEN) {
+ case CPU_UP_PREPARE:
+ return notifier_from_errno(uncore_cpu_prepare(cpu));
+
+ case CPU_STARTING:
+ uncore_cpu_starting(cpu, false);
+ case CPU_DOWN_FAILED:
+ uncore_event_init_cpu(cpu);
+ break;
+
+ case CPU_UP_CANCELED:
+ case CPU_DYING:
+ uncore_cpu_dying(cpu);
+ break;
+
+ case CPU_DOWN_PREPARE:
+ uncore_event_exit_cpu(cpu);
+ break;
+ }
+ return NOTIFY_OK;
+}
+
+static struct notifier_block uncore_cpu_nb = {
+ .notifier_call = uncore_cpu_notifier,
+ /*
+ * to migrate uncore events, our notifier should be executed
+ * before perf core's notifier.
+ */
+ .priority = CPU_PRI_PERF + 1,
+};
+
+static int __init type_pmu_register(struct intel_uncore_type *type)
+{
+ int i, ret;
+
+ for (i = 0; i < type->num_boxes; i++) {
+ ret = uncore_pmu_register(&type->pmus[i]);
+ if (ret)
+ return ret;
+ }
+ return 0;
+}
+
+static int __init uncore_msr_pmus_register(void)
+{
+ struct intel_uncore_type **types = uncore_msr_uncores;
+ int ret;
+
+ for (; *types; types++) {
+ ret = type_pmu_register(*types);
+ if (ret)
+ return ret;
+ }
+ return 0;
+}
+
+static int __init uncore_cpu_init(void)
+{
+ int ret;
+
+ switch (boot_cpu_data.x86_model) {
+ case 26: /* Nehalem */
+ case 30:
+ case 37: /* Westmere */
+ case 44:
+ nhm_uncore_cpu_init();
+ break;
+ case 42: /* Sandy Bridge */
+ case 58: /* Ivy Bridge */
+ case 60: /* Haswell */
+ case 69: /* Haswell */
+ case 70: /* Haswell */
+ case 61: /* Broadwell */
+ case 71: /* Broadwell */
+ snb_uncore_cpu_init();
+ break;
+ case 45: /* Sandy Bridge-EP */
+ snbep_uncore_cpu_init();
+ break;
+ case 46: /* Nehalem-EX */
+ case 47: /* Westmere-EX aka. Xeon E7 */
+ nhmex_uncore_cpu_init();
+ break;
+ case 62: /* Ivy Bridge-EP */
+ ivbep_uncore_cpu_init();
+ break;
+ case 63: /* Haswell-EP */
+ hswep_uncore_cpu_init();
+ break;
+ case 79: /* BDX-EP */
+ case 86: /* BDX-DE */
+ bdx_uncore_cpu_init();
+ break;
+ case 87: /* Knights Landing */
+ knl_uncore_cpu_init();
+ break;
+ default:
+ return -ENODEV;
+ }
+
+ ret = uncore_types_init(uncore_msr_uncores, true);
+ if (ret)
+ goto err;
+
+ ret = uncore_msr_pmus_register();
+ if (ret)
+ goto err;
+ return 0;
+err:
+ uncore_types_exit(uncore_msr_uncores);
+ uncore_msr_uncores = empty_uncore;
+ return ret;
+}
+
+static void __init uncore_cpu_setup(void *dummy)
+{
+ uncore_cpu_starting(smp_processor_id(), true);
+}
+
+/* Lazy to avoid allocation of a few bytes for the normal case */
+static __initdata DECLARE_BITMAP(packages, MAX_LOCAL_APIC);
+
+static int __init uncore_cpumask_init(bool msr)
+{
+ unsigned int cpu;
+
+ for_each_online_cpu(cpu) {
+ unsigned int pkg = topology_logical_package_id(cpu);
+ int ret;
+
+ if (test_and_set_bit(pkg, packages))
+ continue;
+ /*
+ * The first online cpu of each package allocates and takes
+ * the refcounts for all other online cpus in that package.
+ * If msrs are not enabled no allocation is required.
+ */
+ if (msr) {
+ ret = uncore_cpu_prepare(cpu);
+ if (ret)
+ return ret;
+ }
+ uncore_event_init_cpu(cpu);
+ smp_call_function_single(cpu, uncore_cpu_setup, NULL, 1);
+ }
+ __register_cpu_notifier(&uncore_cpu_nb);
+ return 0;
+}
+
+static int __init intel_uncore_init(void)
+{
+ int pret, cret, ret;
+
+ if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
+ return -ENODEV;
+
+ if (cpu_has_hypervisor)
+ return -ENODEV;
+
+ max_packages = topology_max_packages();
+
+ pret = uncore_pci_init();
+ cret = uncore_cpu_init();
+
+ if (cret && pret)
+ return -ENODEV;
+
+ cpu_notifier_register_begin();
+ ret = uncore_cpumask_init(!cret);
+ if (ret)
+ goto err;
+ cpu_notifier_register_done();
+ return 0;
+
+err:
+ /* Undo box->init_box() */
+ on_each_cpu_mask(&uncore_cpu_mask, uncore_exit_boxes, NULL, 1);
+ uncore_types_exit(uncore_msr_uncores);
+ uncore_pci_exit();
+ cpu_notifier_register_done();
+ return ret;
+}
+device_initcall(intel_uncore_init);
diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h
new file mode 100644
index 000000000..79766b9a3
--- /dev/null
+++ b/arch/x86/events/intel/uncore.h
@@ -0,0 +1,378 @@
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/pci.h>
+#include <asm/apicdef.h>
+
+#include <linux/perf_event.h>
+#include "../perf_event.h"
+
+#define UNCORE_PMU_NAME_LEN 32
+#define UNCORE_PMU_HRTIMER_INTERVAL (60LL * NSEC_PER_SEC)
+#define UNCORE_SNB_IMC_HRTIMER_INTERVAL (5ULL * NSEC_PER_SEC)
+
+#define UNCORE_FIXED_EVENT 0xff
+#define UNCORE_PMC_IDX_MAX_GENERIC 8
+#define UNCORE_PMC_IDX_FIXED UNCORE_PMC_IDX_MAX_GENERIC
+#define UNCORE_PMC_IDX_MAX (UNCORE_PMC_IDX_FIXED + 1)
+
+#define UNCORE_PCI_DEV_DATA(type, idx) ((type << 8) | idx)
+#define UNCORE_PCI_DEV_TYPE(data) ((data >> 8) & 0xff)
+#define UNCORE_PCI_DEV_IDX(data) (data & 0xff)
+#define UNCORE_EXTRA_PCI_DEV 0xff
+#define UNCORE_EXTRA_PCI_DEV_MAX 3
+
+#define UNCORE_EVENT_CONSTRAINT(c, n) EVENT_CONSTRAINT(c, n, 0xff)
+
+struct pci_extra_dev {
+ struct pci_dev *dev[UNCORE_EXTRA_PCI_DEV_MAX];
+};
+
+struct intel_uncore_ops;
+struct intel_uncore_pmu;
+struct intel_uncore_box;
+struct uncore_event_desc;
+
+struct intel_uncore_type {
+ const char *name;
+ int num_counters;
+ int num_boxes;
+ int perf_ctr_bits;
+ int fixed_ctr_bits;
+ unsigned perf_ctr;
+ unsigned event_ctl;
+ unsigned event_mask;
+ unsigned fixed_ctr;
+ unsigned fixed_ctl;
+ unsigned box_ctl;
+ unsigned msr_offset;
+ unsigned num_shared_regs:8;
+ unsigned single_fixed:1;
+ unsigned pair_ctr_ctl:1;
+ unsigned *msr_offsets;
+ struct event_constraint unconstrainted;
+ struct event_constraint *constraints;
+ struct intel_uncore_pmu *pmus;
+ struct intel_uncore_ops *ops;
+ struct uncore_event_desc *event_descs;
+ const struct attribute_group *attr_groups[4];
+ struct pmu *pmu; /* for custom pmu ops */
+};
+
+#define pmu_group attr_groups[0]
+#define format_group attr_groups[1]
+#define events_group attr_groups[2]
+
+struct intel_uncore_ops {
+ void (*init_box)(struct intel_uncore_box *);
+ void (*exit_box)(struct intel_uncore_box *);
+ void (*disable_box)(struct intel_uncore_box *);
+ void (*enable_box)(struct intel_uncore_box *);
+ void (*disable_event)(struct intel_uncore_box *, struct perf_event *);
+ void (*enable_event)(struct intel_uncore_box *, struct perf_event *);
+ u64 (*read_counter)(struct intel_uncore_box *, struct perf_event *);
+ int (*hw_config)(struct intel_uncore_box *, struct perf_event *);
+ struct event_constraint *(*get_constraint)(struct intel_uncore_box *,
+ struct perf_event *);
+ void (*put_constraint)(struct intel_uncore_box *, struct perf_event *);
+};
+
+struct intel_uncore_pmu {
+ struct pmu pmu;
+ char name[UNCORE_PMU_NAME_LEN];
+ int pmu_idx;
+ int func_id;
+ bool registered;
+ atomic_t activeboxes;
+ struct intel_uncore_type *type;
+ struct intel_uncore_box **boxes;
+};
+
+struct intel_uncore_extra_reg {
+ raw_spinlock_t lock;
+ u64 config, config1, config2;
+ atomic_t ref;
+};
+
+struct intel_uncore_box {
+ int pci_phys_id;
+ int pkgid;
+ int n_active; /* number of active events */
+ int n_events;
+ int cpu; /* cpu to collect events */
+ unsigned long flags;
+ atomic_t refcnt;
+ struct perf_event *events[UNCORE_PMC_IDX_MAX];
+ struct perf_event *event_list[UNCORE_PMC_IDX_MAX];
+ struct event_constraint *event_constraint[UNCORE_PMC_IDX_MAX];
+ unsigned long active_mask[BITS_TO_LONGS(UNCORE_PMC_IDX_MAX)];
+ u64 tags[UNCORE_PMC_IDX_MAX];
+ struct pci_dev *pci_dev;
+ struct intel_uncore_pmu *pmu;
+ u64 hrtimer_duration; /* hrtimer timeout for this box */
+ struct hrtimer hrtimer;
+ struct list_head list;
+ struct list_head active_list;
+ void *io_addr;
+ struct intel_uncore_extra_reg shared_regs[0];
+};
+
+#define UNCORE_BOX_FLAG_INITIATED 0
+
+struct uncore_event_desc {
+ struct kobj_attribute attr;
+ const char *config;
+};
+
+struct pci2phy_map {
+ struct list_head list;
+ int segment;
+ int pbus_to_physid[256];
+};
+
+struct pci2phy_map *__find_pci2phy_map(int segment);
+
+ssize_t uncore_event_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf);
+
+#define INTEL_UNCORE_EVENT_DESC(_name, _config) \
+{ \
+ .attr = __ATTR(_name, 0444, uncore_event_show, NULL), \
+ .config = _config, \
+}
+
+#define DEFINE_UNCORE_FORMAT_ATTR(_var, _name, _format) \
+static ssize_t __uncore_##_var##_show(struct kobject *kobj, \
+ struct kobj_attribute *attr, \
+ char *page) \
+{ \
+ BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE); \
+ return sprintf(page, _format "\n"); \
+} \
+static struct kobj_attribute format_attr_##_var = \
+ __ATTR(_name, 0444, __uncore_##_var##_show, NULL)
+
+static inline unsigned uncore_pci_box_ctl(struct intel_uncore_box *box)
+{
+ return box->pmu->type->box_ctl;
+}
+
+static inline unsigned uncore_pci_fixed_ctl(struct intel_uncore_box *box)
+{
+ return box->pmu->type->fixed_ctl;
+}
+
+static inline unsigned uncore_pci_fixed_ctr(struct intel_uncore_box *box)
+{
+ return box->pmu->type->fixed_ctr;
+}
+
+static inline
+unsigned uncore_pci_event_ctl(struct intel_uncore_box *box, int idx)
+{
+ return idx * 4 + box->pmu->type->event_ctl;
+}
+
+static inline
+unsigned uncore_pci_perf_ctr(struct intel_uncore_box *box, int idx)
+{
+ return idx * 8 + box->pmu->type->perf_ctr;
+}
+
+static inline unsigned uncore_msr_box_offset(struct intel_uncore_box *box)
+{
+ struct intel_uncore_pmu *pmu = box->pmu;
+ return pmu->type->msr_offsets ?
+ pmu->type->msr_offsets[pmu->pmu_idx] :
+ pmu->type->msr_offset * pmu->pmu_idx;
+}
+
+static inline unsigned uncore_msr_box_ctl(struct intel_uncore_box *box)
+{
+ if (!box->pmu->type->box_ctl)
+ return 0;
+ return box->pmu->type->box_ctl + uncore_msr_box_offset(box);
+}
+
+static inline unsigned uncore_msr_fixed_ctl(struct intel_uncore_box *box)
+{
+ if (!box->pmu->type->fixed_ctl)
+ return 0;
+ return box->pmu->type->fixed_ctl + uncore_msr_box_offset(box);
+}
+
+static inline unsigned uncore_msr_fixed_ctr(struct intel_uncore_box *box)
+{
+ return box->pmu->type->fixed_ctr + uncore_msr_box_offset(box);
+}
+
+static inline
+unsigned uncore_msr_event_ctl(struct intel_uncore_box *box, int idx)
+{
+ return box->pmu->type->event_ctl +
+ (box->pmu->type->pair_ctr_ctl ? 2 * idx : idx) +
+ uncore_msr_box_offset(box);
+}
+
+static inline
+unsigned uncore_msr_perf_ctr(struct intel_uncore_box *box, int idx)
+{
+ return box->pmu->type->perf_ctr +
+ (box->pmu->type->pair_ctr_ctl ? 2 * idx : idx) +
+ uncore_msr_box_offset(box);
+}
+
+static inline
+unsigned uncore_fixed_ctl(struct intel_uncore_box *box)
+{
+ if (box->pci_dev)
+ return uncore_pci_fixed_ctl(box);
+ else
+ return uncore_msr_fixed_ctl(box);
+}
+
+static inline
+unsigned uncore_fixed_ctr(struct intel_uncore_box *box)
+{
+ if (box->pci_dev)
+ return uncore_pci_fixed_ctr(box);
+ else
+ return uncore_msr_fixed_ctr(box);
+}
+
+static inline
+unsigned uncore_event_ctl(struct intel_uncore_box *box, int idx)
+{
+ if (box->pci_dev)
+ return uncore_pci_event_ctl(box, idx);
+ else
+ return uncore_msr_event_ctl(box, idx);
+}
+
+static inline
+unsigned uncore_perf_ctr(struct intel_uncore_box *box, int idx)
+{
+ if (box->pci_dev)
+ return uncore_pci_perf_ctr(box, idx);
+ else
+ return uncore_msr_perf_ctr(box, idx);
+}
+
+static inline int uncore_perf_ctr_bits(struct intel_uncore_box *box)
+{
+ return box->pmu->type->perf_ctr_bits;
+}
+
+static inline int uncore_fixed_ctr_bits(struct intel_uncore_box *box)
+{
+ return box->pmu->type->fixed_ctr_bits;
+}
+
+static inline int uncore_num_counters(struct intel_uncore_box *box)
+{
+ return box->pmu->type->num_counters;
+}
+
+static inline void uncore_disable_box(struct intel_uncore_box *box)
+{
+ if (box->pmu->type->ops->disable_box)
+ box->pmu->type->ops->disable_box(box);
+}
+
+static inline void uncore_enable_box(struct intel_uncore_box *box)
+{
+ if (box->pmu->type->ops->enable_box)
+ box->pmu->type->ops->enable_box(box);
+}
+
+static inline void uncore_disable_event(struct intel_uncore_box *box,
+ struct perf_event *event)
+{
+ box->pmu->type->ops->disable_event(box, event);
+}
+
+static inline void uncore_enable_event(struct intel_uncore_box *box,
+ struct perf_event *event)
+{
+ box->pmu->type->ops->enable_event(box, event);
+}
+
+static inline u64 uncore_read_counter(struct intel_uncore_box *box,
+ struct perf_event *event)
+{
+ return box->pmu->type->ops->read_counter(box, event);
+}
+
+static inline void uncore_box_init(struct intel_uncore_box *box)
+{
+ if (!test_and_set_bit(UNCORE_BOX_FLAG_INITIATED, &box->flags)) {
+ if (box->pmu->type->ops->init_box)
+ box->pmu->type->ops->init_box(box);
+ }
+}
+
+static inline void uncore_box_exit(struct intel_uncore_box *box)
+{
+ if (test_and_clear_bit(UNCORE_BOX_FLAG_INITIATED, &box->flags)) {
+ if (box->pmu->type->ops->exit_box)
+ box->pmu->type->ops->exit_box(box);
+ }
+}
+
+static inline bool uncore_box_is_fake(struct intel_uncore_box *box)
+{
+ return (box->pkgid < 0);
+}
+
+static inline struct intel_uncore_pmu *uncore_event_to_pmu(struct perf_event *event)
+{
+ return container_of(event->pmu, struct intel_uncore_pmu, pmu);
+}
+
+static inline struct intel_uncore_box *uncore_event_to_box(struct perf_event *event)
+{
+ return event->pmu_private;
+}
+
+struct intel_uncore_box *uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu);
+u64 uncore_msr_read_counter(struct intel_uncore_box *box, struct perf_event *event);
+void uncore_pmu_start_hrtimer(struct intel_uncore_box *box);
+void uncore_pmu_cancel_hrtimer(struct intel_uncore_box *box);
+void uncore_pmu_event_read(struct perf_event *event);
+void uncore_perf_event_update(struct intel_uncore_box *box, struct perf_event *event);
+struct event_constraint *
+uncore_get_constraint(struct intel_uncore_box *box, struct perf_event *event);
+void uncore_put_constraint(struct intel_uncore_box *box, struct perf_event *event);
+u64 uncore_shared_reg_config(struct intel_uncore_box *box, int idx);
+
+extern struct intel_uncore_type **uncore_msr_uncores;
+extern struct intel_uncore_type **uncore_pci_uncores;
+extern struct pci_driver *uncore_pci_driver;
+extern raw_spinlock_t pci2phy_map_lock;
+extern struct list_head pci2phy_map_head;
+extern struct pci_extra_dev *uncore_extra_pci_dev;
+extern struct event_constraint uncore_constraint_empty;
+
+/* perf_event_intel_uncore_snb.c */
+int snb_uncore_pci_init(void);
+int ivb_uncore_pci_init(void);
+int hsw_uncore_pci_init(void);
+int bdw_uncore_pci_init(void);
+int skl_uncore_pci_init(void);
+void snb_uncore_cpu_init(void);
+void nhm_uncore_cpu_init(void);
+int snb_pci2phy_map_init(int devid);
+
+/* perf_event_intel_uncore_snbep.c */
+int snbep_uncore_pci_init(void);
+void snbep_uncore_cpu_init(void);
+int ivbep_uncore_pci_init(void);
+void ivbep_uncore_cpu_init(void);
+int hswep_uncore_pci_init(void);
+void hswep_uncore_cpu_init(void);
+int bdx_uncore_pci_init(void);
+void bdx_uncore_cpu_init(void);
+int knl_uncore_pci_init(void);
+void knl_uncore_cpu_init(void);
+
+/* perf_event_intel_uncore_nhmex.c */
+void nhmex_uncore_cpu_init(void);
diff --git a/arch/x86/events/intel/uncore_nhmex.c b/arch/x86/events/intel/uncore_nhmex.c
new file mode 100644
index 000000000..cda569332
--- /dev/null
+++ b/arch/x86/events/intel/uncore_nhmex.c
@@ -0,0 +1,1227 @@
+/* Nehalem-EX/Westmere-EX uncore support */
+#include "uncore.h"
+
+/* NHM-EX event control */
+#define NHMEX_PMON_CTL_EV_SEL_MASK 0x000000ff
+#define NHMEX_PMON_CTL_UMASK_MASK 0x0000ff00
+#define NHMEX_PMON_CTL_EN_BIT0 (1 << 0)
+#define NHMEX_PMON_CTL_EDGE_DET (1 << 18)
+#define NHMEX_PMON_CTL_PMI_EN (1 << 20)
+#define NHMEX_PMON_CTL_EN_BIT22 (1 << 22)
+#define NHMEX_PMON_CTL_INVERT (1 << 23)
+#define NHMEX_PMON_CTL_TRESH_MASK 0xff000000
+#define NHMEX_PMON_RAW_EVENT_MASK (NHMEX_PMON_CTL_EV_SEL_MASK | \
+ NHMEX_PMON_CTL_UMASK_MASK | \
+ NHMEX_PMON_CTL_EDGE_DET | \
+ NHMEX_PMON_CTL_INVERT | \
+ NHMEX_PMON_CTL_TRESH_MASK)
+
+/* NHM-EX Ubox */
+#define NHMEX_U_MSR_PMON_GLOBAL_CTL 0xc00
+#define NHMEX_U_MSR_PMON_CTR 0xc11
+#define NHMEX_U_MSR_PMON_EV_SEL 0xc10
+
+#define NHMEX_U_PMON_GLOBAL_EN (1 << 0)
+#define NHMEX_U_PMON_GLOBAL_PMI_CORE_SEL 0x0000001e
+#define NHMEX_U_PMON_GLOBAL_EN_ALL (1 << 28)
+#define NHMEX_U_PMON_GLOBAL_RST_ALL (1 << 29)
+#define NHMEX_U_PMON_GLOBAL_FRZ_ALL (1 << 31)
+
+#define NHMEX_U_PMON_RAW_EVENT_MASK \
+ (NHMEX_PMON_CTL_EV_SEL_MASK | \
+ NHMEX_PMON_CTL_EDGE_DET)
+
+/* NHM-EX Cbox */
+#define NHMEX_C0_MSR_PMON_GLOBAL_CTL 0xd00
+#define NHMEX_C0_MSR_PMON_CTR0 0xd11
+#define NHMEX_C0_MSR_PMON_EV_SEL0 0xd10
+#define NHMEX_C_MSR_OFFSET 0x20
+
+/* NHM-EX Bbox */
+#define NHMEX_B0_MSR_PMON_GLOBAL_CTL 0xc20
+#define NHMEX_B0_MSR_PMON_CTR0 0xc31
+#define NHMEX_B0_MSR_PMON_CTL0 0xc30
+#define NHMEX_B_MSR_OFFSET 0x40
+#define NHMEX_B0_MSR_MATCH 0xe45
+#define NHMEX_B0_MSR_MASK 0xe46
+#define NHMEX_B1_MSR_MATCH 0xe4d
+#define NHMEX_B1_MSR_MASK 0xe4e
+
+#define NHMEX_B_PMON_CTL_EN (1 << 0)
+#define NHMEX_B_PMON_CTL_EV_SEL_SHIFT 1
+#define NHMEX_B_PMON_CTL_EV_SEL_MASK \
+ (0x1f << NHMEX_B_PMON_CTL_EV_SEL_SHIFT)
+#define NHMEX_B_PMON_CTR_SHIFT 6
+#define NHMEX_B_PMON_CTR_MASK \
+ (0x3 << NHMEX_B_PMON_CTR_SHIFT)
+#define NHMEX_B_PMON_RAW_EVENT_MASK \
+ (NHMEX_B_PMON_CTL_EV_SEL_MASK | \
+ NHMEX_B_PMON_CTR_MASK)
+
+/* NHM-EX Sbox */
+#define NHMEX_S0_MSR_PMON_GLOBAL_CTL 0xc40
+#define NHMEX_S0_MSR_PMON_CTR0 0xc51
+#define NHMEX_S0_MSR_PMON_CTL0 0xc50
+#define NHMEX_S_MSR_OFFSET 0x80
+#define NHMEX_S0_MSR_MM_CFG 0xe48
+#define NHMEX_S0_MSR_MATCH 0xe49
+#define NHMEX_S0_MSR_MASK 0xe4a
+#define NHMEX_S1_MSR_MM_CFG 0xe58
+#define NHMEX_S1_MSR_MATCH 0xe59
+#define NHMEX_S1_MSR_MASK 0xe5a
+
+#define NHMEX_S_PMON_MM_CFG_EN (0x1ULL << 63)
+#define NHMEX_S_EVENT_TO_R_PROG_EV 0
+
+/* NHM-EX Mbox */
+#define NHMEX_M0_MSR_GLOBAL_CTL 0xca0
+#define NHMEX_M0_MSR_PMU_DSP 0xca5
+#define NHMEX_M0_MSR_PMU_ISS 0xca6
+#define NHMEX_M0_MSR_PMU_MAP 0xca7
+#define NHMEX_M0_MSR_PMU_MSC_THR 0xca8
+#define NHMEX_M0_MSR_PMU_PGT 0xca9
+#define NHMEX_M0_MSR_PMU_PLD 0xcaa
+#define NHMEX_M0_MSR_PMU_ZDP_CTL_FVC 0xcab
+#define NHMEX_M0_MSR_PMU_CTL0 0xcb0
+#define NHMEX_M0_MSR_PMU_CNT0 0xcb1
+#define NHMEX_M_MSR_OFFSET 0x40
+#define NHMEX_M0_MSR_PMU_MM_CFG 0xe54
+#define NHMEX_M1_MSR_PMU_MM_CFG 0xe5c
+
+#define NHMEX_M_PMON_MM_CFG_EN (1ULL << 63)
+#define NHMEX_M_PMON_ADDR_MATCH_MASK 0x3ffffffffULL
+#define NHMEX_M_PMON_ADDR_MASK_MASK 0x7ffffffULL
+#define NHMEX_M_PMON_ADDR_MASK_SHIFT 34
+
+#define NHMEX_M_PMON_CTL_EN (1 << 0)
+#define NHMEX_M_PMON_CTL_PMI_EN (1 << 1)
+#define NHMEX_M_PMON_CTL_COUNT_MODE_SHIFT 2
+#define NHMEX_M_PMON_CTL_COUNT_MODE_MASK \
+ (0x3 << NHMEX_M_PMON_CTL_COUNT_MODE_SHIFT)
+#define NHMEX_M_PMON_CTL_STORAGE_MODE_SHIFT 4
+#define NHMEX_M_PMON_CTL_STORAGE_MODE_MASK \
+ (0x3 << NHMEX_M_PMON_CTL_STORAGE_MODE_SHIFT)
+#define NHMEX_M_PMON_CTL_WRAP_MODE (1 << 6)
+#define NHMEX_M_PMON_CTL_FLAG_MODE (1 << 7)
+#define NHMEX_M_PMON_CTL_INC_SEL_SHIFT 9
+#define NHMEX_M_PMON_CTL_INC_SEL_MASK \
+ (0x1f << NHMEX_M_PMON_CTL_INC_SEL_SHIFT)
+#define NHMEX_M_PMON_CTL_SET_FLAG_SEL_SHIFT 19
+#define NHMEX_M_PMON_CTL_SET_FLAG_SEL_MASK \
+ (0x7 << NHMEX_M_PMON_CTL_SET_FLAG_SEL_SHIFT)
+#define NHMEX_M_PMON_RAW_EVENT_MASK \
+ (NHMEX_M_PMON_CTL_COUNT_MODE_MASK | \
+ NHMEX_M_PMON_CTL_STORAGE_MODE_MASK | \
+ NHMEX_M_PMON_CTL_WRAP_MODE | \
+ NHMEX_M_PMON_CTL_FLAG_MODE | \
+ NHMEX_M_PMON_CTL_INC_SEL_MASK | \
+ NHMEX_M_PMON_CTL_SET_FLAG_SEL_MASK)
+
+#define NHMEX_M_PMON_ZDP_CTL_FVC_MASK (((1 << 11) - 1) | (1 << 23))
+#define NHMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(n) (0x7ULL << (11 + 3 * (n)))
+
+#define WSMEX_M_PMON_ZDP_CTL_FVC_MASK (((1 << 12) - 1) | (1 << 24))
+#define WSMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(n) (0x7ULL << (12 + 3 * (n)))
+
+/*
+ * use the 9~13 bits to select event If the 7th bit is not set,
+ * otherwise use the 19~21 bits to select event.
+ */
+#define MBOX_INC_SEL(x) ((x) << NHMEX_M_PMON_CTL_INC_SEL_SHIFT)
+#define MBOX_SET_FLAG_SEL(x) (((x) << NHMEX_M_PMON_CTL_SET_FLAG_SEL_SHIFT) | \
+ NHMEX_M_PMON_CTL_FLAG_MODE)
+#define MBOX_INC_SEL_MASK (NHMEX_M_PMON_CTL_INC_SEL_MASK | \
+ NHMEX_M_PMON_CTL_FLAG_MODE)
+#define MBOX_SET_FLAG_SEL_MASK (NHMEX_M_PMON_CTL_SET_FLAG_SEL_MASK | \
+ NHMEX_M_PMON_CTL_FLAG_MODE)
+#define MBOX_INC_SEL_EXTAR_REG(c, r) \
+ EVENT_EXTRA_REG(MBOX_INC_SEL(c), NHMEX_M0_MSR_PMU_##r, \
+ MBOX_INC_SEL_MASK, (u64)-1, NHMEX_M_##r)
+#define MBOX_SET_FLAG_SEL_EXTRA_REG(c, r) \
+ EVENT_EXTRA_REG(MBOX_SET_FLAG_SEL(c), NHMEX_M0_MSR_PMU_##r, \
+ MBOX_SET_FLAG_SEL_MASK, \
+ (u64)-1, NHMEX_M_##r)
+
+/* NHM-EX Rbox */
+#define NHMEX_R_MSR_GLOBAL_CTL 0xe00
+#define NHMEX_R_MSR_PMON_CTL0 0xe10
+#define NHMEX_R_MSR_PMON_CNT0 0xe11
+#define NHMEX_R_MSR_OFFSET 0x20
+
+#define NHMEX_R_MSR_PORTN_QLX_CFG(n) \
+ ((n) < 4 ? (0xe0c + (n)) : (0xe2c + (n) - 4))
+#define NHMEX_R_MSR_PORTN_IPERF_CFG0(n) (0xe04 + (n))
+#define NHMEX_R_MSR_PORTN_IPERF_CFG1(n) (0xe24 + (n))
+#define NHMEX_R_MSR_PORTN_XBR_OFFSET(n) \
+ (((n) < 4 ? 0 : 0x10) + (n) * 4)
+#define NHMEX_R_MSR_PORTN_XBR_SET1_MM_CFG(n) \
+ (0xe60 + NHMEX_R_MSR_PORTN_XBR_OFFSET(n))
+#define NHMEX_R_MSR_PORTN_XBR_SET1_MATCH(n) \
+ (NHMEX_R_MSR_PORTN_XBR_SET1_MM_CFG(n) + 1)
+#define NHMEX_R_MSR_PORTN_XBR_SET1_MASK(n) \
+ (NHMEX_R_MSR_PORTN_XBR_SET1_MM_CFG(n) + 2)
+#define NHMEX_R_MSR_PORTN_XBR_SET2_MM_CFG(n) \
+ (0xe70 + NHMEX_R_MSR_PORTN_XBR_OFFSET(n))
+#define NHMEX_R_MSR_PORTN_XBR_SET2_MATCH(n) \
+ (NHMEX_R_MSR_PORTN_XBR_SET2_MM_CFG(n) + 1)
+#define NHMEX_R_MSR_PORTN_XBR_SET2_MASK(n) \
+ (NHMEX_R_MSR_PORTN_XBR_SET2_MM_CFG(n) + 2)
+
+#define NHMEX_R_PMON_CTL_EN (1 << 0)
+#define NHMEX_R_PMON_CTL_EV_SEL_SHIFT 1
+#define NHMEX_R_PMON_CTL_EV_SEL_MASK \
+ (0x1f << NHMEX_R_PMON_CTL_EV_SEL_SHIFT)
+#define NHMEX_R_PMON_CTL_PMI_EN (1 << 6)
+#define NHMEX_R_PMON_RAW_EVENT_MASK NHMEX_R_PMON_CTL_EV_SEL_MASK
+
+/* NHM-EX Wbox */
+#define NHMEX_W_MSR_GLOBAL_CTL 0xc80
+#define NHMEX_W_MSR_PMON_CNT0 0xc90
+#define NHMEX_W_MSR_PMON_EVT_SEL0 0xc91
+#define NHMEX_W_MSR_PMON_FIXED_CTR 0x394
+#define NHMEX_W_MSR_PMON_FIXED_CTL 0x395
+
+#define NHMEX_W_PMON_GLOBAL_FIXED_EN (1ULL << 31)
+
+#define __BITS_VALUE(x, i, n) ((typeof(x))(((x) >> ((i) * (n))) & \
+ ((1ULL << (n)) - 1)))
+
+DEFINE_UNCORE_FORMAT_ATTR(event, event, "config:0-7");
+DEFINE_UNCORE_FORMAT_ATTR(event5, event, "config:1-5");
+DEFINE_UNCORE_FORMAT_ATTR(umask, umask, "config:8-15");
+DEFINE_UNCORE_FORMAT_ATTR(edge, edge, "config:18");
+DEFINE_UNCORE_FORMAT_ATTR(inv, inv, "config:23");
+DEFINE_UNCORE_FORMAT_ATTR(thresh8, thresh, "config:24-31");
+DEFINE_UNCORE_FORMAT_ATTR(counter, counter, "config:6-7");
+DEFINE_UNCORE_FORMAT_ATTR(match, match, "config1:0-63");
+DEFINE_UNCORE_FORMAT_ATTR(mask, mask, "config2:0-63");
+
+static void nhmex_uncore_msr_init_box(struct intel_uncore_box *box)
+{
+ wrmsrl(NHMEX_U_MSR_PMON_GLOBAL_CTL, NHMEX_U_PMON_GLOBAL_EN_ALL);
+}
+
+static void nhmex_uncore_msr_exit_box(struct intel_uncore_box *box)
+{
+ wrmsrl(NHMEX_U_MSR_PMON_GLOBAL_CTL, 0);
+}
+
+static void nhmex_uncore_msr_disable_box(struct intel_uncore_box *box)
+{
+ unsigned msr = uncore_msr_box_ctl(box);
+ u64 config;
+
+ if (msr) {
+ rdmsrl(msr, config);
+ config &= ~((1ULL << uncore_num_counters(box)) - 1);
+ /* WBox has a fixed counter */
+ if (uncore_msr_fixed_ctl(box))
+ config &= ~NHMEX_W_PMON_GLOBAL_FIXED_EN;
+ wrmsrl(msr, config);
+ }
+}
+
+static void nhmex_uncore_msr_enable_box(struct intel_uncore_box *box)
+{
+ unsigned msr = uncore_msr_box_ctl(box);
+ u64 config;
+
+ if (msr) {
+ rdmsrl(msr, config);
+ config |= (1ULL << uncore_num_counters(box)) - 1;
+ /* WBox has a fixed counter */
+ if (uncore_msr_fixed_ctl(box))
+ config |= NHMEX_W_PMON_GLOBAL_FIXED_EN;
+ wrmsrl(msr, config);
+ }
+}
+
+static void nhmex_uncore_msr_disable_event(struct intel_uncore_box *box, struct perf_event *event)
+{
+ wrmsrl(event->hw.config_base, 0);
+}
+
+static void nhmex_uncore_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+
+ if (hwc->idx >= UNCORE_PMC_IDX_FIXED)
+ wrmsrl(hwc->config_base, NHMEX_PMON_CTL_EN_BIT0);
+ else if (box->pmu->type->event_mask & NHMEX_PMON_CTL_EN_BIT0)
+ wrmsrl(hwc->config_base, hwc->config | NHMEX_PMON_CTL_EN_BIT22);
+ else
+ wrmsrl(hwc->config_base, hwc->config | NHMEX_PMON_CTL_EN_BIT0);
+}
+
+#define NHMEX_UNCORE_OPS_COMMON_INIT() \
+ .init_box = nhmex_uncore_msr_init_box, \
+ .exit_box = nhmex_uncore_msr_exit_box, \
+ .disable_box = nhmex_uncore_msr_disable_box, \
+ .enable_box = nhmex_uncore_msr_enable_box, \
+ .disable_event = nhmex_uncore_msr_disable_event, \
+ .read_counter = uncore_msr_read_counter
+
+static struct intel_uncore_ops nhmex_uncore_ops = {
+ NHMEX_UNCORE_OPS_COMMON_INIT(),
+ .enable_event = nhmex_uncore_msr_enable_event,
+};
+
+static struct attribute *nhmex_uncore_ubox_formats_attr[] = {
+ &format_attr_event.attr,
+ &format_attr_edge.attr,
+ NULL,
+};
+
+static struct attribute_group nhmex_uncore_ubox_format_group = {
+ .name = "format",
+ .attrs = nhmex_uncore_ubox_formats_attr,
+};
+
+static struct intel_uncore_type nhmex_uncore_ubox = {
+ .name = "ubox",
+ .num_counters = 1,
+ .num_boxes = 1,
+ .perf_ctr_bits = 48,
+ .event_ctl = NHMEX_U_MSR_PMON_EV_SEL,
+ .perf_ctr = NHMEX_U_MSR_PMON_CTR,
+ .event_mask = NHMEX_U_PMON_RAW_EVENT_MASK,
+ .box_ctl = NHMEX_U_MSR_PMON_GLOBAL_CTL,
+ .ops = &nhmex_uncore_ops,
+ .format_group = &nhmex_uncore_ubox_format_group
+};
+
+static struct attribute *nhmex_uncore_cbox_formats_attr[] = {
+ &format_attr_event.attr,
+ &format_attr_umask.attr,
+ &format_attr_edge.attr,
+ &format_attr_inv.attr,
+ &format_attr_thresh8.attr,
+ NULL,
+};
+
+static struct attribute_group nhmex_uncore_cbox_format_group = {
+ .name = "format",
+ .attrs = nhmex_uncore_cbox_formats_attr,
+};
+
+/* msr offset for each instance of cbox */
+static unsigned nhmex_cbox_msr_offsets[] = {
+ 0x0, 0x80, 0x40, 0xc0, 0x20, 0xa0, 0x60, 0xe0, 0x240, 0x2c0,
+};
+
+static struct intel_uncore_type nhmex_uncore_cbox = {
+ .name = "cbox",
+ .num_counters = 6,
+ .num_boxes = 10,
+ .perf_ctr_bits = 48,
+ .event_ctl = NHMEX_C0_MSR_PMON_EV_SEL0,
+ .perf_ctr = NHMEX_C0_MSR_PMON_CTR0,
+ .event_mask = NHMEX_PMON_RAW_EVENT_MASK,
+ .box_ctl = NHMEX_C0_MSR_PMON_GLOBAL_CTL,
+ .msr_offsets = nhmex_cbox_msr_offsets,
+ .pair_ctr_ctl = 1,
+ .ops = &nhmex_uncore_ops,
+ .format_group = &nhmex_uncore_cbox_format_group
+};
+
+static struct uncore_event_desc nhmex_uncore_wbox_events[] = {
+ INTEL_UNCORE_EVENT_DESC(clockticks, "event=0xff,umask=0"),
+ { /* end: all zeroes */ },
+};
+
+static struct intel_uncore_type nhmex_uncore_wbox = {
+ .name = "wbox",
+ .num_counters = 4,
+ .num_boxes = 1,
+ .perf_ctr_bits = 48,
+ .event_ctl = NHMEX_W_MSR_PMON_CNT0,
+ .perf_ctr = NHMEX_W_MSR_PMON_EVT_SEL0,
+ .fixed_ctr = NHMEX_W_MSR_PMON_FIXED_CTR,
+ .fixed_ctl = NHMEX_W_MSR_PMON_FIXED_CTL,
+ .event_mask = NHMEX_PMON_RAW_EVENT_MASK,
+ .box_ctl = NHMEX_W_MSR_GLOBAL_CTL,
+ .pair_ctr_ctl = 1,
+ .event_descs = nhmex_uncore_wbox_events,
+ .ops = &nhmex_uncore_ops,
+ .format_group = &nhmex_uncore_cbox_format_group
+};
+
+static int nhmex_bbox_hw_config(struct intel_uncore_box *box, struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
+ struct hw_perf_event_extra *reg2 = &hwc->branch_reg;
+ int ctr, ev_sel;
+
+ ctr = (hwc->config & NHMEX_B_PMON_CTR_MASK) >>
+ NHMEX_B_PMON_CTR_SHIFT;
+ ev_sel = (hwc->config & NHMEX_B_PMON_CTL_EV_SEL_MASK) >>
+ NHMEX_B_PMON_CTL_EV_SEL_SHIFT;
+
+ /* events that do not use the match/mask registers */
+ if ((ctr == 0 && ev_sel > 0x3) || (ctr == 1 && ev_sel > 0x6) ||
+ (ctr == 2 && ev_sel != 0x4) || ctr == 3)
+ return 0;
+
+ if (box->pmu->pmu_idx == 0)
+ reg1->reg = NHMEX_B0_MSR_MATCH;
+ else
+ reg1->reg = NHMEX_B1_MSR_MATCH;
+ reg1->idx = 0;
+ reg1->config = event->attr.config1;
+ reg2->config = event->attr.config2;
+ return 0;
+}
+
+static void nhmex_bbox_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
+ struct hw_perf_event_extra *reg2 = &hwc->branch_reg;
+
+ if (reg1->idx != EXTRA_REG_NONE) {
+ wrmsrl(reg1->reg, reg1->config);
+ wrmsrl(reg1->reg + 1, reg2->config);
+ }
+ wrmsrl(hwc->config_base, NHMEX_PMON_CTL_EN_BIT0 |
+ (hwc->config & NHMEX_B_PMON_CTL_EV_SEL_MASK));
+}
+
+/*
+ * The Bbox has 4 counters, but each counter monitors different events.
+ * Use bits 6-7 in the event config to select counter.
+ */
+static struct event_constraint nhmex_uncore_bbox_constraints[] = {
+ EVENT_CONSTRAINT(0 , 1, 0xc0),
+ EVENT_CONSTRAINT(0x40, 2, 0xc0),
+ EVENT_CONSTRAINT(0x80, 4, 0xc0),
+ EVENT_CONSTRAINT(0xc0, 8, 0xc0),
+ EVENT_CONSTRAINT_END,
+};
+
+static struct attribute *nhmex_uncore_bbox_formats_attr[] = {
+ &format_attr_event5.attr,
+ &format_attr_counter.attr,
+ &format_attr_match.attr,
+ &format_attr_mask.attr,
+ NULL,
+};
+
+static struct attribute_group nhmex_uncore_bbox_format_group = {
+ .name = "format",
+ .attrs = nhmex_uncore_bbox_formats_attr,
+};
+
+static struct intel_uncore_ops nhmex_uncore_bbox_ops = {
+ NHMEX_UNCORE_OPS_COMMON_INIT(),
+ .enable_event = nhmex_bbox_msr_enable_event,
+ .hw_config = nhmex_bbox_hw_config,
+ .get_constraint = uncore_get_constraint,
+ .put_constraint = uncore_put_constraint,
+};
+
+static struct intel_uncore_type nhmex_uncore_bbox = {
+ .name = "bbox",
+ .num_counters = 4,
+ .num_boxes = 2,
+ .perf_ctr_bits = 48,
+ .event_ctl = NHMEX_B0_MSR_PMON_CTL0,
+ .perf_ctr = NHMEX_B0_MSR_PMON_CTR0,
+ .event_mask = NHMEX_B_PMON_RAW_EVENT_MASK,
+ .box_ctl = NHMEX_B0_MSR_PMON_GLOBAL_CTL,
+ .msr_offset = NHMEX_B_MSR_OFFSET,
+ .pair_ctr_ctl = 1,
+ .num_shared_regs = 1,
+ .constraints = nhmex_uncore_bbox_constraints,
+ .ops = &nhmex_uncore_bbox_ops,
+ .format_group = &nhmex_uncore_bbox_format_group
+};
+
+static int nhmex_sbox_hw_config(struct intel_uncore_box *box, struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
+ struct hw_perf_event_extra *reg2 = &hwc->branch_reg;
+
+ /* only TO_R_PROG_EV event uses the match/mask register */
+ if ((hwc->config & NHMEX_PMON_CTL_EV_SEL_MASK) !=
+ NHMEX_S_EVENT_TO_R_PROG_EV)
+ return 0;
+
+ if (box->pmu->pmu_idx == 0)
+ reg1->reg = NHMEX_S0_MSR_MM_CFG;
+ else
+ reg1->reg = NHMEX_S1_MSR_MM_CFG;
+ reg1->idx = 0;
+ reg1->config = event->attr.config1;
+ reg2->config = event->attr.config2;
+ return 0;
+}
+
+static void nhmex_sbox_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
+ struct hw_perf_event_extra *reg2 = &hwc->branch_reg;
+
+ if (reg1->idx != EXTRA_REG_NONE) {
+ wrmsrl(reg1->reg, 0);
+ wrmsrl(reg1->reg + 1, reg1->config);
+ wrmsrl(reg1->reg + 2, reg2->config);
+ wrmsrl(reg1->reg, NHMEX_S_PMON_MM_CFG_EN);
+ }
+ wrmsrl(hwc->config_base, hwc->config | NHMEX_PMON_CTL_EN_BIT22);
+}
+
+static struct attribute *nhmex_uncore_sbox_formats_attr[] = {
+ &format_attr_event.attr,
+ &format_attr_umask.attr,
+ &format_attr_edge.attr,
+ &format_attr_inv.attr,
+ &format_attr_thresh8.attr,
+ &format_attr_match.attr,
+ &format_attr_mask.attr,
+ NULL,
+};
+
+static struct attribute_group nhmex_uncore_sbox_format_group = {
+ .name = "format",
+ .attrs = nhmex_uncore_sbox_formats_attr,
+};
+
+static struct intel_uncore_ops nhmex_uncore_sbox_ops = {
+ NHMEX_UNCORE_OPS_COMMON_INIT(),
+ .enable_event = nhmex_sbox_msr_enable_event,
+ .hw_config = nhmex_sbox_hw_config,
+ .get_constraint = uncore_get_constraint,
+ .put_constraint = uncore_put_constraint,
+};
+
+static struct intel_uncore_type nhmex_uncore_sbox = {
+ .name = "sbox",
+ .num_counters = 4,
+ .num_boxes = 2,
+ .perf_ctr_bits = 48,
+ .event_ctl = NHMEX_S0_MSR_PMON_CTL0,
+ .perf_ctr = NHMEX_S0_MSR_PMON_CTR0,
+ .event_mask = NHMEX_PMON_RAW_EVENT_MASK,
+ .box_ctl = NHMEX_S0_MSR_PMON_GLOBAL_CTL,
+ .msr_offset = NHMEX_S_MSR_OFFSET,
+ .pair_ctr_ctl = 1,
+ .num_shared_regs = 1,
+ .ops = &nhmex_uncore_sbox_ops,
+ .format_group = &nhmex_uncore_sbox_format_group
+};
+
+enum {
+ EXTRA_REG_NHMEX_M_FILTER,
+ EXTRA_REG_NHMEX_M_DSP,
+ EXTRA_REG_NHMEX_M_ISS,
+ EXTRA_REG_NHMEX_M_MAP,
+ EXTRA_REG_NHMEX_M_MSC_THR,
+ EXTRA_REG_NHMEX_M_PGT,
+ EXTRA_REG_NHMEX_M_PLD,
+ EXTRA_REG_NHMEX_M_ZDP_CTL_FVC,
+};
+
+static struct extra_reg nhmex_uncore_mbox_extra_regs[] = {
+ MBOX_INC_SEL_EXTAR_REG(0x0, DSP),
+ MBOX_INC_SEL_EXTAR_REG(0x4, MSC_THR),
+ MBOX_INC_SEL_EXTAR_REG(0x5, MSC_THR),
+ MBOX_INC_SEL_EXTAR_REG(0x9, ISS),
+ /* event 0xa uses two extra registers */
+ MBOX_INC_SEL_EXTAR_REG(0xa, ISS),
+ MBOX_INC_SEL_EXTAR_REG(0xa, PLD),
+ MBOX_INC_SEL_EXTAR_REG(0xb, PLD),
+ /* events 0xd ~ 0x10 use the same extra register */
+ MBOX_INC_SEL_EXTAR_REG(0xd, ZDP_CTL_FVC),
+ MBOX_INC_SEL_EXTAR_REG(0xe, ZDP_CTL_FVC),
+ MBOX_INC_SEL_EXTAR_REG(0xf, ZDP_CTL_FVC),
+ MBOX_INC_SEL_EXTAR_REG(0x10, ZDP_CTL_FVC),
+ MBOX_INC_SEL_EXTAR_REG(0x16, PGT),
+ MBOX_SET_FLAG_SEL_EXTRA_REG(0x0, DSP),
+ MBOX_SET_FLAG_SEL_EXTRA_REG(0x1, ISS),
+ MBOX_SET_FLAG_SEL_EXTRA_REG(0x5, PGT),
+ MBOX_SET_FLAG_SEL_EXTRA_REG(0x6, MAP),
+ EVENT_EXTRA_END
+};
+
+/* Nehalem-EX or Westmere-EX ? */
+static bool uncore_nhmex;
+
+static bool nhmex_mbox_get_shared_reg(struct intel_uncore_box *box, int idx, u64 config)
+{
+ struct intel_uncore_extra_reg *er;
+ unsigned long flags;
+ bool ret = false;
+ u64 mask;
+
+ if (idx < EXTRA_REG_NHMEX_M_ZDP_CTL_FVC) {
+ er = &box->shared_regs[idx];
+ raw_spin_lock_irqsave(&er->lock, flags);
+ if (!atomic_read(&er->ref) || er->config == config) {
+ atomic_inc(&er->ref);
+ er->config = config;
+ ret = true;
+ }
+ raw_spin_unlock_irqrestore(&er->lock, flags);
+
+ return ret;
+ }
+ /*
+ * The ZDP_CTL_FVC MSR has 4 fields which are used to control
+ * events 0xd ~ 0x10. Besides these 4 fields, there are additional
+ * fields which are shared.
+ */
+ idx -= EXTRA_REG_NHMEX_M_ZDP_CTL_FVC;
+ if (WARN_ON_ONCE(idx >= 4))
+ return false;
+
+ /* mask of the shared fields */
+ if (uncore_nhmex)
+ mask = NHMEX_M_PMON_ZDP_CTL_FVC_MASK;
+ else
+ mask = WSMEX_M_PMON_ZDP_CTL_FVC_MASK;
+ er = &box->shared_regs[EXTRA_REG_NHMEX_M_ZDP_CTL_FVC];
+
+ raw_spin_lock_irqsave(&er->lock, flags);
+ /* add mask of the non-shared field if it's in use */
+ if (__BITS_VALUE(atomic_read(&er->ref), idx, 8)) {
+ if (uncore_nhmex)
+ mask |= NHMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(idx);
+ else
+ mask |= WSMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(idx);
+ }
+
+ if (!atomic_read(&er->ref) || !((er->config ^ config) & mask)) {
+ atomic_add(1 << (idx * 8), &er->ref);
+ if (uncore_nhmex)
+ mask = NHMEX_M_PMON_ZDP_CTL_FVC_MASK |
+ NHMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(idx);
+ else
+ mask = WSMEX_M_PMON_ZDP_CTL_FVC_MASK |
+ WSMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(idx);
+ er->config &= ~mask;
+ er->config |= (config & mask);
+ ret = true;
+ }
+ raw_spin_unlock_irqrestore(&er->lock, flags);
+
+ return ret;
+}
+
+static void nhmex_mbox_put_shared_reg(struct intel_uncore_box *box, int idx)
+{
+ struct intel_uncore_extra_reg *er;
+
+ if (idx < EXTRA_REG_NHMEX_M_ZDP_CTL_FVC) {
+ er = &box->shared_regs[idx];
+ atomic_dec(&er->ref);
+ return;
+ }
+
+ idx -= EXTRA_REG_NHMEX_M_ZDP_CTL_FVC;
+ er = &box->shared_regs[EXTRA_REG_NHMEX_M_ZDP_CTL_FVC];
+ atomic_sub(1 << (idx * 8), &er->ref);
+}
+
+static u64 nhmex_mbox_alter_er(struct perf_event *event, int new_idx, bool modify)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
+ u64 idx, orig_idx = __BITS_VALUE(reg1->idx, 0, 8);
+ u64 config = reg1->config;
+
+ /* get the non-shared control bits and shift them */
+ idx = orig_idx - EXTRA_REG_NHMEX_M_ZDP_CTL_FVC;
+ if (uncore_nhmex)
+ config &= NHMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(idx);
+ else
+ config &= WSMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(idx);
+ if (new_idx > orig_idx) {
+ idx = new_idx - orig_idx;
+ config <<= 3 * idx;
+ } else {
+ idx = orig_idx - new_idx;
+ config >>= 3 * idx;
+ }
+
+ /* add the shared control bits back */
+ if (uncore_nhmex)
+ config |= NHMEX_M_PMON_ZDP_CTL_FVC_MASK & reg1->config;
+ else
+ config |= WSMEX_M_PMON_ZDP_CTL_FVC_MASK & reg1->config;
+ config |= NHMEX_M_PMON_ZDP_CTL_FVC_MASK & reg1->config;
+ if (modify) {
+ /* adjust the main event selector */
+ if (new_idx > orig_idx)
+ hwc->config += idx << NHMEX_M_PMON_CTL_INC_SEL_SHIFT;
+ else
+ hwc->config -= idx << NHMEX_M_PMON_CTL_INC_SEL_SHIFT;
+ reg1->config = config;
+ reg1->idx = ~0xff | new_idx;
+ }
+ return config;
+}
+
+static struct event_constraint *
+nhmex_mbox_get_constraint(struct intel_uncore_box *box, struct perf_event *event)
+{
+ struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
+ struct hw_perf_event_extra *reg2 = &event->hw.branch_reg;
+ int i, idx[2], alloc = 0;
+ u64 config1 = reg1->config;
+
+ idx[0] = __BITS_VALUE(reg1->idx, 0, 8);
+ idx[1] = __BITS_VALUE(reg1->idx, 1, 8);
+again:
+ for (i = 0; i < 2; i++) {
+ if (!uncore_box_is_fake(box) && (reg1->alloc & (0x1 << i)))
+ idx[i] = 0xff;
+
+ if (idx[i] == 0xff)
+ continue;
+
+ if (!nhmex_mbox_get_shared_reg(box, idx[i],
+ __BITS_VALUE(config1, i, 32)))
+ goto fail;
+ alloc |= (0x1 << i);
+ }
+
+ /* for the match/mask registers */
+ if (reg2->idx != EXTRA_REG_NONE &&
+ (uncore_box_is_fake(box) || !reg2->alloc) &&
+ !nhmex_mbox_get_shared_reg(box, reg2->idx, reg2->config))
+ goto fail;
+
+ /*
+ * If it's a fake box -- as per validate_{group,event}() we
+ * shouldn't touch event state and we can avoid doing so
+ * since both will only call get_event_constraints() once
+ * on each event, this avoids the need for reg->alloc.
+ */
+ if (!uncore_box_is_fake(box)) {
+ if (idx[0] != 0xff && idx[0] != __BITS_VALUE(reg1->idx, 0, 8))
+ nhmex_mbox_alter_er(event, idx[0], true);
+ reg1->alloc |= alloc;
+ if (reg2->idx != EXTRA_REG_NONE)
+ reg2->alloc = 1;
+ }
+ return NULL;
+fail:
+ if (idx[0] != 0xff && !(alloc & 0x1) &&
+ idx[0] >= EXTRA_REG_NHMEX_M_ZDP_CTL_FVC) {
+ /*
+ * events 0xd ~ 0x10 are functional identical, but are
+ * controlled by different fields in the ZDP_CTL_FVC
+ * register. If we failed to take one field, try the
+ * rest 3 choices.
+ */
+ BUG_ON(__BITS_VALUE(reg1->idx, 1, 8) != 0xff);
+ idx[0] -= EXTRA_REG_NHMEX_M_ZDP_CTL_FVC;
+ idx[0] = (idx[0] + 1) % 4;
+ idx[0] += EXTRA_REG_NHMEX_M_ZDP_CTL_FVC;
+ if (idx[0] != __BITS_VALUE(reg1->idx, 0, 8)) {
+ config1 = nhmex_mbox_alter_er(event, idx[0], false);
+ goto again;
+ }
+ }
+
+ if (alloc & 0x1)
+ nhmex_mbox_put_shared_reg(box, idx[0]);
+ if (alloc & 0x2)
+ nhmex_mbox_put_shared_reg(box, idx[1]);
+ return &uncore_constraint_empty;
+}
+
+static void nhmex_mbox_put_constraint(struct intel_uncore_box *box, struct perf_event *event)
+{
+ struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
+ struct hw_perf_event_extra *reg2 = &event->hw.branch_reg;
+
+ if (uncore_box_is_fake(box))
+ return;
+
+ if (reg1->alloc & 0x1)
+ nhmex_mbox_put_shared_reg(box, __BITS_VALUE(reg1->idx, 0, 8));
+ if (reg1->alloc & 0x2)
+ nhmex_mbox_put_shared_reg(box, __BITS_VALUE(reg1->idx, 1, 8));
+ reg1->alloc = 0;
+
+ if (reg2->alloc) {
+ nhmex_mbox_put_shared_reg(box, reg2->idx);
+ reg2->alloc = 0;
+ }
+}
+
+static int nhmex_mbox_extra_reg_idx(struct extra_reg *er)
+{
+ if (er->idx < EXTRA_REG_NHMEX_M_ZDP_CTL_FVC)
+ return er->idx;
+ return er->idx + (er->event >> NHMEX_M_PMON_CTL_INC_SEL_SHIFT) - 0xd;
+}
+
+static int nhmex_mbox_hw_config(struct intel_uncore_box *box, struct perf_event *event)
+{
+ struct intel_uncore_type *type = box->pmu->type;
+ struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
+ struct hw_perf_event_extra *reg2 = &event->hw.branch_reg;
+ struct extra_reg *er;
+ unsigned msr;
+ int reg_idx = 0;
+ /*
+ * The mbox events may require 2 extra MSRs at the most. But only
+ * the lower 32 bits in these MSRs are significant, so we can use
+ * config1 to pass two MSRs' config.
+ */
+ for (er = nhmex_uncore_mbox_extra_regs; er->msr; er++) {
+ if (er->event != (event->hw.config & er->config_mask))
+ continue;
+ if (event->attr.config1 & ~er->valid_mask)
+ return -EINVAL;
+
+ msr = er->msr + type->msr_offset * box->pmu->pmu_idx;
+ if (WARN_ON_ONCE(msr >= 0xffff || er->idx >= 0xff))
+ return -EINVAL;
+
+ /* always use the 32~63 bits to pass the PLD config */
+ if (er->idx == EXTRA_REG_NHMEX_M_PLD)
+ reg_idx = 1;
+ else if (WARN_ON_ONCE(reg_idx > 0))
+ return -EINVAL;
+
+ reg1->idx &= ~(0xff << (reg_idx * 8));
+ reg1->reg &= ~(0xffff << (reg_idx * 16));
+ reg1->idx |= nhmex_mbox_extra_reg_idx(er) << (reg_idx * 8);
+ reg1->reg |= msr << (reg_idx * 16);
+ reg1->config = event->attr.config1;
+ reg_idx++;
+ }
+ /*
+ * The mbox only provides ability to perform address matching
+ * for the PLD events.
+ */
+ if (reg_idx == 2) {
+ reg2->idx = EXTRA_REG_NHMEX_M_FILTER;
+ if (event->attr.config2 & NHMEX_M_PMON_MM_CFG_EN)
+ reg2->config = event->attr.config2;
+ else
+ reg2->config = ~0ULL;
+ if (box->pmu->pmu_idx == 0)
+ reg2->reg = NHMEX_M0_MSR_PMU_MM_CFG;
+ else
+ reg2->reg = NHMEX_M1_MSR_PMU_MM_CFG;
+ }
+ return 0;
+}
+
+static u64 nhmex_mbox_shared_reg_config(struct intel_uncore_box *box, int idx)
+{
+ struct intel_uncore_extra_reg *er;
+ unsigned long flags;
+ u64 config;
+
+ if (idx < EXTRA_REG_NHMEX_M_ZDP_CTL_FVC)
+ return box->shared_regs[idx].config;
+
+ er = &box->shared_regs[EXTRA_REG_NHMEX_M_ZDP_CTL_FVC];
+ raw_spin_lock_irqsave(&er->lock, flags);
+ config = er->config;
+ raw_spin_unlock_irqrestore(&er->lock, flags);
+ return config;
+}
+
+static void nhmex_mbox_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
+ struct hw_perf_event_extra *reg2 = &hwc->branch_reg;
+ int idx;
+
+ idx = __BITS_VALUE(reg1->idx, 0, 8);
+ if (idx != 0xff)
+ wrmsrl(__BITS_VALUE(reg1->reg, 0, 16),
+ nhmex_mbox_shared_reg_config(box, idx));
+ idx = __BITS_VALUE(reg1->idx, 1, 8);
+ if (idx != 0xff)
+ wrmsrl(__BITS_VALUE(reg1->reg, 1, 16),
+ nhmex_mbox_shared_reg_config(box, idx));
+
+ if (reg2->idx != EXTRA_REG_NONE) {
+ wrmsrl(reg2->reg, 0);
+ if (reg2->config != ~0ULL) {
+ wrmsrl(reg2->reg + 1,
+ reg2->config & NHMEX_M_PMON_ADDR_MATCH_MASK);
+ wrmsrl(reg2->reg + 2, NHMEX_M_PMON_ADDR_MASK_MASK &
+ (reg2->config >> NHMEX_M_PMON_ADDR_MASK_SHIFT));
+ wrmsrl(reg2->reg, NHMEX_M_PMON_MM_CFG_EN);
+ }
+ }
+
+ wrmsrl(hwc->config_base, hwc->config | NHMEX_PMON_CTL_EN_BIT0);
+}
+
+DEFINE_UNCORE_FORMAT_ATTR(count_mode, count_mode, "config:2-3");
+DEFINE_UNCORE_FORMAT_ATTR(storage_mode, storage_mode, "config:4-5");
+DEFINE_UNCORE_FORMAT_ATTR(wrap_mode, wrap_mode, "config:6");
+DEFINE_UNCORE_FORMAT_ATTR(flag_mode, flag_mode, "config:7");
+DEFINE_UNCORE_FORMAT_ATTR(inc_sel, inc_sel, "config:9-13");
+DEFINE_UNCORE_FORMAT_ATTR(set_flag_sel, set_flag_sel, "config:19-21");
+DEFINE_UNCORE_FORMAT_ATTR(filter_cfg_en, filter_cfg_en, "config2:63");
+DEFINE_UNCORE_FORMAT_ATTR(filter_match, filter_match, "config2:0-33");
+DEFINE_UNCORE_FORMAT_ATTR(filter_mask, filter_mask, "config2:34-61");
+DEFINE_UNCORE_FORMAT_ATTR(dsp, dsp, "config1:0-31");
+DEFINE_UNCORE_FORMAT_ATTR(thr, thr, "config1:0-31");
+DEFINE_UNCORE_FORMAT_ATTR(fvc, fvc, "config1:0-31");
+DEFINE_UNCORE_FORMAT_ATTR(pgt, pgt, "config1:0-31");
+DEFINE_UNCORE_FORMAT_ATTR(map, map, "config1:0-31");
+DEFINE_UNCORE_FORMAT_ATTR(iss, iss, "config1:0-31");
+DEFINE_UNCORE_FORMAT_ATTR(pld, pld, "config1:32-63");
+
+static struct attribute *nhmex_uncore_mbox_formats_attr[] = {
+ &format_attr_count_mode.attr,
+ &format_attr_storage_mode.attr,
+ &format_attr_wrap_mode.attr,
+ &format_attr_flag_mode.attr,
+ &format_attr_inc_sel.attr,
+ &format_attr_set_flag_sel.attr,
+ &format_attr_filter_cfg_en.attr,
+ &format_attr_filter_match.attr,
+ &format_attr_filter_mask.attr,
+ &format_attr_dsp.attr,
+ &format_attr_thr.attr,
+ &format_attr_fvc.attr,
+ &format_attr_pgt.attr,
+ &format_attr_map.attr,
+ &format_attr_iss.attr,
+ &format_attr_pld.attr,
+ NULL,
+};
+
+static struct attribute_group nhmex_uncore_mbox_format_group = {
+ .name = "format",
+ .attrs = nhmex_uncore_mbox_formats_attr,
+};
+
+static struct uncore_event_desc nhmex_uncore_mbox_events[] = {
+ INTEL_UNCORE_EVENT_DESC(bbox_cmds_read, "inc_sel=0xd,fvc=0x2800"),
+ INTEL_UNCORE_EVENT_DESC(bbox_cmds_write, "inc_sel=0xd,fvc=0x2820"),
+ { /* end: all zeroes */ },
+};
+
+static struct uncore_event_desc wsmex_uncore_mbox_events[] = {
+ INTEL_UNCORE_EVENT_DESC(bbox_cmds_read, "inc_sel=0xd,fvc=0x5000"),
+ INTEL_UNCORE_EVENT_DESC(bbox_cmds_write, "inc_sel=0xd,fvc=0x5040"),
+ { /* end: all zeroes */ },
+};
+
+static struct intel_uncore_ops nhmex_uncore_mbox_ops = {
+ NHMEX_UNCORE_OPS_COMMON_INIT(),
+ .enable_event = nhmex_mbox_msr_enable_event,
+ .hw_config = nhmex_mbox_hw_config,
+ .get_constraint = nhmex_mbox_get_constraint,
+ .put_constraint = nhmex_mbox_put_constraint,
+};
+
+static struct intel_uncore_type nhmex_uncore_mbox = {
+ .name = "mbox",
+ .num_counters = 6,
+ .num_boxes = 2,
+ .perf_ctr_bits = 48,
+ .event_ctl = NHMEX_M0_MSR_PMU_CTL0,
+ .perf_ctr = NHMEX_M0_MSR_PMU_CNT0,
+ .event_mask = NHMEX_M_PMON_RAW_EVENT_MASK,
+ .box_ctl = NHMEX_M0_MSR_GLOBAL_CTL,
+ .msr_offset = NHMEX_M_MSR_OFFSET,
+ .pair_ctr_ctl = 1,
+ .num_shared_regs = 8,
+ .event_descs = nhmex_uncore_mbox_events,
+ .ops = &nhmex_uncore_mbox_ops,
+ .format_group = &nhmex_uncore_mbox_format_group,
+};
+
+static void nhmex_rbox_alter_er(struct intel_uncore_box *box, struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
+
+ /* adjust the main event selector and extra register index */
+ if (reg1->idx % 2) {
+ reg1->idx--;
+ hwc->config -= 1 << NHMEX_R_PMON_CTL_EV_SEL_SHIFT;
+ } else {
+ reg1->idx++;
+ hwc->config += 1 << NHMEX_R_PMON_CTL_EV_SEL_SHIFT;
+ }
+
+ /* adjust extra register config */
+ switch (reg1->idx % 6) {
+ case 2:
+ /* shift the 8~15 bits to the 0~7 bits */
+ reg1->config >>= 8;
+ break;
+ case 3:
+ /* shift the 0~7 bits to the 8~15 bits */
+ reg1->config <<= 8;
+ break;
+ }
+}
+
+/*
+ * Each rbox has 4 event set which monitor PQI port 0~3 or 4~7.
+ * An event set consists of 6 events, the 3rd and 4th events in
+ * an event set use the same extra register. So an event set uses
+ * 5 extra registers.
+ */
+static struct event_constraint *
+nhmex_rbox_get_constraint(struct intel_uncore_box *box, struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
+ struct hw_perf_event_extra *reg2 = &hwc->branch_reg;
+ struct intel_uncore_extra_reg *er;
+ unsigned long flags;
+ int idx, er_idx;
+ u64 config1;
+ bool ok = false;
+
+ if (!uncore_box_is_fake(box) && reg1->alloc)
+ return NULL;
+
+ idx = reg1->idx % 6;
+ config1 = reg1->config;
+again:
+ er_idx = idx;
+ /* the 3rd and 4th events use the same extra register */
+ if (er_idx > 2)
+ er_idx--;
+ er_idx += (reg1->idx / 6) * 5;
+
+ er = &box->shared_regs[er_idx];
+ raw_spin_lock_irqsave(&er->lock, flags);
+ if (idx < 2) {
+ if (!atomic_read(&er->ref) || er->config == reg1->config) {
+ atomic_inc(&er->ref);
+ er->config = reg1->config;
+ ok = true;
+ }
+ } else if (idx == 2 || idx == 3) {
+ /*
+ * these two events use different fields in a extra register,
+ * the 0~7 bits and the 8~15 bits respectively.
+ */
+ u64 mask = 0xff << ((idx - 2) * 8);
+ if (!__BITS_VALUE(atomic_read(&er->ref), idx - 2, 8) ||
+ !((er->config ^ config1) & mask)) {
+ atomic_add(1 << ((idx - 2) * 8), &er->ref);
+ er->config &= ~mask;
+ er->config |= config1 & mask;
+ ok = true;
+ }
+ } else {
+ if (!atomic_read(&er->ref) ||
+ (er->config == (hwc->config >> 32) &&
+ er->config1 == reg1->config &&
+ er->config2 == reg2->config)) {
+ atomic_inc(&er->ref);
+ er->config = (hwc->config >> 32);
+ er->config1 = reg1->config;
+ er->config2 = reg2->config;
+ ok = true;
+ }
+ }
+ raw_spin_unlock_irqrestore(&er->lock, flags);
+
+ if (!ok) {
+ /*
+ * The Rbox events are always in pairs. The paired
+ * events are functional identical, but use different
+ * extra registers. If we failed to take an extra
+ * register, try the alternative.
+ */
+ idx ^= 1;
+ if (idx != reg1->idx % 6) {
+ if (idx == 2)
+ config1 >>= 8;
+ else if (idx == 3)
+ config1 <<= 8;
+ goto again;
+ }
+ } else {
+ if (!uncore_box_is_fake(box)) {
+ if (idx != reg1->idx % 6)
+ nhmex_rbox_alter_er(box, event);
+ reg1->alloc = 1;
+ }
+ return NULL;
+ }
+ return &uncore_constraint_empty;
+}
+
+static void nhmex_rbox_put_constraint(struct intel_uncore_box *box, struct perf_event *event)
+{
+ struct intel_uncore_extra_reg *er;
+ struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
+ int idx, er_idx;
+
+ if (uncore_box_is_fake(box) || !reg1->alloc)
+ return;
+
+ idx = reg1->idx % 6;
+ er_idx = idx;
+ if (er_idx > 2)
+ er_idx--;
+ er_idx += (reg1->idx / 6) * 5;
+
+ er = &box->shared_regs[er_idx];
+ if (idx == 2 || idx == 3)
+ atomic_sub(1 << ((idx - 2) * 8), &er->ref);
+ else
+ atomic_dec(&er->ref);
+
+ reg1->alloc = 0;
+}
+
+static int nhmex_rbox_hw_config(struct intel_uncore_box *box, struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
+ struct hw_perf_event_extra *reg2 = &event->hw.branch_reg;
+ int idx;
+
+ idx = (event->hw.config & NHMEX_R_PMON_CTL_EV_SEL_MASK) >>
+ NHMEX_R_PMON_CTL_EV_SEL_SHIFT;
+ if (idx >= 0x18)
+ return -EINVAL;
+
+ reg1->idx = idx;
+ reg1->config = event->attr.config1;
+
+ switch (idx % 6) {
+ case 4:
+ case 5:
+ hwc->config |= event->attr.config & (~0ULL << 32);
+ reg2->config = event->attr.config2;
+ break;
+ }
+ return 0;
+}
+
+static void nhmex_rbox_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
+ struct hw_perf_event_extra *reg2 = &hwc->branch_reg;
+ int idx, port;
+
+ idx = reg1->idx;
+ port = idx / 6 + box->pmu->pmu_idx * 4;
+
+ switch (idx % 6) {
+ case 0:
+ wrmsrl(NHMEX_R_MSR_PORTN_IPERF_CFG0(port), reg1->config);
+ break;
+ case 1:
+ wrmsrl(NHMEX_R_MSR_PORTN_IPERF_CFG1(port), reg1->config);
+ break;
+ case 2:
+ case 3:
+ wrmsrl(NHMEX_R_MSR_PORTN_QLX_CFG(port),
+ uncore_shared_reg_config(box, 2 + (idx / 6) * 5));
+ break;
+ case 4:
+ wrmsrl(NHMEX_R_MSR_PORTN_XBR_SET1_MM_CFG(port),
+ hwc->config >> 32);
+ wrmsrl(NHMEX_R_MSR_PORTN_XBR_SET1_MATCH(port), reg1->config);
+ wrmsrl(NHMEX_R_MSR_PORTN_XBR_SET1_MASK(port), reg2->config);
+ break;
+ case 5:
+ wrmsrl(NHMEX_R_MSR_PORTN_XBR_SET2_MM_CFG(port),
+ hwc->config >> 32);
+ wrmsrl(NHMEX_R_MSR_PORTN_XBR_SET2_MATCH(port), reg1->config);
+ wrmsrl(NHMEX_R_MSR_PORTN_XBR_SET2_MASK(port), reg2->config);
+ break;
+ }
+
+ wrmsrl(hwc->config_base, NHMEX_PMON_CTL_EN_BIT0 |
+ (hwc->config & NHMEX_R_PMON_CTL_EV_SEL_MASK));
+}
+
+DEFINE_UNCORE_FORMAT_ATTR(xbr_mm_cfg, xbr_mm_cfg, "config:32-63");
+DEFINE_UNCORE_FORMAT_ATTR(xbr_match, xbr_match, "config1:0-63");
+DEFINE_UNCORE_FORMAT_ATTR(xbr_mask, xbr_mask, "config2:0-63");
+DEFINE_UNCORE_FORMAT_ATTR(qlx_cfg, qlx_cfg, "config1:0-15");
+DEFINE_UNCORE_FORMAT_ATTR(iperf_cfg, iperf_cfg, "config1:0-31");
+
+static struct attribute *nhmex_uncore_rbox_formats_attr[] = {
+ &format_attr_event5.attr,
+ &format_attr_xbr_mm_cfg.attr,
+ &format_attr_xbr_match.attr,
+ &format_attr_xbr_mask.attr,
+ &format_attr_qlx_cfg.attr,
+ &format_attr_iperf_cfg.attr,
+ NULL,
+};
+
+static struct attribute_group nhmex_uncore_rbox_format_group = {
+ .name = "format",
+ .attrs = nhmex_uncore_rbox_formats_attr,
+};
+
+static struct uncore_event_desc nhmex_uncore_rbox_events[] = {
+ INTEL_UNCORE_EVENT_DESC(qpi0_flit_send, "event=0x0,iperf_cfg=0x80000000"),
+ INTEL_UNCORE_EVENT_DESC(qpi1_filt_send, "event=0x6,iperf_cfg=0x80000000"),
+ INTEL_UNCORE_EVENT_DESC(qpi0_idle_filt, "event=0x0,iperf_cfg=0x40000000"),
+ INTEL_UNCORE_EVENT_DESC(qpi1_idle_filt, "event=0x6,iperf_cfg=0x40000000"),
+ INTEL_UNCORE_EVENT_DESC(qpi0_date_response, "event=0x0,iperf_cfg=0xc4"),
+ INTEL_UNCORE_EVENT_DESC(qpi1_date_response, "event=0x6,iperf_cfg=0xc4"),
+ { /* end: all zeroes */ },
+};
+
+static struct intel_uncore_ops nhmex_uncore_rbox_ops = {
+ NHMEX_UNCORE_OPS_COMMON_INIT(),
+ .enable_event = nhmex_rbox_msr_enable_event,
+ .hw_config = nhmex_rbox_hw_config,
+ .get_constraint = nhmex_rbox_get_constraint,
+ .put_constraint = nhmex_rbox_put_constraint,
+};
+
+static struct intel_uncore_type nhmex_uncore_rbox = {
+ .name = "rbox",
+ .num_counters = 8,
+ .num_boxes = 2,
+ .perf_ctr_bits = 48,
+ .event_ctl = NHMEX_R_MSR_PMON_CTL0,
+ .perf_ctr = NHMEX_R_MSR_PMON_CNT0,
+ .event_mask = NHMEX_R_PMON_RAW_EVENT_MASK,
+ .box_ctl = NHMEX_R_MSR_GLOBAL_CTL,
+ .msr_offset = NHMEX_R_MSR_OFFSET,
+ .pair_ctr_ctl = 1,
+ .num_shared_regs = 20,
+ .event_descs = nhmex_uncore_rbox_events,
+ .ops = &nhmex_uncore_rbox_ops,
+ .format_group = &nhmex_uncore_rbox_format_group
+};
+
+static struct intel_uncore_type *nhmex_msr_uncores[] = {
+ &nhmex_uncore_ubox,
+ &nhmex_uncore_cbox,
+ &nhmex_uncore_bbox,
+ &nhmex_uncore_sbox,
+ &nhmex_uncore_mbox,
+ &nhmex_uncore_rbox,
+ &nhmex_uncore_wbox,
+ NULL,
+};
+
+void nhmex_uncore_cpu_init(void)
+{
+ if (boot_cpu_data.x86_model == 46)
+ uncore_nhmex = true;
+ else
+ nhmex_uncore_mbox.event_descs = wsmex_uncore_mbox_events;
+ if (nhmex_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores)
+ nhmex_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores;
+ uncore_msr_uncores = nhmex_msr_uncores;
+}
+/* end of Nehalem-EX uncore support */
diff --git a/arch/x86/events/intel/uncore_snb.c b/arch/x86/events/intel/uncore_snb.c
new file mode 100644
index 000000000..96531d2b8
--- /dev/null
+++ b/arch/x86/events/intel/uncore_snb.c
@@ -0,0 +1,731 @@
+/* Nehalem/SandBridge/Haswell uncore support */
+#include "uncore.h"
+
+/* Uncore IMC PCI IDs */
+#define PCI_DEVICE_ID_INTEL_SNB_IMC 0x0100
+#define PCI_DEVICE_ID_INTEL_IVB_IMC 0x0154
+#define PCI_DEVICE_ID_INTEL_IVB_E3_IMC 0x0150
+#define PCI_DEVICE_ID_INTEL_HSW_IMC 0x0c00
+#define PCI_DEVICE_ID_INTEL_HSW_U_IMC 0x0a04
+#define PCI_DEVICE_ID_INTEL_BDW_IMC 0x1604
+#define PCI_DEVICE_ID_INTEL_SKL_IMC 0x191f
+
+/* SNB event control */
+#define SNB_UNC_CTL_EV_SEL_MASK 0x000000ff
+#define SNB_UNC_CTL_UMASK_MASK 0x0000ff00
+#define SNB_UNC_CTL_EDGE_DET (1 << 18)
+#define SNB_UNC_CTL_EN (1 << 22)
+#define SNB_UNC_CTL_INVERT (1 << 23)
+#define SNB_UNC_CTL_CMASK_MASK 0x1f000000
+#define NHM_UNC_CTL_CMASK_MASK 0xff000000
+#define NHM_UNC_FIXED_CTR_CTL_EN (1 << 0)
+
+#define SNB_UNC_RAW_EVENT_MASK (SNB_UNC_CTL_EV_SEL_MASK | \
+ SNB_UNC_CTL_UMASK_MASK | \
+ SNB_UNC_CTL_EDGE_DET | \
+ SNB_UNC_CTL_INVERT | \
+ SNB_UNC_CTL_CMASK_MASK)
+
+#define NHM_UNC_RAW_EVENT_MASK (SNB_UNC_CTL_EV_SEL_MASK | \
+ SNB_UNC_CTL_UMASK_MASK | \
+ SNB_UNC_CTL_EDGE_DET | \
+ SNB_UNC_CTL_INVERT | \
+ NHM_UNC_CTL_CMASK_MASK)
+
+/* SNB global control register */
+#define SNB_UNC_PERF_GLOBAL_CTL 0x391
+#define SNB_UNC_FIXED_CTR_CTRL 0x394
+#define SNB_UNC_FIXED_CTR 0x395
+
+/* SNB uncore global control */
+#define SNB_UNC_GLOBAL_CTL_CORE_ALL ((1 << 4) - 1)
+#define SNB_UNC_GLOBAL_CTL_EN (1 << 29)
+
+/* SNB Cbo register */
+#define SNB_UNC_CBO_0_PERFEVTSEL0 0x700
+#define SNB_UNC_CBO_0_PER_CTR0 0x706
+#define SNB_UNC_CBO_MSR_OFFSET 0x10
+
+/* SNB ARB register */
+#define SNB_UNC_ARB_PER_CTR0 0x3b0
+#define SNB_UNC_ARB_PERFEVTSEL0 0x3b2
+#define SNB_UNC_ARB_MSR_OFFSET 0x10
+
+/* NHM global control register */
+#define NHM_UNC_PERF_GLOBAL_CTL 0x391
+#define NHM_UNC_FIXED_CTR 0x394
+#define NHM_UNC_FIXED_CTR_CTRL 0x395
+
+/* NHM uncore global control */
+#define NHM_UNC_GLOBAL_CTL_EN_PC_ALL ((1ULL << 8) - 1)
+#define NHM_UNC_GLOBAL_CTL_EN_FC (1ULL << 32)
+
+/* NHM uncore register */
+#define NHM_UNC_PERFEVTSEL0 0x3c0
+#define NHM_UNC_UNCORE_PMC0 0x3b0
+
+DEFINE_UNCORE_FORMAT_ATTR(event, event, "config:0-7");
+DEFINE_UNCORE_FORMAT_ATTR(umask, umask, "config:8-15");
+DEFINE_UNCORE_FORMAT_ATTR(edge, edge, "config:18");
+DEFINE_UNCORE_FORMAT_ATTR(inv, inv, "config:23");
+DEFINE_UNCORE_FORMAT_ATTR(cmask5, cmask, "config:24-28");
+DEFINE_UNCORE_FORMAT_ATTR(cmask8, cmask, "config:24-31");
+
+/* Sandy Bridge uncore support */
+static void snb_uncore_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+
+ if (hwc->idx < UNCORE_PMC_IDX_FIXED)
+ wrmsrl(hwc->config_base, hwc->config | SNB_UNC_CTL_EN);
+ else
+ wrmsrl(hwc->config_base, SNB_UNC_CTL_EN);
+}
+
+static void snb_uncore_msr_disable_event(struct intel_uncore_box *box, struct perf_event *event)
+{
+ wrmsrl(event->hw.config_base, 0);
+}
+
+static void snb_uncore_msr_init_box(struct intel_uncore_box *box)
+{
+ if (box->pmu->pmu_idx == 0) {
+ wrmsrl(SNB_UNC_PERF_GLOBAL_CTL,
+ SNB_UNC_GLOBAL_CTL_EN | SNB_UNC_GLOBAL_CTL_CORE_ALL);
+ }
+}
+
+static void snb_uncore_msr_exit_box(struct intel_uncore_box *box)
+{
+ if (box->pmu->pmu_idx == 0)
+ wrmsrl(SNB_UNC_PERF_GLOBAL_CTL, 0);
+}
+
+static struct uncore_event_desc snb_uncore_events[] = {
+ INTEL_UNCORE_EVENT_DESC(clockticks, "event=0xff,umask=0x00"),
+ { /* end: all zeroes */ },
+};
+
+static struct attribute *snb_uncore_formats_attr[] = {
+ &format_attr_event.attr,
+ &format_attr_umask.attr,
+ &format_attr_edge.attr,
+ &format_attr_inv.attr,
+ &format_attr_cmask5.attr,
+ NULL,
+};
+
+static struct attribute_group snb_uncore_format_group = {
+ .name = "format",
+ .attrs = snb_uncore_formats_attr,
+};
+
+static struct intel_uncore_ops snb_uncore_msr_ops = {
+ .init_box = snb_uncore_msr_init_box,
+ .exit_box = snb_uncore_msr_exit_box,
+ .disable_event = snb_uncore_msr_disable_event,
+ .enable_event = snb_uncore_msr_enable_event,
+ .read_counter = uncore_msr_read_counter,
+};
+
+static struct event_constraint snb_uncore_arb_constraints[] = {
+ UNCORE_EVENT_CONSTRAINT(0x80, 0x1),
+ UNCORE_EVENT_CONSTRAINT(0x83, 0x1),
+ EVENT_CONSTRAINT_END
+};
+
+static struct intel_uncore_type snb_uncore_cbox = {
+ .name = "cbox",
+ .num_counters = 2,
+ .num_boxes = 4,
+ .perf_ctr_bits = 44,
+ .fixed_ctr_bits = 48,
+ .perf_ctr = SNB_UNC_CBO_0_PER_CTR0,
+ .event_ctl = SNB_UNC_CBO_0_PERFEVTSEL0,
+ .fixed_ctr = SNB_UNC_FIXED_CTR,
+ .fixed_ctl = SNB_UNC_FIXED_CTR_CTRL,
+ .single_fixed = 1,
+ .event_mask = SNB_UNC_RAW_EVENT_MASK,
+ .msr_offset = SNB_UNC_CBO_MSR_OFFSET,
+ .ops = &snb_uncore_msr_ops,
+ .format_group = &snb_uncore_format_group,
+ .event_descs = snb_uncore_events,
+};
+
+static struct intel_uncore_type snb_uncore_arb = {
+ .name = "arb",
+ .num_counters = 2,
+ .num_boxes = 1,
+ .perf_ctr_bits = 44,
+ .perf_ctr = SNB_UNC_ARB_PER_CTR0,
+ .event_ctl = SNB_UNC_ARB_PERFEVTSEL0,
+ .event_mask = SNB_UNC_RAW_EVENT_MASK,
+ .msr_offset = SNB_UNC_ARB_MSR_OFFSET,
+ .constraints = snb_uncore_arb_constraints,
+ .ops = &snb_uncore_msr_ops,
+ .format_group = &snb_uncore_format_group,
+};
+
+static struct intel_uncore_type *snb_msr_uncores[] = {
+ &snb_uncore_cbox,
+ &snb_uncore_arb,
+ NULL,
+};
+
+void snb_uncore_cpu_init(void)
+{
+ uncore_msr_uncores = snb_msr_uncores;
+ if (snb_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores)
+ snb_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores;
+}
+
+enum {
+ SNB_PCI_UNCORE_IMC,
+};
+
+static struct uncore_event_desc snb_uncore_imc_events[] = {
+ INTEL_UNCORE_EVENT_DESC(data_reads, "event=0x01"),
+ INTEL_UNCORE_EVENT_DESC(data_reads.scale, "6.103515625e-5"),
+ INTEL_UNCORE_EVENT_DESC(data_reads.unit, "MiB"),
+
+ INTEL_UNCORE_EVENT_DESC(data_writes, "event=0x02"),
+ INTEL_UNCORE_EVENT_DESC(data_writes.scale, "6.103515625e-5"),
+ INTEL_UNCORE_EVENT_DESC(data_writes.unit, "MiB"),
+
+ { /* end: all zeroes */ },
+};
+
+#define SNB_UNCORE_PCI_IMC_EVENT_MASK 0xff
+#define SNB_UNCORE_PCI_IMC_BAR_OFFSET 0x48
+
+/* page size multiple covering all config regs */
+#define SNB_UNCORE_PCI_IMC_MAP_SIZE 0x6000
+
+#define SNB_UNCORE_PCI_IMC_DATA_READS 0x1
+#define SNB_UNCORE_PCI_IMC_DATA_READS_BASE 0x5050
+#define SNB_UNCORE_PCI_IMC_DATA_WRITES 0x2
+#define SNB_UNCORE_PCI_IMC_DATA_WRITES_BASE 0x5054
+#define SNB_UNCORE_PCI_IMC_CTR_BASE SNB_UNCORE_PCI_IMC_DATA_READS_BASE
+
+static struct attribute *snb_uncore_imc_formats_attr[] = {
+ &format_attr_event.attr,
+ NULL,
+};
+
+static struct attribute_group snb_uncore_imc_format_group = {
+ .name = "format",
+ .attrs = snb_uncore_imc_formats_attr,
+};
+
+static void snb_uncore_imc_init_box(struct intel_uncore_box *box)
+{
+ struct pci_dev *pdev = box->pci_dev;
+ int where = SNB_UNCORE_PCI_IMC_BAR_OFFSET;
+ resource_size_t addr;
+ u32 pci_dword;
+
+ pci_read_config_dword(pdev, where, &pci_dword);
+ addr = pci_dword;
+
+#ifdef CONFIG_PHYS_ADDR_T_64BIT
+ pci_read_config_dword(pdev, where + 4, &pci_dword);
+ addr |= ((resource_size_t)pci_dword << 32);
+#endif
+
+ addr &= ~(PAGE_SIZE - 1);
+
+ box->io_addr = ioremap(addr, SNB_UNCORE_PCI_IMC_MAP_SIZE);
+ box->hrtimer_duration = UNCORE_SNB_IMC_HRTIMER_INTERVAL;
+}
+
+static void snb_uncore_imc_exit_box(struct intel_uncore_box *box)
+{
+ iounmap(box->io_addr);
+}
+
+static void snb_uncore_imc_enable_box(struct intel_uncore_box *box)
+{}
+
+static void snb_uncore_imc_disable_box(struct intel_uncore_box *box)
+{}
+
+static void snb_uncore_imc_enable_event(struct intel_uncore_box *box, struct perf_event *event)
+{}
+
+static void snb_uncore_imc_disable_event(struct intel_uncore_box *box, struct perf_event *event)
+{}
+
+static u64 snb_uncore_imc_read_counter(struct intel_uncore_box *box, struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+
+ return (u64)*(unsigned int *)(box->io_addr + hwc->event_base);
+}
+
+/*
+ * custom event_init() function because we define our own fixed, free
+ * running counters, so we do not want to conflict with generic uncore
+ * logic. Also simplifies processing
+ */
+static int snb_uncore_imc_event_init(struct perf_event *event)
+{
+ struct intel_uncore_pmu *pmu;
+ struct intel_uncore_box *box;
+ struct hw_perf_event *hwc = &event->hw;
+ u64 cfg = event->attr.config & SNB_UNCORE_PCI_IMC_EVENT_MASK;
+ int idx, base;
+
+ if (event->attr.type != event->pmu->type)
+ return -ENOENT;
+
+ pmu = uncore_event_to_pmu(event);
+ /* no device found for this pmu */
+ if (pmu->func_id < 0)
+ return -ENOENT;
+
+ /* Sampling not supported yet */
+ if (hwc->sample_period)
+ return -EINVAL;
+
+ /* unsupported modes and filters */
+ if (event->attr.exclude_user ||
+ event->attr.exclude_kernel ||
+ event->attr.exclude_hv ||
+ event->attr.exclude_idle ||
+ event->attr.exclude_host ||
+ event->attr.exclude_guest ||
+ event->attr.sample_period) /* no sampling */
+ return -EINVAL;
+
+ /*
+ * Place all uncore events for a particular physical package
+ * onto a single cpu
+ */
+ if (event->cpu < 0)
+ return -EINVAL;
+
+ /* check only supported bits are set */
+ if (event->attr.config & ~SNB_UNCORE_PCI_IMC_EVENT_MASK)
+ return -EINVAL;
+
+ box = uncore_pmu_to_box(pmu, event->cpu);
+ if (!box || box->cpu < 0)
+ return -EINVAL;
+
+ event->cpu = box->cpu;
+ event->pmu_private = box;
+
+ event->hw.idx = -1;
+ event->hw.last_tag = ~0ULL;
+ event->hw.extra_reg.idx = EXTRA_REG_NONE;
+ event->hw.branch_reg.idx = EXTRA_REG_NONE;
+ /*
+ * check event is known (whitelist, determines counter)
+ */
+ switch (cfg) {
+ case SNB_UNCORE_PCI_IMC_DATA_READS:
+ base = SNB_UNCORE_PCI_IMC_DATA_READS_BASE;
+ idx = UNCORE_PMC_IDX_FIXED;
+ break;
+ case SNB_UNCORE_PCI_IMC_DATA_WRITES:
+ base = SNB_UNCORE_PCI_IMC_DATA_WRITES_BASE;
+ idx = UNCORE_PMC_IDX_FIXED + 1;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ /* must be done before validate_group */
+ event->hw.event_base = base;
+ event->hw.config = cfg;
+ event->hw.idx = idx;
+
+ /* no group validation needed, we have free running counters */
+
+ return 0;
+}
+
+static int snb_uncore_imc_hw_config(struct intel_uncore_box *box, struct perf_event *event)
+{
+ return 0;
+}
+
+static void snb_uncore_imc_event_start(struct perf_event *event, int flags)
+{
+ struct intel_uncore_box *box = uncore_event_to_box(event);
+ u64 count;
+
+ if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
+ return;
+
+ event->hw.state = 0;
+ box->n_active++;
+
+ list_add_tail(&event->active_entry, &box->active_list);
+
+ count = snb_uncore_imc_read_counter(box, event);
+ local64_set(&event->hw.prev_count, count);
+
+ if (box->n_active == 1)
+ uncore_pmu_start_hrtimer(box);
+}
+
+static void snb_uncore_imc_event_stop(struct perf_event *event, int flags)
+{
+ struct intel_uncore_box *box = uncore_event_to_box(event);
+ struct hw_perf_event *hwc = &event->hw;
+
+ if (!(hwc->state & PERF_HES_STOPPED)) {
+ box->n_active--;
+
+ WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
+ hwc->state |= PERF_HES_STOPPED;
+
+ list_del(&event->active_entry);
+
+ if (box->n_active == 0)
+ uncore_pmu_cancel_hrtimer(box);
+ }
+
+ if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
+ /*
+ * Drain the remaining delta count out of a event
+ * that we are disabling:
+ */
+ uncore_perf_event_update(box, event);
+ hwc->state |= PERF_HES_UPTODATE;
+ }
+}
+
+static int snb_uncore_imc_event_add(struct perf_event *event, int flags)
+{
+ struct intel_uncore_box *box = uncore_event_to_box(event);
+ struct hw_perf_event *hwc = &event->hw;
+
+ if (!box)
+ return -ENODEV;
+
+ hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
+ if (!(flags & PERF_EF_START))
+ hwc->state |= PERF_HES_ARCH;
+
+ snb_uncore_imc_event_start(event, 0);
+
+ box->n_events++;
+
+ return 0;
+}
+
+static void snb_uncore_imc_event_del(struct perf_event *event, int flags)
+{
+ struct intel_uncore_box *box = uncore_event_to_box(event);
+ int i;
+
+ snb_uncore_imc_event_stop(event, PERF_EF_UPDATE);
+
+ for (i = 0; i < box->n_events; i++) {
+ if (event == box->event_list[i]) {
+ --box->n_events;
+ break;
+ }
+ }
+}
+
+int snb_pci2phy_map_init(int devid)
+{
+ struct pci_dev *dev = NULL;
+ struct pci2phy_map *map;
+ int bus, segment;
+
+ dev = pci_get_device(PCI_VENDOR_ID_INTEL, devid, dev);
+ if (!dev)
+ return -ENOTTY;
+
+ bus = dev->bus->number;
+ segment = pci_domain_nr(dev->bus);
+
+ raw_spin_lock(&pci2phy_map_lock);
+ map = __find_pci2phy_map(segment);
+ if (!map) {
+ raw_spin_unlock(&pci2phy_map_lock);
+ pci_dev_put(dev);
+ return -ENOMEM;
+ }
+ map->pbus_to_physid[bus] = 0;
+ raw_spin_unlock(&pci2phy_map_lock);
+
+ pci_dev_put(dev);
+
+ return 0;
+}
+
+static struct pmu snb_uncore_imc_pmu = {
+ .task_ctx_nr = perf_invalid_context,
+ .event_init = snb_uncore_imc_event_init,
+ .add = snb_uncore_imc_event_add,
+ .del = snb_uncore_imc_event_del,
+ .start = snb_uncore_imc_event_start,
+ .stop = snb_uncore_imc_event_stop,
+ .read = uncore_pmu_event_read,
+};
+
+static struct intel_uncore_ops snb_uncore_imc_ops = {
+ .init_box = snb_uncore_imc_init_box,
+ .exit_box = snb_uncore_imc_exit_box,
+ .enable_box = snb_uncore_imc_enable_box,
+ .disable_box = snb_uncore_imc_disable_box,
+ .disable_event = snb_uncore_imc_disable_event,
+ .enable_event = snb_uncore_imc_enable_event,
+ .hw_config = snb_uncore_imc_hw_config,
+ .read_counter = snb_uncore_imc_read_counter,
+};
+
+static struct intel_uncore_type snb_uncore_imc = {
+ .name = "imc",
+ .num_counters = 2,
+ .num_boxes = 1,
+ .fixed_ctr_bits = 32,
+ .fixed_ctr = SNB_UNCORE_PCI_IMC_CTR_BASE,
+ .event_descs = snb_uncore_imc_events,
+ .format_group = &snb_uncore_imc_format_group,
+ .perf_ctr = SNB_UNCORE_PCI_IMC_DATA_READS_BASE,
+ .event_mask = SNB_UNCORE_PCI_IMC_EVENT_MASK,
+ .ops = &snb_uncore_imc_ops,
+ .pmu = &snb_uncore_imc_pmu,
+};
+
+static struct intel_uncore_type *snb_pci_uncores[] = {
+ [SNB_PCI_UNCORE_IMC] = &snb_uncore_imc,
+ NULL,
+};
+
+static const struct pci_device_id snb_uncore_pci_ids[] = {
+ { /* IMC */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SNB_IMC),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+ },
+ { /* end: all zeroes */ },
+};
+
+static const struct pci_device_id ivb_uncore_pci_ids[] = {
+ { /* IMC */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IVB_IMC),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+ },
+ { /* IMC */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IVB_E3_IMC),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+ },
+ { /* end: all zeroes */ },
+};
+
+static const struct pci_device_id hsw_uncore_pci_ids[] = {
+ { /* IMC */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_HSW_IMC),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+ },
+ { /* IMC */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_HSW_U_IMC),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+ },
+ { /* end: all zeroes */ },
+};
+
+static const struct pci_device_id bdw_uncore_pci_ids[] = {
+ { /* IMC */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_BDW_IMC),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+ },
+ { /* end: all zeroes */ },
+};
+
+static const struct pci_device_id skl_uncore_pci_ids[] = {
+ { /* IMC */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_IMC),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+ },
+ { /* end: all zeroes */ },
+};
+
+static struct pci_driver snb_uncore_pci_driver = {
+ .name = "snb_uncore",
+ .id_table = snb_uncore_pci_ids,
+};
+
+static struct pci_driver ivb_uncore_pci_driver = {
+ .name = "ivb_uncore",
+ .id_table = ivb_uncore_pci_ids,
+};
+
+static struct pci_driver hsw_uncore_pci_driver = {
+ .name = "hsw_uncore",
+ .id_table = hsw_uncore_pci_ids,
+};
+
+static struct pci_driver bdw_uncore_pci_driver = {
+ .name = "bdw_uncore",
+ .id_table = bdw_uncore_pci_ids,
+};
+
+static struct pci_driver skl_uncore_pci_driver = {
+ .name = "skl_uncore",
+ .id_table = skl_uncore_pci_ids,
+};
+
+struct imc_uncore_pci_dev {
+ __u32 pci_id;
+ struct pci_driver *driver;
+};
+#define IMC_DEV(a, d) \
+ { .pci_id = PCI_DEVICE_ID_INTEL_##a, .driver = (d) }
+
+static const struct imc_uncore_pci_dev desktop_imc_pci_ids[] = {
+ IMC_DEV(SNB_IMC, &snb_uncore_pci_driver),
+ IMC_DEV(IVB_IMC, &ivb_uncore_pci_driver), /* 3rd Gen Core processor */
+ IMC_DEV(IVB_E3_IMC, &ivb_uncore_pci_driver), /* Xeon E3-1200 v2/3rd Gen Core processor */
+ IMC_DEV(HSW_IMC, &hsw_uncore_pci_driver), /* 4th Gen Core Processor */
+ IMC_DEV(HSW_U_IMC, &hsw_uncore_pci_driver), /* 4th Gen Core ULT Mobile Processor */
+ IMC_DEV(BDW_IMC, &bdw_uncore_pci_driver), /* 5th Gen Core U */
+ IMC_DEV(SKL_IMC, &skl_uncore_pci_driver), /* 6th Gen Core */
+ { /* end marker */ }
+};
+
+
+#define for_each_imc_pci_id(x, t) \
+ for (x = (t); (x)->pci_id; x++)
+
+static struct pci_driver *imc_uncore_find_dev(void)
+{
+ const struct imc_uncore_pci_dev *p;
+ int ret;
+
+ for_each_imc_pci_id(p, desktop_imc_pci_ids) {
+ ret = snb_pci2phy_map_init(p->pci_id);
+ if (ret == 0)
+ return p->driver;
+ }
+ return NULL;
+}
+
+static int imc_uncore_pci_init(void)
+{
+ struct pci_driver *imc_drv = imc_uncore_find_dev();
+
+ if (!imc_drv)
+ return -ENODEV;
+
+ uncore_pci_uncores = snb_pci_uncores;
+ uncore_pci_driver = imc_drv;
+
+ return 0;
+}
+
+int snb_uncore_pci_init(void)
+{
+ return imc_uncore_pci_init();
+}
+
+int ivb_uncore_pci_init(void)
+{
+ return imc_uncore_pci_init();
+}
+int hsw_uncore_pci_init(void)
+{
+ return imc_uncore_pci_init();
+}
+
+int bdw_uncore_pci_init(void)
+{
+ return imc_uncore_pci_init();
+}
+
+int skl_uncore_pci_init(void)
+{
+ return imc_uncore_pci_init();
+}
+
+/* end of Sandy Bridge uncore support */
+
+/* Nehalem uncore support */
+static void nhm_uncore_msr_disable_box(struct intel_uncore_box *box)
+{
+ wrmsrl(NHM_UNC_PERF_GLOBAL_CTL, 0);
+}
+
+static void nhm_uncore_msr_enable_box(struct intel_uncore_box *box)
+{
+ wrmsrl(NHM_UNC_PERF_GLOBAL_CTL, NHM_UNC_GLOBAL_CTL_EN_PC_ALL | NHM_UNC_GLOBAL_CTL_EN_FC);
+}
+
+static void nhm_uncore_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+
+ if (hwc->idx < UNCORE_PMC_IDX_FIXED)
+ wrmsrl(hwc->config_base, hwc->config | SNB_UNC_CTL_EN);
+ else
+ wrmsrl(hwc->config_base, NHM_UNC_FIXED_CTR_CTL_EN);
+}
+
+static struct attribute *nhm_uncore_formats_attr[] = {
+ &format_attr_event.attr,
+ &format_attr_umask.attr,
+ &format_attr_edge.attr,
+ &format_attr_inv.attr,
+ &format_attr_cmask8.attr,
+ NULL,
+};
+
+static struct attribute_group nhm_uncore_format_group = {
+ .name = "format",
+ .attrs = nhm_uncore_formats_attr,
+};
+
+static struct uncore_event_desc nhm_uncore_events[] = {
+ INTEL_UNCORE_EVENT_DESC(clockticks, "event=0xff,umask=0x00"),
+ INTEL_UNCORE_EVENT_DESC(qmc_writes_full_any, "event=0x2f,umask=0x0f"),
+ INTEL_UNCORE_EVENT_DESC(qmc_normal_reads_any, "event=0x2c,umask=0x0f"),
+ INTEL_UNCORE_EVENT_DESC(qhl_request_ioh_reads, "event=0x20,umask=0x01"),
+ INTEL_UNCORE_EVENT_DESC(qhl_request_ioh_writes, "event=0x20,umask=0x02"),
+ INTEL_UNCORE_EVENT_DESC(qhl_request_remote_reads, "event=0x20,umask=0x04"),
+ INTEL_UNCORE_EVENT_DESC(qhl_request_remote_writes, "event=0x20,umask=0x08"),
+ INTEL_UNCORE_EVENT_DESC(qhl_request_local_reads, "event=0x20,umask=0x10"),
+ INTEL_UNCORE_EVENT_DESC(qhl_request_local_writes, "event=0x20,umask=0x20"),
+ { /* end: all zeroes */ },
+};
+
+static struct intel_uncore_ops nhm_uncore_msr_ops = {
+ .disable_box = nhm_uncore_msr_disable_box,
+ .enable_box = nhm_uncore_msr_enable_box,
+ .disable_event = snb_uncore_msr_disable_event,
+ .enable_event = nhm_uncore_msr_enable_event,
+ .read_counter = uncore_msr_read_counter,
+};
+
+static struct intel_uncore_type nhm_uncore = {
+ .name = "",
+ .num_counters = 8,
+ .num_boxes = 1,
+ .perf_ctr_bits = 48,
+ .fixed_ctr_bits = 48,
+ .event_ctl = NHM_UNC_PERFEVTSEL0,
+ .perf_ctr = NHM_UNC_UNCORE_PMC0,
+ .fixed_ctr = NHM_UNC_FIXED_CTR,
+ .fixed_ctl = NHM_UNC_FIXED_CTR_CTRL,
+ .event_mask = NHM_UNC_RAW_EVENT_MASK,
+ .event_descs = nhm_uncore_events,
+ .ops = &nhm_uncore_msr_ops,
+ .format_group = &nhm_uncore_format_group,
+};
+
+static struct intel_uncore_type *nhm_msr_uncores[] = {
+ &nhm_uncore,
+ NULL,
+};
+
+void nhm_uncore_cpu_init(void)
+{
+ uncore_msr_uncores = nhm_msr_uncores;
+}
+
+/* end of Nehalem uncore support */
diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c
new file mode 100644
index 000000000..b2625867e
--- /dev/null
+++ b/arch/x86/events/intel/uncore_snbep.c
@@ -0,0 +1,3139 @@
+/* SandyBridge-EP/IvyTown uncore support */
+#include "uncore.h"
+
+/* SNB-EP Box level control */
+#define SNBEP_PMON_BOX_CTL_RST_CTRL (1 << 0)
+#define SNBEP_PMON_BOX_CTL_RST_CTRS (1 << 1)
+#define SNBEP_PMON_BOX_CTL_FRZ (1 << 8)
+#define SNBEP_PMON_BOX_CTL_FRZ_EN (1 << 16)
+#define SNBEP_PMON_BOX_CTL_INT (SNBEP_PMON_BOX_CTL_RST_CTRL | \
+ SNBEP_PMON_BOX_CTL_RST_CTRS | \
+ SNBEP_PMON_BOX_CTL_FRZ_EN)
+/* SNB-EP event control */
+#define SNBEP_PMON_CTL_EV_SEL_MASK 0x000000ff
+#define SNBEP_PMON_CTL_UMASK_MASK 0x0000ff00
+#define SNBEP_PMON_CTL_RST (1 << 17)
+#define SNBEP_PMON_CTL_EDGE_DET (1 << 18)
+#define SNBEP_PMON_CTL_EV_SEL_EXT (1 << 21)
+#define SNBEP_PMON_CTL_EN (1 << 22)
+#define SNBEP_PMON_CTL_INVERT (1 << 23)
+#define SNBEP_PMON_CTL_TRESH_MASK 0xff000000
+#define SNBEP_PMON_RAW_EVENT_MASK (SNBEP_PMON_CTL_EV_SEL_MASK | \
+ SNBEP_PMON_CTL_UMASK_MASK | \
+ SNBEP_PMON_CTL_EDGE_DET | \
+ SNBEP_PMON_CTL_INVERT | \
+ SNBEP_PMON_CTL_TRESH_MASK)
+
+/* SNB-EP Ubox event control */
+#define SNBEP_U_MSR_PMON_CTL_TRESH_MASK 0x1f000000
+#define SNBEP_U_MSR_PMON_RAW_EVENT_MASK \
+ (SNBEP_PMON_CTL_EV_SEL_MASK | \
+ SNBEP_PMON_CTL_UMASK_MASK | \
+ SNBEP_PMON_CTL_EDGE_DET | \
+ SNBEP_PMON_CTL_INVERT | \
+ SNBEP_U_MSR_PMON_CTL_TRESH_MASK)
+
+#define SNBEP_CBO_PMON_CTL_TID_EN (1 << 19)
+#define SNBEP_CBO_MSR_PMON_RAW_EVENT_MASK (SNBEP_PMON_RAW_EVENT_MASK | \
+ SNBEP_CBO_PMON_CTL_TID_EN)
+
+/* SNB-EP PCU event control */
+#define SNBEP_PCU_MSR_PMON_CTL_OCC_SEL_MASK 0x0000c000
+#define SNBEP_PCU_MSR_PMON_CTL_TRESH_MASK 0x1f000000
+#define SNBEP_PCU_MSR_PMON_CTL_OCC_INVERT (1 << 30)
+#define SNBEP_PCU_MSR_PMON_CTL_OCC_EDGE_DET (1 << 31)
+#define SNBEP_PCU_MSR_PMON_RAW_EVENT_MASK \
+ (SNBEP_PMON_CTL_EV_SEL_MASK | \
+ SNBEP_PCU_MSR_PMON_CTL_OCC_SEL_MASK | \
+ SNBEP_PMON_CTL_EDGE_DET | \
+ SNBEP_PMON_CTL_INVERT | \
+ SNBEP_PCU_MSR_PMON_CTL_TRESH_MASK | \
+ SNBEP_PCU_MSR_PMON_CTL_OCC_INVERT | \
+ SNBEP_PCU_MSR_PMON_CTL_OCC_EDGE_DET)
+
+#define SNBEP_QPI_PCI_PMON_RAW_EVENT_MASK \
+ (SNBEP_PMON_RAW_EVENT_MASK | \
+ SNBEP_PMON_CTL_EV_SEL_EXT)
+
+/* SNB-EP pci control register */
+#define SNBEP_PCI_PMON_BOX_CTL 0xf4
+#define SNBEP_PCI_PMON_CTL0 0xd8
+/* SNB-EP pci counter register */
+#define SNBEP_PCI_PMON_CTR0 0xa0
+
+/* SNB-EP home agent register */
+#define SNBEP_HA_PCI_PMON_BOX_ADDRMATCH0 0x40
+#define SNBEP_HA_PCI_PMON_BOX_ADDRMATCH1 0x44
+#define SNBEP_HA_PCI_PMON_BOX_OPCODEMATCH 0x48
+/* SNB-EP memory controller register */
+#define SNBEP_MC_CHy_PCI_PMON_FIXED_CTL 0xf0
+#define SNBEP_MC_CHy_PCI_PMON_FIXED_CTR 0xd0
+/* SNB-EP QPI register */
+#define SNBEP_Q_Py_PCI_PMON_PKT_MATCH0 0x228
+#define SNBEP_Q_Py_PCI_PMON_PKT_MATCH1 0x22c
+#define SNBEP_Q_Py_PCI_PMON_PKT_MASK0 0x238
+#define SNBEP_Q_Py_PCI_PMON_PKT_MASK1 0x23c
+
+/* SNB-EP Ubox register */
+#define SNBEP_U_MSR_PMON_CTR0 0xc16
+#define SNBEP_U_MSR_PMON_CTL0 0xc10
+
+#define SNBEP_U_MSR_PMON_UCLK_FIXED_CTL 0xc08
+#define SNBEP_U_MSR_PMON_UCLK_FIXED_CTR 0xc09
+
+/* SNB-EP Cbo register */
+#define SNBEP_C0_MSR_PMON_CTR0 0xd16
+#define SNBEP_C0_MSR_PMON_CTL0 0xd10
+#define SNBEP_C0_MSR_PMON_BOX_CTL 0xd04
+#define SNBEP_C0_MSR_PMON_BOX_FILTER 0xd14
+#define SNBEP_CBO_MSR_OFFSET 0x20
+
+#define SNBEP_CB0_MSR_PMON_BOX_FILTER_TID 0x1f
+#define SNBEP_CB0_MSR_PMON_BOX_FILTER_NID 0x3fc00
+#define SNBEP_CB0_MSR_PMON_BOX_FILTER_STATE 0x7c0000
+#define SNBEP_CB0_MSR_PMON_BOX_FILTER_OPC 0xff800000
+
+#define SNBEP_CBO_EVENT_EXTRA_REG(e, m, i) { \
+ .event = (e), \
+ .msr = SNBEP_C0_MSR_PMON_BOX_FILTER, \
+ .config_mask = (m), \
+ .idx = (i) \
+}
+
+/* SNB-EP PCU register */
+#define SNBEP_PCU_MSR_PMON_CTR0 0xc36
+#define SNBEP_PCU_MSR_PMON_CTL0 0xc30
+#define SNBEP_PCU_MSR_PMON_BOX_CTL 0xc24
+#define SNBEP_PCU_MSR_PMON_BOX_FILTER 0xc34
+#define SNBEP_PCU_MSR_PMON_BOX_FILTER_MASK 0xffffffff
+#define SNBEP_PCU_MSR_CORE_C3_CTR 0x3fc
+#define SNBEP_PCU_MSR_CORE_C6_CTR 0x3fd
+
+/* IVBEP event control */
+#define IVBEP_PMON_BOX_CTL_INT (SNBEP_PMON_BOX_CTL_RST_CTRL | \
+ SNBEP_PMON_BOX_CTL_RST_CTRS)
+#define IVBEP_PMON_RAW_EVENT_MASK (SNBEP_PMON_CTL_EV_SEL_MASK | \
+ SNBEP_PMON_CTL_UMASK_MASK | \
+ SNBEP_PMON_CTL_EDGE_DET | \
+ SNBEP_PMON_CTL_TRESH_MASK)
+/* IVBEP Ubox */
+#define IVBEP_U_MSR_PMON_GLOBAL_CTL 0xc00
+#define IVBEP_U_PMON_GLOBAL_FRZ_ALL (1 << 31)
+#define IVBEP_U_PMON_GLOBAL_UNFRZ_ALL (1 << 29)
+
+#define IVBEP_U_MSR_PMON_RAW_EVENT_MASK \
+ (SNBEP_PMON_CTL_EV_SEL_MASK | \
+ SNBEP_PMON_CTL_UMASK_MASK | \
+ SNBEP_PMON_CTL_EDGE_DET | \
+ SNBEP_U_MSR_PMON_CTL_TRESH_MASK)
+/* IVBEP Cbo */
+#define IVBEP_CBO_MSR_PMON_RAW_EVENT_MASK (IVBEP_PMON_RAW_EVENT_MASK | \
+ SNBEP_CBO_PMON_CTL_TID_EN)
+
+#define IVBEP_CB0_MSR_PMON_BOX_FILTER_TID (0x1fULL << 0)
+#define IVBEP_CB0_MSR_PMON_BOX_FILTER_LINK (0xfULL << 5)
+#define IVBEP_CB0_MSR_PMON_BOX_FILTER_STATE (0x3fULL << 17)
+#define IVBEP_CB0_MSR_PMON_BOX_FILTER_NID (0xffffULL << 32)
+#define IVBEP_CB0_MSR_PMON_BOX_FILTER_OPC (0x1ffULL << 52)
+#define IVBEP_CB0_MSR_PMON_BOX_FILTER_C6 (0x1ULL << 61)
+#define IVBEP_CB0_MSR_PMON_BOX_FILTER_NC (0x1ULL << 62)
+#define IVBEP_CB0_MSR_PMON_BOX_FILTER_ISOC (0x1ULL << 63)
+
+/* IVBEP home agent */
+#define IVBEP_HA_PCI_PMON_CTL_Q_OCC_RST (1 << 16)
+#define IVBEP_HA_PCI_PMON_RAW_EVENT_MASK \
+ (IVBEP_PMON_RAW_EVENT_MASK | \
+ IVBEP_HA_PCI_PMON_CTL_Q_OCC_RST)
+/* IVBEP PCU */
+#define IVBEP_PCU_MSR_PMON_RAW_EVENT_MASK \
+ (SNBEP_PMON_CTL_EV_SEL_MASK | \
+ SNBEP_PCU_MSR_PMON_CTL_OCC_SEL_MASK | \
+ SNBEP_PMON_CTL_EDGE_DET | \
+ SNBEP_PCU_MSR_PMON_CTL_TRESH_MASK | \
+ SNBEP_PCU_MSR_PMON_CTL_OCC_INVERT | \
+ SNBEP_PCU_MSR_PMON_CTL_OCC_EDGE_DET)
+/* IVBEP QPI */
+#define IVBEP_QPI_PCI_PMON_RAW_EVENT_MASK \
+ (IVBEP_PMON_RAW_EVENT_MASK | \
+ SNBEP_PMON_CTL_EV_SEL_EXT)
+
+#define __BITS_VALUE(x, i, n) ((typeof(x))(((x) >> ((i) * (n))) & \
+ ((1ULL << (n)) - 1)))
+
+/* Haswell-EP Ubox */
+#define HSWEP_U_MSR_PMON_CTR0 0x709
+#define HSWEP_U_MSR_PMON_CTL0 0x705
+#define HSWEP_U_MSR_PMON_FILTER 0x707
+
+#define HSWEP_U_MSR_PMON_UCLK_FIXED_CTL 0x703
+#define HSWEP_U_MSR_PMON_UCLK_FIXED_CTR 0x704
+
+#define HSWEP_U_MSR_PMON_BOX_FILTER_TID (0x1 << 0)
+#define HSWEP_U_MSR_PMON_BOX_FILTER_CID (0x1fULL << 1)
+#define HSWEP_U_MSR_PMON_BOX_FILTER_MASK \
+ (HSWEP_U_MSR_PMON_BOX_FILTER_TID | \
+ HSWEP_U_MSR_PMON_BOX_FILTER_CID)
+
+/* Haswell-EP CBo */
+#define HSWEP_C0_MSR_PMON_CTR0 0xe08
+#define HSWEP_C0_MSR_PMON_CTL0 0xe01
+#define HSWEP_C0_MSR_PMON_BOX_CTL 0xe00
+#define HSWEP_C0_MSR_PMON_BOX_FILTER0 0xe05
+#define HSWEP_CBO_MSR_OFFSET 0x10
+
+
+#define HSWEP_CB0_MSR_PMON_BOX_FILTER_TID (0x3fULL << 0)
+#define HSWEP_CB0_MSR_PMON_BOX_FILTER_LINK (0xfULL << 6)
+#define HSWEP_CB0_MSR_PMON_BOX_FILTER_STATE (0x7fULL << 17)
+#define HSWEP_CB0_MSR_PMON_BOX_FILTER_NID (0xffffULL << 32)
+#define HSWEP_CB0_MSR_PMON_BOX_FILTER_OPC (0x1ffULL << 52)
+#define HSWEP_CB0_MSR_PMON_BOX_FILTER_C6 (0x1ULL << 61)
+#define HSWEP_CB0_MSR_PMON_BOX_FILTER_NC (0x1ULL << 62)
+#define HSWEP_CB0_MSR_PMON_BOX_FILTER_ISOC (0x1ULL << 63)
+
+
+/* Haswell-EP Sbox */
+#define HSWEP_S0_MSR_PMON_CTR0 0x726
+#define HSWEP_S0_MSR_PMON_CTL0 0x721
+#define HSWEP_S0_MSR_PMON_BOX_CTL 0x720
+#define HSWEP_SBOX_MSR_OFFSET 0xa
+#define HSWEP_S_MSR_PMON_RAW_EVENT_MASK (SNBEP_PMON_RAW_EVENT_MASK | \
+ SNBEP_CBO_PMON_CTL_TID_EN)
+
+/* Haswell-EP PCU */
+#define HSWEP_PCU_MSR_PMON_CTR0 0x717
+#define HSWEP_PCU_MSR_PMON_CTL0 0x711
+#define HSWEP_PCU_MSR_PMON_BOX_CTL 0x710
+#define HSWEP_PCU_MSR_PMON_BOX_FILTER 0x715
+
+/* KNL Ubox */
+#define KNL_U_MSR_PMON_RAW_EVENT_MASK \
+ (SNBEP_U_MSR_PMON_RAW_EVENT_MASK | \
+ SNBEP_CBO_PMON_CTL_TID_EN)
+/* KNL CHA */
+#define KNL_CHA_MSR_OFFSET 0xc
+#define KNL_CHA_MSR_PMON_CTL_QOR (1 << 16)
+#define KNL_CHA_MSR_PMON_RAW_EVENT_MASK \
+ (SNBEP_CBO_MSR_PMON_RAW_EVENT_MASK | \
+ KNL_CHA_MSR_PMON_CTL_QOR)
+#define KNL_CHA_MSR_PMON_BOX_FILTER_TID 0x1ff
+#define KNL_CHA_MSR_PMON_BOX_FILTER_STATE (7 << 18)
+#define KNL_CHA_MSR_PMON_BOX_FILTER_OP (0xfffffe2aULL << 32)
+#define KNL_CHA_MSR_PMON_BOX_FILTER_REMOTE_NODE (0x1ULL << 32)
+#define KNL_CHA_MSR_PMON_BOX_FILTER_LOCAL_NODE (0x1ULL << 33)
+#define KNL_CHA_MSR_PMON_BOX_FILTER_NNC (0x1ULL << 37)
+
+/* KNL EDC/MC UCLK */
+#define KNL_UCLK_MSR_PMON_CTR0_LOW 0x400
+#define KNL_UCLK_MSR_PMON_CTL0 0x420
+#define KNL_UCLK_MSR_PMON_BOX_CTL 0x430
+#define KNL_UCLK_MSR_PMON_UCLK_FIXED_LOW 0x44c
+#define KNL_UCLK_MSR_PMON_UCLK_FIXED_CTL 0x454
+#define KNL_PMON_FIXED_CTL_EN 0x1
+
+/* KNL EDC */
+#define KNL_EDC0_ECLK_MSR_PMON_CTR0_LOW 0xa00
+#define KNL_EDC0_ECLK_MSR_PMON_CTL0 0xa20
+#define KNL_EDC0_ECLK_MSR_PMON_BOX_CTL 0xa30
+#define KNL_EDC0_ECLK_MSR_PMON_ECLK_FIXED_LOW 0xa3c
+#define KNL_EDC0_ECLK_MSR_PMON_ECLK_FIXED_CTL 0xa44
+
+/* KNL MC */
+#define KNL_MC0_CH0_MSR_PMON_CTR0_LOW 0xb00
+#define KNL_MC0_CH0_MSR_PMON_CTL0 0xb20
+#define KNL_MC0_CH0_MSR_PMON_BOX_CTL 0xb30
+#define KNL_MC0_CH0_MSR_PMON_FIXED_LOW 0xb3c
+#define KNL_MC0_CH0_MSR_PMON_FIXED_CTL 0xb44
+
+/* KNL IRP */
+#define KNL_IRP_PCI_PMON_BOX_CTL 0xf0
+#define KNL_IRP_PCI_PMON_RAW_EVENT_MASK (SNBEP_PMON_RAW_EVENT_MASK | \
+ KNL_CHA_MSR_PMON_CTL_QOR)
+/* KNL PCU */
+#define KNL_PCU_PMON_CTL_EV_SEL_MASK 0x0000007f
+#define KNL_PCU_PMON_CTL_USE_OCC_CTR (1 << 7)
+#define KNL_PCU_MSR_PMON_CTL_TRESH_MASK 0x3f000000
+#define KNL_PCU_MSR_PMON_RAW_EVENT_MASK \
+ (KNL_PCU_PMON_CTL_EV_SEL_MASK | \
+ KNL_PCU_PMON_CTL_USE_OCC_CTR | \
+ SNBEP_PCU_MSR_PMON_CTL_OCC_SEL_MASK | \
+ SNBEP_PMON_CTL_EDGE_DET | \
+ SNBEP_CBO_PMON_CTL_TID_EN | \
+ SNBEP_PMON_CTL_INVERT | \
+ KNL_PCU_MSR_PMON_CTL_TRESH_MASK | \
+ SNBEP_PCU_MSR_PMON_CTL_OCC_INVERT | \
+ SNBEP_PCU_MSR_PMON_CTL_OCC_EDGE_DET)
+
+DEFINE_UNCORE_FORMAT_ATTR(event, event, "config:0-7");
+DEFINE_UNCORE_FORMAT_ATTR(event2, event, "config:0-6");
+DEFINE_UNCORE_FORMAT_ATTR(event_ext, event, "config:0-7,21");
+DEFINE_UNCORE_FORMAT_ATTR(use_occ_ctr, use_occ_ctr, "config:7");
+DEFINE_UNCORE_FORMAT_ATTR(umask, umask, "config:8-15");
+DEFINE_UNCORE_FORMAT_ATTR(qor, qor, "config:16");
+DEFINE_UNCORE_FORMAT_ATTR(edge, edge, "config:18");
+DEFINE_UNCORE_FORMAT_ATTR(tid_en, tid_en, "config:19");
+DEFINE_UNCORE_FORMAT_ATTR(inv, inv, "config:23");
+DEFINE_UNCORE_FORMAT_ATTR(thresh8, thresh, "config:24-31");
+DEFINE_UNCORE_FORMAT_ATTR(thresh6, thresh, "config:24-29");
+DEFINE_UNCORE_FORMAT_ATTR(thresh5, thresh, "config:24-28");
+DEFINE_UNCORE_FORMAT_ATTR(occ_sel, occ_sel, "config:14-15");
+DEFINE_UNCORE_FORMAT_ATTR(occ_invert, occ_invert, "config:30");
+DEFINE_UNCORE_FORMAT_ATTR(occ_edge, occ_edge, "config:14-51");
+DEFINE_UNCORE_FORMAT_ATTR(occ_edge_det, occ_edge_det, "config:31");
+DEFINE_UNCORE_FORMAT_ATTR(filter_tid, filter_tid, "config1:0-4");
+DEFINE_UNCORE_FORMAT_ATTR(filter_tid2, filter_tid, "config1:0");
+DEFINE_UNCORE_FORMAT_ATTR(filter_tid3, filter_tid, "config1:0-5");
+DEFINE_UNCORE_FORMAT_ATTR(filter_tid4, filter_tid, "config1:0-8");
+DEFINE_UNCORE_FORMAT_ATTR(filter_cid, filter_cid, "config1:5");
+DEFINE_UNCORE_FORMAT_ATTR(filter_link, filter_link, "config1:5-8");
+DEFINE_UNCORE_FORMAT_ATTR(filter_link2, filter_link, "config1:6-8");
+DEFINE_UNCORE_FORMAT_ATTR(filter_link3, filter_link, "config1:12");
+DEFINE_UNCORE_FORMAT_ATTR(filter_nid, filter_nid, "config1:10-17");
+DEFINE_UNCORE_FORMAT_ATTR(filter_nid2, filter_nid, "config1:32-47");
+DEFINE_UNCORE_FORMAT_ATTR(filter_state, filter_state, "config1:18-22");
+DEFINE_UNCORE_FORMAT_ATTR(filter_state2, filter_state, "config1:17-22");
+DEFINE_UNCORE_FORMAT_ATTR(filter_state3, filter_state, "config1:17-23");
+DEFINE_UNCORE_FORMAT_ATTR(filter_state4, filter_state, "config1:18-20");
+DEFINE_UNCORE_FORMAT_ATTR(filter_local, filter_local, "config1:33");
+DEFINE_UNCORE_FORMAT_ATTR(filter_all_op, filter_all_op, "config1:35");
+DEFINE_UNCORE_FORMAT_ATTR(filter_nnm, filter_nnm, "config1:37");
+DEFINE_UNCORE_FORMAT_ATTR(filter_opc, filter_opc, "config1:23-31");
+DEFINE_UNCORE_FORMAT_ATTR(filter_opc2, filter_opc, "config1:52-60");
+DEFINE_UNCORE_FORMAT_ATTR(filter_opc3, filter_opc, "config1:41-60");
+DEFINE_UNCORE_FORMAT_ATTR(filter_nc, filter_nc, "config1:62");
+DEFINE_UNCORE_FORMAT_ATTR(filter_c6, filter_c6, "config1:61");
+DEFINE_UNCORE_FORMAT_ATTR(filter_isoc, filter_isoc, "config1:63");
+DEFINE_UNCORE_FORMAT_ATTR(filter_band0, filter_band0, "config1:0-7");
+DEFINE_UNCORE_FORMAT_ATTR(filter_band1, filter_band1, "config1:8-15");
+DEFINE_UNCORE_FORMAT_ATTR(filter_band2, filter_band2, "config1:16-23");
+DEFINE_UNCORE_FORMAT_ATTR(filter_band3, filter_band3, "config1:24-31");
+DEFINE_UNCORE_FORMAT_ATTR(match_rds, match_rds, "config1:48-51");
+DEFINE_UNCORE_FORMAT_ATTR(match_rnid30, match_rnid30, "config1:32-35");
+DEFINE_UNCORE_FORMAT_ATTR(match_rnid4, match_rnid4, "config1:31");
+DEFINE_UNCORE_FORMAT_ATTR(match_dnid, match_dnid, "config1:13-17");
+DEFINE_UNCORE_FORMAT_ATTR(match_mc, match_mc, "config1:9-12");
+DEFINE_UNCORE_FORMAT_ATTR(match_opc, match_opc, "config1:5-8");
+DEFINE_UNCORE_FORMAT_ATTR(match_vnw, match_vnw, "config1:3-4");
+DEFINE_UNCORE_FORMAT_ATTR(match0, match0, "config1:0-31");
+DEFINE_UNCORE_FORMAT_ATTR(match1, match1, "config1:32-63");
+DEFINE_UNCORE_FORMAT_ATTR(mask_rds, mask_rds, "config2:48-51");
+DEFINE_UNCORE_FORMAT_ATTR(mask_rnid30, mask_rnid30, "config2:32-35");
+DEFINE_UNCORE_FORMAT_ATTR(mask_rnid4, mask_rnid4, "config2:31");
+DEFINE_UNCORE_FORMAT_ATTR(mask_dnid, mask_dnid, "config2:13-17");
+DEFINE_UNCORE_FORMAT_ATTR(mask_mc, mask_mc, "config2:9-12");
+DEFINE_UNCORE_FORMAT_ATTR(mask_opc, mask_opc, "config2:5-8");
+DEFINE_UNCORE_FORMAT_ATTR(mask_vnw, mask_vnw, "config2:3-4");
+DEFINE_UNCORE_FORMAT_ATTR(mask0, mask0, "config2:0-31");
+DEFINE_UNCORE_FORMAT_ATTR(mask1, mask1, "config2:32-63");
+
+static void snbep_uncore_pci_disable_box(struct intel_uncore_box *box)
+{
+ struct pci_dev *pdev = box->pci_dev;
+ int box_ctl = uncore_pci_box_ctl(box);
+ u32 config = 0;
+
+ if (!pci_read_config_dword(pdev, box_ctl, &config)) {
+ config |= SNBEP_PMON_BOX_CTL_FRZ;
+ pci_write_config_dword(pdev, box_ctl, config);
+ }
+}
+
+static void snbep_uncore_pci_enable_box(struct intel_uncore_box *box)
+{
+ struct pci_dev *pdev = box->pci_dev;
+ int box_ctl = uncore_pci_box_ctl(box);
+ u32 config = 0;
+
+ if (!pci_read_config_dword(pdev, box_ctl, &config)) {
+ config &= ~SNBEP_PMON_BOX_CTL_FRZ;
+ pci_write_config_dword(pdev, box_ctl, config);
+ }
+}
+
+static void snbep_uncore_pci_enable_event(struct intel_uncore_box *box, struct perf_event *event)
+{
+ struct pci_dev *pdev = box->pci_dev;
+ struct hw_perf_event *hwc = &event->hw;
+
+ pci_write_config_dword(pdev, hwc->config_base, hwc->config | SNBEP_PMON_CTL_EN);
+}
+
+static void snbep_uncore_pci_disable_event(struct intel_uncore_box *box, struct perf_event *event)
+{
+ struct pci_dev *pdev = box->pci_dev;
+ struct hw_perf_event *hwc = &event->hw;
+
+ pci_write_config_dword(pdev, hwc->config_base, hwc->config);
+}
+
+static u64 snbep_uncore_pci_read_counter(struct intel_uncore_box *box, struct perf_event *event)
+{
+ struct pci_dev *pdev = box->pci_dev;
+ struct hw_perf_event *hwc = &event->hw;
+ u64 count = 0;
+
+ pci_read_config_dword(pdev, hwc->event_base, (u32 *)&count);
+ pci_read_config_dword(pdev, hwc->event_base + 4, (u32 *)&count + 1);
+
+ return count;
+}
+
+static void snbep_uncore_pci_init_box(struct intel_uncore_box *box)
+{
+ struct pci_dev *pdev = box->pci_dev;
+ int box_ctl = uncore_pci_box_ctl(box);
+
+ pci_write_config_dword(pdev, box_ctl, SNBEP_PMON_BOX_CTL_INT);
+}
+
+static void snbep_uncore_msr_disable_box(struct intel_uncore_box *box)
+{
+ u64 config;
+ unsigned msr;
+
+ msr = uncore_msr_box_ctl(box);
+ if (msr) {
+ rdmsrl(msr, config);
+ config |= SNBEP_PMON_BOX_CTL_FRZ;
+ wrmsrl(msr, config);
+ }
+}
+
+static void snbep_uncore_msr_enable_box(struct intel_uncore_box *box)
+{
+ u64 config;
+ unsigned msr;
+
+ msr = uncore_msr_box_ctl(box);
+ if (msr) {
+ rdmsrl(msr, config);
+ config &= ~SNBEP_PMON_BOX_CTL_FRZ;
+ wrmsrl(msr, config);
+ }
+}
+
+static void snbep_uncore_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
+
+ if (reg1->idx != EXTRA_REG_NONE)
+ wrmsrl(reg1->reg, uncore_shared_reg_config(box, 0));
+
+ wrmsrl(hwc->config_base, hwc->config | SNBEP_PMON_CTL_EN);
+}
+
+static void snbep_uncore_msr_disable_event(struct intel_uncore_box *box,
+ struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+
+ wrmsrl(hwc->config_base, hwc->config);
+}
+
+static void snbep_uncore_msr_init_box(struct intel_uncore_box *box)
+{
+ unsigned msr = uncore_msr_box_ctl(box);
+
+ if (msr)
+ wrmsrl(msr, SNBEP_PMON_BOX_CTL_INT);
+}
+
+static struct attribute *snbep_uncore_formats_attr[] = {
+ &format_attr_event.attr,
+ &format_attr_umask.attr,
+ &format_attr_edge.attr,
+ &format_attr_inv.attr,
+ &format_attr_thresh8.attr,
+ NULL,
+};
+
+static struct attribute *snbep_uncore_ubox_formats_attr[] = {
+ &format_attr_event.attr,
+ &format_attr_umask.attr,
+ &format_attr_edge.attr,
+ &format_attr_inv.attr,
+ &format_attr_thresh5.attr,
+ NULL,
+};
+
+static struct attribute *snbep_uncore_cbox_formats_attr[] = {
+ &format_attr_event.attr,
+ &format_attr_umask.attr,
+ &format_attr_edge.attr,
+ &format_attr_tid_en.attr,
+ &format_attr_inv.attr,
+ &format_attr_thresh8.attr,
+ &format_attr_filter_tid.attr,
+ &format_attr_filter_nid.attr,
+ &format_attr_filter_state.attr,
+ &format_attr_filter_opc.attr,
+ NULL,
+};
+
+static struct attribute *snbep_uncore_pcu_formats_attr[] = {
+ &format_attr_event.attr,
+ &format_attr_occ_sel.attr,
+ &format_attr_edge.attr,
+ &format_attr_inv.attr,
+ &format_attr_thresh5.attr,
+ &format_attr_occ_invert.attr,
+ &format_attr_occ_edge.attr,
+ &format_attr_filter_band0.attr,
+ &format_attr_filter_band1.attr,
+ &format_attr_filter_band2.attr,
+ &format_attr_filter_band3.attr,
+ NULL,
+};
+
+static struct attribute *snbep_uncore_qpi_formats_attr[] = {
+ &format_attr_event_ext.attr,
+ &format_attr_umask.attr,
+ &format_attr_edge.attr,
+ &format_attr_inv.attr,
+ &format_attr_thresh8.attr,
+ &format_attr_match_rds.attr,
+ &format_attr_match_rnid30.attr,
+ &format_attr_match_rnid4.attr,
+ &format_attr_match_dnid.attr,
+ &format_attr_match_mc.attr,
+ &format_attr_match_opc.attr,
+ &format_attr_match_vnw.attr,
+ &format_attr_match0.attr,
+ &format_attr_match1.attr,
+ &format_attr_mask_rds.attr,
+ &format_attr_mask_rnid30.attr,
+ &format_attr_mask_rnid4.attr,
+ &format_attr_mask_dnid.attr,
+ &format_attr_mask_mc.attr,
+ &format_attr_mask_opc.attr,
+ &format_attr_mask_vnw.attr,
+ &format_attr_mask0.attr,
+ &format_attr_mask1.attr,
+ NULL,
+};
+
+static struct uncore_event_desc snbep_uncore_imc_events[] = {
+ INTEL_UNCORE_EVENT_DESC(clockticks, "event=0xff,umask=0x00"),
+ INTEL_UNCORE_EVENT_DESC(cas_count_read, "event=0x04,umask=0x03"),
+ INTEL_UNCORE_EVENT_DESC(cas_count_read.scale, "6.103515625e-5"),
+ INTEL_UNCORE_EVENT_DESC(cas_count_read.unit, "MiB"),
+ INTEL_UNCORE_EVENT_DESC(cas_count_write, "event=0x04,umask=0x0c"),
+ INTEL_UNCORE_EVENT_DESC(cas_count_write.scale, "6.103515625e-5"),
+ INTEL_UNCORE_EVENT_DESC(cas_count_write.unit, "MiB"),
+ { /* end: all zeroes */ },
+};
+
+static struct uncore_event_desc snbep_uncore_qpi_events[] = {
+ INTEL_UNCORE_EVENT_DESC(clockticks, "event=0x14"),
+ INTEL_UNCORE_EVENT_DESC(txl_flits_active, "event=0x00,umask=0x06"),
+ INTEL_UNCORE_EVENT_DESC(drs_data, "event=0x102,umask=0x08"),
+ INTEL_UNCORE_EVENT_DESC(ncb_data, "event=0x103,umask=0x04"),
+ { /* end: all zeroes */ },
+};
+
+static struct attribute_group snbep_uncore_format_group = {
+ .name = "format",
+ .attrs = snbep_uncore_formats_attr,
+};
+
+static struct attribute_group snbep_uncore_ubox_format_group = {
+ .name = "format",
+ .attrs = snbep_uncore_ubox_formats_attr,
+};
+
+static struct attribute_group snbep_uncore_cbox_format_group = {
+ .name = "format",
+ .attrs = snbep_uncore_cbox_formats_attr,
+};
+
+static struct attribute_group snbep_uncore_pcu_format_group = {
+ .name = "format",
+ .attrs = snbep_uncore_pcu_formats_attr,
+};
+
+static struct attribute_group snbep_uncore_qpi_format_group = {
+ .name = "format",
+ .attrs = snbep_uncore_qpi_formats_attr,
+};
+
+#define __SNBEP_UNCORE_MSR_OPS_COMMON_INIT() \
+ .disable_box = snbep_uncore_msr_disable_box, \
+ .enable_box = snbep_uncore_msr_enable_box, \
+ .disable_event = snbep_uncore_msr_disable_event, \
+ .enable_event = snbep_uncore_msr_enable_event, \
+ .read_counter = uncore_msr_read_counter
+
+#define SNBEP_UNCORE_MSR_OPS_COMMON_INIT() \
+ __SNBEP_UNCORE_MSR_OPS_COMMON_INIT(), \
+ .init_box = snbep_uncore_msr_init_box \
+
+static struct intel_uncore_ops snbep_uncore_msr_ops = {
+ SNBEP_UNCORE_MSR_OPS_COMMON_INIT(),
+};
+
+#define SNBEP_UNCORE_PCI_OPS_COMMON_INIT() \
+ .init_box = snbep_uncore_pci_init_box, \
+ .disable_box = snbep_uncore_pci_disable_box, \
+ .enable_box = snbep_uncore_pci_enable_box, \
+ .disable_event = snbep_uncore_pci_disable_event, \
+ .read_counter = snbep_uncore_pci_read_counter
+
+static struct intel_uncore_ops snbep_uncore_pci_ops = {
+ SNBEP_UNCORE_PCI_OPS_COMMON_INIT(),
+ .enable_event = snbep_uncore_pci_enable_event, \
+};
+
+static struct event_constraint snbep_uncore_cbox_constraints[] = {
+ UNCORE_EVENT_CONSTRAINT(0x01, 0x1),
+ UNCORE_EVENT_CONSTRAINT(0x02, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x04, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x05, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x07, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x09, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x11, 0x1),
+ UNCORE_EVENT_CONSTRAINT(0x12, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x13, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x1b, 0xc),
+ UNCORE_EVENT_CONSTRAINT(0x1c, 0xc),
+ UNCORE_EVENT_CONSTRAINT(0x1d, 0xc),
+ UNCORE_EVENT_CONSTRAINT(0x1e, 0xc),
+ EVENT_CONSTRAINT_OVERLAP(0x1f, 0xe, 0xff),
+ UNCORE_EVENT_CONSTRAINT(0x21, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x23, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x31, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x32, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x33, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x34, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x35, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x36, 0x1),
+ UNCORE_EVENT_CONSTRAINT(0x37, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x38, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x39, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x3b, 0x1),
+ EVENT_CONSTRAINT_END
+};
+
+static struct event_constraint snbep_uncore_r2pcie_constraints[] = {
+ UNCORE_EVENT_CONSTRAINT(0x10, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x11, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x12, 0x1),
+ UNCORE_EVENT_CONSTRAINT(0x23, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x24, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x25, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x26, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x32, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x33, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x34, 0x3),
+ EVENT_CONSTRAINT_END
+};
+
+static struct event_constraint snbep_uncore_r3qpi_constraints[] = {
+ UNCORE_EVENT_CONSTRAINT(0x10, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x11, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x12, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x13, 0x1),
+ UNCORE_EVENT_CONSTRAINT(0x20, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x21, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x22, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x23, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x24, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x25, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x26, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x28, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x29, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x2a, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x2b, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x2c, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x2d, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x2e, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x2f, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x30, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x31, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x32, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x33, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x34, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x36, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x37, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x38, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x39, 0x3),
+ EVENT_CONSTRAINT_END
+};
+
+static struct intel_uncore_type snbep_uncore_ubox = {
+ .name = "ubox",
+ .num_counters = 2,
+ .num_boxes = 1,
+ .perf_ctr_bits = 44,
+ .fixed_ctr_bits = 48,
+ .perf_ctr = SNBEP_U_MSR_PMON_CTR0,
+ .event_ctl = SNBEP_U_MSR_PMON_CTL0,
+ .event_mask = SNBEP_U_MSR_PMON_RAW_EVENT_MASK,
+ .fixed_ctr = SNBEP_U_MSR_PMON_UCLK_FIXED_CTR,
+ .fixed_ctl = SNBEP_U_MSR_PMON_UCLK_FIXED_CTL,
+ .ops = &snbep_uncore_msr_ops,
+ .format_group = &snbep_uncore_ubox_format_group,
+};
+
+static struct extra_reg snbep_uncore_cbox_extra_regs[] = {
+ SNBEP_CBO_EVENT_EXTRA_REG(SNBEP_CBO_PMON_CTL_TID_EN,
+ SNBEP_CBO_PMON_CTL_TID_EN, 0x1),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x0334, 0xffff, 0x4),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x4334, 0xffff, 0x6),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x0534, 0xffff, 0x4),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x4534, 0xffff, 0x6),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x0934, 0xffff, 0x4),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x4934, 0xffff, 0x6),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x4134, 0xffff, 0x6),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x0135, 0xffff, 0x8),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x0335, 0xffff, 0x8),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x4135, 0xffff, 0xa),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x4335, 0xffff, 0xa),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x4435, 0xffff, 0x2),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x4835, 0xffff, 0x2),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x4a35, 0xffff, 0x2),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x5035, 0xffff, 0x2),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x0136, 0xffff, 0x8),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x0336, 0xffff, 0x8),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x4136, 0xffff, 0xa),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x4336, 0xffff, 0xa),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x4436, 0xffff, 0x2),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x4836, 0xffff, 0x2),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x4a36, 0xffff, 0x2),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x4037, 0x40ff, 0x2),
+ EVENT_EXTRA_END
+};
+
+static void snbep_cbox_put_constraint(struct intel_uncore_box *box, struct perf_event *event)
+{
+ struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
+ struct intel_uncore_extra_reg *er = &box->shared_regs[0];
+ int i;
+
+ if (uncore_box_is_fake(box))
+ return;
+
+ for (i = 0; i < 5; i++) {
+ if (reg1->alloc & (0x1 << i))
+ atomic_sub(1 << (i * 6), &er->ref);
+ }
+ reg1->alloc = 0;
+}
+
+static struct event_constraint *
+__snbep_cbox_get_constraint(struct intel_uncore_box *box, struct perf_event *event,
+ u64 (*cbox_filter_mask)(int fields))
+{
+ struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
+ struct intel_uncore_extra_reg *er = &box->shared_regs[0];
+ int i, alloc = 0;
+ unsigned long flags;
+ u64 mask;
+
+ if (reg1->idx == EXTRA_REG_NONE)
+ return NULL;
+
+ raw_spin_lock_irqsave(&er->lock, flags);
+ for (i = 0; i < 5; i++) {
+ if (!(reg1->idx & (0x1 << i)))
+ continue;
+ if (!uncore_box_is_fake(box) && (reg1->alloc & (0x1 << i)))
+ continue;
+
+ mask = cbox_filter_mask(0x1 << i);
+ if (!__BITS_VALUE(atomic_read(&er->ref), i, 6) ||
+ !((reg1->config ^ er->config) & mask)) {
+ atomic_add(1 << (i * 6), &er->ref);
+ er->config &= ~mask;
+ er->config |= reg1->config & mask;
+ alloc |= (0x1 << i);
+ } else {
+ break;
+ }
+ }
+ raw_spin_unlock_irqrestore(&er->lock, flags);
+ if (i < 5)
+ goto fail;
+
+ if (!uncore_box_is_fake(box))
+ reg1->alloc |= alloc;
+
+ return NULL;
+fail:
+ for (; i >= 0; i--) {
+ if (alloc & (0x1 << i))
+ atomic_sub(1 << (i * 6), &er->ref);
+ }
+ return &uncore_constraint_empty;
+}
+
+static u64 snbep_cbox_filter_mask(int fields)
+{
+ u64 mask = 0;
+
+ if (fields & 0x1)
+ mask |= SNBEP_CB0_MSR_PMON_BOX_FILTER_TID;
+ if (fields & 0x2)
+ mask |= SNBEP_CB0_MSR_PMON_BOX_FILTER_NID;
+ if (fields & 0x4)
+ mask |= SNBEP_CB0_MSR_PMON_BOX_FILTER_STATE;
+ if (fields & 0x8)
+ mask |= SNBEP_CB0_MSR_PMON_BOX_FILTER_OPC;
+
+ return mask;
+}
+
+static struct event_constraint *
+snbep_cbox_get_constraint(struct intel_uncore_box *box, struct perf_event *event)
+{
+ return __snbep_cbox_get_constraint(box, event, snbep_cbox_filter_mask);
+}
+
+static int snbep_cbox_hw_config(struct intel_uncore_box *box, struct perf_event *event)
+{
+ struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
+ struct extra_reg *er;
+ int idx = 0;
+
+ for (er = snbep_uncore_cbox_extra_regs; er->msr; er++) {
+ if (er->event != (event->hw.config & er->config_mask))
+ continue;
+ idx |= er->idx;
+ }
+
+ if (idx) {
+ reg1->reg = SNBEP_C0_MSR_PMON_BOX_FILTER +
+ SNBEP_CBO_MSR_OFFSET * box->pmu->pmu_idx;
+ reg1->config = event->attr.config1 & snbep_cbox_filter_mask(idx);
+ reg1->idx = idx;
+ }
+ return 0;
+}
+
+static struct intel_uncore_ops snbep_uncore_cbox_ops = {
+ SNBEP_UNCORE_MSR_OPS_COMMON_INIT(),
+ .hw_config = snbep_cbox_hw_config,
+ .get_constraint = snbep_cbox_get_constraint,
+ .put_constraint = snbep_cbox_put_constraint,
+};
+
+static struct intel_uncore_type snbep_uncore_cbox = {
+ .name = "cbox",
+ .num_counters = 4,
+ .num_boxes = 8,
+ .perf_ctr_bits = 44,
+ .event_ctl = SNBEP_C0_MSR_PMON_CTL0,
+ .perf_ctr = SNBEP_C0_MSR_PMON_CTR0,
+ .event_mask = SNBEP_CBO_MSR_PMON_RAW_EVENT_MASK,
+ .box_ctl = SNBEP_C0_MSR_PMON_BOX_CTL,
+ .msr_offset = SNBEP_CBO_MSR_OFFSET,
+ .num_shared_regs = 1,
+ .constraints = snbep_uncore_cbox_constraints,
+ .ops = &snbep_uncore_cbox_ops,
+ .format_group = &snbep_uncore_cbox_format_group,
+};
+
+static u64 snbep_pcu_alter_er(struct perf_event *event, int new_idx, bool modify)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
+ u64 config = reg1->config;
+
+ if (new_idx > reg1->idx)
+ config <<= 8 * (new_idx - reg1->idx);
+ else
+ config >>= 8 * (reg1->idx - new_idx);
+
+ if (modify) {
+ hwc->config += new_idx - reg1->idx;
+ reg1->config = config;
+ reg1->idx = new_idx;
+ }
+ return config;
+}
+
+static struct event_constraint *
+snbep_pcu_get_constraint(struct intel_uncore_box *box, struct perf_event *event)
+{
+ struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
+ struct intel_uncore_extra_reg *er = &box->shared_regs[0];
+ unsigned long flags;
+ int idx = reg1->idx;
+ u64 mask, config1 = reg1->config;
+ bool ok = false;
+
+ if (reg1->idx == EXTRA_REG_NONE ||
+ (!uncore_box_is_fake(box) && reg1->alloc))
+ return NULL;
+again:
+ mask = 0xffULL << (idx * 8);
+ raw_spin_lock_irqsave(&er->lock, flags);
+ if (!__BITS_VALUE(atomic_read(&er->ref), idx, 8) ||
+ !((config1 ^ er->config) & mask)) {
+ atomic_add(1 << (idx * 8), &er->ref);
+ er->config &= ~mask;
+ er->config |= config1 & mask;
+ ok = true;
+ }
+ raw_spin_unlock_irqrestore(&er->lock, flags);
+
+ if (!ok) {
+ idx = (idx + 1) % 4;
+ if (idx != reg1->idx) {
+ config1 = snbep_pcu_alter_er(event, idx, false);
+ goto again;
+ }
+ return &uncore_constraint_empty;
+ }
+
+ if (!uncore_box_is_fake(box)) {
+ if (idx != reg1->idx)
+ snbep_pcu_alter_er(event, idx, true);
+ reg1->alloc = 1;
+ }
+ return NULL;
+}
+
+static void snbep_pcu_put_constraint(struct intel_uncore_box *box, struct perf_event *event)
+{
+ struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
+ struct intel_uncore_extra_reg *er = &box->shared_regs[0];
+
+ if (uncore_box_is_fake(box) || !reg1->alloc)
+ return;
+
+ atomic_sub(1 << (reg1->idx * 8), &er->ref);
+ reg1->alloc = 0;
+}
+
+static int snbep_pcu_hw_config(struct intel_uncore_box *box, struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
+ int ev_sel = hwc->config & SNBEP_PMON_CTL_EV_SEL_MASK;
+
+ if (ev_sel >= 0xb && ev_sel <= 0xe) {
+ reg1->reg = SNBEP_PCU_MSR_PMON_BOX_FILTER;
+ reg1->idx = ev_sel - 0xb;
+ reg1->config = event->attr.config1 & (0xff << (reg1->idx * 8));
+ }
+ return 0;
+}
+
+static struct intel_uncore_ops snbep_uncore_pcu_ops = {
+ SNBEP_UNCORE_MSR_OPS_COMMON_INIT(),
+ .hw_config = snbep_pcu_hw_config,
+ .get_constraint = snbep_pcu_get_constraint,
+ .put_constraint = snbep_pcu_put_constraint,
+};
+
+static struct intel_uncore_type snbep_uncore_pcu = {
+ .name = "pcu",
+ .num_counters = 4,
+ .num_boxes = 1,
+ .perf_ctr_bits = 48,
+ .perf_ctr = SNBEP_PCU_MSR_PMON_CTR0,
+ .event_ctl = SNBEP_PCU_MSR_PMON_CTL0,
+ .event_mask = SNBEP_PCU_MSR_PMON_RAW_EVENT_MASK,
+ .box_ctl = SNBEP_PCU_MSR_PMON_BOX_CTL,
+ .num_shared_regs = 1,
+ .ops = &snbep_uncore_pcu_ops,
+ .format_group = &snbep_uncore_pcu_format_group,
+};
+
+static struct intel_uncore_type *snbep_msr_uncores[] = {
+ &snbep_uncore_ubox,
+ &snbep_uncore_cbox,
+ &snbep_uncore_pcu,
+ NULL,
+};
+
+void snbep_uncore_cpu_init(void)
+{
+ if (snbep_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores)
+ snbep_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores;
+ uncore_msr_uncores = snbep_msr_uncores;
+}
+
+enum {
+ SNBEP_PCI_QPI_PORT0_FILTER,
+ SNBEP_PCI_QPI_PORT1_FILTER,
+ HSWEP_PCI_PCU_3,
+};
+
+static int snbep_qpi_hw_config(struct intel_uncore_box *box, struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
+ struct hw_perf_event_extra *reg2 = &hwc->branch_reg;
+
+ if ((hwc->config & SNBEP_PMON_CTL_EV_SEL_MASK) == 0x38) {
+ reg1->idx = 0;
+ reg1->reg = SNBEP_Q_Py_PCI_PMON_PKT_MATCH0;
+ reg1->config = event->attr.config1;
+ reg2->reg = SNBEP_Q_Py_PCI_PMON_PKT_MASK0;
+ reg2->config = event->attr.config2;
+ }
+ return 0;
+}
+
+static void snbep_qpi_enable_event(struct intel_uncore_box *box, struct perf_event *event)
+{
+ struct pci_dev *pdev = box->pci_dev;
+ struct hw_perf_event *hwc = &event->hw;
+ struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
+ struct hw_perf_event_extra *reg2 = &hwc->branch_reg;
+
+ if (reg1->idx != EXTRA_REG_NONE) {
+ int idx = box->pmu->pmu_idx + SNBEP_PCI_QPI_PORT0_FILTER;
+ int pkg = topology_phys_to_logical_pkg(box->pci_phys_id);
+ struct pci_dev *filter_pdev = uncore_extra_pci_dev[pkg].dev[idx];
+
+ if (filter_pdev) {
+ pci_write_config_dword(filter_pdev, reg1->reg,
+ (u32)reg1->config);
+ pci_write_config_dword(filter_pdev, reg1->reg + 4,
+ (u32)(reg1->config >> 32));
+ pci_write_config_dword(filter_pdev, reg2->reg,
+ (u32)reg2->config);
+ pci_write_config_dword(filter_pdev, reg2->reg + 4,
+ (u32)(reg2->config >> 32));
+ }
+ }
+
+ pci_write_config_dword(pdev, hwc->config_base, hwc->config | SNBEP_PMON_CTL_EN);
+}
+
+static struct intel_uncore_ops snbep_uncore_qpi_ops = {
+ SNBEP_UNCORE_PCI_OPS_COMMON_INIT(),
+ .enable_event = snbep_qpi_enable_event,
+ .hw_config = snbep_qpi_hw_config,
+ .get_constraint = uncore_get_constraint,
+ .put_constraint = uncore_put_constraint,
+};
+
+#define SNBEP_UNCORE_PCI_COMMON_INIT() \
+ .perf_ctr = SNBEP_PCI_PMON_CTR0, \
+ .event_ctl = SNBEP_PCI_PMON_CTL0, \
+ .event_mask = SNBEP_PMON_RAW_EVENT_MASK, \
+ .box_ctl = SNBEP_PCI_PMON_BOX_CTL, \
+ .ops = &snbep_uncore_pci_ops, \
+ .format_group = &snbep_uncore_format_group
+
+static struct intel_uncore_type snbep_uncore_ha = {
+ .name = "ha",
+ .num_counters = 4,
+ .num_boxes = 1,
+ .perf_ctr_bits = 48,
+ SNBEP_UNCORE_PCI_COMMON_INIT(),
+};
+
+static struct intel_uncore_type snbep_uncore_imc = {
+ .name = "imc",
+ .num_counters = 4,
+ .num_boxes = 4,
+ .perf_ctr_bits = 48,
+ .fixed_ctr_bits = 48,
+ .fixed_ctr = SNBEP_MC_CHy_PCI_PMON_FIXED_CTR,
+ .fixed_ctl = SNBEP_MC_CHy_PCI_PMON_FIXED_CTL,
+ .event_descs = snbep_uncore_imc_events,
+ SNBEP_UNCORE_PCI_COMMON_INIT(),
+};
+
+static struct intel_uncore_type snbep_uncore_qpi = {
+ .name = "qpi",
+ .num_counters = 4,
+ .num_boxes = 2,
+ .perf_ctr_bits = 48,
+ .perf_ctr = SNBEP_PCI_PMON_CTR0,
+ .event_ctl = SNBEP_PCI_PMON_CTL0,
+ .event_mask = SNBEP_QPI_PCI_PMON_RAW_EVENT_MASK,
+ .box_ctl = SNBEP_PCI_PMON_BOX_CTL,
+ .num_shared_regs = 1,
+ .ops = &snbep_uncore_qpi_ops,
+ .event_descs = snbep_uncore_qpi_events,
+ .format_group = &snbep_uncore_qpi_format_group,
+};
+
+
+static struct intel_uncore_type snbep_uncore_r2pcie = {
+ .name = "r2pcie",
+ .num_counters = 4,
+ .num_boxes = 1,
+ .perf_ctr_bits = 44,
+ .constraints = snbep_uncore_r2pcie_constraints,
+ SNBEP_UNCORE_PCI_COMMON_INIT(),
+};
+
+static struct intel_uncore_type snbep_uncore_r3qpi = {
+ .name = "r3qpi",
+ .num_counters = 3,
+ .num_boxes = 2,
+ .perf_ctr_bits = 44,
+ .constraints = snbep_uncore_r3qpi_constraints,
+ SNBEP_UNCORE_PCI_COMMON_INIT(),
+};
+
+enum {
+ SNBEP_PCI_UNCORE_HA,
+ SNBEP_PCI_UNCORE_IMC,
+ SNBEP_PCI_UNCORE_QPI,
+ SNBEP_PCI_UNCORE_R2PCIE,
+ SNBEP_PCI_UNCORE_R3QPI,
+};
+
+static struct intel_uncore_type *snbep_pci_uncores[] = {
+ [SNBEP_PCI_UNCORE_HA] = &snbep_uncore_ha,
+ [SNBEP_PCI_UNCORE_IMC] = &snbep_uncore_imc,
+ [SNBEP_PCI_UNCORE_QPI] = &snbep_uncore_qpi,
+ [SNBEP_PCI_UNCORE_R2PCIE] = &snbep_uncore_r2pcie,
+ [SNBEP_PCI_UNCORE_R3QPI] = &snbep_uncore_r3qpi,
+ NULL,
+};
+
+static const struct pci_device_id snbep_uncore_pci_ids[] = {
+ { /* Home Agent */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_HA),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_HA, 0),
+ },
+ { /* MC Channel 0 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_IMC0),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_IMC, 0),
+ },
+ { /* MC Channel 1 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_IMC1),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_IMC, 1),
+ },
+ { /* MC Channel 2 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_IMC2),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_IMC, 2),
+ },
+ { /* MC Channel 3 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_IMC3),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_IMC, 3),
+ },
+ { /* QPI Port 0 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_QPI0),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_QPI, 0),
+ },
+ { /* QPI Port 1 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_QPI1),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_QPI, 1),
+ },
+ { /* R2PCIe */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_R2PCIE),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_R2PCIE, 0),
+ },
+ { /* R3QPI Link 0 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_R3QPI0),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_R3QPI, 0),
+ },
+ { /* R3QPI Link 1 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_R3QPI1),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_R3QPI, 1),
+ },
+ { /* QPI Port 0 filter */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x3c86),
+ .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV,
+ SNBEP_PCI_QPI_PORT0_FILTER),
+ },
+ { /* QPI Port 0 filter */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x3c96),
+ .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV,
+ SNBEP_PCI_QPI_PORT1_FILTER),
+ },
+ { /* end: all zeroes */ }
+};
+
+static struct pci_driver snbep_uncore_pci_driver = {
+ .name = "snbep_uncore",
+ .id_table = snbep_uncore_pci_ids,
+};
+
+/*
+ * build pci bus to socket mapping
+ */
+static int snbep_pci2phy_map_init(int devid)
+{
+ struct pci_dev *ubox_dev = NULL;
+ int i, bus, nodeid, segment;
+ struct pci2phy_map *map;
+ int err = 0;
+ u32 config = 0;
+
+ while (1) {
+ /* find the UBOX device */
+ ubox_dev = pci_get_device(PCI_VENDOR_ID_INTEL, devid, ubox_dev);
+ if (!ubox_dev)
+ break;
+ bus = ubox_dev->bus->number;
+ /* get the Node ID of the local register */
+ err = pci_read_config_dword(ubox_dev, 0x40, &config);
+ if (err)
+ break;
+ nodeid = config;
+ /* get the Node ID mapping */
+ err = pci_read_config_dword(ubox_dev, 0x54, &config);
+ if (err)
+ break;
+
+ segment = pci_domain_nr(ubox_dev->bus);
+ raw_spin_lock(&pci2phy_map_lock);
+ map = __find_pci2phy_map(segment);
+ if (!map) {
+ raw_spin_unlock(&pci2phy_map_lock);
+ err = -ENOMEM;
+ break;
+ }
+
+ /*
+ * every three bits in the Node ID mapping register maps
+ * to a particular node.
+ */
+ for (i = 0; i < 8; i++) {
+ if (nodeid == ((config >> (3 * i)) & 0x7)) {
+ map->pbus_to_physid[bus] = i;
+ break;
+ }
+ }
+ raw_spin_unlock(&pci2phy_map_lock);
+ }
+
+ if (!err) {
+ /*
+ * For PCI bus with no UBOX device, find the next bus
+ * that has UBOX device and use its mapping.
+ */
+ raw_spin_lock(&pci2phy_map_lock);
+ list_for_each_entry(map, &pci2phy_map_head, list) {
+ i = -1;
+ for (bus = 255; bus >= 0; bus--) {
+ if (map->pbus_to_physid[bus] >= 0)
+ i = map->pbus_to_physid[bus];
+ else
+ map->pbus_to_physid[bus] = i;
+ }
+ }
+ raw_spin_unlock(&pci2phy_map_lock);
+ }
+
+ pci_dev_put(ubox_dev);
+
+ return err ? pcibios_err_to_errno(err) : 0;
+}
+
+int snbep_uncore_pci_init(void)
+{
+ int ret = snbep_pci2phy_map_init(0x3ce0);
+ if (ret)
+ return ret;
+ uncore_pci_uncores = snbep_pci_uncores;
+ uncore_pci_driver = &snbep_uncore_pci_driver;
+ return 0;
+}
+/* end of Sandy Bridge-EP uncore support */
+
+/* IvyTown uncore support */
+static void ivbep_uncore_msr_init_box(struct intel_uncore_box *box)
+{
+ unsigned msr = uncore_msr_box_ctl(box);
+ if (msr)
+ wrmsrl(msr, IVBEP_PMON_BOX_CTL_INT);
+}
+
+static void ivbep_uncore_pci_init_box(struct intel_uncore_box *box)
+{
+ struct pci_dev *pdev = box->pci_dev;
+
+ pci_write_config_dword(pdev, SNBEP_PCI_PMON_BOX_CTL, IVBEP_PMON_BOX_CTL_INT);
+}
+
+#define IVBEP_UNCORE_MSR_OPS_COMMON_INIT() \
+ .init_box = ivbep_uncore_msr_init_box, \
+ .disable_box = snbep_uncore_msr_disable_box, \
+ .enable_box = snbep_uncore_msr_enable_box, \
+ .disable_event = snbep_uncore_msr_disable_event, \
+ .enable_event = snbep_uncore_msr_enable_event, \
+ .read_counter = uncore_msr_read_counter
+
+static struct intel_uncore_ops ivbep_uncore_msr_ops = {
+ IVBEP_UNCORE_MSR_OPS_COMMON_INIT(),
+};
+
+static struct intel_uncore_ops ivbep_uncore_pci_ops = {
+ .init_box = ivbep_uncore_pci_init_box,
+ .disable_box = snbep_uncore_pci_disable_box,
+ .enable_box = snbep_uncore_pci_enable_box,
+ .disable_event = snbep_uncore_pci_disable_event,
+ .enable_event = snbep_uncore_pci_enable_event,
+ .read_counter = snbep_uncore_pci_read_counter,
+};
+
+#define IVBEP_UNCORE_PCI_COMMON_INIT() \
+ .perf_ctr = SNBEP_PCI_PMON_CTR0, \
+ .event_ctl = SNBEP_PCI_PMON_CTL0, \
+ .event_mask = IVBEP_PMON_RAW_EVENT_MASK, \
+ .box_ctl = SNBEP_PCI_PMON_BOX_CTL, \
+ .ops = &ivbep_uncore_pci_ops, \
+ .format_group = &ivbep_uncore_format_group
+
+static struct attribute *ivbep_uncore_formats_attr[] = {
+ &format_attr_event.attr,
+ &format_attr_umask.attr,
+ &format_attr_edge.attr,
+ &format_attr_inv.attr,
+ &format_attr_thresh8.attr,
+ NULL,
+};
+
+static struct attribute *ivbep_uncore_ubox_formats_attr[] = {
+ &format_attr_event.attr,
+ &format_attr_umask.attr,
+ &format_attr_edge.attr,
+ &format_attr_inv.attr,
+ &format_attr_thresh5.attr,
+ NULL,
+};
+
+static struct attribute *ivbep_uncore_cbox_formats_attr[] = {
+ &format_attr_event.attr,
+ &format_attr_umask.attr,
+ &format_attr_edge.attr,
+ &format_attr_tid_en.attr,
+ &format_attr_thresh8.attr,
+ &format_attr_filter_tid.attr,
+ &format_attr_filter_link.attr,
+ &format_attr_filter_state2.attr,
+ &format_attr_filter_nid2.attr,
+ &format_attr_filter_opc2.attr,
+ &format_attr_filter_nc.attr,
+ &format_attr_filter_c6.attr,
+ &format_attr_filter_isoc.attr,
+ NULL,
+};
+
+static struct attribute *ivbep_uncore_pcu_formats_attr[] = {
+ &format_attr_event.attr,
+ &format_attr_occ_sel.attr,
+ &format_attr_edge.attr,
+ &format_attr_thresh5.attr,
+ &format_attr_occ_invert.attr,
+ &format_attr_occ_edge.attr,
+ &format_attr_filter_band0.attr,
+ &format_attr_filter_band1.attr,
+ &format_attr_filter_band2.attr,
+ &format_attr_filter_band3.attr,
+ NULL,
+};
+
+static struct attribute *ivbep_uncore_qpi_formats_attr[] = {
+ &format_attr_event_ext.attr,
+ &format_attr_umask.attr,
+ &format_attr_edge.attr,
+ &format_attr_thresh8.attr,
+ &format_attr_match_rds.attr,
+ &format_attr_match_rnid30.attr,
+ &format_attr_match_rnid4.attr,
+ &format_attr_match_dnid.attr,
+ &format_attr_match_mc.attr,
+ &format_attr_match_opc.attr,
+ &format_attr_match_vnw.attr,
+ &format_attr_match0.attr,
+ &format_attr_match1.attr,
+ &format_attr_mask_rds.attr,
+ &format_attr_mask_rnid30.attr,
+ &format_attr_mask_rnid4.attr,
+ &format_attr_mask_dnid.attr,
+ &format_attr_mask_mc.attr,
+ &format_attr_mask_opc.attr,
+ &format_attr_mask_vnw.attr,
+ &format_attr_mask0.attr,
+ &format_attr_mask1.attr,
+ NULL,
+};
+
+static struct attribute_group ivbep_uncore_format_group = {
+ .name = "format",
+ .attrs = ivbep_uncore_formats_attr,
+};
+
+static struct attribute_group ivbep_uncore_ubox_format_group = {
+ .name = "format",
+ .attrs = ivbep_uncore_ubox_formats_attr,
+};
+
+static struct attribute_group ivbep_uncore_cbox_format_group = {
+ .name = "format",
+ .attrs = ivbep_uncore_cbox_formats_attr,
+};
+
+static struct attribute_group ivbep_uncore_pcu_format_group = {
+ .name = "format",
+ .attrs = ivbep_uncore_pcu_formats_attr,
+};
+
+static struct attribute_group ivbep_uncore_qpi_format_group = {
+ .name = "format",
+ .attrs = ivbep_uncore_qpi_formats_attr,
+};
+
+static struct intel_uncore_type ivbep_uncore_ubox = {
+ .name = "ubox",
+ .num_counters = 2,
+ .num_boxes = 1,
+ .perf_ctr_bits = 44,
+ .fixed_ctr_bits = 48,
+ .perf_ctr = SNBEP_U_MSR_PMON_CTR0,
+ .event_ctl = SNBEP_U_MSR_PMON_CTL0,
+ .event_mask = IVBEP_U_MSR_PMON_RAW_EVENT_MASK,
+ .fixed_ctr = SNBEP_U_MSR_PMON_UCLK_FIXED_CTR,
+ .fixed_ctl = SNBEP_U_MSR_PMON_UCLK_FIXED_CTL,
+ .ops = &ivbep_uncore_msr_ops,
+ .format_group = &ivbep_uncore_ubox_format_group,
+};
+
+static struct extra_reg ivbep_uncore_cbox_extra_regs[] = {
+ SNBEP_CBO_EVENT_EXTRA_REG(SNBEP_CBO_PMON_CTL_TID_EN,
+ SNBEP_CBO_PMON_CTL_TID_EN, 0x1),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x1031, 0x10ff, 0x2),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x1134, 0xffff, 0x4),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x4134, 0xffff, 0xc),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x5134, 0xffff, 0xc),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x0334, 0xffff, 0x4),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x4334, 0xffff, 0xc),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x0534, 0xffff, 0x4),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x4534, 0xffff, 0xc),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x0934, 0xffff, 0x4),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x4934, 0xffff, 0xc),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x0135, 0xffff, 0x10),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x0335, 0xffff, 0x10),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x2135, 0xffff, 0x10),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x2335, 0xffff, 0x10),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x4135, 0xffff, 0x18),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x4335, 0xffff, 0x18),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x4435, 0xffff, 0x8),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x4835, 0xffff, 0x8),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x4a35, 0xffff, 0x8),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x5035, 0xffff, 0x8),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x8135, 0xffff, 0x10),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x8335, 0xffff, 0x10),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x0136, 0xffff, 0x10),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x0336, 0xffff, 0x10),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x2136, 0xffff, 0x10),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x2336, 0xffff, 0x10),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x4136, 0xffff, 0x18),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x4336, 0xffff, 0x18),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x4436, 0xffff, 0x8),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x4836, 0xffff, 0x8),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x4a36, 0xffff, 0x8),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x5036, 0xffff, 0x8),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x8136, 0xffff, 0x10),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x8336, 0xffff, 0x10),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x4037, 0x40ff, 0x8),
+ EVENT_EXTRA_END
+};
+
+static u64 ivbep_cbox_filter_mask(int fields)
+{
+ u64 mask = 0;
+
+ if (fields & 0x1)
+ mask |= IVBEP_CB0_MSR_PMON_BOX_FILTER_TID;
+ if (fields & 0x2)
+ mask |= IVBEP_CB0_MSR_PMON_BOX_FILTER_LINK;
+ if (fields & 0x4)
+ mask |= IVBEP_CB0_MSR_PMON_BOX_FILTER_STATE;
+ if (fields & 0x8)
+ mask |= IVBEP_CB0_MSR_PMON_BOX_FILTER_NID;
+ if (fields & 0x10) {
+ mask |= IVBEP_CB0_MSR_PMON_BOX_FILTER_OPC;
+ mask |= IVBEP_CB0_MSR_PMON_BOX_FILTER_NC;
+ mask |= IVBEP_CB0_MSR_PMON_BOX_FILTER_C6;
+ mask |= IVBEP_CB0_MSR_PMON_BOX_FILTER_ISOC;
+ }
+
+ return mask;
+}
+
+static struct event_constraint *
+ivbep_cbox_get_constraint(struct intel_uncore_box *box, struct perf_event *event)
+{
+ return __snbep_cbox_get_constraint(box, event, ivbep_cbox_filter_mask);
+}
+
+static int ivbep_cbox_hw_config(struct intel_uncore_box *box, struct perf_event *event)
+{
+ struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
+ struct extra_reg *er;
+ int idx = 0;
+
+ for (er = ivbep_uncore_cbox_extra_regs; er->msr; er++) {
+ if (er->event != (event->hw.config & er->config_mask))
+ continue;
+ idx |= er->idx;
+ }
+
+ if (idx) {
+ reg1->reg = SNBEP_C0_MSR_PMON_BOX_FILTER +
+ SNBEP_CBO_MSR_OFFSET * box->pmu->pmu_idx;
+ reg1->config = event->attr.config1 & ivbep_cbox_filter_mask(idx);
+ reg1->idx = idx;
+ }
+ return 0;
+}
+
+static void ivbep_cbox_enable_event(struct intel_uncore_box *box, struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
+
+ if (reg1->idx != EXTRA_REG_NONE) {
+ u64 filter = uncore_shared_reg_config(box, 0);
+ wrmsrl(reg1->reg, filter & 0xffffffff);
+ wrmsrl(reg1->reg + 6, filter >> 32);
+ }
+
+ wrmsrl(hwc->config_base, hwc->config | SNBEP_PMON_CTL_EN);
+}
+
+static struct intel_uncore_ops ivbep_uncore_cbox_ops = {
+ .init_box = ivbep_uncore_msr_init_box,
+ .disable_box = snbep_uncore_msr_disable_box,
+ .enable_box = snbep_uncore_msr_enable_box,
+ .disable_event = snbep_uncore_msr_disable_event,
+ .enable_event = ivbep_cbox_enable_event,
+ .read_counter = uncore_msr_read_counter,
+ .hw_config = ivbep_cbox_hw_config,
+ .get_constraint = ivbep_cbox_get_constraint,
+ .put_constraint = snbep_cbox_put_constraint,
+};
+
+static struct intel_uncore_type ivbep_uncore_cbox = {
+ .name = "cbox",
+ .num_counters = 4,
+ .num_boxes = 15,
+ .perf_ctr_bits = 44,
+ .event_ctl = SNBEP_C0_MSR_PMON_CTL0,
+ .perf_ctr = SNBEP_C0_MSR_PMON_CTR0,
+ .event_mask = IVBEP_CBO_MSR_PMON_RAW_EVENT_MASK,
+ .box_ctl = SNBEP_C0_MSR_PMON_BOX_CTL,
+ .msr_offset = SNBEP_CBO_MSR_OFFSET,
+ .num_shared_regs = 1,
+ .constraints = snbep_uncore_cbox_constraints,
+ .ops = &ivbep_uncore_cbox_ops,
+ .format_group = &ivbep_uncore_cbox_format_group,
+};
+
+static struct intel_uncore_ops ivbep_uncore_pcu_ops = {
+ IVBEP_UNCORE_MSR_OPS_COMMON_INIT(),
+ .hw_config = snbep_pcu_hw_config,
+ .get_constraint = snbep_pcu_get_constraint,
+ .put_constraint = snbep_pcu_put_constraint,
+};
+
+static struct intel_uncore_type ivbep_uncore_pcu = {
+ .name = "pcu",
+ .num_counters = 4,
+ .num_boxes = 1,
+ .perf_ctr_bits = 48,
+ .perf_ctr = SNBEP_PCU_MSR_PMON_CTR0,
+ .event_ctl = SNBEP_PCU_MSR_PMON_CTL0,
+ .event_mask = IVBEP_PCU_MSR_PMON_RAW_EVENT_MASK,
+ .box_ctl = SNBEP_PCU_MSR_PMON_BOX_CTL,
+ .num_shared_regs = 1,
+ .ops = &ivbep_uncore_pcu_ops,
+ .format_group = &ivbep_uncore_pcu_format_group,
+};
+
+static struct intel_uncore_type *ivbep_msr_uncores[] = {
+ &ivbep_uncore_ubox,
+ &ivbep_uncore_cbox,
+ &ivbep_uncore_pcu,
+ NULL,
+};
+
+void ivbep_uncore_cpu_init(void)
+{
+ if (ivbep_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores)
+ ivbep_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores;
+ uncore_msr_uncores = ivbep_msr_uncores;
+}
+
+static struct intel_uncore_type ivbep_uncore_ha = {
+ .name = "ha",
+ .num_counters = 4,
+ .num_boxes = 2,
+ .perf_ctr_bits = 48,
+ IVBEP_UNCORE_PCI_COMMON_INIT(),
+};
+
+static struct intel_uncore_type ivbep_uncore_imc = {
+ .name = "imc",
+ .num_counters = 4,
+ .num_boxes = 8,
+ .perf_ctr_bits = 48,
+ .fixed_ctr_bits = 48,
+ .fixed_ctr = SNBEP_MC_CHy_PCI_PMON_FIXED_CTR,
+ .fixed_ctl = SNBEP_MC_CHy_PCI_PMON_FIXED_CTL,
+ .event_descs = snbep_uncore_imc_events,
+ IVBEP_UNCORE_PCI_COMMON_INIT(),
+};
+
+/* registers in IRP boxes are not properly aligned */
+static unsigned ivbep_uncore_irp_ctls[] = {0xd8, 0xdc, 0xe0, 0xe4};
+static unsigned ivbep_uncore_irp_ctrs[] = {0xa0, 0xb0, 0xb8, 0xc0};
+
+static void ivbep_uncore_irp_enable_event(struct intel_uncore_box *box, struct perf_event *event)
+{
+ struct pci_dev *pdev = box->pci_dev;
+ struct hw_perf_event *hwc = &event->hw;
+
+ pci_write_config_dword(pdev, ivbep_uncore_irp_ctls[hwc->idx],
+ hwc->config | SNBEP_PMON_CTL_EN);
+}
+
+static void ivbep_uncore_irp_disable_event(struct intel_uncore_box *box, struct perf_event *event)
+{
+ struct pci_dev *pdev = box->pci_dev;
+ struct hw_perf_event *hwc = &event->hw;
+
+ pci_write_config_dword(pdev, ivbep_uncore_irp_ctls[hwc->idx], hwc->config);
+}
+
+static u64 ivbep_uncore_irp_read_counter(struct intel_uncore_box *box, struct perf_event *event)
+{
+ struct pci_dev *pdev = box->pci_dev;
+ struct hw_perf_event *hwc = &event->hw;
+ u64 count = 0;
+
+ pci_read_config_dword(pdev, ivbep_uncore_irp_ctrs[hwc->idx], (u32 *)&count);
+ pci_read_config_dword(pdev, ivbep_uncore_irp_ctrs[hwc->idx] + 4, (u32 *)&count + 1);
+
+ return count;
+}
+
+static struct intel_uncore_ops ivbep_uncore_irp_ops = {
+ .init_box = ivbep_uncore_pci_init_box,
+ .disable_box = snbep_uncore_pci_disable_box,
+ .enable_box = snbep_uncore_pci_enable_box,
+ .disable_event = ivbep_uncore_irp_disable_event,
+ .enable_event = ivbep_uncore_irp_enable_event,
+ .read_counter = ivbep_uncore_irp_read_counter,
+};
+
+static struct intel_uncore_type ivbep_uncore_irp = {
+ .name = "irp",
+ .num_counters = 4,
+ .num_boxes = 1,
+ .perf_ctr_bits = 48,
+ .event_mask = IVBEP_PMON_RAW_EVENT_MASK,
+ .box_ctl = SNBEP_PCI_PMON_BOX_CTL,
+ .ops = &ivbep_uncore_irp_ops,
+ .format_group = &ivbep_uncore_format_group,
+};
+
+static struct intel_uncore_ops ivbep_uncore_qpi_ops = {
+ .init_box = ivbep_uncore_pci_init_box,
+ .disable_box = snbep_uncore_pci_disable_box,
+ .enable_box = snbep_uncore_pci_enable_box,
+ .disable_event = snbep_uncore_pci_disable_event,
+ .enable_event = snbep_qpi_enable_event,
+ .read_counter = snbep_uncore_pci_read_counter,
+ .hw_config = snbep_qpi_hw_config,
+ .get_constraint = uncore_get_constraint,
+ .put_constraint = uncore_put_constraint,
+};
+
+static struct intel_uncore_type ivbep_uncore_qpi = {
+ .name = "qpi",
+ .num_counters = 4,
+ .num_boxes = 3,
+ .perf_ctr_bits = 48,
+ .perf_ctr = SNBEP_PCI_PMON_CTR0,
+ .event_ctl = SNBEP_PCI_PMON_CTL0,
+ .event_mask = IVBEP_QPI_PCI_PMON_RAW_EVENT_MASK,
+ .box_ctl = SNBEP_PCI_PMON_BOX_CTL,
+ .num_shared_regs = 1,
+ .ops = &ivbep_uncore_qpi_ops,
+ .format_group = &ivbep_uncore_qpi_format_group,
+};
+
+static struct intel_uncore_type ivbep_uncore_r2pcie = {
+ .name = "r2pcie",
+ .num_counters = 4,
+ .num_boxes = 1,
+ .perf_ctr_bits = 44,
+ .constraints = snbep_uncore_r2pcie_constraints,
+ IVBEP_UNCORE_PCI_COMMON_INIT(),
+};
+
+static struct intel_uncore_type ivbep_uncore_r3qpi = {
+ .name = "r3qpi",
+ .num_counters = 3,
+ .num_boxes = 2,
+ .perf_ctr_bits = 44,
+ .constraints = snbep_uncore_r3qpi_constraints,
+ IVBEP_UNCORE_PCI_COMMON_INIT(),
+};
+
+enum {
+ IVBEP_PCI_UNCORE_HA,
+ IVBEP_PCI_UNCORE_IMC,
+ IVBEP_PCI_UNCORE_IRP,
+ IVBEP_PCI_UNCORE_QPI,
+ IVBEP_PCI_UNCORE_R2PCIE,
+ IVBEP_PCI_UNCORE_R3QPI,
+};
+
+static struct intel_uncore_type *ivbep_pci_uncores[] = {
+ [IVBEP_PCI_UNCORE_HA] = &ivbep_uncore_ha,
+ [IVBEP_PCI_UNCORE_IMC] = &ivbep_uncore_imc,
+ [IVBEP_PCI_UNCORE_IRP] = &ivbep_uncore_irp,
+ [IVBEP_PCI_UNCORE_QPI] = &ivbep_uncore_qpi,
+ [IVBEP_PCI_UNCORE_R2PCIE] = &ivbep_uncore_r2pcie,
+ [IVBEP_PCI_UNCORE_R3QPI] = &ivbep_uncore_r3qpi,
+ NULL,
+};
+
+static const struct pci_device_id ivbep_uncore_pci_ids[] = {
+ { /* Home Agent 0 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe30),
+ .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_HA, 0),
+ },
+ { /* Home Agent 1 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe38),
+ .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_HA, 1),
+ },
+ { /* MC0 Channel 0 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xeb4),
+ .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_IMC, 0),
+ },
+ { /* MC0 Channel 1 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xeb5),
+ .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_IMC, 1),
+ },
+ { /* MC0 Channel 3 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xeb0),
+ .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_IMC, 2),
+ },
+ { /* MC0 Channel 4 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xeb1),
+ .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_IMC, 3),
+ },
+ { /* MC1 Channel 0 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xef4),
+ .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_IMC, 4),
+ },
+ { /* MC1 Channel 1 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xef5),
+ .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_IMC, 5),
+ },
+ { /* MC1 Channel 3 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xef0),
+ .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_IMC, 6),
+ },
+ { /* MC1 Channel 4 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xef1),
+ .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_IMC, 7),
+ },
+ { /* IRP */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe39),
+ .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_IRP, 0),
+ },
+ { /* QPI0 Port 0 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe32),
+ .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_QPI, 0),
+ },
+ { /* QPI0 Port 1 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe33),
+ .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_QPI, 1),
+ },
+ { /* QPI1 Port 2 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe3a),
+ .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_QPI, 2),
+ },
+ { /* R2PCIe */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe34),
+ .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_R2PCIE, 0),
+ },
+ { /* R3QPI0 Link 0 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe36),
+ .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_R3QPI, 0),
+ },
+ { /* R3QPI0 Link 1 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe37),
+ .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_R3QPI, 1),
+ },
+ { /* R3QPI1 Link 2 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe3e),
+ .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_R3QPI, 2),
+ },
+ { /* QPI Port 0 filter */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe86),
+ .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV,
+ SNBEP_PCI_QPI_PORT0_FILTER),
+ },
+ { /* QPI Port 0 filter */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe96),
+ .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV,
+ SNBEP_PCI_QPI_PORT1_FILTER),
+ },
+ { /* end: all zeroes */ }
+};
+
+static struct pci_driver ivbep_uncore_pci_driver = {
+ .name = "ivbep_uncore",
+ .id_table = ivbep_uncore_pci_ids,
+};
+
+int ivbep_uncore_pci_init(void)
+{
+ int ret = snbep_pci2phy_map_init(0x0e1e);
+ if (ret)
+ return ret;
+ uncore_pci_uncores = ivbep_pci_uncores;
+ uncore_pci_driver = &ivbep_uncore_pci_driver;
+ return 0;
+}
+/* end of IvyTown uncore support */
+
+/* KNL uncore support */
+static struct attribute *knl_uncore_ubox_formats_attr[] = {
+ &format_attr_event.attr,
+ &format_attr_umask.attr,
+ &format_attr_edge.attr,
+ &format_attr_tid_en.attr,
+ &format_attr_inv.attr,
+ &format_attr_thresh5.attr,
+ NULL,
+};
+
+static struct attribute_group knl_uncore_ubox_format_group = {
+ .name = "format",
+ .attrs = knl_uncore_ubox_formats_attr,
+};
+
+static struct intel_uncore_type knl_uncore_ubox = {
+ .name = "ubox",
+ .num_counters = 2,
+ .num_boxes = 1,
+ .perf_ctr_bits = 48,
+ .fixed_ctr_bits = 48,
+ .perf_ctr = HSWEP_U_MSR_PMON_CTR0,
+ .event_ctl = HSWEP_U_MSR_PMON_CTL0,
+ .event_mask = KNL_U_MSR_PMON_RAW_EVENT_MASK,
+ .fixed_ctr = HSWEP_U_MSR_PMON_UCLK_FIXED_CTR,
+ .fixed_ctl = HSWEP_U_MSR_PMON_UCLK_FIXED_CTL,
+ .ops = &snbep_uncore_msr_ops,
+ .format_group = &knl_uncore_ubox_format_group,
+};
+
+static struct attribute *knl_uncore_cha_formats_attr[] = {
+ &format_attr_event.attr,
+ &format_attr_umask.attr,
+ &format_attr_qor.attr,
+ &format_attr_edge.attr,
+ &format_attr_tid_en.attr,
+ &format_attr_inv.attr,
+ &format_attr_thresh8.attr,
+ &format_attr_filter_tid4.attr,
+ &format_attr_filter_link3.attr,
+ &format_attr_filter_state4.attr,
+ &format_attr_filter_local.attr,
+ &format_attr_filter_all_op.attr,
+ &format_attr_filter_nnm.attr,
+ &format_attr_filter_opc3.attr,
+ &format_attr_filter_nc.attr,
+ &format_attr_filter_isoc.attr,
+ NULL,
+};
+
+static struct attribute_group knl_uncore_cha_format_group = {
+ .name = "format",
+ .attrs = knl_uncore_cha_formats_attr,
+};
+
+static struct event_constraint knl_uncore_cha_constraints[] = {
+ UNCORE_EVENT_CONSTRAINT(0x11, 0x1),
+ UNCORE_EVENT_CONSTRAINT(0x1f, 0x1),
+ UNCORE_EVENT_CONSTRAINT(0x36, 0x1),
+ EVENT_CONSTRAINT_END
+};
+
+static struct extra_reg knl_uncore_cha_extra_regs[] = {
+ SNBEP_CBO_EVENT_EXTRA_REG(SNBEP_CBO_PMON_CTL_TID_EN,
+ SNBEP_CBO_PMON_CTL_TID_EN, 0x1),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x3d, 0xff, 0x2),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x35, 0xff, 0x4),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x36, 0xff, 0x4),
+ EVENT_EXTRA_END
+};
+
+static u64 knl_cha_filter_mask(int fields)
+{
+ u64 mask = 0;
+
+ if (fields & 0x1)
+ mask |= KNL_CHA_MSR_PMON_BOX_FILTER_TID;
+ if (fields & 0x2)
+ mask |= KNL_CHA_MSR_PMON_BOX_FILTER_STATE;
+ if (fields & 0x4)
+ mask |= KNL_CHA_MSR_PMON_BOX_FILTER_OP;
+ return mask;
+}
+
+static struct event_constraint *
+knl_cha_get_constraint(struct intel_uncore_box *box, struct perf_event *event)
+{
+ return __snbep_cbox_get_constraint(box, event, knl_cha_filter_mask);
+}
+
+static int knl_cha_hw_config(struct intel_uncore_box *box,
+ struct perf_event *event)
+{
+ struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
+ struct extra_reg *er;
+ int idx = 0;
+
+ for (er = knl_uncore_cha_extra_regs; er->msr; er++) {
+ if (er->event != (event->hw.config & er->config_mask))
+ continue;
+ idx |= er->idx;
+ }
+
+ if (idx) {
+ reg1->reg = HSWEP_C0_MSR_PMON_BOX_FILTER0 +
+ KNL_CHA_MSR_OFFSET * box->pmu->pmu_idx;
+ reg1->config = event->attr.config1 & knl_cha_filter_mask(idx);
+
+ reg1->config |= KNL_CHA_MSR_PMON_BOX_FILTER_REMOTE_NODE;
+ reg1->config |= KNL_CHA_MSR_PMON_BOX_FILTER_LOCAL_NODE;
+ reg1->config |= KNL_CHA_MSR_PMON_BOX_FILTER_NNC;
+ reg1->idx = idx;
+ }
+ return 0;
+}
+
+static void hswep_cbox_enable_event(struct intel_uncore_box *box,
+ struct perf_event *event);
+
+static struct intel_uncore_ops knl_uncore_cha_ops = {
+ .init_box = snbep_uncore_msr_init_box,
+ .disable_box = snbep_uncore_msr_disable_box,
+ .enable_box = snbep_uncore_msr_enable_box,
+ .disable_event = snbep_uncore_msr_disable_event,
+ .enable_event = hswep_cbox_enable_event,
+ .read_counter = uncore_msr_read_counter,
+ .hw_config = knl_cha_hw_config,
+ .get_constraint = knl_cha_get_constraint,
+ .put_constraint = snbep_cbox_put_constraint,
+};
+
+static struct intel_uncore_type knl_uncore_cha = {
+ .name = "cha",
+ .num_counters = 4,
+ .num_boxes = 38,
+ .perf_ctr_bits = 48,
+ .event_ctl = HSWEP_C0_MSR_PMON_CTL0,
+ .perf_ctr = HSWEP_C0_MSR_PMON_CTR0,
+ .event_mask = KNL_CHA_MSR_PMON_RAW_EVENT_MASK,
+ .box_ctl = HSWEP_C0_MSR_PMON_BOX_CTL,
+ .msr_offset = KNL_CHA_MSR_OFFSET,
+ .num_shared_regs = 1,
+ .constraints = knl_uncore_cha_constraints,
+ .ops = &knl_uncore_cha_ops,
+ .format_group = &knl_uncore_cha_format_group,
+};
+
+static struct attribute *knl_uncore_pcu_formats_attr[] = {
+ &format_attr_event2.attr,
+ &format_attr_use_occ_ctr.attr,
+ &format_attr_occ_sel.attr,
+ &format_attr_edge.attr,
+ &format_attr_tid_en.attr,
+ &format_attr_inv.attr,
+ &format_attr_thresh6.attr,
+ &format_attr_occ_invert.attr,
+ &format_attr_occ_edge_det.attr,
+ NULL,
+};
+
+static struct attribute_group knl_uncore_pcu_format_group = {
+ .name = "format",
+ .attrs = knl_uncore_pcu_formats_attr,
+};
+
+static struct intel_uncore_type knl_uncore_pcu = {
+ .name = "pcu",
+ .num_counters = 4,
+ .num_boxes = 1,
+ .perf_ctr_bits = 48,
+ .perf_ctr = HSWEP_PCU_MSR_PMON_CTR0,
+ .event_ctl = HSWEP_PCU_MSR_PMON_CTL0,
+ .event_mask = KNL_PCU_MSR_PMON_RAW_EVENT_MASK,
+ .box_ctl = HSWEP_PCU_MSR_PMON_BOX_CTL,
+ .ops = &snbep_uncore_msr_ops,
+ .format_group = &knl_uncore_pcu_format_group,
+};
+
+static struct intel_uncore_type *knl_msr_uncores[] = {
+ &knl_uncore_ubox,
+ &knl_uncore_cha,
+ &knl_uncore_pcu,
+ NULL,
+};
+
+void knl_uncore_cpu_init(void)
+{
+ uncore_msr_uncores = knl_msr_uncores;
+}
+
+static void knl_uncore_imc_enable_box(struct intel_uncore_box *box)
+{
+ struct pci_dev *pdev = box->pci_dev;
+ int box_ctl = uncore_pci_box_ctl(box);
+
+ pci_write_config_dword(pdev, box_ctl, 0);
+}
+
+static void knl_uncore_imc_enable_event(struct intel_uncore_box *box,
+ struct perf_event *event)
+{
+ struct pci_dev *pdev = box->pci_dev;
+ struct hw_perf_event *hwc = &event->hw;
+
+ if ((event->attr.config & SNBEP_PMON_CTL_EV_SEL_MASK)
+ == UNCORE_FIXED_EVENT)
+ pci_write_config_dword(pdev, hwc->config_base,
+ hwc->config | KNL_PMON_FIXED_CTL_EN);
+ else
+ pci_write_config_dword(pdev, hwc->config_base,
+ hwc->config | SNBEP_PMON_CTL_EN);
+}
+
+static struct intel_uncore_ops knl_uncore_imc_ops = {
+ .init_box = snbep_uncore_pci_init_box,
+ .disable_box = snbep_uncore_pci_disable_box,
+ .enable_box = knl_uncore_imc_enable_box,
+ .read_counter = snbep_uncore_pci_read_counter,
+ .enable_event = knl_uncore_imc_enable_event,
+ .disable_event = snbep_uncore_pci_disable_event,
+};
+
+static struct intel_uncore_type knl_uncore_imc_uclk = {
+ .name = "imc_uclk",
+ .num_counters = 4,
+ .num_boxes = 2,
+ .perf_ctr_bits = 48,
+ .fixed_ctr_bits = 48,
+ .perf_ctr = KNL_UCLK_MSR_PMON_CTR0_LOW,
+ .event_ctl = KNL_UCLK_MSR_PMON_CTL0,
+ .event_mask = SNBEP_PMON_RAW_EVENT_MASK,
+ .fixed_ctr = KNL_UCLK_MSR_PMON_UCLK_FIXED_LOW,
+ .fixed_ctl = KNL_UCLK_MSR_PMON_UCLK_FIXED_CTL,
+ .box_ctl = KNL_UCLK_MSR_PMON_BOX_CTL,
+ .ops = &knl_uncore_imc_ops,
+ .format_group = &snbep_uncore_format_group,
+};
+
+static struct intel_uncore_type knl_uncore_imc_dclk = {
+ .name = "imc",
+ .num_counters = 4,
+ .num_boxes = 6,
+ .perf_ctr_bits = 48,
+ .fixed_ctr_bits = 48,
+ .perf_ctr = KNL_MC0_CH0_MSR_PMON_CTR0_LOW,
+ .event_ctl = KNL_MC0_CH0_MSR_PMON_CTL0,
+ .event_mask = SNBEP_PMON_RAW_EVENT_MASK,
+ .fixed_ctr = KNL_MC0_CH0_MSR_PMON_FIXED_LOW,
+ .fixed_ctl = KNL_MC0_CH0_MSR_PMON_FIXED_CTL,
+ .box_ctl = KNL_MC0_CH0_MSR_PMON_BOX_CTL,
+ .ops = &knl_uncore_imc_ops,
+ .format_group = &snbep_uncore_format_group,
+};
+
+static struct intel_uncore_type knl_uncore_edc_uclk = {
+ .name = "edc_uclk",
+ .num_counters = 4,
+ .num_boxes = 8,
+ .perf_ctr_bits = 48,
+ .fixed_ctr_bits = 48,
+ .perf_ctr = KNL_UCLK_MSR_PMON_CTR0_LOW,
+ .event_ctl = KNL_UCLK_MSR_PMON_CTL0,
+ .event_mask = SNBEP_PMON_RAW_EVENT_MASK,
+ .fixed_ctr = KNL_UCLK_MSR_PMON_UCLK_FIXED_LOW,
+ .fixed_ctl = KNL_UCLK_MSR_PMON_UCLK_FIXED_CTL,
+ .box_ctl = KNL_UCLK_MSR_PMON_BOX_CTL,
+ .ops = &knl_uncore_imc_ops,
+ .format_group = &snbep_uncore_format_group,
+};
+
+static struct intel_uncore_type knl_uncore_edc_eclk = {
+ .name = "edc_eclk",
+ .num_counters = 4,
+ .num_boxes = 8,
+ .perf_ctr_bits = 48,
+ .fixed_ctr_bits = 48,
+ .perf_ctr = KNL_EDC0_ECLK_MSR_PMON_CTR0_LOW,
+ .event_ctl = KNL_EDC0_ECLK_MSR_PMON_CTL0,
+ .event_mask = SNBEP_PMON_RAW_EVENT_MASK,
+ .fixed_ctr = KNL_EDC0_ECLK_MSR_PMON_ECLK_FIXED_LOW,
+ .fixed_ctl = KNL_EDC0_ECLK_MSR_PMON_ECLK_FIXED_CTL,
+ .box_ctl = KNL_EDC0_ECLK_MSR_PMON_BOX_CTL,
+ .ops = &knl_uncore_imc_ops,
+ .format_group = &snbep_uncore_format_group,
+};
+
+static struct event_constraint knl_uncore_m2pcie_constraints[] = {
+ UNCORE_EVENT_CONSTRAINT(0x23, 0x3),
+ EVENT_CONSTRAINT_END
+};
+
+static struct intel_uncore_type knl_uncore_m2pcie = {
+ .name = "m2pcie",
+ .num_counters = 4,
+ .num_boxes = 1,
+ .perf_ctr_bits = 48,
+ .constraints = knl_uncore_m2pcie_constraints,
+ SNBEP_UNCORE_PCI_COMMON_INIT(),
+};
+
+static struct attribute *knl_uncore_irp_formats_attr[] = {
+ &format_attr_event.attr,
+ &format_attr_umask.attr,
+ &format_attr_qor.attr,
+ &format_attr_edge.attr,
+ &format_attr_inv.attr,
+ &format_attr_thresh8.attr,
+ NULL,
+};
+
+static struct attribute_group knl_uncore_irp_format_group = {
+ .name = "format",
+ .attrs = knl_uncore_irp_formats_attr,
+};
+
+static struct intel_uncore_type knl_uncore_irp = {
+ .name = "irp",
+ .num_counters = 2,
+ .num_boxes = 1,
+ .perf_ctr_bits = 48,
+ .perf_ctr = SNBEP_PCI_PMON_CTR0,
+ .event_ctl = SNBEP_PCI_PMON_CTL0,
+ .event_mask = KNL_IRP_PCI_PMON_RAW_EVENT_MASK,
+ .box_ctl = KNL_IRP_PCI_PMON_BOX_CTL,
+ .ops = &snbep_uncore_pci_ops,
+ .format_group = &knl_uncore_irp_format_group,
+};
+
+enum {
+ KNL_PCI_UNCORE_MC_UCLK,
+ KNL_PCI_UNCORE_MC_DCLK,
+ KNL_PCI_UNCORE_EDC_UCLK,
+ KNL_PCI_UNCORE_EDC_ECLK,
+ KNL_PCI_UNCORE_M2PCIE,
+ KNL_PCI_UNCORE_IRP,
+};
+
+static struct intel_uncore_type *knl_pci_uncores[] = {
+ [KNL_PCI_UNCORE_MC_UCLK] = &knl_uncore_imc_uclk,
+ [KNL_PCI_UNCORE_MC_DCLK] = &knl_uncore_imc_dclk,
+ [KNL_PCI_UNCORE_EDC_UCLK] = &knl_uncore_edc_uclk,
+ [KNL_PCI_UNCORE_EDC_ECLK] = &knl_uncore_edc_eclk,
+ [KNL_PCI_UNCORE_M2PCIE] = &knl_uncore_m2pcie,
+ [KNL_PCI_UNCORE_IRP] = &knl_uncore_irp,
+ NULL,
+};
+
+/*
+ * KNL uses a common PCI device ID for multiple instances of an Uncore PMU
+ * device type. prior to KNL, each instance of a PMU device type had a unique
+ * device ID.
+ *
+ * PCI Device ID Uncore PMU Devices
+ * ----------------------------------
+ * 0x7841 MC0 UClk, MC1 UClk
+ * 0x7843 MC0 DClk CH 0, MC0 DClk CH 1, MC0 DClk CH 2,
+ * MC1 DClk CH 0, MC1 DClk CH 1, MC1 DClk CH 2
+ * 0x7833 EDC0 UClk, EDC1 UClk, EDC2 UClk, EDC3 UClk,
+ * EDC4 UClk, EDC5 UClk, EDC6 UClk, EDC7 UClk
+ * 0x7835 EDC0 EClk, EDC1 EClk, EDC2 EClk, EDC3 EClk,
+ * EDC4 EClk, EDC5 EClk, EDC6 EClk, EDC7 EClk
+ * 0x7817 M2PCIe
+ * 0x7814 IRP
+*/
+
+static const struct pci_device_id knl_uncore_pci_ids[] = {
+ { /* MC UClk */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7841),
+ .driver_data = UNCORE_PCI_DEV_DATA(KNL_PCI_UNCORE_MC_UCLK, 0),
+ },
+ { /* MC DClk Channel */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7843),
+ .driver_data = UNCORE_PCI_DEV_DATA(KNL_PCI_UNCORE_MC_DCLK, 0),
+ },
+ { /* EDC UClk */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7833),
+ .driver_data = UNCORE_PCI_DEV_DATA(KNL_PCI_UNCORE_EDC_UCLK, 0),
+ },
+ { /* EDC EClk */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7835),
+ .driver_data = UNCORE_PCI_DEV_DATA(KNL_PCI_UNCORE_EDC_ECLK, 0),
+ },
+ { /* M2PCIe */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7817),
+ .driver_data = UNCORE_PCI_DEV_DATA(KNL_PCI_UNCORE_M2PCIE, 0),
+ },
+ { /* IRP */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7814),
+ .driver_data = UNCORE_PCI_DEV_DATA(KNL_PCI_UNCORE_IRP, 0),
+ },
+ { /* end: all zeroes */ }
+};
+
+static struct pci_driver knl_uncore_pci_driver = {
+ .name = "knl_uncore",
+ .id_table = knl_uncore_pci_ids,
+};
+
+int knl_uncore_pci_init(void)
+{
+ int ret;
+
+ /* All KNL PCI based PMON units are on the same PCI bus except IRP */
+ ret = snb_pci2phy_map_init(0x7814); /* IRP */
+ if (ret)
+ return ret;
+ ret = snb_pci2phy_map_init(0x7817); /* M2PCIe */
+ if (ret)
+ return ret;
+ uncore_pci_uncores = knl_pci_uncores;
+ uncore_pci_driver = &knl_uncore_pci_driver;
+ return 0;
+}
+
+/* end of KNL uncore support */
+
+/* Haswell-EP uncore support */
+static struct attribute *hswep_uncore_ubox_formats_attr[] = {
+ &format_attr_event.attr,
+ &format_attr_umask.attr,
+ &format_attr_edge.attr,
+ &format_attr_inv.attr,
+ &format_attr_thresh5.attr,
+ &format_attr_filter_tid2.attr,
+ &format_attr_filter_cid.attr,
+ NULL,
+};
+
+static struct attribute_group hswep_uncore_ubox_format_group = {
+ .name = "format",
+ .attrs = hswep_uncore_ubox_formats_attr,
+};
+
+static int hswep_ubox_hw_config(struct intel_uncore_box *box, struct perf_event *event)
+{
+ struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
+ reg1->reg = HSWEP_U_MSR_PMON_FILTER;
+ reg1->config = event->attr.config1 & HSWEP_U_MSR_PMON_BOX_FILTER_MASK;
+ reg1->idx = 0;
+ return 0;
+}
+
+static struct intel_uncore_ops hswep_uncore_ubox_ops = {
+ SNBEP_UNCORE_MSR_OPS_COMMON_INIT(),
+ .hw_config = hswep_ubox_hw_config,
+ .get_constraint = uncore_get_constraint,
+ .put_constraint = uncore_put_constraint,
+};
+
+static struct intel_uncore_type hswep_uncore_ubox = {
+ .name = "ubox",
+ .num_counters = 2,
+ .num_boxes = 1,
+ .perf_ctr_bits = 44,
+ .fixed_ctr_bits = 48,
+ .perf_ctr = HSWEP_U_MSR_PMON_CTR0,
+ .event_ctl = HSWEP_U_MSR_PMON_CTL0,
+ .event_mask = SNBEP_U_MSR_PMON_RAW_EVENT_MASK,
+ .fixed_ctr = HSWEP_U_MSR_PMON_UCLK_FIXED_CTR,
+ .fixed_ctl = HSWEP_U_MSR_PMON_UCLK_FIXED_CTL,
+ .num_shared_regs = 1,
+ .ops = &hswep_uncore_ubox_ops,
+ .format_group = &hswep_uncore_ubox_format_group,
+};
+
+static struct attribute *hswep_uncore_cbox_formats_attr[] = {
+ &format_attr_event.attr,
+ &format_attr_umask.attr,
+ &format_attr_edge.attr,
+ &format_attr_tid_en.attr,
+ &format_attr_thresh8.attr,
+ &format_attr_filter_tid3.attr,
+ &format_attr_filter_link2.attr,
+ &format_attr_filter_state3.attr,
+ &format_attr_filter_nid2.attr,
+ &format_attr_filter_opc2.attr,
+ &format_attr_filter_nc.attr,
+ &format_attr_filter_c6.attr,
+ &format_attr_filter_isoc.attr,
+ NULL,
+};
+
+static struct attribute_group hswep_uncore_cbox_format_group = {
+ .name = "format",
+ .attrs = hswep_uncore_cbox_formats_attr,
+};
+
+static struct event_constraint hswep_uncore_cbox_constraints[] = {
+ UNCORE_EVENT_CONSTRAINT(0x01, 0x1),
+ UNCORE_EVENT_CONSTRAINT(0x09, 0x1),
+ UNCORE_EVENT_CONSTRAINT(0x11, 0x1),
+ UNCORE_EVENT_CONSTRAINT(0x36, 0x1),
+ UNCORE_EVENT_CONSTRAINT(0x38, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x3b, 0x1),
+ UNCORE_EVENT_CONSTRAINT(0x3e, 0x1),
+ EVENT_CONSTRAINT_END
+};
+
+static struct extra_reg hswep_uncore_cbox_extra_regs[] = {
+ SNBEP_CBO_EVENT_EXTRA_REG(SNBEP_CBO_PMON_CTL_TID_EN,
+ SNBEP_CBO_PMON_CTL_TID_EN, 0x1),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x0334, 0xffff, 0x4),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x0534, 0xffff, 0x4),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x0934, 0xffff, 0x4),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x1134, 0xffff, 0x4),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x2134, 0xffff, 0x4),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x4134, 0xffff, 0x4),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x4037, 0x40ff, 0x8),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x4028, 0x40ff, 0x8),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x4032, 0x40ff, 0x8),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x4029, 0x40ff, 0x8),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x4033, 0x40ff, 0x8),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x402A, 0x40ff, 0x8),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x0135, 0xffff, 0x12),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x0335, 0xffff, 0x10),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x4135, 0xffff, 0x18),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x4435, 0xffff, 0x8),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x4835, 0xffff, 0x8),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x5035, 0xffff, 0x8),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x4335, 0xffff, 0x18),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x4a35, 0xffff, 0x8),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x2335, 0xffff, 0x10),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x8335, 0xffff, 0x10),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x2135, 0xffff, 0x10),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x8135, 0xffff, 0x10),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x0136, 0xffff, 0x10),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x0336, 0xffff, 0x10),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x4136, 0xffff, 0x18),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x4436, 0xffff, 0x8),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x4836, 0xffff, 0x8),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x4336, 0xffff, 0x18),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x4a36, 0xffff, 0x8),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x2336, 0xffff, 0x10),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x8336, 0xffff, 0x10),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x2136, 0xffff, 0x10),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x8136, 0xffff, 0x10),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x5036, 0xffff, 0x8),
+ EVENT_EXTRA_END
+};
+
+static u64 hswep_cbox_filter_mask(int fields)
+{
+ u64 mask = 0;
+ if (fields & 0x1)
+ mask |= HSWEP_CB0_MSR_PMON_BOX_FILTER_TID;
+ if (fields & 0x2)
+ mask |= HSWEP_CB0_MSR_PMON_BOX_FILTER_LINK;
+ if (fields & 0x4)
+ mask |= HSWEP_CB0_MSR_PMON_BOX_FILTER_STATE;
+ if (fields & 0x8)
+ mask |= HSWEP_CB0_MSR_PMON_BOX_FILTER_NID;
+ if (fields & 0x10) {
+ mask |= HSWEP_CB0_MSR_PMON_BOX_FILTER_OPC;
+ mask |= HSWEP_CB0_MSR_PMON_BOX_FILTER_NC;
+ mask |= HSWEP_CB0_MSR_PMON_BOX_FILTER_C6;
+ mask |= HSWEP_CB0_MSR_PMON_BOX_FILTER_ISOC;
+ }
+ return mask;
+}
+
+static struct event_constraint *
+hswep_cbox_get_constraint(struct intel_uncore_box *box, struct perf_event *event)
+{
+ return __snbep_cbox_get_constraint(box, event, hswep_cbox_filter_mask);
+}
+
+static int hswep_cbox_hw_config(struct intel_uncore_box *box, struct perf_event *event)
+{
+ struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
+ struct extra_reg *er;
+ int idx = 0;
+
+ for (er = hswep_uncore_cbox_extra_regs; er->msr; er++) {
+ if (er->event != (event->hw.config & er->config_mask))
+ continue;
+ idx |= er->idx;
+ }
+
+ if (idx) {
+ reg1->reg = HSWEP_C0_MSR_PMON_BOX_FILTER0 +
+ HSWEP_CBO_MSR_OFFSET * box->pmu->pmu_idx;
+ reg1->config = event->attr.config1 & hswep_cbox_filter_mask(idx);
+ reg1->idx = idx;
+ }
+ return 0;
+}
+
+static void hswep_cbox_enable_event(struct intel_uncore_box *box,
+ struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
+
+ if (reg1->idx != EXTRA_REG_NONE) {
+ u64 filter = uncore_shared_reg_config(box, 0);
+ wrmsrl(reg1->reg, filter & 0xffffffff);
+ wrmsrl(reg1->reg + 1, filter >> 32);
+ }
+
+ wrmsrl(hwc->config_base, hwc->config | SNBEP_PMON_CTL_EN);
+}
+
+static struct intel_uncore_ops hswep_uncore_cbox_ops = {
+ .init_box = snbep_uncore_msr_init_box,
+ .disable_box = snbep_uncore_msr_disable_box,
+ .enable_box = snbep_uncore_msr_enable_box,
+ .disable_event = snbep_uncore_msr_disable_event,
+ .enable_event = hswep_cbox_enable_event,
+ .read_counter = uncore_msr_read_counter,
+ .hw_config = hswep_cbox_hw_config,
+ .get_constraint = hswep_cbox_get_constraint,
+ .put_constraint = snbep_cbox_put_constraint,
+};
+
+static struct intel_uncore_type hswep_uncore_cbox = {
+ .name = "cbox",
+ .num_counters = 4,
+ .num_boxes = 18,
+ .perf_ctr_bits = 48,
+ .event_ctl = HSWEP_C0_MSR_PMON_CTL0,
+ .perf_ctr = HSWEP_C0_MSR_PMON_CTR0,
+ .event_mask = SNBEP_CBO_MSR_PMON_RAW_EVENT_MASK,
+ .box_ctl = HSWEP_C0_MSR_PMON_BOX_CTL,
+ .msr_offset = HSWEP_CBO_MSR_OFFSET,
+ .num_shared_regs = 1,
+ .constraints = hswep_uncore_cbox_constraints,
+ .ops = &hswep_uncore_cbox_ops,
+ .format_group = &hswep_uncore_cbox_format_group,
+};
+
+/*
+ * Write SBOX Initialization register bit by bit to avoid spurious #GPs
+ */
+static void hswep_uncore_sbox_msr_init_box(struct intel_uncore_box *box)
+{
+ unsigned msr = uncore_msr_box_ctl(box);
+
+ if (msr) {
+ u64 init = SNBEP_PMON_BOX_CTL_INT;
+ u64 flags = 0;
+ int i;
+
+ for_each_set_bit(i, (unsigned long *)&init, 64) {
+ flags |= (1ULL << i);
+ wrmsrl(msr, flags);
+ }
+ }
+}
+
+static struct intel_uncore_ops hswep_uncore_sbox_msr_ops = {
+ __SNBEP_UNCORE_MSR_OPS_COMMON_INIT(),
+ .init_box = hswep_uncore_sbox_msr_init_box
+};
+
+static struct attribute *hswep_uncore_sbox_formats_attr[] = {
+ &format_attr_event.attr,
+ &format_attr_umask.attr,
+ &format_attr_edge.attr,
+ &format_attr_tid_en.attr,
+ &format_attr_inv.attr,
+ &format_attr_thresh8.attr,
+ NULL,
+};
+
+static struct attribute_group hswep_uncore_sbox_format_group = {
+ .name = "format",
+ .attrs = hswep_uncore_sbox_formats_attr,
+};
+
+static struct intel_uncore_type hswep_uncore_sbox = {
+ .name = "sbox",
+ .num_counters = 4,
+ .num_boxes = 4,
+ .perf_ctr_bits = 44,
+ .event_ctl = HSWEP_S0_MSR_PMON_CTL0,
+ .perf_ctr = HSWEP_S0_MSR_PMON_CTR0,
+ .event_mask = HSWEP_S_MSR_PMON_RAW_EVENT_MASK,
+ .box_ctl = HSWEP_S0_MSR_PMON_BOX_CTL,
+ .msr_offset = HSWEP_SBOX_MSR_OFFSET,
+ .ops = &hswep_uncore_sbox_msr_ops,
+ .format_group = &hswep_uncore_sbox_format_group,
+};
+
+static int hswep_pcu_hw_config(struct intel_uncore_box *box, struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
+ int ev_sel = hwc->config & SNBEP_PMON_CTL_EV_SEL_MASK;
+
+ if (ev_sel >= 0xb && ev_sel <= 0xe) {
+ reg1->reg = HSWEP_PCU_MSR_PMON_BOX_FILTER;
+ reg1->idx = ev_sel - 0xb;
+ reg1->config = event->attr.config1 & (0xff << reg1->idx);
+ }
+ return 0;
+}
+
+static struct intel_uncore_ops hswep_uncore_pcu_ops = {
+ SNBEP_UNCORE_MSR_OPS_COMMON_INIT(),
+ .hw_config = hswep_pcu_hw_config,
+ .get_constraint = snbep_pcu_get_constraint,
+ .put_constraint = snbep_pcu_put_constraint,
+};
+
+static struct intel_uncore_type hswep_uncore_pcu = {
+ .name = "pcu",
+ .num_counters = 4,
+ .num_boxes = 1,
+ .perf_ctr_bits = 48,
+ .perf_ctr = HSWEP_PCU_MSR_PMON_CTR0,
+ .event_ctl = HSWEP_PCU_MSR_PMON_CTL0,
+ .event_mask = SNBEP_PCU_MSR_PMON_RAW_EVENT_MASK,
+ .box_ctl = HSWEP_PCU_MSR_PMON_BOX_CTL,
+ .num_shared_regs = 1,
+ .ops = &hswep_uncore_pcu_ops,
+ .format_group = &snbep_uncore_pcu_format_group,
+};
+
+static struct intel_uncore_type *hswep_msr_uncores[] = {
+ &hswep_uncore_ubox,
+ &hswep_uncore_cbox,
+ &hswep_uncore_sbox,
+ &hswep_uncore_pcu,
+ NULL,
+};
+
+void hswep_uncore_cpu_init(void)
+{
+ int pkg = topology_phys_to_logical_pkg(0);
+
+ if (hswep_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores)
+ hswep_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores;
+
+ /* Detect 6-8 core systems with only two SBOXes */
+ if (uncore_extra_pci_dev[pkg].dev[HSWEP_PCI_PCU_3]) {
+ u32 capid4;
+
+ pci_read_config_dword(uncore_extra_pci_dev[pkg].dev[HSWEP_PCI_PCU_3],
+ 0x94, &capid4);
+ if (((capid4 >> 6) & 0x3) == 0)
+ hswep_uncore_sbox.num_boxes = 2;
+ }
+
+ uncore_msr_uncores = hswep_msr_uncores;
+}
+
+static struct intel_uncore_type hswep_uncore_ha = {
+ .name = "ha",
+ .num_counters = 5,
+ .num_boxes = 2,
+ .perf_ctr_bits = 48,
+ SNBEP_UNCORE_PCI_COMMON_INIT(),
+};
+
+static struct uncore_event_desc hswep_uncore_imc_events[] = {
+ INTEL_UNCORE_EVENT_DESC(clockticks, "event=0x00,umask=0x00"),
+ INTEL_UNCORE_EVENT_DESC(cas_count_read, "event=0x04,umask=0x03"),
+ INTEL_UNCORE_EVENT_DESC(cas_count_read.scale, "6.103515625e-5"),
+ INTEL_UNCORE_EVENT_DESC(cas_count_read.unit, "MiB"),
+ INTEL_UNCORE_EVENT_DESC(cas_count_write, "event=0x04,umask=0x0c"),
+ INTEL_UNCORE_EVENT_DESC(cas_count_write.scale, "6.103515625e-5"),
+ INTEL_UNCORE_EVENT_DESC(cas_count_write.unit, "MiB"),
+ { /* end: all zeroes */ },
+};
+
+static struct intel_uncore_type hswep_uncore_imc = {
+ .name = "imc",
+ .num_counters = 5,
+ .num_boxes = 8,
+ .perf_ctr_bits = 48,
+ .fixed_ctr_bits = 48,
+ .fixed_ctr = SNBEP_MC_CHy_PCI_PMON_FIXED_CTR,
+ .fixed_ctl = SNBEP_MC_CHy_PCI_PMON_FIXED_CTL,
+ .event_descs = hswep_uncore_imc_events,
+ SNBEP_UNCORE_PCI_COMMON_INIT(),
+};
+
+static unsigned hswep_uncore_irp_ctrs[] = {0xa0, 0xa8, 0xb0, 0xb8};
+
+static u64 hswep_uncore_irp_read_counter(struct intel_uncore_box *box, struct perf_event *event)
+{
+ struct pci_dev *pdev = box->pci_dev;
+ struct hw_perf_event *hwc = &event->hw;
+ u64 count = 0;
+
+ pci_read_config_dword(pdev, hswep_uncore_irp_ctrs[hwc->idx], (u32 *)&count);
+ pci_read_config_dword(pdev, hswep_uncore_irp_ctrs[hwc->idx] + 4, (u32 *)&count + 1);
+
+ return count;
+}
+
+static struct intel_uncore_ops hswep_uncore_irp_ops = {
+ .init_box = snbep_uncore_pci_init_box,
+ .disable_box = snbep_uncore_pci_disable_box,
+ .enable_box = snbep_uncore_pci_enable_box,
+ .disable_event = ivbep_uncore_irp_disable_event,
+ .enable_event = ivbep_uncore_irp_enable_event,
+ .read_counter = hswep_uncore_irp_read_counter,
+};
+
+static struct intel_uncore_type hswep_uncore_irp = {
+ .name = "irp",
+ .num_counters = 4,
+ .num_boxes = 1,
+ .perf_ctr_bits = 48,
+ .event_mask = SNBEP_PMON_RAW_EVENT_MASK,
+ .box_ctl = SNBEP_PCI_PMON_BOX_CTL,
+ .ops = &hswep_uncore_irp_ops,
+ .format_group = &snbep_uncore_format_group,
+};
+
+static struct intel_uncore_type hswep_uncore_qpi = {
+ .name = "qpi",
+ .num_counters = 5,
+ .num_boxes = 3,
+ .perf_ctr_bits = 48,
+ .perf_ctr = SNBEP_PCI_PMON_CTR0,
+ .event_ctl = SNBEP_PCI_PMON_CTL0,
+ .event_mask = SNBEP_QPI_PCI_PMON_RAW_EVENT_MASK,
+ .box_ctl = SNBEP_PCI_PMON_BOX_CTL,
+ .num_shared_regs = 1,
+ .ops = &snbep_uncore_qpi_ops,
+ .format_group = &snbep_uncore_qpi_format_group,
+};
+
+static struct event_constraint hswep_uncore_r2pcie_constraints[] = {
+ UNCORE_EVENT_CONSTRAINT(0x10, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x11, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x13, 0x1),
+ UNCORE_EVENT_CONSTRAINT(0x23, 0x1),
+ UNCORE_EVENT_CONSTRAINT(0x24, 0x1),
+ UNCORE_EVENT_CONSTRAINT(0x25, 0x1),
+ UNCORE_EVENT_CONSTRAINT(0x26, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x27, 0x1),
+ UNCORE_EVENT_CONSTRAINT(0x28, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x29, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x2a, 0x1),
+ UNCORE_EVENT_CONSTRAINT(0x2b, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x2c, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x2d, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x32, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x33, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x34, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x35, 0x3),
+ EVENT_CONSTRAINT_END
+};
+
+static struct intel_uncore_type hswep_uncore_r2pcie = {
+ .name = "r2pcie",
+ .num_counters = 4,
+ .num_boxes = 1,
+ .perf_ctr_bits = 48,
+ .constraints = hswep_uncore_r2pcie_constraints,
+ SNBEP_UNCORE_PCI_COMMON_INIT(),
+};
+
+static struct event_constraint hswep_uncore_r3qpi_constraints[] = {
+ UNCORE_EVENT_CONSTRAINT(0x01, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x07, 0x7),
+ UNCORE_EVENT_CONSTRAINT(0x08, 0x7),
+ UNCORE_EVENT_CONSTRAINT(0x09, 0x7),
+ UNCORE_EVENT_CONSTRAINT(0x0a, 0x7),
+ UNCORE_EVENT_CONSTRAINT(0x0e, 0x7),
+ UNCORE_EVENT_CONSTRAINT(0x10, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x11, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x12, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x13, 0x1),
+ UNCORE_EVENT_CONSTRAINT(0x14, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x15, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x1f, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x20, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x21, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x22, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x23, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x25, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x26, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x28, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x29, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x2c, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x2d, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x2e, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x2f, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x31, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x32, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x33, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x34, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x36, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x37, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x38, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x39, 0x3),
+ EVENT_CONSTRAINT_END
+};
+
+static struct intel_uncore_type hswep_uncore_r3qpi = {
+ .name = "r3qpi",
+ .num_counters = 4,
+ .num_boxes = 3,
+ .perf_ctr_bits = 44,
+ .constraints = hswep_uncore_r3qpi_constraints,
+ SNBEP_UNCORE_PCI_COMMON_INIT(),
+};
+
+enum {
+ HSWEP_PCI_UNCORE_HA,
+ HSWEP_PCI_UNCORE_IMC,
+ HSWEP_PCI_UNCORE_IRP,
+ HSWEP_PCI_UNCORE_QPI,
+ HSWEP_PCI_UNCORE_R2PCIE,
+ HSWEP_PCI_UNCORE_R3QPI,
+};
+
+static struct intel_uncore_type *hswep_pci_uncores[] = {
+ [HSWEP_PCI_UNCORE_HA] = &hswep_uncore_ha,
+ [HSWEP_PCI_UNCORE_IMC] = &hswep_uncore_imc,
+ [HSWEP_PCI_UNCORE_IRP] = &hswep_uncore_irp,
+ [HSWEP_PCI_UNCORE_QPI] = &hswep_uncore_qpi,
+ [HSWEP_PCI_UNCORE_R2PCIE] = &hswep_uncore_r2pcie,
+ [HSWEP_PCI_UNCORE_R3QPI] = &hswep_uncore_r3qpi,
+ NULL,
+};
+
+static const struct pci_device_id hswep_uncore_pci_ids[] = {
+ { /* Home Agent 0 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2f30),
+ .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_HA, 0),
+ },
+ { /* Home Agent 1 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2f38),
+ .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_HA, 1),
+ },
+ { /* MC0 Channel 0 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2fb0),
+ .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_IMC, 0),
+ },
+ { /* MC0 Channel 1 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2fb1),
+ .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_IMC, 1),
+ },
+ { /* MC0 Channel 2 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2fb4),
+ .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_IMC, 2),
+ },
+ { /* MC0 Channel 3 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2fb5),
+ .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_IMC, 3),
+ },
+ { /* MC1 Channel 0 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2fd0),
+ .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_IMC, 4),
+ },
+ { /* MC1 Channel 1 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2fd1),
+ .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_IMC, 5),
+ },
+ { /* MC1 Channel 2 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2fd4),
+ .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_IMC, 6),
+ },
+ { /* MC1 Channel 3 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2fd5),
+ .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_IMC, 7),
+ },
+ { /* IRP */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2f39),
+ .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_IRP, 0),
+ },
+ { /* QPI0 Port 0 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2f32),
+ .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_QPI, 0),
+ },
+ { /* QPI0 Port 1 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2f33),
+ .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_QPI, 1),
+ },
+ { /* QPI1 Port 2 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2f3a),
+ .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_QPI, 2),
+ },
+ { /* R2PCIe */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2f34),
+ .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_R2PCIE, 0),
+ },
+ { /* R3QPI0 Link 0 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2f36),
+ .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_R3QPI, 0),
+ },
+ { /* R3QPI0 Link 1 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2f37),
+ .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_R3QPI, 1),
+ },
+ { /* R3QPI1 Link 2 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2f3e),
+ .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_R3QPI, 2),
+ },
+ { /* QPI Port 0 filter */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2f86),
+ .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV,
+ SNBEP_PCI_QPI_PORT0_FILTER),
+ },
+ { /* QPI Port 1 filter */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2f96),
+ .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV,
+ SNBEP_PCI_QPI_PORT1_FILTER),
+ },
+ { /* PCU.3 (for Capability registers) */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2fc0),
+ .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV,
+ HSWEP_PCI_PCU_3),
+ },
+ { /* end: all zeroes */ }
+};
+
+static struct pci_driver hswep_uncore_pci_driver = {
+ .name = "hswep_uncore",
+ .id_table = hswep_uncore_pci_ids,
+};
+
+int hswep_uncore_pci_init(void)
+{
+ int ret = snbep_pci2phy_map_init(0x2f1e);
+ if (ret)
+ return ret;
+ uncore_pci_uncores = hswep_pci_uncores;
+ uncore_pci_driver = &hswep_uncore_pci_driver;
+ return 0;
+}
+/* end of Haswell-EP uncore support */
+
+/* BDX uncore support */
+
+static struct intel_uncore_type bdx_uncore_ubox = {
+ .name = "ubox",
+ .num_counters = 2,
+ .num_boxes = 1,
+ .perf_ctr_bits = 48,
+ .fixed_ctr_bits = 48,
+ .perf_ctr = HSWEP_U_MSR_PMON_CTR0,
+ .event_ctl = HSWEP_U_MSR_PMON_CTL0,
+ .event_mask = SNBEP_U_MSR_PMON_RAW_EVENT_MASK,
+ .fixed_ctr = HSWEP_U_MSR_PMON_UCLK_FIXED_CTR,
+ .fixed_ctl = HSWEP_U_MSR_PMON_UCLK_FIXED_CTL,
+ .num_shared_regs = 1,
+ .ops = &ivbep_uncore_msr_ops,
+ .format_group = &ivbep_uncore_ubox_format_group,
+};
+
+static struct event_constraint bdx_uncore_cbox_constraints[] = {
+ UNCORE_EVENT_CONSTRAINT(0x09, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x11, 0x1),
+ UNCORE_EVENT_CONSTRAINT(0x36, 0x1),
+ UNCORE_EVENT_CONSTRAINT(0x3e, 0x1),
+ EVENT_CONSTRAINT_END
+};
+
+static struct intel_uncore_type bdx_uncore_cbox = {
+ .name = "cbox",
+ .num_counters = 4,
+ .num_boxes = 24,
+ .perf_ctr_bits = 48,
+ .event_ctl = HSWEP_C0_MSR_PMON_CTL0,
+ .perf_ctr = HSWEP_C0_MSR_PMON_CTR0,
+ .event_mask = SNBEP_CBO_MSR_PMON_RAW_EVENT_MASK,
+ .box_ctl = HSWEP_C0_MSR_PMON_BOX_CTL,
+ .msr_offset = HSWEP_CBO_MSR_OFFSET,
+ .num_shared_regs = 1,
+ .constraints = bdx_uncore_cbox_constraints,
+ .ops = &hswep_uncore_cbox_ops,
+ .format_group = &hswep_uncore_cbox_format_group,
+};
+
+static struct intel_uncore_type bdx_uncore_sbox = {
+ .name = "sbox",
+ .num_counters = 4,
+ .num_boxes = 4,
+ .perf_ctr_bits = 48,
+ .event_ctl = HSWEP_S0_MSR_PMON_CTL0,
+ .perf_ctr = HSWEP_S0_MSR_PMON_CTR0,
+ .event_mask = HSWEP_S_MSR_PMON_RAW_EVENT_MASK,
+ .box_ctl = HSWEP_S0_MSR_PMON_BOX_CTL,
+ .msr_offset = HSWEP_SBOX_MSR_OFFSET,
+ .ops = &hswep_uncore_sbox_msr_ops,
+ .format_group = &hswep_uncore_sbox_format_group,
+};
+
+#define BDX_MSR_UNCORE_SBOX 3
+
+static struct intel_uncore_type *bdx_msr_uncores[] = {
+ &bdx_uncore_ubox,
+ &bdx_uncore_cbox,
+ &hswep_uncore_pcu,
+ &bdx_uncore_sbox,
+ NULL,
+};
+
+void bdx_uncore_cpu_init(void)
+{
+ if (bdx_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores)
+ bdx_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores;
+ uncore_msr_uncores = bdx_msr_uncores;
+
+ /* BDX-DE doesn't have SBOX */
+ if (boot_cpu_data.x86_model == 86)
+ uncore_msr_uncores[BDX_MSR_UNCORE_SBOX] = NULL;
+}
+
+static struct intel_uncore_type bdx_uncore_ha = {
+ .name = "ha",
+ .num_counters = 4,
+ .num_boxes = 2,
+ .perf_ctr_bits = 48,
+ SNBEP_UNCORE_PCI_COMMON_INIT(),
+};
+
+static struct intel_uncore_type bdx_uncore_imc = {
+ .name = "imc",
+ .num_counters = 5,
+ .num_boxes = 8,
+ .perf_ctr_bits = 48,
+ .fixed_ctr_bits = 48,
+ .fixed_ctr = SNBEP_MC_CHy_PCI_PMON_FIXED_CTR,
+ .fixed_ctl = SNBEP_MC_CHy_PCI_PMON_FIXED_CTL,
+ .event_descs = hswep_uncore_imc_events,
+ SNBEP_UNCORE_PCI_COMMON_INIT(),
+};
+
+static struct intel_uncore_type bdx_uncore_irp = {
+ .name = "irp",
+ .num_counters = 4,
+ .num_boxes = 1,
+ .perf_ctr_bits = 48,
+ .event_mask = SNBEP_PMON_RAW_EVENT_MASK,
+ .box_ctl = SNBEP_PCI_PMON_BOX_CTL,
+ .ops = &hswep_uncore_irp_ops,
+ .format_group = &snbep_uncore_format_group,
+};
+
+static struct intel_uncore_type bdx_uncore_qpi = {
+ .name = "qpi",
+ .num_counters = 4,
+ .num_boxes = 3,
+ .perf_ctr_bits = 48,
+ .perf_ctr = SNBEP_PCI_PMON_CTR0,
+ .event_ctl = SNBEP_PCI_PMON_CTL0,
+ .event_mask = SNBEP_QPI_PCI_PMON_RAW_EVENT_MASK,
+ .box_ctl = SNBEP_PCI_PMON_BOX_CTL,
+ .num_shared_regs = 1,
+ .ops = &snbep_uncore_qpi_ops,
+ .format_group = &snbep_uncore_qpi_format_group,
+};
+
+static struct event_constraint bdx_uncore_r2pcie_constraints[] = {
+ UNCORE_EVENT_CONSTRAINT(0x10, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x11, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x13, 0x1),
+ UNCORE_EVENT_CONSTRAINT(0x23, 0x1),
+ UNCORE_EVENT_CONSTRAINT(0x25, 0x1),
+ UNCORE_EVENT_CONSTRAINT(0x26, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x28, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x2c, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x2d, 0x3),
+ EVENT_CONSTRAINT_END
+};
+
+static struct intel_uncore_type bdx_uncore_r2pcie = {
+ .name = "r2pcie",
+ .num_counters = 4,
+ .num_boxes = 1,
+ .perf_ctr_bits = 48,
+ .constraints = bdx_uncore_r2pcie_constraints,
+ SNBEP_UNCORE_PCI_COMMON_INIT(),
+};
+
+static struct event_constraint bdx_uncore_r3qpi_constraints[] = {
+ UNCORE_EVENT_CONSTRAINT(0x01, 0x7),
+ UNCORE_EVENT_CONSTRAINT(0x07, 0x7),
+ UNCORE_EVENT_CONSTRAINT(0x08, 0x7),
+ UNCORE_EVENT_CONSTRAINT(0x09, 0x7),
+ UNCORE_EVENT_CONSTRAINT(0x0a, 0x7),
+ UNCORE_EVENT_CONSTRAINT(0x0e, 0x7),
+ UNCORE_EVENT_CONSTRAINT(0x10, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x11, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x13, 0x1),
+ UNCORE_EVENT_CONSTRAINT(0x14, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x15, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x1f, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x20, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x21, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x22, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x23, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x25, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x26, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x28, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x29, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x2c, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x2d, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x2e, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x2f, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x33, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x34, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x36, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x37, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x38, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x39, 0x3),
+ EVENT_CONSTRAINT_END
+};
+
+static struct intel_uncore_type bdx_uncore_r3qpi = {
+ .name = "r3qpi",
+ .num_counters = 3,
+ .num_boxes = 3,
+ .perf_ctr_bits = 48,
+ .constraints = bdx_uncore_r3qpi_constraints,
+ SNBEP_UNCORE_PCI_COMMON_INIT(),
+};
+
+enum {
+ BDX_PCI_UNCORE_HA,
+ BDX_PCI_UNCORE_IMC,
+ BDX_PCI_UNCORE_IRP,
+ BDX_PCI_UNCORE_QPI,
+ BDX_PCI_UNCORE_R2PCIE,
+ BDX_PCI_UNCORE_R3QPI,
+};
+
+static struct intel_uncore_type *bdx_pci_uncores[] = {
+ [BDX_PCI_UNCORE_HA] = &bdx_uncore_ha,
+ [BDX_PCI_UNCORE_IMC] = &bdx_uncore_imc,
+ [BDX_PCI_UNCORE_IRP] = &bdx_uncore_irp,
+ [BDX_PCI_UNCORE_QPI] = &bdx_uncore_qpi,
+ [BDX_PCI_UNCORE_R2PCIE] = &bdx_uncore_r2pcie,
+ [BDX_PCI_UNCORE_R3QPI] = &bdx_uncore_r3qpi,
+ NULL,
+};
+
+static const struct pci_device_id bdx_uncore_pci_ids[] = {
+ { /* Home Agent 0 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f30),
+ .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_HA, 0),
+ },
+ { /* Home Agent 1 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f38),
+ .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_HA, 1),
+ },
+ { /* MC0 Channel 0 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6fb0),
+ .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_IMC, 0),
+ },
+ { /* MC0 Channel 1 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6fb1),
+ .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_IMC, 1),
+ },
+ { /* MC0 Channel 2 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6fb4),
+ .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_IMC, 2),
+ },
+ { /* MC0 Channel 3 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6fb5),
+ .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_IMC, 3),
+ },
+ { /* MC1 Channel 0 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6fd0),
+ .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_IMC, 4),
+ },
+ { /* MC1 Channel 1 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6fd1),
+ .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_IMC, 5),
+ },
+ { /* MC1 Channel 2 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6fd4),
+ .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_IMC, 6),
+ },
+ { /* MC1 Channel 3 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6fd5),
+ .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_IMC, 7),
+ },
+ { /* IRP */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f39),
+ .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_IRP, 0),
+ },
+ { /* QPI0 Port 0 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f32),
+ .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_QPI, 0),
+ },
+ { /* QPI0 Port 1 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f33),
+ .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_QPI, 1),
+ },
+ { /* QPI1 Port 2 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f3a),
+ .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_QPI, 2),
+ },
+ { /* R2PCIe */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f34),
+ .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_R2PCIE, 0),
+ },
+ { /* R3QPI0 Link 0 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f36),
+ .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_R3QPI, 0),
+ },
+ { /* R3QPI0 Link 1 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f37),
+ .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_R3QPI, 1),
+ },
+ { /* R3QPI1 Link 2 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f3e),
+ .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_R3QPI, 2),
+ },
+ { /* QPI Port 0 filter */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f86),
+ .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV, 0),
+ },
+ { /* QPI Port 1 filter */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f96),
+ .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV, 1),
+ },
+ { /* QPI Port 2 filter */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f46),
+ .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV, 2),
+ },
+ { /* end: all zeroes */ }
+};
+
+static struct pci_driver bdx_uncore_pci_driver = {
+ .name = "bdx_uncore",
+ .id_table = bdx_uncore_pci_ids,
+};
+
+int bdx_uncore_pci_init(void)
+{
+ int ret = snbep_pci2phy_map_init(0x6f1e);
+
+ if (ret)
+ return ret;
+ uncore_pci_uncores = bdx_pci_uncores;
+ uncore_pci_driver = &bdx_uncore_pci_driver;
+ return 0;
+}
+
+/* end of BDX uncore support */
diff --git a/arch/x86/events/msr.c b/arch/x86/events/msr.c
new file mode 100644
index 000000000..8bef19f09
--- /dev/null
+++ b/arch/x86/events/msr.c
@@ -0,0 +1,241 @@
+#include <linux/perf_event.h>
+
+enum perf_msr_id {
+ PERF_MSR_TSC = 0,
+ PERF_MSR_APERF = 1,
+ PERF_MSR_MPERF = 2,
+ PERF_MSR_PPERF = 3,
+ PERF_MSR_SMI = 4,
+
+ PERF_MSR_EVENT_MAX,
+};
+
+static bool test_aperfmperf(int idx)
+{
+ return boot_cpu_has(X86_FEATURE_APERFMPERF);
+}
+
+static bool test_intel(int idx)
+{
+ if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
+ boot_cpu_data.x86 != 6)
+ return false;
+
+ switch (boot_cpu_data.x86_model) {
+ case 30: /* 45nm Nehalem */
+ case 26: /* 45nm Nehalem-EP */
+ case 46: /* 45nm Nehalem-EX */
+
+ case 37: /* 32nm Westmere */
+ case 44: /* 32nm Westmere-EP */
+ case 47: /* 32nm Westmere-EX */
+
+ case 42: /* 32nm SandyBridge */
+ case 45: /* 32nm SandyBridge-E/EN/EP */
+
+ case 58: /* 22nm IvyBridge */
+ case 62: /* 22nm IvyBridge-EP/EX */
+
+ case 60: /* 22nm Haswell Core */
+ case 63: /* 22nm Haswell Server */
+ case 69: /* 22nm Haswell ULT */
+ case 70: /* 22nm Haswell + GT3e (Intel Iris Pro graphics) */
+
+ case 61: /* 14nm Broadwell Core-M */
+ case 86: /* 14nm Broadwell Xeon D */
+ case 71: /* 14nm Broadwell + GT3e (Intel Iris Pro graphics) */
+ case 79: /* 14nm Broadwell Server */
+
+ case 55: /* 22nm Atom "Silvermont" */
+ case 77: /* 22nm Atom "Silvermont Avoton/Rangely" */
+ case 76: /* 14nm Atom "Airmont" */
+ if (idx == PERF_MSR_SMI)
+ return true;
+ break;
+
+ case 78: /* 14nm Skylake Mobile */
+ case 94: /* 14nm Skylake Desktop */
+ if (idx == PERF_MSR_SMI || idx == PERF_MSR_PPERF)
+ return true;
+ break;
+ }
+
+ return false;
+}
+
+struct perf_msr {
+ u64 msr;
+ struct perf_pmu_events_attr *attr;
+ bool (*test)(int idx);
+};
+
+PMU_EVENT_ATTR_STRING(tsc, evattr_tsc, "event=0x00");
+PMU_EVENT_ATTR_STRING(aperf, evattr_aperf, "event=0x01");
+PMU_EVENT_ATTR_STRING(mperf, evattr_mperf, "event=0x02");
+PMU_EVENT_ATTR_STRING(pperf, evattr_pperf, "event=0x03");
+PMU_EVENT_ATTR_STRING(smi, evattr_smi, "event=0x04");
+
+static struct perf_msr msr[] = {
+ [PERF_MSR_TSC] = { 0, &evattr_tsc, NULL, },
+ [PERF_MSR_APERF] = { MSR_IA32_APERF, &evattr_aperf, test_aperfmperf, },
+ [PERF_MSR_MPERF] = { MSR_IA32_MPERF, &evattr_mperf, test_aperfmperf, },
+ [PERF_MSR_PPERF] = { MSR_PPERF, &evattr_pperf, test_intel, },
+ [PERF_MSR_SMI] = { MSR_SMI_COUNT, &evattr_smi, test_intel, },
+};
+
+static struct attribute *events_attrs[PERF_MSR_EVENT_MAX + 1] = {
+ NULL,
+};
+
+static struct attribute_group events_attr_group = {
+ .name = "events",
+ .attrs = events_attrs,
+};
+
+PMU_FORMAT_ATTR(event, "config:0-63");
+static struct attribute *format_attrs[] = {
+ &format_attr_event.attr,
+ NULL,
+};
+static struct attribute_group format_attr_group = {
+ .name = "format",
+ .attrs = format_attrs,
+};
+
+static const struct attribute_group *attr_groups[] = {
+ &events_attr_group,
+ &format_attr_group,
+ NULL,
+};
+
+static int msr_event_init(struct perf_event *event)
+{
+ u64 cfg = event->attr.config;
+
+ if (event->attr.type != event->pmu->type)
+ return -ENOENT;
+
+ if (cfg >= PERF_MSR_EVENT_MAX)
+ return -EINVAL;
+
+ /* unsupported modes and filters */
+ if (event->attr.exclude_user ||
+ event->attr.exclude_kernel ||
+ event->attr.exclude_hv ||
+ event->attr.exclude_idle ||
+ event->attr.exclude_host ||
+ event->attr.exclude_guest ||
+ event->attr.sample_period) /* no sampling */
+ return -EINVAL;
+
+ if (!msr[cfg].attr)
+ return -EINVAL;
+
+ event->hw.idx = -1;
+ event->hw.event_base = msr[cfg].msr;
+ event->hw.config = cfg;
+
+ return 0;
+}
+
+static inline u64 msr_read_counter(struct perf_event *event)
+{
+ u64 now;
+
+ if (event->hw.event_base)
+ rdmsrl(event->hw.event_base, now);
+ else
+ rdtscll(now);
+
+ return now;
+}
+static void msr_event_update(struct perf_event *event)
+{
+ u64 prev, now;
+ s64 delta;
+
+ /* Careful, an NMI might modify the previous event value. */
+again:
+ prev = local64_read(&event->hw.prev_count);
+ now = msr_read_counter(event);
+
+ if (local64_cmpxchg(&event->hw.prev_count, prev, now) != prev)
+ goto again;
+
+ delta = now - prev;
+ if (unlikely(event->hw.event_base == MSR_SMI_COUNT))
+ delta = sign_extend64(delta, 31);
+
+ local64_add(delta, &event->count);
+}
+
+static void msr_event_start(struct perf_event *event, int flags)
+{
+ u64 now;
+
+ now = msr_read_counter(event);
+ local64_set(&event->hw.prev_count, now);
+}
+
+static void msr_event_stop(struct perf_event *event, int flags)
+{
+ msr_event_update(event);
+}
+
+static void msr_event_del(struct perf_event *event, int flags)
+{
+ msr_event_stop(event, PERF_EF_UPDATE);
+}
+
+static int msr_event_add(struct perf_event *event, int flags)
+{
+ if (flags & PERF_EF_START)
+ msr_event_start(event, flags);
+
+ return 0;
+}
+
+static struct pmu pmu_msr = {
+ .task_ctx_nr = perf_sw_context,
+ .attr_groups = attr_groups,
+ .event_init = msr_event_init,
+ .add = msr_event_add,
+ .del = msr_event_del,
+ .start = msr_event_start,
+ .stop = msr_event_stop,
+ .read = msr_event_update,
+ .capabilities = PERF_PMU_CAP_NO_INTERRUPT,
+};
+
+static int __init msr_init(void)
+{
+ int i, j = 0;
+
+ if (!boot_cpu_has(X86_FEATURE_TSC)) {
+ pr_cont("no MSR PMU driver.\n");
+ return 0;
+ }
+
+ /* Probe the MSRs. */
+ for (i = PERF_MSR_TSC + 1; i < PERF_MSR_EVENT_MAX; i++) {
+ u64 val;
+
+ /*
+ * Virt sucks arse; you cannot tell if a R/O MSR is present :/
+ */
+ if (!msr[i].test(i) || rdmsrl_safe(msr[i].msr, &val))
+ msr[i].attr = NULL;
+ }
+
+ /* List remaining MSRs in the sysfs attrs. */
+ for (i = 0; i < PERF_MSR_EVENT_MAX; i++) {
+ if (msr[i].attr)
+ events_attrs[j++] = &msr[i].attr->attr.attr;
+ }
+ events_attrs[j] = NULL;
+
+ perf_pmu_register(&pmu_msr, "msr", -1);
+
+ return 0;
+}
+device_initcall(msr_init);
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
new file mode 100644
index 000000000..ad4dc7fff
--- /dev/null
+++ b/arch/x86/events/perf_event.h
@@ -0,0 +1,965 @@
+/*
+ * Performance events x86 architecture header
+ *
+ * Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de>
+ * Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar
+ * Copyright (C) 2009 Jaswinder Singh Rajput
+ * Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter
+ * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra
+ * Copyright (C) 2009 Intel Corporation, <markus.t.metzger@intel.com>
+ * Copyright (C) 2009 Google, Inc., Stephane Eranian
+ *
+ * For licencing details see kernel-base/COPYING
+ */
+
+#include <linux/perf_event.h>
+
+/* To enable MSR tracing please use the generic trace points. */
+
+/*
+ * | NHM/WSM | SNB |
+ * register -------------------------------
+ * | HT | no HT | HT | no HT |
+ *-----------------------------------------
+ * offcore | core | core | cpu | core |
+ * lbr_sel | core | core | cpu | core |
+ * ld_lat | cpu | core | cpu | core |
+ *-----------------------------------------
+ *
+ * Given that there is a small number of shared regs,
+ * we can pre-allocate their slot in the per-cpu
+ * per-core reg tables.
+ */
+enum extra_reg_type {
+ EXTRA_REG_NONE = -1, /* not used */
+
+ EXTRA_REG_RSP_0 = 0, /* offcore_response_0 */
+ EXTRA_REG_RSP_1 = 1, /* offcore_response_1 */
+ EXTRA_REG_LBR = 2, /* lbr_select */
+ EXTRA_REG_LDLAT = 3, /* ld_lat_threshold */
+ EXTRA_REG_FE = 4, /* fe_* */
+
+ EXTRA_REG_MAX /* number of entries needed */
+};
+
+struct event_constraint {
+ union {
+ unsigned long idxmsk[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
+ u64 idxmsk64;
+ };
+ u64 code;
+ u64 cmask;
+ int weight;
+ int overlap;
+ int flags;
+};
+/*
+ * struct hw_perf_event.flags flags
+ */
+#define PERF_X86_EVENT_PEBS_LDLAT 0x0001 /* ld+ldlat data address sampling */
+#define PERF_X86_EVENT_PEBS_ST 0x0002 /* st data address sampling */
+#define PERF_X86_EVENT_PEBS_ST_HSW 0x0004 /* haswell style datala, store */
+#define PERF_X86_EVENT_COMMITTED 0x0008 /* event passed commit_txn */
+#define PERF_X86_EVENT_PEBS_LD_HSW 0x0010 /* haswell style datala, load */
+#define PERF_X86_EVENT_PEBS_NA_HSW 0x0020 /* haswell style datala, unknown */
+#define PERF_X86_EVENT_EXCL 0x0040 /* HT exclusivity on counter */
+#define PERF_X86_EVENT_DYNAMIC 0x0080 /* dynamic alloc'd constraint */
+#define PERF_X86_EVENT_RDPMC_ALLOWED 0x0100 /* grant rdpmc permission */
+#define PERF_X86_EVENT_EXCL_ACCT 0x0200 /* accounted EXCL event */
+#define PERF_X86_EVENT_AUTO_RELOAD 0x0400 /* use PEBS auto-reload */
+#define PERF_X86_EVENT_FREERUNNING 0x0800 /* use freerunning PEBS */
+
+
+struct amd_nb {
+ int nb_id; /* NorthBridge id */
+ int refcnt; /* reference count */
+ struct perf_event *owners[X86_PMC_IDX_MAX];
+ struct event_constraint event_constraints[X86_PMC_IDX_MAX];
+};
+
+/* The maximal number of PEBS events: */
+#define MAX_PEBS_EVENTS 8
+
+/*
+ * Flags PEBS can handle without an PMI.
+ *
+ * TID can only be handled by flushing at context switch.
+ *
+ */
+#define PEBS_FREERUNNING_FLAGS \
+ (PERF_SAMPLE_IP | PERF_SAMPLE_TID | PERF_SAMPLE_ADDR | \
+ PERF_SAMPLE_ID | PERF_SAMPLE_CPU | PERF_SAMPLE_STREAM_ID | \
+ PERF_SAMPLE_DATA_SRC | PERF_SAMPLE_IDENTIFIER | \
+ PERF_SAMPLE_TRANSACTION)
+
+/*
+ * A debug store configuration.
+ *
+ * We only support architectures that use 64bit fields.
+ */
+struct debug_store {
+ u64 bts_buffer_base;
+ u64 bts_index;
+ u64 bts_absolute_maximum;
+ u64 bts_interrupt_threshold;
+ u64 pebs_buffer_base;
+ u64 pebs_index;
+ u64 pebs_absolute_maximum;
+ u64 pebs_interrupt_threshold;
+ u64 pebs_event_reset[MAX_PEBS_EVENTS];
+};
+
+/*
+ * Per register state.
+ */
+struct er_account {
+ raw_spinlock_t lock; /* per-core: protect structure */
+ u64 config; /* extra MSR config */
+ u64 reg; /* extra MSR number */
+ atomic_t ref; /* reference count */
+};
+
+/*
+ * Per core/cpu state
+ *
+ * Used to coordinate shared registers between HT threads or
+ * among events on a single PMU.
+ */
+struct intel_shared_regs {
+ struct er_account regs[EXTRA_REG_MAX];
+ int refcnt; /* per-core: #HT threads */
+ unsigned core_id; /* per-core: core id */
+};
+
+enum intel_excl_state_type {
+ INTEL_EXCL_UNUSED = 0, /* counter is unused */
+ INTEL_EXCL_SHARED = 1, /* counter can be used by both threads */
+ INTEL_EXCL_EXCLUSIVE = 2, /* counter can be used by one thread only */
+};
+
+struct intel_excl_states {
+ enum intel_excl_state_type state[X86_PMC_IDX_MAX];
+ bool sched_started; /* true if scheduling has started */
+};
+
+struct intel_excl_cntrs {
+ raw_spinlock_t lock;
+
+ struct intel_excl_states states[2];
+
+ union {
+ u16 has_exclusive[2];
+ u32 exclusive_present;
+ };
+
+ int refcnt; /* per-core: #HT threads */
+ unsigned core_id; /* per-core: core id */
+};
+
+#define MAX_LBR_ENTRIES 32
+
+enum {
+ X86_PERF_KFREE_SHARED = 0,
+ X86_PERF_KFREE_EXCL = 1,
+ X86_PERF_KFREE_MAX
+};
+
+struct cpu_hw_events {
+ /*
+ * Generic x86 PMC bits
+ */
+ struct perf_event *events[X86_PMC_IDX_MAX]; /* in counter order */
+ unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
+ unsigned long running[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
+ int enabled;
+
+ int n_events; /* the # of events in the below arrays */
+ int n_added; /* the # last events in the below arrays;
+ they've never been enabled yet */
+ int n_txn; /* the # last events in the below arrays;
+ added in the current transaction */
+ int assign[X86_PMC_IDX_MAX]; /* event to counter assignment */
+ u64 tags[X86_PMC_IDX_MAX];
+
+ struct perf_event *event_list[X86_PMC_IDX_MAX]; /* in enabled order */
+ struct event_constraint *event_constraint[X86_PMC_IDX_MAX];
+
+ int n_excl; /* the number of exclusive events */
+
+ unsigned int txn_flags;
+ int is_fake;
+
+ /*
+ * Intel DebugStore bits
+ */
+ struct debug_store *ds;
+ u64 pebs_enabled;
+
+ /*
+ * Intel LBR bits
+ */
+ int lbr_users;
+ void *lbr_context;
+ struct perf_branch_stack lbr_stack;
+ struct perf_branch_entry lbr_entries[MAX_LBR_ENTRIES];
+ struct er_account *lbr_sel;
+ u64 br_sel;
+
+ /*
+ * Intel host/guest exclude bits
+ */
+ u64 intel_ctrl_guest_mask;
+ u64 intel_ctrl_host_mask;
+ struct perf_guest_switch_msr guest_switch_msrs[X86_PMC_IDX_MAX];
+
+ /*
+ * Intel checkpoint mask
+ */
+ u64 intel_cp_status;
+
+ /*
+ * manage shared (per-core, per-cpu) registers
+ * used on Intel NHM/WSM/SNB
+ */
+ struct intel_shared_regs *shared_regs;
+ /*
+ * manage exclusive counter access between hyperthread
+ */
+ struct event_constraint *constraint_list; /* in enable order */
+ struct intel_excl_cntrs *excl_cntrs;
+ int excl_thread_id; /* 0 or 1 */
+
+ /*
+ * AMD specific bits
+ */
+ struct amd_nb *amd_nb;
+ /* Inverted mask of bits to clear in the perf_ctr ctrl registers */
+ u64 perf_ctr_virt_mask;
+
+ void *kfree_on_online[X86_PERF_KFREE_MAX];
+};
+
+#define __EVENT_CONSTRAINT(c, n, m, w, o, f) {\
+ { .idxmsk64 = (n) }, \
+ .code = (c), \
+ .cmask = (m), \
+ .weight = (w), \
+ .overlap = (o), \
+ .flags = f, \
+}
+
+#define EVENT_CONSTRAINT(c, n, m) \
+ __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n), 0, 0)
+
+#define INTEL_EXCLEVT_CONSTRAINT(c, n) \
+ __EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT, HWEIGHT(n),\
+ 0, PERF_X86_EVENT_EXCL)
+
+/*
+ * The overlap flag marks event constraints with overlapping counter
+ * masks. This is the case if the counter mask of such an event is not
+ * a subset of any other counter mask of a constraint with an equal or
+ * higher weight, e.g.:
+ *
+ * c_overlaps = EVENT_CONSTRAINT_OVERLAP(0, 0x09, 0);
+ * c_another1 = EVENT_CONSTRAINT(0, 0x07, 0);
+ * c_another2 = EVENT_CONSTRAINT(0, 0x38, 0);
+ *
+ * The event scheduler may not select the correct counter in the first
+ * cycle because it needs to know which subsequent events will be
+ * scheduled. It may fail to schedule the events then. So we set the
+ * overlap flag for such constraints to give the scheduler a hint which
+ * events to select for counter rescheduling.
+ *
+ * Care must be taken as the rescheduling algorithm is O(n!) which
+ * will increase scheduling cycles for an over-committed system
+ * dramatically. The number of such EVENT_CONSTRAINT_OVERLAP() macros
+ * and its counter masks must be kept at a minimum.
+ */
+#define EVENT_CONSTRAINT_OVERLAP(c, n, m) \
+ __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n), 1, 0)
+
+/*
+ * Constraint on the Event code.
+ */
+#define INTEL_EVENT_CONSTRAINT(c, n) \
+ EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT)
+
+/*
+ * Constraint on the Event code + UMask + fixed-mask
+ *
+ * filter mask to validate fixed counter events.
+ * the following filters disqualify for fixed counters:
+ * - inv
+ * - edge
+ * - cnt-mask
+ * - in_tx
+ * - in_tx_checkpointed
+ * The other filters are supported by fixed counters.
+ * The any-thread option is supported starting with v3.
+ */
+#define FIXED_EVENT_FLAGS (X86_RAW_EVENT_MASK|HSW_IN_TX|HSW_IN_TX_CHECKPOINTED)
+#define FIXED_EVENT_CONSTRAINT(c, n) \
+ EVENT_CONSTRAINT(c, (1ULL << (32+n)), FIXED_EVENT_FLAGS)
+
+/*
+ * Constraint on the Event code + UMask
+ */
+#define INTEL_UEVENT_CONSTRAINT(c, n) \
+ EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK)
+
+/* Constraint on specific umask bit only + event */
+#define INTEL_UBIT_EVENT_CONSTRAINT(c, n) \
+ EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT|(c))
+
+/* Like UEVENT_CONSTRAINT, but match flags too */
+#define INTEL_FLAGS_UEVENT_CONSTRAINT(c, n) \
+ EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS)
+
+#define INTEL_EXCLUEVT_CONSTRAINT(c, n) \
+ __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \
+ HWEIGHT(n), 0, PERF_X86_EVENT_EXCL)
+
+#define INTEL_PLD_CONSTRAINT(c, n) \
+ __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
+ HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LDLAT)
+
+#define INTEL_PST_CONSTRAINT(c, n) \
+ __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
+ HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST)
+
+/* Event constraint, but match on all event flags too. */
+#define INTEL_FLAGS_EVENT_CONSTRAINT(c, n) \
+ EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS)
+
+/* Check only flags, but allow all event/umask */
+#define INTEL_ALL_EVENT_CONSTRAINT(code, n) \
+ EVENT_CONSTRAINT(code, n, X86_ALL_EVENT_FLAGS)
+
+/* Check flags and event code, and set the HSW store flag */
+#define INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_ST(code, n) \
+ __EVENT_CONSTRAINT(code, n, \
+ ARCH_PERFMON_EVENTSEL_EVENT|X86_ALL_EVENT_FLAGS, \
+ HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST_HSW)
+
+/* Check flags and event code, and set the HSW load flag */
+#define INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(code, n) \
+ __EVENT_CONSTRAINT(code, n, \
+ ARCH_PERFMON_EVENTSEL_EVENT|X86_ALL_EVENT_FLAGS, \
+ HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LD_HSW)
+
+#define INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_XLD(code, n) \
+ __EVENT_CONSTRAINT(code, n, \
+ ARCH_PERFMON_EVENTSEL_EVENT|X86_ALL_EVENT_FLAGS, \
+ HWEIGHT(n), 0, \
+ PERF_X86_EVENT_PEBS_LD_HSW|PERF_X86_EVENT_EXCL)
+
+/* Check flags and event code/umask, and set the HSW store flag */
+#define INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(code, n) \
+ __EVENT_CONSTRAINT(code, n, \
+ INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
+ HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST_HSW)
+
+#define INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XST(code, n) \
+ __EVENT_CONSTRAINT(code, n, \
+ INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
+ HWEIGHT(n), 0, \
+ PERF_X86_EVENT_PEBS_ST_HSW|PERF_X86_EVENT_EXCL)
+
+/* Check flags and event code/umask, and set the HSW load flag */
+#define INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(code, n) \
+ __EVENT_CONSTRAINT(code, n, \
+ INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
+ HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LD_HSW)
+
+#define INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(code, n) \
+ __EVENT_CONSTRAINT(code, n, \
+ INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
+ HWEIGHT(n), 0, \
+ PERF_X86_EVENT_PEBS_LD_HSW|PERF_X86_EVENT_EXCL)
+
+/* Check flags and event code/umask, and set the HSW N/A flag */
+#define INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_NA(code, n) \
+ __EVENT_CONSTRAINT(code, n, \
+ INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
+ HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_NA_HSW)
+
+
+/*
+ * We define the end marker as having a weight of -1
+ * to enable blacklisting of events using a counter bitmask
+ * of zero and thus a weight of zero.
+ * The end marker has a weight that cannot possibly be
+ * obtained from counting the bits in the bitmask.
+ */
+#define EVENT_CONSTRAINT_END { .weight = -1 }
+
+/*
+ * Check for end marker with weight == -1
+ */
+#define for_each_event_constraint(e, c) \
+ for ((e) = (c); (e)->weight != -1; (e)++)
+
+/*
+ * Extra registers for specific events.
+ *
+ * Some events need large masks and require external MSRs.
+ * Those extra MSRs end up being shared for all events on
+ * a PMU and sometimes between PMU of sibling HT threads.
+ * In either case, the kernel needs to handle conflicting
+ * accesses to those extra, shared, regs. The data structure
+ * to manage those registers is stored in cpu_hw_event.
+ */
+struct extra_reg {
+ unsigned int event;
+ unsigned int msr;
+ u64 config_mask;
+ u64 valid_mask;
+ int idx; /* per_xxx->regs[] reg index */
+ bool extra_msr_access;
+};
+
+#define EVENT_EXTRA_REG(e, ms, m, vm, i) { \
+ .event = (e), \
+ .msr = (ms), \
+ .config_mask = (m), \
+ .valid_mask = (vm), \
+ .idx = EXTRA_REG_##i, \
+ .extra_msr_access = true, \
+ }
+
+#define INTEL_EVENT_EXTRA_REG(event, msr, vm, idx) \
+ EVENT_EXTRA_REG(event, msr, ARCH_PERFMON_EVENTSEL_EVENT, vm, idx)
+
+#define INTEL_UEVENT_EXTRA_REG(event, msr, vm, idx) \
+ EVENT_EXTRA_REG(event, msr, ARCH_PERFMON_EVENTSEL_EVENT | \
+ ARCH_PERFMON_EVENTSEL_UMASK, vm, idx)
+
+#define INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(c) \
+ INTEL_UEVENT_EXTRA_REG(c, \
+ MSR_PEBS_LD_LAT_THRESHOLD, \
+ 0xffff, \
+ LDLAT)
+
+#define EVENT_EXTRA_END EVENT_EXTRA_REG(0, 0, 0, 0, RSP_0)
+
+union perf_capabilities {
+ struct {
+ u64 lbr_format:6;
+ u64 pebs_trap:1;
+ u64 pebs_arch_reg:1;
+ u64 pebs_format:4;
+ u64 smm_freeze:1;
+ /*
+ * PMU supports separate counter range for writing
+ * values > 32bit.
+ */
+ u64 full_width_write:1;
+ };
+ u64 capabilities;
+};
+
+struct x86_pmu_quirk {
+ struct x86_pmu_quirk *next;
+ void (*func)(void);
+};
+
+union x86_pmu_config {
+ struct {
+ u64 event:8,
+ umask:8,
+ usr:1,
+ os:1,
+ edge:1,
+ pc:1,
+ interrupt:1,
+ __reserved1:1,
+ en:1,
+ inv:1,
+ cmask:8,
+ event2:4,
+ __reserved2:4,
+ go:1,
+ ho:1;
+ } bits;
+ u64 value;
+};
+
+#define X86_CONFIG(args...) ((union x86_pmu_config){.bits = {args}}).value
+
+enum {
+ x86_lbr_exclusive_lbr,
+ x86_lbr_exclusive_bts,
+ x86_lbr_exclusive_pt,
+ x86_lbr_exclusive_max,
+};
+
+/*
+ * struct x86_pmu - generic x86 pmu
+ */
+struct x86_pmu {
+ /*
+ * Generic x86 PMC bits
+ */
+ const char *name;
+ int version;
+ int (*handle_irq)(struct pt_regs *);
+ void (*disable_all)(void);
+ void (*enable_all)(int added);
+ void (*enable)(struct perf_event *);
+ void (*disable)(struct perf_event *);
+ int (*hw_config)(struct perf_event *event);
+ int (*schedule_events)(struct cpu_hw_events *cpuc, int n, int *assign);
+ unsigned eventsel;
+ unsigned perfctr;
+ int (*addr_offset)(int index, bool eventsel);
+ int (*rdpmc_index)(int index);
+ u64 (*event_map)(int);
+ int max_events;
+ int num_counters;
+ int num_counters_fixed;
+ int cntval_bits;
+ u64 cntval_mask;
+ union {
+ unsigned long events_maskl;
+ unsigned long events_mask[BITS_TO_LONGS(ARCH_PERFMON_EVENTS_COUNT)];
+ };
+ int events_mask_len;
+ int apic;
+ u64 max_period;
+ struct event_constraint *
+ (*get_event_constraints)(struct cpu_hw_events *cpuc,
+ int idx,
+ struct perf_event *event);
+
+ void (*put_event_constraints)(struct cpu_hw_events *cpuc,
+ struct perf_event *event);
+
+ void (*start_scheduling)(struct cpu_hw_events *cpuc);
+
+ void (*commit_scheduling)(struct cpu_hw_events *cpuc, int idx, int cntr);
+
+ void (*stop_scheduling)(struct cpu_hw_events *cpuc);
+
+ struct event_constraint *event_constraints;
+ struct x86_pmu_quirk *quirks;
+ int perfctr_second_write;
+ bool late_ack;
+ unsigned (*limit_period)(struct perf_event *event, unsigned l);
+
+ /*
+ * sysfs attrs
+ */
+ int attr_rdpmc_broken;
+ int attr_rdpmc;
+ struct attribute **format_attrs;
+ struct attribute **event_attrs;
+
+ ssize_t (*events_sysfs_show)(char *page, u64 config);
+ struct attribute **cpu_events;
+
+ /*
+ * CPU Hotplug hooks
+ */
+ int (*cpu_prepare)(int cpu);
+ void (*cpu_starting)(int cpu);
+ void (*cpu_dying)(int cpu);
+ void (*cpu_dead)(int cpu);
+
+ void (*check_microcode)(void);
+ void (*sched_task)(struct perf_event_context *ctx,
+ bool sched_in);
+
+ /*
+ * Intel Arch Perfmon v2+
+ */
+ u64 intel_ctrl;
+ union perf_capabilities intel_cap;
+
+ /*
+ * Intel DebugStore bits
+ */
+ unsigned int bts :1,
+ bts_active :1,
+ pebs :1,
+ pebs_active :1,
+ pebs_broken :1,
+ pebs_prec_dist :1;
+ int pebs_record_size;
+ int pebs_buffer_size;
+ void (*drain_pebs)(struct pt_regs *regs);
+ struct event_constraint *pebs_constraints;
+ void (*pebs_aliases)(struct perf_event *event);
+ int max_pebs_events;
+ unsigned long free_running_flags;
+
+ /*
+ * Intel LBR
+ */
+ unsigned long lbr_tos, lbr_from, lbr_to; /* MSR base regs */
+ int lbr_nr; /* hardware stack size */
+ u64 lbr_sel_mask; /* LBR_SELECT valid bits */
+ const int *lbr_sel_map; /* lbr_select mappings */
+ bool lbr_double_abort; /* duplicated lbr aborts */
+
+ /*
+ * Intel PT/LBR/BTS are exclusive
+ */
+ atomic_t lbr_exclusive[x86_lbr_exclusive_max];
+
+ /*
+ * AMD bits
+ */
+ unsigned int amd_nb_constraints : 1;
+
+ /*
+ * Extra registers for events
+ */
+ struct extra_reg *extra_regs;
+ unsigned int flags;
+
+ /*
+ * Intel host/guest support (KVM)
+ */
+ struct perf_guest_switch_msr *(*guest_get_msrs)(int *nr);
+};
+
+struct x86_perf_task_context {
+ u64 lbr_from[MAX_LBR_ENTRIES];
+ u64 lbr_to[MAX_LBR_ENTRIES];
+ u64 lbr_info[MAX_LBR_ENTRIES];
+ int tos;
+ int lbr_callstack_users;
+ int lbr_stack_state;
+};
+
+#define x86_add_quirk(func_) \
+do { \
+ static struct x86_pmu_quirk __quirk __initdata = { \
+ .func = func_, \
+ }; \
+ __quirk.next = x86_pmu.quirks; \
+ x86_pmu.quirks = &__quirk; \
+} while (0)
+
+/*
+ * x86_pmu flags
+ */
+#define PMU_FL_NO_HT_SHARING 0x1 /* no hyper-threading resource sharing */
+#define PMU_FL_HAS_RSP_1 0x2 /* has 2 equivalent offcore_rsp regs */
+#define PMU_FL_EXCL_CNTRS 0x4 /* has exclusive counter requirements */
+#define PMU_FL_EXCL_ENABLED 0x8 /* exclusive counter active */
+
+#define EVENT_VAR(_id) event_attr_##_id
+#define EVENT_PTR(_id) &event_attr_##_id.attr.attr
+
+#define EVENT_ATTR(_name, _id) \
+static struct perf_pmu_events_attr EVENT_VAR(_id) = { \
+ .attr = __ATTR(_name, 0444, events_sysfs_show, NULL), \
+ .id = PERF_COUNT_HW_##_id, \
+ .event_str = NULL, \
+};
+
+#define EVENT_ATTR_STR(_name, v, str) \
+static struct perf_pmu_events_attr event_attr_##v = { \
+ .attr = __ATTR(_name, 0444, events_sysfs_show, NULL), \
+ .id = 0, \
+ .event_str = str, \
+};
+
+extern struct x86_pmu x86_pmu __read_mostly;
+
+static inline bool x86_pmu_has_lbr_callstack(void)
+{
+ return x86_pmu.lbr_sel_map &&
+ x86_pmu.lbr_sel_map[PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT] > 0;
+}
+
+DECLARE_PER_CPU(struct cpu_hw_events, cpu_hw_events);
+
+int x86_perf_event_set_period(struct perf_event *event);
+
+/*
+ * Generalized hw caching related hw_event table, filled
+ * in on a per model basis. A value of 0 means
+ * 'not supported', -1 means 'hw_event makes no sense on
+ * this CPU', any other value means the raw hw_event
+ * ID.
+ */
+
+#define C(x) PERF_COUNT_HW_CACHE_##x
+
+extern u64 __read_mostly hw_cache_event_ids
+ [PERF_COUNT_HW_CACHE_MAX]
+ [PERF_COUNT_HW_CACHE_OP_MAX]
+ [PERF_COUNT_HW_CACHE_RESULT_MAX];
+extern u64 __read_mostly hw_cache_extra_regs
+ [PERF_COUNT_HW_CACHE_MAX]
+ [PERF_COUNT_HW_CACHE_OP_MAX]
+ [PERF_COUNT_HW_CACHE_RESULT_MAX];
+
+u64 x86_perf_event_update(struct perf_event *event);
+
+static inline unsigned int x86_pmu_config_addr(int index)
+{
+ return x86_pmu.eventsel + (x86_pmu.addr_offset ?
+ x86_pmu.addr_offset(index, true) : index);
+}
+
+static inline unsigned int x86_pmu_event_addr(int index)
+{
+ return x86_pmu.perfctr + (x86_pmu.addr_offset ?
+ x86_pmu.addr_offset(index, false) : index);
+}
+
+static inline int x86_pmu_rdpmc_index(int index)
+{
+ return x86_pmu.rdpmc_index ? x86_pmu.rdpmc_index(index) : index;
+}
+
+int x86_add_exclusive(unsigned int what);
+
+void x86_del_exclusive(unsigned int what);
+
+int x86_reserve_hardware(void);
+
+void x86_release_hardware(void);
+
+void hw_perf_lbr_event_destroy(struct perf_event *event);
+
+int x86_setup_perfctr(struct perf_event *event);
+
+int x86_pmu_hw_config(struct perf_event *event);
+
+void x86_pmu_disable_all(void);
+
+static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc,
+ u64 enable_mask)
+{
+ u64 disable_mask = __this_cpu_read(cpu_hw_events.perf_ctr_virt_mask);
+
+ if (hwc->extra_reg.reg)
+ wrmsrl(hwc->extra_reg.reg, hwc->extra_reg.config);
+ wrmsrl(hwc->config_base, (hwc->config | enable_mask) & ~disable_mask);
+}
+
+void x86_pmu_enable_all(int added);
+
+int perf_assign_events(struct event_constraint **constraints, int n,
+ int wmin, int wmax, int gpmax, int *assign);
+int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign);
+
+void x86_pmu_stop(struct perf_event *event, int flags);
+
+static inline void x86_pmu_disable_event(struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+
+ wrmsrl(hwc->config_base, hwc->config);
+}
+
+void x86_pmu_enable_event(struct perf_event *event);
+
+int x86_pmu_handle_irq(struct pt_regs *regs);
+
+extern struct event_constraint emptyconstraint;
+
+extern struct event_constraint unconstrained;
+
+static inline bool kernel_ip(unsigned long ip)
+{
+#ifdef CONFIG_X86_32
+ return ip > PAGE_OFFSET;
+#else
+ return (long)ip < 0;
+#endif
+}
+
+/*
+ * Not all PMUs provide the right context information to place the reported IP
+ * into full context. Specifically segment registers are typically not
+ * supplied.
+ *
+ * Assuming the address is a linear address (it is for IBS), we fake the CS and
+ * vm86 mode using the known zero-based code segment and 'fix up' the registers
+ * to reflect this.
+ *
+ * Intel PEBS/LBR appear to typically provide the effective address, nothing
+ * much we can do about that but pray and treat it like a linear address.
+ */
+static inline void set_linear_ip(struct pt_regs *regs, unsigned long ip)
+{
+ regs->cs = kernel_ip(ip) ? __KERNEL_CS : __USER_CS;
+ if (regs->flags & X86_VM_MASK)
+ regs->flags ^= (PERF_EFLAGS_VM | X86_VM_MASK);
+ regs->ip = ip;
+}
+
+ssize_t x86_event_sysfs_show(char *page, u64 config, u64 event);
+ssize_t intel_event_sysfs_show(char *page, u64 config);
+
+struct attribute **merge_attr(struct attribute **a, struct attribute **b);
+
+ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr,
+ char *page);
+
+#ifdef CONFIG_CPU_SUP_AMD
+
+int amd_pmu_init(void);
+
+#else /* CONFIG_CPU_SUP_AMD */
+
+static inline int amd_pmu_init(void)
+{
+ return 0;
+}
+
+#endif /* CONFIG_CPU_SUP_AMD */
+
+#ifdef CONFIG_CPU_SUP_INTEL
+
+static inline bool intel_pmu_has_bts(struct perf_event *event)
+{
+ if (event->attr.config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS &&
+ !event->attr.freq && event->hw.sample_period == 1)
+ return true;
+
+ return false;
+}
+
+int intel_pmu_save_and_restart(struct perf_event *event);
+
+struct event_constraint *
+x86_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
+ struct perf_event *event);
+
+struct intel_shared_regs *allocate_shared_regs(int cpu);
+
+int intel_pmu_init(void);
+
+void init_debug_store_on_cpu(int cpu);
+
+void fini_debug_store_on_cpu(int cpu);
+
+void release_ds_buffers(void);
+
+void reserve_ds_buffers(void);
+
+extern struct event_constraint bts_constraint;
+
+void intel_pmu_enable_bts(u64 config);
+
+void intel_pmu_disable_bts(void);
+
+int intel_pmu_drain_bts_buffer(void);
+
+extern struct event_constraint intel_core2_pebs_event_constraints[];
+
+extern struct event_constraint intel_atom_pebs_event_constraints[];
+
+extern struct event_constraint intel_slm_pebs_event_constraints[];
+
+extern struct event_constraint intel_nehalem_pebs_event_constraints[];
+
+extern struct event_constraint intel_westmere_pebs_event_constraints[];
+
+extern struct event_constraint intel_snb_pebs_event_constraints[];
+
+extern struct event_constraint intel_ivb_pebs_event_constraints[];
+
+extern struct event_constraint intel_hsw_pebs_event_constraints[];
+
+extern struct event_constraint intel_bdw_pebs_event_constraints[];
+
+extern struct event_constraint intel_skl_pebs_event_constraints[];
+
+struct event_constraint *intel_pebs_constraints(struct perf_event *event);
+
+void intel_pmu_pebs_enable(struct perf_event *event);
+
+void intel_pmu_pebs_disable(struct perf_event *event);
+
+void intel_pmu_pebs_enable_all(void);
+
+void intel_pmu_pebs_disable_all(void);
+
+void intel_pmu_pebs_sched_task(struct perf_event_context *ctx, bool sched_in);
+
+void intel_ds_init(void);
+
+void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in);
+
+void intel_pmu_lbr_reset(void);
+
+void intel_pmu_lbr_enable(struct perf_event *event);
+
+void intel_pmu_lbr_disable(struct perf_event *event);
+
+void intel_pmu_lbr_enable_all(bool pmi);
+
+void intel_pmu_lbr_disable_all(void);
+
+void intel_pmu_lbr_read(void);
+
+void intel_pmu_lbr_init_core(void);
+
+void intel_pmu_lbr_init_nhm(void);
+
+void intel_pmu_lbr_init_atom(void);
+
+void intel_pmu_lbr_init_snb(void);
+
+void intel_pmu_lbr_init_hsw(void);
+
+void intel_pmu_lbr_init_skl(void);
+
+void intel_pmu_lbr_init_knl(void);
+
+void intel_pmu_pebs_data_source_nhm(void);
+
+int intel_pmu_setup_lbr_filter(struct perf_event *event);
+
+void intel_pt_interrupt(void);
+
+int intel_bts_interrupt(void);
+
+void intel_bts_enable_local(void);
+
+void intel_bts_disable_local(void);
+
+int p4_pmu_init(void);
+
+int p6_pmu_init(void);
+
+int knc_pmu_init(void);
+
+static inline int is_ht_workaround_enabled(void)
+{
+ return !!(x86_pmu.flags & PMU_FL_EXCL_ENABLED);
+}
+
+#else /* CONFIG_CPU_SUP_INTEL */
+
+static inline void reserve_ds_buffers(void)
+{
+}
+
+static inline void release_ds_buffers(void)
+{
+}
+
+static inline int intel_pmu_init(void)
+{
+ return 0;
+}
+
+static inline struct intel_shared_regs *allocate_shared_regs(int cpu)
+{
+ return NULL;
+}
+
+static inline int is_ht_workaround_enabled(void)
+{
+ return 0;
+}
+#endif /* CONFIG_CPU_SUP_INTEL */