Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--  arch/x86/kernel/Makefile | 18
-rw-r--r--  arch/x86/kernel/acpi/boot.c | 4
-rw-r--r--  arch/x86/kernel/acpi/wakeup_64.S | 3
-rw-r--r--  arch/x86/kernel/amd_nb.c | 6
-rw-r--r--  arch/x86/kernel/apb_timer.c | 2
-rw-r--r--  arch/x86/kernel/aperture_64.c | 12
-rw-r--r--  arch/x86/kernel/apic/Makefile | 4
-rw-r--r--  arch/x86/kernel/apic/apic.c | 16
-rw-r--r--  arch/x86/kernel/apic/apic_flat_64.c | 2
-rw-r--r--  arch/x86/kernel/apic/apic_numachip.c | 4
-rw-r--r--  arch/x86/kernel/apic/ipi.c | 60
-rw-r--r--  arch/x86/kernel/apic/x2apic_uv_x.c | 9
-rw-r--r--  arch/x86/kernel/apm_32.c | 2
-rw-r--r--  arch/x86/kernel/asm-offsets.c | 1
-rw-r--r--  arch/x86/kernel/asm-offsets_32.c | 7
-rw-r--r--  arch/x86/kernel/asm-offsets_64.c | 10
-rw-r--r--  arch/x86/kernel/cpu/Makefile | 30
-rw-r--r--  arch/x86/kernel/cpu/amd.c | 60
-rw-r--r--  arch/x86/kernel/cpu/bugs_64.c | 2
-rw-r--r--  arch/x86/kernel/cpu/centaur.c | 12
-rw-r--r--  arch/x86/kernel/cpu/common.c | 155
-rw-r--r--  arch/x86/kernel/cpu/cyrix.c | 11
-rw-r--r--  arch/x86/kernel/cpu/hypervisor.c | 2
-rw-r--r--  arch/x86/kernel/cpu/intel.c | 27
-rw-r--r--  arch/x86/kernel/cpu/intel_cacheinfo.c | 4
-rw-r--r--  arch/x86/kernel/cpu/intel_pt.h | 116
-rw-r--r--  arch/x86/kernel/cpu/match.c | 2
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce-inject.c | 15
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce-severity.c | 22
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce.c | 85
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce_amd.c | 231
-rw-r--r--  arch/x86/kernel/cpu/mcheck/p5.c | 18
-rw-r--r--  arch/x86/kernel/cpu/mcheck/therm_throt.c | 15
-rw-r--r--  arch/x86/kernel/cpu/mcheck/threshold.c | 4
-rw-r--r--  arch/x86/kernel/cpu/mcheck/winchip.c | 5
-rw-r--r--  arch/x86/kernel/cpu/microcode/amd.c | 17
-rw-r--r--  arch/x86/kernel/cpu/microcode/core.c | 19
-rw-r--r--  arch/x86/kernel/cpu/microcode/intel.c | 257
-rw-r--r--  arch/x86/kernel/cpu/microcode/intel_lib.c | 58
-rw-r--r--  arch/x86/kernel/cpu/mkcapflags.sh | 6
-rw-r--r--  arch/x86/kernel/cpu/mshyperv.c | 20
-rw-r--r--  arch/x86/kernel/cpu/mtrr/centaur.c | 2
-rw-r--r--  arch/x86/kernel/cpu/mtrr/cleanup.c | 44
-rw-r--r--  arch/x86/kernel/cpu/mtrr/generic.c | 25
-rw-r--r--  arch/x86/kernel/cpu/mtrr/main.c | 22
-rw-r--r--  arch/x86/kernel/cpu/perf_event.c | 2441
-rw-r--r--  arch/x86/kernel/cpu/perf_event.h | 958
-rw-r--r--  arch/x86/kernel/cpu/perf_event_amd.c | 731
-rw-r--r--  arch/x86/kernel/cpu/perf_event_amd_ibs.c | 959
-rw-r--r--  arch/x86/kernel/cpu/perf_event_amd_iommu.c | 499
-rw-r--r--  arch/x86/kernel/cpu/perf_event_amd_iommu.h | 40
-rw-r--r--  arch/x86/kernel/cpu/perf_event_amd_uncore.c | 603
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel.c | 3796
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel_bts.c | 544
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel_cqm.c | 1391
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel_cstate.c | 694
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel_ds.c | 1386
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel_lbr.c | 1062
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel_pt.c | 1188
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel_rapl.c | 783
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel_uncore.c | 1401
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel_uncore.h | 357
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel_uncore_nhmex.c | 1221
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c | 717
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c | 3132
-rw-r--r--  arch/x86/kernel/cpu/perf_event_knc.c | 321
-rw-r--r--  arch/x86/kernel/cpu/perf_event_msr.c | 241
-rw-r--r--  arch/x86/kernel/cpu/perf_event_p4.c | 1376
-rw-r--r--  arch/x86/kernel/cpu/perf_event_p6.c | 279
-rw-r--r--  arch/x86/kernel/cpu/powerflags.c | 2
-rw-r--r--  arch/x86/kernel/cpu/rdrand.c | 2
-rw-r--r--  arch/x86/kernel/cpu/topology.c | 4
-rw-r--r--  arch/x86/kernel/cpu/transmeta.c | 10
-rw-r--r--  arch/x86/kernel/cpu/vmware.c | 5
-rw-r--r--  arch/x86/kernel/crash.c | 41
-rw-r--r--  arch/x86/kernel/dumpstack.c | 11
-rw-r--r--  arch/x86/kernel/e820.c | 39
-rw-r--r--  arch/x86/kernel/early_printk.c | 2
-rw-r--r--  arch/x86/kernel/fpu/core.c | 115
-rw-r--r--  arch/x86/kernel/fpu/init.c | 13
-rw-r--r--  arch/x86/kernel/fpu/regset.c | 2
-rw-r--r--  arch/x86/kernel/fpu/xstate.c | 188
-rw-r--r--  arch/x86/kernel/ftrace.c | 19
-rw-r--r--  arch/x86/kernel/head64.c | 14
-rw-r--r--  arch/x86/kernel/head_32.S | 2
-rw-r--r--  arch/x86/kernel/head_64.S | 5
-rw-r--r--  arch/x86/kernel/hpet.c | 3
-rw-r--r--  arch/x86/kernel/kexec-bzimage64.c | 2
-rw-r--r--  arch/x86/kernel/kgdb.c | 12
-rw-r--r--  arch/x86/kernel/kprobes/core.c | 61
-rw-r--r--  arch/x86/kernel/kvm.c | 37
-rw-r--r--  arch/x86/kernel/kvmclock.c | 2
-rw-r--r--  arch/x86/kernel/ldt.c | 4
-rw-r--r--  arch/x86/kernel/mcount_64.S | 14
-rw-r--r--  arch/x86/kernel/mpparse.c | 2
-rw-r--r--  arch/x86/kernel/msr.c | 2
-rw-r--r--  arch/x86/kernel/nmi.c | 3
-rw-r--r--  arch/x86/kernel/pmem.c | 4
-rw-r--r--  arch/x86/kernel/process.c | 7
-rw-r--r--  arch/x86/kernel/process_64.c | 4
-rw-r--r--  arch/x86/kernel/setup.c | 15
-rw-r--r--  arch/x86/kernel/signal.c | 127
-rw-r--r--  arch/x86/kernel/smpboot.c | 122
-rw-r--r--  arch/x86/kernel/stacktrace.c | 18
-rw-r--r--  arch/x86/kernel/tboot.c | 2
-rw-r--r--  arch/x86/kernel/test_nx.c | 2
-rw-r--r--  arch/x86/kernel/test_rodata.c | 2
-rw-r--r--  arch/x86/kernel/traps.c | 147
-rw-r--r--  arch/x86/kernel/tsc.c | 73
-rw-r--r--  arch/x86/kernel/verify_cpu.S | 2
-rw-r--r--  arch/x86/kernel/vm86_32.c | 2
-rw-r--r--  arch/x86/kernel/vmlinux.lds.S | 51
-rw-r--r--  arch/x86/kernel/x8664_ksyms_64.c | 2
113 files changed, 1698 insertions, 27064 deletions
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index b1b78ffe0..616ebd22e 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -16,9 +16,21 @@ CFLAGS_REMOVE_ftrace.o = -pg
CFLAGS_REMOVE_early_printk.o = -pg
endif
-KASAN_SANITIZE_head$(BITS).o := n
-KASAN_SANITIZE_dumpstack.o := n
-KASAN_SANITIZE_dumpstack_$(BITS).o := n
+KASAN_SANITIZE_head$(BITS).o := n
+KASAN_SANITIZE_dumpstack.o := n
+KASAN_SANITIZE_dumpstack_$(BITS).o := n
+KASAN_SANITIZE_stacktrace.o := n
+
+OBJECT_FILES_NON_STANDARD_head_$(BITS).o := y
+OBJECT_FILES_NON_STANDARD_relocate_kernel_$(BITS).o := y
+OBJECT_FILES_NON_STANDARD_mcount_$(BITS).o := y
+OBJECT_FILES_NON_STANDARD_test_nx.o := y
+
+# If instrumentation of this dir is enabled, boot hangs during first second.
+# Probably could be more selective here, but note that files related to irqs,
+# boot, dumpstack/stacktrace, etc are either non-interesting or can lead to
+# non-deterministic coverage.
+KCOV_INSTRUMENT := n
CFLAGS_irq.o := -I$(src)/../include/asm/trace
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index e75907601..8c2f1ef6c 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -956,7 +956,7 @@ static int __init early_acpi_parse_madt_lapic_addr_ovr(void)
/*
* Note that the LAPIC address is obtained from the MADT (32-bit value)
- * and (optionally) overriden by a LAPIC_ADDR_OVR entry (64-bit value).
+ * and (optionally) overridden by a LAPIC_ADDR_OVR entry (64-bit value).
*/
count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC_OVERRIDE,
@@ -984,7 +984,7 @@ static int __init acpi_parse_madt_lapic_entries(void)
/*
* Note that the LAPIC address is obtained from the MADT (32-bit value)
- * and (optionally) overriden by a LAPIC_ADDR_OVR entry (64-bit value).
+ * and (optionally) overridden by a LAPIC_ADDR_OVR entry (64-bit value).
*/
count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC_OVERRIDE,
diff --git a/arch/x86/kernel/acpi/wakeup_64.S b/arch/x86/kernel/acpi/wakeup_64.S
index 8c35df468..169963f47 100644
--- a/arch/x86/kernel/acpi/wakeup_64.S
+++ b/arch/x86/kernel/acpi/wakeup_64.S
@@ -5,6 +5,7 @@
#include <asm/page_types.h>
#include <asm/msr.h>
#include <asm/asm-offsets.h>
+#include <asm/frame.h>
# Copyright 2003 Pavel Machek <pavel@suse.cz>, distribute under GPLv2
@@ -39,6 +40,7 @@ bogus_64_magic:
jmp bogus_64_magic
ENTRY(do_suspend_lowlevel)
+ FRAME_BEGIN
subq $8, %rsp
xorl %eax, %eax
call save_processor_state
@@ -109,6 +111,7 @@ ENTRY(do_suspend_lowlevel)
xorl %eax, %eax
addq $8, %rsp
+ FRAME_END
jmp restore_processor_state
ENDPROC(do_suspend_lowlevel)
diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c
index 29fa475ec..a147e676f 100644
--- a/arch/x86/kernel/amd_nb.c
+++ b/arch/x86/kernel/amd_nb.c
@@ -170,15 +170,13 @@ int amd_get_subcaches(int cpu)
{
struct pci_dev *link = node_to_amd_nb(amd_get_nb_id(cpu))->link;
unsigned int mask;
- int cuid;
if (!amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
return 0;
pci_read_config_dword(link, 0x1d4, &mask);
- cuid = cpu_data(cpu).compute_unit_id;
- return (mask >> (4 * cuid)) & 0xf;
+ return (mask >> (4 * cpu_data(cpu).cpu_core_id)) & 0xf;
}
int amd_set_subcaches(int cpu, unsigned long mask)
@@ -204,7 +202,7 @@ int amd_set_subcaches(int cpu, unsigned long mask)
pci_write_config_dword(nb->misc, 0x1b8, reg & ~0x180000);
}
- cuid = cpu_data(cpu).compute_unit_id;
+ cuid = cpu_data(cpu).cpu_core_id;
mask <<= 4 * cuid;
mask |= (0xf ^ (1 << cuid)) << 26;
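A minimal userspace sketch of the bit layout implied by the hunk above: the L3 partitioning dword at PCI config offset 0x1d4 packs one 4-bit subcache mask per core, so reading a core's mask is a shift-and-mask. The sample register value below is made up for illustration and is not from real hardware:

#include <stdio.h>
#include <stdint.h>

/* Each core's subcache-enable mask occupies 4 bits of the config dword. */
static unsigned int get_subcaches(uint32_t reg, unsigned int core_id)
{
	return (reg >> (4 * core_id)) & 0xf;
}

int main(void)
{
	uint32_t reg = 0x0000f5af;	/* sample: cores 0..3 -> 0xf, 0xa, 0x5, 0xf */

	for (unsigned int core = 0; core < 4; core++)
		printf("core %u: subcache mask 0x%x\n", core, get_subcaches(reg, core));
	return 0;
}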
diff --git a/arch/x86/kernel/apb_timer.c b/arch/x86/kernel/apb_timer.c
index 222a57076..cefacbad1 100644
--- a/arch/x86/kernel/apb_timer.c
+++ b/arch/x86/kernel/apb_timer.c
@@ -221,7 +221,7 @@ static int apbt_cpuhp_notify(struct notifier_block *n,
unsigned long cpu = (unsigned long)hcpu;
struct apbt_dev *adev = &per_cpu(cpu_apbt_dev, cpu);
- switch (action & 0xf) {
+ switch (action & ~CPU_TASKS_FROZEN) {
case CPU_DEAD:
dw_apb_clockevent_pause(adev->timer);
if (system_state == SYSTEM_RUNNING) {
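The switch above now strips CPU_TASKS_FROZEN so the frozen (suspend/resume) variant of each hotplug event falls into the same case as the normal one. A small standalone illustration of the idiom; the constant values here are placeholders for the demo, not copied from <linux/cpu.h>:

#include <stdio.h>

/* Illustrative values only; the real constants live in <linux/cpu.h>. */
#define CPU_ONLINE		0x0002
#define CPU_DEAD		0x0007
#define CPU_TASKS_FROZEN	0x0010
#define CPU_DEAD_FROZEN		(CPU_DEAD | CPU_TASKS_FROZEN)

int main(void)
{
	unsigned long action = CPU_DEAD_FROZEN;

	/* Masking off CPU_TASKS_FROZEN maps the _FROZEN variant back to CPU_DEAD. */
	switch (action & ~CPU_TASKS_FROZEN) {
	case CPU_DEAD:
		printf("handled as CPU_DEAD (action=0x%lx)\n", action);
		break;
	case CPU_ONLINE:
		printf("handled as CPU_ONLINE\n");
		break;
	default:
		printf("ignored\n");
	}
	return 0;
}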
diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c
index 6e85f7136..0a2bb1f62 100644
--- a/arch/x86/kernel/aperture_64.c
+++ b/arch/x86/kernel/aperture_64.c
@@ -227,19 +227,11 @@ static u32 __init search_agp_bridge(u32 *order, int *valid_agp)
return 0;
}
-static int gart_fix_e820 __initdata = 1;
+static bool gart_fix_e820 __initdata = true;
static int __init parse_gart_mem(char *p)
{
- if (!p)
- return -EINVAL;
-
- if (!strncmp(p, "off", 3))
- gart_fix_e820 = 0;
- else if (!strncmp(p, "on", 2))
- gart_fix_e820 = 1;
-
- return 0;
+ return kstrtobool(p, &gart_fix_e820);
}
early_param("gart_fix_e820", parse_gart_mem);
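The rewritten parse_gart_mem() delegates the "on"/"off" handling to kstrtobool(). A rough userspace approximation of that kind of parser, showing what the open-coded strncmp logic collapses into; this sketch only mimics the general behaviour and is not the kernel implementation:

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

/* Loose approximation of a boolean string parser: accepts on/off, y/n, 1/0. */
static int parse_bool(const char *s, bool *res)
{
	if (!s)
		return -1;
	if (!strcmp(s, "on") || s[0] == 'y' || s[0] == 'Y' || s[0] == '1')
		*res = true;
	else if (!strcmp(s, "off") || s[0] == 'n' || s[0] == 'N' || s[0] == '0')
		*res = false;
	else
		return -1;
	return 0;
}

int main(void)
{
	const char *args[] = { "on", "off", "1", "bogus" };
	bool val;

	for (unsigned int i = 0; i < 4; i++) {
		if (parse_bool(args[i], &val) == 0)
			printf("%-5s -> %d\n", args[i], val);
		else
			printf("%-5s -> parse error\n", args[i]);
	}
	return 0;
}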
diff --git a/arch/x86/kernel/apic/Makefile b/arch/x86/kernel/apic/Makefile
index 8bb12ddc5..8e63ebdcb 100644
--- a/arch/x86/kernel/apic/Makefile
+++ b/arch/x86/kernel/apic/Makefile
@@ -2,6 +2,10 @@
# Makefile for local APIC drivers and for the IO-APIC code
#
+# Leads to non-deterministic coverage that is not a function of syscall inputs.
+# In particular, smp_apic_timer_interrupt() is called in random places.
+KCOV_INSTRUMENT := n
+
obj-$(CONFIG_X86_LOCAL_APIC) += apic.o apic_noop.o ipi.o vector.o
obj-y += hw_nmi.o
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 8a5cddac7..d356987a0 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -1611,7 +1611,7 @@ void __init enable_IR_x2apic(void)
legacy_pic->mask_all();
mask_ioapic_entries();
- /* If irq_remapping_prepare() succeded, try to enable it */
+ /* If irq_remapping_prepare() succeeded, try to enable it */
if (ir_stat >= 0)
ir_stat = try_to_enable_IR();
/* ir_stat contains the remap mode or an error code */
@@ -2078,6 +2078,20 @@ int generic_processor_info(int apicid, int version)
cpu = cpumask_next_zero(-1, cpu_present_mask);
/*
+ * This can happen on physical hotplug. The sanity check at boot time
+ * is done from native_smp_prepare_cpus() after num_possible_cpus() is
+ * established.
+ */
+ if (topology_update_package_map(apicid, cpu) < 0) {
+ int thiscpu = max + disabled_cpus;
+
+ pr_warning("ACPI: Package limit reached. Processor %d/0x%x ignored.\n",
+ thiscpu, apicid);
+ disabled_cpus++;
+ return -ENOSPC;
+ }
+
+ /*
* Validate version
*/
if (version == 0x0) {
diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c
index 9968f30cc..76f89e2b2 100644
--- a/arch/x86/kernel/apic/apic_flat_64.c
+++ b/arch/x86/kernel/apic/apic_flat_64.c
@@ -53,7 +53,7 @@ void flat_init_apic_ldr(void)
apic_write(APIC_LDR, val);
}
-static inline void _flat_send_IPI_mask(unsigned long mask, int vector)
+static void _flat_send_IPI_mask(unsigned long mask, int vector)
{
unsigned long flags;
diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c
index c80c02c6e..ab5c2c685 100644
--- a/arch/x86/kernel/apic/apic_numachip.c
+++ b/arch/x86/kernel/apic/apic_numachip.c
@@ -30,7 +30,7 @@ static unsigned int numachip1_get_apic_id(unsigned long x)
unsigned long value;
unsigned int id = (x >> 24) & 0xff;
- if (static_cpu_has_safe(X86_FEATURE_NODEID_MSR)) {
+ if (static_cpu_has(X86_FEATURE_NODEID_MSR)) {
rdmsrl(MSR_FAM10H_NODE_ID, value);
id |= (value << 2) & 0xff00;
}
@@ -178,7 +178,7 @@ static void fixup_cpu_id(struct cpuinfo_x86 *c, int node)
this_cpu_write(cpu_llc_id, node);
/* Account for nodes per socket in multi-core-module processors */
- if (static_cpu_has_safe(X86_FEATURE_NODEID_MSR)) {
+ if (static_cpu_has(X86_FEATURE_NODEID_MSR)) {
rdmsrl(MSR_FAM10H_NODE_ID, val);
nodes = ((val >> 3) & 7) + 1;
}
diff --git a/arch/x86/kernel/apic/ipi.c b/arch/x86/kernel/apic/ipi.c
index eb45fc9b6..28bde88b0 100644
--- a/arch/x86/kernel/apic/ipi.c
+++ b/arch/x86/kernel/apic/ipi.c
@@ -18,6 +18,66 @@
#include <asm/proto.h>
#include <asm/ipi.h>
+void __default_send_IPI_shortcut(unsigned int shortcut, int vector, unsigned int dest)
+{
+ /*
+ * Subtle. In the case of the 'never do double writes' workaround
+ * we have to lock out interrupts to be safe. As we don't care
+ * of the value read we use an atomic rmw access to avoid costly
+ * cli/sti. Otherwise we use an even cheaper single atomic write
+ * to the APIC.
+ */
+ unsigned int cfg;
+
+ /*
+ * Wait for idle.
+ */
+ __xapic_wait_icr_idle();
+
+ /*
+ * No need to touch the target chip field
+ */
+ cfg = __prepare_ICR(shortcut, vector, dest);
+
+ /*
+ * Send the IPI. The write to APIC_ICR fires this off.
+ */
+ native_apic_mem_write(APIC_ICR, cfg);
+}
+
+/*
+ * This is used to send an IPI with no shorthand notation (the destination is
+ * specified in bits 56 to 63 of the ICR).
+ */
+void __default_send_IPI_dest_field(unsigned int mask, int vector, unsigned int dest)
+{
+ unsigned long cfg;
+
+ /*
+ * Wait for idle.
+ */
+ if (unlikely(vector == NMI_VECTOR))
+ safe_apic_wait_icr_idle();
+ else
+ __xapic_wait_icr_idle();
+
+ /*
+ * prepare target chip field
+ */
+ cfg = __prepare_ICR2(mask);
+ native_apic_mem_write(APIC_ICR2, cfg);
+
+ /*
+ * program the ICR
+ */
+ cfg = __prepare_ICR(0, vector, dest);
+
+ /*
+ * Send the IPI. The write to APIC_ICR fires this off.
+ */
+ native_apic_mem_write(APIC_ICR, cfg);
+}
+
void default_send_IPI_single_phys(int cpu, int vector)
{
unsigned long flags;
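The comment in __default_send_IPI_dest_field() notes that the destination lives in bits 56-63 of the ICR; since APIC_ICR2 is the upper 32 bits of that register, the destination ends up in bits 24-31 of the ICR2 write. A tiny arithmetic sketch of that encoding, assuming the conventional xAPIC split of the 64-bit ICR into two 32-bit registers (illustrative only, not part of the patch):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint8_t  dest   = 0x05;		/* target APIC ID (example) */
	uint8_t  vector = 0xfd;		/* interrupt vector (example) */

	uint64_t icr    = ((uint64_t)dest << 56) | vector; /* logical 64-bit ICR */
	uint32_t icr2   = (uint32_t)(icr >> 32);	/* written to APIC_ICR2 */
	uint32_t icr_lo = (uint32_t)icr;		/* written to APIC_ICR */

	printf("ICR2 = 0x%08x (dest in bits 24-31)\n", icr2);
	printf("ICR  = 0x%08x (vector in bits 0-7)\n", icr_lo);
	return 0;
}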
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 624db0058..d7ce96a7d 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -792,7 +792,8 @@ static int uv_scir_cpu_notify(struct notifier_block *self, unsigned long action,
{
long cpu = (long)hcpu;
- switch (action) {
+ switch (action & ~CPU_TASKS_FROZEN) {
+ case CPU_DOWN_FAILED:
case CPU_ONLINE:
uv_heartbeat_enable(cpu);
break;
@@ -860,7 +861,7 @@ int uv_set_vga_state(struct pci_dev *pdev, bool decode,
*/
void uv_cpu_init(void)
{
- /* CPU 0 initilization will be done via uv_system_init. */
+ /* CPU 0 initialization will be done via uv_system_init. */
if (!uv_blade_info)
return;
@@ -890,9 +891,7 @@ void __init uv_system_init(void)
}
pr_info("UV: Found %s hub\n", hub);
- /* We now only need to map the MMRs on UV1 */
- if (is_uv1_hub())
- map_low_mmrs();
+ map_low_mmrs();
m_n_config.v = uv_read_local_mmr(UVH_RH_GAM_CONFIG_MMR );
m_val = m_n_config.s.m_skt;
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index 052c9c302..9307f182f 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -1088,7 +1088,7 @@ static int apm_get_battery_status(u_short which, u_short *status,
* @device: identity of device
* @enable: on/off
*
- * Activate or deactive power management on either a specific device
+ * Activate or deactivate power management on either a specific device
* or the entire system (%APM_DEVICE_ALL).
*/
diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
index 84a7524b2..5c042466f 100644
--- a/arch/x86/kernel/asm-offsets.c
+++ b/arch/x86/kernel/asm-offsets.c
@@ -59,7 +59,6 @@ void common(void) {
#ifdef CONFIG_PARAVIRT
BLANK();
- OFFSET(PARAVIRT_enabled, pv_info, paravirt_enabled);
OFFSET(PARAVIRT_PATCH_pv_cpu_ops, paravirt_patch_template, pv_cpu_ops);
OFFSET(PARAVIRT_PATCH_pv_irq_ops, paravirt_patch_template, pv_irq_ops);
OFFSET(PV_IRQ_irq_disable, pv_irq_ops, irq_disable);
diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c
index 6ce39025f..ecdc1d217 100644
--- a/arch/x86/kernel/asm-offsets_32.c
+++ b/arch/x86/kernel/asm-offsets_32.c
@@ -7,7 +7,7 @@
#include <linux/lguest.h>
#include "../../../drivers/lguest/lg.h"
-#define __SYSCALL_I386(nr, sym, compat) [nr] = 1,
+#define __SYSCALL_I386(nr, sym, qual) [nr] = 1,
static char syscalls[] = {
#include <asm/syscalls_32.h>
};
@@ -52,6 +52,11 @@ void foo(void)
DEFINE(TSS_sysenter_sp0, offsetof(struct tss_struct, x86_tss.sp0) -
offsetofend(struct tss_struct, SYSENTER_stack));
+ /* Offset from cpu_tss to SYSENTER_stack */
+ OFFSET(CPU_TSS_SYSENTER_stack, tss_struct, SYSENTER_stack);
+ /* Size of SYSENTER_stack */
+ DEFINE(SIZEOF_SYSENTER_stack, sizeof(((struct tss_struct *)0)->SYSENTER_stack));
+
#if defined(CONFIG_LGUEST) || defined(CONFIG_LGUEST_GUEST) || defined(CONFIG_LGUEST_MODULE)
BLANK();
OFFSET(LGUEST_DATA_irq_enabled, lguest_data, irq_enabled);
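The OFFSET()/DEFINE() lines added above feed the asm-offsets machinery: the file is compiled to assembly, and the marker lines it emits are rewritten into a generated header of constants that assembly code can include. A simplified, self-contained illustration of the technique; the macro bodies below mirror the general idea rather than the exact kbuild definitions:

/* asm_offsets_demo.c -- try:  gcc -S asm_offsets_demo.c -o - | grep '^->'
 * Each DEFINE() emits a "->NAME value" marker into the generated assembly;
 * a build script then rewrites those markers into "#define NAME value" lines.
 */
#include <stddef.h>

#define DEFINE(sym, val) \
	asm volatile("\n->" #sym " %0 " #val : : "i" (val))
#define OFFSET(sym, str, mem) \
	DEFINE(sym, offsetof(struct str, mem))

struct tss_demo {
	unsigned long sp0;
	unsigned long sp1;
	char stack[512];
};

void common(void)
{
	OFFSET(TSS_DEMO_sp1, tss_demo, sp1);
	DEFINE(SIZEOF_TSS_DEMO_stack, sizeof(((struct tss_demo *)0)->stack));
}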
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index f2edafb5f..d875f97d4 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -4,17 +4,11 @@
#include <asm/ia32.h>
-#define __SYSCALL_64(nr, sym, compat) [nr] = 1,
-#define __SYSCALL_COMMON(nr, sym, compat) [nr] = 1,
-#ifdef CONFIG_X86_X32_ABI
-# define __SYSCALL_X32(nr, sym, compat) [nr] = 1,
-#else
-# define __SYSCALL_X32(nr, sym, compat) /* nothing */
-#endif
+#define __SYSCALL_64(nr, sym, qual) [nr] = 1,
static char syscalls_64[] = {
#include <asm/syscalls_64.h>
};
-#define __SYSCALL_I386(nr, sym, compat) [nr] = 1,
+#define __SYSCALL_I386(nr, sym, qual) [nr] = 1,
static char syscalls_ia32[] = {
#include <asm/syscalls_32.h>
};
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index 58031303e..4a8697f7d 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -8,6 +8,10 @@ CFLAGS_REMOVE_common.o = -pg
CFLAGS_REMOVE_perf_event.o = -pg
endif
+# If these files are instrumented, boot hangs during the first second.
+KCOV_INSTRUMENT_common.o := n
+KCOV_INSTRUMENT_perf_event.o := n
+
# Make sure load_percpu_segment has no stackprotector
nostackp := $(call cc-option, -fno-stack-protector)
CFLAGS_common.o := $(nostackp)
@@ -30,33 +34,11 @@ obj-$(CONFIG_CPU_SUP_CENTAUR) += centaur.o
obj-$(CONFIG_CPU_SUP_TRANSMETA_32) += transmeta.o
obj-$(CONFIG_CPU_SUP_UMC_32) += umc.o
-obj-$(CONFIG_PERF_EVENTS) += perf_event.o
-
-ifdef CONFIG_PERF_EVENTS
-obj-$(CONFIG_CPU_SUP_AMD) += perf_event_amd.o perf_event_amd_uncore.o
-ifdef CONFIG_AMD_IOMMU
-obj-$(CONFIG_CPU_SUP_AMD) += perf_event_amd_iommu.o
-endif
-obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_p6.o perf_event_knc.o perf_event_p4.o
-obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_lbr.o perf_event_intel_ds.o perf_event_intel.o
-obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_rapl.o perf_event_intel_cqm.o
-obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_pt.o perf_event_intel_bts.o
-obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_cstate.o
-
-obj-$(CONFIG_PERF_EVENTS_INTEL_UNCORE) += perf_event_intel_uncore.o \
- perf_event_intel_uncore_snb.o \
- perf_event_intel_uncore_snbep.o \
- perf_event_intel_uncore_nhmex.o
-obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_msr.o
-obj-$(CONFIG_CPU_SUP_AMD) += perf_event_msr.o
-endif
-
-
obj-$(CONFIG_X86_MCE) += mcheck/
obj-$(CONFIG_MTRR) += mtrr/
obj-$(CONFIG_MICROCODE) += microcode/
-obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o perf_event_amd_ibs.o
+obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o
obj-$(CONFIG_HYPERVISOR_GUEST) += vmware.o hypervisor.o mshyperv.o
@@ -64,7 +46,7 @@ ifdef CONFIG_X86_FEATURE_NAMES
quiet_cmd_mkcapflags = MKCAP $@
cmd_mkcapflags = $(CONFIG_SHELL) $(srctree)/$(src)/mkcapflags.sh $< $@
-cpufeature = $(src)/../../include/asm/cpufeature.h
+cpufeature = $(src)/../../include/asm/cpufeatures.h
targets += capflags.c
$(obj)/capflags.c: $(cpufeature) $(src)/mkcapflags.sh FORCE
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index a07956a08..7b76eb67a 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -75,14 +75,17 @@ static inline int wrmsrl_amd_safe(unsigned msr, unsigned long long val)
*/
extern __visible void vide(void);
-__asm__(".globl vide\n\t.align 4\nvide: ret");
+__asm__(".globl vide\n"
+ ".type vide, @function\n"
+ ".align 4\n"
+ "vide: ret\n");
static void init_amd_k5(struct cpuinfo_x86 *c)
{
#ifdef CONFIG_X86_32
/*
* General Systems BIOSen alias the cpu frequency registers
- * of the Elan at 0x000df000. Unfortuantly, one of the Linux
+ * of the Elan at 0x000df000. Unfortunately, one of the Linux
* drivers subsequently pokes it, and changes the CPU speed.
* Workaround : Remove the unneeded alias.
*/
@@ -117,7 +120,7 @@ static void init_amd_k6(struct cpuinfo_x86 *c)
void (*f_vide)(void);
u64 d, d2;
- printk(KERN_INFO "AMD K6 stepping B detected - ");
+ pr_info("AMD K6 stepping B detected - ");
/*
* It looks like AMD fixed the 2.6.2 bug and improved indirect
@@ -133,10 +136,9 @@ static void init_amd_k6(struct cpuinfo_x86 *c)
d = d2-d;
if (d > 20*K6_BUG_LOOP)
- printk(KERN_CONT
- "system stability may be impaired when more than 32 MB are used.\n");
+ pr_cont("system stability may be impaired when more than 32 MB are used.\n");
else
- printk(KERN_CONT "probably OK (after B9730xxxx).\n");
+ pr_cont("probably OK (after B9730xxxx).\n");
}
/* K6 with old style WHCR */
@@ -154,7 +156,7 @@ static void init_amd_k6(struct cpuinfo_x86 *c)
wbinvd();
wrmsr(MSR_K6_WHCR, l, h);
local_irq_restore(flags);
- printk(KERN_INFO "Enabling old style K6 write allocation for %d Mb\n",
+ pr_info("Enabling old style K6 write allocation for %d Mb\n",
mbytes);
}
return;
@@ -175,7 +177,7 @@ static void init_amd_k6(struct cpuinfo_x86 *c)
wbinvd();
wrmsr(MSR_K6_WHCR, l, h);
local_irq_restore(flags);
- printk(KERN_INFO "Enabling new style K6 write allocation for %d Mb\n",
+ pr_info("Enabling new style K6 write allocation for %d Mb\n",
mbytes);
}
@@ -202,7 +204,7 @@ static void init_amd_k7(struct cpuinfo_x86 *c)
*/
if (c->x86_model >= 6 && c->x86_model <= 10) {
if (!cpu_has(c, X86_FEATURE_XMM)) {
- printk(KERN_INFO "Enabling disabled K7/SSE Support.\n");
+ pr_info("Enabling disabled K7/SSE Support.\n");
msr_clear_bit(MSR_K7_HWCR, 15);
set_cpu_cap(c, X86_FEATURE_XMM);
}
@@ -216,9 +218,8 @@ static void init_amd_k7(struct cpuinfo_x86 *c)
if ((c->x86_model == 8 && c->x86_mask >= 1) || (c->x86_model > 8)) {
rdmsr(MSR_K7_CLK_CTL, l, h);
if ((l & 0xfff00000) != 0x20000000) {
- printk(KERN_INFO
- "CPU: CLK_CTL MSR was %x. Reprogramming to %x\n",
- l, ((l & 0x000fffff)|0x20000000));
+ pr_info("CPU: CLK_CTL MSR was %x. Reprogramming to %x\n",
+ l, ((l & 0x000fffff)|0x20000000));
wrmsr(MSR_K7_CLK_CTL, (l & 0x000fffff)|0x20000000, h);
}
}
@@ -299,7 +300,6 @@ static int nearby_node(int apicid)
#ifdef CONFIG_SMP
static void amd_get_topology(struct cpuinfo_x86 *c)
{
- u32 cores_per_cu = 1;
u8 node_id;
int cpu = smp_processor_id();
@@ -308,37 +308,32 @@ static void amd_get_topology(struct cpuinfo_x86 *c)
u32 eax, ebx, ecx, edx;
cpuid(0x8000001e, &eax, &ebx, &ecx, &edx);
- nodes_per_socket = ((ecx >> 8) & 7) + 1;
node_id = ecx & 7;
/* get compute unit information */
smp_num_siblings = ((ebx >> 8) & 3) + 1;
- c->compute_unit_id = ebx & 0xff;
- cores_per_cu += ((ebx >> 8) & 3);
+ c->x86_max_cores /= smp_num_siblings;
+ c->cpu_core_id = ebx & 0xff;
} else if (cpu_has(c, X86_FEATURE_NODEID_MSR)) {
u64 value;
rdmsrl(MSR_FAM10H_NODE_ID, value);
- nodes_per_socket = ((value >> 3) & 7) + 1;
node_id = value & 7;
} else
return;
/* fixup multi-node processor information */
if (nodes_per_socket > 1) {
- u32 cores_per_node;
u32 cus_per_node;
set_cpu_cap(c, X86_FEATURE_AMD_DCM);
- cores_per_node = c->x86_max_cores / nodes_per_socket;
- cus_per_node = cores_per_node / cores_per_cu;
+ cus_per_node = c->x86_max_cores / nodes_per_socket;
/* store NodeID, use llc_shared_map to store sibling info */
per_cpu(cpu_llc_id, cpu) = node_id;
/* core id has to be in the [0 .. cores_per_node - 1] range */
- c->cpu_core_id %= cores_per_node;
- c->compute_unit_id %= cus_per_node;
+ c->cpu_core_id %= cus_per_node;
}
}
#endif
@@ -485,7 +480,7 @@ static void bsp_init_amd(struct cpuinfo_x86 *c)
if (!rdmsrl_safe(MSR_K8_TSEG_ADDR, &tseg)) {
unsigned long pfn = tseg >> PAGE_SHIFT;
- printk(KERN_DEBUG "tseg: %010llx\n", tseg);
+ pr_debug("tseg: %010llx\n", tseg);
if (pfn_range_is_mapped(pfn, pfn + 1))
set_memory_4k((unsigned long)__va(tseg), 1);
}
@@ -500,8 +495,7 @@ static void bsp_init_amd(struct cpuinfo_x86 *c)
rdmsrl(MSR_K7_HWCR, val);
if (!(val & BIT(24)))
- printk(KERN_WARNING FW_BUG "TSC doesn't count "
- "with P0 frequency!\n");
+ pr_warn(FW_BUG "TSC doesn't count with P0 frequency!\n");
}
}
@@ -522,6 +516,18 @@ static void bsp_init_amd(struct cpuinfo_x86 *c)
if (cpu_has(c, X86_FEATURE_MWAITX))
use_mwaitx_delay();
+
+ if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
+ u32 ecx;
+
+ ecx = cpuid_ecx(0x8000001e);
+ nodes_per_socket = ((ecx >> 8) & 7) + 1;
+ } else if (boot_cpu_has(X86_FEATURE_NODEID_MSR)) {
+ u64 value;
+
+ rdmsrl(MSR_FAM10H_NODE_ID, value);
+ nodes_per_socket = ((value >> 3) & 7) + 1;
+ }
}
static void early_init_amd(struct cpuinfo_x86 *c)
@@ -539,6 +545,10 @@ static void early_init_amd(struct cpuinfo_x86 *c)
set_sched_clock_stable();
}
+ /* Bit 12 of 8000_0007 edx is accumulated power mechanism. */
+ if (c->x86_power & BIT(12))
+ set_cpu_cap(c, X86_FEATURE_ACC_POWER);
+
#ifdef CONFIG_X86_64
set_cpu_cap(c, X86_FEATURE_SYSCALL32);
#else
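bsp_init_amd() now derives nodes_per_socket once on the boot CPU, either from CPUID leaf 0x8000001e (ECX bits 10:8) or from the NODE_ID MSR (bits 5:3). The field decode is plain bit arithmetic, sketched below with made-up register values:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	/* Example raw values; real ones come from CPUID 0x8000001e / MSR_FAM10H_NODE_ID. */
	uint32_t ecx = 0x00000103;		/* ECX[10:8] = 1 -> 2 nodes per socket */
	uint64_t node_id_msr = 0x19ULL;		/* bits [5:3] = 3 -> 4 nodes per socket */

	unsigned int nodes_from_cpuid = ((ecx >> 8) & 7) + 1;
	unsigned int nodes_from_msr   = ((node_id_msr >> 3) & 7) + 1;

	printf("nodes_per_socket (CPUID path): %u\n", nodes_from_cpuid);
	printf("nodes_per_socket (MSR path):   %u\n", nodes_from_msr);
	return 0;
}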
diff --git a/arch/x86/kernel/cpu/bugs_64.c b/arch/x86/kernel/cpu/bugs_64.c
index 04f0fe5af..a972ac4c7 100644
--- a/arch/x86/kernel/cpu/bugs_64.c
+++ b/arch/x86/kernel/cpu/bugs_64.c
@@ -15,7 +15,7 @@ void __init check_bugs(void)
{
identify_boot_cpu();
#if !defined(CONFIG_SMP)
- printk(KERN_INFO "CPU: ");
+ pr_info("CPU: ");
print_cpu_info(&boot_cpu_data);
#endif
alternative_instructions();
diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c
index ae20be6e4..1661d8ec9 100644
--- a/arch/x86/kernel/cpu/centaur.c
+++ b/arch/x86/kernel/cpu/centaur.c
@@ -1,7 +1,7 @@
#include <linux/bitops.h>
#include <linux/kernel.h>
-#include <asm/processor.h>
+#include <asm/cpufeature.h>
#include <asm/e820.h>
#include <asm/mtrr.h>
#include <asm/msr.h>
@@ -29,7 +29,7 @@ static void init_c3(struct cpuinfo_x86 *c)
rdmsr(MSR_VIA_FCR, lo, hi);
lo |= ACE_FCR; /* enable ACE unit */
wrmsr(MSR_VIA_FCR, lo, hi);
- printk(KERN_INFO "CPU: Enabled ACE h/w crypto\n");
+ pr_info("CPU: Enabled ACE h/w crypto\n");
}
/* enable RNG unit, if present and disabled */
@@ -37,7 +37,7 @@ static void init_c3(struct cpuinfo_x86 *c)
rdmsr(MSR_VIA_RNG, lo, hi);
lo |= RNG_ENABLE; /* enable RNG unit */
wrmsr(MSR_VIA_RNG, lo, hi);
- printk(KERN_INFO "CPU: Enabled h/w RNG\n");
+ pr_info("CPU: Enabled h/w RNG\n");
}
/* store Centaur Extended Feature Flags as
@@ -130,7 +130,7 @@ static void init_centaur(struct cpuinfo_x86 *c)
name = "C6";
fcr_set = ECX8|DSMC|EDCTLB|EMMX|ERETSTK;
fcr_clr = DPDC;
- printk(KERN_NOTICE "Disabling bugged TSC.\n");
+ pr_notice("Disabling bugged TSC.\n");
clear_cpu_cap(c, X86_FEATURE_TSC);
break;
case 8:
@@ -163,11 +163,11 @@ static void init_centaur(struct cpuinfo_x86 *c)
newlo = (lo|fcr_set) & (~fcr_clr);
if (newlo != lo) {
- printk(KERN_INFO "Centaur FCR was 0x%X now 0x%X\n",
+ pr_info("Centaur FCR was 0x%X now 0x%X\n",
lo, newlo);
wrmsr(MSR_IDT_FCR1, newlo, hi);
} else {
- printk(KERN_INFO "Centaur FCR is 0x%X\n", lo);
+ pr_info("Centaur FCR is 0x%X\n", lo);
}
/* Emulate MTRRs using Centaur's MCR. */
set_cpu_cap(c, X86_FEATURE_CENTAUR_MCR);
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 37830de8f..f45a4b9d2 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -162,6 +162,22 @@ static int __init x86_mpx_setup(char *s)
}
__setup("nompx", x86_mpx_setup);
+static int __init x86_noinvpcid_setup(char *s)
+{
+ /* noinvpcid doesn't accept parameters */
+ if (s)
+ return -EINVAL;
+
+ /* do not emit a message if the feature is not present */
+ if (!boot_cpu_has(X86_FEATURE_INVPCID))
+ return 0;
+
+ setup_clear_cpu_cap(X86_FEATURE_INVPCID);
+ pr_info("noinvpcid: INVPCID feature disabled\n");
+ return 0;
+}
+early_param("noinvpcid", x86_noinvpcid_setup);
+
#ifdef CONFIG_X86_32
static int cachesize_override = -1;
static int disable_x86_serial_nr = 1;
@@ -228,7 +244,7 @@ static void squash_the_stupid_serial_number(struct cpuinfo_x86 *c)
lo |= 0x200000;
wrmsr(MSR_IA32_BBL_CR_CTL, lo, hi);
- printk(KERN_NOTICE "CPU serial number disabled.\n");
+ pr_notice("CPU serial number disabled.\n");
clear_cpu_cap(c, X86_FEATURE_PN);
/* Disabling the serial number may affect the cpuid level */
@@ -288,6 +304,52 @@ static __always_inline void setup_smap(struct cpuinfo_x86 *c)
}
/*
+ * Protection Keys are not available in 32-bit mode.
+ */
+static bool pku_disabled;
+
+static __always_inline void setup_pku(struct cpuinfo_x86 *c)
+{
+ /* check the boot processor, plus compile options for PKU: */
+ if (!cpu_feature_enabled(X86_FEATURE_PKU))
+ return;
+ /* checks the actual processor's cpuid bits: */
+ if (!cpu_has(c, X86_FEATURE_PKU))
+ return;
+ if (pku_disabled)
+ return;
+
+ cr4_set_bits(X86_CR4_PKE);
+ /*
+ * Setting X86_CR4_PKE will cause the X86_FEATURE_OSPKE
+ * cpuid bit to be set. We need to ensure that we
+ * update that bit in this CPU's "cpu_info".
+ */
+ get_cpu_cap(c);
+}
+
+#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
+static __init int setup_disable_pku(char *arg)
+{
+ /*
+ * Do not clear the X86_FEATURE_PKU bit. All of the
+ * runtime checks are against OSPKE so clearing the
+ * bit does nothing.
+ *
+ * This way, we will see "pku" in cpuinfo, but not
+ * "ospke", which is exactly what we want. It shows
+ * that the CPU has PKU, but the OS has not enabled it.
+ * This happens to be exactly how a system would look
+ * if we disabled the config option.
+ */
+ pr_info("x86: 'nopku' specified, disabling Memory Protection Keys\n");
+ pku_disabled = true;
+ return 1;
+}
+__setup("nopku", setup_disable_pku);
+#endif /* CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS */
+
+/*
* Some CPU features depend on higher CPUID levels, which may not always
* be available due to CPUID level capping or broken virtualization
* software. Add those features to this table to auto-disable them.
@@ -329,9 +391,8 @@ static void filter_cpuid_features(struct cpuinfo_x86 *c, bool warn)
if (!warn)
continue;
- printk(KERN_WARNING
- "CPU: CPU feature " X86_CAP_FMT " disabled, no CPUID level 0x%x\n",
- x86_cap_flag(df->feature), df->level);
+ pr_warn("CPU: CPU feature " X86_CAP_FMT " disabled, no CPUID level 0x%x\n",
+ x86_cap_flag(df->feature), df->level);
}
}
@@ -510,7 +571,7 @@ void detect_ht(struct cpuinfo_x86 *c)
smp_num_siblings = (ebx & 0xff0000) >> 16;
if (smp_num_siblings == 1) {
- printk_once(KERN_INFO "CPU0: Hyper-Threading is disabled\n");
+ pr_info_once("CPU0: Hyper-Threading is disabled\n");
goto out;
}
@@ -531,10 +592,10 @@ void detect_ht(struct cpuinfo_x86 *c)
out:
if (!printed && (c->x86_max_cores * smp_num_siblings) > 1) {
- printk(KERN_INFO "CPU: Physical Processor ID: %d\n",
- c->phys_proc_id);
- printk(KERN_INFO "CPU: Processor Core ID: %d\n",
- c->cpu_core_id);
+ pr_info("CPU: Physical Processor ID: %d\n",
+ c->phys_proc_id);
+ pr_info("CPU: Processor Core ID: %d\n",
+ c->cpu_core_id);
printed = 1;
}
#endif
@@ -559,9 +620,8 @@ static void get_cpu_vendor(struct cpuinfo_x86 *c)
}
}
- printk_once(KERN_ERR
- "CPU: vendor_id '%s' unknown, using generic init.\n" \
- "CPU: Your system may be unstable.\n", v);
+ pr_err_once("CPU: vendor_id '%s' unknown, using generic init.\n" \
+ "CPU: Your system may be unstable.\n", v);
c->x86_vendor = X86_VENDOR_UNKNOWN;
this_cpu = &default_cpu;
@@ -611,6 +671,7 @@ void get_cpu_cap(struct cpuinfo_x86 *c)
c->x86_capability[CPUID_7_0_EBX] = ebx;
c->x86_capability[CPUID_6_EAX] = cpuid_eax(0x00000006);
+ c->x86_capability[CPUID_7_ECX] = ecx;
}
/* Extended state features: level 0x0000000d */
@@ -635,7 +696,9 @@ void get_cpu_cap(struct cpuinfo_x86 *c)
cpuid_count(0x0000000F, 1, &eax, &ebx, &ecx, &edx);
c->x86_capability[CPUID_F_1_EDX] = edx;
- if (cpu_has(c, X86_FEATURE_CQM_OCCUP_LLC)) {
+ if ((cpu_has(c, X86_FEATURE_CQM_OCCUP_LLC)) ||
+ ((cpu_has(c, X86_FEATURE_CQM_MBM_TOTAL)) ||
+ (cpu_has(c, X86_FEATURE_CQM_MBM_LOCAL)))) {
c->x86_cache_max_rmid = ecx;
c->x86_cache_occ_scale = ebx;
}
@@ -760,7 +823,7 @@ void __init early_cpu_init(void)
int count = 0;
#ifdef CONFIG_PROCESSOR_SELECT
- printk(KERN_INFO "KERNEL supported cpus:\n");
+ pr_info("KERNEL supported cpus:\n");
#endif
for (cdev = __x86_cpu_dev_start; cdev < __x86_cpu_dev_end; cdev++) {
@@ -778,7 +841,7 @@ void __init early_cpu_init(void)
for (j = 0; j < 2; j++) {
if (!cpudev->c_ident[j])
continue;
- printk(KERN_INFO " %s %s\n", cpudev->c_vendor,
+ pr_info(" %s %s\n", cpudev->c_vendor,
cpudev->c_ident[j]);
}
}
@@ -803,6 +866,31 @@ static void detect_nopl(struct cpuinfo_x86 *c)
#else
set_cpu_cap(c, X86_FEATURE_NOPL);
#endif
+
+ /*
+ * ESPFIX is a strange bug. All real CPUs have it. Paravirt
+ * systems that run Linux at CPL > 0 may or may not have the
+ * issue, but, even if they have the issue, there's absolutely
+ * nothing we can do about it because we can't use the real IRET
+ * instruction.
+ *
+ * NB: For the time being, only 32-bit kernels support
+ * X86_BUG_ESPFIX as such. 64-bit kernels directly choose
+ * whether to apply espfix using paravirt hooks. If any
+ * non-paravirt system ever shows up that does *not* have the
+ * ESPFIX issue, we can change this.
+ */
+#ifdef CONFIG_X86_32
+#ifdef CONFIG_PARAVIRT
+ do {
+ extern void native_iret(void);
+ if (pv_cpu_ops.iret == native_iret)
+ set_cpu_bug(c, X86_BUG_ESPFIX);
+ } while (0);
+#else
+ set_cpu_bug(c, X86_BUG_ESPFIX);
+#endif
+#endif
}
static void generic_identify(struct cpuinfo_x86 *c)
@@ -886,7 +974,7 @@ static void identify_cpu(struct cpuinfo_x86 *c)
if (this_cpu->c_identify)
this_cpu->c_identify(c);
- /* Clear/Set all flags overriden by options, after probe */
+ /* Clear/Set all flags overridden by options, after probe */
for (i = 0; i < NCAPINTS; i++) {
c->x86_capability[i] &= ~cpu_caps_cleared[i];
c->x86_capability[i] |= cpu_caps_set[i];
@@ -943,9 +1031,10 @@ static void identify_cpu(struct cpuinfo_x86 *c)
init_hypervisor(c);
x86_init_rdrand(c);
x86_init_cache_qos(c);
+ setup_pku(c);
/*
- * Clear/Set all flags overriden by options, need do it
+ * Clear/Set all flags overridden by options, need do it
* before following smp all cpus cap AND.
*/
for (i = 0; i < NCAPINTS; i++) {
@@ -977,6 +1066,8 @@ static void identify_cpu(struct cpuinfo_x86 *c)
#ifdef CONFIG_NUMA
numa_add_cpu(smp_processor_id());
#endif
+ /* The boot/hotplug time assignment got cleared, restore it */
+ c->logical_proc_id = topology_phys_to_logical_pkg(c->phys_proc_id);
}
/*
@@ -1061,7 +1152,7 @@ static void __print_cpu_msr(void)
for (index = index_min; index < index_max; index++) {
if (rdmsrl_safe(index, &val))
continue;
- printk(KERN_INFO " MSR%08x: %016llx\n", index, val);
+ pr_info(" MSR%08x: %016llx\n", index, val);
}
}
}
@@ -1100,19 +1191,19 @@ void print_cpu_info(struct cpuinfo_x86 *c)
}
if (vendor && !strstr(c->x86_model_id, vendor))
- printk(KERN_CONT "%s ", vendor);
+ pr_cont("%s ", vendor);
if (c->x86_model_id[0])
- printk(KERN_CONT "%s", c->x86_model_id);
+ pr_cont("%s", c->x86_model_id);
else
- printk(KERN_CONT "%d86", c->x86);
+ pr_cont("%d86", c->x86);
- printk(KERN_CONT " (family: 0x%x, model: 0x%x", c->x86, c->x86_model);
+ pr_cont(" (family: 0x%x, model: 0x%x", c->x86, c->x86_model);
if (c->x86_mask || c->cpuid_level >= 0)
- printk(KERN_CONT ", stepping: 0x%x)\n", c->x86_mask);
+ pr_cont(", stepping: 0x%x)\n", c->x86_mask);
else
- printk(KERN_CONT ")\n");
+ pr_cont(")\n");
print_cpu_msr(c);
}
@@ -1438,7 +1529,7 @@ void cpu_init(void)
show_ucode_info_early();
- printk(KERN_INFO "Initializing CPU#%d\n", cpu);
+ pr_info("Initializing CPU#%d\n", cpu);
if (cpu_feature_enabled(X86_FEATURE_VME) ||
cpu_has_tsc ||
@@ -1475,20 +1566,6 @@ void cpu_init(void)
}
#endif
-#ifdef CONFIG_X86_DEBUG_STATIC_CPU_HAS
-void warn_pre_alternatives(void)
-{
- WARN(1, "You're using static_cpu_has before alternatives have run!\n");
-}
-EXPORT_SYMBOL_GPL(warn_pre_alternatives);
-#endif
-
-inline bool __static_cpu_has_safe(u16 bit)
-{
- return boot_cpu_has(bit);
-}
-EXPORT_SYMBOL_GPL(__static_cpu_has_safe);
-
static void bsp_resume(void)
{
if (this_cpu->c_bsp_resume)
diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c
index aaf152e79..6adef9cac 100644
--- a/arch/x86/kernel/cpu/cyrix.c
+++ b/arch/x86/kernel/cpu/cyrix.c
@@ -8,6 +8,7 @@
#include <linux/timer.h>
#include <asm/pci-direct.h>
#include <asm/tsc.h>
+#include <asm/cpufeature.h>
#include "cpu.h"
@@ -103,7 +104,7 @@ static void check_cx686_slop(struct cpuinfo_x86 *c)
local_irq_restore(flags);
if (ccr5 & 2) { /* possible wrong calibration done */
- printk(KERN_INFO "Recalibrating delay loop with SLOP bit reset\n");
+ pr_info("Recalibrating delay loop with SLOP bit reset\n");
calibrate_delay();
c->loops_per_jiffy = loops_per_jiffy;
}
@@ -115,7 +116,7 @@ static void set_cx86_reorder(void)
{
u8 ccr3;
- printk(KERN_INFO "Enable Memory access reorder on Cyrix/NSC processor.\n");
+ pr_info("Enable Memory access reorder on Cyrix/NSC processor.\n");
ccr3 = getCx86(CX86_CCR3);
setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */
@@ -128,7 +129,7 @@ static void set_cx86_reorder(void)
static void set_cx86_memwb(void)
{
- printk(KERN_INFO "Enable Memory-Write-back mode on Cyrix/NSC processor.\n");
+ pr_info("Enable Memory-Write-back mode on Cyrix/NSC processor.\n");
/* CCR2 bit 2: unlock NW bit */
setCx86_old(CX86_CCR2, getCx86_old(CX86_CCR2) & ~0x04);
@@ -268,7 +269,7 @@ static void init_cyrix(struct cpuinfo_x86 *c)
* VSA1 we work around however.
*/
- printk(KERN_INFO "Working around Cyrix MediaGX virtual DMA bugs.\n");
+ pr_info("Working around Cyrix MediaGX virtual DMA bugs.\n");
isa_dma_bridge_buggy = 2;
/* We do this before the PCI layer is running. However we
@@ -426,7 +427,7 @@ static void cyrix_identify(struct cpuinfo_x86 *c)
if (dir0 == 5 || dir0 == 3) {
unsigned char ccr3;
unsigned long flags;
- printk(KERN_INFO "Enabling CPUID on Cyrix processor.\n");
+ pr_info("Enabling CPUID on Cyrix processor.\n");
local_irq_save(flags);
ccr3 = getCx86(CX86_CCR3);
/* enable MAPEN */
diff --git a/arch/x86/kernel/cpu/hypervisor.c b/arch/x86/kernel/cpu/hypervisor.c
index d820d8eae..73d391ae4 100644
--- a/arch/x86/kernel/cpu/hypervisor.c
+++ b/arch/x86/kernel/cpu/hypervisor.c
@@ -56,7 +56,7 @@ detect_hypervisor_vendor(void)
}
if (max_pri)
- printk(KERN_INFO "Hypervisor detected: %s\n", x86_hyper->name);
+ pr_info("Hypervisor detected: %s\n", x86_hyper->name);
}
void init_hypervisor(struct cpuinfo_x86 *c)
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 565648bc1..e4393bfc7 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -8,7 +8,7 @@
#include <linux/module.h>
#include <linux/uaccess.h>
-#include <asm/processor.h>
+#include <asm/cpufeature.h>
#include <asm/pgtable.h>
#include <asm/msr.h>
#include <asm/bugs.h>
@@ -61,7 +61,7 @@ static void early_init_intel(struct cpuinfo_x86 *c)
*/
if (c->x86 == 6 && c->x86_model == 0x1c && c->x86_mask <= 2 &&
c->microcode < 0x20e) {
- printk(KERN_WARNING "Atom PSE erratum detected, BIOS microcode update recommended\n");
+ pr_warn("Atom PSE erratum detected, BIOS microcode update recommended\n");
clear_cpu_cap(c, X86_FEATURE_PSE);
}
@@ -140,7 +140,7 @@ static void early_init_intel(struct cpuinfo_x86 *c)
if (c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xd)) {
rdmsrl(MSR_IA32_MISC_ENABLE, misc_enable);
if (!(misc_enable & MSR_IA32_MISC_ENABLE_FAST_STRING)) {
- printk(KERN_INFO "Disabled fast string operations\n");
+ pr_info("Disabled fast string operations\n");
setup_clear_cpu_cap(X86_FEATURE_REP_GOOD);
setup_clear_cpu_cap(X86_FEATURE_ERMS);
}
@@ -160,6 +160,19 @@ static void early_init_intel(struct cpuinfo_x86 *c)
pr_info("Disabling PGE capability bit\n");
setup_clear_cpu_cap(X86_FEATURE_PGE);
}
+
+ if (c->cpuid_level >= 0x00000001) {
+ u32 eax, ebx, ecx, edx;
+
+ cpuid(0x00000001, &eax, &ebx, &ecx, &edx);
+ /*
+ * If HTT (EDX[28]) is set EBX[16:23] contain the number of
+ * apicids which are reserved per package. Store the resulting
+ * shift value for the package management code.
+ */
+ if (edx & (1U << 28))
+ c->x86_coreid_bits = get_count_order((ebx >> 16) & 0xff);
+ }
}
#ifdef CONFIG_X86_32
@@ -176,7 +189,7 @@ int ppro_with_ram_bug(void)
boot_cpu_data.x86 == 6 &&
boot_cpu_data.x86_model == 1 &&
boot_cpu_data.x86_mask < 8) {
- printk(KERN_INFO "Pentium Pro with Errata#50 detected. Taking evasive action.\n");
+ pr_info("Pentium Pro with Errata#50 detected. Taking evasive action.\n");
return 1;
}
return 0;
@@ -225,7 +238,7 @@ static void intel_workarounds(struct cpuinfo_x86 *c)
set_cpu_bug(c, X86_BUG_F00F);
if (!f00f_workaround_enabled) {
- printk(KERN_NOTICE "Intel Pentium with F0 0F bug - workaround enabled.\n");
+ pr_notice("Intel Pentium with F0 0F bug - workaround enabled.\n");
f00f_workaround_enabled = 1;
}
}
@@ -244,7 +257,7 @@ static void intel_workarounds(struct cpuinfo_x86 *c)
* Forcefully enable PAE if kernel parameter "forcepae" is present.
*/
if (forcepae) {
- printk(KERN_WARNING "PAE forced!\n");
+ pr_warn("PAE forced!\n");
set_cpu_cap(c, X86_FEATURE_PAE);
add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_NOW_UNRELIABLE);
}
@@ -323,7 +336,7 @@ static int intel_num_cpu_cores(struct cpuinfo_x86 *c)
{
unsigned int eax, ebx, ecx, edx;
- if (c->cpuid_level < 4)
+ if (!IS_ENABLED(CONFIG_SMP) || c->cpuid_level < 4)
return 1;
/* Intel has a non-standard dependency on %ecx for this CPUID level. */
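The new early_init_intel() block records how many APIC ID values each package reserves: with HTT set, EBX[23:16] of CPUID leaf 1 holds that count, and get_count_order() turns the count into a bit shift for the package management code. A small worked example of the conversion; the ceil_log2() helper below just mirrors what get_count_order() computes for these values and is not the kernel function:

#include <stdio.h>

/* Smallest n such that (1 << n) >= count, i.e. ceil(log2(count)). */
static int ceil_log2(unsigned int count)
{
	int order = 0;

	while ((1u << order) < count)
		order++;
	return order;
}

int main(void)
{
	unsigned int ebx = 0x00100800;	/* example: EBX[23:16] = 16 reserved APIC IDs */
	unsigned int apicids = (ebx >> 16) & 0xff;

	printf("reserved APIC IDs per package: %u\n", apicids);
	printf("x86_coreid_bits:               %d\n", ceil_log2(apicids));
	return 0;
}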
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index 0b6c52388..de6626c18 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -14,7 +14,7 @@
#include <linux/sysfs.h>
#include <linux/pci.h>
-#include <asm/processor.h>
+#include <asm/cpufeature.h>
#include <asm/amd_nb.h>
#include <asm/smp.h>
@@ -444,7 +444,7 @@ static ssize_t store_cache_disable(struct cacheinfo *this_leaf,
err = amd_set_l3_disable_slot(nb, cpu, slot, val);
if (err) {
if (err == -EEXIST)
- pr_warning("L3 slot %d in use/index already disabled!\n",
+ pr_warn("L3 slot %d in use/index already disabled!\n",
slot);
return err;
}
diff --git a/arch/x86/kernel/cpu/intel_pt.h b/arch/x86/kernel/cpu/intel_pt.h
deleted file mode 100644
index 336878a5d..000000000
--- a/arch/x86/kernel/cpu/intel_pt.h
+++ /dev/null
@@ -1,116 +0,0 @@
-/*
- * Intel(R) Processor Trace PMU driver for perf
- * Copyright (c) 2013-2014, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * Intel PT is specified in the Intel Architecture Instruction Set Extensions
- * Programming Reference:
- * http://software.intel.com/en-us/intel-isa-extensions
- */
-
-#ifndef __INTEL_PT_H__
-#define __INTEL_PT_H__
-
-/*
- * Single-entry ToPA: when this close to region boundary, switch
- * buffers to avoid losing data.
- */
-#define TOPA_PMI_MARGIN 512
-
-#define TOPA_SHIFT 12
-
-static inline unsigned int sizes(unsigned int tsz)
-{
- return 1 << (tsz + TOPA_SHIFT);
-};
-
-struct topa_entry {
- u64 end : 1;
- u64 rsvd0 : 1;
- u64 intr : 1;
- u64 rsvd1 : 1;
- u64 stop : 1;
- u64 rsvd2 : 1;
- u64 size : 4;
- u64 rsvd3 : 2;
- u64 base : 36;
- u64 rsvd4 : 16;
-};
-
-#define PT_CPUID_LEAVES 2
-#define PT_CPUID_REGS_NUM 4 /* number of regsters (eax, ebx, ecx, edx) */
-
-enum pt_capabilities {
- PT_CAP_max_subleaf = 0,
- PT_CAP_cr3_filtering,
- PT_CAP_psb_cyc,
- PT_CAP_mtc,
- PT_CAP_topa_output,
- PT_CAP_topa_multiple_entries,
- PT_CAP_single_range_output,
- PT_CAP_payloads_lip,
- PT_CAP_mtc_periods,
- PT_CAP_cycle_thresholds,
- PT_CAP_psb_periods,
-};
-
-struct pt_pmu {
- struct pmu pmu;
- u32 caps[PT_CPUID_REGS_NUM * PT_CPUID_LEAVES];
-};
-
-/**
- * struct pt_buffer - buffer configuration; one buffer per task_struct or
- * cpu, depending on perf event configuration
- * @cpu: cpu for per-cpu allocation
- * @tables: list of ToPA tables in this buffer
- * @first: shorthand for first topa table
- * @last: shorthand for last topa table
- * @cur: current topa table
- * @nr_pages: buffer size in pages
- * @cur_idx: current output region's index within @cur table
- * @output_off: offset within the current output region
- * @data_size: running total of the amount of data in this buffer
- * @lost: if data was lost/truncated
- * @head: logical write offset inside the buffer
- * @snapshot: if this is for a snapshot/overwrite counter
- * @stop_pos: STOP topa entry in the buffer
- * @intr_pos: INT topa entry in the buffer
- * @data_pages: array of pages from perf
- * @topa_index: table of topa entries indexed by page offset
- */
-struct pt_buffer {
- int cpu;
- struct list_head tables;
- struct topa *first, *last, *cur;
- unsigned int cur_idx;
- size_t output_off;
- unsigned long nr_pages;
- local_t data_size;
- local_t lost;
- local64_t head;
- bool snapshot;
- unsigned long stop_pos, intr_pos;
- void **data_pages;
- struct topa_entry *topa_index[0];
-};
-
-/**
- * struct pt - per-cpu pt context
- * @handle: perf output handle
- * @handle_nmi: do handle PT PMI on this cpu, there's an active event
- */
-struct pt {
- struct perf_output_handle handle;
- int handle_nmi;
-};
-
-#endif /* __INTEL_PT_H__ */
diff --git a/arch/x86/kernel/cpu/match.c b/arch/x86/kernel/cpu/match.c
index afa9f0d48..fbb5e9055 100644
--- a/arch/x86/kernel/cpu/match.c
+++ b/arch/x86/kernel/cpu/match.c
@@ -1,5 +1,5 @@
#include <asm/cpu_device_id.h>
-#include <asm/processor.h>
+#include <asm/cpufeature.h>
#include <linux/cpu.h>
#include <linux/module.h>
#include <linux/slab.h>
diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c
index 4cfba4371..517619ea6 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-inject.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c
@@ -115,7 +115,7 @@ static int raise_local(void)
int cpu = m->extcpu;
if (m->inject_flags & MCJ_EXCEPTION) {
- printk(KERN_INFO "Triggering MCE exception on CPU %d\n", cpu);
+ pr_info("Triggering MCE exception on CPU %d\n", cpu);
switch (context) {
case MCJ_CTX_IRQ:
/*
@@ -128,15 +128,15 @@ static int raise_local(void)
raise_exception(m, NULL);
break;
default:
- printk(KERN_INFO "Invalid MCE context\n");
+ pr_info("Invalid MCE context\n");
ret = -EINVAL;
}
- printk(KERN_INFO "MCE exception done on CPU %d\n", cpu);
+ pr_info("MCE exception done on CPU %d\n", cpu);
} else if (m->status) {
- printk(KERN_INFO "Starting machine check poll CPU %d\n", cpu);
+ pr_info("Starting machine check poll CPU %d\n", cpu);
raise_poll(m);
mce_notify_irq();
- printk(KERN_INFO "Machine check poll done on CPU %d\n", cpu);
+ pr_info("Machine check poll done on CPU %d\n", cpu);
} else
m->finished = 0;
@@ -183,8 +183,7 @@ static void raise_mce(struct mce *m)
start = jiffies;
while (!cpumask_empty(mce_inject_cpumask)) {
if (!time_before(jiffies, start + 2*HZ)) {
- printk(KERN_ERR
- "Timeout waiting for mce inject %lx\n",
+ pr_err("Timeout waiting for mce inject %lx\n",
*cpumask_bits(mce_inject_cpumask));
break;
}
@@ -241,7 +240,7 @@ static int inject_init(void)
{
if (!alloc_cpumask_var(&mce_inject_cpumask, GFP_KERNEL))
return -ENOMEM;
- printk(KERN_INFO "Machine check injector initialized\n");
+ pr_info("Machine check injector initialized\n");
register_mce_write_callback(mce_write);
register_nmi_handler(NMI_LOCAL, mce_raise_notify, 0,
"mce_notify");
diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c
index 9c682c222..5119766d9 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-severity.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c
@@ -14,6 +14,7 @@
#include <linux/init.h>
#include <linux/debugfs.h>
#include <asm/mce.h>
+#include <asm/uaccess.h>
#include "mce-internal.h"
@@ -29,7 +30,7 @@
* panic situations)
*/
-enum context { IN_KERNEL = 1, IN_USER = 2 };
+enum context { IN_KERNEL = 1, IN_USER = 2, IN_KERNEL_RECOV = 3 };
enum ser { SER_REQUIRED = 1, NO_SER = 2 };
enum exception { EXCP_CONTEXT = 1, NO_EXCP = 2 };
@@ -48,6 +49,7 @@ static struct severity {
#define MCESEV(s, m, c...) { .sev = MCE_ ## s ## _SEVERITY, .msg = m, ## c }
#define KERNEL .context = IN_KERNEL
#define USER .context = IN_USER
+#define KERNEL_RECOV .context = IN_KERNEL_RECOV
#define SER .ser = SER_REQUIRED
#define NOSER .ser = NO_SER
#define EXCP .excp = EXCP_CONTEXT
@@ -87,6 +89,10 @@ static struct severity {
EXCP, KERNEL, MCGMASK(MCG_STATUS_RIPV, 0)
),
MCESEV(
+ PANIC, "In kernel and no restart IP",
+ EXCP, KERNEL_RECOV, MCGMASK(MCG_STATUS_RIPV, 0)
+ ),
+ MCESEV(
DEFERRED, "Deferred error",
NOSER, MASK(MCI_STATUS_UC|MCI_STATUS_DEFERRED|MCI_STATUS_POISON, MCI_STATUS_DEFERRED)
),
@@ -123,6 +129,11 @@ static struct severity {
MCGMASK(MCG_STATUS_RIPV|MCG_STATUS_EIPV, MCG_STATUS_RIPV)
),
MCESEV(
+ AR, "Action required: data load in error recoverable area of kernel",
+ SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA),
+ KERNEL_RECOV
+ ),
+ MCESEV(
AR, "Action required: data load error in a user process",
SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA),
USER
@@ -170,6 +181,9 @@ static struct severity {
) /* always matches. keep at end */
};
+#define mc_recoverable(mcg) (((mcg) & (MCG_STATUS_RIPV|MCG_STATUS_EIPV)) == \
+ (MCG_STATUS_RIPV|MCG_STATUS_EIPV))
+
/*
* If mcgstatus indicated that ip/cs on the stack were
* no good, then "m->cs" will be zero and we will have
@@ -183,7 +197,11 @@ static struct severity {
*/
static int error_context(struct mce *m)
{
- return ((m->cs & 3) == 3) ? IN_USER : IN_KERNEL;
+ if ((m->cs & 3) == 3)
+ return IN_USER;
+ if (mc_recoverable(m->mcgstatus) && ex_has_fault_handler(m->ip))
+ return IN_KERNEL_RECOV;
+ return IN_KERNEL;
}
/*
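The new error_context() distinguishes three cases: user mode (CS RPL == 3), kernel code covered by an exception fixup when both RIPV and EIPV are valid (IN_KERNEL_RECOV), and everything else (IN_KERNEL). The mc_recoverable() test is the usual "require all of these bits" mask-and-compare, sketched here with placeholder bit positions rather than the real MCG_STATUS definitions:

#include <stdio.h>
#include <stdint.h>

/* Placeholder bit positions; the real flags are MCG_STATUS_RIPV/EIPV. */
#define RIPV (1ULL << 0)
#define EIPV (1ULL << 1)

/* True only when *both* the restart IP and the error IP are valid. */
static int mc_recoverable(uint64_t mcgstatus)
{
	return (mcgstatus & (RIPV | EIPV)) == (RIPV | EIPV);
}

int main(void)
{
	uint64_t samples[] = { 0, RIPV, EIPV, RIPV | EIPV };

	for (int i = 0; i < 4; i++)
		printf("mcgstatus=0x%llx -> recoverable=%d\n",
		       (unsigned long long)samples[i], mc_recoverable(samples[i]));
	return 0;
}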
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index a006f4cd7..f0c921b03 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -961,6 +961,20 @@ static void mce_clear_state(unsigned long *toclear)
}
}
+static int do_memory_failure(struct mce *m)
+{
+ int flags = MF_ACTION_REQUIRED;
+ int ret;
+
+ pr_err("Uncorrected hardware memory error in user-access at %llx", m->addr);
+ if (!(m->mcgstatus & MCG_STATUS_RIPV))
+ flags |= MF_MUST_KILL;
+ ret = memory_failure(m->addr >> PAGE_SHIFT, MCE_VECTOR, flags);
+ if (ret)
+ pr_err("Memory error not recovered");
+ return ret;
+}
+
/*
* The actual machine check handler. This only handles real
* exceptions when something got corrupted coming in through int 18.
@@ -998,8 +1012,6 @@ void do_machine_check(struct pt_regs *regs, long error_code)
DECLARE_BITMAP(toclear, MAX_NR_BANKS);
DECLARE_BITMAP(valid_banks, MAX_NR_BANKS);
char *msg = "Unknown";
- u64 recover_paddr = ~0ull;
- int flags = MF_ACTION_REQUIRED;
int lmce = 0;
/* If this CPU is offline, just bail out. */
@@ -1136,22 +1148,13 @@ void do_machine_check(struct pt_regs *regs, long error_code)
}
/*
- * At insane "tolerant" levels we take no action. Otherwise
- * we only die if we have no other choice. For less serious
- * issues we try to recover, or limit damage to the current
- * process.
+ * If tolerant is at an insane level we drop requests to kill
+ * processes and continue even when there is no way out.
*/
- if (cfg->tolerant < 3) {
- if (no_way_out)
- mce_panic("Fatal machine check on current CPU", &m, msg);
- if (worst == MCE_AR_SEVERITY) {
- recover_paddr = m.addr;
- if (!(m.mcgstatus & MCG_STATUS_RIPV))
- flags |= MF_MUST_KILL;
- } else if (kill_it) {
- force_sig(SIGBUS, current);
- }
- }
+ if (cfg->tolerant == 3)
+ kill_it = 0;
+ else if (no_way_out)
+ mce_panic("Fatal machine check on current CPU", &m, msg);
if (worst > 0)
mce_report_event(regs);
@@ -1159,25 +1162,24 @@ void do_machine_check(struct pt_regs *regs, long error_code)
out:
sync_core();
- if (recover_paddr == ~0ull)
- goto done;
+ if (worst != MCE_AR_SEVERITY && !kill_it)
+ goto out_ist;
- pr_err("Uncorrected hardware memory error in user-access at %llx",
- recover_paddr);
- /*
- * We must call memory_failure() here even if the current process is
- * doomed. We still need to mark the page as poisoned and alert any
- * other users of the page.
- */
- ist_begin_non_atomic(regs);
- local_irq_enable();
- if (memory_failure(recover_paddr >> PAGE_SHIFT, MCE_VECTOR, flags) < 0) {
- pr_err("Memory error not recovered");
- force_sig(SIGBUS, current);
+ /* Fault was in user mode and we need to take some action */
+ if ((m.cs & 3) == 3) {
+ ist_begin_non_atomic(regs);
+ local_irq_enable();
+
+ if (kill_it || do_memory_failure(&m))
+ force_sig(SIGBUS, current);
+ local_irq_disable();
+ ist_end_non_atomic();
+ } else {
+ if (!fixup_exception(regs, X86_TRAP_MC))
+ mce_panic("Failed kernel mode recovery", &m, NULL);
}
- local_irq_disable();
- ist_end_non_atomic();
-done:
+
+out_ist:
ist_exit(regs);
}
EXPORT_SYMBOL_GPL(do_machine_check);
@@ -1576,6 +1578,17 @@ static int __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
if (c->x86 == 6 && c->x86_model == 45)
quirk_no_way_out = quirk_sandybridge_ifu;
+ /*
+ * MCG_CAP.MCG_SER_P is necessary but not sufficient to know
+ * whether this processor will actually generate recoverable
+ * machine checks. Check to see if this is an E7 model Xeon.
+ * We can't do a model number check because E5 and E7 use the
+ * same model number. E5 doesn't support recovery, E7 does.
+ */
+ if (mca_cfg.recovery || (mca_cfg.ser &&
+ !strncmp(c->x86_model_id,
+ "Intel(R) Xeon(R) CPU E7-", 24)))
+ set_cpu_cap(c, X86_FEATURE_MCE_RECOVERY);
}
if (cfg->monarch_timeout < 0)
cfg->monarch_timeout = 0;
@@ -1617,10 +1630,10 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c)
case X86_VENDOR_AMD: {
u32 ebx = cpuid_ebx(0x80000007);
- mce_amd_feature_init(c);
mce_flags.overflow_recov = !!(ebx & BIT(0));
mce_flags.succor = !!(ebx & BIT(1));
mce_flags.smca = !!(ebx & BIT(3));
+ mce_amd_feature_init(c);
break;
}
@@ -2028,6 +2041,8 @@ static int __init mcheck_enable(char *str)
cfg->bootlog = (str[0] == 'b');
else if (!strcmp(str, "bios_cmci_threshold"))
cfg->bios_cmci_threshold = true;
+ else if (!strcmp(str, "recovery"))
+ cfg->recovery = true;
else if (isdigit(str[0])) {
if (get_option(&str, &cfg->tolerant) == 2)
get_option(&str, &(cfg->monarch_timeout));
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index e99b15077..9d656fd43 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -1,5 +1,5 @@
/*
- * (c) 2005-2015 Advanced Micro Devices, Inc.
+ * (c) 2005-2016 Advanced Micro Devices, Inc.
* Your use of this code is subject to the terms and conditions of the
* GNU general public license version 2. See "COPYING" or
* http://www.gnu.org/licenses/gpl.html
@@ -28,7 +28,7 @@
#include <asm/msr.h>
#include <asm/trace/irq_vectors.h>
-#define NR_BLOCKS 9
+#define NR_BLOCKS 5
#define THRESHOLD_MAX 0xFFF
#define INT_TYPE_APIC 0x00020000
#define MASK_VALID_HI 0x80000000
@@ -49,6 +49,19 @@
#define DEF_LVT_OFF 0x2
#define DEF_INT_TYPE_APIC 0x2
+/* Scalable MCA: */
+
+/* Threshold LVT offset is at MSR0xC0000410[15:12] */
+#define SMCA_THR_LVT_OFF 0xF000
+
+/*
+ * OS is required to set the MCAX bit to acknowledge that it is now using the
+ * new MSR ranges and new registers under each bank. It also means that the OS
+ * will configure deferred errors in the new MCx_CONFIG register. If the bit is
+ * not set, uncorrectable errors will cause a system panic.
+ */
+#define SMCA_MCAX_EN_OFF 0x1
+
static const char * const th_names[] = {
"load_store",
"insn_fetch",
@@ -58,6 +71,35 @@ static const char * const th_names[] = {
"execution_unit",
};
+/* Define HWID to IP type mappings for Scalable MCA */
+struct amd_hwid amd_hwids[] = {
+ [SMCA_F17H_CORE] = { "f17h_core", 0xB0 },
+ [SMCA_DF] = { "data_fabric", 0x2E },
+ [SMCA_UMC] = { "umc", 0x96 },
+ [SMCA_PB] = { "param_block", 0x5 },
+ [SMCA_PSP] = { "psp", 0xFF },
+ [SMCA_SMU] = { "smu", 0x1 },
+};
+EXPORT_SYMBOL_GPL(amd_hwids);
+
+const char * const amd_core_mcablock_names[] = {
+ [SMCA_LS] = "load_store",
+ [SMCA_IF] = "insn_fetch",
+ [SMCA_L2_CACHE] = "l2_cache",
+ [SMCA_DE] = "decode_unit",
+ [RES] = "",
+ [SMCA_EX] = "execution_unit",
+ [SMCA_FP] = "floating_point",
+ [SMCA_L3_CACHE] = "l3_cache",
+};
+EXPORT_SYMBOL_GPL(amd_core_mcablock_names);
+
+const char * const amd_df_mcablock_names[] = {
+ [SMCA_CS] = "coherent_slave",
+ [SMCA_PIE] = "pie",
+};
+EXPORT_SYMBOL_GPL(amd_df_mcablock_names);
+
static DEFINE_PER_CPU(struct threshold_bank **, threshold_banks);
static DEFINE_PER_CPU(unsigned char, bank_map); /* see which banks are on */
@@ -84,6 +126,13 @@ struct thresh_restart {
static inline bool is_shared_bank(int bank)
{
+ /*
+ * Scalable MCA provides for only one core to have access to the MSRs of
+ * a shared bank.
+ */
+ if (mce_flags.smca)
+ return false;
+
/* Bank 4 is for northbridge reporting and is thus shared */
return (bank == 4);
}
@@ -135,6 +184,14 @@ static int lvt_off_valid(struct threshold_block *b, int apic, u32 lo, u32 hi)
}
if (apic != msr) {
+ /*
+ * On SMCA CPUs, LVT offset is programmed at a different MSR, and
+ * the BIOS provides the value. The original field where LVT offset
+ * was set is reserved. Return early here:
+ */
+ if (mce_flags.smca)
+ return 0;
+
pr_err(FW_BUG "cpu %d, invalid threshold interrupt offset %d "
"for bank %d, block %d (MSR%08X=0x%x%08x)\n",
b->cpu, apic, b->bank, b->block, b->address, hi, lo);
@@ -144,10 +201,7 @@ static int lvt_off_valid(struct threshold_block *b, int apic, u32 lo, u32 hi)
return 1;
};
-/*
- * Called via smp_call_function_single(), must be called with correct
- * cpu affinity.
- */
+/* Reprogram MCx_MISC MSR behind this threshold bank. */
static void threshold_restart_bank(void *_tr)
{
struct thresh_restart *tr = _tr;
@@ -247,27 +301,116 @@ static void deferred_error_interrupt_enable(struct cpuinfo_x86 *c)
wrmsr(MSR_CU_DEF_ERR, low, high);
}
+static u32 get_block_address(u32 current_addr, u32 low, u32 high,
+ unsigned int bank, unsigned int block)
+{
+ u32 addr = 0, offset = 0;
+
+ if (mce_flags.smca) {
+ if (!block) {
+ addr = MSR_AMD64_SMCA_MCx_MISC(bank);
+ } else {
+ /*
+ * For SMCA enabled processors, BLKPTR field of the
+ * first MISC register (MCx_MISC0) indicates presence of
+ * additional MISC register set (MISC1-4).
+ */
+ u32 low, high;
+
+ if (rdmsr_safe(MSR_AMD64_SMCA_MCx_CONFIG(bank), &low, &high))
+ return addr;
+
+ if (!(low & MCI_CONFIG_MCAX))
+ return addr;
+
+ if (!rdmsr_safe(MSR_AMD64_SMCA_MCx_MISC(bank), &low, &high) &&
+ (low & MASK_BLKPTR_LO))
+ addr = MSR_AMD64_SMCA_MCx_MISCy(bank, block - 1);
+ }
+ return addr;
+ }
+
+ /* Fall back to method we used for older processors: */
+ switch (block) {
+ case 0:
+ addr = MSR_IA32_MCx_MISC(bank);
+ break;
+ case 1:
+ offset = ((low & MASK_BLKPTR_LO) >> 21);
+ if (offset)
+ addr = MCG_XBLK_ADDR + offset;
+ break;
+ default:
+ addr = ++current_addr;
+ }
+ return addr;
+}
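get_block_address() above falls back to the pre-SMCA addressing scheme on older processors. A stand-alone sketch of that legacy scheme, assuming the usual kernel constants (MSR_IA32_MC0_MISC 0x403, MCG_XBLK_ADDR 0xC0000400, MASK_BLKPTR_LO 0xFF000000); the example bank and MISC0 values are made up:

	#include <stdint.h>
	#include <stdio.h>

	/* misc0_low is the low half of MCx_MISC0 read for this bank earlier. */
	static uint32_t legacy_block_addr(uint32_t cur, uint32_t misc0_low,
					  unsigned int bank, unsigned int block)
	{
		switch (block) {
		case 0:
			return 0x403 + 4 * bank;		/* MCx_MISC0 */
		case 1: {
			uint32_t off = (misc0_low & 0xFF000000) >> 21;
			return off ? 0xC0000400 + off : 0;	/* MCG_XBLK_ADDR + 8*BLKPTR */
		}
		default:
			return cur + 1;				/* MISC2..4 follow MISC1 */
		}
	}

	int main(void)
	{
		/* Bank 4, BLKPTR = 1 in MISC0: block 1 lives at 0xC0000408. */
		printf("0x%x\n", (unsigned)legacy_block_addr(0, 0x01000000, 4, 1));
		return 0;
	}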
+
+static int
+prepare_threshold_block(unsigned int bank, unsigned int block, u32 addr,
+ int offset, u32 misc_high)
+{
+ unsigned int cpu = smp_processor_id();
+ struct threshold_block b;
+ int new;
+
+ if (!block)
+ per_cpu(bank_map, cpu) |= (1 << bank);
+
+ memset(&b, 0, sizeof(b));
+ b.cpu = cpu;
+ b.bank = bank;
+ b.block = block;
+ b.address = addr;
+ b.interrupt_capable = lvt_interrupt_supported(bank, misc_high);
+
+ if (!b.interrupt_capable)
+ goto done;
+
+ b.interrupt_enable = 1;
+
+ if (mce_flags.smca) {
+ u32 smca_low, smca_high;
+ u32 smca_addr = MSR_AMD64_SMCA_MCx_CONFIG(bank);
+
+ if (!rdmsr_safe(smca_addr, &smca_low, &smca_high)) {
+ smca_high |= SMCA_MCAX_EN_OFF;
+ wrmsr(smca_addr, smca_low, smca_high);
+ }
+
+ /* Gather LVT offset for thresholding: */
+ if (rdmsr_safe(MSR_CU_DEF_ERR, &smca_low, &smca_high))
+ goto out;
+
+ new = (smca_low & SMCA_THR_LVT_OFF) >> 12;
+ } else {
+ new = (misc_high & MASK_LVTOFF_HI) >> 20;
+ }
+
+ offset = setup_APIC_mce_threshold(offset, new);
+
+ if ((offset == new) && (mce_threshold_vector != amd_threshold_interrupt))
+ mce_threshold_vector = amd_threshold_interrupt;
+
+done:
+ mce_threshold_block_init(&b, offset);
+
+out:
+ return offset;
+}
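On SMCA parts, prepare_threshold_block() above picks the threshold LVT offset out of bits [15:12] of MSR_CU_DEF_ERR (mask SMCA_THR_LVT_OFF, shift 12). A tiny sketch of just that extraction, with a made-up MSR value:

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		uint32_t cu_def_err_lo = 0x0000A123;	/* pretend rdmsr result */
		unsigned int lvt_off = (cu_def_err_lo & 0xF000) >> 12;

		printf("threshold LVT offset = %u\n", lvt_off);	/* prints 10 */
		return 0;
	}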
+
/* cpu init entry point, called from mce.c with preempt off */
void mce_amd_feature_init(struct cpuinfo_x86 *c)
{
- struct threshold_block b;
- unsigned int cpu = smp_processor_id();
u32 low = 0, high = 0, address = 0;
unsigned int bank, block;
- int offset = -1, new;
+ int offset = -1;
for (bank = 0; bank < mca_cfg.banks; ++bank) {
for (block = 0; block < NR_BLOCKS; ++block) {
- if (block == 0)
- address = MSR_IA32_MCx_MISC(bank);
- else if (block == 1) {
- address = (low & MASK_BLKPTR_LO) >> 21;
- if (!address)
- break;
-
- address += MCG_XBLK_ADDR;
- } else
- ++address;
+ address = get_block_address(address, low, high, bank, block);
+ if (!address)
+ break;
if (rdmsr_safe(address, &low, &high))
break;
@@ -279,29 +422,7 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
(high & MASK_LOCKED_HI))
continue;
- if (!block)
- per_cpu(bank_map, cpu) |= (1 << bank);
-
- memset(&b, 0, sizeof(b));
- b.cpu = cpu;
- b.bank = bank;
- b.block = block;
- b.address = address;
- b.interrupt_capable = lvt_interrupt_supported(bank, high);
-
- if (!b.interrupt_capable)
- goto init;
-
- b.interrupt_enable = 1;
- new = (high & MASK_LVTOFF_HI) >> 20;
- offset = setup_APIC_mce_threshold(offset, new);
-
- if ((offset == new) &&
- (mce_threshold_vector != amd_threshold_interrupt))
- mce_threshold_vector = amd_threshold_interrupt;
-
-init:
- mce_threshold_block_init(&b, offset);
+ offset = prepare_threshold_block(bank, block, address, offset, high);
}
}
@@ -394,16 +515,9 @@ static void amd_threshold_interrupt(void)
if (!(per_cpu(bank_map, cpu) & (1 << bank)))
continue;
for (block = 0; block < NR_BLOCKS; ++block) {
- if (block == 0) {
- address = MSR_IA32_MCx_MISC(bank);
- } else if (block == 1) {
- address = (low & MASK_BLKPTR_LO) >> 21;
- if (!address)
- break;
- address += MCG_XBLK_ADDR;
- } else {
- ++address;
- }
+ address = get_block_address(address, low, high, bank, block);
+ if (!address)
+ break;
if (rdmsr_safe(address, &low, &high))
break;
@@ -623,16 +737,11 @@ static int allocate_threshold_blocks(unsigned int cpu, unsigned int bank,
if (err)
goto out_free;
recurse:
- if (!block) {
- address = (low & MASK_BLKPTR_LO) >> 21;
- if (!address)
- return 0;
- address += MCG_XBLK_ADDR;
- } else {
- ++address;
- }
+ address = get_block_address(address, low, high, bank, ++block);
+ if (!address)
+ return 0;
- err = allocate_threshold_blocks(cpu, bank, ++block, address);
+ err = allocate_threshold_blocks(cpu, bank, block, address);
if (err)
goto out_free;
diff --git a/arch/x86/kernel/cpu/mcheck/p5.c b/arch/x86/kernel/cpu/mcheck/p5.c
index 12402e10a..2a0717bf8 100644
--- a/arch/x86/kernel/cpu/mcheck/p5.c
+++ b/arch/x86/kernel/cpu/mcheck/p5.c
@@ -26,14 +26,12 @@ static void pentium_machine_check(struct pt_regs *regs, long error_code)
rdmsr(MSR_IA32_P5_MC_ADDR, loaddr, hi);
rdmsr(MSR_IA32_P5_MC_TYPE, lotype, hi);
- printk(KERN_EMERG
- "CPU#%d: Machine Check Exception: 0x%8X (type 0x%8X).\n",
- smp_processor_id(), loaddr, lotype);
+ pr_emerg("CPU#%d: Machine Check Exception: 0x%8X (type 0x%8X).\n",
+ smp_processor_id(), loaddr, lotype);
if (lotype & (1<<5)) {
- printk(KERN_EMERG
- "CPU#%d: Possible thermal failure (CPU on fire ?).\n",
- smp_processor_id());
+ pr_emerg("CPU#%d: Possible thermal failure (CPU on fire ?).\n",
+ smp_processor_id());
}
add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);
@@ -61,12 +59,10 @@ void intel_p5_mcheck_init(struct cpuinfo_x86 *c)
/* Read registers before enabling: */
rdmsr(MSR_IA32_P5_MC_ADDR, l, h);
rdmsr(MSR_IA32_P5_MC_TYPE, l, h);
- printk(KERN_INFO
- "Intel old style machine check architecture supported.\n");
+ pr_info("Intel old style machine check architecture supported.\n");
/* Enable MCE: */
cr4_set_bits(X86_CR4_MCE);
- printk(KERN_INFO
- "Intel old style machine check reporting enabled on CPU#%d.\n",
- smp_processor_id());
+ pr_info("Intel old style machine check reporting enabled on CPU#%d.\n",
+ smp_processor_id());
}
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index 05538582a..ac780cad3 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -190,7 +190,7 @@ static int therm_throt_process(bool new_event, int event, int level)
/* if we just entered the thermal event */
if (new_event) {
if (event == THERMAL_THROTTLING_EVENT)
- printk(KERN_CRIT "CPU%d: %s temperature above threshold, cpu clock throttled (total events = %lu)\n",
+ pr_crit("CPU%d: %s temperature above threshold, cpu clock throttled (total events = %lu)\n",
this_cpu,
level == CORE_LEVEL ? "Core" : "Package",
state->count);
@@ -198,8 +198,7 @@ static int therm_throt_process(bool new_event, int event, int level)
}
if (old_event) {
if (event == THERMAL_THROTTLING_EVENT)
- printk(KERN_INFO "CPU%d: %s temperature/speed normal\n",
- this_cpu,
+ pr_info("CPU%d: %s temperature/speed normal\n", this_cpu,
level == CORE_LEVEL ? "Core" : "Package");
return 1;
}
@@ -420,8 +419,8 @@ static void intel_thermal_interrupt(void)
static void unexpected_thermal_interrupt(void)
{
- printk(KERN_ERR "CPU%d: Unexpected LVT thermal interrupt!\n",
- smp_processor_id());
+ pr_err("CPU%d: Unexpected LVT thermal interrupt!\n",
+ smp_processor_id());
}
static void (*smp_thermal_vector)(void) = unexpected_thermal_interrupt;
@@ -502,7 +501,7 @@ void intel_init_thermal(struct cpuinfo_x86 *c)
if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) {
if (system_state == SYSTEM_BOOTING)
- printk(KERN_DEBUG "CPU%d: Thermal monitoring handled by SMI\n", cpu);
+ pr_debug("CPU%d: Thermal monitoring handled by SMI\n", cpu);
return;
}
@@ -560,8 +559,8 @@ void intel_init_thermal(struct cpuinfo_x86 *c)
l = apic_read(APIC_LVTTHMR);
apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED);
- printk_once(KERN_INFO "CPU0: Thermal monitoring enabled (%s)\n",
- tm2 ? "TM2" : "TM1");
+ pr_info_once("CPU0: Thermal monitoring enabled (%s)\n",
+ tm2 ? "TM2" : "TM1");
/* enable thermal throttle processing */
atomic_set(&therm_throt_en, 1);
diff --git a/arch/x86/kernel/cpu/mcheck/threshold.c b/arch/x86/kernel/cpu/mcheck/threshold.c
index 724598018..fcf9ae938 100644
--- a/arch/x86/kernel/cpu/mcheck/threshold.c
+++ b/arch/x86/kernel/cpu/mcheck/threshold.c
@@ -12,8 +12,8 @@
static void default_threshold_interrupt(void)
{
- printk(KERN_ERR "Unexpected threshold interrupt at vector %x\n",
- THRESHOLD_APIC_VECTOR);
+ pr_err("Unexpected threshold interrupt at vector %x\n",
+ THRESHOLD_APIC_VECTOR);
}
void (*mce_threshold_vector)(void) = default_threshold_interrupt;
diff --git a/arch/x86/kernel/cpu/mcheck/winchip.c b/arch/x86/kernel/cpu/mcheck/winchip.c
index 01dd87028..c6a722e1d 100644
--- a/arch/x86/kernel/cpu/mcheck/winchip.c
+++ b/arch/x86/kernel/cpu/mcheck/winchip.c
@@ -17,7 +17,7 @@ static void winchip_machine_check(struct pt_regs *regs, long error_code)
{
ist_enter(regs);
- printk(KERN_EMERG "CPU0: Machine Check Exception.\n");
+ pr_emerg("CPU0: Machine Check Exception.\n");
add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);
ist_exit(regs);
@@ -39,6 +39,5 @@ void winchip_mcheck_init(struct cpuinfo_x86 *c)
cr4_set_bits(X86_CR4_MCE);
- printk(KERN_INFO
- "Winchip machine check reporting enabled on CPU#0.\n");
+ pr_info("Winchip machine check reporting enabled on CPU#0.\n");
}
diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c
index c3cf6a217..d4872a456 100644
--- a/arch/x86/kernel/cpu/microcode/amd.c
+++ b/arch/x86/kernel/cpu/microcode/amd.c
@@ -431,10 +431,6 @@ int __init save_microcode_in_initrd_amd(void)
else
container = cont_va;
- if (ucode_new_rev)
- pr_info("microcode: updated early to new patch_level=0x%08x\n",
- ucode_new_rev);
-
eax = cpuid_eax(0x00000001);
eax = ((eax >> 8) & 0xf) + ((eax >> 20) & 0xff);
@@ -469,8 +465,7 @@ void reload_ucode_amd(void)
if (mc && rev < mc->hdr.patch_id) {
if (!__apply_microcode_amd(mc)) {
ucode_new_rev = mc->hdr.patch_id;
- pr_info("microcode: reload patch_level=0x%08x\n",
- ucode_new_rev);
+ pr_info("reload patch_level=0x%08x\n", ucode_new_rev);
}
}
}
@@ -793,15 +788,13 @@ static int verify_and_add_patch(u8 family, u8 *fw, unsigned int leftover)
return -EINVAL;
}
- patch->data = kzalloc(patch_size, GFP_KERNEL);
+ patch->data = kmemdup(fw + SECTION_HDR_SIZE, patch_size, GFP_KERNEL);
if (!patch->data) {
pr_err("Patch data allocation failure.\n");
kfree(patch);
return -EINVAL;
}
- /* All looks ok, copy patch... */
- memcpy(patch->data, fw + SECTION_HDR_SIZE, patch_size);
INIT_LIST_HEAD(&patch->plist);
patch->patch_id = mc_hdr->patch_id;
patch->equiv_cpu = proc_id;
@@ -938,10 +931,14 @@ struct microcode_ops * __init init_amd_microcode(void)
struct cpuinfo_x86 *c = &boot_cpu_data;
if (c->x86_vendor != X86_VENDOR_AMD || c->x86 < 0x10) {
- pr_warning("AMD CPU family 0x%x not supported\n", c->x86);
+ pr_warn("AMD CPU family 0x%x not supported\n", c->x86);
return NULL;
}
+ if (ucode_new_rev)
+ pr_info_once("microcode updated early to new patch_level=0x%08x\n",
+ ucode_new_rev);
+
return &microcode_amd_ops;
}
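The verify_and_add_patch() hunk above swaps a kzalloc()+memcpy() pair for kmemdup(). A user-space analog of what kmemdup() does, for illustration only (memdup() here is a hypothetical stand-in, not a kernel API):

	#include <stdio.h>
	#include <stdlib.h>
	#include <string.h>

	/* Allocate a buffer and copy len bytes into it, like kmemdup(). */
	static void *memdup(const void *src, size_t len)
	{
		void *p = malloc(len);

		if (p)
			memcpy(p, src, len);
		return p;
	}

	int main(void)
	{
		const char patch[] = "microcode bytes";
		char *copy = memdup(patch, sizeof(patch));

		puts(copy ? copy : "alloc failed");
		free(copy);
		return 0;
	}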
diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
index faec7120c..ac360bfbb 100644
--- a/arch/x86/kernel/cpu/microcode/core.c
+++ b/arch/x86/kernel/cpu/microcode/core.c
@@ -43,16 +43,8 @@
#define MICROCODE_VERSION "2.01"
static struct microcode_ops *microcode_ops;
-
static bool dis_ucode_ldr;
-static int __init disable_loader(char *str)
-{
- dis_ucode_ldr = true;
- return 1;
-}
-__setup("dis_ucode_ldr", disable_loader);
-
/*
* Synchronization.
*
@@ -81,15 +73,16 @@ struct cpu_info_ctx {
static bool __init check_loader_disabled_bsp(void)
{
+ static const char *__dis_opt_str = "dis_ucode_ldr";
+
#ifdef CONFIG_X86_32
const char *cmdline = (const char *)__pa_nodebug(boot_command_line);
- const char *opt = "dis_ucode_ldr";
- const char *option = (const char *)__pa_nodebug(opt);
+ const char *option = (const char *)__pa_nodebug(__dis_opt_str);
bool *res = (bool *)__pa_nodebug(&dis_ucode_ldr);
#else /* CONFIG_X86_64 */
const char *cmdline = boot_command_line;
- const char *option = "dis_ucode_ldr";
+ const char *option = __dis_opt_str;
bool *res = &dis_ucode_ldr;
#endif
@@ -479,7 +472,7 @@ static enum ucode_state microcode_init_cpu(int cpu, bool refresh_fw)
enum ucode_state ustate;
struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
- if (uci && uci->valid)
+ if (uci->valid)
return UCODE_OK;
if (collect_cpu_info(cpu))
@@ -630,7 +623,7 @@ int __init microcode_init(void)
struct cpuinfo_x86 *c = &boot_cpu_data;
int error;
- if (paravirt_enabled() || dis_ucode_ldr)
+ if (dis_ucode_ldr)
return -EINVAL;
if (c->x86_vendor == X86_VENDOR_INTEL)
diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index f9c7f3302..63a0888e3 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -39,9 +39,15 @@
#include <asm/setup.h>
#include <asm/msr.h>
-static unsigned long mc_saved_in_initrd[MAX_UCODE_COUNT];
+/*
+ * Temporary storage for microcode blob pointers. We note here the pointers to
+ * microcode blobs we've got from whatever storage (detached initrd, builtin).
+ * Later on, we put those into final storage mc_saved_data.mc_saved.
+ */
+static unsigned long mc_tmp_ptrs[MAX_UCODE_COUNT];
+
static struct mc_saved_data {
- unsigned int mc_saved_count;
+ unsigned int num_saved;
struct microcode_intel **mc_saved;
} mc_saved_data;
@@ -78,53 +84,50 @@ load_microcode_early(struct microcode_intel **saved,
}
static inline void
-copy_initrd_ptrs(struct microcode_intel **mc_saved, unsigned long *initrd,
- unsigned long off, int num_saved)
+copy_ptrs(struct microcode_intel **mc_saved, unsigned long *mc_ptrs,
+ unsigned long off, int num_saved)
{
int i;
for (i = 0; i < num_saved; i++)
- mc_saved[i] = (struct microcode_intel *)(initrd[i] + off);
+ mc_saved[i] = (struct microcode_intel *)(mc_ptrs[i] + off);
}
#ifdef CONFIG_X86_32
static void
-microcode_phys(struct microcode_intel **mc_saved_tmp,
- struct mc_saved_data *mc_saved_data)
+microcode_phys(struct microcode_intel **mc_saved_tmp, struct mc_saved_data *mcs)
{
int i;
struct microcode_intel ***mc_saved;
- mc_saved = (struct microcode_intel ***)
- __pa_nodebug(&mc_saved_data->mc_saved);
- for (i = 0; i < mc_saved_data->mc_saved_count; i++) {
+ mc_saved = (struct microcode_intel ***)__pa_nodebug(&mcs->mc_saved);
+
+ for (i = 0; i < mcs->num_saved; i++) {
struct microcode_intel *p;
- p = *(struct microcode_intel **)
- __pa_nodebug(mc_saved_data->mc_saved + i);
+ p = *(struct microcode_intel **)__pa_nodebug(mcs->mc_saved + i);
mc_saved_tmp[i] = (struct microcode_intel *)__pa_nodebug(p);
}
}
#endif
static enum ucode_state
-load_microcode(struct mc_saved_data *mc_saved_data, unsigned long *initrd,
- unsigned long initrd_start, struct ucode_cpu_info *uci)
+load_microcode(struct mc_saved_data *mcs, unsigned long *mc_ptrs,
+ unsigned long offset, struct ucode_cpu_info *uci)
{
struct microcode_intel *mc_saved_tmp[MAX_UCODE_COUNT];
- unsigned int count = mc_saved_data->mc_saved_count;
+ unsigned int count = mcs->num_saved;
- if (!mc_saved_data->mc_saved) {
- copy_initrd_ptrs(mc_saved_tmp, initrd, initrd_start, count);
+ if (!mcs->mc_saved) {
+ copy_ptrs(mc_saved_tmp, mc_ptrs, offset, count);
return load_microcode_early(mc_saved_tmp, count, uci);
} else {
#ifdef CONFIG_X86_32
- microcode_phys(mc_saved_tmp, mc_saved_data);
+ microcode_phys(mc_saved_tmp, mcs);
return load_microcode_early(mc_saved_tmp, count, uci);
#else
- return load_microcode_early(mc_saved_data->mc_saved,
- count, uci);
+ return load_microcode_early(mcs->mc_saved, count, uci);
#endif
}
}
@@ -175,25 +178,25 @@ matching_model_microcode(struct microcode_header_intel *mc_header,
}
static int
-save_microcode(struct mc_saved_data *mc_saved_data,
+save_microcode(struct mc_saved_data *mcs,
struct microcode_intel **mc_saved_src,
- unsigned int mc_saved_count)
+ unsigned int num_saved)
{
int i, j;
struct microcode_intel **saved_ptr;
int ret;
- if (!mc_saved_count)
+ if (!num_saved)
return -EINVAL;
/*
* Copy new microcode data.
*/
- saved_ptr = kcalloc(mc_saved_count, sizeof(struct microcode_intel *), GFP_KERNEL);
+ saved_ptr = kcalloc(num_saved, sizeof(struct microcode_intel *), GFP_KERNEL);
if (!saved_ptr)
return -ENOMEM;
- for (i = 0; i < mc_saved_count; i++) {
+ for (i = 0; i < num_saved; i++) {
struct microcode_header_intel *mc_hdr;
struct microcode_intel *mc;
unsigned long size;
@@ -207,20 +210,18 @@ save_microcode(struct mc_saved_data *mc_saved_data,
mc_hdr = &mc->hdr;
size = get_totalsize(mc_hdr);
- saved_ptr[i] = kmalloc(size, GFP_KERNEL);
+ saved_ptr[i] = kmemdup(mc, size, GFP_KERNEL);
if (!saved_ptr[i]) {
ret = -ENOMEM;
goto err;
}
-
- memcpy(saved_ptr[i], mc, size);
}
/*
* Point to newly saved microcode.
*/
- mc_saved_data->mc_saved = saved_ptr;
- mc_saved_data->mc_saved_count = mc_saved_count;
+ mcs->mc_saved = saved_ptr;
+ mcs->num_saved = num_saved;
return 0;
@@ -284,22 +285,20 @@ static unsigned int _save_mc(struct microcode_intel **mc_saved,
* BSP can stay in the platform.
*/
static enum ucode_state __init
-get_matching_model_microcode(int cpu, unsigned long start,
- void *data, size_t size,
- struct mc_saved_data *mc_saved_data,
- unsigned long *mc_saved_in_initrd,
+get_matching_model_microcode(unsigned long start, void *data, size_t size,
+ struct mc_saved_data *mcs, unsigned long *mc_ptrs,
struct ucode_cpu_info *uci)
{
- u8 *ucode_ptr = data;
- unsigned int leftover = size;
+ struct microcode_intel *mc_saved_tmp[MAX_UCODE_COUNT];
+ struct microcode_header_intel *mc_header;
+ unsigned int num_saved = mcs->num_saved;
enum ucode_state state = UCODE_OK;
+ unsigned int leftover = size;
+ u8 *ucode_ptr = data;
unsigned int mc_size;
- struct microcode_header_intel *mc_header;
- struct microcode_intel *mc_saved_tmp[MAX_UCODE_COUNT];
- unsigned int mc_saved_count = mc_saved_data->mc_saved_count;
int i;
- while (leftover && mc_saved_count < ARRAY_SIZE(mc_saved_tmp)) {
+ while (leftover && num_saved < ARRAY_SIZE(mc_saved_tmp)) {
if (leftover < sizeof(mc_header))
break;
@@ -318,32 +317,31 @@ get_matching_model_microcode(int cpu, unsigned long start,
* the platform, we need to find and save microcode patches
* with the same family and model as the BSP.
*/
- if (matching_model_microcode(mc_header, uci->cpu_sig.sig) !=
- UCODE_OK) {
+ if (matching_model_microcode(mc_header, uci->cpu_sig.sig) != UCODE_OK) {
ucode_ptr += mc_size;
continue;
}
- mc_saved_count = _save_mc(mc_saved_tmp, ucode_ptr, mc_saved_count);
+ num_saved = _save_mc(mc_saved_tmp, ucode_ptr, num_saved);
ucode_ptr += mc_size;
}
if (leftover) {
state = UCODE_ERROR;
- goto out;
+ return state;
}
- if (mc_saved_count == 0) {
+ if (!num_saved) {
state = UCODE_NFOUND;
- goto out;
+ return state;
}
- for (i = 0; i < mc_saved_count; i++)
- mc_saved_in_initrd[i] = (unsigned long)mc_saved_tmp[i] - start;
+ for (i = 0; i < num_saved; i++)
+ mc_ptrs[i] = (unsigned long)mc_saved_tmp[i] - start;
+
+ mcs->num_saved = num_saved;
- mc_saved_data->mc_saved_count = mc_saved_count;
-out:
return state;
}
@@ -373,7 +371,7 @@ static int collect_cpu_info_early(struct ucode_cpu_info *uci)
native_rdmsr(MSR_IA32_PLATFORM_ID, val[0], val[1]);
csig.pf = 1 << ((val[1] >> 18) & 7);
}
- native_wrmsr(MSR_IA32_UCODE_REV, 0, 0);
+ native_wrmsrl(MSR_IA32_UCODE_REV, 0);
/* As documented in the SDM: Do a CPUID 1 here */
sync_core();
@@ -396,11 +394,11 @@ static void show_saved_mc(void)
unsigned int sig, pf, rev, total_size, data_size, date;
struct ucode_cpu_info uci;
- if (mc_saved_data.mc_saved_count == 0) {
+ if (!mc_saved_data.num_saved) {
pr_debug("no microcode data saved.\n");
return;
}
- pr_debug("Total microcode saved: %d\n", mc_saved_data.mc_saved_count);
+ pr_debug("Total microcode saved: %d\n", mc_saved_data.num_saved);
collect_cpu_info_early(&uci);
@@ -409,7 +407,7 @@ static void show_saved_mc(void)
rev = uci.cpu_sig.rev;
pr_debug("CPU: sig=0x%x, pf=0x%x, rev=0x%x\n", sig, pf, rev);
- for (i = 0; i < mc_saved_data.mc_saved_count; i++) {
+ for (i = 0; i < mc_saved_data.num_saved; i++) {
struct microcode_header_intel *mc_saved_header;
struct extended_sigtable *ext_header;
int ext_sigcount;
@@ -465,7 +463,7 @@ int save_mc_for_early(u8 *mc)
{
struct microcode_intel *mc_saved_tmp[MAX_UCODE_COUNT];
unsigned int mc_saved_count_init;
- unsigned int mc_saved_count;
+ unsigned int num_saved;
struct microcode_intel **mc_saved;
int ret = 0;
int i;
@@ -476,23 +474,23 @@ int save_mc_for_early(u8 *mc)
*/
mutex_lock(&x86_cpu_microcode_mutex);
- mc_saved_count_init = mc_saved_data.mc_saved_count;
- mc_saved_count = mc_saved_data.mc_saved_count;
+ mc_saved_count_init = mc_saved_data.num_saved;
+ num_saved = mc_saved_data.num_saved;
mc_saved = mc_saved_data.mc_saved;
- if (mc_saved && mc_saved_count)
+ if (mc_saved && num_saved)
memcpy(mc_saved_tmp, mc_saved,
- mc_saved_count * sizeof(struct microcode_intel *));
+ num_saved * sizeof(struct microcode_intel *));
/*
 * Save the microcode patch mc in the mc_saved_tmp array if it's a newer
* version.
*/
- mc_saved_count = _save_mc(mc_saved_tmp, mc, mc_saved_count);
+ num_saved = _save_mc(mc_saved_tmp, mc, num_saved);
/*
 * Save mc_saved_tmp in the global mc_saved_data.
*/
- ret = save_microcode(&mc_saved_data, mc_saved_tmp, mc_saved_count);
+ ret = save_microcode(&mc_saved_data, mc_saved_tmp, num_saved);
if (ret) {
pr_err("Cannot save microcode patch.\n");
goto out;
@@ -536,7 +534,7 @@ static bool __init load_builtin_intel_microcode(struct cpio_data *cp)
static __initdata char ucode_name[] = "/*(DEBLOBBED)*/";
static __init enum ucode_state
-scan_microcode(struct mc_saved_data *mc_saved_data, unsigned long *initrd,
+scan_microcode(struct mc_saved_data *mcs, unsigned long *mc_ptrs,
unsigned long start, unsigned long size,
struct ucode_cpu_info *uci)
{
@@ -561,8 +559,8 @@ scan_microcode(struct mc_saved_data *mc_saved_data, unsigned long *initrd,
return UCODE_ERROR;
}
- return get_matching_model_microcode(0, start, cd.data, cd.size,
- mc_saved_data, initrd, uci);
+ return get_matching_model_microcode(start, cd.data, cd.size,
+ mcs, mc_ptrs, uci);
}
/*
@@ -571,14 +569,11 @@ scan_microcode(struct mc_saved_data *mc_saved_data, unsigned long *initrd,
static void
print_ucode_info(struct ucode_cpu_info *uci, unsigned int date)
{
- int cpu = smp_processor_id();
-
- pr_info("CPU%d microcode updated early to revision 0x%x, date = %04x-%02x-%02x\n",
- cpu,
- uci->cpu_sig.rev,
- date & 0xffff,
- date >> 24,
- (date >> 16) & 0xff);
+ pr_info_once("microcode updated early to revision 0x%x, date = %04x-%02x-%02x\n",
+ uci->cpu_sig.rev,
+ date & 0xffff,
+ date >> 24,
+ (date >> 16) & 0xff);
}
#ifdef CONFIG_X86_32
@@ -607,19 +602,19 @@ void show_ucode_info_early(void)
*/
static void print_ucode(struct ucode_cpu_info *uci)
{
- struct microcode_intel *mc_intel;
+ struct microcode_intel *mc;
int *delay_ucode_info_p;
int *current_mc_date_p;
- mc_intel = uci->mc;
- if (mc_intel == NULL)
+ mc = uci->mc;
+ if (!mc)
return;
delay_ucode_info_p = (int *)__pa_nodebug(&delay_ucode_info);
current_mc_date_p = (int *)__pa_nodebug(&current_mc_date);
*delay_ucode_info_p = 1;
- *current_mc_date_p = mc_intel->hdr.date;
+ *current_mc_date_p = mc->hdr.date;
}
#else
@@ -634,37 +629,35 @@ static inline void flush_tlb_early(void)
static inline void print_ucode(struct ucode_cpu_info *uci)
{
- struct microcode_intel *mc_intel;
+ struct microcode_intel *mc;
- mc_intel = uci->mc;
- if (mc_intel == NULL)
+ mc = uci->mc;
+ if (!mc)
return;
- print_ucode_info(uci, mc_intel->hdr.date);
+ print_ucode_info(uci, mc->hdr.date);
}
#endif
static int apply_microcode_early(struct ucode_cpu_info *uci, bool early)
{
- struct microcode_intel *mc_intel;
+ struct microcode_intel *mc;
unsigned int val[2];
- mc_intel = uci->mc;
- if (mc_intel == NULL)
+ mc = uci->mc;
+ if (!mc)
return 0;
/* write microcode via MSR 0x79 */
- native_wrmsr(MSR_IA32_UCODE_WRITE,
- (unsigned long) mc_intel->bits,
- (unsigned long) mc_intel->bits >> 16 >> 16);
- native_wrmsr(MSR_IA32_UCODE_REV, 0, 0);
+ native_wrmsrl(MSR_IA32_UCODE_WRITE, (unsigned long)mc->bits);
+ native_wrmsrl(MSR_IA32_UCODE_REV, 0);
/* As documented in the SDM: Do a CPUID 1 here */
sync_core();
/* get the current revision from MSR 0x8B */
native_rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]);
- if (val[1] != mc_intel->hdr.rev)
+ if (val[1] != mc->hdr.rev)
return -1;
#ifdef CONFIG_X86_64
@@ -676,25 +669,26 @@ static int apply_microcode_early(struct ucode_cpu_info *uci, bool early)
if (early)
print_ucode(uci);
else
- print_ucode_info(uci, mc_intel->hdr.date);
+ print_ucode_info(uci, mc->hdr.date);
return 0;
}
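apply_microcode_early() above now uses native_wrmsrl() with a single 64-bit value where the old code passed the low and high halves separately (the `>> 16 >> 16` dance). A sketch of the equivalent 64-bit split, with a made-up value:

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		uint64_t val = (uint64_t)0x12345678 << 32 | 0x9abcdef0;
		uint32_t lo = (uint32_t)val;		/* goes into EAX */
		uint32_t hi = (uint32_t)(val >> 32);	/* goes into EDX */

		printf("lo=0x%x hi=0x%x\n", lo, hi);
		return 0;
	}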
/*
* This function converts microcode patch offsets previously stored in
- * mc_saved_in_initrd to pointers and stores the pointers in mc_saved_data.
+ * mc_tmp_ptrs to pointers and stores the pointers in mc_saved_data.
*/
int __init save_microcode_in_initrd_intel(void)
{
- unsigned int count = mc_saved_data.mc_saved_count;
+ unsigned int count = mc_saved_data.num_saved;
struct microcode_intel *mc_saved[MAX_UCODE_COUNT];
int ret = 0;
- if (count == 0)
+ if (!count)
return ret;
- copy_initrd_ptrs(mc_saved, mc_saved_in_initrd, get_initrd_start(), count);
+ copy_ptrs(mc_saved, mc_tmp_ptrs, get_initrd_start(), count);
+
ret = save_microcode(&mc_saved_data, mc_saved, count);
if (ret)
pr_err("Cannot save microcode patches from initrd.\n");
@@ -705,8 +699,7 @@ int __init save_microcode_in_initrd_intel(void)
}
static void __init
-_load_ucode_intel_bsp(struct mc_saved_data *mc_saved_data,
- unsigned long *initrd,
+_load_ucode_intel_bsp(struct mc_saved_data *mcs, unsigned long *mc_ptrs,
unsigned long start, unsigned long size)
{
struct ucode_cpu_info uci;
@@ -714,11 +707,11 @@ _load_ucode_intel_bsp(struct mc_saved_data *mc_saved_data,
collect_cpu_info_early(&uci);
- ret = scan_microcode(mc_saved_data, initrd, start, size, &uci);
+ ret = scan_microcode(mcs, mc_ptrs, start, size, &uci);
if (ret != UCODE_OK)
return;
- ret = load_microcode(mc_saved_data, initrd, start, &uci);
+ ret = load_microcode(mcs, mc_ptrs, start, &uci);
if (ret != UCODE_OK)
return;
@@ -741,42 +734,40 @@ void __init load_ucode_intel_bsp(void)
start = (size ? p->hdr.ramdisk_image : 0);
_load_ucode_intel_bsp((struct mc_saved_data *)__pa_nodebug(&mc_saved_data),
- (unsigned long *)__pa_nodebug(&mc_saved_in_initrd),
+ (unsigned long *)__pa_nodebug(&mc_tmp_ptrs),
start, size);
#else
size = boot_params.hdr.ramdisk_size;
start = (size ? boot_params.hdr.ramdisk_image + PAGE_OFFSET : 0);
- _load_ucode_intel_bsp(&mc_saved_data, mc_saved_in_initrd, start, size);
+ _load_ucode_intel_bsp(&mc_saved_data, mc_tmp_ptrs, start, size);
#endif
}
void load_ucode_intel_ap(void)
{
- struct mc_saved_data *mc_saved_data_p;
+ unsigned long *mcs_tmp_p;
+ struct mc_saved_data *mcs_p;
struct ucode_cpu_info uci;
- unsigned long *mc_saved_in_initrd_p;
enum ucode_state ret;
#ifdef CONFIG_X86_32
- mc_saved_in_initrd_p = (unsigned long *)__pa_nodebug(mc_saved_in_initrd);
- mc_saved_data_p = (struct mc_saved_data *)__pa_nodebug(&mc_saved_data);
+ mcs_tmp_p = (unsigned long *)__pa_nodebug(mc_tmp_ptrs);
+ mcs_p = (struct mc_saved_data *)__pa_nodebug(&mc_saved_data);
#else
- mc_saved_in_initrd_p = mc_saved_in_initrd;
- mc_saved_data_p = &mc_saved_data;
+ mcs_tmp_p = mc_tmp_ptrs;
+ mcs_p = &mc_saved_data;
#endif
/*
* If there is no valid ucode previously saved in memory, no need to
* update ucode on this AP.
*/
- if (mc_saved_data_p->mc_saved_count == 0)
+ if (!mcs_p->num_saved)
return;
collect_cpu_info_early(&uci);
- ret = load_microcode(mc_saved_data_p, mc_saved_in_initrd_p,
- get_initrd_start_addr(), &uci);
-
+ ret = load_microcode(mcs_p, mcs_tmp_p, get_initrd_start_addr(), &uci);
if (ret != UCODE_OK)
return;
@@ -788,13 +779,13 @@ void reload_ucode_intel(void)
struct ucode_cpu_info uci;
enum ucode_state ret;
- if (!mc_saved_data.mc_saved_count)
+ if (!mc_saved_data.num_saved)
return;
collect_cpu_info_early(&uci);
ret = load_microcode_early(mc_saved_data.mc_saved,
- mc_saved_data.mc_saved_count, &uci);
+ mc_saved_data.num_saved, &uci);
if (ret != UCODE_OK)
return;
@@ -827,7 +818,7 @@ static int collect_cpu_info(int cpu_num, struct cpu_signature *csig)
* return 0 - no update found
* return 1 - found update
*/
-static int get_matching_mc(struct microcode_intel *mc_intel, int cpu)
+static int get_matching_mc(struct microcode_intel *mc, int cpu)
{
struct cpu_signature cpu_sig;
unsigned int csig, cpf, crev;
@@ -838,39 +829,36 @@ static int get_matching_mc(struct microcode_intel *mc_intel, int cpu)
cpf = cpu_sig.pf;
crev = cpu_sig.rev;
- return has_newer_microcode(mc_intel, csig, cpf, crev);
+ return has_newer_microcode(mc, csig, cpf, crev);
}
static int apply_microcode_intel(int cpu)
{
- struct microcode_intel *mc_intel;
+ struct microcode_intel *mc;
struct ucode_cpu_info *uci;
+ struct cpuinfo_x86 *c;
unsigned int val[2];
- int cpu_num = raw_smp_processor_id();
- struct cpuinfo_x86 *c = &cpu_data(cpu_num);
-
- uci = ucode_cpu_info + cpu;
- mc_intel = uci->mc;
/* We should bind the task to the CPU */
- BUG_ON(cpu_num != cpu);
+ if (WARN_ON(raw_smp_processor_id() != cpu))
+ return -1;
- if (mc_intel == NULL)
+ uci = ucode_cpu_info + cpu;
+ mc = uci->mc;
+ if (!mc)
return 0;
/*
* Microcode on this CPU could be updated earlier. Only apply the
- * microcode patch in mc_intel when it is newer than the one on this
+ * microcode patch in mc when it is newer than the one on this
* CPU.
*/
- if (get_matching_mc(mc_intel, cpu) == 0)
+ if (!get_matching_mc(mc, cpu))
return 0;
/* write microcode via MSR 0x79 */
- wrmsr(MSR_IA32_UCODE_WRITE,
- (unsigned long) mc_intel->bits,
- (unsigned long) mc_intel->bits >> 16 >> 16);
- wrmsr(MSR_IA32_UCODE_REV, 0, 0);
+ wrmsrl(MSR_IA32_UCODE_WRITE, (unsigned long)mc->bits);
+ wrmsrl(MSR_IA32_UCODE_REV, 0);
/* As documented in the SDM: Do a CPUID 1 here */
sync_core();
@@ -878,16 +866,19 @@ static int apply_microcode_intel(int cpu)
/* get the current revision from MSR 0x8B */
rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]);
- if (val[1] != mc_intel->hdr.rev) {
+ if (val[1] != mc->hdr.rev) {
pr_err("CPU%d update to revision 0x%x failed\n",
- cpu_num, mc_intel->hdr.rev);
+ cpu, mc->hdr.rev);
return -1;
}
+
pr_info("CPU%d updated to revision 0x%x, date = %04x-%02x-%02x\n",
- cpu_num, val[1],
- mc_intel->hdr.date & 0xffff,
- mc_intel->hdr.date >> 24,
- (mc_intel->hdr.date >> 16) & 0xff);
+ cpu, val[1],
+ mc->hdr.date & 0xffff,
+ mc->hdr.date >> 24,
+ (mc->hdr.date >> 16) & 0xff);
+
+ c = &cpu_data(cpu);
uci->cpu_sig.rev = val[1];
c->microcode = val[1];
diff --git a/arch/x86/kernel/cpu/microcode/intel_lib.c b/arch/x86/kernel/cpu/microcode/intel_lib.c
index b96896bcb..2ce1a7dc4 100644
--- a/arch/x86/kernel/cpu/microcode/intel_lib.c
+++ b/arch/x86/kernel/cpu/microcode/intel_lib.c
@@ -49,7 +49,7 @@ int microcode_sanity_check(void *mc, int print_err)
unsigned long total_size, data_size, ext_table_size;
struct microcode_header_intel *mc_header = mc;
struct extended_sigtable *ext_header = NULL;
- int sum, orig_sum, ext_sigcount = 0, i;
+ u32 sum, orig_sum, ext_sigcount = 0, i;
struct extended_signature *ext_sig;
total_size = get_totalsize(mc_header);
@@ -57,69 +57,85 @@ int microcode_sanity_check(void *mc, int print_err)
if (data_size + MC_HEADER_SIZE > total_size) {
if (print_err)
- pr_err("error! Bad data size in microcode data file\n");
+ pr_err("Error: bad microcode data file size.\n");
return -EINVAL;
}
if (mc_header->ldrver != 1 || mc_header->hdrver != 1) {
if (print_err)
- pr_err("error! Unknown microcode update format\n");
+ pr_err("Error: invalid/unknown microcode update format.\n");
return -EINVAL;
}
+
ext_table_size = total_size - (MC_HEADER_SIZE + data_size);
if (ext_table_size) {
+ u32 ext_table_sum = 0;
+ u32 *ext_tablep;
+
if ((ext_table_size < EXT_HEADER_SIZE)
|| ((ext_table_size - EXT_HEADER_SIZE) % EXT_SIGNATURE_SIZE)) {
if (print_err)
- pr_err("error! Small exttable size in microcode data file\n");
+ pr_err("Error: truncated extended signature table.\n");
return -EINVAL;
}
+
ext_header = mc + MC_HEADER_SIZE + data_size;
if (ext_table_size != exttable_size(ext_header)) {
if (print_err)
- pr_err("error! Bad exttable size in microcode data file\n");
+ pr_err("Error: extended signature table size mismatch.\n");
return -EFAULT;
}
+
ext_sigcount = ext_header->count;
- }
- /* check extended table checksum */
- if (ext_table_size) {
- int ext_table_sum = 0;
- int *ext_tablep = (int *)ext_header;
+ /*
+ * Check extended table checksum: the sum of all dwords that
+ * comprise a valid table must be 0.
+ */
+ ext_tablep = (u32 *)ext_header;
- i = ext_table_size / DWSIZE;
+ i = ext_table_size / sizeof(u32);
while (i--)
ext_table_sum += ext_tablep[i];
+
if (ext_table_sum) {
if (print_err)
- pr_warn("aborting, bad extended signature table checksum\n");
+ pr_warn("Bad extended signature table checksum, aborting.\n");
return -EINVAL;
}
}
- /* calculate the checksum */
+ /*
+ * Calculate the checksum of update data and header. The checksum of
+ * valid update data and header including the extended signature table
+ * must be 0.
+ */
orig_sum = 0;
- i = (MC_HEADER_SIZE + data_size) / DWSIZE;
+ i = (MC_HEADER_SIZE + data_size) / sizeof(u32);
while (i--)
- orig_sum += ((int *)mc)[i];
+ orig_sum += ((u32 *)mc)[i];
+
if (orig_sum) {
if (print_err)
- pr_err("aborting, bad checksum\n");
+ pr_err("Bad microcode data checksum, aborting.\n");
return -EINVAL;
}
+
if (!ext_table_size)
return 0;
- /* check extended signature checksum */
+
+ /*
+ * Check extended signature checksum: 0 => valid.
+ */
for (i = 0; i < ext_sigcount; i++) {
ext_sig = (void *)ext_header + EXT_HEADER_SIZE +
EXT_SIGNATURE_SIZE * i;
- sum = orig_sum
- - (mc_header->sig + mc_header->pf + mc_header->cksum)
- + (ext_sig->sig + ext_sig->pf + ext_sig->cksum);
+
+ sum = (mc_header->sig + mc_header->pf + mc_header->cksum) -
+ (ext_sig->sig + ext_sig->pf + ext_sig->cksum);
if (sum) {
if (print_err)
- pr_err("aborting, bad checksum\n");
+ pr_err("Bad extended signature checksum, aborting.\n");
return -EINVAL;
}
}
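The checksum rules spelled out in the comments above boil down to: sum the area as 32-bit words and expect zero, because the producer stores the two's complement of the running sum in the checksum field. A stand-alone sketch with fabricated data, for illustration only:

	#include <stdint.h>
	#include <stdio.h>

	static uint32_t dword_sum(const uint32_t *p, size_t ndwords)
	{
		uint32_t sum = 0;

		while (ndwords--)
			sum += p[ndwords];
		return sum;
	}

	int main(void)
	{
		uint32_t blob[4] = { 0x11111111, 0x22222222, 0x33333333, 0 };

		blob[3] = -(blob[0] + blob[1] + blob[2]);	/* checksum field */
		printf("sum = 0x%x\n", dword_sum(blob, 4));	/* 0 => valid */
		return 0;
	}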
diff --git a/arch/x86/kernel/cpu/mkcapflags.sh b/arch/x86/kernel/cpu/mkcapflags.sh
index 3f20710a5..6988c7440 100644
--- a/arch/x86/kernel/cpu/mkcapflags.sh
+++ b/arch/x86/kernel/cpu/mkcapflags.sh
@@ -1,6 +1,6 @@
#!/bin/sh
#
-# Generate the x86_cap/bug_flags[] arrays from include/asm/cpufeature.h
+# Generate the x86_cap/bug_flags[] arrays from include/asm/cpufeatures.h
#
IN=$1
@@ -49,8 +49,8 @@ dump_array()
trap 'rm "$OUT"' EXIT
(
- echo "#ifndef _ASM_X86_CPUFEATURE_H"
- echo "#include <asm/cpufeature.h>"
+ echo "#ifndef _ASM_X86_CPUFEATURES_H"
+ echo "#include <asm/cpufeatures.h>"
echo "#endif"
echo ""
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index 20e242ea1..10c11b4da 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -152,6 +152,11 @@ static struct clocksource hyperv_cs = {
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
};
+static unsigned char hv_get_nmi_reason(void)
+{
+ return 0;
+}
+
static void __init ms_hyperv_init_platform(void)
{
/*
@@ -161,8 +166,8 @@ static void __init ms_hyperv_init_platform(void)
ms_hyperv.misc_features = cpuid_edx(HYPERV_CPUID_FEATURES);
ms_hyperv.hints = cpuid_eax(HYPERV_CPUID_ENLIGHTMENT_INFO);
- printk(KERN_INFO "HyperV: features 0x%x, hints 0x%x\n",
- ms_hyperv.features, ms_hyperv.hints);
+ pr_info("HyperV: features 0x%x, hints 0x%x\n",
+ ms_hyperv.features, ms_hyperv.hints);
#ifdef CONFIG_X86_LOCAL_APIC
if (ms_hyperv.features & HV_X64_MSR_APIC_FREQUENCY_AVAILABLE) {
@@ -174,8 +179,8 @@ static void __init ms_hyperv_init_platform(void)
rdmsrl(HV_X64_MSR_APIC_FREQUENCY, hv_lapic_frequency);
hv_lapic_frequency = div_u64(hv_lapic_frequency, HZ);
lapic_timer_frequency = hv_lapic_frequency;
- printk(KERN_INFO "HyperV: LAPIC Timer Frequency: %#x\n",
- lapic_timer_frequency);
+ pr_info("HyperV: LAPIC Timer Frequency: %#x\n",
+ lapic_timer_frequency);
}
#endif
@@ -191,6 +196,13 @@ static void __init ms_hyperv_init_platform(void)
machine_ops.crash_shutdown = hv_machine_crash_shutdown;
#endif
mark_tsc_unstable("running on Hyper-V");
+
+ /*
+ * Generation 2 instances don't support reading the NMI status from
+ * 0x61 port.
+ */
+ if (efi_enabled(EFI_BOOT))
+ x86_platform.get_nmi_reason = hv_get_nmi_reason;
}
const __refconst struct hypervisor_x86 x86_hyper_ms_hyperv = {
diff --git a/arch/x86/kernel/cpu/mtrr/centaur.c b/arch/x86/kernel/cpu/mtrr/centaur.c
index 316fe3e60..3d689937f 100644
--- a/arch/x86/kernel/cpu/mtrr/centaur.c
+++ b/arch/x86/kernel/cpu/mtrr/centaur.c
@@ -103,7 +103,7 @@ centaur_validate_add_page(unsigned long base, unsigned long size, unsigned int t
*/
if (type != MTRR_TYPE_WRCOMB &&
(centaur_mcr_type == 0 || type != MTRR_TYPE_UNCACHABLE)) {
- pr_warning("mtrr: only write-combining%s supported\n",
+ pr_warn("mtrr: only write-combining%s supported\n",
centaur_mcr_type ? " and uncacheable are" : " is");
return -EINVAL;
}
diff --git a/arch/x86/kernel/cpu/mtrr/cleanup.c b/arch/x86/kernel/cpu/mtrr/cleanup.c
index 0d98503c2..31e951ce6 100644
--- a/arch/x86/kernel/cpu/mtrr/cleanup.c
+++ b/arch/x86/kernel/cpu/mtrr/cleanup.c
@@ -57,9 +57,9 @@ static int __initdata nr_range;
static struct var_mtrr_range_state __initdata range_state[RANGE_NUM];
static int __initdata debug_print;
-#define Dprintk(x...) do { if (debug_print) printk(KERN_DEBUG x); } while (0)
+#define Dprintk(x...) do { if (debug_print) pr_debug(x); } while (0)
-#define BIOS_BUG_MSG KERN_WARNING \
+#define BIOS_BUG_MSG \
"WARNING: BIOS bug: VAR MTRR %d contains strange UC entry under 1M, check with your system vendor!\n"
static int __init
@@ -81,9 +81,9 @@ x86_get_mtrr_mem_range(struct range *range, int nr_range,
base, base + size);
}
if (debug_print) {
- printk(KERN_DEBUG "After WB checking\n");
+ pr_debug("After WB checking\n");
for (i = 0; i < nr_range; i++)
- printk(KERN_DEBUG "MTRR MAP PFN: %016llx - %016llx\n",
+ pr_debug("MTRR MAP PFN: %016llx - %016llx\n",
range[i].start, range[i].end);
}
@@ -101,7 +101,7 @@ x86_get_mtrr_mem_range(struct range *range, int nr_range,
(mtrr_state.enabled & MTRR_STATE_MTRR_ENABLED) &&
(mtrr_state.enabled & MTRR_STATE_MTRR_FIXED_ENABLED)) {
/* Var MTRR contains UC entry below 1M? Skip it: */
- printk(BIOS_BUG_MSG, i);
+ pr_warn(BIOS_BUG_MSG, i);
if (base + size <= (1<<(20-PAGE_SHIFT)))
continue;
size -= (1<<(20-PAGE_SHIFT)) - base;
@@ -114,11 +114,11 @@ x86_get_mtrr_mem_range(struct range *range, int nr_range,
extra_remove_base + extra_remove_size);
if (debug_print) {
- printk(KERN_DEBUG "After UC checking\n");
+ pr_debug("After UC checking\n");
for (i = 0; i < RANGE_NUM; i++) {
if (!range[i].end)
continue;
- printk(KERN_DEBUG "MTRR MAP PFN: %016llx - %016llx\n",
+ pr_debug("MTRR MAP PFN: %016llx - %016llx\n",
range[i].start, range[i].end);
}
}
@@ -126,9 +126,9 @@ x86_get_mtrr_mem_range(struct range *range, int nr_range,
/* sort the ranges */
nr_range = clean_sort_range(range, RANGE_NUM);
if (debug_print) {
- printk(KERN_DEBUG "After sorting\n");
+ pr_debug("After sorting\n");
for (i = 0; i < nr_range; i++)
- printk(KERN_DEBUG "MTRR MAP PFN: %016llx - %016llx\n",
+ pr_debug("MTRR MAP PFN: %016llx - %016llx\n",
range[i].start, range[i].end);
}
@@ -544,7 +544,7 @@ static void __init print_out_mtrr_range_state(void)
start_base = to_size_factor(start_base, &start_factor),
type = range_state[i].type;
- printk(KERN_DEBUG "reg %d, base: %ld%cB, range: %ld%cB, type %s\n",
+ pr_debug("reg %d, base: %ld%cB, range: %ld%cB, type %s\n",
i, start_base, start_factor,
size_base, size_factor,
(type == MTRR_TYPE_UNCACHABLE) ? "UC" :
@@ -713,7 +713,7 @@ int __init mtrr_cleanup(unsigned address_bits)
return 0;
/* Print original var MTRRs at first, for debugging: */
- printk(KERN_DEBUG "original variable MTRRs\n");
+ pr_debug("original variable MTRRs\n");
print_out_mtrr_range_state();
memset(range, 0, sizeof(range));
@@ -733,7 +733,7 @@ int __init mtrr_cleanup(unsigned address_bits)
x_remove_base, x_remove_size);
range_sums = sum_ranges(range, nr_range);
- printk(KERN_INFO "total RAM covered: %ldM\n",
+ pr_info("total RAM covered: %ldM\n",
range_sums >> (20 - PAGE_SHIFT));
if (mtrr_chunk_size && mtrr_gran_size) {
@@ -745,12 +745,11 @@ int __init mtrr_cleanup(unsigned address_bits)
if (!result[i].bad) {
set_var_mtrr_all(address_bits);
- printk(KERN_DEBUG "New variable MTRRs\n");
+ pr_debug("New variable MTRRs\n");
print_out_mtrr_range_state();
return 1;
}
- printk(KERN_INFO "invalid mtrr_gran_size or mtrr_chunk_size, "
- "will find optimal one\n");
+ pr_info("invalid mtrr_gran_size or mtrr_chunk_size, will find optimal one\n");
}
i = 0;
@@ -768,7 +767,7 @@ int __init mtrr_cleanup(unsigned address_bits)
x_remove_base, x_remove_size, i);
if (debug_print) {
mtrr_print_out_one_result(i);
- printk(KERN_INFO "\n");
+ pr_info("\n");
}
i++;
@@ -779,7 +778,7 @@ int __init mtrr_cleanup(unsigned address_bits)
index_good = mtrr_search_optimal_index();
if (index_good != -1) {
- printk(KERN_INFO "Found optimal setting for mtrr clean up\n");
+ pr_info("Found optimal setting for mtrr clean up\n");
i = index_good;
mtrr_print_out_one_result(i);
@@ -790,7 +789,7 @@ int __init mtrr_cleanup(unsigned address_bits)
gran_size <<= 10;
x86_setup_var_mtrrs(range, nr_range, chunk_size, gran_size);
set_var_mtrr_all(address_bits);
- printk(KERN_DEBUG "New variable MTRRs\n");
+ pr_debug("New variable MTRRs\n");
print_out_mtrr_range_state();
return 1;
} else {
@@ -799,8 +798,8 @@ int __init mtrr_cleanup(unsigned address_bits)
mtrr_print_out_one_result(i);
}
- printk(KERN_INFO "mtrr_cleanup: can not find optimal value\n");
- printk(KERN_INFO "please specify mtrr_gran_size/mtrr_chunk_size\n");
+ pr_info("mtrr_cleanup: can not find optimal value\n");
+ pr_info("please specify mtrr_gran_size/mtrr_chunk_size\n");
return 0;
}
@@ -918,7 +917,7 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn)
/* kvm/qemu doesn't have mtrr set right, don't trim them all: */
if (!highest_pfn) {
- printk(KERN_INFO "CPU MTRRs all blank - virtualized system.\n");
+ pr_info("CPU MTRRs all blank - virtualized system.\n");
return 0;
}
@@ -973,7 +972,8 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn)
end_pfn);
if (total_trim_size) {
- pr_warning("WARNING: BIOS bug: CPU MTRRs don't cover all of memory, losing %lluMB of RAM.\n", total_trim_size >> 20);
+ pr_warn("WARNING: BIOS bug: CPU MTRRs don't cover all of memory, losing %lluMB of RAM.\n",
+ total_trim_size >> 20);
if (!changed_by_mtrr_cleanup)
WARN_ON(1);
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index c870af161..19f57360d 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -42,7 +42,7 @@ EXPORT_SYMBOL_GPL(mtrr_state);
* "BIOS and Kernel Developer's Guide for the AMD Athlon 64 and AMD
* Opteron Processors" (26094 Rev. 3.30 February 2006), section
* "13.2.1.2 SYSCFG Register": "The MtrrFixDramModEn bit should be set
- * to 1 during BIOS initalization of the fixed MTRRs, then cleared to
+ * to 1 during BIOS initialization of the fixed MTRRs, then cleared to
* 0 for operation."
*/
static inline void k8_check_syscfg_dram_mod_en(void)
@@ -55,7 +55,7 @@ static inline void k8_check_syscfg_dram_mod_en(void)
rdmsr(MSR_K8_SYSCFG, lo, hi);
if (lo & K8_MTRRFIXRANGE_DRAM_MODIFY) {
- printk(KERN_ERR FW_WARN "MTRR: CPU %u: SYSCFG[MtrrFixDramModEn]"
+ pr_err(FW_WARN "MTRR: CPU %u: SYSCFG[MtrrFixDramModEn]"
" not cleared by BIOS, clearing this bit\n",
smp_processor_id());
lo &= ~K8_MTRRFIXRANGE_DRAM_MODIFY;
@@ -501,14 +501,14 @@ void __init mtrr_state_warn(void)
if (!mask)
return;
if (mask & MTRR_CHANGE_MASK_FIXED)
- pr_warning("mtrr: your CPUs had inconsistent fixed MTRR settings\n");
+ pr_warn("mtrr: your CPUs had inconsistent fixed MTRR settings\n");
if (mask & MTRR_CHANGE_MASK_VARIABLE)
- pr_warning("mtrr: your CPUs had inconsistent variable MTRR settings\n");
+ pr_warn("mtrr: your CPUs had inconsistent variable MTRR settings\n");
if (mask & MTRR_CHANGE_MASK_DEFTYPE)
- pr_warning("mtrr: your CPUs had inconsistent MTRRdefType settings\n");
+ pr_warn("mtrr: your CPUs had inconsistent MTRRdefType settings\n");
- printk(KERN_INFO "mtrr: probably your BIOS does not setup all CPUs.\n");
- printk(KERN_INFO "mtrr: corrected configuration.\n");
+ pr_info("mtrr: probably your BIOS does not setup all CPUs.\n");
+ pr_info("mtrr: corrected configuration.\n");
}
/*
@@ -519,8 +519,7 @@ void __init mtrr_state_warn(void)
void mtrr_wrmsr(unsigned msr, unsigned a, unsigned b)
{
if (wrmsr_safe(msr, a, b) < 0) {
- printk(KERN_ERR
- "MTRR: CPU %u: Writing MSR %x to %x:%x failed\n",
+ pr_err("MTRR: CPU %u: Writing MSR %x to %x:%x failed\n",
smp_processor_id(), msr, a, b);
}
}
@@ -607,7 +606,7 @@ static void generic_get_mtrr(unsigned int reg, unsigned long *base,
tmp |= ~((1ULL<<(hi - 1)) - 1);
if (tmp != mask) {
- printk(KERN_WARNING "mtrr: your BIOS has configured an incorrect mask, fixing it.\n");
+ pr_warn("mtrr: your BIOS has configured an incorrect mask, fixing it.\n");
add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
mask = tmp;
}
@@ -858,13 +857,13 @@ int generic_validate_add_page(unsigned long base, unsigned long size,
boot_cpu_data.x86_model == 1 &&
boot_cpu_data.x86_mask <= 7) {
if (base & ((1 << (22 - PAGE_SHIFT)) - 1)) {
- pr_warning("mtrr: base(0x%lx000) is not 4 MiB aligned\n", base);
+ pr_warn("mtrr: base(0x%lx000) is not 4 MiB aligned\n", base);
return -EINVAL;
}
if (!(base + size < 0x70000 || base > 0x7003F) &&
(type == MTRR_TYPE_WRCOMB
|| type == MTRR_TYPE_WRBACK)) {
- pr_warning("mtrr: writable mtrr between 0x70000000 and 0x7003FFFF may hang the CPU.\n");
+ pr_warn("mtrr: writable mtrr between 0x70000000 and 0x7003FFFF may hang the CPU.\n");
return -EINVAL;
}
}
@@ -878,7 +877,7 @@ int generic_validate_add_page(unsigned long base, unsigned long size,
lbase = lbase >> 1, last = last >> 1)
;
if (lbase != last) {
- pr_warning("mtrr: base(0x%lx000) is not aligned on a size(0x%lx000) boundary\n", base, size);
+ pr_warn("mtrr: base(0x%lx000) is not aligned on a size(0x%lx000) boundary\n", base, size);
return -EINVAL;
}
return 0;
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index 5c3d149ee..10f8d4796 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -47,7 +47,7 @@
#include <linux/smp.h>
#include <linux/syscore_ops.h>
-#include <asm/processor.h>
+#include <asm/cpufeature.h>
#include <asm/e820.h>
#include <asm/mtrr.h>
#include <asm/msr.h>
@@ -300,24 +300,24 @@ int mtrr_add_page(unsigned long base, unsigned long size,
return error;
if (type >= MTRR_NUM_TYPES) {
- pr_warning("mtrr: type: %u invalid\n", type);
+ pr_warn("mtrr: type: %u invalid\n", type);
return -EINVAL;
}
/* If the type is WC, check that this processor supports it */
if ((type == MTRR_TYPE_WRCOMB) && !have_wrcomb()) {
- pr_warning("mtrr: your processor doesn't support write-combining\n");
+ pr_warn("mtrr: your processor doesn't support write-combining\n");
return -ENOSYS;
}
if (!size) {
- pr_warning("mtrr: zero sized request\n");
+ pr_warn("mtrr: zero sized request\n");
return -EINVAL;
}
if ((base | (base + size - 1)) >>
(boot_cpu_data.x86_phys_bits - PAGE_SHIFT)) {
- pr_warning("mtrr: base or size exceeds the MTRR width\n");
+ pr_warn("mtrr: base or size exceeds the MTRR width\n");
return -EINVAL;
}
@@ -348,7 +348,7 @@ int mtrr_add_page(unsigned long base, unsigned long size,
} else if (types_compatible(type, ltype))
continue;
}
- pr_warning("mtrr: 0x%lx000,0x%lx000 overlaps existing"
+ pr_warn("mtrr: 0x%lx000,0x%lx000 overlaps existing"
" 0x%lx000,0x%lx000\n", base, size, lbase,
lsize);
goto out;
@@ -357,7 +357,7 @@ int mtrr_add_page(unsigned long base, unsigned long size,
if (ltype != type) {
if (types_compatible(type, ltype))
continue;
- pr_warning("mtrr: type mismatch for %lx000,%lx000 old: %s new: %s\n",
+ pr_warn("mtrr: type mismatch for %lx000,%lx000 old: %s new: %s\n",
base, size, mtrr_attrib_to_str(ltype),
mtrr_attrib_to_str(type));
goto out;
@@ -395,7 +395,7 @@ int mtrr_add_page(unsigned long base, unsigned long size,
static int mtrr_check(unsigned long base, unsigned long size)
{
if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) {
- pr_warning("mtrr: size and base must be multiples of 4 kiB\n");
+ pr_warn("mtrr: size and base must be multiples of 4 kiB\n");
pr_debug("mtrr: size: 0x%lx base: 0x%lx\n", size, base);
dump_stack();
return -1;
@@ -493,16 +493,16 @@ int mtrr_del_page(int reg, unsigned long base, unsigned long size)
}
}
if (reg >= max) {
- pr_warning("mtrr: register: %d too big\n", reg);
+ pr_warn("mtrr: register: %d too big\n", reg);
goto out;
}
mtrr_if->get(reg, &lbase, &lsize, &ltype);
if (lsize < 1) {
- pr_warning("mtrr: MTRR %d not used\n", reg);
+ pr_warn("mtrr: MTRR %d not used\n", reg);
goto out;
}
if (mtrr_usage_table[reg] < 1) {
- pr_warning("mtrr: reg: %d has count=0\n", reg);
+ pr_warn("mtrr: reg: %d has count=0\n", reg);
goto out;
}
if (--mtrr_usage_table[reg] < 1)
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
deleted file mode 100644
index 6532f5b40..000000000
--- a/arch/x86/kernel/cpu/perf_event.c
+++ /dev/null
@@ -1,2441 +0,0 @@
-/*
- * Performance events x86 architecture code
- *
- * Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de>
- * Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar
- * Copyright (C) 2009 Jaswinder Singh Rajput
- * Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter
- * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra
- * Copyright (C) 2009 Intel Corporation, <markus.t.metzger@intel.com>
- * Copyright (C) 2009 Google, Inc., Stephane Eranian
- *
- * For licencing details see kernel-base/COPYING
- */
-
-#include <linux/perf_event.h>
-#include <linux/capability.h>
-#include <linux/notifier.h>
-#include <linux/hardirq.h>
-#include <linux/kprobes.h>
-#include <linux/module.h>
-#include <linux/kdebug.h>
-#include <linux/sched.h>
-#include <linux/uaccess.h>
-#include <linux/slab.h>
-#include <linux/cpu.h>
-#include <linux/bitops.h>
-#include <linux/device.h>
-
-#include <asm/apic.h>
-#include <asm/stacktrace.h>
-#include <asm/nmi.h>
-#include <asm/smp.h>
-#include <asm/alternative.h>
-#include <asm/mmu_context.h>
-#include <asm/tlbflush.h>
-#include <asm/timer.h>
-#include <asm/desc.h>
-#include <asm/ldt.h>
-
-#include "perf_event.h"
-
-struct x86_pmu x86_pmu __read_mostly;
-
-DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = {
- .enabled = 1,
-};
-
-struct static_key rdpmc_always_available = STATIC_KEY_INIT_FALSE;
-
-u64 __read_mostly hw_cache_event_ids
- [PERF_COUNT_HW_CACHE_MAX]
- [PERF_COUNT_HW_CACHE_OP_MAX]
- [PERF_COUNT_HW_CACHE_RESULT_MAX];
-u64 __read_mostly hw_cache_extra_regs
- [PERF_COUNT_HW_CACHE_MAX]
- [PERF_COUNT_HW_CACHE_OP_MAX]
- [PERF_COUNT_HW_CACHE_RESULT_MAX];
-
-/*
- * Propagate event elapsed time into the generic event.
- * Can only be executed on the CPU where the event is active.
- * Returns the delta events processed.
- */
-u64 x86_perf_event_update(struct perf_event *event)
-{
- struct hw_perf_event *hwc = &event->hw;
- int shift = 64 - x86_pmu.cntval_bits;
- u64 prev_raw_count, new_raw_count;
- int idx = hwc->idx;
- s64 delta;
-
- if (idx == INTEL_PMC_IDX_FIXED_BTS)
- return 0;
-
- /*
- * Careful: an NMI might modify the previous event value.
- *
- * Our tactic to handle this is to first atomically read and
- * exchange a new raw count - then add that new-prev delta
- * count to the generic event atomically:
- */
-again:
- prev_raw_count = local64_read(&hwc->prev_count);
- rdpmcl(hwc->event_base_rdpmc, new_raw_count);
-
- if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
- new_raw_count) != prev_raw_count)
- goto again;
-
- /*
- * Now we have the new raw value and have updated the prev
- * timestamp already. We can now calculate the elapsed delta
- * (event-)time and add that to the generic event.
- *
- * Careful, not all hw sign-extends above the physical width
- * of the count.
- */
- delta = (new_raw_count << shift) - (prev_raw_count << shift);
- delta >>= shift;
-
- local64_add(delta, &event->count);
- local64_sub(delta, &hwc->period_left);
-
- return new_raw_count;
-}
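The delta computation in x86_perf_event_update() above relies on a shift-left/shift-right pair to sign-extend counter values narrower than 64 bits before subtracting. A stand-alone illustration, assuming a hypothetical 48-bit counter width and made-up counter readings:

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		int cntval_bits = 48;
		int shift = 64 - cntval_bits;
		uint64_t prev = 0x0000fffffffffff0ULL;	/* just before wrap */
		uint64_t new  = 0x0000000000000010ULL;	/* just after wrap */
		int64_t delta;

		/* Shift both up to bit 63, subtract, shift back down (arithmetic). */
		delta = (int64_t)((new << shift) - (prev << shift));
		delta >>= shift;

		printf("delta = %lld\n", (long long)delta);	/* 32 events */
		return 0;
	}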
-
-/*
- * Find and validate any extra registers to set up.
- */
-static int x86_pmu_extra_regs(u64 config, struct perf_event *event)
-{
- struct hw_perf_event_extra *reg;
- struct extra_reg *er;
-
- reg = &event->hw.extra_reg;
-
- if (!x86_pmu.extra_regs)
- return 0;
-
- for (er = x86_pmu.extra_regs; er->msr; er++) {
- if (er->event != (config & er->config_mask))
- continue;
- if (event->attr.config1 & ~er->valid_mask)
- return -EINVAL;
- /* Check if the extra msrs can be safely accessed*/
- if (!er->extra_msr_access)
- return -ENXIO;
-
- reg->idx = er->idx;
- reg->config = event->attr.config1;
- reg->reg = er->msr;
- break;
- }
- return 0;
-}
-
-static atomic_t active_events;
-static atomic_t pmc_refcount;
-static DEFINE_MUTEX(pmc_reserve_mutex);
-
-#ifdef CONFIG_X86_LOCAL_APIC
-
-static bool reserve_pmc_hardware(void)
-{
- int i;
-
- for (i = 0; i < x86_pmu.num_counters; i++) {
- if (!reserve_perfctr_nmi(x86_pmu_event_addr(i)))
- goto perfctr_fail;
- }
-
- for (i = 0; i < x86_pmu.num_counters; i++) {
- if (!reserve_evntsel_nmi(x86_pmu_config_addr(i)))
- goto eventsel_fail;
- }
-
- return true;
-
-eventsel_fail:
- for (i--; i >= 0; i--)
- release_evntsel_nmi(x86_pmu_config_addr(i));
-
- i = x86_pmu.num_counters;
-
-perfctr_fail:
- for (i--; i >= 0; i--)
- release_perfctr_nmi(x86_pmu_event_addr(i));
-
- return false;
-}
-
-static void release_pmc_hardware(void)
-{
- int i;
-
- for (i = 0; i < x86_pmu.num_counters; i++) {
- release_perfctr_nmi(x86_pmu_event_addr(i));
- release_evntsel_nmi(x86_pmu_config_addr(i));
- }
-}
-
-#else
-
-static bool reserve_pmc_hardware(void) { return true; }
-static void release_pmc_hardware(void) {}
-
-#endif
-
-static bool check_hw_exists(void)
-{
- u64 val, val_fail, val_new= ~0;
- int i, reg, reg_fail, ret = 0;
- int bios_fail = 0;
- int reg_safe = -1;
-
- /*
- * Check to see if the BIOS enabled any of the counters, if so
- * complain and bail.
- */
- for (i = 0; i < x86_pmu.num_counters; i++) {
- reg = x86_pmu_config_addr(i);
- ret = rdmsrl_safe(reg, &val);
- if (ret)
- goto msr_fail;
- if (val & ARCH_PERFMON_EVENTSEL_ENABLE) {
- bios_fail = 1;
- val_fail = val;
- reg_fail = reg;
- } else {
- reg_safe = i;
- }
- }
-
- if (x86_pmu.num_counters_fixed) {
- reg = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
- ret = rdmsrl_safe(reg, &val);
- if (ret)
- goto msr_fail;
- for (i = 0; i < x86_pmu.num_counters_fixed; i++) {
- if (val & (0x03 << i*4)) {
- bios_fail = 1;
- val_fail = val;
- reg_fail = reg;
- }
- }
- }
-
- /*
- * If all the counters are enabled, the below test will always
- * fail. The tools will also become useless in this scenario.
- * Just fail and disable the hardware counters.
- */
-
- if (reg_safe == -1) {
- reg = reg_safe;
- goto msr_fail;
- }
-
- /*
- * Read the current value, change it and read it back to see if it
- * matches, this is needed to detect certain hardware emulators
- * (qemu/kvm) that don't trap on the MSR access and always return 0s.
- */
- reg = x86_pmu_event_addr(reg_safe);
- if (rdmsrl_safe(reg, &val))
- goto msr_fail;
- val ^= 0xffffUL;
- ret = wrmsrl_safe(reg, val);
- ret |= rdmsrl_safe(reg, &val_new);
- if (ret || val != val_new)
- goto msr_fail;
-
- /*
- * We still allow the PMU driver to operate:
- */
- if (bios_fail) {
- printk(KERN_CONT "Broken BIOS detected, complain to your hardware vendor.\n");
- printk(KERN_ERR FW_BUG "the BIOS has corrupted hw-PMU resources (MSR %x is %Lx)\n", reg_fail, val_fail);
- }
-
- return true;
-
-msr_fail:
- printk(KERN_CONT "Broken PMU hardware detected, using software events only.\n");
- printk("%sFailed to access perfctr msr (MSR %x is %Lx)\n",
- boot_cpu_has(X86_FEATURE_HYPERVISOR) ? KERN_INFO : KERN_ERR,
- reg, val_new);
-
- return false;
-}
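
A concrete trace of that probe under qemu/kvm-style emulation that returns 0 for every counter read: the initial read gives val = 0, the XOR turns it into 0xffff, the write is silently dropped, and the readback returns val_new = 0; val != val_new, so we jump to msr_fail and run with software events only. On real (idle) counters the written value reads back unchanged and the check passes.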
-
-static void hw_perf_event_destroy(struct perf_event *event)
-{
- x86_release_hardware();
- atomic_dec(&active_events);
-}
-
-void hw_perf_lbr_event_destroy(struct perf_event *event)
-{
- hw_perf_event_destroy(event);
-
- /* undo the lbr/bts event accounting */
- x86_del_exclusive(x86_lbr_exclusive_lbr);
-}
-
-static inline int x86_pmu_initialized(void)
-{
- return x86_pmu.handle_irq != NULL;
-}
-
-static inline int
-set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event *event)
-{
- struct perf_event_attr *attr = &event->attr;
- unsigned int cache_type, cache_op, cache_result;
- u64 config, val;
-
- config = attr->config;
-
- cache_type = (config >> 0) & 0xff;
- if (cache_type >= PERF_COUNT_HW_CACHE_MAX)
- return -EINVAL;
-
- cache_op = (config >> 8) & 0xff;
- if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX)
- return -EINVAL;
-
- cache_result = (config >> 16) & 0xff;
- if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
- return -EINVAL;
-
- val = hw_cache_event_ids[cache_type][cache_op][cache_result];
-
- if (val == 0)
- return -ENOENT;
-
- if (val == -1)
- return -EINVAL;
-
- hwc->config |= val;
- attr->config1 = hw_cache_extra_regs[cache_type][cache_op][cache_result];
- return x86_pmu_extra_regs(val, event);
-}
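
The decode above mirrors how user space packs a PERF_TYPE_HW_CACHE config as cache_type | (cache_op << 8) | (cache_result << 16). A minimal sketch of opening an L1D read-miss event with that encoding (error handling trimmed to the essentials):

#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <string.h>
#include <unistd.h>
#include <stdio.h>

int main(void)
{
	struct perf_event_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_HW_CACHE;
	/* cache_type | (cache_op << 8) | (cache_result << 16) */
	attr.config = PERF_COUNT_HW_CACHE_L1D |
		      (PERF_COUNT_HW_CACHE_OP_READ << 8) |
		      (PERF_COUNT_HW_CACHE_RESULT_MISS << 16);

	int fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
	if (fd < 0)
		perror("perf_event_open");
	else
		close(fd);
	return 0;
}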
-
-int x86_reserve_hardware(void)
-{
- int err = 0;
-
- if (!atomic_inc_not_zero(&pmc_refcount)) {
- mutex_lock(&pmc_reserve_mutex);
- if (atomic_read(&pmc_refcount) == 0) {
- if (!reserve_pmc_hardware())
- err = -EBUSY;
- else
- reserve_ds_buffers();
- }
- if (!err)
- atomic_inc(&pmc_refcount);
- mutex_unlock(&pmc_reserve_mutex);
- }
-
- return err;
-}
-
-void x86_release_hardware(void)
-{
- if (atomic_dec_and_mutex_lock(&pmc_refcount, &pmc_reserve_mutex)) {
- release_pmc_hardware();
- release_ds_buffers();
- mutex_unlock(&pmc_reserve_mutex);
- }
-}
-
-/*
- * Check if we can create an event of a certain type (i.e. that no
- * conflicting events are present).
- */
-int x86_add_exclusive(unsigned int what)
-{
- int i;
-
- if (!atomic_inc_not_zero(&x86_pmu.lbr_exclusive[what])) {
- mutex_lock(&pmc_reserve_mutex);
- for (i = 0; i < ARRAY_SIZE(x86_pmu.lbr_exclusive); i++) {
- if (i != what && atomic_read(&x86_pmu.lbr_exclusive[i]))
- goto fail_unlock;
- }
- atomic_inc(&x86_pmu.lbr_exclusive[what]);
- mutex_unlock(&pmc_reserve_mutex);
- }
-
- atomic_inc(&active_events);
- return 0;
-
-fail_unlock:
- mutex_unlock(&pmc_reserve_mutex);
- return -EBUSY;
-}
-
-void x86_del_exclusive(unsigned int what)
-{
- atomic_dec(&x86_pmu.lbr_exclusive[what]);
- atomic_dec(&active_events);
-}
-
-int x86_setup_perfctr(struct perf_event *event)
-{
- struct perf_event_attr *attr = &event->attr;
- struct hw_perf_event *hwc = &event->hw;
- u64 config;
-
- if (!is_sampling_event(event)) {
- hwc->sample_period = x86_pmu.max_period;
- hwc->last_period = hwc->sample_period;
- local64_set(&hwc->period_left, hwc->sample_period);
- }
-
- if (attr->type == PERF_TYPE_RAW)
- return x86_pmu_extra_regs(event->attr.config, event);
-
- if (attr->type == PERF_TYPE_HW_CACHE)
- return set_ext_hw_attr(hwc, event);
-
- if (attr->config >= x86_pmu.max_events)
- return -EINVAL;
-
- /*
- * The generic map:
- */
- config = x86_pmu.event_map(attr->config);
-
- if (config == 0)
- return -ENOENT;
-
- if (config == -1LL)
- return -EINVAL;
-
- /*
- * Branch tracing:
- */
- if (attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS &&
- !attr->freq && hwc->sample_period == 1) {
- /* BTS is not supported by this architecture. */
- if (!x86_pmu.bts_active)
- return -EOPNOTSUPP;
-
- /* BTS is currently only allowed for user-mode. */
- if (!attr->exclude_kernel)
- return -EOPNOTSUPP;
-
- /* disallow bts if conflicting events are present */
- if (x86_add_exclusive(x86_lbr_exclusive_lbr))
- return -EBUSY;
-
- event->destroy = hw_perf_lbr_event_destroy;
- }
-
- hwc->config |= config;
-
- return 0;
-}
-
-/*
- * Check that branch_sample_type is compatible with the settings needed
- * for precise_ip > 1, which implies using the LBR to capture ALL taken
- * branches at the priv levels of the measurement.
- */
-static inline int precise_br_compat(struct perf_event *event)
-{
- u64 m = event->attr.branch_sample_type;
- u64 b = 0;
-
- /* must capture all branches */
- if (!(m & PERF_SAMPLE_BRANCH_ANY))
- return 0;
-
- m &= PERF_SAMPLE_BRANCH_KERNEL | PERF_SAMPLE_BRANCH_USER;
-
- if (!event->attr.exclude_user)
- b |= PERF_SAMPLE_BRANCH_USER;
-
- if (!event->attr.exclude_kernel)
- b |= PERF_SAMPLE_BRANCH_KERNEL;
-
- /*
- * ignore PERF_SAMPLE_BRANCH_HV, not supported on x86
- */
-
- return m == b;
-}
-
-int x86_pmu_hw_config(struct perf_event *event)
-{
- if (event->attr.precise_ip) {
- int precise = 0;
-
- /* Support for constant skid */
- if (x86_pmu.pebs_active && !x86_pmu.pebs_broken) {
- precise++;
-
- /* Support for IP fixup */
- if (x86_pmu.lbr_nr || x86_pmu.intel_cap.pebs_format >= 2)
- precise++;
-
- if (x86_pmu.pebs_prec_dist)
- precise++;
- }
-
- if (event->attr.precise_ip > precise)
- return -EOPNOTSUPP;
- }
- /*
- * check that PEBS LBR correction does not conflict with
- * whatever the user is asking with attr->branch_sample_type
- */
- if (event->attr.precise_ip > 1 && x86_pmu.intel_cap.pebs_format < 2) {
- u64 *br_type = &event->attr.branch_sample_type;
-
- if (has_branch_stack(event)) {
- if (!precise_br_compat(event))
- return -EOPNOTSUPP;
-
- /* branch_sample_type is compatible */
-
- } else {
- /*
- * user did not specify branch_sample_type
- *
- * For PEBS fixups, we capture all
- * the branches at the priv level of the
- * event.
- */
- *br_type = PERF_SAMPLE_BRANCH_ANY;
-
- if (!event->attr.exclude_user)
- *br_type |= PERF_SAMPLE_BRANCH_USER;
-
- if (!event->attr.exclude_kernel)
- *br_type |= PERF_SAMPLE_BRANCH_KERNEL;
- }
- }
-
- if (event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_CALL_STACK)
- event->attach_state |= PERF_ATTACH_TASK_DATA;
-
- /*
- * Generate PMC IRQs:
- * (keep 'enabled' bit clear for now)
- */
- event->hw.config = ARCH_PERFMON_EVENTSEL_INT;
-
- /*
- * Count user and OS events unless requested not to
- */
- if (!event->attr.exclude_user)
- event->hw.config |= ARCH_PERFMON_EVENTSEL_USR;
- if (!event->attr.exclude_kernel)
- event->hw.config |= ARCH_PERFMON_EVENTSEL_OS;
-
- if (event->attr.type == PERF_TYPE_RAW)
- event->hw.config |= event->attr.config & X86_RAW_EVENT_MASK;
-
- if (event->attr.sample_period && x86_pmu.limit_period) {
- if (x86_pmu.limit_period(event, event->attr.sample_period) >
- event->attr.sample_period)
- return -EINVAL;
- }
-
- return x86_setup_perfctr(event);
-}
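
To make the ladder above concrete: precise_ip == 1 only requires working PEBS (it removes the constant skid), precise_ip == 2 additionally requires IP fixup via the LBR or PEBS format >= 2, and precise_ip == 3 requires precise-distribution support. So a request such as attr.precise_ip = 2 (roughly what the perf tool generates for "cycles:pp") is rejected with -EOPNOTSUPP on hardware that only qualifies for level 1.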
-
-/*
- * Setup the hardware configuration for a given attr_type
- */
-static int __x86_pmu_event_init(struct perf_event *event)
-{
- int err;
-
- if (!x86_pmu_initialized())
- return -ENODEV;
-
- err = x86_reserve_hardware();
- if (err)
- return err;
-
- atomic_inc(&active_events);
- event->destroy = hw_perf_event_destroy;
-
- event->hw.idx = -1;
- event->hw.last_cpu = -1;
- event->hw.last_tag = ~0ULL;
-
- /* mark unused */
- event->hw.extra_reg.idx = EXTRA_REG_NONE;
- event->hw.branch_reg.idx = EXTRA_REG_NONE;
-
- return x86_pmu.hw_config(event);
-}
-
-void x86_pmu_disable_all(void)
-{
- struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
- int idx;
-
- for (idx = 0; idx < x86_pmu.num_counters; idx++) {
- u64 val;
-
- if (!test_bit(idx, cpuc->active_mask))
- continue;
- rdmsrl(x86_pmu_config_addr(idx), val);
- if (!(val & ARCH_PERFMON_EVENTSEL_ENABLE))
- continue;
- val &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
- wrmsrl(x86_pmu_config_addr(idx), val);
- }
-}
-
-/*
- * There may be a PMI landing after enabled=0. The PMI hitting could be before or
- * after disable_all.
- *
- * If PMI hits before disable_all, the PMU will be disabled in the NMI handler.
- * It will not be re-enabled in the NMI handler again, because enabled=0. After
- * handling the NMI, disable_all will be called, which will not change the
- * state either. If PMI hits after disable_all, the PMU is already disabled
- * before entering NMI handler. The NMI handler will not change the state
- * either.
- *
- * So either situation is harmless.
- */
-static void x86_pmu_disable(struct pmu *pmu)
-{
- struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
-
- if (!x86_pmu_initialized())
- return;
-
- if (!cpuc->enabled)
- return;
-
- cpuc->n_added = 0;
- cpuc->enabled = 0;
- barrier();
-
- x86_pmu.disable_all();
-}
-
-void x86_pmu_enable_all(int added)
-{
- struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
- int idx;
-
- for (idx = 0; idx < x86_pmu.num_counters; idx++) {
- struct hw_perf_event *hwc = &cpuc->events[idx]->hw;
-
- if (!test_bit(idx, cpuc->active_mask))
- continue;
-
- __x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE);
- }
-}
-
-static struct pmu pmu;
-
-static inline int is_x86_event(struct perf_event *event)
-{
- return event->pmu == &pmu;
-}
-
-/*
- * Event scheduler state:
- *
- * Assign events iterating over all events and counters, beginning
- * with events with least weights first. Keep the current iterator
- * state in struct sched_state.
- */
-struct sched_state {
- int weight;
- int event; /* event index */
- int counter; /* counter index */
- int unassigned; /* number of events to be assigned left */
- int nr_gp; /* number of GP counters used */
- unsigned long used[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
-};
-
-/* Total max is X86_PMC_IDX_MAX, but we are O(n!) limited */
-#define SCHED_STATES_MAX 2
-
-struct perf_sched {
- int max_weight;
- int max_events;
- int max_gp;
- int saved_states;
- struct event_constraint **constraints;
- struct sched_state state;
- struct sched_state saved[SCHED_STATES_MAX];
-};
-
-/*
- * Initialize the iterator that runs through all events and counters.
- */
-static void perf_sched_init(struct perf_sched *sched, struct event_constraint **constraints,
- int num, int wmin, int wmax, int gpmax)
-{
- int idx;
-
- memset(sched, 0, sizeof(*sched));
- sched->max_events = num;
- sched->max_weight = wmax;
- sched->max_gp = gpmax;
- sched->constraints = constraints;
-
- for (idx = 0; idx < num; idx++) {
- if (constraints[idx]->weight == wmin)
- break;
- }
-
- sched->state.event = idx; /* start with min weight */
- sched->state.weight = wmin;
- sched->state.unassigned = num;
-}
-
-static void perf_sched_save_state(struct perf_sched *sched)
-{
- if (WARN_ON_ONCE(sched->saved_states >= SCHED_STATES_MAX))
- return;
-
- sched->saved[sched->saved_states] = sched->state;
- sched->saved_states++;
-}
-
-static bool perf_sched_restore_state(struct perf_sched *sched)
-{
- if (!sched->saved_states)
- return false;
-
- sched->saved_states--;
- sched->state = sched->saved[sched->saved_states];
-
- /* continue with next counter: */
- clear_bit(sched->state.counter++, sched->state.used);
-
- return true;
-}
-
-/*
- * Select a counter for the current event to schedule. Return true on
- * success.
- */
-static bool __perf_sched_find_counter(struct perf_sched *sched)
-{
- struct event_constraint *c;
- int idx;
-
- if (!sched->state.unassigned)
- return false;
-
- if (sched->state.event >= sched->max_events)
- return false;
-
- c = sched->constraints[sched->state.event];
- /* Prefer fixed purpose counters */
- if (c->idxmsk64 & (~0ULL << INTEL_PMC_IDX_FIXED)) {
- idx = INTEL_PMC_IDX_FIXED;
- for_each_set_bit_from(idx, c->idxmsk, X86_PMC_IDX_MAX) {
- if (!__test_and_set_bit(idx, sched->state.used))
- goto done;
- }
- }
-
- /* Grab the first unused counter starting with idx */
- idx = sched->state.counter;
- for_each_set_bit_from(idx, c->idxmsk, INTEL_PMC_IDX_FIXED) {
- if (!__test_and_set_bit(idx, sched->state.used)) {
- if (sched->state.nr_gp++ >= sched->max_gp)
- return false;
-
- goto done;
- }
- }
-
- return false;
-
-done:
- sched->state.counter = idx;
-
- if (c->overlap)
- perf_sched_save_state(sched);
-
- return true;
-}
-
-static bool perf_sched_find_counter(struct perf_sched *sched)
-{
- while (!__perf_sched_find_counter(sched)) {
- if (!perf_sched_restore_state(sched))
- return false;
- }
-
- return true;
-}
-
-/*
- * Go through all unassigned events and find the next one to schedule.
- * Take events with the least weight first. Return true on success.
- */
-static bool perf_sched_next_event(struct perf_sched *sched)
-{
- struct event_constraint *c;
-
- if (!sched->state.unassigned || !--sched->state.unassigned)
- return false;
-
- do {
- /* next event */
- sched->state.event++;
- if (sched->state.event >= sched->max_events) {
- /* next weight */
- sched->state.event = 0;
- sched->state.weight++;
- if (sched->state.weight > sched->max_weight)
- return false;
- }
- c = sched->constraints[sched->state.event];
- } while (c->weight != sched->state.weight);
-
- sched->state.counter = 0; /* start with first counter */
-
- return true;
-}
-
-/*
- * Assign a counter for each event.
- */
-int perf_assign_events(struct event_constraint **constraints, int n,
- int wmin, int wmax, int gpmax, int *assign)
-{
- struct perf_sched sched;
-
- perf_sched_init(&sched, constraints, n, wmin, wmax, gpmax);
-
- do {
- if (!perf_sched_find_counter(&sched))
- break; /* failed */
- if (assign)
- assign[sched.state.event] = sched.state.counter;
- } while (perf_sched_next_event(&sched));
-
- return sched.state.unassigned;
-}
-EXPORT_SYMBOL_GPL(perf_assign_events);
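
A small worked example of the weight ordering: take three events where e0 may only use counter 0 (weight 1), e1 may use counters 0-1 (weight 2) and e2 may use counters 0-3 (weight 4). Scheduling in weight order places e0 on counter 0, e1 on counter 1 and e2 on counter 2. Starting with the flexible e2 instead could have claimed counter 0 and, without backtracking through the saved states, left the heavily constrained e0 unschedulable.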
-
-int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
-{
- struct event_constraint *c;
- unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
- struct perf_event *e;
- int i, wmin, wmax, unsched = 0;
- struct hw_perf_event *hwc;
-
- bitmap_zero(used_mask, X86_PMC_IDX_MAX);
-
- if (x86_pmu.start_scheduling)
- x86_pmu.start_scheduling(cpuc);
-
- for (i = 0, wmin = X86_PMC_IDX_MAX, wmax = 0; i < n; i++) {
- cpuc->event_constraint[i] = NULL;
- c = x86_pmu.get_event_constraints(cpuc, i, cpuc->event_list[i]);
- cpuc->event_constraint[i] = c;
-
- wmin = min(wmin, c->weight);
- wmax = max(wmax, c->weight);
- }
-
- /*
- * fastpath, try to reuse previous register
- */
- for (i = 0; i < n; i++) {
- hwc = &cpuc->event_list[i]->hw;
- c = cpuc->event_constraint[i];
-
- /* never assigned */
- if (hwc->idx == -1)
- break;
-
- /* constraint still honored */
- if (!test_bit(hwc->idx, c->idxmsk))
- break;
-
- /* not already used */
- if (test_bit(hwc->idx, used_mask))
- break;
-
- __set_bit(hwc->idx, used_mask);
- if (assign)
- assign[i] = hwc->idx;
- }
-
- /* slow path */
- if (i != n) {
- int gpmax = x86_pmu.num_counters;
-
- /*
- * Do not allow scheduling of more than half the available
- * generic counters.
- *
-		 * This helps avoid counter starvation of the sibling thread by
-		 * ensuring that at most half the counters can be in exclusive
-		 * mode. There are no designated counters for the limits; any
-		 * N/2 counters can be used. This helps with events with
-		 * specific counter constraints.
- */
- if (is_ht_workaround_enabled() && !cpuc->is_fake &&
- READ_ONCE(cpuc->excl_cntrs->exclusive_present))
- gpmax /= 2;
-
- unsched = perf_assign_events(cpuc->event_constraint, n, wmin,
- wmax, gpmax, assign);
- }
-
- /*
- * In case of success (unsched = 0), mark events as committed,
- * so we do not put_constraint() in case new events are added
- * and fail to be scheduled
- *
- * We invoke the lower level commit callback to lock the resource
- *
- * We do not need to do all of this in case we are called to
- * validate an event group (assign == NULL)
- */
- if (!unsched && assign) {
- for (i = 0; i < n; i++) {
- e = cpuc->event_list[i];
- e->hw.flags |= PERF_X86_EVENT_COMMITTED;
- if (x86_pmu.commit_scheduling)
- x86_pmu.commit_scheduling(cpuc, i, assign[i]);
- }
- } else {
- for (i = 0; i < n; i++) {
- e = cpuc->event_list[i];
- /*
-			 * do not put_constraint() on committed events,
- * because they are good to go
- */
- if ((e->hw.flags & PERF_X86_EVENT_COMMITTED))
- continue;
-
- /*
- * release events that failed scheduling
- */
- if (x86_pmu.put_event_constraints)
- x86_pmu.put_event_constraints(cpuc, e);
- }
- }
-
- if (x86_pmu.stop_scheduling)
- x86_pmu.stop_scheduling(cpuc);
-
- return unsched ? -EINVAL : 0;
-}
-
-/*
- * dogrp: true if we must collect sibling events (group)
- * returns the total number of events, or a negative error code
- */
-static int collect_events(struct cpu_hw_events *cpuc, struct perf_event *leader, bool dogrp)
-{
- struct perf_event *event;
- int n, max_count;
-
- max_count = x86_pmu.num_counters + x86_pmu.num_counters_fixed;
-
- /* current number of events already accepted */
- n = cpuc->n_events;
-
- if (is_x86_event(leader)) {
- if (n >= max_count)
- return -EINVAL;
- cpuc->event_list[n] = leader;
- n++;
- }
- if (!dogrp)
- return n;
-
- list_for_each_entry(event, &leader->sibling_list, group_entry) {
- if (!is_x86_event(event) ||
- event->state <= PERF_EVENT_STATE_OFF)
- continue;
-
- if (n >= max_count)
- return -EINVAL;
-
- cpuc->event_list[n] = event;
- n++;
- }
- return n;
-}
-
-static inline void x86_assign_hw_event(struct perf_event *event,
- struct cpu_hw_events *cpuc, int i)
-{
- struct hw_perf_event *hwc = &event->hw;
-
- hwc->idx = cpuc->assign[i];
- hwc->last_cpu = smp_processor_id();
- hwc->last_tag = ++cpuc->tags[i];
-
- if (hwc->idx == INTEL_PMC_IDX_FIXED_BTS) {
- hwc->config_base = 0;
- hwc->event_base = 0;
- } else if (hwc->idx >= INTEL_PMC_IDX_FIXED) {
- hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
- hwc->event_base = MSR_ARCH_PERFMON_FIXED_CTR0 + (hwc->idx - INTEL_PMC_IDX_FIXED);
- hwc->event_base_rdpmc = (hwc->idx - INTEL_PMC_IDX_FIXED) | 1<<30;
- } else {
- hwc->config_base = x86_pmu_config_addr(hwc->idx);
- hwc->event_base = x86_pmu_event_addr(hwc->idx);
- hwc->event_base_rdpmc = x86_pmu_rdpmc_index(hwc->idx);
- }
-}
-
-static inline int match_prev_assignment(struct hw_perf_event *hwc,
- struct cpu_hw_events *cpuc,
- int i)
-{
- return hwc->idx == cpuc->assign[i] &&
- hwc->last_cpu == smp_processor_id() &&
- hwc->last_tag == cpuc->tags[i];
-}
-
-static void x86_pmu_start(struct perf_event *event, int flags);
-
-static void x86_pmu_enable(struct pmu *pmu)
-{
- struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
- struct perf_event *event;
- struct hw_perf_event *hwc;
- int i, added = cpuc->n_added;
-
- if (!x86_pmu_initialized())
- return;
-
- if (cpuc->enabled)
- return;
-
- if (cpuc->n_added) {
- int n_running = cpuc->n_events - cpuc->n_added;
- /*
- * apply assignment obtained either from
- * hw_perf_group_sched_in() or x86_pmu_enable()
- *
- * step1: save events moving to new counters
- */
- for (i = 0; i < n_running; i++) {
- event = cpuc->event_list[i];
- hwc = &event->hw;
-
- /*
- * we can avoid reprogramming counter if:
- * - assigned same counter as last time
- * - running on same CPU as last time
- * - no other event has used the counter since
- */
- if (hwc->idx == -1 ||
- match_prev_assignment(hwc, cpuc, i))
- continue;
-
- /*
- * Ensure we don't accidentally enable a stopped
- * counter simply because we rescheduled.
- */
- if (hwc->state & PERF_HES_STOPPED)
- hwc->state |= PERF_HES_ARCH;
-
- x86_pmu_stop(event, PERF_EF_UPDATE);
- }
-
- /*
- * step2: reprogram moved events into new counters
- */
- for (i = 0; i < cpuc->n_events; i++) {
- event = cpuc->event_list[i];
- hwc = &event->hw;
-
- if (!match_prev_assignment(hwc, cpuc, i))
- x86_assign_hw_event(event, cpuc, i);
- else if (i < n_running)
- continue;
-
- if (hwc->state & PERF_HES_ARCH)
- continue;
-
- x86_pmu_start(event, PERF_EF_RELOAD);
- }
- cpuc->n_added = 0;
- perf_events_lapic_init();
- }
-
- cpuc->enabled = 1;
- barrier();
-
- x86_pmu.enable_all(added);
-}
-
-static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left);
-
-/*
- * Set the next IRQ period, based on the hwc->period_left value.
- * To be called with the event disabled in hw:
- */
-int x86_perf_event_set_period(struct perf_event *event)
-{
- struct hw_perf_event *hwc = &event->hw;
- s64 left = local64_read(&hwc->period_left);
- s64 period = hwc->sample_period;
- int ret = 0, idx = hwc->idx;
-
- if (idx == INTEL_PMC_IDX_FIXED_BTS)
- return 0;
-
- /*
- * If we are way outside a reasonable range then just skip forward:
- */
- if (unlikely(left <= -period)) {
- left = period;
- local64_set(&hwc->period_left, left);
- hwc->last_period = period;
- ret = 1;
- }
-
- if (unlikely(left <= 0)) {
- left += period;
- local64_set(&hwc->period_left, left);
- hwc->last_period = period;
- ret = 1;
- }
- /*
-	 * Quirk: certain CPUs don't like it if just 1 hw_event is left:
- */
- if (unlikely(left < 2))
- left = 2;
-
- if (left > x86_pmu.max_period)
- left = x86_pmu.max_period;
-
- if (x86_pmu.limit_period)
- left = x86_pmu.limit_period(event, left);
-
- per_cpu(pmc_prev_left[idx], smp_processor_id()) = left;
-
- if (!(hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) ||
- local64_read(&hwc->prev_count) != (u64)-left) {
- /*
- * The hw event starts counting from this event offset,
-		 * mark it to be able to extract future deltas:
- */
- local64_set(&hwc->prev_count, (u64)-left);
-
- wrmsrl(hwc->event_base, (u64)(-left) & x86_pmu.cntval_mask);
- }
-
- /*
-	 * Due to an erratum on certain CPUs we need
- * a second write to be sure the register
- * is updated properly
- */
- if (x86_pmu.perfctr_second_write) {
- wrmsrl(hwc->event_base,
- (u64)(-left) & x86_pmu.cntval_mask);
- }
-
- perf_event_update_userpage(event);
-
- return ret;
-}
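
A worked example of the negative programming: with 48-bit counters (cntval_mask == 0xffffffffffff) and left = 1000000, the MSR is written with (u64)(-1000000) & 0xffffffffffff = 0xfffffff0bdc0, so the counter reaches the overflow point and raises the PMI after exactly 1,000,000 increments.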
-
-void x86_pmu_enable_event(struct perf_event *event)
-{
- if (__this_cpu_read(cpu_hw_events.enabled))
- __x86_pmu_enable_event(&event->hw,
- ARCH_PERFMON_EVENTSEL_ENABLE);
-}
-
-/*
- * Add a single event to the PMU.
- *
- * The event is added to the group of enabled events
- * but only if it can be scheduled with existing events.
- */
-static int x86_pmu_add(struct perf_event *event, int flags)
-{
- struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
- struct hw_perf_event *hwc;
- int assign[X86_PMC_IDX_MAX];
- int n, n0, ret;
-
- hwc = &event->hw;
-
- n0 = cpuc->n_events;
- ret = n = collect_events(cpuc, event, false);
- if (ret < 0)
- goto out;
-
- hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
- if (!(flags & PERF_EF_START))
- hwc->state |= PERF_HES_ARCH;
-
- /*
- * If group events scheduling transaction was started,
- * skip the schedulability test here, it will be performed
- * at commit time (->commit_txn) as a whole.
- */
- if (cpuc->txn_flags & PERF_PMU_TXN_ADD)
- goto done_collect;
-
- ret = x86_pmu.schedule_events(cpuc, n, assign);
- if (ret)
- goto out;
- /*
-	 * copy the new assignment; now we know it is possible. It
-	 * will be used by hw_perf_enable()
- */
- memcpy(cpuc->assign, assign, n*sizeof(int));
-
-done_collect:
- /*
- * Commit the collect_events() state. See x86_pmu_del() and
- * x86_pmu_*_txn().
- */
- cpuc->n_events = n;
- cpuc->n_added += n - n0;
- cpuc->n_txn += n - n0;
-
- ret = 0;
-out:
- return ret;
-}
-
-static void x86_pmu_start(struct perf_event *event, int flags)
-{
- struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
- int idx = event->hw.idx;
-
- if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
- return;
-
- if (WARN_ON_ONCE(idx == -1))
- return;
-
- if (flags & PERF_EF_RELOAD) {
- WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
- x86_perf_event_set_period(event);
- }
-
- event->hw.state = 0;
-
- cpuc->events[idx] = event;
- __set_bit(idx, cpuc->active_mask);
- __set_bit(idx, cpuc->running);
- x86_pmu.enable(event);
- perf_event_update_userpage(event);
-}
-
-void perf_event_print_debug(void)
-{
- u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left, fixed;
- u64 pebs, debugctl;
- struct cpu_hw_events *cpuc;
- unsigned long flags;
- int cpu, idx;
-
- if (!x86_pmu.num_counters)
- return;
-
- local_irq_save(flags);
-
- cpu = smp_processor_id();
- cpuc = &per_cpu(cpu_hw_events, cpu);
-
- if (x86_pmu.version >= 2) {
- rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl);
- rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
- rdmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, overflow);
- rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, fixed);
-
- pr_info("\n");
- pr_info("CPU#%d: ctrl: %016llx\n", cpu, ctrl);
- pr_info("CPU#%d: status: %016llx\n", cpu, status);
- pr_info("CPU#%d: overflow: %016llx\n", cpu, overflow);
- pr_info("CPU#%d: fixed: %016llx\n", cpu, fixed);
- if (x86_pmu.pebs_constraints) {
- rdmsrl(MSR_IA32_PEBS_ENABLE, pebs);
- pr_info("CPU#%d: pebs: %016llx\n", cpu, pebs);
- }
- if (x86_pmu.lbr_nr) {
- rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
- pr_info("CPU#%d: debugctl: %016llx\n", cpu, debugctl);
- }
- }
- pr_info("CPU#%d: active: %016llx\n", cpu, *(u64 *)cpuc->active_mask);
-
- for (idx = 0; idx < x86_pmu.num_counters; idx++) {
- rdmsrl(x86_pmu_config_addr(idx), pmc_ctrl);
- rdmsrl(x86_pmu_event_addr(idx), pmc_count);
-
- prev_left = per_cpu(pmc_prev_left[idx], cpu);
-
- pr_info("CPU#%d: gen-PMC%d ctrl: %016llx\n",
- cpu, idx, pmc_ctrl);
- pr_info("CPU#%d: gen-PMC%d count: %016llx\n",
- cpu, idx, pmc_count);
- pr_info("CPU#%d: gen-PMC%d left: %016llx\n",
- cpu, idx, prev_left);
- }
- for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) {
- rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, pmc_count);
-
- pr_info("CPU#%d: fixed-PMC%d count: %016llx\n",
- cpu, idx, pmc_count);
- }
- local_irq_restore(flags);
-}
-
-void x86_pmu_stop(struct perf_event *event, int flags)
-{
- struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
- struct hw_perf_event *hwc = &event->hw;
-
- if (__test_and_clear_bit(hwc->idx, cpuc->active_mask)) {
- x86_pmu.disable(event);
- cpuc->events[hwc->idx] = NULL;
- WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
- hwc->state |= PERF_HES_STOPPED;
- }
-
- if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
- /*
-		 * Drain the remaining delta count out of an event
- * that we are disabling:
- */
- x86_perf_event_update(event);
- hwc->state |= PERF_HES_UPTODATE;
- }
-}
-
-static void x86_pmu_del(struct perf_event *event, int flags)
-{
- struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
- int i;
-
- /*
- * event is descheduled
- */
- event->hw.flags &= ~PERF_X86_EVENT_COMMITTED;
-
- /*
- * If we're called during a txn, we don't need to do anything.
- * The events never got scheduled and ->cancel_txn will truncate
- * the event_list.
- *
- * XXX assumes any ->del() called during a TXN will only be on
- * an event added during that same TXN.
- */
- if (cpuc->txn_flags & PERF_PMU_TXN_ADD)
- return;
-
- /*
- * Not a TXN, therefore cleanup properly.
- */
- x86_pmu_stop(event, PERF_EF_UPDATE);
-
- for (i = 0; i < cpuc->n_events; i++) {
- if (event == cpuc->event_list[i])
- break;
- }
-
- if (WARN_ON_ONCE(i == cpuc->n_events)) /* called ->del() without ->add() ? */
- return;
-
-	/* If we have a newly added event, make sure to decrease n_added. */
- if (i >= cpuc->n_events - cpuc->n_added)
- --cpuc->n_added;
-
- if (x86_pmu.put_event_constraints)
- x86_pmu.put_event_constraints(cpuc, event);
-
- /* Delete the array entry. */
- while (++i < cpuc->n_events) {
- cpuc->event_list[i-1] = cpuc->event_list[i];
- cpuc->event_constraint[i-1] = cpuc->event_constraint[i];
- }
- --cpuc->n_events;
-
- perf_event_update_userpage(event);
-}
-
-int x86_pmu_handle_irq(struct pt_regs *regs)
-{
- struct perf_sample_data data;
- struct cpu_hw_events *cpuc;
- struct perf_event *event;
- int idx, handled = 0;
- u64 val;
-
- cpuc = this_cpu_ptr(&cpu_hw_events);
-
- /*
- * Some chipsets need to unmask the LVTPC in a particular spot
- * inside the nmi handler. As a result, the unmasking was pushed
- * into all the nmi handlers.
- *
- * This generic handler doesn't seem to have any issues where the
- * unmasking occurs so it was left at the top.
- */
- apic_write(APIC_LVTPC, APIC_DM_NMI);
-
- for (idx = 0; idx < x86_pmu.num_counters; idx++) {
- if (!test_bit(idx, cpuc->active_mask)) {
- /*
-			 * Though we deactivated the counter, some CPUs
-			 * might still deliver spurious interrupts that
-			 * are still in flight. Catch them:
- */
- if (__test_and_clear_bit(idx, cpuc->running))
- handled++;
- continue;
- }
-
- event = cpuc->events[idx];
-
- val = x86_perf_event_update(event);
- if (val & (1ULL << (x86_pmu.cntval_bits - 1)))
- continue;
-
- /*
- * event overflow
- */
- handled++;
- perf_sample_data_init(&data, 0, event->hw.last_period);
-
- if (!x86_perf_event_set_period(event))
- continue;
-
- if (perf_event_overflow(event, &data, regs))
- x86_pmu_stop(event, 0);
- }
-
- if (handled)
- inc_irq_stat(apic_perf_irqs);
-
- return handled;
-}
-
-void perf_events_lapic_init(void)
-{
- if (!x86_pmu.apic || !x86_pmu_initialized())
- return;
-
- /*
- * Always use NMI for PMU
- */
- apic_write(APIC_LVTPC, APIC_DM_NMI);
-}
-
-static int
-perf_event_nmi_handler(unsigned int cmd, struct pt_regs *regs)
-{
- u64 start_clock;
- u64 finish_clock;
- int ret;
-
- /*
- * All PMUs/events that share this PMI handler should make sure to
- * increment active_events for their events.
- */
- if (!atomic_read(&active_events))
- return NMI_DONE;
-
- start_clock = sched_clock();
- ret = x86_pmu.handle_irq(regs);
- finish_clock = sched_clock();
-
- perf_sample_event_took(finish_clock - start_clock);
-
- return ret;
-}
-NOKPROBE_SYMBOL(perf_event_nmi_handler);
-
-struct event_constraint emptyconstraint;
-struct event_constraint unconstrained;
-
-static int
-x86_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
-{
- unsigned int cpu = (long)hcpu;
- struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
- int i, ret = NOTIFY_OK;
-
- switch (action & ~CPU_TASKS_FROZEN) {
- case CPU_UP_PREPARE:
- for (i = 0 ; i < X86_PERF_KFREE_MAX; i++)
- cpuc->kfree_on_online[i] = NULL;
- if (x86_pmu.cpu_prepare)
- ret = x86_pmu.cpu_prepare(cpu);
- break;
-
- case CPU_STARTING:
- if (x86_pmu.cpu_starting)
- x86_pmu.cpu_starting(cpu);
- break;
-
- case CPU_ONLINE:
- for (i = 0 ; i < X86_PERF_KFREE_MAX; i++) {
- kfree(cpuc->kfree_on_online[i]);
- cpuc->kfree_on_online[i] = NULL;
- }
- break;
-
- case CPU_DYING:
- if (x86_pmu.cpu_dying)
- x86_pmu.cpu_dying(cpu);
- break;
-
- case CPU_UP_CANCELED:
- case CPU_DEAD:
- if (x86_pmu.cpu_dead)
- x86_pmu.cpu_dead(cpu);
- break;
-
- default:
- break;
- }
-
- return ret;
-}
-
-static void __init pmu_check_apic(void)
-{
- if (cpu_has_apic)
- return;
-
- x86_pmu.apic = 0;
- pr_info("no APIC, boot with the \"lapic\" boot parameter to force-enable it.\n");
- pr_info("no hardware sampling interrupt available.\n");
-
- /*
- * If we have a PMU initialized but no APIC
- * interrupts, we cannot sample hardware
- * events (user-space has to fall back and
- * sample via a hrtimer based software event):
- */
- pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT;
-
-}
-
-static struct attribute_group x86_pmu_format_group = {
- .name = "format",
- .attrs = NULL,
-};
-
-/*
- * Remove all undefined events (x86_pmu.event_map(id) == 0)
- * out of events_attr attributes.
- */
-static void __init filter_events(struct attribute **attrs)
-{
- struct device_attribute *d;
- struct perf_pmu_events_attr *pmu_attr;
- int offset = 0;
- int i, j;
-
- for (i = 0; attrs[i]; i++) {
- d = (struct device_attribute *)attrs[i];
- pmu_attr = container_of(d, struct perf_pmu_events_attr, attr);
- /* str trumps id */
- if (pmu_attr->event_str)
- continue;
- if (x86_pmu.event_map(i + offset))
- continue;
-
- for (j = i; attrs[j]; j++)
- attrs[j] = attrs[j + 1];
-
- /* Check the shifted attr. */
- i--;
-
- /*
-		 * event_map() is index based, while the attrs array is organized
-		 * by increasing event index. If we shift the events, then
-		 * we need to compensate for the event_map(), otherwise
-		 * we are looking up the wrong event in the map.
- */
- offset++;
- }
-}
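
Concretely: if attrs[3] is removed because event_map(3) == 0, the entry that slides into slot 3 must then be checked against event_map(4); the growing offset is what keeps that lookup aligned.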
-
-/* Merge two pointer arrays */
-__init struct attribute **merge_attr(struct attribute **a, struct attribute **b)
-{
- struct attribute **new;
- int j, i;
-
- for (j = 0; a[j]; j++)
- ;
- for (i = 0; b[i]; i++)
- j++;
- j++;
-
- new = kmalloc(sizeof(struct attribute *) * j, GFP_KERNEL);
- if (!new)
- return NULL;
-
- j = 0;
- for (i = 0; a[i]; i++)
- new[j++] = a[i];
- for (i = 0; b[i]; i++)
- new[j++] = b[i];
- new[j] = NULL;
-
- return new;
-}
-
-ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr,
- char *page)
-{
- struct perf_pmu_events_attr *pmu_attr = \
- container_of(attr, struct perf_pmu_events_attr, attr);
- u64 config = x86_pmu.event_map(pmu_attr->id);
-
- /* string trumps id */
- if (pmu_attr->event_str)
- return sprintf(page, "%s", pmu_attr->event_str);
-
- return x86_pmu.events_sysfs_show(page, config);
-}
-
-EVENT_ATTR(cpu-cycles, CPU_CYCLES );
-EVENT_ATTR(instructions, INSTRUCTIONS );
-EVENT_ATTR(cache-references, CACHE_REFERENCES );
-EVENT_ATTR(cache-misses, CACHE_MISSES );
-EVENT_ATTR(branch-instructions, BRANCH_INSTRUCTIONS );
-EVENT_ATTR(branch-misses, BRANCH_MISSES );
-EVENT_ATTR(bus-cycles, BUS_CYCLES );
-EVENT_ATTR(stalled-cycles-frontend, STALLED_CYCLES_FRONTEND );
-EVENT_ATTR(stalled-cycles-backend, STALLED_CYCLES_BACKEND );
-EVENT_ATTR(ref-cycles, REF_CPU_CYCLES );
-
-static struct attribute *empty_attrs;
-
-static struct attribute *events_attr[] = {
- EVENT_PTR(CPU_CYCLES),
- EVENT_PTR(INSTRUCTIONS),
- EVENT_PTR(CACHE_REFERENCES),
- EVENT_PTR(CACHE_MISSES),
- EVENT_PTR(BRANCH_INSTRUCTIONS),
- EVENT_PTR(BRANCH_MISSES),
- EVENT_PTR(BUS_CYCLES),
- EVENT_PTR(STALLED_CYCLES_FRONTEND),
- EVENT_PTR(STALLED_CYCLES_BACKEND),
- EVENT_PTR(REF_CPU_CYCLES),
- NULL,
-};
-
-static struct attribute_group x86_pmu_events_group = {
- .name = "events",
- .attrs = events_attr,
-};
-
-ssize_t x86_event_sysfs_show(char *page, u64 config, u64 event)
-{
- u64 umask = (config & ARCH_PERFMON_EVENTSEL_UMASK) >> 8;
- u64 cmask = (config & ARCH_PERFMON_EVENTSEL_CMASK) >> 24;
- bool edge = (config & ARCH_PERFMON_EVENTSEL_EDGE);
- bool pc = (config & ARCH_PERFMON_EVENTSEL_PIN_CONTROL);
- bool any = (config & ARCH_PERFMON_EVENTSEL_ANY);
- bool inv = (config & ARCH_PERFMON_EVENTSEL_INV);
- ssize_t ret;
-
- /*
-	 * We have a whole page to spend and only a little data
-	 * to write, so we can safely use sprintf.
- */
- ret = sprintf(page, "event=0x%02llx", event);
-
- if (umask)
- ret += sprintf(page + ret, ",umask=0x%02llx", umask);
-
- if (edge)
- ret += sprintf(page + ret, ",edge");
-
- if (pc)
- ret += sprintf(page + ret, ",pc");
-
- if (any)
- ret += sprintf(page + ret, ",any");
-
- if (inv)
- ret += sprintf(page + ret, ",inv");
-
- if (cmask)
- ret += sprintf(page + ret, ",cmask=0x%02llx", cmask);
-
- ret += sprintf(page + ret, "\n");
-
- return ret;
-}
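
For example, event 0xc2 with umask 0x01 in config renders as "event=0xc2,umask=0x01", and setting the invert bit plus cmask == 1 extends that to "event=0xc2,umask=0x01,inv,cmask=0x01".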
-
-static int __init init_hw_perf_events(void)
-{
- struct x86_pmu_quirk *quirk;
- int err;
-
- pr_info("Performance Events: ");
-
- switch (boot_cpu_data.x86_vendor) {
- case X86_VENDOR_INTEL:
- err = intel_pmu_init();
- break;
- case X86_VENDOR_AMD:
- err = amd_pmu_init();
- break;
- default:
- err = -ENOTSUPP;
- }
- if (err != 0) {
- pr_cont("no PMU driver, software events only.\n");
- return 0;
- }
-
- pmu_check_apic();
-
- /* sanity check that the hardware exists or is emulated */
- if (!check_hw_exists())
- return 0;
-
- pr_cont("%s PMU driver.\n", x86_pmu.name);
-
- x86_pmu.attr_rdpmc = 1; /* enable userspace RDPMC usage by default */
-
- for (quirk = x86_pmu.quirks; quirk; quirk = quirk->next)
- quirk->func();
-
- if (!x86_pmu.intel_ctrl)
- x86_pmu.intel_ctrl = (1 << x86_pmu.num_counters) - 1;
-
- perf_events_lapic_init();
- register_nmi_handler(NMI_LOCAL, perf_event_nmi_handler, 0, "PMI");
-
- unconstrained = (struct event_constraint)
- __EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_counters) - 1,
- 0, x86_pmu.num_counters, 0, 0);
-
- x86_pmu_format_group.attrs = x86_pmu.format_attrs;
-
- if (x86_pmu.event_attrs)
- x86_pmu_events_group.attrs = x86_pmu.event_attrs;
-
- if (!x86_pmu.events_sysfs_show)
- x86_pmu_events_group.attrs = &empty_attrs;
- else
- filter_events(x86_pmu_events_group.attrs);
-
- if (x86_pmu.cpu_events) {
- struct attribute **tmp;
-
- tmp = merge_attr(x86_pmu_events_group.attrs, x86_pmu.cpu_events);
- if (!WARN_ON(!tmp))
- x86_pmu_events_group.attrs = tmp;
- }
-
- pr_info("... version: %d\n", x86_pmu.version);
- pr_info("... bit width: %d\n", x86_pmu.cntval_bits);
- pr_info("... generic registers: %d\n", x86_pmu.num_counters);
- pr_info("... value mask: %016Lx\n", x86_pmu.cntval_mask);
- pr_info("... max period: %016Lx\n", x86_pmu.max_period);
- pr_info("... fixed-purpose events: %d\n", x86_pmu.num_counters_fixed);
- pr_info("... event mask: %016Lx\n", x86_pmu.intel_ctrl);
-
- perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW);
- perf_cpu_notifier(x86_pmu_notifier);
-
- return 0;
-}
-early_initcall(init_hw_perf_events);
-
-static inline void x86_pmu_read(struct perf_event *event)
-{
- x86_perf_event_update(event);
-}
-
-/*
- * Start group events scheduling transaction
- * Set the flag to make pmu::enable() not perform the
- * schedulability test; it will be performed at commit time.
- *
- * We only support PERF_PMU_TXN_ADD transactions. Save the
- * transaction flags but otherwise ignore non-PERF_PMU_TXN_ADD
- * transactions.
- */
-static void x86_pmu_start_txn(struct pmu *pmu, unsigned int txn_flags)
-{
- struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
-
- WARN_ON_ONCE(cpuc->txn_flags); /* txn already in flight */
-
- cpuc->txn_flags = txn_flags;
- if (txn_flags & ~PERF_PMU_TXN_ADD)
- return;
-
- perf_pmu_disable(pmu);
- __this_cpu_write(cpu_hw_events.n_txn, 0);
-}
-
-/*
- * Stop group events scheduling transaction
- * Clear the flag and pmu::enable() will perform the
- * schedulability test.
- */
-static void x86_pmu_cancel_txn(struct pmu *pmu)
-{
- unsigned int txn_flags;
- struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
-
- WARN_ON_ONCE(!cpuc->txn_flags); /* no txn in flight */
-
- txn_flags = cpuc->txn_flags;
- cpuc->txn_flags = 0;
- if (txn_flags & ~PERF_PMU_TXN_ADD)
- return;
-
- /*
- * Truncate collected array by the number of events added in this
- * transaction. See x86_pmu_add() and x86_pmu_*_txn().
- */
- __this_cpu_sub(cpu_hw_events.n_added, __this_cpu_read(cpu_hw_events.n_txn));
- __this_cpu_sub(cpu_hw_events.n_events, __this_cpu_read(cpu_hw_events.n_txn));
- perf_pmu_enable(pmu);
-}
-
-/*
- * Commit group events scheduling transaction
- * Perform the group schedulability test as a whole
- * Return 0 if success
- *
- * Does not cancel the transaction on failure; expects the caller to do this.
- */
-static int x86_pmu_commit_txn(struct pmu *pmu)
-{
- struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
- int assign[X86_PMC_IDX_MAX];
- int n, ret;
-
- WARN_ON_ONCE(!cpuc->txn_flags); /* no txn in flight */
-
- if (cpuc->txn_flags & ~PERF_PMU_TXN_ADD) {
- cpuc->txn_flags = 0;
- return 0;
- }
-
- n = cpuc->n_events;
-
- if (!x86_pmu_initialized())
- return -EAGAIN;
-
- ret = x86_pmu.schedule_events(cpuc, n, assign);
- if (ret)
- return ret;
-
- /*
-	 * copy the new assignment; now we know it is possible. It
-	 * will be used by hw_perf_enable()
- */
- memcpy(cpuc->assign, assign, n*sizeof(int));
-
- cpuc->txn_flags = 0;
- perf_pmu_enable(pmu);
- return 0;
-}
-/*
- * a fake_cpuc is used to validate event groups. Due to
- * the extra reg logic, we need to also allocate a fake
- * per_core and per_cpu structure. Otherwise, group events
- * using extra reg may conflict without the kernel being
- * able to catch this when the last event gets added to
- * the group.
- */
-static void free_fake_cpuc(struct cpu_hw_events *cpuc)
-{
- kfree(cpuc->shared_regs);
- kfree(cpuc);
-}
-
-static struct cpu_hw_events *allocate_fake_cpuc(void)
-{
- struct cpu_hw_events *cpuc;
- int cpu = raw_smp_processor_id();
-
- cpuc = kzalloc(sizeof(*cpuc), GFP_KERNEL);
- if (!cpuc)
- return ERR_PTR(-ENOMEM);
-
-	/* only needed if we have extra_regs */
- if (x86_pmu.extra_regs) {
- cpuc->shared_regs = allocate_shared_regs(cpu);
- if (!cpuc->shared_regs)
- goto error;
- }
- cpuc->is_fake = 1;
- return cpuc;
-error:
- free_fake_cpuc(cpuc);
- return ERR_PTR(-ENOMEM);
-}
-
-/*
- * validate that we can schedule this event
- */
-static int validate_event(struct perf_event *event)
-{
- struct cpu_hw_events *fake_cpuc;
- struct event_constraint *c;
- int ret = 0;
-
- fake_cpuc = allocate_fake_cpuc();
- if (IS_ERR(fake_cpuc))
- return PTR_ERR(fake_cpuc);
-
- c = x86_pmu.get_event_constraints(fake_cpuc, -1, event);
-
- if (!c || !c->weight)
- ret = -EINVAL;
-
- if (x86_pmu.put_event_constraints)
- x86_pmu.put_event_constraints(fake_cpuc, event);
-
- free_fake_cpuc(fake_cpuc);
-
- return ret;
-}
-
-/*
- * validate a single event group
- *
- * validation includes:
- * - check that events are compatible with each other
- * - events do not compete for the same counter
- * - number of events <= number of counters
- *
- * validation ensures the group can be loaded onto the
- * PMU if it was the only group available.
- */
-static int validate_group(struct perf_event *event)
-{
- struct perf_event *leader = event->group_leader;
- struct cpu_hw_events *fake_cpuc;
- int ret = -EINVAL, n;
-
- fake_cpuc = allocate_fake_cpuc();
- if (IS_ERR(fake_cpuc))
- return PTR_ERR(fake_cpuc);
- /*
-	 * the event is not yet connected with its
-	 * siblings; therefore we must first collect
-	 * existing siblings, then add the new event
-	 * before we can simulate the scheduling
- */
- n = collect_events(fake_cpuc, leader, true);
- if (n < 0)
- goto out;
-
- fake_cpuc->n_events = n;
- n = collect_events(fake_cpuc, event, false);
- if (n < 0)
- goto out;
-
- fake_cpuc->n_events = n;
-
- ret = x86_pmu.schedule_events(fake_cpuc, n, NULL);
-
-out:
- free_fake_cpuc(fake_cpuc);
- return ret;
-}
-
-static int x86_pmu_event_init(struct perf_event *event)
-{
- struct pmu *tmp;
- int err;
-
- switch (event->attr.type) {
- case PERF_TYPE_RAW:
- case PERF_TYPE_HARDWARE:
- case PERF_TYPE_HW_CACHE:
- break;
-
- default:
- return -ENOENT;
- }
-
- err = __x86_pmu_event_init(event);
- if (!err) {
- /*
- * we temporarily connect event to its pmu
- * such that validate_group() can classify
- * it as an x86 event using is_x86_event()
- */
- tmp = event->pmu;
- event->pmu = &pmu;
-
- if (event->group_leader != event)
- err = validate_group(event);
- else
- err = validate_event(event);
-
- event->pmu = tmp;
- }
- if (err) {
- if (event->destroy)
- event->destroy(event);
- }
-
- if (ACCESS_ONCE(x86_pmu.attr_rdpmc))
- event->hw.flags |= PERF_X86_EVENT_RDPMC_ALLOWED;
-
- return err;
-}
-
-static void refresh_pce(void *ignored)
-{
- if (current->mm)
- load_mm_cr4(current->mm);
-}
-
-static void x86_pmu_event_mapped(struct perf_event *event)
-{
- if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED))
- return;
-
- if (atomic_inc_return(&current->mm->context.perf_rdpmc_allowed) == 1)
- on_each_cpu_mask(mm_cpumask(current->mm), refresh_pce, NULL, 1);
-}
-
-static void x86_pmu_event_unmapped(struct perf_event *event)
-{
- if (!current->mm)
- return;
-
- if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED))
- return;
-
- if (atomic_dec_and_test(&current->mm->context.perf_rdpmc_allowed))
- on_each_cpu_mask(mm_cpumask(current->mm), refresh_pce, NULL, 1);
-}
-
-static int x86_pmu_event_idx(struct perf_event *event)
-{
- int idx = event->hw.idx;
-
- if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED))
- return 0;
-
- if (x86_pmu.num_counters_fixed && idx >= INTEL_PMC_IDX_FIXED) {
- idx -= INTEL_PMC_IDX_FIXED;
- idx |= 1 << 30;
- }
-
- return idx + 1;
-}
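
The index+1 / bit-30 encoding above is what user space finds in perf_event_mmap_page::index after mmap()ing the event. A rough self-monitoring sketch of the consuming side (illustrative names; the seq-lock retry around pc->lock and sign-extension to pc->pmc_width are omitted):

#include <linux/perf_event.h>
#include <stdint.h>

static inline uint64_t rdpmc(uint32_t counter)
{
	uint32_t lo, hi;

	asm volatile("rdpmc" : "=a" (lo), "=d" (hi) : "c" (counter));
	return lo | ((uint64_t)hi << 32);
}

/* pc points at the mmap'ed first page of a perf event fd */
static uint64_t read_self_count(struct perf_event_mmap_page *pc)
{
	uint32_t idx = pc->index;	/* 0 means rdpmc is not usable */
	uint64_t count = pc->offset;

	if (idx)
		count += rdpmc(idx - 1);	/* undo the +1 from x86_pmu_event_idx() */

	return count;
}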
-
-static ssize_t get_attr_rdpmc(struct device *cdev,
- struct device_attribute *attr,
- char *buf)
-{
- return snprintf(buf, 40, "%d\n", x86_pmu.attr_rdpmc);
-}
-
-static ssize_t set_attr_rdpmc(struct device *cdev,
- struct device_attribute *attr,
- const char *buf, size_t count)
-{
- unsigned long val;
- ssize_t ret;
-
- ret = kstrtoul(buf, 0, &val);
- if (ret)
- return ret;
-
- if (val > 2)
- return -EINVAL;
-
- if (x86_pmu.attr_rdpmc_broken)
- return -ENOTSUPP;
-
- if ((val == 2) != (x86_pmu.attr_rdpmc == 2)) {
- /*
- * Changing into or out of always available, aka
- * perf-event-bypassing mode. This path is extremely slow,
- * but only root can trigger it, so it's okay.
- */
- if (val == 2)
- static_key_slow_inc(&rdpmc_always_available);
- else
- static_key_slow_dec(&rdpmc_always_available);
- on_each_cpu(refresh_pce, NULL, 1);
- }
-
- x86_pmu.attr_rdpmc = val;
-
- return count;
-}
-
-static DEVICE_ATTR(rdpmc, S_IRUSR | S_IWUSR, get_attr_rdpmc, set_attr_rdpmc);
-
-static struct attribute *x86_pmu_attrs[] = {
- &dev_attr_rdpmc.attr,
- NULL,
-};
-
-static struct attribute_group x86_pmu_attr_group = {
- .attrs = x86_pmu_attrs,
-};
-
-static const struct attribute_group *x86_pmu_attr_groups[] = {
- &x86_pmu_attr_group,
- &x86_pmu_format_group,
- &x86_pmu_events_group,
- NULL,
-};
-
-static void x86_pmu_sched_task(struct perf_event_context *ctx, bool sched_in)
-{
- if (x86_pmu.sched_task)
- x86_pmu.sched_task(ctx, sched_in);
-}
-
-void perf_check_microcode(void)
-{
- if (x86_pmu.check_microcode)
- x86_pmu.check_microcode();
-}
-EXPORT_SYMBOL_GPL(perf_check_microcode);
-
-static struct pmu pmu = {
- .pmu_enable = x86_pmu_enable,
- .pmu_disable = x86_pmu_disable,
-
- .attr_groups = x86_pmu_attr_groups,
-
- .event_init = x86_pmu_event_init,
-
- .event_mapped = x86_pmu_event_mapped,
- .event_unmapped = x86_pmu_event_unmapped,
-
- .add = x86_pmu_add,
- .del = x86_pmu_del,
- .start = x86_pmu_start,
- .stop = x86_pmu_stop,
- .read = x86_pmu_read,
-
- .start_txn = x86_pmu_start_txn,
- .cancel_txn = x86_pmu_cancel_txn,
- .commit_txn = x86_pmu_commit_txn,
-
- .event_idx = x86_pmu_event_idx,
- .sched_task = x86_pmu_sched_task,
- .task_ctx_size = sizeof(struct x86_perf_task_context),
-};
-
-void arch_perf_update_userpage(struct perf_event *event,
- struct perf_event_mmap_page *userpg, u64 now)
-{
- struct cyc2ns_data *data;
-
- userpg->cap_user_time = 0;
- userpg->cap_user_time_zero = 0;
- userpg->cap_user_rdpmc =
- !!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED);
- userpg->pmc_width = x86_pmu.cntval_bits;
-
- if (!sched_clock_stable())
- return;
-
- data = cyc2ns_read_begin();
-
- /*
- * Internal timekeeping for enabled/running/stopped times
- * is always in the local_clock domain.
- */
- userpg->cap_user_time = 1;
- userpg->time_mult = data->cyc2ns_mul;
- userpg->time_shift = data->cyc2ns_shift;
- userpg->time_offset = data->cyc2ns_offset - now;
-
- /*
- * cap_user_time_zero doesn't make sense when we're using a different
- * time base for the records.
- */
- if (event->clock == &local_clock) {
- userpg->cap_user_time_zero = 1;
- userpg->time_zero = data->cyc2ns_offset;
- }
-
- cyc2ns_read_end(data);
-}
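
With cap_user_time set, user space converts a raw TSC value into the same local_clock() domain using the exported triple. A small sketch of the documented mult/shift conversion (the quotient/remainder split avoids overflowing the 64-bit multiply for large cycle counts):

#include <stdint.h>

/* time_offset, time_mult and time_shift come from perf_event_mmap_page */
static uint64_t tsc_to_ns(uint64_t cyc, uint64_t time_offset,
			  uint32_t time_mult, uint16_t time_shift)
{
	uint64_t quot = cyc >> time_shift;
	uint64_t rem  = cyc & (((uint64_t)1 << time_shift) - 1);

	return time_offset + quot * time_mult +
	       ((rem * time_mult) >> time_shift);
}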
-
-/*
- * callchain support
- */
-
-static int backtrace_stack(void *data, char *name)
-{
- return 0;
-}
-
-static void backtrace_address(void *data, unsigned long addr, int reliable)
-{
- struct perf_callchain_entry *entry = data;
-
- perf_callchain_store(entry, addr);
-}
-
-static const struct stacktrace_ops backtrace_ops = {
- .stack = backtrace_stack,
- .address = backtrace_address,
- .walk_stack = print_context_stack_bp,
-};
-
-void
-perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs)
-{
- if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
- /* TODO: We don't support guest os callchain now */
- return;
- }
-
- perf_callchain_store(entry, regs->ip);
-
- dump_trace(NULL, regs, NULL, 0, &backtrace_ops, entry);
-}
-
-static inline int
-valid_user_frame(const void __user *fp, unsigned long size)
-{
- return (__range_not_ok(fp, size, TASK_SIZE) == 0);
-}
-
-static unsigned long get_segment_base(unsigned int segment)
-{
- struct desc_struct *desc;
- int idx = segment >> 3;
-
- if ((segment & SEGMENT_TI_MASK) == SEGMENT_LDT) {
-#ifdef CONFIG_MODIFY_LDT_SYSCALL
- struct ldt_struct *ldt;
-
- if (idx > LDT_ENTRIES)
- return 0;
-
- /* IRQs are off, so this synchronizes with smp_store_release */
- ldt = lockless_dereference(current->active_mm->context.ldt);
- if (!ldt || idx > ldt->size)
- return 0;
-
- desc = &ldt->entries[idx];
-#else
- return 0;
-#endif
- } else {
- if (idx > GDT_ENTRIES)
- return 0;
-
- desc = raw_cpu_ptr(gdt_page.gdt) + idx;
- }
-
- return get_desc_base(desc);
-}
-
-#ifdef CONFIG_IA32_EMULATION
-
-#include <asm/compat.h>
-
-static inline int
-perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry)
-{
- /* 32-bit process in 64-bit kernel. */
- unsigned long ss_base, cs_base;
- struct stack_frame_ia32 frame;
- const void __user *fp;
-
- if (!test_thread_flag(TIF_IA32))
- return 0;
-
- cs_base = get_segment_base(regs->cs);
- ss_base = get_segment_base(regs->ss);
-
- fp = compat_ptr(ss_base + regs->bp);
- pagefault_disable();
- while (entry->nr < PERF_MAX_STACK_DEPTH) {
- unsigned long bytes;
- frame.next_frame = 0;
- frame.return_address = 0;
-
- if (!access_ok(VERIFY_READ, fp, 8))
- break;
-
- bytes = __copy_from_user_nmi(&frame.next_frame, fp, 4);
- if (bytes != 0)
- break;
- bytes = __copy_from_user_nmi(&frame.return_address, fp+4, 4);
- if (bytes != 0)
- break;
-
- if (!valid_user_frame(fp, sizeof(frame)))
- break;
-
- perf_callchain_store(entry, cs_base + frame.return_address);
- fp = compat_ptr(ss_base + frame.next_frame);
- }
- pagefault_enable();
- return 1;
-}
-#else
-static inline int
-perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry)
-{
- return 0;
-}
-#endif
-
-void
-perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
-{
- struct stack_frame frame;
- const void __user *fp;
-
- if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
- /* TODO: We don't support guest os callchain now */
- return;
- }
-
- /*
-	 * We don't know what to do with VM86 stacks; ignore them for now.
- */
- if (regs->flags & (X86_VM_MASK | PERF_EFLAGS_VM))
- return;
-
- fp = (void __user *)regs->bp;
-
- perf_callchain_store(entry, regs->ip);
-
- if (!current->mm)
- return;
-
- if (perf_callchain_user32(regs, entry))
- return;
-
- pagefault_disable();
- while (entry->nr < PERF_MAX_STACK_DEPTH) {
- unsigned long bytes;
- frame.next_frame = NULL;
- frame.return_address = 0;
-
- if (!access_ok(VERIFY_READ, fp, 16))
- break;
-
- bytes = __copy_from_user_nmi(&frame.next_frame, fp, 8);
- if (bytes != 0)
- break;
- bytes = __copy_from_user_nmi(&frame.return_address, fp+8, 8);
- if (bytes != 0)
- break;
-
- if (!valid_user_frame(fp, sizeof(frame)))
- break;
-
- perf_callchain_store(entry, frame.return_address);
- fp = (void __user *)frame.next_frame;
- }
- pagefault_enable();
-}
-
-/*
- * Deal with code segment offsets for the various execution modes:
- *
- * VM86 - the good olde 16 bit days, where the linear address is
- * 20 bits and we use regs->ip + 0x10 * regs->cs.
- *
- * IA32 - Where we need to look at GDT/LDT segment descriptor tables
- * to figure out what the 32bit base address is.
- *
- * X32 - has TIF_X32 set, but is running in x86_64
- *
- * X86_64 - CS,DS,SS,ES are all zero based.
- */
-static unsigned long code_segment_base(struct pt_regs *regs)
-{
- /*
- * For IA32 we look at the GDT/LDT segment base to convert the
- * effective IP to a linear address.
- */
-
-#ifdef CONFIG_X86_32
- /*
- * If we are in VM86 mode, add the segment offset to convert to a
- * linear address.
- */
- if (regs->flags & X86_VM_MASK)
- return 0x10 * regs->cs;
-
- if (user_mode(regs) && regs->cs != __USER_CS)
- return get_segment_base(regs->cs);
-#else
- if (user_mode(regs) && !user_64bit_mode(regs) &&
- regs->cs != __USER32_CS)
- return get_segment_base(regs->cs);
-#endif
- return 0;
-}
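
For example, in VM86 mode with regs->cs == 0x1234 the base is 0x10 * 0x1234 = 0x12340, so perf_instruction_pointer() below reports the linear address 0x12340 + regs->ip.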
-
-unsigned long perf_instruction_pointer(struct pt_regs *regs)
-{
- if (perf_guest_cbs && perf_guest_cbs->is_in_guest())
- return perf_guest_cbs->get_guest_ip();
-
- return regs->ip + code_segment_base(regs);
-}
-
-unsigned long perf_misc_flags(struct pt_regs *regs)
-{
- int misc = 0;
-
- if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
- if (perf_guest_cbs->is_user_mode())
- misc |= PERF_RECORD_MISC_GUEST_USER;
- else
- misc |= PERF_RECORD_MISC_GUEST_KERNEL;
- } else {
- if (user_mode(regs))
- misc |= PERF_RECORD_MISC_USER;
- else
- misc |= PERF_RECORD_MISC_KERNEL;
- }
-
- if (regs->flags & PERF_EFLAGS_EXACT)
- misc |= PERF_RECORD_MISC_EXACT_IP;
-
- return misc;
-}
-
-void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap)
-{
- cap->version = x86_pmu.version;
- cap->num_counters_gp = x86_pmu.num_counters;
- cap->num_counters_fixed = x86_pmu.num_counters_fixed;
- cap->bit_width_gp = x86_pmu.cntval_bits;
- cap->bit_width_fixed = x86_pmu.cntval_bits;
- cap->events_mask = (unsigned int)x86_pmu.events_maskl;
- cap->events_mask_len = x86_pmu.events_mask_len;
-}
-EXPORT_SYMBOL_GPL(perf_get_x86_pmu_capability);
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
deleted file mode 100644
index 98be6d6d3..000000000
--- a/arch/x86/kernel/cpu/perf_event.h
+++ /dev/null
@@ -1,958 +0,0 @@
-/*
- * Performance events x86 architecture header
- *
- * Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de>
- * Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar
- * Copyright (C) 2009 Jaswinder Singh Rajput
- * Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter
- * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra
- * Copyright (C) 2009 Intel Corporation, <markus.t.metzger@intel.com>
- * Copyright (C) 2009 Google, Inc., Stephane Eranian
- *
- * For licencing details see kernel-base/COPYING
- */
-
-#include <linux/perf_event.h>
-
-/* To enable MSR tracing please use the generic trace points. */
-
-/*
- * | NHM/WSM | SNB |
- * register -------------------------------
- * | HT | no HT | HT | no HT |
- *-----------------------------------------
- * offcore | core | core | cpu | core |
- * lbr_sel | core | core | cpu | core |
- * ld_lat | cpu | core | cpu | core |
- *-----------------------------------------
- *
- * Given that there is a small number of shared regs,
- * we can pre-allocate their slot in the per-cpu
- * per-core reg tables.
- */
-enum extra_reg_type {
- EXTRA_REG_NONE = -1, /* not used */
-
- EXTRA_REG_RSP_0 = 0, /* offcore_response_0 */
- EXTRA_REG_RSP_1 = 1, /* offcore_response_1 */
- EXTRA_REG_LBR = 2, /* lbr_select */
- EXTRA_REG_LDLAT = 3, /* ld_lat_threshold */
- EXTRA_REG_FE = 4, /* fe_* */
-
- EXTRA_REG_MAX /* number of entries needed */
-};
-
-struct event_constraint {
- union {
- unsigned long idxmsk[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
- u64 idxmsk64;
- };
- u64 code;
- u64 cmask;
- int weight;
- int overlap;
- int flags;
-};
-/*
- * struct hw_perf_event.flags flags
- */
-#define PERF_X86_EVENT_PEBS_LDLAT 0x0001 /* ld+ldlat data address sampling */
-#define PERF_X86_EVENT_PEBS_ST 0x0002 /* st data address sampling */
-#define PERF_X86_EVENT_PEBS_ST_HSW 0x0004 /* haswell style datala, store */
-#define PERF_X86_EVENT_COMMITTED 0x0008 /* event passed commit_txn */
-#define PERF_X86_EVENT_PEBS_LD_HSW 0x0010 /* haswell style datala, load */
-#define PERF_X86_EVENT_PEBS_NA_HSW 0x0020 /* haswell style datala, unknown */
-#define PERF_X86_EVENT_EXCL 0x0040 /* HT exclusivity on counter */
-#define PERF_X86_EVENT_DYNAMIC 0x0080 /* dynamic alloc'd constraint */
-#define PERF_X86_EVENT_RDPMC_ALLOWED 0x0100 /* grant rdpmc permission */
-#define PERF_X86_EVENT_EXCL_ACCT 0x0200 /* accounted EXCL event */
-#define PERF_X86_EVENT_AUTO_RELOAD 0x0400 /* use PEBS auto-reload */
-#define PERF_X86_EVENT_FREERUNNING 0x0800 /* use freerunning PEBS */
-
-
-struct amd_nb {
- int nb_id; /* NorthBridge id */
- int refcnt; /* reference count */
- struct perf_event *owners[X86_PMC_IDX_MAX];
- struct event_constraint event_constraints[X86_PMC_IDX_MAX];
-};
-
-/* The maximal number of PEBS events: */
-#define MAX_PEBS_EVENTS 8
-
-/*
- * Flags PEBS can handle without a PMI.
- *
- * TID can only be handled by flushing at context switch.
- *
- */
-#define PEBS_FREERUNNING_FLAGS \
- (PERF_SAMPLE_IP | PERF_SAMPLE_TID | PERF_SAMPLE_ADDR | \
- PERF_SAMPLE_ID | PERF_SAMPLE_CPU | PERF_SAMPLE_STREAM_ID | \
- PERF_SAMPLE_DATA_SRC | PERF_SAMPLE_IDENTIFIER | \
- PERF_SAMPLE_TRANSACTION)
-
-/*
- * A debug store configuration.
- *
- * We only support architectures that use 64bit fields.
- */
-struct debug_store {
- u64 bts_buffer_base;
- u64 bts_index;
- u64 bts_absolute_maximum;
- u64 bts_interrupt_threshold;
- u64 pebs_buffer_base;
- u64 pebs_index;
- u64 pebs_absolute_maximum;
- u64 pebs_interrupt_threshold;
- u64 pebs_event_reset[MAX_PEBS_EVENTS];
-};
-
-/*
- * Per register state.
- */
-struct er_account {
- raw_spinlock_t lock; /* per-core: protect structure */
- u64 config; /* extra MSR config */
- u64 reg; /* extra MSR number */
- atomic_t ref; /* reference count */
-};
-
-/*
- * Per core/cpu state
- *
- * Used to coordinate shared registers between HT threads or
- * among events on a single PMU.
- */
-struct intel_shared_regs {
- struct er_account regs[EXTRA_REG_MAX];
- int refcnt; /* per-core: #HT threads */
- unsigned core_id; /* per-core: core id */
-};
-
-enum intel_excl_state_type {
- INTEL_EXCL_UNUSED = 0, /* counter is unused */
- INTEL_EXCL_SHARED = 1, /* counter can be used by both threads */
- INTEL_EXCL_EXCLUSIVE = 2, /* counter can be used by one thread only */
-};
-
-struct intel_excl_states {
- enum intel_excl_state_type state[X86_PMC_IDX_MAX];
- bool sched_started; /* true if scheduling has started */
-};
-
-struct intel_excl_cntrs {
- raw_spinlock_t lock;
-
- struct intel_excl_states states[2];
-
- union {
- u16 has_exclusive[2];
- u32 exclusive_present;
- };
-
- int refcnt; /* per-core: #HT threads */
- unsigned core_id; /* per-core: core id */
-};
-
-#define MAX_LBR_ENTRIES 32
-
-enum {
- X86_PERF_KFREE_SHARED = 0,
- X86_PERF_KFREE_EXCL = 1,
- X86_PERF_KFREE_MAX
-};
-
-struct cpu_hw_events {
- /*
- * Generic x86 PMC bits
- */
- struct perf_event *events[X86_PMC_IDX_MAX]; /* in counter order */
- unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
- unsigned long running[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
- int enabled;
-
- int n_events; /* the # of events in the below arrays */
- int n_added; /* the # last events in the below arrays;
- they've never been enabled yet */
- int n_txn; /* the # last events in the below arrays;
- added in the current transaction */
- int assign[X86_PMC_IDX_MAX]; /* event to counter assignment */
- u64 tags[X86_PMC_IDX_MAX];
-
- struct perf_event *event_list[X86_PMC_IDX_MAX]; /* in enabled order */
- struct event_constraint *event_constraint[X86_PMC_IDX_MAX];
-
- int n_excl; /* the number of exclusive events */
-
- unsigned int txn_flags;
- int is_fake;
-
- /*
- * Intel DebugStore bits
- */
- struct debug_store *ds;
- u64 pebs_enabled;
-
- /*
- * Intel LBR bits
- */
- int lbr_users;
- void *lbr_context;
- struct perf_branch_stack lbr_stack;
- struct perf_branch_entry lbr_entries[MAX_LBR_ENTRIES];
- struct er_account *lbr_sel;
- u64 br_sel;
-
- /*
- * Intel host/guest exclude bits
- */
- u64 intel_ctrl_guest_mask;
- u64 intel_ctrl_host_mask;
- struct perf_guest_switch_msr guest_switch_msrs[X86_PMC_IDX_MAX];
-
- /*
- * Intel checkpoint mask
- */
- u64 intel_cp_status;
-
- /*
- * manage shared (per-core, per-cpu) registers
- * used on Intel NHM/WSM/SNB
- */
- struct intel_shared_regs *shared_regs;
- /*
- * manage exclusive counter access between hyperthread
- */
- struct event_constraint *constraint_list; /* in enable order */
- struct intel_excl_cntrs *excl_cntrs;
- int excl_thread_id; /* 0 or 1 */
-
- /*
- * AMD specific bits
- */
- struct amd_nb *amd_nb;
- /* Inverted mask of bits to clear in the perf_ctr ctrl registers */
- u64 perf_ctr_virt_mask;
-
- void *kfree_on_online[X86_PERF_KFREE_MAX];
-};
-
-#define __EVENT_CONSTRAINT(c, n, m, w, o, f) {\
- { .idxmsk64 = (n) }, \
- .code = (c), \
- .cmask = (m), \
- .weight = (w), \
- .overlap = (o), \
- .flags = f, \
-}
-
-#define EVENT_CONSTRAINT(c, n, m) \
- __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n), 0, 0)
-
-#define INTEL_EXCLEVT_CONSTRAINT(c, n) \
- __EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT, HWEIGHT(n),\
- 0, PERF_X86_EVENT_EXCL)
-
-/*
- * The overlap flag marks event constraints with overlapping counter
- * masks. This is the case if the counter mask of such an event is not
- * a subset of any other counter mask of a constraint with an equal or
- * higher weight, e.g.:
- *
- * c_overlaps = EVENT_CONSTRAINT_OVERLAP(0, 0x09, 0);
- * c_another1 = EVENT_CONSTRAINT(0, 0x07, 0);
- * c_another2 = EVENT_CONSTRAINT(0, 0x38, 0);
- *
- * The event scheduler may not select the correct counter in the first
- * cycle because it needs to know which subsequent events will be
- * scheduled. It may fail to schedule the events then. So we set the
- * overlap flag for such constraints to give the scheduler a hint which
- * events to select for counter rescheduling.
- *
- * Care must be taken as the rescheduling algorithm is O(n!) which
- * will increase scheduling cycles for an over-committed system
- * dramatically. The number of such EVENT_CONSTRAINT_OVERLAP() macros
- * and their counter masks must be kept to a minimum.
- */
-#define EVENT_CONSTRAINT_OVERLAP(c, n, m) \
- __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n), 1, 0)
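
For reference, the first constraint from the example above expands via
__EVENT_CONSTRAINT() to an index mask covering counters 0 and 3:

/* counters 0 and 3, weight HWEIGHT(0x09) == 2, overlap == 1 */
static struct event_constraint example_overlap =
	EVENT_CONSTRAINT_OVERLAP(0, 0x09, 0);
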
-
-/*
- * Constraint on the Event code.
- */
-#define INTEL_EVENT_CONSTRAINT(c, n) \
- EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT)
-
-/*
- * Constraint on the Event code + UMask + fixed-mask
- *
- * filter mask to validate fixed counter events.
- * the following filters disqualify for fixed counters:
- * - inv
- * - edge
- * - cnt-mask
- * - in_tx
- * - in_tx_checkpointed
- * The other filters are supported by fixed counters.
- * The any-thread option is supported starting with v3.
- */
-#define FIXED_EVENT_FLAGS (X86_RAW_EVENT_MASK|HSW_IN_TX|HSW_IN_TX_CHECKPOINTED)
-#define FIXED_EVENT_CONSTRAINT(c, n) \
- EVENT_CONSTRAINT(c, (1ULL << (32+n)), FIXED_EVENT_FLAGS)
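
Fixed counter n therefore occupies bit 32+n of the index mask. The
architectural fixed events are typically wired up along these lines:

	FIXED_EVENT_CONSTRAINT(0x00c0, 0),	/* INST_RETIRED.ANY */
	FIXED_EVENT_CONSTRAINT(0x003c, 1),	/* CPU_CLK_UNHALTED.CORE */
	FIXED_EVENT_CONSTRAINT(0x0300, 2),	/* CPU_CLK_UNHALTED.REF */
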
-
-/*
- * Constraint on the Event code + UMask
- */
-#define INTEL_UEVENT_CONSTRAINT(c, n) \
- EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK)
-
-/* Constraint on specific umask bit only + event */
-#define INTEL_UBIT_EVENT_CONSTRAINT(c, n) \
- EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT|(c))
-
-/* Like UEVENT_CONSTRAINT, but match flags too */
-#define INTEL_FLAGS_UEVENT_CONSTRAINT(c, n) \
- EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS)
-
-#define INTEL_EXCLUEVT_CONSTRAINT(c, n) \
- __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \
- HWEIGHT(n), 0, PERF_X86_EVENT_EXCL)
-
-#define INTEL_PLD_CONSTRAINT(c, n) \
- __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
- HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LDLAT)
-
-#define INTEL_PST_CONSTRAINT(c, n) \
- __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
- HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST)
-
-/* Event constraint, but match on all event flags too. */
-#define INTEL_FLAGS_EVENT_CONSTRAINT(c, n) \
- EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS)
-
-/* Check only flags, but allow all event/umask */
-#define INTEL_ALL_EVENT_CONSTRAINT(code, n) \
- EVENT_CONSTRAINT(code, n, X86_ALL_EVENT_FLAGS)
-
-/* Check flags and event code, and set the HSW store flag */
-#define INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_ST(code, n) \
- __EVENT_CONSTRAINT(code, n, \
- ARCH_PERFMON_EVENTSEL_EVENT|X86_ALL_EVENT_FLAGS, \
- HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST_HSW)
-
-/* Check flags and event code, and set the HSW load flag */
-#define INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(code, n) \
- __EVENT_CONSTRAINT(code, n, \
- ARCH_PERFMON_EVENTSEL_EVENT|X86_ALL_EVENT_FLAGS, \
- HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LD_HSW)
-
-#define INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_XLD(code, n) \
- __EVENT_CONSTRAINT(code, n, \
- ARCH_PERFMON_EVENTSEL_EVENT|X86_ALL_EVENT_FLAGS, \
- HWEIGHT(n), 0, \
- PERF_X86_EVENT_PEBS_LD_HSW|PERF_X86_EVENT_EXCL)
-
-/* Check flags and event code/umask, and set the HSW store flag */
-#define INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(code, n) \
- __EVENT_CONSTRAINT(code, n, \
- INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
- HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST_HSW)
-
-#define INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XST(code, n) \
- __EVENT_CONSTRAINT(code, n, \
- INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
- HWEIGHT(n), 0, \
- PERF_X86_EVENT_PEBS_ST_HSW|PERF_X86_EVENT_EXCL)
-
-/* Check flags and event code/umask, and set the HSW load flag */
-#define INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(code, n) \
- __EVENT_CONSTRAINT(code, n, \
- INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
- HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LD_HSW)
-
-#define INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(code, n) \
- __EVENT_CONSTRAINT(code, n, \
- INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
- HWEIGHT(n), 0, \
- PERF_X86_EVENT_PEBS_LD_HSW|PERF_X86_EVENT_EXCL)
-
-/* Check flags and event code/umask, and set the HSW N/A flag */
-#define INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_NA(code, n) \
- __EVENT_CONSTRAINT(code, n, \
- INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
- HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_NA_HSW)
-
-
-/*
- * We define the end marker as having a weight of -1
- * to enable blacklisting of events using a counter bitmask
- * of zero and thus a weight of zero.
- * The end marker has a weight that cannot possibly be
- * obtained from counting the bits in the bitmask.
- */
-#define EVENT_CONSTRAINT_END { .weight = -1 }
-
-/*
- * Check for end marker with weight == -1
- */
-#define for_each_event_constraint(e, c) \
- for ((e) = (c); (e)->weight != -1; (e)++)
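
Model-specific drivers build tables of these constraints, terminate them with
EVENT_CONSTRAINT_END, and walk them with the iterator above. A minimal sketch
(table contents illustrative):

static struct event_constraint example_constraints[] = {
	INTEL_EVENT_CONSTRAINT(0x10, 0x1),	/* event 0x10: counter 0 only */
	FIXED_EVENT_CONSTRAINT(0x00c0, 0),	/* fixed counter 0 */
	EVENT_CONSTRAINT_END
};

static void example_dump_constraints(void)
{
	struct event_constraint *c;

	for_each_event_constraint(c, example_constraints)
		pr_debug("code=%#llx idxmsk=%#llx weight=%d\n",
			 c->code, c->idxmsk64, c->weight);
}
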
-
-/*
- * Extra registers for specific events.
- *
- * Some events need large masks and require external MSRs.
- * Those extra MSRs end up being shared for all events on
- * a PMU and sometimes between PMUs of sibling HT threads.
- * In either case, the kernel needs to handle conflicting
- * accesses to those extra, shared, regs. The data structure
- * to manage those registers is stored in cpu_hw_event.
- */
-struct extra_reg {
- unsigned int event;
- unsigned int msr;
- u64 config_mask;
- u64 valid_mask;
- int idx; /* per_xxx->regs[] reg index */
- bool extra_msr_access;
-};
-
-#define EVENT_EXTRA_REG(e, ms, m, vm, i) { \
- .event = (e), \
- .msr = (ms), \
- .config_mask = (m), \
- .valid_mask = (vm), \
- .idx = EXTRA_REG_##i, \
- .extra_msr_access = true, \
- }
-
-#define INTEL_EVENT_EXTRA_REG(event, msr, vm, idx) \
- EVENT_EXTRA_REG(event, msr, ARCH_PERFMON_EVENTSEL_EVENT, vm, idx)
-
-#define INTEL_UEVENT_EXTRA_REG(event, msr, vm, idx) \
- EVENT_EXTRA_REG(event, msr, ARCH_PERFMON_EVENTSEL_EVENT | \
- ARCH_PERFMON_EVENTSEL_UMASK, vm, idx)
-
-#define INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(c) \
- INTEL_UEVENT_EXTRA_REG(c, \
- MSR_PEBS_LD_LAT_THRESHOLD, \
- 0xffff, \
- LDLAT)
-
-#define EVENT_EXTRA_END EVENT_EXTRA_REG(0, 0, 0, 0, RSP_0)
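
A per-model extra_regs table is built from these macros and, like the
constraint tables, is terminated by a sentinel entry. An illustrative sketch
(event codes and valid_mask values illustrative):

static struct extra_reg example_extra_regs[] __read_mostly = {
	/* offcore response: event 0xb7, umask 0x01 -> MSR_OFFCORE_RSP_0 */
	INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0xffff, RSP_0),
	/* load latency threshold for PEBS ld_lat sampling */
	INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd),
	EVENT_EXTRA_END
};
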
-
-union perf_capabilities {
- struct {
- u64 lbr_format:6;
- u64 pebs_trap:1;
- u64 pebs_arch_reg:1;
- u64 pebs_format:4;
- u64 smm_freeze:1;
- /*
-		 * PMU supports a separate counter range for writing
- * values > 32bit.
- */
- u64 full_width_write:1;
- };
- u64 capabilities;
-};
-
-struct x86_pmu_quirk {
- struct x86_pmu_quirk *next;
- void (*func)(void);
-};
-
-union x86_pmu_config {
- struct {
- u64 event:8,
- umask:8,
- usr:1,
- os:1,
- edge:1,
- pc:1,
- interrupt:1,
- __reserved1:1,
- en:1,
- inv:1,
- cmask:8,
- event2:4,
- __reserved2:4,
- go:1,
- ho:1;
- } bits;
- u64 value;
-};
-
-#define X86_CONFIG(args...) ((union x86_pmu_config){.bits = {args}}).value
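
X86_CONFIG() assembles a raw event encoding from named bit-fields, which reads
better than a hand-built hex constant; e.g. an inverted, cmask=1 encoding for
counting cycles in which nothing happens (field values illustrative):

	u64 config = X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
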
-
-enum {
- x86_lbr_exclusive_lbr,
- x86_lbr_exclusive_bts,
- x86_lbr_exclusive_pt,
- x86_lbr_exclusive_max,
-};
-
-/*
- * struct x86_pmu - generic x86 pmu
- */
-struct x86_pmu {
- /*
- * Generic x86 PMC bits
- */
- const char *name;
- int version;
- int (*handle_irq)(struct pt_regs *);
- void (*disable_all)(void);
- void (*enable_all)(int added);
- void (*enable)(struct perf_event *);
- void (*disable)(struct perf_event *);
- int (*hw_config)(struct perf_event *event);
- int (*schedule_events)(struct cpu_hw_events *cpuc, int n, int *assign);
- unsigned eventsel;
- unsigned perfctr;
- int (*addr_offset)(int index, bool eventsel);
- int (*rdpmc_index)(int index);
- u64 (*event_map)(int);
- int max_events;
- int num_counters;
- int num_counters_fixed;
- int cntval_bits;
- u64 cntval_mask;
- union {
- unsigned long events_maskl;
- unsigned long events_mask[BITS_TO_LONGS(ARCH_PERFMON_EVENTS_COUNT)];
- };
- int events_mask_len;
- int apic;
- u64 max_period;
- struct event_constraint *
- (*get_event_constraints)(struct cpu_hw_events *cpuc,
- int idx,
- struct perf_event *event);
-
- void (*put_event_constraints)(struct cpu_hw_events *cpuc,
- struct perf_event *event);
-
- void (*start_scheduling)(struct cpu_hw_events *cpuc);
-
- void (*commit_scheduling)(struct cpu_hw_events *cpuc, int idx, int cntr);
-
- void (*stop_scheduling)(struct cpu_hw_events *cpuc);
-
- struct event_constraint *event_constraints;
- struct x86_pmu_quirk *quirks;
- int perfctr_second_write;
- bool late_ack;
- unsigned (*limit_period)(struct perf_event *event, unsigned l);
-
- /*
- * sysfs attrs
- */
- int attr_rdpmc_broken;
- int attr_rdpmc;
- struct attribute **format_attrs;
- struct attribute **event_attrs;
-
- ssize_t (*events_sysfs_show)(char *page, u64 config);
- struct attribute **cpu_events;
-
- /*
- * CPU Hotplug hooks
- */
- int (*cpu_prepare)(int cpu);
- void (*cpu_starting)(int cpu);
- void (*cpu_dying)(int cpu);
- void (*cpu_dead)(int cpu);
-
- void (*check_microcode)(void);
- void (*sched_task)(struct perf_event_context *ctx,
- bool sched_in);
-
- /*
- * Intel Arch Perfmon v2+
- */
- u64 intel_ctrl;
- union perf_capabilities intel_cap;
-
- /*
- * Intel DebugStore bits
- */
- unsigned int bts :1,
- bts_active :1,
- pebs :1,
- pebs_active :1,
- pebs_broken :1,
- pebs_prec_dist :1;
- int pebs_record_size;
- int pebs_buffer_size;
- void (*drain_pebs)(struct pt_regs *regs);
- struct event_constraint *pebs_constraints;
- void (*pebs_aliases)(struct perf_event *event);
- int max_pebs_events;
- unsigned long free_running_flags;
-
- /*
- * Intel LBR
- */
- unsigned long lbr_tos, lbr_from, lbr_to; /* MSR base regs */
- int lbr_nr; /* hardware stack size */
- u64 lbr_sel_mask; /* LBR_SELECT valid bits */
- const int *lbr_sel_map; /* lbr_select mappings */
- bool lbr_double_abort; /* duplicated lbr aborts */
-
- /*
- * Intel PT/LBR/BTS are exclusive
- */
- atomic_t lbr_exclusive[x86_lbr_exclusive_max];
-
- /*
- * Extra registers for events
- */
- struct extra_reg *extra_regs;
- unsigned int flags;
-
- /*
- * Intel host/guest support (KVM)
- */
- struct perf_guest_switch_msr *(*guest_get_msrs)(int *nr);
-};
-
-struct x86_perf_task_context {
- u64 lbr_from[MAX_LBR_ENTRIES];
- u64 lbr_to[MAX_LBR_ENTRIES];
- u64 lbr_info[MAX_LBR_ENTRIES];
- int tos;
- int lbr_callstack_users;
- int lbr_stack_state;
-};
-
-#define x86_add_quirk(func_) \
-do { \
- static struct x86_pmu_quirk __quirk __initdata = { \
- .func = func_, \
- }; \
- __quirk.next = x86_pmu.quirks; \
- x86_pmu.quirks = &__quirk; \
-} while (0)
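
Quirks registered this way are chained onto x86_pmu.quirks and run once at
init time. A hypothetical user looks like:

static __init void example_quirk(void)
{
	pr_info("example PMU quirk applied\n");
}

static __init void example_model_init(void)
{
	x86_add_quirk(example_quirk);
}
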
-
-/*
- * x86_pmu flags
- */
-#define PMU_FL_NO_HT_SHARING 0x1 /* no hyper-threading resource sharing */
-#define PMU_FL_HAS_RSP_1 0x2 /* has 2 equivalent offcore_rsp regs */
-#define PMU_FL_EXCL_CNTRS 0x4 /* has exclusive counter requirements */
-#define PMU_FL_EXCL_ENABLED 0x8 /* exclusive counter active */
-
-#define EVENT_VAR(_id) event_attr_##_id
-#define EVENT_PTR(_id) &event_attr_##_id.attr.attr
-
-#define EVENT_ATTR(_name, _id) \
-static struct perf_pmu_events_attr EVENT_VAR(_id) = { \
- .attr = __ATTR(_name, 0444, events_sysfs_show, NULL), \
- .id = PERF_COUNT_HW_##_id, \
- .event_str = NULL, \
-};
-
-#define EVENT_ATTR_STR(_name, v, str) \
-static struct perf_pmu_events_attr event_attr_##v = { \
- .attr = __ATTR(_name, 0444, events_sysfs_show, NULL), \
- .id = 0, \
- .event_str = str, \
-};
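
EVENT_ATTR() exposes a generic hardware event by id, while EVENT_ATTR_STR()
exports a fixed encoding string through sysfs; for instance, a load-latency
alias along the lines of (identifier hypothetical):

EVENT_ATTR_STR(mem-loads, mem_ld_example, "event=0x0b,umask=0x10,ldlat=3");
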
-
-extern struct x86_pmu x86_pmu __read_mostly;
-
-static inline bool x86_pmu_has_lbr_callstack(void)
-{
- return x86_pmu.lbr_sel_map &&
- x86_pmu.lbr_sel_map[PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT] > 0;
-}
-
-DECLARE_PER_CPU(struct cpu_hw_events, cpu_hw_events);
-
-int x86_perf_event_set_period(struct perf_event *event);
-
-/*
- * Generalized hw caching related hw_event table, filled
- * in on a per model basis. A value of 0 means
- * 'not supported', -1 means 'hw_event makes no sense on
- * this CPU', any other value means the raw hw_event
- * ID.
- */
-
-#define C(x) PERF_COUNT_HW_CACHE_##x
-
-extern u64 __read_mostly hw_cache_event_ids
- [PERF_COUNT_HW_CACHE_MAX]
- [PERF_COUNT_HW_CACHE_OP_MAX]
- [PERF_COUNT_HW_CACHE_RESULT_MAX];
-extern u64 __read_mostly hw_cache_extra_regs
- [PERF_COUNT_HW_CACHE_MAX]
- [PERF_COUNT_HW_CACHE_OP_MAX]
- [PERF_COUNT_HW_CACHE_RESULT_MAX];
-
-u64 x86_perf_event_update(struct perf_event *event);
-
-static inline unsigned int x86_pmu_config_addr(int index)
-{
- return x86_pmu.eventsel + (x86_pmu.addr_offset ?
- x86_pmu.addr_offset(index, true) : index);
-}
-
-static inline unsigned int x86_pmu_event_addr(int index)
-{
- return x86_pmu.perfctr + (x86_pmu.addr_offset ?
- x86_pmu.addr_offset(index, false) : index);
-}
-
-static inline int x86_pmu_rdpmc_index(int index)
-{
- return x86_pmu.rdpmc_index ? x86_pmu.rdpmc_index(index) : index;
-}
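
With no addr_offset callback, counter N simply lives at eventsel+N and
perfctr+N. For example, assuming the architectural Intel base MSRs
(IA32_PERFEVTSEL0 = 0x186, IA32_PMC0 = 0xc1):

	x86_pmu_config_addr(2);		/* 0x186 + 2 == 0x188 */
	x86_pmu_event_addr(2);		/* 0x0c1 + 2 == 0x0c3 */
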
-
-int x86_add_exclusive(unsigned int what);
-
-void x86_del_exclusive(unsigned int what);
-
-int x86_reserve_hardware(void);
-
-void x86_release_hardware(void);
-
-void hw_perf_lbr_event_destroy(struct perf_event *event);
-
-int x86_setup_perfctr(struct perf_event *event);
-
-int x86_pmu_hw_config(struct perf_event *event);
-
-void x86_pmu_disable_all(void);
-
-static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc,
- u64 enable_mask)
-{
- u64 disable_mask = __this_cpu_read(cpu_hw_events.perf_ctr_virt_mask);
-
- if (hwc->extra_reg.reg)
- wrmsrl(hwc->extra_reg.reg, hwc->extra_reg.config);
- wrmsrl(hwc->config_base, (hwc->config | enable_mask) & ~disable_mask);
-}
-
-void x86_pmu_enable_all(int added);
-
-int perf_assign_events(struct event_constraint **constraints, int n,
- int wmin, int wmax, int gpmax, int *assign);
-int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign);
-
-void x86_pmu_stop(struct perf_event *event, int flags);
-
-static inline void x86_pmu_disable_event(struct perf_event *event)
-{
- struct hw_perf_event *hwc = &event->hw;
-
- wrmsrl(hwc->config_base, hwc->config);
-}
-
-void x86_pmu_enable_event(struct perf_event *event);
-
-int x86_pmu_handle_irq(struct pt_regs *regs);
-
-extern struct event_constraint emptyconstraint;
-
-extern struct event_constraint unconstrained;
-
-static inline bool kernel_ip(unsigned long ip)
-{
-#ifdef CONFIG_X86_32
- return ip > PAGE_OFFSET;
-#else
- return (long)ip < 0;
-#endif
-}
-
-/*
- * Not all PMUs provide the right context information to place the reported IP
- * into full context. Specifically segment registers are typically not
- * supplied.
- *
- * Assuming the address is a linear address (it is for IBS), we fake the CS and
- * vm86 mode using the known zero-based code segment and 'fix up' the registers
- * to reflect this.
- *
- * Intel PEBS/LBR appear to typically provide the effective address, nothing
- * much we can do about that but pray and treat it like a linear address.
- */
-static inline void set_linear_ip(struct pt_regs *regs, unsigned long ip)
-{
- regs->cs = kernel_ip(ip) ? __KERNEL_CS : __USER_CS;
- if (regs->flags & X86_VM_MASK)
- regs->flags ^= (PERF_EFLAGS_VM | X86_VM_MASK);
- regs->ip = ip;
-}
-
-ssize_t x86_event_sysfs_show(char *page, u64 config, u64 event);
-ssize_t intel_event_sysfs_show(char *page, u64 config);
-
-struct attribute **merge_attr(struct attribute **a, struct attribute **b);
-
-#ifdef CONFIG_CPU_SUP_AMD
-
-int amd_pmu_init(void);
-
-#else /* CONFIG_CPU_SUP_AMD */
-
-static inline int amd_pmu_init(void)
-{
- return 0;
-}
-
-#endif /* CONFIG_CPU_SUP_AMD */
-
-#ifdef CONFIG_CPU_SUP_INTEL
-
-static inline bool intel_pmu_has_bts(struct perf_event *event)
-{
- if (event->attr.config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS &&
- !event->attr.freq && event->hw.sample_period == 1)
- return true;
-
- return false;
-}
-
-int intel_pmu_save_and_restart(struct perf_event *event);
-
-struct event_constraint *
-x86_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
- struct perf_event *event);
-
-struct intel_shared_regs *allocate_shared_regs(int cpu);
-
-int intel_pmu_init(void);
-
-void init_debug_store_on_cpu(int cpu);
-
-void fini_debug_store_on_cpu(int cpu);
-
-void release_ds_buffers(void);
-
-void reserve_ds_buffers(void);
-
-extern struct event_constraint bts_constraint;
-
-void intel_pmu_enable_bts(u64 config);
-
-void intel_pmu_disable_bts(void);
-
-int intel_pmu_drain_bts_buffer(void);
-
-extern struct event_constraint intel_core2_pebs_event_constraints[];
-
-extern struct event_constraint intel_atom_pebs_event_constraints[];
-
-extern struct event_constraint intel_slm_pebs_event_constraints[];
-
-extern struct event_constraint intel_nehalem_pebs_event_constraints[];
-
-extern struct event_constraint intel_westmere_pebs_event_constraints[];
-
-extern struct event_constraint intel_snb_pebs_event_constraints[];
-
-extern struct event_constraint intel_ivb_pebs_event_constraints[];
-
-extern struct event_constraint intel_hsw_pebs_event_constraints[];
-
-extern struct event_constraint intel_skl_pebs_event_constraints[];
-
-struct event_constraint *intel_pebs_constraints(struct perf_event *event);
-
-void intel_pmu_pebs_enable(struct perf_event *event);
-
-void intel_pmu_pebs_disable(struct perf_event *event);
-
-void intel_pmu_pebs_enable_all(void);
-
-void intel_pmu_pebs_disable_all(void);
-
-void intel_pmu_pebs_sched_task(struct perf_event_context *ctx, bool sched_in);
-
-void intel_ds_init(void);
-
-void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in);
-
-void intel_pmu_lbr_reset(void);
-
-void intel_pmu_lbr_enable(struct perf_event *event);
-
-void intel_pmu_lbr_disable(struct perf_event *event);
-
-void intel_pmu_lbr_enable_all(bool pmi);
-
-void intel_pmu_lbr_disable_all(void);
-
-void intel_pmu_lbr_read(void);
-
-void intel_pmu_lbr_init_core(void);
-
-void intel_pmu_lbr_init_nhm(void);
-
-void intel_pmu_lbr_init_atom(void);
-
-void intel_pmu_lbr_init_snb(void);
-
-void intel_pmu_lbr_init_hsw(void);
-
-void intel_pmu_lbr_init_skl(void);
-
-void intel_pmu_lbr_init_knl(void);
-
-void intel_pmu_pebs_data_source_nhm(void);
-
-int intel_pmu_setup_lbr_filter(struct perf_event *event);
-
-void intel_pt_interrupt(void);
-
-int intel_bts_interrupt(void);
-
-void intel_bts_enable_local(void);
-
-void intel_bts_disable_local(void);
-
-int p4_pmu_init(void);
-
-int p6_pmu_init(void);
-
-int knc_pmu_init(void);
-
-ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr,
- char *page);
-
-static inline int is_ht_workaround_enabled(void)
-{
- return !!(x86_pmu.flags & PMU_FL_EXCL_ENABLED);
-}
-
-#else /* CONFIG_CPU_SUP_INTEL */
-
-static inline void reserve_ds_buffers(void)
-{
-}
-
-static inline void release_ds_buffers(void)
-{
-}
-
-static inline int intel_pmu_init(void)
-{
- return 0;
-}
-
-static inline struct intel_shared_regs *allocate_shared_regs(int cpu)
-{
- return NULL;
-}
-
-static inline int is_ht_workaround_enabled(void)
-{
- return 0;
-}
-#endif /* CONFIG_CPU_SUP_INTEL */
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
deleted file mode 100644
index 58610539b..000000000
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ /dev/null
@@ -1,731 +0,0 @@
-#include <linux/perf_event.h>
-#include <linux/export.h>
-#include <linux/types.h>
-#include <linux/init.h>
-#include <linux/slab.h>
-#include <asm/apicdef.h>
-
-#include "perf_event.h"
-
-static __initconst const u64 amd_hw_cache_event_ids
- [PERF_COUNT_HW_CACHE_MAX]
- [PERF_COUNT_HW_CACHE_OP_MAX]
- [PERF_COUNT_HW_CACHE_RESULT_MAX] =
-{
- [ C(L1D) ] = {
- [ C(OP_READ) ] = {
- [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */
- [ C(RESULT_MISS) ] = 0x0141, /* Data Cache Misses */
- },
- [ C(OP_WRITE) ] = {
- [ C(RESULT_ACCESS) ] = 0,
- [ C(RESULT_MISS) ] = 0,
- },
- [ C(OP_PREFETCH) ] = {
- [ C(RESULT_ACCESS) ] = 0x0267, /* Data Prefetcher :attempts */
- [ C(RESULT_MISS) ] = 0x0167, /* Data Prefetcher :cancelled */
- },
- },
- [ C(L1I ) ] = {
- [ C(OP_READ) ] = {
- [ C(RESULT_ACCESS) ] = 0x0080, /* Instruction cache fetches */
- [ C(RESULT_MISS) ] = 0x0081, /* Instruction cache misses */
- },
- [ C(OP_WRITE) ] = {
- [ C(RESULT_ACCESS) ] = -1,
- [ C(RESULT_MISS) ] = -1,
- },
- [ C(OP_PREFETCH) ] = {
- [ C(RESULT_ACCESS) ] = 0x014B, /* Prefetch Instructions :Load */
- [ C(RESULT_MISS) ] = 0,
- },
- },
- [ C(LL ) ] = {
- [ C(OP_READ) ] = {
- [ C(RESULT_ACCESS) ] = 0x037D, /* Requests to L2 Cache :IC+DC */
- [ C(RESULT_MISS) ] = 0x037E, /* L2 Cache Misses : IC+DC */
- },
- [ C(OP_WRITE) ] = {
- [ C(RESULT_ACCESS) ] = 0x017F, /* L2 Fill/Writeback */
- [ C(RESULT_MISS) ] = 0,
- },
- [ C(OP_PREFETCH) ] = {
- [ C(RESULT_ACCESS) ] = 0,
- [ C(RESULT_MISS) ] = 0,
- },
- },
- [ C(DTLB) ] = {
- [ C(OP_READ) ] = {
- [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */
- [ C(RESULT_MISS) ] = 0x0746, /* L1_DTLB_AND_L2_DLTB_MISS.ALL */
- },
- [ C(OP_WRITE) ] = {
- [ C(RESULT_ACCESS) ] = 0,
- [ C(RESULT_MISS) ] = 0,
- },
- [ C(OP_PREFETCH) ] = {
- [ C(RESULT_ACCESS) ] = 0,
- [ C(RESULT_MISS) ] = 0,
- },
- },
- [ C(ITLB) ] = {
- [ C(OP_READ) ] = {
-		[ C(RESULT_ACCESS) ] = 0x0080, /* Instruction fetches */
- [ C(RESULT_MISS) ] = 0x0385, /* L1_ITLB_AND_L2_ITLB_MISS.ALL */
- },
- [ C(OP_WRITE) ] = {
- [ C(RESULT_ACCESS) ] = -1,
- [ C(RESULT_MISS) ] = -1,
- },
- [ C(OP_PREFETCH) ] = {
- [ C(RESULT_ACCESS) ] = -1,
- [ C(RESULT_MISS) ] = -1,
- },
- },
- [ C(BPU ) ] = {
- [ C(OP_READ) ] = {
- [ C(RESULT_ACCESS) ] = 0x00c2, /* Retired Branch Instr. */
- [ C(RESULT_MISS) ] = 0x00c3, /* Retired Mispredicted BI */
- },
- [ C(OP_WRITE) ] = {
- [ C(RESULT_ACCESS) ] = -1,
- [ C(RESULT_MISS) ] = -1,
- },
- [ C(OP_PREFETCH) ] = {
- [ C(RESULT_ACCESS) ] = -1,
- [ C(RESULT_MISS) ] = -1,
- },
- },
- [ C(NODE) ] = {
- [ C(OP_READ) ] = {
- [ C(RESULT_ACCESS) ] = 0xb8e9, /* CPU Request to Memory, l+r */
- [ C(RESULT_MISS) ] = 0x98e9, /* CPU Request to Memory, r */
- },
- [ C(OP_WRITE) ] = {
- [ C(RESULT_ACCESS) ] = -1,
- [ C(RESULT_MISS) ] = -1,
- },
- [ C(OP_PREFETCH) ] = {
- [ C(RESULT_ACCESS) ] = -1,
- [ C(RESULT_MISS) ] = -1,
- },
- },
-};
-
-/*
- * AMD Performance Monitor K7 and later.
- */
-static const u64 amd_perfmon_event_map[] =
-{
- [PERF_COUNT_HW_CPU_CYCLES] = 0x0076,
- [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
- [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0080,
- [PERF_COUNT_HW_CACHE_MISSES] = 0x0081,
- [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c2,
- [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c3,
- [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x00d0, /* "Decoder empty" event */
- [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x00d1, /* "Dispatch stalls" event */
-};
-
-static u64 amd_pmu_event_map(int hw_event)
-{
- return amd_perfmon_event_map[hw_event];
-}
-
-/*
- * Previously calculated offsets
- */
-static unsigned int event_offsets[X86_PMC_IDX_MAX] __read_mostly;
-static unsigned int count_offsets[X86_PMC_IDX_MAX] __read_mostly;
-
-/*
- * Legacy CPUs:
- * 4 counters starting at 0xc0010000 each offset by 1
- *
- * CPUs with core performance counter extensions:
- * 6 counters starting at 0xc0010200 each offset by 2
- */
-static inline int amd_pmu_addr_offset(int index, bool eventsel)
-{
- int offset;
-
- if (!index)
- return index;
-
- if (eventsel)
- offset = event_offsets[index];
- else
- offset = count_offsets[index];
-
- if (offset)
- return offset;
-
- if (!boot_cpu_has(X86_FEATURE_PERFCTR_CORE))
- offset = index;
- else
- offset = index << 1;
-
- if (eventsel)
- event_offsets[index] = offset;
- else
- count_offsets[index] = offset;
-
- return offset;
-}
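
Worked example for index 3, illustrating the two MSR layouts described above:

	/* legacy:          offset = 3 -> MSR_K7_EVNTSEL0   (0xc0010000) + 3 */
	/* core extensions: offset = 6 -> MSR_F15H_PERF_CTL (0xc0010200) + 6 */
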
-
-static int amd_core_hw_config(struct perf_event *event)
-{
- if (event->attr.exclude_host && event->attr.exclude_guest)
- /*
- * When HO == GO == 1 the hardware treats that as GO == HO == 0
- * and will count in both modes. We don't want to count in that
- * case so we emulate no-counting by setting US = OS = 0.
- */
- event->hw.config &= ~(ARCH_PERFMON_EVENTSEL_USR |
- ARCH_PERFMON_EVENTSEL_OS);
- else if (event->attr.exclude_host)
- event->hw.config |= AMD64_EVENTSEL_GUESTONLY;
- else if (event->attr.exclude_guest)
- event->hw.config |= AMD64_EVENTSEL_HOSTONLY;
-
- return 0;
-}
-
-/*
- * AMD64 events are detected based on their event codes.
- */
-static inline unsigned int amd_get_event_code(struct hw_perf_event *hwc)
-{
- return ((hwc->config >> 24) & 0x0f00) | (hwc->config & 0x00ff);
-}
-
-static inline int amd_is_nb_event(struct hw_perf_event *hwc)
-{
- return (hwc->config & 0xe0) == 0xe0;
-}
-
-static inline int amd_has_nb(struct cpu_hw_events *cpuc)
-{
- struct amd_nb *nb = cpuc->amd_nb;
-
- return nb && nb->nb_id != -1;
-}
-
-static int amd_pmu_hw_config(struct perf_event *event)
-{
- int ret;
-
- /* pass precise event sampling to ibs: */
- if (event->attr.precise_ip && get_ibs_caps())
- return -ENOENT;
-
- if (has_branch_stack(event))
- return -EOPNOTSUPP;
-
- ret = x86_pmu_hw_config(event);
- if (ret)
- return ret;
-
- if (event->attr.type == PERF_TYPE_RAW)
- event->hw.config |= event->attr.config & AMD64_RAW_EVENT_MASK;
-
- return amd_core_hw_config(event);
-}
-
-static void __amd_put_nb_event_constraints(struct cpu_hw_events *cpuc,
- struct perf_event *event)
-{
- struct amd_nb *nb = cpuc->amd_nb;
- int i;
-
- /*
- * need to scan whole list because event may not have
- * been assigned during scheduling
- *
- * no race condition possible because event can only
- * be removed on one CPU at a time AND PMU is disabled
- * when we come here
- */
- for (i = 0; i < x86_pmu.num_counters; i++) {
- if (cmpxchg(nb->owners + i, event, NULL) == event)
- break;
- }
-}
-
- /*
- * AMD64 NorthBridge events need special treatment because
- * counter access needs to be synchronized across all cores
- * of a package. Refer to BKDG section 3.12
- *
- * NB events are events measuring L3 cache, Hypertransport
- * traffic. They are identified by an event code >= 0xe00.
- * They measure events on the NorthBridge which is shared
- * by all cores on a package. NB events are counted on a
- * shared set of counters. When a NB event is programmed
- * in a counter, the data actually comes from a shared
- * counter. Thus, access to those counters needs to be
- * synchronized.
- *
- * We implement the synchronization such that no two cores
- * can be measuring NB events using the same counters. Thus,
- * we maintain a per-NB allocation table. The available slot
- * is propagated using the event_constraint structure.
- *
- * We provide only one choice for each NB event based on
- * the fact that only NB events have restrictions. Consequently,
- * if a counter is available, there is a guarantee the NB event
- * will be assigned to it. If no slot is available, an empty
- * constraint is returned and scheduling will eventually fail
- * for this event.
- *
- * Note that all cores attached to the same NB compete for the same
- * counters to host NB events, this is why we use atomic ops. Some
- * multi-chip CPUs may have more than one NB.
- *
- * Given that resources are allocated (cmpxchg), they must be
- * eventually freed for others to use. This is accomplished by
- * calling __amd_put_nb_event_constraints()
- *
- * Non NB events are not impacted by this restriction.
- */
-static struct event_constraint *
-__amd_get_nb_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event,
- struct event_constraint *c)
-{
- struct hw_perf_event *hwc = &event->hw;
- struct amd_nb *nb = cpuc->amd_nb;
- struct perf_event *old;
- int idx, new = -1;
-
- if (!c)
- c = &unconstrained;
-
- if (cpuc->is_fake)
- return c;
-
- /*
- * detect if already present, if so reuse
- *
- * cannot merge with actual allocation
- * because of possible holes
- *
- * event can already be present yet not assigned (in hwc->idx)
- * because of successive calls to x86_schedule_events() from
- * hw_perf_group_sched_in() without hw_perf_enable()
- */
- for_each_set_bit(idx, c->idxmsk, x86_pmu.num_counters) {
- if (new == -1 || hwc->idx == idx)
- /* assign free slot, prefer hwc->idx */
- old = cmpxchg(nb->owners + idx, NULL, event);
- else if (nb->owners[idx] == event)
- /* event already present */
- old = event;
- else
- continue;
-
- if (old && old != event)
- continue;
-
- /* reassign to this slot */
- if (new != -1)
- cmpxchg(nb->owners + new, event, NULL);
- new = idx;
-
- /* already present, reuse */
- if (old == event)
- break;
- }
-
- if (new == -1)
- return &emptyconstraint;
-
- return &nb->event_constraints[new];
-}
-
-static struct amd_nb *amd_alloc_nb(int cpu)
-{
- struct amd_nb *nb;
- int i;
-
- nb = kzalloc_node(sizeof(struct amd_nb), GFP_KERNEL, cpu_to_node(cpu));
- if (!nb)
- return NULL;
-
- nb->nb_id = -1;
-
- /*
- * initialize all possible NB constraints
- */
- for (i = 0; i < x86_pmu.num_counters; i++) {
- __set_bit(i, nb->event_constraints[i].idxmsk);
- nb->event_constraints[i].weight = 1;
- }
- return nb;
-}
-
-static int amd_pmu_cpu_prepare(int cpu)
-{
- struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
-
- WARN_ON_ONCE(cpuc->amd_nb);
-
- if (boot_cpu_data.x86_max_cores < 2)
- return NOTIFY_OK;
-
- cpuc->amd_nb = amd_alloc_nb(cpu);
- if (!cpuc->amd_nb)
- return NOTIFY_BAD;
-
- return NOTIFY_OK;
-}
-
-static void amd_pmu_cpu_starting(int cpu)
-{
- struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
- void **onln = &cpuc->kfree_on_online[X86_PERF_KFREE_SHARED];
- struct amd_nb *nb;
- int i, nb_id;
-
- cpuc->perf_ctr_virt_mask = AMD64_EVENTSEL_HOSTONLY;
-
- if (boot_cpu_data.x86_max_cores < 2)
- return;
-
- nb_id = amd_get_nb_id(cpu);
- WARN_ON_ONCE(nb_id == BAD_APICID);
-
- for_each_online_cpu(i) {
- nb = per_cpu(cpu_hw_events, i).amd_nb;
- if (WARN_ON_ONCE(!nb))
- continue;
-
- if (nb->nb_id == nb_id) {
- *onln = cpuc->amd_nb;
- cpuc->amd_nb = nb;
- break;
- }
- }
-
- cpuc->amd_nb->nb_id = nb_id;
- cpuc->amd_nb->refcnt++;
-}
-
-static void amd_pmu_cpu_dead(int cpu)
-{
- struct cpu_hw_events *cpuhw;
-
- if (boot_cpu_data.x86_max_cores < 2)
- return;
-
- cpuhw = &per_cpu(cpu_hw_events, cpu);
-
- if (cpuhw->amd_nb) {
- struct amd_nb *nb = cpuhw->amd_nb;
-
- if (nb->nb_id == -1 || --nb->refcnt == 0)
- kfree(nb);
-
- cpuhw->amd_nb = NULL;
- }
-}
-
-static struct event_constraint *
-amd_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
- struct perf_event *event)
-{
- /*
- * if not NB event or no NB, then no constraints
- */
- if (!(amd_has_nb(cpuc) && amd_is_nb_event(&event->hw)))
- return &unconstrained;
-
- return __amd_get_nb_event_constraints(cpuc, event, NULL);
-}
-
-static void amd_put_event_constraints(struct cpu_hw_events *cpuc,
- struct perf_event *event)
-{
- if (amd_has_nb(cpuc) && amd_is_nb_event(&event->hw))
- __amd_put_nb_event_constraints(cpuc, event);
-}
-
-PMU_FORMAT_ATTR(event, "config:0-7,32-35");
-PMU_FORMAT_ATTR(umask, "config:8-15" );
-PMU_FORMAT_ATTR(edge, "config:18" );
-PMU_FORMAT_ATTR(inv, "config:23" );
-PMU_FORMAT_ATTR(cmask, "config:24-31" );
-
-static struct attribute *amd_format_attr[] = {
- &format_attr_event.attr,
- &format_attr_umask.attr,
- &format_attr_edge.attr,
- &format_attr_inv.attr,
- &format_attr_cmask.attr,
- NULL,
-};
-
-/* AMD Family 15h */
-
-#define AMD_EVENT_TYPE_MASK 0x000000F0ULL
-
-#define AMD_EVENT_FP 0x00000000ULL ... 0x00000010ULL
-#define AMD_EVENT_LS 0x00000020ULL ... 0x00000030ULL
-#define AMD_EVENT_DC 0x00000040ULL ... 0x00000050ULL
-#define AMD_EVENT_CU 0x00000060ULL ... 0x00000070ULL
-#define AMD_EVENT_IC_DE 0x00000080ULL ... 0x00000090ULL
-#define AMD_EVENT_EX_LS 0x000000C0ULL
-#define AMD_EVENT_DE 0x000000D0ULL
-#define AMD_EVENT_NB 0x000000E0ULL ... 0x000000F0ULL
-
-/*
- * AMD family 15h event code/PMC mappings:
- *
- * type = event_code & 0x0F0:
- *
- * 0x000 FP PERF_CTL[5:3]
- * 0x010 FP PERF_CTL[5:3]
- * 0x020 LS PERF_CTL[5:0]
- * 0x030 LS PERF_CTL[5:0]
- * 0x040 DC PERF_CTL[5:0]
- * 0x050 DC PERF_CTL[5:0]
- * 0x060 CU PERF_CTL[2:0]
- * 0x070 CU PERF_CTL[2:0]
- * 0x080 IC/DE PERF_CTL[2:0]
- * 0x090 IC/DE PERF_CTL[2:0]
- * 0x0A0 ---
- * 0x0B0 ---
- * 0x0C0 EX/LS PERF_CTL[5:0]
- * 0x0D0 DE PERF_CTL[2:0]
- * 0x0E0 NB NB_PERF_CTL[3:0]
- * 0x0F0 NB NB_PERF_CTL[3:0]
- *
- * Exceptions:
- *
- * 0x000 FP PERF_CTL[3], PERF_CTL[5:3] (*)
- * 0x003 FP PERF_CTL[3]
- * 0x004 FP PERF_CTL[3], PERF_CTL[5:3] (*)
- * 0x00B FP PERF_CTL[3]
- * 0x00D FP PERF_CTL[3]
- * 0x023 DE PERF_CTL[2:0]
- * 0x02D LS PERF_CTL[3]
- * 0x02E LS PERF_CTL[3,0]
- * 0x031 LS PERF_CTL[2:0] (**)
- * 0x043 CU PERF_CTL[2:0]
- * 0x045 CU PERF_CTL[2:0]
- * 0x046 CU PERF_CTL[2:0]
- * 0x054 CU PERF_CTL[2:0]
- * 0x055 CU PERF_CTL[2:0]
- * 0x08F IC PERF_CTL[0]
- * 0x187 DE PERF_CTL[0]
- * 0x188 DE PERF_CTL[0]
- * 0x0DB EX PERF_CTL[5:0]
- * 0x0DC LS PERF_CTL[5:0]
- * 0x0DD LS PERF_CTL[5:0]
- * 0x0DE LS PERF_CTL[5:0]
- * 0x0DF LS PERF_CTL[5:0]
- * 0x1C0 EX PERF_CTL[5:3]
- * 0x1D6 EX PERF_CTL[5:0]
- * 0x1D8 EX PERF_CTL[5:0]
- *
- * (*) depending on the umask all FPU counters may be used
- * (**) only one unitmask enabled at a time
- */
-
-static struct event_constraint amd_f15_PMC0 = EVENT_CONSTRAINT(0, 0x01, 0);
-static struct event_constraint amd_f15_PMC20 = EVENT_CONSTRAINT(0, 0x07, 0);
-static struct event_constraint amd_f15_PMC3 = EVENT_CONSTRAINT(0, 0x08, 0);
-static struct event_constraint amd_f15_PMC30 = EVENT_CONSTRAINT_OVERLAP(0, 0x09, 0);
-static struct event_constraint amd_f15_PMC50 = EVENT_CONSTRAINT(0, 0x3F, 0);
-static struct event_constraint amd_f15_PMC53 = EVENT_CONSTRAINT(0, 0x38, 0);
-
-static struct event_constraint *
-amd_get_event_constraints_f15h(struct cpu_hw_events *cpuc, int idx,
- struct perf_event *event)
-{
- struct hw_perf_event *hwc = &event->hw;
- unsigned int event_code = amd_get_event_code(hwc);
-
- switch (event_code & AMD_EVENT_TYPE_MASK) {
- case AMD_EVENT_FP:
- switch (event_code) {
- case 0x000:
- if (!(hwc->config & 0x0000F000ULL))
- break;
- if (!(hwc->config & 0x00000F00ULL))
- break;
- return &amd_f15_PMC3;
- case 0x004:
- if (hweight_long(hwc->config & ARCH_PERFMON_EVENTSEL_UMASK) <= 1)
- break;
- return &amd_f15_PMC3;
- case 0x003:
- case 0x00B:
- case 0x00D:
- return &amd_f15_PMC3;
- }
- return &amd_f15_PMC53;
- case AMD_EVENT_LS:
- case AMD_EVENT_DC:
- case AMD_EVENT_EX_LS:
- switch (event_code) {
- case 0x023:
- case 0x043:
- case 0x045:
- case 0x046:
- case 0x054:
- case 0x055:
- return &amd_f15_PMC20;
- case 0x02D:
- return &amd_f15_PMC3;
- case 0x02E:
- return &amd_f15_PMC30;
- case 0x031:
- if (hweight_long(hwc->config & ARCH_PERFMON_EVENTSEL_UMASK) <= 1)
- return &amd_f15_PMC20;
- return &emptyconstraint;
- case 0x1C0:
- return &amd_f15_PMC53;
- default:
- return &amd_f15_PMC50;
- }
- case AMD_EVENT_CU:
- case AMD_EVENT_IC_DE:
- case AMD_EVENT_DE:
- switch (event_code) {
- case 0x08F:
- case 0x187:
- case 0x188:
- return &amd_f15_PMC0;
- case 0x0DB ... 0x0DF:
- case 0x1D6:
- case 0x1D8:
- return &amd_f15_PMC50;
- default:
- return &amd_f15_PMC20;
- }
- case AMD_EVENT_NB:
- /* moved to perf_event_amd_uncore.c */
- return &emptyconstraint;
- default:
- return &emptyconstraint;
- }
-}
-
-static ssize_t amd_event_sysfs_show(char *page, u64 config)
-{
- u64 event = (config & ARCH_PERFMON_EVENTSEL_EVENT) |
- (config & AMD64_EVENTSEL_EVENT) >> 24;
-
- return x86_event_sysfs_show(page, config, event);
-}
-
-static __initconst const struct x86_pmu amd_pmu = {
- .name = "AMD",
- .handle_irq = x86_pmu_handle_irq,
- .disable_all = x86_pmu_disable_all,
- .enable_all = x86_pmu_enable_all,
- .enable = x86_pmu_enable_event,
- .disable = x86_pmu_disable_event,
- .hw_config = amd_pmu_hw_config,
- .schedule_events = x86_schedule_events,
- .eventsel = MSR_K7_EVNTSEL0,
- .perfctr = MSR_K7_PERFCTR0,
- .addr_offset = amd_pmu_addr_offset,
- .event_map = amd_pmu_event_map,
- .max_events = ARRAY_SIZE(amd_perfmon_event_map),
- .num_counters = AMD64_NUM_COUNTERS,
- .cntval_bits = 48,
- .cntval_mask = (1ULL << 48) - 1,
- .apic = 1,
- /* use highest bit to detect overflow */
- .max_period = (1ULL << 47) - 1,
- .get_event_constraints = amd_get_event_constraints,
- .put_event_constraints = amd_put_event_constraints,
-
- .format_attrs = amd_format_attr,
- .events_sysfs_show = amd_event_sysfs_show,
-
- .cpu_prepare = amd_pmu_cpu_prepare,
- .cpu_starting = amd_pmu_cpu_starting,
- .cpu_dead = amd_pmu_cpu_dead,
-};
-
-static int __init amd_core_pmu_init(void)
-{
- if (!boot_cpu_has(X86_FEATURE_PERFCTR_CORE))
- return 0;
-
- switch (boot_cpu_data.x86) {
- case 0x15:
- pr_cont("Fam15h ");
- x86_pmu.get_event_constraints = amd_get_event_constraints_f15h;
- break;
-
- default:
- pr_err("core perfctr but no constraints; unknown hardware!\n");
- return -ENODEV;
- }
-
- /*
-	 * If core performance counter extensions exist, we must use
- * MSR_F15H_PERF_CTL/MSR_F15H_PERF_CTR msrs. See also
- * amd_pmu_addr_offset().
- */
- x86_pmu.eventsel = MSR_F15H_PERF_CTL;
- x86_pmu.perfctr = MSR_F15H_PERF_CTR;
- x86_pmu.num_counters = AMD64_NUM_COUNTERS_CORE;
-
- pr_cont("core perfctr, ");
- return 0;
-}
-
-__init int amd_pmu_init(void)
-{
- int ret;
-
- /* Performance-monitoring supported from K7 and later: */
- if (boot_cpu_data.x86 < 6)
- return -ENODEV;
-
- x86_pmu = amd_pmu;
-
- ret = amd_core_pmu_init();
- if (ret)
- return ret;
-
- /* Events are common for all AMDs */
- memcpy(hw_cache_event_ids, amd_hw_cache_event_ids,
- sizeof(hw_cache_event_ids));
-
- return 0;
-}
-
-void amd_pmu_enable_virt(void)
-{
- struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
-
- cpuc->perf_ctr_virt_mask = 0;
-
- /* Reload all events */
- x86_pmu_disable_all();
- x86_pmu_enable_all(0);
-}
-EXPORT_SYMBOL_GPL(amd_pmu_enable_virt);
-
-void amd_pmu_disable_virt(void)
-{
- struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
-
- /*
- * We only mask out the Host-only bit so that host-only counting works
- * when SVM is disabled. If someone sets up a guest-only counter when
- * SVM is disabled the Guest-only bits still gets set and the counter
- * will not count anything.
- */
- cpuc->perf_ctr_virt_mask = AMD64_EVENTSEL_HOSTONLY;
-
- /* Reload all events */
- x86_pmu_disable_all();
- x86_pmu_enable_all(0);
-}
-EXPORT_SYMBOL_GPL(amd_pmu_disable_virt);
diff --git a/arch/x86/kernel/cpu/perf_event_amd_ibs.c b/arch/x86/kernel/cpu/perf_event_amd_ibs.c
deleted file mode 100644
index 989d3c215..000000000
--- a/arch/x86/kernel/cpu/perf_event_amd_ibs.c
+++ /dev/null
@@ -1,959 +0,0 @@
-/*
- * Performance events - AMD IBS
- *
- * Copyright (C) 2011 Advanced Micro Devices, Inc., Robert Richter
- *
- * For licencing details see kernel-base/COPYING
- */
-
-#include <linux/perf_event.h>
-#include <linux/module.h>
-#include <linux/pci.h>
-#include <linux/ptrace.h>
-#include <linux/syscore_ops.h>
-
-#include <asm/apic.h>
-
-#include "perf_event.h"
-
-static u32 ibs_caps;
-
-#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD)
-
-#include <linux/kprobes.h>
-#include <linux/hardirq.h>
-
-#include <asm/nmi.h>
-
-#define IBS_FETCH_CONFIG_MASK (IBS_FETCH_RAND_EN | IBS_FETCH_MAX_CNT)
-#define IBS_OP_CONFIG_MASK IBS_OP_MAX_CNT
-
-enum ibs_states {
- IBS_ENABLED = 0,
- IBS_STARTED = 1,
- IBS_STOPPING = 2,
-
- IBS_MAX_STATES,
-};
-
-struct cpu_perf_ibs {
- struct perf_event *event;
- unsigned long state[BITS_TO_LONGS(IBS_MAX_STATES)];
-};
-
-struct perf_ibs {
- struct pmu pmu;
- unsigned int msr;
- u64 config_mask;
- u64 cnt_mask;
- u64 enable_mask;
- u64 valid_mask;
- u64 max_period;
- unsigned long offset_mask[1];
- int offset_max;
- struct cpu_perf_ibs __percpu *pcpu;
-
- struct attribute **format_attrs;
- struct attribute_group format_group;
- const struct attribute_group *attr_groups[2];
-
- u64 (*get_count)(u64 config);
-};
-
-struct perf_ibs_data {
- u32 size;
- union {
- u32 data[0]; /* data buffer starts here */
- u32 caps;
- };
- u64 regs[MSR_AMD64_IBS_REG_COUNT_MAX];
-};
-
-static int
-perf_event_set_period(struct hw_perf_event *hwc, u64 min, u64 max, u64 *hw_period)
-{
- s64 left = local64_read(&hwc->period_left);
- s64 period = hwc->sample_period;
- int overflow = 0;
-
- /*
- * If we are way outside a reasonable range then just skip forward:
- */
- if (unlikely(left <= -period)) {
- left = period;
- local64_set(&hwc->period_left, left);
- hwc->last_period = period;
- overflow = 1;
- }
-
- if (unlikely(left < (s64)min)) {
- left += period;
- local64_set(&hwc->period_left, left);
- hwc->last_period = period;
- overflow = 1;
- }
-
- /*
- * If the hw period that triggers the sw overflow is too short
- * we might hit the irq handler. This biases the results.
- * Thus we shorten the next-to-last period and set the last
- * period to the max period.
- */
- if (left > max) {
- left -= max;
- if (left > max)
- left = max;
- else if (left < min)
- left = min;
- }
-
- *hw_period = (u64)left;
-
- return overflow;
-}
-
-static int
-perf_event_try_update(struct perf_event *event, u64 new_raw_count, int width)
-{
- struct hw_perf_event *hwc = &event->hw;
- int shift = 64 - width;
- u64 prev_raw_count;
- u64 delta;
-
- /*
- * Careful: an NMI might modify the previous event value.
- *
- * Our tactic to handle this is to first atomically read and
- * exchange a new raw count - then add that new-prev delta
- * count to the generic event atomically:
- */
- prev_raw_count = local64_read(&hwc->prev_count);
- if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
- new_raw_count) != prev_raw_count)
- return 0;
-
- /*
- * Now we have the new raw value and have updated the prev
- * timestamp already. We can now calculate the elapsed delta
- * (event-)time and add that to the generic event.
- *
- * Careful, not all hw sign-extends above the physical width
- * of the count.
- */
- delta = (new_raw_count << shift) - (prev_raw_count << shift);
- delta >>= shift;
-
- local64_add(delta, &event->count);
- local64_sub(delta, &hwc->period_left);
-
- return 1;
-}
-
-static struct perf_ibs perf_ibs_fetch;
-static struct perf_ibs perf_ibs_op;
-
-static struct perf_ibs *get_ibs_pmu(int type)
-{
- if (perf_ibs_fetch.pmu.type == type)
- return &perf_ibs_fetch;
- if (perf_ibs_op.pmu.type == type)
- return &perf_ibs_op;
- return NULL;
-}
-
-/*
- * Use IBS for precise event sampling:
- *
- * perf record -a -e cpu-cycles:p ... # use ibs op counting cycle count
- * perf record -a -e r076:p ... # same as -e cpu-cycles:p
- * perf record -a -e r0C1:p ... # use ibs op counting micro-ops
- *
- * IbsOpCntCtl (bit 19) of IBS Execution Control Register (IbsOpCtl,
- * MSRC001_1033) is used to select either cycle or micro-ops counting
- * mode.
- *
- * The rip of IBS samples has skid 0. Thus, IBS supports precise
- * levels 1 and 2 and the PERF_EFLAGS_EXACT is set. In rare cases the
- * rip is invalid when IBS was not able to record the rip correctly.
- * We clear PERF_EFLAGS_EXACT and take the rip from pt_regs then.
- *
- */
-static int perf_ibs_precise_event(struct perf_event *event, u64 *config)
-{
- switch (event->attr.precise_ip) {
- case 0:
- return -ENOENT;
- case 1:
- case 2:
- break;
- default:
- return -EOPNOTSUPP;
- }
-
- switch (event->attr.type) {
- case PERF_TYPE_HARDWARE:
- switch (event->attr.config) {
- case PERF_COUNT_HW_CPU_CYCLES:
- *config = 0;
- return 0;
- }
- break;
- case PERF_TYPE_RAW:
- switch (event->attr.config) {
- case 0x0076:
- *config = 0;
- return 0;
- case 0x00C1:
- *config = IBS_OP_CNT_CTL;
- return 0;
- }
- break;
- default:
- return -ENOENT;
- }
-
- return -EOPNOTSUPP;
-}
-
-static const struct perf_event_attr ibs_notsupp = {
- .exclude_user = 1,
- .exclude_kernel = 1,
- .exclude_hv = 1,
- .exclude_idle = 1,
- .exclude_host = 1,
- .exclude_guest = 1,
-};
-
-static int perf_ibs_init(struct perf_event *event)
-{
- struct hw_perf_event *hwc = &event->hw;
- struct perf_ibs *perf_ibs;
- u64 max_cnt, config;
- int ret;
-
- perf_ibs = get_ibs_pmu(event->attr.type);
- if (perf_ibs) {
- config = event->attr.config;
- } else {
- perf_ibs = &perf_ibs_op;
- ret = perf_ibs_precise_event(event, &config);
- if (ret)
- return ret;
- }
-
- if (event->pmu != &perf_ibs->pmu)
- return -ENOENT;
-
- if (perf_flags(&event->attr) & perf_flags(&ibs_notsupp))
- return -EINVAL;
-
- if (config & ~perf_ibs->config_mask)
- return -EINVAL;
-
- if (hwc->sample_period) {
- if (config & perf_ibs->cnt_mask)
- /* raw max_cnt may not be set */
- return -EINVAL;
- if (!event->attr.sample_freq && hwc->sample_period & 0x0f)
- /*
- * lower 4 bits can not be set in ibs max cnt,
- * but allowing it in case we adjust the
- * sample period to set a frequency.
- */
- return -EINVAL;
- hwc->sample_period &= ~0x0FULL;
- if (!hwc->sample_period)
- hwc->sample_period = 0x10;
- } else {
- max_cnt = config & perf_ibs->cnt_mask;
- config &= ~perf_ibs->cnt_mask;
- event->attr.sample_period = max_cnt << 4;
- hwc->sample_period = event->attr.sample_period;
- }
-
- if (!hwc->sample_period)
- return -EINVAL;
-
- /*
- * If we modify hwc->sample_period, we also need to update
- * hwc->last_period and hwc->period_left.
- */
- hwc->last_period = hwc->sample_period;
- local64_set(&hwc->period_left, hwc->sample_period);
-
- hwc->config_base = perf_ibs->msr;
- hwc->config = config;
-
- return 0;
-}
-
-static int perf_ibs_set_period(struct perf_ibs *perf_ibs,
- struct hw_perf_event *hwc, u64 *period)
-{
- int overflow;
-
- /* ignore lower 4 bits in min count: */
- overflow = perf_event_set_period(hwc, 1<<4, perf_ibs->max_period, period);
- local64_set(&hwc->prev_count, 0);
-
- return overflow;
-}
-
-static u64 get_ibs_fetch_count(u64 config)
-{
- return (config & IBS_FETCH_CNT) >> 12;
-}
-
-static u64 get_ibs_op_count(u64 config)
-{
- u64 count = 0;
-
- if (config & IBS_OP_VAL)
- count += (config & IBS_OP_MAX_CNT) << 4; /* cnt rolled over */
-
- if (ibs_caps & IBS_CAPS_RDWROPCNT)
- count += (config & IBS_OP_CUR_CNT) >> 32;
-
- return count;
-}
-
-static void
-perf_ibs_event_update(struct perf_ibs *perf_ibs, struct perf_event *event,
- u64 *config)
-{
- u64 count = perf_ibs->get_count(*config);
-
- /*
- * Set width to 64 since we do not overflow on max width but
- * instead on max count. In perf_ibs_set_period() we clear
- * prev count manually on overflow.
- */
- while (!perf_event_try_update(event, count, 64)) {
- rdmsrl(event->hw.config_base, *config);
- count = perf_ibs->get_count(*config);
- }
-}
-
-static inline void perf_ibs_enable_event(struct perf_ibs *perf_ibs,
- struct hw_perf_event *hwc, u64 config)
-{
- wrmsrl(hwc->config_base, hwc->config | config | perf_ibs->enable_mask);
-}
-
-/*
- * Erratum #420 Instruction-Based Sampling Engine May Generate
- * Interrupt that Cannot Be Cleared:
- *
- * Must clear counter mask first, then clear the enable bit. See
- * Revision Guide for AMD Family 10h Processors, Publication #41322.
- */
-static inline void perf_ibs_disable_event(struct perf_ibs *perf_ibs,
- struct hw_perf_event *hwc, u64 config)
-{
- config &= ~perf_ibs->cnt_mask;
- wrmsrl(hwc->config_base, config);
- config &= ~perf_ibs->enable_mask;
- wrmsrl(hwc->config_base, config);
-}
-
-/*
- * We cannot restore the ibs pmu state, so we always need to update
- * the event while stopping it and then reset the state when starting
- * again. Thus, we ignore the PERF_EF_RELOAD and PERF_EF_UPDATE flags in
- * perf_ibs_start()/perf_ibs_stop() and instead always do it.
- */
-static void perf_ibs_start(struct perf_event *event, int flags)
-{
- struct hw_perf_event *hwc = &event->hw;
- struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
- struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
- u64 period;
-
- if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED)))
- return;
-
- WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
- hwc->state = 0;
-
- perf_ibs_set_period(perf_ibs, hwc, &period);
- set_bit(IBS_STARTED, pcpu->state);
- perf_ibs_enable_event(perf_ibs, hwc, period >> 4);
-
- perf_event_update_userpage(event);
-}
-
-static void perf_ibs_stop(struct perf_event *event, int flags)
-{
- struct hw_perf_event *hwc = &event->hw;
- struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
- struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
- u64 config;
- int stopping;
-
- stopping = test_and_clear_bit(IBS_STARTED, pcpu->state);
-
- if (!stopping && (hwc->state & PERF_HES_UPTODATE))
- return;
-
- rdmsrl(hwc->config_base, config);
-
- if (stopping) {
- set_bit(IBS_STOPPING, pcpu->state);
- perf_ibs_disable_event(perf_ibs, hwc, config);
- WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
- hwc->state |= PERF_HES_STOPPED;
- }
-
- if (hwc->state & PERF_HES_UPTODATE)
- return;
-
- /*
- * Clear valid bit to not count rollovers on update, rollovers
- * are only updated in the irq handler.
- */
- config &= ~perf_ibs->valid_mask;
-
- perf_ibs_event_update(perf_ibs, event, &config);
- hwc->state |= PERF_HES_UPTODATE;
-}
-
-static int perf_ibs_add(struct perf_event *event, int flags)
-{
- struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
- struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
-
- if (test_and_set_bit(IBS_ENABLED, pcpu->state))
- return -ENOSPC;
-
- event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
-
- pcpu->event = event;
-
- if (flags & PERF_EF_START)
- perf_ibs_start(event, PERF_EF_RELOAD);
-
- return 0;
-}
-
-static void perf_ibs_del(struct perf_event *event, int flags)
-{
- struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
- struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
-
- if (!test_and_clear_bit(IBS_ENABLED, pcpu->state))
- return;
-
- perf_ibs_stop(event, PERF_EF_UPDATE);
-
- pcpu->event = NULL;
-
- perf_event_update_userpage(event);
-}
-
-static void perf_ibs_read(struct perf_event *event) { }
-
-PMU_FORMAT_ATTR(rand_en, "config:57");
-PMU_FORMAT_ATTR(cnt_ctl, "config:19");
-
-static struct attribute *ibs_fetch_format_attrs[] = {
- &format_attr_rand_en.attr,
- NULL,
-};
-
-static struct attribute *ibs_op_format_attrs[] = {
- NULL, /* &format_attr_cnt_ctl.attr if IBS_CAPS_OPCNT */
- NULL,
-};
-
-static struct perf_ibs perf_ibs_fetch = {
- .pmu = {
- .task_ctx_nr = perf_invalid_context,
-
- .event_init = perf_ibs_init,
- .add = perf_ibs_add,
- .del = perf_ibs_del,
- .start = perf_ibs_start,
- .stop = perf_ibs_stop,
- .read = perf_ibs_read,
- },
- .msr = MSR_AMD64_IBSFETCHCTL,
- .config_mask = IBS_FETCH_CONFIG_MASK,
- .cnt_mask = IBS_FETCH_MAX_CNT,
- .enable_mask = IBS_FETCH_ENABLE,
- .valid_mask = IBS_FETCH_VAL,
- .max_period = IBS_FETCH_MAX_CNT << 4,
- .offset_mask = { MSR_AMD64_IBSFETCH_REG_MASK },
- .offset_max = MSR_AMD64_IBSFETCH_REG_COUNT,
- .format_attrs = ibs_fetch_format_attrs,
-
- .get_count = get_ibs_fetch_count,
-};
-
-static struct perf_ibs perf_ibs_op = {
- .pmu = {
- .task_ctx_nr = perf_invalid_context,
-
- .event_init = perf_ibs_init,
- .add = perf_ibs_add,
- .del = perf_ibs_del,
- .start = perf_ibs_start,
- .stop = perf_ibs_stop,
- .read = perf_ibs_read,
- },
- .msr = MSR_AMD64_IBSOPCTL,
- .config_mask = IBS_OP_CONFIG_MASK,
- .cnt_mask = IBS_OP_MAX_CNT,
- .enable_mask = IBS_OP_ENABLE,
- .valid_mask = IBS_OP_VAL,
- .max_period = IBS_OP_MAX_CNT << 4,
- .offset_mask = { MSR_AMD64_IBSOP_REG_MASK },
- .offset_max = MSR_AMD64_IBSOP_REG_COUNT,
- .format_attrs = ibs_op_format_attrs,
-
- .get_count = get_ibs_op_count,
-};
-
-static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs)
-{
- struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
- struct perf_event *event = pcpu->event;
- struct hw_perf_event *hwc = &event->hw;
- struct perf_sample_data data;
- struct perf_raw_record raw;
- struct pt_regs regs;
- struct perf_ibs_data ibs_data;
- int offset, size, check_rip, offset_max, throttle = 0;
- unsigned int msr;
- u64 *buf, *config, period;
-
- if (!test_bit(IBS_STARTED, pcpu->state)) {
- /*
- * Catch spurious interrupts after stopping IBS: After
-		 * disabling IBS there could still be incoming NMIs
-		 * with samples that even have the valid bit cleared.
-		 * Mark all these NMIs as handled.
- */
- return test_and_clear_bit(IBS_STOPPING, pcpu->state) ? 1 : 0;
- }
-
- msr = hwc->config_base;
- buf = ibs_data.regs;
- rdmsrl(msr, *buf);
- if (!(*buf++ & perf_ibs->valid_mask))
- return 0;
-
- config = &ibs_data.regs[0];
- perf_ibs_event_update(perf_ibs, event, config);
- perf_sample_data_init(&data, 0, hwc->last_period);
- if (!perf_ibs_set_period(perf_ibs, hwc, &period))
- goto out; /* no sw counter overflow */
-
- ibs_data.caps = ibs_caps;
- size = 1;
- offset = 1;
- check_rip = (perf_ibs == &perf_ibs_op && (ibs_caps & IBS_CAPS_RIPINVALIDCHK));
- if (event->attr.sample_type & PERF_SAMPLE_RAW)
- offset_max = perf_ibs->offset_max;
- else if (check_rip)
- offset_max = 2;
- else
- offset_max = 1;
- do {
- rdmsrl(msr + offset, *buf++);
- size++;
- offset = find_next_bit(perf_ibs->offset_mask,
- perf_ibs->offset_max,
- offset + 1);
- } while (offset < offset_max);
- if (event->attr.sample_type & PERF_SAMPLE_RAW) {
- /*
- * Read IbsBrTarget and IbsOpData4 separately
- * depending on their availability.
- * Can't add to offset_max as they are staggered
- */
- if (ibs_caps & IBS_CAPS_BRNTRGT) {
- rdmsrl(MSR_AMD64_IBSBRTARGET, *buf++);
- size++;
- }
- if (ibs_caps & IBS_CAPS_OPDATA4) {
- rdmsrl(MSR_AMD64_IBSOPDATA4, *buf++);
- size++;
- }
- }
- ibs_data.size = sizeof(u64) * size;
-
- regs = *iregs;
- if (check_rip && (ibs_data.regs[2] & IBS_RIP_INVALID)) {
- regs.flags &= ~PERF_EFLAGS_EXACT;
- } else {
- set_linear_ip(&regs, ibs_data.regs[1]);
- regs.flags |= PERF_EFLAGS_EXACT;
- }
-
- if (event->attr.sample_type & PERF_SAMPLE_RAW) {
- raw.size = sizeof(u32) + ibs_data.size;
- raw.data = ibs_data.data;
- data.raw = &raw;
- }
-
- throttle = perf_event_overflow(event, &data, &regs);
-out:
- if (throttle)
- perf_ibs_disable_event(perf_ibs, hwc, *config);
- else
- perf_ibs_enable_event(perf_ibs, hwc, period >> 4);
-
- perf_event_update_userpage(event);
-
- return 1;
-}
-
-static int
-perf_ibs_nmi_handler(unsigned int cmd, struct pt_regs *regs)
-{
- int handled = 0;
-
- handled += perf_ibs_handle_irq(&perf_ibs_fetch, regs);
- handled += perf_ibs_handle_irq(&perf_ibs_op, regs);
-
- if (handled)
- inc_irq_stat(apic_perf_irqs);
-
- return handled;
-}
-NOKPROBE_SYMBOL(perf_ibs_nmi_handler);
-
-static __init int perf_ibs_pmu_init(struct perf_ibs *perf_ibs, char *name)
-{
- struct cpu_perf_ibs __percpu *pcpu;
- int ret;
-
- pcpu = alloc_percpu(struct cpu_perf_ibs);
- if (!pcpu)
- return -ENOMEM;
-
- perf_ibs->pcpu = pcpu;
-
- /* register attributes */
- if (perf_ibs->format_attrs[0]) {
- memset(&perf_ibs->format_group, 0, sizeof(perf_ibs->format_group));
- perf_ibs->format_group.name = "format";
- perf_ibs->format_group.attrs = perf_ibs->format_attrs;
-
- memset(&perf_ibs->attr_groups, 0, sizeof(perf_ibs->attr_groups));
- perf_ibs->attr_groups[0] = &perf_ibs->format_group;
- perf_ibs->pmu.attr_groups = perf_ibs->attr_groups;
- }
-
- ret = perf_pmu_register(&perf_ibs->pmu, name, -1);
- if (ret) {
- perf_ibs->pcpu = NULL;
- free_percpu(pcpu);
- }
-
- return ret;
-}
-
-static __init int perf_event_ibs_init(void)
-{
- struct attribute **attr = ibs_op_format_attrs;
-
- if (!ibs_caps)
- return -ENODEV; /* ibs not supported by the cpu */
-
- perf_ibs_pmu_init(&perf_ibs_fetch, "ibs_fetch");
-
- if (ibs_caps & IBS_CAPS_OPCNT) {
- perf_ibs_op.config_mask |= IBS_OP_CNT_CTL;
- *attr++ = &format_attr_cnt_ctl.attr;
- }
- perf_ibs_pmu_init(&perf_ibs_op, "ibs_op");
-
- register_nmi_handler(NMI_LOCAL, perf_ibs_nmi_handler, 0, "perf_ibs");
- printk(KERN_INFO "perf: AMD IBS detected (0x%08x)\n", ibs_caps);
-
- return 0;
-}
-
-#else /* defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD) */
-
-static __init int perf_event_ibs_init(void) { return 0; }
-
-#endif
-
-/* IBS - apic initialization, for perf and oprofile */
-
-static __init u32 __get_ibs_caps(void)
-{
- u32 caps;
- unsigned int max_level;
-
- if (!boot_cpu_has(X86_FEATURE_IBS))
- return 0;
-
- /* check IBS cpuid feature flags */
- max_level = cpuid_eax(0x80000000);
- if (max_level < IBS_CPUID_FEATURES)
- return IBS_CAPS_DEFAULT;
-
- caps = cpuid_eax(IBS_CPUID_FEATURES);
- if (!(caps & IBS_CAPS_AVAIL))
- /* cpuid flags not valid */
- return IBS_CAPS_DEFAULT;
-
- return caps;
-}
-
-u32 get_ibs_caps(void)
-{
- return ibs_caps;
-}
-
-EXPORT_SYMBOL(get_ibs_caps);
-
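-/*
- * Helpers that reserve/release an extended-interrupt LVT entry for the IBS
- * NMI via setup_APIC_eilvt(); both return nonzero on success.
- */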
-static inline int get_eilvt(int offset)
-{
- return !setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_NMI, 1);
-}
-
-static inline int put_eilvt(int offset)
-{
- return !setup_APIC_eilvt(offset, 0, 0, 1);
-}
-
-/*
- * Check and reserve APIC extended interrupt LVT offset for IBS if available.
- */
-static inline int ibs_eilvt_valid(void)
-{
- int offset;
- u64 val;
- int valid = 0;
-
- preempt_disable();
-
- rdmsrl(MSR_AMD64_IBSCTL, val);
- offset = val & IBSCTL_LVT_OFFSET_MASK;
-
- if (!(val & IBSCTL_LVT_OFFSET_VALID)) {
- pr_err(FW_BUG "cpu %d, invalid IBS interrupt offset %d (MSR%08X=0x%016llx)\n",
- smp_processor_id(), offset, MSR_AMD64_IBSCTL, val);
- goto out;
- }
-
- if (!get_eilvt(offset)) {
- pr_err(FW_BUG "cpu %d, IBS interrupt offset %d not available (MSR%08X=0x%016llx)\n",
- smp_processor_id(), offset, MSR_AMD64_IBSCTL, val);
- goto out;
- }
-
- valid = 1;
-out:
- preempt_enable();
-
- return valid;
-}
-
-static int setup_ibs_ctl(int ibs_eilvt_off)
-{
- struct pci_dev *cpu_cfg;
- int nodes;
- u32 value = 0;
-
- nodes = 0;
- cpu_cfg = NULL;
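-	/*
-	 * Write the LVT offset into the IBSCTL register of each northbridge
-	 * (one per node) and read it back to verify the update took effect.
-	 */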
- do {
- cpu_cfg = pci_get_device(PCI_VENDOR_ID_AMD,
- PCI_DEVICE_ID_AMD_10H_NB_MISC,
- cpu_cfg);
- if (!cpu_cfg)
- break;
- ++nodes;
- pci_write_config_dword(cpu_cfg, IBSCTL, ibs_eilvt_off
- | IBSCTL_LVT_OFFSET_VALID);
- pci_read_config_dword(cpu_cfg, IBSCTL, &value);
- if (value != (ibs_eilvt_off | IBSCTL_LVT_OFFSET_VALID)) {
- pci_dev_put(cpu_cfg);
- printk(KERN_DEBUG "Failed to setup IBS LVT offset, "
- "IBSCTL = 0x%08x\n", value);
- return -EINVAL;
- }
- } while (1);
-
- if (!nodes) {
- printk(KERN_DEBUG "No CPU node configured for IBS\n");
- return -ENODEV;
- }
-
- return 0;
-}
-
-/*
- * This runs only on the current cpu. We try to find an LVT offset and
- * set up the local APIC. For this we must disable preemption. On
- * success we initialize all nodes with this offset. This then updates
- * the offset in the per-node IBS_CTL MSR. The per-core APIC setup of
- * the IBS interrupt vector is handled by perf_ibs_cpu_notifier, which
- * uses the new offset.
- */
-static void force_ibs_eilvt_setup(void)
-{
- int offset;
- int ret;
-
- preempt_disable();
-	/* find the next free EILVT entry, skip offset 0 */
- for (offset = 1; offset < APIC_EILVT_NR_MAX; offset++) {
- if (get_eilvt(offset))
- break;
- }
- preempt_enable();
-
- if (offset == APIC_EILVT_NR_MAX) {
- printk(KERN_DEBUG "No EILVT entry available\n");
- return;
- }
-
- ret = setup_ibs_ctl(offset);
- if (ret)
- goto out;
-
- if (!ibs_eilvt_valid())
- goto out;
-
- pr_info("IBS: LVT offset %d assigned\n", offset);
-
- return;
-out:
- preempt_disable();
- put_eilvt(offset);
- preempt_enable();
- return;
-}
-
-static void ibs_eilvt_setup(void)
-{
- /*
-	 * Force LVT offset assignment for family 10h: the offsets are
-	 * not assigned by the BIOS for this family, so the OS is
-	 * responsible for doing it. If the OS assignment fails, fall
-	 * back to the BIOS settings and try to set things up with those.
- */
- if (boot_cpu_data.x86 == 0x10)
- force_ibs_eilvt_setup();
-}
-
-static inline int get_ibs_lvt_offset(void)
-{
- u64 val;
-
- rdmsrl(MSR_AMD64_IBSCTL, val);
- if (!(val & IBSCTL_LVT_OFFSET_VALID))
- return -EINVAL;
-
- return val & IBSCTL_LVT_OFFSET_MASK;
-}
-
-static void setup_APIC_ibs(void *dummy)
-{
- int offset;
-
- offset = get_ibs_lvt_offset();
- if (offset < 0)
- goto failed;
-
- if (!setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_NMI, 0))
- return;
-failed:
- pr_warn("perf: IBS APIC setup failed on cpu #%d\n",
- smp_processor_id());
-}
-
-static void clear_APIC_ibs(void *dummy)
-{
- int offset;
-
- offset = get_ibs_lvt_offset();
- if (offset >= 0)
- setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_FIX, 1);
-}
-
-#ifdef CONFIG_PM
-
-static int perf_ibs_suspend(void)
-{
- clear_APIC_ibs(NULL);
- return 0;
-}
-
-static void perf_ibs_resume(void)
-{
- ibs_eilvt_setup();
- setup_APIC_ibs(NULL);
-}
-
-static struct syscore_ops perf_ibs_syscore_ops = {
- .resume = perf_ibs_resume,
- .suspend = perf_ibs_suspend,
-};
-
-static void perf_ibs_pm_init(void)
-{
- register_syscore_ops(&perf_ibs_syscore_ops);
-}
-
-#else
-
-static inline void perf_ibs_pm_init(void) { }
-
-#endif
-
-static int
-perf_ibs_cpu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
-{
- switch (action & ~CPU_TASKS_FROZEN) {
- case CPU_STARTING:
- setup_APIC_ibs(NULL);
- break;
- case CPU_DYING:
- clear_APIC_ibs(NULL);
- break;
- default:
- break;
- }
-
- return NOTIFY_OK;
-}
-
-static __init int amd_ibs_init(void)
-{
- u32 caps;
- int ret = -EINVAL;
-
- caps = __get_ibs_caps();
- if (!caps)
- return -ENODEV; /* ibs not supported by the cpu */
-
- ibs_eilvt_setup();
-
- if (!ibs_eilvt_valid())
- goto out;
-
- perf_ibs_pm_init();
- cpu_notifier_register_begin();
- ibs_caps = caps;
- /* make ibs_caps visible to other cpus: */
- smp_mb();
- smp_call_function(setup_APIC_ibs, NULL, 1);
- __perf_cpu_notifier(perf_ibs_cpu_notifier);
- cpu_notifier_register_done();
-
- ret = perf_event_ibs_init();
-out:
- if (ret)
- pr_err("Failed to setup IBS, %d\n", ret);
- return ret;
-}
-
-/* Since we need the PCI subsystem to init IBS, we can't do this earlier: */
-device_initcall(amd_ibs_init);
diff --git a/arch/x86/kernel/cpu/perf_event_amd_iommu.c b/arch/x86/kernel/cpu/perf_event_amd_iommu.c
deleted file mode 100644
index 97242a924..000000000
--- a/arch/x86/kernel/cpu/perf_event_amd_iommu.c
+++ /dev/null
@@ -1,499 +0,0 @@
-/*
- * Copyright (C) 2013 Advanced Micro Devices, Inc.
- *
- * Author: Steven Kinney <Steven.Kinney@amd.com>
- * Author: Suravee Suthikulpanit <Suraveee.Suthikulpanit@amd.com>
- *
- * Perf: amd_iommu - AMD IOMMU Performance Counter PMU implementation
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/perf_event.h>
-#include <linux/module.h>
-#include <linux/cpumask.h>
-#include <linux/slab.h>
-
-#include "perf_event.h"
-#include "perf_event_amd_iommu.h"
-
-#define COUNTER_SHIFT 16
-
-#define _GET_BANK(ev) ((u8)(ev->hw.extra_reg.reg >> 8))
-#define _GET_CNTR(ev) ((u8)(ev->hw.extra_reg.reg))
-
-/* iommu pmu config masks */
-#define _GET_CSOURCE(ev) ((ev->hw.config & 0xFFULL))
-#define _GET_DEVID(ev) ((ev->hw.config >> 8) & 0xFFFFULL)
-#define _GET_PASID(ev) ((ev->hw.config >> 24) & 0xFFFFULL)
-#define _GET_DOMID(ev) ((ev->hw.config >> 40) & 0xFFFFULL)
-#define _GET_DEVID_MASK(ev) ((ev->hw.extra_reg.config) & 0xFFFFULL)
-#define _GET_PASID_MASK(ev) ((ev->hw.extra_reg.config >> 16) & 0xFFFFULL)
-#define _GET_DOMID_MASK(ev) ((ev->hw.extra_reg.config >> 32) & 0xFFFFULL)
-
-static struct perf_amd_iommu __perf_iommu;
-
-struct perf_amd_iommu {
- struct pmu pmu;
- u8 max_banks;
- u8 max_counters;
- u64 cntr_assign_mask;
- raw_spinlock_t lock;
- const struct attribute_group *attr_groups[4];
-};
-
-#define format_group attr_groups[0]
-#define cpumask_group attr_groups[1]
-#define events_group attr_groups[2]
-#define null_group attr_groups[3]
-
-/*---------------------------------------------
- * sysfs format attributes
- *---------------------------------------------*/
-PMU_FORMAT_ATTR(csource, "config:0-7");
-PMU_FORMAT_ATTR(devid, "config:8-23");
-PMU_FORMAT_ATTR(pasid, "config:24-39");
-PMU_FORMAT_ATTR(domid, "config:40-55");
-PMU_FORMAT_ATTR(devid_mask, "config1:0-15");
-PMU_FORMAT_ATTR(pasid_mask, "config1:16-31");
-PMU_FORMAT_ATTR(domid_mask, "config1:32-47");
-
-static struct attribute *iommu_format_attrs[] = {
- &format_attr_csource.attr,
- &format_attr_devid.attr,
- &format_attr_pasid.attr,
- &format_attr_domid.attr,
- &format_attr_devid_mask.attr,
- &format_attr_pasid_mask.attr,
- &format_attr_domid_mask.attr,
- NULL,
-};
-
-static struct attribute_group amd_iommu_format_group = {
- .name = "format",
- .attrs = iommu_format_attrs,
-};
-
-/*---------------------------------------------
- * sysfs events attributes
- *---------------------------------------------*/
-struct amd_iommu_event_desc {
- struct kobj_attribute attr;
- const char *event;
-};
-
-static ssize_t _iommu_event_show(struct kobject *kobj,
- struct kobj_attribute *attr, char *buf)
-{
- struct amd_iommu_event_desc *event =
- container_of(attr, struct amd_iommu_event_desc, attr);
- return sprintf(buf, "%s\n", event->event);
-}
-
-#define AMD_IOMMU_EVENT_DESC(_name, _event) \
-{ \
- .attr = __ATTR(_name, 0444, _iommu_event_show, NULL), \
- .event = _event, \
-}
-
-static struct amd_iommu_event_desc amd_iommu_v2_event_descs[] = {
- AMD_IOMMU_EVENT_DESC(mem_pass_untrans, "csource=0x01"),
- AMD_IOMMU_EVENT_DESC(mem_pass_pretrans, "csource=0x02"),
- AMD_IOMMU_EVENT_DESC(mem_pass_excl, "csource=0x03"),
- AMD_IOMMU_EVENT_DESC(mem_target_abort, "csource=0x04"),
- AMD_IOMMU_EVENT_DESC(mem_trans_total, "csource=0x05"),
- AMD_IOMMU_EVENT_DESC(mem_iommu_tlb_pte_hit, "csource=0x06"),
- AMD_IOMMU_EVENT_DESC(mem_iommu_tlb_pte_mis, "csource=0x07"),
- AMD_IOMMU_EVENT_DESC(mem_iommu_tlb_pde_hit, "csource=0x08"),
- AMD_IOMMU_EVENT_DESC(mem_iommu_tlb_pde_mis, "csource=0x09"),
- AMD_IOMMU_EVENT_DESC(mem_dte_hit, "csource=0x0a"),
- AMD_IOMMU_EVENT_DESC(mem_dte_mis, "csource=0x0b"),
- AMD_IOMMU_EVENT_DESC(page_tbl_read_tot, "csource=0x0c"),
- AMD_IOMMU_EVENT_DESC(page_tbl_read_nst, "csource=0x0d"),
- AMD_IOMMU_EVENT_DESC(page_tbl_read_gst, "csource=0x0e"),
- AMD_IOMMU_EVENT_DESC(int_dte_hit, "csource=0x0f"),
- AMD_IOMMU_EVENT_DESC(int_dte_mis, "csource=0x10"),
- AMD_IOMMU_EVENT_DESC(cmd_processed, "csource=0x11"),
- AMD_IOMMU_EVENT_DESC(cmd_processed_inv, "csource=0x12"),
- AMD_IOMMU_EVENT_DESC(tlb_inv, "csource=0x13"),
- { /* end: all zeroes */ },
-};
-
-/*---------------------------------------------
- * sysfs cpumask attributes
- *---------------------------------------------*/
-static cpumask_t iommu_cpumask;
-
-static ssize_t _iommu_cpumask_show(struct device *dev,
- struct device_attribute *attr,
- char *buf)
-{
- return cpumap_print_to_pagebuf(true, buf, &iommu_cpumask);
-}
-static DEVICE_ATTR(cpumask, S_IRUGO, _iommu_cpumask_show, NULL);
-
-static struct attribute *iommu_cpumask_attrs[] = {
- &dev_attr_cpumask.attr,
- NULL,
-};
-
-static struct attribute_group amd_iommu_cpumask_group = {
- .attrs = iommu_cpumask_attrs,
-};
-
-/*---------------------------------------------*/
-
-static int get_next_avail_iommu_bnk_cntr(struct perf_amd_iommu *perf_iommu)
-{
- unsigned long flags;
- int shift, bank, cntr, retval;
- int max_banks = perf_iommu->max_banks;
- int max_cntrs = perf_iommu->max_counters;
-
- raw_spin_lock_irqsave(&perf_iommu->lock, flags);
-
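-	/*
-	 * Counters are packed four per bank in cntr_assign_mask, so the bit
-	 * for (bank, cntr) sits at shift = bank * 4 + cntr.
-	 */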
- for (bank = 0, shift = 0; bank < max_banks; bank++) {
- for (cntr = 0; cntr < max_cntrs; cntr++) {
- shift = bank + (bank*3) + cntr;
- if (perf_iommu->cntr_assign_mask & (1ULL<<shift)) {
- continue;
- } else {
- perf_iommu->cntr_assign_mask |= (1ULL<<shift);
- retval = ((u16)((u16)bank<<8) | (u8)(cntr));
- goto out;
- }
- }
- }
- retval = -ENOSPC;
-out:
- raw_spin_unlock_irqrestore(&perf_iommu->lock, flags);
- return retval;
-}
-
-static int clear_avail_iommu_bnk_cntr(struct perf_amd_iommu *perf_iommu,
- u8 bank, u8 cntr)
-{
- unsigned long flags;
- int max_banks, max_cntrs;
- int shift = 0;
-
- max_banks = perf_iommu->max_banks;
- max_cntrs = perf_iommu->max_counters;
-
- if ((bank > max_banks) || (cntr > max_cntrs))
- return -EINVAL;
-
- shift = bank + cntr + (bank*3);
-
- raw_spin_lock_irqsave(&perf_iommu->lock, flags);
- perf_iommu->cntr_assign_mask &= ~(1ULL<<shift);
- raw_spin_unlock_irqrestore(&perf_iommu->lock, flags);
-
- return 0;
-}
-
-static int perf_iommu_event_init(struct perf_event *event)
-{
- struct hw_perf_event *hwc = &event->hw;
- struct perf_amd_iommu *perf_iommu;
- u64 config, config1;
-
-	/* test the event attr type for PMU enumeration */
- if (event->attr.type != event->pmu->type)
- return -ENOENT;
-
- /*
-	 * IOMMU counters are shared across all cores.
-	 * Therefore, they support neither per-process mode
-	 * nor event sampling mode.
- */
- if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK)
- return -EINVAL;
-
- /* IOMMU counters do not have usr/os/guest/host bits */
- if (event->attr.exclude_user || event->attr.exclude_kernel ||
- event->attr.exclude_host || event->attr.exclude_guest)
- return -EINVAL;
-
- if (event->cpu < 0)
- return -EINVAL;
-
- perf_iommu = &__perf_iommu;
-
- if (event->pmu != &perf_iommu->pmu)
- return -ENOENT;
-
- if (perf_iommu) {
- config = event->attr.config;
- config1 = event->attr.config1;
- } else {
- return -EINVAL;
- }
-
- /* integrate with iommu base devid (0000), assume one iommu */
- perf_iommu->max_banks =
- amd_iommu_pc_get_max_banks(IOMMU_BASE_DEVID);
- perf_iommu->max_counters =
- amd_iommu_pc_get_max_counters(IOMMU_BASE_DEVID);
- if ((perf_iommu->max_banks == 0) || (perf_iommu->max_counters == 0))
- return -EINVAL;
-
- /* update the hw_perf_event struct with the iommu config data */
- hwc->config = config;
- hwc->extra_reg.config = config1;
-
- return 0;
-}
-
-static void perf_iommu_enable_event(struct perf_event *ev)
-{
- u8 csource = _GET_CSOURCE(ev);
- u16 devid = _GET_DEVID(ev);
- u64 reg = 0ULL;
-
- reg = csource;
- amd_iommu_pc_get_set_reg_val(devid,
-			_GET_BANK(ev), _GET_CNTR(ev),
- IOMMU_PC_COUNTER_SRC_REG, &reg, true);
-
- reg = 0ULL | devid | (_GET_DEVID_MASK(ev) << 32);
- if (reg)
- reg |= (1UL << 31);
- amd_iommu_pc_get_set_reg_val(devid,
-			_GET_BANK(ev), _GET_CNTR(ev),
- IOMMU_PC_DEVID_MATCH_REG, &reg, true);
-
- reg = 0ULL | _GET_PASID(ev) | (_GET_PASID_MASK(ev) << 32);
- if (reg)
- reg |= (1UL << 31);
- amd_iommu_pc_get_set_reg_val(devid,
-			_GET_BANK(ev), _GET_CNTR(ev),
- IOMMU_PC_PASID_MATCH_REG, &reg, true);
-
- reg = 0ULL | _GET_DOMID(ev) | (_GET_DOMID_MASK(ev) << 32);
- if (reg)
- reg |= (1UL << 31);
- amd_iommu_pc_get_set_reg_val(devid,
-			_GET_BANK(ev), _GET_CNTR(ev),
- IOMMU_PC_DOMID_MATCH_REG, &reg, true);
-}
-
-static void perf_iommu_disable_event(struct perf_event *event)
-{
- u64 reg = 0ULL;
-
- amd_iommu_pc_get_set_reg_val(_GET_DEVID(event),
- _GET_BANK(event), _GET_CNTR(event),
- IOMMU_PC_COUNTER_SRC_REG, &reg, true);
-}
-
-static void perf_iommu_start(struct perf_event *event, int flags)
-{
- struct hw_perf_event *hwc = &event->hw;
-
- pr_debug("perf: amd_iommu:perf_iommu_start\n");
- if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED)))
- return;
-
- WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
- hwc->state = 0;
-
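-	/* On reload, restore the saved count so the counter resumes from it. */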
- if (flags & PERF_EF_RELOAD) {
- u64 prev_raw_count = local64_read(&hwc->prev_count);
- amd_iommu_pc_get_set_reg_val(_GET_DEVID(event),
- _GET_BANK(event), _GET_CNTR(event),
- IOMMU_PC_COUNTER_REG, &prev_raw_count, true);
- }
-
- perf_iommu_enable_event(event);
- perf_event_update_userpage(event);
-
-}
-
-static void perf_iommu_read(struct perf_event *event)
-{
- u64 count = 0ULL;
- u64 prev_raw_count = 0ULL;
- u64 delta = 0ULL;
- struct hw_perf_event *hwc = &event->hw;
- pr_debug("perf: amd_iommu:perf_iommu_read\n");
-
- amd_iommu_pc_get_set_reg_val(_GET_DEVID(event),
- _GET_BANK(event), _GET_CNTR(event),
- IOMMU_PC_COUNTER_REG, &count, false);
-
- /* IOMMU pc counter register is only 48 bits */
- count &= 0xFFFFFFFFFFFFULL;
-
- prev_raw_count = local64_read(&hwc->prev_count);
- if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
- count) != prev_raw_count)
- return;
-
-	/* Handle wraparound of the 48-bit counter */
- delta = (count << COUNTER_SHIFT) - (prev_raw_count << COUNTER_SHIFT);
- delta >>= COUNTER_SHIFT;
- local64_add(delta, &event->count);
-
-}
-
-static void perf_iommu_stop(struct perf_event *event, int flags)
-{
- struct hw_perf_event *hwc = &event->hw;
- u64 config;
-
- pr_debug("perf: amd_iommu:perf_iommu_stop\n");
-
- if (hwc->state & PERF_HES_UPTODATE)
- return;
-
- perf_iommu_disable_event(event);
- WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
- hwc->state |= PERF_HES_STOPPED;
-
- if (hwc->state & PERF_HES_UPTODATE)
- return;
-
- config = hwc->config;
- perf_iommu_read(event);
- hwc->state |= PERF_HES_UPTODATE;
-}
-
-static int perf_iommu_add(struct perf_event *event, int flags)
-{
- int retval;
- struct perf_amd_iommu *perf_iommu =
- container_of(event->pmu, struct perf_amd_iommu, pmu);
-
- pr_debug("perf: amd_iommu:perf_iommu_add\n");
- event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
-
- /* request an iommu bank/counter */
- retval = get_next_avail_iommu_bnk_cntr(perf_iommu);
- if (retval != -ENOSPC)
- event->hw.extra_reg.reg = (u16)retval;
- else
- return retval;
-
- if (flags & PERF_EF_START)
- perf_iommu_start(event, PERF_EF_RELOAD);
-
- return 0;
-}
-
-static void perf_iommu_del(struct perf_event *event, int flags)
-{
- struct perf_amd_iommu *perf_iommu =
- container_of(event->pmu, struct perf_amd_iommu, pmu);
-
- pr_debug("perf: amd_iommu:perf_iommu_del\n");
- perf_iommu_stop(event, PERF_EF_UPDATE);
-
- /* clear the assigned iommu bank/counter */
- clear_avail_iommu_bnk_cntr(perf_iommu,
- _GET_BANK(event),
- _GET_CNTR(event));
-
- perf_event_update_userpage(event);
-}
-
-static __init int _init_events_attrs(struct perf_amd_iommu *perf_iommu)
-{
- struct attribute **attrs;
- struct attribute_group *attr_group;
- int i = 0, j;
-
- while (amd_iommu_v2_event_descs[i].attr.attr.name)
- i++;
-
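-	/*
-	 * Allocate the attribute_group and its NULL-terminated attribute
-	 * array as a single block; attrs[] lives right after the group.
-	 */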
- attr_group = kzalloc(sizeof(struct attribute *)
- * (i + 1) + sizeof(*attr_group), GFP_KERNEL);
- if (!attr_group)
- return -ENOMEM;
-
- attrs = (struct attribute **)(attr_group + 1);
- for (j = 0; j < i; j++)
- attrs[j] = &amd_iommu_v2_event_descs[j].attr.attr;
-
- attr_group->name = "events";
- attr_group->attrs = attrs;
- perf_iommu->events_group = attr_group;
-
- return 0;
-}
-
-static __init void amd_iommu_pc_exit(void)
-{
- if (__perf_iommu.events_group != NULL) {
- kfree(__perf_iommu.events_group);
- __perf_iommu.events_group = NULL;
- }
-}
-
-static __init int _init_perf_amd_iommu(
- struct perf_amd_iommu *perf_iommu, char *name)
-{
- int ret;
-
- raw_spin_lock_init(&perf_iommu->lock);
-
- /* Init format attributes */
- perf_iommu->format_group = &amd_iommu_format_group;
-
- /* Init cpumask attributes to only core 0 */
- cpumask_set_cpu(0, &iommu_cpumask);
- perf_iommu->cpumask_group = &amd_iommu_cpumask_group;
-
- /* Init events attributes */
- if (_init_events_attrs(perf_iommu) != 0)
-		pr_err("perf: amd_iommu: Only raw events are supported.\n");
-
- /* Init null attributes */
- perf_iommu->null_group = NULL;
- perf_iommu->pmu.attr_groups = perf_iommu->attr_groups;
-
- ret = perf_pmu_register(&perf_iommu->pmu, name, -1);
- if (ret) {
-		pr_err("perf: amd_iommu: Failed to initialize.\n");
- amd_iommu_pc_exit();
- } else {
- pr_info("perf: amd_iommu: Detected. (%d banks, %d counters/bank)\n",
- amd_iommu_pc_get_max_banks(IOMMU_BASE_DEVID),
- amd_iommu_pc_get_max_counters(IOMMU_BASE_DEVID));
- }
-
- return ret;
-}
-
-static struct perf_amd_iommu __perf_iommu = {
- .pmu = {
- .event_init = perf_iommu_event_init,
- .add = perf_iommu_add,
- .del = perf_iommu_del,
- .start = perf_iommu_start,
- .stop = perf_iommu_stop,
- .read = perf_iommu_read,
- },
- .max_banks = 0x00,
- .max_counters = 0x00,
- .cntr_assign_mask = 0ULL,
- .format_group = NULL,
- .cpumask_group = NULL,
- .events_group = NULL,
- .null_group = NULL,
-};
-
-static __init int amd_iommu_pc_init(void)
-{
- /* Make sure the IOMMU PC resource is available */
- if (!amd_iommu_pc_supported())
- return -ENODEV;
-
- _init_perf_amd_iommu(&__perf_iommu, "amd_iommu");
-
- return 0;
-}
-
-device_initcall(amd_iommu_pc_init);
diff --git a/arch/x86/kernel/cpu/perf_event_amd_iommu.h b/arch/x86/kernel/cpu/perf_event_amd_iommu.h
deleted file mode 100644
index 845d17327..000000000
--- a/arch/x86/kernel/cpu/perf_event_amd_iommu.h
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Copyright (C) 2013 Advanced Micro Devices, Inc.
- *
- * Author: Steven Kinney <Steven.Kinney@amd.com>
- * Author: Suravee Suthikulpanit <Suraveee.Suthikulpanit@amd.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#ifndef _PERF_EVENT_AMD_IOMMU_H_
-#define _PERF_EVENT_AMD_IOMMU_H_
-
-/* iommu pc mmio region register indexes */
-#define IOMMU_PC_COUNTER_REG 0x00
-#define IOMMU_PC_COUNTER_SRC_REG 0x08
-#define IOMMU_PC_PASID_MATCH_REG 0x10
-#define IOMMU_PC_DOMID_MATCH_REG 0x18
-#define IOMMU_PC_DEVID_MATCH_REG 0x20
-#define IOMMU_PC_COUNTER_REPORT_REG 0x28
-
-/* maximum specified banks/counters */
-#define PC_MAX_SPEC_BNKS 64
-#define PC_MAX_SPEC_CNTRS 16
-
-/* iommu pc reg masks */
-#define IOMMU_BASE_DEVID 0x0000
-
-/* amd_iommu_init.c external support functions */
-extern bool amd_iommu_pc_supported(void);
-
-extern u8 amd_iommu_pc_get_max_banks(u16 devid);
-
-extern u8 amd_iommu_pc_get_max_counters(u16 devid);
-
-extern int amd_iommu_pc_get_set_reg_val(u16 devid, u8 bank, u8 cntr,
- u8 fxn, u64 *value, bool is_write);
-
-#endif /*_PERF_EVENT_AMD_IOMMU_H_*/
diff --git a/arch/x86/kernel/cpu/perf_event_amd_uncore.c b/arch/x86/kernel/cpu/perf_event_amd_uncore.c
deleted file mode 100644
index 8836fc9fa..000000000
--- a/arch/x86/kernel/cpu/perf_event_amd_uncore.c
+++ /dev/null
@@ -1,603 +0,0 @@
-/*
- * Copyright (C) 2013 Advanced Micro Devices, Inc.
- *
- * Author: Jacob Shin <jacob.shin@amd.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/perf_event.h>
-#include <linux/percpu.h>
-#include <linux/types.h>
-#include <linux/slab.h>
-#include <linux/init.h>
-#include <linux/cpu.h>
-#include <linux/cpumask.h>
-
-#include <asm/cpufeature.h>
-#include <asm/perf_event.h>
-#include <asm/msr.h>
-
-#define NUM_COUNTERS_NB 4
-#define NUM_COUNTERS_L2 4
-#define MAX_COUNTERS NUM_COUNTERS_NB
-
-#define RDPMC_BASE_NB 6
-#define RDPMC_BASE_L2 10
-
-#define COUNTER_SHIFT 16
-
-struct amd_uncore {
- int id;
- int refcnt;
- int cpu;
- int num_counters;
- int rdpmc_base;
- u32 msr_base;
- cpumask_t *active_mask;
- struct pmu *pmu;
- struct perf_event *events[MAX_COUNTERS];
- struct amd_uncore *free_when_cpu_online;
-};
-
-static struct amd_uncore * __percpu *amd_uncore_nb;
-static struct amd_uncore * __percpu *amd_uncore_l2;
-
-static struct pmu amd_nb_pmu;
-static struct pmu amd_l2_pmu;
-
-static cpumask_t amd_nb_active_mask;
-static cpumask_t amd_l2_active_mask;
-
-static bool is_nb_event(struct perf_event *event)
-{
- return event->pmu->type == amd_nb_pmu.type;
-}
-
-static bool is_l2_event(struct perf_event *event)
-{
- return event->pmu->type == amd_l2_pmu.type;
-}
-
-static struct amd_uncore *event_to_amd_uncore(struct perf_event *event)
-{
- if (is_nb_event(event) && amd_uncore_nb)
- return *per_cpu_ptr(amd_uncore_nb, event->cpu);
- else if (is_l2_event(event) && amd_uncore_l2)
- return *per_cpu_ptr(amd_uncore_l2, event->cpu);
-
- return NULL;
-}
-
-static void amd_uncore_read(struct perf_event *event)
-{
- struct hw_perf_event *hwc = &event->hw;
- u64 prev, new;
- s64 delta;
-
- /*
- * since we do not enable counter overflow interrupts,
- * we do not have to worry about prev_count changing on us
- */
-
- prev = local64_read(&hwc->prev_count);
- rdpmcl(hwc->event_base_rdpmc, new);
- local64_set(&hwc->prev_count, new);
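-	/*
-	 * The counters are 48 bits wide; shifting up by COUNTER_SHIFT and
-	 * back down again sign-extends the delta correctly.
-	 */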
- delta = (new << COUNTER_SHIFT) - (prev << COUNTER_SHIFT);
- delta >>= COUNTER_SHIFT;
- local64_add(delta, &event->count);
-}
-
-static void amd_uncore_start(struct perf_event *event, int flags)
-{
- struct hw_perf_event *hwc = &event->hw;
-
- if (flags & PERF_EF_RELOAD)
- wrmsrl(hwc->event_base, (u64)local64_read(&hwc->prev_count));
-
- hwc->state = 0;
- wrmsrl(hwc->config_base, (hwc->config | ARCH_PERFMON_EVENTSEL_ENABLE));
- perf_event_update_userpage(event);
-}
-
-static void amd_uncore_stop(struct perf_event *event, int flags)
-{
- struct hw_perf_event *hwc = &event->hw;
-
- wrmsrl(hwc->config_base, hwc->config);
- hwc->state |= PERF_HES_STOPPED;
-
- if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
- amd_uncore_read(event);
- hwc->state |= PERF_HES_UPTODATE;
- }
-}
-
-static int amd_uncore_add(struct perf_event *event, int flags)
-{
- int i;
- struct amd_uncore *uncore = event_to_amd_uncore(event);
- struct hw_perf_event *hwc = &event->hw;
-
- /* are we already assigned? */
- if (hwc->idx != -1 && uncore->events[hwc->idx] == event)
- goto out;
-
- for (i = 0; i < uncore->num_counters; i++) {
- if (uncore->events[i] == event) {
- hwc->idx = i;
- goto out;
- }
- }
-
- /* if not, take the first available counter */
- hwc->idx = -1;
- for (i = 0; i < uncore->num_counters; i++) {
- if (cmpxchg(&uncore->events[i], NULL, event) == NULL) {
- hwc->idx = i;
- break;
- }
- }
-
-out:
- if (hwc->idx == -1)
- return -EBUSY;
-
- hwc->config_base = uncore->msr_base + (2 * hwc->idx);
- hwc->event_base = uncore->msr_base + 1 + (2 * hwc->idx);
- hwc->event_base_rdpmc = uncore->rdpmc_base + hwc->idx;
- hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
-
- if (flags & PERF_EF_START)
- amd_uncore_start(event, PERF_EF_RELOAD);
-
- return 0;
-}
-
-static void amd_uncore_del(struct perf_event *event, int flags)
-{
- int i;
- struct amd_uncore *uncore = event_to_amd_uncore(event);
- struct hw_perf_event *hwc = &event->hw;
-
- amd_uncore_stop(event, PERF_EF_UPDATE);
-
- for (i = 0; i < uncore->num_counters; i++) {
- if (cmpxchg(&uncore->events[i], event, NULL) == event)
- break;
- }
-
- hwc->idx = -1;
-}
-
-static int amd_uncore_event_init(struct perf_event *event)
-{
- struct amd_uncore *uncore;
- struct hw_perf_event *hwc = &event->hw;
-
- if (event->attr.type != event->pmu->type)
- return -ENOENT;
-
- /*
- * NB and L2 counters (MSRs) are shared across all cores that share the
- * same NB / L2 cache. Interrupts can be directed to a single target
- * core, however, event counts generated by processes running on other
- * cores cannot be masked out. So we do not support sampling and
- * per-thread events.
- */
- if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK)
- return -EINVAL;
-
- /* NB and L2 counters do not have usr/os/guest/host bits */
- if (event->attr.exclude_user || event->attr.exclude_kernel ||
- event->attr.exclude_host || event->attr.exclude_guest)
- return -EINVAL;
-
- /* and we do not enable counter overflow interrupts */
- hwc->config = event->attr.config & AMD64_RAW_EVENT_MASK_NB;
- hwc->idx = -1;
-
- if (event->cpu < 0)
- return -EINVAL;
-
- uncore = event_to_amd_uncore(event);
- if (!uncore)
- return -ENODEV;
-
- /*
-	 * since requests can come in on any of the shared cores, we will remap
-	 * them to a single common cpu.
- */
- event->cpu = uncore->cpu;
-
- return 0;
-}
-
-static ssize_t amd_uncore_attr_show_cpumask(struct device *dev,
- struct device_attribute *attr,
- char *buf)
-{
- cpumask_t *active_mask;
- struct pmu *pmu = dev_get_drvdata(dev);
-
- if (pmu->type == amd_nb_pmu.type)
- active_mask = &amd_nb_active_mask;
- else if (pmu->type == amd_l2_pmu.type)
- active_mask = &amd_l2_active_mask;
- else
- return 0;
-
- return cpumap_print_to_pagebuf(true, buf, active_mask);
-}
-static DEVICE_ATTR(cpumask, S_IRUGO, amd_uncore_attr_show_cpumask, NULL);
-
-static struct attribute *amd_uncore_attrs[] = {
- &dev_attr_cpumask.attr,
- NULL,
-};
-
-static struct attribute_group amd_uncore_attr_group = {
- .attrs = amd_uncore_attrs,
-};
-
-PMU_FORMAT_ATTR(event, "config:0-7,32-35");
-PMU_FORMAT_ATTR(umask, "config:8-15");
-
-static struct attribute *amd_uncore_format_attr[] = {
- &format_attr_event.attr,
- &format_attr_umask.attr,
- NULL,
-};
-
-static struct attribute_group amd_uncore_format_group = {
- .name = "format",
- .attrs = amd_uncore_format_attr,
-};
-
-static const struct attribute_group *amd_uncore_attr_groups[] = {
- &amd_uncore_attr_group,
- &amd_uncore_format_group,
- NULL,
-};
-
-static struct pmu amd_nb_pmu = {
- .attr_groups = amd_uncore_attr_groups,
- .name = "amd_nb",
- .event_init = amd_uncore_event_init,
- .add = amd_uncore_add,
- .del = amd_uncore_del,
- .start = amd_uncore_start,
- .stop = amd_uncore_stop,
- .read = amd_uncore_read,
-};
-
-static struct pmu amd_l2_pmu = {
- .attr_groups = amd_uncore_attr_groups,
- .name = "amd_l2",
- .event_init = amd_uncore_event_init,
- .add = amd_uncore_add,
- .del = amd_uncore_del,
- .start = amd_uncore_start,
- .stop = amd_uncore_stop,
- .read = amd_uncore_read,
-};
-
-static struct amd_uncore *amd_uncore_alloc(unsigned int cpu)
-{
- return kzalloc_node(sizeof(struct amd_uncore), GFP_KERNEL,
- cpu_to_node(cpu));
-}
-
-static int amd_uncore_cpu_up_prepare(unsigned int cpu)
-{
- struct amd_uncore *uncore_nb = NULL, *uncore_l2;
-
- if (amd_uncore_nb) {
- uncore_nb = amd_uncore_alloc(cpu);
- if (!uncore_nb)
- goto fail;
- uncore_nb->cpu = cpu;
- uncore_nb->num_counters = NUM_COUNTERS_NB;
- uncore_nb->rdpmc_base = RDPMC_BASE_NB;
- uncore_nb->msr_base = MSR_F15H_NB_PERF_CTL;
- uncore_nb->active_mask = &amd_nb_active_mask;
- uncore_nb->pmu = &amd_nb_pmu;
- *per_cpu_ptr(amd_uncore_nb, cpu) = uncore_nb;
- }
-
- if (amd_uncore_l2) {
- uncore_l2 = amd_uncore_alloc(cpu);
- if (!uncore_l2)
- goto fail;
- uncore_l2->cpu = cpu;
- uncore_l2->num_counters = NUM_COUNTERS_L2;
- uncore_l2->rdpmc_base = RDPMC_BASE_L2;
- uncore_l2->msr_base = MSR_F16H_L2I_PERF_CTL;
- uncore_l2->active_mask = &amd_l2_active_mask;
- uncore_l2->pmu = &amd_l2_pmu;
- *per_cpu_ptr(amd_uncore_l2, cpu) = uncore_l2;
- }
-
- return 0;
-
-fail:
- if (amd_uncore_nb)
- *per_cpu_ptr(amd_uncore_nb, cpu) = NULL;
- kfree(uncore_nb);
- return -ENOMEM;
-}
-
-static struct amd_uncore *
-amd_uncore_find_online_sibling(struct amd_uncore *this,
- struct amd_uncore * __percpu *uncores)
-{
- unsigned int cpu;
- struct amd_uncore *that;
-
- for_each_online_cpu(cpu) {
- that = *per_cpu_ptr(uncores, cpu);
-
- if (!that)
- continue;
-
- if (this == that)
- continue;
-
- if (this->id == that->id) {
- that->free_when_cpu_online = this;
- this = that;
- break;
- }
- }
-
- this->refcnt++;
- return this;
-}
-
-static void amd_uncore_cpu_starting(unsigned int cpu)
-{
- unsigned int eax, ebx, ecx, edx;
- struct amd_uncore *uncore;
-
- if (amd_uncore_nb) {
- uncore = *per_cpu_ptr(amd_uncore_nb, cpu);
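-		/* CPUID Fn8000_001E: ECX[7:0] is the node ID; NB counters are per node. */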
- cpuid(0x8000001e, &eax, &ebx, &ecx, &edx);
- uncore->id = ecx & 0xff;
-
- uncore = amd_uncore_find_online_sibling(uncore, amd_uncore_nb);
- *per_cpu_ptr(amd_uncore_nb, cpu) = uncore;
- }
-
- if (amd_uncore_l2) {
- unsigned int apicid = cpu_data(cpu).apicid;
- unsigned int nshared;
-
- uncore = *per_cpu_ptr(amd_uncore_l2, cpu);
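-		/*
-		 * CPUID Fn8000_001D, index 2 (L2): EAX[25:14] holds the number
-		 * of cores sharing the cache minus one; group CPUs accordingly.
-		 */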
- cpuid_count(0x8000001d, 2, &eax, &ebx, &ecx, &edx);
- nshared = ((eax >> 14) & 0xfff) + 1;
- uncore->id = apicid - (apicid % nshared);
-
- uncore = amd_uncore_find_online_sibling(uncore, amd_uncore_l2);
- *per_cpu_ptr(amd_uncore_l2, cpu) = uncore;
- }
-}
-
-static void uncore_online(unsigned int cpu,
- struct amd_uncore * __percpu *uncores)
-{
- struct amd_uncore *uncore = *per_cpu_ptr(uncores, cpu);
-
- kfree(uncore->free_when_cpu_online);
- uncore->free_when_cpu_online = NULL;
-
- if (cpu == uncore->cpu)
- cpumask_set_cpu(cpu, uncore->active_mask);
-}
-
-static void amd_uncore_cpu_online(unsigned int cpu)
-{
- if (amd_uncore_nb)
- uncore_online(cpu, amd_uncore_nb);
-
- if (amd_uncore_l2)
- uncore_online(cpu, amd_uncore_l2);
-}
-
-static void uncore_down_prepare(unsigned int cpu,
- struct amd_uncore * __percpu *uncores)
-{
- unsigned int i;
- struct amd_uncore *this = *per_cpu_ptr(uncores, cpu);
-
- if (this->cpu != cpu)
- return;
-
- /* this cpu is going down, migrate to a shared sibling if possible */
- for_each_online_cpu(i) {
- struct amd_uncore *that = *per_cpu_ptr(uncores, i);
-
- if (cpu == i)
- continue;
-
- if (this == that) {
- perf_pmu_migrate_context(this->pmu, cpu, i);
- cpumask_clear_cpu(cpu, that->active_mask);
- cpumask_set_cpu(i, that->active_mask);
- that->cpu = i;
- break;
- }
- }
-}
-
-static void amd_uncore_cpu_down_prepare(unsigned int cpu)
-{
- if (amd_uncore_nb)
- uncore_down_prepare(cpu, amd_uncore_nb);
-
- if (amd_uncore_l2)
- uncore_down_prepare(cpu, amd_uncore_l2);
-}
-
-static void uncore_dead(unsigned int cpu, struct amd_uncore * __percpu *uncores)
-{
- struct amd_uncore *uncore = *per_cpu_ptr(uncores, cpu);
-
- if (cpu == uncore->cpu)
- cpumask_clear_cpu(cpu, uncore->active_mask);
-
- if (!--uncore->refcnt)
- kfree(uncore);
- *per_cpu_ptr(uncores, cpu) = NULL;
-}
-
-static void amd_uncore_cpu_dead(unsigned int cpu)
-{
- if (amd_uncore_nb)
- uncore_dead(cpu, amd_uncore_nb);
-
- if (amd_uncore_l2)
- uncore_dead(cpu, amd_uncore_l2);
-}
-
-static int
-amd_uncore_cpu_notifier(struct notifier_block *self, unsigned long action,
- void *hcpu)
-{
- unsigned int cpu = (long)hcpu;
-
- switch (action & ~CPU_TASKS_FROZEN) {
- case CPU_UP_PREPARE:
- if (amd_uncore_cpu_up_prepare(cpu))
- return notifier_from_errno(-ENOMEM);
- break;
-
- case CPU_STARTING:
- amd_uncore_cpu_starting(cpu);
- break;
-
- case CPU_ONLINE:
- amd_uncore_cpu_online(cpu);
- break;
-
- case CPU_DOWN_PREPARE:
- amd_uncore_cpu_down_prepare(cpu);
- break;
-
- case CPU_UP_CANCELED:
- case CPU_DEAD:
- amd_uncore_cpu_dead(cpu);
- break;
-
- default:
- break;
- }
-
- return NOTIFY_OK;
-}
-
-static struct notifier_block amd_uncore_cpu_notifier_block = {
- .notifier_call = amd_uncore_cpu_notifier,
- .priority = CPU_PRI_PERF + 1,
-};
-
-static void __init init_cpu_already_online(void *dummy)
-{
- unsigned int cpu = smp_processor_id();
-
- amd_uncore_cpu_starting(cpu);
- amd_uncore_cpu_online(cpu);
-}
-
-static void cleanup_cpu_online(void *dummy)
-{
- unsigned int cpu = smp_processor_id();
-
- amd_uncore_cpu_dead(cpu);
-}
-
-static int __init amd_uncore_init(void)
-{
- unsigned int cpu, cpu2;
- int ret = -ENODEV;
-
- if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
- goto fail_nodev;
-
- if (!boot_cpu_has(X86_FEATURE_TOPOEXT))
- goto fail_nodev;
-
- if (boot_cpu_has(X86_FEATURE_PERFCTR_NB)) {
- amd_uncore_nb = alloc_percpu(struct amd_uncore *);
- if (!amd_uncore_nb) {
- ret = -ENOMEM;
- goto fail_nb;
- }
- ret = perf_pmu_register(&amd_nb_pmu, amd_nb_pmu.name, -1);
- if (ret)
- goto fail_nb;
-
- printk(KERN_INFO "perf: AMD NB counters detected\n");
- ret = 0;
- }
-
- if (boot_cpu_has(X86_FEATURE_PERFCTR_L2)) {
- amd_uncore_l2 = alloc_percpu(struct amd_uncore *);
- if (!amd_uncore_l2) {
- ret = -ENOMEM;
- goto fail_l2;
- }
- ret = perf_pmu_register(&amd_l2_pmu, amd_l2_pmu.name, -1);
- if (ret)
- goto fail_l2;
-
- printk(KERN_INFO "perf: AMD L2I counters detected\n");
- ret = 0;
- }
-
- if (ret)
- goto fail_nodev;
-
- cpu_notifier_register_begin();
-
-	/* init cpus already online before registering the hotplug notifier */
- for_each_online_cpu(cpu) {
- ret = amd_uncore_cpu_up_prepare(cpu);
- if (ret)
- goto fail_online;
- smp_call_function_single(cpu, init_cpu_already_online, NULL, 1);
- }
-
- __register_cpu_notifier(&amd_uncore_cpu_notifier_block);
- cpu_notifier_register_done();
-
- return 0;
-
-
-fail_online:
- for_each_online_cpu(cpu2) {
- if (cpu2 == cpu)
- break;
-		smp_call_function_single(cpu2, cleanup_cpu_online, NULL, 1);
- }
- cpu_notifier_register_done();
-
- /* amd_uncore_nb/l2 should have been freed by cleanup_cpu_online */
- amd_uncore_nb = amd_uncore_l2 = NULL;
-
- if (boot_cpu_has(X86_FEATURE_PERFCTR_L2))
- perf_pmu_unregister(&amd_l2_pmu);
-fail_l2:
- if (boot_cpu_has(X86_FEATURE_PERFCTR_NB))
- perf_pmu_unregister(&amd_nb_pmu);
- if (amd_uncore_l2)
- free_percpu(amd_uncore_l2);
-fail_nb:
- if (amd_uncore_nb)
- free_percpu(amd_uncore_nb);
-
-fail_nodev:
- return ret;
-}
-device_initcall(amd_uncore_init);
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
deleted file mode 100644
index 760aec1e8..000000000
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ /dev/null
@@ -1,3796 +0,0 @@
-/*
- * Per core/cpu state
- *
- * Used to coordinate shared registers between HT threads or
- * among events on a single PMU.
- */
-
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
-#include <linux/stddef.h>
-#include <linux/types.h>
-#include <linux/init.h>
-#include <linux/slab.h>
-#include <linux/export.h>
-#include <linux/nmi.h>
-
-#include <asm/cpufeature.h>
-#include <asm/hardirq.h>
-#include <asm/apic.h>
-
-#include "perf_event.h"
-
-/*
- * Intel PerfMon, used on Core and later.
- */
-static u64 intel_perfmon_event_map[PERF_COUNT_HW_MAX] __read_mostly =
-{
- [PERF_COUNT_HW_CPU_CYCLES] = 0x003c,
- [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
- [PERF_COUNT_HW_CACHE_REFERENCES] = 0x4f2e,
- [PERF_COUNT_HW_CACHE_MISSES] = 0x412e,
- [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4,
- [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5,
- [PERF_COUNT_HW_BUS_CYCLES] = 0x013c,
- [PERF_COUNT_HW_REF_CPU_CYCLES] = 0x0300, /* pseudo-encoding */
-};
-
-static struct event_constraint intel_core_event_constraints[] __read_mostly =
-{
- INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */
- INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */
- INTEL_EVENT_CONSTRAINT(0x13, 0x2), /* DIV */
- INTEL_EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */
- INTEL_EVENT_CONSTRAINT(0x19, 0x2), /* DELAYED_BYPASS */
- INTEL_EVENT_CONSTRAINT(0xc1, 0x1), /* FP_COMP_INSTR_RET */
- EVENT_CONSTRAINT_END
-};
-
-static struct event_constraint intel_core2_event_constraints[] __read_mostly =
-{
- FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
- FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
- FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
- INTEL_EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */
- INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */
- INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */
- INTEL_EVENT_CONSTRAINT(0x13, 0x2), /* DIV */
- INTEL_EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */
- INTEL_EVENT_CONSTRAINT(0x18, 0x1), /* IDLE_DURING_DIV */
- INTEL_EVENT_CONSTRAINT(0x19, 0x2), /* DELAYED_BYPASS */
- INTEL_EVENT_CONSTRAINT(0xa1, 0x1), /* RS_UOPS_DISPATCH_CYCLES */
- INTEL_EVENT_CONSTRAINT(0xc9, 0x1), /* ITLB_MISS_RETIRED (T30-9) */
- INTEL_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED */
- EVENT_CONSTRAINT_END
-};
-
-static struct event_constraint intel_nehalem_event_constraints[] __read_mostly =
-{
- FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
- FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
- FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
- INTEL_EVENT_CONSTRAINT(0x40, 0x3), /* L1D_CACHE_LD */
- INTEL_EVENT_CONSTRAINT(0x41, 0x3), /* L1D_CACHE_ST */
- INTEL_EVENT_CONSTRAINT(0x42, 0x3), /* L1D_CACHE_LOCK */
- INTEL_EVENT_CONSTRAINT(0x43, 0x3), /* L1D_ALL_REF */
- INTEL_EVENT_CONSTRAINT(0x48, 0x3), /* L1D_PEND_MISS */
- INTEL_EVENT_CONSTRAINT(0x4e, 0x3), /* L1D_PREFETCH */
- INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */
- INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */
- EVENT_CONSTRAINT_END
-};
-
-static struct extra_reg intel_nehalem_extra_regs[] __read_mostly =
-{
- /* must define OFFCORE_RSP_X first, see intel_fixup_er() */
- INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0xffff, RSP_0),
- INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x100b),
- EVENT_EXTRA_END
-};
-
-static struct event_constraint intel_westmere_event_constraints[] __read_mostly =
-{
- FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
- FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
- FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
- INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */
- INTEL_EVENT_CONSTRAINT(0x60, 0x1), /* OFFCORE_REQUESTS_OUTSTANDING */
- INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */
- INTEL_EVENT_CONSTRAINT(0xb3, 0x1), /* SNOOPQ_REQUEST_OUTSTANDING */
- EVENT_CONSTRAINT_END
-};
-
-static struct event_constraint intel_snb_event_constraints[] __read_mostly =
-{
- FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
- FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
- FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
- INTEL_UEVENT_CONSTRAINT(0x04a3, 0xf), /* CYCLE_ACTIVITY.CYCLES_NO_DISPATCH */
- INTEL_UEVENT_CONSTRAINT(0x05a3, 0xf), /* CYCLE_ACTIVITY.STALLS_L2_PENDING */
- INTEL_UEVENT_CONSTRAINT(0x02a3, 0x4), /* CYCLE_ACTIVITY.CYCLES_L1D_PENDING */
- INTEL_UEVENT_CONSTRAINT(0x06a3, 0x4), /* CYCLE_ACTIVITY.STALLS_L1D_PENDING */
- INTEL_EVENT_CONSTRAINT(0x48, 0x4), /* L1D_PEND_MISS.PENDING */
- INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */
- INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */
- INTEL_UEVENT_CONSTRAINT(0x04a3, 0xf), /* CYCLE_ACTIVITY.CYCLES_NO_DISPATCH */
- INTEL_UEVENT_CONSTRAINT(0x02a3, 0x4), /* CYCLE_ACTIVITY.CYCLES_L1D_PENDING */
-
- INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf), /* MEM_UOPS_RETIRED.* */
- INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */
- INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
- INTEL_EXCLEVT_CONSTRAINT(0xd3, 0xf), /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */
-
- EVENT_CONSTRAINT_END
-};
-
-static struct event_constraint intel_ivb_event_constraints[] __read_mostly =
-{
- FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
- FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
- FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
- INTEL_UEVENT_CONSTRAINT(0x0148, 0x4), /* L1D_PEND_MISS.PENDING */
-	INTEL_UEVENT_CONSTRAINT(0x0279, 0xf), /* IDQ.EMPTY */
- INTEL_UEVENT_CONSTRAINT(0x019c, 0xf), /* IDQ_UOPS_NOT_DELIVERED.CORE */
- INTEL_UEVENT_CONSTRAINT(0x02a3, 0xf), /* CYCLE_ACTIVITY.CYCLES_LDM_PENDING */
- INTEL_UEVENT_CONSTRAINT(0x04a3, 0xf), /* CYCLE_ACTIVITY.CYCLES_NO_EXECUTE */
- INTEL_UEVENT_CONSTRAINT(0x05a3, 0xf), /* CYCLE_ACTIVITY.STALLS_L2_PENDING */
- INTEL_UEVENT_CONSTRAINT(0x06a3, 0xf), /* CYCLE_ACTIVITY.STALLS_LDM_PENDING */
- INTEL_UEVENT_CONSTRAINT(0x08a3, 0x4), /* CYCLE_ACTIVITY.CYCLES_L1D_PENDING */
- INTEL_UEVENT_CONSTRAINT(0x0ca3, 0x4), /* CYCLE_ACTIVITY.STALLS_L1D_PENDING */
- INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */
-
- INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf), /* MEM_UOPS_RETIRED.* */
- INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */
- INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
- INTEL_EXCLEVT_CONSTRAINT(0xd3, 0xf), /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */
-
- EVENT_CONSTRAINT_END
-};
-
-static struct extra_reg intel_westmere_extra_regs[] __read_mostly =
-{
- /* must define OFFCORE_RSP_X first, see intel_fixup_er() */
- INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0xffff, RSP_0),
- INTEL_UEVENT_EXTRA_REG(0x01bb, MSR_OFFCORE_RSP_1, 0xffff, RSP_1),
- INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x100b),
- EVENT_EXTRA_END
-};
-
-static struct event_constraint intel_v1_event_constraints[] __read_mostly =
-{
- EVENT_CONSTRAINT_END
-};
-
-static struct event_constraint intel_gen_event_constraints[] __read_mostly =
-{
- FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
- FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
- FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
- EVENT_CONSTRAINT_END
-};
-
-static struct event_constraint intel_slm_event_constraints[] __read_mostly =
-{
- FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
- FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
- FIXED_EVENT_CONSTRAINT(0x0300, 2), /* pseudo CPU_CLK_UNHALTED.REF */
- EVENT_CONSTRAINT_END
-};
-
-struct event_constraint intel_skl_event_constraints[] = {
- FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
- FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
- FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
- INTEL_UEVENT_CONSTRAINT(0x1c0, 0x2), /* INST_RETIRED.PREC_DIST */
- EVENT_CONSTRAINT_END
-};
-
-static struct extra_reg intel_knl_extra_regs[] __read_mostly = {
- INTEL_UEVENT_EXTRA_REG(0x01b7,
- MSR_OFFCORE_RSP_0, 0x7f9ffbffffull, RSP_0),
- INTEL_UEVENT_EXTRA_REG(0x02b7,
- MSR_OFFCORE_RSP_1, 0x3f9ffbffffull, RSP_1),
- EVENT_EXTRA_END
-};
-
-static struct extra_reg intel_snb_extra_regs[] __read_mostly = {
- /* must define OFFCORE_RSP_X first, see intel_fixup_er() */
- INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x3f807f8fffull, RSP_0),
- INTEL_UEVENT_EXTRA_REG(0x01bb, MSR_OFFCORE_RSP_1, 0x3f807f8fffull, RSP_1),
- INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd),
- EVENT_EXTRA_END
-};
-
-static struct extra_reg intel_snbep_extra_regs[] __read_mostly = {
- /* must define OFFCORE_RSP_X first, see intel_fixup_er() */
- INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x3fffff8fffull, RSP_0),
- INTEL_UEVENT_EXTRA_REG(0x01bb, MSR_OFFCORE_RSP_1, 0x3fffff8fffull, RSP_1),
- INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd),
- EVENT_EXTRA_END
-};
-
-static struct extra_reg intel_skl_extra_regs[] __read_mostly = {
- INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x3fffff8fffull, RSP_0),
- INTEL_UEVENT_EXTRA_REG(0x01bb, MSR_OFFCORE_RSP_1, 0x3fffff8fffull, RSP_1),
- INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd),
- /*
-	 * Note: the low 8 bits of the eventsel code do not form a continuous
-	 * field; they contain some bits that would #GP. These are masked out.
- */
- INTEL_UEVENT_EXTRA_REG(0x01c6, MSR_PEBS_FRONTEND, 0x7fff17, FE),
- EVENT_EXTRA_END
-};
-
-EVENT_ATTR_STR(mem-loads, mem_ld_nhm, "event=0x0b,umask=0x10,ldlat=3");
-EVENT_ATTR_STR(mem-loads, mem_ld_snb, "event=0xcd,umask=0x1,ldlat=3");
-EVENT_ATTR_STR(mem-stores, mem_st_snb, "event=0xcd,umask=0x2");
-
-struct attribute *nhm_events_attrs[] = {
- EVENT_PTR(mem_ld_nhm),
- NULL,
-};
-
-struct attribute *snb_events_attrs[] = {
- EVENT_PTR(mem_ld_snb),
- EVENT_PTR(mem_st_snb),
- NULL,
-};
-
-static struct event_constraint intel_hsw_event_constraints[] = {
- FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
- FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
- FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
- INTEL_UEVENT_CONSTRAINT(0x148, 0x4), /* L1D_PEND_MISS.PENDING */
- INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */
- INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */
- /* CYCLE_ACTIVITY.CYCLES_L1D_PENDING */
- INTEL_UEVENT_CONSTRAINT(0x08a3, 0x4),
- /* CYCLE_ACTIVITY.STALLS_L1D_PENDING */
- INTEL_UEVENT_CONSTRAINT(0x0ca3, 0x4),
- /* CYCLE_ACTIVITY.CYCLES_NO_EXECUTE */
- INTEL_UEVENT_CONSTRAINT(0x04a3, 0xf),
-
- INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf), /* MEM_UOPS_RETIRED.* */
- INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */
- INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
- INTEL_EXCLEVT_CONSTRAINT(0xd3, 0xf), /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */
-
- EVENT_CONSTRAINT_END
-};
-
-struct event_constraint intel_bdw_event_constraints[] = {
- FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
- FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
- FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
- INTEL_UEVENT_CONSTRAINT(0x148, 0x4), /* L1D_PEND_MISS.PENDING */
- INTEL_UBIT_EVENT_CONSTRAINT(0x8a3, 0x4), /* CYCLE_ACTIVITY.CYCLES_L1D_MISS */
- EVENT_CONSTRAINT_END
-};
-
-static u64 intel_pmu_event_map(int hw_event)
-{
- return intel_perfmon_event_map[hw_event];
-}
-
-/*
- * Notes on the events:
- * - data reads do not include code reads (comparable to earlier tables)
- * - data counts include speculative execution (except L1 write, dtlb, bpu)
- * - remote node access includes remote memory, remote cache, remote mmio.
- * - prefetches are not included in the counts.
- * - icache miss does not include decoded icache
- */
-
-#define SKL_DEMAND_DATA_RD BIT_ULL(0)
-#define SKL_DEMAND_RFO BIT_ULL(1)
-#define SKL_ANY_RESPONSE BIT_ULL(16)
-#define SKL_SUPPLIER_NONE BIT_ULL(17)
-#define SKL_L3_MISS_LOCAL_DRAM BIT_ULL(26)
-#define SKL_L3_MISS_REMOTE_HOP0_DRAM BIT_ULL(27)
-#define SKL_L3_MISS_REMOTE_HOP1_DRAM BIT_ULL(28)
-#define SKL_L3_MISS_REMOTE_HOP2P_DRAM BIT_ULL(29)
-#define SKL_L3_MISS (SKL_L3_MISS_LOCAL_DRAM| \
- SKL_L3_MISS_REMOTE_HOP0_DRAM| \
- SKL_L3_MISS_REMOTE_HOP1_DRAM| \
- SKL_L3_MISS_REMOTE_HOP2P_DRAM)
-#define SKL_SPL_HIT BIT_ULL(30)
-#define SKL_SNOOP_NONE BIT_ULL(31)
-#define SKL_SNOOP_NOT_NEEDED BIT_ULL(32)
-#define SKL_SNOOP_MISS BIT_ULL(33)
-#define SKL_SNOOP_HIT_NO_FWD BIT_ULL(34)
-#define SKL_SNOOP_HIT_WITH_FWD BIT_ULL(35)
-#define SKL_SNOOP_HITM BIT_ULL(36)
-#define SKL_SNOOP_NON_DRAM BIT_ULL(37)
-#define SKL_ANY_SNOOP (SKL_SPL_HIT|SKL_SNOOP_NONE| \
- SKL_SNOOP_NOT_NEEDED|SKL_SNOOP_MISS| \
- SKL_SNOOP_HIT_NO_FWD|SKL_SNOOP_HIT_WITH_FWD| \
- SKL_SNOOP_HITM|SKL_SNOOP_NON_DRAM)
-#define SKL_DEMAND_READ SKL_DEMAND_DATA_RD
-#define SKL_SNOOP_DRAM (SKL_SNOOP_NONE| \
- SKL_SNOOP_NOT_NEEDED|SKL_SNOOP_MISS| \
- SKL_SNOOP_HIT_NO_FWD|SKL_SNOOP_HIT_WITH_FWD| \
- SKL_SNOOP_HITM|SKL_SPL_HIT)
-#define SKL_DEMAND_WRITE SKL_DEMAND_RFO
-#define SKL_LLC_ACCESS SKL_ANY_RESPONSE
-#define SKL_L3_MISS_REMOTE (SKL_L3_MISS_REMOTE_HOP0_DRAM| \
- SKL_L3_MISS_REMOTE_HOP1_DRAM| \
- SKL_L3_MISS_REMOTE_HOP2P_DRAM)
-
-static __initconst const u64 skl_hw_cache_event_ids
- [PERF_COUNT_HW_CACHE_MAX]
- [PERF_COUNT_HW_CACHE_OP_MAX]
- [PERF_COUNT_HW_CACHE_RESULT_MAX] =
-{
- [ C(L1D ) ] = {
- [ C(OP_READ) ] = {
- [ C(RESULT_ACCESS) ] = 0x81d0, /* MEM_INST_RETIRED.ALL_LOADS */
- [ C(RESULT_MISS) ] = 0x151, /* L1D.REPLACEMENT */
- },
- [ C(OP_WRITE) ] = {
- [ C(RESULT_ACCESS) ] = 0x82d0, /* MEM_INST_RETIRED.ALL_STORES */
- [ C(RESULT_MISS) ] = 0x0,
- },
- [ C(OP_PREFETCH) ] = {
- [ C(RESULT_ACCESS) ] = 0x0,
- [ C(RESULT_MISS) ] = 0x0,
- },
- },
- [ C(L1I ) ] = {
- [ C(OP_READ) ] = {
- [ C(RESULT_ACCESS) ] = 0x0,
- [ C(RESULT_MISS) ] = 0x283, /* ICACHE_64B.MISS */
- },
- [ C(OP_WRITE) ] = {
- [ C(RESULT_ACCESS) ] = -1,
- [ C(RESULT_MISS) ] = -1,
- },
- [ C(OP_PREFETCH) ] = {
- [ C(RESULT_ACCESS) ] = 0x0,
- [ C(RESULT_MISS) ] = 0x0,
- },
- },
- [ C(LL ) ] = {
- [ C(OP_READ) ] = {
- [ C(RESULT_ACCESS) ] = 0x1b7, /* OFFCORE_RESPONSE */
- [ C(RESULT_MISS) ] = 0x1b7, /* OFFCORE_RESPONSE */
- },
- [ C(OP_WRITE) ] = {
- [ C(RESULT_ACCESS) ] = 0x1b7, /* OFFCORE_RESPONSE */
- [ C(RESULT_MISS) ] = 0x1b7, /* OFFCORE_RESPONSE */
- },
- [ C(OP_PREFETCH) ] = {
- [ C(RESULT_ACCESS) ] = 0x0,
- [ C(RESULT_MISS) ] = 0x0,
- },
- },
- [ C(DTLB) ] = {
- [ C(OP_READ) ] = {
- [ C(RESULT_ACCESS) ] = 0x81d0, /* MEM_INST_RETIRED.ALL_LOADS */
- [ C(RESULT_MISS) ] = 0x608, /* DTLB_LOAD_MISSES.WALK_COMPLETED */
- },
- [ C(OP_WRITE) ] = {
- [ C(RESULT_ACCESS) ] = 0x82d0, /* MEM_INST_RETIRED.ALL_STORES */
- [ C(RESULT_MISS) ] = 0x649, /* DTLB_STORE_MISSES.WALK_COMPLETED */
- },
- [ C(OP_PREFETCH) ] = {
- [ C(RESULT_ACCESS) ] = 0x0,
- [ C(RESULT_MISS) ] = 0x0,
- },
- },
- [ C(ITLB) ] = {
- [ C(OP_READ) ] = {
- [ C(RESULT_ACCESS) ] = 0x2085, /* ITLB_MISSES.STLB_HIT */
- [ C(RESULT_MISS) ] = 0xe85, /* ITLB_MISSES.WALK_COMPLETED */
- },
- [ C(OP_WRITE) ] = {
- [ C(RESULT_ACCESS) ] = -1,
- [ C(RESULT_MISS) ] = -1,
- },
- [ C(OP_PREFETCH) ] = {
- [ C(RESULT_ACCESS) ] = -1,
- [ C(RESULT_MISS) ] = -1,
- },
- },
- [ C(BPU ) ] = {
- [ C(OP_READ) ] = {
- [ C(RESULT_ACCESS) ] = 0xc4, /* BR_INST_RETIRED.ALL_BRANCHES */
- [ C(RESULT_MISS) ] = 0xc5, /* BR_MISP_RETIRED.ALL_BRANCHES */
- },
- [ C(OP_WRITE) ] = {
- [ C(RESULT_ACCESS) ] = -1,
- [ C(RESULT_MISS) ] = -1,
- },
- [ C(OP_PREFETCH) ] = {
- [ C(RESULT_ACCESS) ] = -1,
- [ C(RESULT_MISS) ] = -1,
- },
- },
- [ C(NODE) ] = {
- [ C(OP_READ) ] = {
- [ C(RESULT_ACCESS) ] = 0x1b7, /* OFFCORE_RESPONSE */
- [ C(RESULT_MISS) ] = 0x1b7, /* OFFCORE_RESPONSE */
- },
- [ C(OP_WRITE) ] = {
- [ C(RESULT_ACCESS) ] = 0x1b7, /* OFFCORE_RESPONSE */
- [ C(RESULT_MISS) ] = 0x1b7, /* OFFCORE_RESPONSE */
- },
- [ C(OP_PREFETCH) ] = {
- [ C(RESULT_ACCESS) ] = 0x0,
- [ C(RESULT_MISS) ] = 0x0,
- },
- },
-};
-
-static __initconst const u64 skl_hw_cache_extra_regs
- [PERF_COUNT_HW_CACHE_MAX]
- [PERF_COUNT_HW_CACHE_OP_MAX]
- [PERF_COUNT_HW_CACHE_RESULT_MAX] =
-{
- [ C(LL ) ] = {
- [ C(OP_READ) ] = {
- [ C(RESULT_ACCESS) ] = SKL_DEMAND_READ|
- SKL_LLC_ACCESS|SKL_ANY_SNOOP,
- [ C(RESULT_MISS) ] = SKL_DEMAND_READ|
- SKL_L3_MISS|SKL_ANY_SNOOP|
- SKL_SUPPLIER_NONE,
- },
- [ C(OP_WRITE) ] = {
- [ C(RESULT_ACCESS) ] = SKL_DEMAND_WRITE|
- SKL_LLC_ACCESS|SKL_ANY_SNOOP,
- [ C(RESULT_MISS) ] = SKL_DEMAND_WRITE|
- SKL_L3_MISS|SKL_ANY_SNOOP|
- SKL_SUPPLIER_NONE,
- },
- [ C(OP_PREFETCH) ] = {
- [ C(RESULT_ACCESS) ] = 0x0,
- [ C(RESULT_MISS) ] = 0x0,
- },
- },
- [ C(NODE) ] = {
- [ C(OP_READ) ] = {
- [ C(RESULT_ACCESS) ] = SKL_DEMAND_READ|
- SKL_L3_MISS_LOCAL_DRAM|SKL_SNOOP_DRAM,
- [ C(RESULT_MISS) ] = SKL_DEMAND_READ|
- SKL_L3_MISS_REMOTE|SKL_SNOOP_DRAM,
- },
- [ C(OP_WRITE) ] = {
- [ C(RESULT_ACCESS) ] = SKL_DEMAND_WRITE|
- SKL_L3_MISS_LOCAL_DRAM|SKL_SNOOP_DRAM,
- [ C(RESULT_MISS) ] = SKL_DEMAND_WRITE|
- SKL_L3_MISS_REMOTE|SKL_SNOOP_DRAM,
- },
- [ C(OP_PREFETCH) ] = {
- [ C(RESULT_ACCESS) ] = 0x0,
- [ C(RESULT_MISS) ] = 0x0,
- },
- },
-};
-
-#define SNB_DMND_DATA_RD (1ULL << 0)
-#define SNB_DMND_RFO (1ULL << 1)
-#define SNB_DMND_IFETCH (1ULL << 2)
-#define SNB_DMND_WB (1ULL << 3)
-#define SNB_PF_DATA_RD (1ULL << 4)
-#define SNB_PF_RFO (1ULL << 5)
-#define SNB_PF_IFETCH (1ULL << 6)
-#define SNB_LLC_DATA_RD (1ULL << 7)
-#define SNB_LLC_RFO (1ULL << 8)
-#define SNB_LLC_IFETCH (1ULL << 9)
-#define SNB_BUS_LOCKS (1ULL << 10)
-#define SNB_STRM_ST (1ULL << 11)
-#define SNB_OTHER (1ULL << 15)
-#define SNB_RESP_ANY (1ULL << 16)
-#define SNB_NO_SUPP (1ULL << 17)
-#define SNB_LLC_HITM (1ULL << 18)
-#define SNB_LLC_HITE (1ULL << 19)
-#define SNB_LLC_HITS (1ULL << 20)
-#define SNB_LLC_HITF (1ULL << 21)
-#define SNB_LOCAL (1ULL << 22)
-#define SNB_REMOTE (0xffULL << 23)
-#define SNB_SNP_NONE (1ULL << 31)
-#define SNB_SNP_NOT_NEEDED (1ULL << 32)
-#define SNB_SNP_MISS (1ULL << 33)
-#define SNB_NO_FWD (1ULL << 34)
-#define SNB_SNP_FWD (1ULL << 35)
-#define SNB_HITM (1ULL << 36)
-#define SNB_NON_DRAM (1ULL << 37)
-
-#define SNB_DMND_READ (SNB_DMND_DATA_RD|SNB_LLC_DATA_RD)
-#define SNB_DMND_WRITE (SNB_DMND_RFO|SNB_LLC_RFO)
-#define SNB_DMND_PREFETCH (SNB_PF_DATA_RD|SNB_PF_RFO)
-
-#define SNB_SNP_ANY (SNB_SNP_NONE|SNB_SNP_NOT_NEEDED| \
- SNB_SNP_MISS|SNB_NO_FWD|SNB_SNP_FWD| \
- SNB_HITM)
-
-#define SNB_DRAM_ANY (SNB_LOCAL|SNB_REMOTE|SNB_SNP_ANY)
-#define SNB_DRAM_REMOTE (SNB_REMOTE|SNB_SNP_ANY)
-
-#define SNB_L3_ACCESS SNB_RESP_ANY
-#define SNB_L3_MISS (SNB_DRAM_ANY|SNB_NON_DRAM)
-
-static __initconst const u64 snb_hw_cache_extra_regs
- [PERF_COUNT_HW_CACHE_MAX]
- [PERF_COUNT_HW_CACHE_OP_MAX]
- [PERF_COUNT_HW_CACHE_RESULT_MAX] =
-{
- [ C(LL ) ] = {
- [ C(OP_READ) ] = {
- [ C(RESULT_ACCESS) ] = SNB_DMND_READ|SNB_L3_ACCESS,
- [ C(RESULT_MISS) ] = SNB_DMND_READ|SNB_L3_MISS,
- },
- [ C(OP_WRITE) ] = {
- [ C(RESULT_ACCESS) ] = SNB_DMND_WRITE|SNB_L3_ACCESS,
- [ C(RESULT_MISS) ] = SNB_DMND_WRITE|SNB_L3_MISS,
- },
- [ C(OP_PREFETCH) ] = {
- [ C(RESULT_ACCESS) ] = SNB_DMND_PREFETCH|SNB_L3_ACCESS,
- [ C(RESULT_MISS) ] = SNB_DMND_PREFETCH|SNB_L3_MISS,
- },
- },
- [ C(NODE) ] = {
- [ C(OP_READ) ] = {
- [ C(RESULT_ACCESS) ] = SNB_DMND_READ|SNB_DRAM_ANY,
- [ C(RESULT_MISS) ] = SNB_DMND_READ|SNB_DRAM_REMOTE,
- },
- [ C(OP_WRITE) ] = {
- [ C(RESULT_ACCESS) ] = SNB_DMND_WRITE|SNB_DRAM_ANY,
- [ C(RESULT_MISS) ] = SNB_DMND_WRITE|SNB_DRAM_REMOTE,
- },
- [ C(OP_PREFETCH) ] = {
- [ C(RESULT_ACCESS) ] = SNB_DMND_PREFETCH|SNB_DRAM_ANY,
- [ C(RESULT_MISS) ] = SNB_DMND_PREFETCH|SNB_DRAM_REMOTE,
- },
- },
-};
-
-static __initconst const u64 snb_hw_cache_event_ids
- [PERF_COUNT_HW_CACHE_MAX]
- [PERF_COUNT_HW_CACHE_OP_MAX]
- [PERF_COUNT_HW_CACHE_RESULT_MAX] =
-{
- [ C(L1D) ] = {
- [ C(OP_READ) ] = {
- [ C(RESULT_ACCESS) ] = 0xf1d0, /* MEM_UOP_RETIRED.LOADS */
- [ C(RESULT_MISS) ] = 0x0151, /* L1D.REPLACEMENT */
- },
- [ C(OP_WRITE) ] = {
- [ C(RESULT_ACCESS) ] = 0xf2d0, /* MEM_UOP_RETIRED.STORES */
- [ C(RESULT_MISS) ] = 0x0851, /* L1D.ALL_M_REPLACEMENT */
- },
- [ C(OP_PREFETCH) ] = {
- [ C(RESULT_ACCESS) ] = 0x0,
- [ C(RESULT_MISS) ] = 0x024e, /* HW_PRE_REQ.DL1_MISS */
- },
- },
- [ C(L1I ) ] = {
- [ C(OP_READ) ] = {
- [ C(RESULT_ACCESS) ] = 0x0,
- [ C(RESULT_MISS) ] = 0x0280, /* ICACHE.MISSES */
- },
- [ C(OP_WRITE) ] = {
- [ C(RESULT_ACCESS) ] = -1,
- [ C(RESULT_MISS) ] = -1,
- },
- [ C(OP_PREFETCH) ] = {
- [ C(RESULT_ACCESS) ] = 0x0,
- [ C(RESULT_MISS) ] = 0x0,
- },
- },
- [ C(LL ) ] = {
- [ C(OP_READ) ] = {
- /* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */
- [ C(RESULT_ACCESS) ] = 0x01b7,
- /* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */
- [ C(RESULT_MISS) ] = 0x01b7,
- },
- [ C(OP_WRITE) ] = {
- /* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */
- [ C(RESULT_ACCESS) ] = 0x01b7,
- /* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */
- [ C(RESULT_MISS) ] = 0x01b7,
- },
- [ C(OP_PREFETCH) ] = {
- /* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */
- [ C(RESULT_ACCESS) ] = 0x01b7,
- /* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */
- [ C(RESULT_MISS) ] = 0x01b7,
- },
- },
- [ C(DTLB) ] = {
- [ C(OP_READ) ] = {
- [ C(RESULT_ACCESS) ] = 0x81d0, /* MEM_UOP_RETIRED.ALL_LOADS */
- [ C(RESULT_MISS) ] = 0x0108, /* DTLB_LOAD_MISSES.CAUSES_A_WALK */
- },
- [ C(OP_WRITE) ] = {
- [ C(RESULT_ACCESS) ] = 0x82d0, /* MEM_UOP_RETIRED.ALL_STORES */
- [ C(RESULT_MISS) ] = 0x0149, /* DTLB_STORE_MISSES.MISS_CAUSES_A_WALK */
- },
- [ C(OP_PREFETCH) ] = {
- [ C(RESULT_ACCESS) ] = 0x0,
- [ C(RESULT_MISS) ] = 0x0,
- },
- },
- [ C(ITLB) ] = {
- [ C(OP_READ) ] = {
- [ C(RESULT_ACCESS) ] = 0x1085, /* ITLB_MISSES.STLB_HIT */
- [ C(RESULT_MISS) ] = 0x0185, /* ITLB_MISSES.CAUSES_A_WALK */
- },
- [ C(OP_WRITE) ] = {
- [ C(RESULT_ACCESS) ] = -1,
- [ C(RESULT_MISS) ] = -1,
- },
- [ C(OP_PREFETCH) ] = {
- [ C(RESULT_ACCESS) ] = -1,
- [ C(RESULT_MISS) ] = -1,
- },
- },
- [ C(BPU ) ] = {
- [ C(OP_READ) ] = {
- [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */
- [ C(RESULT_MISS) ] = 0x00c5, /* BR_MISP_RETIRED.ALL_BRANCHES */
- },
- [ C(OP_WRITE) ] = {
- [ C(RESULT_ACCESS) ] = -1,
- [ C(RESULT_MISS) ] = -1,
- },
- [ C(OP_PREFETCH) ] = {
- [ C(RESULT_ACCESS) ] = -1,
- [ C(RESULT_MISS) ] = -1,
- },
- },
- [ C(NODE) ] = {
- [ C(OP_READ) ] = {
- [ C(RESULT_ACCESS) ] = 0x01b7,
- [ C(RESULT_MISS) ] = 0x01b7,
- },
- [ C(OP_WRITE) ] = {
- [ C(RESULT_ACCESS) ] = 0x01b7,
- [ C(RESULT_MISS) ] = 0x01b7,
- },
- [ C(OP_PREFETCH) ] = {
- [ C(RESULT_ACCESS) ] = 0x01b7,
- [ C(RESULT_MISS) ] = 0x01b7,
- },
- },
-
-};
-
-/*
- * Notes on the events:
- * - data reads do not include code reads (comparable to earlier tables)
- * - data counts include speculative execution (except L1 write, dtlb, bpu)
- * - remote node access includes remote memory, remote cache, remote mmio.
- * - prefetches are not included in the counts because they are not
- * reliably counted.
- */
-
-#define HSW_DEMAND_DATA_RD BIT_ULL(0)
-#define HSW_DEMAND_RFO BIT_ULL(1)
-#define HSW_ANY_RESPONSE BIT_ULL(16)
-#define HSW_SUPPLIER_NONE BIT_ULL(17)
-#define HSW_L3_MISS_LOCAL_DRAM BIT_ULL(22)
-#define HSW_L3_MISS_REMOTE_HOP0 BIT_ULL(27)
-#define HSW_L3_MISS_REMOTE_HOP1 BIT_ULL(28)
-#define HSW_L3_MISS_REMOTE_HOP2P BIT_ULL(29)
-#define HSW_L3_MISS (HSW_L3_MISS_LOCAL_DRAM| \
- HSW_L3_MISS_REMOTE_HOP0|HSW_L3_MISS_REMOTE_HOP1| \
- HSW_L3_MISS_REMOTE_HOP2P)
-#define HSW_SNOOP_NONE BIT_ULL(31)
-#define HSW_SNOOP_NOT_NEEDED BIT_ULL(32)
-#define HSW_SNOOP_MISS BIT_ULL(33)
-#define HSW_SNOOP_HIT_NO_FWD BIT_ULL(34)
-#define HSW_SNOOP_HIT_WITH_FWD BIT_ULL(35)
-#define HSW_SNOOP_HITM BIT_ULL(36)
-#define HSW_SNOOP_NON_DRAM BIT_ULL(37)
-#define HSW_ANY_SNOOP (HSW_SNOOP_NONE| \
- HSW_SNOOP_NOT_NEEDED|HSW_SNOOP_MISS| \
- HSW_SNOOP_HIT_NO_FWD|HSW_SNOOP_HIT_WITH_FWD| \
- HSW_SNOOP_HITM|HSW_SNOOP_NON_DRAM)
-#define HSW_SNOOP_DRAM (HSW_ANY_SNOOP & ~HSW_SNOOP_NON_DRAM)
-#define HSW_DEMAND_READ HSW_DEMAND_DATA_RD
-#define HSW_DEMAND_WRITE HSW_DEMAND_RFO
-#define HSW_L3_MISS_REMOTE (HSW_L3_MISS_REMOTE_HOP0|\
- HSW_L3_MISS_REMOTE_HOP1|HSW_L3_MISS_REMOTE_HOP2P)
-#define HSW_LLC_ACCESS HSW_ANY_RESPONSE
-
-#define BDW_L3_MISS_LOCAL BIT(26)
-#define BDW_L3_MISS (BDW_L3_MISS_LOCAL| \
- HSW_L3_MISS_REMOTE_HOP0|HSW_L3_MISS_REMOTE_HOP1| \
- HSW_L3_MISS_REMOTE_HOP2P)
-
-
-static __initconst const u64 hsw_hw_cache_event_ids
- [PERF_COUNT_HW_CACHE_MAX]
- [PERF_COUNT_HW_CACHE_OP_MAX]
- [PERF_COUNT_HW_CACHE_RESULT_MAX] =
-{
- [ C(L1D ) ] = {
- [ C(OP_READ) ] = {
- [ C(RESULT_ACCESS) ] = 0x81d0, /* MEM_UOPS_RETIRED.ALL_LOADS */
- [ C(RESULT_MISS) ] = 0x151, /* L1D.REPLACEMENT */
- },
- [ C(OP_WRITE) ] = {
- [ C(RESULT_ACCESS) ] = 0x82d0, /* MEM_UOPS_RETIRED.ALL_STORES */
- [ C(RESULT_MISS) ] = 0x0,
- },
- [ C(OP_PREFETCH) ] = {
- [ C(RESULT_ACCESS) ] = 0x0,
- [ C(RESULT_MISS) ] = 0x0,
- },
- },
- [ C(L1I ) ] = {
- [ C(OP_READ) ] = {
- [ C(RESULT_ACCESS) ] = 0x0,
- [ C(RESULT_MISS) ] = 0x280, /* ICACHE.MISSES */
- },
- [ C(OP_WRITE) ] = {
- [ C(RESULT_ACCESS) ] = -1,
- [ C(RESULT_MISS) ] = -1,
- },
- [ C(OP_PREFETCH) ] = {
- [ C(RESULT_ACCESS) ] = 0x0,
- [ C(RESULT_MISS) ] = 0x0,
- },
- },
- [ C(LL ) ] = {
- [ C(OP_READ) ] = {
- [ C(RESULT_ACCESS) ] = 0x1b7, /* OFFCORE_RESPONSE */
- [ C(RESULT_MISS) ] = 0x1b7, /* OFFCORE_RESPONSE */
- },
- [ C(OP_WRITE) ] = {
- [ C(RESULT_ACCESS) ] = 0x1b7, /* OFFCORE_RESPONSE */
- [ C(RESULT_MISS) ] = 0x1b7, /* OFFCORE_RESPONSE */
- },
- [ C(OP_PREFETCH) ] = {
- [ C(RESULT_ACCESS) ] = 0x0,
- [ C(RESULT_MISS) ] = 0x0,
- },
- },
- [ C(DTLB) ] = {
- [ C(OP_READ) ] = {
- [ C(RESULT_ACCESS) ] = 0x81d0, /* MEM_UOPS_RETIRED.ALL_LOADS */
- [ C(RESULT_MISS) ] = 0x108, /* DTLB_LOAD_MISSES.MISS_CAUSES_A_WALK */
- },
- [ C(OP_WRITE) ] = {
- [ C(RESULT_ACCESS) ] = 0x82d0, /* MEM_UOPS_RETIRED.ALL_STORES */
- [ C(RESULT_MISS) ] = 0x149, /* DTLB_STORE_MISSES.MISS_CAUSES_A_WALK */
- },
- [ C(OP_PREFETCH) ] = {
- [ C(RESULT_ACCESS) ] = 0x0,
- [ C(RESULT_MISS) ] = 0x0,
- },
- },
- [ C(ITLB) ] = {
- [ C(OP_READ) ] = {
- [ C(RESULT_ACCESS) ] = 0x6085, /* ITLB_MISSES.STLB_HIT */
- [ C(RESULT_MISS) ] = 0x185, /* ITLB_MISSES.MISS_CAUSES_A_WALK */
- },
- [ C(OP_WRITE) ] = {
- [ C(RESULT_ACCESS) ] = -1,
- [ C(RESULT_MISS) ] = -1,
- },
- [ C(OP_PREFETCH) ] = {
- [ C(RESULT_ACCESS) ] = -1,
- [ C(RESULT_MISS) ] = -1,
- },
- },
- [ C(BPU ) ] = {
- [ C(OP_READ) ] = {
- [ C(RESULT_ACCESS) ] = 0xc4, /* BR_INST_RETIRED.ALL_BRANCHES */
- [ C(RESULT_MISS) ] = 0xc5, /* BR_MISP_RETIRED.ALL_BRANCHES */
- },
- [ C(OP_WRITE) ] = {
- [ C(RESULT_ACCESS) ] = -1,
- [ C(RESULT_MISS) ] = -1,
- },
- [ C(OP_PREFETCH) ] = {
- [ C(RESULT_ACCESS) ] = -1,
- [ C(RESULT_MISS) ] = -1,
- },
- },
- [ C(NODE) ] = {
- [ C(OP_READ) ] = {
- [ C(RESULT_ACCESS) ] = 0x1b7, /* OFFCORE_RESPONSE */
- [ C(RESULT_MISS) ] = 0x1b7, /* OFFCORE_RESPONSE */
- },
- [ C(OP_WRITE) ] = {
- [ C(RESULT_ACCESS) ] = 0x1b7, /* OFFCORE_RESPONSE */
- [ C(RESULT_MISS) ] = 0x1b7, /* OFFCORE_RESPONSE */
- },
- [ C(OP_PREFETCH) ] = {
- [ C(RESULT_ACCESS) ] = 0x0,
- [ C(RESULT_MISS) ] = 0x0,
- },
- },
-};
-
-static __initconst const u64 hsw_hw_cache_extra_regs
- [PERF_COUNT_HW_CACHE_MAX]
- [PERF_COUNT_HW_CACHE_OP_MAX]
- [PERF_COUNT_HW_CACHE_RESULT_MAX] =
-{
- [ C(LL ) ] = {
- [ C(OP_READ) ] = {
- [ C(RESULT_ACCESS) ] = HSW_DEMAND_READ|
- HSW_LLC_ACCESS,
- [ C(RESULT_MISS) ] = HSW_DEMAND_READ|
- HSW_L3_MISS|HSW_ANY_SNOOP,
- },
- [ C(OP_WRITE) ] = {
- [ C(RESULT_ACCESS) ] = HSW_DEMAND_WRITE|
- HSW_LLC_ACCESS,
- [ C(RESULT_MISS) ] = HSW_DEMAND_WRITE|
- HSW_L3_MISS|HSW_ANY_SNOOP,
- },
- [ C(OP_PREFETCH) ] = {
- [ C(RESULT_ACCESS) ] = 0x0,
- [ C(RESULT_MISS) ] = 0x0,
- },
- },
- [ C(NODE) ] = {
- [ C(OP_READ) ] = {
- [ C(RESULT_ACCESS) ] = HSW_DEMAND_READ|
- HSW_L3_MISS_LOCAL_DRAM|
- HSW_SNOOP_DRAM,
- [ C(RESULT_MISS) ] = HSW_DEMAND_READ|
- HSW_L3_MISS_REMOTE|
- HSW_SNOOP_DRAM,
- },
- [ C(OP_WRITE) ] = {
- [ C(RESULT_ACCESS) ] = HSW_DEMAND_WRITE|
- HSW_L3_MISS_LOCAL_DRAM|
- HSW_SNOOP_DRAM,
- [ C(RESULT_MISS) ] = HSW_DEMAND_WRITE|
- HSW_L3_MISS_REMOTE|
- HSW_SNOOP_DRAM,
- },
- [ C(OP_PREFETCH) ] = {
- [ C(RESULT_ACCESS) ] = 0x0,
- [ C(RESULT_MISS) ] = 0x0,
- },
- },
-};
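The HSW_*/BDW_* macros above build MSR_OFFCORE_RESPONSE selection masks by OR-ing a request type with supplier and snoop bits. A minimal standalone sketch (plain userspace C, illustrative only, not part of this change) of the LL read-miss value used in the table above:

#include <stdio.h>
#include <stdint.h>

#define BIT_ULL(n) (1ULL << (n))

int main(void)
{
	/* demand data read, any L3-miss supplier, any snoop response */
	uint64_t demand_rd = BIT_ULL(0);			/* HSW_DEMAND_DATA_RD */
	uint64_t l3_miss   = BIT_ULL(22) | BIT_ULL(27) |
			     BIT_ULL(28) | BIT_ULL(29);		/* HSW_L3_MISS */
	uint64_t any_snoop = BIT_ULL(31) | BIT_ULL(32) | BIT_ULL(33) |
			     BIT_ULL(34) | BIT_ULL(35) | BIT_ULL(36) |
			     BIT_ULL(37);			/* HSW_ANY_SNOOP */

	/* value of hsw_hw_cache_extra_regs[LL][OP_READ][RESULT_MISS] above */
	printf("0x%llx\n", (unsigned long long)(demand_rd | l3_miss | any_snoop));
	return 0;
}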
-
-static __initconst const u64 westmere_hw_cache_event_ids
- [PERF_COUNT_HW_CACHE_MAX]
- [PERF_COUNT_HW_CACHE_OP_MAX]
- [PERF_COUNT_HW_CACHE_RESULT_MAX] =
-{
- [ C(L1D) ] = {
- [ C(OP_READ) ] = {
- [ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS */
- [ C(RESULT_MISS) ] = 0x0151, /* L1D.REPL */
- },
- [ C(OP_WRITE) ] = {
- [ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETIRED.STORES */
- [ C(RESULT_MISS) ] = 0x0251, /* L1D.M_REPL */
- },
- [ C(OP_PREFETCH) ] = {
- [ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS */
- [ C(RESULT_MISS) ] = 0x024e, /* L1D_PREFETCH.MISS */
- },
- },
- [ C(L1I ) ] = {
- [ C(OP_READ) ] = {
- [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */
- [ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */
- },
- [ C(OP_WRITE) ] = {
- [ C(RESULT_ACCESS) ] = -1,
- [ C(RESULT_MISS) ] = -1,
- },
- [ C(OP_PREFETCH) ] = {
- [ C(RESULT_ACCESS) ] = 0x0,
- [ C(RESULT_MISS) ] = 0x0,
- },
- },
- [ C(LL ) ] = {
- [ C(OP_READ) ] = {
- /* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */
- [ C(RESULT_ACCESS) ] = 0x01b7,
- /* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */
- [ C(RESULT_MISS) ] = 0x01b7,
- },
- /*
- * Use RFO, not WRITEBACK, because a write miss would typically occur
- * on RFO.
- */
- [ C(OP_WRITE) ] = {
- /* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */
- [ C(RESULT_ACCESS) ] = 0x01b7,
- /* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */
- [ C(RESULT_MISS) ] = 0x01b7,
- },
- [ C(OP_PREFETCH) ] = {
- /* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */
- [ C(RESULT_ACCESS) ] = 0x01b7,
- /* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */
- [ C(RESULT_MISS) ] = 0x01b7,
- },
- },
- [ C(DTLB) ] = {
- [ C(OP_READ) ] = {
- [ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS */
- [ C(RESULT_MISS) ] = 0x0108, /* DTLB_LOAD_MISSES.ANY */
- },
- [ C(OP_WRITE) ] = {
- [ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETIRED.STORES */
- [ C(RESULT_MISS) ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS */
- },
- [ C(OP_PREFETCH) ] = {
- [ C(RESULT_ACCESS) ] = 0x0,
- [ C(RESULT_MISS) ] = 0x0,
- },
- },
- [ C(ITLB) ] = {
- [ C(OP_READ) ] = {
- [ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P */
- [ C(RESULT_MISS) ] = 0x0185, /* ITLB_MISSES.ANY */
- },
- [ C(OP_WRITE) ] = {
- [ C(RESULT_ACCESS) ] = -1,
- [ C(RESULT_MISS) ] = -1,
- },
- [ C(OP_PREFETCH) ] = {
- [ C(RESULT_ACCESS) ] = -1,
- [ C(RESULT_MISS) ] = -1,
- },
- },
- [ C(BPU ) ] = {
- [ C(OP_READ) ] = {
- [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */
- [ C(RESULT_MISS) ] = 0x03e8, /* BPU_CLEARS.ANY */
- },
- [ C(OP_WRITE) ] = {
- [ C(RESULT_ACCESS) ] = -1,
- [ C(RESULT_MISS) ] = -1,
- },
- [ C(OP_PREFETCH) ] = {
- [ C(RESULT_ACCESS) ] = -1,
- [ C(RESULT_MISS) ] = -1,
- },
- },
- [ C(NODE) ] = {
- [ C(OP_READ) ] = {
- [ C(RESULT_ACCESS) ] = 0x01b7,
- [ C(RESULT_MISS) ] = 0x01b7,
- },
- [ C(OP_WRITE) ] = {
- [ C(RESULT_ACCESS) ] = 0x01b7,
- [ C(RESULT_MISS) ] = 0x01b7,
- },
- [ C(OP_PREFETCH) ] = {
- [ C(RESULT_ACCESS) ] = 0x01b7,
- [ C(RESULT_MISS) ] = 0x01b7,
- },
- },
-};
-
-/*
- * Nehalem/Westmere MSR_OFFCORE_RESPONSE bits;
- * See IA32 SDM Vol 3B 30.6.1.3
- */
-
-#define NHM_DMND_DATA_RD (1 << 0)
-#define NHM_DMND_RFO (1 << 1)
-#define NHM_DMND_IFETCH (1 << 2)
-#define NHM_DMND_WB (1 << 3)
-#define NHM_PF_DATA_RD (1 << 4)
-#define NHM_PF_DATA_RFO (1 << 5)
-#define NHM_PF_IFETCH (1 << 6)
-#define NHM_OFFCORE_OTHER (1 << 7)
-#define NHM_UNCORE_HIT (1 << 8)
-#define NHM_OTHER_CORE_HIT_SNP (1 << 9)
-#define NHM_OTHER_CORE_HITM (1 << 10)
- /* reserved */
-#define NHM_REMOTE_CACHE_FWD (1 << 12)
-#define NHM_REMOTE_DRAM (1 << 13)
-#define NHM_LOCAL_DRAM (1 << 14)
-#define NHM_NON_DRAM (1 << 15)
-
-#define NHM_LOCAL (NHM_LOCAL_DRAM|NHM_REMOTE_CACHE_FWD)
-#define NHM_REMOTE (NHM_REMOTE_DRAM)
-
-#define NHM_DMND_READ (NHM_DMND_DATA_RD)
-#define NHM_DMND_WRITE (NHM_DMND_RFO|NHM_DMND_WB)
-#define NHM_DMND_PREFETCH (NHM_PF_DATA_RD|NHM_PF_DATA_RFO)
-
-#define NHM_L3_HIT (NHM_UNCORE_HIT|NHM_OTHER_CORE_HIT_SNP|NHM_OTHER_CORE_HITM)
-#define NHM_L3_MISS (NHM_NON_DRAM|NHM_LOCAL_DRAM|NHM_REMOTE_DRAM|NHM_REMOTE_CACHE_FWD)
-#define NHM_L3_ACCESS (NHM_L3_HIT|NHM_L3_MISS)
-
-static __initconst const u64 nehalem_hw_cache_extra_regs
- [PERF_COUNT_HW_CACHE_MAX]
- [PERF_COUNT_HW_CACHE_OP_MAX]
- [PERF_COUNT_HW_CACHE_RESULT_MAX] =
-{
- [ C(LL ) ] = {
- [ C(OP_READ) ] = {
- [ C(RESULT_ACCESS) ] = NHM_DMND_READ|NHM_L3_ACCESS,
- [ C(RESULT_MISS) ] = NHM_DMND_READ|NHM_L3_MISS,
- },
- [ C(OP_WRITE) ] = {
- [ C(RESULT_ACCESS) ] = NHM_DMND_WRITE|NHM_L3_ACCESS,
- [ C(RESULT_MISS) ] = NHM_DMND_WRITE|NHM_L3_MISS,
- },
- [ C(OP_PREFETCH) ] = {
- [ C(RESULT_ACCESS) ] = NHM_DMND_PREFETCH|NHM_L3_ACCESS,
- [ C(RESULT_MISS) ] = NHM_DMND_PREFETCH|NHM_L3_MISS,
- },
- },
- [ C(NODE) ] = {
- [ C(OP_READ) ] = {
- [ C(RESULT_ACCESS) ] = NHM_DMND_READ|NHM_LOCAL|NHM_REMOTE,
- [ C(RESULT_MISS) ] = NHM_DMND_READ|NHM_REMOTE,
- },
- [ C(OP_WRITE) ] = {
- [ C(RESULT_ACCESS) ] = NHM_DMND_WRITE|NHM_LOCAL|NHM_REMOTE,
- [ C(RESULT_MISS) ] = NHM_DMND_WRITE|NHM_REMOTE,
- },
- [ C(OP_PREFETCH) ] = {
- [ C(RESULT_ACCESS) ] = NHM_DMND_PREFETCH|NHM_LOCAL|NHM_REMOTE,
- [ C(RESULT_MISS) ] = NHM_DMND_PREFETCH|NHM_REMOTE,
- },
- },
-};
-
-static __initconst const u64 nehalem_hw_cache_event_ids
- [PERF_COUNT_HW_CACHE_MAX]
- [PERF_COUNT_HW_CACHE_OP_MAX]
- [PERF_COUNT_HW_CACHE_RESULT_MAX] =
-{
- [ C(L1D) ] = {
- [ C(OP_READ) ] = {
- [ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS */
- [ C(RESULT_MISS) ] = 0x0151, /* L1D.REPL */
- },
- [ C(OP_WRITE) ] = {
- [ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETIRED.STORES */
- [ C(RESULT_MISS) ] = 0x0251, /* L1D.M_REPL */
- },
- [ C(OP_PREFETCH) ] = {
- [ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS */
- [ C(RESULT_MISS) ] = 0x024e, /* L1D_PREFETCH.MISS */
- },
- },
- [ C(L1I ) ] = {
- [ C(OP_READ) ] = {
- [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */
- [ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */
- },
- [ C(OP_WRITE) ] = {
- [ C(RESULT_ACCESS) ] = -1,
- [ C(RESULT_MISS) ] = -1,
- },
- [ C(OP_PREFETCH) ] = {
- [ C(RESULT_ACCESS) ] = 0x0,
- [ C(RESULT_MISS) ] = 0x0,
- },
- },
- [ C(LL ) ] = {
- [ C(OP_READ) ] = {
- /* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */
- [ C(RESULT_ACCESS) ] = 0x01b7,
- /* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */
- [ C(RESULT_MISS) ] = 0x01b7,
- },
- /*
- * Use RFO, not WRITEBACK, because a write miss would typically occur
- * on RFO.
- */
- [ C(OP_WRITE) ] = {
- /* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */
- [ C(RESULT_ACCESS) ] = 0x01b7,
- /* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */
- [ C(RESULT_MISS) ] = 0x01b7,
- },
- [ C(OP_PREFETCH) ] = {
- /* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */
- [ C(RESULT_ACCESS) ] = 0x01b7,
- /* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */
- [ C(RESULT_MISS) ] = 0x01b7,
- },
- },
- [ C(DTLB) ] = {
- [ C(OP_READ) ] = {
- [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI (alias) */
- [ C(RESULT_MISS) ] = 0x0108, /* DTLB_LOAD_MISSES.ANY */
- },
- [ C(OP_WRITE) ] = {
- [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI (alias) */
- [ C(RESULT_MISS) ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS */
- },
- [ C(OP_PREFETCH) ] = {
- [ C(RESULT_ACCESS) ] = 0x0,
- [ C(RESULT_MISS) ] = 0x0,
- },
- },
- [ C(ITLB) ] = {
- [ C(OP_READ) ] = {
- [ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P */
- [ C(RESULT_MISS) ] = 0x20c8, /* ITLB_MISS_RETIRED */
- },
- [ C(OP_WRITE) ] = {
- [ C(RESULT_ACCESS) ] = -1,
- [ C(RESULT_MISS) ] = -1,
- },
- [ C(OP_PREFETCH) ] = {
- [ C(RESULT_ACCESS) ] = -1,
- [ C(RESULT_MISS) ] = -1,
- },
- },
- [ C(BPU ) ] = {
- [ C(OP_READ) ] = {
- [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */
- [ C(RESULT_MISS) ] = 0x03e8, /* BPU_CLEARS.ANY */
- },
- [ C(OP_WRITE) ] = {
- [ C(RESULT_ACCESS) ] = -1,
- [ C(RESULT_MISS) ] = -1,
- },
- [ C(OP_PREFETCH) ] = {
- [ C(RESULT_ACCESS) ] = -1,
- [ C(RESULT_MISS) ] = -1,
- },
- },
- [ C(NODE) ] = {
- [ C(OP_READ) ] = {
- [ C(RESULT_ACCESS) ] = 0x01b7,
- [ C(RESULT_MISS) ] = 0x01b7,
- },
- [ C(OP_WRITE) ] = {
- [ C(RESULT_ACCESS) ] = 0x01b7,
- [ C(RESULT_MISS) ] = 0x01b7,
- },
- [ C(OP_PREFETCH) ] = {
- [ C(RESULT_ACCESS) ] = 0x01b7,
- [ C(RESULT_MISS) ] = 0x01b7,
- },
- },
-};
-
-static __initconst const u64 core2_hw_cache_event_ids
- [PERF_COUNT_HW_CACHE_MAX]
- [PERF_COUNT_HW_CACHE_OP_MAX]
- [PERF_COUNT_HW_CACHE_RESULT_MAX] =
-{
- [ C(L1D) ] = {
- [ C(OP_READ) ] = {
- [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI */
- [ C(RESULT_MISS) ] = 0x0140, /* L1D_CACHE_LD.I_STATE */
- },
- [ C(OP_WRITE) ] = {
- [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI */
- [ C(RESULT_MISS) ] = 0x0141, /* L1D_CACHE_ST.I_STATE */
- },
- [ C(OP_PREFETCH) ] = {
- [ C(RESULT_ACCESS) ] = 0x104e, /* L1D_PREFETCH.REQUESTS */
- [ C(RESULT_MISS) ] = 0,
- },
- },
- [ C(L1I ) ] = {
- [ C(OP_READ) ] = {
- [ C(RESULT_ACCESS) ] = 0x0080, /* L1I.READS */
- [ C(RESULT_MISS) ] = 0x0081, /* L1I.MISSES */
- },
- [ C(OP_WRITE) ] = {
- [ C(RESULT_ACCESS) ] = -1,
- [ C(RESULT_MISS) ] = -1,
- },
- [ C(OP_PREFETCH) ] = {
- [ C(RESULT_ACCESS) ] = 0,
- [ C(RESULT_MISS) ] = 0,
- },
- },
- [ C(LL ) ] = {
- [ C(OP_READ) ] = {
- [ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI */
- [ C(RESULT_MISS) ] = 0x4129, /* L2_LD.ISTATE */
- },
- [ C(OP_WRITE) ] = {
- [ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI */
- [ C(RESULT_MISS) ] = 0x412A, /* L2_ST.ISTATE */
- },
- [ C(OP_PREFETCH) ] = {
- [ C(RESULT_ACCESS) ] = 0,
- [ C(RESULT_MISS) ] = 0,
- },
- },
- [ C(DTLB) ] = {
- [ C(OP_READ) ] = {
- [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI (alias) */
- [ C(RESULT_MISS) ] = 0x0208, /* DTLB_MISSES.MISS_LD */
- },
- [ C(OP_WRITE) ] = {
- [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI (alias) */
- [ C(RESULT_MISS) ] = 0x0808, /* DTLB_MISSES.MISS_ST */
- },
- [ C(OP_PREFETCH) ] = {
- [ C(RESULT_ACCESS) ] = 0,
- [ C(RESULT_MISS) ] = 0,
- },
- },
- [ C(ITLB) ] = {
- [ C(OP_READ) ] = {
- [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */
- [ C(RESULT_MISS) ] = 0x1282, /* ITLBMISSES */
- },
- [ C(OP_WRITE) ] = {
- [ C(RESULT_ACCESS) ] = -1,
- [ C(RESULT_MISS) ] = -1,
- },
- [ C(OP_PREFETCH) ] = {
- [ C(RESULT_ACCESS) ] = -1,
- [ C(RESULT_MISS) ] = -1,
- },
- },
- [ C(BPU ) ] = {
- [ C(OP_READ) ] = {
- [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY */
- [ C(RESULT_MISS) ] = 0x00c5, /* BP_INST_RETIRED.MISPRED */
- },
- [ C(OP_WRITE) ] = {
- [ C(RESULT_ACCESS) ] = -1,
- [ C(RESULT_MISS) ] = -1,
- },
- [ C(OP_PREFETCH) ] = {
- [ C(RESULT_ACCESS) ] = -1,
- [ C(RESULT_MISS) ] = -1,
- },
- },
-};
-
-static __initconst const u64 atom_hw_cache_event_ids
- [PERF_COUNT_HW_CACHE_MAX]
- [PERF_COUNT_HW_CACHE_OP_MAX]
- [PERF_COUNT_HW_CACHE_RESULT_MAX] =
-{
- [ C(L1D) ] = {
- [ C(OP_READ) ] = {
- [ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE.LD */
- [ C(RESULT_MISS) ] = 0,
- },
- [ C(OP_WRITE) ] = {
- [ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE.ST */
- [ C(RESULT_MISS) ] = 0,
- },
- [ C(OP_PREFETCH) ] = {
- [ C(RESULT_ACCESS) ] = 0x0,
- [ C(RESULT_MISS) ] = 0,
- },
- },
- [ C(L1I ) ] = {
- [ C(OP_READ) ] = {
- [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */
- [ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */
- },
- [ C(OP_WRITE) ] = {
- [ C(RESULT_ACCESS) ] = -1,
- [ C(RESULT_MISS) ] = -1,
- },
- [ C(OP_PREFETCH) ] = {
- [ C(RESULT_ACCESS) ] = 0,
- [ C(RESULT_MISS) ] = 0,
- },
- },
- [ C(LL ) ] = {
- [ C(OP_READ) ] = {
- [ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI */
- [ C(RESULT_MISS) ] = 0x4129, /* L2_LD.ISTATE */
- },
- [ C(OP_WRITE) ] = {
- [ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI */
- [ C(RESULT_MISS) ] = 0x412A, /* L2_ST.ISTATE */
- },
- [ C(OP_PREFETCH) ] = {
- [ C(RESULT_ACCESS) ] = 0,
- [ C(RESULT_MISS) ] = 0,
- },
- },
- [ C(DTLB) ] = {
- [ C(OP_READ) ] = {
- [ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE_LD.MESI (alias) */
- [ C(RESULT_MISS) ] = 0x0508, /* DTLB_MISSES.MISS_LD */
- },
- [ C(OP_WRITE) ] = {
- [ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE_ST.MESI (alias) */
- [ C(RESULT_MISS) ] = 0x0608, /* DTLB_MISSES.MISS_ST */
- },
- [ C(OP_PREFETCH) ] = {
- [ C(RESULT_ACCESS) ] = 0,
- [ C(RESULT_MISS) ] = 0,
- },
- },
- [ C(ITLB) ] = {
- [ C(OP_READ) ] = {
- [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */
- [ C(RESULT_MISS) ] = 0x0282, /* ITLB.MISSES */
- },
- [ C(OP_WRITE) ] = {
- [ C(RESULT_ACCESS) ] = -1,
- [ C(RESULT_MISS) ] = -1,
- },
- [ C(OP_PREFETCH) ] = {
- [ C(RESULT_ACCESS) ] = -1,
- [ C(RESULT_MISS) ] = -1,
- },
- },
- [ C(BPU ) ] = {
- [ C(OP_READ) ] = {
- [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY */
- [ C(RESULT_MISS) ] = 0x00c5, /* BP_INST_RETIRED.MISPRED */
- },
- [ C(OP_WRITE) ] = {
- [ C(RESULT_ACCESS) ] = -1,
- [ C(RESULT_MISS) ] = -1,
- },
- [ C(OP_PREFETCH) ] = {
- [ C(RESULT_ACCESS) ] = -1,
- [ C(RESULT_MISS) ] = -1,
- },
- },
-};
-
-static struct extra_reg intel_slm_extra_regs[] __read_mostly =
-{
- /* must define OFFCORE_RSP_X first, see intel_fixup_er() */
- INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x768005ffffull, RSP_0),
- INTEL_UEVENT_EXTRA_REG(0x02b7, MSR_OFFCORE_RSP_1, 0x368005ffffull, RSP_1),
- EVENT_EXTRA_END
-};
-
-#define SLM_DMND_READ SNB_DMND_DATA_RD
-#define SLM_DMND_WRITE SNB_DMND_RFO
-#define SLM_DMND_PREFETCH (SNB_PF_DATA_RD|SNB_PF_RFO)
-
-#define SLM_SNP_ANY (SNB_SNP_NONE|SNB_SNP_MISS|SNB_NO_FWD|SNB_HITM)
-#define SLM_LLC_ACCESS SNB_RESP_ANY
-#define SLM_LLC_MISS (SLM_SNP_ANY|SNB_NON_DRAM)
-
-static __initconst const u64 slm_hw_cache_extra_regs
- [PERF_COUNT_HW_CACHE_MAX]
- [PERF_COUNT_HW_CACHE_OP_MAX]
- [PERF_COUNT_HW_CACHE_RESULT_MAX] =
-{
- [ C(LL ) ] = {
- [ C(OP_READ) ] = {
- [ C(RESULT_ACCESS) ] = SLM_DMND_READ|SLM_LLC_ACCESS,
- [ C(RESULT_MISS) ] = 0,
- },
- [ C(OP_WRITE) ] = {
- [ C(RESULT_ACCESS) ] = SLM_DMND_WRITE|SLM_LLC_ACCESS,
- [ C(RESULT_MISS) ] = SLM_DMND_WRITE|SLM_LLC_MISS,
- },
- [ C(OP_PREFETCH) ] = {
- [ C(RESULT_ACCESS) ] = SLM_DMND_PREFETCH|SLM_LLC_ACCESS,
- [ C(RESULT_MISS) ] = SLM_DMND_PREFETCH|SLM_LLC_MISS,
- },
- },
-};
-
-static __initconst const u64 slm_hw_cache_event_ids
- [PERF_COUNT_HW_CACHE_MAX]
- [PERF_COUNT_HW_CACHE_OP_MAX]
- [PERF_COUNT_HW_CACHE_RESULT_MAX] =
-{
- [ C(L1D) ] = {
- [ C(OP_READ) ] = {
- [ C(RESULT_ACCESS) ] = 0,
- [ C(RESULT_MISS) ] = 0x0104, /* LD_DCU_MISS */
- },
- [ C(OP_WRITE) ] = {
- [ C(RESULT_ACCESS) ] = 0,
- [ C(RESULT_MISS) ] = 0,
- },
- [ C(OP_PREFETCH) ] = {
- [ C(RESULT_ACCESS) ] = 0,
- [ C(RESULT_MISS) ] = 0,
- },
- },
- [ C(L1I ) ] = {
- [ C(OP_READ) ] = {
- [ C(RESULT_ACCESS) ] = 0x0380, /* ICACHE.ACCESSES */
- [ C(RESULT_MISS) ] = 0x0280, /* ICACHE.MISSES */
- },
- [ C(OP_WRITE) ] = {
- [ C(RESULT_ACCESS) ] = -1,
- [ C(RESULT_MISS) ] = -1,
- },
- [ C(OP_PREFETCH) ] = {
- [ C(RESULT_ACCESS) ] = 0,
- [ C(RESULT_MISS) ] = 0,
- },
- },
- [ C(LL ) ] = {
- [ C(OP_READ) ] = {
- /* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */
- [ C(RESULT_ACCESS) ] = 0x01b7,
- [ C(RESULT_MISS) ] = 0,
- },
- [ C(OP_WRITE) ] = {
- /* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */
- [ C(RESULT_ACCESS) ] = 0x01b7,
- /* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */
- [ C(RESULT_MISS) ] = 0x01b7,
- },
- [ C(OP_PREFETCH) ] = {
- /* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */
- [ C(RESULT_ACCESS) ] = 0x01b7,
- /* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */
- [ C(RESULT_MISS) ] = 0x01b7,
- },
- },
- [ C(DTLB) ] = {
- [ C(OP_READ) ] = {
- [ C(RESULT_ACCESS) ] = 0,
- [ C(RESULT_MISS) ] = 0x0804, /* LD_DTLB_MISS */
- },
- [ C(OP_WRITE) ] = {
- [ C(RESULT_ACCESS) ] = 0,
- [ C(RESULT_MISS) ] = 0,
- },
- [ C(OP_PREFETCH) ] = {
- [ C(RESULT_ACCESS) ] = 0,
- [ C(RESULT_MISS) ] = 0,
- },
- },
- [ C(ITLB) ] = {
- [ C(OP_READ) ] = {
- [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */
- [ C(RESULT_MISS) ] = 0x40205, /* PAGE_WALKS.I_SIDE_WALKS */
- },
- [ C(OP_WRITE) ] = {
- [ C(RESULT_ACCESS) ] = -1,
- [ C(RESULT_MISS) ] = -1,
- },
- [ C(OP_PREFETCH) ] = {
- [ C(RESULT_ACCESS) ] = -1,
- [ C(RESULT_MISS) ] = -1,
- },
- },
- [ C(BPU ) ] = {
- [ C(OP_READ) ] = {
- [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY */
- [ C(RESULT_MISS) ] = 0x00c5, /* BP_INST_RETIRED.MISPRED */
- },
- [ C(OP_WRITE) ] = {
- [ C(RESULT_ACCESS) ] = -1,
- [ C(RESULT_MISS) ] = -1,
- },
- [ C(OP_PREFETCH) ] = {
- [ C(RESULT_ACCESS) ] = -1,
- [ C(RESULT_MISS) ] = -1,
- },
- },
-};
-
-#define KNL_OT_L2_HITE		BIT_ULL(19) /* Other Tile L2 Hit, E state */
-#define KNL_OT_L2_HITF		BIT_ULL(20) /* Other Tile L2 Hit, F state */
-#define KNL_MCDRAM_LOCAL BIT_ULL(21)
-#define KNL_MCDRAM_FAR BIT_ULL(22)
-#define KNL_DDR_LOCAL BIT_ULL(23)
-#define KNL_DDR_FAR BIT_ULL(24)
-#define KNL_DRAM_ANY (KNL_MCDRAM_LOCAL | KNL_MCDRAM_FAR | \
- KNL_DDR_LOCAL | KNL_DDR_FAR)
-#define KNL_L2_READ SLM_DMND_READ
-#define KNL_L2_WRITE SLM_DMND_WRITE
-#define KNL_L2_PREFETCH SLM_DMND_PREFETCH
-#define KNL_L2_ACCESS SLM_LLC_ACCESS
-#define KNL_L2_MISS (KNL_OT_L2_HITE | KNL_OT_L2_HITF | \
- KNL_DRAM_ANY | SNB_SNP_ANY | \
- SNB_NON_DRAM)
-
-static __initconst const u64 knl_hw_cache_extra_regs
- [PERF_COUNT_HW_CACHE_MAX]
- [PERF_COUNT_HW_CACHE_OP_MAX]
- [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
- [C(LL)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = KNL_L2_READ | KNL_L2_ACCESS,
- [C(RESULT_MISS)] = 0,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = KNL_L2_WRITE | KNL_L2_ACCESS,
- [C(RESULT_MISS)] = KNL_L2_WRITE | KNL_L2_MISS,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = KNL_L2_PREFETCH | KNL_L2_ACCESS,
- [C(RESULT_MISS)] = KNL_L2_PREFETCH | KNL_L2_MISS,
- },
- },
-};
-
-/*
- * Used from PMIs where the LBRs are already disabled.
- *
- * This function may be called consecutively; the PMU must remain in the
- * disabled state across such consecutive calls.
- *
- * During consecutive calls, the same disable value will be written to related
- * registers, so the PMU state remains unchanged. hw.state in
- * intel_bts_disable_local will remain PERF_HES_STOPPED too in consecutive
- * calls.
- */
-static void __intel_pmu_disable_all(void)
-{
- struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
-
- wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
-
- if (test_bit(INTEL_PMC_IDX_FIXED_BTS, cpuc->active_mask))
- intel_pmu_disable_bts();
- else
- intel_bts_disable_local();
-
- intel_pmu_pebs_disable_all();
-}
-
-static void intel_pmu_disable_all(void)
-{
- __intel_pmu_disable_all();
- intel_pmu_lbr_disable_all();
-}
-
-static void __intel_pmu_enable_all(int added, bool pmi)
-{
- struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
-
- intel_pmu_pebs_enable_all();
- intel_pmu_lbr_enable_all(pmi);
- wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL,
- x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_guest_mask);
-
- if (test_bit(INTEL_PMC_IDX_FIXED_BTS, cpuc->active_mask)) {
- struct perf_event *event =
- cpuc->events[INTEL_PMC_IDX_FIXED_BTS];
-
- if (WARN_ON_ONCE(!event))
- return;
-
- intel_pmu_enable_bts(event->hw.config);
- } else
- intel_bts_enable_local();
-}
-
-static void intel_pmu_enable_all(int added)
-{
- __intel_pmu_enable_all(added, false);
-}
-
-/*
- * Workaround for:
- * Intel Errata AAK100 (model 26)
- * Intel Errata AAP53 (model 30)
- * Intel Errata BD53 (model 44)
- *
- * The official story:
- * These chips need to be 'reset' when adding counters by programming the
- * magic three (non-counting) events 0x4300B5, 0x4300D2, and 0x4300B1 either
- * in sequence on the same PMC or on different PMCs.
- *
- * In practice it appears some of these events do in fact count, and
- * we need to program all 4 events.
- */
-static void intel_pmu_nhm_workaround(void)
-{
- struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
- static const unsigned long nhm_magic[4] = {
- 0x4300B5,
- 0x4300D2,
- 0x4300B1,
- 0x4300B1
- };
- struct perf_event *event;
- int i;
-
- /*
- * The erratum requires the following steps:
- * 1) Clear MSR_IA32_PEBS_ENABLE and MSR_CORE_PERF_GLOBAL_CTRL;
- * 2) Configure 4 PERFEVTSELx with the magic events and clear
- *    the corresponding PMCx;
- * 3) Set bits 0-3 of MSR_CORE_PERF_GLOBAL_CTRL;
- * 4) Clear MSR_CORE_PERF_GLOBAL_CTRL;
- * 5) Clear 4 pairs of PERFEVTSELx and PMCx;
- */
-
- /*
- * The real steps we choose are a little different from above.
- * A) To reduce MSR operations, we don't run step 1) as they
- * are already cleared before this function is called;
- * B) Call x86_perf_event_update to save PMCx before configuring
- * PERFEVTSELx with magic number;
- * C) For step 5), we only clear a PERFEVTSELx when it is not
- *    currently in use.
- * D) Call x86_perf_event_set_period to restore PMCx;
- */
-
- /* We always operate on 4 pairs of PERF counters */
- for (i = 0; i < 4; i++) {
- event = cpuc->events[i];
- if (event)
- x86_perf_event_update(event);
- }
-
- for (i = 0; i < 4; i++) {
- wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + i, nhm_magic[i]);
- wrmsrl(MSR_ARCH_PERFMON_PERFCTR0 + i, 0x0);
- }
-
- wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0xf);
- wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0x0);
-
- for (i = 0; i < 4; i++) {
- event = cpuc->events[i];
-
- if (event) {
- x86_perf_event_set_period(event);
- __x86_pmu_enable_event(&event->hw,
- ARCH_PERFMON_EVENTSEL_ENABLE);
- } else
- wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + i, 0x0);
- }
-}
-
-static void intel_pmu_nhm_enable_all(int added)
-{
- if (added)
- intel_pmu_nhm_workaround();
- intel_pmu_enable_all(added);
-}
-
-static inline u64 intel_pmu_get_status(void)
-{
- u64 status;
-
- rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
-
- return status;
-}
-
-static inline void intel_pmu_ack_status(u64 ack)
-{
- wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack);
-}
-
-static void intel_pmu_disable_fixed(struct hw_perf_event *hwc)
-{
- int idx = hwc->idx - INTEL_PMC_IDX_FIXED;
- u64 ctrl_val, mask;
-
- mask = 0xfULL << (idx * 4);
-
- rdmsrl(hwc->config_base, ctrl_val);
- ctrl_val &= ~mask;
- wrmsrl(hwc->config_base, ctrl_val);
-}
-
-static inline bool event_is_checkpointed(struct perf_event *event)
-{
- return (event->hw.config & HSW_IN_TX_CHECKPOINTED) != 0;
-}
-
-static void intel_pmu_disable_event(struct perf_event *event)
-{
- struct hw_perf_event *hwc = &event->hw;
- struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
-
- if (unlikely(hwc->idx == INTEL_PMC_IDX_FIXED_BTS)) {
- intel_pmu_disable_bts();
- intel_pmu_drain_bts_buffer();
- return;
- }
-
- cpuc->intel_ctrl_guest_mask &= ~(1ull << hwc->idx);
- cpuc->intel_ctrl_host_mask &= ~(1ull << hwc->idx);
- cpuc->intel_cp_status &= ~(1ull << hwc->idx);
-
- /*
- * must disable before any actual event
- * because any event may be combined with LBR
- */
- if (needs_branch_stack(event))
- intel_pmu_lbr_disable(event);
-
- if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
- intel_pmu_disable_fixed(hwc);
- return;
- }
-
- x86_pmu_disable_event(event);
-
- if (unlikely(event->attr.precise_ip))
- intel_pmu_pebs_disable(event);
-}
-
-static void intel_pmu_enable_fixed(struct hw_perf_event *hwc)
-{
- int idx = hwc->idx - INTEL_PMC_IDX_FIXED;
- u64 ctrl_val, bits, mask;
-
- /*
- * Enable IRQ generation (0x8),
- * and enable ring-3 counting (0x2) and ring-0 counting (0x1)
- * if requested:
- */
- bits = 0x8ULL;
- if (hwc->config & ARCH_PERFMON_EVENTSEL_USR)
- bits |= 0x2;
- if (hwc->config & ARCH_PERFMON_EVENTSEL_OS)
- bits |= 0x1;
-
- /*
- * ANY bit is supported in v3 and up
- */
- if (x86_pmu.version > 2 && hwc->config & ARCH_PERFMON_EVENTSEL_ANY)
- bits |= 0x4;
-
- bits <<= (idx * 4);
- mask = 0xfULL << (idx * 4);
-
- rdmsrl(hwc->config_base, ctrl_val);
- ctrl_val &= ~mask;
- ctrl_val |= bits;
- wrmsrl(hwc->config_base, ctrl_val);
-}
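MSR_ARCH_PERFMON_FIXED_CTR_CTRL packs one 4-bit control nibble per fixed counter; intel_pmu_enable_fixed() above composes the nibble and masks it into place. A small userspace sketch (illustrative only) of the value that would result for fixed counter 1 counting in both rings with PMI enabled:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	int idx = 1;			/* second fixed counter */
	uint64_t bits = 0x8;		/* enable PMI on overflow */
	bits |= 0x2;			/* count in ring 3 (USR) */
	bits |= 0x1;			/* count in ring 0 (OS)  */
	/* bits |= 0x4;			   ANY-thread bit, architectural perfmon v3+ only */

	uint64_t mask = 0xfULL << (idx * 4);
	uint64_t ctrl = 0;		/* stand-in for the current MSR contents */

	ctrl = (ctrl & ~mask) | (bits << (idx * 4));
	printf("FIXED_CTR_CTRL = 0x%llx\n", (unsigned long long)ctrl);	/* 0xb0 */
	return 0;
}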
-
-static void intel_pmu_enable_event(struct perf_event *event)
-{
- struct hw_perf_event *hwc = &event->hw;
- struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
-
- if (unlikely(hwc->idx == INTEL_PMC_IDX_FIXED_BTS)) {
- if (!__this_cpu_read(cpu_hw_events.enabled))
- return;
-
- intel_pmu_enable_bts(hwc->config);
- return;
- }
- /*
- * must be enabled before any actual event
- * because any event may be combined with LBR
- */
- if (needs_branch_stack(event))
- intel_pmu_lbr_enable(event);
-
- if (event->attr.exclude_host)
- cpuc->intel_ctrl_guest_mask |= (1ull << hwc->idx);
- if (event->attr.exclude_guest)
- cpuc->intel_ctrl_host_mask |= (1ull << hwc->idx);
-
- if (unlikely(event_is_checkpointed(event)))
- cpuc->intel_cp_status |= (1ull << hwc->idx);
-
- if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
- intel_pmu_enable_fixed(hwc);
- return;
- }
-
- if (unlikely(event->attr.precise_ip))
- intel_pmu_pebs_enable(event);
-
- __x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE);
-}
-
-/*
- * Save and restart an expired event. Called by NMI contexts,
- * so it has to be careful about preempting normal event ops:
- */
-int intel_pmu_save_and_restart(struct perf_event *event)
-{
- x86_perf_event_update(event);
- /*
- * For a checkpointed counter always reset back to 0. This
- * avoids a situation where the counter overflows, aborts the
- * transaction and is then set back to shortly before the
- * overflow, and overflows and aborts again.
- */
- if (unlikely(event_is_checkpointed(event))) {
- /* No race with NMIs because the counter should not be armed */
- wrmsrl(event->hw.event_base, 0);
- local64_set(&event->hw.prev_count, 0);
- }
- return x86_perf_event_set_period(event);
-}
-
-static void intel_pmu_reset(void)
-{
- struct debug_store *ds = __this_cpu_read(cpu_hw_events.ds);
- unsigned long flags;
- int idx;
-
- if (!x86_pmu.num_counters)
- return;
-
- local_irq_save(flags);
-
- pr_info("clearing PMU state on CPU#%d\n", smp_processor_id());
-
- for (idx = 0; idx < x86_pmu.num_counters; idx++) {
- wrmsrl_safe(x86_pmu_config_addr(idx), 0ull);
- wrmsrl_safe(x86_pmu_event_addr(idx), 0ull);
- }
- for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++)
- wrmsrl_safe(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull);
-
- if (ds)
- ds->bts_index = ds->bts_buffer_base;
-
- /* Ack all overflows and disable fixed counters */
- if (x86_pmu.version >= 2) {
- intel_pmu_ack_status(intel_pmu_get_status());
- wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
- }
-
- /* Reset LBRs and LBR freezing */
- if (x86_pmu.lbr_nr) {
- update_debugctlmsr(get_debugctlmsr() &
- ~(DEBUGCTLMSR_FREEZE_LBRS_ON_PMI|DEBUGCTLMSR_LBR));
- }
-
- local_irq_restore(flags);
-}
-
-/*
- * This handler is triggered by the local APIC, so the APIC IRQ handling
- * rules apply:
- */
-static int intel_pmu_handle_irq(struct pt_regs *regs)
-{
- struct perf_sample_data data;
- struct cpu_hw_events *cpuc;
- int bit, loops;
- u64 status;
- int handled;
-
- cpuc = this_cpu_ptr(&cpu_hw_events);
-
- /*
- * No known reason to not always do late ACK,
- * but just in case do it opt-in.
- */
- if (!x86_pmu.late_ack)
- apic_write(APIC_LVTPC, APIC_DM_NMI);
- __intel_pmu_disable_all();
- handled = intel_pmu_drain_bts_buffer();
- handled += intel_bts_interrupt();
- status = intel_pmu_get_status();
- if (!status)
- goto done;
-
- loops = 0;
-again:
- intel_pmu_lbr_read();
- intel_pmu_ack_status(status);
- if (++loops > 100) {
- static bool warned = false;
- if (!warned) {
- WARN(1, "perfevents: irq loop stuck!\n");
- perf_event_print_debug();
- warned = true;
- }
- intel_pmu_reset();
- goto done;
- }
-
- inc_irq_stat(apic_perf_irqs);
-
-
- /*
- * Ignore a range of extra bits in status that do not indicate
- * overflow by themselves.
- */
- status &= ~(GLOBAL_STATUS_COND_CHG |
- GLOBAL_STATUS_ASIF |
- GLOBAL_STATUS_LBRS_FROZEN);
- if (!status)
- goto done;
-
- /*
- * PEBS overflow sets bit 62 in the global status register
- */
- if (__test_and_clear_bit(62, (unsigned long *)&status)) {
- handled++;
- x86_pmu.drain_pebs(regs);
- /*
- * There are cases where, even though the PEBS ovfl bit is set
- * in GLOBAL_OVF_STATUS, the PEBS events may also have their
- * overflow bits set for their counters. We must clear them
- * here because they have been processed as exact samples in
- * the drain_pebs() routine. They must not be processed again
- * in the for_each_set_bit() loop for regular samples below.
- */
- status &= ~cpuc->pebs_enabled;
- status &= x86_pmu.intel_ctrl | GLOBAL_STATUS_TRACE_TOPAPMI;
- }
-
- /*
- * Intel PT
- */
- if (__test_and_clear_bit(55, (unsigned long *)&status)) {
- handled++;
- intel_pt_interrupt();
- }
-
- /*
- * Checkpointed counters can lead to 'spurious' PMIs because the
- * rollback caused by the PMI will have cleared the overflow status
- * bit. Therefore always force probe these counters.
- */
- status |= cpuc->intel_cp_status;
-
- for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
- struct perf_event *event = cpuc->events[bit];
-
- handled++;
-
- if (!test_bit(bit, cpuc->active_mask))
- continue;
-
- if (!intel_pmu_save_and_restart(event))
- continue;
-
- perf_sample_data_init(&data, 0, event->hw.last_period);
-
- if (has_branch_stack(event))
- data.br_stack = &cpuc->lbr_stack;
-
- if (perf_event_overflow(event, &data, regs))
- x86_pmu_stop(event, 0);
- }
-
- /*
- * Repeat if there is more work to be done:
- */
- status = intel_pmu_get_status();
- if (status)
- goto again;
-
-done:
- /* Only restore PMU state when it's active. See x86_pmu_disable(). */
- if (cpuc->enabled)
- __intel_pmu_enable_all(0, true);
-
- /*
- * Only unmask the NMI after the overflow counters
- * have been reset. This avoids spurious NMIs on
- * Haswell CPUs.
- */
- if (x86_pmu.late_ack)
- apic_write(APIC_LVTPC, APIC_DM_NMI);
- return handled;
-}
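The handler above peels the special GLOBAL_STATUS bits off before treating the remainder as ordinary counter overflows. A simplified userspace model of that ordering (the bit positions are taken from the code above, everything else is illustrative):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	/* pretend status: PEBS buffer (bit 62), Intel PT (bit 55), counters 0 and 3 */
	uint64_t status = (1ULL << 62) | (1ULL << 55) | (1ULL << 3) | (1ULL << 0);

	if (status & (1ULL << 62)) {		/* PEBS overflow: drained separately */
		status &= ~(1ULL << 62);
		printf("drain PEBS buffer\n");
	}
	if (status & (1ULL << 55)) {		/* Intel PT (ToPA) interrupt */
		status &= ~(1ULL << 55);
		printf("handle PT interrupt\n");
	}
	for (int bit = 0; bit < 64; bit++)	/* remaining bits: regular counter overflows */
		if (status & (1ULL << bit))
			printf("counter %d overflowed\n", bit);
	return 0;
}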
-
-static struct event_constraint *
-intel_bts_constraints(struct perf_event *event)
-{
- struct hw_perf_event *hwc = &event->hw;
- unsigned int hw_event, bts_event;
-
- if (event->attr.freq)
- return NULL;
-
- hw_event = hwc->config & INTEL_ARCH_EVENT_MASK;
- bts_event = x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS);
-
- if (unlikely(hw_event == bts_event && hwc->sample_period == 1))
- return &bts_constraint;
-
- return NULL;
-}
-
-static int intel_alt_er(int idx, u64 config)
-{
- int alt_idx = idx;
-
- if (!(x86_pmu.flags & PMU_FL_HAS_RSP_1))
- return idx;
-
- if (idx == EXTRA_REG_RSP_0)
- alt_idx = EXTRA_REG_RSP_1;
-
- if (idx == EXTRA_REG_RSP_1)
- alt_idx = EXTRA_REG_RSP_0;
-
- if (config & ~x86_pmu.extra_regs[alt_idx].valid_mask)
- return idx;
-
- return alt_idx;
-}
-
-static void intel_fixup_er(struct perf_event *event, int idx)
-{
- event->hw.extra_reg.idx = idx;
-
- if (idx == EXTRA_REG_RSP_0) {
- event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
- event->hw.config |= x86_pmu.extra_regs[EXTRA_REG_RSP_0].event;
- event->hw.extra_reg.reg = MSR_OFFCORE_RSP_0;
- } else if (idx == EXTRA_REG_RSP_1) {
- event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
- event->hw.config |= x86_pmu.extra_regs[EXTRA_REG_RSP_1].event;
- event->hw.extra_reg.reg = MSR_OFFCORE_RSP_1;
- }
-}
-
-/*
- * manage allocation of shared extra msr for certain events
- *
- * sharing can be:
- * per-cpu: to be shared between the various events on a single PMU
- * per-core: per-cpu + shared by HT threads
- */
-static struct event_constraint *
-__intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc,
- struct perf_event *event,
- struct hw_perf_event_extra *reg)
-{
- struct event_constraint *c = &emptyconstraint;
- struct er_account *era;
- unsigned long flags;
- int idx = reg->idx;
-
- /*
- * reg->alloc can be set due to existing state, so for fake cpuc we
- * need to ignore this, otherwise we might fail to allocate proper fake
- * state for this extra reg constraint. Also see the comment below.
- */
- if (reg->alloc && !cpuc->is_fake)
- return NULL; /* call x86_get_event_constraint() */
-
-again:
- era = &cpuc->shared_regs->regs[idx];
- /*
- * we use spin_lock_irqsave() to avoid lockdep issues when
- * passing a fake cpuc
- */
- raw_spin_lock_irqsave(&era->lock, flags);
-
- if (!atomic_read(&era->ref) || era->config == reg->config) {
-
- /*
- * If its a fake cpuc -- as per validate_{group,event}() we
- * shouldn't touch event state and we can avoid doing so
- * since both will only call get_event_constraints() once
- * on each event, this avoids the need for reg->alloc.
- *
- * Not doing the ER fixup will only result in era->reg being
- * wrong, but since we won't actually try and program hardware
- * this isn't a problem either.
- */
- if (!cpuc->is_fake) {
- if (idx != reg->idx)
- intel_fixup_er(event, idx);
-
- /*
- * x86_schedule_events() can call get_event_constraints()
- * multiple times on events in the case of incremental
- * scheduling. reg->alloc ensures we only do the ER
- * allocation once.
- */
- reg->alloc = 1;
- }
-
- /* lock in msr value */
- era->config = reg->config;
- era->reg = reg->reg;
-
- /* one more user */
- atomic_inc(&era->ref);
-
- /*
- * need to call x86_get_event_constraint()
- * to check if associated event has constraints
- */
- c = NULL;
- } else {
- idx = intel_alt_er(idx, reg->config);
- if (idx != reg->idx) {
- raw_spin_unlock_irqrestore(&era->lock, flags);
- goto again;
- }
- }
- raw_spin_unlock_irqrestore(&era->lock, flags);
-
- return c;
-}
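Several events may share one OFFCORE_RSP MSR only if they want the same value; otherwise the caller retries with the alternate MSR chosen by intel_alt_er(). A minimal userspace model of just that admission rule (the locking and the fake-cpuc path are omitted; illustrative only):

#include <stdbool.h>
#include <stdio.h>
#include <stdint.h>

struct er_account {
	int      ref;		/* how many events currently use this MSR */
	uint64_t config;	/* value they agreed to program */
};

/* Grant the MSR if it is free or already holds the requested value. */
static bool er_try_get(struct er_account *era, uint64_t config)
{
	if (era->ref && era->config != config)
		return false;
	era->config = config;
	era->ref++;
	return true;
}

int main(void)
{
	struct er_account rsp0 = { 0, 0 };

	printf("%d\n", er_try_get(&rsp0, 0x10001));	/* 1: MSR was free */
	printf("%d\n", er_try_get(&rsp0, 0x10001));	/* 1: same config, shared */
	printf("%d\n", er_try_get(&rsp0, 0x20002));	/* 0: conflict, caller retries RSP_1 */
	return 0;
}

In the real code the retry with the alternate RSP MSR and the per-register spinlock make this safe against concurrent scheduling.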
-
-static void
-__intel_shared_reg_put_constraints(struct cpu_hw_events *cpuc,
- struct hw_perf_event_extra *reg)
-{
- struct er_account *era;
-
- /*
- * Only put constraint if extra reg was actually allocated. Also takes
- * care of events which do not use an extra shared reg.
- *
- * Also, if this is a fake cpuc we shouldn't touch any event state
- * (reg->alloc) and we don't care about leaving inconsistent cpuc state
- * either since it'll be thrown out.
- */
- if (!reg->alloc || cpuc->is_fake)
- return;
-
- era = &cpuc->shared_regs->regs[reg->idx];
-
- /* one fewer user */
- atomic_dec(&era->ref);
-
- /* allocate again next time */
- reg->alloc = 0;
-}
-
-static struct event_constraint *
-intel_shared_regs_constraints(struct cpu_hw_events *cpuc,
- struct perf_event *event)
-{
- struct event_constraint *c = NULL, *d;
- struct hw_perf_event_extra *xreg, *breg;
-
- xreg = &event->hw.extra_reg;
- if (xreg->idx != EXTRA_REG_NONE) {
- c = __intel_shared_reg_get_constraints(cpuc, event, xreg);
- if (c == &emptyconstraint)
- return c;
- }
- breg = &event->hw.branch_reg;
- if (breg->idx != EXTRA_REG_NONE) {
- d = __intel_shared_reg_get_constraints(cpuc, event, breg);
- if (d == &emptyconstraint) {
- __intel_shared_reg_put_constraints(cpuc, xreg);
- c = d;
- }
- }
- return c;
-}
-
-struct event_constraint *
-x86_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
- struct perf_event *event)
-{
- struct event_constraint *c;
-
- if (x86_pmu.event_constraints) {
- for_each_event_constraint(c, x86_pmu.event_constraints) {
- if ((event->hw.config & c->cmask) == c->code) {
- event->hw.flags |= c->flags;
- return c;
- }
- }
- }
-
- return &unconstrained;
-}
-
-static struct event_constraint *
-__intel_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
- struct perf_event *event)
-{
- struct event_constraint *c;
-
- c = intel_bts_constraints(event);
- if (c)
- return c;
-
- c = intel_shared_regs_constraints(cpuc, event);
- if (c)
- return c;
-
- c = intel_pebs_constraints(event);
- if (c)
- return c;
-
- return x86_get_event_constraints(cpuc, idx, event);
-}
-
-static void
-intel_start_scheduling(struct cpu_hw_events *cpuc)
-{
- struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
- struct intel_excl_states *xl;
- int tid = cpuc->excl_thread_id;
-
- /*
- * nothing needed if in group validation mode
- */
- if (cpuc->is_fake || !is_ht_workaround_enabled())
- return;
-
- /*
- * no exclusion needed
- */
- if (WARN_ON_ONCE(!excl_cntrs))
- return;
-
- xl = &excl_cntrs->states[tid];
-
- xl->sched_started = true;
- /*
- * lock shared state until we are done scheduling
- * in stop_event_scheduling()
- * makes scheduling appear as a transaction
- */
- raw_spin_lock(&excl_cntrs->lock);
-}
-
-static void intel_commit_scheduling(struct cpu_hw_events *cpuc, int idx, int cntr)
-{
- struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
- struct event_constraint *c = cpuc->event_constraint[idx];
- struct intel_excl_states *xl;
- int tid = cpuc->excl_thread_id;
-
- if (cpuc->is_fake || !is_ht_workaround_enabled())
- return;
-
- if (WARN_ON_ONCE(!excl_cntrs))
- return;
-
- if (!(c->flags & PERF_X86_EVENT_DYNAMIC))
- return;
-
- xl = &excl_cntrs->states[tid];
-
- lockdep_assert_held(&excl_cntrs->lock);
-
- if (c->flags & PERF_X86_EVENT_EXCL)
- xl->state[cntr] = INTEL_EXCL_EXCLUSIVE;
- else
- xl->state[cntr] = INTEL_EXCL_SHARED;
-}
-
-static void
-intel_stop_scheduling(struct cpu_hw_events *cpuc)
-{
- struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
- struct intel_excl_states *xl;
- int tid = cpuc->excl_thread_id;
-
- /*
- * nothing needed if in group validation mode
- */
- if (cpuc->is_fake || !is_ht_workaround_enabled())
- return;
- /*
- * no exclusion needed
- */
- if (WARN_ON_ONCE(!excl_cntrs))
- return;
-
- xl = &excl_cntrs->states[tid];
-
- xl->sched_started = false;
- /*
- * release shared state lock (acquired in intel_start_scheduling())
- */
- raw_spin_unlock(&excl_cntrs->lock);
-}
-
-static struct event_constraint *
-intel_get_excl_constraints(struct cpu_hw_events *cpuc, struct perf_event *event,
- int idx, struct event_constraint *c)
-{
- struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
- struct intel_excl_states *xlo;
- int tid = cpuc->excl_thread_id;
- int is_excl, i;
-
- /*
- * validating a group does not require
- * enforcing cross-thread exclusion
- */
- if (cpuc->is_fake || !is_ht_workaround_enabled())
- return c;
-
- /*
- * no exclusion needed
- */
- if (WARN_ON_ONCE(!excl_cntrs))
- return c;
-
- /*
- * because we modify the constraint, we need
- * to make a copy. Static constraints come
- * from static const tables.
- *
- * only needed when constraint has not yet
- * been cloned (marked dynamic)
- */
- if (!(c->flags & PERF_X86_EVENT_DYNAMIC)) {
- struct event_constraint *cx;
-
- /*
- * grab pre-allocated constraint entry
- */
- cx = &cpuc->constraint_list[idx];
-
- /*
- * initialize dynamic constraint
- * with static constraint
- */
- *cx = *c;
-
- /*
- * mark constraint as dynamic, so we
- * can free it later on
- */
- cx->flags |= PERF_X86_EVENT_DYNAMIC;
- c = cx;
- }
-
- /*
- * From here on, the constraint is dynamic.
- * Either it was just allocated above, or it
- * was allocated during an earlier invocation
- * of this function
- */
-
- /*
- * state of sibling HT
- */
- xlo = &excl_cntrs->states[tid ^ 1];
-
- /*
- * event requires exclusive counter access
- * across HT threads
- */
- is_excl = c->flags & PERF_X86_EVENT_EXCL;
- if (is_excl && !(event->hw.flags & PERF_X86_EVENT_EXCL_ACCT)) {
- event->hw.flags |= PERF_X86_EVENT_EXCL_ACCT;
- if (!cpuc->n_excl++)
- WRITE_ONCE(excl_cntrs->has_exclusive[tid], 1);
- }
-
- /*
- * Modify static constraint with current dynamic
- * state of thread
- *
- * EXCLUSIVE: sibling counter measuring exclusive event
- * SHARED : sibling counter measuring non-exclusive event
- * UNUSED : sibling counter unused
- */
- for_each_set_bit(i, c->idxmsk, X86_PMC_IDX_MAX) {
- /*
- * exclusive event in sibling counter
- * our corresponding counter cannot be used
- * regardless of our event
- */
- if (xlo->state[i] == INTEL_EXCL_EXCLUSIVE)
- __clear_bit(i, c->idxmsk);
- /*
- * if measuring an exclusive event, sibling
- * measuring non-exclusive, then counter cannot
- * be used
- */
- if (is_excl && xlo->state[i] == INTEL_EXCL_SHARED)
- __clear_bit(i, c->idxmsk);
- }
-
- /*
- * recompute actual bit weight for scheduling algorithm
- */
- c->weight = hweight64(c->idxmsk64);
-
- /*
- * if we return an empty mask, then switch
- * back to static empty constraint to avoid
- * the cost of freeing later on
- */
- if (c->weight == 0)
- c = &emptyconstraint;
-
- return c;
-}
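The loop above thins out the constraint's counter bitmap according to what the sibling hyper-thread has already scheduled. A standalone sketch of just that filtering step (counter count and sibling states are made up for illustration):

#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

enum excl_state { UNUSED, SHARED, EXCLUSIVE };

static uint64_t filter_idxmsk(uint64_t idxmsk, const enum excl_state *sibling,
			      int nr_counters, bool is_excl)
{
	for (int i = 0; i < nr_counters; i++) {
		if (!(idxmsk & (1ULL << i)))
			continue;
		if (sibling[i] == EXCLUSIVE)
			idxmsk &= ~(1ULL << i);		/* counter pinned by sibling */
		else if (is_excl && sibling[i] == SHARED)
			idxmsk &= ~(1ULL << i);		/* we need exclusivity, sibling shares */
	}
	return idxmsk;
}

int main(void)
{
	enum excl_state sibling[4] = { EXCLUSIVE, SHARED, UNUSED, UNUSED };

	printf("0x%llx\n", (unsigned long long)filter_idxmsk(0xf, sibling, 4, true));  /* 0xc */
	printf("0x%llx\n", (unsigned long long)filter_idxmsk(0xf, sibling, 4, false)); /* 0xe */
	return 0;
}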
-
-static struct event_constraint *
-intel_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
- struct perf_event *event)
-{
- struct event_constraint *c1 = NULL;
- struct event_constraint *c2;
-
- if (idx >= 0) /* fake does < 0 */
- c1 = cpuc->event_constraint[idx];
-
- /*
- * first time only
- * - static constraint: no change across incremental scheduling calls
- * - dynamic constraint: handled by intel_get_excl_constraints()
- */
- c2 = __intel_get_event_constraints(cpuc, idx, event);
- if (c1 && (c1->flags & PERF_X86_EVENT_DYNAMIC)) {
- bitmap_copy(c1->idxmsk, c2->idxmsk, X86_PMC_IDX_MAX);
- c1->weight = c2->weight;
- c2 = c1;
- }
-
- if (cpuc->excl_cntrs)
- return intel_get_excl_constraints(cpuc, event, idx, c2);
-
- return c2;
-}
-
-static void intel_put_excl_constraints(struct cpu_hw_events *cpuc,
- struct perf_event *event)
-{
- struct hw_perf_event *hwc = &event->hw;
- struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
- int tid = cpuc->excl_thread_id;
- struct intel_excl_states *xl;
-
- /*
- * nothing needed if in group validation mode
- */
- if (cpuc->is_fake)
- return;
-
- if (WARN_ON_ONCE(!excl_cntrs))
- return;
-
- if (hwc->flags & PERF_X86_EVENT_EXCL_ACCT) {
- hwc->flags &= ~PERF_X86_EVENT_EXCL_ACCT;
- if (!--cpuc->n_excl)
- WRITE_ONCE(excl_cntrs->has_exclusive[tid], 0);
- }
-
- /*
- * If event was actually assigned, then mark the counter state as
- * unused now.
- */
- if (hwc->idx >= 0) {
- xl = &excl_cntrs->states[tid];
-
- /*
- * put_constraint may be called from x86_schedule_events()
- * which already has the lock held, so make the locking
- * conditional here.
- */
- if (!xl->sched_started)
- raw_spin_lock(&excl_cntrs->lock);
-
- xl->state[hwc->idx] = INTEL_EXCL_UNUSED;
-
- if (!xl->sched_started)
- raw_spin_unlock(&excl_cntrs->lock);
- }
-}
-
-static void
-intel_put_shared_regs_event_constraints(struct cpu_hw_events *cpuc,
- struct perf_event *event)
-{
- struct hw_perf_event_extra *reg;
-
- reg = &event->hw.extra_reg;
- if (reg->idx != EXTRA_REG_NONE)
- __intel_shared_reg_put_constraints(cpuc, reg);
-
- reg = &event->hw.branch_reg;
- if (reg->idx != EXTRA_REG_NONE)
- __intel_shared_reg_put_constraints(cpuc, reg);
-}
-
-static void intel_put_event_constraints(struct cpu_hw_events *cpuc,
- struct perf_event *event)
-{
- intel_put_shared_regs_event_constraints(cpuc, event);
-
- /*
- * if the PMU has exclusive counter restrictions, then
- * all events are subject to them and must call the
- * put_excl_constraints() routine
- */
- if (cpuc->excl_cntrs)
- intel_put_excl_constraints(cpuc, event);
-}
-
-static void intel_pebs_aliases_core2(struct perf_event *event)
-{
- if ((event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) {
- /*
- * Use an alternative encoding for CPU_CLK_UNHALTED.THREAD_P
- * (0x003c) so that we can use it with PEBS.
- *
- * The regular CPU_CLK_UNHALTED.THREAD_P event (0x003c) isn't
- * PEBS capable. However we can use INST_RETIRED.ANY_P
- * (0x00c0), which is a PEBS capable event, to get the same
- * count.
- *
- * INST_RETIRED.ANY_P with CNTMASK counts the number of cycles
- * that retire at least CNTMASK instructions. By setting CNTMASK
- * to a value (16) larger than the maximum number of instructions
- * that can be retired per cycle (4) and then inverting the
- * condition, we count all cycles that retire 16 or fewer
- * instructions, which is every cycle.
- *
- * Thereby we gain a PEBS capable cycle counter.
- */
- u64 alt_config = X86_CONFIG(.event=0xc0, .inv=1, .cmask=16);
-
- alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK);
- event->hw.config = alt_config;
- }
-}
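Under the architectural PERFEVTSEL layout exposed further down by the format attributes (event config:0-7, inv config:23, cmask config:24-31), the alias above amounts to the following raw value; a small userspace sketch, for illustration only:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t event = 0xc0;			/* INST_RETIRED.ANY_P */
	uint64_t inv   = 1ULL << 23;		/* invert the counter-mask comparison */
	uint64_t cmask = 16ULL << 24;		/* threshold above the max retire width */

	printf("alt config = 0x%llx\n", (unsigned long long)(event | inv | cmask));
	/* prints 0x108000c0: counts every cycle, but is PEBS capable */
	return 0;
}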
-
-static void intel_pebs_aliases_snb(struct perf_event *event)
-{
- if ((event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) {
- /*
- * Use an alternative encoding for CPU_CLK_UNHALTED.THREAD_P
- * (0x003c) so that we can use it with PEBS.
- *
- * The regular CPU_CLK_UNHALTED.THREAD_P event (0x003c) isn't
- * PEBS capable. However we can use UOPS_RETIRED.ALL
- * (0x01c2), which is a PEBS capable event, to get the same
- * count.
- *
- * UOPS_RETIRED.ALL with CNTMASK counts the number of cycles
- * that retire at least CNTMASK micro-ops. By setting CNTMASK
- * to a value (16) larger than the maximum number of micro-ops
- * that can be retired per cycle (4) and then inverting the
- * condition, we count all cycles that retire 16 or fewer
- * micro-ops, which is every cycle.
- *
- * Thereby we gain a PEBS capable cycle counter.
- */
- u64 alt_config = X86_CONFIG(.event=0xc2, .umask=0x01, .inv=1, .cmask=16);
-
- alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK);
- event->hw.config = alt_config;
- }
-}
-
-static void intel_pebs_aliases_precdist(struct perf_event *event)
-{
- if ((event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) {
- /*
- * Use an alternative encoding for CPU_CLK_UNHALTED.THREAD_P
- * (0x003c) so that we can use it with PEBS.
- *
- * The regular CPU_CLK_UNHALTED.THREAD_P event (0x003c) isn't
- * PEBS capable. However we can use INST_RETIRED.PREC_DIST
- * (0x01c0), which is a PEBS capable event, to get the same
- * count.
- *
- * The PREC_DIST event has special support to minimize sample
- * shadowing effects. One drawback is that it can be
- * only programmed on counter 1, but that seems like an
- * acceptable trade off.
- */
- u64 alt_config = X86_CONFIG(.event=0xc0, .umask=0x01, .inv=1, .cmask=16);
-
- alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK);
- event->hw.config = alt_config;
- }
-}
-
-static void intel_pebs_aliases_ivb(struct perf_event *event)
-{
- if (event->attr.precise_ip < 3)
- return intel_pebs_aliases_snb(event);
- return intel_pebs_aliases_precdist(event);
-}
-
-static void intel_pebs_aliases_skl(struct perf_event *event)
-{
- if (event->attr.precise_ip < 3)
- return intel_pebs_aliases_core2(event);
- return intel_pebs_aliases_precdist(event);
-}
-
-static unsigned long intel_pmu_free_running_flags(struct perf_event *event)
-{
- unsigned long flags = x86_pmu.free_running_flags;
-
- if (event->attr.use_clockid)
- flags &= ~PERF_SAMPLE_TIME;
- return flags;
-}
-
-static int intel_pmu_hw_config(struct perf_event *event)
-{
- int ret = x86_pmu_hw_config(event);
-
- if (ret)
- return ret;
-
- if (event->attr.precise_ip) {
- if (!event->attr.freq) {
- event->hw.flags |= PERF_X86_EVENT_AUTO_RELOAD;
- if (!(event->attr.sample_type &
- ~intel_pmu_free_running_flags(event)))
- event->hw.flags |= PERF_X86_EVENT_FREERUNNING;
- }
- if (x86_pmu.pebs_aliases)
- x86_pmu.pebs_aliases(event);
- }
-
- if (needs_branch_stack(event)) {
- ret = intel_pmu_setup_lbr_filter(event);
- if (ret)
- return ret;
-
- /*
- * BTS is set up earlier in this path, so don't account twice
- */
- if (!intel_pmu_has_bts(event)) {
- /* disallow lbr if conflicting events are present */
- if (x86_add_exclusive(x86_lbr_exclusive_lbr))
- return -EBUSY;
-
- event->destroy = hw_perf_lbr_event_destroy;
- }
- }
-
- if (event->attr.type != PERF_TYPE_RAW)
- return 0;
-
- if (!(event->attr.config & ARCH_PERFMON_EVENTSEL_ANY))
- return 0;
-
- if (x86_pmu.version < 3)
- return -EINVAL;
-
- if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
- return -EACCES;
-
- event->hw.config |= ARCH_PERFMON_EVENTSEL_ANY;
-
- return 0;
-}
-
-struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr)
-{
- if (x86_pmu.guest_get_msrs)
- return x86_pmu.guest_get_msrs(nr);
- *nr = 0;
- return NULL;
-}
-EXPORT_SYMBOL_GPL(perf_guest_get_msrs);
-
-static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr)
-{
- struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
- struct perf_guest_switch_msr *arr = cpuc->guest_switch_msrs;
-
- arr[0].msr = MSR_CORE_PERF_GLOBAL_CTRL;
- arr[0].host = x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_guest_mask;
- arr[0].guest = x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_host_mask;
- /*
- * If a PMU counter has PEBS enabled, it is not enough to disable the
- * counter on guest entry, since a PEBS memory write can overshoot guest
- * entry and corrupt guest memory. Disabling PEBS solves the problem.
- */
- arr[1].msr = MSR_IA32_PEBS_ENABLE;
- arr[1].host = cpuc->pebs_enabled;
- arr[1].guest = 0;
-
- *nr = 2;
- return arr;
-}
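The guest/host split above means a counter carrying exclude_host is masked out of GLOBAL_CTRL while in the host, and one carrying exclude_guest is masked out while in the guest. A small userspace illustration (counter indices made up):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t intel_ctrl = 0xf;		/* four general counters enabled overall */
	uint64_t guest_mask = 1ULL << 2;	/* exclude_host event on counter 2 */
	uint64_t host_mask  = 1ULL << 0;	/* exclude_guest event on counter 0 */

	printf("host  GLOBAL_CTRL = 0x%llx\n",
	       (unsigned long long)(intel_ctrl & ~guest_mask));	/* 0xb */
	printf("guest GLOBAL_CTRL = 0x%llx\n",
	       (unsigned long long)(intel_ctrl & ~host_mask));		/* 0xe */
	return 0;
}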
-
-static struct perf_guest_switch_msr *core_guest_get_msrs(int *nr)
-{
- struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
- struct perf_guest_switch_msr *arr = cpuc->guest_switch_msrs;
- int idx;
-
- for (idx = 0; idx < x86_pmu.num_counters; idx++) {
- struct perf_event *event = cpuc->events[idx];
-
- arr[idx].msr = x86_pmu_config_addr(idx);
- arr[idx].host = arr[idx].guest = 0;
-
- if (!test_bit(idx, cpuc->active_mask))
- continue;
-
- arr[idx].host = arr[idx].guest =
- event->hw.config | ARCH_PERFMON_EVENTSEL_ENABLE;
-
- if (event->attr.exclude_host)
- arr[idx].host &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
- else if (event->attr.exclude_guest)
- arr[idx].guest &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
- }
-
- *nr = x86_pmu.num_counters;
- return arr;
-}
-
-static void core_pmu_enable_event(struct perf_event *event)
-{
- if (!event->attr.exclude_host)
- x86_pmu_enable_event(event);
-}
-
-static void core_pmu_enable_all(int added)
-{
- struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
- int idx;
-
- for (idx = 0; idx < x86_pmu.num_counters; idx++) {
- struct hw_perf_event *hwc = &cpuc->events[idx]->hw;
-
- if (!test_bit(idx, cpuc->active_mask) ||
- cpuc->events[idx]->attr.exclude_host)
- continue;
-
- __x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE);
- }
-}
-
-static int hsw_hw_config(struct perf_event *event)
-{
- int ret = intel_pmu_hw_config(event);
-
- if (ret)
- return ret;
- if (!boot_cpu_has(X86_FEATURE_RTM) && !boot_cpu_has(X86_FEATURE_HLE))
- return 0;
- event->hw.config |= event->attr.config & (HSW_IN_TX|HSW_IN_TX_CHECKPOINTED);
-
- /*
- * IN_TX/IN_TX-CP filters are not supported by the Haswell PMU with
- * PEBS or in ANY thread mode. Since the results are nonsensical, forbid
- * this combination.
- */
- if ((event->hw.config & (HSW_IN_TX|HSW_IN_TX_CHECKPOINTED)) &&
- ((event->hw.config & ARCH_PERFMON_EVENTSEL_ANY) ||
- event->attr.precise_ip > 0))
- return -EOPNOTSUPP;
-
- if (event_is_checkpointed(event)) {
- /*
- * Sampling of checkpointed events can cause situations where
- * the CPU constantly aborts because of an overflow, which is
- * then checkpointed back and ignored. Forbid checkpointing
- * for sampling.
- *
- * But still allow a long sampling period, so that perf stat
- * from KVM works.
- */
- if (event->attr.sample_period > 0 &&
- event->attr.sample_period < 0x7fffffff)
- return -EOPNOTSUPP;
- }
- return 0;
-}
-
-static struct event_constraint counter2_constraint =
- EVENT_CONSTRAINT(0, 0x4, 0);
-
-static struct event_constraint *
-hsw_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
- struct perf_event *event)
-{
- struct event_constraint *c;
-
- c = intel_get_event_constraints(cpuc, idx, event);
-
- /* Handle special quirk on in_tx_checkpointed only in counter 2 */
- if (event->hw.config & HSW_IN_TX_CHECKPOINTED) {
- if (c->idxmsk64 & (1U << 2))
- return &counter2_constraint;
- return &emptyconstraint;
- }
-
- return c;
-}
-
-/*
- * Broadwell:
- *
- * The INST_RETIRED.ALL period always needs to have the lowest 6 bits cleared
- * (BDM55) and it must not use a period smaller than 100 (BDM11). We combine
- * the two to enforce a minimum period of 128 (the smallest value that has bits
- * 0-5 cleared and >= 100).
- *
- * Because of how the code in x86_perf_event_set_period() works, the truncation
- * of the lower 6 bits is 'harmless' as we'll occasionally add a longer period
- * to make up for the 'lost' events due to carrying the 'error' in period_left.
- *
- * Therefore the effective (average) period matches the requested period,
- * despite coarser hardware granularity.
- */
-static unsigned bdw_limit_period(struct perf_event *event, unsigned left)
-{
- if ((event->hw.config & INTEL_ARCH_EVENT_MASK) ==
- X86_CONFIG(.event=0xc0, .umask=0x01)) {
- if (left < 128)
- left = 128;
- left &= ~0x3fu;
- }
- return left;
-}
-
-PMU_FORMAT_ATTR(event, "config:0-7" );
-PMU_FORMAT_ATTR(umask, "config:8-15" );
-PMU_FORMAT_ATTR(edge, "config:18" );
-PMU_FORMAT_ATTR(pc, "config:19" );
-PMU_FORMAT_ATTR(any, "config:21" ); /* v3 + */
-PMU_FORMAT_ATTR(inv, "config:23" );
-PMU_FORMAT_ATTR(cmask, "config:24-31" );
-PMU_FORMAT_ATTR(in_tx, "config:32");
-PMU_FORMAT_ATTR(in_tx_cp, "config:33");
-
-static struct attribute *intel_arch_formats_attr[] = {
- &format_attr_event.attr,
- &format_attr_umask.attr,
- &format_attr_edge.attr,
- &format_attr_pc.attr,
- &format_attr_inv.attr,
- &format_attr_cmask.attr,
- NULL,
-};
-
-ssize_t intel_event_sysfs_show(char *page, u64 config)
-{
- u64 event = (config & ARCH_PERFMON_EVENTSEL_EVENT);
-
- return x86_event_sysfs_show(page, config, event);
-}
-
-struct intel_shared_regs *allocate_shared_regs(int cpu)
-{
- struct intel_shared_regs *regs;
- int i;
-
- regs = kzalloc_node(sizeof(struct intel_shared_regs),
- GFP_KERNEL, cpu_to_node(cpu));
- if (regs) {
- /*
- * initialize the locks to keep lockdep happy
- */
- for (i = 0; i < EXTRA_REG_MAX; i++)
- raw_spin_lock_init(&regs->regs[i].lock);
-
- regs->core_id = -1;
- }
- return regs;
-}
-
-static struct intel_excl_cntrs *allocate_excl_cntrs(int cpu)
-{
- struct intel_excl_cntrs *c;
-
- c = kzalloc_node(sizeof(struct intel_excl_cntrs),
- GFP_KERNEL, cpu_to_node(cpu));
- if (c) {
- raw_spin_lock_init(&c->lock);
- c->core_id = -1;
- }
- return c;
-}
-
-static int intel_pmu_cpu_prepare(int cpu)
-{
- struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
-
- if (x86_pmu.extra_regs || x86_pmu.lbr_sel_map) {
- cpuc->shared_regs = allocate_shared_regs(cpu);
- if (!cpuc->shared_regs)
- goto err;
- }
-
- if (x86_pmu.flags & PMU_FL_EXCL_CNTRS) {
- size_t sz = X86_PMC_IDX_MAX * sizeof(struct event_constraint);
-
- cpuc->constraint_list = kzalloc(sz, GFP_KERNEL);
- if (!cpuc->constraint_list)
- goto err_shared_regs;
-
- cpuc->excl_cntrs = allocate_excl_cntrs(cpu);
- if (!cpuc->excl_cntrs)
- goto err_constraint_list;
-
- cpuc->excl_thread_id = 0;
- }
-
- return NOTIFY_OK;
-
-err_constraint_list:
- kfree(cpuc->constraint_list);
- cpuc->constraint_list = NULL;
-
-err_shared_regs:
- kfree(cpuc->shared_regs);
- cpuc->shared_regs = NULL;
-
-err:
- return NOTIFY_BAD;
-}
-
-static void intel_pmu_cpu_starting(int cpu)
-{
- struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
- int core_id = topology_core_id(cpu);
- int i;
-
- init_debug_store_on_cpu(cpu);
- /*
- * Deal with CPUs that don't clear their LBRs on power-up.
- */
- intel_pmu_lbr_reset();
-
- cpuc->lbr_sel = NULL;
-
- if (!cpuc->shared_regs)
- return;
-
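-
- /*
- * If an already-online sibling on this core has allocated the shared
- * extra-reg state, adopt it and queue our own copy for freeing.
- */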
- if (!(x86_pmu.flags & PMU_FL_NO_HT_SHARING)) {
- for_each_cpu(i, topology_sibling_cpumask(cpu)) {
- struct intel_shared_regs *pc;
-
- pc = per_cpu(cpu_hw_events, i).shared_regs;
- if (pc && pc->core_id == core_id) {
- cpuc->kfree_on_online[0] = cpuc->shared_regs;
- cpuc->shared_regs = pc;
- break;
- }
- }
- cpuc->shared_regs->core_id = core_id;
- cpuc->shared_regs->refcnt++;
- }
-
- if (x86_pmu.lbr_sel_map)
- cpuc->lbr_sel = &cpuc->shared_regs->regs[EXTRA_REG_LBR];
-
- if (x86_pmu.flags & PMU_FL_EXCL_CNTRS) {
- for_each_cpu(i, topology_sibling_cpumask(cpu)) {
- struct intel_excl_cntrs *c;
-
- c = per_cpu(cpu_hw_events, i).excl_cntrs;
- if (c && c->core_id == core_id) {
- cpuc->kfree_on_online[1] = cpuc->excl_cntrs;
- cpuc->excl_cntrs = c;
- cpuc->excl_thread_id = 1;
- break;
- }
- }
- cpuc->excl_cntrs->core_id = core_id;
- cpuc->excl_cntrs->refcnt++;
- }
-}
-
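-
- /*
- * Drop this CPU's reference on the core's exclusive-counter state,
- * freeing it when the last sibling goes away, and release the per-CPU
- * constraint list.
- */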
-static void free_excl_cntrs(int cpu)
-{
- struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
- struct intel_excl_cntrs *c;
-
- c = cpuc->excl_cntrs;
- if (c) {
- if (c->core_id == -1 || --c->refcnt == 0)
- kfree(c);
- cpuc->excl_cntrs = NULL;
- kfree(cpuc->constraint_list);
- cpuc->constraint_list = NULL;
- }
-}
-
-static void intel_pmu_cpu_dying(int cpu)
-{
- struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
- struct intel_shared_regs *pc;
-
- pc = cpuc->shared_regs;
- if (pc) {
- if (pc->core_id == -1 || --pc->refcnt == 0)
- kfree(pc);
- cpuc->shared_regs = NULL;
- }
-
- free_excl_cntrs(cpu);
-
- fini_debug_store_on_cpu(cpu);
-}
-
-static void intel_pmu_sched_task(struct perf_event_context *ctx,
- bool sched_in)
-{
- if (x86_pmu.pebs_active)
- intel_pmu_pebs_sched_task(ctx, sched_in);
- if (x86_pmu.lbr_nr)
- intel_pmu_lbr_sched_task(ctx, sched_in);
-}
-
-PMU_FORMAT_ATTR(offcore_rsp, "config1:0-63");
-
-PMU_FORMAT_ATTR(ldlat, "config1:0-15");
-
-PMU_FORMAT_ATTR(frontend, "config1:0-23");
-
-static struct attribute *intel_arch3_formats_attr[] = {
- &format_attr_event.attr,
- &format_attr_umask.attr,
- &format_attr_edge.attr,
- &format_attr_pc.attr,
- &format_attr_any.attr,
- &format_attr_inv.attr,
- &format_attr_cmask.attr,
- &format_attr_in_tx.attr,
- &format_attr_in_tx_cp.attr,
-
- &format_attr_offcore_rsp.attr, /* XXX do NHM/WSM + SNB breakout */
- &format_attr_ldlat.attr, /* PEBS load latency */
- NULL,
-};
-
-static struct attribute *skl_format_attr[] = {
- &format_attr_frontend.attr,
- NULL,
-};
-
-static __initconst const struct x86_pmu core_pmu = {
- .name = "core",
- .handle_irq = x86_pmu_handle_irq,
- .disable_all = x86_pmu_disable_all,
- .enable_all = core_pmu_enable_all,
- .enable = core_pmu_enable_event,
- .disable = x86_pmu_disable_event,
- .hw_config = x86_pmu_hw_config,
- .schedule_events = x86_schedule_events,
- .eventsel = MSR_ARCH_PERFMON_EVENTSEL0,
- .perfctr = MSR_ARCH_PERFMON_PERFCTR0,
- .event_map = intel_pmu_event_map,
- .max_events = ARRAY_SIZE(intel_perfmon_event_map),
- .apic = 1,
- .free_running_flags = PEBS_FREERUNNING_FLAGS,
-
- /*
- * Intel PMCs cannot be accessed sanely above 32-bit width,
- * so we install an artificial 1<<31 period regardless of
- * the generic event period:
- */
- .max_period = (1ULL<<31) - 1,
- .get_event_constraints = intel_get_event_constraints,
- .put_event_constraints = intel_put_event_constraints,
- .event_constraints = intel_core_event_constraints,
- .guest_get_msrs = core_guest_get_msrs,
- .format_attrs = intel_arch_formats_attr,
- .events_sysfs_show = intel_event_sysfs_show,
-
- /*
- * A virtual (or funny metal) CPU can define x86_pmu.extra_regs
- * together with PMU version 1 and thus be using core_pmu with
- * shared_regs. We need the following callbacks here to allocate
- * it properly.
- */
- .cpu_prepare = intel_pmu_cpu_prepare,
- .cpu_starting = intel_pmu_cpu_starting,
- .cpu_dying = intel_pmu_cpu_dying,
-};
-
-static __initconst const struct x86_pmu intel_pmu = {
- .name = "Intel",
- .handle_irq = intel_pmu_handle_irq,
- .disable_all = intel_pmu_disable_all,
- .enable_all = intel_pmu_enable_all,
- .enable = intel_pmu_enable_event,
- .disable = intel_pmu_disable_event,
- .hw_config = intel_pmu_hw_config,
- .schedule_events = x86_schedule_events,
- .eventsel = MSR_ARCH_PERFMON_EVENTSEL0,
- .perfctr = MSR_ARCH_PERFMON_PERFCTR0,
- .event_map = intel_pmu_event_map,
- .max_events = ARRAY_SIZE(intel_perfmon_event_map),
- .apic = 1,
- .free_running_flags = PEBS_FREERUNNING_FLAGS,
- /*
- * Intel PMCs cannot be accessed sanely above 32 bit width,
- * so we install an artificial 1<<31 period regardless of
- * the generic event period:
- */
- .max_period = (1ULL << 31) - 1,
- .get_event_constraints = intel_get_event_constraints,
- .put_event_constraints = intel_put_event_constraints,
- .pebs_aliases = intel_pebs_aliases_core2,
-
- .format_attrs = intel_arch3_formats_attr,
- .events_sysfs_show = intel_event_sysfs_show,
-
- .cpu_prepare = intel_pmu_cpu_prepare,
- .cpu_starting = intel_pmu_cpu_starting,
- .cpu_dying = intel_pmu_cpu_dying,
- .guest_get_msrs = intel_guest_get_msrs,
- .sched_task = intel_pmu_sched_task,
-};
-
-static __init void intel_clovertown_quirk(void)
-{
- /*
- * PEBS is unreliable due to:
- *
- * AJ67 - PEBS may experience CPL leaks
- * AJ68 - PEBS PMI may be delayed by one event
- * AJ69 - GLOBAL_STATUS[62] will only be set when DEBUGCTL[12]
- * AJ106 - FREEZE_LBRS_ON_PMI doesn't work in combination with PEBS
- *
- * AJ67 could be worked around by restricting the OS/USR flags.
- * AJ69 could be worked around by setting PMU_FREEZE_ON_PMI.
- *
- * AJ106 could possibly be worked around by not allowing LBR
- * usage from PEBS, including the fixup.
- * AJ68 could possibly be worked around by always programming
- * a pebs_event_reset[0] value and coping with the lost events.
- *
- * But taken together it might just make sense to not enable PEBS on
- * these chips.
- */
- pr_warn("PEBS disabled due to CPU errata\n");
- x86_pmu.pebs = 0;
- x86_pmu.pebs_constraints = NULL;
-}
-
-static int intel_snb_pebs_broken(int cpu)
-{
- u32 rev = UINT_MAX; /* default to broken for unknown models */
-
- switch (cpu_data(cpu).x86_model) {
- case 42: /* SNB */
- rev = 0x28;
- break;
-
- case 45: /* SNB-EP */
- switch (cpu_data(cpu).x86_mask) {
- case 6: rev = 0x618; break;
- case 7: rev = 0x70c; break;
- }
- }
-
- return (cpu_data(cpu).microcode < rev);
-}
-
-static void intel_snb_check_microcode(void)
-{
- int pebs_broken = 0;
- int cpu;
-
- get_online_cpus();
- for_each_online_cpu(cpu) {
- if ((pebs_broken = intel_snb_pebs_broken(cpu)))
- break;
- }
- put_online_cpus();
-
- if (pebs_broken == x86_pmu.pebs_broken)
- return;
-
- /*
- * Serialized by the microcode lock.
- */
- if (x86_pmu.pebs_broken) {
- pr_info("PEBS enabled due to microcode update\n");
- x86_pmu.pebs_broken = 0;
- } else {
- pr_info("PEBS disabled due to CPU errata, please upgrade microcode\n");
- x86_pmu.pebs_broken = 1;
- }
-}
-
-/*
- * Under certain circumstances, accessing certain MSRs may cause a #GP.
- * This function tests whether the given MSR can be safely accessed.
- */
-static bool check_msr(unsigned long msr, u64 mask)
-{
- u64 val_old, val_new, val_tmp;
-
- /*
- * Read the current value, change it and read it back to see if it
- * matches; this is needed to detect certain hardware emulators
- * (qemu/kvm) that don't trap on the MSR access and always return 0s.
- */
- if (rdmsrl_safe(msr, &val_old))
- return false;
-
- /*
- * Only change the bits which can be updated by wrmsrl.
- */
- val_tmp = val_old ^ mask;
- if (wrmsrl_safe(msr, val_tmp) ||
- rdmsrl_safe(msr, &val_new))
- return false;
-
- if (val_new != val_tmp)
- return false;
-
- /*
- * At this point the MSR is known to be safely accessible.
- * Restore the old value and return.
- */
- wrmsrl(msr, val_old);
-
- return true;
-}
-
-static __init void intel_sandybridge_quirk(void)
-{
- x86_pmu.check_microcode = intel_snb_check_microcode;
- intel_snb_check_microcode();
-}
-
-static const struct { int id; char *name; } intel_arch_events_map[] __initconst = {
- { PERF_COUNT_HW_CPU_CYCLES, "cpu cycles" },
- { PERF_COUNT_HW_INSTRUCTIONS, "instructions" },
- { PERF_COUNT_HW_BUS_CYCLES, "bus cycles" },
- { PERF_COUNT_HW_CACHE_REFERENCES, "cache references" },
- { PERF_COUNT_HW_CACHE_MISSES, "cache misses" },
- { PERF_COUNT_HW_BRANCH_INSTRUCTIONS, "branch instructions" },
- { PERF_COUNT_HW_BRANCH_MISSES, "branch misses" },
-};
-
-static __init void intel_arch_events_quirk(void)
-{
- int bit;
-
- /* disable events reported as not present by CPUID */
- for_each_set_bit(bit, x86_pmu.events_mask, ARRAY_SIZE(intel_arch_events_map)) {
- intel_perfmon_event_map[intel_arch_events_map[bit].id] = 0;
- pr_warn("CPUID marked event: \'%s\' unavailable\n",
- intel_arch_events_map[bit].name);
- }
-}
-
-static __init void intel_nehalem_quirk(void)
-{
- union cpuid10_ebx ebx;
-
- ebx.full = x86_pmu.events_maskl;
- if (ebx.split.no_branch_misses_retired) {
- /*
- * Erratum AAJ80 detected; we work around it by using
- * the BR_MISP_EXEC.ANY event. This will over-count
- * branch-misses, but it's still much better than the
- * architectural event which is often completely bogus:
- */
- intel_perfmon_event_map[PERF_COUNT_HW_BRANCH_MISSES] = 0x7f89;
- ebx.split.no_branch_misses_retired = 0;
- x86_pmu.events_maskl = ebx.full;
- pr_info("CPU erratum AAJ80 worked around\n");
- }
-}
-
-/*
- * enable software workaround for errata:
- * SNB: BJ122
- * IVB: BV98
- * HSW: HSD29
- *
- * Only needed when HT is enabled. However, detecting whether HT is
- * enabled is difficult (model specific). So instead, we enable the
- * workaround early at boot, and verify whether it is needed in a later
- * initcall phase once we have valid topology information to check if
- * HT is actually enabled.
- */
-static __init void intel_ht_bug(void)
-{
- x86_pmu.flags |= PMU_FL_EXCL_CNTRS | PMU_FL_EXCL_ENABLED;
-
- x86_pmu.start_scheduling = intel_start_scheduling;
- x86_pmu.commit_scheduling = intel_commit_scheduling;
- x86_pmu.stop_scheduling = intel_stop_scheduling;
-}
-
-EVENT_ATTR_STR(mem-loads, mem_ld_hsw, "event=0xcd,umask=0x1,ldlat=3");
-EVENT_ATTR_STR(mem-stores, mem_st_hsw, "event=0xd0,umask=0x82")
-
-/* Haswell special events */
-EVENT_ATTR_STR(tx-start, tx_start, "event=0xc9,umask=0x1");
-EVENT_ATTR_STR(tx-commit, tx_commit, "event=0xc9,umask=0x2");
-EVENT_ATTR_STR(tx-abort, tx_abort, "event=0xc9,umask=0x4");
-EVENT_ATTR_STR(tx-capacity, tx_capacity, "event=0x54,umask=0x2");
-EVENT_ATTR_STR(tx-conflict, tx_conflict, "event=0x54,umask=0x1");
-EVENT_ATTR_STR(el-start, el_start, "event=0xc8,umask=0x1");
-EVENT_ATTR_STR(el-commit, el_commit, "event=0xc8,umask=0x2");
-EVENT_ATTR_STR(el-abort, el_abort, "event=0xc8,umask=0x4");
-EVENT_ATTR_STR(el-capacity, el_capacity, "event=0x54,umask=0x2");
-EVENT_ATTR_STR(el-conflict, el_conflict, "event=0x54,umask=0x1");
-EVENT_ATTR_STR(cycles-t, cycles_t, "event=0x3c,in_tx=1");
-EVENT_ATTR_STR(cycles-ct, cycles_ct, "event=0x3c,in_tx=1,in_tx_cp=1");
-
-static struct attribute *hsw_events_attrs[] = {
- EVENT_PTR(tx_start),
- EVENT_PTR(tx_commit),
- EVENT_PTR(tx_abort),
- EVENT_PTR(tx_capacity),
- EVENT_PTR(tx_conflict),
- EVENT_PTR(el_start),
- EVENT_PTR(el_commit),
- EVENT_PTR(el_abort),
- EVENT_PTR(el_capacity),
- EVENT_PTR(el_conflict),
- EVENT_PTR(cycles_t),
- EVENT_PTR(cycles_ct),
- EVENT_PTR(mem_ld_hsw),
- EVENT_PTR(mem_st_hsw),
- NULL
-};
-
-__init int intel_pmu_init(void)
-{
- union cpuid10_edx edx;
- union cpuid10_eax eax;
- union cpuid10_ebx ebx;
- struct event_constraint *c;
- unsigned int unused;
- struct extra_reg *er;
- int version, i;
-
- if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
- switch (boot_cpu_data.x86) {
- case 0x6:
- return p6_pmu_init();
- case 0xb:
- return knc_pmu_init();
- case 0xf:
- return p4_pmu_init();
- }
- return -ENODEV;
- }
-
- /*
- * Check whether the Architectural PerfMon supports
- * Branch Misses Retired hw_event or not.
- */
- cpuid(10, &eax.full, &ebx.full, &unused, &edx.full);
- if (eax.split.mask_length < ARCH_PERFMON_EVENTS_COUNT)
- return -ENODEV;
-
- version = eax.split.version_id;
- if (version < 2)
- x86_pmu = core_pmu;
- else
- x86_pmu = intel_pmu;
-
- x86_pmu.version = version;
- x86_pmu.num_counters = eax.split.num_counters;
- x86_pmu.cntval_bits = eax.split.bit_width;
- x86_pmu.cntval_mask = (1ULL << eax.split.bit_width) - 1;
-
- x86_pmu.events_maskl = ebx.full;
- x86_pmu.events_mask_len = eax.split.mask_length;
-
- x86_pmu.max_pebs_events = min_t(unsigned, MAX_PEBS_EVENTS, x86_pmu.num_counters);
-
- /*
- * Quirk: v2 perfmon does not report fixed-purpose events, so
- * assume at least 3 events:
- */
- if (version > 1)
- x86_pmu.num_counters_fixed = max((int)edx.split.num_counters_fixed, 3);
-
- if (boot_cpu_has(X86_FEATURE_PDCM)) {
- u64 capabilities;
-
- rdmsrl(MSR_IA32_PERF_CAPABILITIES, capabilities);
- x86_pmu.intel_cap.capabilities = capabilities;
- }
-
- intel_ds_init();
-
- x86_add_quirk(intel_arch_events_quirk); /* Install first, so it runs last */
-
- /*
- * Install the hw-cache-events table:
- */
- switch (boot_cpu_data.x86_model) {
- case 14: /* 65nm Core "Yonah" */
- pr_cont("Core events, ");
- break;
-
- case 15: /* 65nm Core2 "Merom" */
- x86_add_quirk(intel_clovertown_quirk);
- case 22: /* 65nm Core2 "Merom-L" */
- case 23: /* 45nm Core2 "Penryn" */
- case 29: /* 45nm Core2 "Dunnington" (MP) */
- memcpy(hw_cache_event_ids, core2_hw_cache_event_ids,
- sizeof(hw_cache_event_ids));
-
- intel_pmu_lbr_init_core();
-
- x86_pmu.event_constraints = intel_core2_event_constraints;
- x86_pmu.pebs_constraints = intel_core2_pebs_event_constraints;
- pr_cont("Core2 events, ");
- break;
-
- case 30: /* 45nm Nehalem */
- case 26: /* 45nm Nehalem-EP */
- case 46: /* 45nm Nehalem-EX */
- memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids,
- sizeof(hw_cache_event_ids));
- memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs,
- sizeof(hw_cache_extra_regs));
-
- intel_pmu_lbr_init_nhm();
-
- x86_pmu.event_constraints = intel_nehalem_event_constraints;
- x86_pmu.pebs_constraints = intel_nehalem_pebs_event_constraints;
- x86_pmu.enable_all = intel_pmu_nhm_enable_all;
- x86_pmu.extra_regs = intel_nehalem_extra_regs;
-
- x86_pmu.cpu_events = nhm_events_attrs;
-
- /* UOPS_ISSUED.STALLED_CYCLES */
- intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
- X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
- /* UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */
- intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] =
- X86_CONFIG(.event=0xb1, .umask=0x3f, .inv=1, .cmask=1);
-
- intel_pmu_pebs_data_source_nhm();
- x86_add_quirk(intel_nehalem_quirk);
-
- pr_cont("Nehalem events, ");
- break;
-
- case 28: /* 45nm Atom "Pineview" */
- case 38: /* 45nm Atom "Lincroft" */
- case 39: /* 32nm Atom "Penwell" */
- case 53: /* 32nm Atom "Cloverview" */
- case 54: /* 32nm Atom "Cedarview" */
- memcpy(hw_cache_event_ids, atom_hw_cache_event_ids,
- sizeof(hw_cache_event_ids));
-
- intel_pmu_lbr_init_atom();
-
- x86_pmu.event_constraints = intel_gen_event_constraints;
- x86_pmu.pebs_constraints = intel_atom_pebs_event_constraints;
- x86_pmu.pebs_aliases = intel_pebs_aliases_core2;
- pr_cont("Atom events, ");
- break;
-
- case 55: /* 22nm Atom "Silvermont" */
- case 76: /* 14nm Atom "Airmont" */
- case 77: /* 22nm Atom "Silvermont Avoton/Rangeley" */
- memcpy(hw_cache_event_ids, slm_hw_cache_event_ids,
- sizeof(hw_cache_event_ids));
- memcpy(hw_cache_extra_regs, slm_hw_cache_extra_regs,
- sizeof(hw_cache_extra_regs));
-
- intel_pmu_lbr_init_atom();
-
- x86_pmu.event_constraints = intel_slm_event_constraints;
- x86_pmu.pebs_constraints = intel_slm_pebs_event_constraints;
- x86_pmu.extra_regs = intel_slm_extra_regs;
- x86_pmu.flags |= PMU_FL_HAS_RSP_1;
- pr_cont("Silvermont events, ");
- break;
-
- case 37: /* 32nm Westmere */
- case 44: /* 32nm Westmere-EP */
- case 47: /* 32nm Westmere-EX */
- memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids,
- sizeof(hw_cache_event_ids));
- memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs,
- sizeof(hw_cache_extra_regs));
-
- intel_pmu_lbr_init_nhm();
-
- x86_pmu.event_constraints = intel_westmere_event_constraints;
- x86_pmu.enable_all = intel_pmu_nhm_enable_all;
- x86_pmu.pebs_constraints = intel_westmere_pebs_event_constraints;
- x86_pmu.extra_regs = intel_westmere_extra_regs;
- x86_pmu.flags |= PMU_FL_HAS_RSP_1;
-
- x86_pmu.cpu_events = nhm_events_attrs;
-
- /* UOPS_ISSUED.STALLED_CYCLES */
- intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
- X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
- /* UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */
- intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] =
- X86_CONFIG(.event=0xb1, .umask=0x3f, .inv=1, .cmask=1);
-
- intel_pmu_pebs_data_source_nhm();
- pr_cont("Westmere events, ");
- break;
-
- case 42: /* 32nm SandyBridge */
- case 45: /* 32nm SandyBridge-E/EN/EP */
- x86_add_quirk(intel_sandybridge_quirk);
- x86_add_quirk(intel_ht_bug);
- memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
- sizeof(hw_cache_event_ids));
- memcpy(hw_cache_extra_regs, snb_hw_cache_extra_regs,
- sizeof(hw_cache_extra_regs));
-
- intel_pmu_lbr_init_snb();
-
- x86_pmu.event_constraints = intel_snb_event_constraints;
- x86_pmu.pebs_constraints = intel_snb_pebs_event_constraints;
- x86_pmu.pebs_aliases = intel_pebs_aliases_snb;
- if (boot_cpu_data.x86_model == 45)
- x86_pmu.extra_regs = intel_snbep_extra_regs;
- else
- x86_pmu.extra_regs = intel_snb_extra_regs;
-
- /* all extra regs are per-cpu when HT is on */
- x86_pmu.flags |= PMU_FL_HAS_RSP_1;
- x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
-
- x86_pmu.cpu_events = snb_events_attrs;
-
- /* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */
- intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
- X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
- /* UOPS_DISPATCHED.THREAD,c=1,i=1 to count stall cycles*/
- intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] =
- X86_CONFIG(.event=0xb1, .umask=0x01, .inv=1, .cmask=1);
-
- pr_cont("SandyBridge events, ");
- break;
-
- case 58: /* 22nm IvyBridge */
- case 62: /* 22nm IvyBridge-EP/EX */
- x86_add_quirk(intel_ht_bug);
- memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
- sizeof(hw_cache_event_ids));
- /* dTLB-load-misses on IVB is different than SNB */
- hw_cache_event_ids[C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = 0x8108; /* DTLB_LOAD_MISSES.DEMAND_LD_MISS_CAUSES_A_WALK */
-
- memcpy(hw_cache_extra_regs, snb_hw_cache_extra_regs,
- sizeof(hw_cache_extra_regs));
-
- intel_pmu_lbr_init_snb();
-
- x86_pmu.event_constraints = intel_ivb_event_constraints;
- x86_pmu.pebs_constraints = intel_ivb_pebs_event_constraints;
- x86_pmu.pebs_aliases = intel_pebs_aliases_ivb;
- x86_pmu.pebs_prec_dist = true;
- if (boot_cpu_data.x86_model == 62)
- x86_pmu.extra_regs = intel_snbep_extra_regs;
- else
- x86_pmu.extra_regs = intel_snb_extra_regs;
- /* all extra regs are per-cpu when HT is on */
- x86_pmu.flags |= PMU_FL_HAS_RSP_1;
- x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
-
- x86_pmu.cpu_events = snb_events_attrs;
-
- /* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */
- intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
- X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
-
- pr_cont("IvyBridge events, ");
- break;
-
- case 60: /* 22nm Haswell Core */
- case 63: /* 22nm Haswell Server */
- case 69: /* 22nm Haswell ULT */
- case 70: /* 22nm Haswell + GT3e (Intel Iris Pro graphics) */
- x86_add_quirk(intel_ht_bug);
- x86_pmu.late_ack = true;
- memcpy(hw_cache_event_ids, hsw_hw_cache_event_ids, sizeof(hw_cache_event_ids));
- memcpy(hw_cache_extra_regs, hsw_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
-
- intel_pmu_lbr_init_hsw();
-
- x86_pmu.event_constraints = intel_hsw_event_constraints;
- x86_pmu.pebs_constraints = intel_hsw_pebs_event_constraints;
- x86_pmu.extra_regs = intel_snbep_extra_regs;
- x86_pmu.pebs_aliases = intel_pebs_aliases_ivb;
- x86_pmu.pebs_prec_dist = true;
- /* all extra regs are per-cpu when HT is on */
- x86_pmu.flags |= PMU_FL_HAS_RSP_1;
- x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
-
- x86_pmu.hw_config = hsw_hw_config;
- x86_pmu.get_event_constraints = hsw_get_event_constraints;
- x86_pmu.cpu_events = hsw_events_attrs;
- x86_pmu.lbr_double_abort = true;
- pr_cont("Haswell events, ");
- break;
-
- case 61: /* 14nm Broadwell Core-M */
- case 86: /* 14nm Broadwell Xeon D */
- case 71: /* 14nm Broadwell + GT3e (Intel Iris Pro graphics) */
- case 79: /* 14nm Broadwell Server */
- x86_pmu.late_ack = true;
- memcpy(hw_cache_event_ids, hsw_hw_cache_event_ids, sizeof(hw_cache_event_ids));
- memcpy(hw_cache_extra_regs, hsw_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
-
- /* L3_MISS_LOCAL_DRAM is BIT(26) in Broadwell */
- hw_cache_extra_regs[C(LL)][C(OP_READ)][C(RESULT_MISS)] = HSW_DEMAND_READ |
- BDW_L3_MISS|HSW_SNOOP_DRAM;
- hw_cache_extra_regs[C(LL)][C(OP_WRITE)][C(RESULT_MISS)] = HSW_DEMAND_WRITE|BDW_L3_MISS|
- HSW_SNOOP_DRAM;
- hw_cache_extra_regs[C(NODE)][C(OP_READ)][C(RESULT_ACCESS)] = HSW_DEMAND_READ|
- BDW_L3_MISS_LOCAL|HSW_SNOOP_DRAM;
- hw_cache_extra_regs[C(NODE)][C(OP_WRITE)][C(RESULT_ACCESS)] = HSW_DEMAND_WRITE|
- BDW_L3_MISS_LOCAL|HSW_SNOOP_DRAM;
-
- intel_pmu_lbr_init_hsw();
-
- x86_pmu.event_constraints = intel_bdw_event_constraints;
- x86_pmu.pebs_constraints = intel_hsw_pebs_event_constraints;
- x86_pmu.extra_regs = intel_snbep_extra_regs;
- x86_pmu.pebs_aliases = intel_pebs_aliases_ivb;
- x86_pmu.pebs_prec_dist = true;
- /* all extra regs are per-cpu when HT is on */
- x86_pmu.flags |= PMU_FL_HAS_RSP_1;
- x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
-
- x86_pmu.hw_config = hsw_hw_config;
- x86_pmu.get_event_constraints = hsw_get_event_constraints;
- x86_pmu.cpu_events = hsw_events_attrs;
- x86_pmu.limit_period = bdw_limit_period;
- pr_cont("Broadwell events, ");
- break;
-
- case 87: /* Knights Landing Xeon Phi */
- memcpy(hw_cache_event_ids,
- slm_hw_cache_event_ids, sizeof(hw_cache_event_ids));
- memcpy(hw_cache_extra_regs,
- knl_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
- intel_pmu_lbr_init_knl();
-
- x86_pmu.event_constraints = intel_slm_event_constraints;
- x86_pmu.pebs_constraints = intel_slm_pebs_event_constraints;
- x86_pmu.extra_regs = intel_knl_extra_regs;
-
- /* all extra regs are per-cpu when HT is on */
- x86_pmu.flags |= PMU_FL_HAS_RSP_1;
- x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
-
- pr_cont("Knights Landing events, ");
- break;
-
- case 78: /* 14nm Skylake Mobile */
- case 94: /* 14nm Skylake Desktop */
- x86_pmu.late_ack = true;
- memcpy(hw_cache_event_ids, skl_hw_cache_event_ids, sizeof(hw_cache_event_ids));
- memcpy(hw_cache_extra_regs, skl_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
- intel_pmu_lbr_init_skl();
-
- x86_pmu.event_constraints = intel_skl_event_constraints;
- x86_pmu.pebs_constraints = intel_skl_pebs_event_constraints;
- x86_pmu.extra_regs = intel_skl_extra_regs;
- x86_pmu.pebs_aliases = intel_pebs_aliases_skl;
- x86_pmu.pebs_prec_dist = true;
- /* all extra regs are per-cpu when HT is on */
- x86_pmu.flags |= PMU_FL_HAS_RSP_1;
- x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
-
- x86_pmu.hw_config = hsw_hw_config;
- x86_pmu.get_event_constraints = hsw_get_event_constraints;
- x86_pmu.format_attrs = merge_attr(intel_arch3_formats_attr,
- skl_format_attr);
- WARN_ON(!x86_pmu.format_attrs);
- x86_pmu.cpu_events = hsw_events_attrs;
- pr_cont("Skylake events, ");
- break;
-
- default:
- switch (x86_pmu.version) {
- case 1:
- x86_pmu.event_constraints = intel_v1_event_constraints;
- pr_cont("generic architected perfmon v1, ");
- break;
- default:
- /*
- * default constraints for v2 and up
- */
- x86_pmu.event_constraints = intel_gen_event_constraints;
- pr_cont("generic architected perfmon, ");
- break;
- }
- }
-
- if (x86_pmu.num_counters > INTEL_PMC_MAX_GENERIC) {
- WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!",
- x86_pmu.num_counters, INTEL_PMC_MAX_GENERIC);
- x86_pmu.num_counters = INTEL_PMC_MAX_GENERIC;
- }
- x86_pmu.intel_ctrl = (1 << x86_pmu.num_counters) - 1;
-
- if (x86_pmu.num_counters_fixed > INTEL_PMC_MAX_FIXED) {
- WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!",
- x86_pmu.num_counters_fixed, INTEL_PMC_MAX_FIXED);
- x86_pmu.num_counters_fixed = INTEL_PMC_MAX_FIXED;
- }
-
- x86_pmu.intel_ctrl |=
- ((1LL << x86_pmu.num_counters_fixed)-1) << INTEL_PMC_IDX_FIXED;
-
- if (x86_pmu.event_constraints) {
- /*
- * the event on fixed counter 2 (REF_CYCLES) only works on that
- * counter, so do not extend its mask to the generic counters
- */
- for_each_event_constraint(c, x86_pmu.event_constraints) {
- if (c->cmask == FIXED_EVENT_FLAGS
- && c->idxmsk64 != INTEL_PMC_MSK_FIXED_REF_CYCLES) {
- c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1;
- }
- c->idxmsk64 &=
- ~(~0UL << (INTEL_PMC_IDX_FIXED + x86_pmu.num_counters_fixed));
- c->weight = hweight64(c->idxmsk64);
- }
- }
-
- /*
- * Accessing LBR MSRs may cause a #GP under certain circumstances,
- * e.g. KVM doesn't support the LBR MSRs. Check all LBR MSRs here and
- * disable LBR access if any of them cannot be accessed.
- */
- if (x86_pmu.lbr_nr && !check_msr(x86_pmu.lbr_tos, 0x3UL))
- x86_pmu.lbr_nr = 0;
- for (i = 0; i < x86_pmu.lbr_nr; i++) {
- if (!(check_msr(x86_pmu.lbr_from + i, 0xffffUL) &&
- check_msr(x86_pmu.lbr_to + i, 0xffffUL)))
- x86_pmu.lbr_nr = 0;
- }
-
- /*
- * Accessing extra MSRs may cause a #GP under certain circumstances,
- * e.g. KVM doesn't support the offcore event. Check all extra_regs here.
- */
- if (x86_pmu.extra_regs) {
- for (er = x86_pmu.extra_regs; er->msr; er++) {
- er->extra_msr_access = check_msr(er->msr, 0x11UL);
- /* Disable LBR select mapping */
- if ((er->idx == EXTRA_REG_LBR) && !er->extra_msr_access)
- x86_pmu.lbr_sel_map = NULL;
- }
- }
-
- /* Support full width counters using alternative MSR range */
- if (x86_pmu.intel_cap.full_width_write) {
- x86_pmu.max_period = x86_pmu.cntval_mask;
- x86_pmu.perfctr = MSR_IA32_PMC0;
- pr_cont("full-width counters, ");
- }
-
- return 0;
-}
-
-/*
- * HT bug: phase 2 init
- * Called once we have valid topology information to check
- * whether or not HT is enabled.
- * If HT is off, then we disable the workaround.
- */
-static __init int fixup_ht_bug(void)
-{
- int cpu = smp_processor_id();
- int w, c;
- /*
- * problem not present on this CPU model, nothing to do
- */
- if (!(x86_pmu.flags & PMU_FL_EXCL_ENABLED))
- return 0;
-
- w = cpumask_weight(topology_sibling_cpumask(cpu));
- if (w > 1) {
- pr_info("PMU erratum BJ122, BV98, HSD29 worked around, HT is on\n");
- return 0;
- }
-
- if (lockup_detector_suspend() != 0) {
- pr_debug("failed to disable PMU erratum BJ122, BV98, HSD29 workaround\n");
- return 0;
- }
-
- x86_pmu.flags &= ~(PMU_FL_EXCL_CNTRS | PMU_FL_EXCL_ENABLED);
-
- x86_pmu.start_scheduling = NULL;
- x86_pmu.commit_scheduling = NULL;
- x86_pmu.stop_scheduling = NULL;
-
- lockup_detector_resume();
-
- get_online_cpus();
-
- for_each_online_cpu(c) {
- free_excl_cntrs(c);
- }
-
- put_online_cpus();
- pr_info("PMU erratum BJ122, BV98, HSD29 workaround disabled, HT off\n");
- return 0;
-}
-subsys_initcall(fixup_ht_bug)
diff --git a/arch/x86/kernel/cpu/perf_event_intel_bts.c b/arch/x86/kernel/cpu/perf_event_intel_bts.c
deleted file mode 100644
index 2cad71d1b..000000000
--- a/arch/x86/kernel/cpu/perf_event_intel_bts.c
+++ /dev/null
@@ -1,544 +0,0 @@
-/*
- * BTS PMU driver for perf
- * Copyright (c) 2013-2014, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- */
-
-#undef DEBUG
-
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
-#include <linux/bitops.h>
-#include <linux/types.h>
-#include <linux/slab.h>
-#include <linux/debugfs.h>
-#include <linux/device.h>
-#include <linux/coredump.h>
-
-#include <asm-generic/sizes.h>
-#include <asm/perf_event.h>
-
-#include "perf_event.h"
-
-struct bts_ctx {
- struct perf_output_handle handle;
- struct debug_store ds_back;
- int started;
-};
-
-static DEFINE_PER_CPU(struct bts_ctx, bts_ctx);
-
-#define BTS_RECORD_SIZE 24
-#define BTS_SAFETY_MARGIN 4080
-
-struct bts_phys {
- struct page *page;
- unsigned long size;
- unsigned long offset;
- unsigned long displacement;
-};
-
-struct bts_buffer {
- size_t real_size; /* multiple of BTS_RECORD_SIZE */
- unsigned int nr_pages;
- unsigned int nr_bufs;
- unsigned int cur_buf;
- bool snapshot;
- local_t data_size;
- local_t lost;
- local_t head;
- unsigned long end;
- void **data_pages;
- struct bts_phys buf[0];
-};
-
-struct pmu bts_pmu;
-
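-
- /* Size of an AUX buffer chunk, derived from the page's allocation order. */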
-static size_t buf_size(struct page *page)
-{
- return 1 << (PAGE_SHIFT + page_private(page));
-}
-
-static void *
-bts_buffer_setup_aux(int cpu, void **pages, int nr_pages, bool overwrite)
-{
- struct bts_buffer *buf;
- struct page *page;
- int node = (cpu == -1) ? cpu : cpu_to_node(cpu);
- unsigned long offset;
- size_t size = nr_pages << PAGE_SHIFT;
- int pg, nbuf, pad;
-
- /* count all the high order buffers */
- for (pg = 0, nbuf = 0; pg < nr_pages;) {
- page = virt_to_page(pages[pg]);
- if (WARN_ON_ONCE(!PagePrivate(page) && nr_pages > 1))
- return NULL;
- pg += 1 << page_private(page);
- nbuf++;
- }
-
- /*
- * to avoid interrupts in overwrite mode, only allow one physical buffer
- */
- if (overwrite && nbuf > 1)
- return NULL;
-
- buf = kzalloc_node(offsetof(struct bts_buffer, buf[nbuf]), GFP_KERNEL, node);
- if (!buf)
- return NULL;
-
- buf->nr_pages = nr_pages;
- buf->nr_bufs = nbuf;
- buf->snapshot = overwrite;
- buf->data_pages = pages;
- buf->real_size = size - size % BTS_RECORD_SIZE;
-
- for (pg = 0, nbuf = 0, offset = 0, pad = 0; nbuf < buf->nr_bufs; nbuf++) {
- unsigned int __nr_pages;
-
- page = virt_to_page(pages[pg]);
- __nr_pages = PagePrivate(page) ? 1 << page_private(page) : 1;
- buf->buf[nbuf].page = page;
- buf->buf[nbuf].offset = offset;
- buf->buf[nbuf].displacement = (pad ? BTS_RECORD_SIZE - pad : 0);
- buf->buf[nbuf].size = buf_size(page) - buf->buf[nbuf].displacement;
- pad = buf->buf[nbuf].size % BTS_RECORD_SIZE;
- buf->buf[nbuf].size -= pad;
-
- pg += __nr_pages;
- offset += __nr_pages << PAGE_SHIFT;
- }
-
- return buf;
-}
-
-static void bts_buffer_free_aux(void *data)
-{
- kfree(data);
-}
-
-static unsigned long bts_buffer_offset(struct bts_buffer *buf, unsigned int idx)
-{
- return buf->buf[idx].offset + buf->buf[idx].displacement;
-}
-
-static void
-bts_config_buffer(struct bts_buffer *buf)
-{
- int cpu = raw_smp_processor_id();
- struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
- struct bts_phys *phys = &buf->buf[buf->cur_buf];
- unsigned long index, thresh = 0, end = phys->size;
- struct page *page = phys->page;
-
- index = local_read(&buf->head);
-
- if (!buf->snapshot) {
- if (buf->end < phys->offset + buf_size(page))
- end = buf->end - phys->offset - phys->displacement;
-
- index -= phys->offset + phys->displacement;
-
- if (end - index > BTS_SAFETY_MARGIN)
- thresh = end - BTS_SAFETY_MARGIN;
- else if (end - index > BTS_RECORD_SIZE)
- thresh = end - BTS_RECORD_SIZE;
- else
- thresh = end;
- }
-
- ds->bts_buffer_base = (u64)(long)page_address(page) + phys->displacement;
- ds->bts_index = ds->bts_buffer_base + index;
- ds->bts_absolute_maximum = ds->bts_buffer_base + end;
- ds->bts_interrupt_threshold = !buf->snapshot
- ? ds->bts_buffer_base + thresh
- : ds->bts_absolute_maximum + BTS_RECORD_SIZE;
-}
-
-static void bts_buffer_pad_out(struct bts_phys *phys, unsigned long head)
-{
- unsigned long index = head - phys->offset;
-
- memset(page_address(phys->page) + index, 0, phys->size - index);
-}
-
-static bool bts_buffer_is_full(struct bts_buffer *buf, struct bts_ctx *bts)
-{
- if (buf->snapshot)
- return false;
-
- if (local_read(&buf->data_size) >= bts->handle.size ||
- bts->handle.size - local_read(&buf->data_size) < BTS_RECORD_SIZE)
- return true;
-
- return false;
-}
-
-static void bts_update(struct bts_ctx *bts)
-{
- int cpu = raw_smp_processor_id();
- struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
- struct bts_buffer *buf = perf_get_aux(&bts->handle);
- unsigned long index = ds->bts_index - ds->bts_buffer_base, old, head;
-
- if (!buf)
- return;
-
- head = index + bts_buffer_offset(buf, buf->cur_buf);
- old = local_xchg(&buf->head, head);
-
- if (!buf->snapshot) {
- if (old == head)
- return;
-
- if (ds->bts_index >= ds->bts_absolute_maximum)
- local_inc(&buf->lost);
-
- /*
- * old and head are always in the same physical buffer, so we
- * can subtract them to get the data size.
- */
- local_add(head - old, &buf->data_size);
- } else {
- local_set(&buf->data_size, head);
- }
-}
-
-static void __bts_event_start(struct perf_event *event)
-{
- struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
- struct bts_buffer *buf = perf_get_aux(&bts->handle);
- u64 config = 0;
-
- if (!buf || bts_buffer_is_full(buf, bts))
- return;
-
- event->hw.itrace_started = 1;
- event->hw.state = 0;
-
- if (!buf->snapshot)
- config |= ARCH_PERFMON_EVENTSEL_INT;
- if (!event->attr.exclude_kernel)
- config |= ARCH_PERFMON_EVENTSEL_OS;
- if (!event->attr.exclude_user)
- config |= ARCH_PERFMON_EVENTSEL_USR;
-
- bts_config_buffer(buf);
-
- /*
- * local barrier to make sure that ds configuration made it
- * before we enable BTS
- */
- wmb();
-
- intel_pmu_enable_bts(config);
-}
-
-static void bts_event_start(struct perf_event *event, int flags)
-{
- struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
-
- __bts_event_start(event);
-
- /* PMI handler: this counter is running and likely generating PMIs */
- ACCESS_ONCE(bts->started) = 1;
-}
-
-static void __bts_event_stop(struct perf_event *event)
-{
- /*
- * No extra synchronization is mandated by the documentation to have
- * BTS data stores globally visible.
- */
- intel_pmu_disable_bts();
-
- if (event->hw.state & PERF_HES_STOPPED)
- return;
-
- ACCESS_ONCE(event->hw.state) |= PERF_HES_STOPPED;
-}
-
-static void bts_event_stop(struct perf_event *event, int flags)
-{
- struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
-
- /* PMI handler: don't restart this counter */
- ACCESS_ONCE(bts->started) = 0;
-
- __bts_event_stop(event);
-
- if (flags & PERF_EF_UPDATE)
- bts_update(bts);
-}
-
-void intel_bts_enable_local(void)
-{
- struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
-
- if (bts->handle.event && bts->started)
- __bts_event_start(bts->handle.event);
-}
-
-void intel_bts_disable_local(void)
-{
- struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
-
- if (bts->handle.event)
- __bts_event_stop(bts->handle.event);
-}
-
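-
- /*
- * Carve out the next region of the AUX buffer that BTS may write into,
- * keeping it a multiple of BTS_RECORD_SIZE and not far past the wakeup
- * watermark.
- */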
-static int
-bts_buffer_reset(struct bts_buffer *buf, struct perf_output_handle *handle)
-{
- unsigned long head, space, next_space, pad, gap, skip, wakeup;
- unsigned int next_buf;
- struct bts_phys *phys, *next_phys;
- int ret;
-
- if (buf->snapshot)
- return 0;
-
- head = handle->head & ((buf->nr_pages << PAGE_SHIFT) - 1);
- if (WARN_ON_ONCE(head != local_read(&buf->head)))
- return -EINVAL;
-
- phys = &buf->buf[buf->cur_buf];
- space = phys->offset + phys->displacement + phys->size - head;
- pad = space;
- if (space > handle->size) {
- space = handle->size;
- space -= space % BTS_RECORD_SIZE;
- }
- if (space <= BTS_SAFETY_MARGIN) {
- /* See if next phys buffer has more space */
- next_buf = buf->cur_buf + 1;
- if (next_buf >= buf->nr_bufs)
- next_buf = 0;
- next_phys = &buf->buf[next_buf];
- gap = buf_size(phys->page) - phys->displacement - phys->size +
- next_phys->displacement;
- skip = pad + gap;
- if (handle->size >= skip) {
- next_space = next_phys->size;
- if (next_space + skip > handle->size) {
- next_space = handle->size - skip;
- next_space -= next_space % BTS_RECORD_SIZE;
- }
- if (next_space > space || !space) {
- if (pad)
- bts_buffer_pad_out(phys, head);
- ret = perf_aux_output_skip(handle, skip);
- if (ret)
- return ret;
- /* Advance to next phys buffer */
- phys = next_phys;
- space = next_space;
- head = phys->offset + phys->displacement;
- /*
- * After this, cur_buf and head won't match ds
- * anymore, so we must not be racing with
- * bts_update().
- */
- buf->cur_buf = next_buf;
- local_set(&buf->head, head);
- }
- }
- }
-
- /* Don't go far beyond wakeup watermark */
- wakeup = BTS_SAFETY_MARGIN + BTS_RECORD_SIZE + handle->wakeup -
- handle->head;
- if (space > wakeup) {
- space = wakeup;
- space -= space % BTS_RECORD_SIZE;
- }
-
- buf->end = head + space;
-
- /*
- * If we have no space, the lost notification would have been sent when
- * we hit absolute_maximum - see bts_update()
- */
- if (!space)
- return -ENOSPC;
-
- return 0;
-}
-
-int intel_bts_interrupt(void)
-{
- struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
- struct perf_event *event = bts->handle.event;
- struct bts_buffer *buf;
- s64 old_head;
- int err;
-
- if (!event || !bts->started)
- return 0;
-
- buf = perf_get_aux(&bts->handle);
- /*
- * Skip snapshot counters: they don't use the interrupt, but
- * there's no other way of telling, because the pointer will
- * keep moving
- */
- if (!buf || buf->snapshot)
- return 0;
-
- old_head = local_read(&buf->head);
- bts_update(bts);
-
- /* no new data */
- if (old_head == local_read(&buf->head))
- return 0;
-
- perf_aux_output_end(&bts->handle, local_xchg(&buf->data_size, 0),
- !!local_xchg(&buf->lost, 0));
-
- buf = perf_aux_output_begin(&bts->handle, event);
- if (!buf)
- return 1;
-
- err = bts_buffer_reset(buf, &bts->handle);
- if (err)
- perf_aux_output_end(&bts->handle, 0, false);
-
- return 1;
-}
-
-static void bts_event_del(struct perf_event *event, int mode)
-{
- struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
- struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
- struct bts_buffer *buf = perf_get_aux(&bts->handle);
-
- bts_event_stop(event, PERF_EF_UPDATE);
-
- if (buf) {
- if (buf->snapshot)
- bts->handle.head =
- local_xchg(&buf->data_size,
- buf->nr_pages << PAGE_SHIFT);
- perf_aux_output_end(&bts->handle, local_xchg(&buf->data_size, 0),
- !!local_xchg(&buf->lost, 0));
- }
-
- cpuc->ds->bts_index = bts->ds_back.bts_buffer_base;
- cpuc->ds->bts_buffer_base = bts->ds_back.bts_buffer_base;
- cpuc->ds->bts_absolute_maximum = bts->ds_back.bts_absolute_maximum;
- cpuc->ds->bts_interrupt_threshold = bts->ds_back.bts_interrupt_threshold;
-}
-
-static int bts_event_add(struct perf_event *event, int mode)
-{
- struct bts_buffer *buf;
- struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
- struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
- struct hw_perf_event *hwc = &event->hw;
- int ret = -EBUSY;
-
- event->hw.state = PERF_HES_STOPPED;
-
- if (test_bit(INTEL_PMC_IDX_FIXED_BTS, cpuc->active_mask))
- return -EBUSY;
-
- if (bts->handle.event)
- return -EBUSY;
-
- buf = perf_aux_output_begin(&bts->handle, event);
- if (!buf)
- return -EINVAL;
-
- ret = bts_buffer_reset(buf, &bts->handle);
- if (ret) {
- perf_aux_output_end(&bts->handle, 0, false);
- return ret;
- }
-
- bts->ds_back.bts_buffer_base = cpuc->ds->bts_buffer_base;
- bts->ds_back.bts_absolute_maximum = cpuc->ds->bts_absolute_maximum;
- bts->ds_back.bts_interrupt_threshold = cpuc->ds->bts_interrupt_threshold;
-
- if (mode & PERF_EF_START) {
- bts_event_start(event, 0);
- if (hwc->state & PERF_HES_STOPPED) {
- bts_event_del(event, 0);
- return -EBUSY;
- }
- }
-
- return 0;
-}
-
-static void bts_event_destroy(struct perf_event *event)
-{
- x86_release_hardware();
- x86_del_exclusive(x86_lbr_exclusive_bts);
-}
-
-static int bts_event_init(struct perf_event *event)
-{
- int ret;
-
- if (event->attr.type != bts_pmu.type)
- return -ENOENT;
-
- if (x86_add_exclusive(x86_lbr_exclusive_bts))
- return -EBUSY;
-
- /*
- * BTS leaks kernel addresses even when CPL0 tracing is
- * disabled, so disallow intel_bts driver for unprivileged
- * users on paranoid systems since it provides trace data
- * to the user in a zero-copy fashion.
- *
- * Note that the default paranoia setting permits unprivileged
- * users to profile the kernel.
- */
- if (event->attr.exclude_kernel && perf_paranoid_kernel() &&
- !capable(CAP_SYS_ADMIN))
- return -EACCES;
-
- ret = x86_reserve_hardware();
- if (ret) {
- x86_del_exclusive(x86_lbr_exclusive_bts);
- return ret;
- }
-
- event->destroy = bts_event_destroy;
-
- return 0;
-}
-
-static void bts_event_read(struct perf_event *event)
-{
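- /* Nothing to do: BTS delivers its data through the AUX buffer. */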
-}
-
-static __init int bts_init(void)
-{
- if (!boot_cpu_has(X86_FEATURE_DTES64) || !x86_pmu.bts)
- return -ENODEV;
-
- bts_pmu.capabilities = PERF_PMU_CAP_AUX_NO_SG | PERF_PMU_CAP_ITRACE;
- bts_pmu.task_ctx_nr = perf_sw_context;
- bts_pmu.event_init = bts_event_init;
- bts_pmu.add = bts_event_add;
- bts_pmu.del = bts_event_del;
- bts_pmu.start = bts_event_start;
- bts_pmu.stop = bts_event_stop;
- bts_pmu.read = bts_event_read;
- bts_pmu.setup_aux = bts_buffer_setup_aux;
- bts_pmu.free_aux = bts_buffer_free_aux;
-
- return perf_pmu_register(&bts_pmu, "intel_bts", -1);
-}
-arch_initcall(bts_init);
diff --git a/arch/x86/kernel/cpu/perf_event_intel_cqm.c b/arch/x86/kernel/cpu/perf_event_intel_cqm.c
deleted file mode 100644
index a316ca96f..000000000
--- a/arch/x86/kernel/cpu/perf_event_intel_cqm.c
+++ /dev/null
@@ -1,1391 +0,0 @@
-/*
- * Intel Cache Quality-of-Service Monitoring (CQM) support.
- *
- * Based very, very heavily on work by Peter Zijlstra.
- */
-
-#include <linux/perf_event.h>
-#include <linux/slab.h>
-#include <asm/cpu_device_id.h>
-#include "perf_event.h"
-
-#define MSR_IA32_PQR_ASSOC 0x0c8f
-#define MSR_IA32_QM_CTR 0x0c8e
-#define MSR_IA32_QM_EVTSEL 0x0c8d
-
-static u32 cqm_max_rmid = -1;
-static unsigned int cqm_l3_scale; /* supposedly cacheline size */
-
-/**
- * struct intel_pqr_state - State cache for the PQR MSR
- * @rmid: The cached Resource Monitoring ID
- * @closid: The cached Class Of Service ID
- * @rmid_usecnt: The usage counter for rmid
- *
- * The upper 32 bits of MSR_IA32_PQR_ASSOC contain closid and the
- * lower 10 bits rmid. The update to MSR_IA32_PQR_ASSOC always
- * contains both parts, so we need to cache them.
- *
- * The cache also helps to avoid pointless updates if the value does
- * not change.
- */
-struct intel_pqr_state {
- u32 rmid;
- u32 closid;
- int rmid_usecnt;
-};
-
-/*
- * The cached intel_pqr_state is strictly per CPU and can never be
- * updated from a remote CPU. Both functions which modify the state
- * (intel_cqm_event_start and intel_cqm_event_stop) are called with
- * interrupts disabled, which is sufficient for the protection.
- */
-static DEFINE_PER_CPU(struct intel_pqr_state, pqr_state);
-
-/*
- * Protects cache_cgroups and cqm_rmid_free_lru and cqm_rmid_limbo_lru.
- * Also protects event->hw.cqm_rmid
- *
- * Hold either for stability, both for modification of ->hw.cqm_rmid.
- */
-static DEFINE_MUTEX(cache_mutex);
-static DEFINE_RAW_SPINLOCK(cache_lock);
-
-/*
- * Groups of events that have the same target(s), one RMID per group.
- */
-static LIST_HEAD(cache_groups);
-
-/*
- * Mask of CPUs for reading CQM values. We only need one per-socket.
- */
-static cpumask_t cqm_cpumask;
-
-#define RMID_VAL_ERROR (1ULL << 63)
-#define RMID_VAL_UNAVAIL (1ULL << 62)
-
-#define QOS_L3_OCCUP_EVENT_ID (1 << 0)
-
-#define QOS_EVENT_MASK QOS_L3_OCCUP_EVENT_ID
-
-/*
- * This is central to the rotation algorithm in __intel_cqm_rmid_rotate().
- *
- * This rmid is always free and is guaranteed to have an associated
- * near-zero occupancy value, i.e. no cachelines are tagged with this
- * RMID, once __intel_cqm_rmid_rotate() returns.
- */
-static u32 intel_cqm_rotation_rmid;
-
-#define INVALID_RMID (-1)
-
-/*
- * Is @rmid valid for programming the hardware?
- *
- * rmid 0 is reserved by the hardware for all non-monitored tasks, which
- * means that we should never come across an rmid with that value.
- * Likewise, an rmid value of -1 is used to indicate "no rmid currently
- * assigned" and is used as part of the rotation code.
- */
-static inline bool __rmid_valid(u32 rmid)
-{
- if (!rmid || rmid == INVALID_RMID)
- return false;
-
- return true;
-}
-
-static u64 __rmid_read(u32 rmid)
-{
- u64 val;
-
- /*
- * Ignore the SDM, this thing is _NOTHING_ like a regular perfcnt,
- * it just says that to increase confusion.
- */
- wrmsr(MSR_IA32_QM_EVTSEL, QOS_L3_OCCUP_EVENT_ID, rmid);
- rdmsrl(MSR_IA32_QM_CTR, val);
-
- /*
- * Aside from the ERROR and UNAVAIL bits, assume this thing returns
- * the number of cachelines tagged with @rmid.
- */
- return val;
-}
-
-enum rmid_recycle_state {
- RMID_YOUNG = 0,
- RMID_AVAILABLE,
- RMID_DIRTY,
-};
-
-struct cqm_rmid_entry {
- u32 rmid;
- enum rmid_recycle_state state;
- struct list_head list;
- unsigned long queue_time;
-};
-
-/*
- * cqm_rmid_free_lru - A least recently used list of RMIDs.
- *
- * Oldest entry at the head, newest (most recently used) entry at the
- * tail. This list is never traversed, it's only used to keep track of
- * the lru order. That is, we only pick entries off the head or insert
- * them at the tail.
- *
- * All entries on the list are 'free', and their RMIDs are not currently
- * in use. To mark an RMID as in use, remove its entry from the lru
- * list.
- *
- *
- * cqm_rmid_limbo_lru - list of currently unused but (potentially) dirty RMIDs.
- *
- * This list contains RMIDs that no one is currently using but that
- * may have a non-zero occupancy value associated with them. The
- * rotation worker moves RMIDs from the limbo list to the free list once
- * the occupancy value drops below __intel_cqm_threshold.
- *
- * Both lists are protected by cache_mutex.
- */
-static LIST_HEAD(cqm_rmid_free_lru);
-static LIST_HEAD(cqm_rmid_limbo_lru);
-
-/*
- * We use a simple array of pointers so that we can lookup a struct
- * cqm_rmid_entry in O(1). This alleviates the callers of __get_rmid()
- * and __put_rmid() from having to worry about dealing with struct
- * cqm_rmid_entry - they just deal with rmids, i.e. integers.
- *
- * Once this array is initialized it is read-only. No locks are required
- * to access it.
- *
- * All entries for all RMIDs can be looked up in this array at all
- * times.
- */
-static struct cqm_rmid_entry **cqm_rmid_ptrs;
-
-static inline struct cqm_rmid_entry *__rmid_entry(u32 rmid)
-{
- struct cqm_rmid_entry *entry;
-
- entry = cqm_rmid_ptrs[rmid];
- WARN_ON(entry->rmid != rmid);
-
- return entry;
-}
-
-/*
- * Returns < 0 on fail.
- *
- * We expect to be called with cache_mutex held.
- */
-static u32 __get_rmid(void)
-{
- struct cqm_rmid_entry *entry;
-
- lockdep_assert_held(&cache_mutex);
-
- if (list_empty(&cqm_rmid_free_lru))
- return INVALID_RMID;
-
- entry = list_first_entry(&cqm_rmid_free_lru, struct cqm_rmid_entry, list);
- list_del(&entry->list);
-
- return entry->rmid;
-}
-
-static void __put_rmid(u32 rmid)
-{
- struct cqm_rmid_entry *entry;
-
- lockdep_assert_held(&cache_mutex);
-
- WARN_ON(!__rmid_valid(rmid));
- entry = __rmid_entry(rmid);
-
- entry->queue_time = jiffies;
- entry->state = RMID_YOUNG;
-
- list_add_tail(&entry->list, &cqm_rmid_limbo_lru);
-}
-
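-
- /*
- * Allocate a cqm_rmid_entry for every RMID and put all of them, except
- * the reserved RMID 0, on the free list.
- */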
-static int intel_cqm_setup_rmid_cache(void)
-{
- struct cqm_rmid_entry *entry;
- unsigned int nr_rmids;
- int r = 0;
-
- nr_rmids = cqm_max_rmid + 1;
- cqm_rmid_ptrs = kmalloc(sizeof(struct cqm_rmid_entry *) *
- nr_rmids, GFP_KERNEL);
- if (!cqm_rmid_ptrs)
- return -ENOMEM;
-
- for (; r <= cqm_max_rmid; r++) {
- struct cqm_rmid_entry *entry;
-
- entry = kmalloc(sizeof(*entry), GFP_KERNEL);
- if (!entry)
- goto fail;
-
- INIT_LIST_HEAD(&entry->list);
- entry->rmid = r;
- cqm_rmid_ptrs[r] = entry;
-
- list_add_tail(&entry->list, &cqm_rmid_free_lru);
- }
-
- /*
- * RMID 0 is special and is always allocated. It's used for all
- * tasks that are not monitored.
- */
- entry = __rmid_entry(0);
- list_del(&entry->list);
-
- mutex_lock(&cache_mutex);
- intel_cqm_rotation_rmid = __get_rmid();
- mutex_unlock(&cache_mutex);
-
- return 0;
-fail:
- while (r--)
- kfree(cqm_rmid_ptrs[r]);
-
- kfree(cqm_rmid_ptrs);
- return -ENOMEM;
-}
-
-/*
- * Determine if @a and @b measure the same set of tasks.
- *
- * If @a and @b measure the same set of tasks then we want to share a
- * single RMID.
- */
-static bool __match_event(struct perf_event *a, struct perf_event *b)
-{
- /* Per-cpu and task events don't mix */
- if ((a->attach_state & PERF_ATTACH_TASK) !=
- (b->attach_state & PERF_ATTACH_TASK))
- return false;
-
-#ifdef CONFIG_CGROUP_PERF
- if (a->cgrp != b->cgrp)
- return false;
-#endif
-
- /* If not task event, we're machine wide */
- if (!(b->attach_state & PERF_ATTACH_TASK))
- return true;
-
- /*
- * Events that target the same task are placed into the same cache group.
- */
- if (a->hw.target == b->hw.target)
- return true;
-
- /*
- * Are we an inherited event?
- */
- if (b->parent == a)
- return true;
-
- return false;
-}
-
-#ifdef CONFIG_CGROUP_PERF
-static inline struct perf_cgroup *event_to_cgroup(struct perf_event *event)
-{
- if (event->attach_state & PERF_ATTACH_TASK)
- return perf_cgroup_from_task(event->hw.target, event->ctx);
-
- return event->cgrp;
-}
-#endif
-
-/*
- * Determine if @a's tasks intersect with @b's tasks
- *
- * There are combinations of events that we explicitly prohibit,
- *
- * PROHIBITS
- * system-wide -> cgroup and task
- * cgroup -> system-wide
- * -> task in cgroup
- * task -> system-wide
- * -> task in cgroup
- *
- * Call this function before allocating an RMID.
- */
-static bool __conflict_event(struct perf_event *a, struct perf_event *b)
-{
-#ifdef CONFIG_CGROUP_PERF
- /*
- * We can have any number of cgroups but only one system-wide
- * event at a time.
- */
- if (a->cgrp && b->cgrp) {
- struct perf_cgroup *ac = a->cgrp;
- struct perf_cgroup *bc = b->cgrp;
-
- /*
- * This condition should have been caught in
- * __match_event() and we should be sharing an RMID.
- */
- WARN_ON_ONCE(ac == bc);
-
- if (cgroup_is_descendant(ac->css.cgroup, bc->css.cgroup) ||
- cgroup_is_descendant(bc->css.cgroup, ac->css.cgroup))
- return true;
-
- return false;
- }
-
- if (a->cgrp || b->cgrp) {
- struct perf_cgroup *ac, *bc;
-
- /*
- * cgroup and system-wide events are mutually exclusive
- */
- if ((a->cgrp && !(b->attach_state & PERF_ATTACH_TASK)) ||
- (b->cgrp && !(a->attach_state & PERF_ATTACH_TASK)))
- return true;
-
- /*
- * Ensure neither event is part of the other's cgroup
- */
- ac = event_to_cgroup(a);
- bc = event_to_cgroup(b);
- if (ac == bc)
- return true;
-
- /*
- * Must have cgroup and non-intersecting task events.
- */
- if (!ac || !bc)
- return false;
-
- /*
- * We have cgroup and task events, and the task belongs
- * to a cgroup. Check for overlap.
- */
- if (cgroup_is_descendant(ac->css.cgroup, bc->css.cgroup) ||
- cgroup_is_descendant(bc->css.cgroup, ac->css.cgroup))
- return true;
-
- return false;
- }
-#endif
- /*
- * If one of them is not a task, same story as above with cgroups.
- */
- if (!(a->attach_state & PERF_ATTACH_TASK) ||
- !(b->attach_state & PERF_ATTACH_TASK))
- return true;
-
- /*
- * Must be non-overlapping.
- */
- return false;
-}
-
-struct rmid_read {
- u32 rmid;
- atomic64_t value;
-};
-
-static void __intel_cqm_event_count(void *info);
-
-/*
- * Exchange the RMID of a group of events.
- */
-static u32 intel_cqm_xchg_rmid(struct perf_event *group, u32 rmid)
-{
- struct perf_event *event;
- struct list_head *head = &group->hw.cqm_group_entry;
- u32 old_rmid = group->hw.cqm_rmid;
-
- lockdep_assert_held(&cache_mutex);
-
- /*
- * If our RMID is being deallocated, perform a read now.
- */
- if (__rmid_valid(old_rmid) && !__rmid_valid(rmid)) {
- struct rmid_read rr = {
- .value = ATOMIC64_INIT(0),
- .rmid = old_rmid,
- };
-
- on_each_cpu_mask(&cqm_cpumask, __intel_cqm_event_count,
- &rr, 1);
- local64_set(&group->count, atomic64_read(&rr.value));
- }
-
- raw_spin_lock_irq(&cache_lock);
-
- group->hw.cqm_rmid = rmid;
- list_for_each_entry(event, head, hw.cqm_group_entry)
- event->hw.cqm_rmid = rmid;
-
- raw_spin_unlock_irq(&cache_lock);
-
- return old_rmid;
-}
-
-/*
- * If we fail to assign a new RMID for intel_cqm_rotation_rmid because
- * cachelines are still tagged with RMIDs in limbo, we progressively
- * increment the threshold until we find an RMID in limbo with <=
- * __intel_cqm_threshold lines tagged. This is designed to mitigate the
- * problem where cachelines tagged with an RMID are not steadily being
- * evicted.
- *
- * On successful rotations we decrease the threshold back towards zero.
- *
- * __intel_cqm_max_threshold provides an upper bound on the threshold,
- * and is measured in bytes because it's exposed to userland.
- */
-static unsigned int __intel_cqm_threshold;
-static unsigned int __intel_cqm_max_threshold;
-
-/*
- * Test whether an RMID's occupancy on this CPU is below __intel_cqm_threshold;
- * entries still above the threshold are marked RMID_DIRTY.
- */
-static void intel_cqm_stable(void *arg)
-{
- struct cqm_rmid_entry *entry;
-
- list_for_each_entry(entry, &cqm_rmid_limbo_lru, list) {
- if (entry->state != RMID_AVAILABLE)
- break;
-
- if (__rmid_read(entry->rmid) > __intel_cqm_threshold)
- entry->state = RMID_DIRTY;
- }
-}
-
-/*
- * If we have group events waiting for an RMID that don't conflict with
- * events already running, assign @rmid.
- */
-static bool intel_cqm_sched_in_event(u32 rmid)
-{
- struct perf_event *leader, *event;
-
- lockdep_assert_held(&cache_mutex);
-
- leader = list_first_entry(&cache_groups, struct perf_event,
- hw.cqm_groups_entry);
- event = leader;
-
- list_for_each_entry_continue(event, &cache_groups,
- hw.cqm_groups_entry) {
- if (__rmid_valid(event->hw.cqm_rmid))
- continue;
-
- if (__conflict_event(event, leader))
- continue;
-
- intel_cqm_xchg_rmid(event, rmid);
- return true;
- }
-
- return false;
-}
-
-/*
- * Initially use this constant for both the limbo queue time and the
- * rotation timer interval, pmu::hrtimer_interval_ms.
- *
- * They don't need to be the same, but the two are related since if you
- * rotate faster than you recycle RMIDs, you may run out of available
- * RMIDs.
- */
-#define RMID_DEFAULT_QUEUE_TIME 250 /* ms */
-
-static unsigned int __rmid_queue_time_ms = RMID_DEFAULT_QUEUE_TIME;
-
-/*
- * intel_cqm_rmid_stabilize - move RMIDs from limbo to free list
- * @nr_available: number of freeable RMIDs on the limbo list
- *
- * Quiescent state; wait for all 'freed' RMIDs to become unused, i.e. no
- * cachelines are tagged with those RMIDs. After this we can reuse them
- * and know that the current set of active RMIDs is stable.
- *
- * Return %true or %false depending on whether stabilization needs to be
- * reattempted.
- *
- * If we return %true then @nr_available is updated to indicate the
- * number of RMIDs on the limbo list that have been queued for the
- * minimum queue time (RMID_AVAILABLE), but whose data occupancy values
- * are above __intel_cqm_threshold.
- */
-static bool intel_cqm_rmid_stabilize(unsigned int *available)
-{
- struct cqm_rmid_entry *entry, *tmp;
-
- lockdep_assert_held(&cache_mutex);
-
- *available = 0;
- list_for_each_entry(entry, &cqm_rmid_limbo_lru, list) {
- unsigned long min_queue_time;
- unsigned long now = jiffies;
-
- /*
- * We hold RMIDs placed into limbo for a minimum queue
- * time. Before the minimum queue time has elapsed we do
- * not recycle RMIDs.
- *
- * The reasoning is that until a sufficient time has
- * passed since we stopped using an RMID, any RMID
- * placed onto the limbo list will likely still have
- * data tagged in the cache, which means we'll probably
- * fail to recycle it anyway.
- *
- * We can save ourselves an expensive IPI by skipping
- * any RMIDs that have not been queued for the minimum
- * time.
- */
- min_queue_time = entry->queue_time +
- msecs_to_jiffies(__rmid_queue_time_ms);
-
- if (time_after(min_queue_time, now))
- break;
-
- entry->state = RMID_AVAILABLE;
- (*available)++;
- }
-
- /*
- * Fast return if none of the RMIDs on the limbo list have been
- * sitting on the queue for the minimum queue time.
- */
- if (!*available)
- return false;
-
- /*
- * Test whether an RMID is free for each package.
- */
- on_each_cpu_mask(&cqm_cpumask, intel_cqm_stable, NULL, true);
-
- list_for_each_entry_safe(entry, tmp, &cqm_rmid_limbo_lru, list) {
- /*
- * Exhausted all RMIDs that have waited min queue time.
- */
- if (entry->state == RMID_YOUNG)
- break;
-
- if (entry->state == RMID_DIRTY)
- continue;
-
- list_del(&entry->list); /* remove from limbo */
-
- /*
- * The rotation RMID gets priority if it's
- * currently invalid, in which case we skip
- * adding the RMID to the free lru.
- */
- if (!__rmid_valid(intel_cqm_rotation_rmid)) {
- intel_cqm_rotation_rmid = entry->rmid;
- continue;
- }
-
- /*
- * If we have groups waiting for RMIDs, hand
- * them one now provided they don't conflict.
- */
- if (intel_cqm_sched_in_event(entry->rmid))
- continue;
-
- /*
- * Otherwise place it onto the free list.
- */
- list_add_tail(&entry->list, &cqm_rmid_free_lru);
- }
-
- return __rmid_valid(intel_cqm_rotation_rmid);
-}
-
-/*
- * Pick a victim group and move it to the tail of the group list.
- * @next: The first group without an RMID
- */
-static void __intel_cqm_pick_and_rotate(struct perf_event *next)
-{
- struct perf_event *rotor;
- u32 rmid;
-
- lockdep_assert_held(&cache_mutex);
-
- rotor = list_first_entry(&cache_groups, struct perf_event,
- hw.cqm_groups_entry);
-
- /*
- * The group at the front of the list should always have a valid
- * RMID. If it doesn't then no groups have RMIDs assigned and we
- * don't need to rotate the list.
- */
- if (next == rotor)
- return;
-
- rmid = intel_cqm_xchg_rmid(rotor, INVALID_RMID);
- __put_rmid(rmid);
-
- list_rotate_left(&cache_groups);
-}
-
-/*
- * Deallocate the RMIDs from any events that conflict with @event, and
- * place them on the back of the group list.
- */
-static void intel_cqm_sched_out_conflicting_events(struct perf_event *event)
-{
- struct perf_event *group, *g;
- u32 rmid;
-
- lockdep_assert_held(&cache_mutex);
-
- list_for_each_entry_safe(group, g, &cache_groups, hw.cqm_groups_entry) {
- if (group == event)
- continue;
-
- rmid = group->hw.cqm_rmid;
-
- /*
- * Skip events that don't have a valid RMID.
- */
- if (!__rmid_valid(rmid))
- continue;
-
- /*
- * No conflict? No problem! Leave the event alone.
- */
- if (!__conflict_event(group, event))
- continue;
-
- intel_cqm_xchg_rmid(group, INVALID_RMID);
- __put_rmid(rmid);
- }
-}
-
-/*
- * Attempt to rotate the groups and assign new RMIDs.
- *
- * We rotate for two reasons,
- * 1. To handle the scheduling of conflicting events
- * 2. To recycle RMIDs
- *
- * Rotating RMIDs is complicated because the hardware doesn't give us
- * any clues.
- *
- * There are problems with the hardware interface; when you change the
- * task:RMID map, cachelines retain their 'old' tags, giving a skewed
- * picture. In order to work around this, we must always keep one free
- * RMID - intel_cqm_rotation_rmid.
- *
- * Rotation works by taking away an RMID from a group (the old RMID),
- * and assigning the free RMID to another group (the new RMID). We must
- * then wait for the old RMID to not be used (no cachelines tagged).
- * This ensures that all cachelines are tagged with 'active' RMIDs. At
- * this point we can start reading values for the new RMID and treat the
- * old RMID as the free RMID for the next rotation.
- *
- * Return %true or %false depending on whether we did any rotating.
- */
-static bool __intel_cqm_rmid_rotate(void)
-{
- struct perf_event *group, *start = NULL;
- unsigned int threshold_limit;
- unsigned int nr_needed = 0;
- unsigned int nr_available;
- bool rotated = false;
-
- mutex_lock(&cache_mutex);
-
-again:
- /*
- * Fast path through this function if there are no groups and no
- * RMIDs that need cleaning.
- */
- if (list_empty(&cache_groups) && list_empty(&cqm_rmid_limbo_lru))
- goto out;
-
- list_for_each_entry(group, &cache_groups, hw.cqm_groups_entry) {
- if (!__rmid_valid(group->hw.cqm_rmid)) {
- if (!start)
- start = group;
- nr_needed++;
- }
- }
-
- /*
- * We have some event groups, but they all have RMIDs assigned
- * and no RMIDs need cleaning.
- */
- if (!nr_needed && list_empty(&cqm_rmid_limbo_lru))
- goto out;
-
- if (!nr_needed)
- goto stabilize;
-
- /*
- * We have more event groups without RMIDs than available RMIDs,
- * or we have event groups that conflict with the ones currently
- * scheduled.
- *
- * We force deallocate the rmid of the group at the head of
- * cache_groups. The first event group without an RMID then gets
- * assigned intel_cqm_rotation_rmid. This ensures we always make
- * forward progress.
- *
- * Rotate the cache_groups list so the previous head is now the
- * tail.
- */
- __intel_cqm_pick_and_rotate(start);
-
- /*
- * If the rotation is going to succeed, reduce the threshold so
- * that we don't needlessly reuse dirty RMIDs.
- */
- if (__rmid_valid(intel_cqm_rotation_rmid)) {
- intel_cqm_xchg_rmid(start, intel_cqm_rotation_rmid);
- intel_cqm_rotation_rmid = __get_rmid();
-
- intel_cqm_sched_out_conflicting_events(start);
-
- if (__intel_cqm_threshold)
- __intel_cqm_threshold--;
- }
-
- rotated = true;
-
-stabilize:
- /*
- * We now need to stabilize the RMID we freed above (if any) to
- * ensure that the next time we rotate we have an RMID with zero
- * occupancy value.
- *
- * Alternatively, if we didn't need to perform any rotation,
- * we'll have a bunch of RMIDs in limbo that need stabilizing.
- */
- threshold_limit = __intel_cqm_max_threshold / cqm_l3_scale;
-
- while (intel_cqm_rmid_stabilize(&nr_available) &&
- __intel_cqm_threshold < threshold_limit) {
- unsigned int steal_limit;
-
- /*
- * Don't spin if nobody is actively waiting for an RMID,
- * the rotation worker will be kicked as soon as an
- * event needs an RMID anyway.
- */
- if (!nr_needed)
- break;
-
- /* Allow max 25% of RMIDs to be in limbo. */
- steal_limit = (cqm_max_rmid + 1) / 4;
-
- /*
- * We failed to stabilize any RMIDs so our rotation
- * logic is now stuck. In order to make forward progress
- * we have a few options:
- *
- * 1. rotate ("steal") another RMID
- * 2. increase the threshold
- * 3. do nothing
- *
- * We do both of 1. and 2. until we hit the steal limit.
- *
- * The steal limit prevents all RMIDs ending up on the
- * limbo list. This can happen if every RMID has a
- * non-zero occupancy above threshold_limit, and the
- * occupancy values aren't dropping fast enough.
- *
- * Note that there is prioritisation at work here - we'd
- * rather increase the number of RMIDs on the limbo list
- * than increase the threshold, because increasing the
- * threshold skews the event data (because we reuse
- * dirty RMIDs) - threshold bumps are a last resort.
- */
- if (nr_available < steal_limit)
- goto again;
-
- __intel_cqm_threshold++;
- }
-
-out:
- mutex_unlock(&cache_mutex);
- return rotated;
-}
-
-static void intel_cqm_rmid_rotate(struct work_struct *work);
-
-static DECLARE_DELAYED_WORK(intel_cqm_rmid_work, intel_cqm_rmid_rotate);
-
-static struct pmu intel_cqm_pmu;
-
-static void intel_cqm_rmid_rotate(struct work_struct *work)
-{
- unsigned long delay;
-
- __intel_cqm_rmid_rotate();
-
- delay = msecs_to_jiffies(intel_cqm_pmu.hrtimer_interval_ms);
- schedule_delayed_work(&intel_cqm_rmid_work, delay);
-}
-
-/*
- * Find a group and setup RMID.
- *
- * If we're part of a group, we use the group's RMID.
- */
-static void intel_cqm_setup_event(struct perf_event *event,
- struct perf_event **group)
-{
- struct perf_event *iter;
- bool conflict = false;
- u32 rmid;
-
- list_for_each_entry(iter, &cache_groups, hw.cqm_groups_entry) {
- rmid = iter->hw.cqm_rmid;
-
- if (__match_event(iter, event)) {
- /* All tasks in a group share an RMID */
- event->hw.cqm_rmid = rmid;
- *group = iter;
- return;
- }
-
- /*
- * We only care about conflicts for events that are
- * actually scheduled in (and hence have a valid RMID).
- */
- if (__conflict_event(iter, event) && __rmid_valid(rmid))
- conflict = true;
- }
-
- if (conflict)
- rmid = INVALID_RMID;
- else
- rmid = __get_rmid();
-
- event->hw.cqm_rmid = rmid;
-}
-
-static void intel_cqm_event_read(struct perf_event *event)
-{
- unsigned long flags;
- u32 rmid;
- u64 val;
-
- /*
- * Task events are handled by intel_cqm_event_count().
- */
- if (event->cpu == -1)
- return;
-
- raw_spin_lock_irqsave(&cache_lock, flags);
- rmid = event->hw.cqm_rmid;
-
- if (!__rmid_valid(rmid))
- goto out;
-
- val = __rmid_read(rmid);
-
- /*
- * Ignore this reading on error states and do not update the value.
- */
- if (val & (RMID_VAL_ERROR | RMID_VAL_UNAVAIL))
- goto out;
-
- local64_set(&event->count, val);
-out:
- raw_spin_unlock_irqrestore(&cache_lock, flags);
-}
-
-static void __intel_cqm_event_count(void *info)
-{
- struct rmid_read *rr = info;
- u64 val;
-
- val = __rmid_read(rr->rmid);
-
- if (val & (RMID_VAL_ERROR | RMID_VAL_UNAVAIL))
- return;
-
- atomic64_add(val, &rr->value);
-}
-
-static inline bool cqm_group_leader(struct perf_event *event)
-{
- return !list_empty(&event->hw.cqm_groups_entry);
-}
-
-static u64 intel_cqm_event_count(struct perf_event *event)
-{
- unsigned long flags;
- struct rmid_read rr = {
- .value = ATOMIC64_INIT(0),
- };
-
- /*
- * We only need to worry about task events. System-wide events
- * are handled like usual, i.e. entirely with
- * intel_cqm_event_read().
- */
- if (event->cpu != -1)
- return __perf_event_count(event);
-
- /*
- * Only the group leader gets to report values. This stops us
- * reporting duplicate values to userspace, and gives us a clear
- * rule for which task gets to report the values.
- *
- * Note that it is impossible to attribute these values to
- * specific packages - we forfeit that ability when we create
- * task events.
- */
- if (!cqm_group_leader(event))
- return 0;
-
- /*
- * Getting up-to-date values requires an SMP IPI which is not
- * possible if we're being called in interrupt context. Return
- * the cached values instead.
- */
- if (unlikely(in_interrupt()))
- goto out;
-
- /*
- * Notice that we don't perform the reading of an RMID
- * atomically, because we can't hold a spin lock across the
- * IPIs.
- *
- * Speculatively perform the read, since @event might be
- * assigned a different (possibly invalid) RMID while we're
- * busy performing the IPI calls. It's therefore necessary to
- * check @event's RMID afterwards, and if it has changed,
- * discard the result of the read.
- */
- rr.rmid = ACCESS_ONCE(event->hw.cqm_rmid);
-
- if (!__rmid_valid(rr.rmid))
- goto out;
-
- on_each_cpu_mask(&cqm_cpumask, __intel_cqm_event_count, &rr, 1);
-
- raw_spin_lock_irqsave(&cache_lock, flags);
- if (event->hw.cqm_rmid == rr.rmid)
- local64_set(&event->count, atomic64_read(&rr.value));
- raw_spin_unlock_irqrestore(&cache_lock, flags);
-out:
- return __perf_event_count(event);
-}
-
-static void intel_cqm_event_start(struct perf_event *event, int mode)
-{
- struct intel_pqr_state *state = this_cpu_ptr(&pqr_state);
- u32 rmid = event->hw.cqm_rmid;
-
- if (!(event->hw.cqm_state & PERF_HES_STOPPED))
- return;
-
- event->hw.cqm_state &= ~PERF_HES_STOPPED;
-
- if (state->rmid_usecnt++) {
- if (!WARN_ON_ONCE(state->rmid != rmid))
- return;
- } else {
- WARN_ON_ONCE(state->rmid);
- }
-
- state->rmid = rmid;
- wrmsr(MSR_IA32_PQR_ASSOC, rmid, state->closid);
-}
-
-static void intel_cqm_event_stop(struct perf_event *event, int mode)
-{
- struct intel_pqr_state *state = this_cpu_ptr(&pqr_state);
-
- if (event->hw.cqm_state & PERF_HES_STOPPED)
- return;
-
- event->hw.cqm_state |= PERF_HES_STOPPED;
-
- intel_cqm_event_read(event);
-
- if (!--state->rmid_usecnt) {
- state->rmid = 0;
- wrmsr(MSR_IA32_PQR_ASSOC, 0, state->closid);
- } else {
- WARN_ON_ONCE(!state->rmid);
- }
-}
-
-static int intel_cqm_event_add(struct perf_event *event, int mode)
-{
- unsigned long flags;
- u32 rmid;
-
- raw_spin_lock_irqsave(&cache_lock, flags);
-
- event->hw.cqm_state = PERF_HES_STOPPED;
- rmid = event->hw.cqm_rmid;
-
- if (__rmid_valid(rmid) && (mode & PERF_EF_START))
- intel_cqm_event_start(event, mode);
-
- raw_spin_unlock_irqrestore(&cache_lock, flags);
-
- return 0;
-}
-
-static void intel_cqm_event_destroy(struct perf_event *event)
-{
- struct perf_event *group_other = NULL;
-
- mutex_lock(&cache_mutex);
-
- /*
- * If there's another event in this group...
- */
- if (!list_empty(&event->hw.cqm_group_entry)) {
- group_other = list_first_entry(&event->hw.cqm_group_entry,
- struct perf_event,
- hw.cqm_group_entry);
- list_del(&event->hw.cqm_group_entry);
- }
-
- /*
- * And we're the group leader...
- */
- if (cqm_group_leader(event)) {
- /*
- * If there was a group_other, make that leader, otherwise
- * destroy the group and return the RMID.
- */
- if (group_other) {
- list_replace(&event->hw.cqm_groups_entry,
- &group_other->hw.cqm_groups_entry);
- } else {
- u32 rmid = event->hw.cqm_rmid;
-
- if (__rmid_valid(rmid))
- __put_rmid(rmid);
- list_del(&event->hw.cqm_groups_entry);
- }
- }
-
- mutex_unlock(&cache_mutex);
-}
-
-static int intel_cqm_event_init(struct perf_event *event)
-{
- struct perf_event *group = NULL;
- bool rotate = false;
-
- if (event->attr.type != intel_cqm_pmu.type)
- return -ENOENT;
-
- if (event->attr.config & ~QOS_EVENT_MASK)
- return -EINVAL;
-
- /* unsupported modes and filters */
- if (event->attr.exclude_user ||
- event->attr.exclude_kernel ||
- event->attr.exclude_hv ||
- event->attr.exclude_idle ||
- event->attr.exclude_host ||
- event->attr.exclude_guest ||
- event->attr.sample_period) /* no sampling */
- return -EINVAL;
-
- INIT_LIST_HEAD(&event->hw.cqm_group_entry);
- INIT_LIST_HEAD(&event->hw.cqm_groups_entry);
-
- event->destroy = intel_cqm_event_destroy;
-
- mutex_lock(&cache_mutex);
-
- /* Will also set rmid */
- intel_cqm_setup_event(event, &group);
-
- if (group) {
- list_add_tail(&event->hw.cqm_group_entry,
- &group->hw.cqm_group_entry);
- } else {
- list_add_tail(&event->hw.cqm_groups_entry,
- &cache_groups);
-
- /*
- * All RMIDs are either in use or have recently been
- * used. Kick the rotation worker to clean/free some.
- *
- * We only do this for the group leader, rather than for
- * every event in a group to save on needless work.
- */
- if (!__rmid_valid(event->hw.cqm_rmid))
- rotate = true;
- }
-
- mutex_unlock(&cache_mutex);
-
- if (rotate)
- schedule_delayed_work(&intel_cqm_rmid_work, 0);
-
- return 0;
-}
-
-EVENT_ATTR_STR(llc_occupancy, intel_cqm_llc, "event=0x01");
-EVENT_ATTR_STR(llc_occupancy.per-pkg, intel_cqm_llc_pkg, "1");
-EVENT_ATTR_STR(llc_occupancy.unit, intel_cqm_llc_unit, "Bytes");
-EVENT_ATTR_STR(llc_occupancy.scale, intel_cqm_llc_scale, NULL);
-EVENT_ATTR_STR(llc_occupancy.snapshot, intel_cqm_llc_snapshot, "1");
-
-static struct attribute *intel_cqm_events_attr[] = {
- EVENT_PTR(intel_cqm_llc),
- EVENT_PTR(intel_cqm_llc_pkg),
- EVENT_PTR(intel_cqm_llc_unit),
- EVENT_PTR(intel_cqm_llc_scale),
- EVENT_PTR(intel_cqm_llc_snapshot),
- NULL,
-};
-
-static struct attribute_group intel_cqm_events_group = {
- .name = "events",
- .attrs = intel_cqm_events_attr,
-};
-
-PMU_FORMAT_ATTR(event, "config:0-7");
-static struct attribute *intel_cqm_formats_attr[] = {
- &format_attr_event.attr,
- NULL,
-};
-
-static struct attribute_group intel_cqm_format_group = {
- .name = "format",
- .attrs = intel_cqm_formats_attr,
-};
-
-static ssize_t
-max_recycle_threshold_show(struct device *dev, struct device_attribute *attr,
- char *page)
-{
- ssize_t rv;
-
- mutex_lock(&cache_mutex);
- rv = snprintf(page, PAGE_SIZE-1, "%u\n", __intel_cqm_max_threshold);
- mutex_unlock(&cache_mutex);
-
- return rv;
-}
-
-static ssize_t
-max_recycle_threshold_store(struct device *dev,
- struct device_attribute *attr,
- const char *buf, size_t count)
-{
- unsigned int bytes, cachelines;
- int ret;
-
- ret = kstrtouint(buf, 0, &bytes);
- if (ret)
- return ret;
-
- mutex_lock(&cache_mutex);
-
- __intel_cqm_max_threshold = bytes;
- cachelines = bytes / cqm_l3_scale;
-
- /*
- * The new maximum takes effect immediately.
- */
- if (__intel_cqm_threshold > cachelines)
- __intel_cqm_threshold = cachelines;
-
- mutex_unlock(&cache_mutex);
-
- return count;
-}
-
-static DEVICE_ATTR_RW(max_recycle_threshold);
-
-static struct attribute *intel_cqm_attrs[] = {
- &dev_attr_max_recycle_threshold.attr,
- NULL,
-};
-
-static const struct attribute_group intel_cqm_group = {
- .attrs = intel_cqm_attrs,
-};
-
-static const struct attribute_group *intel_cqm_attr_groups[] = {
- &intel_cqm_events_group,
- &intel_cqm_format_group,
- &intel_cqm_group,
- NULL,
-};
-
-static struct pmu intel_cqm_pmu = {
- .hrtimer_interval_ms = RMID_DEFAULT_QUEUE_TIME,
- .attr_groups = intel_cqm_attr_groups,
- .task_ctx_nr = perf_sw_context,
- .event_init = intel_cqm_event_init,
- .add = intel_cqm_event_add,
- .del = intel_cqm_event_stop,
- .start = intel_cqm_event_start,
- .stop = intel_cqm_event_stop,
- .read = intel_cqm_event_read,
- .count = intel_cqm_event_count,
-};
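
The sysfs attributes and pmu definition above are what a consumer sees under /sys/bus/event_source/devices/intel_cqm/ once perf_pmu_register() runs in intel_cqm_init() below. As a rough, illustrative sketch (not part of this file), a userspace program could open the llc_occupancy event through the standard perf_event_open() interface; the sysfs path, the config value 0x01 and the task-mode arguments follow the attributes defined above, while the surrounding program structure and error handling are assumptions made for the example:

/* Illustrative userspace consumer of the intel_cqm PMU; not kernel code. */
#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>

static long sys_perf_event_open(struct perf_event_attr *attr, pid_t pid,
				int cpu, int group_fd, unsigned long flags)
{
	return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
}

int main(void)
{
	struct perf_event_attr attr;
	unsigned int type;
	uint64_t count;
	FILE *f;
	int fd;

	/* Dynamic PMU type id published by the perf core. */
	f = fopen("/sys/bus/event_source/devices/intel_cqm/type", "r");
	if (!f || fscanf(f, "%u", &type) != 1)
		return 1;
	fclose(f);

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = type;
	attr.config = 0x01;	/* events/llc_occupancy advertises event=0x01 */

	/* A task event on the current task; sampling would be rejected. */
	fd = sys_perf_event_open(&attr, 0, -1, -1, 0);
	if (fd < 0)
		return 1;

	sleep(1);
	if (read(fd, &count, sizeof(count)) == sizeof(count))
		printf("llc_occupancy raw count: %llu (multiply by the .scale attribute for bytes)\n",
		       (unsigned long long)count);
	close(fd);
	return 0;
}

The value read back is what intel_cqm_event_count() reports for the group leader, i.e. the cached occupancy count for the group's RMID.
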
-
-static inline void cqm_pick_event_reader(int cpu)
-{
- int phys_id = topology_physical_package_id(cpu);
- int i;
-
- for_each_cpu(i, &cqm_cpumask) {
- if (phys_id == topology_physical_package_id(i))
- return; /* already got reader for this socket */
- }
-
- cpumask_set_cpu(cpu, &cqm_cpumask);
-}
-
-static void intel_cqm_cpu_starting(unsigned int cpu)
-{
- struct intel_pqr_state *state = &per_cpu(pqr_state, cpu);
- struct cpuinfo_x86 *c = &cpu_data(cpu);
-
- state->rmid = 0;
- state->closid = 0;
- state->rmid_usecnt = 0;
-
- WARN_ON(c->x86_cache_max_rmid != cqm_max_rmid);
- WARN_ON(c->x86_cache_occ_scale != cqm_l3_scale);
-}
-
-static void intel_cqm_cpu_exit(unsigned int cpu)
-{
- int phys_id = topology_physical_package_id(cpu);
- int i;
-
- /*
- * Is @cpu a designated cqm reader?
- */
- if (!cpumask_test_and_clear_cpu(cpu, &cqm_cpumask))
- return;
-
- for_each_online_cpu(i) {
- if (i == cpu)
- continue;
-
- if (phys_id == topology_physical_package_id(i)) {
- cpumask_set_cpu(i, &cqm_cpumask);
- break;
- }
- }
-}
-
-static int intel_cqm_cpu_notifier(struct notifier_block *nb,
- unsigned long action, void *hcpu)
-{
- unsigned int cpu = (unsigned long)hcpu;
-
- switch (action & ~CPU_TASKS_FROZEN) {
- case CPU_DOWN_PREPARE:
- intel_cqm_cpu_exit(cpu);
- break;
- case CPU_STARTING:
- intel_cqm_cpu_starting(cpu);
- cqm_pick_event_reader(cpu);
- break;
- }
-
- return NOTIFY_OK;
-}
-
-static const struct x86_cpu_id intel_cqm_match[] = {
- { .vendor = X86_VENDOR_INTEL, .feature = X86_FEATURE_CQM_OCCUP_LLC },
- {}
-};
-
-static int __init intel_cqm_init(void)
-{
- char *str, scale[20];
- int i, cpu, ret;
-
- if (!x86_match_cpu(intel_cqm_match))
- return -ENODEV;
-
- cqm_l3_scale = boot_cpu_data.x86_cache_occ_scale;
-
- /*
- * It's possible that not all resources support the same number
- * of RMIDs. Instead of making scheduling much more complicated
- * (where we have to match a task's RMID to a cpu that supports
- * that many RMIDs) just find the minimum RMIDs supported across
- * all cpus.
- *
- * Also, check that the scales match on all cpus.
- */
- cpu_notifier_register_begin();
-
- for_each_online_cpu(cpu) {
- struct cpuinfo_x86 *c = &cpu_data(cpu);
-
- if (c->x86_cache_max_rmid < cqm_max_rmid)
- cqm_max_rmid = c->x86_cache_max_rmid;
-
- if (c->x86_cache_occ_scale != cqm_l3_scale) {
- pr_err("Multiple LLC scale values, disabling\n");
- ret = -EINVAL;
- goto out;
- }
- }
-
- /*
- * A reasonable upper limit on the max threshold is the number
- * of lines tagged per RMID if all RMIDs have the same number of
- * lines tagged in the LLC.
- *
- * For a 35MB LLC and 56 RMIDs, this is ~1.8% of the LLC.
- */
- __intel_cqm_max_threshold =
- boot_cpu_data.x86_cache_size * 1024 / (cqm_max_rmid + 1);
-
- snprintf(scale, sizeof(scale), "%u", cqm_l3_scale);
- str = kstrdup(scale, GFP_KERNEL);
- if (!str) {
- ret = -ENOMEM;
- goto out;
- }
-
- event_attr_intel_cqm_llc_scale.event_str = str;
-
- ret = intel_cqm_setup_rmid_cache();
- if (ret)
- goto out;
-
- for_each_online_cpu(i) {
- intel_cqm_cpu_starting(i);
- cqm_pick_event_reader(i);
- }
-
- __perf_cpu_notifier(intel_cqm_cpu_notifier);
-
- ret = perf_pmu_register(&intel_cqm_pmu, "intel_cqm", -1);
- if (ret)
- pr_err("Intel CQM perf registration failed: %d\n", ret);
- else
- pr_info("Intel CQM monitoring enabled\n");
-
-out:
- cpu_notifier_register_done();
-
- return ret;
-}
-device_initcall(intel_cqm_init);
diff --git a/arch/x86/kernel/cpu/perf_event_intel_cstate.c b/arch/x86/kernel/cpu/perf_event_intel_cstate.c
deleted file mode 100644
index 75a38b5a2..000000000
--- a/arch/x86/kernel/cpu/perf_event_intel_cstate.c
+++ /dev/null
@@ -1,694 +0,0 @@
-/*
- * perf_event_intel_cstate.c: support cstate residency counters
- *
- * Copyright (C) 2015, Intel Corp.
- * Author: Kan Liang (kan.liang@intel.com)
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Library General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Library General Public License for more details.
- *
- */
-
-/*
- * This file exports cstate-related free-running (read-only) counters
- * for perf. These counters may be used simultaneously by other tools,
- * such as turbostat. However, it still makes sense to implement them
- * in perf, because we can conveniently collect them together with
- * other events and allow tools to use them without special MSR
- * access code.
- *
- * The events only support system-wide mode counting. There is no
- * sampling support because it is not supported by the hardware.
- *
- * According to counters' scope and category, two PMUs are registered
- * with the perf_event core subsystem.
- * - 'cstate_core': The counter is available for each physical core.
- * The counters include CORE_C*_RESIDENCY.
- * - 'cstate_pkg': The counter is available for each physical package.
- * The counters include PKG_C*_RESIDENCY.
- *
- * All of these counters are specified in the Intel® 64 and IA-32
- * Architectures Software Developer's Manual, Vol. 3B.
- *
- * Model specific counters:
- * MSR_CORE_C1_RES: CORE C1 Residency Counter
- * perf code: 0x00
- * Available model: SLM,AMT
- * Scope: Core (each processor core has an MSR)
- * MSR_CORE_C3_RESIDENCY: CORE C3 Residency Counter
- * perf code: 0x01
- * Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL
- * Scope: Core
- * MSR_CORE_C6_RESIDENCY: CORE C6 Residency Counter
- * perf code: 0x02
- * Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW,SKL
- * Scope: Core
- * MSR_CORE_C7_RESIDENCY: CORE C7 Residency Counter
- * perf code: 0x03
- * Available model: SNB,IVB,HSW,BDW,SKL
- * Scope: Core
- * MSR_PKG_C2_RESIDENCY: Package C2 Residency Counter.
- * perf code: 0x00
- * Available model: SNB,IVB,HSW,BDW,SKL
- * Scope: Package (physical package)
- * MSR_PKG_C3_RESIDENCY: Package C3 Residency Counter.
- * perf code: 0x01
- * Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL
- * Scope: Package (physical package)
- * MSR_PKG_C6_RESIDENCY: Package C6 Residency Counter.
- * perf code: 0x02
- * Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW,SKL
- * Scope: Package (physical package)
- * MSR_PKG_C7_RESIDENCY: Package C7 Residency Counter.
- * perf code: 0x03
- * Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL
- * Scope: Package (physical package)
- * MSR_PKG_C8_RESIDENCY: Package C8 Residency Counter.
- * perf code: 0x04
- * Available model: HSW ULT only
- * Scope: Package (physical package)
- * MSR_PKG_C9_RESIDENCY: Package C9 Residency Counter.
- * perf code: 0x05
- * Available model: HSW ULT only
- * Scope: Package (physical package)
- * MSR_PKG_C10_RESIDENCY: Package C10 Residency Counter.
- * perf code: 0x06
- * Available model: HSW ULT only
- * Scope: Package (physical package)
- *
- */
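
Since these counters only support system-wide counting, a consumer binds the event to a CPU (taken from the PMU's cpumask attribute) rather than to a task. A minimal, hedged sketch of how the PKG C6 counter could be opened via the usual dynamic-PMU interface follows; the PMU name "cstate_pkg" and config value 0x02 come from the definitions later in this file, while the helper name and its caller-supplied type id are assumptions made for the example:

/* Illustrative, not part of this driver: open PKG C6 residency system-wide. */
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>

/*
 * Hypothetical helper: @cstate_pkg_type is the value read from
 * /sys/bus/event_source/devices/cstate_pkg/type, @cpu a CPU listed in
 * that PMU's cpumask attribute.
 */
static int open_pkg_c6(unsigned int cstate_pkg_type, int cpu)
{
	struct perf_event_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = cstate_pkg_type;
	attr.config = 0x02;	/* c6-residency, per the perf codes above */

	/* System-wide only: pid == -1, bound to a specific CPU. */
	return syscall(__NR_perf_event_open, &attr, -1, cpu, -1, 0);
}

Reading the returned file descriptor then yields the accumulated residency as maintained by cstate_pmu_event_update().
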
-
-#include <linux/module.h>
-#include <linux/slab.h>
-#include <linux/perf_event.h>
-#include <asm/cpu_device_id.h>
-#include "perf_event.h"
-
-#define DEFINE_CSTATE_FORMAT_ATTR(_var, _name, _format) \
-static ssize_t __cstate_##_var##_show(struct kobject *kobj, \
- struct kobj_attribute *attr, \
- char *page) \
-{ \
- BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE); \
- return sprintf(page, _format "\n"); \
-} \
-static struct kobj_attribute format_attr_##_var = \
- __ATTR(_name, 0444, __cstate_##_var##_show, NULL)
-
-static ssize_t cstate_get_attr_cpumask(struct device *dev,
- struct device_attribute *attr,
- char *buf);
-
-struct perf_cstate_msr {
- u64 msr;
- struct perf_pmu_events_attr *attr;
- bool (*test)(int idx);
-};
-
-
-/* cstate_core PMU */
-
-static struct pmu cstate_core_pmu;
-static bool has_cstate_core;
-
-enum perf_cstate_core_id {
- /*
- * cstate_core events
- */
- PERF_CSTATE_CORE_C1_RES = 0,
- PERF_CSTATE_CORE_C3_RES,
- PERF_CSTATE_CORE_C6_RES,
- PERF_CSTATE_CORE_C7_RES,
-
- PERF_CSTATE_CORE_EVENT_MAX,
-};
-
-static bool test_core(int idx)
-{
- if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
- boot_cpu_data.x86 != 6)
- return false;
-
- switch (boot_cpu_data.x86_model) {
- case 30: /* 45nm Nehalem */
- case 26: /* 45nm Nehalem-EP */
- case 46: /* 45nm Nehalem-EX */
-
- case 37: /* 32nm Westmere */
- case 44: /* 32nm Westmere-EP */
- case 47: /* 32nm Westmere-EX */
- if (idx == PERF_CSTATE_CORE_C3_RES ||
- idx == PERF_CSTATE_CORE_C6_RES)
- return true;
- break;
- case 42: /* 32nm SandyBridge */
- case 45: /* 32nm SandyBridge-E/EN/EP */
-
- case 58: /* 22nm IvyBridge */
- case 62: /* 22nm IvyBridge-EP/EX */
-
- case 60: /* 22nm Haswell Core */
- case 63: /* 22nm Haswell Server */
- case 69: /* 22nm Haswell ULT */
- case 70: /* 22nm Haswell + GT3e (Intel Iris Pro graphics) */
-
- case 61: /* 14nm Broadwell Core-M */
- case 86: /* 14nm Broadwell Xeon D */
- case 71: /* 14nm Broadwell + GT3e (Intel Iris Pro graphics) */
- case 79: /* 14nm Broadwell Server */
-
- case 78: /* 14nm Skylake Mobile */
- case 94: /* 14nm Skylake Desktop */
- if (idx == PERF_CSTATE_CORE_C3_RES ||
- idx == PERF_CSTATE_CORE_C6_RES ||
- idx == PERF_CSTATE_CORE_C7_RES)
- return true;
- break;
- case 55: /* 22nm Atom "Silvermont" */
- case 77: /* 22nm Atom "Silvermont Avoton/Rangely" */
- case 76: /* 14nm Atom "Airmont" */
- if (idx == PERF_CSTATE_CORE_C1_RES ||
- idx == PERF_CSTATE_CORE_C6_RES)
- return true;
- break;
- }
-
- return false;
-}
-
-PMU_EVENT_ATTR_STRING(c1-residency, evattr_cstate_core_c1, "event=0x00");
-PMU_EVENT_ATTR_STRING(c3-residency, evattr_cstate_core_c3, "event=0x01");
-PMU_EVENT_ATTR_STRING(c6-residency, evattr_cstate_core_c6, "event=0x02");
-PMU_EVENT_ATTR_STRING(c7-residency, evattr_cstate_core_c7, "event=0x03");
-
-static struct perf_cstate_msr core_msr[] = {
- [PERF_CSTATE_CORE_C1_RES] = { MSR_CORE_C1_RES, &evattr_cstate_core_c1, test_core, },
- [PERF_CSTATE_CORE_C3_RES] = { MSR_CORE_C3_RESIDENCY, &evattr_cstate_core_c3, test_core, },
- [PERF_CSTATE_CORE_C6_RES] = { MSR_CORE_C6_RESIDENCY, &evattr_cstate_core_c6, test_core, },
- [PERF_CSTATE_CORE_C7_RES] = { MSR_CORE_C7_RESIDENCY, &evattr_cstate_core_c7, test_core, },
-};
-
-static struct attribute *core_events_attrs[PERF_CSTATE_CORE_EVENT_MAX + 1] = {
- NULL,
-};
-
-static struct attribute_group core_events_attr_group = {
- .name = "events",
- .attrs = core_events_attrs,
-};
-
-DEFINE_CSTATE_FORMAT_ATTR(core_event, event, "config:0-63");
-static struct attribute *core_format_attrs[] = {
- &format_attr_core_event.attr,
- NULL,
-};
-
-static struct attribute_group core_format_attr_group = {
- .name = "format",
- .attrs = core_format_attrs,
-};
-
-static cpumask_t cstate_core_cpu_mask;
-static DEVICE_ATTR(cpumask, S_IRUGO, cstate_get_attr_cpumask, NULL);
-
-static struct attribute *cstate_cpumask_attrs[] = {
- &dev_attr_cpumask.attr,
- NULL,
-};
-
-static struct attribute_group cpumask_attr_group = {
- .attrs = cstate_cpumask_attrs,
-};
-
-static const struct attribute_group *core_attr_groups[] = {
- &core_events_attr_group,
- &core_format_attr_group,
- &cpumask_attr_group,
- NULL,
-};
-
-/* cstate_core PMU end */
-
-
-/* cstate_pkg PMU */
-
-static struct pmu cstate_pkg_pmu;
-static bool has_cstate_pkg;
-
-enum perf_cstate_pkg_id {
- /*
- * cstate_pkg events
- */
- PERF_CSTATE_PKG_C2_RES = 0,
- PERF_CSTATE_PKG_C3_RES,
- PERF_CSTATE_PKG_C6_RES,
- PERF_CSTATE_PKG_C7_RES,
- PERF_CSTATE_PKG_C8_RES,
- PERF_CSTATE_PKG_C9_RES,
- PERF_CSTATE_PKG_C10_RES,
-
- PERF_CSTATE_PKG_EVENT_MAX,
-};
-
-static bool test_pkg(int idx)
-{
- if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
- boot_cpu_data.x86 != 6)
- return false;
-
- switch (boot_cpu_data.x86_model) {
- case 30: /* 45nm Nehalem */
- case 26: /* 45nm Nehalem-EP */
- case 46: /* 45nm Nehalem-EX */
-
- case 37: /* 32nm Westmere */
- case 44: /* 32nm Westmere-EP */
- case 47: /* 32nm Westmere-EX */
- if (idx == PERF_CSTATE_PKG_C3_RES ||
- idx == PERF_CSTATE_PKG_C6_RES ||
- idx == PERF_CSTATE_PKG_C7_RES)
- return true;
- break;
- case 42: /* 32nm SandyBridge */
- case 45: /* 32nm SandyBridge-E/EN/EP */
-
- case 58: /* 22nm IvyBridge */
- case 62: /* 22nm IvyBridge-EP/EX */
-
- case 60: /* 22nm Haswell Core */
- case 63: /* 22nm Haswell Server */
- case 70: /* 22nm Haswell + GT3e (Intel Iris Pro graphics) */
-
- case 61: /* 14nm Broadwell Core-M */
- case 86: /* 14nm Broadwell Xeon D */
- case 71: /* 14nm Broadwell + GT3e (Intel Iris Pro graphics) */
- case 79: /* 14nm Broadwell Server */
-
- case 78: /* 14nm Skylake Mobile */
- case 94: /* 14nm Skylake Desktop */
- if (idx == PERF_CSTATE_PKG_C2_RES ||
- idx == PERF_CSTATE_PKG_C3_RES ||
- idx == PERF_CSTATE_PKG_C6_RES ||
- idx == PERF_CSTATE_PKG_C7_RES)
- return true;
- break;
- case 55: /* 22nm Atom "Silvermont" */
- case 77: /* 22nm Atom "Silvermont Avoton/Rangely" */
- case 76: /* 14nm Atom "Airmont" */
- if (idx == PERF_CSTATE_PKG_C6_RES)
- return true;
- break;
- case 69: /* 22nm Haswell ULT */
- if (idx == PERF_CSTATE_PKG_C2_RES ||
- idx == PERF_CSTATE_PKG_C3_RES ||
- idx == PERF_CSTATE_PKG_C6_RES ||
- idx == PERF_CSTATE_PKG_C7_RES ||
- idx == PERF_CSTATE_PKG_C8_RES ||
- idx == PERF_CSTATE_PKG_C9_RES ||
- idx == PERF_CSTATE_PKG_C10_RES)
- return true;
- break;
- }
-
- return false;
-}
-
-PMU_EVENT_ATTR_STRING(c2-residency, evattr_cstate_pkg_c2, "event=0x00");
-PMU_EVENT_ATTR_STRING(c3-residency, evattr_cstate_pkg_c3, "event=0x01");
-PMU_EVENT_ATTR_STRING(c6-residency, evattr_cstate_pkg_c6, "event=0x02");
-PMU_EVENT_ATTR_STRING(c7-residency, evattr_cstate_pkg_c7, "event=0x03");
-PMU_EVENT_ATTR_STRING(c8-residency, evattr_cstate_pkg_c8, "event=0x04");
-PMU_EVENT_ATTR_STRING(c9-residency, evattr_cstate_pkg_c9, "event=0x05");
-PMU_EVENT_ATTR_STRING(c10-residency, evattr_cstate_pkg_c10, "event=0x06");
-
-static struct perf_cstate_msr pkg_msr[] = {
- [PERF_CSTATE_PKG_C2_RES] = { MSR_PKG_C2_RESIDENCY, &evattr_cstate_pkg_c2, test_pkg, },
- [PERF_CSTATE_PKG_C3_RES] = { MSR_PKG_C3_RESIDENCY, &evattr_cstate_pkg_c3, test_pkg, },
- [PERF_CSTATE_PKG_C6_RES] = { MSR_PKG_C6_RESIDENCY, &evattr_cstate_pkg_c6, test_pkg, },
- [PERF_CSTATE_PKG_C7_RES] = { MSR_PKG_C7_RESIDENCY, &evattr_cstate_pkg_c7, test_pkg, },
- [PERF_CSTATE_PKG_C8_RES] = { MSR_PKG_C8_RESIDENCY, &evattr_cstate_pkg_c8, test_pkg, },
- [PERF_CSTATE_PKG_C9_RES] = { MSR_PKG_C9_RESIDENCY, &evattr_cstate_pkg_c9, test_pkg, },
- [PERF_CSTATE_PKG_C10_RES] = { MSR_PKG_C10_RESIDENCY, &evattr_cstate_pkg_c10, test_pkg, },
-};
-
-static struct attribute *pkg_events_attrs[PERF_CSTATE_PKG_EVENT_MAX + 1] = {
- NULL,
-};
-
-static struct attribute_group pkg_events_attr_group = {
- .name = "events",
- .attrs = pkg_events_attrs,
-};
-
-DEFINE_CSTATE_FORMAT_ATTR(pkg_event, event, "config:0-63");
-static struct attribute *pkg_format_attrs[] = {
- &format_attr_pkg_event.attr,
- NULL,
-};
-static struct attribute_group pkg_format_attr_group = {
- .name = "format",
- .attrs = pkg_format_attrs,
-};
-
-static cpumask_t cstate_pkg_cpu_mask;
-
-static const struct attribute_group *pkg_attr_groups[] = {
- &pkg_events_attr_group,
- &pkg_format_attr_group,
- &cpumask_attr_group,
- NULL,
-};
-
-/* cstate_pkg PMU end */
-
-static ssize_t cstate_get_attr_cpumask(struct device *dev,
- struct device_attribute *attr,
- char *buf)
-{
- struct pmu *pmu = dev_get_drvdata(dev);
-
- if (pmu == &cstate_core_pmu)
- return cpumap_print_to_pagebuf(true, buf, &cstate_core_cpu_mask);
- else if (pmu == &cstate_pkg_pmu)
- return cpumap_print_to_pagebuf(true, buf, &cstate_pkg_cpu_mask);
- else
- return 0;
-}
-
-static int cstate_pmu_event_init(struct perf_event *event)
-{
- u64 cfg = event->attr.config;
- int ret = 0;
-
- if (event->attr.type != event->pmu->type)
- return -ENOENT;
-
- /* unsupported modes and filters */
- if (event->attr.exclude_user ||
- event->attr.exclude_kernel ||
- event->attr.exclude_hv ||
- event->attr.exclude_idle ||
- event->attr.exclude_host ||
- event->attr.exclude_guest ||
- event->attr.sample_period) /* no sampling */
- return -EINVAL;
-
- if (event->pmu == &cstate_core_pmu) {
- if (cfg >= PERF_CSTATE_CORE_EVENT_MAX)
- return -EINVAL;
- if (!core_msr[cfg].attr)
- return -EINVAL;
- event->hw.event_base = core_msr[cfg].msr;
- } else if (event->pmu == &cstate_pkg_pmu) {
- if (cfg >= PERF_CSTATE_PKG_EVENT_MAX)
- return -EINVAL;
- if (!pkg_msr[cfg].attr)
- return -EINVAL;
- event->hw.event_base = pkg_msr[cfg].msr;
- } else
- return -ENOENT;
-
- /* must be done before validate_group */
- event->hw.config = cfg;
- event->hw.idx = -1;
-
- return ret;
-}
-
-static inline u64 cstate_pmu_read_counter(struct perf_event *event)
-{
- u64 val;
-
- rdmsrl(event->hw.event_base, val);
- return val;
-}
-
-static void cstate_pmu_event_update(struct perf_event *event)
-{
- struct hw_perf_event *hwc = &event->hw;
- u64 prev_raw_count, new_raw_count;
-
-again:
- prev_raw_count = local64_read(&hwc->prev_count);
- new_raw_count = cstate_pmu_read_counter(event);
-
- if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
- new_raw_count) != prev_raw_count)
- goto again;
-
- local64_add(new_raw_count - prev_raw_count, &event->count);
-}
-
-static void cstate_pmu_event_start(struct perf_event *event, int mode)
-{
- local64_set(&event->hw.prev_count, cstate_pmu_read_counter(event));
-}
-
-static void cstate_pmu_event_stop(struct perf_event *event, int mode)
-{
- cstate_pmu_event_update(event);
-}
-
-static void cstate_pmu_event_del(struct perf_event *event, int mode)
-{
- cstate_pmu_event_stop(event, PERF_EF_UPDATE);
-}
-
-static int cstate_pmu_event_add(struct perf_event *event, int mode)
-{
- if (mode & PERF_EF_START)
- cstate_pmu_event_start(event, mode);
-
- return 0;
-}
-
-static void cstate_cpu_exit(int cpu)
-{
- int i, id, target;
-
- /* cpu exit for cstate core */
- if (has_cstate_core) {
- id = topology_core_id(cpu);
- target = -1;
-
- for_each_online_cpu(i) {
- if (i == cpu)
- continue;
- if (id == topology_core_id(i)) {
- target = i;
- break;
- }
- }
- if (cpumask_test_and_clear_cpu(cpu, &cstate_core_cpu_mask) && target >= 0)
- cpumask_set_cpu(target, &cstate_core_cpu_mask);
- WARN_ON(cpumask_empty(&cstate_core_cpu_mask));
- if (target >= 0)
- perf_pmu_migrate_context(&cstate_core_pmu, cpu, target);
- }
-
- /* cpu exit for cstate pkg */
- if (has_cstate_pkg) {
- id = topology_physical_package_id(cpu);
- target = -1;
-
- for_each_online_cpu(i) {
- if (i == cpu)
- continue;
- if (id == topology_physical_package_id(i)) {
- target = i;
- break;
- }
- }
- if (cpumask_test_and_clear_cpu(cpu, &cstate_pkg_cpu_mask) && target >= 0)
- cpumask_set_cpu(target, &cstate_pkg_cpu_mask);
- WARN_ON(cpumask_empty(&cstate_pkg_cpu_mask));
- if (target >= 0)
- perf_pmu_migrate_context(&cstate_pkg_pmu, cpu, target);
- }
-}
-
-static void cstate_cpu_init(int cpu)
-{
- int i, id;
-
- /* cpu init for cstate core */
- if (has_cstate_core) {
- id = topology_core_id(cpu);
- for_each_cpu(i, &cstate_core_cpu_mask) {
- if (id == topology_core_id(i))
- break;
- }
- if (i >= nr_cpu_ids)
- cpumask_set_cpu(cpu, &cstate_core_cpu_mask);
- }
-
- /* cpu init for cstate pkg */
- if (has_cstate_pkg) {
- id = topology_physical_package_id(cpu);
- for_each_cpu(i, &cstate_pkg_cpu_mask) {
- if (id == topology_physical_package_id(i))
- break;
- }
- if (i >= nr_cpu_ids)
- cpumask_set_cpu(cpu, &cstate_pkg_cpu_mask);
- }
-}
-
-static int cstate_cpu_notifier(struct notifier_block *self,
- unsigned long action, void *hcpu)
-{
- unsigned int cpu = (long)hcpu;
-
- switch (action & ~CPU_TASKS_FROZEN) {
- case CPU_UP_PREPARE:
- break;
- case CPU_STARTING:
- cstate_cpu_init(cpu);
- break;
- case CPU_UP_CANCELED:
- case CPU_DYING:
- break;
- case CPU_ONLINE:
- case CPU_DEAD:
- break;
- case CPU_DOWN_PREPARE:
- cstate_cpu_exit(cpu);
- break;
- default:
- break;
- }
-
- return NOTIFY_OK;
-}
-
-/*
- * Probe the cstate events and insert the available ones into the sysfs attrs.
- * Return false if there are no available events.
- */
-static bool cstate_probe_msr(struct perf_cstate_msr *msr,
- struct attribute **events_attrs,
- int max_event_nr)
-{
- int i, j = 0;
- u64 val;
-
- /* Probe the cstate events. */
- for (i = 0; i < max_event_nr; i++) {
- if (!msr[i].test(i) || rdmsrl_safe(msr[i].msr, &val))
- msr[i].attr = NULL;
- }
-
- /* List remaining events in the sysfs attrs. */
- for (i = 0; i < max_event_nr; i++) {
- if (msr[i].attr)
- events_attrs[j++] = &msr[i].attr->attr.attr;
- }
- events_attrs[j] = NULL;
-
- return j > 0;
-}
-
-static int __init cstate_init(void)
-{
- /* SLM has different MSR for PKG C6 */
- switch (boot_cpu_data.x86_model) {
- case 55:
- case 76:
- case 77:
- pkg_msr[PERF_CSTATE_PKG_C6_RES].msr = MSR_PKG_C7_RESIDENCY;
- }
-
- if (cstate_probe_msr(core_msr, core_events_attrs, PERF_CSTATE_CORE_EVENT_MAX))
- has_cstate_core = true;
-
- if (cstate_probe_msr(pkg_msr, pkg_events_attrs, PERF_CSTATE_PKG_EVENT_MAX))
- has_cstate_pkg = true;
-
- return (has_cstate_core || has_cstate_pkg) ? 0 : -ENODEV;
-}
-
-static void __init cstate_cpumask_init(void)
-{
- int cpu;
-
- cpu_notifier_register_begin();
-
- for_each_online_cpu(cpu)
- cstate_cpu_init(cpu);
-
- __perf_cpu_notifier(cstate_cpu_notifier);
-
- cpu_notifier_register_done();
-}
-
-static struct pmu cstate_core_pmu = {
- .attr_groups = core_attr_groups,
- .name = "cstate_core",
- .task_ctx_nr = perf_invalid_context,
- .event_init = cstate_pmu_event_init,
- .add = cstate_pmu_event_add, /* must have */
- .del = cstate_pmu_event_del, /* must have */
- .start = cstate_pmu_event_start,
- .stop = cstate_pmu_event_stop,
- .read = cstate_pmu_event_update,
- .capabilities = PERF_PMU_CAP_NO_INTERRUPT,
-};
-
-static struct pmu cstate_pkg_pmu = {
- .attr_groups = pkg_attr_groups,
- .name = "cstate_pkg",
- .task_ctx_nr = perf_invalid_context,
- .event_init = cstate_pmu_event_init,
- .add = cstate_pmu_event_add, /* must have */
- .del = cstate_pmu_event_del, /* must have */
- .start = cstate_pmu_event_start,
- .stop = cstate_pmu_event_stop,
- .read = cstate_pmu_event_update,
- .capabilities = PERF_PMU_CAP_NO_INTERRUPT,
-};
-
-static void __init cstate_pmus_register(void)
-{
- int err;
-
- if (has_cstate_core) {
- err = perf_pmu_register(&cstate_core_pmu, cstate_core_pmu.name, -1);
- if (WARN_ON(err))
- pr_info("Failed to register PMU %s error %d\n",
- cstate_core_pmu.name, err);
- }
-
- if (has_cstate_pkg) {
- err = perf_pmu_register(&cstate_pkg_pmu, cstate_pkg_pmu.name, -1);
- if (WARN_ON(err))
- pr_info("Failed to register PMU %s error %d\n",
- cstate_pkg_pmu.name, err);
- }
-}
-
-static int __init cstate_pmu_init(void)
-{
- int err;
-
- if (cpu_has_hypervisor)
- return -ENODEV;
-
- err = cstate_init();
- if (err)
- return err;
-
- cstate_cpumask_init();
-
- cstate_pmus_register();
-
- return 0;
-}
-
-device_initcall(cstate_pmu_init);
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
deleted file mode 100644
index 955140140..000000000
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ /dev/null
@@ -1,1386 +0,0 @@
-#include <linux/bitops.h>
-#include <linux/types.h>
-#include <linux/slab.h>
-
-#include <asm/perf_event.h>
-#include <asm/insn.h>
-
-#include "perf_event.h"
-
-/* The size of a BTS record in bytes: */
-#define BTS_RECORD_SIZE 24
-
-#define BTS_BUFFER_SIZE (PAGE_SIZE << 4)
-#define PEBS_BUFFER_SIZE (PAGE_SIZE << 4)
-#define PEBS_FIXUP_SIZE PAGE_SIZE
-
-/*
- * pebs_record_32 for p4 and core not supported
-
-struct pebs_record_32 {
- u32 flags, ip;
- u32 ax, bc, cx, dx;
- u32 si, di, bp, sp;
-};
-
- */
-
-union intel_x86_pebs_dse {
- u64 val;
- struct {
- unsigned int ld_dse:4;
- unsigned int ld_stlb_miss:1;
- unsigned int ld_locked:1;
- unsigned int ld_reserved:26;
- };
- struct {
- unsigned int st_l1d_hit:1;
- unsigned int st_reserved1:3;
- unsigned int st_stlb_miss:1;
- unsigned int st_locked:1;
- unsigned int st_reserved2:26;
- };
-};
-
-
-/*
- * Map PEBS Load Latency Data Source encodings to generic
- * memory data source information
- */
-#define P(a, b) PERF_MEM_S(a, b)
-#define OP_LH (P(OP, LOAD) | P(LVL, HIT))
-#define SNOOP_NONE_MISS (P(SNOOP, NONE) | P(SNOOP, MISS))
-
-/* Version for Sandy Bridge and later */
-static u64 pebs_data_source[] = {
- P(OP, LOAD) | P(LVL, MISS) | P(LVL, L3) | P(SNOOP, NA),/* 0x00:ukn L3 */
- OP_LH | P(LVL, L1) | P(SNOOP, NONE), /* 0x01: L1 local */
- OP_LH | P(LVL, LFB) | P(SNOOP, NONE), /* 0x02: LFB hit */
- OP_LH | P(LVL, L2) | P(SNOOP, NONE), /* 0x03: L2 hit */
- OP_LH | P(LVL, L3) | P(SNOOP, NONE), /* 0x04: L3 hit */
- OP_LH | P(LVL, L3) | P(SNOOP, MISS), /* 0x05: L3 hit, snoop miss */
- OP_LH | P(LVL, L3) | P(SNOOP, HIT), /* 0x06: L3 hit, snoop hit */
- OP_LH | P(LVL, L3) | P(SNOOP, HITM), /* 0x07: L3 hit, snoop hitm */
- OP_LH | P(LVL, REM_CCE1) | P(SNOOP, HIT), /* 0x08: L3 miss snoop hit */
- OP_LH | P(LVL, REM_CCE1) | P(SNOOP, HITM), /* 0x09: L3 miss snoop hitm*/
- OP_LH | P(LVL, LOC_RAM) | P(SNOOP, HIT), /* 0x0a: L3 miss, shared */
- OP_LH | P(LVL, REM_RAM1) | P(SNOOP, HIT), /* 0x0b: L3 miss, shared */
- OP_LH | P(LVL, LOC_RAM) | SNOOP_NONE_MISS,/* 0x0c: L3 miss, excl */
- OP_LH | P(LVL, REM_RAM1) | SNOOP_NONE_MISS,/* 0x0d: L3 miss, excl */
- OP_LH | P(LVL, IO) | P(SNOOP, NONE), /* 0x0e: I/O */
- OP_LH | P(LVL, UNC) | P(SNOOP, NONE), /* 0x0f: uncached */
-};
-
-/* Patch up minor differences in the bits */
-void __init intel_pmu_pebs_data_source_nhm(void)
-{
- pebs_data_source[0x05] = OP_LH | P(LVL, L3) | P(SNOOP, HIT);
- pebs_data_source[0x06] = OP_LH | P(LVL, L3) | P(SNOOP, HITM);
- pebs_data_source[0x07] = OP_LH | P(LVL, L3) | P(SNOOP, HITM);
-}
-
-static u64 precise_store_data(u64 status)
-{
- union intel_x86_pebs_dse dse;
- u64 val = P(OP, STORE) | P(SNOOP, NA) | P(LVL, L1) | P(TLB, L2);
-
- dse.val = status;
-
- /*
- * bit 4: TLB access
- * 1 = store missed 2nd level TLB
- *
- * so it either hit the walker or the OS
- * otherwise hit 2nd level TLB
- */
- if (dse.st_stlb_miss)
- val |= P(TLB, MISS);
- else
- val |= P(TLB, HIT);
-
- /*
- * bit 0: hit L1 data cache
- * if not set, then all we know is that
- * it missed L1D
- */
- if (dse.st_l1d_hit)
- val |= P(LVL, HIT);
- else
- val |= P(LVL, MISS);
-
- /*
- * bit 5: Locked prefix
- */
- if (dse.st_locked)
- val |= P(LOCK, LOCKED);
-
- return val;
-}
-
-static u64 precise_datala_hsw(struct perf_event *event, u64 status)
-{
- union perf_mem_data_src dse;
-
- dse.val = PERF_MEM_NA;
-
- if (event->hw.flags & PERF_X86_EVENT_PEBS_ST_HSW)
- dse.mem_op = PERF_MEM_OP_STORE;
- else if (event->hw.flags & PERF_X86_EVENT_PEBS_LD_HSW)
- dse.mem_op = PERF_MEM_OP_LOAD;
-
- /*
- * L1 info only valid for following events:
- *
- * MEM_UOPS_RETIRED.STLB_MISS_STORES
- * MEM_UOPS_RETIRED.LOCK_STORES
- * MEM_UOPS_RETIRED.SPLIT_STORES
- * MEM_UOPS_RETIRED.ALL_STORES
- */
- if (event->hw.flags & PERF_X86_EVENT_PEBS_ST_HSW) {
- if (status & 1)
- dse.mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT;
- else
- dse.mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_MISS;
- }
- return dse.val;
-}
-
-static u64 load_latency_data(u64 status)
-{
- union intel_x86_pebs_dse dse;
- u64 val;
- int model = boot_cpu_data.x86_model;
- int fam = boot_cpu_data.x86;
-
- dse.val = status;
-
- /*
- * use the mapping table for bit 0-3
- */
- val = pebs_data_source[dse.ld_dse];
-
- /*
- * Nehalem models do not support TLB, Lock infos
- */
- if (fam == 0x6 && (model == 26 || model == 30
- || model == 31 || model == 46)) {
- val |= P(TLB, NA) | P(LOCK, NA);
- return val;
- }
- /*
- * bit 4: TLB access
- * 0 = did not miss 2nd level TLB
- * 1 = missed 2nd level TLB
- */
- if (dse.ld_stlb_miss)
- val |= P(TLB, MISS) | P(TLB, L2);
- else
- val |= P(TLB, HIT) | P(TLB, L1) | P(TLB, L2);
-
- /*
- * bit 5: locked prefix
- */
- if (dse.ld_locked)
- val |= P(LOCK, LOCKED);
-
- return val;
-}
-
-struct pebs_record_core {
- u64 flags, ip;
- u64 ax, bx, cx, dx;
- u64 si, di, bp, sp;
- u64 r8, r9, r10, r11;
- u64 r12, r13, r14, r15;
-};
-
-struct pebs_record_nhm {
- u64 flags, ip;
- u64 ax, bx, cx, dx;
- u64 si, di, bp, sp;
- u64 r8, r9, r10, r11;
- u64 r12, r13, r14, r15;
- u64 status, dla, dse, lat;
-};
-
-/*
- * Same as pebs_record_nhm, with two additional fields.
- */
-struct pebs_record_hsw {
- u64 flags, ip;
- u64 ax, bx, cx, dx;
- u64 si, di, bp, sp;
- u64 r8, r9, r10, r11;
- u64 r12, r13, r14, r15;
- u64 status, dla, dse, lat;
- u64 real_ip, tsx_tuning;
-};
-
-union hsw_tsx_tuning {
- struct {
- u32 cycles_last_block : 32,
- hle_abort : 1,
- rtm_abort : 1,
- instruction_abort : 1,
- non_instruction_abort : 1,
- retry : 1,
- data_conflict : 1,
- capacity_writes : 1,
- capacity_reads : 1;
- };
- u64 value;
-};
-
-#define PEBS_HSW_TSX_FLAGS 0xff00000000ULL
-
-/* Same as HSW, plus TSC */
-
-struct pebs_record_skl {
- u64 flags, ip;
- u64 ax, bx, cx, dx;
- u64 si, di, bp, sp;
- u64 r8, r9, r10, r11;
- u64 r12, r13, r14, r15;
- u64 status, dla, dse, lat;
- u64 real_ip, tsx_tuning;
- u64 tsc;
-};
-
-void init_debug_store_on_cpu(int cpu)
-{
- struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
-
- if (!ds)
- return;
-
- wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA,
- (u32)((u64)(unsigned long)ds),
- (u32)((u64)(unsigned long)ds >> 32));
-}
-
-void fini_debug_store_on_cpu(int cpu)
-{
- if (!per_cpu(cpu_hw_events, cpu).ds)
- return;
-
- wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0);
-}
-
-static DEFINE_PER_CPU(void *, insn_buffer);
-
-static int alloc_pebs_buffer(int cpu)
-{
- struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
- int node = cpu_to_node(cpu);
- int max;
- void *buffer, *ibuffer;
-
- if (!x86_pmu.pebs)
- return 0;
-
- buffer = kzalloc_node(x86_pmu.pebs_buffer_size, GFP_KERNEL, node);
- if (unlikely(!buffer))
- return -ENOMEM;
-
- /*
- * HSW+ already provides us the eventing ip; no need to allocate this
- * buffer then.
- */
- if (x86_pmu.intel_cap.pebs_format < 2) {
- ibuffer = kzalloc_node(PEBS_FIXUP_SIZE, GFP_KERNEL, node);
- if (!ibuffer) {
- kfree(buffer);
- return -ENOMEM;
- }
- per_cpu(insn_buffer, cpu) = ibuffer;
- }
-
- max = x86_pmu.pebs_buffer_size / x86_pmu.pebs_record_size;
-
- ds->pebs_buffer_base = (u64)(unsigned long)buffer;
- ds->pebs_index = ds->pebs_buffer_base;
- ds->pebs_absolute_maximum = ds->pebs_buffer_base +
- max * x86_pmu.pebs_record_size;
-
- return 0;
-}
-
-static void release_pebs_buffer(int cpu)
-{
- struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
-
- if (!ds || !x86_pmu.pebs)
- return;
-
- kfree(per_cpu(insn_buffer, cpu));
- per_cpu(insn_buffer, cpu) = NULL;
-
- kfree((void *)(unsigned long)ds->pebs_buffer_base);
- ds->pebs_buffer_base = 0;
-}
-
-static int alloc_bts_buffer(int cpu)
-{
- struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
- int node = cpu_to_node(cpu);
- int max, thresh;
- void *buffer;
-
- if (!x86_pmu.bts)
- return 0;
-
- buffer = kzalloc_node(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_NOWARN, node);
- if (unlikely(!buffer)) {
- WARN_ONCE(1, "%s: BTS buffer allocation failure\n", __func__);
- return -ENOMEM;
- }
-
- max = BTS_BUFFER_SIZE / BTS_RECORD_SIZE;
- thresh = max / 16;
-
- ds->bts_buffer_base = (u64)(unsigned long)buffer;
- ds->bts_index = ds->bts_buffer_base;
- ds->bts_absolute_maximum = ds->bts_buffer_base +
- max * BTS_RECORD_SIZE;
- ds->bts_interrupt_threshold = ds->bts_absolute_maximum -
- thresh * BTS_RECORD_SIZE;
-
- return 0;
-}
-
-static void release_bts_buffer(int cpu)
-{
- struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
-
- if (!ds || !x86_pmu.bts)
- return;
-
- kfree((void *)(unsigned long)ds->bts_buffer_base);
- ds->bts_buffer_base = 0;
-}
-
-static int alloc_ds_buffer(int cpu)
-{
- int node = cpu_to_node(cpu);
- struct debug_store *ds;
-
- ds = kzalloc_node(sizeof(*ds), GFP_KERNEL, node);
- if (unlikely(!ds))
- return -ENOMEM;
-
- per_cpu(cpu_hw_events, cpu).ds = ds;
-
- return 0;
-}
-
-static void release_ds_buffer(int cpu)
-{
- struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
-
- if (!ds)
- return;
-
- per_cpu(cpu_hw_events, cpu).ds = NULL;
- kfree(ds);
-}
-
-void release_ds_buffers(void)
-{
- int cpu;
-
- if (!x86_pmu.bts && !x86_pmu.pebs)
- return;
-
- get_online_cpus();
- for_each_online_cpu(cpu)
- fini_debug_store_on_cpu(cpu);
-
- for_each_possible_cpu(cpu) {
- release_pebs_buffer(cpu);
- release_bts_buffer(cpu);
- release_ds_buffer(cpu);
- }
- put_online_cpus();
-}
-
-void reserve_ds_buffers(void)
-{
- int bts_err = 0, pebs_err = 0;
- int cpu;
-
- x86_pmu.bts_active = 0;
- x86_pmu.pebs_active = 0;
-
- if (!x86_pmu.bts && !x86_pmu.pebs)
- return;
-
- if (!x86_pmu.bts)
- bts_err = 1;
-
- if (!x86_pmu.pebs)
- pebs_err = 1;
-
- get_online_cpus();
-
- for_each_possible_cpu(cpu) {
- if (alloc_ds_buffer(cpu)) {
- bts_err = 1;
- pebs_err = 1;
- }
-
- if (!bts_err && alloc_bts_buffer(cpu))
- bts_err = 1;
-
- if (!pebs_err && alloc_pebs_buffer(cpu))
- pebs_err = 1;
-
- if (bts_err && pebs_err)
- break;
- }
-
- if (bts_err) {
- for_each_possible_cpu(cpu)
- release_bts_buffer(cpu);
- }
-
- if (pebs_err) {
- for_each_possible_cpu(cpu)
- release_pebs_buffer(cpu);
- }
-
- if (bts_err && pebs_err) {
- for_each_possible_cpu(cpu)
- release_ds_buffer(cpu);
- } else {
- if (x86_pmu.bts && !bts_err)
- x86_pmu.bts_active = 1;
-
- if (x86_pmu.pebs && !pebs_err)
- x86_pmu.pebs_active = 1;
-
- for_each_online_cpu(cpu)
- init_debug_store_on_cpu(cpu);
- }
-
- put_online_cpus();
-}
-
-/*
- * BTS
- */
-
-struct event_constraint bts_constraint =
- EVENT_CONSTRAINT(0, 1ULL << INTEL_PMC_IDX_FIXED_BTS, 0);
-
-void intel_pmu_enable_bts(u64 config)
-{
- unsigned long debugctlmsr;
-
- debugctlmsr = get_debugctlmsr();
-
- debugctlmsr |= DEBUGCTLMSR_TR;
- debugctlmsr |= DEBUGCTLMSR_BTS;
- if (config & ARCH_PERFMON_EVENTSEL_INT)
- debugctlmsr |= DEBUGCTLMSR_BTINT;
-
- if (!(config & ARCH_PERFMON_EVENTSEL_OS))
- debugctlmsr |= DEBUGCTLMSR_BTS_OFF_OS;
-
- if (!(config & ARCH_PERFMON_EVENTSEL_USR))
- debugctlmsr |= DEBUGCTLMSR_BTS_OFF_USR;
-
- update_debugctlmsr(debugctlmsr);
-}
-
-void intel_pmu_disable_bts(void)
-{
- struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
- unsigned long debugctlmsr;
-
- if (!cpuc->ds)
- return;
-
- debugctlmsr = get_debugctlmsr();
-
- debugctlmsr &=
- ~(DEBUGCTLMSR_TR | DEBUGCTLMSR_BTS | DEBUGCTLMSR_BTINT |
- DEBUGCTLMSR_BTS_OFF_OS | DEBUGCTLMSR_BTS_OFF_USR);
-
- update_debugctlmsr(debugctlmsr);
-}
-
-int intel_pmu_drain_bts_buffer(void)
-{
- struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
- struct debug_store *ds = cpuc->ds;
- struct bts_record {
- u64 from;
- u64 to;
- u64 flags;
- };
- struct perf_event *event = cpuc->events[INTEL_PMC_IDX_FIXED_BTS];
- struct bts_record *at, *base, *top;
- struct perf_output_handle handle;
- struct perf_event_header header;
- struct perf_sample_data data;
- unsigned long skip = 0;
- struct pt_regs regs;
-
- if (!event)
- return 0;
-
- if (!x86_pmu.bts_active)
- return 0;
-
- base = (struct bts_record *)(unsigned long)ds->bts_buffer_base;
- top = (struct bts_record *)(unsigned long)ds->bts_index;
-
- if (top <= base)
- return 0;
-
- memset(&regs, 0, sizeof(regs));
-
- ds->bts_index = ds->bts_buffer_base;
-
- perf_sample_data_init(&data, 0, event->hw.last_period);
-
- /*
- * BTS leaks kernel addresses in branches across the cpl boundary,
- * such as traps or system calls, so unless the user is asking for
- * kernel tracing (and right now it's not possible), we'd need to
- * filter them out. But first we need to count how many of those we
- * have in the current batch. This is an extra O(n) pass, however,
- * it's much faster than the other one especially considering that
- * n <= 2560 (BTS_BUFFER_SIZE / BTS_RECORD_SIZE * 15/16; see the
- * alloc_bts_buffer()).
- */
- for (at = base; at < top; at++) {
- /*
- * Note that right now *this* BTS code only works if
- * attr::exclude_kernel is set, but let's keep this extra
- * check here in case that changes.
- */
- if (event->attr.exclude_kernel &&
- (kernel_ip(at->from) || kernel_ip(at->to)))
- skip++;
- }
-
- /*
- * Prepare a generic sample, i.e. fill in the invariant fields.
- * We will overwrite the from and to address before we output
- * the sample.
- */
- perf_prepare_sample(&header, &data, event, &regs);
-
- if (perf_output_begin(&handle, event, header.size *
- (top - base - skip)))
- return 1;
-
- for (at = base; at < top; at++) {
- /* Filter out any records that contain kernel addresses. */
- if (event->attr.exclude_kernel &&
- (kernel_ip(at->from) || kernel_ip(at->to)))
- continue;
-
- data.ip = at->from;
- data.addr = at->to;
-
- perf_output_sample(&handle, &header, &data, event);
- }
-
- perf_output_end(&handle);
-
- /* There's new data available. */
- event->hw.interrupts++;
- event->pending_kill = POLL_IN;
- return 1;
-}
-
-static inline void intel_pmu_drain_pebs_buffer(void)
-{
- struct pt_regs regs;
-
- x86_pmu.drain_pebs(&regs);
-}
-
-void intel_pmu_pebs_sched_task(struct perf_event_context *ctx, bool sched_in)
-{
- if (!sched_in)
- intel_pmu_drain_pebs_buffer();
-}
-
-/*
- * PEBS
- */
-struct event_constraint intel_core2_pebs_event_constraints[] = {
- INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY */
- INTEL_FLAGS_UEVENT_CONSTRAINT(0xfec1, 0x1), /* X87_OPS_RETIRED.ANY */
- INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c5, 0x1), /* BR_INST_RETIRED.MISPRED */
-	INTEL_FLAGS_UEVENT_CONSTRAINT(0x1fc7, 0x1), /* SIMD_INST_RETIRED.ANY */
- INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED.* */
- /* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
- INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x01),
- EVENT_CONSTRAINT_END
-};
-
-struct event_constraint intel_atom_pebs_event_constraints[] = {
- INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY */
- INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c5, 0x1), /* MISPREDICTED_BRANCH_RETIRED */
- INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED.* */
- /* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
- INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x01),
- /* Allow all events as PEBS with no flags */
- INTEL_ALL_EVENT_CONSTRAINT(0, 0x1),
- EVENT_CONSTRAINT_END
-};
-
-struct event_constraint intel_slm_pebs_event_constraints[] = {
- /* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
- INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x1),
- /* Allow all events as PEBS with no flags */
- INTEL_ALL_EVENT_CONSTRAINT(0, 0x1),
- EVENT_CONSTRAINT_END
-};
-
-struct event_constraint intel_nehalem_pebs_event_constraints[] = {
- INTEL_PLD_CONSTRAINT(0x100b, 0xf), /* MEM_INST_RETIRED.* */
- INTEL_FLAGS_EVENT_CONSTRAINT(0x0f, 0xf), /* MEM_UNCORE_RETIRED.* */
- INTEL_FLAGS_UEVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */
- INTEL_FLAGS_EVENT_CONSTRAINT(0xc0, 0xf), /* INST_RETIRED.ANY */
- INTEL_EVENT_CONSTRAINT(0xc2, 0xf), /* UOPS_RETIRED.* */
- INTEL_FLAGS_EVENT_CONSTRAINT(0xc4, 0xf), /* BR_INST_RETIRED.* */
- INTEL_FLAGS_UEVENT_CONSTRAINT(0x02c5, 0xf), /* BR_MISP_RETIRED.NEAR_CALL */
- INTEL_FLAGS_EVENT_CONSTRAINT(0xc7, 0xf), /* SSEX_UOPS_RETIRED.* */
- INTEL_FLAGS_UEVENT_CONSTRAINT(0x20c8, 0xf), /* ITLB_MISS_RETIRED */
- INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0xf), /* MEM_LOAD_RETIRED.* */
- INTEL_FLAGS_EVENT_CONSTRAINT(0xf7, 0xf), /* FP_ASSIST.* */
- /* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
- INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x0f),
- EVENT_CONSTRAINT_END
-};
-
-struct event_constraint intel_westmere_pebs_event_constraints[] = {
- INTEL_PLD_CONSTRAINT(0x100b, 0xf), /* MEM_INST_RETIRED.* */
- INTEL_FLAGS_EVENT_CONSTRAINT(0x0f, 0xf), /* MEM_UNCORE_RETIRED.* */
- INTEL_FLAGS_UEVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */
- INTEL_FLAGS_EVENT_CONSTRAINT(0xc0, 0xf), /* INSTR_RETIRED.* */
- INTEL_EVENT_CONSTRAINT(0xc2, 0xf), /* UOPS_RETIRED.* */
- INTEL_FLAGS_EVENT_CONSTRAINT(0xc4, 0xf), /* BR_INST_RETIRED.* */
- INTEL_FLAGS_EVENT_CONSTRAINT(0xc5, 0xf), /* BR_MISP_RETIRED.* */
- INTEL_FLAGS_EVENT_CONSTRAINT(0xc7, 0xf), /* SSEX_UOPS_RETIRED.* */
- INTEL_FLAGS_UEVENT_CONSTRAINT(0x20c8, 0xf), /* ITLB_MISS_RETIRED */
- INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0xf), /* MEM_LOAD_RETIRED.* */
- INTEL_FLAGS_EVENT_CONSTRAINT(0xf7, 0xf), /* FP_ASSIST.* */
- /* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
- INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x0f),
- EVENT_CONSTRAINT_END
-};
-
-struct event_constraint intel_snb_pebs_event_constraints[] = {
- INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
- INTEL_PLD_CONSTRAINT(0x01cd, 0x8), /* MEM_TRANS_RETIRED.LAT_ABOVE_THR */
- INTEL_PST_CONSTRAINT(0x02cd, 0x8), /* MEM_TRANS_RETIRED.PRECISE_STORES */
- /* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
- INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c2, 0xf),
- INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf), /* MEM_UOP_RETIRED.* */
- INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */
- INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
- INTEL_EXCLEVT_CONSTRAINT(0xd3, 0xf), /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */
- /* Allow all events as PEBS with no flags */
- INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
- EVENT_CONSTRAINT_END
-};
-
-struct event_constraint intel_ivb_pebs_event_constraints[] = {
- INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
- INTEL_PLD_CONSTRAINT(0x01cd, 0x8), /* MEM_TRANS_RETIRED.LAT_ABOVE_THR */
- INTEL_PST_CONSTRAINT(0x02cd, 0x8), /* MEM_TRANS_RETIRED.PRECISE_STORES */
- /* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
- INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c2, 0xf),
- /* INST_RETIRED.PREC_DIST, inv=1, cmask=16 (cycles:ppp). */
- INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c0, 0x2),
- INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf), /* MEM_UOP_RETIRED.* */
- INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */
- INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
- INTEL_EXCLEVT_CONSTRAINT(0xd3, 0xf), /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */
- /* Allow all events as PEBS with no flags */
- INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
- EVENT_CONSTRAINT_END
-};
-
-struct event_constraint intel_hsw_pebs_event_constraints[] = {
- INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
- INTEL_PLD_CONSTRAINT(0x01cd, 0xf), /* MEM_TRANS_RETIRED.* */
- /* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
- INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c2, 0xf),
- /* INST_RETIRED.PREC_DIST, inv=1, cmask=16 (cycles:ppp). */
- INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c0, 0x2),
- INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_NA(0x01c2, 0xf), /* UOPS_RETIRED.ALL */
- INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(0x11d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_LOADS */
- INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(0x21d0, 0xf), /* MEM_UOPS_RETIRED.LOCK_LOADS */
- INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(0x41d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_LOADS */
- INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(0x81d0, 0xf), /* MEM_UOPS_RETIRED.ALL_LOADS */
- INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XST(0x12d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_STORES */
- INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XST(0x42d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_STORES */
- INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XST(0x82d0, 0xf), /* MEM_UOPS_RETIRED.ALL_STORES */
- INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_XLD(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */
- INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_XLD(0xd2, 0xf), /* MEM_LOAD_UOPS_L3_HIT_RETIRED.* */
- INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_XLD(0xd3, 0xf), /* MEM_LOAD_UOPS_L3_MISS_RETIRED.* */
- /* Allow all events as PEBS with no flags */
- INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
- EVENT_CONSTRAINT_END
-};
-
-struct event_constraint intel_skl_pebs_event_constraints[] = {
- INTEL_FLAGS_UEVENT_CONSTRAINT(0x1c0, 0x2), /* INST_RETIRED.PREC_DIST */
- /* INST_RETIRED.PREC_DIST, inv=1, cmask=16 (cycles:ppp). */
- INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c0, 0x2),
- /* INST_RETIRED.TOTAL_CYCLES_PS (inv=1, cmask=16) (cycles:p). */
- INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x0f),
- INTEL_PLD_CONSTRAINT(0x1cd, 0xf), /* MEM_TRANS_RETIRED.* */
- INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x11d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_LOADS */
- INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x12d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_STORES */
- INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x21d0, 0xf), /* MEM_INST_RETIRED.LOCK_LOADS */
- INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x22d0, 0xf), /* MEM_INST_RETIRED.LOCK_STORES */
- INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x41d0, 0xf), /* MEM_INST_RETIRED.SPLIT_LOADS */
- INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x42d0, 0xf), /* MEM_INST_RETIRED.SPLIT_STORES */
- INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x81d0, 0xf), /* MEM_INST_RETIRED.ALL_LOADS */
- INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x82d0, 0xf), /* MEM_INST_RETIRED.ALL_STORES */
- INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd1, 0xf), /* MEM_LOAD_RETIRED.* */
- INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd2, 0xf), /* MEM_LOAD_L3_HIT_RETIRED.* */
- INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd3, 0xf), /* MEM_LOAD_L3_MISS_RETIRED.* */
- /* Allow all events as PEBS with no flags */
- INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
- EVENT_CONSTRAINT_END
-};
-
-struct event_constraint *intel_pebs_constraints(struct perf_event *event)
-{
- struct event_constraint *c;
-
- if (!event->attr.precise_ip)
- return NULL;
-
- if (x86_pmu.pebs_constraints) {
- for_each_event_constraint(c, x86_pmu.pebs_constraints) {
- if ((event->hw.config & c->cmask) == c->code) {
- event->hw.flags |= c->flags;
- return c;
- }
- }
- }
-
- return &emptyconstraint;
-}
-
-static inline bool pebs_is_enabled(struct cpu_hw_events *cpuc)
-{
- return (cpuc->pebs_enabled & ((1ULL << MAX_PEBS_EVENTS) - 1));
-}
-
-void intel_pmu_pebs_enable(struct perf_event *event)
-{
- struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
- struct hw_perf_event *hwc = &event->hw;
- struct debug_store *ds = cpuc->ds;
- bool first_pebs;
- u64 threshold;
-
- hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT;
-
- first_pebs = !pebs_is_enabled(cpuc);
- cpuc->pebs_enabled |= 1ULL << hwc->idx;
-
- if (event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT)
- cpuc->pebs_enabled |= 1ULL << (hwc->idx + 32);
- else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST)
- cpuc->pebs_enabled |= 1ULL << 63;
-
- /*
- * When the event is constrained enough we can use a larger
-	 * threshold and run the event with less frequent PMIs.
- */
- if (hwc->flags & PERF_X86_EVENT_FREERUNNING) {
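-		/*
-		 * Leave room for one record per PEBS-capable counter
-		 * between the interrupt threshold and the absolute maximum.
-		 */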
- threshold = ds->pebs_absolute_maximum -
- x86_pmu.max_pebs_events * x86_pmu.pebs_record_size;
-
- if (first_pebs)
- perf_sched_cb_inc(event->ctx->pmu);
- } else {
- threshold = ds->pebs_buffer_base + x86_pmu.pebs_record_size;
-
- /*
-		 * If not all events can use the larger buffer,
- * roll back to threshold = 1
- */
- if (!first_pebs &&
- (ds->pebs_interrupt_threshold > threshold))
- perf_sched_cb_dec(event->ctx->pmu);
- }
-
- /* Use auto-reload if possible to save a MSR write in the PMI */
- if (hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) {
- ds->pebs_event_reset[hwc->idx] =
- (u64)(-hwc->sample_period) & x86_pmu.cntval_mask;
- }
-
- if (first_pebs || ds->pebs_interrupt_threshold > threshold)
- ds->pebs_interrupt_threshold = threshold;
-}
-
-void intel_pmu_pebs_disable(struct perf_event *event)
-{
- struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
- struct hw_perf_event *hwc = &event->hw;
- struct debug_store *ds = cpuc->ds;
- bool large_pebs = ds->pebs_interrupt_threshold >
- ds->pebs_buffer_base + x86_pmu.pebs_record_size;
-
- if (large_pebs)
- intel_pmu_drain_pebs_buffer();
-
- cpuc->pebs_enabled &= ~(1ULL << hwc->idx);
-
- if (event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT)
- cpuc->pebs_enabled &= ~(1ULL << (hwc->idx + 32));
- else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST)
- cpuc->pebs_enabled &= ~(1ULL << 63);
-
- if (large_pebs && !pebs_is_enabled(cpuc))
- perf_sched_cb_dec(event->ctx->pmu);
-
- if (cpuc->enabled)
- wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
-
- hwc->config |= ARCH_PERFMON_EVENTSEL_INT;
-}
-
-void intel_pmu_pebs_enable_all(void)
-{
- struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
-
- if (cpuc->pebs_enabled)
- wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
-}
-
-void intel_pmu_pebs_disable_all(void)
-{
- struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
-
- if (cpuc->pebs_enabled)
- wrmsrl(MSR_IA32_PEBS_ENABLE, 0);
-}
-
-static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
-{
- struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
- unsigned long from = cpuc->lbr_entries[0].from;
- unsigned long old_to, to = cpuc->lbr_entries[0].to;
- unsigned long ip = regs->ip;
- int is_64bit = 0;
- void *kaddr;
- int size;
-
- /*
-	 * We don't need to fix anything up if the PEBS assist is fault-like
- */
- if (!x86_pmu.intel_cap.pebs_trap)
- return 1;
-
- /*
- * No LBR entry, no basic block, no rewinding
- */
- if (!cpuc->lbr_stack.nr || !from || !to)
- return 0;
-
- /*
- * Basic blocks should never cross user/kernel boundaries
- */
- if (kernel_ip(ip) != kernel_ip(to))
- return 0;
-
- /*
- * unsigned math, either ip is before the start (impossible) or
- * the basic block is larger than 1 page (sanity)
- */
- if ((ip - to) > PEBS_FIXUP_SIZE)
- return 0;
-
- /*
- * We sampled a branch insn, rewind using the LBR stack
- */
- if (ip == to) {
- set_linear_ip(regs, from);
- return 1;
- }
-
- size = ip - to;
- if (!kernel_ip(ip)) {
- int bytes;
- u8 *buf = this_cpu_read(insn_buffer);
-
- /* 'size' must fit our buffer, see above */
- bytes = copy_from_user_nmi(buf, (void __user *)to, size);
- if (bytes != 0)
- return 0;
-
- kaddr = buf;
- } else {
- kaddr = (void *)to;
- }
-
- do {
- struct insn insn;
-
- old_to = to;
-
-#ifdef CONFIG_X86_64
- is_64bit = kernel_ip(to) || !test_thread_flag(TIF_IA32);
-#endif
- insn_init(&insn, kaddr, size, is_64bit);
- insn_get_length(&insn);
- /*
- * Make sure there was not a problem decoding the
- * instruction and getting the length. This is
- * doubly important because we have an infinite
- * loop if insn.length=0.
- */
- if (!insn.length)
- break;
-
- to += insn.length;
- kaddr += insn.length;
- size -= insn.length;
- } while (to < ip);
-
- if (to == ip) {
- set_linear_ip(regs, old_to);
- return 1;
- }
-
- /*
- * Even though we decoded the basic block, the instruction stream
- * never matched the given IP, either the TO or the IP got corrupted.
- */
- return 0;
-}
-
-static inline u64 intel_hsw_weight(struct pebs_record_skl *pebs)
-{
- if (pebs->tsx_tuning) {
- union hsw_tsx_tuning tsx = { .value = pebs->tsx_tuning };
- return tsx.cycles_last_block;
- }
- return 0;
-}
-
-static inline u64 intel_hsw_transaction(struct pebs_record_skl *pebs)
-{
- u64 txn = (pebs->tsx_tuning & PEBS_HSW_TSX_FLAGS) >> 32;
-
- /* For RTM XABORTs also log the abort code from AX */
- if ((txn & PERF_TXN_TRANSACTION) && (pebs->ax & 1))
- txn |= ((pebs->ax >> 24) & 0xff) << PERF_TXN_ABORT_SHIFT;
- return txn;
-}
-
-static void setup_pebs_sample_data(struct perf_event *event,
- struct pt_regs *iregs, void *__pebs,
- struct perf_sample_data *data,
- struct pt_regs *regs)
-{
-#define PERF_X86_EVENT_PEBS_HSW_PREC \
- (PERF_X86_EVENT_PEBS_ST_HSW | \
- PERF_X86_EVENT_PEBS_LD_HSW | \
- PERF_X86_EVENT_PEBS_NA_HSW)
- /*
- * We cast to the biggest pebs_record but are careful not to
- * unconditionally access the 'extra' entries.
- */
- struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
- struct pebs_record_skl *pebs = __pebs;
- u64 sample_type;
- int fll, fst, dsrc;
- int fl = event->hw.flags;
-
- if (pebs == NULL)
- return;
-
- sample_type = event->attr.sample_type;
- dsrc = sample_type & PERF_SAMPLE_DATA_SRC;
-
- fll = fl & PERF_X86_EVENT_PEBS_LDLAT;
- fst = fl & (PERF_X86_EVENT_PEBS_ST | PERF_X86_EVENT_PEBS_HSW_PREC);
-
- perf_sample_data_init(data, 0, event->hw.last_period);
-
- data->period = event->hw.last_period;
-
- /*
- * Use latency for weight (only avail with PEBS-LL)
- */
- if (fll && (sample_type & PERF_SAMPLE_WEIGHT))
- data->weight = pebs->lat;
-
- /*
- * data.data_src encodes the data source
- */
- if (dsrc) {
- u64 val = PERF_MEM_NA;
- if (fll)
- val = load_latency_data(pebs->dse);
- else if (fst && (fl & PERF_X86_EVENT_PEBS_HSW_PREC))
- val = precise_datala_hsw(event, pebs->dse);
- else if (fst)
- val = precise_store_data(pebs->dse);
- data->data_src.val = val;
- }
-
- /*
- * We use the interrupt regs as a base because the PEBS record
- * does not contain a full regs set, specifically it seems to
- * lack segment descriptors, which get used by things like
- * user_mode().
- *
- * In the simple case fix up only the IP and BP,SP regs, for
- * PERF_SAMPLE_IP and PERF_SAMPLE_CALLCHAIN to function properly.
- * A possible PERF_SAMPLE_REGS will have to transfer all regs.
- */
- *regs = *iregs;
- regs->flags = pebs->flags;
- set_linear_ip(regs, pebs->ip);
- regs->bp = pebs->bp;
- regs->sp = pebs->sp;
-
- if (sample_type & PERF_SAMPLE_REGS_INTR) {
- regs->ax = pebs->ax;
- regs->bx = pebs->bx;
- regs->cx = pebs->cx;
- regs->dx = pebs->dx;
- regs->si = pebs->si;
- regs->di = pebs->di;
- regs->bp = pebs->bp;
- regs->sp = pebs->sp;
-
- regs->flags = pebs->flags;
-#ifndef CONFIG_X86_32
- regs->r8 = pebs->r8;
- regs->r9 = pebs->r9;
- regs->r10 = pebs->r10;
- regs->r11 = pebs->r11;
- regs->r12 = pebs->r12;
- regs->r13 = pebs->r13;
- regs->r14 = pebs->r14;
- regs->r15 = pebs->r15;
-#endif
- }
-
- if (event->attr.precise_ip > 1 && x86_pmu.intel_cap.pebs_format >= 2) {
- regs->ip = pebs->real_ip;
- regs->flags |= PERF_EFLAGS_EXACT;
- } else if (event->attr.precise_ip > 1 && intel_pmu_pebs_fixup_ip(regs))
- regs->flags |= PERF_EFLAGS_EXACT;
- else
- regs->flags &= ~PERF_EFLAGS_EXACT;
-
- if ((sample_type & PERF_SAMPLE_ADDR) &&
- x86_pmu.intel_cap.pebs_format >= 1)
- data->addr = pebs->dla;
-
- if (x86_pmu.intel_cap.pebs_format >= 2) {
- /* Only set the TSX weight when no memory weight. */
- if ((sample_type & PERF_SAMPLE_WEIGHT) && !fll)
- data->weight = intel_hsw_weight(pebs);
-
- if (sample_type & PERF_SAMPLE_TRANSACTION)
- data->txn = intel_hsw_transaction(pebs);
- }
-
- /*
- * v3 supplies an accurate time stamp, so we use that
- * for the time stamp.
- *
- * We can only do this for the default trace clock.
- */
- if (x86_pmu.intel_cap.pebs_format >= 3 &&
- event->attr.use_clockid == 0)
- data->time = native_sched_clock_from_tsc(pebs->tsc);
-
- if (has_branch_stack(event))
- data->br_stack = &cpuc->lbr_stack;
-}
-
-static inline void *
-get_next_pebs_record_by_bit(void *base, void *top, int bit)
-{
- struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
- void *at;
- u64 pebs_status;
-
- /*
-	 * fmt0 does not have a status bitfield (it does not use the
-	 * pebs_record_nhm format)
- */
- if (x86_pmu.intel_cap.pebs_format < 1)
- return base;
-
- if (base == NULL)
- return NULL;
-
- for (at = base; at < top; at += x86_pmu.pebs_record_size) {
- struct pebs_record_nhm *p = at;
-
- if (test_bit(bit, (unsigned long *)&p->status)) {
- /* PEBS v3 has accurate status bits */
- if (x86_pmu.intel_cap.pebs_format >= 3)
- return at;
-
- if (p->status == (1 << bit))
- return at;
-
-			/* clear non-PEBS bits and re-check */
- pebs_status = p->status & cpuc->pebs_enabled;
- pebs_status &= (1ULL << MAX_PEBS_EVENTS) - 1;
- if (pebs_status == (1 << bit))
- return at;
- }
- }
- return NULL;
-}
-
-static void __intel_pmu_pebs_event(struct perf_event *event,
- struct pt_regs *iregs,
- void *base, void *top,
- int bit, int count)
-{
- struct perf_sample_data data;
- struct pt_regs regs;
- void *at = get_next_pebs_record_by_bit(base, top, bit);
-
- if (!intel_pmu_save_and_restart(event) &&
- !(event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD))
- return;
-
- while (count > 1) {
- setup_pebs_sample_data(event, iregs, at, &data, &regs);
- perf_event_output(event, &data, &regs);
- at += x86_pmu.pebs_record_size;
- at = get_next_pebs_record_by_bit(at, top, bit);
- count--;
- }
-
- setup_pebs_sample_data(event, iregs, at, &data, &regs);
-
- /*
- * All but the last records are processed.
- * The last one is left to be able to call the overflow handler.
- */
- if (perf_event_overflow(event, &data, &regs)) {
- x86_pmu_stop(event, 0);
- return;
- }
-
-}
-
-static void intel_pmu_drain_pebs_core(struct pt_regs *iregs)
-{
- struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
- struct debug_store *ds = cpuc->ds;
- struct perf_event *event = cpuc->events[0]; /* PMC0 only */
- struct pebs_record_core *at, *top;
- int n;
-
- if (!x86_pmu.pebs_active)
- return;
-
- at = (struct pebs_record_core *)(unsigned long)ds->pebs_buffer_base;
- top = (struct pebs_record_core *)(unsigned long)ds->pebs_index;
-
- /*
- * Whatever else happens, drain the thing
- */
- ds->pebs_index = ds->pebs_buffer_base;
-
- if (!test_bit(0, cpuc->active_mask))
- return;
-
- WARN_ON_ONCE(!event);
-
- if (!event->attr.precise_ip)
- return;
-
- n = top - at;
- if (n <= 0)
- return;
-
- __intel_pmu_pebs_event(event, iregs, at, top, 0, n);
-}
-
-static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
-{
- struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
- struct debug_store *ds = cpuc->ds;
- struct perf_event *event;
- void *base, *at, *top;
- short counts[MAX_PEBS_EVENTS] = {};
- short error[MAX_PEBS_EVENTS] = {};
- int bit, i;
-
- if (!x86_pmu.pebs_active)
- return;
-
- base = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base;
- top = (struct pebs_record_nhm *)(unsigned long)ds->pebs_index;
-
- ds->pebs_index = ds->pebs_buffer_base;
-
- if (unlikely(base >= top))
- return;
-
- for (at = base; at < top; at += x86_pmu.pebs_record_size) {
- struct pebs_record_nhm *p = at;
- u64 pebs_status;
-
- /* PEBS v3 has accurate status bits */
- if (x86_pmu.intel_cap.pebs_format >= 3) {
- for_each_set_bit(bit, (unsigned long *)&p->status,
- MAX_PEBS_EVENTS)
- counts[bit]++;
-
- continue;
- }
-
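-		/* Only keep status bits for counters that have PEBS enabled. */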
- pebs_status = p->status & cpuc->pebs_enabled;
- pebs_status &= (1ULL << x86_pmu.max_pebs_events) - 1;
-
- /*
- * On some CPUs the PEBS status can be zero when PEBS is
- * racing with clearing of GLOBAL_STATUS.
- *
- * Normally we would drop that record, but in the
- * case when there is only a single active PEBS event
- * we can assume it's for that event.
- */
- if (!pebs_status && cpuc->pebs_enabled &&
- !(cpuc->pebs_enabled & (cpuc->pebs_enabled-1)))
- pebs_status = cpuc->pebs_enabled;
-
- bit = find_first_bit((unsigned long *)&pebs_status,
- x86_pmu.max_pebs_events);
- if (bit >= x86_pmu.max_pebs_events)
- continue;
-
- /*
- * The PEBS hardware does not deal well with the situation
- * when events happen near to each other and multiple bits
- * are set. But it should happen rarely.
- *
- * If these events include one PEBS and multiple non-PEBS
- * events, it doesn't impact PEBS record. The record will
- * be handled normally. (slow path)
- *
- * If these events include two or more PEBS events, the
- * records for the events can be collapsed into a single
- * one, and it's not possible to reconstruct all events
-		 * that caused the PEBS record. This is called a collision.
-		 * If a collision happens, the record is dropped.
- */
- if (p->status != (1ULL << bit)) {
- for_each_set_bit(i, (unsigned long *)&pebs_status,
- x86_pmu.max_pebs_events)
- error[i]++;
- continue;
- }
-
- counts[bit]++;
- }
-
- for (bit = 0; bit < x86_pmu.max_pebs_events; bit++) {
- if ((counts[bit] == 0) && (error[bit] == 0))
- continue;
-
- event = cpuc->events[bit];
- WARN_ON_ONCE(!event);
- WARN_ON_ONCE(!event->attr.precise_ip);
-
- /* log dropped samples number */
- if (error[bit])
- perf_log_lost_samples(event, error[bit]);
-
- if (counts[bit]) {
- __intel_pmu_pebs_event(event, iregs, base,
- top, bit, counts[bit]);
- }
- }
-}
-
-/*
- * BTS, PEBS probe and setup
- */
-
-void __init intel_ds_init(void)
-{
- /*
- * No support for 32bit formats
- */
- if (!boot_cpu_has(X86_FEATURE_DTES64))
- return;
-
- x86_pmu.bts = boot_cpu_has(X86_FEATURE_BTS);
- x86_pmu.pebs = boot_cpu_has(X86_FEATURE_PEBS);
- x86_pmu.pebs_buffer_size = PEBS_BUFFER_SIZE;
- if (x86_pmu.pebs) {
- char pebs_type = x86_pmu.intel_cap.pebs_trap ? '+' : '-';
- int format = x86_pmu.intel_cap.pebs_format;
-
- switch (format) {
- case 0:
- printk(KERN_CONT "PEBS fmt0%c, ", pebs_type);
- x86_pmu.pebs_record_size = sizeof(struct pebs_record_core);
- /*
- * Using >PAGE_SIZE buffers makes the WRMSR to
- * PERF_GLOBAL_CTRL in intel_pmu_enable_all()
- * mysteriously hang on Core2.
- *
- * As a workaround, we don't do this.
- */
- x86_pmu.pebs_buffer_size = PAGE_SIZE;
- x86_pmu.drain_pebs = intel_pmu_drain_pebs_core;
- break;
-
- case 1:
- printk(KERN_CONT "PEBS fmt1%c, ", pebs_type);
- x86_pmu.pebs_record_size = sizeof(struct pebs_record_nhm);
- x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
- break;
-
- case 2:
- pr_cont("PEBS fmt2%c, ", pebs_type);
- x86_pmu.pebs_record_size = sizeof(struct pebs_record_hsw);
- x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
- break;
-
- case 3:
- pr_cont("PEBS fmt3%c, ", pebs_type);
- x86_pmu.pebs_record_size =
- sizeof(struct pebs_record_skl);
- x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
- x86_pmu.free_running_flags |= PERF_SAMPLE_TIME;
- break;
-
- default:
- printk(KERN_CONT "no PEBS fmt%d%c, ", format, pebs_type);
- x86_pmu.pebs = 0;
- }
- }
-}
-
-void perf_restore_debug_store(void)
-{
- struct debug_store *ds = __this_cpu_read(cpu_hw_events.ds);
-
- if (!x86_pmu.bts && !x86_pmu.pebs)
- return;
-
- wrmsrl(MSR_IA32_DS_AREA, (unsigned long)ds);
-}
diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
deleted file mode 100644
index 653f88d25..000000000
--- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c
+++ /dev/null
@@ -1,1062 +0,0 @@
-#include <linux/perf_event.h>
-#include <linux/types.h>
-
-#include <asm/perf_event.h>
-#include <asm/msr.h>
-#include <asm/insn.h>
-
-#include "perf_event.h"
-
-enum {
- LBR_FORMAT_32 = 0x00,
- LBR_FORMAT_LIP = 0x01,
- LBR_FORMAT_EIP = 0x02,
- LBR_FORMAT_EIP_FLAGS = 0x03,
- LBR_FORMAT_EIP_FLAGS2 = 0x04,
- LBR_FORMAT_INFO = 0x05,
- LBR_FORMAT_MAX_KNOWN = LBR_FORMAT_INFO,
-};
-
-static enum {
- LBR_EIP_FLAGS = 1,
- LBR_TSX = 2,
-} lbr_desc[LBR_FORMAT_MAX_KNOWN + 1] = {
- [LBR_FORMAT_EIP_FLAGS] = LBR_EIP_FLAGS,
- [LBR_FORMAT_EIP_FLAGS2] = LBR_EIP_FLAGS | LBR_TSX,
-};
-
-/*
- * Intel LBR_SELECT bits
- * Intel Vol3a, April 2011, Section 16.7 Table 16-10
- *
- * Hardware branch filter (not available on all CPUs)
- */
-#define LBR_KERNEL_BIT 0 /* do not capture at ring0 */
-#define LBR_USER_BIT 1 /* do not capture at ring > 0 */
-#define LBR_JCC_BIT 2 /* do not capture conditional branches */
-#define LBR_REL_CALL_BIT 3 /* do not capture relative calls */
-#define LBR_IND_CALL_BIT 4 /* do not capture indirect calls */
-#define LBR_RETURN_BIT 5 /* do not capture near returns */
-#define LBR_IND_JMP_BIT 6 /* do not capture indirect jumps */
-#define LBR_REL_JMP_BIT 7 /* do not capture relative jumps */
-#define LBR_FAR_BIT 8 /* do not capture far branches */
-#define LBR_CALL_STACK_BIT 9 /* enable call stack */
-
-/*
- * The following bit only exists in Linux; we mask it out before writing it to
- * the actual MSR. But it helps the perf constraint code understand
- * that this is a separate configuration.
- */
-#define LBR_NO_INFO_BIT 63 /* don't read LBR_INFO. */
-
-#define LBR_KERNEL (1 << LBR_KERNEL_BIT)
-#define LBR_USER (1 << LBR_USER_BIT)
-#define LBR_JCC (1 << LBR_JCC_BIT)
-#define LBR_REL_CALL (1 << LBR_REL_CALL_BIT)
-#define LBR_IND_CALL (1 << LBR_IND_CALL_BIT)
-#define LBR_RETURN (1 << LBR_RETURN_BIT)
-#define LBR_REL_JMP (1 << LBR_REL_JMP_BIT)
-#define LBR_IND_JMP (1 << LBR_IND_JMP_BIT)
-#define LBR_FAR (1 << LBR_FAR_BIT)
-#define LBR_CALL_STACK (1 << LBR_CALL_STACK_BIT)
-#define LBR_NO_INFO (1ULL << LBR_NO_INFO_BIT)
-
-#define LBR_PLM (LBR_KERNEL | LBR_USER)
-
-#define LBR_SEL_MASK 0x1ff /* valid bits in LBR_SELECT */
-#define LBR_NOT_SUPP -1 /* LBR filter not supported */
-#define LBR_IGN 0 /* ignored */
-
-#define LBR_ANY \
- (LBR_JCC |\
- LBR_REL_CALL |\
- LBR_IND_CALL |\
- LBR_RETURN |\
- LBR_REL_JMP |\
- LBR_IND_JMP |\
- LBR_FAR)
-
-#define LBR_FROM_FLAG_MISPRED (1ULL << 63)
-#define LBR_FROM_FLAG_IN_TX (1ULL << 62)
-#define LBR_FROM_FLAG_ABORT (1ULL << 61)
-
-/*
- * x86 control flow change classification
- * x86 control flow changes include branches, interrupts, traps, faults
- */
-enum {
- X86_BR_NONE = 0, /* unknown */
-
- X86_BR_USER = 1 << 0, /* branch target is user */
- X86_BR_KERNEL = 1 << 1, /* branch target is kernel */
-
- X86_BR_CALL = 1 << 2, /* call */
- X86_BR_RET = 1 << 3, /* return */
- X86_BR_SYSCALL = 1 << 4, /* syscall */
- X86_BR_SYSRET = 1 << 5, /* syscall return */
- X86_BR_INT = 1 << 6, /* sw interrupt */
- X86_BR_IRET = 1 << 7, /* return from interrupt */
- X86_BR_JCC = 1 << 8, /* conditional */
- X86_BR_JMP = 1 << 9, /* jump */
- X86_BR_IRQ = 1 << 10,/* hw interrupt or trap or fault */
- X86_BR_IND_CALL = 1 << 11,/* indirect calls */
- X86_BR_ABORT = 1 << 12,/* transaction abort */
- X86_BR_IN_TX = 1 << 13,/* in transaction */
- X86_BR_NO_TX = 1 << 14,/* not in transaction */
- X86_BR_ZERO_CALL = 1 << 15,/* zero length call */
- X86_BR_CALL_STACK = 1 << 16,/* call stack */
- X86_BR_IND_JMP = 1 << 17,/* indirect jump */
-};
-
-#define X86_BR_PLM (X86_BR_USER | X86_BR_KERNEL)
-#define X86_BR_ANYTX (X86_BR_NO_TX | X86_BR_IN_TX)
-
-#define X86_BR_ANY \
- (X86_BR_CALL |\
- X86_BR_RET |\
- X86_BR_SYSCALL |\
- X86_BR_SYSRET |\
- X86_BR_INT |\
- X86_BR_IRET |\
- X86_BR_JCC |\
- X86_BR_JMP |\
- X86_BR_IRQ |\
- X86_BR_ABORT |\
- X86_BR_IND_CALL |\
- X86_BR_IND_JMP |\
- X86_BR_ZERO_CALL)
-
-#define X86_BR_ALL (X86_BR_PLM | X86_BR_ANY)
-
-#define X86_BR_ANY_CALL \
- (X86_BR_CALL |\
- X86_BR_IND_CALL |\
- X86_BR_ZERO_CALL |\
- X86_BR_SYSCALL |\
- X86_BR_IRQ |\
- X86_BR_INT)
-
-static void intel_pmu_lbr_filter(struct cpu_hw_events *cpuc);
-
-/*
- * We only support LBR implementations that have FREEZE_LBRS_ON_PMI
- * otherwise it becomes near impossible to get a reliable stack.
- */
-
-static void __intel_pmu_lbr_enable(bool pmi)
-{
- struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
- u64 debugctl, lbr_select = 0, orig_debugctl;
-
- /*
- * No need to unfreeze manually, as v4 can do that as part
- * of the GLOBAL_STATUS ack.
- */
- if (pmi && x86_pmu.version >= 4)
- return;
-
- /*
- * No need to reprogram LBR_SELECT in a PMI, as it
- * did not change.
- */
- if (cpuc->lbr_sel)
- lbr_select = cpuc->lbr_sel->config & x86_pmu.lbr_sel_mask;
- if (!pmi && cpuc->lbr_sel)
- wrmsrl(MSR_LBR_SELECT, lbr_select);
-
- rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
- orig_debugctl = debugctl;
- debugctl |= DEBUGCTLMSR_LBR;
- /*
- * LBR callstack does not work well with FREEZE_LBRS_ON_PMI.
- * If FREEZE_LBRS_ON_PMI is set, PMI near call/return instructions
- * may cause superfluous increase/decrease of LBR_TOS.
- */
- if (!(lbr_select & LBR_CALL_STACK))
- debugctl |= DEBUGCTLMSR_FREEZE_LBRS_ON_PMI;
- if (orig_debugctl != debugctl)
- wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
-}
-
-static void __intel_pmu_lbr_disable(void)
-{
- u64 debugctl;
-
- rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
- debugctl &= ~(DEBUGCTLMSR_LBR | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
- wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
-}
-
-static void intel_pmu_lbr_reset_32(void)
-{
- int i;
-
- for (i = 0; i < x86_pmu.lbr_nr; i++)
- wrmsrl(x86_pmu.lbr_from + i, 0);
-}
-
-static void intel_pmu_lbr_reset_64(void)
-{
- int i;
-
- for (i = 0; i < x86_pmu.lbr_nr; i++) {
- wrmsrl(x86_pmu.lbr_from + i, 0);
- wrmsrl(x86_pmu.lbr_to + i, 0);
- if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO)
- wrmsrl(MSR_LBR_INFO_0 + i, 0);
- }
-}
-
-void intel_pmu_lbr_reset(void)
-{
- if (!x86_pmu.lbr_nr)
- return;
-
- if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_32)
- intel_pmu_lbr_reset_32();
- else
- intel_pmu_lbr_reset_64();
-}
-
-/*
- * TOS = most recently recorded branch
- */
-static inline u64 intel_pmu_lbr_tos(void)
-{
- u64 tos;
-
- rdmsrl(x86_pmu.lbr_tos, tos);
- return tos;
-}
-
-enum {
- LBR_NONE,
- LBR_VALID,
-};
-
-static void __intel_pmu_lbr_restore(struct x86_perf_task_context *task_ctx)
-{
- int i;
- unsigned lbr_idx, mask;
- u64 tos;
-
- if (task_ctx->lbr_callstack_users == 0 ||
- task_ctx->lbr_stack_state == LBR_NONE) {
- intel_pmu_lbr_reset();
- return;
- }
-
- mask = x86_pmu.lbr_nr - 1;
- tos = task_ctx->tos;
- for (i = 0; i < tos; i++) {
- lbr_idx = (tos - i) & mask;
- wrmsrl(x86_pmu.lbr_from + lbr_idx, task_ctx->lbr_from[i]);
- wrmsrl(x86_pmu.lbr_to + lbr_idx, task_ctx->lbr_to[i]);
- if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO)
- wrmsrl(MSR_LBR_INFO_0 + lbr_idx, task_ctx->lbr_info[i]);
- }
- wrmsrl(x86_pmu.lbr_tos, tos);
- task_ctx->lbr_stack_state = LBR_NONE;
-}
-
-static void __intel_pmu_lbr_save(struct x86_perf_task_context *task_ctx)
-{
- int i;
- unsigned lbr_idx, mask;
- u64 tos;
-
- if (task_ctx->lbr_callstack_users == 0) {
- task_ctx->lbr_stack_state = LBR_NONE;
- return;
- }
-
- mask = x86_pmu.lbr_nr - 1;
- tos = intel_pmu_lbr_tos();
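-	/* Entry 0 corresponds to the TOS slot, i.e. the most recent branch. */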
- for (i = 0; i < tos; i++) {
- lbr_idx = (tos - i) & mask;
- rdmsrl(x86_pmu.lbr_from + lbr_idx, task_ctx->lbr_from[i]);
- rdmsrl(x86_pmu.lbr_to + lbr_idx, task_ctx->lbr_to[i]);
- if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO)
- rdmsrl(MSR_LBR_INFO_0 + lbr_idx, task_ctx->lbr_info[i]);
- }
- task_ctx->tos = tos;
- task_ctx->lbr_stack_state = LBR_VALID;
-}
-
-void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in)
-{
- struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
- struct x86_perf_task_context *task_ctx;
-
- /*
- * If LBR callstack feature is enabled and the stack was saved when
- * the task was scheduled out, restore the stack. Otherwise flush
- * the LBR stack.
- */
- task_ctx = ctx ? ctx->task_ctx_data : NULL;
- if (task_ctx) {
- if (sched_in) {
- __intel_pmu_lbr_restore(task_ctx);
- cpuc->lbr_context = ctx;
- } else {
- __intel_pmu_lbr_save(task_ctx);
- }
- return;
- }
-
- /*
-	 * When sampling the branch stack in system-wide mode, it may be
- * necessary to flush the stack on context switch. This happens
- * when the branch stack does not tag its entries with the pid
- * of the current task. Otherwise it becomes impossible to
- * associate a branch entry with a task. This ambiguity is more
- * likely to appear when the branch stack supports priv level
- * filtering and the user sets it to monitor only at the user
- * level (which could be a useful measurement in system-wide
- * mode). In that case, the risk is high of having a branch
- * stack with branch from multiple tasks.
- */
- if (sched_in) {
- intel_pmu_lbr_reset();
- cpuc->lbr_context = ctx;
- }
-}
-
-static inline bool branch_user_callstack(unsigned br_sel)
-{
- return (br_sel & X86_BR_USER) && (br_sel & X86_BR_CALL_STACK);
-}
-
-void intel_pmu_lbr_enable(struct perf_event *event)
-{
- struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
- struct x86_perf_task_context *task_ctx;
-
- if (!x86_pmu.lbr_nr)
- return;
-
- /*
- * Reset the LBR stack if we changed task context to
- * avoid data leaks.
- */
- if (event->ctx->task && cpuc->lbr_context != event->ctx) {
- intel_pmu_lbr_reset();
- cpuc->lbr_context = event->ctx;
- }
- cpuc->br_sel = event->hw.branch_reg.reg;
-
- if (branch_user_callstack(cpuc->br_sel) && event->ctx &&
- event->ctx->task_ctx_data) {
- task_ctx = event->ctx->task_ctx_data;
- task_ctx->lbr_callstack_users++;
- }
-
- cpuc->lbr_users++;
- perf_sched_cb_inc(event->ctx->pmu);
-}
-
-void intel_pmu_lbr_disable(struct perf_event *event)
-{
- struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
- struct x86_perf_task_context *task_ctx;
-
- if (!x86_pmu.lbr_nr)
- return;
-
- if (branch_user_callstack(cpuc->br_sel) && event->ctx &&
- event->ctx->task_ctx_data) {
- task_ctx = event->ctx->task_ctx_data;
- task_ctx->lbr_callstack_users--;
- }
-
- cpuc->lbr_users--;
- WARN_ON_ONCE(cpuc->lbr_users < 0);
- perf_sched_cb_dec(event->ctx->pmu);
-
- if (cpuc->enabled && !cpuc->lbr_users) {
- __intel_pmu_lbr_disable();
- /* avoid stale pointer */
- cpuc->lbr_context = NULL;
- }
-}
-
-void intel_pmu_lbr_enable_all(bool pmi)
-{
- struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
-
- if (cpuc->lbr_users)
- __intel_pmu_lbr_enable(pmi);
-}
-
-void intel_pmu_lbr_disable_all(void)
-{
- struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
-
- if (cpuc->lbr_users)
- __intel_pmu_lbr_disable();
-}
-
-static void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc)
-{
- unsigned long mask = x86_pmu.lbr_nr - 1;
- u64 tos = intel_pmu_lbr_tos();
- int i;
-
- for (i = 0; i < x86_pmu.lbr_nr; i++) {
- unsigned long lbr_idx = (tos - i) & mask;
- union {
- struct {
- u32 from;
- u32 to;
- };
- u64 lbr;
- } msr_lastbranch;
-
- rdmsrl(x86_pmu.lbr_from + lbr_idx, msr_lastbranch.lbr);
-
- cpuc->lbr_entries[i].from = msr_lastbranch.from;
- cpuc->lbr_entries[i].to = msr_lastbranch.to;
- cpuc->lbr_entries[i].mispred = 0;
- cpuc->lbr_entries[i].predicted = 0;
- cpuc->lbr_entries[i].reserved = 0;
- }
- cpuc->lbr_stack.nr = i;
-}
-
-/*
- * Due to lack of segmentation in Linux the effective address (offset)
- * is the same as the linear address, allowing us to merge the LIP and EIP
- * LBR formats.
- */
-static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
-{
- bool need_info = false;
- unsigned long mask = x86_pmu.lbr_nr - 1;
- int lbr_format = x86_pmu.intel_cap.lbr_format;
- u64 tos = intel_pmu_lbr_tos();
- int i;
- int out = 0;
- int num = x86_pmu.lbr_nr;
-
- if (cpuc->lbr_sel) {
- need_info = !(cpuc->lbr_sel->config & LBR_NO_INFO);
- if (cpuc->lbr_sel->config & LBR_CALL_STACK)
- num = tos;
- }
-
- for (i = 0; i < num; i++) {
- unsigned long lbr_idx = (tos - i) & mask;
- u64 from, to, mis = 0, pred = 0, in_tx = 0, abort = 0;
- int skip = 0;
- u16 cycles = 0;
- int lbr_flags = lbr_desc[lbr_format];
-
- rdmsrl(x86_pmu.lbr_from + lbr_idx, from);
- rdmsrl(x86_pmu.lbr_to + lbr_idx, to);
-
- if (lbr_format == LBR_FORMAT_INFO && need_info) {
- u64 info;
-
- rdmsrl(MSR_LBR_INFO_0 + lbr_idx, info);
- mis = !!(info & LBR_INFO_MISPRED);
- pred = !mis;
- in_tx = !!(info & LBR_INFO_IN_TX);
- abort = !!(info & LBR_INFO_ABORT);
- cycles = (info & LBR_INFO_CYCLES);
- }
- if (lbr_flags & LBR_EIP_FLAGS) {
- mis = !!(from & LBR_FROM_FLAG_MISPRED);
- pred = !mis;
- skip = 1;
- }
- if (lbr_flags & LBR_TSX) {
- in_tx = !!(from & LBR_FROM_FLAG_IN_TX);
- abort = !!(from & LBR_FROM_FLAG_ABORT);
- skip = 3;
- }
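-		/*
-		 * Strip the flag bits from the top of 'from' and sign-extend
-		 * the remaining address bits.
-		 */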
- from = (u64)((((s64)from) << skip) >> skip);
-
- /*
- * Some CPUs report duplicated abort records,
- * with the second entry not having an abort bit set.
- * Skip them here. This loop runs backwards,
- * so we need to undo the previous record.
- * If the abort just happened outside the window
- * the extra entry cannot be removed.
- */
- if (abort && x86_pmu.lbr_double_abort && out > 0)
- out--;
-
- cpuc->lbr_entries[out].from = from;
- cpuc->lbr_entries[out].to = to;
- cpuc->lbr_entries[out].mispred = mis;
- cpuc->lbr_entries[out].predicted = pred;
- cpuc->lbr_entries[out].in_tx = in_tx;
- cpuc->lbr_entries[out].abort = abort;
- cpuc->lbr_entries[out].cycles = cycles;
- cpuc->lbr_entries[out].reserved = 0;
- out++;
- }
- cpuc->lbr_stack.nr = out;
-}
-
-void intel_pmu_lbr_read(void)
-{
- struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
-
- if (!cpuc->lbr_users)
- return;
-
- if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_32)
- intel_pmu_lbr_read_32(cpuc);
- else
- intel_pmu_lbr_read_64(cpuc);
-
- intel_pmu_lbr_filter(cpuc);
-}
-
-/*
- * SW filter is used:
- * - in case there is no HW filter
- * - in case the HW filter has errata or limitations
- */
-static int intel_pmu_setup_sw_lbr_filter(struct perf_event *event)
-{
- u64 br_type = event->attr.branch_sample_type;
- int mask = 0;
-
- if (br_type & PERF_SAMPLE_BRANCH_USER)
- mask |= X86_BR_USER;
-
- if (br_type & PERF_SAMPLE_BRANCH_KERNEL)
- mask |= X86_BR_KERNEL;
-
- /* we ignore BRANCH_HV here */
-
- if (br_type & PERF_SAMPLE_BRANCH_ANY)
- mask |= X86_BR_ANY;
-
- if (br_type & PERF_SAMPLE_BRANCH_ANY_CALL)
- mask |= X86_BR_ANY_CALL;
-
- if (br_type & PERF_SAMPLE_BRANCH_ANY_RETURN)
- mask |= X86_BR_RET | X86_BR_IRET | X86_BR_SYSRET;
-
- if (br_type & PERF_SAMPLE_BRANCH_IND_CALL)
- mask |= X86_BR_IND_CALL;
-
- if (br_type & PERF_SAMPLE_BRANCH_ABORT_TX)
- mask |= X86_BR_ABORT;
-
- if (br_type & PERF_SAMPLE_BRANCH_IN_TX)
- mask |= X86_BR_IN_TX;
-
- if (br_type & PERF_SAMPLE_BRANCH_NO_TX)
- mask |= X86_BR_NO_TX;
-
- if (br_type & PERF_SAMPLE_BRANCH_COND)
- mask |= X86_BR_JCC;
-
- if (br_type & PERF_SAMPLE_BRANCH_CALL_STACK) {
- if (!x86_pmu_has_lbr_callstack())
- return -EOPNOTSUPP;
- if (mask & ~(X86_BR_USER | X86_BR_KERNEL))
- return -EINVAL;
- mask |= X86_BR_CALL | X86_BR_IND_CALL | X86_BR_RET |
- X86_BR_CALL_STACK;
- }
-
- if (br_type & PERF_SAMPLE_BRANCH_IND_JUMP)
- mask |= X86_BR_IND_JMP;
-
- if (br_type & PERF_SAMPLE_BRANCH_CALL)
- mask |= X86_BR_CALL | X86_BR_ZERO_CALL;
- /*
- * stash actual user request into reg, it may
- * be used by fixup code for some CPU
- */
- event->hw.branch_reg.reg = mask;
- return 0;
-}
-
-/*
- * setup the HW LBR filter
- * Used only when available, may not be enough to disambiguate
- * all branches, may need the help of the SW filter
- */
-static int intel_pmu_setup_hw_lbr_filter(struct perf_event *event)
-{
- struct hw_perf_event_extra *reg;
- u64 br_type = event->attr.branch_sample_type;
- u64 mask = 0, v;
- int i;
-
- for (i = 0; i < PERF_SAMPLE_BRANCH_MAX_SHIFT; i++) {
- if (!(br_type & (1ULL << i)))
- continue;
-
- v = x86_pmu.lbr_sel_map[i];
- if (v == LBR_NOT_SUPP)
- return -EOPNOTSUPP;
-
- if (v != LBR_IGN)
- mask |= v;
- }
-
- reg = &event->hw.branch_reg;
- reg->idx = EXTRA_REG_LBR;
-
- /*
- * The first 9 bits (LBR_SEL_MASK) in LBR_SELECT operate
- * in suppress mode. So LBR_SELECT should be set to
- * (~mask & LBR_SEL_MASK) | (mask & ~LBR_SEL_MASK)
- */
- reg->config = mask ^ x86_pmu.lbr_sel_mask;
-
- if ((br_type & PERF_SAMPLE_BRANCH_NO_CYCLES) &&
- (br_type & PERF_SAMPLE_BRANCH_NO_FLAGS) &&
- (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO))
- reg->config |= LBR_NO_INFO;
-
- return 0;
-}
-
-int intel_pmu_setup_lbr_filter(struct perf_event *event)
-{
- int ret = 0;
-
- /*
- * no LBR on this PMU
- */
- if (!x86_pmu.lbr_nr)
- return -EOPNOTSUPP;
-
- /*
- * setup SW LBR filter
- */
- ret = intel_pmu_setup_sw_lbr_filter(event);
- if (ret)
- return ret;
-
- /*
- * setup HW LBR filter, if any
- */
- if (x86_pmu.lbr_sel_map)
- ret = intel_pmu_setup_hw_lbr_filter(event);
-
- return ret;
-}
-
-/*
- * return the type of control flow change at address "from"
- * The instruction is not necessarily a branch (e.g. in case of an interrupt).
- *
- * The branch type returned also includes the priv level of the
- * target of the control flow change (X86_BR_USER, X86_BR_KERNEL).
- *
- * If a branch type is unknown OR the instruction cannot be
- * decoded (e.g., text page not present), then X86_BR_NONE is
- * returned.
- */
-static int branch_type(unsigned long from, unsigned long to, int abort)
-{
- struct insn insn;
- void *addr;
- int bytes_read, bytes_left;
- int ret = X86_BR_NONE;
- int ext, to_plm, from_plm;
- u8 buf[MAX_INSN_SIZE];
- int is64 = 0;
-
- to_plm = kernel_ip(to) ? X86_BR_KERNEL : X86_BR_USER;
- from_plm = kernel_ip(from) ? X86_BR_KERNEL : X86_BR_USER;
-
- /*
-	 * may be zero if the LBR did not fill up after a reset by the time
-	 * we get a PMU interrupt
- */
- if (from == 0 || to == 0)
- return X86_BR_NONE;
-
- if (abort)
- return X86_BR_ABORT | to_plm;
-
- if (from_plm == X86_BR_USER) {
- /*
- * can happen if measuring at the user level only
- * and we interrupt in a kernel thread, e.g., idle.
- */
- if (!current->mm)
- return X86_BR_NONE;
-
- /* may fail if text not present */
- bytes_left = copy_from_user_nmi(buf, (void __user *)from,
- MAX_INSN_SIZE);
- bytes_read = MAX_INSN_SIZE - bytes_left;
- if (!bytes_read)
- return X86_BR_NONE;
-
- addr = buf;
- } else {
- /*
- * The LBR logs any address in the IP, even if the IP just
- * faulted. This means userspace can control the from address.
-		 * Ensure we don't blindly read any address by validating it is
- * a known text address.
- */
- if (kernel_text_address(from)) {
- addr = (void *)from;
- /*
- * Assume we can get the maximum possible size
- * when grabbing kernel data. This is not
- * _strictly_ true since we could possibly be
- * executing up next to a memory hole, but
- * it is very unlikely to be a problem.
- */
- bytes_read = MAX_INSN_SIZE;
- } else {
- return X86_BR_NONE;
- }
- }
-
- /*
- * decoder needs to know the ABI especially
- * on 64-bit systems running 32-bit apps
- */
-#ifdef CONFIG_X86_64
- is64 = kernel_ip((unsigned long)addr) || !test_thread_flag(TIF_IA32);
-#endif
- insn_init(&insn, addr, bytes_read, is64);
- insn_get_opcode(&insn);
- if (!insn.opcode.got)
- return X86_BR_ABORT;
-
- switch (insn.opcode.bytes[0]) {
- case 0xf:
- switch (insn.opcode.bytes[1]) {
- case 0x05: /* syscall */
- case 0x34: /* sysenter */
- ret = X86_BR_SYSCALL;
- break;
- case 0x07: /* sysret */
- case 0x35: /* sysexit */
- ret = X86_BR_SYSRET;
- break;
- case 0x80 ... 0x8f: /* conditional */
- ret = X86_BR_JCC;
- break;
- default:
- ret = X86_BR_NONE;
- }
- break;
- case 0x70 ... 0x7f: /* conditional */
- ret = X86_BR_JCC;
- break;
- case 0xc2: /* near ret */
- case 0xc3: /* near ret */
- case 0xca: /* far ret */
- case 0xcb: /* far ret */
- ret = X86_BR_RET;
- break;
- case 0xcf: /* iret */
- ret = X86_BR_IRET;
- break;
- case 0xcc ... 0xce: /* int */
- ret = X86_BR_INT;
- break;
- case 0xe8: /* call near rel */
- insn_get_immediate(&insn);
- if (insn.immediate1.value == 0) {
- /* zero length call */
- ret = X86_BR_ZERO_CALL;
- break;
- }
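-		/* fall through: a non-zero displacement is a regular call */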
- case 0x9a: /* call far absolute */
- ret = X86_BR_CALL;
- break;
- case 0xe0 ... 0xe3: /* loop jmp */
- ret = X86_BR_JCC;
- break;
- case 0xe9 ... 0xeb: /* jmp */
- ret = X86_BR_JMP;
- break;
- case 0xff: /* call near absolute, call far absolute ind */
- insn_get_modrm(&insn);
- ext = (insn.modrm.bytes[0] >> 3) & 0x7;
- switch (ext) {
- case 2: /* near ind call */
- case 3: /* far ind call */
- ret = X86_BR_IND_CALL;
- break;
- case 4:
- case 5:
- ret = X86_BR_IND_JMP;
- break;
- }
- break;
- default:
- ret = X86_BR_NONE;
- }
- /*
-	 * Interrupts, traps, faults (and thus ring transitions) may
-	 * occur on any instruction. Thus, to classify them correctly,
-	 * we need to first look at the from and to priv levels. If they
-	 * are different and to is in the kernel, then it indicates
-	 * a ring transition. If the from instruction is not a ring
-	 * transition instr (syscall, sysenter, int), then it means
-	 * it was an irq, trap or fault.
- *
- * we have no way of detecting kernel to kernel faults.
- */
- if (from_plm == X86_BR_USER && to_plm == X86_BR_KERNEL
- && ret != X86_BR_SYSCALL && ret != X86_BR_INT)
- ret = X86_BR_IRQ;
-
- /*
- * branch priv level determined by target as
- * is done by HW when LBR_SELECT is implemented
- */
- if (ret != X86_BR_NONE)
- ret |= to_plm;
-
- return ret;
-}
-
-/*
- * implement actual branch filter based on user demand.
- * Hardware may not exactly satisfy that request, thus
- * we need to inspect opcodes. Mismatched branches are
- * discarded. Therefore, the number of branches returned
- * in PERF_SAMPLE_BRANCH_STACK sample may vary.
- */
-static void
-intel_pmu_lbr_filter(struct cpu_hw_events *cpuc)
-{
- u64 from, to;
- int br_sel = cpuc->br_sel;
- int i, j, type;
- bool compress = false;
-
- /* if sampling all branches, then nothing to filter */
- if ((br_sel & X86_BR_ALL) == X86_BR_ALL)
- return;
-
- for (i = 0; i < cpuc->lbr_stack.nr; i++) {
-
- from = cpuc->lbr_entries[i].from;
- to = cpuc->lbr_entries[i].to;
-
- type = branch_type(from, to, cpuc->lbr_entries[i].abort);
- if (type != X86_BR_NONE && (br_sel & X86_BR_ANYTX)) {
- if (cpuc->lbr_entries[i].in_tx)
- type |= X86_BR_IN_TX;
- else
- type |= X86_BR_NO_TX;
- }
-
- /* if type does not correspond, then discard */
- if (type == X86_BR_NONE || (br_sel & type) != type) {
- cpuc->lbr_entries[i].from = 0;
- compress = true;
- }
- }
-
- if (!compress)
- return;
-
- /* remove all entries with from=0 */
- for (i = 0; i < cpuc->lbr_stack.nr; ) {
- if (!cpuc->lbr_entries[i].from) {
- j = i;
- while (++j < cpuc->lbr_stack.nr)
- cpuc->lbr_entries[j-1] = cpuc->lbr_entries[j];
- cpuc->lbr_stack.nr--;
- if (!cpuc->lbr_entries[i].from)
- continue;
- }
- i++;
- }
-}
-
-/*
- * Map interface branch filters onto LBR filters
- */
-static const int nhm_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
- [PERF_SAMPLE_BRANCH_ANY_SHIFT] = LBR_ANY,
- [PERF_SAMPLE_BRANCH_USER_SHIFT] = LBR_USER,
- [PERF_SAMPLE_BRANCH_KERNEL_SHIFT] = LBR_KERNEL,
- [PERF_SAMPLE_BRANCH_HV_SHIFT] = LBR_IGN,
- [PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT] = LBR_RETURN | LBR_REL_JMP
- | LBR_IND_JMP | LBR_FAR,
- /*
- * NHM/WSM erratum: must include REL_JMP+IND_JMP to get CALL branches
- */
- [PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT] =
- LBR_REL_CALL | LBR_IND_CALL | LBR_REL_JMP | LBR_IND_JMP | LBR_FAR,
- /*
- * NHM/WSM erratum: must include IND_JMP to capture IND_CALL
- */
- [PERF_SAMPLE_BRANCH_IND_CALL_SHIFT] = LBR_IND_CALL | LBR_IND_JMP,
- [PERF_SAMPLE_BRANCH_COND_SHIFT] = LBR_JCC,
- [PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT] = LBR_IND_JMP,
-};
-
-static const int snb_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
- [PERF_SAMPLE_BRANCH_ANY_SHIFT] = LBR_ANY,
- [PERF_SAMPLE_BRANCH_USER_SHIFT] = LBR_USER,
- [PERF_SAMPLE_BRANCH_KERNEL_SHIFT] = LBR_KERNEL,
- [PERF_SAMPLE_BRANCH_HV_SHIFT] = LBR_IGN,
- [PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT] = LBR_RETURN | LBR_FAR,
- [PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT] = LBR_REL_CALL | LBR_IND_CALL
- | LBR_FAR,
- [PERF_SAMPLE_BRANCH_IND_CALL_SHIFT] = LBR_IND_CALL,
- [PERF_SAMPLE_BRANCH_COND_SHIFT] = LBR_JCC,
- [PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT] = LBR_IND_JMP,
- [PERF_SAMPLE_BRANCH_CALL_SHIFT] = LBR_REL_CALL,
-};
-
-static const int hsw_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
- [PERF_SAMPLE_BRANCH_ANY_SHIFT] = LBR_ANY,
- [PERF_SAMPLE_BRANCH_USER_SHIFT] = LBR_USER,
- [PERF_SAMPLE_BRANCH_KERNEL_SHIFT] = LBR_KERNEL,
- [PERF_SAMPLE_BRANCH_HV_SHIFT] = LBR_IGN,
- [PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT] = LBR_RETURN | LBR_FAR,
- [PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT] = LBR_REL_CALL | LBR_IND_CALL
- | LBR_FAR,
- [PERF_SAMPLE_BRANCH_IND_CALL_SHIFT] = LBR_IND_CALL,
- [PERF_SAMPLE_BRANCH_COND_SHIFT] = LBR_JCC,
- [PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT] = LBR_REL_CALL | LBR_IND_CALL
- | LBR_RETURN | LBR_CALL_STACK,
- [PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT] = LBR_IND_JMP,
- [PERF_SAMPLE_BRANCH_CALL_SHIFT] = LBR_REL_CALL,
-};
-
-/* core */
-void __init intel_pmu_lbr_init_core(void)
-{
- x86_pmu.lbr_nr = 4;
- x86_pmu.lbr_tos = MSR_LBR_TOS;
- x86_pmu.lbr_from = MSR_LBR_CORE_FROM;
- x86_pmu.lbr_to = MSR_LBR_CORE_TO;
-
- /*
- * SW branch filter usage:
- * - compensate for lack of HW filter
- */
- pr_cont("4-deep LBR, ");
-}
-
-/* nehalem/westmere */
-void __init intel_pmu_lbr_init_nhm(void)
-{
- x86_pmu.lbr_nr = 16;
- x86_pmu.lbr_tos = MSR_LBR_TOS;
- x86_pmu.lbr_from = MSR_LBR_NHM_FROM;
- x86_pmu.lbr_to = MSR_LBR_NHM_TO;
-
- x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
- x86_pmu.lbr_sel_map = nhm_lbr_sel_map;
-
- /*
- * SW branch filter usage:
- * - workaround LBR_SEL errata (see above)
- * - support syscall, sysret capture.
- * That requires LBR_FAR but that means far
-	 * jmps need to be filtered out
- */
- pr_cont("16-deep LBR, ");
-}
-
-/* sandy bridge */
-void __init intel_pmu_lbr_init_snb(void)
-{
- x86_pmu.lbr_nr = 16;
- x86_pmu.lbr_tos = MSR_LBR_TOS;
- x86_pmu.lbr_from = MSR_LBR_NHM_FROM;
- x86_pmu.lbr_to = MSR_LBR_NHM_TO;
-
- x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
- x86_pmu.lbr_sel_map = snb_lbr_sel_map;
-
- /*
- * SW branch filter usage:
- * - support syscall, sysret capture.
- * That requires LBR_FAR but that means far
-	 * jmps need to be filtered out
- */
- pr_cont("16-deep LBR, ");
-}
-
-/* haswell */
-void intel_pmu_lbr_init_hsw(void)
-{
- x86_pmu.lbr_nr = 16;
- x86_pmu.lbr_tos = MSR_LBR_TOS;
- x86_pmu.lbr_from = MSR_LBR_NHM_FROM;
- x86_pmu.lbr_to = MSR_LBR_NHM_TO;
-
- x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
- x86_pmu.lbr_sel_map = hsw_lbr_sel_map;
-
- pr_cont("16-deep LBR, ");
-}
-
-/* skylake */
-__init void intel_pmu_lbr_init_skl(void)
-{
- x86_pmu.lbr_nr = 32;
- x86_pmu.lbr_tos = MSR_LBR_TOS;
- x86_pmu.lbr_from = MSR_LBR_NHM_FROM;
- x86_pmu.lbr_to = MSR_LBR_NHM_TO;
-
- x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
- x86_pmu.lbr_sel_map = hsw_lbr_sel_map;
-
- /*
- * SW branch filter usage:
- * - support syscall, sysret capture.
- * That requires LBR_FAR but that means far
-	 * jmps need to be filtered out
- */
- pr_cont("32-deep LBR, ");
-}
-
-/* atom */
-void __init intel_pmu_lbr_init_atom(void)
-{
- /*
-	 * only models starting at stepping 10 seem
- * to have an operational LBR which can freeze
- * on PMU interrupt
- */
- if (boot_cpu_data.x86_model == 28
- && boot_cpu_data.x86_mask < 10) {
- pr_cont("LBR disabled due to erratum");
- return;
- }
-
- x86_pmu.lbr_nr = 8;
- x86_pmu.lbr_tos = MSR_LBR_TOS;
- x86_pmu.lbr_from = MSR_LBR_CORE_FROM;
- x86_pmu.lbr_to = MSR_LBR_CORE_TO;
-
- /*
- * SW branch filter usage:
- * - compensate for lack of HW filter
- */
- pr_cont("8-deep LBR, ");
-}
-
-/* Knights Landing */
-void intel_pmu_lbr_init_knl(void)
-{
- x86_pmu.lbr_nr = 8;
- x86_pmu.lbr_tos = MSR_LBR_TOS;
- x86_pmu.lbr_from = MSR_LBR_NHM_FROM;
- x86_pmu.lbr_to = MSR_LBR_NHM_TO;
-
- x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
- x86_pmu.lbr_sel_map = snb_lbr_sel_map;
-
- pr_cont("8-deep LBR, ");
-}
diff --git a/arch/x86/kernel/cpu/perf_event_intel_pt.c b/arch/x86/kernel/cpu/perf_event_intel_pt.c
deleted file mode 100644
index c0bbd1033..000000000
--- a/arch/x86/kernel/cpu/perf_event_intel_pt.c
+++ /dev/null
@@ -1,1188 +0,0 @@
-/*
- * Intel(R) Processor Trace PMU driver for perf
- * Copyright (c) 2013-2014, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * Intel PT is specified in the Intel Architecture Instruction Set Extensions
- * Programming Reference:
- * http://software.intel.com/en-us/intel-isa-extensions
- */
-
-#undef DEBUG
-
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
-#include <linux/types.h>
-#include <linux/slab.h>
-#include <linux/device.h>
-
-#include <asm/perf_event.h>
-#include <asm/insn.h>
-#include <asm/io.h>
-#include <asm/intel_pt.h>
-
-#include "perf_event.h"
-#include "intel_pt.h"
-
-static DEFINE_PER_CPU(struct pt, pt_ctx);
-
-static struct pt_pmu pt_pmu;
-
-enum cpuid_regs {
- CR_EAX = 0,
- CR_ECX,
- CR_EDX,
- CR_EBX
-};
-
-/*
- * Capabilities of Intel PT hardware, such as number of address bits or
- * supported output schemes, are cached and exported to userspace as "caps"
- * attribute group of pt pmu device
- * (/sys/bus/event_source/devices/intel_pt/caps/) so that userspace can store
- * relevant bits together with intel_pt traces.
- *
- * These are necessary for both trace decoding (payloads_lip, contains address
- * width encoded in IP-related packets), and event configuration (bitmasks with
- * permitted values for certain bit fields).
- */
-#define PT_CAP(_n, _l, _r, _m) \
- [PT_CAP_ ## _n] = { .name = __stringify(_n), .leaf = _l, \
- .reg = _r, .mask = _m }
-
-static struct pt_cap_desc {
- const char *name;
- u32 leaf;
- u8 reg;
- u32 mask;
-} pt_caps[] = {
- PT_CAP(max_subleaf, 0, CR_EAX, 0xffffffff),
- PT_CAP(cr3_filtering, 0, CR_EBX, BIT(0)),
- PT_CAP(psb_cyc, 0, CR_EBX, BIT(1)),
- PT_CAP(mtc, 0, CR_EBX, BIT(3)),
- PT_CAP(topa_output, 0, CR_ECX, BIT(0)),
- PT_CAP(topa_multiple_entries, 0, CR_ECX, BIT(1)),
- PT_CAP(single_range_output, 0, CR_ECX, BIT(2)),
- PT_CAP(payloads_lip, 0, CR_ECX, BIT(31)),
- PT_CAP(mtc_periods, 1, CR_EAX, 0xffff0000),
- PT_CAP(cycle_thresholds, 1, CR_EBX, 0xffff),
- PT_CAP(psb_periods, 1, CR_EBX, 0xffff0000),
-};
-
-static u32 pt_cap_get(enum pt_capabilities cap)
-{
- struct pt_cap_desc *cd = &pt_caps[cap];
- u32 c = pt_pmu.caps[cd->leaf * PT_CPUID_REGS_NUM + cd->reg];
- unsigned int shift = __ffs(cd->mask);
-
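-	/* Mask out the capability field and shift it down to bit 0. */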
- return (c & cd->mask) >> shift;
-}
-
-static ssize_t pt_cap_show(struct device *cdev,
- struct device_attribute *attr,
- char *buf)
-{
- struct dev_ext_attribute *ea =
- container_of(attr, struct dev_ext_attribute, attr);
- enum pt_capabilities cap = (long)ea->var;
-
- return snprintf(buf, PAGE_SIZE, "%x\n", pt_cap_get(cap));
-}
-
-static struct attribute_group pt_cap_group = {
- .name = "caps",
-};
-
-PMU_FORMAT_ATTR(cyc, "config:1" );
-PMU_FORMAT_ATTR(mtc, "config:9" );
-PMU_FORMAT_ATTR(tsc, "config:10" );
-PMU_FORMAT_ATTR(noretcomp, "config:11" );
-PMU_FORMAT_ATTR(mtc_period, "config:14-17" );
-PMU_FORMAT_ATTR(cyc_thresh, "config:19-22" );
-PMU_FORMAT_ATTR(psb_period, "config:24-27" );
-
-static struct attribute *pt_formats_attr[] = {
- &format_attr_cyc.attr,
- &format_attr_mtc.attr,
- &format_attr_tsc.attr,
- &format_attr_noretcomp.attr,
- &format_attr_mtc_period.attr,
- &format_attr_cyc_thresh.attr,
- &format_attr_psb_period.attr,
- NULL,
-};
-
-static struct attribute_group pt_format_group = {
- .name = "format",
- .attrs = pt_formats_attr,
-};
-
-static const struct attribute_group *pt_attr_groups[] = {
- &pt_cap_group,
- &pt_format_group,
- NULL,
-};
-
-static int __init pt_pmu_hw_init(void)
-{
- struct dev_ext_attribute *de_attrs;
- struct attribute **attrs;
- size_t size;
- int ret;
- long i;
-
- attrs = NULL;
-
- for (i = 0; i < PT_CPUID_LEAVES; i++) {
- cpuid_count(20, i,
- &pt_pmu.caps[CR_EAX + i*PT_CPUID_REGS_NUM],
- &pt_pmu.caps[CR_EBX + i*PT_CPUID_REGS_NUM],
- &pt_pmu.caps[CR_ECX + i*PT_CPUID_REGS_NUM],
- &pt_pmu.caps[CR_EDX + i*PT_CPUID_REGS_NUM]);
- }
-
- ret = -ENOMEM;
- size = sizeof(struct attribute *) * (ARRAY_SIZE(pt_caps)+1);
- attrs = kzalloc(size, GFP_KERNEL);
- if (!attrs)
- goto fail;
-
- size = sizeof(struct dev_ext_attribute) * (ARRAY_SIZE(pt_caps)+1);
- de_attrs = kzalloc(size, GFP_KERNEL);
- if (!de_attrs)
- goto fail;
-
- for (i = 0; i < ARRAY_SIZE(pt_caps); i++) {
- struct dev_ext_attribute *de_attr = de_attrs + i;
-
- de_attr->attr.attr.name = pt_caps[i].name;
-
- sysfs_attr_init(&de_attr->attr.attr);
-
- de_attr->attr.attr.mode = S_IRUGO;
- de_attr->attr.show = pt_cap_show;
- de_attr->var = (void *)i;
-
- attrs[i] = &de_attr->attr.attr;
- }
-
- pt_cap_group.attrs = attrs;
-
- return 0;
-
-fail:
- kfree(attrs);
-
- return ret;
-}
-
-#define RTIT_CTL_CYC_PSB (RTIT_CTL_CYCLEACC | \
- RTIT_CTL_CYC_THRESH | \
- RTIT_CTL_PSB_FREQ)
-
-#define RTIT_CTL_MTC (RTIT_CTL_MTC_EN | \
- RTIT_CTL_MTC_RANGE)
-
-#define PT_CONFIG_MASK (RTIT_CTL_TSC_EN | \
- RTIT_CTL_DISRETC | \
- RTIT_CTL_CYC_PSB | \
- RTIT_CTL_MTC)
-
-static bool pt_event_valid(struct perf_event *event)
-{
- u64 config = event->attr.config;
- u64 allowed, requested;
-
- if ((config & PT_CONFIG_MASK) != config)
- return false;
-
- if (config & RTIT_CTL_CYC_PSB) {
- if (!pt_cap_get(PT_CAP_psb_cyc))
- return false;
-
- allowed = pt_cap_get(PT_CAP_psb_periods);
- requested = (config & RTIT_CTL_PSB_FREQ) >>
- RTIT_CTL_PSB_FREQ_OFFSET;
- if (requested && (!(allowed & BIT(requested))))
- return false;
-
- allowed = pt_cap_get(PT_CAP_cycle_thresholds);
- requested = (config & RTIT_CTL_CYC_THRESH) >>
- RTIT_CTL_CYC_THRESH_OFFSET;
- if (requested && (!(allowed & BIT(requested))))
- return false;
- }
-
- if (config & RTIT_CTL_MTC) {
- /*
- * In the unlikely case that CPUID lists valid mtc periods,
- * but not the mtc capability, drop out here.
- *
- * Spec says that setting mtc period bits while mtc bit in
- * CPUID is 0 will #GP, so better safe than sorry.
- */
- if (!pt_cap_get(PT_CAP_mtc))
- return false;
-
- allowed = pt_cap_get(PT_CAP_mtc_periods);
- if (!allowed)
- return false;
-
- requested = (config & RTIT_CTL_MTC_RANGE) >>
- RTIT_CTL_MTC_RANGE_OFFSET;
-
- if (!(allowed & BIT(requested)))
- return false;
- }
-
- return true;
-}
-
-/*
- * PT configuration helpers
- * These all are cpu affine and operate on a local PT
- */
-
-static void pt_config(struct perf_event *event)
-{
- u64 reg;
-
- if (!event->hw.itrace_started) {
- event->hw.itrace_started = 1;
- wrmsrl(MSR_IA32_RTIT_STATUS, 0);
- }
-
- reg = RTIT_CTL_TOPA | RTIT_CTL_BRANCH_EN | RTIT_CTL_TRACEEN;
-
- if (!event->attr.exclude_kernel)
- reg |= RTIT_CTL_OS;
- if (!event->attr.exclude_user)
- reg |= RTIT_CTL_USR;
-
- reg |= (event->attr.config & PT_CONFIG_MASK);
-
- wrmsrl(MSR_IA32_RTIT_CTL, reg);
-}
-
-static void pt_config_start(bool start)
-{
- u64 ctl;
-
- rdmsrl(MSR_IA32_RTIT_CTL, ctl);
- if (start)
- ctl |= RTIT_CTL_TRACEEN;
- else
- ctl &= ~RTIT_CTL_TRACEEN;
- wrmsrl(MSR_IA32_RTIT_CTL, ctl);
-
- /*
- * A wrmsr that disables trace generation serializes other PT
- * registers and causes all data packets to be written to memory,
- * but a fence is required for the data to become globally visible.
- *
- * The below WMB, separating data store and aux_head store matches
- * the consumer's RMB that separates aux_head load and data load.
- */
- if (!start)
- wmb();
-}
-
-static void pt_config_buffer(void *buf, unsigned int topa_idx,
- unsigned int output_off)
-{
- u64 reg;
-
- wrmsrl(MSR_IA32_RTIT_OUTPUT_BASE, virt_to_phys(buf));
-
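-	/*
-	 * low 7 bits of the OUTPUT_MASK value stay set; bits 31:7 hold the ToPA
-	 * entry index and bits 63:32 the offset within that entry's output
-	 * region (pt_read_offset() decodes the same layout)
-	 */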
- reg = 0x7f | ((u64)topa_idx << 7) | ((u64)output_off << 32);
-
- wrmsrl(MSR_IA32_RTIT_OUTPUT_MASK, reg);
-}
-
-/*
- * Keep ToPA table-related metadata on the same page as the actual table,
- * taking up a few words from the top
- */
-
-#define TENTS_PER_PAGE (((PAGE_SIZE - 40) / sizeof(struct topa_entry)) - 1)
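-/* e.g. assuming 4KiB pages and 8-byte topa_entry structs: (4096 - 40) / 8 - 1 = 506 entries per table */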
-
-/**
- * struct topa - page-sized ToPA table with metadata at the top
- * @table: actual ToPA table entries, as understood by PT hardware
- * @list: linkage to struct pt_buffer's list of tables
- * @phys: physical address of this page
- * @offset: offset of the first entry in this table in the buffer
- * @size: total size of all entries in this table
- * @last: index of the last initialized entry in this table
- */
-struct topa {
- struct topa_entry table[TENTS_PER_PAGE];
- struct list_head list;
- u64 phys;
- u64 offset;
- size_t size;
- int last;
-};
-
-/* make -1 stand for the last table entry */
-#define TOPA_ENTRY(t, i) ((i) == -1 ? &(t)->table[(t)->last] : &(t)->table[(i)])
-
-/**
- * topa_alloc() - allocate page-sized ToPA table
- * @cpu: CPU on which to allocate.
- * @gfp: Allocation flags.
- *
- * Return: On success, return the pointer to ToPA table page.
- */
-static struct topa *topa_alloc(int cpu, gfp_t gfp)
-{
- int node = cpu_to_node(cpu);
- struct topa *topa;
- struct page *p;
-
- p = alloc_pages_node(node, gfp | __GFP_ZERO, 0);
- if (!p)
- return NULL;
-
- topa = page_address(p);
- topa->last = 0;
- topa->phys = page_to_phys(p);
-
- /*
-	 * In case of single-entry ToPA, always put the self-referencing END
- * link as the 2nd entry in the table
- */
- if (!pt_cap_get(PT_CAP_topa_multiple_entries)) {
- TOPA_ENTRY(topa, 1)->base = topa->phys >> TOPA_SHIFT;
- TOPA_ENTRY(topa, 1)->end = 1;
- }
-
- return topa;
-}
-
-/**
- * topa_free() - free a page-sized ToPA table
- * @topa: Table to deallocate.
- */
-static void topa_free(struct topa *topa)
-{
- free_page((unsigned long)topa);
-}
-
-/**
- * topa_insert_table() - insert a ToPA table into a buffer
- * @buf: PT buffer that's being extended.
- * @topa: New topa table to be inserted.
- *
- * If it's the first table in this buffer, set up buffer's pointers
- * accordingly; otherwise, add a END=1 link entry to @topa to the current
- * "last" table and adjust the last table pointer to @topa.
- */
-static void topa_insert_table(struct pt_buffer *buf, struct topa *topa)
-{
- struct topa *last = buf->last;
-
- list_add_tail(&topa->list, &buf->tables);
-
- if (!buf->first) {
- buf->first = buf->last = buf->cur = topa;
- return;
- }
-
- topa->offset = last->offset + last->size;
- buf->last = topa;
-
- if (!pt_cap_get(PT_CAP_topa_multiple_entries))
- return;
-
- BUG_ON(last->last != TENTS_PER_PAGE - 1);
-
- TOPA_ENTRY(last, -1)->base = topa->phys >> TOPA_SHIFT;
- TOPA_ENTRY(last, -1)->end = 1;
-}
-
-/**
- * topa_table_full() - check if a ToPA table is filled up
- * @topa: ToPA table.
- */
-static bool topa_table_full(struct topa *topa)
-{
- /* single-entry ToPA is a special case */
- if (!pt_cap_get(PT_CAP_topa_multiple_entries))
- return !!topa->last;
-
- return topa->last == TENTS_PER_PAGE - 1;
-}
-
-/**
- * topa_insert_pages() - create a list of ToPA tables
- * @buf: PT buffer being initialized.
- * @gfp: Allocation flags.
- *
- * This initializes a list of ToPA tables with entries from
- * the data_pages provided by rb_alloc_aux().
- *
- * Return: 0 on success or error code.
- */
-static int topa_insert_pages(struct pt_buffer *buf, gfp_t gfp)
-{
- struct topa *topa = buf->last;
- int order = 0;
- struct page *p;
-
- p = virt_to_page(buf->data_pages[buf->nr_pages]);
- if (PagePrivate(p))
- order = page_private(p);
-
- if (topa_table_full(topa)) {
- topa = topa_alloc(buf->cpu, gfp);
- if (!topa)
- return -ENOMEM;
-
- topa_insert_table(buf, topa);
- }
-
- TOPA_ENTRY(topa, -1)->base = page_to_phys(p) >> TOPA_SHIFT;
- TOPA_ENTRY(topa, -1)->size = order;
- if (!buf->snapshot && !pt_cap_get(PT_CAP_topa_multiple_entries)) {
- TOPA_ENTRY(topa, -1)->intr = 1;
- TOPA_ENTRY(topa, -1)->stop = 1;
- }
-
- topa->last++;
- topa->size += sizes(order);
-
- buf->nr_pages += 1ul << order;
-
- return 0;
-}
-
-/**
- * pt_topa_dump() - print ToPA tables and their entries
- * @buf: PT buffer.
- */
-static void pt_topa_dump(struct pt_buffer *buf)
-{
- struct topa *topa;
-
- list_for_each_entry(topa, &buf->tables, list) {
- int i;
-
- pr_debug("# table @%p (%016Lx), off %llx size %zx\n", topa->table,
- topa->phys, topa->offset, topa->size);
- for (i = 0; i < TENTS_PER_PAGE; i++) {
- pr_debug("# entry @%p (%lx sz %u %c%c%c) raw=%16llx\n",
- &topa->table[i],
- (unsigned long)topa->table[i].base << TOPA_SHIFT,
- sizes(topa->table[i].size),
- topa->table[i].end ? 'E' : ' ',
- topa->table[i].intr ? 'I' : ' ',
- topa->table[i].stop ? 'S' : ' ',
- *(u64 *)&topa->table[i]);
- if ((pt_cap_get(PT_CAP_topa_multiple_entries) &&
- topa->table[i].stop) ||
- topa->table[i].end)
- break;
- }
- }
-}
-
-/**
- * pt_buffer_advance() - advance to the next output region
- * @buf: PT buffer.
- *
- * Advance the current pointers in the buffer to the next ToPA entry.
- */
-static void pt_buffer_advance(struct pt_buffer *buf)
-{
- buf->output_off = 0;
- buf->cur_idx++;
-
- if (buf->cur_idx == buf->cur->last) {
- if (buf->cur == buf->last)
- buf->cur = buf->first;
- else
- buf->cur = list_entry(buf->cur->list.next, struct topa,
- list);
- buf->cur_idx = 0;
- }
-}
-
-/**
- * pt_update_head() - calculate current offsets and sizes
- * @pt: Per-cpu pt context.
- *
- * Update buffer's current write pointer position and data size.
- */
-static void pt_update_head(struct pt *pt)
-{
- struct pt_buffer *buf = perf_get_aux(&pt->handle);
- u64 topa_idx, base, old;
-
- /* offset of the first region in this table from the beginning of buf */
- base = buf->cur->offset + buf->output_off;
-
- /* offset of the current output region within this table */
- for (topa_idx = 0; topa_idx < buf->cur_idx; topa_idx++)
- base += sizes(buf->cur->table[topa_idx].size);
-
- if (buf->snapshot) {
- local_set(&buf->data_size, base);
- } else {
- old = (local64_xchg(&buf->head, base) &
- ((buf->nr_pages << PAGE_SHIFT) - 1));
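-		/* if the new position is behind the old head, the write pointer wrapped around the buffer */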
- if (base < old)
- base += buf->nr_pages << PAGE_SHIFT;
-
- local_add(base - old, &buf->data_size);
- }
-}
-
-/**
- * pt_buffer_region() - obtain current output region's address
- * @buf: PT buffer.
- */
-static void *pt_buffer_region(struct pt_buffer *buf)
-{
- return phys_to_virt(buf->cur->table[buf->cur_idx].base << TOPA_SHIFT);
-}
-
-/**
- * pt_buffer_region_size() - obtain current output region's size
- * @buf: PT buffer.
- */
-static size_t pt_buffer_region_size(struct pt_buffer *buf)
-{
- return sizes(buf->cur->table[buf->cur_idx].size);
-}
-
-/**
- * pt_handle_status() - take care of possible status conditions
- * @pt: Per-cpu pt context.
- */
-static void pt_handle_status(struct pt *pt)
-{
- struct pt_buffer *buf = perf_get_aux(&pt->handle);
- int advance = 0;
- u64 status;
-
- rdmsrl(MSR_IA32_RTIT_STATUS, status);
-
- if (status & RTIT_STATUS_ERROR) {
- pr_err_ratelimited("ToPA ERROR encountered, trying to recover\n");
- pt_topa_dump(buf);
- status &= ~RTIT_STATUS_ERROR;
- }
-
- if (status & RTIT_STATUS_STOPPED) {
- status &= ~RTIT_STATUS_STOPPED;
-
- /*
- * On systems that only do single-entry ToPA, hitting STOP
- * means we are already losing data; need to let the decoder
- * know.
- */
- if (!pt_cap_get(PT_CAP_topa_multiple_entries) ||
- buf->output_off == sizes(TOPA_ENTRY(buf->cur, buf->cur_idx)->size)) {
- local_inc(&buf->lost);
- advance++;
- }
- }
-
- /*
-	 * Also on single-entry ToPA implementations, the interrupt will come
- * before the output reaches its output region's boundary.
- */
- if (!pt_cap_get(PT_CAP_topa_multiple_entries) && !buf->snapshot &&
- pt_buffer_region_size(buf) - buf->output_off <= TOPA_PMI_MARGIN) {
- void *head = pt_buffer_region(buf);
-
- /* everything within this margin needs to be zeroed out */
- memset(head + buf->output_off, 0,
- pt_buffer_region_size(buf) -
- buf->output_off);
- advance++;
- }
-
- if (advance)
- pt_buffer_advance(buf);
-
- wrmsrl(MSR_IA32_RTIT_STATUS, status);
-}
-
-/**
- * pt_read_offset() - translate registers into buffer pointers
- * @buf: PT buffer.
- *
- * Set buffer's output pointers from MSR values.
- */
-static void pt_read_offset(struct pt_buffer *buf)
-{
- u64 offset, base_topa;
-
- rdmsrl(MSR_IA32_RTIT_OUTPUT_BASE, base_topa);
- buf->cur = phys_to_virt(base_topa);
-
- rdmsrl(MSR_IA32_RTIT_OUTPUT_MASK, offset);
- /* offset within current output region */
- buf->output_off = offset >> 32;
- /* index of current output region within this table */
- buf->cur_idx = (offset & 0xffffff80) >> 7;
-}
-
-/**
- * pt_topa_next_entry() - obtain index of the first page in the next ToPA entry
- * @buf: PT buffer.
- * @pg: Page offset in the buffer.
- *
- * When advancing to the next output region (ToPA entry), given a page offset
- * into the buffer, we need to find the offset of the first page in the next
- * region.
- */
-static unsigned int pt_topa_next_entry(struct pt_buffer *buf, unsigned int pg)
-{
- struct topa_entry *te = buf->topa_index[pg];
-
- /* one region */
- if (buf->first == buf->last && buf->first->last == 1)
- return pg;
-
- do {
- pg++;
- pg &= buf->nr_pages - 1;
- } while (buf->topa_index[pg] == te);
-
- return pg;
-}
-
-/**
- * pt_buffer_reset_markers() - place interrupt and stop bits in the buffer
- * @buf: PT buffer.
- * @handle: Current output handle.
- *
- * Place INT and STOP marks to prevent overwriting old data that the consumer
- * hasn't yet collected and waking up the consumer after a certain fraction of
- * the buffer has filled up. Only needed and sensible for non-snapshot counters.
- *
- * This obviously relies on buf::head to figure out buffer markers, so it has
- * to be called after pt_buffer_reset_offsets() and before the hardware tracing
- * is enabled.
- */
-static int pt_buffer_reset_markers(struct pt_buffer *buf,
- struct perf_output_handle *handle)
-
-{
- unsigned long head = local64_read(&buf->head);
- unsigned long idx, npages, wakeup;
-
- /* can't stop in the middle of an output region */
- if (buf->output_off + handle->size + 1 <
- sizes(TOPA_ENTRY(buf->cur, buf->cur_idx)->size))
- return -EINVAL;
-
-
- /* single entry ToPA is handled by marking all regions STOP=1 INT=1 */
- if (!pt_cap_get(PT_CAP_topa_multiple_entries))
- return 0;
-
- /* clear STOP and INT from current entry */
- buf->topa_index[buf->stop_pos]->stop = 0;
- buf->topa_index[buf->intr_pos]->intr = 0;
-
- /* how many pages till the STOP marker */
- npages = handle->size >> PAGE_SHIFT;
-
- /* if it's on a page boundary, fill up one more page */
- if (!offset_in_page(head + handle->size + 1))
- npages++;
-
- idx = (head >> PAGE_SHIFT) + npages;
- idx &= buf->nr_pages - 1;
- buf->stop_pos = idx;
-
- wakeup = handle->wakeup >> PAGE_SHIFT;
-
- /* in the worst case, wake up the consumer one page before hard stop */
- idx = (head >> PAGE_SHIFT) + npages - 1;
- if (idx > wakeup)
- idx = wakeup;
-
- idx &= buf->nr_pages - 1;
- buf->intr_pos = idx;
-
- buf->topa_index[buf->stop_pos]->stop = 1;
- buf->topa_index[buf->intr_pos]->intr = 1;
-
- return 0;
-}
-
-/**
- * pt_buffer_setup_topa_index() - build topa_index[] table of regions
- * @buf: PT buffer.
- *
- * topa_index[] references output regions indexed by offset into the
- * buffer for purposes of quick reverse lookup.
- */
-static void pt_buffer_setup_topa_index(struct pt_buffer *buf)
-{
- struct topa *cur = buf->first, *prev = buf->last;
- struct topa_entry *te_cur = TOPA_ENTRY(cur, 0),
- *te_prev = TOPA_ENTRY(prev, prev->last - 1);
- int pg = 0, idx = 0;
-
- while (pg < buf->nr_pages) {
- int tidx;
-
- /* pages within one topa entry */
- for (tidx = 0; tidx < 1 << te_cur->size; tidx++, pg++)
- buf->topa_index[pg] = te_prev;
-
- te_prev = te_cur;
-
- if (idx == cur->last - 1) {
- /* advance to next topa table */
- idx = 0;
- cur = list_entry(cur->list.next, struct topa, list);
- } else {
- idx++;
- }
- te_cur = TOPA_ENTRY(cur, idx);
- }
-
-}
-
-/**
- * pt_buffer_reset_offsets() - adjust buffer's write pointers from aux_head
- * @buf: PT buffer.
- * @head: Write pointer (aux_head) from AUX buffer.
- *
- * Find the ToPA table and entry corresponding to given @head and set buffer's
- * "current" pointers accordingly. This is done after we have obtained the
- * current aux_head position from a successful call to perf_aux_output_begin()
- * to make sure the hardware is writing to the right place.
- *
- * This function modifies buf::{cur,cur_idx,output_off} that will be programmed
- * into PT msrs when the tracing is enabled and buf::head and buf::data_size,
- * which are used to determine INT and STOP markers' locations by a subsequent
- * call to pt_buffer_reset_markers().
- */
-static void pt_buffer_reset_offsets(struct pt_buffer *buf, unsigned long head)
-{
- int pg;
-
- if (buf->snapshot)
- head &= (buf->nr_pages << PAGE_SHIFT) - 1;
-
- pg = (head >> PAGE_SHIFT) & (buf->nr_pages - 1);
- pg = pt_topa_next_entry(buf, pg);
-
- buf->cur = (struct topa *)((unsigned long)buf->topa_index[pg] & PAGE_MASK);
- buf->cur_idx = ((unsigned long)buf->topa_index[pg] -
- (unsigned long)buf->cur) / sizeof(struct topa_entry);
- buf->output_off = head & (sizes(buf->cur->table[buf->cur_idx].size) - 1);
-
- local64_set(&buf->head, head);
- local_set(&buf->data_size, 0);
-}
-
-/**
- * pt_buffer_fini_topa() - deallocate ToPA structure of a buffer
- * @buf: PT buffer.
- */
-static void pt_buffer_fini_topa(struct pt_buffer *buf)
-{
- struct topa *topa, *iter;
-
- list_for_each_entry_safe(topa, iter, &buf->tables, list) {
- /*
- * right now, this is in free_aux() path only, so
- * no need to unlink this table from the list
- */
- topa_free(topa);
- }
-}
-
-/**
- * pt_buffer_init_topa() - initialize ToPA table for pt buffer
- * @buf: PT buffer.
- * @size: Total size of all regions within this ToPA.
- * @gfp: Allocation flags.
- */
-static int pt_buffer_init_topa(struct pt_buffer *buf, unsigned long nr_pages,
- gfp_t gfp)
-{
- struct topa *topa;
- int err;
-
- topa = topa_alloc(buf->cpu, gfp);
- if (!topa)
- return -ENOMEM;
-
- topa_insert_table(buf, topa);
-
- while (buf->nr_pages < nr_pages) {
- err = topa_insert_pages(buf, gfp);
- if (err) {
- pt_buffer_fini_topa(buf);
- return -ENOMEM;
- }
- }
-
- pt_buffer_setup_topa_index(buf);
-
- /* link last table to the first one, unless we're double buffering */
- if (pt_cap_get(PT_CAP_topa_multiple_entries)) {
- TOPA_ENTRY(buf->last, -1)->base = buf->first->phys >> TOPA_SHIFT;
- TOPA_ENTRY(buf->last, -1)->end = 1;
- }
-
- pt_topa_dump(buf);
- return 0;
-}
-
-/**
- * pt_buffer_setup_aux() - set up topa tables for a PT buffer
- * @cpu: Cpu on which to allocate, -1 means current.
- * @pages: Array of pointers to buffer pages passed from perf core.
- * @nr_pages: Number of pages in the buffer.
- * @snapshot: If this is a snapshot/overwrite counter.
- *
- * This is a pmu::setup_aux callback that sets up ToPA tables and all the
- * bookkeeping for an AUX buffer.
- *
- * Return: Our private PT buffer structure.
- */
-static void *
-pt_buffer_setup_aux(int cpu, void **pages, int nr_pages, bool snapshot)
-{
- struct pt_buffer *buf;
- int node, ret;
-
- if (!nr_pages)
- return NULL;
-
- if (cpu == -1)
- cpu = raw_smp_processor_id();
- node = cpu_to_node(cpu);
-
- buf = kzalloc_node(offsetof(struct pt_buffer, topa_index[nr_pages]),
- GFP_KERNEL, node);
- if (!buf)
- return NULL;
-
- buf->cpu = cpu;
- buf->snapshot = snapshot;
- buf->data_pages = pages;
-
- INIT_LIST_HEAD(&buf->tables);
-
- ret = pt_buffer_init_topa(buf, nr_pages, GFP_KERNEL);
- if (ret) {
- kfree(buf);
- return NULL;
- }
-
- return buf;
-}
-
-/**
- * pt_buffer_free_aux() - perf AUX deallocation path callback
- * @data: PT buffer.
- */
-static void pt_buffer_free_aux(void *data)
-{
- struct pt_buffer *buf = data;
-
- pt_buffer_fini_topa(buf);
- kfree(buf);
-}
-
-/**
- * pt_buffer_is_full() - check if the buffer is full
- * @buf: PT buffer.
- * @pt: Per-cpu pt handle.
- *
- * If the user hasn't read data from the output region that aux_head
- * points to, the buffer is considered full: the user needs to read at
- * least this region and update aux_tail to point past it.
- */
-static bool pt_buffer_is_full(struct pt_buffer *buf, struct pt *pt)
-{
- if (buf->snapshot)
- return false;
-
- if (local_read(&buf->data_size) >= pt->handle.size)
- return true;
-
- return false;
-}
-
-/**
- * intel_pt_interrupt() - PT PMI handler
- */
-void intel_pt_interrupt(void)
-{
- struct pt *pt = this_cpu_ptr(&pt_ctx);
- struct pt_buffer *buf;
- struct perf_event *event = pt->handle.event;
-
- /*
- * There may be a dangling PT bit in the interrupt status register
- * after PT has been disabled by pt_event_stop(). Make sure we don't
- * do anything (particularly, re-enable) for this event here.
- */
- if (!ACCESS_ONCE(pt->handle_nmi))
- return;
-
- pt_config_start(false);
-
- if (!event)
- return;
-
- buf = perf_get_aux(&pt->handle);
- if (!buf)
- return;
-
- pt_read_offset(buf);
-
- pt_handle_status(pt);
-
- pt_update_head(pt);
-
- perf_aux_output_end(&pt->handle, local_xchg(&buf->data_size, 0),
- local_xchg(&buf->lost, 0));
-
- if (!event->hw.state) {
- int ret;
-
- buf = perf_aux_output_begin(&pt->handle, event);
- if (!buf) {
- event->hw.state = PERF_HES_STOPPED;
- return;
- }
-
- pt_buffer_reset_offsets(buf, pt->handle.head);
- /* snapshot counters don't use PMI, so it's safe */
- ret = pt_buffer_reset_markers(buf, &pt->handle);
- if (ret) {
- perf_aux_output_end(&pt->handle, 0, true);
- return;
- }
-
- pt_config_buffer(buf->cur->table, buf->cur_idx,
- buf->output_off);
- pt_config(event);
- }
-}
-
-/*
- * PMU callbacks
- */
-
-static void pt_event_start(struct perf_event *event, int mode)
-{
- struct pt *pt = this_cpu_ptr(&pt_ctx);
- struct pt_buffer *buf = perf_get_aux(&pt->handle);
-
- if (!buf || pt_buffer_is_full(buf, pt)) {
- event->hw.state = PERF_HES_STOPPED;
- return;
- }
-
- ACCESS_ONCE(pt->handle_nmi) = 1;
- event->hw.state = 0;
-
- pt_config_buffer(buf->cur->table, buf->cur_idx,
- buf->output_off);
- pt_config(event);
-}
-
-static void pt_event_stop(struct perf_event *event, int mode)
-{
- struct pt *pt = this_cpu_ptr(&pt_ctx);
-
- /*
- * Protect against the PMI racing with disabling wrmsr,
- * see comment in intel_pt_interrupt().
- */
- ACCESS_ONCE(pt->handle_nmi) = 0;
- pt_config_start(false);
-
- if (event->hw.state == PERF_HES_STOPPED)
- return;
-
- event->hw.state = PERF_HES_STOPPED;
-
- if (mode & PERF_EF_UPDATE) {
- struct pt_buffer *buf = perf_get_aux(&pt->handle);
-
- if (!buf)
- return;
-
- if (WARN_ON_ONCE(pt->handle.event != event))
- return;
-
- pt_read_offset(buf);
-
- pt_handle_status(pt);
-
- pt_update_head(pt);
- }
-}
-
-static void pt_event_del(struct perf_event *event, int mode)
-{
- struct pt *pt = this_cpu_ptr(&pt_ctx);
- struct pt_buffer *buf;
-
- pt_event_stop(event, PERF_EF_UPDATE);
-
- buf = perf_get_aux(&pt->handle);
-
- if (buf) {
- if (buf->snapshot)
- pt->handle.head =
- local_xchg(&buf->data_size,
- buf->nr_pages << PAGE_SHIFT);
- perf_aux_output_end(&pt->handle, local_xchg(&buf->data_size, 0),
- local_xchg(&buf->lost, 0));
- }
-}
-
-static int pt_event_add(struct perf_event *event, int mode)
-{
- struct pt_buffer *buf;
- struct pt *pt = this_cpu_ptr(&pt_ctx);
- struct hw_perf_event *hwc = &event->hw;
- int ret = -EBUSY;
-
- if (pt->handle.event)
- goto fail;
-
- buf = perf_aux_output_begin(&pt->handle, event);
- ret = -EINVAL;
- if (!buf)
- goto fail_stop;
-
- pt_buffer_reset_offsets(buf, pt->handle.head);
- if (!buf->snapshot) {
- ret = pt_buffer_reset_markers(buf, &pt->handle);
- if (ret)
- goto fail_end_stop;
- }
-
- if (mode & PERF_EF_START) {
- pt_event_start(event, 0);
- ret = -EBUSY;
- if (hwc->state == PERF_HES_STOPPED)
- goto fail_end_stop;
- } else {
- hwc->state = PERF_HES_STOPPED;
- }
-
- return 0;
-
-fail_end_stop:
- perf_aux_output_end(&pt->handle, 0, true);
-fail_stop:
- hwc->state = PERF_HES_STOPPED;
-fail:
- return ret;
-}
-
-static void pt_event_read(struct perf_event *event)
-{
-}
-
-static void pt_event_destroy(struct perf_event *event)
-{
- x86_del_exclusive(x86_lbr_exclusive_pt);
-}
-
-static int pt_event_init(struct perf_event *event)
-{
- if (event->attr.type != pt_pmu.pmu.type)
- return -ENOENT;
-
- if (!pt_event_valid(event))
- return -EINVAL;
-
- if (x86_add_exclusive(x86_lbr_exclusive_pt))
- return -EBUSY;
-
- event->destroy = pt_event_destroy;
-
- return 0;
-}
-
-void cpu_emergency_stop_pt(void)
-{
- struct pt *pt = this_cpu_ptr(&pt_ctx);
-
- if (pt->handle.event)
- pt_event_stop(pt->handle.event, PERF_EF_UPDATE);
-}
-
-static __init int pt_init(void)
-{
- int ret, cpu, prior_warn = 0;
-
- BUILD_BUG_ON(sizeof(struct topa) > PAGE_SIZE);
-
- if (!test_cpu_cap(&boot_cpu_data, X86_FEATURE_INTEL_PT))
- return -ENODEV;
-
- get_online_cpus();
- for_each_online_cpu(cpu) {
- u64 ctl;
-
- ret = rdmsrl_safe_on_cpu(cpu, MSR_IA32_RTIT_CTL, &ctl);
- if (!ret && (ctl & RTIT_CTL_TRACEEN))
- prior_warn++;
- }
- put_online_cpus();
-
- if (prior_warn) {
- x86_add_exclusive(x86_lbr_exclusive_pt);
- pr_warn("PT is enabled at boot time, doing nothing\n");
-
- return -EBUSY;
- }
-
- ret = pt_pmu_hw_init();
- if (ret)
- return ret;
-
- if (!pt_cap_get(PT_CAP_topa_output)) {
- pr_warn("ToPA output is not supported on this CPU\n");
- return -ENODEV;
- }
-
- if (!pt_cap_get(PT_CAP_topa_multiple_entries))
- pt_pmu.pmu.capabilities =
- PERF_PMU_CAP_AUX_NO_SG | PERF_PMU_CAP_AUX_SW_DOUBLEBUF;
-
- pt_pmu.pmu.capabilities |= PERF_PMU_CAP_EXCLUSIVE | PERF_PMU_CAP_ITRACE;
- pt_pmu.pmu.attr_groups = pt_attr_groups;
- pt_pmu.pmu.task_ctx_nr = perf_sw_context;
- pt_pmu.pmu.event_init = pt_event_init;
- pt_pmu.pmu.add = pt_event_add;
- pt_pmu.pmu.del = pt_event_del;
- pt_pmu.pmu.start = pt_event_start;
- pt_pmu.pmu.stop = pt_event_stop;
- pt_pmu.pmu.read = pt_event_read;
- pt_pmu.pmu.setup_aux = pt_buffer_setup_aux;
- pt_pmu.pmu.free_aux = pt_buffer_free_aux;
- ret = perf_pmu_register(&pt_pmu.pmu, "intel_pt", -1);
-
- return ret;
-}
-arch_initcall(pt_init);
diff --git a/arch/x86/kernel/cpu/perf_event_intel_rapl.c b/arch/x86/kernel/cpu/perf_event_intel_rapl.c
deleted file mode 100644
index 24a351ad6..000000000
--- a/arch/x86/kernel/cpu/perf_event_intel_rapl.c
+++ /dev/null
@@ -1,783 +0,0 @@
-/*
- * perf_event_intel_rapl.c: support Intel RAPL energy consumption counters
- * Copyright (C) 2013 Google, Inc., Stephane Eranian
- *
- * Intel RAPL interface is specified in the IA-32 Manual Vol3b
- * section 14.7.1 (September 2013)
- *
- * RAPL provides more controls than just reporting energy consumption;
- * however, here we only expose the energy consumption free running
- * counters (pp0, pkg, dram, pp1/gpu).
- *
- * Each of those counters increments in a power unit defined by the
- * RAPL_POWER_UNIT MSR. On SandyBridge, this unit is 1/(2^16) Joules
- * but it can vary.
- *
- * Counter to rapl events mappings:
- *
- * pp0 counter: consumption of all physical cores (power plane 0)
- * event: rapl_energy_cores
- * perf code: 0x1
- *
- * pkg counter: consumption of the whole processor package
- * event: rapl_energy_pkg
- * perf code: 0x2
- *
- * dram counter: consumption of the dram domain (servers only)
- * event: rapl_energy_dram
- * perf code: 0x3
- *
- * gpu counter: consumption of the builtin-gpu domain (client only)
- * event: rapl_energy_gpu
- * perf code: 0x4
- *
- * We manage those counters as free running (read-only). They may be
- * used simultaneously by other tools, such as turbostat.
- *
- * The events only support system-wide mode counting. There is no
- * sampling support because it does not make sense and is not
- * supported by the RAPL hardware.
- *
- * Because we want to avoid floating-point operations in the kernel,
- * the events are all reported in fixed point arithmetic (32.32).
- * Tools must adjust the counts to convert them to Watts using
- * the duration of the measurement. Tools may use a function such as
- * ldexp(raw_count, -32);
- */
-#include <linux/module.h>
-#include <linux/slab.h>
-#include <linux/perf_event.h>
-#include <asm/cpu_device_id.h>
-#include "perf_event.h"
-
-/*
- * RAPL energy status counters
- */
-#define RAPL_IDX_PP0_NRG_STAT 0 /* all cores */
-#define INTEL_RAPL_PP0 0x1 /* pseudo-encoding */
-#define RAPL_IDX_PKG_NRG_STAT 1 /* entire package */
-#define INTEL_RAPL_PKG 0x2 /* pseudo-encoding */
-#define RAPL_IDX_RAM_NRG_STAT 2 /* DRAM */
-#define INTEL_RAPL_RAM 0x3 /* pseudo-encoding */
-#define RAPL_IDX_PP1_NRG_STAT 3 /* gpu */
-#define INTEL_RAPL_PP1 0x4 /* pseudo-encoding */
-
-#define NR_RAPL_DOMAINS 0x4
-static const char *const rapl_domain_names[NR_RAPL_DOMAINS] __initconst = {
- "pp0-core",
- "package",
- "dram",
- "pp1-gpu",
-};
-
-/* Clients have PP0, PKG */
-#define RAPL_IDX_CLN (1<<RAPL_IDX_PP0_NRG_STAT|\
- 1<<RAPL_IDX_PKG_NRG_STAT|\
- 1<<RAPL_IDX_PP1_NRG_STAT)
-
-/* Servers have PP0, PKG, RAM */
-#define RAPL_IDX_SRV (1<<RAPL_IDX_PP0_NRG_STAT|\
- 1<<RAPL_IDX_PKG_NRG_STAT|\
- 1<<RAPL_IDX_RAM_NRG_STAT)
-
-/* Servers have PP0, PKG, RAM, PP1 */
-#define RAPL_IDX_HSW (1<<RAPL_IDX_PP0_NRG_STAT|\
- 1<<RAPL_IDX_PKG_NRG_STAT|\
- 1<<RAPL_IDX_RAM_NRG_STAT|\
- 1<<RAPL_IDX_PP1_NRG_STAT)
-
-/* Knights Landing has PKG, RAM */
-#define RAPL_IDX_KNL (1<<RAPL_IDX_PKG_NRG_STAT|\
- 1<<RAPL_IDX_RAM_NRG_STAT)
-
-/*
- * event code: LSB 8 bits, passed in attr->config
- * any other bit is reserved
- */
-#define RAPL_EVENT_MASK 0xFFULL
-
-#define DEFINE_RAPL_FORMAT_ATTR(_var, _name, _format) \
-static ssize_t __rapl_##_var##_show(struct kobject *kobj, \
- struct kobj_attribute *attr, \
- char *page) \
-{ \
- BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE); \
- return sprintf(page, _format "\n"); \
-} \
-static struct kobj_attribute format_attr_##_var = \
- __ATTR(_name, 0444, __rapl_##_var##_show, NULL)
-
-#define RAPL_CNTR_WIDTH 32 /* 32-bit rapl counters */
-
-#define RAPL_EVENT_ATTR_STR(_name, v, str) \
-static struct perf_pmu_events_attr event_attr_##v = { \
- .attr = __ATTR(_name, 0444, perf_event_sysfs_show, NULL), \
- .id = 0, \
- .event_str = str, \
-};
-
-struct rapl_pmu {
- spinlock_t lock;
- int n_active; /* number of active events */
- struct list_head active_list;
- struct pmu *pmu; /* pointer to rapl_pmu_class */
- ktime_t timer_interval; /* in ktime_t unit */
- struct hrtimer hrtimer;
-};
-
-static int rapl_hw_unit[NR_RAPL_DOMAINS] __read_mostly; /* 1/2^hw_unit Joule */
-static struct pmu rapl_pmu_class;
-static cpumask_t rapl_cpu_mask;
-static int rapl_cntr_mask;
-
-static DEFINE_PER_CPU(struct rapl_pmu *, rapl_pmu);
-static DEFINE_PER_CPU(struct rapl_pmu *, rapl_pmu_to_free);
-
-static struct x86_pmu_quirk *rapl_quirks;
-static inline u64 rapl_read_counter(struct perf_event *event)
-{
- u64 raw;
- rdmsrl(event->hw.event_base, raw);
- return raw;
-}
-
-#define rapl_add_quirk(func_) \
-do { \
- static struct x86_pmu_quirk __quirk __initdata = { \
- .func = func_, \
- }; \
- __quirk.next = rapl_quirks; \
- rapl_quirks = &__quirk; \
-} while (0)
-
-static inline u64 rapl_scale(u64 v, int cfg)
-{
- if (cfg > NR_RAPL_DOMAINS) {
- pr_warn("invalid domain %d, failed to scale data\n", cfg);
- return v;
- }
- /*
- * scale delta to smallest unit (1/2^32)
- * users must then scale back: count * 1/(1e9*2^32) to get Joules
- * or use ldexp(count, -32).
- * Watts = Joules/Time delta
- */
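-	/* e.g. a hw unit of 16 (1/2^16 J per count) shifts left by 16, giving counts in 1/2^32 J units */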
- return v << (32 - rapl_hw_unit[cfg - 1]);
-}
-
-static u64 rapl_event_update(struct perf_event *event)
-{
- struct hw_perf_event *hwc = &event->hw;
- u64 prev_raw_count, new_raw_count;
- s64 delta, sdelta;
- int shift = RAPL_CNTR_WIDTH;
-
-again:
- prev_raw_count = local64_read(&hwc->prev_count);
- rdmsrl(event->hw.event_base, new_raw_count);
-
- if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
- new_raw_count) != prev_raw_count) {
- cpu_relax();
- goto again;
- }
-
- /*
- * Now we have the new raw value and have updated the prev
- * timestamp already. We can now calculate the elapsed delta
- * (event-)time and add that to the generic event.
- *
- * Careful, not all hw sign-extends above the physical width
- * of the count.
- */
- delta = (new_raw_count << shift) - (prev_raw_count << shift);
- delta >>= shift;
-
- sdelta = rapl_scale(delta, event->hw.config);
-
- local64_add(sdelta, &event->count);
-
- return new_raw_count;
-}
-
-static void rapl_start_hrtimer(struct rapl_pmu *pmu)
-{
- hrtimer_start(&pmu->hrtimer, pmu->timer_interval,
- HRTIMER_MODE_REL_PINNED);
-}
-
-static void rapl_stop_hrtimer(struct rapl_pmu *pmu)
-{
- hrtimer_cancel(&pmu->hrtimer);
-}
-
-static enum hrtimer_restart rapl_hrtimer_handle(struct hrtimer *hrtimer)
-{
- struct rapl_pmu *pmu = __this_cpu_read(rapl_pmu);
- struct perf_event *event;
- unsigned long flags;
-
- if (!pmu->n_active)
- return HRTIMER_NORESTART;
-
- spin_lock_irqsave(&pmu->lock, flags);
-
- list_for_each_entry(event, &pmu->active_list, active_entry) {
- rapl_event_update(event);
- }
-
- spin_unlock_irqrestore(&pmu->lock, flags);
-
- hrtimer_forward_now(hrtimer, pmu->timer_interval);
-
- return HRTIMER_RESTART;
-}
-
-static void rapl_hrtimer_init(struct rapl_pmu *pmu)
-{
- struct hrtimer *hr = &pmu->hrtimer;
-
- hrtimer_init(hr, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
- hr->function = rapl_hrtimer_handle;
-}
-
-static void __rapl_pmu_event_start(struct rapl_pmu *pmu,
- struct perf_event *event)
-{
- if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
- return;
-
- event->hw.state = 0;
-
- list_add_tail(&event->active_entry, &pmu->active_list);
-
- local64_set(&event->hw.prev_count, rapl_read_counter(event));
-
- pmu->n_active++;
- if (pmu->n_active == 1)
- rapl_start_hrtimer(pmu);
-}
-
-static void rapl_pmu_event_start(struct perf_event *event, int mode)
-{
- struct rapl_pmu *pmu = __this_cpu_read(rapl_pmu);
- unsigned long flags;
-
- spin_lock_irqsave(&pmu->lock, flags);
- __rapl_pmu_event_start(pmu, event);
- spin_unlock_irqrestore(&pmu->lock, flags);
-}
-
-static void rapl_pmu_event_stop(struct perf_event *event, int mode)
-{
- struct rapl_pmu *pmu = __this_cpu_read(rapl_pmu);
- struct hw_perf_event *hwc = &event->hw;
- unsigned long flags;
-
- spin_lock_irqsave(&pmu->lock, flags);
-
- /* mark event as deactivated and stopped */
- if (!(hwc->state & PERF_HES_STOPPED)) {
- WARN_ON_ONCE(pmu->n_active <= 0);
- pmu->n_active--;
- if (pmu->n_active == 0)
- rapl_stop_hrtimer(pmu);
-
- list_del(&event->active_entry);
-
- WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
- hwc->state |= PERF_HES_STOPPED;
- }
-
- /* check if update of sw counter is necessary */
- if ((mode & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
- /*
-		 * Drain the remaining delta count out of an event
- * that we are disabling:
- */
- rapl_event_update(event);
- hwc->state |= PERF_HES_UPTODATE;
- }
-
- spin_unlock_irqrestore(&pmu->lock, flags);
-}
-
-static int rapl_pmu_event_add(struct perf_event *event, int mode)
-{
- struct rapl_pmu *pmu = __this_cpu_read(rapl_pmu);
- struct hw_perf_event *hwc = &event->hw;
- unsigned long flags;
-
- spin_lock_irqsave(&pmu->lock, flags);
-
- hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
-
- if (mode & PERF_EF_START)
- __rapl_pmu_event_start(pmu, event);
-
- spin_unlock_irqrestore(&pmu->lock, flags);
-
- return 0;
-}
-
-static void rapl_pmu_event_del(struct perf_event *event, int flags)
-{
- rapl_pmu_event_stop(event, PERF_EF_UPDATE);
-}
-
-static int rapl_pmu_event_init(struct perf_event *event)
-{
- u64 cfg = event->attr.config & RAPL_EVENT_MASK;
- int bit, msr, ret = 0;
-
- /* only look at RAPL events */
- if (event->attr.type != rapl_pmu_class.type)
- return -ENOENT;
-
- /* check only supported bits are set */
- if (event->attr.config & ~RAPL_EVENT_MASK)
- return -EINVAL;
-
- /*
- * check event is known (determines counter)
- */
- switch (cfg) {
- case INTEL_RAPL_PP0:
- bit = RAPL_IDX_PP0_NRG_STAT;
- msr = MSR_PP0_ENERGY_STATUS;
- break;
- case INTEL_RAPL_PKG:
- bit = RAPL_IDX_PKG_NRG_STAT;
- msr = MSR_PKG_ENERGY_STATUS;
- break;
- case INTEL_RAPL_RAM:
- bit = RAPL_IDX_RAM_NRG_STAT;
- msr = MSR_DRAM_ENERGY_STATUS;
- break;
- case INTEL_RAPL_PP1:
- bit = RAPL_IDX_PP1_NRG_STAT;
- msr = MSR_PP1_ENERGY_STATUS;
- break;
- default:
- return -EINVAL;
- }
- /* check event supported */
- if (!(rapl_cntr_mask & (1 << bit)))
- return -EINVAL;
-
- /* unsupported modes and filters */
- if (event->attr.exclude_user ||
- event->attr.exclude_kernel ||
- event->attr.exclude_hv ||
- event->attr.exclude_idle ||
- event->attr.exclude_host ||
- event->attr.exclude_guest ||
- event->attr.sample_period) /* no sampling */
- return -EINVAL;
-
- /* must be done before validate_group */
- event->hw.event_base = msr;
- event->hw.config = cfg;
- event->hw.idx = bit;
-
- return ret;
-}
-
-static void rapl_pmu_event_read(struct perf_event *event)
-{
- rapl_event_update(event);
-}
-
-static ssize_t rapl_get_attr_cpumask(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- return cpumap_print_to_pagebuf(true, buf, &rapl_cpu_mask);
-}
-
-static DEVICE_ATTR(cpumask, S_IRUGO, rapl_get_attr_cpumask, NULL);
-
-static struct attribute *rapl_pmu_attrs[] = {
- &dev_attr_cpumask.attr,
- NULL,
-};
-
-static struct attribute_group rapl_pmu_attr_group = {
- .attrs = rapl_pmu_attrs,
-};
-
-RAPL_EVENT_ATTR_STR(energy-cores, rapl_cores, "event=0x01");
-RAPL_EVENT_ATTR_STR(energy-pkg , rapl_pkg, "event=0x02");
-RAPL_EVENT_ATTR_STR(energy-ram , rapl_ram, "event=0x03");
-RAPL_EVENT_ATTR_STR(energy-gpu , rapl_gpu, "event=0x04");
-
-RAPL_EVENT_ATTR_STR(energy-cores.unit, rapl_cores_unit, "Joules");
-RAPL_EVENT_ATTR_STR(energy-pkg.unit , rapl_pkg_unit, "Joules");
-RAPL_EVENT_ATTR_STR(energy-ram.unit , rapl_ram_unit, "Joules");
-RAPL_EVENT_ATTR_STR(energy-gpu.unit , rapl_gpu_unit, "Joules");
-
-/*
- * we compute in 0.23 nJ increments regardless of MSR
- */
-RAPL_EVENT_ATTR_STR(energy-cores.scale, rapl_cores_scale, "2.3283064365386962890625e-10");
-RAPL_EVENT_ATTR_STR(energy-pkg.scale, rapl_pkg_scale, "2.3283064365386962890625e-10");
-RAPL_EVENT_ATTR_STR(energy-ram.scale, rapl_ram_scale, "2.3283064365386962890625e-10");
-RAPL_EVENT_ATTR_STR(energy-gpu.scale, rapl_gpu_scale, "2.3283064365386962890625e-10");
-
-static struct attribute *rapl_events_srv_attr[] = {
- EVENT_PTR(rapl_cores),
- EVENT_PTR(rapl_pkg),
- EVENT_PTR(rapl_ram),
-
- EVENT_PTR(rapl_cores_unit),
- EVENT_PTR(rapl_pkg_unit),
- EVENT_PTR(rapl_ram_unit),
-
- EVENT_PTR(rapl_cores_scale),
- EVENT_PTR(rapl_pkg_scale),
- EVENT_PTR(rapl_ram_scale),
- NULL,
-};
-
-static struct attribute *rapl_events_cln_attr[] = {
- EVENT_PTR(rapl_cores),
- EVENT_PTR(rapl_pkg),
- EVENT_PTR(rapl_gpu),
-
- EVENT_PTR(rapl_cores_unit),
- EVENT_PTR(rapl_pkg_unit),
- EVENT_PTR(rapl_gpu_unit),
-
- EVENT_PTR(rapl_cores_scale),
- EVENT_PTR(rapl_pkg_scale),
- EVENT_PTR(rapl_gpu_scale),
- NULL,
-};
-
-static struct attribute *rapl_events_hsw_attr[] = {
- EVENT_PTR(rapl_cores),
- EVENT_PTR(rapl_pkg),
- EVENT_PTR(rapl_gpu),
- EVENT_PTR(rapl_ram),
-
- EVENT_PTR(rapl_cores_unit),
- EVENT_PTR(rapl_pkg_unit),
- EVENT_PTR(rapl_gpu_unit),
- EVENT_PTR(rapl_ram_unit),
-
- EVENT_PTR(rapl_cores_scale),
- EVENT_PTR(rapl_pkg_scale),
- EVENT_PTR(rapl_gpu_scale),
- EVENT_PTR(rapl_ram_scale),
- NULL,
-};
-
-static struct attribute *rapl_events_knl_attr[] = {
- EVENT_PTR(rapl_pkg),
- EVENT_PTR(rapl_ram),
-
- EVENT_PTR(rapl_pkg_unit),
- EVENT_PTR(rapl_ram_unit),
-
- EVENT_PTR(rapl_pkg_scale),
- EVENT_PTR(rapl_ram_scale),
- NULL,
-};
-
-static struct attribute_group rapl_pmu_events_group = {
- .name = "events",
- .attrs = NULL, /* patched at runtime */
-};
-
-DEFINE_RAPL_FORMAT_ATTR(event, event, "config:0-7");
-static struct attribute *rapl_formats_attr[] = {
- &format_attr_event.attr,
- NULL,
-};
-
-static struct attribute_group rapl_pmu_format_group = {
- .name = "format",
- .attrs = rapl_formats_attr,
-};
-
-const struct attribute_group *rapl_attr_groups[] = {
- &rapl_pmu_attr_group,
- &rapl_pmu_format_group,
- &rapl_pmu_events_group,
- NULL,
-};
-
-static struct pmu rapl_pmu_class = {
- .attr_groups = rapl_attr_groups,
- .task_ctx_nr = perf_invalid_context, /* system-wide only */
- .event_init = rapl_pmu_event_init,
- .add = rapl_pmu_event_add, /* must have */
- .del = rapl_pmu_event_del, /* must have */
- .start = rapl_pmu_event_start,
- .stop = rapl_pmu_event_stop,
- .read = rapl_pmu_event_read,
-};
-
-static void rapl_cpu_exit(int cpu)
-{
- struct rapl_pmu *pmu = per_cpu(rapl_pmu, cpu);
- int i, phys_id = topology_physical_package_id(cpu);
- int target = -1;
-
- /* find a new cpu on same package */
- for_each_online_cpu(i) {
- if (i == cpu)
- continue;
- if (phys_id == topology_physical_package_id(i)) {
- target = i;
- break;
- }
- }
-	/*
-	 * clear cpu from the cpumask; if it was set and another cpu on the
-	 * same package is still online, move it to that cpu
-	 */
- if (cpumask_test_and_clear_cpu(cpu, &rapl_cpu_mask) && target >= 0)
- cpumask_set_cpu(target, &rapl_cpu_mask);
-
- WARN_ON(cpumask_empty(&rapl_cpu_mask));
- /*
- * migrate events and context to new cpu
- */
- if (target >= 0)
- perf_pmu_migrate_context(pmu->pmu, cpu, target);
-
- /* cancel overflow polling timer for CPU */
- rapl_stop_hrtimer(pmu);
-}
-
-static void rapl_cpu_init(int cpu)
-{
- int i, phys_id = topology_physical_package_id(cpu);
-
-	/* check if phys_id is already covered */
- for_each_cpu(i, &rapl_cpu_mask) {
- if (phys_id == topology_physical_package_id(i))
- return;
- }
- /* was not found, so add it */
- cpumask_set_cpu(cpu, &rapl_cpu_mask);
-}
-
-static __init void rapl_hsw_server_quirk(void)
-{
- /*
- * DRAM domain on HSW server has fixed energy unit which can be
-	 * different from the unit reported by the power unit MSR.
- * "Intel Xeon Processor E5-1600 and E5-2600 v3 Product Families, V2
- * of 2. Datasheet, September 2014, Reference Number: 330784-001 "
- */
- rapl_hw_unit[RAPL_IDX_RAM_NRG_STAT] = 16;
-}
-
-static int rapl_cpu_prepare(int cpu)
-{
- struct rapl_pmu *pmu = per_cpu(rapl_pmu, cpu);
- int phys_id = topology_physical_package_id(cpu);
- u64 ms;
-
- if (pmu)
- return 0;
-
- if (phys_id < 0)
- return -1;
-
- pmu = kzalloc_node(sizeof(*pmu), GFP_KERNEL, cpu_to_node(cpu));
- if (!pmu)
- return -1;
- spin_lock_init(&pmu->lock);
-
- INIT_LIST_HEAD(&pmu->active_list);
-
- pmu->pmu = &rapl_pmu_class;
-
-	/*
-	 * use a reference of 200W for scaling the timeout so that counter
-	 * overflows are not missed: 200W = 200 Joules/sec.
-	 * divide the interval by 2 to avoid lockstep (2 * 100);
-	 * if the hw unit is 32, we use 2 ms (1/200/2)
-	 */
- if (rapl_hw_unit[0] < 32)
- ms = (1000 / (2 * 100)) * (1ULL << (32 - rapl_hw_unit[0] - 1));
- else
- ms = 2;
-
- pmu->timer_interval = ms_to_ktime(ms);
-
- rapl_hrtimer_init(pmu);
-
- /* set RAPL pmu for this cpu for now */
- per_cpu(rapl_pmu, cpu) = pmu;
- per_cpu(rapl_pmu_to_free, cpu) = NULL;
-
- return 0;
-}
-
-static void rapl_cpu_kfree(int cpu)
-{
- struct rapl_pmu *pmu = per_cpu(rapl_pmu_to_free, cpu);
-
- kfree(pmu);
-
- per_cpu(rapl_pmu_to_free, cpu) = NULL;
-}
-
-static int rapl_cpu_dying(int cpu)
-{
- struct rapl_pmu *pmu = per_cpu(rapl_pmu, cpu);
-
- if (!pmu)
- return 0;
-
- per_cpu(rapl_pmu, cpu) = NULL;
-
- per_cpu(rapl_pmu_to_free, cpu) = pmu;
-
- return 0;
-}
-
-static int rapl_cpu_notifier(struct notifier_block *self,
- unsigned long action, void *hcpu)
-{
- unsigned int cpu = (long)hcpu;
-
- switch (action & ~CPU_TASKS_FROZEN) {
- case CPU_UP_PREPARE:
- rapl_cpu_prepare(cpu);
- break;
- case CPU_STARTING:
- rapl_cpu_init(cpu);
- break;
- case CPU_UP_CANCELED:
- case CPU_DYING:
- rapl_cpu_dying(cpu);
- break;
- case CPU_ONLINE:
- case CPU_DEAD:
- rapl_cpu_kfree(cpu);
- break;
- case CPU_DOWN_PREPARE:
- rapl_cpu_exit(cpu);
- break;
- default:
- break;
- }
-
- return NOTIFY_OK;
-}
-
-static int rapl_check_hw_unit(void)
-{
- u64 msr_rapl_power_unit_bits;
- int i;
-
- /* protect rdmsrl() to handle virtualization */
- if (rdmsrl_safe(MSR_RAPL_POWER_UNIT, &msr_rapl_power_unit_bits))
- return -1;
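-	/* bits 12:8 give the energy status unit; counters then tick in 1/2^ESU Joules */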
- for (i = 0; i < NR_RAPL_DOMAINS; i++)
- rapl_hw_unit[i] = (msr_rapl_power_unit_bits >> 8) & 0x1FULL;
-
- return 0;
-}
-
-static const struct x86_cpu_id rapl_cpu_match[] = {
- [0] = { .vendor = X86_VENDOR_INTEL, .family = 6 },
- [1] = {},
-};
-
-static int __init rapl_pmu_init(void)
-{
- struct rapl_pmu *pmu;
- int cpu, ret;
- struct x86_pmu_quirk *quirk;
- int i;
-
- /*
- * check for Intel processor family 6
- */
- if (!x86_match_cpu(rapl_cpu_match))
- return 0;
-
- /* check supported CPU */
- switch (boot_cpu_data.x86_model) {
- case 42: /* Sandy Bridge */
- case 58: /* Ivy Bridge */
- rapl_cntr_mask = RAPL_IDX_CLN;
- rapl_pmu_events_group.attrs = rapl_events_cln_attr;
- break;
- case 63: /* Haswell-Server */
- rapl_add_quirk(rapl_hsw_server_quirk);
- rapl_cntr_mask = RAPL_IDX_SRV;
- rapl_pmu_events_group.attrs = rapl_events_srv_attr;
- break;
- case 60: /* Haswell */
- case 69: /* Haswell-Celeron */
- case 61: /* Broadwell */
- rapl_cntr_mask = RAPL_IDX_HSW;
- rapl_pmu_events_group.attrs = rapl_events_hsw_attr;
- break;
- case 45: /* Sandy Bridge-EP */
- case 62: /* IvyTown */
- rapl_cntr_mask = RAPL_IDX_SRV;
- rapl_pmu_events_group.attrs = rapl_events_srv_attr;
- break;
- case 87: /* Knights Landing */
- rapl_add_quirk(rapl_hsw_server_quirk);
- rapl_cntr_mask = RAPL_IDX_KNL;
-		rapl_pmu_events_group.attrs = rapl_events_knl_attr;
-		break;
-
- default:
- /* unsupported */
- return 0;
- }
- ret = rapl_check_hw_unit();
- if (ret)
- return ret;
-
- /* run cpu model quirks */
- for (quirk = rapl_quirks; quirk; quirk = quirk->next)
- quirk->func();
- cpu_notifier_register_begin();
-
- for_each_online_cpu(cpu) {
- ret = rapl_cpu_prepare(cpu);
- if (ret)
- goto out;
- rapl_cpu_init(cpu);
- }
-
- __perf_cpu_notifier(rapl_cpu_notifier);
-
- ret = perf_pmu_register(&rapl_pmu_class, "power", -1);
- if (WARN_ON(ret)) {
- pr_info("RAPL PMU detected, registration failed (%d), RAPL PMU disabled\n", ret);
- cpu_notifier_register_done();
- return -1;
- }
-
- pmu = __this_cpu_read(rapl_pmu);
-
- pr_info("RAPL PMU detected,"
- " API unit is 2^-32 Joules,"
- " %d fixed counters"
- " %llu ms ovfl timer\n",
- hweight32(rapl_cntr_mask),
- ktime_to_ms(pmu->timer_interval));
- for (i = 0; i < NR_RAPL_DOMAINS; i++) {
- if (rapl_cntr_mask & (1 << i)) {
- pr_info("hw unit of domain %s 2^-%d Joules\n",
- rapl_domain_names[i], rapl_hw_unit[i]);
- }
- }
-out:
- cpu_notifier_register_done();
-
- return 0;
-}
-device_initcall(rapl_pmu_init);
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
deleted file mode 100644
index 3bf41d413..000000000
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c
+++ /dev/null
@@ -1,1401 +0,0 @@
-#include "perf_event_intel_uncore.h"
-
-static struct intel_uncore_type *empty_uncore[] = { NULL, };
-struct intel_uncore_type **uncore_msr_uncores = empty_uncore;
-struct intel_uncore_type **uncore_pci_uncores = empty_uncore;
-
-static bool pcidrv_registered;
-struct pci_driver *uncore_pci_driver;
-/* pci bus to socket mapping */
-DEFINE_RAW_SPINLOCK(pci2phy_map_lock);
-struct list_head pci2phy_map_head = LIST_HEAD_INIT(pci2phy_map_head);
-struct pci_dev *uncore_extra_pci_dev[UNCORE_SOCKET_MAX][UNCORE_EXTRA_PCI_DEV_MAX];
-
-static DEFINE_RAW_SPINLOCK(uncore_box_lock);
-/* mask of cpus that collect uncore events */
-static cpumask_t uncore_cpu_mask;
-
-/* constraint for the fixed counter */
-static struct event_constraint uncore_constraint_fixed =
- EVENT_CONSTRAINT(~0ULL, 1 << UNCORE_PMC_IDX_FIXED, ~0ULL);
-struct event_constraint uncore_constraint_empty =
- EVENT_CONSTRAINT(0, 0, 0);
-
-int uncore_pcibus_to_physid(struct pci_bus *bus)
-{
- struct pci2phy_map *map;
- int phys_id = -1;
-
- raw_spin_lock(&pci2phy_map_lock);
- list_for_each_entry(map, &pci2phy_map_head, list) {
- if (map->segment == pci_domain_nr(bus)) {
- phys_id = map->pbus_to_physid[bus->number];
- break;
- }
- }
- raw_spin_unlock(&pci2phy_map_lock);
-
- return phys_id;
-}
-
-struct pci2phy_map *__find_pci2phy_map(int segment)
-{
- struct pci2phy_map *map, *alloc = NULL;
- int i;
-
- lockdep_assert_held(&pci2phy_map_lock);
-
-lookup:
- list_for_each_entry(map, &pci2phy_map_head, list) {
- if (map->segment == segment)
- goto end;
- }
-
- if (!alloc) {
- raw_spin_unlock(&pci2phy_map_lock);
- alloc = kmalloc(sizeof(struct pci2phy_map), GFP_KERNEL);
- raw_spin_lock(&pci2phy_map_lock);
-
- if (!alloc)
- return NULL;
-
- goto lookup;
- }
-
- map = alloc;
- alloc = NULL;
- map->segment = segment;
- for (i = 0; i < 256; i++)
- map->pbus_to_physid[i] = -1;
- list_add_tail(&map->list, &pci2phy_map_head);
-
-end:
- kfree(alloc);
- return map;
-}
-
-ssize_t uncore_event_show(struct kobject *kobj,
- struct kobj_attribute *attr, char *buf)
-{
- struct uncore_event_desc *event =
- container_of(attr, struct uncore_event_desc, attr);
- return sprintf(buf, "%s", event->config);
-}
-
-struct intel_uncore_pmu *uncore_event_to_pmu(struct perf_event *event)
-{
- return container_of(event->pmu, struct intel_uncore_pmu, pmu);
-}
-
-struct intel_uncore_box *uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu)
-{
- struct intel_uncore_box *box;
-
- box = *per_cpu_ptr(pmu->box, cpu);
- if (box)
- return box;
-
- raw_spin_lock(&uncore_box_lock);
- /* Recheck in lock to handle races. */
- if (*per_cpu_ptr(pmu->box, cpu))
- goto out;
- list_for_each_entry(box, &pmu->box_list, list) {
- if (box->phys_id == topology_physical_package_id(cpu)) {
- atomic_inc(&box->refcnt);
- *per_cpu_ptr(pmu->box, cpu) = box;
- break;
- }
- }
-out:
- raw_spin_unlock(&uncore_box_lock);
-
- return *per_cpu_ptr(pmu->box, cpu);
-}
-
-struct intel_uncore_box *uncore_event_to_box(struct perf_event *event)
-{
- /*
-	 * perf core schedules events on a per-cpu basis; uncore events are
- * collected by one of the cpus inside a physical package.
- */
- return uncore_pmu_to_box(uncore_event_to_pmu(event), smp_processor_id());
-}
-
-u64 uncore_msr_read_counter(struct intel_uncore_box *box, struct perf_event *event)
-{
- u64 count;
-
- rdmsrl(event->hw.event_base, count);
-
- return count;
-}
-
-/*
- * generic get constraint function for shared match/mask registers.
- */
-struct event_constraint *
-uncore_get_constraint(struct intel_uncore_box *box, struct perf_event *event)
-{
- struct intel_uncore_extra_reg *er;
- struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
- struct hw_perf_event_extra *reg2 = &event->hw.branch_reg;
- unsigned long flags;
- bool ok = false;
-
- /*
- * reg->alloc can be set due to existing state, so for fake box we
- * need to ignore this, otherwise we might fail to allocate proper
- * fake state for this extra reg constraint.
- */
- if (reg1->idx == EXTRA_REG_NONE ||
- (!uncore_box_is_fake(box) && reg1->alloc))
- return NULL;
-
- er = &box->shared_regs[reg1->idx];
- raw_spin_lock_irqsave(&er->lock, flags);
- if (!atomic_read(&er->ref) ||
- (er->config1 == reg1->config && er->config2 == reg2->config)) {
- atomic_inc(&er->ref);
- er->config1 = reg1->config;
- er->config2 = reg2->config;
- ok = true;
- }
- raw_spin_unlock_irqrestore(&er->lock, flags);
-
- if (ok) {
- if (!uncore_box_is_fake(box))
- reg1->alloc = 1;
- return NULL;
- }
-
- return &uncore_constraint_empty;
-}
-
-void uncore_put_constraint(struct intel_uncore_box *box, struct perf_event *event)
-{
- struct intel_uncore_extra_reg *er;
- struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
-
- /*
- * Only put constraint if extra reg was actually allocated. Also
-	 * takes care of events which do not use an extra shared reg.
- *
- * Also, if this is a fake box we shouldn't touch any event state
- * (reg->alloc) and we don't care about leaving inconsistent box
- * state either since it will be thrown out.
- */
- if (uncore_box_is_fake(box) || !reg1->alloc)
- return;
-
- er = &box->shared_regs[reg1->idx];
- atomic_dec(&er->ref);
- reg1->alloc = 0;
-}
-
-u64 uncore_shared_reg_config(struct intel_uncore_box *box, int idx)
-{
- struct intel_uncore_extra_reg *er;
- unsigned long flags;
- u64 config;
-
- er = &box->shared_regs[idx];
-
- raw_spin_lock_irqsave(&er->lock, flags);
- config = er->config;
- raw_spin_unlock_irqrestore(&er->lock, flags);
-
- return config;
-}
-
-static void uncore_assign_hw_event(struct intel_uncore_box *box, struct perf_event *event, int idx)
-{
- struct hw_perf_event *hwc = &event->hw;
-
- hwc->idx = idx;
- hwc->last_tag = ++box->tags[idx];
-
- if (hwc->idx == UNCORE_PMC_IDX_FIXED) {
- hwc->event_base = uncore_fixed_ctr(box);
- hwc->config_base = uncore_fixed_ctl(box);
- return;
- }
-
- hwc->config_base = uncore_event_ctl(box, hwc->idx);
- hwc->event_base = uncore_perf_ctr(box, hwc->idx);
-}
-
-void uncore_perf_event_update(struct intel_uncore_box *box, struct perf_event *event)
-{
- u64 prev_count, new_count, delta;
- int shift;
-
- if (event->hw.idx >= UNCORE_PMC_IDX_FIXED)
- shift = 64 - uncore_fixed_ctr_bits(box);
- else
- shift = 64 - uncore_perf_ctr_bits(box);
-
- /* the hrtimer might modify the previous event value */
-again:
- prev_count = local64_read(&event->hw.prev_count);
- new_count = uncore_read_counter(box, event);
- if (local64_xchg(&event->hw.prev_count, new_count) != prev_count)
- goto again;
-
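-	/* shift both counts up so the subtraction below discards bits above the counter width */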
- delta = (new_count << shift) - (prev_count << shift);
- delta >>= shift;
-
- local64_add(delta, &event->count);
-}
-
-/*
- * The overflow interrupt is unavailable for SandyBridge-EP and broken
- * for SandyBridge, so we use an hrtimer to periodically poll the counter
- * to avoid overflow.
- */
-static enum hrtimer_restart uncore_pmu_hrtimer(struct hrtimer *hrtimer)
-{
- struct intel_uncore_box *box;
- struct perf_event *event;
- unsigned long flags;
- int bit;
-
- box = container_of(hrtimer, struct intel_uncore_box, hrtimer);
- if (!box->n_active || box->cpu != smp_processor_id())
- return HRTIMER_NORESTART;
- /*
-	 * disable local interrupts to prevent uncore_pmu_event_start/stop
-	 * from interrupting the update process
- */
- local_irq_save(flags);
-
- /*
- * handle boxes with an active event list as opposed to active
- * counters
- */
- list_for_each_entry(event, &box->active_list, active_entry) {
- uncore_perf_event_update(box, event);
- }
-
- for_each_set_bit(bit, box->active_mask, UNCORE_PMC_IDX_MAX)
- uncore_perf_event_update(box, box->events[bit]);
-
- local_irq_restore(flags);
-
- hrtimer_forward_now(hrtimer, ns_to_ktime(box->hrtimer_duration));
- return HRTIMER_RESTART;
-}
-
-void uncore_pmu_start_hrtimer(struct intel_uncore_box *box)
-{
- hrtimer_start(&box->hrtimer, ns_to_ktime(box->hrtimer_duration),
- HRTIMER_MODE_REL_PINNED);
-}
-
-void uncore_pmu_cancel_hrtimer(struct intel_uncore_box *box)
-{
- hrtimer_cancel(&box->hrtimer);
-}
-
-static void uncore_pmu_init_hrtimer(struct intel_uncore_box *box)
-{
- hrtimer_init(&box->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
- box->hrtimer.function = uncore_pmu_hrtimer;
-}
-
-static struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type, int node)
-{
- struct intel_uncore_box *box;
- int i, size;
-
- size = sizeof(*box) + type->num_shared_regs * sizeof(struct intel_uncore_extra_reg);
-
- box = kzalloc_node(size, GFP_KERNEL, node);
- if (!box)
- return NULL;
-
- for (i = 0; i < type->num_shared_regs; i++)
- raw_spin_lock_init(&box->shared_regs[i].lock);
-
- uncore_pmu_init_hrtimer(box);
- atomic_set(&box->refcnt, 1);
- box->cpu = -1;
- box->phys_id = -1;
-
- /* set default hrtimer timeout */
- box->hrtimer_duration = UNCORE_PMU_HRTIMER_INTERVAL;
-
- INIT_LIST_HEAD(&box->active_list);
-
- return box;
-}
-
-/*
- * Using the uncore_pmu_event_init pmu event_init callback
- * as a detection point for uncore events.
- */
-static int uncore_pmu_event_init(struct perf_event *event);
-
-static bool is_uncore_event(struct perf_event *event)
-{
- return event->pmu->event_init == uncore_pmu_event_init;
-}
-
-static int
-uncore_collect_events(struct intel_uncore_box *box, struct perf_event *leader, bool dogrp)
-{
- struct perf_event *event;
- int n, max_count;
-
- max_count = box->pmu->type->num_counters;
- if (box->pmu->type->fixed_ctl)
- max_count++;
-
- if (box->n_events >= max_count)
- return -EINVAL;
-
- n = box->n_events;
-
- if (is_uncore_event(leader)) {
- box->event_list[n] = leader;
- n++;
- }
-
- if (!dogrp)
- return n;
-
- list_for_each_entry(event, &leader->sibling_list, group_entry) {
- if (!is_uncore_event(event) ||
- event->state <= PERF_EVENT_STATE_OFF)
- continue;
-
- if (n >= max_count)
- return -EINVAL;
-
- box->event_list[n] = event;
- n++;
- }
- return n;
-}
-
-static struct event_constraint *
-uncore_get_event_constraint(struct intel_uncore_box *box, struct perf_event *event)
-{
- struct intel_uncore_type *type = box->pmu->type;
- struct event_constraint *c;
-
- if (type->ops->get_constraint) {
- c = type->ops->get_constraint(box, event);
- if (c)
- return c;
- }
-
- if (event->attr.config == UNCORE_FIXED_EVENT)
- return &uncore_constraint_fixed;
-
- if (type->constraints) {
- for_each_event_constraint(c, type->constraints) {
- if ((event->hw.config & c->cmask) == c->code)
- return c;
- }
- }
-
- return &type->unconstrainted;
-}
-
-static void uncore_put_event_constraint(struct intel_uncore_box *box, struct perf_event *event)
-{
- if (box->pmu->type->ops->put_constraint)
- box->pmu->type->ops->put_constraint(box, event);
-}
-
-static int uncore_assign_events(struct intel_uncore_box *box, int assign[], int n)
-{
- unsigned long used_mask[BITS_TO_LONGS(UNCORE_PMC_IDX_MAX)];
- struct event_constraint *c;
- int i, wmin, wmax, ret = 0;
- struct hw_perf_event *hwc;
-
- bitmap_zero(used_mask, UNCORE_PMC_IDX_MAX);
-
- for (i = 0, wmin = UNCORE_PMC_IDX_MAX, wmax = 0; i < n; i++) {
- c = uncore_get_event_constraint(box, box->event_list[i]);
- box->event_constraint[i] = c;
- wmin = min(wmin, c->weight);
- wmax = max(wmax, c->weight);
- }
-
- /* fastpath, try to reuse previous register */
- for (i = 0; i < n; i++) {
- hwc = &box->event_list[i]->hw;
- c = box->event_constraint[i];
-
- /* never assigned */
- if (hwc->idx == -1)
- break;
-
- /* constraint still honored */
- if (!test_bit(hwc->idx, c->idxmsk))
- break;
-
- /* not already used */
- if (test_bit(hwc->idx, used_mask))
- break;
-
- __set_bit(hwc->idx, used_mask);
- if (assign)
- assign[i] = hwc->idx;
- }
- /* slow path */
- if (i != n)
- ret = perf_assign_events(box->event_constraint, n,
- wmin, wmax, n, assign);
-
- if (!assign || ret) {
- for (i = 0; i < n; i++)
- uncore_put_event_constraint(box, box->event_list[i]);
- }
- return ret ? -EINVAL : 0;
-}
-
-static void uncore_pmu_event_start(struct perf_event *event, int flags)
-{
- struct intel_uncore_box *box = uncore_event_to_box(event);
- int idx = event->hw.idx;
-
- if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
- return;
-
- if (WARN_ON_ONCE(idx == -1 || idx >= UNCORE_PMC_IDX_MAX))
- return;
-
- event->hw.state = 0;
- box->events[idx] = event;
- box->n_active++;
- __set_bit(idx, box->active_mask);
-
- local64_set(&event->hw.prev_count, uncore_read_counter(box, event));
- uncore_enable_event(box, event);
-
- if (box->n_active == 1) {
- uncore_enable_box(box);
- uncore_pmu_start_hrtimer(box);
- }
-}
-
-static void uncore_pmu_event_stop(struct perf_event *event, int flags)
-{
- struct intel_uncore_box *box = uncore_event_to_box(event);
- struct hw_perf_event *hwc = &event->hw;
-
- if (__test_and_clear_bit(hwc->idx, box->active_mask)) {
- uncore_disable_event(box, event);
- box->n_active--;
- box->events[hwc->idx] = NULL;
- WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
- hwc->state |= PERF_HES_STOPPED;
-
- if (box->n_active == 0) {
- uncore_disable_box(box);
- uncore_pmu_cancel_hrtimer(box);
- }
- }
-
- if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
- /*
- * Drain the remaining delta count out of an event
- * that we are disabling:
- */
- uncore_perf_event_update(box, event);
- hwc->state |= PERF_HES_UPTODATE;
- }
-}
-
-static int uncore_pmu_event_add(struct perf_event *event, int flags)
-{
- struct intel_uncore_box *box = uncore_event_to_box(event);
- struct hw_perf_event *hwc = &event->hw;
- int assign[UNCORE_PMC_IDX_MAX];
- int i, n, ret;
-
- if (!box)
- return -ENODEV;
-
- ret = n = uncore_collect_events(box, event, false);
- if (ret < 0)
- return ret;
-
- hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
- if (!(flags & PERF_EF_START))
- hwc->state |= PERF_HES_ARCH;
-
- ret = uncore_assign_events(box, assign, n);
- if (ret)
- return ret;
-
- /* save events moving to new counters */
- for (i = 0; i < box->n_events; i++) {
- event = box->event_list[i];
- hwc = &event->hw;
-
- if (hwc->idx == assign[i] &&
- hwc->last_tag == box->tags[assign[i]])
- continue;
- /*
- * Ensure we don't accidentally enable a stopped
- * counter simply because we rescheduled.
- */
- if (hwc->state & PERF_HES_STOPPED)
- hwc->state |= PERF_HES_ARCH;
-
- uncore_pmu_event_stop(event, PERF_EF_UPDATE);
- }
-
- /* reprogram moved events into new counters */
- for (i = 0; i < n; i++) {
- event = box->event_list[i];
- hwc = &event->hw;
-
- if (hwc->idx != assign[i] ||
- hwc->last_tag != box->tags[assign[i]])
- uncore_assign_hw_event(box, event, assign[i]);
- else if (i < box->n_events)
- continue;
-
- if (hwc->state & PERF_HES_ARCH)
- continue;
-
- uncore_pmu_event_start(event, 0);
- }
- box->n_events = n;
-
- return 0;
-}
-
-static void uncore_pmu_event_del(struct perf_event *event, int flags)
-{
- struct intel_uncore_box *box = uncore_event_to_box(event);
- int i;
-
- uncore_pmu_event_stop(event, PERF_EF_UPDATE);
-
- for (i = 0; i < box->n_events; i++) {
- if (event == box->event_list[i]) {
- uncore_put_event_constraint(box, event);
-
- while (++i < box->n_events)
- box->event_list[i - 1] = box->event_list[i];
-
- --box->n_events;
- break;
- }
- }
-
- event->hw.idx = -1;
- event->hw.last_tag = ~0ULL;
-}
-
-void uncore_pmu_event_read(struct perf_event *event)
-{
- struct intel_uncore_box *box = uncore_event_to_box(event);
- uncore_perf_event_update(box, event);
-}
-
-/*
- * validation ensures the group can be loaded onto the
- * PMU if it was the only group available.
- */
-static int uncore_validate_group(struct intel_uncore_pmu *pmu,
- struct perf_event *event)
-{
- struct perf_event *leader = event->group_leader;
- struct intel_uncore_box *fake_box;
- int ret = -EINVAL, n;
-
- fake_box = uncore_alloc_box(pmu->type, NUMA_NO_NODE);
- if (!fake_box)
- return -ENOMEM;
-
- fake_box->pmu = pmu;
- /*
- * The event is not yet connected with its
- * siblings, therefore we must first collect the
- * existing siblings, then add the new event
- * before we can simulate the scheduling.
- */
- n = uncore_collect_events(fake_box, leader, true);
- if (n < 0)
- goto out;
-
- fake_box->n_events = n;
- n = uncore_collect_events(fake_box, event, false);
- if (n < 0)
- goto out;
-
- fake_box->n_events = n;
-
- ret = uncore_assign_events(fake_box, NULL, n);
-out:
- kfree(fake_box);
- return ret;
-}
-
-static int uncore_pmu_event_init(struct perf_event *event)
-{
- struct intel_uncore_pmu *pmu;
- struct intel_uncore_box *box;
- struct hw_perf_event *hwc = &event->hw;
- int ret;
-
- if (event->attr.type != event->pmu->type)
- return -ENOENT;
-
- pmu = uncore_event_to_pmu(event);
- /* no device found for this pmu */
- if (pmu->func_id < 0)
- return -ENOENT;
-
- /*
- * The uncore PMU always measures at all privilege levels,
- * so it doesn't make sense to specify any exclude bits.
- */
- if (event->attr.exclude_user || event->attr.exclude_kernel ||
- event->attr.exclude_hv || event->attr.exclude_idle)
- return -EINVAL;
-
- /* Sampling not supported yet */
- if (hwc->sample_period)
- return -EINVAL;
-
- /*
- * Place all uncore events for a particular physical package
- * onto a single cpu
- */
- if (event->cpu < 0)
- return -EINVAL;
- box = uncore_pmu_to_box(pmu, event->cpu);
- if (!box || box->cpu < 0)
- return -EINVAL;
- event->cpu = box->cpu;
-
- event->hw.idx = -1;
- event->hw.last_tag = ~0ULL;
- event->hw.extra_reg.idx = EXTRA_REG_NONE;
- event->hw.branch_reg.idx = EXTRA_REG_NONE;
-
- if (event->attr.config == UNCORE_FIXED_EVENT) {
- /* no fixed counter */
- if (!pmu->type->fixed_ctl)
- return -EINVAL;
- /*
- * if there is only one fixed counter, only the first pmu
- * can access the fixed counter
- */
- if (pmu->type->single_fixed && pmu->pmu_idx > 0)
- return -EINVAL;
-
- /* fixed counters have event field hardcoded to zero */
- hwc->config = 0ULL;
- } else {
- hwc->config = event->attr.config & pmu->type->event_mask;
- if (pmu->type->ops->hw_config) {
- ret = pmu->type->ops->hw_config(box, event);
- if (ret)
- return ret;
- }
- }
-
- if (event->group_leader != event)
- ret = uncore_validate_group(pmu, event);
- else
- ret = 0;
-
- return ret;
-}
-
-static ssize_t uncore_get_attr_cpumask(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- return cpumap_print_to_pagebuf(true, buf, &uncore_cpu_mask);
-}
-
-static DEVICE_ATTR(cpumask, S_IRUGO, uncore_get_attr_cpumask, NULL);
-
-static struct attribute *uncore_pmu_attrs[] = {
- &dev_attr_cpumask.attr,
- NULL,
-};
-
-static struct attribute_group uncore_pmu_attr_group = {
- .attrs = uncore_pmu_attrs,
-};
-
-static int uncore_pmu_register(struct intel_uncore_pmu *pmu)
-{
- int ret;
-
- if (!pmu->type->pmu) {
- pmu->pmu = (struct pmu) {
- .attr_groups = pmu->type->attr_groups,
- .task_ctx_nr = perf_invalid_context,
- .event_init = uncore_pmu_event_init,
- .add = uncore_pmu_event_add,
- .del = uncore_pmu_event_del,
- .start = uncore_pmu_event_start,
- .stop = uncore_pmu_event_stop,
- .read = uncore_pmu_event_read,
- };
- } else {
- pmu->pmu = *pmu->type->pmu;
- pmu->pmu.attr_groups = pmu->type->attr_groups;
- }
-
- if (pmu->type->num_boxes == 1) {
- if (strlen(pmu->type->name) > 0)
- sprintf(pmu->name, "uncore_%s", pmu->type->name);
- else
- sprintf(pmu->name, "uncore");
- } else {
- sprintf(pmu->name, "uncore_%s_%d", pmu->type->name,
- pmu->pmu_idx);
- }
-
- ret = perf_pmu_register(&pmu->pmu, pmu->name, -1);
- return ret;
-}
-
-static void __init uncore_type_exit(struct intel_uncore_type *type)
-{
- int i;
-
- for (i = 0; i < type->num_boxes; i++)
- free_percpu(type->pmus[i].box);
- kfree(type->pmus);
- type->pmus = NULL;
- kfree(type->events_group);
- type->events_group = NULL;
-}
-
-static void __init uncore_types_exit(struct intel_uncore_type **types)
-{
- int i;
- for (i = 0; types[i]; i++)
- uncore_type_exit(types[i]);
-}
-
-static int __init uncore_type_init(struct intel_uncore_type *type)
-{
- struct intel_uncore_pmu *pmus;
- struct attribute_group *attr_group;
- struct attribute **attrs;
- int i, j;
-
- pmus = kzalloc(sizeof(*pmus) * type->num_boxes, GFP_KERNEL);
- if (!pmus)
- return -ENOMEM;
-
- type->pmus = pmus;
-
- type->unconstrainted = (struct event_constraint)
- __EVENT_CONSTRAINT(0, (1ULL << type->num_counters) - 1,
- 0, type->num_counters, 0, 0);
-
- for (i = 0; i < type->num_boxes; i++) {
- pmus[i].func_id = -1;
- pmus[i].pmu_idx = i;
- pmus[i].type = type;
- INIT_LIST_HEAD(&pmus[i].box_list);
- pmus[i].box = alloc_percpu(struct intel_uncore_box *);
- if (!pmus[i].box)
- goto fail;
- }
-
- if (type->event_descs) {
- i = 0;
- while (type->event_descs[i].attr.attr.name)
- i++;
-
- attr_group = kzalloc(sizeof(struct attribute *) * (i + 1) +
- sizeof(*attr_group), GFP_KERNEL);
- if (!attr_group)
- goto fail;
-
- attrs = (struct attribute **)(attr_group + 1);
- attr_group->name = "events";
- attr_group->attrs = attrs;
-
- for (j = 0; j < i; j++)
- attrs[j] = &type->event_descs[j].attr.attr;
-
- type->events_group = attr_group;
- }
-
- type->pmu_group = &uncore_pmu_attr_group;
- return 0;
-fail:
- uncore_type_exit(type);
- return -ENOMEM;
-}
-
-static int __init uncore_types_init(struct intel_uncore_type **types)
-{
- int i, ret;
-
- for (i = 0; types[i]; i++) {
- ret = uncore_type_init(types[i]);
- if (ret)
- goto fail;
- }
- return 0;
-fail:
- while (--i >= 0)
- uncore_type_exit(types[i]);
- return ret;
-}
-
-/*
- * add a pci uncore device
- */
-static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
-{
- struct intel_uncore_pmu *pmu;
- struct intel_uncore_box *box;
- struct intel_uncore_type *type;
- int phys_id;
- bool first_box = false;
-
- phys_id = uncore_pcibus_to_physid(pdev->bus);
- if (phys_id < 0)
- return -ENODEV;
-
- if (UNCORE_PCI_DEV_TYPE(id->driver_data) == UNCORE_EXTRA_PCI_DEV) {
- int idx = UNCORE_PCI_DEV_IDX(id->driver_data);
- uncore_extra_pci_dev[phys_id][idx] = pdev;
- pci_set_drvdata(pdev, NULL);
- return 0;
- }
-
- type = uncore_pci_uncores[UNCORE_PCI_DEV_TYPE(id->driver_data)];
- box = uncore_alloc_box(type, NUMA_NO_NODE);
- if (!box)
- return -ENOMEM;
-
- /*
- * For a performance monitoring unit with multiple boxes,
- * each box has a different function id.
- */
- pmu = &type->pmus[UNCORE_PCI_DEV_IDX(id->driver_data)];
- /* Knights Landing uses a common PCI device ID for multiple instances of
- * an uncore PMU device type. There is only one entry per device type in
- * the knl_uncore_pci_ids table despite multiple devices being present for
- * some device types. Hence the PCI device idx is 0 for all devices, so
- * advance the pmu pointer to point to an unused array element.
- */
- if (boot_cpu_data.x86_model == 87)
- while (pmu->func_id >= 0)
- pmu++;
- if (pmu->func_id < 0)
- pmu->func_id = pdev->devfn;
- else
- WARN_ON_ONCE(pmu->func_id != pdev->devfn);
-
- box->phys_id = phys_id;
- box->pci_dev = pdev;
- box->pmu = pmu;
- uncore_box_init(box);
- pci_set_drvdata(pdev, box);
-
- raw_spin_lock(&uncore_box_lock);
- if (list_empty(&pmu->box_list))
- first_box = true;
- list_add_tail(&box->list, &pmu->box_list);
- raw_spin_unlock(&uncore_box_lock);
-
- if (first_box)
- uncore_pmu_register(pmu);
- return 0;
-}
-
-static void uncore_pci_remove(struct pci_dev *pdev)
-{
- struct intel_uncore_box *box = pci_get_drvdata(pdev);
- struct intel_uncore_pmu *pmu;
- int i, cpu, phys_id;
- bool last_box = false;
-
- phys_id = uncore_pcibus_to_physid(pdev->bus);
- box = pci_get_drvdata(pdev);
- if (!box) {
- for (i = 0; i < UNCORE_EXTRA_PCI_DEV_MAX; i++) {
- if (uncore_extra_pci_dev[phys_id][i] == pdev) {
- uncore_extra_pci_dev[phys_id][i] = NULL;
- break;
- }
- }
- WARN_ON_ONCE(i >= UNCORE_EXTRA_PCI_DEV_MAX);
- return;
- }
-
- pmu = box->pmu;
- if (WARN_ON_ONCE(phys_id != box->phys_id))
- return;
-
- pci_set_drvdata(pdev, NULL);
-
- raw_spin_lock(&uncore_box_lock);
- list_del(&box->list);
- if (list_empty(&pmu->box_list))
- last_box = true;
- raw_spin_unlock(&uncore_box_lock);
-
- for_each_possible_cpu(cpu) {
- if (*per_cpu_ptr(pmu->box, cpu) == box) {
- *per_cpu_ptr(pmu->box, cpu) = NULL;
- atomic_dec(&box->refcnt);
- }
- }
-
- WARN_ON_ONCE(atomic_read(&box->refcnt) != 1);
- kfree(box);
-
- if (last_box)
- perf_pmu_unregister(&pmu->pmu);
-}
-
-static int __init uncore_pci_init(void)
-{
- int ret;
-
- switch (boot_cpu_data.x86_model) {
- case 45: /* Sandy Bridge-EP */
- ret = snbep_uncore_pci_init();
- break;
- case 62: /* Ivy Bridge-EP */
- ret = ivbep_uncore_pci_init();
- break;
- case 63: /* Haswell-EP */
- ret = hswep_uncore_pci_init();
- break;
- case 79: /* BDX-EP */
- case 86: /* BDX-DE */
- ret = bdx_uncore_pci_init();
- break;
- case 42: /* Sandy Bridge */
- ret = snb_uncore_pci_init();
- break;
- case 58: /* Ivy Bridge */
- ret = ivb_uncore_pci_init();
- break;
- case 60: /* Haswell */
- case 69: /* Haswell Celeron */
- ret = hsw_uncore_pci_init();
- break;
- case 61: /* Broadwell */
- ret = bdw_uncore_pci_init();
- break;
- case 87: /* Knights Landing */
- ret = knl_uncore_pci_init();
- break;
- case 94: /* SkyLake */
- ret = skl_uncore_pci_init();
- break;
- default:
- return 0;
- }
-
- if (ret)
- return ret;
-
- ret = uncore_types_init(uncore_pci_uncores);
- if (ret)
- return ret;
-
- uncore_pci_driver->probe = uncore_pci_probe;
- uncore_pci_driver->remove = uncore_pci_remove;
-
- ret = pci_register_driver(uncore_pci_driver);
- if (ret == 0)
- pcidrv_registered = true;
- else
- uncore_types_exit(uncore_pci_uncores);
-
- return ret;
-}
-
-static void __init uncore_pci_exit(void)
-{
- if (pcidrv_registered) {
- pcidrv_registered = false;
- pci_unregister_driver(uncore_pci_driver);
- uncore_types_exit(uncore_pci_uncores);
- }
-}
-
-/* CPU hotplug/unplug is serialized by the cpu_add_remove_lock mutex */
-static LIST_HEAD(boxes_to_free);
-
-static void uncore_kfree_boxes(void)
-{
- struct intel_uncore_box *box;
-
- while (!list_empty(&boxes_to_free)) {
- box = list_entry(boxes_to_free.next,
- struct intel_uncore_box, list);
- list_del(&box->list);
- kfree(box);
- }
-}
-
-static void uncore_cpu_dying(int cpu)
-{
- struct intel_uncore_type *type;
- struct intel_uncore_pmu *pmu;
- struct intel_uncore_box *box;
- int i, j;
-
- for (i = 0; uncore_msr_uncores[i]; i++) {
- type = uncore_msr_uncores[i];
- for (j = 0; j < type->num_boxes; j++) {
- pmu = &type->pmus[j];
- box = *per_cpu_ptr(pmu->box, cpu);
- *per_cpu_ptr(pmu->box, cpu) = NULL;
- if (box && atomic_dec_and_test(&box->refcnt))
- list_add(&box->list, &boxes_to_free);
- }
- }
-}
-
-static int uncore_cpu_starting(int cpu)
-{
- struct intel_uncore_type *type;
- struct intel_uncore_pmu *pmu;
- struct intel_uncore_box *box, *exist;
- int i, j, k, phys_id;
-
- phys_id = topology_physical_package_id(cpu);
-
- for (i = 0; uncore_msr_uncores[i]; i++) {
- type = uncore_msr_uncores[i];
- for (j = 0; j < type->num_boxes; j++) {
- pmu = &type->pmus[j];
- box = *per_cpu_ptr(pmu->box, cpu);
- /* called by uncore_cpu_init? */
- if (box && box->phys_id >= 0) {
- uncore_box_init(box);
- continue;
- }
-
- for_each_online_cpu(k) {
- exist = *per_cpu_ptr(pmu->box, k);
- if (exist && exist->phys_id == phys_id) {
- atomic_inc(&exist->refcnt);
- *per_cpu_ptr(pmu->box, cpu) = exist;
- if (box) {
- list_add(&box->list,
- &boxes_to_free);
- box = NULL;
- }
- break;
- }
- }
-
- if (box) {
- box->phys_id = phys_id;
- uncore_box_init(box);
- }
- }
- }
- return 0;
-}
-
-static int uncore_cpu_prepare(int cpu, int phys_id)
-{
- struct intel_uncore_type *type;
- struct intel_uncore_pmu *pmu;
- struct intel_uncore_box *box;
- int i, j;
-
- for (i = 0; uncore_msr_uncores[i]; i++) {
- type = uncore_msr_uncores[i];
- for (j = 0; j < type->num_boxes; j++) {
- pmu = &type->pmus[j];
- if (pmu->func_id < 0)
- pmu->func_id = j;
-
- box = uncore_alloc_box(type, cpu_to_node(cpu));
- if (!box)
- return -ENOMEM;
-
- box->pmu = pmu;
- box->phys_id = phys_id;
- *per_cpu_ptr(pmu->box, cpu) = box;
- }
- }
- return 0;
-}
-
-static void
-uncore_change_context(struct intel_uncore_type **uncores, int old_cpu, int new_cpu)
-{
- struct intel_uncore_type *type;
- struct intel_uncore_pmu *pmu;
- struct intel_uncore_box *box;
- int i, j;
-
- for (i = 0; uncores[i]; i++) {
- type = uncores[i];
- for (j = 0; j < type->num_boxes; j++) {
- pmu = &type->pmus[j];
- if (old_cpu < 0)
- box = uncore_pmu_to_box(pmu, new_cpu);
- else
- box = uncore_pmu_to_box(pmu, old_cpu);
- if (!box)
- continue;
-
- if (old_cpu < 0) {
- WARN_ON_ONCE(box->cpu != -1);
- box->cpu = new_cpu;
- continue;
- }
-
- WARN_ON_ONCE(box->cpu != old_cpu);
- if (new_cpu >= 0) {
- uncore_pmu_cancel_hrtimer(box);
- perf_pmu_migrate_context(&pmu->pmu,
- old_cpu, new_cpu);
- box->cpu = new_cpu;
- } else {
- box->cpu = -1;
- }
- }
- }
-}
-
-static void uncore_event_exit_cpu(int cpu)
-{
- int i, phys_id, target;
-
- /* if exiting cpu is used for collecting uncore events */
- if (!cpumask_test_and_clear_cpu(cpu, &uncore_cpu_mask))
- return;
-
- /* find a new cpu to collect uncore events */
- phys_id = topology_physical_package_id(cpu);
- target = -1;
- for_each_online_cpu(i) {
- if (i == cpu)
- continue;
- if (phys_id == topology_physical_package_id(i)) {
- target = i;
- break;
- }
- }
-
- /* migrate uncore events to the new cpu */
- if (target >= 0)
- cpumask_set_cpu(target, &uncore_cpu_mask);
-
- uncore_change_context(uncore_msr_uncores, cpu, target);
- uncore_change_context(uncore_pci_uncores, cpu, target);
-}
-
-static void uncore_event_init_cpu(int cpu)
-{
- int i, phys_id;
-
- phys_id = topology_physical_package_id(cpu);
- for_each_cpu(i, &uncore_cpu_mask) {
- if (phys_id == topology_physical_package_id(i))
- return;
- }
-
- cpumask_set_cpu(cpu, &uncore_cpu_mask);
-
- uncore_change_context(uncore_msr_uncores, -1, cpu);
- uncore_change_context(uncore_pci_uncores, -1, cpu);
-}
-
-static int uncore_cpu_notifier(struct notifier_block *self,
- unsigned long action, void *hcpu)
-{
- unsigned int cpu = (long)hcpu;
-
- /* allocate/free data structure for uncore box */
- switch (action & ~CPU_TASKS_FROZEN) {
- case CPU_UP_PREPARE:
- uncore_cpu_prepare(cpu, -1);
- break;
- case CPU_STARTING:
- uncore_cpu_starting(cpu);
- break;
- case CPU_UP_CANCELED:
- case CPU_DYING:
- uncore_cpu_dying(cpu);
- break;
- case CPU_ONLINE:
- case CPU_DEAD:
- uncore_kfree_boxes();
- break;
- default:
- break;
- }
-
- /* select the cpu that collects uncore events */
- switch (action & ~CPU_TASKS_FROZEN) {
- case CPU_DOWN_FAILED:
- case CPU_STARTING:
- uncore_event_init_cpu(cpu);
- break;
- case CPU_DOWN_PREPARE:
- uncore_event_exit_cpu(cpu);
- break;
- default:
- break;
- }
-
- return NOTIFY_OK;
-}
-
-static struct notifier_block uncore_cpu_nb = {
- .notifier_call = uncore_cpu_notifier,
- /*
- * to migrate uncore events, our notifier should be executed
- * before perf core's notifier.
- */
- .priority = CPU_PRI_PERF + 1,
-};
-
-static void __init uncore_cpu_setup(void *dummy)
-{
- uncore_cpu_starting(smp_processor_id());
-}
-
-static int __init uncore_cpu_init(void)
-{
- int ret;
-
- switch (boot_cpu_data.x86_model) {
- case 26: /* Nehalem */
- case 30:
- case 37: /* Westmere */
- case 44:
- nhm_uncore_cpu_init();
- break;
- case 42: /* Sandy Bridge */
- case 58: /* Ivy Bridge */
- case 60: /* Haswell */
- case 69: /* Haswell */
- case 70: /* Haswell */
- case 61: /* Broadwell */
- case 71: /* Broadwell */
- snb_uncore_cpu_init();
- break;
- case 45: /* Sandy Bridge-EP */
- snbep_uncore_cpu_init();
- break;
- case 46: /* Nehalem-EX */
- case 47: /* Westmere-EX aka. Xeon E7 */
- nhmex_uncore_cpu_init();
- break;
- case 62: /* Ivy Bridge-EP */
- ivbep_uncore_cpu_init();
- break;
- case 63: /* Haswell-EP */
- hswep_uncore_cpu_init();
- break;
- case 79: /* BDX-EP */
- case 86: /* BDX-DE */
- bdx_uncore_cpu_init();
- break;
- case 87: /* Knights Landing */
- knl_uncore_cpu_init();
- break;
- default:
- return 0;
- }
-
- ret = uncore_types_init(uncore_msr_uncores);
- if (ret)
- return ret;
-
- return 0;
-}
-
-static int __init uncore_pmus_register(void)
-{
- struct intel_uncore_pmu *pmu;
- struct intel_uncore_type *type;
- int i, j;
-
- for (i = 0; uncore_msr_uncores[i]; i++) {
- type = uncore_msr_uncores[i];
- for (j = 0; j < type->num_boxes; j++) {
- pmu = &type->pmus[j];
- uncore_pmu_register(pmu);
- }
- }
-
- return 0;
-}
-
-static void __init uncore_cpumask_init(void)
-{
- int cpu;
-
- /*
- * only invoked once, from either the msr or pci init code
- */
- if (!cpumask_empty(&uncore_cpu_mask))
- return;
-
- cpu_notifier_register_begin();
-
- for_each_online_cpu(cpu) {
- int i, phys_id = topology_physical_package_id(cpu);
-
- for_each_cpu(i, &uncore_cpu_mask) {
- if (phys_id == topology_physical_package_id(i)) {
- phys_id = -1;
- break;
- }
- }
- if (phys_id < 0)
- continue;
-
- uncore_cpu_prepare(cpu, phys_id);
- uncore_event_init_cpu(cpu);
- }
- on_each_cpu(uncore_cpu_setup, NULL, 1);
-
- __register_cpu_notifier(&uncore_cpu_nb);
-
- cpu_notifier_register_done();
-}
-
-
-static int __init intel_uncore_init(void)
-{
- int ret;
-
- if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
- return -ENODEV;
-
- if (cpu_has_hypervisor)
- return -ENODEV;
-
- ret = uncore_pci_init();
- if (ret)
- goto fail;
- ret = uncore_cpu_init();
- if (ret) {
- uncore_pci_exit();
- goto fail;
- }
- uncore_cpumask_init();
-
- uncore_pmus_register();
- return 0;
-fail:
- return ret;
-}
-device_initcall(intel_uncore_init);
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.h b/arch/x86/kernel/cpu/perf_event_intel_uncore.h
deleted file mode 100644
index a7086b862..000000000
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.h
+++ /dev/null
@@ -1,357 +0,0 @@
-#include <linux/module.h>
-#include <linux/slab.h>
-#include <linux/pci.h>
-#include <linux/perf_event.h>
-#include "perf_event.h"
-
-#define UNCORE_PMU_NAME_LEN 32
-#define UNCORE_PMU_HRTIMER_INTERVAL (60LL * NSEC_PER_SEC)
-#define UNCORE_SNB_IMC_HRTIMER_INTERVAL (5ULL * NSEC_PER_SEC)
-
-#define UNCORE_FIXED_EVENT 0xff
-#define UNCORE_PMC_IDX_MAX_GENERIC 8
-#define UNCORE_PMC_IDX_FIXED UNCORE_PMC_IDX_MAX_GENERIC
-#define UNCORE_PMC_IDX_MAX (UNCORE_PMC_IDX_FIXED + 1)
-
-#define UNCORE_PCI_DEV_DATA(type, idx) ((type << 8) | idx)
-#define UNCORE_PCI_DEV_TYPE(data) ((data >> 8) & 0xff)
-#define UNCORE_PCI_DEV_IDX(data) (data & 0xff)
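-/*
- * e.g. UNCORE_PCI_DEV_DATA(2, 1) packs to 0x201, from which
- * UNCORE_PCI_DEV_TYPE() recovers 2 and UNCORE_PCI_DEV_IDX() recovers 1.
- */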
-#define UNCORE_EXTRA_PCI_DEV 0xff
-#define UNCORE_EXTRA_PCI_DEV_MAX 3
-
-/* support up to 8 sockets */
-#define UNCORE_SOCKET_MAX 8
-
-#define UNCORE_EVENT_CONSTRAINT(c, n) EVENT_CONSTRAINT(c, n, 0xff)
-
-struct intel_uncore_ops;
-struct intel_uncore_pmu;
-struct intel_uncore_box;
-struct uncore_event_desc;
-
-struct intel_uncore_type {
- const char *name;
- int num_counters;
- int num_boxes;
- int perf_ctr_bits;
- int fixed_ctr_bits;
- unsigned perf_ctr;
- unsigned event_ctl;
- unsigned event_mask;
- unsigned fixed_ctr;
- unsigned fixed_ctl;
- unsigned box_ctl;
- unsigned msr_offset;
- unsigned num_shared_regs:8;
- unsigned single_fixed:1;
- unsigned pair_ctr_ctl:1;
- unsigned *msr_offsets;
- struct event_constraint unconstrainted;
- struct event_constraint *constraints;
- struct intel_uncore_pmu *pmus;
- struct intel_uncore_ops *ops;
- struct uncore_event_desc *event_descs;
- const struct attribute_group *attr_groups[4];
- struct pmu *pmu; /* for custom pmu ops */
-};
-
-#define pmu_group attr_groups[0]
-#define format_group attr_groups[1]
-#define events_group attr_groups[2]
-
-struct intel_uncore_ops {
- void (*init_box)(struct intel_uncore_box *);
- void (*disable_box)(struct intel_uncore_box *);
- void (*enable_box)(struct intel_uncore_box *);
- void (*disable_event)(struct intel_uncore_box *, struct perf_event *);
- void (*enable_event)(struct intel_uncore_box *, struct perf_event *);
- u64 (*read_counter)(struct intel_uncore_box *, struct perf_event *);
- int (*hw_config)(struct intel_uncore_box *, struct perf_event *);
- struct event_constraint *(*get_constraint)(struct intel_uncore_box *,
- struct perf_event *);
- void (*put_constraint)(struct intel_uncore_box *, struct perf_event *);
-};
-
-struct intel_uncore_pmu {
- struct pmu pmu;
- char name[UNCORE_PMU_NAME_LEN];
- int pmu_idx;
- int func_id;
- struct intel_uncore_type *type;
- struct intel_uncore_box ** __percpu box;
- struct list_head box_list;
-};
-
-struct intel_uncore_extra_reg {
- raw_spinlock_t lock;
- u64 config, config1, config2;
- atomic_t ref;
-};
-
-struct intel_uncore_box {
- int phys_id;
- int n_active; /* number of active events */
- int n_events;
- int cpu; /* cpu to collect events */
- unsigned long flags;
- atomic_t refcnt;
- struct perf_event *events[UNCORE_PMC_IDX_MAX];
- struct perf_event *event_list[UNCORE_PMC_IDX_MAX];
- struct event_constraint *event_constraint[UNCORE_PMC_IDX_MAX];
- unsigned long active_mask[BITS_TO_LONGS(UNCORE_PMC_IDX_MAX)];
- u64 tags[UNCORE_PMC_IDX_MAX];
- struct pci_dev *pci_dev;
- struct intel_uncore_pmu *pmu;
- u64 hrtimer_duration; /* hrtimer timeout for this box */
- struct hrtimer hrtimer;
- struct list_head list;
- struct list_head active_list;
- void *io_addr;
- struct intel_uncore_extra_reg shared_regs[0];
-};
-
-#define UNCORE_BOX_FLAG_INITIATED 0
-
-struct uncore_event_desc {
- struct kobj_attribute attr;
- const char *config;
-};
-
-struct pci2phy_map {
- struct list_head list;
- int segment;
- int pbus_to_physid[256];
-};
-
-int uncore_pcibus_to_physid(struct pci_bus *bus);
-struct pci2phy_map *__find_pci2phy_map(int segment);
-
-ssize_t uncore_event_show(struct kobject *kobj,
- struct kobj_attribute *attr, char *buf);
-
-#define INTEL_UNCORE_EVENT_DESC(_name, _config) \
-{ \
- .attr = __ATTR(_name, 0444, uncore_event_show, NULL), \
- .config = _config, \
-}
-
-#define DEFINE_UNCORE_FORMAT_ATTR(_var, _name, _format) \
-static ssize_t __uncore_##_var##_show(struct kobject *kobj, \
- struct kobj_attribute *attr, \
- char *page) \
-{ \
- BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE); \
- return sprintf(page, _format "\n"); \
-} \
-static struct kobj_attribute format_attr_##_var = \
- __ATTR(_name, 0444, __uncore_##_var##_show, NULL)
-
-static inline unsigned uncore_pci_box_ctl(struct intel_uncore_box *box)
-{
- return box->pmu->type->box_ctl;
-}
-
-static inline unsigned uncore_pci_fixed_ctl(struct intel_uncore_box *box)
-{
- return box->pmu->type->fixed_ctl;
-}
-
-static inline unsigned uncore_pci_fixed_ctr(struct intel_uncore_box *box)
-{
- return box->pmu->type->fixed_ctr;
-}
-
-static inline
-unsigned uncore_pci_event_ctl(struct intel_uncore_box *box, int idx)
-{
- return idx * 4 + box->pmu->type->event_ctl;
-}
-
-static inline
-unsigned uncore_pci_perf_ctr(struct intel_uncore_box *box, int idx)
-{
- return idx * 8 + box->pmu->type->perf_ctr;
-}
-
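-/*
- * Per-instance MSR offset: e.g. a type with .msr_offset = 0x40 (such as the
- * NHM-EX Bbox) places its second box's registers 0x40 above the first;
- * types that provide an msr_offsets[] table use the per-index entry instead.
- */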
-static inline unsigned uncore_msr_box_offset(struct intel_uncore_box *box)
-{
- struct intel_uncore_pmu *pmu = box->pmu;
- return pmu->type->msr_offsets ?
- pmu->type->msr_offsets[pmu->pmu_idx] :
- pmu->type->msr_offset * pmu->pmu_idx;
-}
-
-static inline unsigned uncore_msr_box_ctl(struct intel_uncore_box *box)
-{
- if (!box->pmu->type->box_ctl)
- return 0;
- return box->pmu->type->box_ctl + uncore_msr_box_offset(box);
-}
-
-static inline unsigned uncore_msr_fixed_ctl(struct intel_uncore_box *box)
-{
- if (!box->pmu->type->fixed_ctl)
- return 0;
- return box->pmu->type->fixed_ctl + uncore_msr_box_offset(box);
-}
-
-static inline unsigned uncore_msr_fixed_ctr(struct intel_uncore_box *box)
-{
- return box->pmu->type->fixed_ctr + uncore_msr_box_offset(box);
-}
-
-static inline
-unsigned uncore_msr_event_ctl(struct intel_uncore_box *box, int idx)
-{
- return box->pmu->type->event_ctl +
- (box->pmu->type->pair_ctr_ctl ? 2 * idx : idx) +
- uncore_msr_box_offset(box);
-}
-
-static inline
-unsigned uncore_msr_perf_ctr(struct intel_uncore_box *box, int idx)
-{
- return box->pmu->type->perf_ctr +
- (box->pmu->type->pair_ctr_ctl ? 2 * idx : idx) +
- uncore_msr_box_offset(box);
-}
-
-static inline
-unsigned uncore_fixed_ctl(struct intel_uncore_box *box)
-{
- if (box->pci_dev)
- return uncore_pci_fixed_ctl(box);
- else
- return uncore_msr_fixed_ctl(box);
-}
-
-static inline
-unsigned uncore_fixed_ctr(struct intel_uncore_box *box)
-{
- if (box->pci_dev)
- return uncore_pci_fixed_ctr(box);
- else
- return uncore_msr_fixed_ctr(box);
-}
-
-static inline
-unsigned uncore_event_ctl(struct intel_uncore_box *box, int idx)
-{
- if (box->pci_dev)
- return uncore_pci_event_ctl(box, idx);
- else
- return uncore_msr_event_ctl(box, idx);
-}
-
-static inline
-unsigned uncore_perf_ctr(struct intel_uncore_box *box, int idx)
-{
- if (box->pci_dev)
- return uncore_pci_perf_ctr(box, idx);
- else
- return uncore_msr_perf_ctr(box, idx);
-}
-
-static inline int uncore_perf_ctr_bits(struct intel_uncore_box *box)
-{
- return box->pmu->type->perf_ctr_bits;
-}
-
-static inline int uncore_fixed_ctr_bits(struct intel_uncore_box *box)
-{
- return box->pmu->type->fixed_ctr_bits;
-}
-
-static inline int uncore_num_counters(struct intel_uncore_box *box)
-{
- return box->pmu->type->num_counters;
-}
-
-static inline void uncore_disable_box(struct intel_uncore_box *box)
-{
- if (box->pmu->type->ops->disable_box)
- box->pmu->type->ops->disable_box(box);
-}
-
-static inline void uncore_enable_box(struct intel_uncore_box *box)
-{
- if (box->pmu->type->ops->enable_box)
- box->pmu->type->ops->enable_box(box);
-}
-
-static inline void uncore_disable_event(struct intel_uncore_box *box,
- struct perf_event *event)
-{
- box->pmu->type->ops->disable_event(box, event);
-}
-
-static inline void uncore_enable_event(struct intel_uncore_box *box,
- struct perf_event *event)
-{
- box->pmu->type->ops->enable_event(box, event);
-}
-
-static inline u64 uncore_read_counter(struct intel_uncore_box *box,
- struct perf_event *event)
-{
- return box->pmu->type->ops->read_counter(box, event);
-}
-
-static inline void uncore_box_init(struct intel_uncore_box *box)
-{
- if (!test_and_set_bit(UNCORE_BOX_FLAG_INITIATED, &box->flags)) {
- if (box->pmu->type->ops->init_box)
- box->pmu->type->ops->init_box(box);
- }
-}
-
-static inline bool uncore_box_is_fake(struct intel_uncore_box *box)
-{
- return (box->phys_id < 0);
-}
-
-struct intel_uncore_pmu *uncore_event_to_pmu(struct perf_event *event);
-struct intel_uncore_box *uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu);
-struct intel_uncore_box *uncore_event_to_box(struct perf_event *event);
-u64 uncore_msr_read_counter(struct intel_uncore_box *box, struct perf_event *event);
-void uncore_pmu_start_hrtimer(struct intel_uncore_box *box);
-void uncore_pmu_cancel_hrtimer(struct intel_uncore_box *box);
-void uncore_pmu_event_read(struct perf_event *event);
-void uncore_perf_event_update(struct intel_uncore_box *box, struct perf_event *event);
-struct event_constraint *
-uncore_get_constraint(struct intel_uncore_box *box, struct perf_event *event);
-void uncore_put_constraint(struct intel_uncore_box *box, struct perf_event *event);
-u64 uncore_shared_reg_config(struct intel_uncore_box *box, int idx);
-
-extern struct intel_uncore_type **uncore_msr_uncores;
-extern struct intel_uncore_type **uncore_pci_uncores;
-extern struct pci_driver *uncore_pci_driver;
-extern raw_spinlock_t pci2phy_map_lock;
-extern struct list_head pci2phy_map_head;
-extern struct pci_dev *uncore_extra_pci_dev[UNCORE_SOCKET_MAX][UNCORE_EXTRA_PCI_DEV_MAX];
-extern struct event_constraint uncore_constraint_empty;
-
-/* perf_event_intel_uncore_snb.c */
-int snb_uncore_pci_init(void);
-int ivb_uncore_pci_init(void);
-int hsw_uncore_pci_init(void);
-int bdw_uncore_pci_init(void);
-int skl_uncore_pci_init(void);
-void snb_uncore_cpu_init(void);
-void nhm_uncore_cpu_init(void);
-int snb_pci2phy_map_init(int devid);
-
-/* perf_event_intel_uncore_snbep.c */
-int snbep_uncore_pci_init(void);
-void snbep_uncore_cpu_init(void);
-int ivbep_uncore_pci_init(void);
-void ivbep_uncore_cpu_init(void);
-int hswep_uncore_pci_init(void);
-void hswep_uncore_cpu_init(void);
-int bdx_uncore_pci_init(void);
-void bdx_uncore_cpu_init(void);
-int knl_uncore_pci_init(void);
-void knl_uncore_cpu_init(void);
-
-/* perf_event_intel_uncore_nhmex.c */
-void nhmex_uncore_cpu_init(void);
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore_nhmex.c b/arch/x86/kernel/cpu/perf_event_intel_uncore_nhmex.c
deleted file mode 100644
index 2749965af..000000000
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore_nhmex.c
+++ /dev/null
@@ -1,1221 +0,0 @@
-/* Nehalem-EX/Westmere-EX uncore support */
-#include "perf_event_intel_uncore.h"
-
-/* NHM-EX event control */
-#define NHMEX_PMON_CTL_EV_SEL_MASK 0x000000ff
-#define NHMEX_PMON_CTL_UMASK_MASK 0x0000ff00
-#define NHMEX_PMON_CTL_EN_BIT0 (1 << 0)
-#define NHMEX_PMON_CTL_EDGE_DET (1 << 18)
-#define NHMEX_PMON_CTL_PMI_EN (1 << 20)
-#define NHMEX_PMON_CTL_EN_BIT22 (1 << 22)
-#define NHMEX_PMON_CTL_INVERT (1 << 23)
-#define NHMEX_PMON_CTL_TRESH_MASK 0xff000000
-#define NHMEX_PMON_RAW_EVENT_MASK (NHMEX_PMON_CTL_EV_SEL_MASK | \
- NHMEX_PMON_CTL_UMASK_MASK | \
- NHMEX_PMON_CTL_EDGE_DET | \
- NHMEX_PMON_CTL_INVERT | \
- NHMEX_PMON_CTL_TRESH_MASK)
-
-/* NHM-EX Ubox */
-#define NHMEX_U_MSR_PMON_GLOBAL_CTL 0xc00
-#define NHMEX_U_MSR_PMON_CTR 0xc11
-#define NHMEX_U_MSR_PMON_EV_SEL 0xc10
-
-#define NHMEX_U_PMON_GLOBAL_EN (1 << 0)
-#define NHMEX_U_PMON_GLOBAL_PMI_CORE_SEL 0x0000001e
-#define NHMEX_U_PMON_GLOBAL_EN_ALL (1 << 28)
-#define NHMEX_U_PMON_GLOBAL_RST_ALL (1 << 29)
-#define NHMEX_U_PMON_GLOBAL_FRZ_ALL (1 << 31)
-
-#define NHMEX_U_PMON_RAW_EVENT_MASK \
- (NHMEX_PMON_CTL_EV_SEL_MASK | \
- NHMEX_PMON_CTL_EDGE_DET)
-
-/* NHM-EX Cbox */
-#define NHMEX_C0_MSR_PMON_GLOBAL_CTL 0xd00
-#define NHMEX_C0_MSR_PMON_CTR0 0xd11
-#define NHMEX_C0_MSR_PMON_EV_SEL0 0xd10
-#define NHMEX_C_MSR_OFFSET 0x20
-
-/* NHM-EX Bbox */
-#define NHMEX_B0_MSR_PMON_GLOBAL_CTL 0xc20
-#define NHMEX_B0_MSR_PMON_CTR0 0xc31
-#define NHMEX_B0_MSR_PMON_CTL0 0xc30
-#define NHMEX_B_MSR_OFFSET 0x40
-#define NHMEX_B0_MSR_MATCH 0xe45
-#define NHMEX_B0_MSR_MASK 0xe46
-#define NHMEX_B1_MSR_MATCH 0xe4d
-#define NHMEX_B1_MSR_MASK 0xe4e
-
-#define NHMEX_B_PMON_CTL_EN (1 << 0)
-#define NHMEX_B_PMON_CTL_EV_SEL_SHIFT 1
-#define NHMEX_B_PMON_CTL_EV_SEL_MASK \
- (0x1f << NHMEX_B_PMON_CTL_EV_SEL_SHIFT)
-#define NHMEX_B_PMON_CTR_SHIFT 6
-#define NHMEX_B_PMON_CTR_MASK \
- (0x3 << NHMEX_B_PMON_CTR_SHIFT)
-#define NHMEX_B_PMON_RAW_EVENT_MASK \
- (NHMEX_B_PMON_CTL_EV_SEL_MASK | \
- NHMEX_B_PMON_CTR_MASK)
-
-/* NHM-EX Sbox */
-#define NHMEX_S0_MSR_PMON_GLOBAL_CTL 0xc40
-#define NHMEX_S0_MSR_PMON_CTR0 0xc51
-#define NHMEX_S0_MSR_PMON_CTL0 0xc50
-#define NHMEX_S_MSR_OFFSET 0x80
-#define NHMEX_S0_MSR_MM_CFG 0xe48
-#define NHMEX_S0_MSR_MATCH 0xe49
-#define NHMEX_S0_MSR_MASK 0xe4a
-#define NHMEX_S1_MSR_MM_CFG 0xe58
-#define NHMEX_S1_MSR_MATCH 0xe59
-#define NHMEX_S1_MSR_MASK 0xe5a
-
-#define NHMEX_S_PMON_MM_CFG_EN (0x1ULL << 63)
-#define NHMEX_S_EVENT_TO_R_PROG_EV 0
-
-/* NHM-EX Mbox */
-#define NHMEX_M0_MSR_GLOBAL_CTL 0xca0
-#define NHMEX_M0_MSR_PMU_DSP 0xca5
-#define NHMEX_M0_MSR_PMU_ISS 0xca6
-#define NHMEX_M0_MSR_PMU_MAP 0xca7
-#define NHMEX_M0_MSR_PMU_MSC_THR 0xca8
-#define NHMEX_M0_MSR_PMU_PGT 0xca9
-#define NHMEX_M0_MSR_PMU_PLD 0xcaa
-#define NHMEX_M0_MSR_PMU_ZDP_CTL_FVC 0xcab
-#define NHMEX_M0_MSR_PMU_CTL0 0xcb0
-#define NHMEX_M0_MSR_PMU_CNT0 0xcb1
-#define NHMEX_M_MSR_OFFSET 0x40
-#define NHMEX_M0_MSR_PMU_MM_CFG 0xe54
-#define NHMEX_M1_MSR_PMU_MM_CFG 0xe5c
-
-#define NHMEX_M_PMON_MM_CFG_EN (1ULL << 63)
-#define NHMEX_M_PMON_ADDR_MATCH_MASK 0x3ffffffffULL
-#define NHMEX_M_PMON_ADDR_MASK_MASK 0x7ffffffULL
-#define NHMEX_M_PMON_ADDR_MASK_SHIFT 34
-
-#define NHMEX_M_PMON_CTL_EN (1 << 0)
-#define NHMEX_M_PMON_CTL_PMI_EN (1 << 1)
-#define NHMEX_M_PMON_CTL_COUNT_MODE_SHIFT 2
-#define NHMEX_M_PMON_CTL_COUNT_MODE_MASK \
- (0x3 << NHMEX_M_PMON_CTL_COUNT_MODE_SHIFT)
-#define NHMEX_M_PMON_CTL_STORAGE_MODE_SHIFT 4
-#define NHMEX_M_PMON_CTL_STORAGE_MODE_MASK \
- (0x3 << NHMEX_M_PMON_CTL_STORAGE_MODE_SHIFT)
-#define NHMEX_M_PMON_CTL_WRAP_MODE (1 << 6)
-#define NHMEX_M_PMON_CTL_FLAG_MODE (1 << 7)
-#define NHMEX_M_PMON_CTL_INC_SEL_SHIFT 9
-#define NHMEX_M_PMON_CTL_INC_SEL_MASK \
- (0x1f << NHMEX_M_PMON_CTL_INC_SEL_SHIFT)
-#define NHMEX_M_PMON_CTL_SET_FLAG_SEL_SHIFT 19
-#define NHMEX_M_PMON_CTL_SET_FLAG_SEL_MASK \
- (0x7 << NHMEX_M_PMON_CTL_SET_FLAG_SEL_SHIFT)
-#define NHMEX_M_PMON_RAW_EVENT_MASK \
- (NHMEX_M_PMON_CTL_COUNT_MODE_MASK | \
- NHMEX_M_PMON_CTL_STORAGE_MODE_MASK | \
- NHMEX_M_PMON_CTL_WRAP_MODE | \
- NHMEX_M_PMON_CTL_FLAG_MODE | \
- NHMEX_M_PMON_CTL_INC_SEL_MASK | \
- NHMEX_M_PMON_CTL_SET_FLAG_SEL_MASK)
-
-#define NHMEX_M_PMON_ZDP_CTL_FVC_MASK (((1 << 11) - 1) | (1 << 23))
-#define NHMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(n) (0x7ULL << (11 + 3 * (n)))
-
-#define WSMEX_M_PMON_ZDP_CTL_FVC_MASK (((1 << 12) - 1) | (1 << 24))
-#define WSMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(n) (0x7ULL << (12 + 3 * (n)))
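-/*
- * e.g. on NHM-EX the shared FVC bits are 0-10 and 23, and per-event field
- * n (0..3) occupies the 3 bits starting at 11 + 3*n: field 0 is bits 11-13,
- * field 3 is bits 20-22.
- */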
-
-/*
- * If the 7th bit is not set, use bits 9~13 to select the event;
- * otherwise use bits 19~21 to select the event.
- */
-#define MBOX_INC_SEL(x) ((x) << NHMEX_M_PMON_CTL_INC_SEL_SHIFT)
-#define MBOX_SET_FLAG_SEL(x) (((x) << NHMEX_M_PMON_CTL_SET_FLAG_SEL_SHIFT) | \
- NHMEX_M_PMON_CTL_FLAG_MODE)
-#define MBOX_INC_SEL_MASK (NHMEX_M_PMON_CTL_INC_SEL_MASK | \
- NHMEX_M_PMON_CTL_FLAG_MODE)
-#define MBOX_SET_FLAG_SEL_MASK (NHMEX_M_PMON_CTL_SET_FLAG_SEL_MASK | \
- NHMEX_M_PMON_CTL_FLAG_MODE)
-#define MBOX_INC_SEL_EXTAR_REG(c, r) \
- EVENT_EXTRA_REG(MBOX_INC_SEL(c), NHMEX_M0_MSR_PMU_##r, \
- MBOX_INC_SEL_MASK, (u64)-1, NHMEX_M_##r)
-#define MBOX_SET_FLAG_SEL_EXTRA_REG(c, r) \
- EVENT_EXTRA_REG(MBOX_SET_FLAG_SEL(c), NHMEX_M0_MSR_PMU_##r, \
- MBOX_SET_FLAG_SEL_MASK, \
- (u64)-1, NHMEX_M_##r)
-
-/* NHM-EX Rbox */
-#define NHMEX_R_MSR_GLOBAL_CTL 0xe00
-#define NHMEX_R_MSR_PMON_CTL0 0xe10
-#define NHMEX_R_MSR_PMON_CNT0 0xe11
-#define NHMEX_R_MSR_OFFSET 0x20
-
-#define NHMEX_R_MSR_PORTN_QLX_CFG(n) \
- ((n) < 4 ? (0xe0c + (n)) : (0xe2c + (n) - 4))
-#define NHMEX_R_MSR_PORTN_IPERF_CFG0(n) (0xe04 + (n))
-#define NHMEX_R_MSR_PORTN_IPERF_CFG1(n) (0xe24 + (n))
-#define NHMEX_R_MSR_PORTN_XBR_OFFSET(n) \
- (((n) < 4 ? 0 : 0x10) + (n) * 4)
-#define NHMEX_R_MSR_PORTN_XBR_SET1_MM_CFG(n) \
- (0xe60 + NHMEX_R_MSR_PORTN_XBR_OFFSET(n))
-#define NHMEX_R_MSR_PORTN_XBR_SET1_MATCH(n) \
- (NHMEX_R_MSR_PORTN_XBR_SET1_MM_CFG(n) + 1)
-#define NHMEX_R_MSR_PORTN_XBR_SET1_MASK(n) \
- (NHMEX_R_MSR_PORTN_XBR_SET1_MM_CFG(n) + 2)
-#define NHMEX_R_MSR_PORTN_XBR_SET2_MM_CFG(n) \
- (0xe70 + NHMEX_R_MSR_PORTN_XBR_OFFSET(n))
-#define NHMEX_R_MSR_PORTN_XBR_SET2_MATCH(n) \
- (NHMEX_R_MSR_PORTN_XBR_SET2_MM_CFG(n) + 1)
-#define NHMEX_R_MSR_PORTN_XBR_SET2_MASK(n) \
- (NHMEX_R_MSR_PORTN_XBR_SET2_MM_CFG(n) + 2)
-
-#define NHMEX_R_PMON_CTL_EN (1 << 0)
-#define NHMEX_R_PMON_CTL_EV_SEL_SHIFT 1
-#define NHMEX_R_PMON_CTL_EV_SEL_MASK \
- (0x1f << NHMEX_R_PMON_CTL_EV_SEL_SHIFT)
-#define NHMEX_R_PMON_CTL_PMI_EN (1 << 6)
-#define NHMEX_R_PMON_RAW_EVENT_MASK NHMEX_R_PMON_CTL_EV_SEL_MASK
-
-/* NHM-EX Wbox */
-#define NHMEX_W_MSR_GLOBAL_CTL 0xc80
-#define NHMEX_W_MSR_PMON_CNT0 0xc90
-#define NHMEX_W_MSR_PMON_EVT_SEL0 0xc91
-#define NHMEX_W_MSR_PMON_FIXED_CTR 0x394
-#define NHMEX_W_MSR_PMON_FIXED_CTL 0x395
-
-#define NHMEX_W_PMON_GLOBAL_FIXED_EN (1ULL << 31)
-
-#define __BITS_VALUE(x, i, n) ((typeof(x))(((x) >> ((i) * (n))) & \
- ((1ULL << (n)) - 1)))
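-/* e.g. __BITS_VALUE(0xaabbccdd, 1, 8) extracts the second 8-bit field: 0xcc */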
-
-DEFINE_UNCORE_FORMAT_ATTR(event, event, "config:0-7");
-DEFINE_UNCORE_FORMAT_ATTR(event5, event, "config:1-5");
-DEFINE_UNCORE_FORMAT_ATTR(umask, umask, "config:8-15");
-DEFINE_UNCORE_FORMAT_ATTR(edge, edge, "config:18");
-DEFINE_UNCORE_FORMAT_ATTR(inv, inv, "config:23");
-DEFINE_UNCORE_FORMAT_ATTR(thresh8, thresh, "config:24-31");
-DEFINE_UNCORE_FORMAT_ATTR(counter, counter, "config:6-7");
-DEFINE_UNCORE_FORMAT_ATTR(match, match, "config1:0-63");
-DEFINE_UNCORE_FORMAT_ATTR(mask, mask, "config2:0-63");
-
-static void nhmex_uncore_msr_init_box(struct intel_uncore_box *box)
-{
- wrmsrl(NHMEX_U_MSR_PMON_GLOBAL_CTL, NHMEX_U_PMON_GLOBAL_EN_ALL);
-}
-
-static void nhmex_uncore_msr_disable_box(struct intel_uncore_box *box)
-{
- unsigned msr = uncore_msr_box_ctl(box);
- u64 config;
-
- if (msr) {
- rdmsrl(msr, config);
- config &= ~((1ULL << uncore_num_counters(box)) - 1);
- /* WBox has a fixed counter */
- if (uncore_msr_fixed_ctl(box))
- config &= ~NHMEX_W_PMON_GLOBAL_FIXED_EN;
- wrmsrl(msr, config);
- }
-}
-
-static void nhmex_uncore_msr_enable_box(struct intel_uncore_box *box)
-{
- unsigned msr = uncore_msr_box_ctl(box);
- u64 config;
-
- if (msr) {
- rdmsrl(msr, config);
- config |= (1ULL << uncore_num_counters(box)) - 1;
- /* WBox has a fixed counter */
- if (uncore_msr_fixed_ctl(box))
- config |= NHMEX_W_PMON_GLOBAL_FIXED_EN;
- wrmsrl(msr, config);
- }
-}
-
-static void nhmex_uncore_msr_disable_event(struct intel_uncore_box *box, struct perf_event *event)
-{
- wrmsrl(event->hw.config_base, 0);
-}
-
-static void nhmex_uncore_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event)
-{
- struct hw_perf_event *hwc = &event->hw;
-
- if (hwc->idx >= UNCORE_PMC_IDX_FIXED)
- wrmsrl(hwc->config_base, NHMEX_PMON_CTL_EN_BIT0);
- else if (box->pmu->type->event_mask & NHMEX_PMON_CTL_EN_BIT0)
- wrmsrl(hwc->config_base, hwc->config | NHMEX_PMON_CTL_EN_BIT22);
- else
- wrmsrl(hwc->config_base, hwc->config | NHMEX_PMON_CTL_EN_BIT0);
-}
-
-#define NHMEX_UNCORE_OPS_COMMON_INIT() \
- .init_box = nhmex_uncore_msr_init_box, \
- .disable_box = nhmex_uncore_msr_disable_box, \
- .enable_box = nhmex_uncore_msr_enable_box, \
- .disable_event = nhmex_uncore_msr_disable_event, \
- .read_counter = uncore_msr_read_counter
-
-static struct intel_uncore_ops nhmex_uncore_ops = {
- NHMEX_UNCORE_OPS_COMMON_INIT(),
- .enable_event = nhmex_uncore_msr_enable_event,
-};
-
-static struct attribute *nhmex_uncore_ubox_formats_attr[] = {
- &format_attr_event.attr,
- &format_attr_edge.attr,
- NULL,
-};
-
-static struct attribute_group nhmex_uncore_ubox_format_group = {
- .name = "format",
- .attrs = nhmex_uncore_ubox_formats_attr,
-};
-
-static struct intel_uncore_type nhmex_uncore_ubox = {
- .name = "ubox",
- .num_counters = 1,
- .num_boxes = 1,
- .perf_ctr_bits = 48,
- .event_ctl = NHMEX_U_MSR_PMON_EV_SEL,
- .perf_ctr = NHMEX_U_MSR_PMON_CTR,
- .event_mask = NHMEX_U_PMON_RAW_EVENT_MASK,
- .box_ctl = NHMEX_U_MSR_PMON_GLOBAL_CTL,
- .ops = &nhmex_uncore_ops,
- .format_group = &nhmex_uncore_ubox_format_group
-};
-
-static struct attribute *nhmex_uncore_cbox_formats_attr[] = {
- &format_attr_event.attr,
- &format_attr_umask.attr,
- &format_attr_edge.attr,
- &format_attr_inv.attr,
- &format_attr_thresh8.attr,
- NULL,
-};
-
-static struct attribute_group nhmex_uncore_cbox_format_group = {
- .name = "format",
- .attrs = nhmex_uncore_cbox_formats_attr,
-};
-
-/* msr offset for each instance of cbox */
-static unsigned nhmex_cbox_msr_offsets[] = {
- 0x0, 0x80, 0x40, 0xc0, 0x20, 0xa0, 0x60, 0xe0, 0x240, 0x2c0,
-};
-
-static struct intel_uncore_type nhmex_uncore_cbox = {
- .name = "cbox",
- .num_counters = 6,
- .num_boxes = 10,
- .perf_ctr_bits = 48,
- .event_ctl = NHMEX_C0_MSR_PMON_EV_SEL0,
- .perf_ctr = NHMEX_C0_MSR_PMON_CTR0,
- .event_mask = NHMEX_PMON_RAW_EVENT_MASK,
- .box_ctl = NHMEX_C0_MSR_PMON_GLOBAL_CTL,
- .msr_offsets = nhmex_cbox_msr_offsets,
- .pair_ctr_ctl = 1,
- .ops = &nhmex_uncore_ops,
- .format_group = &nhmex_uncore_cbox_format_group
-};
-
-static struct uncore_event_desc nhmex_uncore_wbox_events[] = {
- INTEL_UNCORE_EVENT_DESC(clockticks, "event=0xff,umask=0"),
- { /* end: all zeroes */ },
-};
-
-static struct intel_uncore_type nhmex_uncore_wbox = {
- .name = "wbox",
- .num_counters = 4,
- .num_boxes = 1,
- .perf_ctr_bits = 48,
- .event_ctl = NHMEX_W_MSR_PMON_CNT0,
- .perf_ctr = NHMEX_W_MSR_PMON_EVT_SEL0,
- .fixed_ctr = NHMEX_W_MSR_PMON_FIXED_CTR,
- .fixed_ctl = NHMEX_W_MSR_PMON_FIXED_CTL,
- .event_mask = NHMEX_PMON_RAW_EVENT_MASK,
- .box_ctl = NHMEX_W_MSR_GLOBAL_CTL,
- .pair_ctr_ctl = 1,
- .event_descs = nhmex_uncore_wbox_events,
- .ops = &nhmex_uncore_ops,
- .format_group = &nhmex_uncore_cbox_format_group
-};
-
-static int nhmex_bbox_hw_config(struct intel_uncore_box *box, struct perf_event *event)
-{
- struct hw_perf_event *hwc = &event->hw;
- struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
- struct hw_perf_event_extra *reg2 = &hwc->branch_reg;
- int ctr, ev_sel;
-
- ctr = (hwc->config & NHMEX_B_PMON_CTR_MASK) >>
- NHMEX_B_PMON_CTR_SHIFT;
- ev_sel = (hwc->config & NHMEX_B_PMON_CTL_EV_SEL_MASK) >>
- NHMEX_B_PMON_CTL_EV_SEL_SHIFT;
-
- /* events that do not use the match/mask registers */
- if ((ctr == 0 && ev_sel > 0x3) || (ctr == 1 && ev_sel > 0x6) ||
- (ctr == 2 && ev_sel != 0x4) || ctr == 3)
- return 0;
-
- if (box->pmu->pmu_idx == 0)
- reg1->reg = NHMEX_B0_MSR_MATCH;
- else
- reg1->reg = NHMEX_B1_MSR_MATCH;
- reg1->idx = 0;
- reg1->config = event->attr.config1;
- reg2->config = event->attr.config2;
- return 0;
-}
-
-static void nhmex_bbox_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event)
-{
- struct hw_perf_event *hwc = &event->hw;
- struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
- struct hw_perf_event_extra *reg2 = &hwc->branch_reg;
-
- if (reg1->idx != EXTRA_REG_NONE) {
- wrmsrl(reg1->reg, reg1->config);
- wrmsrl(reg1->reg + 1, reg2->config);
- }
- wrmsrl(hwc->config_base, NHMEX_PMON_CTL_EN_BIT0 |
- (hwc->config & NHMEX_B_PMON_CTL_EV_SEL_MASK));
-}
-
-/*
- * The Bbox has 4 counters, but each counter monitors different events.
- * Use bits 6-7 in the event config to select the counter.
- */
-static struct event_constraint nhmex_uncore_bbox_constraints[] = {
- EVENT_CONSTRAINT(0 , 1, 0xc0),
- EVENT_CONSTRAINT(0x40, 2, 0xc0),
- EVENT_CONSTRAINT(0x80, 4, 0xc0),
- EVENT_CONSTRAINT(0xc0, 8, 0xc0),
- EVENT_CONSTRAINT_END,
-};
-
-static struct attribute *nhmex_uncore_bbox_formats_attr[] = {
- &format_attr_event5.attr,
- &format_attr_counter.attr,
- &format_attr_match.attr,
- &format_attr_mask.attr,
- NULL,
-};
-
-static struct attribute_group nhmex_uncore_bbox_format_group = {
- .name = "format",
- .attrs = nhmex_uncore_bbox_formats_attr,
-};
-
-static struct intel_uncore_ops nhmex_uncore_bbox_ops = {
- NHMEX_UNCORE_OPS_COMMON_INIT(),
- .enable_event = nhmex_bbox_msr_enable_event,
- .hw_config = nhmex_bbox_hw_config,
- .get_constraint = uncore_get_constraint,
- .put_constraint = uncore_put_constraint,
-};
-
-static struct intel_uncore_type nhmex_uncore_bbox = {
- .name = "bbox",
- .num_counters = 4,
- .num_boxes = 2,
- .perf_ctr_bits = 48,
- .event_ctl = NHMEX_B0_MSR_PMON_CTL0,
- .perf_ctr = NHMEX_B0_MSR_PMON_CTR0,
- .event_mask = NHMEX_B_PMON_RAW_EVENT_MASK,
- .box_ctl = NHMEX_B0_MSR_PMON_GLOBAL_CTL,
- .msr_offset = NHMEX_B_MSR_OFFSET,
- .pair_ctr_ctl = 1,
- .num_shared_regs = 1,
- .constraints = nhmex_uncore_bbox_constraints,
- .ops = &nhmex_uncore_bbox_ops,
- .format_group = &nhmex_uncore_bbox_format_group
-};
-
-static int nhmex_sbox_hw_config(struct intel_uncore_box *box, struct perf_event *event)
-{
- struct hw_perf_event *hwc = &event->hw;
- struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
- struct hw_perf_event_extra *reg2 = &hwc->branch_reg;
-
- /* only TO_R_PROG_EV event uses the match/mask register */
- if ((hwc->config & NHMEX_PMON_CTL_EV_SEL_MASK) !=
- NHMEX_S_EVENT_TO_R_PROG_EV)
- return 0;
-
- if (box->pmu->pmu_idx == 0)
- reg1->reg = NHMEX_S0_MSR_MM_CFG;
- else
- reg1->reg = NHMEX_S1_MSR_MM_CFG;
- reg1->idx = 0;
- reg1->config = event->attr.config1;
- reg2->config = event->attr.config2;
- return 0;
-}
-
-static void nhmex_sbox_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event)
-{
- struct hw_perf_event *hwc = &event->hw;
- struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
- struct hw_perf_event_extra *reg2 = &hwc->branch_reg;
-
- if (reg1->idx != EXTRA_REG_NONE) {
- wrmsrl(reg1->reg, 0);
- wrmsrl(reg1->reg + 1, reg1->config);
- wrmsrl(reg1->reg + 2, reg2->config);
- wrmsrl(reg1->reg, NHMEX_S_PMON_MM_CFG_EN);
- }
- wrmsrl(hwc->config_base, hwc->config | NHMEX_PMON_CTL_EN_BIT22);
-}
-
-static struct attribute *nhmex_uncore_sbox_formats_attr[] = {
- &format_attr_event.attr,
- &format_attr_umask.attr,
- &format_attr_edge.attr,
- &format_attr_inv.attr,
- &format_attr_thresh8.attr,
- &format_attr_match.attr,
- &format_attr_mask.attr,
- NULL,
-};
-
-static struct attribute_group nhmex_uncore_sbox_format_group = {
- .name = "format",
- .attrs = nhmex_uncore_sbox_formats_attr,
-};
-
-static struct intel_uncore_ops nhmex_uncore_sbox_ops = {
- NHMEX_UNCORE_OPS_COMMON_INIT(),
- .enable_event = nhmex_sbox_msr_enable_event,
- .hw_config = nhmex_sbox_hw_config,
- .get_constraint = uncore_get_constraint,
- .put_constraint = uncore_put_constraint,
-};
-
-static struct intel_uncore_type nhmex_uncore_sbox = {
- .name = "sbox",
- .num_counters = 4,
- .num_boxes = 2,
- .perf_ctr_bits = 48,
- .event_ctl = NHMEX_S0_MSR_PMON_CTL0,
- .perf_ctr = NHMEX_S0_MSR_PMON_CTR0,
- .event_mask = NHMEX_PMON_RAW_EVENT_MASK,
- .box_ctl = NHMEX_S0_MSR_PMON_GLOBAL_CTL,
- .msr_offset = NHMEX_S_MSR_OFFSET,
- .pair_ctr_ctl = 1,
- .num_shared_regs = 1,
- .ops = &nhmex_uncore_sbox_ops,
- .format_group = &nhmex_uncore_sbox_format_group
-};
-
-enum {
- EXTRA_REG_NHMEX_M_FILTER,
- EXTRA_REG_NHMEX_M_DSP,
- EXTRA_REG_NHMEX_M_ISS,
- EXTRA_REG_NHMEX_M_MAP,
- EXTRA_REG_NHMEX_M_MSC_THR,
- EXTRA_REG_NHMEX_M_PGT,
- EXTRA_REG_NHMEX_M_PLD,
- EXTRA_REG_NHMEX_M_ZDP_CTL_FVC,
-};
-
-static struct extra_reg nhmex_uncore_mbox_extra_regs[] = {
- MBOX_INC_SEL_EXTAR_REG(0x0, DSP),
- MBOX_INC_SEL_EXTAR_REG(0x4, MSC_THR),
- MBOX_INC_SEL_EXTAR_REG(0x5, MSC_THR),
- MBOX_INC_SEL_EXTAR_REG(0x9, ISS),
- /* event 0xa uses two extra registers */
- MBOX_INC_SEL_EXTAR_REG(0xa, ISS),
- MBOX_INC_SEL_EXTAR_REG(0xa, PLD),
- MBOX_INC_SEL_EXTAR_REG(0xb, PLD),
- /* events 0xd ~ 0x10 use the same extra register */
- MBOX_INC_SEL_EXTAR_REG(0xd, ZDP_CTL_FVC),
- MBOX_INC_SEL_EXTAR_REG(0xe, ZDP_CTL_FVC),
- MBOX_INC_SEL_EXTAR_REG(0xf, ZDP_CTL_FVC),
- MBOX_INC_SEL_EXTAR_REG(0x10, ZDP_CTL_FVC),
- MBOX_INC_SEL_EXTAR_REG(0x16, PGT),
- MBOX_SET_FLAG_SEL_EXTRA_REG(0x0, DSP),
- MBOX_SET_FLAG_SEL_EXTRA_REG(0x1, ISS),
- MBOX_SET_FLAG_SEL_EXTRA_REG(0x5, PGT),
- MBOX_SET_FLAG_SEL_EXTRA_REG(0x6, MAP),
- EVENT_EXTRA_END
-};
-
-/* Nehalem-EX or Westmere-EX ? */
-static bool uncore_nhmex;
-
-static bool nhmex_mbox_get_shared_reg(struct intel_uncore_box *box, int idx, u64 config)
-{
- struct intel_uncore_extra_reg *er;
- unsigned long flags;
- bool ret = false;
- u64 mask;
-
- if (idx < EXTRA_REG_NHMEX_M_ZDP_CTL_FVC) {
- er = &box->shared_regs[idx];
- raw_spin_lock_irqsave(&er->lock, flags);
- if (!atomic_read(&er->ref) || er->config == config) {
- atomic_inc(&er->ref);
- er->config = config;
- ret = true;
- }
- raw_spin_unlock_irqrestore(&er->lock, flags);
-
- return ret;
- }
- /*
- * The ZDP_CTL_FVC MSR has 4 fields which are used to control
- * events 0xd ~ 0x10. Besides these 4 fields, there are additional
- * fields which are shared.
- */
- idx -= EXTRA_REG_NHMEX_M_ZDP_CTL_FVC;
- if (WARN_ON_ONCE(idx >= 4))
- return false;
-
- /* mask of the shared fields */
- if (uncore_nhmex)
- mask = NHMEX_M_PMON_ZDP_CTL_FVC_MASK;
- else
- mask = WSMEX_M_PMON_ZDP_CTL_FVC_MASK;
- er = &box->shared_regs[EXTRA_REG_NHMEX_M_ZDP_CTL_FVC];
-
- raw_spin_lock_irqsave(&er->lock, flags);
- /* add mask of the non-shared field if it's in use */
- if (__BITS_VALUE(atomic_read(&er->ref), idx, 8)) {
- if (uncore_nhmex)
- mask |= NHMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(idx);
- else
- mask |= WSMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(idx);
- }
-
- if (!atomic_read(&er->ref) || !((er->config ^ config) & mask)) {
- atomic_add(1 << (idx * 8), &er->ref);
- if (uncore_nhmex)
- mask = NHMEX_M_PMON_ZDP_CTL_FVC_MASK |
- NHMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(idx);
- else
- mask = WSMEX_M_PMON_ZDP_CTL_FVC_MASK |
- WSMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(idx);
- er->config &= ~mask;
- er->config |= (config & mask);
- ret = true;
- }
- raw_spin_unlock_irqrestore(&er->lock, flags);
-
- return ret;
-}
-
-static void nhmex_mbox_put_shared_reg(struct intel_uncore_box *box, int idx)
-{
- struct intel_uncore_extra_reg *er;
-
- if (idx < EXTRA_REG_NHMEX_M_ZDP_CTL_FVC) {
- er = &box->shared_regs[idx];
- atomic_dec(&er->ref);
- return;
- }
-
- idx -= EXTRA_REG_NHMEX_M_ZDP_CTL_FVC;
- er = &box->shared_regs[EXTRA_REG_NHMEX_M_ZDP_CTL_FVC];
- atomic_sub(1 << (idx * 8), &er->ref);
-}
-
-static u64 nhmex_mbox_alter_er(struct perf_event *event, int new_idx, bool modify)
-{
- struct hw_perf_event *hwc = &event->hw;
- struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
- u64 idx, orig_idx = __BITS_VALUE(reg1->idx, 0, 8);
- u64 config = reg1->config;
-
- /* get the non-shared control bits and shift them */
- idx = orig_idx - EXTRA_REG_NHMEX_M_ZDP_CTL_FVC;
- if (uncore_nhmex)
- config &= NHMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(idx);
- else
- config &= WSMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(idx);
- if (new_idx > orig_idx) {
- idx = new_idx - orig_idx;
- config <<= 3 * idx;
- } else {
- idx = orig_idx - new_idx;
- config >>= 3 * idx;
- }
-
- /* add the shared control bits back */
- if (uncore_nhmex)
- config |= NHMEX_M_PMON_ZDP_CTL_FVC_MASK & reg1->config;
- else
- config |= WSMEX_M_PMON_ZDP_CTL_FVC_MASK & reg1->config;
- config |= NHMEX_M_PMON_ZDP_CTL_FVC_MASK & reg1->config;
- if (modify) {
- /* adjust the main event selector */
- if (new_idx > orig_idx)
- hwc->config += idx << NHMEX_M_PMON_CTL_INC_SEL_SHIFT;
- else
- hwc->config -= idx << NHMEX_M_PMON_CTL_INC_SEL_SHIFT;
- reg1->config = config;
- reg1->idx = ~0xff | new_idx;
- }
- return config;
-}
-
-static struct event_constraint *
-nhmex_mbox_get_constraint(struct intel_uncore_box *box, struct perf_event *event)
-{
- struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
- struct hw_perf_event_extra *reg2 = &event->hw.branch_reg;
- int i, idx[2], alloc = 0;
- u64 config1 = reg1->config;
-
- idx[0] = __BITS_VALUE(reg1->idx, 0, 8);
- idx[1] = __BITS_VALUE(reg1->idx, 1, 8);
-again:
- for (i = 0; i < 2; i++) {
- if (!uncore_box_is_fake(box) && (reg1->alloc & (0x1 << i)))
- idx[i] = 0xff;
-
- if (idx[i] == 0xff)
- continue;
-
- if (!nhmex_mbox_get_shared_reg(box, idx[i],
- __BITS_VALUE(config1, i, 32)))
- goto fail;
- alloc |= (0x1 << i);
- }
-
- /* for the match/mask registers */
- if (reg2->idx != EXTRA_REG_NONE &&
- (uncore_box_is_fake(box) || !reg2->alloc) &&
- !nhmex_mbox_get_shared_reg(box, reg2->idx, reg2->config))
- goto fail;
-
- /*
- * If it's a fake box -- as used by validate_{group,event}() --
- * we shouldn't touch the event state, and we can avoid doing so
- * since both will only call get_event_constraints() once
- * on each event; this avoids the need for reg->alloc.
- */
- if (!uncore_box_is_fake(box)) {
- if (idx[0] != 0xff && idx[0] != __BITS_VALUE(reg1->idx, 0, 8))
- nhmex_mbox_alter_er(event, idx[0], true);
- reg1->alloc |= alloc;
- if (reg2->idx != EXTRA_REG_NONE)
- reg2->alloc = 1;
- }
- return NULL;
-fail:
- if (idx[0] != 0xff && !(alloc & 0x1) &&
- idx[0] >= EXTRA_REG_NHMEX_M_ZDP_CTL_FVC) {
- /*
- * Events 0xd ~ 0x10 are functionally identical, but are
- * controlled by different fields in the ZDP_CTL_FVC
- * register. If we failed to take one field, try the
- * remaining 3 choices.
- */
- BUG_ON(__BITS_VALUE(reg1->idx, 1, 8) != 0xff);
- idx[0] -= EXTRA_REG_NHMEX_M_ZDP_CTL_FVC;
- idx[0] = (idx[0] + 1) % 4;
- idx[0] += EXTRA_REG_NHMEX_M_ZDP_CTL_FVC;
- if (idx[0] != __BITS_VALUE(reg1->idx, 0, 8)) {
- config1 = nhmex_mbox_alter_er(event, idx[0], false);
- goto again;
- }
- }
-
- if (alloc & 0x1)
- nhmex_mbox_put_shared_reg(box, idx[0]);
- if (alloc & 0x2)
- nhmex_mbox_put_shared_reg(box, idx[1]);
- return &uncore_constraint_empty;
-}
-
-static void nhmex_mbox_put_constraint(struct intel_uncore_box *box, struct perf_event *event)
-{
- struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
- struct hw_perf_event_extra *reg2 = &event->hw.branch_reg;
-
- if (uncore_box_is_fake(box))
- return;
-
- if (reg1->alloc & 0x1)
- nhmex_mbox_put_shared_reg(box, __BITS_VALUE(reg1->idx, 0, 8));
- if (reg1->alloc & 0x2)
- nhmex_mbox_put_shared_reg(box, __BITS_VALUE(reg1->idx, 1, 8));
- reg1->alloc = 0;
-
- if (reg2->alloc) {
- nhmex_mbox_put_shared_reg(box, reg2->idx);
- reg2->alloc = 0;
- }
-}
-
-static int nhmex_mbox_extra_reg_idx(struct extra_reg *er)
-{
- if (er->idx < EXTRA_REG_NHMEX_M_ZDP_CTL_FVC)
- return er->idx;
- return er->idx + (er->event >> NHMEX_M_PMON_CTL_INC_SEL_SHIFT) - 0xd;
-}
-
-static int nhmex_mbox_hw_config(struct intel_uncore_box *box, struct perf_event *event)
-{
- struct intel_uncore_type *type = box->pmu->type;
- struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
- struct hw_perf_event_extra *reg2 = &event->hw.branch_reg;
- struct extra_reg *er;
- unsigned msr;
- int reg_idx = 0;
- /*
- * The mbox events may require at most 2 extra MSRs. But only
- * the lower 32 bits in these MSRs are significant, so we can use
- * config1 to pass two MSRs' config.
- */
- for (er = nhmex_uncore_mbox_extra_regs; er->msr; er++) {
- if (er->event != (event->hw.config & er->config_mask))
- continue;
- if (event->attr.config1 & ~er->valid_mask)
- return -EINVAL;
-
- msr = er->msr + type->msr_offset * box->pmu->pmu_idx;
- if (WARN_ON_ONCE(msr >= 0xffff || er->idx >= 0xff))
- return -EINVAL;
-
-		/* always use bits 32~63 to pass the PLD config */
- if (er->idx == EXTRA_REG_NHMEX_M_PLD)
- reg_idx = 1;
- else if (WARN_ON_ONCE(reg_idx > 0))
- return -EINVAL;
-
- reg1->idx &= ~(0xff << (reg_idx * 8));
- reg1->reg &= ~(0xffff << (reg_idx * 16));
- reg1->idx |= nhmex_mbox_extra_reg_idx(er) << (reg_idx * 8);
- reg1->reg |= msr << (reg_idx * 16);
- reg1->config = event->attr.config1;
- reg_idx++;
- }
- /*
- * The mbox only provides ability to perform address matching
- * for the PLD events.
- */
- if (reg_idx == 2) {
- reg2->idx = EXTRA_REG_NHMEX_M_FILTER;
- if (event->attr.config2 & NHMEX_M_PMON_MM_CFG_EN)
- reg2->config = event->attr.config2;
- else
- reg2->config = ~0ULL;
- if (box->pmu->pmu_idx == 0)
- reg2->reg = NHMEX_M0_MSR_PMU_MM_CFG;
- else
- reg2->reg = NHMEX_M1_MSR_PMU_MM_CFG;
- }
- return 0;
-}
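A minimal sketch of the packing performed above, where two 8-bit extra-register indices share reg1->idx and two 16-bit MSR addresses share reg1->reg; the index and MSR values are hypothetical, and BITS_VALUE() mirrors the __BITS_VALUE() helper used by the enable path:

#include <stdio.h>
#include <stdint.h>

#define BITS_VALUE(x, i, n)	(((x) >> ((i) * (n))) & ((1ULL << (n)) - 1))

int main(void)
{
	uint64_t idx = ~0ULL;	/* both 8-bit slots start out as 0xff (unused) */
	uint64_t reg = 0;
	unsigned int slot = 1;	/* slot 1 carries the PLD configuration */

	idx &= ~(0xffULL << (slot * 8));
	idx |= 0x0bULL << (slot * 8);		/* hypothetical extra-register index */
	reg |= 0xca60ULL << (slot * 16);	/* hypothetical MSR address */

	printf("slot 1: idx=0x%llx msr=0x%llx\n",
	       (unsigned long long)BITS_VALUE(idx, 1, 8),
	       (unsigned long long)BITS_VALUE(reg, 1, 16));
	return 0;
}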
-
-static u64 nhmex_mbox_shared_reg_config(struct intel_uncore_box *box, int idx)
-{
- struct intel_uncore_extra_reg *er;
- unsigned long flags;
- u64 config;
-
- if (idx < EXTRA_REG_NHMEX_M_ZDP_CTL_FVC)
- return box->shared_regs[idx].config;
-
- er = &box->shared_regs[EXTRA_REG_NHMEX_M_ZDP_CTL_FVC];
- raw_spin_lock_irqsave(&er->lock, flags);
- config = er->config;
- raw_spin_unlock_irqrestore(&er->lock, flags);
- return config;
-}
-
-static void nhmex_mbox_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event)
-{
- struct hw_perf_event *hwc = &event->hw;
- struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
- struct hw_perf_event_extra *reg2 = &hwc->branch_reg;
- int idx;
-
- idx = __BITS_VALUE(reg1->idx, 0, 8);
- if (idx != 0xff)
- wrmsrl(__BITS_VALUE(reg1->reg, 0, 16),
- nhmex_mbox_shared_reg_config(box, idx));
- idx = __BITS_VALUE(reg1->idx, 1, 8);
- if (idx != 0xff)
- wrmsrl(__BITS_VALUE(reg1->reg, 1, 16),
- nhmex_mbox_shared_reg_config(box, idx));
-
- if (reg2->idx != EXTRA_REG_NONE) {
- wrmsrl(reg2->reg, 0);
- if (reg2->config != ~0ULL) {
- wrmsrl(reg2->reg + 1,
- reg2->config & NHMEX_M_PMON_ADDR_MATCH_MASK);
- wrmsrl(reg2->reg + 2, NHMEX_M_PMON_ADDR_MASK_MASK &
- (reg2->config >> NHMEX_M_PMON_ADDR_MASK_SHIFT));
- wrmsrl(reg2->reg, NHMEX_M_PMON_MM_CFG_EN);
- }
- }
-
- wrmsrl(hwc->config_base, hwc->config | NHMEX_PMON_CTL_EN_BIT0);
-}
-
-DEFINE_UNCORE_FORMAT_ATTR(count_mode, count_mode, "config:2-3");
-DEFINE_UNCORE_FORMAT_ATTR(storage_mode, storage_mode, "config:4-5");
-DEFINE_UNCORE_FORMAT_ATTR(wrap_mode, wrap_mode, "config:6");
-DEFINE_UNCORE_FORMAT_ATTR(flag_mode, flag_mode, "config:7");
-DEFINE_UNCORE_FORMAT_ATTR(inc_sel, inc_sel, "config:9-13");
-DEFINE_UNCORE_FORMAT_ATTR(set_flag_sel, set_flag_sel, "config:19-21");
-DEFINE_UNCORE_FORMAT_ATTR(filter_cfg_en, filter_cfg_en, "config2:63");
-DEFINE_UNCORE_FORMAT_ATTR(filter_match, filter_match, "config2:0-33");
-DEFINE_UNCORE_FORMAT_ATTR(filter_mask, filter_mask, "config2:34-61");
-DEFINE_UNCORE_FORMAT_ATTR(dsp, dsp, "config1:0-31");
-DEFINE_UNCORE_FORMAT_ATTR(thr, thr, "config1:0-31");
-DEFINE_UNCORE_FORMAT_ATTR(fvc, fvc, "config1:0-31");
-DEFINE_UNCORE_FORMAT_ATTR(pgt, pgt, "config1:0-31");
-DEFINE_UNCORE_FORMAT_ATTR(map, map, "config1:0-31");
-DEFINE_UNCORE_FORMAT_ATTR(iss, iss, "config1:0-31");
-DEFINE_UNCORE_FORMAT_ATTR(pld, pld, "config1:32-63");
-
-static struct attribute *nhmex_uncore_mbox_formats_attr[] = {
- &format_attr_count_mode.attr,
- &format_attr_storage_mode.attr,
- &format_attr_wrap_mode.attr,
- &format_attr_flag_mode.attr,
- &format_attr_inc_sel.attr,
- &format_attr_set_flag_sel.attr,
- &format_attr_filter_cfg_en.attr,
- &format_attr_filter_match.attr,
- &format_attr_filter_mask.attr,
- &format_attr_dsp.attr,
- &format_attr_thr.attr,
- &format_attr_fvc.attr,
- &format_attr_pgt.attr,
- &format_attr_map.attr,
- &format_attr_iss.attr,
- &format_attr_pld.attr,
- NULL,
-};
-
-static struct attribute_group nhmex_uncore_mbox_format_group = {
- .name = "format",
- .attrs = nhmex_uncore_mbox_formats_attr,
-};
-
-static struct uncore_event_desc nhmex_uncore_mbox_events[] = {
- INTEL_UNCORE_EVENT_DESC(bbox_cmds_read, "inc_sel=0xd,fvc=0x2800"),
- INTEL_UNCORE_EVENT_DESC(bbox_cmds_write, "inc_sel=0xd,fvc=0x2820"),
- { /* end: all zeroes */ },
-};
-
-static struct uncore_event_desc wsmex_uncore_mbox_events[] = {
- INTEL_UNCORE_EVENT_DESC(bbox_cmds_read, "inc_sel=0xd,fvc=0x5000"),
- INTEL_UNCORE_EVENT_DESC(bbox_cmds_write, "inc_sel=0xd,fvc=0x5040"),
- { /* end: all zeroes */ },
-};
-
-static struct intel_uncore_ops nhmex_uncore_mbox_ops = {
- NHMEX_UNCORE_OPS_COMMON_INIT(),
- .enable_event = nhmex_mbox_msr_enable_event,
- .hw_config = nhmex_mbox_hw_config,
- .get_constraint = nhmex_mbox_get_constraint,
- .put_constraint = nhmex_mbox_put_constraint,
-};
-
-static struct intel_uncore_type nhmex_uncore_mbox = {
- .name = "mbox",
- .num_counters = 6,
- .num_boxes = 2,
- .perf_ctr_bits = 48,
- .event_ctl = NHMEX_M0_MSR_PMU_CTL0,
- .perf_ctr = NHMEX_M0_MSR_PMU_CNT0,
- .event_mask = NHMEX_M_PMON_RAW_EVENT_MASK,
- .box_ctl = NHMEX_M0_MSR_GLOBAL_CTL,
- .msr_offset = NHMEX_M_MSR_OFFSET,
- .pair_ctr_ctl = 1,
- .num_shared_regs = 8,
- .event_descs = nhmex_uncore_mbox_events,
- .ops = &nhmex_uncore_mbox_ops,
- .format_group = &nhmex_uncore_mbox_format_group,
-};
-
-static void nhmex_rbox_alter_er(struct intel_uncore_box *box, struct perf_event *event)
-{
- struct hw_perf_event *hwc = &event->hw;
- struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
-
- /* adjust the main event selector and extra register index */
- if (reg1->idx % 2) {
- reg1->idx--;
- hwc->config -= 1 << NHMEX_R_PMON_CTL_EV_SEL_SHIFT;
- } else {
- reg1->idx++;
- hwc->config += 1 << NHMEX_R_PMON_CTL_EV_SEL_SHIFT;
- }
-
- /* adjust extra register config */
- switch (reg1->idx % 6) {
- case 2:
-		/* shift bits 8~15 down to bits 0~7 */
- reg1->config >>= 8;
- break;
- case 3:
-		/* shift bits 0~7 up to bits 8~15 */
- reg1->config <<= 8;
- break;
- }
-}
-
-/*
- * Each rbox has 4 event sets which monitor QPI ports 0~3 or 4~7.
- * An event set consists of 6 events; the 3rd and 4th events in
- * an event set use the same extra register, so an event set uses
- * 5 extra registers.
- */
-static struct event_constraint *
-nhmex_rbox_get_constraint(struct intel_uncore_box *box, struct perf_event *event)
-{
- struct hw_perf_event *hwc = &event->hw;
- struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
- struct hw_perf_event_extra *reg2 = &hwc->branch_reg;
- struct intel_uncore_extra_reg *er;
- unsigned long flags;
- int idx, er_idx;
- u64 config1;
- bool ok = false;
-
- if (!uncore_box_is_fake(box) && reg1->alloc)
- return NULL;
-
- idx = reg1->idx % 6;
- config1 = reg1->config;
-again:
- er_idx = idx;
- /* the 3rd and 4th events use the same extra register */
- if (er_idx > 2)
- er_idx--;
- er_idx += (reg1->idx / 6) * 5;
-
- er = &box->shared_regs[er_idx];
- raw_spin_lock_irqsave(&er->lock, flags);
- if (idx < 2) {
- if (!atomic_read(&er->ref) || er->config == reg1->config) {
- atomic_inc(&er->ref);
- er->config = reg1->config;
- ok = true;
- }
- } else if (idx == 2 || idx == 3) {
- /*
-		 * these two events use different fields in an extra register,
-		 * bits 0~7 and bits 8~15 respectively.
- */
- u64 mask = 0xff << ((idx - 2) * 8);
- if (!__BITS_VALUE(atomic_read(&er->ref), idx - 2, 8) ||
- !((er->config ^ config1) & mask)) {
- atomic_add(1 << ((idx - 2) * 8), &er->ref);
- er->config &= ~mask;
- er->config |= config1 & mask;
- ok = true;
- }
- } else {
- if (!atomic_read(&er->ref) ||
- (er->config == (hwc->config >> 32) &&
- er->config1 == reg1->config &&
- er->config2 == reg2->config)) {
- atomic_inc(&er->ref);
- er->config = (hwc->config >> 32);
- er->config1 = reg1->config;
- er->config2 = reg2->config;
- ok = true;
- }
- }
- raw_spin_unlock_irqrestore(&er->lock, flags);
-
- if (!ok) {
- /*
- * The Rbox events are always in pairs. The paired
-		 * events are functionally identical, but use different
- * extra registers. If we failed to take an extra
- * register, try the alternative.
- */
- idx ^= 1;
- if (idx != reg1->idx % 6) {
- if (idx == 2)
- config1 >>= 8;
- else if (idx == 3)
- config1 <<= 8;
- goto again;
- }
- } else {
- if (!uncore_box_is_fake(box)) {
- if (idx != reg1->idx % 6)
- nhmex_rbox_alter_er(box, event);
- reg1->alloc = 1;
- }
- return NULL;
- }
- return &uncore_constraint_empty;
-}
-
-static void nhmex_rbox_put_constraint(struct intel_uncore_box *box, struct perf_event *event)
-{
- struct intel_uncore_extra_reg *er;
- struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
- int idx, er_idx;
-
- if (uncore_box_is_fake(box) || !reg1->alloc)
- return;
-
- idx = reg1->idx % 6;
- er_idx = idx;
- if (er_idx > 2)
- er_idx--;
- er_idx += (reg1->idx / 6) * 5;
-
- er = &box->shared_regs[er_idx];
- if (idx == 2 || idx == 3)
- atomic_sub(1 << ((idx - 2) * 8), &er->ref);
- else
- atomic_dec(&er->ref);
-
- reg1->alloc = 0;
-}
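The index arithmetic in the two functions above folds each set of six events onto five shared registers (the 3rd and 4th events share one). A minimal standalone sketch of that mapping, with a hypothetical helper name:

#include <stdio.h>

static int rbox_er_idx(int reg_idx)
{
	int idx = reg_idx % 6;
	int er_idx = idx;

	if (er_idx > 2)
		er_idx--;
	return er_idx + (reg_idx / 6) * 5;
}

int main(void)
{
	int i;

	for (i = 0; i < 12; i++)	/* two event sets */
		printf("event %2d -> shared reg %d\n", i, rbox_er_idx(i));
	return 0;
}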
-
-static int nhmex_rbox_hw_config(struct intel_uncore_box *box, struct perf_event *event)
-{
- struct hw_perf_event *hwc = &event->hw;
- struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
- struct hw_perf_event_extra *reg2 = &event->hw.branch_reg;
- int idx;
-
- idx = (event->hw.config & NHMEX_R_PMON_CTL_EV_SEL_MASK) >>
- NHMEX_R_PMON_CTL_EV_SEL_SHIFT;
- if (idx >= 0x18)
- return -EINVAL;
-
- reg1->idx = idx;
- reg1->config = event->attr.config1;
-
- switch (idx % 6) {
- case 4:
- case 5:
- hwc->config |= event->attr.config & (~0ULL << 32);
- reg2->config = event->attr.config2;
- break;
- }
- return 0;
-}
-
-static void nhmex_rbox_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event)
-{
- struct hw_perf_event *hwc = &event->hw;
- struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
- struct hw_perf_event_extra *reg2 = &hwc->branch_reg;
- int idx, port;
-
- idx = reg1->idx;
- port = idx / 6 + box->pmu->pmu_idx * 4;
-
- switch (idx % 6) {
- case 0:
- wrmsrl(NHMEX_R_MSR_PORTN_IPERF_CFG0(port), reg1->config);
- break;
- case 1:
- wrmsrl(NHMEX_R_MSR_PORTN_IPERF_CFG1(port), reg1->config);
- break;
- case 2:
- case 3:
- wrmsrl(NHMEX_R_MSR_PORTN_QLX_CFG(port),
- uncore_shared_reg_config(box, 2 + (idx / 6) * 5));
- break;
- case 4:
- wrmsrl(NHMEX_R_MSR_PORTN_XBR_SET1_MM_CFG(port),
- hwc->config >> 32);
- wrmsrl(NHMEX_R_MSR_PORTN_XBR_SET1_MATCH(port), reg1->config);
- wrmsrl(NHMEX_R_MSR_PORTN_XBR_SET1_MASK(port), reg2->config);
- break;
- case 5:
- wrmsrl(NHMEX_R_MSR_PORTN_XBR_SET2_MM_CFG(port),
- hwc->config >> 32);
- wrmsrl(NHMEX_R_MSR_PORTN_XBR_SET2_MATCH(port), reg1->config);
- wrmsrl(NHMEX_R_MSR_PORTN_XBR_SET2_MASK(port), reg2->config);
- break;
- }
-
- wrmsrl(hwc->config_base, NHMEX_PMON_CTL_EN_BIT0 |
- (hwc->config & NHMEX_R_PMON_CTL_EV_SEL_MASK));
-}
-
-DEFINE_UNCORE_FORMAT_ATTR(xbr_mm_cfg, xbr_mm_cfg, "config:32-63");
-DEFINE_UNCORE_FORMAT_ATTR(xbr_match, xbr_match, "config1:0-63");
-DEFINE_UNCORE_FORMAT_ATTR(xbr_mask, xbr_mask, "config2:0-63");
-DEFINE_UNCORE_FORMAT_ATTR(qlx_cfg, qlx_cfg, "config1:0-15");
-DEFINE_UNCORE_FORMAT_ATTR(iperf_cfg, iperf_cfg, "config1:0-31");
-
-static struct attribute *nhmex_uncore_rbox_formats_attr[] = {
- &format_attr_event5.attr,
- &format_attr_xbr_mm_cfg.attr,
- &format_attr_xbr_match.attr,
- &format_attr_xbr_mask.attr,
- &format_attr_qlx_cfg.attr,
- &format_attr_iperf_cfg.attr,
- NULL,
-};
-
-static struct attribute_group nhmex_uncore_rbox_format_group = {
- .name = "format",
- .attrs = nhmex_uncore_rbox_formats_attr,
-};
-
-static struct uncore_event_desc nhmex_uncore_rbox_events[] = {
- INTEL_UNCORE_EVENT_DESC(qpi0_flit_send, "event=0x0,iperf_cfg=0x80000000"),
- INTEL_UNCORE_EVENT_DESC(qpi1_filt_send, "event=0x6,iperf_cfg=0x80000000"),
- INTEL_UNCORE_EVENT_DESC(qpi0_idle_filt, "event=0x0,iperf_cfg=0x40000000"),
- INTEL_UNCORE_EVENT_DESC(qpi1_idle_filt, "event=0x6,iperf_cfg=0x40000000"),
- INTEL_UNCORE_EVENT_DESC(qpi0_date_response, "event=0x0,iperf_cfg=0xc4"),
- INTEL_UNCORE_EVENT_DESC(qpi1_date_response, "event=0x6,iperf_cfg=0xc4"),
- { /* end: all zeroes */ },
-};
-
-static struct intel_uncore_ops nhmex_uncore_rbox_ops = {
- NHMEX_UNCORE_OPS_COMMON_INIT(),
- .enable_event = nhmex_rbox_msr_enable_event,
- .hw_config = nhmex_rbox_hw_config,
- .get_constraint = nhmex_rbox_get_constraint,
- .put_constraint = nhmex_rbox_put_constraint,
-};
-
-static struct intel_uncore_type nhmex_uncore_rbox = {
- .name = "rbox",
- .num_counters = 8,
- .num_boxes = 2,
- .perf_ctr_bits = 48,
- .event_ctl = NHMEX_R_MSR_PMON_CTL0,
- .perf_ctr = NHMEX_R_MSR_PMON_CNT0,
- .event_mask = NHMEX_R_PMON_RAW_EVENT_MASK,
- .box_ctl = NHMEX_R_MSR_GLOBAL_CTL,
- .msr_offset = NHMEX_R_MSR_OFFSET,
- .pair_ctr_ctl = 1,
- .num_shared_regs = 20,
- .event_descs = nhmex_uncore_rbox_events,
- .ops = &nhmex_uncore_rbox_ops,
- .format_group = &nhmex_uncore_rbox_format_group
-};
-
-static struct intel_uncore_type *nhmex_msr_uncores[] = {
- &nhmex_uncore_ubox,
- &nhmex_uncore_cbox,
- &nhmex_uncore_bbox,
- &nhmex_uncore_sbox,
- &nhmex_uncore_mbox,
- &nhmex_uncore_rbox,
- &nhmex_uncore_wbox,
- NULL,
-};
-
-void nhmex_uncore_cpu_init(void)
-{
- if (boot_cpu_data.x86_model == 46)
- uncore_nhmex = true;
- else
- nhmex_uncore_mbox.event_descs = wsmex_uncore_mbox_events;
- if (nhmex_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores)
- nhmex_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores;
- uncore_msr_uncores = nhmex_msr_uncores;
-}
-/* end of Nehalem-EX uncore support */
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c b/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c
deleted file mode 100644
index 2bd030ddd..000000000
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c
+++ /dev/null
@@ -1,717 +0,0 @@
-/* Nehalem/SandyBridge/Haswell uncore support */
-#include "perf_event_intel_uncore.h"
-
-/* Uncore IMC PCI IDs */
-#define PCI_DEVICE_ID_INTEL_SNB_IMC 0x0100
-#define PCI_DEVICE_ID_INTEL_IVB_IMC 0x0154
-#define PCI_DEVICE_ID_INTEL_IVB_E3_IMC 0x0150
-#define PCI_DEVICE_ID_INTEL_HSW_IMC 0x0c00
-#define PCI_DEVICE_ID_INTEL_HSW_U_IMC 0x0a04
-#define PCI_DEVICE_ID_INTEL_BDW_IMC 0x1604
-#define PCI_DEVICE_ID_INTEL_SKL_IMC 0x191f
-
-/* SNB event control */
-#define SNB_UNC_CTL_EV_SEL_MASK 0x000000ff
-#define SNB_UNC_CTL_UMASK_MASK 0x0000ff00
-#define SNB_UNC_CTL_EDGE_DET (1 << 18)
-#define SNB_UNC_CTL_EN (1 << 22)
-#define SNB_UNC_CTL_INVERT (1 << 23)
-#define SNB_UNC_CTL_CMASK_MASK 0x1f000000
-#define NHM_UNC_CTL_CMASK_MASK 0xff000000
-#define NHM_UNC_FIXED_CTR_CTL_EN (1 << 0)
-
-#define SNB_UNC_RAW_EVENT_MASK (SNB_UNC_CTL_EV_SEL_MASK | \
- SNB_UNC_CTL_UMASK_MASK | \
- SNB_UNC_CTL_EDGE_DET | \
- SNB_UNC_CTL_INVERT | \
- SNB_UNC_CTL_CMASK_MASK)
-
-#define NHM_UNC_RAW_EVENT_MASK (SNB_UNC_CTL_EV_SEL_MASK | \
- SNB_UNC_CTL_UMASK_MASK | \
- SNB_UNC_CTL_EDGE_DET | \
- SNB_UNC_CTL_INVERT | \
- NHM_UNC_CTL_CMASK_MASK)
-
-/* SNB global control register */
-#define SNB_UNC_PERF_GLOBAL_CTL 0x391
-#define SNB_UNC_FIXED_CTR_CTRL 0x394
-#define SNB_UNC_FIXED_CTR 0x395
-
-/* SNB uncore global control */
-#define SNB_UNC_GLOBAL_CTL_CORE_ALL ((1 << 4) - 1)
-#define SNB_UNC_GLOBAL_CTL_EN (1 << 29)
-
-/* SNB Cbo register */
-#define SNB_UNC_CBO_0_PERFEVTSEL0 0x700
-#define SNB_UNC_CBO_0_PER_CTR0 0x706
-#define SNB_UNC_CBO_MSR_OFFSET 0x10
-
-/* SNB ARB register */
-#define SNB_UNC_ARB_PER_CTR0 0x3b0
-#define SNB_UNC_ARB_PERFEVTSEL0 0x3b2
-#define SNB_UNC_ARB_MSR_OFFSET 0x10
-
-/* NHM global control register */
-#define NHM_UNC_PERF_GLOBAL_CTL 0x391
-#define NHM_UNC_FIXED_CTR 0x394
-#define NHM_UNC_FIXED_CTR_CTRL 0x395
-
-/* NHM uncore global control */
-#define NHM_UNC_GLOBAL_CTL_EN_PC_ALL ((1ULL << 8) - 1)
-#define NHM_UNC_GLOBAL_CTL_EN_FC (1ULL << 32)
-
-/* NHM uncore register */
-#define NHM_UNC_PERFEVTSEL0 0x3c0
-#define NHM_UNC_UNCORE_PMC0 0x3b0
-
-DEFINE_UNCORE_FORMAT_ATTR(event, event, "config:0-7");
-DEFINE_UNCORE_FORMAT_ATTR(umask, umask, "config:8-15");
-DEFINE_UNCORE_FORMAT_ATTR(edge, edge, "config:18");
-DEFINE_UNCORE_FORMAT_ATTR(inv, inv, "config:23");
-DEFINE_UNCORE_FORMAT_ATTR(cmask5, cmask, "config:24-28");
-DEFINE_UNCORE_FORMAT_ATTR(cmask8, cmask, "config:24-31");
-
-/* Sandy Bridge uncore support */
-static void snb_uncore_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event)
-{
- struct hw_perf_event *hwc = &event->hw;
-
- if (hwc->idx < UNCORE_PMC_IDX_FIXED)
- wrmsrl(hwc->config_base, hwc->config | SNB_UNC_CTL_EN);
- else
- wrmsrl(hwc->config_base, SNB_UNC_CTL_EN);
-}
-
-static void snb_uncore_msr_disable_event(struct intel_uncore_box *box, struct perf_event *event)
-{
- wrmsrl(event->hw.config_base, 0);
-}
-
-static void snb_uncore_msr_init_box(struct intel_uncore_box *box)
-{
- if (box->pmu->pmu_idx == 0) {
- wrmsrl(SNB_UNC_PERF_GLOBAL_CTL,
- SNB_UNC_GLOBAL_CTL_EN | SNB_UNC_GLOBAL_CTL_CORE_ALL);
- }
-}
-
-static struct uncore_event_desc snb_uncore_events[] = {
- INTEL_UNCORE_EVENT_DESC(clockticks, "event=0xff,umask=0x00"),
- { /* end: all zeroes */ },
-};
-
-static struct attribute *snb_uncore_formats_attr[] = {
- &format_attr_event.attr,
- &format_attr_umask.attr,
- &format_attr_edge.attr,
- &format_attr_inv.attr,
- &format_attr_cmask5.attr,
- NULL,
-};
-
-static struct attribute_group snb_uncore_format_group = {
- .name = "format",
- .attrs = snb_uncore_formats_attr,
-};
-
-static struct intel_uncore_ops snb_uncore_msr_ops = {
- .init_box = snb_uncore_msr_init_box,
- .disable_event = snb_uncore_msr_disable_event,
- .enable_event = snb_uncore_msr_enable_event,
- .read_counter = uncore_msr_read_counter,
-};
-
-static struct event_constraint snb_uncore_arb_constraints[] = {
- UNCORE_EVENT_CONSTRAINT(0x80, 0x1),
- UNCORE_EVENT_CONSTRAINT(0x83, 0x1),
- EVENT_CONSTRAINT_END
-};
-
-static struct intel_uncore_type snb_uncore_cbox = {
- .name = "cbox",
- .num_counters = 2,
- .num_boxes = 4,
- .perf_ctr_bits = 44,
- .fixed_ctr_bits = 48,
- .perf_ctr = SNB_UNC_CBO_0_PER_CTR0,
- .event_ctl = SNB_UNC_CBO_0_PERFEVTSEL0,
- .fixed_ctr = SNB_UNC_FIXED_CTR,
- .fixed_ctl = SNB_UNC_FIXED_CTR_CTRL,
- .single_fixed = 1,
- .event_mask = SNB_UNC_RAW_EVENT_MASK,
- .msr_offset = SNB_UNC_CBO_MSR_OFFSET,
- .ops = &snb_uncore_msr_ops,
- .format_group = &snb_uncore_format_group,
- .event_descs = snb_uncore_events,
-};
-
-static struct intel_uncore_type snb_uncore_arb = {
- .name = "arb",
- .num_counters = 2,
- .num_boxes = 1,
- .perf_ctr_bits = 44,
- .perf_ctr = SNB_UNC_ARB_PER_CTR0,
- .event_ctl = SNB_UNC_ARB_PERFEVTSEL0,
- .event_mask = SNB_UNC_RAW_EVENT_MASK,
- .msr_offset = SNB_UNC_ARB_MSR_OFFSET,
- .constraints = snb_uncore_arb_constraints,
- .ops = &snb_uncore_msr_ops,
- .format_group = &snb_uncore_format_group,
-};
-
-static struct intel_uncore_type *snb_msr_uncores[] = {
- &snb_uncore_cbox,
- &snb_uncore_arb,
- NULL,
-};
-
-void snb_uncore_cpu_init(void)
-{
- uncore_msr_uncores = snb_msr_uncores;
- if (snb_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores)
- snb_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores;
-}
-
-enum {
- SNB_PCI_UNCORE_IMC,
-};
-
-static struct uncore_event_desc snb_uncore_imc_events[] = {
- INTEL_UNCORE_EVENT_DESC(data_reads, "event=0x01"),
- INTEL_UNCORE_EVENT_DESC(data_reads.scale, "6.103515625e-5"),
- INTEL_UNCORE_EVENT_DESC(data_reads.unit, "MiB"),
-
- INTEL_UNCORE_EVENT_DESC(data_writes, "event=0x02"),
- INTEL_UNCORE_EVENT_DESC(data_writes.scale, "6.103515625e-5"),
- INTEL_UNCORE_EVENT_DESC(data_writes.unit, "MiB"),
-
- { /* end: all zeroes */ },
-};
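The scale strings above are consistent with each counter increment representing one 64-byte cache line: 64 / 2^20 = 6.103515625e-5, so a raw count multiplied by the scale yields MiB. A quick check of that arithmetic:

#include <stdio.h>

int main(void)
{
	double bytes_per_count = 64.0;			/* one cache line per count */
	double scale = bytes_per_count / (1 << 20);	/* bytes -> MiB */

	printf("%.12g\n", scale);	/* prints 6.103515625e-05 */
	return 0;
}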
-
-#define SNB_UNCORE_PCI_IMC_EVENT_MASK 0xff
-#define SNB_UNCORE_PCI_IMC_BAR_OFFSET 0x48
-
-/* page size multiple covering all config regs */
-#define SNB_UNCORE_PCI_IMC_MAP_SIZE 0x6000
-
-#define SNB_UNCORE_PCI_IMC_DATA_READS 0x1
-#define SNB_UNCORE_PCI_IMC_DATA_READS_BASE 0x5050
-#define SNB_UNCORE_PCI_IMC_DATA_WRITES 0x2
-#define SNB_UNCORE_PCI_IMC_DATA_WRITES_BASE 0x5054
-#define SNB_UNCORE_PCI_IMC_CTR_BASE SNB_UNCORE_PCI_IMC_DATA_READS_BASE
-
-static struct attribute *snb_uncore_imc_formats_attr[] = {
- &format_attr_event.attr,
- NULL,
-};
-
-static struct attribute_group snb_uncore_imc_format_group = {
- .name = "format",
- .attrs = snb_uncore_imc_formats_attr,
-};
-
-static void snb_uncore_imc_init_box(struct intel_uncore_box *box)
-{
- struct pci_dev *pdev = box->pci_dev;
- int where = SNB_UNCORE_PCI_IMC_BAR_OFFSET;
- resource_size_t addr;
- u32 pci_dword;
-
- pci_read_config_dword(pdev, where, &pci_dword);
- addr = pci_dword;
-
-#ifdef CONFIG_PHYS_ADDR_T_64BIT
- pci_read_config_dword(pdev, where + 4, &pci_dword);
- addr |= ((resource_size_t)pci_dword << 32);
-#endif
-
- addr &= ~(PAGE_SIZE - 1);
-
- box->io_addr = ioremap(addr, SNB_UNCORE_PCI_IMC_MAP_SIZE);
- box->hrtimer_duration = UNCORE_SNB_IMC_HRTIMER_INTERVAL;
-}
-
-static void snb_uncore_imc_enable_box(struct intel_uncore_box *box)
-{}
-
-static void snb_uncore_imc_disable_box(struct intel_uncore_box *box)
-{}
-
-static void snb_uncore_imc_enable_event(struct intel_uncore_box *box, struct perf_event *event)
-{}
-
-static void snb_uncore_imc_disable_event(struct intel_uncore_box *box, struct perf_event *event)
-{}
-
-static u64 snb_uncore_imc_read_counter(struct intel_uncore_box *box, struct perf_event *event)
-{
- struct hw_perf_event *hwc = &event->hw;
-
- return (u64)*(unsigned int *)(box->io_addr + hwc->event_base);
-}
-
-/*
- * Custom event_init() function because we define our own fixed, free-running
- * counters, so we do not want to conflict with the generic uncore logic.
- * This also simplifies processing.
- */
-static int snb_uncore_imc_event_init(struct perf_event *event)
-{
- struct intel_uncore_pmu *pmu;
- struct intel_uncore_box *box;
- struct hw_perf_event *hwc = &event->hw;
- u64 cfg = event->attr.config & SNB_UNCORE_PCI_IMC_EVENT_MASK;
- int idx, base;
-
- if (event->attr.type != event->pmu->type)
- return -ENOENT;
-
- pmu = uncore_event_to_pmu(event);
- /* no device found for this pmu */
- if (pmu->func_id < 0)
- return -ENOENT;
-
- /* Sampling not supported yet */
- if (hwc->sample_period)
- return -EINVAL;
-
- /* unsupported modes and filters */
- if (event->attr.exclude_user ||
- event->attr.exclude_kernel ||
- event->attr.exclude_hv ||
- event->attr.exclude_idle ||
- event->attr.exclude_host ||
- event->attr.exclude_guest ||
- event->attr.sample_period) /* no sampling */
- return -EINVAL;
-
- /*
- * Place all uncore events for a particular physical package
- * onto a single cpu
- */
- if (event->cpu < 0)
- return -EINVAL;
-
- /* check only supported bits are set */
- if (event->attr.config & ~SNB_UNCORE_PCI_IMC_EVENT_MASK)
- return -EINVAL;
-
- box = uncore_pmu_to_box(pmu, event->cpu);
- if (!box || box->cpu < 0)
- return -EINVAL;
-
- event->cpu = box->cpu;
-
- event->hw.idx = -1;
- event->hw.last_tag = ~0ULL;
- event->hw.extra_reg.idx = EXTRA_REG_NONE;
- event->hw.branch_reg.idx = EXTRA_REG_NONE;
- /*
- * check event is known (whitelist, determines counter)
- */
- switch (cfg) {
- case SNB_UNCORE_PCI_IMC_DATA_READS:
- base = SNB_UNCORE_PCI_IMC_DATA_READS_BASE;
- idx = UNCORE_PMC_IDX_FIXED;
- break;
- case SNB_UNCORE_PCI_IMC_DATA_WRITES:
- base = SNB_UNCORE_PCI_IMC_DATA_WRITES_BASE;
- idx = UNCORE_PMC_IDX_FIXED + 1;
- break;
- default:
- return -EINVAL;
- }
-
- /* must be done before validate_group */
- event->hw.event_base = base;
- event->hw.config = cfg;
- event->hw.idx = idx;
-
- /* no group validation needed, we have free running counters */
-
- return 0;
-}
-
-static int snb_uncore_imc_hw_config(struct intel_uncore_box *box, struct perf_event *event)
-{
- return 0;
-}
-
-static void snb_uncore_imc_event_start(struct perf_event *event, int flags)
-{
- struct intel_uncore_box *box = uncore_event_to_box(event);
- u64 count;
-
- if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
- return;
-
- event->hw.state = 0;
- box->n_active++;
-
- list_add_tail(&event->active_entry, &box->active_list);
-
- count = snb_uncore_imc_read_counter(box, event);
- local64_set(&event->hw.prev_count, count);
-
- if (box->n_active == 1)
- uncore_pmu_start_hrtimer(box);
-}
-
-static void snb_uncore_imc_event_stop(struct perf_event *event, int flags)
-{
- struct intel_uncore_box *box = uncore_event_to_box(event);
- struct hw_perf_event *hwc = &event->hw;
-
- if (!(hwc->state & PERF_HES_STOPPED)) {
- box->n_active--;
-
- WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
- hwc->state |= PERF_HES_STOPPED;
-
- list_del(&event->active_entry);
-
- if (box->n_active == 0)
- uncore_pmu_cancel_hrtimer(box);
- }
-
- if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
- /*
-		 * Drain the remaining delta count out of an event
- * that we are disabling:
- */
- uncore_perf_event_update(box, event);
- hwc->state |= PERF_HES_UPTODATE;
- }
-}
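Since these IMC counters are 32-bit and free running, stopping an event only drains the delta accumulated since the last read. A minimal sketch of wraparound-tolerant delta accumulation; this is illustrative only, as the real work is done by uncore_perf_event_update(), which is not part of this file:

#include <stdio.h>
#include <stdint.h>

/* Accumulate the delta of a free-running 32-bit counter, tolerating wraparound. */
static uint64_t accumulate(uint64_t *prev, uint32_t now, uint64_t total)
{
	uint32_t delta = now - (uint32_t)*prev;	/* modulo-2^32 arithmetic */

	*prev = now;
	return total + delta;
}

int main(void)
{
	uint64_t prev = 0xfffffff0ULL;
	uint64_t total = 0;

	total = accumulate(&prev, 0x00000010, total);	/* read taken after a wrap */
	printf("delta across the wrap = %llu\n", (unsigned long long)total);	/* 32 */
	return 0;
}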
-
-static int snb_uncore_imc_event_add(struct perf_event *event, int flags)
-{
- struct intel_uncore_box *box = uncore_event_to_box(event);
- struct hw_perf_event *hwc = &event->hw;
-
- if (!box)
- return -ENODEV;
-
- hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
- if (!(flags & PERF_EF_START))
- hwc->state |= PERF_HES_ARCH;
-
- snb_uncore_imc_event_start(event, 0);
-
- box->n_events++;
-
- return 0;
-}
-
-static void snb_uncore_imc_event_del(struct perf_event *event, int flags)
-{
- struct intel_uncore_box *box = uncore_event_to_box(event);
- int i;
-
- snb_uncore_imc_event_stop(event, PERF_EF_UPDATE);
-
- for (i = 0; i < box->n_events; i++) {
- if (event == box->event_list[i]) {
- --box->n_events;
- break;
- }
- }
-}
-
-int snb_pci2phy_map_init(int devid)
-{
- struct pci_dev *dev = NULL;
- struct pci2phy_map *map;
- int bus, segment;
-
- dev = pci_get_device(PCI_VENDOR_ID_INTEL, devid, dev);
- if (!dev)
- return -ENOTTY;
-
- bus = dev->bus->number;
- segment = pci_domain_nr(dev->bus);
-
- raw_spin_lock(&pci2phy_map_lock);
- map = __find_pci2phy_map(segment);
- if (!map) {
- raw_spin_unlock(&pci2phy_map_lock);
- pci_dev_put(dev);
- return -ENOMEM;
- }
- map->pbus_to_physid[bus] = 0;
- raw_spin_unlock(&pci2phy_map_lock);
-
- pci_dev_put(dev);
-
- return 0;
-}
-
-static struct pmu snb_uncore_imc_pmu = {
- .task_ctx_nr = perf_invalid_context,
- .event_init = snb_uncore_imc_event_init,
- .add = snb_uncore_imc_event_add,
- .del = snb_uncore_imc_event_del,
- .start = snb_uncore_imc_event_start,
- .stop = snb_uncore_imc_event_stop,
- .read = uncore_pmu_event_read,
-};
-
-static struct intel_uncore_ops snb_uncore_imc_ops = {
- .init_box = snb_uncore_imc_init_box,
- .enable_box = snb_uncore_imc_enable_box,
- .disable_box = snb_uncore_imc_disable_box,
- .disable_event = snb_uncore_imc_disable_event,
- .enable_event = snb_uncore_imc_enable_event,
- .hw_config = snb_uncore_imc_hw_config,
- .read_counter = snb_uncore_imc_read_counter,
-};
-
-static struct intel_uncore_type snb_uncore_imc = {
- .name = "imc",
- .num_counters = 2,
- .num_boxes = 1,
- .fixed_ctr_bits = 32,
- .fixed_ctr = SNB_UNCORE_PCI_IMC_CTR_BASE,
- .event_descs = snb_uncore_imc_events,
- .format_group = &snb_uncore_imc_format_group,
- .perf_ctr = SNB_UNCORE_PCI_IMC_DATA_READS_BASE,
- .event_mask = SNB_UNCORE_PCI_IMC_EVENT_MASK,
- .ops = &snb_uncore_imc_ops,
- .pmu = &snb_uncore_imc_pmu,
-};
-
-static struct intel_uncore_type *snb_pci_uncores[] = {
- [SNB_PCI_UNCORE_IMC] = &snb_uncore_imc,
- NULL,
-};
-
-static const struct pci_device_id snb_uncore_pci_ids[] = {
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SNB_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
- { /* end: all zeroes */ },
-};
-
-static const struct pci_device_id ivb_uncore_pci_ids[] = {
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IVB_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IVB_E3_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
- { /* end: all zeroes */ },
-};
-
-static const struct pci_device_id hsw_uncore_pci_ids[] = {
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_HSW_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_HSW_U_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
- { /* end: all zeroes */ },
-};
-
-static const struct pci_device_id bdw_uncore_pci_ids[] = {
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_BDW_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
- { /* end: all zeroes */ },
-};
-
-static const struct pci_device_id skl_uncore_pci_ids[] = {
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
- { /* end: all zeroes */ },
-};
-
-static struct pci_driver snb_uncore_pci_driver = {
- .name = "snb_uncore",
- .id_table = snb_uncore_pci_ids,
-};
-
-static struct pci_driver ivb_uncore_pci_driver = {
- .name = "ivb_uncore",
- .id_table = ivb_uncore_pci_ids,
-};
-
-static struct pci_driver hsw_uncore_pci_driver = {
- .name = "hsw_uncore",
- .id_table = hsw_uncore_pci_ids,
-};
-
-static struct pci_driver bdw_uncore_pci_driver = {
- .name = "bdw_uncore",
- .id_table = bdw_uncore_pci_ids,
-};
-
-static struct pci_driver skl_uncore_pci_driver = {
- .name = "skl_uncore",
- .id_table = skl_uncore_pci_ids,
-};
-
-struct imc_uncore_pci_dev {
- __u32 pci_id;
- struct pci_driver *driver;
-};
-#define IMC_DEV(a, d) \
- { .pci_id = PCI_DEVICE_ID_INTEL_##a, .driver = (d) }
-
-static const struct imc_uncore_pci_dev desktop_imc_pci_ids[] = {
- IMC_DEV(SNB_IMC, &snb_uncore_pci_driver),
- IMC_DEV(IVB_IMC, &ivb_uncore_pci_driver), /* 3rd Gen Core processor */
- IMC_DEV(IVB_E3_IMC, &ivb_uncore_pci_driver), /* Xeon E3-1200 v2/3rd Gen Core processor */
- IMC_DEV(HSW_IMC, &hsw_uncore_pci_driver), /* 4th Gen Core Processor */
- IMC_DEV(HSW_U_IMC, &hsw_uncore_pci_driver), /* 4th Gen Core ULT Mobile Processor */
- IMC_DEV(BDW_IMC, &bdw_uncore_pci_driver), /* 5th Gen Core U */
- IMC_DEV(SKL_IMC, &skl_uncore_pci_driver), /* 6th Gen Core */
- { /* end marker */ }
-};
-
-
-#define for_each_imc_pci_id(x, t) \
- for (x = (t); (x)->pci_id; x++)
-
-static struct pci_driver *imc_uncore_find_dev(void)
-{
- const struct imc_uncore_pci_dev *p;
- int ret;
-
- for_each_imc_pci_id(p, desktop_imc_pci_ids) {
- ret = snb_pci2phy_map_init(p->pci_id);
- if (ret == 0)
- return p->driver;
- }
- return NULL;
-}
-
-static int imc_uncore_pci_init(void)
-{
- struct pci_driver *imc_drv = imc_uncore_find_dev();
-
- if (!imc_drv)
- return -ENODEV;
-
- uncore_pci_uncores = snb_pci_uncores;
- uncore_pci_driver = imc_drv;
-
- return 0;
-}
-
-int snb_uncore_pci_init(void)
-{
- return imc_uncore_pci_init();
-}
-
-int ivb_uncore_pci_init(void)
-{
- return imc_uncore_pci_init();
-}
-int hsw_uncore_pci_init(void)
-{
- return imc_uncore_pci_init();
-}
-
-int bdw_uncore_pci_init(void)
-{
- return imc_uncore_pci_init();
-}
-
-int skl_uncore_pci_init(void)
-{
- return imc_uncore_pci_init();
-}
-
-/* end of Sandy Bridge uncore support */
-
-/* Nehalem uncore support */
-static void nhm_uncore_msr_disable_box(struct intel_uncore_box *box)
-{
- wrmsrl(NHM_UNC_PERF_GLOBAL_CTL, 0);
-}
-
-static void nhm_uncore_msr_enable_box(struct intel_uncore_box *box)
-{
- wrmsrl(NHM_UNC_PERF_GLOBAL_CTL, NHM_UNC_GLOBAL_CTL_EN_PC_ALL | NHM_UNC_GLOBAL_CTL_EN_FC);
-}
-
-static void nhm_uncore_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event)
-{
- struct hw_perf_event *hwc = &event->hw;
-
- if (hwc->idx < UNCORE_PMC_IDX_FIXED)
- wrmsrl(hwc->config_base, hwc->config | SNB_UNC_CTL_EN);
- else
- wrmsrl(hwc->config_base, NHM_UNC_FIXED_CTR_CTL_EN);
-}
-
-static struct attribute *nhm_uncore_formats_attr[] = {
- &format_attr_event.attr,
- &format_attr_umask.attr,
- &format_attr_edge.attr,
- &format_attr_inv.attr,
- &format_attr_cmask8.attr,
- NULL,
-};
-
-static struct attribute_group nhm_uncore_format_group = {
- .name = "format",
- .attrs = nhm_uncore_formats_attr,
-};
-
-static struct uncore_event_desc nhm_uncore_events[] = {
- INTEL_UNCORE_EVENT_DESC(clockticks, "event=0xff,umask=0x00"),
- INTEL_UNCORE_EVENT_DESC(qmc_writes_full_any, "event=0x2f,umask=0x0f"),
- INTEL_UNCORE_EVENT_DESC(qmc_normal_reads_any, "event=0x2c,umask=0x0f"),
- INTEL_UNCORE_EVENT_DESC(qhl_request_ioh_reads, "event=0x20,umask=0x01"),
- INTEL_UNCORE_EVENT_DESC(qhl_request_ioh_writes, "event=0x20,umask=0x02"),
- INTEL_UNCORE_EVENT_DESC(qhl_request_remote_reads, "event=0x20,umask=0x04"),
- INTEL_UNCORE_EVENT_DESC(qhl_request_remote_writes, "event=0x20,umask=0x08"),
- INTEL_UNCORE_EVENT_DESC(qhl_request_local_reads, "event=0x20,umask=0x10"),
- INTEL_UNCORE_EVENT_DESC(qhl_request_local_writes, "event=0x20,umask=0x20"),
- { /* end: all zeroes */ },
-};
-
-static struct intel_uncore_ops nhm_uncore_msr_ops = {
- .disable_box = nhm_uncore_msr_disable_box,
- .enable_box = nhm_uncore_msr_enable_box,
- .disable_event = snb_uncore_msr_disable_event,
- .enable_event = nhm_uncore_msr_enable_event,
- .read_counter = uncore_msr_read_counter,
-};
-
-static struct intel_uncore_type nhm_uncore = {
- .name = "",
- .num_counters = 8,
- .num_boxes = 1,
- .perf_ctr_bits = 48,
- .fixed_ctr_bits = 48,
- .event_ctl = NHM_UNC_PERFEVTSEL0,
- .perf_ctr = NHM_UNC_UNCORE_PMC0,
- .fixed_ctr = NHM_UNC_FIXED_CTR,
- .fixed_ctl = NHM_UNC_FIXED_CTR_CTRL,
- .event_mask = NHM_UNC_RAW_EVENT_MASK,
- .event_descs = nhm_uncore_events,
- .ops = &nhm_uncore_msr_ops,
- .format_group = &nhm_uncore_format_group,
-};
-
-static struct intel_uncore_type *nhm_msr_uncores[] = {
- &nhm_uncore,
- NULL,
-};
-
-void nhm_uncore_cpu_init(void)
-{
- uncore_msr_uncores = nhm_msr_uncores;
-}
-
-/* end of Nehalem uncore support */
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c b/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c
deleted file mode 100644
index 4547b2cca..000000000
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c
+++ /dev/null
@@ -1,3132 +0,0 @@
-/* SandyBridge-EP/IvyTown uncore support */
-#include "perf_event_intel_uncore.h"
-
-
-/* SNB-EP Box level control */
-#define SNBEP_PMON_BOX_CTL_RST_CTRL (1 << 0)
-#define SNBEP_PMON_BOX_CTL_RST_CTRS (1 << 1)
-#define SNBEP_PMON_BOX_CTL_FRZ (1 << 8)
-#define SNBEP_PMON_BOX_CTL_FRZ_EN (1 << 16)
-#define SNBEP_PMON_BOX_CTL_INT (SNBEP_PMON_BOX_CTL_RST_CTRL | \
- SNBEP_PMON_BOX_CTL_RST_CTRS | \
- SNBEP_PMON_BOX_CTL_FRZ_EN)
-/* SNB-EP event control */
-#define SNBEP_PMON_CTL_EV_SEL_MASK 0x000000ff
-#define SNBEP_PMON_CTL_UMASK_MASK 0x0000ff00
-#define SNBEP_PMON_CTL_RST (1 << 17)
-#define SNBEP_PMON_CTL_EDGE_DET (1 << 18)
-#define SNBEP_PMON_CTL_EV_SEL_EXT (1 << 21)
-#define SNBEP_PMON_CTL_EN (1 << 22)
-#define SNBEP_PMON_CTL_INVERT (1 << 23)
-#define SNBEP_PMON_CTL_TRESH_MASK 0xff000000
-#define SNBEP_PMON_RAW_EVENT_MASK (SNBEP_PMON_CTL_EV_SEL_MASK | \
- SNBEP_PMON_CTL_UMASK_MASK | \
- SNBEP_PMON_CTL_EDGE_DET | \
- SNBEP_PMON_CTL_INVERT | \
- SNBEP_PMON_CTL_TRESH_MASK)
-
-/* SNB-EP Ubox event control */
-#define SNBEP_U_MSR_PMON_CTL_TRESH_MASK 0x1f000000
-#define SNBEP_U_MSR_PMON_RAW_EVENT_MASK \
- (SNBEP_PMON_CTL_EV_SEL_MASK | \
- SNBEP_PMON_CTL_UMASK_MASK | \
- SNBEP_PMON_CTL_EDGE_DET | \
- SNBEP_PMON_CTL_INVERT | \
- SNBEP_U_MSR_PMON_CTL_TRESH_MASK)
-
-#define SNBEP_CBO_PMON_CTL_TID_EN (1 << 19)
-#define SNBEP_CBO_MSR_PMON_RAW_EVENT_MASK (SNBEP_PMON_RAW_EVENT_MASK | \
- SNBEP_CBO_PMON_CTL_TID_EN)
-
-/* SNB-EP PCU event control */
-#define SNBEP_PCU_MSR_PMON_CTL_OCC_SEL_MASK 0x0000c000
-#define SNBEP_PCU_MSR_PMON_CTL_TRESH_MASK 0x1f000000
-#define SNBEP_PCU_MSR_PMON_CTL_OCC_INVERT (1 << 30)
-#define SNBEP_PCU_MSR_PMON_CTL_OCC_EDGE_DET (1 << 31)
-#define SNBEP_PCU_MSR_PMON_RAW_EVENT_MASK \
- (SNBEP_PMON_CTL_EV_SEL_MASK | \
- SNBEP_PCU_MSR_PMON_CTL_OCC_SEL_MASK | \
- SNBEP_PMON_CTL_EDGE_DET | \
- SNBEP_PMON_CTL_EV_SEL_EXT | \
- SNBEP_PMON_CTL_INVERT | \
- SNBEP_PCU_MSR_PMON_CTL_TRESH_MASK | \
- SNBEP_PCU_MSR_PMON_CTL_OCC_INVERT | \
- SNBEP_PCU_MSR_PMON_CTL_OCC_EDGE_DET)
-
-#define SNBEP_QPI_PCI_PMON_RAW_EVENT_MASK \
- (SNBEP_PMON_RAW_EVENT_MASK | \
- SNBEP_PMON_CTL_EV_SEL_EXT)
-
-/* SNB-EP pci control register */
-#define SNBEP_PCI_PMON_BOX_CTL 0xf4
-#define SNBEP_PCI_PMON_CTL0 0xd8
-/* SNB-EP pci counter register */
-#define SNBEP_PCI_PMON_CTR0 0xa0
-
-/* SNB-EP home agent register */
-#define SNBEP_HA_PCI_PMON_BOX_ADDRMATCH0 0x40
-#define SNBEP_HA_PCI_PMON_BOX_ADDRMATCH1 0x44
-#define SNBEP_HA_PCI_PMON_BOX_OPCODEMATCH 0x48
-/* SNB-EP memory controller register */
-#define SNBEP_MC_CHy_PCI_PMON_FIXED_CTL 0xf0
-#define SNBEP_MC_CHy_PCI_PMON_FIXED_CTR 0xd0
-/* SNB-EP QPI register */
-#define SNBEP_Q_Py_PCI_PMON_PKT_MATCH0 0x228
-#define SNBEP_Q_Py_PCI_PMON_PKT_MATCH1 0x22c
-#define SNBEP_Q_Py_PCI_PMON_PKT_MASK0 0x238
-#define SNBEP_Q_Py_PCI_PMON_PKT_MASK1 0x23c
-
-/* SNB-EP Ubox register */
-#define SNBEP_U_MSR_PMON_CTR0 0xc16
-#define SNBEP_U_MSR_PMON_CTL0 0xc10
-
-#define SNBEP_U_MSR_PMON_UCLK_FIXED_CTL 0xc08
-#define SNBEP_U_MSR_PMON_UCLK_FIXED_CTR 0xc09
-
-/* SNB-EP Cbo register */
-#define SNBEP_C0_MSR_PMON_CTR0 0xd16
-#define SNBEP_C0_MSR_PMON_CTL0 0xd10
-#define SNBEP_C0_MSR_PMON_BOX_CTL 0xd04
-#define SNBEP_C0_MSR_PMON_BOX_FILTER 0xd14
-#define SNBEP_CBO_MSR_OFFSET 0x20
-
-#define SNBEP_CB0_MSR_PMON_BOX_FILTER_TID 0x1f
-#define SNBEP_CB0_MSR_PMON_BOX_FILTER_NID 0x3fc00
-#define SNBEP_CB0_MSR_PMON_BOX_FILTER_STATE 0x7c0000
-#define SNBEP_CB0_MSR_PMON_BOX_FILTER_OPC 0xff800000
-
-#define SNBEP_CBO_EVENT_EXTRA_REG(e, m, i) { \
- .event = (e), \
- .msr = SNBEP_C0_MSR_PMON_BOX_FILTER, \
- .config_mask = (m), \
- .idx = (i) \
-}
-
-/* SNB-EP PCU register */
-#define SNBEP_PCU_MSR_PMON_CTR0 0xc36
-#define SNBEP_PCU_MSR_PMON_CTL0 0xc30
-#define SNBEP_PCU_MSR_PMON_BOX_CTL 0xc24
-#define SNBEP_PCU_MSR_PMON_BOX_FILTER 0xc34
-#define SNBEP_PCU_MSR_PMON_BOX_FILTER_MASK 0xffffffff
-#define SNBEP_PCU_MSR_CORE_C3_CTR 0x3fc
-#define SNBEP_PCU_MSR_CORE_C6_CTR 0x3fd
-
-/* IVBEP event control */
-#define IVBEP_PMON_BOX_CTL_INT (SNBEP_PMON_BOX_CTL_RST_CTRL | \
- SNBEP_PMON_BOX_CTL_RST_CTRS)
-#define IVBEP_PMON_RAW_EVENT_MASK (SNBEP_PMON_CTL_EV_SEL_MASK | \
- SNBEP_PMON_CTL_UMASK_MASK | \
- SNBEP_PMON_CTL_EDGE_DET | \
- SNBEP_PMON_CTL_TRESH_MASK)
-/* IVBEP Ubox */
-#define IVBEP_U_MSR_PMON_GLOBAL_CTL 0xc00
-#define IVBEP_U_PMON_GLOBAL_FRZ_ALL (1 << 31)
-#define IVBEP_U_PMON_GLOBAL_UNFRZ_ALL (1 << 29)
-
-#define IVBEP_U_MSR_PMON_RAW_EVENT_MASK \
- (SNBEP_PMON_CTL_EV_SEL_MASK | \
- SNBEP_PMON_CTL_UMASK_MASK | \
- SNBEP_PMON_CTL_EDGE_DET | \
- SNBEP_U_MSR_PMON_CTL_TRESH_MASK)
-/* IVBEP Cbo */
-#define IVBEP_CBO_MSR_PMON_RAW_EVENT_MASK (IVBEP_PMON_RAW_EVENT_MASK | \
- SNBEP_CBO_PMON_CTL_TID_EN)
-
-#define IVBEP_CB0_MSR_PMON_BOX_FILTER_TID (0x1fULL << 0)
-#define IVBEP_CB0_MSR_PMON_BOX_FILTER_LINK (0xfULL << 5)
-#define IVBEP_CB0_MSR_PMON_BOX_FILTER_STATE (0x3fULL << 17)
-#define IVBEP_CB0_MSR_PMON_BOX_FILTER_NID (0xffffULL << 32)
-#define IVBEP_CB0_MSR_PMON_BOX_FILTER_OPC (0x1ffULL << 52)
-#define IVBEP_CB0_MSR_PMON_BOX_FILTER_C6 (0x1ULL << 61)
-#define IVBEP_CB0_MSR_PMON_BOX_FILTER_NC (0x1ULL << 62)
-#define IVBEP_CB0_MSR_PMON_BOX_FILTER_ISOC (0x1ULL << 63)
-
-/* IVBEP home agent */
-#define IVBEP_HA_PCI_PMON_CTL_Q_OCC_RST (1 << 16)
-#define IVBEP_HA_PCI_PMON_RAW_EVENT_MASK \
- (IVBEP_PMON_RAW_EVENT_MASK | \
- IVBEP_HA_PCI_PMON_CTL_Q_OCC_RST)
-/* IVBEP PCU */
-#define IVBEP_PCU_MSR_PMON_RAW_EVENT_MASK \
- (SNBEP_PMON_CTL_EV_SEL_MASK | \
- SNBEP_PMON_CTL_EV_SEL_EXT | \
- SNBEP_PCU_MSR_PMON_CTL_OCC_SEL_MASK | \
- SNBEP_PMON_CTL_EDGE_DET | \
- SNBEP_PCU_MSR_PMON_CTL_TRESH_MASK | \
- SNBEP_PCU_MSR_PMON_CTL_OCC_INVERT | \
- SNBEP_PCU_MSR_PMON_CTL_OCC_EDGE_DET)
-/* IVBEP QPI */
-#define IVBEP_QPI_PCI_PMON_RAW_EVENT_MASK \
- (IVBEP_PMON_RAW_EVENT_MASK | \
- SNBEP_PMON_CTL_EV_SEL_EXT)
-
-#define __BITS_VALUE(x, i, n) ((typeof(x))(((x) >> ((i) * (n))) & \
- ((1ULL << (n)) - 1)))
-
-/* Haswell-EP Ubox */
-#define HSWEP_U_MSR_PMON_CTR0 0x709
-#define HSWEP_U_MSR_PMON_CTL0 0x705
-#define HSWEP_U_MSR_PMON_FILTER 0x707
-
-#define HSWEP_U_MSR_PMON_UCLK_FIXED_CTL 0x703
-#define HSWEP_U_MSR_PMON_UCLK_FIXED_CTR 0x704
-
-#define HSWEP_U_MSR_PMON_BOX_FILTER_TID (0x1 << 0)
-#define HSWEP_U_MSR_PMON_BOX_FILTER_CID (0x1fULL << 1)
-#define HSWEP_U_MSR_PMON_BOX_FILTER_MASK \
- (HSWEP_U_MSR_PMON_BOX_FILTER_TID | \
- HSWEP_U_MSR_PMON_BOX_FILTER_CID)
-
-/* Haswell-EP CBo */
-#define HSWEP_C0_MSR_PMON_CTR0 0xe08
-#define HSWEP_C0_MSR_PMON_CTL0 0xe01
-#define HSWEP_C0_MSR_PMON_BOX_CTL 0xe00
-#define HSWEP_C0_MSR_PMON_BOX_FILTER0 0xe05
-#define HSWEP_CBO_MSR_OFFSET 0x10
-
-
-#define HSWEP_CB0_MSR_PMON_BOX_FILTER_TID (0x3fULL << 0)
-#define HSWEP_CB0_MSR_PMON_BOX_FILTER_LINK (0xfULL << 6)
-#define HSWEP_CB0_MSR_PMON_BOX_FILTER_STATE (0x7fULL << 17)
-#define HSWEP_CB0_MSR_PMON_BOX_FILTER_NID (0xffffULL << 32)
-#define HSWEP_CB0_MSR_PMON_BOX_FILTER_OPC (0x1ffULL << 52)
-#define HSWEP_CB0_MSR_PMON_BOX_FILTER_C6 (0x1ULL << 61)
-#define HSWEP_CB0_MSR_PMON_BOX_FILTER_NC (0x1ULL << 62)
-#define HSWEP_CB0_MSR_PMON_BOX_FILTER_ISOC (0x1ULL << 63)
-
-
-/* Haswell-EP Sbox */
-#define HSWEP_S0_MSR_PMON_CTR0 0x726
-#define HSWEP_S0_MSR_PMON_CTL0 0x721
-#define HSWEP_S0_MSR_PMON_BOX_CTL 0x720
-#define HSWEP_SBOX_MSR_OFFSET 0xa
-#define HSWEP_S_MSR_PMON_RAW_EVENT_MASK (SNBEP_PMON_RAW_EVENT_MASK | \
- SNBEP_CBO_PMON_CTL_TID_EN)
-
-/* Haswell-EP PCU */
-#define HSWEP_PCU_MSR_PMON_CTR0 0x717
-#define HSWEP_PCU_MSR_PMON_CTL0 0x711
-#define HSWEP_PCU_MSR_PMON_BOX_CTL 0x710
-#define HSWEP_PCU_MSR_PMON_BOX_FILTER 0x715
-
-/* KNL Ubox */
-#define KNL_U_MSR_PMON_RAW_EVENT_MASK \
- (SNBEP_U_MSR_PMON_RAW_EVENT_MASK | \
- SNBEP_CBO_PMON_CTL_TID_EN)
-/* KNL CHA */
-#define KNL_CHA_MSR_OFFSET 0xc
-#define KNL_CHA_MSR_PMON_CTL_QOR (1 << 16)
-#define KNL_CHA_MSR_PMON_RAW_EVENT_MASK \
- (SNBEP_CBO_MSR_PMON_RAW_EVENT_MASK | \
- KNL_CHA_MSR_PMON_CTL_QOR)
-#define KNL_CHA_MSR_PMON_BOX_FILTER_TID 0x1ff
-#define KNL_CHA_MSR_PMON_BOX_FILTER_STATE (7 << 18)
-#define KNL_CHA_MSR_PMON_BOX_FILTER_OP (0xfffffe2aULL << 32)
-
-/* KNL EDC/MC UCLK */
-#define KNL_UCLK_MSR_PMON_CTR0_LOW 0x400
-#define KNL_UCLK_MSR_PMON_CTL0 0x420
-#define KNL_UCLK_MSR_PMON_BOX_CTL 0x430
-#define KNL_UCLK_MSR_PMON_UCLK_FIXED_LOW 0x44c
-#define KNL_UCLK_MSR_PMON_UCLK_FIXED_CTL 0x454
-#define KNL_PMON_FIXED_CTL_EN 0x1
-
-/* KNL EDC */
-#define KNL_EDC0_ECLK_MSR_PMON_CTR0_LOW 0xa00
-#define KNL_EDC0_ECLK_MSR_PMON_CTL0 0xa20
-#define KNL_EDC0_ECLK_MSR_PMON_BOX_CTL 0xa30
-#define KNL_EDC0_ECLK_MSR_PMON_ECLK_FIXED_LOW 0xa3c
-#define KNL_EDC0_ECLK_MSR_PMON_ECLK_FIXED_CTL 0xa44
-
-/* KNL MC */
-#define KNL_MC0_CH0_MSR_PMON_CTR0_LOW 0xb00
-#define KNL_MC0_CH0_MSR_PMON_CTL0 0xb20
-#define KNL_MC0_CH0_MSR_PMON_BOX_CTL 0xb30
-#define KNL_MC0_CH0_MSR_PMON_FIXED_LOW 0xb3c
-#define KNL_MC0_CH0_MSR_PMON_FIXED_CTL 0xb44
-
-/* KNL IRP */
-#define KNL_IRP_PCI_PMON_BOX_CTL 0xf0
-#define KNL_IRP_PCI_PMON_RAW_EVENT_MASK (SNBEP_PMON_RAW_EVENT_MASK | \
- KNL_CHA_MSR_PMON_CTL_QOR)
-/* KNL PCU */
-#define KNL_PCU_PMON_CTL_EV_SEL_MASK 0x0000007f
-#define KNL_PCU_PMON_CTL_USE_OCC_CTR (1 << 7)
-#define KNL_PCU_MSR_PMON_CTL_TRESH_MASK 0x3f000000
-#define KNL_PCU_MSR_PMON_RAW_EVENT_MASK \
- (KNL_PCU_PMON_CTL_EV_SEL_MASK | \
- KNL_PCU_PMON_CTL_USE_OCC_CTR | \
- SNBEP_PCU_MSR_PMON_CTL_OCC_SEL_MASK | \
- SNBEP_PMON_CTL_EDGE_DET | \
- SNBEP_CBO_PMON_CTL_TID_EN | \
- SNBEP_PMON_CTL_EV_SEL_EXT | \
- SNBEP_PMON_CTL_INVERT | \
- KNL_PCU_MSR_PMON_CTL_TRESH_MASK | \
- SNBEP_PCU_MSR_PMON_CTL_OCC_INVERT | \
- SNBEP_PCU_MSR_PMON_CTL_OCC_EDGE_DET)
-
-DEFINE_UNCORE_FORMAT_ATTR(event, event, "config:0-7");
-DEFINE_UNCORE_FORMAT_ATTR(event2, event, "config:0-6");
-DEFINE_UNCORE_FORMAT_ATTR(event_ext, event, "config:0-7,21");
-DEFINE_UNCORE_FORMAT_ATTR(use_occ_ctr, use_occ_ctr, "config:7");
-DEFINE_UNCORE_FORMAT_ATTR(umask, umask, "config:8-15");
-DEFINE_UNCORE_FORMAT_ATTR(qor, qor, "config:16");
-DEFINE_UNCORE_FORMAT_ATTR(edge, edge, "config:18");
-DEFINE_UNCORE_FORMAT_ATTR(tid_en, tid_en, "config:19");
-DEFINE_UNCORE_FORMAT_ATTR(inv, inv, "config:23");
-DEFINE_UNCORE_FORMAT_ATTR(thresh8, thresh, "config:24-31");
-DEFINE_UNCORE_FORMAT_ATTR(thresh6, thresh, "config:24-29");
-DEFINE_UNCORE_FORMAT_ATTR(thresh5, thresh, "config:24-28");
-DEFINE_UNCORE_FORMAT_ATTR(occ_sel, occ_sel, "config:14-15");
-DEFINE_UNCORE_FORMAT_ATTR(occ_invert, occ_invert, "config:30");
-DEFINE_UNCORE_FORMAT_ATTR(occ_edge, occ_edge, "config:14-51");
-DEFINE_UNCORE_FORMAT_ATTR(occ_edge_det, occ_edge_det, "config:31");
-DEFINE_UNCORE_FORMAT_ATTR(filter_tid, filter_tid, "config1:0-4");
-DEFINE_UNCORE_FORMAT_ATTR(filter_tid2, filter_tid, "config1:0");
-DEFINE_UNCORE_FORMAT_ATTR(filter_tid3, filter_tid, "config1:0-5");
-DEFINE_UNCORE_FORMAT_ATTR(filter_tid4, filter_tid, "config1:0-8");
-DEFINE_UNCORE_FORMAT_ATTR(filter_cid, filter_cid, "config1:5");
-DEFINE_UNCORE_FORMAT_ATTR(filter_link, filter_link, "config1:5-8");
-DEFINE_UNCORE_FORMAT_ATTR(filter_link2, filter_link, "config1:6-8");
-DEFINE_UNCORE_FORMAT_ATTR(filter_link3, filter_link, "config1:12");
-DEFINE_UNCORE_FORMAT_ATTR(filter_nid, filter_nid, "config1:10-17");
-DEFINE_UNCORE_FORMAT_ATTR(filter_nid2, filter_nid, "config1:32-47");
-DEFINE_UNCORE_FORMAT_ATTR(filter_state, filter_state, "config1:18-22");
-DEFINE_UNCORE_FORMAT_ATTR(filter_state2, filter_state, "config1:17-22");
-DEFINE_UNCORE_FORMAT_ATTR(filter_state3, filter_state, "config1:17-23");
-DEFINE_UNCORE_FORMAT_ATTR(filter_state4, filter_state, "config1:18-20");
-DEFINE_UNCORE_FORMAT_ATTR(filter_local, filter_local, "config1:33");
-DEFINE_UNCORE_FORMAT_ATTR(filter_all_op, filter_all_op, "config1:35");
-DEFINE_UNCORE_FORMAT_ATTR(filter_nnm, filter_nnm, "config1:37");
-DEFINE_UNCORE_FORMAT_ATTR(filter_opc, filter_opc, "config1:23-31");
-DEFINE_UNCORE_FORMAT_ATTR(filter_opc2, filter_opc, "config1:52-60");
-DEFINE_UNCORE_FORMAT_ATTR(filter_opc3, filter_opc, "config1:41-60");
-DEFINE_UNCORE_FORMAT_ATTR(filter_nc, filter_nc, "config1:62");
-DEFINE_UNCORE_FORMAT_ATTR(filter_c6, filter_c6, "config1:61");
-DEFINE_UNCORE_FORMAT_ATTR(filter_isoc, filter_isoc, "config1:63");
-DEFINE_UNCORE_FORMAT_ATTR(filter_band0, filter_band0, "config1:0-7");
-DEFINE_UNCORE_FORMAT_ATTR(filter_band1, filter_band1, "config1:8-15");
-DEFINE_UNCORE_FORMAT_ATTR(filter_band2, filter_band2, "config1:16-23");
-DEFINE_UNCORE_FORMAT_ATTR(filter_band3, filter_band3, "config1:24-31");
-DEFINE_UNCORE_FORMAT_ATTR(match_rds, match_rds, "config1:48-51");
-DEFINE_UNCORE_FORMAT_ATTR(match_rnid30, match_rnid30, "config1:32-35");
-DEFINE_UNCORE_FORMAT_ATTR(match_rnid4, match_rnid4, "config1:31");
-DEFINE_UNCORE_FORMAT_ATTR(match_dnid, match_dnid, "config1:13-17");
-DEFINE_UNCORE_FORMAT_ATTR(match_mc, match_mc, "config1:9-12");
-DEFINE_UNCORE_FORMAT_ATTR(match_opc, match_opc, "config1:5-8");
-DEFINE_UNCORE_FORMAT_ATTR(match_vnw, match_vnw, "config1:3-4");
-DEFINE_UNCORE_FORMAT_ATTR(match0, match0, "config1:0-31");
-DEFINE_UNCORE_FORMAT_ATTR(match1, match1, "config1:32-63");
-DEFINE_UNCORE_FORMAT_ATTR(mask_rds, mask_rds, "config2:48-51");
-DEFINE_UNCORE_FORMAT_ATTR(mask_rnid30, mask_rnid30, "config2:32-35");
-DEFINE_UNCORE_FORMAT_ATTR(mask_rnid4, mask_rnid4, "config2:31");
-DEFINE_UNCORE_FORMAT_ATTR(mask_dnid, mask_dnid, "config2:13-17");
-DEFINE_UNCORE_FORMAT_ATTR(mask_mc, mask_mc, "config2:9-12");
-DEFINE_UNCORE_FORMAT_ATTR(mask_opc, mask_opc, "config2:5-8");
-DEFINE_UNCORE_FORMAT_ATTR(mask_vnw, mask_vnw, "config2:3-4");
-DEFINE_UNCORE_FORMAT_ATTR(mask0, mask0, "config2:0-31");
-DEFINE_UNCORE_FORMAT_ATTR(mask1, mask1, "config2:32-63");
-
-static void snbep_uncore_pci_disable_box(struct intel_uncore_box *box)
-{
- struct pci_dev *pdev = box->pci_dev;
- int box_ctl = uncore_pci_box_ctl(box);
- u32 config = 0;
-
- if (!pci_read_config_dword(pdev, box_ctl, &config)) {
- config |= SNBEP_PMON_BOX_CTL_FRZ;
- pci_write_config_dword(pdev, box_ctl, config);
- }
-}
-
-static void snbep_uncore_pci_enable_box(struct intel_uncore_box *box)
-{
- struct pci_dev *pdev = box->pci_dev;
- int box_ctl = uncore_pci_box_ctl(box);
- u32 config = 0;
-
- if (!pci_read_config_dword(pdev, box_ctl, &config)) {
- config &= ~SNBEP_PMON_BOX_CTL_FRZ;
- pci_write_config_dword(pdev, box_ctl, config);
- }
-}
-
-static void snbep_uncore_pci_enable_event(struct intel_uncore_box *box, struct perf_event *event)
-{
- struct pci_dev *pdev = box->pci_dev;
- struct hw_perf_event *hwc = &event->hw;
-
- pci_write_config_dword(pdev, hwc->config_base, hwc->config | SNBEP_PMON_CTL_EN);
-}
-
-static void snbep_uncore_pci_disable_event(struct intel_uncore_box *box, struct perf_event *event)
-{
- struct pci_dev *pdev = box->pci_dev;
- struct hw_perf_event *hwc = &event->hw;
-
- pci_write_config_dword(pdev, hwc->config_base, hwc->config);
-}
-
-static u64 snbep_uncore_pci_read_counter(struct intel_uncore_box *box, struct perf_event *event)
-{
- struct pci_dev *pdev = box->pci_dev;
- struct hw_perf_event *hwc = &event->hw;
- u64 count = 0;
-
- pci_read_config_dword(pdev, hwc->event_base, (u32 *)&count);
- pci_read_config_dword(pdev, hwc->event_base + 4, (u32 *)&count + 1);
-
- return count;
-}
-
-static void snbep_uncore_pci_init_box(struct intel_uncore_box *box)
-{
- struct pci_dev *pdev = box->pci_dev;
- int box_ctl = uncore_pci_box_ctl(box);
-
- pci_write_config_dword(pdev, box_ctl, SNBEP_PMON_BOX_CTL_INT);
-}
-
-static void snbep_uncore_msr_disable_box(struct intel_uncore_box *box)
-{
- u64 config;
- unsigned msr;
-
- msr = uncore_msr_box_ctl(box);
- if (msr) {
- rdmsrl(msr, config);
- config |= SNBEP_PMON_BOX_CTL_FRZ;
- wrmsrl(msr, config);
- }
-}
-
-static void snbep_uncore_msr_enable_box(struct intel_uncore_box *box)
-{
- u64 config;
- unsigned msr;
-
- msr = uncore_msr_box_ctl(box);
- if (msr) {
- rdmsrl(msr, config);
- config &= ~SNBEP_PMON_BOX_CTL_FRZ;
- wrmsrl(msr, config);
- }
-}
-
-static void snbep_uncore_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event)
-{
- struct hw_perf_event *hwc = &event->hw;
- struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
-
- if (reg1->idx != EXTRA_REG_NONE)
- wrmsrl(reg1->reg, uncore_shared_reg_config(box, 0));
-
- wrmsrl(hwc->config_base, hwc->config | SNBEP_PMON_CTL_EN);
-}
-
-static void snbep_uncore_msr_disable_event(struct intel_uncore_box *box,
- struct perf_event *event)
-{
- struct hw_perf_event *hwc = &event->hw;
-
- wrmsrl(hwc->config_base, hwc->config);
-}
-
-static void snbep_uncore_msr_init_box(struct intel_uncore_box *box)
-{
- unsigned msr = uncore_msr_box_ctl(box);
-
- if (msr)
- wrmsrl(msr, SNBEP_PMON_BOX_CTL_INT);
-}
-
-static struct attribute *snbep_uncore_formats_attr[] = {
- &format_attr_event.attr,
- &format_attr_umask.attr,
- &format_attr_edge.attr,
- &format_attr_inv.attr,
- &format_attr_thresh8.attr,
- NULL,
-};
-
-static struct attribute *snbep_uncore_ubox_formats_attr[] = {
- &format_attr_event.attr,
- &format_attr_umask.attr,
- &format_attr_edge.attr,
- &format_attr_inv.attr,
- &format_attr_thresh5.attr,
- NULL,
-};
-
-static struct attribute *snbep_uncore_cbox_formats_attr[] = {
- &format_attr_event.attr,
- &format_attr_umask.attr,
- &format_attr_edge.attr,
- &format_attr_tid_en.attr,
- &format_attr_inv.attr,
- &format_attr_thresh8.attr,
- &format_attr_filter_tid.attr,
- &format_attr_filter_nid.attr,
- &format_attr_filter_state.attr,
- &format_attr_filter_opc.attr,
- NULL,
-};
-
-static struct attribute *snbep_uncore_pcu_formats_attr[] = {
- &format_attr_event_ext.attr,
- &format_attr_occ_sel.attr,
- &format_attr_edge.attr,
- &format_attr_inv.attr,
- &format_attr_thresh5.attr,
- &format_attr_occ_invert.attr,
- &format_attr_occ_edge.attr,
- &format_attr_filter_band0.attr,
- &format_attr_filter_band1.attr,
- &format_attr_filter_band2.attr,
- &format_attr_filter_band3.attr,
- NULL,
-};
-
-static struct attribute *snbep_uncore_qpi_formats_attr[] = {
- &format_attr_event_ext.attr,
- &format_attr_umask.attr,
- &format_attr_edge.attr,
- &format_attr_inv.attr,
- &format_attr_thresh8.attr,
- &format_attr_match_rds.attr,
- &format_attr_match_rnid30.attr,
- &format_attr_match_rnid4.attr,
- &format_attr_match_dnid.attr,
- &format_attr_match_mc.attr,
- &format_attr_match_opc.attr,
- &format_attr_match_vnw.attr,
- &format_attr_match0.attr,
- &format_attr_match1.attr,
- &format_attr_mask_rds.attr,
- &format_attr_mask_rnid30.attr,
- &format_attr_mask_rnid4.attr,
- &format_attr_mask_dnid.attr,
- &format_attr_mask_mc.attr,
- &format_attr_mask_opc.attr,
- &format_attr_mask_vnw.attr,
- &format_attr_mask0.attr,
- &format_attr_mask1.attr,
- NULL,
-};
-
-static struct uncore_event_desc snbep_uncore_imc_events[] = {
- INTEL_UNCORE_EVENT_DESC(clockticks, "event=0xff,umask=0x00"),
- INTEL_UNCORE_EVENT_DESC(cas_count_read, "event=0x04,umask=0x03"),
- INTEL_UNCORE_EVENT_DESC(cas_count_read.scale, "6.103515625e-5"),
- INTEL_UNCORE_EVENT_DESC(cas_count_read.unit, "MiB"),
- INTEL_UNCORE_EVENT_DESC(cas_count_write, "event=0x04,umask=0x0c"),
- INTEL_UNCORE_EVENT_DESC(cas_count_write.scale, "6.103515625e-5"),
- INTEL_UNCORE_EVENT_DESC(cas_count_write.unit, "MiB"),
- { /* end: all zeroes */ },
-};
-
-static struct uncore_event_desc snbep_uncore_qpi_events[] = {
- INTEL_UNCORE_EVENT_DESC(clockticks, "event=0x14"),
- INTEL_UNCORE_EVENT_DESC(txl_flits_active, "event=0x00,umask=0x06"),
- INTEL_UNCORE_EVENT_DESC(drs_data, "event=0x102,umask=0x08"),
- INTEL_UNCORE_EVENT_DESC(ncb_data, "event=0x103,umask=0x04"),
- { /* end: all zeroes */ },
-};
-
-static struct attribute_group snbep_uncore_format_group = {
- .name = "format",
- .attrs = snbep_uncore_formats_attr,
-};
-
-static struct attribute_group snbep_uncore_ubox_format_group = {
- .name = "format",
- .attrs = snbep_uncore_ubox_formats_attr,
-};
-
-static struct attribute_group snbep_uncore_cbox_format_group = {
- .name = "format",
- .attrs = snbep_uncore_cbox_formats_attr,
-};
-
-static struct attribute_group snbep_uncore_pcu_format_group = {
- .name = "format",
- .attrs = snbep_uncore_pcu_formats_attr,
-};
-
-static struct attribute_group snbep_uncore_qpi_format_group = {
- .name = "format",
- .attrs = snbep_uncore_qpi_formats_attr,
-};
-
-#define __SNBEP_UNCORE_MSR_OPS_COMMON_INIT() \
- .disable_box = snbep_uncore_msr_disable_box, \
- .enable_box = snbep_uncore_msr_enable_box, \
- .disable_event = snbep_uncore_msr_disable_event, \
- .enable_event = snbep_uncore_msr_enable_event, \
- .read_counter = uncore_msr_read_counter
-
-#define SNBEP_UNCORE_MSR_OPS_COMMON_INIT() \
- __SNBEP_UNCORE_MSR_OPS_COMMON_INIT(), \
- .init_box = snbep_uncore_msr_init_box \
-
-static struct intel_uncore_ops snbep_uncore_msr_ops = {
- SNBEP_UNCORE_MSR_OPS_COMMON_INIT(),
-};
-
-#define SNBEP_UNCORE_PCI_OPS_COMMON_INIT() \
- .init_box = snbep_uncore_pci_init_box, \
- .disable_box = snbep_uncore_pci_disable_box, \
- .enable_box = snbep_uncore_pci_enable_box, \
- .disable_event = snbep_uncore_pci_disable_event, \
- .read_counter = snbep_uncore_pci_read_counter
-
-static struct intel_uncore_ops snbep_uncore_pci_ops = {
- SNBEP_UNCORE_PCI_OPS_COMMON_INIT(),
-	.enable_event		= snbep_uncore_pci_enable_event,
-};
-
-static struct event_constraint snbep_uncore_cbox_constraints[] = {
- UNCORE_EVENT_CONSTRAINT(0x01, 0x1),
- UNCORE_EVENT_CONSTRAINT(0x02, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x04, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x05, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x07, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x09, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x11, 0x1),
- UNCORE_EVENT_CONSTRAINT(0x12, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x13, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x1b, 0xc),
- UNCORE_EVENT_CONSTRAINT(0x1c, 0xc),
- UNCORE_EVENT_CONSTRAINT(0x1d, 0xc),
- UNCORE_EVENT_CONSTRAINT(0x1e, 0xc),
- EVENT_CONSTRAINT_OVERLAP(0x1f, 0xe, 0xff),
- UNCORE_EVENT_CONSTRAINT(0x21, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x23, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x31, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x32, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x33, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x34, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x35, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x36, 0x1),
- UNCORE_EVENT_CONSTRAINT(0x37, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x38, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x39, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x3b, 0x1),
- EVENT_CONSTRAINT_END
-};
-
-static struct event_constraint snbep_uncore_r2pcie_constraints[] = {
- UNCORE_EVENT_CONSTRAINT(0x10, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x11, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x12, 0x1),
- UNCORE_EVENT_CONSTRAINT(0x23, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x24, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x25, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x26, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x32, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x33, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x34, 0x3),
- EVENT_CONSTRAINT_END
-};
-
-static struct event_constraint snbep_uncore_r3qpi_constraints[] = {
- UNCORE_EVENT_CONSTRAINT(0x10, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x11, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x12, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x13, 0x1),
- UNCORE_EVENT_CONSTRAINT(0x20, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x21, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x22, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x23, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x24, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x25, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x26, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x28, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x29, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x2a, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x2b, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x2c, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x2d, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x2e, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x2f, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x30, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x31, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x32, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x33, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x34, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x36, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x37, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x38, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x39, 0x3),
- EVENT_CONSTRAINT_END
-};
-
-static struct intel_uncore_type snbep_uncore_ubox = {
- .name = "ubox",
- .num_counters = 2,
- .num_boxes = 1,
- .perf_ctr_bits = 44,
- .fixed_ctr_bits = 48,
- .perf_ctr = SNBEP_U_MSR_PMON_CTR0,
- .event_ctl = SNBEP_U_MSR_PMON_CTL0,
- .event_mask = SNBEP_U_MSR_PMON_RAW_EVENT_MASK,
- .fixed_ctr = SNBEP_U_MSR_PMON_UCLK_FIXED_CTR,
- .fixed_ctl = SNBEP_U_MSR_PMON_UCLK_FIXED_CTL,
- .ops = &snbep_uncore_msr_ops,
- .format_group = &snbep_uncore_ubox_format_group,
-};
-
-static struct extra_reg snbep_uncore_cbox_extra_regs[] = {
- SNBEP_CBO_EVENT_EXTRA_REG(SNBEP_CBO_PMON_CTL_TID_EN,
- SNBEP_CBO_PMON_CTL_TID_EN, 0x1),
- SNBEP_CBO_EVENT_EXTRA_REG(0x0334, 0xffff, 0x4),
- SNBEP_CBO_EVENT_EXTRA_REG(0x4334, 0xffff, 0x6),
- SNBEP_CBO_EVENT_EXTRA_REG(0x0534, 0xffff, 0x4),
- SNBEP_CBO_EVENT_EXTRA_REG(0x4534, 0xffff, 0x6),
- SNBEP_CBO_EVENT_EXTRA_REG(0x0934, 0xffff, 0x4),
- SNBEP_CBO_EVENT_EXTRA_REG(0x4934, 0xffff, 0x6),
- SNBEP_CBO_EVENT_EXTRA_REG(0x4134, 0xffff, 0x6),
- SNBEP_CBO_EVENT_EXTRA_REG(0x0135, 0xffff, 0x8),
- SNBEP_CBO_EVENT_EXTRA_REG(0x0335, 0xffff, 0x8),
- SNBEP_CBO_EVENT_EXTRA_REG(0x4135, 0xffff, 0xa),
- SNBEP_CBO_EVENT_EXTRA_REG(0x4335, 0xffff, 0xa),
- SNBEP_CBO_EVENT_EXTRA_REG(0x4435, 0xffff, 0x2),
- SNBEP_CBO_EVENT_EXTRA_REG(0x4835, 0xffff, 0x2),
- SNBEP_CBO_EVENT_EXTRA_REG(0x4a35, 0xffff, 0x2),
- SNBEP_CBO_EVENT_EXTRA_REG(0x5035, 0xffff, 0x2),
- SNBEP_CBO_EVENT_EXTRA_REG(0x0136, 0xffff, 0x8),
- SNBEP_CBO_EVENT_EXTRA_REG(0x0336, 0xffff, 0x8),
- SNBEP_CBO_EVENT_EXTRA_REG(0x4136, 0xffff, 0xa),
- SNBEP_CBO_EVENT_EXTRA_REG(0x4336, 0xffff, 0xa),
- SNBEP_CBO_EVENT_EXTRA_REG(0x4436, 0xffff, 0x2),
- SNBEP_CBO_EVENT_EXTRA_REG(0x4836, 0xffff, 0x2),
- SNBEP_CBO_EVENT_EXTRA_REG(0x4a36, 0xffff, 0x2),
- SNBEP_CBO_EVENT_EXTRA_REG(0x4037, 0x40ff, 0x2),
- EVENT_EXTRA_END
-};
-
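-/*
- * The Cbox events on a box share a single filter MSR.  Each of the (up to)
- * five filter fields keeps a 6-bit reference count packed into er->ref; a
- * field is granted to an event only if it is currently unused or already
- * holds the same filter value.
- */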
-static void snbep_cbox_put_constraint(struct intel_uncore_box *box, struct perf_event *event)
-{
- struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
- struct intel_uncore_extra_reg *er = &box->shared_regs[0];
- int i;
-
- if (uncore_box_is_fake(box))
- return;
-
- for (i = 0; i < 5; i++) {
- if (reg1->alloc & (0x1 << i))
- atomic_sub(1 << (i * 6), &er->ref);
- }
- reg1->alloc = 0;
-}
-
-static struct event_constraint *
-__snbep_cbox_get_constraint(struct intel_uncore_box *box, struct perf_event *event,
- u64 (*cbox_filter_mask)(int fields))
-{
- struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
- struct intel_uncore_extra_reg *er = &box->shared_regs[0];
- int i, alloc = 0;
- unsigned long flags;
- u64 mask;
-
- if (reg1->idx == EXTRA_REG_NONE)
- return NULL;
-
- raw_spin_lock_irqsave(&er->lock, flags);
- for (i = 0; i < 5; i++) {
- if (!(reg1->idx & (0x1 << i)))
- continue;
- if (!uncore_box_is_fake(box) && (reg1->alloc & (0x1 << i)))
- continue;
-
- mask = cbox_filter_mask(0x1 << i);
- if (!__BITS_VALUE(atomic_read(&er->ref), i, 6) ||
- !((reg1->config ^ er->config) & mask)) {
- atomic_add(1 << (i * 6), &er->ref);
- er->config &= ~mask;
- er->config |= reg1->config & mask;
- alloc |= (0x1 << i);
- } else {
- break;
- }
- }
- raw_spin_unlock_irqrestore(&er->lock, flags);
- if (i < 5)
- goto fail;
-
- if (!uncore_box_is_fake(box))
- reg1->alloc |= alloc;
-
- return NULL;
-fail:
- for (; i >= 0; i--) {
- if (alloc & (0x1 << i))
- atomic_sub(1 << (i * 6), &er->ref);
- }
- return &uncore_constraint_empty;
-}
-
-static u64 snbep_cbox_filter_mask(int fields)
-{
- u64 mask = 0;
-
- if (fields & 0x1)
- mask |= SNBEP_CB0_MSR_PMON_BOX_FILTER_TID;
- if (fields & 0x2)
- mask |= SNBEP_CB0_MSR_PMON_BOX_FILTER_NID;
- if (fields & 0x4)
- mask |= SNBEP_CB0_MSR_PMON_BOX_FILTER_STATE;
- if (fields & 0x8)
- mask |= SNBEP_CB0_MSR_PMON_BOX_FILTER_OPC;
-
- return mask;
-}
-
-static struct event_constraint *
-snbep_cbox_get_constraint(struct intel_uncore_box *box, struct perf_event *event)
-{
- return __snbep_cbox_get_constraint(box, event, snbep_cbox_filter_mask);
-}
-
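-/*
- * Match the event against the extra_regs table to find which filter fields
- * it needs, then record the per-box filter MSR and the filter value taken
- * from attr.config1.
- */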
-static int snbep_cbox_hw_config(struct intel_uncore_box *box, struct perf_event *event)
-{
- struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
- struct extra_reg *er;
- int idx = 0;
-
- for (er = snbep_uncore_cbox_extra_regs; er->msr; er++) {
- if (er->event != (event->hw.config & er->config_mask))
- continue;
- idx |= er->idx;
- }
-
- if (idx) {
- reg1->reg = SNBEP_C0_MSR_PMON_BOX_FILTER +
- SNBEP_CBO_MSR_OFFSET * box->pmu->pmu_idx;
- reg1->config = event->attr.config1 & snbep_cbox_filter_mask(idx);
- reg1->idx = idx;
- }
- return 0;
-}
-
-static struct intel_uncore_ops snbep_uncore_cbox_ops = {
- SNBEP_UNCORE_MSR_OPS_COMMON_INIT(),
- .hw_config = snbep_cbox_hw_config,
- .get_constraint = snbep_cbox_get_constraint,
- .put_constraint = snbep_cbox_put_constraint,
-};
-
-static struct intel_uncore_type snbep_uncore_cbox = {
- .name = "cbox",
- .num_counters = 4,
- .num_boxes = 8,
- .perf_ctr_bits = 44,
- .event_ctl = SNBEP_C0_MSR_PMON_CTL0,
- .perf_ctr = SNBEP_C0_MSR_PMON_CTR0,
- .event_mask = SNBEP_CBO_MSR_PMON_RAW_EVENT_MASK,
- .box_ctl = SNBEP_C0_MSR_PMON_BOX_CTL,
- .msr_offset = SNBEP_CBO_MSR_OFFSET,
- .num_shared_regs = 1,
- .constraints = snbep_uncore_cbox_constraints,
- .ops = &snbep_uncore_cbox_ops,
- .format_group = &snbep_uncore_cbox_format_group,
-};
-
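-/*
- * The PCU box filter MSR holds four 8-bit occupancy bands.  If the band an
- * event asked for is already taken with a different value, the event can be
- * moved to a free band by shifting its filter value and adjusting the event
- * select accordingly.
- */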
-static u64 snbep_pcu_alter_er(struct perf_event *event, int new_idx, bool modify)
-{
- struct hw_perf_event *hwc = &event->hw;
- struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
- u64 config = reg1->config;
-
- if (new_idx > reg1->idx)
- config <<= 8 * (new_idx - reg1->idx);
- else
- config >>= 8 * (reg1->idx - new_idx);
-
- if (modify) {
- hwc->config += new_idx - reg1->idx;
- reg1->config = config;
- reg1->idx = new_idx;
- }
- return config;
-}
-
-static struct event_constraint *
-snbep_pcu_get_constraint(struct intel_uncore_box *box, struct perf_event *event)
-{
- struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
- struct intel_uncore_extra_reg *er = &box->shared_regs[0];
- unsigned long flags;
- int idx = reg1->idx;
- u64 mask, config1 = reg1->config;
- bool ok = false;
-
- if (reg1->idx == EXTRA_REG_NONE ||
- (!uncore_box_is_fake(box) && reg1->alloc))
- return NULL;
-again:
- mask = 0xffULL << (idx * 8);
- raw_spin_lock_irqsave(&er->lock, flags);
- if (!__BITS_VALUE(atomic_read(&er->ref), idx, 8) ||
- !((config1 ^ er->config) & mask)) {
- atomic_add(1 << (idx * 8), &er->ref);
- er->config &= ~mask;
- er->config |= config1 & mask;
- ok = true;
- }
- raw_spin_unlock_irqrestore(&er->lock, flags);
-
- if (!ok) {
- idx = (idx + 1) % 4;
- if (idx != reg1->idx) {
- config1 = snbep_pcu_alter_er(event, idx, false);
- goto again;
- }
- return &uncore_constraint_empty;
- }
-
- if (!uncore_box_is_fake(box)) {
- if (idx != reg1->idx)
- snbep_pcu_alter_er(event, idx, true);
- reg1->alloc = 1;
- }
- return NULL;
-}
-
-static void snbep_pcu_put_constraint(struct intel_uncore_box *box, struct perf_event *event)
-{
- struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
- struct intel_uncore_extra_reg *er = &box->shared_regs[0];
-
- if (uncore_box_is_fake(box) || !reg1->alloc)
- return;
-
- atomic_sub(1 << (reg1->idx * 8), &er->ref);
- reg1->alloc = 0;
-}
-
-static int snbep_pcu_hw_config(struct intel_uncore_box *box, struct perf_event *event)
-{
- struct hw_perf_event *hwc = &event->hw;
- struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
- int ev_sel = hwc->config & SNBEP_PMON_CTL_EV_SEL_MASK;
-
- if (ev_sel >= 0xb && ev_sel <= 0xe) {
- reg1->reg = SNBEP_PCU_MSR_PMON_BOX_FILTER;
- reg1->idx = ev_sel - 0xb;
- reg1->config = event->attr.config1 & (0xff << (reg1->idx * 8));
- }
- return 0;
-}
-
-static struct intel_uncore_ops snbep_uncore_pcu_ops = {
- SNBEP_UNCORE_MSR_OPS_COMMON_INIT(),
- .hw_config = snbep_pcu_hw_config,
- .get_constraint = snbep_pcu_get_constraint,
- .put_constraint = snbep_pcu_put_constraint,
-};
-
-static struct intel_uncore_type snbep_uncore_pcu = {
- .name = "pcu",
- .num_counters = 4,
- .num_boxes = 1,
- .perf_ctr_bits = 48,
- .perf_ctr = SNBEP_PCU_MSR_PMON_CTR0,
- .event_ctl = SNBEP_PCU_MSR_PMON_CTL0,
- .event_mask = SNBEP_PCU_MSR_PMON_RAW_EVENT_MASK,
- .box_ctl = SNBEP_PCU_MSR_PMON_BOX_CTL,
- .num_shared_regs = 1,
- .ops = &snbep_uncore_pcu_ops,
- .format_group = &snbep_uncore_pcu_format_group,
-};
-
-static struct intel_uncore_type *snbep_msr_uncores[] = {
- &snbep_uncore_ubox,
- &snbep_uncore_cbox,
- &snbep_uncore_pcu,
- NULL,
-};
-
-void snbep_uncore_cpu_init(void)
-{
- if (snbep_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores)
- snbep_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores;
- uncore_msr_uncores = snbep_msr_uncores;
-}
-
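-/*
- * Slots in uncore_extra_pci_dev[] for auxiliary PCI devices (the QPI port
- * filter devices and, on Haswell-EP, a PCU device) that are matched by the
- * PCI id tables below but are not PMON boxes themselves.
- */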
-enum {
- SNBEP_PCI_QPI_PORT0_FILTER,
- SNBEP_PCI_QPI_PORT1_FILTER,
- HSWEP_PCI_PCU_3,
-};
-
-static int snbep_qpi_hw_config(struct intel_uncore_box *box, struct perf_event *event)
-{
- struct hw_perf_event *hwc = &event->hw;
- struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
- struct hw_perf_event_extra *reg2 = &hwc->branch_reg;
-
- if ((hwc->config & SNBEP_PMON_CTL_EV_SEL_MASK) == 0x38) {
- reg1->idx = 0;
- reg1->reg = SNBEP_Q_Py_PCI_PMON_PKT_MATCH0;
- reg1->config = event->attr.config1;
- reg2->reg = SNBEP_Q_Py_PCI_PMON_PKT_MASK0;
- reg2->config = event->attr.config2;
- }
- return 0;
-}
-
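-/*
- * The QPI match/mask registers live in the separate per-port filter device
- * stashed in uncore_extra_pci_dev[], so they are programmed there, as two
- * 32-bit config writes each, before the event itself is enabled.
- */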
-static void snbep_qpi_enable_event(struct intel_uncore_box *box, struct perf_event *event)
-{
- struct pci_dev *pdev = box->pci_dev;
- struct hw_perf_event *hwc = &event->hw;
- struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
- struct hw_perf_event_extra *reg2 = &hwc->branch_reg;
-
- if (reg1->idx != EXTRA_REG_NONE) {
- int idx = box->pmu->pmu_idx + SNBEP_PCI_QPI_PORT0_FILTER;
- struct pci_dev *filter_pdev = uncore_extra_pci_dev[box->phys_id][idx];
- if (filter_pdev) {
- pci_write_config_dword(filter_pdev, reg1->reg,
- (u32)reg1->config);
- pci_write_config_dword(filter_pdev, reg1->reg + 4,
- (u32)(reg1->config >> 32));
- pci_write_config_dword(filter_pdev, reg2->reg,
- (u32)reg2->config);
- pci_write_config_dword(filter_pdev, reg2->reg + 4,
- (u32)(reg2->config >> 32));
- }
- }
-
- pci_write_config_dword(pdev, hwc->config_base, hwc->config | SNBEP_PMON_CTL_EN);
-}
-
-static struct intel_uncore_ops snbep_uncore_qpi_ops = {
- SNBEP_UNCORE_PCI_OPS_COMMON_INIT(),
- .enable_event = snbep_qpi_enable_event,
- .hw_config = snbep_qpi_hw_config,
- .get_constraint = uncore_get_constraint,
- .put_constraint = uncore_put_constraint,
-};
-
-#define SNBEP_UNCORE_PCI_COMMON_INIT() \
- .perf_ctr = SNBEP_PCI_PMON_CTR0, \
- .event_ctl = SNBEP_PCI_PMON_CTL0, \
- .event_mask = SNBEP_PMON_RAW_EVENT_MASK, \
- .box_ctl = SNBEP_PCI_PMON_BOX_CTL, \
- .ops = &snbep_uncore_pci_ops, \
- .format_group = &snbep_uncore_format_group
-
-static struct intel_uncore_type snbep_uncore_ha = {
- .name = "ha",
- .num_counters = 4,
- .num_boxes = 1,
- .perf_ctr_bits = 48,
- SNBEP_UNCORE_PCI_COMMON_INIT(),
-};
-
-static struct intel_uncore_type snbep_uncore_imc = {
- .name = "imc",
- .num_counters = 4,
- .num_boxes = 4,
- .perf_ctr_bits = 48,
- .fixed_ctr_bits = 48,
- .fixed_ctr = SNBEP_MC_CHy_PCI_PMON_FIXED_CTR,
- .fixed_ctl = SNBEP_MC_CHy_PCI_PMON_FIXED_CTL,
- .event_descs = snbep_uncore_imc_events,
- SNBEP_UNCORE_PCI_COMMON_INIT(),
-};
-
-static struct intel_uncore_type snbep_uncore_qpi = {
- .name = "qpi",
- .num_counters = 4,
- .num_boxes = 2,
- .perf_ctr_bits = 48,
- .perf_ctr = SNBEP_PCI_PMON_CTR0,
- .event_ctl = SNBEP_PCI_PMON_CTL0,
- .event_mask = SNBEP_QPI_PCI_PMON_RAW_EVENT_MASK,
- .box_ctl = SNBEP_PCI_PMON_BOX_CTL,
- .num_shared_regs = 1,
- .ops = &snbep_uncore_qpi_ops,
- .event_descs = snbep_uncore_qpi_events,
- .format_group = &snbep_uncore_qpi_format_group,
-};
-
-static struct intel_uncore_type snbep_uncore_r2pcie = {
- .name = "r2pcie",
- .num_counters = 4,
- .num_boxes = 1,
- .perf_ctr_bits = 44,
- .constraints = snbep_uncore_r2pcie_constraints,
- SNBEP_UNCORE_PCI_COMMON_INIT(),
-};
-
-static struct intel_uncore_type snbep_uncore_r3qpi = {
- .name = "r3qpi",
- .num_counters = 3,
- .num_boxes = 2,
- .perf_ctr_bits = 44,
- .constraints = snbep_uncore_r3qpi_constraints,
- SNBEP_UNCORE_PCI_COMMON_INIT(),
-};
-
-enum {
- SNBEP_PCI_UNCORE_HA,
- SNBEP_PCI_UNCORE_IMC,
- SNBEP_PCI_UNCORE_QPI,
- SNBEP_PCI_UNCORE_R2PCIE,
- SNBEP_PCI_UNCORE_R3QPI,
-};
-
-static struct intel_uncore_type *snbep_pci_uncores[] = {
- [SNBEP_PCI_UNCORE_HA] = &snbep_uncore_ha,
- [SNBEP_PCI_UNCORE_IMC] = &snbep_uncore_imc,
- [SNBEP_PCI_UNCORE_QPI] = &snbep_uncore_qpi,
- [SNBEP_PCI_UNCORE_R2PCIE] = &snbep_uncore_r2pcie,
- [SNBEP_PCI_UNCORE_R3QPI] = &snbep_uncore_r3qpi,
- NULL,
-};
-
-static const struct pci_device_id snbep_uncore_pci_ids[] = {
- { /* Home Agent */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_HA),
- .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_HA, 0),
- },
- { /* MC Channel 0 */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_IMC0),
- .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_IMC, 0),
- },
- { /* MC Channel 1 */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_IMC1),
- .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_IMC, 1),
- },
- { /* MC Channel 2 */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_IMC2),
- .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_IMC, 2),
- },
- { /* MC Channel 3 */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_IMC3),
- .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_IMC, 3),
- },
- { /* QPI Port 0 */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_QPI0),
- .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_QPI, 0),
- },
- { /* QPI Port 1 */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_QPI1),
- .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_QPI, 1),
- },
- { /* R2PCIe */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_R2PCIE),
- .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_R2PCIE, 0),
- },
- { /* R3QPI Link 0 */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_R3QPI0),
- .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_R3QPI, 0),
- },
- { /* R3QPI Link 1 */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_R3QPI1),
- .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_R3QPI, 1),
- },
- { /* QPI Port 0 filter */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x3c86),
- .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV,
- SNBEP_PCI_QPI_PORT0_FILTER),
- },
- { /* QPI Port 0 filter */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x3c96),
- .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV,
- SNBEP_PCI_QPI_PORT1_FILTER),
- },
- { /* end: all zeroes */ }
-};
-
-static struct pci_driver snbep_uncore_pci_driver = {
- .name = "snbep_uncore",
- .id_table = snbep_uncore_pci_ids,
-};
-
-/*
- * build pci bus to socket mapping
- */
-static int snbep_pci2phy_map_init(int devid)
-{
- struct pci_dev *ubox_dev = NULL;
- int i, bus, nodeid, segment;
- struct pci2phy_map *map;
- int err = 0;
- u32 config = 0;
-
- while (1) {
- /* find the UBOX device */
- ubox_dev = pci_get_device(PCI_VENDOR_ID_INTEL, devid, ubox_dev);
- if (!ubox_dev)
- break;
- bus = ubox_dev->bus->number;
- /* get the Node ID of the local register */
- err = pci_read_config_dword(ubox_dev, 0x40, &config);
- if (err)
- break;
- nodeid = config;
- /* get the Node ID mapping */
- err = pci_read_config_dword(ubox_dev, 0x54, &config);
- if (err)
- break;
-
- segment = pci_domain_nr(ubox_dev->bus);
- raw_spin_lock(&pci2phy_map_lock);
- map = __find_pci2phy_map(segment);
- if (!map) {
- raw_spin_unlock(&pci2phy_map_lock);
- err = -ENOMEM;
- break;
- }
-
- /*
- * Each group of three bits in the Node ID mapping register maps
- * to a particular node.
- */
- for (i = 0; i < 8; i++) {
- if (nodeid == ((config >> (3 * i)) & 0x7)) {
- map->pbus_to_physid[bus] = i;
- break;
- }
- }
- raw_spin_unlock(&pci2phy_map_lock);
- }
-
- if (!err) {
- /*
- * For a PCI bus with no UBOX device, find the next bus
- * that has a UBOX device and use its mapping.
- */
- raw_spin_lock(&pci2phy_map_lock);
- list_for_each_entry(map, &pci2phy_map_head, list) {
- i = -1;
- for (bus = 255; bus >= 0; bus--) {
- if (map->pbus_to_physid[bus] >= 0)
- i = map->pbus_to_physid[bus];
- else
- map->pbus_to_physid[bus] = i;
- }
- }
- raw_spin_unlock(&pci2phy_map_lock);
- }
-
- pci_dev_put(ubox_dev);
-
- return err ? pcibios_err_to_errno(err) : 0;
-}
-
-int snbep_uncore_pci_init(void)
-{
- int ret = snbep_pci2phy_map_init(0x3ce0);
- if (ret)
- return ret;
- uncore_pci_uncores = snbep_pci_uncores;
- uncore_pci_driver = &snbep_uncore_pci_driver;
- return 0;
-}
-/* end of Sandy Bridge-EP uncore support */
-
-/* IvyTown uncore support */
-static void ivbep_uncore_msr_init_box(struct intel_uncore_box *box)
-{
- unsigned msr = uncore_msr_box_ctl(box);
- if (msr)
- wrmsrl(msr, IVBEP_PMON_BOX_CTL_INT);
-}
-
-static void ivbep_uncore_pci_init_box(struct intel_uncore_box *box)
-{
- struct pci_dev *pdev = box->pci_dev;
-
- pci_write_config_dword(pdev, SNBEP_PCI_PMON_BOX_CTL, IVBEP_PMON_BOX_CTL_INT);
-}
-
-#define IVBEP_UNCORE_MSR_OPS_COMMON_INIT() \
- .init_box = ivbep_uncore_msr_init_box, \
- .disable_box = snbep_uncore_msr_disable_box, \
- .enable_box = snbep_uncore_msr_enable_box, \
- .disable_event = snbep_uncore_msr_disable_event, \
- .enable_event = snbep_uncore_msr_enable_event, \
- .read_counter = uncore_msr_read_counter
-
-static struct intel_uncore_ops ivbep_uncore_msr_ops = {
- IVBEP_UNCORE_MSR_OPS_COMMON_INIT(),
-};
-
-static struct intel_uncore_ops ivbep_uncore_pci_ops = {
- .init_box = ivbep_uncore_pci_init_box,
- .disable_box = snbep_uncore_pci_disable_box,
- .enable_box = snbep_uncore_pci_enable_box,
- .disable_event = snbep_uncore_pci_disable_event,
- .enable_event = snbep_uncore_pci_enable_event,
- .read_counter = snbep_uncore_pci_read_counter,
-};
-
-#define IVBEP_UNCORE_PCI_COMMON_INIT() \
- .perf_ctr = SNBEP_PCI_PMON_CTR0, \
- .event_ctl = SNBEP_PCI_PMON_CTL0, \
- .event_mask = IVBEP_PMON_RAW_EVENT_MASK, \
- .box_ctl = SNBEP_PCI_PMON_BOX_CTL, \
- .ops = &ivbep_uncore_pci_ops, \
- .format_group = &ivbep_uncore_format_group
-
-static struct attribute *ivbep_uncore_formats_attr[] = {
- &format_attr_event.attr,
- &format_attr_umask.attr,
- &format_attr_edge.attr,
- &format_attr_inv.attr,
- &format_attr_thresh8.attr,
- NULL,
-};
-
-static struct attribute *ivbep_uncore_ubox_formats_attr[] = {
- &format_attr_event.attr,
- &format_attr_umask.attr,
- &format_attr_edge.attr,
- &format_attr_inv.attr,
- &format_attr_thresh5.attr,
- NULL,
-};
-
-static struct attribute *ivbep_uncore_cbox_formats_attr[] = {
- &format_attr_event.attr,
- &format_attr_umask.attr,
- &format_attr_edge.attr,
- &format_attr_tid_en.attr,
- &format_attr_thresh8.attr,
- &format_attr_filter_tid.attr,
- &format_attr_filter_link.attr,
- &format_attr_filter_state2.attr,
- &format_attr_filter_nid2.attr,
- &format_attr_filter_opc2.attr,
- &format_attr_filter_nc.attr,
- &format_attr_filter_c6.attr,
- &format_attr_filter_isoc.attr,
- NULL,
-};
-
-static struct attribute *ivbep_uncore_pcu_formats_attr[] = {
- &format_attr_event_ext.attr,
- &format_attr_occ_sel.attr,
- &format_attr_edge.attr,
- &format_attr_thresh5.attr,
- &format_attr_occ_invert.attr,
- &format_attr_occ_edge.attr,
- &format_attr_filter_band0.attr,
- &format_attr_filter_band1.attr,
- &format_attr_filter_band2.attr,
- &format_attr_filter_band3.attr,
- NULL,
-};
-
-static struct attribute *ivbep_uncore_qpi_formats_attr[] = {
- &format_attr_event_ext.attr,
- &format_attr_umask.attr,
- &format_attr_edge.attr,
- &format_attr_thresh8.attr,
- &format_attr_match_rds.attr,
- &format_attr_match_rnid30.attr,
- &format_attr_match_rnid4.attr,
- &format_attr_match_dnid.attr,
- &format_attr_match_mc.attr,
- &format_attr_match_opc.attr,
- &format_attr_match_vnw.attr,
- &format_attr_match0.attr,
- &format_attr_match1.attr,
- &format_attr_mask_rds.attr,
- &format_attr_mask_rnid30.attr,
- &format_attr_mask_rnid4.attr,
- &format_attr_mask_dnid.attr,
- &format_attr_mask_mc.attr,
- &format_attr_mask_opc.attr,
- &format_attr_mask_vnw.attr,
- &format_attr_mask0.attr,
- &format_attr_mask1.attr,
- NULL,
-};
-
-static struct attribute_group ivbep_uncore_format_group = {
- .name = "format",
- .attrs = ivbep_uncore_formats_attr,
-};
-
-static struct attribute_group ivbep_uncore_ubox_format_group = {
- .name = "format",
- .attrs = ivbep_uncore_ubox_formats_attr,
-};
-
-static struct attribute_group ivbep_uncore_cbox_format_group = {
- .name = "format",
- .attrs = ivbep_uncore_cbox_formats_attr,
-};
-
-static struct attribute_group ivbep_uncore_pcu_format_group = {
- .name = "format",
- .attrs = ivbep_uncore_pcu_formats_attr,
-};
-
-static struct attribute_group ivbep_uncore_qpi_format_group = {
- .name = "format",
- .attrs = ivbep_uncore_qpi_formats_attr,
-};
-
-static struct intel_uncore_type ivbep_uncore_ubox = {
- .name = "ubox",
- .num_counters = 2,
- .num_boxes = 1,
- .perf_ctr_bits = 44,
- .fixed_ctr_bits = 48,
- .perf_ctr = SNBEP_U_MSR_PMON_CTR0,
- .event_ctl = SNBEP_U_MSR_PMON_CTL0,
- .event_mask = IVBEP_U_MSR_PMON_RAW_EVENT_MASK,
- .fixed_ctr = SNBEP_U_MSR_PMON_UCLK_FIXED_CTR,
- .fixed_ctl = SNBEP_U_MSR_PMON_UCLK_FIXED_CTL,
- .ops = &ivbep_uncore_msr_ops,
- .format_group = &ivbep_uncore_ubox_format_group,
-};
-
-static struct extra_reg ivbep_uncore_cbox_extra_regs[] = {
- SNBEP_CBO_EVENT_EXTRA_REG(SNBEP_CBO_PMON_CTL_TID_EN,
- SNBEP_CBO_PMON_CTL_TID_EN, 0x1),
- SNBEP_CBO_EVENT_EXTRA_REG(0x1031, 0x10ff, 0x2),
- SNBEP_CBO_EVENT_EXTRA_REG(0x1134, 0xffff, 0x4),
- SNBEP_CBO_EVENT_EXTRA_REG(0x4134, 0xffff, 0xc),
- SNBEP_CBO_EVENT_EXTRA_REG(0x5134, 0xffff, 0xc),
- SNBEP_CBO_EVENT_EXTRA_REG(0x0334, 0xffff, 0x4),
- SNBEP_CBO_EVENT_EXTRA_REG(0x4334, 0xffff, 0xc),
- SNBEP_CBO_EVENT_EXTRA_REG(0x0534, 0xffff, 0x4),
- SNBEP_CBO_EVENT_EXTRA_REG(0x4534, 0xffff, 0xc),
- SNBEP_CBO_EVENT_EXTRA_REG(0x0934, 0xffff, 0x4),
- SNBEP_CBO_EVENT_EXTRA_REG(0x4934, 0xffff, 0xc),
- SNBEP_CBO_EVENT_EXTRA_REG(0x0135, 0xffff, 0x10),
- SNBEP_CBO_EVENT_EXTRA_REG(0x0335, 0xffff, 0x10),
- SNBEP_CBO_EVENT_EXTRA_REG(0x2135, 0xffff, 0x10),
- SNBEP_CBO_EVENT_EXTRA_REG(0x2335, 0xffff, 0x10),
- SNBEP_CBO_EVENT_EXTRA_REG(0x4135, 0xffff, 0x18),
- SNBEP_CBO_EVENT_EXTRA_REG(0x4335, 0xffff, 0x18),
- SNBEP_CBO_EVENT_EXTRA_REG(0x4435, 0xffff, 0x8),
- SNBEP_CBO_EVENT_EXTRA_REG(0x4835, 0xffff, 0x8),
- SNBEP_CBO_EVENT_EXTRA_REG(0x4a35, 0xffff, 0x8),
- SNBEP_CBO_EVENT_EXTRA_REG(0x5035, 0xffff, 0x8),
- SNBEP_CBO_EVENT_EXTRA_REG(0x8135, 0xffff, 0x10),
- SNBEP_CBO_EVENT_EXTRA_REG(0x8335, 0xffff, 0x10),
- SNBEP_CBO_EVENT_EXTRA_REG(0x0136, 0xffff, 0x10),
- SNBEP_CBO_EVENT_EXTRA_REG(0x0336, 0xffff, 0x10),
- SNBEP_CBO_EVENT_EXTRA_REG(0x2136, 0xffff, 0x10),
- SNBEP_CBO_EVENT_EXTRA_REG(0x2336, 0xffff, 0x10),
- SNBEP_CBO_EVENT_EXTRA_REG(0x4136, 0xffff, 0x18),
- SNBEP_CBO_EVENT_EXTRA_REG(0x4336, 0xffff, 0x18),
- SNBEP_CBO_EVENT_EXTRA_REG(0x4436, 0xffff, 0x8),
- SNBEP_CBO_EVENT_EXTRA_REG(0x4836, 0xffff, 0x8),
- SNBEP_CBO_EVENT_EXTRA_REG(0x4a36, 0xffff, 0x8),
- SNBEP_CBO_EVENT_EXTRA_REG(0x5036, 0xffff, 0x8),
- SNBEP_CBO_EVENT_EXTRA_REG(0x8136, 0xffff, 0x10),
- SNBEP_CBO_EVENT_EXTRA_REG(0x8336, 0xffff, 0x10),
- SNBEP_CBO_EVENT_EXTRA_REG(0x4037, 0x40ff, 0x8),
- EVENT_EXTRA_END
-};
-
-static u64 ivbep_cbox_filter_mask(int fields)
-{
- u64 mask = 0;
-
- if (fields & 0x1)
- mask |= IVBEP_CB0_MSR_PMON_BOX_FILTER_TID;
- if (fields & 0x2)
- mask |= IVBEP_CB0_MSR_PMON_BOX_FILTER_LINK;
- if (fields & 0x4)
- mask |= IVBEP_CB0_MSR_PMON_BOX_FILTER_STATE;
- if (fields & 0x8)
- mask |= IVBEP_CB0_MSR_PMON_BOX_FILTER_NID;
- if (fields & 0x10) {
- mask |= IVBEP_CB0_MSR_PMON_BOX_FILTER_OPC;
- mask |= IVBEP_CB0_MSR_PMON_BOX_FILTER_NC;
- mask |= IVBEP_CB0_MSR_PMON_BOX_FILTER_C6;
- mask |= IVBEP_CB0_MSR_PMON_BOX_FILTER_ISOC;
- }
-
- return mask;
-}
-
-static struct event_constraint *
-ivbep_cbox_get_constraint(struct intel_uncore_box *box, struct perf_event *event)
-{
- return __snbep_cbox_get_constraint(box, event, ivbep_cbox_filter_mask);
-}
-
-static int ivbep_cbox_hw_config(struct intel_uncore_box *box, struct perf_event *event)
-{
- struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
- struct extra_reg *er;
- int idx = 0;
-
- for (er = ivbep_uncore_cbox_extra_regs; er->msr; er++) {
- if (er->event != (event->hw.config & er->config_mask))
- continue;
- idx |= er->idx;
- }
-
- if (idx) {
- reg1->reg = SNBEP_C0_MSR_PMON_BOX_FILTER +
- SNBEP_CBO_MSR_OFFSET * box->pmu->pmu_idx;
- reg1->config = event->attr.config1 & ivbep_cbox_filter_mask(idx);
- reg1->idx = idx;
- }
- return 0;
-}
-
-static void ivbep_cbox_enable_event(struct intel_uncore_box *box, struct perf_event *event)
-{
- struct hw_perf_event *hwc = &event->hw;
- struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
-
- if (reg1->idx != EXTRA_REG_NONE) {
- u64 filter = uncore_shared_reg_config(box, 0);
- wrmsrl(reg1->reg, filter & 0xffffffff);
- wrmsrl(reg1->reg + 6, filter >> 32);
- }
-
- wrmsrl(hwc->config_base, hwc->config | SNBEP_PMON_CTL_EN);
-}
-
-static struct intel_uncore_ops ivbep_uncore_cbox_ops = {
- .init_box = ivbep_uncore_msr_init_box,
- .disable_box = snbep_uncore_msr_disable_box,
- .enable_box = snbep_uncore_msr_enable_box,
- .disable_event = snbep_uncore_msr_disable_event,
- .enable_event = ivbep_cbox_enable_event,
- .read_counter = uncore_msr_read_counter,
- .hw_config = ivbep_cbox_hw_config,
- .get_constraint = ivbep_cbox_get_constraint,
- .put_constraint = snbep_cbox_put_constraint,
-};
-
-static struct intel_uncore_type ivbep_uncore_cbox = {
- .name = "cbox",
- .num_counters = 4,
- .num_boxes = 15,
- .perf_ctr_bits = 44,
- .event_ctl = SNBEP_C0_MSR_PMON_CTL0,
- .perf_ctr = SNBEP_C0_MSR_PMON_CTR0,
- .event_mask = IVBEP_CBO_MSR_PMON_RAW_EVENT_MASK,
- .box_ctl = SNBEP_C0_MSR_PMON_BOX_CTL,
- .msr_offset = SNBEP_CBO_MSR_OFFSET,
- .num_shared_regs = 1,
- .constraints = snbep_uncore_cbox_constraints,
- .ops = &ivbep_uncore_cbox_ops,
- .format_group = &ivbep_uncore_cbox_format_group,
-};
-
-static struct intel_uncore_ops ivbep_uncore_pcu_ops = {
- IVBEP_UNCORE_MSR_OPS_COMMON_INIT(),
- .hw_config = snbep_pcu_hw_config,
- .get_constraint = snbep_pcu_get_constraint,
- .put_constraint = snbep_pcu_put_constraint,
-};
-
-static struct intel_uncore_type ivbep_uncore_pcu = {
- .name = "pcu",
- .num_counters = 4,
- .num_boxes = 1,
- .perf_ctr_bits = 48,
- .perf_ctr = SNBEP_PCU_MSR_PMON_CTR0,
- .event_ctl = SNBEP_PCU_MSR_PMON_CTL0,
- .event_mask = IVBEP_PCU_MSR_PMON_RAW_EVENT_MASK,
- .box_ctl = SNBEP_PCU_MSR_PMON_BOX_CTL,
- .num_shared_regs = 1,
- .ops = &ivbep_uncore_pcu_ops,
- .format_group = &ivbep_uncore_pcu_format_group,
-};
-
-static struct intel_uncore_type *ivbep_msr_uncores[] = {
- &ivbep_uncore_ubox,
- &ivbep_uncore_cbox,
- &ivbep_uncore_pcu,
- NULL,
-};
-
-void ivbep_uncore_cpu_init(void)
-{
- if (ivbep_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores)
- ivbep_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores;
- uncore_msr_uncores = ivbep_msr_uncores;
-}
-
-static struct intel_uncore_type ivbep_uncore_ha = {
- .name = "ha",
- .num_counters = 4,
- .num_boxes = 2,
- .perf_ctr_bits = 48,
- IVBEP_UNCORE_PCI_COMMON_INIT(),
-};
-
-static struct intel_uncore_type ivbep_uncore_imc = {
- .name = "imc",
- .num_counters = 4,
- .num_boxes = 8,
- .perf_ctr_bits = 48,
- .fixed_ctr_bits = 48,
- .fixed_ctr = SNBEP_MC_CHy_PCI_PMON_FIXED_CTR,
- .fixed_ctl = SNBEP_MC_CHy_PCI_PMON_FIXED_CTL,
- .event_descs = snbep_uncore_imc_events,
- IVBEP_UNCORE_PCI_COMMON_INIT(),
-};
-
-/* registers in IRP boxes are not properly aligned */
-static unsigned ivbep_uncore_irp_ctls[] = {0xd8, 0xdc, 0xe0, 0xe4};
-static unsigned ivbep_uncore_irp_ctrs[] = {0xa0, 0xb0, 0xb8, 0xc0};
-
-static void ivbep_uncore_irp_enable_event(struct intel_uncore_box *box, struct perf_event *event)
-{
- struct pci_dev *pdev = box->pci_dev;
- struct hw_perf_event *hwc = &event->hw;
-
- pci_write_config_dword(pdev, ivbep_uncore_irp_ctls[hwc->idx],
- hwc->config | SNBEP_PMON_CTL_EN);
-}
-
-static void ivbep_uncore_irp_disable_event(struct intel_uncore_box *box, struct perf_event *event)
-{
- struct pci_dev *pdev = box->pci_dev;
- struct hw_perf_event *hwc = &event->hw;
-
- pci_write_config_dword(pdev, ivbep_uncore_irp_ctls[hwc->idx], hwc->config);
-}
-
-static u64 ivbep_uncore_irp_read_counter(struct intel_uncore_box *box, struct perf_event *event)
-{
- struct pci_dev *pdev = box->pci_dev;
- struct hw_perf_event *hwc = &event->hw;
- u64 count = 0;
-
- pci_read_config_dword(pdev, ivbep_uncore_irp_ctrs[hwc->idx], (u32 *)&count);
- pci_read_config_dword(pdev, ivbep_uncore_irp_ctrs[hwc->idx] + 4, (u32 *)&count + 1);
-
- return count;
-}
-
-static struct intel_uncore_ops ivbep_uncore_irp_ops = {
- .init_box = ivbep_uncore_pci_init_box,
- .disable_box = snbep_uncore_pci_disable_box,
- .enable_box = snbep_uncore_pci_enable_box,
- .disable_event = ivbep_uncore_irp_disable_event,
- .enable_event = ivbep_uncore_irp_enable_event,
- .read_counter = ivbep_uncore_irp_read_counter,
-};
-
-static struct intel_uncore_type ivbep_uncore_irp = {
- .name = "irp",
- .num_counters = 4,
- .num_boxes = 1,
- .perf_ctr_bits = 48,
- .event_mask = IVBEP_PMON_RAW_EVENT_MASK,
- .box_ctl = SNBEP_PCI_PMON_BOX_CTL,
- .ops = &ivbep_uncore_irp_ops,
- .format_group = &ivbep_uncore_format_group,
-};
-
-static struct intel_uncore_ops ivbep_uncore_qpi_ops = {
- .init_box = ivbep_uncore_pci_init_box,
- .disable_box = snbep_uncore_pci_disable_box,
- .enable_box = snbep_uncore_pci_enable_box,
- .disable_event = snbep_uncore_pci_disable_event,
- .enable_event = snbep_qpi_enable_event,
- .read_counter = snbep_uncore_pci_read_counter,
- .hw_config = snbep_qpi_hw_config,
- .get_constraint = uncore_get_constraint,
- .put_constraint = uncore_put_constraint,
-};
-
-static struct intel_uncore_type ivbep_uncore_qpi = {
- .name = "qpi",
- .num_counters = 4,
- .num_boxes = 3,
- .perf_ctr_bits = 48,
- .perf_ctr = SNBEP_PCI_PMON_CTR0,
- .event_ctl = SNBEP_PCI_PMON_CTL0,
- .event_mask = IVBEP_QPI_PCI_PMON_RAW_EVENT_MASK,
- .box_ctl = SNBEP_PCI_PMON_BOX_CTL,
- .num_shared_regs = 1,
- .ops = &ivbep_uncore_qpi_ops,
- .format_group = &ivbep_uncore_qpi_format_group,
-};
-
-static struct intel_uncore_type ivbep_uncore_r2pcie = {
- .name = "r2pcie",
- .num_counters = 4,
- .num_boxes = 1,
- .perf_ctr_bits = 44,
- .constraints = snbep_uncore_r2pcie_constraints,
- IVBEP_UNCORE_PCI_COMMON_INIT(),
-};
-
-static struct intel_uncore_type ivbep_uncore_r3qpi = {
- .name = "r3qpi",
- .num_counters = 3,
- .num_boxes = 2,
- .perf_ctr_bits = 44,
- .constraints = snbep_uncore_r3qpi_constraints,
- IVBEP_UNCORE_PCI_COMMON_INIT(),
-};
-
-enum {
- IVBEP_PCI_UNCORE_HA,
- IVBEP_PCI_UNCORE_IMC,
- IVBEP_PCI_UNCORE_IRP,
- IVBEP_PCI_UNCORE_QPI,
- IVBEP_PCI_UNCORE_R2PCIE,
- IVBEP_PCI_UNCORE_R3QPI,
-};
-
-static struct intel_uncore_type *ivbep_pci_uncores[] = {
- [IVBEP_PCI_UNCORE_HA] = &ivbep_uncore_ha,
- [IVBEP_PCI_UNCORE_IMC] = &ivbep_uncore_imc,
- [IVBEP_PCI_UNCORE_IRP] = &ivbep_uncore_irp,
- [IVBEP_PCI_UNCORE_QPI] = &ivbep_uncore_qpi,
- [IVBEP_PCI_UNCORE_R2PCIE] = &ivbep_uncore_r2pcie,
- [IVBEP_PCI_UNCORE_R3QPI] = &ivbep_uncore_r3qpi,
- NULL,
-};
-
-static const struct pci_device_id ivbep_uncore_pci_ids[] = {
- { /* Home Agent 0 */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe30),
- .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_HA, 0),
- },
- { /* Home Agent 1 */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe38),
- .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_HA, 1),
- },
- { /* MC0 Channel 0 */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xeb4),
- .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_IMC, 0),
- },
- { /* MC0 Channel 1 */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xeb5),
- .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_IMC, 1),
- },
- { /* MC0 Channel 3 */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xeb0),
- .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_IMC, 2),
- },
- { /* MC0 Channel 4 */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xeb1),
- .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_IMC, 3),
- },
- { /* MC1 Channel 0 */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xef4),
- .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_IMC, 4),
- },
- { /* MC1 Channel 1 */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xef5),
- .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_IMC, 5),
- },
- { /* MC1 Channel 3 */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xef0),
- .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_IMC, 6),
- },
- { /* MC1 Channel 4 */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xef1),
- .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_IMC, 7),
- },
- { /* IRP */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe39),
- .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_IRP, 0),
- },
- { /* QPI0 Port 0 */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe32),
- .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_QPI, 0),
- },
- { /* QPI0 Port 1 */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe33),
- .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_QPI, 1),
- },
- { /* QPI1 Port 2 */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe3a),
- .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_QPI, 2),
- },
- { /* R2PCIe */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe34),
- .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_R2PCIE, 0),
- },
- { /* R3QPI0 Link 0 */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe36),
- .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_R3QPI, 0),
- },
- { /* R3QPI0 Link 1 */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe37),
- .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_R3QPI, 1),
- },
- { /* R3QPI1 Link 2 */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe3e),
- .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_R3QPI, 2),
- },
- { /* QPI Port 0 filter */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe86),
- .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV,
- SNBEP_PCI_QPI_PORT0_FILTER),
- },
- { /* QPI Port 0 filter */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe96),
- .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV,
- SNBEP_PCI_QPI_PORT1_FILTER),
- },
- { /* end: all zeroes */ }
-};
-
-static struct pci_driver ivbep_uncore_pci_driver = {
- .name = "ivbep_uncore",
- .id_table = ivbep_uncore_pci_ids,
-};
-
-int ivbep_uncore_pci_init(void)
-{
- int ret = snbep_pci2phy_map_init(0x0e1e);
- if (ret)
- return ret;
- uncore_pci_uncores = ivbep_pci_uncores;
- uncore_pci_driver = &ivbep_uncore_pci_driver;
- return 0;
-}
-/* end of IvyTown uncore support */
-
-/* KNL uncore support */
-static struct attribute *knl_uncore_ubox_formats_attr[] = {
- &format_attr_event.attr,
- &format_attr_umask.attr,
- &format_attr_edge.attr,
- &format_attr_tid_en.attr,
- &format_attr_inv.attr,
- &format_attr_thresh5.attr,
- NULL,
-};
-
-static struct attribute_group knl_uncore_ubox_format_group = {
- .name = "format",
- .attrs = knl_uncore_ubox_formats_attr,
-};
-
-static struct intel_uncore_type knl_uncore_ubox = {
- .name = "ubox",
- .num_counters = 2,
- .num_boxes = 1,
- .perf_ctr_bits = 48,
- .fixed_ctr_bits = 48,
- .perf_ctr = HSWEP_U_MSR_PMON_CTR0,
- .event_ctl = HSWEP_U_MSR_PMON_CTL0,
- .event_mask = KNL_U_MSR_PMON_RAW_EVENT_MASK,
- .fixed_ctr = HSWEP_U_MSR_PMON_UCLK_FIXED_CTR,
- .fixed_ctl = HSWEP_U_MSR_PMON_UCLK_FIXED_CTL,
- .ops = &snbep_uncore_msr_ops,
- .format_group = &knl_uncore_ubox_format_group,
-};
-
-static struct attribute *knl_uncore_cha_formats_attr[] = {
- &format_attr_event.attr,
- &format_attr_umask.attr,
- &format_attr_qor.attr,
- &format_attr_edge.attr,
- &format_attr_tid_en.attr,
- &format_attr_inv.attr,
- &format_attr_thresh8.attr,
- &format_attr_filter_tid4.attr,
- &format_attr_filter_link3.attr,
- &format_attr_filter_state4.attr,
- &format_attr_filter_local.attr,
- &format_attr_filter_all_op.attr,
- &format_attr_filter_nnm.attr,
- &format_attr_filter_opc3.attr,
- &format_attr_filter_nc.attr,
- &format_attr_filter_isoc.attr,
- NULL,
-};
-
-static struct attribute_group knl_uncore_cha_format_group = {
- .name = "format",
- .attrs = knl_uncore_cha_formats_attr,
-};
-
-static struct event_constraint knl_uncore_cha_constraints[] = {
- UNCORE_EVENT_CONSTRAINT(0x11, 0x1),
- UNCORE_EVENT_CONSTRAINT(0x1f, 0x1),
- UNCORE_EVENT_CONSTRAINT(0x36, 0x1),
- EVENT_CONSTRAINT_END
-};
-
-static struct extra_reg knl_uncore_cha_extra_regs[] = {
- SNBEP_CBO_EVENT_EXTRA_REG(SNBEP_CBO_PMON_CTL_TID_EN,
- SNBEP_CBO_PMON_CTL_TID_EN, 0x1),
- SNBEP_CBO_EVENT_EXTRA_REG(0x3d, 0xff, 0x2),
- SNBEP_CBO_EVENT_EXTRA_REG(0x35, 0xff, 0x4),
- SNBEP_CBO_EVENT_EXTRA_REG(0x36, 0xff, 0x4),
- EVENT_EXTRA_END
-};
-
-static u64 knl_cha_filter_mask(int fields)
-{
- u64 mask = 0;
-
- if (fields & 0x1)
- mask |= KNL_CHA_MSR_PMON_BOX_FILTER_TID;
- if (fields & 0x2)
- mask |= KNL_CHA_MSR_PMON_BOX_FILTER_STATE;
- if (fields & 0x4)
- mask |= KNL_CHA_MSR_PMON_BOX_FILTER_OP;
- return mask;
-}
-
-static struct event_constraint *
-knl_cha_get_constraint(struct intel_uncore_box *box, struct perf_event *event)
-{
- return __snbep_cbox_get_constraint(box, event, knl_cha_filter_mask);
-}
-
-static int knl_cha_hw_config(struct intel_uncore_box *box,
- struct perf_event *event)
-{
- struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
- struct extra_reg *er;
- int idx = 0;
-
- for (er = knl_uncore_cha_extra_regs; er->msr; er++) {
- if (er->event != (event->hw.config & er->config_mask))
- continue;
- idx |= er->idx;
- }
-
- if (idx) {
- reg1->reg = HSWEP_C0_MSR_PMON_BOX_FILTER0 +
- KNL_CHA_MSR_OFFSET * box->pmu->pmu_idx;
- reg1->config = event->attr.config1 & knl_cha_filter_mask(idx);
- reg1->idx = idx;
- }
- return 0;
-}
-
-static void hswep_cbox_enable_event(struct intel_uncore_box *box,
- struct perf_event *event);
-
-static struct intel_uncore_ops knl_uncore_cha_ops = {
- .init_box = snbep_uncore_msr_init_box,
- .disable_box = snbep_uncore_msr_disable_box,
- .enable_box = snbep_uncore_msr_enable_box,
- .disable_event = snbep_uncore_msr_disable_event,
- .enable_event = hswep_cbox_enable_event,
- .read_counter = uncore_msr_read_counter,
- .hw_config = knl_cha_hw_config,
- .get_constraint = knl_cha_get_constraint,
- .put_constraint = snbep_cbox_put_constraint,
-};
-
-static struct intel_uncore_type knl_uncore_cha = {
- .name = "cha",
- .num_counters = 4,
- .num_boxes = 38,
- .perf_ctr_bits = 48,
- .event_ctl = HSWEP_C0_MSR_PMON_CTL0,
- .perf_ctr = HSWEP_C0_MSR_PMON_CTR0,
- .event_mask = KNL_CHA_MSR_PMON_RAW_EVENT_MASK,
- .box_ctl = HSWEP_C0_MSR_PMON_BOX_CTL,
- .msr_offset = KNL_CHA_MSR_OFFSET,
- .num_shared_regs = 1,
- .constraints = knl_uncore_cha_constraints,
- .ops = &knl_uncore_cha_ops,
- .format_group = &knl_uncore_cha_format_group,
-};
-
-static struct attribute *knl_uncore_pcu_formats_attr[] = {
- &format_attr_event2.attr,
- &format_attr_use_occ_ctr.attr,
- &format_attr_occ_sel.attr,
- &format_attr_edge.attr,
- &format_attr_tid_en.attr,
- &format_attr_inv.attr,
- &format_attr_thresh6.attr,
- &format_attr_occ_invert.attr,
- &format_attr_occ_edge_det.attr,
- NULL,
-};
-
-static struct attribute_group knl_uncore_pcu_format_group = {
- .name = "format",
- .attrs = knl_uncore_pcu_formats_attr,
-};
-
-static struct intel_uncore_type knl_uncore_pcu = {
- .name = "pcu",
- .num_counters = 4,
- .num_boxes = 1,
- .perf_ctr_bits = 48,
- .perf_ctr = HSWEP_PCU_MSR_PMON_CTR0,
- .event_ctl = HSWEP_PCU_MSR_PMON_CTL0,
- .event_mask = KNL_PCU_MSR_PMON_RAW_EVENT_MASK,
- .box_ctl = HSWEP_PCU_MSR_PMON_BOX_CTL,
- .ops = &snbep_uncore_msr_ops,
- .format_group = &knl_uncore_pcu_format_group,
-};
-
-static struct intel_uncore_type *knl_msr_uncores[] = {
- &knl_uncore_ubox,
- &knl_uncore_cha,
- &knl_uncore_pcu,
- NULL,
-};
-
-void knl_uncore_cpu_init(void)
-{
- uncore_msr_uncores = knl_msr_uncores;
-}
-
-static void knl_uncore_imc_enable_box(struct intel_uncore_box *box)
-{
- struct pci_dev *pdev = box->pci_dev;
- int box_ctl = uncore_pci_box_ctl(box);
-
- pci_write_config_dword(pdev, box_ctl, 0);
-}
-
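-/*
- * KNL fixed events use a dedicated enable bit (KNL_PMON_FIXED_CTL_EN);
- * everything else is enabled through the usual SNB-EP control bit.
- */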
-static void knl_uncore_imc_enable_event(struct intel_uncore_box *box,
- struct perf_event *event)
-{
- struct pci_dev *pdev = box->pci_dev;
- struct hw_perf_event *hwc = &event->hw;
-
- if ((event->attr.config & SNBEP_PMON_CTL_EV_SEL_MASK)
- == UNCORE_FIXED_EVENT)
- pci_write_config_dword(pdev, hwc->config_base,
- hwc->config | KNL_PMON_FIXED_CTL_EN);
- else
- pci_write_config_dword(pdev, hwc->config_base,
- hwc->config | SNBEP_PMON_CTL_EN);
-}
-
-static struct intel_uncore_ops knl_uncore_imc_ops = {
- .init_box = snbep_uncore_pci_init_box,
- .disable_box = snbep_uncore_pci_disable_box,
- .enable_box = knl_uncore_imc_enable_box,
- .read_counter = snbep_uncore_pci_read_counter,
- .enable_event = knl_uncore_imc_enable_event,
- .disable_event = snbep_uncore_pci_disable_event,
-};
-
-static struct intel_uncore_type knl_uncore_imc_uclk = {
- .name = "imc_uclk",
- .num_counters = 4,
- .num_boxes = 2,
- .perf_ctr_bits = 48,
- .fixed_ctr_bits = 48,
- .perf_ctr = KNL_UCLK_MSR_PMON_CTR0_LOW,
- .event_ctl = KNL_UCLK_MSR_PMON_CTL0,
- .event_mask = SNBEP_PMON_RAW_EVENT_MASK,
- .fixed_ctr = KNL_UCLK_MSR_PMON_UCLK_FIXED_LOW,
- .fixed_ctl = KNL_UCLK_MSR_PMON_UCLK_FIXED_CTL,
- .box_ctl = KNL_UCLK_MSR_PMON_BOX_CTL,
- .ops = &knl_uncore_imc_ops,
- .format_group = &snbep_uncore_format_group,
-};
-
-static struct intel_uncore_type knl_uncore_imc_dclk = {
- .name = "imc",
- .num_counters = 4,
- .num_boxes = 6,
- .perf_ctr_bits = 48,
- .fixed_ctr_bits = 48,
- .perf_ctr = KNL_MC0_CH0_MSR_PMON_CTR0_LOW,
- .event_ctl = KNL_MC0_CH0_MSR_PMON_CTL0,
- .event_mask = SNBEP_PMON_RAW_EVENT_MASK,
- .fixed_ctr = KNL_MC0_CH0_MSR_PMON_FIXED_LOW,
- .fixed_ctl = KNL_MC0_CH0_MSR_PMON_FIXED_CTL,
- .box_ctl = KNL_MC0_CH0_MSR_PMON_BOX_CTL,
- .ops = &knl_uncore_imc_ops,
- .format_group = &snbep_uncore_format_group,
-};
-
-static struct intel_uncore_type knl_uncore_edc_uclk = {
- .name = "edc_uclk",
- .num_counters = 4,
- .num_boxes = 8,
- .perf_ctr_bits = 48,
- .fixed_ctr_bits = 48,
- .perf_ctr = KNL_UCLK_MSR_PMON_CTR0_LOW,
- .event_ctl = KNL_UCLK_MSR_PMON_CTL0,
- .event_mask = SNBEP_PMON_RAW_EVENT_MASK,
- .fixed_ctr = KNL_UCLK_MSR_PMON_UCLK_FIXED_LOW,
- .fixed_ctl = KNL_UCLK_MSR_PMON_UCLK_FIXED_CTL,
- .box_ctl = KNL_UCLK_MSR_PMON_BOX_CTL,
- .ops = &knl_uncore_imc_ops,
- .format_group = &snbep_uncore_format_group,
-};
-
-static struct intel_uncore_type knl_uncore_edc_eclk = {
- .name = "edc_eclk",
- .num_counters = 4,
- .num_boxes = 8,
- .perf_ctr_bits = 48,
- .fixed_ctr_bits = 48,
- .perf_ctr = KNL_EDC0_ECLK_MSR_PMON_CTR0_LOW,
- .event_ctl = KNL_EDC0_ECLK_MSR_PMON_CTL0,
- .event_mask = SNBEP_PMON_RAW_EVENT_MASK,
- .fixed_ctr = KNL_EDC0_ECLK_MSR_PMON_ECLK_FIXED_LOW,
- .fixed_ctl = KNL_EDC0_ECLK_MSR_PMON_ECLK_FIXED_CTL,
- .box_ctl = KNL_EDC0_ECLK_MSR_PMON_BOX_CTL,
- .ops = &knl_uncore_imc_ops,
- .format_group = &snbep_uncore_format_group,
-};
-
-static struct event_constraint knl_uncore_m2pcie_constraints[] = {
- UNCORE_EVENT_CONSTRAINT(0x23, 0x3),
- EVENT_CONSTRAINT_END
-};
-
-static struct intel_uncore_type knl_uncore_m2pcie = {
- .name = "m2pcie",
- .num_counters = 4,
- .num_boxes = 1,
- .perf_ctr_bits = 48,
- .constraints = knl_uncore_m2pcie_constraints,
- SNBEP_UNCORE_PCI_COMMON_INIT(),
-};
-
-static struct attribute *knl_uncore_irp_formats_attr[] = {
- &format_attr_event.attr,
- &format_attr_umask.attr,
- &format_attr_qor.attr,
- &format_attr_edge.attr,
- &format_attr_inv.attr,
- &format_attr_thresh8.attr,
- NULL,
-};
-
-static struct attribute_group knl_uncore_irp_format_group = {
- .name = "format",
- .attrs = knl_uncore_irp_formats_attr,
-};
-
-static struct intel_uncore_type knl_uncore_irp = {
- .name = "irp",
- .num_counters = 2,
- .num_boxes = 1,
- .perf_ctr_bits = 48,
- .perf_ctr = SNBEP_PCI_PMON_CTR0,
- .event_ctl = SNBEP_PCI_PMON_CTL0,
- .event_mask = KNL_IRP_PCI_PMON_RAW_EVENT_MASK,
- .box_ctl = KNL_IRP_PCI_PMON_BOX_CTL,
- .ops = &snbep_uncore_pci_ops,
- .format_group = &knl_uncore_irp_format_group,
-};
-
-enum {
- KNL_PCI_UNCORE_MC_UCLK,
- KNL_PCI_UNCORE_MC_DCLK,
- KNL_PCI_UNCORE_EDC_UCLK,
- KNL_PCI_UNCORE_EDC_ECLK,
- KNL_PCI_UNCORE_M2PCIE,
- KNL_PCI_UNCORE_IRP,
-};
-
-static struct intel_uncore_type *knl_pci_uncores[] = {
- [KNL_PCI_UNCORE_MC_UCLK] = &knl_uncore_imc_uclk,
- [KNL_PCI_UNCORE_MC_DCLK] = &knl_uncore_imc_dclk,
- [KNL_PCI_UNCORE_EDC_UCLK] = &knl_uncore_edc_uclk,
- [KNL_PCI_UNCORE_EDC_ECLK] = &knl_uncore_edc_eclk,
- [KNL_PCI_UNCORE_M2PCIE] = &knl_uncore_m2pcie,
- [KNL_PCI_UNCORE_IRP] = &knl_uncore_irp,
- NULL,
-};
-
-/*
- * KNL uses a common PCI device ID for multiple instances of an Uncore PMU
- * device type. Prior to KNL, each instance of a PMU device type had a unique
- * device ID.
- *
- * PCI Device ID Uncore PMU Devices
- * ----------------------------------
- * 0x7841 MC0 UClk, MC1 UClk
- * 0x7843 MC0 DClk CH 0, MC0 DClk CH 1, MC0 DClk CH 2,
- * MC1 DClk CH 0, MC1 DClk CH 1, MC1 DClk CH 2
- * 0x7833 EDC0 UClk, EDC1 UClk, EDC2 UClk, EDC3 UClk,
- * EDC4 UClk, EDC5 UClk, EDC6 UClk, EDC7 UClk
- * 0x7835 EDC0 EClk, EDC1 EClk, EDC2 EClk, EDC3 EClk,
- * EDC4 EClk, EDC5 EClk, EDC6 EClk, EDC7 EClk
- * 0x7817 M2PCIe
- * 0x7814 IRP
- */
-
-static const struct pci_device_id knl_uncore_pci_ids[] = {
- { /* MC UClk */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7841),
- .driver_data = UNCORE_PCI_DEV_DATA(KNL_PCI_UNCORE_MC_UCLK, 0),
- },
- { /* MC DClk Channel */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7843),
- .driver_data = UNCORE_PCI_DEV_DATA(KNL_PCI_UNCORE_MC_DCLK, 0),
- },
- { /* EDC UClk */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7833),
- .driver_data = UNCORE_PCI_DEV_DATA(KNL_PCI_UNCORE_EDC_UCLK, 0),
- },
- { /* EDC EClk */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7835),
- .driver_data = UNCORE_PCI_DEV_DATA(KNL_PCI_UNCORE_EDC_ECLK, 0),
- },
- { /* M2PCIe */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7817),
- .driver_data = UNCORE_PCI_DEV_DATA(KNL_PCI_UNCORE_M2PCIE, 0),
- },
- { /* IRP */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7814),
- .driver_data = UNCORE_PCI_DEV_DATA(KNL_PCI_UNCORE_IRP, 0),
- },
- { /* end: all zeroes */ }
-};
-
-static struct pci_driver knl_uncore_pci_driver = {
- .name = "knl_uncore",
- .id_table = knl_uncore_pci_ids,
-};
-
-int knl_uncore_pci_init(void)
-{
- int ret;
-
- /* All KNL PCI based PMON units are on the same PCI bus except IRP */
- ret = snb_pci2phy_map_init(0x7814); /* IRP */
- if (ret)
- return ret;
- ret = snb_pci2phy_map_init(0x7817); /* M2PCIe */
- if (ret)
- return ret;
- uncore_pci_uncores = knl_pci_uncores;
- uncore_pci_driver = &knl_uncore_pci_driver;
- return 0;
-}
-
-/* end of KNL uncore support */
-
-/* Haswell-EP uncore support */
-static struct attribute *hswep_uncore_ubox_formats_attr[] = {
- &format_attr_event.attr,
- &format_attr_umask.attr,
- &format_attr_edge.attr,
- &format_attr_inv.attr,
- &format_attr_thresh5.attr,
- &format_attr_filter_tid2.attr,
- &format_attr_filter_cid.attr,
- NULL,
-};
-
-static struct attribute_group hswep_uncore_ubox_format_group = {
- .name = "format",
- .attrs = hswep_uncore_ubox_formats_attr,
-};
-
-static int hswep_ubox_hw_config(struct intel_uncore_box *box, struct perf_event *event)
-{
- struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
- reg1->reg = HSWEP_U_MSR_PMON_FILTER;
- reg1->config = event->attr.config1 & HSWEP_U_MSR_PMON_BOX_FILTER_MASK;
- reg1->idx = 0;
- return 0;
-}
-
-static struct intel_uncore_ops hswep_uncore_ubox_ops = {
- SNBEP_UNCORE_MSR_OPS_COMMON_INIT(),
- .hw_config = hswep_ubox_hw_config,
- .get_constraint = uncore_get_constraint,
- .put_constraint = uncore_put_constraint,
-};
-
-static struct intel_uncore_type hswep_uncore_ubox = {
- .name = "ubox",
- .num_counters = 2,
- .num_boxes = 1,
- .perf_ctr_bits = 44,
- .fixed_ctr_bits = 48,
- .perf_ctr = HSWEP_U_MSR_PMON_CTR0,
- .event_ctl = HSWEP_U_MSR_PMON_CTL0,
- .event_mask = SNBEP_U_MSR_PMON_RAW_EVENT_MASK,
- .fixed_ctr = HSWEP_U_MSR_PMON_UCLK_FIXED_CTR,
- .fixed_ctl = HSWEP_U_MSR_PMON_UCLK_FIXED_CTL,
- .num_shared_regs = 1,
- .ops = &hswep_uncore_ubox_ops,
- .format_group = &hswep_uncore_ubox_format_group,
-};
-
-static struct attribute *hswep_uncore_cbox_formats_attr[] = {
- &format_attr_event.attr,
- &format_attr_umask.attr,
- &format_attr_edge.attr,
- &format_attr_tid_en.attr,
- &format_attr_thresh8.attr,
- &format_attr_filter_tid3.attr,
- &format_attr_filter_link2.attr,
- &format_attr_filter_state3.attr,
- &format_attr_filter_nid2.attr,
- &format_attr_filter_opc2.attr,
- &format_attr_filter_nc.attr,
- &format_attr_filter_c6.attr,
- &format_attr_filter_isoc.attr,
- NULL,
-};
-
-static struct attribute_group hswep_uncore_cbox_format_group = {
- .name = "format",
- .attrs = hswep_uncore_cbox_formats_attr,
-};
-
-static struct event_constraint hswep_uncore_cbox_constraints[] = {
- UNCORE_EVENT_CONSTRAINT(0x01, 0x1),
- UNCORE_EVENT_CONSTRAINT(0x09, 0x1),
- UNCORE_EVENT_CONSTRAINT(0x11, 0x1),
- UNCORE_EVENT_CONSTRAINT(0x36, 0x1),
- UNCORE_EVENT_CONSTRAINT(0x38, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x3b, 0x1),
- UNCORE_EVENT_CONSTRAINT(0x3e, 0x1),
- EVENT_CONSTRAINT_END
-};
-
-static struct extra_reg hswep_uncore_cbox_extra_regs[] = {
- SNBEP_CBO_EVENT_EXTRA_REG(SNBEP_CBO_PMON_CTL_TID_EN,
- SNBEP_CBO_PMON_CTL_TID_EN, 0x1),
- SNBEP_CBO_EVENT_EXTRA_REG(0x0334, 0xffff, 0x4),
- SNBEP_CBO_EVENT_EXTRA_REG(0x0534, 0xffff, 0x4),
- SNBEP_CBO_EVENT_EXTRA_REG(0x0934, 0xffff, 0x4),
- SNBEP_CBO_EVENT_EXTRA_REG(0x1134, 0xffff, 0x4),
- SNBEP_CBO_EVENT_EXTRA_REG(0x2134, 0xffff, 0x4),
- SNBEP_CBO_EVENT_EXTRA_REG(0x4134, 0xffff, 0x4),
- SNBEP_CBO_EVENT_EXTRA_REG(0x4037, 0x40ff, 0x8),
- SNBEP_CBO_EVENT_EXTRA_REG(0x4028, 0x40ff, 0x8),
- SNBEP_CBO_EVENT_EXTRA_REG(0x4032, 0x40ff, 0x8),
- SNBEP_CBO_EVENT_EXTRA_REG(0x4029, 0x40ff, 0x8),
- SNBEP_CBO_EVENT_EXTRA_REG(0x4033, 0x40ff, 0x8),
- SNBEP_CBO_EVENT_EXTRA_REG(0x402A, 0x40ff, 0x8),
- SNBEP_CBO_EVENT_EXTRA_REG(0x0135, 0xffff, 0x12),
- SNBEP_CBO_EVENT_EXTRA_REG(0x0335, 0xffff, 0x10),
- SNBEP_CBO_EVENT_EXTRA_REG(0x4135, 0xffff, 0x18),
- SNBEP_CBO_EVENT_EXTRA_REG(0x4435, 0xffff, 0x8),
- SNBEP_CBO_EVENT_EXTRA_REG(0x4835, 0xffff, 0x8),
- SNBEP_CBO_EVENT_EXTRA_REG(0x5035, 0xffff, 0x8),
- SNBEP_CBO_EVENT_EXTRA_REG(0x4335, 0xffff, 0x18),
- SNBEP_CBO_EVENT_EXTRA_REG(0x4a35, 0xffff, 0x8),
- SNBEP_CBO_EVENT_EXTRA_REG(0x2335, 0xffff, 0x10),
- SNBEP_CBO_EVENT_EXTRA_REG(0x8335, 0xffff, 0x10),
- SNBEP_CBO_EVENT_EXTRA_REG(0x2135, 0xffff, 0x10),
- SNBEP_CBO_EVENT_EXTRA_REG(0x8135, 0xffff, 0x10),
- SNBEP_CBO_EVENT_EXTRA_REG(0x0136, 0xffff, 0x10),
- SNBEP_CBO_EVENT_EXTRA_REG(0x0336, 0xffff, 0x10),
- SNBEP_CBO_EVENT_EXTRA_REG(0x4136, 0xffff, 0x18),
- SNBEP_CBO_EVENT_EXTRA_REG(0x4436, 0xffff, 0x8),
- SNBEP_CBO_EVENT_EXTRA_REG(0x4836, 0xffff, 0x8),
- SNBEP_CBO_EVENT_EXTRA_REG(0x4336, 0xffff, 0x18),
- SNBEP_CBO_EVENT_EXTRA_REG(0x4a36, 0xffff, 0x8),
- SNBEP_CBO_EVENT_EXTRA_REG(0x2336, 0xffff, 0x10),
- SNBEP_CBO_EVENT_EXTRA_REG(0x8336, 0xffff, 0x10),
- SNBEP_CBO_EVENT_EXTRA_REG(0x2136, 0xffff, 0x10),
- SNBEP_CBO_EVENT_EXTRA_REG(0x8136, 0xffff, 0x10),
- SNBEP_CBO_EVENT_EXTRA_REG(0x5036, 0xffff, 0x8),
- EVENT_EXTRA_END
-};
-
-static u64 hswep_cbox_filter_mask(int fields)
-{
- u64 mask = 0;
- if (fields & 0x1)
- mask |= HSWEP_CB0_MSR_PMON_BOX_FILTER_TID;
- if (fields & 0x2)
- mask |= HSWEP_CB0_MSR_PMON_BOX_FILTER_LINK;
- if (fields & 0x4)
- mask |= HSWEP_CB0_MSR_PMON_BOX_FILTER_STATE;
- if (fields & 0x8)
- mask |= HSWEP_CB0_MSR_PMON_BOX_FILTER_NID;
- if (fields & 0x10) {
- mask |= HSWEP_CB0_MSR_PMON_BOX_FILTER_OPC;
- mask |= HSWEP_CB0_MSR_PMON_BOX_FILTER_NC;
- mask |= HSWEP_CB0_MSR_PMON_BOX_FILTER_C6;
- mask |= HSWEP_CB0_MSR_PMON_BOX_FILTER_ISOC;
- }
- return mask;
-}
-
-static struct event_constraint *
-hswep_cbox_get_constraint(struct intel_uncore_box *box, struct perf_event *event)
-{
- return __snbep_cbox_get_constraint(box, event, hswep_cbox_filter_mask);
-}
-
-static int hswep_cbox_hw_config(struct intel_uncore_box *box, struct perf_event *event)
-{
- struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
- struct extra_reg *er;
- int idx = 0;
-
- for (er = hswep_uncore_cbox_extra_regs; er->msr; er++) {
- if (er->event != (event->hw.config & er->config_mask))
- continue;
- idx |= er->idx;
- }
-
- if (idx) {
- reg1->reg = HSWEP_C0_MSR_PMON_BOX_FILTER0 +
- HSWEP_CBO_MSR_OFFSET * box->pmu->pmu_idx;
- reg1->config = event->attr.config1 & hswep_cbox_filter_mask(idx);
- reg1->idx = idx;
- }
- return 0;
-}
-
-static void hswep_cbox_enable_event(struct intel_uncore_box *box,
- struct perf_event *event)
-{
- struct hw_perf_event *hwc = &event->hw;
- struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
-
- if (reg1->idx != EXTRA_REG_NONE) {
- u64 filter = uncore_shared_reg_config(box, 0);
- wrmsrl(reg1->reg, filter & 0xffffffff);
- wrmsrl(reg1->reg + 1, filter >> 32);
- }
-
- wrmsrl(hwc->config_base, hwc->config | SNBEP_PMON_CTL_EN);
-}
-
-static struct intel_uncore_ops hswep_uncore_cbox_ops = {
- .init_box = snbep_uncore_msr_init_box,
- .disable_box = snbep_uncore_msr_disable_box,
- .enable_box = snbep_uncore_msr_enable_box,
- .disable_event = snbep_uncore_msr_disable_event,
- .enable_event = hswep_cbox_enable_event,
- .read_counter = uncore_msr_read_counter,
- .hw_config = hswep_cbox_hw_config,
- .get_constraint = hswep_cbox_get_constraint,
- .put_constraint = snbep_cbox_put_constraint,
-};
-
-static struct intel_uncore_type hswep_uncore_cbox = {
- .name = "cbox",
- .num_counters = 4,
- .num_boxes = 18,
- .perf_ctr_bits = 48,
- .event_ctl = HSWEP_C0_MSR_PMON_CTL0,
- .perf_ctr = HSWEP_C0_MSR_PMON_CTR0,
- .event_mask = SNBEP_CBO_MSR_PMON_RAW_EVENT_MASK,
- .box_ctl = HSWEP_C0_MSR_PMON_BOX_CTL,
- .msr_offset = HSWEP_CBO_MSR_OFFSET,
- .num_shared_regs = 1,
- .constraints = hswep_uncore_cbox_constraints,
- .ops = &hswep_uncore_cbox_ops,
- .format_group = &hswep_uncore_cbox_format_group,
-};
-
-/*
- * Write SBOX Initialization register bit by bit to avoid spurious #GPs
- */
-static void hswep_uncore_sbox_msr_init_box(struct intel_uncore_box *box)
-{
- unsigned msr = uncore_msr_box_ctl(box);
-
- if (msr) {
- u64 init = SNBEP_PMON_BOX_CTL_INT;
- u64 flags = 0;
- int i;
-
- for_each_set_bit(i, (unsigned long *)&init, 64) {
- flags |= (1ULL << i);
- wrmsrl(msr, flags);
- }
- }
-}
-
-static struct intel_uncore_ops hswep_uncore_sbox_msr_ops = {
- __SNBEP_UNCORE_MSR_OPS_COMMON_INIT(),
- .init_box = hswep_uncore_sbox_msr_init_box
-};
-
-static struct attribute *hswep_uncore_sbox_formats_attr[] = {
- &format_attr_event.attr,
- &format_attr_umask.attr,
- &format_attr_edge.attr,
- &format_attr_tid_en.attr,
- &format_attr_inv.attr,
- &format_attr_thresh8.attr,
- NULL,
-};
-
-static struct attribute_group hswep_uncore_sbox_format_group = {
- .name = "format",
- .attrs = hswep_uncore_sbox_formats_attr,
-};
-
-static struct intel_uncore_type hswep_uncore_sbox = {
- .name = "sbox",
- .num_counters = 4,
- .num_boxes = 4,
- .perf_ctr_bits = 44,
- .event_ctl = HSWEP_S0_MSR_PMON_CTL0,
- .perf_ctr = HSWEP_S0_MSR_PMON_CTR0,
- .event_mask = HSWEP_S_MSR_PMON_RAW_EVENT_MASK,
- .box_ctl = HSWEP_S0_MSR_PMON_BOX_CTL,
- .msr_offset = HSWEP_SBOX_MSR_OFFSET,
- .ops = &hswep_uncore_sbox_msr_ops,
- .format_group = &hswep_uncore_sbox_format_group,
-};
-
-static int hswep_pcu_hw_config(struct intel_uncore_box *box, struct perf_event *event)
-{
- struct hw_perf_event *hwc = &event->hw;
- struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
- int ev_sel = hwc->config & SNBEP_PMON_CTL_EV_SEL_MASK;
-
- if (ev_sel >= 0xb && ev_sel <= 0xe) {
- reg1->reg = HSWEP_PCU_MSR_PMON_BOX_FILTER;
- reg1->idx = ev_sel - 0xb;
- reg1->config = event->attr.config1 & (0xff << reg1->idx);
- }
- return 0;
-}
-
-static struct intel_uncore_ops hswep_uncore_pcu_ops = {
- SNBEP_UNCORE_MSR_OPS_COMMON_INIT(),
- .hw_config = hswep_pcu_hw_config,
- .get_constraint = snbep_pcu_get_constraint,
- .put_constraint = snbep_pcu_put_constraint,
-};
-
-static struct intel_uncore_type hswep_uncore_pcu = {
- .name = "pcu",
- .num_counters = 4,
- .num_boxes = 1,
- .perf_ctr_bits = 48,
- .perf_ctr = HSWEP_PCU_MSR_PMON_CTR0,
- .event_ctl = HSWEP_PCU_MSR_PMON_CTL0,
- .event_mask = SNBEP_PCU_MSR_PMON_RAW_EVENT_MASK,
- .box_ctl = HSWEP_PCU_MSR_PMON_BOX_CTL,
- .num_shared_regs = 1,
- .ops = &hswep_uncore_pcu_ops,
- .format_group = &snbep_uncore_pcu_format_group,
-};
-
-static struct intel_uncore_type *hswep_msr_uncores[] = {
- &hswep_uncore_ubox,
- &hswep_uncore_cbox,
- &hswep_uncore_sbox,
- &hswep_uncore_pcu,
- NULL,
-};
-
-void hswep_uncore_cpu_init(void)
-{
- if (hswep_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores)
- hswep_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores;
-
- /* Detect 6-8 core systems with only two SBOXes */
- if (uncore_extra_pci_dev[0][HSWEP_PCI_PCU_3]) {
- u32 capid4;
-
- pci_read_config_dword(uncore_extra_pci_dev[0][HSWEP_PCI_PCU_3],
- 0x94, &capid4);
- if (((capid4 >> 6) & 0x3) == 0)
- hswep_uncore_sbox.num_boxes = 2;
- }
-
- uncore_msr_uncores = hswep_msr_uncores;
-}
-
-static struct intel_uncore_type hswep_uncore_ha = {
- .name = "ha",
- .num_counters = 5,
- .num_boxes = 2,
- .perf_ctr_bits = 48,
- SNBEP_UNCORE_PCI_COMMON_INIT(),
-};
-
-static struct uncore_event_desc hswep_uncore_imc_events[] = {
- INTEL_UNCORE_EVENT_DESC(clockticks, "event=0x00,umask=0x00"),
- INTEL_UNCORE_EVENT_DESC(cas_count_read, "event=0x04,umask=0x03"),
- INTEL_UNCORE_EVENT_DESC(cas_count_read.scale, "6.103515625e-5"),
- INTEL_UNCORE_EVENT_DESC(cas_count_read.unit, "MiB"),
- INTEL_UNCORE_EVENT_DESC(cas_count_write, "event=0x04,umask=0x0c"),
- INTEL_UNCORE_EVENT_DESC(cas_count_write.scale, "6.103515625e-5"),
- INTEL_UNCORE_EVENT_DESC(cas_count_write.unit, "MiB"),
- { /* end: all zeroes */ },
-};
-
-static struct intel_uncore_type hswep_uncore_imc = {
- .name = "imc",
- .num_counters = 5,
- .num_boxes = 8,
- .perf_ctr_bits = 48,
- .fixed_ctr_bits = 48,
- .fixed_ctr = SNBEP_MC_CHy_PCI_PMON_FIXED_CTR,
- .fixed_ctl = SNBEP_MC_CHy_PCI_PMON_FIXED_CTL,
- .event_descs = hswep_uncore_imc_events,
- SNBEP_UNCORE_PCI_COMMON_INIT(),
-};
-
-static unsigned hswep_uncore_irp_ctrs[] = {0xa0, 0xa8, 0xb0, 0xb8};
-
-static u64 hswep_uncore_irp_read_counter(struct intel_uncore_box *box, struct perf_event *event)
-{
- struct pci_dev *pdev = box->pci_dev;
- struct hw_perf_event *hwc = &event->hw;
- u64 count = 0;
-
- pci_read_config_dword(pdev, hswep_uncore_irp_ctrs[hwc->idx], (u32 *)&count);
- pci_read_config_dword(pdev, hswep_uncore_irp_ctrs[hwc->idx] + 4, (u32 *)&count + 1);
-
- return count;
-}
-
-static struct intel_uncore_ops hswep_uncore_irp_ops = {
- .init_box = snbep_uncore_pci_init_box,
- .disable_box = snbep_uncore_pci_disable_box,
- .enable_box = snbep_uncore_pci_enable_box,
- .disable_event = ivbep_uncore_irp_disable_event,
- .enable_event = ivbep_uncore_irp_enable_event,
- .read_counter = hswep_uncore_irp_read_counter,
-};
-
-static struct intel_uncore_type hswep_uncore_irp = {
- .name = "irp",
- .num_counters = 4,
- .num_boxes = 1,
- .perf_ctr_bits = 48,
- .event_mask = SNBEP_PMON_RAW_EVENT_MASK,
- .box_ctl = SNBEP_PCI_PMON_BOX_CTL,
- .ops = &hswep_uncore_irp_ops,
- .format_group = &snbep_uncore_format_group,
-};
-
-static struct intel_uncore_type hswep_uncore_qpi = {
- .name = "qpi",
- .num_counters = 5,
- .num_boxes = 3,
- .perf_ctr_bits = 48,
- .perf_ctr = SNBEP_PCI_PMON_CTR0,
- .event_ctl = SNBEP_PCI_PMON_CTL0,
- .event_mask = SNBEP_QPI_PCI_PMON_RAW_EVENT_MASK,
- .box_ctl = SNBEP_PCI_PMON_BOX_CTL,
- .num_shared_regs = 1,
- .ops = &snbep_uncore_qpi_ops,
- .format_group = &snbep_uncore_qpi_format_group,
-};
-
-static struct event_constraint hswep_uncore_r2pcie_constraints[] = {
- UNCORE_EVENT_CONSTRAINT(0x10, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x11, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x13, 0x1),
- UNCORE_EVENT_CONSTRAINT(0x23, 0x1),
- UNCORE_EVENT_CONSTRAINT(0x24, 0x1),
- UNCORE_EVENT_CONSTRAINT(0x25, 0x1),
- UNCORE_EVENT_CONSTRAINT(0x26, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x27, 0x1),
- UNCORE_EVENT_CONSTRAINT(0x28, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x29, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x2a, 0x1),
- UNCORE_EVENT_CONSTRAINT(0x2b, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x2c, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x2d, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x32, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x33, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x34, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x35, 0x3),
- EVENT_CONSTRAINT_END
-};
-
-static struct intel_uncore_type hswep_uncore_r2pcie = {
- .name = "r2pcie",
- .num_counters = 4,
- .num_boxes = 1,
- .perf_ctr_bits = 48,
- .constraints = hswep_uncore_r2pcie_constraints,
- SNBEP_UNCORE_PCI_COMMON_INIT(),
-};
-
-static struct event_constraint hswep_uncore_r3qpi_constraints[] = {
- UNCORE_EVENT_CONSTRAINT(0x01, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x07, 0x7),
- UNCORE_EVENT_CONSTRAINT(0x08, 0x7),
- UNCORE_EVENT_CONSTRAINT(0x09, 0x7),
- UNCORE_EVENT_CONSTRAINT(0x0a, 0x7),
- UNCORE_EVENT_CONSTRAINT(0x0e, 0x7),
- UNCORE_EVENT_CONSTRAINT(0x10, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x11, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x12, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x13, 0x1),
- UNCORE_EVENT_CONSTRAINT(0x14, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x15, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x1f, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x20, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x21, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x22, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x23, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x25, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x26, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x28, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x29, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x2c, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x2d, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x2e, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x2f, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x31, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x32, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x33, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x34, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x36, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x37, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x38, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x39, 0x3),
- EVENT_CONSTRAINT_END
-};
-
-static struct intel_uncore_type hswep_uncore_r3qpi = {
- .name = "r3qpi",
- .num_counters = 4,
- .num_boxes = 3,
- .perf_ctr_bits = 44,
- .constraints = hswep_uncore_r3qpi_constraints,
- SNBEP_UNCORE_PCI_COMMON_INIT(),
-};
-
-enum {
- HSWEP_PCI_UNCORE_HA,
- HSWEP_PCI_UNCORE_IMC,
- HSWEP_PCI_UNCORE_IRP,
- HSWEP_PCI_UNCORE_QPI,
- HSWEP_PCI_UNCORE_R2PCIE,
- HSWEP_PCI_UNCORE_R3QPI,
-};
-
-static struct intel_uncore_type *hswep_pci_uncores[] = {
- [HSWEP_PCI_UNCORE_HA] = &hswep_uncore_ha,
- [HSWEP_PCI_UNCORE_IMC] = &hswep_uncore_imc,
- [HSWEP_PCI_UNCORE_IRP] = &hswep_uncore_irp,
- [HSWEP_PCI_UNCORE_QPI] = &hswep_uncore_qpi,
- [HSWEP_PCI_UNCORE_R2PCIE] = &hswep_uncore_r2pcie,
- [HSWEP_PCI_UNCORE_R3QPI] = &hswep_uncore_r3qpi,
- NULL,
-};
-
-static const struct pci_device_id hswep_uncore_pci_ids[] = {
- { /* Home Agent 0 */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2f30),
- .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_HA, 0),
- },
- { /* Home Agent 1 */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2f38),
- .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_HA, 1),
- },
- { /* MC0 Channel 0 */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2fb0),
- .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_IMC, 0),
- },
- { /* MC0 Channel 1 */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2fb1),
- .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_IMC, 1),
- },
- { /* MC0 Channel 2 */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2fb4),
- .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_IMC, 2),
- },
- { /* MC0 Channel 3 */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2fb5),
- .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_IMC, 3),
- },
- { /* MC1 Channel 0 */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2fd0),
- .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_IMC, 4),
- },
- { /* MC1 Channel 1 */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2fd1),
- .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_IMC, 5),
- },
- { /* MC1 Channel 2 */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2fd4),
- .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_IMC, 6),
- },
- { /* MC1 Channel 3 */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2fd5),
- .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_IMC, 7),
- },
- { /* IRP */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2f39),
- .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_IRP, 0),
- },
- { /* QPI0 Port 0 */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2f32),
- .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_QPI, 0),
- },
- { /* QPI0 Port 1 */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2f33),
- .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_QPI, 1),
- },
- { /* QPI1 Port 2 */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2f3a),
- .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_QPI, 2),
- },
- { /* R2PCIe */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2f34),
- .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_R2PCIE, 0),
- },
- { /* R3QPI0 Link 0 */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2f36),
- .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_R3QPI, 0),
- },
- { /* R3QPI0 Link 1 */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2f37),
- .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_R3QPI, 1),
- },
- { /* R3QPI1 Link 2 */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2f3e),
- .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_R3QPI, 2),
- },
- { /* QPI Port 0 filter */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2f86),
- .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV,
- SNBEP_PCI_QPI_PORT0_FILTER),
- },
- { /* QPI Port 1 filter */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2f96),
- .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV,
- SNBEP_PCI_QPI_PORT1_FILTER),
- },
- { /* PCU.3 (for Capability registers) */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2fc0),
- .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV,
- HSWEP_PCI_PCU_3),
- },
- { /* end: all zeroes */ }
-};
-
-static struct pci_driver hswep_uncore_pci_driver = {
- .name = "hswep_uncore",
- .id_table = hswep_uncore_pci_ids,
-};
-
-int hswep_uncore_pci_init(void)
-{
- int ret = snbep_pci2phy_map_init(0x2f1e);
- if (ret)
- return ret;
- uncore_pci_uncores = hswep_pci_uncores;
- uncore_pci_driver = &hswep_uncore_pci_driver;
- return 0;
-}
-/* end of Haswell-EP uncore support */
-
-/* BDX uncore support */
-
-static struct intel_uncore_type bdx_uncore_ubox = {
- .name = "ubox",
- .num_counters = 2,
- .num_boxes = 1,
- .perf_ctr_bits = 48,
- .fixed_ctr_bits = 48,
- .perf_ctr = HSWEP_U_MSR_PMON_CTR0,
- .event_ctl = HSWEP_U_MSR_PMON_CTL0,
- .event_mask = SNBEP_U_MSR_PMON_RAW_EVENT_MASK,
- .fixed_ctr = HSWEP_U_MSR_PMON_UCLK_FIXED_CTR,
- .fixed_ctl = HSWEP_U_MSR_PMON_UCLK_FIXED_CTL,
- .num_shared_regs = 1,
- .ops = &ivbep_uncore_msr_ops,
- .format_group = &ivbep_uncore_ubox_format_group,
-};
-
-static struct event_constraint bdx_uncore_cbox_constraints[] = {
- UNCORE_EVENT_CONSTRAINT(0x09, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x11, 0x1),
- UNCORE_EVENT_CONSTRAINT(0x36, 0x1),
- UNCORE_EVENT_CONSTRAINT(0x3e, 0x1),
- EVENT_CONSTRAINT_END
-};
-
-static struct intel_uncore_type bdx_uncore_cbox = {
- .name = "cbox",
- .num_counters = 4,
- .num_boxes = 24,
- .perf_ctr_bits = 48,
- .event_ctl = HSWEP_C0_MSR_PMON_CTL0,
- .perf_ctr = HSWEP_C0_MSR_PMON_CTR0,
- .event_mask = SNBEP_CBO_MSR_PMON_RAW_EVENT_MASK,
- .box_ctl = HSWEP_C0_MSR_PMON_BOX_CTL,
- .msr_offset = HSWEP_CBO_MSR_OFFSET,
- .num_shared_regs = 1,
- .constraints = bdx_uncore_cbox_constraints,
- .ops = &hswep_uncore_cbox_ops,
- .format_group = &hswep_uncore_cbox_format_group,
-};
-
-static struct intel_uncore_type bdx_uncore_sbox = {
- .name = "sbox",
- .num_counters = 4,
- .num_boxes = 4,
- .perf_ctr_bits = 48,
- .event_ctl = HSWEP_S0_MSR_PMON_CTL0,
- .perf_ctr = HSWEP_S0_MSR_PMON_CTR0,
- .event_mask = HSWEP_S_MSR_PMON_RAW_EVENT_MASK,
- .box_ctl = HSWEP_S0_MSR_PMON_BOX_CTL,
- .msr_offset = HSWEP_SBOX_MSR_OFFSET,
- .ops = &hswep_uncore_sbox_msr_ops,
- .format_group = &hswep_uncore_sbox_format_group,
-};
-
-#define BDX_MSR_UNCORE_SBOX 3
-
-static struct intel_uncore_type *bdx_msr_uncores[] = {
- &bdx_uncore_ubox,
- &bdx_uncore_cbox,
- &hswep_uncore_pcu,
- &bdx_uncore_sbox,
- NULL,
-};
-
-void bdx_uncore_cpu_init(void)
-{
- if (bdx_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores)
- bdx_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores;
- uncore_msr_uncores = bdx_msr_uncores;
-
- /* BDX-DE doesn't have SBOX */
- if (boot_cpu_data.x86_model == 86)
- uncore_msr_uncores[BDX_MSR_UNCORE_SBOX] = NULL;
-}
-
-static struct intel_uncore_type bdx_uncore_ha = {
- .name = "ha",
- .num_counters = 4,
- .num_boxes = 2,
- .perf_ctr_bits = 48,
- SNBEP_UNCORE_PCI_COMMON_INIT(),
-};
-
-static struct intel_uncore_type bdx_uncore_imc = {
- .name = "imc",
- .num_counters = 5,
- .num_boxes = 8,
- .perf_ctr_bits = 48,
- .fixed_ctr_bits = 48,
- .fixed_ctr = SNBEP_MC_CHy_PCI_PMON_FIXED_CTR,
- .fixed_ctl = SNBEP_MC_CHy_PCI_PMON_FIXED_CTL,
- .event_descs = hswep_uncore_imc_events,
- SNBEP_UNCORE_PCI_COMMON_INIT(),
-};
-
-static struct intel_uncore_type bdx_uncore_irp = {
- .name = "irp",
- .num_counters = 4,
- .num_boxes = 1,
- .perf_ctr_bits = 48,
- .event_mask = SNBEP_PMON_RAW_EVENT_MASK,
- .box_ctl = SNBEP_PCI_PMON_BOX_CTL,
- .ops = &hswep_uncore_irp_ops,
- .format_group = &snbep_uncore_format_group,
-};
-
-static struct intel_uncore_type bdx_uncore_qpi = {
- .name = "qpi",
- .num_counters = 4,
- .num_boxes = 3,
- .perf_ctr_bits = 48,
- .perf_ctr = SNBEP_PCI_PMON_CTR0,
- .event_ctl = SNBEP_PCI_PMON_CTL0,
- .event_mask = SNBEP_QPI_PCI_PMON_RAW_EVENT_MASK,
- .box_ctl = SNBEP_PCI_PMON_BOX_CTL,
- .num_shared_regs = 1,
- .ops = &snbep_uncore_qpi_ops,
- .format_group = &snbep_uncore_qpi_format_group,
-};
-
-static struct event_constraint bdx_uncore_r2pcie_constraints[] = {
- UNCORE_EVENT_CONSTRAINT(0x10, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x11, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x13, 0x1),
- UNCORE_EVENT_CONSTRAINT(0x23, 0x1),
- UNCORE_EVENT_CONSTRAINT(0x25, 0x1),
- UNCORE_EVENT_CONSTRAINT(0x26, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x28, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x2c, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x2d, 0x3),
- EVENT_CONSTRAINT_END
-};
-
-static struct intel_uncore_type bdx_uncore_r2pcie = {
- .name = "r2pcie",
- .num_counters = 4,
- .num_boxes = 1,
- .perf_ctr_bits = 48,
- .constraints = bdx_uncore_r2pcie_constraints,
- SNBEP_UNCORE_PCI_COMMON_INIT(),
-};
-
-static struct event_constraint bdx_uncore_r3qpi_constraints[] = {
- UNCORE_EVENT_CONSTRAINT(0x01, 0x7),
- UNCORE_EVENT_CONSTRAINT(0x07, 0x7),
- UNCORE_EVENT_CONSTRAINT(0x08, 0x7),
- UNCORE_EVENT_CONSTRAINT(0x09, 0x7),
- UNCORE_EVENT_CONSTRAINT(0x0a, 0x7),
- UNCORE_EVENT_CONSTRAINT(0x0e, 0x7),
- UNCORE_EVENT_CONSTRAINT(0x10, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x11, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x13, 0x1),
- UNCORE_EVENT_CONSTRAINT(0x14, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x15, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x1f, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x20, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x21, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x22, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x23, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x25, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x26, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x28, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x29, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x2c, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x2d, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x2e, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x2f, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x33, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x34, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x36, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x37, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x38, 0x3),
- UNCORE_EVENT_CONSTRAINT(0x39, 0x3),
- EVENT_CONSTRAINT_END
-};
-
-static struct intel_uncore_type bdx_uncore_r3qpi = {
- .name = "r3qpi",
- .num_counters = 3,
- .num_boxes = 3,
- .perf_ctr_bits = 48,
- .constraints = bdx_uncore_r3qpi_constraints,
- SNBEP_UNCORE_PCI_COMMON_INIT(),
-};
-
-enum {
- BDX_PCI_UNCORE_HA,
- BDX_PCI_UNCORE_IMC,
- BDX_PCI_UNCORE_IRP,
- BDX_PCI_UNCORE_QPI,
- BDX_PCI_UNCORE_R2PCIE,
- BDX_PCI_UNCORE_R3QPI,
-};
-
-static struct intel_uncore_type *bdx_pci_uncores[] = {
- [BDX_PCI_UNCORE_HA] = &bdx_uncore_ha,
- [BDX_PCI_UNCORE_IMC] = &bdx_uncore_imc,
- [BDX_PCI_UNCORE_IRP] = &bdx_uncore_irp,
- [BDX_PCI_UNCORE_QPI] = &bdx_uncore_qpi,
- [BDX_PCI_UNCORE_R2PCIE] = &bdx_uncore_r2pcie,
- [BDX_PCI_UNCORE_R3QPI] = &bdx_uncore_r3qpi,
- NULL,
-};
-
-static const struct pci_device_id bdx_uncore_pci_ids[] = {
- { /* Home Agent 0 */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f30),
- .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_HA, 0),
- },
- { /* Home Agent 1 */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f38),
- .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_HA, 1),
- },
- { /* MC0 Channel 0 */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6fb0),
- .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_IMC, 0),
- },
- { /* MC0 Channel 1 */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6fb1),
- .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_IMC, 1),
- },
- { /* MC0 Channel 2 */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6fb4),
- .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_IMC, 2),
- },
- { /* MC0 Channel 3 */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6fb5),
- .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_IMC, 3),
- },
- { /* MC1 Channel 0 */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6fd0),
- .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_IMC, 4),
- },
- { /* MC1 Channel 1 */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6fd1),
- .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_IMC, 5),
- },
- { /* MC1 Channel 2 */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6fd4),
- .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_IMC, 6),
- },
- { /* MC1 Channel 3 */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6fd5),
- .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_IMC, 7),
- },
- { /* IRP */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f39),
- .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_IRP, 0),
- },
- { /* QPI0 Port 0 */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f32),
- .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_QPI, 0),
- },
- { /* QPI0 Port 1 */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f33),
- .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_QPI, 1),
- },
- { /* QPI1 Port 2 */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f3a),
- .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_QPI, 2),
- },
- { /* R2PCIe */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f34),
- .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_R2PCIE, 0),
- },
- { /* R3QPI0 Link 0 */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f36),
- .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_R3QPI, 0),
- },
- { /* R3QPI0 Link 1 */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f37),
- .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_R3QPI, 1),
- },
- { /* R3QPI1 Link 2 */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f3e),
- .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_R3QPI, 2),
- },
- { /* QPI Port 0 filter */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f86),
- .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV, 0),
- },
- { /* QPI Port 1 filter */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f96),
- .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV, 1),
- },
- { /* QPI Port 2 filter */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f46),
- .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV, 2),
- },
- { /* end: all zeroes */ }
-};
-
-static struct pci_driver bdx_uncore_pci_driver = {
- .name = "bdx_uncore",
- .id_table = bdx_uncore_pci_ids,
-};
-
-int bdx_uncore_pci_init(void)
-{
- int ret = snbep_pci2phy_map_init(0x6f1e);
-
- if (ret)
- return ret;
- uncore_pci_uncores = bdx_pci_uncores;
- uncore_pci_driver = &bdx_uncore_pci_driver;
- return 0;
-}
-
-/* end of BDX uncore support */
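An aside on the IRP boxes removed above: hswep_uncore_irp_read_counter() (and its BDX twin) can only reach the counters through PCI config space, so the 64-bit value is assembled from two 32-bit reads, one at the counter offset and one at offset + 4. Below is a minimal standalone sketch of that read pattern; read_cfg32() is a hypothetical stand-in for pci_read_config_dword(), used purely for illustration.

#include <stdint.h>

/* Hypothetical 32-bit config-space accessor standing in for
 * pci_read_config_dword(); assumed purely for illustration. */
extern uint32_t read_cfg32(unsigned int offset);

/* Assemble a 64-bit counter from its low and high 32-bit halves,
 * mirroring the two back-to-back reads in the IRP read_counter hook.
 * The read is not atomic; the high half may tick between the reads. */
static uint64_t read_counter64(unsigned int offset)
{
	uint64_t lo = read_cfg32(offset);
	uint64_t hi = read_cfg32(offset + 4);

	return (hi << 32) | lo;
}

On little-endian x86 the removed helpers get the same effect by writing the two dwords into the low and high halves of a u64 via the (u32 *)&count casts.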
diff --git a/arch/x86/kernel/cpu/perf_event_knc.c b/arch/x86/kernel/cpu/perf_event_knc.c
deleted file mode 100644
index b931095e8..000000000
--- a/arch/x86/kernel/cpu/perf_event_knc.c
+++ /dev/null
@@ -1,321 +0,0 @@
-/* Driver for Intel Xeon Phi "Knights Corner" PMU */
-
-#include <linux/perf_event.h>
-#include <linux/types.h>
-
-#include <asm/hardirq.h>
-
-#include "perf_event.h"
-
-static const u64 knc_perfmon_event_map[] =
-{
- [PERF_COUNT_HW_CPU_CYCLES] = 0x002a,
- [PERF_COUNT_HW_INSTRUCTIONS] = 0x0016,
- [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0028,
- [PERF_COUNT_HW_CACHE_MISSES] = 0x0029,
- [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x0012,
- [PERF_COUNT_HW_BRANCH_MISSES] = 0x002b,
-};
-
-static const u64 __initconst knc_hw_cache_event_ids
- [PERF_COUNT_HW_CACHE_MAX]
- [PERF_COUNT_HW_CACHE_OP_MAX]
- [PERF_COUNT_HW_CACHE_RESULT_MAX] =
-{
- [ C(L1D) ] = {
- [ C(OP_READ) ] = {
- /* On Xeon Phi event "0" is a valid DATA_READ */
- /* (L1 Data Cache Reads) Instruction. */
- /* We code this as ARCH_PERFMON_EVENTSEL_INT as this */
- /* bit will always be set in x86_pmu_hw_config(). */
- [ C(RESULT_ACCESS) ] = ARCH_PERFMON_EVENTSEL_INT,
- /* DATA_READ */
- [ C(RESULT_MISS) ] = 0x0003, /* DATA_READ_MISS */
- },
- [ C(OP_WRITE) ] = {
- [ C(RESULT_ACCESS) ] = 0x0001, /* DATA_WRITE */
- [ C(RESULT_MISS) ] = 0x0004, /* DATA_WRITE_MISS */
- },
- [ C(OP_PREFETCH) ] = {
- [ C(RESULT_ACCESS) ] = 0x0011, /* L1_DATA_PF1 */
- [ C(RESULT_MISS) ] = 0x001c, /* L1_DATA_PF1_MISS */
- },
- },
- [ C(L1I ) ] = {
- [ C(OP_READ) ] = {
- [ C(RESULT_ACCESS) ] = 0x000c, /* CODE_READ */
- [ C(RESULT_MISS) ] = 0x000e, /* CODE_CACHE_MISS */
- },
- [ C(OP_WRITE) ] = {
- [ C(RESULT_ACCESS) ] = -1,
- [ C(RESULT_MISS) ] = -1,
- },
- [ C(OP_PREFETCH) ] = {
- [ C(RESULT_ACCESS) ] = 0x0,
- [ C(RESULT_MISS) ] = 0x0,
- },
- },
- [ C(LL ) ] = {
- [ C(OP_READ) ] = {
- [ C(RESULT_ACCESS) ] = 0,
- [ C(RESULT_MISS) ] = 0x10cb, /* L2_READ_MISS */
- },
- [ C(OP_WRITE) ] = {
- [ C(RESULT_ACCESS) ] = 0x10cc, /* L2_WRITE_HIT */
- [ C(RESULT_MISS) ] = 0,
- },
- [ C(OP_PREFETCH) ] = {
- [ C(RESULT_ACCESS) ] = 0x10fc, /* L2_DATA_PF2 */
- [ C(RESULT_MISS) ] = 0x10fe, /* L2_DATA_PF2_MISS */
- },
- },
- [ C(DTLB) ] = {
- [ C(OP_READ) ] = {
- [ C(RESULT_ACCESS) ] = ARCH_PERFMON_EVENTSEL_INT,
- /* DATA_READ */
- /* see note on L1 OP_READ */
- [ C(RESULT_MISS) ] = 0x0002, /* DATA_PAGE_WALK */
- },
- [ C(OP_WRITE) ] = {
- [ C(RESULT_ACCESS) ] = 0x0001, /* DATA_WRITE */
- [ C(RESULT_MISS) ] = 0x0002, /* DATA_PAGE_WALK */
- },
- [ C(OP_PREFETCH) ] = {
- [ C(RESULT_ACCESS) ] = 0x0,
- [ C(RESULT_MISS) ] = 0x0,
- },
- },
- [ C(ITLB) ] = {
- [ C(OP_READ) ] = {
- [ C(RESULT_ACCESS) ] = 0x000c, /* CODE_READ */
- [ C(RESULT_MISS) ] = 0x000d, /* CODE_PAGE_WALK */
- },
- [ C(OP_WRITE) ] = {
- [ C(RESULT_ACCESS) ] = -1,
- [ C(RESULT_MISS) ] = -1,
- },
- [ C(OP_PREFETCH) ] = {
- [ C(RESULT_ACCESS) ] = -1,
- [ C(RESULT_MISS) ] = -1,
- },
- },
- [ C(BPU ) ] = {
- [ C(OP_READ) ] = {
- [ C(RESULT_ACCESS) ] = 0x0012, /* BRANCHES */
- [ C(RESULT_MISS) ] = 0x002b, /* BRANCHES_MISPREDICTED */
- },
- [ C(OP_WRITE) ] = {
- [ C(RESULT_ACCESS) ] = -1,
- [ C(RESULT_MISS) ] = -1,
- },
- [ C(OP_PREFETCH) ] = {
- [ C(RESULT_ACCESS) ] = -1,
- [ C(RESULT_MISS) ] = -1,
- },
- },
-};
-
-
-static u64 knc_pmu_event_map(int hw_event)
-{
- return knc_perfmon_event_map[hw_event];
-}
-
-static struct event_constraint knc_event_constraints[] =
-{
- INTEL_EVENT_CONSTRAINT(0xc3, 0x1), /* HWP_L2HIT */
- INTEL_EVENT_CONSTRAINT(0xc4, 0x1), /* HWP_L2MISS */
- INTEL_EVENT_CONSTRAINT(0xc8, 0x1), /* L2_READ_HIT_E */
- INTEL_EVENT_CONSTRAINT(0xc9, 0x1), /* L2_READ_HIT_M */
- INTEL_EVENT_CONSTRAINT(0xca, 0x1), /* L2_READ_HIT_S */
- INTEL_EVENT_CONSTRAINT(0xcb, 0x1), /* L2_READ_MISS */
- INTEL_EVENT_CONSTRAINT(0xcc, 0x1), /* L2_WRITE_HIT */
- INTEL_EVENT_CONSTRAINT(0xce, 0x1), /* L2_STRONGLY_ORDERED_STREAMING_VSTORES_MISS */
- INTEL_EVENT_CONSTRAINT(0xcf, 0x1), /* L2_WEAKLY_ORDERED_STREAMING_VSTORE_MISS */
- INTEL_EVENT_CONSTRAINT(0xd7, 0x1), /* L2_VICTIM_REQ_WITH_DATA */
- INTEL_EVENT_CONSTRAINT(0xe3, 0x1), /* SNP_HITM_BUNIT */
- INTEL_EVENT_CONSTRAINT(0xe6, 0x1), /* SNP_HIT_L2 */
- INTEL_EVENT_CONSTRAINT(0xe7, 0x1), /* SNP_HITM_L2 */
- INTEL_EVENT_CONSTRAINT(0xf1, 0x1), /* L2_DATA_READ_MISS_CACHE_FILL */
- INTEL_EVENT_CONSTRAINT(0xf2, 0x1), /* L2_DATA_WRITE_MISS_CACHE_FILL */
- INTEL_EVENT_CONSTRAINT(0xf6, 0x1), /* L2_DATA_READ_MISS_MEM_FILL */
- INTEL_EVENT_CONSTRAINT(0xf7, 0x1), /* L2_DATA_WRITE_MISS_MEM_FILL */
- INTEL_EVENT_CONSTRAINT(0xfc, 0x1), /* L2_DATA_PF2 */
- INTEL_EVENT_CONSTRAINT(0xfd, 0x1), /* L2_DATA_PF2_DROP */
- INTEL_EVENT_CONSTRAINT(0xfe, 0x1), /* L2_DATA_PF2_MISS */
- INTEL_EVENT_CONSTRAINT(0xff, 0x1), /* L2_DATA_HIT_INFLIGHT_PF2 */
- EVENT_CONSTRAINT_END
-};
-
-#define MSR_KNC_IA32_PERF_GLOBAL_STATUS 0x0000002d
-#define MSR_KNC_IA32_PERF_GLOBAL_OVF_CONTROL 0x0000002e
-#define MSR_KNC_IA32_PERF_GLOBAL_CTRL 0x0000002f
-
-#define KNC_ENABLE_COUNTER0 0x00000001
-#define KNC_ENABLE_COUNTER1 0x00000002
-
-static void knc_pmu_disable_all(void)
-{
- u64 val;
-
- rdmsrl(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val);
- val &= ~(KNC_ENABLE_COUNTER0|KNC_ENABLE_COUNTER1);
- wrmsrl(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val);
-}
-
-static void knc_pmu_enable_all(int added)
-{
- u64 val;
-
- rdmsrl(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val);
- val |= (KNC_ENABLE_COUNTER0|KNC_ENABLE_COUNTER1);
- wrmsrl(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val);
-}
-
-static inline void
-knc_pmu_disable_event(struct perf_event *event)
-{
- struct hw_perf_event *hwc = &event->hw;
- u64 val;
-
- val = hwc->config;
- val &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
-
- (void)wrmsrl_safe(hwc->config_base + hwc->idx, val);
-}
-
-static void knc_pmu_enable_event(struct perf_event *event)
-{
- struct hw_perf_event *hwc = &event->hw;
- u64 val;
-
- val = hwc->config;
- val |= ARCH_PERFMON_EVENTSEL_ENABLE;
-
- (void)wrmsrl_safe(hwc->config_base + hwc->idx, val);
-}
-
-static inline u64 knc_pmu_get_status(void)
-{
- u64 status;
-
- rdmsrl(MSR_KNC_IA32_PERF_GLOBAL_STATUS, status);
-
- return status;
-}
-
-static inline void knc_pmu_ack_status(u64 ack)
-{
- wrmsrl(MSR_KNC_IA32_PERF_GLOBAL_OVF_CONTROL, ack);
-}
-
-static int knc_pmu_handle_irq(struct pt_regs *regs)
-{
- struct perf_sample_data data;
- struct cpu_hw_events *cpuc;
- int handled = 0;
- int bit, loops;
- u64 status;
-
- cpuc = this_cpu_ptr(&cpu_hw_events);
-
- knc_pmu_disable_all();
-
- status = knc_pmu_get_status();
- if (!status) {
- knc_pmu_enable_all(0);
- return handled;
- }
-
- loops = 0;
-again:
- knc_pmu_ack_status(status);
- if (++loops > 100) {
- WARN_ONCE(1, "perf: irq loop stuck!\n");
- perf_event_print_debug();
- goto done;
- }
-
- inc_irq_stat(apic_perf_irqs);
-
- for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
- struct perf_event *event = cpuc->events[bit];
-
- handled++;
-
- if (!test_bit(bit, cpuc->active_mask))
- continue;
-
- if (!intel_pmu_save_and_restart(event))
- continue;
-
- perf_sample_data_init(&data, 0, event->hw.last_period);
-
- if (perf_event_overflow(event, &data, regs))
- x86_pmu_stop(event, 0);
- }
-
- /*
- * Repeat if there is more work to be done:
- */
- status = knc_pmu_get_status();
- if (status)
- goto again;
-
-done:
- /* Only restore PMU state when it's active. See x86_pmu_disable(). */
- if (cpuc->enabled)
- knc_pmu_enable_all(0);
-
- return handled;
-}
-
-
-PMU_FORMAT_ATTR(event, "config:0-7" );
-PMU_FORMAT_ATTR(umask, "config:8-15" );
-PMU_FORMAT_ATTR(edge, "config:18" );
-PMU_FORMAT_ATTR(inv, "config:23" );
-PMU_FORMAT_ATTR(cmask, "config:24-31" );
-
-static struct attribute *intel_knc_formats_attr[] = {
- &format_attr_event.attr,
- &format_attr_umask.attr,
- &format_attr_edge.attr,
- &format_attr_inv.attr,
- &format_attr_cmask.attr,
- NULL,
-};
-
-static const struct x86_pmu knc_pmu __initconst = {
- .name = "knc",
- .handle_irq = knc_pmu_handle_irq,
- .disable_all = knc_pmu_disable_all,
- .enable_all = knc_pmu_enable_all,
- .enable = knc_pmu_enable_event,
- .disable = knc_pmu_disable_event,
- .hw_config = x86_pmu_hw_config,
- .schedule_events = x86_schedule_events,
- .eventsel = MSR_KNC_EVNTSEL0,
- .perfctr = MSR_KNC_PERFCTR0,
- .event_map = knc_pmu_event_map,
- .max_events = ARRAY_SIZE(knc_perfmon_event_map),
- .apic = 1,
- .max_period = (1ULL << 39) - 1,
- .version = 0,
- .num_counters = 2,
- .cntval_bits = 40,
- .cntval_mask = (1ULL << 40) - 1,
- .get_event_constraints = x86_get_event_constraints,
- .event_constraints = knc_event_constraints,
- .format_attrs = intel_knc_formats_attr,
-};
-
-__init int knc_pmu_init(void)
-{
- x86_pmu = knc_pmu;
-
- memcpy(hw_cache_event_ids, knc_hw_cache_event_ids,
- sizeof(hw_cache_event_ids));
-
- return 0;
-}
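A note on the interrupt handler deleted above: knc_pmu_handle_irq() follows the usual ack-and-rescan shape, i.e. read the global overflow status, acknowledge it, service every set bit, then re-read the status in case new overflows arrived while servicing, with a loop cap as a safety valve. The sketch below shows just that control flow; get_status(), ack_status() and service() are hypothetical stand-ins for the MSR accesses and the per-event work.

#include <stdint.h>

extern uint64_t get_status(void);	/* hypothetical: read overflow status */
extern void ack_status(uint64_t bits);	/* hypothetical: acknowledge those bits */
extern void service(int bit);		/* hypothetical: handle one overflowed counter */

/* Bounded ack-and-rescan loop; returns the number of overflows handled. */
static int handle_overflows(void)
{
	int handled = 0, loops = 0;
	uint64_t status;

	while ((status = get_status()) != 0) {
		if (++loops > 100)	/* safety valve against a stuck IRQ */
			break;
		ack_status(status);
		for (int bit = 0; bit < 64; bit++) {
			if (status & (1ULL << bit)) {
				service(bit);
				handled++;
			}
		}
	}
	return handled;
}

The 100-iteration cap mirrors the loops counter in the removed handler; it trades a lost sample for not wedging the machine inside the NMI path.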
diff --git a/arch/x86/kernel/cpu/perf_event_msr.c b/arch/x86/kernel/cpu/perf_event_msr.c
deleted file mode 100644
index ec863b9a9..000000000
--- a/arch/x86/kernel/cpu/perf_event_msr.c
+++ /dev/null
@@ -1,241 +0,0 @@
-#include <linux/perf_event.h>
-
-enum perf_msr_id {
- PERF_MSR_TSC = 0,
- PERF_MSR_APERF = 1,
- PERF_MSR_MPERF = 2,
- PERF_MSR_PPERF = 3,
- PERF_MSR_SMI = 4,
-
- PERF_MSR_EVENT_MAX,
-};
-
-static bool test_aperfmperf(int idx)
-{
- return boot_cpu_has(X86_FEATURE_APERFMPERF);
-}
-
-static bool test_intel(int idx)
-{
- if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
- boot_cpu_data.x86 != 6)
- return false;
-
- switch (boot_cpu_data.x86_model) {
- case 30: /* 45nm Nehalem */
- case 26: /* 45nm Nehalem-EP */
- case 46: /* 45nm Nehalem-EX */
-
- case 37: /* 32nm Westmere */
- case 44: /* 32nm Westmere-EP */
- case 47: /* 32nm Westmere-EX */
-
- case 42: /* 32nm SandyBridge */
- case 45: /* 32nm SandyBridge-E/EN/EP */
-
- case 58: /* 22nm IvyBridge */
- case 62: /* 22nm IvyBridge-EP/EX */
-
- case 60: /* 22nm Haswell Core */
- case 63: /* 22nm Haswell Server */
- case 69: /* 22nm Haswell ULT */
- case 70: /* 22nm Haswell + GT3e (Intel Iris Pro graphics) */
-
- case 61: /* 14nm Broadwell Core-M */
- case 86: /* 14nm Broadwell Xeon D */
- case 71: /* 14nm Broadwell + GT3e (Intel Iris Pro graphics) */
- case 79: /* 14nm Broadwell Server */
-
- case 55: /* 22nm Atom "Silvermont" */
- case 77: /* 22nm Atom "Silvermont Avoton/Rangely" */
- case 76: /* 14nm Atom "Airmont" */
- if (idx == PERF_MSR_SMI)
- return true;
- break;
-
- case 78: /* 14nm Skylake Mobile */
- case 94: /* 14nm Skylake Desktop */
- if (idx == PERF_MSR_SMI || idx == PERF_MSR_PPERF)
- return true;
- break;
- }
-
- return false;
-}
-
-struct perf_msr {
- u64 msr;
- struct perf_pmu_events_attr *attr;
- bool (*test)(int idx);
-};
-
-PMU_EVENT_ATTR_STRING(tsc, evattr_tsc, "event=0x00");
-PMU_EVENT_ATTR_STRING(aperf, evattr_aperf, "event=0x01");
-PMU_EVENT_ATTR_STRING(mperf, evattr_mperf, "event=0x02");
-PMU_EVENT_ATTR_STRING(pperf, evattr_pperf, "event=0x03");
-PMU_EVENT_ATTR_STRING(smi, evattr_smi, "event=0x04");
-
-static struct perf_msr msr[] = {
- [PERF_MSR_TSC] = { 0, &evattr_tsc, NULL, },
- [PERF_MSR_APERF] = { MSR_IA32_APERF, &evattr_aperf, test_aperfmperf, },
- [PERF_MSR_MPERF] = { MSR_IA32_MPERF, &evattr_mperf, test_aperfmperf, },
- [PERF_MSR_PPERF] = { MSR_PPERF, &evattr_pperf, test_intel, },
- [PERF_MSR_SMI] = { MSR_SMI_COUNT, &evattr_smi, test_intel, },
-};
-
-static struct attribute *events_attrs[PERF_MSR_EVENT_MAX + 1] = {
- NULL,
-};
-
-static struct attribute_group events_attr_group = {
- .name = "events",
- .attrs = events_attrs,
-};
-
-PMU_FORMAT_ATTR(event, "config:0-63");
-static struct attribute *format_attrs[] = {
- &format_attr_event.attr,
- NULL,
-};
-static struct attribute_group format_attr_group = {
- .name = "format",
- .attrs = format_attrs,
-};
-
-static const struct attribute_group *attr_groups[] = {
- &events_attr_group,
- &format_attr_group,
- NULL,
-};
-
-static int msr_event_init(struct perf_event *event)
-{
- u64 cfg = event->attr.config;
-
- if (event->attr.type != event->pmu->type)
- return -ENOENT;
-
- if (cfg >= PERF_MSR_EVENT_MAX)
- return -EINVAL;
-
- /* unsupported modes and filters */
- if (event->attr.exclude_user ||
- event->attr.exclude_kernel ||
- event->attr.exclude_hv ||
- event->attr.exclude_idle ||
- event->attr.exclude_host ||
- event->attr.exclude_guest ||
- event->attr.sample_period) /* no sampling */
- return -EINVAL;
-
- if (!msr[cfg].attr)
- return -EINVAL;
-
- event->hw.idx = -1;
- event->hw.event_base = msr[cfg].msr;
- event->hw.config = cfg;
-
- return 0;
-}
-
-static inline u64 msr_read_counter(struct perf_event *event)
-{
- u64 now;
-
- if (event->hw.event_base)
- rdmsrl(event->hw.event_base, now);
- else
- rdtscll(now);
-
- return now;
-}
-static void msr_event_update(struct perf_event *event)
-{
- u64 prev, now;
- s64 delta;
-
- /* Careful, an NMI might modify the previous event value. */
-again:
- prev = local64_read(&event->hw.prev_count);
- now = msr_read_counter(event);
-
- if (local64_cmpxchg(&event->hw.prev_count, prev, now) != prev)
- goto again;
-
- delta = now - prev;
- if (unlikely(event->hw.event_base == MSR_SMI_COUNT))
- delta = sign_extend64(delta, 31);
-
-	local64_add(delta, &event->count);
-}
-
-static void msr_event_start(struct perf_event *event, int flags)
-{
- u64 now;
-
- now = msr_read_counter(event);
- local64_set(&event->hw.prev_count, now);
-}
-
-static void msr_event_stop(struct perf_event *event, int flags)
-{
- msr_event_update(event);
-}
-
-static void msr_event_del(struct perf_event *event, int flags)
-{
- msr_event_stop(event, PERF_EF_UPDATE);
-}
-
-static int msr_event_add(struct perf_event *event, int flags)
-{
- if (flags & PERF_EF_START)
- msr_event_start(event, flags);
-
- return 0;
-}
-
-static struct pmu pmu_msr = {
- .task_ctx_nr = perf_sw_context,
- .attr_groups = attr_groups,
- .event_init = msr_event_init,
- .add = msr_event_add,
- .del = msr_event_del,
- .start = msr_event_start,
- .stop = msr_event_stop,
- .read = msr_event_update,
- .capabilities = PERF_PMU_CAP_NO_INTERRUPT,
-};
-
-static int __init msr_init(void)
-{
- int i, j = 0;
-
- if (!boot_cpu_has(X86_FEATURE_TSC)) {
- pr_cont("no MSR PMU driver.\n");
- return 0;
- }
-
- /* Probe the MSRs. */
- for (i = PERF_MSR_TSC + 1; i < PERF_MSR_EVENT_MAX; i++) {
- u64 val;
-
- /*
-		 * Under virtualization you cannot tell whether an
-		 * R/O MSR is actually present :/
- */
- if (!msr[i].test(i) || rdmsrl_safe(msr[i].msr, &val))
- msr[i].attr = NULL;
- }
-
- /* List remaining MSRs in the sysfs attrs. */
- for (i = 0; i < PERF_MSR_EVENT_MAX; i++) {
- if (msr[i].attr)
- events_attrs[j++] = &msr[i].attr->attr.attr;
- }
- events_attrs[j] = NULL;
-
- perf_pmu_register(&pmu_msr, "msr", -1);
-
- return 0;
-}
-device_initcall(msr_init);
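One more aside before the next file: msr_event_update() above accumulates counts locklessly. It re-reads the hardware value, publishes it as the new prev_count with a compare-and-swap (retrying if an NMI raced in between), and sign-extends the difference for MSR_SMI_COUNT, which is only 32 bits wide, so a wrap is not misread as a huge jump. Below is a userspace-flavoured sketch of that update step using C11 atomics in place of local64_t; prev_count, event_count and read_hw() are illustrative names, not kernel APIs.

#include <stdatomic.h>
#include <stdint.h>

extern uint64_t read_hw(void);		/* hypothetical raw counter read */
static _Atomic uint64_t prev_count;	/* last published hardware value */
static _Atomic uint64_t event_count;	/* accumulated event count */

/* Sign-extend the low (bits + 1) bits of value, like the kernel's sign_extend64(). */
static int64_t sext64(uint64_t value, int bits)
{
	int shift = 63 - bits;

	return (int64_t)(value << shift) >> shift;
}

static void event_update(int counter_is_32bit)
{
	uint64_t prev, now;
	int64_t delta;

	do {				/* retry if a concurrent update won the race */
		prev = atomic_load(&prev_count);
		now = read_hw();
	} while (!atomic_compare_exchange_strong(&prev_count, &prev, now));

	delta = (int64_t)(now - prev);
	if (counter_is_32bit)		/* e.g. MSR_SMI_COUNT is only 32 bits wide */
		delta = sext64((uint64_t)delta, 31);

	atomic_fetch_add(&event_count, (uint64_t)delta);
}

The compare-and-swap retry is what makes the read safe against the NMI case called out in the removed comment.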
diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c
deleted file mode 100644
index f2e56783a..000000000
--- a/arch/x86/kernel/cpu/perf_event_p4.c
+++ /dev/null
@@ -1,1376 +0,0 @@
-/*
- * Netburst Performance Events (P4, old Xeon)
- *
- * Copyright (C) 2010 Parallels, Inc., Cyrill Gorcunov <gorcunov@openvz.org>
- * Copyright (C) 2010 Intel Corporation, Lin Ming <ming.m.lin@intel.com>
- *
- * For licencing details see kernel-base/COPYING
- */
-
-#include <linux/perf_event.h>
-
-#include <asm/perf_event_p4.h>
-#include <asm/hardirq.h>
-#include <asm/apic.h>
-
-#include "perf_event.h"
-
-#define P4_CNTR_LIMIT 3
-/*
- * array indices 0 and 1 denote the two HT threads; used on HT-enabled CPUs
- */
-struct p4_event_bind {
- unsigned int opcode; /* Event code and ESCR selector */
- unsigned int escr_msr[2]; /* ESCR MSR for this event */
- unsigned int escr_emask; /* valid ESCR EventMask bits */
- unsigned int shared; /* event is shared across threads */
-	char cntr[2][P4_CNTR_LIMIT];		/* counter index (offset), -1 on absence */
-};
-
-struct p4_pebs_bind {
- unsigned int metric_pebs;
- unsigned int metric_vert;
-};
-
-/* it sets P4_PEBS_ENABLE_UOP_TAG as well */
-#define P4_GEN_PEBS_BIND(name, pebs, vert) \
- [P4_PEBS_METRIC__##name] = { \
- .metric_pebs = pebs | P4_PEBS_ENABLE_UOP_TAG, \
- .metric_vert = vert, \
- }
-
-/*
- * note we have P4_PEBS_ENABLE_UOP_TAG always set here
- *
- * it's needed for mapping P4_PEBS_CONFIG_METRIC_MASK bits of
- * event configuration to find out which values are to be
- * written into MSR_IA32_PEBS_ENABLE and MSR_P4_PEBS_MATRIX_VERT
- * registers
- */
-static struct p4_pebs_bind p4_pebs_bind_map[] = {
- P4_GEN_PEBS_BIND(1stl_cache_load_miss_retired, 0x0000001, 0x0000001),
- P4_GEN_PEBS_BIND(2ndl_cache_load_miss_retired, 0x0000002, 0x0000001),
- P4_GEN_PEBS_BIND(dtlb_load_miss_retired, 0x0000004, 0x0000001),
- P4_GEN_PEBS_BIND(dtlb_store_miss_retired, 0x0000004, 0x0000002),
- P4_GEN_PEBS_BIND(dtlb_all_miss_retired, 0x0000004, 0x0000003),
- P4_GEN_PEBS_BIND(tagged_mispred_branch, 0x0018000, 0x0000010),
- P4_GEN_PEBS_BIND(mob_load_replay_retired, 0x0000200, 0x0000001),
- P4_GEN_PEBS_BIND(split_load_retired, 0x0000400, 0x0000001),
- P4_GEN_PEBS_BIND(split_store_retired, 0x0000400, 0x0000002),
-};
-
-/*
- * Note that we don't use CCCR1 here; there is an exception
- * for P4_BSQ_ALLOCATION, but we have no workaround for it.
- *
- * Consider this binding as the resources a particular event
- * may borrow; it doesn't contain the EventMask, Tags and
- * friends -- those are left to the caller.
- */
-static struct p4_event_bind p4_event_bind_map[] = {
- [P4_EVENT_TC_DELIVER_MODE] = {
- .opcode = P4_OPCODE(P4_EVENT_TC_DELIVER_MODE),
- .escr_msr = { MSR_P4_TC_ESCR0, MSR_P4_TC_ESCR1 },
- .escr_emask =
- P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, DD) |
- P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, DB) |
- P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, DI) |
- P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, BD) |
- P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, BB) |
- P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, BI) |
- P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, ID),
- .shared = 1,
- .cntr = { {4, 5, -1}, {6, 7, -1} },
- },
- [P4_EVENT_BPU_FETCH_REQUEST] = {
- .opcode = P4_OPCODE(P4_EVENT_BPU_FETCH_REQUEST),
- .escr_msr = { MSR_P4_BPU_ESCR0, MSR_P4_BPU_ESCR1 },
- .escr_emask =
- P4_ESCR_EMASK_BIT(P4_EVENT_BPU_FETCH_REQUEST, TCMISS),
- .cntr = { {0, -1, -1}, {2, -1, -1} },
- },
- [P4_EVENT_ITLB_REFERENCE] = {
- .opcode = P4_OPCODE(P4_EVENT_ITLB_REFERENCE),
- .escr_msr = { MSR_P4_ITLB_ESCR0, MSR_P4_ITLB_ESCR1 },
- .escr_emask =
- P4_ESCR_EMASK_BIT(P4_EVENT_ITLB_REFERENCE, HIT) |
- P4_ESCR_EMASK_BIT(P4_EVENT_ITLB_REFERENCE, MISS) |
- P4_ESCR_EMASK_BIT(P4_EVENT_ITLB_REFERENCE, HIT_UK),
- .cntr = { {0, -1, -1}, {2, -1, -1} },
- },
- [P4_EVENT_MEMORY_CANCEL] = {
- .opcode = P4_OPCODE(P4_EVENT_MEMORY_CANCEL),
- .escr_msr = { MSR_P4_DAC_ESCR0, MSR_P4_DAC_ESCR1 },
- .escr_emask =
- P4_ESCR_EMASK_BIT(P4_EVENT_MEMORY_CANCEL, ST_RB_FULL) |
- P4_ESCR_EMASK_BIT(P4_EVENT_MEMORY_CANCEL, 64K_CONF),
- .cntr = { {8, 9, -1}, {10, 11, -1} },
- },
- [P4_EVENT_MEMORY_COMPLETE] = {
- .opcode = P4_OPCODE(P4_EVENT_MEMORY_COMPLETE),
- .escr_msr = { MSR_P4_SAAT_ESCR0 , MSR_P4_SAAT_ESCR1 },
- .escr_emask =
- P4_ESCR_EMASK_BIT(P4_EVENT_MEMORY_COMPLETE, LSC) |
- P4_ESCR_EMASK_BIT(P4_EVENT_MEMORY_COMPLETE, SSC),
- .cntr = { {8, 9, -1}, {10, 11, -1} },
- },
- [P4_EVENT_LOAD_PORT_REPLAY] = {
- .opcode = P4_OPCODE(P4_EVENT_LOAD_PORT_REPLAY),
- .escr_msr = { MSR_P4_SAAT_ESCR0, MSR_P4_SAAT_ESCR1 },
- .escr_emask =
- P4_ESCR_EMASK_BIT(P4_EVENT_LOAD_PORT_REPLAY, SPLIT_LD),
- .cntr = { {8, 9, -1}, {10, 11, -1} },
- },
- [P4_EVENT_STORE_PORT_REPLAY] = {
- .opcode = P4_OPCODE(P4_EVENT_STORE_PORT_REPLAY),
- .escr_msr = { MSR_P4_SAAT_ESCR0 , MSR_P4_SAAT_ESCR1 },
- .escr_emask =
- P4_ESCR_EMASK_BIT(P4_EVENT_STORE_PORT_REPLAY, SPLIT_ST),
- .cntr = { {8, 9, -1}, {10, 11, -1} },
- },
- [P4_EVENT_MOB_LOAD_REPLAY] = {
- .opcode = P4_OPCODE(P4_EVENT_MOB_LOAD_REPLAY),
- .escr_msr = { MSR_P4_MOB_ESCR0, MSR_P4_MOB_ESCR1 },
- .escr_emask =
- P4_ESCR_EMASK_BIT(P4_EVENT_MOB_LOAD_REPLAY, NO_STA) |
- P4_ESCR_EMASK_BIT(P4_EVENT_MOB_LOAD_REPLAY, NO_STD) |
- P4_ESCR_EMASK_BIT(P4_EVENT_MOB_LOAD_REPLAY, PARTIAL_DATA) |
- P4_ESCR_EMASK_BIT(P4_EVENT_MOB_LOAD_REPLAY, UNALGN_ADDR),
- .cntr = { {0, -1, -1}, {2, -1, -1} },
- },
- [P4_EVENT_PAGE_WALK_TYPE] = {
- .opcode = P4_OPCODE(P4_EVENT_PAGE_WALK_TYPE),
- .escr_msr = { MSR_P4_PMH_ESCR0, MSR_P4_PMH_ESCR1 },
- .escr_emask =
- P4_ESCR_EMASK_BIT(P4_EVENT_PAGE_WALK_TYPE, DTMISS) |
- P4_ESCR_EMASK_BIT(P4_EVENT_PAGE_WALK_TYPE, ITMISS),
- .shared = 1,
- .cntr = { {0, -1, -1}, {2, -1, -1} },
- },
- [P4_EVENT_BSQ_CACHE_REFERENCE] = {
- .opcode = P4_OPCODE(P4_EVENT_BSQ_CACHE_REFERENCE),
- .escr_msr = { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR1 },
- .escr_emask =
- P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITS) |
- P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITE) |
- P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITM) |
- P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITS) |
- P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITE) |
- P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITM) |
- P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_MISS) |
- P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_MISS) |
- P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, WR_2ndL_MISS),
- .cntr = { {0, -1, -1}, {2, -1, -1} },
- },
- [P4_EVENT_IOQ_ALLOCATION] = {
- .opcode = P4_OPCODE(P4_EVENT_IOQ_ALLOCATION),
- .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
- .escr_emask =
- P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, DEFAULT) |
- P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, ALL_READ) |
- P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, ALL_WRITE) |
- P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_UC) |
- P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_WC) |
- P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_WT) |
- P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_WP) |
- P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_WB) |
- P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, OWN) |
- P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, OTHER) |
- P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, PREFETCH),
- .cntr = { {0, -1, -1}, {2, -1, -1} },
- },
- [P4_EVENT_IOQ_ACTIVE_ENTRIES] = { /* shared ESCR */
- .opcode = P4_OPCODE(P4_EVENT_IOQ_ACTIVE_ENTRIES),
- .escr_msr = { MSR_P4_FSB_ESCR1, MSR_P4_FSB_ESCR1 },
- .escr_emask =
- P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, DEFAULT) |
- P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, ALL_READ) |
- P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, ALL_WRITE) |
- P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_UC) |
- P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WC) |
- P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WT) |
- P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WP) |
- P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WB) |
- P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, OWN) |
- P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, OTHER) |
- P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, PREFETCH),
- .cntr = { {2, -1, -1}, {3, -1, -1} },
- },
- [P4_EVENT_FSB_DATA_ACTIVITY] = {
- .opcode = P4_OPCODE(P4_EVENT_FSB_DATA_ACTIVITY),
- .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
- .escr_emask =
- P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_DRV) |
- P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_OWN) |
- P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_OTHER) |
- P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DBSY_DRV) |
- P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DBSY_OWN) |
- P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DBSY_OTHER),
- .shared = 1,
- .cntr = { {0, -1, -1}, {2, -1, -1} },
- },
- [P4_EVENT_BSQ_ALLOCATION] = { /* shared ESCR, broken CCCR1 */
- .opcode = P4_OPCODE(P4_EVENT_BSQ_ALLOCATION),
- .escr_msr = { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR0 },
- .escr_emask =
- P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_TYPE0) |
- P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_TYPE1) |
- P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_LEN0) |
- P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_LEN1) |
- P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_IO_TYPE) |
- P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_LOCK_TYPE) |
- P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_CACHE_TYPE) |
- P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_SPLIT_TYPE) |
- P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_DEM_TYPE) |
- P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_ORD_TYPE) |
- P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, MEM_TYPE0) |
- P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, MEM_TYPE1) |
- P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, MEM_TYPE2),
- .cntr = { {0, -1, -1}, {1, -1, -1} },
- },
- [P4_EVENT_BSQ_ACTIVE_ENTRIES] = { /* shared ESCR */
- .opcode = P4_OPCODE(P4_EVENT_BSQ_ACTIVE_ENTRIES),
- .escr_msr = { MSR_P4_BSU_ESCR1 , MSR_P4_BSU_ESCR1 },
- .escr_emask =
- P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_TYPE0) |
- P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_TYPE1) |
- P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_LEN0) |
- P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_LEN1) |
- P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_IO_TYPE) |
- P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_LOCK_TYPE) |
- P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_CACHE_TYPE) |
- P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_SPLIT_TYPE) |
- P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_DEM_TYPE) |
- P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_ORD_TYPE) |
- P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, MEM_TYPE0) |
- P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, MEM_TYPE1) |
- P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, MEM_TYPE2),
- .cntr = { {2, -1, -1}, {3, -1, -1} },
- },
- [P4_EVENT_SSE_INPUT_ASSIST] = {
- .opcode = P4_OPCODE(P4_EVENT_SSE_INPUT_ASSIST),
- .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
- .escr_emask =
- P4_ESCR_EMASK_BIT(P4_EVENT_SSE_INPUT_ASSIST, ALL),
- .shared = 1,
- .cntr = { {8, 9, -1}, {10, 11, -1} },
- },
- [P4_EVENT_PACKED_SP_UOP] = {
- .opcode = P4_OPCODE(P4_EVENT_PACKED_SP_UOP),
- .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
- .escr_emask =
- P4_ESCR_EMASK_BIT(P4_EVENT_PACKED_SP_UOP, ALL),
- .shared = 1,
- .cntr = { {8, 9, -1}, {10, 11, -1} },
- },
- [P4_EVENT_PACKED_DP_UOP] = {
- .opcode = P4_OPCODE(P4_EVENT_PACKED_DP_UOP),
- .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
- .escr_emask =
- P4_ESCR_EMASK_BIT(P4_EVENT_PACKED_DP_UOP, ALL),
- .shared = 1,
- .cntr = { {8, 9, -1}, {10, 11, -1} },
- },
- [P4_EVENT_SCALAR_SP_UOP] = {
- .opcode = P4_OPCODE(P4_EVENT_SCALAR_SP_UOP),
- .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
- .escr_emask =
- P4_ESCR_EMASK_BIT(P4_EVENT_SCALAR_SP_UOP, ALL),
- .shared = 1,
- .cntr = { {8, 9, -1}, {10, 11, -1} },
- },
- [P4_EVENT_SCALAR_DP_UOP] = {
- .opcode = P4_OPCODE(P4_EVENT_SCALAR_DP_UOP),
- .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
- .escr_emask =
- P4_ESCR_EMASK_BIT(P4_EVENT_SCALAR_DP_UOP, ALL),
- .shared = 1,
- .cntr = { {8, 9, -1}, {10, 11, -1} },
- },
- [P4_EVENT_64BIT_MMX_UOP] = {
- .opcode = P4_OPCODE(P4_EVENT_64BIT_MMX_UOP),
- .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
- .escr_emask =
- P4_ESCR_EMASK_BIT(P4_EVENT_64BIT_MMX_UOP, ALL),
- .shared = 1,
- .cntr = { {8, 9, -1}, {10, 11, -1} },
- },
- [P4_EVENT_128BIT_MMX_UOP] = {
- .opcode = P4_OPCODE(P4_EVENT_128BIT_MMX_UOP),
- .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
- .escr_emask =
- P4_ESCR_EMASK_BIT(P4_EVENT_128BIT_MMX_UOP, ALL),
- .shared = 1,
- .cntr = { {8, 9, -1}, {10, 11, -1} },
- },
- [P4_EVENT_X87_FP_UOP] = {
- .opcode = P4_OPCODE(P4_EVENT_X87_FP_UOP),
- .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
- .escr_emask =
- P4_ESCR_EMASK_BIT(P4_EVENT_X87_FP_UOP, ALL),
- .shared = 1,
- .cntr = { {8, 9, -1}, {10, 11, -1} },
- },
- [P4_EVENT_TC_MISC] = {
- .opcode = P4_OPCODE(P4_EVENT_TC_MISC),
- .escr_msr = { MSR_P4_TC_ESCR0, MSR_P4_TC_ESCR1 },
- .escr_emask =
- P4_ESCR_EMASK_BIT(P4_EVENT_TC_MISC, FLUSH),
- .cntr = { {4, 5, -1}, {6, 7, -1} },
- },
- [P4_EVENT_GLOBAL_POWER_EVENTS] = {
- .opcode = P4_OPCODE(P4_EVENT_GLOBAL_POWER_EVENTS),
- .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
- .escr_emask =
- P4_ESCR_EMASK_BIT(P4_EVENT_GLOBAL_POWER_EVENTS, RUNNING),
- .cntr = { {0, -1, -1}, {2, -1, -1} },
- },
- [P4_EVENT_TC_MS_XFER] = {
- .opcode = P4_OPCODE(P4_EVENT_TC_MS_XFER),
- .escr_msr = { MSR_P4_MS_ESCR0, MSR_P4_MS_ESCR1 },
- .escr_emask =
- P4_ESCR_EMASK_BIT(P4_EVENT_TC_MS_XFER, CISC),
- .cntr = { {4, 5, -1}, {6, 7, -1} },
- },
- [P4_EVENT_UOP_QUEUE_WRITES] = {
- .opcode = P4_OPCODE(P4_EVENT_UOP_QUEUE_WRITES),
- .escr_msr = { MSR_P4_MS_ESCR0, MSR_P4_MS_ESCR1 },
- .escr_emask =
- P4_ESCR_EMASK_BIT(P4_EVENT_UOP_QUEUE_WRITES, FROM_TC_BUILD) |
- P4_ESCR_EMASK_BIT(P4_EVENT_UOP_QUEUE_WRITES, FROM_TC_DELIVER) |
- P4_ESCR_EMASK_BIT(P4_EVENT_UOP_QUEUE_WRITES, FROM_ROM),
- .cntr = { {4, 5, -1}, {6, 7, -1} },
- },
- [P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE] = {
- .opcode = P4_OPCODE(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE),
- .escr_msr = { MSR_P4_TBPU_ESCR0 , MSR_P4_TBPU_ESCR0 },
- .escr_emask =
- P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, CONDITIONAL) |
- P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, CALL) |
- P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, RETURN) |
- P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, INDIRECT),
- .cntr = { {4, 5, -1}, {6, 7, -1} },
- },
- [P4_EVENT_RETIRED_BRANCH_TYPE] = {
- .opcode = P4_OPCODE(P4_EVENT_RETIRED_BRANCH_TYPE),
- .escr_msr = { MSR_P4_TBPU_ESCR0 , MSR_P4_TBPU_ESCR1 },
- .escr_emask =
- P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, CONDITIONAL) |
- P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, CALL) |
- P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, RETURN) |
- P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, INDIRECT),
- .cntr = { {4, 5, -1}, {6, 7, -1} },
- },
- [P4_EVENT_RESOURCE_STALL] = {
- .opcode = P4_OPCODE(P4_EVENT_RESOURCE_STALL),
- .escr_msr = { MSR_P4_ALF_ESCR0, MSR_P4_ALF_ESCR1 },
- .escr_emask =
- P4_ESCR_EMASK_BIT(P4_EVENT_RESOURCE_STALL, SBFULL),
- .cntr = { {12, 13, 16}, {14, 15, 17} },
- },
- [P4_EVENT_WC_BUFFER] = {
- .opcode = P4_OPCODE(P4_EVENT_WC_BUFFER),
- .escr_msr = { MSR_P4_DAC_ESCR0, MSR_P4_DAC_ESCR1 },
- .escr_emask =
- P4_ESCR_EMASK_BIT(P4_EVENT_WC_BUFFER, WCB_EVICTS) |
- P4_ESCR_EMASK_BIT(P4_EVENT_WC_BUFFER, WCB_FULL_EVICTS),
- .shared = 1,
- .cntr = { {8, 9, -1}, {10, 11, -1} },
- },
- [P4_EVENT_B2B_CYCLES] = {
- .opcode = P4_OPCODE(P4_EVENT_B2B_CYCLES),
- .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
- .escr_emask = 0,
- .cntr = { {0, -1, -1}, {2, -1, -1} },
- },
- [P4_EVENT_BNR] = {
- .opcode = P4_OPCODE(P4_EVENT_BNR),
- .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
- .escr_emask = 0,
- .cntr = { {0, -1, -1}, {2, -1, -1} },
- },
- [P4_EVENT_SNOOP] = {
- .opcode = P4_OPCODE(P4_EVENT_SNOOP),
- .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
- .escr_emask = 0,
- .cntr = { {0, -1, -1}, {2, -1, -1} },
- },
- [P4_EVENT_RESPONSE] = {
- .opcode = P4_OPCODE(P4_EVENT_RESPONSE),
- .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
- .escr_emask = 0,
- .cntr = { {0, -1, -1}, {2, -1, -1} },
- },
- [P4_EVENT_FRONT_END_EVENT] = {
- .opcode = P4_OPCODE(P4_EVENT_FRONT_END_EVENT),
- .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
- .escr_emask =
- P4_ESCR_EMASK_BIT(P4_EVENT_FRONT_END_EVENT, NBOGUS) |
- P4_ESCR_EMASK_BIT(P4_EVENT_FRONT_END_EVENT, BOGUS),
- .cntr = { {12, 13, 16}, {14, 15, 17} },
- },
- [P4_EVENT_EXECUTION_EVENT] = {
- .opcode = P4_OPCODE(P4_EVENT_EXECUTION_EVENT),
- .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
- .escr_emask =
- P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS0) |
- P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS1) |
- P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS2) |
- P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS3) |
- P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS0) |
- P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS1) |
- P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS2) |
- P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS3),
- .cntr = { {12, 13, 16}, {14, 15, 17} },
- },
- [P4_EVENT_REPLAY_EVENT] = {
- .opcode = P4_OPCODE(P4_EVENT_REPLAY_EVENT),
- .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
- .escr_emask =
- P4_ESCR_EMASK_BIT(P4_EVENT_REPLAY_EVENT, NBOGUS) |
- P4_ESCR_EMASK_BIT(P4_EVENT_REPLAY_EVENT, BOGUS),
- .cntr = { {12, 13, 16}, {14, 15, 17} },
- },
- [P4_EVENT_INSTR_RETIRED] = {
- .opcode = P4_OPCODE(P4_EVENT_INSTR_RETIRED),
- .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 },
- .escr_emask =
- P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, NBOGUSNTAG) |
- P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, NBOGUSTAG) |
- P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, BOGUSNTAG) |
- P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, BOGUSTAG),
- .cntr = { {12, 13, 16}, {14, 15, 17} },
- },
- [P4_EVENT_UOPS_RETIRED] = {
- .opcode = P4_OPCODE(P4_EVENT_UOPS_RETIRED),
- .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 },
- .escr_emask =
- P4_ESCR_EMASK_BIT(P4_EVENT_UOPS_RETIRED, NBOGUS) |
- P4_ESCR_EMASK_BIT(P4_EVENT_UOPS_RETIRED, BOGUS),
- .cntr = { {12, 13, 16}, {14, 15, 17} },
- },
- [P4_EVENT_UOP_TYPE] = {
- .opcode = P4_OPCODE(P4_EVENT_UOP_TYPE),
- .escr_msr = { MSR_P4_RAT_ESCR0, MSR_P4_RAT_ESCR1 },
- .escr_emask =
- P4_ESCR_EMASK_BIT(P4_EVENT_UOP_TYPE, TAGLOADS) |
- P4_ESCR_EMASK_BIT(P4_EVENT_UOP_TYPE, TAGSTORES),
- .cntr = { {12, 13, 16}, {14, 15, 17} },
- },
- [P4_EVENT_BRANCH_RETIRED] = {
- .opcode = P4_OPCODE(P4_EVENT_BRANCH_RETIRED),
- .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
- .escr_emask =
- P4_ESCR_EMASK_BIT(P4_EVENT_BRANCH_RETIRED, MMNP) |
- P4_ESCR_EMASK_BIT(P4_EVENT_BRANCH_RETIRED, MMNM) |
- P4_ESCR_EMASK_BIT(P4_EVENT_BRANCH_RETIRED, MMTP) |
- P4_ESCR_EMASK_BIT(P4_EVENT_BRANCH_RETIRED, MMTM),
- .cntr = { {12, 13, 16}, {14, 15, 17} },
- },
- [P4_EVENT_MISPRED_BRANCH_RETIRED] = {
- .opcode = P4_OPCODE(P4_EVENT_MISPRED_BRANCH_RETIRED),
- .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 },
- .escr_emask =
- P4_ESCR_EMASK_BIT(P4_EVENT_MISPRED_BRANCH_RETIRED, NBOGUS),
- .cntr = { {12, 13, 16}, {14, 15, 17} },
- },
- [P4_EVENT_X87_ASSIST] = {
- .opcode = P4_OPCODE(P4_EVENT_X87_ASSIST),
- .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
- .escr_emask =
- P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, FPSU) |
- P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, FPSO) |
- P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, POAO) |
- P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, POAU) |
- P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, PREA),
- .cntr = { {12, 13, 16}, {14, 15, 17} },
- },
- [P4_EVENT_MACHINE_CLEAR] = {
- .opcode = P4_OPCODE(P4_EVENT_MACHINE_CLEAR),
- .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
- .escr_emask =
- P4_ESCR_EMASK_BIT(P4_EVENT_MACHINE_CLEAR, CLEAR) |
- P4_ESCR_EMASK_BIT(P4_EVENT_MACHINE_CLEAR, MOCLEAR) |
- P4_ESCR_EMASK_BIT(P4_EVENT_MACHINE_CLEAR, SMCLEAR),
- .cntr = { {12, 13, 16}, {14, 15, 17} },
- },
- [P4_EVENT_INSTR_COMPLETED] = {
- .opcode = P4_OPCODE(P4_EVENT_INSTR_COMPLETED),
- .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 },
- .escr_emask =
- P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_COMPLETED, NBOGUS) |
- P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_COMPLETED, BOGUS),
- .cntr = { {12, 13, 16}, {14, 15, 17} },
- },
-};
-
-#define P4_GEN_CACHE_EVENT(event, bit, metric) \
- p4_config_pack_escr(P4_ESCR_EVENT(event) | \
- P4_ESCR_EMASK_BIT(event, bit)) | \
- p4_config_pack_cccr(metric | \
- P4_CCCR_ESEL(P4_OPCODE_ESEL(P4_OPCODE(event))))
-
-static __initconst const u64 p4_hw_cache_event_ids
- [PERF_COUNT_HW_CACHE_MAX]
- [PERF_COUNT_HW_CACHE_OP_MAX]
- [PERF_COUNT_HW_CACHE_RESULT_MAX] =
-{
- [ C(L1D ) ] = {
- [ C(OP_READ) ] = {
- [ C(RESULT_ACCESS) ] = 0x0,
- [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS,
- P4_PEBS_METRIC__1stl_cache_load_miss_retired),
- },
- },
- [ C(LL ) ] = {
- [ C(OP_READ) ] = {
- [ C(RESULT_ACCESS) ] = 0x0,
- [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS,
- P4_PEBS_METRIC__2ndl_cache_load_miss_retired),
- },
-},
- [ C(DTLB) ] = {
- [ C(OP_READ) ] = {
- [ C(RESULT_ACCESS) ] = 0x0,
- [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS,
- P4_PEBS_METRIC__dtlb_load_miss_retired),
- },
- [ C(OP_WRITE) ] = {
- [ C(RESULT_ACCESS) ] = 0x0,
- [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS,
- P4_PEBS_METRIC__dtlb_store_miss_retired),
- },
- },
- [ C(ITLB) ] = {
- [ C(OP_READ) ] = {
- [ C(RESULT_ACCESS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_ITLB_REFERENCE, HIT,
- P4_PEBS_METRIC__none),
- [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_ITLB_REFERENCE, MISS,
- P4_PEBS_METRIC__none),
- },
- [ C(OP_WRITE) ] = {
- [ C(RESULT_ACCESS) ] = -1,
- [ C(RESULT_MISS) ] = -1,
- },
- [ C(OP_PREFETCH) ] = {
- [ C(RESULT_ACCESS) ] = -1,
- [ C(RESULT_MISS) ] = -1,
- },
- },
- [ C(NODE) ] = {
- [ C(OP_READ) ] = {
- [ C(RESULT_ACCESS) ] = -1,
- [ C(RESULT_MISS) ] = -1,
- },
- [ C(OP_WRITE) ] = {
- [ C(RESULT_ACCESS) ] = -1,
- [ C(RESULT_MISS) ] = -1,
- },
- [ C(OP_PREFETCH) ] = {
- [ C(RESULT_ACCESS) ] = -1,
- [ C(RESULT_MISS) ] = -1,
- },
- },
-};
-
-/*
- * Because Netburst is quite restricted in how many identical
- * events may run simultaneously, we introduce event aliases,
- * i.e. different events which have the same functionality but
- * use non-intersecting resources (ESCR/CCCR/counter registers).
- *
- * This allows us to relax the restrictions a bit and run two or
- * more identical events together.
- *
- * Never set any custom internal bits such as P4_CONFIG_HT,
- * P4_CONFIG_ALIASABLE or bits for P4_PEBS_METRIC; they are either
- * kept up to date automatically or not applicable at all.
- */
-struct p4_event_alias {
- u64 original;
- u64 alternative;
-} p4_event_aliases[] = {
- {
- /*
- * Non-halted cycles can be substituted with non-sleeping cycles (see
- * Intel SDM Vol3b for details). We need this alias to be able
- * to run nmi-watchdog and 'perf top' (or any other user space tool
- * which is interested in running PERF_COUNT_HW_CPU_CYCLES)
- * simultaneously.
- */
- .original =
- p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_GLOBAL_POWER_EVENTS) |
- P4_ESCR_EMASK_BIT(P4_EVENT_GLOBAL_POWER_EVENTS, RUNNING)),
- .alternative =
- p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_EXECUTION_EVENT) |
- P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS0)|
- P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS1)|
- P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS2)|
- P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS3)|
- P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS0) |
- P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS1) |
- P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS2) |
- P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS3))|
- p4_config_pack_cccr(P4_CCCR_THRESHOLD(15) | P4_CCCR_COMPLEMENT |
- P4_CCCR_COMPARE),
- },
-};
-
-static u64 p4_get_alias_event(u64 config)
-{
- u64 config_match;
- int i;
-
- /*
-	 * Only an event carrying the special mark is allowed; this
-	 * makes sure the config didn't arrive as a malformed RAW event.
- */
- if (!(config & P4_CONFIG_ALIASABLE))
- return 0;
-
- config_match = config & P4_CONFIG_EVENT_ALIAS_MASK;
-
- for (i = 0; i < ARRAY_SIZE(p4_event_aliases); i++) {
- if (config_match == p4_event_aliases[i].original) {
- config_match = p4_event_aliases[i].alternative;
- break;
- } else if (config_match == p4_event_aliases[i].alternative) {
- config_match = p4_event_aliases[i].original;
- break;
- }
- }
-
- if (i >= ARRAY_SIZE(p4_event_aliases))
- return 0;
-
- return config_match | (config & P4_CONFIG_EVENT_ALIAS_IMMUTABLE_BITS);
-}
-
-static u64 p4_general_events[PERF_COUNT_HW_MAX] = {
- /* non-halted CPU clocks */
- [PERF_COUNT_HW_CPU_CYCLES] =
- p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_GLOBAL_POWER_EVENTS) |
- P4_ESCR_EMASK_BIT(P4_EVENT_GLOBAL_POWER_EVENTS, RUNNING)) |
- P4_CONFIG_ALIASABLE,
-
- /*
- * retired instructions
- * for the sake of simplicity we don't use the FSB tagging
- */
- [PERF_COUNT_HW_INSTRUCTIONS] =
- p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_INSTR_RETIRED) |
- P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, NBOGUSNTAG) |
- P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, BOGUSNTAG)),
-
- /* cache hits */
- [PERF_COUNT_HW_CACHE_REFERENCES] =
- p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_BSQ_CACHE_REFERENCE) |
- P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITS) |
- P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITE) |
- P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITM) |
- P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITS) |
- P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITE) |
- P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITM)),
-
- /* cache misses */
- [PERF_COUNT_HW_CACHE_MISSES] =
- p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_BSQ_CACHE_REFERENCE) |
- P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_MISS) |
- P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_MISS) |
- P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, WR_2ndL_MISS)),
-
- /* branch instructions retired */
- [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] =
- p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_RETIRED_BRANCH_TYPE) |
- P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, CONDITIONAL) |
- P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, CALL) |
- P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, RETURN) |
- P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, INDIRECT)),
-
- /* mispredicted branches retired */
- [PERF_COUNT_HW_BRANCH_MISSES] =
- p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_MISPRED_BRANCH_RETIRED) |
- P4_ESCR_EMASK_BIT(P4_EVENT_MISPRED_BRANCH_RETIRED, NBOGUS)),
-
- /* bus ready clocks (cpu is driving #DRDY_DRV\#DRDY_OWN): */
- [PERF_COUNT_HW_BUS_CYCLES] =
- p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_FSB_DATA_ACTIVITY) |
- P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_DRV) |
- P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_OWN)) |
- p4_config_pack_cccr(P4_CCCR_EDGE | P4_CCCR_COMPARE),
-};
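
For reference, these generic encodings are what a plain PERF_COUNT_HW_* request from user space ends up mapping to on Netburst. Below is a minimal stand-alone user-space sketch (not part of this patch, error handling kept deliberately short) that counts PERF_COUNT_HW_CPU_CYCLES through perf_event_open(); this is the event the aliasing machinery above exists to keep runnable alongside the NMI watchdog.

/* Minimal user-space sketch: count CPU cycles via perf_event_open(). */
#include <linux/perf_event.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <string.h>
#include <stdio.h>

int main(void)
{
	struct perf_event_attr attr;
	long long count;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_HARDWARE;
	attr.config = PERF_COUNT_HW_CPU_CYCLES;	/* maps via p4_general_events on Netburst */
	attr.disabled = 1;
	attr.exclude_kernel = 1;

	fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
	if (fd < 0) {
		perror("perf_event_open");
		return 1;
	}

	ioctl(fd, PERF_EVENT_IOC_RESET, 0);
	ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
	/* ... workload under measurement ... */
	ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);
	read(fd, &count, sizeof(count));
	printf("cycles: %lld\n", count);
	close(fd);
	return 0;
}
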
-
-static struct p4_event_bind *p4_config_get_bind(u64 config)
-{
- unsigned int evnt = p4_config_unpack_event(config);
- struct p4_event_bind *bind = NULL;
-
- if (evnt < ARRAY_SIZE(p4_event_bind_map))
- bind = &p4_event_bind_map[evnt];
-
- return bind;
-}
-
-static u64 p4_pmu_event_map(int hw_event)
-{
- struct p4_event_bind *bind;
- unsigned int esel;
- u64 config;
-
- config = p4_general_events[hw_event];
- bind = p4_config_get_bind(config);
- esel = P4_OPCODE_ESEL(bind->opcode);
- config |= p4_config_pack_cccr(P4_CCCR_ESEL(esel));
-
- return config;
-}
-
-/* check cpu model specifics */
-static bool p4_event_match_cpu_model(unsigned int event_idx)
-{
- /* The INSTR_COMPLETED event only exists for models 3, 4 and 6 (Prescott) */
- if (event_idx == P4_EVENT_INSTR_COMPLETED) {
- if (boot_cpu_data.x86_model != 3 &&
- boot_cpu_data.x86_model != 4 &&
- boot_cpu_data.x86_model != 6)
- return false;
- }
-
- /*
- * For reference:
- * - IQ_ESCR0 and IQ_ESCR1 are present only on models 1 and 2
- */
-
- return true;
-}
-
-static int p4_validate_raw_event(struct perf_event *event)
-{
- unsigned int v, emask;
-
- /* User data may have out-of-bound event index */
- v = p4_config_unpack_event(event->attr.config);
- if (v >= ARRAY_SIZE(p4_event_bind_map))
- return -EINVAL;
-
- /* It may be unsupported: */
- if (!p4_event_match_cpu_model(v))
- return -EINVAL;
-
- /*
- * NOTE: P4_CCCR_THREAD_ANY does not have the same meaning as
- * in Architectural Performance Monitoring: it selects not
- * _which_ logical cpu to count on but rather _when_, ie it
- * depends on the logical cpu state -- count the event if one cpu
- * is active, none, both or any, so we just allow the user to pass
- * any value desired.
- *
- * In turn we always set the Tx_OS/Tx_USR bits bound to the logical
- * cpu without propagating them to the other cpu
- */
-
- /*
- * if an event is shared across the logical threads
- * the user needs special permissions to be able to use it
- */
- if (p4_ht_active() && p4_event_bind_map[v].shared) {
- if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
- return -EACCES;
- }
-
- /* ESCR EventMask bits may be invalid */
- emask = p4_config_unpack_escr(event->attr.config) & P4_ESCR_EVENTMASK_MASK;
- if (emask & ~p4_event_bind_map[v].escr_emask)
- return -EINVAL;
-
- /*
- * it may have some invalid PEBS bits
- */
- if (p4_config_pebs_has(event->attr.config, P4_PEBS_CONFIG_ENABLE))
- return -EINVAL;
-
- v = p4_config_unpack_metric(event->attr.config);
- if (v >= ARRAY_SIZE(p4_pebs_bind_map))
- return -EINVAL;
-
- return 0;
-}
-
-static int p4_hw_config(struct perf_event *event)
-{
- int cpu = get_cpu();
- int rc = 0;
- u32 escr, cccr;
-
- /*
- * The reason we fetch the cpu this early is that if we get scheduled
- * for the first time on the same cpu, we will not need to swap the
- * thread specific flags in the config (and will save some cpu cycles)
- */
-
- cccr = p4_default_cccr_conf(cpu);
- escr = p4_default_escr_conf(cpu, event->attr.exclude_kernel,
- event->attr.exclude_user);
- event->hw.config = p4_config_pack_escr(escr) |
- p4_config_pack_cccr(cccr);
-
- if (p4_ht_active() && p4_ht_thread(cpu))
- event->hw.config = p4_set_ht_bit(event->hw.config);
-
- if (event->attr.type == PERF_TYPE_RAW) {
- struct p4_event_bind *bind;
- unsigned int esel;
- /*
- * Clear the bits we reserve to be managed by the kernel itself
- * and never allow them to be set from user space
- */
- event->attr.config &= P4_CONFIG_MASK;
-
- rc = p4_validate_raw_event(event);
- if (rc)
- goto out;
-
- /*
- * Note that for RAW events we allow the user to use the P4_CCCR_RESERVED
- * bits since we keep additional info there (for cache events etc.)
- */
- event->hw.config |= event->attr.config;
- bind = p4_config_get_bind(event->attr.config);
- if (!bind) {
- rc = -EINVAL;
- goto out;
- }
- esel = P4_OPCODE_ESEL(bind->opcode);
- event->hw.config |= p4_config_pack_cccr(P4_CCCR_ESEL(esel));
- }
-
- rc = x86_setup_perfctr(event);
-out:
- put_cpu();
- return rc;
-}
-
-static inline int p4_pmu_clear_cccr_ovf(struct hw_perf_event *hwc)
-{
- u64 v;
-
- /* an official way for overflow indication */
- rdmsrl(hwc->config_base, v);
- if (v & P4_CCCR_OVF) {
- wrmsrl(hwc->config_base, v & ~P4_CCCR_OVF);
- return 1;
- }
-
- /*
- * In some circumstances the overflow might issue an NMI without
- * setting the P4_CCCR_OVF bit. Because a counter holds a negative value
- * we simply check whether the high bit is set; if it's cleared it means
- * the counter reached zero and continued counting before the
- * real NMI signal was received:
- */
- rdmsrl(hwc->event_base, v);
- if (!(v & ARCH_P4_UNFLAGGED_BIT))
- return 1;
-
- return 0;
-}
-
-static void p4_pmu_disable_pebs(void)
-{
- /*
- * FIXME
- *
- * Two threads are still allowed to set up the same cache
- * events, so we can't simply clear the metrics until we know
- * no one is depending on us; we would need some kind of
- * reference counter for "ReplayEvent" users.
- *
- * What is more complex are RAW events: if a user (for some
- * reason) passes a cache event metric with an improper
- * event opcode, it's fine from the hardware point of view
- * but complete nonsense in terms of what such an event means.
- *
- * So for the moment leave the metrics turned on forever -- it's
- * ok for now but needs to be revisited!
- *
- * (void)wrmsrl_safe(MSR_IA32_PEBS_ENABLE, 0);
- * (void)wrmsrl_safe(MSR_P4_PEBS_MATRIX_VERT, 0);
- */
-}
-
-static inline void p4_pmu_disable_event(struct perf_event *event)
-{
- struct hw_perf_event *hwc = &event->hw;
-
- /*
- * If the event gets disabled while the counter is in an overflowed
- * state we need to clear P4_CCCR_OVF, otherwise the interrupt gets
- * asserted again and again
- */
- (void)wrmsrl_safe(hwc->config_base,
- p4_config_unpack_cccr(hwc->config) & ~P4_CCCR_ENABLE & ~P4_CCCR_OVF & ~P4_CCCR_RESERVED);
-}
-
-static void p4_pmu_disable_all(void)
-{
- struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
- int idx;
-
- for (idx = 0; idx < x86_pmu.num_counters; idx++) {
- struct perf_event *event = cpuc->events[idx];
- if (!test_bit(idx, cpuc->active_mask))
- continue;
- p4_pmu_disable_event(event);
- }
-
- p4_pmu_disable_pebs();
-}
-
-/* configuration must be valid */
-static void p4_pmu_enable_pebs(u64 config)
-{
- struct p4_pebs_bind *bind;
- unsigned int idx;
-
- BUILD_BUG_ON(P4_PEBS_METRIC__max > P4_PEBS_CONFIG_METRIC_MASK);
-
- idx = p4_config_unpack_metric(config);
- if (idx == P4_PEBS_METRIC__none)
- return;
-
- bind = &p4_pebs_bind_map[idx];
-
- (void)wrmsrl_safe(MSR_IA32_PEBS_ENABLE, (u64)bind->metric_pebs);
- (void)wrmsrl_safe(MSR_P4_PEBS_MATRIX_VERT, (u64)bind->metric_vert);
-}
-
-static void p4_pmu_enable_event(struct perf_event *event)
-{
- struct hw_perf_event *hwc = &event->hw;
- int thread = p4_ht_config_thread(hwc->config);
- u64 escr_conf = p4_config_unpack_escr(p4_clear_ht_bit(hwc->config));
- unsigned int idx = p4_config_unpack_event(hwc->config);
- struct p4_event_bind *bind;
- u64 escr_addr, cccr;
-
- bind = &p4_event_bind_map[idx];
- escr_addr = bind->escr_msr[thread];
-
- /*
- * - we don't support cascaded counters yet
- * - and counter 1 is broken (erratum)
- */
- WARN_ON_ONCE(p4_is_event_cascaded(hwc->config));
- WARN_ON_ONCE(hwc->idx == 1);
-
- /* we need a real Event value */
- escr_conf &= ~P4_ESCR_EVENT_MASK;
- escr_conf |= P4_ESCR_EVENT(P4_OPCODE_EVNT(bind->opcode));
-
- cccr = p4_config_unpack_cccr(hwc->config);
-
- /*
- * it could be a cache event, so we need to write the metrics
- * into the additional MSRs
- */
- p4_pmu_enable_pebs(hwc->config);
-
- (void)wrmsrl_safe(escr_addr, escr_conf);
- (void)wrmsrl_safe(hwc->config_base,
- (cccr & ~P4_CCCR_RESERVED) | P4_CCCR_ENABLE);
-}
-
-static void p4_pmu_enable_all(int added)
-{
- struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
- int idx;
-
- for (idx = 0; idx < x86_pmu.num_counters; idx++) {
- struct perf_event *event = cpuc->events[idx];
- if (!test_bit(idx, cpuc->active_mask))
- continue;
- p4_pmu_enable_event(event);
- }
-}
-
-static int p4_pmu_handle_irq(struct pt_regs *regs)
-{
- struct perf_sample_data data;
- struct cpu_hw_events *cpuc;
- struct perf_event *event;
- struct hw_perf_event *hwc;
- int idx, handled = 0;
- u64 val;
-
- cpuc = this_cpu_ptr(&cpu_hw_events);
-
- for (idx = 0; idx < x86_pmu.num_counters; idx++) {
- int overflow;
-
- if (!test_bit(idx, cpuc->active_mask)) {
- /* catch in-flight IRQs */
- if (__test_and_clear_bit(idx, cpuc->running))
- handled++;
- continue;
- }
-
- event = cpuc->events[idx];
- hwc = &event->hw;
-
- WARN_ON_ONCE(hwc->idx != idx);
-
- /* it might be unflagged overflow */
- overflow = p4_pmu_clear_cccr_ovf(hwc);
-
- val = x86_perf_event_update(event);
- if (!overflow && (val & (1ULL << (x86_pmu.cntval_bits - 1))))
- continue;
-
- handled += overflow;
-
- /* event overflow for sure */
- perf_sample_data_init(&data, 0, hwc->last_period);
-
- if (!x86_perf_event_set_period(event))
- continue;
-
-
- if (perf_event_overflow(event, &data, regs))
- x86_pmu_stop(event, 0);
- }
-
- if (handled)
- inc_irq_stat(apic_perf_irqs);
-
- /*
- * When dealing with the unmasking of the LVTPC on P4 perf hw, it has
- * been observed that the OVF bit flag has to be cleared first _before_
- * the LVTPC can be unmasked.
- *
- * The reason is that the NMI line will continue to be asserted while the OVF
- * bit is set. This causes a second NMI to be generated if the LVTPC is
- * unmasked before the OVF bit is cleared, leading to unknown NMI
- * messages.
- */
- apic_write(APIC_LVTPC, APIC_DM_NMI);
-
- return handled;
-}
-
-/*
- * swap thread specific fields according to a thread
- * we are going to run on
- */
-static void p4_pmu_swap_config_ts(struct hw_perf_event *hwc, int cpu)
-{
- u32 escr, cccr;
-
- /*
- * either we are lucky and continue on the same cpu, or there is no HT support
- */
- if (!p4_should_swap_ts(hwc->config, cpu))
- return;
-
- /*
- * the event has been migrated from another logical
- * cpu, so we need to swap the thread specific flags
- */
-
- escr = p4_config_unpack_escr(hwc->config);
- cccr = p4_config_unpack_cccr(hwc->config);
-
- if (p4_ht_thread(cpu)) {
- cccr &= ~P4_CCCR_OVF_PMI_T0;
- cccr |= P4_CCCR_OVF_PMI_T1;
- if (escr & P4_ESCR_T0_OS) {
- escr &= ~P4_ESCR_T0_OS;
- escr |= P4_ESCR_T1_OS;
- }
- if (escr & P4_ESCR_T0_USR) {
- escr &= ~P4_ESCR_T0_USR;
- escr |= P4_ESCR_T1_USR;
- }
- hwc->config = p4_config_pack_escr(escr);
- hwc->config |= p4_config_pack_cccr(cccr);
- hwc->config |= P4_CONFIG_HT;
- } else {
- cccr &= ~P4_CCCR_OVF_PMI_T1;
- cccr |= P4_CCCR_OVF_PMI_T0;
- if (escr & P4_ESCR_T1_OS) {
- escr &= ~P4_ESCR_T1_OS;
- escr |= P4_ESCR_T0_OS;
- }
- if (escr & P4_ESCR_T1_USR) {
- escr &= ~P4_ESCR_T1_USR;
- escr |= P4_ESCR_T0_USR;
- }
- hwc->config = p4_config_pack_escr(escr);
- hwc->config |= p4_config_pack_cccr(cccr);
- hwc->config &= ~P4_CONFIG_HT;
- }
-}
-
-/*
- * ESCR address hashing is tricky: the ESCRs are not sequential
- * in memory, but they all start from MSR_P4_BSU_ESCR0 (0x03a0) and
- * the low byte of every ESCR address lies in the range [0xa0, 0xe1],
- *
- * so we end up with a ~70% filled hash table
- */
-
-#define P4_ESCR_MSR_BASE 0x000003a0
-#define P4_ESCR_MSR_MAX 0x000003e1
-#define P4_ESCR_MSR_TABLE_SIZE (P4_ESCR_MSR_MAX - P4_ESCR_MSR_BASE + 1)
-#define P4_ESCR_MSR_IDX(msr) (msr - P4_ESCR_MSR_BASE)
-#define P4_ESCR_MSR_TABLE_ENTRY(msr) [P4_ESCR_MSR_IDX(msr)] = msr
-
-static const unsigned int p4_escr_table[P4_ESCR_MSR_TABLE_SIZE] = {
- P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_ALF_ESCR0),
- P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_ALF_ESCR1),
- P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_BPU_ESCR0),
- P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_BPU_ESCR1),
- P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_BSU_ESCR0),
- P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_BSU_ESCR1),
- P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR0),
- P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR1),
- P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR2),
- P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR3),
- P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR4),
- P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR5),
- P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_DAC_ESCR0),
- P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_DAC_ESCR1),
- P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FIRM_ESCR0),
- P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FIRM_ESCR1),
- P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FLAME_ESCR0),
- P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FLAME_ESCR1),
- P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FSB_ESCR0),
- P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FSB_ESCR1),
- P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IQ_ESCR0),
- P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IQ_ESCR1),
- P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IS_ESCR0),
- P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IS_ESCR1),
- P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_ITLB_ESCR0),
- P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_ITLB_ESCR1),
- P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IX_ESCR0),
- P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IX_ESCR1),
- P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_MOB_ESCR0),
- P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_MOB_ESCR1),
- P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_MS_ESCR0),
- P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_MS_ESCR1),
- P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_PMH_ESCR0),
- P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_PMH_ESCR1),
- P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_RAT_ESCR0),
- P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_RAT_ESCR1),
- P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_SAAT_ESCR0),
- P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_SAAT_ESCR1),
- P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_SSU_ESCR0),
- P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_SSU_ESCR1),
- P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_TBPU_ESCR0),
- P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_TBPU_ESCR1),
- P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_TC_ESCR0),
- P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_TC_ESCR1),
- P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_U2L_ESCR0),
- P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_U2L_ESCR1),
-};
-
-static int p4_get_escr_idx(unsigned int addr)
-{
- unsigned int idx = P4_ESCR_MSR_IDX(addr);
-
- if (unlikely(idx >= P4_ESCR_MSR_TABLE_SIZE ||
- !p4_escr_table[idx] ||
- p4_escr_table[idx] != addr)) {
- WARN_ONCE(1, "P4 PMU: Wrong address passed: %x\n", addr);
- return -1;
- }
-
- return idx;
-}
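
The "hashing" above is really a direct-mapped lookup keyed by the low bits of the MSR address. A tiny stand-alone sketch of the same index arithmetic (outside the diff, the sparse table itself is elided) using the 0x3a0 base from the #defines above:

/* Stand-alone illustration of the ESCR index computation. */
#include <stdio.h>

#define ESCR_MSR_BASE 0x3a0	/* MSR_P4_BSU_ESCR0 */
#define ESCR_MSR_MAX  0x3e1

static int escr_idx(unsigned int msr)
{
	if (msr < ESCR_MSR_BASE || msr > ESCR_MSR_MAX)
		return -1;		/* outside the ESCR address range */
	return msr - ESCR_MSR_BASE;	/* 0 .. 65 */
}

int main(void)
{
	printf("0x3a0 -> %d\n", escr_idx(0x3a0));	/* 0  */
	printf("0x3e1 -> %d\n", escr_idx(0x3e1));	/* 65 */
	printf("0x400 -> %d\n", escr_idx(0x400));	/* -1 */
	return 0;
}
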
-
-static int p4_next_cntr(int thread, unsigned long *used_mask,
- struct p4_event_bind *bind)
-{
- int i, j;
-
- for (i = 0; i < P4_CNTR_LIMIT; i++) {
- j = bind->cntr[thread][i];
- if (j != -1 && !test_bit(j, used_mask))
- return j;
- }
-
- return -1;
-}
-
-static int p4_pmu_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
-{
- unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
- unsigned long escr_mask[BITS_TO_LONGS(P4_ESCR_MSR_TABLE_SIZE)];
- int cpu = smp_processor_id();
- struct hw_perf_event *hwc;
- struct p4_event_bind *bind;
- unsigned int i, thread, num;
- int cntr_idx, escr_idx;
- u64 config_alias;
- int pass;
-
- bitmap_zero(used_mask, X86_PMC_IDX_MAX);
- bitmap_zero(escr_mask, P4_ESCR_MSR_TABLE_SIZE);
-
- for (i = 0, num = n; i < n; i++, num--) {
-
- hwc = &cpuc->event_list[i]->hw;
- thread = p4_ht_thread(cpu);
- pass = 0;
-
-again:
- /*
- * It's possible to loop endlessly between the original
- * and the alternative events if both are already
- * scheduled, hence the pass limit.
- */
- if (pass > 2)
- goto done;
-
- bind = p4_config_get_bind(hwc->config);
- escr_idx = p4_get_escr_idx(bind->escr_msr[thread]);
- if (unlikely(escr_idx == -1))
- goto done;
-
- if (hwc->idx != -1 && !p4_should_swap_ts(hwc->config, cpu)) {
- cntr_idx = hwc->idx;
- if (assign)
- assign[i] = hwc->idx;
- goto reserve;
- }
-
- cntr_idx = p4_next_cntr(thread, used_mask, bind);
- if (cntr_idx == -1 || test_bit(escr_idx, escr_mask)) {
- /*
- * Check whether an event alias is still available.
- */
- config_alias = p4_get_alias_event(hwc->config);
- if (!config_alias)
- goto done;
- hwc->config = config_alias;
- pass++;
- goto again;
- }
- /*
- * Perf does test runs to see if a whole group can be assigned
- * together successfully. There can be multiple rounds of this.
- * Unfortunately, p4_pmu_swap_config_ts touches the hwc->config
- * bits, such that the next round of group assignments will
- * cause the above p4_should_swap_ts to pass instead of fail.
- * This leads to counters exclusive to thread0 being used by
- * thread1.
- *
- * Solve this with a cheap hack, reset the idx back to -1 to
- * force a new lookup (p4_next_cntr) to get the right counter
- * for the right thread.
- *
- * This probably doesn't comply with the general spirit of how
- * perf wants to work, but P4 is special. :-(
- */
- if (p4_should_swap_ts(hwc->config, cpu))
- hwc->idx = -1;
- p4_pmu_swap_config_ts(hwc, cpu);
- if (assign)
- assign[i] = cntr_idx;
-reserve:
- set_bit(cntr_idx, used_mask);
- set_bit(escr_idx, escr_mask);
- }
-
-done:
- return num ? -EINVAL : 0;
-}
-
-PMU_FORMAT_ATTR(cccr, "config:0-31" );
-PMU_FORMAT_ATTR(escr, "config:32-62");
-PMU_FORMAT_ATTR(ht, "config:63" );
-
-static struct attribute *intel_p4_formats_attr[] = {
- &format_attr_cccr.attr,
- &format_attr_escr.attr,
- &format_attr_ht.attr,
- NULL,
-};
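
These format attributes describe how the 64-bit raw config is split for sysfs: CCCR bits in config:0-31, ESCR bits in config:32-62, and the HT flag in bit 63. A small stand-alone sketch of that packing follows (not kernel code; the example field values are placeholders, not meaningful ESCR/CCCR settings):

/* Sketch of the raw-config layout exported above. */
#include <stdint.h>
#include <stdio.h>

#define CONFIG_HT (1ULL << 63)

static uint64_t pack_config(uint32_t escr, uint32_t cccr, int ht)
{
	uint64_t config = ((uint64_t)(escr & 0x7fffffff) << 32) | cccr;

	return ht ? config | CONFIG_HT : config;
}

int main(void)
{
	uint64_t config = pack_config(0x0b000384, 0x00039000, 0);

	printf("raw config: %#llx\n", (unsigned long long)config);
	return 0;
}
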
-
-static __initconst const struct x86_pmu p4_pmu = {
- .name = "Netburst P4/Xeon",
- .handle_irq = p4_pmu_handle_irq,
- .disable_all = p4_pmu_disable_all,
- .enable_all = p4_pmu_enable_all,
- .enable = p4_pmu_enable_event,
- .disable = p4_pmu_disable_event,
- .eventsel = MSR_P4_BPU_CCCR0,
- .perfctr = MSR_P4_BPU_PERFCTR0,
- .event_map = p4_pmu_event_map,
- .max_events = ARRAY_SIZE(p4_general_events),
- .get_event_constraints = x86_get_event_constraints,
- /*
- * If HT is disabled we may need to use all
- * ARCH_P4_MAX_CCCR counters simultaneously,
- * though for the moment leave it restricted,
- * assuming HT is on
- */
- .num_counters = ARCH_P4_MAX_CCCR,
- .apic = 1,
- .cntval_bits = ARCH_P4_CNTRVAL_BITS,
- .cntval_mask = ARCH_P4_CNTRVAL_MASK,
- .max_period = (1ULL << (ARCH_P4_CNTRVAL_BITS - 1)) - 1,
- .hw_config = p4_hw_config,
- .schedule_events = p4_pmu_schedule_events,
- /*
- * This handles erratum N15 in Intel doc 249199-029:
- * the counter may not be updated correctly on write,
- * so we need a second write operation to do the trick
- * (the official workaround didn't work)
- *
- * the idea is taken from the OProfile code
- */
- .perfctr_second_write = 1,
-
- .format_attrs = intel_p4_formats_attr,
-};
-
-__init int p4_pmu_init(void)
-{
- unsigned int low, high;
- int i, reg;
-
- /* If we get stripped -- indexing fails */
- BUILD_BUG_ON(ARCH_P4_MAX_CCCR > INTEL_PMC_MAX_GENERIC);
-
- rdmsr(MSR_IA32_MISC_ENABLE, low, high);
- if (!(low & (1 << 7))) {
- pr_cont("unsupported Netburst CPU model %d ",
- boot_cpu_data.x86_model);
- return -ENODEV;
- }
-
- memcpy(hw_cache_event_ids, p4_hw_cache_event_ids,
- sizeof(hw_cache_event_ids));
-
- pr_cont("Netburst events, ");
-
- x86_pmu = p4_pmu;
-
- /*
- * Even though the counters are configured to interrupt a particular
- * logical processor when an overflow happens, testing has shown that
- * on kdump kernels (which use a single cpu), thread1's counter
- * continues to run and will report an NMI on thread0. Due to the
- * overflow bug, this leads to a stream of unknown NMIs.
- *
- * Solve this by zeroing out the registers to mimic a reset.
- */
- for (i = 0; i < x86_pmu.num_counters; i++) {
- reg = x86_pmu_config_addr(i);
- wrmsrl_safe(reg, 0ULL);
- }
-
- return 0;
-}
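
p4_pmu_init() above refuses to register the PMU unless bit 7 of MSR_IA32_MISC_ENABLE ("performance monitoring available") is set. For reference, a user-space sketch that reads the same bit through the msr character device; this assumes the msr driver (CONFIG_X86_MSR) is loaded and requires root.

/* Read bit 7 of IA32_MISC_ENABLE (0x1a0) via /dev/cpu/0/msr. */
#include <stdint.h>
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>

#define MSR_IA32_MISC_ENABLE 0x1a0

int main(void)
{
	uint64_t val;
	int fd = open("/dev/cpu/0/msr", O_RDONLY);

	if (fd < 0 || pread(fd, &val, sizeof(val), MSR_IA32_MISC_ENABLE) != sizeof(val)) {
		perror("msr read");
		return 1;
	}
	printf("perfmon available: %s\n", (val & (1 << 7)) ? "yes" : "no");
	close(fd);
	return 0;
}
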
diff --git a/arch/x86/kernel/cpu/perf_event_p6.c b/arch/x86/kernel/cpu/perf_event_p6.c
deleted file mode 100644
index 7c1a0c07b..000000000
--- a/arch/x86/kernel/cpu/perf_event_p6.c
+++ /dev/null
@@ -1,279 +0,0 @@
-#include <linux/perf_event.h>
-#include <linux/types.h>
-
-#include "perf_event.h"
-
-/*
- * Not sure about some of these
- */
-static const u64 p6_perfmon_event_map[] =
-{
- [PERF_COUNT_HW_CPU_CYCLES] = 0x0079, /* CPU_CLK_UNHALTED */
- [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, /* INST_RETIRED */
- [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0f2e, /* L2_RQSTS:M:E:S:I */
- [PERF_COUNT_HW_CACHE_MISSES] = 0x012e, /* L2_RQSTS:I */
- [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4, /* BR_INST_RETIRED */
- [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5, /* BR_MISS_PRED_RETIRED */
- [PERF_COUNT_HW_BUS_CYCLES] = 0x0062, /* BUS_DRDY_CLOCKS */
- [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x00a2, /* RESOURCE_STALLS */
-
-};
-
-static const u64 __initconst p6_hw_cache_event_ids
- [PERF_COUNT_HW_CACHE_MAX]
- [PERF_COUNT_HW_CACHE_OP_MAX]
- [PERF_COUNT_HW_CACHE_RESULT_MAX] =
-{
- [ C(L1D) ] = {
- [ C(OP_READ) ] = {
- [ C(RESULT_ACCESS) ] = 0x0043, /* DATA_MEM_REFS */
- [ C(RESULT_MISS) ] = 0x0045, /* DCU_LINES_IN */
- },
- [ C(OP_WRITE) ] = {
- [ C(RESULT_ACCESS) ] = 0,
- [ C(RESULT_MISS) ] = 0x0f29, /* L2_LD:M:E:S:I */
- },
- [ C(OP_PREFETCH) ] = {
- [ C(RESULT_ACCESS) ] = 0,
- [ C(RESULT_MISS) ] = 0,
- },
- },
- [ C(L1I ) ] = {
- [ C(OP_READ) ] = {
- [ C(RESULT_ACCESS) ] = 0x0080, /* IFU_IFETCH */
- [ C(RESULT_MISS) ] = 0x0f28, /* L2_IFETCH:M:E:S:I */
- },
- [ C(OP_WRITE) ] = {
- [ C(RESULT_ACCESS) ] = -1,
- [ C(RESULT_MISS) ] = -1,
- },
- [ C(OP_PREFETCH) ] = {
- [ C(RESULT_ACCESS) ] = 0,
- [ C(RESULT_MISS) ] = 0,
- },
- },
- [ C(LL ) ] = {
- [ C(OP_READ) ] = {
- [ C(RESULT_ACCESS) ] = 0,
- [ C(RESULT_MISS) ] = 0,
- },
- [ C(OP_WRITE) ] = {
- [ C(RESULT_ACCESS) ] = 0,
- [ C(RESULT_MISS) ] = 0x0025, /* L2_M_LINES_INM */
- },
- [ C(OP_PREFETCH) ] = {
- [ C(RESULT_ACCESS) ] = 0,
- [ C(RESULT_MISS) ] = 0,
- },
- },
- [ C(DTLB) ] = {
- [ C(OP_READ) ] = {
- [ C(RESULT_ACCESS) ] = 0x0043, /* DATA_MEM_REFS */
- [ C(RESULT_MISS) ] = 0,
- },
- [ C(OP_WRITE) ] = {
- [ C(RESULT_ACCESS) ] = 0,
- [ C(RESULT_MISS) ] = 0,
- },
- [ C(OP_PREFETCH) ] = {
- [ C(RESULT_ACCESS) ] = 0,
- [ C(RESULT_MISS) ] = 0,
- },
- },
- [ C(ITLB) ] = {
- [ C(OP_READ) ] = {
- [ C(RESULT_ACCESS) ] = 0x0080, /* IFU_IFETCH */
- [ C(RESULT_MISS) ] = 0x0085, /* ITLB_MISS */
- },
- [ C(OP_WRITE) ] = {
- [ C(RESULT_ACCESS) ] = -1,
- [ C(RESULT_MISS) ] = -1,
- },
- [ C(OP_PREFETCH) ] = {
- [ C(RESULT_ACCESS) ] = -1,
- [ C(RESULT_MISS) ] = -1,
- },
- },
- [ C(BPU ) ] = {
- [ C(OP_READ) ] = {
- [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED */
- [ C(RESULT_MISS) ] = 0x00c5, /* BR_MISS_PRED_RETIRED */
- },
- [ C(OP_WRITE) ] = {
- [ C(RESULT_ACCESS) ] = -1,
- [ C(RESULT_MISS) ] = -1,
- },
- [ C(OP_PREFETCH) ] = {
- [ C(RESULT_ACCESS) ] = -1,
- [ C(RESULT_MISS) ] = -1,
- },
- },
-};
-
-static u64 p6_pmu_event_map(int hw_event)
-{
- return p6_perfmon_event_map[hw_event];
-}
-
-/*
- * Event setting that is specified not to count anything.
- * We use this to effectively disable a counter.
- *
- * L2_RQSTS with 0 MESI unit mask.
- */
-#define P6_NOP_EVENT 0x0000002EULL
-
-static struct event_constraint p6_event_constraints[] =
-{
- INTEL_EVENT_CONSTRAINT(0xc1, 0x1), /* FLOPS */
- INTEL_EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */
- INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */
- INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */
- INTEL_EVENT_CONSTRAINT(0x13, 0x2), /* DIV */
- INTEL_EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */
- EVENT_CONSTRAINT_END
-};
-
-static void p6_pmu_disable_all(void)
-{
- u64 val;
-
- /* p6 only has one enable register */
- rdmsrl(MSR_P6_EVNTSEL0, val);
- val &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
- wrmsrl(MSR_P6_EVNTSEL0, val);
-}
-
-static void p6_pmu_enable_all(int added)
-{
- unsigned long val;
-
- /* p6 only has one enable register */
- rdmsrl(MSR_P6_EVNTSEL0, val);
- val |= ARCH_PERFMON_EVENTSEL_ENABLE;
- wrmsrl(MSR_P6_EVNTSEL0, val);
-}
-
-static inline void
-p6_pmu_disable_event(struct perf_event *event)
-{
- struct hw_perf_event *hwc = &event->hw;
- u64 val = P6_NOP_EVENT;
-
- (void)wrmsrl_safe(hwc->config_base, val);
-}
-
-static void p6_pmu_enable_event(struct perf_event *event)
-{
- struct hw_perf_event *hwc = &event->hw;
- u64 val;
-
- val = hwc->config;
-
- /*
- * p6 only has a global event enable, set on PerfEvtSel0.
- * We "disable" events by programming P6_NOP_EVENT
- * and rely on p6_pmu_enable_all() being called
- * to actually enable the events.
- */
-
- (void)wrmsrl_safe(hwc->config_base, val);
-}
-
-PMU_FORMAT_ATTR(event, "config:0-7" );
-PMU_FORMAT_ATTR(umask, "config:8-15" );
-PMU_FORMAT_ATTR(edge, "config:18" );
-PMU_FORMAT_ATTR(pc, "config:19" );
-PMU_FORMAT_ATTR(inv, "config:23" );
-PMU_FORMAT_ATTR(cmask, "config:24-31" );
-
-static struct attribute *intel_p6_formats_attr[] = {
- &format_attr_event.attr,
- &format_attr_umask.attr,
- &format_attr_edge.attr,
- &format_attr_pc.attr,
- &format_attr_inv.attr,
- &format_attr_cmask.attr,
- NULL,
-};
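
The P6 raw config keeps the architectural layout: event select in bits 0-7 and unit mask in bits 8-15, as the format attributes above spell out. A short sketch showing how the 0x0f2e encoding used earlier for PERF_COUNT_HW_CACHE_REFERENCES (L2_RQSTS with an M/E/S/I unit mask) decomposes:

/* Decompose the p6 encoding 0x0f2e (L2_RQSTS, MESI umask). */
#include <stdio.h>

int main(void)
{
	unsigned int config = 0x0f2e;
	unsigned int event  = config & 0xff;		/* bits 0-7  */
	unsigned int umask  = (config >> 8) & 0xff;	/* bits 8-15 */

	printf("event=%#x umask=%#x\n", event, umask);	/* 0x2e, 0x0f */
	return 0;
}
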
-
-static __initconst const struct x86_pmu p6_pmu = {
- .name = "p6",
- .handle_irq = x86_pmu_handle_irq,
- .disable_all = p6_pmu_disable_all,
- .enable_all = p6_pmu_enable_all,
- .enable = p6_pmu_enable_event,
- .disable = p6_pmu_disable_event,
- .hw_config = x86_pmu_hw_config,
- .schedule_events = x86_schedule_events,
- .eventsel = MSR_P6_EVNTSEL0,
- .perfctr = MSR_P6_PERFCTR0,
- .event_map = p6_pmu_event_map,
- .max_events = ARRAY_SIZE(p6_perfmon_event_map),
- .apic = 1,
- .max_period = (1ULL << 31) - 1,
- .version = 0,
- .num_counters = 2,
- /*
- * Events have 40 bits implemented. However they are designed such
- * that bits [32-39] are sign extensions of bit 31. As such the
- * effective width of an event for a P6-like PMU is 32 bits only.
- *
- * See IA-32 Intel Architecture Software developer manual Vol 3B
- */
- .cntval_bits = 32,
- .cntval_mask = (1ULL << 32) - 1,
- .get_event_constraints = x86_get_event_constraints,
- .event_constraints = p6_event_constraints,
-
- .format_attrs = intel_p6_formats_attr,
- .events_sysfs_show = intel_event_sysfs_show,
-
-};
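
Because only 32 of the 40 counter bits are effective (bits 32-39 mirror bit 31), counter deltas have to be reduced to cntval_bits width. A small sketch of that wrap-around arithmetic, roughly mirroring what the generic x86_perf_event_update() masking achieves, with the 32-bit width hard-coded for illustration:

/* Wrap-around delta for a counter with 32 effective bits. */
#include <stdint.h>
#include <stdio.h>

#define CNTVAL_BITS 32
#define CNTVAL_MASK ((1ULL << CNTVAL_BITS) - 1)

static uint64_t counter_delta(uint64_t prev, uint64_t now)
{
	return (now - prev) & CNTVAL_MASK;	/* handles wrap-around */
}

int main(void)
{
	/* counter wrapped from near the top back past zero */
	printf("delta = %llu\n",
	       (unsigned long long)counter_delta(0xfffffff0ULL, 0x10ULL));
	return 0;
}
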
-
-static __init void p6_pmu_rdpmc_quirk(void)
-{
- if (boot_cpu_data.x86_mask < 9) {
- /*
- * PPro erratum 26; fixed in stepping 9 and above.
- */
- pr_warn("Userspace RDPMC support disabled due to a CPU erratum\n");
- x86_pmu.attr_rdpmc_broken = 1;
- x86_pmu.attr_rdpmc = 0;
- }
-}
-
-__init int p6_pmu_init(void)
-{
- x86_pmu = p6_pmu;
-
- switch (boot_cpu_data.x86_model) {
- case 1: /* Pentium Pro */
- x86_add_quirk(p6_pmu_rdpmc_quirk);
- break;
-
- case 3: /* Pentium II - Klamath */
- case 5: /* Pentium II - Deschutes */
- case 6: /* Pentium II - Mendocino */
- break;
-
- case 7: /* Pentium III - Katmai */
- case 8: /* Pentium III - Coppermine */
- case 10: /* Pentium III Xeon */
- case 11: /* Pentium III - Tualatin */
- break;
-
- case 9: /* Pentium M - Banias */
- case 13: /* Pentium M - Dothan */
- break;
-
- default:
- pr_cont("unsupported p6 CPU model %d ", boot_cpu_data.x86_model);
- return -ENODEV;
- }
-
- memcpy(hw_cache_event_ids, p6_hw_cache_event_ids,
- sizeof(hw_cache_event_ids));
-
- return 0;
-}
diff --git a/arch/x86/kernel/cpu/powerflags.c b/arch/x86/kernel/cpu/powerflags.c
index 31f0f335e..1dd8294fd 100644
--- a/arch/x86/kernel/cpu/powerflags.c
+++ b/arch/x86/kernel/cpu/powerflags.c
@@ -18,4 +18,6 @@ const char *const x86_power_flags[32] = {
"", /* tsc invariant mapped to constant_tsc */
"cpb", /* core performance boost */
"eff_freq_ro", /* Readonly aperf/mperf */
+ "proc_feedback", /* processor feedback interface */
+ "acc_power", /* accumulated power mechanism */
};
diff --git a/arch/x86/kernel/cpu/rdrand.c b/arch/x86/kernel/cpu/rdrand.c
index 819d94982..f6f50c4ce 100644
--- a/arch/x86/kernel/cpu/rdrand.c
+++ b/arch/x86/kernel/cpu/rdrand.c
@@ -51,7 +51,7 @@ void x86_init_rdrand(struct cpuinfo_x86 *c)
for (i = 0; i < SANITY_CHECK_LOOPS; i++) {
if (!rdrand_long(&tmp)) {
clear_cpu_cap(c, X86_FEATURE_RDRAND);
- printk_once(KERN_WARNING "rdrand: disabled\n");
+ pr_warn_once("rdrand: disabled\n");
return;
}
}
diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c
index 4c60eaf05..cd531355e 100644
--- a/arch/x86/kernel/cpu/topology.c
+++ b/arch/x86/kernel/cpu/topology.c
@@ -87,10 +87,10 @@ void detect_extended_topology(struct cpuinfo_x86 *c)
c->x86_max_cores = (core_level_siblings / smp_num_siblings);
if (!printed) {
- printk(KERN_INFO "CPU: Physical Processor ID: %d\n",
+ pr_info("CPU: Physical Processor ID: %d\n",
c->phys_proc_id);
if (c->x86_max_cores > 1)
- printk(KERN_INFO "CPU: Processor Core ID: %d\n",
+ pr_info("CPU: Processor Core ID: %d\n",
c->cpu_core_id);
printed = 1;
}
diff --git a/arch/x86/kernel/cpu/transmeta.c b/arch/x86/kernel/cpu/transmeta.c
index 252da7ace..34178564b 100644
--- a/arch/x86/kernel/cpu/transmeta.c
+++ b/arch/x86/kernel/cpu/transmeta.c
@@ -1,6 +1,6 @@
#include <linux/kernel.h>
#include <linux/mm.h>
-#include <asm/processor.h>
+#include <asm/cpufeature.h>
#include <asm/msr.h>
#include "cpu.h"
@@ -33,7 +33,7 @@ static void init_transmeta(struct cpuinfo_x86 *c)
if (max >= 0x80860001) {
cpuid(0x80860001, &dummy, &cpu_rev, &cpu_freq, &cpu_flags);
if (cpu_rev != 0x02000000) {
- printk(KERN_INFO "CPU: Processor revision %u.%u.%u.%u, %u MHz\n",
+ pr_info("CPU: Processor revision %u.%u.%u.%u, %u MHz\n",
(cpu_rev >> 24) & 0xff,
(cpu_rev >> 16) & 0xff,
(cpu_rev >> 8) & 0xff,
@@ -44,10 +44,10 @@ static void init_transmeta(struct cpuinfo_x86 *c)
if (max >= 0x80860002) {
cpuid(0x80860002, &new_cpu_rev, &cms_rev1, &cms_rev2, &dummy);
if (cpu_rev == 0x02000000) {
- printk(KERN_INFO "CPU: Processor revision %08X, %u MHz\n",
+ pr_info("CPU: Processor revision %08X, %u MHz\n",
new_cpu_rev, cpu_freq);
}
- printk(KERN_INFO "CPU: Code Morphing Software revision %u.%u.%u-%u-%u\n",
+ pr_info("CPU: Code Morphing Software revision %u.%u.%u-%u-%u\n",
(cms_rev1 >> 24) & 0xff,
(cms_rev1 >> 16) & 0xff,
(cms_rev1 >> 8) & 0xff,
@@ -76,7 +76,7 @@ static void init_transmeta(struct cpuinfo_x86 *c)
(void *)&cpu_info[56],
(void *)&cpu_info[60]);
cpu_info[64] = '\0';
- printk(KERN_INFO "CPU: %s\n", cpu_info);
+ pr_info("CPU: %s\n", cpu_info);
}
/* Unhide possibly hidden capability flags */
diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c
index 628a059a9..364e58346 100644
--- a/arch/x86/kernel/cpu/vmware.c
+++ b/arch/x86/kernel/cpu/vmware.c
@@ -62,7 +62,7 @@ static unsigned long vmware_get_tsc_khz(void)
tsc_hz = eax | (((uint64_t)ebx) << 32);
do_div(tsc_hz, 1000);
BUG_ON(tsc_hz >> 32);
- printk(KERN_INFO "TSC freq read from hypervisor : %lu.%03lu MHz\n",
+ pr_info("TSC freq read from hypervisor : %lu.%03lu MHz\n",
(unsigned long) tsc_hz / 1000,
(unsigned long) tsc_hz % 1000);
@@ -84,8 +84,7 @@ static void __init vmware_platform_setup(void)
if (ebx != UINT_MAX)
x86_platform.calibrate_tsc = vmware_get_tsc_khz;
else
- printk(KERN_WARNING
- "Failed to get TSC freq from the hypervisor\n");
+ pr_warn("Failed to get TSC freq from the hypervisor\n");
}
/*
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index 58f34319b..9ef978d69 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -57,10 +57,9 @@ struct crash_elf_data {
struct kimage *image;
/*
* Total number of ram ranges we have after various adjustments for
- * GART, crash reserved region etc.
+ * crash reserved region, etc.
*/
unsigned int max_nr_ranges;
- unsigned long gart_start, gart_end;
/* Pointer to elf header */
void *ehdr;
@@ -201,17 +200,6 @@ static int get_nr_ram_ranges_callback(u64 start, u64 end, void *arg)
return 0;
}
-static int get_gart_ranges_callback(u64 start, u64 end, void *arg)
-{
- struct crash_elf_data *ced = arg;
-
- ced->gart_start = start;
- ced->gart_end = end;
-
- /* Not expecting more than 1 gart aperture */
- return 1;
-}
-
/* Gather all the required information to prepare elf headers for ram regions */
static void fill_up_crash_elf_data(struct crash_elf_data *ced,
@@ -226,22 +214,6 @@ static void fill_up_crash_elf_data(struct crash_elf_data *ced,
ced->max_nr_ranges = nr_ranges;
- /*
- * We don't create ELF headers for GART aperture as an attempt
- * to dump this memory in second kernel leads to hang/crash.
- * If gart aperture is present, one needs to exclude that region
- * and that could lead to need of extra phdr.
- */
- walk_iomem_res("GART", IORESOURCE_MEM, 0, -1,
- ced, get_gart_ranges_callback);
-
- /*
- * If we have gart region, excluding that could potentially split
- * a memory range, resulting in extra header. Account for that.
- */
- if (ced->gart_end)
- ced->max_nr_ranges++;
-
/* Exclusion of crash region could split memory ranges */
ced->max_nr_ranges++;
@@ -350,13 +322,6 @@ static int elf_header_exclude_ranges(struct crash_elf_data *ced,
return ret;
}
- /* Exclude GART region */
- if (ced->gart_end) {
- ret = exclude_mem_range(cmem, ced->gart_start, ced->gart_end);
- if (ret)
- return ret;
- }
-
return ret;
}
@@ -599,12 +564,12 @@ int crash_setup_memmap_entries(struct kimage *image, struct boot_params *params)
/* Add ACPI tables */
cmd.type = E820_ACPI;
flags = IORESOURCE_MEM | IORESOURCE_BUSY;
- walk_iomem_res("ACPI Tables", flags, 0, -1, &cmd,
+ walk_iomem_res_desc(IORES_DESC_ACPI_TABLES, flags, 0, -1, &cmd,
memmap_entry_callback);
/* Add ACPI Non-volatile Storage */
cmd.type = E820_NVS;
- walk_iomem_res("ACPI Non-volatile Storage", flags, 0, -1, &cmd,
+ walk_iomem_res_desc(IORES_DESC_ACPI_NV_STORAGE, flags, 0, -1, &cmd,
memmap_entry_callback);
/* Add crashk_low_res region */
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index 9c30acfad..8efa57a5f 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -135,7 +135,8 @@ print_context_stack_bp(struct thread_info *tinfo,
if (!__kernel_text_address(addr))
break;
- ops->address(data, addr, 1);
+ if (ops->address(data, addr, 1))
+ break;
frame = frame->next_frame;
ret_addr = &frame->return_address;
print_ftrace_graph_addr(addr, data, ops, tinfo, graph);
@@ -154,10 +155,11 @@ static int print_trace_stack(void *data, char *name)
/*
* Print one address/symbol entries per line.
*/
-static void print_trace_address(void *data, unsigned long addr, int reliable)
+static int print_trace_address(void *data, unsigned long addr, int reliable)
{
touch_nmi_watchdog();
printk_stack_address(addr, reliable, data);
+ return 0;
}
static const struct stacktrace_ops print_trace_ops = {
@@ -265,9 +267,8 @@ int __die(const char *str, struct pt_regs *regs, long err)
#ifdef CONFIG_SMP
printk("SMP ");
#endif
-#ifdef CONFIG_DEBUG_PAGEALLOC
- printk("DEBUG_PAGEALLOC ");
-#endif
+ if (debug_pagealloc_enabled())
+ printk("DEBUG_PAGEALLOC ");
#ifdef CONFIG_KASAN
printk("KASAN");
#endif
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 569c1e4f9..621b501f8 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -24,6 +24,7 @@
#include <asm/e820.h>
#include <asm/proto.h>
#include <asm/setup.h>
+#include <asm/cpufeature.h>
/*
* The e820 map is the map that gets modified e.g. with command line parameters
@@ -925,6 +926,41 @@ static const char *e820_type_to_string(int e820_type)
}
}
+static unsigned long e820_type_to_iomem_type(int e820_type)
+{
+ switch (e820_type) {
+ case E820_RESERVED_KERN:
+ case E820_RAM:
+ return IORESOURCE_SYSTEM_RAM;
+ case E820_ACPI:
+ case E820_NVS:
+ case E820_UNUSABLE:
+ case E820_PRAM:
+ case E820_PMEM:
+ default:
+ return IORESOURCE_MEM;
+ }
+}
+
+static unsigned long e820_type_to_iores_desc(int e820_type)
+{
+ switch (e820_type) {
+ case E820_ACPI:
+ return IORES_DESC_ACPI_TABLES;
+ case E820_NVS:
+ return IORES_DESC_ACPI_NV_STORAGE;
+ case E820_PMEM:
+ return IORES_DESC_PERSISTENT_MEMORY;
+ case E820_PRAM:
+ return IORES_DESC_PERSISTENT_MEMORY_LEGACY;
+ case E820_RESERVED_KERN:
+ case E820_RAM:
+ case E820_UNUSABLE:
+ default:
+ return IORES_DESC_NONE;
+ }
+}
+
static bool do_mark_busy(u32 type, struct resource *res)
{
/* this is the legacy bios/dos rom-shadow + mmio region */
@@ -967,7 +1003,8 @@ void __init e820_reserve_resources(void)
res->start = e820.map[i].addr;
res->end = end;
- res->flags = IORESOURCE_MEM;
+ res->flags = e820_type_to_iomem_type(e820.map[i].type);
+ res->desc = e820_type_to_iores_desc(e820.map[i].type);
/*
* don't register the region that could be conflicted with
diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c
index 1fbf5af91..88d0ae205 100644
--- a/arch/x86/kernel/early_printk.c
+++ b/arch/x86/kernel/early_printk.c
@@ -289,7 +289,7 @@ static __init void early_pci_serial_init(char *s)
}
/*
- * Lastly, initalize the hardware
+ * Lastly, initialize the hardware
*/
if (*s) {
if (strcmp(s, "nocfg") == 0)
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index d5804adfa..8e37cc8a5 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -114,6 +114,10 @@ void __kernel_fpu_begin(void)
kernel_fpu_disable();
if (fpu->fpregs_active) {
+ /*
+ * Ignore return value -- we don't care if reg state
+ * is clobbered.
+ */
copy_fpregs_to_fpstate(fpu);
} else {
this_cpu_write(fpu_fpregs_owner_ctx, NULL);
@@ -189,8 +193,12 @@ void fpu__save(struct fpu *fpu)
preempt_disable();
if (fpu->fpregs_active) {
- if (!copy_fpregs_to_fpstate(fpu))
- fpregs_deactivate(fpu);
+ if (!copy_fpregs_to_fpstate(fpu)) {
+ if (use_eager_fpu())
+ copy_kernel_to_fpregs(&fpu->state);
+ else
+ fpregs_deactivate(fpu);
+ }
}
preempt_enable();
}
@@ -223,14 +231,15 @@ void fpstate_init(union fpregs_state *state)
}
EXPORT_SYMBOL_GPL(fpstate_init);
-/*
- * Copy the current task's FPU state to a new task's FPU context.
- *
- * In both the 'eager' and the 'lazy' case we save hardware registers
- * directly to the destination buffer.
- */
-static void fpu_copy(struct fpu *dst_fpu, struct fpu *src_fpu)
+int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu)
{
+ dst_fpu->counter = 0;
+ dst_fpu->fpregs_active = 0;
+ dst_fpu->last_cpu = -1;
+
+ if (!src_fpu->fpstate_active || !cpu_has_fpu)
+ return 0;
+
WARN_ON_FPU(src_fpu != &current->thread.fpu);
/*
@@ -243,10 +252,9 @@ static void fpu_copy(struct fpu *dst_fpu, struct fpu *src_fpu)
/*
* Save current FPU registers directly into the child
* FPU context, without any memory-to-memory copying.
- *
- * If the FPU context got destroyed in the process (FNSAVE
- * done on old CPUs) then copy it back into the source
- * context and mark the current task for lazy restore.
+ * In lazy mode, if the FPU context isn't loaded into
+ * fpregs, CR0.TS will be set and do_device_not_available
+ * will load the FPU context.
*
* We have to do all this with preemption disabled,
* mostly because of the FNSAVE case, because in that
@@ -259,19 +267,13 @@ static void fpu_copy(struct fpu *dst_fpu, struct fpu *src_fpu)
preempt_disable();
if (!copy_fpregs_to_fpstate(dst_fpu)) {
memcpy(&src_fpu->state, &dst_fpu->state, xstate_size);
- fpregs_deactivate(src_fpu);
+
+ if (use_eager_fpu())
+ copy_kernel_to_fpregs(&src_fpu->state);
+ else
+ fpregs_deactivate(src_fpu);
}
preempt_enable();
-}
-
-int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu)
-{
- dst_fpu->counter = 0;
- dst_fpu->fpregs_active = 0;
- dst_fpu->last_cpu = -1;
-
- if (src_fpu->fpstate_active && cpu_has_fpu)
- fpu_copy(dst_fpu, src_fpu);
return 0;
}
@@ -352,6 +354,69 @@ void fpu__activate_fpstate_write(struct fpu *fpu)
}
/*
+ * This function must be called before we write the current
+ * task's fpstate.
+ *
+ * This call gets the current FPU register state and moves
+ * it into the 'fpstate'. Preemption is disabled so that
+ * no writes to the 'fpstate' can occur from context
+ * switches.
+ *
+ * Must be followed by a fpu__current_fpstate_write_end().
+ */
+void fpu__current_fpstate_write_begin(void)
+{
+ struct fpu *fpu = &current->thread.fpu;
+
+ /*
+ * Ensure that the context-switching code does not write
+ * over the fpstate while we are doing our update.
+ */
+ preempt_disable();
+
+ /*
+ * Move the fpregs in to the fpu's 'fpstate'.
+ */
+ fpu__activate_fpstate_read(fpu);
+
+ /*
+ * The caller is about to write to 'fpu'. Ensure that no
+ * CPU thinks that its fpregs match the fpstate. This
+ * ensures we will not be lazy and skip a XRSTOR in the
+ * future.
+ */
+ fpu->last_cpu = -1;
+}
+
+/*
+ * This function must be paired with fpu__current_fpstate_write_begin()
+ *
+ * This will ensure that the modified fpstate gets placed back in
+ * the fpregs if necessary.
+ *
+ * Note: This function may be called whether or not an _actual_
+ * write to the fpstate occurred.
+ */
+void fpu__current_fpstate_write_end(void)
+{
+ struct fpu *fpu = &current->thread.fpu;
+
+ /*
+ * 'fpu' now has an updated copy of the state, but the
+ * registers may still be out of date. Update them with
+ * an XRSTOR if they are active.
+ */
+ if (fpregs_active())
+ copy_kernel_to_fpregs(&fpu->state);
+
+ /*
+ * Our update is done and the fpregs/fpstate are in sync
+ * if necessary. Context switches can happen again.
+ */
+ preempt_enable();
+}
+
+/*
* 'fpu__restore()' is called to copy FPU registers from
* the FPU fpstate to the live hw registers and to activate
* access to the hardware registers, so that FPU instructions
@@ -425,7 +490,7 @@ void fpu__clear(struct fpu *fpu)
{
WARN_ON_FPU(fpu != &current->thread.fpu); /* Almost certainly an anomaly */
- if (!use_eager_fpu()) {
+ if (!use_eager_fpu() || !static_cpu_has(X86_FEATURE_FPU)) {
/* FPU state will be reallocated lazily at the first use. */
fpu__drop(fpu);
} else {
diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c
index bd08fb770..54c86fffb 100644
--- a/arch/x86/kernel/fpu/init.c
+++ b/arch/x86/kernel/fpu/init.c
@@ -262,7 +262,10 @@ static void __init fpu__init_system_xstate_size_legacy(void)
* not only saved the restores along the way, but we also have the
* FPU ready to be used for the original task.
*
- * 'eager' switching is used on modern CPUs, there we switch the FPU
+ * 'lazy' is deprecated because it's almost never a performance win
+ * and it's much more complicated than 'eager'.
+ *
+ * 'eager' switching is by default on all CPUs, there we switch the FPU
* state during every context switch, regardless of whether the task
* has used FPU instructions in that time slice or not. This is done
* because modern FPU context saving instructions are able to optimize
@@ -273,7 +276,7 @@ static void __init fpu__init_system_xstate_size_legacy(void)
* to use 'eager' restores, if we detect that a task is using the FPU
* frequently. See the fpu->counter logic in fpu/internal.h for that. ]
*/
-static enum { AUTO, ENABLE, DISABLE } eagerfpu = AUTO;
+static enum { ENABLE, DISABLE } eagerfpu = ENABLE;
/*
* Find supported xfeatures based on cpu features and command-line input.
@@ -344,15 +347,9 @@ static void __init fpu__init_system_ctx_switch(void)
*/
static void __init fpu__init_parse_early_param(void)
{
- /*
- * No need to check "eagerfpu=auto" again, since it is the
- * initial default.
- */
if (cmdline_find_option_bool(boot_command_line, "eagerfpu=off")) {
eagerfpu = DISABLE;
fpu__clear_eager_fpu_features();
- } else if (cmdline_find_option_bool(boot_command_line, "eagerfpu=on")) {
- eagerfpu = ENABLE;
}
if (cmdline_find_option_bool(boot_command_line, "no387"))
diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c
index 0bc349042..8bd1c0039 100644
--- a/arch/x86/kernel/fpu/regset.c
+++ b/arch/x86/kernel/fpu/regset.c
@@ -8,7 +8,7 @@
/*
* The xstateregs_active() routine is the same as the regset_fpregs_active() routine,
* as the "regset->n" for the xstate regset will be updated based on the feature
- * capabilites supported by the xsave.
+ * capabilities supported by the xsave.
*/
int regset_fpregs_active(struct task_struct *target, const struct user_regset *regset)
{
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index d425cda5a..b48ef35b2 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -5,6 +5,7 @@
*/
#include <linux/compat.h>
#include <linux/cpu.h>
+#include <linux/pkeys.h>
#include <asm/fpu/api.h>
#include <asm/fpu/internal.h>
@@ -13,6 +14,11 @@
#include <asm/tlbflush.h>
+/*
+ * Although we spell it out in here, the Processor Trace
+ * xfeature is completely unused. We use other mechanisms
+ * to save/restore PT state in Linux.
+ */
static const char *xfeature_names[] =
{
"x87 floating point registers" ,
@@ -23,6 +29,8 @@ static const char *xfeature_names[] =
"AVX-512 opmask" ,
"AVX-512 Hi256" ,
"AVX-512 ZMM_Hi256" ,
+ "Processor Trace (unused)" ,
+ "Protection Keys User registers",
"unknown xstate feature" ,
};
@@ -51,8 +59,12 @@ void fpu__xstate_clear_all_cpu_caps(void)
setup_clear_cpu_cap(X86_FEATURE_AVX512PF);
setup_clear_cpu_cap(X86_FEATURE_AVX512ER);
setup_clear_cpu_cap(X86_FEATURE_AVX512CD);
+ setup_clear_cpu_cap(X86_FEATURE_AVX512DQ);
+ setup_clear_cpu_cap(X86_FEATURE_AVX512BW);
+ setup_clear_cpu_cap(X86_FEATURE_AVX512VL);
setup_clear_cpu_cap(X86_FEATURE_MPX);
setup_clear_cpu_cap(X86_FEATURE_XGETBV1);
+ setup_clear_cpu_cap(X86_FEATURE_PKU);
}
/*
@@ -231,7 +243,7 @@ static void __init print_xstate_feature(u64 xstate_mask)
const char *feature_name;
if (cpu_has_xfeatures(xstate_mask, &feature_name))
- pr_info("x86/fpu: Supporting XSAVE feature 0x%02Lx: '%s'\n", xstate_mask, feature_name);
+ pr_info("x86/fpu: Supporting XSAVE feature 0x%03Lx: '%s'\n", xstate_mask, feature_name);
}
/*
@@ -247,6 +259,7 @@ static void __init print_xstate_features(void)
print_xstate_feature(XFEATURE_MASK_OPMASK);
print_xstate_feature(XFEATURE_MASK_ZMM_Hi256);
print_xstate_feature(XFEATURE_MASK_Hi16_ZMM);
+ print_xstate_feature(XFEATURE_MASK_PKRU);
}
/*
@@ -463,6 +476,7 @@ static void check_xstate_against_struct(int nr)
XCHECK_SZ(sz, nr, XFEATURE_OPMASK, struct avx_512_opmask_state);
XCHECK_SZ(sz, nr, XFEATURE_ZMM_Hi256, struct avx_512_zmm_uppers_state);
XCHECK_SZ(sz, nr, XFEATURE_Hi16_ZMM, struct avx_512_hi16_state);
+ XCHECK_SZ(sz, nr, XFEATURE_PKRU, struct pkru_state);
/*
* Make *SURE* to add any feature numbers in below if
@@ -470,7 +484,8 @@ static void check_xstate_against_struct(int nr)
* numbers.
*/
if ((nr < XFEATURE_YMM) ||
- (nr >= XFEATURE_MAX)) {
+ (nr >= XFEATURE_MAX) ||
+ (nr == XFEATURE_PT_UNIMPLEMENTED_SO_FAR)) {
WARN_ONCE(1, "no structure for xstate: %d\n", nr);
XSTATE_WARN_ON(1);
}
@@ -668,6 +683,19 @@ void fpu__resume_cpu(void)
}
/*
+ * Given an xstate feature mask, calculate where in the xsave
+ * buffer the state is. Callers should ensure that the buffer
+ * is valid.
+ *
+ * Note: does not work for compacted buffers.
+ */
+void *__raw_xsave_addr(struct xregs_state *xsave, int xstate_feature_mask)
+{
+ int feature_nr = fls64(xstate_feature_mask) - 1;
+
+ return (void *)xsave + xstate_comp_offsets[feature_nr];
+}
+/*
* Given the xsave area and a state inside, this function returns the
* address of the state.
*
@@ -687,7 +715,6 @@ void fpu__resume_cpu(void)
*/
void *get_xsave_addr(struct xregs_state *xsave, int xstate_feature)
{
- int feature_nr = fls64(xstate_feature) - 1;
/*
* Do we even *have* xsave state?
*/
@@ -715,7 +742,7 @@ void *get_xsave_addr(struct xregs_state *xsave, int xstate_feature)
if (!(xsave->header.xfeatures & xstate_feature))
return NULL;
- return (void *)xsave + xstate_comp_offsets[feature_nr];
+ return __raw_xsave_addr(xsave, xstate_feature);
}
EXPORT_SYMBOL_GPL(get_xsave_addr);
@@ -750,3 +777,156 @@ const void *get_xsave_field_ptr(int xsave_state)
return get_xsave_addr(&fpu->state.xsave, xsave_state);
}
+
+
+/*
+ * Set xfeatures (aka XSTATE_BV) bit for a feature that we want
+ * to take out of its "init state". This will ensure that an
+ * XRSTOR actually restores the state.
+ */
+static void fpu__xfeature_set_non_init(struct xregs_state *xsave,
+ int xstate_feature_mask)
+{
+ xsave->header.xfeatures |= xstate_feature_mask;
+}
+
+/*
+ * This function is safe to call whether the FPU is in use or not.
+ *
+ * Note that this only works on the current task.
+ *
+ * Inputs:
+ * @xsave_state: state which is defined in xsave.h (e.g. XFEATURE_MASK_FP,
+ * XFEATURE_MASK_SSE, etc...)
+ * @xsave_state_ptr: a pointer to a copy of the state that you would
+ * like written into the current task's FPU xsave state. This pointer
+ * must not be located in the current task's xsave area.
+ * Output:
+ * none; the data is copied directly into the current task's
+ * xsave buffer.
+ */
+static void fpu__xfeature_set_state(int xstate_feature_mask,
+ void *xstate_feature_src, size_t len)
+{
+ struct xregs_state *xsave = &current->thread.fpu.state.xsave;
+ struct fpu *fpu = &current->thread.fpu;
+ void *dst;
+
+ if (!boot_cpu_has(X86_FEATURE_XSAVE)) {
+ WARN_ONCE(1, "%s() attempted with no xsave support", __func__);
+ return;
+ }
+
+ /*
+ * Tell the FPU code that we need the FPU state to be in
+ * 'fpu' (not in the registers), and that we need it to
+ * be stable while we write to it.
+ */
+ fpu__current_fpstate_write_begin();
+
+ /*
+ * This method *WILL* *NOT* work for compact-format
+ * buffers. If the 'xstate_feature_mask' is unset in
+ * xcomp_bv then we may need to move other feature state
+ * "up" in the buffer.
+ */
+ if (xsave->header.xcomp_bv & xstate_feature_mask) {
+ WARN_ON_ONCE(1);
+ goto out;
+ }
+
+ /* find the location in the xsave buffer of the desired state */
+ dst = __raw_xsave_addr(&fpu->state.xsave, xstate_feature_mask);
+
+ /*
+ * Make sure that the pointer being passed in did not
+ * come from the xsave buffer itself.
+ */
+ WARN_ONCE(xstate_feature_src == dst, "set from xsave buffer itself");
+
+ /* put the caller-provided data in the location */
+ memcpy(dst, xstate_feature_src, len);
+
+ /*
+ * Mark the xfeature so that the CPU knows there is state
+ * in the buffer now.
+ */
+ fpu__xfeature_set_non_init(xsave, xstate_feature_mask);
+out:
+ /*
+ * We are done writing to the 'fpu'. Re-enable preemption
+ * and (possibly) move the fpstate back into the fpregs.
+ */
+ fpu__current_fpstate_write_end();
+}
+
+#define NR_VALID_PKRU_BITS (CONFIG_NR_PROTECTION_KEYS * 2)
+#define PKRU_VALID_MASK (NR_VALID_PKRU_BITS - 1)
+
+/*
+ * This will go out and modify the XSAVE buffer so that PKRU is
+ * set to a particular state for access to 'pkey'.
+ *
+ * PKRU state does affect kernel access to user memory. We do
+ * not modify PKRU *itself* here, only the XSAVE state that will
+ * be restored into PKRU when we return to userspace.
+ */
+int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
+ unsigned long init_val)
+{
+ struct xregs_state *xsave = &tsk->thread.fpu.state.xsave;
+ struct pkru_state *old_pkru_state;
+ struct pkru_state new_pkru_state;
+ int pkey_shift = (pkey * PKRU_BITS_PER_PKEY);
+ u32 new_pkru_bits = 0;
+
+ /*
+ * This check implies XSAVE support. OSPKE only gets
+ * set if we enable XSAVE and we enable PKU in XCR0.
+ */
+ if (!boot_cpu_has(X86_FEATURE_OSPKE))
+ return -EINVAL;
+
+ /* Set the bits we need in PKRU */
+ if (init_val & PKEY_DISABLE_ACCESS)
+ new_pkru_bits |= PKRU_AD_BIT;
+ if (init_val & PKEY_DISABLE_WRITE)
+ new_pkru_bits |= PKRU_WD_BIT;
+
+ /* Shift the bits in to the correct place in PKRU for pkey. */
+ new_pkru_bits <<= pkey_shift;
+
+ /* Locate old copy of the state in the xsave buffer */
+ old_pkru_state = get_xsave_addr(xsave, XFEATURE_MASK_PKRU);
+
+ /*
+ * When state is not in the buffer, it is in the init
+ * state, set it manually. Otherwise, copy out the old
+ * state.
+ */
+ if (!old_pkru_state)
+ new_pkru_state.pkru = 0;
+ else
+ new_pkru_state.pkru = old_pkru_state->pkru;
+
+ /* mask off any old bits in place */
+ new_pkru_state.pkru &= ~((PKRU_AD_BIT|PKRU_WD_BIT) << pkey_shift);
+ /* Set the newly-requested bits */
+ new_pkru_state.pkru |= new_pkru_bits;
+
+ /*
+ * We could theoretically live without zeroing pkru.pad.
+ * The current XSAVE feature state definition says that
+ * only bytes 0->3 are used. But we do not want to
+ * chance leaking kernel stack out to userspace in case a
+ * memcpy() of the whole xsave buffer was done.
+ *
+ * They're in the same cacheline anyway.
+ */
+ new_pkru_state.pad = 0;
+
+ fpu__xfeature_set_state(XFEATURE_MASK_PKRU, &new_pkru_state,
+ sizeof(new_pkru_state));
+
+ return 0;
+}
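
The PKRU update above boils down to placing a two-bit access-disable/write-disable field at position pkey * 2 in the 32-bit PKRU word. Below is a stand-alone sketch of just that bit arithmetic (outside the diff); the AD/WD values (0x1/0x2) and the two-bits-per-key layout follow the x86 protection-keys definitions, and the example pkey is a placeholder.

/* PKRU bit arithmetic: two bits per pkey, AD = 0x1, WD = 0x2. */
#include <stdint.h>
#include <stdio.h>

#define PKRU_BITS_PER_PKEY 2
#define PKRU_AD_BIT 0x1u
#define PKRU_WD_BIT 0x2u

static uint32_t set_pkey_bits(uint32_t pkru, int pkey,
			      int disable_access, int disable_write)
{
	int shift = pkey * PKRU_BITS_PER_PKEY;
	uint32_t bits = 0;

	if (disable_access)
		bits |= PKRU_AD_BIT;
	if (disable_write)
		bits |= PKRU_WD_BIT;

	/* clear the old two-bit field, then install the new one */
	pkru &= ~((PKRU_AD_BIT | PKRU_WD_BIT) << shift);
	return pkru | (bits << shift);
}

int main(void)
{
	/* example: write-disable pkey 5 on an initially clear PKRU */
	printf("pkru = %#x\n", set_pkey_bits(0, 5, 0, 1));	/* 0x800 */
	return 0;
}
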
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 29408d6d6..d036cfb44 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -1,5 +1,5 @@
/*
- * Code for replacing ftrace calls with jumps.
+ * Dynamic function tracing support.
*
* Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
*
@@ -81,9 +81,9 @@ within(unsigned long addr, unsigned long start, unsigned long end)
static unsigned long text_ip_addr(unsigned long ip)
{
/*
- * On x86_64, kernel text mappings are mapped read-only with
- * CONFIG_DEBUG_RODATA. So we use the kernel identity mapping instead
- * of the kernel text mapping to modify the kernel text.
+ * On x86_64, kernel text mappings are mapped read-only, so we use
+ * the kernel identity mapping instead of the kernel text mapping
+ * to modify the kernel text.
*
* For 32bit kernels, these mappings are same and we can use
* kernel identity mapping to modify code.
@@ -697,9 +697,8 @@ static inline void tramp_free(void *tramp) { }
#endif
/* Defined as markers to the end of the ftrace default trampolines */
-extern void ftrace_caller_end(void);
extern void ftrace_regs_caller_end(void);
-extern void ftrace_return(void);
+extern void ftrace_epilogue(void);
extern void ftrace_caller_op_ptr(void);
extern void ftrace_regs_caller_op_ptr(void);
@@ -746,7 +745,7 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
op_offset = (unsigned long)ftrace_regs_caller_op_ptr;
} else {
start_offset = (unsigned long)ftrace_caller;
- end_offset = (unsigned long)ftrace_caller_end;
+ end_offset = (unsigned long)ftrace_epilogue;
op_offset = (unsigned long)ftrace_caller_op_ptr;
}
@@ -754,7 +753,7 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
/*
* Allocate enough size to store the ftrace_caller code,
- * the jmp to ftrace_return, as well as the address of
+ * the jmp to ftrace_epilogue, as well as the address of
* the ftrace_ops this trampoline is used for.
*/
trampoline = alloc_tramp(size + MCOUNT_INSN_SIZE + sizeof(void *));
@@ -772,8 +771,8 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
ip = (unsigned long)trampoline + size;
- /* The trampoline ends with a jmp to ftrace_return */
- jmp = ftrace_jmp_replace(ip, (unsigned long)ftrace_return);
+ /* The trampoline ends with a jmp to ftrace_epilogue */
+ jmp = ftrace_jmp_replace(ip, (unsigned long)ftrace_epilogue);
memcpy(trampoline + size, jmp, MCOUNT_INSN_SIZE);
/*
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 2c0f3407b..1f4422d5c 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -40,13 +40,8 @@ pmdval_t early_pmd_flags = __PAGE_KERNEL_LARGE & ~(_PAGE_GLOBAL | _PAGE_NX);
/* Wipe all early page tables except for the kernel symbol map */
static void __init reset_early_page_tables(void)
{
- unsigned long i;
-
- for (i = 0; i < PTRS_PER_PGD-1; i++)
- early_level4_pgt[i].pgd = 0;
-
+ memset(early_level4_pgt, 0, sizeof(pgd_t)*(PTRS_PER_PGD-1));
next_early_pgt = 0;
-
write_cr3(__pa_nodebug(early_level4_pgt));
}
@@ -54,7 +49,6 @@ static void __init reset_early_page_tables(void)
int __init early_make_pgtable(unsigned long address)
{
unsigned long physaddr = address - __PAGE_OFFSET;
- unsigned long i;
pgdval_t pgd, *pgd_p;
pudval_t pud, *pud_p;
pmdval_t pmd, *pmd_p;
@@ -81,8 +75,7 @@ again:
}
pud_p = (pudval_t *)early_dynamic_pgts[next_early_pgt++];
- for (i = 0; i < PTRS_PER_PUD; i++)
- pud_p[i] = 0;
+ memset(pud_p, 0, sizeof(*pud_p) * PTRS_PER_PUD);
*pgd_p = (pgdval_t)pud_p - __START_KERNEL_map + phys_base + _KERNPG_TABLE;
}
pud_p += pud_index(address);
@@ -97,8 +90,7 @@ again:
}
pmd_p = (pmdval_t *)early_dynamic_pgts[next_early_pgt++];
- for (i = 0; i < PTRS_PER_PMD; i++)
- pmd_p[i] = 0;
+ memset(pmd_p, 0, sizeof(*pmd_p) * PTRS_PER_PMD);
*pud_p = (pudval_t)pmd_p - __START_KERNEL_map + phys_base + _KERNPG_TABLE;
}
pmd = (physaddr & PMD_MASK) + early_pmd_flags;
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index 6bc9ae24b..af1112980 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -19,7 +19,7 @@
#include <asm/setup.h>
#include <asm/processor-flags.h>
#include <asm/msr-index.h>
-#include <asm/cpufeature.h>
+#include <asm/cpufeatures.h>
#include <asm/percpu.h>
#include <asm/nops.h>
#include <asm/bootparam.h>
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index ffdc0e860..22fbf9df6 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -38,7 +38,6 @@
#define pud_index(x) (((x) >> PUD_SHIFT) & (PTRS_PER_PUD-1))
L4_PAGE_OFFSET = pgd_index(__PAGE_OFFSET)
-L3_PAGE_OFFSET = pud_index(__PAGE_OFFSET)
L4_START_KERNEL = pgd_index(__START_KERNEL_map)
L3_START_KERNEL = pud_index(__START_KERNEL_map)
@@ -76,9 +75,7 @@ startup_64:
subq $_text - __START_KERNEL_map, %rbp
/* Is the address not 2M aligned? */
- movq %rbp, %rax
- andl $~PMD_PAGE_MASK, %eax
- testl %eax, %eax
+ testl $~PMD_PAGE_MASK, %ebp
jnz bad_address
/*
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index b8e6ff5cd..a1f0e4a5c 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -12,6 +12,7 @@
#include <linux/pm.h>
#include <linux/io.h>
+#include <asm/cpufeature.h>
#include <asm/irqdomain.h>
#include <asm/fixmap.h>
#include <asm/hpet.h>
@@ -716,7 +717,7 @@ static int hpet_cpuhp_notify(struct notifier_block *n,
struct hpet_work_struct work;
struct hpet_dev *hdev = per_cpu(cpu_hpet_dev, cpu);
- switch (action & 0xf) {
+ switch (action & ~CPU_TASKS_FROZEN) {
case CPU_ONLINE:
INIT_DELAYED_WORK_ONSTACK(&work.work, hpet_work);
init_completion(&work.complete);
diff --git a/arch/x86/kernel/kexec-bzimage64.c b/arch/x86/kernel/kexec-bzimage64.c
index 0f8a6bbaa..2af478e3f 100644
--- a/arch/x86/kernel/kexec-bzimage64.c
+++ b/arch/x86/kernel/kexec-bzimage64.c
@@ -271,7 +271,7 @@ static int bzImage64_probe(const char *buf, unsigned long len)
int ret = -ENOEXEC;
struct setup_header *header;
- /* kernel should be atleast two sectors long */
+ /* kernel should be at least two sectors long */
if (len < 2 * 512) {
pr_err("File is too short to be a bzImage\n");
return ret;
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index 44256a627..2da6ee9ae 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -609,9 +609,9 @@ static struct notifier_block kgdb_notifier = {
};
/**
- * kgdb_arch_init - Perform any architecture specific initalization.
+ * kgdb_arch_init - Perform any architecture specific initialization.
*
- * This function will handle the initalization of any architecture
+ * This function will handle the initialization of any architecture
* specific callbacks.
*/
int kgdb_arch_init(void)
@@ -750,9 +750,7 @@ void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long ip)
int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt)
{
int err;
-#ifdef CONFIG_DEBUG_RODATA
char opc[BREAK_INSTR_SIZE];
-#endif /* CONFIG_DEBUG_RODATA */
bpt->type = BP_BREAKPOINT;
err = probe_kernel_read(bpt->saved_instr, (char *)bpt->bpt_addr,
@@ -761,7 +759,6 @@ int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt)
return err;
err = probe_kernel_write((char *)bpt->bpt_addr,
arch_kgdb_ops.gdb_bpt_instr, BREAK_INSTR_SIZE);
-#ifdef CONFIG_DEBUG_RODATA
if (!err)
return err;
/*
@@ -778,13 +775,12 @@ int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt)
if (memcmp(opc, arch_kgdb_ops.gdb_bpt_instr, BREAK_INSTR_SIZE))
return -EINVAL;
bpt->type = BP_POKE_BREAKPOINT;
-#endif /* CONFIG_DEBUG_RODATA */
+
return err;
}
int kgdb_arch_remove_breakpoint(struct kgdb_bkpt *bpt)
{
-#ifdef CONFIG_DEBUG_RODATA
int err;
char opc[BREAK_INSTR_SIZE];
@@ -801,8 +797,8 @@ int kgdb_arch_remove_breakpoint(struct kgdb_bkpt *bpt)
if (err || memcmp(opc, bpt->saved_instr, BREAK_INSTR_SIZE))
goto knl_write;
return err;
+
knl_write:
-#endif /* CONFIG_DEBUG_RODATA */
return probe_kernel_write((char *)bpt->bpt_addr,
(char *)bpt->saved_instr, BREAK_INSTR_SIZE);
}
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index 1deffe6cc..ae703acb8 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -49,6 +49,7 @@
#include <linux/kdebug.h>
#include <linux/kallsyms.h>
#include <linux/ftrace.h>
+#include <linux/frame.h>
#include <asm/cacheflush.h>
#include <asm/desc.h>
@@ -671,39 +672,39 @@ NOKPROBE_SYMBOL(kprobe_int3_handler);
* When a retprobed function returns, this code saves registers and
 * calls trampoline_handler(), which in turn calls the kretprobe's handler.
*/
-static void __used kretprobe_trampoline_holder(void)
-{
- asm volatile (
- ".global kretprobe_trampoline\n"
- "kretprobe_trampoline: \n"
+asm(
+ ".global kretprobe_trampoline\n"
+ ".type kretprobe_trampoline, @function\n"
+ "kretprobe_trampoline:\n"
#ifdef CONFIG_X86_64
- /* We don't bother saving the ss register */
- " pushq %rsp\n"
- " pushfq\n"
- SAVE_REGS_STRING
- " movq %rsp, %rdi\n"
- " call trampoline_handler\n"
- /* Replace saved sp with true return address. */
- " movq %rax, 152(%rsp)\n"
- RESTORE_REGS_STRING
- " popfq\n"
+ /* We don't bother saving the ss register */
+ " pushq %rsp\n"
+ " pushfq\n"
+ SAVE_REGS_STRING
+ " movq %rsp, %rdi\n"
+ " call trampoline_handler\n"
+ /* Replace saved sp with true return address. */
+ " movq %rax, 152(%rsp)\n"
+ RESTORE_REGS_STRING
+ " popfq\n"
#else
- " pushf\n"
- SAVE_REGS_STRING
- " movl %esp, %eax\n"
- " call trampoline_handler\n"
- /* Move flags to cs */
- " movl 56(%esp), %edx\n"
- " movl %edx, 52(%esp)\n"
- /* Replace saved flags with true return address. */
- " movl %eax, 56(%esp)\n"
- RESTORE_REGS_STRING
- " popf\n"
+ " pushf\n"
+ SAVE_REGS_STRING
+ " movl %esp, %eax\n"
+ " call trampoline_handler\n"
+ /* Move flags to cs */
+ " movl 56(%esp), %edx\n"
+ " movl %edx, 52(%esp)\n"
+ /* Replace saved flags with true return address. */
+ " movl %eax, 56(%esp)\n"
+ RESTORE_REGS_STRING
+ " popf\n"
#endif
- " ret\n");
-}
-NOKPROBE_SYMBOL(kretprobe_trampoline_holder);
+ " ret\n"
+ ".size kretprobe_trampoline, .-kretprobe_trampoline\n"
+);
NOKPROBE_SYMBOL(kretprobe_trampoline);
+STACK_FRAME_NON_STANDARD(kretprobe_trampoline);
/*
* Called from kretprobe_trampoline
@@ -988,7 +989,7 @@ int kprobe_fault_handler(struct pt_regs *regs, int trapnr)
* In case the user-specified fault handler returned
* zero, try to fix up.
*/
- if (fixup_exception(regs))
+ if (fixup_exception(regs, trapnr))
return 1;
/*
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 47190bd39..807950860 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -36,6 +36,7 @@
#include <linux/kprobes.h>
#include <linux/debugfs.h>
#include <linux/nmi.h>
+#include <linux/swait.h>
#include <asm/timer.h>
#include <asm/cpu.h>
#include <asm/traps.h>
@@ -91,14 +92,14 @@ static void kvm_io_delay(void)
struct kvm_task_sleep_node {
struct hlist_node link;
- wait_queue_head_t wq;
+ struct swait_queue_head wq;
u32 token;
int cpu;
bool halted;
};
static struct kvm_task_sleep_head {
- spinlock_t lock;
+ raw_spinlock_t lock;
struct hlist_head list;
} async_pf_sleepers[KVM_TASK_SLEEP_HASHSIZE];
@@ -122,17 +123,17 @@ void kvm_async_pf_task_wait(u32 token)
u32 key = hash_32(token, KVM_TASK_SLEEP_HASHBITS);
struct kvm_task_sleep_head *b = &async_pf_sleepers[key];
struct kvm_task_sleep_node n, *e;
- DEFINE_WAIT(wait);
+ DECLARE_SWAITQUEUE(wait);
rcu_irq_enter();
- spin_lock(&b->lock);
+ raw_spin_lock(&b->lock);
e = _find_apf_task(b, token);
if (e) {
/* dummy entry exist -> wake up was delivered ahead of PF */
hlist_del(&e->link);
kfree(e);
- spin_unlock(&b->lock);
+ raw_spin_unlock(&b->lock);
rcu_irq_exit();
return;
@@ -141,13 +142,13 @@ void kvm_async_pf_task_wait(u32 token)
n.token = token;
n.cpu = smp_processor_id();
n.halted = is_idle_task(current) || preempt_count() > 1;
- init_waitqueue_head(&n.wq);
+ init_swait_queue_head(&n.wq);
hlist_add_head(&n.link, &b->list);
- spin_unlock(&b->lock);
+ raw_spin_unlock(&b->lock);
for (;;) {
if (!n.halted)
- prepare_to_wait(&n.wq, &wait, TASK_UNINTERRUPTIBLE);
+ prepare_to_swait(&n.wq, &wait, TASK_UNINTERRUPTIBLE);
if (hlist_unhashed(&n.link))
break;
@@ -166,7 +167,7 @@ void kvm_async_pf_task_wait(u32 token)
}
}
if (!n.halted)
- finish_wait(&n.wq, &wait);
+ finish_swait(&n.wq, &wait);
rcu_irq_exit();
return;
@@ -178,8 +179,8 @@ static void apf_task_wake_one(struct kvm_task_sleep_node *n)
hlist_del_init(&n->link);
if (n->halted)
smp_send_reschedule(n->cpu);
- else if (waitqueue_active(&n->wq))
- wake_up(&n->wq);
+ else if (swait_active(&n->wq))
+ swake_up(&n->wq);
}
static void apf_task_wake_all(void)
@@ -189,14 +190,14 @@ static void apf_task_wake_all(void)
for (i = 0; i < KVM_TASK_SLEEP_HASHSIZE; i++) {
struct hlist_node *p, *next;
struct kvm_task_sleep_head *b = &async_pf_sleepers[i];
- spin_lock(&b->lock);
+ raw_spin_lock(&b->lock);
hlist_for_each_safe(p, next, &b->list) {
struct kvm_task_sleep_node *n =
hlist_entry(p, typeof(*n), link);
if (n->cpu == smp_processor_id())
apf_task_wake_one(n);
}
- spin_unlock(&b->lock);
+ raw_spin_unlock(&b->lock);
}
}
@@ -212,7 +213,7 @@ void kvm_async_pf_task_wake(u32 token)
}
again:
- spin_lock(&b->lock);
+ raw_spin_lock(&b->lock);
n = _find_apf_task(b, token);
if (!n) {
/*
@@ -225,17 +226,17 @@ again:
* Allocation failed! Busy wait while other cpu
* handles async PF.
*/
- spin_unlock(&b->lock);
+ raw_spin_unlock(&b->lock);
cpu_relax();
goto again;
}
n->token = token;
n->cpu = smp_processor_id();
- init_waitqueue_head(&n->wq);
+ init_swait_queue_head(&n->wq);
hlist_add_head(&n->link, &b->list);
} else
apf_task_wake_one(n);
- spin_unlock(&b->lock);
+ raw_spin_unlock(&b->lock);
return;
}
EXPORT_SYMBOL_GPL(kvm_async_pf_task_wake);
@@ -486,7 +487,7 @@ void __init kvm_guest_init(void)
paravirt_ops_setup();
register_reboot_notifier(&kvm_pv_reboot_nb);
for (i = 0; i < KVM_TASK_SLEEP_HASHSIZE; i++)
- spin_lock_init(&async_pf_sleepers[i].lock);
+ raw_spin_lock_init(&async_pf_sleepers[i].lock);
if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF))
x86_init.irqs.trap_init = kvm_apf_trap_init;
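The kvm.c hunks above convert the async-PF sleeper hash from wait_queue_head_t plus spinlock_t to the simple-wait (swait) API plus raw spinlocks. A minimal sketch of the swait wait/wake pairing being adopted; the demo_* names are hypothetical and kernel context is assumed.

#include <linux/swait.h>
#include <linux/sched.h>

static DECLARE_SWAIT_QUEUE_HEAD(demo_wq);
static bool demo_done;

static void demo_wait(void)
{
	DECLARE_SWAITQUEUE(wait);

	for (;;) {
		prepare_to_swait(&demo_wq, &wait, TASK_UNINTERRUPTIBLE);
		if (READ_ONCE(demo_done))
			break;
		schedule();
	}
	finish_swait(&demo_wq, &wait);
}

static void demo_wake(void)
{
	WRITE_ONCE(demo_done, true);
	if (swait_active(&demo_wq))
		swake_up(&demo_wq);	/* wakes a single waiter, as above */
}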
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index 72cef5869..1d39bfbd2 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -226,7 +226,7 @@ static void kvm_setup_secondary_clock(void)
* registered memory location. If the guest happens to shutdown, this memory
* won't be valid. In cases like kexec, in which you install a new kernel, this
 * means a random memory location will keep being written to. So before any
- * kind of shutdown from our side, we unregister the clock by writting anything
+ * kind of shutdown from our side, we unregister the clock by writing anything
* that does not have the 'enable' bit set in the msr
*/
#ifdef CONFIG_KEXEC_CORE
diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
index 6acc9dd91..6707039b9 100644
--- a/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c
@@ -103,7 +103,7 @@ static void free_ldt_struct(struct ldt_struct *ldt)
* we do not have to muck with descriptors here, that is
* done in switch_mm() as needed.
*/
-int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
+int init_new_context_ldt(struct task_struct *tsk, struct mm_struct *mm)
{
struct ldt_struct *new_ldt;
struct mm_struct *old_mm;
@@ -144,7 +144,7 @@ out_unlock:
*
* 64bit: Don't touch the LDT register - we're already in the next thread.
*/
-void destroy_context(struct mm_struct *mm)
+void destroy_context_ldt(struct mm_struct *mm)
{
free_ldt_struct(mm->context.ldt);
mm->context.ldt = NULL;
diff --git a/arch/x86/kernel/mcount_64.S b/arch/x86/kernel/mcount_64.S
index 87e1762e2..ed48a9f46 100644
--- a/arch/x86/kernel/mcount_64.S
+++ b/arch/x86/kernel/mcount_64.S
@@ -168,12 +168,14 @@ GLOBAL(ftrace_call)
restore_mcount_regs
/*
- * The copied trampoline must call ftrace_return as it
+ * The copied trampoline must call ftrace_epilogue as it
* still may need to call the function graph tracer.
+ *
+ * The code up to this label is copied into trampolines, so
+ * think twice before adding any new code or changing the
+ * layout here.
*/
-GLOBAL(ftrace_caller_end)
-
-GLOBAL(ftrace_return)
+GLOBAL(ftrace_epilogue)
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
GLOBAL(ftrace_graph_call)
@@ -244,14 +246,14 @@ GLOBAL(ftrace_regs_call)
popfq
/*
- * As this jmp to ftrace_return can be a short jump
+ * As this jmp to ftrace_epilogue can be a short jump
* it must not be copied into the trampoline.
* The trampoline will add the code to jump
* to the return.
*/
GLOBAL(ftrace_regs_caller_end)
- jmp ftrace_return
+ jmp ftrace_epilogue
END(ftrace_regs_caller)
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index 30ca7607c..97340f2c4 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -408,7 +408,7 @@ static inline void __init construct_default_ISA_mptable(int mpc_default_type)
processor.cpuflag = CPU_ENABLED;
processor.cpufeature = (boot_cpu_data.x86 << 8) |
(boot_cpu_data.x86_model << 4) | boot_cpu_data.x86_mask;
- processor.featureflag = boot_cpu_data.x86_capability[0];
+ processor.featureflag = boot_cpu_data.x86_capability[CPUID_1_EDX];
processor.reserved[0] = 0;
processor.reserved[1] = 0;
for (i = 0; i < 2; i++) {
diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c
index 64f9616f9..7f3550acd 100644
--- a/arch/x86/kernel/msr.c
+++ b/arch/x86/kernel/msr.c
@@ -40,7 +40,7 @@
#include <linux/uaccess.h>
#include <linux/gfp.h>
-#include <asm/processor.h>
+#include <asm/cpufeature.h>
#include <asm/msr.h>
static struct class *msr_class;
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index 8a2cdd736..04b132a76 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -30,6 +30,7 @@
#include <asm/nmi.h>
#include <asm/x86_init.h>
#include <asm/reboot.h>
+#include <asm/cache.h>
#define CREATE_TRACE_POINTS
#include <trace/events/nmi.h>
@@ -69,7 +70,7 @@ struct nmi_stats {
static DEFINE_PER_CPU(struct nmi_stats, nmi_stats);
-static int ignore_nmis;
+static int ignore_nmis __read_mostly;
int unknown_nmi_panic;
/*
diff --git a/arch/x86/kernel/pmem.c b/arch/x86/kernel/pmem.c
index 14415aff1..92f70147a 100644
--- a/arch/x86/kernel/pmem.c
+++ b/arch/x86/kernel/pmem.c
@@ -13,11 +13,11 @@ static int found(u64 start, u64 end, void *data)
static __init int register_e820_pmem(void)
{
- char *pmem = "Persistent Memory (legacy)";
struct platform_device *pdev;
int rc;
- rc = walk_iomem_res(pmem, IORESOURCE_MEM, 0, -1, NULL, found);
+ rc = walk_iomem_res_desc(IORES_DESC_PERSISTENT_MEMORY_LEGACY,
+ IORESOURCE_MEM, 0, -1, NULL, found);
if (rc <= 0)
return 0;
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 9f7c21c22..2915d54e9 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -57,6 +57,9 @@ __visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = {
*/
.io_bitmap = { [0 ... IO_BITMAP_LONGS] = ~0 },
#endif
+#ifdef CONFIG_X86_32
+ .SYSENTER_stack_canary = STACK_END_MAGIC,
+#endif
};
EXPORT_PER_CPU_SYMBOL(cpu_tss);
@@ -418,9 +421,9 @@ static void mwait_idle(void)
if (!current_set_polling_and_test()) {
trace_cpu_idle_rcuidle(1, smp_processor_id());
if (this_cpu_has(X86_BUG_CLFLUSH_MONITOR)) {
- smp_mb(); /* quirk */
+ mb(); /* quirk */
clflush((void *)&current_thread_info()->flags);
- smp_mb(); /* quirk */
+ mb(); /* quirk */
}
__monitor((void *)&current_thread_info()->flags, 0, 0);
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 9f7518760..6cbab31ac 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -117,6 +117,8 @@ void __show_regs(struct pt_regs *regs, int all)
printk(KERN_DEFAULT "DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2);
printk(KERN_DEFAULT "DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7);
+ if (boot_cpu_has(X86_FEATURE_OSPKE))
+ printk(KERN_DEFAULT "PKRU: %08x\n", read_pkru());
}
void release_thread(struct task_struct *dead_task)
@@ -488,7 +490,7 @@ void set_personality_ia32(bool x32)
if (current->mm)
current->mm->context.ia32_compat = TIF_X32;
current->personality &= ~READ_IMPLIES_EXEC;
- /* is_compat_task() uses the presence of the x32
+ /* in_compat_syscall() uses the presence of the x32
syscall bit flag to determine compat status */
current_thread_info()->status &= ~TS_COMPAT;
} else {
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index d3d80e6d4..2367ae07e 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -112,6 +112,7 @@
#include <asm/alternative.h>
#include <asm/prom.h>
#include <asm/microcode.h>
+#include <asm/mmu_context.h>
/*
* max_low_pfn_mapped: highest direct mapped pfn under 4GB
@@ -152,21 +153,21 @@ static struct resource data_resource = {
.name = "Kernel data",
.start = 0,
.end = 0,
- .flags = IORESOURCE_BUSY | IORESOURCE_MEM
+ .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM
};
static struct resource code_resource = {
.name = "Kernel code",
.start = 0,
.end = 0,
- .flags = IORESOURCE_BUSY | IORESOURCE_MEM
+ .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM
};
static struct resource bss_resource = {
.name = "Kernel bss",
.start = 0,
.end = 0,
- .flags = IORESOURCE_BUSY | IORESOURCE_MEM
+ .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM
};
@@ -1282,3 +1283,11 @@ static int __init register_kernel_offset_dumper(void)
return 0;
}
__initcall(register_kernel_offset_dumper);
+
+void arch_show_smap(struct seq_file *m, struct vm_area_struct *vma)
+{
+ if (!boot_cpu_has(X86_FEATURE_OSPKE))
+ return;
+
+ seq_printf(m, "ProtectionKey: %8u\n", vma_pkey(vma));
+}
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index cb6282c36..548ddf7d6 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -61,7 +61,38 @@
regs->seg = GET_SEG(seg) | 3; \
} while (0)
-int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc)
+#ifdef CONFIG_X86_64
+/*
+ * If regs->ss will cause an IRET fault, change it. Otherwise leave it
+ * alone. Using this generally makes no sense unless
+ * user_64bit_mode(regs) would return true.
+ */
+static void force_valid_ss(struct pt_regs *regs)
+{
+ u32 ar;
+ asm volatile ("lar %[old_ss], %[ar]\n\t"
+ "jz 1f\n\t" /* If invalid: */
+ "xorl %[ar], %[ar]\n\t" /* set ar = 0 */
+ "1:"
+ : [ar] "=r" (ar)
+ : [old_ss] "rm" ((u16)regs->ss));
+
+ /*
+ * For a valid 64-bit user context, we need DPL 3, type
+ * read-write data or read-write exp-down data, and S and P
+ * set. We can't use VERW because VERW doesn't check the
+ * P bit.
+ */
+ ar &= AR_DPL_MASK | AR_S | AR_P | AR_TYPE_MASK;
+ if (ar != (AR_DPL3 | AR_S | AR_P | AR_TYPE_RWDATA) &&
+ ar != (AR_DPL3 | AR_S | AR_P | AR_TYPE_RWDATA_EXPDOWN))
+ regs->ss = __USER_DS;
+}
+#endif
+
+static int restore_sigcontext(struct pt_regs *regs,
+ struct sigcontext __user *sc,
+ unsigned long uc_flags)
{
unsigned long buf_val;
void __user *buf;
@@ -94,15 +125,18 @@ int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc)
COPY(r15);
#endif /* CONFIG_X86_64 */
-#ifdef CONFIG_X86_32
COPY_SEG_CPL3(cs);
COPY_SEG_CPL3(ss);
-#else /* !CONFIG_X86_32 */
- /* Kernel saves and restores only the CS segment register on signals,
- * which is the bare minimum needed to allow mixed 32/64-bit code.
- * App's signal handler can save/restore other segments if needed. */
- COPY_SEG_CPL3(cs);
-#endif /* CONFIG_X86_32 */
+
+#ifdef CONFIG_X86_64
+ /*
+ * Fix up SS if needed for the benefit of old DOSEMU and
+ * CRIU.
+ */
+ if (unlikely(!(uc_flags & UC_STRICT_RESTORE_SS) &&
+ user_64bit_mode(regs)))
+ force_valid_ss(regs);
+#endif
get_user_ex(tmpflags, &sc->flags);
regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS);
@@ -165,6 +199,7 @@ int setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate,
put_user_ex(regs->cs, &sc->cs);
put_user_ex(0, &sc->gs);
put_user_ex(0, &sc->fs);
+ put_user_ex(regs->ss, &sc->ss);
#endif /* CONFIG_X86_32 */
put_user_ex(fpstate, &sc->fpstate);
@@ -403,6 +438,21 @@ static int __setup_rt_frame(int sig, struct ksignal *ksig,
return 0;
}
#else /* !CONFIG_X86_32 */
+static unsigned long frame_uc_flags(struct pt_regs *regs)
+{
+ unsigned long flags;
+
+ if (cpu_has_xsave)
+ flags = UC_FP_XSTATE | UC_SIGCONTEXT_SS;
+ else
+ flags = UC_SIGCONTEXT_SS;
+
+ if (likely(user_64bit_mode(regs)))
+ flags |= UC_STRICT_RESTORE_SS;
+
+ return flags;
+}
+
static int __setup_rt_frame(int sig, struct ksignal *ksig,
sigset_t *set, struct pt_regs *regs)
{
@@ -422,10 +472,7 @@ static int __setup_rt_frame(int sig, struct ksignal *ksig,
put_user_try {
/* Create the ucontext. */
- if (cpu_has_xsave)
- put_user_ex(UC_FP_XSTATE, &frame->uc.uc_flags);
- else
- put_user_ex(0, &frame->uc.uc_flags);
+ put_user_ex(frame_uc_flags(regs), &frame->uc.uc_flags);
put_user_ex(0, &frame->uc.uc_link);
save_altstack_ex(&frame->uc.uc_stack, regs->sp);
@@ -459,10 +506,28 @@ static int __setup_rt_frame(int sig, struct ksignal *ksig,
regs->sp = (unsigned long)frame;
- /* Set up the CS register to run signal handlers in 64-bit mode,
- even if the handler happens to be interrupting 32-bit code. */
+ /*
+ * Set up the CS and SS registers to run signal handlers in
+ * 64-bit mode, even if the handler happens to be interrupting
+ * 32-bit or 16-bit code.
+ *
+ * SS is subtle. In 64-bit mode, we don't need any particular
+ * SS descriptor, but we do need SS to be valid. It's possible
+ * that the old SS is entirely bogus -- this can happen if the
+ * signal we're trying to deliver is #GP or #SS caused by a bad
+ * SS value. We also have a compatibility issue here: DOSEMU
+ * relies on the contents of the SS register indicating the
+ * SS value at the time of the signal, even though that code in
+ * DOSEMU predates sigreturn's ability to restore SS. (DOSEMU
+ * avoids relying on sigreturn to restore SS; instead it uses
+ * a trampoline.) So we do our best: if the old SS was valid,
+ * we keep it. Otherwise we replace it.
+ */
regs->cs = __USER_CS;
+ if (unlikely(regs->ss != __USER_DS))
+ force_valid_ss(regs);
+
return 0;
}
#endif /* CONFIG_X86_32 */
@@ -489,10 +554,7 @@ static int x32_setup_rt_frame(struct ksignal *ksig,
put_user_try {
/* Create the ucontext. */
- if (cpu_has_xsave)
- put_user_ex(UC_FP_XSTATE, &frame->uc.uc_flags);
- else
- put_user_ex(0, &frame->uc.uc_flags);
+ put_user_ex(frame_uc_flags(regs), &frame->uc.uc_flags);
put_user_ex(0, &frame->uc.uc_link);
compat_save_altstack_ex(&frame->uc.uc_stack, regs->sp);
put_user_ex(0, &frame->uc.uc__pad0);
@@ -554,7 +616,11 @@ asmlinkage unsigned long sys_sigreturn(void)
set_current_blocked(&set);
- if (restore_sigcontext(regs, &frame->sc))
+ /*
+ * x86_32 has no uc_flags bits relevant to restore_sigcontext.
+ * Save a few cycles by skipping the __get_user.
+ */
+ if (restore_sigcontext(regs, &frame->sc, 0))
goto badframe;
return regs->ax;
@@ -570,16 +636,19 @@ asmlinkage long sys_rt_sigreturn(void)
struct pt_regs *regs = current_pt_regs();
struct rt_sigframe __user *frame;
sigset_t set;
+ unsigned long uc_flags;
frame = (struct rt_sigframe __user *)(regs->sp - sizeof(long));
if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
goto badframe;
if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
goto badframe;
+ if (__get_user(uc_flags, &frame->uc.uc_flags))
+ goto badframe;
set_current_blocked(&set);
- if (restore_sigcontext(regs, &frame->uc.uc_mcontext))
+ if (restore_sigcontext(regs, &frame->uc.uc_mcontext, uc_flags))
goto badframe;
if (restore_altstack(&frame->uc.uc_stack))
@@ -692,12 +761,15 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs)
static inline unsigned long get_nr_restart_syscall(const struct pt_regs *regs)
{
-#if defined(CONFIG_X86_32) || !defined(CONFIG_X86_64)
+#ifdef CONFIG_X86_64
+ if (is_ia32_task())
+ return __NR_ia32_restart_syscall;
+#endif
+#ifdef CONFIG_X86_X32_ABI
+ return __NR_restart_syscall | (regs->orig_ax & __X32_SYSCALL_BIT);
+#else
return __NR_restart_syscall;
-#else /* !CONFIG_X86_32 && CONFIG_X86_64 */
- return test_thread_flag(TIF_IA32) ? __NR_ia32_restart_syscall :
- __NR_restart_syscall | (regs->orig_ax & __X32_SYSCALL_BIT);
-#endif /* CONFIG_X86_32 || !CONFIG_X86_64 */
+#endif
}
/*
@@ -763,6 +835,7 @@ asmlinkage long sys32_x32_rt_sigreturn(void)
struct pt_regs *regs = current_pt_regs();
struct rt_sigframe_x32 __user *frame;
sigset_t set;
+ unsigned long uc_flags;
frame = (struct rt_sigframe_x32 __user *)(regs->sp - 8);
@@ -770,10 +843,12 @@ asmlinkage long sys32_x32_rt_sigreturn(void)
goto badframe;
if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
goto badframe;
+ if (__get_user(uc_flags, &frame->uc.uc_flags))
+ goto badframe;
set_current_blocked(&set);
- if (restore_sigcontext(regs, &frame->uc.uc_mcontext))
+ if (restore_sigcontext(regs, &frame->uc.uc_mcontext, uc_flags))
goto badframe;
if (compat_restore_altstack(&frame->uc.uc_stack))
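The signal.c changes above start saving SS in the 64-bit sigcontext and advertise that through new uc_flags bits. A userspace sketch of how a handler could probe those bits; the fallback #define values mirror the uapi header, and printf in a handler is fine for a demo only (not async-signal-safe in general).

#include <signal.h>
#include <stdio.h>
#include <ucontext.h>

#ifndef UC_SIGCONTEXT_SS
#define UC_SIGCONTEXT_SS	0x2
#define UC_STRICT_RESTORE_SS	0x4
#endif

static void handler(int sig, siginfo_t *si, void *ctx)
{
	ucontext_t *uc = ctx;

	if (uc->uc_flags & UC_SIGCONTEXT_SS)
		printf("kernel saves/restores SS in the sigcontext\n");
	if (uc->uc_flags & UC_STRICT_RESTORE_SS)
		printf("sigreturn will restore SS verbatim\n");
}

int main(void)
{
	struct sigaction sa = { .sa_sigaction = handler,
				.sa_flags = SA_SIGINFO };

	sigaction(SIGUSR1, &sa, NULL);
	raise(SIGUSR1);
	return 0;
}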
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 24d57f77b..0e4329ed9 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -97,6 +97,14 @@ DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_llc_shared_map);
DEFINE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info);
EXPORT_PER_CPU_SYMBOL(cpu_info);
+/* Logical package management. We might want to allocate that dynamically */
+static int *physical_to_logical_pkg __read_mostly;
+static unsigned long *physical_package_map __read_mostly;
+static unsigned long *logical_package_map __read_mostly;
+static unsigned int max_physical_pkg_id __read_mostly;
+unsigned int __max_logical_packages __read_mostly;
+EXPORT_SYMBOL(__max_logical_packages);
+
static inline void smpboot_setup_warm_reset_vector(unsigned long start_eip)
{
unsigned long flags;
@@ -248,7 +256,116 @@ static void notrace start_secondary(void *unused)
x86_cpuinit.setup_percpu_clockev();
wmb();
- cpu_startup_entry(CPUHP_ONLINE);
+ cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
+}
+
+int topology_update_package_map(unsigned int apicid, unsigned int cpu)
+{
+ unsigned int new, pkg = apicid >> boot_cpu_data.x86_coreid_bits;
+
+ /* Called from early boot ? */
+ if (!physical_package_map)
+ return 0;
+
+ if (pkg >= max_physical_pkg_id)
+ return -EINVAL;
+
+ /* Set the logical package id */
+ if (test_and_set_bit(pkg, physical_package_map))
+ goto found;
+
+ new = find_first_zero_bit(logical_package_map, __max_logical_packages);
+ if (new >= __max_logical_packages) {
+ physical_to_logical_pkg[pkg] = -1;
+ pr_warn("APIC(%x) Package %u exceeds logical package map\n",
+ apicid, pkg);
+ return -ENOSPC;
+ }
+ set_bit(new, logical_package_map);
+ pr_info("APIC(%x) Converting physical %u to logical package %u\n",
+ apicid, pkg, new);
+ physical_to_logical_pkg[pkg] = new;
+
+found:
+ cpu_data(cpu).logical_proc_id = physical_to_logical_pkg[pkg];
+ return 0;
+}
+
+/**
+ * topology_phys_to_logical_pkg - Map a physical package id to a logical package id
+ *
+ * Returns logical package id or -1 if not found
+ */
+int topology_phys_to_logical_pkg(unsigned int phys_pkg)
+{
+ if (phys_pkg >= max_physical_pkg_id)
+ return -1;
+ return physical_to_logical_pkg[phys_pkg];
+}
+EXPORT_SYMBOL(topology_phys_to_logical_pkg);
+
+static void __init smp_init_package_map(void)
+{
+ unsigned int ncpus, cpu;
+ size_t size;
+
+ /*
+ * Today neither Intel nor AMD supports heterogeneous systems. That
+ * might change in the future....
+ *
+ * While ideally we'd want '* smp_num_siblings' in the below @ncpus
+ * computation, this won't actually work since some Intel BIOSes
+ * report inconsistent HT data when they disable HT.
+ *
+ * In particular, they reduce the APIC-IDs to only include the cores,
+ * but leave the CPUID topology to say there are (2) siblings.
+ * This means we don't know how many threads there will be until
+ * after the APIC enumeration.
+ *
+ * By not including this we'll sometimes over-estimate the number of
+ * logical packages by the amount of !present siblings, but this is
+ * still better than MAX_LOCAL_APIC.
+ *
+ * We use total_cpus not nr_cpu_ids because nr_cpu_ids can be limited
+ * on the command line leading to a similar issue as the HT disable
+ * problem because the hyperthreads are usually enumerated after the
+ * primary cores.
+ */
+ ncpus = boot_cpu_data.x86_max_cores;
+ if (!ncpus) {
+ pr_warn("x86_max_cores == zero !?!?\n");
+ ncpus = 1;
+ }
+
+ __max_logical_packages = DIV_ROUND_UP(total_cpus, ncpus);
+
+ /*
+ * Possibly larger than what we need as the number of apic ids per
+ * package can be smaller than the actual used apic ids.
+ */
+ max_physical_pkg_id = DIV_ROUND_UP(MAX_LOCAL_APIC, ncpus);
+ size = max_physical_pkg_id * sizeof(unsigned int);
+ physical_to_logical_pkg = kmalloc(size, GFP_KERNEL);
+ memset(physical_to_logical_pkg, 0xff, size);
+ size = BITS_TO_LONGS(max_physical_pkg_id) * sizeof(unsigned long);
+ physical_package_map = kzalloc(size, GFP_KERNEL);
+ size = BITS_TO_LONGS(__max_logical_packages) * sizeof(unsigned long);
+ logical_package_map = kzalloc(size, GFP_KERNEL);
+
+ pr_info("Max logical packages: %u\n", __max_logical_packages);
+
+ for_each_present_cpu(cpu) {
+ unsigned int apicid = apic->cpu_present_to_apicid(cpu);
+
+ if (apicid == BAD_APICID || !apic->apic_id_valid(apicid))
+ continue;
+ if (!topology_update_package_map(apicid, cpu))
+ continue;
+ pr_warn("CPU %u APICId %x disabled\n", cpu, apicid);
+ per_cpu(x86_bios_cpu_apicid, cpu) = BAD_APICID;
+ set_cpu_possible(cpu, false);
+ set_cpu_present(cpu, false);
+ }
}
void __init smp_store_boot_cpu_info(void)
@@ -258,6 +375,7 @@ void __init smp_store_boot_cpu_info(void)
*c = boot_cpu_data;
c->cpu_index = id;
+ smp_init_package_map();
}
/*
@@ -309,7 +427,7 @@ static bool match_smt(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
if (c->phys_proc_id == o->phys_proc_id &&
per_cpu(cpu_llc_id, cpu1) == per_cpu(cpu_llc_id, cpu2) &&
- c->compute_unit_id == o->compute_unit_id)
+ c->cpu_core_id == o->cpu_core_id)
return topology_sane(c, o, "smt");
} else if (c->phys_proc_id == o->phys_proc_id &&
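The comment in smp_init_package_map() above explains why __max_logical_packages deliberately over-estimates the socket count. A small userspace sketch of the same arithmetic with hypothetical topology numbers (2 sockets, 18 cores each, hyperthreading enabled; the MAX_LOCAL_APIC value is illustrative).

#include <stdio.h>

#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))

int main(void)
{
	unsigned int total_cpus = 72;		/* 2 sockets * 18 cores * 2 threads */
	unsigned int ncpus = 18;		/* x86_max_cores: threads not counted */
	unsigned int max_local_apic = 32768;	/* illustrative MAX_LOCAL_APIC */

	/* 72 / 18 = 4: twice the real socket count, as the comment warns */
	printf("__max_logical_packages = %u\n",
	       DIV_ROUND_UP(total_cpus, ncpus));
	/* upper bound on physical package ids that may need tracking */
	printf("max_physical_pkg_id   = %u\n",
	       DIV_ROUND_UP(max_local_apic, ncpus));
	return 0;
}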
diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c
index fdd0c6430..9ee98eefc 100644
--- a/arch/x86/kernel/stacktrace.c
+++ b/arch/x86/kernel/stacktrace.c
@@ -14,30 +14,34 @@ static int save_stack_stack(void *data, char *name)
return 0;
}
-static void
+static int
__save_stack_address(void *data, unsigned long addr, bool reliable, bool nosched)
{
struct stack_trace *trace = data;
#ifdef CONFIG_FRAME_POINTER
if (!reliable)
- return;
+ return 0;
#endif
if (nosched && in_sched_functions(addr))
- return;
+ return 0;
if (trace->skip > 0) {
trace->skip--;
- return;
+ return 0;
}
- if (trace->nr_entries < trace->max_entries)
+ if (trace->nr_entries < trace->max_entries) {
trace->entries[trace->nr_entries++] = addr;
+ return 0;
+ } else {
+ return -1; /* no more room, stop walking the stack */
+ }
}
-static void save_stack_address(void *data, unsigned long addr, int reliable)
+static int save_stack_address(void *data, unsigned long addr, int reliable)
{
return __save_stack_address(data, addr, reliable, false);
}
-static void
+static int
save_stack_address_nosched(void *data, unsigned long addr, int reliable)
{
return __save_stack_address(data, addr, reliable, true);
diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c
index 91a4496db..e72a07f20 100644
--- a/arch/x86/kernel/tboot.c
+++ b/arch/x86/kernel/tboot.c
@@ -135,7 +135,7 @@ static int map_tboot_page(unsigned long vaddr, unsigned long pfn,
pmd = pmd_alloc(&tboot_mm, pud, vaddr);
if (!pmd)
return -1;
- pte = pte_alloc_map(&tboot_mm, NULL, pmd, vaddr);
+ pte = pte_alloc_map(&tboot_mm, pmd, vaddr);
if (!pte)
return -1;
set_pte_at(&tboot_mm, vaddr, pte, pfn_pte(pfn, prot));
diff --git a/arch/x86/kernel/test_nx.c b/arch/x86/kernel/test_nx.c
index 3f92ce07e..27538f183 100644
--- a/arch/x86/kernel/test_nx.c
+++ b/arch/x86/kernel/test_nx.c
@@ -142,7 +142,6 @@ static int test_NX(void)
* by the error message
*/
-#ifdef CONFIG_DEBUG_RODATA
/* Test 3: Check if the .rodata section is executable */
if (rodata_test_data != 0xC3) {
printk(KERN_ERR "test_nx: .rodata marker has invalid value\n");
@@ -151,7 +150,6 @@ static int test_NX(void)
printk(KERN_ERR "test_nx: .rodata section is executable\n");
ret = -ENODEV;
}
-#endif
#if 0
/* Test 4: Check if the .data section of a module is executable */
diff --git a/arch/x86/kernel/test_rodata.c b/arch/x86/kernel/test_rodata.c
index 5ecbfe509..cb4a01b41 100644
--- a/arch/x86/kernel/test_rodata.c
+++ b/arch/x86/kernel/test_rodata.c
@@ -76,5 +76,5 @@ int rodata_test(void)
}
MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("Testcase for the DEBUG_RODATA infrastructure");
+MODULE_DESCRIPTION("Testcase for marking rodata as read-only");
MODULE_AUTHOR("Arjan van de Ven <arjan@linux.intel.com>");
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index ade185a46..06cbe2586 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -83,30 +83,16 @@ gate_desc idt_table[NR_VECTORS] __page_aligned_bss;
DECLARE_BITMAP(used_vectors, NR_VECTORS);
EXPORT_SYMBOL_GPL(used_vectors);
-static inline void conditional_sti(struct pt_regs *regs)
+static inline void cond_local_irq_enable(struct pt_regs *regs)
{
if (regs->flags & X86_EFLAGS_IF)
local_irq_enable();
}
-static inline void preempt_conditional_sti(struct pt_regs *regs)
-{
- preempt_count_inc();
- if (regs->flags & X86_EFLAGS_IF)
- local_irq_enable();
-}
-
-static inline void conditional_cli(struct pt_regs *regs)
-{
- if (regs->flags & X86_EFLAGS_IF)
- local_irq_disable();
-}
-
-static inline void preempt_conditional_cli(struct pt_regs *regs)
+static inline void cond_local_irq_disable(struct pt_regs *regs)
{
if (regs->flags & X86_EFLAGS_IF)
local_irq_disable();
- preempt_count_dec();
}
void ist_enter(struct pt_regs *regs)
@@ -199,7 +185,7 @@ do_trap_no_signal(struct task_struct *tsk, int trapnr, char *str,
}
if (!user_mode(regs)) {
- if (!fixup_exception(regs)) {
+ if (!fixup_exception(regs, trapnr)) {
tsk->thread.error_code = error_code;
tsk->thread.trap_nr = trapnr;
die(str, regs, error_code);
@@ -262,7 +248,6 @@ do_trap(int trapnr, int signr, char *str, struct pt_regs *regs,
tsk->thread.error_code = error_code;
tsk->thread.trap_nr = trapnr;
-#ifdef CONFIG_X86_64
if (show_unhandled_signals && unhandled_signal(tsk, signr) &&
printk_ratelimit()) {
pr_info("%s[%d] trap %s ip:%lx sp:%lx error:%lx",
@@ -271,7 +256,6 @@ do_trap(int trapnr, int signr, char *str, struct pt_regs *regs,
print_vma_addr(" in ", regs->ip);
pr_cont("\n");
}
-#endif
force_sig_info(signr, info ?: SEND_SIG_PRIV, tsk);
}
@@ -286,7 +270,7 @@ static void do_error_trap(struct pt_regs *regs, long error_code, char *str,
if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) !=
NOTIFY_STOP) {
- conditional_sti(regs);
+ cond_local_irq_enable(regs);
do_trap(trapnr, signr, str, regs, error_code,
fill_trap_info(regs, signr, trapnr, &info));
}
@@ -368,7 +352,7 @@ dotraplinkage void do_bounds(struct pt_regs *regs, long error_code)
if (notify_die(DIE_TRAP, "bounds", regs, error_code,
X86_TRAP_BR, SIGSEGV) == NOTIFY_STOP)
return;
- conditional_sti(regs);
+ cond_local_irq_enable(regs);
if (!user_mode(regs))
die("bounds", regs, error_code);
@@ -443,7 +427,7 @@ do_general_protection(struct pt_regs *regs, long error_code)
struct task_struct *tsk;
RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
- conditional_sti(regs);
+ cond_local_irq_enable(regs);
if (v8086_mode(regs)) {
local_irq_enable();
@@ -453,7 +437,7 @@ do_general_protection(struct pt_regs *regs, long error_code)
tsk = current;
if (!user_mode(regs)) {
- if (fixup_exception(regs))
+ if (fixup_exception(regs, X86_TRAP_GP))
return;
tsk->thread.error_code = error_code;
@@ -517,9 +501,11 @@ dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code)
* as we may switch to the interrupt stack.
*/
debug_stack_usage_inc();
- preempt_conditional_sti(regs);
+ preempt_disable();
+ cond_local_irq_enable(regs);
do_trap(X86_TRAP_BP, SIGTRAP, "int3", regs, error_code, NULL);
- preempt_conditional_cli(regs);
+ cond_local_irq_disable(regs);
+ preempt_enable_no_resched();
debug_stack_usage_dec();
exit:
ist_exit(regs);
@@ -571,6 +557,29 @@ struct bad_iret_stack *fixup_bad_iret(struct bad_iret_stack *s)
NOKPROBE_SYMBOL(fixup_bad_iret);
#endif
+static bool is_sysenter_singlestep(struct pt_regs *regs)
+{
+ /*
+ * We don't try for precision here. If we're anywhere in the region of
+ * code that can be single-stepped in the SYSENTER entry path, then
+ * assume that this is a useless single-step trap due to SYSENTER
+ * being invoked with TF set. (We don't know in advance exactly
+ * which instructions will be hit because BTF could plausibly
+ * be set.)
+ */
+#ifdef CONFIG_X86_32
+ return (regs->ip - (unsigned long)__begin_SYSENTER_singlestep_region) <
+ (unsigned long)__end_SYSENTER_singlestep_region -
+ (unsigned long)__begin_SYSENTER_singlestep_region;
+#elif defined(CONFIG_IA32_EMULATION)
+ return (regs->ip - (unsigned long)entry_SYSENTER_compat) <
+ (unsigned long)__end_entry_SYSENTER_compat -
+ (unsigned long)entry_SYSENTER_compat;
+#else
+ return false;
+#endif
+}
+
/*
* Our handling of the processor debug registers is non-trivial.
* We do not clear them on entry and exit from the kernel. Therefore
@@ -605,11 +614,42 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code)
ist_enter(regs);
get_debugreg(dr6, 6);
+ /*
+ * The Intel SDM says:
+ *
+ * Certain debug exceptions may clear bits 0-3. The remaining
+ * contents of the DR6 register are never cleared by the
+ * processor. To avoid confusion in identifying debug
+ * exceptions, debug handlers should clear the register before
+ * returning to the interrupted task.
+ *
+ * Keep it simple: clear DR6 immediately.
+ */
+ set_debugreg(0, 6);
/* Filter out all the reserved bits which are preset to 1 */
dr6 &= ~DR6_RESERVED;
/*
+ * The SDM says "The processor clears the BTF flag when it
+ * generates a debug exception." Clear TIF_BLOCKSTEP to keep
+ * TIF_BLOCKSTEP in sync with the hardware BTF flag.
+ */
+ clear_tsk_thread_flag(tsk, TIF_BLOCKSTEP);
+
+ if (unlikely(!user_mode(regs) && (dr6 & DR_STEP) &&
+ is_sysenter_singlestep(regs))) {
+ dr6 &= ~DR_STEP;
+ if (!dr6)
+ goto exit;
+ /*
+ * else we might have gotten a single-step trap and hit a
+ * watchpoint at the same time, in which case we should fall
+ * through and handle the watchpoint.
+ */
+ }
+
+ /*
* If dr6 has no reason to give us about the origin of this trap,
* then it's very likely the result of an icebp/int01 trap.
* User wants a sigtrap for that.
@@ -617,18 +657,10 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code)
if (!dr6 && user_mode(regs))
user_icebp = 1;
- /* Catch kmemcheck conditions first of all! */
+ /* Catch kmemcheck conditions! */
if ((dr6 & DR_STEP) && kmemcheck_trap(regs))
goto exit;
- /* DR6 may or may not be cleared by the CPU */
- set_debugreg(0, 6);
-
- /*
- * The processor cleared BTF, so don't mark that we need it set.
- */
- clear_tsk_thread_flag(tsk, TIF_BLOCKSTEP);
-
/* Store the virtualized DR6 value */
tsk->thread.debugreg6 = dr6;
@@ -648,24 +680,25 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code)
debug_stack_usage_inc();
/* It's safe to allow irq's after DR6 has been saved */
- preempt_conditional_sti(regs);
+ preempt_disable();
+ cond_local_irq_enable(regs);
if (v8086_mode(regs)) {
handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code,
X86_TRAP_DB);
- preempt_conditional_cli(regs);
+ cond_local_irq_disable(regs);
+ preempt_enable_no_resched();
debug_stack_usage_dec();
goto exit;
}
- /*
- * Single-stepping through system calls: ignore any exceptions in
- * kernel space, but re-enable TF when returning to user mode.
- *
- * We already checked v86 mode above, so we can check for kernel mode
- * by just checking the CPL of CS.
- */
- if ((dr6 & DR_STEP) && !user_mode(regs)) {
+ if (WARN_ON_ONCE((dr6 & DR_STEP) && !user_mode(regs))) {
+ /*
+ * Historical junk that used to handle SYSENTER single-stepping.
+ * This should be unreachable now. If we survive for a while
+ * without anyone hitting this warning, we'll turn this into
+ * an oops.
+ */
tsk->thread.debugreg6 &= ~DR_STEP;
set_tsk_thread_flag(tsk, TIF_SINGLESTEP);
regs->flags &= ~X86_EFLAGS_TF;
@@ -673,10 +706,19 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code)
si_code = get_si_code(tsk->thread.debugreg6);
if (tsk->thread.debugreg6 & (DR_STEP | DR_TRAP_BITS) || user_icebp)
send_sigtrap(tsk, regs, error_code, si_code);
- preempt_conditional_cli(regs);
+ cond_local_irq_disable(regs);
+ preempt_enable_no_resched();
debug_stack_usage_dec();
exit:
+#if defined(CONFIG_X86_32)
+ /*
+ * This is the most likely code path that involves non-trivial use
+ * of the SYSENTER stack. Check that we haven't overrun it.
+ */
+ WARN(this_cpu_read(cpu_tss.SYSENTER_stack_canary) != STACK_END_MAGIC,
+ "Overran or corrupted SYSENTER stack\n");
+#endif
ist_exit(regs);
}
NOKPROBE_SYMBOL(do_debug);
@@ -696,10 +738,10 @@ static void math_error(struct pt_regs *regs, int error_code, int trapnr)
if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, SIGFPE) == NOTIFY_STOP)
return;
- conditional_sti(regs);
+ cond_local_irq_enable(regs);
if (!user_mode(regs)) {
- if (!fixup_exception(regs)) {
+ if (!fixup_exception(regs, trapnr)) {
task->thread.error_code = error_code;
task->thread.trap_nr = trapnr;
die(str, regs, error_code);
@@ -743,20 +785,19 @@ do_simd_coprocessor_error(struct pt_regs *regs, long error_code)
dotraplinkage void
do_spurious_interrupt_bug(struct pt_regs *regs, long error_code)
{
- conditional_sti(regs);
+ cond_local_irq_enable(regs);
}
dotraplinkage void
do_device_not_available(struct pt_regs *regs, long error_code)
{
RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
- BUG_ON(use_eager_fpu());
#ifdef CONFIG_MATH_EMULATION
- if (read_cr0() & X86_CR0_EM) {
+ if (!boot_cpu_has(X86_FEATURE_FPU) && (read_cr0() & X86_CR0_EM)) {
struct math_emu_info info = { };
- conditional_sti(regs);
+ cond_local_irq_enable(regs);
info.regs = regs;
math_emulate(&info);
@@ -765,7 +806,7 @@ do_device_not_available(struct pt_regs *regs, long error_code)
#endif
fpu__restore(&current->thread.fpu); /* interrupts still off */
#ifdef CONFIG_X86_32
- conditional_sti(regs);
+ cond_local_irq_enable(regs);
#endif
}
NOKPROBE_SYMBOL(do_device_not_available);
@@ -868,7 +909,7 @@ void __init trap_init(void)
#endif
#ifdef CONFIG_X86_32
- set_system_trap_gate(IA32_SYSCALL_VECTOR, entry_INT80_32);
+ set_system_intr_gate(IA32_SYSCALL_VECTOR, entry_INT80_32);
set_bit(IA32_SYSCALL_VECTOR, used_vectors);
#endif
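is_sysenter_singlestep() above uses a single unsigned comparison to test whether regs->ip falls in [begin, end): the subtraction wraps around for addresses below begin, so one compare covers both bounds. A standalone sketch of the idiom with illustrative addresses only.

#include <stdbool.h>
#include <stdint.h>

/* true iff begin <= ip < end, using one unsigned compare */
static bool in_region(uintptr_t ip, uintptr_t begin, uintptr_t end)
{
	return (ip - begin) < (end - begin);
}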
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 4b20bb75b..75b939d39 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -44,6 +44,11 @@ static DEFINE_STATIC_KEY_FALSE(__use_tsc);
int tsc_clocksource_reliable;
+static u32 art_to_tsc_numerator;
+static u32 art_to_tsc_denominator;
+static u64 art_to_tsc_offset;
+struct clocksource *art_related_clocksource;
+
/*
* Use a ring-buffer like data structure, where a writer advances the head by
* writing a new data entry and a reader advances the tail when it observes a
@@ -881,7 +886,7 @@ void tsc_restore_sched_clock_state(void)
local_irq_save(flags);
/*
- * We're comming out of suspend, there's no concurrency yet; don't
+ * We're coming out of suspend, there's no concurrency yet; don't
* bother being nice about the RCU stuff, just write to both
* data fields.
*/
@@ -969,6 +974,37 @@ core_initcall(cpufreq_tsc);
#endif /* CONFIG_CPU_FREQ */
+#define ART_CPUID_LEAF (0x15)
+#define ART_MIN_DENOMINATOR (1)
+
+
+/*
+ * If ART is present detect the numerator:denominator to convert to TSC
+ */
+static void detect_art(void)
+{
+ unsigned int unused[2];
+
+ if (boot_cpu_data.cpuid_level < ART_CPUID_LEAF)
+ return;
+
+ cpuid(ART_CPUID_LEAF, &art_to_tsc_denominator,
+ &art_to_tsc_numerator, unused, unused+1);
+
+ /* Don't enable ART in a VM, non-stop TSC required */
+ if (boot_cpu_has(X86_FEATURE_HYPERVISOR) ||
+ !boot_cpu_has(X86_FEATURE_NONSTOP_TSC) ||
+ art_to_tsc_denominator < ART_MIN_DENOMINATOR)
+ return;
+
+ if (rdmsrl_safe(MSR_IA32_TSC_ADJUST, &art_to_tsc_offset))
+ return;
+
+ /* Make this sticky over multiple CPU init calls */
+ setup_force_cpu_cap(X86_FEATURE_ART);
+}
+
+
/* clocksource code */
static struct clocksource clocksource_tsc;
@@ -1076,6 +1112,25 @@ int unsynchronized_tsc(void)
return 0;
}
+/*
+ * Convert ART to TSC given numerator/denominator found in detect_art()
+ */
+struct system_counterval_t convert_art_to_tsc(cycle_t art)
+{
+ u64 tmp, res, rem;
+
+ rem = do_div(art, art_to_tsc_denominator);
+
+ res = art * art_to_tsc_numerator;
+ tmp = rem * art_to_tsc_numerator;
+
+ do_div(tmp, art_to_tsc_denominator);
+ res += tmp + art_to_tsc_offset;
+
+ return (struct system_counterval_t) {.cs = art_related_clocksource,
+ .cycles = res};
+}
+EXPORT_SYMBOL(convert_art_to_tsc);
static void tsc_refine_calibration_work(struct work_struct *work);
static DECLARE_DELAYED_WORK(tsc_irqwork, tsc_refine_calibration_work);
@@ -1147,6 +1202,8 @@ static void tsc_refine_calibration_work(struct work_struct *work)
(unsigned long)tsc_khz % 1000);
out:
+ if (boot_cpu_has(X86_FEATURE_ART))
+ art_related_clocksource = &clocksource_tsc;
clocksource_register_khz(&clocksource_tsc, tsc_khz);
}
@@ -1240,6 +1297,8 @@ void __init tsc_init(void)
mark_tsc_unstable("TSCs unsynchronized");
check_system_tsc_reliable();
+
+ detect_art();
}
#ifdef CONFIG_SMP
@@ -1251,14 +1310,18 @@ void __init tsc_init(void)
*/
unsigned long calibrate_delay_is_known(void)
{
- int i, cpu = smp_processor_id();
+ int sibling, cpu = smp_processor_id();
+ struct cpumask *mask = topology_core_cpumask(cpu);
if (!tsc_disabled && !cpu_has(&cpu_data(cpu), X86_FEATURE_CONSTANT_TSC))
return 0;
- for_each_online_cpu(i)
- if (cpu_data(i).phys_proc_id == cpu_data(cpu).phys_proc_id)
- return cpu_data(i).loops_per_jiffy;
+ if (!mask)
+ return 0;
+
+ sibling = cpumask_any_but(mask, cpu);
+ if (sibling < nr_cpu_ids)
+ return cpu_data(sibling).loops_per_jiffy;
return 0;
}
#endif
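convert_art_to_tsc() above computes TSC = ART * numerator / denominator + offset, splitting ART into quotient and remainder so the 64-bit multiply cannot overflow. A userspace sketch of the same split with a worked, hypothetical CPUID 0x15 ratio.

#include <stdio.h>
#include <stdint.h>

static uint64_t art_to_tsc(uint64_t art, uint32_t num, uint32_t den,
			   uint64_t offset)
{
	uint64_t q = art / den;	/* whole multiples of the denominator */
	uint64_t r = art % den;	/* remainder, small enough to multiply safely */

	return q * num + (r * num) / den + offset;
}

int main(void)
{
	/* e.g. denominator = 2, numerator = 84: TSC runs 42x faster than ART */
	printf("%llu\n",
	       (unsigned long long)art_to_tsc(1000000001ULL, 84, 2, 0));
	return 0;	/* prints 42000000042 */
}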
diff --git a/arch/x86/kernel/verify_cpu.S b/arch/x86/kernel/verify_cpu.S
index 07efb35ee..014ea59aa 100644
--- a/arch/x86/kernel/verify_cpu.S
+++ b/arch/x86/kernel/verify_cpu.S
@@ -30,7 +30,7 @@
* appropriately. Either display a message or halt.
*/
-#include <asm/cpufeature.h>
+#include <asm/cpufeatures.h>
#include <asm/msr-index.h>
verify_cpu:
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c
index e574b8546..3dce1ca0a 100644
--- a/arch/x86/kernel/vm86_32.c
+++ b/arch/x86/kernel/vm86_32.c
@@ -362,7 +362,7 @@ static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus)
/* make room for real-mode segments */
tsk->thread.sp0 += 16;
- if (static_cpu_has_safe(X86_FEATURE_SEP))
+ if (static_cpu_has(X86_FEATURE_SEP))
tsk->thread.sysenter_cs = 0;
load_sp0(tss, &tsk->thread);
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 74e4bf11f..4c941f88d 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -41,29 +41,28 @@ ENTRY(phys_startup_64)
jiffies_64 = jiffies;
#endif
-#if defined(CONFIG_X86_64) && defined(CONFIG_DEBUG_RODATA)
+#if defined(CONFIG_X86_64)
/*
- * On 64-bit, align RODATA to 2MB so that even with CONFIG_DEBUG_RODATA
- * we retain large page mappings for boundaries spanning kernel text, rodata
- * and data sections.
+ * On 64-bit, align RODATA to 2MB so we retain large page mappings for
+ * boundaries spanning kernel text, rodata and data sections.
*
* However, kernel identity mappings will have different RWX permissions
* to the pages mapping to text and to the pages padding (which are freed) the
* text section. Hence kernel identity mappings will be broken to smaller
* pages. For 64-bit, kernel text and kernel identity mappings are different,
- * so we can enable protection checks that come with CONFIG_DEBUG_RODATA,
- * as well as retain 2MB large page mappings for kernel text.
+ * so we can enable protection checks as well as retain 2MB large page
+ * mappings for kernel text.
*/
-#define X64_ALIGN_DEBUG_RODATA_BEGIN . = ALIGN(HPAGE_SIZE);
+#define X64_ALIGN_RODATA_BEGIN . = ALIGN(HPAGE_SIZE);
-#define X64_ALIGN_DEBUG_RODATA_END \
+#define X64_ALIGN_RODATA_END \
. = ALIGN(HPAGE_SIZE); \
__end_rodata_hpage_align = .;
#else
-#define X64_ALIGN_DEBUG_RODATA_BEGIN
-#define X64_ALIGN_DEBUG_RODATA_END
+#define X64_ALIGN_RODATA_BEGIN
+#define X64_ALIGN_RODATA_END
#endif
@@ -82,11 +81,11 @@ PHDRS {
SECTIONS
{
#ifdef CONFIG_X86_32
- . = LOAD_OFFSET + LOAD_PHYSICAL_ADDR;
- phys_startup_32 = startup_32 - LOAD_OFFSET;
+ . = LOAD_OFFSET + LOAD_PHYSICAL_ADDR;
+ phys_startup_32 = ABSOLUTE(startup_32 - LOAD_OFFSET);
#else
- . = __START_KERNEL;
- phys_startup_64 = startup_64 - LOAD_OFFSET;
+ . = __START_KERNEL;
+ phys_startup_64 = ABSOLUTE(startup_64 - LOAD_OFFSET);
#endif
/* Text and read-only data */
@@ -102,6 +101,7 @@ SECTIONS
KPROBES_TEXT
ENTRY_TEXT
IRQENTRY_TEXT
+ SOFTIRQENTRY_TEXT
*(.fixup)
*(.gnu.warning)
/* End of text section */
@@ -112,13 +112,11 @@ SECTIONS
EXCEPTION_TABLE(16) :text = 0x9090
-#if defined(CONFIG_DEBUG_RODATA)
/* .text should occupy whole number of pages */
. = ALIGN(PAGE_SIZE);
-#endif
- X64_ALIGN_DEBUG_RODATA_BEGIN
+ X64_ALIGN_RODATA_BEGIN
RO_DATA(PAGE_SIZE)
- X64_ALIGN_DEBUG_RODATA_END
+ X64_ALIGN_RODATA_END
/* Data */
.data : AT(ADDR(.data) - LOAD_OFFSET) {
@@ -195,6 +193,17 @@ SECTIONS
:init
#endif
+ /*
+ * Section for code used exclusively before alternatives are run. All
+ * references to such code must be patched out by alternatives, normally
+ * by using X86_FEATURE_ALWAYS CPU feature bit.
+ *
+ * See static_cpu_has() for an example.
+ */
+ .altinstr_aux : AT(ADDR(.altinstr_aux) - LOAD_OFFSET) {
+ *(.altinstr_aux)
+ }
+
INIT_DATA_SECTION(16)
.x86_cpu_dev.init : AT(ADDR(.x86_cpu_dev.init) - LOAD_OFFSET) {
@@ -325,6 +334,7 @@ SECTIONS
__brk_limit = .;
}
+ . = ALIGN(PAGE_SIZE);
_end = .;
STABS_DEBUG
@@ -332,7 +342,10 @@ SECTIONS
/* Sections to be discarded */
DISCARDS
- /DISCARD/ : { *(.eh_frame) }
+ /DISCARD/ : {
+ *(.eh_frame)
+ *(__func_stack_frame_non_standard)
+ }
}
diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c
index a0695be19..cd05942bc 100644
--- a/arch/x86/kernel/x8664_ksyms_64.c
+++ b/arch/x86/kernel/x8664_ksyms_64.c
@@ -37,6 +37,8 @@ EXPORT_SYMBOL(__copy_user_nocache);
EXPORT_SYMBOL(_copy_from_user);
EXPORT_SYMBOL(_copy_to_user);
+EXPORT_SYMBOL_GPL(memcpy_mcsafe);
+
EXPORT_SYMBOL(copy_page);
EXPORT_SYMBOL(clear_page);