diff options
Diffstat (limited to 'arch/x86/events')
-rw-r--r-- | arch/x86/events/amd/core.c | 6 | ||||
-rw-r--r-- | arch/x86/events/amd/ibs.c | 75 | ||||
-rw-r--r-- | arch/x86/events/amd/iommu.c | 2 | ||||
-rw-r--r-- | arch/x86/events/amd/power.c | 60 | ||||
-rw-r--r-- | arch/x86/events/amd/uncore.c | 144 | ||||
-rw-r--r-- | arch/x86/events/core.c | 140 | ||||
-rw-r--r-- | arch/x86/events/intel/bts.c | 126 | ||||
-rw-r--r-- | arch/x86/events/intel/core.c | 220 | ||||
-rw-r--r-- | arch/x86/events/intel/cqm.c | 49 | ||||
-rw-r--r-- | arch/x86/events/intel/cstate.c | 98 | ||||
-rw-r--r-- | arch/x86/events/intel/lbr.c | 124 | ||||
-rw-r--r-- | arch/x86/events/intel/rapl.c | 116 | ||||
-rw-r--r-- | arch/x86/events/intel/uncore.c | 223 | ||||
-rw-r--r-- | arch/x86/events/intel/uncore.h | 6 | ||||
-rw-r--r-- | arch/x86/events/intel/uncore_snb.c | 81 | ||||
-rw-r--r-- | arch/x86/events/intel/uncore_snbep.c | 96 | ||||
-rw-r--r-- | arch/x86/events/msr.c | 63 | ||||
-rw-r--r-- | arch/x86/events/perf_event.h | 12 |
18 files changed, 977 insertions, 664 deletions
diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c index a8309ea67..f5f4b3fbb 100644 --- a/arch/x86/events/amd/core.c +++ b/arch/x86/events/amd/core.c @@ -370,13 +370,13 @@ static int amd_pmu_cpu_prepare(int cpu) WARN_ON_ONCE(cpuc->amd_nb); if (!x86_pmu.amd_nb_constraints) - return NOTIFY_OK; + return 0; cpuc->amd_nb = amd_alloc_nb(cpu); if (!cpuc->amd_nb) - return NOTIFY_BAD; + return -ENOMEM; - return NOTIFY_OK; + return 0; } static void amd_pmu_cpu_starting(int cpu) diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c index feb90f673..b26ee32f7 100644 --- a/arch/x86/events/amd/ibs.c +++ b/arch/x86/events/amd/ibs.c @@ -7,7 +7,8 @@ */ #include <linux/perf_event.h> -#include <linux/module.h> +#include <linux/init.h> +#include <linux/export.h> #include <linux/pci.h> #include <linux/ptrace.h> #include <linux/syscore_ops.h> @@ -655,8 +656,12 @@ fail: } if (event->attr.sample_type & PERF_SAMPLE_RAW) { - raw.size = sizeof(u32) + ibs_data.size; - raw.data = ibs_data.data; + raw = (struct perf_raw_record){ + .frag = { + .size = sizeof(u32) + ibs_data.size, + .data = ibs_data.data, + }, + }; data.raw = &raw; } @@ -721,13 +726,10 @@ static __init int perf_ibs_pmu_init(struct perf_ibs *perf_ibs, char *name) return ret; } -static __init int perf_event_ibs_init(void) +static __init void perf_event_ibs_init(void) { struct attribute **attr = ibs_op_format_attrs; - if (!ibs_caps) - return -ENODEV; /* ibs not supported by the cpu */ - perf_ibs_pmu_init(&perf_ibs_fetch, "ibs_fetch"); if (ibs_caps & IBS_CAPS_OPCNT) { @@ -738,13 +740,11 @@ static __init int perf_event_ibs_init(void) register_nmi_handler(NMI_LOCAL, perf_ibs_nmi_handler, 0, "perf_ibs"); pr_info("perf: AMD IBS detected (0x%08x)\n", ibs_caps); - - return 0; } #else /* defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD) */ -static __init int perf_event_ibs_init(void) { return 0; } +static __init void perf_event_ibs_init(void) { } #endif @@ -921,7 +921,7 @@ static inline int get_ibs_lvt_offset(void) return val & IBSCTL_LVT_OFFSET_MASK; } -static void setup_APIC_ibs(void *dummy) +static void setup_APIC_ibs(void) { int offset; @@ -936,7 +936,7 @@ failed: smp_processor_id()); } -static void clear_APIC_ibs(void *dummy) +static void clear_APIC_ibs(void) { int offset; @@ -945,18 +945,24 @@ static void clear_APIC_ibs(void *dummy) setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_FIX, 1); } +static int x86_pmu_amd_ibs_starting_cpu(unsigned int cpu) +{ + setup_APIC_ibs(); + return 0; +} + #ifdef CONFIG_PM static int perf_ibs_suspend(void) { - clear_APIC_ibs(NULL); + clear_APIC_ibs(); return 0; } static void perf_ibs_resume(void) { ibs_eilvt_setup(); - setup_APIC_ibs(NULL); + setup_APIC_ibs(); } static struct syscore_ops perf_ibs_syscore_ops = { @@ -975,27 +981,15 @@ static inline void perf_ibs_pm_init(void) { } #endif -static int -perf_ibs_cpu_notifier(struct notifier_block *self, unsigned long action, void *hcpu) +static int x86_pmu_amd_ibs_dying_cpu(unsigned int cpu) { - switch (action & ~CPU_TASKS_FROZEN) { - case CPU_STARTING: - setup_APIC_ibs(NULL); - break; - case CPU_DYING: - clear_APIC_ibs(NULL); - break; - default: - break; - } - - return NOTIFY_OK; + clear_APIC_ibs(); + return 0; } static __init int amd_ibs_init(void) { u32 caps; - int ret = -EINVAL; caps = __get_ibs_caps(); if (!caps) @@ -1004,22 +998,25 @@ static __init int amd_ibs_init(void) ibs_eilvt_setup(); if (!ibs_eilvt_valid()) - goto out; + return -EINVAL; perf_ibs_pm_init(); - cpu_notifier_register_begin(); + ibs_caps = caps; /* make ibs_caps visible to other cpus: */ smp_mb(); - smp_call_function(setup_APIC_ibs, NULL, 1); - __perf_cpu_notifier(perf_ibs_cpu_notifier); - cpu_notifier_register_done(); + /* + * x86_pmu_amd_ibs_starting_cpu will be called from core on + * all online cpus. + */ + cpuhp_setup_state(CPUHP_AP_PERF_X86_AMD_IBS_STARTING, + "AP_PERF_X86_AMD_IBS_STARTING", + x86_pmu_amd_ibs_starting_cpu, + x86_pmu_amd_ibs_dying_cpu); - ret = perf_event_ibs_init(); -out: - if (ret) - pr_err("Failed to setup IBS, %d\n", ret); - return ret; + perf_event_ibs_init(); + + return 0; } /* Since we need the pci subsystem to init ibs we can't do this earlier: */ diff --git a/arch/x86/events/amd/iommu.c b/arch/x86/events/amd/iommu.c index 6011a573d..b28200dea 100644 --- a/arch/x86/events/amd/iommu.c +++ b/arch/x86/events/amd/iommu.c @@ -12,7 +12,7 @@ */ #include <linux/perf_event.h> -#include <linux/module.h> +#include <linux/init.h> #include <linux/cpumask.h> #include <linux/slab.h> diff --git a/arch/x86/events/amd/power.c b/arch/x86/events/amd/power.c index 55a3529db..9842270ed 100644 --- a/arch/x86/events/amd/power.c +++ b/arch/x86/events/amd/power.c @@ -228,12 +228,12 @@ static struct pmu pmu_class = { .read = pmu_event_read, }; -static void power_cpu_exit(int cpu) +static int power_cpu_exit(unsigned int cpu) { int target; if (!cpumask_test_and_clear_cpu(cpu, &cpu_mask)) - return; + return 0; /* * Find a new CPU on the same compute unit, if was set in cpumask @@ -245,9 +245,10 @@ static void power_cpu_exit(int cpu) cpumask_set_cpu(target, &cpu_mask); perf_pmu_migrate_context(&pmu_class, cpu, target); } + return 0; } -static void power_cpu_init(int cpu) +static int power_cpu_init(unsigned int cpu) { int target; @@ -255,7 +256,7 @@ static void power_cpu_init(int cpu) * 1) If any CPU is set at cpu_mask in the same compute unit, do * nothing. * 2) If no CPU is set at cpu_mask in the same compute unit, - * set current STARTING CPU. + * set current ONLINE CPU. * * Note: if there is a CPU aside of the new one already in the * sibling mask, then it is also in cpu_mask. @@ -263,33 +264,9 @@ static void power_cpu_init(int cpu) target = cpumask_any_but(topology_sibling_cpumask(cpu), cpu); if (target >= nr_cpumask_bits) cpumask_set_cpu(cpu, &cpu_mask); + return 0; } -static int -power_cpu_notifier(struct notifier_block *self, unsigned long action, void *hcpu) -{ - unsigned int cpu = (long)hcpu; - - switch (action & ~CPU_TASKS_FROZEN) { - case CPU_DOWN_FAILED: - case CPU_STARTING: - power_cpu_init(cpu); - break; - case CPU_DOWN_PREPARE: - power_cpu_exit(cpu); - break; - default: - break; - } - - return NOTIFY_OK; -} - -static struct notifier_block power_cpu_notifier_nb = { - .notifier_call = power_cpu_notifier, - .priority = CPU_PRI_PERF, -}; - static const struct x86_cpu_id cpu_match[] = { { .vendor = X86_VENDOR_AMD, .family = 0x15 }, {}, @@ -297,7 +274,7 @@ static const struct x86_cpu_id cpu_match[] = { static int __init amd_power_pmu_init(void) { - int cpu, target, ret; + int ret; if (!x86_match_cpu(cpu_match)) return 0; @@ -312,38 +289,25 @@ static int __init amd_power_pmu_init(void) return -ENODEV; } - cpu_notifier_register_begin(); - /* Choose one online core of each compute unit. */ - for_each_online_cpu(cpu) { - target = cpumask_first(topology_sibling_cpumask(cpu)); - if (!cpumask_test_cpu(target, &cpu_mask)) - cpumask_set_cpu(target, &cpu_mask); - } + cpuhp_setup_state(CPUHP_AP_PERF_X86_AMD_POWER_ONLINE, + "AP_PERF_X86_AMD_POWER_ONLINE", + power_cpu_init, power_cpu_exit); ret = perf_pmu_register(&pmu_class, "power", -1); if (WARN_ON(ret)) { pr_warn("AMD Power PMU registration failed\n"); - goto out; + return ret; } - __register_cpu_notifier(&power_cpu_notifier_nb); - pr_info("AMD Power PMU detected\n"); - -out: - cpu_notifier_register_done(); - return ret; } module_init(amd_power_pmu_init); static void __exit amd_power_pmu_exit(void) { - cpu_notifier_register_begin(); - __unregister_cpu_notifier(&power_cpu_notifier_nb); - cpu_notifier_register_done(); - + cpuhp_remove_state_nocalls(CPUHP_AP_PERF_X86_AMD_POWER_ONLINE); perf_pmu_unregister(&pmu_class); } module_exit(amd_power_pmu_exit); diff --git a/arch/x86/events/amd/uncore.c b/arch/x86/events/amd/uncore.c index 98ac57381..65577f081 100644 --- a/arch/x86/events/amd/uncore.c +++ b/arch/x86/events/amd/uncore.c @@ -29,6 +29,8 @@ #define COUNTER_SHIFT 16 +static HLIST_HEAD(uncore_unused_list); + struct amd_uncore { int id; int refcnt; @@ -39,7 +41,7 @@ struct amd_uncore { cpumask_t *active_mask; struct pmu *pmu; struct perf_event *events[MAX_COUNTERS]; - struct amd_uncore *free_when_cpu_online; + struct hlist_node node; }; static struct amd_uncore * __percpu *amd_uncore_nb; @@ -306,6 +308,7 @@ static int amd_uncore_cpu_up_prepare(unsigned int cpu) uncore_nb->msr_base = MSR_F15H_NB_PERF_CTL; uncore_nb->active_mask = &amd_nb_active_mask; uncore_nb->pmu = &amd_nb_pmu; + uncore_nb->id = -1; *per_cpu_ptr(amd_uncore_nb, cpu) = uncore_nb; } @@ -319,6 +322,7 @@ static int amd_uncore_cpu_up_prepare(unsigned int cpu) uncore_l2->msr_base = MSR_F16H_L2I_PERF_CTL; uncore_l2->active_mask = &amd_l2_active_mask; uncore_l2->pmu = &amd_l2_pmu; + uncore_l2->id = -1; *per_cpu_ptr(amd_uncore_l2, cpu) = uncore_l2; } @@ -348,7 +352,7 @@ amd_uncore_find_online_sibling(struct amd_uncore *this, continue; if (this->id == that->id) { - that->free_when_cpu_online = this; + hlist_add_head(&this->node, &uncore_unused_list); this = that; break; } @@ -358,7 +362,7 @@ amd_uncore_find_online_sibling(struct amd_uncore *this, return this; } -static void amd_uncore_cpu_starting(unsigned int cpu) +static int amd_uncore_cpu_starting(unsigned int cpu) { unsigned int eax, ebx, ecx, edx; struct amd_uncore *uncore; @@ -384,6 +388,19 @@ static void amd_uncore_cpu_starting(unsigned int cpu) uncore = amd_uncore_find_online_sibling(uncore, amd_uncore_l2); *per_cpu_ptr(amd_uncore_l2, cpu) = uncore; } + + return 0; +} + +static void uncore_clean_online(void) +{ + struct amd_uncore *uncore; + struct hlist_node *n; + + hlist_for_each_entry_safe(uncore, n, &uncore_unused_list, node) { + hlist_del(&uncore->node); + kfree(uncore); + } } static void uncore_online(unsigned int cpu, @@ -391,20 +408,21 @@ static void uncore_online(unsigned int cpu, { struct amd_uncore *uncore = *per_cpu_ptr(uncores, cpu); - kfree(uncore->free_when_cpu_online); - uncore->free_when_cpu_online = NULL; + uncore_clean_online(); if (cpu == uncore->cpu) cpumask_set_cpu(cpu, uncore->active_mask); } -static void amd_uncore_cpu_online(unsigned int cpu) +static int amd_uncore_cpu_online(unsigned int cpu) { if (amd_uncore_nb) uncore_online(cpu, amd_uncore_nb); if (amd_uncore_l2) uncore_online(cpu, amd_uncore_l2); + + return 0; } static void uncore_down_prepare(unsigned int cpu, @@ -433,13 +451,15 @@ static void uncore_down_prepare(unsigned int cpu, } } -static void amd_uncore_cpu_down_prepare(unsigned int cpu) +static int amd_uncore_cpu_down_prepare(unsigned int cpu) { if (amd_uncore_nb) uncore_down_prepare(cpu, amd_uncore_nb); if (amd_uncore_l2) uncore_down_prepare(cpu, amd_uncore_l2); + + return 0; } static void uncore_dead(unsigned int cpu, struct amd_uncore * __percpu *uncores) @@ -454,74 +474,19 @@ static void uncore_dead(unsigned int cpu, struct amd_uncore * __percpu *uncores) *per_cpu_ptr(uncores, cpu) = NULL; } -static void amd_uncore_cpu_dead(unsigned int cpu) +static int amd_uncore_cpu_dead(unsigned int cpu) { if (amd_uncore_nb) uncore_dead(cpu, amd_uncore_nb); if (amd_uncore_l2) uncore_dead(cpu, amd_uncore_l2); -} - -static int -amd_uncore_cpu_notifier(struct notifier_block *self, unsigned long action, - void *hcpu) -{ - unsigned int cpu = (long)hcpu; - - switch (action & ~CPU_TASKS_FROZEN) { - case CPU_UP_PREPARE: - if (amd_uncore_cpu_up_prepare(cpu)) - return notifier_from_errno(-ENOMEM); - break; - - case CPU_STARTING: - amd_uncore_cpu_starting(cpu); - break; - - case CPU_ONLINE: - amd_uncore_cpu_online(cpu); - break; - - case CPU_DOWN_PREPARE: - amd_uncore_cpu_down_prepare(cpu); - break; - - case CPU_UP_CANCELED: - case CPU_DEAD: - amd_uncore_cpu_dead(cpu); - break; - - default: - break; - } - - return NOTIFY_OK; -} - -static struct notifier_block amd_uncore_cpu_notifier_block = { - .notifier_call = amd_uncore_cpu_notifier, - .priority = CPU_PRI_PERF + 1, -}; - -static void __init init_cpu_already_online(void *dummy) -{ - unsigned int cpu = smp_processor_id(); - - amd_uncore_cpu_starting(cpu); - amd_uncore_cpu_online(cpu); -} -static void cleanup_cpu_online(void *dummy) -{ - unsigned int cpu = smp_processor_id(); - - amd_uncore_cpu_dead(cpu); + return 0; } static int __init amd_uncore_init(void) { - unsigned int cpu, cpu2; int ret = -ENODEV; if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) @@ -558,38 +523,29 @@ static int __init amd_uncore_init(void) ret = 0; } - if (ret) - goto fail_nodev; - - cpu_notifier_register_begin(); - - /* init cpus already online before registering for hotplug notifier */ - for_each_online_cpu(cpu) { - ret = amd_uncore_cpu_up_prepare(cpu); - if (ret) - goto fail_online; - smp_call_function_single(cpu, init_cpu_already_online, NULL, 1); - } - - __register_cpu_notifier(&amd_uncore_cpu_notifier_block); - cpu_notifier_register_done(); - + /* + * Install callbacks. Core will call them for each online cpu. + */ + if (cpuhp_setup_state(CPUHP_PERF_X86_AMD_UNCORE_PREP, + "PERF_X86_AMD_UNCORE_PREP", + amd_uncore_cpu_up_prepare, amd_uncore_cpu_dead)) + goto fail_l2; + + if (cpuhp_setup_state(CPUHP_AP_PERF_X86_AMD_UNCORE_STARTING, + "AP_PERF_X86_AMD_UNCORE_STARTING", + amd_uncore_cpu_starting, NULL)) + goto fail_prep; + if (cpuhp_setup_state(CPUHP_AP_PERF_X86_AMD_UNCORE_ONLINE, + "AP_PERF_X86_AMD_UNCORE_ONLINE", + amd_uncore_cpu_online, + amd_uncore_cpu_down_prepare)) + goto fail_start; return 0; - -fail_online: - for_each_online_cpu(cpu2) { - if (cpu2 == cpu) - break; - smp_call_function_single(cpu, cleanup_cpu_online, NULL, 1); - } - cpu_notifier_register_done(); - - /* amd_uncore_nb/l2 should have been freed by cleanup_cpu_online */ - amd_uncore_nb = amd_uncore_l2 = NULL; - - if (boot_cpu_has(X86_FEATURE_PERFCTR_L2)) - perf_pmu_unregister(&amd_l2_pmu); +fail_start: + cpuhp_remove_state(CPUHP_AP_PERF_X86_AMD_UNCORE_STARTING); +fail_prep: + cpuhp_remove_state(CPUHP_PERF_X86_AMD_UNCORE_PREP); fail_l2: if (boot_cpu_has(X86_FEATURE_PERFCTR_NB)) perf_pmu_unregister(&amd_nb_pmu); diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 91eac3962..d0efb5cb1 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -17,7 +17,8 @@ #include <linux/notifier.h> #include <linux/hardirq.h> #include <linux/kprobes.h> -#include <linux/module.h> +#include <linux/export.h> +#include <linux/init.h> #include <linux/kdebug.h> #include <linux/sched.h> #include <linux/uaccess.h> @@ -262,10 +263,13 @@ static bool check_hw_exists(void) return true; msr_fail: - pr_cont("Broken PMU hardware detected, using software events only.\n"); - printk("%sFailed to access perfctr msr (MSR %x is %Lx)\n", - boot_cpu_has(X86_FEATURE_HYPERVISOR) ? KERN_INFO : KERN_ERR, - reg, val_new); + if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) { + pr_cont("PMU not available due to virtualization, using software events only.\n"); + } else { + pr_cont("Broken PMU hardware detected, using software events only.\n"); + pr_err("Failed to access perfctr msr (MSR %x is %Lx)\n", + reg, val_new); + } return false; } @@ -1477,49 +1481,49 @@ NOKPROBE_SYMBOL(perf_event_nmi_handler); struct event_constraint emptyconstraint; struct event_constraint unconstrained; -static int -x86_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu) +static int x86_pmu_prepare_cpu(unsigned int cpu) { - unsigned int cpu = (long)hcpu; struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); - int i, ret = NOTIFY_OK; - - switch (action & ~CPU_TASKS_FROZEN) { - case CPU_UP_PREPARE: - for (i = 0 ; i < X86_PERF_KFREE_MAX; i++) - cpuc->kfree_on_online[i] = NULL; - if (x86_pmu.cpu_prepare) - ret = x86_pmu.cpu_prepare(cpu); - break; - - case CPU_STARTING: - if (x86_pmu.cpu_starting) - x86_pmu.cpu_starting(cpu); - break; + int i; - case CPU_ONLINE: - for (i = 0 ; i < X86_PERF_KFREE_MAX; i++) { - kfree(cpuc->kfree_on_online[i]); - cpuc->kfree_on_online[i] = NULL; - } - break; + for (i = 0 ; i < X86_PERF_KFREE_MAX; i++) + cpuc->kfree_on_online[i] = NULL; + if (x86_pmu.cpu_prepare) + return x86_pmu.cpu_prepare(cpu); + return 0; +} - case CPU_DYING: - if (x86_pmu.cpu_dying) - x86_pmu.cpu_dying(cpu); - break; +static int x86_pmu_dead_cpu(unsigned int cpu) +{ + if (x86_pmu.cpu_dead) + x86_pmu.cpu_dead(cpu); + return 0; +} - case CPU_UP_CANCELED: - case CPU_DEAD: - if (x86_pmu.cpu_dead) - x86_pmu.cpu_dead(cpu); - break; +static int x86_pmu_online_cpu(unsigned int cpu) +{ + struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); + int i; - default: - break; + for (i = 0 ; i < X86_PERF_KFREE_MAX; i++) { + kfree(cpuc->kfree_on_online[i]); + cpuc->kfree_on_online[i] = NULL; } + return 0; +} - return ret; +static int x86_pmu_starting_cpu(unsigned int cpu) +{ + if (x86_pmu.cpu_starting) + x86_pmu.cpu_starting(cpu); + return 0; +} + +static int x86_pmu_dying_cpu(unsigned int cpu) +{ + if (x86_pmu.cpu_dying) + x86_pmu.cpu_dying(cpu); + return 0; } static void __init pmu_check_apic(void) @@ -1622,6 +1626,29 @@ ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr, cha } EXPORT_SYMBOL_GPL(events_sysfs_show); +ssize_t events_ht_sysfs_show(struct device *dev, struct device_attribute *attr, + char *page) +{ + struct perf_pmu_events_ht_attr *pmu_attr = + container_of(attr, struct perf_pmu_events_ht_attr, attr); + + /* + * Report conditional events depending on Hyper-Threading. + * + * This is overly conservative as usually the HT special + * handling is not needed if the other CPU thread is idle. + * + * Note this does not (and cannot) handle the case when thread + * siblings are invisible, for example with virtualization + * if they are owned by some other guest. The user tool + * has to re-read when a thread sibling gets onlined later. + */ + return sprintf(page, "%s", + topology_max_smt_threads() > 1 ? + pmu_attr->event_str_ht : + pmu_attr->event_str_noht); +} + EVENT_ATTR(cpu-cycles, CPU_CYCLES ); EVENT_ATTR(instructions, INSTRUCTIONS ); EVENT_ATTR(cache-references, CACHE_REFERENCES ); @@ -1764,10 +1791,39 @@ static int __init init_hw_perf_events(void) pr_info("... fixed-purpose events: %d\n", x86_pmu.num_counters_fixed); pr_info("... event mask: %016Lx\n", x86_pmu.intel_ctrl); - perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW); - perf_cpu_notifier(x86_pmu_notifier); + /* + * Install callbacks. Core will call them for each online + * cpu. + */ + err = cpuhp_setup_state(CPUHP_PERF_X86_PREPARE, "PERF_X86_PREPARE", + x86_pmu_prepare_cpu, x86_pmu_dead_cpu); + if (err) + return err; + + err = cpuhp_setup_state(CPUHP_AP_PERF_X86_STARTING, + "AP_PERF_X86_STARTING", x86_pmu_starting_cpu, + x86_pmu_dying_cpu); + if (err) + goto out; + + err = cpuhp_setup_state(CPUHP_AP_PERF_X86_ONLINE, "AP_PERF_X86_ONLINE", + x86_pmu_online_cpu, NULL); + if (err) + goto out1; + + err = perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW); + if (err) + goto out2; return 0; + +out2: + cpuhp_remove_state(CPUHP_AP_PERF_X86_ONLINE); +out1: + cpuhp_remove_state(CPUHP_AP_PERF_X86_STARTING); +out: + cpuhp_remove_state(CPUHP_PERF_X86_PREPARE); + return err; } early_initcall(init_hw_perf_events); diff --git a/arch/x86/events/intel/bts.c b/arch/x86/events/intel/bts.c index 0a6e393a2..982c9e31d 100644 --- a/arch/x86/events/intel/bts.c +++ b/arch/x86/events/intel/bts.c @@ -31,7 +31,17 @@ struct bts_ctx { struct perf_output_handle handle; struct debug_store ds_back; - int started; + int state; +}; + +/* BTS context states: */ +enum { + /* no ongoing AUX transactions */ + BTS_STATE_STOPPED = 0, + /* AUX transaction is on, BTS tracing is disabled */ + BTS_STATE_INACTIVE, + /* AUX transaction is on, BTS tracing is running */ + BTS_STATE_ACTIVE, }; static DEFINE_PER_CPU(struct bts_ctx, bts_ctx); @@ -204,6 +214,15 @@ static void bts_update(struct bts_ctx *bts) static int bts_buffer_reset(struct bts_buffer *buf, struct perf_output_handle *handle); +/* + * Ordering PMU callbacks wrt themselves and the PMI is done by means + * of bts::state, which: + * - is set when bts::handle::event is valid, that is, between + * perf_aux_output_begin() and perf_aux_output_end(); + * - is zero otherwise; + * - is ordered against bts::handle::event with a compiler barrier. + */ + static void __bts_event_start(struct perf_event *event) { struct bts_ctx *bts = this_cpu_ptr(&bts_ctx); @@ -221,10 +240,13 @@ static void __bts_event_start(struct perf_event *event) /* * local barrier to make sure that ds configuration made it - * before we enable BTS + * before we enable BTS and bts::state goes ACTIVE */ wmb(); + /* INACTIVE/STOPPED -> ACTIVE */ + WRITE_ONCE(bts->state, BTS_STATE_ACTIVE); + intel_pmu_enable_bts(config); } @@ -251,9 +273,6 @@ static void bts_event_start(struct perf_event *event, int flags) __bts_event_start(event); - /* PMI handler: this counter is running and likely generating PMIs */ - ACCESS_ONCE(bts->started) = 1; - return; fail_end_stop: @@ -263,30 +282,34 @@ fail_stop: event->hw.state = PERF_HES_STOPPED; } -static void __bts_event_stop(struct perf_event *event) +static void __bts_event_stop(struct perf_event *event, int state) { + struct bts_ctx *bts = this_cpu_ptr(&bts_ctx); + + /* ACTIVE -> INACTIVE(PMI)/STOPPED(->stop()) */ + WRITE_ONCE(bts->state, state); + /* * No extra synchronization is mandated by the documentation to have * BTS data stores globally visible. */ intel_pmu_disable_bts(); - - if (event->hw.state & PERF_HES_STOPPED) - return; - - ACCESS_ONCE(event->hw.state) |= PERF_HES_STOPPED; } static void bts_event_stop(struct perf_event *event, int flags) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct bts_ctx *bts = this_cpu_ptr(&bts_ctx); - struct bts_buffer *buf = perf_get_aux(&bts->handle); + struct bts_buffer *buf = NULL; + int state = READ_ONCE(bts->state); - /* PMI handler: don't restart this counter */ - ACCESS_ONCE(bts->started) = 0; + if (state == BTS_STATE_ACTIVE) + __bts_event_stop(event, BTS_STATE_STOPPED); - __bts_event_stop(event); + if (state != BTS_STATE_STOPPED) + buf = perf_get_aux(&bts->handle); + + event->hw.state |= PERF_HES_STOPPED; if (flags & PERF_EF_UPDATE) { bts_update(bts); @@ -296,6 +319,7 @@ static void bts_event_stop(struct perf_event *event, int flags) bts->handle.head = local_xchg(&buf->data_size, buf->nr_pages << PAGE_SHIFT); + perf_aux_output_end(&bts->handle, local_xchg(&buf->data_size, 0), !!local_xchg(&buf->lost, 0)); } @@ -310,8 +334,20 @@ static void bts_event_stop(struct perf_event *event, int flags) void intel_bts_enable_local(void) { struct bts_ctx *bts = this_cpu_ptr(&bts_ctx); + int state = READ_ONCE(bts->state); + + /* + * Here we transition from INACTIVE to ACTIVE; + * if we instead are STOPPED from the interrupt handler, + * stay that way. Can't be ACTIVE here though. + */ + if (WARN_ON_ONCE(state == BTS_STATE_ACTIVE)) + return; + + if (state == BTS_STATE_STOPPED) + return; - if (bts->handle.event && bts->started) + if (bts->handle.event) __bts_event_start(bts->handle.event); } @@ -319,8 +355,15 @@ void intel_bts_disable_local(void) { struct bts_ctx *bts = this_cpu_ptr(&bts_ctx); + /* + * Here we transition from ACTIVE to INACTIVE; + * do nothing for STOPPED or INACTIVE. + */ + if (READ_ONCE(bts->state) != BTS_STATE_ACTIVE) + return; + if (bts->handle.event) - __bts_event_stop(bts->handle.event); + __bts_event_stop(bts->handle.event, BTS_STATE_INACTIVE); } static int @@ -335,8 +378,6 @@ bts_buffer_reset(struct bts_buffer *buf, struct perf_output_handle *handle) return 0; head = handle->head & ((buf->nr_pages << PAGE_SHIFT) - 1); - if (WARN_ON_ONCE(head != local_read(&buf->head))) - return -EINVAL; phys = &buf->buf[buf->cur_buf]; space = phys->offset + phys->displacement + phys->size - head; @@ -403,22 +444,37 @@ bts_buffer_reset(struct bts_buffer *buf, struct perf_output_handle *handle) int intel_bts_interrupt(void) { + struct debug_store *ds = this_cpu_ptr(&cpu_hw_events)->ds; struct bts_ctx *bts = this_cpu_ptr(&bts_ctx); struct perf_event *event = bts->handle.event; struct bts_buffer *buf; s64 old_head; - int err; + int err = -ENOSPC, handled = 0; - if (!event || !bts->started) - return 0; + /* + * The only surefire way of knowing if this NMI is ours is by checking + * the write ptr against the PMI threshold. + */ + if (ds && (ds->bts_index >= ds->bts_interrupt_threshold)) + handled = 1; + + /* + * this is wrapped in intel_bts_enable_local/intel_bts_disable_local, + * so we can only be INACTIVE or STOPPED + */ + if (READ_ONCE(bts->state) == BTS_STATE_STOPPED) + return handled; buf = perf_get_aux(&bts->handle); + if (!buf) + return handled; + /* * Skip snapshot counters: they don't use the interrupt, but * there's no other way of telling, because the pointer will * keep moving */ - if (!buf || buf->snapshot) + if (buf->snapshot) return 0; old_head = local_read(&buf->head); @@ -426,18 +482,27 @@ int intel_bts_interrupt(void) /* no new data */ if (old_head == local_read(&buf->head)) - return 0; + return handled; perf_aux_output_end(&bts->handle, local_xchg(&buf->data_size, 0), !!local_xchg(&buf->lost, 0)); buf = perf_aux_output_begin(&bts->handle, event); - if (!buf) - return 1; + if (buf) + err = bts_buffer_reset(buf, &bts->handle); + + if (err) { + WRITE_ONCE(bts->state, BTS_STATE_STOPPED); - err = bts_buffer_reset(buf, &bts->handle); - if (err) - perf_aux_output_end(&bts->handle, 0, false); + if (buf) { + /* + * BTS_STATE_STOPPED should be visible before + * cleared handle::event + */ + barrier(); + perf_aux_output_end(&bts->handle, 0, false); + } + } return 1; } @@ -519,7 +584,8 @@ static __init int bts_init(void) if (!boot_cpu_has(X86_FEATURE_DTES64) || !x86_pmu.bts) return -ENODEV; - bts_pmu.capabilities = PERF_PMU_CAP_AUX_NO_SG | PERF_PMU_CAP_ITRACE; + bts_pmu.capabilities = PERF_PMU_CAP_AUX_NO_SG | PERF_PMU_CAP_ITRACE | + PERF_PMU_CAP_EXCLUSIVE; bts_pmu.task_ctx_nr = perf_sw_context; bts_pmu.event_init = bts_event_init; bts_pmu.add = bts_event_add; diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 9b4f9d3ce..4c9a79b9c 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -16,6 +16,7 @@ #include <asm/cpufeature.h> #include <asm/hardirq.h> +#include <asm/intel-family.h> #include <asm/apic.h> #include "../perf_event.h" @@ -185,7 +186,7 @@ static struct event_constraint intel_slm_event_constraints[] __read_mostly = EVENT_CONSTRAINT_END }; -struct event_constraint intel_skl_event_constraints[] = { +static struct event_constraint intel_skl_event_constraints[] = { FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ @@ -204,10 +205,8 @@ struct event_constraint intel_skl_event_constraints[] = { }; static struct extra_reg intel_knl_extra_regs[] __read_mostly = { - INTEL_UEVENT_EXTRA_REG(0x01b7, - MSR_OFFCORE_RSP_0, 0x7f9ffbffffull, RSP_0), - INTEL_UEVENT_EXTRA_REG(0x02b7, - MSR_OFFCORE_RSP_1, 0x3f9ffbffffull, RSP_1), + INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x799ffbb6e7ull, RSP_0), + INTEL_UEVENT_EXTRA_REG(0x02b7, MSR_OFFCORE_RSP_1, 0x399ffbffe7ull, RSP_1), EVENT_EXTRA_END }; @@ -243,14 +242,51 @@ EVENT_ATTR_STR(mem-loads, mem_ld_nhm, "event=0x0b,umask=0x10,ldlat=3"); EVENT_ATTR_STR(mem-loads, mem_ld_snb, "event=0xcd,umask=0x1,ldlat=3"); EVENT_ATTR_STR(mem-stores, mem_st_snb, "event=0xcd,umask=0x2"); -struct attribute *nhm_events_attrs[] = { +static struct attribute *nhm_events_attrs[] = { EVENT_PTR(mem_ld_nhm), NULL, }; -struct attribute *snb_events_attrs[] = { +/* + * topdown events for Intel Core CPUs. + * + * The events are all in slots, which is a free slot in a 4 wide + * pipeline. Some events are already reported in slots, for cycle + * events we multiply by the pipeline width (4). + * + * With Hyper Threading on, topdown metrics are either summed or averaged + * between the threads of a core: (count_t0 + count_t1). + * + * For the average case the metric is always scaled to pipeline width, + * so we use factor 2 ((count_t0 + count_t1) / 2 * 4) + */ + +EVENT_ATTR_STR_HT(topdown-total-slots, td_total_slots, + "event=0x3c,umask=0x0", /* cpu_clk_unhalted.thread */ + "event=0x3c,umask=0x0,any=1"); /* cpu_clk_unhalted.thread_any */ +EVENT_ATTR_STR_HT(topdown-total-slots.scale, td_total_slots_scale, "4", "2"); +EVENT_ATTR_STR(topdown-slots-issued, td_slots_issued, + "event=0xe,umask=0x1"); /* uops_issued.any */ +EVENT_ATTR_STR(topdown-slots-retired, td_slots_retired, + "event=0xc2,umask=0x2"); /* uops_retired.retire_slots */ +EVENT_ATTR_STR(topdown-fetch-bubbles, td_fetch_bubbles, + "event=0x9c,umask=0x1"); /* idq_uops_not_delivered_core */ +EVENT_ATTR_STR_HT(topdown-recovery-bubbles, td_recovery_bubbles, + "event=0xd,umask=0x3,cmask=1", /* int_misc.recovery_cycles */ + "event=0xd,umask=0x3,cmask=1,any=1"); /* int_misc.recovery_cycles_any */ +EVENT_ATTR_STR_HT(topdown-recovery-bubbles.scale, td_recovery_bubbles_scale, + "4", "2"); + +static struct attribute *snb_events_attrs[] = { EVENT_PTR(mem_ld_snb), EVENT_PTR(mem_st_snb), + EVENT_PTR(td_slots_issued), + EVENT_PTR(td_slots_retired), + EVENT_PTR(td_fetch_bubbles), + EVENT_PTR(td_total_slots), + EVENT_PTR(td_total_slots_scale), + EVENT_PTR(td_recovery_bubbles), + EVENT_PTR(td_recovery_bubbles_scale), NULL, }; @@ -280,7 +316,7 @@ static struct event_constraint intel_hsw_event_constraints[] = { EVENT_CONSTRAINT_END }; -struct event_constraint intel_bdw_event_constraints[] = { +static struct event_constraint intel_bdw_event_constraints[] = { FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ @@ -1361,6 +1397,29 @@ static __initconst const u64 atom_hw_cache_event_ids }, }; +EVENT_ATTR_STR(topdown-total-slots, td_total_slots_slm, "event=0x3c"); +EVENT_ATTR_STR(topdown-total-slots.scale, td_total_slots_scale_slm, "2"); +/* no_alloc_cycles.not_delivered */ +EVENT_ATTR_STR(topdown-fetch-bubbles, td_fetch_bubbles_slm, + "event=0xca,umask=0x50"); +EVENT_ATTR_STR(topdown-fetch-bubbles.scale, td_fetch_bubbles_scale_slm, "2"); +/* uops_retired.all */ +EVENT_ATTR_STR(topdown-slots-issued, td_slots_issued_slm, + "event=0xc2,umask=0x10"); +/* uops_retired.all */ +EVENT_ATTR_STR(topdown-slots-retired, td_slots_retired_slm, + "event=0xc2,umask=0x10"); + +static struct attribute *slm_events_attrs[] = { + EVENT_PTR(td_total_slots_slm), + EVENT_PTR(td_total_slots_scale_slm), + EVENT_PTR(td_fetch_bubbles_slm), + EVENT_PTR(td_fetch_bubbles_scale_slm), + EVENT_PTR(td_slots_issued_slm), + EVENT_PTR(td_slots_retired_slm), + NULL +}; + static struct extra_reg intel_slm_extra_regs[] __read_mostly = { /* must define OFFCORE_RSP_X first, see intel_fixup_er() */ @@ -1671,9 +1730,11 @@ static __initconst const u64 knl_hw_cache_extra_regs * disabled state if called consecutively. * * During consecutive calls, the same disable value will be written to related - * registers, so the PMU state remains unchanged. hw.state in - * intel_bts_disable_local will remain PERF_HES_STOPPED too in consecutive - * calls. + * registers, so the PMU state remains unchanged. + * + * intel_bts events don't coexist with intel PMU's BTS events because of + * x86_add_exclusive(x86_lbr_exclusive_lbr); there's no need to keep them + * disabled around intel PMU's event batching etc, only inside the PMI handler. */ static void __intel_pmu_disable_all(void) { @@ -1683,8 +1744,6 @@ static void __intel_pmu_disable_all(void) if (test_bit(INTEL_PMC_IDX_FIXED_BTS, cpuc->active_mask)) intel_pmu_disable_bts(); - else - intel_bts_disable_local(); intel_pmu_pebs_disable_all(); } @@ -1712,8 +1771,7 @@ static void __intel_pmu_enable_all(int added, bool pmi) return; intel_pmu_enable_bts(event->hw.config); - } else - intel_bts_enable_local(); + } } static void intel_pmu_enable_all(int added) @@ -2014,6 +2072,7 @@ static int intel_pmu_handle_irq(struct pt_regs *regs) */ if (!x86_pmu.late_ack) apic_write(APIC_LVTPC, APIC_DM_NMI); + intel_bts_disable_local(); __intel_pmu_disable_all(); handled = intel_pmu_drain_bts_buffer(); handled += intel_bts_interrupt(); @@ -2113,6 +2172,7 @@ done: /* Only restore PMU state when it's active. See x86_pmu_disable(). */ if (cpuc->enabled) __intel_pmu_enable_all(0, true); + intel_bts_enable_local(); /* * Only unmask the NMI after the overflow counters @@ -3050,7 +3110,7 @@ static int intel_pmu_cpu_prepare(int cpu) cpuc->excl_thread_id = 0; } - return NOTIFY_OK; + return 0; err_constraint_list: kfree(cpuc->constraint_list); @@ -3061,7 +3121,7 @@ err_shared_regs: cpuc->shared_regs = NULL; err: - return NOTIFY_BAD; + return -ENOMEM; } static void intel_pmu_cpu_starting(int cpu) @@ -3290,11 +3350,11 @@ static int intel_snb_pebs_broken(int cpu) u32 rev = UINT_MAX; /* default to broken for unknown models */ switch (cpu_data(cpu).x86_model) { - case 42: /* SNB */ + case INTEL_FAM6_SANDYBRIDGE: rev = 0x28; break; - case 45: /* SNB-EP */ + case INTEL_FAM6_SANDYBRIDGE_X: switch (cpu_data(cpu).x86_mask) { case 6: rev = 0x618; break; case 7: rev = 0x70c; break; @@ -3331,6 +3391,13 @@ static void intel_snb_check_microcode(void) } } +static bool is_lbr_from(unsigned long msr) +{ + unsigned long lbr_from_nr = x86_pmu.lbr_from + x86_pmu.lbr_nr; + + return x86_pmu.lbr_from <= msr && msr < lbr_from_nr; +} + /* * Under certain circumstances, access certain MSR may cause #GP. * The function tests if the input MSR can be safely accessed. @@ -3351,13 +3418,24 @@ static bool check_msr(unsigned long msr, u64 mask) * Only change the bits which can be updated by wrmsrl. */ val_tmp = val_old ^ mask; + + if (is_lbr_from(msr)) + val_tmp = lbr_from_signext_quirk_wr(val_tmp); + if (wrmsrl_safe(msr, val_tmp) || rdmsrl_safe(msr, &val_new)) return false; + /* + * Quirk only affects validation in wrmsr(), so wrmsrl()'s value + * should equal rdmsrl()'s even with the quirk. + */ if (val_new != val_tmp) return false; + if (is_lbr_from(msr)) + val_old = lbr_from_signext_quirk_wr(val_old); + /* Here it's sure that the MSR can be safely accessed. * Restore the old value and return. */ @@ -3466,6 +3544,13 @@ static struct attribute *hsw_events_attrs[] = { EVENT_PTR(cycles_ct), EVENT_PTR(mem_ld_hsw), EVENT_PTR(mem_st_hsw), + EVENT_PTR(td_slots_issued), + EVENT_PTR(td_slots_retired), + EVENT_PTR(td_fetch_bubbles), + EVENT_PTR(td_total_slots), + EVENT_PTR(td_total_slots_scale), + EVENT_PTR(td_recovery_bubbles), + EVENT_PTR(td_recovery_bubbles_scale), NULL }; @@ -3537,15 +3622,15 @@ __init int intel_pmu_init(void) * Install the hw-cache-events table: */ switch (boot_cpu_data.x86_model) { - case 14: /* 65nm Core "Yonah" */ + case INTEL_FAM6_CORE_YONAH: pr_cont("Core events, "); break; - case 15: /* 65nm Core2 "Merom" */ + case INTEL_FAM6_CORE2_MEROM: x86_add_quirk(intel_clovertown_quirk); - case 22: /* 65nm Core2 "Merom-L" */ - case 23: /* 45nm Core2 "Penryn" */ - case 29: /* 45nm Core2 "Dunnington (MP) */ + case INTEL_FAM6_CORE2_MEROM_L: + case INTEL_FAM6_CORE2_PENRYN: + case INTEL_FAM6_CORE2_DUNNINGTON: memcpy(hw_cache_event_ids, core2_hw_cache_event_ids, sizeof(hw_cache_event_ids)); @@ -3556,9 +3641,9 @@ __init int intel_pmu_init(void) pr_cont("Core2 events, "); break; - case 30: /* 45nm Nehalem */ - case 26: /* 45nm Nehalem-EP */ - case 46: /* 45nm Nehalem-EX */ + case INTEL_FAM6_NEHALEM: + case INTEL_FAM6_NEHALEM_EP: + case INTEL_FAM6_NEHALEM_EX: memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids, sizeof(hw_cache_event_ids)); memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs, @@ -3586,11 +3671,11 @@ __init int intel_pmu_init(void) pr_cont("Nehalem events, "); break; - case 28: /* 45nm Atom "Pineview" */ - case 38: /* 45nm Atom "Lincroft" */ - case 39: /* 32nm Atom "Penwell" */ - case 53: /* 32nm Atom "Cloverview" */ - case 54: /* 32nm Atom "Cedarview" */ + case INTEL_FAM6_ATOM_PINEVIEW: + case INTEL_FAM6_ATOM_LINCROFT: + case INTEL_FAM6_ATOM_PENWELL: + case INTEL_FAM6_ATOM_CLOVERVIEW: + case INTEL_FAM6_ATOM_CEDARVIEW: memcpy(hw_cache_event_ids, atom_hw_cache_event_ids, sizeof(hw_cache_event_ids)); @@ -3602,9 +3687,9 @@ __init int intel_pmu_init(void) pr_cont("Atom events, "); break; - case 55: /* 22nm Atom "Silvermont" */ - case 76: /* 14nm Atom "Airmont" */ - case 77: /* 22nm Atom "Silvermont Avoton/Rangely" */ + case INTEL_FAM6_ATOM_SILVERMONT1: + case INTEL_FAM6_ATOM_SILVERMONT2: + case INTEL_FAM6_ATOM_AIRMONT: memcpy(hw_cache_event_ids, slm_hw_cache_event_ids, sizeof(hw_cache_event_ids)); memcpy(hw_cache_extra_regs, slm_hw_cache_extra_regs, @@ -3616,11 +3701,12 @@ __init int intel_pmu_init(void) x86_pmu.pebs_constraints = intel_slm_pebs_event_constraints; x86_pmu.extra_regs = intel_slm_extra_regs; x86_pmu.flags |= PMU_FL_HAS_RSP_1; + x86_pmu.cpu_events = slm_events_attrs; pr_cont("Silvermont events, "); break; - case 92: /* 14nm Atom "Goldmont" */ - case 95: /* 14nm Atom "Goldmont Denverton" */ + case INTEL_FAM6_ATOM_GOLDMONT: + case INTEL_FAM6_ATOM_DENVERTON: memcpy(hw_cache_event_ids, glm_hw_cache_event_ids, sizeof(hw_cache_event_ids)); memcpy(hw_cache_extra_regs, glm_hw_cache_extra_regs, @@ -3643,9 +3729,9 @@ __init int intel_pmu_init(void) pr_cont("Goldmont events, "); break; - case 37: /* 32nm Westmere */ - case 44: /* 32nm Westmere-EP */ - case 47: /* 32nm Westmere-EX */ + case INTEL_FAM6_WESTMERE: + case INTEL_FAM6_WESTMERE_EP: + case INTEL_FAM6_WESTMERE_EX: memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids, sizeof(hw_cache_event_ids)); memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs, @@ -3672,8 +3758,8 @@ __init int intel_pmu_init(void) pr_cont("Westmere events, "); break; - case 42: /* 32nm SandyBridge */ - case 45: /* 32nm SandyBridge-E/EN/EP */ + case INTEL_FAM6_SANDYBRIDGE: + case INTEL_FAM6_SANDYBRIDGE_X: x86_add_quirk(intel_sandybridge_quirk); x86_add_quirk(intel_ht_bug); memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, @@ -3686,7 +3772,7 @@ __init int intel_pmu_init(void) x86_pmu.event_constraints = intel_snb_event_constraints; x86_pmu.pebs_constraints = intel_snb_pebs_event_constraints; x86_pmu.pebs_aliases = intel_pebs_aliases_snb; - if (boot_cpu_data.x86_model == 45) + if (boot_cpu_data.x86_model == INTEL_FAM6_SANDYBRIDGE_X) x86_pmu.extra_regs = intel_snbep_extra_regs; else x86_pmu.extra_regs = intel_snb_extra_regs; @@ -3708,8 +3794,8 @@ __init int intel_pmu_init(void) pr_cont("SandyBridge events, "); break; - case 58: /* 22nm IvyBridge */ - case 62: /* 22nm IvyBridge-EP/EX */ + case INTEL_FAM6_IVYBRIDGE: + case INTEL_FAM6_IVYBRIDGE_X: x86_add_quirk(intel_ht_bug); memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, sizeof(hw_cache_event_ids)); @@ -3725,7 +3811,7 @@ __init int intel_pmu_init(void) x86_pmu.pebs_constraints = intel_ivb_pebs_event_constraints; x86_pmu.pebs_aliases = intel_pebs_aliases_ivb; x86_pmu.pebs_prec_dist = true; - if (boot_cpu_data.x86_model == 62) + if (boot_cpu_data.x86_model == INTEL_FAM6_IVYBRIDGE_X) x86_pmu.extra_regs = intel_snbep_extra_regs; else x86_pmu.extra_regs = intel_snb_extra_regs; @@ -3743,10 +3829,10 @@ __init int intel_pmu_init(void) break; - case 60: /* 22nm Haswell Core */ - case 63: /* 22nm Haswell Server */ - case 69: /* 22nm Haswell ULT */ - case 70: /* 22nm Haswell + GT3e (Intel Iris Pro graphics) */ + case INTEL_FAM6_HASWELL_CORE: + case INTEL_FAM6_HASWELL_X: + case INTEL_FAM6_HASWELL_ULT: + case INTEL_FAM6_HASWELL_GT3E: x86_add_quirk(intel_ht_bug); x86_pmu.late_ack = true; memcpy(hw_cache_event_ids, hsw_hw_cache_event_ids, sizeof(hw_cache_event_ids)); @@ -3770,10 +3856,10 @@ __init int intel_pmu_init(void) pr_cont("Haswell events, "); break; - case 61: /* 14nm Broadwell Core-M */ - case 86: /* 14nm Broadwell Xeon D */ - case 71: /* 14nm Broadwell + GT3e (Intel Iris Pro graphics) */ - case 79: /* 14nm Broadwell Server */ + case INTEL_FAM6_BROADWELL_CORE: + case INTEL_FAM6_BROADWELL_XEON_D: + case INTEL_FAM6_BROADWELL_GT3E: + case INTEL_FAM6_BROADWELL_X: x86_pmu.late_ack = true; memcpy(hw_cache_event_ids, hsw_hw_cache_event_ids, sizeof(hw_cache_event_ids)); memcpy(hw_cache_extra_regs, hsw_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); @@ -3806,7 +3892,7 @@ __init int intel_pmu_init(void) pr_cont("Broadwell events, "); break; - case 87: /* Knights Landing Xeon Phi */ + case INTEL_FAM6_XEON_PHI_KNL: memcpy(hw_cache_event_ids, slm_hw_cache_event_ids, sizeof(hw_cache_event_ids)); memcpy(hw_cache_extra_regs, @@ -3824,16 +3910,22 @@ __init int intel_pmu_init(void) pr_cont("Knights Landing events, "); break; - case 142: /* 14nm Kabylake Mobile */ - case 158: /* 14nm Kabylake Desktop */ - case 78: /* 14nm Skylake Mobile */ - case 94: /* 14nm Skylake Desktop */ - case 85: /* 14nm Skylake Server */ + case INTEL_FAM6_SKYLAKE_MOBILE: + case INTEL_FAM6_SKYLAKE_DESKTOP: + case INTEL_FAM6_SKYLAKE_X: + case INTEL_FAM6_KABYLAKE_MOBILE: + case INTEL_FAM6_KABYLAKE_DESKTOP: x86_pmu.late_ack = true; memcpy(hw_cache_event_ids, skl_hw_cache_event_ids, sizeof(hw_cache_event_ids)); memcpy(hw_cache_extra_regs, skl_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); intel_pmu_lbr_init_skl(); + /* INT_MISC.RECOVERY_CYCLES has umask 1 in Skylake */ + event_attr_td_recovery_bubbles.event_str_noht = + "event=0xd,umask=0x1,cmask=1"; + event_attr_td_recovery_bubbles.event_str_ht = + "event=0xd,umask=0x1,cmask=1,any=1"; + x86_pmu.event_constraints = intel_skl_event_constraints; x86_pmu.pebs_constraints = intel_skl_pebs_event_constraints; x86_pmu.extra_regs = intel_skl_extra_regs; @@ -3914,6 +4006,8 @@ __init int intel_pmu_init(void) x86_pmu.lbr_nr = 0; } + if (x86_pmu.lbr_nr) + pr_cont("%d-deep LBR, ", x86_pmu.lbr_nr); /* * Access extra MSR may cause #GP under certain circumstances. * E.g. KVM doesn't support offcore event @@ -3946,16 +4040,14 @@ __init int intel_pmu_init(void) */ static __init int fixup_ht_bug(void) { - int cpu = smp_processor_id(); - int w, c; + int c; /* * problem not present on this CPU model, nothing to do */ if (!(x86_pmu.flags & PMU_FL_EXCL_ENABLED)) return 0; - w = cpumask_weight(topology_sibling_cpumask(cpu)); - if (w > 1) { + if (topology_max_smt_threads() > 1) { pr_info("PMU erratum BJ122, BV98, HSD29 worked around, HT is on\n"); return 0; } diff --git a/arch/x86/events/intel/cqm.c b/arch/x86/events/intel/cqm.c index 4ff41ae51..8f82b0293 100644 --- a/arch/x86/events/intel/cqm.c +++ b/arch/x86/events/intel/cqm.c @@ -1586,7 +1586,7 @@ static inline void cqm_pick_event_reader(int cpu) cpumask_set_cpu(cpu, &cqm_cpumask); } -static void intel_cqm_cpu_starting(unsigned int cpu) +static int intel_cqm_cpu_starting(unsigned int cpu) { struct intel_pqr_state *state = &per_cpu(pqr_state, cpu); struct cpuinfo_x86 *c = &cpu_data(cpu); @@ -1597,39 +1597,26 @@ static void intel_cqm_cpu_starting(unsigned int cpu) WARN_ON(c->x86_cache_max_rmid != cqm_max_rmid); WARN_ON(c->x86_cache_occ_scale != cqm_l3_scale); + + cqm_pick_event_reader(cpu); + return 0; } -static void intel_cqm_cpu_exit(unsigned int cpu) +static int intel_cqm_cpu_exit(unsigned int cpu) { int target; /* Is @cpu the current cqm reader for this package ? */ if (!cpumask_test_and_clear_cpu(cpu, &cqm_cpumask)) - return; + return 0; /* Find another online reader in this package */ target = cpumask_any_but(topology_core_cpumask(cpu), cpu); if (target < nr_cpu_ids) cpumask_set_cpu(target, &cqm_cpumask); -} - -static int intel_cqm_cpu_notifier(struct notifier_block *nb, - unsigned long action, void *hcpu) -{ - unsigned int cpu = (unsigned long)hcpu; - - switch (action & ~CPU_TASKS_FROZEN) { - case CPU_DOWN_PREPARE: - intel_cqm_cpu_exit(cpu); - break; - case CPU_STARTING: - intel_cqm_cpu_starting(cpu); - cqm_pick_event_reader(cpu); - break; - } - return NOTIFY_OK; + return 0; } static const struct x86_cpu_id intel_cqm_match[] = { @@ -1691,7 +1678,7 @@ out: static int __init intel_cqm_init(void) { char *str = NULL, scale[20]; - int i, cpu, ret; + int cpu, ret; if (x86_match_cpu(intel_cqm_match)) cqm_enabled = true; @@ -1714,8 +1701,7 @@ static int __init intel_cqm_init(void) * * Also, check that the scales match on all cpus. */ - cpu_notifier_register_begin(); - + get_online_cpus(); for_each_online_cpu(cpu) { struct cpuinfo_x86 *c = &cpu_data(cpu); @@ -1752,11 +1738,6 @@ static int __init intel_cqm_init(void) if (ret) goto out; - for_each_online_cpu(i) { - intel_cqm_cpu_starting(i); - cqm_pick_event_reader(i); - } - if (mbm_enabled) ret = intel_mbm_init(); if (ret && !cqm_enabled) @@ -1781,12 +1762,18 @@ static int __init intel_cqm_init(void) pr_info("Intel MBM enabled\n"); /* - * Register the hot cpu notifier once we are sure cqm + * Setup the hot cpu notifier once we are sure cqm * is enabled to avoid notifier leak. */ - __perf_cpu_notifier(intel_cqm_cpu_notifier); + cpuhp_setup_state(CPUHP_AP_PERF_X86_CQM_STARTING, + "AP_PERF_X86_CQM_STARTING", + intel_cqm_cpu_starting, NULL); + cpuhp_setup_state(CPUHP_AP_PERF_X86_CQM_ONLINE, "AP_PERF_X86_CQM_ONLINE", + NULL, intel_cqm_cpu_exit); + out: - cpu_notifier_register_done(); + put_online_cpus(); + if (ret) { kfree(str); cqm_cleanup(); diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c index 9ba4e4136..3ca87b5a8 100644 --- a/arch/x86/events/intel/cstate.c +++ b/arch/x86/events/intel/cstate.c @@ -89,6 +89,7 @@ #include <linux/slab.h> #include <linux/perf_event.h> #include <asm/cpu_device_id.h> +#include <asm/intel-family.h> #include "../perf_event.h" MODULE_LICENSE("GPL"); @@ -365,7 +366,7 @@ static int cstate_pmu_event_add(struct perf_event *event, int mode) * Check if exiting cpu is the designated reader. If so migrate the * events when there is a valid target available */ -static void cstate_cpu_exit(int cpu) +static int cstate_cpu_exit(unsigned int cpu) { unsigned int target; @@ -390,9 +391,10 @@ static void cstate_cpu_exit(int cpu) perf_pmu_migrate_context(&cstate_pkg_pmu, cpu, target); } } + return 0; } -static void cstate_cpu_init(int cpu) +static int cstate_cpu_init(unsigned int cpu) { unsigned int target; @@ -414,31 +416,10 @@ static void cstate_cpu_init(int cpu) topology_core_cpumask(cpu)); if (has_cstate_pkg && target >= nr_cpu_ids) cpumask_set_cpu(cpu, &cstate_pkg_cpu_mask); -} -static int cstate_cpu_notifier(struct notifier_block *self, - unsigned long action, void *hcpu) -{ - unsigned int cpu = (long)hcpu; - - switch (action & ~CPU_TASKS_FROZEN) { - case CPU_STARTING: - cstate_cpu_init(cpu); - break; - case CPU_DOWN_PREPARE: - cstate_cpu_exit(cpu); - break; - default: - break; - } - return NOTIFY_OK; + return 0; } -static struct notifier_block cstate_cpu_nb = { - .notifier_call = cstate_cpu_notifier, - .priority = CPU_PRI_PERF + 1, -}; - static struct pmu cstate_core_pmu = { .attr_groups = core_attr_groups, .name = "cstate_core", @@ -511,37 +492,37 @@ static const struct cstate_model slm_cstates __initconst = { { X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (unsigned long) &(states) } static const struct x86_cpu_id intel_cstates_match[] __initconst = { - X86_CSTATES_MODEL(30, nhm_cstates), /* 45nm Nehalem */ - X86_CSTATES_MODEL(26, nhm_cstates), /* 45nm Nehalem-EP */ - X86_CSTATES_MODEL(46, nhm_cstates), /* 45nm Nehalem-EX */ + X86_CSTATES_MODEL(INTEL_FAM6_NEHALEM, nhm_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_NEHALEM_EP, nhm_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_NEHALEM_EX, nhm_cstates), - X86_CSTATES_MODEL(37, nhm_cstates), /* 32nm Westmere */ - X86_CSTATES_MODEL(44, nhm_cstates), /* 32nm Westmere-EP */ - X86_CSTATES_MODEL(47, nhm_cstates), /* 32nm Westmere-EX */ + X86_CSTATES_MODEL(INTEL_FAM6_WESTMERE, nhm_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_WESTMERE_EP, nhm_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_WESTMERE_EX, nhm_cstates), - X86_CSTATES_MODEL(42, snb_cstates), /* 32nm SandyBridge */ - X86_CSTATES_MODEL(45, snb_cstates), /* 32nm SandyBridge-E/EN/EP */ + X86_CSTATES_MODEL(INTEL_FAM6_SANDYBRIDGE, snb_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_SANDYBRIDGE_X, snb_cstates), - X86_CSTATES_MODEL(58, snb_cstates), /* 22nm IvyBridge */ - X86_CSTATES_MODEL(62, snb_cstates), /* 22nm IvyBridge-EP/EX */ + X86_CSTATES_MODEL(INTEL_FAM6_IVYBRIDGE, snb_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_IVYBRIDGE_X, snb_cstates), - X86_CSTATES_MODEL(60, snb_cstates), /* 22nm Haswell Core */ - X86_CSTATES_MODEL(63, snb_cstates), /* 22nm Haswell Server */ - X86_CSTATES_MODEL(70, snb_cstates), /* 22nm Haswell + GT3e */ + X86_CSTATES_MODEL(INTEL_FAM6_HASWELL_CORE, snb_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_HASWELL_X, snb_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_HASWELL_GT3E, snb_cstates), - X86_CSTATES_MODEL(69, hswult_cstates), /* 22nm Haswell ULT */ + X86_CSTATES_MODEL(INTEL_FAM6_HASWELL_ULT, hswult_cstates), - X86_CSTATES_MODEL(55, slm_cstates), /* 22nm Atom Silvermont */ - X86_CSTATES_MODEL(77, slm_cstates), /* 22nm Atom Avoton/Rangely */ - X86_CSTATES_MODEL(76, slm_cstates), /* 22nm Atom Airmont */ + X86_CSTATES_MODEL(INTEL_FAM6_ATOM_SILVERMONT1, slm_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_ATOM_SILVERMONT2, slm_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_ATOM_AIRMONT, slm_cstates), - X86_CSTATES_MODEL(61, snb_cstates), /* 14nm Broadwell Core-M */ - X86_CSTATES_MODEL(86, snb_cstates), /* 14nm Broadwell Xeon D */ - X86_CSTATES_MODEL(71, snb_cstates), /* 14nm Broadwell + GT3e */ - X86_CSTATES_MODEL(79, snb_cstates), /* 14nm Broadwell Server */ + X86_CSTATES_MODEL(INTEL_FAM6_BROADWELL_CORE, snb_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_BROADWELL_XEON_D, snb_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_BROADWELL_GT3E, snb_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_BROADWELL_X, snb_cstates), - X86_CSTATES_MODEL(78, snb_cstates), /* 14nm Skylake Mobile */ - X86_CSTATES_MODEL(94, snb_cstates), /* 14nm Skylake Desktop */ + X86_CSTATES_MODEL(INTEL_FAM6_SKYLAKE_MOBILE, snb_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_SKYLAKE_DESKTOP, snb_cstates), { }, }; MODULE_DEVICE_TABLE(x86cpu, intel_cstates_match); @@ -599,18 +580,20 @@ static inline void cstate_cleanup(void) static int __init cstate_init(void) { - int cpu, err; + int err; - cpu_notifier_register_begin(); - for_each_online_cpu(cpu) - cstate_cpu_init(cpu); + cpuhp_setup_state(CPUHP_AP_PERF_X86_CSTATE_STARTING, + "AP_PERF_X86_CSTATE_STARTING", cstate_cpu_init, + NULL); + cpuhp_setup_state(CPUHP_AP_PERF_X86_CSTATE_ONLINE, + "AP_PERF_X86_CSTATE_ONLINE", NULL, cstate_cpu_exit); if (has_cstate_core) { err = perf_pmu_register(&cstate_core_pmu, cstate_core_pmu.name, -1); if (err) { has_cstate_core = false; pr_info("Failed to register cstate core pmu\n"); - goto out; + return err; } } @@ -620,12 +603,10 @@ static int __init cstate_init(void) has_cstate_pkg = false; pr_info("Failed to register cstate pkg pmu\n"); cstate_cleanup(); - goto out; + return err; } } - __register_cpu_notifier(&cstate_cpu_nb); -out: - cpu_notifier_register_done(); + return err; } @@ -651,9 +632,8 @@ module_init(cstate_pmu_init); static void __exit cstate_pmu_exit(void) { - cpu_notifier_register_begin(); - __unregister_cpu_notifier(&cstate_cpu_nb); + cpuhp_remove_state_nocalls(CPUHP_AP_PERF_X86_CSTATE_ONLINE); + cpuhp_remove_state_nocalls(CPUHP_AP_PERF_X86_CSTATE_STARTING); cstate_cleanup(); - cpu_notifier_register_done(); } module_exit(cstate_pmu_exit); diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c index 9e2b40cdb..707d358e0 100644 --- a/arch/x86/events/intel/lbr.c +++ b/arch/x86/events/intel/lbr.c @@ -77,9 +77,11 @@ static enum { LBR_IND_JMP |\ LBR_FAR) -#define LBR_FROM_FLAG_MISPRED (1ULL << 63) -#define LBR_FROM_FLAG_IN_TX (1ULL << 62) -#define LBR_FROM_FLAG_ABORT (1ULL << 61) +#define LBR_FROM_FLAG_MISPRED BIT_ULL(63) +#define LBR_FROM_FLAG_IN_TX BIT_ULL(62) +#define LBR_FROM_FLAG_ABORT BIT_ULL(61) + +#define LBR_FROM_SIGNEXT_2MSB (BIT_ULL(60) | BIT_ULL(59)) /* * x86control flow change classification @@ -235,6 +237,97 @@ enum { LBR_VALID, }; +/* + * For formats with LBR_TSX flags (e.g. LBR_FORMAT_EIP_FLAGS2), bits 61:62 in + * MSR_LAST_BRANCH_FROM_x are the TSX flags when TSX is supported, but when + * TSX is not supported they have no consistent behavior: + * + * - For wrmsr(), bits 61:62 are considered part of the sign extension. + * - For HW updates (branch captures) bits 61:62 are always OFF and are not + * part of the sign extension. + * + * Therefore, if: + * + * 1) LBR has TSX format + * 2) CPU has no TSX support enabled + * + * ... then any value passed to wrmsr() must be sign extended to 63 bits and any + * value from rdmsr() must be converted to have a 61 bits sign extension, + * ignoring the TSX flags. + */ +static inline bool lbr_from_signext_quirk_needed(void) +{ + int lbr_format = x86_pmu.intel_cap.lbr_format; + bool tsx_support = boot_cpu_has(X86_FEATURE_HLE) || + boot_cpu_has(X86_FEATURE_RTM); + + return !tsx_support && (lbr_desc[lbr_format] & LBR_TSX); +} + +DEFINE_STATIC_KEY_FALSE(lbr_from_quirk_key); + +/* If quirk is enabled, ensure sign extension is 63 bits: */ +inline u64 lbr_from_signext_quirk_wr(u64 val) +{ + if (static_branch_unlikely(&lbr_from_quirk_key)) { + /* + * Sign extend into bits 61:62 while preserving bit 63. + * + * Quirk is enabled when TSX is disabled. Therefore TSX bits + * in val are always OFF and must be changed to be sign + * extension bits. Since bits 59:60 are guaranteed to be + * part of the sign extension bits, we can just copy them + * to 61:62. + */ + val |= (LBR_FROM_SIGNEXT_2MSB & val) << 2; + } + return val; +} + +/* + * If quirk is needed, ensure sign extension is 61 bits: + */ +u64 lbr_from_signext_quirk_rd(u64 val) +{ + if (static_branch_unlikely(&lbr_from_quirk_key)) { + /* + * Quirk is on when TSX is not enabled. Therefore TSX + * flags must be read as OFF. + */ + val &= ~(LBR_FROM_FLAG_IN_TX | LBR_FROM_FLAG_ABORT); + } + return val; +} + +static inline void wrlbr_from(unsigned int idx, u64 val) +{ + val = lbr_from_signext_quirk_wr(val); + wrmsrl(x86_pmu.lbr_from + idx, val); +} + +static inline void wrlbr_to(unsigned int idx, u64 val) +{ + wrmsrl(x86_pmu.lbr_to + idx, val); +} + +static inline u64 rdlbr_from(unsigned int idx) +{ + u64 val; + + rdmsrl(x86_pmu.lbr_from + idx, val); + + return lbr_from_signext_quirk_rd(val); +} + +static inline u64 rdlbr_to(unsigned int idx) +{ + u64 val; + + rdmsrl(x86_pmu.lbr_to + idx, val); + + return val; +} + static void __intel_pmu_lbr_restore(struct x86_perf_task_context *task_ctx) { int i; @@ -251,8 +344,9 @@ static void __intel_pmu_lbr_restore(struct x86_perf_task_context *task_ctx) tos = task_ctx->tos; for (i = 0; i < tos; i++) { lbr_idx = (tos - i) & mask; - wrmsrl(x86_pmu.lbr_from + lbr_idx, task_ctx->lbr_from[i]); - wrmsrl(x86_pmu.lbr_to + lbr_idx, task_ctx->lbr_to[i]); + wrlbr_from(lbr_idx, task_ctx->lbr_from[i]); + wrlbr_to (lbr_idx, task_ctx->lbr_to[i]); + if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO) wrmsrl(MSR_LBR_INFO_0 + lbr_idx, task_ctx->lbr_info[i]); } @@ -262,9 +356,9 @@ static void __intel_pmu_lbr_restore(struct x86_perf_task_context *task_ctx) static void __intel_pmu_lbr_save(struct x86_perf_task_context *task_ctx) { - int i; unsigned lbr_idx, mask; u64 tos; + int i; if (task_ctx->lbr_callstack_users == 0) { task_ctx->lbr_stack_state = LBR_NONE; @@ -275,8 +369,8 @@ static void __intel_pmu_lbr_save(struct x86_perf_task_context *task_ctx) tos = intel_pmu_lbr_tos(); for (i = 0; i < tos; i++) { lbr_idx = (tos - i) & mask; - rdmsrl(x86_pmu.lbr_from + lbr_idx, task_ctx->lbr_from[i]); - rdmsrl(x86_pmu.lbr_to + lbr_idx, task_ctx->lbr_to[i]); + task_ctx->lbr_from[i] = rdlbr_from(lbr_idx); + task_ctx->lbr_to[i] = rdlbr_to(lbr_idx); if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO) rdmsrl(MSR_LBR_INFO_0 + lbr_idx, task_ctx->lbr_info[i]); } @@ -452,8 +546,8 @@ static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc) u16 cycles = 0; int lbr_flags = lbr_desc[lbr_format]; - rdmsrl(x86_pmu.lbr_from + lbr_idx, from); - rdmsrl(x86_pmu.lbr_to + lbr_idx, to); + from = rdlbr_from(lbr_idx); + to = rdlbr_to(lbr_idx); if (lbr_format == LBR_FORMAT_INFO && need_info) { u64 info; @@ -956,7 +1050,6 @@ void __init intel_pmu_lbr_init_core(void) * SW branch filter usage: * - compensate for lack of HW filter */ - pr_cont("4-deep LBR, "); } /* nehalem/westmere */ @@ -977,7 +1070,6 @@ void __init intel_pmu_lbr_init_nhm(void) * That requires LBR_FAR but that means far * jmp need to be filtered out */ - pr_cont("16-deep LBR, "); } /* sandy bridge */ @@ -997,7 +1089,6 @@ void __init intel_pmu_lbr_init_snb(void) * That requires LBR_FAR but that means far * jmp need to be filtered out */ - pr_cont("16-deep LBR, "); } /* haswell */ @@ -1011,7 +1102,8 @@ void intel_pmu_lbr_init_hsw(void) x86_pmu.lbr_sel_mask = LBR_SEL_MASK; x86_pmu.lbr_sel_map = hsw_lbr_sel_map; - pr_cont("16-deep LBR, "); + if (lbr_from_signext_quirk_needed()) + static_branch_enable(&lbr_from_quirk_key); } /* skylake */ @@ -1031,7 +1123,6 @@ __init void intel_pmu_lbr_init_skl(void) * That requires LBR_FAR but that means far * jmp need to be filtered out */ - pr_cont("32-deep LBR, "); } /* atom */ @@ -1057,7 +1148,6 @@ void __init intel_pmu_lbr_init_atom(void) * SW branch filter usage: * - compensate for lack of HW filter */ - pr_cont("8-deep LBR, "); } /* slm */ @@ -1088,6 +1178,4 @@ void intel_pmu_lbr_init_knl(void) x86_pmu.lbr_sel_mask = LBR_SEL_MASK; x86_pmu.lbr_sel_map = snb_lbr_sel_map; - - pr_cont("8-deep LBR, "); } diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c index e30eef4f2..28865938a 100644 --- a/arch/x86/events/intel/rapl.c +++ b/arch/x86/events/intel/rapl.c @@ -55,6 +55,7 @@ #include <linux/slab.h> #include <linux/perf_event.h> #include <asm/cpu_device_id.h> +#include <asm/intel-family.h> #include "../perf_event.h" MODULE_LICENSE("GPL"); @@ -555,14 +556,14 @@ const struct attribute_group *rapl_attr_groups[] = { NULL, }; -static void rapl_cpu_exit(int cpu) +static int rapl_cpu_offline(unsigned int cpu) { struct rapl_pmu *pmu = cpu_to_rapl_pmu(cpu); int target; /* Check if exiting cpu is used for collecting rapl events */ if (!cpumask_test_and_clear_cpu(cpu, &rapl_cpu_mask)) - return; + return 0; pmu->cpu = -1; /* Find a new cpu to collect rapl events */ @@ -574,9 +575,10 @@ static void rapl_cpu_exit(int cpu) pmu->cpu = target; perf_pmu_migrate_context(pmu->pmu, cpu, target); } + return 0; } -static void rapl_cpu_init(int cpu) +static int rapl_cpu_online(unsigned int cpu) { struct rapl_pmu *pmu = cpu_to_rapl_pmu(cpu); int target; @@ -587,13 +589,14 @@ static void rapl_cpu_init(int cpu) */ target = cpumask_any_and(&rapl_cpu_mask, topology_core_cpumask(cpu)); if (target < nr_cpu_ids) - return; + return 0; cpumask_set_cpu(cpu, &rapl_cpu_mask); pmu->cpu = cpu; + return 0; } -static int rapl_cpu_prepare(int cpu) +static int rapl_cpu_prepare(unsigned int cpu) { struct rapl_pmu *pmu = cpu_to_rapl_pmu(cpu); @@ -614,33 +617,6 @@ static int rapl_cpu_prepare(int cpu) return 0; } -static int rapl_cpu_notifier(struct notifier_block *self, - unsigned long action, void *hcpu) -{ - unsigned int cpu = (long)hcpu; - - switch (action & ~CPU_TASKS_FROZEN) { - case CPU_UP_PREPARE: - rapl_cpu_prepare(cpu); - break; - - case CPU_DOWN_FAILED: - case CPU_ONLINE: - rapl_cpu_init(cpu); - break; - - case CPU_DOWN_PREPARE: - rapl_cpu_exit(cpu); - break; - } - return NOTIFY_OK; -} - -static struct notifier_block rapl_cpu_nb = { - .notifier_call = rapl_cpu_notifier, - .priority = CPU_PRI_PERF + 1, -}; - static int rapl_check_hw_unit(bool apply_quirk) { u64 msr_rapl_power_unit_bits; @@ -691,24 +667,6 @@ static void __init rapl_advertise(void) } } -static int __init rapl_prepare_cpus(void) -{ - unsigned int cpu, pkg; - int ret; - - for_each_online_cpu(cpu) { - pkg = topology_logical_package_id(cpu); - if (rapl_pmus->pmus[pkg]) - continue; - - ret = rapl_cpu_prepare(cpu); - if (ret) - return ret; - rapl_cpu_init(cpu); - } - return 0; -} - static void cleanup_rapl_pmus(void) { int i; @@ -786,26 +744,27 @@ static const struct intel_rapl_init_fun skl_rapl_init __initconst = { }; static const struct x86_cpu_id rapl_cpu_match[] __initconst = { - X86_RAPL_MODEL_MATCH(42, snb_rapl_init), /* Sandy Bridge */ - X86_RAPL_MODEL_MATCH(45, snbep_rapl_init), /* Sandy Bridge-EP */ + X86_RAPL_MODEL_MATCH(INTEL_FAM6_SANDYBRIDGE, snb_rapl_init), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_SANDYBRIDGE_X, snbep_rapl_init), - X86_RAPL_MODEL_MATCH(58, snb_rapl_init), /* Ivy Bridge */ - X86_RAPL_MODEL_MATCH(62, snbep_rapl_init), /* IvyTown */ + X86_RAPL_MODEL_MATCH(INTEL_FAM6_IVYBRIDGE, snb_rapl_init), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_IVYBRIDGE_X, snbep_rapl_init), - X86_RAPL_MODEL_MATCH(60, hsw_rapl_init), /* Haswell */ - X86_RAPL_MODEL_MATCH(63, hsx_rapl_init), /* Haswell-Server */ - X86_RAPL_MODEL_MATCH(69, hsw_rapl_init), /* Haswell-Celeron */ - X86_RAPL_MODEL_MATCH(70, hsw_rapl_init), /* Haswell GT3e */ + X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_CORE, hsw_rapl_init), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_X, hsw_rapl_init), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_ULT, hsw_rapl_init), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_GT3E, hsw_rapl_init), - X86_RAPL_MODEL_MATCH(61, hsw_rapl_init), /* Broadwell */ - X86_RAPL_MODEL_MATCH(71, hsw_rapl_init), /* Broadwell-H */ - X86_RAPL_MODEL_MATCH(79, hsx_rapl_init), /* Broadwell-Server */ - X86_RAPL_MODEL_MATCH(86, hsx_rapl_init), /* Broadwell Xeon D */ + X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_CORE, hsw_rapl_init), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_GT3E, hsw_rapl_init), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_X, hsw_rapl_init), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_XEON_D, hsw_rapl_init), - X86_RAPL_MODEL_MATCH(87, knl_rapl_init), /* Knights Landing */ + X86_RAPL_MODEL_MATCH(INTEL_FAM6_XEON_PHI_KNL, knl_rapl_init), - X86_RAPL_MODEL_MATCH(78, skl_rapl_init), /* Skylake */ - X86_RAPL_MODEL_MATCH(94, skl_rapl_init), /* Skylake H/S */ + X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE_MOBILE, skl_rapl_init), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE_DESKTOP, skl_rapl_init), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE_X, hsx_rapl_init), {}, }; @@ -835,35 +794,44 @@ static int __init rapl_pmu_init(void) if (ret) return ret; - cpu_notifier_register_begin(); + /* + * Install callbacks. Core will call them for each online cpu. + */ - ret = rapl_prepare_cpus(); + ret = cpuhp_setup_state(CPUHP_PERF_X86_RAPL_PREP, "PERF_X86_RAPL_PREP", + rapl_cpu_prepare, NULL); if (ret) goto out; + ret = cpuhp_setup_state(CPUHP_AP_PERF_X86_RAPL_ONLINE, + "AP_PERF_X86_RAPL_ONLINE", + rapl_cpu_online, rapl_cpu_offline); + if (ret) + goto out1; + ret = perf_pmu_register(&rapl_pmus->pmu, "power", -1); if (ret) - goto out; + goto out2; - __register_cpu_notifier(&rapl_cpu_nb); - cpu_notifier_register_done(); rapl_advertise(); return 0; +out2: + cpuhp_remove_state(CPUHP_AP_PERF_X86_RAPL_ONLINE); +out1: + cpuhp_remove_state(CPUHP_PERF_X86_RAPL_PREP); out: pr_warn("Initialization failed (%d), disabled\n", ret); cleanup_rapl_pmus(); - cpu_notifier_register_done(); return ret; } module_init(rapl_pmu_init); static void __exit intel_rapl_exit(void) { - cpu_notifier_register_begin(); - __unregister_cpu_notifier(&rapl_cpu_nb); + cpuhp_remove_state_nocalls(CPUHP_AP_PERF_X86_RAPL_ONLINE); + cpuhp_remove_state_nocalls(CPUHP_PERF_X86_RAPL_PREP); perf_pmu_unregister(&rapl_pmus->pmu); cleanup_rapl_pmus(); - cpu_notifier_register_done(); } module_exit(intel_rapl_exit); diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c index fce74062d..463dc7a5a 100644 --- a/arch/x86/events/intel/uncore.c +++ b/arch/x86/events/intel/uncore.c @@ -1,4 +1,7 @@ +#include <linux/module.h> + #include <asm/cpu_device_id.h> +#include <asm/intel-family.h> #include "uncore.h" static struct intel_uncore_type *empty_uncore[] = { NULL, }; @@ -882,7 +885,7 @@ uncore_types_init(struct intel_uncore_type **types, bool setid) static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) { struct intel_uncore_type *type; - struct intel_uncore_pmu *pmu; + struct intel_uncore_pmu *pmu = NULL; struct intel_uncore_box *box; int phys_id, pkg, ret; @@ -903,20 +906,37 @@ static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id } type = uncore_pci_uncores[UNCORE_PCI_DEV_TYPE(id->driver_data)]; + /* - * for performance monitoring unit with multiple boxes, - * each box has a different function id. - */ - pmu = &type->pmus[UNCORE_PCI_DEV_IDX(id->driver_data)]; - /* Knights Landing uses a common PCI device ID for multiple instances of - * an uncore PMU device type. There is only one entry per device type in - * the knl_uncore_pci_ids table inspite of multiple devices present for - * some device types. Hence PCI device idx would be 0 for all devices. - * So increment pmu pointer to point to an unused array element. + * Some platforms, e.g. Knights Landing, use a common PCI device ID + * for multiple instances of an uncore PMU device type. We should check + * PCI slot and func to indicate the uncore box. */ - if (boot_cpu_data.x86_model == 87) { - while (pmu->func_id >= 0) - pmu++; + if (id->driver_data & ~0xffff) { + struct pci_driver *pci_drv = pdev->driver; + const struct pci_device_id *ids = pci_drv->id_table; + unsigned int devfn; + + while (ids && ids->vendor) { + if ((ids->vendor == pdev->vendor) && + (ids->device == pdev->device)) { + devfn = PCI_DEVFN(UNCORE_PCI_DEV_DEV(ids->driver_data), + UNCORE_PCI_DEV_FUNC(ids->driver_data)); + if (devfn == pdev->devfn) { + pmu = &type->pmus[UNCORE_PCI_DEV_IDX(ids->driver_data)]; + break; + } + } + ids++; + } + if (pmu == NULL) + return -ENODEV; + } else { + /* + * for performance monitoring unit with multiple boxes, + * each box has a different function id. + */ + pmu = &type->pmus[UNCORE_PCI_DEV_IDX(id->driver_data)]; } if (WARN_ON_ONCE(pmu->boxes[pkg] != NULL)) @@ -956,7 +976,7 @@ static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id static void uncore_pci_remove(struct pci_dev *pdev) { - struct intel_uncore_box *box = pci_get_drvdata(pdev); + struct intel_uncore_box *box; struct intel_uncore_pmu *pmu; int i, phys_id, pkg; @@ -1034,7 +1054,7 @@ static void uncore_pci_exit(void) } } -static void uncore_cpu_dying(int cpu) +static int uncore_cpu_dying(unsigned int cpu) { struct intel_uncore_type *type, **types = uncore_msr_uncores; struct intel_uncore_pmu *pmu; @@ -1051,16 +1071,19 @@ static void uncore_cpu_dying(int cpu) uncore_box_exit(box); } } + return 0; } -static void uncore_cpu_starting(int cpu, bool init) +static int first_init; + +static int uncore_cpu_starting(unsigned int cpu) { struct intel_uncore_type *type, **types = uncore_msr_uncores; struct intel_uncore_pmu *pmu; struct intel_uncore_box *box; int i, pkg, ncpus = 1; - if (init) { + if (first_init) { /* * On init we get the number of online cpus in the package * and set refcount for all of them. @@ -1081,9 +1104,11 @@ static void uncore_cpu_starting(int cpu, bool init) uncore_box_init(box); } } + + return 0; } -static int uncore_cpu_prepare(int cpu) +static int uncore_cpu_prepare(unsigned int cpu) { struct intel_uncore_type *type, **types = uncore_msr_uncores; struct intel_uncore_pmu *pmu; @@ -1146,13 +1171,13 @@ static void uncore_change_context(struct intel_uncore_type **uncores, uncore_change_type_ctx(*uncores, old_cpu, new_cpu); } -static void uncore_event_exit_cpu(int cpu) +static int uncore_event_cpu_offline(unsigned int cpu) { int target; /* Check if exiting cpu is used for collecting uncore events */ if (!cpumask_test_and_clear_cpu(cpu, &uncore_cpu_mask)) - return; + return 0; /* Find a new cpu to collect uncore events */ target = cpumask_any_but(topology_core_cpumask(cpu), cpu); @@ -1165,9 +1190,10 @@ static void uncore_event_exit_cpu(int cpu) uncore_change_context(uncore_msr_uncores, cpu, target); uncore_change_context(uncore_pci_uncores, cpu, target); + return 0; } -static void uncore_event_init_cpu(int cpu) +static int uncore_event_cpu_online(unsigned int cpu) { int target; @@ -1177,50 +1203,15 @@ static void uncore_event_init_cpu(int cpu) */ target = cpumask_any_and(&uncore_cpu_mask, topology_core_cpumask(cpu)); if (target < nr_cpu_ids) - return; + return 0; cpumask_set_cpu(cpu, &uncore_cpu_mask); uncore_change_context(uncore_msr_uncores, -1, cpu); uncore_change_context(uncore_pci_uncores, -1, cpu); + return 0; } -static int uncore_cpu_notifier(struct notifier_block *self, - unsigned long action, void *hcpu) -{ - unsigned int cpu = (long)hcpu; - - switch (action & ~CPU_TASKS_FROZEN) { - case CPU_UP_PREPARE: - return notifier_from_errno(uncore_cpu_prepare(cpu)); - - case CPU_STARTING: - uncore_cpu_starting(cpu, false); - case CPU_DOWN_FAILED: - uncore_event_init_cpu(cpu); - break; - - case CPU_UP_CANCELED: - case CPU_DYING: - uncore_cpu_dying(cpu); - break; - - case CPU_DOWN_PREPARE: - uncore_event_exit_cpu(cpu); - break; - } - return NOTIFY_OK; -} - -static struct notifier_block uncore_cpu_nb = { - .notifier_call = uncore_cpu_notifier, - /* - * to migrate uncore events, our notifier should be executed - * before perf core's notifier. - */ - .priority = CPU_PRI_PERF + 1, -}; - static int __init type_pmu_register(struct intel_uncore_type *type) { int i, ret; @@ -1264,41 +1255,6 @@ err: return ret; } -static void __init uncore_cpu_setup(void *dummy) -{ - uncore_cpu_starting(smp_processor_id(), true); -} - -/* Lazy to avoid allocation of a few bytes for the normal case */ -static __initdata DECLARE_BITMAP(packages, MAX_LOCAL_APIC); - -static int __init uncore_cpumask_init(bool msr) -{ - unsigned int cpu; - - for_each_online_cpu(cpu) { - unsigned int pkg = topology_logical_package_id(cpu); - int ret; - - if (test_and_set_bit(pkg, packages)) - continue; - /* - * The first online cpu of each package allocates and takes - * the refcounts for all other online cpus in that package. - * If msrs are not enabled no allocation is required. - */ - if (msr) { - ret = uncore_cpu_prepare(cpu); - if (ret) - return ret; - } - uncore_event_init_cpu(cpu); - smp_call_function_single(cpu, uncore_cpu_setup, NULL, 1); - } - __register_cpu_notifier(&uncore_cpu_nb); - return 0; -} - #define X86_UNCORE_MODEL_MATCH(model, init) \ { X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (unsigned long)&init } @@ -1361,30 +1317,32 @@ static const struct intel_uncore_init_fun knl_uncore_init __initconst = { }; static const struct intel_uncore_init_fun skl_uncore_init __initconst = { + .cpu_init = skl_uncore_cpu_init, .pci_init = skl_uncore_pci_init, }; static const struct x86_cpu_id intel_uncore_match[] __initconst = { - X86_UNCORE_MODEL_MATCH(26, nhm_uncore_init), /* Nehalem */ - X86_UNCORE_MODEL_MATCH(30, nhm_uncore_init), - X86_UNCORE_MODEL_MATCH(37, nhm_uncore_init), /* Westmere */ - X86_UNCORE_MODEL_MATCH(44, nhm_uncore_init), - X86_UNCORE_MODEL_MATCH(42, snb_uncore_init), /* Sandy Bridge */ - X86_UNCORE_MODEL_MATCH(58, ivb_uncore_init), /* Ivy Bridge */ - X86_UNCORE_MODEL_MATCH(60, hsw_uncore_init), /* Haswell */ - X86_UNCORE_MODEL_MATCH(69, hsw_uncore_init), /* Haswell Celeron */ - X86_UNCORE_MODEL_MATCH(70, hsw_uncore_init), /* Haswell */ - X86_UNCORE_MODEL_MATCH(61, bdw_uncore_init), /* Broadwell */ - X86_UNCORE_MODEL_MATCH(71, bdw_uncore_init), /* Broadwell */ - X86_UNCORE_MODEL_MATCH(45, snbep_uncore_init), /* Sandy Bridge-EP */ - X86_UNCORE_MODEL_MATCH(46, nhmex_uncore_init), /* Nehalem-EX */ - X86_UNCORE_MODEL_MATCH(47, nhmex_uncore_init), /* Westmere-EX aka. Xeon E7 */ - X86_UNCORE_MODEL_MATCH(62, ivbep_uncore_init), /* Ivy Bridge-EP */ - X86_UNCORE_MODEL_MATCH(63, hswep_uncore_init), /* Haswell-EP */ - X86_UNCORE_MODEL_MATCH(79, bdx_uncore_init), /* BDX-EP */ - X86_UNCORE_MODEL_MATCH(86, bdx_uncore_init), /* BDX-DE */ - X86_UNCORE_MODEL_MATCH(87, knl_uncore_init), /* Knights Landing */ - X86_UNCORE_MODEL_MATCH(94, skl_uncore_init), /* SkyLake */ + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_NEHALEM_EP, nhm_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_NEHALEM, nhm_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_WESTMERE, nhm_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_WESTMERE_EP, nhm_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_SANDYBRIDGE, snb_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_IVYBRIDGE, ivb_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_HASWELL_CORE, hsw_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_HASWELL_ULT, hsw_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_HASWELL_GT3E, hsw_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_BROADWELL_CORE, bdw_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_BROADWELL_GT3E, bdw_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_SANDYBRIDGE_X, snbep_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_NEHALEM_EX, nhmex_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_WESTMERE_EX, nhmex_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_IVYBRIDGE_X, ivbep_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_HASWELL_X, hswep_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_BROADWELL_X, bdx_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_BROADWELL_XEON_D, bdx_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_XEON_PHI_KNL, knl_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_SKYLAKE_DESKTOP,skl_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_SKYLAKE_MOBILE, skl_uncore_init), {}, }; @@ -1420,11 +1378,33 @@ static int __init intel_uncore_init(void) if (cret && pret) return -ENODEV; - cpu_notifier_register_begin(); - ret = uncore_cpumask_init(!cret); - if (ret) - goto err; - cpu_notifier_register_done(); + /* + * Install callbacks. Core will call them for each online cpu. + * + * The first online cpu of each package allocates and takes + * the refcounts for all other online cpus in that package. + * If msrs are not enabled no allocation is required and + * uncore_cpu_prepare() is not called for each online cpu. + */ + if (!cret) { + ret = cpuhp_setup_state(CPUHP_PERF_X86_UNCORE_PREP, + "PERF_X86_UNCORE_PREP", + uncore_cpu_prepare, NULL); + if (ret) + goto err; + } else { + cpuhp_setup_state_nocalls(CPUHP_PERF_X86_UNCORE_PREP, + "PERF_X86_UNCORE_PREP", + uncore_cpu_prepare, NULL); + } + first_init = 1; + cpuhp_setup_state(CPUHP_AP_PERF_X86_UNCORE_STARTING, + "AP_PERF_X86_UNCORE_STARTING", + uncore_cpu_starting, uncore_cpu_dying); + first_init = 0; + cpuhp_setup_state(CPUHP_AP_PERF_X86_UNCORE_ONLINE, + "AP_PERF_X86_UNCORE_ONLINE", + uncore_event_cpu_online, uncore_event_cpu_offline); return 0; err: @@ -1432,17 +1412,16 @@ err: on_each_cpu_mask(&uncore_cpu_mask, uncore_exit_boxes, NULL, 1); uncore_types_exit(uncore_msr_uncores); uncore_pci_exit(); - cpu_notifier_register_done(); return ret; } module_init(intel_uncore_init); static void __exit intel_uncore_exit(void) { - cpu_notifier_register_begin(); - __unregister_cpu_notifier(&uncore_cpu_nb); + cpuhp_remove_state_nocalls(CPUHP_AP_PERF_X86_UNCORE_ONLINE); + cpuhp_remove_state_nocalls(CPUHP_AP_PERF_X86_UNCORE_STARTING); + cpuhp_remove_state_nocalls(CPUHP_PERF_X86_UNCORE_PREP); uncore_types_exit(uncore_msr_uncores); uncore_pci_exit(); - cpu_notifier_register_done(); } module_exit(intel_uncore_exit); diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h index 79766b9a3..78b9c23e2 100644 --- a/arch/x86/events/intel/uncore.h +++ b/arch/x86/events/intel/uncore.h @@ -1,4 +1,3 @@ -#include <linux/module.h> #include <linux/slab.h> #include <linux/pci.h> #include <asm/apicdef.h> @@ -15,7 +14,11 @@ #define UNCORE_PMC_IDX_FIXED UNCORE_PMC_IDX_MAX_GENERIC #define UNCORE_PMC_IDX_MAX (UNCORE_PMC_IDX_FIXED + 1) +#define UNCORE_PCI_DEV_FULL_DATA(dev, func, type, idx) \ + ((dev << 24) | (func << 16) | (type << 8) | idx) #define UNCORE_PCI_DEV_DATA(type, idx) ((type << 8) | idx) +#define UNCORE_PCI_DEV_DEV(data) ((data >> 24) & 0xff) +#define UNCORE_PCI_DEV_FUNC(data) ((data >> 16) & 0xff) #define UNCORE_PCI_DEV_TYPE(data) ((data >> 8) & 0xff) #define UNCORE_PCI_DEV_IDX(data) (data & 0xff) #define UNCORE_EXTRA_PCI_DEV 0xff @@ -360,6 +363,7 @@ int bdw_uncore_pci_init(void); int skl_uncore_pci_init(void); void snb_uncore_cpu_init(void); void nhm_uncore_cpu_init(void); +void skl_uncore_cpu_init(void); int snb_pci2phy_map_init(int devid); /* perf_event_intel_uncore_snbep.c */ diff --git a/arch/x86/events/intel/uncore_snb.c b/arch/x86/events/intel/uncore_snb.c index 96531d2b8..9d35ec0cb 100644 --- a/arch/x86/events/intel/uncore_snb.c +++ b/arch/x86/events/intel/uncore_snb.c @@ -1,4 +1,4 @@ -/* Nehalem/SandBridge/Haswell uncore support */ +/* Nehalem/SandBridge/Haswell/Broadwell/Skylake uncore support */ #include "uncore.h" /* Uncore IMC PCI IDs */ @@ -9,6 +9,7 @@ #define PCI_DEVICE_ID_INTEL_HSW_U_IMC 0x0a04 #define PCI_DEVICE_ID_INTEL_BDW_IMC 0x1604 #define PCI_DEVICE_ID_INTEL_SKL_IMC 0x191f +#define PCI_DEVICE_ID_INTEL_SKL_U_IMC 0x190c /* SNB event control */ #define SNB_UNC_CTL_EV_SEL_MASK 0x000000ff @@ -64,6 +65,10 @@ #define NHM_UNC_PERFEVTSEL0 0x3c0 #define NHM_UNC_UNCORE_PMC0 0x3b0 +/* SKL uncore global control */ +#define SKL_UNC_PERF_GLOBAL_CTL 0xe01 +#define SKL_UNC_GLOBAL_CTL_CORE_ALL ((1 << 5) - 1) + DEFINE_UNCORE_FORMAT_ATTR(event, event, "config:0-7"); DEFINE_UNCORE_FORMAT_ATTR(umask, umask, "config:8-15"); DEFINE_UNCORE_FORMAT_ATTR(edge, edge, "config:18"); @@ -95,6 +100,12 @@ static void snb_uncore_msr_init_box(struct intel_uncore_box *box) } } +static void snb_uncore_msr_enable_box(struct intel_uncore_box *box) +{ + wrmsrl(SNB_UNC_PERF_GLOBAL_CTL, + SNB_UNC_GLOBAL_CTL_EN | SNB_UNC_GLOBAL_CTL_CORE_ALL); +} + static void snb_uncore_msr_exit_box(struct intel_uncore_box *box) { if (box->pmu->pmu_idx == 0) @@ -122,6 +133,7 @@ static struct attribute_group snb_uncore_format_group = { static struct intel_uncore_ops snb_uncore_msr_ops = { .init_box = snb_uncore_msr_init_box, + .enable_box = snb_uncore_msr_enable_box, .exit_box = snb_uncore_msr_exit_box, .disable_event = snb_uncore_msr_disable_event, .enable_event = snb_uncore_msr_enable_event, @@ -179,6 +191,67 @@ void snb_uncore_cpu_init(void) snb_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores; } +static void skl_uncore_msr_init_box(struct intel_uncore_box *box) +{ + if (box->pmu->pmu_idx == 0) { + wrmsrl(SKL_UNC_PERF_GLOBAL_CTL, + SNB_UNC_GLOBAL_CTL_EN | SKL_UNC_GLOBAL_CTL_CORE_ALL); + } +} + +static void skl_uncore_msr_enable_box(struct intel_uncore_box *box) +{ + wrmsrl(SKL_UNC_PERF_GLOBAL_CTL, + SNB_UNC_GLOBAL_CTL_EN | SKL_UNC_GLOBAL_CTL_CORE_ALL); +} + +static void skl_uncore_msr_exit_box(struct intel_uncore_box *box) +{ + if (box->pmu->pmu_idx == 0) + wrmsrl(SKL_UNC_PERF_GLOBAL_CTL, 0); +} + +static struct intel_uncore_ops skl_uncore_msr_ops = { + .init_box = skl_uncore_msr_init_box, + .enable_box = skl_uncore_msr_enable_box, + .exit_box = skl_uncore_msr_exit_box, + .disable_event = snb_uncore_msr_disable_event, + .enable_event = snb_uncore_msr_enable_event, + .read_counter = uncore_msr_read_counter, +}; + +static struct intel_uncore_type skl_uncore_cbox = { + .name = "cbox", + .num_counters = 4, + .num_boxes = 5, + .perf_ctr_bits = 44, + .fixed_ctr_bits = 48, + .perf_ctr = SNB_UNC_CBO_0_PER_CTR0, + .event_ctl = SNB_UNC_CBO_0_PERFEVTSEL0, + .fixed_ctr = SNB_UNC_FIXED_CTR, + .fixed_ctl = SNB_UNC_FIXED_CTR_CTRL, + .single_fixed = 1, + .event_mask = SNB_UNC_RAW_EVENT_MASK, + .msr_offset = SNB_UNC_CBO_MSR_OFFSET, + .ops = &skl_uncore_msr_ops, + .format_group = &snb_uncore_format_group, + .event_descs = snb_uncore_events, +}; + +static struct intel_uncore_type *skl_msr_uncores[] = { + &skl_uncore_cbox, + &snb_uncore_arb, + NULL, +}; + +void skl_uncore_cpu_init(void) +{ + uncore_msr_uncores = skl_msr_uncores; + if (skl_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores) + skl_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores; + snb_uncore_arb.ops = &skl_uncore_msr_ops; +} + enum { SNB_PCI_UNCORE_IMC, }; @@ -544,6 +617,11 @@ static const struct pci_device_id skl_uncore_pci_ids[] = { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_IMC), .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_U_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* end: all zeroes */ }, }; @@ -587,6 +665,7 @@ static const struct imc_uncore_pci_dev desktop_imc_pci_ids[] = { IMC_DEV(HSW_U_IMC, &hsw_uncore_pci_driver), /* 4th Gen Core ULT Mobile Processor */ IMC_DEV(BDW_IMC, &bdw_uncore_pci_driver), /* 5th Gen Core U */ IMC_DEV(SKL_IMC, &skl_uncore_pci_driver), /* 6th Gen Core */ + IMC_DEV(SKL_U_IMC, &skl_uncore_pci_driver), /* 6th Gen Core U */ { /* end marker */ } }; diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index bd136ac14..8aee83bcf 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -2164,21 +2164,101 @@ static struct intel_uncore_type *knl_pci_uncores[] = { */ static const struct pci_device_id knl_uncore_pci_ids[] = { - { /* MC UClk */ + { /* MC0 UClk */ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7841), - .driver_data = UNCORE_PCI_DEV_DATA(KNL_PCI_UNCORE_MC_UCLK, 0), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(10, 0, KNL_PCI_UNCORE_MC_UCLK, 0), }, - { /* MC DClk Channel */ + { /* MC1 UClk */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7841), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(11, 0, KNL_PCI_UNCORE_MC_UCLK, 1), + }, + { /* MC0 DClk CH 0 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7843), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(8, 2, KNL_PCI_UNCORE_MC_DCLK, 0), + }, + { /* MC0 DClk CH 1 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7843), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(8, 3, KNL_PCI_UNCORE_MC_DCLK, 1), + }, + { /* MC0 DClk CH 2 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7843), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(8, 4, KNL_PCI_UNCORE_MC_DCLK, 2), + }, + { /* MC1 DClk CH 0 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7843), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(9, 2, KNL_PCI_UNCORE_MC_DCLK, 3), + }, + { /* MC1 DClk CH 1 */ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7843), - .driver_data = UNCORE_PCI_DEV_DATA(KNL_PCI_UNCORE_MC_DCLK, 0), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(9, 3, KNL_PCI_UNCORE_MC_DCLK, 4), + }, + { /* MC1 DClk CH 2 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7843), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(9, 4, KNL_PCI_UNCORE_MC_DCLK, 5), + }, + { /* EDC0 UClk */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7833), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(15, 0, KNL_PCI_UNCORE_EDC_UCLK, 0), + }, + { /* EDC1 UClk */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7833), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(16, 0, KNL_PCI_UNCORE_EDC_UCLK, 1), + }, + { /* EDC2 UClk */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7833), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(17, 0, KNL_PCI_UNCORE_EDC_UCLK, 2), + }, + { /* EDC3 UClk */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7833), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(18, 0, KNL_PCI_UNCORE_EDC_UCLK, 3), }, - { /* EDC UClk */ + { /* EDC4 UClk */ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7833), - .driver_data = UNCORE_PCI_DEV_DATA(KNL_PCI_UNCORE_EDC_UCLK, 0), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(19, 0, KNL_PCI_UNCORE_EDC_UCLK, 4), + }, + { /* EDC5 UClk */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7833), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(20, 0, KNL_PCI_UNCORE_EDC_UCLK, 5), + }, + { /* EDC6 UClk */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7833), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(21, 0, KNL_PCI_UNCORE_EDC_UCLK, 6), + }, + { /* EDC7 UClk */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7833), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(22, 0, KNL_PCI_UNCORE_EDC_UCLK, 7), + }, + { /* EDC0 EClk */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7835), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(24, 2, KNL_PCI_UNCORE_EDC_ECLK, 0), + }, + { /* EDC1 EClk */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7835), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(25, 2, KNL_PCI_UNCORE_EDC_ECLK, 1), + }, + { /* EDC2 EClk */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7835), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(26, 2, KNL_PCI_UNCORE_EDC_ECLK, 2), + }, + { /* EDC3 EClk */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7835), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(27, 2, KNL_PCI_UNCORE_EDC_ECLK, 3), + }, + { /* EDC4 EClk */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7835), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(28, 2, KNL_PCI_UNCORE_EDC_ECLK, 4), + }, + { /* EDC5 EClk */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7835), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(29, 2, KNL_PCI_UNCORE_EDC_ECLK, 5), + }, + { /* EDC6 EClk */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7835), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(30, 2, KNL_PCI_UNCORE_EDC_ECLK, 6), }, - { /* EDC EClk */ + { /* EDC7 EClk */ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7835), - .driver_data = UNCORE_PCI_DEV_DATA(KNL_PCI_UNCORE_EDC_ECLK, 0), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(31, 2, KNL_PCI_UNCORE_EDC_ECLK, 7), }, { /* M2PCIe */ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7817), diff --git a/arch/x86/events/msr.c b/arch/x86/events/msr.c index 85ef3c2e8..4bb3ec69e 100644 --- a/arch/x86/events/msr.c +++ b/arch/x86/events/msr.c @@ -1,4 +1,5 @@ #include <linux/perf_event.h> +#include <asm/intel-family.h> enum perf_msr_id { PERF_MSR_TSC = 0, @@ -34,39 +35,43 @@ static bool test_intel(int idx) return false; switch (boot_cpu_data.x86_model) { - case 30: /* 45nm Nehalem */ - case 26: /* 45nm Nehalem-EP */ - case 46: /* 45nm Nehalem-EX */ - - case 37: /* 32nm Westmere */ - case 44: /* 32nm Westmere-EP */ - case 47: /* 32nm Westmere-EX */ - - case 42: /* 32nm SandyBridge */ - case 45: /* 32nm SandyBridge-E/EN/EP */ - - case 58: /* 22nm IvyBridge */ - case 62: /* 22nm IvyBridge-EP/EX */ - - case 60: /* 22nm Haswell Core */ - case 63: /* 22nm Haswell Server */ - case 69: /* 22nm Haswell ULT */ - case 70: /* 22nm Haswell + GT3e (Intel Iris Pro graphics) */ - - case 61: /* 14nm Broadwell Core-M */ - case 86: /* 14nm Broadwell Xeon D */ - case 71: /* 14nm Broadwell + GT3e (Intel Iris Pro graphics) */ - case 79: /* 14nm Broadwell Server */ - - case 55: /* 22nm Atom "Silvermont" */ - case 77: /* 22nm Atom "Silvermont Avoton/Rangely" */ - case 76: /* 14nm Atom "Airmont" */ + case INTEL_FAM6_NEHALEM: + case INTEL_FAM6_NEHALEM_G: + case INTEL_FAM6_NEHALEM_EP: + case INTEL_FAM6_NEHALEM_EX: + + case INTEL_FAM6_WESTMERE: + case INTEL_FAM6_WESTMERE_EP: + case INTEL_FAM6_WESTMERE_EX: + + case INTEL_FAM6_SANDYBRIDGE: + case INTEL_FAM6_SANDYBRIDGE_X: + + case INTEL_FAM6_IVYBRIDGE: + case INTEL_FAM6_IVYBRIDGE_X: + + case INTEL_FAM6_HASWELL_CORE: + case INTEL_FAM6_HASWELL_X: + case INTEL_FAM6_HASWELL_ULT: + case INTEL_FAM6_HASWELL_GT3E: + + case INTEL_FAM6_BROADWELL_CORE: + case INTEL_FAM6_BROADWELL_XEON_D: + case INTEL_FAM6_BROADWELL_GT3E: + case INTEL_FAM6_BROADWELL_X: + + case INTEL_FAM6_ATOM_SILVERMONT1: + case INTEL_FAM6_ATOM_SILVERMONT2: + case INTEL_FAM6_ATOM_AIRMONT: if (idx == PERF_MSR_SMI) return true; break; - case 78: /* 14nm Skylake Mobile */ - case 94: /* 14nm Skylake Desktop */ + case INTEL_FAM6_SKYLAKE_MOBILE: + case INTEL_FAM6_SKYLAKE_DESKTOP: + case INTEL_FAM6_SKYLAKE_X: + case INTEL_FAM6_KABYLAKE_MOBILE: + case INTEL_FAM6_KABYLAKE_DESKTOP: if (idx == PERF_MSR_SMI || idx == PERF_MSR_PPERF) return true; break; diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index 8bd764df8..8c4a47706 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -668,6 +668,14 @@ static struct perf_pmu_events_attr event_attr_##v = { \ .event_str = str, \ }; +#define EVENT_ATTR_STR_HT(_name, v, noht, ht) \ +static struct perf_pmu_events_ht_attr event_attr_##v = { \ + .attr = __ATTR(_name, 0444, events_ht_sysfs_show, NULL),\ + .id = 0, \ + .event_str_noht = noht, \ + .event_str_ht = ht, \ +} + extern struct x86_pmu x86_pmu __read_mostly; static inline bool x86_pmu_has_lbr_callstack(void) @@ -803,6 +811,8 @@ struct attribute **merge_attr(struct attribute **a, struct attribute **b); ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr, char *page); +ssize_t events_ht_sysfs_show(struct device *dev, struct device_attribute *attr, + char *page); #ifdef CONFIG_CPU_SUP_AMD @@ -892,6 +902,8 @@ void intel_ds_init(void); void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in); +u64 lbr_from_signext_quirk_wr(u64 val); + void intel_pmu_lbr_reset(void); void intel_pmu_lbr_enable(struct perf_event *event); |