summaryrefslogtreecommitdiff
path: root/arch/x86/kernel/cpu
diff options
context:
space:
mode:
authorAndré Fabian Silva Delgado <emulatorman@parabola.nu>2016-09-11 04:34:46 -0300
committerAndré Fabian Silva Delgado <emulatorman@parabola.nu>2016-09-11 04:34:46 -0300
commit863981e96738983919de841ec669e157e6bdaeb0 (patch)
treed6d89a12e7eb8017837c057935a2271290907f76 /arch/x86/kernel/cpu
parent8dec7c70575785729a6a9e6719a955e9c545bcab (diff)
Linux-libre 4.7.1-gnupck-4.7.1-gnu
Diffstat (limited to 'arch/x86/kernel/cpu')
-rw-r--r--arch/x86/kernel/cpu/amd.c24
-rw-r--r--arch/x86/kernel/cpu/common.c95
-rw-r--r--arch/x86/kernel/cpu/cyrix.c2
-rw-r--r--arch/x86/kernel/cpu/intel.c51
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce-genpool.c46
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce-internal.h15
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce-severity.c30
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce.c160
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_amd.c94
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_intel.c2
-rw-r--r--arch/x86/kernel/cpu/mcheck/therm_throt.c2
-rw-r--r--arch/x86/kernel/cpu/microcode/core.c3
-rw-r--r--arch/x86/kernel/cpu/microcode/intel.c2
-rw-r--r--arch/x86/kernel/cpu/mtrr/cyrix.c4
-rw-r--r--arch/x86/kernel/cpu/mtrr/generic.c28
-rw-r--r--arch/x86/kernel/cpu/mtrr/main.c13
-rw-r--r--arch/x86/kernel/cpu/mtrr/mtrr.h1
-rw-r--r--arch/x86/kernel/cpu/vmware.c2
18 files changed, 430 insertions, 144 deletions
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 7b76eb67a..f5c69d897 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -565,14 +565,17 @@ static void early_init_amd(struct cpuinfo_x86 *c)
* can safely set X86_FEATURE_EXTD_APICID unconditionally for families
* after 16h.
*/
- if (cpu_has_apic && c->x86 > 0x16) {
- set_cpu_cap(c, X86_FEATURE_EXTD_APICID);
- } else if (cpu_has_apic && c->x86 >= 0xf) {
- /* check CPU config space for extended APIC ID */
- unsigned int val;
- val = read_pci_config(0, 24, 0, 0x68);
- if ((val & ((1 << 17) | (1 << 18))) == ((1 << 17) | (1 << 18)))
+ if (boot_cpu_has(X86_FEATURE_APIC)) {
+ if (c->x86 > 0x16)
set_cpu_cap(c, X86_FEATURE_EXTD_APICID);
+ else if (c->x86 >= 0xf) {
+ /* check CPU config space for extended APIC ID */
+ unsigned int val;
+
+ val = read_pci_config(0, 24, 0, 0x68);
+ if ((val >> 17 & 0x3) == 0x3)
+ set_cpu_cap(c, X86_FEATURE_EXTD_APICID);
+ }
}
#endif
@@ -628,6 +631,7 @@ static void init_amd_k8(struct cpuinfo_x86 *c)
*/
msr_set_bit(MSR_K7_HWCR, 6);
#endif
+ set_cpu_bug(c, X86_BUG_SWAPGS_FENCE);
}
static void init_amd_gh(struct cpuinfo_x86 *c)
@@ -670,14 +674,14 @@ static void init_amd_bd(struct cpuinfo_x86 *c)
u64 value;
/* re-enable TopologyExtensions if switched off by BIOS */
- if ((c->x86_model >= 0x10) && (c->x86_model <= 0x1f) &&
+ if ((c->x86_model >= 0x10) && (c->x86_model <= 0x6f) &&
!cpu_has(c, X86_FEATURE_TOPOEXT)) {
if (msr_set_bit(0xc0011005, 54) > 0) {
rdmsrl(0xc0011005, value);
if (value & BIT_64(54)) {
set_cpu_cap(c, X86_FEATURE_TOPOEXT);
- pr_info(FW_INFO "CPU: Re-enabling disabled Topology Extensions Support.\n");
+ pr_info_once(FW_INFO "CPU: Re-enabling disabled Topology Extensions Support.\n");
}
}
}
@@ -746,7 +750,7 @@ static void init_amd(struct cpuinfo_x86 *c)
if (c->x86 >= 0xf)
set_cpu_cap(c, X86_FEATURE_K8);
- if (cpu_has_xmm2) {
+ if (cpu_has(c, X86_FEATURE_XMM2)) {
/* MFENCE stops RDTSC speculation */
set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC);
}
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index f45a4b9d2..0fe6953f4 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -37,6 +37,7 @@
#include <asm/mtrr.h>
#include <linux/numa.h>
#include <asm/asm.h>
+#include <asm/bugs.h>
#include <asm/cpu.h>
#include <asm/mce.h>
#include <asm/msr.h>
@@ -270,6 +271,8 @@ static inline void squash_the_stupid_serial_number(struct cpuinfo_x86 *c)
static __init int setup_disable_smep(char *arg)
{
setup_clear_cpu_cap(X86_FEATURE_SMEP);
+ /* Check for things that depend on SMEP being enabled: */
+ check_mpx_erratum(&boot_cpu_data);
return 1;
}
__setup("nosmep", setup_disable_smep);
@@ -434,7 +437,7 @@ void load_percpu_segment(int cpu)
#ifdef CONFIG_X86_32
loadsegment(fs, __KERNEL_PERCPU);
#else
- loadsegment(gs, 0);
+ __loadsegment_simple(gs, 0);
wrmsrl(MSR_GS_BASE, (unsigned long)per_cpu(irq_stack_union.gs_base, cpu));
#endif
load_stack_canary_segment();
@@ -721,6 +724,13 @@ void get_cpu_cap(struct cpuinfo_x86 *c)
}
}
+ if (c->extended_cpuid_level >= 0x80000007) {
+ cpuid(0x80000007, &eax, &ebx, &ecx, &edx);
+
+ c->x86_capability[CPUID_8000_0007_EBX] = ebx;
+ c->x86_power = edx;
+ }
+
if (c->extended_cpuid_level >= 0x80000008) {
cpuid(0x80000008, &eax, &ebx, &ecx, &edx);
@@ -733,9 +743,6 @@ void get_cpu_cap(struct cpuinfo_x86 *c)
c->x86_phys_bits = 36;
#endif
- if (c->extended_cpuid_level >= 0x80000007)
- c->x86_power = cpuid_edx(0x80000007);
-
if (c->extended_cpuid_level >= 0x8000000a)
c->x86_capability[CPUID_8000_000A_EDX] = cpuid_edx(0x8000000a);
@@ -866,30 +873,34 @@ static void detect_nopl(struct cpuinfo_x86 *c)
#else
set_cpu_cap(c, X86_FEATURE_NOPL);
#endif
+}
+static void detect_null_seg_behavior(struct cpuinfo_x86 *c)
+{
+#ifdef CONFIG_X86_64
/*
- * ESPFIX is a strange bug. All real CPUs have it. Paravirt
- * systems that run Linux at CPL > 0 may or may not have the
- * issue, but, even if they have the issue, there's absolutely
- * nothing we can do about it because we can't use the real IRET
- * instruction.
+ * Empirically, writing zero to a segment selector on AMD does
+ * not clear the base, whereas writing zero to a segment
+ * selector on Intel does clear the base. Intel's behavior
+ * allows slightly faster context switches in the common case
+ * where GS is unused by the prev and next threads.
*
- * NB: For the time being, only 32-bit kernels support
- * X86_BUG_ESPFIX as such. 64-bit kernels directly choose
- * whether to apply espfix using paravirt hooks. If any
- * non-paravirt system ever shows up that does *not* have the
- * ESPFIX issue, we can change this.
+ * Since neither vendor documents this anywhere that I can see,
+ * detect it directly instead of hardcoding the choice by
+ * vendor.
+ *
+ * I've designated AMD's behavior as the "bug" because it's
+ * counterintuitive and less friendly.
*/
-#ifdef CONFIG_X86_32
-#ifdef CONFIG_PARAVIRT
- do {
- extern void native_iret(void);
- if (pv_cpu_ops.iret == native_iret)
- set_cpu_bug(c, X86_BUG_ESPFIX);
- } while (0);
-#else
- set_cpu_bug(c, X86_BUG_ESPFIX);
-#endif
+
+ unsigned long old_base, tmp;
+ rdmsrl(MSR_FS_BASE, old_base);
+ wrmsrl(MSR_FS_BASE, 1);
+ loadsegment(fs, 0);
+ rdmsrl(MSR_FS_BASE, tmp);
+ if (tmp != 0)
+ set_cpu_bug(c, X86_BUG_NULL_SEG);
+ wrmsrl(MSR_FS_BASE, old_base);
#endif
}
@@ -925,6 +936,33 @@ static void generic_identify(struct cpuinfo_x86 *c)
get_model_name(c); /* Default name */
detect_nopl(c);
+
+ detect_null_seg_behavior(c);
+
+ /*
+ * ESPFIX is a strange bug. All real CPUs have it. Paravirt
+ * systems that run Linux at CPL > 0 may or may not have the
+ * issue, but, even if they have the issue, there's absolutely
+ * nothing we can do about it because we can't use the real IRET
+ * instruction.
+ *
+ * NB: For the time being, only 32-bit kernels support
+ * X86_BUG_ESPFIX as such. 64-bit kernels directly choose
+ * whether to apply espfix using paravirt hooks. If any
+ * non-paravirt system ever shows up that does *not* have the
+ * ESPFIX issue, we can change this.
+ */
+#ifdef CONFIG_X86_32
+# ifdef CONFIG_PARAVIRT
+ do {
+ extern void native_iret(void);
+ if (pv_cpu_ops.iret == native_iret)
+ set_cpu_bug(c, X86_BUG_ESPFIX);
+ } while (0);
+# else
+ set_cpu_bug(c, X86_BUG_ESPFIX);
+# endif
+#endif
}
static void x86_init_cache_qos(struct cpuinfo_x86 *c)
@@ -1080,12 +1118,12 @@ void enable_sep_cpu(void)
struct tss_struct *tss;
int cpu;
+ if (!boot_cpu_has(X86_FEATURE_SEP))
+ return;
+
cpu = get_cpu();
tss = &per_cpu(cpu_tss, cpu);
- if (!boot_cpu_has(X86_FEATURE_SEP))
- goto out;
-
/*
* We cache MSR_IA32_SYSENTER_CS's value in the TSS's ss1 field --
* see the big comment in struct x86_hw_tss's definition.
@@ -1100,7 +1138,6 @@ void enable_sep_cpu(void)
wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long)entry_SYSENTER_32, 0);
-out:
put_cpu();
}
#endif
@@ -1532,7 +1569,7 @@ void cpu_init(void)
pr_info("Initializing CPU#%d\n", cpu);
if (cpu_feature_enabled(X86_FEATURE_VME) ||
- cpu_has_tsc ||
+ boot_cpu_has(X86_FEATURE_TSC) ||
boot_cpu_has(X86_FEATURE_DE))
cr4_clear_bits(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c
index 6adef9cac..bd9dcd6b7 100644
--- a/arch/x86/kernel/cpu/cyrix.c
+++ b/arch/x86/kernel/cpu/cyrix.c
@@ -333,7 +333,7 @@ static void init_cyrix(struct cpuinfo_x86 *c)
switch (dir0_lsn) {
case 0xd: /* either a 486SLC or DLC w/o DEVID */
dir0_msn = 0;
- p = Cx486_name[(cpu_has_fpu ? 1 : 0)];
+ p = Cx486_name[!!boot_cpu_has(X86_FEATURE_FPU)];
break;
case 0xe: /* a 486S A step */
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index e4393bfc7..6e2ffbebb 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -25,6 +25,41 @@
#include <asm/apic.h>
#endif
+/*
+ * Just in case our CPU detection goes bad, or you have a weird system,
+ * allow a way to override the automatic disabling of MPX.
+ */
+static int forcempx;
+
+static int __init forcempx_setup(char *__unused)
+{
+ forcempx = 1;
+
+ return 1;
+}
+__setup("intel-skd-046-workaround=disable", forcempx_setup);
+
+void check_mpx_erratum(struct cpuinfo_x86 *c)
+{
+ if (forcempx)
+ return;
+ /*
+ * Turn off the MPX feature on CPUs where SMEP is not
+ * available or disabled.
+ *
+ * Works around Intel Erratum SKD046: "Branch Instructions
+ * May Initialize MPX Bound Registers Incorrectly".
+ *
+ * This might falsely disable MPX on systems without
+ * SMEP, like Atom processors without SMEP. But there
+ * is no such hardware known at the moment.
+ */
+ if (cpu_has(c, X86_FEATURE_MPX) && !cpu_has(c, X86_FEATURE_SMEP)) {
+ setup_clear_cpu_cap(X86_FEATURE_MPX);
+ pr_warn("x86/mpx: Disabling MPX since SMEP not present\n");
+ }
+}
+
static void early_init_intel(struct cpuinfo_x86 *c)
{
u64 misc_enable;
@@ -152,9 +187,9 @@ static void early_init_intel(struct cpuinfo_x86 *c)
* the TLB when any changes are made to any of the page table entries.
* The operating system must reload CR3 to cause the TLB to be flushed"
*
- * As a result cpu_has_pge() in arch/x86/include/asm/tlbflush.h should
- * be false so that __flush_tlb_all() causes CR3 insted of CR4.PGE
- * to be modified
+ * As a result, boot_cpu_has(X86_FEATURE_PGE) in arch/x86/include/asm/tlbflush.h
+ * should be false so that __flush_tlb_all() causes CR3 insted of CR4.PGE
+ * to be modified.
*/
if (c->x86 == 5 && c->x86_model == 9) {
pr_info("Disabling PGE capability bit\n");
@@ -173,6 +208,8 @@ static void early_init_intel(struct cpuinfo_x86 *c)
if (edx & (1U << 28))
c->x86_coreid_bits = get_count_order((ebx >> 16) & 0xff);
}
+
+ check_mpx_erratum(c);
}
#ifdef CONFIG_X86_32
@@ -233,7 +270,7 @@ static void intel_workarounds(struct cpuinfo_x86 *c)
* The Quark is also family 5, but does not have the same bug.
*/
clear_cpu_bug(c, X86_BUG_F00F);
- if (!paravirt_enabled() && c->x86 == 5 && c->x86_model < 9) {
+ if (c->x86 == 5 && c->x86_model < 9) {
static int f00f_workaround_enabled;
set_cpu_bug(c, X86_BUG_F00F);
@@ -281,7 +318,7 @@ static void intel_workarounds(struct cpuinfo_x86 *c)
* integrated APIC (see 11AP erratum in "Pentium Processor
* Specification Update").
*/
- if (cpu_has_apic && (c->x86<<8 | c->x86_model<<4) == 0x520 &&
+ if (boot_cpu_has(X86_FEATURE_APIC) && (c->x86<<8 | c->x86_model<<4) == 0x520 &&
(c->x86_mask < 0x6 || c->x86_mask == 0xb))
set_cpu_bug(c, X86_BUG_11AP);
@@ -456,7 +493,7 @@ static void init_intel(struct cpuinfo_x86 *c)
set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON);
}
- if (cpu_has_xmm2)
+ if (cpu_has(c, X86_FEATURE_XMM2))
set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
if (boot_cpu_has(X86_FEATURE_DS)) {
@@ -468,7 +505,7 @@ static void init_intel(struct cpuinfo_x86 *c)
set_cpu_cap(c, X86_FEATURE_PEBS);
}
- if (c->x86 == 6 && cpu_has_clflush &&
+ if (c->x86 == 6 && boot_cpu_has(X86_FEATURE_CLFLUSH) &&
(c->x86_model == 29 || c->x86_model == 46 || c->x86_model == 47))
set_cpu_bug(c, X86_BUG_CLFLUSH_MONITOR);
diff --git a/arch/x86/kernel/cpu/mcheck/mce-genpool.c b/arch/x86/kernel/cpu/mcheck/mce-genpool.c
index 2658e2af7..93d824ec3 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-genpool.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-genpool.c
@@ -26,6 +26,52 @@ static struct gen_pool *mce_evt_pool;
static LLIST_HEAD(mce_event_llist);
static char gen_pool_buf[MCE_POOLSZ];
+/*
+ * Compare the record "t" with each of the records on list "l" to see if
+ * an equivalent one is present in the list.
+ */
+static bool is_duplicate_mce_record(struct mce_evt_llist *t, struct mce_evt_llist *l)
+{
+ struct mce_evt_llist *node;
+ struct mce *m1, *m2;
+
+ m1 = &t->mce;
+
+ llist_for_each_entry(node, &l->llnode, llnode) {
+ m2 = &node->mce;
+
+ if (!mce_cmp(m1, m2))
+ return true;
+ }
+ return false;
+}
+
+/*
+ * The system has panicked - we'd like to peruse the list of MCE records
+ * that have been queued, but not seen by anyone yet. The list is in
+ * reverse time order, so we need to reverse it. While doing that we can
+ * also drop duplicate records (these were logged because some banks are
+ * shared between cores or by all threads on a socket).
+ */
+struct llist_node *mce_gen_pool_prepare_records(void)
+{
+ struct llist_node *head;
+ LLIST_HEAD(new_head);
+ struct mce_evt_llist *node, *t;
+
+ head = llist_del_all(&mce_event_llist);
+ if (!head)
+ return NULL;
+
+ /* squeeze out duplicates while reversing order */
+ llist_for_each_entry_safe(node, t, head, llnode) {
+ if (!is_duplicate_mce_record(node, t))
+ llist_add(&node->llnode, &new_head);
+ }
+
+ return new_head.first;
+}
+
void mce_gen_pool_process(void)
{
struct llist_node *head;
diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h
index 547720efd..cd74a3f00 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-internal.h
+++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h
@@ -35,6 +35,7 @@ void mce_gen_pool_process(void);
bool mce_gen_pool_empty(void);
int mce_gen_pool_add(struct mce *mce);
int mce_gen_pool_init(void);
+struct llist_node *mce_gen_pool_prepare_records(void);
extern int (*mce_severity)(struct mce *a, int tolerant, char **msg, bool is_excp);
struct dentry *mce_get_debugfs_dir(void);
@@ -81,3 +82,17 @@ static inline int apei_clear_mce(u64 record_id)
#endif
void mce_inject_log(struct mce *m);
+
+/*
+ * We consider records to be equivalent if bank+status+addr+misc all match.
+ * This is only used when the system is going down because of a fatal error
+ * to avoid cluttering the console log with essentially repeated information.
+ * In normal processing all errors seen are logged.
+ */
+static inline bool mce_cmp(struct mce *m1, struct mce *m2)
+{
+ return m1->bank != m2->bank ||
+ m1->status != m2->status ||
+ m1->addr != m2->addr ||
+ m1->misc != m2->misc;
+}
diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c
index 5119766d9..631356c8c 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-severity.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c
@@ -204,6 +204,33 @@ static int error_context(struct mce *m)
return IN_KERNEL;
}
+static int mce_severity_amd_smca(struct mce *m, int err_ctx)
+{
+ u32 addr = MSR_AMD64_SMCA_MCx_CONFIG(m->bank);
+ u32 low, high;
+
+ /*
+ * We need to look at the following bits:
+ * - "succor" bit (data poisoning support), and
+ * - TCC bit (Task Context Corrupt)
+ * in MCi_STATUS to determine error severity.
+ */
+ if (!mce_flags.succor)
+ return MCE_PANIC_SEVERITY;
+
+ if (rdmsr_safe(addr, &low, &high))
+ return MCE_PANIC_SEVERITY;
+
+ /* TCC (Task context corrupt). If set and if IN_KERNEL, panic. */
+ if ((low & MCI_CONFIG_MCAX) &&
+ (m->status & MCI_STATUS_TCC) &&
+ (err_ctx == IN_KERNEL))
+ return MCE_PANIC_SEVERITY;
+
+ /* ...otherwise invoke hwpoison handler. */
+ return MCE_AR_SEVERITY;
+}
+
/*
* See AMD Error Scope Hierarchy table in a newer BKDG. For example
* 49125_15h_Models_30h-3Fh_BKDG.pdf, section "RAS Features"
@@ -225,6 +252,9 @@ static int mce_severity_amd(struct mce *m, int tolerant, char **msg, bool is_exc
* to at least kill process to prolong system operation.
*/
if (mce_flags.overflow_recov) {
+ if (mce_flags.smca)
+ return mce_severity_amd_smca(m, ctx);
+
/* software can try to contain */
if (!(m->mcgstatus & MCG_STATUS_RIPV) && (ctx == IN_KERNEL))
return MCE_PANIC_SEVERITY;
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index f0c921b03..92e5e37d9 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -161,7 +161,6 @@ void mce_log(struct mce *mce)
if (!mce_gen_pool_add(mce))
irq_work_queue(&mce_irq_work);
- mce->finished = 0;
wmb();
for (;;) {
entry = mce_log_get_idx_check(mcelog.next);
@@ -194,7 +193,6 @@ void mce_log(struct mce *mce)
mcelog.entry[entry].finished = 1;
wmb();
- mce->finished = 1;
set_bit(0, &mce_need_notify);
}
@@ -224,6 +222,53 @@ void mce_unregister_decode_chain(struct notifier_block *nb)
}
EXPORT_SYMBOL_GPL(mce_unregister_decode_chain);
+static inline u32 ctl_reg(int bank)
+{
+ return MSR_IA32_MCx_CTL(bank);
+}
+
+static inline u32 status_reg(int bank)
+{
+ return MSR_IA32_MCx_STATUS(bank);
+}
+
+static inline u32 addr_reg(int bank)
+{
+ return MSR_IA32_MCx_ADDR(bank);
+}
+
+static inline u32 misc_reg(int bank)
+{
+ return MSR_IA32_MCx_MISC(bank);
+}
+
+static inline u32 smca_ctl_reg(int bank)
+{
+ return MSR_AMD64_SMCA_MCx_CTL(bank);
+}
+
+static inline u32 smca_status_reg(int bank)
+{
+ return MSR_AMD64_SMCA_MCx_STATUS(bank);
+}
+
+static inline u32 smca_addr_reg(int bank)
+{
+ return MSR_AMD64_SMCA_MCx_ADDR(bank);
+}
+
+static inline u32 smca_misc_reg(int bank)
+{
+ return MSR_AMD64_SMCA_MCx_MISC(bank);
+}
+
+struct mca_msr_regs msr_ops = {
+ .ctl = ctl_reg,
+ .status = status_reg,
+ .addr = addr_reg,
+ .misc = misc_reg
+};
+
static void print_mce(struct mce *m)
{
int ret = 0;
@@ -290,7 +335,9 @@ static void wait_for_panic(void)
static void mce_panic(const char *msg, struct mce *final, char *exp)
{
- int i, apei_err = 0;
+ int apei_err = 0;
+ struct llist_node *pending;
+ struct mce_evt_llist *l;
if (!fake_panic) {
/*
@@ -307,11 +354,10 @@ static void mce_panic(const char *msg, struct mce *final, char *exp)
if (atomic_inc_return(&mce_fake_panicked) > 1)
return;
}
+ pending = mce_gen_pool_prepare_records();
/* First print corrected ones that are still unlogged */
- for (i = 0; i < MCE_LOG_LEN; i++) {
- struct mce *m = &mcelog.entry[i];
- if (!(m->status & MCI_STATUS_VAL))
- continue;
+ llist_for_each_entry(l, pending, llnode) {
+ struct mce *m = &l->mce;
if (!(m->status & MCI_STATUS_UC)) {
print_mce(m);
if (!apei_err)
@@ -319,13 +365,11 @@ static void mce_panic(const char *msg, struct mce *final, char *exp)
}
}
/* Now print uncorrected but with the final one last */
- for (i = 0; i < MCE_LOG_LEN; i++) {
- struct mce *m = &mcelog.entry[i];
- if (!(m->status & MCI_STATUS_VAL))
- continue;
+ llist_for_each_entry(l, pending, llnode) {
+ struct mce *m = &l->mce;
if (!(m->status & MCI_STATUS_UC))
continue;
- if (!final || memcmp(m, final, sizeof(struct mce))) {
+ if (!final || mce_cmp(m, final)) {
print_mce(m);
if (!apei_err)
apei_err = apei_write_mce(m);
@@ -356,11 +400,11 @@ static int msr_to_offset(u32 msr)
if (msr == mca_cfg.rip_msr)
return offsetof(struct mce, ip);
- if (msr == MSR_IA32_MCx_STATUS(bank))
+ if (msr == msr_ops.status(bank))
return offsetof(struct mce, status);
- if (msr == MSR_IA32_MCx_ADDR(bank))
+ if (msr == msr_ops.addr(bank))
return offsetof(struct mce, addr);
- if (msr == MSR_IA32_MCx_MISC(bank))
+ if (msr == msr_ops.misc(bank))
return offsetof(struct mce, misc);
if (msr == MSR_IA32_MCG_STATUS)
return offsetof(struct mce, mcgstatus);
@@ -523,9 +567,9 @@ static struct notifier_block mce_srao_nb = {
static void mce_read_aux(struct mce *m, int i)
{
if (m->status & MCI_STATUS_MISCV)
- m->misc = mce_rdmsrl(MSR_IA32_MCx_MISC(i));
+ m->misc = mce_rdmsrl(msr_ops.misc(i));
if (m->status & MCI_STATUS_ADDRV) {
- m->addr = mce_rdmsrl(MSR_IA32_MCx_ADDR(i));
+ m->addr = mce_rdmsrl(msr_ops.addr(i));
/*
* Mask the reported address by the reported granularity.
@@ -607,7 +651,7 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
m.tsc = 0;
barrier();
- m.status = mce_rdmsrl(MSR_IA32_MCx_STATUS(i));
+ m.status = mce_rdmsrl(msr_ops.status(i));
if (!(m.status & MCI_STATUS_VAL))
continue;
@@ -654,7 +698,7 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
/*
* Clear state for this bank.
*/
- mce_wrmsrl(MSR_IA32_MCx_STATUS(i), 0);
+ mce_wrmsrl(msr_ops.status(i), 0);
}
/*
@@ -679,7 +723,7 @@ static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp,
char *tmp;
for (i = 0; i < mca_cfg.banks; i++) {
- m->status = mce_rdmsrl(MSR_IA32_MCx_STATUS(i));
+ m->status = mce_rdmsrl(msr_ops.status(i));
if (m->status & MCI_STATUS_VAL) {
__set_bit(i, validp);
if (quirk_no_way_out)
@@ -830,9 +874,9 @@ static int mce_start(int *no_way_out)
atomic_add(*no_way_out, &global_nwo);
/*
- * global_nwo should be updated before mce_callin
+ * Rely on the implied barrier below, such that global_nwo
+ * is updated before mce_callin.
*/
- smp_wmb();
order = atomic_inc_return(&mce_callin);
/*
@@ -957,7 +1001,7 @@ static void mce_clear_state(unsigned long *toclear)
for (i = 0; i < mca_cfg.banks; i++) {
if (test_bit(i, toclear))
- mce_wrmsrl(MSR_IA32_MCx_STATUS(i), 0);
+ mce_wrmsrl(msr_ops.status(i), 0);
}
}
@@ -994,11 +1038,12 @@ void do_machine_check(struct pt_regs *regs, long error_code)
int i;
int worst = 0;
int severity;
+
/*
* Establish sequential order between the CPUs entering the machine
* check handler.
*/
- int order;
+ int order = -1;
/*
* If no_way_out gets set, there is no safe way to recover from this
* MCE. If mca_cfg.tolerant is cranked up, we'll try anyway.
@@ -1012,7 +1057,12 @@ void do_machine_check(struct pt_regs *regs, long error_code)
DECLARE_BITMAP(toclear, MAX_NR_BANKS);
DECLARE_BITMAP(valid_banks, MAX_NR_BANKS);
char *msg = "Unknown";
- int lmce = 0;
+
+ /*
+ * MCEs are always local on AMD. Same is determined by MCG_STATUS_LMCES
+ * on Intel.
+ */
+ int lmce = 1;
/* If this CPU is offline, just bail out. */
if (cpu_is_offline(smp_processor_id())) {
@@ -1051,19 +1101,20 @@ void do_machine_check(struct pt_regs *regs, long error_code)
kill_it = 1;
/*
- * Check if this MCE is signaled to only this logical processor
+ * Check if this MCE is signaled to only this logical processor,
+ * on Intel only.
*/
- if (m.mcgstatus & MCG_STATUS_LMCES)
- lmce = 1;
- else {
- /*
- * Go through all the banks in exclusion of the other CPUs.
- * This way we don't report duplicated events on shared banks
- * because the first one to see it will clear it.
- * If this is a Local MCE, then no need to perform rendezvous.
- */
+ if (m.cpuvendor == X86_VENDOR_INTEL)
+ lmce = m.mcgstatus & MCG_STATUS_LMCES;
+
+ /*
+ * Go through all banks in exclusion of the other CPUs. This way we
+ * don't report duplicated events on shared banks because the first one
+ * to see it will clear it. If this is a Local MCE, then no need to
+ * perform rendezvous.
+ */
+ if (!lmce)
order = mce_start(&no_way_out);
- }
for (i = 0; i < cfg->banks; i++) {
__clear_bit(i, toclear);
@@ -1076,7 +1127,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
m.addr = 0;
m.bank = i;
- m.status = mce_rdmsrl(MSR_IA32_MCx_STATUS(i));
+ m.status = mce_rdmsrl(msr_ops.status(i));
if ((m.status & MCI_STATUS_VAL) == 0)
continue;
@@ -1420,7 +1471,6 @@ static void __mcheck_cpu_init_generic(void)
enum mcp_flags m_fl = 0;
mce_banks_t all_banks;
u64 cap;
- int i;
if (!mca_cfg.bootlog)
m_fl = MCP_DONTLOG;
@@ -1436,14 +1486,19 @@ static void __mcheck_cpu_init_generic(void)
rdmsrl(MSR_IA32_MCG_CAP, cap);
if (cap & MCG_CTL_P)
wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
+}
+
+static void __mcheck_cpu_init_clear_banks(void)
+{
+ int i;
for (i = 0; i < mca_cfg.banks; i++) {
struct mce_bank *b = &mce_banks[i];
if (!b->init)
continue;
- wrmsrl(MSR_IA32_MCx_CTL(i), b->ctl);
- wrmsrl(MSR_IA32_MCx_STATUS(i), 0);
+ wrmsrl(msr_ops.ctl(i), b->ctl);
+ wrmsrl(msr_ops.status(i), 0);
}
}
@@ -1495,7 +1550,7 @@ static int __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
*/
clear_bit(10, (unsigned long *)&mce_banks[4].ctl);
}
- if (c->x86 <= 17 && cfg->bootlog < 0) {
+ if (c->x86 < 17 && cfg->bootlog < 0) {
/*
* Lots of broken BIOS around that don't clear them
* by default and leave crap in there. Don't log:
@@ -1628,11 +1683,19 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c)
break;
case X86_VENDOR_AMD: {
- u32 ebx = cpuid_ebx(0x80000007);
+ mce_flags.overflow_recov = !!cpu_has(c, X86_FEATURE_OVERFLOW_RECOV);
+ mce_flags.succor = !!cpu_has(c, X86_FEATURE_SUCCOR);
+ mce_flags.smca = !!cpu_has(c, X86_FEATURE_SMCA);
- mce_flags.overflow_recov = !!(ebx & BIT(0));
- mce_flags.succor = !!(ebx & BIT(1));
- mce_flags.smca = !!(ebx & BIT(3));
+ /*
+ * Install proper ops for Scalable MCA enabled processors
+ */
+ if (mce_flags.smca) {
+ msr_ops.ctl = smca_ctl_reg;
+ msr_ops.status = smca_status_reg;
+ msr_ops.addr = smca_addr_reg;
+ msr_ops.misc = smca_misc_reg;
+ }
mce_amd_feature_init(c);
break;
@@ -1717,6 +1780,7 @@ void mcheck_cpu_init(struct cpuinfo_x86 *c)
__mcheck_cpu_init_generic();
__mcheck_cpu_init_vendor(c);
+ __mcheck_cpu_init_clear_banks();
__mcheck_cpu_init_timer();
}
@@ -2082,7 +2146,7 @@ static void mce_disable_error_reporting(void)
struct mce_bank *b = &mce_banks[i];
if (b->init)
- wrmsrl(MSR_IA32_MCx_CTL(i), 0);
+ wrmsrl(msr_ops.ctl(i), 0);
}
return;
}
@@ -2121,6 +2185,7 @@ static void mce_syscore_resume(void)
{
__mcheck_cpu_init_generic();
__mcheck_cpu_init_vendor(raw_cpu_ptr(&cpu_info));
+ __mcheck_cpu_init_clear_banks();
}
static struct syscore_ops mce_syscore_ops = {
@@ -2138,6 +2203,7 @@ static void mce_cpu_restart(void *data)
if (!mce_available(raw_cpu_ptr(&cpu_info)))
return;
__mcheck_cpu_init_generic();
+ __mcheck_cpu_init_clear_banks();
__mcheck_cpu_init_timer();
}
@@ -2413,7 +2479,7 @@ static void mce_reenable_cpu(void *h)
struct mce_bank *b = &mce_banks[i];
if (b->init)
- wrmsrl(MSR_IA32_MCx_CTL(i), b->ctl);
+ wrmsrl(msr_ops.ctl(i), b->ctl);
}
}
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index 9d656fd43..10b066165 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -54,14 +54,6 @@
/* Threshold LVT offset is at MSR0xC0000410[15:12] */
#define SMCA_THR_LVT_OFF 0xF000
-/*
- * OS is required to set the MCAX bit to acknowledge that it is now using the
- * new MSR ranges and new registers under each bank. It also means that the OS
- * will configure deferred errors in the new MCx_CONFIG register. If the bit is
- * not set, uncorrectable errors will cause a system panic.
- */
-#define SMCA_MCAX_EN_OFF 0x1
-
static const char * const th_names[] = {
"load_store",
"insn_fetch",
@@ -333,7 +325,7 @@ static u32 get_block_address(u32 current_addr, u32 low, u32 high,
/* Fall back to method we used for older processors: */
switch (block) {
case 0:
- addr = MSR_IA32_MCx_MISC(bank);
+ addr = msr_ops.misc(bank);
break;
case 1:
offset = ((low & MASK_BLKPTR_LO) >> 21);
@@ -351,6 +343,7 @@ prepare_threshold_block(unsigned int bank, unsigned int block, u32 addr,
int offset, u32 misc_high)
{
unsigned int cpu = smp_processor_id();
+ u32 smca_low, smca_high, smca_addr;
struct threshold_block b;
int new;
@@ -369,24 +362,49 @@ prepare_threshold_block(unsigned int bank, unsigned int block, u32 addr,
b.interrupt_enable = 1;
- if (mce_flags.smca) {
- u32 smca_low, smca_high;
- u32 smca_addr = MSR_AMD64_SMCA_MCx_CONFIG(bank);
+ if (!mce_flags.smca) {
+ new = (misc_high & MASK_LVTOFF_HI) >> 20;
+ goto set_offset;
+ }
- if (!rdmsr_safe(smca_addr, &smca_low, &smca_high)) {
- smca_high |= SMCA_MCAX_EN_OFF;
- wrmsr(smca_addr, smca_low, smca_high);
- }
+ smca_addr = MSR_AMD64_SMCA_MCx_CONFIG(bank);
- /* Gather LVT offset for thresholding: */
- if (rdmsr_safe(MSR_CU_DEF_ERR, &smca_low, &smca_high))
- goto out;
+ if (!rdmsr_safe(smca_addr, &smca_low, &smca_high)) {
+ /*
+ * OS is required to set the MCAX bit to acknowledge that it is
+ * now using the new MSR ranges and new registers under each
+ * bank. It also means that the OS will configure deferred
+ * errors in the new MCx_CONFIG register. If the bit is not set,
+ * uncorrectable errors will cause a system panic.
+ *
+ * MCA_CONFIG[MCAX] is bit 32 (0 in the high portion of the MSR.)
+ */
+ smca_high |= BIT(0);
- new = (smca_low & SMCA_THR_LVT_OFF) >> 12;
- } else {
- new = (misc_high & MASK_LVTOFF_HI) >> 20;
+ /*
+ * SMCA logs Deferred Error information in MCA_DE{STAT,ADDR}
+ * registers with the option of additionally logging to
+ * MCA_{STATUS,ADDR} if MCA_CONFIG[LogDeferredInMcaStat] is set.
+ *
+ * This bit is usually set by BIOS to retain the old behavior
+ * for OSes that don't use the new registers. Linux supports the
+ * new registers so let's disable that additional logging here.
+ *
+ * MCA_CONFIG[LogDeferredInMcaStat] is bit 34 (bit 2 in the high
+ * portion of the MSR).
+ */
+ smca_high &= ~BIT(2);
+
+ wrmsr(smca_addr, smca_low, smca_high);
}
+ /* Gather LVT offset for thresholding: */
+ if (rdmsr_safe(MSR_CU_DEF_ERR, &smca_low, &smca_high))
+ goto out;
+
+ new = (smca_low & SMCA_THR_LVT_OFF) >> 12;
+
+set_offset:
offset = setup_APIC_mce_threshold(offset, new);
if ((offset == new) && (mce_threshold_vector != amd_threshold_interrupt))
@@ -430,12 +448,23 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
deferred_error_interrupt_enable(c);
}
-static void __log_error(unsigned int bank, bool threshold_err, u64 misc)
+static void
+__log_error(unsigned int bank, bool deferred_err, bool threshold_err, u64 misc)
{
+ u32 msr_status = msr_ops.status(bank);
+ u32 msr_addr = msr_ops.addr(bank);
struct mce m;
u64 status;
- rdmsrl(MSR_IA32_MCx_STATUS(bank), status);
+ WARN_ON_ONCE(deferred_err && threshold_err);
+
+ if (deferred_err && mce_flags.smca) {
+ msr_status = MSR_AMD64_SMCA_MCx_DESTAT(bank);
+ msr_addr = MSR_AMD64_SMCA_MCx_DEADDR(bank);
+ }
+
+ rdmsrl(msr_status, status);
+
if (!(status & MCI_STATUS_VAL))
return;
@@ -448,10 +477,11 @@ static void __log_error(unsigned int bank, bool threshold_err, u64 misc)
m.misc = misc;
if (m.status & MCI_STATUS_ADDRV)
- rdmsrl(MSR_IA32_MCx_ADDR(bank), m.addr);
+ rdmsrl(msr_addr, m.addr);
mce_log(&m);
- wrmsrl(MSR_IA32_MCx_STATUS(bank), 0);
+
+ wrmsrl(msr_status, 0);
}
static inline void __smp_deferred_error_interrupt(void)
@@ -479,17 +509,21 @@ asmlinkage __visible void smp_trace_deferred_error_interrupt(void)
/* APIC interrupt handler for deferred errors */
static void amd_deferred_error_interrupt(void)
{
- u64 status;
unsigned int bank;
+ u32 msr_status;
+ u64 status;
for (bank = 0; bank < mca_cfg.banks; ++bank) {
- rdmsrl(MSR_IA32_MCx_STATUS(bank), status);
+ msr_status = (mce_flags.smca) ? MSR_AMD64_SMCA_MCx_DESTAT(bank)
+ : msr_ops.status(bank);
+
+ rdmsrl(msr_status, status);
if (!(status & MCI_STATUS_VAL) ||
!(status & MCI_STATUS_DEFERRED))
continue;
- __log_error(bank, false, 0);
+ __log_error(bank, true, false, 0);
break;
}
}
@@ -544,7 +578,7 @@ static void amd_threshold_interrupt(void)
return;
log:
- __log_error(bank, true, ((u64)high << 32) | low);
+ __log_error(bank, false, true, ((u64)high << 32) | low);
}
/*
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c
index 1e8bb6c94..1defb8ea8 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_intel.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c
@@ -84,7 +84,7 @@ static int cmci_supported(int *banks)
*/
if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
return 0;
- if (!cpu_has_apic || lapic_get_maxlvt() < 6)
+ if (!boot_cpu_has(X86_FEATURE_APIC) || lapic_get_maxlvt() < 6)
return 0;
rdmsrl(MSR_IA32_MCG_CAP, cap);
*banks = min_t(unsigned, MAX_NR_BANKS, cap & 0xff);
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index ac780cad3..6b9dc4d18 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -450,7 +450,7 @@ asmlinkage __visible void smp_trace_thermal_interrupt(struct pt_regs *regs)
/* Thermal monitoring depends on APIC, ACPI and clock modulation */
static int intel_thermal_supported(struct cpuinfo_x86 *c)
{
- if (!cpu_has_apic)
+ if (!boot_cpu_has(X86_FEATURE_APIC))
return 0;
if (!cpu_has(c, X86_FEATURE_ACPI) || !cpu_has(c, X86_FEATURE_ACC))
return 0;
diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
index ac360bfbb..12823b6eb 100644
--- a/arch/x86/kernel/cpu/microcode/core.c
+++ b/arch/x86/kernel/cpu/microcode/core.c
@@ -175,7 +175,7 @@ void load_ucode_ap(void)
}
}
-int __init save_microcode_in_initrd(void)
+static int __init save_microcode_in_initrd(void)
{
struct cpuinfo_x86 *c = &boot_cpu_data;
@@ -691,4 +691,5 @@ int __init microcode_init(void)
return error;
}
+fs_initcall(save_microcode_in_initrd);
late_initcall(microcode_init);
diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index 63a0888e3..2ea7cc260 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -422,7 +422,7 @@ static void show_saved_mc(void)
data_size = get_datasize(mc_saved_header);
date = mc_saved_header->date;
- pr_debug("mc_saved[%d]: sig=0x%x, pf=0x%x, rev=0x%x, toal size=0x%x, date = %04x-%02x-%02x\n",
+ pr_debug("mc_saved[%d]: sig=0x%x, pf=0x%x, rev=0x%x, total size=0x%x, date = %04x-%02x-%02x\n",
i, sig, pf, rev, total_size,
date & 0xffff,
date >> 24,
diff --git a/arch/x86/kernel/cpu/mtrr/cyrix.c b/arch/x86/kernel/cpu/mtrr/cyrix.c
index f8c81ba0b..b1086f79e 100644
--- a/arch/x86/kernel/cpu/mtrr/cyrix.c
+++ b/arch/x86/kernel/cpu/mtrr/cyrix.c
@@ -137,7 +137,7 @@ static void prepare_set(void)
u32 cr0;
/* Save value of CR4 and clear Page Global Enable (bit 7) */
- if (cpu_has_pge) {
+ if (boot_cpu_has(X86_FEATURE_PGE)) {
cr4 = __read_cr4();
__write_cr4(cr4 & ~X86_CR4_PGE);
}
@@ -170,7 +170,7 @@ static void post_set(void)
write_cr0(read_cr0() & ~X86_CR0_CD);
/* Restore value of CR4 */
- if (cpu_has_pge)
+ if (boot_cpu_has(X86_FEATURE_PGE))
__write_cr4(cr4);
}
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index 19f57360d..16e37a258 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -444,11 +444,24 @@ static void __init print_mtrr_state(void)
pr_debug("TOM2: %016llx aka %lldM\n", mtrr_tom2, mtrr_tom2>>20);
}
+/* PAT setup for BP. We need to go through sync steps here */
+void __init mtrr_bp_pat_init(void)
+{
+ unsigned long flags;
+
+ local_irq_save(flags);
+ prepare_set();
+
+ pat_init();
+
+ post_set();
+ local_irq_restore(flags);
+}
+
/* Grab all of the MTRR state for this CPU into *state */
bool __init get_mtrr_state(void)
{
struct mtrr_var_range *vrs;
- unsigned long flags;
unsigned lo, dummy;
unsigned int i;
@@ -481,15 +494,6 @@ bool __init get_mtrr_state(void)
mtrr_state_set = 1;
- /* PAT setup for BP. We need to go through sync steps here */
- local_irq_save(flags);
- prepare_set();
-
- pat_init();
-
- post_set();
- local_irq_restore(flags);
-
return !!(mtrr_state.enabled & MTRR_STATE_MTRR_ENABLED);
}
@@ -741,7 +745,7 @@ static void prepare_set(void) __acquires(set_atomicity_lock)
wbinvd();
/* Save value of CR4 and clear Page Global Enable (bit 7) */
- if (cpu_has_pge) {
+ if (boot_cpu_has(X86_FEATURE_PGE)) {
cr4 = __read_cr4();
__write_cr4(cr4 & ~X86_CR4_PGE);
}
@@ -771,7 +775,7 @@ static void post_set(void) __releases(set_atomicity_lock)
write_cr0(read_cr0() & ~X86_CR0_CD);
/* Restore value of CR4 */
- if (cpu_has_pge)
+ if (boot_cpu_has(X86_FEATURE_PGE))
__write_cr4(cr4);
raw_spin_unlock(&set_atomicity_lock);
}
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index 10f8d4796..7d393ecde 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -752,6 +752,9 @@ void __init mtrr_bp_init(void)
/* BIOS may override */
__mtrr_enabled = get_mtrr_state();
+ if (mtrr_enabled())
+ mtrr_bp_pat_init();
+
if (mtrr_cleanup(phys_addr)) {
changed_by_mtrr_cleanup = 1;
mtrr_if->set_all();
@@ -759,8 +762,16 @@ void __init mtrr_bp_init(void)
}
}
- if (!mtrr_enabled())
+ if (!mtrr_enabled()) {
pr_info("MTRR: Disabled\n");
+
+ /*
+ * PAT initialization relies on MTRR's rendezvous handler.
+ * Skip PAT init until the handler can initialize both
+ * features independently.
+ */
+ pat_disable("MTRRs disabled, skipping PAT initialization too.");
+ }
}
void mtrr_ap_init(void)
diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.h b/arch/x86/kernel/cpu/mtrr/mtrr.h
index 951884dcc..6c7ced07d 100644
--- a/arch/x86/kernel/cpu/mtrr/mtrr.h
+++ b/arch/x86/kernel/cpu/mtrr/mtrr.h
@@ -52,6 +52,7 @@ void set_mtrr_prepare_save(struct set_mtrr_context *ctxt);
void fill_mtrr_var_range(unsigned int index,
u32 base_lo, u32 base_hi, u32 mask_lo, u32 mask_hi);
bool get_mtrr_state(void);
+void mtrr_bp_pat_init(void);
extern void set_mtrr_ops(const struct mtrr_ops *ops);
diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c
index 364e58346..8cac429b6 100644
--- a/arch/x86/kernel/cpu/vmware.c
+++ b/arch/x86/kernel/cpu/vmware.c
@@ -94,7 +94,7 @@ static void __init vmware_platform_setup(void)
*/
static uint32_t __init vmware_platform(void)
{
- if (cpu_has_hypervisor) {
+ if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) {
unsigned int eax;
unsigned int hyper_vendor_id[3];