summaryrefslogtreecommitdiff
path: root/arch/x86/kernel/cpu/mcheck
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kernel/cpu/mcheck')
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce.c65
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_amd.c141
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_intel.c44
3 files changed, 212 insertions, 38 deletions
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 20190bdac..df919ff10 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -53,9 +53,12 @@
static DEFINE_MUTEX(mce_chrdev_read_mutex);
#define rcu_dereference_check_mce(p) \
- rcu_dereference_index_check((p), \
- rcu_read_lock_sched_held() || \
- lockdep_is_held(&mce_chrdev_read_mutex))
+({ \
+ rcu_lockdep_assert(rcu_read_lock_sched_held() || \
+ lockdep_is_held(&mce_chrdev_read_mutex), \
+ "suspicious rcu_dereference_check_mce() usage"); \
+ smp_load_acquire(&(p)); \
+})
#define CREATE_TRACE_POINTS
#include <trace/events/mce.h>
@@ -1050,6 +1053,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
char *msg = "Unknown";
u64 recover_paddr = ~0ull;
int flags = MF_ACTION_REQUIRED;
+ int lmce = 0;
prev_state = ist_enter(regs);
@@ -1077,11 +1081,20 @@ void do_machine_check(struct pt_regs *regs, long error_code)
kill_it = 1;
/*
- * Go through all the banks in exclusion of the other CPUs.
- * This way we don't report duplicated events on shared banks
- * because the first one to see it will clear it.
+ * Check if this MCE is signaled to only this logical processor
*/
- order = mce_start(&no_way_out);
+ if (m.mcgstatus & MCG_STATUS_LMCES)
+ lmce = 1;
+ else {
+ /*
+ * Go through all the banks in exclusion of the other CPUs.
+ * This way we don't report duplicated events on shared banks
+ * because the first one to see it will clear it.
+ * If this is a Local MCE, then no need to perform rendezvous.
+ */
+ order = mce_start(&no_way_out);
+ }
+
for (i = 0; i < cfg->banks; i++) {
__clear_bit(i, toclear);
if (!test_bit(i, valid_banks))
@@ -1158,8 +1171,18 @@ void do_machine_check(struct pt_regs *regs, long error_code)
* Do most of the synchronization with other CPUs.
* When there's any problem use only local no_way_out state.
*/
- if (mce_end(order) < 0)
- no_way_out = worst >= MCE_PANIC_SEVERITY;
+ if (!lmce) {
+ if (mce_end(order) < 0)
+ no_way_out = worst >= MCE_PANIC_SEVERITY;
+ } else {
+ /*
+ * Local MCE skipped calling mce_reign()
+ * If we found a fatal error, we need to panic here.
+ */
+ if (worst >= MCE_PANIC_SEVERITY && mca_cfg.tolerant < 3)
+ mce_panic("Machine check from unknown source",
+ NULL, NULL);
+ }
/*
* At insane "tolerant" levels we take no action. Otherwise
@@ -1640,10 +1663,16 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c)
mce_intel_feature_init(c);
mce_adjust_timer = cmci_intel_adjust_timer;
break;
- case X86_VENDOR_AMD:
+
+ case X86_VENDOR_AMD: {
+ u32 ebx = cpuid_ebx(0x80000007);
+
mce_amd_feature_init(c);
- mce_flags.overflow_recov = cpuid_ebx(0x80000007) & 0x1;
+ mce_flags.overflow_recov = !!(ebx & BIT(0));
+ mce_flags.succor = !!(ebx & BIT(1));
break;
+ }
+
default:
break;
}
@@ -1887,7 +1916,7 @@ out:
static unsigned int mce_chrdev_poll(struct file *file, poll_table *wait)
{
poll_wait(file, &mce_chrdev_wait, wait);
- if (rcu_access_index(mcelog.next))
+ if (READ_ONCE(mcelog.next))
return POLLIN | POLLRDNORM;
if (!mce_apei_read_done && apei_check_mce())
return POLLIN | POLLRDNORM;
@@ -1932,8 +1961,8 @@ void register_mce_write_callback(ssize_t (*fn)(struct file *filp,
}
EXPORT_SYMBOL_GPL(register_mce_write_callback);
-ssize_t mce_chrdev_write(struct file *filp, const char __user *ubuf,
- size_t usize, loff_t *off)
+static ssize_t mce_chrdev_write(struct file *filp, const char __user *ubuf,
+ size_t usize, loff_t *off)
{
if (mce_write)
return mce_write(filp, ubuf, usize, off);
@@ -1979,6 +2008,7 @@ void mce_disable_bank(int bank)
/*
* mce=off Disables machine check
* mce=no_cmci Disables CMCI
+ * mce=no_lmce Disables LMCE
* mce=dont_log_ce Clears corrected events silently, no log created for CEs.
* mce=ignore_ce Disables polling and CMCI, corrected events are not cleared.
* mce=TOLERANCELEVEL[,monarchtimeout] (number, see above)
@@ -2002,6 +2032,8 @@ static int __init mcheck_enable(char *str)
cfg->disabled = true;
else if (!strcmp(str, "no_cmci"))
cfg->cmci_disabled = true;
+ else if (!strcmp(str, "no_lmce"))
+ cfg->lmce_disabled = true;
else if (!strcmp(str, "dont_log_ce"))
cfg->dont_log_ce = true;
else if (!strcmp(str, "ignore_ce"))
@@ -2011,11 +2043,8 @@ static int __init mcheck_enable(char *str)
else if (!strcmp(str, "bios_cmci_threshold"))
cfg->bios_cmci_threshold = true;
else if (isdigit(str[0])) {
- get_option(&str, &(cfg->tolerant));
- if (*str == ',') {
- ++str;
+ if (get_option(&str, &cfg->tolerant) == 2)
get_option(&str, &(cfg->monarch_timeout));
- }
} else {
pr_info("mce argument %s ignored. Please use /sys\n", str);
return 0;
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index 55ad9b37c..e99b15077 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -1,19 +1,13 @@
/*
- * (c) 2005-2012 Advanced Micro Devices, Inc.
+ * (c) 2005-2015 Advanced Micro Devices, Inc.
* Your use of this code is subject to the terms and conditions of the
* GNU general public license version 2. See "COPYING" or
* http://www.gnu.org/licenses/gpl.html
*
* Written by Jacob Shin - AMD, Inc.
- *
* Maintained by: Borislav Petkov <bp@alien8.de>
*
- * April 2006
- * - added support for AMD Family 0x10 processors
- * May 2012
- * - major scrubbing
- *
- * All MC4_MISCi registers are shared between multi-cores
+ * All MC4_MISCi registers are shared between cores on a node.
*/
#include <linux/interrupt.h>
#include <linux/notifier.h>
@@ -32,6 +26,7 @@
#include <asm/idle.h>
#include <asm/mce.h>
#include <asm/msr.h>
+#include <asm/trace/irq_vectors.h>
#define NR_BLOCKS 9
#define THRESHOLD_MAX 0xFFF
@@ -47,6 +42,13 @@
#define MASK_BLKPTR_LO 0xFF000000
#define MCG_XBLK_ADDR 0xC0000400
+/* Deferred error settings */
+#define MSR_CU_DEF_ERR 0xC0000410
+#define MASK_DEF_LVTOFF 0x000000F0
+#define MASK_DEF_INT_TYPE 0x00000006
+#define DEF_LVT_OFF 0x2
+#define DEF_INT_TYPE_APIC 0x2
+
static const char * const th_names[] = {
"load_store",
"insn_fetch",
@@ -60,6 +62,13 @@ static DEFINE_PER_CPU(struct threshold_bank **, threshold_banks);
static DEFINE_PER_CPU(unsigned char, bank_map); /* see which banks are on */
static void amd_threshold_interrupt(void);
+static void amd_deferred_error_interrupt(void);
+
+static void default_deferred_error_interrupt(void)
+{
+ pr_err("Unexpected deferred interrupt at vector %x\n", DEFERRED_ERROR_VECTOR);
+}
+void (*deferred_error_int_vector)(void) = default_deferred_error_interrupt;
/*
* CPU Initialization
@@ -196,7 +205,7 @@ static void mce_threshold_block_init(struct threshold_block *b, int offset)
threshold_restart_bank(&tr);
};
-static int setup_APIC_mce(int reserved, int new)
+static int setup_APIC_mce_threshold(int reserved, int new)
{
if (reserved < 0 && !setup_APIC_eilvt(new, THRESHOLD_APIC_VECTOR,
APIC_EILVT_MSG_FIX, 0))
@@ -205,6 +214,39 @@ static int setup_APIC_mce(int reserved, int new)
return reserved;
}
+static int setup_APIC_deferred_error(int reserved, int new)
+{
+ if (reserved < 0 && !setup_APIC_eilvt(new, DEFERRED_ERROR_VECTOR,
+ APIC_EILVT_MSG_FIX, 0))
+ return new;
+
+ return reserved;
+}
+
+static void deferred_error_interrupt_enable(struct cpuinfo_x86 *c)
+{
+ u32 low = 0, high = 0;
+ int def_offset = -1, def_new;
+
+ if (rdmsr_safe(MSR_CU_DEF_ERR, &low, &high))
+ return;
+
+ def_new = (low & MASK_DEF_LVTOFF) >> 4;
+ if (!(low & MASK_DEF_LVTOFF)) {
+ pr_err(FW_BUG "Your BIOS is not setting up LVT offset 0x2 for deferred error IRQs correctly.\n");
+ def_new = DEF_LVT_OFF;
+ low = (low & ~MASK_DEF_LVTOFF) | (DEF_LVT_OFF << 4);
+ }
+
+ def_offset = setup_APIC_deferred_error(def_offset, def_new);
+ if ((def_offset == def_new) &&
+ (deferred_error_int_vector != amd_deferred_error_interrupt))
+ deferred_error_int_vector = amd_deferred_error_interrupt;
+
+ low = (low & ~MASK_DEF_INT_TYPE) | DEF_INT_TYPE_APIC;
+ wrmsr(MSR_CU_DEF_ERR, low, high);
+}
+
/* cpu init entry point, called from mce.c with preempt off */
void mce_amd_feature_init(struct cpuinfo_x86 *c)
{
@@ -252,7 +294,7 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
b.interrupt_enable = 1;
new = (high & MASK_LVTOFF_HI) >> 20;
- offset = setup_APIC_mce(offset, new);
+ offset = setup_APIC_mce_threshold(offset, new);
if ((offset == new) &&
(mce_threshold_vector != amd_threshold_interrupt))
@@ -262,6 +304,73 @@ init:
mce_threshold_block_init(&b, offset);
}
}
+
+ if (mce_flags.succor)
+ deferred_error_interrupt_enable(c);
+}
+
+static void __log_error(unsigned int bank, bool threshold_err, u64 misc)
+{
+ struct mce m;
+ u64 status;
+
+ rdmsrl(MSR_IA32_MCx_STATUS(bank), status);
+ if (!(status & MCI_STATUS_VAL))
+ return;
+
+ mce_setup(&m);
+
+ m.status = status;
+ m.bank = bank;
+
+ if (threshold_err)
+ m.misc = misc;
+
+ if (m.status & MCI_STATUS_ADDRV)
+ rdmsrl(MSR_IA32_MCx_ADDR(bank), m.addr);
+
+ mce_log(&m);
+ wrmsrl(MSR_IA32_MCx_STATUS(bank), 0);
+}
+
+static inline void __smp_deferred_error_interrupt(void)
+{
+ inc_irq_stat(irq_deferred_error_count);
+ deferred_error_int_vector();
+}
+
+asmlinkage __visible void smp_deferred_error_interrupt(void)
+{
+ entering_irq();
+ __smp_deferred_error_interrupt();
+ exiting_ack_irq();
+}
+
+asmlinkage __visible void smp_trace_deferred_error_interrupt(void)
+{
+ entering_irq();
+ trace_deferred_error_apic_entry(DEFERRED_ERROR_VECTOR);
+ __smp_deferred_error_interrupt();
+ trace_deferred_error_apic_exit(DEFERRED_ERROR_VECTOR);
+ exiting_ack_irq();
+}
+
+/* APIC interrupt handler for deferred errors */
+static void amd_deferred_error_interrupt(void)
+{
+ u64 status;
+ unsigned int bank;
+
+ for (bank = 0; bank < mca_cfg.banks; ++bank) {
+ rdmsrl(MSR_IA32_MCx_STATUS(bank), status);
+
+ if (!(status & MCI_STATUS_VAL) ||
+ !(status & MCI_STATUS_DEFERRED))
+ continue;
+
+ __log_error(bank, false, 0);
+ break;
+ }
}
/*
@@ -273,12 +382,12 @@ init:
* the interrupt goes off when error_count reaches threshold_limit.
* the handler will simply log mcelog w/ software defined bank number.
*/
+
static void amd_threshold_interrupt(void)
{
u32 low = 0, high = 0, address = 0;
int cpu = smp_processor_id();
unsigned int bank, block;
- struct mce m;
/* assume first bank caused it */
for (bank = 0; bank < mca_cfg.banks; ++bank) {
@@ -321,15 +430,7 @@ static void amd_threshold_interrupt(void)
return;
log:
- mce_setup(&m);
- rdmsrl(MSR_IA32_MCx_STATUS(bank), m.status);
- if (!(m.status & MCI_STATUS_VAL))
- return;
- m.misc = ((u64)high << 32) | low;
- m.bank = bank;
- mce_log(&m);
-
- wrmsrl(MSR_IA32_MCx_STATUS(bank), 0);
+ __log_error(bank, true, ((u64)high << 32) | low);
}
/*
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c
index b4a41cf03..844f56c56 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_intel.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c
@@ -91,6 +91,36 @@ static int cmci_supported(int *banks)
return !!(cap & MCG_CMCI_P);
}
+static bool lmce_supported(void)
+{
+ u64 tmp;
+
+ if (mca_cfg.lmce_disabled)
+ return false;
+
+ rdmsrl(MSR_IA32_MCG_CAP, tmp);
+
+ /*
+ * LMCE depends on recovery support in the processor. Hence both
+ * MCG_SER_P and MCG_LMCE_P should be present in MCG_CAP.
+ */
+ if ((tmp & (MCG_SER_P | MCG_LMCE_P)) !=
+ (MCG_SER_P | MCG_LMCE_P))
+ return false;
+
+ /*
+ * BIOS should indicate support for LMCE by setting bit 20 in
+ * IA32_FEATURE_CONTROL without which touching MCG_EXT_CTL will
+ * generate a #GP fault.
+ */
+ rdmsrl(MSR_IA32_FEATURE_CONTROL, tmp);
+ if ((tmp & (FEATURE_CONTROL_LOCKED | FEATURE_CONTROL_LMCE)) ==
+ (FEATURE_CONTROL_LOCKED | FEATURE_CONTROL_LMCE))
+ return true;
+
+ return false;
+}
+
bool mce_intel_cmci_poll(void)
{
if (__this_cpu_read(cmci_storm_state) == CMCI_STORM_NONE)
@@ -405,8 +435,22 @@ static void intel_init_cmci(void)
cmci_recheck();
}
+void intel_init_lmce(void)
+{
+ u64 val;
+
+ if (!lmce_supported())
+ return;
+
+ rdmsrl(MSR_IA32_MCG_EXT_CTL, val);
+
+ if (!(val & MCG_EXT_CTL_LMCE_EN))
+ wrmsrl(MSR_IA32_MCG_EXT_CTL, val | MCG_EXT_CTL_LMCE_EN);
+}
+
void mce_intel_feature_init(struct cpuinfo_x86 *c)
{
intel_init_thermal(c);
intel_init_cmci();
+ intel_init_lmce();
}