summaryrefslogtreecommitdiff
path: root/drivers/edac
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/edac')
-rw-r--r--drivers/edac/Kconfig26
-rw-r--r--drivers/edac/Makefile2
-rw-r--r--drivers/edac/altera_edac.c492
-rw-r--r--drivers/edac/debugfs.c2
-rw-r--r--drivers/edac/edac_mc.c64
-rw-r--r--drivers/edac/edac_pci.c67
-rw-r--r--drivers/edac/mce_amd.c335
-rw-r--r--drivers/edac/mpc85xx_edac.c2
-rw-r--r--drivers/edac/xgene_edac.c70
9 files changed, 947 insertions, 113 deletions
diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig
index ef25000a5..37755e63c 100644
--- a/drivers/edac/Kconfig
+++ b/drivers/edac/Kconfig
@@ -367,14 +367,30 @@ config EDAC_OCTEON_PCI
Support for error detection and correction on the
Cavium Octeon family of SOCs.
-config EDAC_ALTERA_MC
- bool "Altera SDRAM Memory Controller EDAC"
+config EDAC_ALTERA
+ bool "Altera SOCFPGA ECC"
depends on EDAC_MM_EDAC=y && ARCH_SOCFPGA
help
Support for error detection and correction on the
- Altera SDRAM memory controller. Note that the
- preloader must initialize the SDRAM before loading
- the kernel.
+ Altera SOCs. This must be selected for SDRAM ECC.
+ Note that the preloader must initialize the SDRAM
+ before loading the kernel.
+
+config EDAC_ALTERA_L2C
+ bool "Altera L2 Cache ECC"
+ depends on EDAC_ALTERA=y
+ select CACHE_L2X0
+ help
+ Support for error detection and correction on the
+ Altera L2 cache Memory for Altera SoCs. This option
+ requires L2 cache so it will force that selection.
+
+config EDAC_ALTERA_OCRAM
+ bool "Altera On-Chip RAM ECC"
+ depends on EDAC_ALTERA=y && SRAM && GENERIC_ALLOCATOR
+ help
+ Support for error detection and correction on the
+ Altera On-Chip RAM Memory for Altera SoCs.
config EDAC_SYNOPSYS
tristate "Synopsys DDR Memory Controller"
diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile
index be163e20f..f9e4a3e0e 100644
--- a/drivers/edac/Makefile
+++ b/drivers/edac/Makefile
@@ -67,6 +67,6 @@ obj-$(CONFIG_EDAC_OCTEON_L2C) += octeon_edac-l2c.o
obj-$(CONFIG_EDAC_OCTEON_LMC) += octeon_edac-lmc.o
obj-$(CONFIG_EDAC_OCTEON_PCI) += octeon_edac-pci.o
-obj-$(CONFIG_EDAC_ALTERA_MC) += altera_edac.o
+obj-$(CONFIG_EDAC_ALTERA) += altera_edac.o
obj-$(CONFIG_EDAC_SYNOPSYS) += synopsys_edac.o
obj-$(CONFIG_EDAC_XGENE) += xgene_edac.o
diff --git a/drivers/edac/altera_edac.c b/drivers/edac/altera_edac.c
index 929640981..63e420987 100644
--- a/drivers/edac/altera_edac.c
+++ b/drivers/edac/altera_edac.c
@@ -1,5 +1,5 @@
/*
- * Copyright Altera Corporation (C) 2014-2015. All rights reserved.
+ * Copyright Altera Corporation (C) 2014-2016. All rights reserved.
* Copyright 2011-2012 Calxeda, Inc.
*
* This program is free software; you can redistribute it and/or modify it
@@ -17,8 +17,10 @@
* Adapted from the highbank_mc_edac driver.
*/
+#include <asm/cacheflush.h>
#include <linux/ctype.h>
#include <linux/edac.h>
+#include <linux/genalloc.h>
#include <linux/interrupt.h>
#include <linux/kernel.h>
#include <linux/mfd/syscon.h>
@@ -34,6 +36,7 @@
#define EDAC_MOD_STR "altera_edac"
#define EDAC_VERSION "1"
+#define EDAC_DEVICE "Altera"
static const struct altr_sdram_prv_data c5_data = {
.ecc_ctrl_offset = CV_CTLCFG_OFST,
@@ -75,6 +78,31 @@ static const struct altr_sdram_prv_data a10_data = {
.ue_set_mask = A10_DIAGINT_TDERRA_MASK,
};
+/************************** EDAC Device Defines **************************/
+
+/* OCRAM ECC Management Group Defines */
+#define ALTR_MAN_GRP_OCRAM_ECC_OFFSET 0x04
+#define ALTR_OCR_ECC_EN BIT(0)
+#define ALTR_OCR_ECC_INJS BIT(1)
+#define ALTR_OCR_ECC_INJD BIT(2)
+#define ALTR_OCR_ECC_SERR BIT(3)
+#define ALTR_OCR_ECC_DERR BIT(4)
+
+/* L2 ECC Management Group Defines */
+#define ALTR_MAN_GRP_L2_ECC_OFFSET 0x00
+#define ALTR_L2_ECC_EN BIT(0)
+#define ALTR_L2_ECC_INJS BIT(1)
+#define ALTR_L2_ECC_INJD BIT(2)
+
+#define ALTR_UE_TRIGGER_CHAR 'U' /* Trigger for UE */
+#define ALTR_TRIGGER_READ_WRD_CNT 32 /* Line size x 4 */
+#define ALTR_TRIG_OCRAM_BYTE_SIZE 128 /* Line size x 4 */
+#define ALTR_TRIG_L2C_BYTE_SIZE 4096 /* Full Page */
+
+/*********************** EDAC Memory Controller Functions ****************/
+
+/* The SDRAM controller uses the EDAC Memory Controller framework. */
+
static irqreturn_t altr_sdram_mc_err_handler(int irq, void *dev_id)
{
struct mem_ctl_info *mci = dev_id;
@@ -504,6 +532,466 @@ static struct platform_driver altr_sdram_edac_driver = {
module_platform_driver(altr_sdram_edac_driver);
+/************************* EDAC Parent Probe *************************/
+
+static const struct of_device_id altr_edac_device_of_match[];
+
+static const struct of_device_id altr_edac_of_match[] = {
+ { .compatible = "altr,socfpga-ecc-manager" },
+ {},
+};
+MODULE_DEVICE_TABLE(of, altr_edac_of_match);
+
+static int altr_edac_probe(struct platform_device *pdev)
+{
+ of_platform_populate(pdev->dev.of_node, altr_edac_device_of_match,
+ NULL, &pdev->dev);
+ return 0;
+}
+
+static struct platform_driver altr_edac_driver = {
+ .probe = altr_edac_probe,
+ .driver = {
+ .name = "socfpga_ecc_manager",
+ .of_match_table = altr_edac_of_match,
+ },
+};
+module_platform_driver(altr_edac_driver);
+
+/************************* EDAC Device Functions *************************/
+
+/*
+ * EDAC Device Functions (shared between various IPs).
+ * The discrete memories use the EDAC Device framework. The probe
+ * and error handling functions are very similar between memories
+ * so they are shared. The memory allocation and freeing for EDAC
+ * trigger testing are different for each memory.
+ */
+
+const struct edac_device_prv_data ocramecc_data;
+const struct edac_device_prv_data l2ecc_data;
+
+struct edac_device_prv_data {
+ int (*setup)(struct platform_device *pdev, void __iomem *base);
+ int ce_clear_mask;
+ int ue_clear_mask;
+ char dbgfs_name[20];
+ void * (*alloc_mem)(size_t size, void **other);
+ void (*free_mem)(void *p, size_t size, void *other);
+ int ecc_enable_mask;
+ int ce_set_mask;
+ int ue_set_mask;
+ int trig_alloc_sz;
+};
+
+struct altr_edac_device_dev {
+ void __iomem *base;
+ int sb_irq;
+ int db_irq;
+ const struct edac_device_prv_data *data;
+ struct dentry *debugfs_dir;
+ char *edac_dev_name;
+};
+
+static irqreturn_t altr_edac_device_handler(int irq, void *dev_id)
+{
+ irqreturn_t ret_value = IRQ_NONE;
+ struct edac_device_ctl_info *dci = dev_id;
+ struct altr_edac_device_dev *drvdata = dci->pvt_info;
+ const struct edac_device_prv_data *priv = drvdata->data;
+
+ if (irq == drvdata->sb_irq) {
+ if (priv->ce_clear_mask)
+ writel(priv->ce_clear_mask, drvdata->base);
+ edac_device_handle_ce(dci, 0, 0, drvdata->edac_dev_name);
+ ret_value = IRQ_HANDLED;
+ } else if (irq == drvdata->db_irq) {
+ if (priv->ue_clear_mask)
+ writel(priv->ue_clear_mask, drvdata->base);
+ edac_device_handle_ue(dci, 0, 0, drvdata->edac_dev_name);
+ panic("\nEDAC:ECC_DEVICE[Uncorrectable errors]\n");
+ ret_value = IRQ_HANDLED;
+ } else {
+ WARN_ON(1);
+ }
+
+ return ret_value;
+}
+
+static ssize_t altr_edac_device_trig(struct file *file,
+ const char __user *user_buf,
+ size_t count, loff_t *ppos)
+
+{
+ u32 *ptemp, i, error_mask;
+ int result = 0;
+ u8 trig_type;
+ unsigned long flags;
+ struct edac_device_ctl_info *edac_dci = file->private_data;
+ struct altr_edac_device_dev *drvdata = edac_dci->pvt_info;
+ const struct edac_device_prv_data *priv = drvdata->data;
+ void *generic_ptr = edac_dci->dev;
+
+ if (!user_buf || get_user(trig_type, user_buf))
+ return -EFAULT;
+
+ if (!priv->alloc_mem)
+ return -ENOMEM;
+
+ /*
+ * Note that generic_ptr is initialized to the device * but in
+ * some alloc_functions, this is overridden and returns data.
+ */
+ ptemp = priv->alloc_mem(priv->trig_alloc_sz, &generic_ptr);
+ if (!ptemp) {
+ edac_printk(KERN_ERR, EDAC_DEVICE,
+ "Inject: Buffer Allocation error\n");
+ return -ENOMEM;
+ }
+
+ if (trig_type == ALTR_UE_TRIGGER_CHAR)
+ error_mask = priv->ue_set_mask;
+ else
+ error_mask = priv->ce_set_mask;
+
+ edac_printk(KERN_ALERT, EDAC_DEVICE,
+ "Trigger Error Mask (0x%X)\n", error_mask);
+
+ local_irq_save(flags);
+ /* write ECC corrupted data out. */
+ for (i = 0; i < (priv->trig_alloc_sz / sizeof(*ptemp)); i++) {
+ /* Read data so we're in the correct state */
+ rmb();
+ if (ACCESS_ONCE(ptemp[i]))
+ result = -1;
+ /* Toggle Error bit (it is latched), leave ECC enabled */
+ writel(error_mask, drvdata->base);
+ writel(priv->ecc_enable_mask, drvdata->base);
+ ptemp[i] = i;
+ }
+ /* Ensure it has been written out */
+ wmb();
+ local_irq_restore(flags);
+
+ if (result)
+ edac_printk(KERN_ERR, EDAC_DEVICE, "Mem Not Cleared\n");
+
+ /* Read out written data. ECC error caused here */
+ for (i = 0; i < ALTR_TRIGGER_READ_WRD_CNT; i++)
+ if (ACCESS_ONCE(ptemp[i]) != i)
+ edac_printk(KERN_ERR, EDAC_DEVICE,
+ "Read doesn't match written data\n");
+
+ if (priv->free_mem)
+ priv->free_mem(ptemp, priv->trig_alloc_sz, generic_ptr);
+
+ return count;
+}
+
+static const struct file_operations altr_edac_device_inject_fops = {
+ .open = simple_open,
+ .write = altr_edac_device_trig,
+ .llseek = generic_file_llseek,
+};
+
+static void altr_create_edacdev_dbgfs(struct edac_device_ctl_info *edac_dci,
+ const struct edac_device_prv_data *priv)
+{
+ struct altr_edac_device_dev *drvdata = edac_dci->pvt_info;
+
+ if (!IS_ENABLED(CONFIG_EDAC_DEBUG))
+ return;
+
+ drvdata->debugfs_dir = edac_debugfs_create_dir(drvdata->edac_dev_name);
+ if (!drvdata->debugfs_dir)
+ return;
+
+ if (!edac_debugfs_create_file(priv->dbgfs_name, S_IWUSR,
+ drvdata->debugfs_dir, edac_dci,
+ &altr_edac_device_inject_fops))
+ debugfs_remove_recursive(drvdata->debugfs_dir);
+}
+
+static const struct of_device_id altr_edac_device_of_match[] = {
+#ifdef CONFIG_EDAC_ALTERA_L2C
+ { .compatible = "altr,socfpga-l2-ecc", .data = (void *)&l2ecc_data },
+#endif
+#ifdef CONFIG_EDAC_ALTERA_OCRAM
+ { .compatible = "altr,socfpga-ocram-ecc",
+ .data = (void *)&ocramecc_data },
+#endif
+ {},
+};
+MODULE_DEVICE_TABLE(of, altr_edac_device_of_match);
+
+/*
+ * altr_edac_device_probe()
+ * This is a generic EDAC device driver that will support
+ * various Altera memory devices such as the L2 cache ECC and
+ * OCRAM ECC as well as the memories for other peripherals.
+ * Module specific initialization is done by passing the
+ * function index in the device tree.
+ */
+static int altr_edac_device_probe(struct platform_device *pdev)
+{
+ struct edac_device_ctl_info *dci;
+ struct altr_edac_device_dev *drvdata;
+ struct resource *r;
+ int res = 0;
+ struct device_node *np = pdev->dev.of_node;
+ char *ecc_name = (char *)np->name;
+ static int dev_instance;
+
+ if (!devres_open_group(&pdev->dev, NULL, GFP_KERNEL)) {
+ edac_printk(KERN_ERR, EDAC_DEVICE,
+ "Unable to open devm\n");
+ return -ENOMEM;
+ }
+
+ r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ if (!r) {
+ edac_printk(KERN_ERR, EDAC_DEVICE,
+ "Unable to get mem resource\n");
+ res = -ENODEV;
+ goto fail;
+ }
+
+ if (!devm_request_mem_region(&pdev->dev, r->start, resource_size(r),
+ dev_name(&pdev->dev))) {
+ edac_printk(KERN_ERR, EDAC_DEVICE,
+ "%s:Error requesting mem region\n", ecc_name);
+ res = -EBUSY;
+ goto fail;
+ }
+
+ dci = edac_device_alloc_ctl_info(sizeof(*drvdata), ecc_name,
+ 1, ecc_name, 1, 0, NULL, 0,
+ dev_instance++);
+
+ if (!dci) {
+ edac_printk(KERN_ERR, EDAC_DEVICE,
+ "%s: Unable to allocate EDAC device\n", ecc_name);
+ res = -ENOMEM;
+ goto fail;
+ }
+
+ drvdata = dci->pvt_info;
+ dci->dev = &pdev->dev;
+ platform_set_drvdata(pdev, dci);
+ drvdata->edac_dev_name = ecc_name;
+
+ drvdata->base = devm_ioremap(&pdev->dev, r->start, resource_size(r));
+ if (!drvdata->base)
+ goto fail1;
+
+ /* Get driver specific data for this EDAC device */
+ drvdata->data = of_match_node(altr_edac_device_of_match, np)->data;
+
+ /* Check specific dependencies for the module */
+ if (drvdata->data->setup) {
+ res = drvdata->data->setup(pdev, drvdata->base);
+ if (res)
+ goto fail1;
+ }
+
+ drvdata->sb_irq = platform_get_irq(pdev, 0);
+ res = devm_request_irq(&pdev->dev, drvdata->sb_irq,
+ altr_edac_device_handler,
+ 0, dev_name(&pdev->dev), dci);
+ if (res)
+ goto fail1;
+
+ drvdata->db_irq = platform_get_irq(pdev, 1);
+ res = devm_request_irq(&pdev->dev, drvdata->db_irq,
+ altr_edac_device_handler,
+ 0, dev_name(&pdev->dev), dci);
+ if (res)
+ goto fail1;
+
+ dci->mod_name = "Altera ECC Manager";
+ dci->dev_name = drvdata->edac_dev_name;
+
+ res = edac_device_add_device(dci);
+ if (res)
+ goto fail1;
+
+ altr_create_edacdev_dbgfs(dci, drvdata->data);
+
+ devres_close_group(&pdev->dev, NULL);
+
+ return 0;
+
+fail1:
+ edac_device_free_ctl_info(dci);
+fail:
+ devres_release_group(&pdev->dev, NULL);
+ edac_printk(KERN_ERR, EDAC_DEVICE,
+ "%s:Error setting up EDAC device: %d\n", ecc_name, res);
+
+ return res;
+}
+
+static int altr_edac_device_remove(struct platform_device *pdev)
+{
+ struct edac_device_ctl_info *dci = platform_get_drvdata(pdev);
+ struct altr_edac_device_dev *drvdata = dci->pvt_info;
+
+ debugfs_remove_recursive(drvdata->debugfs_dir);
+ edac_device_del_device(&pdev->dev);
+ edac_device_free_ctl_info(dci);
+
+ return 0;
+}
+
+static struct platform_driver altr_edac_device_driver = {
+ .probe = altr_edac_device_probe,
+ .remove = altr_edac_device_remove,
+ .driver = {
+ .name = "altr_edac_device",
+ .of_match_table = altr_edac_device_of_match,
+ },
+};
+module_platform_driver(altr_edac_device_driver);
+
+/*********************** OCRAM EDAC Device Functions *********************/
+
+#ifdef CONFIG_EDAC_ALTERA_OCRAM
+
+static void *ocram_alloc_mem(size_t size, void **other)
+{
+ struct device_node *np;
+ struct gen_pool *gp;
+ void *sram_addr;
+
+ np = of_find_compatible_node(NULL, NULL, "altr,socfpga-ocram-ecc");
+ if (!np)
+ return NULL;
+
+ gp = of_gen_pool_get(np, "iram", 0);
+ of_node_put(np);
+ if (!gp)
+ return NULL;
+
+ sram_addr = (void *)gen_pool_alloc(gp, size);
+ if (!sram_addr)
+ return NULL;
+
+ memset(sram_addr, 0, size);
+ /* Ensure data is written out */
+ wmb();
+
+ /* Remember this handle for freeing later */
+ *other = gp;
+
+ return sram_addr;
+}
+
+static void ocram_free_mem(void *p, size_t size, void *other)
+{
+ gen_pool_free((struct gen_pool *)other, (u32)p, size);
+}
+
+/*
+ * altr_ocram_check_deps()
+ * Test for OCRAM cache ECC dependencies upon entry because
+ * platform specific startup should have initialized the
+ * On-Chip RAM memory and enabled the ECC.
+ * Can't turn on ECC here because accessing un-initialized
+ * memory will cause CE/UE errors possibly causing an ABORT.
+ */
+static int altr_ocram_check_deps(struct platform_device *pdev,
+ void __iomem *base)
+{
+ if (readl(base) & ALTR_OCR_ECC_EN)
+ return 0;
+
+ edac_printk(KERN_ERR, EDAC_DEVICE,
+ "OCRAM: No ECC present or ECC disabled.\n");
+ return -ENODEV;
+}
+
+const struct edac_device_prv_data ocramecc_data = {
+ .setup = altr_ocram_check_deps,
+ .ce_clear_mask = (ALTR_OCR_ECC_EN | ALTR_OCR_ECC_SERR),
+ .ue_clear_mask = (ALTR_OCR_ECC_EN | ALTR_OCR_ECC_DERR),
+ .dbgfs_name = "altr_ocram_trigger",
+ .alloc_mem = ocram_alloc_mem,
+ .free_mem = ocram_free_mem,
+ .ecc_enable_mask = ALTR_OCR_ECC_EN,
+ .ce_set_mask = (ALTR_OCR_ECC_EN | ALTR_OCR_ECC_INJS),
+ .ue_set_mask = (ALTR_OCR_ECC_EN | ALTR_OCR_ECC_INJD),
+ .trig_alloc_sz = ALTR_TRIG_OCRAM_BYTE_SIZE,
+};
+
+#endif /* CONFIG_EDAC_ALTERA_OCRAM */
+
+/********************* L2 Cache EDAC Device Functions ********************/
+
+#ifdef CONFIG_EDAC_ALTERA_L2C
+
+static void *l2_alloc_mem(size_t size, void **other)
+{
+ struct device *dev = *other;
+ void *ptemp = devm_kzalloc(dev, size, GFP_KERNEL);
+
+ if (!ptemp)
+ return NULL;
+
+ /* Make sure everything is written out */
+ wmb();
+
+ /*
+ * Clean all cache levels up to LoC (includes L2)
+ * This ensures the corrupted data is written into
+ * L2 cache for readback test (which causes ECC error).
+ */
+ flush_cache_all();
+
+ return ptemp;
+}
+
+static void l2_free_mem(void *p, size_t size, void *other)
+{
+ struct device *dev = other;
+
+ if (dev && p)
+ devm_kfree(dev, p);
+}
+
+/*
+ * altr_l2_check_deps()
+ * Test for L2 cache ECC dependencies upon entry because
+ * platform specific startup should have initialized the L2
+ * memory and enabled the ECC.
+ * Bail if ECC is not enabled.
+ * Note that L2 Cache Enable is forced at build time.
+ */
+static int altr_l2_check_deps(struct platform_device *pdev,
+ void __iomem *base)
+{
+ if (readl(base) & ALTR_L2_ECC_EN)
+ return 0;
+
+ edac_printk(KERN_ERR, EDAC_DEVICE,
+ "L2: No ECC present, or ECC disabled\n");
+ return -ENODEV;
+}
+
+const struct edac_device_prv_data l2ecc_data = {
+ .setup = altr_l2_check_deps,
+ .ce_clear_mask = 0,
+ .ue_clear_mask = 0,
+ .dbgfs_name = "altr_l2_trigger",
+ .alloc_mem = l2_alloc_mem,
+ .free_mem = l2_free_mem,
+ .ecc_enable_mask = ALTR_L2_ECC_EN,
+ .ce_set_mask = (ALTR_L2_ECC_EN | ALTR_L2_ECC_INJS),
+ .ue_set_mask = (ALTR_L2_ECC_EN | ALTR_L2_ECC_INJD),
+ .trig_alloc_sz = ALTR_TRIG_L2C_BYTE_SIZE,
+};
+
+#endif /* CONFIG_EDAC_ALTERA_L2C */
+
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Thor Thayer");
-MODULE_DESCRIPTION("EDAC Driver for Altera SDRAM Controller");
+MODULE_DESCRIPTION("EDAC Driver for Altera Memories");
diff --git a/drivers/edac/debugfs.c b/drivers/edac/debugfs.c
index 54d2f668c..92dbb7e23 100644
--- a/drivers/edac/debugfs.c
+++ b/drivers/edac/debugfs.c
@@ -53,7 +53,7 @@ int __init edac_debugfs_init(void)
void edac_debugfs_exit(void)
{
- debugfs_remove(edac_debugfs);
+ debugfs_remove_recursive(edac_debugfs);
}
int edac_create_debugfs_nodes(struct mem_ctl_info *mci)
diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c
index 8adfc167c..1472f48c8 100644
--- a/drivers/edac/edac_mc.c
+++ b/drivers/edac/edac_mc.c
@@ -535,60 +535,21 @@ static void edac_mc_workq_function(struct work_struct *work_req)
mutex_lock(&mem_ctls_mutex);
- /* if this control struct has movd to offline state, we are done */
- if (mci->op_state == OP_OFFLINE) {
+ if (mci->op_state != OP_RUNNING_POLL) {
mutex_unlock(&mem_ctls_mutex);
return;
}
- /* Only poll controllers that are running polled and have a check */
- if (edac_mc_assert_error_check_and_clear() && (mci->edac_check != NULL))
+ if (edac_mc_assert_error_check_and_clear())
mci->edac_check(mci);
mutex_unlock(&mem_ctls_mutex);
- /* Reschedule */
+ /* Queue ourselves again. */
edac_queue_work(&mci->work, msecs_to_jiffies(edac_mc_get_poll_msec()));
}
/*
- * edac_mc_workq_setup
- * initialize a workq item for this mci
- * passing in the new delay period in msec
- *
- * locking model:
- *
- * called with the mem_ctls_mutex held
- */
-static void edac_mc_workq_setup(struct mem_ctl_info *mci, unsigned msec)
-{
- edac_dbg(0, "\n");
-
- /* if this instance is not in the POLL state, then simply return */
- if (mci->op_state != OP_RUNNING_POLL)
- return;
-
- INIT_DELAYED_WORK(&mci->work, edac_mc_workq_function);
-
- edac_queue_work(&mci->work, msecs_to_jiffies(msec));
-}
-
-/*
- * edac_mc_workq_teardown
- * stop the workq processing on this mci
- *
- * locking model:
- *
- * called WITHOUT lock held
- */
-static void edac_mc_workq_teardown(struct mem_ctl_info *mci)
-{
- mci->op_state = OP_OFFLINE;
-
- edac_stop_work(&mci->work);
-}
-
-/*
* edac_mc_reset_delay_period(unsigned long value)
*
* user space has updated our poll period value, need to
@@ -771,12 +732,12 @@ int edac_mc_add_mc_with_groups(struct mem_ctl_info *mci,
goto fail1;
}
- /* If there IS a check routine, then we are running POLLED */
- if (mci->edac_check != NULL) {
- /* This instance is NOW RUNNING */
+ if (mci->edac_check) {
mci->op_state = OP_RUNNING_POLL;
- edac_mc_workq_setup(mci, edac_mc_get_poll_msec());
+ INIT_DELAYED_WORK(&mci->work, edac_mc_workq_function);
+ edac_queue_work(&mci->work, msecs_to_jiffies(edac_mc_get_poll_msec()));
+
} else {
mci->op_state = OP_RUNNING_INTERRUPT;
}
@@ -823,15 +784,16 @@ struct mem_ctl_info *edac_mc_del_mc(struct device *dev)
return NULL;
}
+ /* mark MCI offline: */
+ mci->op_state = OP_OFFLINE;
+
if (!del_mc_from_global_list(mci))
edac_mc_owner = NULL;
- mutex_unlock(&mem_ctls_mutex);
- /* flush workq processes */
- edac_mc_workq_teardown(mci);
+ mutex_unlock(&mem_ctls_mutex);
- /* marking MCI offline */
- mci->op_state = OP_OFFLINE;
+ if (mci->edac_check)
+ edac_stop_work(&mci->work);
/* remove from sysfs */
edac_remove_sysfs_mci_device(mci);
diff --git a/drivers/edac/edac_pci.c b/drivers/edac/edac_pci.c
index 99685388d..8f2f2899a 100644
--- a/drivers/edac/edac_pci.c
+++ b/drivers/edac/edac_pci.c
@@ -195,55 +195,24 @@ static void edac_pci_workq_function(struct work_struct *work_req)
mutex_lock(&edac_pci_ctls_mutex);
- if (pci->op_state == OP_RUNNING_POLL) {
- /* we might be in POLL mode, but there may NOT be a poll func
- */
- if ((pci->edac_check != NULL) && edac_pci_get_check_errors())
- pci->edac_check(pci);
-
- /* if we are on a one second period, then use round */
- msec = edac_pci_get_poll_msec();
- if (msec == 1000)
- delay = round_jiffies_relative(msecs_to_jiffies(msec));
- else
- delay = msecs_to_jiffies(msec);
-
- /* Reschedule only if we are in POLL mode */
- edac_queue_work(&pci->work, delay);
+ if (pci->op_state != OP_RUNNING_POLL) {
+ mutex_unlock(&edac_pci_ctls_mutex);
+ return;
}
- mutex_unlock(&edac_pci_ctls_mutex);
-}
-
-/*
- * edac_pci_workq_setup()
- * initialize a workq item for this edac_pci instance
- * passing in the new delay period in msec
- *
- * locking model:
- * called when 'edac_pci_ctls_mutex' is locked
- */
-static void edac_pci_workq_setup(struct edac_pci_ctl_info *pci,
- unsigned int msec)
-{
- edac_dbg(0, "\n");
+ if (edac_pci_get_check_errors())
+ pci->edac_check(pci);
- INIT_DELAYED_WORK(&pci->work, edac_pci_workq_function);
+ /* if we are on a one second period, then use round */
+ msec = edac_pci_get_poll_msec();
+ if (msec == 1000)
+ delay = round_jiffies_relative(msecs_to_jiffies(msec));
+ else
+ delay = msecs_to_jiffies(msec);
- edac_queue_work(&pci->work, msecs_to_jiffies(edac_pci_get_poll_msec()));
-}
+ edac_queue_work(&pci->work, delay);
-/*
- * edac_pci_workq_teardown()
- * stop the workq processing on this edac_pci instance
- */
-static void edac_pci_workq_teardown(struct edac_pci_ctl_info *pci)
-{
- edac_dbg(0, "\n");
-
- pci->op_state = OP_OFFLINE;
-
- edac_stop_work(&pci->work);
+ mutex_unlock(&edac_pci_ctls_mutex);
}
/*
@@ -289,10 +258,12 @@ int edac_pci_add_device(struct edac_pci_ctl_info *pci, int edac_idx)
goto fail1;
}
- if (pci->edac_check != NULL) {
+ if (pci->edac_check) {
pci->op_state = OP_RUNNING_POLL;
- edac_pci_workq_setup(pci, 1000);
+ INIT_DELAYED_WORK(&pci->work, edac_pci_workq_function);
+ edac_queue_work(&pci->work, msecs_to_jiffies(edac_pci_get_poll_msec()));
+
} else {
pci->op_state = OP_RUNNING_INTERRUPT;
}
@@ -350,8 +321,8 @@ struct edac_pci_ctl_info *edac_pci_del_device(struct device *dev)
mutex_unlock(&edac_pci_ctls_mutex);
- /* stop the workq timer */
- edac_pci_workq_teardown(pci);
+ if (pci->edac_check)
+ edac_stop_work(&pci->work);
edac_printk(KERN_INFO, EDAC_PCI,
"Removed device %d for %s %s: DEV %s\n",
diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c
index e3a945ce3..49768c08a 100644
--- a/drivers/edac/mce_amd.c
+++ b/drivers/edac/mce_amd.c
@@ -147,6 +147,135 @@ static const char * const mc6_mce_desc[] = {
"Status Register File",
};
+/* Scalable MCA error strings */
+static const char * const f17h_ls_mce_desc[] = {
+ "Load queue parity",
+ "Store queue parity",
+ "Miss address buffer payload parity",
+ "L1 TLB parity",
+ "", /* reserved */
+ "DC tag error type 6",
+ "DC tag error type 1",
+ "Internal error type 1",
+ "Internal error type 2",
+ "Sys Read data error thread 0",
+ "Sys read data error thread 1",
+ "DC tag error type 2",
+ "DC data error type 1 (poison comsumption)",
+ "DC data error type 2",
+ "DC data error type 3",
+ "DC tag error type 4",
+ "L2 TLB parity",
+ "PDC parity error",
+ "DC tag error type 3",
+ "DC tag error type 5",
+ "L2 fill data error",
+};
+
+static const char * const f17h_if_mce_desc[] = {
+ "microtag probe port parity error",
+ "IC microtag or full tag multi-hit error",
+ "IC full tag parity",
+ "IC data array parity",
+ "Decoupling queue phys addr parity error",
+ "L0 ITLB parity error",
+ "L1 ITLB parity error",
+ "L2 ITLB parity error",
+ "BPQ snoop parity on Thread 0",
+ "BPQ snoop parity on Thread 1",
+ "L1 BTB multi-match error",
+ "L2 BTB multi-match error",
+};
+
+static const char * const f17h_l2_mce_desc[] = {
+ "L2M tag multi-way-hit error",
+ "L2M tag ECC error",
+ "L2M data ECC error",
+ "HW assert",
+};
+
+static const char * const f17h_de_mce_desc[] = {
+ "uop cache tag parity error",
+ "uop cache data parity error",
+ "Insn buffer parity error",
+ "Insn dispatch queue parity error",
+ "Fetch address FIFO parity",
+ "Patch RAM data parity",
+ "Patch RAM sequencer parity",
+ "uop buffer parity"
+};
+
+static const char * const f17h_ex_mce_desc[] = {
+ "Watchdog timeout error",
+ "Phy register file parity",
+ "Flag register file parity",
+ "Immediate displacement register file parity",
+ "Address generator payload parity",
+ "EX payload parity",
+ "Checkpoint queue parity",
+ "Retire dispatch queue parity",
+};
+
+static const char * const f17h_fp_mce_desc[] = {
+ "Physical register file parity",
+ "Freelist parity error",
+ "Schedule queue parity",
+ "NSQ parity error",
+ "Retire queue parity",
+ "Status register file parity",
+};
+
+static const char * const f17h_l3_mce_desc[] = {
+ "Shadow tag macro ECC error",
+ "Shadow tag macro multi-way-hit error",
+ "L3M tag ECC error",
+ "L3M tag multi-way-hit error",
+ "L3M data ECC error",
+ "XI parity, L3 fill done channel error",
+ "L3 victim queue parity",
+ "L3 HW assert",
+};
+
+static const char * const f17h_cs_mce_desc[] = {
+ "Illegal request from transport layer",
+ "Address violation",
+ "Security violation",
+ "Illegal response from transport layer",
+ "Unexpected response",
+ "Parity error on incoming request or probe response data",
+ "Parity error on incoming read response data",
+ "Atomic request parity",
+ "ECC error on probe filter access",
+};
+
+static const char * const f17h_pie_mce_desc[] = {
+ "HW assert",
+ "Internal PIE register security violation",
+ "Error on GMI link",
+ "Poison data written to internal PIE register",
+};
+
+static const char * const f17h_umc_mce_desc[] = {
+ "DRAM ECC error",
+ "Data poison error on DRAM",
+ "SDP parity error",
+ "Advanced peripheral bus error",
+ "Command/address parity error",
+ "Write data CRC error",
+};
+
+static const char * const f17h_pb_mce_desc[] = {
+ "Parameter Block RAM ECC error",
+};
+
+static const char * const f17h_psp_mce_desc[] = {
+ "PSP RAM ECC or parity error",
+};
+
+static const char * const f17h_smu_mce_desc[] = {
+ "SMU RAM ECC or parity error",
+};
+
static bool f12h_mc0_mce(u16 ec, u8 xec)
{
bool ret = false;
@@ -691,6 +820,177 @@ static void decode_mc6_mce(struct mce *m)
pr_emerg(HW_ERR "Corrupted MC6 MCE info?\n");
}
+static void decode_f17h_core_errors(const char *ip_name, u8 xec,
+ unsigned int mca_type)
+{
+ const char * const *error_desc_array;
+ size_t len;
+
+ pr_emerg(HW_ERR "%s Error: ", ip_name);
+
+ switch (mca_type) {
+ case SMCA_LS:
+ error_desc_array = f17h_ls_mce_desc;
+ len = ARRAY_SIZE(f17h_ls_mce_desc) - 1;
+
+ if (xec == 0x4) {
+ pr_cont("Unrecognized LS MCA error code.\n");
+ return;
+ }
+ break;
+
+ case SMCA_IF:
+ error_desc_array = f17h_if_mce_desc;
+ len = ARRAY_SIZE(f17h_if_mce_desc) - 1;
+ break;
+
+ case SMCA_L2_CACHE:
+ error_desc_array = f17h_l2_mce_desc;
+ len = ARRAY_SIZE(f17h_l2_mce_desc) - 1;
+ break;
+
+ case SMCA_DE:
+ error_desc_array = f17h_de_mce_desc;
+ len = ARRAY_SIZE(f17h_de_mce_desc) - 1;
+ break;
+
+ case SMCA_EX:
+ error_desc_array = f17h_ex_mce_desc;
+ len = ARRAY_SIZE(f17h_ex_mce_desc) - 1;
+ break;
+
+ case SMCA_FP:
+ error_desc_array = f17h_fp_mce_desc;
+ len = ARRAY_SIZE(f17h_fp_mce_desc) - 1;
+ break;
+
+ case SMCA_L3_CACHE:
+ error_desc_array = f17h_l3_mce_desc;
+ len = ARRAY_SIZE(f17h_l3_mce_desc) - 1;
+ break;
+
+ default:
+ pr_cont("Corrupted MCA core error info.\n");
+ return;
+ }
+
+ if (xec > len) {
+ pr_cont("Unrecognized %s MCA bank error code.\n",
+ amd_core_mcablock_names[mca_type]);
+ return;
+ }
+
+ pr_cont("%s.\n", error_desc_array[xec]);
+}
+
+static void decode_df_errors(u8 xec, unsigned int mca_type)
+{
+ const char * const *error_desc_array;
+ size_t len;
+
+ pr_emerg(HW_ERR "Data Fabric Error: ");
+
+ switch (mca_type) {
+ case SMCA_CS:
+ error_desc_array = f17h_cs_mce_desc;
+ len = ARRAY_SIZE(f17h_cs_mce_desc) - 1;
+ break;
+
+ case SMCA_PIE:
+ error_desc_array = f17h_pie_mce_desc;
+ len = ARRAY_SIZE(f17h_pie_mce_desc) - 1;
+ break;
+
+ default:
+ pr_cont("Corrupted MCA Data Fabric info.\n");
+ return;
+ }
+
+ if (xec > len) {
+ pr_cont("Unrecognized %s MCA bank error code.\n",
+ amd_df_mcablock_names[mca_type]);
+ return;
+ }
+
+ pr_cont("%s.\n", error_desc_array[xec]);
+}
+
+/* Decode errors according to Scalable MCA specification */
+static void decode_smca_errors(struct mce *m)
+{
+ u32 addr = MSR_AMD64_SMCA_MCx_IPID(m->bank);
+ unsigned int hwid, mca_type, i;
+ u8 xec = XEC(m->status, xec_mask);
+ const char * const *error_desc_array;
+ const char *ip_name;
+ u32 low, high;
+ size_t len;
+
+ if (rdmsr_safe(addr, &low, &high)) {
+ pr_emerg("Invalid IP block specified, error information is unreliable.\n");
+ return;
+ }
+
+ hwid = high & MCI_IPID_HWID;
+ mca_type = (high & MCI_IPID_MCATYPE) >> 16;
+
+ pr_emerg(HW_ERR "MC%d IPID value: 0x%08x%08x\n", m->bank, high, low);
+
+ /*
+ * Based on hwid and mca_type values, decode errors from respective IPs.
+ * Note: mca_type values make sense only in the context of an hwid.
+ */
+ for (i = 0; i < ARRAY_SIZE(amd_hwids); i++)
+ if (amd_hwids[i].hwid == hwid)
+ break;
+
+ switch (i) {
+ case SMCA_F17H_CORE:
+ ip_name = (mca_type == SMCA_L3_CACHE) ?
+ "L3 Cache" : "F17h Core";
+ return decode_f17h_core_errors(ip_name, xec, mca_type);
+ break;
+
+ case SMCA_DF:
+ return decode_df_errors(xec, mca_type);
+ break;
+
+ case SMCA_UMC:
+ error_desc_array = f17h_umc_mce_desc;
+ len = ARRAY_SIZE(f17h_umc_mce_desc) - 1;
+ break;
+
+ case SMCA_PB:
+ error_desc_array = f17h_pb_mce_desc;
+ len = ARRAY_SIZE(f17h_pb_mce_desc) - 1;
+ break;
+
+ case SMCA_PSP:
+ error_desc_array = f17h_psp_mce_desc;
+ len = ARRAY_SIZE(f17h_psp_mce_desc) - 1;
+ break;
+
+ case SMCA_SMU:
+ error_desc_array = f17h_smu_mce_desc;
+ len = ARRAY_SIZE(f17h_smu_mce_desc) - 1;
+ break;
+
+ default:
+ pr_emerg(HW_ERR "HWID:%d does not match any existing IPs.\n", hwid);
+ return;
+ }
+
+ ip_name = amd_hwids[i].name;
+ pr_emerg(HW_ERR "%s Error: ", ip_name);
+
+ if (xec > len) {
+ pr_cont("Unrecognized %s MCA bank error code.\n", ip_name);
+ return;
+ }
+
+ pr_cont("%s.\n", error_desc_array[xec]);
+}
+
static inline void amd_decode_err_code(u16 ec)
{
if (INT_ERROR(ec)) {
@@ -752,6 +1052,7 @@ int amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data)
struct mce *m = (struct mce *)data;
struct cpuinfo_x86 *c = &cpu_data(m->extcpu);
int ecc;
+ u32 ebx = cpuid_ebx(0x80000007);
if (amd_filter_mce(m))
return NOTIFY_STOP;
@@ -769,11 +1070,20 @@ int amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data)
((m->status & MCI_STATUS_PCC) ? "PCC" : "-"),
((m->status & MCI_STATUS_ADDRV) ? "AddrV" : "-"));
- if (c->x86 == 0x15 || c->x86 == 0x16)
+ if (c->x86 >= 0x15)
pr_cont("|%s|%s",
((m->status & MCI_STATUS_DEFERRED) ? "Deferred" : "-"),
((m->status & MCI_STATUS_POISON) ? "Poison" : "-"));
+ if (!!(ebx & BIT(3))) {
+ u32 low, high;
+ u32 addr = MSR_AMD64_SMCA_MCx_CONFIG(m->bank);
+
+ if (!rdmsr_safe(addr, &low, &high) &&
+ (low & MCI_CONFIG_MCAX))
+ pr_cont("|%s", ((m->status & MCI_STATUS_TCC) ? "TCC" : "-"));
+ }
+
/* do the two bits[14:13] together */
ecc = (m->status >> 45) & 0x3;
if (ecc)
@@ -784,6 +1094,11 @@ int amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data)
if (m->status & MCI_STATUS_ADDRV)
pr_emerg(HW_ERR "MC%d Error Address: 0x%016llx\n", m->bank, m->addr);
+ if (!!(ebx & BIT(3))) {
+ decode_smca_errors(m);
+ goto err_code;
+ }
+
if (!fam_ops)
goto err_code;
@@ -834,6 +1149,7 @@ static struct notifier_block amd_mce_dec_nb = {
static int __init mce_amd_init(void)
{
struct cpuinfo_x86 *c = &boot_cpu_data;
+ u32 ebx;
if (c->x86_vendor != X86_VENDOR_AMD)
return -ENODEV;
@@ -888,10 +1204,18 @@ static int __init mce_amd_init(void)
fam_ops->mc2_mce = f16h_mc2_mce;
break;
+ case 0x17:
+ ebx = cpuid_ebx(0x80000007);
+ xec_mask = 0x3f;
+ if (!(ebx & BIT(3))) {
+ printk(KERN_WARNING "Decoding supported only on Scalable MCA processors.\n");
+ goto err_out;
+ }
+ break;
+
default:
printk(KERN_WARNING "Huh? What family is it: 0x%x?!\n", c->x86);
- kfree(fam_ops);
- fam_ops = NULL;
+ goto err_out;
}
pr_info("MCE: In-kernel MCE decoding enabled.\n");
@@ -899,6 +1223,11 @@ static int __init mce_amd_init(void)
mce_register_decode_chain(&amd_mce_dec_nb);
return 0;
+
+err_out:
+ kfree(fam_ops);
+ fam_ops = NULL;
+ return -EINVAL;
}
early_initcall(mce_amd_init);
diff --git a/drivers/edac/mpc85xx_edac.c b/drivers/edac/mpc85xx_edac.c
index b7139c160..ca63d0da8 100644
--- a/drivers/edac/mpc85xx_edac.c
+++ b/drivers/edac/mpc85xx_edac.c
@@ -1244,7 +1244,7 @@ static struct platform_driver * const drivers[] = {
static int __init mpc85xx_mc_init(void)
{
int res = 0;
- u32 pvr = 0;
+ u32 __maybe_unused pvr = 0;
printk(KERN_INFO "Freescale(R) MPC85xx EDAC driver, "
"(C) 2006 Montavista Software\n");
diff --git a/drivers/edac/xgene_edac.c b/drivers/edac/xgene_edac.c
index 41f876414..bf19b6e3b 100644
--- a/drivers/edac/xgene_edac.c
+++ b/drivers/edac/xgene_edac.c
@@ -61,6 +61,7 @@ struct xgene_edac {
struct regmap *mcba_map;
struct regmap *mcbb_map;
struct regmap *efuse_map;
+ struct regmap *rb_map;
void __iomem *pcp_csr;
spinlock_t lock;
struct dentry *dfs;
@@ -1057,7 +1058,7 @@ static bool xgene_edac_l3_promote_to_uc_err(u32 l3cesr, u32 l3celr)
case 0x041:
return true;
}
- } else if (L3C_ELR_ERRSYN(l3celr) == 9)
+ } else if (L3C_ELR_ERRWAY(l3celr) == 9)
return true;
return false;
@@ -1353,6 +1354,17 @@ static int xgene_edac_l3_remove(struct xgene_edac_dev_ctx *l3)
#define GLBL_MDED_ERRH 0x0848
#define GLBL_MDED_ERRHMASK 0x084c
+/* IO Bus Registers */
+#define RBCSR 0x0000
+#define STICKYERR_MASK BIT(0)
+#define RBEIR 0x0008
+#define AGENT_OFFLINE_ERR_MASK BIT(30)
+#define UNIMPL_RBPAGE_ERR_MASK BIT(29)
+#define WORD_ALIGNED_ERR_MASK BIT(28)
+#define PAGE_ACCESS_ERR_MASK BIT(27)
+#define WRITE_ACCESS_MASK BIT(26)
+#define RBERRADDR_RD(src) ((src) & 0x03FFFFFF)
+
static const char * const soc_mem_err_v1[] = {
"10GbE0",
"10GbE1",
@@ -1470,6 +1482,51 @@ static void xgene_edac_rb_report(struct edac_device_ctl_info *edac_dev)
u32 err_addr_hi;
u32 reg;
+ /* If the register bus resource isn't available, just skip it */
+ if (!ctx->edac->rb_map)
+ goto rb_skip;
+
+ /*
+ * Check RB access errors
+ * 1. Out of range
+ * 2. Un-implemented page
+ * 3. Un-aligned access
+ * 4. Offline slave IP
+ */
+ if (regmap_read(ctx->edac->rb_map, RBCSR, &reg))
+ return;
+ if (reg & STICKYERR_MASK) {
+ bool write;
+ u32 address;
+
+ dev_err(edac_dev->dev, "IOB bus access error(s)\n");
+ if (regmap_read(ctx->edac->rb_map, RBEIR, &reg))
+ return;
+ write = reg & WRITE_ACCESS_MASK ? 1 : 0;
+ address = RBERRADDR_RD(reg);
+ if (reg & AGENT_OFFLINE_ERR_MASK)
+ dev_err(edac_dev->dev,
+ "IOB bus %s access to offline agent error\n",
+ write ? "write" : "read");
+ if (reg & UNIMPL_RBPAGE_ERR_MASK)
+ dev_err(edac_dev->dev,
+ "IOB bus %s access to unimplemented page error\n",
+ write ? "write" : "read");
+ if (reg & WORD_ALIGNED_ERR_MASK)
+ dev_err(edac_dev->dev,
+ "IOB bus %s word aligned access error\n",
+ write ? "write" : "read");
+ if (reg & PAGE_ACCESS_ERR_MASK)
+ dev_err(edac_dev->dev,
+ "IOB bus %s to page out of range access error\n",
+ write ? "write" : "read");
+ if (regmap_write(ctx->edac->rb_map, RBEIR, 0))
+ return;
+ if (regmap_write(ctx->edac->rb_map, RBCSR, 0))
+ return;
+ }
+rb_skip:
+
/* IOB Bridge agent transaction error interrupt */
reg = readl(ctx->dev_csr + IOBBATRANSERRINTSTS);
if (!reg)
@@ -1852,6 +1909,17 @@ static int xgene_edac_probe(struct platform_device *pdev)
goto out_err;
}
+ /*
+ * NOTE: The register bus resource is optional for compatibility
+ * reason.
+ */
+ edac->rb_map = syscon_regmap_lookup_by_phandle(pdev->dev.of_node,
+ "regmap-rb");
+ if (IS_ERR(edac->rb_map)) {
+ dev_warn(edac->dev, "missing syscon regmap rb\n");
+ edac->rb_map = NULL;
+ }
+
res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
edac->pcp_csr = devm_ioremap_resource(&pdev->dev, res);
if (IS_ERR(edac->pcp_csr)) {