summaryrefslogtreecommitdiff
path: root/drivers/acpi/apei
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/acpi/apei')
-rw-r--r--drivers/acpi/apei/Kconfig64
-rw-r--r--drivers/acpi/apei/Makefile6
-rw-r--r--drivers/acpi/apei/apei-base.c805
-rw-r--r--drivers/acpi/apei/apei-internal.h143
-rw-r--r--drivers/acpi/apei/einj.c833
-rw-r--r--drivers/acpi/apei/erst-dbg.c243
-rw-r--r--drivers/acpi/apei/erst.c1230
-rw-r--r--drivers/acpi/apei/ghes.c1163
-rw-r--r--drivers/acpi/apei/hest.c255
9 files changed, 4742 insertions, 0 deletions
diff --git a/drivers/acpi/apei/Kconfig b/drivers/acpi/apei/Kconfig
new file mode 100644
index 000000000..b0140c8fc
--- /dev/null
+++ b/drivers/acpi/apei/Kconfig
@@ -0,0 +1,64 @@
+config HAVE_ACPI_APEI
+ bool
+
+config HAVE_ACPI_APEI_NMI
+ bool
+
+config ACPI_APEI
+ bool "ACPI Platform Error Interface (APEI)"
+ select MISC_FILESYSTEMS
+ select PSTORE
+ select UEFI_CPER
+ depends on HAVE_ACPI_APEI
+ help
+ APEI allows to report errors (for example from the chipset)
+ to the operating system. This improves NMI handling
+ especially. In addition it supports error serialization and
+ error injection.
+
+config ACPI_APEI_GHES
+ bool "APEI Generic Hardware Error Source"
+ depends on ACPI_APEI
+ select ACPI_HED
+ select IRQ_WORK
+ select GENERIC_ALLOCATOR
+ help
+ Generic Hardware Error Source provides a way to report
+ platform hardware errors (such as that from chipset). It
+ works in so called "Firmware First" mode, that is, hardware
+ errors are reported to firmware firstly, then reported to
+ Linux by firmware. This way, some non-standard hardware
+ error registers or non-standard hardware link can be checked
+ by firmware to produce more valuable hardware error
+ information for Linux.
+
+config ACPI_APEI_PCIEAER
+ bool "APEI PCIe AER logging/recovering support"
+ depends on ACPI_APEI && PCIEAER
+ help
+ PCIe AER errors may be reported via APEI firmware first mode.
+ Turn on this option to enable the corresponding support.
+
+config ACPI_APEI_MEMORY_FAILURE
+ bool "APEI memory error recovering support"
+ depends on ACPI_APEI && MEMORY_FAILURE
+ help
+ Memory errors may be reported via APEI firmware first mode.
+ Turn on this option to enable the memory recovering support.
+
+config ACPI_APEI_EINJ
+ tristate "APEI Error INJection (EINJ)"
+ depends on ACPI_APEI && DEBUG_FS
+ help
+ EINJ provides a hardware error injection mechanism, it is
+ mainly used for debugging and testing the other parts of
+ APEI and some other RAS features.
+
+config ACPI_APEI_ERST_DEBUG
+ tristate "APEI Error Record Serialization Table (ERST) Debug Support"
+ depends on ACPI_APEI
+ help
+ ERST is a way provided by APEI to save and retrieve hardware
+ error information to and from a persistent store. Enable this
+ if you want to debugging and testing the ERST kernel support
+ and firmware implementation.
diff --git a/drivers/acpi/apei/Makefile b/drivers/acpi/apei/Makefile
new file mode 100644
index 000000000..5d575a955
--- /dev/null
+++ b/drivers/acpi/apei/Makefile
@@ -0,0 +1,6 @@
+obj-$(CONFIG_ACPI_APEI) += apei.o
+obj-$(CONFIG_ACPI_APEI_GHES) += ghes.o
+obj-$(CONFIG_ACPI_APEI_EINJ) += einj.o
+obj-$(CONFIG_ACPI_APEI_ERST_DEBUG) += erst-dbg.o
+
+apei-y := apei-base.o hest.o erst.o
diff --git a/drivers/acpi/apei/apei-base.c b/drivers/acpi/apei/apei-base.c
new file mode 100644
index 000000000..a85ac07f3
--- /dev/null
+++ b/drivers/acpi/apei/apei-base.c
@@ -0,0 +1,805 @@
+/*
+ * apei-base.c - ACPI Platform Error Interface (APEI) supporting
+ * infrastructure
+ *
+ * APEI allows to report errors (for example from the chipset) to the
+ * the operating system. This improves NMI handling especially. In
+ * addition it supports error serialization and error injection.
+ *
+ * For more information about APEI, please refer to ACPI Specification
+ * version 4.0, chapter 17.
+ *
+ * This file has Common functions used by more than one APEI table,
+ * including framework of interpreter for ERST and EINJ; resource
+ * management for APEI registers.
+ *
+ * Copyright (C) 2009, Intel Corp.
+ * Author: Huang Ying <ying.huang@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/acpi.h>
+#include <linux/slab.h>
+#include <linux/io.h>
+#include <linux/kref.h>
+#include <linux/rculist.h>
+#include <linux/interrupt.h>
+#include <linux/debugfs.h>
+#include <asm/unaligned.h>
+
+#include "apei-internal.h"
+
+#define APEI_PFX "APEI: "
+
+/*
+ * APEI ERST (Error Record Serialization Table) and EINJ (Error
+ * INJection) interpreter framework.
+ */
+
+#define APEI_EXEC_PRESERVE_REGISTER 0x1
+
+void apei_exec_ctx_init(struct apei_exec_context *ctx,
+ struct apei_exec_ins_type *ins_table,
+ u32 instructions,
+ struct acpi_whea_header *action_table,
+ u32 entries)
+{
+ ctx->ins_table = ins_table;
+ ctx->instructions = instructions;
+ ctx->action_table = action_table;
+ ctx->entries = entries;
+}
+EXPORT_SYMBOL_GPL(apei_exec_ctx_init);
+
+int __apei_exec_read_register(struct acpi_whea_header *entry, u64 *val)
+{
+ int rc;
+
+ rc = apei_read(val, &entry->register_region);
+ if (rc)
+ return rc;
+ *val >>= entry->register_region.bit_offset;
+ *val &= entry->mask;
+
+ return 0;
+}
+
+int apei_exec_read_register(struct apei_exec_context *ctx,
+ struct acpi_whea_header *entry)
+{
+ int rc;
+ u64 val = 0;
+
+ rc = __apei_exec_read_register(entry, &val);
+ if (rc)
+ return rc;
+ ctx->value = val;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(apei_exec_read_register);
+
+int apei_exec_read_register_value(struct apei_exec_context *ctx,
+ struct acpi_whea_header *entry)
+{
+ int rc;
+
+ rc = apei_exec_read_register(ctx, entry);
+ if (rc)
+ return rc;
+ ctx->value = (ctx->value == entry->value);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(apei_exec_read_register_value);
+
+int __apei_exec_write_register(struct acpi_whea_header *entry, u64 val)
+{
+ int rc;
+
+ val &= entry->mask;
+ val <<= entry->register_region.bit_offset;
+ if (entry->flags & APEI_EXEC_PRESERVE_REGISTER) {
+ u64 valr = 0;
+ rc = apei_read(&valr, &entry->register_region);
+ if (rc)
+ return rc;
+ valr &= ~(entry->mask << entry->register_region.bit_offset);
+ val |= valr;
+ }
+ rc = apei_write(val, &entry->register_region);
+
+ return rc;
+}
+
+int apei_exec_write_register(struct apei_exec_context *ctx,
+ struct acpi_whea_header *entry)
+{
+ return __apei_exec_write_register(entry, ctx->value);
+}
+EXPORT_SYMBOL_GPL(apei_exec_write_register);
+
+int apei_exec_write_register_value(struct apei_exec_context *ctx,
+ struct acpi_whea_header *entry)
+{
+ int rc;
+
+ ctx->value = entry->value;
+ rc = apei_exec_write_register(ctx, entry);
+
+ return rc;
+}
+EXPORT_SYMBOL_GPL(apei_exec_write_register_value);
+
+int apei_exec_noop(struct apei_exec_context *ctx,
+ struct acpi_whea_header *entry)
+{
+ return 0;
+}
+EXPORT_SYMBOL_GPL(apei_exec_noop);
+
+/*
+ * Interpret the specified action. Go through whole action table,
+ * execute all instructions belong to the action.
+ */
+int __apei_exec_run(struct apei_exec_context *ctx, u8 action,
+ bool optional)
+{
+ int rc = -ENOENT;
+ u32 i, ip;
+ struct acpi_whea_header *entry;
+ apei_exec_ins_func_t run;
+
+ ctx->ip = 0;
+
+ /*
+ * "ip" is the instruction pointer of current instruction,
+ * "ctx->ip" specifies the next instruction to executed,
+ * instruction "run" function may change the "ctx->ip" to
+ * implement "goto" semantics.
+ */
+rewind:
+ ip = 0;
+ for (i = 0; i < ctx->entries; i++) {
+ entry = &ctx->action_table[i];
+ if (entry->action != action)
+ continue;
+ if (ip == ctx->ip) {
+ if (entry->instruction >= ctx->instructions ||
+ !ctx->ins_table[entry->instruction].run) {
+ pr_warning(FW_WARN APEI_PFX
+ "Invalid action table, unknown instruction type: %d\n",
+ entry->instruction);
+ return -EINVAL;
+ }
+ run = ctx->ins_table[entry->instruction].run;
+ rc = run(ctx, entry);
+ if (rc < 0)
+ return rc;
+ else if (rc != APEI_EXEC_SET_IP)
+ ctx->ip++;
+ }
+ ip++;
+ if (ctx->ip < ip)
+ goto rewind;
+ }
+
+ return !optional && rc < 0 ? rc : 0;
+}
+EXPORT_SYMBOL_GPL(__apei_exec_run);
+
+typedef int (*apei_exec_entry_func_t)(struct apei_exec_context *ctx,
+ struct acpi_whea_header *entry,
+ void *data);
+
+static int apei_exec_for_each_entry(struct apei_exec_context *ctx,
+ apei_exec_entry_func_t func,
+ void *data,
+ int *end)
+{
+ u8 ins;
+ int i, rc;
+ struct acpi_whea_header *entry;
+ struct apei_exec_ins_type *ins_table = ctx->ins_table;
+
+ for (i = 0; i < ctx->entries; i++) {
+ entry = ctx->action_table + i;
+ ins = entry->instruction;
+ if (end)
+ *end = i;
+ if (ins >= ctx->instructions || !ins_table[ins].run) {
+ pr_warning(FW_WARN APEI_PFX
+ "Invalid action table, unknown instruction type: %d\n",
+ ins);
+ return -EINVAL;
+ }
+ rc = func(ctx, entry, data);
+ if (rc)
+ return rc;
+ }
+
+ return 0;
+}
+
+static int pre_map_gar_callback(struct apei_exec_context *ctx,
+ struct acpi_whea_header *entry,
+ void *data)
+{
+ u8 ins = entry->instruction;
+
+ if (ctx->ins_table[ins].flags & APEI_EXEC_INS_ACCESS_REGISTER)
+ return apei_map_generic_address(&entry->register_region);
+
+ return 0;
+}
+
+/*
+ * Pre-map all GARs in action table to make it possible to access them
+ * in NMI handler.
+ */
+int apei_exec_pre_map_gars(struct apei_exec_context *ctx)
+{
+ int rc, end;
+
+ rc = apei_exec_for_each_entry(ctx, pre_map_gar_callback,
+ NULL, &end);
+ if (rc) {
+ struct apei_exec_context ctx_unmap;
+ memcpy(&ctx_unmap, ctx, sizeof(*ctx));
+ ctx_unmap.entries = end;
+ apei_exec_post_unmap_gars(&ctx_unmap);
+ }
+
+ return rc;
+}
+EXPORT_SYMBOL_GPL(apei_exec_pre_map_gars);
+
+static int post_unmap_gar_callback(struct apei_exec_context *ctx,
+ struct acpi_whea_header *entry,
+ void *data)
+{
+ u8 ins = entry->instruction;
+
+ if (ctx->ins_table[ins].flags & APEI_EXEC_INS_ACCESS_REGISTER)
+ apei_unmap_generic_address(&entry->register_region);
+
+ return 0;
+}
+
+/* Post-unmap all GAR in action table. */
+int apei_exec_post_unmap_gars(struct apei_exec_context *ctx)
+{
+ return apei_exec_for_each_entry(ctx, post_unmap_gar_callback,
+ NULL, NULL);
+}
+EXPORT_SYMBOL_GPL(apei_exec_post_unmap_gars);
+
+/*
+ * Resource management for GARs in APEI
+ */
+struct apei_res {
+ struct list_head list;
+ unsigned long start;
+ unsigned long end;
+};
+
+/* Collect all resources requested, to avoid conflict */
+struct apei_resources apei_resources_all = {
+ .iomem = LIST_HEAD_INIT(apei_resources_all.iomem),
+ .ioport = LIST_HEAD_INIT(apei_resources_all.ioport),
+};
+
+static int apei_res_add(struct list_head *res_list,
+ unsigned long start, unsigned long size)
+{
+ struct apei_res *res, *resn, *res_ins = NULL;
+ unsigned long end = start + size;
+
+ if (end <= start)
+ return 0;
+repeat:
+ list_for_each_entry_safe(res, resn, res_list, list) {
+ if (res->start > end || res->end < start)
+ continue;
+ else if (end <= res->end && start >= res->start) {
+ kfree(res_ins);
+ return 0;
+ }
+ list_del(&res->list);
+ res->start = start = min(res->start, start);
+ res->end = end = max(res->end, end);
+ kfree(res_ins);
+ res_ins = res;
+ goto repeat;
+ }
+
+ if (res_ins)
+ list_add(&res_ins->list, res_list);
+ else {
+ res_ins = kmalloc(sizeof(*res), GFP_KERNEL);
+ if (!res_ins)
+ return -ENOMEM;
+ res_ins->start = start;
+ res_ins->end = end;
+ list_add(&res_ins->list, res_list);
+ }
+
+ return 0;
+}
+
+static int apei_res_sub(struct list_head *res_list1,
+ struct list_head *res_list2)
+{
+ struct apei_res *res1, *resn1, *res2, *res;
+ res1 = list_entry(res_list1->next, struct apei_res, list);
+ resn1 = list_entry(res1->list.next, struct apei_res, list);
+ while (&res1->list != res_list1) {
+ list_for_each_entry(res2, res_list2, list) {
+ if (res1->start >= res2->end ||
+ res1->end <= res2->start)
+ continue;
+ else if (res1->end <= res2->end &&
+ res1->start >= res2->start) {
+ list_del(&res1->list);
+ kfree(res1);
+ break;
+ } else if (res1->end > res2->end &&
+ res1->start < res2->start) {
+ res = kmalloc(sizeof(*res), GFP_KERNEL);
+ if (!res)
+ return -ENOMEM;
+ res->start = res2->end;
+ res->end = res1->end;
+ res1->end = res2->start;
+ list_add(&res->list, &res1->list);
+ resn1 = res;
+ } else {
+ if (res1->start < res2->start)
+ res1->end = res2->start;
+ else
+ res1->start = res2->end;
+ }
+ }
+ res1 = resn1;
+ resn1 = list_entry(resn1->list.next, struct apei_res, list);
+ }
+
+ return 0;
+}
+
+static void apei_res_clean(struct list_head *res_list)
+{
+ struct apei_res *res, *resn;
+
+ list_for_each_entry_safe(res, resn, res_list, list) {
+ list_del(&res->list);
+ kfree(res);
+ }
+}
+
+void apei_resources_fini(struct apei_resources *resources)
+{
+ apei_res_clean(&resources->iomem);
+ apei_res_clean(&resources->ioport);
+}
+EXPORT_SYMBOL_GPL(apei_resources_fini);
+
+static int apei_resources_merge(struct apei_resources *resources1,
+ struct apei_resources *resources2)
+{
+ int rc;
+ struct apei_res *res;
+
+ list_for_each_entry(res, &resources2->iomem, list) {
+ rc = apei_res_add(&resources1->iomem, res->start,
+ res->end - res->start);
+ if (rc)
+ return rc;
+ }
+ list_for_each_entry(res, &resources2->ioport, list) {
+ rc = apei_res_add(&resources1->ioport, res->start,
+ res->end - res->start);
+ if (rc)
+ return rc;
+ }
+
+ return 0;
+}
+
+int apei_resources_add(struct apei_resources *resources,
+ unsigned long start, unsigned long size,
+ bool iomem)
+{
+ if (iomem)
+ return apei_res_add(&resources->iomem, start, size);
+ else
+ return apei_res_add(&resources->ioport, start, size);
+}
+EXPORT_SYMBOL_GPL(apei_resources_add);
+
+/*
+ * EINJ has two groups of GARs (EINJ table entry and trigger table
+ * entry), so common resources are subtracted from the trigger table
+ * resources before the second requesting.
+ */
+int apei_resources_sub(struct apei_resources *resources1,
+ struct apei_resources *resources2)
+{
+ int rc;
+
+ rc = apei_res_sub(&resources1->iomem, &resources2->iomem);
+ if (rc)
+ return rc;
+ return apei_res_sub(&resources1->ioport, &resources2->ioport);
+}
+EXPORT_SYMBOL_GPL(apei_resources_sub);
+
+static int apei_get_res_callback(__u64 start, __u64 size, void *data)
+{
+ struct apei_resources *resources = data;
+ return apei_res_add(&resources->iomem, start, size);
+}
+
+static int apei_get_nvs_resources(struct apei_resources *resources)
+{
+ return acpi_nvs_for_each_region(apei_get_res_callback, resources);
+}
+
+int (*arch_apei_filter_addr)(int (*func)(__u64 start, __u64 size,
+ void *data), void *data);
+static int apei_get_arch_resources(struct apei_resources *resources)
+
+{
+ return arch_apei_filter_addr(apei_get_res_callback, resources);
+}
+
+/*
+ * IO memory/port resource management mechanism is used to check
+ * whether memory/port area used by GARs conflicts with normal memory
+ * or IO memory/port of devices.
+ */
+int apei_resources_request(struct apei_resources *resources,
+ const char *desc)
+{
+ struct apei_res *res, *res_bak = NULL;
+ struct resource *r;
+ struct apei_resources nvs_resources, arch_res;
+ int rc;
+
+ rc = apei_resources_sub(resources, &apei_resources_all);
+ if (rc)
+ return rc;
+
+ /*
+ * Some firmware uses ACPI NVS region, that has been marked as
+ * busy, so exclude it from APEI resources to avoid false
+ * conflict.
+ */
+ apei_resources_init(&nvs_resources);
+ rc = apei_get_nvs_resources(&nvs_resources);
+ if (rc)
+ goto nvs_res_fini;
+ rc = apei_resources_sub(resources, &nvs_resources);
+ if (rc)
+ goto nvs_res_fini;
+
+ if (arch_apei_filter_addr) {
+ apei_resources_init(&arch_res);
+ rc = apei_get_arch_resources(&arch_res);
+ if (rc)
+ goto arch_res_fini;
+ rc = apei_resources_sub(resources, &arch_res);
+ if (rc)
+ goto arch_res_fini;
+ }
+
+ rc = -EINVAL;
+ list_for_each_entry(res, &resources->iomem, list) {
+ r = request_mem_region(res->start, res->end - res->start,
+ desc);
+ if (!r) {
+ pr_err(APEI_PFX
+ "Can not request [mem %#010llx-%#010llx] for %s registers\n",
+ (unsigned long long)res->start,
+ (unsigned long long)res->end - 1, desc);
+ res_bak = res;
+ goto err_unmap_iomem;
+ }
+ }
+
+ list_for_each_entry(res, &resources->ioport, list) {
+ r = request_region(res->start, res->end - res->start, desc);
+ if (!r) {
+ pr_err(APEI_PFX
+ "Can not request [io %#06llx-%#06llx] for %s registers\n",
+ (unsigned long long)res->start,
+ (unsigned long long)res->end - 1, desc);
+ res_bak = res;
+ goto err_unmap_ioport;
+ }
+ }
+
+ rc = apei_resources_merge(&apei_resources_all, resources);
+ if (rc) {
+ pr_err(APEI_PFX "Fail to merge resources!\n");
+ goto err_unmap_ioport;
+ }
+
+ return 0;
+err_unmap_ioport:
+ list_for_each_entry(res, &resources->ioport, list) {
+ if (res == res_bak)
+ break;
+ release_region(res->start, res->end - res->start);
+ }
+ res_bak = NULL;
+err_unmap_iomem:
+ list_for_each_entry(res, &resources->iomem, list) {
+ if (res == res_bak)
+ break;
+ release_mem_region(res->start, res->end - res->start);
+ }
+arch_res_fini:
+ apei_resources_fini(&arch_res);
+nvs_res_fini:
+ apei_resources_fini(&nvs_resources);
+ return rc;
+}
+EXPORT_SYMBOL_GPL(apei_resources_request);
+
+void apei_resources_release(struct apei_resources *resources)
+{
+ int rc;
+ struct apei_res *res;
+
+ list_for_each_entry(res, &resources->iomem, list)
+ release_mem_region(res->start, res->end - res->start);
+ list_for_each_entry(res, &resources->ioport, list)
+ release_region(res->start, res->end - res->start);
+
+ rc = apei_resources_sub(&apei_resources_all, resources);
+ if (rc)
+ pr_err(APEI_PFX "Fail to sub resources!\n");
+}
+EXPORT_SYMBOL_GPL(apei_resources_release);
+
+static int apei_check_gar(struct acpi_generic_address *reg, u64 *paddr,
+ u32 *access_bit_width)
+{
+ u32 bit_width, bit_offset, access_size_code, space_id;
+
+ bit_width = reg->bit_width;
+ bit_offset = reg->bit_offset;
+ access_size_code = reg->access_width;
+ space_id = reg->space_id;
+ *paddr = get_unaligned(&reg->address);
+ if (!*paddr) {
+ pr_warning(FW_BUG APEI_PFX
+ "Invalid physical address in GAR [0x%llx/%u/%u/%u/%u]\n",
+ *paddr, bit_width, bit_offset, access_size_code,
+ space_id);
+ return -EINVAL;
+ }
+
+ if (access_size_code < 1 || access_size_code > 4) {
+ pr_warning(FW_BUG APEI_PFX
+ "Invalid access size code in GAR [0x%llx/%u/%u/%u/%u]\n",
+ *paddr, bit_width, bit_offset, access_size_code,
+ space_id);
+ return -EINVAL;
+ }
+ *access_bit_width = 1UL << (access_size_code + 2);
+
+ /* Fixup common BIOS bug */
+ if (bit_width == 32 && bit_offset == 0 && (*paddr & 0x03) == 0 &&
+ *access_bit_width < 32)
+ *access_bit_width = 32;
+ else if (bit_width == 64 && bit_offset == 0 && (*paddr & 0x07) == 0 &&
+ *access_bit_width < 64)
+ *access_bit_width = 64;
+
+ if ((bit_width + bit_offset) > *access_bit_width) {
+ pr_warning(FW_BUG APEI_PFX
+ "Invalid bit width + offset in GAR [0x%llx/%u/%u/%u/%u]\n",
+ *paddr, bit_width, bit_offset, access_size_code,
+ space_id);
+ return -EINVAL;
+ }
+
+ if (space_id != ACPI_ADR_SPACE_SYSTEM_MEMORY &&
+ space_id != ACPI_ADR_SPACE_SYSTEM_IO) {
+ pr_warning(FW_BUG APEI_PFX
+ "Invalid address space type in GAR [0x%llx/%u/%u/%u/%u]\n",
+ *paddr, bit_width, bit_offset, access_size_code,
+ space_id);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+int apei_map_generic_address(struct acpi_generic_address *reg)
+{
+ int rc;
+ u32 access_bit_width;
+ u64 address;
+
+ rc = apei_check_gar(reg, &address, &access_bit_width);
+ if (rc)
+ return rc;
+ return acpi_os_map_generic_address(reg);
+}
+EXPORT_SYMBOL_GPL(apei_map_generic_address);
+
+/* read GAR in interrupt (including NMI) or process context */
+int apei_read(u64 *val, struct acpi_generic_address *reg)
+{
+ int rc;
+ u32 access_bit_width;
+ u64 address;
+ acpi_status status;
+
+ rc = apei_check_gar(reg, &address, &access_bit_width);
+ if (rc)
+ return rc;
+
+ *val = 0;
+ switch(reg->space_id) {
+ case ACPI_ADR_SPACE_SYSTEM_MEMORY:
+ status = acpi_os_read_memory((acpi_physical_address) address,
+ val, access_bit_width);
+ if (ACPI_FAILURE(status))
+ return -EIO;
+ break;
+ case ACPI_ADR_SPACE_SYSTEM_IO:
+ status = acpi_os_read_port(address, (u32 *)val,
+ access_bit_width);
+ if (ACPI_FAILURE(status))
+ return -EIO;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(apei_read);
+
+/* write GAR in interrupt (including NMI) or process context */
+int apei_write(u64 val, struct acpi_generic_address *reg)
+{
+ int rc;
+ u32 access_bit_width;
+ u64 address;
+ acpi_status status;
+
+ rc = apei_check_gar(reg, &address, &access_bit_width);
+ if (rc)
+ return rc;
+
+ switch (reg->space_id) {
+ case ACPI_ADR_SPACE_SYSTEM_MEMORY:
+ status = acpi_os_write_memory((acpi_physical_address) address,
+ val, access_bit_width);
+ if (ACPI_FAILURE(status))
+ return -EIO;
+ break;
+ case ACPI_ADR_SPACE_SYSTEM_IO:
+ status = acpi_os_write_port(address, val, access_bit_width);
+ if (ACPI_FAILURE(status))
+ return -EIO;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(apei_write);
+
+static int collect_res_callback(struct apei_exec_context *ctx,
+ struct acpi_whea_header *entry,
+ void *data)
+{
+ struct apei_resources *resources = data;
+ struct acpi_generic_address *reg = &entry->register_region;
+ u8 ins = entry->instruction;
+ u32 access_bit_width;
+ u64 paddr;
+ int rc;
+
+ if (!(ctx->ins_table[ins].flags & APEI_EXEC_INS_ACCESS_REGISTER))
+ return 0;
+
+ rc = apei_check_gar(reg, &paddr, &access_bit_width);
+ if (rc)
+ return rc;
+
+ switch (reg->space_id) {
+ case ACPI_ADR_SPACE_SYSTEM_MEMORY:
+ return apei_res_add(&resources->iomem, paddr,
+ access_bit_width / 8);
+ case ACPI_ADR_SPACE_SYSTEM_IO:
+ return apei_res_add(&resources->ioport, paddr,
+ access_bit_width / 8);
+ default:
+ return -EINVAL;
+ }
+}
+
+/*
+ * Same register may be used by multiple instructions in GARs, so
+ * resources are collected before requesting.
+ */
+int apei_exec_collect_resources(struct apei_exec_context *ctx,
+ struct apei_resources *resources)
+{
+ return apei_exec_for_each_entry(ctx, collect_res_callback,
+ resources, NULL);
+}
+EXPORT_SYMBOL_GPL(apei_exec_collect_resources);
+
+struct dentry *apei_get_debugfs_dir(void)
+{
+ static struct dentry *dapei;
+
+ if (!dapei)
+ dapei = debugfs_create_dir("apei", NULL);
+
+ return dapei;
+}
+EXPORT_SYMBOL_GPL(apei_get_debugfs_dir);
+
+int __weak arch_apei_enable_cmcff(struct acpi_hest_header *hest_hdr,
+ void *data)
+{
+ return 1;
+}
+EXPORT_SYMBOL_GPL(arch_apei_enable_cmcff);
+
+void __weak arch_apei_report_mem_error(int sev,
+ struct cper_sec_mem_err *mem_err)
+{
+}
+EXPORT_SYMBOL_GPL(arch_apei_report_mem_error);
+
+int apei_osc_setup(void)
+{
+ static u8 whea_uuid_str[] = "ed855e0c-6c90-47bf-a62a-26de0fc5ad5c";
+ acpi_handle handle;
+ u32 capbuf[3];
+ struct acpi_osc_context context = {
+ .uuid_str = whea_uuid_str,
+ .rev = 1,
+ .cap.length = sizeof(capbuf),
+ .cap.pointer = capbuf,
+ };
+
+ capbuf[OSC_QUERY_DWORD] = OSC_QUERY_ENABLE;
+ capbuf[OSC_SUPPORT_DWORD] = 1;
+ capbuf[OSC_CONTROL_DWORD] = 0;
+
+ if (ACPI_FAILURE(acpi_get_handle(NULL, "\\_SB", &handle))
+ || ACPI_FAILURE(acpi_run_osc(handle, &context)))
+ return -EIO;
+ else {
+ kfree(context.ret.pointer);
+ return 0;
+ }
+}
+EXPORT_SYMBOL_GPL(apei_osc_setup);
diff --git a/drivers/acpi/apei/apei-internal.h b/drivers/acpi/apei/apei-internal.h
new file mode 100644
index 000000000..16129c78b
--- /dev/null
+++ b/drivers/acpi/apei/apei-internal.h
@@ -0,0 +1,143 @@
+/*
+ * apei-internal.h - ACPI Platform Error Interface internal
+ * definations.
+ */
+
+#ifndef APEI_INTERNAL_H
+#define APEI_INTERNAL_H
+
+#include <linux/cper.h>
+#include <linux/acpi.h>
+
+struct apei_exec_context;
+
+typedef int (*apei_exec_ins_func_t)(struct apei_exec_context *ctx,
+ struct acpi_whea_header *entry);
+
+#define APEI_EXEC_INS_ACCESS_REGISTER 0x0001
+
+struct apei_exec_ins_type {
+ u32 flags;
+ apei_exec_ins_func_t run;
+};
+
+struct apei_exec_context {
+ u32 ip;
+ u64 value;
+ u64 var1;
+ u64 var2;
+ u64 src_base;
+ u64 dst_base;
+ struct apei_exec_ins_type *ins_table;
+ u32 instructions;
+ struct acpi_whea_header *action_table;
+ u32 entries;
+};
+
+void apei_exec_ctx_init(struct apei_exec_context *ctx,
+ struct apei_exec_ins_type *ins_table,
+ u32 instructions,
+ struct acpi_whea_header *action_table,
+ u32 entries);
+
+static inline void apei_exec_ctx_set_input(struct apei_exec_context *ctx,
+ u64 input)
+{
+ ctx->value = input;
+}
+
+static inline u64 apei_exec_ctx_get_output(struct apei_exec_context *ctx)
+{
+ return ctx->value;
+}
+
+int __apei_exec_run(struct apei_exec_context *ctx, u8 action, bool optional);
+
+static inline int apei_exec_run(struct apei_exec_context *ctx, u8 action)
+{
+ return __apei_exec_run(ctx, action, 0);
+}
+
+/* It is optional whether the firmware provides the action */
+static inline int apei_exec_run_optional(struct apei_exec_context *ctx, u8 action)
+{
+ return __apei_exec_run(ctx, action, 1);
+}
+
+/* Common instruction implementation */
+
+/* IP has been set in instruction function */
+#define APEI_EXEC_SET_IP 1
+
+int apei_map_generic_address(struct acpi_generic_address *reg);
+
+static inline void apei_unmap_generic_address(struct acpi_generic_address *reg)
+{
+ acpi_os_unmap_generic_address(reg);
+}
+
+int apei_read(u64 *val, struct acpi_generic_address *reg);
+int apei_write(u64 val, struct acpi_generic_address *reg);
+
+int __apei_exec_read_register(struct acpi_whea_header *entry, u64 *val);
+int __apei_exec_write_register(struct acpi_whea_header *entry, u64 val);
+int apei_exec_read_register(struct apei_exec_context *ctx,
+ struct acpi_whea_header *entry);
+int apei_exec_read_register_value(struct apei_exec_context *ctx,
+ struct acpi_whea_header *entry);
+int apei_exec_write_register(struct apei_exec_context *ctx,
+ struct acpi_whea_header *entry);
+int apei_exec_write_register_value(struct apei_exec_context *ctx,
+ struct acpi_whea_header *entry);
+int apei_exec_noop(struct apei_exec_context *ctx,
+ struct acpi_whea_header *entry);
+int apei_exec_pre_map_gars(struct apei_exec_context *ctx);
+int apei_exec_post_unmap_gars(struct apei_exec_context *ctx);
+
+struct apei_resources {
+ struct list_head iomem;
+ struct list_head ioport;
+};
+
+static inline void apei_resources_init(struct apei_resources *resources)
+{
+ INIT_LIST_HEAD(&resources->iomem);
+ INIT_LIST_HEAD(&resources->ioport);
+}
+
+void apei_resources_fini(struct apei_resources *resources);
+int apei_resources_add(struct apei_resources *resources,
+ unsigned long start, unsigned long size,
+ bool iomem);
+int apei_resources_sub(struct apei_resources *resources1,
+ struct apei_resources *resources2);
+int apei_resources_request(struct apei_resources *resources,
+ const char *desc);
+void apei_resources_release(struct apei_resources *resources);
+int apei_exec_collect_resources(struct apei_exec_context *ctx,
+ struct apei_resources *resources);
+
+struct dentry;
+struct dentry *apei_get_debugfs_dir(void);
+
+#define apei_estatus_for_each_section(estatus, section) \
+ for (section = (struct acpi_hest_generic_data *)(estatus + 1); \
+ (void *)section - (void *)estatus < estatus->data_length; \
+ section = (void *)(section+1) + section->error_data_length)
+
+static inline u32 cper_estatus_len(struct acpi_hest_generic_status *estatus)
+{
+ if (estatus->raw_data_length)
+ return estatus->raw_data_offset + \
+ estatus->raw_data_length;
+ else
+ return sizeof(*estatus) + estatus->data_length;
+}
+
+void cper_estatus_print(const char *pfx,
+ const struct acpi_hest_generic_status *estatus);
+int cper_estatus_check_header(const struct acpi_hest_generic_status *estatus);
+int cper_estatus_check(const struct acpi_hest_generic_status *estatus);
+
+int apei_osc_setup(void);
+#endif
diff --git a/drivers/acpi/apei/einj.c b/drivers/acpi/apei/einj.c
new file mode 100644
index 000000000..a095d4f85
--- /dev/null
+++ b/drivers/acpi/apei/einj.c
@@ -0,0 +1,833 @@
+/*
+ * APEI Error INJection support
+ *
+ * EINJ provides a hardware error injection mechanism, this is useful
+ * for debugging and testing of other APEI and RAS features.
+ *
+ * For more information about EINJ, please refer to ACPI Specification
+ * version 4.0, section 17.5.
+ *
+ * Copyright 2009-2010 Intel Corp.
+ * Author: Huang Ying <ying.huang@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+#include <linux/nmi.h>
+#include <linux/delay.h>
+#include <linux/mm.h>
+#include <asm/unaligned.h>
+
+#include "apei-internal.h"
+
+#define EINJ_PFX "EINJ: "
+
+#define SPIN_UNIT 100 /* 100ns */
+/* Firmware should respond within 1 milliseconds */
+#define FIRMWARE_TIMEOUT (1 * NSEC_PER_MSEC)
+#define ACPI5_VENDOR_BIT BIT(31)
+#define MEM_ERROR_MASK (ACPI_EINJ_MEMORY_CORRECTABLE | \
+ ACPI_EINJ_MEMORY_UNCORRECTABLE | \
+ ACPI_EINJ_MEMORY_FATAL)
+
+/*
+ * ACPI version 5 provides a SET_ERROR_TYPE_WITH_ADDRESS action.
+ */
+static int acpi5;
+
+struct set_error_type_with_address {
+ u32 type;
+ u32 vendor_extension;
+ u32 flags;
+ u32 apicid;
+ u64 memory_address;
+ u64 memory_address_range;
+ u32 pcie_sbdf;
+};
+enum {
+ SETWA_FLAGS_APICID = 1,
+ SETWA_FLAGS_MEM = 2,
+ SETWA_FLAGS_PCIE_SBDF = 4,
+};
+
+/*
+ * Vendor extensions for platform specific operations
+ */
+struct vendor_error_type_extension {
+ u32 length;
+ u32 pcie_sbdf;
+ u16 vendor_id;
+ u16 device_id;
+ u8 rev_id;
+ u8 reserved[3];
+};
+
+static u32 notrigger;
+
+static u32 vendor_flags;
+static struct debugfs_blob_wrapper vendor_blob;
+static char vendor_dev[64];
+
+/*
+ * Some BIOSes allow parameters to the SET_ERROR_TYPE entries in the
+ * EINJ table through an unpublished extension. Use with caution as
+ * most will ignore the parameter and make their own choice of address
+ * for error injection. This extension is used only if
+ * param_extension module parameter is specified.
+ */
+struct einj_parameter {
+ u64 type;
+ u64 reserved1;
+ u64 reserved2;
+ u64 param1;
+ u64 param2;
+};
+
+#define EINJ_OP_BUSY 0x1
+#define EINJ_STATUS_SUCCESS 0x0
+#define EINJ_STATUS_FAIL 0x1
+#define EINJ_STATUS_INVAL 0x2
+
+#define EINJ_TAB_ENTRY(tab) \
+ ((struct acpi_whea_header *)((char *)(tab) + \
+ sizeof(struct acpi_table_einj)))
+
+static bool param_extension;
+module_param(param_extension, bool, 0);
+
+static struct acpi_table_einj *einj_tab;
+
+static struct apei_resources einj_resources;
+
+static struct apei_exec_ins_type einj_ins_type[] = {
+ [ACPI_EINJ_READ_REGISTER] = {
+ .flags = APEI_EXEC_INS_ACCESS_REGISTER,
+ .run = apei_exec_read_register,
+ },
+ [ACPI_EINJ_READ_REGISTER_VALUE] = {
+ .flags = APEI_EXEC_INS_ACCESS_REGISTER,
+ .run = apei_exec_read_register_value,
+ },
+ [ACPI_EINJ_WRITE_REGISTER] = {
+ .flags = APEI_EXEC_INS_ACCESS_REGISTER,
+ .run = apei_exec_write_register,
+ },
+ [ACPI_EINJ_WRITE_REGISTER_VALUE] = {
+ .flags = APEI_EXEC_INS_ACCESS_REGISTER,
+ .run = apei_exec_write_register_value,
+ },
+ [ACPI_EINJ_NOOP] = {
+ .flags = 0,
+ .run = apei_exec_noop,
+ },
+};
+
+/*
+ * Prevent EINJ interpreter to run simultaneously, because the
+ * corresponding firmware implementation may not work properly when
+ * invoked simultaneously.
+ */
+static DEFINE_MUTEX(einj_mutex);
+
+static void *einj_param;
+
+static void einj_exec_ctx_init(struct apei_exec_context *ctx)
+{
+ apei_exec_ctx_init(ctx, einj_ins_type, ARRAY_SIZE(einj_ins_type),
+ EINJ_TAB_ENTRY(einj_tab), einj_tab->entries);
+}
+
+static int __einj_get_available_error_type(u32 *type)
+{
+ struct apei_exec_context ctx;
+ int rc;
+
+ einj_exec_ctx_init(&ctx);
+ rc = apei_exec_run(&ctx, ACPI_EINJ_GET_ERROR_TYPE);
+ if (rc)
+ return rc;
+ *type = apei_exec_ctx_get_output(&ctx);
+
+ return 0;
+}
+
+/* Get error injection capabilities of the platform */
+static int einj_get_available_error_type(u32 *type)
+{
+ int rc;
+
+ mutex_lock(&einj_mutex);
+ rc = __einj_get_available_error_type(type);
+ mutex_unlock(&einj_mutex);
+
+ return rc;
+}
+
+static int einj_timedout(u64 *t)
+{
+ if ((s64)*t < SPIN_UNIT) {
+ pr_warning(FW_WARN EINJ_PFX
+ "Firmware does not respond in time\n");
+ return 1;
+ }
+ *t -= SPIN_UNIT;
+ ndelay(SPIN_UNIT);
+ touch_nmi_watchdog();
+ return 0;
+}
+
+static void check_vendor_extension(u64 paddr,
+ struct set_error_type_with_address *v5param)
+{
+ int offset = v5param->vendor_extension;
+ struct vendor_error_type_extension *v;
+ u32 sbdf;
+
+ if (!offset)
+ return;
+ v = acpi_os_map_iomem(paddr + offset, sizeof(*v));
+ if (!v)
+ return;
+ sbdf = v->pcie_sbdf;
+ sprintf(vendor_dev, "%x:%x:%x.%x vendor_id=%x device_id=%x rev_id=%x\n",
+ sbdf >> 24, (sbdf >> 16) & 0xff,
+ (sbdf >> 11) & 0x1f, (sbdf >> 8) & 0x7,
+ v->vendor_id, v->device_id, v->rev_id);
+ acpi_os_unmap_iomem(v, sizeof(*v));
+}
+
+static void *einj_get_parameter_address(void)
+{
+ int i;
+ u64 pa_v4 = 0, pa_v5 = 0;
+ struct acpi_whea_header *entry;
+
+ entry = EINJ_TAB_ENTRY(einj_tab);
+ for (i = 0; i < einj_tab->entries; i++) {
+ if (entry->action == ACPI_EINJ_SET_ERROR_TYPE &&
+ entry->instruction == ACPI_EINJ_WRITE_REGISTER &&
+ entry->register_region.space_id ==
+ ACPI_ADR_SPACE_SYSTEM_MEMORY)
+ pa_v4 = get_unaligned(&entry->register_region.address);
+ if (entry->action == ACPI_EINJ_SET_ERROR_TYPE_WITH_ADDRESS &&
+ entry->instruction == ACPI_EINJ_WRITE_REGISTER &&
+ entry->register_region.space_id ==
+ ACPI_ADR_SPACE_SYSTEM_MEMORY)
+ pa_v5 = get_unaligned(&entry->register_region.address);
+ entry++;
+ }
+ if (pa_v5) {
+ struct set_error_type_with_address *v5param;
+
+ v5param = acpi_os_map_iomem(pa_v5, sizeof(*v5param));
+ if (v5param) {
+ acpi5 = 1;
+ check_vendor_extension(pa_v5, v5param);
+ return v5param;
+ }
+ }
+ if (param_extension && pa_v4) {
+ struct einj_parameter *v4param;
+
+ v4param = acpi_os_map_iomem(pa_v4, sizeof(*v4param));
+ if (!v4param)
+ return NULL;
+ if (v4param->reserved1 || v4param->reserved2) {
+ acpi_os_unmap_iomem(v4param, sizeof(*v4param));
+ return NULL;
+ }
+ return v4param;
+ }
+
+ return NULL;
+}
+
+/* do sanity check to trigger table */
+static int einj_check_trigger_header(struct acpi_einj_trigger *trigger_tab)
+{
+ if (trigger_tab->header_size != sizeof(struct acpi_einj_trigger))
+ return -EINVAL;
+ if (trigger_tab->table_size > PAGE_SIZE ||
+ trigger_tab->table_size < trigger_tab->header_size)
+ return -EINVAL;
+ if (trigger_tab->entry_count !=
+ (trigger_tab->table_size - trigger_tab->header_size) /
+ sizeof(struct acpi_einj_entry))
+ return -EINVAL;
+
+ return 0;
+}
+
+static struct acpi_generic_address *einj_get_trigger_parameter_region(
+ struct acpi_einj_trigger *trigger_tab, u64 param1, u64 param2)
+{
+ int i;
+ struct acpi_whea_header *entry;
+
+ entry = (struct acpi_whea_header *)
+ ((char *)trigger_tab + sizeof(struct acpi_einj_trigger));
+ for (i = 0; i < trigger_tab->entry_count; i++) {
+ if (entry->action == ACPI_EINJ_TRIGGER_ERROR &&
+ entry->instruction == ACPI_EINJ_WRITE_REGISTER_VALUE &&
+ entry->register_region.space_id ==
+ ACPI_ADR_SPACE_SYSTEM_MEMORY &&
+ (entry->register_region.address & param2) == (param1 & param2))
+ return &entry->register_region;
+ entry++;
+ }
+
+ return NULL;
+}
+/* Execute instructions in trigger error action table */
+static int __einj_error_trigger(u64 trigger_paddr, u32 type,
+ u64 param1, u64 param2)
+{
+ struct acpi_einj_trigger *trigger_tab = NULL;
+ struct apei_exec_context trigger_ctx;
+ struct apei_resources trigger_resources;
+ struct acpi_whea_header *trigger_entry;
+ struct resource *r;
+ u32 table_size;
+ int rc = -EIO;
+ struct acpi_generic_address *trigger_param_region = NULL;
+
+ r = request_mem_region(trigger_paddr, sizeof(*trigger_tab),
+ "APEI EINJ Trigger Table");
+ if (!r) {
+ pr_err(EINJ_PFX
+ "Can not request [mem %#010llx-%#010llx] for Trigger table\n",
+ (unsigned long long)trigger_paddr,
+ (unsigned long long)trigger_paddr +
+ sizeof(*trigger_tab) - 1);
+ goto out;
+ }
+ trigger_tab = ioremap_cache(trigger_paddr, sizeof(*trigger_tab));
+ if (!trigger_tab) {
+ pr_err(EINJ_PFX "Failed to map trigger table!\n");
+ goto out_rel_header;
+ }
+ rc = einj_check_trigger_header(trigger_tab);
+ if (rc) {
+ pr_warning(FW_BUG EINJ_PFX
+ "The trigger error action table is invalid\n");
+ goto out_rel_header;
+ }
+
+ /* No action structures in the TRIGGER_ERROR table, nothing to do */
+ if (!trigger_tab->entry_count)
+ goto out_rel_header;
+
+ rc = -EIO;
+ table_size = trigger_tab->table_size;
+ r = request_mem_region(trigger_paddr + sizeof(*trigger_tab),
+ table_size - sizeof(*trigger_tab),
+ "APEI EINJ Trigger Table");
+ if (!r) {
+ pr_err(EINJ_PFX
+"Can not request [mem %#010llx-%#010llx] for Trigger Table Entry\n",
+ (unsigned long long)trigger_paddr + sizeof(*trigger_tab),
+ (unsigned long long)trigger_paddr + table_size - 1);
+ goto out_rel_header;
+ }
+ iounmap(trigger_tab);
+ trigger_tab = ioremap_cache(trigger_paddr, table_size);
+ if (!trigger_tab) {
+ pr_err(EINJ_PFX "Failed to map trigger table!\n");
+ goto out_rel_entry;
+ }
+ trigger_entry = (struct acpi_whea_header *)
+ ((char *)trigger_tab + sizeof(struct acpi_einj_trigger));
+ apei_resources_init(&trigger_resources);
+ apei_exec_ctx_init(&trigger_ctx, einj_ins_type,
+ ARRAY_SIZE(einj_ins_type),
+ trigger_entry, trigger_tab->entry_count);
+ rc = apei_exec_collect_resources(&trigger_ctx, &trigger_resources);
+ if (rc)
+ goto out_fini;
+ rc = apei_resources_sub(&trigger_resources, &einj_resources);
+ if (rc)
+ goto out_fini;
+ /*
+ * Some firmware will access target address specified in
+ * param1 to trigger the error when injecting memory error.
+ * This will cause resource conflict with regular memory. So
+ * remove it from trigger table resources.
+ */
+ if ((param_extension || acpi5) && (type & MEM_ERROR_MASK) && param2) {
+ struct apei_resources addr_resources;
+ apei_resources_init(&addr_resources);
+ trigger_param_region = einj_get_trigger_parameter_region(
+ trigger_tab, param1, param2);
+ if (trigger_param_region) {
+ rc = apei_resources_add(&addr_resources,
+ trigger_param_region->address,
+ trigger_param_region->bit_width/8, true);
+ if (rc)
+ goto out_fini;
+ rc = apei_resources_sub(&trigger_resources,
+ &addr_resources);
+ }
+ apei_resources_fini(&addr_resources);
+ if (rc)
+ goto out_fini;
+ }
+ rc = apei_resources_request(&trigger_resources, "APEI EINJ Trigger");
+ if (rc)
+ goto out_fini;
+ rc = apei_exec_pre_map_gars(&trigger_ctx);
+ if (rc)
+ goto out_release;
+
+ rc = apei_exec_run(&trigger_ctx, ACPI_EINJ_TRIGGER_ERROR);
+
+ apei_exec_post_unmap_gars(&trigger_ctx);
+out_release:
+ apei_resources_release(&trigger_resources);
+out_fini:
+ apei_resources_fini(&trigger_resources);
+out_rel_entry:
+ release_mem_region(trigger_paddr + sizeof(*trigger_tab),
+ table_size - sizeof(*trigger_tab));
+out_rel_header:
+ release_mem_region(trigger_paddr, sizeof(*trigger_tab));
+out:
+ if (trigger_tab)
+ iounmap(trigger_tab);
+
+ return rc;
+}
+
+static int __einj_error_inject(u32 type, u32 flags, u64 param1, u64 param2,
+ u64 param3, u64 param4)
+{
+ struct apei_exec_context ctx;
+ u64 val, trigger_paddr, timeout = FIRMWARE_TIMEOUT;
+ int rc;
+
+ einj_exec_ctx_init(&ctx);
+
+ rc = apei_exec_run_optional(&ctx, ACPI_EINJ_BEGIN_OPERATION);
+ if (rc)
+ return rc;
+ apei_exec_ctx_set_input(&ctx, type);
+ if (acpi5) {
+ struct set_error_type_with_address *v5param = einj_param;
+
+ v5param->type = type;
+ if (type & ACPI5_VENDOR_BIT) {
+ switch (vendor_flags) {
+ case SETWA_FLAGS_APICID:
+ v5param->apicid = param1;
+ break;
+ case SETWA_FLAGS_MEM:
+ v5param->memory_address = param1;
+ v5param->memory_address_range = param2;
+ break;
+ case SETWA_FLAGS_PCIE_SBDF:
+ v5param->pcie_sbdf = param1;
+ break;
+ }
+ v5param->flags = vendor_flags;
+ } else if (flags) {
+ v5param->flags = flags;
+ v5param->memory_address = param1;
+ v5param->memory_address_range = param2;
+ v5param->apicid = param3;
+ v5param->pcie_sbdf = param4;
+ } else {
+ switch (type) {
+ case ACPI_EINJ_PROCESSOR_CORRECTABLE:
+ case ACPI_EINJ_PROCESSOR_UNCORRECTABLE:
+ case ACPI_EINJ_PROCESSOR_FATAL:
+ v5param->apicid = param1;
+ v5param->flags = SETWA_FLAGS_APICID;
+ break;
+ case ACPI_EINJ_MEMORY_CORRECTABLE:
+ case ACPI_EINJ_MEMORY_UNCORRECTABLE:
+ case ACPI_EINJ_MEMORY_FATAL:
+ v5param->memory_address = param1;
+ v5param->memory_address_range = param2;
+ v5param->flags = SETWA_FLAGS_MEM;
+ break;
+ case ACPI_EINJ_PCIX_CORRECTABLE:
+ case ACPI_EINJ_PCIX_UNCORRECTABLE:
+ case ACPI_EINJ_PCIX_FATAL:
+ v5param->pcie_sbdf = param1;
+ v5param->flags = SETWA_FLAGS_PCIE_SBDF;
+ break;
+ }
+ }
+ } else {
+ rc = apei_exec_run(&ctx, ACPI_EINJ_SET_ERROR_TYPE);
+ if (rc)
+ return rc;
+ if (einj_param) {
+ struct einj_parameter *v4param = einj_param;
+ v4param->param1 = param1;
+ v4param->param2 = param2;
+ }
+ }
+ rc = apei_exec_run(&ctx, ACPI_EINJ_EXECUTE_OPERATION);
+ if (rc)
+ return rc;
+ for (;;) {
+ rc = apei_exec_run(&ctx, ACPI_EINJ_CHECK_BUSY_STATUS);
+ if (rc)
+ return rc;
+ val = apei_exec_ctx_get_output(&ctx);
+ if (!(val & EINJ_OP_BUSY))
+ break;
+ if (einj_timedout(&timeout))
+ return -EIO;
+ }
+ rc = apei_exec_run(&ctx, ACPI_EINJ_GET_COMMAND_STATUS);
+ if (rc)
+ return rc;
+ val = apei_exec_ctx_get_output(&ctx);
+ if (val != EINJ_STATUS_SUCCESS)
+ return -EBUSY;
+
+ rc = apei_exec_run(&ctx, ACPI_EINJ_GET_TRIGGER_TABLE);
+ if (rc)
+ return rc;
+ trigger_paddr = apei_exec_ctx_get_output(&ctx);
+ if (notrigger == 0) {
+ rc = __einj_error_trigger(trigger_paddr, type, param1, param2);
+ if (rc)
+ return rc;
+ }
+ rc = apei_exec_run_optional(&ctx, ACPI_EINJ_END_OPERATION);
+
+ return rc;
+}
+
+/* Inject the specified hardware error */
+static int einj_error_inject(u32 type, u32 flags, u64 param1, u64 param2,
+ u64 param3, u64 param4)
+{
+ int rc;
+ unsigned long pfn;
+
+ /* If user manually set "flags", make sure it is legal */
+ if (flags && (flags &
+ ~(SETWA_FLAGS_APICID|SETWA_FLAGS_MEM|SETWA_FLAGS_PCIE_SBDF)))
+ return -EINVAL;
+
+ /*
+ * We need extra sanity checks for memory errors.
+ * Other types leap directly to injection.
+ */
+
+ /* ensure param1/param2 existed */
+ if (!(param_extension || acpi5))
+ goto inject;
+
+ /* ensure injection is memory related */
+ if (type & ACPI5_VENDOR_BIT) {
+ if (vendor_flags != SETWA_FLAGS_MEM)
+ goto inject;
+ } else if (!(type & MEM_ERROR_MASK) && !(flags & SETWA_FLAGS_MEM))
+ goto inject;
+
+ /*
+ * Disallow crazy address masks that give BIOS leeway to pick
+ * injection address almost anywhere. Insist on page or
+ * better granularity and that target address is normal RAM.
+ */
+ pfn = PFN_DOWN(param1 & param2);
+ if (!page_is_ram(pfn) || ((param2 & PAGE_MASK) != PAGE_MASK))
+ return -EINVAL;
+
+inject:
+ mutex_lock(&einj_mutex);
+ rc = __einj_error_inject(type, flags, param1, param2, param3, param4);
+ mutex_unlock(&einj_mutex);
+
+ return rc;
+}
+
+static u32 error_type;
+static u32 error_flags;
+static u64 error_param1;
+static u64 error_param2;
+static u64 error_param3;
+static u64 error_param4;
+static struct dentry *einj_debug_dir;
+
+static int available_error_type_show(struct seq_file *m, void *v)
+{
+ int rc;
+ u32 available_error_type = 0;
+
+ rc = einj_get_available_error_type(&available_error_type);
+ if (rc)
+ return rc;
+ if (available_error_type & 0x0001)
+ seq_printf(m, "0x00000001\tProcessor Correctable\n");
+ if (available_error_type & 0x0002)
+ seq_printf(m, "0x00000002\tProcessor Uncorrectable non-fatal\n");
+ if (available_error_type & 0x0004)
+ seq_printf(m, "0x00000004\tProcessor Uncorrectable fatal\n");
+ if (available_error_type & 0x0008)
+ seq_printf(m, "0x00000008\tMemory Correctable\n");
+ if (available_error_type & 0x0010)
+ seq_printf(m, "0x00000010\tMemory Uncorrectable non-fatal\n");
+ if (available_error_type & 0x0020)
+ seq_printf(m, "0x00000020\tMemory Uncorrectable fatal\n");
+ if (available_error_type & 0x0040)
+ seq_printf(m, "0x00000040\tPCI Express Correctable\n");
+ if (available_error_type & 0x0080)
+ seq_printf(m, "0x00000080\tPCI Express Uncorrectable non-fatal\n");
+ if (available_error_type & 0x0100)
+ seq_printf(m, "0x00000100\tPCI Express Uncorrectable fatal\n");
+ if (available_error_type & 0x0200)
+ seq_printf(m, "0x00000200\tPlatform Correctable\n");
+ if (available_error_type & 0x0400)
+ seq_printf(m, "0x00000400\tPlatform Uncorrectable non-fatal\n");
+ if (available_error_type & 0x0800)
+ seq_printf(m, "0x00000800\tPlatform Uncorrectable fatal\n");
+
+ return 0;
+}
+
+static int available_error_type_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, available_error_type_show, NULL);
+}
+
+static const struct file_operations available_error_type_fops = {
+ .open = available_error_type_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+static int error_type_get(void *data, u64 *val)
+{
+ *val = error_type;
+
+ return 0;
+}
+
+static int error_type_set(void *data, u64 val)
+{
+ int rc;
+ u32 available_error_type = 0;
+ u32 tval, vendor;
+
+ /*
+ * Vendor defined types have 0x80000000 bit set, and
+ * are not enumerated by ACPI_EINJ_GET_ERROR_TYPE
+ */
+ vendor = val & ACPI5_VENDOR_BIT;
+ tval = val & 0x7fffffff;
+
+ /* Only one error type can be specified */
+ if (tval & (tval - 1))
+ return -EINVAL;
+ if (!vendor) {
+ rc = einj_get_available_error_type(&available_error_type);
+ if (rc)
+ return rc;
+ if (!(val & available_error_type))
+ return -EINVAL;
+ }
+ error_type = val;
+
+ return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(error_type_fops, error_type_get,
+ error_type_set, "0x%llx\n");
+
+static int error_inject_set(void *data, u64 val)
+{
+ if (!error_type)
+ return -EINVAL;
+
+ return einj_error_inject(error_type, error_flags, error_param1, error_param2,
+ error_param3, error_param4);
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(error_inject_fops, NULL,
+ error_inject_set, "%llu\n");
+
+static int einj_check_table(struct acpi_table_einj *einj_tab)
+{
+ if ((einj_tab->header_length !=
+ (sizeof(struct acpi_table_einj) - sizeof(einj_tab->header)))
+ && (einj_tab->header_length != sizeof(struct acpi_table_einj)))
+ return -EINVAL;
+ if (einj_tab->header.length < sizeof(struct acpi_table_einj))
+ return -EINVAL;
+ if (einj_tab->entries !=
+ (einj_tab->header.length - sizeof(struct acpi_table_einj)) /
+ sizeof(struct acpi_einj_entry))
+ return -EINVAL;
+
+ return 0;
+}
+
+static int __init einj_init(void)
+{
+ int rc;
+ acpi_status status;
+ struct dentry *fentry;
+ struct apei_exec_context ctx;
+
+ if (acpi_disabled)
+ return -ENODEV;
+
+ status = acpi_get_table(ACPI_SIG_EINJ, 0,
+ (struct acpi_table_header **)&einj_tab);
+ if (status == AE_NOT_FOUND)
+ return -ENODEV;
+ else if (ACPI_FAILURE(status)) {
+ const char *msg = acpi_format_exception(status);
+ pr_err(EINJ_PFX "Failed to get table, %s\n", msg);
+ return -EINVAL;
+ }
+
+ rc = einj_check_table(einj_tab);
+ if (rc) {
+ pr_warning(FW_BUG EINJ_PFX "EINJ table is invalid\n");
+ return -EINVAL;
+ }
+
+ rc = -ENOMEM;
+ einj_debug_dir = debugfs_create_dir("einj", apei_get_debugfs_dir());
+ if (!einj_debug_dir)
+ goto err_cleanup;
+ fentry = debugfs_create_file("available_error_type", S_IRUSR,
+ einj_debug_dir, NULL,
+ &available_error_type_fops);
+ if (!fentry)
+ goto err_cleanup;
+ fentry = debugfs_create_file("error_type", S_IRUSR | S_IWUSR,
+ einj_debug_dir, NULL, &error_type_fops);
+ if (!fentry)
+ goto err_cleanup;
+ fentry = debugfs_create_file("error_inject", S_IWUSR,
+ einj_debug_dir, NULL, &error_inject_fops);
+ if (!fentry)
+ goto err_cleanup;
+
+ apei_resources_init(&einj_resources);
+ einj_exec_ctx_init(&ctx);
+ rc = apei_exec_collect_resources(&ctx, &einj_resources);
+ if (rc)
+ goto err_fini;
+ rc = apei_resources_request(&einj_resources, "APEI EINJ");
+ if (rc)
+ goto err_fini;
+ rc = apei_exec_pre_map_gars(&ctx);
+ if (rc)
+ goto err_release;
+
+ rc = -ENOMEM;
+ einj_param = einj_get_parameter_address();
+ if ((param_extension || acpi5) && einj_param) {
+ fentry = debugfs_create_x32("flags", S_IRUSR | S_IWUSR,
+ einj_debug_dir, &error_flags);
+ if (!fentry)
+ goto err_unmap;
+ fentry = debugfs_create_x64("param1", S_IRUSR | S_IWUSR,
+ einj_debug_dir, &error_param1);
+ if (!fentry)
+ goto err_unmap;
+ fentry = debugfs_create_x64("param2", S_IRUSR | S_IWUSR,
+ einj_debug_dir, &error_param2);
+ if (!fentry)
+ goto err_unmap;
+ fentry = debugfs_create_x64("param3", S_IRUSR | S_IWUSR,
+ einj_debug_dir, &error_param3);
+ if (!fentry)
+ goto err_unmap;
+ fentry = debugfs_create_x64("param4", S_IRUSR | S_IWUSR,
+ einj_debug_dir, &error_param4);
+ if (!fentry)
+ goto err_unmap;
+
+ fentry = debugfs_create_x32("notrigger", S_IRUSR | S_IWUSR,
+ einj_debug_dir, &notrigger);
+ if (!fentry)
+ goto err_unmap;
+ }
+
+ if (vendor_dev[0]) {
+ vendor_blob.data = vendor_dev;
+ vendor_blob.size = strlen(vendor_dev);
+ fentry = debugfs_create_blob("vendor", S_IRUSR,
+ einj_debug_dir, &vendor_blob);
+ if (!fentry)
+ goto err_unmap;
+ fentry = debugfs_create_x32("vendor_flags", S_IRUSR | S_IWUSR,
+ einj_debug_dir, &vendor_flags);
+ if (!fentry)
+ goto err_unmap;
+ }
+
+ pr_info(EINJ_PFX "Error INJection is initialized.\n");
+
+ return 0;
+
+err_unmap:
+ if (einj_param) {
+ acpi_size size = (acpi5) ?
+ sizeof(struct set_error_type_with_address) :
+ sizeof(struct einj_parameter);
+
+ acpi_os_unmap_iomem(einj_param, size);
+ }
+ apei_exec_post_unmap_gars(&ctx);
+err_release:
+ apei_resources_release(&einj_resources);
+err_fini:
+ apei_resources_fini(&einj_resources);
+err_cleanup:
+ debugfs_remove_recursive(einj_debug_dir);
+
+ return rc;
+}
+
+static void __exit einj_exit(void)
+{
+ struct apei_exec_context ctx;
+
+ if (einj_param) {
+ acpi_size size = (acpi5) ?
+ sizeof(struct set_error_type_with_address) :
+ sizeof(struct einj_parameter);
+
+ acpi_os_unmap_iomem(einj_param, size);
+ }
+ einj_exec_ctx_init(&ctx);
+ apei_exec_post_unmap_gars(&ctx);
+ apei_resources_release(&einj_resources);
+ apei_resources_fini(&einj_resources);
+ debugfs_remove_recursive(einj_debug_dir);
+}
+
+module_init(einj_init);
+module_exit(einj_exit);
+
+MODULE_AUTHOR("Huang Ying");
+MODULE_DESCRIPTION("APEI Error INJection support");
+MODULE_LICENSE("GPL");
diff --git a/drivers/acpi/apei/erst-dbg.c b/drivers/acpi/apei/erst-dbg.c
new file mode 100644
index 000000000..04ab5c9d3
--- /dev/null
+++ b/drivers/acpi/apei/erst-dbg.c
@@ -0,0 +1,243 @@
+/*
+ * APEI Error Record Serialization Table debug support
+ *
+ * ERST is a way provided by APEI to save and retrieve hardware error
+ * information to and from a persistent store. This file provide the
+ * debugging/testing support for ERST kernel support and firmware
+ * implementation.
+ *
+ * Copyright 2010 Intel Corp.
+ * Author: Huang Ying <ying.huang@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/uaccess.h>
+#include <acpi/apei.h>
+#include <linux/miscdevice.h>
+
+#include "apei-internal.h"
+
+#define ERST_DBG_PFX "ERST DBG: "
+
+#define ERST_DBG_RECORD_LEN_MAX 0x4000
+
+static void *erst_dbg_buf;
+static unsigned int erst_dbg_buf_len;
+
+/* Prevent erst_dbg_read/write from being invoked concurrently */
+static DEFINE_MUTEX(erst_dbg_mutex);
+
+static int erst_dbg_open(struct inode *inode, struct file *file)
+{
+ int rc, *pos;
+
+ if (erst_disable)
+ return -ENODEV;
+
+ pos = (int *)&file->private_data;
+
+ rc = erst_get_record_id_begin(pos);
+ if (rc)
+ return rc;
+
+ return nonseekable_open(inode, file);
+}
+
+static int erst_dbg_release(struct inode *inode, struct file *file)
+{
+ erst_get_record_id_end();
+
+ return 0;
+}
+
+static long erst_dbg_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
+{
+ int rc;
+ u64 record_id;
+ u32 record_count;
+
+ switch (cmd) {
+ case APEI_ERST_CLEAR_RECORD:
+ rc = copy_from_user(&record_id, (void __user *)arg,
+ sizeof(record_id));
+ if (rc)
+ return -EFAULT;
+ return erst_clear(record_id);
+ case APEI_ERST_GET_RECORD_COUNT:
+ rc = erst_get_record_count();
+ if (rc < 0)
+ return rc;
+ record_count = rc;
+ rc = put_user(record_count, (u32 __user *)arg);
+ if (rc)
+ return rc;
+ return 0;
+ default:
+ return -ENOTTY;
+ }
+}
+
+static ssize_t erst_dbg_read(struct file *filp, char __user *ubuf,
+ size_t usize, loff_t *off)
+{
+ int rc, *pos;
+ ssize_t len = 0;
+ u64 id;
+
+ if (*off)
+ return -EINVAL;
+
+ if (mutex_lock_interruptible(&erst_dbg_mutex) != 0)
+ return -EINTR;
+
+ pos = (int *)&filp->private_data;
+
+retry_next:
+ rc = erst_get_record_id_next(pos, &id);
+ if (rc)
+ goto out;
+ /* no more record */
+ if (id == APEI_ERST_INVALID_RECORD_ID) {
+ /*
+ * If the persistent store is empty initially, the function
+ * 'erst_read' below will return "-ENOENT" value. This causes
+ * 'retry_next' label is entered again. The returned value
+ * should be zero indicating the read operation is EOF.
+ */
+ len = 0;
+
+ goto out;
+ }
+retry:
+ rc = len = erst_read(id, erst_dbg_buf, erst_dbg_buf_len);
+ /* The record may be cleared by others, try read next record */
+ if (rc == -ENOENT)
+ goto retry_next;
+ if (rc < 0)
+ goto out;
+ if (len > ERST_DBG_RECORD_LEN_MAX) {
+ pr_warning(ERST_DBG_PFX
+ "Record (ID: 0x%llx) length is too long: %zd\n",
+ id, len);
+ rc = -EIO;
+ goto out;
+ }
+ if (len > erst_dbg_buf_len) {
+ void *p;
+ rc = -ENOMEM;
+ p = kmalloc(len, GFP_KERNEL);
+ if (!p)
+ goto out;
+ kfree(erst_dbg_buf);
+ erst_dbg_buf = p;
+ erst_dbg_buf_len = len;
+ goto retry;
+ }
+
+ rc = -EINVAL;
+ if (len > usize)
+ goto out;
+
+ rc = -EFAULT;
+ if (copy_to_user(ubuf, erst_dbg_buf, len))
+ goto out;
+ rc = 0;
+out:
+ mutex_unlock(&erst_dbg_mutex);
+ return rc ? rc : len;
+}
+
+static ssize_t erst_dbg_write(struct file *filp, const char __user *ubuf,
+ size_t usize, loff_t *off)
+{
+ int rc;
+ struct cper_record_header *rcd;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ if (usize > ERST_DBG_RECORD_LEN_MAX) {
+ pr_err(ERST_DBG_PFX "Too long record to be written\n");
+ return -EINVAL;
+ }
+
+ if (mutex_lock_interruptible(&erst_dbg_mutex))
+ return -EINTR;
+ if (usize > erst_dbg_buf_len) {
+ void *p;
+ rc = -ENOMEM;
+ p = kmalloc(usize, GFP_KERNEL);
+ if (!p)
+ goto out;
+ kfree(erst_dbg_buf);
+ erst_dbg_buf = p;
+ erst_dbg_buf_len = usize;
+ }
+ rc = copy_from_user(erst_dbg_buf, ubuf, usize);
+ if (rc) {
+ rc = -EFAULT;
+ goto out;
+ }
+ rcd = erst_dbg_buf;
+ rc = -EINVAL;
+ if (rcd->record_length != usize)
+ goto out;
+
+ rc = erst_write(erst_dbg_buf);
+
+out:
+ mutex_unlock(&erst_dbg_mutex);
+ return rc < 0 ? rc : usize;
+}
+
+static const struct file_operations erst_dbg_ops = {
+ .owner = THIS_MODULE,
+ .open = erst_dbg_open,
+ .release = erst_dbg_release,
+ .read = erst_dbg_read,
+ .write = erst_dbg_write,
+ .unlocked_ioctl = erst_dbg_ioctl,
+ .llseek = no_llseek,
+};
+
+static struct miscdevice erst_dbg_dev = {
+ .minor = MISC_DYNAMIC_MINOR,
+ .name = "erst_dbg",
+ .fops = &erst_dbg_ops,
+};
+
+static __init int erst_dbg_init(void)
+{
+ if (erst_disable) {
+ pr_info(ERST_DBG_PFX "ERST support is disabled.\n");
+ return -ENODEV;
+ }
+ return misc_register(&erst_dbg_dev);
+}
+
+static __exit void erst_dbg_exit(void)
+{
+ misc_deregister(&erst_dbg_dev);
+ kfree(erst_dbg_buf);
+}
+
+module_init(erst_dbg_init);
+module_exit(erst_dbg_exit);
+
+MODULE_AUTHOR("Huang Ying");
+MODULE_DESCRIPTION("APEI Error Record Serialization Table debug support");
+MODULE_LICENSE("GPL");
diff --git a/drivers/acpi/apei/erst.c b/drivers/acpi/apei/erst.c
new file mode 100644
index 000000000..ed65e9c4b
--- /dev/null
+++ b/drivers/acpi/apei/erst.c
@@ -0,0 +1,1230 @@
+/*
+ * APEI Error Record Serialization Table support
+ *
+ * ERST is a way provided by APEI to save and retrieve hardware error
+ * information to and from a persistent store.
+ *
+ * For more information about ERST, please refer to ACPI Specification
+ * version 4.0, section 17.4.
+ *
+ * Copyright 2010 Intel Corp.
+ * Author: Huang Ying <ying.huang@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/io.h>
+#include <linux/acpi.h>
+#include <linux/uaccess.h>
+#include <linux/cper.h>
+#include <linux/nmi.h>
+#include <linux/hardirq.h>
+#include <linux/pstore.h>
+#include <acpi/apei.h>
+
+#include "apei-internal.h"
+
+#undef pr_fmt
+#define pr_fmt(fmt) "ERST: " fmt
+
+/* ERST command status */
+#define ERST_STATUS_SUCCESS 0x0
+#define ERST_STATUS_NOT_ENOUGH_SPACE 0x1
+#define ERST_STATUS_HARDWARE_NOT_AVAILABLE 0x2
+#define ERST_STATUS_FAILED 0x3
+#define ERST_STATUS_RECORD_STORE_EMPTY 0x4
+#define ERST_STATUS_RECORD_NOT_FOUND 0x5
+
+#define ERST_TAB_ENTRY(tab) \
+ ((struct acpi_whea_header *)((char *)(tab) + \
+ sizeof(struct acpi_table_erst)))
+
+#define SPIN_UNIT 100 /* 100ns */
+/* Firmware should respond within 1 milliseconds */
+#define FIRMWARE_TIMEOUT (1 * NSEC_PER_MSEC)
+#define FIRMWARE_MAX_STALL 50 /* 50us */
+
+int erst_disable;
+EXPORT_SYMBOL_GPL(erst_disable);
+
+static struct acpi_table_erst *erst_tab;
+
+/* ERST Error Log Address Range atrributes */
+#define ERST_RANGE_RESERVED 0x0001
+#define ERST_RANGE_NVRAM 0x0002
+#define ERST_RANGE_SLOW 0x0004
+
+/*
+ * ERST Error Log Address Range, used as buffer for reading/writing
+ * error records.
+ */
+static struct erst_erange {
+ u64 base;
+ u64 size;
+ void __iomem *vaddr;
+ u32 attr;
+} erst_erange;
+
+/*
+ * Prevent ERST interpreter to run simultaneously, because the
+ * corresponding firmware implementation may not work properly when
+ * invoked simultaneously.
+ *
+ * It is used to provide exclusive accessing for ERST Error Log
+ * Address Range too.
+ */
+static DEFINE_RAW_SPINLOCK(erst_lock);
+
+static inline int erst_errno(int command_status)
+{
+ switch (command_status) {
+ case ERST_STATUS_SUCCESS:
+ return 0;
+ case ERST_STATUS_HARDWARE_NOT_AVAILABLE:
+ return -ENODEV;
+ case ERST_STATUS_NOT_ENOUGH_SPACE:
+ return -ENOSPC;
+ case ERST_STATUS_RECORD_STORE_EMPTY:
+ case ERST_STATUS_RECORD_NOT_FOUND:
+ return -ENOENT;
+ default:
+ return -EINVAL;
+ }
+}
+
+static int erst_timedout(u64 *t, u64 spin_unit)
+{
+ if ((s64)*t < spin_unit) {
+ pr_warn(FW_WARN "Firmware does not respond in time.\n");
+ return 1;
+ }
+ *t -= spin_unit;
+ ndelay(spin_unit);
+ touch_nmi_watchdog();
+ return 0;
+}
+
+static int erst_exec_load_var1(struct apei_exec_context *ctx,
+ struct acpi_whea_header *entry)
+{
+ return __apei_exec_read_register(entry, &ctx->var1);
+}
+
+static int erst_exec_load_var2(struct apei_exec_context *ctx,
+ struct acpi_whea_header *entry)
+{
+ return __apei_exec_read_register(entry, &ctx->var2);
+}
+
+static int erst_exec_store_var1(struct apei_exec_context *ctx,
+ struct acpi_whea_header *entry)
+{
+ return __apei_exec_write_register(entry, ctx->var1);
+}
+
+static int erst_exec_add(struct apei_exec_context *ctx,
+ struct acpi_whea_header *entry)
+{
+ ctx->var1 += ctx->var2;
+ return 0;
+}
+
+static int erst_exec_subtract(struct apei_exec_context *ctx,
+ struct acpi_whea_header *entry)
+{
+ ctx->var1 -= ctx->var2;
+ return 0;
+}
+
+static int erst_exec_add_value(struct apei_exec_context *ctx,
+ struct acpi_whea_header *entry)
+{
+ int rc;
+ u64 val;
+
+ rc = __apei_exec_read_register(entry, &val);
+ if (rc)
+ return rc;
+ val += ctx->value;
+ rc = __apei_exec_write_register(entry, val);
+ return rc;
+}
+
+static int erst_exec_subtract_value(struct apei_exec_context *ctx,
+ struct acpi_whea_header *entry)
+{
+ int rc;
+ u64 val;
+
+ rc = __apei_exec_read_register(entry, &val);
+ if (rc)
+ return rc;
+ val -= ctx->value;
+ rc = __apei_exec_write_register(entry, val);
+ return rc;
+}
+
+static int erst_exec_stall(struct apei_exec_context *ctx,
+ struct acpi_whea_header *entry)
+{
+ u64 stall_time;
+
+ if (ctx->value > FIRMWARE_MAX_STALL) {
+ if (!in_nmi())
+ pr_warn(FW_WARN
+ "Too long stall time for stall instruction: 0x%llx.\n",
+ ctx->value);
+ stall_time = FIRMWARE_MAX_STALL;
+ } else
+ stall_time = ctx->value;
+ udelay(stall_time);
+ return 0;
+}
+
+static int erst_exec_stall_while_true(struct apei_exec_context *ctx,
+ struct acpi_whea_header *entry)
+{
+ int rc;
+ u64 val;
+ u64 timeout = FIRMWARE_TIMEOUT;
+ u64 stall_time;
+
+ if (ctx->var1 > FIRMWARE_MAX_STALL) {
+ if (!in_nmi())
+ pr_warn(FW_WARN
+ "Too long stall time for stall while true instruction: 0x%llx.\n",
+ ctx->var1);
+ stall_time = FIRMWARE_MAX_STALL;
+ } else
+ stall_time = ctx->var1;
+
+ for (;;) {
+ rc = __apei_exec_read_register(entry, &val);
+ if (rc)
+ return rc;
+ if (val != ctx->value)
+ break;
+ if (erst_timedout(&timeout, stall_time * NSEC_PER_USEC))
+ return -EIO;
+ }
+ return 0;
+}
+
+static int erst_exec_skip_next_instruction_if_true(
+ struct apei_exec_context *ctx,
+ struct acpi_whea_header *entry)
+{
+ int rc;
+ u64 val;
+
+ rc = __apei_exec_read_register(entry, &val);
+ if (rc)
+ return rc;
+ if (val == ctx->value) {
+ ctx->ip += 2;
+ return APEI_EXEC_SET_IP;
+ }
+
+ return 0;
+}
+
+static int erst_exec_goto(struct apei_exec_context *ctx,
+ struct acpi_whea_header *entry)
+{
+ ctx->ip = ctx->value;
+ return APEI_EXEC_SET_IP;
+}
+
+static int erst_exec_set_src_address_base(struct apei_exec_context *ctx,
+ struct acpi_whea_header *entry)
+{
+ return __apei_exec_read_register(entry, &ctx->src_base);
+}
+
+static int erst_exec_set_dst_address_base(struct apei_exec_context *ctx,
+ struct acpi_whea_header *entry)
+{
+ return __apei_exec_read_register(entry, &ctx->dst_base);
+}
+
+static int erst_exec_move_data(struct apei_exec_context *ctx,
+ struct acpi_whea_header *entry)
+{
+ int rc;
+ u64 offset;
+ void *src, *dst;
+
+ /* ioremap does not work in interrupt context */
+ if (in_interrupt()) {
+ pr_warn("MOVE_DATA can not be used in interrupt context.\n");
+ return -EBUSY;
+ }
+
+ rc = __apei_exec_read_register(entry, &offset);
+ if (rc)
+ return rc;
+
+ src = ioremap(ctx->src_base + offset, ctx->var2);
+ if (!src)
+ return -ENOMEM;
+ dst = ioremap(ctx->dst_base + offset, ctx->var2);
+ if (!dst) {
+ iounmap(src);
+ return -ENOMEM;
+ }
+
+ memmove(dst, src, ctx->var2);
+
+ iounmap(src);
+ iounmap(dst);
+
+ return 0;
+}
+
+static struct apei_exec_ins_type erst_ins_type[] = {
+ [ACPI_ERST_READ_REGISTER] = {
+ .flags = APEI_EXEC_INS_ACCESS_REGISTER,
+ .run = apei_exec_read_register,
+ },
+ [ACPI_ERST_READ_REGISTER_VALUE] = {
+ .flags = APEI_EXEC_INS_ACCESS_REGISTER,
+ .run = apei_exec_read_register_value,
+ },
+ [ACPI_ERST_WRITE_REGISTER] = {
+ .flags = APEI_EXEC_INS_ACCESS_REGISTER,
+ .run = apei_exec_write_register,
+ },
+ [ACPI_ERST_WRITE_REGISTER_VALUE] = {
+ .flags = APEI_EXEC_INS_ACCESS_REGISTER,
+ .run = apei_exec_write_register_value,
+ },
+ [ACPI_ERST_NOOP] = {
+ .flags = 0,
+ .run = apei_exec_noop,
+ },
+ [ACPI_ERST_LOAD_VAR1] = {
+ .flags = APEI_EXEC_INS_ACCESS_REGISTER,
+ .run = erst_exec_load_var1,
+ },
+ [ACPI_ERST_LOAD_VAR2] = {
+ .flags = APEI_EXEC_INS_ACCESS_REGISTER,
+ .run = erst_exec_load_var2,
+ },
+ [ACPI_ERST_STORE_VAR1] = {
+ .flags = APEI_EXEC_INS_ACCESS_REGISTER,
+ .run = erst_exec_store_var1,
+ },
+ [ACPI_ERST_ADD] = {
+ .flags = 0,
+ .run = erst_exec_add,
+ },
+ [ACPI_ERST_SUBTRACT] = {
+ .flags = 0,
+ .run = erst_exec_subtract,
+ },
+ [ACPI_ERST_ADD_VALUE] = {
+ .flags = APEI_EXEC_INS_ACCESS_REGISTER,
+ .run = erst_exec_add_value,
+ },
+ [ACPI_ERST_SUBTRACT_VALUE] = {
+ .flags = APEI_EXEC_INS_ACCESS_REGISTER,
+ .run = erst_exec_subtract_value,
+ },
+ [ACPI_ERST_STALL] = {
+ .flags = 0,
+ .run = erst_exec_stall,
+ },
+ [ACPI_ERST_STALL_WHILE_TRUE] = {
+ .flags = APEI_EXEC_INS_ACCESS_REGISTER,
+ .run = erst_exec_stall_while_true,
+ },
+ [ACPI_ERST_SKIP_NEXT_IF_TRUE] = {
+ .flags = APEI_EXEC_INS_ACCESS_REGISTER,
+ .run = erst_exec_skip_next_instruction_if_true,
+ },
+ [ACPI_ERST_GOTO] = {
+ .flags = 0,
+ .run = erst_exec_goto,
+ },
+ [ACPI_ERST_SET_SRC_ADDRESS_BASE] = {
+ .flags = APEI_EXEC_INS_ACCESS_REGISTER,
+ .run = erst_exec_set_src_address_base,
+ },
+ [ACPI_ERST_SET_DST_ADDRESS_BASE] = {
+ .flags = APEI_EXEC_INS_ACCESS_REGISTER,
+ .run = erst_exec_set_dst_address_base,
+ },
+ [ACPI_ERST_MOVE_DATA] = {
+ .flags = APEI_EXEC_INS_ACCESS_REGISTER,
+ .run = erst_exec_move_data,
+ },
+};
+
+static inline void erst_exec_ctx_init(struct apei_exec_context *ctx)
+{
+ apei_exec_ctx_init(ctx, erst_ins_type, ARRAY_SIZE(erst_ins_type),
+ ERST_TAB_ENTRY(erst_tab), erst_tab->entries);
+}
+
+static int erst_get_erange(struct erst_erange *range)
+{
+ struct apei_exec_context ctx;
+ int rc;
+
+ erst_exec_ctx_init(&ctx);
+ rc = apei_exec_run(&ctx, ACPI_ERST_GET_ERROR_RANGE);
+ if (rc)
+ return rc;
+ range->base = apei_exec_ctx_get_output(&ctx);
+ rc = apei_exec_run(&ctx, ACPI_ERST_GET_ERROR_LENGTH);
+ if (rc)
+ return rc;
+ range->size = apei_exec_ctx_get_output(&ctx);
+ rc = apei_exec_run(&ctx, ACPI_ERST_GET_ERROR_ATTRIBUTES);
+ if (rc)
+ return rc;
+ range->attr = apei_exec_ctx_get_output(&ctx);
+
+ return 0;
+}
+
+static ssize_t __erst_get_record_count(void)
+{
+ struct apei_exec_context ctx;
+ int rc;
+
+ erst_exec_ctx_init(&ctx);
+ rc = apei_exec_run(&ctx, ACPI_ERST_GET_RECORD_COUNT);
+ if (rc)
+ return rc;
+ return apei_exec_ctx_get_output(&ctx);
+}
+
+ssize_t erst_get_record_count(void)
+{
+ ssize_t count;
+ unsigned long flags;
+
+ if (erst_disable)
+ return -ENODEV;
+
+ raw_spin_lock_irqsave(&erst_lock, flags);
+ count = __erst_get_record_count();
+ raw_spin_unlock_irqrestore(&erst_lock, flags);
+
+ return count;
+}
+EXPORT_SYMBOL_GPL(erst_get_record_count);
+
+#define ERST_RECORD_ID_CACHE_SIZE_MIN 16
+#define ERST_RECORD_ID_CACHE_SIZE_MAX 1024
+
+struct erst_record_id_cache {
+ struct mutex lock;
+ u64 *entries;
+ int len;
+ int size;
+ int refcount;
+};
+
+static struct erst_record_id_cache erst_record_id_cache = {
+ .lock = __MUTEX_INITIALIZER(erst_record_id_cache.lock),
+ .refcount = 0,
+};
+
+static int __erst_get_next_record_id(u64 *record_id)
+{
+ struct apei_exec_context ctx;
+ int rc;
+
+ erst_exec_ctx_init(&ctx);
+ rc = apei_exec_run(&ctx, ACPI_ERST_GET_RECORD_ID);
+ if (rc)
+ return rc;
+ *record_id = apei_exec_ctx_get_output(&ctx);
+
+ return 0;
+}
+
+int erst_get_record_id_begin(int *pos)
+{
+ int rc;
+
+ if (erst_disable)
+ return -ENODEV;
+
+ rc = mutex_lock_interruptible(&erst_record_id_cache.lock);
+ if (rc)
+ return rc;
+ erst_record_id_cache.refcount++;
+ mutex_unlock(&erst_record_id_cache.lock);
+
+ *pos = 0;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(erst_get_record_id_begin);
+
+/* erst_record_id_cache.lock must be held by caller */
+static int __erst_record_id_cache_add_one(void)
+{
+ u64 id, prev_id, first_id;
+ int i, rc;
+ u64 *entries;
+ unsigned long flags;
+
+ id = prev_id = first_id = APEI_ERST_INVALID_RECORD_ID;
+retry:
+ raw_spin_lock_irqsave(&erst_lock, flags);
+ rc = __erst_get_next_record_id(&id);
+ raw_spin_unlock_irqrestore(&erst_lock, flags);
+ if (rc == -ENOENT)
+ return 0;
+ if (rc)
+ return rc;
+ if (id == APEI_ERST_INVALID_RECORD_ID)
+ return 0;
+ /* can not skip current ID, or loop back to first ID */
+ if (id == prev_id || id == first_id)
+ return 0;
+ if (first_id == APEI_ERST_INVALID_RECORD_ID)
+ first_id = id;
+ prev_id = id;
+
+ entries = erst_record_id_cache.entries;
+ for (i = 0; i < erst_record_id_cache.len; i++) {
+ if (entries[i] == id)
+ break;
+ }
+ /* record id already in cache, try next */
+ if (i < erst_record_id_cache.len)
+ goto retry;
+ if (erst_record_id_cache.len >= erst_record_id_cache.size) {
+ int new_size, alloc_size;
+ u64 *new_entries;
+
+ new_size = erst_record_id_cache.size * 2;
+ new_size = clamp_val(new_size, ERST_RECORD_ID_CACHE_SIZE_MIN,
+ ERST_RECORD_ID_CACHE_SIZE_MAX);
+ if (new_size <= erst_record_id_cache.size) {
+ if (printk_ratelimit())
+ pr_warn(FW_WARN "too many record IDs!\n");
+ return 0;
+ }
+ alloc_size = new_size * sizeof(entries[0]);
+ if (alloc_size < PAGE_SIZE)
+ new_entries = kmalloc(alloc_size, GFP_KERNEL);
+ else
+ new_entries = vmalloc(alloc_size);
+ if (!new_entries)
+ return -ENOMEM;
+ memcpy(new_entries, entries,
+ erst_record_id_cache.len * sizeof(entries[0]));
+ if (erst_record_id_cache.size < PAGE_SIZE)
+ kfree(entries);
+ else
+ vfree(entries);
+ erst_record_id_cache.entries = entries = new_entries;
+ erst_record_id_cache.size = new_size;
+ }
+ entries[i] = id;
+ erst_record_id_cache.len++;
+
+ return 1;
+}
+
+/*
+ * Get the record ID of an existing error record on the persistent
+ * storage. If there is no error record on the persistent storage, the
+ * returned record_id is APEI_ERST_INVALID_RECORD_ID.
+ */
+int erst_get_record_id_next(int *pos, u64 *record_id)
+{
+ int rc = 0;
+ u64 *entries;
+
+ if (erst_disable)
+ return -ENODEV;
+
+ /* must be enclosed by erst_get_record_id_begin/end */
+ BUG_ON(!erst_record_id_cache.refcount);
+ BUG_ON(*pos < 0 || *pos > erst_record_id_cache.len);
+
+ mutex_lock(&erst_record_id_cache.lock);
+ entries = erst_record_id_cache.entries;
+ for (; *pos < erst_record_id_cache.len; (*pos)++)
+ if (entries[*pos] != APEI_ERST_INVALID_RECORD_ID)
+ break;
+ /* found next record id in cache */
+ if (*pos < erst_record_id_cache.len) {
+ *record_id = entries[*pos];
+ (*pos)++;
+ goto out_unlock;
+ }
+
+ /* Try to add one more record ID to cache */
+ rc = __erst_record_id_cache_add_one();
+ if (rc < 0)
+ goto out_unlock;
+ /* successfully add one new ID */
+ if (rc == 1) {
+ *record_id = erst_record_id_cache.entries[*pos];
+ (*pos)++;
+ rc = 0;
+ } else {
+ *pos = -1;
+ *record_id = APEI_ERST_INVALID_RECORD_ID;
+ }
+out_unlock:
+ mutex_unlock(&erst_record_id_cache.lock);
+
+ return rc;
+}
+EXPORT_SYMBOL_GPL(erst_get_record_id_next);
+
+/* erst_record_id_cache.lock must be held by caller */
+static void __erst_record_id_cache_compact(void)
+{
+ int i, wpos = 0;
+ u64 *entries;
+
+ if (erst_record_id_cache.refcount)
+ return;
+
+ entries = erst_record_id_cache.entries;
+ for (i = 0; i < erst_record_id_cache.len; i++) {
+ if (entries[i] == APEI_ERST_INVALID_RECORD_ID)
+ continue;
+ if (wpos != i)
+ entries[wpos] = entries[i];
+ wpos++;
+ }
+ erst_record_id_cache.len = wpos;
+}
+
+void erst_get_record_id_end(void)
+{
+ /*
+ * erst_disable != 0 should be detected by invoker via the
+ * return value of erst_get_record_id_begin/next, so this
+ * function should not be called for erst_disable != 0.
+ */
+ BUG_ON(erst_disable);
+
+ mutex_lock(&erst_record_id_cache.lock);
+ erst_record_id_cache.refcount--;
+ BUG_ON(erst_record_id_cache.refcount < 0);
+ __erst_record_id_cache_compact();
+ mutex_unlock(&erst_record_id_cache.lock);
+}
+EXPORT_SYMBOL_GPL(erst_get_record_id_end);
+
+static int __erst_write_to_storage(u64 offset)
+{
+ struct apei_exec_context ctx;
+ u64 timeout = FIRMWARE_TIMEOUT;
+ u64 val;
+ int rc;
+
+ erst_exec_ctx_init(&ctx);
+ rc = apei_exec_run_optional(&ctx, ACPI_ERST_BEGIN_WRITE);
+ if (rc)
+ return rc;
+ apei_exec_ctx_set_input(&ctx, offset);
+ rc = apei_exec_run(&ctx, ACPI_ERST_SET_RECORD_OFFSET);
+ if (rc)
+ return rc;
+ rc = apei_exec_run(&ctx, ACPI_ERST_EXECUTE_OPERATION);
+ if (rc)
+ return rc;
+ for (;;) {
+ rc = apei_exec_run(&ctx, ACPI_ERST_CHECK_BUSY_STATUS);
+ if (rc)
+ return rc;
+ val = apei_exec_ctx_get_output(&ctx);
+ if (!val)
+ break;
+ if (erst_timedout(&timeout, SPIN_UNIT))
+ return -EIO;
+ }
+ rc = apei_exec_run(&ctx, ACPI_ERST_GET_COMMAND_STATUS);
+ if (rc)
+ return rc;
+ val = apei_exec_ctx_get_output(&ctx);
+ rc = apei_exec_run_optional(&ctx, ACPI_ERST_END);
+ if (rc)
+ return rc;
+
+ return erst_errno(val);
+}
+
+static int __erst_read_from_storage(u64 record_id, u64 offset)
+{
+ struct apei_exec_context ctx;
+ u64 timeout = FIRMWARE_TIMEOUT;
+ u64 val;
+ int rc;
+
+ erst_exec_ctx_init(&ctx);
+ rc = apei_exec_run_optional(&ctx, ACPI_ERST_BEGIN_READ);
+ if (rc)
+ return rc;
+ apei_exec_ctx_set_input(&ctx, offset);
+ rc = apei_exec_run(&ctx, ACPI_ERST_SET_RECORD_OFFSET);
+ if (rc)
+ return rc;
+ apei_exec_ctx_set_input(&ctx, record_id);
+ rc = apei_exec_run(&ctx, ACPI_ERST_SET_RECORD_ID);
+ if (rc)
+ return rc;
+ rc = apei_exec_run(&ctx, ACPI_ERST_EXECUTE_OPERATION);
+ if (rc)
+ return rc;
+ for (;;) {
+ rc = apei_exec_run(&ctx, ACPI_ERST_CHECK_BUSY_STATUS);
+ if (rc)
+ return rc;
+ val = apei_exec_ctx_get_output(&ctx);
+ if (!val)
+ break;
+ if (erst_timedout(&timeout, SPIN_UNIT))
+ return -EIO;
+ };
+ rc = apei_exec_run(&ctx, ACPI_ERST_GET_COMMAND_STATUS);
+ if (rc)
+ return rc;
+ val = apei_exec_ctx_get_output(&ctx);
+ rc = apei_exec_run_optional(&ctx, ACPI_ERST_END);
+ if (rc)
+ return rc;
+
+ return erst_errno(val);
+}
+
+static int __erst_clear_from_storage(u64 record_id)
+{
+ struct apei_exec_context ctx;
+ u64 timeout = FIRMWARE_TIMEOUT;
+ u64 val;
+ int rc;
+
+ erst_exec_ctx_init(&ctx);
+ rc = apei_exec_run_optional(&ctx, ACPI_ERST_BEGIN_CLEAR);
+ if (rc)
+ return rc;
+ apei_exec_ctx_set_input(&ctx, record_id);
+ rc = apei_exec_run(&ctx, ACPI_ERST_SET_RECORD_ID);
+ if (rc)
+ return rc;
+ rc = apei_exec_run(&ctx, ACPI_ERST_EXECUTE_OPERATION);
+ if (rc)
+ return rc;
+ for (;;) {
+ rc = apei_exec_run(&ctx, ACPI_ERST_CHECK_BUSY_STATUS);
+ if (rc)
+ return rc;
+ val = apei_exec_ctx_get_output(&ctx);
+ if (!val)
+ break;
+ if (erst_timedout(&timeout, SPIN_UNIT))
+ return -EIO;
+ }
+ rc = apei_exec_run(&ctx, ACPI_ERST_GET_COMMAND_STATUS);
+ if (rc)
+ return rc;
+ val = apei_exec_ctx_get_output(&ctx);
+ rc = apei_exec_run_optional(&ctx, ACPI_ERST_END);
+ if (rc)
+ return rc;
+
+ return erst_errno(val);
+}
+
+/* NVRAM ERST Error Log Address Range is not supported yet */
+static void pr_unimpl_nvram(void)
+{
+ if (printk_ratelimit())
+ pr_warn("NVRAM ERST Log Address Range not implemented yet.\n");
+}
+
+static int __erst_write_to_nvram(const struct cper_record_header *record)
+{
+ /* do not print message, because printk is not safe for NMI */
+ return -ENOSYS;
+}
+
+static int __erst_read_to_erange_from_nvram(u64 record_id, u64 *offset)
+{
+ pr_unimpl_nvram();
+ return -ENOSYS;
+}
+
+static int __erst_clear_from_nvram(u64 record_id)
+{
+ pr_unimpl_nvram();
+ return -ENOSYS;
+}
+
+int erst_write(const struct cper_record_header *record)
+{
+ int rc;
+ unsigned long flags;
+ struct cper_record_header *rcd_erange;
+
+ if (erst_disable)
+ return -ENODEV;
+
+ if (memcmp(record->signature, CPER_SIG_RECORD, CPER_SIG_SIZE))
+ return -EINVAL;
+
+ if (erst_erange.attr & ERST_RANGE_NVRAM) {
+ if (!raw_spin_trylock_irqsave(&erst_lock, flags))
+ return -EBUSY;
+ rc = __erst_write_to_nvram(record);
+ raw_spin_unlock_irqrestore(&erst_lock, flags);
+ return rc;
+ }
+
+ if (record->record_length > erst_erange.size)
+ return -EINVAL;
+
+ if (!raw_spin_trylock_irqsave(&erst_lock, flags))
+ return -EBUSY;
+ memcpy(erst_erange.vaddr, record, record->record_length);
+ rcd_erange = erst_erange.vaddr;
+ /* signature for serialization system */
+ memcpy(&rcd_erange->persistence_information, "ER", 2);
+
+ rc = __erst_write_to_storage(0);
+ raw_spin_unlock_irqrestore(&erst_lock, flags);
+
+ return rc;
+}
+EXPORT_SYMBOL_GPL(erst_write);
+
+static int __erst_read_to_erange(u64 record_id, u64 *offset)
+{
+ int rc;
+
+ if (erst_erange.attr & ERST_RANGE_NVRAM)
+ return __erst_read_to_erange_from_nvram(
+ record_id, offset);
+
+ rc = __erst_read_from_storage(record_id, 0);
+ if (rc)
+ return rc;
+ *offset = 0;
+
+ return 0;
+}
+
+static ssize_t __erst_read(u64 record_id, struct cper_record_header *record,
+ size_t buflen)
+{
+ int rc;
+ u64 offset, len = 0;
+ struct cper_record_header *rcd_tmp;
+
+ rc = __erst_read_to_erange(record_id, &offset);
+ if (rc)
+ return rc;
+ rcd_tmp = erst_erange.vaddr + offset;
+ len = rcd_tmp->record_length;
+ if (len <= buflen)
+ memcpy(record, rcd_tmp, len);
+
+ return len;
+}
+
+/*
+ * If return value > buflen, the buffer size is not big enough,
+ * else if return value < 0, something goes wrong,
+ * else everything is OK, and return value is record length
+ */
+ssize_t erst_read(u64 record_id, struct cper_record_header *record,
+ size_t buflen)
+{
+ ssize_t len;
+ unsigned long flags;
+
+ if (erst_disable)
+ return -ENODEV;
+
+ raw_spin_lock_irqsave(&erst_lock, flags);
+ len = __erst_read(record_id, record, buflen);
+ raw_spin_unlock_irqrestore(&erst_lock, flags);
+ return len;
+}
+EXPORT_SYMBOL_GPL(erst_read);
+
+int erst_clear(u64 record_id)
+{
+ int rc, i;
+ unsigned long flags;
+ u64 *entries;
+
+ if (erst_disable)
+ return -ENODEV;
+
+ rc = mutex_lock_interruptible(&erst_record_id_cache.lock);
+ if (rc)
+ return rc;
+ raw_spin_lock_irqsave(&erst_lock, flags);
+ if (erst_erange.attr & ERST_RANGE_NVRAM)
+ rc = __erst_clear_from_nvram(record_id);
+ else
+ rc = __erst_clear_from_storage(record_id);
+ raw_spin_unlock_irqrestore(&erst_lock, flags);
+ if (rc)
+ goto out;
+ entries = erst_record_id_cache.entries;
+ for (i = 0; i < erst_record_id_cache.len; i++) {
+ if (entries[i] == record_id)
+ entries[i] = APEI_ERST_INVALID_RECORD_ID;
+ }
+ __erst_record_id_cache_compact();
+out:
+ mutex_unlock(&erst_record_id_cache.lock);
+ return rc;
+}
+EXPORT_SYMBOL_GPL(erst_clear);
+
+static int __init setup_erst_disable(char *str)
+{
+ erst_disable = 1;
+ return 0;
+}
+
+__setup("erst_disable", setup_erst_disable);
+
+static int erst_check_table(struct acpi_table_erst *erst_tab)
+{
+ if ((erst_tab->header_length !=
+ (sizeof(struct acpi_table_erst) - sizeof(erst_tab->header)))
+ && (erst_tab->header_length != sizeof(struct acpi_table_erst)))
+ return -EINVAL;
+ if (erst_tab->header.length < sizeof(struct acpi_table_erst))
+ return -EINVAL;
+ if (erst_tab->entries !=
+ (erst_tab->header.length - sizeof(struct acpi_table_erst)) /
+ sizeof(struct acpi_erst_entry))
+ return -EINVAL;
+
+ return 0;
+}
+
+static int erst_open_pstore(struct pstore_info *psi);
+static int erst_close_pstore(struct pstore_info *psi);
+static ssize_t erst_reader(u64 *id, enum pstore_type_id *type, int *count,
+ struct timespec *time, char **buf,
+ bool *compressed, struct pstore_info *psi);
+static int erst_writer(enum pstore_type_id type, enum kmsg_dump_reason reason,
+ u64 *id, unsigned int part, int count, bool compressed,
+ size_t size, struct pstore_info *psi);
+static int erst_clearer(enum pstore_type_id type, u64 id, int count,
+ struct timespec time, struct pstore_info *psi);
+
+static struct pstore_info erst_info = {
+ .owner = THIS_MODULE,
+ .name = "erst",
+ .flags = PSTORE_FLAGS_FRAGILE,
+ .open = erst_open_pstore,
+ .close = erst_close_pstore,
+ .read = erst_reader,
+ .write = erst_writer,
+ .erase = erst_clearer
+};
+
+#define CPER_CREATOR_PSTORE \
+ UUID_LE(0x75a574e3, 0x5052, 0x4b29, 0x8a, 0x8e, 0xbe, 0x2c, \
+ 0x64, 0x90, 0xb8, 0x9d)
+#define CPER_SECTION_TYPE_DMESG \
+ UUID_LE(0xc197e04e, 0xd545, 0x4a70, 0x9c, 0x17, 0xa5, 0x54, \
+ 0x94, 0x19, 0xeb, 0x12)
+#define CPER_SECTION_TYPE_DMESG_Z \
+ UUID_LE(0x4f118707, 0x04dd, 0x4055, 0xb5, 0xdd, 0x95, 0x6d, \
+ 0x34, 0xdd, 0xfa, 0xc6)
+#define CPER_SECTION_TYPE_MCE \
+ UUID_LE(0xfe08ffbe, 0x95e4, 0x4be7, 0xbc, 0x73, 0x40, 0x96, \
+ 0x04, 0x4a, 0x38, 0xfc)
+
+struct cper_pstore_record {
+ struct cper_record_header hdr;
+ struct cper_section_descriptor sec_hdr;
+ char data[];
+} __packed;
+
+static int reader_pos;
+
+static int erst_open_pstore(struct pstore_info *psi)
+{
+ int rc;
+
+ if (erst_disable)
+ return -ENODEV;
+
+ rc = erst_get_record_id_begin(&reader_pos);
+
+ return rc;
+}
+
+static int erst_close_pstore(struct pstore_info *psi)
+{
+ erst_get_record_id_end();
+
+ return 0;
+}
+
+static ssize_t erst_reader(u64 *id, enum pstore_type_id *type, int *count,
+ struct timespec *time, char **buf,
+ bool *compressed, struct pstore_info *psi)
+{
+ int rc;
+ ssize_t len = 0;
+ u64 record_id;
+ struct cper_pstore_record *rcd;
+ size_t rcd_len = sizeof(*rcd) + erst_info.bufsize;
+
+ if (erst_disable)
+ return -ENODEV;
+
+ rcd = kmalloc(rcd_len, GFP_KERNEL);
+ if (!rcd) {
+ rc = -ENOMEM;
+ goto out;
+ }
+skip:
+ rc = erst_get_record_id_next(&reader_pos, &record_id);
+ if (rc)
+ goto out;
+
+ /* no more record */
+ if (record_id == APEI_ERST_INVALID_RECORD_ID) {
+ rc = -EINVAL;
+ goto out;
+ }
+
+ len = erst_read(record_id, &rcd->hdr, rcd_len);
+ /* The record may be cleared by others, try read next record */
+ if (len == -ENOENT)
+ goto skip;
+ else if (len < sizeof(*rcd)) {
+ rc = -EIO;
+ goto out;
+ }
+ if (uuid_le_cmp(rcd->hdr.creator_id, CPER_CREATOR_PSTORE) != 0)
+ goto skip;
+
+ *buf = kmalloc(len, GFP_KERNEL);
+ if (*buf == NULL) {
+ rc = -ENOMEM;
+ goto out;
+ }
+ memcpy(*buf, rcd->data, len - sizeof(*rcd));
+ *id = record_id;
+ *compressed = false;
+ if (uuid_le_cmp(rcd->sec_hdr.section_type,
+ CPER_SECTION_TYPE_DMESG_Z) == 0) {
+ *type = PSTORE_TYPE_DMESG;
+ *compressed = true;
+ } else if (uuid_le_cmp(rcd->sec_hdr.section_type,
+ CPER_SECTION_TYPE_DMESG) == 0)
+ *type = PSTORE_TYPE_DMESG;
+ else if (uuid_le_cmp(rcd->sec_hdr.section_type,
+ CPER_SECTION_TYPE_MCE) == 0)
+ *type = PSTORE_TYPE_MCE;
+ else
+ *type = PSTORE_TYPE_UNKNOWN;
+
+ if (rcd->hdr.validation_bits & CPER_VALID_TIMESTAMP)
+ time->tv_sec = rcd->hdr.timestamp;
+ else
+ time->tv_sec = 0;
+ time->tv_nsec = 0;
+
+out:
+ kfree(rcd);
+ return (rc < 0) ? rc : (len - sizeof(*rcd));
+}
+
+static int erst_writer(enum pstore_type_id type, enum kmsg_dump_reason reason,
+ u64 *id, unsigned int part, int count, bool compressed,
+ size_t size, struct pstore_info *psi)
+{
+ struct cper_pstore_record *rcd = (struct cper_pstore_record *)
+ (erst_info.buf - sizeof(*rcd));
+ int ret;
+
+ memset(rcd, 0, sizeof(*rcd));
+ memcpy(rcd->hdr.signature, CPER_SIG_RECORD, CPER_SIG_SIZE);
+ rcd->hdr.revision = CPER_RECORD_REV;
+ rcd->hdr.signature_end = CPER_SIG_END;
+ rcd->hdr.section_count = 1;
+ rcd->hdr.error_severity = CPER_SEV_FATAL;
+ /* timestamp valid. platform_id, partition_id are invalid */
+ rcd->hdr.validation_bits = CPER_VALID_TIMESTAMP;
+ rcd->hdr.timestamp = get_seconds();
+ rcd->hdr.record_length = sizeof(*rcd) + size;
+ rcd->hdr.creator_id = CPER_CREATOR_PSTORE;
+ rcd->hdr.notification_type = CPER_NOTIFY_MCE;
+ rcd->hdr.record_id = cper_next_record_id();
+ rcd->hdr.flags = CPER_HW_ERROR_FLAGS_PREVERR;
+
+ rcd->sec_hdr.section_offset = sizeof(*rcd);
+ rcd->sec_hdr.section_length = size;
+ rcd->sec_hdr.revision = CPER_SEC_REV;
+ /* fru_id and fru_text is invalid */
+ rcd->sec_hdr.validation_bits = 0;
+ rcd->sec_hdr.flags = CPER_SEC_PRIMARY;
+ switch (type) {
+ case PSTORE_TYPE_DMESG:
+ if (compressed)
+ rcd->sec_hdr.section_type = CPER_SECTION_TYPE_DMESG_Z;
+ else
+ rcd->sec_hdr.section_type = CPER_SECTION_TYPE_DMESG;
+ break;
+ case PSTORE_TYPE_MCE:
+ rcd->sec_hdr.section_type = CPER_SECTION_TYPE_MCE;
+ break;
+ default:
+ return -EINVAL;
+ }
+ rcd->sec_hdr.section_severity = CPER_SEV_FATAL;
+
+ ret = erst_write(&rcd->hdr);
+ *id = rcd->hdr.record_id;
+
+ return ret;
+}
+
+static int erst_clearer(enum pstore_type_id type, u64 id, int count,
+ struct timespec time, struct pstore_info *psi)
+{
+ return erst_clear(id);
+}
+
+static int __init erst_init(void)
+{
+ int rc = 0;
+ acpi_status status;
+ struct apei_exec_context ctx;
+ struct apei_resources erst_resources;
+ struct resource *r;
+ char *buf;
+
+ if (acpi_disabled)
+ goto err;
+
+ if (erst_disable) {
+ pr_info(
+ "Error Record Serialization Table (ERST) support is disabled.\n");
+ goto err;
+ }
+
+ status = acpi_get_table(ACPI_SIG_ERST, 0,
+ (struct acpi_table_header **)&erst_tab);
+ if (status == AE_NOT_FOUND)
+ goto err;
+ else if (ACPI_FAILURE(status)) {
+ const char *msg = acpi_format_exception(status);
+ pr_err("Failed to get table, %s\n", msg);
+ rc = -EINVAL;
+ goto err;
+ }
+
+ rc = erst_check_table(erst_tab);
+ if (rc) {
+ pr_err(FW_BUG "ERST table is invalid.\n");
+ goto err;
+ }
+
+ apei_resources_init(&erst_resources);
+ erst_exec_ctx_init(&ctx);
+ rc = apei_exec_collect_resources(&ctx, &erst_resources);
+ if (rc)
+ goto err_fini;
+ rc = apei_resources_request(&erst_resources, "APEI ERST");
+ if (rc)
+ goto err_fini;
+ rc = apei_exec_pre_map_gars(&ctx);
+ if (rc)
+ goto err_release;
+ rc = erst_get_erange(&erst_erange);
+ if (rc) {
+ if (rc == -ENODEV)
+ pr_info(
+ "The corresponding hardware device or firmware implementation "
+ "is not available.\n");
+ else
+ pr_err("Failed to get Error Log Address Range.\n");
+ goto err_unmap_reg;
+ }
+
+ r = request_mem_region(erst_erange.base, erst_erange.size, "APEI ERST");
+ if (!r) {
+ pr_err("Can not request [mem %#010llx-%#010llx] for ERST.\n",
+ (unsigned long long)erst_erange.base,
+ (unsigned long long)erst_erange.base + erst_erange.size - 1);
+ rc = -EIO;
+ goto err_unmap_reg;
+ }
+ rc = -ENOMEM;
+ erst_erange.vaddr = ioremap_cache(erst_erange.base,
+ erst_erange.size);
+ if (!erst_erange.vaddr)
+ goto err_release_erange;
+
+ pr_info(
+ "Error Record Serialization Table (ERST) support is initialized.\n");
+
+ buf = kmalloc(erst_erange.size, GFP_KERNEL);
+ spin_lock_init(&erst_info.buf_lock);
+ if (buf) {
+ erst_info.buf = buf + sizeof(struct cper_pstore_record);
+ erst_info.bufsize = erst_erange.size -
+ sizeof(struct cper_pstore_record);
+ rc = pstore_register(&erst_info);
+ if (rc) {
+ if (rc != -EPERM)
+ pr_info(
+ "Could not register with persistent store.\n");
+ erst_info.buf = NULL;
+ erst_info.bufsize = 0;
+ kfree(buf);
+ }
+ } else
+ pr_err(
+ "Failed to allocate %lld bytes for persistent store error log.\n",
+ erst_erange.size);
+
+ return 0;
+
+err_release_erange:
+ release_mem_region(erst_erange.base, erst_erange.size);
+err_unmap_reg:
+ apei_exec_post_unmap_gars(&ctx);
+err_release:
+ apei_resources_release(&erst_resources);
+err_fini:
+ apei_resources_fini(&erst_resources);
+err:
+ erst_disable = 1;
+ return rc;
+}
+
+device_initcall(erst_init);
diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
new file mode 100644
index 000000000..e82d0976a
--- /dev/null
+++ b/drivers/acpi/apei/ghes.c
@@ -0,0 +1,1163 @@
+/*
+ * APEI Generic Hardware Error Source support
+ *
+ * Generic Hardware Error Source provides a way to report platform
+ * hardware errors (such as that from chipset). It works in so called
+ * "Firmware First" mode, that is, hardware errors are reported to
+ * firmware firstly, then reported to Linux by firmware. This way,
+ * some non-standard hardware error registers or non-standard hardware
+ * link can be checked by firmware to produce more hardware error
+ * information for Linux.
+ *
+ * For more information about Generic Hardware Error Source, please
+ * refer to ACPI Specification version 4.0, section 17.3.2.6
+ *
+ * Copyright 2010,2011 Intel Corp.
+ * Author: Huang Ying <ying.huang@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation;
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/acpi.h>
+#include <linux/io.h>
+#include <linux/interrupt.h>
+#include <linux/timer.h>
+#include <linux/cper.h>
+#include <linux/kdebug.h>
+#include <linux/platform_device.h>
+#include <linux/mutex.h>
+#include <linux/ratelimit.h>
+#include <linux/vmalloc.h>
+#include <linux/irq_work.h>
+#include <linux/llist.h>
+#include <linux/genalloc.h>
+#include <linux/pci.h>
+#include <linux/aer.h>
+#include <linux/nmi.h>
+
+#include <acpi/ghes.h>
+#include <acpi/apei.h>
+#include <asm/tlbflush.h>
+
+#include "apei-internal.h"
+
+#define GHES_PFX "GHES: "
+
+#define GHES_ESTATUS_MAX_SIZE 65536
+#define GHES_ESOURCE_PREALLOC_MAX_SIZE 65536
+
+#define GHES_ESTATUS_POOL_MIN_ALLOC_ORDER 3
+
+/* This is just an estimation for memory pool allocation */
+#define GHES_ESTATUS_CACHE_AVG_SIZE 512
+
+#define GHES_ESTATUS_CACHES_SIZE 4
+
+#define GHES_ESTATUS_IN_CACHE_MAX_NSEC 10000000000ULL
+/* Prevent too many caches are allocated because of RCU */
+#define GHES_ESTATUS_CACHE_ALLOCED_MAX (GHES_ESTATUS_CACHES_SIZE * 3 / 2)
+
+#define GHES_ESTATUS_CACHE_LEN(estatus_len) \
+ (sizeof(struct ghes_estatus_cache) + (estatus_len))
+#define GHES_ESTATUS_FROM_CACHE(estatus_cache) \
+ ((struct acpi_hest_generic_status *) \
+ ((struct ghes_estatus_cache *)(estatus_cache) + 1))
+
+#define GHES_ESTATUS_NODE_LEN(estatus_len) \
+ (sizeof(struct ghes_estatus_node) + (estatus_len))
+#define GHES_ESTATUS_FROM_NODE(estatus_node) \
+ ((struct acpi_hest_generic_status *) \
+ ((struct ghes_estatus_node *)(estatus_node) + 1))
+
+bool ghes_disable;
+module_param_named(disable, ghes_disable, bool, 0);
+
+/*
+ * All error sources notified with SCI shares one notifier function,
+ * so they need to be linked and checked one by one. This is applied
+ * to NMI too.
+ *
+ * RCU is used for these lists, so ghes_list_mutex is only used for
+ * list changing, not for traversing.
+ */
+static LIST_HEAD(ghes_sci);
+static DEFINE_MUTEX(ghes_list_mutex);
+
+/*
+ * Because the memory area used to transfer hardware error information
+ * from BIOS to Linux can be determined only in NMI, IRQ or timer
+ * handler, but general ioremap can not be used in atomic context, so
+ * a special version of atomic ioremap is implemented for that.
+ */
+
+/*
+ * Two virtual pages are used, one for IRQ/PROCESS context, the other for
+ * NMI context (optionally).
+ */
+#ifdef CONFIG_HAVE_ACPI_APEI_NMI
+#define GHES_IOREMAP_PAGES 2
+#else
+#define GHES_IOREMAP_PAGES 1
+#endif
+#define GHES_IOREMAP_IRQ_PAGE(base) (base)
+#define GHES_IOREMAP_NMI_PAGE(base) ((base) + PAGE_SIZE)
+
+/* virtual memory area for atomic ioremap */
+static struct vm_struct *ghes_ioremap_area;
+/*
+ * These 2 spinlock is used to prevent atomic ioremap virtual memory
+ * area from being mapped simultaneously.
+ */
+static DEFINE_RAW_SPINLOCK(ghes_ioremap_lock_nmi);
+static DEFINE_SPINLOCK(ghes_ioremap_lock_irq);
+
+static struct gen_pool *ghes_estatus_pool;
+static unsigned long ghes_estatus_pool_size_request;
+
+static struct ghes_estatus_cache *ghes_estatus_caches[GHES_ESTATUS_CACHES_SIZE];
+static atomic_t ghes_estatus_cache_alloced;
+
+static int ghes_ioremap_init(void)
+{
+ ghes_ioremap_area = __get_vm_area(PAGE_SIZE * GHES_IOREMAP_PAGES,
+ VM_IOREMAP, VMALLOC_START, VMALLOC_END);
+ if (!ghes_ioremap_area) {
+ pr_err(GHES_PFX "Failed to allocate virtual memory area for atomic ioremap.\n");
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+static void ghes_ioremap_exit(void)
+{
+ free_vm_area(ghes_ioremap_area);
+}
+
+static void __iomem *ghes_ioremap_pfn_nmi(u64 pfn)
+{
+ unsigned long vaddr;
+
+ vaddr = (unsigned long)GHES_IOREMAP_NMI_PAGE(ghes_ioremap_area->addr);
+ ioremap_page_range(vaddr, vaddr + PAGE_SIZE,
+ pfn << PAGE_SHIFT, PAGE_KERNEL);
+
+ return (void __iomem *)vaddr;
+}
+
+static void __iomem *ghes_ioremap_pfn_irq(u64 pfn)
+{
+ unsigned long vaddr;
+
+ vaddr = (unsigned long)GHES_IOREMAP_IRQ_PAGE(ghes_ioremap_area->addr);
+ ioremap_page_range(vaddr, vaddr + PAGE_SIZE,
+ pfn << PAGE_SHIFT, PAGE_KERNEL);
+
+ return (void __iomem *)vaddr;
+}
+
+static void ghes_iounmap_nmi(void __iomem *vaddr_ptr)
+{
+ unsigned long vaddr = (unsigned long __force)vaddr_ptr;
+ void *base = ghes_ioremap_area->addr;
+
+ BUG_ON(vaddr != (unsigned long)GHES_IOREMAP_NMI_PAGE(base));
+ unmap_kernel_range_noflush(vaddr, PAGE_SIZE);
+ arch_apei_flush_tlb_one(vaddr);
+}
+
+static void ghes_iounmap_irq(void __iomem *vaddr_ptr)
+{
+ unsigned long vaddr = (unsigned long __force)vaddr_ptr;
+ void *base = ghes_ioremap_area->addr;
+
+ BUG_ON(vaddr != (unsigned long)GHES_IOREMAP_IRQ_PAGE(base));
+ unmap_kernel_range_noflush(vaddr, PAGE_SIZE);
+ arch_apei_flush_tlb_one(vaddr);
+}
+
+static int ghes_estatus_pool_init(void)
+{
+ ghes_estatus_pool = gen_pool_create(GHES_ESTATUS_POOL_MIN_ALLOC_ORDER, -1);
+ if (!ghes_estatus_pool)
+ return -ENOMEM;
+ return 0;
+}
+
+static void ghes_estatus_pool_free_chunk_page(struct gen_pool *pool,
+ struct gen_pool_chunk *chunk,
+ void *data)
+{
+ free_page(chunk->start_addr);
+}
+
+static void ghes_estatus_pool_exit(void)
+{
+ gen_pool_for_each_chunk(ghes_estatus_pool,
+ ghes_estatus_pool_free_chunk_page, NULL);
+ gen_pool_destroy(ghes_estatus_pool);
+}
+
+static int ghes_estatus_pool_expand(unsigned long len)
+{
+ unsigned long i, pages, size, addr;
+ int ret;
+
+ ghes_estatus_pool_size_request += PAGE_ALIGN(len);
+ size = gen_pool_size(ghes_estatus_pool);
+ if (size >= ghes_estatus_pool_size_request)
+ return 0;
+ pages = (ghes_estatus_pool_size_request - size) / PAGE_SIZE;
+ for (i = 0; i < pages; i++) {
+ addr = __get_free_page(GFP_KERNEL);
+ if (!addr)
+ return -ENOMEM;
+ ret = gen_pool_add(ghes_estatus_pool, addr, PAGE_SIZE, -1);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+static struct ghes *ghes_new(struct acpi_hest_generic *generic)
+{
+ struct ghes *ghes;
+ unsigned int error_block_length;
+ int rc;
+
+ ghes = kzalloc(sizeof(*ghes), GFP_KERNEL);
+ if (!ghes)
+ return ERR_PTR(-ENOMEM);
+ ghes->generic = generic;
+ rc = apei_map_generic_address(&generic->error_status_address);
+ if (rc)
+ goto err_free;
+ error_block_length = generic->error_block_length;
+ if (error_block_length > GHES_ESTATUS_MAX_SIZE) {
+ pr_warning(FW_WARN GHES_PFX
+ "Error status block length is too long: %u for "
+ "generic hardware error source: %d.\n",
+ error_block_length, generic->header.source_id);
+ error_block_length = GHES_ESTATUS_MAX_SIZE;
+ }
+ ghes->estatus = kmalloc(error_block_length, GFP_KERNEL);
+ if (!ghes->estatus) {
+ rc = -ENOMEM;
+ goto err_unmap;
+ }
+
+ return ghes;
+
+err_unmap:
+ apei_unmap_generic_address(&generic->error_status_address);
+err_free:
+ kfree(ghes);
+ return ERR_PTR(rc);
+}
+
+static void ghes_fini(struct ghes *ghes)
+{
+ kfree(ghes->estatus);
+ apei_unmap_generic_address(&ghes->generic->error_status_address);
+}
+
+static inline int ghes_severity(int severity)
+{
+ switch (severity) {
+ case CPER_SEV_INFORMATIONAL:
+ return GHES_SEV_NO;
+ case CPER_SEV_CORRECTED:
+ return GHES_SEV_CORRECTED;
+ case CPER_SEV_RECOVERABLE:
+ return GHES_SEV_RECOVERABLE;
+ case CPER_SEV_FATAL:
+ return GHES_SEV_PANIC;
+ default:
+ /* Unknown, go panic */
+ return GHES_SEV_PANIC;
+ }
+}
+
+static void ghes_copy_tofrom_phys(void *buffer, u64 paddr, u32 len,
+ int from_phys)
+{
+ void __iomem *vaddr;
+ unsigned long flags = 0;
+ int in_nmi = in_nmi();
+ u64 offset;
+ u32 trunk;
+
+ while (len > 0) {
+ offset = paddr - (paddr & PAGE_MASK);
+ if (in_nmi) {
+ raw_spin_lock(&ghes_ioremap_lock_nmi);
+ vaddr = ghes_ioremap_pfn_nmi(paddr >> PAGE_SHIFT);
+ } else {
+ spin_lock_irqsave(&ghes_ioremap_lock_irq, flags);
+ vaddr = ghes_ioremap_pfn_irq(paddr >> PAGE_SHIFT);
+ }
+ trunk = PAGE_SIZE - offset;
+ trunk = min(trunk, len);
+ if (from_phys)
+ memcpy_fromio(buffer, vaddr + offset, trunk);
+ else
+ memcpy_toio(vaddr + offset, buffer, trunk);
+ len -= trunk;
+ paddr += trunk;
+ buffer += trunk;
+ if (in_nmi) {
+ ghes_iounmap_nmi(vaddr);
+ raw_spin_unlock(&ghes_ioremap_lock_nmi);
+ } else {
+ ghes_iounmap_irq(vaddr);
+ spin_unlock_irqrestore(&ghes_ioremap_lock_irq, flags);
+ }
+ }
+}
+
+static int ghes_read_estatus(struct ghes *ghes, int silent)
+{
+ struct acpi_hest_generic *g = ghes->generic;
+ u64 buf_paddr;
+ u32 len;
+ int rc;
+
+ rc = apei_read(&buf_paddr, &g->error_status_address);
+ if (rc) {
+ if (!silent && printk_ratelimit())
+ pr_warning(FW_WARN GHES_PFX
+"Failed to read error status block address for hardware error source: %d.\n",
+ g->header.source_id);
+ return -EIO;
+ }
+ if (!buf_paddr)
+ return -ENOENT;
+
+ ghes_copy_tofrom_phys(ghes->estatus, buf_paddr,
+ sizeof(*ghes->estatus), 1);
+ if (!ghes->estatus->block_status)
+ return -ENOENT;
+
+ ghes->buffer_paddr = buf_paddr;
+ ghes->flags |= GHES_TO_CLEAR;
+
+ rc = -EIO;
+ len = cper_estatus_len(ghes->estatus);
+ if (len < sizeof(*ghes->estatus))
+ goto err_read_block;
+ if (len > ghes->generic->error_block_length)
+ goto err_read_block;
+ if (cper_estatus_check_header(ghes->estatus))
+ goto err_read_block;
+ ghes_copy_tofrom_phys(ghes->estatus + 1,
+ buf_paddr + sizeof(*ghes->estatus),
+ len - sizeof(*ghes->estatus), 1);
+ if (cper_estatus_check(ghes->estatus))
+ goto err_read_block;
+ rc = 0;
+
+err_read_block:
+ if (rc && !silent && printk_ratelimit())
+ pr_warning(FW_WARN GHES_PFX
+ "Failed to read error status block!\n");
+ return rc;
+}
+
+static void ghes_clear_estatus(struct ghes *ghes)
+{
+ ghes->estatus->block_status = 0;
+ if (!(ghes->flags & GHES_TO_CLEAR))
+ return;
+ ghes_copy_tofrom_phys(ghes->estatus, ghes->buffer_paddr,
+ sizeof(ghes->estatus->block_status), 0);
+ ghes->flags &= ~GHES_TO_CLEAR;
+}
+
+static void ghes_handle_memory_failure(struct acpi_hest_generic_data *gdata, int sev)
+{
+#ifdef CONFIG_ACPI_APEI_MEMORY_FAILURE
+ unsigned long pfn;
+ int flags = -1;
+ int sec_sev = ghes_severity(gdata->error_severity);
+ struct cper_sec_mem_err *mem_err;
+ mem_err = (struct cper_sec_mem_err *)(gdata + 1);
+
+ if (!(mem_err->validation_bits & CPER_MEM_VALID_PA))
+ return;
+
+ pfn = mem_err->physical_addr >> PAGE_SHIFT;
+ if (!pfn_valid(pfn)) {
+ pr_warn_ratelimited(FW_WARN GHES_PFX
+ "Invalid address in generic error data: %#llx\n",
+ mem_err->physical_addr);
+ return;
+ }
+
+ /* iff following two events can be handled properly by now */
+ if (sec_sev == GHES_SEV_CORRECTED &&
+ (gdata->flags & CPER_SEC_ERROR_THRESHOLD_EXCEEDED))
+ flags = MF_SOFT_OFFLINE;
+ if (sev == GHES_SEV_RECOVERABLE && sec_sev == GHES_SEV_RECOVERABLE)
+ flags = 0;
+
+ if (flags != -1)
+ memory_failure_queue(pfn, 0, flags);
+#endif
+}
+
+static void ghes_do_proc(struct ghes *ghes,
+ const struct acpi_hest_generic_status *estatus)
+{
+ int sev, sec_sev;
+ struct acpi_hest_generic_data *gdata;
+
+ sev = ghes_severity(estatus->error_severity);
+ apei_estatus_for_each_section(estatus, gdata) {
+ sec_sev = ghes_severity(gdata->error_severity);
+ if (!uuid_le_cmp(*(uuid_le *)gdata->section_type,
+ CPER_SEC_PLATFORM_MEM)) {
+ struct cper_sec_mem_err *mem_err;
+ mem_err = (struct cper_sec_mem_err *)(gdata+1);
+ ghes_edac_report_mem_error(ghes, sev, mem_err);
+
+ arch_apei_report_mem_error(sev, mem_err);
+ ghes_handle_memory_failure(gdata, sev);
+ }
+#ifdef CONFIG_ACPI_APEI_PCIEAER
+ else if (!uuid_le_cmp(*(uuid_le *)gdata->section_type,
+ CPER_SEC_PCIE)) {
+ struct cper_sec_pcie *pcie_err;
+ pcie_err = (struct cper_sec_pcie *)(gdata+1);
+ if (sev == GHES_SEV_RECOVERABLE &&
+ sec_sev == GHES_SEV_RECOVERABLE &&
+ pcie_err->validation_bits & CPER_PCIE_VALID_DEVICE_ID &&
+ pcie_err->validation_bits & CPER_PCIE_VALID_AER_INFO) {
+ unsigned int devfn;
+ int aer_severity;
+
+ devfn = PCI_DEVFN(pcie_err->device_id.device,
+ pcie_err->device_id.function);
+ aer_severity = cper_severity_to_aer(sev);
+
+ /*
+ * If firmware reset the component to contain
+ * the error, we must reinitialize it before
+ * use, so treat it as a fatal AER error.
+ */
+ if (gdata->flags & CPER_SEC_RESET)
+ aer_severity = AER_FATAL;
+
+ aer_recover_queue(pcie_err->device_id.segment,
+ pcie_err->device_id.bus,
+ devfn, aer_severity,
+ (struct aer_capability_regs *)
+ pcie_err->aer_info);
+ }
+
+ }
+#endif
+ }
+}
+
+static void __ghes_print_estatus(const char *pfx,
+ const struct acpi_hest_generic *generic,
+ const struct acpi_hest_generic_status *estatus)
+{
+ static atomic_t seqno;
+ unsigned int curr_seqno;
+ char pfx_seq[64];
+
+ if (pfx == NULL) {
+ if (ghes_severity(estatus->error_severity) <=
+ GHES_SEV_CORRECTED)
+ pfx = KERN_WARNING;
+ else
+ pfx = KERN_ERR;
+ }
+ curr_seqno = atomic_inc_return(&seqno);
+ snprintf(pfx_seq, sizeof(pfx_seq), "%s{%u}" HW_ERR, pfx, curr_seqno);
+ printk("%s""Hardware error from APEI Generic Hardware Error Source: %d\n",
+ pfx_seq, generic->header.source_id);
+ cper_estatus_print(pfx_seq, estatus);
+}
+
+static int ghes_print_estatus(const char *pfx,
+ const struct acpi_hest_generic *generic,
+ const struct acpi_hest_generic_status *estatus)
+{
+ /* Not more than 2 messages every 5 seconds */
+ static DEFINE_RATELIMIT_STATE(ratelimit_corrected, 5*HZ, 2);
+ static DEFINE_RATELIMIT_STATE(ratelimit_uncorrected, 5*HZ, 2);
+ struct ratelimit_state *ratelimit;
+
+ if (ghes_severity(estatus->error_severity) <= GHES_SEV_CORRECTED)
+ ratelimit = &ratelimit_corrected;
+ else
+ ratelimit = &ratelimit_uncorrected;
+ if (__ratelimit(ratelimit)) {
+ __ghes_print_estatus(pfx, generic, estatus);
+ return 1;
+ }
+ return 0;
+}
+
+/*
+ * GHES error status reporting throttle, to report more kinds of
+ * errors, instead of just most frequently occurred errors.
+ */
+static int ghes_estatus_cached(struct acpi_hest_generic_status *estatus)
+{
+ u32 len;
+ int i, cached = 0;
+ unsigned long long now;
+ struct ghes_estatus_cache *cache;
+ struct acpi_hest_generic_status *cache_estatus;
+
+ len = cper_estatus_len(estatus);
+ rcu_read_lock();
+ for (i = 0; i < GHES_ESTATUS_CACHES_SIZE; i++) {
+ cache = rcu_dereference(ghes_estatus_caches[i]);
+ if (cache == NULL)
+ continue;
+ if (len != cache->estatus_len)
+ continue;
+ cache_estatus = GHES_ESTATUS_FROM_CACHE(cache);
+ if (memcmp(estatus, cache_estatus, len))
+ continue;
+ atomic_inc(&cache->count);
+ now = sched_clock();
+ if (now - cache->time_in < GHES_ESTATUS_IN_CACHE_MAX_NSEC)
+ cached = 1;
+ break;
+ }
+ rcu_read_unlock();
+ return cached;
+}
+
+static struct ghes_estatus_cache *ghes_estatus_cache_alloc(
+ struct acpi_hest_generic *generic,
+ struct acpi_hest_generic_status *estatus)
+{
+ int alloced;
+ u32 len, cache_len;
+ struct ghes_estatus_cache *cache;
+ struct acpi_hest_generic_status *cache_estatus;
+
+ alloced = atomic_add_return(1, &ghes_estatus_cache_alloced);
+ if (alloced > GHES_ESTATUS_CACHE_ALLOCED_MAX) {
+ atomic_dec(&ghes_estatus_cache_alloced);
+ return NULL;
+ }
+ len = cper_estatus_len(estatus);
+ cache_len = GHES_ESTATUS_CACHE_LEN(len);
+ cache = (void *)gen_pool_alloc(ghes_estatus_pool, cache_len);
+ if (!cache) {
+ atomic_dec(&ghes_estatus_cache_alloced);
+ return NULL;
+ }
+ cache_estatus = GHES_ESTATUS_FROM_CACHE(cache);
+ memcpy(cache_estatus, estatus, len);
+ cache->estatus_len = len;
+ atomic_set(&cache->count, 0);
+ cache->generic = generic;
+ cache->time_in = sched_clock();
+ return cache;
+}
+
+static void ghes_estatus_cache_free(struct ghes_estatus_cache *cache)
+{
+ u32 len;
+
+ len = cper_estatus_len(GHES_ESTATUS_FROM_CACHE(cache));
+ len = GHES_ESTATUS_CACHE_LEN(len);
+ gen_pool_free(ghes_estatus_pool, (unsigned long)cache, len);
+ atomic_dec(&ghes_estatus_cache_alloced);
+}
+
+static void ghes_estatus_cache_rcu_free(struct rcu_head *head)
+{
+ struct ghes_estatus_cache *cache;
+
+ cache = container_of(head, struct ghes_estatus_cache, rcu);
+ ghes_estatus_cache_free(cache);
+}
+
+static void ghes_estatus_cache_add(
+ struct acpi_hest_generic *generic,
+ struct acpi_hest_generic_status *estatus)
+{
+ int i, slot = -1, count;
+ unsigned long long now, duration, period, max_period = 0;
+ struct ghes_estatus_cache *cache, *slot_cache = NULL, *new_cache;
+
+ new_cache = ghes_estatus_cache_alloc(generic, estatus);
+ if (new_cache == NULL)
+ return;
+ rcu_read_lock();
+ now = sched_clock();
+ for (i = 0; i < GHES_ESTATUS_CACHES_SIZE; i++) {
+ cache = rcu_dereference(ghes_estatus_caches[i]);
+ if (cache == NULL) {
+ slot = i;
+ slot_cache = NULL;
+ break;
+ }
+ duration = now - cache->time_in;
+ if (duration >= GHES_ESTATUS_IN_CACHE_MAX_NSEC) {
+ slot = i;
+ slot_cache = cache;
+ break;
+ }
+ count = atomic_read(&cache->count);
+ period = duration;
+ do_div(period, (count + 1));
+ if (period > max_period) {
+ max_period = period;
+ slot = i;
+ slot_cache = cache;
+ }
+ }
+ /* new_cache must be put into array after its contents are written */
+ smp_wmb();
+ if (slot != -1 && cmpxchg(ghes_estatus_caches + slot,
+ slot_cache, new_cache) == slot_cache) {
+ if (slot_cache)
+ call_rcu(&slot_cache->rcu, ghes_estatus_cache_rcu_free);
+ } else
+ ghes_estatus_cache_free(new_cache);
+ rcu_read_unlock();
+}
+
+static int ghes_proc(struct ghes *ghes)
+{
+ int rc;
+
+ rc = ghes_read_estatus(ghes, 0);
+ if (rc)
+ goto out;
+ if (!ghes_estatus_cached(ghes->estatus)) {
+ if (ghes_print_estatus(NULL, ghes->generic, ghes->estatus))
+ ghes_estatus_cache_add(ghes->generic, ghes->estatus);
+ }
+ ghes_do_proc(ghes, ghes->estatus);
+out:
+ ghes_clear_estatus(ghes);
+ return 0;
+}
+
+static void ghes_add_timer(struct ghes *ghes)
+{
+ struct acpi_hest_generic *g = ghes->generic;
+ unsigned long expire;
+
+ if (!g->notify.poll_interval) {
+ pr_warning(FW_WARN GHES_PFX "Poll interval is 0 for generic hardware error source: %d, disabled.\n",
+ g->header.source_id);
+ return;
+ }
+ expire = jiffies + msecs_to_jiffies(g->notify.poll_interval);
+ ghes->timer.expires = round_jiffies_relative(expire);
+ add_timer(&ghes->timer);
+}
+
+static void ghes_poll_func(unsigned long data)
+{
+ struct ghes *ghes = (void *)data;
+
+ ghes_proc(ghes);
+ if (!(ghes->flags & GHES_EXITING))
+ ghes_add_timer(ghes);
+}
+
+static irqreturn_t ghes_irq_func(int irq, void *data)
+{
+ struct ghes *ghes = data;
+ int rc;
+
+ rc = ghes_proc(ghes);
+ if (rc)
+ return IRQ_NONE;
+
+ return IRQ_HANDLED;
+}
+
+static int ghes_notify_sci(struct notifier_block *this,
+ unsigned long event, void *data)
+{
+ struct ghes *ghes;
+ int ret = NOTIFY_DONE;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(ghes, &ghes_sci, list) {
+ if (!ghes_proc(ghes))
+ ret = NOTIFY_OK;
+ }
+ rcu_read_unlock();
+
+ return ret;
+}
+
+static struct notifier_block ghes_notifier_sci = {
+ .notifier_call = ghes_notify_sci,
+};
+
+#ifdef CONFIG_HAVE_ACPI_APEI_NMI
+/*
+ * printk is not safe in NMI context. So in NMI handler, we allocate
+ * required memory from lock-less memory allocator
+ * (ghes_estatus_pool), save estatus into it, put them into lock-less
+ * list (ghes_estatus_llist), then delay printk into IRQ context via
+ * irq_work (ghes_proc_irq_work). ghes_estatus_size_request record
+ * required pool size by all NMI error source.
+ */
+static struct llist_head ghes_estatus_llist;
+static struct irq_work ghes_proc_irq_work;
+
+/*
+ * NMI may be triggered on any CPU, so ghes_nmi_lock is used for
+ * mutual exclusion.
+ */
+static DEFINE_RAW_SPINLOCK(ghes_nmi_lock);
+
+static LIST_HEAD(ghes_nmi);
+
+static int ghes_panic_timeout __read_mostly = 30;
+
+static void ghes_proc_in_irq(struct irq_work *irq_work)
+{
+ struct llist_node *llnode, *next;
+ struct ghes_estatus_node *estatus_node;
+ struct acpi_hest_generic *generic;
+ struct acpi_hest_generic_status *estatus;
+ u32 len, node_len;
+
+ llnode = llist_del_all(&ghes_estatus_llist);
+ /*
+ * Because the time order of estatus in list is reversed,
+ * revert it back to proper order.
+ */
+ llnode = llist_reverse_order(llnode);
+ while (llnode) {
+ next = llnode->next;
+ estatus_node = llist_entry(llnode, struct ghes_estatus_node,
+ llnode);
+ estatus = GHES_ESTATUS_FROM_NODE(estatus_node);
+ len = cper_estatus_len(estatus);
+ node_len = GHES_ESTATUS_NODE_LEN(len);
+ ghes_do_proc(estatus_node->ghes, estatus);
+ if (!ghes_estatus_cached(estatus)) {
+ generic = estatus_node->generic;
+ if (ghes_print_estatus(NULL, generic, estatus))
+ ghes_estatus_cache_add(generic, estatus);
+ }
+ gen_pool_free(ghes_estatus_pool, (unsigned long)estatus_node,
+ node_len);
+ llnode = next;
+ }
+}
+
+static void ghes_print_queued_estatus(void)
+{
+ struct llist_node *llnode;
+ struct ghes_estatus_node *estatus_node;
+ struct acpi_hest_generic *generic;
+ struct acpi_hest_generic_status *estatus;
+ u32 len, node_len;
+
+ llnode = llist_del_all(&ghes_estatus_llist);
+ /*
+ * Because the time order of estatus in list is reversed,
+ * revert it back to proper order.
+ */
+ llnode = llist_reverse_order(llnode);
+ while (llnode) {
+ estatus_node = llist_entry(llnode, struct ghes_estatus_node,
+ llnode);
+ estatus = GHES_ESTATUS_FROM_NODE(estatus_node);
+ len = cper_estatus_len(estatus);
+ node_len = GHES_ESTATUS_NODE_LEN(len);
+ generic = estatus_node->generic;
+ ghes_print_estatus(NULL, generic, estatus);
+ llnode = llnode->next;
+ }
+}
+
+static int ghes_notify_nmi(unsigned int cmd, struct pt_regs *regs)
+{
+ struct ghes *ghes, *ghes_global = NULL;
+ int sev, sev_global = -1;
+ int ret = NMI_DONE;
+
+ raw_spin_lock(&ghes_nmi_lock);
+ list_for_each_entry_rcu(ghes, &ghes_nmi, list) {
+ if (ghes_read_estatus(ghes, 1)) {
+ ghes_clear_estatus(ghes);
+ continue;
+ }
+ sev = ghes_severity(ghes->estatus->error_severity);
+ if (sev > sev_global) {
+ sev_global = sev;
+ ghes_global = ghes;
+ }
+ ret = NMI_HANDLED;
+ }
+
+ if (ret == NMI_DONE)
+ goto out;
+
+ if (sev_global >= GHES_SEV_PANIC) {
+ oops_begin();
+ ghes_print_queued_estatus();
+ __ghes_print_estatus(KERN_EMERG, ghes_global->generic,
+ ghes_global->estatus);
+ /* reboot to log the error! */
+ if (panic_timeout == 0)
+ panic_timeout = ghes_panic_timeout;
+ panic("Fatal hardware error!");
+ }
+
+ list_for_each_entry_rcu(ghes, &ghes_nmi, list) {
+#ifdef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG
+ u32 len, node_len;
+ struct ghes_estatus_node *estatus_node;
+ struct acpi_hest_generic_status *estatus;
+#endif
+ if (!(ghes->flags & GHES_TO_CLEAR))
+ continue;
+#ifdef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG
+ if (ghes_estatus_cached(ghes->estatus))
+ goto next;
+ /* Save estatus for further processing in IRQ context */
+ len = cper_estatus_len(ghes->estatus);
+ node_len = GHES_ESTATUS_NODE_LEN(len);
+ estatus_node = (void *)gen_pool_alloc(ghes_estatus_pool,
+ node_len);
+ if (estatus_node) {
+ estatus_node->ghes = ghes;
+ estatus_node->generic = ghes->generic;
+ estatus = GHES_ESTATUS_FROM_NODE(estatus_node);
+ memcpy(estatus, ghes->estatus, len);
+ llist_add(&estatus_node->llnode, &ghes_estatus_llist);
+ }
+next:
+#endif
+ ghes_clear_estatus(ghes);
+ }
+#ifdef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG
+ irq_work_queue(&ghes_proc_irq_work);
+#endif
+
+out:
+ raw_spin_unlock(&ghes_nmi_lock);
+ return ret;
+}
+
+static unsigned long ghes_esource_prealloc_size(
+ const struct acpi_hest_generic *generic)
+{
+ unsigned long block_length, prealloc_records, prealloc_size;
+
+ block_length = min_t(unsigned long, generic->error_block_length,
+ GHES_ESTATUS_MAX_SIZE);
+ prealloc_records = max_t(unsigned long,
+ generic->records_to_preallocate, 1);
+ prealloc_size = min_t(unsigned long, block_length * prealloc_records,
+ GHES_ESOURCE_PREALLOC_MAX_SIZE);
+
+ return prealloc_size;
+}
+
+static void ghes_estatus_pool_shrink(unsigned long len)
+{
+ ghes_estatus_pool_size_request -= PAGE_ALIGN(len);
+}
+
+static void ghes_nmi_add(struct ghes *ghes)
+{
+ unsigned long len;
+
+ len = ghes_esource_prealloc_size(ghes->generic);
+ ghes_estatus_pool_expand(len);
+ mutex_lock(&ghes_list_mutex);
+ if (list_empty(&ghes_nmi))
+ register_nmi_handler(NMI_LOCAL, ghes_notify_nmi, 0, "ghes");
+ list_add_rcu(&ghes->list, &ghes_nmi);
+ mutex_unlock(&ghes_list_mutex);
+}
+
+static void ghes_nmi_remove(struct ghes *ghes)
+{
+ unsigned long len;
+
+ mutex_lock(&ghes_list_mutex);
+ list_del_rcu(&ghes->list);
+ if (list_empty(&ghes_nmi))
+ unregister_nmi_handler(NMI_LOCAL, "ghes");
+ mutex_unlock(&ghes_list_mutex);
+ /*
+ * To synchronize with NMI handler, ghes can only be
+ * freed after NMI handler finishes.
+ */
+ synchronize_rcu();
+ len = ghes_esource_prealloc_size(ghes->generic);
+ ghes_estatus_pool_shrink(len);
+}
+
+static void ghes_nmi_init_cxt(void)
+{
+ init_irq_work(&ghes_proc_irq_work, ghes_proc_in_irq);
+}
+#else /* CONFIG_HAVE_ACPI_APEI_NMI */
+static inline void ghes_nmi_add(struct ghes *ghes)
+{
+ pr_err(GHES_PFX "ID: %d, trying to add NMI notification which is not supported!\n",
+ ghes->generic->header.source_id);
+ BUG();
+}
+
+static inline void ghes_nmi_remove(struct ghes *ghes)
+{
+ pr_err(GHES_PFX "ID: %d, trying to remove NMI notification which is not supported!\n",
+ ghes->generic->header.source_id);
+ BUG();
+}
+
+static inline void ghes_nmi_init_cxt(void)
+{
+}
+#endif /* CONFIG_HAVE_ACPI_APEI_NMI */
+
+static int ghes_probe(struct platform_device *ghes_dev)
+{
+ struct acpi_hest_generic *generic;
+ struct ghes *ghes = NULL;
+
+ int rc = -EINVAL;
+
+ generic = *(struct acpi_hest_generic **)ghes_dev->dev.platform_data;
+ if (!generic->enabled)
+ return -ENODEV;
+
+ switch (generic->notify.type) {
+ case ACPI_HEST_NOTIFY_POLLED:
+ case ACPI_HEST_NOTIFY_EXTERNAL:
+ case ACPI_HEST_NOTIFY_SCI:
+ break;
+ case ACPI_HEST_NOTIFY_NMI:
+ if (!IS_ENABLED(CONFIG_HAVE_ACPI_APEI_NMI)) {
+ pr_warn(GHES_PFX "Generic hardware error source: %d notified via NMI interrupt is not supported!\n",
+ generic->header.source_id);
+ goto err;
+ }
+ break;
+ case ACPI_HEST_NOTIFY_LOCAL:
+ pr_warning(GHES_PFX "Generic hardware error source: %d notified via local interrupt is not supported!\n",
+ generic->header.source_id);
+ goto err;
+ default:
+ pr_warning(FW_WARN GHES_PFX "Unknown notification type: %u for generic hardware error source: %d\n",
+ generic->notify.type, generic->header.source_id);
+ goto err;
+ }
+
+ rc = -EIO;
+ if (generic->error_block_length <
+ sizeof(struct acpi_hest_generic_status)) {
+ pr_warning(FW_BUG GHES_PFX "Invalid error block length: %u for generic hardware error source: %d\n",
+ generic->error_block_length,
+ generic->header.source_id);
+ goto err;
+ }
+ ghes = ghes_new(generic);
+ if (IS_ERR(ghes)) {
+ rc = PTR_ERR(ghes);
+ ghes = NULL;
+ goto err;
+ }
+
+ rc = ghes_edac_register(ghes, &ghes_dev->dev);
+ if (rc < 0)
+ goto err;
+
+ switch (generic->notify.type) {
+ case ACPI_HEST_NOTIFY_POLLED:
+ ghes->timer.function = ghes_poll_func;
+ ghes->timer.data = (unsigned long)ghes;
+ init_timer_deferrable(&ghes->timer);
+ ghes_add_timer(ghes);
+ break;
+ case ACPI_HEST_NOTIFY_EXTERNAL:
+ /* External interrupt vector is GSI */
+ rc = acpi_gsi_to_irq(generic->notify.vector, &ghes->irq);
+ if (rc) {
+ pr_err(GHES_PFX "Failed to map GSI to IRQ for generic hardware error source: %d\n",
+ generic->header.source_id);
+ goto err_edac_unreg;
+ }
+ rc = request_irq(ghes->irq, ghes_irq_func, 0, "GHES IRQ", ghes);
+ if (rc) {
+ pr_err(GHES_PFX "Failed to register IRQ for generic hardware error source: %d\n",
+ generic->header.source_id);
+ goto err_edac_unreg;
+ }
+ break;
+ case ACPI_HEST_NOTIFY_SCI:
+ mutex_lock(&ghes_list_mutex);
+ if (list_empty(&ghes_sci))
+ register_acpi_hed_notifier(&ghes_notifier_sci);
+ list_add_rcu(&ghes->list, &ghes_sci);
+ mutex_unlock(&ghes_list_mutex);
+ break;
+ case ACPI_HEST_NOTIFY_NMI:
+ ghes_nmi_add(ghes);
+ break;
+ default:
+ BUG();
+ }
+ platform_set_drvdata(ghes_dev, ghes);
+
+ return 0;
+err_edac_unreg:
+ ghes_edac_unregister(ghes);
+err:
+ if (ghes) {
+ ghes_fini(ghes);
+ kfree(ghes);
+ }
+ return rc;
+}
+
+static int ghes_remove(struct platform_device *ghes_dev)
+{
+ struct ghes *ghes;
+ struct acpi_hest_generic *generic;
+
+ ghes = platform_get_drvdata(ghes_dev);
+ generic = ghes->generic;
+
+ ghes->flags |= GHES_EXITING;
+ switch (generic->notify.type) {
+ case ACPI_HEST_NOTIFY_POLLED:
+ del_timer_sync(&ghes->timer);
+ break;
+ case ACPI_HEST_NOTIFY_EXTERNAL:
+ free_irq(ghes->irq, ghes);
+ break;
+ case ACPI_HEST_NOTIFY_SCI:
+ mutex_lock(&ghes_list_mutex);
+ list_del_rcu(&ghes->list);
+ if (list_empty(&ghes_sci))
+ unregister_acpi_hed_notifier(&ghes_notifier_sci);
+ mutex_unlock(&ghes_list_mutex);
+ break;
+ case ACPI_HEST_NOTIFY_NMI:
+ ghes_nmi_remove(ghes);
+ break;
+ default:
+ BUG();
+ break;
+ }
+
+ ghes_fini(ghes);
+
+ ghes_edac_unregister(ghes);
+
+ kfree(ghes);
+
+ platform_set_drvdata(ghes_dev, NULL);
+
+ return 0;
+}
+
+static struct platform_driver ghes_platform_driver = {
+ .driver = {
+ .name = "GHES",
+ },
+ .probe = ghes_probe,
+ .remove = ghes_remove,
+};
+
+static int __init ghes_init(void)
+{
+ int rc;
+
+ if (acpi_disabled)
+ return -ENODEV;
+
+ if (hest_disable) {
+ pr_info(GHES_PFX "HEST is not enabled!\n");
+ return -EINVAL;
+ }
+
+ if (ghes_disable) {
+ pr_info(GHES_PFX "GHES is not enabled!\n");
+ return -EINVAL;
+ }
+
+ ghes_nmi_init_cxt();
+
+ rc = ghes_ioremap_init();
+ if (rc)
+ goto err;
+
+ rc = ghes_estatus_pool_init();
+ if (rc)
+ goto err_ioremap_exit;
+
+ rc = ghes_estatus_pool_expand(GHES_ESTATUS_CACHE_AVG_SIZE *
+ GHES_ESTATUS_CACHE_ALLOCED_MAX);
+ if (rc)
+ goto err_pool_exit;
+
+ rc = platform_driver_register(&ghes_platform_driver);
+ if (rc)
+ goto err_pool_exit;
+
+ rc = apei_osc_setup();
+ if (rc == 0 && osc_sb_apei_support_acked)
+ pr_info(GHES_PFX "APEI firmware first mode is enabled by APEI bit and WHEA _OSC.\n");
+ else if (rc == 0 && !osc_sb_apei_support_acked)
+ pr_info(GHES_PFX "APEI firmware first mode is enabled by WHEA _OSC.\n");
+ else if (rc && osc_sb_apei_support_acked)
+ pr_info(GHES_PFX "APEI firmware first mode is enabled by APEI bit.\n");
+ else
+ pr_info(GHES_PFX "Failed to enable APEI firmware first mode.\n");
+
+ return 0;
+err_pool_exit:
+ ghes_estatus_pool_exit();
+err_ioremap_exit:
+ ghes_ioremap_exit();
+err:
+ return rc;
+}
+
+static void __exit ghes_exit(void)
+{
+ platform_driver_unregister(&ghes_platform_driver);
+ ghes_estatus_pool_exit();
+ ghes_ioremap_exit();
+}
+
+module_init(ghes_init);
+module_exit(ghes_exit);
+
+MODULE_AUTHOR("Huang Ying");
+MODULE_DESCRIPTION("APEI Generic Hardware Error Source support");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:GHES");
diff --git a/drivers/acpi/apei/hest.c b/drivers/acpi/apei/hest.c
new file mode 100644
index 000000000..06e9b411a
--- /dev/null
+++ b/drivers/acpi/apei/hest.c
@@ -0,0 +1,255 @@
+/*
+ * APEI Hardware Error Souce Table support
+ *
+ * HEST describes error sources in detail; communicates operational
+ * parameters (i.e. severity levels, masking bits, and threshold
+ * values) to Linux as necessary. It also allows the BIOS to report
+ * non-standard error sources to Linux (for example, chipset-specific
+ * error registers).
+ *
+ * For more information about HEST, please refer to ACPI Specification
+ * version 4.0, section 17.3.2.
+ *
+ * Copyright 2009 Intel Corp.
+ * Author: Huang Ying <ying.huang@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation;
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/acpi.h>
+#include <linux/kdebug.h>
+#include <linux/highmem.h>
+#include <linux/io.h>
+#include <linux/platform_device.h>
+#include <acpi/apei.h>
+
+#include "apei-internal.h"
+
+#define HEST_PFX "HEST: "
+
+bool hest_disable;
+EXPORT_SYMBOL_GPL(hest_disable);
+
+/* HEST table parsing */
+
+static struct acpi_table_hest *__read_mostly hest_tab;
+
+static const int hest_esrc_len_tab[ACPI_HEST_TYPE_RESERVED] = {
+ [ACPI_HEST_TYPE_IA32_CHECK] = -1, /* need further calculation */
+ [ACPI_HEST_TYPE_IA32_CORRECTED_CHECK] = -1,
+ [ACPI_HEST_TYPE_IA32_NMI] = sizeof(struct acpi_hest_ia_nmi),
+ [ACPI_HEST_TYPE_AER_ROOT_PORT] = sizeof(struct acpi_hest_aer_root),
+ [ACPI_HEST_TYPE_AER_ENDPOINT] = sizeof(struct acpi_hest_aer),
+ [ACPI_HEST_TYPE_AER_BRIDGE] = sizeof(struct acpi_hest_aer_bridge),
+ [ACPI_HEST_TYPE_GENERIC_ERROR] = sizeof(struct acpi_hest_generic),
+};
+
+static int hest_esrc_len(struct acpi_hest_header *hest_hdr)
+{
+ u16 hest_type = hest_hdr->type;
+ int len;
+
+ if (hest_type >= ACPI_HEST_TYPE_RESERVED)
+ return 0;
+
+ len = hest_esrc_len_tab[hest_type];
+
+ if (hest_type == ACPI_HEST_TYPE_IA32_CORRECTED_CHECK) {
+ struct acpi_hest_ia_corrected *cmc;
+ cmc = (struct acpi_hest_ia_corrected *)hest_hdr;
+ len = sizeof(*cmc) + cmc->num_hardware_banks *
+ sizeof(struct acpi_hest_ia_error_bank);
+ } else if (hest_type == ACPI_HEST_TYPE_IA32_CHECK) {
+ struct acpi_hest_ia_machine_check *mc;
+ mc = (struct acpi_hest_ia_machine_check *)hest_hdr;
+ len = sizeof(*mc) + mc->num_hardware_banks *
+ sizeof(struct acpi_hest_ia_error_bank);
+ }
+ BUG_ON(len == -1);
+
+ return len;
+};
+
+int apei_hest_parse(apei_hest_func_t func, void *data)
+{
+ struct acpi_hest_header *hest_hdr;
+ int i, rc, len;
+
+ if (hest_disable || !hest_tab)
+ return -EINVAL;
+
+ hest_hdr = (struct acpi_hest_header *)(hest_tab + 1);
+ for (i = 0; i < hest_tab->error_source_count; i++) {
+ len = hest_esrc_len(hest_hdr);
+ if (!len) {
+ pr_warning(FW_WARN HEST_PFX
+ "Unknown or unused hardware error source "
+ "type: %d for hardware error source: %d.\n",
+ hest_hdr->type, hest_hdr->source_id);
+ return -EINVAL;
+ }
+ if ((void *)hest_hdr + len >
+ (void *)hest_tab + hest_tab->header.length) {
+ pr_warning(FW_BUG HEST_PFX
+ "Table contents overflow for hardware error source: %d.\n",
+ hest_hdr->source_id);
+ return -EINVAL;
+ }
+
+ rc = func(hest_hdr, data);
+ if (rc)
+ return rc;
+
+ hest_hdr = (void *)hest_hdr + len;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(apei_hest_parse);
+
+/*
+ * Check if firmware advertises firmware first mode. We need FF bit to be set
+ * along with a set of MC banks which work in FF mode.
+ */
+static int __init hest_parse_cmc(struct acpi_hest_header *hest_hdr, void *data)
+{
+ return arch_apei_enable_cmcff(hest_hdr, data);
+}
+
+struct ghes_arr {
+ struct platform_device **ghes_devs;
+ unsigned int count;
+};
+
+static int __init hest_parse_ghes_count(struct acpi_hest_header *hest_hdr, void *data)
+{
+ int *count = data;
+
+ if (hest_hdr->type == ACPI_HEST_TYPE_GENERIC_ERROR)
+ (*count)++;
+ return 0;
+}
+
+static int __init hest_parse_ghes(struct acpi_hest_header *hest_hdr, void *data)
+{
+ struct platform_device *ghes_dev;
+ struct ghes_arr *ghes_arr = data;
+ int rc, i;
+
+ if (hest_hdr->type != ACPI_HEST_TYPE_GENERIC_ERROR)
+ return 0;
+
+ if (!((struct acpi_hest_generic *)hest_hdr)->enabled)
+ return 0;
+ for (i = 0; i < ghes_arr->count; i++) {
+ struct acpi_hest_header *hdr;
+ ghes_dev = ghes_arr->ghes_devs[i];
+ hdr = *(struct acpi_hest_header **)ghes_dev->dev.platform_data;
+ if (hdr->source_id == hest_hdr->source_id) {
+ pr_warning(FW_WARN HEST_PFX "Duplicated hardware error source ID: %d.\n",
+ hdr->source_id);
+ return -EIO;
+ }
+ }
+ ghes_dev = platform_device_alloc("GHES", hest_hdr->source_id);
+ if (!ghes_dev)
+ return -ENOMEM;
+
+ rc = platform_device_add_data(ghes_dev, &hest_hdr, sizeof(void *));
+ if (rc)
+ goto err;
+
+ rc = platform_device_add(ghes_dev);
+ if (rc)
+ goto err;
+ ghes_arr->ghes_devs[ghes_arr->count++] = ghes_dev;
+
+ return 0;
+err:
+ platform_device_put(ghes_dev);
+ return rc;
+}
+
+static int __init hest_ghes_dev_register(unsigned int ghes_count)
+{
+ int rc, i;
+ struct ghes_arr ghes_arr;
+
+ ghes_arr.count = 0;
+ ghes_arr.ghes_devs = kmalloc(sizeof(void *) * ghes_count, GFP_KERNEL);
+ if (!ghes_arr.ghes_devs)
+ return -ENOMEM;
+
+ rc = apei_hest_parse(hest_parse_ghes, &ghes_arr);
+ if (rc)
+ goto err;
+out:
+ kfree(ghes_arr.ghes_devs);
+ return rc;
+err:
+ for (i = 0; i < ghes_arr.count; i++)
+ platform_device_unregister(ghes_arr.ghes_devs[i]);
+ goto out;
+}
+
+static int __init setup_hest_disable(char *str)
+{
+ hest_disable = 1;
+ return 0;
+}
+
+__setup("hest_disable", setup_hest_disable);
+
+void __init acpi_hest_init(void)
+{
+ acpi_status status;
+ int rc = -ENODEV;
+ unsigned int ghes_count = 0;
+
+ if (hest_disable) {
+ pr_info(HEST_PFX "Table parsing disabled.\n");
+ return;
+ }
+
+ status = acpi_get_table(ACPI_SIG_HEST, 0,
+ (struct acpi_table_header **)&hest_tab);
+ if (status == AE_NOT_FOUND)
+ goto err;
+ else if (ACPI_FAILURE(status)) {
+ const char *msg = acpi_format_exception(status);
+ pr_err(HEST_PFX "Failed to get table, %s\n", msg);
+ rc = -EINVAL;
+ goto err;
+ }
+
+ if (!acpi_disable_cmcff)
+ apei_hest_parse(hest_parse_cmc, NULL);
+
+ if (!ghes_disable) {
+ rc = apei_hest_parse(hest_parse_ghes_count, &ghes_count);
+ if (rc)
+ goto err;
+ rc = hest_ghes_dev_register(ghes_count);
+ if (rc)
+ goto err;
+ }
+
+ pr_info(HEST_PFX "Table parsing has been initialized.\n");
+ return;
+err:
+ hest_disable = 1;
+}