summaryrefslogtreecommitdiff
path: root/drivers
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2014-01-20 12:10:27 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2014-01-20 12:10:27 -0800
commitfab5669d556200c4dd119af705bff14085845d1e (patch)
tree366d05fc858fc599177d687f37b227a4832c2f48 /drivers
parent74e8ee8262c3f93bbc41804037b43f07b95897bb (diff)
parentb769e014f3ae4af8a56c6327077b3c40410dedad (diff)
Merge branch 'x86-ras-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 RAS changes from Ingo Molnar: - SCI reporting for other error types not only correctable ones - GHES cleanups - Add the functionality to override error reporting agents as some machines are sporting a new extended error logging capability which, if done properly in the BIOS, makes a corresponding EDAC module redundant - PCIe AER tracepoint severity levels fix - Error path correction for the mce device init - MCE timer fix - Add more flexibility to the error injection (EINJ) debugfs interface * 'x86-ras-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: x86, mce: Fix mce_start_timer semantics ACPI, APEI, GHES: Cleanup ghes memory error handling ACPI, APEI: Cleanup alignment-aware accesses ACPI, APEI, GHES: Do not report only correctable errors with SCI ACPI, APEI, EINJ: Changes to the ACPI/APEI/EINJ debugfs interface ACPI, eMCA: Combine eMCA/EDAC event reporting priority EDAC, sb_edac: Modify H/W event reporting policy EDAC: Add an edac_report parameter to EDAC PCI, AER: Fix severity usage in aer trace event x86, mce: Call put_device on device_register failure
Diffstat (limited to 'drivers')
-rw-r--r--drivers/acpi/acpi_extlog.c18
-rw-r--r--drivers/acpi/apei/apei-base.c4
-rw-r--r--drivers/acpi/apei/einj.c58
-rw-r--r--drivers/acpi/apei/erst.c2
-rw-r--r--drivers/acpi/apei/ghes.c39
-rw-r--r--drivers/edac/edac_stub.c19
-rw-r--r--drivers/edac/sb_edac.c6
7 files changed, 107 insertions, 39 deletions
diff --git a/drivers/acpi/acpi_extlog.c b/drivers/acpi/acpi_extlog.c
index a6869e110ce5..5d33c5415405 100644
--- a/drivers/acpi/acpi_extlog.c
+++ b/drivers/acpi/acpi_extlog.c
@@ -12,6 +12,7 @@
#include <acpi/acpi_bus.h>
#include <linux/cper.h>
#include <linux/ratelimit.h>
+#include <linux/edac.h>
#include <asm/cpu.h>
#include <asm/mce.h>
@@ -43,6 +44,8 @@ struct extlog_l1_head {
u8 rev1[12];
};
+static int old_edac_report_status;
+
static u8 extlog_dsm_uuid[] = "663E35AF-CC10-41A4-88EA-5470AF055295";
/* L1 table related physical address */
@@ -150,7 +153,7 @@ static int extlog_print(struct notifier_block *nb, unsigned long val,
rc = print_extlog_rcd(NULL, (struct acpi_generic_status *)elog_buf, cpu);
- return NOTIFY_DONE;
+ return NOTIFY_STOP;
}
static int extlog_get_dsm(acpi_handle handle, int rev, int func, u64 *ret)
@@ -231,8 +234,12 @@ static int __init extlog_init(void)
u64 cap;
int rc;
- rc = -ENODEV;
+ if (get_edac_report_status() == EDAC_REPORTING_FORCE) {
+ pr_warn("Not loading eMCA, error reporting force-enabled through EDAC.\n");
+ return -EPERM;
+ }
+ rc = -ENODEV;
rdmsrl(MSR_IA32_MCG_CAP, cap);
if (!(cap & MCG_ELOG_P))
return rc;
@@ -287,6 +294,12 @@ static int __init extlog_init(void)
if (elog_buf == NULL)
goto err_release_elog;
+ /*
+ * eMCA event report method has higher priority than EDAC method,
+ * unless EDAC event report method is mandatory.
+ */
+ old_edac_report_status = get_edac_report_status();
+ set_edac_report_status(EDAC_REPORTING_DISABLED);
mce_register_decode_chain(&extlog_mce_dec);
/* enable OS to be involved to take over management from BIOS */
((struct extlog_l1_head *)extlog_l1_addr)->flags |= FLAG_OS_OPTIN;
@@ -308,6 +321,7 @@ err:
static void __exit extlog_exit(void)
{
+ set_edac_report_status(old_edac_report_status);
mce_unregister_decode_chain(&extlog_mce_dec);
((struct extlog_l1_head *)extlog_l1_addr)->flags &= ~FLAG_OS_OPTIN;
if (extlog_l1_addr)
diff --git a/drivers/acpi/apei/apei-base.c b/drivers/acpi/apei/apei-base.c
index 6d2c49b86b7f..e55584a072c6 100644
--- a/drivers/acpi/apei/apei-base.c
+++ b/drivers/acpi/apei/apei-base.c
@@ -41,6 +41,7 @@
#include <linux/rculist.h>
#include <linux/interrupt.h>
#include <linux/debugfs.h>
+#include <asm/unaligned.h>
#include "apei-internal.h"
@@ -567,8 +568,7 @@ static int apei_check_gar(struct acpi_generic_address *reg, u64 *paddr,
bit_offset = reg->bit_offset;
access_size_code = reg->access_width;
space_id = reg->space_id;
- /* Handle possible alignment issues */
- memcpy(paddr, &reg->address, sizeof(*paddr));
+ *paddr = get_unaligned(&reg->address);
if (!*paddr) {
pr_warning(FW_BUG APEI_PFX
"Invalid physical address in GAR [0x%llx/%u/%u/%u/%u]\n",
diff --git a/drivers/acpi/apei/einj.c b/drivers/acpi/apei/einj.c
index fb57d03e698b..7dcc8a824aae 100644
--- a/drivers/acpi/apei/einj.c
+++ b/drivers/acpi/apei/einj.c
@@ -34,6 +34,7 @@
#include <linux/delay.h>
#include <linux/mm.h>
#include <acpi/acpi.h>
+#include <asm/unaligned.h>
#include "apei-internal.h"
@@ -216,7 +217,7 @@ static void check_vendor_extension(u64 paddr,
static void *einj_get_parameter_address(void)
{
int i;
- u64 paddrv4 = 0, paddrv5 = 0;
+ u64 pa_v4 = 0, pa_v5 = 0;
struct acpi_whea_header *entry;
entry = EINJ_TAB_ENTRY(einj_tab);
@@ -225,30 +226,28 @@ static void *einj_get_parameter_address(void)
entry->instruction == ACPI_EINJ_WRITE_REGISTER &&
entry->register_region.space_id ==
ACPI_ADR_SPACE_SYSTEM_MEMORY)
- memcpy(&paddrv4, &entry->register_region.address,
- sizeof(paddrv4));
+ pa_v4 = get_unaligned(&entry->register_region.address);
if (entry->action == ACPI_EINJ_SET_ERROR_TYPE_WITH_ADDRESS &&
entry->instruction == ACPI_EINJ_WRITE_REGISTER &&
entry->register_region.space_id ==
ACPI_ADR_SPACE_SYSTEM_MEMORY)
- memcpy(&paddrv5, &entry->register_region.address,
- sizeof(paddrv5));
+ pa_v5 = get_unaligned(&entry->register_region.address);
entry++;
}
- if (paddrv5) {
+ if (pa_v5) {
struct set_error_type_with_address *v5param;
- v5param = acpi_os_map_memory(paddrv5, sizeof(*v5param));
+ v5param = acpi_os_map_memory(pa_v5, sizeof(*v5param));
if (v5param) {
acpi5 = 1;
- check_vendor_extension(paddrv5, v5param);
+ check_vendor_extension(pa_v5, v5param);
return v5param;
}
}
- if (param_extension && paddrv4) {
+ if (param_extension && pa_v4) {
struct einj_parameter *v4param;
- v4param = acpi_os_map_memory(paddrv4, sizeof(*v4param));
+ v4param = acpi_os_map_memory(pa_v4, sizeof(*v4param));
if (!v4param)
return NULL;
if (v4param->reserved1 || v4param->reserved2) {
@@ -416,7 +415,8 @@ out:
return rc;
}
-static int __einj_error_inject(u32 type, u64 param1, u64 param2)
+static int __einj_error_inject(u32 type, u32 flags, u64 param1, u64 param2,
+ u64 param3, u64 param4)
{
struct apei_exec_context ctx;
u64 val, trigger_paddr, timeout = FIRMWARE_TIMEOUT;
@@ -446,6 +446,12 @@ static int __einj_error_inject(u32 type, u64 param1, u64 param2)
break;
}
v5param->flags = vendor_flags;
+ } else if (flags) {
+ v5param->flags = flags;
+ v5param->memory_address = param1;
+ v5param->memory_address_range = param2;
+ v5param->apicid = param3;
+ v5param->pcie_sbdf = param4;
} else {
switch (type) {
case ACPI_EINJ_PROCESSOR_CORRECTABLE:
@@ -514,11 +520,17 @@ static int __einj_error_inject(u32 type, u64 param1, u64 param2)
}
/* Inject the specified hardware error */
-static int einj_error_inject(u32 type, u64 param1, u64 param2)
+static int einj_error_inject(u32 type, u32 flags, u64 param1, u64 param2,
+ u64 param3, u64 param4)
{
int rc;
unsigned long pfn;
+ /* If user manually set "flags", make sure it is legal */
+ if (flags && (flags &
+ ~(SETWA_FLAGS_APICID|SETWA_FLAGS_MEM|SETWA_FLAGS_PCIE_SBDF)))
+ return -EINVAL;
+
/*
* We need extra sanity checks for memory errors.
* Other types leap directly to injection.
@@ -532,7 +544,7 @@ static int einj_error_inject(u32 type, u64 param1, u64 param2)
if (type & ACPI5_VENDOR_BIT) {
if (vendor_flags != SETWA_FLAGS_MEM)
goto inject;
- } else if (!(type & MEM_ERROR_MASK))
+ } else if (!(type & MEM_ERROR_MASK) && !(flags & SETWA_FLAGS_MEM))
goto inject;
/*
@@ -546,15 +558,18 @@ static int einj_error_inject(u32 type, u64 param1, u64 param2)
inject:
mutex_lock(&einj_mutex);
- rc = __einj_error_inject(type, param1, param2);
+ rc = __einj_error_inject(type, flags, param1, param2, param3, param4);
mutex_unlock(&einj_mutex);
return rc;
}
static u32 error_type;
+static u32 error_flags;
static u64 error_param1;
static u64 error_param2;
+static u64 error_param3;
+static u64 error_param4;
static struct dentry *einj_debug_dir;
static int available_error_type_show(struct seq_file *m, void *v)
@@ -648,7 +663,8 @@ static int error_inject_set(void *data, u64 val)
if (!error_type)
return -EINVAL;
- return einj_error_inject(error_type, error_param1, error_param2);
+ return einj_error_inject(error_type, error_flags, error_param1, error_param2,
+ error_param3, error_param4);
}
DEFINE_SIMPLE_ATTRIBUTE(error_inject_fops, NULL,
@@ -729,6 +745,10 @@ static int __init einj_init(void)
rc = -ENOMEM;
einj_param = einj_get_parameter_address();
if ((param_extension || acpi5) && einj_param) {
+ fentry = debugfs_create_x32("flags", S_IRUSR | S_IWUSR,
+ einj_debug_dir, &error_flags);
+ if (!fentry)
+ goto err_unmap;
fentry = debugfs_create_x64("param1", S_IRUSR | S_IWUSR,
einj_debug_dir, &error_param1);
if (!fentry)
@@ -737,6 +757,14 @@ static int __init einj_init(void)
einj_debug_dir, &error_param2);
if (!fentry)
goto err_unmap;
+ fentry = debugfs_create_x64("param3", S_IRUSR | S_IWUSR,
+ einj_debug_dir, &error_param3);
+ if (!fentry)
+ goto err_unmap;
+ fentry = debugfs_create_x64("param4", S_IRUSR | S_IWUSR,
+ einj_debug_dir, &error_param4);
+ if (!fentry)
+ goto err_unmap;
fentry = debugfs_create_x32("notrigger", S_IRUSR | S_IWUSR,
einj_debug_dir, &notrigger);
diff --git a/drivers/acpi/apei/erst.c b/drivers/acpi/apei/erst.c
index cb1d557fc22c..ed65e9c4b5b0 100644
--- a/drivers/acpi/apei/erst.c
+++ b/drivers/acpi/apei/erst.c
@@ -611,7 +611,7 @@ static void __erst_record_id_cache_compact(void)
if (entries[i] == APEI_ERST_INVALID_RECORD_ID)
continue;
if (wpos != i)
- memcpy(&entries[wpos], &entries[i], sizeof(entries[i]));
+ entries[wpos] = entries[i];
wpos++;
}
erst_record_id_cache.len = wpos;
diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index a30bc313787b..46766ef7ef5d 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -413,27 +413,31 @@ static void ghes_handle_memory_failure(struct acpi_generic_data *gdata, int sev)
{
#ifdef CONFIG_ACPI_APEI_MEMORY_FAILURE
unsigned long pfn;
+ int flags = -1;
int sec_sev = ghes_severity(gdata->error_severity);
struct cper_sec_mem_err *mem_err;
mem_err = (struct cper_sec_mem_err *)(gdata + 1);
- if (sec_sev == GHES_SEV_CORRECTED &&
- (gdata->flags & CPER_SEC_ERROR_THRESHOLD_EXCEEDED) &&
- (mem_err->validation_bits & CPER_MEM_VALID_PA)) {
- pfn = mem_err->physical_addr >> PAGE_SHIFT;
- if (pfn_valid(pfn))
- memory_failure_queue(pfn, 0, MF_SOFT_OFFLINE);
- else if (printk_ratelimit())
- pr_warn(FW_WARN GHES_PFX
- "Invalid address in generic error data: %#llx\n",
- mem_err->physical_addr);
- }
- if (sev == GHES_SEV_RECOVERABLE &&
- sec_sev == GHES_SEV_RECOVERABLE &&
- mem_err->validation_bits & CPER_MEM_VALID_PA) {
- pfn = mem_err->physical_addr >> PAGE_SHIFT;
- memory_failure_queue(pfn, 0, 0);
+ if (!(mem_err->validation_bits & CPER_MEM_VALID_PA))
+ return;
+
+ pfn = mem_err->physical_addr >> PAGE_SHIFT;
+ if (!pfn_valid(pfn)) {
+ pr_warn_ratelimited(FW_WARN GHES_PFX
+ "Invalid address in generic error data: %#llx\n",
+ mem_err->physical_addr);
+ return;
}
+
+ /* iff following two events can be handled properly by now */
+ if (sec_sev == GHES_SEV_CORRECTED &&
+ (gdata->flags & CPER_SEC_ERROR_THRESHOLD_EXCEEDED))
+ flags = MF_SOFT_OFFLINE;
+ if (sev == GHES_SEV_RECOVERABLE && sec_sev == GHES_SEV_RECOVERABLE)
+ flags = 0;
+
+ if (flags != -1)
+ memory_failure_queue(pfn, 0, flags);
#endif
}
@@ -453,8 +457,7 @@ static void ghes_do_proc(struct ghes *ghes,
ghes_edac_report_mem_error(ghes, sev, mem_err);
#ifdef CONFIG_X86_MCE
- apei_mce_report_mem_error(sev == GHES_SEV_CORRECTED,
- mem_err);
+ apei_mce_report_mem_error(sev, mem_err);
#endif
ghes_handle_memory_failure(gdata, sev);
}
diff --git a/drivers/edac/edac_stub.c b/drivers/edac/edac_stub.c
index 351945fa2ecd..9d9e18aefaaa 100644
--- a/drivers/edac/edac_stub.c
+++ b/drivers/edac/edac_stub.c
@@ -29,6 +29,25 @@ EXPORT_SYMBOL_GPL(edac_err_assert);
static atomic_t edac_subsys_valid = ATOMIC_INIT(0);
+int edac_report_status = EDAC_REPORTING_ENABLED;
+EXPORT_SYMBOL_GPL(edac_report_status);
+
+static int __init edac_report_setup(char *str)
+{
+ if (!str)
+ return -EINVAL;
+
+ if (!strncmp(str, "on", 2))
+ set_edac_report_status(EDAC_REPORTING_ENABLED);
+ else if (!strncmp(str, "off", 3))
+ set_edac_report_status(EDAC_REPORTING_DISABLED);
+ else if (!strncmp(str, "force", 5))
+ set_edac_report_status(EDAC_REPORTING_FORCE);
+
+ return 0;
+}
+__setup("edac_report=", edac_report_setup);
+
/*
* called to determine if there is an EDAC driver interested in
* knowing an event (such as NMI) occurred
diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c
index de988c8da1c8..54e2abe671f7 100644
--- a/drivers/edac/sb_edac.c
+++ b/drivers/edac/sb_edac.c
@@ -1829,6 +1829,9 @@ static int sbridge_mce_check_error(struct notifier_block *nb, unsigned long val,
struct mem_ctl_info *mci;
struct sbridge_pvt *pvt;
+ if (get_edac_report_status() == EDAC_REPORTING_DISABLED)
+ return NOTIFY_DONE;
+
mci = get_mci_for_node_id(mce->socketid);
if (!mci)
return NOTIFY_BAD;
@@ -2142,9 +2145,10 @@ static int __init sbridge_init(void)
opstate_init();
pci_rc = pci_register_driver(&sbridge_driver);
-
if (pci_rc >= 0) {
mce_register_decode_chain(&sbridge_mce_dec);
+ if (get_edac_report_status() == EDAC_REPORTING_DISABLED)
+ sbridge_printk(KERN_WARNING, "Loading driver, error reporting disabled.\n");
return 0;
}