summaryrefslogtreecommitdiff
path: root/drivers
diff options
context:
space:
mode:
authorChen, Gong <gong.chen@linux.intel.com>2014-06-17 22:33:07 -0400
committerTony Luck <tony.luck@intel.com>2014-06-25 13:26:47 -0700
commit2dfb7d51a61d7ca91b131c8db612f27d9390f2d5 (patch)
treeb2e9375f1ffaf2dc93418d78c27b6a13b34c8e88 /drivers
parentd963cd95bea93b7db9390a71d1e2cabbb3b2c3ea (diff)
trace, RAS: Add eMCA trace event interface
Add trace interface to elaborate all H/W error related information. Signed-off-by: Chen, Gong <gong.chen@linux.intel.com> Acked-by: Borislav Petkov <bp@suse.de> Signed-off-by: Tony Luck <tony.luck@intel.com>
Diffstat (limited to 'drivers')
-rw-r--r--drivers/acpi/Kconfig4
-rw-r--r--drivers/acpi/acpi_extlog.c27
-rw-r--r--drivers/firmware/efi/cper.c45
-rw-r--r--drivers/ras/ras.c3
4 files changed, 71 insertions, 8 deletions
diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig
index a34a22841002..206942b8d105 100644
--- a/drivers/acpi/Kconfig
+++ b/drivers/acpi/Kconfig
@@ -370,6 +370,7 @@ config ACPI_EXTLOG
tristate "Extended Error Log support"
depends on X86_MCE && X86_LOCAL_APIC
select UEFI_CPER
+ select RAS
default n
help
Certain usages such as Predictive Failure Analysis (PFA) require
@@ -384,6 +385,7 @@ config ACPI_EXTLOG
Enhanced MCA Logging allows firmware to provide additional error
information to system software, synchronous with MCE or CMCI. This
- driver adds support for that functionality.
+ driver adds support for that functionality with corresponding
+ tracepoint which carries that information to userspace.
endif # ACPI
diff --git a/drivers/acpi/acpi_extlog.c b/drivers/acpi/acpi_extlog.c
index 185334114d71..e61da957f30f 100644
--- a/drivers/acpi/acpi_extlog.c
+++ b/drivers/acpi/acpi_extlog.c
@@ -16,6 +16,7 @@
#include <asm/mce.h>
#include "apei/apei-internal.h"
+#include <ras/ras_event.h>
#define EXT_ELOG_ENTRY_MASK GENMASK_ULL(51, 0) /* elog entry address mask */
@@ -137,8 +138,12 @@ static int extlog_print(struct notifier_block *nb, unsigned long val,
struct mce *mce = (struct mce *)data;
int bank = mce->bank;
int cpu = mce->extcpu;
- struct acpi_generic_status *estatus;
- int rc;
+ struct acpi_generic_status *estatus, *tmp;
+ struct acpi_generic_data *gdata;
+ const uuid_le *fru_id = &NULL_UUID_LE;
+ char *fru_text = "";
+ uuid_le *sec_type;
+ static u32 err_seq;
estatus = extlog_elog_entry_check(cpu, bank);
if (estatus == NULL)
@@ -148,7 +153,23 @@ static int extlog_print(struct notifier_block *nb, unsigned long val,
/* clear record status to enable BIOS to update it again */
estatus->block_status = 0;
- rc = print_extlog_rcd(NULL, (struct acpi_generic_status *)elog_buf, cpu);
+ tmp = (struct acpi_generic_status *)elog_buf;
+ print_extlog_rcd(NULL, tmp, cpu);
+
+ /* log event via trace */
+ err_seq++;
+ gdata = (struct acpi_generic_data *)(tmp + 1);
+ if (gdata->validation_bits & CPER_SEC_VALID_FRU_ID)
+ fru_id = (uuid_le *)gdata->fru_id;
+ if (gdata->validation_bits & CPER_SEC_VALID_FRU_TEXT)
+ fru_text = gdata->fru_text;
+ sec_type = (uuid_le *)gdata->section_type;
+ if (!uuid_le_cmp(*sec_type, CPER_SEC_PLATFORM_MEM)) {
+ struct cper_sec_mem_err *mem = (void *)(gdata + 1);
+ if (gdata->error_data_length >= sizeof(*mem))
+ trace_extlog_mem_event(mem, err_seq, fru_id, fru_text,
+ (u8)gdata->error_severity);
+ }
return NOTIFY_STOP;
}
diff --git a/drivers/firmware/efi/cper.c b/drivers/firmware/efi/cper.c
index ac33a9fed341..437e6fd47311 100644
--- a/drivers/firmware/efi/cper.c
+++ b/drivers/firmware/efi/cper.c
@@ -207,7 +207,7 @@ const char *cper_mem_err_type_str(unsigned int etype)
}
EXPORT_SYMBOL_GPL(cper_mem_err_type_str);
-static int cper_mem_err_location(const struct cper_sec_mem_err *mem, char *msg)
+static int cper_mem_err_location(struct cper_mem_err_compact *mem, char *msg)
{
u32 len, n;
@@ -249,7 +249,7 @@ static int cper_mem_err_location(const struct cper_sec_mem_err *mem, char *msg)
return n;
}
-static int cper_dimm_err_location(const struct cper_sec_mem_err *mem, char *msg)
+static int cper_dimm_err_location(struct cper_mem_err_compact *mem, char *msg)
{
u32 len, n;
const char *bank = NULL, *device = NULL;
@@ -271,8 +271,44 @@ static int cper_dimm_err_location(const struct cper_sec_mem_err *mem, char *msg)
return n;
}
+void cper_mem_err_pack(const struct cper_sec_mem_err *mem,
+ struct cper_mem_err_compact *cmem)
+{
+ cmem->validation_bits = mem->validation_bits;
+ cmem->node = mem->node;
+ cmem->card = mem->card;
+ cmem->module = mem->module;
+ cmem->bank = mem->bank;
+ cmem->device = mem->device;
+ cmem->row = mem->row;
+ cmem->column = mem->column;
+ cmem->bit_pos = mem->bit_pos;
+ cmem->requestor_id = mem->requestor_id;
+ cmem->responder_id = mem->responder_id;
+ cmem->target_id = mem->target_id;
+ cmem->rank = mem->rank;
+ cmem->mem_array_handle = mem->mem_array_handle;
+ cmem->mem_dev_handle = mem->mem_dev_handle;
+}
+
+const char *cper_mem_err_unpack(struct trace_seq *p,
+ struct cper_mem_err_compact *cmem)
+{
+ const char *ret = p->buffer + p->len;
+
+ if (cper_mem_err_location(cmem, rcd_decode_str))
+ trace_seq_printf(p, "%s", rcd_decode_str);
+ if (cper_dimm_err_location(cmem, rcd_decode_str))
+ trace_seq_printf(p, "%s", rcd_decode_str);
+ trace_seq_putc(p, '\0');
+
+ return ret;
+}
+
static void cper_print_mem(const char *pfx, const struct cper_sec_mem_err *mem)
{
+ struct cper_mem_err_compact cmem;
+
if (mem->validation_bits & CPER_MEM_VALID_ERROR_STATUS)
printk("%s""error_status: 0x%016llx\n", pfx, mem->error_status);
if (mem->validation_bits & CPER_MEM_VALID_PA)
@@ -281,14 +317,15 @@ static void cper_print_mem(const char *pfx, const struct cper_sec_mem_err *mem)
if (mem->validation_bits & CPER_MEM_VALID_PA_MASK)
printk("%s""physical_address_mask: 0x%016llx\n",
pfx, mem->physical_addr_mask);
- if (cper_mem_err_location(mem, rcd_decode_str))
+ cper_mem_err_pack(mem, &cmem);
+ if (cper_mem_err_location(&cmem, rcd_decode_str))
printk("%s%s\n", pfx, rcd_decode_str);
if (mem->validation_bits & CPER_MEM_VALID_ERROR_TYPE) {
u8 etype = mem->error_type;
printk("%s""error_type: %d, %s\n", pfx, etype,
cper_mem_err_type_str(etype));
}
- if (cper_dimm_err_location(mem, rcd_decode_str))
+ if (cper_dimm_err_location(&cmem, rcd_decode_str))
printk("%s%s\n", pfx, rcd_decode_str);
}
diff --git a/drivers/ras/ras.c b/drivers/ras/ras.c
index 4cac43a1e25c..b67dd362b7b6 100644
--- a/drivers/ras/ras.c
+++ b/drivers/ras/ras.c
@@ -23,4 +23,7 @@ static int __init ras_init(void)
}
subsys_initcall(ras_init);
+#if defined(CONFIG_ACPI_EXTLOG) || defined(CONFIG_ACPI_EXTLOG_MODULE)
+EXPORT_TRACEPOINT_SYMBOL_GPL(extlog_mem_event);
+#endif
EXPORT_TRACEPOINT_SYMBOL_GPL(mc_event);