From ca7cc5110a313a609da40ae948978a585352564b Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Wed, 13 Jul 2011 13:14:13 +0800 Subject: ACPI, APEI, ERST, Prevent erst_dbg from loading if ERST is disabled erst_dbg module can not work when ERST is disabled. So disable module loading to provide clearer information to user. Signed-off-by: Huang Ying Signed-off-by: Len Brown --- drivers/acpi/apei/erst-dbg.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'drivers/acpi') diff --git a/drivers/acpi/apei/erst-dbg.c b/drivers/acpi/apei/erst-dbg.c index a4cfb64c86a1..cb04aa43c33a 100644 --- a/drivers/acpi/apei/erst-dbg.c +++ b/drivers/acpi/apei/erst-dbg.c @@ -213,6 +213,10 @@ static struct miscdevice erst_dbg_dev = { static __init int erst_dbg_init(void) { + if (erst_disable) { + pr_info(ERST_DBG_PFX "ERST support is disabled.\n"); + return -ENODEV; + } return misc_register(&erst_dbg_dev); } -- cgit v1.2.3 From d37afc50e618271839f001ea653949eefc728167 Mon Sep 17 00:00:00 2001 From: Chen Gong Date: Wed, 13 Jul 2011 13:14:14 +0800 Subject: ACPI, APEI, ERST, Fix erst-dbg long record reading issue When we debug ERST table with erst-dbg, if the error record in ERST table is too long(>4K), it can't be read out. So this patch increases the buffer size to 16K to ensure such error records can be read from ERST table. Signed-off-by: Chen Gong Signed-off-by: Huang Ying Signed-off-by: Len Brown --- drivers/acpi/apei/erst-dbg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/acpi') diff --git a/drivers/acpi/apei/erst-dbg.c b/drivers/acpi/apei/erst-dbg.c index cb04aa43c33a..903549df809b 100644 --- a/drivers/acpi/apei/erst-dbg.c +++ b/drivers/acpi/apei/erst-dbg.c @@ -33,7 +33,7 @@ #define ERST_DBG_PFX "ERST DBG: " -#define ERST_DBG_RECORD_LEN_MAX 4096 +#define ERST_DBG_RECORD_LEN_MAX 0x4000 static void *erst_dbg_buf; static unsigned int erst_dbg_buf_len; -- cgit v1.2.3 From 5588340d46a484da53bbce8136184d9c7fbc259c Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Wed, 13 Jul 2011 13:14:15 +0800 Subject: ACPI, APEI, GHES, Do not ratelimit fatal error printk before panic printk is used by GHES to report hardware errors. Normally, the printk will be ratelimited to avoid too many hardware error reports in kernel log. Because there may be thousands or even millions of corrected hardware errors during system running. That is different for fatal hardware error, because system will go panic as soon as possible, there will be no more than several error records. And these error records are valuable for system fault diagnosis, so they should not be ratelimited. Signed-off-by: Huang Ying Signed-off-by: Len Brown --- drivers/acpi/apei/ghes.c | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) (limited to 'drivers/acpi') diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index f703b2881153..f339c0f8369c 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -360,11 +360,8 @@ static void ghes_do_proc(struct ghes *ghes) } } -static void ghes_print_estatus(const char *pfx, struct ghes *ghes) +static void __ghes_print_estatus(const char *pfx, struct ghes *ghes) { - /* Not more than 2 messages every 5 seconds */ - static DEFINE_RATELIMIT_STATE(ratelimit, 5*HZ, 2); - if (pfx == NULL) { if (ghes_severity(ghes->estatus->error_severity) <= GHES_SEV_CORRECTED) @@ -372,12 +369,18 @@ static void ghes_print_estatus(const char *pfx, struct ghes *ghes) else pfx = KERN_ERR HW_ERR; } - if (__ratelimit(&ratelimit)) { - printk( - "%s""Hardware error from APEI Generic Hardware Error Source: %d\n", - pfx, ghes->generic->header.source_id); - apei_estatus_print(pfx, ghes->estatus); - } + printk("%s""Hardware error from APEI Generic Hardware Error Source: %d\n", + pfx, ghes->generic->header.source_id); + apei_estatus_print(pfx, ghes->estatus); +} + +static void ghes_print_estatus(const char *pfx, struct ghes *ghes) +{ + /* Not more than 2 messages every 5 seconds */ + static DEFINE_RATELIMIT_STATE(ratelimit, 5*HZ, 2); + + if (__ratelimit(&ratelimit)) + __ghes_print_estatus(pfx, ghes); } static int ghes_proc(struct ghes *ghes) @@ -476,7 +479,7 @@ static int ghes_notify_nmi(struct notifier_block *this, if (sev_global >= GHES_SEV_PANIC) { oops_begin(); - ghes_print_estatus(KERN_EMERG HW_ERR, ghes_global); + __ghes_print_estatus(KERN_EMERG HW_ERR, ghes_global); /* reboot to log the error! */ if (panic_timeout == 0) panic_timeout = ghes_panic_timeout; -- cgit v1.2.3 From eecf2f7124834dd1cad21807526a8ea031ba8217 Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Wed, 13 Jul 2011 13:14:16 +0800 Subject: ACPI, APEI, Add apei_exec_run_optional Some actions in APEI ERST and EINJ tables are optional, for example, ACPI_EINJ_BEGIN_OPERATION action is used to do some preparation for error injection, and firmware may choose to do nothing here. While some other actions are mandatory, for example, firmware must provide ACPI_EINJ_GET_ERROR_TYPE implementation. Original implementation treats all actions as optional (that is, can have no instructions), that may cause issue if firmware does not provide some mandatory actions. To fix this, this patch adds apei_exec_run_optional, which should be used for optional actions. The original apei_exec_run should be used for mandatory actions. Cc: Thomas Renninger Signed-off-by: Huang Ying Signed-off-by: Len Brown --- drivers/acpi/apei/apei-base.c | 9 +++++---- drivers/acpi/apei/apei-internal.h | 13 ++++++++++++- 2 files changed, 17 insertions(+), 5 deletions(-) (limited to 'drivers/acpi') diff --git a/drivers/acpi/apei/apei-base.c b/drivers/acpi/apei/apei-base.c index 4a904a4bf05f..0714194229da 100644 --- a/drivers/acpi/apei/apei-base.c +++ b/drivers/acpi/apei/apei-base.c @@ -157,9 +157,10 @@ EXPORT_SYMBOL_GPL(apei_exec_noop); * Interpret the specified action. Go through whole action table, * execute all instructions belong to the action. */ -int apei_exec_run(struct apei_exec_context *ctx, u8 action) +int __apei_exec_run(struct apei_exec_context *ctx, u8 action, + bool optional) { - int rc; + int rc = -ENOENT; u32 i, ip; struct acpi_whea_header *entry; apei_exec_ins_func_t run; @@ -198,9 +199,9 @@ rewind: goto rewind; } - return 0; + return !optional && rc < 0 ? rc : 0; } -EXPORT_SYMBOL_GPL(apei_exec_run); +EXPORT_SYMBOL_GPL(__apei_exec_run); typedef int (*apei_exec_entry_func_t)(struct apei_exec_context *ctx, struct acpi_whea_header *entry, diff --git a/drivers/acpi/apei/apei-internal.h b/drivers/acpi/apei/apei-internal.h index ef0581f2094d..f286cf753f32 100644 --- a/drivers/acpi/apei/apei-internal.h +++ b/drivers/acpi/apei/apei-internal.h @@ -50,7 +50,18 @@ static inline u64 apei_exec_ctx_get_output(struct apei_exec_context *ctx) return ctx->value; } -int apei_exec_run(struct apei_exec_context *ctx, u8 action); +int __apei_exec_run(struct apei_exec_context *ctx, u8 action, bool optional); + +static inline int apei_exec_run(struct apei_exec_context *ctx, u8 action) +{ + return __apei_exec_run(ctx, action, 0); +} + +/* It is optional whether the firmware provides the action */ +static inline int apei_exec_run_optional(struct apei_exec_context *ctx, u8 action) +{ + return __apei_exec_run(ctx, action, 1); +} /* Common instruction implementation */ -- cgit v1.2.3 From 392913de7cc7446531922f29c0a4382d8d09626c Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Wed, 13 Jul 2011 13:14:17 +0800 Subject: ACPI, APEI, Use apei_exec_run_optional in APEI EINJ and ERST This patch changes APEI EINJ and ERST to use apei_exec_run for mandatory actions, and apei_exec_run_optional for optional actions. Cc: Thomas Renninger Signed-off-by: Huang Ying Signed-off-by: Len Brown --- drivers/acpi/apei/einj.c | 4 ++-- drivers/acpi/apei/erst.c | 12 ++++++------ 2 files changed, 8 insertions(+), 8 deletions(-) (limited to 'drivers/acpi') diff --git a/drivers/acpi/apei/einj.c b/drivers/acpi/apei/einj.c index f74b2ea11f21..ab821f1cfa1f 100644 --- a/drivers/acpi/apei/einj.c +++ b/drivers/acpi/apei/einj.c @@ -285,7 +285,7 @@ static int __einj_error_inject(u32 type, u64 param1, u64 param2) einj_exec_ctx_init(&ctx); - rc = apei_exec_run(&ctx, ACPI_EINJ_BEGIN_OPERATION); + rc = apei_exec_run_optional(&ctx, ACPI_EINJ_BEGIN_OPERATION); if (rc) return rc; apei_exec_ctx_set_input(&ctx, type); @@ -323,7 +323,7 @@ static int __einj_error_inject(u32 type, u64 param1, u64 param2) rc = __einj_error_trigger(trigger_paddr); if (rc) return rc; - rc = apei_exec_run(&ctx, ACPI_EINJ_END_OPERATION); + rc = apei_exec_run_optional(&ctx, ACPI_EINJ_END_OPERATION); return rc; } diff --git a/drivers/acpi/apei/erst.c b/drivers/acpi/apei/erst.c index e6cef8e1b534..c8ba9da14e43 100644 --- a/drivers/acpi/apei/erst.c +++ b/drivers/acpi/apei/erst.c @@ -642,7 +642,7 @@ static int __erst_write_to_storage(u64 offset) int rc; erst_exec_ctx_init(&ctx); - rc = apei_exec_run(&ctx, ACPI_ERST_BEGIN_WRITE); + rc = apei_exec_run_optional(&ctx, ACPI_ERST_BEGIN_WRITE); if (rc) return rc; apei_exec_ctx_set_input(&ctx, offset); @@ -666,7 +666,7 @@ static int __erst_write_to_storage(u64 offset) if (rc) return rc; val = apei_exec_ctx_get_output(&ctx); - rc = apei_exec_run(&ctx, ACPI_ERST_END); + rc = apei_exec_run_optional(&ctx, ACPI_ERST_END); if (rc) return rc; @@ -681,7 +681,7 @@ static int __erst_read_from_storage(u64 record_id, u64 offset) int rc; erst_exec_ctx_init(&ctx); - rc = apei_exec_run(&ctx, ACPI_ERST_BEGIN_READ); + rc = apei_exec_run_optional(&ctx, ACPI_ERST_BEGIN_READ); if (rc) return rc; apei_exec_ctx_set_input(&ctx, offset); @@ -709,7 +709,7 @@ static int __erst_read_from_storage(u64 record_id, u64 offset) if (rc) return rc; val = apei_exec_ctx_get_output(&ctx); - rc = apei_exec_run(&ctx, ACPI_ERST_END); + rc = apei_exec_run_optional(&ctx, ACPI_ERST_END); if (rc) return rc; @@ -724,7 +724,7 @@ static int __erst_clear_from_storage(u64 record_id) int rc; erst_exec_ctx_init(&ctx); - rc = apei_exec_run(&ctx, ACPI_ERST_BEGIN_CLEAR); + rc = apei_exec_run_optional(&ctx, ACPI_ERST_BEGIN_CLEAR); if (rc) return rc; apei_exec_ctx_set_input(&ctx, record_id); @@ -748,7 +748,7 @@ static int __erst_clear_from_storage(u64 record_id) if (rc) return rc; val = apei_exec_ctx_get_output(&ctx); - rc = apei_exec_run(&ctx, ACPI_ERST_END); + rc = apei_exec_run_optional(&ctx, ACPI_ERST_END); if (rc) return rc; -- cgit v1.2.3 From 86cd47334b00b6aa9b5d0ebf389a6fe76f21c641 Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Wed, 13 Jul 2011 13:14:18 +0800 Subject: ACPI, APEI, GHES, Prevent GHES to be built as module GHES (Generic Hardware Error Source) is used to process hardware error notification in firmware first mode. But because firmware first mode can be turned on but can not be turned off, it is unreasonable to unload the GHES module with firmware first mode turned on. To avoid confusion, this patch makes GHES can be enabled/disabled in configuration time, but not built as module and unloaded at run time. Signed-off-by: Huang Ying Reviewed-by: Andi Kleen Reviewed-by: Matthew Garrett Signed-off-by: Len Brown --- drivers/acpi/apei/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/acpi') diff --git a/drivers/acpi/apei/Kconfig b/drivers/acpi/apei/Kconfig index f739a70b1c70..3f45dde17aec 100644 --- a/drivers/acpi/apei/Kconfig +++ b/drivers/acpi/apei/Kconfig @@ -10,7 +10,7 @@ config ACPI_APEI error injection. config ACPI_APEI_GHES - tristate "APEI Generic Hardware Error Source" + bool "APEI Generic Hardware Error Source" depends on ACPI_APEI && X86 select ACPI_HED help -- cgit v1.2.3 From b6a9501658530d8b8374e37f1edb549039a8a260 Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Wed, 13 Jul 2011 13:14:19 +0800 Subject: ACPI, APEI, GHES, Support disable GHES at boot time Some machine may have broken firmware so that GHES and firmware first mode should be disabled. This patch adds support to that. Signed-off-by: Huang Ying Reviewed-by: Andi Kleen Reviewed-by: Matthew Garrett Signed-off-by: Len Brown --- drivers/acpi/apei/ghes.c | 8 ++++++++ drivers/acpi/apei/hest.c | 17 +++++++++-------- 2 files changed, 17 insertions(+), 8 deletions(-) (limited to 'drivers/acpi') diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index f339c0f8369c..b142b94bf8b2 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -77,6 +77,9 @@ struct ghes { }; }; +int ghes_disable; +module_param_named(disable, ghes_disable, bool, 0); + static int ghes_panic_timeout __read_mostly = 30; /* @@ -665,6 +668,11 @@ static int __init ghes_init(void) return -EINVAL; } + if (ghes_disable) { + pr_info(GHES_PFX "GHES is not enabled!\n"); + return -EINVAL; + } + rc = ghes_ioremap_init(); if (rc) goto err; diff --git a/drivers/acpi/apei/hest.c b/drivers/acpi/apei/hest.c index 181bc2f7bb74..05fee06f4d6e 100644 --- a/drivers/acpi/apei/hest.c +++ b/drivers/acpi/apei/hest.c @@ -231,16 +231,17 @@ void __init acpi_hest_init(void) goto err; } - rc = apei_hest_parse(hest_parse_ghes_count, &ghes_count); - if (rc) - goto err; - - rc = hest_ghes_dev_register(ghes_count); - if (!rc) { - pr_info(HEST_PFX "Table parsing has been initialized.\n"); - return; + if (!ghes_disable) { + rc = apei_hest_parse(hest_parse_ghes_count, &ghes_count); + if (rc) + goto err; + rc = hest_ghes_dev_register(ghes_count); + if (rc) + goto err; } + pr_info(HEST_PFX "Table parsing has been initialized.\n"); + return; err: hest_disable = 1; } -- cgit v1.2.3 From eccddd32ced0df8f9130024157bf8d37df860d76 Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Wed, 13 Jul 2011 13:14:20 +0800 Subject: ACPI, APEI, Add APEI bit support in generic _OSC call In APEI firmware first mode, hardware error is reported by hardware to firmware firstly, then firmware reports the error to Linux in a GHES error record via POLL/SCI/IRQ/NMI etc. This may result in some issues if OS has no full APEI support. So some firmware implementation will work in a back-compatible mode by default. Where firmware will only notify OS in old-fashion, without GHES record. For example, for a fatal hardware error, only NMI is signaled, no GHES record. To gain full APEI power on these machines, APEI bit in generic _OSC call can be specified to tell firmware that Linux has full APEI support. This patch adds the APEI bit support in generic _OSC call. Signed-off-by: Huang Ying Reviewed-by: Andi Kleen Reviewed-by: Matthew Garrett Signed-off-by: Len Brown --- drivers/acpi/bus.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) (limited to 'drivers/acpi') diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c index d1e06c182cdb..43ce3b0c4921 100644 --- a/drivers/acpi/bus.c +++ b/drivers/acpi/bus.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include @@ -519,6 +520,7 @@ out_kfree: } EXPORT_SYMBOL(acpi_run_osc); +bool osc_sb_apei_support_acked; static u8 sb_uuid_str[] = "0811B06E-4A27-44F9-8D60-3CBBC22E7B48"; static void acpi_bus_osc_support(void) { @@ -541,11 +543,21 @@ static void acpi_bus_osc_support(void) #if defined(CONFIG_ACPI_PROCESSOR) || defined(CONFIG_ACPI_PROCESSOR_MODULE) capbuf[OSC_SUPPORT_TYPE] |= OSC_SB_PPC_OST_SUPPORT; #endif + +#ifdef CONFIG_ACPI_APEI_GHES + if (!ghes_disable) + capbuf[OSC_SUPPORT_TYPE] |= OSC_SB_APEI_SUPPORT; +#endif if (ACPI_FAILURE(acpi_get_handle(NULL, "\\_SB", &handle))) return; - if (ACPI_SUCCESS(acpi_run_osc(handle, &context))) + if (ACPI_SUCCESS(acpi_run_osc(handle, &context))) { + u32 *capbuf_ret = context.ret.pointer; + if (context.ret.length > OSC_SUPPORT_TYPE) + osc_sb_apei_support_acked = + capbuf_ret[OSC_SUPPORT_TYPE] & OSC_SB_APEI_SUPPORT; kfree(context.ret.pointer); - /* do we need to check the returned cap? Sounds no */ + } + /* do we need to check other returned cap? Sounds no */ } /* -------------------------------------------------------------------------- -- cgit v1.2.3 From 9fb0bfe1408d5506b7b83d13d1eed573fd71d67d Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Wed, 13 Jul 2011 13:14:21 +0800 Subject: ACPI, APEI, Add WHEA _OSC support APEI firmware first mode must be turned on explicitly on some machines, otherwise there may be no GHES hardware error record for hardware error notification. APEI bit in generic _OSC call can be used to do that, but on some machine, a special WHEA _OSC call must be used. This patch adds the support to that WHEA _OSC call. Signed-off-by: Huang Ying Reviewed-by: Andi Kleen Reviewed-by: Matthew Garrett Signed-off-by: Len Brown --- drivers/acpi/apei/apei-base.c | 26 ++++++++++++++++++++++++++ drivers/acpi/apei/apei-internal.h | 2 ++ drivers/acpi/apei/ghes.c | 10 ++++++++++ 3 files changed, 38 insertions(+) (limited to 'drivers/acpi') diff --git a/drivers/acpi/apei/apei-base.c b/drivers/acpi/apei/apei-base.c index 0714194229da..8041248fce9b 100644 --- a/drivers/acpi/apei/apei-base.c +++ b/drivers/acpi/apei/apei-base.c @@ -604,3 +604,29 @@ struct dentry *apei_get_debugfs_dir(void) return dapei; } EXPORT_SYMBOL_GPL(apei_get_debugfs_dir); + +int apei_osc_setup(void) +{ + static u8 whea_uuid_str[] = "ed855e0c-6c90-47bf-a62a-26de0fc5ad5c"; + acpi_handle handle; + u32 capbuf[3]; + struct acpi_osc_context context = { + .uuid_str = whea_uuid_str, + .rev = 1, + .cap.length = sizeof(capbuf), + .cap.pointer = capbuf, + }; + + capbuf[OSC_QUERY_TYPE] = OSC_QUERY_ENABLE; + capbuf[OSC_SUPPORT_TYPE] = 0; + capbuf[OSC_CONTROL_TYPE] = 0; + + if (ACPI_FAILURE(acpi_get_handle(NULL, "\\_SB", &handle)) + || ACPI_FAILURE(acpi_run_osc(handle, &context))) + return -EIO; + else { + kfree(context.ret.pointer); + return 0; + } +} +EXPORT_SYMBOL_GPL(apei_osc_setup); diff --git a/drivers/acpi/apei/apei-internal.h b/drivers/acpi/apei/apei-internal.h index f286cf753f32..f57050e7a5e7 100644 --- a/drivers/acpi/apei/apei-internal.h +++ b/drivers/acpi/apei/apei-internal.h @@ -124,4 +124,6 @@ void apei_estatus_print(const char *pfx, const struct acpi_hest_generic_status *estatus); int apei_estatus_check_header(const struct acpi_hest_generic_status *estatus); int apei_estatus_check(const struct acpi_hest_generic_status *estatus); + +int apei_osc_setup(void); #endif diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index b142b94bf8b2..b1390a61cde1 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -681,6 +681,16 @@ static int __init ghes_init(void) if (rc) goto err_ioremap_exit; + rc = apei_osc_setup(); + if (rc == 0 && osc_sb_apei_support_acked) + pr_info(GHES_PFX "APEI firmware first mode is enabled by APEI bit and WHEA _OSC.\n"); + else if (rc == 0 && !osc_sb_apei_support_acked) + pr_info(GHES_PFX "APEI firmware first mode is enabled by WHEA _OSC.\n"); + else if (rc && osc_sb_apei_support_acked) + pr_info(GHES_PFX "APEI firmware first mode is enabled by APEI bit.\n"); + else + pr_info(GHES_PFX "Failed to enable APEI firmware first mode.\n"); + return 0; err_ioremap_exit: ghes_ioremap_exit(); -- cgit v1.2.3 From 67eb2e99076708cc790019a6a08ca3e0ae130a3a Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Wed, 13 Jul 2011 13:14:25 +0800 Subject: ACPI, APEI, GHES, printk support for recoverable error via NMI Some APEI GHES recoverable errors are reported via NMI, but printk is not safe in NMI context. To solve the issue, a lock-less memory allocator is used to allocate memory in NMI handler, save the error record into the allocated memory, put the error record into a lock-less list. On the other hand, an irq_work is used to delay the operation from NMI context to IRQ context. The irq_work IRQ handler will remove nodes from lock-less list, printk the error record and do some further processing include recovery operation, then free the memory. Signed-off-by: Huang Ying Signed-off-by: Len Brown --- drivers/acpi/apei/Kconfig | 2 + drivers/acpi/apei/ghes.c | 209 ++++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 193 insertions(+), 18 deletions(-) (limited to 'drivers/acpi') diff --git a/drivers/acpi/apei/Kconfig b/drivers/acpi/apei/Kconfig index 3f45dde17aec..35596eaaca17 100644 --- a/drivers/acpi/apei/Kconfig +++ b/drivers/acpi/apei/Kconfig @@ -13,6 +13,8 @@ config ACPI_APEI_GHES bool "APEI Generic Hardware Error Source" depends on ACPI_APEI && X86 select ACPI_HED + select LLIST + select GENERIC_ALLOCATOR help Generic Hardware Error Source provides a way to report platform hardware errors (such as that from chipset). It diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index b1390a61cde1..d1a40218e17e 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -12,7 +12,7 @@ * For more information about Generic Hardware Error Source, please * refer to ACPI Specification version 4.0, section 17.3.2.6 * - * Copyright 2010 Intel Corp. + * Copyright 2010,2011 Intel Corp. * Author: Huang Ying * * This program is free software; you can redistribute it and/or @@ -42,6 +42,9 @@ #include #include #include +#include +#include +#include #include #include #include @@ -53,6 +56,15 @@ #define GHES_PFX "GHES: " #define GHES_ESTATUS_MAX_SIZE 65536 +#define GHES_ESOURCE_PREALLOC_MAX_SIZE 65536 + +#define GHES_ESTATUS_POOL_MIN_ALLOC_ORDER 3 + +#define GHES_ESTATUS_NODE_LEN(estatus_len) \ + (sizeof(struct ghes_estatus_node) + (estatus_len)) +#define GHES_ESTATUS_FROM_NODE(estatus_node) \ + ((struct acpi_hest_generic_status *) \ + ((struct ghes_estatus_node *)(estatus_node) + 1)) /* * One struct ghes is created for each generic hardware error source. @@ -77,6 +89,11 @@ struct ghes { }; }; +struct ghes_estatus_node { + struct llist_node llnode; + struct acpi_hest_generic *generic; +}; + int ghes_disable; module_param_named(disable, ghes_disable, bool, 0); @@ -124,6 +141,19 @@ static struct vm_struct *ghes_ioremap_area; static DEFINE_RAW_SPINLOCK(ghes_ioremap_lock_nmi); static DEFINE_SPINLOCK(ghes_ioremap_lock_irq); +/* + * printk is not safe in NMI context. So in NMI handler, we allocate + * required memory from lock-less memory allocator + * (ghes_estatus_pool), save estatus into it, put them into lock-less + * list (ghes_estatus_llist), then delay printk into IRQ context via + * irq_work (ghes_proc_irq_work). ghes_estatus_size_request record + * required pool size by all NMI error source. + */ +static struct gen_pool *ghes_estatus_pool; +static unsigned long ghes_estatus_pool_size_request; +static struct llist_head ghes_estatus_llist; +static struct irq_work ghes_proc_irq_work; + static int ghes_ioremap_init(void) { ghes_ioremap_area = __get_vm_area(PAGE_SIZE * GHES_IOREMAP_PAGES, @@ -183,6 +213,55 @@ static void ghes_iounmap_irq(void __iomem *vaddr_ptr) __flush_tlb_one(vaddr); } +static int ghes_estatus_pool_init(void) +{ + ghes_estatus_pool = gen_pool_create(GHES_ESTATUS_POOL_MIN_ALLOC_ORDER, -1); + if (!ghes_estatus_pool) + return -ENOMEM; + return 0; +} + +static void ghes_estatus_pool_free_chunk_page(struct gen_pool *pool, + struct gen_pool_chunk *chunk, + void *data) +{ + free_page(chunk->start_addr); +} + +static void ghes_estatus_pool_exit(void) +{ + gen_pool_for_each_chunk(ghes_estatus_pool, + ghes_estatus_pool_free_chunk_page, NULL); + gen_pool_destroy(ghes_estatus_pool); +} + +static int ghes_estatus_pool_expand(unsigned long len) +{ + unsigned long i, pages, size, addr; + int ret; + + ghes_estatus_pool_size_request += PAGE_ALIGN(len); + size = gen_pool_size(ghes_estatus_pool); + if (size >= ghes_estatus_pool_size_request) + return 0; + pages = (ghes_estatus_pool_size_request - size) / PAGE_SIZE; + for (i = 0; i < pages; i++) { + addr = __get_free_page(GFP_KERNEL); + if (!addr) + return -ENOMEM; + ret = gen_pool_add(ghes_estatus_pool, addr, PAGE_SIZE, -1); + if (ret) + return ret; + } + + return 0; +} + +static void ghes_estatus_pool_shrink(unsigned long len) +{ + ghes_estatus_pool_size_request -= PAGE_ALIGN(len); +} + static struct ghes *ghes_new(struct acpi_hest_generic *generic) { struct ghes *ghes; @@ -344,13 +423,13 @@ static void ghes_clear_estatus(struct ghes *ghes) ghes->flags &= ~GHES_TO_CLEAR; } -static void ghes_do_proc(struct ghes *ghes) +static void ghes_do_proc(const struct acpi_hest_generic_status *estatus) { int sev, processed = 0; struct acpi_hest_generic_data *gdata; - sev = ghes_severity(ghes->estatus->error_severity); - apei_estatus_for_each_section(ghes->estatus, gdata) { + sev = ghes_severity(estatus->error_severity); + apei_estatus_for_each_section(estatus, gdata) { #ifdef CONFIG_X86_MCE if (!uuid_le_cmp(*(uuid_le *)gdata->section_type, CPER_SEC_PLATFORM_MEM)) { @@ -363,27 +442,37 @@ static void ghes_do_proc(struct ghes *ghes) } } -static void __ghes_print_estatus(const char *pfx, struct ghes *ghes) +static void __ghes_print_estatus(const char *pfx, + const struct acpi_hest_generic *generic, + const struct acpi_hest_generic_status *estatus) { if (pfx == NULL) { - if (ghes_severity(ghes->estatus->error_severity) <= + if (ghes_severity(estatus->error_severity) <= GHES_SEV_CORRECTED) pfx = KERN_WARNING HW_ERR; else pfx = KERN_ERR HW_ERR; } printk("%s""Hardware error from APEI Generic Hardware Error Source: %d\n", - pfx, ghes->generic->header.source_id); - apei_estatus_print(pfx, ghes->estatus); + pfx, generic->header.source_id); + apei_estatus_print(pfx, estatus); } -static void ghes_print_estatus(const char *pfx, struct ghes *ghes) +static void ghes_print_estatus(const char *pfx, + const struct acpi_hest_generic *generic, + const struct acpi_hest_generic_status *estatus) { /* Not more than 2 messages every 5 seconds */ - static DEFINE_RATELIMIT_STATE(ratelimit, 5*HZ, 2); + static DEFINE_RATELIMIT_STATE(ratelimit_corrected, 5*HZ, 2); + static DEFINE_RATELIMIT_STATE(ratelimit_uncorrected, 5*HZ, 2); + struct ratelimit_state *ratelimit; - if (__ratelimit(&ratelimit)) - __ghes_print_estatus(pfx, ghes); + if (ghes_severity(estatus->error_severity) <= GHES_SEV_CORRECTED) + ratelimit = &ratelimit_corrected; + else + ratelimit = &ratelimit_uncorrected; + if (__ratelimit(ratelimit)) + __ghes_print_estatus(pfx, generic, estatus); } static int ghes_proc(struct ghes *ghes) @@ -393,8 +482,8 @@ static int ghes_proc(struct ghes *ghes) rc = ghes_read_estatus(ghes, 0); if (rc) goto out; - ghes_print_estatus(NULL, ghes); - ghes_do_proc(ghes); + ghes_print_estatus(NULL, ghes->generic, ghes->estatus); + ghes_do_proc(ghes->estatus); out: ghes_clear_estatus(ghes); @@ -453,6 +542,40 @@ static int ghes_notify_sci(struct notifier_block *this, return ret; } +static void ghes_proc_in_irq(struct irq_work *irq_work) +{ + struct llist_node *llnode, *next, *tail = NULL; + struct ghes_estatus_node *estatus_node; + struct acpi_hest_generic_status *estatus; + u32 len, node_len; + + /* + * Because the time order of estatus in list is reversed, + * revert it back to proper order. + */ + llnode = llist_del_all(&ghes_estatus_llist); + while (llnode) { + next = llnode->next; + llnode->next = tail; + tail = llnode; + llnode = next; + } + llnode = tail; + while (llnode) { + next = llnode->next; + estatus_node = llist_entry(llnode, struct ghes_estatus_node, + llnode); + estatus = GHES_ESTATUS_FROM_NODE(estatus_node); + len = apei_estatus_len(estatus); + node_len = GHES_ESTATUS_NODE_LEN(len); + ghes_do_proc(estatus); + ghes_print_estatus(NULL, estatus_node->generic, estatus); + gen_pool_free(ghes_estatus_pool, (unsigned long)estatus_node, + node_len); + llnode = next; + } +} + static int ghes_notify_nmi(struct notifier_block *this, unsigned long cmd, void *data) { @@ -482,7 +605,8 @@ static int ghes_notify_nmi(struct notifier_block *this, if (sev_global >= GHES_SEV_PANIC) { oops_begin(); - __ghes_print_estatus(KERN_EMERG HW_ERR, ghes_global); + __ghes_print_estatus(KERN_EMERG HW_ERR, ghes_global->generic, + ghes_global->estatus); /* reboot to log the error! */ if (panic_timeout == 0) panic_timeout = ghes_panic_timeout; @@ -490,12 +614,31 @@ static int ghes_notify_nmi(struct notifier_block *this, } list_for_each_entry_rcu(ghes, &ghes_nmi, list) { +#ifdef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG + u32 len, node_len; + struct ghes_estatus_node *estatus_node; + struct acpi_hest_generic_status *estatus; +#endif if (!(ghes->flags & GHES_TO_CLEAR)) continue; - /* Do not print estatus because printk is not NMI safe */ - ghes_do_proc(ghes); +#ifdef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG + /* Save estatus for further processing in IRQ context */ + len = apei_estatus_len(ghes->estatus); + node_len = GHES_ESTATUS_NODE_LEN(len); + estatus_node = (void *)gen_pool_alloc(ghes_estatus_pool, + node_len); + if (estatus_node) { + estatus_node->generic = ghes->generic; + estatus = GHES_ESTATUS_FROM_NODE(estatus_node); + memcpy(estatus, ghes->estatus, len); + llist_add(&estatus_node->llnode, &ghes_estatus_llist); + } +#endif ghes_clear_estatus(ghes); } +#ifdef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG + irq_work_queue(&ghes_proc_irq_work); +#endif out: raw_spin_unlock(&ghes_nmi_lock); @@ -510,10 +653,26 @@ static struct notifier_block ghes_notifier_nmi = { .notifier_call = ghes_notify_nmi, }; +static unsigned long ghes_esource_prealloc_size( + const struct acpi_hest_generic *generic) +{ + unsigned long block_length, prealloc_records, prealloc_size; + + block_length = min_t(unsigned long, generic->error_block_length, + GHES_ESTATUS_MAX_SIZE); + prealloc_records = max_t(unsigned long, + generic->records_to_preallocate, 1); + prealloc_size = min_t(unsigned long, block_length * prealloc_records, + GHES_ESOURCE_PREALLOC_MAX_SIZE); + + return prealloc_size; +} + static int __devinit ghes_probe(struct platform_device *ghes_dev) { struct acpi_hest_generic *generic; struct ghes *ghes = NULL; + unsigned long len; int rc = -EINVAL; generic = *(struct acpi_hest_generic **)ghes_dev->dev.platform_data; @@ -579,6 +738,8 @@ static int __devinit ghes_probe(struct platform_device *ghes_dev) mutex_unlock(&ghes_list_mutex); break; case ACPI_HEST_NOTIFY_NMI: + len = ghes_esource_prealloc_size(generic); + ghes_estatus_pool_expand(len); mutex_lock(&ghes_list_mutex); if (list_empty(&ghes_nmi)) register_die_notifier(&ghes_notifier_nmi); @@ -603,6 +764,7 @@ static int __devexit ghes_remove(struct platform_device *ghes_dev) { struct ghes *ghes; struct acpi_hest_generic *generic; + unsigned long len; ghes = platform_get_drvdata(ghes_dev); generic = ghes->generic; @@ -633,6 +795,8 @@ static int __devexit ghes_remove(struct platform_device *ghes_dev) * freed after NMI handler finishes. */ synchronize_rcu(); + len = ghes_esource_prealloc_size(generic); + ghes_estatus_pool_shrink(len); break; default: BUG(); @@ -673,14 +837,20 @@ static int __init ghes_init(void) return -EINVAL; } + init_irq_work(&ghes_proc_irq_work, ghes_proc_in_irq); + rc = ghes_ioremap_init(); if (rc) goto err; - rc = platform_driver_register(&ghes_platform_driver); + rc = ghes_estatus_pool_init(); if (rc) goto err_ioremap_exit; + rc = platform_driver_register(&ghes_platform_driver); + if (rc) + goto err_pool_exit; + rc = apei_osc_setup(); if (rc == 0 && osc_sb_apei_support_acked) pr_info(GHES_PFX "APEI firmware first mode is enabled by APEI bit and WHEA _OSC.\n"); @@ -692,6 +862,8 @@ static int __init ghes_init(void) pr_info(GHES_PFX "Failed to enable APEI firmware first mode.\n"); return 0; +err_pool_exit: + ghes_estatus_pool_exit(); err_ioremap_exit: ghes_ioremap_exit(); err: @@ -701,6 +873,7 @@ err: static void __exit ghes_exit(void) { platform_driver_unregister(&ghes_platform_driver); + ghes_estatus_pool_exit(); ghes_ioremap_exit(); } -- cgit v1.2.3 From 152cef40a808d3034e383465b3f7d6783613e458 Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Wed, 13 Jul 2011 13:14:26 +0800 Subject: ACPI, APEI, GHES, Error records content based throttle printk is used by GHES to report hardware errors. Ratelimit is enforced on the printk to avoid too many hardware error reports in kernel log. Because there may be thousands or even millions of corrected hardware errors during system running. Currently, a simple scheme is used. That is, the total number of hardware error reporting is ratelimited. This may cause some issues in practice. For example, there are two kinds of hardware errors occurred in system. One is corrected memory error, because the fault memory address is accessed frequently, there may be hundreds error report per-second. The other is corrected PCIe AER error, it will be reported once per-second. Because they share one ratelimit control structure, it is highly possible that only memory error is reported. To avoid the above issue, an error record content based throttle algorithm is implemented in the patch. Where after the first successful reporting, all error records that are same are throttled for some time, to let other kinds of error records have the opportunity to be reported. In above example, the memory errors will be throttled for some time, after being printked. Then the PCIe AER error will be printked successfully. Signed-off-by: Huang Ying Signed-off-by: Len Brown --- drivers/acpi/apei/ghes.c | 184 +++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 177 insertions(+), 7 deletions(-) (limited to 'drivers/acpi') diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index d1a40218e17e..931410d31a96 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -60,6 +60,21 @@ #define GHES_ESTATUS_POOL_MIN_ALLOC_ORDER 3 +/* This is just an estimation for memory pool allocation */ +#define GHES_ESTATUS_CACHE_AVG_SIZE 512 + +#define GHES_ESTATUS_CACHES_SIZE 4 + +#define GHES_ESTATUS_IN_CACHE_MAX_NSEC (10 * NSEC_PER_SEC) +/* Prevent too many caches are allocated because of RCU */ +#define GHES_ESTATUS_CACHE_ALLOCED_MAX (GHES_ESTATUS_CACHES_SIZE * 3 / 2) + +#define GHES_ESTATUS_CACHE_LEN(estatus_len) \ + (sizeof(struct ghes_estatus_cache) + (estatus_len)) +#define GHES_ESTATUS_FROM_CACHE(estatus_cache) \ + ((struct acpi_hest_generic_status *) \ + ((struct ghes_estatus_cache *)(estatus_cache) + 1)) + #define GHES_ESTATUS_NODE_LEN(estatus_len) \ (sizeof(struct ghes_estatus_node) + (estatus_len)) #define GHES_ESTATUS_FROM_NODE(estatus_node) \ @@ -94,6 +109,14 @@ struct ghes_estatus_node { struct acpi_hest_generic *generic; }; +struct ghes_estatus_cache { + u32 estatus_len; + atomic_t count; + struct acpi_hest_generic *generic; + unsigned long long time_in; + struct rcu_head rcu; +}; + int ghes_disable; module_param_named(disable, ghes_disable, bool, 0); @@ -154,6 +177,9 @@ static unsigned long ghes_estatus_pool_size_request; static struct llist_head ghes_estatus_llist; static struct irq_work ghes_proc_irq_work; +struct ghes_estatus_cache *ghes_estatus_caches[GHES_ESTATUS_CACHES_SIZE]; +static atomic_t ghes_estatus_cache_alloced; + static int ghes_ioremap_init(void) { ghes_ioremap_area = __get_vm_area(PAGE_SIZE * GHES_IOREMAP_PAGES, @@ -458,9 +484,9 @@ static void __ghes_print_estatus(const char *pfx, apei_estatus_print(pfx, estatus); } -static void ghes_print_estatus(const char *pfx, - const struct acpi_hest_generic *generic, - const struct acpi_hest_generic_status *estatus) +static int ghes_print_estatus(const char *pfx, + const struct acpi_hest_generic *generic, + const struct acpi_hest_generic_status *estatus) { /* Not more than 2 messages every 5 seconds */ static DEFINE_RATELIMIT_STATE(ratelimit_corrected, 5*HZ, 2); @@ -471,8 +497,137 @@ static void ghes_print_estatus(const char *pfx, ratelimit = &ratelimit_corrected; else ratelimit = &ratelimit_uncorrected; - if (__ratelimit(ratelimit)) + if (__ratelimit(ratelimit)) { __ghes_print_estatus(pfx, generic, estatus); + return 1; + } + return 0; +} + +/* + * GHES error status reporting throttle, to report more kinds of + * errors, instead of just most frequently occurred errors. + */ +static int ghes_estatus_cached(struct acpi_hest_generic_status *estatus) +{ + u32 len; + int i, cached = 0; + unsigned long long now; + struct ghes_estatus_cache *cache; + struct acpi_hest_generic_status *cache_estatus; + + len = apei_estatus_len(estatus); + rcu_read_lock(); + for (i = 0; i < GHES_ESTATUS_CACHES_SIZE; i++) { + cache = rcu_dereference(ghes_estatus_caches[i]); + if (cache == NULL) + continue; + if (len != cache->estatus_len) + continue; + cache_estatus = GHES_ESTATUS_FROM_CACHE(cache); + if (memcmp(estatus, cache_estatus, len)) + continue; + atomic_inc(&cache->count); + now = sched_clock(); + if (now - cache->time_in < GHES_ESTATUS_IN_CACHE_MAX_NSEC) + cached = 1; + break; + } + rcu_read_unlock(); + return cached; +} + +static struct ghes_estatus_cache *ghes_estatus_cache_alloc( + struct acpi_hest_generic *generic, + struct acpi_hest_generic_status *estatus) +{ + int alloced; + u32 len, cache_len; + struct ghes_estatus_cache *cache; + struct acpi_hest_generic_status *cache_estatus; + + alloced = atomic_add_return(1, &ghes_estatus_cache_alloced); + if (alloced > GHES_ESTATUS_CACHE_ALLOCED_MAX) { + atomic_dec(&ghes_estatus_cache_alloced); + return NULL; + } + len = apei_estatus_len(estatus); + cache_len = GHES_ESTATUS_CACHE_LEN(len); + cache = (void *)gen_pool_alloc(ghes_estatus_pool, cache_len); + if (!cache) { + atomic_dec(&ghes_estatus_cache_alloced); + return NULL; + } + cache_estatus = GHES_ESTATUS_FROM_CACHE(cache); + memcpy(cache_estatus, estatus, len); + cache->estatus_len = len; + atomic_set(&cache->count, 0); + cache->generic = generic; + cache->time_in = sched_clock(); + return cache; +} + +static void ghes_estatus_cache_free(struct ghes_estatus_cache *cache) +{ + u32 len; + + len = apei_estatus_len(GHES_ESTATUS_FROM_CACHE(cache)); + len = GHES_ESTATUS_CACHE_LEN(len); + gen_pool_free(ghes_estatus_pool, (unsigned long)cache, len); + atomic_dec(&ghes_estatus_cache_alloced); +} + +static void ghes_estatus_cache_rcu_free(struct rcu_head *head) +{ + struct ghes_estatus_cache *cache; + + cache = container_of(head, struct ghes_estatus_cache, rcu); + ghes_estatus_cache_free(cache); +} + +static void ghes_estatus_cache_add( + struct acpi_hest_generic *generic, + struct acpi_hest_generic_status *estatus) +{ + int i, slot = -1, count; + unsigned long long now, duration, period, max_period = 0; + struct ghes_estatus_cache *cache, *slot_cache = NULL, *new_cache; + + new_cache = ghes_estatus_cache_alloc(generic, estatus); + if (new_cache == NULL) + return; + rcu_read_lock(); + now = sched_clock(); + for (i = 0; i < GHES_ESTATUS_CACHES_SIZE; i++) { + cache = rcu_dereference(ghes_estatus_caches[i]); + if (cache == NULL) { + slot = i; + slot_cache = NULL; + break; + } + duration = now - cache->time_in; + if (duration >= GHES_ESTATUS_IN_CACHE_MAX_NSEC) { + slot = i; + slot_cache = cache; + break; + } + count = atomic_read(&cache->count); + period = duration / (count + 1); + if (period > max_period) { + max_period = period; + slot = i; + slot_cache = cache; + } + } + /* new_cache must be put into array after its contents are written */ + smp_wmb(); + if (slot != -1 && cmpxchg(ghes_estatus_caches + slot, + slot_cache, new_cache) == slot_cache) { + if (slot_cache) + call_rcu(&slot_cache->rcu, ghes_estatus_cache_rcu_free); + } else + ghes_estatus_cache_free(new_cache); + rcu_read_unlock(); } static int ghes_proc(struct ghes *ghes) @@ -482,9 +637,11 @@ static int ghes_proc(struct ghes *ghes) rc = ghes_read_estatus(ghes, 0); if (rc) goto out; - ghes_print_estatus(NULL, ghes->generic, ghes->estatus); + if (!ghes_estatus_cached(ghes->estatus)) { + if (ghes_print_estatus(NULL, ghes->generic, ghes->estatus)) + ghes_estatus_cache_add(ghes->generic, ghes->estatus); + } ghes_do_proc(ghes->estatus); - out: ghes_clear_estatus(ghes); return 0; @@ -546,6 +703,7 @@ static void ghes_proc_in_irq(struct irq_work *irq_work) { struct llist_node *llnode, *next, *tail = NULL; struct ghes_estatus_node *estatus_node; + struct acpi_hest_generic *generic; struct acpi_hest_generic_status *estatus; u32 len, node_len; @@ -569,7 +727,11 @@ static void ghes_proc_in_irq(struct irq_work *irq_work) len = apei_estatus_len(estatus); node_len = GHES_ESTATUS_NODE_LEN(len); ghes_do_proc(estatus); - ghes_print_estatus(NULL, estatus_node->generic, estatus); + if (!ghes_estatus_cached(estatus)) { + generic = estatus_node->generic; + if (ghes_print_estatus(NULL, generic, estatus)) + ghes_estatus_cache_add(generic, estatus); + } gen_pool_free(ghes_estatus_pool, (unsigned long)estatus_node, node_len); llnode = next; @@ -622,6 +784,8 @@ static int ghes_notify_nmi(struct notifier_block *this, if (!(ghes->flags & GHES_TO_CLEAR)) continue; #ifdef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG + if (ghes_estatus_cached(ghes->estatus)) + goto next; /* Save estatus for further processing in IRQ context */ len = apei_estatus_len(ghes->estatus); node_len = GHES_ESTATUS_NODE_LEN(len); @@ -633,6 +797,7 @@ static int ghes_notify_nmi(struct notifier_block *this, memcpy(estatus, ghes->estatus, len); llist_add(&estatus_node->llnode, &ghes_estatus_llist); } +next: #endif ghes_clear_estatus(ghes); } @@ -847,6 +1012,11 @@ static int __init ghes_init(void) if (rc) goto err_ioremap_exit; + rc = ghes_estatus_pool_expand(GHES_ESTATUS_CACHE_AVG_SIZE * + GHES_ESTATUS_CACHE_ALLOCED_MAX); + if (rc) + goto err_pool_exit; + rc = platform_driver_register(&ghes_platform_driver); if (rc) goto err_pool_exit; -- cgit v1.2.3 From ba61ca4aab47441f1c6cec28a9a6aa0489fd1df3 Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Wed, 13 Jul 2011 13:14:28 +0800 Subject: ACPI, APEI, GHES: Add hardware memory error recovery support memory_failure_queue() is called when recoverable memory errors are notified by firmware to do the recovery work. Signed-off-by: Huang Ying Signed-off-by: Len Brown --- drivers/acpi/apei/Kconfig | 7 +++++++ drivers/acpi/apei/ghes.c | 24 +++++++++++++++++------- 2 files changed, 24 insertions(+), 7 deletions(-) (limited to 'drivers/acpi') diff --git a/drivers/acpi/apei/Kconfig b/drivers/acpi/apei/Kconfig index 35596eaaca17..c34aa51af4ee 100644 --- a/drivers/acpi/apei/Kconfig +++ b/drivers/acpi/apei/Kconfig @@ -32,6 +32,13 @@ config ACPI_APEI_PCIEAER PCIe AER errors may be reported via APEI firmware first mode. Turn on this option to enable the corresponding support. +config ACPI_APEI_MEMORY_FAILURE + bool "APEI memory error recovering support" + depends on ACPI_APEI && MEMORY_FAILURE + help + Memory errors may be reported via APEI firmware first mode. + Turn on this option to enable the memory recovering support. + config ACPI_APEI_EINJ tristate "APEI Error INJection (EINJ)" depends on ACPI_APEI && DEBUG_FS diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index 931410d31a96..e92c47c46f91 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -451,20 +451,30 @@ static void ghes_clear_estatus(struct ghes *ghes) static void ghes_do_proc(const struct acpi_hest_generic_status *estatus) { - int sev, processed = 0; + int sev, sec_sev; struct acpi_hest_generic_data *gdata; sev = ghes_severity(estatus->error_severity); apei_estatus_for_each_section(estatus, gdata) { -#ifdef CONFIG_X86_MCE + sec_sev = ghes_severity(gdata->error_severity); if (!uuid_le_cmp(*(uuid_le *)gdata->section_type, CPER_SEC_PLATFORM_MEM)) { - apei_mce_report_mem_error( - sev == GHES_SEV_CORRECTED, - (struct cper_sec_mem_err *)(gdata+1)); - processed = 1; - } + struct cper_sec_mem_err *mem_err; + mem_err = (struct cper_sec_mem_err *)(gdata+1); +#ifdef CONFIG_X86_MCE + apei_mce_report_mem_error(sev == GHES_SEV_CORRECTED, + mem_err); #endif +#ifdef CONFIG_ACPI_APEI_MEMORY_FAILURE + if (sev == GHES_SEV_RECOVERABLE && + sec_sev == GHES_SEV_RECOVERABLE && + mem_err->validation_bits & CPER_MEM_VALID_PHYSICAL_ADDRESS) { + unsigned long pfn; + pfn = mem_err->physical_addr >> PAGE_SHIFT; + memory_failure_queue(pfn, 0, 0); + } +#endif + } } } -- cgit v1.2.3 From a7e09d450b2e0b068e850d103b6ee1af537d1910 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Sat, 16 Jul 2011 18:14:21 -0400 Subject: ACPI: APEI build fix as GHES is optional... When # CONFIG_ACPI_APEI_GHES is not set: (.init.text+0x4c22): undefined reference to `ghes_disable' Reported-by: Randy Dunlap Acked-by: Randy Dunlap Signed-off-by: Len Brown --- drivers/acpi/bus.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'drivers/acpi') diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c index 43ce3b0c4921..437ddbf0c49a 100644 --- a/drivers/acpi/bus.c +++ b/drivers/acpi/bus.c @@ -544,10 +544,8 @@ static void acpi_bus_osc_support(void) capbuf[OSC_SUPPORT_TYPE] |= OSC_SB_PPC_OST_SUPPORT; #endif -#ifdef CONFIG_ACPI_APEI_GHES if (!ghes_disable) capbuf[OSC_SUPPORT_TYPE] |= OSC_SB_APEI_SUPPORT; -#endif if (ACPI_FAILURE(acpi_get_handle(NULL, "\\_SB", &handle))) return; if (ACPI_SUCCESS(acpi_run_osc(handle, &context))) { -- cgit v1.2.3 From 70cb6e1da00db6c9212e6fd69bd96fd41c797077 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Tue, 2 Aug 2011 18:00:21 -0400 Subject: APEI GHES: 32-bit buildfix MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit drivers/acpi/apei/ghes.c:542: warning: integer overflow in expression drivers/acpi/apei/ghes.c:619: warning: integer overflow in expression ghes.c:(.text+0x46289): undefined reference to `__udivdi3'   in function ghes_estatus_cache_add(). Reported-by: Randy Dunlap Signed-off-by: Len Brown --- drivers/acpi/apei/ghes.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'drivers/acpi') diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index e92c47c46f91..0784f99a4665 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -65,7 +65,7 @@ #define GHES_ESTATUS_CACHES_SIZE 4 -#define GHES_ESTATUS_IN_CACHE_MAX_NSEC (10 * NSEC_PER_SEC) +#define GHES_ESTATUS_IN_CACHE_MAX_NSEC 10000000000ULL /* Prevent too many caches are allocated because of RCU */ #define GHES_ESTATUS_CACHE_ALLOCED_MAX (GHES_ESTATUS_CACHES_SIZE * 3 / 2) @@ -622,7 +622,8 @@ static void ghes_estatus_cache_add( break; } count = atomic_read(&cache->count); - period = duration / (count + 1); + period = duration; + do_div(period, (count + 1)); if (period > max_period) { max_period = period; slot = i; -- cgit v1.2.3 From c3e6088e1036f8084bc7444b38437da136b7588b Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Wed, 20 Jul 2011 16:09:29 +0800 Subject: ACPI, APEI, EINJ Param support is disabled by default EINJ parameter support is only usable for some specific BIOS. Originally, it is expected to have no harm for BIOS does not support it. But now, we found it will cause issue (memory overwriting) for some BIOS. So param support is disabled by default and only enabled when newly added module parameter named "param_extension" is explicitly specified. Signed-off-by: Huang Ying Cc: Matthew Garrett Acked-by: Don Zickus Acked-by: Tony Luck Signed-off-by: Len Brown --- drivers/acpi/apei/einj.c | 39 ++++++++++++++++++++++++--------------- 1 file changed, 24 insertions(+), 15 deletions(-) (limited to 'drivers/acpi') diff --git a/drivers/acpi/apei/einj.c b/drivers/acpi/apei/einj.c index ab821f1cfa1f..589b96c38704 100644 --- a/drivers/acpi/apei/einj.c +++ b/drivers/acpi/apei/einj.c @@ -46,7 +46,8 @@ * Some BIOSes allow parameters to the SET_ERROR_TYPE entries in the * EINJ table through an unpublished extension. Use with caution as * most will ignore the parameter and make their own choice of address - * for error injection. + * for error injection. This extension is used only if + * param_extension module parameter is specified. */ struct einj_parameter { u64 type; @@ -65,6 +66,9 @@ struct einj_parameter { ((struct acpi_whea_header *)((char *)(tab) + \ sizeof(struct acpi_table_einj))) +static bool param_extension; +module_param(param_extension, bool, 0); + static struct acpi_table_einj *einj_tab; static struct apei_resources einj_resources; @@ -489,14 +493,6 @@ static int __init einj_init(void) einj_debug_dir, NULL, &error_type_fops); if (!fentry) goto err_cleanup; - fentry = debugfs_create_x64("param1", S_IRUSR | S_IWUSR, - einj_debug_dir, &error_param1); - if (!fentry) - goto err_cleanup; - fentry = debugfs_create_x64("param2", S_IRUSR | S_IWUSR, - einj_debug_dir, &error_param2); - if (!fentry) - goto err_cleanup; fentry = debugfs_create_file("error_inject", S_IWUSR, einj_debug_dir, NULL, &error_inject_fops); if (!fentry) @@ -513,12 +509,23 @@ static int __init einj_init(void) rc = apei_exec_pre_map_gars(&ctx); if (rc) goto err_release; - param_paddr = einj_get_parameter_address(); - if (param_paddr) { - einj_param = ioremap(param_paddr, sizeof(*einj_param)); - rc = -ENOMEM; - if (!einj_param) - goto err_unmap; + if (param_extension) { + param_paddr = einj_get_parameter_address(); + if (param_paddr) { + einj_param = ioremap(param_paddr, sizeof(*einj_param)); + rc = -ENOMEM; + if (!einj_param) + goto err_unmap; + fentry = debugfs_create_x64("param1", S_IRUSR | S_IWUSR, + einj_debug_dir, &error_param1); + if (!fentry) + goto err_unmap; + fentry = debugfs_create_x64("param2", S_IRUSR | S_IWUSR, + einj_debug_dir, &error_param2); + if (!fentry) + goto err_unmap; + } else + pr_warn(EINJ_PFX "Parameter extension is not supported.\n"); } pr_info(EINJ_PFX "Error INJection is initialized.\n"); @@ -526,6 +533,8 @@ static int __init einj_init(void) return 0; err_unmap: + if (einj_param) + iounmap(einj_param); apei_exec_post_unmap_gars(&ctx); err_release: apei_resources_release(&einj_resources); -- cgit v1.2.3