summaryrefslogtreecommitdiff
path: root/drivers/edac/amd64_edac.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/edac/amd64_edac.c')
-rw-r--r--drivers/edac/amd64_edac.c240
1 files changed, 132 insertions, 108 deletions
diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index a38831c82649..2aa339e94968 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -13,6 +13,8 @@ module_param(report_gart_errors, int, 0644);
static int ecc_enable_override;
module_param(ecc_enable_override, int, 0644);
+static struct msr *msrs;
+
/* Lookup table for all possible MC control instances */
struct amd64_pvt;
static struct mem_ctl_info *mci_lookup[EDAC_MAX_NUMNODES];
@@ -154,7 +156,7 @@ static int amd64_set_scrub_rate(struct mem_ctl_info *mci, u32 *bandwidth)
default:
amd64_printk(KERN_ERR, "Unsupported family!\n");
- break;
+ return -EINVAL;
}
return amd64_search_set_scrub_rate(pvt->misc_f3_ctl, *bandwidth,
min_scrubrate);
@@ -1489,7 +1491,7 @@ static inline u64 f10_get_base_addr_offset(u64 sys_addr, int hi_range_sel,
u64 chan_off;
if (hi_range_sel) {
- if (!(dct_sel_base_addr & 0xFFFFF800) &&
+ if (!(dct_sel_base_addr & 0xFFFF0000) &&
hole_valid && (sys_addr >= 0x100000000ULL))
chan_off = hole_off << 16;
else
@@ -2618,6 +2620,90 @@ static int amd64_init_csrows(struct mem_ctl_info *mci)
return empty;
}
+/* get all cores on this DCT */
+static void get_cpus_on_this_dct_cpumask(struct cpumask *mask, int nid)
+{
+ int cpu;
+
+ for_each_online_cpu(cpu)
+ if (amd_get_nb_id(cpu) == nid)
+ cpumask_set_cpu(cpu, mask);
+}
+
+/* check MCG_CTL on all the cpus on this node */
+static bool amd64_nb_mce_bank_enabled_on_node(int nid)
+{
+ cpumask_var_t mask;
+ int cpu, nbe;
+ bool ret = false;
+
+ if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) {
+ amd64_printk(KERN_WARNING, "%s: error allocating mask\n",
+ __func__);
+ return false;
+ }
+
+ get_cpus_on_this_dct_cpumask(mask, nid);
+
+ rdmsr_on_cpus(mask, MSR_IA32_MCG_CTL, msrs);
+
+ for_each_cpu(cpu, mask) {
+ struct msr *reg = per_cpu_ptr(msrs, cpu);
+ nbe = reg->l & K8_MSR_MCGCTL_NBE;
+
+ debugf0("core: %u, MCG_CTL: 0x%llx, NB MSR is %s\n",
+ cpu, reg->q,
+ (nbe ? "enabled" : "disabled"));
+
+ if (!nbe)
+ goto out;
+ }
+ ret = true;
+
+out:
+ free_cpumask_var(mask);
+ return ret;
+}
+
+static int amd64_toggle_ecc_err_reporting(struct amd64_pvt *pvt, bool on)
+{
+ cpumask_var_t cmask;
+ int cpu;
+
+ if (!zalloc_cpumask_var(&cmask, GFP_KERNEL)) {
+ amd64_printk(KERN_WARNING, "%s: error allocating mask\n",
+ __func__);
+ return false;
+ }
+
+ get_cpus_on_this_dct_cpumask(cmask, pvt->mc_node_id);
+
+ rdmsr_on_cpus(cmask, MSR_IA32_MCG_CTL, msrs);
+
+ for_each_cpu(cpu, cmask) {
+
+ struct msr *reg = per_cpu_ptr(msrs, cpu);
+
+ if (on) {
+ if (reg->l & K8_MSR_MCGCTL_NBE)
+ pvt->flags.ecc_report = 1;
+
+ reg->l |= K8_MSR_MCGCTL_NBE;
+ } else {
+ /*
+ * Turn off ECC reporting only when it was off before
+ */
+ if (!pvt->flags.ecc_report)
+ reg->l &= ~K8_MSR_MCGCTL_NBE;
+ }
+ }
+ wrmsr_on_cpus(cmask, MSR_IA32_MCG_CTL, msrs);
+
+ free_cpumask_var(cmask);
+
+ return 0;
+}
+
/*
* Only if 'ecc_enable_override' is set AND BIOS had ECC disabled, do "we"
* enable it.
@@ -2625,17 +2711,12 @@ static int amd64_init_csrows(struct mem_ctl_info *mci)
static void amd64_enable_ecc_error_reporting(struct mem_ctl_info *mci)
{
struct amd64_pvt *pvt = mci->pvt_info;
- const cpumask_t *cpumask = cpumask_of_node(pvt->mc_node_id);
- int cpu, idx = 0, err = 0;
- struct msr msrs[cpumask_weight(cpumask)];
- u32 value;
- u32 mask = K8_NBCTL_CECCEn | K8_NBCTL_UECCEn;
+ int err = 0;
+ u32 value, mask = K8_NBCTL_CECCEn | K8_NBCTL_UECCEn;
if (!ecc_enable_override)
return;
- memset(msrs, 0, sizeof(msrs));
-
amd64_printk(KERN_WARNING,
"'ecc_enable_override' parameter is active, "
"Enabling AMD ECC hardware now: CAUTION\n");
@@ -2651,16 +2732,9 @@ static void amd64_enable_ecc_error_reporting(struct mem_ctl_info *mci)
value |= mask;
pci_write_config_dword(pvt->misc_f3_ctl, K8_NBCTL, value);
- rdmsr_on_cpus(cpumask, K8_MSR_MCGCTL, msrs);
-
- for_each_cpu(cpu, cpumask) {
- if (msrs[idx].l & K8_MSR_MCGCTL_NBE)
- set_bit(idx, &pvt->old_mcgctl);
-
- msrs[idx].l |= K8_MSR_MCGCTL_NBE;
- idx++;
- }
- wrmsr_on_cpus(cpumask, K8_MSR_MCGCTL, msrs);
+ if (amd64_toggle_ecc_err_reporting(pvt, ON))
+ amd64_printk(KERN_WARNING, "Error enabling ECC reporting over "
+ "MCGCTL!\n");
err = pci_read_config_dword(pvt->misc_f3_ctl, K8_NBCFG, &value);
if (err)
@@ -2701,17 +2775,12 @@ static void amd64_enable_ecc_error_reporting(struct mem_ctl_info *mci)
static void amd64_restore_ecc_error_reporting(struct amd64_pvt *pvt)
{
- const cpumask_t *cpumask = cpumask_of_node(pvt->mc_node_id);
- int cpu, idx = 0, err = 0;
- struct msr msrs[cpumask_weight(cpumask)];
- u32 value;
- u32 mask = K8_NBCTL_CECCEn | K8_NBCTL_UECCEn;
+ int err = 0;
+ u32 value, mask = K8_NBCTL_CECCEn | K8_NBCTL_UECCEn;
if (!pvt->nbctl_mcgctl_saved)
return;
- memset(msrs, 0, sizeof(msrs));
-
err = pci_read_config_dword(pvt->misc_f3_ctl, K8_NBCTL, &value);
if (err)
debugf0("Reading K8_NBCTL failed\n");
@@ -2721,66 +2790,9 @@ static void amd64_restore_ecc_error_reporting(struct amd64_pvt *pvt)
/* restore the NB Enable MCGCTL bit */
pci_write_config_dword(pvt->misc_f3_ctl, K8_NBCTL, value);
- rdmsr_on_cpus(cpumask, K8_MSR_MCGCTL, msrs);
-
- for_each_cpu(cpu, cpumask) {
- msrs[idx].l &= ~K8_MSR_MCGCTL_NBE;
- msrs[idx].l |=
- test_bit(idx, &pvt->old_mcgctl) << K8_MSR_MCGCTL_NBE;
- idx++;
- }
-
- wrmsr_on_cpus(cpumask, K8_MSR_MCGCTL, msrs);
-}
-
-/* get all cores on this DCT */
-static void get_cpus_on_this_dct_cpumask(cpumask_t *mask, int nid)
-{
- int cpu;
-
- for_each_online_cpu(cpu)
- if (amd_get_nb_id(cpu) == nid)
- cpumask_set_cpu(cpu, mask);
-}
-
-/* check MCG_CTL on all the cpus on this node */
-static bool amd64_nb_mce_bank_enabled_on_node(int nid)
-{
- cpumask_t mask;
- struct msr *msrs;
- int cpu, nbe, idx = 0;
- bool ret = false;
-
- cpumask_clear(&mask);
-
- get_cpus_on_this_dct_cpumask(&mask, nid);
-
- msrs = kzalloc(sizeof(struct msr) * cpumask_weight(&mask), GFP_KERNEL);
- if (!msrs) {
- amd64_printk(KERN_WARNING, "%s: error allocating msrs\n",
- __func__);
- return false;
- }
-
- rdmsr_on_cpus(&mask, MSR_IA32_MCG_CTL, msrs);
-
- for_each_cpu(cpu, &mask) {
- nbe = msrs[idx].l & K8_MSR_MCGCTL_NBE;
-
- debugf0("core: %u, MCG_CTL: 0x%llx, NB MSR is %s\n",
- cpu, msrs[idx].q,
- (nbe ? "enabled" : "disabled"));
-
- if (!nbe)
- goto out;
-
- idx++;
- }
- ret = true;
-
-out:
- kfree(msrs);
- return ret;
+ if (amd64_toggle_ecc_err_reporting(pvt, OFF))
+ amd64_printk(KERN_WARNING, "Error restoring ECC reporting over "
+ "MCGCTL!\n");
}
/*
@@ -2789,10 +2801,11 @@ out:
* the memory system completely. A command line option allows to force-enable
* hardware ECC later in amd64_enable_ecc_error_reporting().
*/
-static const char *ecc_warning =
- "WARNING: ECC is disabled by BIOS. Module will NOT be loaded.\n"
- " Either Enable ECC in the BIOS, or set 'ecc_enable_override'.\n"
- " Also, use of the override can cause unknown side effects.\n";
+static const char *ecc_msg =
+ "ECC disabled in the BIOS or no ECC capability, module will not load.\n"
+ " Either enable ECC checking or force module loading by setting "
+ "'ecc_enable_override'.\n"
+ " (Note that use of the override may cause unknown side effects.)\n";
static int amd64_check_ecc_enabled(struct amd64_pvt *pvt)
{
@@ -2807,7 +2820,7 @@ static int amd64_check_ecc_enabled(struct amd64_pvt *pvt)
ecc_enabled = !!(value & K8_NBCFG_ECC_ENABLE);
if (!ecc_enabled)
- amd64_printk(KERN_WARNING, "This node reports that Memory ECC "
+ amd64_printk(KERN_NOTICE, "This node reports that Memory ECC "
"is currently disabled, set F3x%x[22] (%s).\n",
K8_NBCFG, pci_name(pvt->misc_f3_ctl));
else
@@ -2815,18 +2828,17 @@ static int amd64_check_ecc_enabled(struct amd64_pvt *pvt)
nb_mce_en = amd64_nb_mce_bank_enabled_on_node(pvt->mc_node_id);
if (!nb_mce_en)
- amd64_printk(KERN_WARNING, "NB MCE bank disabled, set MSR "
+ amd64_printk(KERN_NOTICE, "NB MCE bank disabled, set MSR "
"0x%08x[4] on node %d to enable.\n",
MSR_IA32_MCG_CTL, pvt->mc_node_id);
if (!ecc_enabled || !nb_mce_en) {
if (!ecc_enable_override) {
- amd64_printk(KERN_WARNING, "%s", ecc_warning);
+ amd64_printk(KERN_NOTICE, "%s", ecc_msg);
return -ENODEV;
}
- } else
- /* CLEAR the override, since BIOS controlled it */
ecc_enable_override = 0;
+ }
return 0;
}
@@ -2909,7 +2921,6 @@ static int amd64_probe_one_instance(struct pci_dev *dram_f2_ctl,
pvt->ext_model = boot_cpu_data.x86_model >> 4;
pvt->mc_type_index = mc_type_index;
pvt->ops = family_ops(mc_type_index);
- pvt->old_mcgctl = 0;
/*
* We have the dram_f2_ctl device as an argument, now go reserve its
@@ -3071,16 +3082,15 @@ static void __devexit amd64_remove_one_instance(struct pci_dev *pdev)
amd64_free_mc_sibling_devices(pvt);
- kfree(pvt);
- mci->pvt_info = NULL;
-
- mci_lookup[pvt->mc_node_id] = NULL;
-
/* unregister from EDAC MCE */
amd_report_gart_errors(false);
amd_unregister_ecc_decoder(amd64_decode_bus_error);
/* Free the EDAC CORE resources */
+ mci->pvt_info = NULL;
+ mci_lookup[pvt->mc_node_id] = NULL;
+
+ kfree(pvt);
edac_mc_free(mci);
}
@@ -3157,23 +3167,29 @@ static void amd64_setup_pci_device(void)
static int __init amd64_edac_init(void)
{
int nb, err = -ENODEV;
+ bool load_ok = false;
edac_printk(KERN_INFO, EDAC_MOD_STR, EDAC_AMD64_VERSION "\n");
opstate_init();
if (cache_k8_northbridges() < 0)
- return err;
+ goto err_ret;
+
+ msrs = msrs_alloc();
+ if (!msrs)
+ goto err_ret;
err = pci_register_driver(&amd64_pci_driver);
if (err)
- return err;
+ goto err_pci;
/*
* At this point, the array 'pvt_lookup[]' contains pointers to alloc'd
* amd64_pvt structs. These will be used in the 2nd stage init function
* to finish initialization of the MC instances.
*/
+ err = -ENODEV;
for (nb = 0; nb < num_k8_northbridges; nb++) {
if (!pvt_lookup[nb])
continue;
@@ -3181,16 +3197,21 @@ static int __init amd64_edac_init(void)
err = amd64_init_2nd_stage(pvt_lookup[nb]);
if (err)
goto err_2nd_stage;
- }
- amd64_setup_pci_device();
+ load_ok = true;
+ }
- return 0;
+ if (load_ok) {
+ amd64_setup_pci_device();
+ return 0;
+ }
err_2nd_stage:
- debugf0("2nd stage failed\n");
pci_unregister_driver(&amd64_pci_driver);
-
+err_pci:
+ msrs_free(msrs);
+ msrs = NULL;
+err_ret:
return err;
}
@@ -3200,6 +3221,9 @@ static void __exit amd64_edac_exit(void)
edac_pci_release_generic_ctl(amd64_ctl_pci);
pci_unregister_driver(&amd64_pci_driver);
+
+ msrs_free(msrs);
+ msrs = NULL;
}
module_init(amd64_edac_init);