From fcaf780b2ad352edaeb1d1c07a6da053266b1eed Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 24 Aug 2010 23:22:57 -0300 Subject: i7300_edac: start a driver for i7300 chipset (Clarksboro) Signed-off-by: Mauro Carvalho Chehab --- drivers/edac/Kconfig | 7 + drivers/edac/Makefile | 1 + drivers/edac/i7300_edac.c | 1373 +++++++++++++++++++++++++++++++++++++++++++++ include/linux/pci_ids.h | 19 +- 4 files changed, 1392 insertions(+), 8 deletions(-) create mode 100644 drivers/edac/i7300_edac.c diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig index 70bb350de996..4573ccc11f93 100644 --- a/drivers/edac/Kconfig +++ b/drivers/edac/Kconfig @@ -199,6 +199,13 @@ config EDAC_I5100 Support for error detection and correction the Intel San Clemente MCH. +config EDAC_I7300 + tristate "Intel Clarksboro MCH" + depends on EDAC_MM_EDAC && X86 && PCI + help + Support for error detection and correction the Intel + Clarksboro MCH (Intel 7300 chipset). + config EDAC_MPC85XX tristate "Freescale MPC83xx / MPC85xx" depends on EDAC_MM_EDAC && FSL_SOC && (PPC_83xx || PPC_85xx) diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile index ca6b1bb24ccc..bca4369d6b7a 100644 --- a/drivers/edac/Makefile +++ b/drivers/edac/Makefile @@ -24,6 +24,7 @@ obj-$(CONFIG_EDAC_CPC925) += cpc925_edac.o obj-$(CONFIG_EDAC_I5000) += i5000_edac.o obj-$(CONFIG_EDAC_I5100) += i5100_edac.o obj-$(CONFIG_EDAC_I5400) += i5400_edac.o +obj-$(CONFIG_EDAC_I7300) += i7300_edac.o obj-$(CONFIG_EDAC_I7CORE) += i7core_edac.o obj-$(CONFIG_EDAC_E7XXX) += e7xxx_edac.o obj-$(CONFIG_EDAC_E752X) += e752x_edac.o diff --git a/drivers/edac/i7300_edac.c b/drivers/edac/i7300_edac.c new file mode 100644 index 000000000000..eb3f30e96ee3 --- /dev/null +++ b/drivers/edac/i7300_edac.c @@ -0,0 +1,1373 @@ +/* + * Intel 7300 class Memory Controllers kernel module (Clarksboro) + * + * This file may be distributed under the terms of the + * GNU General Public License version 2 only. + * + * Copyright (c) 2010 by: + * Mauro Carvalho Chehab + * + * Red Hat Inc. http://www.redhat.com + * + * Intel 7300 Chipset Memory Controller Hub (MCH) - Datasheet + * http://www.intel.com/Assets/PDF/datasheet/318082.pdf + * + * TODO: The chipset allow checking for PCI Express errors also. Currently, + * the driver covers only memory error errors + * + * This driver uses "csrows" EDAC attribute to represent DIMM slot# + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "edac_core.h" + +/* + * Alter this version for the I7300 module when modifications are made + */ +#define I7300_REVISION " Ver: 1.0.0 " __DATE__ + +#define EDAC_MOD_STR "i7300_edac" + +#define i7300_printk(level, fmt, arg...) \ + edac_printk(level, "i7300", fmt, ##arg) + +#define i7300_mc_printk(mci, level, fmt, arg...) \ + edac_mc_chipset_printk(mci, level, "i7300", fmt, ##arg) + +/* + * Memory topology is organized as: + * Branch 0 - 2 channels: channels 0 and 1 (FDB0 PCI dev 21.0) + * Branch 1 - 2 channels: channels 2 and 3 (FDB1 PCI dev 22.0) + * Each channel can have to 8 DIMM sets (called as SLOTS) + * Slots should generally be filled in pairs + * Except on Single Channel mode of operation + * just slot 0/channel0 filled on this mode + * On normal operation mode, the two channels on a branch should be + filled together for the same SLOT# + * When in mirrored mode, Branch 1 replicate memory at Branch 0, so, the four + * channels on both branches should be filled + */ + +/* Limits for i7300 */ +#define MAX_SLOTS 8 +#define MAX_BRANCHES 2 +#define MAX_CH_PER_BRANCH 2 +#define MAX_CHANNELS (MAX_CH_PER_BRANCH * MAX_BRANCHES) +#define MAX_MIR 3 + +#define to_channel(ch, branch) ((((branch)) << 1) | (ch)) + +#define to_csrow(slot, ch, branch) \ + (to_channel(ch, branch) | ((slot) << 2)) + + +/* Device 16, + * Function 0: System Address (not documented) + * Function 1: Memory Branch Map, Control, Errors Register + * Function 2: FSB Error Registers + * + * All 3 functions of Device 16 (0,1,2) share the SAME DID and + * uses PCI_DEVICE_ID_INTEL_I7300_MCH_ERR for device 16 (0,1,2), + * PCI_DEVICE_ID_INTEL_I7300_MCH_FB0 and PCI_DEVICE_ID_INTEL_I7300_MCH_FB1 + * for device 21 (0,1). + */ + + /* OFFSETS for Function 0 */ +#define AMBASE 0x48 /* AMB Mem Mapped Reg Region Base */ +#define MAXCH 0x56 /* Max Channel Number */ +#define MAXDIMMPERCH 0x57 /* Max DIMM PER Channel Number */ + + /* OFFSETS for Function 1 */ +#define TOLM 0x6C +#define REDMEMB 0x7C + +#define MIR0 0x80 +#define MIR1 0x84 +#define MIR2 0x88 + +#if 0 +#define AMIR0 0x8c +#define AMIR1 0x90 +#define AMIR2 0x94 + +/*TODO: double check it */ +#define REC_ECC_LOCATOR_ODD(x) ((x) & 0x3fe00) /* bits [17:9] indicate ODD, [8:0] indicate EVEN */ + + /* Fatal error registers */ +#define FERR_FAT_FBD 0x98 + +/*TODO: double check it */ +#define FERR_FAT_FBDCHAN (3<<28) /* channel index where the highest-order error occurred */ + +#define NERR_FAT_FBD 0x9c +#define FERR_NF_FBD 0xa0 + + /* Non-fatal error register */ +#define NERR_NF_FBD 0xa4 + + /* Enable error mask */ +#define EMASK_FBD 0xa8 + +#define ERR0_FBD 0xac +#define ERR1_FBD 0xb0 +#define ERR2_FBD 0xb4 +#define MCERR_FBD 0xb8 + +#endif + +/* TODO: Dev 16 fn1 allows memory error injection - offsets 0x100-0x10b */ + + /* TODO: OFFSETS for Device 16 Function 2 */ + +/* + * Device 21, + * Function 0: Memory Map Branch 0 + * + * Device 22, + * Function 0: Memory Map Branch 1 + */ + + /* OFFSETS for Function 0 */ + +/* + * Note: Other Intel EDAC drivers use AMBPRESENT to identify if the available + * memory. From datasheet item 7.3.1 (FB-DIMM technology & organization), it + * seems that we cannot use this information directly for the same usage. + * Each memory slot may have up to 2 AMB interfaces, one for income and another + * for outcome interface to the next slot. + * For now, the driver just stores the AMB present registers, but rely only at + * the MTR info to detect memory. + * Datasheet is also not clear about how to map each AMBPRESENT registers to + * one of the 4 available channels. + */ +#define AMBPRESENT_0 0x64 +#define AMBPRESENT_1 0x66 + +const static u16 mtr_regs [MAX_SLOTS] = { + 0x80, 0x84, 0x88, 0x8c, + 0x82, 0x86, 0x8a, 0x8e +}; + +/* Defines to extract the vaious fields from the + * MTRx - Memory Technology Registers + */ +#define MTR_DIMMS_PRESENT(mtr) ((mtr) & (1 << 8)) +#define MTR_DIMMS_ETHROTTLE(mtr) ((mtr) & (1 << 7)) +#define MTR_DRAM_WIDTH(mtr) (((mtr) & (1 << 6)) ? 8 : 4) +#define MTR_DRAM_BANKS(mtr) (((mtr) & (1 << 5)) ? 8 : 4) +#define MTR_DIMM_RANKS(mtr) (((mtr) & (1 << 4)) ? 1 : 0) +#define MTR_DIMM_ROWS(mtr) (((mtr) >> 2) & 0x3) +#define MTR_DRAM_BANKS_ADDR_BITS 2 +#define MTR_DIMM_ROWS_ADDR_BITS(mtr) (MTR_DIMM_ROWS(mtr) + 13) +#define MTR_DIMM_COLS(mtr) ((mtr) & 0x3) +#define MTR_DIMM_COLS_ADDR_BITS(mtr) (MTR_DIMM_COLS(mtr) + 10) + +#if 0 + /* OFFSETS for Function 1 */ + +/* TODO */ +#define NRECFGLOG 0x74 +#define RECFGLOG 0x78 +#define NRECMEMA 0xbe +#define NRECMEMB 0xc0 +#define NRECFB_DIMMA 0xc4 +#define NRECFB_DIMMB 0xc8 +#define NRECFB_DIMMC 0xcc +#define NRECFB_DIMMD 0xd0 +#define NRECFB_DIMME 0xd4 +#define NRECFB_DIMMF 0xd8 +#define REDMEMA 0xdC +#define RECMEMA 0xf0 +#define RECMEMB 0xf4 +#define RECFB_DIMMA 0xf8 +#define RECFB_DIMMB 0xec +#define RECFB_DIMMC 0xf0 +#define RECFB_DIMMD 0xf4 +#define RECFB_DIMME 0xf8 +#define RECFB_DIMMF 0xfC + +/* This applies to FERR_NF_FB-DIMM as well as FERR_FAT_FB-DIMM */ +static inline int extract_fbdchan_indx(u32 x) +{ + return (x>>28) & 0x3; +} +#endif + +#ifdef CONFIG_EDAC_DEBUG +/* MTR NUMROW */ +static const char *numrow_toString[] = { + "8,192 - 13 rows", + "16,384 - 14 rows", + "32,768 - 15 rows", + "65,536 - 16 rows" +}; + +/* MTR NUMCOL */ +static const char *numcol_toString[] = { + "1,024 - 10 columns", + "2,048 - 11 columns", + "4,096 - 12 columns", + "reserved" +}; +#endif + +#if 0 + +/* + * Error indicator bits and masks + * Error masks are according with Table 5-17 of i7300 datasheet + */ + +enum error_mask { + EMASK_M1 = 1<<0, /* Memory Write error on non-redundant retry */ + EMASK_M2 = 1<<1, /* Memory or FB-DIMM configuration CRC read error */ + EMASK_M3 = 1<<2, /* Reserved */ + EMASK_M4 = 1<<3, /* Uncorrectable Data ECC on Replay */ + EMASK_M5 = 1<<4, /* Aliased Uncorrectable Non-Mirrored Demand Data ECC */ + EMASK_M6 = 1<<5, /* Unsupported on i7300 */ + EMASK_M7 = 1<<6, /* Aliased Uncorrectable Resilver- or Spare-Copy Data ECC */ + EMASK_M8 = 1<<7, /* Aliased Uncorrectable Patrol Data ECC */ + EMASK_M9 = 1<<8, /* Non-Aliased Uncorrectable Non-Mirrored Demand Data ECC */ + EMASK_M10 = 1<<9, /* Unsupported on i7300 */ + EMASK_M11 = 1<<10, /* Non-Aliased Uncorrectable Resilver- or Spare-Copy Data ECC */ + EMASK_M12 = 1<<11, /* Non-Aliased Uncorrectable Patrol Data ECC */ + EMASK_M13 = 1<<12, /* Memory Write error on first attempt */ + EMASK_M14 = 1<<13, /* FB-DIMM Configuration Write error on first attempt */ + EMASK_M15 = 1<<14, /* Memory or FB-DIMM configuration CRC read error */ + EMASK_M16 = 1<<15, /* Channel Failed-Over Occurred */ + EMASK_M17 = 1<<16, /* Correctable Non-Mirrored Demand Data ECC */ + EMASK_M18 = 1<<17, /* Unsupported on i7300 */ + EMASK_M19 = 1<<18, /* Correctable Resilver- or Spare-Copy Data ECC */ + EMASK_M20 = 1<<19, /* Correctable Patrol Data ECC */ + EMASK_M21 = 1<<20, /* FB-DIMM Northbound parity error on FB-DIMM Sync Status */ + EMASK_M22 = 1<<21, /* SPD protocol Error */ + EMASK_M23 = 1<<22, /* Non-Redundant Fast Reset Timeout */ + EMASK_M24 = 1<<23, /* Refresh error */ + EMASK_M25 = 1<<24, /* Memory Write error on redundant retry */ + EMASK_M26 = 1<<25, /* Redundant Fast Reset Timeout */ + EMASK_M27 = 1<<26, /* Correctable Counter Threshold Exceeded */ + EMASK_M28 = 1<<27, /* DIMM-Spare Copy Completed */ + EMASK_M29 = 1<<28, /* DIMM-Isolation Completed */ +}; + +/* + * Names to translate bit error into something useful + */ +static const char *error_name[] = { + [0] = "Memory Write error on non-redundant retry", + [1] = "Memory or FB-DIMM configuration CRC read error", + /* Reserved */ + [3] = "Uncorrectable Data ECC on Replay", + [4] = "Aliased Uncorrectable Non-Mirrored Demand Data ECC", + /* M6 Unsupported on i7300 */ + [6] = "Aliased Uncorrectable Resilver- or Spare-Copy Data ECC", + [7] = "Aliased Uncorrectable Patrol Data ECC", + [8] = "Non-Aliased Uncorrectable Non-Mirrored Demand Data ECC", + /* M10 Unsupported on i7300 */ + [10] = "Non-Aliased Uncorrectable Resilver- or Spare-Copy Data ECC", + [11] = "Non-Aliased Uncorrectable Patrol Data ECC", + [12] = "Memory Write error on first attempt", + [13] = "FB-DIMM Configuration Write error on first attempt", + [14] = "Memory or FB-DIMM configuration CRC read error", + [15] = "Channel Failed-Over Occurred", + [16] = "Correctable Non-Mirrored Demand Data ECC", + /* M18 Unsupported on i7300 */ + [18] = "Correctable Resilver- or Spare-Copy Data ECC", + [19] = "Correctable Patrol Data ECC", + [20] = "FB-DIMM Northbound parity error on FB-DIMM Sync Status", + [21] = "SPD protocol Error", + [22] = "Non-Redundant Fast Reset Timeout", + [23] = "Refresh error", + [24] = "Memory Write error on redundant retry", + [25] = "Redundant Fast Reset Timeout", + [26] = "Correctable Counter Threshold Exceeded", + [27] = "DIMM-Spare Copy Completed", + [28] = "DIMM-Isolation Completed", +}; + +/* Fatal errors */ +#define ERROR_FAT_MASK (EMASK_M1 | \ + EMASK_M2 | \ + EMASK_M23) + +/* Correctable errors */ +#define ERROR_NF_CORRECTABLE (EMASK_M27 | \ + EMASK_M20 | \ + EMASK_M19 | \ + EMASK_M18 | \ + EMASK_M17 | \ + EMASK_M16) +#define ERROR_NF_DIMM_SPARE (EMASK_M29 | \ + EMASK_M28) +#define ERROR_NF_SPD_PROTOCOL (EMASK_M22) +#define ERROR_NF_NORTH_CRC (EMASK_M21) + +/* Recoverable errors */ +#define ERROR_NF_RECOVERABLE (EMASK_M26 | \ + EMASK_M25 | \ + EMASK_M24 | \ + EMASK_M15 | \ + EMASK_M14 | \ + EMASK_M13 | \ + EMASK_M12 | \ + EMASK_M11 | \ + EMASK_M9 | \ + EMASK_M8 | \ + EMASK_M7 | \ + EMASK_M5) + +/* uncorrectable errors */ +#define ERROR_NF_UNCORRECTABLE (EMASK_M4) + +/* mask to all non-fatal errors */ +#define ERROR_NF_MASK (ERROR_NF_CORRECTABLE | \ + ERROR_NF_UNCORRECTABLE | \ + ERROR_NF_RECOVERABLE | \ + ERROR_NF_DIMM_SPARE | \ + ERROR_NF_SPD_PROTOCOL | \ + ERROR_NF_NORTH_CRC) + +/* + * Define error masks for the several registers + */ + +/* Enable all fatal and non fatal errors */ +#define ENABLE_EMASK_ALL (ERROR_FAT_MASK | ERROR_NF_MASK) + +/* mask for fatal error registers */ +#define FERR_FAT_MASK ERROR_FAT_MASK + +/* masks for non-fatal error register */ +static inline int to_nf_mask(unsigned int mask) +{ + return (mask & EMASK_M29) | (mask >> 3); +}; + +static inline int from_nf_ferr(unsigned int mask) +{ + return (mask & EMASK_M29) | /* Bit 28 */ + (mask & ((1 << 28) - 1) << 3); /* Bits 0 to 27 */ +}; + +#define FERR_NF_MASK to_nf_mask(ERROR_NF_MASK) +#define FERR_NF_CORRECTABLE to_nf_mask(ERROR_NF_CORRECTABLE) +#define FERR_NF_DIMM_SPARE to_nf_mask(ERROR_NF_DIMM_SPARE) +#define FERR_NF_SPD_PROTOCOL to_nf_mask(ERROR_NF_SPD_PROTOCOL) +#define FERR_NF_NORTH_CRC to_nf_mask(ERROR_NF_NORTH_CRC) +#define FERR_NF_RECOVERABLE to_nf_mask(ERROR_NF_RECOVERABLE) +#define FERR_NF_UNCORRECTABLE to_nf_mask(ERROR_NF_UNCORRECTABLE) + +#endif + +/* Device name and register DID (Device ID) */ +struct i7300_dev_info { + const char *ctl_name; /* name for this device */ + u16 fsb_mapping_errors; /* DID for the branchmap,control */ +}; + +/* Table of devices attributes supported by this driver */ +static const struct i7300_dev_info i7300_devs[] = { + { + .ctl_name = "I7300", + .fsb_mapping_errors = PCI_DEVICE_ID_INTEL_I7300_MCH_ERR, + }, +}; + +struct i7300_dimm_info { + int megabytes; /* size, 0 means not present */ +}; + +/* driver private data structure */ +struct i7300_pvt { + struct pci_dev *system_address; /* 16.0 */ + struct pci_dev *branchmap_werrors; /* 16.1 */ + struct pci_dev *fsb_error_regs; /* 16.2 */ + struct pci_dev *branch_pci[MAX_BRANCHES]; /* 21.0 and 22.0 */ + + u16 tolm; /* top of low memory */ + u64 ambase; /* AMB BAR */ + + u16 mir[MAX_MIR]; + + u16 mtr[MAX_SLOTS][MAX_BRANCHES]; /* Memory Technlogy Reg */ + u16 ambpresent[MAX_CHANNELS]; /* AMB present regs */ + + /* DIMM information matrix, allocating architecture maximums */ + struct i7300_dimm_info dimm_info[MAX_SLOTS][MAX_CHANNELS]; +}; + +#if 0 +/* I7300 MCH error information retrieved from Hardware */ +struct i7300_error_info { + /* These registers are always read from the MC */ + u32 ferr_fat_fbd; /* First Errors Fatal */ + u32 nerr_fat_fbd; /* Next Errors Fatal */ + u32 ferr_nf_fbd; /* First Errors Non-Fatal */ + u32 nerr_nf_fbd; /* Next Errors Non-Fatal */ + + /* These registers are input ONLY if there was a Recoverable Error */ + u32 redmemb; /* Recoverable Mem Data Error log B */ + u16 recmema; /* Recoverable Mem Error log A */ + u32 recmemb; /* Recoverable Mem Error log B */ + + /* These registers are input ONLY if there was a Non-Rec Error */ + u16 nrecmema; /* Non-Recoverable Mem log A */ + u16 nrecmemb; /* Non-Recoverable Mem log B */ + +}; +#endif + +/* FIXME: Why do we need to have this static? */ +static struct edac_pci_ctl_info *i7300_pci; + + +#if 0 +/* note that nrec_rdwr changed from NRECMEMA to NRECMEMB between the 5000 and + 5400 better to use an inline function than a macro in this case */ +static inline int nrec_bank(struct i7300_error_info *info) +{ + return ((info->nrecmema) >> 12) & 0x7; +} +static inline int nrec_rank(struct i7300_error_info *info) +{ + return ((info->nrecmema) >> 8) & 0xf; +} +static inline int nrec_buf_id(struct i7300_error_info *info) +{ + return ((info->nrecmema)) & 0xff; +} +static inline int nrec_rdwr(struct i7300_error_info *info) +{ + return (info->nrecmemb) >> 31; +} +/* This applies to both NREC and REC string so it can be used with nrec_rdwr + and rec_rdwr */ +static inline const char *rdwr_str(int rdwr) +{ + return rdwr ? "Write" : "Read"; +} +static inline int nrec_cas(struct i7300_error_info *info) +{ + return ((info->nrecmemb) >> 16) & 0x1fff; +} +static inline int nrec_ras(struct i7300_error_info *info) +{ + return (info->nrecmemb) & 0xffff; +} +static inline int rec_bank(struct i7300_error_info *info) +{ + return ((info->recmema) >> 12) & 0x7; +} +static inline int rec_rank(struct i7300_error_info *info) +{ + return ((info->recmema) >> 8) & 0xf; +} +static inline int rec_rdwr(struct i7300_error_info *info) +{ + return (info->recmemb) >> 31; +} +static inline int rec_cas(struct i7300_error_info *info) +{ + return ((info->recmemb) >> 16) & 0x1fff; +} +static inline int rec_ras(struct i7300_error_info *info) +{ + return (info->recmemb) & 0xffff; +} + +/* + * i7300_get_error_info Retrieve the hardware error information from + * the hardware and cache it in the 'info' + * structure + */ +static void i7300_get_error_info(struct mem_ctl_info *mci, + struct i7300_error_info *info) +{ + struct i7300_pvt *pvt; + u32 value; + + pvt = mci->pvt_info; + + /* read in the 1st FATAL error register */ + pci_read_config_dword(pvt->branchmap_werrors, FERR_FAT_FBD, &value); + + /* Mask only the bits that the doc says are valid + */ + value &= (FERR_FAT_FBDCHAN | FERR_FAT_MASK); + + /* If there is an error, then read in the + NEXT FATAL error register and the Memory Error Log Register A + */ + if (value & FERR_FAT_MASK) { + info->ferr_fat_fbd = value; + + /* harvest the various error data we need */ + pci_read_config_dword(pvt->branchmap_werrors, + NERR_FAT_FBD, &info->nerr_fat_fbd); + pci_read_config_word(pvt->branchmap_werrors, + NRECMEMA, &info->nrecmema); + pci_read_config_word(pvt->branchmap_werrors, + NRECMEMB, &info->nrecmemb); + + /* Clear the error bits, by writing them back */ + pci_write_config_dword(pvt->branchmap_werrors, + FERR_FAT_FBD, value); + } else { + info->ferr_fat_fbd = 0; + info->nerr_fat_fbd = 0; + info->nrecmema = 0; + info->nrecmemb = 0; + } + + /* read in the 1st NON-FATAL error register */ + pci_read_config_dword(pvt->branchmap_werrors, FERR_NF_FBD, &value); + + /* If there is an error, then read in the 1st NON-FATAL error + * register as well */ + if (value & FERR_NF_MASK) { + info->ferr_nf_fbd = value; + + /* harvest the various error data we need */ + pci_read_config_dword(pvt->branchmap_werrors, + NERR_NF_FBD, &info->nerr_nf_fbd); + pci_read_config_word(pvt->branchmap_werrors, + RECMEMA, &info->recmema); + pci_read_config_dword(pvt->branchmap_werrors, + RECMEMB, &info->recmemb); + pci_read_config_dword(pvt->branchmap_werrors, + REDMEMB, &info->redmemb); + + /* Clear the error bits, by writing them back */ + pci_write_config_dword(pvt->branchmap_werrors, + FERR_NF_FBD, value); + } else { + info->ferr_nf_fbd = 0; + info->nerr_nf_fbd = 0; + info->recmema = 0; + info->recmemb = 0; + info->redmemb = 0; + } +} + +/* + * i7300_proccess_non_recoverable_info(struct mem_ctl_info *mci, + * struct i7300_error_info *info, + * int handle_errors); + * + * handle the Intel FATAL and unrecoverable errors, if any + */ +static void i7300_proccess_non_recoverable_info(struct mem_ctl_info *mci, + struct i7300_error_info *info, + unsigned long allErrors) +{ + char msg[EDAC_MC_LABEL_LEN + 1 + 90 + 80]; + int branch; + int channel; + int bank; + int buf_id; + int rank; + int rdwr; + int ras, cas; + int errnum; + char *type = NULL; + + if (!allErrors) + return; /* if no error, return now */ + + if (allErrors & ERROR_FAT_MASK) + type = "FATAL"; + else if (allErrors & FERR_NF_UNCORRECTABLE) + type = "NON-FATAL uncorrected"; + else + type = "NON-FATAL recoverable"; + + /* ONLY ONE of the possible error bits will be set, as per the docs */ + + branch = extract_fbdchan_indx(info->ferr_fat_fbd); + channel = branch; + + /* Use the NON-Recoverable macros to extract data */ + bank = nrec_bank(info); + rank = nrec_rank(info); + buf_id = nrec_buf_id(info); + rdwr = nrec_rdwr(info); + ras = nrec_ras(info); + cas = nrec_cas(info); + + debugf0("\t\tCSROW= %d Channels= %d,%d (Branch= %d " + "DRAM Bank= %d Buffer ID = %d rdwr= %s ras= %d cas= %d)\n", + rank, channel, channel + 1, branch >> 1, bank, + buf_id, rdwr_str(rdwr), ras, cas); + + /* Only 1 bit will be on */ + errnum = find_first_bit(&allErrors, ARRAY_SIZE(error_name)); + + /* Form out message */ + snprintf(msg, sizeof(msg), + "%s (Branch=%d DRAM-Bank=%d Buffer ID = %d RDWR=%s " + "RAS=%d CAS=%d %s Err=0x%lx (%s))", + type, branch >> 1, bank, buf_id, rdwr_str(rdwr), ras, cas, + type, allErrors, error_name[errnum]); + + /* Call the helper to output message */ + edac_mc_handle_fbd_ue(mci, rank, channel, channel + 1, msg); +} + +/* + * i7300_process_fatal_error_info(struct mem_ctl_info *mci, + * struct i7300_error_info *info, + * int handle_errors); + * + * handle the Intel NON-FATAL errors, if any + */ +static void i7300_process_nonfatal_error_info(struct mem_ctl_info *mci, + struct i7300_error_info *info) +{ + char msg[EDAC_MC_LABEL_LEN + 1 + 90 + 80]; + unsigned long allErrors; + int branch; + int channel; + int bank; + int rank; + int rdwr; + int ras, cas; + int errnum; + + /* mask off the Error bits that are possible */ + allErrors = from_nf_ferr(info->ferr_nf_fbd & FERR_NF_MASK); + if (!allErrors) + return; /* if no error, return now */ + + /* ONLY ONE of the possible error bits will be set, as per the docs */ + + if (allErrors & (ERROR_NF_UNCORRECTABLE | ERROR_NF_RECOVERABLE)) { + i7300_proccess_non_recoverable_info(mci, info, allErrors); + return; + } + + /* Correctable errors */ + if (allErrors & ERROR_NF_CORRECTABLE) { + debugf0("\tCorrected bits= 0x%lx\n", allErrors); + + branch = extract_fbdchan_indx(info->ferr_nf_fbd); + + channel = 0; + if (REC_ECC_LOCATOR_ODD(info->redmemb)) + channel = 1; + + /* Convert channel to be based from zero, instead of + * from branch base of 0 */ + channel += branch; + + bank = rec_bank(info); + rank = rec_rank(info); + rdwr = rec_rdwr(info); + ras = rec_ras(info); + cas = rec_cas(info); + + /* Only 1 bit will be on */ + errnum = find_first_bit(&allErrors, ARRAY_SIZE(error_name)); + + debugf0("\t\tCSROW= %d Channel= %d (Branch %d " + "DRAM Bank= %d rdwr= %s ras= %d cas= %d)\n", + rank, channel, branch >> 1, bank, + rdwr_str(rdwr), ras, cas); + + /* Form out message */ + snprintf(msg, sizeof(msg), + "Corrected error (Branch=%d DRAM-Bank=%d RDWR=%s " + "RAS=%d CAS=%d, CE Err=0x%lx (%s))", + branch >> 1, bank, rdwr_str(rdwr), ras, cas, + allErrors, error_name[errnum]); + + /* Call the helper to output message */ + edac_mc_handle_fbd_ce(mci, rank, channel, msg); + + return; + } + + /* Miscelaneous errors */ + errnum = find_first_bit(&allErrors, ARRAY_SIZE(error_name)); + + branch = extract_fbdchan_indx(info->ferr_nf_fbd); + + i7300_mc_printk(mci, KERN_EMERG, + "Non-Fatal misc error (Branch=%d Err=%#lx (%s))", + branch >> 1, allErrors, error_name[errnum]); +} + +/* + * i7300_process_error_info Process the error info that is + * in the 'info' structure, previously retrieved from hardware + */ +static void i7300_process_error_info(struct mem_ctl_info *mci, + struct i7300_error_info *info) +{ u32 allErrors; + + /* First handle any fatal errors that occurred */ + allErrors = (info->ferr_fat_fbd & FERR_FAT_MASK); + i7300_proccess_non_recoverable_info(mci, info, allErrors); + + /* now handle any non-fatal errors that occurred */ + i7300_process_nonfatal_error_info(mci, info); +} + +/* + * i7300_clear_error Retrieve any error from the hardware + * but do NOT process that error. + * Used for 'clearing' out of previous errors + * Called by the Core module. + */ +static void i7300_clear_error(struct mem_ctl_info *mci) +{ + struct i7300_error_info info; + + i7300_get_error_info(mci, &info); +} + +/* + * i7300_check_error Retrieve and process errors reported by the + * hardware. Called by the Core module. + */ +static void i7300_check_error(struct mem_ctl_info *mci) +{ + struct i7300_error_info info; + debugf4("MC%d: " __FILE__ ": %s()\n", mci->mc_idx, __func__); + i7300_get_error_info(mci, &info); + i7300_process_error_info(mci, &info); +} + +/* + * i7300_enable_error_reporting + * Turn on the memory reporting features of the hardware + */ +static void i7300_enable_error_reporting(struct mem_ctl_info *mci) +{ + struct i7300_pvt *pvt; + u32 fbd_error_mask; + + pvt = mci->pvt_info; + + /* Read the FBD Error Mask Register */ + pci_read_config_dword(pvt->branchmap_werrors, EMASK_FBD, + &fbd_error_mask); + + /* Enable with a '0' */ + fbd_error_mask &= ~(ENABLE_EMASK_ALL); + + pci_write_config_dword(pvt->branchmap_werrors, EMASK_FBD, + fbd_error_mask); +} +#endif + +/* + * determine_mtr(pvt, csrow, channel) + * + * return the proper MTR register as determine by the csrow and desired channel + */ +static int decode_mtr(struct i7300_pvt *pvt, + int slot, int ch, int branch, + struct i7300_dimm_info *dinfo, + struct csrow_info *p_csrow) +{ + int mtr, ans, addrBits, channel; + + channel = to_channel(ch, branch); + + mtr = pvt->mtr[slot][branch]; + ans = MTR_DIMMS_PRESENT(mtr) ? 1 : 0; + + debugf2("\tMTR%d CH%d: DIMMs are %s (mtr)\n", + slot, channel, + ans ? "Present" : "NOT Present"); + + /* Determine if there is a DIMM present in this DIMM slot */ + +#if 0 + if (!amb_present || !ans) + return 0; +#else + if (!ans) + return 0; +#endif + + /* Start with the number of bits for a Bank + * on the DRAM */ + addrBits = MTR_DRAM_BANKS_ADDR_BITS; + /* Add thenumber of ROW bits */ + addrBits += MTR_DIMM_ROWS_ADDR_BITS(mtr); + /* add the number of COLUMN bits */ + addrBits += MTR_DIMM_COLS_ADDR_BITS(mtr); + /* add the number of RANK bits */ + addrBits += MTR_DIMM_RANKS(mtr); + + addrBits += 6; /* add 64 bits per DIMM */ + addrBits -= 20; /* divide by 2^^20 */ + addrBits -= 3; /* 8 bits per bytes */ + + dinfo->megabytes = 1 << addrBits; + + debugf2("\t\tWIDTH: x%d\n", MTR_DRAM_WIDTH(mtr)); + + debugf2("\t\tELECTRICAL THROTTLING is %s\n", + MTR_DIMMS_ETHROTTLE(mtr) ? "enabled" : "disabled"); + + debugf2("\t\tNUMBANK: %d bank(s)\n", MTR_DRAM_BANKS(mtr)); + debugf2("\t\tNUMRANK: %s\n", MTR_DIMM_RANKS(mtr) ? "double" : "single"); + debugf2("\t\tNUMROW: %s\n", numrow_toString[MTR_DIMM_ROWS(mtr)]); + debugf2("\t\tNUMCOL: %s\n", numcol_toString[MTR_DIMM_COLS(mtr)]); + debugf2("\t\tSIZE: %d MB\n", dinfo->megabytes); + + p_csrow->grain = 8; + p_csrow->nr_pages = dinfo->megabytes << 8; + p_csrow->mtype = MEM_FB_DDR2; + p_csrow->edac_mode = EDAC_S8ECD8ED; + + /* ask what device type on this row */ + if (MTR_DRAM_WIDTH(mtr)) + p_csrow->dtype = DEV_X8; + else + p_csrow->dtype = DEV_X4; + + return mtr; +} + +/* + * print_dimm_size + * + * also will output a DIMM matrix map, if debug is enabled, for viewing + * how the DIMMs are populated + */ +static void print_dimm_size(struct i7300_pvt *pvt) +{ + struct i7300_dimm_info *dinfo; + char *p, *mem_buffer; + int space, n; + int channel, slot; + + space = PAGE_SIZE; + mem_buffer = p = kmalloc(space, GFP_KERNEL); + if (p == NULL) { + i7300_printk(KERN_ERR, "MC: %s:%s() kmalloc() failed\n", + __FILE__, __func__); + return; + } + + n = snprintf(p, space, " "); + p += n; + space -= n; + for (channel = 0; channel < MAX_CHANNELS; channel++) { + n = snprintf(p, space, "channel %d | ", channel); + p += n; + space -= n; + } + debugf2("%s\n", mem_buffer); + p = mem_buffer; + space = PAGE_SIZE; + n = snprintf(p, space, "-------------------------------" + "------------------------------"); + p += n; + space -= n; + debugf2("%s\n", mem_buffer); + p = mem_buffer; + space = PAGE_SIZE; + + for (slot = 0; slot < MAX_SLOTS; slot++) { + n = snprintf(p, space, "csrow/SLOT %d ", slot); + p += n; + space -= n; + + for (channel = 0; channel < MAX_CHANNELS; channel++) { + dinfo = &pvt->dimm_info[slot][channel]; + n = snprintf(p, space, "%4d MB | ", dinfo->megabytes); + p += n; + space -= n; + } + + debugf2("%s\n", mem_buffer); + p = mem_buffer; + space = PAGE_SIZE; + } + + n = snprintf(p, space, "-------------------------------" + "------------------------------"); + p += n; + space -= n; + debugf2("%s\n", mem_buffer); + p = mem_buffer; + space = PAGE_SIZE; + + kfree(mem_buffer); +} + +/* + * i7300_init_csrows Initialize the 'csrows' table within + * the mci control structure with the + * addressing of memory. + * + * return: + * 0 success + * 1 no actual memory found on this MC + */ +static int i7300_init_csrows(struct mem_ctl_info *mci) +{ + struct i7300_pvt *pvt; + struct i7300_dimm_info *dinfo; + struct csrow_info *p_csrow; + int empty; + int mtr; + int ch, branch, slot, channel; + + pvt = mci->pvt_info; + + empty = 1; /* Assume NO memory */ + + debugf2("Memory Technology Registers:\n"); + + /* Get the AMB present registers for the four channels */ + for (branch = 0; branch < MAX_BRANCHES; branch++) { + /* Read and dump branch 0's MTRs */ + channel = to_channel(0, branch); + pci_read_config_word(pvt->branch_pci[branch], AMBPRESENT_0, + &pvt->ambpresent[channel]); + debugf2("\t\tAMB-present CH%d = 0x%x:\n", + channel, pvt->ambpresent[channel]); + + channel = to_channel(1, branch); + pci_read_config_word(pvt->branch_pci[branch], AMBPRESENT_1, + &pvt->ambpresent[channel]); + debugf2("\t\tAMB-present CH%d = 0x%x:\n", + channel, pvt->ambpresent[channel]); + } + + /* Get the set of MTR[0-7] regs by each branch */ + for (slot = 0; slot < MAX_SLOTS; slot++) { + int where = mtr_regs[slot]; + for (branch = 0; branch < MAX_BRANCHES; branch++) { + pci_read_config_word(pvt->branch_pci[branch], + where, + &pvt->mtr[slot][branch]); + for (ch = 0; ch < MAX_BRANCHES; ch++) { + int channel = to_channel(ch, branch); + + dinfo = &pvt->dimm_info[slot][channel]; + p_csrow = &mci->csrows[slot]; + + mtr = decode_mtr(pvt, slot, ch, branch, + dinfo, p_csrow); + /* if no DIMMS on this row, continue */ + if (!MTR_DIMMS_PRESENT(mtr)) + continue; + + p_csrow->csrow_idx = slot; + + /* FAKE OUT VALUES, FIXME */ + p_csrow->first_page = 0 + slot * 20; + p_csrow->last_page = 9 + slot * 20; + p_csrow->page_mask = 0xfff; + + empty = 0; + } + } + } + + return empty; +} + +static void decode_mir(int mir_no, u16 mir[MAX_MIR]) +{ + if (mir[mir_no] & 3) + debugf2("MIR%d: limit= 0x%x Branch(es) that participate: %s %s\n", + mir_no, + (mir[mir_no] >> 4) & 0xfff, + (mir[mir_no] & 1) ? "B0" : "", + (mir[mir_no] & 2) ? "B1": ""); +} + +/* + * i7300_get_mc_regs read in the necessary registers and + * cache locally + * + * Fills in the private data members + */ +static int i7300_get_mc_regs(struct mem_ctl_info *mci) +{ + struct i7300_pvt *pvt; + u32 actual_tolm; + int i, rc; + + pvt = mci->pvt_info; + + pci_read_config_dword(pvt->system_address, AMBASE, + (u32 *) &pvt->ambase); + + debugf2("AMBASE= 0x%lx\n", (long unsigned int)pvt->ambase); + + /* Get the Branch Map regs */ + pci_read_config_word(pvt->branchmap_werrors, TOLM, &pvt->tolm); + pvt->tolm >>= 12; + debugf2("TOLM (number of 256M regions) =%u (0x%x)\n", pvt->tolm, + pvt->tolm); + + actual_tolm = (u32) ((1000l * pvt->tolm) >> (30 - 28)); + debugf2("Actual TOLM byte addr=%u.%03u GB (0x%x)\n", + actual_tolm/1000, actual_tolm % 1000, pvt->tolm << 28); + + pci_read_config_word(pvt->branchmap_werrors, MIR0, &pvt->mir[0]); + pci_read_config_word(pvt->branchmap_werrors, MIR1, &pvt->mir[1]); + pci_read_config_word(pvt->branchmap_werrors, MIR2, &pvt->mir[2]); + + /* Decode the MIR regs */ + for (i = 0; i < MAX_MIR; i++) + decode_mir(i, pvt->mir); + + rc = i7300_init_csrows(mci); + if (rc < 0) + return rc; + + /* Go and determine the size of each DIMM and place in an + * orderly matrix */ + print_dimm_size(pvt); + + return 0; +} + +/* + * i7300_put_devices 'put' all the devices that we have + * reserved via 'get' + */ +static void i7300_put_devices(struct mem_ctl_info *mci) +{ + struct i7300_pvt *pvt; + int branch; + + pvt = mci->pvt_info; + + /* Decrement usage count for devices */ + for (branch = 0; branch < MAX_CH_PER_BRANCH; branch++) + pci_dev_put(pvt->branch_pci[branch]); + pci_dev_put(pvt->fsb_error_regs); + pci_dev_put(pvt->branchmap_werrors); +} + +/* + * i7300_get_devices Find and perform 'get' operation on the MCH's + * device/functions we want to reference for this driver + * + * Need to 'get' device 16 func 1 and func 2 + */ +static int i7300_get_devices(struct mem_ctl_info *mci, int dev_idx) +{ + struct i7300_pvt *pvt; + struct pci_dev *pdev; + + pvt = mci->pvt_info; + + /* Attempt to 'get' the MCH register we want */ + pdev = NULL; + while (!pvt->branchmap_werrors || !pvt->fsb_error_regs) { + pdev = pci_get_device(PCI_VENDOR_ID_INTEL, + PCI_DEVICE_ID_INTEL_I7300_MCH_ERR, pdev); + if (!pdev) { + /* End of list, leave */ + i7300_printk(KERN_ERR, + "'system address,Process Bus' " + "device not found:" + "vendor 0x%x device 0x%x ERR funcs " + "(broken BIOS?)\n", + PCI_VENDOR_ID_INTEL, + PCI_DEVICE_ID_INTEL_I7300_MCH_ERR); + goto error; + } + + /* Store device 16 funcs 1 and 2 */ + switch (PCI_FUNC(pdev->devfn)) { + case 1: + pvt->branchmap_werrors = pdev; + break; + case 2: + pvt->fsb_error_regs = pdev; + break; + } + } + + debugf1("System Address, processor bus- PCI Bus ID: %s %x:%x\n", + pci_name(pvt->system_address), + pvt->system_address->vendor, pvt->system_address->device); + debugf1("Branchmap, control and errors - PCI Bus ID: %s %x:%x\n", + pci_name(pvt->branchmap_werrors), + pvt->branchmap_werrors->vendor, pvt->branchmap_werrors->device); + debugf1("FSB Error Regs - PCI Bus ID: %s %x:%x\n", + pci_name(pvt->fsb_error_regs), + pvt->fsb_error_regs->vendor, pvt->fsb_error_regs->device); + + pvt->branch_pci[0] = pci_get_device(PCI_VENDOR_ID_INTEL, + PCI_DEVICE_ID_INTEL_I7300_MCH_FB0, + NULL); + if (!pvt->branch_pci[0]) { + i7300_printk(KERN_ERR, + "MC: 'BRANCH 0' device not found:" + "vendor 0x%x device 0x%x Func 0 (broken BIOS?)\n", + PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_I7300_MCH_FB0); + goto error; + } + + pvt->branch_pci[1] = pci_get_device(PCI_VENDOR_ID_INTEL, + PCI_DEVICE_ID_INTEL_I7300_MCH_FB1, + NULL); + if (!pvt->branch_pci[1]) { + i7300_printk(KERN_ERR, + "MC: 'BRANCH 1' device not found:" + "vendor 0x%x device 0x%x Func 0 " + "(broken BIOS?)\n", + PCI_VENDOR_ID_INTEL, + PCI_DEVICE_ID_INTEL_I7300_MCH_FB1); + goto error; + } + + return 0; + +error: + i7300_put_devices(mci); + return -ENODEV; +} + +/* + * i7300_probe1 Probe for ONE instance of device to see if it is + * present. + * return: + * 0 for FOUND a device + * < 0 for error code + */ +static int i7300_probe1(struct pci_dev *pdev, int dev_idx) +{ + struct mem_ctl_info *mci; + struct i7300_pvt *pvt; + int num_channels; + int num_dimms_per_channel; + int num_csrows; + + if (dev_idx >= ARRAY_SIZE(i7300_devs)) + return -EINVAL; + + debugf0("MC: " __FILE__ ": %s(), pdev bus %u dev=0x%x fn=0x%x\n", + __func__, + pdev->bus->number, + PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn)); + + /* We only are looking for func 0 of the set */ + if (PCI_FUNC(pdev->devfn) != 0) + return -ENODEV; + + /* As we don't have a motherboard identification routine to determine + * actual number of slots/dimms per channel, we thus utilize the + * resource as specified by the chipset. Thus, we might have + * have more DIMMs per channel than actually on the mobo, but this + * allows the driver to support upto the chipset max, without + * some fancy mobo determination. + */ + num_dimms_per_channel = MAX_SLOTS; + num_channels = MAX_CHANNELS; + num_csrows = MAX_SLOTS * MAX_CHANNELS; + + debugf0("MC: %s(): Number of - Channels= %d DIMMS= %d CSROWS= %d\n", + __func__, num_channels, num_dimms_per_channel, num_csrows); + + /* allocate a new MC control structure */ + mci = edac_mc_alloc(sizeof(*pvt), num_csrows, num_channels, 0); + + if (mci == NULL) + return -ENOMEM; + + debugf0("MC: " __FILE__ ": %s(): mci = %p\n", __func__, mci); + + mci->dev = &pdev->dev; /* record ptr to the generic device */ + + pvt = mci->pvt_info; + pvt->system_address = pdev; /* Record this device in our private */ + + /* 'get' the pci devices we want to reserve for our use */ + if (i7300_get_devices(mci, dev_idx)) + goto fail0; + + mci->mc_idx = 0; + mci->mtype_cap = MEM_FLAG_FB_DDR2; + mci->edac_ctl_cap = EDAC_FLAG_NONE; + mci->edac_cap = EDAC_FLAG_NONE; + mci->mod_name = "i7300_edac.c"; + mci->mod_ver = I7300_REVISION; + mci->ctl_name = i7300_devs[dev_idx].ctl_name; + mci->dev_name = pci_name(pdev); + mci->ctl_page_to_phys = NULL; + +#if 0 + /* Set the function pointer to an actual operation function */ + mci->edac_check = i7300_check_error; +#endif + + /* initialize the MC control structure 'csrows' table + * with the mapping and control information */ + if (i7300_get_mc_regs(mci)) { + debugf0("MC: Setting mci->edac_cap to EDAC_FLAG_NONE\n" + " because i7300_init_csrows() returned nonzero " + "value\n"); + mci->edac_cap = EDAC_FLAG_NONE; /* no csrows found */ + } else { +#if 0 + debugf1("MC: Enable error reporting now\n"); + i7300_enable_error_reporting(mci); +#endif + } + + /* add this new MC control structure to EDAC's list of MCs */ + if (edac_mc_add_mc(mci)) { + debugf0("MC: " __FILE__ + ": %s(): failed edac_mc_add_mc()\n", __func__); + /* FIXME: perhaps some code should go here that disables error + * reporting if we just enabled it + */ + goto fail1; + } + +#if 0 + i7300_clear_error(mci); +#endif + + /* allocating generic PCI control info */ + i7300_pci = edac_pci_create_generic_ctl(&pdev->dev, EDAC_MOD_STR); + if (!i7300_pci) { + printk(KERN_WARNING + "%s(): Unable to create PCI control\n", + __func__); + printk(KERN_WARNING + "%s(): PCI error report via EDAC not setup\n", + __func__); + } + + return 0; + + /* Error exit unwinding stack */ +fail1: + + i7300_put_devices(mci); + +fail0: + edac_mc_free(mci); + return -ENODEV; +} + +/* + * i7300_init_one constructor for one instance of device + * + * returns: + * negative on error + * count (>= 0) + */ +static int __devinit i7300_init_one(struct pci_dev *pdev, + const struct pci_device_id *id) +{ + int rc; + + debugf0("MC: " __FILE__ ": %s()\n", __func__); + + /* wake up device */ + rc = pci_enable_device(pdev); + if (rc == -EIO) + return rc; + + /* now probe and enable the device */ + return i7300_probe1(pdev, id->driver_data); +} + +/* + * i7300_remove_one destructor for one instance of device + * + */ +static void __devexit i7300_remove_one(struct pci_dev *pdev) +{ + struct mem_ctl_info *mci; + + debugf0(__FILE__ ": %s()\n", __func__); + + if (i7300_pci) + edac_pci_release_generic_ctl(i7300_pci); + + mci = edac_mc_del_mc(&pdev->dev); + if (!mci) + return; + + /* retrieve references to resources, and free those resources */ + i7300_put_devices(mci); + + edac_mc_free(mci); +} + +/* + * pci_device_id table for which devices we are looking for + * + * The "E500P" device is the first device supported. + */ +static const struct pci_device_id i7300_pci_tbl[] __devinitdata = { + {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_I7300_MCH_ERR)}, + {0,} /* 0 terminated list. */ +}; + +MODULE_DEVICE_TABLE(pci, i7300_pci_tbl); + +/* + * i7300_driver pci_driver structure for this module + * + */ +static struct pci_driver i7300_driver = { + .name = "i7300_edac", + .probe = i7300_init_one, + .remove = __devexit_p(i7300_remove_one), + .id_table = i7300_pci_tbl, +}; + +/* + * i7300_init Module entry function + * Try to initialize this module for its devices + */ +static int __init i7300_init(void) +{ + int pci_rc; + + debugf2("MC: " __FILE__ ": %s()\n", __func__); + + /* Ensure that the OPSTATE is set correctly for POLL or NMI */ + opstate_init(); + + pci_rc = pci_register_driver(&i7300_driver); + + return (pci_rc < 0) ? pci_rc : 0; +} + +/* + * i7300_exit() Module exit function + * Unregister the driver + */ +static void __exit i7300_exit(void) +{ + debugf2("MC: " __FILE__ ": %s()\n", __func__); + pci_unregister_driver(&i7300_driver); +} + +module_init(i7300_init); +module_exit(i7300_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Mauro Carvalho Chehab "); +MODULE_AUTHOR("Red Hat Inc. (http://www.redhat.com)"); +MODULE_DESCRIPTION("MC Driver for Intel I7300 memory controllers - " + I7300_REVISION); + +module_param(edac_op_state, int, 0444); +MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI"); diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index f6a3b2d36cad..2eabe311f0d3 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -815,7 +815,7 @@ #define PCI_VENDOR_ID_ANIGMA 0x1051 #define PCI_DEVICE_ID_ANIGMA_MC145575 0x0100 - + #define PCI_VENDOR_ID_EFAR 0x1055 #define PCI_DEVICE_ID_EFAR_SLC90E66_1 0x9130 #define PCI_DEVICE_ID_EFAR_SLC90E66_3 0x9463 @@ -1446,7 +1446,7 @@ #define PCI_VENDOR_ID_ZIATECH 0x1138 #define PCI_DEVICE_ID_ZIATECH_5550_HC 0x5550 - + #define PCI_VENDOR_ID_SYSKONNECT 0x1148 #define PCI_DEVICE_ID_SYSKONNECT_TR 0x4200 @@ -1600,8 +1600,8 @@ #define PCI_DEVICE_ID_RP8OCTA 0x0005 #define PCI_DEVICE_ID_RP8J 0x0006 #define PCI_DEVICE_ID_RP4J 0x0007 -#define PCI_DEVICE_ID_RP8SNI 0x0008 -#define PCI_DEVICE_ID_RP16SNI 0x0009 +#define PCI_DEVICE_ID_RP8SNI 0x0008 +#define PCI_DEVICE_ID_RP16SNI 0x0009 #define PCI_DEVICE_ID_RPP4 0x000A #define PCI_DEVICE_ID_RPP8 0x000B #define PCI_DEVICE_ID_RP4M 0x000D @@ -1611,9 +1611,9 @@ #define PCI_DEVICE_ID_URP8INTF 0x0802 #define PCI_DEVICE_ID_URP16INTF 0x0803 #define PCI_DEVICE_ID_URP8OCTA 0x0805 -#define PCI_DEVICE_ID_UPCI_RM3_8PORT 0x080C +#define PCI_DEVICE_ID_UPCI_RM3_8PORT 0x080C #define PCI_DEVICE_ID_UPCI_RM3_4PORT 0x080D -#define PCI_DEVICE_ID_CRP16INTF 0x0903 +#define PCI_DEVICE_ID_CRP16INTF 0x0903 #define PCI_VENDOR_ID_CYCLADES 0x120e #define PCI_DEVICE_ID_CYCLOM_Y_Lo 0x0100 @@ -2139,7 +2139,7 @@ #define PCI_DEVICE_ID_RASTEL_2PORT 0x2000 #define PCI_VENDOR_ID_ZOLTRIX 0x15b0 -#define PCI_DEVICE_ID_ZOLTRIX_2BD0 0x2bd0 +#define PCI_DEVICE_ID_ZOLTRIX_2BD0 0x2bd0 #define PCI_VENDOR_ID_MELLANOX 0x15b3 #define PCI_DEVICE_ID_MELLANOX_TAVOR 0x5a44 @@ -2413,7 +2413,7 @@ #define PCI_DEVICE_ID_INTEL_82815_MC 0x1130 #define PCI_DEVICE_ID_INTEL_82815_CGC 0x1132 #define PCI_DEVICE_ID_INTEL_82092AA_0 0x1221 -#define PCI_DEVICE_ID_INTEL_7505_0 0x2550 +#define PCI_DEVICE_ID_INTEL_7505_0 0x2550 #define PCI_DEVICE_ID_INTEL_7205_0 0x255d #define PCI_DEVICE_ID_INTEL_82437 0x122d #define PCI_DEVICE_ID_INTEL_82371FB_0 0x122e @@ -2616,6 +2616,9 @@ #define PCI_DEVICE_ID_INTEL_MCH_PC 0x3599 #define PCI_DEVICE_ID_INTEL_MCH_PC1 0x359a #define PCI_DEVICE_ID_INTEL_E7525_MCH 0x359e +#define PCI_DEVICE_ID_INTEL_I7300_MCH_ERR 0x360c +#define PCI_DEVICE_ID_INTEL_I7300_MCH_FB0 0x360f +#define PCI_DEVICE_ID_INTEL_I7300_MCH_FB1 0x3610 #define PCI_DEVICE_ID_INTEL_IOAT_CNB 0x360b #define PCI_DEVICE_ID_INTEL_FBD_CNB 0x360c #define PCI_DEVICE_ID_INTEL_IOAT_JSF0 0x3710 -- cgit v1.2.3 From af3d8831e7e2036cd453c852d206b892b19c8820 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Thu, 26 Aug 2010 20:58:45 -0300 Subject: i7300_edac: display info if ECC is enabled or not Signed-off-by: Mauro Carvalho Chehab --- drivers/edac/i7300_edac.c | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/drivers/edac/i7300_edac.c b/drivers/edac/i7300_edac.c index eb3f30e96ee3..db194b6b3e42 100644 --- a/drivers/edac/i7300_edac.c +++ b/drivers/edac/i7300_edac.c @@ -80,17 +80,19 @@ */ /* OFFSETS for Function 0 */ -#define AMBASE 0x48 /* AMB Mem Mapped Reg Region Base */ -#define MAXCH 0x56 /* Max Channel Number */ -#define MAXDIMMPERCH 0x57 /* Max DIMM PER Channel Number */ +#define AMBASE 0x48 /* AMB Mem Mapped Reg Region Base */ +#define MAXCH 0x56 /* Max Channel Number */ +#define MAXDIMMPERCH 0x57 /* Max DIMM PER Channel Number */ /* OFFSETS for Function 1 */ -#define TOLM 0x6C -#define REDMEMB 0x7C +#define MC_SETTINGS 0x40 -#define MIR0 0x80 -#define MIR1 0x84 -#define MIR2 0x88 +#define TOLM 0x6C +#define REDMEMB 0x7C + +#define MIR0 0x80 +#define MIR1 0x84 +#define MIR2 0x88 #if 0 #define AMIR0 0x8c @@ -393,6 +395,7 @@ struct i7300_pvt { u16 tolm; /* top of low memory */ u64 ambase; /* AMB BAR */ + u32 mc_settings; u16 mir[MAX_MIR]; @@ -1020,6 +1023,15 @@ static int i7300_get_mc_regs(struct mem_ctl_info *mci) debugf2("Actual TOLM byte addr=%u.%03u GB (0x%x)\n", actual_tolm/1000, actual_tolm % 1000, pvt->tolm << 28); + /* Get memory controller settings */ + pci_read_config_dword(pvt->branchmap_werrors, MC_SETTINGS, + &pvt->mc_settings); + debugf0("Memory controller operating on %s mode\n", + pvt->mc_settings & (1 << 16)? "mirrored" : "non-mirrored"); + debugf0("Error detection is %s\n", + pvt->mc_settings & (1 << 5)? "enabled" : "disabled"); + + /* Get Memory Interleave Range registers */ pci_read_config_word(pvt->branchmap_werrors, MIR0, &pvt->mir[0]); pci_read_config_word(pvt->branchmap_werrors, MIR1, &pvt->mir[1]); pci_read_config_word(pvt->branchmap_werrors, MIR2, &pvt->mir[2]); -- cgit v1.2.3 From c3af2eaf7a3257f7b44165ec487215574c47fd32 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Thu, 26 Aug 2010 19:54:51 -0300 Subject: i7300_edac: add global error registers Signed-off-by: Mauro Carvalho Chehab --- drivers/edac/i7300_edac.c | 174 ++++++++++++++++++++++++---------------------- 1 file changed, 92 insertions(+), 82 deletions(-) diff --git a/drivers/edac/i7300_edac.c b/drivers/edac/i7300_edac.c index db194b6b3e42..7e035b6d0d0f 100644 --- a/drivers/edac/i7300_edac.c +++ b/drivers/edac/i7300_edac.c @@ -50,7 +50,7 @@ * Except on Single Channel mode of operation * just slot 0/channel0 filled on this mode * On normal operation mode, the two channels on a branch should be - filled together for the same SLOT# + * filled together for the same SLOT# * When in mirrored mode, Branch 1 replicate memory at Branch 0, so, the four * channels on both branches should be filled */ @@ -67,16 +67,22 @@ #define to_csrow(slot, ch, branch) \ (to_channel(ch, branch) | ((slot) << 2)) - -/* Device 16, - * Function 0: System Address (not documented) - * Function 1: Memory Branch Map, Control, Errors Register - * Function 2: FSB Error Registers - * +/* + * I7300 devices * All 3 functions of Device 16 (0,1,2) share the SAME DID and * uses PCI_DEVICE_ID_INTEL_I7300_MCH_ERR for device 16 (0,1,2), * PCI_DEVICE_ID_INTEL_I7300_MCH_FB0 and PCI_DEVICE_ID_INTEL_I7300_MCH_FB1 * for device 21 (0,1). + */ + +/**************************************************** + * i7300 Register definitions for memory enumberation + ****************************************************/ + +/* + * Device 16, + * Function 0: System Address (not documented) + * Function 1: Memory Branch Map, Control, Errors Register */ /* OFFSETS for Function 0 */ @@ -94,50 +100,6 @@ #define MIR1 0x84 #define MIR2 0x88 -#if 0 -#define AMIR0 0x8c -#define AMIR1 0x90 -#define AMIR2 0x94 - -/*TODO: double check it */ -#define REC_ECC_LOCATOR_ODD(x) ((x) & 0x3fe00) /* bits [17:9] indicate ODD, [8:0] indicate EVEN */ - - /* Fatal error registers */ -#define FERR_FAT_FBD 0x98 - -/*TODO: double check it */ -#define FERR_FAT_FBDCHAN (3<<28) /* channel index where the highest-order error occurred */ - -#define NERR_FAT_FBD 0x9c -#define FERR_NF_FBD 0xa0 - - /* Non-fatal error register */ -#define NERR_NF_FBD 0xa4 - - /* Enable error mask */ -#define EMASK_FBD 0xa8 - -#define ERR0_FBD 0xac -#define ERR1_FBD 0xb0 -#define ERR2_FBD 0xb4 -#define MCERR_FBD 0xb8 - -#endif - -/* TODO: Dev 16 fn1 allows memory error injection - offsets 0x100-0x10b */ - - /* TODO: OFFSETS for Device 16 Function 2 */ - -/* - * Device 21, - * Function 0: Memory Map Branch 0 - * - * Device 22, - * Function 0: Memory Map Branch 1 - */ - - /* OFFSETS for Function 0 */ - /* * Note: Other Intel EDAC drivers use AMBPRESENT to identify if the available * memory. From datasheet item 7.3.1 (FB-DIMM technology & organization), it @@ -171,37 +133,6 @@ const static u16 mtr_regs [MAX_SLOTS] = { #define MTR_DIMM_COLS(mtr) ((mtr) & 0x3) #define MTR_DIMM_COLS_ADDR_BITS(mtr) (MTR_DIMM_COLS(mtr) + 10) -#if 0 - /* OFFSETS for Function 1 */ - -/* TODO */ -#define NRECFGLOG 0x74 -#define RECFGLOG 0x78 -#define NRECMEMA 0xbe -#define NRECMEMB 0xc0 -#define NRECFB_DIMMA 0xc4 -#define NRECFB_DIMMB 0xc8 -#define NRECFB_DIMMC 0xcc -#define NRECFB_DIMMD 0xd0 -#define NRECFB_DIMME 0xd4 -#define NRECFB_DIMMF 0xd8 -#define REDMEMA 0xdC -#define RECMEMA 0xf0 -#define RECMEMB 0xf4 -#define RECFB_DIMMA 0xf8 -#define RECFB_DIMMB 0xec -#define RECFB_DIMMC 0xf0 -#define RECFB_DIMMD 0xf4 -#define RECFB_DIMME 0xf8 -#define RECFB_DIMMF 0xfC - -/* This applies to FERR_NF_FB-DIMM as well as FERR_FAT_FB-DIMM */ -static inline int extract_fbdchan_indx(u32 x) -{ - return (x>>28) & 0x3; -} -#endif - #ifdef CONFIG_EDAC_DEBUG /* MTR NUMROW */ static const char *numrow_toString[] = { @@ -220,6 +151,85 @@ static const char *numcol_toString[] = { }; #endif +/************************************************ + * i7300 Register definitions for error detection + ************************************************/ +/* + * Device 16.2: Global Error Registers + */ + +#define FERR_GLOBAL_LO 0x40 +static const char *ferr_global_name[] = { + [31] = "Internal MCH Fatal Error", + [30] = "Intel QuickData Technology Device Fatal Error", + [29] = "FSB1 Fatal Error", + [28] = "FSB0 Fatal Error", + [27] = "FBD Channel 3 Fatal Error", + [26] = "FBD Channel 2 Fatal Error", + [25] = "FBD Channel 1 Fatal Error", + [24] = "FBD Channel 0 Fatal Error", + [23] = "PCI Express Device 7Fatal Error", + [22] = "PCI Express Device 6 Fatal Error", + [21] = "PCI Express Device 5 Fatal Error", + [20] = "PCI Express Device 4 Fatal Error", + [19] = "PCI Express Device 3 Fatal Error", + [18] = "PCI Express Device 2 Fatal Error", + [17] = "PCI Express Device 1 Fatal Error", + [16] = "ESI Fatal Error", + [15] = "Internal MCH Non-Fatal Error", + [14] = "Intel QuickData Technology Device Non Fatal Error", + [13] = "FSB1 Non-Fatal Error", + [12] = "FSB 0 Non-Fatal Error", + [11] = "FBD Channel 3 Non-Fatal Error", + [10] = "FBD Channel 2 Non-Fatal Error", + [9] = "FBD Channel 1 Non-Fatal Error", + [8] = "FBD Channel 0 Non-Fatal Error", + [7] = "PCI Express Device 7 Non-Fatal Error", + [6] = "PCI Express Device 6 Non-Fatal Error", + [5] = "PCI Express Device 5 Non-Fatal Error", + [4] = "PCI Express Device 4 Non-Fatal Error", + [3] = "PCI Express Device 3 Non-Fatal Error", + [2] = "PCI Express Device 2 Non-Fatal Error", + [1] = "PCI Express Device 1 Non-Fatal Error", + [0] = "ESI Non-Fatal Error", +}; + +#define NERR_GLOBAL 0x44 +static const char *nerr_global_name[] = { + [31] = "Internal MCH Fatal Error", + [30] = "Intel QuickData Technology Device Fatal Error", + [29] = "FSB1 Fatal Error", + [28] = "FSB0 Fatal Error", + [27] = "FSB2 Fatal Error", + [26] = "FSB3 Fatal Error", + [25] = "Reserved", + [24] = "FBD Channel 0,1,2 or 3 Fatal Error", + [23] = "PCI Express Device 7 Fatal Error", + [22] = "PCI Express Device 6 Fatal Error", + [21] = "PCI Express Device 5 Fatal Error", + [20] = "PCI Express Device 4 Fatal Error", + [19] = "PCI Express Device 3 Fatal Error", + [18] = "PCI Express Device 2 Fatal Error", + [17] = "PCI Express Device 1 Fatal Error", + [16] = "ESI Fatal Error", + [15] = "Internal MCH Non-Fatal Error", + [14] = "Intel QuickData Technology Device Non Fatal Error", + [13] = "FSB1 Non-Fatal Error", + [12] = "FSB0 Non-Fatal Error", + [11] = "FSB2 Non-Fatal Error", + [10] = "FSB3 Non-Fatal Error", + [9] = "Reserved", + [8] = "FBD Channel 0,1, 2 or 3 Non-Fatal Error", + [7] = "PCI Express Device 7 Non-Fatal Error", + [6] = "PCI Express Device 6 Non-Fatal Error", + [5] = "PCI Express Device 5 Non-Fatal Error", + [4] = "PCI Express Device 4 Non-Fatal Error", + [3] = "PCI Express Device 3 Non-Fatal Error", + [2] = "PCI Express Device 2 Non-Fatal Error", + [1] = "PCI Express Device 1 Non-Fatal Error", + [0] = "ESI Non-Fatal Error", +}; + #if 0 /* -- cgit v1.2.3 From 116389ed21e4ad88f65e7ec5ed6ca224acb89115 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Thu, 26 Aug 2010 23:19:54 -0300 Subject: i7300_edac: Add a FIXME note about the error correction type Signed-off-by: Mauro Carvalho Chehab --- drivers/edac/i7300_edac.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/edac/i7300_edac.c b/drivers/edac/i7300_edac.c index 7e035b6d0d0f..36265e21fef2 100644 --- a/drivers/edac/i7300_edac.c +++ b/drivers/edac/i7300_edac.c @@ -841,6 +841,15 @@ static int decode_mtr(struct i7300_pvt *pvt, p_csrow->grain = 8; p_csrow->nr_pages = dinfo->megabytes << 8; p_csrow->mtype = MEM_FB_DDR2; + + /* + * FIXME: the type of error detection actually depends of the + * mode of operation. When it is just one single memory chip, at + * socket 0, channel 0, it uses 8-byte-over-32-byte SECDED+ code. + * In normal or mirrored mode, it uses Single Device Data correction, + * with the possibility of using an extended algorithm for x8 memories + * See datasheet Sections 7.3.6 to 7.3.8 + */ p_csrow->edac_mode = EDAC_S8ECD8ED; /* ask what device type on this row */ -- cgit v1.2.3 From 3e57eef64c53d4a45790fb7bb60a4ee6bf2bad30 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Thu, 26 Aug 2010 23:38:11 -0300 Subject: i7300_edac: Better name PCI devices Signed-off-by: Mauro Carvalho Chehab --- drivers/edac/i7300_edac.c | 86 +++++++++++++++++++++++------------------------ 1 file changed, 43 insertions(+), 43 deletions(-) diff --git a/drivers/edac/i7300_edac.c b/drivers/edac/i7300_edac.c index 36265e21fef2..3e60dbaa0e7e 100644 --- a/drivers/edac/i7300_edac.c +++ b/drivers/edac/i7300_edac.c @@ -398,10 +398,10 @@ struct i7300_dimm_info { /* driver private data structure */ struct i7300_pvt { - struct pci_dev *system_address; /* 16.0 */ - struct pci_dev *branchmap_werrors; /* 16.1 */ - struct pci_dev *fsb_error_regs; /* 16.2 */ - struct pci_dev *branch_pci[MAX_BRANCHES]; /* 21.0 and 22.0 */ + struct pci_dev *pci_dev_16_0_fsb_ctlr; /* 16.0 */ + struct pci_dev *pci_dev_16_1_fsb_addr_map; /* 16.1 */ + struct pci_dev *pci_dev_16_2_fsb_err_regs; /* 16.2 */ + struct pci_dev *pci_dev_2x_0_fbd_branch[MAX_BRANCHES]; /* 21.0 and 22.0 */ u16 tolm; /* top of low memory */ u64 ambase; /* AMB BAR */ @@ -509,7 +509,7 @@ static void i7300_get_error_info(struct mem_ctl_info *mci, pvt = mci->pvt_info; /* read in the 1st FATAL error register */ - pci_read_config_dword(pvt->branchmap_werrors, FERR_FAT_FBD, &value); + pci_read_config_dword(pvt->pci_dev_16_1_fsb_addr_map, FERR_FAT_FBD, &value); /* Mask only the bits that the doc says are valid */ @@ -522,15 +522,15 @@ static void i7300_get_error_info(struct mem_ctl_info *mci, info->ferr_fat_fbd = value; /* harvest the various error data we need */ - pci_read_config_dword(pvt->branchmap_werrors, + pci_read_config_dword(pvt->pci_dev_16_1_fsb_addr_map, NERR_FAT_FBD, &info->nerr_fat_fbd); - pci_read_config_word(pvt->branchmap_werrors, + pci_read_config_word(pvt->pci_dev_16_1_fsb_addr_map, NRECMEMA, &info->nrecmema); - pci_read_config_word(pvt->branchmap_werrors, + pci_read_config_word(pvt->pci_dev_16_1_fsb_addr_map, NRECMEMB, &info->nrecmemb); /* Clear the error bits, by writing them back */ - pci_write_config_dword(pvt->branchmap_werrors, + pci_write_config_dword(pvt->pci_dev_16_1_fsb_addr_map, FERR_FAT_FBD, value); } else { info->ferr_fat_fbd = 0; @@ -540,7 +540,7 @@ static void i7300_get_error_info(struct mem_ctl_info *mci, } /* read in the 1st NON-FATAL error register */ - pci_read_config_dword(pvt->branchmap_werrors, FERR_NF_FBD, &value); + pci_read_config_dword(pvt->pci_dev_16_1_fsb_addr_map, FERR_NF_FBD, &value); /* If there is an error, then read in the 1st NON-FATAL error * register as well */ @@ -548,17 +548,17 @@ static void i7300_get_error_info(struct mem_ctl_info *mci, info->ferr_nf_fbd = value; /* harvest the various error data we need */ - pci_read_config_dword(pvt->branchmap_werrors, + pci_read_config_dword(pvt->pci_dev_16_1_fsb_addr_map, NERR_NF_FBD, &info->nerr_nf_fbd); - pci_read_config_word(pvt->branchmap_werrors, + pci_read_config_word(pvt->pci_dev_16_1_fsb_addr_map, RECMEMA, &info->recmema); - pci_read_config_dword(pvt->branchmap_werrors, + pci_read_config_dword(pvt->pci_dev_16_1_fsb_addr_map, RECMEMB, &info->recmemb); - pci_read_config_dword(pvt->branchmap_werrors, + pci_read_config_dword(pvt->pci_dev_16_1_fsb_addr_map, REDMEMB, &info->redmemb); /* Clear the error bits, by writing them back */ - pci_write_config_dword(pvt->branchmap_werrors, + pci_write_config_dword(pvt->pci_dev_16_1_fsb_addr_map, FERR_NF_FBD, value); } else { info->ferr_nf_fbd = 0; @@ -769,13 +769,13 @@ static void i7300_enable_error_reporting(struct mem_ctl_info *mci) pvt = mci->pvt_info; /* Read the FBD Error Mask Register */ - pci_read_config_dword(pvt->branchmap_werrors, EMASK_FBD, + pci_read_config_dword(pvt->pci_dev_16_1_fsb_addr_map, EMASK_FBD, &fbd_error_mask); /* Enable with a '0' */ fbd_error_mask &= ~(ENABLE_EMASK_ALL); - pci_write_config_dword(pvt->branchmap_werrors, EMASK_FBD, + pci_write_config_dword(pvt->pci_dev_16_1_fsb_addr_map, EMASK_FBD, fbd_error_mask); } #endif @@ -957,13 +957,13 @@ static int i7300_init_csrows(struct mem_ctl_info *mci) for (branch = 0; branch < MAX_BRANCHES; branch++) { /* Read and dump branch 0's MTRs */ channel = to_channel(0, branch); - pci_read_config_word(pvt->branch_pci[branch], AMBPRESENT_0, + pci_read_config_word(pvt->pci_dev_2x_0_fbd_branch[branch], AMBPRESENT_0, &pvt->ambpresent[channel]); debugf2("\t\tAMB-present CH%d = 0x%x:\n", channel, pvt->ambpresent[channel]); channel = to_channel(1, branch); - pci_read_config_word(pvt->branch_pci[branch], AMBPRESENT_1, + pci_read_config_word(pvt->pci_dev_2x_0_fbd_branch[branch], AMBPRESENT_1, &pvt->ambpresent[channel]); debugf2("\t\tAMB-present CH%d = 0x%x:\n", channel, pvt->ambpresent[channel]); @@ -973,7 +973,7 @@ static int i7300_init_csrows(struct mem_ctl_info *mci) for (slot = 0; slot < MAX_SLOTS; slot++) { int where = mtr_regs[slot]; for (branch = 0; branch < MAX_BRANCHES; branch++) { - pci_read_config_word(pvt->branch_pci[branch], + pci_read_config_word(pvt->pci_dev_2x_0_fbd_branch[branch], where, &pvt->mtr[slot][branch]); for (ch = 0; ch < MAX_BRANCHES; ch++) { @@ -1027,13 +1027,13 @@ static int i7300_get_mc_regs(struct mem_ctl_info *mci) pvt = mci->pvt_info; - pci_read_config_dword(pvt->system_address, AMBASE, + pci_read_config_dword(pvt->pci_dev_16_0_fsb_ctlr, AMBASE, (u32 *) &pvt->ambase); debugf2("AMBASE= 0x%lx\n", (long unsigned int)pvt->ambase); /* Get the Branch Map regs */ - pci_read_config_word(pvt->branchmap_werrors, TOLM, &pvt->tolm); + pci_read_config_word(pvt->pci_dev_16_1_fsb_addr_map, TOLM, &pvt->tolm); pvt->tolm >>= 12; debugf2("TOLM (number of 256M regions) =%u (0x%x)\n", pvt->tolm, pvt->tolm); @@ -1043,7 +1043,7 @@ static int i7300_get_mc_regs(struct mem_ctl_info *mci) actual_tolm/1000, actual_tolm % 1000, pvt->tolm << 28); /* Get memory controller settings */ - pci_read_config_dword(pvt->branchmap_werrors, MC_SETTINGS, + pci_read_config_dword(pvt->pci_dev_16_1_fsb_addr_map, MC_SETTINGS, &pvt->mc_settings); debugf0("Memory controller operating on %s mode\n", pvt->mc_settings & (1 << 16)? "mirrored" : "non-mirrored"); @@ -1051,9 +1051,9 @@ static int i7300_get_mc_regs(struct mem_ctl_info *mci) pvt->mc_settings & (1 << 5)? "enabled" : "disabled"); /* Get Memory Interleave Range registers */ - pci_read_config_word(pvt->branchmap_werrors, MIR0, &pvt->mir[0]); - pci_read_config_word(pvt->branchmap_werrors, MIR1, &pvt->mir[1]); - pci_read_config_word(pvt->branchmap_werrors, MIR2, &pvt->mir[2]); + pci_read_config_word(pvt->pci_dev_16_1_fsb_addr_map, MIR0, &pvt->mir[0]); + pci_read_config_word(pvt->pci_dev_16_1_fsb_addr_map, MIR1, &pvt->mir[1]); + pci_read_config_word(pvt->pci_dev_16_1_fsb_addr_map, MIR2, &pvt->mir[2]); /* Decode the MIR regs */ for (i = 0; i < MAX_MIR; i++) @@ -1083,9 +1083,9 @@ static void i7300_put_devices(struct mem_ctl_info *mci) /* Decrement usage count for devices */ for (branch = 0; branch < MAX_CH_PER_BRANCH; branch++) - pci_dev_put(pvt->branch_pci[branch]); - pci_dev_put(pvt->fsb_error_regs); - pci_dev_put(pvt->branchmap_werrors); + pci_dev_put(pvt->pci_dev_2x_0_fbd_branch[branch]); + pci_dev_put(pvt->pci_dev_16_2_fsb_err_regs); + pci_dev_put(pvt->pci_dev_16_1_fsb_addr_map); } /* @@ -1103,7 +1103,7 @@ static int i7300_get_devices(struct mem_ctl_info *mci, int dev_idx) /* Attempt to 'get' the MCH register we want */ pdev = NULL; - while (!pvt->branchmap_werrors || !pvt->fsb_error_regs) { + while (!pvt->pci_dev_16_1_fsb_addr_map || !pvt->pci_dev_16_2_fsb_err_regs) { pdev = pci_get_device(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_I7300_MCH_ERR, pdev); if (!pdev) { @@ -1121,28 +1121,28 @@ static int i7300_get_devices(struct mem_ctl_info *mci, int dev_idx) /* Store device 16 funcs 1 and 2 */ switch (PCI_FUNC(pdev->devfn)) { case 1: - pvt->branchmap_werrors = pdev; + pvt->pci_dev_16_1_fsb_addr_map = pdev; break; case 2: - pvt->fsb_error_regs = pdev; + pvt->pci_dev_16_2_fsb_err_regs = pdev; break; } } debugf1("System Address, processor bus- PCI Bus ID: %s %x:%x\n", - pci_name(pvt->system_address), - pvt->system_address->vendor, pvt->system_address->device); + pci_name(pvt->pci_dev_16_0_fsb_ctlr), + pvt->pci_dev_16_0_fsb_ctlr->vendor, pvt->pci_dev_16_0_fsb_ctlr->device); debugf1("Branchmap, control and errors - PCI Bus ID: %s %x:%x\n", - pci_name(pvt->branchmap_werrors), - pvt->branchmap_werrors->vendor, pvt->branchmap_werrors->device); + pci_name(pvt->pci_dev_16_1_fsb_addr_map), + pvt->pci_dev_16_1_fsb_addr_map->vendor, pvt->pci_dev_16_1_fsb_addr_map->device); debugf1("FSB Error Regs - PCI Bus ID: %s %x:%x\n", - pci_name(pvt->fsb_error_regs), - pvt->fsb_error_regs->vendor, pvt->fsb_error_regs->device); + pci_name(pvt->pci_dev_16_2_fsb_err_regs), + pvt->pci_dev_16_2_fsb_err_regs->vendor, pvt->pci_dev_16_2_fsb_err_regs->device); - pvt->branch_pci[0] = pci_get_device(PCI_VENDOR_ID_INTEL, + pvt->pci_dev_2x_0_fbd_branch[0] = pci_get_device(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_I7300_MCH_FB0, NULL); - if (!pvt->branch_pci[0]) { + if (!pvt->pci_dev_2x_0_fbd_branch[0]) { i7300_printk(KERN_ERR, "MC: 'BRANCH 0' device not found:" "vendor 0x%x device 0x%x Func 0 (broken BIOS?)\n", @@ -1150,10 +1150,10 @@ static int i7300_get_devices(struct mem_ctl_info *mci, int dev_idx) goto error; } - pvt->branch_pci[1] = pci_get_device(PCI_VENDOR_ID_INTEL, + pvt->pci_dev_2x_0_fbd_branch[1] = pci_get_device(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_I7300_MCH_FB1, NULL); - if (!pvt->branch_pci[1]) { + if (!pvt->pci_dev_2x_0_fbd_branch[1]) { i7300_printk(KERN_ERR, "MC: 'BRANCH 1' device not found:" "vendor 0x%x device 0x%x Func 0 " @@ -1222,7 +1222,7 @@ static int i7300_probe1(struct pci_dev *pdev, int dev_idx) mci->dev = &pdev->dev; /* record ptr to the generic device */ pvt = mci->pvt_info; - pvt->system_address = pdev; /* Record this device in our private */ + pvt->pci_dev_16_0_fsb_ctlr = pdev; /* Record this device in our private */ /* 'get' the pci devices we want to reserve for our use */ if (i7300_get_devices(mci, dev_idx)) -- cgit v1.2.3 From 5de6e07ed75ee29a302f50e149339ca747131121 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 27 Aug 2010 00:16:12 -0300 Subject: i7300_edac: Add error detection code for global errors There's no mention at the datasheet about how to enable global error reporting. So, I'm assuming that those errors are always enabled. Maybe I'm plain wrong about that ;) Signed-off-by: Mauro Carvalho Chehab --- drivers/edac/i7300_edac.c | 568 +++++++--------------------------------------- 1 file changed, 77 insertions(+), 491 deletions(-) diff --git a/drivers/edac/i7300_edac.c b/drivers/edac/i7300_edac.c index 3e60dbaa0e7e..e617b4f79dd8 100644 --- a/drivers/edac/i7300_edac.c +++ b/drivers/edac/i7300_edac.c @@ -158,8 +158,17 @@ static const char *numcol_toString[] = { * Device 16.2: Global Error Registers */ +#define FERR_GLOBAL_HI 0x48 +static const char *ferr_global_hi_name[] = { + [3] = "FSB 3 Fatal Error", + [2] = "FSB 2 Fatal Error", + [1] = "FSB 1 Fatal Error", + [0] = "FSB 0 Fatal Error", +}; +#define ferr_global_hi_is_fatal(errno) 1 + #define FERR_GLOBAL_LO 0x40 -static const char *ferr_global_name[] = { +static const char *ferr_global_lo_name[] = { [31] = "Internal MCH Fatal Error", [30] = "Intel QuickData Technology Device Fatal Error", [29] = "FSB1 Fatal Error", @@ -193,190 +202,7 @@ static const char *ferr_global_name[] = { [1] = "PCI Express Device 1 Non-Fatal Error", [0] = "ESI Non-Fatal Error", }; - -#define NERR_GLOBAL 0x44 -static const char *nerr_global_name[] = { - [31] = "Internal MCH Fatal Error", - [30] = "Intel QuickData Technology Device Fatal Error", - [29] = "FSB1 Fatal Error", - [28] = "FSB0 Fatal Error", - [27] = "FSB2 Fatal Error", - [26] = "FSB3 Fatal Error", - [25] = "Reserved", - [24] = "FBD Channel 0,1,2 or 3 Fatal Error", - [23] = "PCI Express Device 7 Fatal Error", - [22] = "PCI Express Device 6 Fatal Error", - [21] = "PCI Express Device 5 Fatal Error", - [20] = "PCI Express Device 4 Fatal Error", - [19] = "PCI Express Device 3 Fatal Error", - [18] = "PCI Express Device 2 Fatal Error", - [17] = "PCI Express Device 1 Fatal Error", - [16] = "ESI Fatal Error", - [15] = "Internal MCH Non-Fatal Error", - [14] = "Intel QuickData Technology Device Non Fatal Error", - [13] = "FSB1 Non-Fatal Error", - [12] = "FSB0 Non-Fatal Error", - [11] = "FSB2 Non-Fatal Error", - [10] = "FSB3 Non-Fatal Error", - [9] = "Reserved", - [8] = "FBD Channel 0,1, 2 or 3 Non-Fatal Error", - [7] = "PCI Express Device 7 Non-Fatal Error", - [6] = "PCI Express Device 6 Non-Fatal Error", - [5] = "PCI Express Device 5 Non-Fatal Error", - [4] = "PCI Express Device 4 Non-Fatal Error", - [3] = "PCI Express Device 3 Non-Fatal Error", - [2] = "PCI Express Device 2 Non-Fatal Error", - [1] = "PCI Express Device 1 Non-Fatal Error", - [0] = "ESI Non-Fatal Error", -}; - -#if 0 - -/* - * Error indicator bits and masks - * Error masks are according with Table 5-17 of i7300 datasheet - */ - -enum error_mask { - EMASK_M1 = 1<<0, /* Memory Write error on non-redundant retry */ - EMASK_M2 = 1<<1, /* Memory or FB-DIMM configuration CRC read error */ - EMASK_M3 = 1<<2, /* Reserved */ - EMASK_M4 = 1<<3, /* Uncorrectable Data ECC on Replay */ - EMASK_M5 = 1<<4, /* Aliased Uncorrectable Non-Mirrored Demand Data ECC */ - EMASK_M6 = 1<<5, /* Unsupported on i7300 */ - EMASK_M7 = 1<<6, /* Aliased Uncorrectable Resilver- or Spare-Copy Data ECC */ - EMASK_M8 = 1<<7, /* Aliased Uncorrectable Patrol Data ECC */ - EMASK_M9 = 1<<8, /* Non-Aliased Uncorrectable Non-Mirrored Demand Data ECC */ - EMASK_M10 = 1<<9, /* Unsupported on i7300 */ - EMASK_M11 = 1<<10, /* Non-Aliased Uncorrectable Resilver- or Spare-Copy Data ECC */ - EMASK_M12 = 1<<11, /* Non-Aliased Uncorrectable Patrol Data ECC */ - EMASK_M13 = 1<<12, /* Memory Write error on first attempt */ - EMASK_M14 = 1<<13, /* FB-DIMM Configuration Write error on first attempt */ - EMASK_M15 = 1<<14, /* Memory or FB-DIMM configuration CRC read error */ - EMASK_M16 = 1<<15, /* Channel Failed-Over Occurred */ - EMASK_M17 = 1<<16, /* Correctable Non-Mirrored Demand Data ECC */ - EMASK_M18 = 1<<17, /* Unsupported on i7300 */ - EMASK_M19 = 1<<18, /* Correctable Resilver- or Spare-Copy Data ECC */ - EMASK_M20 = 1<<19, /* Correctable Patrol Data ECC */ - EMASK_M21 = 1<<20, /* FB-DIMM Northbound parity error on FB-DIMM Sync Status */ - EMASK_M22 = 1<<21, /* SPD protocol Error */ - EMASK_M23 = 1<<22, /* Non-Redundant Fast Reset Timeout */ - EMASK_M24 = 1<<23, /* Refresh error */ - EMASK_M25 = 1<<24, /* Memory Write error on redundant retry */ - EMASK_M26 = 1<<25, /* Redundant Fast Reset Timeout */ - EMASK_M27 = 1<<26, /* Correctable Counter Threshold Exceeded */ - EMASK_M28 = 1<<27, /* DIMM-Spare Copy Completed */ - EMASK_M29 = 1<<28, /* DIMM-Isolation Completed */ -}; - -/* - * Names to translate bit error into something useful - */ -static const char *error_name[] = { - [0] = "Memory Write error on non-redundant retry", - [1] = "Memory or FB-DIMM configuration CRC read error", - /* Reserved */ - [3] = "Uncorrectable Data ECC on Replay", - [4] = "Aliased Uncorrectable Non-Mirrored Demand Data ECC", - /* M6 Unsupported on i7300 */ - [6] = "Aliased Uncorrectable Resilver- or Spare-Copy Data ECC", - [7] = "Aliased Uncorrectable Patrol Data ECC", - [8] = "Non-Aliased Uncorrectable Non-Mirrored Demand Data ECC", - /* M10 Unsupported on i7300 */ - [10] = "Non-Aliased Uncorrectable Resilver- or Spare-Copy Data ECC", - [11] = "Non-Aliased Uncorrectable Patrol Data ECC", - [12] = "Memory Write error on first attempt", - [13] = "FB-DIMM Configuration Write error on first attempt", - [14] = "Memory or FB-DIMM configuration CRC read error", - [15] = "Channel Failed-Over Occurred", - [16] = "Correctable Non-Mirrored Demand Data ECC", - /* M18 Unsupported on i7300 */ - [18] = "Correctable Resilver- or Spare-Copy Data ECC", - [19] = "Correctable Patrol Data ECC", - [20] = "FB-DIMM Northbound parity error on FB-DIMM Sync Status", - [21] = "SPD protocol Error", - [22] = "Non-Redundant Fast Reset Timeout", - [23] = "Refresh error", - [24] = "Memory Write error on redundant retry", - [25] = "Redundant Fast Reset Timeout", - [26] = "Correctable Counter Threshold Exceeded", - [27] = "DIMM-Spare Copy Completed", - [28] = "DIMM-Isolation Completed", -}; - -/* Fatal errors */ -#define ERROR_FAT_MASK (EMASK_M1 | \ - EMASK_M2 | \ - EMASK_M23) - -/* Correctable errors */ -#define ERROR_NF_CORRECTABLE (EMASK_M27 | \ - EMASK_M20 | \ - EMASK_M19 | \ - EMASK_M18 | \ - EMASK_M17 | \ - EMASK_M16) -#define ERROR_NF_DIMM_SPARE (EMASK_M29 | \ - EMASK_M28) -#define ERROR_NF_SPD_PROTOCOL (EMASK_M22) -#define ERROR_NF_NORTH_CRC (EMASK_M21) - -/* Recoverable errors */ -#define ERROR_NF_RECOVERABLE (EMASK_M26 | \ - EMASK_M25 | \ - EMASK_M24 | \ - EMASK_M15 | \ - EMASK_M14 | \ - EMASK_M13 | \ - EMASK_M12 | \ - EMASK_M11 | \ - EMASK_M9 | \ - EMASK_M8 | \ - EMASK_M7 | \ - EMASK_M5) - -/* uncorrectable errors */ -#define ERROR_NF_UNCORRECTABLE (EMASK_M4) - -/* mask to all non-fatal errors */ -#define ERROR_NF_MASK (ERROR_NF_CORRECTABLE | \ - ERROR_NF_UNCORRECTABLE | \ - ERROR_NF_RECOVERABLE | \ - ERROR_NF_DIMM_SPARE | \ - ERROR_NF_SPD_PROTOCOL | \ - ERROR_NF_NORTH_CRC) - -/* - * Define error masks for the several registers - */ - -/* Enable all fatal and non fatal errors */ -#define ENABLE_EMASK_ALL (ERROR_FAT_MASK | ERROR_NF_MASK) - -/* mask for fatal error registers */ -#define FERR_FAT_MASK ERROR_FAT_MASK - -/* masks for non-fatal error register */ -static inline int to_nf_mask(unsigned int mask) -{ - return (mask & EMASK_M29) | (mask >> 3); -}; - -static inline int from_nf_ferr(unsigned int mask) -{ - return (mask & EMASK_M29) | /* Bit 28 */ - (mask & ((1 << 28) - 1) << 3); /* Bits 0 to 27 */ -}; - -#define FERR_NF_MASK to_nf_mask(ERROR_NF_MASK) -#define FERR_NF_CORRECTABLE to_nf_mask(ERROR_NF_CORRECTABLE) -#define FERR_NF_DIMM_SPARE to_nf_mask(ERROR_NF_DIMM_SPARE) -#define FERR_NF_SPD_PROTOCOL to_nf_mask(ERROR_NF_SPD_PROTOCOL) -#define FERR_NF_NORTH_CRC to_nf_mask(ERROR_NF_NORTH_CRC) -#define FERR_NF_RECOVERABLE to_nf_mask(ERROR_NF_RECOVERABLE) -#define FERR_NF_UNCORRECTABLE to_nf_mask(ERROR_NF_UNCORRECTABLE) - -#endif +#define ferr_global_lo_is_fatal(errno) ((errno < 16) ? 0 : 1) /* Device name and register DID (Device ID) */ struct i7300_dev_info { @@ -416,85 +242,28 @@ struct i7300_pvt { struct i7300_dimm_info dimm_info[MAX_SLOTS][MAX_CHANNELS]; }; -#if 0 -/* I7300 MCH error information retrieved from Hardware */ -struct i7300_error_info { - /* These registers are always read from the MC */ - u32 ferr_fat_fbd; /* First Errors Fatal */ - u32 nerr_fat_fbd; /* Next Errors Fatal */ - u32 ferr_nf_fbd; /* First Errors Non-Fatal */ - u32 nerr_nf_fbd; /* Next Errors Non-Fatal */ - - /* These registers are input ONLY if there was a Recoverable Error */ - u32 redmemb; /* Recoverable Mem Data Error log B */ - u16 recmema; /* Recoverable Mem Error log A */ - u32 recmemb; /* Recoverable Mem Error log B */ - - /* These registers are input ONLY if there was a Non-Rec Error */ - u16 nrecmema; /* Non-Recoverable Mem log A */ - u16 nrecmemb; /* Non-Recoverable Mem log B */ - -}; -#endif - /* FIXME: Why do we need to have this static? */ static struct edac_pci_ctl_info *i7300_pci; +/******************************************** + * i7300 Functions related to error detection + ********************************************/ -#if 0 -/* note that nrec_rdwr changed from NRECMEMA to NRECMEMB between the 5000 and - 5400 better to use an inline function than a macro in this case */ -static inline int nrec_bank(struct i7300_error_info *info) -{ - return ((info->nrecmema) >> 12) & 0x7; -} -static inline int nrec_rank(struct i7300_error_info *info) -{ - return ((info->nrecmema) >> 8) & 0xf; -} -static inline int nrec_buf_id(struct i7300_error_info *info) -{ - return ((info->nrecmema)) & 0xff; -} -static inline int nrec_rdwr(struct i7300_error_info *info) -{ - return (info->nrecmemb) >> 31; -} -/* This applies to both NREC and REC string so it can be used with nrec_rdwr - and rec_rdwr */ -static inline const char *rdwr_str(int rdwr) -{ - return rdwr ? "Write" : "Read"; -} -static inline int nrec_cas(struct i7300_error_info *info) -{ - return ((info->nrecmemb) >> 16) & 0x1fff; -} -static inline int nrec_ras(struct i7300_error_info *info) -{ - return (info->nrecmemb) & 0xffff; -} -static inline int rec_bank(struct i7300_error_info *info) -{ - return ((info->recmema) >> 12) & 0x7; -} -static inline int rec_rank(struct i7300_error_info *info) -{ - return ((info->recmema) >> 8) & 0xf; -} -static inline int rec_rdwr(struct i7300_error_info *info) -{ - return (info->recmemb) >> 31; -} -static inline int rec_cas(struct i7300_error_info *info) -{ - return ((info->recmemb) >> 16) & 0x1fff; -} -static inline int rec_ras(struct i7300_error_info *info) +struct i7300_error_info { + int dummy; /* FIXME */ +}; + +const char *get_err_from_table(const char *table[], int size, int pos) { - return (info->recmemb) & 0xffff; + if (pos >= size) + return "Reserved"; + + return table[pos]; } +#define GET_ERR_FROM_TABLE(table, pos) \ + get_err_from_table(table, ARRAY_SIZE(table), pos) + /* * i7300_get_error_info Retrieve the hardware error information from * the hardware and cache it in the 'info' @@ -503,234 +272,63 @@ static inline int rec_ras(struct i7300_error_info *info) static void i7300_get_error_info(struct mem_ctl_info *mci, struct i7300_error_info *info) { - struct i7300_pvt *pvt; - u32 value; - - pvt = mci->pvt_info; - - /* read in the 1st FATAL error register */ - pci_read_config_dword(pvt->pci_dev_16_1_fsb_addr_map, FERR_FAT_FBD, &value); - - /* Mask only the bits that the doc says are valid - */ - value &= (FERR_FAT_FBDCHAN | FERR_FAT_MASK); - - /* If there is an error, then read in the - NEXT FATAL error register and the Memory Error Log Register A - */ - if (value & FERR_FAT_MASK) { - info->ferr_fat_fbd = value; - - /* harvest the various error data we need */ - pci_read_config_dword(pvt->pci_dev_16_1_fsb_addr_map, - NERR_FAT_FBD, &info->nerr_fat_fbd); - pci_read_config_word(pvt->pci_dev_16_1_fsb_addr_map, - NRECMEMA, &info->nrecmema); - pci_read_config_word(pvt->pci_dev_16_1_fsb_addr_map, - NRECMEMB, &info->nrecmemb); - - /* Clear the error bits, by writing them back */ - pci_write_config_dword(pvt->pci_dev_16_1_fsb_addr_map, - FERR_FAT_FBD, value); - } else { - info->ferr_fat_fbd = 0; - info->nerr_fat_fbd = 0; - info->nrecmema = 0; - info->nrecmemb = 0; - } - - /* read in the 1st NON-FATAL error register */ - pci_read_config_dword(pvt->pci_dev_16_1_fsb_addr_map, FERR_NF_FBD, &value); - - /* If there is an error, then read in the 1st NON-FATAL error - * register as well */ - if (value & FERR_NF_MASK) { - info->ferr_nf_fbd = value; - - /* harvest the various error data we need */ - pci_read_config_dword(pvt->pci_dev_16_1_fsb_addr_map, - NERR_NF_FBD, &info->nerr_nf_fbd); - pci_read_config_word(pvt->pci_dev_16_1_fsb_addr_map, - RECMEMA, &info->recmema); - pci_read_config_dword(pvt->pci_dev_16_1_fsb_addr_map, - RECMEMB, &info->recmemb); - pci_read_config_dword(pvt->pci_dev_16_1_fsb_addr_map, - REDMEMB, &info->redmemb); - - /* Clear the error bits, by writing them back */ - pci_write_config_dword(pvt->pci_dev_16_1_fsb_addr_map, - FERR_NF_FBD, value); - } else { - info->ferr_nf_fbd = 0; - info->nerr_nf_fbd = 0; - info->recmema = 0; - info->recmemb = 0; - info->redmemb = 0; - } } /* - * i7300_proccess_non_recoverable_info(struct mem_ctl_info *mci, - * struct i7300_error_info *info, - * int handle_errors); - * - * handle the Intel FATAL and unrecoverable errors, if any - */ -static void i7300_proccess_non_recoverable_info(struct mem_ctl_info *mci, - struct i7300_error_info *info, - unsigned long allErrors) -{ - char msg[EDAC_MC_LABEL_LEN + 1 + 90 + 80]; - int branch; - int channel; - int bank; - int buf_id; - int rank; - int rdwr; - int ras, cas; - int errnum; - char *type = NULL; - - if (!allErrors) - return; /* if no error, return now */ - - if (allErrors & ERROR_FAT_MASK) - type = "FATAL"; - else if (allErrors & FERR_NF_UNCORRECTABLE) - type = "NON-FATAL uncorrected"; - else - type = "NON-FATAL recoverable"; - - /* ONLY ONE of the possible error bits will be set, as per the docs */ - - branch = extract_fbdchan_indx(info->ferr_fat_fbd); - channel = branch; - - /* Use the NON-Recoverable macros to extract data */ - bank = nrec_bank(info); - rank = nrec_rank(info); - buf_id = nrec_buf_id(info); - rdwr = nrec_rdwr(info); - ras = nrec_ras(info); - cas = nrec_cas(info); - - debugf0("\t\tCSROW= %d Channels= %d,%d (Branch= %d " - "DRAM Bank= %d Buffer ID = %d rdwr= %s ras= %d cas= %d)\n", - rank, channel, channel + 1, branch >> 1, bank, - buf_id, rdwr_str(rdwr), ras, cas); - - /* Only 1 bit will be on */ - errnum = find_first_bit(&allErrors, ARRAY_SIZE(error_name)); - - /* Form out message */ - snprintf(msg, sizeof(msg), - "%s (Branch=%d DRAM-Bank=%d Buffer ID = %d RDWR=%s " - "RAS=%d CAS=%d %s Err=0x%lx (%s))", - type, branch >> 1, bank, buf_id, rdwr_str(rdwr), ras, cas, - type, allErrors, error_name[errnum]); - - /* Call the helper to output message */ - edac_mc_handle_fbd_ue(mci, rank, channel, channel + 1, msg); -} - -/* - * i7300_process_fatal_error_info(struct mem_ctl_info *mci, - * struct i7300_error_info *info, - * int handle_errors); - * - * handle the Intel NON-FATAL errors, if any + * i7300_process_error_global Retrieve the hardware error information from + * the hardware and cache it in the 'info' + * structure */ -static void i7300_process_nonfatal_error_info(struct mem_ctl_info *mci, - struct i7300_error_info *info) +static void i7300_process_error_global(struct mem_ctl_info *mci, + struct i7300_error_info *info) { - char msg[EDAC_MC_LABEL_LEN + 1 + 90 + 80]; - unsigned long allErrors; - int branch; - int channel; - int bank; - int rank; - int rdwr; - int ras, cas; - int errnum; - - /* mask off the Error bits that are possible */ - allErrors = from_nf_ferr(info->ferr_nf_fbd & FERR_NF_MASK); - if (!allErrors) - return; /* if no error, return now */ - - /* ONLY ONE of the possible error bits will be set, as per the docs */ - - if (allErrors & (ERROR_NF_UNCORRECTABLE | ERROR_NF_RECOVERABLE)) { - i7300_proccess_non_recoverable_info(mci, info, allErrors); - return; - } - - /* Correctable errors */ - if (allErrors & ERROR_NF_CORRECTABLE) { - debugf0("\tCorrected bits= 0x%lx\n", allErrors); - - branch = extract_fbdchan_indx(info->ferr_nf_fbd); - - channel = 0; - if (REC_ECC_LOCATOR_ODD(info->redmemb)) - channel = 1; - - /* Convert channel to be based from zero, instead of - * from branch base of 0 */ - channel += branch; - - bank = rec_bank(info); - rank = rec_rank(info); - rdwr = rec_rdwr(info); - ras = rec_ras(info); - cas = rec_cas(info); - - /* Only 1 bit will be on */ - errnum = find_first_bit(&allErrors, ARRAY_SIZE(error_name)); - - debugf0("\t\tCSROW= %d Channel= %d (Branch %d " - "DRAM Bank= %d rdwr= %s ras= %d cas= %d)\n", - rank, channel, branch >> 1, bank, - rdwr_str(rdwr), ras, cas); - - /* Form out message */ - snprintf(msg, sizeof(msg), - "Corrected error (Branch=%d DRAM-Bank=%d RDWR=%s " - "RAS=%d CAS=%d, CE Err=0x%lx (%s))", - branch >> 1, bank, rdwr_str(rdwr), ras, cas, - allErrors, error_name[errnum]); + struct i7300_pvt *pvt; + u32 errnum, value; + unsigned long errors; + const char *specific; + bool is_fatal; - /* Call the helper to output message */ - edac_mc_handle_fbd_ce(mci, rank, channel, msg); + pvt = mci->pvt_info; - return; + /* read in the 1st FATAL error register */ + pci_read_config_dword(pvt->pci_dev_16_2_fsb_err_regs, + FERR_GLOBAL_HI, &value); + if (unlikely(value)) { + errors = value; + errnum = find_first_bit(&errors, + ARRAY_SIZE(ferr_global_hi_name)); + specific = GET_ERR_FROM_TABLE(ferr_global_hi_name, errnum); + is_fatal = ferr_global_hi_is_fatal(errnum); + goto error_global; } - /* Miscelaneous errors */ - errnum = find_first_bit(&allErrors, ARRAY_SIZE(error_name)); - - branch = extract_fbdchan_indx(info->ferr_nf_fbd); + pci_read_config_dword(pvt->pci_dev_16_2_fsb_err_regs, + FERR_GLOBAL_LO, &value); + if (unlikely(value)) { + errors = value; + errnum = find_first_bit(&errors, + ARRAY_SIZE(ferr_global_lo_name)); + specific = GET_ERR_FROM_TABLE(ferr_global_lo_name, errnum); + is_fatal = ferr_global_lo_is_fatal(errnum); + goto error_global; + } + return; - i7300_mc_printk(mci, KERN_EMERG, - "Non-Fatal misc error (Branch=%d Err=%#lx (%s))", - branch >> 1, allErrors, error_name[errnum]); +error_global: + i7300_mc_printk(mci, KERN_EMERG, "%s misc error: %s\n", + is_fatal ? "Fatal" : "NOT fatal", specific); } /* - * i7300_process_error_info Process the error info that is - * in the 'info' structure, previously retrieved from hardware + * i7300_process_error_info Retrieve the hardware error information from + * the hardware and cache it in the 'info' + * structure */ static void i7300_process_error_info(struct mem_ctl_info *mci, - struct i7300_error_info *info) -{ u32 allErrors; - - /* First handle any fatal errors that occurred */ - allErrors = (info->ferr_fat_fbd & FERR_FAT_MASK); - i7300_proccess_non_recoverable_info(mci, info, allErrors); - - /* now handle any non-fatal errors that occurred */ - i7300_process_nonfatal_error_info(mci, info); -} + struct i7300_error_info *info) +{ + i7300_process_error_global(mci, info); +}; /* * i7300_clear_error Retrieve any error from the hardware @@ -753,6 +351,7 @@ static void i7300_check_error(struct mem_ctl_info *mci) { struct i7300_error_info info; debugf4("MC%d: " __FILE__ ": %s()\n", mci->mc_idx, __func__); + i7300_get_error_info(mci, &info); i7300_process_error_info(mci, &info); } @@ -763,22 +362,11 @@ static void i7300_check_error(struct mem_ctl_info *mci) */ static void i7300_enable_error_reporting(struct mem_ctl_info *mci) { - struct i7300_pvt *pvt; - u32 fbd_error_mask; - - pvt = mci->pvt_info; - - /* Read the FBD Error Mask Register */ - pci_read_config_dword(pvt->pci_dev_16_1_fsb_addr_map, EMASK_FBD, - &fbd_error_mask); - - /* Enable with a '0' */ - fbd_error_mask &= ~(ENABLE_EMASK_ALL); - - pci_write_config_dword(pvt->pci_dev_16_1_fsb_addr_map, EMASK_FBD, - fbd_error_mask); } -#endif + +/************************************************ + * i7300 Functions related to memory enumberation + ************************************************/ /* * determine_mtr(pvt, csrow, channel) @@ -1070,6 +658,10 @@ static int i7300_get_mc_regs(struct mem_ctl_info *mci) return 0; } +/************************************************* + * i7300 Functions related to device probe/release + *************************************************/ + /* * i7300_put_devices 'put' all the devices that we have * reserved via 'get' @@ -1238,10 +830,8 @@ static int i7300_probe1(struct pci_dev *pdev, int dev_idx) mci->dev_name = pci_name(pdev); mci->ctl_page_to_phys = NULL; -#if 0 /* Set the function pointer to an actual operation function */ mci->edac_check = i7300_check_error; -#endif /* initialize the MC control structure 'csrows' table * with the mapping and control information */ @@ -1251,10 +841,8 @@ static int i7300_probe1(struct pci_dev *pdev, int dev_idx) "value\n"); mci->edac_cap = EDAC_FLAG_NONE; /* no csrows found */ } else { -#if 0 debugf1("MC: Enable error reporting now\n"); i7300_enable_error_reporting(mci); -#endif } /* add this new MC control structure to EDAC's list of MCs */ @@ -1267,9 +855,7 @@ static int i7300_probe1(struct pci_dev *pdev, int dev_idx) goto fail1; } -#if 0 i7300_clear_error(mci); -#endif /* allocating generic PCI control info */ i7300_pci = edac_pci_create_generic_ctl(&pdev->dev, EDAC_MOD_STR); -- cgit v1.2.3 From 86002324cf8809c72858741ab20bb7a855654b4c Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 27 Aug 2010 00:46:57 -0300 Subject: i7300_edac: Clear the error bit after reading Signed-off-by: Mauro Carvalho Chehab --- drivers/edac/i7300_edac.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/edac/i7300_edac.c b/drivers/edac/i7300_edac.c index e617b4f79dd8..f2f171d0356a 100644 --- a/drivers/edac/i7300_edac.c +++ b/drivers/edac/i7300_edac.c @@ -299,6 +299,11 @@ static void i7300_process_error_global(struct mem_ctl_info *mci, ARRAY_SIZE(ferr_global_hi_name)); specific = GET_ERR_FROM_TABLE(ferr_global_hi_name, errnum); is_fatal = ferr_global_hi_is_fatal(errnum); + + /* Clear the error bit */ + pci_write_config_dword(pvt->pci_dev_16_2_fsb_err_regs, + FERR_GLOBAL_HI, value); + goto error_global; } @@ -310,6 +315,11 @@ static void i7300_process_error_global(struct mem_ctl_info *mci, ARRAY_SIZE(ferr_global_lo_name)); specific = GET_ERR_FROM_TABLE(ferr_global_lo_name, errnum); is_fatal = ferr_global_lo_is_fatal(errnum); + + /* Clear the error bit */ + pci_write_config_dword(pvt->pci_dev_16_2_fsb_err_regs, + FERR_GLOBAL_LO, value); + goto error_global; } return; -- cgit v1.2.3 From d7de2bdb0e15c594aefbc71d899c4684a5ce6559 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 27 Aug 2010 08:56:48 -0300 Subject: i7300_edac: Adds detection for enhanced scrub mode on x8 While here, do some cleanup by adding some macros to check for device features. Signed-off-by: Mauro Carvalho Chehab --- drivers/edac/i7300_edac.c | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/drivers/edac/i7300_edac.c b/drivers/edac/i7300_edac.c index f2f171d0356a..27088af79672 100644 --- a/drivers/edac/i7300_edac.c +++ b/drivers/edac/i7300_edac.c @@ -93,6 +93,12 @@ /* OFFSETS for Function 1 */ #define MC_SETTINGS 0x40 +#define IS_MIRRORED(mc) ((mc) & (1 << 16)) +#define IS_ECC_ENABLED(mc) ((mc) & (1 << 5)) +#define IS_RETRY_ENABLED(mc) ((mc) & (1 << 31)) +#define IS_SCRBALGO_ENHANCED(mc) ((mc) & (1 << 8)) + + #define TOLM 0x6C #define REDMEMB 0x7C @@ -451,9 +457,13 @@ static int decode_mtr(struct i7300_pvt *pvt, p_csrow->edac_mode = EDAC_S8ECD8ED; /* ask what device type on this row */ - if (MTR_DRAM_WIDTH(mtr)) + if (MTR_DRAM_WIDTH(mtr)) { + debugf0("Scrub algorithm for x8 is on %s mode\n", + IS_SCRBALGO_ENHANCED(pvt->mc_settings) ? + "enhanced" : "normal"); + p_csrow->dtype = DEV_X8; - else + } else p_csrow->dtype = DEV_X4; return mtr; @@ -643,10 +653,13 @@ static int i7300_get_mc_regs(struct mem_ctl_info *mci) /* Get memory controller settings */ pci_read_config_dword(pvt->pci_dev_16_1_fsb_addr_map, MC_SETTINGS, &pvt->mc_settings); + debugf0("Memory controller operating on %s mode\n", - pvt->mc_settings & (1 << 16)? "mirrored" : "non-mirrored"); + IS_MIRRORED(pvt->mc_settings) ? "mirrored" : "non-mirrored"); debugf0("Error detection is %s\n", - pvt->mc_settings & (1 << 5)? "enabled" : "disabled"); + IS_ECC_ENABLED(pvt->mc_settings) ? "enabled" : "disabled"); + debugf0("Retry is %s\n", + IS_RETRY_ENABLED(pvt->mc_settings) ? "enabled" : "disabled"); /* Get Memory Interleave Range registers */ pci_read_config_word(pvt->pci_dev_16_1_fsb_addr_map, MIR0, &pvt->mir[0]); -- cgit v1.2.3 From bb81a21637f84e2192bf327575645a7843c70cdb Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 27 Aug 2010 09:04:11 -0300 Subject: i7300_edac: Detect if the device is on single mode Signed-off-by: Mauro Carvalho Chehab --- drivers/edac/i7300_edac.c | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/drivers/edac/i7300_edac.c b/drivers/edac/i7300_edac.c index 27088af79672..a88742abf1f4 100644 --- a/drivers/edac/i7300_edac.c +++ b/drivers/edac/i7300_edac.c @@ -92,12 +92,13 @@ /* OFFSETS for Function 1 */ #define MC_SETTINGS 0x40 + #define IS_MIRRORED(mc) ((mc) & (1 << 16)) + #define IS_ECC_ENABLED(mc) ((mc) & (1 << 5)) + #define IS_RETRY_ENABLED(mc) ((mc) & (1 << 31)) + #define IS_SCRBALGO_ENHANCED(mc) ((mc) & (1 << 8)) -#define IS_MIRRORED(mc) ((mc) & (1 << 16)) -#define IS_ECC_ENABLED(mc) ((mc) & (1 << 5)) -#define IS_RETRY_ENABLED(mc) ((mc) & (1 << 31)) -#define IS_SCRBALGO_ENHANCED(mc) ((mc) & (1 << 8)) - +#define MC_SETTINGS_A 0x58 + #define IS_SINGLE_MODE(mca) ((mca) & (1 << 14)) #define TOLM 0x6C #define REDMEMB 0x7C @@ -237,9 +238,11 @@ struct i7300_pvt { u16 tolm; /* top of low memory */ u64 ambase; /* AMB BAR */ - u32 mc_settings; - u16 mir[MAX_MIR]; + u32 mc_settings; /* Report several settings */ + u32 mc_settings_a; + + u16 mir[MAX_MIR]; /* Memory Interleave Reg*/ u16 mtr[MAX_SLOTS][MAX_BRANCHES]; /* Memory Technlogy Reg */ u16 ambpresent[MAX_CHANNELS]; /* AMB present regs */ @@ -653,9 +656,15 @@ static int i7300_get_mc_regs(struct mem_ctl_info *mci) /* Get memory controller settings */ pci_read_config_dword(pvt->pci_dev_16_1_fsb_addr_map, MC_SETTINGS, &pvt->mc_settings); + pci_read_config_dword(pvt->pci_dev_16_1_fsb_addr_map, MC_SETTINGS_A, + &pvt->mc_settings_a); - debugf0("Memory controller operating on %s mode\n", + if (IS_SINGLE_MODE(pvt->mc_settings_a)) + debugf0("Memory controller operating on single mode\n"); + else + debugf0("Memory controller operating on %s mode\n", IS_MIRRORED(pvt->mc_settings) ? "mirrored" : "non-mirrored"); + debugf0("Error detection is %s\n", IS_ECC_ENABLED(pvt->mc_settings) ? "enabled" : "disabled"); debugf0("Retry is %s\n", -- cgit v1.2.3 From 15154c57c62494292f43df9133a7b370cbbf1ecb Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 27 Aug 2010 09:16:06 -0300 Subject: i7300_edac: Properly detect the type of error correction Signed-off-by: Mauro Carvalho Chehab --- drivers/edac/i7300_edac.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/drivers/edac/i7300_edac.c b/drivers/edac/i7300_edac.c index a88742abf1f4..b5152256967f 100644 --- a/drivers/edac/i7300_edac.c +++ b/drivers/edac/i7300_edac.c @@ -450,14 +450,24 @@ static int decode_mtr(struct i7300_pvt *pvt, p_csrow->mtype = MEM_FB_DDR2; /* - * FIXME: the type of error detection actually depends of the + * The type of error detection actually depends of the * mode of operation. When it is just one single memory chip, at - * socket 0, channel 0, it uses 8-byte-over-32-byte SECDED+ code. - * In normal or mirrored mode, it uses Single Device Data correction, + * socket 0, channel 0, it uses 8-byte-over-32-byte SECDED+ code. + * In normal or mirrored mode, it uses Lockstep mode, * with the possibility of using an extended algorithm for x8 memories * See datasheet Sections 7.3.6 to 7.3.8 */ - p_csrow->edac_mode = EDAC_S8ECD8ED; + + if (IS_SINGLE_MODE(pvt->mc_settings_a)) { + p_csrow->edac_mode = EDAC_SECDED; + debugf0("ECC code is 8-byte-over-32-byte SECDED+ code\n"); + } else { + debugf0("ECC code is on Lockstep mode\n"); + if (MTR_DRAM_WIDTH(mtr)) + p_csrow->edac_mode = EDAC_S8ECD8ED; + else + p_csrow->edac_mode = EDAC_S4ECD4ED; + } /* ask what device type on this row */ if (MTR_DRAM_WIDTH(mtr)) { -- cgit v1.2.3 From 57021918aa9c310524d7e9754506e4e8272b4c0e Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 27 Aug 2010 10:22:36 -0300 Subject: i7300_edac: Add support for reporting FBD errors Signed-off-by: Mauro Carvalho Chehab --- drivers/edac/i7300_edac.c | 125 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 125 insertions(+) diff --git a/drivers/edac/i7300_edac.c b/drivers/edac/i7300_edac.c index b5152256967f..728f9b0ab62c 100644 --- a/drivers/edac/i7300_edac.c +++ b/drivers/edac/i7300_edac.c @@ -161,6 +161,63 @@ static const char *numcol_toString[] = { /************************************************ * i7300 Register definitions for error detection ************************************************/ + +/* + * Device 16.1: FBD Error Registers + */ +#define FERR_FAT_FBD 0x98 +static const char *ferr_fat_fbd_name[] = { + [22] = "Non-Redundant Fast Reset Timeout", + [2] = ">Tmid Thermal event with intelligent throttling disabled", + [1] = "Memory or FBD configuration CRC read error", + [0] = "Memory Write error on non-redundant retry or " + "FBD configuration Write error on retry", +}; +#define GET_FBD_FAT_IDX(fbderr) (fbderr & (3 << 28)) +#define FERR_FAT_FBD_ERR_MASK ((1 << 0) | (1 << 1) | (1 << 2) | (1 << 3)) + +#define FERR_NF_FBD 0xa0 +static const char *ferr_nf_fbd_name[] = { + [24] = "DIMM-Spare Copy Completed", + [23] = "DIMM-Spare Copy Initiated", + [22] = "Redundant Fast Reset Timeout", + [21] = "Memory Write error on redundant retry", + [18] = "SPD protocol Error", + [17] = "FBD Northbound parity error on FBD Sync Status", + [16] = "Correctable Patrol Data ECC", + [15] = "Correctable Resilver- or Spare-Copy Data ECC", + [14] = "Correctable Mirrored Demand Data ECC", + [13] = "Correctable Non-Mirrored Demand Data ECC", + [11] = "Memory or FBD configuration CRC read error", + [10] = "FBD Configuration Write error on first attempt", + [9] = "Memory Write error on first attempt", + [8] = "Non-Aliased Uncorrectable Patrol Data ECC", + [7] = "Non-Aliased Uncorrectable Resilver- or Spare-Copy Data ECC", + [6] = "Non-Aliased Uncorrectable Mirrored Demand Data ECC", + [5] = "Non-Aliased Uncorrectable Non-Mirrored Demand Data ECC", + [4] = "Aliased Uncorrectable Patrol Data ECC", + [3] = "Aliased Uncorrectable Resilver- or Spare-Copy Data ECC", + [2] = "Aliased Uncorrectable Mirrored Demand Data ECC", + [1] = "Aliased Uncorrectable Non-Mirrored Demand Data ECC", + [0] = "Uncorrectable Data ECC on Replay", +}; +#define GET_FBD_NF_IDX(fbderr) (fbderr & (3 << 28)) +#define FERR_NF_FBD_ERR_MASK ((1 << 24) | (1 << 23) | (1 << 22) | (1 << 21) |\ + (1 << 18) | (1 << 17) | (1 << 16) | (1 << 15) |\ + (1 << 14) | (1 << 13) | (1 << 11) | (1 << 10) |\ + (1 << 9) | (1 << 8) | (1 << 7) | (1 << 6) |\ + (1 << 5) | (1 << 4) | (1 << 3) | (1 << 2) |\ + (1 << 1) | (1 << 0)) + +#define EMASK_FBD 0xa8 +#define EMASK_FBD_ERR_MASK ((1 << 27) | (1 << 26) | (1 << 25) | (1 << 24) |\ + (1 << 22) | (1 << 21) | (1 << 20) | (1 << 19) |\ + (1 << 18) | (1 << 17) | (1 << 16) | (1 << 14) |\ + (1 << 13) | (1 << 12) | (1 << 11) | (1 << 10) |\ + (1 << 9) | (1 << 8) | (1 << 7) | (1 << 6) |\ + (1 << 5) | (1 << 4) | (1 << 3) | (1 << 2) |\ + (1 << 1) | (1 << 0)) + /* * Device 16.2: Global Error Registers */ @@ -338,6 +395,61 @@ error_global: is_fatal ? "Fatal" : "NOT fatal", specific); } +/* + * i7300_process_fbd_error Retrieve the hardware error information from + * the hardware and cache it in the 'info' + * structure + */ +static void i7300_process_fbd_error(struct mem_ctl_info *mci, + struct i7300_error_info *info) +{ + struct i7300_pvt *pvt; + u32 errnum, value; + int branch; + unsigned long errors; + const char *specific; + bool is_fatal; + + pvt = mci->pvt_info; + + /* read in the 1st FATAL error register */ + pci_read_config_dword(pvt->pci_dev_16_1_fsb_addr_map, + FERR_FAT_FBD, &value); + if (unlikely(value & FERR_FAT_FBD_ERR_MASK)) { + errors = value & FERR_FAT_FBD_ERR_MASK ; + errnum = find_first_bit(&errors, + ARRAY_SIZE(ferr_fat_fbd_name)); + specific = GET_ERR_FROM_TABLE(ferr_fat_fbd_name, errnum); + is_fatal = 1; + + branch = (GET_FBD_FAT_IDX(value) == 2) ? 1 : 0; + + goto error_fbd; + } + + /* read in the 1st NON-FATAL error register */ + pci_read_config_dword(pvt->pci_dev_16_1_fsb_addr_map, + FERR_NF_FBD, &value); + if (unlikely(value & FERR_NF_FBD_ERR_MASK)) { + errors = value & FERR_NF_FBD_ERR_MASK; + errnum = find_first_bit(&errors, + ARRAY_SIZE(ferr_nf_fbd_name)); + specific = GET_ERR_FROM_TABLE(ferr_nf_fbd_name, errnum); + is_fatal = 0; + + /* Clear the error bit */ + pci_write_config_dword(pvt->pci_dev_16_2_fsb_err_regs, + FERR_GLOBAL_LO, value); + + goto error_fbd; + } + return; + +error_fbd: + i7300_mc_printk(mci, KERN_EMERG, "%s FBD error on branch %d: %s\n", + is_fatal ? "Fatal" : "NOT fatal", branch, specific); +} + /* * i7300_process_error_info Retrieve the hardware error information from * the hardware and cache it in the 'info' @@ -347,6 +459,7 @@ static void i7300_process_error_info(struct mem_ctl_info *mci, struct i7300_error_info *info) { i7300_process_error_global(mci, info); + i7300_process_fbd_error(mci, info); }; /* @@ -381,6 +494,18 @@ static void i7300_check_error(struct mem_ctl_info *mci) */ static void i7300_enable_error_reporting(struct mem_ctl_info *mci) { + struct i7300_pvt *pvt = mci->pvt_info; + u32 fbd_error_mask; + + /* Read the FBD Error Mask Register */ + pci_read_config_dword(pvt->pci_dev_16_1_fsb_addr_map, + EMASK_FBD, &fbd_error_mask); + + /* Enable with a '0' */ + fbd_error_mask &= ~(EMASK_FBD_ERR_MASK); + + pci_write_config_dword(pvt->pci_dev_16_1_fsb_addr_map, + EMASK_FBD, fbd_error_mask); } /************************************************ -- cgit v1.2.3 From e43276050927c7dfc45b1e2f090b94f72f87c052 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 27 Aug 2010 10:30:18 -0300 Subject: i7300_edac: Add a code to cleanup error registers Signed-off-by: Mauro Carvalho Chehab --- drivers/edac/i7300_edac.c | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/drivers/edac/i7300_edac.c b/drivers/edac/i7300_edac.c index 728f9b0ab62c..3b6da20de4e5 100644 --- a/drivers/edac/i7300_edac.c +++ b/drivers/edac/i7300_edac.c @@ -470,9 +470,34 @@ static void i7300_process_error_info(struct mem_ctl_info *mci, */ static void i7300_clear_error(struct mem_ctl_info *mci) { - struct i7300_error_info info; + struct i7300_pvt *pvt = mci->pvt_info; + u32 value; + /* + * All error values are RWC - we need to read and write 1 to the + * bit that we want to cleanup + */ - i7300_get_error_info(mci, &info); + /* Clear global error registers */ + pci_read_config_dword(pvt->pci_dev_16_2_fsb_err_regs, + FERR_GLOBAL_HI, &value); + pci_write_config_dword(pvt->pci_dev_16_2_fsb_err_regs, + FERR_GLOBAL_HI, value); + + pci_read_config_dword(pvt->pci_dev_16_2_fsb_err_regs, + FERR_GLOBAL_LO, &value); + pci_write_config_dword(pvt->pci_dev_16_2_fsb_err_regs, + FERR_GLOBAL_LO, value); + + /* Clear FBD error registers */ + pci_read_config_dword(pvt->pci_dev_16_1_fsb_addr_map, + FERR_FAT_FBD, &value); + pci_write_config_dword(pvt->pci_dev_16_1_fsb_addr_map, + FERR_FAT_FBD, value); + + pci_read_config_dword(pvt->pci_dev_16_1_fsb_addr_map, + FERR_NF_FBD, &value); + pci_write_config_dword(pvt->pci_dev_16_1_fsb_addr_map, + FERR_NF_FBD, value); } /* -- cgit v1.2.3 From f42774224860d7c3f7c06559f98b681197999f9e Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 27 Aug 2010 10:33:25 -0300 Subject: i7300_edac: Cleanup: remove get_error_info logic As the error logic in this driver came from i5400 driver, it were using one function to get errors, and another to display. Let's make it simpler and avoid doing it into two steps. Signed-off-by: Mauro Carvalho Chehab --- drivers/edac/i7300_edac.c | 42 ++++++------------------------------------ 1 file changed, 6 insertions(+), 36 deletions(-) diff --git a/drivers/edac/i7300_edac.c b/drivers/edac/i7300_edac.c index 3b6da20de4e5..3ae8fae74e5e 100644 --- a/drivers/edac/i7300_edac.c +++ b/drivers/edac/i7300_edac.c @@ -315,10 +315,6 @@ static struct edac_pci_ctl_info *i7300_pci; * i7300 Functions related to error detection ********************************************/ -struct i7300_error_info { - int dummy; /* FIXME */ -}; - const char *get_err_from_table(const char *table[], int size, int pos) { if (pos >= size) @@ -330,23 +326,12 @@ const char *get_err_from_table(const char *table[], int size, int pos) #define GET_ERR_FROM_TABLE(table, pos) \ get_err_from_table(table, ARRAY_SIZE(table), pos) -/* - * i7300_get_error_info Retrieve the hardware error information from - * the hardware and cache it in the 'info' - * structure - */ -static void i7300_get_error_info(struct mem_ctl_info *mci, - struct i7300_error_info *info) -{ -} - /* * i7300_process_error_global Retrieve the hardware error information from * the hardware and cache it in the 'info' * structure */ -static void i7300_process_error_global(struct mem_ctl_info *mci, - struct i7300_error_info *info) +static void i7300_process_error_global(struct mem_ctl_info *mci) { struct i7300_pvt *pvt; u32 errnum, value; @@ -400,8 +385,7 @@ error_global: * the hardware and cache it in the 'info' * structure */ -static void i7300_process_fbd_error(struct mem_ctl_info *mci, - struct i7300_error_info *info) +static void i7300_process_fbd_error(struct mem_ctl_info *mci) { struct i7300_pvt *pvt; u32 errnum, value; @@ -451,15 +435,14 @@ error_fbd: } /* - * i7300_process_error_info Retrieve the hardware error information from + * i7300_check_error Retrieve the hardware error information from * the hardware and cache it in the 'info' * structure */ -static void i7300_process_error_info(struct mem_ctl_info *mci, - struct i7300_error_info *info) +static void i7300_check_error(struct mem_ctl_info *mci) { - i7300_process_error_global(mci, info); - i7300_process_fbd_error(mci, info); + i7300_process_error_global(mci); + i7300_process_fbd_error(mci); }; /* @@ -500,19 +483,6 @@ static void i7300_clear_error(struct mem_ctl_info *mci) FERR_NF_FBD, value); } -/* - * i7300_check_error Retrieve and process errors reported by the - * hardware. Called by the Core module. - */ -static void i7300_check_error(struct mem_ctl_info *mci) -{ - struct i7300_error_info info; - debugf4("MC%d: " __FILE__ ": %s()\n", mci->mc_idx, __func__); - - i7300_get_error_info(mci, &info); - i7300_process_error_info(mci, &info); -} - /* * i7300_enable_error_reporting * Turn on the memory reporting features of the hardware -- cgit v1.2.3 From 3b330f67581db4f5301438cc0d9499f13314542d Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 27 Aug 2010 10:39:35 -0300 Subject: i7300_edac: Make the debug messages coherent with the others Signed-off-by: Mauro Carvalho Chehab --- drivers/edac/i7300_edac.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/edac/i7300_edac.c b/drivers/edac/i7300_edac.c index 3ae8fae74e5e..218d463fb1a8 100644 --- a/drivers/edac/i7300_edac.c +++ b/drivers/edac/i7300_edac.c @@ -580,9 +580,9 @@ static int decode_mtr(struct i7300_pvt *pvt, if (IS_SINGLE_MODE(pvt->mc_settings_a)) { p_csrow->edac_mode = EDAC_SECDED; - debugf0("ECC code is 8-byte-over-32-byte SECDED+ code\n"); + debugf2("\t\tECC code is 8-byte-over-32-byte SECDED+ code\n"); } else { - debugf0("ECC code is on Lockstep mode\n"); + debugf2("\t\tECC code is on Lockstep mode\n"); if (MTR_DRAM_WIDTH(mtr)) p_csrow->edac_mode = EDAC_S8ECD8ED; else @@ -591,7 +591,7 @@ static int decode_mtr(struct i7300_pvt *pvt, /* ask what device type on this row */ if (MTR_DRAM_WIDTH(mtr)) { - debugf0("Scrub algorithm for x8 is on %s mode\n", + debugf2("\t\tScrub algorithm for x8 is on %s mode\n", IS_SCRBALGO_ENHANCED(pvt->mc_settings) ? "enhanced" : "normal"); -- cgit v1.2.3 From 28c2ce7c8b275a8e6950bacb2dbad70b36a2996b Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 27 Aug 2010 11:20:38 -0300 Subject: i7300_edac: Fix MTR x4/x8 detection logic Signed-off-by: Mauro Carvalho Chehab --- drivers/edac/i7300_edac.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/edac/i7300_edac.c b/drivers/edac/i7300_edac.c index 218d463fb1a8..a4f47fda078d 100644 --- a/drivers/edac/i7300_edac.c +++ b/drivers/edac/i7300_edac.c @@ -583,14 +583,14 @@ static int decode_mtr(struct i7300_pvt *pvt, debugf2("\t\tECC code is 8-byte-over-32-byte SECDED+ code\n"); } else { debugf2("\t\tECC code is on Lockstep mode\n"); - if (MTR_DRAM_WIDTH(mtr)) + if (MTR_DRAM_WIDTH(mtr) == 8) p_csrow->edac_mode = EDAC_S8ECD8ED; else p_csrow->edac_mode = EDAC_S4ECD4ED; } /* ask what device type on this row */ - if (MTR_DRAM_WIDTH(mtr)) { + if (MTR_DRAM_WIDTH(mtr) == 8) { debugf2("\t\tScrub algorithm for x8 is on %s mode\n", IS_SCRBALGO_ENHANCED(pvt->mc_settings) ? "enhanced" : "normal"); -- cgit v1.2.3 From 85580ea4f72ce08e4d9140a3bb22806185a0bba9 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 27 Aug 2010 11:36:23 -0300 Subject: i7300_edac: pre-allocate a buffer used to prepare err messages Instead of dynamically allocating a buffer for it where needed, just allocate it once. As we'll use the same buffer also during fatal and non-fatal errors, is is very risky to dynamically allocate it during an error. Signed-off-by: Mauro Carvalho Chehab --- drivers/edac/i7300_edac.c | 41 ++++++++++++++++++++++++----------------- 1 file changed, 24 insertions(+), 17 deletions(-) diff --git a/drivers/edac/i7300_edac.c b/drivers/edac/i7300_edac.c index a4f47fda078d..4c239ce6986b 100644 --- a/drivers/edac/i7300_edac.c +++ b/drivers/edac/i7300_edac.c @@ -306,6 +306,9 @@ struct i7300_pvt { /* DIMM information matrix, allocating architecture maximums */ struct i7300_dimm_info dimm_info[MAX_SLOTS][MAX_CHANNELS]; + + /* Temporary buffer for use when preparing error messages */ + char *tmp_prt_buffer; }; /* FIXME: Why do we need to have this static? */ @@ -611,17 +614,12 @@ static int decode_mtr(struct i7300_pvt *pvt, static void print_dimm_size(struct i7300_pvt *pvt) { struct i7300_dimm_info *dinfo; - char *p, *mem_buffer; + char *p; int space, n; int channel, slot; space = PAGE_SIZE; - mem_buffer = p = kmalloc(space, GFP_KERNEL); - if (p == NULL) { - i7300_printk(KERN_ERR, "MC: %s:%s() kmalloc() failed\n", - __FILE__, __func__); - return; - } + p = pvt->tmp_prt_buffer; n = snprintf(p, space, " "); p += n; @@ -631,15 +629,15 @@ static void print_dimm_size(struct i7300_pvt *pvt) p += n; space -= n; } - debugf2("%s\n", mem_buffer); - p = mem_buffer; + debugf2("%s\n", pvt->tmp_prt_buffer); + p = pvt->tmp_prt_buffer; space = PAGE_SIZE; n = snprintf(p, space, "-------------------------------" "------------------------------"); p += n; space -= n; - debugf2("%s\n", mem_buffer); - p = mem_buffer; + debugf2("%s\n", pvt->tmp_prt_buffer); + p = pvt->tmp_prt_buffer; space = PAGE_SIZE; for (slot = 0; slot < MAX_SLOTS; slot++) { @@ -654,8 +652,8 @@ static void print_dimm_size(struct i7300_pvt *pvt) space -= n; } - debugf2("%s\n", mem_buffer); - p = mem_buffer; + debugf2("%s\n", pvt->tmp_prt_buffer); + p = pvt->tmp_prt_buffer; space = PAGE_SIZE; } @@ -663,11 +661,9 @@ static void print_dimm_size(struct i7300_pvt *pvt) "------------------------------"); p += n; space -= n; - debugf2("%s\n", mem_buffer); - p = mem_buffer; + debugf2("%s\n", pvt->tmp_prt_buffer); + p = pvt->tmp_prt_buffer; space = PAGE_SIZE; - - kfree(mem_buffer); } /* @@ -978,6 +974,12 @@ static int i7300_probe1(struct pci_dev *pdev, int dev_idx) pvt = mci->pvt_info; pvt->pci_dev_16_0_fsb_ctlr = pdev; /* Record this device in our private */ + pvt->tmp_prt_buffer = kmalloc(PAGE_SIZE, GFP_KERNEL); + if (!pvt->tmp_prt_buffer) { + edac_mc_free(mci); + return -ENOMEM; + } + /* 'get' the pci devices we want to reserve for our use */ if (i7300_get_devices(mci, dev_idx)) goto fail0; @@ -1038,6 +1040,7 @@ fail1: i7300_put_devices(mci); fail0: + kfree(pvt->tmp_prt_buffer); edac_mc_free(mci); return -ENODEV; } @@ -1072,6 +1075,7 @@ static int __devinit i7300_init_one(struct pci_dev *pdev, static void __devexit i7300_remove_one(struct pci_dev *pdev) { struct mem_ctl_info *mci; + char *tmp; debugf0(__FILE__ ": %s()\n", __func__); @@ -1082,9 +1086,12 @@ static void __devexit i7300_remove_one(struct pci_dev *pdev) if (!mci) return; + tmp = ((struct i7300_pvt *)mci->pvt_info)->tmp_prt_buffer; + /* retrieve references to resources, and free those resources */ i7300_put_devices(mci); + kfree(tmp); edac_mc_free(mci); } -- cgit v1.2.3 From 8199d8cc65787bfd83abbfb69d9de1b51e027c41 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 27 Aug 2010 11:51:48 -0300 Subject: i7300_edac: enrich FBD error info for fatal errors Signed-off-by: Mauro Carvalho Chehab --- drivers/edac/i7300_edac.c | 43 +++++++++++++++++++++++++++++++++++++++---- 1 file changed, 39 insertions(+), 4 deletions(-) diff --git a/drivers/edac/i7300_edac.c b/drivers/edac/i7300_edac.c index 4c239ce6986b..e33c9900e269 100644 --- a/drivers/edac/i7300_edac.c +++ b/drivers/edac/i7300_edac.c @@ -268,6 +268,16 @@ static const char *ferr_global_lo_name[] = { }; #define ferr_global_lo_is_fatal(errno) ((errno < 16) ? 0 : 1) +#define NRECMEMA 0xbe + #define NRECMEMA_BANK(v) (((v) >> 12) & 7) + #define NRECMEMA_RANK(v) (((v) >> 8) & 15) + +#define NRECMEMB 0xc0 + #define NRECMEMB_IS_WR(v) ((v) & (1 << 31)) + #define NRECMEMB_CAS(v) (((v) >> 16) & 0x1fff) + #define NRECMEMB_RAS(v) ((v) & 0xffff) + + /* Device name and register DID (Device ID) */ struct i7300_dev_info { const char *ctl_name; /* name for this device */ @@ -392,10 +402,11 @@ static void i7300_process_fbd_error(struct mem_ctl_info *mci) { struct i7300_pvt *pvt; u32 errnum, value; - int branch; + u16 val16; + int branch, bank, rank, cas, ras; unsigned long errors; const char *specific; - bool is_fatal; + bool is_fatal, is_wr; pvt = mci->pvt_info; @@ -410,8 +421,31 @@ static void i7300_process_fbd_error(struct mem_ctl_info *mci) is_fatal = 1; branch = (GET_FBD_FAT_IDX(value) == 2) ? 1 : 0; - - goto error_fbd; + pci_read_config_word(pvt->pci_dev_16_1_fsb_addr_map, + NRECMEMA, &val16); + bank = NRECMEMA_BANK(val16); + rank = NRECMEMA_RANK(val16); + + pci_read_config_dword(pvt->pci_dev_16_1_fsb_addr_map, + NRECMEMB, &value); + + is_wr = NRECMEMB_IS_WR(value); + cas = NRECMEMB_CAS(value); + ras = NRECMEMB_RAS(value); + + snprintf(pvt->tmp_prt_buffer, PAGE_SIZE, + "FATAL (Branch=%d DRAM-Bank=%d %s " + "RAS=%d CAS=%d Err=0x%lx (%s))", + branch >> 1, bank, + is_wr ? "RDWR" : "RD", + ras, cas, + errors, specific); + + /* Call the helper to output message */ + edac_mc_handle_fbd_ue(mci, rank, branch << 1, + (branch << 1) + 1, + pvt->tmp_prt_buffer); + return; } /* read in the 1st NON-FATAL error register */ @@ -433,6 +467,7 @@ static void i7300_process_fbd_error(struct mem_ctl_info *mci) return; error_fbd: + i7300_mc_printk(mci, KERN_EMERG, "%s FBD error on branch %d: %s\n", is_fatal ? "Fatal" : "NOT fatal", branch, specific); } -- cgit v1.2.3 From 32f9472613b30791d8cb5a953791cf4647166744 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 27 Aug 2010 12:13:05 -0300 Subject: i7300_edac: enrich FBD error info for corrected errors Signed-off-by: Mauro Carvalho Chehab --- drivers/edac/i7300_edac.c | 63 ++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 51 insertions(+), 12 deletions(-) diff --git a/drivers/edac/i7300_edac.c b/drivers/edac/i7300_edac.c index e33c9900e269..a06db65e1fef 100644 --- a/drivers/edac/i7300_edac.c +++ b/drivers/edac/i7300_edac.c @@ -277,6 +277,17 @@ static const char *ferr_global_lo_name[] = { #define NRECMEMB_CAS(v) (((v) >> 16) & 0x1fff) #define NRECMEMB_RAS(v) ((v) & 0xffff) +#define REDMEMA 0xdc + +#define RECMEMA 0xe0 + #define RECMEMA_BANK(v) (((v) >> 12) & 7) + #define RECMEMA_RANK(v) (((v) >> 8) & 15) + +#define RECMEMB 0xe4 + #define RECMEMB_IS_WR(v) ((v) & (1 << 31)) + #define RECMEMB_CAS(v) (((v) >> 16) & 0x1fff) + #define RECMEMB_RAS(v) ((v) & 0xffff) + /* Device name and register DID (Device ID) */ struct i7300_dev_info { @@ -403,10 +414,12 @@ static void i7300_process_fbd_error(struct mem_ctl_info *mci) struct i7300_pvt *pvt; u32 errnum, value; u16 val16; - int branch, bank, rank, cas, ras; + unsigned branch, bank, rank, cas, ras; + u32 syndrome; + unsigned long errors; const char *specific; - bool is_fatal, is_wr; + bool is_wr; pvt = mci->pvt_info; @@ -418,7 +431,6 @@ static void i7300_process_fbd_error(struct mem_ctl_info *mci) errnum = find_first_bit(&errors, ARRAY_SIZE(ferr_fat_fbd_name)); specific = GET_ERR_FROM_TABLE(ferr_fat_fbd_name, errnum); - is_fatal = 1; branch = (GET_FBD_FAT_IDX(value) == 2) ? 1 : 0; pci_read_config_word(pvt->pci_dev_16_1_fsb_addr_map, @@ -436,7 +448,7 @@ static void i7300_process_fbd_error(struct mem_ctl_info *mci) snprintf(pvt->tmp_prt_buffer, PAGE_SIZE, "FATAL (Branch=%d DRAM-Bank=%d %s " "RAS=%d CAS=%d Err=0x%lx (%s))", - branch >> 1, bank, + branch, bank, is_wr ? "RDWR" : "RD", ras, cas, errors, specific); @@ -445,7 +457,6 @@ static void i7300_process_fbd_error(struct mem_ctl_info *mci) edac_mc_handle_fbd_ue(mci, rank, branch << 1, (branch << 1) + 1, pvt->tmp_prt_buffer); - return; } /* read in the 1st NON-FATAL error register */ @@ -456,20 +467,48 @@ static void i7300_process_fbd_error(struct mem_ctl_info *mci) errnum = find_first_bit(&errors, ARRAY_SIZE(ferr_nf_fbd_name)); specific = GET_ERR_FROM_TABLE(ferr_nf_fbd_name, errnum); - is_fatal = 0; /* Clear the error bit */ pci_write_config_dword(pvt->pci_dev_16_2_fsb_err_regs, FERR_GLOBAL_LO, value); - goto error_fbd; - } - return; + pci_read_config_dword(pvt->pci_dev_16_1_fsb_addr_map, + REDMEMA, &syndrome); -error_fbd: + branch = (GET_FBD_FAT_IDX(value) == 2) ? 1 : 0; + pci_read_config_word(pvt->pci_dev_16_1_fsb_addr_map, + RECMEMA, &val16); + bank = RECMEMA_BANK(val16); + rank = RECMEMA_RANK(val16); - i7300_mc_printk(mci, KERN_EMERG, "%s FBD error on branch %d: %s\n", - is_fatal ? "Fatal" : "NOT fatal", branch, specific); + pci_read_config_dword(pvt->pci_dev_16_1_fsb_addr_map, + RECMEMB, &value); + + is_wr = RECMEMB_IS_WR(value); + cas = RECMEMB_CAS(value); + ras = RECMEMB_RAS(value); + + /* Form out message */ + snprintf(pvt->tmp_prt_buffer, PAGE_SIZE, + "Corrected error (Branch=%d (channel %d or %d), " + " DRAM-Bank=%d %s " + "RAS=%d CAS=%d, CE Err=0x%lx, Syndrome=0x%08x(%s))", + branch, branch << 1, (branch << 1) + 1, + bank, + is_wr ? "RDWR" : "RD", + ras, cas, + errors, syndrome, specific); + + /* + * Call the helper to output message + * NOTE: Errors are reported per-branch, and not per-channel + * Currently, we don't know how to identify the right + * channel. + */ + edac_mc_handle_fbd_ce(mci, rank, branch << 1, + pvt->tmp_prt_buffer); + } + return; } /* -- cgit v1.2.3 From 37b69cf91c2c6e60856ad1ac4c37ccb2005ebbd3 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 27 Aug 2010 15:44:43 -0300 Subject: i7300_edac: Properly detect channel on CE errors Signed-off-by: Mauro Carvalho Chehab --- drivers/edac/i7300_edac.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/drivers/edac/i7300_edac.c b/drivers/edac/i7300_edac.c index a06db65e1fef..a85a8e5163b7 100644 --- a/drivers/edac/i7300_edac.c +++ b/drivers/edac/i7300_edac.c @@ -101,7 +101,6 @@ #define IS_SINGLE_MODE(mca) ((mca) & (1 << 14)) #define TOLM 0x6C -#define REDMEMB 0x7C #define MIR0 0x80 #define MIR1 0x84 @@ -279,6 +278,9 @@ static const char *ferr_global_lo_name[] = { #define REDMEMA 0xdc +#define REDMEMB 0x7c + #define IS_SECOND_CH(v) ((v) * (1 << 17)) + #define RECMEMA 0xe0 #define RECMEMA_BANK(v) (((v) >> 12) & 7) #define RECMEMA_RANK(v) (((v) >> 8) & 15) @@ -414,7 +416,7 @@ static void i7300_process_fbd_error(struct mem_ctl_info *mci) struct i7300_pvt *pvt; u32 errnum, value; u16 val16; - unsigned branch, bank, rank, cas, ras; + unsigned branch, channel, bank, rank, cas, ras; u32 syndrome; unsigned long errors; @@ -488,12 +490,19 @@ static void i7300_process_fbd_error(struct mem_ctl_info *mci) cas = RECMEMB_CAS(value); ras = RECMEMB_RAS(value); + pci_read_config_dword(pvt->pci_dev_16_1_fsb_addr_map, + REDMEMB, &value); + + channel = (branch << 1); + if (IS_SECOND_CH(value)) + channel++; + /* Form out message */ snprintf(pvt->tmp_prt_buffer, PAGE_SIZE, - "Corrected error (Branch=%d (channel %d or %d), " + "Corrected error (Branch=%d, Channel %d), " " DRAM-Bank=%d %s " "RAS=%d CAS=%d, CE Err=0x%lx, Syndrome=0x%08x(%s))", - branch, branch << 1, (branch << 1) + 1, + branch, channel, bank, is_wr ? "RDWR" : "RD", ras, cas, @@ -505,7 +514,7 @@ static void i7300_process_fbd_error(struct mem_ctl_info *mci) * Currently, we don't know how to identify the right * channel. */ - edac_mc_handle_fbd_ce(mci, rank, branch << 1, + edac_mc_handle_fbd_ce(mci, rank, channel, pvt->tmp_prt_buffer); } return; -- cgit v1.2.3 From b4552aceb37ef953db14b9851bd4ededabc3c77b Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 27 Aug 2010 16:43:01 -0300 Subject: i7300_edac: Cleanup: reorganize the file contents This change should do no functional change. It just rearranges the contents of the c file, in order to make easier to understand and maintain it. Signed-off-by: Mauro Carvalho Chehab --- drivers/edac/i7300_edac.c | 116 ++++++++++++++++++++++++---------------------- 1 file changed, 60 insertions(+), 56 deletions(-) diff --git a/drivers/edac/i7300_edac.c b/drivers/edac/i7300_edac.c index a85a8e5163b7..6278209fec07 100644 --- a/drivers/edac/i7300_edac.c +++ b/drivers/edac/i7300_edac.c @@ -41,6 +41,10 @@ #define i7300_mc_printk(mci, level, fmt, arg...) \ edac_mc_chipset_printk(mci, level, "i7300", fmt, ##arg) +/*********************************************** + * i7300 Limit constants Structs and static vars + ***********************************************/ + /* * Memory topology is organized as: * Branch 0 - 2 channels: channels 0 and 1 (FDB0 PCI dev 21.0) @@ -67,18 +71,64 @@ #define to_csrow(slot, ch, branch) \ (to_channel(ch, branch) | ((slot) << 2)) +/* Device name and register DID (Device ID) */ +struct i7300_dev_info { + const char *ctl_name; /* name for this device */ + u16 fsb_mapping_errors; /* DID for the branchmap,control */ +}; + +/* Table of devices attributes supported by this driver */ +static const struct i7300_dev_info i7300_devs[] = { + { + .ctl_name = "I7300", + .fsb_mapping_errors = PCI_DEVICE_ID_INTEL_I7300_MCH_ERR, + }, +}; + +struct i7300_dimm_info { + int megabytes; /* size, 0 means not present */ +}; + +/* driver private data structure */ +struct i7300_pvt { + struct pci_dev *pci_dev_16_0_fsb_ctlr; /* 16.0 */ + struct pci_dev *pci_dev_16_1_fsb_addr_map; /* 16.1 */ + struct pci_dev *pci_dev_16_2_fsb_err_regs; /* 16.2 */ + struct pci_dev *pci_dev_2x_0_fbd_branch[MAX_BRANCHES]; /* 21.0 and 22.0 */ + + u16 tolm; /* top of low memory */ + u64 ambase; /* AMB BAR */ + + u32 mc_settings; /* Report several settings */ + u32 mc_settings_a; + + u16 mir[MAX_MIR]; /* Memory Interleave Reg*/ + + u16 mtr[MAX_SLOTS][MAX_BRANCHES]; /* Memory Technlogy Reg */ + u16 ambpresent[MAX_CHANNELS]; /* AMB present regs */ + + /* DIMM information matrix, allocating architecture maximums */ + struct i7300_dimm_info dimm_info[MAX_SLOTS][MAX_CHANNELS]; + + /* Temporary buffer for use when preparing error messages */ + char *tmp_prt_buffer; +}; + +/* FIXME: Why do we need to have this static? */ +static struct edac_pci_ctl_info *i7300_pci; + +/*************************************************** + * i7300 Register definitions for memory enumeration + ***************************************************/ + /* - * I7300 devices + * I7300 devices: * All 3 functions of Device 16 (0,1,2) share the SAME DID and - * uses PCI_DEVICE_ID_INTEL_I7300_MCH_ERR for device 16 (0,1,2), - * PCI_DEVICE_ID_INTEL_I7300_MCH_FB0 and PCI_DEVICE_ID_INTEL_I7300_MCH_FB1 - * for device 21 (0,1). + * uses PCI_DEVICE_ID_INTEL_I7300_MCH_ERR for device 16 (0,1,2). + * PCI_DEVICE_ID_INTEL_I7300_MCH_FB0 is used for device 21 (0,1) + * and PCI_DEVICE_ID_INTEL_I7300_MCH_FB1 is used for device 21 (0,1). */ -/**************************************************** - * i7300 Register definitions for memory enumberation - ****************************************************/ - /* * Device 16, * Function 0: System Address (not documented) @@ -125,7 +175,8 @@ const static u16 mtr_regs [MAX_SLOTS] = { 0x82, 0x86, 0x8a, 0x8e }; -/* Defines to extract the vaious fields from the +/* + * Defines to extract the vaious fields from the * MTRx - Memory Technology Registers */ #define MTR_DIMMS_PRESENT(mtr) ((mtr) & (1 << 8)) @@ -290,53 +341,6 @@ static const char *ferr_global_lo_name[] = { #define RECMEMB_CAS(v) (((v) >> 16) & 0x1fff) #define RECMEMB_RAS(v) ((v) & 0xffff) - -/* Device name and register DID (Device ID) */ -struct i7300_dev_info { - const char *ctl_name; /* name for this device */ - u16 fsb_mapping_errors; /* DID for the branchmap,control */ -}; - -/* Table of devices attributes supported by this driver */ -static const struct i7300_dev_info i7300_devs[] = { - { - .ctl_name = "I7300", - .fsb_mapping_errors = PCI_DEVICE_ID_INTEL_I7300_MCH_ERR, - }, -}; - -struct i7300_dimm_info { - int megabytes; /* size, 0 means not present */ -}; - -/* driver private data structure */ -struct i7300_pvt { - struct pci_dev *pci_dev_16_0_fsb_ctlr; /* 16.0 */ - struct pci_dev *pci_dev_16_1_fsb_addr_map; /* 16.1 */ - struct pci_dev *pci_dev_16_2_fsb_err_regs; /* 16.2 */ - struct pci_dev *pci_dev_2x_0_fbd_branch[MAX_BRANCHES]; /* 21.0 and 22.0 */ - - u16 tolm; /* top of low memory */ - u64 ambase; /* AMB BAR */ - - u32 mc_settings; /* Report several settings */ - u32 mc_settings_a; - - u16 mir[MAX_MIR]; /* Memory Interleave Reg*/ - - u16 mtr[MAX_SLOTS][MAX_BRANCHES]; /* Memory Technlogy Reg */ - u16 ambpresent[MAX_CHANNELS]; /* AMB present regs */ - - /* DIMM information matrix, allocating architecture maximums */ - struct i7300_dimm_info dimm_info[MAX_SLOTS][MAX_CHANNELS]; - - /* Temporary buffer for use when preparing error messages */ - char *tmp_prt_buffer; -}; - -/* FIXME: Why do we need to have this static? */ -static struct edac_pci_ctl_info *i7300_pci; - /******************************************** * i7300 Functions related to error detection ********************************************/ -- cgit v1.2.3 From d091a6eb177dd3da8f55f8fd73c5b80db0e1656f Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 27 Aug 2010 17:28:50 -0300 Subject: i7300_edac: Improve comments This is basically a cleanup patch, improving the comments for each function. While here, do a few cleanups. Signed-off-by: Mauro Carvalho Chehab --- drivers/edac/i7300_edac.c | 220 ++++++++++++++++++++++------------------------ 1 file changed, 104 insertions(+), 116 deletions(-) diff --git a/drivers/edac/i7300_edac.c b/drivers/edac/i7300_edac.c index 6278209fec07..a2a9ad499b6b 100644 --- a/drivers/edac/i7300_edac.c +++ b/drivers/edac/i7300_edac.c @@ -121,14 +121,6 @@ static struct edac_pci_ctl_info *i7300_pci; * i7300 Register definitions for memory enumeration ***************************************************/ -/* - * I7300 devices: - * All 3 functions of Device 16 (0,1,2) share the SAME DID and - * uses PCI_DEVICE_ID_INTEL_I7300_MCH_ERR for device 16 (0,1,2). - * PCI_DEVICE_ID_INTEL_I7300_MCH_FB0 is used for device 21 (0,1) - * and PCI_DEVICE_ID_INTEL_I7300_MCH_FB1 is used for device 21 (0,1). - */ - /* * Device 16, * Function 0: System Address (not documented) @@ -345,9 +337,24 @@ static const char *ferr_global_lo_name[] = { * i7300 Functions related to error detection ********************************************/ -const char *get_err_from_table(const char *table[], int size, int pos) +/** + * get_err_from_table() - Gets the error message from a table + * @table: table name (array of char *) + * @size: number of elements at the table + * @pos: position of the element to be returned + * + * This is a small routine that gets the pos-th element of a table. If the + * element doesn't exist (or it is empty), it returns "reserved". + * Instead of calling it directly, the better is to call via the macro + * GET_ERR_FROM_TABLE(), that automatically checks the table size via + * ARRAY_SIZE() macro + */ +static const char *get_err_from_table(const char *table[], int size, int pos) { - if (pos >= size) + if (unlikely(pos >= size)) + return "Reserved"; + + if (unlikely(!table[pos])) return "Reserved"; return table[pos]; @@ -356,10 +363,11 @@ const char *get_err_from_table(const char *table[], int size, int pos) #define GET_ERR_FROM_TABLE(table, pos) \ get_err_from_table(table, ARRAY_SIZE(table), pos) -/* - * i7300_process_error_global Retrieve the hardware error information from - * the hardware and cache it in the 'info' - * structure +/** + * i7300_process_error_global() - Retrieve the hardware error information from + * the hardware global error registers and + * sends it to dmesg + * @mci: struct mem_ctl_info pointer */ static void i7300_process_error_global(struct mem_ctl_info *mci) { @@ -410,10 +418,11 @@ error_global: is_fatal ? "Fatal" : "NOT fatal", specific); } -/* - * i7300_process_fbd_error Retrieve the hardware error information from - * the hardware and cache it in the 'info' - * structure +/** + * i7300_process_fbd_error() - Retrieve the hardware error information from + * the FBD error registers and sends it via + * EDAC error API calls + * @mci: struct mem_ctl_info pointer */ static void i7300_process_fbd_error(struct mem_ctl_info *mci) { @@ -524,10 +533,9 @@ static void i7300_process_fbd_error(struct mem_ctl_info *mci) return; } -/* - * i7300_check_error Retrieve the hardware error information from - * the hardware and cache it in the 'info' - * structure +/** + * i7300_check_error() - Calls the error checking subroutines + * @mci: struct mem_ctl_info pointer */ static void i7300_check_error(struct mem_ctl_info *mci) { @@ -535,11 +543,9 @@ static void i7300_check_error(struct mem_ctl_info *mci) i7300_process_fbd_error(mci); }; -/* - * i7300_clear_error Retrieve any error from the hardware - * but do NOT process that error. - * Used for 'clearing' out of previous errors - * Called by the Core module. +/** + * i7300_clear_error() - Clears the error registers + * @mci: struct mem_ctl_info pointer */ static void i7300_clear_error(struct mem_ctl_info *mci) { @@ -573,9 +579,10 @@ static void i7300_clear_error(struct mem_ctl_info *mci) FERR_NF_FBD, value); } -/* - * i7300_enable_error_reporting - * Turn on the memory reporting features of the hardware +/** + * i7300_enable_error_reporting() - Enable the memory reporting logic at the + * hardware + * @mci: struct mem_ctl_info pointer */ static void i7300_enable_error_reporting(struct mem_ctl_info *mci) { @@ -597,10 +604,14 @@ static void i7300_enable_error_reporting(struct mem_ctl_info *mci) * i7300 Functions related to memory enumberation ************************************************/ -/* - * determine_mtr(pvt, csrow, channel) - * - * return the proper MTR register as determine by the csrow and desired channel +/** + * decode_mtr() - Decodes the MTR descriptor, filling the edac structs + * @pvt: pointer to the private data struct used by i7300 driver + * @slot: DIMM slot (0 to 7) + * @ch: Channel number within the branch (0 or 1) + * @branch: Branch number (0 or 1) + * @dinfo: Pointer to DIMM info where dimm size is stored + * @p_csrow: Pointer to the struct csrow_info that corresponds to that element */ static int decode_mtr(struct i7300_pvt *pvt, int slot, int ch, int branch, @@ -619,14 +630,8 @@ static int decode_mtr(struct i7300_pvt *pvt, ans ? "Present" : "NOT Present"); /* Determine if there is a DIMM present in this DIMM slot */ - -#if 0 - if (!amb_present || !ans) - return 0; -#else if (!ans) return 0; -#endif /* Start with the number of bits for a Bank * on the DRAM */ @@ -692,14 +697,15 @@ static int decode_mtr(struct i7300_pvt *pvt, return mtr; } -/* - * print_dimm_size +/** + * print_dimm_size() - Prints dump of the memory organization + * @pvt: pointer to the private data struct used by i7300 driver * - * also will output a DIMM matrix map, if debug is enabled, for viewing - * how the DIMMs are populated + * Useful for debug. If debug is disabled, this routine do nothing */ static void print_dimm_size(struct i7300_pvt *pvt) { +#ifdef CONFIG_EDAC_DEBUG struct i7300_dimm_info *dinfo; char *p; int space, n; @@ -751,30 +757,26 @@ static void print_dimm_size(struct i7300_pvt *pvt) debugf2("%s\n", pvt->tmp_prt_buffer); p = pvt->tmp_prt_buffer; space = PAGE_SIZE; +#endif } -/* - * i7300_init_csrows Initialize the 'csrows' table within - * the mci control structure with the - * addressing of memory. - * - * return: - * 0 success - * 1 no actual memory found on this MC +/** + * i7300_init_csrows() - Initialize the 'csrows' table within + * the mci control structure with the + * addressing of memory. + * @mci: struct mem_ctl_info pointer */ static int i7300_init_csrows(struct mem_ctl_info *mci) { struct i7300_pvt *pvt; struct i7300_dimm_info *dinfo; struct csrow_info *p_csrow; - int empty; + int rc = -ENODEV; int mtr; int ch, branch, slot, channel; pvt = mci->pvt_info; - empty = 1; /* Assume NO memory */ - debugf2("Memory Technology Registers:\n"); /* Get the AMB present registers for the four channels */ @@ -819,14 +821,19 @@ static int i7300_init_csrows(struct mem_ctl_info *mci) p_csrow->last_page = 9 + slot * 20; p_csrow->page_mask = 0xfff; - empty = 0; + rc = 0; } } } - return empty; + return rc; } +/** + * decode_mir() - Decodes Memory Interleave Register (MIR) info + * @int mir_no: number of the MIR register to decode + * @mir: array with the MIR data cached on the driver + */ static void decode_mir(int mir_no, u16 mir[MAX_MIR]) { if (mir[mir_no] & 3) @@ -837,11 +844,11 @@ static void decode_mir(int mir_no, u16 mir[MAX_MIR]) (mir[mir_no] & 2) ? "B1": ""); } -/* - * i7300_get_mc_regs read in the necessary registers and - * cache locally +/** + * i7300_get_mc_regs() - Get the contents of the MC enumeration registers + * @mci: struct mem_ctl_info pointer * - * Fills in the private data members + * Data read is cached internally for its usage when needed */ static int i7300_get_mc_regs(struct mem_ctl_info *mci) { @@ -907,9 +914,9 @@ static int i7300_get_mc_regs(struct mem_ctl_info *mci) * i7300 Functions related to device probe/release *************************************************/ -/* - * i7300_put_devices 'put' all the devices that we have - * reserved via 'get' +/** + * i7300_put_devices() - Release the PCI devices + * @mci: struct mem_ctl_info pointer */ static void i7300_put_devices(struct mem_ctl_info *mci) { @@ -925,13 +932,18 @@ static void i7300_put_devices(struct mem_ctl_info *mci) pci_dev_put(pvt->pci_dev_16_1_fsb_addr_map); } -/* - * i7300_get_devices Find and perform 'get' operation on the MCH's - * device/functions we want to reference for this driver +/** + * i7300_get_devices() - Find and perform 'get' operation on the MCH's + * device/functions we want to reference for this driver + * @mci: struct mem_ctl_info pointer * - * Need to 'get' device 16 func 1 and func 2 + * Access and prepare the several devices for usage: + * I7300 devices used by this driver: + * Device 16, functions 0,1 and 2: PCI_DEVICE_ID_INTEL_I7300_MCH_ERR + * Device 21 function 0: PCI_DEVICE_ID_INTEL_I7300_MCH_FB0 + * Device 22 function 0: PCI_DEVICE_ID_INTEL_I7300_MCH_FB1 */ -static int i7300_get_devices(struct mem_ctl_info *mci, int dev_idx) +static int __devinit i7300_get_devices(struct mem_ctl_info *mci) { struct i7300_pvt *pvt; struct pci_dev *pdev; @@ -1007,23 +1019,25 @@ error: return -ENODEV; } -/* - * i7300_probe1 Probe for ONE instance of device to see if it is - * present. - * return: - * 0 for FOUND a device - * < 0 for error code +/** + * i7300_init_one() - Probe for one instance of the device + * @pdev: struct pci_dev pointer + * @id: struct pci_device_id pointer - currently unused */ -static int i7300_probe1(struct pci_dev *pdev, int dev_idx) +static int __devinit i7300_init_one(struct pci_dev *pdev, + const struct pci_device_id *id) { struct mem_ctl_info *mci; struct i7300_pvt *pvt; int num_channels; int num_dimms_per_channel; int num_csrows; + int rc; - if (dev_idx >= ARRAY_SIZE(i7300_devs)) - return -EINVAL; + /* wake up device */ + rc = pci_enable_device(pdev); + if (rc == -EIO) + return rc; debugf0("MC: " __FILE__ ": %s(), pdev bus %u dev=0x%x fn=0x%x\n", __func__, @@ -1068,7 +1082,7 @@ static int i7300_probe1(struct pci_dev *pdev, int dev_idx) } /* 'get' the pci devices we want to reserve for our use */ - if (i7300_get_devices(mci, dev_idx)) + if (i7300_get_devices(mci)) goto fail0; mci->mc_idx = 0; @@ -1077,7 +1091,7 @@ static int i7300_probe1(struct pci_dev *pdev, int dev_idx) mci->edac_cap = EDAC_FLAG_NONE; mci->mod_name = "i7300_edac.c"; mci->mod_ver = I7300_REVISION; - mci->ctl_name = i7300_devs[dev_idx].ctl_name; + mci->ctl_name = i7300_devs[0].ctl_name; mci->dev_name = pci_name(pdev); mci->ctl_page_to_phys = NULL; @@ -1132,32 +1146,9 @@ fail0: return -ENODEV; } -/* - * i7300_init_one constructor for one instance of device - * - * returns: - * negative on error - * count (>= 0) - */ -static int __devinit i7300_init_one(struct pci_dev *pdev, - const struct pci_device_id *id) -{ - int rc; - - debugf0("MC: " __FILE__ ": %s()\n", __func__); - - /* wake up device */ - rc = pci_enable_device(pdev); - if (rc == -EIO) - return rc; - - /* now probe and enable the device */ - return i7300_probe1(pdev, id->driver_data); -} - -/* - * i7300_remove_one destructor for one instance of device - * +/** + * i7300_remove_one() - Remove the driver + * @pdev: struct pci_dev pointer */ static void __devexit i7300_remove_one(struct pci_dev *pdev) { @@ -1183,9 +1174,9 @@ static void __devexit i7300_remove_one(struct pci_dev *pdev) } /* - * pci_device_id table for which devices we are looking for + * pci_device_id: table for which devices we are looking for * - * The "E500P" device is the first device supported. + * Has only 8086:360c PCI ID */ static const struct pci_device_id i7300_pci_tbl[] __devinitdata = { {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_I7300_MCH_ERR)}, @@ -1195,8 +1186,7 @@ static const struct pci_device_id i7300_pci_tbl[] __devinitdata = { MODULE_DEVICE_TABLE(pci, i7300_pci_tbl); /* - * i7300_driver pci_driver structure for this module - * + * i7300_driver: pci_driver structure for this module */ static struct pci_driver i7300_driver = { .name = "i7300_edac", @@ -1205,9 +1195,8 @@ static struct pci_driver i7300_driver = { .id_table = i7300_pci_tbl, }; -/* - * i7300_init Module entry function - * Try to initialize this module for its devices +/** + * i7300_init() - Registers the driver */ static int __init i7300_init(void) { @@ -1223,9 +1212,8 @@ static int __init i7300_init(void) return (pci_rc < 0) ? pci_rc : 0; } -/* - * i7300_exit() Module exit function - * Unregister the driver +/** + * i7300_init() - Unregisters the driver */ static void __exit i7300_exit(void) { -- cgit v1.2.3 From 9c6f6b65d25aa7fe890377a92ea049c8e20da906 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 27 Aug 2010 17:43:43 -0300 Subject: i7300-edac: CodingStyle cleanup Signed-off-by: Mauro Carvalho Chehab --- drivers/edac/i7300_edac.c | 42 ++++++++++++++++++++++++++---------------- 1 file changed, 26 insertions(+), 16 deletions(-) diff --git a/drivers/edac/i7300_edac.c b/drivers/edac/i7300_edac.c index a2a9ad499b6b..38920c0b5fbb 100644 --- a/drivers/edac/i7300_edac.c +++ b/drivers/edac/i7300_edac.c @@ -104,7 +104,7 @@ struct i7300_pvt { u16 mir[MAX_MIR]; /* Memory Interleave Reg*/ - u16 mtr[MAX_SLOTS][MAX_BRANCHES]; /* Memory Technlogy Reg */ + u16 mtr[MAX_SLOTS][MAX_BRANCHES]; /* Memory Technlogy Reg */ u16 ambpresent[MAX_CHANNELS]; /* AMB present regs */ /* DIMM information matrix, allocating architecture maximums */ @@ -162,7 +162,7 @@ static struct edac_pci_ctl_info *i7300_pci; #define AMBPRESENT_0 0x64 #define AMBPRESENT_1 0x66 -const static u16 mtr_regs [MAX_SLOTS] = { +const static u16 mtr_regs[MAX_SLOTS] = { 0x80, 0x84, 0x88, 0x8c, 0x82, 0x86, 0x8a, 0x8e }; @@ -726,7 +726,7 @@ static void print_dimm_size(struct i7300_pvt *pvt) p = pvt->tmp_prt_buffer; space = PAGE_SIZE; n = snprintf(p, space, "-------------------------------" - "------------------------------"); + "------------------------------"); p += n; space -= n; debugf2("%s\n", pvt->tmp_prt_buffer); @@ -751,7 +751,7 @@ static void print_dimm_size(struct i7300_pvt *pvt) } n = snprintf(p, space, "-------------------------------" - "------------------------------"); + "------------------------------"); p += n; space -= n; debugf2("%s\n", pvt->tmp_prt_buffer); @@ -783,13 +783,15 @@ static int i7300_init_csrows(struct mem_ctl_info *mci) for (branch = 0; branch < MAX_BRANCHES; branch++) { /* Read and dump branch 0's MTRs */ channel = to_channel(0, branch); - pci_read_config_word(pvt->pci_dev_2x_0_fbd_branch[branch], AMBPRESENT_0, + pci_read_config_word(pvt->pci_dev_2x_0_fbd_branch[branch], + AMBPRESENT_0, &pvt->ambpresent[channel]); debugf2("\t\tAMB-present CH%d = 0x%x:\n", channel, pvt->ambpresent[channel]); channel = to_channel(1, branch); - pci_read_config_word(pvt->pci_dev_2x_0_fbd_branch[branch], AMBPRESENT_1, + pci_read_config_word(pvt->pci_dev_2x_0_fbd_branch[branch], + AMBPRESENT_1, &pvt->ambpresent[channel]); debugf2("\t\tAMB-present CH%d = 0x%x:\n", channel, pvt->ambpresent[channel]); @@ -837,11 +839,12 @@ static int i7300_init_csrows(struct mem_ctl_info *mci) static void decode_mir(int mir_no, u16 mir[MAX_MIR]) { if (mir[mir_no] & 3) - debugf2("MIR%d: limit= 0x%x Branch(es) that participate: %s %s\n", + debugf2("MIR%d: limit= 0x%x Branch(es) that participate:" + " %s %s\n", mir_no, (mir[mir_no] >> 4) & 0xfff, (mir[mir_no] & 1) ? "B0" : "", - (mir[mir_no] & 2) ? "B1": ""); + (mir[mir_no] & 2) ? "B1" : ""); } /** @@ -891,9 +894,12 @@ static int i7300_get_mc_regs(struct mem_ctl_info *mci) IS_RETRY_ENABLED(pvt->mc_settings) ? "enabled" : "disabled"); /* Get Memory Interleave Range registers */ - pci_read_config_word(pvt->pci_dev_16_1_fsb_addr_map, MIR0, &pvt->mir[0]); - pci_read_config_word(pvt->pci_dev_16_1_fsb_addr_map, MIR1, &pvt->mir[1]); - pci_read_config_word(pvt->pci_dev_16_1_fsb_addr_map, MIR2, &pvt->mir[2]); + pci_read_config_word(pvt->pci_dev_16_1_fsb_addr_map, MIR0, + &pvt->mir[0]); + pci_read_config_word(pvt->pci_dev_16_1_fsb_addr_map, MIR1, + &pvt->mir[1]); + pci_read_config_word(pvt->pci_dev_16_1_fsb_addr_map, MIR2, + &pvt->mir[2]); /* Decode the MIR regs */ for (i = 0; i < MAX_MIR; i++) @@ -952,7 +958,8 @@ static int __devinit i7300_get_devices(struct mem_ctl_info *mci) /* Attempt to 'get' the MCH register we want */ pdev = NULL; - while (!pvt->pci_dev_16_1_fsb_addr_map || !pvt->pci_dev_16_2_fsb_err_regs) { + while (!pvt->pci_dev_16_1_fsb_addr_map || + !pvt->pci_dev_16_2_fsb_err_regs) { pdev = pci_get_device(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_I7300_MCH_ERR, pdev); if (!pdev) { @@ -980,16 +987,19 @@ static int __devinit i7300_get_devices(struct mem_ctl_info *mci) debugf1("System Address, processor bus- PCI Bus ID: %s %x:%x\n", pci_name(pvt->pci_dev_16_0_fsb_ctlr), - pvt->pci_dev_16_0_fsb_ctlr->vendor, pvt->pci_dev_16_0_fsb_ctlr->device); + pvt->pci_dev_16_0_fsb_ctlr->vendor, + pvt->pci_dev_16_0_fsb_ctlr->device); debugf1("Branchmap, control and errors - PCI Bus ID: %s %x:%x\n", pci_name(pvt->pci_dev_16_1_fsb_addr_map), - pvt->pci_dev_16_1_fsb_addr_map->vendor, pvt->pci_dev_16_1_fsb_addr_map->device); + pvt->pci_dev_16_1_fsb_addr_map->vendor, + pvt->pci_dev_16_1_fsb_addr_map->device); debugf1("FSB Error Regs - PCI Bus ID: %s %x:%x\n", pci_name(pvt->pci_dev_16_2_fsb_err_regs), - pvt->pci_dev_16_2_fsb_err_regs->vendor, pvt->pci_dev_16_2_fsb_err_regs->device); + pvt->pci_dev_16_2_fsb_err_regs->vendor, + pvt->pci_dev_16_2_fsb_err_regs->device); pvt->pci_dev_2x_0_fbd_branch[0] = pci_get_device(PCI_VENDOR_ID_INTEL, - PCI_DEVICE_ID_INTEL_I7300_MCH_FB0, + PCI_DEVICE_ID_INTEL_I7300_MCH_FB0, NULL); if (!pvt->pci_dev_2x_0_fbd_branch[0]) { i7300_printk(KERN_ERR, -- cgit v1.2.3 From 3c9c92b6b5016a2e02c3438386b1c5efe7f588e4 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Wed, 22 Sep 2010 09:36:54 -0300 Subject: MAINTAINERS: Add maintainer for i7300-edac driver Signed-off-by: Mauro Carvalho Chehab --- MAINTAINERS | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index b5b8baa1d70e..c3461855eb34 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2140,6 +2140,13 @@ W: bluesmoke.sourceforge.net S: Maintained F: drivers/edac/i5400_edac.c +EDAC-I7300 +M: Mauro Carvalho Chehab +L: linux-edac@vger.kernel.org +W: bluesmoke.sourceforge.net +S: Maintained +F: drivers/edac/i7300_edac.c + EDAC-I7CORE M: Mauro Carvalho Chehab L: linux-edac@vger.kernel.org -- cgit v1.2.3 From 1aa4a7b6b082adbfa704988dd098bc96b8837d5a Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 24 Sep 2010 11:29:02 -0300 Subject: V4L/DVB: i7300_edac: better initialize page counts It is still somewhat fake, as the pages may not be on this exact order, and may even be used in mirror mode, but this is a best guess than the other random fake values. Signed-off-by: Mauro Carvalho Chehab --- drivers/edac/i7300_edac.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/edac/i7300_edac.c b/drivers/edac/i7300_edac.c index 38920c0b5fbb..38d9cb8fad7c 100644 --- a/drivers/edac/i7300_edac.c +++ b/drivers/edac/i7300_edac.c @@ -616,7 +616,8 @@ static void i7300_enable_error_reporting(struct mem_ctl_info *mci) static int decode_mtr(struct i7300_pvt *pvt, int slot, int ch, int branch, struct i7300_dimm_info *dinfo, - struct csrow_info *p_csrow) + struct csrow_info *p_csrow, + u32 *last_page) { int mtr, ans, addrBits, channel; @@ -663,6 +664,11 @@ static int decode_mtr(struct i7300_pvt *pvt, p_csrow->grain = 8; p_csrow->nr_pages = dinfo->megabytes << 8; p_csrow->mtype = MEM_FB_DDR2; + p_csrow->csrow_idx = slot; + p_csrow->first_page = *last_page; + *last_page += p_csrow->nr_pages; + p_csrow->last_page = *last_page; + p_csrow->page_mask = 0; /* * The type of error detection actually depends of the @@ -774,6 +780,7 @@ static int i7300_init_csrows(struct mem_ctl_info *mci) int rc = -ENODEV; int mtr; int ch, branch, slot, channel; + u32 last_page = 0; pvt = mci->pvt_info; @@ -811,18 +818,11 @@ static int i7300_init_csrows(struct mem_ctl_info *mci) p_csrow = &mci->csrows[slot]; mtr = decode_mtr(pvt, slot, ch, branch, - dinfo, p_csrow); + dinfo, p_csrow, &last_page); /* if no DIMMS on this row, continue */ if (!MTR_DIMMS_PRESENT(mtr)) continue; - p_csrow->csrow_idx = slot; - - /* FAKE OUT VALUES, FIXME */ - p_csrow->first_page = 0 + slot * 20; - p_csrow->last_page = 9 + slot * 20; - p_csrow->page_mask = 0xfff; - rc = 0; } } -- cgit v1.2.3 From e6649cc62949f1ed473bf1131fa425cfe72d3f64 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 24 Sep 2010 11:53:52 -0300 Subject: i7300_edac: Properly initialize per-csrow memory size Due to the current edac-core limits, we cannot represent a per-channel memory size, for FB-DIMM drivers. So, we need to sum-up all values for each slot, in order to properly represent the total amount of memory found by the i7300 driver. Signed-off-by: Mauro Carvalho Chehab --- drivers/edac/i7300_edac.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/drivers/edac/i7300_edac.c b/drivers/edac/i7300_edac.c index 38d9cb8fad7c..05523b504271 100644 --- a/drivers/edac/i7300_edac.c +++ b/drivers/edac/i7300_edac.c @@ -617,7 +617,7 @@ static int decode_mtr(struct i7300_pvt *pvt, int slot, int ch, int branch, struct i7300_dimm_info *dinfo, struct csrow_info *p_csrow, - u32 *last_page) + u32 *nr_pages) { int mtr, ans, addrBits, channel; @@ -649,6 +649,7 @@ static int decode_mtr(struct i7300_pvt *pvt, addrBits -= 3; /* 8 bits per bytes */ dinfo->megabytes = 1 << addrBits; + *nr_pages = dinfo->megabytes << 8; debugf2("\t\tWIDTH: x%d\n", MTR_DRAM_WIDTH(mtr)); @@ -662,12 +663,8 @@ static int decode_mtr(struct i7300_pvt *pvt, debugf2("\t\tSIZE: %d MB\n", dinfo->megabytes); p_csrow->grain = 8; - p_csrow->nr_pages = dinfo->megabytes << 8; p_csrow->mtype = MEM_FB_DDR2; p_csrow->csrow_idx = slot; - p_csrow->first_page = *last_page; - *last_page += p_csrow->nr_pages; - p_csrow->last_page = *last_page; p_csrow->page_mask = 0; /* @@ -780,7 +777,7 @@ static int i7300_init_csrows(struct mem_ctl_info *mci) int rc = -ENODEV; int mtr; int ch, branch, slot, channel; - u32 last_page = 0; + u32 last_page = 0, nr_pages; pvt = mci->pvt_info; @@ -818,11 +815,17 @@ static int i7300_init_csrows(struct mem_ctl_info *mci) p_csrow = &mci->csrows[slot]; mtr = decode_mtr(pvt, slot, ch, branch, - dinfo, p_csrow, &last_page); + dinfo, p_csrow, &nr_pages); /* if no DIMMS on this row, continue */ if (!MTR_DIMMS_PRESENT(mtr)) continue; + /* Update per_csrow memory count */ + p_csrow->nr_pages += nr_pages; + p_csrow->first_page = last_page; + last_page += nr_pages; + p_csrow->last_page = last_page; + rc = 0; } } -- cgit v1.2.3