cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

i5400_edac.c (40744B)


      1/*
      2 * Intel 5400 class Memory Controllers kernel module (Seaburg)
      3 *
      4 * This file may be distributed under the terms of the
      5 * GNU General Public License.
      6 *
      7 * Copyright (c) 2008 by:
      8 *	 Ben Woodard <woodard@redhat.com>
      9 *	 Mauro Carvalho Chehab
     10 *
     11 * Red Hat Inc. https://www.redhat.com
     12 *
     13 * Forked and adapted from the i5000_edac driver which was
     14 * written by Douglas Thompson Linux Networx <norsk5@xmission.com>
     15 *
     16 * This module is based on the following document:
     17 *
     18 * Intel 5400 Chipset Memory Controller Hub (MCH) - Datasheet
     19 * 	http://developer.intel.com/design/chipsets/datashts/313070.htm
     20 *
     21 * This Memory Controller manages DDR2 FB-DIMMs. It has 2 branches, each with
     22 * 2 channels operating in lockstep no-mirror mode. Each channel can have up to
     23 * 4 dimm's, each with up to 8GB.
     24 *
     25 */
     26
     27#include <linux/module.h>
     28#include <linux/init.h>
     29#include <linux/pci.h>
     30#include <linux/pci_ids.h>
     31#include <linux/slab.h>
     32#include <linux/edac.h>
     33#include <linux/mmzone.h>
     34
     35#include "edac_module.h"
     36
     37/*
     38 * Alter this version for the I5400 module when modifications are made
     39 */
     40#define I5400_REVISION    " Ver: 1.0.0"
     41
     42#define EDAC_MOD_STR      "i5400_edac"
     43/* printk wrapper that tags every message with the "i5400" chipset name */
     44#define i5400_printk(level, fmt, arg...) \
     45	edac_printk(level, "i5400", fmt, ##arg)
     46/* Same tag, but routed through the per-controller (mci) printk helper */
     47#define i5400_mc_printk(mci, level, fmt, arg...) \
     48	edac_mc_chipset_printk(mci, level, "i5400", fmt, ##arg)
     49
     50/* Limits for i5400: 2 branches x 2 channels, 4 DIMMs per channel */
     51#define MAX_BRANCHES		2
     52#define CHANNELS_PER_BRANCH	2
     53#define DIMMS_PER_CHANNEL	4
     54#define	MAX_CHANNELS		(MAX_BRANCHES * CHANNELS_PER_BRANCH)
     55
     56/* Device 16,
     57 * Function 0: System Address
     58 * Function 1: Memory Branch Map, Control, Errors Register
     59 * Function 2: FSB Error Registers
     60 *
     61 * All 3 functions of Device 16 (0,1,2) share the SAME DID and
     62 * uses PCI_DEVICE_ID_INTEL_5400_ERR for device 16 (0,1,2),
     63 * PCI_DEVICE_ID_INTEL_5400_FBD0 and PCI_DEVICE_ID_INTEL_5400_FBD1
     64 * for device 21 (0,1).
     65 */
     66
     67	/* PCI config-space OFFSETS for Device 16, Function 0 */
     68#define		AMBASE			0x48 /* AMB Mem Mapped Reg Region Base */
     69#define		MAXCH			0x56 /* Max Channel Number */
     70#define		MAXDIMMPERCH		0x57 /* Max DIMMs per channel */
     71
     72	/* PCI config-space OFFSETS for Device 16, Function 1 */
     73#define		TOLM			0x6C	/* Top of low memory */
     74#define		REDMEMB			0x7C	/* Recoverable Mem Data Error log B */
     75#define			REC_ECC_LOCATOR_ODD(x)	((x) & 0x3fe00) /* REDMEMB bits [17:9] flag the ODD channel, [8:0] the EVEN one */
     76#define		MIR0			0x80
     77#define		MIR1			0x84
     78#define		AMIR0			0x8c
     79#define		AMIR1			0x90
     80
     81	/* Fatal error registers */
     82#define		FERR_FAT_FBD		0x98	/* called FERR_FAT_FB_DIMM in the datasheet */
     83#define			FERR_FAT_FBDCHAN (3<<28)	/* channel index where the highest-order error occurred */
     84
     85#define		NERR_FAT_FBD		0x9c
     86#define		FERR_NF_FBD		0xa0	/* called FERR_NFAT_FB_DIMM in the datasheet */
     87
     88	/* Non-fatal "next error" register */
     89#define		NERR_NF_FBD		0xa4
     90
     91	/* Enable error mask */
     92#define		EMASK_FBD		0xa8
     93
     94#define		ERR0_FBD		0xac
     95#define		ERR1_FBD		0xb0
     96#define		ERR2_FBD		0xb4
     97#define		MCERR_FBD		0xb8
     98
     99	/* No OFFSETS for Device 16 Function 2 */
    100
    101/*
    102 * Device 21,
    103 * Function 0: Memory Map Branch 0
    104 *
    105 * Device 22,
    106 * Function 0: Memory Map Branch 1
    107 */
    108
    109	/* OFFSETS for Function 0 (per-branch memory map device) */
    110#define AMBPRESENT_0	0x64
    111#define AMBPRESENT_1	0x66
    112#define MTR0		0x80	/* Memory Technology Registers, one per DIMM slot */
    113#define MTR1		0x82
    114#define MTR2		0x84
    115#define MTR3		0x86
    116
    117	/* OFFSETS for Function 1 */
    118#define NRECFGLOG		0x74
    119#define RECFGLOG		0x78
    120#define NRECMEMA		0xbe	/* Non-Recoverable Mem log A (16-bit read) */
    121#define NRECMEMB		0xc0	/* Non-Recoverable Mem log B */
    122#define NRECFB_DIMMA		0xc4
    123#define NRECFB_DIMMB		0xc8
    124#define NRECFB_DIMMC		0xcc
    125#define NRECFB_DIMMD		0xd0
    126#define NRECFB_DIMME		0xd4
    127#define NRECFB_DIMMF		0xd8
    128#define REDMEMA			0xdC
    129#define RECMEMA			0xf0	/* Recoverable Mem Error log A (16-bit read) */
    130#define RECMEMB			0xf4	/* Recoverable Mem Error log B */
    131#define RECFB_DIMMA		0xf8	/* NOTE(review): same value as RECFB_DIMME and off the 4-byte stride of B..F (0xe8 would fit) - confirm against the datasheet */
    132#define RECFB_DIMMB		0xec
    133#define RECFB_DIMMC		0xf0	/* NOTE(review): same offset as RECMEMA - confirm */
    134#define RECFB_DIMMD		0xf4	/* NOTE(review): same offset as RECMEMB - confirm */
    135#define RECFB_DIMME		0xf8
    136#define RECFB_DIMMF		0xfC
    137
    138/*
    139 * Error indicator bits and masks
    140 * Error masks follow Table 5-17 of the i5400 datasheet
    141 */
    142
    143enum error_mask {
    144	EMASK_M1  = 1<<0,  /* Memory Write error on non-redundant retry */
    145	EMASK_M2  = 1<<1,  /* Memory or FB-DIMM configuration CRC read error */
    146	EMASK_M3  = 1<<2,  /* Reserved */
    147	EMASK_M4  = 1<<3,  /* Uncorrectable Data ECC on Replay */
    148	EMASK_M5  = 1<<4,  /* Aliased Uncorrectable Non-Mirrored Demand Data ECC */
    149	EMASK_M6  = 1<<5,  /* Unsupported on i5400 */
    150	EMASK_M7  = 1<<6,  /* Aliased Uncorrectable Resilver- or Spare-Copy Data ECC */
    151	EMASK_M8  = 1<<7,  /* Aliased Uncorrectable Patrol Data ECC */
    152	EMASK_M9  = 1<<8,  /* Non-Aliased Uncorrectable Non-Mirrored Demand Data ECC */
    153	EMASK_M10 = 1<<9,  /* Unsupported on i5400 */
    154	EMASK_M11 = 1<<10, /* Non-Aliased Uncorrectable Resilver- or Spare-Copy Data ECC  */
    155	EMASK_M12 = 1<<11, /* Non-Aliased Uncorrectable Patrol Data ECC */
    156	EMASK_M13 = 1<<12, /* Memory Write error on first attempt */
    157	EMASK_M14 = 1<<13, /* FB-DIMM Configuration Write error on first attempt */
    158	EMASK_M15 = 1<<14, /* Memory or FB-DIMM configuration CRC read error */
    159	EMASK_M16 = 1<<15, /* Channel Failed-Over Occurred */
    160	EMASK_M17 = 1<<16, /* Correctable Non-Mirrored Demand Data ECC */
    161	EMASK_M18 = 1<<17, /* Unsupported on i5400 */
    162	EMASK_M19 = 1<<18, /* Correctable Resilver- or Spare-Copy Data ECC */
    163	EMASK_M20 = 1<<19, /* Correctable Patrol Data ECC */
    164	EMASK_M21 = 1<<20, /* FB-DIMM Northbound parity error on FB-DIMM Sync Status */
    165	EMASK_M22 = 1<<21, /* SPD protocol Error */
    166	EMASK_M23 = 1<<22, /* Non-Redundant Fast Reset Timeout */
    167	EMASK_M24 = 1<<23, /* Refresh error */
    168	EMASK_M25 = 1<<24, /* Memory Write error on redundant retry */
    169	EMASK_M26 = 1<<25, /* Redundant Fast Reset Timeout */
    170	EMASK_M27 = 1<<26, /* Correctable Counter Threshold Exceeded */
    171	EMASK_M28 = 1<<27, /* DIMM-Spare Copy Completed */
    172	EMASK_M29 = 1<<28, /* DIMM-Isolation Completed */
    173};
    174
    175/*
    176 * Names to translate bit error into something useful
    177 */
    178static const char *error_name[] = {
    179	[0]  = "Memory Write error on non-redundant retry",
    180	[1]  = "Memory or FB-DIMM configuration CRC read error",
    181	/* Reserved */
    182	[3]  = "Uncorrectable Data ECC on Replay",
    183	[4]  = "Aliased Uncorrectable Non-Mirrored Demand Data ECC",
    184	/* M6 Unsupported on i5400 */
    185	[6]  = "Aliased Uncorrectable Resilver- or Spare-Copy Data ECC",
    186	[7]  = "Aliased Uncorrectable Patrol Data ECC",
    187	[8]  = "Non-Aliased Uncorrectable Non-Mirrored Demand Data ECC",
    188	/* M10 Unsupported on i5400 */
    189	[10] = "Non-Aliased Uncorrectable Resilver- or Spare-Copy Data ECC",
    190	[11] = "Non-Aliased Uncorrectable Patrol Data ECC",
    191	[12] = "Memory Write error on first attempt",
    192	[13] = "FB-DIMM Configuration Write error on first attempt",
    193	[14] = "Memory or FB-DIMM configuration CRC read error",
    194	[15] = "Channel Failed-Over Occurred",
    195	[16] = "Correctable Non-Mirrored Demand Data ECC",
    196	/* M18 Unsupported on i5400 */
    197	[18] = "Correctable Resilver- or Spare-Copy Data ECC",
    198	[19] = "Correctable Patrol Data ECC",
    199	[20] = "FB-DIMM Northbound parity error on FB-DIMM Sync Status",
    200	[21] = "SPD protocol Error",
    201	[22] = "Non-Redundant Fast Reset Timeout",
    202	[23] = "Refresh error",
    203	[24] = "Memory Write error on redundant retry",
    204	[25] = "Redundant Fast Reset Timeout",
    205	[26] = "Correctable Counter Threshold Exceeded",
    206	[27] = "DIMM-Spare Copy Completed",
    207	[28] = "DIMM-Isolation Completed",
    208};
    209
    210/* Fatal errors (all ERROR_* masks below use the EMASK_* bit layout) */
    211#define ERROR_FAT_MASK		(EMASK_M1 | \
    212				 EMASK_M2 | \
    213				 EMASK_M23)
    214
    215/* Correctable errors (M18 is listed although it is marked unsupported on i5400) */
    216#define ERROR_NF_CORRECTABLE	(EMASK_M27 | \
    217				 EMASK_M20 | \
    218				 EMASK_M19 | \
    219				 EMASK_M18 | \
    220				 EMASK_M17 | \
    221				 EMASK_M16)
    222#define ERROR_NF_DIMM_SPARE	(EMASK_M29 | \
    223				 EMASK_M28)
    224#define ERROR_NF_SPD_PROTOCOL	(EMASK_M22)
    225#define ERROR_NF_NORTH_CRC	(EMASK_M21)
    226
    227/* Recoverable errors */
    228#define ERROR_NF_RECOVERABLE	(EMASK_M26 | \
    229				 EMASK_M25 | \
    230				 EMASK_M24 | \
    231				 EMASK_M15 | \
    232				 EMASK_M14 | \
    233				 EMASK_M13 | \
    234				 EMASK_M12 | \
    235				 EMASK_M11 | \
    236				 EMASK_M9  | \
    237				 EMASK_M8  | \
    238				 EMASK_M7  | \
    239				 EMASK_M5)
    240
    241/* Uncorrectable errors */
    242#define ERROR_NF_UNCORRECTABLE	(EMASK_M4)
    243
    244/* Mask covering all non-fatal errors */
    245#define ERROR_NF_MASK		(ERROR_NF_CORRECTABLE   | \
    246				 ERROR_NF_UNCORRECTABLE | \
    247				 ERROR_NF_RECOVERABLE   | \
    248				 ERROR_NF_DIMM_SPARE    | \
    249				 ERROR_NF_SPD_PROTOCOL  | \
    250				 ERROR_NF_NORTH_CRC)
    251
    252/*
    253 * Define error masks for the several registers
    254 */
    255
    256/* Enable all fatal and non-fatal errors */
    257#define ENABLE_EMASK_ALL	(ERROR_FAT_MASK | ERROR_NF_MASK)
    258
    259/* Mask for the fatal error registers: same layout as EMASK_*, no translation */
    260#define FERR_FAT_MASK ERROR_FAT_MASK
    261
    262/* masks for non-fatal error register */
    263static inline int to_nf_mask(unsigned int mask)
    264{
    265	return (mask & EMASK_M29) | (mask >> 3);
    266};
    267
    268static inline int from_nf_ferr(unsigned int mask)
    269{
    270	return (mask & EMASK_M29) |		/* Bit 28 */
    271	       (mask & ((1 << 28) - 1) << 3);	/* Bits 0 to 27 */
    272};
    273
    274#define FERR_NF_MASK		to_nf_mask(ERROR_NF_MASK)	/* ERROR_NF_* masks converted by to_nf_mask() for use on raw FERR_NF_FBD values */
    275#define FERR_NF_CORRECTABLE	to_nf_mask(ERROR_NF_CORRECTABLE)
    276#define FERR_NF_DIMM_SPARE	to_nf_mask(ERROR_NF_DIMM_SPARE)
    277#define FERR_NF_SPD_PROTOCOL	to_nf_mask(ERROR_NF_SPD_PROTOCOL)
    278#define FERR_NF_NORTH_CRC	to_nf_mask(ERROR_NF_NORTH_CRC)
    279#define FERR_NF_RECOVERABLE	to_nf_mask(ERROR_NF_RECOVERABLE)
    280#define FERR_NF_UNCORRECTABLE	to_nf_mask(ERROR_NF_UNCORRECTABLE)
    281
    282/* Defines to extract the various fields from the
    283 *	MTRx - Memory Technology Registers
    284 */
    285#define MTR_DIMMS_PRESENT(mtr)		((mtr) & (1 << 10))	/* bit 10: non-zero when DIMMs are present */
    286#define MTR_DIMMS_ETHROTTLE(mtr)	((mtr) & (1 << 9))	/* bit 9: electrical throttling enabled */
    287#define MTR_DRAM_WIDTH(mtr)		(((mtr) & (1 << 8)) ? 8 : 4)	/* bit 8: x8 when set, else x4 */
    288#define MTR_DRAM_BANKS(mtr)		(((mtr) & (1 << 6)) ? 8 : 4)	/* bit 6: 8 banks when set, else 4 */
    289#define MTR_DRAM_BANKS_ADDR_BITS(mtr)	((MTR_DRAM_BANKS(mtr) == 8) ? 3 : 2)
    290#define MTR_DIMM_RANK(mtr)		(((mtr) >> 5) & 0x1)	/* bit 5: 1 = double rank, 0 = single */
    291#define MTR_DIMM_RANK_ADDR_BITS(mtr)	(MTR_DIMM_RANK(mtr) ? 2 : 1)
    292#define MTR_DIMM_ROWS(mtr)		(((mtr) >> 2) & 0x3)	/* bits [3:2]: row-count code */
    293#define MTR_DIMM_ROWS_ADDR_BITS(mtr)	(MTR_DIMM_ROWS(mtr) + 13)	/* 13..16 row address bits */
    294#define MTR_DIMM_COLS(mtr)		((mtr) & 0x3)	/* bits [1:0]: column-count code */
    295#define MTR_DIMM_COLS_ADDR_BITS(mtr)	(MTR_DIMM_COLS(mtr) + 10)	/* 10..12 column address bits; code 3 is reserved */
    296
    297/* This applies to FERR_NF_FB-DIMM as well as FERR_FAT_FB-DIMM */
    298static inline int extract_fbdchan_indx(u32 x)
    299{
    300	return (x>>28) & 0x3;
    301}
    302
    303/* Device name and register DID (Device ID) */
    304struct i5400_dev_info {
    305	const char *ctl_name;	/* name for this device */
    306	u16 fsb_mapping_errors;	/* PCI device ID (DID) of the branchmap/control/errors function */
    307};
    308
    309/* Table of device attributes supported by this driver (a single entry) */
    310static const struct i5400_dev_info i5400_devs[] = {
    311	{
    312		.ctl_name = "I5400",
    313		.fsb_mapping_errors = PCI_DEVICE_ID_INTEL_5400_ERR,
    314	},
    315};
    316
    317struct i5400_dimm_info {
    318	int megabytes;		/* DIMM size in megabytes; 0 means not present */
    319};
    320
    321/* driver private data structure */
    322struct i5400_pvt {
    323	struct pci_dev *system_address;		/* 16.0 */
    324	struct pci_dev *branchmap_werrors;	/* 16.1 */
    325	struct pci_dev *fsb_error_regs;		/* 16.2 */
    326	struct pci_dev *branch_0;		/* 21.0 */
    327	struct pci_dev *branch_1;		/* 22.0 */
    328
    329	u16 tolm;				/* top of low memory */
    330	union {
    331		u64 ambase;				/* AMB BAR */
    332		struct {
    333			u32 ambase_bottom;
    334			u32 ambase_top;
    335		} u __packed;
    336	};
    337
    338	u16 mir0, mir1;
    339
    340	u16 b0_mtr[DIMMS_PER_CHANNEL];	/* Branch 0 Memory Technology Regs */
    341	u16 b0_ambpresent0;			/* Branch 0, Channel 0 */
    342	u16 b0_ambpresent1;			/* Branch 0, Channel 1 */
    343
    344	u16 b1_mtr[DIMMS_PER_CHANNEL];	/* Branch 1 Memory Technology Regs */
    345	u16 b1_ambpresent0;			/* Branch 1, Channel 0 (channel '2' overall) */
    346	u16 b1_ambpresent1;			/* Branch 1, Channel 1 (channel '3' overall) */
    347
    348	/* DIMM information matrix, allocating architecture maximums */
    349	struct i5400_dimm_info dimm_info[DIMMS_PER_CHANNEL][MAX_CHANNELS];
    350
    351	/* Actual values for this controller */
    352	int maxch;				/* Max channels */
    353	int maxdimmperch;			/* Max DIMMs per channel */
    354};
    355
    356/* I5400 MCH error information retrieved from Hardware */
    357struct i5400_error_info {
    358	/* First/next error registers; stored as 0 when no error is latched */
    359	u32 ferr_fat_fbd;	/* First Errors Fatal */
    360	u32 nerr_fat_fbd;	/* Next Errors Fatal */
    361	u32 ferr_nf_fbd;	/* First Errors Non-Fatal */
    362	u32 nerr_nf_fbd;	/* Next Errors Non-Fatal */
    363
    364	/* Read ONLY when FERR_NF_FBD reports an error; zeroed otherwise */
    365	u32 redmemb;		/* Recoverable Mem Data Error log B */
    366	u16 recmema;		/* Recoverable Mem Error log A */
    367	u32 recmemb;		/* Recoverable Mem Error log B */
    368
    369	/* Read ONLY when FERR_FAT_FBD reports an error; zeroed otherwise */
    370	u16 nrecmema;		/* Non-Recoverable Mem log A */
    371	u32 nrecmemb;		/* Non-Recoverable Mem log B */
    372
    373};
    374
    375/* Note that nrec_rdwr moved from NRECMEMA to NRECMEMB between the 5000 and
    376   5400, so it is better to use inline functions than macros in this case. */
    377static inline int nrec_bank(struct i5400_error_info *info)
    378{
    379	return ((info->nrecmema) >> 12) & 0x7;
    380}
    381static inline int nrec_rank(struct i5400_error_info *info)
    382{
    383	return ((info->nrecmema) >> 8) & 0xf;
    384}
    385static inline int nrec_buf_id(struct i5400_error_info *info)
    386{
    387	return ((info->nrecmema)) & 0xff;
    388}
    389static inline int nrec_rdwr(struct i5400_error_info *info)
    390{
    391	return (info->nrecmemb) >> 31;
    392}
    393/* This applies to both NREC and REC string so it can be used with nrec_rdwr
    394   and rec_rdwr */
    395static inline const char *rdwr_str(int rdwr)
    396{
    397	return rdwr ? "Write" : "Read";
    398}
    399static inline int nrec_cas(struct i5400_error_info *info)
    400{
    401	return ((info->nrecmemb) >> 16) & 0x1fff;
    402}
    403static inline int nrec_ras(struct i5400_error_info *info)
    404{
    405	return (info->nrecmemb) & 0xffff;
    406}
    407static inline int rec_bank(struct i5400_error_info *info)
    408{
    409	return ((info->recmema) >> 12) & 0x7;
    410}
    411static inline int rec_rank(struct i5400_error_info *info)
    412{
    413	return ((info->recmema) >> 8) & 0xf;
    414}
    415static inline int rec_rdwr(struct i5400_error_info *info)
    416{
    417	return (info->recmemb) >> 31;
    418}
    419static inline int rec_cas(struct i5400_error_info *info)
    420{
    421	return ((info->recmemb) >> 16) & 0x1fff;
    422}
    423static inline int rec_ras(struct i5400_error_info *info)
    424{
    425	return (info->recmemb) & 0xffff;
    426}
    427
    428static struct edac_pci_ctl_info *i5400_pci;
    429
    430/*
    431 *	i5400_get_error_info	Retrieve the hardware error information from
    432 *				the hardware and cache it in the 'info'
    433 *				structure
    434 */
    435static void i5400_get_error_info(struct mem_ctl_info *mci,
    436				 struct i5400_error_info *info)
    437{
    438	struct i5400_pvt *pvt;
    439	u32 value;
    440
    441	pvt = mci->pvt_info;
    442
    443	/* read in the 1st FATAL error register */
    444	pci_read_config_dword(pvt->branchmap_werrors, FERR_FAT_FBD, &value);
    445
    446	/* Mask only the bits that the doc says are valid
    447	 */
    448	value &= (FERR_FAT_FBDCHAN | FERR_FAT_MASK);
    449
    450	/* If there is an error, then read in the
    451	   NEXT FATAL error register and the Memory Error Log Register A
    452	 */
    453	if (value & FERR_FAT_MASK) {
    454		info->ferr_fat_fbd = value;
    455
    456		/* harvest the various error data we need */
    457		pci_read_config_dword(pvt->branchmap_werrors,
    458				NERR_FAT_FBD, &info->nerr_fat_fbd);
    459		pci_read_config_word(pvt->branchmap_werrors,
    460				NRECMEMA, &info->nrecmema);
    461		pci_read_config_dword(pvt->branchmap_werrors,
    462				NRECMEMB, &info->nrecmemb);
    463
    464		/* Clear the error bits, by writing them back */
    465		pci_write_config_dword(pvt->branchmap_werrors,
    466				FERR_FAT_FBD, value);
    467	} else {
    468		info->ferr_fat_fbd = 0;
    469		info->nerr_fat_fbd = 0;
    470		info->nrecmema = 0;
    471		info->nrecmemb = 0;
    472	}
    473
    474	/* read in the 1st NON-FATAL error register */
    475	pci_read_config_dword(pvt->branchmap_werrors, FERR_NF_FBD, &value);
    476
    477	/* If there is an error, then read in the 1st NON-FATAL error
    478	 * register as well */
    479	if (value & FERR_NF_MASK) {
    480		info->ferr_nf_fbd = value;
    481
    482		/* harvest the various error data we need */
    483		pci_read_config_dword(pvt->branchmap_werrors,
    484				NERR_NF_FBD, &info->nerr_nf_fbd);
    485		pci_read_config_word(pvt->branchmap_werrors,
    486				RECMEMA, &info->recmema);
    487		pci_read_config_dword(pvt->branchmap_werrors,
    488				RECMEMB, &info->recmemb);
    489		pci_read_config_dword(pvt->branchmap_werrors,
    490				REDMEMB, &info->redmemb);
    491
    492		/* Clear the error bits, by writing them back */
    493		pci_write_config_dword(pvt->branchmap_werrors,
    494				FERR_NF_FBD, value);
    495	} else {
    496		info->ferr_nf_fbd = 0;
    497		info->nerr_nf_fbd = 0;
    498		info->recmema = 0;
    499		info->recmemb = 0;
    500		info->redmemb = 0;
    501	}
    502}
    503
    504/*
    505 * i5400_proccess_non_recoverable_info(struct mem_ctl_info *mci,
    506 * 					struct i5400_error_info *info,
    507 * 					int handle_errors);
    508 *
    509 *	handle the Intel FATAL and unrecoverable errors, if any
    510 */
    511static void i5400_proccess_non_recoverable_info(struct mem_ctl_info *mci,
    512				    struct i5400_error_info *info,
    513				    unsigned long allErrors)
    514{
    515	char msg[EDAC_MC_LABEL_LEN + 1 + 90 + 80];
    516	int branch;
    517	int channel;
    518	int bank;
    519	int buf_id;
    520	int rank;
    521	int rdwr;
    522	int ras, cas;
    523	int errnum;
    524	char *type = NULL;
    525	enum hw_event_mc_err_type tp_event = HW_EVENT_ERR_UNCORRECTED;
    526
    527	if (!allErrors)
    528		return;		/* if no error, return now */
    529
    530	if (allErrors &  ERROR_FAT_MASK) {
    531		type = "FATAL";
    532		tp_event = HW_EVENT_ERR_FATAL;
    533	} else if (allErrors & FERR_NF_UNCORRECTABLE)
    534		type = "NON-FATAL uncorrected";
    535	else
    536		type = "NON-FATAL recoverable";
    537
    538	/* ONLY ONE of the possible error bits will be set, as per the docs */
    539
    540	branch = extract_fbdchan_indx(info->ferr_fat_fbd);
    541	channel = branch;
    542
    543	/* Use the NON-Recoverable macros to extract data */
    544	bank = nrec_bank(info);
    545	rank = nrec_rank(info);
    546	buf_id = nrec_buf_id(info);
    547	rdwr = nrec_rdwr(info);
    548	ras = nrec_ras(info);
    549	cas = nrec_cas(info);
    550
    551	edac_dbg(0, "\t\t%s DIMM= %d  Channels= %d,%d  (Branch= %d DRAM Bank= %d Buffer ID = %d rdwr= %s ras= %d cas= %d)\n",
    552		 type, rank, channel, channel + 1, branch >> 1, bank,
    553		 buf_id, rdwr_str(rdwr), ras, cas);
    554
    555	/* Only 1 bit will be on */
    556	errnum = find_first_bit(&allErrors, ARRAY_SIZE(error_name));
    557
    558	/* Form out message */
    559	snprintf(msg, sizeof(msg),
    560		 "Bank=%d Buffer ID = %d RAS=%d CAS=%d Err=0x%lx (%s)",
    561		 bank, buf_id, ras, cas, allErrors, error_name[errnum]);
    562
    563	edac_mc_handle_error(tp_event, mci, 1, 0, 0, 0,
    564			     branch >> 1, -1, rank,
    565			     rdwr ? "Write error" : "Read error",
    566			     msg);
    567}
    568
    569/*
    570 * i5400_process_fatal_error_info(struct mem_ctl_info *mci,
    571 * 				struct i5400_error_info *info,
    572 * 				int handle_errors);
    573 *
    574 *	handle the Intel NON-FATAL errors, if any
    575 */
    576static void i5400_process_nonfatal_error_info(struct mem_ctl_info *mci,
    577					struct i5400_error_info *info)
    578{
    579	char msg[EDAC_MC_LABEL_LEN + 1 + 90 + 80];
    580	unsigned long allErrors;
    581	int branch;
    582	int channel;
    583	int bank;
    584	int rank;
    585	int rdwr;
    586	int ras, cas;
    587	int errnum;
    588
    589	/* mask off the Error bits that are possible */
    590	allErrors = from_nf_ferr(info->ferr_nf_fbd & FERR_NF_MASK);
    591	if (!allErrors)
    592		return;		/* if no error, return now */
    593
    594	/* ONLY ONE of the possible error bits will be set, as per the docs */
    595
    596	if (allErrors & (ERROR_NF_UNCORRECTABLE | ERROR_NF_RECOVERABLE)) {
    597		i5400_proccess_non_recoverable_info(mci, info, allErrors);
    598		return;
    599	}
    600
    601	/* Correctable errors */
    602	if (allErrors & ERROR_NF_CORRECTABLE) {
    603		edac_dbg(0, "\tCorrected bits= 0x%lx\n", allErrors);
    604
    605		branch = extract_fbdchan_indx(info->ferr_nf_fbd);
    606
    607		channel = 0;
    608		if (REC_ECC_LOCATOR_ODD(info->redmemb))
    609			channel = 1;
    610
    611		/* Convert channel to be based from zero, instead of
    612		 * from branch base of 0 */
    613		channel += branch;
    614
    615		bank = rec_bank(info);
    616		rank = rec_rank(info);
    617		rdwr = rec_rdwr(info);
    618		ras = rec_ras(info);
    619		cas = rec_cas(info);
    620
    621		/* Only 1 bit will be on */
    622		errnum = find_first_bit(&allErrors, ARRAY_SIZE(error_name));
    623
    624		edac_dbg(0, "\t\tDIMM= %d Channel= %d  (Branch %d DRAM Bank= %d rdwr= %s ras= %d cas= %d)\n",
    625			 rank, channel, branch >> 1, bank,
    626			 rdwr_str(rdwr), ras, cas);
    627
    628		/* Form out message */
    629		snprintf(msg, sizeof(msg),
    630			 "Corrected error (Branch=%d DRAM-Bank=%d RDWR=%s "
    631			 "RAS=%d CAS=%d, CE Err=0x%lx (%s))",
    632			 branch >> 1, bank, rdwr_str(rdwr), ras, cas,
    633			 allErrors, error_name[errnum]);
    634
    635		edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1, 0, 0, 0,
    636				     branch >> 1, channel % 2, rank,
    637				     rdwr ? "Write error" : "Read error",
    638				     msg);
    639
    640		return;
    641	}
    642
    643	/* Miscellaneous errors */
    644	errnum = find_first_bit(&allErrors, ARRAY_SIZE(error_name));
    645
    646	branch = extract_fbdchan_indx(info->ferr_nf_fbd);
    647
    648	i5400_mc_printk(mci, KERN_EMERG,
    649			"Non-Fatal misc error (Branch=%d Err=%#lx (%s))",
    650			branch >> 1, allErrors, error_name[errnum]);
    651}
    652
    653/*
    654 *	i5400_process_error_info	Process the error info that is
    655 *	in the 'info' structure, previously retrieved from hardware
    656 */
    657static void i5400_process_error_info(struct mem_ctl_info *mci,
    658				struct i5400_error_info *info)
    659{	u32 allErrors;
    660
    661	/* First handle any fatal errors that occurred */
    662	allErrors = (info->ferr_fat_fbd & FERR_FAT_MASK);
    663	i5400_proccess_non_recoverable_info(mci, info, allErrors);
    664
    665	/* now handle any non-fatal errors that occurred */
    666	i5400_process_nonfatal_error_info(mci, info);
    667}
    668
    669/*
    670 *	i5400_clear_error	Retrieve any error from the hardware
    671 *				but do NOT process that error.
    672 *				Used for 'clearing' out of previous errors
    673 *				Called by the Core module.
    674 */
    675static void i5400_clear_error(struct mem_ctl_info *mci)
    676{
    677	struct i5400_error_info info;
    678
    679	i5400_get_error_info(mci, &info);
    680}
    681
    682/*
    683 *	i5400_check_error	Retrieve and process errors reported by the
    684 *				hardware. Called by the Core module.
    685 */
    686static void i5400_check_error(struct mem_ctl_info *mci)
    687{
    688	struct i5400_error_info info;
    689
    690	i5400_get_error_info(mci, &info);
    691	i5400_process_error_info(mci, &info);
    692}
    693
    694/*
    695 *	i5400_put_devices	'put' all the devices that we have
    696 *				reserved via 'get'
    697 */
    698static void i5400_put_devices(struct mem_ctl_info *mci)
    699{
    700	struct i5400_pvt *pvt;
    701
    702	pvt = mci->pvt_info;
    703
    704	/* Decrement usage count for devices */
    705	pci_dev_put(pvt->branch_1);
    706	pci_dev_put(pvt->branch_0);
    707	pci_dev_put(pvt->fsb_error_regs);
    708	pci_dev_put(pvt->branchmap_werrors);
    709}
    710
    711/*
    712 *	i5400_get_devices	Find and perform 'get' operation on the MCH's
    713 *			device/functions we want to reference for this driver
    714 *
    715 *			Need to 'get' device 16 func 1 and func 2
    716 */
    717static int i5400_get_devices(struct mem_ctl_info *mci, int dev_idx)
    718{
    719	struct i5400_pvt *pvt;
    720	struct pci_dev *pdev;
    721
    722	pvt = mci->pvt_info;
    723	pvt->branchmap_werrors = NULL;
    724	pvt->fsb_error_regs = NULL;
    725	pvt->branch_0 = NULL;
    726	pvt->branch_1 = NULL;
    727
    728	/* Attempt to 'get' the MCH register we want */
    729	pdev = NULL;
    730	while (1) {
    731		pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
    732				      PCI_DEVICE_ID_INTEL_5400_ERR, pdev);
    733		if (!pdev) {
    734			/* End of list, leave */
    735			i5400_printk(KERN_ERR,
    736				"'system address,Process Bus' "
    737				"device not found:"
    738				"vendor 0x%x device 0x%x ERR func 1 "
    739				"(broken BIOS?)\n",
    740				PCI_VENDOR_ID_INTEL,
    741				PCI_DEVICE_ID_INTEL_5400_ERR);
    742			return -ENODEV;
    743		}
    744
    745		/* Store device 16 func 1 */
    746		if (PCI_FUNC(pdev->devfn) == 1)
    747			break;
    748	}
    749	pvt->branchmap_werrors = pdev;
    750
    751	pdev = NULL;
    752	while (1) {
    753		pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
    754				      PCI_DEVICE_ID_INTEL_5400_ERR, pdev);
    755		if (!pdev) {
    756			/* End of list, leave */
    757			i5400_printk(KERN_ERR,
    758				"'system address,Process Bus' "
    759				"device not found:"
    760				"vendor 0x%x device 0x%x ERR func 2 "
    761				"(broken BIOS?)\n",
    762				PCI_VENDOR_ID_INTEL,
    763				PCI_DEVICE_ID_INTEL_5400_ERR);
    764
    765			pci_dev_put(pvt->branchmap_werrors);
    766			return -ENODEV;
    767		}
    768
    769		/* Store device 16 func 2 */
    770		if (PCI_FUNC(pdev->devfn) == 2)
    771			break;
    772	}
    773	pvt->fsb_error_regs = pdev;
    774
    775	edac_dbg(1, "System Address, processor bus- PCI Bus ID: %s  %x:%x\n",
    776		 pci_name(pvt->system_address),
    777		 pvt->system_address->vendor, pvt->system_address->device);
    778	edac_dbg(1, "Branchmap, control and errors - PCI Bus ID: %s  %x:%x\n",
    779		 pci_name(pvt->branchmap_werrors),
    780		 pvt->branchmap_werrors->vendor,
    781		 pvt->branchmap_werrors->device);
    782	edac_dbg(1, "FSB Error Regs - PCI Bus ID: %s  %x:%x\n",
    783		 pci_name(pvt->fsb_error_regs),
    784		 pvt->fsb_error_regs->vendor, pvt->fsb_error_regs->device);
    785
    786	pvt->branch_0 = pci_get_device(PCI_VENDOR_ID_INTEL,
    787				       PCI_DEVICE_ID_INTEL_5400_FBD0, NULL);
    788	if (!pvt->branch_0) {
    789		i5400_printk(KERN_ERR,
    790			"MC: 'BRANCH 0' device not found:"
    791			"vendor 0x%x device 0x%x Func 0 (broken BIOS?)\n",
    792			PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_5400_FBD0);
    793
    794		pci_dev_put(pvt->fsb_error_regs);
    795		pci_dev_put(pvt->branchmap_werrors);
    796		return -ENODEV;
    797	}
    798
    799	/* If this device claims to have more than 2 channels then
    800	 * fetch Branch 1's information
    801	 */
    802	if (pvt->maxch < CHANNELS_PER_BRANCH)
    803		return 0;
    804
    805	pvt->branch_1 = pci_get_device(PCI_VENDOR_ID_INTEL,
    806				       PCI_DEVICE_ID_INTEL_5400_FBD1, NULL);
    807	if (!pvt->branch_1) {
    808		i5400_printk(KERN_ERR,
    809			"MC: 'BRANCH 1' device not found:"
    810			"vendor 0x%x device 0x%x Func 0 "
    811			"(broken BIOS?)\n",
    812			PCI_VENDOR_ID_INTEL,
    813			PCI_DEVICE_ID_INTEL_5400_FBD1);
    814
    815		pci_dev_put(pvt->branch_0);
    816		pci_dev_put(pvt->fsb_error_regs);
    817		pci_dev_put(pvt->branchmap_werrors);
    818		return -ENODEV;
    819	}
    820
    821	return 0;
    822}
    823
    824/*
    825 *	determine_amb_present
    826 *
    827 *		the information is contained in DIMMS_PER_CHANNEL different
    828 *		registers determining which of the DIMMS_PER_CHANNEL requires
    829 *              knowing which channel is in question
    830 *
    831 *	2 branches, each with 2 channels
    832 *		b0_ambpresent0 for channel '0'
    833 *		b0_ambpresent1 for channel '1'
    834 *		b1_ambpresent0 for channel '2'
    835 *		b1_ambpresent1 for channel '3'
    836 */
    837static int determine_amb_present_reg(struct i5400_pvt *pvt, int channel)
    838{
    839	int amb_present;
    840
    841	if (channel < CHANNELS_PER_BRANCH) {
    842		if (channel & 0x1)
    843			amb_present = pvt->b0_ambpresent1;
    844		else
    845			amb_present = pvt->b0_ambpresent0;
    846	} else {
    847		if (channel & 0x1)
    848			amb_present = pvt->b1_ambpresent1;
    849		else
    850			amb_present = pvt->b1_ambpresent0;
    851	}
    852
    853	return amb_present;
    854}
    855
    856/*
    857 * determine_mtr(pvt, dimm, channel)
    858 *
    859 * return the proper MTR register as determine by the dimm and desired channel
    860 */
    861static int determine_mtr(struct i5400_pvt *pvt, int dimm, int channel)
    862{
    863	int mtr;
    864	int n;
    865
    866	/* There is one MTR for each slot pair of FB-DIMMs,
    867	   Each slot pair may be at branch 0 or branch 1.
    868	 */
    869	n = dimm;
    870
    871	if (n >= DIMMS_PER_CHANNEL) {
    872		edac_dbg(0, "ERROR: trying to access an invalid dimm: %d\n",
    873			 dimm);
    874		return 0;
    875	}
    876
    877	if (channel < CHANNELS_PER_BRANCH)
    878		mtr = pvt->b0_mtr[n];
    879	else
    880		mtr = pvt->b1_mtr[n];
    881
    882	return mtr;
    883}
    884
    885/*
    886 */
    887static void decode_mtr(int slot_row, u16 mtr)
    888{
    889	int ans;
    890
    891	ans = MTR_DIMMS_PRESENT(mtr);
    892
    893	edac_dbg(2, "\tMTR%d=0x%x:  DIMMs are %sPresent\n",
    894		 slot_row, mtr, ans ? "" : "NOT ");
    895	if (!ans)
    896		return;
    897
    898	edac_dbg(2, "\t\tWIDTH: x%d\n", MTR_DRAM_WIDTH(mtr));
    899
    900	edac_dbg(2, "\t\tELECTRICAL THROTTLING is %s\n",
    901		 MTR_DIMMS_ETHROTTLE(mtr) ? "enabled" : "disabled");
    902
    903	edac_dbg(2, "\t\tNUMBANK: %d bank(s)\n", MTR_DRAM_BANKS(mtr));
    904	edac_dbg(2, "\t\tNUMRANK: %s\n",
    905		 MTR_DIMM_RANK(mtr) ? "double" : "single");
    906	edac_dbg(2, "\t\tNUMROW: %s\n",
    907		 MTR_DIMM_ROWS(mtr) == 0 ? "8,192 - 13 rows" :
    908		 MTR_DIMM_ROWS(mtr) == 1 ? "16,384 - 14 rows" :
    909		 MTR_DIMM_ROWS(mtr) == 2 ? "32,768 - 15 rows" :
    910		 "65,536 - 16 rows");
    911	edac_dbg(2, "\t\tNUMCOL: %s\n",
    912		 MTR_DIMM_COLS(mtr) == 0 ? "1,024 - 10 columns" :
    913		 MTR_DIMM_COLS(mtr) == 1 ? "2,048 - 11 columns" :
    914		 MTR_DIMM_COLS(mtr) == 2 ? "4,096 - 12 columns" :
    915		 "reserved");
    916}
    917
    918static void handle_channel(struct i5400_pvt *pvt, int dimm, int channel,
    919			struct i5400_dimm_info *dinfo)
    920{
    921	int mtr;
    922	int amb_present_reg;
    923	int addrBits;
    924
    925	mtr = determine_mtr(pvt, dimm, channel);
    926	if (MTR_DIMMS_PRESENT(mtr)) {
    927		amb_present_reg = determine_amb_present_reg(pvt, channel);
    928
    929		/* Determine if there is a DIMM present in this DIMM slot */
    930		if (amb_present_reg & (1 << dimm)) {
    931			/* Start with the number of bits for a Bank
    932			 * on the DRAM */
    933			addrBits = MTR_DRAM_BANKS_ADDR_BITS(mtr);
    934			/* Add thenumber of ROW bits */
    935			addrBits += MTR_DIMM_ROWS_ADDR_BITS(mtr);
    936			/* add the number of COLUMN bits */
    937			addrBits += MTR_DIMM_COLS_ADDR_BITS(mtr);
    938			/* add the number of RANK bits */
    939			addrBits += MTR_DIMM_RANK(mtr);
    940
    941			addrBits += 6;	/* add 64 bits per DIMM */
    942			addrBits -= 20;	/* divide by 2^^20 */
    943			addrBits -= 3;	/* 8 bits per bytes */
    944
    945			dinfo->megabytes = 1 << addrBits;
    946		}
    947	}
    948}
    949
    950/*
    951 *	calculate_dimm_size
    952 *
    953 *	also will output a DIMM matrix map, if debug is enabled, for viewing
    954 *	how the DIMMs are populated
    955 */
    956static void calculate_dimm_size(struct i5400_pvt *pvt)
    957{
    958	struct i5400_dimm_info *dinfo;
    959	int dimm, max_dimms;
    960	char *p, *mem_buffer;
    961	int space, n;
    962	int channel, branch;
    963
    964	/* ================= Generate some debug output ================= */
    965	space = PAGE_SIZE;
    966	mem_buffer = p = kmalloc(space, GFP_KERNEL);
    967	if (p == NULL) {
    968		i5400_printk(KERN_ERR, "MC: %s:%s() kmalloc() failed\n",
    969			__FILE__, __func__);
    970		return;
    971	}
    972
    973	/* Scan all the actual DIMMS
    974	 * and calculate the information for each DIMM
    975	 * Start with the highest dimm first, to display it first
    976	 * and work toward the 0th dimm
    977	 */
    978	max_dimms = pvt->maxdimmperch;
    979	for (dimm = max_dimms - 1; dimm >= 0; dimm--) {
    980
    981		/* on an odd dimm, first output a 'boundary' marker,
    982		 * then reset the message buffer  */
    983		if (dimm & 0x1) {
    984			n = snprintf(p, space, "---------------------------"
    985					"-------------------------------");
    986			p += n;
    987			space -= n;
    988			edac_dbg(2, "%s\n", mem_buffer);
    989			p = mem_buffer;
    990			space = PAGE_SIZE;
    991		}
    992		n = snprintf(p, space, "dimm %2d    ", dimm);
    993		p += n;
    994		space -= n;
    995
    996		for (channel = 0; channel < pvt->maxch; channel++) {
    997			dinfo = &pvt->dimm_info[dimm][channel];
    998			handle_channel(pvt, dimm, channel, dinfo);
    999			n = snprintf(p, space, "%4d MB   | ", dinfo->megabytes);
   1000			p += n;
   1001			space -= n;
   1002		}
   1003		edac_dbg(2, "%s\n", mem_buffer);
   1004		p = mem_buffer;
   1005		space = PAGE_SIZE;
   1006	}
   1007
   1008	/* Output the last bottom 'boundary' marker */
   1009	n = snprintf(p, space, "---------------------------"
   1010			"-------------------------------");
   1011	p += n;
   1012	space -= n;
   1013	edac_dbg(2, "%s\n", mem_buffer);
   1014	p = mem_buffer;
   1015	space = PAGE_SIZE;
   1016
   1017	/* now output the 'channel' labels */
   1018	n = snprintf(p, space, "           ");
   1019	p += n;
   1020	space -= n;
   1021	for (channel = 0; channel < pvt->maxch; channel++) {
   1022		n = snprintf(p, space, "channel %d | ", channel);
   1023		p += n;
   1024		space -= n;
   1025	}
   1026
   1027	space -= n;
   1028	edac_dbg(2, "%s\n", mem_buffer);
   1029	p = mem_buffer;
   1030	space = PAGE_SIZE;
   1031
   1032	n = snprintf(p, space, "           ");
   1033	p += n;
   1034	for (branch = 0; branch < MAX_BRANCHES; branch++) {
   1035		n = snprintf(p, space, "       branch %d       | ", branch);
   1036		p += n;
   1037		space -= n;
   1038	}
   1039
   1040	/* output the last message and free buffer */
   1041	edac_dbg(2, "%s\n", mem_buffer);
   1042	kfree(mem_buffer);
   1043}
   1044
   1045/*
   1046 *	i5400_get_mc_regs	read in the necessary registers and
   1047 *				cache locally
   1048 *
   1049 *			Fills in the private data members
   1050 */
   1051static void i5400_get_mc_regs(struct mem_ctl_info *mci)
   1052{
   1053	struct i5400_pvt *pvt;
   1054	u32 actual_tolm;
   1055	u16 limit;
   1056	int slot_row;
   1057	int way0, way1;
   1058
   1059	pvt = mci->pvt_info;
   1060
   1061	pci_read_config_dword(pvt->system_address, AMBASE,
   1062			&pvt->u.ambase_bottom);
   1063	pci_read_config_dword(pvt->system_address, AMBASE + sizeof(u32),
   1064			&pvt->u.ambase_top);
   1065
   1066	edac_dbg(2, "AMBASE= 0x%lx  MAXCH= %d  MAX-DIMM-Per-CH= %d\n",
   1067		 (long unsigned int)pvt->ambase, pvt->maxch, pvt->maxdimmperch);
   1068
   1069	/* Get the Branch Map regs */
   1070	pci_read_config_word(pvt->branchmap_werrors, TOLM, &pvt->tolm);
   1071	pvt->tolm >>= 12;
   1072	edac_dbg(2, "\nTOLM (number of 256M regions) =%u (0x%x)\n",
   1073		 pvt->tolm, pvt->tolm);
   1074
   1075	actual_tolm = (u32) ((1000l * pvt->tolm) >> (30 - 28));
   1076	edac_dbg(2, "Actual TOLM byte addr=%u.%03u GB (0x%x)\n",
   1077		 actual_tolm/1000, actual_tolm % 1000, pvt->tolm << 28);
   1078
   1079	pci_read_config_word(pvt->branchmap_werrors, MIR0, &pvt->mir0);
   1080	pci_read_config_word(pvt->branchmap_werrors, MIR1, &pvt->mir1);
   1081
   1082	/* Get the MIR[0-1] regs */
   1083	limit = (pvt->mir0 >> 4) & 0x0fff;
   1084	way0 = pvt->mir0 & 0x1;
   1085	way1 = pvt->mir0 & 0x2;
   1086	edac_dbg(2, "MIR0: limit= 0x%x  WAY1= %u  WAY0= %x\n",
   1087		 limit, way1, way0);
   1088	limit = (pvt->mir1 >> 4) & 0xfff;
   1089	way0 = pvt->mir1 & 0x1;
   1090	way1 = pvt->mir1 & 0x2;
   1091	edac_dbg(2, "MIR1: limit= 0x%x  WAY1= %u  WAY0= %x\n",
   1092		 limit, way1, way0);
   1093
   1094	/* Get the set of MTR[0-3] regs by each branch */
   1095	for (slot_row = 0; slot_row < DIMMS_PER_CHANNEL; slot_row++) {
   1096		int where = MTR0 + (slot_row * sizeof(u16));
   1097
   1098		/* Branch 0 set of MTR registers */
   1099		pci_read_config_word(pvt->branch_0, where,
   1100				&pvt->b0_mtr[slot_row]);
   1101
   1102		edac_dbg(2, "MTR%d where=0x%x B0 value=0x%x\n",
   1103			 slot_row, where, pvt->b0_mtr[slot_row]);
   1104
   1105		if (pvt->maxch < CHANNELS_PER_BRANCH) {
   1106			pvt->b1_mtr[slot_row] = 0;
   1107			continue;
   1108		}
   1109
   1110		/* Branch 1 set of MTR registers */
   1111		pci_read_config_word(pvt->branch_1, where,
   1112				&pvt->b1_mtr[slot_row]);
   1113		edac_dbg(2, "MTR%d where=0x%x B1 value=0x%x\n",
   1114			 slot_row, where, pvt->b1_mtr[slot_row]);
   1115	}
   1116
   1117	/* Read and dump branch 0's MTRs */
   1118	edac_dbg(2, "Memory Technology Registers:\n");
   1119	edac_dbg(2, "   Branch 0:\n");
   1120	for (slot_row = 0; slot_row < DIMMS_PER_CHANNEL; slot_row++)
   1121		decode_mtr(slot_row, pvt->b0_mtr[slot_row]);
   1122
   1123	pci_read_config_word(pvt->branch_0, AMBPRESENT_0,
   1124			&pvt->b0_ambpresent0);
   1125	edac_dbg(2, "\t\tAMB-Branch 0-present0 0x%x:\n", pvt->b0_ambpresent0);
   1126	pci_read_config_word(pvt->branch_0, AMBPRESENT_1,
   1127			&pvt->b0_ambpresent1);
   1128	edac_dbg(2, "\t\tAMB-Branch 0-present1 0x%x:\n", pvt->b0_ambpresent1);
   1129
   1130	/* Only if we have 2 branchs (4 channels) */
   1131	if (pvt->maxch < CHANNELS_PER_BRANCH) {
   1132		pvt->b1_ambpresent0 = 0;
   1133		pvt->b1_ambpresent1 = 0;
   1134	} else {
   1135		/* Read and dump  branch 1's MTRs */
   1136		edac_dbg(2, "   Branch 1:\n");
   1137		for (slot_row = 0; slot_row < DIMMS_PER_CHANNEL; slot_row++)
   1138			decode_mtr(slot_row, pvt->b1_mtr[slot_row]);
   1139
   1140		pci_read_config_word(pvt->branch_1, AMBPRESENT_0,
   1141				&pvt->b1_ambpresent0);
   1142		edac_dbg(2, "\t\tAMB-Branch 1-present0 0x%x:\n",
   1143			 pvt->b1_ambpresent0);
   1144		pci_read_config_word(pvt->branch_1, AMBPRESENT_1,
   1145				&pvt->b1_ambpresent1);
   1146		edac_dbg(2, "\t\tAMB-Branch 1-present1 0x%x:\n",
   1147			 pvt->b1_ambpresent1);
   1148	}
   1149
   1150	/* Go and determine the size of each DIMM and place in an
   1151	 * orderly matrix */
   1152	calculate_dimm_size(pvt);
   1153}
   1154
   1155/*
   1156 *	i5400_init_dimms	Initialize the 'dimms' table within
   1157 *				the mci control	structure with the
   1158 *				addressing of memory.
   1159 *
   1160 *	return:
   1161 *		0	success
   1162 *		1	no actual memory found on this MC
   1163 */
   1164static int i5400_init_dimms(struct mem_ctl_info *mci)
   1165{
   1166	struct i5400_pvt *pvt;
   1167	struct dimm_info *dimm;
   1168	int ndimms;
   1169	int mtr;
   1170	int size_mb;
   1171	int  channel, slot;
   1172
   1173	pvt = mci->pvt_info;
   1174
   1175	ndimms = 0;
   1176
   1177	/*
   1178	 * FIXME: remove  pvt->dimm_info[slot][channel] and use the 3
   1179	 * layers here.
   1180	 */
   1181	for (channel = 0; channel < mci->layers[0].size * mci->layers[1].size;
   1182	     channel++) {
   1183		for (slot = 0; slot < mci->layers[2].size; slot++) {
   1184			mtr = determine_mtr(pvt, slot, channel);
   1185
   1186			/* if no DIMMS on this slot, continue */
   1187			if (!MTR_DIMMS_PRESENT(mtr))
   1188				continue;
   1189
   1190			dimm = edac_get_dimm(mci, channel / 2, channel % 2, slot);
   1191
   1192			size_mb =  pvt->dimm_info[slot][channel].megabytes;
   1193
   1194			edac_dbg(2, "dimm (branch %d channel %d slot %d): %d.%03d GB\n",
   1195				 channel / 2, channel % 2, slot,
   1196				 size_mb / 1000, size_mb % 1000);
   1197
   1198			dimm->nr_pages = size_mb << 8;
   1199			dimm->grain = 8;
   1200			dimm->dtype = MTR_DRAM_WIDTH(mtr) == 8 ?
   1201				      DEV_X8 : DEV_X4;
   1202			dimm->mtype = MEM_FB_DDR2;
   1203			/*
   1204			 * The eccc mechanism is SDDC (aka SECC), with
   1205			 * is similar to Chipkill.
   1206			 */
   1207			dimm->edac_mode = MTR_DRAM_WIDTH(mtr) == 8 ?
   1208					  EDAC_S8ECD8ED : EDAC_S4ECD4ED;
   1209			ndimms++;
   1210		}
   1211	}
   1212
   1213	/*
   1214	 * When just one memory is provided, it should be at location (0,0,0).
   1215	 * With such single-DIMM mode, the SDCC algorithm degrades to SECDEC+.
   1216	 */
   1217	if (ndimms == 1)
   1218		mci->dimms[0]->edac_mode = EDAC_SECDED;
   1219
   1220	return (ndimms == 0);
   1221}
   1222
   1223/*
   1224 *	i5400_enable_error_reporting
   1225 *			Turn on the memory reporting features of the hardware
   1226 */
   1227static void i5400_enable_error_reporting(struct mem_ctl_info *mci)
   1228{
   1229	struct i5400_pvt *pvt;
   1230	u32 fbd_error_mask;
   1231
   1232	pvt = mci->pvt_info;
   1233
   1234	/* Read the FBD Error Mask Register */
   1235	pci_read_config_dword(pvt->branchmap_werrors, EMASK_FBD,
   1236			&fbd_error_mask);
   1237
   1238	/* Enable with a '0' */
   1239	fbd_error_mask &= ~(ENABLE_EMASK_ALL);
   1240
   1241	pci_write_config_dword(pvt->branchmap_werrors, EMASK_FBD,
   1242			fbd_error_mask);
   1243}
   1244
   1245/*
   1246 *	i5400_probe1	Probe for ONE instance of device to see if it is
   1247 *			present.
   1248 *	return:
   1249 *		0 for FOUND a device
   1250 *		< 0 for error code
   1251 */
   1252static int i5400_probe1(struct pci_dev *pdev, int dev_idx)
   1253{
   1254	struct mem_ctl_info *mci;
   1255	struct i5400_pvt *pvt;
   1256	struct edac_mc_layer layers[3];
   1257
   1258	if (dev_idx >= ARRAY_SIZE(i5400_devs))
   1259		return -EINVAL;
   1260
   1261	edac_dbg(0, "MC: pdev bus %u dev=0x%x fn=0x%x\n",
   1262		 pdev->bus->number,
   1263		 PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
   1264
   1265	/* We only are looking for func 0 of the set */
   1266	if (PCI_FUNC(pdev->devfn) != 0)
   1267		return -ENODEV;
   1268
   1269	/*
   1270	 * allocate a new MC control structure
   1271	 *
   1272	 * This drivers uses the DIMM slot as "csrow" and the rest as "channel".
   1273	 */
   1274	layers[0].type = EDAC_MC_LAYER_BRANCH;
   1275	layers[0].size = MAX_BRANCHES;
   1276	layers[0].is_virt_csrow = false;
   1277	layers[1].type = EDAC_MC_LAYER_CHANNEL;
   1278	layers[1].size = CHANNELS_PER_BRANCH;
   1279	layers[1].is_virt_csrow = false;
   1280	layers[2].type = EDAC_MC_LAYER_SLOT;
   1281	layers[2].size = DIMMS_PER_CHANNEL;
   1282	layers[2].is_virt_csrow = true;
   1283	mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers, sizeof(*pvt));
   1284	if (mci == NULL)
   1285		return -ENOMEM;
   1286
   1287	edac_dbg(0, "MC: mci = %p\n", mci);
   1288
   1289	mci->pdev = &pdev->dev;	/* record ptr  to the generic device */
   1290
   1291	pvt = mci->pvt_info;
   1292	pvt->system_address = pdev;	/* Record this device in our private */
   1293	pvt->maxch = MAX_CHANNELS;
   1294	pvt->maxdimmperch = DIMMS_PER_CHANNEL;
   1295
   1296	/* 'get' the pci devices we want to reserve for our use */
   1297	if (i5400_get_devices(mci, dev_idx))
   1298		goto fail0;
   1299
   1300	/* Time to get serious */
   1301	i5400_get_mc_regs(mci);	/* retrieve the hardware registers */
   1302
   1303	mci->mc_idx = 0;
   1304	mci->mtype_cap = MEM_FLAG_FB_DDR2;
   1305	mci->edac_ctl_cap = EDAC_FLAG_NONE;
   1306	mci->edac_cap = EDAC_FLAG_NONE;
   1307	mci->mod_name = "i5400_edac.c";
   1308	mci->ctl_name = i5400_devs[dev_idx].ctl_name;
   1309	mci->dev_name = pci_name(pdev);
   1310	mci->ctl_page_to_phys = NULL;
   1311
   1312	/* Set the function pointer to an actual operation function */
   1313	mci->edac_check = i5400_check_error;
   1314
   1315	/* initialize the MC control structure 'dimms' table
   1316	 * with the mapping and control information */
   1317	if (i5400_init_dimms(mci)) {
   1318		edac_dbg(0, "MC: Setting mci->edac_cap to EDAC_FLAG_NONE because i5400_init_dimms() returned nonzero value\n");
   1319		mci->edac_cap = EDAC_FLAG_NONE;	/* no dimms found */
   1320	} else {
   1321		edac_dbg(1, "MC: Enable error reporting now\n");
   1322		i5400_enable_error_reporting(mci);
   1323	}
   1324
   1325	/* add this new MC control structure to EDAC's list of MCs */
   1326	if (edac_mc_add_mc(mci)) {
   1327		edac_dbg(0, "MC: failed edac_mc_add_mc()\n");
   1328		/* FIXME: perhaps some code should go here that disables error
   1329		 * reporting if we just enabled it
   1330		 */
   1331		goto fail1;
   1332	}
   1333
   1334	i5400_clear_error(mci);
   1335
   1336	/* allocating generic PCI control info */
   1337	i5400_pci = edac_pci_create_generic_ctl(&pdev->dev, EDAC_MOD_STR);
   1338	if (!i5400_pci) {
   1339		printk(KERN_WARNING
   1340			"%s(): Unable to create PCI control\n",
   1341			__func__);
   1342		printk(KERN_WARNING
   1343			"%s(): PCI error report via EDAC not setup\n",
   1344			__func__);
   1345	}
   1346
   1347	return 0;
   1348
   1349	/* Error exit unwinding stack */
   1350fail1:
   1351
   1352	i5400_put_devices(mci);
   1353
   1354fail0:
   1355	edac_mc_free(mci);
   1356	return -ENODEV;
   1357}
   1358
   1359/*
   1360 *	i5400_init_one	constructor for one instance of device
   1361 *
   1362 * 	returns:
   1363 *		negative on error
   1364 *		count (>= 0)
   1365 */
   1366static int i5400_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
   1367{
   1368	int rc;
   1369
   1370	edac_dbg(0, "MC:\n");
   1371
   1372	/* wake up device */
   1373	rc = pci_enable_device(pdev);
   1374	if (rc)
   1375		return rc;
   1376
   1377	/* now probe and enable the device */
   1378	return i5400_probe1(pdev, id->driver_data);
   1379}
   1380
   1381/*
   1382 *	i5400_remove_one	destructor for one instance of device
   1383 *
   1384 */
   1385static void i5400_remove_one(struct pci_dev *pdev)
   1386{
   1387	struct mem_ctl_info *mci;
   1388
   1389	edac_dbg(0, "\n");
   1390
   1391	if (i5400_pci)
   1392		edac_pci_release_generic_ctl(i5400_pci);
   1393
   1394	mci = edac_mc_del_mc(&pdev->dev);
   1395	if (!mci)
   1396		return;
   1397
   1398	/* retrieve references to resources, and free those resources */
   1399	i5400_put_devices(mci);
   1400
   1401	pci_disable_device(pdev);
   1402
   1403	edac_mc_free(mci);
   1404}
   1405
   1406/*
   1407 *	pci_device_id	table for which devices we are looking for
   1408 *
   1409 *	The "E500P" device is the first device supported.
   1410 */
   1411static const struct pci_device_id i5400_pci_tbl[] = {
   1412	{PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_5400_ERR)},
   1413	{0,}			/* 0 terminated list. */
   1414};
   1415
   1416MODULE_DEVICE_TABLE(pci, i5400_pci_tbl);
   1417
   1418/*
   1419 *	i5400_driver	pci_driver structure for this module
   1420 *
   1421 */
   1422static struct pci_driver i5400_driver = {
   1423	.name = "i5400_edac",
   1424	.probe = i5400_init_one,
   1425	.remove = i5400_remove_one,
   1426	.id_table = i5400_pci_tbl,
   1427};
   1428
   1429/*
   1430 *	i5400_init		Module entry function
   1431 *			Try to initialize this module for its devices
   1432 */
   1433static int __init i5400_init(void)
   1434{
   1435	int pci_rc;
   1436
   1437	edac_dbg(2, "MC:\n");
   1438
   1439	/* Ensure that the OPSTATE is set correctly for POLL or NMI */
   1440	opstate_init();
   1441
   1442	pci_rc = pci_register_driver(&i5400_driver);
   1443
   1444	return (pci_rc < 0) ? pci_rc : 0;
   1445}
   1446
   1447/*
   1448 *	i5400_exit()	Module exit function
   1449 *			Unregister the driver
   1450 */
   1451static void __exit i5400_exit(void)
   1452{
   1453	edac_dbg(2, "MC:\n");
   1454	pci_unregister_driver(&i5400_driver);
   1455}
   1456
/* Hook the entry and exit functions into the module loader */
module_init(i5400_init);
module_exit(i5400_exit);

/* Module metadata; the description string ends with I5400_REVISION */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Ben Woodard <woodard@redhat.com>");
MODULE_AUTHOR("Mauro Carvalho Chehab");
MODULE_AUTHOR("Red Hat Inc. (https://www.redhat.com)");
MODULE_DESCRIPTION("MC Driver for Intel I5400 memory controllers - "
		   I5400_REVISION);

/* Expose edac_op_state as a module parameter with 0444 permissions */
module_param(edac_op_state, int, 0444);
MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");