16 years ago · f65ac45e20
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -183,6 +183,11 @@ void mce_log(struct mce *mce)
 
				 	set_bit(0, &mce_need_notify);
			
 
				 }
			
 
				 
			
 
				+void __weak decode_mce(struct mce *m)
			
 
				+{
			
 
				+	return;
			
 
				+}
			
 
				+
			
 
				 static void print_mce(struct mce *m)
			
 
				 {
			
 
				 	printk(KERN_EMERG
			
@@ -205,6 +210,8 @@ static void print_mce(struct mce *m)
 
				 	printk(KERN_EMERG "PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x\n",
			
 
				 			m->cpuvendor, m->cpuid, m->time, m->socketid,
			
 
				 			m->apicid);
			
 
				+
			
 
				+	decode_mce(m);
			
 
				 }
			
 
				 
			
 
				 static void print_mce_head(void)
			
@@ -215,7 +222,10 @@ static void print_mce_head(void)
 
				 static void print_mce_tail(void)
			
 
				 {
			
 
				 	printk(KERN_EMERG "This is not a software problem!\n"
			
 
				-	       "Run through mcelog --ascii to decode and contact your hardware vendor\n");
			
 
				+#if (!defined(CONFIG_EDAC) || !defined(CONFIG_CPU_SUP_AMD))
			
 
				+	       "Run through mcelog --ascii to decode and contact your hardware vendor\n"
			
 
				+#endif
			
 
				+	       );
			
 
				 }
			
 
				 
			
 
				 #define PANIC_TIMEOUT 5 /* 5 seconds */
			
--- a/drivers/edac/Makefile
+++ b/drivers/edac/Makefile
@@ -17,6 +17,10 @@ ifdef CONFIG_PCI
 
				 edac_core-objs	+= edac_pci.o edac_pci_sysfs.o
			
 
				 endif
			
 
				 
			
 
				+ifdef CONFIG_CPU_SUP_AMD
			
 
				+edac_core-objs  += edac_mce_amd.o
			
 
				+endif
			
 
				+
			
 
				 obj-$(CONFIG_EDAC_AMD76X)		+= amd76x_edac.o
			
 
				 obj-$(CONFIG_EDAC_CPC925)		+= cpc925_edac.o
			
 
				 obj-$(CONFIG_EDAC_I5000)		+= i5000_edac.o
			
@@ -32,7 +36,7 @@ obj-$(CONFIG_EDAC_X38)			+= x38_edac.o
 
				 obj-$(CONFIG_EDAC_I82860)		+= i82860_edac.o
			
 
				 obj-$(CONFIG_EDAC_R82600)		+= r82600_edac.o
			
 
				 
			
 
				-amd64_edac_mod-y :=  amd64_edac_err_types.o amd64_edac.o
			
 
				+amd64_edac_mod-y := amd64_edac.o
			
 
				 amd64_edac_mod-$(CONFIG_EDAC_DEBUG) += amd64_edac_dbg.o
			
 
				 amd64_edac_mod-$(CONFIG_EDAC_AMD64_ERROR_INJECTION) += amd64_edac_inj.o
			
 
				 
			
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -18,6 +18,63 @@ struct amd64_pvt;
 
				 static struct mem_ctl_info *mci_lookup[MAX_NUMNODES];
			
 
				 static struct amd64_pvt *pvt_lookup[MAX_NUMNODES];
			
 
				 
			
 
				+/*
			
 
				+ * See F2x80 for K8 and F2x[1,0]80 for Fam10 and later. The table below is only
			
 
				+ * for DDR2 DRAM mapping.
			
 
				+ */
			
 
				+u32 revf_quad_ddr2_shift[] = {
			
 
				+	0,	/* 0000b NULL DIMM (128mb) */
			
 
				+	28,	/* 0001b 256mb */
			
 
				+	29,	/* 0010b 512mb */
			
 
				+	29,	/* 0011b 512mb */
			
 
				+	29,	/* 0100b 512mb */
			
 
				+	30,	/* 0101b 1gb */
			
 
				+	30,	/* 0110b 1gb */
			
 
				+	31,	/* 0111b 2gb */
			
 
				+	31,	/* 1000b 2gb */
			
 
				+	32,	/* 1001b 4gb */
			
 
				+	32,	/* 1010b 4gb */
			
 
				+	33,	/* 1011b 8gb */
			
 
				+	0,	/* 1100b future */
			
 
				+	0,	/* 1101b future */
			
 
				+	0,	/* 1110b future */
			
 
				+	0	/* 1111b future */
			
 
				+};
			
 
				+
			
 
				+/*
			
 
				+ * Valid scrub rates for the K8 hardware memory scrubber. We map the scrubbing
			
 
				+ * bandwidth to a valid bit pattern. The 'set' operation finds the 'matching-
			
 
				+ * or higher value'.
			
 
				+ *
			
 
				+ *FIXME: Produce a better mapping/linearisation.
			
 
				+ */
			
 
				+
			
 
				+struct scrubrate scrubrates[] = {
			
 
				+	{ 0x01, 1600000000UL},
			
 
				+	{ 0x02, 800000000UL},
			
 
				+	{ 0x03, 400000000UL},
			
 
				+	{ 0x04, 200000000UL},
			
 
				+	{ 0x05, 100000000UL},
			
 
				+	{ 0x06, 50000000UL},
			
 
				+	{ 0x07, 25000000UL},
			
 
				+	{ 0x08, 12284069UL},
			
 
				+	{ 0x09, 6274509UL},
			
 
				+	{ 0x0A, 3121951UL},
			
 
				+	{ 0x0B, 1560975UL},
			
 
				+	{ 0x0C, 781440UL},
			
 
				+	{ 0x0D, 390720UL},
			
 
				+	{ 0x0E, 195300UL},
			
 
				+	{ 0x0F, 97650UL},
			
 
				+	{ 0x10, 48854UL},
			
 
				+	{ 0x11, 24427UL},
			
 
				+	{ 0x12, 12213UL},
			
 
				+	{ 0x13, 6101UL},
			
 
				+	{ 0x14, 3051UL},
			
 
				+	{ 0x15, 1523UL},
			
 
				+	{ 0x16, 761UL},
			
 
				+	{ 0x00, 0UL},        /* scrubbing off */
			
 
				+};
			
 
				+
			
 
				 /*
			
 
				  * Memory scrubber control interface. For K8, memory scrubbing is handled by
			
 
				  * hardware and can involve L2 cache, dcache as well as the main memory. With
			
@@ -693,7 +750,7 @@ static void find_csrow_limits(struct mem_ctl_info *mci, int csrow,
 
				  * specific.
			
 
				  */
			
 
				 static u64 extract_error_address(struct mem_ctl_info *mci,
			
 
				-				 struct amd64_error_info_regs *info)
			
 
				+				 struct err_regs *info)
			
 
				 {
			
 
				 	struct amd64_pvt *pvt = mci->pvt_info;
			
 
				 
			
@@ -1049,7 +1106,7 @@ static int k8_early_channel_count(struct amd64_pvt *pvt)
 
				 
			
 
				 /* extract the ERROR ADDRESS for the K8 CPUs */
			
 
				 static u64 k8_get_error_address(struct mem_ctl_info *mci,
			
 
				-				struct amd64_error_info_regs *info)
			
 
				+				struct err_regs *info)
			
 
				 {
			
 
				 	return (((u64) (info->nbeah & 0xff)) << 32) +
			
 
				 			(info->nbeal & ~0x03);
			
@@ -1092,7 +1149,7 @@ static void k8_read_dram_base_limit(struct amd64_pvt *pvt, int dram)
 
				 }
			
 
				 
			
 
				 static void k8_map_sysaddr_to_csrow(struct mem_ctl_info *mci,
			
 
				-					struct amd64_error_info_regs *info,
			
 
				+					struct err_regs *info,
			
 
				 					u64 SystemAddress)
			
 
				 {
			
 
				 	struct mem_ctl_info *src_mci;
			
@@ -1101,8 +1158,8 @@ static void k8_map_sysaddr_to_csrow(struct mem_ctl_info *mci,
 
				 	u32 page, offset;
			
 
				 
			
 
				 	/* Extract the syndrome parts and form a 16-bit syndrome */
			
 
				-	syndrome = EXTRACT_HIGH_SYNDROME(info->nbsl) << 8;
			
 
				-	syndrome |= EXTRACT_LOW_SYNDROME(info->nbsh);
			
 
				+	syndrome  = HIGH_SYNDROME(info->nbsl) << 8;
			
 
				+	syndrome |= LOW_SYNDROME(info->nbsh);
			
 
				 
			
 
				 	/* CHIPKILL enabled */
			
 
				 	if (info->nbcfg & K8_NBCFG_CHIPKILL) {
			
@@ -1311,7 +1368,7 @@ static void amd64_teardown(struct amd64_pvt *pvt)
 
				 }
			
 
				 
			
 
				 static u64 f10_get_error_address(struct mem_ctl_info *mci,
			
 
				-			struct amd64_error_info_regs *info)
			
 
				+			struct err_regs *info)
			
 
				 {
			
 
				 	return (((u64) (info->nbeah & 0xffff)) << 32) +
			
 
				 			(info->nbeal & ~0x01);
			
@@ -1688,7 +1745,7 @@ static int f10_translate_sysaddr_to_cs(struct amd64_pvt *pvt, u64 sys_addr,
 
				  * The @sys_addr is usually an error address received from the hardware.
			
 
				  */
			
 
				 static void f10_map_sysaddr_to_csrow(struct mem_ctl_info *mci,
			
 
				-				     struct amd64_error_info_regs *info,
			
 
				+				     struct err_regs *info,
			
 
				 				     u64 sys_addr)
			
 
				 {
			
 
				 	struct amd64_pvt *pvt = mci->pvt_info;
			
@@ -1701,8 +1758,8 @@ static void f10_map_sysaddr_to_csrow(struct mem_ctl_info *mci,
 
				 	if (csrow >= 0) {
			
 
				 		error_address_to_page_and_offset(sys_addr, &page, &offset);
			
 
				 
			
 
				-		syndrome = EXTRACT_HIGH_SYNDROME(info->nbsl) << 8;
			
 
				-		syndrome |= EXTRACT_LOW_SYNDROME(info->nbsh);
			
 
				+		syndrome  = HIGH_SYNDROME(info->nbsl) << 8;
			
 
				+		syndrome |= LOW_SYNDROME(info->nbsh);
			
 
				 
			
 
				 		/*
			
 
				 		 * Is CHIPKILL on? If so, then we can attempt to use the
			
@@ -2045,7 +2102,7 @@ static int get_channel_from_ecc_syndrome(unsigned short syndrome)
 
				  *	- 0: if no valid error is indicated
			
 
				  */
			
 
				 static int amd64_get_error_info_regs(struct mem_ctl_info *mci,
			
 
				-				     struct amd64_error_info_regs *regs)
			
 
				+				     struct err_regs *regs)
			
 
				 {
			
 
				 	struct amd64_pvt *pvt;
			
 
				 	struct pci_dev *misc_f3_ctl;
			
@@ -2094,10 +2151,10 @@ err_reg:
 
				  *	- 0: if no error is found
			
 
				  */
			
 
				 static int amd64_get_error_info(struct mem_ctl_info *mci,
			
 
				-				struct amd64_error_info_regs *info)
			
 
				+				struct err_regs *info)
			
 
				 {
			
 
				 	struct amd64_pvt *pvt;
			
 
				-	struct amd64_error_info_regs regs;
			
 
				+	struct err_regs regs;
			
 
				 
			
 
				 	pvt = mci->pvt_info;
			
 
				 
			
@@ -2152,48 +2209,12 @@ static int amd64_get_error_info(struct mem_ctl_info *mci,
 
				 	return 1;
			
 
				 }
			
 
				 
			
 
				-static inline void amd64_decode_gart_tlb_error(struct mem_ctl_info *mci,
			
 
				-					 struct amd64_error_info_regs *info)
			
 
				-{
			
 
				-	u32 err_code;
			
 
				-	u32 ec_tt;		/* error code transaction type (2b) */
			
 
				-	u32 ec_ll;		/* error code cache level (2b) */
			
 
				-
			
 
				-	err_code = EXTRACT_ERROR_CODE(info->nbsl);
			
 
				-	ec_ll = EXTRACT_LL_CODE(err_code);
			
 
				-	ec_tt = EXTRACT_TT_CODE(err_code);
			
 
				-
			
 
				-	amd64_mc_printk(mci, KERN_ERR,
			
 
				-		     "GART TLB event: transaction type(%s), "
			
 
				-		     "cache level(%s)\n", tt_msgs[ec_tt], ll_msgs[ec_ll]);
			
 
				-}
			
 
				-
			
 
				-static inline void amd64_decode_mem_cache_error(struct mem_ctl_info *mci,
			
 
				-				      struct amd64_error_info_regs *info)
			
 
				-{
			
 
				-	u32 err_code;
			
 
				-	u32 ec_rrrr;		/* error code memory transaction (4b) */
			
 
				-	u32 ec_tt;		/* error code transaction type (2b) */
			
 
				-	u32 ec_ll;		/* error code cache level (2b) */
			
 
				-
			
 
				-	err_code = EXTRACT_ERROR_CODE(info->nbsl);
			
 
				-	ec_ll = EXTRACT_LL_CODE(err_code);
			
 
				-	ec_tt = EXTRACT_TT_CODE(err_code);
			
 
				-	ec_rrrr = EXTRACT_RRRR_CODE(err_code);
			
 
				-
			
 
				-	amd64_mc_printk(mci, KERN_ERR,
			
 
				-		     "cache hierarchy error: memory transaction type(%s), "
			
 
				-		     "transaction type(%s), cache level(%s)\n",
			
 
				-		     rrrr_msgs[ec_rrrr], tt_msgs[ec_tt], ll_msgs[ec_ll]);
			
 
				-}
			
 
				-
			
 
				-
			
 
				 /*
			
 
				  * Handle any Correctable Errors (CEs) that have occurred. Check for valid ERROR
			
 
				  * ADDRESS and process.
			
 
				  */
			
 
				 static void amd64_handle_ce(struct mem_ctl_info *mci,
			
 
				-			    struct amd64_error_info_regs *info)
			
 
				+			    struct err_regs *info)
			
 
				 {
			
 
				 	struct amd64_pvt *pvt = mci->pvt_info;
			
 
				 	u64 SystemAddress;
			
@@ -2216,7 +2237,7 @@ static void amd64_handle_ce(struct mem_ctl_info *mci,
 
				 
			
 
				 /* Handle any Un-correctable Errors (UEs) */
			
 
				 static void amd64_handle_ue(struct mem_ctl_info *mci,
			
 
				-			    struct amd64_error_info_regs *info)
			
 
				+			    struct err_regs *info)
			
 
				 {
			
 
				 	int csrow;
			
 
				 	u64 SystemAddress;
			
@@ -2261,59 +2282,24 @@ static void amd64_handle_ue(struct mem_ctl_info *mci,
 
				 	}
			
 
				 }
			
 
				 
			
 
				-static void amd64_decode_bus_error(struct mem_ctl_info *mci,
			
 
				-				   struct amd64_error_info_regs *info)
			
 
				+static inline void __amd64_decode_bus_error(struct mem_ctl_info *mci,
			
 
				+					    struct err_regs *info)
			
 
				 {
			
 
				-	u32 err_code, ext_ec;
			
 
				-	u32 ec_pp;		/* error code participating processor (2p) */
			
 
				-	u32 ec_to;		/* error code timed out (1b) */
			
 
				-	u32 ec_rrrr;		/* error code memory transaction (4b) */
			
 
				-	u32 ec_ii;		/* error code memory or I/O (2b) */
			
 
				-	u32 ec_ll;		/* error code cache level (2b) */
			
 
				+	u32 ec  = ERROR_CODE(info->nbsl);
			
 
				+	u32 xec = EXT_ERROR_CODE(info->nbsl);
			
 
				+	int ecc_type = info->nbsh & (0x3 << 13);
			
 
				 
			
 
				-	ext_ec = EXTRACT_EXT_ERROR_CODE(info->nbsl);
			
 
				-	err_code = EXTRACT_ERROR_CODE(info->nbsl);
			
 
				-
			
 
				-	ec_ll = EXTRACT_LL_CODE(err_code);
			
 
				-	ec_ii = EXTRACT_II_CODE(err_code);
			
 
				-	ec_rrrr = EXTRACT_RRRR_CODE(err_code);
			
 
				-	ec_to = EXTRACT_TO_CODE(err_code);
			
 
				-	ec_pp = EXTRACT_PP_CODE(err_code);
			
 
				-
			
 
				-	amd64_mc_printk(mci, KERN_ERR,
			
 
				-		"BUS ERROR:\n"
			
 
				-		"  time-out(%s) mem or i/o(%s)\n"
			
 
				-		"  participating processor(%s)\n"
			
 
				-		"  memory transaction type(%s)\n"
			
 
				-		"  cache level(%s) Error Found by: %s\n",
			
 
				-		to_msgs[ec_to],
			
 
				-		ii_msgs[ec_ii],
			
 
				-		pp_msgs[ec_pp],
			
 
				-		rrrr_msgs[ec_rrrr],
			
 
				-		ll_msgs[ec_ll],
			
 
				-		(info->nbsh & K8_NBSH_ERR_SCRUBER) ?
			
 
				-			"Scrubber" : "Normal Operation");
			
 
				-
			
 
				-	/* If this was an 'observed' error, early out */
			
 
				-	if (ec_pp == K8_NBSL_PP_OBS)
			
 
				-		return;		/* We aren't the node involved */
			
 
				-
			
 
				-	/* Parse out the extended error code for ECC events */
			
 
				-	switch (ext_ec) {
			
 
				-	/* F10 changed to one Extended ECC error code */
			
 
				-	case F10_NBSL_EXT_ERR_RES:		/* Reserved field */
			
 
				-	case F10_NBSL_EXT_ERR_ECC:		/* F10 ECC ext err code */
			
 
				-		break;
			
 
				+	/* Bail early out if this was an 'observed' error */
			
 
				+	if (PP(ec) == K8_NBSL_PP_OBS)
			
 
				+		return;
			
 
				 
			
 
				-	default:
			
 
				-		amd64_mc_printk(mci, KERN_ERR, "NOT ECC: no special error "
			
 
				-					       "handling for this error\n");
			
 
				+	/* Do only ECC errors */
			
 
				+	if (xec && xec != F10_NBSL_EXT_ERR_ECC)
			
 
				 		return;
			
 
				-	}
			
 
				 
			
 
				-	if (info->nbsh & K8_NBSH_CECC)
			
 
				+	if (ecc_type == 2)
			
 
				 		amd64_handle_ce(mci, info);
			
 
				-	else if (info->nbsh & K8_NBSH_UECC)
			
 
				+	else if (ecc_type == 1)
			
 
				 		amd64_handle_ue(mci, info);
			
 
				 
			
 
				 	/*
			
@@ -2324,139 +2310,26 @@ static void amd64_decode_bus_error(struct mem_ctl_info *mci,
 
				 	 * catastrophic.
			
 
				 	 */
			
 
				 	if (info->nbsh & K8_NBSH_OVERFLOW)
			
 
				-		edac_mc_handle_ce_no_info(mci, EDAC_MOD_STR
			
 
				-					  "Error Overflow set");
			
 
				+		edac_mc_handle_ce_no_info(mci, EDAC_MOD_STR "Error Overflow");
			
 
				 }
			
 
				 
			
 
				-int amd64_process_error_info(struct mem_ctl_info *mci,
			
 
				-			     struct amd64_error_info_regs *info,
			
 
				-			     int handle_errors)
			
 
				+void amd64_decode_bus_error(int node_id, struct err_regs *regs)
			
 
				 {
			
 
				-	struct amd64_pvt *pvt;
			
 
				-	struct amd64_error_info_regs *regs;
			
 
				-	u32 err_code, ext_ec;
			
 
				-	int gart_tlb_error = 0;
			
 
				-
			
 
				-	pvt = mci->pvt_info;
			
 
				-
			
 
				-	/* If caller doesn't want us to process the error, return */
			
 
				-	if (!handle_errors)
			
 
				-		return 1;
			
 
				-
			
 
				-	regs = info;
			
 
				-
			
 
				-	debugf1("NorthBridge ERROR: mci(0x%p)\n", mci);
			
 
				-	debugf1("  MC node(%d) Error-Address(0x%.8x-%.8x)\n",
			
 
				-		pvt->mc_node_id, regs->nbeah, regs->nbeal);
			
 
				-	debugf1("  nbsh(0x%.8x) nbsl(0x%.8x)\n",
			
 
				-		regs->nbsh, regs->nbsl);
			
 
				-	debugf1("  Valid Error=%s Overflow=%s\n",
			
 
				-		(regs->nbsh & K8_NBSH_VALID_BIT) ? "True" : "False",
			
 
				-		(regs->nbsh & K8_NBSH_OVERFLOW) ? "True" : "False");
			
 
				-	debugf1("  Err Uncorrected=%s MCA Error Reporting=%s\n",
			
 
				-		(regs->nbsh & K8_NBSH_UNCORRECTED_ERR) ?
			
 
				-			"True" : "False",
			
 
				-		(regs->nbsh & K8_NBSH_ERR_ENABLE) ?
			
 
				-			"True" : "False");
			
 
				-	debugf1("  MiscErr Valid=%s ErrAddr Valid=%s PCC=%s\n",
			
 
				-		(regs->nbsh & K8_NBSH_MISC_ERR_VALID) ?
			
 
				-			"True" : "False",
			
 
				-		(regs->nbsh & K8_NBSH_VALID_ERROR_ADDR) ?
			
 
				-			"True" : "False",
			
 
				-		(regs->nbsh & K8_NBSH_PCC) ?
			
 
				-			"True" : "False");
			
 
				-	debugf1("  CECC=%s UECC=%s Found by Scruber=%s\n",
			
 
				-		(regs->nbsh & K8_NBSH_CECC) ?
			
 
				-			"True" : "False",
			
 
				-		(regs->nbsh & K8_NBSH_UECC) ?
			
 
				-			"True" : "False",
			
 
				-		(regs->nbsh & K8_NBSH_ERR_SCRUBER) ?
			
 
				-			"True" : "False");
			
 
				-	debugf1("  CORE0=%s CORE1=%s CORE2=%s CORE3=%s\n",
			
 
				-		(regs->nbsh & K8_NBSH_CORE0) ? "True" : "False",
			
 
				-		(regs->nbsh & K8_NBSH_CORE1) ? "True" : "False",
			
 
				-		(regs->nbsh & K8_NBSH_CORE2) ? "True" : "False",
			
 
				-		(regs->nbsh & K8_NBSH_CORE3) ? "True" : "False");
			
 
				-
			
 
				-
			
 
				-	err_code = EXTRACT_ERROR_CODE(regs->nbsl);
			
 
				-
			
 
				-	/* Determine which error type:
			
 
				-	 *	1) GART errors - non-fatal, developmental events
			
 
				-	 *	2) MEMORY errors
			
 
				-	 *	3) BUS errors
			
 
				-	 *	4) Unknown error
			
 
				-	 */
			
 
				-	if (TEST_TLB_ERROR(err_code)) {
			
 
				-		/*
			
 
				-		 * GART errors are intended to help graphics driver developers
			
 
				-		 * to detect bad GART PTEs. It is recommended by AMD to disable
			
 
				-		 * GART table walk error reporting by default[1] (currently
			
 
				-		 * being disabled in mce_cpu_quirks()) and according to the
			
 
				-		 * comment in mce_cpu_quirks(), such GART errors can be
			
 
				-		 * incorrectly triggered. We may see these errors anyway and
			
 
				-		 * unless requested by the user, they won't be reported.
			
 
				-		 *
			
 
				-		 * [1] section 13.10.1 on BIOS and Kernel Developers Guide for
			
 
				-		 *     AMD NPT family 0Fh processors
			
 
				-		 */
			
 
				-		if (report_gart_errors == 0)
			
 
				-			return 1;
			
 
				-
			
 
				-		/*
			
 
				-		 * Only if GART error reporting is requested should we generate
			
 
				-		 * any logs.
			
 
				-		 */
			
 
				-		gart_tlb_error = 1;
			
 
				-
			
 
				-		debugf1("GART TLB error\n");
			
 
				-		amd64_decode_gart_tlb_error(mci, info);
			
 
				-	} else if (TEST_MEM_ERROR(err_code)) {
			
 
				-		debugf1("Memory/Cache error\n");
			
 
				-		amd64_decode_mem_cache_error(mci, info);
			
 
				-	} else if (TEST_BUS_ERROR(err_code)) {
			
 
				-		debugf1("Bus (Link/DRAM) error\n");
			
 
				-		amd64_decode_bus_error(mci, info);
			
 
				-	} else {
			
 
				-		/* shouldn't reach here! */
			
 
				-		amd64_mc_printk(mci, KERN_WARNING,
			
 
				-			     "%s(): unknown MCE error 0x%x\n", __func__,
			
 
				-			     err_code);
			
 
				-	}
			
 
				-
			
 
				-	ext_ec = EXTRACT_EXT_ERROR_CODE(regs->nbsl);
			
 
				-	amd64_mc_printk(mci, KERN_ERR,
			
 
				-		"ExtErr=(0x%x) %s\n", ext_ec, ext_msgs[ext_ec]);
			
 
				+	struct mem_ctl_info *mci = mci_lookup[node_id];
			
 
				 
			
 
				-	if (((ext_ec >= F10_NBSL_EXT_ERR_CRC &&
			
 
				-			ext_ec <= F10_NBSL_EXT_ERR_TGT) ||
			
 
				-			(ext_ec == F10_NBSL_EXT_ERR_RMW)) &&
			
 
				-			EXTRACT_LDT_LINK(info->nbsh)) {
			
 
				-
			
 
				-		amd64_mc_printk(mci, KERN_ERR,
			
 
				-			"Error on hypertransport link: %s\n",
			
 
				-			htlink_msgs[
			
 
				-			EXTRACT_LDT_LINK(info->nbsh)]);
			
 
				-	}
			
 
				+	__amd64_decode_bus_error(mci, regs);
			
 
				 
			
 
				 	/*
			
 
				 	 * Check the UE bit of the NB status high register, if set generate some
			
 
				 	 * logs. If NOT a GART error, then process the event as a NO-INFO event.
			
 
				 	 * If it was a GART error, skip that process.
			
 
				+	 *
			
 
				+	 * FIXME: this should go somewhere else, if at all.
			
 
				 	 */
			
 
				-	if (regs->nbsh & K8_NBSH_UNCORRECTED_ERR) {
			
 
				-		amd64_mc_printk(mci, KERN_CRIT, "uncorrected error\n");
			
 
				-		if (!gart_tlb_error)
			
 
				-			edac_mc_handle_ue_no_info(mci, "UE bit is set\n");
			
 
				-	}
			
 
				-
			
 
				-	if (regs->nbsh & K8_NBSH_PCC)
			
 
				-		amd64_mc_printk(mci, KERN_CRIT,
			
 
				-			"PCC (processor context corrupt) set\n");
			
 
				+	if (regs->nbsh & K8_NBSH_UC_ERR && !report_gart_errors)
			
 
				+		edac_mc_handle_ue_no_info(mci, "UE bit is set");
			
 
				 
			
 
				-	return 1;
			
 
				 }
			
 
				-EXPORT_SYMBOL_GPL(amd64_process_error_info);
			
 
				 
			
 
				 /*
			
 
				  * The main polling 'check' function, called FROM the edac core to perform the
			
@@ -2464,10 +2337,12 @@ EXPORT_SYMBOL_GPL(amd64_process_error_info);
 
				  */
			
 
				 static void amd64_check(struct mem_ctl_info *mci)
			
 
				 {
			
 
				-	struct amd64_error_info_regs info;
			
 
				+	struct err_regs regs;
			
 
				 
			
 
				-	if (amd64_get_error_info(mci, &info))
			
 
				-		amd64_process_error_info(mci, &info, 1);
			
 
				+	if (amd64_get_error_info(mci, &regs)) {
			
 
				+		struct amd64_pvt *pvt = mci->pvt_info;
			
 
				+		amd_decode_nb_mce(pvt->mc_node_id, &regs, 1);
			
 
				+	}
			
 
				 }
			
 
				 
			
 
				 /*
			
@@ -3163,6 +3038,13 @@ static int amd64_init_2nd_stage(struct amd64_pvt *pvt)
 
				 
			
 
				 	mci_lookup[node_id] = mci;
			
 
				 	pvt_lookup[node_id] = NULL;
			
 
				+
			
 
				+	/* register stuff with EDAC MCE */
			
 
				+	if (report_gart_errors)
			
 
				+		amd_report_gart_errors(true);
			
 
				+
			
 
				+	amd_register_ecc_decoder(amd64_decode_bus_error);
			
 
				+
			
 
				 	return 0;
			
 
				 
			
 
				 err_add_mc:
			
@@ -3229,6 +3111,10 @@ static void __devexit amd64_remove_one_instance(struct pci_dev *pdev)
 
				 
			
 
				 	mci_lookup[pvt->mc_node_id] = NULL;
			
 
				 
			
 
				+	/* unregister from EDAC MCE */
			
 
				+	amd_report_gart_errors(false);
			
 
				+	amd_unregister_ecc_decoder(amd64_decode_bus_error);
			
 
				+
			
 
				 	/* Free the EDAC CORE resources */
			
 
				 	edac_mc_free(mci);
			
 
				 }
			
--- a/drivers/edac/amd64_edac.h
+++ b/drivers/edac/amd64_edac.h
@@ -72,6 +72,7 @@
 
				 #include <linux/edac.h>
			
 
				 #include <asm/msr.h>
			
 
				 #include "edac_core.h"
			
 
				+#include "edac_mce_amd.h"
			
 
				 
			
 
				 #define amd64_printk(level, fmt, arg...) \
			
 
				 	edac_printk(level, "amd64", fmt, ##arg)
			
@@ -303,21 +304,9 @@ enum {
 
				 #define K8_NBSL				0x48
			
 
				 
			
 
				 
			
 
				-#define EXTRACT_HIGH_SYNDROME(x)	(((x) >> 24) & 0xff)
			
 
				-#define EXTRACT_EXT_ERROR_CODE(x)	(((x) >> 16) & 0x1f)
			
 
				-
			
 
				 /* Family F10h: Normalized Extended Error Codes */
			
 
				 #define F10_NBSL_EXT_ERR_RES		0x0
			
 
				-#define F10_NBSL_EXT_ERR_CRC		0x1
			
 
				-#define F10_NBSL_EXT_ERR_SYNC		0x2
			
 
				-#define F10_NBSL_EXT_ERR_MST		0x3
			
 
				-#define F10_NBSL_EXT_ERR_TGT		0x4
			
 
				-#define F10_NBSL_EXT_ERR_GART		0x5
			
 
				-#define F10_NBSL_EXT_ERR_RMW		0x6
			
 
				-#define F10_NBSL_EXT_ERR_WDT		0x7
			
 
				 #define F10_NBSL_EXT_ERR_ECC		0x8
			
 
				-#define F10_NBSL_EXT_ERR_DEV		0x9
			
 
				-#define F10_NBSL_EXT_ERR_LINK_DATA	0xA
			
 
				 
			
 
				 /* Next two are overloaded values */
			
 
				 #define F10_NBSL_EXT_ERR_LINK_PROTO	0xB
			
@@ -348,17 +337,6 @@ enum {
 
				 #define K8_NBSL_EXT_ERR_CHIPKILL_ECC	0x8
			
 
				 #define K8_NBSL_EXT_ERR_DRAM_PARITY	0xD
			
 
				 
			
 
				-#define EXTRACT_ERROR_CODE(x)		((x) & 0xffff)
			
 
				-#define	TEST_TLB_ERROR(x)		(((x) & 0xFFF0) == 0x0010)
			
 
				-#define	TEST_MEM_ERROR(x)		(((x) & 0xFF00) == 0x0100)
			
 
				-#define	TEST_BUS_ERROR(x)		(((x) & 0xF800) == 0x0800)
			
 
				-#define	EXTRACT_TT_CODE(x)		(((x) >> 2) & 0x3)
			
 
				-#define	EXTRACT_II_CODE(x)		(((x) >> 2) & 0x3)
			
 
				-#define	EXTRACT_LL_CODE(x)		(((x) >> 0) & 0x3)
			
 
				-#define	EXTRACT_RRRR_CODE(x)		(((x) >> 4) & 0xf)
			
 
				-#define	EXTRACT_TO_CODE(x)		(((x) >> 8) & 0x1)
			
 
				-#define	EXTRACT_PP_CODE(x)		(((x) >> 9) & 0x3)
			
 
				-
			
 
				 /*
			
 
				  * The following are for BUS type errors AFTER values have been normalized by
			
 
				  * shifting right
			
@@ -368,28 +346,7 @@ enum {
 
				 #define K8_NBSL_PP_OBS			0x2
			
 
				 #define K8_NBSL_PP_GENERIC		0x3
			
 
				 
			
 
				-
			
 
				-#define K8_NBSH				0x4C
			
 
				-
			
 
				-#define K8_NBSH_VALID_BIT		BIT(31)
			
 
				-#define K8_NBSH_OVERFLOW		BIT(30)
			
 
				-#define K8_NBSH_UNCORRECTED_ERR		BIT(29)
			
 
				-#define K8_NBSH_ERR_ENABLE		BIT(28)
			
 
				-#define K8_NBSH_MISC_ERR_VALID		BIT(27)
			
 
				-#define K8_NBSH_VALID_ERROR_ADDR	BIT(26)
			
 
				-#define K8_NBSH_PCC			BIT(25)
			
 
				-#define K8_NBSH_CECC			BIT(14)
			
 
				-#define K8_NBSH_UECC			BIT(13)
			
 
				-#define K8_NBSH_ERR_SCRUBER		BIT(8)
			
 
				-#define K8_NBSH_CORE3			BIT(3)
			
 
				-#define K8_NBSH_CORE2			BIT(2)
			
 
				-#define K8_NBSH_CORE1			BIT(1)
			
 
				-#define K8_NBSH_CORE0			BIT(0)
			
 
				-
			
 
				-#define EXTRACT_LDT_LINK(x)		(((x) >> 4) & 0x7)
			
 
				 #define EXTRACT_ERR_CPU_MAP(x)		((x) & 0xF)
			
 
				-#define EXTRACT_LOW_SYNDROME(x)		(((x) >> 15) & 0xff)
			
 
				-
			
 
				 
			
 
				 #define K8_NBEAL			0x50
			
 
				 #define K8_NBEAH			0x54
			
@@ -455,23 +412,6 @@ enum amd64_chipset_families {
 
				 	F11_CPUS,
			
 
				 };
			
 
				 
			
 
				-/*
			
 
				- * Structure to hold:
			
 
				- *
			
 
				- * 1) dynamically read status and error address HW registers
			
 
				- * 2) sysfs entered values
			
 
				- * 3) MCE values
			
 
				- *
			
 
				- * Depends on entry into the modules
			
 
				- */
			
 
				-struct amd64_error_info_regs {
			
 
				-	u32 nbcfg;
			
 
				-	u32 nbsh;
			
 
				-	u32 nbsl;
			
 
				-	u32 nbeah;
			
 
				-	u32 nbeal;
			
 
				-};
			
 
				-
			
 
				 /* Error injection control structure */
			
 
				 struct error_injection {
			
 
				 	u32	section;
			
@@ -542,7 +482,7 @@ struct amd64_pvt {
 
				 	u32 online_spare;               /* On-Line spare Reg */
			
 
				 
			
 
				 	/* temp storage for when input is received from sysfs */
			
 
				-	struct amd64_error_info_regs ctl_error_info;
			
 
				+	struct err_regs ctl_error_info;
			
 
				 
			
 
				 	/* place to store error injection parameters prior to issue */
			
 
				 	struct error_injection injection;
			
@@ -601,11 +541,11 @@ struct low_ops {
 
				 	int (*early_channel_count)(struct amd64_pvt *pvt);
			
 
				 
			
 
				 	u64 (*get_error_address)(struct mem_ctl_info *mci,
			
 
				-			struct amd64_error_info_regs *info);
			
 
				+			struct err_regs *info);
			
 
				 	void (*read_dram_base_limit)(struct amd64_pvt *pvt, int dram);
			
 
				 	void (*read_dram_ctl_register)(struct amd64_pvt *pvt);
			
 
				 	void (*map_sysaddr_to_csrow)(struct mem_ctl_info *mci,
			
 
				-					struct amd64_error_info_regs *info,
			
 
				+					struct err_regs *info,
			
 
				 					u64 SystemAddr);
			
 
				 	int (*dbam_map_to_pages)(struct amd64_pvt *pvt, int dram_map);
			
 
				 };
			
@@ -637,8 +577,5 @@ static inline struct low_ops *family_ops(int index)
 
				 #define F10_MIN_SCRUB_RATE_BITS	0x5
			
 
				 #define F11_MIN_SCRUB_RATE_BITS	0x6
			
 
				 
			
 
				-int amd64_process_error_info(struct mem_ctl_info *mci,
			
 
				-			     struct amd64_error_info_regs *info,
			
 
				-			     int handle_errors);
			
 
				 int amd64_get_dram_hole_info(struct mem_ctl_info *mci, u64 *hole_base,
			
 
				 			     u64 *hole_offset, u64 *hole_size);
			
--- a/drivers/edac/amd64_edac_dbg.c
+++ b/drivers/edac/amd64_edac_dbg.c
@@ -24,7 +24,7 @@ static ssize_t amd64_nbea_store(struct mem_ctl_info *mci, const char *data,
 
				 
			
 
				 		/* Process the Mapping request */
			
 
				 		/* TODO: Add race prevention */
			
 
				-		amd64_process_error_info(mci, &pvt->ctl_error_info, 1);
			
 
				+		amd_decode_nb_mce(pvt->mc_node_id, &pvt->ctl_error_info, 1);
			
 
				 
			
 
				 		return count;
			
 
				 	}
			
--- a/drivers/edac/amd64_edac_err_types.c
+++ b/drivers/edac/amd64_edac_err_types.c
@@ -1,161 +0,0 @@
 
				-#include "amd64_edac.h"
			
 
				-
			
 
				-/*
			
 
				- * See F2x80 for K8 and F2x[1,0]80 for Fam10 and later. The table below is only
			
 
				- * for DDR2 DRAM mapping.
			
 
				- */
			
 
				-u32 revf_quad_ddr2_shift[] = {
			
 
				-	0,	/* 0000b NULL DIMM (128mb) */
			
 
				-	28,	/* 0001b 256mb */
			
 
				-	29,	/* 0010b 512mb */
			
 
				-	29,	/* 0011b 512mb */
			
 
				-	29,	/* 0100b 512mb */
			
 
				-	30,	/* 0101b 1gb */
			
 
				-	30,	/* 0110b 1gb */
			
 
				-	31,	/* 0111b 2gb */
			
 
				-	31,	/* 1000b 2gb */
			
 
				-	32,	/* 1001b 4gb */
			
 
				-	32,	/* 1010b 4gb */
			
 
				-	33,	/* 1011b 8gb */
			
 
				-	0,	/* 1100b future */
			
 
				-	0,	/* 1101b future */
			
 
				-	0,	/* 1110b future */
			
 
				-	0	/* 1111b future */
			
 
				-};
			
 
				-
			
 
				-/*
			
 
				- * Valid scrub rates for the K8 hardware memory scrubber. We map the scrubbing
			
 
				- * bandwidth to a valid bit pattern. The 'set' operation finds the 'matching-
			
 
				- * or higher value'.
			
 
				- *
			
 
				- *FIXME: Produce a better mapping/linearisation.
			
 
				- */
			
 
				-
			
 
				-struct scrubrate scrubrates[] = {
			
 
				-	{ 0x01, 1600000000UL},
			
 
				-	{ 0x02, 800000000UL},
			
 
				-	{ 0x03, 400000000UL},
			
 
				-	{ 0x04, 200000000UL},
			
 
				-	{ 0x05, 100000000UL},
			
 
				-	{ 0x06, 50000000UL},
			
 
				-	{ 0x07, 25000000UL},
			
 
				-	{ 0x08, 12284069UL},
			
 
				-	{ 0x09, 6274509UL},
			
 
				-	{ 0x0A, 3121951UL},
			
 
				-	{ 0x0B, 1560975UL},
			
 
				-	{ 0x0C, 781440UL},
			
 
				-	{ 0x0D, 390720UL},
			
 
				-	{ 0x0E, 195300UL},
			
 
				-	{ 0x0F, 97650UL},
			
 
				-	{ 0x10, 48854UL},
			
 
				-	{ 0x11, 24427UL},
			
 
				-	{ 0x12, 12213UL},
			
 
				-	{ 0x13, 6101UL},
			
 
				-	{ 0x14, 3051UL},
			
 
				-	{ 0x15, 1523UL},
			
 
				-	{ 0x16, 761UL},
			
 
				-	{ 0x00, 0UL},        /* scrubbing off */
			
 
				-};
			
 
				-
			
 
				-/*
			
 
				- * string representation for the different MCA reported error types, see F3x48
			
 
				- * or MSR0000_0411.
			
 
				- */
			
 
				-const char *tt_msgs[] = {        /* transaction type */
			
 
				-	"instruction",
			
 
				-	"data",
			
 
				-	"generic",
			
 
				-	"reserved"
			
 
				-};
			
 
				-
			
 
				-const char *ll_msgs[] = {	/* cache level */
			
 
				-	"L0",
			
 
				-	"L1",
			
 
				-	"L2",
			
 
				-	"L3/generic"
			
 
				-};
			
 
				-
			
 
				-const char *rrrr_msgs[] = {
			
 
				-	"generic",
			
 
				-	"generic read",
			
 
				-	"generic write",
			
 
				-	"data read",
			
 
				-	"data write",
			
 
				-	"inst fetch",
			
 
				-	"prefetch",
			
 
				-	"evict",
			
 
				-	"snoop",
			
 
				-	"reserved RRRR= 9",
			
 
				-	"reserved RRRR= 10",
			
 
				-	"reserved RRRR= 11",
			
 
				-	"reserved RRRR= 12",
			
 
				-	"reserved RRRR= 13",
			
 
				-	"reserved RRRR= 14",
			
 
				-	"reserved RRRR= 15"
			
 
				-};
			
 
				-
			
 
				-const char *pp_msgs[] = {	/* participating processor */
			
 
				-	"local node originated (SRC)",
			
 
				-	"local node responded to request (RES)",
			
 
				-	"local node observed as 3rd party (OBS)",
			
 
				-	"generic"
			
 
				-};
			
 
				-
			
 
				-const char *to_msgs[] = {
			
 
				-	"no timeout",
			
 
				-	"timed out"
			
 
				-};
			
 
				-
			
 
				-const char *ii_msgs[] = {	/* memory or i/o */
			
 
				-	"mem access",
			
 
				-	"reserved",
			
 
				-	"i/o access",
			
 
				-	"generic"
			
 
				-};
			
 
				-
			
 
				-/* Map the 5 bits of Extended Error code to the string table. */
			
 
				-const char *ext_msgs[] = {	/* extended error */
			
 
				-	"K8 ECC error/F10 reserved",	/* 0_0000b */
			
 
				-	"CRC error",			/* 0_0001b */
			
 
				-	"sync error",			/* 0_0010b */
			
 
				-	"mst abort",			/* 0_0011b */
			
 
				-	"tgt abort",			/* 0_0100b */
			
 
				-	"GART error",			/* 0_0101b */
			
 
				-	"RMW error",			/* 0_0110b */
			
 
				-	"Wdog timer error",		/* 0_0111b */
			
 
				-	"F10-ECC/K8-Chipkill error",	/* 0_1000b */
			
 
				-	"DEV Error",			/* 0_1001b */
			
 
				-	"Link Data error",		/* 0_1010b */
			
 
				-	"Link or L3 Protocol error",	/* 0_1011b */
			
 
				-	"NB Array error",		/* 0_1100b */
			
 
				-	"DRAM Parity error",		/* 0_1101b */
			
 
				-	"Link Retry/GART Table Walk/DEV Table Walk error", /* 0_1110b */
			
 
				-	"Res 0x0ff error",		/* 0_1111b */
			
 
				-	"Res 0x100 error",		/* 1_0000b */
			
 
				-	"Res 0x101 error",		/* 1_0001b */
			
 
				-	"Res 0x102 error",		/* 1_0010b */
			
 
				-	"Res 0x103 error",		/* 1_0011b */
			
 
				-	"Res 0x104 error",		/* 1_0100b */
			
 
				-	"Res 0x105 error",		/* 1_0101b */
			
 
				-	"Res 0x106 error",		/* 1_0110b */
			
 
				-	"Res 0x107 error",		/* 1_0111b */
			
 
				-	"Res 0x108 error",		/* 1_1000b */
			
 
				-	"Res 0x109 error",		/* 1_1001b */
			
 
				-	"Res 0x10A error",		/* 1_1010b */
			
 
				-	"Res 0x10B error",		/* 1_1011b */
			
 
				-	"L3 Cache Data error",		/* 1_1100b */
			
 
				-	"L3 CacheTag error",		/* 1_1101b */
			
 
				-	"L3 Cache LRU error",		/* 1_1110b */
			
 
				-	"Res 0x1FF error"		/* 1_1111b */
			
 
				-};
			
 
				-
			
 
				-const char *htlink_msgs[] = {
			
 
				-	"none",
			
 
				-	"1",
			
 
				-	"2",
			
 
				-	"1 2",
			
 
				-	"3",
			
 
				-	"1 3",
			
 
				-	"2 3",
			
 
				-	"1 2 3"
			
 
				-};
			
--- a/drivers/edac/edac_mce_amd.c
+++ b/drivers/edac/edac_mce_amd.c
@@ -0,0 +1,422 @@
 
				+#include <linux/module.h>
			
 
				+#include "edac_mce_amd.h"
			
 
				+
			
 
				+static bool report_gart_errors;	/* when false, amd_decode_err_code() prints nothing for TLB-class error codes */
			
 
				+static void (*nb_bus_decoder)(int node_id, struct err_regs *regs);	/* optional callback; amd_decode_nb_mce() calls it for bus errors when non-NULL */
			
 
				+
			
 
				+void amd_report_gart_errors(bool v)	/* sets the file-local report_gart_errors flag to v */
			
 
				+{
			
 
				+	report_gart_errors = v;
			
 
				+}
			
 
				+EXPORT_SYMBOL_GPL(amd_report_gart_errors);
			
 
				+
			
 
				+void amd_register_ecc_decoder(void (*f)(int, struct err_regs *))	/* installs f as the NB bus-error decoder */
			
 
				+{
			
 
				+	nb_bus_decoder = f;	/* unconditional: replaces any decoder that was registered before */
			
 
				+}
			
 
				+EXPORT_SYMBOL_GPL(amd_register_ecc_decoder);
			
 
				+
			
 
				+/*
				+ * Remove the registered NB bus-error decoder, if there is one.
				+ *
				+ * A warning is raised when @f is not the callback currently installed,
				+ * but the installed callback is cleared in either case.
				+ */
				+void amd_unregister_ecc_decoder(void (*f)(int, struct err_regs *))
				+{
				+	/* nothing registered, nothing to do */
				+	if (!nb_bus_decoder)
				+		return;
				+
				+	WARN_ON(nb_bus_decoder != f);
				+	nb_bus_decoder = NULL;
				+}
				+EXPORT_SYMBOL_GPL(amd_unregister_ecc_decoder);
			
 
				+
			
 
				+/*
				+ * Text for the individual fields of an MCA error code, see F3x48 or
				+ * MSR0000_0411. Each table is indexed by the matching field-extraction
				+ * macro from edac_mce_amd.h.
				+ */
				+const char *tt_msgs[] = {	/* transaction type, indexed by TT() */
				+	[0x0] = "instruction",
				+	[0x1] = "data",
				+	[0x2] = "generic",
				+	[0x3] = "reserved",
				+};
				+EXPORT_SYMBOL_GPL(tt_msgs);
			
 
				+
			
 
				+const char *ll_msgs[] = {	/* cache level, indexed by LL() */
				+	[0x0] = "L0",
				+	[0x1] = "L1",
				+	[0x2] = "L2",
				+	[0x3] = "L3/generic",
				+};
				+EXPORT_SYMBOL_GPL(ll_msgs);
			
 
				+
			
 
				+const char *rrrr_msgs[] = {	/* 4-bit RRRR field, indexed by RRRR() */
				+	[0x0] = "generic",
				+	[0x1] = "generic read",
				+	[0x2] = "generic write",
				+	[0x3] = "data read",
				+	[0x4] = "data write",
				+	[0x5] = "inst fetch",
				+	[0x6] = "prefetch",
				+	[0x7] = "evict",
				+	[0x8] = "snoop",
				+	[0x9] = "reserved RRRR= 9",
				+	[0xa] = "reserved RRRR= 10",
				+	[0xb] = "reserved RRRR= 11",
				+	[0xc] = "reserved RRRR= 12",
				+	[0xd] = "reserved RRRR= 13",
				+	[0xe] = "reserved RRRR= 14",
				+	[0xf] = "reserved RRRR= 15",
				+};
				+EXPORT_SYMBOL_GPL(rrrr_msgs);
			
 
				+
			
 
				+const char *pp_msgs[] = {	/* participating processor, indexed by PP() */
				+	[0x0] = "local node originated (SRC)",
				+	[0x1] = "local node responded to request (RES)",
				+	[0x2] = "local node observed as 3rd party (OBS)",
				+	[0x3] = "generic",
				+};
				+EXPORT_SYMBOL_GPL(pp_msgs);
			
 
				+
			
 
				+const char *to_msgs[] = {	/* timeout flag, indexed by TO() */
				+	[0] = "no timeout",
				+	[1] = "timed out",
				+};
				+EXPORT_SYMBOL_GPL(to_msgs);
			
 
				+
			
 
				+const char *ii_msgs[] = {	/* memory or i/o, indexed by II() */
				+	[0x0] = "mem access",
				+	[0x1] = "reserved",
				+	[0x2] = "i/o access",
				+	[0x3] = "generic",
				+};
				+EXPORT_SYMBOL_GPL(ii_msgs);
			
 
				+
			
 
				+/*
				+ * Map the 4 or 5 (family-specific) bits of Extended Error code to the
				+ * string table.
				+ *
				+ * EXT_ERROR_CODE() yields a 5-bit value, so the table must hold exactly 32
				+ * entries. Every entry needs its own trailing comma: a missing one makes
				+ * the compiler concatenate two adjacent literals, which shifts all later
				+ * entries down by one and leaves index 0x1f out of bounds.
				+ */
				+const char *ext_msgs[] = {
				+	"K8 ECC error",					/* 0_0000b */
				+	"CRC error on link",				/* 0_0001b */
				+	"Sync error packets on link",			/* 0_0010b */
				+	"Master Abort during link operation",		/* 0_0011b */
				+	"Target Abort during link operation",		/* 0_0100b */
				+	"Invalid GART PTE entry during table walk",	/* 0_0101b */
				+	"Unsupported atomic RMW command received",	/* 0_0110b */
				+	"WDT error: NB transaction timeout",		/* 0_0111b */
				+	"ECC/ChipKill ECC error",			/* 0_1000b */
				+	"SVM DEV Error",				/* 0_1001b */
				+	"Link Data error",				/* 0_1010b */
				+	"Link/L3/Probe Filter Protocol error",		/* 0_1011b */
				+	"NB Internal Arrays Parity error",		/* 0_1100b */
				+	"DRAM Address/Control Parity error",		/* 0_1101b */
				+	"Link Transmission error",			/* 0_1110b */
				+	"GART/DEV Table Walk Data error",		/* 0_1111b */
				+	"Res 0x100 error",				/* 1_0000b */
				+	"Res 0x101 error",				/* 1_0001b */
				+	"Res 0x102 error",				/* 1_0010b */
				+	"Res 0x103 error",				/* 1_0011b */
				+	"Res 0x104 error",				/* 1_0100b */
				+	"Res 0x105 error",				/* 1_0101b */
				+	"Res 0x106 error",				/* 1_0110b */
				+	"Res 0x107 error",				/* 1_0111b */
				+	"Res 0x108 error",				/* 1_1000b */
				+	"Res 0x109 error",				/* 1_1001b */
				+	"Res 0x10A error",				/* 1_1010b */
				+	"Res 0x10B error",				/* 1_1011b */
				+	"ECC error in L3 Cache Data",			/* 1_1100b */
				+	"L3 Cache Tag error",				/* 1_1101b */
				+	"L3 Cache LRU Parity error",			/* 1_1110b */
				+	"Probe Filter error"				/* 1_1111b */
				+};
				+EXPORT_SYMBOL_GPL(ext_msgs);
			
 
				+
			
 
				+/*
				+ * Decode a data cache (bank 0) machine check from its status word and
				+ * finish the " Data Cache Error" log line. Signatures that match none of
				+ * the known combinations are reported as possibly corrupted.
				+ */
				+static void amd_decode_dc_mce(u64 mc0_status)
				+{
				+	u32 ec  = mc0_status & 0xffff;
				+	u32 xec = (mc0_status >> 16) & 0xf;
				+
				+	pr_emerg(" Data Cache Error");
				+
				+	if (xec == 1 && TLB_ERROR(ec)) {
				+		pr_cont(": %s TLB multimatch.\n", LL_MSG(ec));
				+		return;
				+	}
				+
				+	/* all remaining signatures carry a zero extended error code */
				+	if (xec != 0)
				+		goto wrong_dc_mce;
				+
				+	/* status bit 40 is tested before the error code class */
				+	if (mc0_status & (1ULL << 40)) {
				+		pr_cont(" during Data Scrub.\n");
				+		return;
				+	}
				+
				+	if (TLB_ERROR(ec)) {
				+		pr_cont(": %s TLB parity error.\n", LL_MSG(ec));
				+		return;
				+	}
				+
				+	if (MEM_ERROR(ec)) {
				+		/* see F10h BKDG (31116), Table 92. */
				+		if (LL(ec) == 0x1 && TT(ec) == 0x1) {
				+			pr_cont(": Data/Tag %s error.\n", RRRR_MSG(ec));
				+			return;
				+		}
				+
				+		if (LL(ec) == 0x2 && RRRR(ec) == 0x3) {
				+			pr_cont(" during L1 linefill from L2.\n");
				+			return;
				+		}
				+
				+		goto wrong_dc_mce;
				+	}
				+
				+	if (BUS_ERROR(ec) && boot_cpu_data.x86 == 0xf) {
				+		pr_cont(" during system linefill.\n");
				+		return;
				+	}
				+
				+wrong_dc_mce:
				+	pr_warning("Corrupted DC MCE info?\n");
				+}
			
 
				+
			
 
				+/*
				+ * Decode an instruction cache (bank 1) machine check from its status word
				+ * and finish the " Instruction Cache Error" log line.
				+ */
				+static void amd_decode_ic_mce(u64 mc1_status)
				+{
				+	u32 ec  = mc1_status & 0xffff;
				+	u32 xec = (mc1_status >> 16) & 0xf;
				+
				+	pr_emerg(" Instruction Cache Error");
				+
				+	if (xec == 1 && TLB_ERROR(ec)) {
				+		pr_cont(": %s TLB multimatch.\n", LL_MSG(ec));
				+		return;
				+	}
				+
				+	/* all remaining signatures carry a zero extended error code */
				+	if (xec != 0)
				+		goto wrong_ic_mce;
				+
				+	if (TLB_ERROR(ec)) {
				+		pr_cont(": %s TLB Parity error.\n", LL_MSG(ec));
				+		return;
				+	}
				+
				+	if (BUS_ERROR(ec)) {
				+		if (boot_cpu_data.x86 == 0xf && (mc1_status & (1ULL << 58)))
				+			pr_cont(" during system linefill.\n");
				+		else
				+			pr_cont(" during attempted NB data read.\n");
				+		return;
				+	}
				+
				+	if (!MEM_ERROR(ec))
				+		goto wrong_ic_mce;
				+
				+	switch (LL(ec)) {
				+	case 0x2:
				+		pr_cont(" during a linefill from L2.\n");
				+		return;
				+
				+	case 0x1:
				+		/* decoded further by request type below */
				+		break;
				+
				+	default:
				+		/* other cache levels produce no additional output */
				+		return;
				+	}
				+
				+	switch (RRRR(ec)) {
				+	case 0x5:
				+		pr_cont(": Parity error during data load.\n");
				+		return;
				+
				+	case 0x7:
				+		pr_cont(": Copyback Parity/Victim error.\n");
				+		return;
				+
				+	case 0x8:
				+		pr_cont(": Tag Snoop error.\n");
				+		return;
				+
				+	default:
				+		goto wrong_ic_mce;
				+	}
				+
				+wrong_ic_mce:
				+	pr_warning("Corrupted IC MCE info?\n");
				+}
			
 
				+
			
 
				+/*
				+ * Decode a bus unit (bank 2) machine check from its status word and
				+ * finish the " Bus Unit Error" log line. The extended error code selects
				+ * the sub-decoder; anything unrecognised is flagged as possibly corrupted.
				+ */
				+static void amd_decode_bu_mce(u64 mc2_status)
				+{
				+	u32 ec = mc2_status & 0xffff;
				+	u32 xec = (mc2_status >> 16) & 0xf;
				+
				+	pr_emerg(" Bus Unit Error");
				+
				+	switch (xec) {
				+	case 0x1:
				+		pr_cont(" in the write data buffers.\n");
				+		return;
				+
				+	case 0x3:
				+		pr_cont(" in the victim data buffers.\n");
				+		return;
				+
				+	case 0x2:
				+		if (!MEM_ERROR(ec))
				+			goto wrong_bu_mce;
				+
				+		pr_cont(": %s error in the L2 cache tags.\n", RRRR_MSG(ec));
				+		return;
				+
				+	case 0x0:
				+		break;
				+
				+	default:
				+		goto wrong_bu_mce;
				+	}
				+
				+	/* xec == 0: the error code class decides */
				+	if (TLB_ERROR(ec)) {
				+		pr_cont(": %s error in a Page Descriptor Cache or Guest TLB.\n",
				+			TT_MSG(ec));
				+		return;
				+	}
				+
				+	if (BUS_ERROR(ec)) {
				+		pr_cont(": %s/ECC error in data read from NB: %s.\n",
				+			RRRR_MSG(ec), PP_MSG(ec));
				+		return;
				+	}
				+
				+	if (MEM_ERROR(ec)) {
				+		u8 req = RRRR(ec);
				+
				+		if (req >= 0x7) {
				+			pr_cont(": %s error during data copyback.\n",
				+				RRRR_MSG(ec));
				+			return;
				+		}
				+
				+		if (req <= 0x1) {
				+			pr_cont(": %s parity/ECC error during data access from L2.\n",
				+				RRRR_MSG(ec));
				+			return;
				+		}
				+	}
				+
				+wrong_bu_mce:
				+	pr_warning("Corrupted BU MCE info?\n");
				+}
			
 
				+
			
 
				+/*
				+ * Decode a load/store (bank 3) machine check from its status word. Only
				+ * a zero extended error code is decoded; it must be a bus error whose
				+ * RRRR field is 0x3 or 0x4, otherwise the record is flagged as possibly
				+ * corrupted.
				+ */
				+static void amd_decode_ls_mce(u64 mc3_status)
				+{
				+	u32 ec  = mc3_status & 0xffff;
				+	u32 xec = (mc3_status >> 16) & 0xf;
				+	u8 req  = RRRR(ec);
				+
				+	pr_emerg(" Load Store Error");
				+
				+	/* non-zero extended codes get no further output */
				+	if (xec != 0x0)
				+		return;
				+
				+	if (BUS_ERROR(ec) && (req == 0x3 || req == 0x4))
				+		pr_cont(" during %s.\n", RRRR_MSG(ec));
				+	else
				+		pr_warning("Corrupted LS MCE info?\n");
				+}
			
 
				+
			
 
				+/*
				+ * Decode a northbridge (bank 4) machine check.
				+ *
				+ * @node_id:       node number printed in the log line and passed on to the
				+ *                 registered bus-error decoder
				+ * @regs:          NB status words; only nbsl and nbsh are read here
				+ * @handle_errors: when zero, nothing is printed and no decoder is called
				+ *
				+ * Prints the node and (when available) the core, then the extended error
				+ * description, and finally hands bus errors to the decoder installed via
				+ * amd_register_ecc_decoder(), if any.
				+ */
				+void amd_decode_nb_mce(int node_id, struct err_regs *regs, int handle_errors)
				+{
				+	u32 ec  = ERROR_CODE(regs->nbsl);
				+	u32 xec = EXT_ERROR_CODE(regs->nbsl);
				+
				+	if (!handle_errors)
				+		return;
				+
				+	pr_emerg(" Northbridge Error, node %d", node_id);
				+
				+	/*
				+	 * F10h, revD can disable ErrCpu[3:0] so check that first and also the
				+	 * value encoding has changed so interpret those differently
				+	 */
				+	if ((boot_cpu_data.x86 == 0x10) &&
				+	    (boot_cpu_data.x86_model > 8)) {
				+		if (regs->nbsh & K8_NBSH_ERR_CPU_VAL)
				+			pr_cont(", core: %u\n", (u8)(regs->nbsh & 0xf));
				+		else
				+			/* still terminate the line when no core is reported */
				+			pr_cont("\n");
				+	} else {
				+		pr_cont(", core: %d\n", ilog2((regs->nbsh & 0xf)));
				+	}
				+
				+	/*
				+	 * xec is already the extracted 5-bit code, so index the table with it
				+	 * directly. EXT_ERR_MSG() expects the raw status word and would shift
				+	 * the value a second time, always selecting entry 0.
				+	 */
				+	pr_emerg("%s.\n", ext_msgs[xec]);
				+
				+	if (BUS_ERROR(ec) && nb_bus_decoder)
				+		nb_bus_decoder(node_id, regs);
				+}
				+EXPORT_SYMBOL_GPL(amd_decode_nb_mce);
			
 
				+
			
 
				+/*
				+ * Decode a bank 5 ("FR") machine check. Only one error signature is
				+ * known, so the whole 16-bit error code is compared in one go.
				+ */
				+static void amd_decode_fr_mce(u64 mc5_status)
				+{
				+	if ((mc5_status & 0xffff) != 0x0f0f) {
				+		pr_warning("Corrupted FR MCE info?\n");
				+		return;
				+	}
				+
				+	pr_emerg(" FR Error: CPU Watchdog timer expire.\n");
				+}
			
 
				+
			
 
				+/*
				+ * Print the generic decoding of a 16-bit MCA error code.
				+ *
				+ * The code is classified as a TLB, memory or bus error and its fields are
				+ * translated through the *_msgs tables. Every branch emits one complete,
				+ * newline-terminated line so that later log output starts on a fresh line.
				+ */
				+static inline void amd_decode_err_code(unsigned int ec)
				+{
				+	if (TLB_ERROR(ec)) {
				+		/*
				+		 * GART errors are intended to help graphics driver developers
				+		 * to detect bad GART PTEs. It is recommended by AMD to disable
				+		 * GART table walk error reporting by default[1] (currently
				+		 * being disabled in mce_cpu_quirks()) and according to the
				+		 * comment in mce_cpu_quirks(), such GART errors can be
				+		 * incorrectly triggered. We may see these errors anyway and
				+		 * unless requested by the user, they won't be reported.
				+		 *
				+		 * [1] section 13.10.1 on BIOS and Kernel Developers Guide for
				+		 *     AMD NPT family 0Fh processors
				+		 */
				+		if (!report_gart_errors)
				+			return;
				+
				+		pr_emerg(" Transaction: %s, Cache Level: %s\n",
				+			 TT_MSG(ec), LL_MSG(ec));
				+	} else if (MEM_ERROR(ec)) {
				+		pr_emerg(" Transaction: %s, Type: %s, Cache Level: %s\n",
				+			 RRRR_MSG(ec), TT_MSG(ec), LL_MSG(ec));
				+	} else if (BUS_ERROR(ec)) {
				+		pr_emerg(" Transaction type: %s(%s), %s, Cache Level: %s, "
				+			 "Participating Processor: %s\n",
				+			  RRRR_MSG(ec), II_MSG(ec), TO_MSG(ec), LL_MSG(ec),
				+			  PP_MSG(ec));
				+	} else
				+		pr_warning("Huh? Unknown MCE error 0x%x\n", ec);
				+}
			
 
				+
			
 
				+/*
				+ * Decode a machine check record into human-readable log lines.
				+ *
				+ * Prints a summary of the generic status flags, dispatches on m->bank to
				+ * the matching decoder (banks 0-5; other banks get no bank-specific
				+ * output) and finally decodes the low 16 bits of the status word as a
				+ * generic error code.
				+ */
				+void decode_mce(struct mce *m)
				+{
				+	struct err_regs regs;
				+	int node, ecc;
				+
				+	pr_emerg("MC%d_STATUS: ", m->bank);
				+
				+	pr_cont("%sorrected error, report: %s, MiscV: %svalid, "
				+		 "CPU context corrupt: %s",
				+		 ((m->status & MCI_STATUS_UC) ? "Unc"  : "C"),
				+		 ((m->status & MCI_STATUS_EN) ? "yes"  : "no"),
				+		 ((m->status & MCI_STATUS_MISCV) ? ""  : "in"),
				+		 ((m->status & MCI_STATUS_PCC) ? "yes" : "no"));
				+
				+	/*
				+	 * do the two bits[14:13] of the upper status half (bits 46:45 of the
				+	 * full word) together. Shift them down before storing: masking in
				+	 * place and assigning to an int would truncate both bits away, so the
				+	 * ECC state would never be printed.
				+	 */
				+	ecc = (m->status >> 45) & 0x3;
				+	if (ecc)
				+		pr_cont(", %sECC Error", ((ecc == 2) ? "C" : "U"));
				+
				+	pr_cont("\n");
				+
				+	switch (m->bank) {
				+	case 0:
				+		amd_decode_dc_mce(m->status);
				+		break;
				+
				+	case 1:
				+		amd_decode_ic_mce(m->status);
				+		break;
				+
				+	case 2:
				+		amd_decode_bu_mce(m->status);
				+		break;
				+
				+	case 3:
				+		amd_decode_ls_mce(m->status);
				+		break;
				+
				+	case 4:
				+		/* split the 64-bit status and address into the 32-bit NB view */
				+		regs.nbsl  = (u32) m->status;
				+		regs.nbsh  = (u32)(m->status >> 32);
				+		regs.nbeal = (u32) m->addr;
				+		regs.nbeah = (u32)(m->addr >> 32);
				+		node       = per_cpu(cpu_llc_id, m->extcpu);
				+
				+		amd_decode_nb_mce(node, &regs, 1);
				+		break;
				+
				+	case 5:
				+		amd_decode_fr_mce(m->status);
				+		break;
				+
				+	default:
				+		break;
				+	}
				+
				+	amd_decode_err_code(m->status & 0xffff);
				+}
			
--- a/drivers/edac/edac_mce_amd.h
+++ b/drivers/edac/edac_mce_amd.h
@@ -0,0 +1,69 @@
 
				+#ifndef _EDAC_MCE_AMD_H
			
 
				+#define _EDAC_MCE_AMD_H
			
 
				+
			
 
				+#include <asm/mce.h>
			
 
				+
			
 
				+#define ERROR_CODE(x)			((x) & 0xffff)	/* bits [15:0] of a status word */
			
 
				+#define EXT_ERROR_CODE(x)		(((x) >> 16) & 0x1f)	/* bits [20:16] of a status word */
			
 
				+#define EXT_ERR_MSG(x)			ext_msgs[EXT_ERROR_CODE(x)]	/* x is the raw status word, not an already extracted code */
			
 
				+
			
 
				+#define LOW_SYNDROME(x)			(((x) >> 15) & 0xff)	/* bits [22:15] */
			
 
				+#define HIGH_SYNDROME(x)		(((x) >> 24) & 0xff)	/* bits [31:24] */
			
 
				+
			
 
				+#define TLB_ERROR(x)			(((x) & 0xFFF0) == 0x0010)	/* bits [15:4] == 0x001 */
			
 
				+#define MEM_ERROR(x)			(((x) & 0xFF00) == 0x0100)	/* bits [15:8] == 0x01 */
			
 
				+#define BUS_ERROR(x)			(((x) & 0xF800) == 0x0800)	/* bits [15:11] == 00001b */
			
 
				+
			
 
				+#define TT(x)				(((x) >> 2) & 0x3)	/* bits [3:2] */
			
 
				+#define TT_MSG(x)			tt_msgs[TT(x)]
			
 
				+#define II(x)				(((x) >> 2) & 0x3)	/* bits [3:2], same position as TT */
			
 
				+#define II_MSG(x)			ii_msgs[II(x)]
			
 
				+#define LL(x)				(((x) >> 0) & 0x3)	/* bits [1:0] */
			
 
				+#define LL_MSG(x)			ll_msgs[LL(x)]
			
 
				+#define RRRR(x)				(((x) >> 4) & 0xf)	/* bits [7:4] */
			
 
				+#define RRRR_MSG(x)			rrrr_msgs[RRRR(x)]
			
 
				+#define TO(x)				(((x) >> 8) & 0x1)	/* bit 8 */
			
 
				+#define TO_MSG(x)			to_msgs[TO(x)]
			
 
				+#define PP(x)				(((x) >> 9) & 0x3)	/* bits [10:9] */
			
 
				+#define PP_MSG(x)			pp_msgs[PP(x)]
			
 
				+
			
 
				+#define K8_NBSH				0x4C	/* NOTE(review): looks like a register offset - confirm against the users of this macro */
			
 
				+
			
 
				+#define K8_NBSH_VALID_BIT		BIT(31)	/* presumably all K8_NBSH_* masks apply to err_regs.nbsh, as K8_NBSH_ERR_CPU_VAL does in amd_decode_nb_mce() - TODO confirm */
			
 
				+#define K8_NBSH_OVERFLOW		BIT(30)
			
 
				+#define K8_NBSH_UC_ERR			BIT(29)
			
 
				+#define K8_NBSH_ERR_EN			BIT(28)
			
 
				+#define K8_NBSH_MISCV			BIT(27)
			
 
				+#define K8_NBSH_VALID_ERROR_ADDR	BIT(26)
			
 
				+#define K8_NBSH_PCC			BIT(25)
			
 
				+#define K8_NBSH_ERR_CPU_VAL		BIT(24)	/* tested by amd_decode_nb_mce() before it prints the core number on F10h models > 8 */
			
 
				+#define K8_NBSH_CECC			BIT(14)
			
 
				+#define K8_NBSH_UECC			BIT(13)
			
 
				+#define K8_NBSH_ERR_SCRUBER		BIT(8)
			
 
				+
			
 
				+extern const char *tt_msgs[];	/* string tables defined in edac_mce_amd.c; index them through the *_MSG() macros */
			
 
				+extern const char *ll_msgs[];
			
 
				+extern const char *rrrr_msgs[];
			
 
				+extern const char *pp_msgs[];
			
 
				+extern const char *to_msgs[];
			
 
				+extern const char *ii_msgs[];
			
 
				+extern const char *ext_msgs[];	/* indexed by the 5-bit EXT_ERROR_CODE() value */
			
 
				+
			
 
				+/*
			
 
				+ * relevant NB regs, passed to amd_decode_nb_mce() and the registered decoder
			
 
				+ */
			
 
				+struct err_regs {
			
 
				+	u32 nbcfg;	/* not filled in by decode_mce(); presumably NB configuration - TODO confirm */
			
 
				+	u32 nbsh;	/* upper 32 bits of the bank 4 status word */
			
 
				+	u32 nbsl;	/* lower 32 bits of the bank 4 status word */
			
 
				+	u32 nbeah;	/* upper 32 bits of the bank 4 address */
			
 
				+	u32 nbeal;	/* lower 32 bits of the bank 4 address */
			
 
				+};
			
 
				+
			
 
				+
			
 
				+/* Exported interface of edac_mce_amd.c; parameter names match the definitions. */
				+void amd_report_gart_errors(bool v);
				+void amd_register_ecc_decoder(void (*f)(int node_id, struct err_regs *regs));
				+void amd_unregister_ecc_decoder(void (*f)(int node_id, struct err_regs *regs));
				+void amd_decode_nb_mce(int node_id, struct err_regs *regs, int handle_errors);
			
 
				+
			
 
				+#endif /* _EDAC_MCE_AMD_H */