|
@@ -22,6 +22,10 @@ struct ghes_edac_pvt {
|
|
|
struct list_head list;
|
|
|
struct ghes *ghes;
|
|
|
struct mem_ctl_info *mci;
|
|
|
+
|
|
|
+ /* Buffers for the error handling routine */
|
|
|
+ char other_detail[160];
|
|
|
+ char msg[80];
|
|
|
};
|
|
|
|
|
|
static LIST_HEAD(ghes_reglist);
|
|
@@ -186,6 +190,7 @@ void ghes_edac_report_mem_error(struct ghes *ghes, int sev,
|
|
|
struct edac_raw_error_desc *e;
|
|
|
struct mem_ctl_info *mci;
|
|
|
struct ghes_edac_pvt *pvt = NULL;
|
|
|
+ char *p;
|
|
|
|
|
|
list_for_each_entry(pvt, &ghes_reglist, list) {
|
|
|
if (ghes == pvt->ghes)
|
|
@@ -201,15 +206,14 @@ void ghes_edac_report_mem_error(struct ghes *ghes, int sev,
|
|
|
/* Cleans the error report buffer */
|
|
|
memset(e, 0, sizeof (*e));
|
|
|
e->error_count = 1;
|
|
|
- e->msg = "APEI";
|
|
|
- strcpy(e->label, "unknown");
|
|
|
- e->other_detail = "";
|
|
|
-
|
|
|
- if (mem_err->validation_bits & CPER_MEM_VALID_PHYSICAL_ADDRESS) {
|
|
|
- e->page_frame_number = mem_err->physical_addr >> PAGE_SHIFT;
|
|
|
- e->offset_in_page = mem_err->physical_addr & ~PAGE_MASK;
|
|
|
- e->grain = ~(mem_err->physical_addr_mask & ~PAGE_MASK);
|
|
|
- }
|
|
|
+ strcpy(e->label, "unknown label");
|
|
|
+ e->msg = pvt->msg;
|
|
|
+ e->other_detail = pvt->other_detail;
|
|
|
+ e->top_layer = -1;
|
|
|
+ e->mid_layer = -1;
|
|
|
+ e->low_layer = -1;
|
|
|
+ *pvt->other_detail = '\0';
|
|
|
+ *pvt->msg = '\0';
|
|
|
|
|
|
switch (sev) {
|
|
|
case GHES_SEV_CORRECTED:
|
|
@@ -226,12 +230,173 @@ void ghes_edac_report_mem_error(struct ghes *ghes, int sev,
|
|
|
type = HW_EVENT_ERR_INFO;
|
|
|
}
|
|
|
|
|
|
- sprintf(e->location,
|
|
|
- "node:%d card:%d module:%d bank:%d device:%d row: %d column:%d bit_pos:%d",
|
|
|
- mem_err->node, mem_err->card, mem_err->module,
|
|
|
- mem_err->bank, mem_err->device, mem_err->row, mem_err->column,
|
|
|
- mem_err->bit_pos);
|
|
|
- edac_dbg(3, "error at location %s\n", e->location);
|
|
|
+ edac_dbg(1, "error validation_bits: 0x%08llx\n",
|
|
|
+ (long long)mem_err->validation_bits);
|
|
|
+
|
|
|
+ /* Error type, mapped on e->msg */
|
|
|
+ if (mem_err->validation_bits & CPER_MEM_VALID_ERROR_TYPE) {
|
|
|
+ p = pvt->msg;
|
|
|
+ switch (mem_err->error_type) {
|
|
|
+ case 0:
|
|
|
+ p += sprintf(p, "Unknown");
|
|
|
+ break;
|
|
|
+ case 1:
|
|
|
+ p += sprintf(p, "No error");
|
|
|
+ break;
|
|
|
+ case 2:
|
|
|
+ p += sprintf(p, "Single-bit ECC");
|
|
|
+ break;
|
|
|
+ case 3:
|
|
|
+ p += sprintf(p, "Multi-bit ECC");
|
|
|
+ break;
|
|
|
+ case 4:
|
|
|
+ p += sprintf(p, "Single-symbol ChipKill ECC");
|
|
|
+ break;
|
|
|
+ case 5:
|
|
|
+ p += sprintf(p, "Multi-symbol ChipKill ECC");
|
|
|
+ break;
|
|
|
+ case 6:
|
|
|
+ p += sprintf(p, "Master abort");
|
|
|
+ break;
|
|
|
+ case 7:
|
|
|
+ p += sprintf(p, "Target abort");
|
|
|
+ break;
|
|
|
+ case 8:
|
|
|
+ p += sprintf(p, "Parity Error");
|
|
|
+ break;
|
|
|
+ case 9:
|
|
|
+ p += sprintf(p, "Watchdog timeout");
|
|
|
+ break;
|
|
|
+ case 10:
|
|
|
+ p += sprintf(p, "Invalid address");
|
|
|
+ break;
|
|
|
+ case 11:
|
|
|
+ p += sprintf(p, "Mirror Broken");
|
|
|
+ break;
|
|
|
+ case 12:
|
|
|
+ p += sprintf(p, "Memory Sparing");
|
|
|
+ break;
|
|
|
+ case 13:
|
|
|
+ p += sprintf(p, "Scrub corrected error");
|
|
|
+ break;
|
|
|
+ case 14:
|
|
|
+ p += sprintf(p, "Scrub uncorrected error");
|
|
|
+ break;
|
|
|
+ case 15:
|
|
|
+ p += sprintf(p, "Physical Memory Map-out event");
|
|
|
+ break;
|
|
|
+ default:
|
|
|
+ p += sprintf(p, "reserved error (%d)",
|
|
|
+ mem_err->error_type);
|
|
|
+ }
|
|
|
+ } else {
|
|
|
+ strcpy(pvt->msg, "unknown error");
|
|
|
+ }
|
|
|
+
|
|
|
+ /* Error address */
|
|
|
+ if (mem_err->validation_bits & CPER_MEM_VALID_PHYSICAL_ADDRESS) {
|
|
|
+ e->page_frame_number = mem_err->physical_addr >> PAGE_SHIFT;
|
|
|
+ e->offset_in_page = mem_err->physical_addr & ~PAGE_MASK;
|
|
|
+ }
|
|
|
+
|
|
|
+ /* Error grain */
|
|
|
+ if (mem_err->validation_bits & CPER_MEM_VALID_PHYSICAL_ADDRESS_MASK) {
|
|
|
+ e->grain = ~(mem_err->physical_addr_mask & ~PAGE_MASK);
|
|
|
+ }
|
|
|
+
|
|
|
+ /* Memory error location, mapped on e->location */
|
|
|
+ p = e->location;
|
|
|
+ if (mem_err->validation_bits & CPER_MEM_VALID_NODE)
|
|
|
+ p += sprintf(p, "node:%d ", mem_err->node);
|
|
|
+ if (mem_err->validation_bits & CPER_MEM_VALID_CARD)
|
|
|
+ p += sprintf(p, "card:%d ", mem_err->card);
|
|
|
+ if (mem_err->validation_bits & CPER_MEM_VALID_MODULE)
|
|
|
+ p += sprintf(p, "module:%d ", mem_err->module);
|
|
|
+ if (mem_err->validation_bits & CPER_MEM_VALID_BANK)
|
|
|
+ p += sprintf(p, "bank:%d ", mem_err->bank);
|
|
|
+ if (mem_err->validation_bits & CPER_MEM_VALID_ROW)
|
|
|
+ p += sprintf(p, "row:%d ", mem_err->row);
|
|
|
+ if (mem_err->validation_bits & CPER_MEM_VALID_COLUMN)
|
|
|
+ p += sprintf(p, "col:%d ", mem_err->column);
|
|
|
+ if (mem_err->validation_bits & CPER_MEM_VALID_BIT_POSITION)
|
|
|
+ p += sprintf(p, "bit_pos:%d ", mem_err->bit_pos);
|
|
|
+ if (p > e->location)
|
|
|
+ *(p - 1) = '\0';
|
|
|
+
|
|
|
+ /* All other fields are mapped on e->other_detail */
|
|
|
+ p = pvt->other_detail;
|
|
|
+ if (mem_err->validation_bits & CPER_MEM_VALID_ERROR_STATUS) {
|
|
|
+ u64 status = mem_err->error_status;
|
|
|
+
|
|
|
+ p += sprintf(p, "status(0x%016llx): ", (long long)status);
|
|
|
+ switch ((status >> 8) & 0xff) {
|
|
|
+ case 1:
|
|
|
+ p += sprintf(p, "Error detected internal to the component ");
|
|
|
+ break;
|
|
|
+ case 16:
|
|
|
+ p += sprintf(p, "Error detected in the bus ");
|
|
|
+ break;
|
|
|
+ case 4:
|
|
|
+ p += sprintf(p, "Storage error in DRAM memory ");
|
|
|
+ break;
|
|
|
+ case 5:
|
|
|
+ p += sprintf(p, "Storage error in TLB ");
|
|
|
+ break;
|
|
|
+ case 6:
|
|
|
+ p += sprintf(p, "Storage error in cache ");
|
|
|
+ break;
|
|
|
+ case 7:
|
|
|
+ p += sprintf(p, "Error in one or more functional units ");
|
|
|
+ break;
|
|
|
+ case 8:
|
|
|
+ p += sprintf(p, "component failed self test ");
|
|
|
+ break;
|
|
|
+ case 9:
|
|
|
+ p += sprintf(p, "Overflow or undervalue of internal queue ");
|
|
|
+ break;
|
|
|
+ case 17:
|
|
|
+ p += sprintf(p, "Virtual address not found on IO-TLB or IO-PDIR ");
|
|
|
+ break;
|
|
|
+ case 18:
|
|
|
+ p += sprintf(p, "Improper access error ");
|
|
|
+ break;
|
|
|
+ case 19:
|
|
|
+ p += sprintf(p, "Access to a memory address which is not mapped to any component ");
|
|
|
+ break;
|
|
|
+ case 20:
|
|
|
+ p += sprintf(p, "Loss of Lockstep ");
|
|
|
+ break;
|
|
|
+ case 21:
|
|
|
+ p += sprintf(p, "Response not associated with a request ");
|
|
|
+ break;
|
|
|
+ case 22:
|
|
|
+ p += sprintf(p, "Bus parity error - must also set the A, C, or D Bits ");
|
|
|
+ break;
|
|
|
+ case 23:
|
|
|
+ p += sprintf(p, "Detection of a PATH_ERROR ");
|
|
|
+ break;
|
|
|
+ case 25:
|
|
|
+ p += sprintf(p, "Bus operation timeout ");
|
|
|
+ break;
|
|
|
+ case 26:
|
|
|
+ p += sprintf(p, "A read was issued to data that has been poisoned ");
|
|
|
+ break;
|
|
|
+ default:
|
|
|
+ p += sprintf(p, "reserved ");
|
|
|
+ break;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ if (mem_err->validation_bits & CPER_MEM_VALID_REQUESTOR_ID)
|
|
|
+ p += sprintf(p, "requestorID: 0x%016llx ",
|
|
|
+ (long long)mem_err->requestor_id);
|
|
|
+ if (mem_err->validation_bits & CPER_MEM_VALID_RESPONDER_ID)
|
|
|
+ p += sprintf(p, "responderID: 0x%016llx ",
|
|
|
+ (long long)mem_err->responder_id);
|
|
|
+ if (mem_err->validation_bits & CPER_MEM_VALID_TARGET_ID)
|
|
|
+ p += sprintf(p, "targetID: 0x%016llx ",
|
|
|
+ (long long)mem_err->responder_id);
|
|
|
+ if (p > pvt->other_detail)
|
|
|
+ *(p - 1) = '\0';
|
|
|
|
|
|
edac_raw_mc_handle_error(type, mci, e);
|
|
|
}
|