|
@@ -5,10 +5,10 @@
|
|
|
* Author: Huang Ying <ying.huang@intel.com>
|
|
|
*
|
|
|
* CPER is the format used to describe platform hardware error by
|
|
|
- * various APEI tables, such as ERST, BERT and HEST etc.
|
|
|
+ * various tables, such as ERST, BERT and HEST.
|
|
|
*
|
|
|
* For more information about CPER, please refer to Appendix N of UEFI
|
|
|
- * Specification version 2.3.
|
|
|
+ * Specification version 2.4.
|
|
|
*
|
|
|
* This program is free software; you can redistribute it and/or
|
|
|
* modify it under the terms of the GNU General Public License version
|
|
@@ -28,10 +28,12 @@
|
|
|
#include <linux/module.h>
|
|
|
#include <linux/time.h>
|
|
|
#include <linux/cper.h>
|
|
|
+#include <linux/dmi.h>
|
|
|
#include <linux/acpi.h>
|
|
|
#include <linux/pci.h>
|
|
|
#include <linux/aer.h>
|
|
|
|
|
|
+#define INDENT_SP " "
|
|
|
/*
|
|
|
* CPER record ID needs to be unique even after reboot, because record
|
|
|
* ID is used as index for ERST storage, while CPER records from
|
|
@@ -73,7 +75,7 @@ static const char *cper_severity_str(unsigned int severity)
|
|
|
* printed, with @pfx printed at the beginning of each line.
|
|
|
*/
|
|
|
void cper_print_bits(const char *pfx, unsigned int bits,
|
|
|
- const char *strs[], unsigned int strs_size)
|
|
|
+ const char * const strs[], unsigned int strs_size)
|
|
|
{
|
|
|
int i, len = 0;
|
|
|
const char *str;
|
|
@@ -98,32 +100,32 @@ void cper_print_bits(const char *pfx, unsigned int bits,
|
|
|
printk("%s\n", buf);
|
|
|
}
|
|
|
|
|
|
-static const char *cper_proc_type_strs[] = {
|
|
|
+static const char * const cper_proc_type_strs[] = {
|
|
|
"IA32/X64",
|
|
|
"IA64",
|
|
|
};
|
|
|
|
|
|
-static const char *cper_proc_isa_strs[] = {
|
|
|
+static const char * const cper_proc_isa_strs[] = {
|
|
|
"IA32",
|
|
|
"IA64",
|
|
|
"X64",
|
|
|
};
|
|
|
|
|
|
-static const char *cper_proc_error_type_strs[] = {
|
|
|
+static const char * const cper_proc_error_type_strs[] = {
|
|
|
"cache error",
|
|
|
"TLB error",
|
|
|
"bus error",
|
|
|
"micro-architectural error",
|
|
|
};
|
|
|
|
|
|
-static const char *cper_proc_op_strs[] = {
|
|
|
+static const char * const cper_proc_op_strs[] = {
|
|
|
"unknown or generic",
|
|
|
"data read",
|
|
|
"data write",
|
|
|
"instruction execution",
|
|
|
};
|
|
|
|
|
|
-static const char *cper_proc_flag_strs[] = {
|
|
|
+static const char * const cper_proc_flag_strs[] = {
|
|
|
"restartable",
|
|
|
"precise IP",
|
|
|
"overflow",
|
|
@@ -191,46 +193,58 @@ static const char *cper_mem_err_type_strs[] = {
|
|
|
"memory sparing",
|
|
|
"scrub corrected error",
|
|
|
"scrub uncorrected error",
|
|
|
+ "physical memory map-out event",
|
|
|
};
|
|
|
|
|
|
static void cper_print_mem(const char *pfx, const struct cper_sec_mem_err *mem)
|
|
|
{
|
|
|
if (mem->validation_bits & CPER_MEM_VALID_ERROR_STATUS)
|
|
|
printk("%s""error_status: 0x%016llx\n", pfx, mem->error_status);
|
|
|
- if (mem->validation_bits & CPER_MEM_VALID_PHYSICAL_ADDRESS)
|
|
|
+ if (mem->validation_bits & CPER_MEM_VALID_PA)
|
|
|
printk("%s""physical_address: 0x%016llx\n",
|
|
|
pfx, mem->physical_addr);
|
|
|
- if (mem->validation_bits & CPER_MEM_VALID_PHYSICAL_ADDRESS_MASK)
|
|
|
+ if (mem->validation_bits & CPER_MEM_VALID_PA_MASK)
|
|
|
printk("%s""physical_address_mask: 0x%016llx\n",
|
|
|
pfx, mem->physical_addr_mask);
|
|
|
if (mem->validation_bits & CPER_MEM_VALID_NODE)
|
|
|
- printk("%s""node: %d\n", pfx, mem->node);
|
|
|
+ pr_debug("node: %d\n", mem->node);
|
|
|
if (mem->validation_bits & CPER_MEM_VALID_CARD)
|
|
|
- printk("%s""card: %d\n", pfx, mem->card);
|
|
|
+ pr_debug("card: %d\n", mem->card);
|
|
|
if (mem->validation_bits & CPER_MEM_VALID_MODULE)
|
|
|
- printk("%s""module: %d\n", pfx, mem->module);
|
|
|
+ pr_debug("module: %d\n", mem->module);
|
|
|
+ if (mem->validation_bits & CPER_MEM_VALID_RANK_NUMBER)
|
|
|
+ pr_debug("rank: %d\n", mem->rank);
|
|
|
if (mem->validation_bits & CPER_MEM_VALID_BANK)
|
|
|
- printk("%s""bank: %d\n", pfx, mem->bank);
|
|
|
+ pr_debug("bank: %d\n", mem->bank);
|
|
|
if (mem->validation_bits & CPER_MEM_VALID_DEVICE)
|
|
|
- printk("%s""device: %d\n", pfx, mem->device);
|
|
|
+ pr_debug("device: %d\n", mem->device);
|
|
|
if (mem->validation_bits & CPER_MEM_VALID_ROW)
|
|
|
- printk("%s""row: %d\n", pfx, mem->row);
|
|
|
+ pr_debug("row: %d\n", mem->row);
|
|
|
if (mem->validation_bits & CPER_MEM_VALID_COLUMN)
|
|
|
- printk("%s""column: %d\n", pfx, mem->column);
|
|
|
+ pr_debug("column: %d\n", mem->column);
|
|
|
if (mem->validation_bits & CPER_MEM_VALID_BIT_POSITION)
|
|
|
- printk("%s""bit_position: %d\n", pfx, mem->bit_pos);
|
|
|
+ pr_debug("bit_position: %d\n", mem->bit_pos);
|
|
|
if (mem->validation_bits & CPER_MEM_VALID_REQUESTOR_ID)
|
|
|
- printk("%s""requestor_id: 0x%016llx\n", pfx, mem->requestor_id);
|
|
|
+ pr_debug("requestor_id: 0x%016llx\n", mem->requestor_id);
|
|
|
if (mem->validation_bits & CPER_MEM_VALID_RESPONDER_ID)
|
|
|
- printk("%s""responder_id: 0x%016llx\n", pfx, mem->responder_id);
|
|
|
+ pr_debug("responder_id: 0x%016llx\n", mem->responder_id);
|
|
|
if (mem->validation_bits & CPER_MEM_VALID_TARGET_ID)
|
|
|
- printk("%s""target_id: 0x%016llx\n", pfx, mem->target_id);
|
|
|
+ pr_debug("target_id: 0x%016llx\n", mem->target_id);
|
|
|
if (mem->validation_bits & CPER_MEM_VALID_ERROR_TYPE) {
|
|
|
u8 etype = mem->error_type;
|
|
|
printk("%s""error_type: %d, %s\n", pfx, etype,
|
|
|
etype < ARRAY_SIZE(cper_mem_err_type_strs) ?
|
|
|
cper_mem_err_type_strs[etype] : "unknown");
|
|
|
}
|
|
|
+ if (mem->validation_bits & CPER_MEM_VALID_MODULE_HANDLE) {
|
|
|
+ const char *bank = NULL, *device = NULL;
|
|
|
+ dmi_memdev_name(mem->mem_dev_handle, &bank, &device);
|
|
|
+ if (bank != NULL && device != NULL)
|
|
|
+ printk("%s""DIMM location: %s %s", pfx, bank, device);
|
|
|
+ else
|
|
|
+ printk("%s""DIMM DMI handle: 0x%.4x",
|
|
|
+ pfx, mem->mem_dev_handle);
|
|
|
+ }
|
|
|
}
|
|
|
|
|
|
static const char *cper_pcie_port_type_strs[] = {
|
|
@@ -248,7 +262,7 @@ static const char *cper_pcie_port_type_strs[] = {
|
|
|
};
|
|
|
|
|
|
static void cper_print_pcie(const char *pfx, const struct cper_sec_pcie *pcie,
|
|
|
- const struct acpi_hest_generic_data *gdata)
|
|
|
+ const struct acpi_generic_data *gdata)
|
|
|
{
|
|
|
if (pcie->validation_bits & CPER_PCIE_VALID_PORT_TYPE)
|
|
|
printk("%s""port_type: %d, %s\n", pfx, pcie->port_type,
|
|
@@ -283,55 +297,45 @@ static void cper_print_pcie(const char *pfx, const struct cper_sec_pcie *pcie,
|
|
|
pfx, pcie->bridge.secondary_status, pcie->bridge.control);
|
|
|
}
|
|
|
|
|
|
-static const char *apei_estatus_section_flag_strs[] = {
|
|
|
- "primary",
|
|
|
- "containment warning",
|
|
|
- "reset",
|
|
|
- "threshold exceeded",
|
|
|
- "resource not accessible",
|
|
|
- "latent error",
|
|
|
-};
|
|
|
-
|
|
|
-static void apei_estatus_print_section(
|
|
|
- const char *pfx, const struct acpi_hest_generic_data *gdata, int sec_no)
|
|
|
+static void cper_estatus_print_section(
|
|
|
+ const char *pfx, const struct acpi_generic_data *gdata, int sec_no)
|
|
|
{
|
|
|
uuid_le *sec_type = (uuid_le *)gdata->section_type;
|
|
|
__u16 severity;
|
|
|
+ char newpfx[64];
|
|
|
|
|
|
severity = gdata->error_severity;
|
|
|
- printk("%s""section: %d, severity: %d, %s\n", pfx, sec_no, severity,
|
|
|
+ printk("%s""Error %d, type: %s\n", pfx, sec_no,
|
|
|
cper_severity_str(severity));
|
|
|
- printk("%s""flags: 0x%02x\n", pfx, gdata->flags);
|
|
|
- cper_print_bits(pfx, gdata->flags, apei_estatus_section_flag_strs,
|
|
|
- ARRAY_SIZE(apei_estatus_section_flag_strs));
|
|
|
if (gdata->validation_bits & CPER_SEC_VALID_FRU_ID)
|
|
|
printk("%s""fru_id: %pUl\n", pfx, (uuid_le *)gdata->fru_id);
|
|
|
if (gdata->validation_bits & CPER_SEC_VALID_FRU_TEXT)
|
|
|
printk("%s""fru_text: %.20s\n", pfx, gdata->fru_text);
|
|
|
|
|
|
+ snprintf(newpfx, sizeof(newpfx), "%s%s", pfx, INDENT_SP);
|
|
|
if (!uuid_le_cmp(*sec_type, CPER_SEC_PROC_GENERIC)) {
|
|
|
struct cper_sec_proc_generic *proc_err = (void *)(gdata + 1);
|
|
|
- printk("%s""section_type: general processor error\n", pfx);
|
|
|
+ printk("%s""section_type: general processor error\n", newpfx);
|
|
|
if (gdata->error_data_length >= sizeof(*proc_err))
|
|
|
- cper_print_proc_generic(pfx, proc_err);
|
|
|
+ cper_print_proc_generic(newpfx, proc_err);
|
|
|
else
|
|
|
goto err_section_too_small;
|
|
|
} else if (!uuid_le_cmp(*sec_type, CPER_SEC_PLATFORM_MEM)) {
|
|
|
struct cper_sec_mem_err *mem_err = (void *)(gdata + 1);
|
|
|
- printk("%s""section_type: memory error\n", pfx);
|
|
|
+ printk("%s""section_type: memory error\n", newpfx);
|
|
|
if (gdata->error_data_length >= sizeof(*mem_err))
|
|
|
- cper_print_mem(pfx, mem_err);
|
|
|
+ cper_print_mem(newpfx, mem_err);
|
|
|
else
|
|
|
goto err_section_too_small;
|
|
|
} else if (!uuid_le_cmp(*sec_type, CPER_SEC_PCIE)) {
|
|
|
struct cper_sec_pcie *pcie = (void *)(gdata + 1);
|
|
|
- printk("%s""section_type: PCIe error\n", pfx);
|
|
|
+ printk("%s""section_type: PCIe error\n", newpfx);
|
|
|
if (gdata->error_data_length >= sizeof(*pcie))
|
|
|
- cper_print_pcie(pfx, pcie, gdata);
|
|
|
+ cper_print_pcie(newpfx, pcie, gdata);
|
|
|
else
|
|
|
goto err_section_too_small;
|
|
|
} else
|
|
|
- printk("%s""section type: unknown, %pUl\n", pfx, sec_type);
|
|
|
+ printk("%s""section type: unknown, %pUl\n", newpfx, sec_type);
|
|
|
|
|
|
return;
|
|
|
|
|
@@ -339,34 +343,38 @@ err_section_too_small:
|
|
|
pr_err(FW_WARN "error section length is too small\n");
|
|
|
}
|
|
|
|
|
|
-void apei_estatus_print(const char *pfx,
|
|
|
- const struct acpi_hest_generic_status *estatus)
|
|
|
+void cper_estatus_print(const char *pfx,
|
|
|
+ const struct acpi_generic_status *estatus)
|
|
|
{
|
|
|
- struct acpi_hest_generic_data *gdata;
|
|
|
+ struct acpi_generic_data *gdata;
|
|
|
unsigned int data_len, gedata_len;
|
|
|
int sec_no = 0;
|
|
|
+ char newpfx[64];
|
|
|
__u16 severity;
|
|
|
|
|
|
- printk("%s""APEI generic hardware error status\n", pfx);
|
|
|
severity = estatus->error_severity;
|
|
|
- printk("%s""severity: %d, %s\n", pfx, severity,
|
|
|
- cper_severity_str(severity));
|
|
|
+ if (severity == CPER_SEV_CORRECTED)
|
|
|
+ printk("%s%s\n", pfx,
|
|
|
+ "It has been corrected by h/w "
|
|
|
+ "and requires no further action");
|
|
|
+ printk("%s""event severity: %s\n", pfx, cper_severity_str(severity));
|
|
|
data_len = estatus->data_length;
|
|
|
- gdata = (struct acpi_hest_generic_data *)(estatus + 1);
|
|
|
- while (data_len > sizeof(*gdata)) {
|
|
|
+ gdata = (struct acpi_generic_data *)(estatus + 1);
|
|
|
+ snprintf(newpfx, sizeof(newpfx), "%s%s", pfx, INDENT_SP);
|
|
|
+ while (data_len >= sizeof(*gdata)) {
|
|
|
gedata_len = gdata->error_data_length;
|
|
|
- apei_estatus_print_section(pfx, gdata, sec_no);
|
|
|
+ cper_estatus_print_section(newpfx, gdata, sec_no);
|
|
|
data_len -= gedata_len + sizeof(*gdata);
|
|
|
gdata = (void *)(gdata + 1) + gedata_len;
|
|
|
sec_no++;
|
|
|
}
|
|
|
}
|
|
|
-EXPORT_SYMBOL_GPL(apei_estatus_print);
|
|
|
+EXPORT_SYMBOL_GPL(cper_estatus_print);
|
|
|
|
|
|
-int apei_estatus_check_header(const struct acpi_hest_generic_status *estatus)
|
|
|
+int cper_estatus_check_header(const struct acpi_generic_status *estatus)
|
|
|
{
|
|
|
if (estatus->data_length &&
|
|
|
- estatus->data_length < sizeof(struct acpi_hest_generic_data))
|
|
|
+ estatus->data_length < sizeof(struct acpi_generic_data))
|
|
|
return -EINVAL;
|
|
|
if (estatus->raw_data_length &&
|
|
|
estatus->raw_data_offset < sizeof(*estatus) + estatus->data_length)
|
|
@@ -374,19 +382,19 @@ int apei_estatus_check_header(const struct acpi_hest_generic_status *estatus)
|
|
|
|
|
|
return 0;
|
|
|
}
|
|
|
-EXPORT_SYMBOL_GPL(apei_estatus_check_header);
|
|
|
+EXPORT_SYMBOL_GPL(cper_estatus_check_header);
|
|
|
|
|
|
-int apei_estatus_check(const struct acpi_hest_generic_status *estatus)
|
|
|
+int cper_estatus_check(const struct acpi_generic_status *estatus)
|
|
|
{
|
|
|
- struct acpi_hest_generic_data *gdata;
|
|
|
+ struct acpi_generic_data *gdata;
|
|
|
unsigned int data_len, gedata_len;
|
|
|
int rc;
|
|
|
|
|
|
- rc = apei_estatus_check_header(estatus);
|
|
|
+ rc = cper_estatus_check_header(estatus);
|
|
|
if (rc)
|
|
|
return rc;
|
|
|
data_len = estatus->data_length;
|
|
|
- gdata = (struct acpi_hest_generic_data *)(estatus + 1);
|
|
|
+ gdata = (struct acpi_generic_data *)(estatus + 1);
|
|
|
while (data_len >= sizeof(*gdata)) {
|
|
|
gedata_len = gdata->error_data_length;
|
|
|
if (gedata_len > data_len - sizeof(*gdata))
|
|
@@ -399,4 +407,4 @@ int apei_estatus_check(const struct acpi_hest_generic_status *estatus)
|
|
|
|
|
|
return 0;
|
|
|
}
|
|
|
-EXPORT_SYMBOL_GPL(apei_estatus_check);
|
|
|
+EXPORT_SYMBOL_GPL(cper_estatus_check);
|