Browse Source

Merge branch 'apei-release' into release

Len Brown 14 years ago
parent
commit
25076246e8

+ 25 - 0
Documentation/acpi/apei/output_format.txt

@@ -92,6 +92,11 @@ vendor_id: <integer>, device_id: <integer>
 class_code: <integer>]
 [serial number: <integer>, <integer>]
 [bridge: secondary_status: <integer>, control: <integer>]
+[aer_status: <integer>, aer_mask: <integer>
+<aer status string>
+[aer_uncor_severity: <integer>]
+aer_layer=<aer layer string>, aer_agent=<aer agent string>
+aer_tlp_header: <integer> <integer> <integer> <integer>]
 
 <pcie port type string>* := PCIe end point | legacy PCI end point | \
 unknown | unknown | root port | upstream switch port | \
@@ -99,6 +104,26 @@ downstream switch port | PCIe to PCI/PCI-X bridge | \
 PCI/PCI-X to PCIe bridge | root complex integrated endpoint device | \
 root complex event collector
 
+if section severity is fatal or recoverable
+<aer status string># :=
+unknown | unknown | unknown | unknown | Data Link Protocol | \
+unknown | unknown | unknown | unknown | unknown | unknown | unknown | \
+Poisoned TLP | Flow Control Protocol | Completion Timeout | \
+Completer Abort | Unexpected Completion | Receiver Overflow | \
+Malformed TLP | ECRC | Unsupported Request
+else
+<aer status string># :=
+Receiver Error | unknown | unknown | unknown | unknown | unknown | \
+Bad TLP | Bad DLLP | RELAY_NUM Rollover | unknown | unknown | unknown | \
+Replay Timer Timeout | Advisory Non-Fatal
+fi
+
+<aer layer string> :=
+Physical Layer | Data Link Layer | Transaction Layer
+
+<aer agent string> :=
+Receiver ID | Requester ID | Completer ID | Transmitter ID
+
 Where, [] designate corresponding content is optional
 
 All <field string> description with * has the following format:

+ 26 - 16
arch/x86/kernel/cpu/mcheck/mce-apei.c

@@ -106,24 +106,34 @@ int apei_write_mce(struct mce *m)
 ssize_t apei_read_mce(struct mce *m, u64 *record_id)
 {
 	struct cper_mce_record rcd;
-	ssize_t len;
-
-	len = erst_read_next(&rcd.hdr, sizeof(rcd));
-	if (len <= 0)
-		return len;
-	/* Can not skip other records in storage via ERST unless clear them */
-	else if (len != sizeof(rcd) ||
-		 uuid_le_cmp(rcd.hdr.creator_id, CPER_CREATOR_MCE)) {
-		if (printk_ratelimit())
-			pr_warning(
-			"MCE-APEI: Can not skip the unknown record in ERST");
-		return -EIO;
-	}
-
+	int rc, pos;
+
+	rc = erst_get_record_id_begin(&pos);
+	if (rc)
+		return rc;
+retry:
+	rc = erst_get_record_id_next(&pos, record_id);
+	if (rc)
+		goto out;
+	/* no more record */
+	if (*record_id == APEI_ERST_INVALID_RECORD_ID)
+		goto out;
+	rc = erst_read(*record_id, &rcd.hdr, sizeof(rcd));
+	/* someone else has cleared the record, try next one */
+	if (rc == -ENOENT)
+		goto retry;
+	else if (rc < 0)
+		goto out;
+	/* try to skip other type records in storage */
+	else if (rc != sizeof(rcd) ||
+		 uuid_le_cmp(rcd.hdr.creator_id, CPER_CREATOR_MCE))
+		goto retry;
 	memcpy(m, &rcd.mce, sizeof(*m));
-	*record_id = rcd.hdr.record_id;
+	rc = sizeof(*m);
+out:
+	erst_get_record_id_end();
 
-	return sizeof(*m);
+	return rc;
 }
 
 /* Check whether there is record in ERST */

+ 7 - 0
drivers/acpi/apei/Kconfig

@@ -21,6 +21,13 @@ config ACPI_APEI_GHES
 	  by firmware to produce more valuable hardware error
 	  information for Linux.
 
+config ACPI_APEI_PCIEAER
+	bool "APEI PCIe AER logging/recovering support"
+	depends on ACPI_APEI && PCIEAER
+	help
+	  PCIe AER errors may be reported via APEI firmware first mode.
+	  Turn on this option to enable the corresponding support.
+
 config ACPI_APEI_EINJ
 	tristate "APEI Error INJection (EINJ)"
 	depends on ACPI_APEI && DEBUG_FS

+ 14 - 4
drivers/acpi/apei/cper.c

@@ -29,6 +29,7 @@
 #include <linux/time.h>
 #include <linux/cper.h>
 #include <linux/acpi.h>
+#include <linux/aer.h>
 
 /*
  * CPER record ID need to be unique even after reboot, because record
@@ -70,8 +71,8 @@ static const char *cper_severity_str(unsigned int severity)
  * If the output length is longer than 80, multiple line will be
  * printed, with @pfx is printed at the beginning of each line.
  */
-static void cper_print_bits(const char *pfx, unsigned int bits,
-			    const char *strs[], unsigned int strs_size)
+void cper_print_bits(const char *pfx, unsigned int bits,
+		     const char *strs[], unsigned int strs_size)
 {
 	int i, len = 0;
 	const char *str;
@@ -81,6 +82,8 @@ static void cper_print_bits(const char *pfx, unsigned int bits,
 		if (!(bits & (1U << i)))
 			continue;
 		str = strs[i];
+		if (!str)
+			continue;
 		if (len && len + strlen(str) + 2 > 80) {
 			printk("%s\n", buf);
 			len = 0;
@@ -243,7 +246,8 @@ static const char *cper_pcie_port_type_strs[] = {
 	"root complex event collector",
 };
 
-static void cper_print_pcie(const char *pfx, const struct cper_sec_pcie *pcie)
+static void cper_print_pcie(const char *pfx, const struct cper_sec_pcie *pcie,
+			    const struct acpi_hest_generic_data *gdata)
 {
 	if (pcie->validation_bits & CPER_PCIE_VALID_PORT_TYPE)
 		printk("%s""port_type: %d, %s\n", pfx, pcie->port_type,
@@ -276,6 +280,12 @@ static void cper_print_pcie(const char *pfx, const struct cper_sec_pcie *pcie)
 		printk(
 	"%s""bridge: secondary_status: 0x%04x, control: 0x%04x\n",
 	pfx, pcie->bridge.secondary_status, pcie->bridge.control);
+#ifdef CONFIG_ACPI_APEI_PCIEAER
+	if (pcie->validation_bits & CPER_PCIE_VALID_AER_INFO) {
+		struct aer_capability_regs *aer_regs = (void *)pcie->aer_info;
+		cper_print_aer(pfx, gdata->error_severity, aer_regs);
+	}
+#endif
 }
 
 static const char *apei_estatus_section_flag_strs[] = {
@@ -322,7 +332,7 @@ static void apei_estatus_print_section(
 		struct cper_sec_pcie *pcie = (void *)(gdata + 1);
 		printk("%s""section_type: PCIe error\n", pfx);
 		if (gdata->error_data_length >= sizeof(*pcie))
-			cper_print_pcie(pfx, pcie);
+			cper_print_pcie(pfx, pcie, gdata);
 		else
 			goto err_section_too_small;
 	} else

+ 21 - 3
drivers/acpi/apei/erst-dbg.c

@@ -43,12 +43,27 @@ static DEFINE_MUTEX(erst_dbg_mutex);
 
 static int erst_dbg_open(struct inode *inode, struct file *file)
 {
+	int rc, *pos;
+
 	if (erst_disable)
 		return -ENODEV;
 
+	pos = (int *)&file->private_data;
+
+	rc = erst_get_record_id_begin(pos);
+	if (rc)
+		return rc;
+
 	return nonseekable_open(inode, file);
 }
 
+static int erst_dbg_release(struct inode *inode, struct file *file)
+{
+	erst_get_record_id_end();
+
+	return 0;
+}
+
 static long erst_dbg_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
 {
 	int rc;
@@ -79,18 +94,20 @@ static long erst_dbg_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
 static ssize_t erst_dbg_read(struct file *filp, char __user *ubuf,
 			     size_t usize, loff_t *off)
 {
-	int rc;
+	int rc, *pos;
 	ssize_t len = 0;
 	u64 id;
 
-	if (*off != 0)
+	if (*off)
 		return -EINVAL;
 
 	if (mutex_lock_interruptible(&erst_dbg_mutex) != 0)
 		return -EINTR;
 
+	pos = (int *)&filp->private_data;
+
 retry_next:
-	rc = erst_get_next_record_id(&id);
+	rc = erst_get_record_id_next(pos, &id);
 	if (rc)
 		goto out;
 	/* no more record */
@@ -181,6 +198,7 @@ out:
 static const struct file_operations erst_dbg_ops = {
 	.owner		= THIS_MODULE,
 	.open		= erst_dbg_open,
+	.release	= erst_dbg_release,
 	.read		= erst_dbg_read,
 	.write		= erst_dbg_write,
 	.unlocked_ioctl	= erst_dbg_ioctl,

+ 191 - 44
drivers/acpi/apei/erst.c

@@ -429,6 +429,22 @@ ssize_t erst_get_record_count(void)
 }
 EXPORT_SYMBOL_GPL(erst_get_record_count);
 
+#define ERST_RECORD_ID_CACHE_SIZE_MIN	16
+#define ERST_RECORD_ID_CACHE_SIZE_MAX	1024
+
+struct erst_record_id_cache {
+	struct mutex lock;
+	u64 *entries;
+	int len;
+	int size;
+	int refcount;
+};
+
+static struct erst_record_id_cache erst_record_id_cache = {
+	.lock = __MUTEX_INITIALIZER(erst_record_id_cache.lock),
+	.refcount = 0,
+};
+
 static int __erst_get_next_record_id(u64 *record_id)
 {
 	struct apei_exec_context ctx;
@@ -443,26 +459,179 @@ static int __erst_get_next_record_id(u64 *record_id)
 	return 0;
 }
 
+int erst_get_record_id_begin(int *pos)
+{
+	int rc;
+
+	if (erst_disable)
+		return -ENODEV;
+
+	rc = mutex_lock_interruptible(&erst_record_id_cache.lock);
+	if (rc)
+		return rc;
+	erst_record_id_cache.refcount++;
+	mutex_unlock(&erst_record_id_cache.lock);
+
+	*pos = 0;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(erst_get_record_id_begin);
+
+/* erst_record_id_cache.lock must be held by caller */
+static int __erst_record_id_cache_add_one(void)
+{
+	u64 id, prev_id, first_id;
+	int i, rc;
+	u64 *entries;
+	unsigned long flags;
+
+	id = prev_id = first_id = APEI_ERST_INVALID_RECORD_ID;
+retry:
+	raw_spin_lock_irqsave(&erst_lock, flags);
+	rc = __erst_get_next_record_id(&id);
+	raw_spin_unlock_irqrestore(&erst_lock, flags);
+	if (rc == -ENOENT)
+		return 0;
+	if (rc)
+		return rc;
+	if (id == APEI_ERST_INVALID_RECORD_ID)
+		return 0;
+	/* can not skip current ID, or loop back to first ID */
+	if (id == prev_id || id == first_id)
+		return 0;
+	if (first_id == APEI_ERST_INVALID_RECORD_ID)
+		first_id = id;
+	prev_id = id;
+
+	entries = erst_record_id_cache.entries;
+	for (i = 0; i < erst_record_id_cache.len; i++) {
+		if (entries[i] == id)
+			break;
+	}
+	/* record id already in cache, try next */
+	if (i < erst_record_id_cache.len)
+		goto retry;
+	if (erst_record_id_cache.len >= erst_record_id_cache.size) {
+		int new_size, alloc_size;
+		u64 *new_entries;
+
+		new_size = erst_record_id_cache.size * 2;
+		new_size = clamp_val(new_size, ERST_RECORD_ID_CACHE_SIZE_MIN,
+				     ERST_RECORD_ID_CACHE_SIZE_MAX);
+		if (new_size <= erst_record_id_cache.size) {
+			if (printk_ratelimit())
+				pr_warning(FW_WARN ERST_PFX
+					   "too many record ID!\n");
+			return 0;
+		}
+		alloc_size = new_size * sizeof(entries[0]);
+		if (alloc_size < PAGE_SIZE)
+			new_entries = kmalloc(alloc_size, GFP_KERNEL);
+		else
+			new_entries = vmalloc(alloc_size);
+		if (!new_entries)
+			return -ENOMEM;
+		memcpy(new_entries, entries,
+		       erst_record_id_cache.len * sizeof(entries[0]));
+		if (erst_record_id_cache.size < PAGE_SIZE)
+			kfree(entries);
+		else
+			vfree(entries);
+		erst_record_id_cache.entries = entries = new_entries;
+		erst_record_id_cache.size = new_size;
+	}
+	entries[i] = id;
+	erst_record_id_cache.len++;
+
+	return 1;
+}
+
 /*
  * Get the record ID of an existing error record on the persistent
  * storage. If there is no error record on the persistent storage, the
  * returned record_id is APEI_ERST_INVALID_RECORD_ID.
  */
-int erst_get_next_record_id(u64 *record_id)
+int erst_get_record_id_next(int *pos, u64 *record_id)
 {
-	int rc;
-	unsigned long flags;
+	int rc = 0;
+	u64 *entries;
 
 	if (erst_disable)
 		return -ENODEV;
 
-	raw_spin_lock_irqsave(&erst_lock, flags);
-	rc = __erst_get_next_record_id(record_id);
-	raw_spin_unlock_irqrestore(&erst_lock, flags);
+	/* must be enclosed by erst_get_record_id_begin/end */
+	BUG_ON(!erst_record_id_cache.refcount);
+	BUG_ON(*pos < 0 || *pos > erst_record_id_cache.len);
+
+	mutex_lock(&erst_record_id_cache.lock);
+	entries = erst_record_id_cache.entries;
+	for (; *pos < erst_record_id_cache.len; (*pos)++)
+		if (entries[*pos] != APEI_ERST_INVALID_RECORD_ID)
+			break;
+	/* found next record id in cache */
+	if (*pos < erst_record_id_cache.len) {
+		*record_id = entries[*pos];
+		(*pos)++;
+		goto out_unlock;
+	}
+
+	/* Try to add one more record ID to cache */
+	rc = __erst_record_id_cache_add_one();
+	if (rc < 0)
+		goto out_unlock;
+	/* successfully add one new ID */
+	if (rc == 1) {
+		*record_id = erst_record_id_cache.entries[*pos];
+		(*pos)++;
+		rc = 0;
+	} else {
+		*pos = -1;
+		*record_id = APEI_ERST_INVALID_RECORD_ID;
+	}
+out_unlock:
+	mutex_unlock(&erst_record_id_cache.lock);
 
 	return rc;
 }
-EXPORT_SYMBOL_GPL(erst_get_next_record_id);
+EXPORT_SYMBOL_GPL(erst_get_record_id_next);
+
+/* erst_record_id_cache.lock must be held by caller */
+static void __erst_record_id_cache_compact(void)
+{
+	int i, wpos = 0;
+	u64 *entries;
+
+	if (erst_record_id_cache.refcount)
+		return;
+
+	entries = erst_record_id_cache.entries;
+	for (i = 0; i < erst_record_id_cache.len; i++) {
+		if (entries[i] == APEI_ERST_INVALID_RECORD_ID)
+			continue;
+		if (wpos != i)
+			memcpy(&entries[wpos], &entries[i], sizeof(entries[i]));
+		wpos++;
+	}
+	erst_record_id_cache.len = wpos;
+}
+
+void erst_get_record_id_end(void)
+{
+	/*
+	 * erst_disable != 0 should be detected by invoker via the
+	 * return value of erst_get_record_id_begin/next, so this
+	 * function should not be called for erst_disable != 0.
+	 */
+	BUG_ON(erst_disable);
+
+	mutex_lock(&erst_record_id_cache.lock);
+	erst_record_id_cache.refcount--;
+	BUG_ON(erst_record_id_cache.refcount < 0);
+	__erst_record_id_cache_compact();
+	mutex_unlock(&erst_record_id_cache.lock);
+}
+EXPORT_SYMBOL_GPL(erst_get_record_id_end);
 
 static int __erst_write_to_storage(u64 offset)
 {
@@ -703,56 +872,34 @@ ssize_t erst_read(u64 record_id, struct cper_record_header *record,
 }
 EXPORT_SYMBOL_GPL(erst_read);
 
-/*
- * If return value > buflen, the buffer size is not big enough,
- * else if return value = 0, there is no more record to read,
- * else if return value < 0, something goes wrong,
- * else everything is OK, and return value is record length
- */
-ssize_t erst_read_next(struct cper_record_header *record, size_t buflen)
-{
-	int rc;
-	ssize_t len;
-	unsigned long flags;
-	u64 record_id;
-
-	if (erst_disable)
-		return -ENODEV;
-
-	raw_spin_lock_irqsave(&erst_lock, flags);
-	rc = __erst_get_next_record_id(&record_id);
-	if (rc) {
-		raw_spin_unlock_irqrestore(&erst_lock, flags);
-		return rc;
-	}
-	/* no more record */
-	if (record_id == APEI_ERST_INVALID_RECORD_ID) {
-		raw_spin_unlock_irqrestore(&erst_lock, flags);
-		return 0;
-	}
-
-	len = __erst_read(record_id, record, buflen);
-	raw_spin_unlock_irqrestore(&erst_lock, flags);
-
-	return len;
-}
-EXPORT_SYMBOL_GPL(erst_read_next);
-
 int erst_clear(u64 record_id)
 {
-	int rc;
+	int rc, i;
 	unsigned long flags;
+	u64 *entries;
 
 	if (erst_disable)
 		return -ENODEV;
 
+	rc = mutex_lock_interruptible(&erst_record_id_cache.lock);
+	if (rc)
+		return rc;
 	raw_spin_lock_irqsave(&erst_lock, flags);
 	if (erst_erange.attr & ERST_RANGE_NVRAM)
 		rc = __erst_clear_from_nvram(record_id);
 	else
 		rc = __erst_clear_from_storage(record_id);
 	raw_spin_unlock_irqrestore(&erst_lock, flags);
-
+	if (rc)
+		goto out;
+	entries = erst_record_id_cache.entries;
+	for (i = 0; i < erst_record_id_cache.len; i++) {
+		if (entries[i] == record_id)
+			entries[i] = APEI_ERST_INVALID_RECORD_ID;
+	}
+	__erst_record_id_cache_compact();
+out:
+	mutex_unlock(&erst_record_id_cache.lock);
 	return rc;
 }
 EXPORT_SYMBOL_GPL(erst_clear);

+ 1 - 8
drivers/pci/pcie/aer/aerdrv.h

@@ -35,13 +35,6 @@
 					PCI_ERR_UNC_UNX_COMP|		\
 					PCI_ERR_UNC_MALF_TLP)
 
-struct header_log_regs {
-	unsigned int dw0;
-	unsigned int dw1;
-	unsigned int dw2;
-	unsigned int dw3;
-};
-
 #define AER_MAX_MULTI_ERR_DEVICES	5	/* Not likely to have more */
 struct aer_err_info {
 	struct pci_dev *dev[AER_MAX_MULTI_ERR_DEVICES];
@@ -59,7 +52,7 @@ struct aer_err_info {
 
 	unsigned int status;		/* COR/UNCOR Error Status */
 	unsigned int mask;		/* COR/UNCOR Error Mask */
-	struct header_log_regs tlp;	/* TLP Header */
+	struct aer_header_log_regs tlp;	/* TLP Header */
 };
 
 struct aer_err_source {

+ 107 - 75
drivers/pci/pcie/aer/aerdrv_errprint.c

@@ -19,6 +19,7 @@
 #include <linux/errno.h>
 #include <linux/pm.h>
 #include <linux/suspend.h>
+#include <linux/cper.h>
 
 #include "aerdrv.h"
 
@@ -57,86 +58,44 @@
 	(e & AER_DATA_LINK_LAYER_ERROR_MASK(t)) ? AER_DATA_LINK_LAYER_ERROR : \
 	AER_TRANSACTION_LAYER_ERROR)
 
-#define AER_PR(info, pdev, fmt, args...)				\
-	printk("%s%s %s: " fmt, (info->severity == AER_CORRECTABLE) ?	\
-		KERN_WARNING : KERN_ERR, dev_driver_string(&pdev->dev),	\
-		dev_name(&pdev->dev), ## args)
-
 /*
  * AER error strings
  */
-static char *aer_error_severity_string[] = {
+static const char *aer_error_severity_string[] = {
 	"Uncorrected (Non-Fatal)",
 	"Uncorrected (Fatal)",
 	"Corrected"
 };
 
-static char *aer_error_layer[] = {
+static const char *aer_error_layer[] = {
 	"Physical Layer",
 	"Data Link Layer",
 	"Transaction Layer"
 };
-static char *aer_correctable_error_string[] = {
-	"Receiver Error        ",	/* Bit Position 0	*/
-	NULL,
-	NULL,
-	NULL,
-	NULL,
-	NULL,
-	"Bad TLP               ",	/* Bit Position 6	*/
-	"Bad DLLP              ",	/* Bit Position 7	*/
-	"RELAY_NUM Rollover    ",	/* Bit Position 8	*/
-	NULL,
-	NULL,
-	NULL,
-	"Replay Timer Timeout  ",	/* Bit Position 12	*/
-	"Advisory Non-Fatal    ",	/* Bit Position 13	*/
-	NULL,
-	NULL,
-	NULL,
-	NULL,
-	NULL,
-	NULL,
-	NULL,
-	NULL,
-	NULL,
-	NULL,
+
+static const char *aer_correctable_error_string[] = {
+	"Receiver Error",		/* Bit Position 0	*/
 	NULL,
 	NULL,
 	NULL,
 	NULL,
 	NULL,
+	"Bad TLP",			/* Bit Position 6	*/
+	"Bad DLLP",			/* Bit Position 7	*/
+	"RELAY_NUM Rollover",		/* Bit Position 8	*/
 	NULL,
 	NULL,
 	NULL,
+	"Replay Timer Timeout",		/* Bit Position 12	*/
+	"Advisory Non-Fatal",		/* Bit Position 13	*/
 };
 
-static char *aer_uncorrectable_error_string[] = {
-	NULL,
-	NULL,
-	NULL,
-	NULL,
-	"Data Link Protocol    ",	/* Bit Position 4	*/
-	NULL,
-	NULL,
-	NULL,
-	NULL,
-	NULL,
-	NULL,
-	NULL,
-	"Poisoned TLP          ",	/* Bit Position 12	*/
-	"Flow Control Protocol ",	/* Bit Position 13	*/
-	"Completion Timeout    ",	/* Bit Position 14	*/
-	"Completer Abort       ",	/* Bit Position 15	*/
-	"Unexpected Completion ",	/* Bit Position 16	*/
-	"Receiver Overflow     ",	/* Bit Position 17	*/
-	"Malformed TLP         ",	/* Bit Position 18	*/
-	"ECRC                  ",	/* Bit Position 19	*/
-	"Unsupported Request   ",	/* Bit Position 20	*/
+static const char *aer_uncorrectable_error_string[] = {
 	NULL,
 	NULL,
 	NULL,
 	NULL,
+	"Data Link Protocol",		/* Bit Position 4	*/
 	NULL,
 	NULL,
 	NULL,
@@ -144,19 +103,29 @@ static char *aer_uncorrectable_error_string[] = {
 	NULL,
 	NULL,
 	NULL,
+	"Poisoned TLP",			/* Bit Position 12	*/
+	"Flow Control Protocol",	/* Bit Position 13	*/
+	"Completion Timeout",		/* Bit Position 14	*/
+	"Completer Abort",		/* Bit Position 15	*/
+	"Unexpected Completion",	/* Bit Position 16	*/
+	"Receiver Overflow",		/* Bit Position 17	*/
+	"Malformed TLP",		/* Bit Position 18	*/
+	"ECRC",				/* Bit Position 19	*/
+	"Unsupported Request",		/* Bit Position 20	*/
 };
 
-static char *aer_agent_string[] = {
+static const char *aer_agent_string[] = {
 	"Receiver ID",
 	"Requester ID",
 	"Completer ID",
 	"Transmitter ID"
 };
 
-static void __aer_print_error(struct aer_err_info *info, struct pci_dev *dev)
+static void __aer_print_error(const char *prefix,
+			      struct aer_err_info *info)
 {
 	int i, status;
-	char *errmsg = NULL;
+	const char *errmsg = NULL;
 
 	status = (info->status & ~info->mask);
 
@@ -165,15 +134,17 @@ static void __aer_print_error(struct aer_err_info *info, struct pci_dev *dev)
 			continue;
 
 		if (info->severity == AER_CORRECTABLE)
-			errmsg = aer_correctable_error_string[i];
+			errmsg = i < ARRAY_SIZE(aer_correctable_error_string) ?
+				aer_correctable_error_string[i] : NULL;
 		else
-			errmsg = aer_uncorrectable_error_string[i];
+			errmsg = i < ARRAY_SIZE(aer_uncorrectable_error_string) ?
+				aer_uncorrectable_error_string[i] : NULL;
 
 		if (errmsg)
-			AER_PR(info, dev, "   [%2d] %s%s\n", i, errmsg,
+			printk("%s""   [%2d] %-22s%s\n", prefix, i, errmsg,
 				info->first_error == i ? " (First)" : "");
 		else
-			AER_PR(info, dev, "   [%2d] Unknown Error Bit%s\n", i,
+			printk("%s""   [%2d] Unknown Error Bit%s\n", prefix, i,
 				info->first_error == i ? " (First)" : "");
 	}
 }
@@ -181,11 +152,15 @@ static void __aer_print_error(struct aer_err_info *info, struct pci_dev *dev)
 void aer_print_error(struct pci_dev *dev, struct aer_err_info *info)
 {
 	int id = ((dev->bus->number << 8) | dev->devfn);
+	char prefix[44];
+
+	snprintf(prefix, sizeof(prefix), "%s%s %s: ",
+		 (info->severity == AER_CORRECTABLE) ? KERN_WARNING : KERN_ERR,
+		 dev_driver_string(&dev->dev), dev_name(&dev->dev));
 
 	if (info->status == 0) {
-		AER_PR(info, dev,
-			"PCIe Bus Error: severity=%s, type=Unaccessible, "
-			"id=%04x(Unregistered Agent ID)\n",
+		printk("%s""PCIe Bus Error: severity=%s, type=Unaccessible, "
+			"id=%04x(Unregistered Agent ID)\n", prefix,
 			aer_error_severity_string[info->severity], id);
 	} else {
 		int layer, agent;
@@ -193,23 +168,22 @@ void aer_print_error(struct pci_dev *dev, struct aer_err_info *info)
 		layer = AER_GET_LAYER_ERROR(info->severity, info->status);
 		agent = AER_GET_AGENT(info->severity, info->status);
 
-		AER_PR(info, dev,
-			"PCIe Bus Error: severity=%s, type=%s, id=%04x(%s)\n",
-			aer_error_severity_string[info->severity],
+		printk("%s""PCIe Bus Error: severity=%s, type=%s, id=%04x(%s)\n",
+			prefix, aer_error_severity_string[info->severity],
 			aer_error_layer[layer], id, aer_agent_string[agent]);
 
-		AER_PR(info, dev,
-			"  device [%04x:%04x] error status/mask=%08x/%08x\n",
-			dev->vendor, dev->device, info->status, info->mask);
+		printk("%s""  device [%04x:%04x] error status/mask=%08x/%08x\n",
+			prefix, dev->vendor, dev->device,
+			info->status, info->mask);
 
-		__aer_print_error(info, dev);
+		__aer_print_error(prefix, info);
 
 		if (info->tlp_header_valid) {
 			unsigned char *tlp = (unsigned char *) &info->tlp;
-			AER_PR(info, dev, "  TLP Header:"
+			printk("%s""  TLP Header:"
 				" %02x%02x%02x%02x %02x%02x%02x%02x"
 				" %02x%02x%02x%02x %02x%02x%02x%02x\n",
-				*(tlp + 3), *(tlp + 2), *(tlp + 1), *tlp,
+				prefix, *(tlp + 3), *(tlp + 2), *(tlp + 1), *tlp,
 				*(tlp + 7), *(tlp + 6), *(tlp + 5), *(tlp + 4),
 				*(tlp + 11), *(tlp + 10), *(tlp + 9),
 				*(tlp + 8), *(tlp + 15), *(tlp + 14),
@@ -218,8 +192,8 @@ void aer_print_error(struct pci_dev *dev, struct aer_err_info *info)
 	}
 
 	if (info->id && info->error_dev_num > 1 && info->id == id)
-		AER_PR(info, dev,
-			"  Error of this Agent(%04x) is reported first\n", id);
+		printk("%s""  Error of this Agent(%04x) is reported first\n",
+			prefix, id);
 }
 
 void aer_print_port_info(struct pci_dev *dev, struct aer_err_info *info)
@@ -228,3 +202,61 @@ void aer_print_port_info(struct pci_dev *dev, struct aer_err_info *info)
 		info->multi_error_valid ? "Multiple " : "",
 		aer_error_severity_string[info->severity], info->id);
 }
+
+#ifdef CONFIG_ACPI_APEI_PCIEAER
+static int cper_severity_to_aer(int cper_severity)
+{
+	switch (cper_severity) {
+	case CPER_SEV_RECOVERABLE:
+		return AER_NONFATAL;
+	case CPER_SEV_FATAL:
+		return AER_FATAL;
+	default:
+		return AER_CORRECTABLE;
+	}
+}
+
+void cper_print_aer(const char *prefix, int cper_severity,
+		    struct aer_capability_regs *aer)
+{
+	int aer_severity, layer, agent, status_strs_size, tlp_header_valid = 0;
+	u32 status, mask;
+	const char **status_strs;
+
+	aer_severity = cper_severity_to_aer(cper_severity);
+	if (aer_severity == AER_CORRECTABLE) {
+		status = aer->cor_status;
+		mask = aer->cor_mask;
+		status_strs = aer_correctable_error_string;
+		status_strs_size = ARRAY_SIZE(aer_correctable_error_string);
+	} else {
+		status = aer->uncor_status;
+		mask = aer->uncor_mask;
+		status_strs = aer_uncorrectable_error_string;
+		status_strs_size = ARRAY_SIZE(aer_uncorrectable_error_string);
+		tlp_header_valid = status & AER_LOG_TLP_MASKS;
+	}
+	layer = AER_GET_LAYER_ERROR(aer_severity, status);
+	agent = AER_GET_AGENT(aer_severity, status);
+	printk("%s""aer_status: 0x%08x, aer_mask: 0x%08x\n",
+	       prefix, status, mask);
+	cper_print_bits(prefix, status, status_strs, status_strs_size);
+	printk("%s""aer_layer=%s, aer_agent=%s\n", prefix,
+	       aer_error_layer[layer], aer_agent_string[agent]);
+	if (aer_severity != AER_CORRECTABLE)
+		printk("%s""aer_uncor_severity: 0x%08x\n",
+		       prefix, aer->uncor_severity);
+	if (tlp_header_valid) {
+		const unsigned char *tlp;
+		tlp = (const unsigned char *)&aer->header_log;
+		printk("%s""aer_tlp_header:"
+			" %02x%02x%02x%02x %02x%02x%02x%02x"
+			" %02x%02x%02x%02x %02x%02x%02x%02x\n",
+			prefix, *(tlp + 3), *(tlp + 2), *(tlp + 1), *tlp,
+			*(tlp + 7), *(tlp + 6), *(tlp + 5), *(tlp + 4),
+			*(tlp + 11), *(tlp + 10), *(tlp + 9),
+			*(tlp + 8), *(tlp + 15), *(tlp + 14),
+			*(tlp + 13), *(tlp + 12));
+	}
+}
+#endif

+ 3 - 2
include/acpi/apei.h

@@ -30,10 +30,11 @@ int apei_hest_parse(apei_hest_func_t func, void *data);
 
 int erst_write(const struct cper_record_header *record);
 ssize_t erst_get_record_count(void);
-int erst_get_next_record_id(u64 *record_id);
+int erst_get_record_id_begin(int *pos);
+int erst_get_record_id_next(int *pos, u64 *record_id);
+void erst_get_record_id_end(void);
 ssize_t erst_read(u64 record_id, struct cper_record_header *record,
 		  size_t buflen);
-ssize_t erst_read_next(struct cper_record_header *record, size_t buflen);
 int erst_clear(u64 record_id);
 
 #endif

+ 24 - 0
include/linux/aer.h

@@ -7,6 +7,28 @@
 #ifndef _AER_H_
 #define _AER_H_
 
+struct aer_header_log_regs {
+	unsigned int dw0;
+	unsigned int dw1;
+	unsigned int dw2;
+	unsigned int dw3;
+};
+
+struct aer_capability_regs {
+	u32 header;
+	u32 uncor_status;
+	u32 uncor_mask;
+	u32 uncor_severity;
+	u32 cor_status;
+	u32 cor_mask;
+	u32 cap_control;
+	struct aer_header_log_regs header_log;
+	u32 root_command;
+	u32 root_status;
+	u16 cor_err_source;
+	u16 uncor_err_source;
+};
+
 #if defined(CONFIG_PCIEAER)
 /* pci-e port driver needs this function to enable aer */
 extern int pci_enable_pcie_error_reporting(struct pci_dev *dev);
@@ -27,5 +49,7 @@ static inline int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev)
 }
 #endif
 
+extern void cper_print_aer(const char *prefix, int cper_severity,
+			   struct aer_capability_regs *aer);
 #endif //_AER_H_
 

+ 2 - 0
include/linux/cper.h

@@ -388,5 +388,7 @@ struct cper_sec_pcie {
 #pragma pack()
 
 u64 cper_next_record_id(void);
+void cper_print_bits(const char *prefix, unsigned int bits,
+		     const char *strs[], unsigned int strs_size);
 
 #endif