|
@@ -27,12 +27,17 @@
|
|
|
#include <linux/list.h>
|
|
|
#include <linux/ctype.h>
|
|
|
#include <linux/edac.h>
|
|
|
+#include <linux/bitops.h>
|
|
|
#include <asm/uaccess.h>
|
|
|
#include <asm/page.h>
|
|
|
#include <asm/edac.h>
|
|
|
#include "edac_core.h"
|
|
|
#include "edac_module.h"
|
|
|
|
|
|
+#define CREATE_TRACE_POINTS
|
|
|
+#define TRACE_INCLUDE_PATH ../../include/ras
|
|
|
+#include <ras/ras_event.h>
|
|
|
+
|
|
|
/* lock to memory controller's control array */
|
|
|
static DEFINE_MUTEX(mem_ctls_mutex);
|
|
|
static LIST_HEAD(mc_devices);
|
|
@@ -384,6 +389,7 @@ struct mem_ctl_info *edac_mc_alloc(unsigned mc_num,
|
|
|
* which will perform kobj unregistration and the actual free
|
|
|
* will occur during the kobject callback operation
|
|
|
*/
|
|
|
+
|
|
|
return mci;
|
|
|
}
|
|
|
EXPORT_SYMBOL_GPL(edac_mc_alloc);
|
|
@@ -902,19 +908,19 @@ static void edac_ce_error(struct mem_ctl_info *mci,
|
|
|
const bool enable_per_layer_report,
|
|
|
const unsigned long page_frame_number,
|
|
|
const unsigned long offset_in_page,
|
|
|
- u32 grain)
|
|
|
+ long grain)
|
|
|
{
|
|
|
unsigned long remapped_page;
|
|
|
|
|
|
if (edac_mc_get_log_ce()) {
|
|
|
if (other_detail && *other_detail)
|
|
|
edac_mc_printk(mci, KERN_WARNING,
|
|
|
- "CE %s on %s (%s%s - %s)\n",
|
|
|
+ "CE %s on %s (%s %s - %s)\n",
|
|
|
msg, label, location,
|
|
|
detail, other_detail);
|
|
|
else
|
|
|
edac_mc_printk(mci, KERN_WARNING,
|
|
|
- "CE %s on %s (%s%s)\n",
|
|
|
+ "CE %s on %s (%s %s)\n",
|
|
|
msg, label, location,
|
|
|
detail);
|
|
|
}
|
|
@@ -953,12 +959,12 @@ static void edac_ue_error(struct mem_ctl_info *mci,
|
|
|
if (edac_mc_get_log_ue()) {
|
|
|
if (other_detail && *other_detail)
|
|
|
edac_mc_printk(mci, KERN_WARNING,
|
|
|
- "UE %s on %s (%s%s - %s)\n",
|
|
|
+ "UE %s on %s (%s %s - %s)\n",
|
|
|
msg, label, location, detail,
|
|
|
other_detail);
|
|
|
else
|
|
|
edac_mc_printk(mci, KERN_WARNING,
|
|
|
- "UE %s on %s (%s%s)\n",
|
|
|
+ "UE %s on %s (%s %s)\n",
|
|
|
msg, label, location, detail);
|
|
|
}
|
|
|
|
|
@@ -975,27 +981,50 @@ static void edac_ue_error(struct mem_ctl_info *mci,
|
|
|
}
|
|
|
|
|
|
#define OTHER_LABEL " or "
|
|
|
+
|
|
|
+/**
|
|
|
+ * edac_mc_handle_error - reports a memory event to userspace
|
|
|
+ *
|
|
|
+ * @type: severity of the error (CE/UE/Fatal)
|
|
|
+ * @mci: a struct mem_ctl_info pointer
|
|
|
+ * @page_frame_number: mem page where the error occurred
|
|
|
+ * @offset_in_page: offset of the error inside the page
|
|
|
+ * @syndrome: ECC syndrome
|
|
|
+ * @top_layer: Memory layer[0] position
|
|
|
+ * @mid_layer: Memory layer[1] position
|
|
|
+ * @low_layer: Memory layer[2] position
|
|
|
+ * @msg: Message meaningful to the end users that
|
|
|
+ * explains the event
|
|
|
+ * @other_detail: Technical details about the event that
|
|
|
+ * may help hardware manufacturers and
|
|
|
+ * EDAC developers to analyse the event
|
|
|
+ * @arch_log: Architecture-specific struct that can
|
|
|
+ * be used to add extended information to the
|
|
|
+ * tracepoint, like dumping MCE registers.
|
|
|
+ */
|
|
|
void edac_mc_handle_error(const enum hw_event_mc_err_type type,
|
|
|
struct mem_ctl_info *mci,
|
|
|
const unsigned long page_frame_number,
|
|
|
const unsigned long offset_in_page,
|
|
|
const unsigned long syndrome,
|
|
|
- const int layer0,
|
|
|
- const int layer1,
|
|
|
- const int layer2,
|
|
|
+ const int top_layer,
|
|
|
+ const int mid_layer,
|
|
|
+ const int low_layer,
|
|
|
const char *msg,
|
|
|
const char *other_detail,
|
|
|
- const void *mcelog)
|
|
|
+ const void *arch_log)
|
|
|
{
|
|
|
/* FIXME: too much for stack: move it to some pre-alocated area */
|
|
|
char detail[80], location[80];
|
|
|
char label[(EDAC_MC_LABEL_LEN + 1 + sizeof(OTHER_LABEL)) * mci->tot_dimms];
|
|
|
char *p;
|
|
|
int row = -1, chan = -1;
|
|
|
- int pos[EDAC_MAX_LAYERS] = { layer0, layer1, layer2 };
|
|
|
+ int pos[EDAC_MAX_LAYERS] = { top_layer, mid_layer, low_layer };
|
|
|
int i;
|
|
|
- u32 grain;
|
|
|
+ long grain;
|
|
|
bool enable_per_layer_report = false;
|
|
|
+ u16 error_count; /* FIXME: make it a parameter */
|
|
|
+ u8 grain_bits;
|
|
|
|
|
|
debugf3("MC%d: %s()\n", mci->mc_idx, __func__);
|
|
|
|
|
@@ -1045,11 +1074,11 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type,
|
|
|
for (i = 0; i < mci->tot_dimms; i++) {
|
|
|
struct dimm_info *dimm = &mci->dimms[i];
|
|
|
|
|
|
- if (layer0 >= 0 && layer0 != dimm->location[0])
|
|
|
+ if (top_layer >= 0 && top_layer != dimm->location[0])
|
|
|
continue;
|
|
|
- if (layer1 >= 0 && layer1 != dimm->location[1])
|
|
|
+ if (mid_layer >= 0 && mid_layer != dimm->location[1])
|
|
|
continue;
|
|
|
- if (layer2 >= 0 && layer2 != dimm->location[2])
|
|
|
+ if (low_layer >= 0 && low_layer != dimm->location[2])
|
|
|
continue;
|
|
|
|
|
|
/* get the max grain, over the error match range */
|
|
@@ -1120,11 +1149,22 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type,
|
|
|
edac_layer_name[mci->layers[i].type],
|
|
|
pos[i]);
|
|
|
}
|
|
|
+ if (p > location)
|
|
|
+ *(p - 1) = '\0';
|
|
|
+
|
|
|
+ /* Report the error via the trace interface */
|
|
|
+
|
|
|
+ error_count = 1; /* FIXME: allow change it */
|
|
|
+ grain_bits = fls_long(grain) + 1;
|
|
|
+ trace_mc_event(type, msg, label, error_count,
|
|
|
+ mci->mc_idx, top_layer, mid_layer, low_layer,
|
|
|
+ PAGES_TO_MiB(page_frame_number) | offset_in_page,
|
|
|
+ grain_bits, syndrome, other_detail);
|
|
|
|
|
|
/* Memory type dependent details about the error */
|
|
|
if (type == HW_EVENT_ERR_CORRECTED) {
|
|
|
snprintf(detail, sizeof(detail),
|
|
|
- "page:0x%lx offset:0x%lx grain:%d syndrome:0x%lx",
|
|
|
+ "page:0x%lx offset:0x%lx grain:%ld syndrome:0x%lx",
|
|
|
page_frame_number, offset_in_page,
|
|
|
grain, syndrome);
|
|
|
edac_ce_error(mci, pos, msg, location, label, detail,
|
|
@@ -1132,7 +1172,7 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type,
|
|
|
page_frame_number, offset_in_page, grain);
|
|
|
} else {
|
|
|
snprintf(detail, sizeof(detail),
|
|
|
- "page:0x%lx offset:0x%lx grain:%d",
|
|
|
+ "page:0x%lx offset:0x%lx grain:%ld",
|
|
|
page_frame_number, offset_in_page, grain);
|
|
|
|
|
|
edac_ue_error(mci, pos, msg, location, label, detail,
|