ghes_edac.c 7.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334
  1. /*
  2. * GHES/EDAC Linux driver
  3. *
  4. * This file may be distributed under the terms of the GNU General Public
  5. * License version 2.
  6. *
  7. * Copyright (c) 2013 by Mauro Carvalho Chehab <mchehab@redhat.com>
  8. *
  9. * Red Hat Inc. http://www.redhat.com
  10. */
  11. #include <acpi/ghes.h>
  12. #include <linux/edac.h>
  13. #include <linux/dmi.h>
  14. #include "edac_core.h"
  /* Prefix used by most of this driver's log messages. */
  #define GHES_PFX "ghes_edac: "

  /* Version string; ghes_edac_register() stores it in mci->mod_ver. */
  #define GHES_EDAC_REVISION " Ver: 1.0.0"
  17. struct ghes_edac_pvt {
  18. struct list_head list;
  19. struct ghes *ghes;
  20. struct mem_ctl_info *mci;
  21. };
  /* Every registered ghes_edac_pvt, chained through its ->list member. */
  static LIST_HEAD(ghes_reglist);

  /* Serializes controller allocation and registration in ghes_edac_register(). */
  static DEFINE_MUTEX(ghes_edac_lock);

  /*
   * Controller number passed to edac_mc_alloc() for the next registration;
   * incremented after each successful ghes_edac_register().
   */
  static int ghes_edac_mc_num;
  25. /* Memory Device - Type 17 of SMBIOS spec */
  26. struct memdev_dmi_entry {
  27. u8 type;
  28. u8 length;
  29. u16 handle;
  30. u16 phys_mem_array_handle;
  31. u16 mem_err_info_handle;
  32. u16 total_width;
  33. u16 data_width;
  34. u16 size;
  35. u8 form_factor;
  36. u8 device_set;
  37. u8 device_locator;
  38. u8 bank_locator;
  39. u8 memory_type;
  40. u16 type_detail;
  41. u16 speed;
  42. u8 manufacturer;
  43. u8 serial_number;
  44. u8 asset_tag;
  45. u8 part_number;
  46. u8 attributes;
  47. u32 extended_size;
  48. u16 conf_mem_clk_speed;
  49. } __attribute__((__packed__));
  /* State handed to ghes_edac_dmidecode() through dmi_walk(). */
  struct ghes_edac_dimm_fill {
  	struct mem_ctl_info *mci;	/* controller whose DIMMs are being filled */
  	unsigned int count;		/* index of the next DIMM slot to fill */
  };
  54. char *memory_type[] = {
  55. [MEM_EMPTY] = "EMPTY",
  56. [MEM_RESERVED] = "RESERVED",
  57. [MEM_UNKNOWN] = "UNKNOWN",
  58. [MEM_FPM] = "FPM",
  59. [MEM_EDO] = "EDO",
  60. [MEM_BEDO] = "BEDO",
  61. [MEM_SDR] = "SDR",
  62. [MEM_RDR] = "RDR",
  63. [MEM_DDR] = "DDR",
  64. [MEM_RDDR] = "RDDR",
  65. [MEM_RMBS] = "RMBS",
  66. [MEM_DDR2] = "DDR2",
  67. [MEM_FB_DDR2] = "FB_DDR2",
  68. [MEM_RDDR2] = "RDDR2",
  69. [MEM_XDR] = "XDR",
  70. [MEM_DDR3] = "DDR3",
  71. [MEM_RDDR3] = "RDDR3",
  72. };
  73. static void ghes_edac_count_dimms(const struct dmi_header *dh, void *arg)
  74. {
  75. int *num_dimm = arg;
  76. if (dh->type == DMI_ENTRY_MEM_DEVICE)
  77. (*num_dimm)++;
  78. }
  79. static void ghes_edac_dmidecode(const struct dmi_header *dh, void *arg)
  80. {
  81. struct ghes_edac_dimm_fill *dimm_fill = arg;
  82. struct mem_ctl_info *mci = dimm_fill->mci;
  83. if (dh->type == DMI_ENTRY_MEM_DEVICE) {
  84. struct memdev_dmi_entry *entry = (struct memdev_dmi_entry *)dh;
  85. struct dimm_info *dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms,
  86. mci->n_layers,
  87. dimm_fill->count, 0, 0);
  88. if (entry->size == 0xffff) {
  89. pr_info(GHES_PFX "Can't get dimm size\n");
  90. dimm->nr_pages = MiB_TO_PAGES(32);/* Unknown */
  91. } else if (entry->size == 0x7fff) {
  92. dimm->nr_pages = MiB_TO_PAGES(entry->extended_size);
  93. } else {
  94. if (entry->size & 1 << 15)
  95. dimm->nr_pages = MiB_TO_PAGES((entry->size &
  96. 0x7fff) << 10);
  97. else
  98. dimm->nr_pages = MiB_TO_PAGES(entry->size);
  99. }
  100. switch (entry->memory_type) {
  101. case 0x12:
  102. if (entry->type_detail & 1 << 13)
  103. dimm->mtype = MEM_RDDR;
  104. else
  105. dimm->mtype = MEM_DDR;
  106. break;
  107. case 0x13:
  108. if (entry->type_detail & 1 << 13)
  109. dimm->mtype = MEM_RDDR2;
  110. else
  111. dimm->mtype = MEM_DDR2;
  112. break;
  113. case 0x14:
  114. dimm->mtype = MEM_FB_DDR2;
  115. break;
  116. case 0x18:
  117. if (entry->type_detail & 1 << 13)
  118. dimm->mtype = MEM_RDDR3;
  119. else
  120. dimm->mtype = MEM_DDR3;
  121. break;
  122. default:
  123. if (entry->type_detail & 1 << 6)
  124. dimm->mtype = MEM_RMBS;
  125. else if ((entry->type_detail & ((1 << 7) | (1 << 13)))
  126. == ((1 << 7) | (1 << 13)))
  127. dimm->mtype = MEM_RDR;
  128. else if (entry->type_detail & 1 << 7)
  129. dimm->mtype = MEM_SDR;
  130. else if (entry->type_detail & 1 << 9)
  131. dimm->mtype = MEM_EDO;
  132. else
  133. dimm->mtype = MEM_UNKNOWN;
  134. }
  135. /*
  136. * Actually, we can only detect if the memory has bits for
  137. * checksum or not
  138. */
  139. if (entry->total_width == entry->data_width)
  140. dimm->edac_mode = EDAC_NONE;
  141. else
  142. dimm->edac_mode = EDAC_SECDED;
  143. dimm->dtype = DEV_UNKNOWN;
  144. dimm->grain = 128; /* Likely, worse case */
  145. /*
  146. * FIXME: It shouldn't be hard to also fill the DIMM labels
  147. */
  148. if (dimm->nr_pages) {
  149. pr_info(GHES_PFX "DIMM%i: %s size = %d MB%s\n",
  150. dimm_fill->count, memory_type[dimm->mtype],
  151. PAGES_TO_MiB(dimm->nr_pages),
  152. (dimm->edac_mode != EDAC_NONE) ? "(ECC)" : "");
  153. pr_info(GHES_PFX "\ttype %d, detail 0x%02x, width %d(total %d)\n",
  154. entry->memory_type, entry->type_detail,
  155. entry->total_width, entry->data_width);
  156. }
  157. dimm_fill->count++;
  158. }
  159. }
  160. void ghes_edac_report_mem_error(struct ghes *ghes, int sev,
  161. struct cper_sec_mem_err *mem_err)
  162. {
  163. enum hw_event_mc_err_type type;
  164. struct edac_raw_error_desc *e;
  165. struct mem_ctl_info *mci;
  166. struct ghes_edac_pvt *pvt = NULL;
  167. list_for_each_entry(pvt, &ghes_reglist, list) {
  168. if (ghes == pvt->ghes)
  169. break;
  170. }
  171. if (!pvt) {
  172. pr_err("Internal error: Can't find EDAC structure\n");
  173. return;
  174. }
  175. mci = pvt->mci;
  176. e = &mci->error_desc;
  177. /* Cleans the error report buffer */
  178. memset(e, 0, sizeof (*e));
  179. e->error_count = 1;
  180. e->msg = "APEI";
  181. strcpy(e->label, "unknown");
  182. e->other_detail = "";
  183. if (mem_err->validation_bits & CPER_MEM_VALID_PHYSICAL_ADDRESS) {
  184. e->page_frame_number = mem_err->physical_addr >> PAGE_SHIFT;
  185. e->offset_in_page = mem_err->physical_addr & ~PAGE_MASK;
  186. e->grain = ~(mem_err->physical_addr_mask & ~PAGE_MASK);
  187. }
  188. switch (sev) {
  189. case GHES_SEV_CORRECTED:
  190. type = HW_EVENT_ERR_CORRECTED;
  191. break;
  192. case GHES_SEV_RECOVERABLE:
  193. type = HW_EVENT_ERR_UNCORRECTED;
  194. break;
  195. case GHES_SEV_PANIC:
  196. type = HW_EVENT_ERR_FATAL;
  197. break;
  198. default:
  199. case GHES_SEV_NO:
  200. type = HW_EVENT_ERR_INFO;
  201. }
  202. sprintf(e->location,
  203. "node:%d card:%d module:%d bank:%d device:%d row: %d column:%d bit_pos:%d",
  204. mem_err->node, mem_err->card, mem_err->module,
  205. mem_err->bank, mem_err->device, mem_err->row, mem_err->column,
  206. mem_err->bit_pos);
  207. edac_dbg(3, "error at location %s\n", e->location);
  208. edac_raw_mc_handle_error(type, mci, e);
  209. }
  210. EXPORT_SYMBOL_GPL(ghes_edac_report_mem_error);
  211. int ghes_edac_register(struct ghes *ghes, struct device *dev)
  212. {
  213. bool fake = false;
  214. int rc, num_dimm = 0;
  215. struct mem_ctl_info *mci;
  216. struct edac_mc_layer layers[1];
  217. struct ghes_edac_pvt *pvt;
  218. struct ghes_edac_dimm_fill dimm_fill;
  219. /* Get the number of DIMMs */
  220. dmi_walk(ghes_edac_count_dimms, &num_dimm);
  221. /* Check if we've got a bogus BIOS */
  222. if (num_dimm == 0) {
  223. fake = true;
  224. num_dimm = 1;
  225. }
  226. layers[0].type = EDAC_MC_LAYER_ALL_MEM;
  227. layers[0].size = num_dimm;
  228. layers[0].is_virt_csrow = true;
  229. /*
  230. * We need to serialize edac_mc_alloc() and edac_mc_add_mc(),
  231. * to avoid duplicated memory controller numbers
  232. */
  233. mutex_lock(&ghes_edac_lock);
  234. pr_info("ghes_edac#%d: allocating space for %d dimms\n",
  235. ghes_edac_mc_num, num_dimm);
  236. mci = edac_mc_alloc(ghes_edac_mc_num, ARRAY_SIZE(layers), layers,
  237. sizeof(*pvt));
  238. if (!mci) {
  239. pr_info(GHES_PFX "Can't allocate memory for EDAC data\n");
  240. mutex_unlock(&ghes_edac_lock);
  241. return -ENOMEM;
  242. }
  243. pvt = mci->pvt_info;
  244. memset(pvt, 0, sizeof(*pvt));
  245. list_add_tail(&pvt->list, &ghes_reglist);
  246. pvt->ghes = ghes;
  247. pvt->mci = mci;
  248. mci->pdev = dev;
  249. mci->mtype_cap = MEM_FLAG_EMPTY;
  250. mci->edac_ctl_cap = EDAC_FLAG_NONE;
  251. mci->edac_cap = EDAC_FLAG_NONE;
  252. mci->mod_name = "ghes_edac.c";
  253. mci->mod_ver = GHES_EDAC_REVISION;
  254. mci->ctl_name = "ghes_edac";
  255. mci->dev_name = "ghes";
  256. if (!fake) {
  257. /* Fill DIMM info from DMI */
  258. dimm_fill.count = 0;
  259. dimm_fill.mci = mci;
  260. dmi_walk(ghes_edac_dmidecode, &dimm_fill);
  261. } else {
  262. struct dimm_info *dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms,
  263. mci->n_layers, 0, 0, 0);
  264. pr_info(GHES_PFX "Crappy BIOS detected. Faking DIMM EDAC data\n");
  265. dimm->nr_pages = 1000;
  266. dimm->grain = 128;
  267. dimm->mtype = MEM_UNKNOWN;
  268. dimm->dtype = DEV_UNKNOWN;
  269. dimm->edac_mode = EDAC_SECDED;
  270. }
  271. rc = edac_mc_add_mc(mci);
  272. if (rc < 0) {
  273. pr_info(GHES_PFX "Can't register at EDAC core\n");
  274. edac_mc_free(mci);
  275. mutex_unlock(&ghes_edac_lock);
  276. return -ENODEV;
  277. }
  278. ghes_edac_mc_num++;
  279. mutex_unlock(&ghes_edac_lock);
  280. return 0;
  281. }
  282. EXPORT_SYMBOL_GPL(ghes_edac_register);
  283. void ghes_edac_unregister(struct ghes *ghes)
  284. {
  285. struct mem_ctl_info *mci;
  286. struct ghes_edac_pvt *pvt;
  287. list_for_each_entry(pvt, &ghes_reglist, list) {
  288. if (ghes == pvt->ghes) {
  289. mci = pvt->mci;
  290. edac_mc_del_mc(mci->pdev);
  291. edac_mc_free(mci);
  292. list_del(&pvt->list);
  293. }
  294. }
  295. }
  296. EXPORT_SYMBOL_GPL(ghes_edac_unregister);