ghes_edac.c 3.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166
  1. /*
  2. * GHES/EDAC Linux driver
  3. *
  4. * This file may be distributed under the terms of the GNU General Public
  5. * License version 2.
  6. *
  7. * Copyright (c) 2013 by Mauro Carvalho Chehab <mchehab@redhat.com>
  8. *
  9. * Red Hat Inc. http://www.redhat.com
  10. */
  11. #include <acpi/ghes.h>
  12. #include <linux/edac.h>
  13. #include "edac_core.h"
  /* Prefix placed in front of the messages this driver logs via pr_info(). */
  #define GHES_PFX "ghes_edac: "

  /* Version string published through mci->mod_ver when a controller is set up. */
  #define GHES_EDAC_REVISION " Ver: 1.0.0"
  16. struct ghes_edac_pvt {
  17. struct list_head list;
  18. struct ghes *ghes;
  19. struct mem_ctl_info *mci;
  20. };
  /* Every registered GHES source, as a list of struct ghes_edac_pvt entries. */
  static LIST_HEAD(ghes_reglist);

  /* Held by ghes_edac_register() around controller allocation/registration. */
  static DEFINE_MUTEX(ghes_edac_lock);

  /* Memory controller number handed to the next edac_mc_alloc() call. */
  static int ghes_edac_mc_num;
  24. void ghes_edac_report_mem_error(struct ghes *ghes, int sev,
  25. struct cper_sec_mem_err *mem_err)
  26. {
  27. enum hw_event_mc_err_type type;
  28. struct edac_raw_error_desc *e;
  29. struct mem_ctl_info *mci;
  30. struct ghes_edac_pvt *pvt = NULL;
  31. list_for_each_entry(pvt, &ghes_reglist, list) {
  32. if (ghes == pvt->ghes)
  33. break;
  34. }
  35. if (!pvt) {
  36. pr_err("Internal error: Can't find EDAC structure\n");
  37. return;
  38. }
  39. mci = pvt->mci;
  40. e = &mci->error_desc;
  41. /* Cleans the error report buffer */
  42. memset(e, 0, sizeof (*e));
  43. e->error_count = 1;
  44. e->msg = "APEI";
  45. strcpy(e->label, "unknown");
  46. e->other_detail = "";
  47. if (mem_err->validation_bits & CPER_MEM_VALID_PHYSICAL_ADDRESS) {
  48. e->page_frame_number = mem_err->physical_addr >> PAGE_SHIFT;
  49. e->offset_in_page = mem_err->physical_addr & ~PAGE_MASK;
  50. e->grain = ~(mem_err->physical_addr_mask & ~PAGE_MASK);
  51. }
  52. switch (sev) {
  53. case GHES_SEV_CORRECTED:
  54. type = HW_EVENT_ERR_CORRECTED;
  55. break;
  56. case GHES_SEV_RECOVERABLE:
  57. type = HW_EVENT_ERR_UNCORRECTED;
  58. break;
  59. case GHES_SEV_PANIC:
  60. type = HW_EVENT_ERR_FATAL;
  61. break;
  62. default:
  63. case GHES_SEV_NO:
  64. type = HW_EVENT_ERR_INFO;
  65. }
  66. sprintf(e->location,
  67. "node:%d card:%d module:%d bank:%d device:%d row: %d column:%d bit_pos:%d",
  68. mem_err->node, mem_err->card, mem_err->module,
  69. mem_err->bank, mem_err->device, mem_err->row, mem_err->column,
  70. mem_err->bit_pos);
  71. edac_dbg(3, "error at location %s\n", e->location);
  72. edac_raw_mc_handle_error(type, mci, e);
  73. }
  74. EXPORT_SYMBOL_GPL(ghes_edac_report_mem_error);
  75. int ghes_edac_register(struct ghes *ghes, struct device *dev)
  76. {
  77. int rc;
  78. struct mem_ctl_info *mci;
  79. struct edac_mc_layer layers[1];
  80. struct csrow_info *csrow;
  81. struct dimm_info *dimm;
  82. struct ghes_edac_pvt *pvt;
  83. layers[0].type = EDAC_MC_LAYER_ALL_MEM;
  84. layers[0].size = 1;
  85. layers[0].is_virt_csrow = true;
  86. /*
  87. * We need to serialize edac_mc_alloc() and edac_mc_add_mc(),
  88. * to avoid duplicated memory controller numbers
  89. */
  90. mutex_lock(&ghes_edac_lock);
  91. mci = edac_mc_alloc(ghes_edac_mc_num, ARRAY_SIZE(layers), layers,
  92. sizeof(*pvt));
  93. if (!mci) {
  94. pr_info(GHES_PFX "Can't allocate memory for EDAC data\n");
  95. mutex_unlock(&ghes_edac_lock);
  96. return -ENOMEM;
  97. }
  98. pvt = mci->pvt_info;
  99. memset(pvt, 0, sizeof(*pvt));
  100. list_add_tail(&pvt->list, &ghes_reglist);
  101. pvt->ghes = ghes;
  102. pvt->mci = mci;
  103. mci->pdev = dev;
  104. mci->mtype_cap = MEM_FLAG_EMPTY;
  105. mci->edac_ctl_cap = EDAC_FLAG_NONE;
  106. mci->edac_cap = EDAC_FLAG_NONE;
  107. mci->mod_name = "ghes_edac.c";
  108. mci->mod_ver = GHES_EDAC_REVISION;
  109. mci->ctl_name = "ghes_edac";
  110. mci->dev_name = "ghes";
  111. csrow = mci->csrows[0];
  112. dimm = csrow->channels[0]->dimm;
  113. /* FIXME: FAKE DATA */
  114. dimm->nr_pages = 1000;
  115. dimm->grain = 128;
  116. dimm->mtype = MEM_UNKNOWN;
  117. dimm->dtype = DEV_UNKNOWN;
  118. dimm->edac_mode = EDAC_SECDED;
  119. rc = edac_mc_add_mc(mci);
  120. if (rc < 0) {
  121. pr_info(GHES_PFX "Can't register at EDAC core\n");
  122. edac_mc_free(mci);
  123. mutex_unlock(&ghes_edac_lock);
  124. return -ENODEV;
  125. }
  126. ghes_edac_mc_num++;
  127. mutex_unlock(&ghes_edac_lock);
  128. return 0;
  129. }
  130. EXPORT_SYMBOL_GPL(ghes_edac_register);
  131. void ghes_edac_unregister(struct ghes *ghes)
  132. {
  133. struct mem_ctl_info *mci;
  134. struct ghes_edac_pvt *pvt;
  135. list_for_each_entry(pvt, &ghes_reglist, list) {
  136. if (ghes == pvt->ghes) {
  137. mci = pvt->mci;
  138. edac_mc_del_mc(mci->pdev);
  139. edac_mc_free(mci);
  140. list_del(&pvt->list);
  141. }
  142. }
  143. }
  144. EXPORT_SYMBOL_GPL(ghes_edac_unregister);