edac_pci_sysfs.c 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620
  1. /*
  2. * (C) 2005, 2006 Linux Networx (http://lnxi.com)
  3. * This file may be distributed under the terms of the
  4. * GNU General Public License.
  5. *
  6. * Written Doug Thompson <norsk5@xmission.com>
  7. *
  8. */
  9. #include <linux/module.h>
  10. #include <linux/sysdev.h>
  11. #include <linux/ctype.h>
  12. #include "edac_core.h"
  13. #include "edac_module.h"
  14. #ifdef CONFIG_PCI
  /* Name of the symlink created inside each instance directory by
   * edac_pci_create_sysfs(); it points at the kobject of pci->dev.
   */
  #define EDAC_PCI_SYMLINK "device"
  static int check_pci_errors; /* default NO (zero-initialized): do not check PCI parity */
  static int edac_pci_panic_on_pe; /* default no panic on PCI Parity */
  static int edac_pci_log_pe = 1; /* log PCI parity errors */
  static int edac_pci_log_npe = 1; /* log PCI non-parity errors */
  /* Global counters, bumped by edac_pci_dev_parity_test() */
  static atomic_t pci_parity_count = ATOMIC_INIT(0);
  static atomic_t pci_nonparity_count = ATOMIC_INIT(0);
  /* Value handed out by edac_pci_get_poll_msec() */
  static int edac_pci_poll_msec = 1000;
  static struct kobject edac_pci_kobj; /* /sys/devices/system/edac/pci */
  /* Waited on by edac_pci_unregister_main_kobj() while edac_pci_kobj goes away */
  static struct completion edac_pci_kobj_complete;
  /* Number of instances using edac_pci_kobj: it is registered when the count
   * goes 0 -> 1 and unregistered when it drops back to 0.
   */
  static atomic_t edac_pci_sysfs_refcount = ATOMIC_INIT(0);
  26. int edac_pci_get_check_errors(void)
  27. {
  28. return check_pci_errors;
  29. }
  30. int edac_pci_get_log_pe(void)
  31. {
  32. return edac_pci_log_pe;
  33. }
  34. int edac_pci_get_log_npe(void)
  35. {
  36. return edac_pci_log_npe;
  37. }
  38. int edac_pci_get_panic_on_pe(void)
  39. {
  40. return edac_pci_panic_on_pe;
  41. }
  42. int edac_pci_get_poll_msec(void)
  43. {
  44. return edac_pci_poll_msec;
  45. }
  46. /**************************** EDAC PCI sysfs instance *******************/
  47. static ssize_t instance_pe_count_show(struct edac_pci_ctl_info *pci, char *data)
  48. {
  49. return sprintf(data, "%u\n", atomic_read(&pci->counters.pe_count));
  50. }
  51. static ssize_t instance_npe_count_show(struct edac_pci_ctl_info *pci,
  52. char *data)
  53. {
  54. return sprintf(data, "%u\n", atomic_read(&pci->counters.npe_count));
  55. }
  /* Map an embedded kobject back to its enclosing edac_pci_ctl_info */
  #define to_instance(k) container_of(k, struct edac_pci_ctl_info, kobj)
  /* Map a generic sysfs attribute back to its enclosing instance_attribute */
  #define to_instance_attr(a) container_of(a, struct instance_attribute, attr)
  58. /* DEVICE instance kobject release() function */
  59. static void edac_pci_instance_release(struct kobject *kobj)
  60. {
  61. struct edac_pci_ctl_info *pci;
  62. debugf1("%s()\n", __func__);
  63. pci = to_instance(kobj);
  64. complete(&pci->kobj_complete);
  65. }
  /* instance specific attribute structure
   *
   * Pairs a generic sysfs attribute with optional show/store handlers that
   * receive the owning edac_pci_ctl_info. 'attr' is the first member; the
   * ktype below casts an array of these to 'struct attribute **'.
   * A NULL handler makes the matching sysfs operation return -EIO
   * (see edac_pci_instance_show() / edac_pci_instance_store()).
   */
  struct instance_attribute {
      struct attribute attr;
      ssize_t(*show) (struct edac_pci_ctl_info *, char *);
      ssize_t(*store) (struct edac_pci_ctl_info *, const char *, size_t);
  };
  72. /* Function to 'show' fields from the edac_pci 'instance' structure */
  73. static ssize_t edac_pci_instance_show(struct kobject *kobj,
  74. struct attribute *attr, char *buffer)
  75. {
  76. struct edac_pci_ctl_info *pci = to_instance(kobj);
  77. struct instance_attribute *instance_attr = to_instance_attr(attr);
  78. if (instance_attr->show)
  79. return instance_attr->show(pci, buffer);
  80. return -EIO;
  81. }
  82. /* Function to 'store' fields into the edac_pci 'instance' structure */
  83. static ssize_t edac_pci_instance_store(struct kobject *kobj,
  84. struct attribute *attr,
  85. const char *buffer, size_t count)
  86. {
  87. struct edac_pci_ctl_info *pci = to_instance(kobj);
  88. struct instance_attribute *instance_attr = to_instance_attr(attr);
  89. if (instance_attr->store)
  90. return instance_attr->store(pci, buffer, count);
  91. return -EIO;
  92. }
  /* sysfs operations for instance kobjects: both entry points dispatch to
   * the per-attribute handlers stored in struct instance_attribute.
   */
  static struct sysfs_ops pci_instance_ops = {
      .show = edac_pci_instance_show,
      .store = edac_pci_instance_store
  };
  /* Define a static instance_attribute called attr_instance_<_name>.
   * The sysfs file name is the stringified _name; _show/_store may be NULL.
   */
  #define INSTANCE_ATTR(_name, _mode, _show, _store) \
  static struct instance_attribute attr_instance_##_name = { \
      .attr = {.name = __stringify(_name), .mode = _mode }, \
      .show = _show, \
      .store = _store, \
  };
  /* Per-instance error counters: readable by all (S_IRUGO), no store handler */
  INSTANCE_ATTR(pe_count, S_IRUGO, instance_pe_count_show, NULL);
  INSTANCE_ATTR(npe_count, S_IRUGO, instance_npe_count_show, NULL);
  /* pci instance attributes (NULL terminated) */
  static struct instance_attribute *pci_instance_attr[] = {
      &attr_instance_pe_count,
      &attr_instance_npe_count,
      NULL
  };
  /* the ktype for pci instance
   *
   * The cast of pci_instance_attr to 'struct attribute **' relies on
   * 'attr' being the first member of struct instance_attribute.
   */
  static struct kobj_type ktype_pci_instance = {
      .release = edac_pci_instance_release,
      .sysfs_ops = &pci_instance_ops,
      .default_attrs = (struct attribute **)pci_instance_attr,
  };
  117. static int edac_pci_create_instance_kobj(struct edac_pci_ctl_info *pci, int idx)
  118. {
  119. int err;
  120. pci->kobj.parent = &edac_pci_kobj;
  121. pci->kobj.ktype = &ktype_pci_instance;
  122. err = kobject_set_name(&pci->kobj, "pci%d", idx);
  123. if (err)
  124. return err;
  125. err = kobject_register(&pci->kobj);
  126. if (err != 0) {
  127. debugf2("%s() failed to register instance pci%d\n",
  128. __func__, idx);
  129. return err;
  130. }
  131. debugf1("%s() Register instance 'pci%d' kobject\n", __func__, idx);
  132. return 0;
  133. }
  /*
   * edac_pci_delete_instance_kobj
   *
   *	unregisters the instance kobject and blocks until its release()
   *	callback (edac_pci_instance_release) has signalled
   *	pci->kobj_complete. 'idx' is not used by this function.
   */
  static void
  edac_pci_delete_instance_kobj(struct edac_pci_ctl_info *pci, int idx)
  {
      /* must be armed before the unregister can trigger release() */
      init_completion(&pci->kobj_complete);
      kobject_unregister(&pci->kobj);
      wait_for_completion(&pci->kobj_complete);
  }
  141. /***************************** EDAC PCI sysfs root **********************/
  /* Map an embedded kobject back to its enclosing edac_pci_ctl_info.
   * Only valid for kobjects that really are the 'kobj' member of an
   * edac_pci_ctl_info, i.e. not for the static main edac_pci_kobj.
   */
  #define to_edacpci(k) container_of(k, struct edac_pci_ctl_info, kobj)
  /* Map a generic sysfs attribute back to its enclosing
   * edac_pci_dev_attribute (the attribute type used by the main
   * '.../edac/pci' kobject).
   */
  #define to_edacpci_attr(a) container_of(a, struct edac_pci_dev_attribute, attr)
  /* Generic 'show' helper: 'ptr' points at an int, which is printed in
   * decimal followed by a newline. Returns the number of characters
   * written to 'buffer'.
   */
  static ssize_t edac_pci_int_show(void *ptr, char *buffer)
  {
      return sprintf(buffer, "%d\n", *(int *)ptr);
  }
  149. static ssize_t edac_pci_int_store(void *ptr, const char *buffer, size_t count)
  150. {
  151. int *value = ptr;
  152. if (isdigit(*buffer))
  153. *value = simple_strtoul(buffer, NULL, 0);
  154. return count;
  155. }
  /* Attribute type used by the main '.../edac/pci' kobject.
   *
   * 'attr' is the first member; edac_pci_dev_show()/edac_pci_dev_store()
   * cast a 'struct attribute *' straight to this type. 'value' is the
   * opaque pointer handed to the show/store handlers, either of which
   * may be NULL.
   */
  struct edac_pci_dev_attribute {
      struct attribute attr;
      void *value;
      ssize_t(*show) (void *, char *);
      ssize_t(*store) (void *, const char *, size_t);
  };
  162. /* Set of show/store abstract level functions for PCI Parity object */
  163. static ssize_t edac_pci_dev_show(struct kobject *kobj, struct attribute *attr,
  164. char *buffer)
  165. {
  166. struct edac_pci_dev_attribute *edac_pci_dev;
  167. edac_pci_dev = (struct edac_pci_dev_attribute *)attr;
  168. if (edac_pci_dev->show)
  169. return edac_pci_dev->show(edac_pci_dev->value, buffer);
  170. return -EIO;
  171. }
  172. static ssize_t edac_pci_dev_store(struct kobject *kobj,
  173. struct attribute *attr, const char *buffer,
  174. size_t count)
  175. {
  176. struct edac_pci_dev_attribute *edac_pci_dev;
  177. edac_pci_dev = (struct edac_pci_dev_attribute *)attr;
  178. if (edac_pci_dev->show)
  179. return edac_pci_dev->store(edac_pci_dev->value, buffer, count);
  180. return -EIO;
  181. }
  /* sysfs operations for the main '.../edac/pci' kobject: both entry
   * points dispatch to the handlers stored in struct edac_pci_dev_attribute.
   */
  static struct sysfs_ops edac_pci_sysfs_ops = {
      .show = edac_pci_dev_show,
      .store = edac_pci_dev_store
  };
  /* Define a static edac_pci_dev_attribute called edac_pci_attr_<_name>.
   * _name doubles as the sysfs file name (stringified) and as the
   * file-scope variable whose address becomes the attribute's 'value'.
   */
  #define EDAC_PCI_ATTR(_name,_mode,_show,_store) \
  static struct edac_pci_dev_attribute edac_pci_attr_##_name = { \
      .attr = {.name = __stringify(_name), .mode = _mode }, \
      .value = &_name, \
      .show = _show, \
      .store = _store, \
  };
  /* Same as EDAC_PCI_ATTR, except that 'value' is set to _data as given
   * instead of to the address of a variable named _name.
   * Not used in the visible part of this file.
   */
  #define EDAC_PCI_STRING_ATTR(_name,_data,_mode,_show,_store) \
  static struct edac_pci_dev_attribute edac_pci_attr_##_name = { \
      .attr = {.name = __stringify(_name), .mode = _mode }, \
      .value = _data, \
      .show = _show, \
      .store = _store, \
  };
  /* PCI Parity control files: int controls, readable by all and writable
   * by the owner, parsed by edac_pci_int_store().
   */
  EDAC_PCI_ATTR(check_pci_errors, S_IRUGO | S_IWUSR, edac_pci_int_show,
      edac_pci_int_store);
  EDAC_PCI_ATTR(edac_pci_log_pe, S_IRUGO | S_IWUSR, edac_pci_int_show,
      edac_pci_int_store);
  EDAC_PCI_ATTR(edac_pci_log_npe, S_IRUGO | S_IWUSR, edac_pci_int_show,
      edac_pci_int_store);
  EDAC_PCI_ATTR(edac_pci_panic_on_pe, S_IRUGO | S_IWUSR, edac_pci_int_show,
      edac_pci_int_store);
  /* Global error counters: read-only, no store handler.
   * NOTE(review): these variables are atomic_t but are read through
   * edac_pci_int_show(), which dereferences an 'int *' - this assumes
   * atomic_t is layout-compatible with int; confirm for the target arch.
   */
  EDAC_PCI_ATTR(pci_parity_count, S_IRUGO, edac_pci_int_show, NULL);
  EDAC_PCI_ATTR(pci_nonparity_count, S_IRUGO, edac_pci_int_show, NULL);
  /* Base attributes of the main EDAC PCI object (NULL terminated) */
  static struct edac_pci_dev_attribute *edac_pci_attr[] = {
      &edac_pci_attr_check_pci_errors,
      &edac_pci_attr_edac_pci_log_pe,
      &edac_pci_attr_edac_pci_log_npe,
      &edac_pci_attr_edac_pci_panic_on_pe,
      &edac_pci_attr_pci_parity_count,
      &edac_pci_attr_pci_nonparity_count,
      NULL,
  };
  221. /* No memory to release */
  222. static void edac_pci_release(struct kobject *kobj)
  223. {
  224. struct edac_pci_ctl_info *pci;
  225. pci = to_edacpci(kobj);
  226. debugf1("%s()\n", __func__);
  227. complete(&pci->kobj_complete);
  228. }
  /* the ktype for the main '.../edac/pci' kobject
   *
   * The cast of edac_pci_attr to 'struct attribute **' relies on 'attr'
   * being the first member of struct edac_pci_dev_attribute.
   */
  static struct kobj_type ktype_edac_pci = {
      .release = edac_pci_release,
      .sysfs_ops = &edac_pci_sysfs_ops,
      .default_attrs = (struct attribute **)edac_pci_attr,
  };
  234. /**
  235. * edac_sysfs_pci_setup()
  236. *
  237. * setup the sysfs for EDAC PCI attributes
  238. * assumes edac_class has already been initialized
  239. */
  240. int edac_pci_register_main_kobj(void)
  241. {
  242. int err;
  243. struct sysdev_class *edac_class;
  244. debugf1("%s()\n", __func__);
  245. edac_class = edac_get_edac_class();
  246. if (edac_class == NULL) {
  247. debugf1("%s() no edac_class\n", __func__);
  248. return -ENODEV;
  249. }
  250. edac_pci_kobj.ktype = &ktype_edac_pci;
  251. edac_pci_kobj.parent = &edac_class->kset.kobj;
  252. err = kobject_set_name(&edac_pci_kobj, "pci");
  253. if (err)
  254. return err;
  255. /* Instanstiate the pci object */
  256. /* FIXME: maybe new sysdev_create_subdir() */
  257. err = kobject_register(&edac_pci_kobj);
  258. if (err) {
  259. debugf1("Failed to register '.../edac/pci'\n");
  260. return err;
  261. }
  262. debugf1("Registered '.../edac/pci' kobject\n");
  263. return 0;
  264. }
  /*
   * edac_pci_unregister_main_kobj()
   *
   *	perform the sysfs teardown for the PCI attributes: unregisters the
   *	main edac_pci_kobj and blocks on edac_pci_kobj_complete, which is
   *	expected to be signalled from the kobject's release() callback
   */
  void edac_pci_unregister_main_kobj(void)
  {
      debugf0("%s()\n", __func__);
      /* must be armed before the unregister can trigger release() */
      init_completion(&edac_pci_kobj_complete);
      kobject_unregister(&edac_pci_kobj);
      wait_for_completion(&edac_pci_kobj_complete);
  }
  277. int edac_pci_create_sysfs(struct edac_pci_ctl_info *pci)
  278. {
  279. int err;
  280. struct kobject *edac_kobj = &pci->kobj;
  281. if (atomic_inc_return(&edac_pci_sysfs_refcount) == 1) {
  282. err = edac_pci_register_main_kobj();
  283. if (err) {
  284. atomic_dec(&edac_pci_sysfs_refcount);
  285. return err;
  286. }
  287. }
  288. err = edac_pci_create_instance_kobj(pci, pci->pci_idx);
  289. if (err) {
  290. if (atomic_dec_return(&edac_pci_sysfs_refcount) == 0)
  291. edac_pci_unregister_main_kobj();
  292. }
  293. debugf0("%s() idx=%d\n", __func__, pci->pci_idx);
  294. err = sysfs_create_link(edac_kobj, &pci->dev->kobj, EDAC_PCI_SYMLINK);
  295. if (err) {
  296. debugf0("%s() sysfs_create_link() returned err= %d\n",
  297. __func__, err);
  298. return err;
  299. }
  300. return 0;
  301. }
  302. void edac_pci_remove_sysfs(struct edac_pci_ctl_info *pci)
  303. {
  304. debugf0("%s()\n", __func__);
  305. edac_pci_delete_instance_kobj(pci, pci->pci_idx);
  306. sysfs_remove_link(&pci->kobj, EDAC_PCI_SYMLINK);
  307. if (atomic_dec_return(&edac_pci_sysfs_refcount) == 0)
  308. edac_pci_unregister_main_kobj();
  309. }
  310. /************************ PCI error handling *************************/
  311. static u16 get_pci_parity_status(struct pci_dev *dev, int secondary)
  312. {
  313. int where;
  314. u16 status;
  315. where = secondary ? PCI_SEC_STATUS : PCI_STATUS;
  316. pci_read_config_word(dev, where, &status);
  317. /* If we get back 0xFFFF then we must suspect that the card has been
  318. * pulled but the Linux PCI layer has not yet finished cleaning up.
  319. * We don't want to report on such devices
  320. */
  321. if (status == 0xFFFF) {
  322. u32 sanity;
  323. pci_read_config_dword(dev, 0, &sanity);
  324. if (sanity == 0xFFFFFFFF)
  325. return 0;
  326. }
  327. status &= PCI_STATUS_DETECTED_PARITY | PCI_STATUS_SIG_SYSTEM_ERROR |
  328. PCI_STATUS_PARITY;
  329. if (status)
  330. /* reset only the bits we are interested in */
  331. pci_write_config_word(dev, where, status);
  332. return status;
  333. }
  /* Per-device callback type invoked by edac_pci_dev_parity_iterator() */
  typedef void (*pci_parity_check_fn_t) (struct pci_dev * dev);
  335. /* Clear any PCI parity errors logged by this device. */
  336. static void edac_pci_dev_parity_clear(struct pci_dev *dev)
  337. {
  338. u8 header_type;
  339. get_pci_parity_status(dev, 0);
  340. /* read the device TYPE, looking for bridges */
  341. pci_read_config_byte(dev, PCI_HEADER_TYPE, &header_type);
  342. if ((header_type & 0x7F) == PCI_HEADER_TYPE_BRIDGE)
  343. get_pci_parity_status(dev, 1);
  344. }
  345. /*
  346. * PCI Parity polling
  347. *
  348. */
  349. static void edac_pci_dev_parity_test(struct pci_dev *dev)
  350. {
  351. u16 status;
  352. u8 header_type;
  353. /* read the STATUS register on this device
  354. */
  355. status = get_pci_parity_status(dev, 0);
  356. debugf2("PCI STATUS= 0x%04x %s\n", status, dev->dev.bus_id);
  357. /* check the status reg for errors */
  358. if (status) {
  359. if (status & (PCI_STATUS_SIG_SYSTEM_ERROR)) {
  360. edac_printk(KERN_CRIT, EDAC_PCI,
  361. "Signaled System Error on %s\n",
  362. pci_name(dev));
  363. atomic_inc(&pci_nonparity_count);
  364. }
  365. if (status & (PCI_STATUS_PARITY)) {
  366. edac_printk(KERN_CRIT, EDAC_PCI,
  367. "Master Data Parity Error on %s\n",
  368. pci_name(dev));
  369. atomic_inc(&pci_parity_count);
  370. }
  371. if (status & (PCI_STATUS_DETECTED_PARITY)) {
  372. edac_printk(KERN_CRIT, EDAC_PCI,
  373. "Detected Parity Error on %s\n",
  374. pci_name(dev));
  375. atomic_inc(&pci_parity_count);
  376. }
  377. }
  378. /* read the device TYPE, looking for bridges */
  379. pci_read_config_byte(dev, PCI_HEADER_TYPE, &header_type);
  380. debugf2("PCI HEADER TYPE= 0x%02x %s\n", header_type, dev->dev.bus_id);
  381. if ((header_type & 0x7F) == PCI_HEADER_TYPE_BRIDGE) {
  382. /* On bridges, need to examine secondary status register */
  383. status = get_pci_parity_status(dev, 1);
  384. debugf2("PCI SEC_STATUS= 0x%04x %s\n", status, dev->dev.bus_id);
  385. /* check the secondary status reg for errors */
  386. if (status) {
  387. if (status & (PCI_STATUS_SIG_SYSTEM_ERROR)) {
  388. edac_printk(KERN_CRIT, EDAC_PCI, "Bridge "
  389. "Signaled System Error on %s\n",
  390. pci_name(dev));
  391. atomic_inc(&pci_nonparity_count);
  392. }
  393. if (status & (PCI_STATUS_PARITY)) {
  394. edac_printk(KERN_CRIT, EDAC_PCI, "Bridge "
  395. "Master Data Parity Error on "
  396. "%s\n", pci_name(dev));
  397. atomic_inc(&pci_parity_count);
  398. }
  399. if (status & (PCI_STATUS_DETECTED_PARITY)) {
  400. edac_printk(KERN_CRIT, EDAC_PCI, "Bridge "
  401. "Detected Parity Error on %s\n",
  402. pci_name(dev));
  403. atomic_inc(&pci_parity_count);
  404. }
  405. }
  406. }
  407. }
  408. /*
  409. * pci_dev parity list iterator
  410. * Scan the PCI device list for one iteration, looking for SERRORs
  411. * Master Parity ERRORS or Parity ERRORs on primary or secondary devices
  412. */
  413. static inline void edac_pci_dev_parity_iterator(pci_parity_check_fn_t fn)
  414. {
  415. struct pci_dev *dev = NULL;
  416. /* request for kernel access to the next PCI device, if any,
  417. * and while we are looking at it have its reference count
  418. * bumped until we are done with it
  419. */
  420. while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
  421. fn(dev);
  422. }
  423. }
  424. /*
  425. * edac_pci_do_parity_check
  426. *
  427. * performs the actual PCI parity check operation
  428. */
  429. void edac_pci_do_parity_check(void)
  430. {
  431. unsigned long flags;
  432. int before_count;
  433. debugf3("%s()\n", __func__);
  434. if (!check_pci_errors)
  435. return;
  436. before_count = atomic_read(&pci_parity_count);
  437. /* scan all PCI devices looking for a Parity Error on devices and
  438. * bridges
  439. */
  440. local_irq_save(flags);
  441. edac_pci_dev_parity_iterator(edac_pci_dev_parity_test);
  442. local_irq_restore(flags);
  443. /* Only if operator has selected panic on PCI Error */
  444. if (edac_pci_get_panic_on_pe()) {
  445. /* If the count is different 'after' from 'before' */
  446. if (before_count != atomic_read(&pci_parity_count))
  447. panic("EDAC: PCI Parity Error");
  448. }
  449. }
  450. void edac_pci_clear_parity_errors(void)
  451. {
  452. /* Clear any PCI bus parity errors that devices initially have logged
  453. * in their registers.
  454. */
  455. edac_pci_dev_parity_iterator(edac_pci_dev_parity_clear);
  456. }
  457. void edac_pci_handle_pe(struct edac_pci_ctl_info *pci, const char *msg)
  458. {
  459. /* global PE counter incremented by edac_pci_do_parity_check() */
  460. atomic_inc(&pci->counters.pe_count);
  461. if (edac_pci_get_log_pe())
  462. edac_pci_printk(pci, KERN_WARNING,
  463. "Parity Error ctl: %s %d: %s\n",
  464. pci->ctl_name, pci->pci_idx, msg);
  465. /*
  466. * poke all PCI devices and see which one is the troublemaker
  467. * panic() is called if set
  468. */
  469. edac_pci_do_parity_check();
  470. }
  471. EXPORT_SYMBOL_GPL(edac_pci_handle_pe);
  472. void edac_pci_handle_npe(struct edac_pci_ctl_info *pci, const char *msg)
  473. {
  474. /* global NPE counter incremented by edac_pci_do_parity_check() */
  475. atomic_inc(&pci->counters.npe_count);
  476. if (edac_pci_get_log_npe())
  477. edac_pci_printk(pci, KERN_WARNING,
  478. "Non-Parity Error ctl: %s %d: %s\n",
  479. pci->ctl_name, pci->pci_idx, msg);
  480. /*
  481. * poke all PCI devices and see which one is the troublemaker
  482. * panic() is called if set
  483. */
  484. edac_pci_do_parity_check();
  485. }
  486. EXPORT_SYMBOL_GPL(edac_pci_handle_npe);
  /*
   * Define the PCI parameters to the module
   *
   * Both are int parameters registered with mode 0644. They are backed by
   * the same variables that the 'check_pci_errors' and
   * 'edac_pci_panic_on_pe' sysfs attribute files above expose.
   */
  module_param(check_pci_errors, int, 0644);
  MODULE_PARM_DESC(check_pci_errors,
      "Check for PCI bus parity errors: 0=off 1=on");
  module_param(edac_pci_panic_on_pe, int, 0644);
  MODULE_PARM_DESC(edac_pci_panic_on_pe,
      "Panic on PCI Bus Parity error: 0=off 1=on");
  496. #endif /* CONFIG_PCI */