edac_device.c 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644
  1. /*
  2. * edac_device.c
  3. * (C) 2007 www.douglaskthompson.com
  4. *
  5. * This file may be distributed under the terms of the
  6. * GNU General Public License.
  7. *
  8. * Written by Doug Thompson <norsk5@xmission.com>
  9. *
  10. * edac_device API implementation
  11. * 19 Jan 2007
  12. */
  13. #include <linux/module.h>
  14. #include <linux/types.h>
  15. #include <linux/smp.h>
  16. #include <linux/init.h>
  17. #include <linux/sysctl.h>
  18. #include <linux/highmem.h>
  19. #include <linux/timer.h>
  20. #include <linux/slab.h>
  21. #include <linux/jiffies.h>
  22. #include <linux/spinlock.h>
  23. #include <linux/list.h>
  24. #include <linux/sysdev.h>
  25. #include <linux/ctype.h>
  26. #include <linux/workqueue.h>
  27. #include <asm/uaccess.h>
  28. #include <asm/page.h>
  29. #include "edac_core.h"
  30. #include "edac_module.h"
  /*
   * device_ctls_mutex guards 'edac_device_list', the global list of
   * registered edac devices. It is taken with down() and released with
   * up() by the list users in this file, and is also held while a
   * device's polled check routine runs.
   */
  static DECLARE_MUTEX(device_ctls_mutex);
  static LIST_HEAD(edac_device_list);
  #ifdef CONFIG_EDAC_DEBUG
  /*
   * edac_device_dump_device
   *    Debug-only helper (built only under CONFIG_EDAC_DEBUG) that
   *    reports the main fields of 'edac_dev' through the debugf3() and
   *    debugf4() calls: the structure address and index, the check
   *    routine, the underlying device, the module/controller names and
   *    the private data pointer.
   */
  static void edac_device_dump_device(struct edac_device_ctl_info *edac_dev)
  {
      /* identity of the control structure */
      debugf3("\tedac_dev = %p dev_idx=%d \n", edac_dev, edac_dev->dev_idx);

      /* polling routine, if any */
      debugf4("\tedac_dev->edac_check = %p\n", edac_dev->edac_check);

      /* device and owner names */
      debugf3("\tdev = %p\n", edac_dev->dev);
      debugf3("\tmod_name:ctl_name = %s:%s\n",
          edac_dev->mod_name, edac_dev->ctl_name);

      /* driver private area */
      debugf3("\tpvt_info = %p\n\n", edac_dev->pvt_info);
  }
  #endif /* CONFIG_EDAC_DEBUG */
  45. /*
  46. * edac_device_alloc_ctl_info()
  47. * Allocate a new edac device control info structure
  48. *
  49. * The control structure is allocated in complete chunk
  50. * from the OS. It is in turn sub allocated to the
  51. * various objects that compose the struture
  52. *
  53. * The structure has a 'nr_instance' array within itself.
  54. * Each instance represents a major component
  55. * Example: L1 cache and L2 cache are 2 instance components
  56. *
  57. * Within each instance is an array of 'nr_blocks' blockoffsets
  58. */
  59. struct edac_device_ctl_info *edac_device_alloc_ctl_info(
  60. unsigned sz_private,
  61. char *edac_device_name, unsigned nr_instances,
  62. char *edac_block_name, unsigned nr_blocks,
  63. unsigned offset_value, /* zero, 1, or other based offset */
  64. struct edac_attrib_spec *attrib_spec, unsigned nr_attribs)
  65. {
  66. struct edac_device_ctl_info *dev_ctl;
  67. struct edac_device_instance *dev_inst, *inst;
  68. struct edac_device_block *dev_blk, *blk_p, *blk;
  69. struct edac_attrib *dev_attrib, *attrib_p, *attrib;
  70. unsigned total_size;
  71. unsigned count;
  72. unsigned instance, block, attr;
  73. void *pvt;
  74. debugf1("%s() instances=%d blocks=%d\n",
  75. __func__, nr_instances, nr_blocks);
  76. /* Figure out the offsets of the various items from the start of an
  77. * ctl_info structure. We want the alignment of each item
  78. * to be at least as stringent as what the compiler would
  79. * provide if we could simply hardcode everything into a single struct.
  80. */
  81. dev_ctl = (struct edac_device_ctl_info *)NULL;
  82. /* Calc the 'end' offset past the ctl_info structure */
  83. dev_inst = (struct edac_device_instance *)
  84. edac_align_ptr(&dev_ctl[1], sizeof(*dev_inst));
  85. /* Calc the 'end' offset past the instance array */
  86. dev_blk = (struct edac_device_block *)
  87. edac_align_ptr(&dev_inst[nr_instances], sizeof(*dev_blk));
  88. /* Calc the 'end' offset past the dev_blk array */
  89. count = nr_instances * nr_blocks;
  90. dev_attrib = (struct edac_attrib *)
  91. edac_align_ptr(&dev_blk[count], sizeof(*dev_attrib));
  92. /* Check for case of NO attributes specified */
  93. if (nr_attribs > 0)
  94. count *= nr_attribs;
  95. /* Calc the 'end' offset past the attributes array */
  96. pvt = edac_align_ptr(&dev_attrib[count], sz_private);
  97. total_size = ((unsigned long)pvt) + sz_private;
  98. /* Allocate the amount of memory for the set of control structures */
  99. dev_ctl = kzalloc(total_size, GFP_KERNEL);
  100. if (dev_ctl == NULL)
  101. return NULL;
  102. /* Adjust pointers so they point within the memory we just allocated
  103. * rather than an imaginary chunk of memory located at address 0.
  104. */
  105. dev_inst = (struct edac_device_instance *)
  106. (((char *)dev_ctl) + ((unsigned long)dev_inst));
  107. dev_blk = (struct edac_device_block *)
  108. (((char *)dev_ctl) + ((unsigned long)dev_blk));
  109. dev_attrib = (struct edac_attrib *)
  110. (((char *)dev_ctl) + ((unsigned long)dev_attrib));
  111. pvt = sz_private ? (((char *)dev_ctl) + ((unsigned long)pvt)) : NULL;
  112. dev_ctl->nr_instances = nr_instances;
  113. dev_ctl->instances = dev_inst;
  114. dev_ctl->pvt_info = pvt;
  115. /* Name of this edac device */
  116. snprintf(dev_ctl->name,sizeof(dev_ctl->name),"%s",edac_device_name);
  117. /* Initialize every Instance */
  118. for (instance = 0; instance < nr_instances; instance++) {
  119. inst = &dev_inst[instance];
  120. inst->ctl = dev_ctl;
  121. inst->nr_blocks = nr_blocks;
  122. blk_p = &dev_blk[instance * nr_blocks];
  123. inst->blocks = blk_p;
  124. /* name of this instance */
  125. snprintf(inst->name, sizeof(inst->name),
  126. "%s%u", edac_device_name, instance);
  127. /* Initialize every block in each instance */
  128. for (block = 0; block < nr_blocks; block++) {
  129. blk = &blk_p[block];
  130. blk->instance = inst;
  131. blk->nr_attribs = nr_attribs;
  132. attrib_p = &dev_attrib[block * nr_attribs];
  133. blk->attribs = attrib_p;
  134. snprintf(blk->name, sizeof(blk->name),
  135. "%s%d", edac_block_name, block+offset_value);
  136. debugf1("%s() instance=%d block=%d name=%s\n",
  137. __func__, instance, block, blk->name);
  138. if (attrib_spec != NULL) {
  139. /* when there is an attrib_spec passed int then
  140. * Initialize every attrib of each block
  141. */
  142. for (attr = 0; attr < nr_attribs; attr++) {
  143. attrib = &attrib_p[attr];
  144. attrib->block = blk;
  145. /* Link each attribute to the caller's
  146. * spec entry, for name and type
  147. */
  148. attrib->spec = &attrib_spec[attr];
  149. }
  150. }
  151. }
  152. }
  153. /* Mark this instance as merely ALLOCATED */
  154. dev_ctl->op_state = OP_ALLOC;
  155. return dev_ctl;
  156. }
  157. EXPORT_SYMBOL_GPL(edac_device_alloc_ctl_info);
  /*
   * edac_device_free_ctl_info()
   *    Release a control structure obtained from
   *    edac_device_alloc_ctl_info().
   *
   *    The structure and all of its sub-arrays live in one allocation,
   *    so handing the base pointer to kfree() releases everything.
   */
  void edac_device_free_ctl_info(struct edac_device_ctl_info *ctl_info)
  {
      /* single chunk: one kfree() covers the whole control structure */
      kfree(ctl_info);
  }
  EXPORT_SYMBOL_GPL(edac_device_free_ctl_info);
  168. /*
  169. * find_edac_device_by_dev
  170. * scans the edac_device list for a specific 'struct device *'
  171. *
  172. * lock to be held prior to call: device_ctls_mutex
  173. *
  174. * Return:
  175. * pointer to control structure managing 'dev'
  176. * NULL if not found on list
  177. */
  178. static struct edac_device_ctl_info *find_edac_device_by_dev(struct device *dev)
  179. {
  180. struct edac_device_ctl_info *edac_dev;
  181. struct list_head *item;
  182. debugf3("%s()\n", __func__);
  183. list_for_each(item, &edac_device_list) {
  184. edac_dev = list_entry(item, struct edac_device_ctl_info, link);
  185. if (edac_dev->dev == dev)
  186. return edac_dev;
  187. }
  188. return NULL;
  189. }
  190. /*
  191. * add_edac_dev_to_global_list
  192. * Before calling this function, caller must
  193. * assign a unique value to edac_dev->dev_idx.
  194. *
  195. * lock to be held prior to call: device_ctls_mutex
  196. *
  197. * Return:
  198. * 0 on success
  199. * 1 on failure.
  200. */
  201. static int add_edac_dev_to_global_list(struct edac_device_ctl_info *edac_dev)
  202. {
  203. struct list_head *item, *insert_before;
  204. struct edac_device_ctl_info *rover;
  205. insert_before = &edac_device_list;
  206. /* Determine if already on the list */
  207. rover = find_edac_device_by_dev(edac_dev->dev);
  208. if (unlikely(rover != NULL))
  209. goto fail0;
  210. /* Insert in ascending order by 'dev_idx', so find position */
  211. list_for_each(item, &edac_device_list) {
  212. rover = list_entry(item, struct edac_device_ctl_info, link);
  213. if (rover->dev_idx >= edac_dev->dev_idx) {
  214. if (unlikely(rover->dev_idx == edac_dev->dev_idx))
  215. goto fail1;
  216. insert_before = item;
  217. break;
  218. }
  219. }
  220. list_add_tail_rcu(&edac_dev->link, insert_before);
  221. return 0;
  222. fail0:
  223. edac_printk(KERN_WARNING, EDAC_MC,
  224. "%s (%s) %s %s already assigned %d\n",
  225. rover->dev->bus_id, dev_name(rover),
  226. rover->mod_name, rover->ctl_name, rover->dev_idx);
  227. return 1;
  228. fail1:
  229. edac_printk(KERN_WARNING, EDAC_MC,
  230. "bug in low-level driver: attempt to assign\n"
  231. " duplicate dev_idx %d in %s()\n", rover->dev_idx,
  232. __func__);
  233. return 1;
  234. }
  235. /*
  236. * complete_edac_device_list_del
  237. *
  238. * callback function when reference count is zero
  239. */
  240. static void complete_edac_device_list_del(struct rcu_head *head)
  241. {
  242. struct edac_device_ctl_info *edac_dev;
  243. edac_dev = container_of(head, struct edac_device_ctl_info, rcu);
  244. INIT_LIST_HEAD(&edac_dev->link);
  245. complete(&edac_dev->complete);
  246. }
  247. /*
  248. * del_edac_device_from_global_list
  249. *
  250. * remove the RCU, setup for a callback call, then wait for the
  251. * callback to occur
  252. */
  253. static void del_edac_device_from_global_list(struct edac_device_ctl_info
  254. *edac_device)
  255. {
  256. list_del_rcu(&edac_device->link);
  257. init_completion(&edac_device->complete);
  258. call_rcu(&edac_device->rcu, complete_edac_device_list_del);
  259. wait_for_completion(&edac_device->complete);
  260. }
  261. /**
  262. * edac_device_find
  263. * Search for a edac_device_ctl_info structure whose index is 'idx'.
  264. *
  265. * If found, return a pointer to the structure.
  266. * Else return NULL.
  267. *
  268. * Caller must hold device_ctls_mutex.
  269. */
  270. struct edac_device_ctl_info *edac_device_find(int idx)
  271. {
  272. struct list_head *item;
  273. struct edac_device_ctl_info *edac_dev;
  274. /* Iterate over list, looking for exact match of ID */
  275. list_for_each(item, &edac_device_list) {
  276. edac_dev = list_entry(item, struct edac_device_ctl_info, link);
  277. if (edac_dev->dev_idx >= idx) {
  278. if (edac_dev->dev_idx == idx)
  279. return edac_dev;
  280. /* not on list, so terminate early */
  281. break;
  282. }
  283. }
  284. return NULL;
  285. }
  286. EXPORT_SYMBOL_GPL(edac_device_find);
  287. /*
  288. * edac_device_workq_function
  289. * performs the operation scheduled by a workq request
  290. */
  291. static void edac_device_workq_function(struct work_struct *work_req)
  292. {
  293. struct delayed_work *d_work = (struct delayed_work *)work_req;
  294. struct edac_device_ctl_info *edac_dev = to_edac_device_ctl_work(d_work);
  295. //debugf0("%s() here and running\n", __func__);
  296. down(&device_ctls_mutex);
  297. /* Only poll controllers that are running polled and have a check */
  298. if ((edac_dev->op_state == OP_RUNNING_POLL) &&
  299. (edac_dev->edac_check != NULL)) {
  300. edac_dev->edac_check(edac_dev);
  301. }
  302. up(&device_ctls_mutex);
  303. /* Reschedule */
  304. queue_delayed_work(edac_workqueue, &edac_dev->work, edac_dev->delay);
  305. }
  306. /*
  307. * edac_device_workq_setup
  308. * initialize a workq item for this edac_device instance
  309. * passing in the new delay period in msec
  310. */
  311. void edac_device_workq_setup(struct edac_device_ctl_info *edac_dev,
  312. unsigned msec)
  313. {
  314. debugf0("%s()\n", __func__);
  315. edac_dev->poll_msec = msec;
  316. edac_dev->delay = msecs_to_jiffies(msec); /* Calc delay jiffies */
  317. INIT_DELAYED_WORK(&edac_dev->work, edac_device_workq_function);
  318. queue_delayed_work(edac_workqueue, &edac_dev->work, edac_dev->delay);
  319. }
  320. /*
  321. * edac_device_workq_teardown
  322. * stop the workq processing on this edac_dev
  323. */
  324. void edac_device_workq_teardown(struct edac_device_ctl_info *edac_dev)
  325. {
  326. int status;
  327. status = cancel_delayed_work(&edac_dev->work);
  328. if (status == 0) {
  329. /* workq instance might be running, wait for it */
  330. flush_workqueue(edac_workqueue);
  331. }
  332. }
  333. /*
  334. * edac_device_reset_delay_period
  335. */
  336. void edac_device_reset_delay_period(struct edac_device_ctl_info *edac_dev,
  337. unsigned long value)
  338. {
  339. down(&device_ctls_mutex);
  340. /* cancel the current workq request */
  341. edac_device_workq_teardown(edac_dev);
  342. /* restart the workq request, with new delay value */
  343. edac_device_workq_setup(edac_dev, value);
  344. up(&device_ctls_mutex);
  345. }
  346. /**
  347. * edac_device_add_device: Insert the 'edac_dev' structure into the
  348. * edac_device global list and create sysfs entries associated with
  349. * edac_device structure.
  350. * @edac_device: pointer to the edac_device structure to be added to the list
  351. * @edac_idx: A unique numeric identifier to be assigned to the
  352. * 'edac_device' structure.
  353. *
  354. * Return:
  355. * 0 Success
  356. * !0 Failure
  357. */
  358. int edac_device_add_device(struct edac_device_ctl_info *edac_dev, int edac_idx)
  359. {
  360. debugf0("%s()\n", __func__);
  361. edac_dev->dev_idx = edac_idx;
  362. #ifdef CONFIG_EDAC_DEBUG
  363. if (edac_debug_level >= 3)
  364. edac_device_dump_device(edac_dev);
  365. #endif
  366. down(&device_ctls_mutex);
  367. if (add_edac_dev_to_global_list(edac_dev))
  368. goto fail0;
  369. /* set load time so that error rate can be tracked */
  370. edac_dev->start_time = jiffies;
  371. /* create this instance's sysfs entries */
  372. if (edac_device_create_sysfs(edac_dev)) {
  373. edac_device_printk(edac_dev, KERN_WARNING,
  374. "failed to create sysfs device\n");
  375. goto fail1;
  376. }
  377. /* If there IS a check routine, then we are running POLLED */
  378. if (edac_dev->edac_check != NULL) {
  379. /* This instance is NOW RUNNING */
  380. edac_dev->op_state = OP_RUNNING_POLL;
  381. /*
  382. * enable workq processing on this instance,
  383. * default = 1000 msec
  384. */
  385. edac_device_workq_setup(edac_dev, 1000);
  386. } else {
  387. edac_dev->op_state = OP_RUNNING_INTERRUPT;
  388. }
  389. /* Report action taken */
  390. edac_device_printk(edac_dev, KERN_INFO,
  391. "Giving out device to module '%s' controller "
  392. "'%s': DEV '%s' (%s)\n",
  393. edac_dev->mod_name,
  394. edac_dev->ctl_name,
  395. dev_name(edac_dev),
  396. edac_op_state_toString(edac_dev->op_state));
  397. up(&device_ctls_mutex);
  398. return 0;
  399. fail1:
  400. /* Some error, so remove the entry from the lsit */
  401. del_edac_device_from_global_list(edac_dev);
  402. fail0:
  403. up(&device_ctls_mutex);
  404. return 1;
  405. }
  406. EXPORT_SYMBOL_GPL(edac_device_add_device);
  407. /**
  408. * edac_device_del_device:
  409. * Remove sysfs entries for specified edac_device structure and
  410. * then remove edac_device structure from global list
  411. *
  412. * @pdev:
  413. * Pointer to 'struct device' representing edac_device
  414. * structure to remove.
  415. *
  416. * Return:
  417. * Pointer to removed edac_device structure,
  418. * OR NULL if device not found.
  419. */
  420. struct edac_device_ctl_info *edac_device_del_device(struct device *dev)
  421. {
  422. struct edac_device_ctl_info *edac_dev;
  423. debugf0("MC: %s()\n", __func__);
  424. down(&device_ctls_mutex);
  425. /* Find the structure on the list, if not there, then leave */
  426. edac_dev = find_edac_device_by_dev(dev);
  427. if (edac_dev == NULL) {
  428. up(&device_ctls_mutex);
  429. return NULL;
  430. }
  431. /* mark this instance as OFFLINE */
  432. edac_dev->op_state = OP_OFFLINE;
  433. /* clear workq processing on this instance */
  434. edac_device_workq_teardown(edac_dev);
  435. /* Tear down the sysfs entries for this instance */
  436. edac_device_remove_sysfs(edac_dev);
  437. /* deregister from global list */
  438. del_edac_device_from_global_list(edac_dev);
  439. up(&device_ctls_mutex);
  440. edac_printk(KERN_INFO, EDAC_MC,
  441. "Removed device %d for %s %s: DEV %s\n",
  442. edac_dev->dev_idx,
  443. edac_dev->mod_name, edac_dev->ctl_name, dev_name(edac_dev));
  444. return edac_dev;
  445. }
  446. EXPORT_SYMBOL_GPL(edac_device_del_device);
  447. static inline int edac_device_get_log_ce(struct edac_device_ctl_info *edac_dev)
  448. {
  449. return edac_dev->log_ce;
  450. }
  451. static inline int edac_device_get_log_ue(struct edac_device_ctl_info *edac_dev)
  452. {
  453. return edac_dev->log_ue;
  454. }
  455. static inline int edac_device_get_panic_on_ue(struct edac_device_ctl_info
  456. *edac_dev)
  457. {
  458. return edac_dev->panic_on_ue;
  459. }
  460. /*
  461. * edac_device_handle_ce
  462. * perform a common output and handling of an 'edac_dev' CE event
  463. */
  464. void edac_device_handle_ce(struct edac_device_ctl_info *edac_dev,
  465. int inst_nr, int block_nr, const char *msg)
  466. {
  467. struct edac_device_instance *instance;
  468. struct edac_device_block *block = NULL;
  469. if ((inst_nr >= edac_dev->nr_instances) || (inst_nr < 0)) {
  470. edac_device_printk(edac_dev, KERN_ERR,
  471. "INTERNAL ERROR: 'instance' out of range "
  472. "(%d >= %d)\n", inst_nr,
  473. edac_dev->nr_instances);
  474. return;
  475. }
  476. instance = edac_dev->instances + inst_nr;
  477. if ((block_nr >= instance->nr_blocks) || (block_nr < 0)) {
  478. edac_device_printk(edac_dev, KERN_ERR,
  479. "INTERNAL ERROR: instance %d 'block' "
  480. "out of range (%d >= %d)\n",
  481. inst_nr, block_nr,
  482. instance->nr_blocks);
  483. return;
  484. }
  485. if (instance->nr_blocks > 0) {
  486. block = instance->blocks + block_nr;
  487. block->counters.ce_count++;
  488. }
  489. /* Propogate the count up the 'totals' tree */
  490. instance->counters.ce_count++;
  491. edac_dev->counters.ce_count++;
  492. if (edac_device_get_log_ce(edac_dev))
  493. edac_device_printk(edac_dev, KERN_WARNING,
  494. "CE: %s instance: %s block: %s '%s'\n",
  495. edac_dev->ctl_name, instance->name,
  496. block ? block->name : "N/A", msg);
  497. }
  498. EXPORT_SYMBOL_GPL(edac_device_handle_ce);
  499. /*
  500. * edac_device_handle_ue
  501. * perform a common output and handling of an 'edac_dev' UE event
  502. */
  503. void edac_device_handle_ue(struct edac_device_ctl_info *edac_dev,
  504. int inst_nr, int block_nr, const char *msg)
  505. {
  506. struct edac_device_instance *instance;
  507. struct edac_device_block *block = NULL;
  508. if ((inst_nr >= edac_dev->nr_instances) || (inst_nr < 0)) {
  509. edac_device_printk(edac_dev, KERN_ERR,
  510. "INTERNAL ERROR: 'instance' out of range "
  511. "(%d >= %d)\n", inst_nr,
  512. edac_dev->nr_instances);
  513. return;
  514. }
  515. instance = edac_dev->instances + inst_nr;
  516. if ((block_nr >= instance->nr_blocks) || (block_nr < 0)) {
  517. edac_device_printk(edac_dev, KERN_ERR,
  518. "INTERNAL ERROR: instance %d 'block' "
  519. "out of range (%d >= %d)\n",
  520. inst_nr, block_nr,
  521. instance->nr_blocks);
  522. return;
  523. }
  524. if (instance->nr_blocks > 0) {
  525. block = instance->blocks + block_nr;
  526. block->counters.ue_count++;
  527. }
  528. /* Propogate the count up the 'totals' tree */
  529. instance->counters.ue_count++;
  530. edac_dev->counters.ue_count++;
  531. if (edac_device_get_log_ue(edac_dev))
  532. edac_device_printk(edac_dev, KERN_EMERG,
  533. "UE: %s instance: %s block: %s '%s'\n",
  534. edac_dev->ctl_name, instance->name,
  535. block ? block->name : "N/A", msg);
  536. if (edac_device_get_panic_on_ue(edac_dev))
  537. panic("EDAC %s: UE instance: %s block %s '%s'\n",
  538. edac_dev->ctl_name, instance->name,
  539. block ? block->name : "N/A", msg);
  540. }
  541. EXPORT_SYMBOL_GPL(edac_device_handle_ue);