edac_core.h 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529
  1. /*
  2. * Defines, structures, APIs for edac_core module
  3. *
  4. * (C) 2007 Linux Networx (http://lnxi.com)
  5. * This file may be distributed under the terms of the
  6. * GNU General Public License.
  7. *
  8. * Written by Thayne Harbaugh
  9. * Based on work by Dan Hollis <goemon at anime dot net> and others.
  10. * http://www.anime.net/~goemon/linux-ecc/
  11. *
  12. * NMI handling support added by
  13. * Dave Peterson <dsp@llnl.gov> <dave_peterson@pobox.com>
  14. *
  15. * Refactored for multi-source files:
  16. * Doug Thompson <norsk5@xmission.com>
  17. *
  18. */
  19. #ifndef _EDAC_CORE_H_
  20. #define _EDAC_CORE_H_
  21. #include <linux/kernel.h>
  22. #include <linux/types.h>
  23. #include <linux/module.h>
  24. #include <linux/spinlock.h>
  25. #include <linux/smp.h>
  26. #include <linux/pci.h>
  27. #include <linux/time.h>
  28. #include <linux/nmi.h>
  29. #include <linux/rcupdate.h>
  30. #include <linux/completion.h>
  31. #include <linux/kobject.h>
  32. #include <linux/platform_device.h>
  33. #include <linux/sysdev.h>
  34. #include <linux/workqueue.h>
  35. #include <linux/edac.h>
  /*
   * Length limits for fixed-size strings.
   *
   * EDAC_DEVICE_NAME_LEN sizes the name[] buffers embedded in the control
   * structures declared in this header; those buffers are declared with
   * extra room on top of this value.
   */
  #define EDAC_DEVICE_NAME_LEN    31
  #define EDAC_ATTRIB_VALUE_LEN   15
  /*
   * Convert between a page count and a MiB (2^20 byte) count.
   *
   * The shift direction depends on whether one page is smaller than 1 MiB
   * (PAGE_SHIFT < 20) or at least that large (PAGE_SHIFT >= 20).
   */
  #if PAGE_SHIFT >= 20
  #define PAGES_TO_MiB(pages)     ((pages) << (PAGE_SHIFT - 20))
  #define MiB_TO_PAGES(mb)        ((mb) >> (PAGE_SHIFT - 20))
  #else /* PAGE_SHIFT < 20 */
  #define PAGES_TO_MiB(pages)     ((pages) >> (20 - PAGE_SHIFT))
  #define MiB_TO_PAGES(mb)        ((mb) << (20 - PAGE_SHIFT))
  #endif
  /*
   * printk() wrappers that put a uniform "EDAC ..." tag in front of a message.
   *
   * @level and @prefix are pasted next to the format by string-literal
   * concatenation, so both must be string literals (e.g. KERN_xxx, EDAC_MC).
   *
   * The controller argument (@mci / @ctl) is parenthesized before it is
   * dereferenced so that any pointer-valued expression may be passed, for
   * example "&local_ctl" or "cond ? a : b", not just a plain identifier.
   */

  /* "EDAC <prefix>: <message>" */
  #define edac_printk(level, prefix, fmt, arg...) \
          printk(level "EDAC " prefix ": " fmt, ##arg)

  /* "EDAC MC<mc_idx>: <message>" */
  #define edac_mc_printk(mci, level, fmt, arg...) \
          printk(level "EDAC MC%d: " fmt, (mci)->mc_idx, ##arg)

  /* "EDAC <prefix> MC<mc_idx>: <message>" */
  #define edac_mc_chipset_printk(mci, level, prefix, fmt, arg...) \
          printk(level "EDAC " prefix " MC%d: " fmt, (mci)->mc_idx, ##arg)

  /* "EDAC DEVICE<dev_idx>: <message>" */
  #define edac_device_printk(ctl, level, fmt, arg...) \
          printk(level "EDAC DEVICE%d: " fmt, (ctl)->dev_idx, ##arg)

  /* "EDAC PCI<pci_idx>: <message>" */
  #define edac_pci_printk(ctl, level, fmt, arg...) \
          printk(level "EDAC PCI%d: " fmt, (ctl)->pci_idx, ##arg)
  /*
   * String-literal prefixes for the 'prefix' argument of edac_printk()
   * and edac_mc_chipset_printk().
   */
  #define EDAC_MC         "MC"
  #define EDAC_PCI        "PCI"
  #define EDAC_DEBUG      "DEBUG"

  /*
   * Table of strings defined outside this header.
   * NOTE(review): presumably indexed by memory type - confirm at the definition.
   */
  extern const char *edac_mem_types[];
  /*
   * Debug output helpers.
   *
   * With CONFIG_EDAC_DEBUG set, debugfN(fmt, ...) emits a KERN_DEBUG message,
   * tagged with the name of the calling function, whenever N is less than or
   * equal to the run-time value of edac_debug_level.  Without
   * CONFIG_EDAC_DEBUG every debugfN() expands to nothing.
   */
  #ifdef CONFIG_EDAC_DEBUG
  extern int edac_debug_level;

  /*
   * 'level' is parenthesized so that an expression argument (for example
   * "verbose ? 3 : 1") is compared as a whole against edac_debug_level.
   */
  #define edac_debug_printk(level, fmt, arg...)                           \
          do {                                                            \
                  if ((level) <= edac_debug_level)                        \
                          edac_printk(KERN_DEBUG, EDAC_DEBUG,             \
                                      "%s: " fmt, __func__, ##arg);       \
          } while (0)

  #define debugf0(...)    edac_debug_printk(0, __VA_ARGS__)
  #define debugf1(...)    edac_debug_printk(1, __VA_ARGS__)
  #define debugf2(...)    edac_debug_printk(2, __VA_ARGS__)
  #define debugf3(...)    edac_debug_printk(3, __VA_ARGS__)
  #define debugf4(...)    edac_debug_printk(4, __VA_ARGS__)

  #else   /* !CONFIG_EDAC_DEBUG */

  #define debugf0(...)
  #define debugf1(...)
  #define debugf2(...)
  #define debugf3(...)
  #define debugf4(...)

  #endif  /* !CONFIG_EDAC_DEBUG */
  /*
   * Expand to the pair "PCI_VENDOR_ID_<vendor>, PCI_DEVICE_ID_<vendor>_<device>"
   * (two comma-separated constants built by token pasting).
   */
  #define PCI_VEND_DEV(vendor, device) \
          PCI_VENDOR_ID_ ## vendor, PCI_DEVICE_ID_ ## vendor ## _ ## device

  /* Accessor for the dev_name member of a control structure pointer. */
  #define edac_dev_name(ctl)      ((ctl)->dev_name)
  83. /*
  84. * The following are the structures to provide for a generic
  85. * or abstract 'edac_device'. This set of structures and the
  86. * code that implements the APIs for the same, provide for
  87. * registering EDAC type devices which are NOT standard memory.
  88. *
  89. * CPU caches (L1 and L2)
  90. * DMA engines
  91. * Core CPU swithces
  92. * Fabric switch units
  93. * PCIe interface controllers
  94. * other EDAC/ECC type devices that can be monitored for
  95. * errors, etc.
  96. *
  97. * It allows for a 2 level set of hiearchry. For example:
  98. *
  99. * cache could be composed of L1, L2 and L3 levels of cache.
  100. * Each CPU core would have its own L1 cache, while sharing
  101. * L2 and maybe L3 caches.
  102. *
  103. * View them arranged, via the sysfs presentation:
  104. * /sys/devices/system/edac/..
  105. *
  106. * mc/ <existing memory device directory>
  107. * cpu/cpu0/.. <L1 and L2 block directory>
  108. * /L1-cache/ce_count
  109. * /ue_count
  110. * /L2-cache/ce_count
  111. * /ue_count
  112. * cpu/cpu1/.. <L1 and L2 block directory>
  113. * /L1-cache/ce_count
  114. * /ue_count
  115. * /L2-cache/ce_count
  116. * /ue_count
  117. * ...
  118. *
  119. * the L1 and L2 directories would be "edac_device_block's"
  120. */
  121. struct edac_device_counter {
  122. u32 ue_count;
  123. u32 ce_count;
  124. };
  125. /* forward reference */
  126. struct edac_device_ctl_info;
  127. struct edac_device_block;
  128. /* edac_dev_sysfs_attribute structure
  129. * used for driver sysfs attributes in mem_ctl_info
  130. * for extra controls and attributes:
  131. * like high level error Injection controls
  132. */
  133. struct edac_dev_sysfs_attribute {
  134. struct attribute attr;
  135. ssize_t (*show)(struct edac_device_ctl_info *, char *);
  136. ssize_t (*store)(struct edac_device_ctl_info *, const char *, size_t);
  137. };
  138. /* edac_dev_sysfs_block_attribute structure
  139. *
  140. * used in leaf 'block' nodes for adding controls/attributes
  141. *
  142. * each block in each instance of the containing control structure
  143. * can have an array of the following. The show and store functions
  144. * will be filled in with the show/store function in the
  145. * low level driver.
  146. *
  147. * The 'value' field will be the actual value field used for
  148. * counting
  149. */
  150. struct edac_dev_sysfs_block_attribute {
  151. struct attribute attr;
  152. ssize_t (*show)(struct kobject *, struct attribute *, char *);
  153. ssize_t (*store)(struct kobject *, struct attribute *,
  154. const char *, size_t);
  155. struct edac_device_block *block;
  156. unsigned int value;
  157. };
  158. /* device block control structure */
  159. struct edac_device_block {
  160. struct edac_device_instance *instance; /* Up Pointer */
  161. char name[EDAC_DEVICE_NAME_LEN + 1];
  162. struct edac_device_counter counters; /* basic UE and CE counters */
  163. int nr_attribs; /* how many attributes */
  164. /* this block's attributes, could be NULL */
  165. struct edac_dev_sysfs_block_attribute *block_attributes;
  166. /* edac sysfs device control */
  167. struct kobject kobj;
  168. };
  169. /* device instance control structure */
  170. struct edac_device_instance {
  171. struct edac_device_ctl_info *ctl; /* Up pointer */
  172. char name[EDAC_DEVICE_NAME_LEN + 4];
  173. struct edac_device_counter counters; /* instance counters */
  174. u32 nr_blocks; /* how many blocks */
  175. struct edac_device_block *blocks; /* block array */
  176. /* edac sysfs device control */
  177. struct kobject kobj;
  178. };
  179. /*
  180. * Abstract edac_device control info structure
  181. *
  182. */
  183. struct edac_device_ctl_info {
  184. /* for global list of edac_device_ctl_info structs */
  185. struct list_head link;
  186. struct module *owner; /* Module owner of this control struct */
  187. int dev_idx;
  188. /* Per instance controls for this edac_device */
  189. int log_ue; /* boolean for logging UEs */
  190. int log_ce; /* boolean for logging CEs */
  191. int panic_on_ue; /* boolean for panic'ing on an UE */
  192. unsigned poll_msec; /* number of milliseconds to poll interval */
  193. unsigned long delay; /* number of jiffies for poll_msec */
  194. /* Additional top controller level attributes, but specified
  195. * by the low level driver.
  196. *
  197. * Set by the low level driver to provide attributes at the
  198. * controller level, same level as 'ue_count' and 'ce_count' above.
  199. * An array of structures, NULL terminated
  200. *
  201. * If attributes are desired, then set to array of attributes
  202. * If no attributes are desired, leave NULL
  203. */
  204. struct edac_dev_sysfs_attribute *sysfs_attributes;
  205. /* pointer to main 'edac' class in sysfs */
  206. struct sysdev_class *edac_class;
  207. /* the internal state of this controller instance */
  208. int op_state;
  209. /* work struct for this instance */
  210. struct delayed_work work;
  211. /* pointer to edac polling checking routine:
  212. * If NOT NULL: points to polling check routine
  213. * If NULL: Then assumes INTERRUPT operation, where
  214. * MC driver will receive events
  215. */
  216. void (*edac_check) (struct edac_device_ctl_info * edac_dev);
  217. struct device *dev; /* pointer to device structure */
  218. const char *mod_name; /* module name */
  219. const char *ctl_name; /* edac controller name */
  220. const char *dev_name; /* pci/platform/etc... name */
  221. void *pvt_info; /* pointer to 'private driver' info */
  222. unsigned long start_time; /* edac_device load start time (jiffies) */
  223. struct completion removal_complete;
  224. /* sysfs top name under 'edac' directory
  225. * and instance name:
  226. * cpu/cpu0/...
  227. * cpu/cpu1/...
  228. * cpu/cpu2/...
  229. * ...
  230. */
  231. char name[EDAC_DEVICE_NAME_LEN + 1];
  232. /* Number of instances supported on this control structure
  233. * and the array of those instances
  234. */
  235. u32 nr_instances;
  236. struct edac_device_instance *instances;
  237. /* Event counters for the this whole EDAC Device */
  238. struct edac_device_counter counters;
  239. /* edac sysfs device control for the 'name'
  240. * device this structure controls
  241. */
  242. struct kobject kobj;
  243. };
  /*
   * Map a pointer to the 'work' member back to the control structure that
   * embeds it.
   */
  #define to_edac_mem_ctl_work(dw) \
          container_of(dw, struct mem_ctl_info, work)

  #define to_edac_device_ctl_work(dw) \
          container_of(dw, struct edac_device_ctl_info, work)
  /*
   * Allocation and release of the 'edac_device' control info structure.
   * A MC driver allocates one of these for each edac_device it is going to
   * control/register with the EDAC CORE.
   */
  struct edac_device_ctl_info *edac_device_alloc_ctl_info(
                  unsigned int sizeof_private,
                  char *edac_device_name, unsigned int nr_instances,
                  char *edac_block_name, unsigned int nr_blocks,
                  unsigned int offset_value,
                  struct edac_dev_sysfs_block_attribute *block_attributes,
                  unsigned int nr_attribs,
                  int device_index);

  /*
   * Values accepted for 'offset_value':
   *      -1      no offset value (use BLOCK_OFFSET_VALUE_OFF)
   *      0       zero-based block numbers
   *      1       1-based block numbers
   *      other   other-based block numbers
   */
  #define BLOCK_OFFSET_VALUE_OFF  ((unsigned int)-1)

  void edac_device_free_ctl_info(struct edac_device_ctl_info *ctl_info);
  270. #ifdef CONFIG_PCI
  271. struct edac_pci_counter {
  272. atomic_t pe_count;
  273. atomic_t npe_count;
  274. };
  275. /*
  276. * Abstract edac_pci control info structure
  277. *
  278. */
  279. struct edac_pci_ctl_info {
  280. /* for global list of edac_pci_ctl_info structs */
  281. struct list_head link;
  282. int pci_idx;
  283. struct sysdev_class *edac_class; /* pointer to class */
  284. /* the internal state of this controller instance */
  285. int op_state;
  286. /* work struct for this instance */
  287. struct delayed_work work;
  288. /* pointer to edac polling checking routine:
  289. * If NOT NULL: points to polling check routine
  290. * If NULL: Then assumes INTERRUPT operation, where
  291. * MC driver will receive events
  292. */
  293. void (*edac_check) (struct edac_pci_ctl_info * edac_dev);
  294. struct device *dev; /* pointer to device structure */
  295. const char *mod_name; /* module name */
  296. const char *ctl_name; /* edac controller name */
  297. const char *dev_name; /* pci/platform/etc... name */
  298. void *pvt_info; /* pointer to 'private driver' info */
  299. unsigned long start_time; /* edac_pci load start time (jiffies) */
  300. struct completion complete;
  301. /* sysfs top name under 'edac' directory
  302. * and instance name:
  303. * cpu/cpu0/...
  304. * cpu/cpu1/...
  305. * cpu/cpu2/...
  306. * ...
  307. */
  308. char name[EDAC_DEVICE_NAME_LEN + 1];
  309. /* Event counters for the this whole EDAC Device */
  310. struct edac_pci_counter counters;
  311. /* edac sysfs device control for the 'name'
  312. * device this structure controls
  313. */
  314. struct kobject kobj;
  315. struct completion kobj_complete;
  316. };
  /* Map a pointer to the 'work' member back to its struct edac_pci_ctl_info. */
  #define to_edac_pci_ctl_work(dw) \
          container_of(dw, struct edac_pci_ctl_info, work)
  319. /* write all or some bits in a byte-register*/
  320. static inline void pci_write_bits8(struct pci_dev *pdev, int offset, u8 value,
  321. u8 mask)
  322. {
  323. if (mask != 0xff) {
  324. u8 buf;
  325. pci_read_config_byte(pdev, offset, &buf);
  326. value &= mask;
  327. buf &= ~mask;
  328. value |= buf;
  329. }
  330. pci_write_config_byte(pdev, offset, value);
  331. }
  332. /* write all or some bits in a word-register*/
  333. static inline void pci_write_bits16(struct pci_dev *pdev, int offset,
  334. u16 value, u16 mask)
  335. {
  336. if (mask != 0xffff) {
  337. u16 buf;
  338. pci_read_config_word(pdev, offset, &buf);
  339. value &= mask;
  340. buf &= ~mask;
  341. value |= buf;
  342. }
  343. pci_write_config_word(pdev, offset, value);
  344. }
  345. /*
  346. * pci_write_bits32
  347. *
  348. * edac local routine to do pci_write_config_dword, but adds
  349. * a mask parameter. If mask is all ones, ignore the mask.
  350. * Otherwise utilize the mask to isolate specified bits
  351. *
  352. * write all or some bits in a dword-register
  353. */
  354. static inline void pci_write_bits32(struct pci_dev *pdev, int offset,
  355. u32 value, u32 mask)
  356. {
  357. if (mask != 0xffffffff) {
  358. u32 buf;
  359. pci_read_config_dword(pdev, offset, &buf);
  360. value &= mask;
  361. buf &= ~mask;
  362. value |= buf;
  363. }
  364. pci_write_config_dword(pdev, offset, value);
  365. }
  366. #endif /* CONFIG_PCI */
  /*
   * Memory controller (struct mem_ctl_info) entry points.  Only the
   * declarations live here; the implementations are outside this header.
   */
  struct mem_ctl_info *edac_mc_alloc(unsigned int sz_pvt, unsigned int nr_csrows,
                                     unsigned int nr_chans, int edac_index);
  int edac_mc_add_mc(struct mem_ctl_info *mci);
  void edac_mc_free(struct mem_ctl_info *mci);
  struct mem_ctl_info *edac_mc_find(int idx);
  struct mem_ctl_info *find_mci_by_dev(struct device *dev);
  struct mem_ctl_info *edac_mc_del_mc(struct device *dev);
  int edac_mc_find_csrow_by_page(struct mem_ctl_info *mci, unsigned long page);
  /*
   * Error reporting entry points for memory controllers.
   *
   * The "no info" variants are used when error overflows are reported.
   * There are a limited number of error logging registers that can be
   * exhausted.  When all registers are exhausted and an additional error
   * occurs, an error overflow register records that an error occurred and
   * the type of error, but has no further information.  Separate ce/ue
   * versions make for cleaner reporting logic and function interfaces:
   * less conditional statement clutter and fewer extra function arguments.
   */
  void edac_mc_handle_ce(struct mem_ctl_info *mci,
                         unsigned long page_frame_number,
                         unsigned long offset_in_page,
                         unsigned long syndrome, int row, int channel,
                         const char *msg);
  void edac_mc_handle_ce_no_info(struct mem_ctl_info *mci, const char *msg);

  void edac_mc_handle_ue(struct mem_ctl_info *mci,
                         unsigned long page_frame_number,
                         unsigned long offset_in_page, int row,
                         const char *msg);
  void edac_mc_handle_ue_no_info(struct mem_ctl_info *mci, const char *msg);

  /* The fbd variants take a non-const 'msg', unlike the handlers above. */
  void edac_mc_handle_fbd_ue(struct mem_ctl_info *mci, unsigned int csrow,
                             unsigned int channel0, unsigned int channel1,
                             char *msg);
  void edac_mc_handle_fbd_ce(struct mem_ctl_info *mci, unsigned int csrow,
                             unsigned int channel, char *msg);
  /*
   * edac_device APIs
   *
   * The UE/CE handlers identify the affected element by instance number
   * (inst_nr) and block number (block_nr) within the given control structure.
   */
  int edac_device_add_device(struct edac_device_ctl_info *edac_dev);
  struct edac_device_ctl_info *edac_device_del_device(struct device *dev);

  void edac_device_handle_ue(struct edac_device_ctl_info *edac_dev,
                             int inst_nr, int block_nr, const char *msg);
  void edac_device_handle_ce(struct edac_device_ctl_info *edac_dev,
                             int inst_nr, int block_nr, const char *msg);

  int edac_device_alloc_index(void);
  /*
   * edac_pci APIs
   *
   * Declarations only; the implementations are outside this header.
   */
  struct edac_pci_ctl_info *edac_pci_alloc_ctl_info(unsigned int sz_pvt,
                                                    const char *edac_pci_name);
  void edac_pci_free_ctl_info(struct edac_pci_ctl_info *pci);

  void edac_pci_reset_delay_period(struct edac_pci_ctl_info *pci,
                                   unsigned long value);

  int edac_pci_alloc_index(void);
  int edac_pci_add_device(struct edac_pci_ctl_info *pci, int edac_idx);
  struct edac_pci_ctl_info *edac_pci_del_device(struct device *dev);

  struct edac_pci_ctl_info *edac_pci_create_generic_ctl(struct device *dev,
                                                        const char *mod_name);
  void edac_pci_release_generic_ctl(struct edac_pci_ctl_info *pci);

  int edac_pci_create_sysfs(struct edac_pci_ctl_info *pci);
  void edac_pci_remove_sysfs(struct edac_pci_ctl_info *pci);
  /*
   * edac misc APIs
   *
   * edac_op_state_to_string() takes an op_state value (see the op_state
   * members of the control structures) and returns a string for it.
   */
  char *edac_op_state_to_string(int op_state);
  435. #endif /* _EDAC_CORE_H_ */