i7300_edac.c 39 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395
  1. /*
  2. * Intel 7300 class Memory Controllers kernel module (Clarksboro)
  3. *
  4. * This file may be distributed under the terms of the
  5. * GNU General Public License version 2 only.
  6. *
  7. * Copyright (c) 2010 by:
  8. * Mauro Carvalho Chehab <mchehab@redhat.com>
  9. *
  10. * Red Hat Inc. http://www.redhat.com
  11. *
  12. * Intel 7300 Chipset Memory Controller Hub (MCH) - Datasheet
  13. * http://www.intel.com/Assets/PDF/datasheet/318082.pdf
  14. *
 * TODO: The chipset also allows checking for PCI Express errors. Currently,
 * the driver covers only memory errors.
  17. *
  18. * This driver uses "csrows" EDAC attribute to represent DIMM slot#
  19. */
  20. #include <linux/module.h>
  21. #include <linux/init.h>
  22. #include <linux/pci.h>
  23. #include <linux/pci_ids.h>
  24. #include <linux/slab.h>
  25. #include <linux/edac.h>
  26. #include <linux/mmzone.h>
  27. #include "edac_core.h"
  28. /*
  29. * Alter this version for the I7300 module when modifications are made
  30. */
  31. #define I7300_REVISION " Ver: 1.0.0 " __DATE__
  32. #define EDAC_MOD_STR "i7300_edac"
  33. #define i7300_printk(level, fmt, arg...) \
  34. edac_printk(level, "i7300", fmt, ##arg)
  35. #define i7300_mc_printk(mci, level, fmt, arg...) \
  36. edac_mc_chipset_printk(mci, level, "i7300", fmt, ##arg)
  37. /*
  38. * Memory topology is organized as:
  39. * Branch 0 - 2 channels: channels 0 and 1 (FDB0 PCI dev 21.0)
  40. * Branch 1 - 2 channels: channels 2 and 3 (FDB1 PCI dev 22.0)
 * Each channel can have up to 8 DIMM sets (called SLOTS)
  42. * Slots should generally be filled in pairs
  43. * Except on Single Channel mode of operation
  44. * just slot 0/channel0 filled on this mode
  45. * On normal operation mode, the two channels on a branch should be
  46. * filled together for the same SLOT#
  47. * When in mirrored mode, Branch 1 replicate memory at Branch 0, so, the four
  48. * channels on both branches should be filled
  49. */
  50. /* Limits for i7300 */
  51. #define MAX_SLOTS 8
  52. #define MAX_BRANCHES 2
  53. #define MAX_CH_PER_BRANCH 2
  54. #define MAX_CHANNELS (MAX_CH_PER_BRANCH * MAX_BRANCHES)
  55. #define MAX_MIR 3
  56. #define to_channel(ch, branch) ((((branch)) << 1) | (ch))
  57. #define to_csrow(slot, ch, branch) \
  58. (to_channel(ch, branch) | ((slot) << 2))
  59. /*
  60. * I7300 devices
  61. * All 3 functions of Device 16 (0,1,2) share the SAME DID and
  62. * uses PCI_DEVICE_ID_INTEL_I7300_MCH_ERR for device 16 (0,1,2),
  63. * PCI_DEVICE_ID_INTEL_I7300_MCH_FB0 and PCI_DEVICE_ID_INTEL_I7300_MCH_FB1
  64. * for device 21 (0,1).
  65. */
  66. /****************************************************
 * i7300 Register definitions for memory enumeration
  68. ****************************************************/
  69. /*
  70. * Device 16,
  71. * Function 0: System Address (not documented)
  72. * Function 1: Memory Branch Map, Control, Errors Register
  73. */
/* OFFSETS for Function 0 (System Address function of device 16) */
#define AMBASE 0x48 /* AMB Mem Mapped Reg Region Base */
#define MAXCH 0x56 /* Max Channel Number */
#define MAXDIMMPERCH 0x57 /* Max DIMM PER Channel Number */
/*
 * OFFSETS for Function 1 (Memory Branch Map, Control, Errors function of
 * device 16)
 */
#define MC_SETTINGS 0x40 /* cached in i7300_pvt.mc_settings */
#define TOLM 0x6C /* top of low memory */
#define REDMEMB 0x7C /* Recoverable Mem Data Error log B */
/* NOTE(review): MIR presumably stands for Memory Interleave Range -- confirm */
#define MIR0 0x80
#define MIR1 0x84
#define MIR2 0x88
  85. /*
 * Note: Other Intel EDAC drivers use AMBPRESENT to identify the available
 * memory. From datasheet item 7.3.1 (FB-DIMM technology & organization), it
 * seems that this information cannot be used directly for the same purpose
 * here: each memory slot may have up to 2 AMB interfaces, one for the
 * incoming link and another for the outgoing link to the next slot.
 * For now, the driver just stores the AMB present registers, but relies only
 * on the MTR info to detect memory.
 * The datasheet is also not clear about how to map each AMBPRESENT register
 * to one of the 4 available channels.
  95. */
  96. #define AMBPRESENT_0 0x64
  97. #define AMBPRESENT_1 0x66
  98. const static u16 mtr_regs [MAX_SLOTS] = {
  99. 0x80, 0x84, 0x88, 0x8c,
  100. 0x82, 0x86, 0x8a, 0x8e
  101. };
  102. /* Defines to extract the vaious fields from the
  103. * MTRx - Memory Technology Registers
  104. */
  105. #define MTR_DIMMS_PRESENT(mtr) ((mtr) & (1 << 8))
  106. #define MTR_DIMMS_ETHROTTLE(mtr) ((mtr) & (1 << 7))
  107. #define MTR_DRAM_WIDTH(mtr) (((mtr) & (1 << 6)) ? 8 : 4)
  108. #define MTR_DRAM_BANKS(mtr) (((mtr) & (1 << 5)) ? 8 : 4)
  109. #define MTR_DIMM_RANKS(mtr) (((mtr) & (1 << 4)) ? 1 : 0)
  110. #define MTR_DIMM_ROWS(mtr) (((mtr) >> 2) & 0x3)
  111. #define MTR_DRAM_BANKS_ADDR_BITS 2
  112. #define MTR_DIMM_ROWS_ADDR_BITS(mtr) (MTR_DIMM_ROWS(mtr) + 13)
  113. #define MTR_DIMM_COLS(mtr) ((mtr) & 0x3)
  114. #define MTR_DIMM_COLS_ADDR_BITS(mtr) (MTR_DIMM_COLS(mtr) + 10)
  115. #ifdef CONFIG_EDAC_DEBUG
  116. /* MTR NUMROW */
  117. static const char *numrow_toString[] = {
  118. "8,192 - 13 rows",
  119. "16,384 - 14 rows",
  120. "32,768 - 15 rows",
  121. "65,536 - 16 rows"
  122. };
  123. /* MTR NUMCOL */
  124. static const char *numcol_toString[] = {
  125. "1,024 - 10 columns",
  126. "2,048 - 11 columns",
  127. "4,096 - 12 columns",
  128. "reserved"
  129. };
  130. #endif
  131. /************************************************
  132. * i7300 Register definitions for error detection
  133. ************************************************/
  134. /*
  135. * Device 16.2: Global Error Registers
  136. */
  137. #define FERR_GLOBAL_LO 0x40
  138. static const char *ferr_global_name[] = {
  139. [31] = "Internal MCH Fatal Error",
  140. [30] = "Intel QuickData Technology Device Fatal Error",
  141. [29] = "FSB1 Fatal Error",
  142. [28] = "FSB0 Fatal Error",
  143. [27] = "FBD Channel 3 Fatal Error",
  144. [26] = "FBD Channel 2 Fatal Error",
  145. [25] = "FBD Channel 1 Fatal Error",
  146. [24] = "FBD Channel 0 Fatal Error",
  147. [23] = "PCI Express Device 7Fatal Error",
  148. [22] = "PCI Express Device 6 Fatal Error",
  149. [21] = "PCI Express Device 5 Fatal Error",
  150. [20] = "PCI Express Device 4 Fatal Error",
  151. [19] = "PCI Express Device 3 Fatal Error",
  152. [18] = "PCI Express Device 2 Fatal Error",
  153. [17] = "PCI Express Device 1 Fatal Error",
  154. [16] = "ESI Fatal Error",
  155. [15] = "Internal MCH Non-Fatal Error",
  156. [14] = "Intel QuickData Technology Device Non Fatal Error",
  157. [13] = "FSB1 Non-Fatal Error",
  158. [12] = "FSB 0 Non-Fatal Error",
  159. [11] = "FBD Channel 3 Non-Fatal Error",
  160. [10] = "FBD Channel 2 Non-Fatal Error",
  161. [9] = "FBD Channel 1 Non-Fatal Error",
  162. [8] = "FBD Channel 0 Non-Fatal Error",
  163. [7] = "PCI Express Device 7 Non-Fatal Error",
  164. [6] = "PCI Express Device 6 Non-Fatal Error",
  165. [5] = "PCI Express Device 5 Non-Fatal Error",
  166. [4] = "PCI Express Device 4 Non-Fatal Error",
  167. [3] = "PCI Express Device 3 Non-Fatal Error",
  168. [2] = "PCI Express Device 2 Non-Fatal Error",
  169. [1] = "PCI Express Device 1 Non-Fatal Error",
  170. [0] = "ESI Non-Fatal Error",
  171. };
  172. #define NERR_GLOBAL 0x44
  173. static const char *nerr_global_name[] = {
  174. [31] = "Internal MCH Fatal Error",
  175. [30] = "Intel QuickData Technology Device Fatal Error",
  176. [29] = "FSB1 Fatal Error",
  177. [28] = "FSB0 Fatal Error",
  178. [27] = "FSB2 Fatal Error",
  179. [26] = "FSB3 Fatal Error",
  180. [25] = "Reserved",
  181. [24] = "FBD Channel 0,1,2 or 3 Fatal Error",
  182. [23] = "PCI Express Device 7 Fatal Error",
  183. [22] = "PCI Express Device 6 Fatal Error",
  184. [21] = "PCI Express Device 5 Fatal Error",
  185. [20] = "PCI Express Device 4 Fatal Error",
  186. [19] = "PCI Express Device 3 Fatal Error",
  187. [18] = "PCI Express Device 2 Fatal Error",
  188. [17] = "PCI Express Device 1 Fatal Error",
  189. [16] = "ESI Fatal Error",
  190. [15] = "Internal MCH Non-Fatal Error",
  191. [14] = "Intel QuickData Technology Device Non Fatal Error",
  192. [13] = "FSB1 Non-Fatal Error",
  193. [12] = "FSB0 Non-Fatal Error",
  194. [11] = "FSB2 Non-Fatal Error",
  195. [10] = "FSB3 Non-Fatal Error",
  196. [9] = "Reserved",
  197. [8] = "FBD Channel 0,1, 2 or 3 Non-Fatal Error",
  198. [7] = "PCI Express Device 7 Non-Fatal Error",
  199. [6] = "PCI Express Device 6 Non-Fatal Error",
  200. [5] = "PCI Express Device 5 Non-Fatal Error",
  201. [4] = "PCI Express Device 4 Non-Fatal Error",
  202. [3] = "PCI Express Device 3 Non-Fatal Error",
  203. [2] = "PCI Express Device 2 Non-Fatal Error",
  204. [1] = "PCI Express Device 1 Non-Fatal Error",
  205. [0] = "ESI Non-Fatal Error",
  206. };
  207. #if 0
  208. /*
  209. * Error indicator bits and masks
  210. * Error masks are according with Table 5-17 of i7300 datasheet
  211. */
  212. enum error_mask {
  213. EMASK_M1 = 1<<0, /* Memory Write error on non-redundant retry */
  214. EMASK_M2 = 1<<1, /* Memory or FB-DIMM configuration CRC read error */
  215. EMASK_M3 = 1<<2, /* Reserved */
  216. EMASK_M4 = 1<<3, /* Uncorrectable Data ECC on Replay */
  217. EMASK_M5 = 1<<4, /* Aliased Uncorrectable Non-Mirrored Demand Data ECC */
  218. EMASK_M6 = 1<<5, /* Unsupported on i7300 */
  219. EMASK_M7 = 1<<6, /* Aliased Uncorrectable Resilver- or Spare-Copy Data ECC */
  220. EMASK_M8 = 1<<7, /* Aliased Uncorrectable Patrol Data ECC */
  221. EMASK_M9 = 1<<8, /* Non-Aliased Uncorrectable Non-Mirrored Demand Data ECC */
  222. EMASK_M10 = 1<<9, /* Unsupported on i7300 */
  223. EMASK_M11 = 1<<10, /* Non-Aliased Uncorrectable Resilver- or Spare-Copy Data ECC */
  224. EMASK_M12 = 1<<11, /* Non-Aliased Uncorrectable Patrol Data ECC */
  225. EMASK_M13 = 1<<12, /* Memory Write error on first attempt */
  226. EMASK_M14 = 1<<13, /* FB-DIMM Configuration Write error on first attempt */
  227. EMASK_M15 = 1<<14, /* Memory or FB-DIMM configuration CRC read error */
  228. EMASK_M16 = 1<<15, /* Channel Failed-Over Occurred */
  229. EMASK_M17 = 1<<16, /* Correctable Non-Mirrored Demand Data ECC */
  230. EMASK_M18 = 1<<17, /* Unsupported on i7300 */
  231. EMASK_M19 = 1<<18, /* Correctable Resilver- or Spare-Copy Data ECC */
  232. EMASK_M20 = 1<<19, /* Correctable Patrol Data ECC */
  233. EMASK_M21 = 1<<20, /* FB-DIMM Northbound parity error on FB-DIMM Sync Status */
  234. EMASK_M22 = 1<<21, /* SPD protocol Error */
  235. EMASK_M23 = 1<<22, /* Non-Redundant Fast Reset Timeout */
  236. EMASK_M24 = 1<<23, /* Refresh error */
  237. EMASK_M25 = 1<<24, /* Memory Write error on redundant retry */
  238. EMASK_M26 = 1<<25, /* Redundant Fast Reset Timeout */
  239. EMASK_M27 = 1<<26, /* Correctable Counter Threshold Exceeded */
  240. EMASK_M28 = 1<<27, /* DIMM-Spare Copy Completed */
  241. EMASK_M29 = 1<<28, /* DIMM-Isolation Completed */
  242. };
  243. /*
  244. * Names to translate bit error into something useful
  245. */
  246. static const char *error_name[] = {
  247. [0] = "Memory Write error on non-redundant retry",
  248. [1] = "Memory or FB-DIMM configuration CRC read error",
  249. /* Reserved */
  250. [3] = "Uncorrectable Data ECC on Replay",
  251. [4] = "Aliased Uncorrectable Non-Mirrored Demand Data ECC",
  252. /* M6 Unsupported on i7300 */
  253. [6] = "Aliased Uncorrectable Resilver- or Spare-Copy Data ECC",
  254. [7] = "Aliased Uncorrectable Patrol Data ECC",
  255. [8] = "Non-Aliased Uncorrectable Non-Mirrored Demand Data ECC",
  256. /* M10 Unsupported on i7300 */
  257. [10] = "Non-Aliased Uncorrectable Resilver- or Spare-Copy Data ECC",
  258. [11] = "Non-Aliased Uncorrectable Patrol Data ECC",
  259. [12] = "Memory Write error on first attempt",
  260. [13] = "FB-DIMM Configuration Write error on first attempt",
  261. [14] = "Memory or FB-DIMM configuration CRC read error",
  262. [15] = "Channel Failed-Over Occurred",
  263. [16] = "Correctable Non-Mirrored Demand Data ECC",
  264. /* M18 Unsupported on i7300 */
  265. [18] = "Correctable Resilver- or Spare-Copy Data ECC",
  266. [19] = "Correctable Patrol Data ECC",
  267. [20] = "FB-DIMM Northbound parity error on FB-DIMM Sync Status",
  268. [21] = "SPD protocol Error",
  269. [22] = "Non-Redundant Fast Reset Timeout",
  270. [23] = "Refresh error",
  271. [24] = "Memory Write error on redundant retry",
  272. [25] = "Redundant Fast Reset Timeout",
  273. [26] = "Correctable Counter Threshold Exceeded",
  274. [27] = "DIMM-Spare Copy Completed",
  275. [28] = "DIMM-Isolation Completed",
  276. };
  277. /* Fatal errors */
  278. #define ERROR_FAT_MASK (EMASK_M1 | \
  279. EMASK_M2 | \
  280. EMASK_M23)
  281. /* Correctable errors */
  282. #define ERROR_NF_CORRECTABLE (EMASK_M27 | \
  283. EMASK_M20 | \
  284. EMASK_M19 | \
  285. EMASK_M18 | \
  286. EMASK_M17 | \
  287. EMASK_M16)
  288. #define ERROR_NF_DIMM_SPARE (EMASK_M29 | \
  289. EMASK_M28)
  290. #define ERROR_NF_SPD_PROTOCOL (EMASK_M22)
  291. #define ERROR_NF_NORTH_CRC (EMASK_M21)
  292. /* Recoverable errors */
  293. #define ERROR_NF_RECOVERABLE (EMASK_M26 | \
  294. EMASK_M25 | \
  295. EMASK_M24 | \
  296. EMASK_M15 | \
  297. EMASK_M14 | \
  298. EMASK_M13 | \
  299. EMASK_M12 | \
  300. EMASK_M11 | \
  301. EMASK_M9 | \
  302. EMASK_M8 | \
  303. EMASK_M7 | \
  304. EMASK_M5)
  305. /* uncorrectable errors */
  306. #define ERROR_NF_UNCORRECTABLE (EMASK_M4)
  307. /* mask to all non-fatal errors */
  308. #define ERROR_NF_MASK (ERROR_NF_CORRECTABLE | \
  309. ERROR_NF_UNCORRECTABLE | \
  310. ERROR_NF_RECOVERABLE | \
  311. ERROR_NF_DIMM_SPARE | \
  312. ERROR_NF_SPD_PROTOCOL | \
  313. ERROR_NF_NORTH_CRC)
  314. /*
  315. * Define error masks for the several registers
  316. */
  317. /* Enable all fatal and non fatal errors */
  318. #define ENABLE_EMASK_ALL (ERROR_FAT_MASK | ERROR_NF_MASK)
  319. /* mask for fatal error registers */
  320. #define FERR_FAT_MASK ERROR_FAT_MASK
  321. /* masks for non-fatal error register */
  322. static inline int to_nf_mask(unsigned int mask)
  323. {
  324. return (mask & EMASK_M29) | (mask >> 3);
  325. };
  326. static inline int from_nf_ferr(unsigned int mask)
  327. {
  328. return (mask & EMASK_M29) | /* Bit 28 */
  329. (mask & ((1 << 28) - 1) << 3); /* Bits 0 to 27 */
  330. };
  331. #define FERR_NF_MASK to_nf_mask(ERROR_NF_MASK)
  332. #define FERR_NF_CORRECTABLE to_nf_mask(ERROR_NF_CORRECTABLE)
  333. #define FERR_NF_DIMM_SPARE to_nf_mask(ERROR_NF_DIMM_SPARE)
  334. #define FERR_NF_SPD_PROTOCOL to_nf_mask(ERROR_NF_SPD_PROTOCOL)
  335. #define FERR_NF_NORTH_CRC to_nf_mask(ERROR_NF_NORTH_CRC)
  336. #define FERR_NF_RECOVERABLE to_nf_mask(ERROR_NF_RECOVERABLE)
  337. #define FERR_NF_UNCORRECTABLE to_nf_mask(ERROR_NF_UNCORRECTABLE)
  338. #endif
/*
 * Device name and register DID (Device ID).
 * One entry of the table of devices supported by this driver.
 */
struct i7300_dev_info {
	const char *ctl_name; /* name for this device */
	u16 fsb_mapping_errors; /* DID for the branchmap,control */
};
  344. /* Table of devices attributes supported by this driver */
  345. static const struct i7300_dev_info i7300_devs[] = {
  346. {
  347. .ctl_name = "I7300",
  348. .fsb_mapping_errors = PCI_DEVICE_ID_INTEL_I7300_MCH_ERR,
  349. },
  350. };
/* Per-DIMM information kept in the driver private data */
struct i7300_dimm_info {
	int megabytes; /* size, 0 means not present */
};
/*
 * Driver private data structure: the PCI devices used by the driver plus
 * cached copies of the memory controller registers.
 */
struct i7300_pvt {
	struct pci_dev *system_address; /* 16.0 */
	struct pci_dev *branchmap_werrors; /* 16.1 */
	struct pci_dev *fsb_error_regs; /* 16.2 */
	struct pci_dev *branch_pci[MAX_BRANCHES]; /* 21.0 and 22.0 */
	u16 tolm; /* top of low memory */
	u64 ambase; /* AMB BAR */
	u32 mc_settings;
	u16 mir[MAX_MIR];
	u16 mtr[MAX_SLOTS][MAX_BRANCHES]; /* Memory Technology Reg */
	u16 ambpresent[MAX_CHANNELS]; /* AMB present regs */
	/* DIMM information matrix, allocating architecture maximums */
	struct i7300_dimm_info dimm_info[MAX_SLOTS][MAX_CHANNELS];
};
  369. #if 0
/*
 * I7300 MCH error information retrieved from Hardware.
 * Filled in by i7300_get_error_info(); register groups that were not read
 * are zeroed there.
 */
struct i7300_error_info {
	/* These registers are always read from the MC */
	u32 ferr_fat_fbd; /* First Errors Fatal */
	u32 nerr_fat_fbd; /* Next Errors Fatal */
	u32 ferr_nf_fbd; /* First Errors Non-Fatal */
	u32 nerr_nf_fbd; /* Next Errors Non-Fatal */
	/* These registers are input ONLY if there was a Recoverable Error */
	u32 redmemb; /* Recoverable Mem Data Error log B */
	u16 recmema; /* Recoverable Mem Error log A */
	u32 recmemb; /* Recoverable Mem Error log B */
	/* These registers are input ONLY if there was a Non-Rec Error */
	u16 nrecmema; /* Non-Recoverable Mem log A */
	/*
	 * NOTE(review): nrecmemb is 16 bits wide, yet nrec_rdwr() shifts it by
	 * 31 and nrec_cas() by 16, which can only yield 0 for a u16. recmemb,
	 * its recoverable counterpart, is u32 -- confirm the register width.
	 */
	u16 nrecmemb; /* Non-Recoverable Mem log B */
};
  385. #endif
/*
 * EDAC PCI control info handle for this driver.
 * FIXME: Why do we need to have this static?
 */
static struct edac_pci_ctl_info *i7300_pci;
  388. #if 0
  389. /* note that nrec_rdwr changed from NRECMEMA to NRECMEMB between the 5000 and
  390. 5400 better to use an inline function than a macro in this case */
  391. static inline int nrec_bank(struct i7300_error_info *info)
  392. {
  393. return ((info->nrecmema) >> 12) & 0x7;
  394. }
  395. static inline int nrec_rank(struct i7300_error_info *info)
  396. {
  397. return ((info->nrecmema) >> 8) & 0xf;
  398. }
  399. static inline int nrec_buf_id(struct i7300_error_info *info)
  400. {
  401. return ((info->nrecmema)) & 0xff;
  402. }
  403. static inline int nrec_rdwr(struct i7300_error_info *info)
  404. {
  405. return (info->nrecmemb) >> 31;
  406. }
  407. /* This applies to both NREC and REC string so it can be used with nrec_rdwr
  408. and rec_rdwr */
  409. static inline const char *rdwr_str(int rdwr)
  410. {
  411. return rdwr ? "Write" : "Read";
  412. }
  413. static inline int nrec_cas(struct i7300_error_info *info)
  414. {
  415. return ((info->nrecmemb) >> 16) & 0x1fff;
  416. }
  417. static inline int nrec_ras(struct i7300_error_info *info)
  418. {
  419. return (info->nrecmemb) & 0xffff;
  420. }
  421. static inline int rec_bank(struct i7300_error_info *info)
  422. {
  423. return ((info->recmema) >> 12) & 0x7;
  424. }
  425. static inline int rec_rank(struct i7300_error_info *info)
  426. {
  427. return ((info->recmema) >> 8) & 0xf;
  428. }
  429. static inline int rec_rdwr(struct i7300_error_info *info)
  430. {
  431. return (info->recmemb) >> 31;
  432. }
  433. static inline int rec_cas(struct i7300_error_info *info)
  434. {
  435. return ((info->recmemb) >> 16) & 0x1fff;
  436. }
  437. static inline int rec_ras(struct i7300_error_info *info)
  438. {
  439. return (info->recmemb) & 0xffff;
  440. }
/*
 * i7300_get_error_info - retrieve the hardware error information and cache
 * it in the 'info' structure
 * @mci:  memory controller; mci->pvt_info supplies the PCI device to read
 * @info: destination; every field is written on each call
 *
 * For both the fatal and the non-fatal group: read the "first error"
 * register, and if it reports an error, harvest the related log registers
 * and write the value back to the "first error" register. Otherwise zero the
 * fields of that group.
 * Return values of the PCI config accessors are not checked.
 */
static void i7300_get_error_info(struct mem_ctl_info *mci,
				 struct i7300_error_info *info)
{
	struct i7300_pvt *pvt;
	u32 value;

	pvt = mci->pvt_info;

	/* read in the 1st FATAL error register */
	pci_read_config_dword(pvt->branchmap_werrors, FERR_FAT_FBD, &value);

	/* Mask only the bits that the doc says are valid */
	value &= (FERR_FAT_FBDCHAN | FERR_FAT_MASK);

	/*
	 * If there is an error, then read in the NEXT FATAL error register
	 * and the Non-Recoverable Memory Error Log registers A and B
	 */
	if (value & FERR_FAT_MASK) {
		info->ferr_fat_fbd = value;

		/* harvest the various error data we need */
		pci_read_config_dword(pvt->branchmap_werrors,
				NERR_FAT_FBD, &info->nerr_fat_fbd);
		pci_read_config_word(pvt->branchmap_werrors,
				NRECMEMA, &info->nrecmema);
		/*
		 * NOTE(review): NRECMEMB is read as a 16-bit word, yet
		 * nrec_rdwr()/nrec_cas() shift the cached value by 31/16
		 * bits -- confirm the register width.
		 */
		pci_read_config_word(pvt->branchmap_werrors,
				NRECMEMB, &info->nrecmemb);

		/* Clear the error bits, by writing them back */
		pci_write_config_dword(pvt->branchmap_werrors,
				FERR_FAT_FBD, value);
	} else {
		/* No fatal error: report the whole fatal group as empty */
		info->ferr_fat_fbd = 0;
		info->nerr_fat_fbd = 0;
		info->nrecmema = 0;
		info->nrecmemb = 0;
	}

	/* read in the 1st NON-FATAL error register */
	pci_read_config_dword(pvt->branchmap_werrors, FERR_NF_FBD, &value);

	/*
	 * If there is an error, then read in the NEXT NON-FATAL error
	 * register and the recoverable error logs as well
	 */
	if (value & FERR_NF_MASK) {
		info->ferr_nf_fbd = value;

		/* harvest the various error data we need */
		pci_read_config_dword(pvt->branchmap_werrors,
				NERR_NF_FBD, &info->nerr_nf_fbd);
		pci_read_config_word(pvt->branchmap_werrors,
				RECMEMA, &info->recmema);
		pci_read_config_dword(pvt->branchmap_werrors,
				RECMEMB, &info->recmemb);
		pci_read_config_dword(pvt->branchmap_werrors,
				REDMEMB, &info->redmemb);

		/* Clear the error bits, by writing them back */
		pci_write_config_dword(pvt->branchmap_werrors,
				FERR_NF_FBD, value);
	} else {
		/* No non-fatal error: report the whole group as empty */
		info->ferr_nf_fbd = 0;
		info->nerr_nf_fbd = 0;
		info->recmema = 0;
		info->recmemb = 0;
		info->redmemb = 0;
	}
}
  504. /*
  505. * i7300_proccess_non_recoverable_info(struct mem_ctl_info *mci,
  506. * struct i7300_error_info *info,
  507. * int handle_errors);
  508. *
  509. * handle the Intel FATAL and unrecoverable errors, if any
  510. */
  511. static void i7300_proccess_non_recoverable_info(struct mem_ctl_info *mci,
  512. struct i7300_error_info *info,
  513. unsigned long allErrors)
  514. {
  515. char msg[EDAC_MC_LABEL_LEN + 1 + 90 + 80];
  516. int branch;
  517. int channel;
  518. int bank;
  519. int buf_id;
  520. int rank;
  521. int rdwr;
  522. int ras, cas;
  523. int errnum;
  524. char *type = NULL;
  525. if (!allErrors)
  526. return; /* if no error, return now */
  527. if (allErrors & ERROR_FAT_MASK)
  528. type = "FATAL";
  529. else if (allErrors & FERR_NF_UNCORRECTABLE)
  530. type = "NON-FATAL uncorrected";
  531. else
  532. type = "NON-FATAL recoverable";
  533. /* ONLY ONE of the possible error bits will be set, as per the docs */
  534. branch = extract_fbdchan_indx(info->ferr_fat_fbd);
  535. channel = branch;
  536. /* Use the NON-Recoverable macros to extract data */
  537. bank = nrec_bank(info);
  538. rank = nrec_rank(info);
  539. buf_id = nrec_buf_id(info);
  540. rdwr = nrec_rdwr(info);
  541. ras = nrec_ras(info);
  542. cas = nrec_cas(info);
  543. debugf0("\t\tCSROW= %d Channels= %d,%d (Branch= %d "
  544. "DRAM Bank= %d Buffer ID = %d rdwr= %s ras= %d cas= %d)\n",
  545. rank, channel, channel + 1, branch >> 1, bank,
  546. buf_id, rdwr_str(rdwr), ras, cas);
  547. /* Only 1 bit will be on */
  548. errnum = find_first_bit(&allErrors, ARRAY_SIZE(error_name));
  549. /* Form out message */
  550. snprintf(msg, sizeof(msg),
  551. "%s (Branch=%d DRAM-Bank=%d Buffer ID = %d RDWR=%s "
  552. "RAS=%d CAS=%d %s Err=0x%lx (%s))",
  553. type, branch >> 1, bank, buf_id, rdwr_str(rdwr), ras, cas,
  554. type, allErrors, error_name[errnum]);
  555. /* Call the helper to output message */
  556. edac_mc_handle_fbd_ue(mci, rank, channel, channel + 1, msg);
  557. }
  558. /*
  559. * i7300_process_fatal_error_info(struct mem_ctl_info *mci,
  560. * struct i7300_error_info *info,
  561. * int handle_errors);
  562. *
  563. * handle the Intel NON-FATAL errors, if any
  564. */
  565. static void i7300_process_nonfatal_error_info(struct mem_ctl_info *mci,
  566. struct i7300_error_info *info)
  567. {
  568. char msg[EDAC_MC_LABEL_LEN + 1 + 90 + 80];
  569. unsigned long allErrors;
  570. int branch;
  571. int channel;
  572. int bank;
  573. int rank;
  574. int rdwr;
  575. int ras, cas;
  576. int errnum;
  577. /* mask off the Error bits that are possible */
  578. allErrors = from_nf_ferr(info->ferr_nf_fbd & FERR_NF_MASK);
  579. if (!allErrors)
  580. return; /* if no error, return now */
  581. /* ONLY ONE of the possible error bits will be set, as per the docs */
  582. if (allErrors & (ERROR_NF_UNCORRECTABLE | ERROR_NF_RECOVERABLE)) {
  583. i7300_proccess_non_recoverable_info(mci, info, allErrors);
  584. return;
  585. }
  586. /* Correctable errors */
  587. if (allErrors & ERROR_NF_CORRECTABLE) {
  588. debugf0("\tCorrected bits= 0x%lx\n", allErrors);
  589. branch = extract_fbdchan_indx(info->ferr_nf_fbd);
  590. channel = 0;
  591. if (REC_ECC_LOCATOR_ODD(info->redmemb))
  592. channel = 1;
  593. /* Convert channel to be based from zero, instead of
  594. * from branch base of 0 */
  595. channel += branch;
  596. bank = rec_bank(info);
  597. rank = rec_rank(info);
  598. rdwr = rec_rdwr(info);
  599. ras = rec_ras(info);
  600. cas = rec_cas(info);
  601. /* Only 1 bit will be on */
  602. errnum = find_first_bit(&allErrors, ARRAY_SIZE(error_name));
  603. debugf0("\t\tCSROW= %d Channel= %d (Branch %d "
  604. "DRAM Bank= %d rdwr= %s ras= %d cas= %d)\n",
  605. rank, channel, branch >> 1, bank,
  606. rdwr_str(rdwr), ras, cas);
  607. /* Form out message */
  608. snprintf(msg, sizeof(msg),
  609. "Corrected error (Branch=%d DRAM-Bank=%d RDWR=%s "
  610. "RAS=%d CAS=%d, CE Err=0x%lx (%s))",
  611. branch >> 1, bank, rdwr_str(rdwr), ras, cas,
  612. allErrors, error_name[errnum]);
  613. /* Call the helper to output message */
  614. edac_mc_handle_fbd_ce(mci, rank, channel, msg);
  615. return;
  616. }
  617. /* Miscelaneous errors */
  618. errnum = find_first_bit(&allErrors, ARRAY_SIZE(error_name));
  619. branch = extract_fbdchan_indx(info->ferr_nf_fbd);
  620. i7300_mc_printk(mci, KERN_EMERG,
  621. "Non-Fatal misc error (Branch=%d Err=%#lx (%s))",
  622. branch >> 1, allErrors, error_name[errnum]);
  623. }
  624. /*
  625. * i7300_process_error_info Process the error info that is
  626. * in the 'info' structure, previously retrieved from hardware
  627. */
  628. static void i7300_process_error_info(struct mem_ctl_info *mci,
  629. struct i7300_error_info *info)
  630. { u32 allErrors;
  631. /* First handle any fatal errors that occurred */
  632. allErrors = (info->ferr_fat_fbd & FERR_FAT_MASK);
  633. i7300_proccess_non_recoverable_info(mci, info, allErrors);
  634. /* now handle any non-fatal errors that occurred */
  635. i7300_process_nonfatal_error_info(mci, info);
  636. }
  637. /*
  638. * i7300_clear_error Retrieve any error from the hardware
  639. * but do NOT process that error.
  640. * Used for 'clearing' out of previous errors
  641. * Called by the Core module.
  642. */
  643. static void i7300_clear_error(struct mem_ctl_info *mci)
  644. {
  645. struct i7300_error_info info;
  646. i7300_get_error_info(mci, &info);
  647. }
  648. /*
  649. * i7300_check_error Retrieve and process errors reported by the
  650. * hardware. Called by the Core module.
  651. */
  652. static void i7300_check_error(struct mem_ctl_info *mci)
  653. {
  654. struct i7300_error_info info;
  655. debugf4("MC%d: " __FILE__ ": %s()\n", mci->mc_idx, __func__);
  656. i7300_get_error_info(mci, &info);
  657. i7300_process_error_info(mci, &info);
  658. }
  659. /*
  660. * i7300_enable_error_reporting
  661. * Turn on the memory reporting features of the hardware
  662. */
  663. static void i7300_enable_error_reporting(struct mem_ctl_info *mci)
  664. {
  665. struct i7300_pvt *pvt;
  666. u32 fbd_error_mask;
  667. pvt = mci->pvt_info;
  668. /* Read the FBD Error Mask Register */
  669. pci_read_config_dword(pvt->branchmap_werrors, EMASK_FBD,
  670. &fbd_error_mask);
  671. /* Enable with a '0' */
  672. fbd_error_mask &= ~(ENABLE_EMASK_ALL);
  673. pci_write_config_dword(pvt->branchmap_werrors, EMASK_FBD,
  674. fbd_error_mask);
  675. }
  676. #endif
  677. /*
  678. * determine_mtr(pvt, csrow, channel)
  679. *
  680. * return the proper MTR register as determine by the csrow and desired channel
  681. */
  682. static int decode_mtr(struct i7300_pvt *pvt,
  683. int slot, int ch, int branch,
  684. struct i7300_dimm_info *dinfo,
  685. struct csrow_info *p_csrow)
  686. {
  687. int mtr, ans, addrBits, channel;
  688. channel = to_channel(ch, branch);
  689. mtr = pvt->mtr[slot][branch];
  690. ans = MTR_DIMMS_PRESENT(mtr) ? 1 : 0;
  691. debugf2("\tMTR%d CH%d: DIMMs are %s (mtr)\n",
  692. slot, channel,
  693. ans ? "Present" : "NOT Present");
  694. /* Determine if there is a DIMM present in this DIMM slot */
  695. #if 0
  696. if (!amb_present || !ans)
  697. return 0;
  698. #else
  699. if (!ans)
  700. return 0;
  701. #endif
  702. /* Start with the number of bits for a Bank
  703. * on the DRAM */
  704. addrBits = MTR_DRAM_BANKS_ADDR_BITS;
  705. /* Add thenumber of ROW bits */
  706. addrBits += MTR_DIMM_ROWS_ADDR_BITS(mtr);
  707. /* add the number of COLUMN bits */
  708. addrBits += MTR_DIMM_COLS_ADDR_BITS(mtr);
  709. /* add the number of RANK bits */
  710. addrBits += MTR_DIMM_RANKS(mtr);
  711. addrBits += 6; /* add 64 bits per DIMM */
  712. addrBits -= 20; /* divide by 2^^20 */
  713. addrBits -= 3; /* 8 bits per bytes */
  714. dinfo->megabytes = 1 << addrBits;
  715. debugf2("\t\tWIDTH: x%d\n", MTR_DRAM_WIDTH(mtr));
  716. debugf2("\t\tELECTRICAL THROTTLING is %s\n",
  717. MTR_DIMMS_ETHROTTLE(mtr) ? "enabled" : "disabled");
  718. debugf2("\t\tNUMBANK: %d bank(s)\n", MTR_DRAM_BANKS(mtr));
  719. debugf2("\t\tNUMRANK: %s\n", MTR_DIMM_RANKS(mtr) ? "double" : "single");
  720. debugf2("\t\tNUMROW: %s\n", numrow_toString[MTR_DIMM_ROWS(mtr)]);
  721. debugf2("\t\tNUMCOL: %s\n", numcol_toString[MTR_DIMM_COLS(mtr)]);
  722. debugf2("\t\tSIZE: %d MB\n", dinfo->megabytes);
  723. p_csrow->grain = 8;
  724. p_csrow->nr_pages = dinfo->megabytes << 8;
  725. p_csrow->mtype = MEM_FB_DDR2;
  726. p_csrow->edac_mode = EDAC_S8ECD8ED;
  727. /* ask what device type on this row */
  728. if (MTR_DRAM_WIDTH(mtr))
  729. p_csrow->dtype = DEV_X8;
  730. else
  731. p_csrow->dtype = DEV_X4;
  732. return mtr;
  733. }
  734. /*
  735. * print_dimm_size
  736. *
  737. * also will output a DIMM matrix map, if debug is enabled, for viewing
  738. * how the DIMMs are populated
  739. */
  740. static void print_dimm_size(struct i7300_pvt *pvt)
  741. {
  742. struct i7300_dimm_info *dinfo;
  743. char *p, *mem_buffer;
  744. int space, n;
  745. int channel, slot;
  746. space = PAGE_SIZE;
  747. mem_buffer = p = kmalloc(space, GFP_KERNEL);
  748. if (p == NULL) {
  749. i7300_printk(KERN_ERR, "MC: %s:%s() kmalloc() failed\n",
  750. __FILE__, __func__);
  751. return;
  752. }
  753. n = snprintf(p, space, " ");
  754. p += n;
  755. space -= n;
  756. for (channel = 0; channel < MAX_CHANNELS; channel++) {
  757. n = snprintf(p, space, "channel %d | ", channel);
  758. p += n;
  759. space -= n;
  760. }
  761. debugf2("%s\n", mem_buffer);
  762. p = mem_buffer;
  763. space = PAGE_SIZE;
  764. n = snprintf(p, space, "-------------------------------"
  765. "------------------------------");
  766. p += n;
  767. space -= n;
  768. debugf2("%s\n", mem_buffer);
  769. p = mem_buffer;
  770. space = PAGE_SIZE;
  771. for (slot = 0; slot < MAX_SLOTS; slot++) {
  772. n = snprintf(p, space, "csrow/SLOT %d ", slot);
  773. p += n;
  774. space -= n;
  775. for (channel = 0; channel < MAX_CHANNELS; channel++) {
  776. dinfo = &pvt->dimm_info[slot][channel];
  777. n = snprintf(p, space, "%4d MB | ", dinfo->megabytes);
  778. p += n;
  779. space -= n;
  780. }
  781. debugf2("%s\n", mem_buffer);
  782. p = mem_buffer;
  783. space = PAGE_SIZE;
  784. }
  785. n = snprintf(p, space, "-------------------------------"
  786. "------------------------------");
  787. p += n;
  788. space -= n;
  789. debugf2("%s\n", mem_buffer);
  790. p = mem_buffer;
  791. space = PAGE_SIZE;
  792. kfree(mem_buffer);
  793. }
  794. /*
  795. * i7300_init_csrows Initialize the 'csrows' table within
  796. * the mci control structure with the
  797. * addressing of memory.
  798. *
  799. * return:
  800. * 0 success
  801. * 1 no actual memory found on this MC
  802. */
  803. static int i7300_init_csrows(struct mem_ctl_info *mci)
  804. {
  805. struct i7300_pvt *pvt;
  806. struct i7300_dimm_info *dinfo;
  807. struct csrow_info *p_csrow;
  808. int empty;
  809. int mtr;
  810. int ch, branch, slot, channel;
  811. pvt = mci->pvt_info;
  812. empty = 1; /* Assume NO memory */
  813. debugf2("Memory Technology Registers:\n");
  814. /* Get the AMB present registers for the four channels */
  815. for (branch = 0; branch < MAX_BRANCHES; branch++) {
  816. /* Read and dump branch 0's MTRs */
  817. channel = to_channel(0, branch);
  818. pci_read_config_word(pvt->branch_pci[branch], AMBPRESENT_0,
  819. &pvt->ambpresent[channel]);
  820. debugf2("\t\tAMB-present CH%d = 0x%x:\n",
  821. channel, pvt->ambpresent[channel]);
  822. channel = to_channel(1, branch);
  823. pci_read_config_word(pvt->branch_pci[branch], AMBPRESENT_1,
  824. &pvt->ambpresent[channel]);
  825. debugf2("\t\tAMB-present CH%d = 0x%x:\n",
  826. channel, pvt->ambpresent[channel]);
  827. }
  828. /* Get the set of MTR[0-7] regs by each branch */
  829. for (slot = 0; slot < MAX_SLOTS; slot++) {
  830. int where = mtr_regs[slot];
  831. for (branch = 0; branch < MAX_BRANCHES; branch++) {
  832. pci_read_config_word(pvt->branch_pci[branch],
  833. where,
  834. &pvt->mtr[slot][branch]);
  835. for (ch = 0; ch < MAX_BRANCHES; ch++) {
  836. int channel = to_channel(ch, branch);
  837. dinfo = &pvt->dimm_info[slot][channel];
  838. p_csrow = &mci->csrows[slot];
  839. mtr = decode_mtr(pvt, slot, ch, branch,
  840. dinfo, p_csrow);
  841. /* if no DIMMS on this row, continue */
  842. if (!MTR_DIMMS_PRESENT(mtr))
  843. continue;
  844. p_csrow->csrow_idx = slot;
  845. /* FAKE OUT VALUES, FIXME */
  846. p_csrow->first_page = 0 + slot * 20;
  847. p_csrow->last_page = 9 + slot * 20;
  848. p_csrow->page_mask = 0xfff;
  849. empty = 0;
  850. }
  851. }
  852. }
  853. return empty;
  854. }
  855. static void decode_mir(int mir_no, u16 mir[MAX_MIR])
  856. {
  857. if (mir[mir_no] & 3)
  858. debugf2("MIR%d: limit= 0x%x Branch(es) that participate: %s %s\n",
  859. mir_no,
  860. (mir[mir_no] >> 4) & 0xfff,
  861. (mir[mir_no] & 1) ? "B0" : "",
  862. (mir[mir_no] & 2) ? "B1": "");
  863. }
  864. /*
  865. * i7300_get_mc_regs read in the necessary registers and
  866. * cache locally
  867. *
  868. * Fills in the private data members
  869. */
  870. static int i7300_get_mc_regs(struct mem_ctl_info *mci)
  871. {
  872. struct i7300_pvt *pvt;
  873. u32 actual_tolm;
  874. int i, rc;
  875. pvt = mci->pvt_info;
  876. pci_read_config_dword(pvt->system_address, AMBASE,
  877. (u32 *) &pvt->ambase);
  878. debugf2("AMBASE= 0x%lx\n", (long unsigned int)pvt->ambase);
  879. /* Get the Branch Map regs */
  880. pci_read_config_word(pvt->branchmap_werrors, TOLM, &pvt->tolm);
  881. pvt->tolm >>= 12;
  882. debugf2("TOLM (number of 256M regions) =%u (0x%x)\n", pvt->tolm,
  883. pvt->tolm);
  884. actual_tolm = (u32) ((1000l * pvt->tolm) >> (30 - 28));
  885. debugf2("Actual TOLM byte addr=%u.%03u GB (0x%x)\n",
  886. actual_tolm/1000, actual_tolm % 1000, pvt->tolm << 28);
  887. /* Get memory controller settings */
  888. pci_read_config_dword(pvt->branchmap_werrors, MC_SETTINGS,
  889. &pvt->mc_settings);
  890. debugf0("Memory controller operating on %s mode\n",
  891. pvt->mc_settings & (1 << 16)? "mirrored" : "non-mirrored");
  892. debugf0("Error detection is %s\n",
  893. pvt->mc_settings & (1 << 5)? "enabled" : "disabled");
  894. /* Get Memory Interleave Range registers */
  895. pci_read_config_word(pvt->branchmap_werrors, MIR0, &pvt->mir[0]);
  896. pci_read_config_word(pvt->branchmap_werrors, MIR1, &pvt->mir[1]);
  897. pci_read_config_word(pvt->branchmap_werrors, MIR2, &pvt->mir[2]);
  898. /* Decode the MIR regs */
  899. for (i = 0; i < MAX_MIR; i++)
  900. decode_mir(i, pvt->mir);
  901. rc = i7300_init_csrows(mci);
  902. if (rc < 0)
  903. return rc;
  904. /* Go and determine the size of each DIMM and place in an
  905. * orderly matrix */
  906. print_dimm_size(pvt);
  907. return 0;
  908. }
  909. /*
  910. * i7300_put_devices 'put' all the devices that we have
  911. * reserved via 'get'
  912. */
  913. static void i7300_put_devices(struct mem_ctl_info *mci)
  914. {
  915. struct i7300_pvt *pvt;
  916. int branch;
  917. pvt = mci->pvt_info;
  918. /* Decrement usage count for devices */
  919. for (branch = 0; branch < MAX_CH_PER_BRANCH; branch++)
  920. pci_dev_put(pvt->branch_pci[branch]);
  921. pci_dev_put(pvt->fsb_error_regs);
  922. pci_dev_put(pvt->branchmap_werrors);
  923. }
  924. /*
  925. * i7300_get_devices Find and perform 'get' operation on the MCH's
  926. * device/functions we want to reference for this driver
  927. *
  928. * Need to 'get' device 16 func 1 and func 2
  929. */
  930. static int i7300_get_devices(struct mem_ctl_info *mci, int dev_idx)
  931. {
  932. struct i7300_pvt *pvt;
  933. struct pci_dev *pdev;
  934. pvt = mci->pvt_info;
  935. /* Attempt to 'get' the MCH register we want */
  936. pdev = NULL;
  937. while (!pvt->branchmap_werrors || !pvt->fsb_error_regs) {
  938. pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
  939. PCI_DEVICE_ID_INTEL_I7300_MCH_ERR, pdev);
  940. if (!pdev) {
  941. /* End of list, leave */
  942. i7300_printk(KERN_ERR,
  943. "'system address,Process Bus' "
  944. "device not found:"
  945. "vendor 0x%x device 0x%x ERR funcs "
  946. "(broken BIOS?)\n",
  947. PCI_VENDOR_ID_INTEL,
  948. PCI_DEVICE_ID_INTEL_I7300_MCH_ERR);
  949. goto error;
  950. }
  951. /* Store device 16 funcs 1 and 2 */
  952. switch (PCI_FUNC(pdev->devfn)) {
  953. case 1:
  954. pvt->branchmap_werrors = pdev;
  955. break;
  956. case 2:
  957. pvt->fsb_error_regs = pdev;
  958. break;
  959. }
  960. }
  961. debugf1("System Address, processor bus- PCI Bus ID: %s %x:%x\n",
  962. pci_name(pvt->system_address),
  963. pvt->system_address->vendor, pvt->system_address->device);
  964. debugf1("Branchmap, control and errors - PCI Bus ID: %s %x:%x\n",
  965. pci_name(pvt->branchmap_werrors),
  966. pvt->branchmap_werrors->vendor, pvt->branchmap_werrors->device);
  967. debugf1("FSB Error Regs - PCI Bus ID: %s %x:%x\n",
  968. pci_name(pvt->fsb_error_regs),
  969. pvt->fsb_error_regs->vendor, pvt->fsb_error_regs->device);
  970. pvt->branch_pci[0] = pci_get_device(PCI_VENDOR_ID_INTEL,
  971. PCI_DEVICE_ID_INTEL_I7300_MCH_FB0,
  972. NULL);
  973. if (!pvt->branch_pci[0]) {
  974. i7300_printk(KERN_ERR,
  975. "MC: 'BRANCH 0' device not found:"
  976. "vendor 0x%x device 0x%x Func 0 (broken BIOS?)\n",
  977. PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_I7300_MCH_FB0);
  978. goto error;
  979. }
  980. pvt->branch_pci[1] = pci_get_device(PCI_VENDOR_ID_INTEL,
  981. PCI_DEVICE_ID_INTEL_I7300_MCH_FB1,
  982. NULL);
  983. if (!pvt->branch_pci[1]) {
  984. i7300_printk(KERN_ERR,
  985. "MC: 'BRANCH 1' device not found:"
  986. "vendor 0x%x device 0x%x Func 0 "
  987. "(broken BIOS?)\n",
  988. PCI_VENDOR_ID_INTEL,
  989. PCI_DEVICE_ID_INTEL_I7300_MCH_FB1);
  990. goto error;
  991. }
  992. return 0;
  993. error:
  994. i7300_put_devices(mci);
  995. return -ENODEV;
  996. }
  997. /*
  998. * i7300_probe1 Probe for ONE instance of device to see if it is
  999. * present.
  1000. * return:
  1001. * 0 for FOUND a device
  1002. * < 0 for error code
  1003. */
  1004. static int i7300_probe1(struct pci_dev *pdev, int dev_idx)
  1005. {
  1006. struct mem_ctl_info *mci;
  1007. struct i7300_pvt *pvt;
  1008. int num_channels;
  1009. int num_dimms_per_channel;
  1010. int num_csrows;
  1011. if (dev_idx >= ARRAY_SIZE(i7300_devs))
  1012. return -EINVAL;
  1013. debugf0("MC: " __FILE__ ": %s(), pdev bus %u dev=0x%x fn=0x%x\n",
  1014. __func__,
  1015. pdev->bus->number,
  1016. PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
  1017. /* We only are looking for func 0 of the set */
  1018. if (PCI_FUNC(pdev->devfn) != 0)
  1019. return -ENODEV;
  1020. /* As we don't have a motherboard identification routine to determine
  1021. * actual number of slots/dimms per channel, we thus utilize the
  1022. * resource as specified by the chipset. Thus, we might have
  1023. * have more DIMMs per channel than actually on the mobo, but this
  1024. * allows the driver to support upto the chipset max, without
  1025. * some fancy mobo determination.
  1026. */
  1027. num_dimms_per_channel = MAX_SLOTS;
  1028. num_channels = MAX_CHANNELS;
  1029. num_csrows = MAX_SLOTS * MAX_CHANNELS;
  1030. debugf0("MC: %s(): Number of - Channels= %d DIMMS= %d CSROWS= %d\n",
  1031. __func__, num_channels, num_dimms_per_channel, num_csrows);
  1032. /* allocate a new MC control structure */
  1033. mci = edac_mc_alloc(sizeof(*pvt), num_csrows, num_channels, 0);
  1034. if (mci == NULL)
  1035. return -ENOMEM;
  1036. debugf0("MC: " __FILE__ ": %s(): mci = %p\n", __func__, mci);
  1037. mci->dev = &pdev->dev; /* record ptr to the generic device */
  1038. pvt = mci->pvt_info;
  1039. pvt->system_address = pdev; /* Record this device in our private */
  1040. /* 'get' the pci devices we want to reserve for our use */
  1041. if (i7300_get_devices(mci, dev_idx))
  1042. goto fail0;
  1043. mci->mc_idx = 0;
  1044. mci->mtype_cap = MEM_FLAG_FB_DDR2;
  1045. mci->edac_ctl_cap = EDAC_FLAG_NONE;
  1046. mci->edac_cap = EDAC_FLAG_NONE;
  1047. mci->mod_name = "i7300_edac.c";
  1048. mci->mod_ver = I7300_REVISION;
  1049. mci->ctl_name = i7300_devs[dev_idx].ctl_name;
  1050. mci->dev_name = pci_name(pdev);
  1051. mci->ctl_page_to_phys = NULL;
  1052. #if 0
  1053. /* Set the function pointer to an actual operation function */
  1054. mci->edac_check = i7300_check_error;
  1055. #endif
  1056. /* initialize the MC control structure 'csrows' table
  1057. * with the mapping and control information */
  1058. if (i7300_get_mc_regs(mci)) {
  1059. debugf0("MC: Setting mci->edac_cap to EDAC_FLAG_NONE\n"
  1060. " because i7300_init_csrows() returned nonzero "
  1061. "value\n");
  1062. mci->edac_cap = EDAC_FLAG_NONE; /* no csrows found */
  1063. } else {
  1064. #if 0
  1065. debugf1("MC: Enable error reporting now\n");
  1066. i7300_enable_error_reporting(mci);
  1067. #endif
  1068. }
  1069. /* add this new MC control structure to EDAC's list of MCs */
  1070. if (edac_mc_add_mc(mci)) {
  1071. debugf0("MC: " __FILE__
  1072. ": %s(): failed edac_mc_add_mc()\n", __func__);
  1073. /* FIXME: perhaps some code should go here that disables error
  1074. * reporting if we just enabled it
  1075. */
  1076. goto fail1;
  1077. }
  1078. #if 0
  1079. i7300_clear_error(mci);
  1080. #endif
  1081. /* allocating generic PCI control info */
  1082. i7300_pci = edac_pci_create_generic_ctl(&pdev->dev, EDAC_MOD_STR);
  1083. if (!i7300_pci) {
  1084. printk(KERN_WARNING
  1085. "%s(): Unable to create PCI control\n",
  1086. __func__);
  1087. printk(KERN_WARNING
  1088. "%s(): PCI error report via EDAC not setup\n",
  1089. __func__);
  1090. }
  1091. return 0;
  1092. /* Error exit unwinding stack */
  1093. fail1:
  1094. i7300_put_devices(mci);
  1095. fail0:
  1096. edac_mc_free(mci);
  1097. return -ENODEV;
  1098. }
  1099. /*
  1100. * i7300_init_one constructor for one instance of device
  1101. *
  1102. * returns:
  1103. * negative on error
  1104. * count (>= 0)
  1105. */
  1106. static int __devinit i7300_init_one(struct pci_dev *pdev,
  1107. const struct pci_device_id *id)
  1108. {
  1109. int rc;
  1110. debugf0("MC: " __FILE__ ": %s()\n", __func__);
  1111. /* wake up device */
  1112. rc = pci_enable_device(pdev);
  1113. if (rc == -EIO)
  1114. return rc;
  1115. /* now probe and enable the device */
  1116. return i7300_probe1(pdev, id->driver_data);
  1117. }
  1118. /*
  1119. * i7300_remove_one destructor for one instance of device
  1120. *
  1121. */
  1122. static void __devexit i7300_remove_one(struct pci_dev *pdev)
  1123. {
  1124. struct mem_ctl_info *mci;
  1125. debugf0(__FILE__ ": %s()\n", __func__);
  1126. if (i7300_pci)
  1127. edac_pci_release_generic_ctl(i7300_pci);
  1128. mci = edac_mc_del_mc(&pdev->dev);
  1129. if (!mci)
  1130. return;
  1131. /* retrieve references to resources, and free those resources */
  1132. i7300_put_devices(mci);
  1133. edac_mc_free(mci);
  1134. }
  1135. /*
  1136. * pci_device_id table for which devices we are looking for
  1137. *
  1138. * The "E500P" device is the first device supported.
  1139. */
  1140. static const struct pci_device_id i7300_pci_tbl[] __devinitdata = {
  1141. {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_I7300_MCH_ERR)},
  1142. {0,} /* 0 terminated list. */
  1143. };
  1144. MODULE_DEVICE_TABLE(pci, i7300_pci_tbl);
  1145. /*
  1146. * i7300_driver pci_driver structure for this module
  1147. *
  1148. */
  1149. static struct pci_driver i7300_driver = {
  1150. .name = "i7300_edac",
  1151. .probe = i7300_init_one,
  1152. .remove = __devexit_p(i7300_remove_one),
  1153. .id_table = i7300_pci_tbl,
  1154. };
  1155. /*
  1156. * i7300_init Module entry function
  1157. * Try to initialize this module for its devices
  1158. */
  1159. static int __init i7300_init(void)
  1160. {
  1161. int pci_rc;
  1162. debugf2("MC: " __FILE__ ": %s()\n", __func__);
  1163. /* Ensure that the OPSTATE is set correctly for POLL or NMI */
  1164. opstate_init();
  1165. pci_rc = pci_register_driver(&i7300_driver);
  1166. return (pci_rc < 0) ? pci_rc : 0;
  1167. }
  1168. /*
  1169. * i7300_exit() Module exit function
  1170. * Unregister the driver
  1171. */
  1172. static void __exit i7300_exit(void)
  1173. {
  1174. debugf2("MC: " __FILE__ ": %s()\n", __func__);
  1175. pci_unregister_driver(&i7300_driver);
  1176. }
  1177. module_init(i7300_init);
  1178. module_exit(i7300_exit);
  1179. MODULE_LICENSE("GPL");
  1180. MODULE_AUTHOR("Mauro Carvalho Chehab <mchehab@redhat.com>");
  1181. MODULE_AUTHOR("Red Hat Inc. (http://www.redhat.com)");
  1182. MODULE_DESCRIPTION("MC Driver for Intel I7300 memory controllers - "
  1183. I7300_REVISION);
  1184. module_param(edac_op_state, int, 0444);
  1185. MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");