/*
 * Copyright IBM Corporation 2001, 2005, 2006
 * Copyright Dave Engebretsen & Todd Inglett 2001
 * Copyright Linas Vepstas 2005, 2006
 * Copyright 2001-2012 IBM Corporation.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 *
 * Please address comments and feedback to Linas Vepstas <linas@austin.ibm.com>
 */

#include <linux/delay.h>
#include <linux/sched.h>
#include <linux/init.h>
#include <linux/list.h>
#include <linux/pci.h>
#include <linux/proc_fs.h>
#include <linux/rbtree.h>
#include <linux/seq_file.h>
#include <linux/spinlock.h>
#include <linux/export.h>
#include <linux/of.h>
#include <linux/atomic.h>
#include <asm/eeh.h>
#include <asm/eeh_event.h>
#include <asm/io.h>
#include <asm/machdep.h>
#include <asm/ppc-pci.h>
#include <asm/rtas.h>

/** Overview:
 * EEH, or "Extended Error Handling" is a PCI bridge technology for
 * dealing with PCI bus errors that can't be dealt with within the
 * usual PCI framework, except by check-stopping the CPU.  Systems
 * that are designed for high-availability/reliability cannot afford
 * to crash due to a "mere" PCI error, thus the need for EEH.
 * An EEH-capable bridge operates by converting a detected error
 * into a "slot freeze", taking the PCI adapter off-line, making
 * the slot behave, from the OS'es point of view, as if the slot
 * were "empty": all reads return 0xff's and all writes are silently
 * ignored.  EEH slot isolation events can be triggered by parity
 * errors on the address or data busses (e.g. during posted writes),
 * which in turn might be caused by low voltage on the bus, dust,
 * vibration, humidity, radioactivity or plain-old failed hardware.
 *
 * Note, however, that one of the leading causes of EEH slot
 * freeze events is buggy device drivers, buggy device microcode,
 * or buggy device hardware.  This is because any attempt by the
 * device to bus-master data to a memory address that is not
 * assigned to the device will trigger a slot freeze.  (The idea
 * is to prevent devices-gone-wild from corrupting system memory).
 * Buggy hardware/drivers will have a miserable time co-existing
 * with EEH.
 *
 * Ideally, a PCI device driver, when suspecting that an isolation
 * event has occurred (e.g. by reading 0xff's), will then ask EEH
 * whether this is the case, and then take appropriate steps to
 * reset the PCI slot, the PCI device, and then resume operations.
 * However, until that day, the checking is done here, with the
 * eeh_check_failure() routine embedded in the MMIO macros.  If
 * the slot is found to be isolated, an "EEH Event" is synthesized
 * and sent out for processing.
 */
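
/* A minimal sketch of the driver-side pattern the overview describes; the
 * 'regs' mapping and STATUS_REG offset below are hypothetical and only for
 * illustration.  On EEH-capable platforms the MMIO accessors perform this
 * check automatically via eeh_check_failure():
 *
 *	u32 val = in_le32(regs + STATUS_REG);
 *	if (val == 0xffffffff)
 *		val = eeh_check_failure(regs + STATUS_REG, val);
 *
 * A driver that sees the all-ones pattern can thereby find out whether the
 * slot has been isolated and an EEH event has been queued.
 */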

/* If a device driver keeps reading an MMIO register in an interrupt
 * handler after a slot isolation event, it might be broken.
 * This sets the threshold for how many read attempts we allow
 * before printing an error message.
 */
#define EEH_MAX_FAILS	2100000

/* Time to wait for a PCI slot to report status, in milliseconds */
#define PCI_BUS_RESET_WAIT_MSEC (60*1000)

/* RTAS tokens */
static int ibm_set_eeh_option;
static int ibm_set_slot_reset;
static int ibm_read_slot_reset_state;
static int ibm_read_slot_reset_state2;
static int ibm_slot_error_detail;
static int ibm_get_config_addr_info;
static int ibm_get_config_addr_info2;
static int ibm_configure_bridge;
static int ibm_configure_pe;

int eeh_subsystem_enabled;
EXPORT_SYMBOL(eeh_subsystem_enabled);

/* Lock to avoid races due to multiple reports of an error */
static DEFINE_RAW_SPINLOCK(confirm_error_lock);

/* Buffer for reporting slot-error-detail rtas calls.  It's here
 * in BSS, and not dynamically alloced, so that it ends up in
 * RMO where RTAS can access it.
 */
static unsigned char slot_errbuf[RTAS_ERROR_LOG_MAX];
static DEFINE_SPINLOCK(slot_errbuf_lock);
static int eeh_error_buf_size;

/* Buffer for reporting pci register dumps.  It's here in BSS, and
 * not dynamically alloced, so that it ends up in RMO where RTAS
 * can access it.
 */
#define EEH_PCI_REGS_LOG_LEN 4096
static unsigned char pci_regs_buf[EEH_PCI_REGS_LOG_LEN];

/* System monitoring statistics */
static unsigned long no_device;
static unsigned long no_dn;
static unsigned long no_cfg_addr;
static unsigned long ignored_check;
static unsigned long total_mmio_ffs;
static unsigned long false_positives;
static unsigned long slot_resets;

#define IS_BRIDGE(class_code) (((class_code) >> 16) == PCI_BASE_CLASS_BRIDGE)
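/* pdn->class_code holds the 24-bit PCI class code taken from the OF
 * "class-code" property; the base class lives in bits 23:16, which is
 * what the shift above extracts.
 */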

/**
 * rtas_slot_error_detail - Retrieve error log through RTAS call
 * @pdn: device node
 * @severity: temporary or permanent error log
 * @driver_log: driver log to be combined with the retrieved error log
 * @loglen: length of driver log
 *
 * This routine should be called to retrieve error log through the dedicated
 * RTAS call.
 */
static void rtas_slot_error_detail(struct pci_dn *pdn, int severity,
				   char *driver_log, size_t loglen)
{
	int config_addr;
	unsigned long flags;
	int rc;

	/* Log the error with the rtas logger */
	spin_lock_irqsave(&slot_errbuf_lock, flags);
	memset(slot_errbuf, 0, eeh_error_buf_size);

	/* Use PE configuration address, if present */
	config_addr = pdn->eeh_config_addr;
	if (pdn->eeh_pe_config_addr)
		config_addr = pdn->eeh_pe_config_addr;

	rc = rtas_call(ibm_slot_error_detail,
		       8, 1, NULL, config_addr,
		       BUID_HI(pdn->phb->buid),
		       BUID_LO(pdn->phb->buid),
		       virt_to_phys(driver_log), loglen,
		       virt_to_phys(slot_errbuf),
		       eeh_error_buf_size,
		       severity);
	if (rc == 0)
		log_error(slot_errbuf, ERR_TYPE_RTAS_LOG, 0);
	spin_unlock_irqrestore(&slot_errbuf_lock, flags);
}

/**
 * gather_pci_data - Copy assorted PCI config space registers to buf
 * @pdn: device to report data for
 * @buf: pointer to buffer in which to log
 * @len: amount of room in buffer
 *
 * This routine captures assorted PCI configuration space data,
 * and puts them into a buffer for RTAS error logging.
 */
static size_t gather_pci_data(struct pci_dn *pdn, char *buf, size_t len)
{
	struct pci_dev *dev = pdn->pcidev;
	u32 cfg;
	int cap, i;
	int n = 0;

	n += scnprintf(buf+n, len-n, "%s\n", pdn->node->full_name);
	printk(KERN_WARNING "EEH: of node=%s\n", pdn->node->full_name);

	rtas_read_config(pdn, PCI_VENDOR_ID, 4, &cfg);
	n += scnprintf(buf+n, len-n, "dev/vend:%08x\n", cfg);
	printk(KERN_WARNING "EEH: PCI device/vendor: %08x\n", cfg);

	rtas_read_config(pdn, PCI_COMMAND, 4, &cfg);
	n += scnprintf(buf+n, len-n, "cmd/stat:%x\n", cfg);
	printk(KERN_WARNING "EEH: PCI cmd/status register: %08x\n", cfg);

	if (!dev) {
		printk(KERN_WARNING "EEH: no PCI device for this of node\n");
		return n;
	}

	/* Gather bridge-specific registers */
	if (dev->class >> 16 == PCI_BASE_CLASS_BRIDGE) {
		rtas_read_config(pdn, PCI_SEC_STATUS, 2, &cfg);
		n += scnprintf(buf+n, len-n, "sec stat:%x\n", cfg);
		printk(KERN_WARNING "EEH: Bridge secondary status: %04x\n", cfg);

		rtas_read_config(pdn, PCI_BRIDGE_CONTROL, 2, &cfg);
		n += scnprintf(buf+n, len-n, "brdg ctl:%x\n", cfg);
		printk(KERN_WARNING "EEH: Bridge control: %04x\n", cfg);
	}

	/* Dump out the PCI-X command and status regs */
	cap = pci_find_capability(dev, PCI_CAP_ID_PCIX);
	if (cap) {
		rtas_read_config(pdn, cap, 4, &cfg);
		n += scnprintf(buf+n, len-n, "pcix-cmd:%x\n", cfg);
		printk(KERN_WARNING "EEH: PCI-X cmd: %08x\n", cfg);

		rtas_read_config(pdn, cap+4, 4, &cfg);
		n += scnprintf(buf+n, len-n, "pcix-stat:%x\n", cfg);
		printk(KERN_WARNING "EEH: PCI-X status: %08x\n", cfg);
	}

	/* If PCI-E capable, dump PCI-E cap 10, and the AER */
	cap = pci_find_capability(dev, PCI_CAP_ID_EXP);
	if (cap) {
		n += scnprintf(buf+n, len-n, "pci-e cap10:\n");
		printk(KERN_WARNING
		       "EEH: PCI-E capabilities and status follow:\n");

		for (i=0; i<=8; i++) {
			rtas_read_config(pdn, cap+4*i, 4, &cfg);
			n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg);
			printk(KERN_WARNING "EEH: PCI-E %02x: %08x\n", i, cfg);
		}

		cap = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR);
		if (cap) {
			n += scnprintf(buf+n, len-n, "pci-e AER:\n");
			printk(KERN_WARNING
			       "EEH: PCI-E AER capability register set follows:\n");

			for (i=0; i<14; i++) {
				rtas_read_config(pdn, cap+4*i, 4, &cfg);
				n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg);
				printk(KERN_WARNING "EEH: PCI-E AER %02x: %08x\n", i, cfg);
			}
		}
	}

	/* Gather status on devices under the bridge */
	if (dev->class >> 16 == PCI_BASE_CLASS_BRIDGE) {
		struct device_node *dn;

		for_each_child_of_node(pdn->node, dn) {
			pdn = PCI_DN(dn);
			if (pdn)
				n += gather_pci_data(pdn, buf+n, len-n);
		}
	}

	return n;
}

/**
 * eeh_slot_error_detail - Generate combined log including driver log and error log
 * @pdn: device node
 * @severity: temporary or permanent error log
 *
 * This routine should be called to generate the combined log, which
 * is comprised of driver log and error log. The driver log is figured
 * out from the config space of the corresponding PCI device, while
 * the error log is fetched through platform dependent function call.
 */
void eeh_slot_error_detail(struct pci_dn *pdn, int severity)
{
	size_t loglen = 0;

	pci_regs_buf[0] = 0;

	rtas_pci_enable(pdn, EEH_THAW_MMIO);
	rtas_configure_bridge(pdn);
	eeh_restore_bars(pdn);
	loglen = gather_pci_data(pdn, pci_regs_buf, EEH_PCI_REGS_LOG_LEN);

	rtas_slot_error_detail(pdn, severity, pci_regs_buf, loglen);
}

/**
 * read_slot_reset_state - Read the reset state of a device node's slot
 * @pdn: pci device node to read
 * @rets: array to return results in
 *
 * Read the reset state of a device node's slot through platform dependent
 * function call.
 */
static int read_slot_reset_state(struct pci_dn *pdn, int rets[])
{
	int token, outputs;
	int config_addr;

	if (ibm_read_slot_reset_state2 != RTAS_UNKNOWN_SERVICE) {
		token = ibm_read_slot_reset_state2;
		outputs = 4;
	} else {
		token = ibm_read_slot_reset_state;
		rets[2] = 0; /* fake PE Unavailable info */
		outputs = 3;
	}

	/* Use PE configuration address, if present */
	config_addr = pdn->eeh_config_addr;
	if (pdn->eeh_pe_config_addr)
		config_addr = pdn->eeh_pe_config_addr;

	return rtas_call(token, 3, outputs, rets, config_addr,
			 BUID_HI(pdn->phb->buid), BUID_LO(pdn->phb->buid));
}
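
/* The outputs of ibm,read-slot-reset-state(2), as interpreted by the
 * callers below:
 *	rets[0]	PE state; 5 means the PE is temporarily unavailable
 *	rets[1]	whether EEH is supported on this PE (1 = supported)
 *	rets[2]	PE-unavailable info: 0 means permanently unavailable,
 *		a positive value is a suggested wait time in milliseconds
 */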

/**
 * eeh_wait_for_slot_status - Returns error status of slot
 * @pdn: pci device node
 * @max_wait_msecs: maximum number of milliseconds to wait
 *
 * Return negative value if a permanent error, else return
 * Partition Endpoint (PE) status value.
 *
 * If @max_wait_msecs is positive, then this routine will
 * sleep until a valid status can be obtained, or until
 * the max allowed wait time is exceeded, in which case
 * a -2 is returned.
 */
int
eeh_wait_for_slot_status(struct pci_dn *pdn, int max_wait_msecs)
{
	int rc;
	int rets[3];
	int mwait;

	while (1) {
		rc = read_slot_reset_state(pdn, rets);
		if (rc) return rc;
		if (rets[1] == 0) return -1;	/* EEH is not supported */

		if (rets[0] != 5) return rets[0]; /* return actual status */

		if (rets[2] == 0) return -1;	/* permanently unavailable */

		if (max_wait_msecs <= 0) break;

		mwait = rets[2];
		if (mwait <= 0) {
			printk(KERN_WARNING "EEH: Firmware returned bad wait value=%d\n",
			       mwait);
			mwait = 1000;
		} else if (mwait > 300*1000) {
			printk(KERN_WARNING "EEH: Firmware is taking too long, time=%d\n",
			       mwait);
			mwait = 300*1000;
		}
		max_wait_msecs -= mwait;
		msleep(mwait);
	}

	printk(KERN_WARNING "EEH: Timed out waiting for slot status\n");
	return -2;
}

/**
 * eeh_token_to_phys - Convert EEH address token to phys address
 * @token: I/O token, should be address in the form 0xA....
 *
 * This routine should be called to convert virtual I/O address
 * to physical one.
 */
static inline unsigned long eeh_token_to_phys(unsigned long token)
{
	pte_t *ptep;
	unsigned long pa;

	ptep = find_linux_pte(init_mm.pgd, token);
	if (!ptep)
		return token;
	pa = pte_pfn(*ptep) << PAGE_SHIFT;

	return pa | (token & (PAGE_SIZE-1));
}

/**
 * find_device_pe - Retrieve the PE for the given device
 * @dn: device node
 *
 * Return the PE under which this device lies
 */
struct device_node *find_device_pe(struct device_node *dn)
{
	while ((dn->parent) && PCI_DN(dn->parent) &&
	       (PCI_DN(dn->parent)->eeh_mode & EEH_MODE_SUPPORTED)) {
		dn = dn->parent;
	}
	return dn;
}

/**
 * __eeh_mark_slot - Mark all child devices as failed
 * @parent: parent device
 * @mode_flag: failure flag
 *
 * Mark all devices that are children of this device as failed.
 * Mark the device driver too, so that it can see the failure
 * immediately; this is critical, since some drivers poll
 * status registers in interrupts ... If a driver is polling,
 * and the slot is frozen, then the driver can deadlock in
 * an interrupt context, which is bad.
 */
static void __eeh_mark_slot(struct device_node *parent, int mode_flag)
{
	struct device_node *dn;

	for_each_child_of_node(parent, dn) {
		if (PCI_DN(dn)) {
			/* Mark the pci device driver too */
			struct pci_dev *dev = PCI_DN(dn)->pcidev;

			PCI_DN(dn)->eeh_mode |= mode_flag;

			if (dev && dev->driver)
				dev->error_state = pci_channel_io_frozen;

			__eeh_mark_slot(dn, mode_flag);
		}
	}
}

/**
 * eeh_mark_slot - Mark the indicated device and its children as failed
 * @dn: parent device
 * @mode_flag: failure flag
 *
 * Mark the indicated device and its child devices as failed.
 * The device drivers are marked as failed as well.
 */
void eeh_mark_slot(struct device_node *dn, int mode_flag)
{
	struct pci_dev *dev;

	dn = find_device_pe(dn);

	/* Back up one, since config addrs might be shared */
	if (!pcibios_find_pci_bus(dn) && PCI_DN(dn->parent))
		dn = dn->parent;

	PCI_DN(dn)->eeh_mode |= mode_flag;

	/* Mark the pci device too */
	dev = PCI_DN(dn)->pcidev;
	if (dev)
		dev->error_state = pci_channel_io_frozen;

	__eeh_mark_slot(dn, mode_flag);
}

/**
 * __eeh_clear_slot - Clear failure flag for the child devices
 * @parent: parent device
 * @mode_flag: flag to be cleared
 *
 * Clear failure flag for the child devices.
 */
static void __eeh_clear_slot(struct device_node *parent, int mode_flag)
{
	struct device_node *dn;

	for_each_child_of_node(parent, dn) {
		if (PCI_DN(dn)) {
			PCI_DN(dn)->eeh_mode &= ~mode_flag;
			PCI_DN(dn)->eeh_check_count = 0;
			__eeh_clear_slot(dn, mode_flag);
		}
	}
}

/**
 * eeh_clear_slot - Clear failure flag for the indicated device and its children
 * @dn: parent device
 * @mode_flag: flag to be cleared
 *
 * Clear failure flag for the indicated device and its children.
 */
void eeh_clear_slot(struct device_node *dn, int mode_flag)
{
	unsigned long flags;

	raw_spin_lock_irqsave(&confirm_error_lock, flags);

	dn = find_device_pe(dn);

	/* Back up one, since config addrs might be shared */
	if (!pcibios_find_pci_bus(dn) && PCI_DN(dn->parent))
		dn = dn->parent;

	PCI_DN(dn)->eeh_mode &= ~mode_flag;
	PCI_DN(dn)->eeh_check_count = 0;
	__eeh_clear_slot(dn, mode_flag);
	raw_spin_unlock_irqrestore(&confirm_error_lock, flags);
}

/**
 * eeh_dn_check_failure - Check if all 1's data is due to EEH slot freeze
 * @dn: device node
 * @dev: pci device, if known
 *
 * Check for an EEH failure for the given device node.  Call this
 * routine if the result of a read was all 0xff's and you want to
 * find out if this is due to an EEH slot freeze.  This routine
 * will query firmware for the EEH status.
 *
 * Returns 0 if there has not been an EEH error; otherwise returns
 * a non-zero value and queues up a slot isolation event notification.
 *
 * It is safe to call this routine in an interrupt context.
 */
int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev)
{
	int ret;
	int rets[3];
	unsigned long flags;
	struct pci_dn *pdn;
	int rc = 0;
	const char *location;

	total_mmio_ffs++;

	if (!eeh_subsystem_enabled)
		return 0;

	if (!dn) {
		no_dn++;
		return 0;
	}
	dn = find_device_pe(dn);
	pdn = PCI_DN(dn);

	/* Access to IO BARs might get this far and still not want checking. */
	if (!(pdn->eeh_mode & EEH_MODE_SUPPORTED) ||
	    pdn->eeh_mode & EEH_MODE_NOCHECK) {
		ignored_check++;
		pr_debug("EEH: Ignored check (%x) for %s %s\n",
			 pdn->eeh_mode, eeh_pci_name(dev), dn->full_name);
		return 0;
	}

	if (!pdn->eeh_config_addr && !pdn->eeh_pe_config_addr) {
		no_cfg_addr++;
		return 0;
	}

	/* If we already have a pending isolation event for this
	 * slot, we know it's bad already, we don't need to check.
	 * Do this checking under a lock; as multiple PCI devices
	 * in one slot might report errors simultaneously, and we
	 * only want one error recovery routine running.
	 */
	raw_spin_lock_irqsave(&confirm_error_lock, flags);
	rc = 1;
	if (pdn->eeh_mode & EEH_MODE_ISOLATED) {
		pdn->eeh_check_count++;
		if (pdn->eeh_check_count % EEH_MAX_FAILS == 0) {
			location = of_get_property(dn, "ibm,loc-code", NULL);
			printk(KERN_ERR "EEH: %d reads ignored for recovering device at "
				"location=%s driver=%s pci addr=%s\n",
				pdn->eeh_check_count, location,
				eeh_driver_name(dev), eeh_pci_name(dev));
			printk(KERN_ERR "EEH: Might be infinite loop in %s driver\n",
				eeh_driver_name(dev));
			dump_stack();
		}
		goto dn_unlock;
	}

	/*
	 * Now test for an EEH failure.  This is VERY expensive.
	 * Note that the eeh_config_addr may be a parent device
	 * in the case of a device behind a bridge, or it may be
	 * function zero of a multi-function device.
	 * In any case they must share a common PHB.
	 */
	ret = read_slot_reset_state(pdn, rets);

	/* If the call to firmware failed, punt */
	if (ret != 0) {
		printk(KERN_WARNING "EEH: read_slot_reset_state() failed; rc=%d dn=%s\n",
		       ret, dn->full_name);
		false_positives++;
		pdn->eeh_false_positives++;
		rc = 0;
		goto dn_unlock;
	}

	/* Note that config-io to empty slots may fail;
	 * they are empty when they don't have children.
	 */
	if ((rets[0] == 5) && (rets[2] == 0) && (dn->child == NULL)) {
		false_positives++;
		pdn->eeh_false_positives++;
		rc = 0;
		goto dn_unlock;
	}

	/* If EEH is not supported on this device, punt. */
	if (rets[1] != 1) {
		printk(KERN_WARNING "EEH: event on unsupported device, rc=%d dn=%s\n",
		       ret, dn->full_name);
		false_positives++;
		pdn->eeh_false_positives++;
		rc = 0;
		goto dn_unlock;
	}

	/* If not the kind of error we know about, punt. */
	if (rets[0] != 1 && rets[0] != 2 && rets[0] != 4 && rets[0] != 5) {
		false_positives++;
		pdn->eeh_false_positives++;
		rc = 0;
		goto dn_unlock;
	}

	slot_resets++;

	/* Avoid repeated reports of this failure, including problems
	 * with other functions on this device, and functions under
	 * bridges.
	 */
	eeh_mark_slot(dn, EEH_MODE_ISOLATED);
	raw_spin_unlock_irqrestore(&confirm_error_lock, flags);

	eeh_send_failure_event(dn, dev);

	/* Most EEH events are due to device driver bugs.  Having
	 * a stack trace will help the device-driver authors figure
	 * out what happened.  So print that out.
	 */
	dump_stack();
	return 1;

dn_unlock:
	raw_spin_unlock_irqrestore(&confirm_error_lock, flags);
	return rc;
}
EXPORT_SYMBOL_GPL(eeh_dn_check_failure);

/**
 * eeh_check_failure - Check if all 1's data is due to EEH slot freeze
 * @token: I/O token, should be address in the form 0xA....
 * @val: value, should be all 1's (XXX why do we need this arg??)
 *
 * Check for an EEH failure at the given token address.  Call this
 * routine if the result of a read was all 0xff's and you want to
 * find out if this is due to an EEH slot freeze event.  This routine
 * will query firmware for the EEH status.
 *
 * Note this routine is safe to call in an interrupt context.
 */
unsigned long eeh_check_failure(const volatile void __iomem *token, unsigned long val)
{
	unsigned long addr;
	struct pci_dev *dev;
	struct device_node *dn;

	/* Finding the phys addr + pci device; this is pretty quick. */
	addr = eeh_token_to_phys((unsigned long __force) token);
	dev = pci_get_device_by_addr(addr);
	if (!dev) {
		no_device++;
		return val;
	}

	dn = pci_device_to_OF_node(dev);
	eeh_dn_check_failure(dn, dev);

	pci_dev_put(dev);
	return val;
}
EXPORT_SYMBOL(eeh_check_failure);

/**
 * rtas_pci_enable - Enable MMIO or DMA transfers for this slot
 * @pdn: pci device node
 *
 * This routine should be called to reenable frozen MMIO or DMA
 * so that it would work correctly again. It's useful while doing
 * recovery or log collection on the indicated device.
 */
int
rtas_pci_enable(struct pci_dn *pdn, int function)
{
	int config_addr;
	int rc;

	/* Use PE configuration address, if present */
	config_addr = pdn->eeh_config_addr;
	if (pdn->eeh_pe_config_addr)
		config_addr = pdn->eeh_pe_config_addr;

	rc = rtas_call(ibm_set_eeh_option, 4, 1, NULL,
		       config_addr,
		       BUID_HI(pdn->phb->buid),
		       BUID_LO(pdn->phb->buid),
		       function);
	if (rc)
		printk(KERN_WARNING "EEH: Unexpected state change %d, err=%d dn=%s\n",
		       function, rc, pdn->node->full_name);

	rc = eeh_wait_for_slot_status(pdn, PCI_BUS_RESET_WAIT_MSEC);
	if ((rc == 4) && (function == EEH_THAW_MMIO))
		return 0;

	return rc;
}

/**
 * rtas_pci_slot_reset - Raises/Lowers the pci #RST line
 * @pdn: pci device node
 * @state: 1/0 to raise/lower the #RST
 *
 * Clear the EEH-frozen condition on a slot.  This routine
 * asserts the PCI #RST line if the 'state' argument is '1',
 * and drops the #RST line if 'state' is '0'.  This routine is
 * safe to call in an interrupt context.
 */
static void
rtas_pci_slot_reset(struct pci_dn *pdn, int state)
{
	int config_addr;
	int rc;

	BUG_ON(pdn == NULL);

	if (!pdn->phb) {
		printk(KERN_WARNING "EEH: in slot reset, device node %s has no phb\n",
		       pdn->node->full_name);
		return;
	}

	/* Use PE configuration address, if present */
	config_addr = pdn->eeh_config_addr;
	if (pdn->eeh_pe_config_addr)
		config_addr = pdn->eeh_pe_config_addr;

	rc = rtas_call(ibm_set_slot_reset, 4, 1, NULL,
		       config_addr,
		       BUID_HI(pdn->phb->buid),
		       BUID_LO(pdn->phb->buid),
		       state);

	/* Fundamental-reset not supported on this PE, try hot-reset */
	if (rc == -8 && state == 3) {
		rc = rtas_call(ibm_set_slot_reset, 4, 1, NULL,
			       config_addr,
			       BUID_HI(pdn->phb->buid),
			       BUID_LO(pdn->phb->buid), 1);
		if (rc)
			printk(KERN_WARNING
			       "EEH: Unable to reset the failed slot,"
			       " #RST=%d dn=%s\n",
			       rc, pdn->node->full_name);
	}
}

/**
 * pcibios_set_pcie_reset_state - Set PCI-E reset state
 * @dev: pci device struct
 * @state: reset state to enter
 *
 * Return value:
 *	0 if success
 */
int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state state)
{
	struct device_node *dn = pci_device_to_OF_node(dev);
	struct pci_dn *pdn = PCI_DN(dn);

	switch (state) {
	case pcie_deassert_reset:
		rtas_pci_slot_reset(pdn, 0);
		break;
	case pcie_hot_reset:
		rtas_pci_slot_reset(pdn, 1);
		break;
	case pcie_warm_reset:
		rtas_pci_slot_reset(pdn, 3);
		break;
	default:
		return -EINVAL;
	}

	return 0;
}

/**
 * __eeh_set_pe_freset - Check the required reset for child devices
 * @parent: parent device
 * @freset: return value
 *
 * Each device might have its preferred reset type: fundamental or
 * hot reset. The routine is used to collect the information from
 * the child devices so that they could be reset accordingly.
 */
void __eeh_set_pe_freset(struct device_node *parent, unsigned int *freset)
{
	struct device_node *dn;

	for_each_child_of_node(parent, dn) {
		if (PCI_DN(dn)) {
			struct pci_dev *dev = PCI_DN(dn)->pcidev;

			if (dev && dev->driver)
				*freset |= dev->needs_freset;

			__eeh_set_pe_freset(dn, freset);
		}
	}
}

/**
 * eeh_set_pe_freset - Check the required reset for the indicated device and its children
 * @dn: parent device
 * @freset: return value
 *
 * Each device might have its preferred reset type: fundamental or
 * hot reset. The routine is used to collect the information for
 * the indicated device and its children so that the whole group of
 * devices could be reset properly.
 */
void eeh_set_pe_freset(struct device_node *dn, unsigned int *freset)
{
	struct pci_dev *dev;

	dn = find_device_pe(dn);

	/* Back up one, since config addrs might be shared */
	if (!pcibios_find_pci_bus(dn) && PCI_DN(dn->parent))
		dn = dn->parent;

	dev = PCI_DN(dn)->pcidev;
	if (dev)
		*freset |= dev->needs_freset;

	__eeh_set_pe_freset(dn, freset);
}

/**
 * __rtas_set_slot_reset - Assert the pci #RST line for 1/4 second
 * @pdn: pci device node to be reset.
 *
 * Assert the PCI #RST line for 1/4 second.
 */
static void __rtas_set_slot_reset(struct pci_dn *pdn)
{
	unsigned int freset = 0;

	/* Determine type of EEH reset required for
	 * Partitionable Endpoint, a hot-reset (1)
	 * or a fundamental reset (3).
	 * A fundamental reset required by any device under
	 * Partitionable Endpoint trumps hot-reset.
	 */
	eeh_set_pe_freset(pdn->node, &freset);

	if (freset)
		rtas_pci_slot_reset(pdn, 3);
	else
		rtas_pci_slot_reset(pdn, 1);

	/* The PCI bus requires that the reset be held high for at least
	 * 100 milliseconds. We wait a bit longer 'just in case'.
	 */
#define PCI_BUS_RST_HOLD_TIME_MSEC 250
	msleep(PCI_BUS_RST_HOLD_TIME_MSEC);

	/* We might get hit with another EEH freeze as soon as the
	 * pci slot reset line is dropped. Make sure we don't miss
	 * these, and clear the flag now.
	 */
	eeh_clear_slot(pdn->node, EEH_MODE_ISOLATED);

	rtas_pci_slot_reset(pdn, 0);

	/* After a PCI slot has been reset, the PCI Express spec requires
	 * a 1.5 second idle time for the bus to stabilize, before starting
	 * up traffic.
	 */
#define PCI_BUS_SETTLE_TIME_MSEC 1800
	msleep(PCI_BUS_SETTLE_TIME_MSEC);
}

/**
 * rtas_set_slot_reset - Reset the indicated PE
 * @pdn: PCI device node
 *
 * This routine should be called to reset the indicated device, including
 * its PE. A PE might include multiple PCI devices and sometimes PCI bridges
 * might be involved as well.
 */
int rtas_set_slot_reset(struct pci_dn *pdn)
{
	int i, rc;

	/* Take three shots at resetting the bus */
	for (i=0; i<3; i++) {
		__rtas_set_slot_reset(pdn);

		rc = eeh_wait_for_slot_status(pdn, PCI_BUS_RESET_WAIT_MSEC);
		if (rc == 0)
			return 0;

		if (rc < 0) {
			printk(KERN_ERR "EEH: unrecoverable slot failure %s\n",
			       pdn->node->full_name);
			return -1;
		}
		printk(KERN_ERR "EEH: bus reset %d failed on slot %s, rc=%d\n",
		       i+1, pdn->node->full_name, rc);
	}

	return -1;
}

/** Save and restore of PCI BARs
 *
 * Although firmware will set up BARs during boot, it doesn't
 * set up device BARs after a device reset, although it will,
 * if requested, set up bridge configuration. Thus, we need to
 * configure the PCI devices ourselves.
 */

/**
 * __restore_bars - Restore the Base Address Registers
 * @pdn: pci device node
 *
 * Loads the PCI configuration space base address registers,
 * the expansion ROM base address, the latency timer, etc.
 * from the saved values in the device node.
 */
static inline void __restore_bars(struct pci_dn *pdn)
{
	int i;
	u32 cmd;

	if (NULL == pdn->phb)
		return;
	for (i=4; i<10; i++) {
		rtas_write_config(pdn, i*4, 4, pdn->config_space[i]);
	}

	/* 12 == Expansion ROM Address */
	rtas_write_config(pdn, 12*4, 4, pdn->config_space[12]);
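
	/* pdn->config_space[] holds each config-space dword as a
	 * host-endian u32 (the form rtas_read_config() returns), so on
	 * big-endian ppc64 the byte for config offset OFF sits at the
	 * byte-reversed position within its word.  BYTE_SWAP() computes
	 * that position and SAVED_BYTE() uses it to pull single saved
	 * bytes back out (cache line size and latency timer below).
	 */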
#define BYTE_SWAP(OFF) (8*((OFF)/4)+3-(OFF))
#define SAVED_BYTE(OFF) (((u8 *)(pdn->config_space))[BYTE_SWAP(OFF)])

	rtas_write_config(pdn, PCI_CACHE_LINE_SIZE, 1,
			  SAVED_BYTE(PCI_CACHE_LINE_SIZE));

	rtas_write_config(pdn, PCI_LATENCY_TIMER, 1,
			  SAVED_BYTE(PCI_LATENCY_TIMER));

	/* max latency, min grant, interrupt pin and line */
	rtas_write_config(pdn, 15*4, 4, pdn->config_space[15]);

	/* Restore PERR & SERR bits, some devices require it,
	 * don't touch the other command bits
	 */
	rtas_read_config(pdn, PCI_COMMAND, 4, &cmd);
	if (pdn->config_space[1] & PCI_COMMAND_PARITY)
		cmd |= PCI_COMMAND_PARITY;
	else
		cmd &= ~PCI_COMMAND_PARITY;
	if (pdn->config_space[1] & PCI_COMMAND_SERR)
		cmd |= PCI_COMMAND_SERR;
	else
		cmd &= ~PCI_COMMAND_SERR;
	rtas_write_config(pdn, PCI_COMMAND, 4, cmd);
}

/**
 * eeh_restore_bars - Restore the PCI config space info
 * @pdn: PCI device node
 *
 * This routine performs a recursive walk to the children
 * of this device as well.
 */
void eeh_restore_bars(struct pci_dn *pdn)
{
	struct device_node *dn;

	if (!pdn)
		return;

	if ((pdn->eeh_mode & EEH_MODE_SUPPORTED) && !IS_BRIDGE(pdn->class_code))
		__restore_bars(pdn);

	for_each_child_of_node(pdn->node, dn)
		eeh_restore_bars(PCI_DN(dn));
}

/**
 * eeh_save_bars - Save device bars
 * @pdn: PCI device node
 *
 * Save the values of the device bars. Unlike the restore
 * routine, this routine is *not* recursive. This is because
 * PCI devices are added individually; but, for the restore,
 * an entire slot is reset at a time.
 */
static void eeh_save_bars(struct pci_dn *pdn)
{
	int i;

	if (!pdn)
		return;

	for (i = 0; i < 16; i++)
		rtas_read_config(pdn, i * 4, 4, &pdn->config_space[i]);
}

/**
 * rtas_configure_bridge - Configure PCI bridges for the indicated PE
 * @pdn: PCI device node
 *
 * PCI bridges might be included in a PE. In order to make the PE work
 * again, the included PCI bridges should be recovered after the PE
 * encounters the frozen state.
 */
void
rtas_configure_bridge(struct pci_dn *pdn)
{
	int config_addr;
	int rc;
	int token;

	/* Use PE configuration address, if present */
	config_addr = pdn->eeh_config_addr;
	if (pdn->eeh_pe_config_addr)
		config_addr = pdn->eeh_pe_config_addr;

	/* Use new configure-pe function, if supported */
	if (ibm_configure_pe != RTAS_UNKNOWN_SERVICE)
		token = ibm_configure_pe;
	else
		token = ibm_configure_bridge;

	rc = rtas_call(token, 3, 1, NULL,
		       config_addr,
		       BUID_HI(pdn->phb->buid),
		       BUID_LO(pdn->phb->buid));
	if (rc) {
		printk(KERN_WARNING "EEH: Unable to configure device bridge (%d) for %s\n",
		       rc, pdn->node->full_name);
	}
}

#define EEH_ENABLE 1

struct eeh_early_enable_info {
	unsigned int buid_hi;
	unsigned int buid_lo;
};

/**
 * get_pe_addr - Retrieve PE address with given BDF address
 * @config_addr: BDF address
 * @info: BUID of the associated PHB
 *
 * There are two kinds of addresses in the EEH core components:
 * the BDF address and the PE address. A dedicated platform dependent
 * function call is used to retrieve the PE address for a given BDF
 * address, and the EEH core prefers the PE address over the BDF
 * address.
 */
static int get_pe_addr(int config_addr,
		       struct eeh_early_enable_info *info)
{
	unsigned int rets[3];
	int ret;

	/* Use latest config-addr token on power6 */
	if (ibm_get_config_addr_info2 != RTAS_UNKNOWN_SERVICE) {
		/* Make sure we have a PE in hand */
		ret = rtas_call(ibm_get_config_addr_info2, 4, 2, rets,
				config_addr, info->buid_hi, info->buid_lo, 1);
		if (ret || (rets[0] == 0))
			return 0;

		ret = rtas_call(ibm_get_config_addr_info2, 4, 2, rets,
				config_addr, info->buid_hi, info->buid_lo, 0);
		if (ret)
			return 0;
		return rets[0];
	}

	/* Use older config-addr token on power5 */
	if (ibm_get_config_addr_info != RTAS_UNKNOWN_SERVICE) {
		ret = rtas_call(ibm_get_config_addr_info, 4, 2, rets,
				config_addr, info->buid_hi, info->buid_lo, 0);
		if (ret)
			return 0;
		return rets[0];
	}
	return 0;
}

/**
 * early_enable_eeh - Early enable EEH on the indicated device
 * @dn: device node
 * @data: BUID
 *
 * Enable EEH functionality on the specified PCI device. The function
 * is expected to be called before real PCI probing is done. However,
 * the PHBs have been initialized at this point.
 */
static void *early_enable_eeh(struct device_node *dn, void *data)
{
	unsigned int rets[3];
	struct eeh_early_enable_info *info = data;
	int ret;
	const u32 *class_code = of_get_property(dn, "class-code", NULL);
	const u32 *vendor_id = of_get_property(dn, "vendor-id", NULL);
	const u32 *device_id = of_get_property(dn, "device-id", NULL);
	const u32 *regs;
	int enable;
	struct pci_dn *pdn = PCI_DN(dn);

	pdn->class_code = 0;
	pdn->eeh_mode = 0;
	pdn->eeh_check_count = 0;
	pdn->eeh_freeze_count = 0;
	pdn->eeh_false_positives = 0;

	if (!of_device_is_available(dn))
		return NULL;

	/* Ignore bad nodes. */
	if (!class_code || !vendor_id || !device_id)
		return NULL;

	/* There is nothing to check on PCI to ISA bridges */
	if (dn->type && !strcmp(dn->type, "isa")) {
		pdn->eeh_mode |= EEH_MODE_NOCHECK;
		return NULL;
	}
	pdn->class_code = *class_code;

	/* Ok... see if this device supports EEH.  Some do, some don't,
	 * and the only way to find out is to check each and every one.
	 */
	regs = of_get_property(dn, "reg", NULL);
	if (regs) {
		/* First register entry is addr (00BBSS00) */
		/* Try to enable eeh */
		ret = rtas_call(ibm_set_eeh_option, 4, 1, NULL,
				regs[0], info->buid_hi, info->buid_lo,
				EEH_ENABLE);

		enable = 0;
		if (ret == 0) {
			pdn->eeh_config_addr = regs[0];

			/* If the newer, better, ibm,get-config-addr-info is supported,
			 * then use that instead.
			 */
			pdn->eeh_pe_config_addr = get_pe_addr(pdn->eeh_config_addr, info);

			/* Some older systems (Power4) allow the
			 * ibm,set-eeh-option call to succeed even on nodes
			 * where EEH is not supported. Verify support
			 * explicitly.
			 */
			ret = read_slot_reset_state(pdn, rets);
			if ((ret == 0) && (rets[1] == 1))
				enable = 1;
		}

		if (enable) {
			eeh_subsystem_enabled = 1;
			pdn->eeh_mode |= EEH_MODE_SUPPORTED;

			pr_debug("EEH: %s: eeh enabled, config=%x pe_config=%x\n",
				 dn->full_name, pdn->eeh_config_addr,
				 pdn->eeh_pe_config_addr);
		} else {
			/* This device doesn't support EEH, but it may have an
			 * EEH parent, in which case we mark it as supported.
			 */
			if (dn->parent && PCI_DN(dn->parent) &&
			    (PCI_DN(dn->parent)->eeh_mode & EEH_MODE_SUPPORTED)) {
				/* Parent supports EEH. */
				pdn->eeh_mode |= EEH_MODE_SUPPORTED;
				pdn->eeh_config_addr = PCI_DN(dn->parent)->eeh_config_addr;
				return NULL;
			}
		}
	} else {
		printk(KERN_WARNING "EEH: %s: unable to get reg property.\n",
		       dn->full_name);
	}

	eeh_save_bars(pdn);
	return NULL;
}

/**
 * eeh_init - EEH initialization
 *
 * Initialize EEH by trying to enable it for all of the adapters in the system.
 * As a side effect we can determine here if eeh is supported at all.
 * Note that we leave EEH on so failed config cycles won't cause a machine
 * check.  If a user turns off EEH for a particular adapter they are really
 * telling Linux to ignore errors.  Some hardware (e.g. POWER5) won't
 * grant access to a slot if EEH isn't enabled, and so we always enable
 * EEH for all slots/all devices.
 *
 * The eeh-force-off option disables EEH checking globally, for all slots.
 * Even if force-off is set, the EEH hardware is still enabled, so that
 * newer systems can boot.
 */
void __init eeh_init(void)
{
	struct device_node *phb, *np;
	struct eeh_early_enable_info info;

	raw_spin_lock_init(&confirm_error_lock);
	spin_lock_init(&slot_errbuf_lock);

	np = of_find_node_by_path("/rtas");
	if (np == NULL)
		return;

	ibm_set_eeh_option = rtas_token("ibm,set-eeh-option");
	ibm_set_slot_reset = rtas_token("ibm,set-slot-reset");
	ibm_read_slot_reset_state2 = rtas_token("ibm,read-slot-reset-state2");
	ibm_read_slot_reset_state = rtas_token("ibm,read-slot-reset-state");
	ibm_slot_error_detail = rtas_token("ibm,slot-error-detail");
	ibm_get_config_addr_info = rtas_token("ibm,get-config-addr-info");
	ibm_get_config_addr_info2 = rtas_token("ibm,get-config-addr-info2");
	ibm_configure_bridge = rtas_token("ibm,configure-bridge");
	ibm_configure_pe = rtas_token("ibm,configure-pe");

	if (ibm_set_eeh_option == RTAS_UNKNOWN_SERVICE)
		return;

	eeh_error_buf_size = rtas_token("rtas-error-log-max");
	if (eeh_error_buf_size == RTAS_UNKNOWN_SERVICE) {
		eeh_error_buf_size = 1024;
	}
	if (eeh_error_buf_size > RTAS_ERROR_LOG_MAX) {
		printk(KERN_WARNING "EEH: rtas-error-log-max is bigger than allocated "
		       "buffer ! (%d vs %d)", eeh_error_buf_size, RTAS_ERROR_LOG_MAX);
		eeh_error_buf_size = RTAS_ERROR_LOG_MAX;
	}

	/* Enable EEH for all adapters.  Note that eeh requires buid's */
	for (phb = of_find_node_by_name(NULL, "pci"); phb;
	     phb = of_find_node_by_name(phb, "pci")) {
		unsigned long buid;

		buid = get_phb_buid(phb);
		if (buid == 0 || PCI_DN(phb) == NULL)
			continue;

		info.buid_lo = BUID_LO(buid);
		info.buid_hi = BUID_HI(buid);
		traverse_pci_devices(phb, early_enable_eeh, &info);
	}

	if (eeh_subsystem_enabled)
		printk(KERN_INFO "EEH: PCI Enhanced I/O Error Handling Enabled\n");
	else
		printk(KERN_WARNING "EEH: No capable adapters found\n");
}

/**
 * eeh_add_device_early - Enable EEH for the indicated device_node
 * @dn: device node for which to set up EEH
 *
 * This routine must be used to perform EEH initialization for PCI
 * devices that were added after system boot (e.g. hotplug, dlpar).
 * This routine must be called before any i/o is performed to the
 * adapter (including any config-space i/o).
 * Whether this actually enables EEH or not for this device depends
 * on the CEC architecture, type of the device, on earlier boot
 * command-line arguments, etc.
 */
static void eeh_add_device_early(struct device_node *dn)
{
	struct pci_controller *phb;
	struct eeh_early_enable_info info;

	if (!dn || !PCI_DN(dn))
		return;
	phb = PCI_DN(dn)->phb;

	/* USB Bus children of PCI devices will not have BUID's */
	if (NULL == phb || 0 == phb->buid)
		return;

	info.buid_hi = BUID_HI(phb->buid);
	info.buid_lo = BUID_LO(phb->buid);
	early_enable_eeh(dn, &info);
}

/**
 * eeh_add_device_tree_early - Enable EEH for the indicated device
 * @dn: device node
 *
 * This routine must be used to perform EEH initialization for the
 * indicated PCI device that was added after system boot (e.g.
 * hotplug, dlpar).
 */
void eeh_add_device_tree_early(struct device_node *dn)
{
	struct device_node *sib;

	for_each_child_of_node(dn, sib)
		eeh_add_device_tree_early(sib);
	eeh_add_device_early(dn);
}
EXPORT_SYMBOL_GPL(eeh_add_device_tree_early);

/**
 * eeh_add_device_late - Perform EEH initialization for the indicated pci device
 * @dev: pci device for which to set up EEH
 *
 * This routine must be used to complete EEH initialization for PCI
 * devices that were added after system boot (e.g. hotplug, dlpar).
 */
static void eeh_add_device_late(struct pci_dev *dev)
{
	struct device_node *dn;
	struct pci_dn *pdn;

	if (!dev || !eeh_subsystem_enabled)
		return;

	pr_debug("EEH: Adding device %s\n", pci_name(dev));

	dn = pci_device_to_OF_node(dev);
	pdn = PCI_DN(dn);
	if (pdn->pcidev == dev) {
		pr_debug("EEH: Already referenced !\n");
		return;
	}
	WARN_ON(pdn->pcidev);

	pci_dev_get(dev);
	pdn->pcidev = dev;

	pci_addr_cache_insert_device(dev);
	eeh_sysfs_add_device(dev);
}

/**
 * eeh_add_device_tree_late - Perform EEH initialization for the indicated PCI bus
 * @bus: PCI bus
 *
 * This routine must be used to perform EEH initialization for PCI
 * devices which are attached to the indicated PCI bus. The PCI bus
 * is added after system boot through hotplug or dlpar.
 */
void eeh_add_device_tree_late(struct pci_bus *bus)
{
	struct pci_dev *dev;

	list_for_each_entry(dev, &bus->devices, bus_list) {
		eeh_add_device_late(dev);
		if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) {
			struct pci_bus *subbus = dev->subordinate;
			if (subbus)
				eeh_add_device_tree_late(subbus);
		}
	}
}
EXPORT_SYMBOL_GPL(eeh_add_device_tree_late);

/**
 * eeh_remove_device - Undo EEH setup for the indicated pci device
 * @dev: pci device to be removed
 *
 * This routine should be called when a device is removed from
 * a running system (e.g. by hotplug or dlpar).  It unregisters
 * the PCI device from the EEH subsystem.  I/O errors affecting
 * this device will no longer be detected after this call; thus,
 * i/o errors affecting this slot may leave this device unusable.
 */
static void eeh_remove_device(struct pci_dev *dev)
{
	struct device_node *dn;

	if (!dev || !eeh_subsystem_enabled)
		return;

	/* Unregister the device with the EEH/PCI address search system */
	pr_debug("EEH: Removing device %s\n", pci_name(dev));

	dn = pci_device_to_OF_node(dev);
	if (PCI_DN(dn)->pcidev == NULL) {
		pr_debug("EEH: Not referenced !\n");
		return;
	}
	PCI_DN(dn)->pcidev = NULL;
	pci_dev_put(dev);

	pci_addr_cache_remove_device(dev);
	eeh_sysfs_remove_device(dev);
}

/**
 * eeh_remove_bus_device - Undo EEH setup for the indicated PCI device
 * @dev: PCI device
 *
 * This routine must be called when a device is removed from the
 * running system through hotplug or dlpar. The corresponding
 * PCI address cache will be removed.
 */
void eeh_remove_bus_device(struct pci_dev *dev)
{
	struct pci_bus *bus = dev->subordinate;
	struct pci_dev *child, *tmp;

	eeh_remove_device(dev);

	if (bus && dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) {
		list_for_each_entry_safe(child, tmp, &bus->devices, bus_list)
			eeh_remove_bus_device(child);
	}
}
EXPORT_SYMBOL_GPL(eeh_remove_bus_device);

static int proc_eeh_show(struct seq_file *m, void *v)
{
	if (0 == eeh_subsystem_enabled) {
		seq_printf(m, "EEH Subsystem is globally disabled\n");
		seq_printf(m, "eeh_total_mmio_ffs=%ld\n", total_mmio_ffs);
	} else {
		seq_printf(m, "EEH Subsystem is enabled\n");
		seq_printf(m,
			   "no device=%ld\n"
			   "no device node=%ld\n"
			   "no config address=%ld\n"
			   "check not wanted=%ld\n"
			   "eeh_total_mmio_ffs=%ld\n"
			   "eeh_false_positives=%ld\n"
			   "eeh_slot_resets=%ld\n",
			   no_device, no_dn, no_cfg_addr,
			   ignored_check, total_mmio_ffs,
			   false_positives,
			   slot_resets);
	}

	return 0;
}

static int proc_eeh_open(struct inode *inode, struct file *file)
{
	return single_open(file, proc_eeh_show, NULL);
}

static const struct file_operations proc_eeh_operations = {
	.open    = proc_eeh_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = single_release,
};

static int __init eeh_init_proc(void)
{
	if (machine_is(pseries))
		proc_create("powerpc/eeh", 0, NULL, &proc_eeh_operations);
	return 0;
}
__initcall(eeh_init_proc);