eeh.c 31 KB


  1. /*
  2. * eeh.c
  3. * Copyright (C) 2001 Dave Engebretsen & Todd Inglett IBM Corporation
  4. *
  5. * This program is free software; you can redistribute it and/or modify
  6. * it under the terms of the GNU General Public License as published by
  7. * the Free Software Foundation; either version 2 of the License, or
  8. * (at your option) any later version.
  9. *
  10. * This program is distributed in the hope that it will be useful,
  11. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. * GNU General Public License for more details.
  14. *
  15. * You should have received a copy of the GNU General Public License
  16. * along with this program; if not, write to the Free Software
  17. * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  18. */
  19. #include <linux/delay.h>
  20. #include <linux/init.h>
  21. #include <linux/list.h>
  22. #include <linux/pci.h>
  23. #include <linux/proc_fs.h>
  24. #include <linux/rbtree.h>
  25. #include <linux/seq_file.h>
  26. #include <linux/spinlock.h>
  27. #include <asm/atomic.h>
  28. #include <asm/eeh.h>
  29. #include <asm/eeh_event.h>
  30. #include <asm/io.h>
  31. #include <asm/machdep.h>
  32. #include <asm/ppc-pci.h>
  33. #include <asm/rtas.h>
  34. #include <asm/systemcfg.h>
  35. #undef DEBUG
  36. /** Overview:
  37. * EEH, or "Extended Error Handling" is a PCI bridge technology for
  38. * dealing with PCI bus errors that can't be dealt with within the
  39. * usual PCI framework, except by check-stopping the CPU. Systems
  40. * that are designed for high-availability/reliability cannot afford
  41. * to crash due to a "mere" PCI error, thus the need for EEH.
  42. * An EEH-capable bridge operates by converting a detected error
  43. * into a "slot freeze", taking the PCI adapter off-line, making
  44. * the slot behave, from the OS'es point of view, as if the slot
  45. * were "empty": all reads return 0xff's and all writes are silently
  46. * ignored. EEH slot isolation events can be triggered by parity
  47. * errors on the address or data busses (e.g. during posted writes),
  48. * which in turn might be caused by low voltage on the bus, dust,
  49. * vibration, humidity, radioactivity or plain-old failed hardware.
  50. *
  51. * Note, however, that one of the leading causes of EEH slot
  52. * freeze events are buggy device drivers, buggy device microcode,
  53. * or buggy device hardware. This is because any attempt by the
  54. * device to bus-master data to a memory address that is not
  55. * assigned to the device will trigger a slot freeze. (The idea
  56. * is to prevent devices-gone-wild from corrupting system memory).
  57. * Buggy hardware/drivers will have a miserable time co-existing
  58. * with EEH.
  59. *
  60. * Ideally, a PCI device driver, when suspecting that an isolation
  61. * event has occurred (e.g. by reading 0xff's), will then ask EEH
  62. * whether this is the case, and then take appropriate steps to
  63. * reset the PCI slot, the PCI device, and then resume operations.
  64. * However, until that day, the checking is done here, with the
  65. * eeh_check_failure() routine embedded in the MMIO macros. If
  66. * the slot is found to be isolated, an "EEH Event" is synthesized
  67. * and sent out for processing.
  68. */
  69. /* If a device driver keeps reading an MMIO register in an interrupt
  70. * handler after a slot isolation event has occurred, we assume it
  71. * is broken and panic. This sets the threshold for how many read
  72. * attempts we allow before panicking.
  73. */
  74. #define EEH_MAX_FAILS 100000 /* compared against pdn->eeh_check_count in eeh_dn_check_failure() */
  75. /* RTAS tokens */
  76. static int ibm_set_eeh_option; /* token for "ibm,set-eeh-option", looked up in eeh_init() */
  77. static int ibm_set_slot_reset; /* token for "ibm,set-slot-reset" */
  78. static int ibm_read_slot_reset_state; /* token for "ibm,read-slot-reset-state"; used when the "2" variant is unknown */
  79. static int ibm_read_slot_reset_state2; /* token for "ibm,read-slot-reset-state2"; preferred by read_slot_reset_state() */
  80. static int ibm_slot_error_detail; /* token for "ibm,slot-error-detail" */
  81. static int eeh_subsystem_enabled; /* set to 1 once early_enable_eeh() enables EEH on any device */
  82. /* Lock to avoid races due to multiple reports of an error */
  83. static DEFINE_SPINLOCK(confirm_error_lock);
  84. /* Buffer for reporting slot-error-detail rtas calls */
  85. static unsigned char slot_errbuf[RTAS_ERROR_LOG_MAX];
  86. static DEFINE_SPINLOCK(slot_errbuf_lock); /* held around every use of slot_errbuf in eeh_slot_error_detail() */
  87. static int eeh_error_buf_size; /* bytes of slot_errbuf in use; clamped to RTAS_ERROR_LOG_MAX in eeh_init() */
  88. /* System monitoring statistics */
  89. static DEFINE_PER_CPU(unsigned long, no_device); /* eeh_check_failure(): address matched no cached device */
  90. static DEFINE_PER_CPU(unsigned long, no_dn); /* eeh_dn_check_failure(): called with a NULL device node */
  91. static DEFINE_PER_CPU(unsigned long, no_cfg_addr); /* eeh_dn_check_failure(): node has no eeh_config_addr */
  92. static DEFINE_PER_CPU(unsigned long, ignored_check); /* check skipped: EEH unsupported or NOCHECK set */
  93. static DEFINE_PER_CPU(unsigned long, total_mmio_ffs); /* every call to eeh_dn_check_failure() */
  94. static DEFINE_PER_CPU(unsigned long, false_positives); /* firmware did not confirm a slot freeze */
  95. static DEFINE_PER_CPU(unsigned long, ignored_failures); /* NOTE(review): not updated anywhere in the visible code */
  96. static DEFINE_PER_CPU(unsigned long, slot_resets); /* confirmed freezes for which an event was sent */
  97. /**
  98. * The pci address cache subsystem. This subsystem places
  99. * PCI device address resources into a red-black tree, sorted
  100. * according to the address range, so that given only an i/o
  101. * address, the corresponding PCI device can be **quickly**
  102. * found. It is safe to perform an address lookup in an interrupt
  103. * context; this ability is an important feature.
  104. *
  105. * Currently, the only customer of this code is the EEH subsystem;
  106. * thus, this code has been somewhat tailored to suit EEH better.
  107. * In particular, the cache does *not* hold the addresses of devices
  108. * for which EEH is not enabled.
  109. *
  110. * (Implementation Note: The RB tree seems to be better/faster
  111. * than any hash algo I could think of for this problem, even
  112. * with the penalty of slow pointer chases for d-cache misses).
  113. */
  114. struct pci_io_addr_range /* one cached address range, owned by a single PCI device */
  115. {
  116. struct rb_node rb_node; /* linkage into pci_io_addr_cache_root.rb_root */
  117. unsigned long addr_lo; /* lowest address of the range (inclusive) */
  118. unsigned long addr_hi; /* highest address of the range (inclusive) */
  119. struct pci_dev *pcidev; /* device that implements this range */
  120. unsigned int flags; /* resource flags; IORESOURCE_IO distinguishes port i/o from memory */
  121. };
  122. static struct pci_io_addr_cache /* the single, file-wide address cache instance */
  123. {
  124. struct rb_root rb_root; /* tree of struct pci_io_addr_range, ordered by address */
  125. spinlock_t piar_lock; /* taken (irqsave) around lookups, inserts and removals */
  126. } pci_io_addr_cache_root;
  127. static inline struct pci_dev *__pci_get_device_by_addr(unsigned long addr)
  128. {
  129. struct rb_node *n = pci_io_addr_cache_root.rb_root.rb_node;
  130. while (n) {
  131. struct pci_io_addr_range *piar;
  132. piar = rb_entry(n, struct pci_io_addr_range, rb_node);
  133. if (addr < piar->addr_lo) {
  134. n = n->rb_left;
  135. } else {
  136. if (addr > piar->addr_hi) {
  137. n = n->rb_right;
  138. } else {
  139. pci_dev_get(piar->pcidev);
  140. return piar->pcidev;
  141. }
  142. }
  143. }
  144. return NULL;
  145. }
  146. /**
  147. * pci_get_device_by_addr - Get device, given only address
  148. * @addr: mmio (PIO) phys address or i/o port number
  149. *
  150. * Given an mmio phys address, or a port number, find a pci device
  151. * that implements this address. Be sure to pci_dev_put the device
  152. * when finished. I/O port numbers are assumed to be offset
  153. * from zero (that is, they do *not* have pci_io_addr added in).
  154. * It is safe to call this function within an interrupt.
  155. */
  156. static struct pci_dev *pci_get_device_by_addr(unsigned long addr)
  157. {
  158. struct pci_dev *dev;
  159. unsigned long flags;
  160. spin_lock_irqsave(&pci_io_addr_cache_root.piar_lock, flags);
  161. dev = __pci_get_device_by_addr(addr);
  162. spin_unlock_irqrestore(&pci_io_addr_cache_root.piar_lock, flags);
  163. return dev;
  164. }
  #ifdef DEBUG
  /*
   * Debug aid: walk the address cache in ascending address order and
   * print one line per cached range (kind, index, bounds, device name).
   */
  static void pci_addr_cache_print(struct pci_io_addr_cache *cache)
  {
      struct rb_node *node;
      int idx = 0;

      for (node = rb_first(&cache->rb_root); node; node = rb_next(node)) {
          struct pci_io_addr_range *range =
              rb_entry(node, struct pci_io_addr_range, rb_node);
          const char *kind = (range->flags & IORESOURCE_IO) ? "i/o" : "mem";

          printk(KERN_DEBUG "PCI: %s addr range %d [%lx-%lx]: %s\n",
                 kind, idx,
                 range->addr_lo, range->addr_hi, pci_name(range->pcidev));
          idx++;
      }
  }
  #endif
  186. /* Insert address range into the rb tree. */
  187. static struct pci_io_addr_range *
  188. pci_addr_cache_insert(struct pci_dev *dev, unsigned long alo,
  189. unsigned long ahi, unsigned int flags)
  190. {
  191. struct rb_node **p = &pci_io_addr_cache_root.rb_root.rb_node;
  192. struct rb_node *parent = NULL;
  193. struct pci_io_addr_range *piar;
  194. /* Walk tree, find a place to insert into tree */
  195. while (*p) {
  196. parent = *p;
  197. piar = rb_entry(parent, struct pci_io_addr_range, rb_node);
  198. if (ahi < piar->addr_lo) {
  199. p = &parent->rb_left;
  200. } else if (alo > piar->addr_hi) {
  201. p = &parent->rb_right;
  202. } else {
  203. if (dev != piar->pcidev ||
  204. alo != piar->addr_lo || ahi != piar->addr_hi) {
  205. printk(KERN_WARNING "PIAR: overlapping address range\n");
  206. }
  207. return piar;
  208. }
  209. }
  210. piar = (struct pci_io_addr_range *)kmalloc(sizeof(struct pci_io_addr_range), GFP_ATOMIC);
  211. if (!piar)
  212. return NULL;
  213. piar->addr_lo = alo;
  214. piar->addr_hi = ahi;
  215. piar->pcidev = dev;
  216. piar->flags = flags;
  217. #ifdef DEBUG
  218. printk(KERN_DEBUG "PIAR: insert range=[%lx:%lx] dev=%s\n",
  219. alo, ahi, pci_name (dev));
  220. #endif
  221. rb_link_node(&piar->rb_node, parent, p);
  222. rb_insert_color(&piar->rb_node, &pci_io_addr_cache_root.rb_root);
  223. return piar;
  224. }
  225. static void __pci_addr_cache_insert_device(struct pci_dev *dev)
  226. {
  227. struct device_node *dn;
  228. struct pci_dn *pdn;
  229. int i;
  230. int inserted = 0;
  231. dn = pci_device_to_OF_node(dev);
  232. if (!dn) {
  233. printk(KERN_WARNING "PCI: no pci dn found for dev=%s\n", pci_name(dev));
  234. return;
  235. }
  236. /* Skip any devices for which EEH is not enabled. */
  237. pdn = PCI_DN(dn);
  238. if (!(pdn->eeh_mode & EEH_MODE_SUPPORTED) ||
  239. pdn->eeh_mode & EEH_MODE_NOCHECK) {
  240. #ifdef DEBUG
  241. printk(KERN_INFO "PCI: skip building address cache for=%s - %s\n",
  242. pci_name(dev), pdn->node->full_name);
  243. #endif
  244. return;
  245. }
  246. /* The cache holds a reference to the device... */
  247. pci_dev_get(dev);
  248. /* Walk resources on this device, poke them into the tree */
  249. for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
  250. unsigned long start = pci_resource_start(dev,i);
  251. unsigned long end = pci_resource_end(dev,i);
  252. unsigned int flags = pci_resource_flags(dev,i);
  253. /* We are interested only bus addresses, not dma or other stuff */
  254. if (0 == (flags & (IORESOURCE_IO | IORESOURCE_MEM)))
  255. continue;
  256. if (start == 0 || ~start == 0 || end == 0 || ~end == 0)
  257. continue;
  258. pci_addr_cache_insert(dev, start, end, flags);
  259. inserted = 1;
  260. }
  261. /* If there was nothing to add, the cache has no reference... */
  262. if (!inserted)
  263. pci_dev_put(dev);
  264. }
  265. /**
  266. * pci_addr_cache_insert_device - Add a device to the address cache
  267. * @dev: PCI device whose I/O addresses we are interested in.
  268. *
  269. * In order to support the fast lookup of devices based on addresses,
  270. * we maintain a cache of devices that can be quickly searched.
  271. * This routine adds a device to that cache.
  272. */
  273. static void pci_addr_cache_insert_device(struct pci_dev *dev)
  274. {
  275. unsigned long flags;
  276. spin_lock_irqsave(&pci_io_addr_cache_root.piar_lock, flags);
  277. __pci_addr_cache_insert_device(dev);
  278. spin_unlock_irqrestore(&pci_io_addr_cache_root.piar_lock, flags);
  279. }
  280. static inline void __pci_addr_cache_remove_device(struct pci_dev *dev)
  281. {
  282. struct rb_node *n;
  283. int removed = 0;
  284. restart:
  285. n = rb_first(&pci_io_addr_cache_root.rb_root);
  286. while (n) {
  287. struct pci_io_addr_range *piar;
  288. piar = rb_entry(n, struct pci_io_addr_range, rb_node);
  289. if (piar->pcidev == dev) {
  290. rb_erase(n, &pci_io_addr_cache_root.rb_root);
  291. removed = 1;
  292. kfree(piar);
  293. goto restart;
  294. }
  295. n = rb_next(n);
  296. }
  297. /* The cache no longer holds its reference to this device... */
  298. if (removed)
  299. pci_dev_put(dev);
  300. }
  301. /**
  302. * pci_addr_cache_remove_device - remove pci device from addr cache
  303. * @dev: device to remove
  304. *
  305. * Remove a device from the addr-cache tree.
  306. * This is potentially expensive, since it will walk
  307. * the tree multiple times (once per resource).
  308. * But so what; device removal doesn't need to be that fast.
  309. */
  310. static void pci_addr_cache_remove_device(struct pci_dev *dev)
  311. {
  312. unsigned long flags;
  313. spin_lock_irqsave(&pci_io_addr_cache_root.piar_lock, flags);
  314. __pci_addr_cache_remove_device(dev);
  315. spin_unlock_irqrestore(&pci_io_addr_cache_root.piar_lock, flags);
  316. }
  317. /**
  318. * pci_addr_cache_build - Build a cache of I/O addresses
  319. *
  320. * Build a cache of pci i/o addresses. This cache will be used to
  321. * find the pci device that corresponds to a given address.
  322. * This routine scans all pci busses to build the cache.
  323. * Must be run late in boot process, after the pci controllers
  324. * have been scaned for devices (after all device resources are known).
  325. */
  326. void __init pci_addr_cache_build(void)
  327. {
  328. struct pci_dev *dev = NULL;
  329. if (!eeh_subsystem_enabled)
  330. return;
  331. spin_lock_init(&pci_io_addr_cache_root.piar_lock);
  332. while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
  333. /* Ignore PCI bridges ( XXX why ??) */
  334. if ((dev->class >> 16) == PCI_BASE_CLASS_BRIDGE) {
  335. continue;
  336. }
  337. pci_addr_cache_insert_device(dev);
  338. }
  339. #ifdef DEBUG
  340. /* Verify tree built up above, echo back the list of addrs. */
  341. pci_addr_cache_print(&pci_io_addr_cache_root);
  342. #endif
  343. }
  344. /* --------------------------------------------------------------- */
  345. /* Above lies the PCI Address Cache. Below lies the EEH event infrastructure */
  346. void eeh_slot_error_detail (struct pci_dn *pdn, int severity)
  347. {
  348. unsigned long flags;
  349. int rc;
  350. /* Log the error with the rtas logger */
  351. spin_lock_irqsave(&slot_errbuf_lock, flags);
  352. memset(slot_errbuf, 0, eeh_error_buf_size);
  353. rc = rtas_call(ibm_slot_error_detail,
  354. 8, 1, NULL, pdn->eeh_config_addr,
  355. BUID_HI(pdn->phb->buid),
  356. BUID_LO(pdn->phb->buid), NULL, 0,
  357. virt_to_phys(slot_errbuf),
  358. eeh_error_buf_size,
  359. severity);
  360. if (rc == 0)
  361. log_error(slot_errbuf, ERR_TYPE_RTAS_LOG, 0);
  362. spin_unlock_irqrestore(&slot_errbuf_lock, flags);
  363. }
  364. /**
  365. * read_slot_reset_state - Read the reset state of a device node's slot
  366. * @dn: device node to read
  367. * @rets: array to return results in
  368. */
  369. static int read_slot_reset_state(struct pci_dn *pdn, int rets[])
  370. {
  371. int token, outputs;
  372. if (ibm_read_slot_reset_state2 != RTAS_UNKNOWN_SERVICE) {
  373. token = ibm_read_slot_reset_state2;
  374. outputs = 4;
  375. } else {
  376. token = ibm_read_slot_reset_state;
  377. rets[2] = 0; /* fake PE Unavailable info */
  378. outputs = 3;
  379. }
  380. return rtas_call(token, 3, outputs, rets, pdn->eeh_config_addr,
  381. BUID_HI(pdn->phb->buid), BUID_LO(pdn->phb->buid));
  382. }
  383. /**
  384. * eeh_token_to_phys - convert EEH address token to phys address
  385. * @token i/o token, should be address in the form 0xA....
  386. */
  387. static inline unsigned long eeh_token_to_phys(unsigned long token)
  388. {
  389. pte_t *ptep;
  390. unsigned long pa;
  391. ptep = find_linux_pte(init_mm.pgd, token);
  392. if (!ptep)
  393. return token;
  394. pa = pte_pfn(*ptep) << PAGE_SHIFT;
  395. return pa | (token & (PAGE_SIZE-1));
  396. }
  397. /**
  398. * Return the "partitionable endpoint" (pe) under which this device lies
  399. */
  400. static struct device_node * find_device_pe(struct device_node *dn)
  401. {
  402. while ((dn->parent) && PCI_DN(dn->parent) &&
  403. (PCI_DN(dn->parent)->eeh_mode & EEH_MODE_SUPPORTED)) {
  404. dn = dn->parent;
  405. }
  406. return dn;
  407. }
  408. /** Mark all devices that are peers of this device as failed.
  409. * Mark the device driver too, so that it can see the failure
  410. * immediately; this is critical, since some drivers poll
  411. * status registers in interrupts ... If a driver is polling,
  412. * and the slot is frozen, then the driver can deadlock in
  413. * an interrupt context, which is bad.
  414. */
  415. static inline void __eeh_mark_slot (struct device_node *dn)
  416. {
  417. while (dn) {
  418. PCI_DN(dn)->eeh_mode |= EEH_MODE_ISOLATED;
  419. if (dn->child)
  420. __eeh_mark_slot (dn->child);
  421. dn = dn->sibling;
  422. }
  423. }
  424. static inline void __eeh_clear_slot (struct device_node *dn)
  425. {
  426. while (dn) {
  427. PCI_DN(dn)->eeh_mode &= ~EEH_MODE_ISOLATED;
  428. if (dn->child)
  429. __eeh_clear_slot (dn->child);
  430. dn = dn->sibling;
  431. }
  432. }
  433. static inline void eeh_clear_slot (struct device_node *dn)
  434. {
  435. unsigned long flags;
  436. spin_lock_irqsave(&confirm_error_lock, flags);
  437. __eeh_clear_slot (dn);
  438. spin_unlock_irqrestore(&confirm_error_lock, flags);
  439. }
  440. /**
  441. * eeh_dn_check_failure - check if all 1's data is due to EEH slot freeze
  442. * @dn device node
  443. * @dev pci device, if known
  444. *
  445. * Check for an EEH failure for the given device node. Call this
  446. * routine if the result of a read was all 0xff's and you want to
  447. * find out if this is due to an EEH slot freeze. This routine
  448. * will query firmware for the EEH status.
  449. *
  450. * Returns 0 if there has not been an EEH error; otherwise returns
  451. * a non-zero value and queues up a slot isolation event notification.
  452. *
  453. * It is safe to call this routine in an interrupt context.
  454. */
  455. int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev)
  456. {
  457. int ret;
  458. int rets[3];
  459. unsigned long flags;
  460. struct pci_dn *pdn;
  461. struct device_node *pe_dn;
  462. int rc = 0;
  463. __get_cpu_var(total_mmio_ffs)++;
  464. if (!eeh_subsystem_enabled)
  465. return 0;
  466. if (!dn) {
  467. __get_cpu_var(no_dn)++;
  468. return 0;
  469. }
  470. pdn = PCI_DN(dn);
  471. /* Access to IO BARs might get this far and still not want checking. */
  472. if (!(pdn->eeh_mode & EEH_MODE_SUPPORTED) ||
  473. pdn->eeh_mode & EEH_MODE_NOCHECK) {
  474. __get_cpu_var(ignored_check)++;
  475. #ifdef DEBUG
  476. printk ("EEH:ignored check (%x) for %s %s\n",
  477. pdn->eeh_mode, pci_name (dev), dn->full_name);
  478. #endif
  479. return 0;
  480. }
  481. if (!pdn->eeh_config_addr) {
  482. __get_cpu_var(no_cfg_addr)++;
  483. return 0;
  484. }
  485. /* If we already have a pending isolation event for this
  486. * slot, we know it's bad already, we don't need to check.
  487. * Do this checking under a lock; as multiple PCI devices
  488. * in one slot might report errors simultaneously, and we
  489. * only want one error recovery routine running.
  490. */
  491. spin_lock_irqsave(&confirm_error_lock, flags);
  492. rc = 1;
  493. if (pdn->eeh_mode & EEH_MODE_ISOLATED) {
  494. pdn->eeh_check_count ++;
  495. if (pdn->eeh_check_count >= EEH_MAX_FAILS) {
  496. printk (KERN_ERR "EEH: Device driver ignored %d bad reads, panicing\n",
  497. pdn->eeh_check_count);
  498. dump_stack();
  499. /* re-read the slot reset state */
  500. if (read_slot_reset_state(pdn, rets) != 0)
  501. rets[0] = -1; /* reset state unknown */
  502. /* If we are here, then we hit an infinite loop. Stop. */
  503. panic("EEH: MMIO halt (%d) on device:%s\n", rets[0], pci_name(dev));
  504. }
  505. goto dn_unlock;
  506. }
  507. /*
  508. * Now test for an EEH failure. This is VERY expensive.
  509. * Note that the eeh_config_addr may be a parent device
  510. * in the case of a device behind a bridge, or it may be
  511. * function zero of a multi-function device.
  512. * In any case they must share a common PHB.
  513. */
  514. ret = read_slot_reset_state(pdn, rets);
  515. /* If the call to firmware failed, punt */
  516. if (ret != 0) {
  517. printk(KERN_WARNING "EEH: read_slot_reset_state() failed; rc=%d dn=%s\n",
  518. ret, dn->full_name);
  519. __get_cpu_var(false_positives)++;
  520. rc = 0;
  521. goto dn_unlock;
  522. }
  523. /* If EEH is not supported on this device, punt. */
  524. if (rets[1] != 1) {
  525. printk(KERN_WARNING "EEH: event on unsupported device, rc=%d dn=%s\n",
  526. ret, dn->full_name);
  527. __get_cpu_var(false_positives)++;
  528. rc = 0;
  529. goto dn_unlock;
  530. }
  531. /* If not the kind of error we know about, punt. */
  532. if (rets[0] != 2 && rets[0] != 4 && rets[0] != 5) {
  533. __get_cpu_var(false_positives)++;
  534. rc = 0;
  535. goto dn_unlock;
  536. }
  537. /* Note that config-io to empty slots may fail;
  538. * we recognize empty because they don't have children. */
  539. if ((rets[0] == 5) && (dn->child == NULL)) {
  540. __get_cpu_var(false_positives)++;
  541. rc = 0;
  542. goto dn_unlock;
  543. }
  544. __get_cpu_var(slot_resets)++;
  545. /* Avoid repeated reports of this failure, including problems
  546. * with other functions on this device, and functions under
  547. * bridges. */
  548. pe_dn = find_device_pe (dn);
  549. __eeh_mark_slot (pe_dn);
  550. spin_unlock_irqrestore(&confirm_error_lock, flags);
  551. eeh_send_failure_event (dn, dev, rets[0], rets[2]);
  552. /* Most EEH events are due to device driver bugs. Having
  553. * a stack trace will help the device-driver authors figure
  554. * out what happened. So print that out. */
  555. if (rets[0] != 5) dump_stack();
  556. return 1;
  557. dn_unlock:
  558. spin_unlock_irqrestore(&confirm_error_lock, flags);
  559. return rc;
  560. }
  561. EXPORT_SYMBOL_GPL(eeh_dn_check_failure);
  562. /**
  563. * eeh_check_failure - check if all 1's data is due to EEH slot freeze
  564. * @token i/o token, should be address in the form 0xA....
  565. * @val value, should be all 1's (XXX why do we need this arg??)
  566. *
  567. * Check for an EEH failure at the given token address. Call this
  568. * routine if the result of a read was all 0xff's and you want to
  569. * find out if this is due to an EEH slot freeze event. This routine
  570. * will query firmware for the EEH status.
  571. *
  572. * Note this routine is safe to call in an interrupt context.
  573. */
  574. unsigned long eeh_check_failure(const volatile void __iomem *token, unsigned long val)
  575. {
  576. unsigned long addr;
  577. struct pci_dev *dev;
  578. struct device_node *dn;
  579. /* Finding the phys addr + pci device; this is pretty quick. */
  580. addr = eeh_token_to_phys((unsigned long __force) token);
  581. dev = pci_get_device_by_addr(addr);
  582. if (!dev) {
  583. __get_cpu_var(no_device)++;
  584. return val;
  585. }
  586. dn = pci_device_to_OF_node(dev);
  587. eeh_dn_check_failure (dn, dev);
  588. pci_dev_put(dev);
  589. return val;
  590. }
  591. EXPORT_SYMBOL(eeh_check_failure);
  592. /* ------------------------------------------------------------- */
  593. /* The code below deals with error recovery */
  /**
   * eeh_slot_availability - ask firmware whether a slot can be used yet
   * @pdn: PCI device node data of the slot
   *
   * Returns 0 when the slot is ready, a positive number of milliseconds
   * to wait when firmware reports the slot as temporarily unavailable,
   * and -1 when EEH is not supported, the slot is permanently unavailable
   * or the slot is in any other state.  If the firmware query itself
   * fails, its status is returned as is.
   */
  static int
  eeh_slot_availability(struct pci_dn *pdn)
  {
      int state[3];
      int err = read_slot_reset_state(pdn, state);

      if (err)
          return err;

      if (state[1] == 0)
          return -1; /* EEH is not supported */

      switch (state[0]) {
      case 0:
          return 0; /* Oll Korrect */
      case 5:
          /* zero wait time means permanently unavailable */
          return (state[2] == 0) ? -1 : state[2];
      default:
          return -1;
      }
  }
  613. /** rtas_pci_slot_reset raises/lowers the pci #RST line
  614. * state: 1/0 to raise/lower the #RST
  615. *
  616. * Clear the EEH-frozen condition on a slot. This routine
  617. * asserts the PCI #RST line if the 'state' argument is '1',
  618. * and drops the #RST line if 'state is '0'. This routine is
  619. * safe to call in an interrupt context.
  620. *
  621. */
  622. static void
  623. rtas_pci_slot_reset(struct pci_dn *pdn, int state)
  624. {
  625. int rc;
  626. BUG_ON (pdn==NULL);
  627. if (!pdn->phb) {
  628. printk (KERN_WARNING "EEH: in slot reset, device node %s has no phb\n",
  629. pdn->node->full_name);
  630. return;
  631. }
  632. rc = rtas_call(ibm_set_slot_reset,4,1, NULL,
  633. pdn->eeh_config_addr,
  634. BUID_HI(pdn->phb->buid),
  635. BUID_LO(pdn->phb->buid),
  636. state);
  637. if (rc) {
  638. printk (KERN_WARNING "EEH: Unable to reset the failed slot, (%d) #RST=%d dn=%s\n",
  639. rc, state, pdn->node->full_name);
  640. return;
  641. }
  642. if (state == 0)
  643. eeh_clear_slot (pdn->node->parent->child);
  644. }
  /**
   * rtas_set_slot_reset - pulse the PCI #RST line and wait for the slot
   * @pdn: PCI device node data of the slot to be reset
   *
   * Asserts #RST for PCI_BUS_RST_HOLD_TIME_MSEC, releases it, lets the
   * bus settle for PCI_BUS_SETTLE_TIME_MSEC, and then polls firmware up
   * to ten times until the slot is reported ready or failed.  Sleeps, so
   * it must not be called from atomic context.
   */
  void
  rtas_set_slot_reset(struct pci_dn *pdn)
  {
      int tries;

      rtas_pci_slot_reset (pdn, 1);

      /* The PCI bus requires that the reset be held high for at least
       * a 100 milliseconds. We wait a bit longer 'just in case'. */
  #define PCI_BUS_RST_HOLD_TIME_MSEC 250
      msleep (PCI_BUS_RST_HOLD_TIME_MSEC);

      rtas_pci_slot_reset (pdn, 0);

      /* After a PCI slot has been reset, the PCI Express spec requires
       * a 1.5 second idle time for the bus to stabilize, before starting
       * up traffic. */
  #define PCI_BUS_SETTLE_TIME_MSEC 1800
      msleep (PCI_BUS_SETTLE_TIME_MSEC);

      /* Now double check with the firmware to make sure the device is
       * ready to be used; if not, wait for recovery. */
      for (tries = 10; tries > 0; tries--) {
          int wait_ms = eeh_slot_availability (pdn);

          /* 0: ready, negative: failed -- either way stop polling. */
          if (wait_ms <= 0)
              return;
          msleep (wait_ms+100);
      }
  }
  671. /* ------------------------------------------------------------- */
  672. /* The code below deals with enabling EEH for devices during the
  673. * early boot sequence. EEH must be enabled before any PCI probing
  674. * can be done.
  675. */
  676. #define EEH_ENABLE 1 /* last argument of the ibm,set-eeh-option call in early_enable_eeh() */
  677. struct eeh_early_enable_info { /* PHB bus unit id, passed to early_enable_eeh() via its data pointer */
  678. unsigned int buid_hi; /* BUID_HI() of the PHB buid */
  679. unsigned int buid_lo; /* BUID_LO() of the PHB buid */
  680. };
  681. /* Enable eeh for the given device node. */
  682. static void *early_enable_eeh(struct device_node *dn, void *data)
  683. {
  684. struct eeh_early_enable_info *info = data;
  685. int ret;
  686. char *status = get_property(dn, "status", NULL);
  687. u32 *class_code = (u32 *)get_property(dn, "class-code", NULL);
  688. u32 *vendor_id = (u32 *)get_property(dn, "vendor-id", NULL);
  689. u32 *device_id = (u32 *)get_property(dn, "device-id", NULL);
  690. u32 *regs;
  691. int enable;
  692. struct pci_dn *pdn = PCI_DN(dn);
  693. pdn->eeh_mode = 0;
  694. pdn->eeh_check_count = 0;
  695. pdn->eeh_freeze_count = 0;
  696. if (status && strcmp(status, "ok") != 0)
  697. return NULL; /* ignore devices with bad status */
  698. /* Ignore bad nodes. */
  699. if (!class_code || !vendor_id || !device_id)
  700. return NULL;
  701. /* There is nothing to check on PCI to ISA bridges */
  702. if (dn->type && !strcmp(dn->type, "isa")) {
  703. pdn->eeh_mode |= EEH_MODE_NOCHECK;
  704. return NULL;
  705. }
  706. /*
  707. * Now decide if we are going to "Disable" EEH checking
  708. * for this device. We still run with the EEH hardware active,
  709. * but we won't be checking for ff's. This means a driver
  710. * could return bad data (very bad!), an interrupt handler could
  711. * hang waiting on status bits that won't change, etc.
  712. * But there are a few cases like display devices that make sense.
  713. */
  714. enable = 1; /* i.e. we will do checking */
  715. if ((*class_code >> 16) == PCI_BASE_CLASS_DISPLAY)
  716. enable = 0;
  717. if (!enable)
  718. pdn->eeh_mode |= EEH_MODE_NOCHECK;
  719. /* Ok... see if this device supports EEH. Some do, some don't,
  720. * and the only way to find out is to check each and every one. */
  721. regs = (u32 *)get_property(dn, "reg", NULL);
  722. if (regs) {
  723. /* First register entry is addr (00BBSS00) */
  724. /* Try to enable eeh */
  725. ret = rtas_call(ibm_set_eeh_option, 4, 1, NULL,
  726. regs[0], info->buid_hi, info->buid_lo,
  727. EEH_ENABLE);
  728. if (ret == 0) {
  729. eeh_subsystem_enabled = 1;
  730. pdn->eeh_mode |= EEH_MODE_SUPPORTED;
  731. pdn->eeh_config_addr = regs[0];
  732. #ifdef DEBUG
  733. printk(KERN_DEBUG "EEH: %s: eeh enabled\n", dn->full_name);
  734. #endif
  735. } else {
  736. /* This device doesn't support EEH, but it may have an
  737. * EEH parent, in which case we mark it as supported. */
  738. if (dn->parent && PCI_DN(dn->parent)
  739. && (PCI_DN(dn->parent)->eeh_mode & EEH_MODE_SUPPORTED)) {
  740. /* Parent supports EEH. */
  741. pdn->eeh_mode |= EEH_MODE_SUPPORTED;
  742. pdn->eeh_config_addr = PCI_DN(dn->parent)->eeh_config_addr;
  743. return NULL;
  744. }
  745. }
  746. } else {
  747. printk(KERN_WARNING "EEH: %s: unable to get reg property.\n",
  748. dn->full_name);
  749. }
  750. return NULL;
  751. }
  752. /*
  753. * Initialize EEH by trying to enable it for all of the adapters in the system.
  754. * As a side effect we can determine here if eeh is supported at all.
  755. * Note that we leave EEH on so failed config cycles won't cause a machine
  756. * check. If a user turns off EEH for a particular adapter they are really
  757. * telling Linux to ignore errors. Some hardware (e.g. POWER5) won't
  758. * grant access to a slot if EEH isn't enabled, and so we always enable
  759. * EEH for all slots/all devices.
  760. *
  761. * The eeh-force-off option disables EEH checking globally, for all slots.
  762. * Even if force-off is set, the EEH hardware is still enabled, so that
  763. * newer systems can boot.
  764. */
  765. void __init eeh_init(void)
  766. {
  767. struct device_node *phb, *np;
  768. struct eeh_early_enable_info info;
  769. spin_lock_init(&confirm_error_lock);
  770. spin_lock_init(&slot_errbuf_lock);
  771. np = of_find_node_by_path("/rtas");
  772. if (np == NULL)
  773. return;
  774. ibm_set_eeh_option = rtas_token("ibm,set-eeh-option");
  775. ibm_set_slot_reset = rtas_token("ibm,set-slot-reset");
  776. ibm_read_slot_reset_state2 = rtas_token("ibm,read-slot-reset-state2");
  777. ibm_read_slot_reset_state = rtas_token("ibm,read-slot-reset-state");
  778. ibm_slot_error_detail = rtas_token("ibm,slot-error-detail");
  779. if (ibm_set_eeh_option == RTAS_UNKNOWN_SERVICE)
  780. return;
  781. eeh_error_buf_size = rtas_token("rtas-error-log-max");
  782. if (eeh_error_buf_size == RTAS_UNKNOWN_SERVICE) {
  783. eeh_error_buf_size = 1024;
  784. }
  785. if (eeh_error_buf_size > RTAS_ERROR_LOG_MAX) {
  786. printk(KERN_WARNING "EEH: rtas-error-log-max is bigger than allocated "
  787. "buffer ! (%d vs %d)", eeh_error_buf_size, RTAS_ERROR_LOG_MAX);
  788. eeh_error_buf_size = RTAS_ERROR_LOG_MAX;
  789. }
  790. /* Enable EEH for all adapters. Note that eeh requires buid's */
  791. for (phb = of_find_node_by_name(NULL, "pci"); phb;
  792. phb = of_find_node_by_name(phb, "pci")) {
  793. unsigned long buid;
  794. buid = get_phb_buid(phb);
  795. if (buid == 0 || PCI_DN(phb) == NULL)
  796. continue;
  797. info.buid_lo = BUID_LO(buid);
  798. info.buid_hi = BUID_HI(buid);
  799. traverse_pci_devices(phb, early_enable_eeh, &info);
  800. }
  801. if (eeh_subsystem_enabled)
  802. printk(KERN_INFO "EEH: PCI Enhanced I/O Error Handling Enabled\n");
  803. else
  804. printk(KERN_WARNING "EEH: No capable adapters found\n");
  805. }
  806. /**
  807. * eeh_add_device_early - enable EEH for the indicated device_node
  808. * @dn: device node for which to set up EEH
  809. *
  810. * This routine must be used to perform EEH initialization for PCI
  811. * devices that were added after system boot (e.g. hotplug, dlpar).
  812. * This routine must be called before any i/o is performed to the
  813. * adapter (inluding any config-space i/o).
  814. * Whether this actually enables EEH or not for this device depends
  815. * on the CEC architecture, type of the device, on earlier boot
  816. * command-line arguments & etc.
  817. */
  818. void eeh_add_device_early(struct device_node *dn)
  819. {
  820. struct pci_controller *phb;
  821. struct eeh_early_enable_info info;
  822. if (!dn || !PCI_DN(dn))
  823. return;
  824. phb = PCI_DN(dn)->phb;
  825. if (NULL == phb || 0 == phb->buid) {
  826. printk(KERN_WARNING "EEH: Expected buid but found none for %s\n",
  827. dn->full_name);
  828. dump_stack();
  829. return;
  830. }
  831. info.buid_hi = BUID_HI(phb->buid);
  832. info.buid_lo = BUID_LO(phb->buid);
  833. early_enable_eeh(dn, &info);
  834. }
  835. EXPORT_SYMBOL_GPL(eeh_add_device_early);
  836. /**
  837. * eeh_add_device_late - perform EEH initialization for the indicated pci device
  838. * @dev: pci device for which to set up EEH
  839. *
  840. * This routine must be used to complete EEH initialization for PCI
  841. * devices that were added after system boot (e.g. hotplug, dlpar).
  842. */
  843. void eeh_add_device_late(struct pci_dev *dev)
  844. {
  845. struct device_node *dn;
  846. if (!dev || !eeh_subsystem_enabled)
  847. return;
  848. #ifdef DEBUG
  849. printk(KERN_DEBUG "EEH: adding device %s\n", pci_name(dev));
  850. #endif
  851. pci_dev_get (dev);
  852. dn = pci_device_to_OF_node(dev);
  853. PCI_DN(dn)->pcidev = dev;
  854. pci_addr_cache_insert_device (dev);
  855. }
  856. EXPORT_SYMBOL_GPL(eeh_add_device_late);
  857. /**
  858. * eeh_remove_device - undo EEH setup for the indicated pci device
  859. * @dev: pci device to be removed
  860. *
  861. * This routine should be when a device is removed from a running
  862. * system (e.g. by hotplug or dlpar).
  863. */
  864. void eeh_remove_device(struct pci_dev *dev)
  865. {
  866. struct device_node *dn;
  867. if (!dev || !eeh_subsystem_enabled)
  868. return;
  869. /* Unregister the device with the EEH/PCI address search system */
  870. #ifdef DEBUG
  871. printk(KERN_DEBUG "EEH: remove device %s\n", pci_name(dev));
  872. #endif
  873. pci_addr_cache_remove_device(dev);
  874. dn = pci_device_to_OF_node(dev);
  875. PCI_DN(dn)->pcidev = NULL;
  876. pci_dev_put (dev);
  877. }
  878. EXPORT_SYMBOL_GPL(eeh_remove_device);
  879. static int proc_eeh_show(struct seq_file *m, void *v)
  880. {
  881. unsigned int cpu;
  882. unsigned long ffs = 0, positives = 0, failures = 0;
  883. unsigned long resets = 0;
  884. unsigned long no_dev = 0, no_dn = 0, no_cfg = 0, no_check = 0;
  885. for_each_cpu(cpu) {
  886. ffs += per_cpu(total_mmio_ffs, cpu);
  887. positives += per_cpu(false_positives, cpu);
  888. failures += per_cpu(ignored_failures, cpu);
  889. resets += per_cpu(slot_resets, cpu);
  890. no_dev += per_cpu(no_device, cpu);
  891. no_dn += per_cpu(no_dn, cpu);
  892. no_cfg += per_cpu(no_cfg_addr, cpu);
  893. no_check += per_cpu(ignored_check, cpu);
  894. }
  895. if (0 == eeh_subsystem_enabled) {
  896. seq_printf(m, "EEH Subsystem is globally disabled\n");
  897. seq_printf(m, "eeh_total_mmio_ffs=%ld\n", ffs);
  898. } else {
  899. seq_printf(m, "EEH Subsystem is enabled\n");
  900. seq_printf(m,
  901. "no device=%ld\n"
  902. "no device node=%ld\n"
  903. "no config address=%ld\n"
  904. "check not wanted=%ld\n"
  905. "eeh_total_mmio_ffs=%ld\n"
  906. "eeh_false_positives=%ld\n"
  907. "eeh_ignored_failures=%ld\n"
  908. "eeh_slot_resets=%ld\n",
  909. no_dev, no_dn, no_cfg, no_check,
  910. ffs, positives, failures, resets);
  911. }
  912. return 0;
  913. }
  914. static int proc_eeh_open(struct inode *inode, struct file *file)
  915. {
  916. return single_open(file, proc_eeh_show, NULL);
  917. }
  918. static struct file_operations proc_eeh_operations = {
  919. .open = proc_eeh_open,
  920. .read = seq_read,
  921. .llseek = seq_lseek,
  922. .release = single_release,
  923. };
  924. static int __init eeh_init_proc(void)
  925. {
  926. struct proc_dir_entry *e;
  927. if (systemcfg->platform & PLATFORM_PSERIES) {
  928. e = create_proc_entry("ppc64/eeh", 0, NULL);
  929. if (e)
  930. e->proc_fops = &proc_eeh_operations;
  931. }
  932. return 0;
  933. }
  934. __initcall(eeh_init_proc);