eeh.c 34 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196
  1. /*
  2. * eeh.c
  3. * Copyright (C) 2001 Dave Engebretsen & Todd Inglett IBM Corporation
  4. *
  5. * This program is free software; you can redistribute it and/or modify
  6. * it under the terms of the GNU General Public License as published by
  7. * the Free Software Foundation; either version 2 of the License, or
  8. * (at your option) any later version.
  9. *
  10. * This program is distributed in the hope that it will be useful,
  11. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. * GNU General Public License for more details.
  14. *
  15. * You should have received a copy of the GNU General Public License
  16. * along with this program; if not, write to the Free Software
  17. * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  18. */
  19. #include <linux/delay.h>
  20. #include <linux/init.h>
  21. #include <linux/list.h>
  22. #include <linux/pci.h>
  23. #include <linux/proc_fs.h>
  24. #include <linux/rbtree.h>
  25. #include <linux/seq_file.h>
  26. #include <linux/spinlock.h>
  27. #include <asm/atomic.h>
  28. #include <asm/eeh.h>
  29. #include <asm/eeh_event.h>
  30. #include <asm/io.h>
  31. #include <asm/machdep.h>
  32. #include <asm/ppc-pci.h>
  33. #include <asm/rtas.h>
  34. #undef DEBUG
  35. /** Overview:
  36. * EEH, or "Extended Error Handling" is a PCI bridge technology for
  37. * dealing with PCI bus errors that can't be dealt with within the
  38. * usual PCI framework, except by check-stopping the CPU. Systems
  39. * that are designed for high-availability/reliability cannot afford
  40. * to crash due to a "mere" PCI error, thus the need for EEH.
  41. * An EEH-capable bridge operates by converting a detected error
  42. * into a "slot freeze", taking the PCI adapter off-line, making
  43. * the slot behave, from the OS's point of view, as if the slot
  44. * were "empty": all reads return 0xff's and all writes are silently
  45. * ignored. EEH slot isolation events can be triggered by parity
  46. * errors on the address or data busses (e.g. during posted writes),
  47. * which in turn might be caused by low voltage on the bus, dust,
  48. * vibration, humidity, radioactivity or plain-old failed hardware.
  49. *
  50. * Note, however, that one of the leading causes of EEH slot
  51. * freeze events are buggy device drivers, buggy device microcode,
  52. * or buggy device hardware. This is because any attempt by the
  53. * device to bus-master data to a memory address that is not
  54. * assigned to the device will trigger a slot freeze. (The idea
  55. * is to prevent devices-gone-wild from corrupting system memory).
  56. * Buggy hardware/drivers will have a miserable time co-existing
  57. * with EEH.
  58. *
  59. * Ideally, a PCI device driver, when suspecting that an isolation
  60. * event has occurred (e.g. by reading 0xff's), will then ask EEH
  61. * whether this is the case, and then take appropriate steps to
  62. * reset the PCI slot, the PCI device, and then resume operations.
  63. * However, until that day, the checking is done here, with the
  64. * eeh_check_failure() routine embedded in the MMIO macros. If
  65. * the slot is found to be isolated, an "EEH Event" is synthesized
  66. * and sent out for processing.
  67. */
  68. /* If a device driver keeps reading an MMIO register in an interrupt
  69. * handler after a slot isolation event has occurred, we assume it
  70. * is broken and panic. This sets the threshold for how many read
  71. * attempts we allow before panicking.
  72. */
  73. #define EEH_MAX_FAILS 100000
  74. /* Misc forward declaraions */
  75. static void eeh_save_bars(struct pci_dev * pdev, struct pci_dn *pdn);
  76. /* RTAS tokens */
  77. static int ibm_set_eeh_option;
  78. static int ibm_set_slot_reset;
  79. static int ibm_read_slot_reset_state;
  80. static int ibm_read_slot_reset_state2;
  81. static int ibm_slot_error_detail;
  82. static int eeh_subsystem_enabled;
  83. /* Lock to avoid races due to multiple reports of an error */
  84. static DEFINE_SPINLOCK(confirm_error_lock);
  85. /* Buffer for reporting slot-error-detail rtas calls */
  86. static unsigned char slot_errbuf[RTAS_ERROR_LOG_MAX];
  87. static DEFINE_SPINLOCK(slot_errbuf_lock);
  88. static int eeh_error_buf_size;
  89. /* System monitoring statistics */
  90. static DEFINE_PER_CPU(unsigned long, no_device);
  91. static DEFINE_PER_CPU(unsigned long, no_dn);
  92. static DEFINE_PER_CPU(unsigned long, no_cfg_addr);
  93. static DEFINE_PER_CPU(unsigned long, ignored_check);
  94. static DEFINE_PER_CPU(unsigned long, total_mmio_ffs);
  95. static DEFINE_PER_CPU(unsigned long, false_positives);
  96. static DEFINE_PER_CPU(unsigned long, ignored_failures);
  97. static DEFINE_PER_CPU(unsigned long, slot_resets);
  98. /**
  99. * The pci address cache subsystem. This subsystem places
  100. * PCI device address resources into a red-black tree, sorted
  101. * according to the address range, so that given only an i/o
  102. * address, the corresponding PCI device can be **quickly**
  103. * found. It is safe to perform an address lookup in an interrupt
  104. * context; this ability is an important feature.
  105. *
  106. * Currently, the only customer of this code is the EEH subsystem;
  107. * thus, this code has been somewhat tailored to suit EEH better.
  108. * In particular, the cache does *not* hold the addresses of devices
  109. * for which EEH is not enabled.
  110. *
  111. * (Implementation Note: The RB tree seems to be better/faster
  112. * than any hash algo I could think of for this problem, even
  113. * with the penalty of slow pointer chases for d-cache misses).
  114. */
  115. struct pci_io_addr_range
  116. {
  117. struct rb_node rb_node;
  118. unsigned long addr_lo;
  119. unsigned long addr_hi;
  120. struct pci_dev *pcidev;
  121. unsigned int flags;
  122. };
  123. static struct pci_io_addr_cache
  124. {
  125. struct rb_root rb_root;
  126. spinlock_t piar_lock;
  127. } pci_io_addr_cache_root;
  128. static inline struct pci_dev *__pci_get_device_by_addr(unsigned long addr)
  129. {
  130. struct rb_node *n = pci_io_addr_cache_root.rb_root.rb_node;
  131. while (n) {
  132. struct pci_io_addr_range *piar;
  133. piar = rb_entry(n, struct pci_io_addr_range, rb_node);
  134. if (addr < piar->addr_lo) {
  135. n = n->rb_left;
  136. } else {
  137. if (addr > piar->addr_hi) {
  138. n = n->rb_right;
  139. } else {
  140. pci_dev_get(piar->pcidev);
  141. return piar->pcidev;
  142. }
  143. }
  144. }
  145. return NULL;
  146. }
  147. /**
  148. * pci_get_device_by_addr - Get device, given only address
  149. * @addr: mmio (PIO) phys address or i/o port number
  150. *
  151. * Given an mmio phys address, or a port number, find a pci device
  152. * that implements this address. Be sure to pci_dev_put the device
  153. * when finished. I/O port numbers are assumed to be offset
  154. * from zero (that is, they do *not* have pci_io_addr added in).
  155. * It is safe to call this function within an interrupt.
  156. */
  157. static struct pci_dev *pci_get_device_by_addr(unsigned long addr)
  158. {
  159. struct pci_dev *dev;
  160. unsigned long flags;
  161. spin_lock_irqsave(&pci_io_addr_cache_root.piar_lock, flags);
  162. dev = __pci_get_device_by_addr(addr);
  163. spin_unlock_irqrestore(&pci_io_addr_cache_root.piar_lock, flags);
  164. return dev;
  165. }
  #ifdef DEBUG
  /*
   * Debug helper: walk the address cache in tree order and print one
   * line per range (kind, running index, bounds and owning device).
   */
  static void pci_addr_cache_print(struct pci_io_addr_cache *cache)
  {
      struct rb_node *node;
      int idx = 0;

      for (node = rb_first(&cache->rb_root); node; node = rb_next(node)) {
          struct pci_io_addr_range *range =
              rb_entry(node, struct pci_io_addr_range, rb_node);

          printk(KERN_DEBUG "PCI: %s addr range %d [%lx-%lx]: %s\n",
                 (range->flags & IORESOURCE_IO) ? "i/o" : "mem", idx,
                 range->addr_lo, range->addr_hi, pci_name(range->pcidev));
          idx++;
      }
  }
  #endif
  187. /* Insert address range into the rb tree. */
  188. static struct pci_io_addr_range *
  189. pci_addr_cache_insert(struct pci_dev *dev, unsigned long alo,
  190. unsigned long ahi, unsigned int flags)
  191. {
  192. struct rb_node **p = &pci_io_addr_cache_root.rb_root.rb_node;
  193. struct rb_node *parent = NULL;
  194. struct pci_io_addr_range *piar;
  195. /* Walk tree, find a place to insert into tree */
  196. while (*p) {
  197. parent = *p;
  198. piar = rb_entry(parent, struct pci_io_addr_range, rb_node);
  199. if (ahi < piar->addr_lo) {
  200. p = &parent->rb_left;
  201. } else if (alo > piar->addr_hi) {
  202. p = &parent->rb_right;
  203. } else {
  204. if (dev != piar->pcidev ||
  205. alo != piar->addr_lo || ahi != piar->addr_hi) {
  206. printk(KERN_WARNING "PIAR: overlapping address range\n");
  207. }
  208. return piar;
  209. }
  210. }
  211. piar = (struct pci_io_addr_range *)kmalloc(sizeof(struct pci_io_addr_range), GFP_ATOMIC);
  212. if (!piar)
  213. return NULL;
  214. piar->addr_lo = alo;
  215. piar->addr_hi = ahi;
  216. piar->pcidev = dev;
  217. piar->flags = flags;
  218. #ifdef DEBUG
  219. printk(KERN_DEBUG "PIAR: insert range=[%lx:%lx] dev=%s\n",
  220. alo, ahi, pci_name (dev));
  221. #endif
  222. rb_link_node(&piar->rb_node, parent, p);
  223. rb_insert_color(&piar->rb_node, &pci_io_addr_cache_root.rb_root);
  224. return piar;
  225. }
  226. static void __pci_addr_cache_insert_device(struct pci_dev *dev)
  227. {
  228. struct device_node *dn;
  229. struct pci_dn *pdn;
  230. int i;
  231. int inserted = 0;
  232. dn = pci_device_to_OF_node(dev);
  233. if (!dn) {
  234. printk(KERN_WARNING "PCI: no pci dn found for dev=%s\n", pci_name(dev));
  235. return;
  236. }
  237. /* Skip any devices for which EEH is not enabled. */
  238. pdn = PCI_DN(dn);
  239. if (!(pdn->eeh_mode & EEH_MODE_SUPPORTED) ||
  240. pdn->eeh_mode & EEH_MODE_NOCHECK) {
  241. #ifdef DEBUG
  242. printk(KERN_INFO "PCI: skip building address cache for=%s - %s\n",
  243. pci_name(dev), pdn->node->full_name);
  244. #endif
  245. return;
  246. }
  247. /* The cache holds a reference to the device... */
  248. pci_dev_get(dev);
  249. /* Walk resources on this device, poke them into the tree */
  250. for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
  251. unsigned long start = pci_resource_start(dev,i);
  252. unsigned long end = pci_resource_end(dev,i);
  253. unsigned int flags = pci_resource_flags(dev,i);
  254. /* We are interested only bus addresses, not dma or other stuff */
  255. if (0 == (flags & (IORESOURCE_IO | IORESOURCE_MEM)))
  256. continue;
  257. if (start == 0 || ~start == 0 || end == 0 || ~end == 0)
  258. continue;
  259. pci_addr_cache_insert(dev, start, end, flags);
  260. inserted = 1;
  261. }
  262. /* If there was nothing to add, the cache has no reference... */
  263. if (!inserted)
  264. pci_dev_put(dev);
  265. }
  266. /**
  267. * pci_addr_cache_insert_device - Add a device to the address cache
  268. * @dev: PCI device whose I/O addresses we are interested in.
  269. *
  270. * In order to support the fast lookup of devices based on addresses,
  271. * we maintain a cache of devices that can be quickly searched.
  272. * This routine adds a device to that cache.
  273. */
  274. static void pci_addr_cache_insert_device(struct pci_dev *dev)
  275. {
  276. unsigned long flags;
  277. spin_lock_irqsave(&pci_io_addr_cache_root.piar_lock, flags);
  278. __pci_addr_cache_insert_device(dev);
  279. spin_unlock_irqrestore(&pci_io_addr_cache_root.piar_lock, flags);
  280. }
  281. static inline void __pci_addr_cache_remove_device(struct pci_dev *dev)
  282. {
  283. struct rb_node *n;
  284. int removed = 0;
  285. restart:
  286. n = rb_first(&pci_io_addr_cache_root.rb_root);
  287. while (n) {
  288. struct pci_io_addr_range *piar;
  289. piar = rb_entry(n, struct pci_io_addr_range, rb_node);
  290. if (piar->pcidev == dev) {
  291. rb_erase(n, &pci_io_addr_cache_root.rb_root);
  292. removed = 1;
  293. kfree(piar);
  294. goto restart;
  295. }
  296. n = rb_next(n);
  297. }
  298. /* The cache no longer holds its reference to this device... */
  299. if (removed)
  300. pci_dev_put(dev);
  301. }
  302. /**
  303. * pci_addr_cache_remove_device - remove pci device from addr cache
  304. * @dev: device to remove
  305. *
  306. * Remove a device from the addr-cache tree.
  307. * This is potentially expensive, since it will walk
  308. * the tree multiple times (once per resource).
  309. * But so what; device removal doesn't need to be that fast.
  310. */
  311. static void pci_addr_cache_remove_device(struct pci_dev *dev)
  312. {
  313. unsigned long flags;
  314. spin_lock_irqsave(&pci_io_addr_cache_root.piar_lock, flags);
  315. __pci_addr_cache_remove_device(dev);
  316. spin_unlock_irqrestore(&pci_io_addr_cache_root.piar_lock, flags);
  317. }
  318. /**
  319. * pci_addr_cache_build - Build a cache of I/O addresses
  320. *
  321. * Build a cache of pci i/o addresses. This cache will be used to
  322. * find the pci device that corresponds to a given address.
  323. * This routine scans all pci busses to build the cache.
  324. * Must be run late in boot process, after the pci controllers
  325. * have been scaned for devices (after all device resources are known).
  326. */
  327. void __init pci_addr_cache_build(void)
  328. {
  329. struct device_node *dn;
  330. struct pci_dev *dev = NULL;
  331. if (!eeh_subsystem_enabled)
  332. return;
  333. spin_lock_init(&pci_io_addr_cache_root.piar_lock);
  334. while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
  335. /* Ignore PCI bridges ( XXX why ??) */
  336. if ((dev->class >> 16) == PCI_BASE_CLASS_BRIDGE) {
  337. continue;
  338. }
  339. pci_addr_cache_insert_device(dev);
  340. /* Save the BAR's; firmware doesn't restore these after EEH reset */
  341. dn = pci_device_to_OF_node(dev);
  342. eeh_save_bars(dev, PCI_DN(dn));
  343. }
  344. #ifdef DEBUG
  345. /* Verify tree built up above, echo back the list of addrs. */
  346. pci_addr_cache_print(&pci_io_addr_cache_root);
  347. #endif
  348. }
  349. /* --------------------------------------------------------------- */
  350. /* Above lies the PCI Address Cache. Below lies the EEH event infrastructure */
  351. void eeh_slot_error_detail (struct pci_dn *pdn, int severity)
  352. {
  353. unsigned long flags;
  354. int rc;
  355. /* Log the error with the rtas logger */
  356. spin_lock_irqsave(&slot_errbuf_lock, flags);
  357. memset(slot_errbuf, 0, eeh_error_buf_size);
  358. rc = rtas_call(ibm_slot_error_detail,
  359. 8, 1, NULL, pdn->eeh_config_addr,
  360. BUID_HI(pdn->phb->buid),
  361. BUID_LO(pdn->phb->buid), NULL, 0,
  362. virt_to_phys(slot_errbuf),
  363. eeh_error_buf_size,
  364. severity);
  365. if (rc == 0)
  366. log_error(slot_errbuf, ERR_TYPE_RTAS_LOG, 0);
  367. spin_unlock_irqrestore(&slot_errbuf_lock, flags);
  368. }
  369. /**
  370. * read_slot_reset_state - Read the reset state of a device node's slot
  371. * @dn: device node to read
  372. * @rets: array to return results in
  373. */
  374. static int read_slot_reset_state(struct pci_dn *pdn, int rets[])
  375. {
  376. int token, outputs;
  377. if (ibm_read_slot_reset_state2 != RTAS_UNKNOWN_SERVICE) {
  378. token = ibm_read_slot_reset_state2;
  379. outputs = 4;
  380. } else {
  381. token = ibm_read_slot_reset_state;
  382. rets[2] = 0; /* fake PE Unavailable info */
  383. outputs = 3;
  384. }
  385. return rtas_call(token, 3, outputs, rets, pdn->eeh_config_addr,
  386. BUID_HI(pdn->phb->buid), BUID_LO(pdn->phb->buid));
  387. }
  388. /**
  389. * eeh_token_to_phys - convert EEH address token to phys address
  390. * @token i/o token, should be address in the form 0xA....
  391. */
  392. static inline unsigned long eeh_token_to_phys(unsigned long token)
  393. {
  394. pte_t *ptep;
  395. unsigned long pa;
  396. ptep = find_linux_pte(init_mm.pgd, token);
  397. if (!ptep)
  398. return token;
  399. pa = pte_pfn(*ptep) << PAGE_SHIFT;
  400. return pa | (token & (PAGE_SIZE-1));
  401. }
  402. /**
  403. * Return the "partitionable endpoint" (pe) under which this device lies
  404. */
  405. static struct device_node * find_device_pe(struct device_node *dn)
  406. {
  407. while ((dn->parent) && PCI_DN(dn->parent) &&
  408. (PCI_DN(dn->parent)->eeh_mode & EEH_MODE_SUPPORTED)) {
  409. dn = dn->parent;
  410. }
  411. return dn;
  412. }
  413. /** Mark all devices that are peers of this device as failed.
  414. * Mark the device driver too, so that it can see the failure
  415. * immediately; this is critical, since some drivers poll
  416. * status registers in interrupts ... If a driver is polling,
  417. * and the slot is frozen, then the driver can deadlock in
  418. * an interrupt context, which is bad.
  419. */
  420. static void __eeh_mark_slot (struct device_node *dn)
  421. {
  422. while (dn) {
  423. PCI_DN(dn)->eeh_mode |= EEH_MODE_ISOLATED;
  424. if (dn->child)
  425. __eeh_mark_slot (dn->child);
  426. dn = dn->sibling;
  427. }
  428. }
  429. static void __eeh_clear_slot (struct device_node *dn)
  430. {
  431. while (dn) {
  432. PCI_DN(dn)->eeh_mode &= ~EEH_MODE_ISOLATED;
  433. if (dn->child)
  434. __eeh_clear_slot (dn->child);
  435. dn = dn->sibling;
  436. }
  437. }
  438. static inline void eeh_clear_slot (struct device_node *dn)
  439. {
  440. unsigned long flags;
  441. spin_lock_irqsave(&confirm_error_lock, flags);
  442. __eeh_clear_slot (dn);
  443. spin_unlock_irqrestore(&confirm_error_lock, flags);
  444. }
  445. /**
  446. * eeh_dn_check_failure - check if all 1's data is due to EEH slot freeze
  447. * @dn device node
  448. * @dev pci device, if known
  449. *
  450. * Check for an EEH failure for the given device node. Call this
  451. * routine if the result of a read was all 0xff's and you want to
  452. * find out if this is due to an EEH slot freeze. This routine
  453. * will query firmware for the EEH status.
  454. *
  455. * Returns 0 if there has not been an EEH error; otherwise returns
  456. * a non-zero value and queues up a slot isolation event notification.
  457. *
  458. * It is safe to call this routine in an interrupt context.
  459. */
  460. int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev)
  461. {
  462. int ret;
  463. int rets[3];
  464. unsigned long flags;
  465. struct pci_dn *pdn;
  466. struct device_node *pe_dn;
  467. int rc = 0;
  468. __get_cpu_var(total_mmio_ffs)++;
  469. if (!eeh_subsystem_enabled)
  470. return 0;
  471. if (!dn) {
  472. __get_cpu_var(no_dn)++;
  473. return 0;
  474. }
  475. pdn = PCI_DN(dn);
  476. /* Access to IO BARs might get this far and still not want checking. */
  477. if (!(pdn->eeh_mode & EEH_MODE_SUPPORTED) ||
  478. pdn->eeh_mode & EEH_MODE_NOCHECK) {
  479. __get_cpu_var(ignored_check)++;
  480. #ifdef DEBUG
  481. printk ("EEH:ignored check (%x) for %s %s\n",
  482. pdn->eeh_mode, pci_name (dev), dn->full_name);
  483. #endif
  484. return 0;
  485. }
  486. if (!pdn->eeh_config_addr) {
  487. __get_cpu_var(no_cfg_addr)++;
  488. return 0;
  489. }
  490. /* If we already have a pending isolation event for this
  491. * slot, we know it's bad already, we don't need to check.
  492. * Do this checking under a lock; as multiple PCI devices
  493. * in one slot might report errors simultaneously, and we
  494. * only want one error recovery routine running.
  495. */
  496. spin_lock_irqsave(&confirm_error_lock, flags);
  497. rc = 1;
  498. if (pdn->eeh_mode & EEH_MODE_ISOLATED) {
  499. pdn->eeh_check_count ++;
  500. if (pdn->eeh_check_count >= EEH_MAX_FAILS) {
  501. printk (KERN_ERR "EEH: Device driver ignored %d bad reads, panicing\n",
  502. pdn->eeh_check_count);
  503. dump_stack();
  504. /* re-read the slot reset state */
  505. if (read_slot_reset_state(pdn, rets) != 0)
  506. rets[0] = -1; /* reset state unknown */
  507. /* If we are here, then we hit an infinite loop. Stop. */
  508. panic("EEH: MMIO halt (%d) on device:%s\n", rets[0], pci_name(dev));
  509. }
  510. goto dn_unlock;
  511. }
  512. /*
  513. * Now test for an EEH failure. This is VERY expensive.
  514. * Note that the eeh_config_addr may be a parent device
  515. * in the case of a device behind a bridge, or it may be
  516. * function zero of a multi-function device.
  517. * In any case they must share a common PHB.
  518. */
  519. ret = read_slot_reset_state(pdn, rets);
  520. /* If the call to firmware failed, punt */
  521. if (ret != 0) {
  522. printk(KERN_WARNING "EEH: read_slot_reset_state() failed; rc=%d dn=%s\n",
  523. ret, dn->full_name);
  524. __get_cpu_var(false_positives)++;
  525. rc = 0;
  526. goto dn_unlock;
  527. }
  528. /* If EEH is not supported on this device, punt. */
  529. if (rets[1] != 1) {
  530. printk(KERN_WARNING "EEH: event on unsupported device, rc=%d dn=%s\n",
  531. ret, dn->full_name);
  532. __get_cpu_var(false_positives)++;
  533. rc = 0;
  534. goto dn_unlock;
  535. }
  536. /* If not the kind of error we know about, punt. */
  537. if (rets[0] != 2 && rets[0] != 4 && rets[0] != 5) {
  538. __get_cpu_var(false_positives)++;
  539. rc = 0;
  540. goto dn_unlock;
  541. }
  542. /* Note that config-io to empty slots may fail;
  543. * we recognize empty because they don't have children. */
  544. if ((rets[0] == 5) && (dn->child == NULL)) {
  545. __get_cpu_var(false_positives)++;
  546. rc = 0;
  547. goto dn_unlock;
  548. }
  549. __get_cpu_var(slot_resets)++;
  550. /* Avoid repeated reports of this failure, including problems
  551. * with other functions on this device, and functions under
  552. * bridges. */
  553. pe_dn = find_device_pe (dn);
  554. __eeh_mark_slot (pe_dn);
  555. spin_unlock_irqrestore(&confirm_error_lock, flags);
  556. eeh_send_failure_event (dn, dev, rets[0], rets[2]);
  557. /* Most EEH events are due to device driver bugs. Having
  558. * a stack trace will help the device-driver authors figure
  559. * out what happened. So print that out. */
  560. if (rets[0] != 5) dump_stack();
  561. return 1;
  562. dn_unlock:
  563. spin_unlock_irqrestore(&confirm_error_lock, flags);
  564. return rc;
  565. }
  566. EXPORT_SYMBOL_GPL(eeh_dn_check_failure);
  567. /**
  568. * eeh_check_failure - check if all 1's data is due to EEH slot freeze
  569. * @token i/o token, should be address in the form 0xA....
  570. * @val value, should be all 1's (XXX why do we need this arg??)
  571. *
  572. * Check for an EEH failure at the given token address. Call this
  573. * routine if the result of a read was all 0xff's and you want to
  574. * find out if this is due to an EEH slot freeze event. This routine
  575. * will query firmware for the EEH status.
  576. *
  577. * Note this routine is safe to call in an interrupt context.
  578. */
  579. unsigned long eeh_check_failure(const volatile void __iomem *token, unsigned long val)
  580. {
  581. unsigned long addr;
  582. struct pci_dev *dev;
  583. struct device_node *dn;
  584. /* Finding the phys addr + pci device; this is pretty quick. */
  585. addr = eeh_token_to_phys((unsigned long __force) token);
  586. dev = pci_get_device_by_addr(addr);
  587. if (!dev) {
  588. __get_cpu_var(no_device)++;
  589. return val;
  590. }
  591. dn = pci_device_to_OF_node(dev);
  592. eeh_dn_check_failure (dn, dev);
  593. pci_dev_put(dev);
  594. return val;
  595. }
  596. EXPORT_SYMBOL(eeh_check_failure);
  597. /* ------------------------------------------------------------- */
  598. /* The code below deals with error recovery */
  /**
   * eeh_slot_availability - ask firmware whether a slot is usable yet
   * @pdn: pci device node whose slot is queried
   *
   * Returns 0 if the slot is ready, a positive number of milliseconds to
   * wait if it is temporarily unavailable, and -1 if EEH is not supported,
   * the slot is permanently unavailable, or it is in any other state.
   * If the firmware query itself fails, its non-zero return code is
   * passed back unchanged.
   */
  static int
  eeh_slot_availability(struct pci_dn *pdn)
  {
      int rets[3];
      int status = read_slot_reset_state(pdn, rets);

      if (status != 0)
          return status;

      if (rets[1] == 0)
          return -1; /* EEH is not supported */

      switch (rets[0]) {
      case 0:
          return 0; /* Oll Korrect */
      case 5:
          /* rets[2] is the number of millisecs to wait; 0 means
           * permanently unavailable */
          return (rets[2] == 0) ? -1 : rets[2];
      default:
          return -1;
      }
  }
  618. /** rtas_pci_slot_reset raises/lowers the pci #RST line
  619. * state: 1/0 to raise/lower the #RST
  620. *
  621. * Clear the EEH-frozen condition on a slot. This routine
  622. * asserts the PCI #RST line if the 'state' argument is '1',
  623. * and drops the #RST line if 'state is '0'. This routine is
  624. * safe to call in an interrupt context.
  625. *
  626. */
  627. static void
  628. rtas_pci_slot_reset(struct pci_dn *pdn, int state)
  629. {
  630. int rc;
  631. BUG_ON (pdn==NULL);
  632. if (!pdn->phb) {
  633. printk (KERN_WARNING "EEH: in slot reset, device node %s has no phb\n",
  634. pdn->node->full_name);
  635. return;
  636. }
  637. rc = rtas_call(ibm_set_slot_reset,4,1, NULL,
  638. pdn->eeh_config_addr,
  639. BUID_HI(pdn->phb->buid),
  640. BUID_LO(pdn->phb->buid),
  641. state);
  642. if (rc) {
  643. printk (KERN_WARNING "EEH: Unable to reset the failed slot, (%d) #RST=%d dn=%s\n",
  644. rc, state, pdn->node->full_name);
  645. return;
  646. }
  647. if (state == 0)
  648. eeh_clear_slot (pdn->node->parent->child);
  649. }
  /* The PCI bus requires that the reset be held high for at least
   * a 100 milliseconds. We wait a bit longer 'just in case'. */
  #define PCI_BUS_RST_HOLD_TIME_MSEC 250

  /* After a PCI slot has been reset, the PCI Express spec requires
   * a 1.5 second idle time for the bus to stabilize, before starting
   * up traffic. */
  #define PCI_BUS_SETTLE_TIME_MSEC 1800

  /**
   * rtas_set_slot_reset - assert the pci #RST line for 1/4 second
   * @pdn: pci device node of the slot to be reset
   *
   * Asserts #RST, sleeps for the hold time, drops #RST and sleeps for the
   * settle time. Then polls firmware up to ten times: as long as
   * eeh_slot_availability() reports a positive wait time, sleeps for that
   * long plus 100 ms and asks again. Polling stops as soon as the slot is
   * ready or reports an error. This routine sleeps.
   */
  void
  rtas_set_slot_reset(struct pci_dn *pdn)
  {
      int attempt;

      rtas_pci_slot_reset (pdn, 1);
      msleep (PCI_BUS_RST_HOLD_TIME_MSEC);

      rtas_pci_slot_reset (pdn, 0);
      msleep (PCI_BUS_SETTLE_TIME_MSEC);

      /* Now double check with the firmware to make sure the device is
       * ready to be used; if not, wait for recovery. */
      for (attempt = 0; attempt < 10; attempt++) {
          int wait_ms = eeh_slot_availability (pdn);

          if (wait_ms <= 0)
              break;
          msleep (wait_ms + 100);
      }
  }
  676. /* ------------------------------------------------------- */
  677. /** Save and restore of PCI BARs
  678. *
  679. * Although firmware will set up BARs during boot, it doesn't
  680. * set up device BAR's after a device reset, although it will,
  681. * if requested, set up bridge configuration. Thus, we need to
  682. * configure the PCI devices ourselves.
  683. */
  684. /**
  685. * __restore_bars - Restore the Base Address Registers
  686. * Loads the PCI configuration space base address registers,
  687. * the expansion ROM base address, the latency timer, and etc.
  688. * from the saved values in the device node.
  689. */
  690. static inline void __restore_bars (struct pci_dn *pdn)
  691. {
  692. int i;
  693. if (NULL==pdn->phb) return;
  694. for (i=4; i<10; i++) {
  695. rtas_write_config(pdn, i*4, 4, pdn->config_space[i]);
  696. }
  697. /* 12 == Expansion ROM Address */
  698. rtas_write_config(pdn, 12*4, 4, pdn->config_space[12]);
  699. #define BYTE_SWAP(OFF) (8*((OFF)/4)+3-(OFF))
  700. #define SAVED_BYTE(OFF) (((u8 *)(pdn->config_space))[BYTE_SWAP(OFF)])
  701. rtas_write_config (pdn, PCI_CACHE_LINE_SIZE, 1,
  702. SAVED_BYTE(PCI_CACHE_LINE_SIZE));
  703. rtas_write_config (pdn, PCI_LATENCY_TIMER, 1,
  704. SAVED_BYTE(PCI_LATENCY_TIMER));
  705. /* max latency, min grant, interrupt pin and line */
  706. rtas_write_config(pdn, 15*4, 4, pdn->config_space[15]);
  707. }
  708. /**
  709. * eeh_restore_bars - restore the PCI config space info
  710. *
  711. * This routine performs a recursive walk to the children
  712. * of this device as well.
  713. */
  714. void eeh_restore_bars(struct pci_dn *pdn)
  715. {
  716. struct device_node *dn;
  717. if (!pdn)
  718. return;
  719. if (! pdn->eeh_is_bridge)
  720. __restore_bars (pdn);
  721. dn = pdn->node->child;
  722. while (dn) {
  723. eeh_restore_bars (PCI_DN(dn));
  724. dn = dn->sibling;
  725. }
  726. }
  727. /**
  728. * eeh_save_bars - save device bars
  729. *
  730. * Save the values of the device bars. Unlike the restore
  731. * routine, this routine is *not* recursive. This is because
  732. * PCI devices are added individuallly; but, for the restore,
  733. * an entire slot is reset at a time.
  734. */
  735. static void eeh_save_bars(struct pci_dev * pdev, struct pci_dn *pdn)
  736. {
  737. int i;
  738. if (!pdev || !pdn )
  739. return;
  740. for (i = 0; i < 16; i++)
  741. pci_read_config_dword(pdev, i * 4, &pdn->config_space[i]);
  742. if (pdev->hdr_type == PCI_HEADER_TYPE_BRIDGE)
  743. pdn->eeh_is_bridge = 1;
  744. }
  745. void
  746. rtas_configure_bridge(struct pci_dn *pdn)
  747. {
  748. int token = rtas_token ("ibm,configure-bridge");
  749. int rc;
  750. if (token == RTAS_UNKNOWN_SERVICE)
  751. return;
  752. rc = rtas_call(token,3,1, NULL,
  753. pdn->eeh_config_addr,
  754. BUID_HI(pdn->phb->buid),
  755. BUID_LO(pdn->phb->buid));
  756. if (rc) {
  757. printk (KERN_WARNING "EEH: Unable to configure device bridge (%d) for %s\n",
  758. rc, pdn->node->full_name);
  759. }
  760. }
  761. /* ------------------------------------------------------------- */
  762. /* The code below deals with enabling EEH for devices during the
  763. * early boot sequence. EEH must be enabled before any PCI probing
  764. * can be done.
  765. */
  /* Function code handed to the ibm,set-eeh-option RTAS call to turn EEH on */
  #define EEH_ENABLE 1

  /* PHB bus unit id, split into halves, passed down to early_enable_eeh() */
  struct eeh_early_enable_info {
      unsigned int buid_hi;    /* BUID_HI() part of the PHB buid */
      unsigned int buid_lo;    /* BUID_LO() part of the PHB buid */
  };
  771. /* Enable eeh for the given device node. */
  772. static void *early_enable_eeh(struct device_node *dn, void *data)
  773. {
  774. struct eeh_early_enable_info *info = data;
  775. int ret;
  776. char *status = get_property(dn, "status", NULL);
  777. u32 *class_code = (u32 *)get_property(dn, "class-code", NULL);
  778. u32 *vendor_id = (u32 *)get_property(dn, "vendor-id", NULL);
  779. u32 *device_id = (u32 *)get_property(dn, "device-id", NULL);
  780. u32 *regs;
  781. int enable;
  782. struct pci_dn *pdn = PCI_DN(dn);
  783. pdn->eeh_mode = 0;
  784. pdn->eeh_check_count = 0;
  785. pdn->eeh_freeze_count = 0;
  786. if (status && strcmp(status, "ok") != 0)
  787. return NULL; /* ignore devices with bad status */
  788. /* Ignore bad nodes. */
  789. if (!class_code || !vendor_id || !device_id)
  790. return NULL;
  791. /* There is nothing to check on PCI to ISA bridges */
  792. if (dn->type && !strcmp(dn->type, "isa")) {
  793. pdn->eeh_mode |= EEH_MODE_NOCHECK;
  794. return NULL;
  795. }
  796. /*
  797. * Now decide if we are going to "Disable" EEH checking
  798. * for this device. We still run with the EEH hardware active,
  799. * but we won't be checking for ff's. This means a driver
  800. * could return bad data (very bad!), an interrupt handler could
  801. * hang waiting on status bits that won't change, etc.
  802. * But there are a few cases like display devices that make sense.
  803. */
  804. enable = 1; /* i.e. we will do checking */
  805. if ((*class_code >> 16) == PCI_BASE_CLASS_DISPLAY)
  806. enable = 0;
  807. if (!enable)
  808. pdn->eeh_mode |= EEH_MODE_NOCHECK;
  809. /* Ok... see if this device supports EEH. Some do, some don't,
  810. * and the only way to find out is to check each and every one. */
  811. regs = (u32 *)get_property(dn, "reg", NULL);
  812. if (regs) {
  813. /* First register entry is addr (00BBSS00) */
  814. /* Try to enable eeh */
  815. ret = rtas_call(ibm_set_eeh_option, 4, 1, NULL,
  816. regs[0], info->buid_hi, info->buid_lo,
  817. EEH_ENABLE);
  818. if (ret == 0) {
  819. eeh_subsystem_enabled = 1;
  820. pdn->eeh_mode |= EEH_MODE_SUPPORTED;
  821. pdn->eeh_config_addr = regs[0];
  822. #ifdef DEBUG
  823. printk(KERN_DEBUG "EEH: %s: eeh enabled\n", dn->full_name);
  824. #endif
  825. } else {
  826. /* This device doesn't support EEH, but it may have an
  827. * EEH parent, in which case we mark it as supported. */
  828. if (dn->parent && PCI_DN(dn->parent)
  829. && (PCI_DN(dn->parent)->eeh_mode & EEH_MODE_SUPPORTED)) {
  830. /* Parent supports EEH. */
  831. pdn->eeh_mode |= EEH_MODE_SUPPORTED;
  832. pdn->eeh_config_addr = PCI_DN(dn->parent)->eeh_config_addr;
  833. return NULL;
  834. }
  835. }
  836. } else {
  837. printk(KERN_WARNING "EEH: %s: unable to get reg property.\n",
  838. dn->full_name);
  839. }
  840. return NULL;
  841. }
  842. /*
  843. * Initialize EEH by trying to enable it for all of the adapters in the system.
  844. * As a side effect we can determine here if eeh is supported at all.
  845. * Note that we leave EEH on so failed config cycles won't cause a machine
  846. * check. If a user turns off EEH for a particular adapter they are really
  847. * telling Linux to ignore errors. Some hardware (e.g. POWER5) won't
  848. * grant access to a slot if EEH isn't enabled, and so we always enable
  849. * EEH for all slots/all devices.
  850. *
  851. * The eeh-force-off option disables EEH checking globally, for all slots.
  852. * Even if force-off is set, the EEH hardware is still enabled, so that
  853. * newer systems can boot.
  854. */
  855. void __init eeh_init(void)
  856. {
  857. struct device_node *phb, *np;
  858. struct eeh_early_enable_info info;
  859. spin_lock_init(&confirm_error_lock);
  860. spin_lock_init(&slot_errbuf_lock);
  861. np = of_find_node_by_path("/rtas");
  862. if (np == NULL)
  863. return;
  864. ibm_set_eeh_option = rtas_token("ibm,set-eeh-option");
  865. ibm_set_slot_reset = rtas_token("ibm,set-slot-reset");
  866. ibm_read_slot_reset_state2 = rtas_token("ibm,read-slot-reset-state2");
  867. ibm_read_slot_reset_state = rtas_token("ibm,read-slot-reset-state");
  868. ibm_slot_error_detail = rtas_token("ibm,slot-error-detail");
  869. if (ibm_set_eeh_option == RTAS_UNKNOWN_SERVICE)
  870. return;
  871. eeh_error_buf_size = rtas_token("rtas-error-log-max");
  872. if (eeh_error_buf_size == RTAS_UNKNOWN_SERVICE) {
  873. eeh_error_buf_size = 1024;
  874. }
  875. if (eeh_error_buf_size > RTAS_ERROR_LOG_MAX) {
  876. printk(KERN_WARNING "EEH: rtas-error-log-max is bigger than allocated "
  877. "buffer ! (%d vs %d)", eeh_error_buf_size, RTAS_ERROR_LOG_MAX);
  878. eeh_error_buf_size = RTAS_ERROR_LOG_MAX;
  879. }
  880. /* Enable EEH for all adapters. Note that eeh requires buid's */
  881. for (phb = of_find_node_by_name(NULL, "pci"); phb;
  882. phb = of_find_node_by_name(phb, "pci")) {
  883. unsigned long buid;
  884. buid = get_phb_buid(phb);
  885. if (buid == 0 || PCI_DN(phb) == NULL)
  886. continue;
  887. info.buid_lo = BUID_LO(buid);
  888. info.buid_hi = BUID_HI(buid);
  889. traverse_pci_devices(phb, early_enable_eeh, &info);
  890. }
  891. if (eeh_subsystem_enabled)
  892. printk(KERN_INFO "EEH: PCI Enhanced I/O Error Handling Enabled\n");
  893. else
  894. printk(KERN_WARNING "EEH: No capable adapters found\n");
  895. }
  896. /**
  897. * eeh_add_device_early - enable EEH for the indicated device_node
  898. * @dn: device node for which to set up EEH
  899. *
  900. * This routine must be used to perform EEH initialization for PCI
  901. * devices that were added after system boot (e.g. hotplug, dlpar).
  902. * This routine must be called before any i/o is performed to the
  903. * adapter (inluding any config-space i/o).
  904. * Whether this actually enables EEH or not for this device depends
  905. * on the CEC architecture, type of the device, on earlier boot
  906. * command-line arguments & etc.
  907. */
  908. void eeh_add_device_early(struct device_node *dn)
  909. {
  910. struct pci_controller *phb;
  911. struct eeh_early_enable_info info;
  912. if (!dn || !PCI_DN(dn))
  913. return;
  914. phb = PCI_DN(dn)->phb;
  915. if (NULL == phb || 0 == phb->buid) {
  916. printk(KERN_WARNING "EEH: Expected buid but found none for %s\n",
  917. dn->full_name);
  918. dump_stack();
  919. return;
  920. }
  921. info.buid_hi = BUID_HI(phb->buid);
  922. info.buid_lo = BUID_LO(phb->buid);
  923. early_enable_eeh(dn, &info);
  924. }
  925. EXPORT_SYMBOL_GPL(eeh_add_device_early);
  926. /**
  927. * eeh_add_device_late - perform EEH initialization for the indicated pci device
  928. * @dev: pci device for which to set up EEH
  929. *
  930. * This routine must be used to complete EEH initialization for PCI
  931. * devices that were added after system boot (e.g. hotplug, dlpar).
  932. */
  933. void eeh_add_device_late(struct pci_dev *dev)
  934. {
  935. struct device_node *dn;
  936. struct pci_dn *pdn;
  937. if (!dev || !eeh_subsystem_enabled)
  938. return;
  939. #ifdef DEBUG
  940. printk(KERN_DEBUG "EEH: adding device %s\n", pci_name(dev));
  941. #endif
  942. pci_dev_get (dev);
  943. dn = pci_device_to_OF_node(dev);
  944. pdn = PCI_DN(dn);
  945. pdn->pcidev = dev;
  946. pci_addr_cache_insert_device (dev);
  947. eeh_save_bars(dev, pdn);
  948. }
  949. EXPORT_SYMBOL_GPL(eeh_add_device_late);
  950. /**
  951. * eeh_remove_device - undo EEH setup for the indicated pci device
  952. * @dev: pci device to be removed
  953. *
  954. * This routine should be when a device is removed from a running
  955. * system (e.g. by hotplug or dlpar).
  956. */
  957. void eeh_remove_device(struct pci_dev *dev)
  958. {
  959. struct device_node *dn;
  960. if (!dev || !eeh_subsystem_enabled)
  961. return;
  962. /* Unregister the device with the EEH/PCI address search system */
  963. #ifdef DEBUG
  964. printk(KERN_DEBUG "EEH: remove device %s\n", pci_name(dev));
  965. #endif
  966. pci_addr_cache_remove_device(dev);
  967. dn = pci_device_to_OF_node(dev);
  968. PCI_DN(dn)->pcidev = NULL;
  969. pci_dev_put (dev);
  970. }
  971. EXPORT_SYMBOL_GPL(eeh_remove_device);
  972. static int proc_eeh_show(struct seq_file *m, void *v)
  973. {
  974. unsigned int cpu;
  975. unsigned long ffs = 0, positives = 0, failures = 0;
  976. unsigned long resets = 0;
  977. unsigned long no_dev = 0, no_dn = 0, no_cfg = 0, no_check = 0;
  978. for_each_cpu(cpu) {
  979. ffs += per_cpu(total_mmio_ffs, cpu);
  980. positives += per_cpu(false_positives, cpu);
  981. failures += per_cpu(ignored_failures, cpu);
  982. resets += per_cpu(slot_resets, cpu);
  983. no_dev += per_cpu(no_device, cpu);
  984. no_dn += per_cpu(no_dn, cpu);
  985. no_cfg += per_cpu(no_cfg_addr, cpu);
  986. no_check += per_cpu(ignored_check, cpu);
  987. }
  988. if (0 == eeh_subsystem_enabled) {
  989. seq_printf(m, "EEH Subsystem is globally disabled\n");
  990. seq_printf(m, "eeh_total_mmio_ffs=%ld\n", ffs);
  991. } else {
  992. seq_printf(m, "EEH Subsystem is enabled\n");
  993. seq_printf(m,
  994. "no device=%ld\n"
  995. "no device node=%ld\n"
  996. "no config address=%ld\n"
  997. "check not wanted=%ld\n"
  998. "eeh_total_mmio_ffs=%ld\n"
  999. "eeh_false_positives=%ld\n"
  1000. "eeh_ignored_failures=%ld\n"
  1001. "eeh_slot_resets=%ld\n",
  1002. no_dev, no_dn, no_cfg, no_check,
  1003. ffs, positives, failures, resets);
  1004. }
  1005. return 0;
  1006. }
  1007. static int proc_eeh_open(struct inode *inode, struct file *file)
  1008. {
  1009. return single_open(file, proc_eeh_show, NULL);
  1010. }
  1011. static struct file_operations proc_eeh_operations = {
  1012. .open = proc_eeh_open,
  1013. .read = seq_read,
  1014. .llseek = seq_lseek,
  1015. .release = single_release,
  1016. };
  1017. static int __init eeh_init_proc(void)
  1018. {
  1019. struct proc_dir_entry *e;
  1020. if (platform_is_pseries()) {
  1021. e = create_proc_entry("ppc64/eeh", 0, NULL);
  1022. if (e)
  1023. e->proc_fops = &proc_eeh_operations;
  1024. }
  1025. return 0;
  1026. }
  1027. __initcall(eeh_init_proc);