eeh.c
/*
 * Copyright IBM Corporation 2001, 2005, 2006
 * Copyright Dave Engebretsen & Todd Inglett 2001
 * Copyright Linas Vepstas 2005, 2006
 * Copyright 2001-2012 IBM Corporation.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
 *
 * Please address comments and feedback to Linas Vepstas <linas@austin.ibm.com>
 */

#include <linux/delay.h>
#include <linux/sched.h>
#include <linux/init.h>
#include <linux/list.h>
#include <linux/pci.h>
#include <linux/proc_fs.h>
#include <linux/rbtree.h>
#include <linux/seq_file.h>
#include <linux/spinlock.h>
#include <linux/export.h>
#include <linux/of.h>
#include <linux/atomic.h>
#include <asm/eeh.h>
#include <asm/eeh_event.h>
#include <asm/io.h>
#include <asm/machdep.h>
#include <asm/ppc-pci.h>
#include <asm/rtas.h>
/** Overview:
 * EEH, or "Extended Error Handling" is a PCI bridge technology for
 * dealing with PCI bus errors that can't be dealt with within the
 * usual PCI framework, except by check-stopping the CPU.  Systems
 * that are designed for high-availability/reliability cannot afford
 * to crash due to a "mere" PCI error, thus the need for EEH.
 * An EEH-capable bridge operates by converting a detected error
 * into a "slot freeze", taking the PCI adapter off-line, making
 * the slot behave, from the OS'es point of view, as if the slot
 * were "empty": all reads return 0xff's and all writes are silently
 * ignored.  EEH slot isolation events can be triggered by parity
 * errors on the address or data busses (e.g. during posted writes),
 * which in turn might be caused by low voltage on the bus, dust,
 * vibration, humidity, radioactivity or plain-old failed hardware.
 *
 * Note, however, that one of the leading causes of EEH slot
 * freeze events are buggy device drivers, buggy device microcode,
 * or buggy device hardware.  This is because any attempt by the
 * device to bus-master data to a memory address that is not
 * assigned to the device will trigger a slot freeze.  (The idea
 * is to prevent devices-gone-wild from corrupting system memory).
 * Buggy hardware/drivers will have a miserable time co-existing
 * with EEH.
 *
 * Ideally, a PCI device driver, when suspecting that an isolation
 * event has occurred (e.g. by reading 0xff's), will then ask EEH
 * whether this is the case, and then take appropriate steps to
 * reset the PCI slot, the PCI device, and then resume operations.
 * However, until that day, the checking is done here, with the
 * eeh_check_failure() routine embedded in the MMIO macros.  If
 * the slot is found to be isolated, an "EEH Event" is synthesized
 * and sent out for processing.
 */
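
/* Roughly speaking, the arch MMIO accessors wrap each read with a check
 * like the simplified sketch below (the real wrappers live in asm/eeh.h
 * and asm/io.h), so that an all-1s result is cross-checked with firmware
 * before being handed back to the driver:
 *
 *	u32 val = in_le32(addr);
 *	if (val == (u32)~0)
 *		val = eeh_check_failure(addr, val);
 *	return val;
 */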

/* If a device driver keeps reading an MMIO register in an interrupt
 * handler after a slot isolation event, it might be broken.
 * This sets the threshold for how many read attempts we allow
 * before printing an error message.
 */
#define EEH_MAX_FAILS 2100000

/* Time to wait for a PCI slot to report status, in milliseconds */
#define PCI_BUS_RESET_WAIT_MSEC (60*1000)

/* RTAS tokens */
static int ibm_set_eeh_option;
static int ibm_set_slot_reset;
static int ibm_read_slot_reset_state;
static int ibm_read_slot_reset_state2;
static int ibm_slot_error_detail;
static int ibm_get_config_addr_info;
static int ibm_get_config_addr_info2;
static int ibm_configure_bridge;
static int ibm_configure_pe;

int eeh_subsystem_enabled;
EXPORT_SYMBOL(eeh_subsystem_enabled);

/* Lock to avoid races due to multiple reports of an error */
static DEFINE_RAW_SPINLOCK(confirm_error_lock);

/* Buffer for reporting slot-error-detail rtas calls.  It's here
 * in BSS, and not dynamically allocated, so that it ends up in
 * RMO where RTAS can access it.
 */
static unsigned char slot_errbuf[RTAS_ERROR_LOG_MAX];
static DEFINE_SPINLOCK(slot_errbuf_lock);
static int eeh_error_buf_size;

/* Buffer for reporting pci register dumps.  It's here in BSS, and
 * not dynamically allocated, so that it ends up in RMO where RTAS
 * can access it.
 */
#define EEH_PCI_REGS_LOG_LEN 4096
static unsigned char pci_regs_buf[EEH_PCI_REGS_LOG_LEN];

/* System monitoring statistics */
static unsigned long no_device;
static unsigned long no_dn;
static unsigned long no_cfg_addr;
static unsigned long ignored_check;
static unsigned long total_mmio_ffs;
static unsigned long false_positives;
static unsigned long slot_resets;
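
/* The OF "class-code" property is a 24-bit value whose base class sits in
 * bits 23:16 (0x06 for bridges), which is what the shift below extracts.
 */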
#define IS_BRIDGE(class_code) (((class_code)>>16) == PCI_BASE_CLASS_BRIDGE)

/**
 * eeh_rtas_slot_error_detail - Retrieve error log through RTAS call
 * @pdn: device node
 * @severity: temporary or permanent error log
 * @driver_log: driver log to be combined with the retrieved error log
 * @loglen: length of driver log
 *
 * This routine should be called to retrieve error log through the dedicated
 * RTAS call.
 */
static void eeh_rtas_slot_error_detail(struct pci_dn *pdn, int severity,
				       char *driver_log, size_t loglen)
{
	int config_addr;
	unsigned long flags;
	int rc;

	/* Log the error with the rtas logger */
	spin_lock_irqsave(&slot_errbuf_lock, flags);
	memset(slot_errbuf, 0, eeh_error_buf_size);

	/* Use PE configuration address, if present */
	config_addr = pdn->eeh_config_addr;
	if (pdn->eeh_pe_config_addr)
		config_addr = pdn->eeh_pe_config_addr;

	rc = rtas_call(ibm_slot_error_detail,
		       8, 1, NULL, config_addr,
		       BUID_HI(pdn->phb->buid),
		       BUID_LO(pdn->phb->buid),
		       virt_to_phys(driver_log), loglen,
		       virt_to_phys(slot_errbuf),
		       eeh_error_buf_size,
		       severity);
	if (rc == 0)
		log_error(slot_errbuf, ERR_TYPE_RTAS_LOG, 0);
	spin_unlock_irqrestore(&slot_errbuf_lock, flags);
}

/**
 * eeh_gather_pci_data - Copy assorted PCI config space registers to buff
 * @pdn: device to report data for
 * @buf: point to buffer in which to log
 * @len: amount of room in buffer
 *
 * This routine captures assorted PCI configuration space data,
 * and puts them into a buffer for RTAS error logging.
 */
static size_t eeh_gather_pci_data(struct pci_dn *pdn, char *buf, size_t len)
{
	struct pci_dev *dev = pdn->pcidev;
	u32 cfg;
	int cap, i;
	int n = 0;

	n += scnprintf(buf+n, len-n, "%s\n", pdn->node->full_name);
	printk(KERN_WARNING "EEH: of node=%s\n", pdn->node->full_name);

	rtas_read_config(pdn, PCI_VENDOR_ID, 4, &cfg);
	n += scnprintf(buf+n, len-n, "dev/vend:%08x\n", cfg);
	printk(KERN_WARNING "EEH: PCI device/vendor: %08x\n", cfg);

	rtas_read_config(pdn, PCI_COMMAND, 4, &cfg);
	n += scnprintf(buf+n, len-n, "cmd/stat:%x\n", cfg);
	printk(KERN_WARNING "EEH: PCI cmd/status register: %08x\n", cfg);

	if (!dev) {
		printk(KERN_WARNING "EEH: no PCI device for this of node\n");
		return n;
	}

	/* Gather bridge-specific registers */
	if (dev->class >> 16 == PCI_BASE_CLASS_BRIDGE) {
		rtas_read_config(pdn, PCI_SEC_STATUS, 2, &cfg);
		n += scnprintf(buf+n, len-n, "sec stat:%x\n", cfg);
		printk(KERN_WARNING "EEH: Bridge secondary status: %04x\n", cfg);

		rtas_read_config(pdn, PCI_BRIDGE_CONTROL, 2, &cfg);
		n += scnprintf(buf+n, len-n, "brdg ctl:%x\n", cfg);
		printk(KERN_WARNING "EEH: Bridge control: %04x\n", cfg);
	}

	/* Dump out the PCI-X command and status regs */
	cap = pci_find_capability(dev, PCI_CAP_ID_PCIX);
	if (cap) {
		rtas_read_config(pdn, cap, 4, &cfg);
		n += scnprintf(buf+n, len-n, "pcix-cmd:%x\n", cfg);
		printk(KERN_WARNING "EEH: PCI-X cmd: %08x\n", cfg);

		rtas_read_config(pdn, cap+4, 4, &cfg);
		n += scnprintf(buf+n, len-n, "pcix-stat:%x\n", cfg);
		printk(KERN_WARNING "EEH: PCI-X status: %08x\n", cfg);
	}

	/* If PCI-E capable, dump PCI-E cap 10, and the AER */
	cap = pci_find_capability(dev, PCI_CAP_ID_EXP);
	if (cap) {
		n += scnprintf(buf+n, len-n, "pci-e cap10:\n");
		printk(KERN_WARNING
		       "EEH: PCI-E capabilities and status follow:\n");

		for (i=0; i<=8; i++) {
			rtas_read_config(pdn, cap+4*i, 4, &cfg);
			n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg);
			printk(KERN_WARNING "EEH: PCI-E %02x: %08x\n", i, cfg);
		}

		cap = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR);
		if (cap) {
			n += scnprintf(buf+n, len-n, "pci-e AER:\n");
			printk(KERN_WARNING
			       "EEH: PCI-E AER capability register set follows:\n");

			for (i=0; i<14; i++) {
				rtas_read_config(pdn, cap+4*i, 4, &cfg);
				n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg);
				printk(KERN_WARNING "EEH: PCI-E AER %02x: %08x\n", i, cfg);
			}
		}
	}

	/* Gather status on devices under the bridge */
	if (dev->class >> 16 == PCI_BASE_CLASS_BRIDGE) {
		struct device_node *dn;

		for_each_child_of_node(pdn->node, dn) {
			pdn = PCI_DN(dn);
			if (pdn)
				n += eeh_gather_pci_data(pdn, buf+n, len-n);
		}
	}

	return n;
}

/**
 * eeh_slot_error_detail - Generate combined log including driver log and error log
 * @pdn: device node
 * @severity: temporary or permanent error log
 *
 * This routine should be called to generate the combined log, which
 * is comprised of driver log and error log. The driver log is figured
 * out from the config space of the corresponding PCI device, while
 * the error log is fetched through platform dependent function call.
 */
void eeh_slot_error_detail(struct pci_dn *pdn, int severity)
{
	size_t loglen = 0;

	pci_regs_buf[0] = 0;

	eeh_pci_enable(pdn, EEH_THAW_MMIO);
	eeh_configure_bridge(pdn);
	eeh_restore_bars(pdn);
	loglen = eeh_gather_pci_data(pdn, pci_regs_buf, EEH_PCI_REGS_LOG_LEN);

	eeh_rtas_slot_error_detail(pdn, severity, pci_regs_buf, loglen);
}

/**
 * eeh_read_slot_reset_state - Read the reset state of a device node's slot
 * @pdn: pci device node to read
 * @rets: array to return results in
 *
 * Read the reset state of a device node's slot through platform dependent
 * function call.
 */
static int eeh_read_slot_reset_state(struct pci_dn *pdn, int rets[])
{
	int token, outputs;
	int config_addr;

	if (ibm_read_slot_reset_state2 != RTAS_UNKNOWN_SERVICE) {
		token = ibm_read_slot_reset_state2;
		outputs = 4;
	} else {
		token = ibm_read_slot_reset_state;
		rets[2] = 0; /* fake PE Unavailable info */
		outputs = 3;
	}

	/* Use PE configuration address, if present */
	config_addr = pdn->eeh_config_addr;
	if (pdn->eeh_pe_config_addr)
		config_addr = pdn->eeh_pe_config_addr;

	return rtas_call(token, 3, outputs, rets, config_addr,
			 BUID_HI(pdn->phb->buid), BUID_LO(pdn->phb->buid));
}

/**
 * eeh_wait_for_slot_status - Returns error status of slot
 * @pdn: pci device node
 * @max_wait_msecs: maximum number of milliseconds to wait
 *
 * Return negative value if a permanent error, else return
 * Partition Endpoint (PE) status value.
 *
 * If @max_wait_msecs is positive, then this routine will
 * sleep until a valid status can be obtained, or until
 * the max allowed wait time is exceeded, in which case
 * a -2 is returned.
 */
int eeh_wait_for_slot_status(struct pci_dn *pdn, int max_wait_msecs)
{
	int rc;
	int rets[3];
	int mwait;

	while (1) {
		rc = eeh_read_slot_reset_state(pdn, rets);
		if (rc) return rc;
		if (rets[1] == 0) return -1; /* EEH is not supported */

		if (rets[0] != 5) return rets[0]; /* return actual status */

		if (rets[2] == 0) return -1; /* permanently unavailable */

		if (max_wait_msecs <= 0) break;

		mwait = rets[2];
		if (mwait <= 0) {
			printk(KERN_WARNING "EEH: Firmware returned bad wait value=%d\n",
			       mwait);
			mwait = 1000;
		} else if (mwait > 300*1000) {
			printk(KERN_WARNING "EEH: Firmware is taking too long, time=%d\n",
			       mwait);
			mwait = 300*1000;
		}
		max_wait_msecs -= mwait;
		msleep(mwait);
	}

	printk(KERN_WARNING "EEH: Timed out waiting for slot status\n");
	return -2;
}

/**
 * eeh_token_to_phys - Convert EEH address token to phys address
 * @token: I/O token, should be address in the form 0xA....
 *
 * This routine should be called to convert virtual I/O address
 * to physical one.
 */
static inline unsigned long eeh_token_to_phys(unsigned long token)
{
	pte_t *ptep;
	unsigned long pa;

	ptep = find_linux_pte(init_mm.pgd, token);
	if (!ptep)
		return token;
	pa = pte_pfn(*ptep) << PAGE_SHIFT;

	return pa | (token & (PAGE_SIZE-1));
}

/**
 * eeh_find_device_pe - Retrieve the PE for the given device
 * @dn: device node
 *
 * Return the PE under which this device lies
 */
struct device_node *eeh_find_device_pe(struct device_node *dn)
{
	while ((dn->parent) && PCI_DN(dn->parent) &&
	       (PCI_DN(dn->parent)->eeh_mode & EEH_MODE_SUPPORTED)) {
		dn = dn->parent;
	}

	return dn;
}

/**
 * __eeh_mark_slot - Mark all child devices as failed
 * @parent: parent device
 * @mode_flag: failure flag
 *
 * Mark all devices that are children of this device as failed.
 * Mark the device driver too, so that it can see the failure
 * immediately; this is critical, since some drivers poll
 * status registers in interrupts ... If a driver is polling,
 * and the slot is frozen, then the driver can deadlock in
 * an interrupt context, which is bad.
 */
static void __eeh_mark_slot(struct device_node *parent, int mode_flag)
{
	struct device_node *dn;

	for_each_child_of_node(parent, dn) {
		if (PCI_DN(dn)) {
			/* Mark the pci device driver too */
			struct pci_dev *dev = PCI_DN(dn)->pcidev;

			PCI_DN(dn)->eeh_mode |= mode_flag;

			if (dev && dev->driver)
				dev->error_state = pci_channel_io_frozen;

			__eeh_mark_slot(dn, mode_flag);
		}
	}
}

/**
 * eeh_mark_slot - Mark the indicated device and its children as failed
 * @dn: parent device
 * @mode_flag: failure flag
 *
 * Mark the indicated device and its child devices as failed.
 * The device drivers are marked as failed as well.
 */
void eeh_mark_slot(struct device_node *dn, int mode_flag)
{
	struct pci_dev *dev;

	dn = eeh_find_device_pe(dn);

	/* Back up one, since config addrs might be shared */
	if (!pcibios_find_pci_bus(dn) && PCI_DN(dn->parent))
		dn = dn->parent;

	PCI_DN(dn)->eeh_mode |= mode_flag;

	/* Mark the pci device too */
	dev = PCI_DN(dn)->pcidev;
	if (dev)
		dev->error_state = pci_channel_io_frozen;

	__eeh_mark_slot(dn, mode_flag);
}

/**
 * __eeh_clear_slot - Clear failure flag for the child devices
 * @parent: parent device
 * @mode_flag: flag to be cleared
 *
 * Clear failure flag for the child devices.
 */
static void __eeh_clear_slot(struct device_node *parent, int mode_flag)
{
	struct device_node *dn;

	for_each_child_of_node(parent, dn) {
		if (PCI_DN(dn)) {
			PCI_DN(dn)->eeh_mode &= ~mode_flag;
			PCI_DN(dn)->eeh_check_count = 0;
			__eeh_clear_slot(dn, mode_flag);
		}
	}
}

/**
 * eeh_clear_slot - Clear failure flag for the indicated device and its children
 * @dn: parent device
 * @mode_flag: flag to be cleared
 *
 * Clear failure flag for the indicated device and its children.
 */
void eeh_clear_slot(struct device_node *dn, int mode_flag)
{
	unsigned long flags;

	raw_spin_lock_irqsave(&confirm_error_lock, flags);

	dn = eeh_find_device_pe(dn);

	/* Back up one, since config addrs might be shared */
	if (!pcibios_find_pci_bus(dn) && PCI_DN(dn->parent))
		dn = dn->parent;

	PCI_DN(dn)->eeh_mode &= ~mode_flag;
	PCI_DN(dn)->eeh_check_count = 0;
	__eeh_clear_slot(dn, mode_flag);

	raw_spin_unlock_irqrestore(&confirm_error_lock, flags);
}

/**
 * eeh_dn_check_failure - Check if all 1's data is due to EEH slot freeze
 * @dn: device node
 * @dev: pci device, if known
 *
 * Check for an EEH failure for the given device node.  Call this
 * routine if the result of a read was all 0xff's and you want to
 * find out if this is due to an EEH slot freeze.  This routine
 * will query firmware for the EEH status.
 *
 * Returns 0 if there has not been an EEH error; otherwise returns
 * a non-zero value and queues up a slot isolation event notification.
 *
 * It is safe to call this routine in an interrupt context.
 */
int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev)
{
	int ret;
	int rets[3];
	unsigned long flags;
	struct pci_dn *pdn;
	int rc = 0;
	const char *location;

	total_mmio_ffs++;

	if (!eeh_subsystem_enabled)
		return 0;

	if (!dn) {
		no_dn++;
		return 0;
	}
	dn = eeh_find_device_pe(dn);
	pdn = PCI_DN(dn);

	/* Access to IO BARs might get this far and still not want checking. */
	if (!(pdn->eeh_mode & EEH_MODE_SUPPORTED) ||
	    pdn->eeh_mode & EEH_MODE_NOCHECK) {
		ignored_check++;
		pr_debug("EEH: Ignored check (%x) for %s %s\n",
			 pdn->eeh_mode, eeh_pci_name(dev), dn->full_name);
		return 0;
	}

	if (!pdn->eeh_config_addr && !pdn->eeh_pe_config_addr) {
		no_cfg_addr++;
		return 0;
	}

	/* If we already have a pending isolation event for this
	 * slot, we know it's bad already, we don't need to check.
	 * Do this checking under a lock; as multiple PCI devices
	 * in one slot might report errors simultaneously, and we
	 * only want one error recovery routine running.
	 */
	raw_spin_lock_irqsave(&confirm_error_lock, flags);
	rc = 1;
	if (pdn->eeh_mode & EEH_MODE_ISOLATED) {
		pdn->eeh_check_count++;
		if (pdn->eeh_check_count % EEH_MAX_FAILS == 0) {
			location = of_get_property(dn, "ibm,loc-code", NULL);
			printk(KERN_ERR "EEH: %d reads ignored for recovering device at "
				"location=%s driver=%s pci addr=%s\n",
				pdn->eeh_check_count, location,
				eeh_driver_name(dev), eeh_pci_name(dev));
			printk(KERN_ERR "EEH: Might be infinite loop in %s driver\n",
				eeh_driver_name(dev));
			dump_stack();
		}
		goto dn_unlock;
	}

	/*
	 * Now test for an EEH failure.  This is VERY expensive.
	 * Note that the eeh_config_addr may be a parent device
	 * in the case of a device behind a bridge, or it may be
	 * function zero of a multi-function device.
	 * In any case they must share a common PHB.
	 */
	ret = eeh_read_slot_reset_state(pdn, rets);

	/* If the call to firmware failed, punt */
	if (ret != 0) {
		printk(KERN_WARNING "EEH: eeh_read_slot_reset_state() failed; rc=%d dn=%s\n",
		       ret, dn->full_name);
		false_positives++;
		pdn->eeh_false_positives++;
		rc = 0;
		goto dn_unlock;
	}

	/* Note that config-io to empty slots may fail;
	 * they are empty when they don't have children.
	 */
	if ((rets[0] == 5) && (rets[2] == 0) && (dn->child == NULL)) {
		false_positives++;
		pdn->eeh_false_positives++;
		rc = 0;
		goto dn_unlock;
	}

	/* If EEH is not supported on this device, punt. */
	if (rets[1] != 1) {
		printk(KERN_WARNING "EEH: event on unsupported device, rc=%d dn=%s\n",
		       ret, dn->full_name);
		false_positives++;
		pdn->eeh_false_positives++;
		rc = 0;
		goto dn_unlock;
	}

	/* If not the kind of error we know about, punt. */
	if (rets[0] != 1 && rets[0] != 2 && rets[0] != 4 && rets[0] != 5) {
		false_positives++;
		pdn->eeh_false_positives++;
		rc = 0;
		goto dn_unlock;
	}

	slot_resets++;

	/* Avoid repeated reports of this failure, including problems
	 * with other functions on this device, and functions under
	 * bridges.
	 */
	eeh_mark_slot(dn, EEH_MODE_ISOLATED);
	raw_spin_unlock_irqrestore(&confirm_error_lock, flags);

	eeh_send_failure_event(dn, dev);

	/* Most EEH events are due to device driver bugs.  Having
	 * a stack trace will help the device-driver authors figure
	 * out what happened.  So print that out.
	 */
	dump_stack();
	return 1;

dn_unlock:
	raw_spin_unlock_irqrestore(&confirm_error_lock, flags);
	return rc;
}

EXPORT_SYMBOL_GPL(eeh_dn_check_failure);

/**
 * eeh_check_failure - Check if all 1's data is due to EEH slot freeze
 * @token: I/O token, should be address in the form 0xA....
 * @val: value, should be all 1's (XXX why do we need this arg??)
 *
 * Check for an EEH failure at the given token address.  Call this
 * routine if the result of a read was all 0xff's and you want to
 * find out if this is due to an EEH slot freeze event.  This routine
 * will query firmware for the EEH status.
 *
 * Note this routine is safe to call in an interrupt context.
 */
unsigned long eeh_check_failure(const volatile void __iomem *token, unsigned long val)
{
	unsigned long addr;
	struct pci_dev *dev;
	struct device_node *dn;

	/* Finding the phys addr + pci device; this is pretty quick. */
	addr = eeh_token_to_phys((unsigned long __force) token);
	dev = pci_get_device_by_addr(addr);
	if (!dev) {
		no_device++;
		return val;
	}

	dn = pci_device_to_OF_node(dev);
	eeh_dn_check_failure(dn, dev);

	pci_dev_put(dev);
	return val;
}

EXPORT_SYMBOL(eeh_check_failure);

/**
 * eeh_pci_enable - Enable MMIO or DMA transfers for this slot
 * @pdn: pci device node
 * @function: EEH option to enable, e.g. EEH_THAW_MMIO
 *
 * This routine should be called to reenable frozen MMIO or DMA
 * so that it would work correctly again. It's useful while doing
 * recovery or log collection on the indicated device.
 */
int eeh_pci_enable(struct pci_dn *pdn, int function)
{
	int config_addr;
	int rc;

	/* Use PE configuration address, if present */
	config_addr = pdn->eeh_config_addr;
	if (pdn->eeh_pe_config_addr)
		config_addr = pdn->eeh_pe_config_addr;

	rc = rtas_call(ibm_set_eeh_option, 4, 1, NULL,
		       config_addr,
		       BUID_HI(pdn->phb->buid),
		       BUID_LO(pdn->phb->buid),
		       function);
	if (rc)
		printk(KERN_WARNING "EEH: Unexpected state change %d, err=%d dn=%s\n",
		       function, rc, pdn->node->full_name);

	rc = eeh_wait_for_slot_status(pdn, PCI_BUS_RESET_WAIT_MSEC);
	if ((rc == 4) && (function == EEH_THAW_MMIO))
		return 0;

	return rc;
}

/**
 * eeh_slot_reset - Raises/Lowers the pci #RST line
 * @pdn: pci device node
 * @state: 1/0 to raise/lower the #RST
 *
 * Clear the EEH-frozen condition on a slot.  This routine
 * asserts the PCI #RST line if the 'state' argument is '1',
 * and drops the #RST line if 'state' is '0'.  This routine is
 * safe to call in an interrupt context.
 */
static void eeh_slot_reset(struct pci_dn *pdn, int state)
{
	int config_addr;
	int rc;

	BUG_ON(pdn == NULL);

	if (!pdn->phb) {
		printk(KERN_WARNING "EEH: in slot reset, device node %s has no phb\n",
		       pdn->node->full_name);
		return;
	}

	/* Use PE configuration address, if present */
	config_addr = pdn->eeh_config_addr;
	if (pdn->eeh_pe_config_addr)
		config_addr = pdn->eeh_pe_config_addr;

	rc = rtas_call(ibm_set_slot_reset, 4, 1, NULL,
		       config_addr,
		       BUID_HI(pdn->phb->buid),
		       BUID_LO(pdn->phb->buid),
		       state);

	/* Fundamental-reset not supported on this PE, try hot-reset */
	if (rc == -8 && state == 3) {
		rc = rtas_call(ibm_set_slot_reset, 4, 1, NULL,
			       config_addr,
			       BUID_HI(pdn->phb->buid),
			       BUID_LO(pdn->phb->buid), 1);
		if (rc)
			printk(KERN_WARNING
			       "EEH: Unable to reset the failed slot,"
			       " #RST=%d dn=%s\n",
			       rc, pdn->node->full_name);
	}
}

/**
 * pcibios_set_pcie_reset_state - Set PCI-E reset state
 * @dev: pci device struct
 * @state: reset state to enter
 *
 * Return value:
 *	0 if success
 */
int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state state)
{
	struct device_node *dn = pci_device_to_OF_node(dev);
	struct pci_dn *pdn = PCI_DN(dn);

	switch (state) {
	case pcie_deassert_reset:
		eeh_slot_reset(pdn, 0);
		break;
	case pcie_hot_reset:
		eeh_slot_reset(pdn, 1);
		break;
	case pcie_warm_reset:
		eeh_slot_reset(pdn, 3);
		break;
	default:
		return -EINVAL;
	}

	return 0;
}

/**
 * __eeh_set_pe_freset - Check the required reset for child devices
 * @parent: parent device
 * @freset: return value
 *
 * Each device might have its preferred reset type: fundamental or
 * hot reset. The routine is used to collect the information from
 * the child devices so that they could be reset accordingly.
 */
void __eeh_set_pe_freset(struct device_node *parent, unsigned int *freset)
{
	struct device_node *dn;

	for_each_child_of_node(parent, dn) {
		if (PCI_DN(dn)) {
			struct pci_dev *dev = PCI_DN(dn)->pcidev;

			if (dev && dev->driver)
				*freset |= dev->needs_freset;

			__eeh_set_pe_freset(dn, freset);
		}
	}
}

/**
 * eeh_set_pe_freset - Check the required reset for the indicated device and its children
 * @dn: parent device
 * @freset: return value
 *
 * Each device might have its preferred reset type: fundamental or
 * hot reset. The routine is used to collect the information for
 * the indicated device and its children, so that the whole group
 * of devices can be reset properly.
 */
void eeh_set_pe_freset(struct device_node *dn, unsigned int *freset)
{
	struct pci_dev *dev;

	dn = eeh_find_device_pe(dn);

	/* Back up one, since config addrs might be shared */
	if (!pcibios_find_pci_bus(dn) && PCI_DN(dn->parent))
		dn = dn->parent;

	dev = PCI_DN(dn)->pcidev;
	if (dev)
		*freset |= dev->needs_freset;

	__eeh_set_pe_freset(dn, freset);
}

/**
 * eeh_reset_pe_once - Assert the pci #RST line for 1/4 second
 * @pdn: pci device node to be reset.
 *
 * Assert the PCI #RST line for 1/4 second.
 */
static void eeh_reset_pe_once(struct pci_dn *pdn)
{
	unsigned int freset = 0;

	/* Determine type of EEH reset required for
	 * Partitionable Endpoint, a hot-reset (1)
	 * or a fundamental reset (3).
	 * A fundamental reset required by any device under
	 * Partitionable Endpoint trumps hot-reset.
	 */
	eeh_set_pe_freset(pdn->node, &freset);

	if (freset)
		eeh_slot_reset(pdn, 3);
	else
		eeh_slot_reset(pdn, 1);

	/* The PCI bus requires that the reset be held high for at least
	 * 100 milliseconds. We wait a bit longer 'just in case'.
	 */
#define PCI_BUS_RST_HOLD_TIME_MSEC 250
	msleep(PCI_BUS_RST_HOLD_TIME_MSEC);

	/* We might get hit with another EEH freeze as soon as the
	 * pci slot reset line is dropped. Make sure we don't miss
	 * these, and clear the flag now.
	 */
	eeh_clear_slot(pdn->node, EEH_MODE_ISOLATED);

	eeh_slot_reset(pdn, 0);

	/* After a PCI slot has been reset, the PCI Express spec requires
	 * a 1.5 second idle time for the bus to stabilize, before starting
	 * up traffic.
	 */
#define PCI_BUS_SETTLE_TIME_MSEC 1800
	msleep(PCI_BUS_SETTLE_TIME_MSEC);
}

/**
 * eeh_reset_pe - Reset the indicated PE
 * @pdn: PCI device node
 *
 * This routine should be called to reset the indicated device, including
 * its PE. A PE might include multiple PCI devices and sometimes PCI bridges
 * might be involved as well.
 */
int eeh_reset_pe(struct pci_dn *pdn)
{
	int i, rc;

	/* Take three shots at resetting the bus */
	for (i=0; i<3; i++) {
		eeh_reset_pe_once(pdn);

		rc = eeh_wait_for_slot_status(pdn, PCI_BUS_RESET_WAIT_MSEC);
		if (rc == 0)
			return 0;

		if (rc < 0) {
			printk(KERN_ERR "EEH: unrecoverable slot failure %s\n",
			       pdn->node->full_name);
			return -1;
		}

		printk(KERN_ERR "EEH: bus reset %d failed on slot %s, rc=%d\n",
		       i+1, pdn->node->full_name, rc);
	}

	return -1;
}

/** Save and restore of PCI BARs
 *
 * Although firmware will set up BARs during boot, it doesn't
 * set up device BARs after a device reset, although it will,
 * if requested, set up bridge configuration. Thus, we need to
 * configure the PCI devices ourselves.
 */

/**
 * eeh_restore_one_device_bars - Restore the Base Address Registers for one device
 * @pdn: pci device node
 *
 * Loads the PCI configuration space base address registers,
 * the expansion ROM base address, the latency timer, etc.
 * from the saved values in the device node.
 */
static inline void eeh_restore_one_device_bars(struct pci_dn *pdn)
{
	int i;
	u32 cmd;

	if (NULL == pdn->phb) return;
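
	/* config_space[] words 4-9 are the six base address registers
	 * (config offsets 0x10-0x24), saved earlier by eeh_save_bars().
	 */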
	for (i=4; i<10; i++) {
		rtas_write_config(pdn, i*4, 4, pdn->config_space[i]);
	}

	/* 12 == Expansion ROM Address */
	rtas_write_config(pdn, 12*4, 4, pdn->config_space[12]);

#define BYTE_SWAP(OFF) (8*((OFF)/4)+3-(OFF))
#define SAVED_BYTE(OFF) (((u8 *)(pdn->config_space))[BYTE_SWAP(OFF)])
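
	/* Each config_space[] entry is a 32-bit config read stored in host
	 * (big-endian) order, so config-space byte OFF lives at byte index
	 * 4*(OFF/4) + 3 - (OFF%4) of the array.  BYTE_SWAP() computes that
	 * index and SAVED_BYTE() uses it to recover single-byte fields such
	 * as the cache line size and latency timer.
	 */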
	rtas_write_config(pdn, PCI_CACHE_LINE_SIZE, 1,
		    SAVED_BYTE(PCI_CACHE_LINE_SIZE));

	rtas_write_config(pdn, PCI_LATENCY_TIMER, 1,
		    SAVED_BYTE(PCI_LATENCY_TIMER));

	/* max latency, min grant, interrupt pin and line */
	rtas_write_config(pdn, 15*4, 4, pdn->config_space[15]);

	/* Restore PERR & SERR bits, some devices require it,
	 * don't touch the other command bits
	 */
	rtas_read_config(pdn, PCI_COMMAND, 4, &cmd);
	if (pdn->config_space[1] & PCI_COMMAND_PARITY)
		cmd |= PCI_COMMAND_PARITY;
	else
		cmd &= ~PCI_COMMAND_PARITY;
	if (pdn->config_space[1] & PCI_COMMAND_SERR)
		cmd |= PCI_COMMAND_SERR;
	else
		cmd &= ~PCI_COMMAND_SERR;
	rtas_write_config(pdn, PCI_COMMAND, 4, cmd);
}

/**
 * eeh_restore_bars - Restore the PCI config space info
 * @pdn: PCI device node
 *
 * This routine performs a recursive walk to the children
 * of this device as well.
 */
void eeh_restore_bars(struct pci_dn *pdn)
{
	struct device_node *dn;

	if (!pdn)
		return;

	if ((pdn->eeh_mode & EEH_MODE_SUPPORTED) && !IS_BRIDGE(pdn->class_code))
		eeh_restore_one_device_bars(pdn);

	for_each_child_of_node(pdn->node, dn)
		eeh_restore_bars(PCI_DN(dn));
}

/**
 * eeh_save_bars - Save device bars
 * @pdn: PCI device node
 *
 * Save the values of the device bars. Unlike the restore
 * routine, this routine is *not* recursive. This is because
 * PCI devices are added individually; but, for the restore,
 * an entire slot is reset at a time.
 */
static void eeh_save_bars(struct pci_dn *pdn)
{
	int i;

	if (!pdn)
		return;

	for (i = 0; i < 16; i++)
		rtas_read_config(pdn, i * 4, 4, &pdn->config_space[i]);
}

/**
 * eeh_configure_bridge - Configure PCI bridges for the indicated PE
 * @pdn: PCI device node
 *
 * PCI bridges might be included in a PE. In order to make the PE work
 * again, the included PCI bridges should be recovered after the PE
 * encounters a frozen state.
 */
void eeh_configure_bridge(struct pci_dn *pdn)
{
	int config_addr;
	int rc;
	int token;

	/* Use PE configuration address, if present */
	config_addr = pdn->eeh_config_addr;
	if (pdn->eeh_pe_config_addr)
		config_addr = pdn->eeh_pe_config_addr;

	/* Use new configure-pe function, if supported */
	if (ibm_configure_pe != RTAS_UNKNOWN_SERVICE)
		token = ibm_configure_pe;
	else
		token = ibm_configure_bridge;

	rc = rtas_call(token, 3, 1, NULL,
		       config_addr,
		       BUID_HI(pdn->phb->buid),
		       BUID_LO(pdn->phb->buid));
	if (rc) {
		printk(KERN_WARNING "EEH: Unable to configure device bridge (%d) for %s\n",
		       rc, pdn->node->full_name);
	}
}

#define EEH_ENABLE 1

struct eeh_early_enable_info {
	unsigned int buid_hi;
	unsigned int buid_lo;
};

/**
 * eeh_get_pe_addr - Retrieve PE address with given BDF address
 * @config_addr: BDF address
 * @info: BUID of the associated PHB
 *
 * There are two kinds of addresses in the EEH core components:
 * BDF addresses and PE addresses. A dedicated platform dependent
 * function call is used to retrieve the PE address for a given BDF
 * address. Furthermore, the EEH core prefers the PE address over
 * the BDF address.
 */
static int eeh_get_pe_addr(int config_addr,
			   struct eeh_early_enable_info *info)
{
	unsigned int rets[3];
	int ret;

	/* Use latest config-addr token on power6 */
	if (ibm_get_config_addr_info2 != RTAS_UNKNOWN_SERVICE) {
		/* Make sure we have a PE in hand */
		ret = rtas_call(ibm_get_config_addr_info2, 4, 2, rets,
				config_addr, info->buid_hi, info->buid_lo, 1);
		if (ret || (rets[0] == 0))
			return 0;

		ret = rtas_call(ibm_get_config_addr_info2, 4, 2, rets,
				config_addr, info->buid_hi, info->buid_lo, 0);
		if (ret)
			return 0;

		return rets[0];
	}

	/* Use older config-addr token on power5 */
	if (ibm_get_config_addr_info != RTAS_UNKNOWN_SERVICE) {
		ret = rtas_call(ibm_get_config_addr_info, 4, 2, rets,
				config_addr, info->buid_hi, info->buid_lo, 0);
		if (ret)
			return 0;

		return rets[0];
	}

	return 0;
}

/**
 * eeh_early_enable - Early enable EEH on the indicated device
 * @dn: device node
 * @data: BUID
 *
 * Enable EEH functionality on the specified PCI device. The function
 * is expected to be called before real PCI probing is done. However,
 * the PHBs have been initialized at this point.
 */
static void *eeh_early_enable(struct device_node *dn, void *data)
{
	unsigned int rets[3];
	struct eeh_early_enable_info *info = data;
	int ret;
	const u32 *class_code = of_get_property(dn, "class-code", NULL);
	const u32 *vendor_id = of_get_property(dn, "vendor-id", NULL);
	const u32 *device_id = of_get_property(dn, "device-id", NULL);
	const u32 *regs;
	int enable;
	struct pci_dn *pdn = PCI_DN(dn);

	pdn->class_code = 0;
	pdn->eeh_mode = 0;
	pdn->eeh_check_count = 0;
	pdn->eeh_freeze_count = 0;
	pdn->eeh_false_positives = 0;

	if (!of_device_is_available(dn))
		return NULL;

	/* Ignore bad nodes. */
	if (!class_code || !vendor_id || !device_id)
		return NULL;

	/* There is nothing to check on PCI to ISA bridges */
	if (dn->type && !strcmp(dn->type, "isa")) {
		pdn->eeh_mode |= EEH_MODE_NOCHECK;
		return NULL;
	}
	pdn->class_code = *class_code;

	/* Ok... see if this device supports EEH.  Some do, some don't,
	 * and the only way to find out is to check each and every one.
	 */
	regs = of_get_property(dn, "reg", NULL);
	if (regs) {
		/* First register entry is addr (00BBSS00) */
		/* Try to enable eeh */
		ret = rtas_call(ibm_set_eeh_option, 4, 1, NULL,
				regs[0], info->buid_hi, info->buid_lo,
				EEH_ENABLE);

		enable = 0;
		if (ret == 0) {
			pdn->eeh_config_addr = regs[0];

			/* If the newer, better, ibm,get-config-addr-info is supported,
			 * then use that instead.
			 */
			pdn->eeh_pe_config_addr = eeh_get_pe_addr(pdn->eeh_config_addr, info);

			/* Some older systems (Power4) allow the
			 * ibm,set-eeh-option call to succeed even on nodes
			 * where EEH is not supported. Verify support
			 * explicitly.
			 */
			ret = eeh_read_slot_reset_state(pdn, rets);
			if ((ret == 0) && (rets[1] == 1))
				enable = 1;
		}

		if (enable) {
			eeh_subsystem_enabled = 1;
			pdn->eeh_mode |= EEH_MODE_SUPPORTED;

			pr_debug("EEH: %s: eeh enabled, config=%x pe_config=%x\n",
				 dn->full_name, pdn->eeh_config_addr,
				 pdn->eeh_pe_config_addr);
		} else {
			/* This device doesn't support EEH, but it may have an
			 * EEH parent, in which case we mark it as supported.
			 */
			if (dn->parent && PCI_DN(dn->parent)
			    && (PCI_DN(dn->parent)->eeh_mode & EEH_MODE_SUPPORTED)) {
				/* Parent supports EEH. */
				pdn->eeh_mode |= EEH_MODE_SUPPORTED;
				pdn->eeh_config_addr = PCI_DN(dn->parent)->eeh_config_addr;
				return NULL;
			}
		}
	} else {
		printk(KERN_WARNING "EEH: %s: unable to get reg property.\n",
		       dn->full_name);
	}

	eeh_save_bars(pdn);
	return NULL;
}

/**
 * eeh_init - EEH initialization
 *
 * Initialize EEH by trying to enable it for all of the adapters in the system.
 * As a side effect we can determine here if eeh is supported at all.
 * Note that we leave EEH on so failed config cycles won't cause a machine
 * check.  If a user turns off EEH for a particular adapter they are really
 * telling Linux to ignore errors.  Some hardware (e.g. POWER5) won't
 * grant access to a slot if EEH isn't enabled, and so we always enable
 * EEH for all slots/all devices.
 *
 * The eeh-force-off option disables EEH checking globally, for all slots.
 * Even if force-off is set, the EEH hardware is still enabled, so that
 * newer systems can boot.
 */
void __init eeh_init(void)
{
	struct device_node *phb, *np;
	struct eeh_early_enable_info info;

	raw_spin_lock_init(&confirm_error_lock);
	spin_lock_init(&slot_errbuf_lock);

	np = of_find_node_by_path("/rtas");
	if (np == NULL)
		return;

	ibm_set_eeh_option = rtas_token("ibm,set-eeh-option");
	ibm_set_slot_reset = rtas_token("ibm,set-slot-reset");
	ibm_read_slot_reset_state2 = rtas_token("ibm,read-slot-reset-state2");
	ibm_read_slot_reset_state = rtas_token("ibm,read-slot-reset-state");
	ibm_slot_error_detail = rtas_token("ibm,slot-error-detail");
	ibm_get_config_addr_info = rtas_token("ibm,get-config-addr-info");
	ibm_get_config_addr_info2 = rtas_token("ibm,get-config-addr-info2");
	ibm_configure_bridge = rtas_token("ibm,configure-bridge");
	ibm_configure_pe = rtas_token("ibm,configure-pe");

	if (ibm_set_eeh_option == RTAS_UNKNOWN_SERVICE)
		return;

	eeh_error_buf_size = rtas_token("rtas-error-log-max");
	if (eeh_error_buf_size == RTAS_UNKNOWN_SERVICE) {
		eeh_error_buf_size = 1024;
	}
	if (eeh_error_buf_size > RTAS_ERROR_LOG_MAX) {
		printk(KERN_WARNING "EEH: rtas-error-log-max is bigger than allocated "
		       "buffer ! (%d vs %d)", eeh_error_buf_size, RTAS_ERROR_LOG_MAX);
		eeh_error_buf_size = RTAS_ERROR_LOG_MAX;
	}

	/* Enable EEH for all adapters.  Note that eeh requires buid's */
	for (phb = of_find_node_by_name(NULL, "pci"); phb;
	     phb = of_find_node_by_name(phb, "pci")) {
		unsigned long buid;

		buid = get_phb_buid(phb);
		if (buid == 0 || PCI_DN(phb) == NULL)
			continue;

		info.buid_lo = BUID_LO(buid);
		info.buid_hi = BUID_HI(buid);
		traverse_pci_devices(phb, eeh_early_enable, &info);
	}

	if (eeh_subsystem_enabled)
		printk(KERN_INFO "EEH: PCI Enhanced I/O Error Handling Enabled\n");
	else
		printk(KERN_WARNING "EEH: No capable adapters found\n");
}

/**
 * eeh_add_device_early - Enable EEH for the indicated device_node
 * @dn: device node for which to set up EEH
 *
 * This routine must be used to perform EEH initialization for PCI
 * devices that were added after system boot (e.g. hotplug, dlpar).
 * This routine must be called before any i/o is performed to the
 * adapter (including any config-space i/o).
 * Whether this actually enables EEH or not for this device depends
 * on the CEC architecture, the type of the device, and on earlier
 * boot command-line arguments, etc.
 */
static void eeh_add_device_early(struct device_node *dn)
{
	struct pci_controller *phb;
	struct eeh_early_enable_info info;

	if (!dn || !PCI_DN(dn))
		return;
	phb = PCI_DN(dn)->phb;

	/* USB Bus children of PCI devices will not have BUID's */
	if (NULL == phb || 0 == phb->buid)
		return;

	info.buid_hi = BUID_HI(phb->buid);
	info.buid_lo = BUID_LO(phb->buid);

	eeh_early_enable(dn, &info);
}

/**
 * eeh_add_device_tree_early - Enable EEH for the indicated device
 * @dn: device node
 *
 * This routine must be used to perform EEH initialization for the
 * indicated PCI device that was added after system boot (e.g.
 * hotplug, dlpar).
 */
void eeh_add_device_tree_early(struct device_node *dn)
{
	struct device_node *sib;

	for_each_child_of_node(dn, sib)
		eeh_add_device_tree_early(sib);
	eeh_add_device_early(dn);
}
EXPORT_SYMBOL_GPL(eeh_add_device_tree_early);

/**
 * eeh_add_device_late - Perform EEH initialization for the indicated pci device
 * @dev: pci device for which to set up EEH
 *
 * This routine must be used to complete EEH initialization for PCI
 * devices that were added after system boot (e.g. hotplug, dlpar).
 */
static void eeh_add_device_late(struct pci_dev *dev)
{
	struct device_node *dn;
	struct pci_dn *pdn;

	if (!dev || !eeh_subsystem_enabled)
		return;

	pr_debug("EEH: Adding device %s\n", pci_name(dev));

	dn = pci_device_to_OF_node(dev);
	pdn = PCI_DN(dn);
	if (pdn->pcidev == dev) {
		pr_debug("EEH: Already referenced !\n");
		return;
	}
	WARN_ON(pdn->pcidev);

	pci_dev_get(dev);
	pdn->pcidev = dev;

	pci_addr_cache_insert_device(dev);
	eeh_sysfs_add_device(dev);
}

/**
 * eeh_add_device_tree_late - Perform EEH initialization for the indicated PCI bus
 * @bus: PCI bus
 *
 * This routine must be used to perform EEH initialization for PCI
 * devices which are attached to the indicated PCI bus. The PCI bus
 * is added after system boot through hotplug or dlpar.
 */
void eeh_add_device_tree_late(struct pci_bus *bus)
{
	struct pci_dev *dev;

	list_for_each_entry(dev, &bus->devices, bus_list) {
		eeh_add_device_late(dev);
		if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) {
			struct pci_bus *subbus = dev->subordinate;
			if (subbus)
				eeh_add_device_tree_late(subbus);
		}
	}
}
EXPORT_SYMBOL_GPL(eeh_add_device_tree_late);

/**
 * eeh_remove_device - Undo EEH setup for the indicated pci device
 * @dev: pci device to be removed
 *
 * This routine should be called when a device is removed from
 * a running system (e.g. by hotplug or dlpar).  It unregisters
 * the PCI device from the EEH subsystem.  I/O errors affecting
 * this device will no longer be detected after this call; thus,
 * i/o errors affecting this slot may leave this device unusable.
 */
static void eeh_remove_device(struct pci_dev *dev)
{
	struct device_node *dn;

	if (!dev || !eeh_subsystem_enabled)
		return;

	/* Unregister the device with the EEH/PCI address search system */
	pr_debug("EEH: Removing device %s\n", pci_name(dev));

	dn = pci_device_to_OF_node(dev);
	if (PCI_DN(dn)->pcidev == NULL) {
		pr_debug("EEH: Not referenced !\n");
		return;
	}
	PCI_DN(dn)->pcidev = NULL;
	pci_dev_put(dev);

	pci_addr_cache_remove_device(dev);
	eeh_sysfs_remove_device(dev);
}

/**
 * eeh_remove_bus_device - Undo EEH setup for the indicated PCI device
 * @dev: PCI device
 *
 * This routine must be called when a device is removed from the
 * running system through hotplug or dlpar. The corresponding
 * PCI address cache will be removed.
 */
void eeh_remove_bus_device(struct pci_dev *dev)
{
	struct pci_bus *bus = dev->subordinate;
	struct pci_dev *child, *tmp;

	eeh_remove_device(dev);

	if (bus && dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) {
		list_for_each_entry_safe(child, tmp, &bus->devices, bus_list)
			eeh_remove_bus_device(child);
	}
}
EXPORT_SYMBOL_GPL(eeh_remove_bus_device);

static int proc_eeh_show(struct seq_file *m, void *v)
{
	if (0 == eeh_subsystem_enabled) {
		seq_printf(m, "EEH Subsystem is globally disabled\n");
		seq_printf(m, "eeh_total_mmio_ffs=%ld\n", total_mmio_ffs);
	} else {
		seq_printf(m, "EEH Subsystem is enabled\n");
		seq_printf(m,
			   "no device=%ld\n"
			   "no device node=%ld\n"
			   "no config address=%ld\n"
			   "check not wanted=%ld\n"
			   "eeh_total_mmio_ffs=%ld\n"
			   "eeh_false_positives=%ld\n"
			   "eeh_slot_resets=%ld\n",
			   no_device, no_dn, no_cfg_addr,
			   ignored_check, total_mmio_ffs,
			   false_positives,
			   slot_resets);
	}

	return 0;
}
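
/* With EEH enabled, /proc/powerpc/eeh as emitted by proc_eeh_show() above
 * looks roughly like this (counter values are illustrative only):
 *
 *	EEH Subsystem is enabled
 *	no device=0
 *	no device node=0
 *	no config address=0
 *	check not wanted=0
 *	eeh_total_mmio_ffs=12
 *	eeh_false_positives=0
 *	eeh_slot_resets=1
 */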

static int proc_eeh_open(struct inode *inode, struct file *file)
{
	return single_open(file, proc_eeh_show, NULL);
}

static const struct file_operations proc_eeh_operations = {
	.open    = proc_eeh_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = single_release,
};

static int __init eeh_init_proc(void)
{
	if (machine_is(pseries))
		proc_create("powerpc/eeh", 0, NULL, &proc_eeh_operations);
	return 0;
}
__initcall(eeh_init_proc);