eeh.c
/*
 * Copyright IBM Corporation 2001, 2005, 2006
 * Copyright Dave Engebretsen & Todd Inglett 2001
 * Copyright Linas Vepstas 2005, 2006
 * Copyright 2001-2012 IBM Corporation.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 *
 * Please address comments and feedback to Linas Vepstas <linas@austin.ibm.com>
 */
#include <linux/delay.h>
#include <linux/sched.h>
#include <linux/init.h>
#include <linux/list.h>
#include <linux/pci.h>
#include <linux/proc_fs.h>
#include <linux/rbtree.h>
#include <linux/seq_file.h>
#include <linux/spinlock.h>
#include <linux/export.h>
#include <linux/of.h>
#include <linux/atomic.h>
#include <asm/eeh.h>
#include <asm/eeh_event.h>
#include <asm/io.h>
#include <asm/machdep.h>
#include <asm/ppc-pci.h>
#include <asm/rtas.h>

/** Overview:
 * EEH, or "Extended Error Handling" is a PCI bridge technology for
 * dealing with PCI bus errors that can't be dealt with within the
 * usual PCI framework, except by check-stopping the CPU. Systems
 * that are designed for high-availability/reliability cannot afford
 * to crash due to a "mere" PCI error, thus the need for EEH.
 * An EEH-capable bridge operates by converting a detected error
 * into a "slot freeze", taking the PCI adapter off-line, making
 * the slot behave, from the OS's point of view, as if the slot
 * were "empty": all reads return 0xff's and all writes are silently
 * ignored. EEH slot isolation events can be triggered by parity
 * errors on the address or data busses (e.g. during posted writes),
 * which in turn might be caused by low voltage on the bus, dust,
 * vibration, humidity, radioactivity or plain-old failed hardware.
 *
 * Note, however, that one of the leading causes of EEH slot
 * freeze events is buggy device drivers, buggy device microcode,
 * or buggy device hardware. This is because any attempt by the
 * device to bus-master data to a memory address that is not
 * assigned to the device will trigger a slot freeze. (The idea
 * is to prevent devices-gone-wild from corrupting system memory).
 * Buggy hardware/drivers will have a miserable time co-existing
 * with EEH.
 *
 * Ideally, a PCI device driver, when suspecting that an isolation
 * event has occurred (e.g. by reading 0xff's), will then ask EEH
 * whether this is the case, and then take appropriate steps to
 * reset the PCI slot, the PCI device, and then resume operations.
 * However, until that day, the checking is done here, with the
 * eeh_check_failure() routine embedded in the MMIO macros. If
 * the slot is found to be isolated, an "EEH Event" is synthesized
 * and sent out for processing.
 */
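
/*
 * Illustrative sketch (not part of this file): a driver that suspects a
 * slot freeze after an MMIO read of all-ones can hand the value to
 * eeh_check_failure() instead of waiting for the MMIO macros to do so,
 * along the lines of:
 *
 *	val = readl(priv->regs + REG_STATUS);
 *	if (val == ~0U)
 *		val = eeh_check_failure(priv->regs + REG_STATUS, val);
 *
 * Here "priv->regs" and "REG_STATUS" are hypothetical driver fields used
 * only for illustration.
 */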

/* If a device driver keeps reading an MMIO register in an interrupt
 * handler after a slot isolation event, it might be broken.
 * This sets the threshold for how many read attempts we allow
 * before printing an error message.
 */
#define EEH_MAX_FAILS	2100000

/* Time to wait for a PCI slot to report status, in milliseconds */
#define PCI_BUS_RESET_WAIT_MSEC (60*1000)

/* RTAS tokens */
static int ibm_set_eeh_option;
static int ibm_set_slot_reset;
static int ibm_read_slot_reset_state;
static int ibm_read_slot_reset_state2;
static int ibm_slot_error_detail;
static int ibm_get_config_addr_info;
static int ibm_get_config_addr_info2;
static int ibm_configure_bridge;
static int ibm_configure_pe;

/* Platform dependent EEH operations */
struct eeh_ops *eeh_ops = NULL;

int eeh_subsystem_enabled;
EXPORT_SYMBOL(eeh_subsystem_enabled);

/* Lock to avoid races due to multiple reports of an error */
static DEFINE_RAW_SPINLOCK(confirm_error_lock);

/* Buffer for reporting slot-error-detail rtas calls. It's here
 * in BSS, and not dynamically allocated, so that it ends up in
 * RMO where RTAS can access it.
 */
static unsigned char slot_errbuf[RTAS_ERROR_LOG_MAX];
static DEFINE_SPINLOCK(slot_errbuf_lock);
static int eeh_error_buf_size;

/* Buffer for reporting pci register dumps. It's here in BSS, and
 * not dynamically allocated, so that it ends up in RMO where RTAS
 * can access it.
 */
#define EEH_PCI_REGS_LOG_LEN 4096
static unsigned char pci_regs_buf[EEH_PCI_REGS_LOG_LEN];

/* System monitoring statistics */
static unsigned long no_device;
static unsigned long no_dn;
static unsigned long no_cfg_addr;
static unsigned long ignored_check;
static unsigned long total_mmio_ffs;
static unsigned long false_positives;
static unsigned long slot_resets;

#define IS_BRIDGE(class_code) (((class_code)>>16) == PCI_BASE_CLASS_BRIDGE)
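/* pdn->class_code holds the 24-bit PCI class word taken from the
 * "class-code" device-tree property, so the base class sits in
 * bits 23:16; the macro above extracts it before comparing.
 */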

/**
 * eeh_rtas_slot_error_detail - Retrieve error log through RTAS call
 * @pdn: device node
 * @severity: temporary or permanent error log
 * @driver_log: driver log to be combined with the retrieved error log
 * @loglen: length of driver log
 *
 * This routine should be called to retrieve error log through the dedicated
 * RTAS call.
 */
static void eeh_rtas_slot_error_detail(struct pci_dn *pdn, int severity,
				       char *driver_log, size_t loglen)
{
	int config_addr;
	unsigned long flags;
	int rc;

	/* Log the error with the rtas logger */
	spin_lock_irqsave(&slot_errbuf_lock, flags);
	memset(slot_errbuf, 0, eeh_error_buf_size);

	/* Use PE configuration address, if present */
	config_addr = pdn->eeh_config_addr;
	if (pdn->eeh_pe_config_addr)
		config_addr = pdn->eeh_pe_config_addr;

	rc = rtas_call(ibm_slot_error_detail,
		       8, 1, NULL, config_addr,
		       BUID_HI(pdn->phb->buid),
		       BUID_LO(pdn->phb->buid),
		       virt_to_phys(driver_log), loglen,
		       virt_to_phys(slot_errbuf),
		       eeh_error_buf_size,
		       severity);

	if (rc == 0)
		log_error(slot_errbuf, ERR_TYPE_RTAS_LOG, 0);
	spin_unlock_irqrestore(&slot_errbuf_lock, flags);
}

/**
 * eeh_gather_pci_data - Copy assorted PCI config space registers to buff
 * @pdn: device to report data for
 * @buf: point to buffer in which to log
 * @len: amount of room in buffer
 *
 * This routine captures assorted PCI configuration space data,
 * and puts them into a buffer for RTAS error logging.
 */
static size_t eeh_gather_pci_data(struct pci_dn *pdn, char *buf, size_t len)
{
	struct pci_dev *dev = pdn->pcidev;
	u32 cfg;
	int cap, i;
	int n = 0;

	n += scnprintf(buf+n, len-n, "%s\n", pdn->node->full_name);
	printk(KERN_WARNING "EEH: of node=%s\n", pdn->node->full_name);

	rtas_read_config(pdn, PCI_VENDOR_ID, 4, &cfg);
	n += scnprintf(buf+n, len-n, "dev/vend:%08x\n", cfg);
	printk(KERN_WARNING "EEH: PCI device/vendor: %08x\n", cfg);

	rtas_read_config(pdn, PCI_COMMAND, 4, &cfg);
	n += scnprintf(buf+n, len-n, "cmd/stat:%x\n", cfg);
	printk(KERN_WARNING "EEH: PCI cmd/status register: %08x\n", cfg);

	if (!dev) {
		printk(KERN_WARNING "EEH: no PCI device for this of node\n");
		return n;
	}

	/* Gather bridge-specific registers */
	if (dev->class >> 16 == PCI_BASE_CLASS_BRIDGE) {
		rtas_read_config(pdn, PCI_SEC_STATUS, 2, &cfg);
		n += scnprintf(buf+n, len-n, "sec stat:%x\n", cfg);
		printk(KERN_WARNING "EEH: Bridge secondary status: %04x\n", cfg);

		rtas_read_config(pdn, PCI_BRIDGE_CONTROL, 2, &cfg);
		n += scnprintf(buf+n, len-n, "brdg ctl:%x\n", cfg);
		printk(KERN_WARNING "EEH: Bridge control: %04x\n", cfg);
	}

	/* Dump out the PCI-X command and status regs */
	cap = pci_find_capability(dev, PCI_CAP_ID_PCIX);
	if (cap) {
		rtas_read_config(pdn, cap, 4, &cfg);
		n += scnprintf(buf+n, len-n, "pcix-cmd:%x\n", cfg);
		printk(KERN_WARNING "EEH: PCI-X cmd: %08x\n", cfg);

		rtas_read_config(pdn, cap+4, 4, &cfg);
		n += scnprintf(buf+n, len-n, "pcix-stat:%x\n", cfg);
		printk(KERN_WARNING "EEH: PCI-X status: %08x\n", cfg);
	}

	/* If PCI-E capable, dump PCI-E cap 10, and the AER */
	cap = pci_find_capability(dev, PCI_CAP_ID_EXP);
	if (cap) {
		n += scnprintf(buf+n, len-n, "pci-e cap10:\n");
		printk(KERN_WARNING
		       "EEH: PCI-E capabilities and status follow:\n");

		for (i=0; i<=8; i++) {
			rtas_read_config(pdn, cap+4*i, 4, &cfg);
			n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg);
			printk(KERN_WARNING "EEH: PCI-E %02x: %08x\n", i, cfg);
		}

		cap = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR);
		if (cap) {
			n += scnprintf(buf+n, len-n, "pci-e AER:\n");
			printk(KERN_WARNING
			       "EEH: PCI-E AER capability register set follows:\n");

			for (i=0; i<14; i++) {
				rtas_read_config(pdn, cap+4*i, 4, &cfg);
				n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg);
				printk(KERN_WARNING "EEH: PCI-E AER %02x: %08x\n", i, cfg);
			}
		}
	}

	/* Gather status on devices under the bridge */
	if (dev->class >> 16 == PCI_BASE_CLASS_BRIDGE) {
		struct device_node *dn;

		for_each_child_of_node(pdn->node, dn) {
			pdn = PCI_DN(dn);
			if (pdn)
				n += eeh_gather_pci_data(pdn, buf+n, len-n);
		}
	}

	return n;
}

/**
 * eeh_slot_error_detail - Generate combined log including driver log and error log
 * @pdn: device node
 * @severity: temporary or permanent error log
 *
 * This routine should be called to generate the combined log, which
 * is comprised of driver log and error log. The driver log is figured
 * out from the config space of the corresponding PCI device, while
 * the error log is fetched through platform dependent function call.
 */
void eeh_slot_error_detail(struct pci_dn *pdn, int severity)
{
	size_t loglen = 0;

	pci_regs_buf[0] = 0;
	eeh_pci_enable(pdn, EEH_THAW_MMIO);
	eeh_configure_bridge(pdn);
	eeh_restore_bars(pdn);
	loglen = eeh_gather_pci_data(pdn, pci_regs_buf, EEH_PCI_REGS_LOG_LEN);

	eeh_rtas_slot_error_detail(pdn, severity, pci_regs_buf, loglen);
}

/**
 * eeh_read_slot_reset_state - Read the reset state of a device node's slot
 * @pdn: pci device node to read
 * @rets: array to return results in
 *
 * Read the reset state of a device node's slot through platform dependent
 * function call.
 */
static int eeh_read_slot_reset_state(struct pci_dn *pdn, int rets[])
{
	int token, outputs;
	int config_addr;

	if (ibm_read_slot_reset_state2 != RTAS_UNKNOWN_SERVICE) {
		token = ibm_read_slot_reset_state2;
		outputs = 4;
	} else {
		token = ibm_read_slot_reset_state;
		rets[2] = 0; /* fake PE Unavailable info */
		outputs = 3;
	}

	/* Use PE configuration address, if present */
	config_addr = pdn->eeh_config_addr;
	if (pdn->eeh_pe_config_addr)
		config_addr = pdn->eeh_pe_config_addr;

	return rtas_call(token, 3, outputs, rets, config_addr,
			 BUID_HI(pdn->phb->buid), BUID_LO(pdn->phb->buid));
}

/**
 * eeh_wait_for_slot_status - Returns error status of slot
 * @pdn: pci device node
 * @max_wait_msecs: maximum number of milliseconds to wait
 *
 * Return negative value if a permanent error, else return
 * Partition Endpoint (PE) status value.
 *
 * If @max_wait_msecs is positive, then this routine will
 * sleep until a valid status can be obtained, or until
 * the max allowed wait time is exceeded, in which case
 * a -2 is returned.
 */
int eeh_wait_for_slot_status(struct pci_dn *pdn, int max_wait_msecs)
{
	int rc;
	int rets[3];
	int mwait;

	while (1) {
		rc = eeh_read_slot_reset_state(pdn, rets);
		if (rc) return rc;
		if (rets[1] == 0) return -1;	/* EEH is not supported */

		if (rets[0] != 5) return rets[0]; /* return actual status */
		if (rets[2] == 0) return -1;	/* permanently unavailable */

		if (max_wait_msecs <= 0) break;

		mwait = rets[2];
		if (mwait <= 0) {
			printk(KERN_WARNING "EEH: Firmware returned bad wait value=%d\n",
			       mwait);
			mwait = 1000;
		} else if (mwait > 300*1000) {
			printk(KERN_WARNING "EEH: Firmware is taking too long, time=%d\n",
			       mwait);
			mwait = 300*1000;
		}
		max_wait_msecs -= mwait;
		msleep(mwait);
	}

	printk(KERN_WARNING "EEH: Timed out waiting for slot status\n");
	return -2;
}

/**
 * eeh_token_to_phys - Convert EEH address token to phys address
 * @token: I/O token, should be address in the form 0xA....
 *
 * This routine should be called to convert virtual I/O address
 * to physical one.
 */
static inline unsigned long eeh_token_to_phys(unsigned long token)
{
	pte_t *ptep;
	unsigned long pa;

	ptep = find_linux_pte(init_mm.pgd, token);
	if (!ptep)
		return token;
	pa = pte_pfn(*ptep) << PAGE_SHIFT;

	return pa | (token & (PAGE_SIZE-1));
}

/**
 * eeh_find_device_pe - Retrieve the PE for the given device
 * @dn: device node
 *
 * Return the PE under which this device lies
 */
struct device_node *eeh_find_device_pe(struct device_node *dn)
{
	while ((dn->parent) && PCI_DN(dn->parent) &&
	       (PCI_DN(dn->parent)->eeh_mode & EEH_MODE_SUPPORTED)) {
		dn = dn->parent;
	}

	return dn;
}

/**
 * __eeh_mark_slot - Mark all child devices as failed
 * @parent: parent device
 * @mode_flag: failure flag
 *
 * Mark all devices that are children of this device as failed.
 * Mark the device driver too, so that it can see the failure
 * immediately; this is critical, since some drivers poll
 * status registers in interrupts ... If a driver is polling,
 * and the slot is frozen, then the driver can deadlock in
 * an interrupt context, which is bad.
 */
static void __eeh_mark_slot(struct device_node *parent, int mode_flag)
{
	struct device_node *dn;

	for_each_child_of_node(parent, dn) {
		if (PCI_DN(dn)) {
			/* Mark the pci device driver too */
			struct pci_dev *dev = PCI_DN(dn)->pcidev;

			PCI_DN(dn)->eeh_mode |= mode_flag;

			if (dev && dev->driver)
				dev->error_state = pci_channel_io_frozen;

			__eeh_mark_slot(dn, mode_flag);
		}
	}
}

/**
 * eeh_mark_slot - Mark the indicated device and its children as failed
 * @dn: parent device
 * @mode_flag: failure flag
 *
 * Mark the indicated device and its child devices as failed.
 * The device drivers are marked as failed as well.
 */
void eeh_mark_slot(struct device_node *dn, int mode_flag)
{
	struct pci_dev *dev;

	dn = eeh_find_device_pe(dn);

	/* Back up one, since config addrs might be shared */
	if (!pcibios_find_pci_bus(dn) && PCI_DN(dn->parent))
		dn = dn->parent;

	PCI_DN(dn)->eeh_mode |= mode_flag;

	/* Mark the pci device too */
	dev = PCI_DN(dn)->pcidev;
	if (dev)
		dev->error_state = pci_channel_io_frozen;

	__eeh_mark_slot(dn, mode_flag);
}

/**
 * __eeh_clear_slot - Clear failure flag for the child devices
 * @parent: parent device
 * @mode_flag: flag to be cleared
 *
 * Clear failure flag for the child devices.
 */
static void __eeh_clear_slot(struct device_node *parent, int mode_flag)
{
	struct device_node *dn;

	for_each_child_of_node(parent, dn) {
		if (PCI_DN(dn)) {
			PCI_DN(dn)->eeh_mode &= ~mode_flag;
			PCI_DN(dn)->eeh_check_count = 0;
			__eeh_clear_slot(dn, mode_flag);
		}
	}
}

/**
 * eeh_clear_slot - Clear failure flag for the indicated device and its children
 * @dn: parent device
 * @mode_flag: flag to be cleared
 *
 * Clear failure flag for the indicated device and its children.
 */
void eeh_clear_slot(struct device_node *dn, int mode_flag)
{
	unsigned long flags;

	raw_spin_lock_irqsave(&confirm_error_lock, flags);

	dn = eeh_find_device_pe(dn);

	/* Back up one, since config addrs might be shared */
	if (!pcibios_find_pci_bus(dn) && PCI_DN(dn->parent))
		dn = dn->parent;

	PCI_DN(dn)->eeh_mode &= ~mode_flag;
	PCI_DN(dn)->eeh_check_count = 0;
	__eeh_clear_slot(dn, mode_flag);

	raw_spin_unlock_irqrestore(&confirm_error_lock, flags);
}

/**
 * eeh_dn_check_failure - Check if all 1's data is due to EEH slot freeze
 * @dn: device node
 * @dev: pci device, if known
 *
 * Check for an EEH failure for the given device node. Call this
 * routine if the result of a read was all 0xff's and you want to
 * find out if this is due to an EEH slot freeze. This routine
 * will query firmware for the EEH status.
 *
 * Returns 0 if there has not been an EEH error; otherwise returns
 * a non-zero value and queues up a slot isolation event notification.
 *
 * It is safe to call this routine in an interrupt context.
 */
int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev)
{
	int ret;
	int rets[3];
	unsigned long flags;
	struct pci_dn *pdn;
	int rc = 0;
	const char *location;

	total_mmio_ffs++;

	if (!eeh_subsystem_enabled)
		return 0;

	if (!dn) {
		no_dn++;
		return 0;
	}
	dn = eeh_find_device_pe(dn);
	pdn = PCI_DN(dn);

	/* Access to IO BARs might get this far and still not want checking. */
	if (!(pdn->eeh_mode & EEH_MODE_SUPPORTED) ||
	    pdn->eeh_mode & EEH_MODE_NOCHECK) {
		ignored_check++;
		pr_debug("EEH: Ignored check (%x) for %s %s\n",
			 pdn->eeh_mode, eeh_pci_name(dev), dn->full_name);
		return 0;
	}

	if (!pdn->eeh_config_addr && !pdn->eeh_pe_config_addr) {
		no_cfg_addr++;
		return 0;
	}

	/* If we already have a pending isolation event for this
	 * slot, we know it's bad already, we don't need to check.
	 * Do this checking under a lock; as multiple PCI devices
	 * in one slot might report errors simultaneously, and we
	 * only want one error recovery routine running.
	 */
	raw_spin_lock_irqsave(&confirm_error_lock, flags);
	rc = 1;
	if (pdn->eeh_mode & EEH_MODE_ISOLATED) {
		pdn->eeh_check_count++;
		if (pdn->eeh_check_count % EEH_MAX_FAILS == 0) {
			location = of_get_property(dn, "ibm,loc-code", NULL);
			printk(KERN_ERR "EEH: %d reads ignored for recovering device at "
				"location=%s driver=%s pci addr=%s\n",
				pdn->eeh_check_count, location,
				eeh_driver_name(dev), eeh_pci_name(dev));
			printk(KERN_ERR "EEH: Might be infinite loop in %s driver\n",
				eeh_driver_name(dev));
			dump_stack();
		}
		goto dn_unlock;
	}

	/*
	 * Now test for an EEH failure. This is VERY expensive.
	 * Note that the eeh_config_addr may be a parent device
	 * in the case of a device behind a bridge, or it may be
	 * function zero of a multi-function device.
	 * In any case they must share a common PHB.
	 */
	ret = eeh_read_slot_reset_state(pdn, rets);

	/* If the call to firmware failed, punt */
	if (ret != 0) {
		printk(KERN_WARNING "EEH: eeh_read_slot_reset_state() failed; rc=%d dn=%s\n",
		       ret, dn->full_name);
		false_positives++;
		pdn->eeh_false_positives++;
		rc = 0;
		goto dn_unlock;
	}

	/* Note that config-io to empty slots may fail;
	 * they are empty when they don't have children.
	 */
	if ((rets[0] == 5) && (rets[2] == 0) && (dn->child == NULL)) {
		false_positives++;
		pdn->eeh_false_positives++;
		rc = 0;
		goto dn_unlock;
	}

	/* If EEH is not supported on this device, punt. */
	if (rets[1] != 1) {
		printk(KERN_WARNING "EEH: event on unsupported device, rc=%d dn=%s\n",
		       ret, dn->full_name);
		false_positives++;
		pdn->eeh_false_positives++;
		rc = 0;
		goto dn_unlock;
	}

	/* If not the kind of error we know about, punt. */
	if (rets[0] != 1 && rets[0] != 2 && rets[0] != 4 && rets[0] != 5) {
		false_positives++;
		pdn->eeh_false_positives++;
		rc = 0;
		goto dn_unlock;
	}

	slot_resets++;

	/* Avoid repeated reports of this failure, including problems
	 * with other functions on this device, and functions under
	 * bridges.
	 */
	eeh_mark_slot(dn, EEH_MODE_ISOLATED);
	raw_spin_unlock_irqrestore(&confirm_error_lock, flags);

	eeh_send_failure_event(dn, dev);

	/* Most EEH events are due to device driver bugs. Having
	 * a stack trace will help the device-driver authors figure
	 * out what happened. So print that out.
	 */
	dump_stack();
	return 1;

dn_unlock:
	raw_spin_unlock_irqrestore(&confirm_error_lock, flags);
	return rc;
}
EXPORT_SYMBOL_GPL(eeh_dn_check_failure);

/**
 * eeh_check_failure - Check if all 1's data is due to EEH slot freeze
 * @token: I/O token, should be address in the form 0xA....
 * @val: value, should be all 1's (XXX why do we need this arg??)
 *
 * Check for an EEH failure at the given token address. Call this
 * routine if the result of a read was all 0xff's and you want to
 * find out if this is due to an EEH slot freeze event. This routine
 * will query firmware for the EEH status.
 *
 * Note this routine is safe to call in an interrupt context.
 */
unsigned long eeh_check_failure(const volatile void __iomem *token, unsigned long val)
{
	unsigned long addr;
	struct pci_dev *dev;
	struct device_node *dn;

	/* Finding the phys addr + pci device; this is pretty quick. */
	addr = eeh_token_to_phys((unsigned long __force) token);
	dev = pci_get_device_by_addr(addr);
	if (!dev) {
		no_device++;
		return val;
	}

	dn = pci_device_to_OF_node(dev);
	eeh_dn_check_failure(dn, dev);

	pci_dev_put(dev);
	return val;
}
EXPORT_SYMBOL(eeh_check_failure);
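
/*
 * Illustrative sketch: the powerpc MMIO accessors (see asm/eeh.h) wrap reads
 * roughly like this, which is where eeh_check_failure() normally gets called
 * from:
 *
 *	u32 val = in_le32(addr);
 *	if (EEH_POSSIBLE_ERROR(val, u32))
 *		return eeh_check_failure(addr, val);
 *	return val;
 *
 * Treat this as an approximation of the header's helpers rather than a
 * verbatim copy.
 */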

/**
 * eeh_pci_enable - Enable MMIO or DMA transfers for this slot
 * @pdn: pci device node
 * @function: EEH option to enable, e.g. EEH_THAW_MMIO
 *
 * This routine should be called to reenable frozen MMIO or DMA
 * so that it would work correctly again. It's useful while doing
 * recovery or log collection on the indicated device.
 */
int eeh_pci_enable(struct pci_dn *pdn, int function)
{
	int config_addr;
	int rc;

	/* Use PE configuration address, if present */
	config_addr = pdn->eeh_config_addr;
	if (pdn->eeh_pe_config_addr)
		config_addr = pdn->eeh_pe_config_addr;

	rc = rtas_call(ibm_set_eeh_option, 4, 1, NULL,
		       config_addr,
		       BUID_HI(pdn->phb->buid),
		       BUID_LO(pdn->phb->buid),
		       function);
	if (rc)
		printk(KERN_WARNING "EEH: Unexpected state change %d, err=%d dn=%s\n",
		       function, rc, pdn->node->full_name);

	rc = eeh_wait_for_slot_status(pdn, PCI_BUS_RESET_WAIT_MSEC);
	if ((rc == 4) && (function == EEH_THAW_MMIO))
		return 0;

	return rc;
}

/**
 * eeh_slot_reset - Raises/Lowers the pci #RST line
 * @pdn: pci device node
 * @state: 1/0 to raise/lower the #RST
 *
 * Clear the EEH-frozen condition on a slot. This routine
 * asserts the PCI #RST line if the 'state' argument is '1',
 * and drops the #RST line if 'state' is '0'. This routine is
 * safe to call in an interrupt context.
 */
static void eeh_slot_reset(struct pci_dn *pdn, int state)
{
	int config_addr;
	int rc;

	BUG_ON(pdn == NULL);

	if (!pdn->phb) {
		printk(KERN_WARNING "EEH: in slot reset, device node %s has no phb\n",
		       pdn->node->full_name);
		return;
	}

	/* Use PE configuration address, if present */
	config_addr = pdn->eeh_config_addr;
	if (pdn->eeh_pe_config_addr)
		config_addr = pdn->eeh_pe_config_addr;

	rc = rtas_call(ibm_set_slot_reset, 4, 1, NULL,
		       config_addr,
		       BUID_HI(pdn->phb->buid),
		       BUID_LO(pdn->phb->buid),
		       state);

	/* Fundamental-reset not supported on this PE, try hot-reset */
	if (rc == -8 && state == 3) {
		rc = rtas_call(ibm_set_slot_reset, 4, 1, NULL,
			       config_addr,
			       BUID_HI(pdn->phb->buid),
			       BUID_LO(pdn->phb->buid), 1);
		if (rc)
			printk(KERN_WARNING
			       "EEH: Unable to reset the failed slot,"
			       " #RST=%d dn=%s\n",
			       rc, pdn->node->full_name);
	}
}

/**
 * pcibios_set_pcie_reset_state - Set PCI-E reset state
 * @dev: pci device struct
 * @state: reset state to enter
 *
 * Return value:
 *	0 if success
 */
int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state state)
{
	struct device_node *dn = pci_device_to_OF_node(dev);
	struct pci_dn *pdn = PCI_DN(dn);

	switch (state) {
	case pcie_deassert_reset:
		eeh_slot_reset(pdn, 0);
		break;
	case pcie_hot_reset:
		eeh_slot_reset(pdn, 1);
		break;
	case pcie_warm_reset:
		eeh_slot_reset(pdn, 3);
		break;
	default:
		return -EINVAL;
	}

	return 0;
}
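
/*
 * Illustrative note: drivers do not call this hook directly; the PCI core
 * routes pci_set_pcie_reset_state() here, so a driver sketch that forces a
 * hot reset of its own device might look like
 *
 *	pci_set_pcie_reset_state(pdev, pcie_hot_reset);
 *	pci_set_pcie_reset_state(pdev, pcie_deassert_reset);
 *
 * where "pdev" stands in for the driver's struct pci_dev.
 */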

/**
 * __eeh_set_pe_freset - Check the required reset for child devices
 * @parent: parent device
 * @freset: return value
 *
 * Each device might have its preferred reset type: fundamental or
 * hot reset. The routine is used to collect the information from
 * the child devices so that they could be reset accordingly.
 */
void __eeh_set_pe_freset(struct device_node *parent, unsigned int *freset)
{
	struct device_node *dn;

	for_each_child_of_node(parent, dn) {
		if (PCI_DN(dn)) {
			struct pci_dev *dev = PCI_DN(dn)->pcidev;

			if (dev && dev->driver)
				*freset |= dev->needs_freset;

			__eeh_set_pe_freset(dn, freset);
		}
	}
}

/**
 * eeh_set_pe_freset - Check the required reset for the indicated device and its children
 * @dn: parent device
 * @freset: return value
 *
 * Each device might have its preferred reset type: fundamental or
 * hot reset. The routine is used to collect the information for
 * the indicated device and its children so that the whole group of
 * devices can be reset properly.
 */
void eeh_set_pe_freset(struct device_node *dn, unsigned int *freset)
{
	struct pci_dev *dev;

	dn = eeh_find_device_pe(dn);

	/* Back up one, since config addrs might be shared */
	if (!pcibios_find_pci_bus(dn) && PCI_DN(dn->parent))
		dn = dn->parent;

	dev = PCI_DN(dn)->pcidev;
	if (dev)
		*freset |= dev->needs_freset;

	__eeh_set_pe_freset(dn, freset);
}

/**
 * eeh_reset_pe_once - Assert the pci #RST line for 1/4 second
 * @pdn: pci device node to be reset.
 *
 * Assert the PCI #RST line for 1/4 second.
 */
static void eeh_reset_pe_once(struct pci_dn *pdn)
{
	unsigned int freset = 0;

	/* Determine type of EEH reset required for
	 * Partitionable Endpoint, a hot-reset (1)
	 * or a fundamental reset (3).
	 * A fundamental reset required by any device under
	 * Partitionable Endpoint trumps hot-reset.
	 */
	eeh_set_pe_freset(pdn->node, &freset);

	if (freset)
		eeh_slot_reset(pdn, 3);
	else
		eeh_slot_reset(pdn, 1);

	/* The PCI bus requires that the reset be held high for at least
	 * 100 milliseconds. We wait a bit longer 'just in case'.
	 */
#define PCI_BUS_RST_HOLD_TIME_MSEC 250
	msleep(PCI_BUS_RST_HOLD_TIME_MSEC);

	/* We might get hit with another EEH freeze as soon as the
	 * pci slot reset line is dropped. Make sure we don't miss
	 * these, and clear the flag now.
	 */
	eeh_clear_slot(pdn->node, EEH_MODE_ISOLATED);

	eeh_slot_reset(pdn, 0);

	/* After a PCI slot has been reset, the PCI Express spec requires
	 * a 1.5 second idle time for the bus to stabilize, before starting
	 * up traffic.
	 */
#define PCI_BUS_SETTLE_TIME_MSEC 1800
	msleep(PCI_BUS_SETTLE_TIME_MSEC);
}

/**
 * eeh_reset_pe - Reset the indicated PE
 * @pdn: PCI device node
 *
 * This routine should be called to reset the indicated device, including
 * its PE. A PE might include multiple PCI devices and sometimes PCI bridges
 * might be involved as well.
 */
int eeh_reset_pe(struct pci_dn *pdn)
{
	int i, rc;

	/* Take three shots at resetting the bus */
	for (i=0; i<3; i++) {
		eeh_reset_pe_once(pdn);

		rc = eeh_wait_for_slot_status(pdn, PCI_BUS_RESET_WAIT_MSEC);
		if (rc == 0)
			return 0;

		if (rc < 0) {
			printk(KERN_ERR "EEH: unrecoverable slot failure %s\n",
			       pdn->node->full_name);
			return -1;
		}

		printk(KERN_ERR "EEH: bus reset %d failed on slot %s, rc=%d\n",
		       i+1, pdn->node->full_name, rc);
	}

	return -1;
}

/** Save and restore of PCI BARs
 *
 * Although firmware will set up BARs during boot, it doesn't
 * set up device BARs after a device reset, although it will,
 * if requested, set up bridge configuration. Thus, we need to
 * configure the PCI devices ourselves.
 */

/**
 * eeh_restore_one_device_bars - Restore the Base Address Registers for one device
 * @pdn: pci device node
 *
 * Loads the PCI configuration space base address registers,
 * the expansion ROM base address, the latency timer, etc.
 * from the saved values in the device node.
 */
static inline void eeh_restore_one_device_bars(struct pci_dn *pdn)
{
	int i;
	u32 cmd;

	if (NULL == pdn->phb) return;

	for (i=4; i<10; i++) {
		rtas_write_config(pdn, i*4, 4, pdn->config_space[i]);
	}

	/* 12 == Expansion ROM Address */
	rtas_write_config(pdn, 12*4, 4, pdn->config_space[12]);

#define BYTE_SWAP(OFF) (8*((OFF)/4)+3-(OFF))
#define SAVED_BYTE(OFF) (((u8 *)(pdn->config_space))[BYTE_SWAP(OFF)])
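
	/* config_space[] was filled by 4-byte config reads, so each 32-bit
	 * register value sits in native (big-endian) order in memory while
	 * config-space byte offsets count from the little-endian end of each
	 * dword.  BYTE_SWAP() remaps a config-space byte offset to the
	 * matching byte inside the saved array so single bytes can be
	 * written back.
	 */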
	rtas_write_config(pdn, PCI_CACHE_LINE_SIZE, 1,
			  SAVED_BYTE(PCI_CACHE_LINE_SIZE));
	rtas_write_config(pdn, PCI_LATENCY_TIMER, 1,
			  SAVED_BYTE(PCI_LATENCY_TIMER));

	/* max latency, min grant, interrupt pin and line */
	rtas_write_config(pdn, 15*4, 4, pdn->config_space[15]);

	/* Restore PERR & SERR bits, some devices require it,
	 * don't touch the other command bits
	 */
	rtas_read_config(pdn, PCI_COMMAND, 4, &cmd);
	if (pdn->config_space[1] & PCI_COMMAND_PARITY)
		cmd |= PCI_COMMAND_PARITY;
	else
		cmd &= ~PCI_COMMAND_PARITY;
	if (pdn->config_space[1] & PCI_COMMAND_SERR)
		cmd |= PCI_COMMAND_SERR;
	else
		cmd &= ~PCI_COMMAND_SERR;
	rtas_write_config(pdn, PCI_COMMAND, 4, cmd);
}

/**
 * eeh_restore_bars - Restore the PCI config space info
 * @pdn: PCI device node
 *
 * This routine performs a recursive walk to the children
 * of this device as well.
 */
void eeh_restore_bars(struct pci_dn *pdn)
{
	struct device_node *dn;

	if (!pdn)
		return;

	if ((pdn->eeh_mode & EEH_MODE_SUPPORTED) && !IS_BRIDGE(pdn->class_code))
		eeh_restore_one_device_bars(pdn);

	for_each_child_of_node(pdn->node, dn)
		eeh_restore_bars(PCI_DN(dn));
}

/**
 * eeh_save_bars - Save device bars
 * @pdn: PCI device node
 *
 * Save the values of the device bars. Unlike the restore
 * routine, this routine is *not* recursive. This is because
 * PCI devices are added individually; but, for the restore,
 * an entire slot is reset at a time.
 */
static void eeh_save_bars(struct pci_dn *pdn)
{
	int i;

	if (!pdn)
		return;

	for (i = 0; i < 16; i++)
		rtas_read_config(pdn, i * 4, 4, &pdn->config_space[i]);
}

/**
 * eeh_configure_bridge - Configure PCI bridges for the indicated PE
 * @pdn: PCI device node
 *
 * PCI bridges might be included in a PE. In order to make the PE work
 * again, the included PCI bridges should be recovered after the PE
 * encounters a frozen state.
 */
void eeh_configure_bridge(struct pci_dn *pdn)
{
	int config_addr;
	int rc;
	int token;

	/* Use PE configuration address, if present */
	config_addr = pdn->eeh_config_addr;
	if (pdn->eeh_pe_config_addr)
		config_addr = pdn->eeh_pe_config_addr;

	/* Use new configure-pe function, if supported */
	if (ibm_configure_pe != RTAS_UNKNOWN_SERVICE)
		token = ibm_configure_pe;
	else
		token = ibm_configure_bridge;

	rc = rtas_call(token, 3, 1, NULL,
		       config_addr,
		       BUID_HI(pdn->phb->buid),
		       BUID_LO(pdn->phb->buid));
	if (rc) {
		printk(KERN_WARNING "EEH: Unable to configure device bridge (%d) for %s\n",
		       rc, pdn->node->full_name);
	}
}

#define EEH_ENABLE 1

struct eeh_early_enable_info {
	unsigned int buid_hi;
	unsigned int buid_lo;
};

/**
 * eeh_get_pe_addr - Retrieve PE address with given BDF address
 * @config_addr: BDF address
 * @info: BUID of the associated PHB
 *
 * There are two kinds of addresses in the EEH core components:
 * BDF addresses and PE addresses. A dedicated platform-dependent
 * function call retrieves the PE address for a given BDF address,
 * and the EEH core prefers PE addresses over BDF addresses.
 */
static int eeh_get_pe_addr(int config_addr,
			   struct eeh_early_enable_info *info)
{
	unsigned int rets[3];
	int ret;

	/* Use latest config-addr token on power6 */
	if (ibm_get_config_addr_info2 != RTAS_UNKNOWN_SERVICE) {
		/* Make sure we have a PE in hand */
		ret = rtas_call(ibm_get_config_addr_info2, 4, 2, rets,
				config_addr, info->buid_hi, info->buid_lo, 1);
		if (ret || (rets[0] == 0))
			return 0;

		ret = rtas_call(ibm_get_config_addr_info2, 4, 2, rets,
				config_addr, info->buid_hi, info->buid_lo, 0);
		if (ret)
			return 0;
		return rets[0];
	}

	/* Use older config-addr token on power5 */
	if (ibm_get_config_addr_info != RTAS_UNKNOWN_SERVICE) {
		ret = rtas_call(ibm_get_config_addr_info, 4, 2, rets,
				config_addr, info->buid_hi, info->buid_lo, 0);
		if (ret)
			return 0;
		return rets[0];
	}

	return 0;
}

/**
 * eeh_early_enable - Early enable EEH on the indicated device
 * @dn: device node
 * @data: BUID
 *
 * Enable EEH functionality on the specified PCI device. The function
 * is expected to be called before real PCI probing is done. However,
 * the PHBs have been initialized at this point.
 */
static void *eeh_early_enable(struct device_node *dn, void *data)
{
	unsigned int rets[3];
	struct eeh_early_enable_info *info = data;
	int ret;
	const u32 *class_code = of_get_property(dn, "class-code", NULL);
	const u32 *vendor_id = of_get_property(dn, "vendor-id", NULL);
	const u32 *device_id = of_get_property(dn, "device-id", NULL);
	const u32 *regs;
	int enable;
	struct pci_dn *pdn = PCI_DN(dn);

	pdn->class_code = 0;
	pdn->eeh_mode = 0;
	pdn->eeh_check_count = 0;
	pdn->eeh_freeze_count = 0;
	pdn->eeh_false_positives = 0;

	if (!of_device_is_available(dn))
		return NULL;

	/* Ignore bad nodes. */
	if (!class_code || !vendor_id || !device_id)
		return NULL;

	/* There is nothing to check on PCI to ISA bridges */
	if (dn->type && !strcmp(dn->type, "isa")) {
		pdn->eeh_mode |= EEH_MODE_NOCHECK;
		return NULL;
	}
	pdn->class_code = *class_code;

	/* Ok... see if this device supports EEH. Some do, some don't,
	 * and the only way to find out is to check each and every one.
	 */
	regs = of_get_property(dn, "reg", NULL);
	if (regs) {
		/* First register entry is addr (00BBSS00) */
		/* Try to enable eeh */
		ret = rtas_call(ibm_set_eeh_option, 4, 1, NULL,
				regs[0], info->buid_hi, info->buid_lo,
				EEH_ENABLE);

		enable = 0;
		if (ret == 0) {
			pdn->eeh_config_addr = regs[0];

			/* If the newer, better, ibm,get-config-addr-info is supported,
			 * then use that instead.
			 */
			pdn->eeh_pe_config_addr = eeh_get_pe_addr(pdn->eeh_config_addr, info);

			/* Some older systems (Power4) allow the
			 * ibm,set-eeh-option call to succeed even on nodes
			 * where EEH is not supported. Verify support
			 * explicitly.
			 */
			ret = eeh_read_slot_reset_state(pdn, rets);
			if ((ret == 0) && (rets[1] == 1))
				enable = 1;
		}

		if (enable) {
			eeh_subsystem_enabled = 1;
			pdn->eeh_mode |= EEH_MODE_SUPPORTED;

			pr_debug("EEH: %s: eeh enabled, config=%x pe_config=%x\n",
				 dn->full_name, pdn->eeh_config_addr,
				 pdn->eeh_pe_config_addr);
		} else {
			/* This device doesn't support EEH, but it may have an
			 * EEH parent, in which case we mark it as supported.
			 */
			if (dn->parent && PCI_DN(dn->parent) &&
			    (PCI_DN(dn->parent)->eeh_mode & EEH_MODE_SUPPORTED)) {
				/* Parent supports EEH. */
				pdn->eeh_mode |= EEH_MODE_SUPPORTED;
				pdn->eeh_config_addr = PCI_DN(dn->parent)->eeh_config_addr;
				return NULL;
			}
		}
	} else {
		printk(KERN_WARNING "EEH: %s: unable to get reg property.\n",
		       dn->full_name);
	}

	eeh_save_bars(pdn);
	return NULL;
}

/**
 * eeh_ops_register - Register platform dependent EEH operations
 * @ops: platform dependent EEH operations
 *
 * Register the platform dependent EEH operation callback
 * functions. The platform should call this function before
 * any other EEH operations.
 */
int __init eeh_ops_register(struct eeh_ops *ops)
{
	if (!ops->name) {
		pr_warning("%s: Invalid EEH ops name for %p\n",
			   __func__, ops);
		return -EINVAL;
	}

	if (eeh_ops && eeh_ops != ops) {
		pr_warning("%s: EEH ops of platform %s already existing (%s)\n",
			   __func__, eeh_ops->name, ops->name);
		return -EEXIST;
	}

	eeh_ops = ops;

	return 0;
}

/**
 * eeh_ops_unregister - Unregister platform dependent EEH operations
 * @name: name of EEH platform operations
 *
 * Unregister the platform dependent EEH operation callback
 * functions.
 */
int __exit eeh_ops_unregister(const char *name)
{
	if (!name || !strlen(name)) {
		pr_warning("%s: Invalid EEH ops name\n",
			   __func__);
		return -EINVAL;
	}

	if (eeh_ops && !strcmp(eeh_ops->name, name)) {
		eeh_ops = NULL;
		return 0;
	}

	return -EEXIST;
}

/**
 * eeh_init - EEH initialization
 *
 * Initialize EEH by trying to enable it for all of the adapters in the system.
 * As a side effect we can determine here if eeh is supported at all.
 * Note that we leave EEH on so failed config cycles won't cause a machine
 * check. If a user turns off EEH for a particular adapter they are really
 * telling Linux to ignore errors. Some hardware (e.g. POWER5) won't
 * grant access to a slot if EEH isn't enabled, and so we always enable
 * EEH for all slots/all devices.
 *
 * The eeh-force-off option disables EEH checking globally, for all slots.
 * Even if force-off is set, the EEH hardware is still enabled, so that
 * newer systems can boot.
 */
void __init eeh_init(void)
{
	struct device_node *phb, *np;
	struct eeh_early_enable_info info;

	raw_spin_lock_init(&confirm_error_lock);
	spin_lock_init(&slot_errbuf_lock);

	np = of_find_node_by_path("/rtas");
	if (np == NULL)
		return;

	ibm_set_eeh_option = rtas_token("ibm,set-eeh-option");
	ibm_set_slot_reset = rtas_token("ibm,set-slot-reset");
	ibm_read_slot_reset_state2 = rtas_token("ibm,read-slot-reset-state2");
	ibm_read_slot_reset_state = rtas_token("ibm,read-slot-reset-state");
	ibm_slot_error_detail = rtas_token("ibm,slot-error-detail");
	ibm_get_config_addr_info = rtas_token("ibm,get-config-addr-info");
	ibm_get_config_addr_info2 = rtas_token("ibm,get-config-addr-info2");
	ibm_configure_bridge = rtas_token("ibm,configure-bridge");
	ibm_configure_pe = rtas_token("ibm,configure-pe");

	if (ibm_set_eeh_option == RTAS_UNKNOWN_SERVICE)
		return;

	eeh_error_buf_size = rtas_token("rtas-error-log-max");
	if (eeh_error_buf_size == RTAS_UNKNOWN_SERVICE) {
		eeh_error_buf_size = 1024;
	}
	if (eeh_error_buf_size > RTAS_ERROR_LOG_MAX) {
		printk(KERN_WARNING "EEH: rtas-error-log-max is bigger than allocated "
		       "buffer ! (%d vs %d)", eeh_error_buf_size, RTAS_ERROR_LOG_MAX);
		eeh_error_buf_size = RTAS_ERROR_LOG_MAX;
	}

	/* Enable EEH for all adapters. Note that eeh requires buid's */
	for (phb = of_find_node_by_name(NULL, "pci"); phb;
	     phb = of_find_node_by_name(phb, "pci")) {
		unsigned long buid;

		buid = get_phb_buid(phb);
		if (buid == 0 || PCI_DN(phb) == NULL)
			continue;

		info.buid_lo = BUID_LO(buid);
		info.buid_hi = BUID_HI(buid);
		traverse_pci_devices(phb, eeh_early_enable, &info);
	}

	if (eeh_subsystem_enabled)
		printk(KERN_INFO "EEH: PCI Enhanced I/O Error Handling Enabled\n");
	else
		printk(KERN_WARNING "EEH: No capable adapters found\n");
}

/**
 * eeh_add_device_early - Enable EEH for the indicated device_node
 * @dn: device node for which to set up EEH
 *
 * This routine must be used to perform EEH initialization for PCI
 * devices that were added after system boot (e.g. hotplug, dlpar).
 * This routine must be called before any i/o is performed to the
 * adapter (including any config-space i/o).
 * Whether this actually enables EEH or not for this device depends
 * on the CEC architecture, type of the device, earlier boot
 * command-line arguments, etc.
 */
static void eeh_add_device_early(struct device_node *dn)
{
	struct pci_controller *phb;
	struct eeh_early_enable_info info;

	if (!dn || !PCI_DN(dn))
		return;
	phb = PCI_DN(dn)->phb;

	/* USB Bus children of PCI devices will not have BUID's */
	if (NULL == phb || 0 == phb->buid)
		return;

	info.buid_hi = BUID_HI(phb->buid);
	info.buid_lo = BUID_LO(phb->buid);

	eeh_early_enable(dn, &info);
}

/**
 * eeh_add_device_tree_early - Enable EEH for the indicated device
 * @dn: device node
 *
 * This routine must be used to perform EEH initialization for the
 * indicated PCI device that was added after system boot (e.g.
 * hotplug, dlpar).
 */
void eeh_add_device_tree_early(struct device_node *dn)
{
	struct device_node *sib;

	for_each_child_of_node(dn, sib)
		eeh_add_device_tree_early(sib);
	eeh_add_device_early(dn);
}
EXPORT_SYMBOL_GPL(eeh_add_device_tree_early);

/**
 * eeh_add_device_late - Perform EEH initialization for the indicated pci device
 * @dev: pci device for which to set up EEH
 *
 * This routine must be used to complete EEH initialization for PCI
 * devices that were added after system boot (e.g. hotplug, dlpar).
 */
static void eeh_add_device_late(struct pci_dev *dev)
{
	struct device_node *dn;
	struct pci_dn *pdn;

	if (!dev || !eeh_subsystem_enabled)
		return;

	pr_debug("EEH: Adding device %s\n", pci_name(dev));

	dn = pci_device_to_OF_node(dev);
	pdn = PCI_DN(dn);
	if (pdn->pcidev == dev) {
		pr_debug("EEH: Already referenced !\n");
		return;
	}
	WARN_ON(pdn->pcidev);

	pci_dev_get(dev);
	pdn->pcidev = dev;

	pci_addr_cache_insert_device(dev);
	eeh_sysfs_add_device(dev);
}

/**
 * eeh_add_device_tree_late - Perform EEH initialization for the indicated PCI bus
 * @bus: PCI bus
 *
 * This routine must be used to perform EEH initialization for PCI
 * devices which are attached to the indicated PCI bus. The PCI bus
 * is added after system boot through hotplug or dlpar.
 */
void eeh_add_device_tree_late(struct pci_bus *bus)
{
	struct pci_dev *dev;

	list_for_each_entry(dev, &bus->devices, bus_list) {
		eeh_add_device_late(dev);
		if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) {
			struct pci_bus *subbus = dev->subordinate;
			if (subbus)
				eeh_add_device_tree_late(subbus);
		}
	}
}
EXPORT_SYMBOL_GPL(eeh_add_device_tree_late);

/**
 * eeh_remove_device - Undo EEH setup for the indicated pci device
 * @dev: pci device to be removed
 *
 * This routine should be called when a device is removed from
 * a running system (e.g. by hotplug or dlpar). It unregisters
 * the PCI device from the EEH subsystem. I/O errors affecting
 * this device will no longer be detected after this call; thus,
 * i/o errors affecting this slot may leave this device unusable.
 */
static void eeh_remove_device(struct pci_dev *dev)
{
	struct device_node *dn;

	if (!dev || !eeh_subsystem_enabled)
		return;

	/* Unregister the device with the EEH/PCI address search system */
	pr_debug("EEH: Removing device %s\n", pci_name(dev));

	dn = pci_device_to_OF_node(dev);
	if (PCI_DN(dn)->pcidev == NULL) {
		pr_debug("EEH: Not referenced !\n");
		return;
	}
	PCI_DN(dn)->pcidev = NULL;
	pci_dev_put(dev);

	pci_addr_cache_remove_device(dev);
	eeh_sysfs_remove_device(dev);
}

/**
 * eeh_remove_bus_device - Undo EEH setup for the indicated PCI device
 * @dev: PCI device
 *
 * This routine must be called when a device is removed from the
 * running system through hotplug or dlpar. The corresponding
 * PCI address cache will be removed.
 */
void eeh_remove_bus_device(struct pci_dev *dev)
{
	struct pci_bus *bus = dev->subordinate;
	struct pci_dev *child, *tmp;

	eeh_remove_device(dev);

	if (bus && dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) {
		list_for_each_entry_safe(child, tmp, &bus->devices, bus_list)
			eeh_remove_bus_device(child);
	}
}
EXPORT_SYMBOL_GPL(eeh_remove_bus_device);

static int proc_eeh_show(struct seq_file *m, void *v)
{
	if (0 == eeh_subsystem_enabled) {
		seq_printf(m, "EEH Subsystem is globally disabled\n");
		seq_printf(m, "eeh_total_mmio_ffs=%ld\n", total_mmio_ffs);
	} else {
		seq_printf(m, "EEH Subsystem is enabled\n");
		seq_printf(m,
			   "no device=%ld\n"
			   "no device node=%ld\n"
			   "no config address=%ld\n"
			   "check not wanted=%ld\n"
			   "eeh_total_mmio_ffs=%ld\n"
			   "eeh_false_positives=%ld\n"
			   "eeh_slot_resets=%ld\n",
			   no_device, no_dn, no_cfg_addr,
			   ignored_check, total_mmio_ffs,
			   false_positives,
			   slot_resets);
	}

	return 0;
}

static int proc_eeh_open(struct inode *inode, struct file *file)
{
	return single_open(file, proc_eeh_show, NULL);
}

static const struct file_operations proc_eeh_operations = {
	.open    = proc_eeh_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = single_release,
};

static int __init eeh_init_proc(void)
{
	if (machine_is(pseries))
		proc_create("powerpc/eeh", 0, NULL, &proc_eeh_operations);
	return 0;
}
__initcall(eeh_init_proc);
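
/*
 * Illustrative note: on a pseries system the statistics above can be read
 * from userspace, e.g.
 *
 *	$ cat /proc/powerpc/eeh
 *	EEH Subsystem is enabled
 *	no device=0
 *	...
 *
 * The counter values shown here are made up; the field names follow the
 * seq_printf() format strings in proc_eeh_show().
 */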