eeh.c

  1. /*
  2. * Copyright IBM Corporation 2001, 2005, 2006
  3. * Copyright Dave Engebretsen & Todd Inglett 2001
  4. * Copyright Linas Vepstas 2005, 2006
  5. * Copyright 2001-2012 IBM Corporation.
  6. *
  7. * This program is free software; you can redistribute it and/or modify
  8. * it under the terms of the GNU General Public License as published by
  9. * the Free Software Foundation; either version 2 of the License, or
  10. * (at your option) any later version.
  11. *
  12. * This program is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15. * GNU General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU General Public License
  18. * along with this program; if not, write to the Free Software
  19. * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  20. *
  21. * Please address comments and feedback to Linas Vepstas <linas@austin.ibm.com>
  22. */
  23. #include <linux/delay.h>
  24. #include <linux/sched.h>
  25. #include <linux/init.h>
  26. #include <linux/list.h>
  27. #include <linux/pci.h>
  28. #include <linux/proc_fs.h>
  29. #include <linux/rbtree.h>
  30. #include <linux/seq_file.h>
  31. #include <linux/spinlock.h>
  32. #include <linux/export.h>
  33. #include <linux/of.h>
  34. #include <linux/atomic.h>
  35. #include <asm/eeh.h>
  36. #include <asm/eeh_event.h>
  37. #include <asm/io.h>
  38. #include <asm/machdep.h>
  39. #include <asm/ppc-pci.h>
  40. #include <asm/rtas.h>
  41. /** Overview:
  42. * EEH, or "Extended Error Handling" is a PCI bridge technology for
  43. * dealing with PCI bus errors that can't be dealt with within the
  44. * usual PCI framework, except by check-stopping the CPU. Systems
  45. * that are designed for high-availability/reliability cannot afford
  46. * to crash due to a "mere" PCI error, thus the need for EEH.
  47. * An EEH-capable bridge operates by converting a detected error
  48. * into a "slot freeze", taking the PCI adapter off-line, making
  49. * the slot behave, from the OS's point of view, as if the slot
  50. * were "empty": all reads return 0xff's and all writes are silently
  51. * ignored. EEH slot isolation events can be triggered by parity
  52. * errors on the address or data busses (e.g. during posted writes),
  53. * which in turn might be caused by low voltage on the bus, dust,
  54. * vibration, humidity, radioactivity or plain-old failed hardware.
  55. *
  56. * Note, however, that one of the leading causes of EEH slot
  57. * freeze events is buggy device drivers, buggy device microcode,
  58. * or buggy device hardware. This is because any attempt by the
  59. * device to bus-master data to a memory address that is not
  60. * assigned to the device will trigger a slot freeze. (The idea
  61. * is to prevent devices-gone-wild from corrupting system memory).
  62. * Buggy hardware/drivers will have a miserable time co-existing
  63. * with EEH.
  64. *
  65. * Ideally, a PCI device driver, when suspecting that an isolation
  66. * event has occurred (e.g. by reading 0xff's), will then ask EEH
  67. * whether this is the case, and then take appropriate steps to
  68. * reset the PCI slot, the PCI device, and then resume operations.
  69. * However, until that day, the checking is done here, with the
  70. * eeh_check_failure() routine embedded in the MMIO macros. If
  71. * the slot is found to be isolated, an "EEH Event" is synthesized
  72. * and sent out for processing.
  73. */
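/* A minimal illustrative sketch (not taken from any real driver) of how
 * a driver could ask EEH to confirm a suspected freeze after reading
 * all-ones; the base address and register offset are hypothetical:
 *
 *	u32 val = readl(ioaddr + STATUS_REG);
 *	if (val == 0xffffffff)
 *		val = eeh_check_failure(ioaddr + STATUS_REG, val);
 *
 * In practice the powerpc MMIO accessors already embed this check via
 * eeh_check_failure(), as noted above.
 */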
  74. /* If a device driver keeps reading an MMIO register in an interrupt
  75. * handler after a slot isolation event, it might be broken.
  76. * This sets the threshold for how many read attempts we allow
  77. * before printing an error message.
  78. */
  79. #define EEH_MAX_FAILS 2100000
  80. /* Time to wait for a PCI slot to report status, in milliseconds */
  81. #define PCI_BUS_RESET_WAIT_MSEC (60*1000)
  82. /* RTAS tokens */
  83. static int ibm_set_slot_reset;
  84. static int ibm_read_slot_reset_state;
  85. static int ibm_read_slot_reset_state2;
  86. static int ibm_slot_error_detail;
  87. static int ibm_get_config_addr_info;
  88. static int ibm_get_config_addr_info2;
  89. static int ibm_configure_bridge;
  90. static int ibm_configure_pe;
  91. /* Platform dependent EEH operations */
  92. struct eeh_ops *eeh_ops = NULL;
  93. int eeh_subsystem_enabled;
  94. EXPORT_SYMBOL(eeh_subsystem_enabled);
  95. /* Lock to avoid races due to multiple reports of an error */
  96. static DEFINE_RAW_SPINLOCK(confirm_error_lock);
  97. /* Buffer for reporting slot-error-detail rtas calls. It's here
  98. * in BSS, and not dynamically allocated, so that it ends up in
  99. * RMO where RTAS can access it.
  100. */
  101. static unsigned char slot_errbuf[RTAS_ERROR_LOG_MAX];
  102. static DEFINE_SPINLOCK(slot_errbuf_lock);
  103. static int eeh_error_buf_size;
  104. /* Buffer for reporting pci register dumps. It's here in BSS, and
  105. * not dynamically allocated, so that it ends up in RMO where RTAS
  106. * can access it.
  107. */
  108. #define EEH_PCI_REGS_LOG_LEN 4096
  109. static unsigned char pci_regs_buf[EEH_PCI_REGS_LOG_LEN];
  110. /* System monitoring statistics */
  111. static unsigned long no_device;
  112. static unsigned long no_dn;
  113. static unsigned long no_cfg_addr;
  114. static unsigned long ignored_check;
  115. static unsigned long total_mmio_ffs;
  116. static unsigned long false_positives;
  117. static unsigned long slot_resets;
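/* The 24-bit class code read from the firmware "class-code" property
 * keeps the base class in bits 23:16, hence the right shift before
 * comparing against PCI_BASE_CLASS_BRIDGE.
 */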
  118. #define IS_BRIDGE(class_code) (((class_code)>>16) == PCI_BASE_CLASS_BRIDGE)
  119. /**
  120. * eeh_rtas_slot_error_detail - Retrieve error log through RTAS call
  121. * @pdn: device node
  122. * @severity: temporary or permanent error log
  123. * @driver_log: driver log to be combined with the retrieved error log
  124. * @loglen: length of driver log
  125. *
  126. * This routine should be called to retrieve error log through the dedicated
  127. * RTAS call.
  128. */
  129. static void eeh_rtas_slot_error_detail(struct pci_dn *pdn, int severity,
  130. char *driver_log, size_t loglen)
  131. {
  132. int config_addr;
  133. unsigned long flags;
  134. int rc;
  135. /* Log the error with the rtas logger */
  136. spin_lock_irqsave(&slot_errbuf_lock, flags);
  137. memset(slot_errbuf, 0, eeh_error_buf_size);
  138. /* Use PE configuration address, if present */
  139. config_addr = pdn->eeh_config_addr;
  140. if (pdn->eeh_pe_config_addr)
  141. config_addr = pdn->eeh_pe_config_addr;
  142. rc = rtas_call(ibm_slot_error_detail,
  143. 8, 1, NULL, config_addr,
  144. BUID_HI(pdn->phb->buid),
  145. BUID_LO(pdn->phb->buid),
  146. virt_to_phys(driver_log), loglen,
  147. virt_to_phys(slot_errbuf),
  148. eeh_error_buf_size,
  149. severity);
  150. if (rc == 0)
  151. log_error(slot_errbuf, ERR_TYPE_RTAS_LOG, 0);
  152. spin_unlock_irqrestore(&slot_errbuf_lock, flags);
  153. }
  154. /**
  155. * eeh_gather_pci_data - Copy assorted PCI config space registers to buf
  156. * @pdn: device to report data for
  157. * @buf: pointer to the buffer in which to log
  158. * @len: amount of room in buffer
  159. *
  160. * This routine captures assorted PCI configuration space data,
  161. * and puts them into a buffer for RTAS error logging.
  162. */
  163. static size_t eeh_gather_pci_data(struct pci_dn *pdn, char * buf, size_t len)
  164. {
  165. struct pci_dev *dev = pdn->pcidev;
  166. u32 cfg;
  167. int cap, i;
  168. int n = 0;
  169. n += scnprintf(buf+n, len-n, "%s\n", pdn->node->full_name);
  170. printk(KERN_WARNING "EEH: of node=%s\n", pdn->node->full_name);
  171. rtas_read_config(pdn, PCI_VENDOR_ID, 4, &cfg);
  172. n += scnprintf(buf+n, len-n, "dev/vend:%08x\n", cfg);
  173. printk(KERN_WARNING "EEH: PCI device/vendor: %08x\n", cfg);
  174. rtas_read_config(pdn, PCI_COMMAND, 4, &cfg);
  175. n += scnprintf(buf+n, len-n, "cmd/stat:%x\n", cfg);
  176. printk(KERN_WARNING "EEH: PCI cmd/status register: %08x\n", cfg);
  177. if (!dev) {
  178. printk(KERN_WARNING "EEH: no PCI device for this of node\n");
  179. return n;
  180. }
  181. /* Gather bridge-specific registers */
  182. if (dev->class >> 16 == PCI_BASE_CLASS_BRIDGE) {
  183. rtas_read_config(pdn, PCI_SEC_STATUS, 2, &cfg);
  184. n += scnprintf(buf+n, len-n, "sec stat:%x\n", cfg);
  185. printk(KERN_WARNING "EEH: Bridge secondary status: %04x\n", cfg);
  186. rtas_read_config(pdn, PCI_BRIDGE_CONTROL, 2, &cfg);
  187. n += scnprintf(buf+n, len-n, "brdg ctl:%x\n", cfg);
  188. printk(KERN_WARNING "EEH: Bridge control: %04x\n", cfg);
  189. }
  190. /* Dump out the PCI-X command and status regs */
  191. cap = pci_find_capability(dev, PCI_CAP_ID_PCIX);
  192. if (cap) {
  193. rtas_read_config(pdn, cap, 4, &cfg);
  194. n += scnprintf(buf+n, len-n, "pcix-cmd:%x\n", cfg);
  195. printk(KERN_WARNING "EEH: PCI-X cmd: %08x\n", cfg);
  196. rtas_read_config(pdn, cap+4, 4, &cfg);
  197. n += scnprintf(buf+n, len-n, "pcix-stat:%x\n", cfg);
  198. printk(KERN_WARNING "EEH: PCI-X status: %08x\n", cfg);
  199. }
  200. /* If PCI-E capable, dump PCI-E cap 10, and the AER */
  201. cap = pci_find_capability(dev, PCI_CAP_ID_EXP);
  202. if (cap) {
  203. n += scnprintf(buf+n, len-n, "pci-e cap10:\n");
  204. printk(KERN_WARNING
  205. "EEH: PCI-E capabilities and status follow:\n");
  206. for (i=0; i<=8; i++) {
  207. rtas_read_config(pdn, cap+4*i, 4, &cfg);
  208. n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg);
  209. printk(KERN_WARNING "EEH: PCI-E %02x: %08x\n", i, cfg);
  210. }
  211. cap = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR);
  212. if (cap) {
  213. n += scnprintf(buf+n, len-n, "pci-e AER:\n");
  214. printk(KERN_WARNING
  215. "EEH: PCI-E AER capability register set follows:\n");
  216. for (i=0; i<14; i++) {
  217. rtas_read_config(pdn, cap+4*i, 4, &cfg);
  218. n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg);
  219. printk(KERN_WARNING "EEH: PCI-E AER %02x: %08x\n", i, cfg);
  220. }
  221. }
  222. }
  223. /* Gather status on devices under the bridge */
  224. if (dev->class >> 16 == PCI_BASE_CLASS_BRIDGE) {
  225. struct device_node *dn;
  226. for_each_child_of_node(pdn->node, dn) {
  227. pdn = PCI_DN(dn);
  228. if (pdn)
  229. n += eeh_gather_pci_data(pdn, buf+n, len-n);
  230. }
  231. }
  232. return n;
  233. }
  234. /**
  235. * eeh_slot_error_detail - Generate combined log including driver log and error log
  236. * @pdn: device node
  237. * @severity: temporary or permanent error log
  238. *
  239. * This routine should be called to generate the combined log, which
  240. * comprises the driver log and the error log. The driver log is
  241. * extracted from the config space of the corresponding PCI device,
  242. * while the error log is fetched through a platform dependent call.
  243. */
  244. void eeh_slot_error_detail(struct pci_dn *pdn, int severity)
  245. {
  246. size_t loglen = 0;
  247. pci_regs_buf[0] = 0;
  248. eeh_pci_enable(pdn, EEH_OPT_THAW_MMIO);
  249. eeh_configure_bridge(pdn);
  250. eeh_restore_bars(pdn);
  251. loglen = eeh_gather_pci_data(pdn, pci_regs_buf, EEH_PCI_REGS_LOG_LEN);
  252. eeh_rtas_slot_error_detail(pdn, severity, pci_regs_buf, loglen);
  253. }
  254. /**
  255. * eeh_read_slot_reset_state - Read the reset state of a device node's slot
  256. * @pdn: pci device node to read
  257. * @rets: array to return results in
  258. *
  259. * Read the reset state of a device node's slot through platform dependent
  260. * function call.
  261. */
  262. static int eeh_read_slot_reset_state(struct pci_dn *pdn, int rets[])
  263. {
  264. int token, outputs;
  265. int config_addr;
  266. if (ibm_read_slot_reset_state2 != RTAS_UNKNOWN_SERVICE) {
  267. token = ibm_read_slot_reset_state2;
  268. outputs = 4;
  269. } else {
  270. token = ibm_read_slot_reset_state;
  271. rets[2] = 0; /* fake PE Unavailable info */
  272. outputs = 3;
  273. }
  274. /* Use PE configuration address, if present */
  275. config_addr = pdn->eeh_config_addr;
  276. if (pdn->eeh_pe_config_addr)
  277. config_addr = pdn->eeh_pe_config_addr;
  278. return rtas_call(token, 3, outputs, rets, config_addr,
  279. BUID_HI(pdn->phb->buid), BUID_LO(pdn->phb->buid));
  280. }
  281. /**
  282. * eeh_wait_for_slot_status - Returns error status of slot
  283. * @pdn: pci device node
  284. * @max_wait_msecs: maximum number of milliseconds to wait
  285. *
  286. * Return negative value if a permanent error, else return
  287. * Partitionable Endpoint (PE) status value.
  288. *
  289. * If @max_wait_msecs is positive, then this routine will
  290. * sleep until a valid status can be obtained, or until
  291. * the max allowed wait time is exceeded, in which case
  292. * a -2 is returned.
  293. */
  294. int eeh_wait_for_slot_status(struct pci_dn *pdn, int max_wait_msecs)
  295. {
  296. int rc;
  297. int rets[3];
  298. int mwait;
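/* As used below: rets[0] is the PE state returned by firmware (5 means
 * the state is temporarily unavailable), rets[1] is non-zero when EEH
 * is supported on this slot, and rets[2] is the suggested wait time in
 * milliseconds (0 meaning the slot is permanently unavailable).
 */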
  299. while (1) {
  300. rc = eeh_read_slot_reset_state(pdn, rets);
  301. if (rc) return rc;
  302. if (rets[1] == 0) return -1; /* EEH is not supported */
  303. if (rets[0] != 5) return rets[0]; /* return actual status */
  304. if (rets[2] == 0) return -1; /* permanently unavailable */
  305. if (max_wait_msecs <= 0) break;
  306. mwait = rets[2];
  307. if (mwait <= 0) {
  308. printk(KERN_WARNING "EEH: Firmware returned bad wait value=%d\n",
  309. mwait);
  310. mwait = 1000;
  311. } else if (mwait > 300*1000) {
  312. printk(KERN_WARNING "EEH: Firmware is taking too long, time=%d\n",
  313. mwait);
  314. mwait = 300*1000;
  315. }
  316. max_wait_msecs -= mwait;
  317. msleep(mwait);
  318. }
  319. printk(KERN_WARNING "EEH: Timed out waiting for slot status\n");
  320. return -2;
  321. }
  322. /**
  323. * eeh_token_to_phys - Convert EEH address token to phys address
  324. * @token: I/O token, should be address in the form 0xA....
  325. *
  326. * This routine should be called to convert virtual I/O address
  327. * to physical one.
  328. */
  329. static inline unsigned long eeh_token_to_phys(unsigned long token)
  330. {
  331. pte_t *ptep;
  332. unsigned long pa;
  333. ptep = find_linux_pte(init_mm.pgd, token);
  334. if (!ptep)
  335. return token;
  336. pa = pte_pfn(*ptep) << PAGE_SHIFT;
  337. return pa | (token & (PAGE_SIZE-1));
  338. }
  339. /**
  340. * eeh_find_device_pe - Retrieve the PE for the given device
  341. * @dn: device node
  342. *
  343. * Return the PE under which this device lies
  344. */
  345. struct device_node *eeh_find_device_pe(struct device_node *dn)
  346. {
  347. while ((dn->parent) && PCI_DN(dn->parent) &&
  348. (PCI_DN(dn->parent)->eeh_mode & EEH_MODE_SUPPORTED)) {
  349. dn = dn->parent;
  350. }
  351. return dn;
  352. }
  353. /**
  354. * __eeh_mark_slot - Mark all child devices as failed
  355. * @parent: parent device
  356. * @mode_flag: failure flag
  357. *
  358. * Mark all devices that are children of this device as failed.
  359. * Mark the device driver too, so that it can see the failure
  360. * immediately; this is critical, since some drivers poll
  361. * status registers in interrupts ... If a driver is polling,
  362. * and the slot is frozen, then the driver can deadlock in
  363. * an interrupt context, which is bad.
  364. */
  365. static void __eeh_mark_slot(struct device_node *parent, int mode_flag)
  366. {
  367. struct device_node *dn;
  368. for_each_child_of_node(parent, dn) {
  369. if (PCI_DN(dn)) {
  370. /* Mark the pci device driver too */
  371. struct pci_dev *dev = PCI_DN(dn)->pcidev;
  372. PCI_DN(dn)->eeh_mode |= mode_flag;
  373. if (dev && dev->driver)
  374. dev->error_state = pci_channel_io_frozen;
  375. __eeh_mark_slot(dn, mode_flag);
  376. }
  377. }
  378. }
  379. /**
  380. * eeh_mark_slot - Mark the indicated device and its children as failed
  381. * @dn: parent device
  382. * @mode_flag: failure flag
  383. *
  384. * Mark the indicated device and its child devices as failed.
  385. * The device drivers are marked as failed as well.
  386. */
  387. void eeh_mark_slot(struct device_node *dn, int mode_flag)
  388. {
  389. struct pci_dev *dev;
  390. dn = eeh_find_device_pe(dn);
  391. /* Back up one, since config addrs might be shared */
  392. if (!pcibios_find_pci_bus(dn) && PCI_DN(dn->parent))
  393. dn = dn->parent;
  394. PCI_DN(dn)->eeh_mode |= mode_flag;
  395. /* Mark the pci device too */
  396. dev = PCI_DN(dn)->pcidev;
  397. if (dev)
  398. dev->error_state = pci_channel_io_frozen;
  399. __eeh_mark_slot(dn, mode_flag);
  400. }
  401. /**
  402. * __eeh_clear_slot - Clear failure flag for the child devices
  403. * @parent: parent device
  404. * @mode_flag: flag to be cleared
  405. *
  406. * Clear failure flag for the child devices.
  407. */
  408. static void __eeh_clear_slot(struct device_node *parent, int mode_flag)
  409. {
  410. struct device_node *dn;
  411. for_each_child_of_node(parent, dn) {
  412. if (PCI_DN(dn)) {
  413. PCI_DN(dn)->eeh_mode &= ~mode_flag;
  414. PCI_DN(dn)->eeh_check_count = 0;
  415. __eeh_clear_slot(dn, mode_flag);
  416. }
  417. }
  418. }
  419. /**
  420. * eeh_clear_slot - Clear failure flag for the indicated device and its children
  421. * @dn: parent device
  422. * @mode_flag: flag to be cleared
  423. *
  424. * Clear failure flag for the indicated device and its children.
  425. */
  426. void eeh_clear_slot(struct device_node *dn, int mode_flag)
  427. {
  428. unsigned long flags;
  429. raw_spin_lock_irqsave(&confirm_error_lock, flags);
  430. dn = eeh_find_device_pe(dn);
  431. /* Back up one, since config addrs might be shared */
  432. if (!pcibios_find_pci_bus(dn) && PCI_DN(dn->parent))
  433. dn = dn->parent;
  434. PCI_DN(dn)->eeh_mode &= ~mode_flag;
  435. PCI_DN(dn)->eeh_check_count = 0;
  436. __eeh_clear_slot(dn, mode_flag);
  437. raw_spin_unlock_irqrestore(&confirm_error_lock, flags);
  438. }
  439. /**
  440. * eeh_dn_check_failure - Check if all 1's data is due to EEH slot freeze
  441. * @dn: device node
  442. * @dev: pci device, if known
  443. *
  444. * Check for an EEH failure for the given device node. Call this
  445. * routine if the result of a read was all 0xff's and you want to
  446. * find out if this is due to an EEH slot freeze. This routine
  447. * will query firmware for the EEH status.
  448. *
  449. * Returns 0 if there has not been an EEH error; otherwise returns
  450. * a non-zero value and queues up a slot isolation event notification.
  451. *
  452. * It is safe to call this routine in an interrupt context.
  453. */
  454. int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev)
  455. {
  456. int ret;
  457. int rets[3];
  458. unsigned long flags;
  459. struct pci_dn *pdn;
  460. int rc = 0;
  461. const char *location;
  462. total_mmio_ffs++;
  463. if (!eeh_subsystem_enabled)
  464. return 0;
  465. if (!dn) {
  466. no_dn++;
  467. return 0;
  468. }
  469. dn = eeh_find_device_pe(dn);
  470. pdn = PCI_DN(dn);
  471. /* Access to IO BARs might get this far and still not want checking. */
  472. if (!(pdn->eeh_mode & EEH_MODE_SUPPORTED) ||
  473. pdn->eeh_mode & EEH_MODE_NOCHECK) {
  474. ignored_check++;
  475. pr_debug("EEH: Ignored check (%x) for %s %s\n",
  476. pdn->eeh_mode, eeh_pci_name(dev), dn->full_name);
  477. return 0;
  478. }
  479. if (!pdn->eeh_config_addr && !pdn->eeh_pe_config_addr) {
  480. no_cfg_addr++;
  481. return 0;
  482. }
  483. /* If we already have a pending isolation event for this
  484. * slot, we know it's bad already, we don't need to check.
  485. * Do this checking under a lock; as multiple PCI devices
  486. * in one slot might report errors simultaneously, and we
  487. * only want one error recovery routine running.
  488. */
  489. raw_spin_lock_irqsave(&confirm_error_lock, flags);
  490. rc = 1;
  491. if (pdn->eeh_mode & EEH_MODE_ISOLATED) {
  492. pdn->eeh_check_count ++;
  493. if (pdn->eeh_check_count % EEH_MAX_FAILS == 0) {
  494. location = of_get_property(dn, "ibm,loc-code", NULL);
  495. printk(KERN_ERR "EEH: %d reads ignored for recovering device at "
  496. "location=%s driver=%s pci addr=%s\n",
  497. pdn->eeh_check_count, location,
  498. eeh_driver_name(dev), eeh_pci_name(dev));
  499. printk(KERN_ERR "EEH: Might be infinite loop in %s driver\n",
  500. eeh_driver_name(dev));
  501. dump_stack();
  502. }
  503. goto dn_unlock;
  504. }
  505. /*
  506. * Now test for an EEH failure. This is VERY expensive.
  507. * Note that the eeh_config_addr may be a parent device
  508. * in the case of a device behind a bridge, or it may be
  509. * function zero of a multi-function device.
  510. * In any case they must share a common PHB.
  511. */
  512. ret = eeh_read_slot_reset_state(pdn, rets);
  513. /* If the call to firmware failed, punt */
  514. if (ret != 0) {
  515. printk(KERN_WARNING "EEH: eeh_read_slot_reset_state() failed; rc=%d dn=%s\n",
  516. ret, dn->full_name);
  517. false_positives++;
  518. pdn->eeh_false_positives ++;
  519. rc = 0;
  520. goto dn_unlock;
  521. }
  522. /* Note that config-io to empty slots may fail;
  523. * they are empty when they don't have children.
  524. */
  525. if ((rets[0] == 5) && (rets[2] == 0) && (dn->child == NULL)) {
  526. false_positives++;
  527. pdn->eeh_false_positives ++;
  528. rc = 0;
  529. goto dn_unlock;
  530. }
  531. /* If EEH is not supported on this device, punt. */
  532. if (rets[1] != 1) {
  533. printk(KERN_WARNING "EEH: event on unsupported device, rc=%d dn=%s\n",
  534. ret, dn->full_name);
  535. false_positives++;
  536. pdn->eeh_false_positives ++;
  537. rc = 0;
  538. goto dn_unlock;
  539. }
  540. /* If not the kind of error we know about, punt. */
  541. if (rets[0] != 1 && rets[0] != 2 && rets[0] != 4 && rets[0] != 5) {
  542. false_positives++;
  543. pdn->eeh_false_positives ++;
  544. rc = 0;
  545. goto dn_unlock;
  546. }
  547. slot_resets++;
  548. /* Avoid repeated reports of this failure, including problems
  549. * with other functions on this device, and functions under
  550. * bridges.
  551. */
  552. eeh_mark_slot(dn, EEH_MODE_ISOLATED);
  553. raw_spin_unlock_irqrestore(&confirm_error_lock, flags);
  554. eeh_send_failure_event(dn, dev);
  555. /* Most EEH events are due to device driver bugs. Having
  556. * a stack trace will help the device-driver authors figure
  557. * out what happened. So print that out.
  558. */
  559. dump_stack();
  560. return 1;
  561. dn_unlock:
  562. raw_spin_unlock_irqrestore(&confirm_error_lock, flags);
  563. return rc;
  564. }
  565. EXPORT_SYMBOL_GPL(eeh_dn_check_failure);
  566. /**
  567. * eeh_check_failure - Check if all 1's data is due to EEH slot freeze
  568. * @token: I/O token, should be address in the form 0xA....
  569. * @val: value, should be all 1's (XXX why do we need this arg??)
  570. *
  571. * Check for an EEH failure at the given token address. Call this
  572. * routine if the result of a read was all 0xff's and you want to
  573. * find out if this is due to an EEH slot freeze event. This routine
  574. * will query firmware for the EEH status.
  575. *
  576. * Note this routine is safe to call in an interrupt context.
  577. */
  578. unsigned long eeh_check_failure(const volatile void __iomem *token, unsigned long val)
  579. {
  580. unsigned long addr;
  581. struct pci_dev *dev;
  582. struct device_node *dn;
  583. /* Finding the phys addr + pci device; this is pretty quick. */
  584. addr = eeh_token_to_phys((unsigned long __force) token);
  585. dev = pci_get_device_by_addr(addr);
  586. if (!dev) {
  587. no_device++;
  588. return val;
  589. }
  590. dn = pci_device_to_OF_node(dev);
  591. eeh_dn_check_failure(dn, dev);
  592. pci_dev_put(dev);
  593. return val;
  594. }
  595. EXPORT_SYMBOL(eeh_check_failure);
  596. /**
  597. * eeh_pci_enable - Enable MMIO or DMA transfers for this slot
  598. * @pdn: pci device node
  599. *
  600. * This routine should be called to reenable frozen MMIO or DMA
  601. * so that it would work correctly again. It's useful while doing
  602. * recovery or log collection on the indicated device.
  603. */
  604. int eeh_pci_enable(struct pci_dn *pdn, int function)
  605. {
  606. int rc;
  607. rc = eeh_ops->set_option(pdn->node, function);
  608. if (rc)
  609. printk(KERN_WARNING "EEH: Unexpected state change %d, err=%d dn=%s\n",
  610. function, rc, pdn->node->full_name);
  611. rc = eeh_wait_for_slot_status(pdn, PCI_BUS_RESET_WAIT_MSEC);
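/* A returned state of 4 is treated as "MMIO re-enabled while DMA is
 * still stopped", i.e. a successful EEH_OPT_THAW_MMIO; this reading of
 * the numeric state is an assumption, since the RTAS state values are
 * not spelled out in this file.
 */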
  612. if ((rc == 4) && (function == EEH_OPT_THAW_MMIO))
  613. return 0;
  614. return rc;
  615. }
  616. /**
  617. * eeh_slot_reset - Raises/Lowers the pci #RST line
  618. * @pdn: pci device node
  619. * @state: 1/0 to raise/lower the #RST
  620. *
  621. * Clear the EEH-frozen condition on a slot. This routine
  622. * asserts the PCI #RST line if the 'state' argument is '1',
  623. * and drops the #RST line if 'state' is '0'. This routine is
  624. * safe to call in an interrupt context.
  625. */
  626. static void eeh_slot_reset(struct pci_dn *pdn, int state)
  627. {
  628. int config_addr;
  629. int rc;
  630. BUG_ON(pdn==NULL);
  631. if (!pdn->phb) {
  632. printk(KERN_WARNING "EEH: in slot reset, device node %s has no phb\n",
  633. pdn->node->full_name);
  634. return;
  635. }
  636. /* Use PE configuration address, if present */
  637. config_addr = pdn->eeh_config_addr;
  638. if (pdn->eeh_pe_config_addr)
  639. config_addr = pdn->eeh_pe_config_addr;
  640. rc = rtas_call(ibm_set_slot_reset, 4, 1, NULL,
  641. config_addr,
  642. BUID_HI(pdn->phb->buid),
  643. BUID_LO(pdn->phb->buid),
  644. state);
  645. /* Fundamental-reset not supported on this PE, try hot-reset */
  646. if (rc == -8 && state == 3) {
  647. rc = rtas_call(ibm_set_slot_reset, 4, 1, NULL,
  648. config_addr,
  649. BUID_HI(pdn->phb->buid),
  650. BUID_LO(pdn->phb->buid), 1);
  651. if (rc)
  652. printk(KERN_WARNING
  653. "EEH: Unable to reset the failed slot,"
  654. " #RST=%d dn=%s\n",
  655. rc, pdn->node->full_name);
  656. }
  657. }
  658. /**
  659. * pcibios_set_pcie_reset_state - Set PCI-E reset state
  660. * @dev: pci device struct
  661. * @state: reset state to enter
  662. *
  663. * Return value:
  664. * 0 if success
  665. */
  666. int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state state)
  667. {
  668. struct device_node *dn = pci_device_to_OF_node(dev);
  669. struct pci_dn *pdn = PCI_DN(dn);
  670. switch (state) {
  671. case pcie_deassert_reset:
  672. eeh_slot_reset(pdn, 0);
  673. break;
  674. case pcie_hot_reset:
  675. eeh_slot_reset(pdn, 1);
  676. break;
  677. case pcie_warm_reset:
  678. eeh_slot_reset(pdn, 3);
  679. break;
  680. default:
  681. return -EINVAL;
  682. };
  683. return 0;
  684. }
  685. /**
  686. * __eeh_set_pe_freset - Check the required reset for child devices
  687. * @parent: parent device
  688. * @freset: return value
  689. *
  690. * Each device might have its preferred reset type: fundamental or
  691. * hot reset. The routine is used to collect the information from
  692. * the child devices so that they could be reset accordingly.
  693. */
  694. void __eeh_set_pe_freset(struct device_node *parent, unsigned int *freset)
  695. {
  696. struct device_node *dn;
  697. for_each_child_of_node(parent, dn) {
  698. if (PCI_DN(dn)) {
  699. struct pci_dev *dev = PCI_DN(dn)->pcidev;
  700. if (dev && dev->driver)
  701. *freset |= dev->needs_freset;
  702. __eeh_set_pe_freset(dn, freset);
  703. }
  704. }
  705. }
  706. /**
  707. * eeh_set_pe_freset - Check the required reset for the indicated device and its children
  708. * @dn: parent device
  709. * @freset: return value
  710. *
  711. * Each device might have its preferred reset type: fundamental or
  712. * hot reset. The routine is used to collect the information for
  713. * the indicated device and its children so that all of the
  714. * devices can be reset properly.
  715. */
  716. void eeh_set_pe_freset(struct device_node *dn, unsigned int *freset)
  717. {
  718. struct pci_dev *dev;
  719. dn = eeh_find_device_pe(dn);
  720. /* Back up one, since config addrs might be shared */
  721. if (!pcibios_find_pci_bus(dn) && PCI_DN(dn->parent))
  722. dn = dn->parent;
  723. dev = PCI_DN(dn)->pcidev;
  724. if (dev)
  725. *freset |= dev->needs_freset;
  726. __eeh_set_pe_freset(dn, freset);
  727. }
  728. /**
  729. * eeh_reset_pe_once - Assert the pci #RST line for 1/4 second
  730. * @pdn: pci device node to be reset.
  731. *
  732. * Assert the PCI #RST line for 1/4 second.
  733. */
  734. static void eeh_reset_pe_once(struct pci_dn *pdn)
  735. {
  736. unsigned int freset = 0;
  737. /* Determine type of EEH reset required for
  738. * Partitionable Endpoint, a hot-reset (1)
  739. * or a fundamental reset (3).
  740. * A fundamental reset required by any device under
  741. * Partitionable Endpoint trumps hot-reset.
  742. */
  743. eeh_set_pe_freset(pdn->node, &freset);
  744. if (freset)
  745. eeh_slot_reset(pdn, 3);
  746. else
  747. eeh_slot_reset(pdn, 1);
  748. /* The PCI bus requires that the reset be held high for at least
  749. * 100 milliseconds. We wait a bit longer 'just in case'.
  750. */
  751. #define PCI_BUS_RST_HOLD_TIME_MSEC 250
  752. msleep(PCI_BUS_RST_HOLD_TIME_MSEC);
  753. /* We might get hit with another EEH freeze as soon as the
  754. * pci slot reset line is dropped. Make sure we don't miss
  755. * these, and clear the flag now.
  756. */
  757. eeh_clear_slot(pdn->node, EEH_MODE_ISOLATED);
  758. eeh_slot_reset(pdn, 0);
  759. /* After a PCI slot has been reset, the PCI Express spec requires
  760. * a 1.5 second idle time for the bus to stabilize, before starting
  761. * up traffic.
  762. */
  763. #define PCI_BUS_SETTLE_TIME_MSEC 1800
  764. msleep(PCI_BUS_SETTLE_TIME_MSEC);
  765. }
  766. /**
  767. * eeh_reset_pe - Reset the indicated PE
  768. * @pdn: PCI device node
  769. *
  770. * This routine should be called to reset the indicated device, including
  771. * its PE. A PE might include multiple PCI devices, and sometimes PCI bridges
  772. * might be involved as well.
  773. */
  774. int eeh_reset_pe(struct pci_dn *pdn)
  775. {
  776. int i, rc;
  777. /* Take three shots at resetting the bus */
  778. for (i=0; i<3; i++) {
  779. eeh_reset_pe_once(pdn);
  780. rc = eeh_wait_for_slot_status(pdn, PCI_BUS_RESET_WAIT_MSEC);
  781. if (rc == 0)
  782. return 0;
  783. if (rc < 0) {
  784. printk(KERN_ERR "EEH: unrecoverable slot failure %s\n",
  785. pdn->node->full_name);
  786. return -1;
  787. }
  788. printk(KERN_ERR "EEH: bus reset %d failed on slot %s, rc=%d\n",
  789. i+1, pdn->node->full_name, rc);
  790. }
  791. return -1;
  792. }
  793. /** Save and restore of PCI BARs
  794. *
  795. * Although firmware will set up BARs during boot, it doesn't
  796. * set up device BARs after a device reset, although it will,
  797. * if requested, set up bridge configuration. Thus, we need to
  798. * configure the PCI devices ourselves.
  799. */
  800. /**
  801. * eeh_restore_one_device_bars - Restore the Base Address Registers for one device
  802. * @pdn: pci device node
  803. *
  804. * Loads the PCI configuration space base address registers,
  805. * the expansion ROM base address, the latency timer, etc.,
  806. * from the saved values in the device node.
  807. */
  808. static inline void eeh_restore_one_device_bars(struct pci_dn *pdn)
  809. {
  810. int i;
  811. u32 cmd;
  812. if (NULL==pdn->phb) return;
  813. for (i=4; i<10; i++) {
  814. rtas_write_config(pdn, i*4, 4, pdn->config_space[i]);
  815. }
  816. /* 12 == Expansion ROM Address */
  817. rtas_write_config(pdn, 12*4, 4, pdn->config_space[12]);
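/* The two helpers below pull single bytes back out of the saved 32-bit
 * config words: on the big-endian powerpc this file targets, byte OFF
 * of PCI config space sits at byte 4*(OFF/4) + (3 - OFF%4) of the u8
 * view of config_space[], which is what BYTE_SWAP() computes.
 */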
  818. #define BYTE_SWAP(OFF) (8*((OFF)/4)+3-(OFF))
  819. #define SAVED_BYTE(OFF) (((u8 *)(pdn->config_space))[BYTE_SWAP(OFF)])
  820. rtas_write_config(pdn, PCI_CACHE_LINE_SIZE, 1,
  821. SAVED_BYTE(PCI_CACHE_LINE_SIZE));
  822. rtas_write_config(pdn, PCI_LATENCY_TIMER, 1,
  823. SAVED_BYTE(PCI_LATENCY_TIMER));
  824. /* max latency, min grant, interrupt pin and line */
  825. rtas_write_config(pdn, 15*4, 4, pdn->config_space[15]);
  826. /* Restore PERR & SERR bits, some devices require it,
  827. * don't touch the other command bits
  828. */
  829. rtas_read_config(pdn, PCI_COMMAND, 4, &cmd);
  830. if (pdn->config_space[1] & PCI_COMMAND_PARITY)
  831. cmd |= PCI_COMMAND_PARITY;
  832. else
  833. cmd &= ~PCI_COMMAND_PARITY;
  834. if (pdn->config_space[1] & PCI_COMMAND_SERR)
  835. cmd |= PCI_COMMAND_SERR;
  836. else
  837. cmd &= ~PCI_COMMAND_SERR;
  838. rtas_write_config(pdn, PCI_COMMAND, 4, cmd);
  839. }
  840. /**
  841. * eeh_restore_bars - Restore the PCI config space info
  842. * @pdn: PCI device node
  843. *
  844. * This routine performs a recursive walk to the children
  845. * of this device as well.
  846. */
  847. void eeh_restore_bars(struct pci_dn *pdn)
  848. {
  849. struct device_node *dn;
  850. if (!pdn)
  851. return;
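/* Bridges are skipped here: as noted above, firmware will, if asked,
 * re-do bridge configuration itself (see eeh_configure_bridge()).
 */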
  852. if ((pdn->eeh_mode & EEH_MODE_SUPPORTED) && !IS_BRIDGE(pdn->class_code))
  853. eeh_restore_one_device_bars(pdn);
  854. for_each_child_of_node(pdn->node, dn)
  855. eeh_restore_bars(PCI_DN(dn));
  856. }
  857. /**
  858. * eeh_save_bars - Save device bars
  859. * @pdn: PCI device node
  860. *
  861. * Save the values of the device BARs. Unlike the restore
  862. * routine, this routine is *not* recursive. This is because
  863. * PCI devices are added individually; but, for the restore,
  864. * an entire slot is reset at a time.
  865. */
  866. static void eeh_save_bars(struct pci_dn *pdn)
  867. {
  868. int i;
  869. if (!pdn )
  870. return;
  871. for (i = 0; i < 16; i++)
  872. rtas_read_config(pdn, i * 4, 4, &pdn->config_space[i]);
  873. }
  874. /**
  875. * eeh_configure_bridge - Configure PCI bridges for the indicated PE
  876. * @pdn: PCI device node
  877. *
  878. * PCI bridges might be included in a PE. In order to make the PE work
  879. * again, the included PCI bridges should be recovered after the PE
  880. * enters the frozen state.
  881. */
  882. void eeh_configure_bridge(struct pci_dn *pdn)
  883. {
  884. int config_addr;
  885. int rc;
  886. int token;
  887. /* Use PE configuration address, if present */
  888. config_addr = pdn->eeh_config_addr;
  889. if (pdn->eeh_pe_config_addr)
  890. config_addr = pdn->eeh_pe_config_addr;
  891. /* Use new configure-pe function, if supported */
  892. if (ibm_configure_pe != RTAS_UNKNOWN_SERVICE)
  893. token = ibm_configure_pe;
  894. else
  895. token = ibm_configure_bridge;
  896. rc = rtas_call(token, 3, 1, NULL,
  897. config_addr,
  898. BUID_HI(pdn->phb->buid),
  899. BUID_LO(pdn->phb->buid));
  900. if (rc) {
  901. printk(KERN_WARNING "EEH: Unable to configure device bridge (%d) for %s\n",
  902. rc, pdn->node->full_name);
  903. }
  904. }
  905. #define EEH_ENABLE 1
  906. struct eeh_early_enable_info {
  907. unsigned int buid_hi;
  908. unsigned int buid_lo;
  909. };
  910. /**
  911. * eeh_get_pe_addr - Retrieve PE address with given BDF address
  912. * @config_addr: BDF address
  913. * @info: BUID of the associated PHB
  914. *
  915. * There are two kinds of addresses in the EEH core components:
  916. * BDF addresses and PE addresses. A dedicated, platform dependent
  917. * function call retrieves the PE address corresponding to a given
  918. * BDF address. The EEH core prefers the PE address over the BDF
  919. * address.
  920. */
  921. static int eeh_get_pe_addr(int config_addr,
  922. struct eeh_early_enable_info *info)
  923. {
  924. unsigned int rets[3];
  925. int ret;
  926. /* Use latest config-addr token on power6 */
  927. if (ibm_get_config_addr_info2 != RTAS_UNKNOWN_SERVICE) {
  928. /* Make sure we have a PE in hand */
  929. ret = rtas_call(ibm_get_config_addr_info2, 4, 2, rets,
  930. config_addr, info->buid_hi, info->buid_lo, 1);
  931. if (ret || (rets[0]==0))
  932. return 0;
  933. ret = rtas_call(ibm_get_config_addr_info2, 4, 2, rets,
  934. config_addr, info->buid_hi, info->buid_lo, 0);
  935. if (ret)
  936. return 0;
  937. return rets[0];
  938. }
  939. /* Use older config-addr token on power5 */
  940. if (ibm_get_config_addr_info != RTAS_UNKNOWN_SERVICE) {
  941. ret = rtas_call(ibm_get_config_addr_info, 4, 2, rets,
  942. config_addr, info->buid_hi, info->buid_lo, 0);
  943. if (ret)
  944. return 0;
  945. return rets[0];
  946. }
  947. return 0;
  948. }
  949. /**
  950. * eeh_early_enable - Early enable EEH on the indicated device
  951. * @dn: device node
  952. * @data: pointer to the eeh_early_enable_info holding the PHB BUID
  953. *
  954. * Enable EEH functionality on the specified PCI device. The function
  955. * is expected to be called before real PCI probing is done. However,
  956. * the PHBs have been initialized at this point.
  957. */
  958. static void *eeh_early_enable(struct device_node *dn, void *data)
  959. {
  960. unsigned int rets[3];
  961. struct eeh_early_enable_info *info = data;
  962. int ret;
  963. const u32 *class_code = of_get_property(dn, "class-code", NULL);
  964. const u32 *vendor_id = of_get_property(dn, "vendor-id", NULL);
  965. const u32 *device_id = of_get_property(dn, "device-id", NULL);
  966. const u32 *regs;
  967. int enable;
  968. struct pci_dn *pdn = PCI_DN(dn);
  969. pdn->class_code = 0;
  970. pdn->eeh_mode = 0;
  971. pdn->eeh_check_count = 0;
  972. pdn->eeh_freeze_count = 0;
  973. pdn->eeh_false_positives = 0;
  974. if (!of_device_is_available(dn))
  975. return NULL;
  976. /* Ignore bad nodes. */
  977. if (!class_code || !vendor_id || !device_id)
  978. return NULL;
  979. /* There is nothing to check on PCI to ISA bridges */
  980. if (dn->type && !strcmp(dn->type, "isa")) {
  981. pdn->eeh_mode |= EEH_MODE_NOCHECK;
  982. return NULL;
  983. }
  984. pdn->class_code = *class_code;
  985. /* Ok... see if this device supports EEH. Some do, some don't,
  986. * and the only way to find out is to check each and every one.
  987. */
  988. regs = of_get_property(dn, "reg", NULL);
  989. if (regs) {
  990. /* First register entry is addr (00BBSS00) */
  991. /* Try to enable eeh */
  992. ret = eeh_ops->set_option(dn, EEH_OPT_ENABLE);
  993. enable = 0;
  994. if (ret == 0) {
  995. pdn->eeh_config_addr = regs[0];
  996. /* If the newer, better, ibm,get-config-addr-info is supported,
  997. * then use that instead.
  998. */
  999. pdn->eeh_pe_config_addr = eeh_get_pe_addr(pdn->eeh_config_addr, info);
  1000. /* Some older systems (Power4) allow the
  1001. * ibm,set-eeh-option call to succeed even on nodes
  1002. * where EEH is not supported. Verify support
  1003. * explicitly.
  1004. */
  1005. ret = eeh_read_slot_reset_state(pdn, rets);
  1006. if ((ret == 0) && (rets[1] == 1))
  1007. enable = 1;
  1008. }
  1009. if (enable) {
  1010. eeh_subsystem_enabled = 1;
  1011. pdn->eeh_mode |= EEH_MODE_SUPPORTED;
  1012. pr_debug("EEH: %s: eeh enabled, config=%x pe_config=%x\n",
  1013. dn->full_name, pdn->eeh_config_addr,
  1014. pdn->eeh_pe_config_addr);
  1015. } else {
  1016. /* This device doesn't support EEH, but it may have an
  1017. * EEH parent, in which case we mark it as supported.
  1018. */
  1019. if (dn->parent && PCI_DN(dn->parent)
  1020. && (PCI_DN(dn->parent)->eeh_mode & EEH_MODE_SUPPORTED)) {
  1021. /* Parent supports EEH. */
  1022. pdn->eeh_mode |= EEH_MODE_SUPPORTED;
  1023. pdn->eeh_config_addr = PCI_DN(dn->parent)->eeh_config_addr;
  1024. return NULL;
  1025. }
  1026. }
  1027. } else {
  1028. printk(KERN_WARNING "EEH: %s: unable to get reg property.\n",
  1029. dn->full_name);
  1030. }
  1031. eeh_save_bars(pdn);
  1032. return NULL;
  1033. }
  1034. /**
  1035. * eeh_ops_register - Register platform dependent EEH operations
  1036. * @ops: platform dependent EEH operations
  1037. *
  1038. * Register the platform dependent EEH operation callback
  1039. * functions. The platform should call this function before
  1040. * any other EEH operations.
  1041. */
  1042. int __init eeh_ops_register(struct eeh_ops *ops)
  1043. {
  1044. if (!ops->name) {
  1045. pr_warning("%s: Invalid EEH ops name for %p\n",
  1046. __func__, ops);
  1047. return -EINVAL;
  1048. }
  1049. if (eeh_ops && eeh_ops != ops) {
  1050. pr_warning("%s: EEH ops of platform %s already existing (%s)\n",
  1051. __func__, eeh_ops->name, ops->name);
  1052. return -EEXIST;
  1053. }
  1054. eeh_ops = ops;
  1055. return 0;
  1056. }
  1057. /**
  1058. * eeh_ops_unregister - Unregister platform dependent EEH operations
  1059. * @name: name of EEH platform operations
  1060. *
  1061. * Unregister the platform dependent EEH operation callback
  1062. * functions.
  1063. */
  1064. int __exit eeh_ops_unregister(const char *name)
  1065. {
  1066. if (!name || !strlen(name)) {
  1067. pr_warning("%s: Invalid EEH ops name\n",
  1068. __func__);
  1069. return -EINVAL;
  1070. }
  1071. if (eeh_ops && !strcmp(eeh_ops->name, name)) {
  1072. eeh_ops = NULL;
  1073. return 0;
  1074. }
  1075. return -EEXIST;
  1076. }
  1077. /**
  1078. * eeh_init - EEH initialization
  1079. *
  1080. * Initialize EEH by trying to enable it for all of the adapters in the system.
  1081. * As a side effect we can determine here if eeh is supported at all.
  1082. * Note that we leave EEH on so failed config cycles won't cause a machine
  1083. * check. If a user turns off EEH for a particular adapter they are really
  1084. * telling Linux to ignore errors. Some hardware (e.g. POWER5) won't
  1085. * grant access to a slot if EEH isn't enabled, and so we always enable
  1086. * EEH for all slots/all devices.
  1087. *
  1088. * The eeh-force-off option disables EEH checking globally, for all slots.
  1089. * Even if force-off is set, the EEH hardware is still enabled, so that
  1090. * newer systems can boot.
  1091. */
  1092. void __init eeh_init(void)
  1093. {
  1094. struct device_node *phb, *np;
  1095. struct eeh_early_enable_info info;
  1096. int ret;
  1097. /* call platform initialization function */
  1098. if (!eeh_ops) {
  1099. pr_warning("%s: Platform EEH operation not found\n",
  1100. __func__);
  1101. return;
  1102. } else if ((ret = eeh_ops->init())) {
  1103. pr_warning("%s: Failed to call platform init function (%d)\n",
  1104. __func__, ret);
  1105. return;
  1106. }
  1107. raw_spin_lock_init(&confirm_error_lock);
  1108. spin_lock_init(&slot_errbuf_lock);
  1109. np = of_find_node_by_path("/rtas");
  1110. if (np == NULL)
  1111. return;
  1112. ibm_set_slot_reset = rtas_token("ibm,set-slot-reset");
  1113. ibm_read_slot_reset_state2 = rtas_token("ibm,read-slot-reset-state2");
  1114. ibm_read_slot_reset_state = rtas_token("ibm,read-slot-reset-state");
  1115. ibm_slot_error_detail = rtas_token("ibm,slot-error-detail");
  1116. ibm_get_config_addr_info = rtas_token("ibm,get-config-addr-info");
  1117. ibm_get_config_addr_info2 = rtas_token("ibm,get-config-addr-info2");
  1118. ibm_configure_bridge = rtas_token("ibm,configure-bridge");
  1119. ibm_configure_pe = rtas_token("ibm,configure-pe");
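/* rtas_token() simply returns an integer property of the /rtas node,
 * so it can also be used to fetch the rtas-error-log-max value below.
 */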
  1120. eeh_error_buf_size = rtas_token("rtas-error-log-max");
  1121. if (eeh_error_buf_size == RTAS_UNKNOWN_SERVICE) {
  1122. eeh_error_buf_size = 1024;
  1123. }
  1124. if (eeh_error_buf_size > RTAS_ERROR_LOG_MAX) {
  1125. printk(KERN_WARNING "EEH: rtas-error-log-max is bigger than allocated "
  1126. "buffer ! (%d vs %d)", eeh_error_buf_size, RTAS_ERROR_LOG_MAX);
  1127. eeh_error_buf_size = RTAS_ERROR_LOG_MAX;
  1128. }
  1129. /* Enable EEH for all adapters. Note that eeh requires buid's */
  1130. for (phb = of_find_node_by_name(NULL, "pci"); phb;
  1131. phb = of_find_node_by_name(phb, "pci")) {
  1132. unsigned long buid;
  1133. buid = get_phb_buid(phb);
  1134. if (buid == 0 || PCI_DN(phb) == NULL)
  1135. continue;
  1136. info.buid_lo = BUID_LO(buid);
  1137. info.buid_hi = BUID_HI(buid);
  1138. traverse_pci_devices(phb, eeh_early_enable, &info);
  1139. }
  1140. if (eeh_subsystem_enabled)
  1141. printk(KERN_INFO "EEH: PCI Enhanced I/O Error Handling Enabled\n");
  1142. else
  1143. printk(KERN_WARNING "EEH: No capable adapters found\n");
  1144. }
  1145. /**
  1146. * eeh_add_device_early - Enable EEH for the indicated device_node
  1147. * @dn: device node for which to set up EEH
  1148. *
  1149. * This routine must be used to perform EEH initialization for PCI
  1150. * devices that were added after system boot (e.g. hotplug, dlpar).
  1151. * This routine must be called before any i/o is performed to the
  1152. * adapter (including any config-space i/o).
  1153. * Whether this actually enables EEH or not for this device depends
  1154. * on the CEC architecture, the type of the device, earlier boot
  1155. * command-line arguments, etc.
  1156. */
  1157. static void eeh_add_device_early(struct device_node *dn)
  1158. {
  1159. struct pci_controller *phb;
  1160. struct eeh_early_enable_info info;
  1161. if (!dn || !PCI_DN(dn))
  1162. return;
  1163. phb = PCI_DN(dn)->phb;
  1164. /* USB bus children of PCI devices will not have BUIDs */
  1165. if (NULL == phb || 0 == phb->buid)
  1166. return;
  1167. info.buid_hi = BUID_HI(phb->buid);
  1168. info.buid_lo = BUID_LO(phb->buid);
  1169. eeh_early_enable(dn, &info);
  1170. }
  1171. /**
  1172. * eeh_add_device_tree_early - Enable EEH for the indicated device
  1173. * @dn: device node
  1174. *
  1175. * This routine must be used to perform EEH initialization for the
  1176. * indicated PCI device that was added after system boot (e.g.
  1177. * hotplug, dlpar).
  1178. */
  1179. void eeh_add_device_tree_early(struct device_node *dn)
  1180. {
  1181. struct device_node *sib;
  1182. for_each_child_of_node(dn, sib)
  1183. eeh_add_device_tree_early(sib);
  1184. eeh_add_device_early(dn);
  1185. }
  1186. EXPORT_SYMBOL_GPL(eeh_add_device_tree_early);
  1187. /**
  1188. * eeh_add_device_late - Perform EEH initialization for the indicated pci device
  1189. * @dev: pci device for which to set up EEH
  1190. *
  1191. * This routine must be used to complete EEH initialization for PCI
  1192. * devices that were added after system boot (e.g. hotplug, dlpar).
  1193. */
  1194. static void eeh_add_device_late(struct pci_dev *dev)
  1195. {
  1196. struct device_node *dn;
  1197. struct pci_dn *pdn;
  1198. if (!dev || !eeh_subsystem_enabled)
  1199. return;
  1200. pr_debug("EEH: Adding device %s\n", pci_name(dev));
  1201. dn = pci_device_to_OF_node(dev);
  1202. pdn = PCI_DN(dn);
  1203. if (pdn->pcidev == dev) {
  1204. pr_debug("EEH: Already referenced !\n");
  1205. return;
  1206. }
  1207. WARN_ON(pdn->pcidev);
  1208. pci_dev_get(dev);
  1209. pdn->pcidev = dev;
  1210. pci_addr_cache_insert_device(dev);
  1211. eeh_sysfs_add_device(dev);
  1212. }
  1213. /**
  1214. * eeh_add_device_tree_late - Perform EEH initialization for the indicated PCI bus
  1215. * @bus: PCI bus
  1216. *
  1217. * This routine must be used to perform EEH initialization for PCI
  1218. * devices which are attached to the indicated PCI bus. The PCI bus
  1219. * is added after system boot through hotplug or dlpar.
  1220. */
  1221. void eeh_add_device_tree_late(struct pci_bus *bus)
  1222. {
  1223. struct pci_dev *dev;
  1224. list_for_each_entry(dev, &bus->devices, bus_list) {
  1225. eeh_add_device_late(dev);
  1226. if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) {
  1227. struct pci_bus *subbus = dev->subordinate;
  1228. if (subbus)
  1229. eeh_add_device_tree_late(subbus);
  1230. }
  1231. }
  1232. }
  1233. EXPORT_SYMBOL_GPL(eeh_add_device_tree_late);
  1234. /**
  1235. * eeh_remove_device - Undo EEH setup for the indicated pci device
  1236. * @dev: pci device to be removed
  1237. *
  1238. * This routine should be called when a device is removed from
  1239. * a running system (e.g. by hotplug or dlpar). It unregisters
  1240. * the PCI device from the EEH subsystem. I/O errors affecting
  1241. * this device will no longer be detected after this call; thus,
  1242. * i/o errors affecting this slot may leave this device unusable.
  1243. */
  1244. static void eeh_remove_device(struct pci_dev *dev)
  1245. {
  1246. struct device_node *dn;
  1247. if (!dev || !eeh_subsystem_enabled)
  1248. return;
  1249. /* Unregister the device with the EEH/PCI address search system */
  1250. pr_debug("EEH: Removing device %s\n", pci_name(dev));
  1251. dn = pci_device_to_OF_node(dev);
  1252. if (PCI_DN(dn)->pcidev == NULL) {
  1253. pr_debug("EEH: Not referenced !\n");
  1254. return;
  1255. }
  1256. PCI_DN(dn)->pcidev = NULL;
  1257. pci_dev_put(dev);
  1258. pci_addr_cache_remove_device(dev);
  1259. eeh_sysfs_remove_device(dev);
  1260. }
  1261. /**
  1262. * eeh_remove_bus_device - Undo EEH setup for the indicated PCI device
  1263. * @dev: PCI device
  1264. *
  1265. * This routine must be called when a device is removed from the
  1266. * running system through hotplug or dlpar. The corresponding
  1267. * PCI address cache will be removed.
  1268. */
  1269. void eeh_remove_bus_device(struct pci_dev *dev)
  1270. {
  1271. struct pci_bus *bus = dev->subordinate;
  1272. struct pci_dev *child, *tmp;
  1273. eeh_remove_device(dev);
  1274. if (bus && dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) {
  1275. list_for_each_entry_safe(child, tmp, &bus->devices, bus_list)
  1276. eeh_remove_bus_device(child);
  1277. }
  1278. }
  1279. EXPORT_SYMBOL_GPL(eeh_remove_bus_device);
  1280. static int proc_eeh_show(struct seq_file *m, void *v)
  1281. {
  1282. if (0 == eeh_subsystem_enabled) {
  1283. seq_printf(m, "EEH Subsystem is globally disabled\n");
  1284. seq_printf(m, "eeh_total_mmio_ffs=%ld\n", total_mmio_ffs);
  1285. } else {
  1286. seq_printf(m, "EEH Subsystem is enabled\n");
  1287. seq_printf(m,
  1288. "no device=%ld\n"
  1289. "no device node=%ld\n"
  1290. "no config address=%ld\n"
  1291. "check not wanted=%ld\n"
  1292. "eeh_total_mmio_ffs=%ld\n"
  1293. "eeh_false_positives=%ld\n"
  1294. "eeh_slot_resets=%ld\n",
  1295. no_device, no_dn, no_cfg_addr,
  1296. ignored_check, total_mmio_ffs,
  1297. false_positives,
  1298. slot_resets);
  1299. }
  1300. return 0;
  1301. }
  1302. static int proc_eeh_open(struct inode *inode, struct file *file)
  1303. {
  1304. return single_open(file, proc_eeh_show, NULL);
  1305. }
  1306. static const struct file_operations proc_eeh_operations = {
  1307. .open = proc_eeh_open,
  1308. .read = seq_read,
  1309. .llseek = seq_lseek,
  1310. .release = single_release,
  1311. };
  1312. static int __init eeh_init_proc(void)
  1313. {
  1314. if (machine_is(pseries))
  1315. proc_create("powerpc/eeh", 0, NULL, &proc_eeh_operations);
  1316. return 0;
  1317. }
  1318. __initcall(eeh_init_proc);