/*
 * Support PCI/PCIe on PowerNV platforms
 *
 * Copyright 2011 Benjamin Herrenschmidt, IBM Corp.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

#undef DEBUG

#include <linux/kernel.h>
#include <linux/pci.h>
#include <linux/delay.h>
#include <linux/string.h>
#include <linux/init.h>
#include <linux/bootmem.h>
#include <linux/irq.h>
#include <linux/io.h>
#include <linux/msi.h>

#include <asm/sections.h>
#include <asm/io.h>
#include <asm/prom.h>
#include <asm/pci-bridge.h>
#include <asm/machdep.h>
#include <asm/ppc-pci.h>
#include <asm/opal.h>
#include <asm/iommu.h>
#include <asm/tce.h>

#include "powernv.h"
#include "pci.h"

#define define_pe_printk_level(func, kern_level)			\
static int func(const struct pnv_ioda_pe *pe, const char *fmt, ...)	\
{									\
	struct va_format vaf;						\
	va_list args;							\
	char pfix[32];							\
	int r;								\
									\
	va_start(args, fmt);						\
									\
	vaf.fmt = fmt;							\
	vaf.va = &args;							\
									\
	if (pe->pdev)							\
		strlcpy(pfix, dev_name(&pe->pdev->dev),			\
			sizeof(pfix));					\
	else								\
		sprintf(pfix, "%04x:%02x ",				\
			pci_domain_nr(pe->pbus),			\
			pe->pbus->number);				\
	r = printk(kern_level "pci %s: [PE# %.3d] %pV",			\
		   pfix, pe->pe_number, &vaf);				\
									\
	va_end(args);							\
									\
	return r;							\
}

define_pe_printk_level(pe_err, KERN_ERR);
define_pe_printk_level(pe_warn, KERN_WARNING);
define_pe_printk_level(pe_info, KERN_INFO);

static struct pci_dn *pnv_ioda_get_pdn(struct pci_dev *dev)
{
	struct device_node *np;

	np = pci_device_to_OF_node(dev);
	if (!np)
		return NULL;
	return PCI_DN(np);
}

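/*
 * PE numbers are tracked in the phb->ioda.pe_alloc bitmap. The
 * find/test-and-set loop below is lock-free: if another CPU grabs
 * the bit between the find and the set, we simply retry with the
 * next free bit.
 */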
static int pnv_ioda_alloc_pe(struct pnv_phb *phb)
{
	unsigned long pe;

	do {
		pe = find_next_zero_bit(phb->ioda.pe_alloc,
					phb->ioda.total_pe, 0);
		if (pe >= phb->ioda.total_pe)
			return IODA_INVALID_PE;
	} while(test_and_set_bit(pe, phb->ioda.pe_alloc));

	phb->ioda.pe_array[pe].pe_number = pe;
	return pe;
}

static void pnv_ioda_free_pe(struct pnv_phb *phb, int pe)
{
	WARN_ON(phb->ioda.pe_array[pe].pdev);

	memset(&phb->ioda.pe_array[pe], 0, sizeof(struct pnv_ioda_pe));
	clear_bit(pe, phb->ioda.pe_alloc);
}

/* Currently those 2 are only used when MSIs are enabled, this will change
 * but in the meantime, we need to protect them to avoid warnings
 */
#ifdef CONFIG_PCI_MSI
static struct pnv_ioda_pe *pnv_ioda_get_pe(struct pci_dev *dev)
{
	struct pci_controller *hose = pci_bus_to_host(dev->bus);
	struct pnv_phb *phb = hose->private_data;
	struct pci_dn *pdn = pnv_ioda_get_pdn(dev);

	if (!pdn)
		return NULL;
	if (pdn->pe_number == IODA_INVALID_PE)
		return NULL;
	return &phb->ioda.pe_array[pdn->pe_number];
}
#endif /* CONFIG_PCI_MSI */

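/*
 * Tell OPAL which RIDs (bus/device/function numbers) map to this PE.
 * For bus-type PEs the device and function numbers are ignored and a
 * power-of-two range of bus numbers is matched instead; the switch on
 * 'count' below picks how many high-order bus-number bits OPAL
 * compares, so e.g. a 2-bus range compares the top 7 bits.
 */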
static int pnv_ioda_configure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe)
{
	struct pci_dev *parent;
	uint8_t bcomp, dcomp, fcomp;
	long rc, rid_end, rid;

	/* Bus validation ? */
	if (pe->pbus) {
		int count;

		dcomp = OPAL_IGNORE_RID_DEVICE_NUMBER;
		fcomp = OPAL_IGNORE_RID_FUNCTION_NUMBER;
		parent = pe->pbus->self;
		if (pe->flags & PNV_IODA_PE_BUS_ALL)
			count = pe->pbus->busn_res.end - pe->pbus->busn_res.start + 1;
		else
			count = 1;

		switch(count) {
		case  1: bcomp = OpalPciBusAll;		break;
		case  2: bcomp = OpalPciBus7Bits;	break;
		case  4: bcomp = OpalPciBus6Bits;	break;
		case  8: bcomp = OpalPciBus5Bits;	break;
		case 16: bcomp = OpalPciBus4Bits;	break;
		case 32: bcomp = OpalPciBus3Bits;	break;
		default:
			pr_err("%s: Number of subordinate busses %d"
			       " unsupported\n",
			       pci_name(pe->pbus->self), count);
			/* Do an exact match only */
			bcomp = OpalPciBusAll;
		}
		rid_end = pe->rid + (count << 8);
	} else {
		parent = pe->pdev->bus->self;
		bcomp = OpalPciBusAll;
		dcomp = OPAL_COMPARE_RID_DEVICE_NUMBER;
		fcomp = OPAL_COMPARE_RID_FUNCTION_NUMBER;
		rid_end = pe->rid + 1;
	}

	/* Associate PE in PELT */
	rc = opal_pci_set_pe(phb->opal_id, pe->pe_number, pe->rid,
			     bcomp, dcomp, fcomp, OPAL_MAP_PE);
	if (rc) {
		pe_err(pe, "OPAL error %ld trying to setup PELT table\n", rc);
		return -ENXIO;
	}
	opal_pci_eeh_freeze_clear(phb->opal_id, pe->pe_number,
				  OPAL_EEH_ACTION_CLEAR_FREEZE_ALL);

	/* Add to all parents PELT-V */
	while (parent) {
		struct pci_dn *pdn = pnv_ioda_get_pdn(parent);
		if (pdn && pdn->pe_number != IODA_INVALID_PE) {
			rc = opal_pci_set_peltv(phb->opal_id, pdn->pe_number,
						pe->pe_number, OPAL_ADD_PE_TO_DOMAIN);
			/* XXX What to do in case of error ? */
		}
		parent = parent->bus->self;
	}

	/* Setup reverse map */
	for (rid = pe->rid; rid < rid_end; rid++)
		phb->ioda.pe_rmap[rid] = pe->pe_number;

	/* Setup one MVTs on IODA1 */
	if (phb->type == PNV_PHB_IODA1) {
		pe->mve_number = pe->pe_number;
		rc = opal_pci_set_mve(phb->opal_id, pe->mve_number,
				      pe->pe_number);
		if (rc) {
			pe_err(pe, "OPAL error %ld setting up MVE %d\n",
			       rc, pe->mve_number);
			pe->mve_number = -1;
		} else {
			rc = opal_pci_set_mve_enable(phb->opal_id,
						     pe->mve_number, OPAL_ENABLE_MVE);
			if (rc) {
				pe_err(pe, "OPAL error %ld enabling MVE %d\n",
				       rc, pe->mve_number);
				pe->mve_number = -1;
			}
		}
	} else if (phb->type == PNV_PHB_IODA2)
		pe->mve_number = 0;

	return 0;
}

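/*
 * Insertion sort: keep phb->ioda.pe_dma_list ordered by descending
 * DMA weight, so that when TCE segments run short in
 * pnv_ioda_setup_dma() the heavier PEs are served first.
 */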
static void pnv_ioda_link_pe_by_weight(struct pnv_phb *phb,
				       struct pnv_ioda_pe *pe)
{
	struct pnv_ioda_pe *lpe;

	list_for_each_entry(lpe, &phb->ioda.pe_dma_list, dma_link) {
		if (lpe->dma_weight < pe->dma_weight) {
			list_add_tail(&pe->dma_link, &lpe->dma_link);
			return;
		}
	}
	list_add_tail(&pe->dma_link, &phb->ioda.pe_dma_list);
}

static unsigned int pnv_ioda_dma_weight(struct pci_dev *dev)
{
	/* This is quite simplistic. The "base" weight of a device
	 * is 10. 0 means no DMA is to be accounted for it.
	 */

	/* If it's a bridge, no DMA */
	if (dev->hdr_type != PCI_HEADER_TYPE_NORMAL)
		return 0;

	/* Reduce the weight of slow USB controllers */
	if (dev->class == PCI_CLASS_SERIAL_USB_UHCI ||
	    dev->class == PCI_CLASS_SERIAL_USB_OHCI ||
	    dev->class == PCI_CLASS_SERIAL_USB_EHCI)
		return 3;

	/* Increase the weight of RAID (includes Obsidian) */
	if ((dev->class >> 8) == PCI_CLASS_STORAGE_RAID)
		return 15;

	/* Default */
	return 10;
}

#if 0
static struct pnv_ioda_pe *pnv_ioda_setup_dev_PE(struct pci_dev *dev)
{
	struct pci_controller *hose = pci_bus_to_host(dev->bus);
	struct pnv_phb *phb = hose->private_data;
	struct pci_dn *pdn = pnv_ioda_get_pdn(dev);
	struct pnv_ioda_pe *pe;
	int pe_num;

	if (!pdn) {
		pr_err("%s: Device tree node not associated properly\n",
		       pci_name(dev));
		return NULL;
	}
	if (pdn->pe_number != IODA_INVALID_PE)
		return NULL;

	/* PE#0 has been pre-set */
	if (dev->bus->number == 0)
		pe_num = 0;
	else
		pe_num = pnv_ioda_alloc_pe(phb);
	if (pe_num == IODA_INVALID_PE) {
		pr_warning("%s: Not enough PE# available, disabling device\n",
			   pci_name(dev));
		return NULL;
	}

	/* NOTE: We get only one ref to the pci_dev for the pdn, not for the
	 * pointer in the PE data structure, both should be destroyed at the
	 * same time. However, this needs to be looked at more closely again
	 * once we actually start removing things (Hotplug, SR-IOV, ...)
	 *
	 * At some point we want to remove the PDN completely anyways
	 */
	pe = &phb->ioda.pe_array[pe_num];
	pci_dev_get(dev);
	pdn->pcidev = dev;
	pdn->pe_number = pe_num;
	pe->pdev = dev;
	pe->pbus = NULL;
	pe->tce32_seg = -1;
	pe->mve_number = -1;
	pe->rid = dev->bus->number << 8 | pdn->devfn;

	pe_info(pe, "Associated device to PE\n");

	if (pnv_ioda_configure_pe(phb, pe)) {
		/* XXX What do we do here ? */
		if (pe_num)
			pnv_ioda_free_pe(phb, pe_num);
		pdn->pe_number = IODA_INVALID_PE;
		pe->pdev = NULL;
		pci_dev_put(dev);
		return NULL;
	}

	/* Assign a DMA weight to the device */
	pe->dma_weight = pnv_ioda_dma_weight(dev);
	if (pe->dma_weight != 0) {
		phb->ioda.dma_weight += pe->dma_weight;
		phb->ioda.dma_pe_count++;
	}

	/* Link the PE */
	pnv_ioda_link_pe_by_weight(phb, pe);

	return pe;
}
#endif /* Useful for SRIOV case */

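/*
 * Walk all devices on 'bus' (and, for PNV_IODA_PE_BUS_ALL PEs, on all
 * subordinate buses) and point their pci_dn at 'pe', accumulating
 * each device's DMA weight into the PE as we go.
 */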
static void pnv_ioda_setup_same_PE(struct pci_bus *bus, struct pnv_ioda_pe *pe)
{
	struct pci_dev *dev;

	list_for_each_entry(dev, &bus->devices, bus_list) {
		struct pci_dn *pdn = pnv_ioda_get_pdn(dev);

		if (pdn == NULL) {
			pr_warn("%s: No device node associated with device !\n",
				pci_name(dev));
			continue;
		}
		pci_dev_get(dev);
		pdn->pcidev = dev;
		pdn->pe_number = pe->pe_number;
		pe->dma_weight += pnv_ioda_dma_weight(dev);
		if ((pe->flags & PNV_IODA_PE_BUS_ALL) && dev->subordinate)
			pnv_ioda_setup_same_PE(dev->subordinate, pe);
	}
}

/*
 * There are 2 types of PCI-bus-sensitive PEs: one that is comprised of
 * a single PCI bus, and another that contains the primary PCI bus and
 * its subordinate PCI devices and buses. The second type of PE is
 * normally originated by a PCIe-to-PCI bridge or a PLX switch
 * downstream port.
 */
static void pnv_ioda_setup_bus_PE(struct pci_bus *bus, int all)
{
	struct pci_controller *hose = pci_bus_to_host(bus);
	struct pnv_phb *phb = hose->private_data;
	struct pnv_ioda_pe *pe;
	int pe_num;

	pe_num = pnv_ioda_alloc_pe(phb);
	if (pe_num == IODA_INVALID_PE) {
		pr_warning("%s: Not enough PE# available for PCI bus %04x:%02x\n",
			   __func__, pci_domain_nr(bus), bus->number);
		return;
	}

	pe = &phb->ioda.pe_array[pe_num];
	pe->flags = (all ? PNV_IODA_PE_BUS_ALL : PNV_IODA_PE_BUS);
	pe->pbus = bus;
	pe->pdev = NULL;
	pe->tce32_seg = -1;
	pe->mve_number = -1;
	pe->rid = bus->busn_res.start << 8;
	pe->dma_weight = 0;

	if (all)
		pe_info(pe, "Secondary bus %d..%d associated with PE#%d\n",
			bus->busn_res.start, bus->busn_res.end, pe_num);
	else
		pe_info(pe, "Secondary bus %d associated with PE#%d\n",
			bus->busn_res.start, pe_num);

	if (pnv_ioda_configure_pe(phb, pe)) {
		/* XXX What do we do here ? */
		if (pe_num)
			pnv_ioda_free_pe(phb, pe_num);
		pe->pbus = NULL;
		return;
	}

	/* Associate it with all child devices */
	pnv_ioda_setup_same_PE(bus, pe);

	/* Put PE to the list */
	list_add_tail(&pe->list, &phb->ioda.pe_list);

	/* Account for one DMA PE if at least one DMA capable device exist
	 * below the bridge
	 */
	if (pe->dma_weight != 0) {
		phb->ioda.dma_weight += pe->dma_weight;
		phb->ioda.dma_pe_count++;
	}

	/* Link the PE */
	pnv_ioda_link_pe_by_weight(phb, pe);
}

static void pnv_ioda_setup_PEs(struct pci_bus *bus)
{
	struct pci_dev *dev;

	pnv_ioda_setup_bus_PE(bus, 0);

	list_for_each_entry(dev, &bus->devices, bus_list) {
		if (dev->subordinate) {
			if (pci_pcie_type(dev) == PCI_EXP_TYPE_PCI_BRIDGE)
				pnv_ioda_setup_bus_PE(dev->subordinate, 1);
			else
				pnv_ioda_setup_PEs(dev->subordinate);
		}
	}
}

/*
 * Configure PEs so that the downstream PCI buses and devices
 * get their associated PE#. Unfortunately, we haven't figured
 * out a way to identify PLX bridges yet, so we simply put the
 * PCI bus and the subordinates behind the root port into one
 * PE# here. This rule is expected to change as soon as we can
 * detect PLX bridges correctly.
 */
static void pnv_pci_ioda_setup_PEs(void)
{
	struct pci_controller *hose, *tmp;

	list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
		pnv_ioda_setup_PEs(hose->bus);
	}
}

static void pnv_pci_ioda_dma_dev_setup(struct pnv_phb *phb, struct pci_dev *dev)
{
	/* We delay DMA setup after we have assigned all PE# */
}

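/*
 * Point every device under 'bus' (recursively) at the PE's shared
 * 32-bit TCE table so the generic IOMMU code can map DMA for them.
 */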
static void pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe, struct pci_bus *bus)
{
	struct pci_dev *dev;

	list_for_each_entry(dev, &bus->devices, bus_list) {
		set_iommu_table_base(&dev->dev, &pe->tce32_table);
		if (dev->subordinate)
			pnv_ioda_setup_bus_dma(pe, dev->subordinate);
	}
}

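/*
 * Create the 32-bit DMA window for a PE. Each TCE segment maps 256MB
 * of DMA space with 4K pages and 8-byte TCEs, so one segment needs
 * (0x10000000 / 0x1000) * 8 = 512KB of TCE table; 'segs' contiguous
 * segments starting at segment 'base' are handed to this PE.
 */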
static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb,
				      struct pnv_ioda_pe *pe, unsigned int base,
				      unsigned int segs)
{
	struct page *tce_mem = NULL;
	const __be64 *swinvp;
	struct iommu_table *tbl;
	unsigned int i;
	int64_t rc;
	void *addr;

	/* 256M DMA window, 4K TCE pages, 8 bytes TCE */
#define TCE32_TABLE_SIZE	((0x10000000 / 0x1000) * 8)

	/* XXX FIXME: Handle 64-bit only DMA devices */
	/* XXX FIXME: Provide 64-bit DMA facilities & non-4K TCE tables etc.. */
	/* XXX FIXME: Allocate multi-level tables on PHB3 */

	/* We shouldn't already have a 32-bit DMA associated */
	if (WARN_ON(pe->tce32_seg >= 0))
		return;

	/* Grab a 32-bit TCE table */
	pe->tce32_seg = base;
	pe_info(pe, " Setting up 32-bit TCE table at %08x..%08x\n",
		(base << 28), ((base + segs) << 28) - 1);

	/* XXX Currently, we allocate one big contiguous table for the
	 * TCEs. We only really need one chunk per 256M of TCE space
	 * (ie per segment) but that's an optimization for later, it
	 * requires some added smarts with our get/put_tce implementation
	 */
	tce_mem = alloc_pages_node(phb->hose->node, GFP_KERNEL,
				   get_order(TCE32_TABLE_SIZE * segs));
	if (!tce_mem) {
		pe_err(pe, " Failed to allocate a 32-bit TCE memory\n");
		goto fail;
	}
	addr = page_address(tce_mem);
	memset(addr, 0, TCE32_TABLE_SIZE * segs);

	/* Configure HW */
	for (i = 0; i < segs; i++) {
		rc = opal_pci_map_pe_dma_window(phb->opal_id,
						pe->pe_number,
						base + i, 1,
						__pa(addr) + TCE32_TABLE_SIZE * i,
						TCE32_TABLE_SIZE, 0x1000);
		if (rc) {
			pe_err(pe, " Failed to configure 32-bit TCE table,"
			       " err %ld\n", rc);
			goto fail;
		}
	}

	/* Setup linux iommu table */
	tbl = &pe->tce32_table;
	pnv_pci_setup_iommu_table(tbl, addr, TCE32_TABLE_SIZE * segs,
				  base << 28);

	/* OPAL variant of P7IOC SW invalidated TCEs */
	swinvp = of_get_property(phb->hose->dn, "ibm,opal-tce-kill", NULL);
	if (swinvp) {
		/* We need a couple more fields -- an address and a data
		 * to or. Since the bus is only printed out on table free
		 * errors, and on the first pass the data will be a relative
		 * bus number, print that out instead.
		 */
		tbl->it_busno = 0;
		tbl->it_index = (unsigned long)ioremap(be64_to_cpup(swinvp), 8);
		tbl->it_type = TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE
			| TCE_PCI_SWINV_PAIR;
	}
	iommu_init_table(tbl, phb->hose->node);

	if (pe->pdev)
		set_iommu_table_base(&pe->pdev->dev, tbl);
	else
		pnv_ioda_setup_bus_dma(pe, pe->pbus);

	return;
 fail:
	/* XXX Failure: Try to fallback to 64-bit only ? */
	if (pe->tce32_seg >= 0)
		pe->tce32_seg = -1;
	if (tce_mem)
		__free_pages(tce_mem, get_order(TCE32_TABLE_SIZE * segs));
}

static void pnv_ioda_setup_dma(struct pnv_phb *phb)
{
	struct pci_controller *hose = phb->hose;
	unsigned int residual, remaining, segs, tw, base;
	struct pnv_ioda_pe *pe;

	/* If we have more PE# than segments available, hand out one
	 * per PE until we run out and let the rest fail. If not,
	 * then we assign at least one segment per PE, plus more based
	 * on the amount of devices under that PE
	 */
	if (phb->ioda.dma_pe_count > phb->ioda.tce32_count)
		residual = 0;
	else
		residual = phb->ioda.tce32_count -
			phb->ioda.dma_pe_count;

	pr_info("PCI: Domain %04x has %ld available 32-bit DMA segments\n",
		hose->global_number, phb->ioda.tce32_count);
	pr_info("PCI: %d PE# for a total weight of %d\n",
		phb->ioda.dma_pe_count, phb->ioda.dma_weight);

	/* Walk our PE list and configure their DMA segments, hand them
	 * out one base segment plus any residual segments based on
	 * weight
	 */
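	/* Each PE gets one segment plus round(weight * residual /
	 * total weight) extra ones. E.g. with 12 residual segments
	 * and a total weight of 60, a PE of weight 15 gets
	 * 1 + (15 * 12 + 30) / 60 = 4 segments.
	 */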
	remaining = phb->ioda.tce32_count;
	tw = phb->ioda.dma_weight;
	base = 0;
	list_for_each_entry(pe, &phb->ioda.pe_dma_list, dma_link) {
		if (!pe->dma_weight)
			continue;
		if (!remaining) {
			pe_warn(pe, "No DMA32 resources available\n");
			continue;
		}
		segs = 1;
		if (residual) {
			segs += ((pe->dma_weight * residual) + (tw / 2)) / tw;
			if (segs > remaining)
				segs = remaining;
		}
		pe_info(pe, "DMA weight %d, assigned %d DMA32 segments\n",
			pe->dma_weight, segs);
		pnv_pci_ioda_setup_dma_pe(phb, pe, base, segs);
		remaining -= segs;
		base += segs;
	}
}

#ifdef CONFIG_PCI_MSI
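/*
 * Program one MSI: bind the XIVE (interrupt source) to the device's
 * PE, then ask OPAL for the address/data pair the device must write
 * to trigger it. The 32-bit and 64-bit variants only differ in which
 * OPAL query is used and how the address is split into the message.
 */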
static int pnv_pci_ioda_msi_setup(struct pnv_phb *phb, struct pci_dev *dev,
				  unsigned int hwirq, unsigned int is_64,
				  struct msi_msg *msg)
{
	struct pnv_ioda_pe *pe = pnv_ioda_get_pe(dev);
	unsigned int xive_num = hwirq - phb->msi_base;
	uint64_t addr64;
	uint32_t addr32, data;
	int rc;

	/* No PE assigned ? bail out ... no MSI for you ! */
	if (pe == NULL)
		return -ENXIO;

	/* Check if we have an MVE */
	if (pe->mve_number < 0)
		return -ENXIO;

	/* Assign XIVE to PE */
	rc = opal_pci_set_xive_pe(phb->opal_id, pe->pe_number, xive_num);
	if (rc) {
		pr_warn("%s: OPAL error %d setting XIVE %d PE\n",
			pci_name(dev), rc, xive_num);
		return -EIO;
	}

	if (is_64) {
		rc = opal_get_msi_64(phb->opal_id, pe->mve_number, xive_num, 1,
				     &addr64, &data);
		if (rc) {
			pr_warn("%s: OPAL error %d getting 64-bit MSI data\n",
				pci_name(dev), rc);
			return -EIO;
		}
		msg->address_hi = addr64 >> 32;
		msg->address_lo = addr64 & 0xfffffffful;
	} else {
		rc = opal_get_msi_32(phb->opal_id, pe->mve_number, xive_num, 1,
				     &addr32, &data);
		if (rc) {
			pr_warn("%s: OPAL error %d getting 32-bit MSI data\n",
				pci_name(dev), rc);
			return -EIO;
		}
		msg->address_hi = 0;
		msg->address_lo = addr32;
	}
	msg->data = data;

	pr_devel("%s: %s-bit MSI on hwirq %x (xive #%d),"
		 " address=%x_%08x data=%x PE# %d\n",
		 pci_name(dev), is_64 ? "64" : "32", hwirq, xive_num,
		 msg->address_hi, msg->address_lo, data, pe->pe_number);

	return 0;
}

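/*
 * MSI ranges come from the "ibm,opal-msi-ranges" device-tree property
 * as a (base, count) pair of cells; a bitmap of 'count' bits tracks
 * which hardware IRQs in that range are in use.
 */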
static void pnv_pci_init_ioda_msis(struct pnv_phb *phb)
{
	unsigned int bmap_size;
	const __be32 *prop = of_get_property(phb->hose->dn,
					     "ibm,opal-msi-ranges", NULL);
	if (!prop) {
		/* BML Fallback */
		prop = of_get_property(phb->hose->dn, "msi-ranges", NULL);
	}
	if (!prop)
		return;

	phb->msi_base = be32_to_cpup(prop);
	phb->msi_count = be32_to_cpup(prop + 1);
	bmap_size = BITS_TO_LONGS(phb->msi_count) * sizeof(unsigned long);
	phb->msi_map = zalloc_maybe_bootmem(bmap_size, GFP_KERNEL);
	if (!phb->msi_map) {
		pr_err("PCI %d: Failed to allocate MSI bitmap !\n",
		       phb->hose->global_number);
		return;
	}
	phb->msi_setup = pnv_pci_ioda_msi_setup;
	phb->msi32_support = 1;
	pr_info("  Allocated bitmap for %d MSIs (base IRQ 0x%x)\n",
		phb->msi_count, phb->msi_base);
}
#else
static void pnv_pci_init_ioda_msis(struct pnv_phb *phb) { }
#endif /* CONFIG_PCI_MSI */

/*
 * This function is supposed to be called on a PE basis, from top to
 * bottom, so that the I/O or MMIO segments assigned to a parent PE
 * can be overridden by its child PEs if necessary.
 */
static void pnv_ioda_setup_pe_seg(struct pci_controller *hose,
				  struct pnv_ioda_pe *pe)
{
	struct pnv_phb *phb = hose->private_data;
	struct pci_bus_region region;
	struct resource *res;
	int i, index;
	int rc;

	/*
	 * NOTE: We only care about PCI-bus-based PEs for now. PCI
	 * device based PEs, for example SR-IOV sensitive VFs, should
	 * be figured out later.
	 */
	BUG_ON(!(pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)));

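	/*
	 * The PHB's I/O and M32 windows are split into total_pe equal
	 * segments. A resource at offset 'start' from the window base
	 * lives in segment start / segsize; the loops below walk every
	 * segment the resource spans and map each one to this PE.
	 */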
	pci_bus_for_each_resource(pe->pbus, res, i) {
		if (!res || !res->flags ||
		    res->start > res->end)
			continue;

		if (res->flags & IORESOURCE_IO) {
			region.start = res->start - phb->ioda.io_pci_base;
			region.end   = res->end - phb->ioda.io_pci_base;
			index = region.start / phb->ioda.io_segsize;

			while (index < phb->ioda.total_pe &&
			       region.start <= region.end) {
				phb->ioda.io_segmap[index] = pe->pe_number;
				rc = opal_pci_map_pe_mmio_window(phb->opal_id,
					pe->pe_number, OPAL_IO_WINDOW_TYPE, 0, index);
				if (rc != OPAL_SUCCESS) {
					pr_err("%s: OPAL error %d when mapping IO "
					       "segment #%d to PE#%d\n",
					       __func__, rc, index, pe->pe_number);
					break;
				}

				region.start += phb->ioda.io_segsize;
				index++;
			}
		} else if (res->flags & IORESOURCE_MEM) {
			region.start = res->start -
				       hose->pci_mem_offset -
				       phb->ioda.m32_pci_base;
			region.end   = res->end -
				       hose->pci_mem_offset -
				       phb->ioda.m32_pci_base;
			index = region.start / phb->ioda.m32_segsize;

			while (index < phb->ioda.total_pe &&
			       region.start <= region.end) {
				phb->ioda.m32_segmap[index] = pe->pe_number;
				rc = opal_pci_map_pe_mmio_window(phb->opal_id,
					pe->pe_number, OPAL_M32_WINDOW_TYPE, 0, index);
				if (rc != OPAL_SUCCESS) {
					pr_err("%s: OPAL error %d when mapping M32 "
					       "segment#%d to PE#%d",
					       __func__, rc, index, pe->pe_number);
					break;
				}

				region.start += phb->ioda.m32_segsize;
				index++;
			}
		}
	}
}

static void pnv_pci_ioda_setup_seg(void)
{
	struct pci_controller *tmp, *hose;
	struct pnv_phb *phb;
	struct pnv_ioda_pe *pe;

	list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
		phb = hose->private_data;
		list_for_each_entry(pe, &phb->ioda.pe_list, list) {
			pnv_ioda_setup_pe_seg(hose, pe);
		}
	}
}

static void pnv_pci_ioda_setup_DMA(void)
{
	struct pci_controller *hose, *tmp;
	struct pnv_phb *phb;

	list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
		pnv_ioda_setup_dma(hose->private_data);

		/* Mark the PHB initialization done */
		phb = hose->private_data;
		phb->initialized = 1;
	}
}

static void pnv_pci_ioda_fixup(void)
{
	pnv_pci_ioda_setup_PEs();
	pnv_pci_ioda_setup_seg();
	pnv_pci_ioda_setup_DMA();
}

/*
 * Returns the alignment for I/O or memory windows for P2P
 * bridges. That actually depends on how PEs are segmented.
 * For now, we return I/O or M32 segment size for PE sensitive
 * P2P bridges. Otherwise, the default values (4KiB for I/O,
 * 1MiB for memory) will be returned.
 *
 * The current PCI bus might be put into one PE, which was
 * created against the parent PCI bridge. In that case, we
 * needn't enlarge the alignment, which saves some resources.
 */
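/*
 * For example, with total_pe = 128 and, say, a 2GB M32 window, the
 * segment size is 16MB, so a bus that needs its own PE gets its
 * memory window aligned (and hence sized) in 16MB steps rather than
 * the default 1MB.
 */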
static resource_size_t pnv_pci_window_alignment(struct pci_bus *bus,
						unsigned long type)
{
	struct pci_dev *bridge;
	struct pci_controller *hose = pci_bus_to_host(bus);
	struct pnv_phb *phb = hose->private_data;
	int num_pci_bridges = 0;

	bridge = bus->self;
	while (bridge) {
		if (pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE) {
			num_pci_bridges++;
			if (num_pci_bridges >= 2)
				return 1;
		}

		bridge = bridge->bus->self;
	}

	/* We still need to support prefetchable memory windows later */
	if (type & IORESOURCE_MEM)
		return phb->ioda.m32_segsize;

	return phb->ioda.io_segsize;
}

/* Prevent enabling devices for which we couldn't properly
 * assign a PE
 */
static int pnv_pci_enable_device_hook(struct pci_dev *dev)
{
	struct pci_controller *hose = pci_bus_to_host(dev->bus);
	struct pnv_phb *phb = hose->private_data;
	struct pci_dn *pdn;

	/* The function can be called while the PEs have
	 * not been created yet. For example, resource reassignment
	 * during PCI probe time. We just skip the check if the
	 * PEs aren't ready.
	 */
	if (!phb->initialized)
		return 0;

	pdn = pnv_ioda_get_pdn(dev);
	if (!pdn || pdn->pe_number == IODA_INVALID_PE)
		return -EINVAL;

	return 0;
}

static u32 pnv_ioda_bdfn_to_pe(struct pnv_phb *phb, struct pci_bus *bus,
			       u32 devfn)
{
	return phb->ioda.pe_rmap[(bus->number << 8) | devfn];
}

void __init pnv_pci_init_ioda1_phb(struct device_node *np)
{
	struct pci_controller *hose;
	static int primary = 1;
	struct pnv_phb *phb;
	unsigned long size, m32map_off, iomap_off, pemap_off;
	const u64 *prop64;
	u64 phb_id;
	void *aux;
	long rc;

	pr_info(" Initializing IODA OPAL PHB %s\n", np->full_name);

	prop64 = of_get_property(np, "ibm,opal-phbid", NULL);
	if (!prop64) {
		pr_err("  Missing \"ibm,opal-phbid\" property !\n");
		return;
	}
	phb_id = be64_to_cpup(prop64);
	pr_debug("  PHB-ID  : 0x%016llx\n", phb_id);

	phb = alloc_bootmem(sizeof(struct pnv_phb));
	if (phb) {
		memset(phb, 0, sizeof(struct pnv_phb));
		phb->hose = hose = pcibios_alloc_controller(np);
	}
	if (!phb || !phb->hose) {
		pr_err("PCI: Failed to allocate PCI controller for %s\n",
		       np->full_name);
		return;
	}

	spin_lock_init(&phb->lock);
	/* XXX Use device-tree */
	hose->first_busno = 0;
	hose->last_busno = 0xff;
	hose->private_data = phb;
	phb->opal_id = phb_id;
	phb->type = PNV_PHB_IODA1;

	/* Detect specific models for error handling */
	if (of_device_is_compatible(np, "ibm,p7ioc-pciex"))
		phb->model = PNV_PHB_MODEL_P7IOC;
	else
		phb->model = PNV_PHB_MODEL_UNKNOWN;

	/* We parse "ranges" now since we need to deduce the register base
	 * from the IO base
	 */
	pci_process_bridge_OF_ranges(phb->hose, np, primary);
	primary = 0;

	/* Magic formula from Milton */
	phb->regs = of_iomap(np, 0);
	if (phb->regs == NULL)
		pr_err(" Failed to map registers !\n");

	/* XXX This is hack-a-thon. This needs to be changed so that:
	 *  - we obtain stuff like PE# etc... from device-tree
	 *  - we properly re-allocate M32 ourselves
	 *    (the OFW one isn't very good)
	 */

	/* Initialize more IODA stuff */
	phb->ioda.total_pe = 128;

	phb->ioda.m32_size = resource_size(&hose->mem_resources[0]);
	/* OFW has already chopped the top 64k off the M32 space
	 * (MSI space); add it back for the segment size math
	 */
	phb->ioda.m32_size += 0x10000;
	phb->ioda.m32_segsize = phb->ioda.m32_size / phb->ioda.total_pe;
	phb->ioda.m32_pci_base = hose->mem_resources[0].start -
		hose->pci_mem_offset;
	phb->ioda.io_size = hose->pci_io_size;
	phb->ioda.io_segsize = phb->ioda.io_size / phb->ioda.total_pe;
	phb->ioda.io_pci_base = 0; /* XXX calculate this ? */

	/* Allocate aux data & arrays */
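	/*
	 * One bootmem chunk holds, back to back: the PE allocation
	 * bitmap, the M32 segment map, the I/O segment map and the PE
	 * array; m32map_off/iomap_off/pemap_off record where each
	 * piece starts.
	 */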
	size = _ALIGN_UP(phb->ioda.total_pe / 8, sizeof(unsigned long));
	m32map_off = size;
	size += phb->ioda.total_pe * sizeof(phb->ioda.m32_segmap[0]);
	iomap_off = size;
	size += phb->ioda.total_pe * sizeof(phb->ioda.io_segmap[0]);
	pemap_off = size;
	size += phb->ioda.total_pe * sizeof(struct pnv_ioda_pe);
	aux = alloc_bootmem(size);
	memset(aux, 0, size);
	phb->ioda.pe_alloc = aux;
	phb->ioda.m32_segmap = aux + m32map_off;
	phb->ioda.io_segmap = aux + iomap_off;
	phb->ioda.pe_array = aux + pemap_off;
	set_bit(0, phb->ioda.pe_alloc);

	INIT_LIST_HEAD(&phb->ioda.pe_dma_list);
	INIT_LIST_HEAD(&phb->ioda.pe_list);

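	/*
	 * Each 32-bit TCE segment maps 256MB (1 << 28) of DMA space,
	 * and DMA32 space runs from 0 up to the start of the M32
	 * window, hence the shift below.
	 */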
	/* Calculate how many 32-bit TCE segments we have */
	phb->ioda.tce32_count = phb->ioda.m32_pci_base >> 28;

	/* Clear unusable m64 */
	hose->mem_resources[1].flags = 0;
	hose->mem_resources[1].start = 0;
	hose->mem_resources[1].end = 0;
	hose->mem_resources[2].flags = 0;
	hose->mem_resources[2].start = 0;
	hose->mem_resources[2].end = 0;

#if 0
	rc = opal_pci_set_phb_mem_window(opal->phb_id,
					 window_type,
					 window_num,
					 starting_real_address,
					 starting_pci_address,
					 segment_size);
#endif

	pr_info("  %d PE's M32: 0x%x [segment=0x%x] IO: 0x%x [segment=0x%x]\n",
		phb->ioda.total_pe,
		phb->ioda.m32_size, phb->ioda.m32_segsize,
		phb->ioda.io_size, phb->ioda.io_segsize);

	if (phb->regs) {
		pr_devel(" BUID     = 0x%016llx\n", in_be64(phb->regs + 0x100));
		pr_devel(" PHB2_CR  = 0x%016llx\n", in_be64(phb->regs + 0x160));
		pr_devel(" IO_BAR   = 0x%016llx\n", in_be64(phb->regs + 0x170));
		pr_devel(" IO_BAMR  = 0x%016llx\n", in_be64(phb->regs + 0x178));
		pr_devel(" IO_SAR   = 0x%016llx\n", in_be64(phb->regs + 0x180));
		pr_devel(" M32_BAR  = 0x%016llx\n", in_be64(phb->regs + 0x190));
		pr_devel(" M32_BAMR = 0x%016llx\n", in_be64(phb->regs + 0x198));
		pr_devel(" M32_SAR  = 0x%016llx\n", in_be64(phb->regs + 0x1a0));
	}
	phb->hose->ops = &pnv_pci_ops;

	/* Setup RID -> PE mapping function */
	phb->bdfn_to_pe = pnv_ioda_bdfn_to_pe;

	/* Setup TCEs */
	phb->dma_dev_setup = pnv_pci_ioda_dma_dev_setup;

	/* Setup MSI support */
	pnv_pci_init_ioda_msis(phb);

	/*
	 * We pass the PCI probe flag PCI_REASSIGN_ALL_RSRC here
	 * to let the PCI core do resource assignment. The expectation
	 * is that the PCI core will do correct I/O and MMIO alignment
	 * for the P2P bridge BARs so that each PCI bus (excluding
	 * the child P2P bridges) can form an individual PE.
	 */
	ppc_md.pcibios_fixup = pnv_pci_ioda_fixup;
	ppc_md.pcibios_enable_device_hook = pnv_pci_enable_device_hook;
	ppc_md.pcibios_window_alignment = pnv_pci_window_alignment;
	pci_add_flags(PCI_REASSIGN_ALL_RSRC);

	/* Reset IODA tables to a clean state */
	rc = opal_pci_reset(phb_id, OPAL_PCI_IODA_TABLE_RESET, OPAL_ASSERT_RESET);
	if (rc)
		pr_warning("  OPAL Error %ld performing IODA table reset !\n", rc);
	opal_pci_set_pe(phb_id, 0, 0, 7, 1, 1, OPAL_MAP_PE);
}

void __init pnv_pci_init_ioda_hub(struct device_node *np)
{
	struct device_node *phbn;
	const u64 *prop64;
	u64 hub_id;

	pr_info("Probing IODA IO-Hub %s\n", np->full_name);

	prop64 = of_get_property(np, "ibm,opal-hubid", NULL);
	if (!prop64) {
		pr_err(" Missing \"ibm,opal-hubid\" property !\n");
		return;
	}
	hub_id = be64_to_cpup(prop64);
	pr_devel(" HUB-ID : 0x%016llx\n", hub_id);

	/* Count child PHBs */
	for_each_child_of_node(np, phbn) {
		/* Look for IODA1 PHBs */
		if (of_device_is_compatible(phbn, "ibm,ioda-phb"))
			pnv_pci_init_ioda1_phb(phbn);
	}
}