pci-ioda.c

/*
 * Support PCI/PCIe on PowerNV platforms
 *
 * Copyright 2011 Benjamin Herrenschmidt, IBM Corp.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

#undef DEBUG

#include <linux/kernel.h>
#include <linux/pci.h>
#include <linux/delay.h>
#include <linux/string.h>
#include <linux/init.h>
#include <linux/bootmem.h>
#include <linux/irq.h>
#include <linux/io.h>
#include <linux/msi.h>

#include <asm/sections.h>
#include <asm/io.h>
#include <asm/prom.h>
#include <asm/pci-bridge.h>
#include <asm/machdep.h>
#include <asm/msi_bitmap.h>
#include <asm/ppc-pci.h>
#include <asm/opal.h>
#include <asm/iommu.h>
#include <asm/tce.h>

#include "powernv.h"
#include "pci.h"
#define define_pe_printk_level(func, kern_level)		\
static int func(const struct pnv_ioda_pe *pe, const char *fmt, ...)	\
{								\
	struct va_format vaf;					\
	va_list args;						\
	char pfix[32];						\
	int r;							\
								\
	va_start(args, fmt);					\
								\
	vaf.fmt = fmt;						\
	vaf.va = &args;						\
								\
	if (pe->pdev)						\
		strlcpy(pfix, dev_name(&pe->pdev->dev),		\
			sizeof(pfix));				\
	else							\
		sprintf(pfix, "%04x:%02x ",			\
			pci_domain_nr(pe->pbus),		\
			pe->pbus->number);			\
	r = printk(kern_level "pci %s: [PE# %.3d] %pV",		\
		   pfix, pe->pe_number, &vaf);			\
								\
	va_end(args);						\
								\
	return r;						\
}								\

define_pe_printk_level(pe_err, KERN_ERR);
define_pe_printk_level(pe_warn, KERN_WARNING);
define_pe_printk_level(pe_info, KERN_INFO);
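
/*
 * These helpers are used like printk()/dev_err(), with the PE's device
 * name (or domain:bus) and PE# prepended, e.g.:
 *	pe_err(pe, "OPAL error %ld trying to setup PELT table\n", rc);
 */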
static struct pci_dn *pnv_ioda_get_pdn(struct pci_dev *dev)
{
	struct device_node *np;

	np = pci_device_to_OF_node(dev);
	if (!np)
		return NULL;
	return PCI_DN(np);
}
static int pnv_ioda_alloc_pe(struct pnv_phb *phb)
{
	unsigned long pe;

	do {
		pe = find_next_zero_bit(phb->ioda.pe_alloc,
					phb->ioda.total_pe, 0);
		if (pe >= phb->ioda.total_pe)
			return IODA_INVALID_PE;
	} while(test_and_set_bit(pe, phb->ioda.pe_alloc));

	phb->ioda.pe_array[pe].pe_number = pe;
	return pe;
}
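
/*
 * Note: the test_and_set_bit() retry loop above makes PE allocation safe
 * against concurrent allocators without taking a lock; if two callers
 * race for the same bit, the loser simply rescans the bitmap.
 */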
static void pnv_ioda_free_pe(struct pnv_phb *phb, int pe)
{
	WARN_ON(phb->ioda.pe_array[pe].pdev);

	memset(&phb->ioda.pe_array[pe], 0, sizeof(struct pnv_ioda_pe));
	clear_bit(pe, phb->ioda.pe_alloc);
}
/* Currently those 2 are only used when MSIs are enabled, this will change
 * but in the meantime, we need to protect them to avoid warnings
 */
#ifdef CONFIG_PCI_MSI
static struct pnv_ioda_pe *pnv_ioda_get_pe(struct pci_dev *dev)
{
	struct pci_controller *hose = pci_bus_to_host(dev->bus);
	struct pnv_phb *phb = hose->private_data;
	struct pci_dn *pdn = pnv_ioda_get_pdn(dev);

	if (!pdn)
		return NULL;
	if (pdn->pe_number == IODA_INVALID_PE)
		return NULL;
	return &phb->ioda.pe_array[pdn->pe_number];
}
#endif /* CONFIG_PCI_MSI */
static int pnv_ioda_configure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe)
{
	struct pci_dev *parent;
	uint8_t bcomp, dcomp, fcomp;
	long rc, rid_end, rid;

	/* Bus validation ? */
	if (pe->pbus) {
		int count;

		dcomp = OPAL_IGNORE_RID_DEVICE_NUMBER;
		fcomp = OPAL_IGNORE_RID_FUNCTION_NUMBER;
		parent = pe->pbus->self;
		if (pe->flags & PNV_IODA_PE_BUS_ALL)
			count = pe->pbus->busn_res.end - pe->pbus->busn_res.start + 1;
		else
			count = 1;
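
		/*
		 * The bus-number compare mask below selects how many of
		 * the 8 bus bits OPAL compares when matching a RID to
		 * this PE: an exact match (OpalPciBusAll) for a single
		 * bus, 7 bits for a 2-bus range, down to 3 bits for a
		 * 32-bus range. Ranges must be power-of-two sized.
		 */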
		switch(count) {
		case  1: bcomp = OpalPciBusAll;		break;
		case  2: bcomp = OpalPciBus7Bits;	break;
		case  4: bcomp = OpalPciBus6Bits;	break;
		case  8: bcomp = OpalPciBus5Bits;	break;
		case 16: bcomp = OpalPciBus4Bits;	break;
		case 32: bcomp = OpalPciBus3Bits;	break;
		default:
			pr_err("%s: Number of subordinate busses %d"
			       " unsupported\n",
			       pci_name(pe->pbus->self), count);
			/* Do an exact match only */
			bcomp = OpalPciBusAll;
		}
		rid_end = pe->rid + (count << 8);
	} else {
		parent = pe->pdev->bus->self;
		bcomp = OpalPciBusAll;
		dcomp = OPAL_COMPARE_RID_DEVICE_NUMBER;
		fcomp = OPAL_COMPARE_RID_FUNCTION_NUMBER;
		rid_end = pe->rid + 1;
	}

	/* Associate PE in PELT */
	rc = opal_pci_set_pe(phb->opal_id, pe->pe_number, pe->rid,
			     bcomp, dcomp, fcomp, OPAL_MAP_PE);
	if (rc) {
		pe_err(pe, "OPAL error %ld trying to setup PELT table\n", rc);
		return -ENXIO;
	}
	opal_pci_eeh_freeze_clear(phb->opal_id, pe->pe_number,
				  OPAL_EEH_ACTION_CLEAR_FREEZE_ALL);

	/* Add to all parents' PELT-V */
	while (parent) {
		struct pci_dn *pdn = pnv_ioda_get_pdn(parent);
		if (pdn && pdn->pe_number != IODA_INVALID_PE) {
			rc = opal_pci_set_peltv(phb->opal_id, pdn->pe_number,
						pe->pe_number, OPAL_ADD_PE_TO_DOMAIN);
			/* XXX What to do in case of error ? */
		}
		parent = parent->bus->self;
	}

	/* Setup reverse map */
	for (rid = pe->rid; rid < rid_end; rid++)
		phb->ioda.pe_rmap[rid] = pe->pe_number;

	/* Setup one MVE on IODA1 */
	if (phb->type == PNV_PHB_IODA1) {
		pe->mve_number = pe->pe_number;
		rc = opal_pci_set_mve(phb->opal_id, pe->mve_number,
				      pe->pe_number);
		if (rc) {
			pe_err(pe, "OPAL error %ld setting up MVE %d\n",
			       rc, pe->mve_number);
			pe->mve_number = -1;
		} else {
			rc = opal_pci_set_mve_enable(phb->opal_id,
						     pe->mve_number, OPAL_ENABLE_MVE);
			if (rc) {
				pe_err(pe, "OPAL error %ld enabling MVE %d\n",
				       rc, pe->mve_number);
				pe->mve_number = -1;
			}
		}
	} else if (phb->type == PNV_PHB_IODA2)
		pe->mve_number = 0;

	return 0;
}
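
/*
 * Keep phb->ioda.pe_dma_list sorted by descending DMA weight, so that
 * pnv_ioda_setup_dma() below hands out TCE segments to the heaviest
 * PEs first.
 */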
static void pnv_ioda_link_pe_by_weight(struct pnv_phb *phb,
				       struct pnv_ioda_pe *pe)
{
	struct pnv_ioda_pe *lpe;

	list_for_each_entry(lpe, &phb->ioda.pe_dma_list, dma_link) {
		if (lpe->dma_weight < pe->dma_weight) {
			list_add_tail(&pe->dma_link, &lpe->dma_link);
			return;
		}
	}
	list_add_tail(&pe->dma_link, &phb->ioda.pe_dma_list);
}
static unsigned int pnv_ioda_dma_weight(struct pci_dev *dev)
{
	/* This is quite simplistic. The "base" weight of a device
	 * is 10. 0 means no DMA is to be accounted for it.
	 */

	/* If it's a bridge, no DMA */
	if (dev->hdr_type != PCI_HEADER_TYPE_NORMAL)
		return 0;

	/* Reduce the weight of slow USB controllers */
	if (dev->class == PCI_CLASS_SERIAL_USB_UHCI ||
	    dev->class == PCI_CLASS_SERIAL_USB_OHCI ||
	    dev->class == PCI_CLASS_SERIAL_USB_EHCI)
		return 3;

	/* Increase the weight of RAID (includes Obsidian) */
	if ((dev->class >> 8) == PCI_CLASS_STORAGE_RAID)
		return 15;

	/* Default */
	return 10;
}
#if 0
static struct pnv_ioda_pe *pnv_ioda_setup_dev_PE(struct pci_dev *dev)
{
	struct pci_controller *hose = pci_bus_to_host(dev->bus);
	struct pnv_phb *phb = hose->private_data;
	struct pci_dn *pdn = pnv_ioda_get_pdn(dev);
	struct pnv_ioda_pe *pe;
	int pe_num;

	if (!pdn) {
		pr_err("%s: Device tree node not associated properly\n",
		       pci_name(dev));
		return NULL;
	}
	if (pdn->pe_number != IODA_INVALID_PE)
		return NULL;

	/* PE#0 has been pre-set */
	if (dev->bus->number == 0)
		pe_num = 0;
	else
		pe_num = pnv_ioda_alloc_pe(phb);
	if (pe_num == IODA_INVALID_PE) {
		pr_warning("%s: Not enough PE# available, disabling device\n",
			   pci_name(dev));
		return NULL;
	}

	/* NOTE: We get only one ref to the pci_dev for the pdn, not for the
	 * pointer in the PE data structure, both should be destroyed at the
	 * same time. However, this needs to be looked at more closely again
	 * once we actually start removing things (Hotplug, SR-IOV, ...)
	 *
	 * At some point we want to remove the PDN completely anyways
	 */
	pe = &phb->ioda.pe_array[pe_num];
	pci_dev_get(dev);
	pdn->pcidev = dev;
	pdn->pe_number = pe_num;
	pe->pdev = dev;
	pe->pbus = NULL;
	pe->tce32_seg = -1;
	pe->mve_number = -1;
	pe->rid = dev->bus->number << 8 | pdn->devfn;

	pe_info(pe, "Associated device to PE\n");

	if (pnv_ioda_configure_pe(phb, pe)) {
		/* XXX What do we do here ? */
		if (pe_num)
			pnv_ioda_free_pe(phb, pe_num);
		pdn->pe_number = IODA_INVALID_PE;
		pe->pdev = NULL;
		pci_dev_put(dev);
		return NULL;
	}

	/* Assign a DMA weight to the device */
	pe->dma_weight = pnv_ioda_dma_weight(dev);
	if (pe->dma_weight != 0) {
		phb->ioda.dma_weight += pe->dma_weight;
		phb->ioda.dma_pe_count++;
	}

	/* Link the PE */
	pnv_ioda_link_pe_by_weight(phb, pe);

	return pe;
}
#endif /* Useful for SRIOV case */
static void pnv_ioda_setup_same_PE(struct pci_bus *bus, struct pnv_ioda_pe *pe)
{
	struct pci_dev *dev;

	list_for_each_entry(dev, &bus->devices, bus_list) {
		struct pci_dn *pdn = pnv_ioda_get_pdn(dev);

		if (pdn == NULL) {
			pr_warn("%s: No device node associated with device !\n",
				pci_name(dev));
			continue;
		}
		pci_dev_get(dev);
		pdn->pcidev = dev;
		pdn->pe_number = pe->pe_number;
		pe->dma_weight += pnv_ioda_dma_weight(dev);
		if ((pe->flags & PNV_IODA_PE_BUS_ALL) && dev->subordinate)
			pnv_ioda_setup_same_PE(dev->subordinate, pe);
	}
}
/*
 * There are 2 types of PCI-bus-sensitive PEs: one comprises a single
 * PCI bus; the other contains the primary PCI bus and its subordinate
 * PCI devices and buses. The second type of PE normally originates
 * from a PCIe-to-PCI bridge or a PLX switch downstream port.
 */
static void pnv_ioda_setup_bus_PE(struct pci_bus *bus, int all)
{
	struct pci_controller *hose = pci_bus_to_host(bus);
	struct pnv_phb *phb = hose->private_data;
	struct pnv_ioda_pe *pe;
	int pe_num;

	pe_num = pnv_ioda_alloc_pe(phb);
	if (pe_num == IODA_INVALID_PE) {
		pr_warning("%s: Not enough PE# available for PCI bus %04x:%02x\n",
			   __func__, pci_domain_nr(bus), bus->number);
		return;
	}

	pe = &phb->ioda.pe_array[pe_num];
	pe->flags = (all ? PNV_IODA_PE_BUS_ALL : PNV_IODA_PE_BUS);
	pe->pbus = bus;
	pe->pdev = NULL;
	pe->tce32_seg = -1;
	pe->mve_number = -1;
	pe->rid = bus->busn_res.start << 8;
	pe->dma_weight = 0;

	if (all)
		pe_info(pe, "Secondary bus %d..%d associated with PE#%d\n",
			bus->busn_res.start, bus->busn_res.end, pe_num);
	else
		pe_info(pe, "Secondary bus %d associated with PE#%d\n",
			bus->busn_res.start, pe_num);

	if (pnv_ioda_configure_pe(phb, pe)) {
		/* XXX What do we do here ? */
		if (pe_num)
			pnv_ioda_free_pe(phb, pe_num);
		pe->pbus = NULL;
		return;
	}

	/* Associate it with all child devices */
	pnv_ioda_setup_same_PE(bus, pe);

	/* Put PE to the list */
	list_add_tail(&pe->list, &phb->ioda.pe_list);

	/* Account for one DMA PE if at least one DMA capable device exists
	 * below the bridge
	 */
	if (pe->dma_weight != 0) {
		phb->ioda.dma_weight += pe->dma_weight;
		phb->ioda.dma_pe_count++;
	}

	/* Link the PE */
	pnv_ioda_link_pe_by_weight(phb, pe);
}
static void pnv_ioda_setup_PEs(struct pci_bus *bus)
{
	struct pci_dev *dev;

	pnv_ioda_setup_bus_PE(bus, 0);

	list_for_each_entry(dev, &bus->devices, bus_list) {
		if (dev->subordinate) {
			if (pci_pcie_type(dev) == PCI_EXP_TYPE_PCI_BRIDGE)
				pnv_ioda_setup_bus_PE(dev->subordinate, 1);
			else
				pnv_ioda_setup_PEs(dev->subordinate);
		}
	}
}
/*
 * Configure PEs so that the downstream PCI buses and devices
 * can have their associated PE#. Unfortunately, we haven't
 * figured out a way to identify PLX bridges yet, so we simply
 * put the PCI bus and the subordinates behind the root port
 * into one PE# here. This rule is expected to change as soon
 * as we can detect PLX bridges correctly.
 */
static void pnv_pci_ioda_setup_PEs(void)
{
	struct pci_controller *hose, *tmp;

	list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
		pnv_ioda_setup_PEs(hose->bus);
	}
}
static void pnv_pci_ioda_dma_dev_setup(struct pnv_phb *phb, struct pci_dev *dev)
{
	/* We delay DMA setup until we have assigned all PE# */
}

static void pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe, struct pci_bus *bus)
{
	struct pci_dev *dev;

	list_for_each_entry(dev, &bus->devices, bus_list) {
		set_iommu_table_base(&dev->dev, &pe->tce32_table);
		if (dev->subordinate)
			pnv_ioda_setup_bus_dma(pe, dev->subordinate);
	}
}
static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb,
				      struct pnv_ioda_pe *pe, unsigned int base,
				      unsigned int segs)
{
	struct page *tce_mem = NULL;
	const __be64 *swinvp;
	struct iommu_table *tbl;
	unsigned int i;
	int64_t rc;
	void *addr;

	/* 256M DMA window, 4K TCE pages, 8 bytes TCE */
#define TCE32_TABLE_SIZE	((0x10000000 / 0x1000) * 8)
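	/*
	 * That is 0x10000000 / 0x1000 = 65536 TCE entries per 256MB
	 * segment, at 8 bytes each: 512KB of TCE table per segment.
	 */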
	/* XXX FIXME: Handle 64-bit only DMA devices */
	/* XXX FIXME: Provide 64-bit DMA facilities & non-4K TCE tables etc.. */
	/* XXX FIXME: Allocate multi-level tables on PHB3 */

	/* We shouldn't already have a 32-bit DMA associated */
	if (WARN_ON(pe->tce32_seg >= 0))
		return;

	/* Grab a 32-bit TCE table */
	pe->tce32_seg = base;
	pe_info(pe, " Setting up 32-bit TCE table at %08x..%08x\n",
		(base << 28), ((base + segs) << 28) - 1);

	/* XXX Currently, we allocate one big contiguous table for the
	 * TCEs. We only really need one chunk per 256M of TCE space
	 * (ie per segment) but that's an optimization for later, it
	 * requires some added smarts with our get/put_tce implementation
	 */
	tce_mem = alloc_pages_node(phb->hose->node, GFP_KERNEL,
				   get_order(TCE32_TABLE_SIZE * segs));
	if (!tce_mem) {
		pe_err(pe, " Failed to allocate a 32-bit TCE memory\n");
		goto fail;
	}
	addr = page_address(tce_mem);
	memset(addr, 0, TCE32_TABLE_SIZE * segs);

	/* Configure HW */
	for (i = 0; i < segs; i++) {
		rc = opal_pci_map_pe_dma_window(phb->opal_id,
						pe->pe_number,
						base + i, 1,
						__pa(addr) + TCE32_TABLE_SIZE * i,
						TCE32_TABLE_SIZE, 0x1000);
		if (rc) {
			pe_err(pe, " Failed to configure 32-bit TCE table,"
			       " err %ld\n", rc);
			goto fail;
		}
	}

	/* Setup linux iommu table */
	tbl = &pe->tce32_table;
	pnv_pci_setup_iommu_table(tbl, addr, TCE32_TABLE_SIZE * segs,
				  base << 28);

	/* OPAL variant of P7IOC SW invalidated TCEs */
	swinvp = of_get_property(phb->hose->dn, "ibm,opal-tce-kill", NULL);
	if (swinvp) {
		/* We need a couple more fields -- an address and a data
		 * to or. Since the bus is only printed out on table free
		 * errors, and on the first pass the data will be a relative
		 * bus number, print that out instead.
		 */
		tbl->it_busno = 0;
		tbl->it_index = (unsigned long)ioremap(be64_to_cpup(swinvp), 8);
		tbl->it_type = TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE
			| TCE_PCI_SWINV_PAIR;
	}
	iommu_init_table(tbl, phb->hose->node);

	if (pe->pdev)
		set_iommu_table_base(&pe->pdev->dev, tbl);
	else
		pnv_ioda_setup_bus_dma(pe, pe->pbus);

	return;
fail:
	/* XXX Failure: Try to fallback to 64-bit only ? */
	if (pe->tce32_seg >= 0)
		pe->tce32_seg = -1;
	if (tce_mem)
		__free_pages(tce_mem, get_order(TCE32_TABLE_SIZE * segs));
}
static void pnv_ioda_setup_dma(struct pnv_phb *phb)
{
	struct pci_controller *hose = phb->hose;
	unsigned int residual, remaining, segs, tw, base;
	struct pnv_ioda_pe *pe;

	/* If we have more PE# than segments available, hand out one
	 * per PE until we run out and let the rest fail. If not,
	 * then we assign at least one segment per PE, plus more based
	 * on the amount of devices under that PE
	 */
	if (phb->ioda.dma_pe_count > phb->ioda.tce32_count)
		residual = 0;
	else
		residual = phb->ioda.tce32_count -
			phb->ioda.dma_pe_count;

	pr_info("PCI: Domain %04x has %ld available 32-bit DMA segments\n",
		hose->global_number, phb->ioda.tce32_count);
	pr_info("PCI: %d PE# for a total weight of %d\n",
		phb->ioda.dma_pe_count, phb->ioda.dma_weight);

	/* Walk our PE list and configure their DMA segments, hand them
	 * out one base segment plus any residual segments based on
	 * weight
	 */
	remaining = phb->ioda.tce32_count;
	tw = phb->ioda.dma_weight;
	base = 0;
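	/*
	 * Each DMA-capable PE gets one base segment; the residual
	 * segments are then shared out in proportion to each PE's
	 * weight, with the +tw/2 term below rounding the share to the
	 * nearest whole segment rather than truncating it.
	 */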
	list_for_each_entry(pe, &phb->ioda.pe_dma_list, dma_link) {
		if (!pe->dma_weight)
			continue;
		if (!remaining) {
			pe_warn(pe, "No DMA32 resources available\n");
			continue;
		}
		segs = 1;
		if (residual) {
			segs += ((pe->dma_weight * residual) + (tw / 2)) / tw;
			if (segs > remaining)
				segs = remaining;
		}
		pe_info(pe, "DMA weight %d, assigned %d DMA32 segments\n",
			pe->dma_weight, segs);
		pnv_pci_ioda_setup_dma_pe(phb, pe, base, segs);
		remaining -= segs;
		base += segs;
	}
}
#ifdef CONFIG_PCI_MSI
static int pnv_pci_ioda_msi_setup(struct pnv_phb *phb, struct pci_dev *dev,
				  unsigned int hwirq, unsigned int is_64,
				  struct msi_msg *msg)
{
	struct pnv_ioda_pe *pe = pnv_ioda_get_pe(dev);
	unsigned int xive_num = hwirq - phb->msi_base;
	uint64_t addr64;
	uint32_t addr32, data;
	int rc;

	/* No PE assigned ? bail out ... no MSI for you ! */
	if (pe == NULL)
		return -ENXIO;

	/* Check if we have an MVE */
	if (pe->mve_number < 0)
		return -ENXIO;

	/* Assign XIVE to PE */
	rc = opal_pci_set_xive_pe(phb->opal_id, pe->pe_number, xive_num);
	if (rc) {
		pr_warn("%s: OPAL error %d setting XIVE %d PE\n",
			pci_name(dev), rc, xive_num);
		return -EIO;
	}

	if (is_64) {
		rc = opal_get_msi_64(phb->opal_id, pe->mve_number, xive_num, 1,
				     &addr64, &data);
		if (rc) {
			pr_warn("%s: OPAL error %d getting 64-bit MSI data\n",
				pci_name(dev), rc);
			return -EIO;
		}
		msg->address_hi = addr64 >> 32;
		msg->address_lo = addr64 & 0xfffffffful;
	} else {
		rc = opal_get_msi_32(phb->opal_id, pe->mve_number, xive_num, 1,
				     &addr32, &data);
		if (rc) {
			pr_warn("%s: OPAL error %d getting 32-bit MSI data\n",
				pci_name(dev), rc);
			return -EIO;
		}
		msg->address_hi = 0;
		msg->address_lo = addr32;
	}
	msg->data = data;

	pr_devel("%s: %s-bit MSI on hwirq %x (xive #%d),"
		 " address=%x_%08x data=%x PE# %d\n",
		 pci_name(dev), is_64 ? "64" : "32", hwirq, xive_num,
		 msg->address_hi, msg->address_lo, data, pe->pe_number);

	return 0;
}
static void pnv_pci_init_ioda_msis(struct pnv_phb *phb)
{
	unsigned int count;
	const __be32 *prop = of_get_property(phb->hose->dn,
					     "ibm,opal-msi-ranges", NULL);
	if (!prop) {
		/* BML Fallback */
		prop = of_get_property(phb->hose->dn, "msi-ranges", NULL);
	}
	if (!prop)
		return;

	phb->msi_base = be32_to_cpup(prop);
	count = be32_to_cpup(prop + 1);
	if (msi_bitmap_alloc(&phb->msi_bmp, count, phb->hose->dn)) {
		pr_err("PCI %d: Failed to allocate MSI bitmap !\n",
		       phb->hose->global_number);
		return;
	}
	phb->msi_setup = pnv_pci_ioda_msi_setup;
	phb->msi32_support = 1;
	pr_info(" Allocated bitmap for %d MSIs (base IRQ 0x%x)\n",
		count, phb->msi_base);
}
#else
static void pnv_pci_init_ioda_msis(struct pnv_phb *phb) { }
#endif /* CONFIG_PCI_MSI */
/*
 * This function is supposed to be called on PEs from top to
 * bottom, so that the I/O or MMIO segments assigned to a
 * parent PE can be overridden by its child PEs if necessary.
 */
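/*
 * The PHB's I/O and M32 windows are each split into total_pe
 * equal-sized segments; for every segment covered by one of the
 * PE's bus resources, the loop below records the owner in the
 * segment map and asks OPAL to route that window segment to the PE.
 */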
static void pnv_ioda_setup_pe_seg(struct pci_controller *hose,
				  struct pnv_ioda_pe *pe)
{
	struct pnv_phb *phb = hose->private_data;
	struct pci_bus_region region;
	struct resource *res;
	int i, index;
	int rc;

	/*
	 * NOTE: We only care about PCI-bus-based PEs for now.
	 * PCI-device-based PEs, for example SR-IOV-sensitive VFs,
	 * will be figured out later.
	 */
	BUG_ON(!(pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)));

	pci_bus_for_each_resource(pe->pbus, res, i) {
		if (!res || !res->flags ||
		    res->start > res->end)
			continue;

		if (res->flags & IORESOURCE_IO) {
			region.start = res->start - phb->ioda.io_pci_base;
			region.end = res->end - phb->ioda.io_pci_base;
			index = region.start / phb->ioda.io_segsize;

			while (index < phb->ioda.total_pe &&
			       region.start <= region.end) {
				phb->ioda.io_segmap[index] = pe->pe_number;
				rc = opal_pci_map_pe_mmio_window(phb->opal_id,
					pe->pe_number, OPAL_IO_WINDOW_TYPE, 0, index);
				if (rc != OPAL_SUCCESS) {
					pr_err("%s: OPAL error %d when mapping IO "
					       "segment #%d to PE#%d\n",
					       __func__, rc, index, pe->pe_number);
					break;
				}

				region.start += phb->ioda.io_segsize;
				index++;
			}
		} else if (res->flags & IORESOURCE_MEM) {
			region.start = res->start -
				       hose->pci_mem_offset -
				       phb->ioda.m32_pci_base;
			region.end = res->end -
				     hose->pci_mem_offset -
				     phb->ioda.m32_pci_base;
			index = region.start / phb->ioda.m32_segsize;

			while (index < phb->ioda.total_pe &&
			       region.start <= region.end) {
				phb->ioda.m32_segmap[index] = pe->pe_number;
				rc = opal_pci_map_pe_mmio_window(phb->opal_id,
					pe->pe_number, OPAL_M32_WINDOW_TYPE, 0, index);
				if (rc != OPAL_SUCCESS) {
					pr_err("%s: OPAL error %d when mapping M32 "
					       "segment#%d to PE#%d",
					       __func__, rc, index, pe->pe_number);
					break;
				}

				region.start += phb->ioda.m32_segsize;
				index++;
			}
		}
	}
}
static void pnv_pci_ioda_setup_seg(void)
{
	struct pci_controller *tmp, *hose;
	struct pnv_phb *phb;
	struct pnv_ioda_pe *pe;

	list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
		phb = hose->private_data;
		list_for_each_entry(pe, &phb->ioda.pe_list, list) {
			pnv_ioda_setup_pe_seg(hose, pe);
		}
	}
}

static void pnv_pci_ioda_setup_DMA(void)
{
	struct pci_controller *hose, *tmp;
	struct pnv_phb *phb;

	list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
		pnv_ioda_setup_dma(hose->private_data);

		/* Mark the PHB initialization done */
		phb = hose->private_data;
		phb->initialized = 1;
	}
}

static void pnv_pci_ioda_fixup(void)
{
	pnv_pci_ioda_setup_PEs();
	pnv_pci_ioda_setup_seg();
	pnv_pci_ioda_setup_DMA();
}
/*
 * Returns the alignment for I/O or memory windows for P2P
 * bridges. That actually depends on how PEs are segmented.
 * For now, we return the I/O or M32 segment size for PE-sensitive
 * P2P bridges. Otherwise, the default values (4KiB for I/O,
 * 1MiB for memory) will be returned.
 *
 * The current PCI bus might be put into one PE, which was
 * created against the parent PCI bridge. In that case, we
 * needn't enlarge the alignment, which saves some resources.
 */
static resource_size_t pnv_pci_window_alignment(struct pci_bus *bus,
						unsigned long type)
{
	struct pci_dev *bridge;
	struct pci_controller *hose = pci_bus_to_host(bus);
	struct pnv_phb *phb = hose->private_data;
	int num_pci_bridges = 0;

	bridge = bus->self;
	while (bridge) {
		if (pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE) {
			num_pci_bridges++;
			if (num_pci_bridges >= 2)
				return 1;
		}

		bridge = bridge->bus->self;
	}

	/* We need to support prefetchable memory windows later */
	if (type & IORESOURCE_MEM)
		return phb->ioda.m32_segsize;

	return phb->ioda.io_segsize;
}
/* Prevent enabling devices for which we couldn't properly
 * assign a PE
 */
static int pnv_pci_enable_device_hook(struct pci_dev *dev)
{
	struct pci_controller *hose = pci_bus_to_host(dev->bus);
	struct pnv_phb *phb = hose->private_data;
	struct pci_dn *pdn;

	/* This function can be called while the PEs have not been
	 * created yet, for example during resource reassignment in
	 * the PCI probe period. We just skip the check if the PEs
	 * aren't ready.
	 */
	if (!phb->initialized)
		return 0;

	pdn = pnv_ioda_get_pdn(dev);
	if (!pdn || pdn->pe_number == IODA_INVALID_PE)
		return -EINVAL;

	return 0;
}
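
/*
 * O(1) RID -> PE# lookup. The reverse map is indexed by the RID
 * ((bus << 8) | devfn) and was filled in by pnv_ioda_configure_pe().
 */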
static u32 pnv_ioda_bdfn_to_pe(struct pnv_phb *phb, struct pci_bus *bus,
			       u32 devfn)
{
	return phb->ioda.pe_rmap[(bus->number << 8) | devfn];
}
void __init pnv_pci_init_ioda1_phb(struct device_node *np)
{
	struct pci_controller *hose;
	static int primary = 1;
	struct pnv_phb *phb;
	unsigned long size, m32map_off, iomap_off, pemap_off;
	const u64 *prop64;
	u64 phb_id;
	void *aux;
	long rc;

	pr_info(" Initializing IODA OPAL PHB %s\n", np->full_name);

	prop64 = of_get_property(np, "ibm,opal-phbid", NULL);
	if (!prop64) {
		pr_err(" Missing \"ibm,opal-phbid\" property !\n");
		return;
	}
	phb_id = be64_to_cpup(prop64);
	pr_debug(" PHB-ID : 0x%016llx\n", phb_id);

	phb = alloc_bootmem(sizeof(struct pnv_phb));
	if (phb) {
		memset(phb, 0, sizeof(struct pnv_phb));
		phb->hose = hose = pcibios_alloc_controller(np);
	}
	if (!phb || !phb->hose) {
		pr_err("PCI: Failed to allocate PCI controller for %s\n",
		       np->full_name);
		return;
	}

	spin_lock_init(&phb->lock);
	/* XXX Use device-tree */
	hose->first_busno = 0;
	hose->last_busno = 0xff;
	hose->private_data = phb;
	phb->opal_id = phb_id;
	phb->type = PNV_PHB_IODA1;

	/* Detect specific models for error handling */
	if (of_device_is_compatible(np, "ibm,p7ioc-pciex"))
		phb->model = PNV_PHB_MODEL_P7IOC;
	else
		phb->model = PNV_PHB_MODEL_UNKNOWN;

	/* We parse "ranges" now since we need to deduce the register base
	 * from the IO base
	 */
	pci_process_bridge_OF_ranges(phb->hose, np, primary);
	primary = 0;

	/* Magic formula from Milton */
	phb->regs = of_iomap(np, 0);
	if (phb->regs == NULL)
		pr_err(" Failed to map registers !\n");

	/* XXX This is hack-a-thon. This needs to be changed so that:
	 *  - we obtain stuff like PE# etc... from device-tree
	 *  - we properly re-allocate M32 ourselves
	 *    (the OFW one isn't very good)
	 */

	/* Initialize more IODA stuff */
	phb->ioda.total_pe = 128;

	phb->ioda.m32_size = resource_size(&hose->mem_resources[0]);
	/* OFW has already carved the top 64K off of the M32 space (MSI space) */
	phb->ioda.m32_size += 0x10000;

	phb->ioda.m32_segsize = phb->ioda.m32_size / phb->ioda.total_pe;
	phb->ioda.m32_pci_base = hose->mem_resources[0].start -
		hose->pci_mem_offset;
	phb->ioda.io_size = hose->pci_io_size;
	phb->ioda.io_segsize = phb->ioda.io_size / phb->ioda.total_pe;
	phb->ioda.io_pci_base = 0; /* XXX calculate this ? */

	/* Allocate aux data & arrays */
	size = _ALIGN_UP(phb->ioda.total_pe / 8, sizeof(unsigned long));
	m32map_off = size;
	size += phb->ioda.total_pe * sizeof(phb->ioda.m32_segmap[0]);
	iomap_off = size;
	size += phb->ioda.total_pe * sizeof(phb->ioda.io_segmap[0]);
	pemap_off = size;
	size += phb->ioda.total_pe * sizeof(struct pnv_ioda_pe);
	aux = alloc_bootmem(size);
	memset(aux, 0, size);
	phb->ioda.pe_alloc = aux;
	phb->ioda.m32_segmap = aux + m32map_off;
	phb->ioda.io_segmap = aux + iomap_off;
	phb->ioda.pe_array = aux + pemap_off;
	set_bit(0, phb->ioda.pe_alloc);
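	/*
	 * PE#0 is reserved here and mapped to the whole bus range by the
	 * opal_pci_set_pe() call at the bottom of this function.
	 */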
	INIT_LIST_HEAD(&phb->ioda.pe_dma_list);
	INIT_LIST_HEAD(&phb->ioda.pe_list);

	/* Calculate how many 32-bit TCE segments we have */
	phb->ioda.tce32_count = phb->ioda.m32_pci_base >> 28;
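	/*
	 * Each 32-bit TCE segment covers 256MB of DMA space (hence the
	 * shift by 28), so the DMA window below m32_pci_base yields
	 * that many segments.
	 */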
	/* Clear unusable m64 */
	hose->mem_resources[1].flags = 0;
	hose->mem_resources[1].start = 0;
	hose->mem_resources[1].end = 0;
	hose->mem_resources[2].flags = 0;
	hose->mem_resources[2].start = 0;
	hose->mem_resources[2].end = 0;

#if 0
	rc = opal_pci_set_phb_mem_window(opal->phb_id,
					 window_type,
					 window_num,
					 starting_real_address,
					 starting_pci_address,
					 segment_size);
#endif

	pr_info(" %d PE's M32: 0x%x [segment=0x%x] IO: 0x%x [segment=0x%x]\n",
		phb->ioda.total_pe,
		phb->ioda.m32_size, phb->ioda.m32_segsize,
		phb->ioda.io_size, phb->ioda.io_segsize);

	if (phb->regs) {
		pr_devel(" BUID     = 0x%016llx\n", in_be64(phb->regs + 0x100));
		pr_devel(" PHB2_CR  = 0x%016llx\n", in_be64(phb->regs + 0x160));
		pr_devel(" IO_BAR   = 0x%016llx\n", in_be64(phb->regs + 0x170));
		pr_devel(" IO_BAMR  = 0x%016llx\n", in_be64(phb->regs + 0x178));
		pr_devel(" IO_SAR   = 0x%016llx\n", in_be64(phb->regs + 0x180));
		pr_devel(" M32_BAR  = 0x%016llx\n", in_be64(phb->regs + 0x190));
		pr_devel(" M32_BAMR = 0x%016llx\n", in_be64(phb->regs + 0x198));
		pr_devel(" M32_SAR  = 0x%016llx\n", in_be64(phb->regs + 0x1a0));
	}
	phb->hose->ops = &pnv_pci_ops;

	/* Setup RID -> PE mapping function */
	phb->bdfn_to_pe = pnv_ioda_bdfn_to_pe;

	/* Setup TCEs */
	phb->dma_dev_setup = pnv_pci_ioda_dma_dev_setup;

	/* Setup MSI support */
	pnv_pci_init_ioda_msis(phb);

	/*
	 * We pass the PCI probe flag PCI_REASSIGN_ALL_RSRC here
	 * to let the PCI core do resource assignment. It's expected
	 * that the PCI core will do correct I/O and MMIO alignment
	 * for the P2P bridge BARs so that each PCI bus (excluding
	 * the child P2P bridges) can form an individual PE.
	 */
	ppc_md.pcibios_fixup = pnv_pci_ioda_fixup;
	ppc_md.pcibios_enable_device_hook = pnv_pci_enable_device_hook;
	ppc_md.pcibios_window_alignment = pnv_pci_window_alignment;
	pci_add_flags(PCI_REASSIGN_ALL_RSRC);

	/* Reset IODA tables to a clean state */
	rc = opal_pci_reset(phb_id, OPAL_PCI_IODA_TABLE_RESET, OPAL_ASSERT_RESET);
	if (rc)
		pr_warning(" OPAL Error %ld performing IODA table reset !\n", rc);
	opal_pci_set_pe(phb_id, 0, 0, 7, 1, 1, OPAL_MAP_PE);
}
void __init pnv_pci_init_ioda_hub(struct device_node *np)
{
	struct device_node *phbn;
	const u64 *prop64;
	u64 hub_id;

	pr_info("Probing IODA IO-Hub %s\n", np->full_name);

	prop64 = of_get_property(np, "ibm,opal-hubid", NULL);
	if (!prop64) {
		pr_err(" Missing \"ibm,opal-hubid\" property !\n");
		return;
	}
	hub_id = be64_to_cpup(prop64);
	pr_devel(" HUB-ID : 0x%016llx\n", hub_id);

	/* Count child PHBs */
	for_each_child_of_node(np, phbn) {
		/* Look for IODA1 PHBs */
		if (of_device_is_compatible(phbn, "ibm,ioda-phb"))
			pnv_pci_init_ioda1_phb(phbn);
	}
}