pci-dma.c

/*
 * Copyright 2010 Tilera Corporation. All Rights Reserved.
 *
 *   This program is free software; you can redistribute it and/or
 *   modify it under the terms of the GNU General Public License
 *   as published by the Free Software Foundation, version 2.
 *
 *   This program is distributed in the hope that it will be useful, but
 *   WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
 *   NON INFRINGEMENT.  See the GNU General Public License for
 *   more details.
 */

#include <linux/mm.h>
#include <linux/dma-mapping.h>
#include <linux/swiotlb.h>
#include <linux/vmalloc.h>
#include <linux/export.h>
#include <asm/tlbflush.h>
#include <asm/homecache.h>

/* Generic DMA mapping functions: */

/*
 * Allocate what Linux calls "coherent" memory.  On TILEPro this is
 * uncached memory; on TILE-Gx it is hash-for-home memory.
 */
#ifdef __tilepro__
#define PAGE_HOME_DMA PAGE_HOME_UNCACHED
#else
#define PAGE_HOME_DMA PAGE_HOME_HASH
#endif
static void *tile_dma_alloc_coherent(struct device *dev, size_t size,
				     dma_addr_t *dma_handle, gfp_t gfp,
				     struct dma_attrs *attrs)
{
	u64 dma_mask = dev->coherent_dma_mask ?: DMA_BIT_MASK(32);
	int node = dev_to_node(dev);
	int order = get_order(size);
	struct page *pg;
	dma_addr_t addr;

	gfp |= __GFP_ZERO;

	/*
	 * If the mask specifies that the memory be in the first 4 GB, then
	 * we force the allocation to come from the DMA zone.  We also
	 * force the node to 0 since that's the only node where the DMA
	 * zone isn't empty.  If the mask size is smaller than 32 bits, we
	 * may still not be able to guarantee a suitable memory address, in
	 * which case we will return NULL.  But such devices are uncommon.
	 */
	if (dma_mask <= DMA_BIT_MASK(32)) {
		gfp |= GFP_DMA;
		node = 0;
	}

	pg = homecache_alloc_pages_node(node, gfp, order, PAGE_HOME_DMA);
	if (pg == NULL)
		return NULL;

	addr = page_to_phys(pg);
	if (addr + size > dma_mask) {
		__homecache_free_pages(pg, order);
		return NULL;
	}

	*dma_handle = addr;

	return page_address(pg);
}
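
/*
 * Illustrative sketch (not part of the original file): a driver would
 * normally reach the routine above through the generic DMA API rather
 * than calling it directly, e.g.
 *
 *	void *buf;
 *	dma_addr_t handle;
 *
 *	buf = dma_alloc_coherent(dev, PAGE_SIZE, &handle, GFP_KERNEL);
 *	if (!buf)
 *		return -ENOMEM;
 *	// ... use buf / handle ...
 *	dma_free_coherent(dev, PAGE_SIZE, buf, handle);
 *
 * With a 32-bit coherent_dma_mask the allocation is steered to the DMA
 * zone on node 0, as described in the comment above.
 */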
/*
 * Free memory that was allocated with tile_dma_alloc_coherent.
 */
static void tile_dma_free_coherent(struct device *dev, size_t size,
				   void *vaddr, dma_addr_t dma_handle,
				   struct dma_attrs *attrs)
{
	homecache_free_pages((unsigned long)vaddr, get_order(size));
}

/*
 * The map routines "map" the specified address range for DMA
 * accesses.  The memory belongs to the device after this call is
 * issued, until it is unmapped with dma_unmap_single.
 *
 * We don't need to do any mapping, we just flush the address range
 * out of the cache and return a DMA address.
 *
 * The unmap routines do whatever is necessary before the processor
 * accesses the memory again, and must be called before the driver
 * touches the memory.  We can get away with a cache invalidate if we
 * can count on nothing having been touched.
 */
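
/*
 * Illustrative sketch (not part of the original file): the streaming
 * mapping discipline described above looks like this from a driver's
 * point of view:
 *
 *	dma_addr_t handle = dma_map_single(dev, buf, len, DMA_FROM_DEVICE);
 *	if (dma_mapping_error(dev, handle))
 *		return -ENOMEM;
 *	// ... device DMAs into buf; the CPU must not touch it ...
 *	dma_unmap_single(dev, handle, len, DMA_FROM_DEVICE);
 *	// now the CPU may read the data the device wrote
 *
 * Map flushes/invalidates the buffer out of the cache; unmap makes the
 * buffer safe for the CPU to read again.
 */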
/* Set up a single page for DMA access. */
static void __dma_prep_page(struct page *page, unsigned long offset,
			    size_t size, enum dma_data_direction direction)
{
	/*
	 * Flush the page from cache if necessary.
	 * On tilegx, data is delivered to hash-for-home L3; on tilepro,
	 * data is delivered direct to memory.
	 *
	 * NOTE: If we were just doing DMA_TO_DEVICE we could optimize
	 * this to be a "flush" not a "finv" and keep some of the
	 * state in cache across the DMA operation, but it doesn't seem
	 * worth creating the necessary flush_buffer_xxx() infrastructure.
	 */
	int home = page_home(page);
	switch (home) {
	case PAGE_HOME_HASH:
#ifdef __tilegx__
		return;
#endif
		break;
	case PAGE_HOME_UNCACHED:
#ifdef __tilepro__
		return;
#endif
		break;
	case PAGE_HOME_IMMUTABLE:
		/* Should be going to the device only. */
		BUG_ON(direction == DMA_FROM_DEVICE ||
		       direction == DMA_BIDIRECTIONAL);
		return;
	case PAGE_HOME_INCOHERENT:
		/* Incoherent anyway, so no need to work hard here. */
		return;
	default:
		BUG_ON(home < 0 || home >= NR_CPUS);
		break;
	}
	homecache_finv_page(page);

#ifdef DEBUG_ALIGNMENT
	/* Warn if the region isn't cacheline aligned. */
	if (offset & (L2_CACHE_BYTES - 1) || (size & (L2_CACHE_BYTES - 1)))
		pr_warn("Unaligned DMA to non-hfh memory: PA %#llx/%#lx\n",
			PFN_PHYS(page_to_pfn(page)) + offset, size);
#endif
}
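
/*
 * Note added for clarity (not in the original file): the alignment
 * warning above matters because cache maintenance works on whole cache
 * lines.  If a DMA buffer shares a cache line with unrelated data,
 * invalidating that line after DMA_FROM_DEVICE can discard CPU stores
 * to the neighboring bytes, so streaming DMA buffers should be
 * L2_CACHE_BYTES-aligned and padded.
 */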
/* Make the page ready to be read by the core. */
static void __dma_complete_page(struct page *page, unsigned long offset,
				size_t size, enum dma_data_direction direction)
{
#ifdef __tilegx__
	switch (page_home(page)) {
	case PAGE_HOME_HASH:
		/* I/O device delivered data the way the cpu wanted it. */
		break;
	case PAGE_HOME_INCOHERENT:
		/* Incoherent anyway, so no need to work hard here. */
		break;
	case PAGE_HOME_IMMUTABLE:
		/* Extra read-only copies are not a problem. */
		break;
	default:
		/* Flush the bogus hash-for-home I/O entries to memory. */
		homecache_finv_map_page(page, PAGE_HOME_HASH);
		break;
	}
#endif
}
static void __dma_prep_pa_range(dma_addr_t dma_addr, size_t size,
				enum dma_data_direction direction)
{
	struct page *page = pfn_to_page(PFN_DOWN(dma_addr));
	unsigned long offset = dma_addr & (PAGE_SIZE - 1);
	size_t bytes = min(size, (size_t)(PAGE_SIZE - offset));

	while (size != 0) {
		__dma_prep_page(page, offset, bytes, direction);
		size -= bytes;
		++page;
		offset = 0;
		bytes = min((size_t)PAGE_SIZE, size);
	}
}

static void __dma_complete_pa_range(dma_addr_t dma_addr, size_t size,
				    enum dma_data_direction direction)
{
	struct page *page = pfn_to_page(PFN_DOWN(dma_addr));
	unsigned long offset = dma_addr & (PAGE_SIZE - 1);
	size_t bytes = min(size, (size_t)(PAGE_SIZE - offset));

	while (size != 0) {
		__dma_complete_page(page, offset, bytes, direction);
		size -= bytes;
		++page;
		offset = 0;
		bytes = min((size_t)PAGE_SIZE, size);
	}
}
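
/*
 * Worked example (added for clarity, assuming 4 KB pages): for
 * dma_addr = 0x12345f80 and size = 0x200, offset is 0xf80, so the
 * first loop iteration covers the remaining 0x80 bytes of that page
 * and the second covers the leading 0x180 bytes of the next page.
 * Each physical page in the range is prepped/completed exactly once.
 */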
static int tile_dma_map_sg(struct device *dev, struct scatterlist *sglist,
			   int nents, enum dma_data_direction direction,
			   struct dma_attrs *attrs)
{
	struct scatterlist *sg;
	int i;

	BUG_ON(!valid_dma_direction(direction));

	WARN_ON(nents == 0 || sglist->length == 0);

	for_each_sg(sglist, sg, nents, i) {
		sg->dma_address = sg_phys(sg);
		__dma_prep_pa_range(sg->dma_address, sg->length, direction);
#ifdef CONFIG_NEED_SG_DMA_LENGTH
		sg->dma_length = sg->length;
#endif
	}

	return nents;
}
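
/*
 * Illustrative sketch (not part of the original file): a caller maps a
 * scatterlist through the generic API, which dispatches to the routine
 * above:
 *
 *	struct scatterlist sgl[2];
 *	int count;
 *
 *	sg_init_table(sgl, 2);
 *	sg_set_buf(&sgl[0], buf0, len0);
 *	sg_set_buf(&sgl[1], buf1, len1);
 *	count = dma_map_sg(dev, sgl, 2, DMA_TO_DEVICE);
 *	if (!count)
 *		return -ENOMEM;
 *	// program the device with sg_dma_address()/sg_dma_len() per entry
 *	dma_unmap_sg(dev, sgl, 2, DMA_TO_DEVICE);
 */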
static void tile_dma_unmap_sg(struct device *dev, struct scatterlist *sglist,
			      int nents, enum dma_data_direction direction,
			      struct dma_attrs *attrs)
{
	struct scatterlist *sg;
	int i;

	BUG_ON(!valid_dma_direction(direction));
	for_each_sg(sglist, sg, nents, i) {
		sg->dma_address = sg_phys(sg);
		__dma_complete_pa_range(sg->dma_address, sg->length,
					direction);
	}
}

static dma_addr_t tile_dma_map_page(struct device *dev, struct page *page,
				    unsigned long offset, size_t size,
				    enum dma_data_direction direction,
				    struct dma_attrs *attrs)
{
	BUG_ON(!valid_dma_direction(direction));

	BUG_ON(offset + size > PAGE_SIZE);
	__dma_prep_page(page, offset, size, direction);

	return page_to_pa(page) + offset;
}
static void tile_dma_unmap_page(struct device *dev, dma_addr_t dma_address,
				size_t size, enum dma_data_direction direction,
				struct dma_attrs *attrs)
{
	BUG_ON(!valid_dma_direction(direction));

	/* Mask with PAGE_SIZE - 1 to recover the intra-page offset. */
	__dma_complete_page(pfn_to_page(PFN_DOWN(dma_address)),
			    dma_address & (PAGE_SIZE - 1), size, direction);
}
static void tile_dma_sync_single_for_cpu(struct device *dev,
					 dma_addr_t dma_handle,
					 size_t size,
					 enum dma_data_direction direction)
{
	BUG_ON(!valid_dma_direction(direction));

	__dma_complete_pa_range(dma_handle, size, direction);
}

static void tile_dma_sync_single_for_device(struct device *dev,
					    dma_addr_t dma_handle, size_t size,
					    enum dma_data_direction direction)
{
	__dma_prep_pa_range(dma_handle, size, direction);
}

static void tile_dma_sync_sg_for_cpu(struct device *dev,
				     struct scatterlist *sglist, int nelems,
				     enum dma_data_direction direction)
{
	struct scatterlist *sg;
	int i;

	BUG_ON(!valid_dma_direction(direction));
	WARN_ON(nelems == 0 || sglist->length == 0);

	for_each_sg(sglist, sg, nelems, i) {
		dma_sync_single_for_cpu(dev, sg->dma_address,
					sg_dma_len(sg), direction);
	}
}

static void tile_dma_sync_sg_for_device(struct device *dev,
					struct scatterlist *sglist, int nelems,
					enum dma_data_direction direction)
{
	struct scatterlist *sg;
	int i;

	BUG_ON(!valid_dma_direction(direction));
	WARN_ON(nelems == 0 || sglist->length == 0);

	for_each_sg(sglist, sg, nelems, i) {
		dma_sync_single_for_device(dev, sg->dma_address,
					   sg_dma_len(sg), direction);
	}
}
static inline int
tile_dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
{
	return 0;
}

static inline int
tile_dma_supported(struct device *dev, u64 mask)
{
	return 1;
}

static struct dma_map_ops tile_default_dma_map_ops = {
	.alloc = tile_dma_alloc_coherent,
	.free = tile_dma_free_coherent,
	.map_page = tile_dma_map_page,
	.unmap_page = tile_dma_unmap_page,
	.map_sg = tile_dma_map_sg,
	.unmap_sg = tile_dma_unmap_sg,
	.sync_single_for_cpu = tile_dma_sync_single_for_cpu,
	.sync_single_for_device = tile_dma_sync_single_for_device,
	.sync_sg_for_cpu = tile_dma_sync_sg_for_cpu,
	.sync_sg_for_device = tile_dma_sync_sg_for_device,
	.mapping_error = tile_dma_mapping_error,
	.dma_supported = tile_dma_supported
};

struct dma_map_ops *tile_dma_map_ops = &tile_default_dma_map_ops;
EXPORT_SYMBOL(tile_dma_map_ops);
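
/*
 * Note added for clarity (not in the original file): mapping_error
 * always reports success and dma_supported always reports true here
 * because these ops hand out plain physical addresses with no IOMMU or
 * bounce buffering, so there is no per-mapping step that can fail.  The
 * generic dma_map_*() wrappers find these ops via get_dma_ops(dev) and
 * call through the function pointers above.
 */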
/* Generic PCI DMA mapping functions */

static void *tile_pci_dma_alloc_coherent(struct device *dev, size_t size,
					 dma_addr_t *dma_handle, gfp_t gfp,
					 struct dma_attrs *attrs)
{
	int node = dev_to_node(dev);
	int order = get_order(size);
	struct page *pg;
	dma_addr_t addr;

	gfp |= __GFP_ZERO;

	pg = homecache_alloc_pages_node(node, gfp, order, PAGE_HOME_DMA);
	if (pg == NULL)
		return NULL;

	addr = page_to_phys(pg);

	*dma_handle = phys_to_dma(dev, addr);

	return page_address(pg);
}
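
/*
 * Note added for clarity (not in the original file): the PCI variants
 * below differ from the generic ops above only in that they translate
 * between CPU physical addresses and PCI bus addresses with
 * phys_to_dma()/dma_to_phys(), since the address a PCIe device must use
 * to reach memory need not equal the CPU physical address.
 */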
/*
 * Free memory that was allocated with tile_pci_dma_alloc_coherent.
 */
static void tile_pci_dma_free_coherent(struct device *dev, size_t size,
				       void *vaddr, dma_addr_t dma_handle,
				       struct dma_attrs *attrs)
{
	homecache_free_pages((unsigned long)vaddr, get_order(size));
}

static int tile_pci_dma_map_sg(struct device *dev, struct scatterlist *sglist,
			       int nents, enum dma_data_direction direction,
			       struct dma_attrs *attrs)
{
	struct scatterlist *sg;
	int i;

	BUG_ON(!valid_dma_direction(direction));

	WARN_ON(nents == 0 || sglist->length == 0);

	for_each_sg(sglist, sg, nents, i) {
		sg->dma_address = sg_phys(sg);
		__dma_prep_pa_range(sg->dma_address, sg->length, direction);

		sg->dma_address = phys_to_dma(dev, sg->dma_address);
#ifdef CONFIG_NEED_SG_DMA_LENGTH
		sg->dma_length = sg->length;
#endif
	}

	return nents;
}

static void tile_pci_dma_unmap_sg(struct device *dev,
				  struct scatterlist *sglist, int nents,
				  enum dma_data_direction direction,
				  struct dma_attrs *attrs)
{
	struct scatterlist *sg;
	int i;

	BUG_ON(!valid_dma_direction(direction));
	for_each_sg(sglist, sg, nents, i) {
		sg->dma_address = sg_phys(sg);
		__dma_complete_pa_range(sg->dma_address, sg->length,
					direction);
	}
}

static dma_addr_t tile_pci_dma_map_page(struct device *dev, struct page *page,
					unsigned long offset, size_t size,
					enum dma_data_direction direction,
					struct dma_attrs *attrs)
{
	BUG_ON(!valid_dma_direction(direction));

	BUG_ON(offset + size > PAGE_SIZE);
	__dma_prep_page(page, offset, size, direction);

	return phys_to_dma(dev, page_to_pa(page) + offset);
}
static void tile_pci_dma_unmap_page(struct device *dev, dma_addr_t dma_address,
				    size_t size,
				    enum dma_data_direction direction,
				    struct dma_attrs *attrs)
{
	BUG_ON(!valid_dma_direction(direction));

	dma_address = dma_to_phys(dev, dma_address);

	/* Mask with PAGE_SIZE - 1 to recover the intra-page offset. */
	__dma_complete_page(pfn_to_page(PFN_DOWN(dma_address)),
			    dma_address & (PAGE_SIZE - 1), size, direction);
}
static void tile_pci_dma_sync_single_for_cpu(struct device *dev,
					     dma_addr_t dma_handle,
					     size_t size,
					     enum dma_data_direction direction)
{
	BUG_ON(!valid_dma_direction(direction));

	dma_handle = dma_to_phys(dev, dma_handle);

	__dma_complete_pa_range(dma_handle, size, direction);
}

static void tile_pci_dma_sync_single_for_device(struct device *dev,
						dma_addr_t dma_handle,
						size_t size,
						enum dma_data_direction
						direction)
{
	dma_handle = dma_to_phys(dev, dma_handle);

	__dma_prep_pa_range(dma_handle, size, direction);
}

static void tile_pci_dma_sync_sg_for_cpu(struct device *dev,
					 struct scatterlist *sglist,
					 int nelems,
					 enum dma_data_direction direction)
{
	struct scatterlist *sg;
	int i;

	BUG_ON(!valid_dma_direction(direction));
	WARN_ON(nelems == 0 || sglist->length == 0);

	for_each_sg(sglist, sg, nelems, i) {
		dma_sync_single_for_cpu(dev, sg->dma_address,
					sg_dma_len(sg), direction);
	}
}

static void tile_pci_dma_sync_sg_for_device(struct device *dev,
					    struct scatterlist *sglist,
					    int nelems,
					    enum dma_data_direction direction)
{
	struct scatterlist *sg;
	int i;

	BUG_ON(!valid_dma_direction(direction));
	WARN_ON(nelems == 0 || sglist->length == 0);

	for_each_sg(sglist, sg, nelems, i) {
		dma_sync_single_for_device(dev, sg->dma_address,
					   sg_dma_len(sg), direction);
	}
}

static inline int
tile_pci_dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
{
	return 0;
}

static inline int
tile_pci_dma_supported(struct device *dev, u64 mask)
{
	return 1;
}
static struct dma_map_ops tile_pci_default_dma_map_ops = {
	.alloc = tile_pci_dma_alloc_coherent,
	.free = tile_pci_dma_free_coherent,
	.map_page = tile_pci_dma_map_page,
	.unmap_page = tile_pci_dma_unmap_page,
	.map_sg = tile_pci_dma_map_sg,
	.unmap_sg = tile_pci_dma_unmap_sg,
	.sync_single_for_cpu = tile_pci_dma_sync_single_for_cpu,
	.sync_single_for_device = tile_pci_dma_sync_single_for_device,
	.sync_sg_for_cpu = tile_pci_dma_sync_sg_for_cpu,
	.sync_sg_for_device = tile_pci_dma_sync_sg_for_device,
	.mapping_error = tile_pci_dma_mapping_error,
	.dma_supported = tile_pci_dma_supported
};

struct dma_map_ops *gx_pci_dma_map_ops = &tile_pci_default_dma_map_ops;
EXPORT_SYMBOL(gx_pci_dma_map_ops);
/* PCI DMA mapping functions for legacy PCI devices */

#ifdef CONFIG_SWIOTLB
static void *tile_swiotlb_alloc_coherent(struct device *dev, size_t size,
					 dma_addr_t *dma_handle, gfp_t gfp,
					 struct dma_attrs *attrs)
{
	gfp |= GFP_DMA;
	return swiotlb_alloc_coherent(dev, size, dma_handle, gfp);
}

static void tile_swiotlb_free_coherent(struct device *dev, size_t size,
				       void *vaddr, dma_addr_t dma_addr,
				       struct dma_attrs *attrs)
{
	swiotlb_free_coherent(dev, size, vaddr, dma_addr);
}

static struct dma_map_ops pci_swiotlb_dma_ops = {
	.alloc = tile_swiotlb_alloc_coherent,
	.free = tile_swiotlb_free_coherent,
	.map_page = swiotlb_map_page,
	.unmap_page = swiotlb_unmap_page,
	.map_sg = swiotlb_map_sg_attrs,
	.unmap_sg = swiotlb_unmap_sg_attrs,
	.sync_single_for_cpu = swiotlb_sync_single_for_cpu,
	.sync_single_for_device = swiotlb_sync_single_for_device,
	.sync_sg_for_cpu = swiotlb_sync_sg_for_cpu,
	.sync_sg_for_device = swiotlb_sync_sg_for_device,
	.dma_supported = swiotlb_dma_supported,
	.mapping_error = swiotlb_dma_mapping_error,
};

struct dma_map_ops *gx_legacy_pci_dma_map_ops = &pci_swiotlb_dma_ops;
#else
struct dma_map_ops *gx_legacy_pci_dma_map_ops;
#endif
EXPORT_SYMBOL(gx_legacy_pci_dma_map_ops);
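
/*
 * Note added for clarity (not in the original file): legacy 32-bit PCI
 * devices cannot address all of physical memory directly, so they fall
 * back to the swiotlb ops above, which bounce DMA through buffers
 * allocated low in memory (GFP_DMA) and copy to/from the real buffer on
 * map/unmap/sync.
 */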
#ifdef CONFIG_ARCH_HAS_DMA_SET_COHERENT_MASK
int dma_set_coherent_mask(struct device *dev, u64 mask)
{
	struct dma_map_ops *dma_ops = get_dma_ops(dev);

	/* Handle legacy PCI devices with limited memory addressability. */
	if (((dma_ops == gx_pci_dma_map_ops) ||
	     (dma_ops == gx_legacy_pci_dma_map_ops)) &&
	    (mask <= DMA_BIT_MASK(32))) {
		if (mask > dev->archdata.max_direct_dma_addr)
			mask = dev->archdata.max_direct_dma_addr;
	}

	if (!dma_supported(dev, mask))
		return -EIO;
	dev->coherent_dma_mask = mask;
	return 0;
}
EXPORT_SYMBOL(dma_set_coherent_mask);
#endif
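
/*
 * Illustrative sketch (not part of the original file): a PCI driver
 * typically negotiates its masks during probe, which on this platform
 * reaches the routine above for the coherent mask:
 *
 *	if (dma_set_mask(&pdev->dev, DMA_BIT_MASK(64)) ||
 *	    dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64))) {
 *		dev_warn(&pdev->dev, "falling back to 32-bit DMA\n");
 *		dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
 *		dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32));
 *	}
 *
 * A 32-bit coherent mask for a device using the Gx PCI ops is clamped
 * to archdata.max_direct_dma_addr here.
 */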