ioat_dma.c

/*
 * Intel I/OAT DMA Linux driver
 * Copyright(c) 2004 - 2007 Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * The full GNU General Public License is included in this distribution in
 * the file called "COPYING".
 *
 */

/*
 * This driver supports an Intel I/OAT DMA engine, which does asynchronous
 * copy operations.
 */

#include <linux/init.h>
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/interrupt.h>
#include <linux/dmaengine.h>
#include <linux/delay.h>
#include <linux/dma-mapping.h>
#include "ioatdma.h"
#include "ioatdma_registers.h"
#include "ioatdma_hw.h"

#define INITIAL_IOAT_DESC_COUNT 128

#define to_ioat_chan(chan) container_of(chan, struct ioat_dma_chan, common)
#define to_ioatdma_device(dev) container_of(dev, struct ioatdma_device, common)
#define to_ioat_desc(lh) container_of(lh, struct ioat_desc_sw, node)
#define tx_to_ioat_desc(tx) container_of(tx, struct ioat_desc_sw, async_tx)

/* internal functions */
static void ioat_dma_start_null_desc(struct ioat_dma_chan *ioat_chan);
static void ioat_dma_memcpy_cleanup(struct ioat_dma_chan *ioat_chan);
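
/**
 * ioat_lookup_chan_by_index - map a channel index to its ioat_dma_chan
 * @device: ioat device
 * @index: channel number
 */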
static struct ioat_dma_chan *ioat_lookup_chan_by_index(struct ioatdma_device *device,
						       int index)
{
	return device->idx[index];
}

/**
 * ioat_dma_do_interrupt - handler used for single vector interrupt mode
 * @irq: interrupt id
 * @data: interrupt data
 */
static irqreturn_t ioat_dma_do_interrupt(int irq, void *data)
{
	struct ioatdma_device *instance = data;
	struct ioat_dma_chan *ioat_chan;
	unsigned long attnstatus;
	int bit;
	u8 intrctrl;

	intrctrl = readb(instance->reg_base + IOAT_INTRCTRL_OFFSET);

	if (!(intrctrl & IOAT_INTRCTRL_MASTER_INT_EN))
		return IRQ_NONE;

	if (!(intrctrl & IOAT_INTRCTRL_INT_STATUS)) {
		writeb(intrctrl, instance->reg_base + IOAT_INTRCTRL_OFFSET);
		return IRQ_NONE;
	}

	attnstatus = readl(instance->reg_base + IOAT_ATTNSTATUS_OFFSET);
	for_each_bit(bit, &attnstatus, BITS_PER_LONG) {
		ioat_chan = ioat_lookup_chan_by_index(instance, bit);
		tasklet_schedule(&ioat_chan->cleanup_task);
	}

	writeb(intrctrl, instance->reg_base + IOAT_INTRCTRL_OFFSET);
	return IRQ_HANDLED;
}

/**
 * ioat_dma_do_interrupt_msix - handler used for vector-per-channel interrupt mode
 * @irq: interrupt id
 * @data: interrupt data
 */
static irqreturn_t ioat_dma_do_interrupt_msix(int irq, void *data)
{
	struct ioat_dma_chan *ioat_chan = data;

	tasklet_schedule(&ioat_chan->cleanup_task);

	return IRQ_HANDLED;
}

static void ioat_dma_cleanup_tasklet(unsigned long data);

/**
 * ioat_dma_enumerate_channels - find and initialize the device's channels
 * @device: the device to be enumerated
 */
static int ioat_dma_enumerate_channels(struct ioatdma_device *device)
{
	u8 xfercap_scale;
	u32 xfercap;
	int i;
	struct ioat_dma_chan *ioat_chan;

	device->common.chancnt = readb(device->reg_base + IOAT_CHANCNT_OFFSET);
	xfercap_scale = readb(device->reg_base + IOAT_XFERCAP_OFFSET);
	xfercap = (xfercap_scale == 0 ? -1 : (1UL << xfercap_scale));

	for (i = 0; i < device->common.chancnt; i++) {
		ioat_chan = kzalloc(sizeof(*ioat_chan), GFP_KERNEL);
		if (!ioat_chan) {
			device->common.chancnt = i;
			break;
		}

		ioat_chan->device = device;
		ioat_chan->reg_base = device->reg_base + (0x80 * (i + 1));
		ioat_chan->xfercap = xfercap;
		spin_lock_init(&ioat_chan->cleanup_lock);
		spin_lock_init(&ioat_chan->desc_lock);
		INIT_LIST_HEAD(&ioat_chan->free_desc);
		INIT_LIST_HEAD(&ioat_chan->used_desc);
		/* This should be made common somewhere in dmaengine.c */
		ioat_chan->common.device = &device->common;
		list_add_tail(&ioat_chan->common.device_node,
			      &device->common.channels);
		device->idx[i] = ioat_chan;
		tasklet_init(&ioat_chan->cleanup_task,
			     ioat_dma_cleanup_tasklet,
			     (unsigned long) ioat_chan);
		tasklet_disable(&ioat_chan->cleanup_task);
	}
	return device->common.chancnt;
}
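
/**
 * ioat_set_src - program the source address into a transaction's descriptors
 * @addr: DMA address of the source buffer
 * @tx: transaction descriptor whose hardware chain is updated
 * @index: unused by this driver
 *
 * Records @addr for later unmapping and writes it into each hardware
 * descriptor in the chain, advancing by the channel's transfer capability.
 */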
static void ioat_set_src(dma_addr_t addr,
			 struct dma_async_tx_descriptor *tx,
			 int index)
{
	struct ioat_desc_sw *iter, *desc = tx_to_ioat_desc(tx);
	struct ioat_dma_chan *ioat_chan = to_ioat_chan(tx->chan);

	pci_unmap_addr_set(desc, src, addr);

	list_for_each_entry(iter, &desc->async_tx.tx_list, node) {
		iter->hw->src_addr = addr;
		addr += ioat_chan->xfercap;
	}
}
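
/**
 * ioat_set_dest - program the destination address into a transaction's descriptors
 * @addr: DMA address of the destination buffer
 * @tx: transaction descriptor whose hardware chain is updated
 * @index: unused by this driver
 */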
static void ioat_set_dest(dma_addr_t addr,
			  struct dma_async_tx_descriptor *tx,
			  int index)
{
	struct ioat_desc_sw *iter, *desc = tx_to_ioat_desc(tx);
	struct ioat_dma_chan *ioat_chan = to_ioat_chan(tx->chan);

	pci_unmap_addr_set(desc, dst, addr);

	list_for_each_entry(iter, &desc->async_tx.tx_list, node) {
		iter->hw->dst_addr = addr;
		addr += ioat_chan->xfercap;
	}
}
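
/**
 * ioat_tx_submit - assign a cookie and hand a descriptor chain to the channel
 * @tx: transaction descriptor to be submitted
 *
 * Assigns the next cookie, links the transaction's chain onto the tail of
 * the channel's used list, and issues an append command once at least four
 * descriptors are pending.
 */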
static dma_cookie_t ioat_tx_submit(struct dma_async_tx_descriptor *tx)
{
	struct ioat_dma_chan *ioat_chan = to_ioat_chan(tx->chan);
	struct ioat_desc_sw *desc = tx_to_ioat_desc(tx);
	int append = 0;
	dma_cookie_t cookie;
	struct ioat_desc_sw *group_start;

	group_start = list_entry(desc->async_tx.tx_list.next,
				 struct ioat_desc_sw, node);
	spin_lock_bh(&ioat_chan->desc_lock);
	/* cookie incr and addition to used_list must be atomic */
	cookie = ioat_chan->common.cookie;
	cookie++;
	if (cookie < 0)
		cookie = 1;
	ioat_chan->common.cookie = desc->async_tx.cookie = cookie;

	/* write address into NextDescriptor field of last desc in chain */
	to_ioat_desc(ioat_chan->used_desc.prev)->hw->next =
						group_start->async_tx.phys;
	list_splice_init(&desc->async_tx.tx_list, ioat_chan->used_desc.prev);

	ioat_chan->pending += desc->tx_cnt;
	if (ioat_chan->pending >= 4) {
		append = 1;
		ioat_chan->pending = 0;
	}
	spin_unlock_bh(&ioat_chan->desc_lock);

	if (append)
		writeb(IOAT_CHANCMD_APPEND,
		       ioat_chan->reg_base + IOAT_CHANCMD_OFFSET);

	return cookie;
}
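
/**
 * ioat_dma_alloc_descriptor - allocate and initialize a descriptor pair
 * @ioat_chan: the channel the descriptor will be used on
 * @flags: allocation flags (GFP_KERNEL or GFP_ATOMIC)
 *
 * Allocates a hardware descriptor from the channel's DMA pool together with
 * its software tracking structure and wires up the async_tx callbacks.
 */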
static struct ioat_desc_sw *ioat_dma_alloc_descriptor(
					struct ioat_dma_chan *ioat_chan,
					gfp_t flags)
{
	struct ioat_dma_descriptor *desc;
	struct ioat_desc_sw *desc_sw;
	struct ioatdma_device *ioatdma_device;
	dma_addr_t phys;

	ioatdma_device = to_ioatdma_device(ioat_chan->common.device);
	desc = pci_pool_alloc(ioatdma_device->dma_pool, flags, &phys);
	if (unlikely(!desc))
		return NULL;

	desc_sw = kzalloc(sizeof(*desc_sw), flags);
	if (unlikely(!desc_sw)) {
		pci_pool_free(ioatdma_device->dma_pool, desc, phys);
		return NULL;
	}

	memset(desc, 0, sizeof(*desc));
	dma_async_tx_descriptor_init(&desc_sw->async_tx, &ioat_chan->common);
	desc_sw->async_tx.tx_set_src = ioat_set_src;
	desc_sw->async_tx.tx_set_dest = ioat_set_dest;
	desc_sw->async_tx.tx_submit = ioat_tx_submit;
	INIT_LIST_HEAD(&desc_sw->async_tx.tx_list);
	desc_sw->hw = desc;
	desc_sw->async_tx.phys = phys;

	return desc_sw;
}

/* returns the actual number of allocated descriptors */
static int ioat_dma_alloc_chan_resources(struct dma_chan *chan)
{
	struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);
	struct ioat_desc_sw *desc = NULL;
	u16 chanctrl;
	u32 chanerr;
	int i;
	LIST_HEAD(tmp_list);

	/* have we already been set up? */
	if (!list_empty(&ioat_chan->free_desc))
		return INITIAL_IOAT_DESC_COUNT;

	/* Setup register to interrupt and write completion status on error */
	chanctrl = IOAT_CHANCTRL_ERR_INT_EN |
		IOAT_CHANCTRL_ANY_ERR_ABORT_EN |
		IOAT_CHANCTRL_ERR_COMPLETION_EN;
	writew(chanctrl, ioat_chan->reg_base + IOAT_CHANCTRL_OFFSET);

	chanerr = readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET);
	if (chanerr) {
		dev_err(&ioat_chan->device->pdev->dev,
			"ioatdma: CHANERR = %x, clearing\n", chanerr);
		writel(chanerr, ioat_chan->reg_base + IOAT_CHANERR_OFFSET);
	}

	/* Allocate descriptors */
	for (i = 0; i < INITIAL_IOAT_DESC_COUNT; i++) {
		desc = ioat_dma_alloc_descriptor(ioat_chan, GFP_KERNEL);
		if (!desc) {
			dev_err(&ioat_chan->device->pdev->dev,
				"ioatdma: Only %d initial descriptors\n", i);
			break;
		}
		list_add_tail(&desc->node, &tmp_list);
	}
	spin_lock_bh(&ioat_chan->desc_lock);
	list_splice(&tmp_list, &ioat_chan->free_desc);
	spin_unlock_bh(&ioat_chan->desc_lock);

	/* allocate a completion writeback area */
	/* doing 2 32bit writes to mmio since 1 64b write doesn't work */
	ioat_chan->completion_virt =
		pci_pool_alloc(ioat_chan->device->completion_pool,
			       GFP_KERNEL,
			       &ioat_chan->completion_addr);
	memset(ioat_chan->completion_virt, 0,
	       sizeof(*ioat_chan->completion_virt));
	writel(((u64) ioat_chan->completion_addr) & 0x00000000FFFFFFFF,
	       ioat_chan->reg_base + IOAT_CHANCMP_OFFSET_LOW);
	writel(((u64) ioat_chan->completion_addr) >> 32,
	       ioat_chan->reg_base + IOAT_CHANCMP_OFFSET_HIGH);

	tasklet_enable(&ioat_chan->cleanup_task);
	ioat_dma_start_null_desc(ioat_chan);
	return i;
}
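
/**
 * ioat_dma_free_chan_resources - release a channel's descriptors and completion area
 * @chan: the channel to be cleaned up
 *
 * Runs a final cleanup pass, resets the hardware channel and waits for it to
 * quiesce, then frees every descriptor on the used and free lists along with
 * the completion writeback buffer.
 */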
static void ioat_dma_free_chan_resources(struct dma_chan *chan)
{
	struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);
	struct ioatdma_device *ioatdma_device = to_ioatdma_device(chan->device);
	struct ioat_desc_sw *desc, *_desc;
	int in_use_descs = 0;

	tasklet_disable(&ioat_chan->cleanup_task);
	ioat_dma_memcpy_cleanup(ioat_chan);

	/* Delay 100ms after reset to allow internal DMA logic to quiesce
	 * before removing DMA descriptor resources.
	 */
	writeb(IOAT_CHANCMD_RESET, ioat_chan->reg_base + IOAT_CHANCMD_OFFSET);
	mdelay(100);

	spin_lock_bh(&ioat_chan->desc_lock);
	list_for_each_entry_safe(desc, _desc, &ioat_chan->used_desc, node) {
		in_use_descs++;
		list_del(&desc->node);
		pci_pool_free(ioatdma_device->dma_pool, desc->hw,
			      desc->async_tx.phys);
		kfree(desc);
	}
	list_for_each_entry_safe(desc, _desc, &ioat_chan->free_desc, node) {
		list_del(&desc->node);
		pci_pool_free(ioatdma_device->dma_pool, desc->hw,
			      desc->async_tx.phys);
		kfree(desc);
	}
	spin_unlock_bh(&ioat_chan->desc_lock);

	pci_pool_free(ioatdma_device->completion_pool,
		      ioat_chan->completion_virt,
		      ioat_chan->completion_addr);

	/* one is ok since we left it on there on purpose */
	if (in_use_descs > 1)
		dev_err(&ioat_chan->device->pdev->dev,
			"ioatdma: Freeing %d in use descriptors!\n",
			in_use_descs - 1);

	ioat_chan->last_completion = ioat_chan->completion_addr = 0;
	ioat_chan->pending = 0;
}
/**
 * ioat_dma_get_next_descriptor - return the next available descriptor
 * @ioat_chan: IOAT DMA channel handle
 *
 * Gets the next descriptor from the chain, and must be called with the
 * channel's desc_lock held. Allocates more descriptors if the channel
 * has run out.
 */
static struct ioat_desc_sw *ioat_dma_get_next_descriptor(
						struct ioat_dma_chan *ioat_chan)
{
	struct ioat_desc_sw *new = NULL;

	if (!list_empty(&ioat_chan->free_desc)) {
		new = to_ioat_desc(ioat_chan->free_desc.next);
		list_del(&new->node);
	} else {
		/* try to get another desc */
		new = ioat_dma_alloc_descriptor(ioat_chan, GFP_ATOMIC);
		/* will this ever happen? */
		/* TODO add upper limit on these */
		BUG_ON(!new);
	}

	prefetch(new->hw);
	return new;
}
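
/**
 * ioat_dma_prep_memcpy - build a descriptor chain for a memcpy operation
 * @chan: DMA channel handle
 * @len: length of the copy in bytes
 * @int_en: unused in this driver version
 *
 * Splits the copy into as many hardware descriptors as the channel's
 * transfer capability requires; only the last descriptor carries the cookie
 * and the completion-status control bit.
 */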
static struct dma_async_tx_descriptor *ioat_dma_prep_memcpy(
						struct dma_chan *chan,
						size_t len,
						int int_en)
{
	struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);
	struct ioat_desc_sw *first, *prev, *new;
	LIST_HEAD(new_chain);
	u32 copy;
	size_t orig_len;
	int desc_count = 0;

	if (!len)
		return NULL;

	orig_len = len;

	first = NULL;
	prev = NULL;

	spin_lock_bh(&ioat_chan->desc_lock);
	while (len) {
		new = ioat_dma_get_next_descriptor(ioat_chan);
		copy = min((u32) len, ioat_chan->xfercap);

		new->hw->size = copy;
		new->hw->ctl = 0;
		new->async_tx.cookie = 0;
		new->async_tx.ack = 1;

		/* chain together the physical address list for the HW */
		if (!first)
			first = new;
		else
			prev->hw->next = (u64) new->async_tx.phys;

		prev = new;
		len -= copy;
		list_add_tail(&new->node, &new_chain);
		desc_count++;
	}

	list_splice(&new_chain, &new->async_tx.tx_list);

	new->hw->ctl = IOAT_DMA_DESCRIPTOR_CTL_CP_STS;
	new->hw->next = 0;
	new->tx_cnt = desc_count;
	new->async_tx.ack = 0; /* client is in control of this ack */
	new->async_tx.cookie = -EBUSY;

	pci_unmap_len_set(new, len, orig_len);
	spin_unlock_bh(&ioat_chan->desc_lock);

	return new ? &new->async_tx : NULL;
}
/**
 * ioat_dma_memcpy_issue_pending - push potentially unrecognized appended
 *                                 descriptors to hw
 * @chan: DMA channel handle
 */
static void ioat_dma_memcpy_issue_pending(struct dma_chan *chan)
{
	struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);

	if (ioat_chan->pending != 0) {
		ioat_chan->pending = 0;
		writeb(IOAT_CHANCMD_APPEND,
		       ioat_chan->reg_base + IOAT_CHANCMD_OFFSET);
	}
}
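
/**
 * ioat_dma_cleanup_tasklet - bottom half scheduled by the interrupt handlers
 * @data: the channel, cast to an unsigned long by tasklet_init()
 *
 * Runs descriptor cleanup for the channel outside hard-interrupt context,
 * then writes the channel control register.
 */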
static void ioat_dma_cleanup_tasklet(unsigned long data)
{
	struct ioat_dma_chan *chan = (void *)data;

	ioat_dma_memcpy_cleanup(chan);
	writew(IOAT_CHANCTRL_INT_DISABLE,
	       chan->reg_base + IOAT_CHANCTRL_OFFSET);
}
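
/**
 * ioat_dma_memcpy_cleanup - reclaim finished descriptors on a channel
 * @ioat_chan: IOAT DMA channel handle
 *
 * Reads the hardware completion writeback, unmaps the buffers of completed
 * transactions, returns acked descriptors to the free list, and updates the
 * channel's completed cookie.
 */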
static void ioat_dma_memcpy_cleanup(struct ioat_dma_chan *ioat_chan)
{
	unsigned long phys_complete;
	struct ioat_desc_sw *desc, *_desc;
	dma_cookie_t cookie = 0;

	prefetch(ioat_chan->completion_virt);

	if (!spin_trylock(&ioat_chan->cleanup_lock))
		return;

	/* The completion writeback can happen at any time,
	   so reads by the driver need to be atomic operations
	   The descriptor physical addresses are limited to 32-bits
	   when the CPU can only do a 32-bit mov */

#if (BITS_PER_LONG == 64)
	phys_complete =
		ioat_chan->completion_virt->full & IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR;
#else
	phys_complete = ioat_chan->completion_virt->low & IOAT_LOW_COMPLETION_MASK;
#endif

	if ((ioat_chan->completion_virt->full & IOAT_CHANSTS_DMA_TRANSFER_STATUS) ==
	    IOAT_CHANSTS_DMA_TRANSFER_STATUS_HALTED) {
		dev_err(&ioat_chan->device->pdev->dev,
			"ioatdma: Channel halted, chanerr = %x\n",
			readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET));

		/* TODO do something to salvage the situation */
	}

	if (phys_complete == ioat_chan->last_completion) {
		spin_unlock(&ioat_chan->cleanup_lock);
		return;
	}

	cookie = 0;
	spin_lock_bh(&ioat_chan->desc_lock);
	list_for_each_entry_safe(desc, _desc, &ioat_chan->used_desc, node) {

		/*
		 * Incoming DMA requests may use multiple descriptors, due to
		 * exceeding xfercap, perhaps. If so, only the last one will
		 * have a cookie, and require unmapping.
		 */
		if (desc->async_tx.cookie) {
			cookie = desc->async_tx.cookie;

			/*
			 * yes we are unmapping both _page and _single alloc'd
			 * regions with unmap_page. Is this *really* that bad?
			 */
			pci_unmap_page(ioat_chan->device->pdev,
				       pci_unmap_addr(desc, dst),
				       pci_unmap_len(desc, len),
				       PCI_DMA_FROMDEVICE);
			pci_unmap_page(ioat_chan->device->pdev,
				       pci_unmap_addr(desc, src),
				       pci_unmap_len(desc, len),
				       PCI_DMA_TODEVICE);
		}

		if (desc->async_tx.phys != phys_complete) {
			/*
			 * a completed entry, but not the last, so cleanup
			 * if the client is done with the descriptor
			 */
			if (desc->async_tx.ack) {
				list_del(&desc->node);
				list_add_tail(&desc->node,
					      &ioat_chan->free_desc);
			} else
				desc->async_tx.cookie = 0;
		} else {
			/*
			 * last used desc. Do not remove, so we can append from
			 * it, but don't look at it next time, either
			 */
			desc->async_tx.cookie = 0;

			/* TODO check status bits? */
			break;
		}
	}

	spin_unlock_bh(&ioat_chan->desc_lock);

	ioat_chan->last_completion = phys_complete;
	if (cookie != 0)
		ioat_chan->completed_cookie = cookie;

	spin_unlock(&ioat_chan->cleanup_lock);
}
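
/**
 * ioat_dma_dependency_added - dmaengine callback for a newly added dependency
 * @chan: DMA channel handle
 *
 * Runs cleanup on the channel when no descriptors are currently pending.
 */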
static void ioat_dma_dependency_added(struct dma_chan *chan)
{
	struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);

	spin_lock_bh(&ioat_chan->desc_lock);
	if (ioat_chan->pending == 0) {
		spin_unlock_bh(&ioat_chan->desc_lock);
		ioat_dma_memcpy_cleanup(ioat_chan);
	} else
		spin_unlock_bh(&ioat_chan->desc_lock);
}
/**
 * ioat_dma_is_complete - poll the status of an IOAT DMA transaction
 * @chan: IOAT DMA channel handle
 * @cookie: DMA transaction identifier
 * @done: if not %NULL, updated with last completed transaction
 * @used: if not %NULL, updated with last used transaction
 */
static enum dma_status ioat_dma_is_complete(struct dma_chan *chan,
					    dma_cookie_t cookie,
					    dma_cookie_t *done,
					    dma_cookie_t *used)
{
	struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);
	dma_cookie_t last_used;
	dma_cookie_t last_complete;
	enum dma_status ret;

	last_used = chan->cookie;
	last_complete = ioat_chan->completed_cookie;

	if (done)
		*done = last_complete;
	if (used)
		*used = last_used;

	ret = dma_async_is_complete(cookie, last_complete, last_used);
	if (ret == DMA_SUCCESS)
		return ret;

	ioat_dma_memcpy_cleanup(ioat_chan);

	last_used = chan->cookie;
	last_complete = ioat_chan->completed_cookie;

	if (done)
		*done = last_complete;
	if (used)
		*used = last_used;

	return dma_async_is_complete(cookie, last_complete, last_used);
}

/* PCI API */
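
/**
 * ioat_dma_start_null_desc - start the channel with a NULL descriptor
 * @ioat_chan: IOAT DMA channel handle
 *
 * Queues a no-op descriptor on the used list, programs the chain address
 * registers with its physical address, and issues the start command.
 */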
static void ioat_dma_start_null_desc(struct ioat_dma_chan *ioat_chan)
{
	struct ioat_desc_sw *desc;

	spin_lock_bh(&ioat_chan->desc_lock);

	desc = ioat_dma_get_next_descriptor(ioat_chan);
	desc->hw->ctl = IOAT_DMA_DESCRIPTOR_NUL;
	desc->hw->next = 0;
	desc->async_tx.ack = 1;

	list_add_tail(&desc->node, &ioat_chan->used_desc);
	spin_unlock_bh(&ioat_chan->desc_lock);

	writel(((u64) desc->async_tx.phys) & 0x00000000FFFFFFFF,
	       ioat_chan->reg_base + IOAT_CHAINADDR_OFFSET_LOW);
	writel(((u64) desc->async_tx.phys) >> 32,
	       ioat_chan->reg_base + IOAT_CHAINADDR_OFFSET_HIGH);

	writeb(IOAT_CHANCMD_START, ioat_chan->reg_base + IOAT_CHANCMD_OFFSET);
}
/*
 * Perform an IOAT transaction to verify the HW works.
 */
#define IOAT_TEST_SIZE 2000

/**
 * ioat_dma_self_test - Perform an IOAT transaction to verify the HW works.
 * @device: device to be tested
 */
static int ioat_dma_self_test(struct ioatdma_device *device)
{
	int i;
	u8 *src;
	u8 *dest;
	struct dma_chan *dma_chan;
	struct dma_async_tx_descriptor *tx;
	dma_addr_t addr;
	dma_cookie_t cookie;
	int err = 0;

	src = kzalloc(sizeof(u8) * IOAT_TEST_SIZE, GFP_KERNEL);
	if (!src)
		return -ENOMEM;
	dest = kzalloc(sizeof(u8) * IOAT_TEST_SIZE, GFP_KERNEL);
	if (!dest) {
		kfree(src);
		return -ENOMEM;
	}

	/* Fill in src buffer */
	for (i = 0; i < IOAT_TEST_SIZE; i++)
		src[i] = (u8)i;

	/* Start copy, using first DMA channel */
	dma_chan = container_of(device->common.channels.next,
				struct dma_chan,
				device_node);
	if (ioat_dma_alloc_chan_resources(dma_chan) < 1) {
		dev_err(&device->pdev->dev,
			"selftest cannot allocate chan resource\n");
		err = -ENODEV;
		goto out;
	}

	tx = ioat_dma_prep_memcpy(dma_chan, IOAT_TEST_SIZE, 0);
	async_tx_ack(tx);
	addr = dma_map_single(dma_chan->device->dev, src, IOAT_TEST_SIZE,
			      DMA_TO_DEVICE);
	ioat_set_src(addr, tx, 0);
	addr = dma_map_single(dma_chan->device->dev, dest, IOAT_TEST_SIZE,
			      DMA_FROM_DEVICE);
	ioat_set_dest(addr, tx, 0);
	cookie = ioat_tx_submit(tx);
	ioat_dma_memcpy_issue_pending(dma_chan);
	msleep(1);

	if (ioat_dma_is_complete(dma_chan, cookie, NULL, NULL) != DMA_SUCCESS) {
		dev_err(&device->pdev->dev,
			"ioatdma: Self-test copy timed out, disabling\n");
		err = -ENODEV;
		goto free_resources;
	}
	if (memcmp(src, dest, IOAT_TEST_SIZE)) {
		dev_err(&device->pdev->dev,
			"ioatdma: Self-test copy failed compare, disabling\n");
		err = -ENODEV;
		goto free_resources;
	}

free_resources:
	ioat_dma_free_chan_resources(dma_chan);
out:
	kfree(src);
	kfree(dest);
	return err;
}
static char ioat_interrupt_style[32] = "msix";
module_param_string(ioat_interrupt_style, ioat_interrupt_style,
		    sizeof(ioat_interrupt_style), 0644);
MODULE_PARM_DESC(ioat_interrupt_style,
		 "set ioat interrupt style: msix (default), "
		 "msix-single-vector, msi, intx");

/**
 * ioat_dma_setup_interrupts - setup interrupt handler
 * @device: ioat device
 */
static int ioat_dma_setup_interrupts(struct ioatdma_device *device)
{
	struct ioat_dma_chan *ioat_chan;
	int err, i, j, msixcnt;
	u8 intrctrl = 0;

	if (!strcmp(ioat_interrupt_style, "msix"))
		goto msix;
	if (!strcmp(ioat_interrupt_style, "msix-single-vector"))
		goto msix_single_vector;
	if (!strcmp(ioat_interrupt_style, "msi"))
		goto msi;
	if (!strcmp(ioat_interrupt_style, "intx"))
		goto intx;

msix:
	/* The number of MSI-X vectors should equal the number of channels */
	msixcnt = device->common.chancnt;
	for (i = 0; i < msixcnt; i++)
		device->msix_entries[i].entry = i;
	err = pci_enable_msix(device->pdev, device->msix_entries, msixcnt);
	if (err < 0)
		goto msi;
	if (err > 0)
		goto msix_single_vector;

	for (i = 0; i < msixcnt; i++) {
		ioat_chan = ioat_lookup_chan_by_index(device, i);
		err = request_irq(device->msix_entries[i].vector,
				  ioat_dma_do_interrupt_msix,
				  0, "ioat-msix", ioat_chan);
		if (err) {
			for (j = 0; j < i; j++) {
				ioat_chan =
					ioat_lookup_chan_by_index(device, j);
				free_irq(device->msix_entries[j].vector,
					 ioat_chan);
			}
			goto msix_single_vector;
		}
	}
	intrctrl |= IOAT_INTRCTRL_MSIX_VECTOR_CONTROL;
	device->irq_mode = msix_multi_vector;
	goto done;

msix_single_vector:
	device->msix_entries[0].entry = 0;
	err = pci_enable_msix(device->pdev, device->msix_entries, 1);
	if (err)
		goto msi;

	err = request_irq(device->msix_entries[0].vector, ioat_dma_do_interrupt,
			  0, "ioat-msix", device);
	if (err) {
		pci_disable_msix(device->pdev);
		goto msi;
	}
	device->irq_mode = msix_single_vector;
	goto done;

msi:
	err = pci_enable_msi(device->pdev);
	if (err)
		goto intx;

	err = request_irq(device->pdev->irq, ioat_dma_do_interrupt,
			  0, "ioat-msi", device);
	if (err) {
		pci_disable_msi(device->pdev);
		goto intx;
	}
	/*
	 * CB 1.2 devices need a bit set in configuration space to enable MSI
	 */
	if (device->version == IOAT_VER_1_2) {
		u32 dmactrl;
		pci_read_config_dword(device->pdev,
				      IOAT_PCI_DMACTRL_OFFSET, &dmactrl);
		dmactrl |= IOAT_PCI_DMACTRL_MSI_EN;
		pci_write_config_dword(device->pdev,
				       IOAT_PCI_DMACTRL_OFFSET, dmactrl);
	}
	device->irq_mode = msi;
	goto done;

intx:
	err = request_irq(device->pdev->irq, ioat_dma_do_interrupt,
			  IRQF_SHARED, "ioat-intx", device);
	if (err)
		goto err_no_irq;
	device->irq_mode = intx;

done:
	intrctrl |= IOAT_INTRCTRL_MASTER_INT_EN;
	writeb(intrctrl, device->reg_base + IOAT_INTRCTRL_OFFSET);
	return 0;

err_no_irq:
	/* Disable all interrupt generation */
	writeb(0, device->reg_base + IOAT_INTRCTRL_OFFSET);
	dev_err(&device->pdev->dev, "no usable interrupts\n");
	device->irq_mode = none;
	return -1;
}

/**
 * ioat_dma_remove_interrupts - remove whatever interrupts were set
 * @device: ioat device
 */
static void ioat_dma_remove_interrupts(struct ioatdma_device *device)
{
	struct ioat_dma_chan *ioat_chan;
	int i;

	/* Disable all interrupt generation */
	writeb(0, device->reg_base + IOAT_INTRCTRL_OFFSET);

	switch (device->irq_mode) {
	case msix_multi_vector:
		for (i = 0; i < device->common.chancnt; i++) {
			ioat_chan = ioat_lookup_chan_by_index(device, i);
			free_irq(device->msix_entries[i].vector, ioat_chan);
		}
		pci_disable_msix(device->pdev);
		break;
	case msix_single_vector:
		free_irq(device->msix_entries[0].vector, device);
		pci_disable_msix(device->pdev);
		break;
	case msi:
		free_irq(device->pdev->irq, device);
		pci_disable_msi(device->pdev);
		break;
	case intx:
		free_irq(device->pdev->irq, device);
		break;
	case none:
		dev_warn(&device->pdev->dev,
			 "call to %s without interrupts setup\n", __func__);
	}
	device->irq_mode = none;
}
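
/**
 * ioat_dma_probe - allocate and initialize an ioatdma device
 * @pdev: the PCI device being probed
 * @iobase: mapped MMIO register base
 *
 * Creates the descriptor and completion pools, enumerates the channels,
 * wires up the dmaengine callbacks, sets up interrupts, and runs the
 * self-test before registering with the DMA engine core.
 */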
struct ioatdma_device *ioat_dma_probe(struct pci_dev *pdev,
				      void __iomem *iobase)
{
	int err;
	struct ioatdma_device *device;

	device = kzalloc(sizeof(*device), GFP_KERNEL);
	if (!device) {
		err = -ENOMEM;
		goto err_kzalloc;
	}
	device->pdev = pdev;
	device->reg_base = iobase;
	device->version = readb(device->reg_base + IOAT_VER_OFFSET);

	/* DMA coherent memory pool for DMA descriptor allocations */
	device->dma_pool = pci_pool_create("dma_desc_pool", pdev,
					   sizeof(struct ioat_dma_descriptor),
					   64, 0);
	if (!device->dma_pool) {
		err = -ENOMEM;
		goto err_dma_pool;
	}

	device->completion_pool = pci_pool_create("completion_pool", pdev,
						  sizeof(u64), SMP_CACHE_BYTES,
						  SMP_CACHE_BYTES);
	if (!device->completion_pool) {
		err = -ENOMEM;
		goto err_completion_pool;
	}

	INIT_LIST_HEAD(&device->common.channels);
	ioat_dma_enumerate_channels(device);

	dma_cap_set(DMA_MEMCPY, device->common.cap_mask);
	device->common.device_alloc_chan_resources =
						ioat_dma_alloc_chan_resources;
	device->common.device_free_chan_resources =
						ioat_dma_free_chan_resources;
	device->common.device_prep_dma_memcpy = ioat_dma_prep_memcpy;
	device->common.device_is_tx_complete = ioat_dma_is_complete;
	device->common.device_issue_pending = ioat_dma_memcpy_issue_pending;
	device->common.device_dependency_added = ioat_dma_dependency_added;
	device->common.dev = &pdev->dev;

	dev_err(&device->pdev->dev,
		"ioatdma: Intel(R) I/OAT DMA Engine found,"
		" %d channels, device version 0x%02x\n",
		device->common.chancnt, device->version);

	err = ioat_dma_setup_interrupts(device);
	if (err)
		goto err_setup_interrupts;

	err = ioat_dma_self_test(device);
	if (err)
		goto err_self_test;

	dma_async_device_register(&device->common);

	return device;

err_self_test:
	ioat_dma_remove_interrupts(device);
err_setup_interrupts:
	pci_pool_destroy(device->completion_pool);
err_completion_pool:
	pci_pool_destroy(device->dma_pool);
err_dma_pool:
	kfree(device);
err_kzalloc:
	/* device may be NULL here, so report through the PCI device instead */
	dev_err(&pdev->dev,
		"ioatdma: Intel(R) I/OAT DMA Engine initialization failed\n");
	return NULL;
}
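
/**
 * ioat_dma_remove - tear down an ioatdma device
 * @device: the device to be removed
 *
 * Unregisters from the DMA engine core, releases interrupts, pools, MMIO
 * mappings and PCI resources, then frees every channel structure.
 */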
void ioat_dma_remove(struct ioatdma_device *device)
{
	struct dma_chan *chan, *_chan;
	struct ioat_dma_chan *ioat_chan;

	ioat_dma_remove_interrupts(device);

	dma_async_device_unregister(&device->common);

	pci_pool_destroy(device->dma_pool);
	pci_pool_destroy(device->completion_pool);

	iounmap(device->reg_base);
	pci_release_regions(device->pdev);
	pci_disable_device(device->pdev);

	list_for_each_entry_safe(chan, _chan,
				 &device->common.channels, device_node) {
		ioat_chan = to_ioat_chan(chan);
		list_del(&chan->device_node);
		kfree(ioat_chan);
	}
	kfree(device);
}