ioatdma.c

/*
 * Copyright(c) 2004 - 2006 Intel Corporation. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the Free
 * Software Foundation; either version 2 of the License, or (at your option)
 * any later version.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 59
 * Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 * The full GNU General Public License is included in this distribution in the
 * file called COPYING.
 */

/*
 * This driver supports an Intel I/OAT DMA engine, which does asynchronous
 * copy operations.
 */

#include <linux/init.h>
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/interrupt.h>
#include <linux/dmaengine.h>
#include <linux/delay.h>
#include <linux/dma-mapping.h>
#include "ioatdma.h"
#include "ioatdma_registers.h"
#include "ioatdma_hw.h"

#define to_ioat_chan(chan) container_of(chan, struct ioat_dma_chan, common)
#define to_ioat_device(dev) container_of(dev, struct ioat_device, common)
#define to_ioat_desc(lh) container_of(lh, struct ioat_desc_sw, node)
#define tx_to_ioat_desc(tx) container_of(tx, struct ioat_desc_sw, async_tx)

/* internal functions */
static int __devinit ioat_probe(struct pci_dev *pdev,
				const struct pci_device_id *ent);
static void ioat_shutdown(struct pci_dev *pdev);
static void __devexit ioat_remove(struct pci_dev *pdev);
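
/**
 * enumerate_dma_channels - read the channel count and transfer capability
 *	from the device registers and set up one ioat_dma_chan per channel
 * @device: the I/OAT device whose channels are being enumerated
 */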
static int enumerate_dma_channels(struct ioat_device *device)
{
	u8 xfercap_scale;
	u32 xfercap;
	int i;
	struct ioat_dma_chan *ioat_chan;

	device->common.chancnt = readb(device->reg_base + IOAT_CHANCNT_OFFSET);
	xfercap_scale = readb(device->reg_base + IOAT_XFERCAP_OFFSET);
	xfercap = (xfercap_scale == 0 ? -1 : (1UL << xfercap_scale));

	for (i = 0; i < device->common.chancnt; i++) {
		ioat_chan = kzalloc(sizeof(*ioat_chan), GFP_KERNEL);
		if (!ioat_chan) {
			device->common.chancnt = i;
			break;
		}

		ioat_chan->device = device;
		ioat_chan->reg_base = device->reg_base + (0x80 * (i + 1));
		ioat_chan->xfercap = xfercap;
		spin_lock_init(&ioat_chan->cleanup_lock);
		spin_lock_init(&ioat_chan->desc_lock);
		INIT_LIST_HEAD(&ioat_chan->free_desc);
		INIT_LIST_HEAD(&ioat_chan->used_desc);
		/* This should be made common somewhere in dmaengine.c */
		ioat_chan->common.device = &device->common;
		list_add_tail(&ioat_chan->common.device_node,
			      &device->common.channels);
	}
	return device->common.chancnt;
}
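
/**
 * ioat_set_src - set the source address of a transaction, spreading it
 *	across the hardware descriptors that make up the chain
 * @addr: DMA address of the source buffer
 * @tx: descriptor whose source is being set
 * @index: unused by this driver
 */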
static void
ioat_set_src(dma_addr_t addr, struct dma_async_tx_descriptor *tx, int index)
{
	struct ioat_desc_sw *iter, *desc = tx_to_ioat_desc(tx);
	struct ioat_dma_chan *ioat_chan = to_ioat_chan(tx->chan);

	pci_unmap_addr_set(desc, src, addr);

	list_for_each_entry(iter, &desc->async_tx.tx_list, node) {
		iter->hw->src_addr = addr;
		addr += ioat_chan->xfercap;
	}
}
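
/**
 * ioat_set_dest - set the destination address of a transaction; the
 *	mirror image of ioat_set_src()
 * @addr: DMA address of the destination buffer
 * @tx: descriptor whose destination is being set
 * @index: unused by this driver
 */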
static void
ioat_set_dest(dma_addr_t addr, struct dma_async_tx_descriptor *tx, int index)
{
	struct ioat_desc_sw *iter, *desc = tx_to_ioat_desc(tx);
	struct ioat_dma_chan *ioat_chan = to_ioat_chan(tx->chan);

	pci_unmap_addr_set(desc, dst, addr);

	list_for_each_entry(iter, &desc->async_tx.tx_list, node) {
		iter->hw->dst_addr = addr;
		addr += ioat_chan->xfercap;
	}
}
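
/**
 * ioat_tx_submit - assign a cookie, splice the descriptor chain onto the
 *	channel's used list and, once enough work is pending, tell the
 *	hardware to append to its active chain
 * @tx: descriptor group being submitted
 */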
static dma_cookie_t
ioat_tx_submit(struct dma_async_tx_descriptor *tx)
{
	struct ioat_dma_chan *ioat_chan = to_ioat_chan(tx->chan);
	struct ioat_desc_sw *desc = tx_to_ioat_desc(tx);
	int append = 0;
	dma_cookie_t cookie;
	struct ioat_desc_sw *group_start;

	group_start = list_entry(desc->async_tx.tx_list.next,
				 struct ioat_desc_sw, node);
	spin_lock_bh(&ioat_chan->desc_lock);
	/* cookie incr and addition to used_list must be atomic */
	cookie = ioat_chan->common.cookie;
	cookie++;
	if (cookie < 0)
		cookie = 1;
	ioat_chan->common.cookie = desc->async_tx.cookie = cookie;

	/* write address into NextDescriptor field of last desc in chain */
	to_ioat_desc(ioat_chan->used_desc.prev)->hw->next =
						group_start->async_tx.phys;
	list_splice_init(&desc->async_tx.tx_list, ioat_chan->used_desc.prev);

	ioat_chan->pending += desc->tx_cnt;
	if (ioat_chan->pending >= 4) {
		append = 1;
		ioat_chan->pending = 0;
	}
	spin_unlock_bh(&ioat_chan->desc_lock);

	if (append)
		writeb(IOAT_CHANCMD_APPEND,
		       ioat_chan->reg_base + IOAT_CHANCMD_OFFSET);

	return cookie;
}
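
/**
 * ioat_dma_alloc_descriptor - allocate a hardware descriptor from the DMA
 *	pool together with its software wrapper
 * @ioat_chan: the channel the descriptor will be used on
 * @flags: GFP flags for the allocations
 */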
static struct ioat_desc_sw *ioat_dma_alloc_descriptor(
					struct ioat_dma_chan *ioat_chan,
					gfp_t flags)
{
	struct ioat_dma_descriptor *desc;
	struct ioat_desc_sw *desc_sw;
	struct ioat_device *ioat_device;
	dma_addr_t phys;

	ioat_device = to_ioat_device(ioat_chan->common.device);
	desc = pci_pool_alloc(ioat_device->dma_pool, flags, &phys);
	if (unlikely(!desc))
		return NULL;

	desc_sw = kzalloc(sizeof(*desc_sw), flags);
	if (unlikely(!desc_sw)) {
		pci_pool_free(ioat_device->dma_pool, desc, phys);
		return NULL;
	}

	memset(desc, 0, sizeof(*desc));
	dma_async_tx_descriptor_init(&desc_sw->async_tx, &ioat_chan->common);
	desc_sw->async_tx.tx_set_src = ioat_set_src;
	desc_sw->async_tx.tx_set_dest = ioat_set_dest;
	desc_sw->async_tx.tx_submit = ioat_tx_submit;
	INIT_LIST_HEAD(&desc_sw->async_tx.tx_list);
	desc_sw->hw = desc;
	desc_sw->async_tx.phys = phys;

	return desc_sw;
}
#define INITIAL_IOAT_DESC_COUNT 128

static void ioat_start_null_desc(struct ioat_dma_chan *ioat_chan);

/* returns the actual number of allocated descriptors */
static int ioat_dma_alloc_chan_resources(struct dma_chan *chan)
{
	struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);
	struct ioat_desc_sw *desc = NULL;
	u16 chanctrl;
	u32 chanerr;
	int i;
	LIST_HEAD(tmp_list);

	/*
	 * In-use bit automatically set by reading chanctrl
	 * If 0, we got it, if 1, someone else did
	 */
	chanctrl = readw(ioat_chan->reg_base + IOAT_CHANCTRL_OFFSET);
	if (chanctrl & IOAT_CHANCTRL_CHANNEL_IN_USE)
		return -EBUSY;

	/* Setup register to interrupt and write completion status on error */
	chanctrl = IOAT_CHANCTRL_CHANNEL_IN_USE |
		IOAT_CHANCTRL_ERR_INT_EN |
		IOAT_CHANCTRL_ANY_ERR_ABORT_EN |
		IOAT_CHANCTRL_ERR_COMPLETION_EN;
	writew(chanctrl, ioat_chan->reg_base + IOAT_CHANCTRL_OFFSET);

	chanerr = readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET);
	if (chanerr) {
		printk(KERN_ERR "IOAT: CHANERR = %x, clearing\n", chanerr);
		writel(chanerr, ioat_chan->reg_base + IOAT_CHANERR_OFFSET);
	}

	/* Allocate descriptors */
	for (i = 0; i < INITIAL_IOAT_DESC_COUNT; i++) {
		desc = ioat_dma_alloc_descriptor(ioat_chan, GFP_KERNEL);
		if (!desc) {
			printk(KERN_ERR "IOAT: Only %d initial descriptors\n", i);
			break;
		}
		list_add_tail(&desc->node, &tmp_list);
	}
	spin_lock_bh(&ioat_chan->desc_lock);
	list_splice(&tmp_list, &ioat_chan->free_desc);
	spin_unlock_bh(&ioat_chan->desc_lock);

	/* allocate a completion writeback area */
	/* doing 2 32bit writes to mmio since 1 64b write doesn't work */
	ioat_chan->completion_virt =
		pci_pool_alloc(ioat_chan->device->completion_pool,
			       GFP_KERNEL,
			       &ioat_chan->completion_addr);
	memset(ioat_chan->completion_virt, 0,
	       sizeof(*ioat_chan->completion_virt));
	writel(((u64) ioat_chan->completion_addr) & 0x00000000FFFFFFFF,
	       ioat_chan->reg_base + IOAT_CHANCMP_OFFSET_LOW);
	writel(((u64) ioat_chan->completion_addr) >> 32,
	       ioat_chan->reg_base + IOAT_CHANCMP_OFFSET_HIGH);

	ioat_start_null_desc(ioat_chan);
	return i;
}

static void ioat_dma_memcpy_cleanup(struct ioat_dma_chan *ioat_chan);
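
/**
 * ioat_dma_free_chan_resources - reset the channel, return all descriptors
 *	to the DMA pool and mark the channel as no longer in use
 * @chan: the channel to be cleaned up
 */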
static void ioat_dma_free_chan_resources(struct dma_chan *chan)
{
	struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);
	struct ioat_device *ioat_device = to_ioat_device(chan->device);
	struct ioat_desc_sw *desc, *_desc;
	u16 chanctrl;
	int in_use_descs = 0;

	ioat_dma_memcpy_cleanup(ioat_chan);

	writeb(IOAT_CHANCMD_RESET, ioat_chan->reg_base + IOAT_CHANCMD_OFFSET);

	spin_lock_bh(&ioat_chan->desc_lock);
	list_for_each_entry_safe(desc, _desc, &ioat_chan->used_desc, node) {
		in_use_descs++;
		list_del(&desc->node);
		pci_pool_free(ioat_device->dma_pool, desc->hw,
			      desc->async_tx.phys);
		kfree(desc);
	}
	list_for_each_entry_safe(desc, _desc, &ioat_chan->free_desc, node) {
		list_del(&desc->node);
		pci_pool_free(ioat_device->dma_pool, desc->hw,
			      desc->async_tx.phys);
		kfree(desc);
	}
	spin_unlock_bh(&ioat_chan->desc_lock);

	pci_pool_free(ioat_device->completion_pool,
		      ioat_chan->completion_virt,
		      ioat_chan->completion_addr);

	/* one is ok since we left it on there on purpose */
	if (in_use_descs > 1)
		printk(KERN_ERR "IOAT: Freeing %d in use descriptors!\n",
		       in_use_descs - 1);

	ioat_chan->last_completion = ioat_chan->completion_addr = 0;

	/* Tell hw the chan is free */
	chanctrl = readw(ioat_chan->reg_base + IOAT_CHANCTRL_OFFSET);
	chanctrl &= ~IOAT_CHANCTRL_CHANNEL_IN_USE;
	writew(chanctrl, ioat_chan->reg_base + IOAT_CHANCTRL_OFFSET);
}
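
/**
 * ioat_dma_prep_memcpy - build a descriptor chain for a copy of @len bytes,
 *	splitting the transfer into xfercap-sized hardware descriptors
 * @chan: the channel the copy will run on
 * @len: total number of bytes to copy
 * @int_en: unused by this driver
 */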
static struct dma_async_tx_descriptor *
ioat_dma_prep_memcpy(struct dma_chan *chan, size_t len, int int_en)
{
	struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);
	struct ioat_desc_sw *first, *prev, *new;
	LIST_HEAD(new_chain);
	u32 copy;
	size_t orig_len;
	int desc_count = 0;

	if (!len)
		return NULL;

	orig_len = len;

	first = NULL;
	prev = NULL;

	spin_lock_bh(&ioat_chan->desc_lock);
	while (len) {
		if (!list_empty(&ioat_chan->free_desc)) {
			new = to_ioat_desc(ioat_chan->free_desc.next);
			list_del(&new->node);
		} else {
			/* try to get another desc */
			new = ioat_dma_alloc_descriptor(ioat_chan, GFP_ATOMIC);
			/* will this ever happen? */
			/* TODO add upper limit on these */
			BUG_ON(!new);
		}

		copy = min((u32) len, ioat_chan->xfercap);

		new->hw->size = copy;
		new->hw->ctl = 0;
		new->async_tx.cookie = 0;
		new->async_tx.ack = 1;

		/* chain together the physical address list for the HW */
		if (!first)
			first = new;
		else
			prev->hw->next = (u64) new->async_tx.phys;

		prev = new;
		len -= copy;
		list_add_tail(&new->node, &new_chain);
		desc_count++;
	}

	list_splice(&new_chain, &new->async_tx.tx_list);

	new->hw->ctl = IOAT_DMA_DESCRIPTOR_CTL_CP_STS;
	new->hw->next = 0;
	new->tx_cnt = desc_count;
	new->async_tx.ack = 0; /* client is in control of this ack */
	new->async_tx.cookie = -EBUSY;
	pci_unmap_len_set(new, src_len, orig_len);
	pci_unmap_len_set(new, dst_len, orig_len);
	spin_unlock_bh(&ioat_chan->desc_lock);

	return new ? &new->async_tx : NULL;
}
/**
 * ioat_dma_memcpy_issue_pending - push potentially unrecognized appended descriptors to hw
 * @chan: DMA channel handle
 */
static void ioat_dma_memcpy_issue_pending(struct dma_chan *chan)
{
	struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);

	if (ioat_chan->pending != 0) {
		ioat_chan->pending = 0;
		writeb(IOAT_CHANCMD_APPEND,
		       ioat_chan->reg_base + IOAT_CHANCMD_OFFSET);
	}
}
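
/**
 * ioat_dma_memcpy_cleanup - walk the used descriptor list, unmap and
 *	recycle descriptors the hardware has finished with, and record the
 *	last completed cookie
 * @chan: the channel to be cleaned up
 */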
static void ioat_dma_memcpy_cleanup(struct ioat_dma_chan *chan)
{
	unsigned long phys_complete;
	struct ioat_desc_sw *desc, *_desc;
	dma_cookie_t cookie = 0;

	prefetch(chan->completion_virt);

	if (!spin_trylock(&chan->cleanup_lock))
		return;

	/* The completion writeback can happen at any time,
	   so reads by the driver need to be atomic operations
	   The descriptor physical addresses are limited to 32-bits
	   when the CPU can only do a 32-bit mov */

#if (BITS_PER_LONG == 64)
	phys_complete =
	chan->completion_virt->full & IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR;
#else
	phys_complete = chan->completion_virt->low & IOAT_LOW_COMPLETION_MASK;
#endif

	if ((chan->completion_virt->full & IOAT_CHANSTS_DMA_TRANSFER_STATUS) ==
		IOAT_CHANSTS_DMA_TRANSFER_STATUS_HALTED) {
		printk(KERN_ERR "IOAT: Channel halted, chanerr = %x\n",
		       readl(chan->reg_base + IOAT_CHANERR_OFFSET));

		/* TODO do something to salvage the situation */
	}

	if (phys_complete == chan->last_completion) {
		spin_unlock(&chan->cleanup_lock);
		return;
	}

	spin_lock_bh(&chan->desc_lock);
	list_for_each_entry_safe(desc, _desc, &chan->used_desc, node) {

		/*
		 * Incoming DMA requests may use multiple descriptors, due to
		 * exceeding xfercap, perhaps. If so, only the last one will
		 * have a cookie, and require unmapping.
		 */
		if (desc->async_tx.cookie) {
			cookie = desc->async_tx.cookie;

			/* yes we are unmapping both _page and _single alloc'd
			   regions with unmap_page. Is this *really* that bad?
			*/
			pci_unmap_page(chan->device->pdev,
				       pci_unmap_addr(desc, dst),
				       pci_unmap_len(desc, dst_len),
				       PCI_DMA_FROMDEVICE);
			pci_unmap_page(chan->device->pdev,
				       pci_unmap_addr(desc, src),
				       pci_unmap_len(desc, src_len),
				       PCI_DMA_TODEVICE);
		}

		if (desc->async_tx.phys != phys_complete) {
			/* a completed entry, but not the last, so cleanup
			 * if the client is done with the descriptor
			 */
			if (desc->async_tx.ack) {
				list_del(&desc->node);
				list_add_tail(&desc->node, &chan->free_desc);
			} else
				desc->async_tx.cookie = 0;
		} else {
			/* last used desc. Do not remove, so we can append from
			   it, but don't look at it next time, either */
			desc->async_tx.cookie = 0;

			/* TODO check status bits? */
			break;
		}
	}

	spin_unlock_bh(&chan->desc_lock);

	chan->last_completion = phys_complete;
	if (cookie != 0)
		chan->completed_cookie = cookie;

	spin_unlock(&chan->cleanup_lock);
}
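
/**
 * ioat_dma_dependency_added - run cleanup when a dependent transaction has
 *	been added and nothing is left pending on the channel
 * @chan: DMA channel handle
 */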
static void ioat_dma_dependency_added(struct dma_chan *chan)
{
	struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);

	spin_lock_bh(&ioat_chan->desc_lock);
	if (ioat_chan->pending == 0) {
		spin_unlock_bh(&ioat_chan->desc_lock);
		ioat_dma_memcpy_cleanup(ioat_chan);
	} else
		spin_unlock_bh(&ioat_chan->desc_lock);
}
/**
 * ioat_dma_is_complete - poll the status of an IOAT DMA transaction
 * @chan: IOAT DMA channel handle
 * @cookie: DMA transaction identifier
 * @done: if not %NULL, updated with last completed transaction
 * @used: if not %NULL, updated with last used transaction
 */
static enum dma_status ioat_dma_is_complete(struct dma_chan *chan,
					    dma_cookie_t cookie,
					    dma_cookie_t *done,
					    dma_cookie_t *used)
{
	struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);
	dma_cookie_t last_used;
	dma_cookie_t last_complete;
	enum dma_status ret;

	last_used = chan->cookie;
	last_complete = ioat_chan->completed_cookie;

	if (done)
		*done = last_complete;
	if (used)
		*used = last_used;

	ret = dma_async_is_complete(cookie, last_complete, last_used);
	if (ret == DMA_SUCCESS)
		return ret;

	ioat_dma_memcpy_cleanup(ioat_chan);

	last_used = chan->cookie;
	last_complete = ioat_chan->completed_cookie;

	if (done)
		*done = last_complete;
	if (used)
		*used = last_used;

	return dma_async_is_complete(cookie, last_complete, last_used);
}
/* PCI API */

static struct pci_device_id ioat_pci_tbl[] = {
	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT) },
	{ PCI_DEVICE(PCI_VENDOR_ID_UNISYS,
		     PCI_DEVICE_ID_UNISYS_DMA_DIRECTOR) },
	{ 0, }
};

static struct pci_driver ioat_pci_driver = {
	.name		= "ioatdma",
	.id_table	= ioat_pci_tbl,
	.probe		= ioat_probe,
	.shutdown	= ioat_shutdown,
	.remove		= __devexit_p(ioat_remove),
};
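
/**
 * ioat_do_interrupt - interrupt handler; error interrupts are reported via
 *	the attention status register and then acknowledged
 * @irq: interrupt number
 * @data: the ioat_device that registered the handler
 */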
static irqreturn_t ioat_do_interrupt(int irq, void *data)
{
	struct ioat_device *instance = data;
	unsigned long attnstatus;
	u8 intrctrl;

	intrctrl = readb(instance->reg_base + IOAT_INTRCTRL_OFFSET);

	if (!(intrctrl & IOAT_INTRCTRL_MASTER_INT_EN))
		return IRQ_NONE;

	if (!(intrctrl & IOAT_INTRCTRL_INT_STATUS)) {
		writeb(intrctrl, instance->reg_base + IOAT_INTRCTRL_OFFSET);
		return IRQ_NONE;
	}

	attnstatus = readl(instance->reg_base + IOAT_ATTNSTATUS_OFFSET);

	printk(KERN_ERR "ioatdma error: interrupt! status %lx\n", attnstatus);

	writeb(intrctrl, instance->reg_base + IOAT_INTRCTRL_OFFSET);
	return IRQ_HANDLED;
}
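
/**
 * ioat_start_null_desc - program a NULL descriptor as the head of the
 *	channel's chain and start the channel
 * @ioat_chan: the channel to be started
 */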
static void ioat_start_null_desc(struct ioat_dma_chan *ioat_chan)
{
	struct ioat_desc_sw *desc;

	spin_lock_bh(&ioat_chan->desc_lock);

	if (!list_empty(&ioat_chan->free_desc)) {
		desc = to_ioat_desc(ioat_chan->free_desc.next);
		list_del(&desc->node);
	} else {
		/* try to get another desc */
		spin_unlock_bh(&ioat_chan->desc_lock);
		desc = ioat_dma_alloc_descriptor(ioat_chan, GFP_KERNEL);
		spin_lock_bh(&ioat_chan->desc_lock);
		/* will this ever happen? */
		BUG_ON(!desc);
	}

	desc->hw->ctl = IOAT_DMA_DESCRIPTOR_NUL;
	desc->hw->next = 0;
	desc->async_tx.ack = 1;

	list_add_tail(&desc->node, &ioat_chan->used_desc);
	spin_unlock_bh(&ioat_chan->desc_lock);

	writel(((u64) desc->async_tx.phys) & 0x00000000FFFFFFFF,
	       ioat_chan->reg_base + IOAT_CHAINADDR_OFFSET_LOW);
	writel(((u64) desc->async_tx.phys) >> 32,
	       ioat_chan->reg_base + IOAT_CHAINADDR_OFFSET_HIGH);

	writeb(IOAT_CHANCMD_START, ioat_chan->reg_base + IOAT_CHANCMD_OFFSET);
}
/*
 * Perform an IOAT transaction to verify the HW works.
 */
#define IOAT_TEST_SIZE 2000

static int ioat_self_test(struct ioat_device *device)
{
	int i;
	u8 *src;
	u8 *dest;
	struct dma_chan *dma_chan;
	struct dma_async_tx_descriptor *tx;
	dma_addr_t addr;
	dma_cookie_t cookie;
	int err = 0;

	src = kzalloc(sizeof(u8) * IOAT_TEST_SIZE, GFP_KERNEL);
	if (!src)
		return -ENOMEM;
	dest = kzalloc(sizeof(u8) * IOAT_TEST_SIZE, GFP_KERNEL);
	if (!dest) {
		kfree(src);
		return -ENOMEM;
	}

	/* Fill in src buffer */
	for (i = 0; i < IOAT_TEST_SIZE; i++)
		src[i] = (u8)i;

	/* Start copy, using first DMA channel */
	dma_chan = container_of(device->common.channels.next,
				struct dma_chan,
				device_node);
	if (ioat_dma_alloc_chan_resources(dma_chan) < 1) {
		err = -ENODEV;
		goto out;
	}

	tx = ioat_dma_prep_memcpy(dma_chan, IOAT_TEST_SIZE, 0);
	async_tx_ack(tx);
	addr = dma_map_single(dma_chan->device->dev, src, IOAT_TEST_SIZE,
			      DMA_TO_DEVICE);
	ioat_set_src(addr, tx, 0);
	addr = dma_map_single(dma_chan->device->dev, dest, IOAT_TEST_SIZE,
			      DMA_FROM_DEVICE);
	ioat_set_dest(addr, tx, 0);
	cookie = ioat_tx_submit(tx);
	ioat_dma_memcpy_issue_pending(dma_chan);
	msleep(1);

	if (ioat_dma_is_complete(dma_chan, cookie, NULL, NULL) != DMA_SUCCESS) {
		printk(KERN_ERR "ioatdma: Self-test copy timed out, disabling\n");
		err = -ENODEV;
		goto free_resources;
	}
	if (memcmp(src, dest, IOAT_TEST_SIZE)) {
		printk(KERN_ERR "ioatdma: Self-test copy failed compare, disabling\n");
		err = -ENODEV;
		goto free_resources;
	}

free_resources:
	ioat_dma_free_chan_resources(dma_chan);
out:
	kfree(src);
	kfree(dest);
	return err;
}
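
/**
 * ioat_probe - map the device, create the descriptor pools, enumerate the
 *	channels, hook up the interrupt handler, run the self-test and
 *	register the device with the dmaengine core
 * @pdev: the PCI device being probed
 * @ent: matching entry in ioat_pci_tbl
 */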
static int __devinit ioat_probe(struct pci_dev *pdev,
				const struct pci_device_id *ent)
{
	int err;
	unsigned long mmio_start, mmio_len;
	void __iomem *reg_base;
	struct ioat_device *device;

	err = pci_enable_device(pdev);
	if (err)
		goto err_enable_device;

	err = pci_set_dma_mask(pdev, DMA_64BIT_MASK);
	if (err)
		err = pci_set_dma_mask(pdev, DMA_32BIT_MASK);
	if (err)
		goto err_set_dma_mask;

	err = pci_request_regions(pdev, ioat_pci_driver.name);
	if (err)
		goto err_request_regions;

	mmio_start = pci_resource_start(pdev, 0);
	mmio_len = pci_resource_len(pdev, 0);

	reg_base = ioremap(mmio_start, mmio_len);
	if (!reg_base) {
		err = -ENOMEM;
		goto err_ioremap;
	}

	device = kzalloc(sizeof(*device), GFP_KERNEL);
	if (!device) {
		err = -ENOMEM;
		goto err_kzalloc;
	}

	/* DMA coherent memory pool for DMA descriptor allocations */
	device->dma_pool = pci_pool_create("dma_desc_pool", pdev,
		sizeof(struct ioat_dma_descriptor), 64, 0);
	if (!device->dma_pool) {
		err = -ENOMEM;
		goto err_dma_pool;
	}

	device->completion_pool = pci_pool_create("completion_pool", pdev,
						  sizeof(u64), SMP_CACHE_BYTES,
						  SMP_CACHE_BYTES);
	if (!device->completion_pool) {
		err = -ENOMEM;
		goto err_completion_pool;
	}

	device->pdev = pdev;
	pci_set_drvdata(pdev, device);
#ifdef CONFIG_PCI_MSI
	if (pci_enable_msi(pdev) == 0) {
		device->msi = 1;
	} else {
		device->msi = 0;
	}
#endif
	err = request_irq(pdev->irq, &ioat_do_interrupt, IRQF_SHARED, "ioat",
		device);
	if (err)
		goto err_irq;

	device->reg_base = reg_base;

	writeb(IOAT_INTRCTRL_MASTER_INT_EN,
	       device->reg_base + IOAT_INTRCTRL_OFFSET);
	pci_set_master(pdev);

	INIT_LIST_HEAD(&device->common.channels);
	enumerate_dma_channels(device);

	dma_cap_set(DMA_MEMCPY, device->common.cap_mask);
	device->common.device_alloc_chan_resources = ioat_dma_alloc_chan_resources;
	device->common.device_free_chan_resources = ioat_dma_free_chan_resources;
	device->common.device_prep_dma_memcpy = ioat_dma_prep_memcpy;
	device->common.device_is_tx_complete = ioat_dma_is_complete;
	device->common.device_issue_pending = ioat_dma_memcpy_issue_pending;
	device->common.device_dependency_added = ioat_dma_dependency_added;
	device->common.dev = &pdev->dev;
	printk(KERN_INFO "Intel(R) I/OAT DMA Engine found, %d channels\n",
	       device->common.chancnt);

	err = ioat_self_test(device);
	if (err)
		goto err_self_test;

	dma_async_device_register(&device->common);

	return 0;

err_self_test:
err_irq:
	pci_pool_destroy(device->completion_pool);
err_completion_pool:
	pci_pool_destroy(device->dma_pool);
err_dma_pool:
	kfree(device);
err_kzalloc:
	iounmap(reg_base);
err_ioremap:
	pci_release_regions(pdev);
err_request_regions:
err_set_dma_mask:
	pci_disable_device(pdev);
err_enable_device:
	printk(KERN_ERR "Intel(R) I/OAT DMA Engine initialization failed\n");

	return err;
}
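
/**
 * ioat_shutdown - unregister the DMA device on shutdown so no new
 *	transactions are accepted
 * @pdev: the PCI device being shut down
 */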
static void ioat_shutdown(struct pci_dev *pdev)
{
	struct ioat_device *device;
	device = pci_get_drvdata(pdev);

	dma_async_device_unregister(&device->common);
}
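
/**
 * ioat_remove - undo everything done in ioat_probe: unregister the DMA
 *	device, free the IRQ, destroy the pools, unmap the registers and
 *	free the per-channel and per-device structures
 * @pdev: the PCI device being removed
 */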
static void __devexit ioat_remove(struct pci_dev *pdev)
{
	struct ioat_device *device;
	struct dma_chan *chan, *_chan;
	struct ioat_dma_chan *ioat_chan;

	device = pci_get_drvdata(pdev);
	dma_async_device_unregister(&device->common);

	free_irq(device->pdev->irq, device);
#ifdef CONFIG_PCI_MSI
	if (device->msi)
		pci_disable_msi(device->pdev);
#endif
	pci_pool_destroy(device->dma_pool);
	pci_pool_destroy(device->completion_pool);
	iounmap(device->reg_base);
	pci_release_regions(pdev);
	pci_disable_device(pdev);
	list_for_each_entry_safe(chan, _chan, &device->common.channels,
				 device_node) {
		ioat_chan = to_ioat_chan(chan);
		list_del(&chan->device_node);
		kfree(ioat_chan);
	}
	kfree(device);
}
/* MODULE API */
MODULE_VERSION("1.9");
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Intel Corporation");

static int __init ioat_init_module(void)
{
	/* it's currently unsafe to unload this module */
	/* if forced, worst case is that rmmod hangs */
	__unsafe(THIS_MODULE);

	return pci_register_driver(&ioat_pci_driver);
}

module_init(ioat_init_module);

static void __exit ioat_exit_module(void)
{
	pci_unregister_driver(&ioat_pci_driver);
}

module_exit(ioat_exit_module);