dma.c 30 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140
  1. /*
  2. * Intel I/OAT DMA Linux driver
  3. * Copyright(c) 2004 - 2009 Intel Corporation.
  4. *
  5. * This program is free software; you can redistribute it and/or modify it
  6. * under the terms and conditions of the GNU General Public License,
  7. * version 2, as published by the Free Software Foundation.
  8. *
  9. * This program is distributed in the hope that it will be useful, but WITHOUT
  10. * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11. * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
  12. * more details.
  13. *
  14. * You should have received a copy of the GNU General Public License along with
  15. * this program; if not, write to the Free Software Foundation, Inc.,
  16. * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
  17. *
  18. * The full GNU General Public License is included in this distribution in
  19. * the file called "COPYING".
  20. *
  21. */
  22. /*
  23. * This driver supports an Intel I/OAT DMA engine, which does asynchronous
  24. * copy operations.
  25. */
  26. #include <linux/init.h>
  27. #include <linux/module.h>
  28. #include <linux/pci.h>
  29. #include <linux/interrupt.h>
  30. #include <linux/dmaengine.h>
  31. #include <linux/delay.h>
  32. #include <linux/dma-mapping.h>
  33. #include <linux/workqueue.h>
  34. #include <linux/i7300_idle.h>
  35. #include "dma.h"
  36. #include "registers.h"
  37. #include "hw.h"
  38. int ioat_pending_level = 4;
  39. module_param(ioat_pending_level, int, 0644);
  40. MODULE_PARM_DESC(ioat_pending_level,
  41. "high-water mark for pushing ioat descriptors (default: 4)");
  42. /* internal functions */
  43. static void ioat1_cleanup(struct ioat_dma_chan *ioat);
  44. static void ioat1_dma_start_null_desc(struct ioat_dma_chan *ioat);
  45. /**
  46. * ioat_dma_do_interrupt - handler used for single vector interrupt mode
  47. * @irq: interrupt id
  48. * @data: interrupt data
  49. */
  50. static irqreturn_t ioat_dma_do_interrupt(int irq, void *data)
  51. {
  52. struct ioatdma_device *instance = data;
  53. struct ioat_chan_common *chan;
  54. unsigned long attnstatus;
  55. int bit;
  56. u8 intrctrl;
  57. intrctrl = readb(instance->reg_base + IOAT_INTRCTRL_OFFSET);
  58. if (!(intrctrl & IOAT_INTRCTRL_MASTER_INT_EN))
  59. return IRQ_NONE;
  60. if (!(intrctrl & IOAT_INTRCTRL_INT_STATUS)) {
  61. writeb(intrctrl, instance->reg_base + IOAT_INTRCTRL_OFFSET);
  62. return IRQ_NONE;
  63. }
  64. attnstatus = readl(instance->reg_base + IOAT_ATTNSTATUS_OFFSET);
  65. for_each_bit(bit, &attnstatus, BITS_PER_LONG) {
  66. chan = ioat_chan_by_index(instance, bit);
  67. tasklet_schedule(&chan->cleanup_task);
  68. }
  69. writeb(intrctrl, instance->reg_base + IOAT_INTRCTRL_OFFSET);
  70. return IRQ_HANDLED;
  71. }
  72. /**
  73. * ioat_dma_do_interrupt_msix - handler used for vector-per-channel interrupt mode
  74. * @irq: interrupt id
  75. * @data: interrupt data
  76. */
  77. static irqreturn_t ioat_dma_do_interrupt_msix(int irq, void *data)
  78. {
  79. struct ioat_chan_common *chan = data;
  80. tasklet_schedule(&chan->cleanup_task);
  81. return IRQ_HANDLED;
  82. }
  83. static void ioat1_cleanup_tasklet(unsigned long data);
  84. /* common channel initialization */
  85. void ioat_init_channel(struct ioatdma_device *device,
  86. struct ioat_chan_common *chan, int idx,
  87. void (*timer_fn)(unsigned long),
  88. void (*tasklet)(unsigned long),
  89. unsigned long ioat)
  90. {
  91. struct dma_device *dma = &device->common;
  92. chan->device = device;
  93. chan->reg_base = device->reg_base + (0x80 * (idx + 1));
  94. spin_lock_init(&chan->cleanup_lock);
  95. chan->common.device = dma;
  96. list_add_tail(&chan->common.device_node, &dma->channels);
  97. device->idx[idx] = chan;
  98. init_timer(&chan->timer);
  99. chan->timer.function = timer_fn;
  100. chan->timer.data = ioat;
  101. tasklet_init(&chan->cleanup_task, tasklet, ioat);
  102. tasklet_disable(&chan->cleanup_task);
  103. }
  104. static void ioat1_timer_event(unsigned long data);
  105. /**
  106. * ioat1_dma_enumerate_channels - find and initialize the device's channels
  107. * @device: the device to be enumerated
  108. */
  109. static int ioat1_enumerate_channels(struct ioatdma_device *device)
  110. {
  111. u8 xfercap_scale;
  112. u32 xfercap;
  113. int i;
  114. struct ioat_dma_chan *ioat;
  115. struct device *dev = &device->pdev->dev;
  116. struct dma_device *dma = &device->common;
  117. INIT_LIST_HEAD(&dma->channels);
  118. dma->chancnt = readb(device->reg_base + IOAT_CHANCNT_OFFSET);
  119. dma->chancnt &= 0x1f; /* bits [4:0] valid */
  120. if (dma->chancnt > ARRAY_SIZE(device->idx)) {
  121. dev_warn(dev, "(%d) exceeds max supported channels (%zu)\n",
  122. dma->chancnt, ARRAY_SIZE(device->idx));
  123. dma->chancnt = ARRAY_SIZE(device->idx);
  124. }
  125. xfercap_scale = readb(device->reg_base + IOAT_XFERCAP_OFFSET);
  126. xfercap_scale &= 0x1f; /* bits [4:0] valid */
  127. xfercap = (xfercap_scale == 0 ? -1 : (1UL << xfercap_scale));
  128. dev_dbg(dev, "%s: xfercap = %d\n", __func__, xfercap);
  129. #ifdef CONFIG_I7300_IDLE_IOAT_CHANNEL
  130. if (i7300_idle_platform_probe(NULL, NULL, 1) == 0)
  131. dma->chancnt--;
  132. #endif
  133. for (i = 0; i < dma->chancnt; i++) {
  134. ioat = devm_kzalloc(dev, sizeof(*ioat), GFP_KERNEL);
  135. if (!ioat)
  136. break;
  137. ioat_init_channel(device, &ioat->base, i,
  138. ioat1_timer_event,
  139. ioat1_cleanup_tasklet,
  140. (unsigned long) ioat);
  141. ioat->xfercap = xfercap;
  142. spin_lock_init(&ioat->desc_lock);
  143. INIT_LIST_HEAD(&ioat->free_desc);
  144. INIT_LIST_HEAD(&ioat->used_desc);
  145. }
  146. dma->chancnt = i;
  147. return i;
  148. }
  149. /**
  150. * ioat_dma_memcpy_issue_pending - push potentially unrecognized appended
  151. * descriptors to hw
  152. * @chan: DMA channel handle
  153. */
  154. static inline void
  155. __ioat1_dma_memcpy_issue_pending(struct ioat_dma_chan *ioat)
  156. {
  157. void __iomem *reg_base = ioat->base.reg_base;
  158. dev_dbg(to_dev(&ioat->base), "%s: pending: %d\n",
  159. __func__, ioat->pending);
  160. ioat->pending = 0;
  161. writeb(IOAT_CHANCMD_APPEND, reg_base + IOAT1_CHANCMD_OFFSET);
  162. }
  163. static void ioat1_dma_memcpy_issue_pending(struct dma_chan *chan)
  164. {
  165. struct ioat_dma_chan *ioat = to_ioat_chan(chan);
  166. if (ioat->pending > 0) {
  167. spin_lock_bh(&ioat->desc_lock);
  168. __ioat1_dma_memcpy_issue_pending(ioat);
  169. spin_unlock_bh(&ioat->desc_lock);
  170. }
  171. }
  172. /**
  173. * ioat1_reset_channel - restart a channel
  174. * @ioat: IOAT DMA channel handle
  175. */
  176. static void ioat1_reset_channel(struct ioat_dma_chan *ioat)
  177. {
  178. struct ioat_chan_common *chan = &ioat->base;
  179. void __iomem *reg_base = chan->reg_base;
  180. u32 chansts, chanerr;
  181. dev_warn(to_dev(chan), "reset\n");
  182. chanerr = readl(reg_base + IOAT_CHANERR_OFFSET);
  183. chansts = *chan->completion & IOAT_CHANSTS_STATUS;
  184. if (chanerr) {
  185. dev_err(to_dev(chan),
  186. "chan%d, CHANSTS = 0x%08x CHANERR = 0x%04x, clearing\n",
  187. chan_num(chan), chansts, chanerr);
  188. writel(chanerr, reg_base + IOAT_CHANERR_OFFSET);
  189. }
  190. /*
  191. * whack it upside the head with a reset
  192. * and wait for things to settle out.
  193. * force the pending count to a really big negative
  194. * to make sure no one forces an issue_pending
  195. * while we're waiting.
  196. */
  197. ioat->pending = INT_MIN;
  198. writeb(IOAT_CHANCMD_RESET,
  199. reg_base + IOAT_CHANCMD_OFFSET(chan->device->version));
  200. set_bit(IOAT_RESET_PENDING, &chan->state);
  201. mod_timer(&chan->timer, jiffies + RESET_DELAY);
  202. }
  203. static dma_cookie_t ioat1_tx_submit(struct dma_async_tx_descriptor *tx)
  204. {
  205. struct dma_chan *c = tx->chan;
  206. struct ioat_dma_chan *ioat = to_ioat_chan(c);
  207. struct ioat_desc_sw *desc = tx_to_ioat_desc(tx);
  208. struct ioat_chan_common *chan = &ioat->base;
  209. struct ioat_desc_sw *first;
  210. struct ioat_desc_sw *chain_tail;
  211. dma_cookie_t cookie;
  212. spin_lock_bh(&ioat->desc_lock);
  213. /* cookie incr and addition to used_list must be atomic */
  214. cookie = c->cookie;
  215. cookie++;
  216. if (cookie < 0)
  217. cookie = 1;
  218. c->cookie = cookie;
  219. tx->cookie = cookie;
  220. dev_dbg(to_dev(&ioat->base), "%s: cookie: %d\n", __func__, cookie);
  221. /* write address into NextDescriptor field of last desc in chain */
  222. first = to_ioat_desc(desc->tx_list.next);
  223. chain_tail = to_ioat_desc(ioat->used_desc.prev);
  224. /* make descriptor updates globally visible before chaining */
  225. wmb();
  226. chain_tail->hw->next = first->txd.phys;
  227. list_splice_tail_init(&desc->tx_list, &ioat->used_desc);
  228. dump_desc_dbg(ioat, chain_tail);
  229. dump_desc_dbg(ioat, first);
  230. if (!test_and_set_bit(IOAT_COMPLETION_PENDING, &chan->state))
  231. mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT);
  232. ioat->pending += desc->hw->tx_cnt;
  233. if (ioat->pending >= ioat_pending_level)
  234. __ioat1_dma_memcpy_issue_pending(ioat);
  235. spin_unlock_bh(&ioat->desc_lock);
  236. return cookie;
  237. }
  238. /**
  239. * ioat_dma_alloc_descriptor - allocate and return a sw and hw descriptor pair
  240. * @ioat: the channel supplying the memory pool for the descriptors
  241. * @flags: allocation flags
  242. */
  243. static struct ioat_desc_sw *
  244. ioat_dma_alloc_descriptor(struct ioat_dma_chan *ioat, gfp_t flags)
  245. {
  246. struct ioat_dma_descriptor *desc;
  247. struct ioat_desc_sw *desc_sw;
  248. struct ioatdma_device *ioatdma_device;
  249. dma_addr_t phys;
  250. ioatdma_device = ioat->base.device;
  251. desc = pci_pool_alloc(ioatdma_device->dma_pool, flags, &phys);
  252. if (unlikely(!desc))
  253. return NULL;
  254. desc_sw = kzalloc(sizeof(*desc_sw), flags);
  255. if (unlikely(!desc_sw)) {
  256. pci_pool_free(ioatdma_device->dma_pool, desc, phys);
  257. return NULL;
  258. }
  259. memset(desc, 0, sizeof(*desc));
  260. INIT_LIST_HEAD(&desc_sw->tx_list);
  261. dma_async_tx_descriptor_init(&desc_sw->txd, &ioat->base.common);
  262. desc_sw->txd.tx_submit = ioat1_tx_submit;
  263. desc_sw->hw = desc;
  264. desc_sw->txd.phys = phys;
  265. set_desc_id(desc_sw, -1);
  266. return desc_sw;
  267. }
  268. static int ioat_initial_desc_count = 256;
  269. module_param(ioat_initial_desc_count, int, 0644);
  270. MODULE_PARM_DESC(ioat_initial_desc_count,
  271. "ioat1: initial descriptors per channel (default: 256)");
  272. /**
  273. * ioat1_dma_alloc_chan_resources - returns the number of allocated descriptors
  274. * @chan: the channel to be filled out
  275. */
  276. static int ioat1_dma_alloc_chan_resources(struct dma_chan *c)
  277. {
  278. struct ioat_dma_chan *ioat = to_ioat_chan(c);
  279. struct ioat_chan_common *chan = &ioat->base;
  280. struct ioat_desc_sw *desc;
  281. u32 chanerr;
  282. int i;
  283. LIST_HEAD(tmp_list);
  284. /* have we already been set up? */
  285. if (!list_empty(&ioat->free_desc))
  286. return ioat->desccount;
  287. /* Setup register to interrupt and write completion status on error */
  288. writew(IOAT_CHANCTRL_RUN, chan->reg_base + IOAT_CHANCTRL_OFFSET);
  289. chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET);
  290. if (chanerr) {
  291. dev_err(to_dev(chan), "CHANERR = %x, clearing\n", chanerr);
  292. writel(chanerr, chan->reg_base + IOAT_CHANERR_OFFSET);
  293. }
  294. /* Allocate descriptors */
  295. for (i = 0; i < ioat_initial_desc_count; i++) {
  296. desc = ioat_dma_alloc_descriptor(ioat, GFP_KERNEL);
  297. if (!desc) {
  298. dev_err(to_dev(chan), "Only %d initial descriptors\n", i);
  299. break;
  300. }
  301. set_desc_id(desc, i);
  302. list_add_tail(&desc->node, &tmp_list);
  303. }
  304. spin_lock_bh(&ioat->desc_lock);
  305. ioat->desccount = i;
  306. list_splice(&tmp_list, &ioat->free_desc);
  307. spin_unlock_bh(&ioat->desc_lock);
  308. /* allocate a completion writeback area */
  309. /* doing 2 32bit writes to mmio since 1 64b write doesn't work */
  310. chan->completion = pci_pool_alloc(chan->device->completion_pool,
  311. GFP_KERNEL, &chan->completion_dma);
  312. memset(chan->completion, 0, sizeof(*chan->completion));
  313. writel(((u64) chan->completion_dma) & 0x00000000FFFFFFFF,
  314. chan->reg_base + IOAT_CHANCMP_OFFSET_LOW);
  315. writel(((u64) chan->completion_dma) >> 32,
  316. chan->reg_base + IOAT_CHANCMP_OFFSET_HIGH);
  317. tasklet_enable(&chan->cleanup_task);
  318. ioat1_dma_start_null_desc(ioat); /* give chain to dma device */
  319. dev_dbg(to_dev(chan), "%s: allocated %d descriptors\n",
  320. __func__, ioat->desccount);
  321. return ioat->desccount;
  322. }
  323. /**
  324. * ioat1_dma_free_chan_resources - release all the descriptors
  325. * @chan: the channel to be cleaned
  326. */
  327. static void ioat1_dma_free_chan_resources(struct dma_chan *c)
  328. {
  329. struct ioat_dma_chan *ioat = to_ioat_chan(c);
  330. struct ioat_chan_common *chan = &ioat->base;
  331. struct ioatdma_device *ioatdma_device = chan->device;
  332. struct ioat_desc_sw *desc, *_desc;
  333. int in_use_descs = 0;
  334. /* Before freeing channel resources first check
  335. * if they have been previously allocated for this channel.
  336. */
  337. if (ioat->desccount == 0)
  338. return;
  339. tasklet_disable(&chan->cleanup_task);
  340. del_timer_sync(&chan->timer);
  341. ioat1_cleanup(ioat);
  342. /* Delay 100ms after reset to allow internal DMA logic to quiesce
  343. * before removing DMA descriptor resources.
  344. */
  345. writeb(IOAT_CHANCMD_RESET,
  346. chan->reg_base + IOAT_CHANCMD_OFFSET(chan->device->version));
  347. mdelay(100);
  348. spin_lock_bh(&ioat->desc_lock);
  349. list_for_each_entry_safe(desc, _desc, &ioat->used_desc, node) {
  350. dev_dbg(to_dev(chan), "%s: freeing %d from used list\n",
  351. __func__, desc_id(desc));
  352. dump_desc_dbg(ioat, desc);
  353. in_use_descs++;
  354. list_del(&desc->node);
  355. pci_pool_free(ioatdma_device->dma_pool, desc->hw,
  356. desc->txd.phys);
  357. kfree(desc);
  358. }
  359. list_for_each_entry_safe(desc, _desc,
  360. &ioat->free_desc, node) {
  361. list_del(&desc->node);
  362. pci_pool_free(ioatdma_device->dma_pool, desc->hw,
  363. desc->txd.phys);
  364. kfree(desc);
  365. }
  366. spin_unlock_bh(&ioat->desc_lock);
  367. pci_pool_free(ioatdma_device->completion_pool,
  368. chan->completion,
  369. chan->completion_dma);
  370. /* one is ok since we left it on there on purpose */
  371. if (in_use_descs > 1)
  372. dev_err(to_dev(chan), "Freeing %d in use descriptors!\n",
  373. in_use_descs - 1);
  374. chan->last_completion = 0;
  375. chan->completion_dma = 0;
  376. ioat->pending = 0;
  377. ioat->desccount = 0;
  378. }
  379. /**
  380. * ioat1_dma_get_next_descriptor - return the next available descriptor
  381. * @ioat: IOAT DMA channel handle
  382. *
  383. * Gets the next descriptor from the chain, and must be called with the
  384. * channel's desc_lock held. Allocates more descriptors if the channel
  385. * has run out.
  386. */
  387. static struct ioat_desc_sw *
  388. ioat1_dma_get_next_descriptor(struct ioat_dma_chan *ioat)
  389. {
  390. struct ioat_desc_sw *new;
  391. if (!list_empty(&ioat->free_desc)) {
  392. new = to_ioat_desc(ioat->free_desc.next);
  393. list_del(&new->node);
  394. } else {
  395. /* try to get another desc */
  396. new = ioat_dma_alloc_descriptor(ioat, GFP_ATOMIC);
  397. if (!new) {
  398. dev_err(to_dev(&ioat->base), "alloc failed\n");
  399. return NULL;
  400. }
  401. }
  402. dev_dbg(to_dev(&ioat->base), "%s: allocated: %d\n",
  403. __func__, desc_id(new));
  404. prefetch(new->hw);
  405. return new;
  406. }
  407. static struct dma_async_tx_descriptor *
  408. ioat1_dma_prep_memcpy(struct dma_chan *c, dma_addr_t dma_dest,
  409. dma_addr_t dma_src, size_t len, unsigned long flags)
  410. {
  411. struct ioat_dma_chan *ioat = to_ioat_chan(c);
  412. struct ioat_desc_sw *desc;
  413. size_t copy;
  414. LIST_HEAD(chain);
  415. dma_addr_t src = dma_src;
  416. dma_addr_t dest = dma_dest;
  417. size_t total_len = len;
  418. struct ioat_dma_descriptor *hw = NULL;
  419. int tx_cnt = 0;
  420. spin_lock_bh(&ioat->desc_lock);
  421. desc = ioat1_dma_get_next_descriptor(ioat);
  422. do {
  423. if (!desc)
  424. break;
  425. tx_cnt++;
  426. copy = min_t(size_t, len, ioat->xfercap);
  427. hw = desc->hw;
  428. hw->size = copy;
  429. hw->ctl = 0;
  430. hw->src_addr = src;
  431. hw->dst_addr = dest;
  432. list_add_tail(&desc->node, &chain);
  433. len -= copy;
  434. dest += copy;
  435. src += copy;
  436. if (len) {
  437. struct ioat_desc_sw *next;
  438. async_tx_ack(&desc->txd);
  439. next = ioat1_dma_get_next_descriptor(ioat);
  440. hw->next = next ? next->txd.phys : 0;
  441. dump_desc_dbg(ioat, desc);
  442. desc = next;
  443. } else
  444. hw->next = 0;
  445. } while (len);
  446. if (!desc) {
  447. struct ioat_chan_common *chan = &ioat->base;
  448. dev_err(to_dev(chan),
  449. "chan%d - get_next_desc failed\n", chan_num(chan));
  450. list_splice(&chain, &ioat->free_desc);
  451. spin_unlock_bh(&ioat->desc_lock);
  452. return NULL;
  453. }
  454. spin_unlock_bh(&ioat->desc_lock);
  455. desc->txd.flags = flags;
  456. desc->len = total_len;
  457. list_splice(&chain, &desc->tx_list);
  458. hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT);
  459. hw->ctl_f.compl_write = 1;
  460. hw->tx_cnt = tx_cnt;
  461. dump_desc_dbg(ioat, desc);
  462. return &desc->txd;
  463. }
  464. static void ioat1_cleanup_tasklet(unsigned long data)
  465. {
  466. struct ioat_dma_chan *chan = (void *)data;
  467. ioat1_cleanup(chan);
  468. writew(IOAT_CHANCTRL_RUN, chan->base.reg_base + IOAT_CHANCTRL_OFFSET);
  469. }
  470. static void ioat_unmap(struct pci_dev *pdev, dma_addr_t addr, size_t len,
  471. int direction, enum dma_ctrl_flags flags, bool dst)
  472. {
  473. if ((dst && (flags & DMA_COMPL_DEST_UNMAP_SINGLE)) ||
  474. (!dst && (flags & DMA_COMPL_SRC_UNMAP_SINGLE)))
  475. pci_unmap_single(pdev, addr, len, direction);
  476. else
  477. pci_unmap_page(pdev, addr, len, direction);
  478. }
  479. void ioat_dma_unmap(struct ioat_chan_common *chan, enum dma_ctrl_flags flags,
  480. size_t len, struct ioat_dma_descriptor *hw)
  481. {
  482. struct pci_dev *pdev = chan->device->pdev;
  483. size_t offset = len - hw->size;
  484. if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP))
  485. ioat_unmap(pdev, hw->dst_addr - offset, len,
  486. PCI_DMA_FROMDEVICE, flags, 1);
  487. if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP))
  488. ioat_unmap(pdev, hw->src_addr - offset, len,
  489. PCI_DMA_TODEVICE, flags, 0);
  490. }
  491. unsigned long ioat_get_current_completion(struct ioat_chan_common *chan)
  492. {
  493. unsigned long phys_complete;
  494. u64 completion;
  495. completion = *chan->completion;
  496. phys_complete = ioat_chansts_to_addr(completion);
  497. dev_dbg(to_dev(chan), "%s: phys_complete: %#llx\n", __func__,
  498. (unsigned long long) phys_complete);
  499. if (is_ioat_halted(completion)) {
  500. u32 chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET);
  501. dev_err(to_dev(chan), "Channel halted, chanerr = %x\n",
  502. chanerr);
  503. /* TODO do something to salvage the situation */
  504. }
  505. return phys_complete;
  506. }
  507. bool ioat_cleanup_preamble(struct ioat_chan_common *chan,
  508. unsigned long *phys_complete)
  509. {
  510. *phys_complete = ioat_get_current_completion(chan);
  511. if (*phys_complete == chan->last_completion)
  512. return false;
  513. clear_bit(IOAT_COMPLETION_ACK, &chan->state);
  514. mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT);
  515. return true;
  516. }
  517. static void __cleanup(struct ioat_dma_chan *ioat, unsigned long phys_complete)
  518. {
  519. struct ioat_chan_common *chan = &ioat->base;
  520. struct list_head *_desc, *n;
  521. struct dma_async_tx_descriptor *tx;
  522. dev_dbg(to_dev(chan), "%s: phys_complete: %lx\n",
  523. __func__, phys_complete);
  524. list_for_each_safe(_desc, n, &ioat->used_desc) {
  525. struct ioat_desc_sw *desc;
  526. prefetch(n);
  527. desc = list_entry(_desc, typeof(*desc), node);
  528. tx = &desc->txd;
  529. /*
  530. * Incoming DMA requests may use multiple descriptors,
  531. * due to exceeding xfercap, perhaps. If so, only the
  532. * last one will have a cookie, and require unmapping.
  533. */
  534. dump_desc_dbg(ioat, desc);
  535. if (tx->cookie) {
  536. chan->completed_cookie = tx->cookie;
  537. tx->cookie = 0;
  538. ioat_dma_unmap(chan, tx->flags, desc->len, desc->hw);
  539. if (tx->callback) {
  540. tx->callback(tx->callback_param);
  541. tx->callback = NULL;
  542. }
  543. }
  544. if (tx->phys != phys_complete) {
  545. /*
  546. * a completed entry, but not the last, so clean
  547. * up if the client is done with the descriptor
  548. */
  549. if (async_tx_test_ack(tx))
  550. list_move_tail(&desc->node, &ioat->free_desc);
  551. } else {
  552. /*
  553. * last used desc. Do not remove, so we can
  554. * append from it.
  555. */
  556. /* if nothing else is pending, cancel the
  557. * completion timeout
  558. */
  559. if (n == &ioat->used_desc) {
  560. dev_dbg(to_dev(chan),
  561. "%s cancel completion timeout\n",
  562. __func__);
  563. clear_bit(IOAT_COMPLETION_PENDING, &chan->state);
  564. }
  565. /* TODO check status bits? */
  566. break;
  567. }
  568. }
  569. chan->last_completion = phys_complete;
  570. }
  571. /**
  572. * ioat1_cleanup - cleanup up finished descriptors
  573. * @chan: ioat channel to be cleaned up
  574. *
  575. * To prevent lock contention we defer cleanup when the locks are
  576. * contended with a terminal timeout that forces cleanup and catches
  577. * completion notification errors.
  578. */
  579. static void ioat1_cleanup(struct ioat_dma_chan *ioat)
  580. {
  581. struct ioat_chan_common *chan = &ioat->base;
  582. unsigned long phys_complete;
  583. prefetch(chan->completion);
  584. if (!spin_trylock_bh(&chan->cleanup_lock))
  585. return;
  586. if (!ioat_cleanup_preamble(chan, &phys_complete)) {
  587. spin_unlock_bh(&chan->cleanup_lock);
  588. return;
  589. }
  590. if (!spin_trylock_bh(&ioat->desc_lock)) {
  591. spin_unlock_bh(&chan->cleanup_lock);
  592. return;
  593. }
  594. __cleanup(ioat, phys_complete);
  595. spin_unlock_bh(&ioat->desc_lock);
  596. spin_unlock_bh(&chan->cleanup_lock);
  597. }
  598. static void ioat1_timer_event(unsigned long data)
  599. {
  600. struct ioat_dma_chan *ioat = (void *) data;
  601. struct ioat_chan_common *chan = &ioat->base;
  602. dev_dbg(to_dev(chan), "%s: state: %lx\n", __func__, chan->state);
  603. spin_lock_bh(&chan->cleanup_lock);
  604. if (test_and_clear_bit(IOAT_RESET_PENDING, &chan->state)) {
  605. struct ioat_desc_sw *desc;
  606. spin_lock_bh(&ioat->desc_lock);
  607. /* restart active descriptors */
  608. desc = to_ioat_desc(ioat->used_desc.prev);
  609. ioat_set_chainaddr(ioat, desc->txd.phys);
  610. ioat_start(chan);
  611. ioat->pending = 0;
  612. set_bit(IOAT_COMPLETION_PENDING, &chan->state);
  613. mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT);
  614. spin_unlock_bh(&ioat->desc_lock);
  615. } else if (test_bit(IOAT_COMPLETION_PENDING, &chan->state)) {
  616. unsigned long phys_complete;
  617. spin_lock_bh(&ioat->desc_lock);
  618. /* if we haven't made progress and we have already
  619. * acknowledged a pending completion once, then be more
  620. * forceful with a restart
  621. */
  622. if (ioat_cleanup_preamble(chan, &phys_complete))
  623. __cleanup(ioat, phys_complete);
  624. else if (test_bit(IOAT_COMPLETION_ACK, &chan->state))
  625. ioat1_reset_channel(ioat);
  626. else {
  627. u64 status = ioat_chansts(chan);
  628. /* manually update the last completion address */
  629. if (ioat_chansts_to_addr(status) != 0)
  630. *chan->completion = status;
  631. set_bit(IOAT_COMPLETION_ACK, &chan->state);
  632. mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT);
  633. }
  634. spin_unlock_bh(&ioat->desc_lock);
  635. }
  636. spin_unlock_bh(&chan->cleanup_lock);
  637. }
  638. static enum dma_status
  639. ioat1_dma_is_complete(struct dma_chan *c, dma_cookie_t cookie,
  640. dma_cookie_t *done, dma_cookie_t *used)
  641. {
  642. struct ioat_dma_chan *ioat = to_ioat_chan(c);
  643. if (ioat_is_complete(c, cookie, done, used) == DMA_SUCCESS)
  644. return DMA_SUCCESS;
  645. ioat1_cleanup(ioat);
  646. return ioat_is_complete(c, cookie, done, used);
  647. }
  648. static void ioat1_dma_start_null_desc(struct ioat_dma_chan *ioat)
  649. {
  650. struct ioat_chan_common *chan = &ioat->base;
  651. struct ioat_desc_sw *desc;
  652. struct ioat_dma_descriptor *hw;
  653. spin_lock_bh(&ioat->desc_lock);
  654. desc = ioat1_dma_get_next_descriptor(ioat);
  655. if (!desc) {
  656. dev_err(to_dev(chan),
  657. "Unable to start null desc - get next desc failed\n");
  658. spin_unlock_bh(&ioat->desc_lock);
  659. return;
  660. }
  661. hw = desc->hw;
  662. hw->ctl = 0;
  663. hw->ctl_f.null = 1;
  664. hw->ctl_f.int_en = 1;
  665. hw->ctl_f.compl_write = 1;
  666. /* set size to non-zero value (channel returns error when size is 0) */
  667. hw->size = NULL_DESC_BUFFER_SIZE;
  668. hw->src_addr = 0;
  669. hw->dst_addr = 0;
  670. async_tx_ack(&desc->txd);
  671. hw->next = 0;
  672. list_add_tail(&desc->node, &ioat->used_desc);
  673. dump_desc_dbg(ioat, desc);
  674. ioat_set_chainaddr(ioat, desc->txd.phys);
  675. ioat_start(chan);
  676. spin_unlock_bh(&ioat->desc_lock);
  677. }
  678. /*
  679. * Perform a IOAT transaction to verify the HW works.
  680. */
  681. #define IOAT_TEST_SIZE 2000
  682. static void __devinit ioat_dma_test_callback(void *dma_async_param)
  683. {
  684. struct completion *cmp = dma_async_param;
  685. complete(cmp);
  686. }
  687. /**
  688. * ioat_dma_self_test - Perform a IOAT transaction to verify the HW works.
  689. * @device: device to be tested
  690. */
  691. static int __devinit ioat_dma_self_test(struct ioatdma_device *device)
  692. {
  693. int i;
  694. u8 *src;
  695. u8 *dest;
  696. struct dma_device *dma = &device->common;
  697. struct device *dev = &device->pdev->dev;
  698. struct dma_chan *dma_chan;
  699. struct dma_async_tx_descriptor *tx;
  700. dma_addr_t dma_dest, dma_src;
  701. dma_cookie_t cookie;
  702. int err = 0;
  703. struct completion cmp;
  704. unsigned long tmo;
  705. unsigned long flags;
  706. src = kzalloc(sizeof(u8) * IOAT_TEST_SIZE, GFP_KERNEL);
  707. if (!src)
  708. return -ENOMEM;
  709. dest = kzalloc(sizeof(u8) * IOAT_TEST_SIZE, GFP_KERNEL);
  710. if (!dest) {
  711. kfree(src);
  712. return -ENOMEM;
  713. }
  714. /* Fill in src buffer */
  715. for (i = 0; i < IOAT_TEST_SIZE; i++)
  716. src[i] = (u8)i;
  717. /* Start copy, using first DMA channel */
  718. dma_chan = container_of(dma->channels.next, struct dma_chan,
  719. device_node);
  720. if (dma->device_alloc_chan_resources(dma_chan) < 1) {
  721. dev_err(dev, "selftest cannot allocate chan resource\n");
  722. err = -ENODEV;
  723. goto out;
  724. }
  725. dma_src = dma_map_single(dev, src, IOAT_TEST_SIZE, DMA_TO_DEVICE);
  726. dma_dest = dma_map_single(dev, dest, IOAT_TEST_SIZE, DMA_FROM_DEVICE);
  727. flags = DMA_COMPL_SRC_UNMAP_SINGLE | DMA_COMPL_DEST_UNMAP_SINGLE |
  728. DMA_PREP_INTERRUPT;
  729. tx = device->common.device_prep_dma_memcpy(dma_chan, dma_dest, dma_src,
  730. IOAT_TEST_SIZE, flags);
  731. if (!tx) {
  732. dev_err(dev, "Self-test prep failed, disabling\n");
  733. err = -ENODEV;
  734. goto free_resources;
  735. }
  736. async_tx_ack(tx);
  737. init_completion(&cmp);
  738. tx->callback = ioat_dma_test_callback;
  739. tx->callback_param = &cmp;
  740. cookie = tx->tx_submit(tx);
  741. if (cookie < 0) {
  742. dev_err(dev, "Self-test setup failed, disabling\n");
  743. err = -ENODEV;
  744. goto free_resources;
  745. }
  746. dma->device_issue_pending(dma_chan);
  747. tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000));
  748. if (tmo == 0 ||
  749. dma->device_is_tx_complete(dma_chan, cookie, NULL, NULL)
  750. != DMA_SUCCESS) {
  751. dev_err(dev, "Self-test copy timed out, disabling\n");
  752. err = -ENODEV;
  753. goto free_resources;
  754. }
  755. if (memcmp(src, dest, IOAT_TEST_SIZE)) {
  756. dev_err(dev, "Self-test copy failed compare, disabling\n");
  757. err = -ENODEV;
  758. goto free_resources;
  759. }
  760. free_resources:
  761. dma->device_free_chan_resources(dma_chan);
  762. out:
  763. kfree(src);
  764. kfree(dest);
  765. return err;
  766. }
  767. static char ioat_interrupt_style[32] = "msix";
  768. module_param_string(ioat_interrupt_style, ioat_interrupt_style,
  769. sizeof(ioat_interrupt_style), 0644);
  770. MODULE_PARM_DESC(ioat_interrupt_style,
  771. "set ioat interrupt style: msix (default), "
  772. "msix-single-vector, msi, intx)");
  773. /**
  774. * ioat_dma_setup_interrupts - setup interrupt handler
  775. * @device: ioat device
  776. */
  777. static int ioat_dma_setup_interrupts(struct ioatdma_device *device)
  778. {
  779. struct ioat_chan_common *chan;
  780. struct pci_dev *pdev = device->pdev;
  781. struct device *dev = &pdev->dev;
  782. struct msix_entry *msix;
  783. int i, j, msixcnt;
  784. int err = -EINVAL;
  785. u8 intrctrl = 0;
  786. if (!strcmp(ioat_interrupt_style, "msix"))
  787. goto msix;
  788. if (!strcmp(ioat_interrupt_style, "msix-single-vector"))
  789. goto msix_single_vector;
  790. if (!strcmp(ioat_interrupt_style, "msi"))
  791. goto msi;
  792. if (!strcmp(ioat_interrupt_style, "intx"))
  793. goto intx;
  794. dev_err(dev, "invalid ioat_interrupt_style %s\n", ioat_interrupt_style);
  795. goto err_no_irq;
  796. msix:
  797. /* The number of MSI-X vectors should equal the number of channels */
  798. msixcnt = device->common.chancnt;
  799. for (i = 0; i < msixcnt; i++)
  800. device->msix_entries[i].entry = i;
  801. err = pci_enable_msix(pdev, device->msix_entries, msixcnt);
  802. if (err < 0)
  803. goto msi;
  804. if (err > 0)
  805. goto msix_single_vector;
  806. for (i = 0; i < msixcnt; i++) {
  807. msix = &device->msix_entries[i];
  808. chan = ioat_chan_by_index(device, i);
  809. err = devm_request_irq(dev, msix->vector,
  810. ioat_dma_do_interrupt_msix, 0,
  811. "ioat-msix", chan);
  812. if (err) {
  813. for (j = 0; j < i; j++) {
  814. msix = &device->msix_entries[j];
  815. chan = ioat_chan_by_index(device, j);
  816. devm_free_irq(dev, msix->vector, chan);
  817. }
  818. goto msix_single_vector;
  819. }
  820. }
  821. intrctrl |= IOAT_INTRCTRL_MSIX_VECTOR_CONTROL;
  822. goto done;
  823. msix_single_vector:
  824. msix = &device->msix_entries[0];
  825. msix->entry = 0;
  826. err = pci_enable_msix(pdev, device->msix_entries, 1);
  827. if (err)
  828. goto msi;
  829. err = devm_request_irq(dev, msix->vector, ioat_dma_do_interrupt, 0,
  830. "ioat-msix", device);
  831. if (err) {
  832. pci_disable_msix(pdev);
  833. goto msi;
  834. }
  835. goto done;
  836. msi:
  837. err = pci_enable_msi(pdev);
  838. if (err)
  839. goto intx;
  840. err = devm_request_irq(dev, pdev->irq, ioat_dma_do_interrupt, 0,
  841. "ioat-msi", device);
  842. if (err) {
  843. pci_disable_msi(pdev);
  844. goto intx;
  845. }
  846. goto done;
  847. intx:
  848. err = devm_request_irq(dev, pdev->irq, ioat_dma_do_interrupt,
  849. IRQF_SHARED, "ioat-intx", device);
  850. if (err)
  851. goto err_no_irq;
  852. done:
  853. if (device->intr_quirk)
  854. device->intr_quirk(device);
  855. intrctrl |= IOAT_INTRCTRL_MASTER_INT_EN;
  856. writeb(intrctrl, device->reg_base + IOAT_INTRCTRL_OFFSET);
  857. return 0;
  858. err_no_irq:
  859. /* Disable all interrupt generation */
  860. writeb(0, device->reg_base + IOAT_INTRCTRL_OFFSET);
  861. dev_err(dev, "no usable interrupts\n");
  862. return err;
  863. }
  864. static void ioat_disable_interrupts(struct ioatdma_device *device)
  865. {
  866. /* Disable all interrupt generation */
  867. writeb(0, device->reg_base + IOAT_INTRCTRL_OFFSET);
  868. }
  869. int __devinit ioat_probe(struct ioatdma_device *device)
  870. {
  871. int err = -ENODEV;
  872. struct dma_device *dma = &device->common;
  873. struct pci_dev *pdev = device->pdev;
  874. struct device *dev = &pdev->dev;
  875. /* DMA coherent memory pool for DMA descriptor allocations */
  876. device->dma_pool = pci_pool_create("dma_desc_pool", pdev,
  877. sizeof(struct ioat_dma_descriptor),
  878. 64, 0);
  879. if (!device->dma_pool) {
  880. err = -ENOMEM;
  881. goto err_dma_pool;
  882. }
  883. device->completion_pool = pci_pool_create("completion_pool", pdev,
  884. sizeof(u64), SMP_CACHE_BYTES,
  885. SMP_CACHE_BYTES);
  886. if (!device->completion_pool) {
  887. err = -ENOMEM;
  888. goto err_completion_pool;
  889. }
  890. device->enumerate_channels(device);
  891. dma_cap_set(DMA_MEMCPY, dma->cap_mask);
  892. dma->dev = &pdev->dev;
  893. dev_err(dev, "Intel(R) I/OAT DMA Engine found,"
  894. " %d channels, device version 0x%02x, driver version %s\n",
  895. dma->chancnt, device->version, IOAT_DMA_VERSION);
  896. if (!dma->chancnt) {
  897. dev_err(dev, "Intel(R) I/OAT DMA Engine problem found: "
  898. "zero channels detected\n");
  899. goto err_setup_interrupts;
  900. }
  901. err = ioat_dma_setup_interrupts(device);
  902. if (err)
  903. goto err_setup_interrupts;
  904. err = ioat_dma_self_test(device);
  905. if (err)
  906. goto err_self_test;
  907. return 0;
  908. err_self_test:
  909. ioat_disable_interrupts(device);
  910. err_setup_interrupts:
  911. pci_pool_destroy(device->completion_pool);
  912. err_completion_pool:
  913. pci_pool_destroy(device->dma_pool);
  914. err_dma_pool:
  915. return err;
  916. }
  917. int __devinit ioat_register(struct ioatdma_device *device)
  918. {
  919. int err = dma_async_device_register(&device->common);
  920. if (err) {
  921. ioat_disable_interrupts(device);
  922. pci_pool_destroy(device->completion_pool);
  923. pci_pool_destroy(device->dma_pool);
  924. }
  925. return err;
  926. }
  927. /* ioat1_intr_quirk - fix up dma ctrl register to enable / disable msi */
  928. static void ioat1_intr_quirk(struct ioatdma_device *device)
  929. {
  930. struct pci_dev *pdev = device->pdev;
  931. u32 dmactrl;
  932. pci_read_config_dword(pdev, IOAT_PCI_DMACTRL_OFFSET, &dmactrl);
  933. if (pdev->msi_enabled)
  934. dmactrl |= IOAT_PCI_DMACTRL_MSI_EN;
  935. else
  936. dmactrl &= ~IOAT_PCI_DMACTRL_MSI_EN;
  937. pci_write_config_dword(pdev, IOAT_PCI_DMACTRL_OFFSET, dmactrl);
  938. }
  939. int __devinit ioat1_dma_probe(struct ioatdma_device *device, int dca)
  940. {
  941. struct pci_dev *pdev = device->pdev;
  942. struct dma_device *dma;
  943. int err;
  944. device->intr_quirk = ioat1_intr_quirk;
  945. device->enumerate_channels = ioat1_enumerate_channels;
  946. dma = &device->common;
  947. dma->device_prep_dma_memcpy = ioat1_dma_prep_memcpy;
  948. dma->device_issue_pending = ioat1_dma_memcpy_issue_pending;
  949. dma->device_alloc_chan_resources = ioat1_dma_alloc_chan_resources;
  950. dma->device_free_chan_resources = ioat1_dma_free_chan_resources;
  951. dma->device_is_tx_complete = ioat1_dma_is_complete;
  952. err = ioat_probe(device);
  953. if (err)
  954. return err;
  955. ioat_set_tcp_copy_break(4096);
  956. err = ioat_register(device);
  957. if (err)
  958. return err;
  959. if (dca)
  960. device->dca = ioat_dca_init(pdev, device->reg_base);
  961. return err;
  962. }
  963. void __devexit ioat_dma_remove(struct ioatdma_device *device)
  964. {
  965. struct dma_device *dma = &device->common;
  966. ioat_disable_interrupts(device);
  967. dma_async_device_unregister(dma);
  968. pci_pool_destroy(device->dma_pool);
  969. pci_pool_destroy(device->completion_pool);
  970. INIT_LIST_HEAD(&dma->channels);
  971. }