
Merge commit 'dmaengine-3.13-v2' of git://git.kernel.org/pub/scm/linux/kernel/git/djbw/dmaengine

Pull dmaengine changes from Dan

1/ Bartlomiej and Dan finalized a rework of the dma address unmap
   implementation.

2/ In the course of testing 1/ a collection of enhancements to dmatest
   fell out.  Notably basic performance statistics, and fixed / enhanced
   test control through new module parameters 'run', 'wait', 'noverify',
   and 'verbose'.  Thanks to Andriy and Linus for their review.

3/ Testing the raid related corner cases of 1/ triggered bugs in the
   recently added 16-source operation support in the ioatdma driver.

4/ Some minor fixes / cleanups to mv_xor and ioatdma.

Conflicts:
	drivers/dma/dmatest.c

Signed-off-by: Vinod Koul <vinod.koul@intel.com>
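
For orientation, here is a minimal consumer-side sketch of the reworked unmap
handling from 1/ above, built only from the helpers added in this series
(dmaengine_get_unmap_data(), dma_set_unmap(), dmaengine_unmap_put()) and
mirroring the async_memcpy.c conversion below.  It is an illustration, not a
drop-in helper: channel selection and error handling are trimmed, and the
function name is made up.

	/* Sketch: issue one page-to-page copy, handing ownership of the DMA
	 * mappings to the descriptor (assumes <linux/dmaengine.h> and a
	 * memcpy-capable channel). */
	static struct dma_async_tx_descriptor *
	sketch_copy_page(struct dma_chan *chan, struct page *dst,
			 struct page *src, size_t len)
	{
		struct dma_device *dev = chan->device;
		struct dmaengine_unmap_data *unmap;
		struct dma_async_tx_descriptor *tx;

		/* one source + one destination -> two address slots */
		unmap = dmaengine_get_unmap_data(dev->dev, 2, GFP_NOIO);
		if (!unmap)
			return NULL;

		unmap->to_cnt = 1;
		unmap->addr[0] = dma_map_page(dev->dev, src, 0, len, DMA_TO_DEVICE);
		unmap->from_cnt = 1;
		unmap->addr[1] = dma_map_page(dev->dev, dst, 0, len, DMA_FROM_DEVICE);
		unmap->len = len;

		tx = dev->device_prep_dma_memcpy(chan, unmap->addr[1],
						 unmap->addr[0], len, DMA_CTRL_ACK);
		if (tx)
			dma_set_unmap(tx, unmap);	/* descriptor takes a reference */

		dmaengine_unmap_put(unmap);		/* drop the caller's reference */
		return tx;
	}

The point of the rework is visible in the last two calls: the unmap bookkeeping
now travels with the descriptor rather than with DMA_COMPL_* completion flags.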
Vinod Koul authored 11 years ago
parent
commit df12a3178d
39 files changed, with 974 additions and 1750 deletions
  1. Documentation/dmatest.txt (+41 -31)
  2. arch/arm/include/asm/hardware/iop3xx-adma.h (+0 -30)
  3. arch/arm/include/asm/hardware/iop_adma.h (+0 -4)
  4. arch/arm/mach-iop13xx/include/mach/adma.h (+0 -26)
  5. crypto/async_tx/async_memcpy.c (+21 -16)
  6. crypto/async_tx/async_pq.c (+99 -75)
  7. crypto/async_tx/async_raid6_recov.c (+43 -18)
  8. crypto/async_tx/async_xor.c (+66 -57)
  9. crypto/async_tx/raid6test.c (+9 -1)
  10. drivers/ata/pata_arasan_cf.c (+1 -2)
  11. drivers/dma/amba-pl08x.c (+1 -31)
  12. drivers/dma/at_hdmac.c (+1 -25)
  13. drivers/dma/dmaengine.c (+178 -84)
  14. drivers/dma/dmatest.c (+307 -403)
  15. drivers/dma/dw/core.c (+1 -24)
  16. drivers/dma/ep93xx_dma.c (+1 -29)
  17. drivers/dma/fsldma.c (+1 -16)
  18. drivers/dma/ioat/dma.c (+6 -43)
  19. drivers/dma/ioat/dma.h (+0 -14)
  20. drivers/dma/ioat/dma_v2.c (+1 -1)
  21. drivers/dma/ioat/dma_v2.h (+0 -1)
  22. drivers/dma/ioat/dma_v3.c (+43 -272)
  23. drivers/dma/ioat/pci.c (+15 -5)
  24. drivers/dma/iop-adma.c (+3 -94)
  25. drivers/dma/mv_xor.c (+4 -48)
  26. drivers/dma/mv_xor.h (+13 -12)
  27. drivers/dma/pl330.c (+2 -0)
  28. drivers/dma/ppc4xx/adma.c (+1 -269)
  29. drivers/dma/timb_dma.c (+1 -36)
  30. drivers/dma/txx9dmac.c (+1 -24)
  31. drivers/media/platform/m2m-deinterlace.c (+1 -2)
  32. drivers/media/platform/timblogiw.c (+1 -1)
  33. drivers/misc/carma/carma-fpga.c (+1 -2)
  34. drivers/mtd/nand/atmel_nand.c (+1 -2)
  35. drivers/mtd/nand/fsmc_nand.c (+0 -2)
  36. drivers/net/ethernet/micrel/ks8842.c (+2 -4)
  37. drivers/ntb/ntb_transport.c (+56 -30)
  38. drivers/spi/spi-dw-mid.c (+2 -2)
  39. include/linux/dmaengine.h (+50 -14)

+ 41 - 31
Documentation/dmatest.txt

@@ -15,39 +15,48 @@ be built as module or inside kernel. Let's consider those cases.
 
 	Part 2 - When dmatest is built as a module...
 
-After mounting debugfs and loading the module, the /sys/kernel/debug/dmatest
-folder with nodes will be created. There are two important files located. First
-is the 'run' node that controls run and stop phases of the test, and the second
-one, 'results', is used to get the test case results.
-
-Note that in this case test will not run on load automatically.
-
 Example of usage:
+	% modprobe dmatest channel=dma0chan0 timeout=2000 iterations=1 run=1
+
+...or:
+	% modprobe dmatest
 	% echo dma0chan0 > /sys/module/dmatest/parameters/channel
 	% echo 2000 > /sys/module/dmatest/parameters/timeout
 	% echo 1 > /sys/module/dmatest/parameters/iterations
-	% echo 1 > /sys/kernel/debug/dmatest/run
+	% echo 1 > /sys/module/dmatest/parameters/run
+
+...or on the kernel command line:
+
+	dmatest.channel=dma0chan0 dmatest.timeout=2000 dmatest.iterations=1 dmatest.run=1
 
 Hint: available channel list could be extracted by running the following
 command:
 	% ls -1 /sys/class/dma/
 
-After a while you will start to get messages about current status or error like
-in the original code.
+Once started a message like "dmatest: Started 1 threads using dma0chan0" is
+emitted.  After that only test failure messages are reported until the test
+stops.
 
 Note that running a new test will not stop any in progress test.
 
-The following command should return actual state of the test.
-	% cat /sys/kernel/debug/dmatest/run
-
-To wait for test done the user may perform a busy loop that checks the state.
-
-	% while [ $(cat /sys/kernel/debug/dmatest/run) = "Y" ]
-	> do
-	> 	echo -n "."
-	> 	sleep 1
-	> done
-	> echo
+The following command returns the state of the test.
+	% cat /sys/module/dmatest/parameters/run
+
+To wait for test completion userspace can poll 'run' until it is false, or use
+the wait parameter.  Specifying 'wait=1' when loading the module causes module
+initialization to pause until a test run has completed, while reading
+/sys/module/dmatest/parameters/wait waits for any running test to complete
+before returning.  For example, the following scripts wait for 42 tests
+to complete before exiting.  Note that if 'iterations' is set to 'infinite' then
+waiting is disabled.
+
+Example:
+	% modprobe dmatest run=1 iterations=42 wait=1
+	% modprobe -r dmatest
+...or:
+	% modprobe dmatest run=1 iterations=42
+	% cat /sys/module/dmatest/parameters/wait
+	% modprobe -r dmatest
 
 	Part 3 - When built-in in the kernel...
 
@@ -62,21 +71,22 @@ case. You always could check them at run-time by running
 
 	Part 4 - Gathering the test results
 
-The module provides a storage for the test results in the memory. The gathered
-data could be used after test is done.
+Test results are printed to the kernel log buffer with the format:
 
-The special file 'results' in the debugfs represents gathered data of the in
-progress test. The messages collected are printed to the kernel log as well.
+"dmatest: result <channel>: <test id>: '<error msg>' with src_off=<val> dst_off=<val> len=<val> (<err code>)"
 
 Example of output:
-	% cat /sys/kernel/debug/dmatest/results
-	dma0chan0-copy0: #1: No errors with src_off=0x7bf dst_off=0x8ad len=0x3fea (0)
+	% dmesg | tail -n 1
+	dmatest: result dma0chan0-copy0: #1: No errors with src_off=0x7bf dst_off=0x8ad len=0x3fea (0)
 
 The message format is unified across the different types of errors. A number in
 the parens represents additional information, e.g. error code, error counter,
-or status.
+or status.  A test thread also emits a summary line at completion listing the
+number of tests executed, number that failed, and a result code.
 
-Comparison between buffers is stored to the dedicated structure.
+Example:
+	% dmesg | tail -n 1
+	dmatest: dma0chan0-copy0: summary 1 test, 0 failures 1000 iops 100000 KB/s (0)
 
-Note that the verify result is now accessible only via file 'results' in the
-debugfs.
+The details of a data miscompare error are also emitted, but do not follow the
+above format.

+ 0 - 30
arch/arm/include/asm/hardware/iop3xx-adma.h

@@ -393,36 +393,6 @@ static inline int iop_chan_zero_sum_slot_count(size_t len, int src_cnt,
 	return slot_cnt;
 }
 
-static inline int iop_desc_is_pq(struct iop_adma_desc_slot *desc)
-{
-	return 0;
-}
-
-static inline u32 iop_desc_get_dest_addr(struct iop_adma_desc_slot *desc,
-					struct iop_adma_chan *chan)
-{
-	union iop3xx_desc hw_desc = { .ptr = desc->hw_desc, };
-
-	switch (chan->device->id) {
-	case DMA0_ID:
-	case DMA1_ID:
-		return hw_desc.dma->dest_addr;
-	case AAU_ID:
-		return hw_desc.aau->dest_addr;
-	default:
-		BUG();
-	}
-	return 0;
-}
-
-
-static inline u32 iop_desc_get_qdest_addr(struct iop_adma_desc_slot *desc,
-					  struct iop_adma_chan *chan)
-{
-	BUG();
-	return 0;
-}
-
 static inline u32 iop_desc_get_byte_count(struct iop_adma_desc_slot *desc,
 					struct iop_adma_chan *chan)
 {

+ 0 - 4
arch/arm/include/asm/hardware/iop_adma.h

@@ -82,8 +82,6 @@ struct iop_adma_chan {
  * @slot_cnt: total slots used in an transaction (group of operations)
  * @slots_per_op: number of slots per operation
  * @idx: pool index
- * @unmap_src_cnt: number of xor sources
- * @unmap_len: transaction bytecount
  * @tx_list: list of descriptors that are associated with one operation
  * @async_tx: support for the async_tx api
  * @group_list: list of slots that make up a multi-descriptor transaction
@@ -99,8 +97,6 @@ struct iop_adma_desc_slot {
 	u16 slot_cnt;
 	u16 slots_per_op;
 	u16 idx;
-	u16 unmap_src_cnt;
-	size_t unmap_len;
 	struct list_head tx_list;
 	struct dma_async_tx_descriptor async_tx;
 	union {

+ 0 - 26
arch/arm/mach-iop13xx/include/mach/adma.h

@@ -218,20 +218,6 @@ iop_chan_xor_slot_count(size_t len, int src_cnt, int *slots_per_op)
 #define iop_chan_pq_slot_count iop_chan_xor_slot_count
 #define iop_chan_pq_zero_sum_slot_count iop_chan_xor_slot_count
 
-static inline u32 iop_desc_get_dest_addr(struct iop_adma_desc_slot *desc,
-					struct iop_adma_chan *chan)
-{
-	struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc;
-	return hw_desc->dest_addr;
-}
-
-static inline u32 iop_desc_get_qdest_addr(struct iop_adma_desc_slot *desc,
-					  struct iop_adma_chan *chan)
-{
-	struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc;
-	return hw_desc->q_dest_addr;
-}
-
 static inline u32 iop_desc_get_byte_count(struct iop_adma_desc_slot *desc,
 					struct iop_adma_chan *chan)
 {
@@ -350,18 +336,6 @@ iop_desc_init_pq(struct iop_adma_desc_slot *desc, int src_cnt,
 	hw_desc->desc_ctrl = u_desc_ctrl.value;
 }
 
-static inline int iop_desc_is_pq(struct iop_adma_desc_slot *desc)
-{
-	struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc;
-	union {
-		u32 value;
-		struct iop13xx_adma_desc_ctrl field;
-	} u_desc_ctrl;
-
-	u_desc_ctrl.value = hw_desc->desc_ctrl;
-	return u_desc_ctrl.field.pq_xfer_en;
-}
-
 static inline void
 iop_desc_init_pq_zero_sum(struct iop_adma_desc_slot *desc, int src_cnt,
 			  unsigned long flags)

+ 21 - 16
crypto/async_tx/async_memcpy.c

@@ -50,33 +50,36 @@ async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset,
 						      &dest, 1, &src, 1, len);
 	struct dma_device *device = chan ? chan->device : NULL;
 	struct dma_async_tx_descriptor *tx = NULL;
+	struct dmaengine_unmap_data *unmap = NULL;
 
-	if (device && is_dma_copy_aligned(device, src_offset, dest_offset, len)) {
-		dma_addr_t dma_dest, dma_src;
+	if (device)
+		unmap = dmaengine_get_unmap_data(device->dev, 2, GFP_NOIO);
+
+	if (unmap && is_dma_copy_aligned(device, src_offset, dest_offset, len)) {
 		unsigned long dma_prep_flags = 0;
 
 		if (submit->cb_fn)
 			dma_prep_flags |= DMA_PREP_INTERRUPT;
 		if (submit->flags & ASYNC_TX_FENCE)
 			dma_prep_flags |= DMA_PREP_FENCE;
-		dma_dest = dma_map_page(device->dev, dest, dest_offset, len,
-					DMA_FROM_DEVICE);
-
-		dma_src = dma_map_page(device->dev, src, src_offset, len,
-				       DMA_TO_DEVICE);
-
-		tx = device->device_prep_dma_memcpy(chan, dma_dest, dma_src,
-						    len, dma_prep_flags);
-		if (!tx) {
-			dma_unmap_page(device->dev, dma_dest, len,
-				       DMA_FROM_DEVICE);
-			dma_unmap_page(device->dev, dma_src, len,
-				       DMA_TO_DEVICE);
-		}
+
+		unmap->to_cnt = 1;
+		unmap->addr[0] = dma_map_page(device->dev, src, src_offset, len,
+					      DMA_TO_DEVICE);
+		unmap->from_cnt = 1;
+		unmap->addr[1] = dma_map_page(device->dev, dest, dest_offset, len,
+					      DMA_FROM_DEVICE);
+		unmap->len = len;
+
+		tx = device->device_prep_dma_memcpy(chan, unmap->addr[1],
+						    unmap->addr[0], len,
+						    dma_prep_flags);
 	}
 
 	if (tx) {
 		pr_debug("%s: (async) len: %zu\n", __func__, len);
+
+		dma_set_unmap(tx, unmap);
 		async_tx_submit(chan, tx, submit);
 	} else {
 		void *dest_buf, *src_buf;
@@ -96,6 +99,8 @@ async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset,
 		async_tx_sync_epilog(submit);
 	}
 
+	dmaengine_unmap_put(unmap);
+
 	return tx;
 }
 EXPORT_SYMBOL_GPL(async_memcpy);

+ 99 - 75
crypto/async_tx/async_pq.c

@@ -46,49 +46,24 @@ static struct page *pq_scribble_page;
  * do_async_gen_syndrome - asynchronously calculate P and/or Q
  */
 static __async_inline struct dma_async_tx_descriptor *
-do_async_gen_syndrome(struct dma_chan *chan, struct page **blocks,
-		      const unsigned char *scfs, unsigned int offset, int disks,
-		      size_t len, dma_addr_t *dma_src,
+do_async_gen_syndrome(struct dma_chan *chan,
+		      const unsigned char *scfs, int disks,
+		      struct dmaengine_unmap_data *unmap,
+		      enum dma_ctrl_flags dma_flags,
 		      struct async_submit_ctl *submit)
 {
 	struct dma_async_tx_descriptor *tx = NULL;
 	struct dma_device *dma = chan->device;
-	enum dma_ctrl_flags dma_flags = 0;
 	enum async_tx_flags flags_orig = submit->flags;
 	dma_async_tx_callback cb_fn_orig = submit->cb_fn;
 	dma_async_tx_callback cb_param_orig = submit->cb_param;
 	int src_cnt = disks - 2;
-	unsigned char coefs[src_cnt];
 	unsigned short pq_src_cnt;
 	dma_addr_t dma_dest[2];
 	int src_off = 0;
-	int idx;
-	int i;
 
-	/* DMAs use destinations as sources, so use BIDIRECTIONAL mapping */
-	if (P(blocks, disks))
-		dma_dest[0] = dma_map_page(dma->dev, P(blocks, disks), offset,
-					   len, DMA_BIDIRECTIONAL);
-	else
-		dma_flags |= DMA_PREP_PQ_DISABLE_P;
-	if (Q(blocks, disks))
-		dma_dest[1] = dma_map_page(dma->dev, Q(blocks, disks), offset,
-					   len, DMA_BIDIRECTIONAL);
-	else
-		dma_flags |= DMA_PREP_PQ_DISABLE_Q;
-
-	/* convert source addresses being careful to collapse 'empty'
-	 * sources and update the coefficients accordingly
-	 */
-	for (i = 0, idx = 0; i < src_cnt; i++) {
-		if (blocks[i] == NULL)
-			continue;
-		dma_src[idx] = dma_map_page(dma->dev, blocks[i], offset, len,
-					    DMA_TO_DEVICE);
-		coefs[idx] = scfs[i];
-		idx++;
-	}
-	src_cnt = idx;
+	if (submit->flags & ASYNC_TX_FENCE)
+		dma_flags |= DMA_PREP_FENCE;
 
 	while (src_cnt > 0) {
 		submit->flags = flags_orig;
@@ -100,28 +75,25 @@ do_async_gen_syndrome(struct dma_chan *chan, struct page **blocks,
 		if (src_cnt > pq_src_cnt) {
 			submit->flags &= ~ASYNC_TX_ACK;
 			submit->flags |= ASYNC_TX_FENCE;
-			dma_flags |= DMA_COMPL_SKIP_DEST_UNMAP;
 			submit->cb_fn = NULL;
 			submit->cb_param = NULL;
 		} else {
-			dma_flags &= ~DMA_COMPL_SKIP_DEST_UNMAP;
 			submit->cb_fn = cb_fn_orig;
 			submit->cb_param = cb_param_orig;
 			if (cb_fn_orig)
 				dma_flags |= DMA_PREP_INTERRUPT;
 		}
-		if (submit->flags & ASYNC_TX_FENCE)
-			dma_flags |= DMA_PREP_FENCE;
 
-		/* Since we have clobbered the src_list we are committed
-		 * to doing this asynchronously.  Drivers force forward
-		 * progress in case they can not provide a descriptor
+		/* Drivers force forward progress in case they can not provide
+		 * a descriptor
 		 */
 		for (;;) {
+			dma_dest[0] = unmap->addr[disks - 2];
+			dma_dest[1] = unmap->addr[disks - 1];
 			tx = dma->device_prep_dma_pq(chan, dma_dest,
-						     &dma_src[src_off],
+						     &unmap->addr[src_off],
 						     pq_src_cnt,
-						     &coefs[src_off], len,
+						     &scfs[src_off], unmap->len,
 						     dma_flags);
 			if (likely(tx))
 				break;
@@ -129,6 +101,7 @@ do_async_gen_syndrome(struct dma_chan *chan, struct page **blocks,
 			dma_async_issue_pending(chan);
 		}
 
+		dma_set_unmap(tx, unmap);
 		async_tx_submit(chan, tx, submit);
 		submit->depend_tx = tx;
 
@@ -188,10 +161,6 @@ do_sync_gen_syndrome(struct page **blocks, unsigned int offset, int disks,
  * set to NULL those buffers will be replaced with the raid6_zero_page
  * in the synchronous path and omitted in the hardware-asynchronous
  * path.
- *
- * 'blocks' note: if submit->scribble is NULL then the contents of
- * 'blocks' may be overwritten to perform address conversions
- * (dma_map_page() or page_address()).
  */
 struct dma_async_tx_descriptor *
 async_gen_syndrome(struct page **blocks, unsigned int offset, int disks,
@@ -202,26 +171,69 @@ async_gen_syndrome(struct page **blocks, unsigned int offset, int disks,
 						      &P(blocks, disks), 2,
 						      blocks, src_cnt, len);
 	struct dma_device *device = chan ? chan->device : NULL;
-	dma_addr_t *dma_src = NULL;
+	struct dmaengine_unmap_data *unmap = NULL;
 
 	BUG_ON(disks > 255 || !(P(blocks, disks) || Q(blocks, disks)));
 
-	if (submit->scribble)
-		dma_src = submit->scribble;
-	else if (sizeof(dma_addr_t) <= sizeof(struct page *))
-		dma_src = (dma_addr_t *) blocks;
+	if (device)
+		unmap = dmaengine_get_unmap_data(device->dev, disks, GFP_NOIO);
 
-	if (dma_src && device &&
+	if (unmap &&
 	    (src_cnt <= dma_maxpq(device, 0) ||
 	     dma_maxpq(device, DMA_PREP_CONTINUE) > 0) &&
 	    is_dma_pq_aligned(device, offset, 0, len)) {
+		struct dma_async_tx_descriptor *tx;
+		enum dma_ctrl_flags dma_flags = 0;
+		unsigned char coefs[src_cnt];
+		int i, j;
+
 		/* run the p+q asynchronously */
 		pr_debug("%s: (async) disks: %d len: %zu\n",
 			 __func__, disks, len);
-		return do_async_gen_syndrome(chan, blocks, raid6_gfexp, offset,
-					     disks, len, dma_src, submit);
+
+		/* convert source addresses being careful to collapse 'empty'
+		 * sources and update the coefficients accordingly
+		 */
+		unmap->len = len;
+		for (i = 0, j = 0; i < src_cnt; i++) {
+			if (blocks[i] == NULL)
+				continue;
+			unmap->addr[j] = dma_map_page(device->dev, blocks[i], offset,
+						      len, DMA_TO_DEVICE);
+			coefs[j] = raid6_gfexp[i];
+			unmap->to_cnt++;
+			j++;
+		}
+
+		/*
+		 * DMAs use destinations as sources,
+		 * so use BIDIRECTIONAL mapping
+		 */
+		unmap->bidi_cnt++;
+		if (P(blocks, disks))
+			unmap->addr[j++] = dma_map_page(device->dev, P(blocks, disks),
+							offset, len, DMA_BIDIRECTIONAL);
+		else {
+			unmap->addr[j++] = 0;
+			dma_flags |= DMA_PREP_PQ_DISABLE_P;
+		}
+
+		unmap->bidi_cnt++;
+		if (Q(blocks, disks))
+			unmap->addr[j++] = dma_map_page(device->dev, Q(blocks, disks),
+						       offset, len, DMA_BIDIRECTIONAL);
+		else {
+			unmap->addr[j++] = 0;
+			dma_flags |= DMA_PREP_PQ_DISABLE_Q;
+		}
+
+		tx = do_async_gen_syndrome(chan, coefs, j, unmap, dma_flags, submit);
+		dmaengine_unmap_put(unmap);
+		return tx;
 	}
 
+	dmaengine_unmap_put(unmap);
+
 	/* run the pq synchronously */
 	pr_debug("%s: (sync) disks: %d len: %zu\n", __func__, disks, len);
 
@@ -277,50 +289,60 @@ async_syndrome_val(struct page **blocks, unsigned int offset, int disks,
 	struct dma_async_tx_descriptor *tx;
 	unsigned char coefs[disks-2];
 	enum dma_ctrl_flags dma_flags = submit->cb_fn ? DMA_PREP_INTERRUPT : 0;
-	dma_addr_t *dma_src = NULL;
-	int src_cnt = 0;
+	struct dmaengine_unmap_data *unmap = NULL;
 
 	BUG_ON(disks < 4);
 
-	if (submit->scribble)
-		dma_src = submit->scribble;
-	else if (sizeof(dma_addr_t) <= sizeof(struct page *))
-		dma_src = (dma_addr_t *) blocks;
+	if (device)
+		unmap = dmaengine_get_unmap_data(device->dev, disks, GFP_NOIO);
 
-	if (dma_src && device && disks <= dma_maxpq(device, 0) &&
+	if (unmap && disks <= dma_maxpq(device, 0) &&
 	    is_dma_pq_aligned(device, offset, 0, len)) {
 		struct device *dev = device->dev;
-		dma_addr_t *pq = &dma_src[disks-2];
-		int i;
+		dma_addr_t pq[2];
+		int i, j = 0, src_cnt = 0;
 
 		pr_debug("%s: (async) disks: %d len: %zu\n",
 			 __func__, disks, len);
-		if (!P(blocks, disks))
+
+		unmap->len = len;
+		for (i = 0; i < disks-2; i++)
+			if (likely(blocks[i])) {
+				unmap->addr[j] = dma_map_page(dev, blocks[i],
+							      offset, len,
+							      DMA_TO_DEVICE);
+				coefs[j] = raid6_gfexp[i];
+				unmap->to_cnt++;
+				src_cnt++;
+				j++;
+			}
+
+		if (!P(blocks, disks)) {
+			pq[0] = 0;
 			dma_flags |= DMA_PREP_PQ_DISABLE_P;
-		else
+		} else {
 			pq[0] = dma_map_page(dev, P(blocks, disks),
 					     offset, len,
 					     DMA_TO_DEVICE);
-		if (!Q(blocks, disks))
+			unmap->addr[j++] = pq[0];
+			unmap->to_cnt++;
+		}
+		if (!Q(blocks, disks)) {
+			pq[1] = 0;
 			dma_flags |= DMA_PREP_PQ_DISABLE_Q;
-		else
+		} else {
 			pq[1] = dma_map_page(dev, Q(blocks, disks),
 					     offset, len,
 					     DMA_TO_DEVICE);
+			unmap->addr[j++] = pq[1];
+			unmap->to_cnt++;
+		}
 
 		if (submit->flags & ASYNC_TX_FENCE)
 			dma_flags |= DMA_PREP_FENCE;
-		for (i = 0; i < disks-2; i++)
-			if (likely(blocks[i])) {
-				dma_src[src_cnt] = dma_map_page(dev, blocks[i],
-								offset, len,
-								DMA_TO_DEVICE);
-				coefs[src_cnt] = raid6_gfexp[i];
-				src_cnt++;
-			}
-
 		for (;;) {
-			tx = device->device_prep_dma_pq_val(chan, pq, dma_src,
+			tx = device->device_prep_dma_pq_val(chan, pq,
+							    unmap->addr,
 							    src_cnt,
 							    coefs,
 							    len, pqres,
@@ -330,6 +352,8 @@ async_syndrome_val(struct page **blocks, unsigned int offset, int disks,
 			async_tx_quiesce(&submit->depend_tx);
 			dma_async_issue_pending(chan);
 		}
+
+		dma_set_unmap(tx, unmap);
 		async_tx_submit(chan, tx, submit);
 
 		return tx;

+ 43 - 18
crypto/async_tx/async_raid6_recov.c

@@ -26,6 +26,7 @@
 #include <linux/dma-mapping.h>
 #include <linux/raid/pq.h>
 #include <linux/async_tx.h>
+#include <linux/dmaengine.h>
 
 static struct dma_async_tx_descriptor *
 async_sum_product(struct page *dest, struct page **srcs, unsigned char *coef,
@@ -34,35 +35,45 @@ async_sum_product(struct page *dest, struct page **srcs, unsigned char *coef,
 	struct dma_chan *chan = async_tx_find_channel(submit, DMA_PQ,
 						      &dest, 1, srcs, 2, len);
 	struct dma_device *dma = chan ? chan->device : NULL;
+	struct dmaengine_unmap_data *unmap = NULL;
 	const u8 *amul, *bmul;
 	u8 ax, bx;
 	u8 *a, *b, *c;
 
-	if (dma) {
-		dma_addr_t dma_dest[2];
-		dma_addr_t dma_src[2];
+	if (dma)
+		unmap = dmaengine_get_unmap_data(dma->dev, 3, GFP_NOIO);
+
+	if (unmap) {
 		struct device *dev = dma->dev;
+		dma_addr_t pq[2];
 		struct dma_async_tx_descriptor *tx;
 		enum dma_ctrl_flags dma_flags = DMA_PREP_PQ_DISABLE_P;
 
 		if (submit->flags & ASYNC_TX_FENCE)
 			dma_flags |= DMA_PREP_FENCE;
-		dma_dest[1] = dma_map_page(dev, dest, 0, len, DMA_BIDIRECTIONAL);
-		dma_src[0] = dma_map_page(dev, srcs[0], 0, len, DMA_TO_DEVICE);
-		dma_src[1] = dma_map_page(dev, srcs[1], 0, len, DMA_TO_DEVICE);
-		tx = dma->device_prep_dma_pq(chan, dma_dest, dma_src, 2, coef,
+		unmap->addr[0] = dma_map_page(dev, srcs[0], 0, len, DMA_TO_DEVICE);
+		unmap->addr[1] = dma_map_page(dev, srcs[1], 0, len, DMA_TO_DEVICE);
+		unmap->to_cnt = 2;
+
+		unmap->addr[2] = dma_map_page(dev, dest, 0, len, DMA_BIDIRECTIONAL);
+		unmap->bidi_cnt = 1;
+		/* engine only looks at Q, but expects it to follow P */
+		pq[1] = unmap->addr[2];
+
+		unmap->len = len;
+		tx = dma->device_prep_dma_pq(chan, pq, unmap->addr, 2, coef,
 					     len, dma_flags);
 		if (tx) {
+			dma_set_unmap(tx, unmap);
 			async_tx_submit(chan, tx, submit);
+			dmaengine_unmap_put(unmap);
 			return tx;
 		}
 
 		/* could not get a descriptor, unmap and fall through to
 		 * the synchronous path
 		 */
-		dma_unmap_page(dev, dma_dest[1], len, DMA_BIDIRECTIONAL);
-		dma_unmap_page(dev, dma_src[0], len, DMA_TO_DEVICE);
-		dma_unmap_page(dev, dma_src[1], len, DMA_TO_DEVICE);
+		dmaengine_unmap_put(unmap);
 	}
 
 	/* run the operation synchronously */
@@ -89,23 +100,38 @@ async_mult(struct page *dest, struct page *src, u8 coef, size_t len,
 	struct dma_chan *chan = async_tx_find_channel(submit, DMA_PQ,
 						      &dest, 1, &src, 1, len);
 	struct dma_device *dma = chan ? chan->device : NULL;
+	struct dmaengine_unmap_data *unmap = NULL;
 	const u8 *qmul; /* Q multiplier table */
 	u8 *d, *s;
 
-	if (dma) {
+	if (dma)
+		unmap = dmaengine_get_unmap_data(dma->dev, 3, GFP_NOIO);
+
+	if (unmap) {
 		dma_addr_t dma_dest[2];
-		dma_addr_t dma_src[1];
 		struct device *dev = dma->dev;
 		struct dma_async_tx_descriptor *tx;
 		enum dma_ctrl_flags dma_flags = DMA_PREP_PQ_DISABLE_P;
 
 		if (submit->flags & ASYNC_TX_FENCE)
 			dma_flags |= DMA_PREP_FENCE;
-		dma_dest[1] = dma_map_page(dev, dest, 0, len, DMA_BIDIRECTIONAL);
-		dma_src[0] = dma_map_page(dev, src, 0, len, DMA_TO_DEVICE);
-		tx = dma->device_prep_dma_pq(chan, dma_dest, dma_src, 1, &coef,
-					     len, dma_flags);
+		unmap->addr[0] = dma_map_page(dev, src, 0, len, DMA_TO_DEVICE);
+		unmap->to_cnt++;
+		unmap->addr[1] = dma_map_page(dev, dest, 0, len, DMA_BIDIRECTIONAL);
+		dma_dest[1] = unmap->addr[1];
+		unmap->bidi_cnt++;
+		unmap->len = len;
+
+		/* this looks funny, but the engine looks for Q at
+		 * dma_dest[1] and ignores dma_dest[0] as a dest
+		 * due to DMA_PREP_PQ_DISABLE_P
+		 */
+		tx = dma->device_prep_dma_pq(chan, dma_dest, unmap->addr,
+					     1, &coef, len, dma_flags);
+
 		if (tx) {
+			dma_set_unmap(tx, unmap);
+			dmaengine_unmap_put(unmap);
 			async_tx_submit(chan, tx, submit);
 			return tx;
 		}
@@ -113,8 +139,7 @@ async_mult(struct page *dest, struct page *src, u8 coef, size_t len,
 		/* could not get a descriptor, unmap and fall through to
 		 * the synchronous path
 		 */
-		dma_unmap_page(dev, dma_dest[1], len, DMA_BIDIRECTIONAL);
-		dma_unmap_page(dev, dma_src[0], len, DMA_TO_DEVICE);
+		dmaengine_unmap_put(unmap);
 	}
 
 	/* no channel available, or failed to allocate a descriptor, so

+ 66 - 57
crypto/async_tx/async_xor.c

@@ -33,48 +33,31 @@
 
 /* do_async_xor - dma map the pages and perform the xor with an engine */
 static __async_inline struct dma_async_tx_descriptor *
-do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list,
-	     unsigned int offset, int src_cnt, size_t len, dma_addr_t *dma_src,
+do_async_xor(struct dma_chan *chan, struct dmaengine_unmap_data *unmap,
 	     struct async_submit_ctl *submit)
 {
 	struct dma_device *dma = chan->device;
 	struct dma_async_tx_descriptor *tx = NULL;
-	int src_off = 0;
-	int i;
 	dma_async_tx_callback cb_fn_orig = submit->cb_fn;
 	void *cb_param_orig = submit->cb_param;
 	enum async_tx_flags flags_orig = submit->flags;
-	enum dma_ctrl_flags dma_flags;
-	int xor_src_cnt = 0;
-	dma_addr_t dma_dest;
-
-	/* map the dest bidrectional in case it is re-used as a source */
-	dma_dest = dma_map_page(dma->dev, dest, offset, len, DMA_BIDIRECTIONAL);
-	for (i = 0; i < src_cnt; i++) {
-		/* only map the dest once */
-		if (!src_list[i])
-			continue;
-		if (unlikely(src_list[i] == dest)) {
-			dma_src[xor_src_cnt++] = dma_dest;
-			continue;
-		}
-		dma_src[xor_src_cnt++] = dma_map_page(dma->dev, src_list[i], offset,
-						      len, DMA_TO_DEVICE);
-	}
-	src_cnt = xor_src_cnt;
+	enum dma_ctrl_flags dma_flags = 0;
+	int src_cnt = unmap->to_cnt;
+	int xor_src_cnt;
+	dma_addr_t dma_dest = unmap->addr[unmap->to_cnt];
+	dma_addr_t *src_list = unmap->addr;
 
 	while (src_cnt) {
+		dma_addr_t tmp;
+
 		submit->flags = flags_orig;
-		dma_flags = 0;
 		xor_src_cnt = min(src_cnt, (int)dma->max_xor);
-		/* if we are submitting additional xors, leave the chain open,
-		 * clear the callback parameters, and leave the destination
-		 * buffer mapped
+		/* if we are submitting additional xors, leave the chain open
+		 * and clear the callback parameters
 		 */
 		if (src_cnt > xor_src_cnt) {
 			submit->flags &= ~ASYNC_TX_ACK;
 			submit->flags |= ASYNC_TX_FENCE;
-			dma_flags = DMA_COMPL_SKIP_DEST_UNMAP;
 			submit->cb_fn = NULL;
 			submit->cb_param = NULL;
 		} else {
@@ -85,12 +68,18 @@ do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list,
 			dma_flags |= DMA_PREP_INTERRUPT;
 		if (submit->flags & ASYNC_TX_FENCE)
 			dma_flags |= DMA_PREP_FENCE;
-		/* Since we have clobbered the src_list we are committed
-		 * to doing this asynchronously.  Drivers force forward progress
-		 * in case they can not provide a descriptor
+
+		/* Drivers force forward progress in case they can not provide a
+		 * descriptor
 		 */
-		tx = dma->device_prep_dma_xor(chan, dma_dest, &dma_src[src_off],
-					      xor_src_cnt, len, dma_flags);
+		tmp = src_list[0];
+		if (src_list > unmap->addr)
+			src_list[0] = dma_dest;
+		tx = dma->device_prep_dma_xor(chan, dma_dest, src_list,
+					      xor_src_cnt, unmap->len,
+					      dma_flags);
+		src_list[0] = tmp;
+
 
 		if (unlikely(!tx))
 			async_tx_quiesce(&submit->depend_tx);
@@ -99,22 +88,21 @@ do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list,
 		while (unlikely(!tx)) {
 			dma_async_issue_pending(chan);
 			tx = dma->device_prep_dma_xor(chan, dma_dest,
-						      &dma_src[src_off],
-						      xor_src_cnt, len,
+						      src_list,
+						      xor_src_cnt, unmap->len,
 						      dma_flags);
 		}
 
+		dma_set_unmap(tx, unmap);
 		async_tx_submit(chan, tx, submit);
 		submit->depend_tx = tx;
 
 		if (src_cnt > xor_src_cnt) {
 			/* drop completed sources */
 			src_cnt -= xor_src_cnt;
-			src_off += xor_src_cnt;
-
 			/* use the intermediate result a source */
-			dma_src[--src_off] = dma_dest;
 			src_cnt++;
+			src_list += xor_src_cnt - 1;
 		} else
 			break;
 	}
@@ -189,22 +177,40 @@ async_xor(struct page *dest, struct page **src_list, unsigned int offset,
 	struct dma_chan *chan = async_tx_find_channel(submit, DMA_XOR,
 						      &dest, 1, src_list,
 						      src_cnt, len);
-	dma_addr_t *dma_src = NULL;
+	struct dma_device *device = chan ? chan->device : NULL;
+	struct dmaengine_unmap_data *unmap = NULL;
 
 	BUG_ON(src_cnt <= 1);
 
-	if (submit->scribble)
-		dma_src = submit->scribble;
-	else if (sizeof(dma_addr_t) <= sizeof(struct page *))
-		dma_src = (dma_addr_t *) src_list;
+	if (device)
+		unmap = dmaengine_get_unmap_data(device->dev, src_cnt+1, GFP_NOIO);
+
+	if (unmap && is_dma_xor_aligned(device, offset, 0, len)) {
+		struct dma_async_tx_descriptor *tx;
+		int i, j;
 
-	if (dma_src && chan && is_dma_xor_aligned(chan->device, offset, 0, len)) {
 		/* run the xor asynchronously */
 		pr_debug("%s (async): len: %zu\n", __func__, len);
 
-		return do_async_xor(chan, dest, src_list, offset, src_cnt, len,
-				    dma_src, submit);
+		unmap->len = len;
+		for (i = 0, j = 0; i < src_cnt; i++) {
+			if (!src_list[i])
+				continue;
+			unmap->to_cnt++;
+			unmap->addr[j++] = dma_map_page(device->dev, src_list[i],
+							offset, len, DMA_TO_DEVICE);
+		}
+
+		/* map it bidirectional as it may be re-used as a source */
+		unmap->addr[j] = dma_map_page(device->dev, dest, offset, len,
+					      DMA_BIDIRECTIONAL);
+		unmap->bidi_cnt = 1;
+
+		tx = do_async_xor(chan, unmap, submit);
+		dmaengine_unmap_put(unmap);
+		return tx;
 	} else {
+		dmaengine_unmap_put(unmap);
 		/* run the xor synchronously */
 		pr_debug("%s (sync): len: %zu\n", __func__, len);
 		WARN_ONCE(chan, "%s: no space for dma address conversion\n",
@@ -268,16 +274,14 @@ async_xor_val(struct page *dest, struct page **src_list, unsigned int offset,
 	struct dma_chan *chan = xor_val_chan(submit, dest, src_list, src_cnt, len);
 	struct dma_device *device = chan ? chan->device : NULL;
 	struct dma_async_tx_descriptor *tx = NULL;
-	dma_addr_t *dma_src = NULL;
+	struct dmaengine_unmap_data *unmap = NULL;
 
 	BUG_ON(src_cnt <= 1);
 
-	if (submit->scribble)
-		dma_src = submit->scribble;
-	else if (sizeof(dma_addr_t) <= sizeof(struct page *))
-		dma_src = (dma_addr_t *) src_list;
+	if (device)
+		unmap = dmaengine_get_unmap_data(device->dev, src_cnt, GFP_NOIO);
 
-	if (dma_src && device && src_cnt <= device->max_xor &&
+	if (unmap && src_cnt <= device->max_xor &&
 	    is_dma_xor_aligned(device, offset, 0, len)) {
 		unsigned long dma_prep_flags = 0;
 		int i;
@@ -288,11 +292,15 @@ async_xor_val(struct page *dest, struct page **src_list, unsigned int offset,
 			dma_prep_flags |= DMA_PREP_INTERRUPT;
 		if (submit->flags & ASYNC_TX_FENCE)
 			dma_prep_flags |= DMA_PREP_FENCE;
-		for (i = 0; i < src_cnt; i++)
-			dma_src[i] = dma_map_page(device->dev, src_list[i],
-						  offset, len, DMA_TO_DEVICE);
 
-		tx = device->device_prep_dma_xor_val(chan, dma_src, src_cnt,
+		for (i = 0; i < src_cnt; i++) {
+			unmap->addr[i] = dma_map_page(device->dev, src_list[i],
+						      offset, len, DMA_TO_DEVICE);
+			unmap->to_cnt++;
+		}
+		unmap->len = len;
+
+		tx = device->device_prep_dma_xor_val(chan, unmap->addr, src_cnt,
 						     len, result,
 						     dma_prep_flags);
 		if (unlikely(!tx)) {
@@ -301,11 +309,11 @@ async_xor_val(struct page *dest, struct page **src_list, unsigned int offset,
 			while (!tx) {
 				dma_async_issue_pending(chan);
 				tx = device->device_prep_dma_xor_val(chan,
-					dma_src, src_cnt, len, result,
+					unmap->addr, src_cnt, len, result,
 					dma_prep_flags);
 			}
 		}
-
+		dma_set_unmap(tx, unmap);
 		async_tx_submit(chan, tx, submit);
 	} else {
 		enum async_tx_flags flags_orig = submit->flags;
@@ -327,6 +335,7 @@ async_xor_val(struct page *dest, struct page **src_list, unsigned int offset,
 		async_tx_sync_epilog(submit);
 		submit->flags = flags_orig;
 	}
+	dmaengine_unmap_put(unmap);
 
 	return tx;
 }

+ 9 - 1
crypto/async_tx/raid6test.c

@@ -28,7 +28,7 @@
 #undef pr
 #define pr(fmt, args...) pr_info("raid6test: " fmt, ##args)
 
-#define NDISKS 16 /* Including P and Q */
+#define NDISKS 64 /* Including P and Q */
 
 static struct page *dataptrs[NDISKS];
 static addr_conv_t addr_conv[NDISKS];
@@ -219,6 +219,14 @@ static int raid6_test(void)
 		err += test(11, &tests);
 		err += test(12, &tests);
 	}
+
+	/* the 24 disk case is special for ioatdma as it is the boundary point
+	 * at which it needs to switch from 8-source ops to 16-source
+	 * ops for continuation (assumes DMA_HAS_PQ_CONTINUE is not set)
+	 */
+	if (NDISKS > 24)
+		err += test(24, &tests);
+
 	err += test(NDISKS, &tests);
 
 	pr("\n");

+ 1 - 2
drivers/ata/pata_arasan_cf.c

@@ -396,8 +396,7 @@ dma_xfer(struct arasan_cf_dev *acdev, dma_addr_t src, dma_addr_t dest, u32 len)
 	struct dma_async_tx_descriptor *tx;
 	struct dma_chan *chan = acdev->dma_chan;
 	dma_cookie_t cookie;
-	unsigned long flags = DMA_PREP_INTERRUPT | DMA_COMPL_SKIP_SRC_UNMAP |
-		DMA_COMPL_SKIP_DEST_UNMAP;
+	unsigned long flags = DMA_PREP_INTERRUPT;
 	int ret = 0;
 
 	tx = chan->device->device_prep_dma_memcpy(chan, dest, src, len, flags);

+ 1 - 31
drivers/dma/amba-pl08x.c

@@ -1164,42 +1164,12 @@ static void pl08x_free_txd(struct pl08x_driver_data *pl08x,
 	kfree(txd);
 }
 
-static void pl08x_unmap_buffers(struct pl08x_txd *txd)
-{
-	struct device *dev = txd->vd.tx.chan->device->dev;
-	struct pl08x_sg *dsg;
-
-	if (!(txd->vd.tx.flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
-		if (txd->vd.tx.flags & DMA_COMPL_SRC_UNMAP_SINGLE)
-			list_for_each_entry(dsg, &txd->dsg_list, node)
-				dma_unmap_single(dev, dsg->src_addr, dsg->len,
-						DMA_TO_DEVICE);
-		else {
-			list_for_each_entry(dsg, &txd->dsg_list, node)
-				dma_unmap_page(dev, dsg->src_addr, dsg->len,
-						DMA_TO_DEVICE);
-		}
-	}
-	if (!(txd->vd.tx.flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
-		if (txd->vd.tx.flags & DMA_COMPL_DEST_UNMAP_SINGLE)
-			list_for_each_entry(dsg, &txd->dsg_list, node)
-				dma_unmap_single(dev, dsg->dst_addr, dsg->len,
-						DMA_FROM_DEVICE);
-		else
-			list_for_each_entry(dsg, &txd->dsg_list, node)
-				dma_unmap_page(dev, dsg->dst_addr, dsg->len,
-						DMA_FROM_DEVICE);
-	}
-}
-
 static void pl08x_desc_free(struct virt_dma_desc *vd)
 {
 	struct pl08x_txd *txd = to_pl08x_txd(&vd->tx);
 	struct pl08x_dma_chan *plchan = to_pl08x_chan(vd->tx.chan);
 
-	if (!plchan->slave)
-		pl08x_unmap_buffers(txd);
-
+	dma_descriptor_unmap(txd);
 	if (!txd->done)
 		pl08x_release_mux(plchan);
 

+ 1 - 25
drivers/dma/at_hdmac.c

@@ -344,31 +344,7 @@ atc_chain_complete(struct at_dma_chan *atchan, struct at_desc *desc)
 	/* move myself to free_list */
 	list_move(&desc->desc_node, &atchan->free_list);
 
-	/* unmap dma addresses (not on slave channels) */
-	if (!atchan->chan_common.private) {
-		struct device *parent = chan2parent(&atchan->chan_common);
-		if (!(txd->flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
-			if (txd->flags & DMA_COMPL_DEST_UNMAP_SINGLE)
-				dma_unmap_single(parent,
-						desc->lli.daddr,
-						desc->len, DMA_FROM_DEVICE);
-			else
-				dma_unmap_page(parent,
-						desc->lli.daddr,
-						desc->len, DMA_FROM_DEVICE);
-		}
-		if (!(txd->flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
-			if (txd->flags & DMA_COMPL_SRC_UNMAP_SINGLE)
-				dma_unmap_single(parent,
-						desc->lli.saddr,
-						desc->len, DMA_TO_DEVICE);
-			else
-				dma_unmap_page(parent,
-						desc->lli.saddr,
-						desc->len, DMA_TO_DEVICE);
-		}
-	}
-
+	dma_descriptor_unmap(txd);
 	/* for cyclic transfers,
 	 * no need to replay callback function while stopping */
 	if (!atc_chan_is_cyclic(atchan)) {
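
The conversions above (and the dw, ep93xx, fsldma and ioat ones that follow)
all collapse to the same completion-side pattern.  A hedged sketch of that
pattern, with a hypothetical driver descriptor type standing in for pl08x_txd,
at_desc and friends:

	/* Sketch only: 'struct my_desc' and my_complete_one() are invented for
	 * illustration; the dmaengine calls are the ones used in this series. */
	struct my_desc {
		struct dma_async_tx_descriptor txd;
		/* ...driver-private hardware descriptor state... */
	};

	static void my_complete_one(struct my_desc *desc)
	{
		struct dma_async_tx_descriptor *txd = &desc->txd;

		dma_cookie_complete(txd);
		/* replaces the removed DMA_COMPL_SKIP_*_UNMAP flag checks and
		 * the open-coded dma_unmap_page()/dma_unmap_single() calls */
		dma_descriptor_unmap(txd);
		if (txd->callback)
			txd->callback(txd->callback_param);
	}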

+ 178 - 84
drivers/dma/dmaengine.c

@@ -65,6 +65,7 @@
 #include <linux/acpi.h>
 #include <linux/acpi_dma.h>
 #include <linux/of_dma.h>
+#include <linux/mempool.h>
 
 static DEFINE_MUTEX(dma_list_mutex);
 static DEFINE_IDR(dma_idr);
@@ -901,98 +902,132 @@ void dma_async_device_unregister(struct dma_device *device)
 }
 EXPORT_SYMBOL(dma_async_device_unregister);
 
-/**
- * dma_async_memcpy_buf_to_buf - offloaded copy between virtual addresses
- * @chan: DMA channel to offload copy to
- * @dest: destination address (virtual)
- * @src: source address (virtual)
- * @len: length
- *
- * Both @dest and @src must be mappable to a bus address according to the
- * DMA mapping API rules for streaming mappings.
- * Both @dest and @src must stay memory resident (kernel memory or locked
- * user space pages).
- */
-dma_cookie_t
-dma_async_memcpy_buf_to_buf(struct dma_chan *chan, void *dest,
-			void *src, size_t len)
-{
-	struct dma_device *dev = chan->device;
-	struct dma_async_tx_descriptor *tx;
-	dma_addr_t dma_dest, dma_src;
-	dma_cookie_t cookie;
-	unsigned long flags;
+struct dmaengine_unmap_pool {
+	struct kmem_cache *cache;
+	const char *name;
+	mempool_t *pool;
+	size_t size;
+};
 
-	dma_src = dma_map_single(dev->dev, src, len, DMA_TO_DEVICE);
-	dma_dest = dma_map_single(dev->dev, dest, len, DMA_FROM_DEVICE);
-	flags = DMA_CTRL_ACK |
-		DMA_COMPL_SRC_UNMAP_SINGLE |
-		DMA_COMPL_DEST_UNMAP_SINGLE;
-	tx = dev->device_prep_dma_memcpy(chan, dma_dest, dma_src, len, flags);
+#define __UNMAP_POOL(x) { .size = x, .name = "dmaengine-unmap-" __stringify(x) }
+static struct dmaengine_unmap_pool unmap_pool[] = {
+	__UNMAP_POOL(2),
+	#if IS_ENABLED(CONFIG_ASYNC_TX_DMA)
+	__UNMAP_POOL(16),
+	__UNMAP_POOL(128),
+	__UNMAP_POOL(256),
+	#endif
+};
 
-	if (!tx) {
-		dma_unmap_single(dev->dev, dma_src, len, DMA_TO_DEVICE);
-		dma_unmap_single(dev->dev, dma_dest, len, DMA_FROM_DEVICE);
-		return -ENOMEM;
+static struct dmaengine_unmap_pool *__get_unmap_pool(int nr)
+{
+	int order = get_count_order(nr);
+
+	switch (order) {
+	case 0 ... 1:
+		return &unmap_pool[0];
+	case 2 ... 4:
+		return &unmap_pool[1];
+	case 5 ... 7:
+		return &unmap_pool[2];
+	case 8:
+		return &unmap_pool[3];
+	default:
+		BUG();
+		return NULL;
 	}
+}
 
-	tx->callback = NULL;
-	cookie = tx->tx_submit(tx);
+static void dmaengine_unmap(struct kref *kref)
+{
+	struct dmaengine_unmap_data *unmap = container_of(kref, typeof(*unmap), kref);
+	struct device *dev = unmap->dev;
+	int cnt, i;
+
+	cnt = unmap->to_cnt;
+	for (i = 0; i < cnt; i++)
+		dma_unmap_page(dev, unmap->addr[i], unmap->len,
+			       DMA_TO_DEVICE);
+	cnt += unmap->from_cnt;
+	for (; i < cnt; i++)
+		dma_unmap_page(dev, unmap->addr[i], unmap->len,
+			       DMA_FROM_DEVICE);
+	cnt += unmap->bidi_cnt;
+	for (; i < cnt; i++) {
+		if (unmap->addr[i] == 0)
+			continue;
+		dma_unmap_page(dev, unmap->addr[i], unmap->len,
+			       DMA_BIDIRECTIONAL);
+	}
+	mempool_free(unmap, __get_unmap_pool(cnt)->pool);
+}
 
-	preempt_disable();
-	__this_cpu_add(chan->local->bytes_transferred, len);
-	__this_cpu_inc(chan->local->memcpy_count);
-	preempt_enable();
+void dmaengine_unmap_put(struct dmaengine_unmap_data *unmap)
+{
+	if (unmap)
+		kref_put(&unmap->kref, dmaengine_unmap);
+}
+EXPORT_SYMBOL_GPL(dmaengine_unmap_put);
 
-	return cookie;
+static void dmaengine_destroy_unmap_pool(void)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(unmap_pool); i++) {
+		struct dmaengine_unmap_pool *p = &unmap_pool[i];
+
+		if (p->pool)
+			mempool_destroy(p->pool);
+		p->pool = NULL;
+		if (p->cache)
+			kmem_cache_destroy(p->cache);
+		p->cache = NULL;
+	}
 }
-EXPORT_SYMBOL(dma_async_memcpy_buf_to_buf);
 
-/**
- * dma_async_memcpy_buf_to_pg - offloaded copy from address to page
- * @chan: DMA channel to offload copy to
- * @page: destination page
- * @offset: offset in page to copy to
- * @kdata: source address (virtual)
- * @len: length
- *
- * Both @page/@offset and @kdata must be mappable to a bus address according
- * to the DMA mapping API rules for streaming mappings.
- * Both @page/@offset and @kdata must stay memory resident (kernel memory or
- * locked user space pages)
- */
-dma_cookie_t
-dma_async_memcpy_buf_to_pg(struct dma_chan *chan, struct page *page,
-			unsigned int offset, void *kdata, size_t len)
+static int __init dmaengine_init_unmap_pool(void)
 {
-	struct dma_device *dev = chan->device;
-	struct dma_async_tx_descriptor *tx;
-	dma_addr_t dma_dest, dma_src;
-	dma_cookie_t cookie;
-	unsigned long flags;
+	int i;
 
-	dma_src = dma_map_single(dev->dev, kdata, len, DMA_TO_DEVICE);
-	dma_dest = dma_map_page(dev->dev, page, offset, len, DMA_FROM_DEVICE);
-	flags = DMA_CTRL_ACK | DMA_COMPL_SRC_UNMAP_SINGLE;
-	tx = dev->device_prep_dma_memcpy(chan, dma_dest, dma_src, len, flags);
+	for (i = 0; i < ARRAY_SIZE(unmap_pool); i++) {
+		struct dmaengine_unmap_pool *p = &unmap_pool[i];
+		size_t size;
 
-	if (!tx) {
-		dma_unmap_single(dev->dev, dma_src, len, DMA_TO_DEVICE);
-		dma_unmap_page(dev->dev, dma_dest, len, DMA_FROM_DEVICE);
-		return -ENOMEM;
+		size = sizeof(struct dmaengine_unmap_data) +
+		       sizeof(dma_addr_t) * p->size;
+
+		p->cache = kmem_cache_create(p->name, size, 0,
+					     SLAB_HWCACHE_ALIGN, NULL);
+		if (!p->cache)
+			break;
+		p->pool = mempool_create_slab_pool(1, p->cache);
+		if (!p->pool)
+			break;
 	}
 
-	tx->callback = NULL;
-	cookie = tx->tx_submit(tx);
+	if (i == ARRAY_SIZE(unmap_pool))
+		return 0;
 
-	preempt_disable();
-	__this_cpu_add(chan->local->bytes_transferred, len);
-	__this_cpu_inc(chan->local->memcpy_count);
-	preempt_enable();
+	dmaengine_destroy_unmap_pool();
+	return -ENOMEM;
+}
 
-	return cookie;
+struct dmaengine_unmap_data *
+dmaengine_get_unmap_data(struct device *dev, int nr, gfp_t flags)
+{
+	struct dmaengine_unmap_data *unmap;
+
+	unmap = mempool_alloc(__get_unmap_pool(nr)->pool, flags);
+	if (!unmap)
+		return NULL;
+
+	memset(unmap, 0, sizeof(*unmap));
+	kref_init(&unmap->kref);
+	unmap->dev = dev;
+
+	return unmap;
 }
-EXPORT_SYMBOL(dma_async_memcpy_buf_to_pg);
+EXPORT_SYMBOL(dmaengine_get_unmap_data);
 
 /**
  * dma_async_memcpy_pg_to_pg - offloaded copy from page to page
@@ -1015,24 +1050,33 @@ dma_async_memcpy_pg_to_pg(struct dma_chan *chan, struct page *dest_pg,
 {
 	struct dma_device *dev = chan->device;
 	struct dma_async_tx_descriptor *tx;
-	dma_addr_t dma_dest, dma_src;
+	struct dmaengine_unmap_data *unmap;
 	dma_cookie_t cookie;
 	unsigned long flags;
 
-	dma_src = dma_map_page(dev->dev, src_pg, src_off, len, DMA_TO_DEVICE);
-	dma_dest = dma_map_page(dev->dev, dest_pg, dest_off, len,
-				DMA_FROM_DEVICE);
+	unmap = dmaengine_get_unmap_data(dev->dev, 2, GFP_NOIO);
+	if (!unmap)
+		return -ENOMEM;
+
+	unmap->to_cnt = 1;
+	unmap->from_cnt = 1;
+	unmap->addr[0] = dma_map_page(dev->dev, src_pg, src_off, len,
+				      DMA_TO_DEVICE);
+	unmap->addr[1] = dma_map_page(dev->dev, dest_pg, dest_off, len,
+				      DMA_FROM_DEVICE);
+	unmap->len = len;
 	flags = DMA_CTRL_ACK;
-	tx = dev->device_prep_dma_memcpy(chan, dma_dest, dma_src, len, flags);
+	tx = dev->device_prep_dma_memcpy(chan, unmap->addr[1], unmap->addr[0],
+					 len, flags);
 
 	if (!tx) {
-		dma_unmap_page(dev->dev, dma_src, len, DMA_TO_DEVICE);
-		dma_unmap_page(dev->dev, dma_dest, len, DMA_FROM_DEVICE);
+		dmaengine_unmap_put(unmap);
 		return -ENOMEM;
 	}
 
-	tx->callback = NULL;
+	dma_set_unmap(tx, unmap);
 	cookie = tx->tx_submit(tx);
+	dmaengine_unmap_put(unmap);
 
 	preempt_disable();
 	__this_cpu_add(chan->local->bytes_transferred, len);
@@ -1043,6 +1087,52 @@ dma_async_memcpy_pg_to_pg(struct dma_chan *chan, struct page *dest_pg,
 }
 EXPORT_SYMBOL(dma_async_memcpy_pg_to_pg);
 
+/**
+ * dma_async_memcpy_buf_to_buf - offloaded copy between virtual addresses
+ * @chan: DMA channel to offload copy to
+ * @dest: destination address (virtual)
+ * @src: source address (virtual)
+ * @len: length
+ *
+ * Both @dest and @src must be mappable to a bus address according to the
+ * DMA mapping API rules for streaming mappings.
+ * Both @dest and @src must stay memory resident (kernel memory or locked
+ * user space pages).
+ */
+dma_cookie_t
+dma_async_memcpy_buf_to_buf(struct dma_chan *chan, void *dest,
+			    void *src, size_t len)
+{
+	return dma_async_memcpy_pg_to_pg(chan, virt_to_page(dest),
+					 (unsigned long) dest & ~PAGE_MASK,
+					 virt_to_page(src),
+					 (unsigned long) src & ~PAGE_MASK, len);
+}
+EXPORT_SYMBOL(dma_async_memcpy_buf_to_buf);
+
+/**
+ * dma_async_memcpy_buf_to_pg - offloaded copy from address to page
+ * @chan: DMA channel to offload copy to
+ * @page: destination page
+ * @offset: offset in page to copy to
+ * @kdata: source address (virtual)
+ * @len: length
+ *
+ * Both @page/@offset and @kdata must be mappable to a bus address according
+ * to the DMA mapping API rules for streaming mappings.
+ * Both @page/@offset and @kdata must stay memory resident (kernel memory or
+ * locked user space pages)
+ */
+dma_cookie_t
+dma_async_memcpy_buf_to_pg(struct dma_chan *chan, struct page *page,
+			   unsigned int offset, void *kdata, size_t len)
+{
+	return dma_async_memcpy_pg_to_pg(chan, page, offset,
+					 virt_to_page(kdata),
+					 (unsigned long) kdata & ~PAGE_MASK, len);
+}
+EXPORT_SYMBOL(dma_async_memcpy_buf_to_pg);
+
 void dma_async_tx_descriptor_init(struct dma_async_tx_descriptor *tx,
 	struct dma_chan *chan)
 {
@@ -1116,6 +1206,10 @@ EXPORT_SYMBOL_GPL(dma_run_dependencies);
 
 static int __init dma_bus_init(void)
 {
+	int err = dmaengine_init_unmap_pool();
+
+	if (err)
+		return err;
 	return class_register(&dma_devclass);
 }
 arch_initcall(dma_bus_init);

The diff for this file was suppressed because it is too large
+ 307 - 403
drivers/dma/dmatest.c


+ 1 - 24
drivers/dma/dw/core.c

@@ -85,10 +85,6 @@ static struct device *chan2dev(struct dma_chan *chan)
 {
 	return &chan->dev->device;
 }
-static struct device *chan2parent(struct dma_chan *chan)
-{
-	return chan->dev->device.parent;
-}
 
 static struct dw_desc *dwc_first_active(struct dw_dma_chan *dwc)
 {
@@ -311,26 +307,7 @@ dwc_descriptor_complete(struct dw_dma_chan *dwc, struct dw_desc *desc,
 	list_splice_init(&desc->tx_list, &dwc->free_list);
 	list_move(&desc->desc_node, &dwc->free_list);
 
-	if (!is_slave_direction(dwc->direction)) {
-		struct device *parent = chan2parent(&dwc->chan);
-		if (!(txd->flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
-			if (txd->flags & DMA_COMPL_DEST_UNMAP_SINGLE)
-				dma_unmap_single(parent, desc->lli.dar,
-					desc->total_len, DMA_FROM_DEVICE);
-			else
-				dma_unmap_page(parent, desc->lli.dar,
-					desc->total_len, DMA_FROM_DEVICE);
-		}
-		if (!(txd->flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
-			if (txd->flags & DMA_COMPL_SRC_UNMAP_SINGLE)
-				dma_unmap_single(parent, desc->lli.sar,
-					desc->total_len, DMA_TO_DEVICE);
-			else
-				dma_unmap_page(parent, desc->lli.sar,
-					desc->total_len, DMA_TO_DEVICE);
-		}
-	}
-
+	dma_descriptor_unmap(txd);
 	spin_unlock_irqrestore(&dwc->lock, flags);
 
 	if (callback)

+ 1 - 29
drivers/dma/ep93xx_dma.c

@@ -733,28 +733,6 @@ static void ep93xx_dma_advance_work(struct ep93xx_dma_chan *edmac)
 	spin_unlock_irqrestore(&edmac->lock, flags);
 }
 
-static void ep93xx_dma_unmap_buffers(struct ep93xx_dma_desc *desc)
-{
-	struct device *dev = desc->txd.chan->device->dev;
-
-	if (!(desc->txd.flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
-		if (desc->txd.flags & DMA_COMPL_SRC_UNMAP_SINGLE)
-			dma_unmap_single(dev, desc->src_addr, desc->size,
-					 DMA_TO_DEVICE);
-		else
-			dma_unmap_page(dev, desc->src_addr, desc->size,
-				       DMA_TO_DEVICE);
-	}
-	if (!(desc->txd.flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
-		if (desc->txd.flags & DMA_COMPL_DEST_UNMAP_SINGLE)
-			dma_unmap_single(dev, desc->dst_addr, desc->size,
-					 DMA_FROM_DEVICE);
-		else
-			dma_unmap_page(dev, desc->dst_addr, desc->size,
-				       DMA_FROM_DEVICE);
-	}
-}
-
 static void ep93xx_dma_tasklet(unsigned long data)
 {
 	struct ep93xx_dma_chan *edmac = (struct ep93xx_dma_chan *)data;
@@ -787,13 +765,7 @@ static void ep93xx_dma_tasklet(unsigned long data)
 
 	/* Now we can release all the chained descriptors */
 	list_for_each_entry_safe(desc, d, &list, node) {
-		/*
-		 * For the memcpy channels the API requires us to unmap the
-		 * buffers unless requested otherwise.
-		 */
-		if (!edmac->chan.private)
-			ep93xx_dma_unmap_buffers(desc);
-
+		dma_descriptor_unmap(&desc->txd);
 		ep93xx_dma_desc_put(edmac, desc);
 	}
 

+ 1 - 16
drivers/dma/fsldma.c

@@ -868,22 +868,7 @@ static void fsldma_cleanup_descriptor(struct fsldma_chan *chan,
 	/* Run any dependencies */
 	dma_run_dependencies(txd);
 
-	/* Unmap the dst buffer, if requested */
-	if (!(txd->flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
-		if (txd->flags & DMA_COMPL_DEST_UNMAP_SINGLE)
-			dma_unmap_single(dev, dst, len, DMA_FROM_DEVICE);
-		else
-			dma_unmap_page(dev, dst, len, DMA_FROM_DEVICE);
-	}
-
-	/* Unmap the src buffer, if requested */
-	if (!(txd->flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
-		if (txd->flags & DMA_COMPL_SRC_UNMAP_SINGLE)
-			dma_unmap_single(dev, src, len, DMA_TO_DEVICE);
-		else
-			dma_unmap_page(dev, src, len, DMA_TO_DEVICE);
-	}
-
+	dma_descriptor_unmap(txd);
 #ifdef FSL_DMA_LD_DEBUG
 	chan_dbg(chan, "LD %p free\n", desc);
 #endif

+ 6 - 43
drivers/dma/ioat/dma.c

@@ -531,21 +531,6 @@ static void ioat1_cleanup_event(unsigned long data)
 	writew(IOAT_CHANCTRL_RUN, ioat->base.reg_base + IOAT_CHANCTRL_OFFSET);
 }
 
-void ioat_dma_unmap(struct ioat_chan_common *chan, enum dma_ctrl_flags flags,
-		    size_t len, struct ioat_dma_descriptor *hw)
-{
-	struct pci_dev *pdev = chan->device->pdev;
-	size_t offset = len - hw->size;
-
-	if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP))
-		ioat_unmap(pdev, hw->dst_addr - offset, len,
-			   PCI_DMA_FROMDEVICE, flags, 1);
-
-	if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP))
-		ioat_unmap(pdev, hw->src_addr - offset, len,
-			   PCI_DMA_TODEVICE, flags, 0);
-}
-
 dma_addr_t ioat_get_current_completion(struct ioat_chan_common *chan)
 {
 	dma_addr_t phys_complete;
@@ -602,7 +587,7 @@ static void __cleanup(struct ioat_dma_chan *ioat, dma_addr_t phys_complete)
 		dump_desc_dbg(ioat, desc);
 		if (tx->cookie) {
 			dma_cookie_complete(tx);
-			ioat_dma_unmap(chan, tx->flags, desc->len, desc->hw);
+			dma_descriptor_unmap(tx);
 			ioat->active -= desc->hw->tx_cnt;
 			if (tx->callback) {
 				tx->callback(tx->callback_param);
@@ -833,8 +818,7 @@ int ioat_dma_self_test(struct ioatdma_device *device)
 
 	dma_src = dma_map_single(dev, src, IOAT_TEST_SIZE, DMA_TO_DEVICE);
 	dma_dest = dma_map_single(dev, dest, IOAT_TEST_SIZE, DMA_FROM_DEVICE);
-	flags = DMA_COMPL_SKIP_SRC_UNMAP | DMA_COMPL_SKIP_DEST_UNMAP |
-		DMA_PREP_INTERRUPT;
+	flags = DMA_PREP_INTERRUPT;
 	tx = device->common.device_prep_dma_memcpy(dma_chan, dma_dest, dma_src,
 						   IOAT_TEST_SIZE, flags);
 	if (!tx) {
@@ -885,8 +869,7 @@ static char ioat_interrupt_style[32] = "msix";
 module_param_string(ioat_interrupt_style, ioat_interrupt_style,
 		    sizeof(ioat_interrupt_style), 0644);
 MODULE_PARM_DESC(ioat_interrupt_style,
-		 "set ioat interrupt style: msix (default), "
-		 "msix-single-vector, msi, intx)");
+		 "set ioat interrupt style: msix (default), msi, intx");
 
 /**
  * ioat_dma_setup_interrupts - setup interrupt handler
@@ -904,8 +887,6 @@ int ioat_dma_setup_interrupts(struct ioatdma_device *device)
 
 	if (!strcmp(ioat_interrupt_style, "msix"))
 		goto msix;
-	if (!strcmp(ioat_interrupt_style, "msix-single-vector"))
-		goto msix_single_vector;
 	if (!strcmp(ioat_interrupt_style, "msi"))
 		goto msi;
 	if (!strcmp(ioat_interrupt_style, "intx"))
@@ -920,10 +901,8 @@ msix:
 		device->msix_entries[i].entry = i;
 
 	err = pci_enable_msix(pdev, device->msix_entries, msixcnt);
-	if (err < 0)
+	if (err)
 		goto msi;
-	if (err > 0)
-		goto msix_single_vector;
 
 	for (i = 0; i < msixcnt; i++) {
 		msix = &device->msix_entries[i];
@@ -937,29 +916,13 @@ msix:
 				chan = ioat_chan_by_index(device, j);
 				devm_free_irq(dev, msix->vector, chan);
 			}
-			goto msix_single_vector;
+			goto msi;
 		}
 	}
 	intrctrl |= IOAT_INTRCTRL_MSIX_VECTOR_CONTROL;
 	device->irq_mode = IOAT_MSIX;
 	goto done;
 
-msix_single_vector:
-	msix = &device->msix_entries[0];
-	msix->entry = 0;
-	err = pci_enable_msix(pdev, device->msix_entries, 1);
-	if (err)
-		goto msi;
-
-	err = devm_request_irq(dev, msix->vector, ioat_dma_do_interrupt, 0,
-			       "ioat-msix", device);
-	if (err) {
-		pci_disable_msix(pdev);
-		goto msi;
-	}
-	device->irq_mode = IOAT_MSIX_SINGLE;
-	goto done;
-
 msi:
 	err = pci_enable_msi(pdev);
 	if (err)
@@ -971,7 +934,7 @@ msi:
 		pci_disable_msi(pdev);
 		goto intx;
 	}
-	device->irq_mode = IOAT_MSIX;
+	device->irq_mode = IOAT_MSI;
 	goto done;
 
 intx:

+ 0 - 14
drivers/dma/ioat/dma.h

@@ -52,7 +52,6 @@
 enum ioat_irq_mode {
 	IOAT_NOIRQ = 0,
 	IOAT_MSIX,
-	IOAT_MSIX_SINGLE,
 	IOAT_MSI,
 	IOAT_INTX
 };
@@ -83,7 +82,6 @@ struct ioatdma_device {
 	struct pci_pool *completion_pool;
 #define MAX_SED_POOLS	5
 	struct dma_pool *sed_hw_pool[MAX_SED_POOLS];
-	struct kmem_cache *sed_pool;
 	struct dma_device common;
 	u8 version;
 	struct msix_entry msix_entries[4];
@@ -342,16 +340,6 @@ static inline bool is_ioat_bug(unsigned long err)
 	return !!err;
 }
 
-static inline void ioat_unmap(struct pci_dev *pdev, dma_addr_t addr, size_t len,
-			      int direction, enum dma_ctrl_flags flags, bool dst)
-{
-	if ((dst && (flags & DMA_COMPL_DEST_UNMAP_SINGLE)) ||
-	    (!dst && (flags & DMA_COMPL_SRC_UNMAP_SINGLE)))
-		pci_unmap_single(pdev, addr, len, direction);
-	else
-		pci_unmap_page(pdev, addr, len, direction);
-}
-
 int ioat_probe(struct ioatdma_device *device);
 int ioat_register(struct ioatdma_device *device);
 int ioat1_dma_probe(struct ioatdma_device *dev, int dca);
@@ -363,8 +351,6 @@ void ioat_init_channel(struct ioatdma_device *device,
 		       struct ioat_chan_common *chan, int idx);
 enum dma_status ioat_dma_tx_status(struct dma_chan *c, dma_cookie_t cookie,
 				   struct dma_tx_state *txstate);
-void ioat_dma_unmap(struct ioat_chan_common *chan, enum dma_ctrl_flags flags,
-		    size_t len, struct ioat_dma_descriptor *hw);
 bool ioat_cleanup_preamble(struct ioat_chan_common *chan,
 			   dma_addr_t *phys_complete);
 void ioat_kobject_add(struct ioatdma_device *device, struct kobj_type *type);

+ 1 - 1
drivers/dma/ioat/dma_v2.c

@@ -148,7 +148,7 @@ static void __cleanup(struct ioat2_dma_chan *ioat, dma_addr_t phys_complete)
 		tx = &desc->txd;
 		dump_desc_dbg(ioat, desc);
 		if (tx->cookie) {
-			ioat_dma_unmap(chan, tx->flags, desc->len, desc->hw);
+			dma_descriptor_unmap(tx);
 			dma_cookie_complete(tx);
 			if (tx->callback) {
 				tx->callback(tx->callback_param);

+ 0 - 1
drivers/dma/ioat/dma_v2.h

@@ -157,7 +157,6 @@ static inline void ioat2_set_chainaddr(struct ioat2_dma_chan *ioat, u64 addr)
 
 int ioat2_dma_probe(struct ioatdma_device *dev, int dca);
 int ioat3_dma_probe(struct ioatdma_device *dev, int dca);
-void ioat3_dma_remove(struct ioatdma_device *dev);
 struct dca_provider *ioat2_dca_init(struct pci_dev *pdev, void __iomem *iobase);
 struct dca_provider *ioat3_dca_init(struct pci_dev *pdev, void __iomem *iobase);
 int ioat2_check_space_lock(struct ioat2_dma_chan *ioat, int num_descs);

+ 43 - 272
drivers/dma/ioat/dma_v3.c

@@ -67,6 +67,8 @@
 #include "dma.h"
 #include "dma_v2.h"
 
+extern struct kmem_cache *ioat3_sed_cache;
+
 /* ioat hardware assumes at least two sources for raid operations */
 #define src_cnt_to_sw(x) ((x) + 2)
 #define src_cnt_to_hw(x) ((x) - 2)
@@ -87,22 +89,8 @@ static const u8 pq_idx_to_field[] = { 1, 4, 5, 0, 1, 2, 4, 5 };
 static const u8 pq16_idx_to_field[] = { 1, 4, 1, 2, 3, 4, 5, 6, 7,
 					0, 1, 2, 3, 4, 5, 6 };
 
-/*
- * technically sources 1 and 2 do not require SED, but the op will have
- * at least 9 descriptors so that's irrelevant.
- */
-static const u8 pq16_idx_to_sed[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0,
-				      1, 1, 1, 1, 1, 1, 1 };
-
 static void ioat3_eh(struct ioat2_dma_chan *ioat);
 
-static dma_addr_t xor_get_src(struct ioat_raw_descriptor *descs[2], int idx)
-{
-	struct ioat_raw_descriptor *raw = descs[xor_idx_to_desc >> idx & 1];
-
-	return raw->field[xor_idx_to_field[idx]];
-}
-
 static void xor_set_src(struct ioat_raw_descriptor *descs[2],
 			dma_addr_t addr, u32 offset, int idx)
 {
@@ -135,12 +123,6 @@ static void pq_set_src(struct ioat_raw_descriptor *descs[2],
 	pq->coef[idx] = coef;
 }
 
-static int sed_get_pq16_pool_idx(int src_cnt)
-{
-
-	return pq16_idx_to_sed[src_cnt];
-}
-
 static bool is_jf_ioat(struct pci_dev *pdev)
 {
 	switch (pdev->device) {
@@ -272,7 +254,7 @@ ioat3_alloc_sed(struct ioatdma_device *device, unsigned int hw_pool)
 	struct ioat_sed_ent *sed;
 	gfp_t flags = __GFP_ZERO | GFP_ATOMIC;
 
-	sed = kmem_cache_alloc(device->sed_pool, flags);
+	sed = kmem_cache_alloc(ioat3_sed_cache, flags);
 	if (!sed)
 		return NULL;
 
@@ -280,7 +262,7 @@ ioat3_alloc_sed(struct ioatdma_device *device, unsigned int hw_pool)
 	sed->hw = dma_pool_alloc(device->sed_hw_pool[hw_pool],
 				 flags, &sed->dma);
 	if (!sed->hw) {
-		kmem_cache_free(device->sed_pool, sed);
+		kmem_cache_free(ioat3_sed_cache, sed);
 		return NULL;
 	}
 
@@ -293,165 +275,7 @@ static void ioat3_free_sed(struct ioatdma_device *device, struct ioat_sed_ent *s
 		return;
 
 	dma_pool_free(device->sed_hw_pool[sed->hw_pool], sed->hw, sed->dma);
-	kmem_cache_free(device->sed_pool, sed);
-}
-
-static void ioat3_dma_unmap(struct ioat2_dma_chan *ioat,
-			    struct ioat_ring_ent *desc, int idx)
-{
-	struct ioat_chan_common *chan = &ioat->base;
-	struct pci_dev *pdev = chan->device->pdev;
-	size_t len = desc->len;
-	size_t offset = len - desc->hw->size;
-	struct dma_async_tx_descriptor *tx = &desc->txd;
-	enum dma_ctrl_flags flags = tx->flags;
-
-	switch (desc->hw->ctl_f.op) {
-	case IOAT_OP_COPY:
-		if (!desc->hw->ctl_f.null) /* skip 'interrupt' ops */
-			ioat_dma_unmap(chan, flags, len, desc->hw);
-		break;
-	case IOAT_OP_XOR_VAL:
-	case IOAT_OP_XOR: {
-		struct ioat_xor_descriptor *xor = desc->xor;
-		struct ioat_ring_ent *ext;
-		struct ioat_xor_ext_descriptor *xor_ex = NULL;
-		int src_cnt = src_cnt_to_sw(xor->ctl_f.src_cnt);
-		struct ioat_raw_descriptor *descs[2];
-		int i;
-
-		if (src_cnt > 5) {
-			ext = ioat2_get_ring_ent(ioat, idx + 1);
-			xor_ex = ext->xor_ex;
-		}
-
-		if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
-			descs[0] = (struct ioat_raw_descriptor *) xor;
-			descs[1] = (struct ioat_raw_descriptor *) xor_ex;
-			for (i = 0; i < src_cnt; i++) {
-				dma_addr_t src = xor_get_src(descs, i);
-
-				ioat_unmap(pdev, src - offset, len,
-					   PCI_DMA_TODEVICE, flags, 0);
-			}
-
-			/* dest is a source in xor validate operations */
-			if (xor->ctl_f.op == IOAT_OP_XOR_VAL) {
-				ioat_unmap(pdev, xor->dst_addr - offset, len,
-					   PCI_DMA_TODEVICE, flags, 1);
-				break;
-			}
-		}
-
-		if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP))
-			ioat_unmap(pdev, xor->dst_addr - offset, len,
-				   PCI_DMA_FROMDEVICE, flags, 1);
-		break;
-	}
-	case IOAT_OP_PQ_VAL:
-	case IOAT_OP_PQ: {
-		struct ioat_pq_descriptor *pq = desc->pq;
-		struct ioat_ring_ent *ext;
-		struct ioat_pq_ext_descriptor *pq_ex = NULL;
-		int src_cnt = src_cnt_to_sw(pq->ctl_f.src_cnt);
-		struct ioat_raw_descriptor *descs[2];
-		int i;
-
-		if (src_cnt > 3) {
-			ext = ioat2_get_ring_ent(ioat, idx + 1);
-			pq_ex = ext->pq_ex;
-		}
-
-		/* in the 'continue' case don't unmap the dests as sources */
-		if (dmaf_p_disabled_continue(flags))
-			src_cnt--;
-		else if (dmaf_continue(flags))
-			src_cnt -= 3;
-
-		if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
-			descs[0] = (struct ioat_raw_descriptor *) pq;
-			descs[1] = (struct ioat_raw_descriptor *) pq_ex;
-			for (i = 0; i < src_cnt; i++) {
-				dma_addr_t src = pq_get_src(descs, i);
-
-				ioat_unmap(pdev, src - offset, len,
-					   PCI_DMA_TODEVICE, flags, 0);
-			}
-
-			/* the dests are sources in pq validate operations */
-			if (pq->ctl_f.op == IOAT_OP_XOR_VAL) {
-				if (!(flags & DMA_PREP_PQ_DISABLE_P))
-					ioat_unmap(pdev, pq->p_addr - offset,
-						   len, PCI_DMA_TODEVICE, flags, 0);
-				if (!(flags & DMA_PREP_PQ_DISABLE_Q))
-					ioat_unmap(pdev, pq->q_addr - offset,
-						   len, PCI_DMA_TODEVICE, flags, 0);
-				break;
-			}
-		}
-
-		if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
-			if (!(flags & DMA_PREP_PQ_DISABLE_P))
-				ioat_unmap(pdev, pq->p_addr - offset, len,
-					   PCI_DMA_BIDIRECTIONAL, flags, 1);
-			if (!(flags & DMA_PREP_PQ_DISABLE_Q))
-				ioat_unmap(pdev, pq->q_addr - offset, len,
-					   PCI_DMA_BIDIRECTIONAL, flags, 1);
-		}
-		break;
-	}
-	case IOAT_OP_PQ_16S:
-	case IOAT_OP_PQ_VAL_16S: {
-		struct ioat_pq_descriptor *pq = desc->pq;
-		int src_cnt = src16_cnt_to_sw(pq->ctl_f.src_cnt);
-		struct ioat_raw_descriptor *descs[4];
-		int i;
-
-		/* in the 'continue' case don't unmap the dests as sources */
-		if (dmaf_p_disabled_continue(flags))
-			src_cnt--;
-		else if (dmaf_continue(flags))
-			src_cnt -= 3;
-
-		if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
-			descs[0] = (struct ioat_raw_descriptor *)pq;
-			descs[1] = (struct ioat_raw_descriptor *)(desc->sed->hw);
-			descs[2] = (struct ioat_raw_descriptor *)(&desc->sed->hw->b[0]);
-			for (i = 0; i < src_cnt; i++) {
-				dma_addr_t src = pq16_get_src(descs, i);
-
-				ioat_unmap(pdev, src - offset, len,
-					   PCI_DMA_TODEVICE, flags, 0);
-			}
-
-			/* the dests are sources in pq validate operations */
-			if (pq->ctl_f.op == IOAT_OP_XOR_VAL) {
-				if (!(flags & DMA_PREP_PQ_DISABLE_P))
-					ioat_unmap(pdev, pq->p_addr - offset,
-						   len, PCI_DMA_TODEVICE,
-						   flags, 0);
-				if (!(flags & DMA_PREP_PQ_DISABLE_Q))
-					ioat_unmap(pdev, pq->q_addr - offset,
-						   len, PCI_DMA_TODEVICE,
-						   flags, 0);
-				break;
-			}
-		}
-
-		if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
-			if (!(flags & DMA_PREP_PQ_DISABLE_P))
-				ioat_unmap(pdev, pq->p_addr - offset, len,
-					   PCI_DMA_BIDIRECTIONAL, flags, 1);
-			if (!(flags & DMA_PREP_PQ_DISABLE_Q))
-				ioat_unmap(pdev, pq->q_addr - offset, len,
-					   PCI_DMA_BIDIRECTIONAL, flags, 1);
-		}
-		break;
-	}
-	default:
-		dev_err(&pdev->dev, "%s: unknown op type: %#x\n",
-			__func__, desc->hw->ctl_f.op);
-	}
+	kmem_cache_free(ioat3_sed_cache, sed);
 }
 
 static bool desc_has_ext(struct ioat_ring_ent *desc)
@@ -577,7 +401,7 @@ static void __cleanup(struct ioat2_dma_chan *ioat, dma_addr_t phys_complete)
 		tx = &desc->txd;
 		if (tx->cookie) {
 			dma_cookie_complete(tx);
-			ioat3_dma_unmap(ioat, desc, idx + i);
+			dma_descriptor_unmap(tx);
 			if (tx->callback) {
 				tx->callback(tx->callback_param);
 				tx->callback = NULL;
@@ -1129,9 +953,6 @@ __ioat3_prep_pq16_lock(struct dma_chan *c, enum sum_check_flags *result,
 	u8 op;
 	int i, s, idx, num_descs;
 
-	/* this function only handles src_cnt 9 - 16 */
-	BUG_ON(src_cnt < 9);
-
 	/* this function is only called with 9-16 sources */
 	op = result ? IOAT_OP_PQ_VAL_16S : IOAT_OP_PQ_16S;
 
@@ -1159,8 +980,7 @@ __ioat3_prep_pq16_lock(struct dma_chan *c, enum sum_check_flags *result,
 
 		descs[0] = (struct ioat_raw_descriptor *) pq;
 
-		desc->sed = ioat3_alloc_sed(device,
-					    sed_get_pq16_pool_idx(src_cnt));
+		desc->sed = ioat3_alloc_sed(device, (src_cnt-2) >> 3);
 		if (!desc->sed) {
 			dev_err(to_dev(chan),
 				"%s: no free sed entries\n", __func__);
@@ -1218,13 +1038,21 @@ __ioat3_prep_pq16_lock(struct dma_chan *c, enum sum_check_flags *result,
 	return &desc->txd;
 }
 
+static int src_cnt_flags(unsigned int src_cnt, unsigned long flags)
+{
+	if (dmaf_p_disabled_continue(flags))
+		return src_cnt + 1;
+	else if (dmaf_continue(flags))
+		return src_cnt + 3;
+	else
+		return src_cnt;
+}
+
 static struct dma_async_tx_descriptor *
 ioat3_prep_pq(struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src,
 	      unsigned int src_cnt, const unsigned char *scf, size_t len,
 	      unsigned long flags)
 {
-	struct dma_device *dma = chan->device;
-
 	/* specify valid address for disabled result */
 	if (flags & DMA_PREP_PQ_DISABLE_P)
 		dst[0] = dst[1];
@@ -1244,7 +1072,7 @@ ioat3_prep_pq(struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src,
 		single_source_coef[0] = scf[0];
 		single_source_coef[1] = 0;
 
-		return (src_cnt > 8) && (dma->max_pq > 8) ?
+		return src_cnt_flags(src_cnt, flags) > 8 ?
 			__ioat3_prep_pq16_lock(chan, NULL, dst, single_source,
 					       2, single_source_coef, len,
 					       flags) :
@@ -1252,7 +1080,7 @@ ioat3_prep_pq(struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src,
 					     single_source_coef, len, flags);
 
 	} else {
-		return (src_cnt > 8) && (dma->max_pq > 8) ?
+		return src_cnt_flags(src_cnt, flags) > 8 ?
 			__ioat3_prep_pq16_lock(chan, NULL, dst, src, src_cnt,
 					       scf, len, flags) :
 			__ioat3_prep_pq_lock(chan, NULL, dst, src, src_cnt,
@@ -1265,8 +1093,6 @@ ioat3_prep_pq_val(struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src,
 		  unsigned int src_cnt, const unsigned char *scf, size_t len,
 		  enum sum_check_flags *pqres, unsigned long flags)
 {
-	struct dma_device *dma = chan->device;
-
 	/* specify valid address for disabled result */
 	if (flags & DMA_PREP_PQ_DISABLE_P)
 		pq[0] = pq[1];
@@ -1278,7 +1104,7 @@ ioat3_prep_pq_val(struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src,
 	 */
 	*pqres = 0;
 
-	return (src_cnt > 8) && (dma->max_pq > 8) ?
+	return src_cnt_flags(src_cnt, flags) > 8 ?
 		__ioat3_prep_pq16_lock(chan, pqres, pq, src, src_cnt, scf, len,
 				       flags) :
 		__ioat3_prep_pq_lock(chan, pqres, pq, src, src_cnt, scf, len,
@@ -1289,7 +1115,6 @@ static struct dma_async_tx_descriptor *
 ioat3_prep_pqxor(struct dma_chan *chan, dma_addr_t dst, dma_addr_t *src,
 		 unsigned int src_cnt, size_t len, unsigned long flags)
 {
-	struct dma_device *dma = chan->device;
 	unsigned char scf[src_cnt];
 	dma_addr_t pq[2];
 
@@ -1298,7 +1123,7 @@ ioat3_prep_pqxor(struct dma_chan *chan, dma_addr_t dst, dma_addr_t *src,
 	flags |= DMA_PREP_PQ_DISABLE_Q;
 	pq[1] = dst; /* specify valid address for disabled result */
 
-	return (src_cnt > 8) && (dma->max_pq > 8) ?
+	return src_cnt_flags(src_cnt, flags) > 8 ?
 		__ioat3_prep_pq16_lock(chan, NULL, pq, src, src_cnt, scf, len,
 				       flags) :
 		__ioat3_prep_pq_lock(chan, NULL, pq, src, src_cnt, scf, len,
@@ -1310,7 +1135,6 @@ ioat3_prep_pqxor_val(struct dma_chan *chan, dma_addr_t *src,
 		     unsigned int src_cnt, size_t len,
 		     enum sum_check_flags *result, unsigned long flags)
 {
-	struct dma_device *dma = chan->device;
 	unsigned char scf[src_cnt];
 	dma_addr_t pq[2];
 
@@ -1324,8 +1148,7 @@ ioat3_prep_pqxor_val(struct dma_chan *chan, dma_addr_t *src,
 	flags |= DMA_PREP_PQ_DISABLE_Q;
 	pq[1] = pq[0]; /* specify valid address for disabled result */
 
-
-	return (src_cnt > 8) && (dma->max_pq > 8) ?
+	return src_cnt_flags(src_cnt, flags) > 8 ?
 		__ioat3_prep_pq16_lock(chan, result, pq, &src[1], src_cnt - 1,
 				       scf, len, flags) :
 		__ioat3_prep_pq_lock(chan, result, pq, &src[1], src_cnt - 1,
@@ -1444,9 +1267,7 @@ static int ioat_xor_val_self_test(struct ioatdma_device *device)
 					   DMA_TO_DEVICE);
 	tx = dma->device_prep_dma_xor(dma_chan, dest_dma, dma_srcs,
 				      IOAT_NUM_SRC_TEST, PAGE_SIZE,
-				      DMA_PREP_INTERRUPT |
-				      DMA_COMPL_SKIP_SRC_UNMAP |
-				      DMA_COMPL_SKIP_DEST_UNMAP);
+				      DMA_PREP_INTERRUPT);
 
 	if (!tx) {
 		dev_err(dev, "Self-test xor prep failed\n");
@@ -1507,9 +1328,7 @@ static int ioat_xor_val_self_test(struct ioatdma_device *device)
 					   DMA_TO_DEVICE);
 	tx = dma->device_prep_dma_xor_val(dma_chan, dma_srcs,
 					  IOAT_NUM_SRC_TEST + 1, PAGE_SIZE,
-					  &xor_val_result, DMA_PREP_INTERRUPT |
-					  DMA_COMPL_SKIP_SRC_UNMAP |
-					  DMA_COMPL_SKIP_DEST_UNMAP);
+					  &xor_val_result, DMA_PREP_INTERRUPT);
 	if (!tx) {
 		dev_err(dev, "Self-test zero prep failed\n");
 		err = -ENODEV;
@@ -1545,6 +1364,8 @@ static int ioat_xor_val_self_test(struct ioatdma_device *device)
 		goto free_resources;
 	}
 
+	memset(page_address(dest), 0, PAGE_SIZE);
+
 	/* test for non-zero parity sum */
 	op = IOAT_OP_XOR_VAL;
 
@@ -1554,9 +1375,7 @@ static int ioat_xor_val_self_test(struct ioatdma_device *device)
 					   DMA_TO_DEVICE);
 	tx = dma->device_prep_dma_xor_val(dma_chan, dma_srcs,
 					  IOAT_NUM_SRC_TEST + 1, PAGE_SIZE,
-					  &xor_val_result, DMA_PREP_INTERRUPT |
-					  DMA_COMPL_SKIP_SRC_UNMAP |
-					  DMA_COMPL_SKIP_DEST_UNMAP);
+					  &xor_val_result, DMA_PREP_INTERRUPT);
 	if (!tx) {
 		dev_err(dev, "Self-test 2nd zero prep failed\n");
 		err = -ENODEV;
@@ -1630,52 +1449,36 @@ static int ioat3_dma_self_test(struct ioatdma_device *device)
 
 static int ioat3_irq_reinit(struct ioatdma_device *device)
 {
-	int msixcnt = device->common.chancnt;
 	struct pci_dev *pdev = device->pdev;
-	int i;
-	struct msix_entry *msix;
-	struct ioat_chan_common *chan;
-	int err = 0;
+	int irq = pdev->irq, i;
+
+	if (!is_bwd_ioat(pdev))
+		return 0;
 
 	switch (device->irq_mode) {
 	case IOAT_MSIX:
+		for (i = 0; i < device->common.chancnt; i++) {
+			struct msix_entry *msix = &device->msix_entries[i];
+			struct ioat_chan_common *chan;
 
-		for (i = 0; i < msixcnt; i++) {
-			msix = &device->msix_entries[i];
 			chan = ioat_chan_by_index(device, i);
 			devm_free_irq(&pdev->dev, msix->vector, chan);
 		}
 
 		pci_disable_msix(pdev);
 		break;
-
-	case IOAT_MSIX_SINGLE:
-		msix = &device->msix_entries[0];
-		chan = ioat_chan_by_index(device, 0);
-		devm_free_irq(&pdev->dev, msix->vector, chan);
-		pci_disable_msix(pdev);
-		break;
-
 	case IOAT_MSI:
-		chan = ioat_chan_by_index(device, 0);
-		devm_free_irq(&pdev->dev, pdev->irq, chan);
 		pci_disable_msi(pdev);
-		break;
-
+		/* fall through */
 	case IOAT_INTX:
-		chan = ioat_chan_by_index(device, 0);
-		devm_free_irq(&pdev->dev, pdev->irq, chan);
+		devm_free_irq(&pdev->dev, irq, device);
 		break;
-
 	default:
 		return 0;
 	}
-
 	device->irq_mode = IOAT_NOIRQ;
 
-	err = ioat_dma_setup_interrupts(device);
-
-	return err;
+	return ioat_dma_setup_interrupts(device);
 }
 
 static int ioat3_reset_hw(struct ioat_chan_common *chan)
@@ -1718,14 +1521,12 @@ static int ioat3_reset_hw(struct ioat_chan_common *chan)
 	}
 
 	err = ioat2_reset_sync(chan, msecs_to_jiffies(200));
-	if (err) {
-		dev_err(&pdev->dev, "Failed to reset!\n");
-		return err;
-	}
-
-	if (device->irq_mode != IOAT_NOIRQ && is_bwd_ioat(pdev))
+	if (!err)
 		err = ioat3_irq_reinit(device);
 
+	if (err)
+		dev_err(&pdev->dev, "Failed to reset: %d\n", err);
+
 	return err;
 }
 
@@ -1835,21 +1636,15 @@ int ioat3_dma_probe(struct ioatdma_device *device, int dca)
 		char pool_name[14];
 		int i;
 
-		/* allocate sw descriptor pool for SED */
-		device->sed_pool = kmem_cache_create("ioat_sed",
-				sizeof(struct ioat_sed_ent), 0, 0, NULL);
-		if (!device->sed_pool)
-			return -ENOMEM;
-
 		for (i = 0; i < MAX_SED_POOLS; i++) {
 			snprintf(pool_name, 14, "ioat_hw%d_sed", i);
 
 			/* allocate SED DMA pool */
-			device->sed_hw_pool[i] = dma_pool_create(pool_name,
+			device->sed_hw_pool[i] = dmam_pool_create(pool_name,
 					&pdev->dev,
 					SED_SIZE * (i + 1), 64, 0);
 			if (!device->sed_hw_pool[i])
-				goto sed_pool_cleanup;
+				return -ENOMEM;
 
 		}
 	}
@@ -1875,28 +1670,4 @@ int ioat3_dma_probe(struct ioatdma_device *device, int dca)
 		device->dca = ioat3_dca_init(pdev, device->reg_base);
 
 	return 0;
-
-sed_pool_cleanup:
-	if (device->sed_pool) {
-		int i;
-		kmem_cache_destroy(device->sed_pool);
-
-		for (i = 0; i < MAX_SED_POOLS; i++)
-			if (device->sed_hw_pool[i])
-				dma_pool_destroy(device->sed_hw_pool[i]);
-	}
-
-	return -ENOMEM;
-}
-
-void ioat3_dma_remove(struct ioatdma_device *device)
-{
-	if (device->sed_pool) {
-		int i;
-		kmem_cache_destroy(device->sed_pool);
-
-		for (i = 0; i < MAX_SED_POOLS; i++)
-			if (device->sed_hw_pool[i])
-				dma_pool_destroy(device->sed_hw_pool[i]);
-	}
 }

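Besides moving the SED software descriptors to the global ioat3_sed_cache, the dma_v3.c changes replace two lookup-table decisions with arithmetic. A worked example, assuming a PQ operation prepared with 12 sources and DMA_PREP_CONTINUE set (the values are illustrative):

	/* src_cnt_flags() folds the implicit continuation sources back in:
	 * dmaf_continue() is true here, so 12 + 3 = 15 > 8 and the
	 * 16-source path __ioat3_prep_pq16_lock() is selected. */
	int effective_srcs = src_cnt_flags(12, DMA_PREP_CONTINUE);

	/* inside __ioat3_prep_pq16_lock() the SED hardware pool index is now
	 * computed directly instead of via the removed pq16_idx_to_sed[] table:
	 * (12 - 2) >> 3 == 1, i.e. the second, larger SED pool. */
	desc->sed = ioat3_alloc_sed(device, (src_cnt - 2) >> 3);
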
+ 15 - 5
drivers/dma/ioat/pci.c

@@ -123,6 +123,7 @@ module_param(ioat_dca_enabled, int, 0644);
 MODULE_PARM_DESC(ioat_dca_enabled, "control support of dca service (default: 1)");
 
 struct kmem_cache *ioat2_cache;
+struct kmem_cache *ioat3_sed_cache;
 
 #define DRV_NAME "ioatdma"
 
@@ -207,9 +208,6 @@ static void ioat_remove(struct pci_dev *pdev)
 	if (!device)
 		return;
 
-	if (device->version >= IOAT_VER_3_0)
-		ioat3_dma_remove(device);
-
 	dev_err(&pdev->dev, "Removing dma and dca services\n");
 	if (device->dca) {
 		unregister_dca_provider(device->dca, &pdev->dev);
@@ -221,7 +219,7 @@ static void ioat_remove(struct pci_dev *pdev)
 
 static int __init ioat_init_module(void)
 {
-	int err;
+	int err = -ENOMEM;
 
 	pr_info("%s: Intel(R) QuickData Technology Driver %s\n",
 		DRV_NAME, IOAT_DMA_VERSION);
@@ -231,9 +229,21 @@ static int __init ioat_init_module(void)
 	if (!ioat2_cache)
 		return -ENOMEM;
 
+	ioat3_sed_cache = KMEM_CACHE(ioat_sed_ent, 0);
+	if (!ioat3_sed_cache)
+		goto err_ioat2_cache;
+
 	err = pci_register_driver(&ioat_pci_driver);
 	if (err)
-		kmem_cache_destroy(ioat2_cache);
+		goto err_ioat3_cache;
+
+	return 0;
+
+ err_ioat3_cache:
+	kmem_cache_destroy(ioat3_sed_cache);
+
+ err_ioat2_cache:
+	kmem_cache_destroy(ioat2_cache);
 
 	return err;
 }

+ 3 - 94
drivers/dma/iop-adma.c

@@ -61,80 +61,6 @@ static void iop_adma_free_slots(struct iop_adma_desc_slot *slot)
 	}
 }
 
-static void
-iop_desc_unmap(struct iop_adma_chan *iop_chan, struct iop_adma_desc_slot *desc)
-{
-	struct dma_async_tx_descriptor *tx = &desc->async_tx;
-	struct iop_adma_desc_slot *unmap = desc->group_head;
-	struct device *dev = &iop_chan->device->pdev->dev;
-	u32 len = unmap->unmap_len;
-	enum dma_ctrl_flags flags = tx->flags;
-	u32 src_cnt;
-	dma_addr_t addr;
-	dma_addr_t dest;
-
-	src_cnt = unmap->unmap_src_cnt;
-	dest = iop_desc_get_dest_addr(unmap, iop_chan);
-	if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
-		enum dma_data_direction dir;
-
-		if (src_cnt > 1) /* is xor? */
-			dir = DMA_BIDIRECTIONAL;
-		else
-			dir = DMA_FROM_DEVICE;
-
-		dma_unmap_page(dev, dest, len, dir);
-	}
-
-	if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
-		while (src_cnt--) {
-			addr = iop_desc_get_src_addr(unmap, iop_chan, src_cnt);
-			if (addr == dest)
-				continue;
-			dma_unmap_page(dev, addr, len, DMA_TO_DEVICE);
-		}
-	}
-	desc->group_head = NULL;
-}
-
-static void
-iop_desc_unmap_pq(struct iop_adma_chan *iop_chan, struct iop_adma_desc_slot *desc)
-{
-	struct dma_async_tx_descriptor *tx = &desc->async_tx;
-	struct iop_adma_desc_slot *unmap = desc->group_head;
-	struct device *dev = &iop_chan->device->pdev->dev;
-	u32 len = unmap->unmap_len;
-	enum dma_ctrl_flags flags = tx->flags;
-	u32 src_cnt = unmap->unmap_src_cnt;
-	dma_addr_t pdest = iop_desc_get_dest_addr(unmap, iop_chan);
-	dma_addr_t qdest = iop_desc_get_qdest_addr(unmap, iop_chan);
-	int i;
-
-	if (tx->flags & DMA_PREP_CONTINUE)
-		src_cnt -= 3;
-
-	if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP) && !desc->pq_check_result) {
-		dma_unmap_page(dev, pdest, len, DMA_BIDIRECTIONAL);
-		dma_unmap_page(dev, qdest, len, DMA_BIDIRECTIONAL);
-	}
-
-	if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
-		dma_addr_t addr;
-
-		for (i = 0; i < src_cnt; i++) {
-			addr = iop_desc_get_src_addr(unmap, iop_chan, i);
-			dma_unmap_page(dev, addr, len, DMA_TO_DEVICE);
-		}
-		if (desc->pq_check_result) {
-			dma_unmap_page(dev, pdest, len, DMA_TO_DEVICE);
-			dma_unmap_page(dev, qdest, len, DMA_TO_DEVICE);
-		}
-	}
-
-	desc->group_head = NULL;
-}
-
-
 static dma_cookie_t
 iop_adma_run_tx_complete_actions(struct iop_adma_desc_slot *desc,
 	struct iop_adma_chan *iop_chan, dma_cookie_t cookie)
@@ -152,15 +78,9 @@ iop_adma_run_tx_complete_actions(struct iop_adma_desc_slot *desc,
 		if (tx->callback)
 			tx->callback(tx->callback_param);
 
-		/* unmap dma addresses
-		 * (unmap_single vs unmap_page?)
-		 */
-		if (desc->group_head && desc->unmap_len) {
-			if (iop_desc_is_pq(desc))
-				iop_desc_unmap_pq(iop_chan, desc);
-			else
-				iop_desc_unmap(iop_chan, desc);
-		}
+		dma_descriptor_unmap(tx);
+		if (desc->group_head)
+			desc->group_head = NULL;
 	}
 
 	/* run dependent operations */
@@ -591,7 +511,6 @@ iop_adma_prep_dma_interrupt(struct dma_chan *chan, unsigned long flags)
 	if (sw_desc) {
 		grp_start = sw_desc->group_head;
 		iop_desc_init_interrupt(grp_start, iop_chan);
-		grp_start->unmap_len = 0;
 		sw_desc->async_tx.flags = flags;
 	}
 	spin_unlock_bh(&iop_chan->lock);
@@ -623,8 +542,6 @@ iop_adma_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dma_dest,
 		iop_desc_set_byte_count(grp_start, iop_chan, len);
 		iop_desc_set_dest_addr(grp_start, iop_chan, dma_dest);
 		iop_desc_set_memcpy_src_addr(grp_start, dma_src);
-		sw_desc->unmap_src_cnt = 1;
-		sw_desc->unmap_len = len;
 		sw_desc->async_tx.flags = flags;
 	}
 	spin_unlock_bh(&iop_chan->lock);
@@ -657,8 +574,6 @@ iop_adma_prep_dma_xor(struct dma_chan *chan, dma_addr_t dma_dest,
 		iop_desc_init_xor(grp_start, src_cnt, flags);
 		iop_desc_set_byte_count(grp_start, iop_chan, len);
 		iop_desc_set_dest_addr(grp_start, iop_chan, dma_dest);
-		sw_desc->unmap_src_cnt = src_cnt;
-		sw_desc->unmap_len = len;
 		sw_desc->async_tx.flags = flags;
 		while (src_cnt--)
 			iop_desc_set_xor_src_addr(grp_start, src_cnt,
@@ -694,8 +609,6 @@ iop_adma_prep_dma_xor_val(struct dma_chan *chan, dma_addr_t *dma_src,
 		grp_start->xor_check_result = result;
 		pr_debug("\t%s: grp_start->xor_check_result: %p\n",
 			__func__, grp_start->xor_check_result);
-		sw_desc->unmap_src_cnt = src_cnt;
-		sw_desc->unmap_len = len;
 		sw_desc->async_tx.flags = flags;
 		while (src_cnt--)
 			iop_desc_set_zero_sum_src_addr(grp_start, src_cnt,
@@ -748,8 +661,6 @@ iop_adma_prep_dma_pq(struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src,
 			dst[0] = dst[1] & 0x7;
 
 		iop_desc_set_pq_addr(g, dst);
-		sw_desc->unmap_src_cnt = src_cnt;
-		sw_desc->unmap_len = len;
 		sw_desc->async_tx.flags = flags;
 		for (i = 0; i < src_cnt; i++)
 			iop_desc_set_pq_src_addr(g, i, src[i], scf[i]);
@@ -804,8 +715,6 @@ iop_adma_prep_dma_pq_val(struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src,
 		g->pq_check_result = pqres;
 		pr_debug("\t%s: g->pq_check_result: %p\n",
 			__func__, g->pq_check_result);
-		sw_desc->unmap_src_cnt = src_cnt+2;
-		sw_desc->unmap_len = len;
 		sw_desc->async_tx.flags = flags;
 		while (src_cnt--)
 			iop_desc_set_pq_zero_sum_src_addr(g, src_cnt,

+ 4 - 48
drivers/dma/mv_xor.c

@@ -60,14 +60,6 @@ static u32 mv_desc_get_dest_addr(struct mv_xor_desc_slot *desc)
 	return hw_desc->phy_dest_addr;
 }
 
-static u32 mv_desc_get_src_addr(struct mv_xor_desc_slot *desc,
-				int src_idx)
-{
-	struct mv_xor_desc *hw_desc = desc->hw_desc;
-	return hw_desc->phy_src_addr[mv_phy_src_idx(src_idx)];
-}
-
-
 static void mv_desc_set_byte_count(struct mv_xor_desc_slot *desc,
 				   u32 byte_count)
 {
@@ -278,42 +270,9 @@ mv_xor_run_tx_complete_actions(struct mv_xor_desc_slot *desc,
 			desc->async_tx.callback(
 				desc->async_tx.callback_param);
 
-		/* unmap dma addresses
-		 * (unmap_single vs unmap_page?)
-		 */
-		if (desc->group_head && desc->unmap_len) {
-			struct mv_xor_desc_slot *unmap = desc->group_head;
-			struct device *dev = mv_chan_to_devp(mv_chan);
-			u32 len = unmap->unmap_len;
-			enum dma_ctrl_flags flags = desc->async_tx.flags;
-			u32 src_cnt;
-			dma_addr_t addr;
-			dma_addr_t dest;
-
-			src_cnt = unmap->unmap_src_cnt;
-			dest = mv_desc_get_dest_addr(unmap);
-			if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
-				enum dma_data_direction dir;
-
-				if (src_cnt > 1) /* is xor ? */
-					dir = DMA_BIDIRECTIONAL;
-				else
-					dir = DMA_FROM_DEVICE;
-				dma_unmap_page(dev, dest, len, dir);
-			}
-
-			if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
-				while (src_cnt--) {
-					addr = mv_desc_get_src_addr(unmap,
-								    src_cnt);
-					if (addr == dest)
-						continue;
-					dma_unmap_page(dev, addr, len,
-						       DMA_TO_DEVICE);
-				}
-			}
+		dma_descriptor_unmap(&desc->async_tx);
+		if (desc->group_head)
 			desc->group_head = NULL;
-		}
 	}
 
 	/* run dependent operations */
@@ -1076,10 +1035,7 @@ mv_xor_channel_add(struct mv_xor_device *xordev,
 	}
 
 	mv_chan->mmr_base = xordev->xor_base;
-	if (!mv_chan->mmr_base) {
-		ret = -ENOMEM;
-		goto err_free_dma;
-	}
+	mv_chan->mmr_high_base = xordev->xor_high_base;
 	tasklet_init(&mv_chan->irq_tasklet, mv_xor_tasklet, (unsigned long)
 		     mv_chan);
 
@@ -1138,7 +1094,7 @@ static void
 mv_xor_conf_mbus_windows(struct mv_xor_device *xordev,
 			 const struct mbus_dram_target_info *dram)
 {
-	void __iomem *base = xordev->xor_base;
+	void __iomem *base = xordev->xor_high_base;
 	u32 win_enable = 0;
 	int i;
 

+ 13 - 12
drivers/dma/mv_xor.h

@@ -34,13 +34,13 @@
 #define XOR_OPERATION_MODE_MEMCPY	2
 #define XOR_DESCRIPTOR_SWAP		BIT(14)
 
-#define XOR_CURR_DESC(chan)	(chan->mmr_base + 0x210 + (chan->idx * 4))
-#define XOR_NEXT_DESC(chan)	(chan->mmr_base + 0x200 + (chan->idx * 4))
-#define XOR_BYTE_COUNT(chan)	(chan->mmr_base + 0x220 + (chan->idx * 4))
-#define XOR_DEST_POINTER(chan)	(chan->mmr_base + 0x2B0 + (chan->idx * 4))
-#define XOR_BLOCK_SIZE(chan)	(chan->mmr_base + 0x2C0 + (chan->idx * 4))
-#define XOR_INIT_VALUE_LOW(chan)	(chan->mmr_base + 0x2E0)
-#define XOR_INIT_VALUE_HIGH(chan)	(chan->mmr_base + 0x2E4)
+#define XOR_CURR_DESC(chan)	(chan->mmr_high_base + 0x10 + (chan->idx * 4))
+#define XOR_NEXT_DESC(chan)	(chan->mmr_high_base + 0x00 + (chan->idx * 4))
+#define XOR_BYTE_COUNT(chan)	(chan->mmr_high_base + 0x20 + (chan->idx * 4))
+#define XOR_DEST_POINTER(chan)	(chan->mmr_high_base + 0xB0 + (chan->idx * 4))
+#define XOR_BLOCK_SIZE(chan)	(chan->mmr_high_base + 0xC0 + (chan->idx * 4))
+#define XOR_INIT_VALUE_LOW(chan)	(chan->mmr_high_base + 0xE0)
+#define XOR_INIT_VALUE_HIGH(chan)	(chan->mmr_high_base + 0xE4)
 
 #define XOR_CONFIG(chan)	(chan->mmr_base + 0x10 + (chan->idx * 4))
 #define XOR_ACTIVATION(chan)	(chan->mmr_base + 0x20 + (chan->idx * 4))
@@ -50,11 +50,11 @@
 #define XOR_ERROR_ADDR(chan)	(chan->mmr_base + 0x60)
 #define XOR_INTR_MASK_VALUE	0x3F5
 
-#define WINDOW_BASE(w)		(0x250 + ((w) << 2))
-#define WINDOW_SIZE(w)		(0x270 + ((w) << 2))
-#define WINDOW_REMAP_HIGH(w)	(0x290 + ((w) << 2))
-#define WINDOW_BAR_ENABLE(chan)	(0x240 + ((chan) << 2))
-#define WINDOW_OVERRIDE_CTRL(chan)	(0x2A0 + ((chan) << 2))
+#define WINDOW_BASE(w)		(0x50 + ((w) << 2))
+#define WINDOW_SIZE(w)		(0x70 + ((w) << 2))
+#define WINDOW_REMAP_HIGH(w)	(0x90 + ((w) << 2))
+#define WINDOW_BAR_ENABLE(chan)	(0x40 + ((chan) << 2))
+#define WINDOW_OVERRIDE_CTRL(chan)	(0xA0 + ((chan) << 2))
 
 struct mv_xor_device {
 	void __iomem	     *xor_base;
@@ -82,6 +82,7 @@ struct mv_xor_chan {
 	int			pending;
 	spinlock_t		lock; /* protects the descriptor slot pool */
 	void __iomem		*mmr_base;
+	void __iomem		*mmr_high_base;
 	unsigned int		idx;
 	int                     irq;
 	enum dma_transaction_type	current_type;

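The mv_xor.h macro changes are a renumbering rather than a behaviour change: every offset that moved to mmr_high_base is the old mmr_base offset minus 0x200, matching the new mmr_high_base field added to struct mv_xor_chan. A quick sanity check (this assumes, as the old macros imply, that the high base maps the block formerly reached at mmr_base + 0x200; the variable names are illustrative):

	/* the same hardware register computed both ways */
	void __iomem *old_curr = chan->mmr_base      + 0x210 + (chan->idx * 4);
	void __iomem *new_curr = chan->mmr_high_base + 0x10  + (chan->idx * 4);
	/* old_curr == new_curr whenever mmr_high_base == mmr_base + 0x200 */
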
+ 2 - 0
drivers/dma/pl330.c

@@ -2268,6 +2268,8 @@ static void pl330_tasklet(unsigned long data)
 			list_move_tail(&desc->node, &pch->dmac->desc_pool);
 		}
 
+		dma_descriptor_unmap(&desc->txd);
+
 		if (callback) {
 			spin_unlock_irqrestore(&pch->lock, flags);
 			callback(callback_param);

+ 1 - 269
drivers/dma/ppc4xx/adma.c

@@ -801,218 +801,6 @@ static void ppc440spe_desc_set_link(struct ppc440spe_adma_chan *chan,
 	local_irq_restore(flags);
 }
 
-/**
- * ppc440spe_desc_get_src_addr - extract the source address from the descriptor
- */
-static u32 ppc440spe_desc_get_src_addr(struct ppc440spe_adma_desc_slot *desc,
-				struct ppc440spe_adma_chan *chan, int src_idx)
-{
-	struct dma_cdb *dma_hw_desc;
-	struct xor_cb *xor_hw_desc;
-
-	switch (chan->device->id) {
-	case PPC440SPE_DMA0_ID:
-	case PPC440SPE_DMA1_ID:
-		dma_hw_desc = desc->hw_desc;
-		/* May have 0, 1, 2, or 3 sources */
-		switch (dma_hw_desc->opc) {
-		case DMA_CDB_OPC_NO_OP:
-		case DMA_CDB_OPC_DFILL128:
-			return 0;
-		case DMA_CDB_OPC_DCHECK128:
-			if (unlikely(src_idx)) {
-				printk(KERN_ERR "%s: try to get %d source for"
-				    " DCHECK128\n", __func__, src_idx);
-				BUG();
-			}
-			return le32_to_cpu(dma_hw_desc->sg1l);
-		case DMA_CDB_OPC_MULTICAST:
-		case DMA_CDB_OPC_MV_SG1_SG2:
-			if (unlikely(src_idx > 2)) {
-				printk(KERN_ERR "%s: try to get %d source from"
-				    " DMA descr\n", __func__, src_idx);
-				BUG();
-			}
-			if (src_idx) {
-				if (le32_to_cpu(dma_hw_desc->sg1u) &
-				    DMA_CUED_XOR_WIN_MSK) {
-					u8 region;
-
-					if (src_idx == 1)
-						return le32_to_cpu(
-						    dma_hw_desc->sg1l) +
-							desc->unmap_len;
-
-					region = (le32_to_cpu(
-					    dma_hw_desc->sg1u)) >>
-						DMA_CUED_REGION_OFF;
-
-					region &= DMA_CUED_REGION_MSK;
-					switch (region) {
-					case DMA_RXOR123:
-						return le32_to_cpu(
-						    dma_hw_desc->sg1l) +
-							(desc->unmap_len << 1);
-					case DMA_RXOR124:
-						return le32_to_cpu(
-						    dma_hw_desc->sg1l) +
-							(desc->unmap_len * 3);
-					case DMA_RXOR125:
-						return le32_to_cpu(
-						    dma_hw_desc->sg1l) +
-							(desc->unmap_len << 2);
-					default:
-						printk(KERN_ERR
-						    "%s: try to"
-						    " get src3 for region %02x"
-						    "PPC440SPE_DESC_RXOR12?\n",
-						    __func__, region);
-						BUG();
-					}
-				} else {
-					printk(KERN_ERR
-						"%s: try to get %d"
-						" source for non-cued descr\n",
-						__func__, src_idx);
-					BUG();
-				}
-			}
-			return le32_to_cpu(dma_hw_desc->sg1l);
-		default:
-			printk(KERN_ERR "%s: unknown OPC 0x%02x\n",
-				__func__, dma_hw_desc->opc);
-			BUG();
-		}
-		return le32_to_cpu(dma_hw_desc->sg1l);
-	case PPC440SPE_XOR_ID:
-		/* May have up to 16 sources */
-		xor_hw_desc = desc->hw_desc;
-		return xor_hw_desc->ops[src_idx].l;
-	}
-	return 0;
-}
-
-/**
- * ppc440spe_desc_get_dest_addr - extract the destination address from the
- * descriptor
- */
-static u32 ppc440spe_desc_get_dest_addr(struct ppc440spe_adma_desc_slot *desc,
-				struct ppc440spe_adma_chan *chan, int idx)
-{
-	struct dma_cdb *dma_hw_desc;
-	struct xor_cb *xor_hw_desc;
-
-	switch (chan->device->id) {
-	case PPC440SPE_DMA0_ID:
-	case PPC440SPE_DMA1_ID:
-		dma_hw_desc = desc->hw_desc;
-
-		if (likely(!idx))
-			return le32_to_cpu(dma_hw_desc->sg2l);
-		return le32_to_cpu(dma_hw_desc->sg3l);
-	case PPC440SPE_XOR_ID:
-		xor_hw_desc = desc->hw_desc;
-		return xor_hw_desc->cbtal;
-	}
-	return 0;
-}
-
-/**
- * ppc440spe_desc_get_src_num - extract the number of source addresses from
- * the descriptor
- */
-static u32 ppc440spe_desc_get_src_num(struct ppc440spe_adma_desc_slot *desc,
-				struct ppc440spe_adma_chan *chan)
-{
-	struct dma_cdb *dma_hw_desc;
-	struct xor_cb *xor_hw_desc;
-
-	switch (chan->device->id) {
-	case PPC440SPE_DMA0_ID:
-	case PPC440SPE_DMA1_ID:
-		dma_hw_desc = desc->hw_desc;
-
-		switch (dma_hw_desc->opc) {
-		case DMA_CDB_OPC_NO_OP:
-		case DMA_CDB_OPC_DFILL128:
-			return 0;
-		case DMA_CDB_OPC_DCHECK128:
-			return 1;
-		case DMA_CDB_OPC_MV_SG1_SG2:
-		case DMA_CDB_OPC_MULTICAST:
-			/*
-			 * Only for RXOR operations we have more than
-			 * one source
-			 */
-			if (le32_to_cpu(dma_hw_desc->sg1u) &
-			    DMA_CUED_XOR_WIN_MSK) {
-				/* RXOR op, there are 2 or 3 sources */
-				if (((le32_to_cpu(dma_hw_desc->sg1u) >>
-				    DMA_CUED_REGION_OFF) &
-				      DMA_CUED_REGION_MSK) == DMA_RXOR12) {
-					/* RXOR 1-2 */
-					return 2;
-				} else {
-					/* RXOR 1-2-3/1-2-4/1-2-5 */
-					return 3;
-				}
-			}
-			return 1;
-		default:
-			printk(KERN_ERR "%s: unknown OPC 0x%02x\n",
-				__func__, dma_hw_desc->opc);
-			BUG();
-		}
-	case PPC440SPE_XOR_ID:
-		/* up to 16 sources */
-		xor_hw_desc = desc->hw_desc;
-		return xor_hw_desc->cbc & XOR_CDCR_OAC_MSK;
-	default:
-		BUG();
-	}
-	return 0;
-}
-
-/**
- * ppc440spe_desc_get_dst_num - get the number of destination addresses in
- * this descriptor
- */
-static u32 ppc440spe_desc_get_dst_num(struct ppc440spe_adma_desc_slot *desc,
-				struct ppc440spe_adma_chan *chan)
-{
-	struct dma_cdb *dma_hw_desc;
-
-	switch (chan->device->id) {
-	case PPC440SPE_DMA0_ID:
-	case PPC440SPE_DMA1_ID:
-		/* May be 1 or 2 destinations */
-		dma_hw_desc = desc->hw_desc;
-		switch (dma_hw_desc->opc) {
-		case DMA_CDB_OPC_NO_OP:
-		case DMA_CDB_OPC_DCHECK128:
-			return 0;
-		case DMA_CDB_OPC_MV_SG1_SG2:
-		case DMA_CDB_OPC_DFILL128:
-			return 1;
-		case DMA_CDB_OPC_MULTICAST:
-			if (desc->dst_cnt == 2)
-				return 2;
-			else
-				return 1;
-		default:
-			printk(KERN_ERR "%s: unknown OPC 0x%02x\n",
-				__func__, dma_hw_desc->opc);
-			BUG();
-		}
-	case PPC440SPE_XOR_ID:
-		/* Always only 1 destination */
-		return 1;
-	default:
-		BUG();
-	}
-	return 0;
-}
-
 /**
  * ppc440spe_desc_get_link - get the address of the descriptor that
  * follows this one
@@ -1705,43 +1493,6 @@ static void ppc440spe_adma_free_slots(struct ppc440spe_adma_desc_slot *slot,
 	}
 }
 
-static void ppc440spe_adma_unmap(struct ppc440spe_adma_chan *chan,
-				 struct ppc440spe_adma_desc_slot *desc)
-{
-	u32 src_cnt, dst_cnt;
-	dma_addr_t addr;
-
-	/*
-	 * get the number of sources & destination
-	 * included in this descriptor and unmap
-	 * them all
-	 */
-	src_cnt = ppc440spe_desc_get_src_num(desc, chan);
-	dst_cnt = ppc440spe_desc_get_dst_num(desc, chan);
-
-	/* unmap destinations */
-	if (!(desc->async_tx.flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
-		while (dst_cnt--) {
-			addr = ppc440spe_desc_get_dest_addr(
-				desc, chan, dst_cnt);
-			dma_unmap_page(chan->device->dev,
-					addr, desc->unmap_len,
-					DMA_FROM_DEVICE);
-		}
-	}
-
-	/* unmap sources */
-	if (!(desc->async_tx.flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
-		while (src_cnt--) {
-			addr = ppc440spe_desc_get_src_addr(
-				desc, chan, src_cnt);
-			dma_unmap_page(chan->device->dev,
-					addr, desc->unmap_len,
-					DMA_TO_DEVICE);
-		}
-	}
-}
-
 /**
  * ppc440spe_adma_run_tx_complete_actions - call functions to be called
  * upon completion
@@ -1765,26 +1516,7 @@ static dma_cookie_t ppc440spe_adma_run_tx_complete_actions(
 			desc->async_tx.callback(
 				desc->async_tx.callback_param);
 
-		/* unmap dma addresses
-		 * (unmap_single vs unmap_page?)
-		 *
-		 * actually, ppc's dma_unmap_page() functions are empty, so
-		 * the following code is just for the sake of completeness
-		 */
-		if (chan && chan->needs_unmap && desc->group_head &&
-		     desc->unmap_len) {
-			struct ppc440spe_adma_desc_slot *unmap =
-							desc->group_head;
-			/* assume 1 slot per op always */
-			u32 slot_count = unmap->slot_cnt;
-
-			/* Run through the group list and unmap addresses */
-			for (i = 0; i < slot_count; i++) {
-				BUG_ON(!unmap);
-				ppc440spe_adma_unmap(chan, unmap);
-				unmap = unmap->hw_next;
-			}
-		}
+		dma_descriptor_unmap(&desc->async_tx);
 	}
 
 	/* run dependent operations */

+ 1 - 36
drivers/dma/timb_dma.c

@@ -154,38 +154,6 @@ static bool __td_dma_done_ack(struct timb_dma_chan *td_chan)
 	return done;
 }
 
-static void __td_unmap_desc(struct timb_dma_chan *td_chan, const u8 *dma_desc,
-	bool single)
-{
-	dma_addr_t addr;
-	int len;
-
-	addr = (dma_desc[7] << 24) | (dma_desc[6] << 16) | (dma_desc[5] << 8) |
-		dma_desc[4];
-
-	len = (dma_desc[3] << 8) | dma_desc[2];
-
-	if (single)
-		dma_unmap_single(chan2dev(&td_chan->chan), addr, len,
-			DMA_TO_DEVICE);
-	else
-		dma_unmap_page(chan2dev(&td_chan->chan), addr, len,
-			DMA_TO_DEVICE);
-}
-
-static void __td_unmap_descs(struct timb_dma_desc *td_desc, bool single)
-{
-	struct timb_dma_chan *td_chan = container_of(td_desc->txd.chan,
-		struct timb_dma_chan, chan);
-	u8 *descs;
-
-	for (descs = td_desc->desc_list; ; descs += TIMB_DMA_DESC_SIZE) {
-		__td_unmap_desc(td_chan, descs, single);
-		if (descs[0] & 0x02)
-			break;
-	}
-}
-
 static int td_fill_desc(struct timb_dma_chan *td_chan, u8 *dma_desc,
 	struct scatterlist *sg, bool last)
 {
@@ -293,10 +261,7 @@ static void __td_finish(struct timb_dma_chan *td_chan)
 
 	list_move(&td_desc->desc_node, &td_chan->free_list);
 
-	if (!(txd->flags & DMA_COMPL_SKIP_SRC_UNMAP))
-		__td_unmap_descs(td_desc,
-			txd->flags & DMA_COMPL_SRC_UNMAP_SINGLE);
-
+	dma_descriptor_unmap(txd);
 	/*
 	 * The API requires that no submissions are done from a
 	 * callback, so we don't need to drop the lock here

+ 1 - 24
drivers/dma/txx9dmac.c

@@ -419,30 +419,7 @@ txx9dmac_descriptor_complete(struct txx9dmac_chan *dc,
 	list_splice_init(&desc->tx_list, &dc->free_list);
 	list_move(&desc->desc_node, &dc->free_list);
 
-	if (!ds) {
-		dma_addr_t dmaaddr;
-		if (!(txd->flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
-			dmaaddr = is_dmac64(dc) ?
-				desc->hwdesc.DAR : desc->hwdesc32.DAR;
-			if (txd->flags & DMA_COMPL_DEST_UNMAP_SINGLE)
-				dma_unmap_single(chan2parent(&dc->chan),
-					dmaaddr, desc->len, DMA_FROM_DEVICE);
-			else
-				dma_unmap_page(chan2parent(&dc->chan),
-					dmaaddr, desc->len, DMA_FROM_DEVICE);
-		}
-		if (!(txd->flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
-			dmaaddr = is_dmac64(dc) ?
-				desc->hwdesc.SAR : desc->hwdesc32.SAR;
-			if (txd->flags & DMA_COMPL_SRC_UNMAP_SINGLE)
-				dma_unmap_single(chan2parent(&dc->chan),
-					dmaaddr, desc->len, DMA_TO_DEVICE);
-			else
-				dma_unmap_page(chan2parent(&dc->chan),
-					dmaaddr, desc->len, DMA_TO_DEVICE);
-		}
-	}
-
+	dma_descriptor_unmap(txd);
 	/*
 	 * The API requires that no submissions are done from a
 	 * callback, so we don't need to drop the lock here

+ 1 - 2
drivers/media/platform/m2m-deinterlace.c

@@ -341,8 +341,7 @@ static void deinterlace_issue_dma(struct deinterlace_ctx *ctx, int op,
 	ctx->xt->dir = DMA_MEM_TO_MEM;
 	ctx->xt->src_sgl = false;
 	ctx->xt->dst_sgl = true;
-	flags = DMA_CTRL_ACK | DMA_PREP_INTERRUPT |
-		DMA_COMPL_SKIP_DEST_UNMAP | DMA_COMPL_SKIP_SRC_UNMAP;
+	flags = DMA_CTRL_ACK | DMA_PREP_INTERRUPT;
 
 	tx = dmadev->device_prep_interleaved_dma(chan, ctx->xt, flags);
 	if (tx == NULL) {

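This and the remaining client drivers show the other half of the conversion: with unmapping handled by the dmaengine core through the unmap data attached to a descriptor, clients stop passing the DMA_COMPL_SKIP_* and DMA_COMPL_*_UNMAP_SINGLE hints. A typical slave prep call reduces to the following sketch (the channel and scatterlist come from the surrounding driver):

	struct dma_async_tx_descriptor *desc;

	desc = dmaengine_prep_slave_sg(chan, sgl, sg_len, DMA_DEV_TO_MEM,
				       DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
	if (!desc)
		return -ENOMEM;		/* driver-specific fallback or error */
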
+ 1 - 1
drivers/media/platform/timblogiw.c

@@ -565,7 +565,7 @@ static void buffer_queue(struct videobuf_queue *vq, struct videobuf_buffer *vb)
 
 	desc = dmaengine_prep_slave_sg(fh->chan,
 		buf->sg, sg_elems, DMA_DEV_TO_MEM,
-		DMA_PREP_INTERRUPT | DMA_COMPL_SKIP_SRC_UNMAP);
+		DMA_PREP_INTERRUPT);
 	if (!desc) {
 		spin_lock_irq(&fh->queue_lock);
 		list_del_init(&vb->queue);

+ 1 - 2
drivers/misc/carma/carma-fpga.c

@@ -631,8 +631,7 @@ static int data_submit_dma(struct fpga_device *priv, struct data_buf *buf)
 	struct dma_async_tx_descriptor *tx;
 	dma_cookie_t cookie;
 	dma_addr_t dst, src;
-	unsigned long dma_flags = DMA_COMPL_SKIP_DEST_UNMAP |
-				  DMA_COMPL_SKIP_SRC_UNMAP;
+	unsigned long dma_flags = 0;
 
 	dst_sg = buf->vb.sglist;
 	dst_nents = buf->vb.sglen;

+ 1 - 2
drivers/mtd/nand/atmel_nand.c

@@ -375,8 +375,7 @@ static int atmel_nand_dma_op(struct mtd_info *mtd, void *buf, int len,
 
 	dma_dev = host->dma_chan->device;
 
-	flags = DMA_CTRL_ACK | DMA_PREP_INTERRUPT | DMA_COMPL_SKIP_SRC_UNMAP |
-		DMA_COMPL_SKIP_DEST_UNMAP;
+	flags = DMA_CTRL_ACK | DMA_PREP_INTERRUPT;
 
 	phys_addr = dma_map_single(dma_dev->dev, p, len, dir);
 	if (dma_mapping_error(dma_dev->dev, phys_addr)) {

+ 0 - 2
drivers/mtd/nand/fsmc_nand.c

@@ -573,8 +573,6 @@ static int dma_xfer(struct fsmc_nand_data *host, void *buffer, int len,
 	dma_dev = chan->device;
 	dma_addr = dma_map_single(dma_dev->dev, buffer, len, direction);
 
-	flags |= DMA_COMPL_SKIP_SRC_UNMAP | DMA_COMPL_SKIP_DEST_UNMAP;
-
 	if (direction == DMA_TO_DEVICE) {
 		dma_src = dma_addr;
 		dma_dst = host->data_pa;

+ 2 - 4
drivers/net/ethernet/micrel/ks8842.c

@@ -459,8 +459,7 @@ static int ks8842_tx_frame_dma(struct sk_buff *skb, struct net_device *netdev)
 		sg_dma_len(&ctl->sg) += 4 - sg_dma_len(&ctl->sg) % 4;
 
 	ctl->adesc = dmaengine_prep_slave_sg(ctl->chan,
-		&ctl->sg, 1, DMA_MEM_TO_DEV,
-		DMA_PREP_INTERRUPT | DMA_COMPL_SKIP_SRC_UNMAP);
+		&ctl->sg, 1, DMA_MEM_TO_DEV, DMA_PREP_INTERRUPT);
 	if (!ctl->adesc)
 		return NETDEV_TX_BUSY;
 
@@ -571,8 +570,7 @@ static int __ks8842_start_new_rx_dma(struct net_device *netdev)
 		sg_dma_len(sg) = DMA_BUFFER_SIZE;
 
 		ctl->adesc = dmaengine_prep_slave_sg(ctl->chan,
-			sg, 1, DMA_DEV_TO_MEM,
-			DMA_PREP_INTERRUPT | DMA_COMPL_SKIP_SRC_UNMAP);
+			sg, 1, DMA_DEV_TO_MEM, DMA_PREP_INTERRUPT);
 
 		if (!ctl->adesc)
 			goto out;

+ 56 - 30
drivers/ntb/ntb_transport.c

@@ -1034,10 +1034,9 @@ static void ntb_async_rx(struct ntb_queue_entry *entry, void *offset,
 	struct dma_chan *chan = qp->dma_chan;
 	struct dma_device *device;
 	size_t pay_off, buff_off;
-	dma_addr_t src, dest;
+	struct dmaengine_unmap_data *unmap;
 	dma_cookie_t cookie;
 	void *buf = entry->buf;
-	unsigned long flags;
 
 	entry->len = len;
 
@@ -1045,35 +1044,49 @@ static void ntb_async_rx(struct ntb_queue_entry *entry, void *offset,
 		goto err;
 
 	if (len < copy_bytes) 
-		goto err1;
+		goto err_wait;
 
 	device = chan->device;
 	pay_off = (size_t) offset & ~PAGE_MASK;
 	buff_off = (size_t) buf & ~PAGE_MASK;
 
 	if (!is_dma_copy_aligned(device, pay_off, buff_off, len))
-		goto err1;
+		goto err_wait;
 
-	dest = dma_map_single(device->dev, buf, len, DMA_FROM_DEVICE);
-	if (dma_mapping_error(device->dev, dest))
-		goto err1;
+	unmap = dmaengine_get_unmap_data(device->dev, 2, GFP_NOWAIT);
+	if (!unmap)
+		goto err_wait;
 
-	src = dma_map_single(device->dev, offset, len, DMA_TO_DEVICE);
-	if (dma_mapping_error(device->dev, src))
-		goto err2;
+	unmap->len = len;
+	unmap->addr[0] = dma_map_page(device->dev, virt_to_page(offset),
+				      pay_off, len, DMA_TO_DEVICE);
+	if (dma_mapping_error(device->dev, unmap->addr[0]))
+		goto err_get_unmap;
+
+	unmap->to_cnt = 1;
 
-	flags = DMA_COMPL_DEST_UNMAP_SINGLE | DMA_COMPL_SRC_UNMAP_SINGLE |
-		DMA_PREP_INTERRUPT;
-	txd = device->device_prep_dma_memcpy(chan, dest, src, len, flags);
+	unmap->addr[1] = dma_map_page(device->dev, virt_to_page(buf),
+				      buff_off, len, DMA_FROM_DEVICE);
+	if (dma_mapping_error(device->dev, unmap->addr[1]))
+		goto err_get_unmap;
+
+	unmap->from_cnt = 1;
+
+	txd = device->device_prep_dma_memcpy(chan, unmap->addr[1],
+					     unmap->addr[0], len,
+					     DMA_PREP_INTERRUPT);
 	if (!txd)
-		goto err3;
+		goto err_get_unmap;
 
 	txd->callback = ntb_rx_copy_callback;
 	txd->callback_param = entry;
+	dma_set_unmap(txd, unmap);
 
 	cookie = dmaengine_submit(txd);
 	if (dma_submit_error(cookie))
-		goto err3;
+		goto err_set_unmap;
+
+	dmaengine_unmap_put(unmap);
 
 	qp->last_cookie = cookie;
 
@@ -1081,11 +1094,11 @@ static void ntb_async_rx(struct ntb_queue_entry *entry, void *offset,
 
 	return;
 
-err3:
-	dma_unmap_single(device->dev, src, len, DMA_TO_DEVICE);
-err2:
-	dma_unmap_single(device->dev, dest, len, DMA_FROM_DEVICE);
-err1:
+err_set_unmap:
+	dmaengine_unmap_put(unmap);
+err_get_unmap:
+	dmaengine_unmap_put(unmap);
+err_wait:
 	/* If the callbacks come out of order, the writing of the index to the
 	 * last completed will be out of order.  This may result in the
 	 * receive stalling forever.
@@ -1245,12 +1258,12 @@ static void ntb_async_tx(struct ntb_transport_qp *qp,
 	struct dma_chan *chan = qp->dma_chan;
 	struct dma_device *device;
 	size_t dest_off, buff_off;
-	dma_addr_t src, dest;
+	struct dmaengine_unmap_data *unmap;
+	dma_addr_t dest;
 	dma_cookie_t cookie;
 	void __iomem *offset;
 	size_t len = entry->len;
 	void *buf = entry->buf;
-	unsigned long flags;
 
 	offset = qp->tx_mw + qp->tx_max_frame * qp->tx_index;
 	hdr = offset + qp->tx_max_frame - sizeof(struct ntb_payload_header);
@@ -1273,28 +1286,41 @@ static void ntb_async_tx(struct ntb_transport_qp *qp,
 	if (!is_dma_copy_aligned(device, buff_off, dest_off, len))
 		goto err;
 
-	src = dma_map_single(device->dev, buf, len, DMA_TO_DEVICE);
-	if (dma_mapping_error(device->dev, src))
+	unmap = dmaengine_get_unmap_data(device->dev, 1, GFP_NOWAIT);
+	if (!unmap)
 		goto err;
 
-	flags = DMA_COMPL_SRC_UNMAP_SINGLE | DMA_PREP_INTERRUPT;
-	txd = device->device_prep_dma_memcpy(chan, dest, src, len, flags);
+	unmap->len = len;
+	unmap->addr[0] = dma_map_page(device->dev, virt_to_page(buf),
+				      buff_off, len, DMA_TO_DEVICE);
+	if (dma_mapping_error(device->dev, unmap->addr[0]))
+		goto err_get_unmap;
+
+	unmap->to_cnt = 1;
+
+	txd = device->device_prep_dma_memcpy(chan, dest, unmap->addr[0], len,
+					     DMA_PREP_INTERRUPT);
 	if (!txd)
-		goto err1;
+		goto err_get_unmap;
 
 	txd->callback = ntb_tx_copy_callback;
 	txd->callback_param = entry;
+	dma_set_unmap(txd, unmap);
 
 	cookie = dmaengine_submit(txd);
 	if (dma_submit_error(cookie))
-		goto err1;
+		goto err_set_unmap;
+
+	dmaengine_unmap_put(unmap);
 
 	dma_async_issue_pending(chan);
 	qp->tx_async++;
 
 	return;
-err1:
-	dma_unmap_single(device->dev, src, len, DMA_TO_DEVICE);
+err_set_unmap:
+	dmaengine_unmap_put(unmap);
+err_get_unmap:
+	dmaengine_unmap_put(unmap);
 err:
 	ntb_memcpy_tx(entry, offset);
 	qp->tx_memcpy++;

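The ntb_transport conversion above is the fullest client-side example of the new API: size the unmap structure for the number of mappings, fill addr[] via dma_map_page(), record how many entries are to-device and from-device, attach the structure with dma_set_unmap(), and drop the local reference once the descriptor is submitted. Condensed into one sketch (mapping-error handling trimmed; src_page/dst_page and their offsets are illustrative, the dmaengine calls are as in the diff):

	struct dmaengine_unmap_data *unmap;
	struct dma_async_tx_descriptor *txd;

	unmap = dmaengine_get_unmap_data(device->dev, 2, GFP_NOWAIT);
	if (!unmap)
		return;					/* fall back to a CPU copy */

	unmap->len = len;
	unmap->addr[0] = dma_map_page(device->dev, src_page, src_off,
				      len, DMA_TO_DEVICE);
	unmap->to_cnt = 1;
	unmap->addr[1] = dma_map_page(device->dev, dst_page, dst_off,
				      len, DMA_FROM_DEVICE);
	unmap->from_cnt = 1;

	txd = device->device_prep_dma_memcpy(chan, unmap->addr[1],
					     unmap->addr[0], len,
					     DMA_PREP_INTERRUPT);
	if (txd) {
		dma_set_unmap(txd, unmap);	/* descriptor takes its own reference */
		dmaengine_submit(txd);
	}
	dmaengine_unmap_put(unmap);		/* drop the caller's reference; on the
						 * failure path this also releases the
						 * two mappings made above */
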
+ 2 - 2
drivers/spi/spi-dw-mid.c

@@ -150,7 +150,7 @@ static int mid_spi_dma_transfer(struct dw_spi *dws, int cs_change)
 				&dws->tx_sgl,
 				1,
 				DMA_MEM_TO_DEV,
-				DMA_PREP_INTERRUPT | DMA_COMPL_SKIP_DEST_UNMAP);
+				DMA_PREP_INTERRUPT);
 	txdesc->callback = dw_spi_dma_done;
 	txdesc->callback_param = dws;
 
@@ -173,7 +173,7 @@ static int mid_spi_dma_transfer(struct dw_spi *dws, int cs_change)
 				&dws->rx_sgl,
 				1,
 				DMA_DEV_TO_MEM,
-				DMA_PREP_INTERRUPT | DMA_COMPL_SKIP_DEST_UNMAP);
+				DMA_PREP_INTERRUPT);
 	rxdesc->callback = dw_spi_dma_done;
 	rxdesc->callback_param = dws;
 

+ 50 - 14
include/linux/dmaengine.h

@@ -171,12 +171,6 @@ struct dma_interleaved_template {
  * @DMA_CTRL_ACK - if clear, the descriptor cannot be reused until the client
  *  acknowledges receipt, i.e. has has a chance to establish any dependency
  *  chains
- * @DMA_COMPL_SKIP_SRC_UNMAP - set to disable dma-unmapping the source buffer(s)
- * @DMA_COMPL_SKIP_DEST_UNMAP - set to disable dma-unmapping the destination(s)
- * @DMA_COMPL_SRC_UNMAP_SINGLE - set to do the source dma-unmapping as single
- * 	(if not set, do the source dma-unmapping as page)
- * @DMA_COMPL_DEST_UNMAP_SINGLE - set to do the destination dma-unmapping as single
- * 	(if not set, do the destination dma-unmapping as page)
  * @DMA_PREP_PQ_DISABLE_P - prevent generation of P while generating Q
  * @DMA_PREP_PQ_DISABLE_Q - prevent generation of Q while generating P
  * @DMA_PREP_CONTINUE - indicate to a driver that it is reusing buffers as
@@ -188,14 +182,10 @@ struct dma_interleaved_template {
 enum dma_ctrl_flags {
 	DMA_PREP_INTERRUPT = (1 << 0),
 	DMA_CTRL_ACK = (1 << 1),
-	DMA_COMPL_SKIP_SRC_UNMAP = (1 << 2),
-	DMA_COMPL_SKIP_DEST_UNMAP = (1 << 3),
-	DMA_COMPL_SRC_UNMAP_SINGLE = (1 << 4),
-	DMA_COMPL_DEST_UNMAP_SINGLE = (1 << 5),
-	DMA_PREP_PQ_DISABLE_P = (1 << 6),
-	DMA_PREP_PQ_DISABLE_Q = (1 << 7),
-	DMA_PREP_CONTINUE = (1 << 8),
-	DMA_PREP_FENCE = (1 << 9),
+	DMA_PREP_PQ_DISABLE_P = (1 << 2),
+	DMA_PREP_PQ_DISABLE_Q = (1 << 3),
+	DMA_PREP_CONTINUE = (1 << 4),
+	DMA_PREP_FENCE = (1 << 5),
 };
 
 /**
@@ -413,6 +403,17 @@ void dma_chan_cleanup(struct kref *kref);
 typedef bool (*dma_filter_fn)(struct dma_chan *chan, void *filter_param);
 
 typedef void (*dma_async_tx_callback)(void *dma_async_param);
+
+struct dmaengine_unmap_data {
+	u8 to_cnt;
+	u8 from_cnt;
+	u8 bidi_cnt;
+	struct device *dev;
+	struct kref kref;
+	size_t len;
+	dma_addr_t addr[0];
+};
+
 /**
  * struct dma_async_tx_descriptor - async transaction descriptor
  * ---dma generic offload fields---
@@ -438,6 +439,7 @@ struct dma_async_tx_descriptor {
 	dma_cookie_t (*tx_submit)(struct dma_async_tx_descriptor *tx);
 	dma_async_tx_callback callback;
 	void *callback_param;
+	struct dmaengine_unmap_data *unmap;
 #ifdef CONFIG_ASYNC_TX_ENABLE_CHANNEL_SWITCH
 	struct dma_async_tx_descriptor *next;
 	struct dma_async_tx_descriptor *parent;
@@ -445,6 +447,40 @@ struct dma_async_tx_descriptor {
 #endif
 };
 
+#ifdef CONFIG_DMA_ENGINE
+static inline void dma_set_unmap(struct dma_async_tx_descriptor *tx,
+				 struct dmaengine_unmap_data *unmap)
+{
+	kref_get(&unmap->kref);
+	tx->unmap = unmap;
+}
+
+struct dmaengine_unmap_data *
+dmaengine_get_unmap_data(struct device *dev, int nr, gfp_t flags);
+void dmaengine_unmap_put(struct dmaengine_unmap_data *unmap);
+#else
+static inline void dma_set_unmap(struct dma_async_tx_descriptor *tx,
+				 struct dmaengine_unmap_data *unmap)
+{
+}
+static inline struct dmaengine_unmap_data *
+dmaengine_get_unmap_data(struct device *dev, int nr, gfp_t flags)
+{
+	return NULL;
+}
+static inline void dmaengine_unmap_put(struct dmaengine_unmap_data *unmap)
+{
+}
+#endif
+
+static inline void dma_descriptor_unmap(struct dma_async_tx_descriptor *tx)
+{
+	if (tx->unmap) {
+		dmaengine_unmap_put(tx->unmap);
+		tx->unmap = NULL;
+	}
+}
+
 #ifndef CONFIG_ASYNC_TX_ENABLE_CHANNEL_SWITCH
 static inline void txd_lock(struct dma_async_tx_descriptor *txd)
 {

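The header hunk above defines the whole lifecycle: dmaengine_get_unmap_data() hands back a refcounted dmaengine_unmap_data, dma_set_unmap() takes an extra reference on behalf of the descriptor, and dma_descriptor_unmap(), called from the driver's completion path, drops it again; the mappings recorded in addr[] are only undone when the last reference is released. In compressed form (the calls are those declared above, everything else is illustrative):

	unmap = dmaengine_get_unmap_data(dev, nr, GFP_NOWAIT);	/* ref 1: client */
	dma_set_unmap(txd, unmap);				/* ref 2: descriptor */
	dmaengine_submit(txd);
	dmaengine_unmap_put(unmap);				/* ref 1: client is done */
	/* ... later, in the driver's completion path ... */
	dma_descriptor_unmap(txd);				/* ref 0: mappings released */
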
Some files were not shown because too many files changed in this diff