Browse Source

drm/nv50: Fix large 3D performance regression caused by the interchannel sync patches.

Reported-by: Christoph Bumiller <e0425955@student.tuwien.ac.at>
Signed-off-by: Francisco Jerez <currojerez@riseup.net>
Tested-by: Maarten Maathuis <madman2003@gmail.com>
Tested-by: Xavier Chantry <chantry.xavier@gmail.com>
Tested-by: Ben Skeggs <bskeggs@redhat.com>
Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
Francisco Jerez 14 years ago
parent
commit
8af29ccd79
2 changed files with 40 additions and 7 deletions
  1. 1 0
      drivers/gpu/drm/nouveau/nouveau_drv.h
  2. 39 7
      drivers/gpu/drm/nouveau/nouveau_fence.c

+ 1 - 0
drivers/gpu/drm/nouveau/nouveau_drv.h

@@ -1473,6 +1473,7 @@ nv_match_device(struct drm_device *dev, unsigned device,
 #define NV_SW_SEMAPHORE_OFFSET                                       0x00000064
 #define NV_SW_SEMAPHORE_ACQUIRE                                      0x00000068
 #define NV_SW_SEMAPHORE_RELEASE                                      0x0000006c
+#define NV_SW_YIELD                                                  0x00000080
 #define NV_SW_DMA_VBLSEM                                             0x0000018c
 #define NV_SW_VBLSEM_OFFSET                                          0x00000400
 #define NV_SW_VBLSEM_RELEASE_VALUE                                   0x00000404

+ 39 - 7
drivers/gpu/drm/nouveau/nouveau_fence.c

@@ -308,21 +308,54 @@ emit_semaphore(struct nouveau_channel *chan, int method,
 {
 	struct drm_nouveau_private *dev_priv = sema->dev->dev_private;
 	struct nouveau_fence *fence;
+	bool smart = (dev_priv->card_type >= NV_50);
 	int ret;
 
-	ret = RING_SPACE(chan, dev_priv->card_type >= NV_50 ? 6 : 4);
+	ret = RING_SPACE(chan, smart ? 8 : 4);
 	if (ret)
 		return ret;
 
-	if (dev_priv->card_type >= NV_50) {
+	if (smart) {
 		BEGIN_RING(chan, NvSubSw, NV_SW_DMA_SEMAPHORE, 1);
 		OUT_RING(chan, NvSema);
 	}
 	BEGIN_RING(chan, NvSubSw, NV_SW_SEMAPHORE_OFFSET, 1);
 	OUT_RING(chan, sema->mem->start);
+
+	if (smart && method == NV_SW_SEMAPHORE_ACQUIRE) {
+		/*
+		 * NV50 tries to be too smart and context-switch
+		 * between semaphores instead of doing a "first come,
+		 * first served" strategy like previous cards
+		 * do.
+		 *
+		 * That's bad because the ACQUIRE latency can get as
+		 * large as the PFIFO context time slice in the
+		 * typical DRI2 case where you have several
+		 * outstanding semaphores at the same moment.
+		 *
+		 * If we're going to ACQUIRE, force the card to
+		 * context switch before, just in case the matching
+		 * RELEASE is already scheduled to be executed in
+		 * another channel.
+		 */
+		BEGIN_RING(chan, NvSubSw, NV_SW_YIELD, 1);
+		OUT_RING(chan, 0);
+	}
+
 	BEGIN_RING(chan, NvSubSw, method, 1);
 	OUT_RING(chan, 1);
 
+	if (smart && method == NV_SW_SEMAPHORE_RELEASE) {
+		/*
+		 * Force the card to context switch, there may be
+		 * another channel waiting for the semaphore we just
+		 * released.
+		 */
+		BEGIN_RING(chan, NvSubSw, NV_SW_YIELD, 1);
+		OUT_RING(chan, 0);
+	}
+
 	/* Delay semaphore destruction until its work is done */
 	ret = nouveau_fence_new(chan, &fence, true);
 	if (ret)
@@ -355,14 +388,13 @@ nouveau_fence_sync(struct nouveau_fence *fence,
 		return nouveau_fence_wait(fence, NULL, false, false);
 	}
 
-	/* Signal the semaphore from chan */
-	ret = emit_semaphore(chan, NV_SW_SEMAPHORE_RELEASE, sema);
-	if (ret)
-		goto out;
-
 	/* Make wchan wait until it gets signalled */
 	ret = emit_semaphore(wchan, NV_SW_SEMAPHORE_ACQUIRE, sema);
+	if (ret)
+		goto out;
 
+	/* Signal the semaphore from chan */
+	ret = emit_semaphore(chan, NV_SW_SEMAPHORE_RELEASE, sema);
 out:
 	kref_put(&sema->ref, free_semaphore);
 	return ret;