Merge branch 'drm-radeon-next' of ../drm-radeon-next into drm-core-next

* 'drm-radeon-next' of ../drm-radeon-next:
  drm/radeon: introduce a sub allocator and convert ib pool to it v4
  drm/radeon/kms: add support for per-ring fence interrupts
  drm/radeon/kms: add cayman specific fence_ring_emit
  drm/radeon/kms: add some new ring params to better handle other ring types
  drm/radeon: improve radeon_test_syncing function
  drm/radeon: precompute fence cpu/gpu addr once v3
  drm/radeon: move ring debugfs into radeon_ring.c
  drm/radeon: rename struct radeon_cp to radeon_ring
  drm/radeon: disable compute rings on cayman for now
  drm/radeon: add radeon_fence_count_emited function
  drm/radeon: make some asic pointers per ring
  drm/radeon: Add radeon_test_syncing function v2
  drm/radeon: make cp variable an array
  drm/radeon: make ring rptr and wptr register offsets variable
  drm/radeon: make all functions work with multiple rings.
  drm/radeon/kms: add support for semaphores v3
  drm/radeon/kms: add support for multiple fence queues v2
  drm/radeon: fix a spelling mistake
  drm/radeon: no need to check all relocs for duplicates
  drm/radeon: fix debugfs handling v3
Dave Airlie 13 years ago
parent
commit
4bc22a1aa0
35 changed files with 2629 additions and 1338 deletions
  1. drivers/gpu/drm/radeon/Makefile (+3 -2)
  2. drivers/gpu/drm/radeon/evergreen.c (+148 -86)
  3. drivers/gpu/drm/radeon/evergreen_blit_kms.c (+125 -117)
  4. drivers/gpu/drm/radeon/ni.c (+147 -90)
  5. drivers/gpu/drm/radeon/nid.h (+33 -0)
  6. drivers/gpu/drm/radeon/r100.c (+122 -105)
  7. drivers/gpu/drm/radeon/r200.c (+11 -10)
  8. drivers/gpu/drm/radeon/r300.c (+90 -64)
  9. drivers/gpu/drm/radeon/r420.c (+38 -11)
  10. drivers/gpu/drm/radeon/r520.c (+23 -2)
  11. drivers/gpu/drm/radeon/r600.c (+136 -137)
  12. drivers/gpu/drm/radeon/r600_blit_kms.c (+115 -106)
  13. drivers/gpu/drm/radeon/r600_cp.c (+1 -1)
  14. drivers/gpu/drm/radeon/r600d.h (+2 -0)
  15. drivers/gpu/drm/radeon/radeon.h (+165 -56)
  16. drivers/gpu/drm/radeon/radeon_asic.c (+129 -51)
  17. drivers/gpu/drm/radeon/radeon_asic.h (+22 -14)
  18. drivers/gpu/drm/radeon/radeon_benchmark.c (+1 -1)
  19. drivers/gpu/drm/radeon/radeon_cs.c (+4 -3)
  20. drivers/gpu/drm/radeon/radeon_device.c (+37 -24)
  21. drivers/gpu/drm/radeon/radeon_fence.c (+183 -121)
  22. drivers/gpu/drm/radeon/radeon_gem.c (+4 -2)
  23. drivers/gpu/drm/radeon/radeon_irq_kms.c (+14 -10)
  24. drivers/gpu/drm/radeon/radeon_object.h (+20 -0)
  25. drivers/gpu/drm/radeon/radeon_pm.c (+18 -16)
  26. drivers/gpu/drm/radeon/radeon_ring.c (+240 -222)
  27. drivers/gpu/drm/radeon/radeon_sa.c (+189 -0)
  28. drivers/gpu/drm/radeon/radeon_semaphore.c (+161 -0)
  29. drivers/gpu/drm/radeon/radeon_test.c (+264 -3)
  30. drivers/gpu/drm/radeon/radeon_ttm.c (+4 -4)
  31. drivers/gpu/drm/radeon/rs400.c (+25 -2)
  32. drivers/gpu/drm/radeon/rs600.c (+28 -6)
  33. drivers/gpu/drm/radeon/rs690.c (+26 -4)
  34. drivers/gpu/drm/radeon/rv515.c (+64 -42)
  35. drivers/gpu/drm/radeon/rv770.c (+37 -26)
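
Nearly every hunk below follows one refactor: ring state moves out of the single rdev->cp (plus cp1/cp2 on Cayman) into the rdev->ring[] array, and the ring helpers take a struct radeon_ring * explicitly. A condensed before/after sketch of the calling convention, pieced together from the lockup-detection hunks below (not a verbatim excerpt):

	/* Before this series: helpers implicitly used the one GFX ring. */
	r = radeon_ring_lock(rdev, 2);
	radeon_ring_write(rdev, 0x80000000);	/* PACKET2 NOP */
	radeon_ring_write(rdev, 0x80000000);
	radeon_ring_unlock_commit(rdev);

	/* After: callers resolve a ring from rdev->ring[] and pass it through. */
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];

	r = radeon_ring_lock(rdev, ring, 2);
	radeon_ring_write(ring, 0x80000000);	/* PACKET2 NOP */
	radeon_ring_write(ring, 0x80000000);
	radeon_ring_unlock_commit(rdev, ring);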

+ 3 - 2
drivers/gpu/drm/radeon/Makefile

@@ -70,7 +70,8 @@ radeon-y += radeon_device.o radeon_asic.o radeon_kms.o \
 	r200.o radeon_legacy_tv.o r600_cs.o r600_blit.o r600_blit_shaders.o \
 	r600_blit_kms.o radeon_pm.o atombios_dp.o r600_audio.o r600_hdmi.o \
 	evergreen.o evergreen_cs.o evergreen_blit_shaders.o evergreen_blit_kms.o \
-	radeon_trace_points.o ni.o cayman_blit_shaders.o atombios_encoders.o
+	radeon_trace_points.o ni.o cayman_blit_shaders.o atombios_encoders.o \
+	radeon_semaphore.o radeon_sa.o
 
 radeon-$(CONFIG_COMPAT) += radeon_ioc32.o
 radeon-$(CONFIG_VGA_SWITCHEROO) += radeon_atpx_handler.o
@@ -78,4 +79,4 @@ radeon-$(CONFIG_ACPI) += radeon_acpi.o
 
 obj-$(CONFIG_DRM_RADEON)+= radeon.o
 
-CFLAGS_radeon_trace_points.o := -I$(src)
+CFLAGS_radeon_trace_points.o := -I$(src)
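
The two new objects back the first two items in the shortlog: radeon_sa.o is the sub-allocator the IB pool is converted to, and radeon_semaphore.o carries the new inter-ring semaphore support. A minimal sketch of the sub-allocator idea — the function names appear in radeon_sa.c from this merge, but the exact signatures and sizes used here are assumptions:

	/* Sketch with assumed signatures: carve many small IBs out of one
	 * shared backing BO instead of allocating a BO per IB. */
	struct radeon_sa_manager sa_manager;
	struct radeon_sa_bo sa_bo;
	int r;

	r = radeon_sa_bo_manager_init(rdev, &sa_manager,
				      RADEON_IB_POOL_SIZE * 64 * 1024, /* pool size: assumption */
				      RADEON_GEM_DOMAIN_GTT);
	if (r)
		return r;
	r = radeon_sa_bo_new(rdev, &sa_manager, &sa_bo, 64 * 1024, 256);
	if (!r)
		radeon_sa_bo_free(rdev, &sa_bo);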

+ 148 - 86
drivers/gpu/drm/radeon/evergreen.c

@@ -40,6 +40,8 @@
 static void evergreen_gpu_init(struct radeon_device *rdev);
 void evergreen_fini(struct radeon_device *rdev);
 void evergreen_pcie_gen2_enable(struct radeon_device *rdev);
+extern void cayman_cp_int_cntl_setup(struct radeon_device *rdev,
+				     int ring, u32 cp_int_cntl);
 
 void evergreen_fix_pci_max_read_req_size(struct radeon_device *rdev)
 {
@@ -1311,18 +1313,20 @@ void evergreen_mc_program(struct radeon_device *rdev)
  */
 void evergreen_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
 {
+	struct radeon_ring *ring = &rdev->ring[ib->fence->ring];
+
 	/* set to DX10/11 mode */
-	radeon_ring_write(rdev, PACKET3(PACKET3_MODE_CONTROL, 0));
-	radeon_ring_write(rdev, 1);
+	radeon_ring_write(ring, PACKET3(PACKET3_MODE_CONTROL, 0));
+	radeon_ring_write(ring, 1);
 	/* FIXME: implement */
-	radeon_ring_write(rdev, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
-	radeon_ring_write(rdev,
+	radeon_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
+	radeon_ring_write(ring,
 #ifdef __BIG_ENDIAN
 			  (2 << 0) |
 #endif
 			  (ib->gpu_addr & 0xFFFFFFFC));
-	radeon_ring_write(rdev, upper_32_bits(ib->gpu_addr) & 0xFF);
-	radeon_ring_write(rdev, ib->length_dw);
+	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFF);
+	radeon_ring_write(ring, ib->length_dw);
 }
 
 
@@ -1360,71 +1364,73 @@ static int evergreen_cp_load_microcode(struct radeon_device *rdev)
 
 static int evergreen_cp_start(struct radeon_device *rdev)
 {
+	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
 	int r, i;
 	uint32_t cp_me;
 
-	r = radeon_ring_lock(rdev, 7);
+	r = radeon_ring_lock(rdev, ring, 7);
 	if (r) {
 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
 		return r;
 	}
-	radeon_ring_write(rdev, PACKET3(PACKET3_ME_INITIALIZE, 5));
-	radeon_ring_write(rdev, 0x1);
-	radeon_ring_write(rdev, 0x0);
-	radeon_ring_write(rdev, rdev->config.evergreen.max_hw_contexts - 1);
-	radeon_ring_write(rdev, PACKET3_ME_INITIALIZE_DEVICE_ID(1));
-	radeon_ring_write(rdev, 0);
-	radeon_ring_write(rdev, 0);
-	radeon_ring_unlock_commit(rdev);
+	radeon_ring_write(ring, PACKET3(PACKET3_ME_INITIALIZE, 5));
+	radeon_ring_write(ring, 0x1);
+	radeon_ring_write(ring, 0x0);
+	radeon_ring_write(ring, rdev->config.evergreen.max_hw_contexts - 1);
+	radeon_ring_write(ring, PACKET3_ME_INITIALIZE_DEVICE_ID(1));
+	radeon_ring_write(ring, 0);
+	radeon_ring_write(ring, 0);
+	radeon_ring_unlock_commit(rdev, ring);
 
 	cp_me = 0xff;
 	WREG32(CP_ME_CNTL, cp_me);
 
-	r = radeon_ring_lock(rdev, evergreen_default_size + 19);
+	r = radeon_ring_lock(rdev, ring, evergreen_default_size + 19);
 	if (r) {
 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
 		return r;
 	}
 
 	/* setup clear context state */
-	radeon_ring_write(rdev, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
-	radeon_ring_write(rdev, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
+	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
+	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
 
 	for (i = 0; i < evergreen_default_size; i++)
-		radeon_ring_write(rdev, evergreen_default_state[i]);
+		radeon_ring_write(ring, evergreen_default_state[i]);
 
-	radeon_ring_write(rdev, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
-	radeon_ring_write(rdev, PACKET3_PREAMBLE_END_CLEAR_STATE);
+	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
+	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
 
 	/* set clear context state */
-	radeon_ring_write(rdev, PACKET3(PACKET3_CLEAR_STATE, 0));
-	radeon_ring_write(rdev, 0);
+	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
+	radeon_ring_write(ring, 0);
 
 	/* SQ_VTX_BASE_VTX_LOC */
-	radeon_ring_write(rdev, 0xc0026f00);
-	radeon_ring_write(rdev, 0x00000000);
-	radeon_ring_write(rdev, 0x00000000);
-	radeon_ring_write(rdev, 0x00000000);
+	radeon_ring_write(ring, 0xc0026f00);
+	radeon_ring_write(ring, 0x00000000);
+	radeon_ring_write(ring, 0x00000000);
+	radeon_ring_write(ring, 0x00000000);
 
 	/* Clear consts */
-	radeon_ring_write(rdev, 0xc0036f00);
-	radeon_ring_write(rdev, 0x00000bc4);
-	radeon_ring_write(rdev, 0xffffffff);
-	radeon_ring_write(rdev, 0xffffffff);
-	radeon_ring_write(rdev, 0xffffffff);
+	radeon_ring_write(ring, 0xc0036f00);
+	radeon_ring_write(ring, 0x00000bc4);
+	radeon_ring_write(ring, 0xffffffff);
+	radeon_ring_write(ring, 0xffffffff);
+	radeon_ring_write(ring, 0xffffffff);
 
-	radeon_ring_write(rdev, 0xc0026900);
-	radeon_ring_write(rdev, 0x00000316);
-	radeon_ring_write(rdev, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
-	radeon_ring_write(rdev, 0x00000010); /*  */
+	radeon_ring_write(ring, 0xc0026900);
+	radeon_ring_write(ring, 0x00000316);
+	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
+	radeon_ring_write(ring, 0x00000010); /*  */
 
-	radeon_ring_unlock_commit(rdev);
+	radeon_ring_unlock_commit(rdev, ring);
 
 	return 0;
 }
 
 int evergreen_cp_resume(struct radeon_device *rdev)
 {
+	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
 	u32 tmp;
 	u32 rb_bufsz;
 	int r;
@@ -1442,13 +1448,13 @@ int evergreen_cp_resume(struct radeon_device *rdev)
 	RREG32(GRBM_SOFT_RESET);
 
 	/* Set ring buffer size */
-	rb_bufsz = drm_order(rdev->cp.ring_size / 8);
+	rb_bufsz = drm_order(ring->ring_size / 8);
 	tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
 #ifdef __BIG_ENDIAN
 	tmp |= BUF_SWAP_32BIT;
 #endif
 	WREG32(CP_RB_CNTL, tmp);
-	WREG32(CP_SEM_WAIT_TIMER, 0x4);
+	WREG32(CP_SEM_WAIT_TIMER, 0x0);
 
 	/* Set the write pointer delay */
 	WREG32(CP_RB_WPTR_DELAY, 0);
@@ -1456,8 +1462,8 @@ int evergreen_cp_resume(struct radeon_device *rdev)
 	/* Initialize the ring buffer's read and write pointers */
 	WREG32(CP_RB_CNTL, tmp | RB_RPTR_WR_ENA);
 	WREG32(CP_RB_RPTR_WR, 0);
-	rdev->cp.wptr = 0;
-	WREG32(CP_RB_WPTR, rdev->cp.wptr);
+	ring->wptr = 0;
+	WREG32(CP_RB_WPTR, ring->wptr);
 
 	/* set the wb address wether it's enabled or not */
 	WREG32(CP_RB_RPTR_ADDR,
@@ -1475,16 +1481,16 @@ int evergreen_cp_resume(struct radeon_device *rdev)
 	mdelay(1);
 	WREG32(CP_RB_CNTL, tmp);
 
-	WREG32(CP_RB_BASE, rdev->cp.gpu_addr >> 8);
+	WREG32(CP_RB_BASE, ring->gpu_addr >> 8);
 	WREG32(CP_DEBUG, (1 << 27) | (1 << 28));
 
-	rdev->cp.rptr = RREG32(CP_RB_RPTR);
+	ring->rptr = RREG32(CP_RB_RPTR);
 
 	evergreen_cp_start(rdev);
-	rdev->cp.ready = true;
-	r = radeon_ring_test(rdev);
+	ring->ready = true;
+	r = radeon_ring_test(rdev, ring);
 	if (r) {
-		rdev->cp.ready = false;
+		ring->ready = false;
 		return r;
 	}
 	return 0;
@@ -2353,7 +2359,7 @@ int evergreen_mc_init(struct radeon_device *rdev)
 	return 0;
 }
 
-bool evergreen_gpu_is_lockup(struct radeon_device *rdev)
+bool evergreen_gpu_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
 {
 	u32 srbm_status;
 	u32 grbm_status;
@@ -2366,19 +2372,19 @@ bool evergreen_gpu_is_lockup(struct radeon_device *rdev)
 	grbm_status_se0 = RREG32(GRBM_STATUS_SE0);
 	grbm_status_se1 = RREG32(GRBM_STATUS_SE1);
 	if (!(grbm_status & GUI_ACTIVE)) {
-		r100_gpu_lockup_update(lockup, &rdev->cp);
+		r100_gpu_lockup_update(lockup, ring);
 		return false;
 	}
 	/* force CP activities */
-	r = radeon_ring_lock(rdev, 2);
+	r = radeon_ring_lock(rdev, ring, 2);
 	if (!r) {
 		/* PACKET2 NOP */
-		radeon_ring_write(rdev, 0x80000000);
-		radeon_ring_write(rdev, 0x80000000);
-		radeon_ring_unlock_commit(rdev);
+		radeon_ring_write(ring, 0x80000000);
+		radeon_ring_write(ring, 0x80000000);
+		radeon_ring_unlock_commit(rdev, ring);
 	}
-	rdev->cp.rptr = RREG32(CP_RB_RPTR);
-	return r100_gpu_cp_is_lockup(rdev, lockup, &rdev->cp);
+	ring->rptr = RREG32(CP_RB_RPTR);
+	return r100_gpu_cp_is_lockup(rdev, lockup, ring);
 }
 
 static int evergreen_gpu_soft_reset(struct radeon_device *rdev)
@@ -2470,7 +2476,13 @@ void evergreen_disable_interrupt_state(struct radeon_device *rdev)
 {
 	u32 tmp;
 
-	WREG32(CP_INT_CNTL, CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
+	if (rdev->family >= CHIP_CAYMAN) {
+		cayman_cp_int_cntl_setup(rdev, 0,
+					 CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
+		cayman_cp_int_cntl_setup(rdev, 1, 0);
+		cayman_cp_int_cntl_setup(rdev, 2, 0);
+	} else
+		WREG32(CP_INT_CNTL, CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
 	WREG32(GRBM_INT_CNTL, 0);
 	WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
 	WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
@@ -2515,6 +2527,7 @@ void evergreen_disable_interrupt_state(struct radeon_device *rdev)
 int evergreen_irq_set(struct radeon_device *rdev)
 {
 	u32 cp_int_cntl = CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE;
+	u32 cp_int_cntl1 = 0, cp_int_cntl2 = 0;
 	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
 	u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
 	u32 grbm_int_cntl = 0;
@@ -2539,11 +2552,28 @@ int evergreen_irq_set(struct radeon_device *rdev)
 	hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
 	hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
 
-	if (rdev->irq.sw_int) {
-		DRM_DEBUG("evergreen_irq_set: sw int\n");
-		cp_int_cntl |= RB_INT_ENABLE;
-		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
+	if (rdev->family >= CHIP_CAYMAN) {
+		/* enable CP interrupts on all rings */
+		if (rdev->irq.sw_int[RADEON_RING_TYPE_GFX_INDEX]) {
+			DRM_DEBUG("evergreen_irq_set: sw int gfx\n");
+			cp_int_cntl |= TIME_STAMP_INT_ENABLE;
+		}
+		if (rdev->irq.sw_int[CAYMAN_RING_TYPE_CP1_INDEX]) {
+			DRM_DEBUG("evergreen_irq_set: sw int cp1\n");
+			cp_int_cntl1 |= TIME_STAMP_INT_ENABLE;
+		}
+		if (rdev->irq.sw_int[CAYMAN_RING_TYPE_CP2_INDEX]) {
+			DRM_DEBUG("evergreen_irq_set: sw int cp2\n");
+			cp_int_cntl2 |= TIME_STAMP_INT_ENABLE;
+		}
+	} else {
+		if (rdev->irq.sw_int[RADEON_RING_TYPE_GFX_INDEX]) {
+			DRM_DEBUG("evergreen_irq_set: sw int gfx\n");
+			cp_int_cntl |= RB_INT_ENABLE;
+			cp_int_cntl |= TIME_STAMP_INT_ENABLE;
+		}
 	}
+
 	if (rdev->irq.crtc_vblank_int[0] ||
 	    rdev->irq.pflip[0]) {
 		DRM_DEBUG("evergreen_irq_set: vblank 0\n");
@@ -2603,7 +2633,12 @@ int evergreen_irq_set(struct radeon_device *rdev)
 		grbm_int_cntl |= GUI_IDLE_INT_ENABLE;
 	}
 
-	WREG32(CP_INT_CNTL, cp_int_cntl);
+	if (rdev->family >= CHIP_CAYMAN) {
+		cayman_cp_int_cntl_setup(rdev, 0, cp_int_cntl);
+		cayman_cp_int_cntl_setup(rdev, 1, cp_int_cntl1);
+		cayman_cp_int_cntl_setup(rdev, 2, cp_int_cntl2);
+	} else
+		WREG32(CP_INT_CNTL, cp_int_cntl);
 	WREG32(GRBM_INT_CNTL, grbm_int_cntl);
 
 	WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
@@ -3018,11 +3053,24 @@ restart_ih:
 		case 177: /* CP_INT in IB1 */
 		case 178: /* CP_INT in IB2 */
 			DRM_DEBUG("IH: CP int: 0x%08x\n", src_data);
-			radeon_fence_process(rdev);
+			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
 			break;
 		case 181: /* CP EOP event */
 			DRM_DEBUG("IH: CP EOP\n");
-			radeon_fence_process(rdev);
+			if (rdev->family >= CHIP_CAYMAN) {
+				switch (src_data) {
+				case 0:
+					radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
+					break;
+				case 1:
+					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
+					break;
+				case 2:
+					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
+					break;
+				}
+			} else
+				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
 			break;
 		case 233: /* GUI IDLE */
 			DRM_DEBUG("IH: GUI idle\n");
@@ -3052,6 +3100,7 @@ restart_ih:
 
 static int evergreen_startup(struct radeon_device *rdev)
 {
+	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
 	int r;
 
 	/* enable pcie gen2 link */
@@ -3106,6 +3155,12 @@ static int evergreen_startup(struct radeon_device *rdev)
 	if (r)
 		return r;
 
+	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
+	if (r) {
+		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
+		return r;
+	}
+
 	/* Enable IRQ */
 	r = r600_irq_init(rdev);
 	if (r) {
@@ -3115,7 +3170,9 @@ static int evergreen_startup(struct radeon_device *rdev)
 	}
 	evergreen_irq_set(rdev);
 
-	r = radeon_ring_init(rdev, rdev->cp.ring_size);
+	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
+			     R600_CP_RB_RPTR, R600_CP_RB_WPTR,
+			     0, 0xfffff, RADEON_CP_PACKET2);
 	if (r)
 		return r;
 	r = evergreen_cp_load_microcode(rdev);
@@ -3125,6 +3182,17 @@ static int evergreen_startup(struct radeon_device *rdev)
 	if (r)
 		return r;
 
+	r = radeon_ib_pool_start(rdev);
+	if (r)
+		return r;
+
+	r = r600_ib_test(rdev, RADEON_RING_TYPE_GFX_INDEX);
+	if (r) {
+		DRM_ERROR("radeon: failed testing IB (%d).\n", r);
+		rdev->accel_working = false;
+		return r;
+	}
+
 	return 0;
 }
 
@@ -3144,31 +3212,29 @@ int evergreen_resume(struct radeon_device *rdev)
 	/* post card */
 	atom_asic_init(rdev->mode_info.atom_context);
 
+	rdev->accel_working = true;
 	r = evergreen_startup(rdev);
 	if (r) {
 		DRM_ERROR("evergreen startup failed on resume\n");
 		return r;
 	}
 
-	r = r600_ib_test(rdev);
-	if (r) {
-		DRM_ERROR("radeon: failed testing IB (%d).\n", r);
-		return r;
-	}
-
 	return r;
 
 }
 
 int evergreen_suspend(struct radeon_device *rdev)
 {
+	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
+
 	/* FIXME: we should wait for ring to be empty */
+	radeon_ib_pool_suspend(rdev);
+	r600_blit_suspend(rdev);
 	r700_cp_stop(rdev);
-	rdev->cp.ready = false;
+	ring->ready = false;
 	evergreen_irq_suspend(rdev);
 	radeon_wb_disable(rdev);
 	evergreen_pcie_gart_disable(rdev);
-	r600_blit_suspend(rdev);
 
 	return 0;
 }
@@ -3243,8 +3309,8 @@ int evergreen_init(struct radeon_device *rdev)
 	if (r)
 		return r;
 
-	rdev->cp.ring_obj = NULL;
-	r600_ring_init(rdev, 1024 * 1024);
+	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ring_obj = NULL;
+	r600_ring_init(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX], 1024 * 1024);
 
 	rdev->ih.ring_obj = NULL;
 	r600_ih_ring_init(rdev, 64 * 1024);
@@ -3253,29 +3319,24 @@ int evergreen_init(struct radeon_device *rdev)
 	if (r)
 		return r;
 
+	r = radeon_ib_pool_init(rdev);
 	rdev->accel_working = true;
+	if (r) {
+		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
+		rdev->accel_working = false;
+	}
+
 	r = evergreen_startup(rdev);
 	if (r) {
 		dev_err(rdev->dev, "disabling GPU acceleration\n");
 		r700_cp_fini(rdev);
 		r600_irq_fini(rdev);
 		radeon_wb_fini(rdev);
+		r100_ib_fini(rdev);
 		radeon_irq_kms_fini(rdev);
 		evergreen_pcie_gart_fini(rdev);
 		rdev->accel_working = false;
 	}
-	if (rdev->accel_working) {
-		r = radeon_ib_pool_init(rdev);
-		if (r) {
-			DRM_ERROR("radeon: failed initializing IB pool (%d).\n", r);
-			rdev->accel_working = false;
-		}
-		r = r600_ib_test(rdev);
-		if (r) {
-			DRM_ERROR("radeon: failed testing IB (%d).\n", r);
-			rdev->accel_working = false;
-		}
-	}
 	return 0;
 }
 
@@ -3285,11 +3346,12 @@ void evergreen_fini(struct radeon_device *rdev)
 	r700_cp_fini(rdev);
 	r600_irq_fini(rdev);
 	radeon_wb_fini(rdev);
-	radeon_ib_pool_fini(rdev);
+	r100_ib_fini(rdev);
 	radeon_irq_kms_fini(rdev);
 	evergreen_pcie_gart_fini(rdev);
 	r600_vram_scratch_fini(rdev);
 	radeon_gem_fini(rdev);
+	radeon_semaphore_driver_fini(rdev);
 	radeon_fence_driver_fini(rdev);
 	radeon_agp_fini(rdev);
 	radeon_bo_fini(rdev);
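
Two startup changes above are worth calling out: fences now start per ring via radeon_fence_driver_start_ring(), and radeon_ring_init() grows per-ring parameters instead of hardcoding the GFX ring registers. An annotated version of the call from evergreen_startup() — the values come from the diff, while the parameter roles are inferred from how they are used:

	r = radeon_ring_init(rdev, ring,
			     ring->ring_size,          /* ring buffer size in bytes */
			     RADEON_WB_CP_RPTR_OFFSET, /* rptr slot in the writeback page */
			     R600_CP_RB_RPTR,          /* rptr register offset */
			     R600_CP_RB_WPTR,          /* wptr register offset */
			     0,                        /* ptr register shift ("new ring params" patch) */
			     0xfffff,                  /* ptr register mask */
			     RADEON_CP_PACKET2);       /* nop packet used for padding */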

+ 125 - 117
drivers/gpu/drm/radeon/evergreen_blit_kms.c

@@ -49,6 +49,7 @@ static void
 set_render_target(struct radeon_device *rdev, int format,
 		  int w, int h, u64 gpu_addr)
 {
+	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
 	u32 cb_color_info;
 	int pitch, slice;
 
@@ -62,23 +63,23 @@ set_render_target(struct radeon_device *rdev, int format,
 	pitch = (w / 8) - 1;
 	slice = ((w * h) / 64) - 1;
 
-	radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 15));
-	radeon_ring_write(rdev, (CB_COLOR0_BASE - PACKET3_SET_CONTEXT_REG_START) >> 2);
-	radeon_ring_write(rdev, gpu_addr >> 8);
-	radeon_ring_write(rdev, pitch);
-	radeon_ring_write(rdev, slice);
-	radeon_ring_write(rdev, 0);
-	radeon_ring_write(rdev, cb_color_info);
-	radeon_ring_write(rdev, 0);
-	radeon_ring_write(rdev, (w - 1) | ((h - 1) << 16));
-	radeon_ring_write(rdev, 0);
-	radeon_ring_write(rdev, 0);
-	radeon_ring_write(rdev, 0);
-	radeon_ring_write(rdev, 0);
-	radeon_ring_write(rdev, 0);
-	radeon_ring_write(rdev, 0);
-	radeon_ring_write(rdev, 0);
-	radeon_ring_write(rdev, 0);
+	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 15));
+	radeon_ring_write(ring, (CB_COLOR0_BASE - PACKET3_SET_CONTEXT_REG_START) >> 2);
+	radeon_ring_write(ring, gpu_addr >> 8);
+	radeon_ring_write(ring, pitch);
+	radeon_ring_write(ring, slice);
+	radeon_ring_write(ring, 0);
+	radeon_ring_write(ring, cb_color_info);
+	radeon_ring_write(ring, 0);
+	radeon_ring_write(ring, (w - 1) | ((h - 1) << 16));
+	radeon_ring_write(ring, 0);
+	radeon_ring_write(ring, 0);
+	radeon_ring_write(ring, 0);
+	radeon_ring_write(ring, 0);
+	radeon_ring_write(ring, 0);
+	radeon_ring_write(ring, 0);
+	radeon_ring_write(ring, 0);
+	radeon_ring_write(ring, 0);
 }
 
 /* emits 5dw */
@@ -87,6 +88,7 @@ cp_set_surface_sync(struct radeon_device *rdev,
 		    u32 sync_type, u32 size,
 		    u64 mc_addr)
 {
+	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
 	u32 cp_coher_size;
 
 	if (size == 0xffffffff)
@@ -99,39 +101,40 @@ cp_set_surface_sync(struct radeon_device *rdev,
 		 * to the RB directly. For IBs, the CP programs this as part of the
 		 * surface_sync packet.
 		 */
-		radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 1));
-		radeon_ring_write(rdev, (0x85e8 - PACKET3_SET_CONFIG_REG_START) >> 2);
-		radeon_ring_write(rdev, 0); /* CP_COHER_CNTL2 */
+		radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
+		radeon_ring_write(ring, (0x85e8 - PACKET3_SET_CONFIG_REG_START) >> 2);
+		radeon_ring_write(ring, 0); /* CP_COHER_CNTL2 */
 	}
-	radeon_ring_write(rdev, PACKET3(PACKET3_SURFACE_SYNC, 3));
-	radeon_ring_write(rdev, sync_type);
-	radeon_ring_write(rdev, cp_coher_size);
-	radeon_ring_write(rdev, mc_addr >> 8);
-	radeon_ring_write(rdev, 10); /* poll interval */
+	radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
+	radeon_ring_write(ring, sync_type);
+	radeon_ring_write(ring, cp_coher_size);
+	radeon_ring_write(ring, mc_addr >> 8);
+	radeon_ring_write(ring, 10); /* poll interval */
 }
 
 /* emits 11dw + 1 surface sync = 16dw */
 static void
 set_shaders(struct radeon_device *rdev)
 {
+	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
 	u64 gpu_addr;
 
 	/* VS */
 	gpu_addr = rdev->r600_blit.shader_gpu_addr + rdev->r600_blit.vs_offset;
-	radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 3));
-	radeon_ring_write(rdev, (SQ_PGM_START_VS - PACKET3_SET_CONTEXT_REG_START) >> 2);
-	radeon_ring_write(rdev, gpu_addr >> 8);
-	radeon_ring_write(rdev, 2);
-	radeon_ring_write(rdev, 0);
+	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 3));
+	radeon_ring_write(ring, (SQ_PGM_START_VS - PACKET3_SET_CONTEXT_REG_START) >> 2);
+	radeon_ring_write(ring, gpu_addr >> 8);
+	radeon_ring_write(ring, 2);
+	radeon_ring_write(ring, 0);
 
 	/* PS */
 	gpu_addr = rdev->r600_blit.shader_gpu_addr + rdev->r600_blit.ps_offset;
-	radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 4));
-	radeon_ring_write(rdev, (SQ_PGM_START_PS - PACKET3_SET_CONTEXT_REG_START) >> 2);
-	radeon_ring_write(rdev, gpu_addr >> 8);
-	radeon_ring_write(rdev, 1);
-	radeon_ring_write(rdev, 0);
-	radeon_ring_write(rdev, 2);
+	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 4));
+	radeon_ring_write(ring, (SQ_PGM_START_PS - PACKET3_SET_CONTEXT_REG_START) >> 2);
+	radeon_ring_write(ring, gpu_addr >> 8);
+	radeon_ring_write(ring, 1);
+	radeon_ring_write(ring, 0);
+	radeon_ring_write(ring, 2);
 
 	gpu_addr = rdev->r600_blit.shader_gpu_addr + rdev->r600_blit.vs_offset;
 	cp_set_surface_sync(rdev, PACKET3_SH_ACTION_ENA, 512, gpu_addr);
@@ -141,6 +144,7 @@ set_shaders(struct radeon_device *rdev)
 static void
 set_vtx_resource(struct radeon_device *rdev, u64 gpu_addr)
 {
+	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
 	u32 sq_vtx_constant_word2, sq_vtx_constant_word3;
 
 	/* high addr, stride */
@@ -155,16 +159,16 @@ set_vtx_resource(struct radeon_device *rdev, u64 gpu_addr)
 		SQ_VTCX_SEL_Z(SQ_SEL_Z) |
 		SQ_VTCX_SEL_W(SQ_SEL_W);
 
-	radeon_ring_write(rdev, PACKET3(PACKET3_SET_RESOURCE, 8));
-	radeon_ring_write(rdev, 0x580);
-	radeon_ring_write(rdev, gpu_addr & 0xffffffff);
-	radeon_ring_write(rdev, 48 - 1); /* size */
-	radeon_ring_write(rdev, sq_vtx_constant_word2);
-	radeon_ring_write(rdev, sq_vtx_constant_word3);
-	radeon_ring_write(rdev, 0);
-	radeon_ring_write(rdev, 0);
-	radeon_ring_write(rdev, 0);
-	radeon_ring_write(rdev, S__SQ_CONSTANT_TYPE(SQ_TEX_VTX_VALID_BUFFER));
+	radeon_ring_write(ring, PACKET3(PACKET3_SET_RESOURCE, 8));
+	radeon_ring_write(ring, 0x580);
+	radeon_ring_write(ring, gpu_addr & 0xffffffff);
+	radeon_ring_write(ring, 48 - 1); /* size */
+	radeon_ring_write(ring, sq_vtx_constant_word2);
+	radeon_ring_write(ring, sq_vtx_constant_word3);
+	radeon_ring_write(ring, 0);
+	radeon_ring_write(ring, 0);
+	radeon_ring_write(ring, 0);
+	radeon_ring_write(ring, S__SQ_CONSTANT_TYPE(SQ_TEX_VTX_VALID_BUFFER));
 
 	if ((rdev->family == CHIP_CEDAR) ||
 	    (rdev->family == CHIP_PALM) ||
@@ -185,6 +189,7 @@ set_tex_resource(struct radeon_device *rdev,
 		 int format, int w, int h, int pitch,
 		 u64 gpu_addr, u32 size)
 {
+	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
 	u32 sq_tex_resource_word0, sq_tex_resource_word1;
 	u32 sq_tex_resource_word4, sq_tex_resource_word7;
 
@@ -208,16 +213,16 @@ set_tex_resource(struct radeon_device *rdev,
 	cp_set_surface_sync(rdev,
 			    PACKET3_TC_ACTION_ENA, size, gpu_addr);
 
-	radeon_ring_write(rdev, PACKET3(PACKET3_SET_RESOURCE, 8));
-	radeon_ring_write(rdev, 0);
-	radeon_ring_write(rdev, sq_tex_resource_word0);
-	radeon_ring_write(rdev, sq_tex_resource_word1);
-	radeon_ring_write(rdev, gpu_addr >> 8);
-	radeon_ring_write(rdev, gpu_addr >> 8);
-	radeon_ring_write(rdev, sq_tex_resource_word4);
-	radeon_ring_write(rdev, 0);
-	radeon_ring_write(rdev, 0);
-	radeon_ring_write(rdev, sq_tex_resource_word7);
+	radeon_ring_write(ring, PACKET3(PACKET3_SET_RESOURCE, 8));
+	radeon_ring_write(ring, 0);
+	radeon_ring_write(ring, sq_tex_resource_word0);
+	radeon_ring_write(ring, sq_tex_resource_word1);
+	radeon_ring_write(ring, gpu_addr >> 8);
+	radeon_ring_write(ring, gpu_addr >> 8);
+	radeon_ring_write(ring, sq_tex_resource_word4);
+	radeon_ring_write(ring, 0);
+	radeon_ring_write(ring, 0);
+	radeon_ring_write(ring, sq_tex_resource_word7);
 }
 
 /* emits 12 */
@@ -225,6 +230,7 @@ static void
 set_scissors(struct radeon_device *rdev, int x1, int y1,
 	     int x2, int y2)
 {
+	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
 	/* workaround some hw bugs */
 	if (x2 == 0)
 		x1 = 1;
@@ -235,43 +241,44 @@ set_scissors(struct radeon_device *rdev, int x1, int y1,
 			x2 = 2;
 	}
 
-	radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
-	radeon_ring_write(rdev, (PA_SC_SCREEN_SCISSOR_TL - PACKET3_SET_CONTEXT_REG_START) >> 2);
-	radeon_ring_write(rdev, (x1 << 0) | (y1 << 16));
-	radeon_ring_write(rdev, (x2 << 0) | (y2 << 16));
+	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
+	radeon_ring_write(ring, (PA_SC_SCREEN_SCISSOR_TL - PACKET3_SET_CONTEXT_REG_START) >> 2);
+	radeon_ring_write(ring, (x1 << 0) | (y1 << 16));
+	radeon_ring_write(ring, (x2 << 0) | (y2 << 16));
 
-	radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
-	radeon_ring_write(rdev, (PA_SC_GENERIC_SCISSOR_TL - PACKET3_SET_CONTEXT_REG_START) >> 2);
-	radeon_ring_write(rdev, (x1 << 0) | (y1 << 16) | (1 << 31));
-	radeon_ring_write(rdev, (x2 << 0) | (y2 << 16));
+	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
+	radeon_ring_write(ring, (PA_SC_GENERIC_SCISSOR_TL - PACKET3_SET_CONTEXT_REG_START) >> 2);
+	radeon_ring_write(ring, (x1 << 0) | (y1 << 16) | (1 << 31));
+	radeon_ring_write(ring, (x2 << 0) | (y2 << 16));
 
-	radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
-	radeon_ring_write(rdev, (PA_SC_WINDOW_SCISSOR_TL - PACKET3_SET_CONTEXT_REG_START) >> 2);
-	radeon_ring_write(rdev, (x1 << 0) | (y1 << 16) | (1 << 31));
-	radeon_ring_write(rdev, (x2 << 0) | (y2 << 16));
+	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
+	radeon_ring_write(ring, (PA_SC_WINDOW_SCISSOR_TL - PACKET3_SET_CONTEXT_REG_START) >> 2);
+	radeon_ring_write(ring, (x1 << 0) | (y1 << 16) | (1 << 31));
+	radeon_ring_write(ring, (x2 << 0) | (y2 << 16));
 }
 
 /* emits 10 */
 static void
 draw_auto(struct radeon_device *rdev)
 {
-	radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 1));
-	radeon_ring_write(rdev, (VGT_PRIMITIVE_TYPE - PACKET3_SET_CONFIG_REG_START) >> 2);
-	radeon_ring_write(rdev, DI_PT_RECTLIST);
+	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
+	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
+	radeon_ring_write(ring, (VGT_PRIMITIVE_TYPE - PACKET3_SET_CONFIG_REG_START) >> 2);
+	radeon_ring_write(ring, DI_PT_RECTLIST);
 
-	radeon_ring_write(rdev, PACKET3(PACKET3_INDEX_TYPE, 0));
-	radeon_ring_write(rdev,
+	radeon_ring_write(ring, PACKET3(PACKET3_INDEX_TYPE, 0));
+	radeon_ring_write(ring,
 #ifdef __BIG_ENDIAN
 			  (2 << 2) |
 #endif
 			  DI_INDEX_SIZE_16_BIT);
 
-	radeon_ring_write(rdev, PACKET3(PACKET3_NUM_INSTANCES, 0));
-	radeon_ring_write(rdev, 1);
+	radeon_ring_write(ring, PACKET3(PACKET3_NUM_INSTANCES, 0));
+	radeon_ring_write(ring, 1);
 
-	radeon_ring_write(rdev, PACKET3(PACKET3_DRAW_INDEX_AUTO, 1));
-	radeon_ring_write(rdev, 3);
-	radeon_ring_write(rdev, DI_SRC_SEL_AUTO_INDEX);
+	radeon_ring_write(ring, PACKET3(PACKET3_DRAW_INDEX_AUTO, 1));
+	radeon_ring_write(ring, 3);
+	radeon_ring_write(ring, DI_SRC_SEL_AUTO_INDEX);
 
 }
 
@@ -279,6 +286,7 @@ draw_auto(struct radeon_device *rdev)
 static void
 set_default_state(struct radeon_device *rdev)
 {
+	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
 	u32 sq_config, sq_gpr_resource_mgmt_1, sq_gpr_resource_mgmt_2, sq_gpr_resource_mgmt_3;
 	u32 sq_thread_resource_mgmt, sq_thread_resource_mgmt_2;
 	u32 sq_stack_resource_mgmt_1, sq_stack_resource_mgmt_2, sq_stack_resource_mgmt_3;
@@ -292,8 +300,8 @@ set_default_state(struct radeon_device *rdev)
 	int dwords;
 
 	/* set clear context state */
-	radeon_ring_write(rdev, PACKET3(PACKET3_CLEAR_STATE, 0));
-	radeon_ring_write(rdev, 0);
+	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
+	radeon_ring_write(ring, 0);
 
 	if (rdev->family < CHIP_CAYMAN) {
 		switch (rdev->family) {
@@ -550,60 +558,60 @@ set_default_state(struct radeon_device *rdev)
 					    NUM_LS_STACK_ENTRIES(num_ls_stack_entries));
 
 		/* disable dyn gprs */
-		radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 1));
-		radeon_ring_write(rdev, (SQ_DYN_GPR_CNTL_PS_FLUSH_REQ - PACKET3_SET_CONFIG_REG_START) >> 2);
-		radeon_ring_write(rdev, 0);
+		radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
+		radeon_ring_write(ring, (SQ_DYN_GPR_CNTL_PS_FLUSH_REQ - PACKET3_SET_CONFIG_REG_START) >> 2);
+		radeon_ring_write(ring, 0);
 
 		/* setup LDS */
-		radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 1));
-		radeon_ring_write(rdev, (SQ_LDS_RESOURCE_MGMT - PACKET3_SET_CONFIG_REG_START) >> 2);
-		radeon_ring_write(rdev, 0x10001000);
+		radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
+		radeon_ring_write(ring, (SQ_LDS_RESOURCE_MGMT - PACKET3_SET_CONFIG_REG_START) >> 2);
+		radeon_ring_write(ring, 0x10001000);
 
 		/* SQ config */
-		radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 11));
-		radeon_ring_write(rdev, (SQ_CONFIG - PACKET3_SET_CONFIG_REG_START) >> 2);
-		radeon_ring_write(rdev, sq_config);
-		radeon_ring_write(rdev, sq_gpr_resource_mgmt_1);
-		radeon_ring_write(rdev, sq_gpr_resource_mgmt_2);
-		radeon_ring_write(rdev, sq_gpr_resource_mgmt_3);
-		radeon_ring_write(rdev, 0);
-		radeon_ring_write(rdev, 0);
-		radeon_ring_write(rdev, sq_thread_resource_mgmt);
-		radeon_ring_write(rdev, sq_thread_resource_mgmt_2);
-		radeon_ring_write(rdev, sq_stack_resource_mgmt_1);
-		radeon_ring_write(rdev, sq_stack_resource_mgmt_2);
-		radeon_ring_write(rdev, sq_stack_resource_mgmt_3);
+		radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 11));
+		radeon_ring_write(ring, (SQ_CONFIG - PACKET3_SET_CONFIG_REG_START) >> 2);
+		radeon_ring_write(ring, sq_config);
+		radeon_ring_write(ring, sq_gpr_resource_mgmt_1);
+		radeon_ring_write(ring, sq_gpr_resource_mgmt_2);
+		radeon_ring_write(ring, sq_gpr_resource_mgmt_3);
+		radeon_ring_write(ring, 0);
+		radeon_ring_write(ring, 0);
+		radeon_ring_write(ring, sq_thread_resource_mgmt);
+		radeon_ring_write(ring, sq_thread_resource_mgmt_2);
+		radeon_ring_write(ring, sq_stack_resource_mgmt_1);
+		radeon_ring_write(ring, sq_stack_resource_mgmt_2);
+		radeon_ring_write(ring, sq_stack_resource_mgmt_3);
 	}
 
 	/* CONTEXT_CONTROL */
-	radeon_ring_write(rdev, 0xc0012800);
-	radeon_ring_write(rdev, 0x80000000);
-	radeon_ring_write(rdev, 0x80000000);
+	radeon_ring_write(ring, 0xc0012800);
+	radeon_ring_write(ring, 0x80000000);
+	radeon_ring_write(ring, 0x80000000);
 
 	/* SQ_VTX_BASE_VTX_LOC */
-	radeon_ring_write(rdev, 0xc0026f00);
-	radeon_ring_write(rdev, 0x00000000);
-	radeon_ring_write(rdev, 0x00000000);
-	radeon_ring_write(rdev, 0x00000000);
+	radeon_ring_write(ring, 0xc0026f00);
+	radeon_ring_write(ring, 0x00000000);
+	radeon_ring_write(ring, 0x00000000);
+	radeon_ring_write(ring, 0x00000000);
 
 	/* SET_SAMPLER */
-	radeon_ring_write(rdev, 0xc0036e00);
-	radeon_ring_write(rdev, 0x00000000);
-	radeon_ring_write(rdev, 0x00000012);
-	radeon_ring_write(rdev, 0x00000000);
-	radeon_ring_write(rdev, 0x00000000);
+	radeon_ring_write(ring, 0xc0036e00);
+	radeon_ring_write(ring, 0x00000000);
+	radeon_ring_write(ring, 0x00000012);
+	radeon_ring_write(ring, 0x00000000);
+	radeon_ring_write(ring, 0x00000000);
 
 	/* set to DX10/11 mode */
-	radeon_ring_write(rdev, PACKET3(PACKET3_MODE_CONTROL, 0));
-	radeon_ring_write(rdev, 1);
+	radeon_ring_write(ring, PACKET3(PACKET3_MODE_CONTROL, 0));
+	radeon_ring_write(ring, 1);
 
 	/* emit an IB pointing at default state */
 	dwords = ALIGN(rdev->r600_blit.state_len, 0x10);
 	gpu_addr = rdev->r600_blit.shader_gpu_addr + rdev->r600_blit.state_offset;
-	radeon_ring_write(rdev, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
-	radeon_ring_write(rdev, gpu_addr & 0xFFFFFFFC);
-	radeon_ring_write(rdev, upper_32_bits(gpu_addr) & 0xFF);
-	radeon_ring_write(rdev, dwords);
+	radeon_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
+	radeon_ring_write(ring, gpu_addr & 0xFFFFFFFC);
+	radeon_ring_write(ring, upper_32_bits(gpu_addr) & 0xFF);
+	radeon_ring_write(ring, dwords);
 
 }
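
The blit helpers change purely mechanically: each one resolves the GFX ring once at the top and emits through it. That works because radeon_ring_write() becomes an operation on struct radeon_ring alone; a plausible shape for the accessor after this series, assuming the wptr and pointer-mask fields implied by the ring hunks (a sketch, not the verbatim inline):

	/* Sketch: append one dword and wrap the write pointer with the
	 * ring's power-of-two mask. */
	static inline void radeon_ring_write(struct radeon_ring *ring, uint32_t v)
	{
		ring->ring[ring->wptr++] = v;
		ring->wptr &= ring->ptr_mask;
		/* (free-dword bookkeeping omitted in this sketch) */
	}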
 

+ 147 - 90
drivers/gpu/drm/radeon/ni.c

@@ -1006,9 +1006,39 @@ void cayman_pcie_gart_fini(struct radeon_device *rdev)
 	radeon_gart_fini(rdev);
 }
 
+void cayman_cp_int_cntl_setup(struct radeon_device *rdev,
+			      int ring, u32 cp_int_cntl)
+{
+	u32 srbm_gfx_cntl = RREG32(SRBM_GFX_CNTL) & ~3;
+
+	WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl | (ring & 3));
+	WREG32(CP_INT_CNTL, cp_int_cntl);
+}
+
 /*
  * CP.
  */
+void cayman_fence_ring_emit(struct radeon_device *rdev,
+			    struct radeon_fence *fence)
+{
+	struct radeon_ring *ring = &rdev->ring[fence->ring];
+	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
+
+	/* flush read cache over gart */
+	radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
+	radeon_ring_write(ring, PACKET3_TC_ACTION_ENA | PACKET3_SH_ACTION_ENA);
+	radeon_ring_write(ring, 0xFFFFFFFF);
+	radeon_ring_write(ring, 0);
+	radeon_ring_write(ring, 10); /* poll interval */
+	/* EVENT_WRITE_EOP - flush caches, send int */
+	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
+	radeon_ring_write(ring, EVENT_TYPE(CACHE_FLUSH_AND_INV_EVENT_TS) | EVENT_INDEX(5));
+	radeon_ring_write(ring, addr & 0xffffffff);
+	radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | DATA_SEL(1) | INT_SEL(2));
+	radeon_ring_write(ring, fence->seq);
+	radeon_ring_write(ring, 0);
+}
+
 static void cayman_cp_enable(struct radeon_device *rdev, bool enable)
 {
 	if (enable)
@@ -1049,63 +1079,64 @@ static int cayman_cp_load_microcode(struct radeon_device *rdev)
 
 static int cayman_cp_start(struct radeon_device *rdev)
 {
+	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
 	int r, i;
 
-	r = radeon_ring_lock(rdev, 7);
+	r = radeon_ring_lock(rdev, ring, 7);
 	if (r) {
 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
 		return r;
 	}
-	radeon_ring_write(rdev, PACKET3(PACKET3_ME_INITIALIZE, 5));
-	radeon_ring_write(rdev, 0x1);
-	radeon_ring_write(rdev, 0x0);
-	radeon_ring_write(rdev, rdev->config.cayman.max_hw_contexts - 1);
-	radeon_ring_write(rdev, PACKET3_ME_INITIALIZE_DEVICE_ID(1));
-	radeon_ring_write(rdev, 0);
-	radeon_ring_write(rdev, 0);
-	radeon_ring_unlock_commit(rdev);
+	radeon_ring_write(ring, PACKET3(PACKET3_ME_INITIALIZE, 5));
+	radeon_ring_write(ring, 0x1);
+	radeon_ring_write(ring, 0x0);
+	radeon_ring_write(ring, rdev->config.cayman.max_hw_contexts - 1);
+	radeon_ring_write(ring, PACKET3_ME_INITIALIZE_DEVICE_ID(1));
+	radeon_ring_write(ring, 0);
+	radeon_ring_write(ring, 0);
+	radeon_ring_unlock_commit(rdev, ring);
 
 	cayman_cp_enable(rdev, true);
 
-	r = radeon_ring_lock(rdev, cayman_default_size + 19);
+	r = radeon_ring_lock(rdev, ring, cayman_default_size + 19);
 	if (r) {
 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
 		return r;
 	}
 
 	/* setup clear context state */
-	radeon_ring_write(rdev, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
-	radeon_ring_write(rdev, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
+	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
+	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
 
 	for (i = 0; i < cayman_default_size; i++)
-		radeon_ring_write(rdev, cayman_default_state[i]);
+		radeon_ring_write(ring, cayman_default_state[i]);
 
-	radeon_ring_write(rdev, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
-	radeon_ring_write(rdev, PACKET3_PREAMBLE_END_CLEAR_STATE);
+	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
+	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
 
 	/* set clear context state */
-	radeon_ring_write(rdev, PACKET3(PACKET3_CLEAR_STATE, 0));
-	radeon_ring_write(rdev, 0);
+	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
+	radeon_ring_write(ring, 0);
 
 	/* SQ_VTX_BASE_VTX_LOC */
-	radeon_ring_write(rdev, 0xc0026f00);
-	radeon_ring_write(rdev, 0x00000000);
-	radeon_ring_write(rdev, 0x00000000);
-	radeon_ring_write(rdev, 0x00000000);
+	radeon_ring_write(ring, 0xc0026f00);
+	radeon_ring_write(ring, 0x00000000);
+	radeon_ring_write(ring, 0x00000000);
+	radeon_ring_write(ring, 0x00000000);
 
 	/* Clear consts */
-	radeon_ring_write(rdev, 0xc0036f00);
-	radeon_ring_write(rdev, 0x00000bc4);
-	radeon_ring_write(rdev, 0xffffffff);
-	radeon_ring_write(rdev, 0xffffffff);
-	radeon_ring_write(rdev, 0xffffffff);
+	radeon_ring_write(ring, 0xc0036f00);
+	radeon_ring_write(ring, 0x00000bc4);
+	radeon_ring_write(ring, 0xffffffff);
+	radeon_ring_write(ring, 0xffffffff);
+	radeon_ring_write(ring, 0xffffffff);
 
-	radeon_ring_write(rdev, 0xc0026900);
-	radeon_ring_write(rdev, 0x00000316);
-	radeon_ring_write(rdev, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
-	radeon_ring_write(rdev, 0x00000010); /*  */
+	radeon_ring_write(ring, 0xc0026900);
+	radeon_ring_write(ring, 0x00000316);
+	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
+	radeon_ring_write(ring, 0x00000010); /*  */
 
-	radeon_ring_unlock_commit(rdev);
+	radeon_ring_unlock_commit(rdev, ring);
 
 	/* XXX init other rings */
 
@@ -1115,11 +1146,12 @@ static int cayman_cp_start(struct radeon_device *rdev)
 static void cayman_cp_fini(struct radeon_device *rdev)
 {
 	cayman_cp_enable(rdev, false);
-	radeon_ring_fini(rdev);
+	radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
 }
 
 int cayman_cp_resume(struct radeon_device *rdev)
 {
+	struct radeon_ring *ring;
 	u32 tmp;
 	u32 rb_bufsz;
 	int r;
@@ -1136,7 +1168,7 @@ int cayman_cp_resume(struct radeon_device *rdev)
 	WREG32(GRBM_SOFT_RESET, 0);
 	RREG32(GRBM_SOFT_RESET);
 
-	WREG32(CP_SEM_WAIT_TIMER, 0x4);
+	WREG32(CP_SEM_WAIT_TIMER, 0x0);
 
 	/* Set the write pointer delay */
 	WREG32(CP_RB_WPTR_DELAY, 0);
@@ -1145,7 +1177,8 @@ int cayman_cp_resume(struct radeon_device *rdev)
 
 	/* ring 0 - compute and gfx */
 	/* Set ring buffer size */
-	rb_bufsz = drm_order(rdev->cp.ring_size / 8);
+	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
+	rb_bufsz = drm_order(ring->ring_size / 8);
 	tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
 #ifdef __BIG_ENDIAN
 	tmp |= BUF_SWAP_32BIT;
@@ -1154,8 +1187,8 @@ int cayman_cp_resume(struct radeon_device *rdev)
 
 	/* Initialize the ring buffer's read and write pointers */
 	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
-	rdev->cp.wptr = 0;
-	WREG32(CP_RB0_WPTR, rdev->cp.wptr);
+	ring->wptr = 0;
+	WREG32(CP_RB0_WPTR, ring->wptr);
 
 	/* set the wb address wether it's enabled or not */
 	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
@@ -1172,13 +1205,14 @@ int cayman_cp_resume(struct radeon_device *rdev)
 	mdelay(1);
 	WREG32(CP_RB0_CNTL, tmp);
 
-	WREG32(CP_RB0_BASE, rdev->cp.gpu_addr >> 8);
+	WREG32(CP_RB0_BASE, ring->gpu_addr >> 8);
 
-	rdev->cp.rptr = RREG32(CP_RB0_RPTR);
+	ring->rptr = RREG32(CP_RB0_RPTR);
 
 	/* ring1  - compute only */
 	/* Set ring buffer size */
-	rb_bufsz = drm_order(rdev->cp1.ring_size / 8);
+	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
+	rb_bufsz = drm_order(ring->ring_size / 8);
 	tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
 #ifdef __BIG_ENDIAN
 	tmp |= BUF_SWAP_32BIT;
@@ -1187,8 +1221,8 @@ int cayman_cp_resume(struct radeon_device *rdev)
 
 	/* Initialize the ring buffer's read and write pointers */
 	WREG32(CP_RB1_CNTL, tmp | RB_RPTR_WR_ENA);
-	rdev->cp1.wptr = 0;
-	WREG32(CP_RB1_WPTR, rdev->cp1.wptr);
+	ring->wptr = 0;
+	WREG32(CP_RB1_WPTR, ring->wptr);
 
 	/* set the wb address wether it's enabled or not */
 	WREG32(CP_RB1_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFFFFFFFC);
@@ -1197,13 +1231,14 @@ int cayman_cp_resume(struct radeon_device *rdev)
 	mdelay(1);
 	WREG32(CP_RB1_CNTL, tmp);
 
-	WREG32(CP_RB1_BASE, rdev->cp1.gpu_addr >> 8);
+	WREG32(CP_RB1_BASE, ring->gpu_addr >> 8);
 
-	rdev->cp1.rptr = RREG32(CP_RB1_RPTR);
+	ring->rptr = RREG32(CP_RB1_RPTR);
 
 	/* ring2 - compute only */
 	/* Set ring buffer size */
-	rb_bufsz = drm_order(rdev->cp2.ring_size / 8);
+	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
+	rb_bufsz = drm_order(ring->ring_size / 8);
 	tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
 #ifdef __BIG_ENDIAN
 	tmp |= BUF_SWAP_32BIT;
@@ -1212,8 +1247,8 @@ int cayman_cp_resume(struct radeon_device *rdev)
 
 	/* Initialize the ring buffer's read and write pointers */
 	WREG32(CP_RB2_CNTL, tmp | RB_RPTR_WR_ENA);
-	rdev->cp2.wptr = 0;
-	WREG32(CP_RB2_WPTR, rdev->cp2.wptr);
+	ring->wptr = 0;
+	WREG32(CP_RB2_WPTR, ring->wptr);
 
 	/* set the wb address wether it's enabled or not */
 	WREG32(CP_RB2_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFFFFFFFC);
@@ -1222,28 +1257,28 @@ int cayman_cp_resume(struct radeon_device *rdev)
 	mdelay(1);
 	WREG32(CP_RB2_CNTL, tmp);
 
-	WREG32(CP_RB2_BASE, rdev->cp2.gpu_addr >> 8);
+	WREG32(CP_RB2_BASE, ring->gpu_addr >> 8);
 
-	rdev->cp2.rptr = RREG32(CP_RB2_RPTR);
+	ring->rptr = RREG32(CP_RB2_RPTR);
 
 	/* start the rings */
 	cayman_cp_start(rdev);
-	rdev->cp.ready = true;
-	rdev->cp1.ready = true;
-	rdev->cp2.ready = true;
+	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
+	rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
+	rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
 	/* this only test cp0 */
-	r = radeon_ring_test(rdev);
+	r = radeon_ring_test(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
 	if (r) {
-		rdev->cp.ready = false;
-		rdev->cp1.ready = false;
-		rdev->cp2.ready = false;
+		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
+		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
+		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
 		return r;
 	}
 
 	return 0;
 }
 
-bool cayman_gpu_is_lockup(struct radeon_device *rdev)
+bool cayman_gpu_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
 {
 	u32 srbm_status;
 	u32 grbm_status;
@@ -1256,20 +1291,20 @@ bool cayman_gpu_is_lockup(struct radeon_device *rdev)
 	grbm_status_se0 = RREG32(GRBM_STATUS_SE0);
 	grbm_status_se1 = RREG32(GRBM_STATUS_SE1);
 	if (!(grbm_status & GUI_ACTIVE)) {
-		r100_gpu_lockup_update(lockup, &rdev->cp);
+		r100_gpu_lockup_update(lockup, ring);
 		return false;
 	}
 	/* force CP activities */
-	r = radeon_ring_lock(rdev, 2);
+	r = radeon_ring_lock(rdev, ring, 2);
 	if (!r) {
 		/* PACKET2 NOP */
-		radeon_ring_write(rdev, 0x80000000);
-		radeon_ring_write(rdev, 0x80000000);
-		radeon_ring_unlock_commit(rdev);
+		radeon_ring_write(ring, 0x80000000);
+		radeon_ring_write(ring, 0x80000000);
+		radeon_ring_unlock_commit(rdev, ring);
 	}
 	/* XXX deal with CP0,1,2 */
-	rdev->cp.rptr = RREG32(CP_RB0_RPTR);
-	return r100_gpu_cp_is_lockup(rdev, lockup, &rdev->cp);
+	ring->rptr = RREG32(ring->rptr_reg);
+	return r100_gpu_cp_is_lockup(rdev, lockup, ring);
 }
 
 static int cayman_gpu_soft_reset(struct radeon_device *rdev)
@@ -1338,6 +1373,7 @@ int cayman_asic_reset(struct radeon_device *rdev)
 
 static int cayman_startup(struct radeon_device *rdev)
 {
+	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
 	int r;
 
 	/* enable pcie gen2 link */
@@ -1378,6 +1414,24 @@ static int cayman_startup(struct radeon_device *rdev)
 	if (r)
 		return r;
 
+	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
+	if (r) {
+		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
+		return r;
+	}
+
+	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
+	if (r) {
+		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
+		return r;
+	}
+
+	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
+	if (r) {
+		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
+		return r;
+	}
+
 	/* Enable IRQ */
 	r = r600_irq_init(rdev);
 	if (r) {
@@ -1387,7 +1441,9 @@ static int cayman_startup(struct radeon_device *rdev)
 	}
 	evergreen_irq_set(rdev);
 
-	r = radeon_ring_init(rdev, rdev->cp.ring_size);
+	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
+			     CP_RB0_RPTR, CP_RB0_WPTR,
+			     0, 0xfffff, RADEON_CP_PACKET2);
 	if (r)
 		return r;
 	r = cayman_cp_load_microcode(rdev);
@@ -1397,6 +1453,17 @@ static int cayman_startup(struct radeon_device *rdev)
 	if (r)
 		return r;
 
+	r = radeon_ib_pool_start(rdev);
+	if (r)
+		return r;
+
+	r = r600_ib_test(rdev, RADEON_RING_TYPE_GFX_INDEX);
+	if (r) {
+		DRM_ERROR("radeon: failed testing IB (%d).\n", r);
+		rdev->accel_working = false;
+		return r;
+	}
+
 	return 0;
 }
 
@@ -1411,32 +1478,25 @@ int cayman_resume(struct radeon_device *rdev)
 	/* post card */
 	atom_asic_init(rdev->mode_info.atom_context);
 
+	rdev->accel_working = true;
 	r = cayman_startup(rdev);
 	if (r) {
 		DRM_ERROR("cayman startup failed on resume\n");
 		return r;
 	}
-
-	r = r600_ib_test(rdev);
-	if (r) {
-		DRM_ERROR("radeon: failled testing IB (%d).\n", r);
-		return r;
-	}
-
 	return r;
-
 }
 
 int cayman_suspend(struct radeon_device *rdev)
 {
 	/* FIXME: we should wait for ring to be empty */
+	radeon_ib_pool_suspend(rdev);
+	r600_blit_suspend(rdev);
 	cayman_cp_enable(rdev, false);
-	rdev->cp.ready = false;
+	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
 	evergreen_irq_suspend(rdev);
 	radeon_wb_disable(rdev);
 	cayman_pcie_gart_disable(rdev);
-	r600_blit_suspend(rdev);
-
 	return 0;
 }
 
@@ -1448,6 +1508,7 @@ int cayman_suspend(struct radeon_device *rdev)
  */
 int cayman_init(struct radeon_device *rdev)
 {
+	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
 	int r;
 
 	/* This don't do much */
@@ -1500,8 +1561,8 @@ int cayman_init(struct radeon_device *rdev)
 	if (r)
 		return r;
 
-	rdev->cp.ring_obj = NULL;
-	r600_ring_init(rdev, 1024 * 1024);
+	ring->ring_obj = NULL;
+	r600_ring_init(rdev, ring, 1024 * 1024);
 
 	rdev->ih.ring_obj = NULL;
 	r600_ih_ring_init(rdev, 64 * 1024);
@@ -1510,29 +1571,24 @@ int cayman_init(struct radeon_device *rdev)
 	if (r)
 		return r;
 
+	r = radeon_ib_pool_init(rdev);
 	rdev->accel_working = true;
+	if (r) {
+		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
+		rdev->accel_working = false;
+	}
+
 	r = cayman_startup(rdev);
 	if (r) {
 		dev_err(rdev->dev, "disabling GPU acceleration\n");
 		cayman_cp_fini(rdev);
 		r600_irq_fini(rdev);
 		radeon_wb_fini(rdev);
+		r100_ib_fini(rdev);
 		radeon_irq_kms_fini(rdev);
 		cayman_pcie_gart_fini(rdev);
 		rdev->accel_working = false;
 	}
-	if (rdev->accel_working) {
-		r = radeon_ib_pool_init(rdev);
-		if (r) {
-			DRM_ERROR("radeon: failed initializing IB pool (%d).\n", r);
-			rdev->accel_working = false;
-		}
-		r = r600_ib_test(rdev);
-		if (r) {
-			DRM_ERROR("radeon: failed testing IB (%d).\n", r);
-			rdev->accel_working = false;
-		}
-	}
 
 	/* Don't start up if the MC ucode is missing.
 	 * The default clocks and voltages before the MC ucode
@@ -1552,11 +1608,12 @@ void cayman_fini(struct radeon_device *rdev)
 	cayman_cp_fini(rdev);
 	r600_irq_fini(rdev);
 	radeon_wb_fini(rdev);
-	radeon_ib_pool_fini(rdev);
+	r100_ib_fini(rdev);
 	radeon_irq_kms_fini(rdev);
 	cayman_pcie_gart_fini(rdev);
 	r600_vram_scratch_fini(rdev);
 	radeon_gem_fini(rdev);
+	radeon_semaphore_driver_fini(rdev);
 	radeon_fence_driver_fini(rdev);
 	radeon_bo_fini(rdev);
 	radeon_atombios_fini(rdev);
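
The cayman_cp_int_cntl_setup() helper added above relies on CP_INT_CNTL being banked per ring on Cayman: the low bits of SRBM_GFX_CNTL select which ring's copy the next CP register access hits. The same select-then-access pattern would extend to reads; a hypothetical companion helper, not part of this diff:

	/* Hypothetical read-side mirror of cayman_cp_int_cntl_setup():
	 * select the ring bank, then read that ring's CP_INT_CNTL. */
	static u32 cayman_cp_int_cntl_read(struct radeon_device *rdev, int ring)
	{
		u32 srbm_gfx_cntl = RREG32(SRBM_GFX_CNTL) & ~3;

		WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl | (ring & 3));
		return RREG32(CP_INT_CNTL);
	}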

+ 33 - 0
drivers/gpu/drm/radeon/nid.h

@@ -42,6 +42,9 @@
 #define CAYMAN_MAX_TCC_MASK          0xFF
 
 #define DMIF_ADDR_CONFIG  				0xBD4
+#define	SRBM_GFX_CNTL				        0x0E44
+#define		RINGID(x)					(((x) & 0x3) << 0)
+#define		VMID(x)						(((x) & 0x7) << 0)
 #define	SRBM_STATUS				        0x0E50
 
 #define VM_CONTEXT0_REQUEST_RESPONSE			0x1470
@@ -394,6 +397,12 @@
 #define	CP_RB0_RPTR_ADDR				0xC10C
 #define	CP_RB0_RPTR_ADDR_HI				0xC110
 #define	CP_RB0_WPTR					0xC114
+
+#define CP_INT_CNTL                                     0xC124
+#       define CNTX_BUSY_INT_ENABLE                     (1 << 19)
+#       define CNTX_EMPTY_INT_ENABLE                    (1 << 20)
+#       define TIME_STAMP_INT_ENABLE                    (1 << 26)
+
 #define	CP_RB1_BASE					0xC180
 #define	CP_RB1_CNTL					0xC184
 #define	CP_RB1_RPTR_ADDR				0xC188
@@ -411,6 +420,10 @@
 #define	CP_ME_RAM_DATA					0xC160
 #define	CP_DEBUG					0xC1FC
 
+#define VGT_EVENT_INITIATOR                             0x28a90
+#       define CACHE_FLUSH_AND_INV_EVENT_TS                     (0x14 << 0)
+#       define CACHE_FLUSH_AND_INV_EVENT                        (0x16 << 0)
+
 /*
  * PM4
  */
@@ -494,7 +507,27 @@
 #define		PACKET3_ME_INITIALIZE_DEVICE_ID(x) ((x) << 16)
 #define	PACKET3_COND_WRITE				0x45
 #define	PACKET3_EVENT_WRITE				0x46
+#define		EVENT_TYPE(x)                           ((x) << 0)
+#define		EVENT_INDEX(x)                          ((x) << 8)
+                /* 0 - any non-TS event
+		 * 1 - ZPASS_DONE
+		 * 2 - SAMPLE_PIPELINESTAT
+		 * 3 - SAMPLE_STREAMOUTSTAT*
+		 * 4 - *S_PARTIAL_FLUSH
+		 * 5 - TS events
+		 */
 #define	PACKET3_EVENT_WRITE_EOP				0x47
+#define		DATA_SEL(x)                             ((x) << 29)
+                /* 0 - discard
+		 * 1 - send low 32bit data
+		 * 2 - send 64bit data
+		 * 3 - send 64bit counter value
+		 */
+#define		INT_SEL(x)                              ((x) << 24)
+                /* 0 - none
+		 * 1 - interrupt only (DATA_SEL = 0)
+		 * 2 - interrupt when data write is confirmed
+		 */
 #define	PACKET3_EVENT_WRITE_EOS				0x48
 #define	PACKET3_PREAMBLE_CNTL				0x4A
 #              define PACKET3_PREAMBLE_BEGIN_CLEAR_STATE     (2 << 28)
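
These EVENT_WRITE_EOP field macros are exactly what cayman_fence_ring_emit() in ni.c above combines: DATA_SEL(1) writes the low 32-bit fence sequence to memory and INT_SEL(2) raises the interrupt once that write is confirmed. Decomposing the address-high dword from that emit path (values follow from the shifts defined above):

	/* The EOP address-hi dword from cayman_fence_ring_emit(), spelled out;
	 * addr is the fence GPU address precomputed by the fence driver. */
	u32 eop_hi = (upper_32_bits(addr) & 0xff) |	/* bits 39:32 of the address */
		     DATA_SEL(1) |			/* 1 << 29: send low 32 bits of data */
		     INT_SEL(2);			/* 2 << 24: interrupt when write confirmed */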

+ 122 - 105
drivers/gpu/drm/radeon/r100.c

@@ -667,7 +667,7 @@ int r100_irq_set(struct radeon_device *rdev)
 		WREG32(R_000040_GEN_INT_CNTL, 0);
 		return -EINVAL;
 	}
-	if (rdev->irq.sw_int) {
+	if (rdev->irq.sw_int[RADEON_RING_TYPE_GFX_INDEX]) {
 		tmp |= RADEON_SW_INT_ENABLE;
 	}
 	if (rdev->irq.gui_idle) {
@@ -739,7 +739,7 @@ int r100_irq_process(struct radeon_device *rdev)
 	while (status) {
 		/* SW interrupt */
 		if (status & RADEON_SW_INT_TEST) {
-			radeon_fence_process(rdev);
+			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
 		}
 		/* gui idle interrupt */
 		if (status & RADEON_GUI_IDLE_STAT) {
@@ -811,25 +811,36 @@ u32 r100_get_vblank_counter(struct radeon_device *rdev, int crtc)
 void r100_fence_ring_emit(struct radeon_device *rdev,
 			  struct radeon_fence *fence)
 {
+	struct radeon_ring *ring = &rdev->ring[fence->ring];
+
 	/* We have to make sure that caches are flushed before
 	 * CPU might read something from VRAM. */
-	radeon_ring_write(rdev, PACKET0(RADEON_RB3D_DSTCACHE_CTLSTAT, 0));
-	radeon_ring_write(rdev, RADEON_RB3D_DC_FLUSH_ALL);
-	radeon_ring_write(rdev, PACKET0(RADEON_RB3D_ZCACHE_CTLSTAT, 0));
-	radeon_ring_write(rdev, RADEON_RB3D_ZC_FLUSH_ALL);
+	radeon_ring_write(ring, PACKET0(RADEON_RB3D_DSTCACHE_CTLSTAT, 0));
+	radeon_ring_write(ring, RADEON_RB3D_DC_FLUSH_ALL);
+	radeon_ring_write(ring, PACKET0(RADEON_RB3D_ZCACHE_CTLSTAT, 0));
+	radeon_ring_write(ring, RADEON_RB3D_ZC_FLUSH_ALL);
 	/* Wait until IDLE & CLEAN */
-	radeon_ring_write(rdev, PACKET0(RADEON_WAIT_UNTIL, 0));
-	radeon_ring_write(rdev, RADEON_WAIT_2D_IDLECLEAN | RADEON_WAIT_3D_IDLECLEAN);
-	radeon_ring_write(rdev, PACKET0(RADEON_HOST_PATH_CNTL, 0));
-	radeon_ring_write(rdev, rdev->config.r100.hdp_cntl |
+	radeon_ring_write(ring, PACKET0(RADEON_WAIT_UNTIL, 0));
+	radeon_ring_write(ring, RADEON_WAIT_2D_IDLECLEAN | RADEON_WAIT_3D_IDLECLEAN);
+	radeon_ring_write(ring, PACKET0(RADEON_HOST_PATH_CNTL, 0));
+	radeon_ring_write(ring, rdev->config.r100.hdp_cntl |
 				RADEON_HDP_READ_BUFFER_INVALIDATE);
-	radeon_ring_write(rdev, PACKET0(RADEON_HOST_PATH_CNTL, 0));
-	radeon_ring_write(rdev, rdev->config.r100.hdp_cntl);
+	radeon_ring_write(ring, PACKET0(RADEON_HOST_PATH_CNTL, 0));
+	radeon_ring_write(ring, rdev->config.r100.hdp_cntl);
 	/* Emit fence sequence & fire IRQ */
-	radeon_ring_write(rdev, PACKET0(rdev->fence_drv.scratch_reg, 0));
-	radeon_ring_write(rdev, fence->seq);
-	radeon_ring_write(rdev, PACKET0(RADEON_GEN_INT_STATUS, 0));
-	radeon_ring_write(rdev, RADEON_SW_INT_FIRE);
+	radeon_ring_write(ring, PACKET0(rdev->fence_drv[fence->ring].scratch_reg, 0));
+	radeon_ring_write(ring, fence->seq);
+	radeon_ring_write(ring, PACKET0(RADEON_GEN_INT_STATUS, 0));
+	radeon_ring_write(ring, RADEON_SW_INT_FIRE);
+}
+
+void r100_semaphore_ring_emit(struct radeon_device *rdev,
+			      struct radeon_ring *ring,
+			      struct radeon_semaphore *semaphore,
+			      bool emit_wait)
+{
+	/* Unused on older asics, since we don't have semaphores or multiple rings */
+	BUG();
 }
 
 int r100_copy_blit(struct radeon_device *rdev,
@@ -838,6 +849,7 @@ int r100_copy_blit(struct radeon_device *rdev,
 		   unsigned num_gpu_pages,
 		   struct radeon_fence *fence)
 {
+	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
 	uint32_t cur_pages;
 	uint32_t stride_bytes = RADEON_GPU_PAGE_SIZE;
 	uint32_t pitch;
@@ -855,7 +867,7 @@ int r100_copy_blit(struct radeon_device *rdev,
 
 	/* Ask for enough room for blit + flush + fence */
 	ndw = 64 + (10 * num_loops);
-	r = radeon_ring_lock(rdev, ndw);
+	r = radeon_ring_lock(rdev, ring, ndw);
 	if (r) {
 		DRM_ERROR("radeon: moving bo (%d) asking for %u dw.\n", r, ndw);
 		return -EINVAL;
@@ -869,8 +881,8 @@ int r100_copy_blit(struct radeon_device *rdev,
 
 		/* pages are in Y direction - height
 		   page width in X direction - width */
-		radeon_ring_write(rdev, PACKET3(PACKET3_BITBLT_MULTI, 8));
-		radeon_ring_write(rdev,
+		radeon_ring_write(ring, PACKET3(PACKET3_BITBLT_MULTI, 8));
+		radeon_ring_write(ring,
 				  RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
 				  RADEON_GMC_DST_PITCH_OFFSET_CNTL |
 				  RADEON_GMC_SRC_CLIPPING |
@@ -882,26 +894,26 @@ int r100_copy_blit(struct radeon_device *rdev,
 				  RADEON_DP_SRC_SOURCE_MEMORY |
 				  RADEON_GMC_CLR_CMP_CNTL_DIS |
 				  RADEON_GMC_WR_MSK_DIS);
-		radeon_ring_write(rdev, (pitch << 22) | (src_offset >> 10));
-		radeon_ring_write(rdev, (pitch << 22) | (dst_offset >> 10));
-		radeon_ring_write(rdev, (0x1fff) | (0x1fff << 16));
-		radeon_ring_write(rdev, 0);
-		radeon_ring_write(rdev, (0x1fff) | (0x1fff << 16));
-		radeon_ring_write(rdev, num_gpu_pages);
-		radeon_ring_write(rdev, num_gpu_pages);
-		radeon_ring_write(rdev, cur_pages | (stride_pixels << 16));
-	}
-	radeon_ring_write(rdev, PACKET0(RADEON_DSTCACHE_CTLSTAT, 0));
-	radeon_ring_write(rdev, RADEON_RB2D_DC_FLUSH_ALL);
-	radeon_ring_write(rdev, PACKET0(RADEON_WAIT_UNTIL, 0));
-	radeon_ring_write(rdev,
+		radeon_ring_write(ring, (pitch << 22) | (src_offset >> 10));
+		radeon_ring_write(ring, (pitch << 22) | (dst_offset >> 10));
+		radeon_ring_write(ring, (0x1fff) | (0x1fff << 16));
+		radeon_ring_write(ring, 0);
+		radeon_ring_write(ring, (0x1fff) | (0x1fff << 16));
+		radeon_ring_write(ring, num_gpu_pages);
+		radeon_ring_write(ring, num_gpu_pages);
+		radeon_ring_write(ring, cur_pages | (stride_pixels << 16));
+	}
+	radeon_ring_write(ring, PACKET0(RADEON_DSTCACHE_CTLSTAT, 0));
+	radeon_ring_write(ring, RADEON_RB2D_DC_FLUSH_ALL);
+	radeon_ring_write(ring, PACKET0(RADEON_WAIT_UNTIL, 0));
+	radeon_ring_write(ring,
 			  RADEON_WAIT_2D_IDLECLEAN |
 			  RADEON_WAIT_HOST_IDLECLEAN |
 			  RADEON_WAIT_DMA_GUI_IDLE);
 	if (fence) {
 		r = radeon_fence_emit(rdev, fence);
 	}
-	radeon_ring_unlock_commit(rdev);
+	radeon_ring_unlock_commit(rdev, ring);
 	return r;
 }
 
@@ -922,19 +934,20 @@ static int r100_cp_wait_for_idle(struct radeon_device *rdev)
 
 void r100_ring_start(struct radeon_device *rdev)
 {
+	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
 	int r;
 
-	r = radeon_ring_lock(rdev, 2);
+	r = radeon_ring_lock(rdev, ring, 2);
 	if (r) {
 		return;
 	}
-	radeon_ring_write(rdev, PACKET0(RADEON_ISYNC_CNTL, 0));
-	radeon_ring_write(rdev,
+	radeon_ring_write(ring, PACKET0(RADEON_ISYNC_CNTL, 0));
+	radeon_ring_write(ring,
 			  RADEON_ISYNC_ANY2D_IDLE3D |
 			  RADEON_ISYNC_ANY3D_IDLE2D |
 			  RADEON_ISYNC_WAIT_IDLEGUI |
 			  RADEON_ISYNC_CPSCRATCH_IDLEGUI);
-	radeon_ring_unlock_commit(rdev);
+	radeon_ring_unlock_commit(rdev, ring);
 }
 
 
@@ -1035,6 +1048,7 @@ static void r100_cp_load_microcode(struct radeon_device *rdev)
 
 int r100_cp_init(struct radeon_device *rdev, unsigned ring_size)
 {
+	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
 	unsigned rb_bufsz;
 	unsigned rb_blksz;
 	unsigned max_fetch;
@@ -1060,7 +1074,9 @@ int r100_cp_init(struct radeon_device *rdev, unsigned ring_size)
 	rb_bufsz = drm_order(ring_size / 8);
 	ring_size = (1 << (rb_bufsz + 1)) * 4;
 	r100_cp_load_microcode(rdev);
-	r = radeon_ring_init(rdev, ring_size);
+	r = radeon_ring_init(rdev, ring, ring_size, RADEON_WB_CP_RPTR_OFFSET,
+			     RADEON_CP_RB_RPTR, RADEON_CP_RB_WPTR,
+			     0, 0x7fffff, RADEON_CP_PACKET2);
 	if (r) {
 		return r;
 	}
@@ -1069,7 +1085,7 @@ int r100_cp_init(struct radeon_device *rdev, unsigned ring_size)
 	rb_blksz = 9;
 	/* cp will read 128 bytes at a time (4 dwords) */
 	max_fetch = 1;
-	rdev->cp.align_mask = 16 - 1;
+	ring->align_mask = 16 - 1;
 	/* Writes to CP_RB_WPTR will be delayed for pre_write_timer clocks */
 	pre_write_timer = 64;
 	/* Force CP_RB_WPTR write if written more than one time before the
@@ -1099,13 +1115,13 @@ int r100_cp_init(struct radeon_device *rdev, unsigned ring_size)
 	WREG32(RADEON_CP_RB_CNTL, tmp | RADEON_RB_NO_UPDATE);
 
 	/* Set ring address */
-	DRM_INFO("radeon: ring at 0x%016lX\n", (unsigned long)rdev->cp.gpu_addr);
-	WREG32(RADEON_CP_RB_BASE, rdev->cp.gpu_addr);
+	DRM_INFO("radeon: ring at 0x%016lX\n", (unsigned long)ring->gpu_addr);
+	WREG32(RADEON_CP_RB_BASE, ring->gpu_addr);
 	/* Force read & write ptr to 0 */
 	WREG32(RADEON_CP_RB_CNTL, tmp | RADEON_RB_RPTR_WR_ENA | RADEON_RB_NO_UPDATE);
 	WREG32(RADEON_CP_RB_RPTR_WR, 0);
-	rdev->cp.wptr = 0;
-	WREG32(RADEON_CP_RB_WPTR, rdev->cp.wptr);
+	ring->wptr = 0;
+	WREG32(RADEON_CP_RB_WPTR, ring->wptr);
 
 	/* set the wb address whether it's enabled or not */
 	WREG32(R_00070C_CP_RB_RPTR_ADDR,
@@ -1121,7 +1137,7 @@ int r100_cp_init(struct radeon_device *rdev, unsigned ring_size)
 
 	WREG32(RADEON_CP_RB_CNTL, tmp);
 	udelay(10);
-	rdev->cp.rptr = RREG32(RADEON_CP_RB_RPTR);
+	ring->rptr = RREG32(RADEON_CP_RB_RPTR);
 	/* Set cp mode to bus mastering & enable cp */
 	WREG32(RADEON_CP_CSQ_MODE,
 	       REG_SET(RADEON_INDIRECT2_START, indirect2_start) |
@@ -1130,12 +1146,12 @@ int r100_cp_init(struct radeon_device *rdev, unsigned ring_size)
 	WREG32(RADEON_CP_CSQ_MODE, 0x00004D4D);
 	WREG32(RADEON_CP_CSQ_CNTL, RADEON_CSQ_PRIBM_INDBM);
 	radeon_ring_start(rdev);
-	r = radeon_ring_test(rdev);
+	r = radeon_ring_test(rdev, ring);
 	if (r) {
 		DRM_ERROR("radeon: cp isn't working (%d).\n", r);
 		return r;
 	}
-	rdev->cp.ready = true;
+	ring->ready = true;
 	radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
 	return 0;
 }
@@ -1147,7 +1163,7 @@ void r100_cp_fini(struct radeon_device *rdev)
 	}
 	/* Disable ring */
 	r100_cp_disable(rdev);
-	radeon_ring_fini(rdev);
+	radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
 	DRM_INFO("radeon: cp finalized\n");
 }
 
@@ -1155,7 +1171,7 @@ void r100_cp_disable(struct radeon_device *rdev)
 {
 	/* Disable ring */
 	radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
-	rdev->cp.ready = false;
+	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
 	WREG32(RADEON_CP_CSQ_MODE, 0);
 	WREG32(RADEON_CP_CSQ_CNTL, 0);
 	WREG32(R_000770_SCRATCH_UMSK, 0);
@@ -1165,13 +1181,6 @@ void r100_cp_disable(struct radeon_device *rdev)
 	}
 }
 
-void r100_cp_commit(struct radeon_device *rdev)
-{
-	WREG32(RADEON_CP_RB_WPTR, rdev->cp.wptr);
-	(void)RREG32(RADEON_CP_RB_WPTR);
-}
-
-
 /*
  * CS functions
  */
@@ -2099,9 +2108,9 @@ int r100_mc_wait_for_idle(struct radeon_device *rdev)
 	return -1;
 }
 
-void r100_gpu_lockup_update(struct r100_gpu_lockup *lockup, struct radeon_cp *cp)
+void r100_gpu_lockup_update(struct r100_gpu_lockup *lockup, struct radeon_ring *ring)
 {
-	lockup->last_cp_rptr = cp->rptr;
+	lockup->last_cp_rptr = ring->rptr;
 	lockup->last_jiffies = jiffies;
 }
 
@@ -2126,20 +2135,20 @@ void r100_gpu_lockup_update(struct r100_gpu_lockup *lockup, struct radeon_cp *cp
  * false positive when the CP is simply given nothing to do.
  *
  **/
-bool r100_gpu_cp_is_lockup(struct radeon_device *rdev, struct r100_gpu_lockup *lockup, struct radeon_cp *cp)
+bool r100_gpu_cp_is_lockup(struct radeon_device *rdev, struct r100_gpu_lockup *lockup, struct radeon_ring *ring)
 {
 	unsigned long cjiffies, elapsed;
 
 	cjiffies = jiffies;
 	if (!time_after(cjiffies, lockup->last_jiffies)) {
 		/* likely a wrap around */
-		lockup->last_cp_rptr = cp->rptr;
+		lockup->last_cp_rptr = ring->rptr;
 		lockup->last_jiffies = jiffies;
 		return false;
 	}
-	if (cp->rptr != lockup->last_cp_rptr) {
+	if (ring->rptr != lockup->last_cp_rptr) {
 		/* CP is still working, no lockup */
-		lockup->last_cp_rptr = cp->rptr;
+		lockup->last_cp_rptr = ring->rptr;
 		lockup->last_jiffies = jiffies;
 		return false;
 	}
@@ -2152,26 +2161,26 @@ bool r100_gpu_cp_is_lockup(struct radeon_device *rdev, struct r100_gpu_lockup *l
 	return false;
 }
 
-bool r100_gpu_is_lockup(struct radeon_device *rdev)
+bool r100_gpu_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
 {
 	u32 rbbm_status;
 	int r;
 
 	rbbm_status = RREG32(R_000E40_RBBM_STATUS);
 	if (!G_000E40_GUI_ACTIVE(rbbm_status)) {
-		r100_gpu_lockup_update(&rdev->config.r100.lockup, &rdev->cp);
+		r100_gpu_lockup_update(&rdev->config.r100.lockup, ring);
 		return false;
 	}
 	/* force CP activities */
-	r = radeon_ring_lock(rdev, 2);
+	r = radeon_ring_lock(rdev, ring, 2);
 	if (!r) {
 		/* PACKET2 NOP */
-		radeon_ring_write(rdev, 0x80000000);
-		radeon_ring_write(rdev, 0x80000000);
-		radeon_ring_unlock_commit(rdev);
+		radeon_ring_write(ring, 0x80000000);
+		radeon_ring_write(ring, 0x80000000);
+		radeon_ring_unlock_commit(rdev, ring);
 	}
-	rdev->cp.rptr = RREG32(RADEON_CP_RB_RPTR);
-	return r100_gpu_cp_is_lockup(rdev, &rdev->config.r100.lockup, &rdev->cp);
+	ring->rptr = RREG32(ring->rptr_reg);
+	return r100_gpu_cp_is_lockup(rdev, &rdev->config.r100.lockup, ring);
 }
 
 void r100_bm_disable(struct radeon_device *rdev)
@@ -2579,21 +2588,22 @@ static int r100_debugfs_cp_ring_info(struct seq_file *m, void *data)
 	struct drm_info_node *node = (struct drm_info_node *) m->private;
 	struct drm_device *dev = node->minor->dev;
 	struct radeon_device *rdev = dev->dev_private;
+	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
 	uint32_t rdp, wdp;
 	unsigned count, i, j;
 
-	radeon_ring_free_size(rdev);
+	radeon_ring_free_size(rdev, ring);
 	rdp = RREG32(RADEON_CP_RB_RPTR);
 	wdp = RREG32(RADEON_CP_RB_WPTR);
-	count = (rdp + rdev->cp.ring_size - wdp) & rdev->cp.ptr_mask;
+	count = (rdp + ring->ring_size - wdp) & ring->ptr_mask;
 	seq_printf(m, "CP_STAT 0x%08x\n", RREG32(RADEON_CP_STAT));
 	seq_printf(m, "CP_RB_WPTR 0x%08x\n", wdp);
 	seq_printf(m, "CP_RB_RPTR 0x%08x\n", rdp);
-	seq_printf(m, "%u free dwords in ring\n", rdev->cp.ring_free_dw);
+	seq_printf(m, "%u free dwords in ring\n", ring->ring_free_dw);
 	seq_printf(m, "%u dwords in ring\n", count);
 	for (j = 0; j <= count; j++) {
-		i = (rdp + j) & rdev->cp.ptr_mask;
-		seq_printf(m, "r[%04d]=0x%08x\n", i, rdev->cp.ring[i]);
+		i = (rdp + j) & ring->ptr_mask;
+		seq_printf(m, "r[%04d]=0x%08x\n", i, ring->ring[i]);
 	}
 	return 0;
 }
@@ -3635,7 +3645,7 @@ void r100_cs_track_clear(struct radeon_device *rdev, struct r100_cs_track *track
 	}
 }
 
-int r100_ring_test(struct radeon_device *rdev)
+int r100_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
 {
 	uint32_t scratch;
 	uint32_t tmp = 0;
@@ -3648,15 +3658,15 @@ int r100_ring_test(struct radeon_device *rdev)
 		return r;
 	}
 	WREG32(scratch, 0xCAFEDEAD);
-	r = radeon_ring_lock(rdev, 2);
+	r = radeon_ring_lock(rdev, ring, 2);
 	if (r) {
 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
 		radeon_scratch_free(rdev, scratch);
 		return r;
 	}
-	radeon_ring_write(rdev, PACKET0(scratch, 0));
-	radeon_ring_write(rdev, 0xDEADBEEF);
-	radeon_ring_unlock_commit(rdev);
+	radeon_ring_write(ring, PACKET0(scratch, 0));
+	radeon_ring_write(ring, 0xDEADBEEF);
+	radeon_ring_unlock_commit(rdev, ring);
 	for (i = 0; i < rdev->usec_timeout; i++) {
 		tmp = RREG32(scratch);
 		if (tmp == 0xDEADBEEF) {
@@ -3677,9 +3687,11 @@ int r100_ring_test(struct radeon_device *rdev)
 
 void r100_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
 {
-	radeon_ring_write(rdev, PACKET0(RADEON_CP_IB_BASE, 1));
-	radeon_ring_write(rdev, ib->gpu_addr);
-	radeon_ring_write(rdev, ib->length_dw);
+	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
+
+	radeon_ring_write(ring, PACKET0(RADEON_CP_IB_BASE, 1));
+	radeon_ring_write(ring, ib->gpu_addr);
+	radeon_ring_write(ring, ib->length_dw);
 }
 
 int r100_ib_test(struct radeon_device *rdev)
@@ -3696,7 +3708,7 @@ int r100_ib_test(struct radeon_device *rdev)
 		return r;
 	}
 	WREG32(scratch, 0xCAFEDEAD);
-	r = radeon_ib_get(rdev, &ib);
+	r = radeon_ib_get(rdev, RADEON_RING_TYPE_GFX_INDEX, &ib);
 	if (r) {
 		return r;
 	}
@@ -3740,34 +3752,16 @@ int r100_ib_test(struct radeon_device *rdev)
 
 void r100_ib_fini(struct radeon_device *rdev)
 {
+	radeon_ib_pool_suspend(rdev);
 	radeon_ib_pool_fini(rdev);
 }
 
-int r100_ib_init(struct radeon_device *rdev)
-{
-	int r;
-
-	r = radeon_ib_pool_init(rdev);
-	if (r) {
-		dev_err(rdev->dev, "failed initializing IB pool (%d).\n", r);
-		r100_ib_fini(rdev);
-		return r;
-	}
-	r = r100_ib_test(rdev);
-	if (r) {
-		dev_err(rdev->dev, "failed testing IB (%d).\n", r);
-		r100_ib_fini(rdev);
-		return r;
-	}
-	return 0;
-}
-
 void r100_mc_stop(struct radeon_device *rdev, struct r100_mc_save *save)
 {
 	/* Shut down the CP. We shouldn't need to do that, but better
 	 * safe than sorry.
 	 */
-	rdev->cp.ready = false;
+	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
 	WREG32(R_000740_CP_CSQ_CNTL, 0);
 
 	/* Save few CRTC registers */
@@ -3905,6 +3899,12 @@ static int r100_startup(struct radeon_device *rdev)
 	if (r)
 		return r;
 
+	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
+	if (r) {
+		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
+		return r;
+	}
+
 	/* Enable IRQ */
 	r100_irq_set(rdev);
 	rdev->config.r100.hdp_cntl = RREG32(RADEON_HOST_PATH_CNTL);
@@ -3914,11 +3914,18 @@ static int r100_startup(struct radeon_device *rdev)
 		dev_err(rdev->dev, "failed initializing CP (%d).\n", r);
 		return r;
 	}
-	r = r100_ib_init(rdev);
+
+	r = radeon_ib_pool_start(rdev);
+	if (r)
+		return r;
+
+	r = r100_ib_test(rdev);
 	if (r) {
-		dev_err(rdev->dev, "failed initializing IB (%d).\n", r);
+		dev_err(rdev->dev, "failed testing IB (%d).\n", r);
+		rdev->accel_working = false;
 		return r;
 	}
+
 	return 0;
 }
 
@@ -3941,11 +3948,14 @@ int r100_resume(struct radeon_device *rdev)
 	r100_clock_startup(rdev);
 	/* Initialize surface registers */
 	radeon_surface_init(rdev);
+
+	rdev->accel_working = true;
 	return r100_startup(rdev);
 }
 
 int r100_suspend(struct radeon_device *rdev)
 {
+	radeon_ib_pool_suspend(rdev);
 	r100_cp_disable(rdev);
 	radeon_wb_disable(rdev);
 	r100_irq_disable(rdev);
@@ -4064,7 +4074,14 @@ int r100_init(struct radeon_device *rdev)
 			return r;
 	}
 	r100_set_safe_registers(rdev);
+
+	r = radeon_ib_pool_init(rdev);
 	rdev->accel_working = true;
+	if (r) {
+		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
+		rdev->accel_working = false;
+	}
+
 	r = r100_startup(rdev);
 	if (r) {
		/* Something went wrong with the accel init, stop accel */

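All of the r100.c changes above are one mechanical pattern: every emit path takes an explicit struct radeon_ring * instead of reaching through rdev->cp, and fence state moves into the per-ring rdev->fence_drv[] array. A minimal sketch of the resulting calling convention, condensed from the lockup-check hunk above (error handling abbreviated):

static int example_emit_nops(struct radeon_device *rdev)
{
	/* callers now name the ring explicitly instead of using rdev->cp */
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	int r;

	r = radeon_ring_lock(rdev, ring, 2);	/* reserve 2 dwords */
	if (r)
		return r;
	radeon_ring_write(ring, 0x80000000);	/* PACKET2 NOP */
	radeon_ring_write(ring, 0x80000000);
	radeon_ring_unlock_commit(rdev, ring);	/* bump wptr, kick the CP */
	return 0;
}

Threading the ring through radeon_ring_lock()/radeon_ring_unlock_commit() is also what lets the per-ASIC commit helpers (r100_cp_commit() above, r600_cp_commit() below) be deleted in favor of common code in radeon_ring.c.
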
+ 11 - 10
drivers/gpu/drm/radeon/r200.c

@@ -87,6 +87,7 @@ int r200_copy_dma(struct radeon_device *rdev,
 		  unsigned num_gpu_pages,
 		  struct radeon_fence *fence)
 {
+	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
 	uint32_t size;
 	uint32_t cur_size;
 	int i, num_loops;
@@ -95,33 +96,33 @@ int r200_copy_dma(struct radeon_device *rdev,
 	/* radeon pitch is /64 */
 	size = num_gpu_pages << RADEON_GPU_PAGE_SHIFT;
 	num_loops = DIV_ROUND_UP(size, 0x1FFFFF);
-	r = radeon_ring_lock(rdev, num_loops * 4 + 64);
+	r = radeon_ring_lock(rdev, ring, num_loops * 4 + 64);
 	if (r) {
 		DRM_ERROR("radeon: moving bo (%d).\n", r);
 		return r;
 	}
 	/* Must wait for 2D idle & clean before DMA or hangs might happen */
-	radeon_ring_write(rdev, PACKET0(RADEON_WAIT_UNTIL, 0));
-	radeon_ring_write(rdev, (1 << 16));
+	radeon_ring_write(ring, PACKET0(RADEON_WAIT_UNTIL, 0));
+	radeon_ring_write(ring, (1 << 16));
 	for (i = 0; i < num_loops; i++) {
 		cur_size = size;
 		if (cur_size > 0x1FFFFF) {
 			cur_size = 0x1FFFFF;
 		}
 		size -= cur_size;
-		radeon_ring_write(rdev, PACKET0(0x720, 2));
-		radeon_ring_write(rdev, src_offset);
-		radeon_ring_write(rdev, dst_offset);
-		radeon_ring_write(rdev, cur_size | (1 << 31) | (1 << 30));
+		radeon_ring_write(ring, PACKET0(0x720, 2));
+		radeon_ring_write(ring, src_offset);
+		radeon_ring_write(ring, dst_offset);
+		radeon_ring_write(ring, cur_size | (1 << 31) | (1 << 30));
 		src_offset += cur_size;
 		dst_offset += cur_size;
 	}
-	radeon_ring_write(rdev, PACKET0(RADEON_WAIT_UNTIL, 0));
-	radeon_ring_write(rdev, RADEON_WAIT_DMA_GUI_IDLE);
+	radeon_ring_write(ring, PACKET0(RADEON_WAIT_UNTIL, 0));
+	radeon_ring_write(ring, RADEON_WAIT_DMA_GUI_IDLE);
 	if (fence) {
 		r = radeon_fence_emit(rdev, fence);
 	}
-	radeon_ring_unlock_commit(rdev);
+	radeon_ring_unlock_commit(rdev, ring);
 	return r;
 }
 

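The one functional constant in the r200 path is the 0x1FFFFF-byte cap per DMA packet; the lock size follows from four dwords per packet plus fixed slack for the idle waits and the fence. A worked example of the arithmetic, with a hypothetical transfer size (ndw is just an illustrative name for what radeon_ring_lock() is asked for):

	/* copying 4096 GPU pages of 4 KiB, i.e. 16 MiB; the cap is one
	 * byte short of 2 MiB, so eight packets fall just short of the
	 * total and DIV_ROUND_UP yields nine */
	size      = 4096 << RADEON_GPU_PAGE_SHIFT;	/* 16,777,216 bytes */
	num_loops = DIV_ROUND_UP(size, 0x1FFFFF);	/* 9 DMA packets */
	ndw       = num_loops * 4 + 64;			/* lock 100 dwords */
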
+ 90 - 64
drivers/gpu/drm/radeon/r300.c

@@ -175,37 +175,40 @@ void rv370_pcie_gart_fini(struct radeon_device *rdev)
 void r300_fence_ring_emit(struct radeon_device *rdev,
 			  struct radeon_fence *fence)
 {
+	struct radeon_ring *ring = &rdev->ring[fence->ring];
+
 	/* Whoever calls radeon_fence_emit should call ring_lock and ask
 	 * for enough space (today the callers are ib schedule and buffer move) */
 	/* Write SC register so SC & US assert idle */
-	radeon_ring_write(rdev, PACKET0(R300_RE_SCISSORS_TL, 0));
-	radeon_ring_write(rdev, 0);
-	radeon_ring_write(rdev, PACKET0(R300_RE_SCISSORS_BR, 0));
-	radeon_ring_write(rdev, 0);
+	radeon_ring_write(ring, PACKET0(R300_RE_SCISSORS_TL, 0));
+	radeon_ring_write(ring, 0);
+	radeon_ring_write(ring, PACKET0(R300_RE_SCISSORS_BR, 0));
+	radeon_ring_write(ring, 0);
 	/* Flush 3D cache */
-	radeon_ring_write(rdev, PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0));
-	radeon_ring_write(rdev, R300_RB3D_DC_FLUSH);
-	radeon_ring_write(rdev, PACKET0(R300_RB3D_ZCACHE_CTLSTAT, 0));
-	radeon_ring_write(rdev, R300_ZC_FLUSH);
+	radeon_ring_write(ring, PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0));
+	radeon_ring_write(ring, R300_RB3D_DC_FLUSH);
+	radeon_ring_write(ring, PACKET0(R300_RB3D_ZCACHE_CTLSTAT, 0));
+	radeon_ring_write(ring, R300_ZC_FLUSH);
 	/* Wait until IDLE & CLEAN */
-	radeon_ring_write(rdev, PACKET0(RADEON_WAIT_UNTIL, 0));
-	radeon_ring_write(rdev, (RADEON_WAIT_3D_IDLECLEAN |
+	radeon_ring_write(ring, PACKET0(RADEON_WAIT_UNTIL, 0));
+	radeon_ring_write(ring, (RADEON_WAIT_3D_IDLECLEAN |
 				 RADEON_WAIT_2D_IDLECLEAN |
 				 RADEON_WAIT_DMA_GUI_IDLE));
-	radeon_ring_write(rdev, PACKET0(RADEON_HOST_PATH_CNTL, 0));
-	radeon_ring_write(rdev, rdev->config.r300.hdp_cntl |
+	radeon_ring_write(ring, PACKET0(RADEON_HOST_PATH_CNTL, 0));
+	radeon_ring_write(ring, rdev->config.r300.hdp_cntl |
 				RADEON_HDP_READ_BUFFER_INVALIDATE);
-	radeon_ring_write(rdev, PACKET0(RADEON_HOST_PATH_CNTL, 0));
-	radeon_ring_write(rdev, rdev->config.r300.hdp_cntl);
+	radeon_ring_write(ring, PACKET0(RADEON_HOST_PATH_CNTL, 0));
+	radeon_ring_write(ring, rdev->config.r300.hdp_cntl);
 	/* Emit fence sequence & fire IRQ */
-	radeon_ring_write(rdev, PACKET0(rdev->fence_drv.scratch_reg, 0));
-	radeon_ring_write(rdev, fence->seq);
-	radeon_ring_write(rdev, PACKET0(RADEON_GEN_INT_STATUS, 0));
-	radeon_ring_write(rdev, RADEON_SW_INT_FIRE);
+	radeon_ring_write(ring, PACKET0(rdev->fence_drv[fence->ring].scratch_reg, 0));
+	radeon_ring_write(ring, fence->seq);
+	radeon_ring_write(ring, PACKET0(RADEON_GEN_INT_STATUS, 0));
+	radeon_ring_write(ring, RADEON_SW_INT_FIRE);
 }
 
 void r300_ring_start(struct radeon_device *rdev)
 {
+	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
 	unsigned gb_tile_config;
 	int r;
 
@@ -227,44 +230,44 @@ void r300_ring_start(struct radeon_device *rdev)
 		break;
 	}
 
-	r = radeon_ring_lock(rdev, 64);
+	r = radeon_ring_lock(rdev, ring, 64);
 	if (r) {
 		return;
 	}
-	radeon_ring_write(rdev, PACKET0(RADEON_ISYNC_CNTL, 0));
-	radeon_ring_write(rdev,
+	radeon_ring_write(ring, PACKET0(RADEON_ISYNC_CNTL, 0));
+	radeon_ring_write(ring,
 			  RADEON_ISYNC_ANY2D_IDLE3D |
 			  RADEON_ISYNC_ANY3D_IDLE2D |
 			  RADEON_ISYNC_WAIT_IDLEGUI |
 			  RADEON_ISYNC_CPSCRATCH_IDLEGUI);
-	radeon_ring_write(rdev, PACKET0(R300_GB_TILE_CONFIG, 0));
-	radeon_ring_write(rdev, gb_tile_config);
-	radeon_ring_write(rdev, PACKET0(RADEON_WAIT_UNTIL, 0));
-	radeon_ring_write(rdev,
+	radeon_ring_write(ring, PACKET0(R300_GB_TILE_CONFIG, 0));
+	radeon_ring_write(ring, gb_tile_config);
+	radeon_ring_write(ring, PACKET0(RADEON_WAIT_UNTIL, 0));
+	radeon_ring_write(ring,
 			  RADEON_WAIT_2D_IDLECLEAN |
 			  RADEON_WAIT_3D_IDLECLEAN);
-	radeon_ring_write(rdev, PACKET0(R300_DST_PIPE_CONFIG, 0));
-	radeon_ring_write(rdev, R300_PIPE_AUTO_CONFIG);
-	radeon_ring_write(rdev, PACKET0(R300_GB_SELECT, 0));
-	radeon_ring_write(rdev, 0);
-	radeon_ring_write(rdev, PACKET0(R300_GB_ENABLE, 0));
-	radeon_ring_write(rdev, 0);
-	radeon_ring_write(rdev, PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0));
-	radeon_ring_write(rdev, R300_RB3D_DC_FLUSH | R300_RB3D_DC_FREE);
-	radeon_ring_write(rdev, PACKET0(R300_RB3D_ZCACHE_CTLSTAT, 0));
-	radeon_ring_write(rdev, R300_ZC_FLUSH | R300_ZC_FREE);
-	radeon_ring_write(rdev, PACKET0(RADEON_WAIT_UNTIL, 0));
-	radeon_ring_write(rdev,
+	radeon_ring_write(ring, PACKET0(R300_DST_PIPE_CONFIG, 0));
+	radeon_ring_write(ring, R300_PIPE_AUTO_CONFIG);
+	radeon_ring_write(ring, PACKET0(R300_GB_SELECT, 0));
+	radeon_ring_write(ring, 0);
+	radeon_ring_write(ring, PACKET0(R300_GB_ENABLE, 0));
+	radeon_ring_write(ring, 0);
+	radeon_ring_write(ring, PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0));
+	radeon_ring_write(ring, R300_RB3D_DC_FLUSH | R300_RB3D_DC_FREE);
+	radeon_ring_write(ring, PACKET0(R300_RB3D_ZCACHE_CTLSTAT, 0));
+	radeon_ring_write(ring, R300_ZC_FLUSH | R300_ZC_FREE);
+	radeon_ring_write(ring, PACKET0(RADEON_WAIT_UNTIL, 0));
+	radeon_ring_write(ring,
 			  RADEON_WAIT_2D_IDLECLEAN |
 			  RADEON_WAIT_3D_IDLECLEAN);
-	radeon_ring_write(rdev, PACKET0(R300_GB_AA_CONFIG, 0));
-	radeon_ring_write(rdev, 0);
-	radeon_ring_write(rdev, PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0));
-	radeon_ring_write(rdev, R300_RB3D_DC_FLUSH | R300_RB3D_DC_FREE);
-	radeon_ring_write(rdev, PACKET0(R300_RB3D_ZCACHE_CTLSTAT, 0));
-	radeon_ring_write(rdev, R300_ZC_FLUSH | R300_ZC_FREE);
-	radeon_ring_write(rdev, PACKET0(R300_GB_MSPOS0, 0));
-	radeon_ring_write(rdev,
+	radeon_ring_write(ring, PACKET0(R300_GB_AA_CONFIG, 0));
+	radeon_ring_write(ring, 0);
+	radeon_ring_write(ring, PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0));
+	radeon_ring_write(ring, R300_RB3D_DC_FLUSH | R300_RB3D_DC_FREE);
+	radeon_ring_write(ring, PACKET0(R300_RB3D_ZCACHE_CTLSTAT, 0));
+	radeon_ring_write(ring, R300_ZC_FLUSH | R300_ZC_FREE);
+	radeon_ring_write(ring, PACKET0(R300_GB_MSPOS0, 0));
+	radeon_ring_write(ring,
 			  ((6 << R300_MS_X0_SHIFT) |
 			   (6 << R300_MS_Y0_SHIFT) |
 			   (6 << R300_MS_X1_SHIFT) |
@@ -273,8 +276,8 @@ void r300_ring_start(struct radeon_device *rdev)
 			   (6 << R300_MS_Y2_SHIFT) |
 			   (6 << R300_MSBD0_Y_SHIFT) |
 			   (6 << R300_MSBD0_X_SHIFT)));
-	radeon_ring_write(rdev, PACKET0(R300_GB_MSPOS1, 0));
-	radeon_ring_write(rdev,
+	radeon_ring_write(ring, PACKET0(R300_GB_MSPOS1, 0));
+	radeon_ring_write(ring,
 			  ((6 << R300_MS_X3_SHIFT) |
 			   (6 << R300_MS_Y3_SHIFT) |
 			   (6 << R300_MS_X4_SHIFT) |
@@ -282,16 +285,16 @@ void r300_ring_start(struct radeon_device *rdev)
 			   (6 << R300_MS_X5_SHIFT) |
 			   (6 << R300_MS_Y5_SHIFT) |
 			   (6 << R300_MSBD1_SHIFT)));
-	radeon_ring_write(rdev, PACKET0(R300_GA_ENHANCE, 0));
-	radeon_ring_write(rdev, R300_GA_DEADLOCK_CNTL | R300_GA_FASTSYNC_CNTL);
-	radeon_ring_write(rdev, PACKET0(R300_GA_POLY_MODE, 0));
-	radeon_ring_write(rdev,
+	radeon_ring_write(ring, PACKET0(R300_GA_ENHANCE, 0));
+	radeon_ring_write(ring, R300_GA_DEADLOCK_CNTL | R300_GA_FASTSYNC_CNTL);
+	radeon_ring_write(ring, PACKET0(R300_GA_POLY_MODE, 0));
+	radeon_ring_write(ring,
 			  R300_FRONT_PTYPE_TRIANGE | R300_BACK_PTYPE_TRIANGE);
-	radeon_ring_write(rdev, PACKET0(R300_GA_ROUND_MODE, 0));
-	radeon_ring_write(rdev,
+	radeon_ring_write(ring, PACKET0(R300_GA_ROUND_MODE, 0));
+	radeon_ring_write(ring,
 			  R300_GEOMETRY_ROUND_NEAREST |
 			  R300_COLOR_ROUND_NEAREST);
-	radeon_ring_unlock_commit(rdev);
+	radeon_ring_unlock_commit(rdev, ring);
 }
 
 void r300_errata(struct radeon_device *rdev)
@@ -375,26 +378,26 @@ void r300_gpu_init(struct radeon_device *rdev)
 		 rdev->num_gb_pipes, rdev->num_z_pipes);
 }
 
-bool r300_gpu_is_lockup(struct radeon_device *rdev)
+bool r300_gpu_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
 {
 	u32 rbbm_status;
 	int r;
 
 	rbbm_status = RREG32(R_000E40_RBBM_STATUS);
 	if (!G_000E40_GUI_ACTIVE(rbbm_status)) {
-		r100_gpu_lockup_update(&rdev->config.r300.lockup, &rdev->cp);
+		r100_gpu_lockup_update(&rdev->config.r300.lockup, ring);
 		return false;
 	}
 	/* force CP activities */
-	r = radeon_ring_lock(rdev, 2);
+	r = radeon_ring_lock(rdev, ring, 2);
 	if (!r) {
 		/* PACKET2 NOP */
-		radeon_ring_write(rdev, 0x80000000);
-		radeon_ring_write(rdev, 0x80000000);
-		radeon_ring_unlock_commit(rdev);
+		radeon_ring_write(ring, 0x80000000);
+		radeon_ring_write(ring, 0x80000000);
+		radeon_ring_unlock_commit(rdev, ring);
 	}
-	rdev->cp.rptr = RREG32(RADEON_CP_RB_RPTR);
-	return r100_gpu_cp_is_lockup(rdev, &rdev->config.r300.lockup, &rdev->cp);
+	ring->rptr = RREG32(RADEON_CP_RB_RPTR);
+	return r100_gpu_cp_is_lockup(rdev, &rdev->config.r300.lockup, ring);
 }
 
 int r300_asic_reset(struct radeon_device *rdev)
@@ -1396,6 +1399,12 @@ static int r300_startup(struct radeon_device *rdev)
 	if (r)
 		return r;
 
+	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
+	if (r) {
+		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
+		return r;
+	}
+
 	/* Enable IRQ */
 	r100_irq_set(rdev);
 	rdev->config.r300.hdp_cntl = RREG32(RADEON_HOST_PATH_CNTL);
@@ -1405,11 +1414,18 @@ static int r300_startup(struct radeon_device *rdev)
 		dev_err(rdev->dev, "failed initializing CP (%d).\n", r);
 		return r;
 	}
-	r = r100_ib_init(rdev);
+
+	r = radeon_ib_pool_start(rdev);
+	if (r)
+		return r;
+
+	r = r100_ib_test(rdev);
 	if (r) {
-		dev_err(rdev->dev, "failed initializing IB (%d).\n", r);
+		dev_err(rdev->dev, "failed testing IB (%d).\n", r);
+		rdev->accel_working = false;
 		return r;
 	}
+
 	return 0;
 }
 
@@ -1434,11 +1450,14 @@ int r300_resume(struct radeon_device *rdev)
 	r300_clock_startup(rdev);
 	/* Initialize surface registers */
 	radeon_surface_init(rdev);
+
+	rdev->accel_working = true;
 	return r300_startup(rdev);
 }
 
 int r300_suspend(struct radeon_device *rdev)
 {
+	radeon_ib_pool_suspend(rdev);
 	r100_cp_disable(rdev);
 	radeon_wb_disable(rdev);
 	r100_irq_disable(rdev);
@@ -1539,7 +1558,14 @@ int r300_init(struct radeon_device *rdev)
 			return r;
 	}
 	r300_set_reg_safe(rdev);
+
+	r = radeon_ib_pool_init(rdev);
 	rdev->accel_working = true;
+	if (r) {
+		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
+		rdev->accel_working = false;
+	}
+
 	r = r300_startup(rdev);
 	if (r) {
		/* Something went wrong with the accel init, stop accel */

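The r300_startup() hunks establish the bring-up order that every ASIC in this series now shares. Schematically, with error paths trimmed and ring_size standing in for the per-ASIC value:

	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
	if (r)
		return r;			/* per-ring fence state first */
	r100_irq_set(rdev);			/* then interrupts */
	r = r100_cp_init(rdev, ring_size);	/* then the CP and its ring */
	if (r)
		return r;
	r = radeon_ib_pool_start(rdev);		/* IB pool resumes on the live ring */
	if (r)
		return r;
	r = r100_ib_test(rdev);			/* and is smoke-tested last */
	if (r)
		rdev->accel_working = false;	/* degrade to no acceleration */
	return r;

The key inversion versus the deleted r100_ib_init() is that pool allocation no longer happens here at all, only a start plus a test, so resume can rerun startup without reallocating anything.
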
+ 38 - 11
drivers/gpu/drm/radeon/r420.c

@@ -199,6 +199,8 @@ static void r420_clock_resume(struct radeon_device *rdev)
 
 static void r420_cp_errata_init(struct radeon_device *rdev)
 {
+	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
+
 	/* RV410 and R420 can lock up if CP DMA to host memory happens
 	 * while the 2D engine is busy.
 	 *
@@ -206,22 +208,24 @@ static void r420_cp_errata_init(struct radeon_device *rdev)
 	 * of the CP init, apparently.
 	 */
 	radeon_scratch_get(rdev, &rdev->config.r300.resync_scratch);
-	radeon_ring_lock(rdev, 8);
-	radeon_ring_write(rdev, PACKET0(R300_CP_RESYNC_ADDR, 1));
-	radeon_ring_write(rdev, rdev->config.r300.resync_scratch);
-	radeon_ring_write(rdev, 0xDEADBEEF);
-	radeon_ring_unlock_commit(rdev);
+	radeon_ring_lock(rdev, ring, 8);
+	radeon_ring_write(ring, PACKET0(R300_CP_RESYNC_ADDR, 1));
+	radeon_ring_write(ring, rdev->config.r300.resync_scratch);
+	radeon_ring_write(ring, 0xDEADBEEF);
+	radeon_ring_unlock_commit(rdev, ring);
 }
 
 static void r420_cp_errata_fini(struct radeon_device *rdev)
 {
+	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
+
 	/* Catch the RESYNC we dispatched all the way back,
 	 * at the very beginning of the CP init.
 	 */
-	radeon_ring_lock(rdev, 8);
-	radeon_ring_write(rdev, PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0));
-	radeon_ring_write(rdev, R300_RB3D_DC_FINISH);
-	radeon_ring_unlock_commit(rdev);
+	radeon_ring_lock(rdev, ring, 8);
+	radeon_ring_write(ring, PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0));
+	radeon_ring_write(ring, R300_RB3D_DC_FINISH);
+	radeon_ring_unlock_commit(rdev, ring);
 	radeon_scratch_free(rdev, rdev->config.r300.resync_scratch);
 }
 
@@ -254,6 +258,12 @@ static int r420_startup(struct radeon_device *rdev)
 	if (r)
 		return r;
 
+	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
+	if (r) {
+		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
+		return r;
+	}
+
 	/* Enable IRQ */
 	r100_irq_set(rdev);
 	rdev->config.r300.hdp_cntl = RREG32(RADEON_HOST_PATH_CNTL);
@@ -264,11 +274,18 @@ static int r420_startup(struct radeon_device *rdev)
 		return r;
 	}
 	r420_cp_errata_init(rdev);
-	r = r100_ib_init(rdev);
+
+	r = radeon_ib_pool_start(rdev);
+	if (r)
+		return r;
+
+	r = r100_ib_test(rdev);
 	if (r) {
-		dev_err(rdev->dev, "failed initializing IB (%d).\n", r);
+		dev_err(rdev->dev, "failed testing IB (%d).\n", r);
+		rdev->accel_working = false;
 		return r;
 	}
+
 	return 0;
 }
 
@@ -297,11 +314,14 @@ int r420_resume(struct radeon_device *rdev)
 	r420_clock_resume(rdev);
 	/* Initialize surface registers */
 	radeon_surface_init(rdev);
+
+	rdev->accel_working = true;
 	return r420_startup(rdev);
 }
 
 int r420_suspend(struct radeon_device *rdev)
 {
+	radeon_ib_pool_suspend(rdev);
 	r420_cp_errata_fini(rdev);
 	r100_cp_disable(rdev);
 	radeon_wb_disable(rdev);
@@ -414,7 +434,14 @@ int r420_init(struct radeon_device *rdev)
 			return r;
 	}
 	r420_set_reg_safe(rdev);
+
+	r = radeon_ib_pool_init(rdev);
 	rdev->accel_working = true;
+	if (r) {
+		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
+		rdev->accel_working = false;
+	}
+
 	r = r420_startup(rdev);
 	if (r) {
		/* Something went wrong with the accel init, stop accel */

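The errata bracket is a scratch-register handshake: a RESYNC write is queued as the very first work the CP executes, and the RB3D_DC_FINISH at teardown catches it. A sketch of the underlying round trip, assuming a register from radeon_scratch_get() (the real code keeps it in rdev->config.r300.resync_scratch and, like the errata path, ignores the lock return):

	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	u32 scratch;
	int r;

	r = radeon_scratch_get(rdev, &scratch);		/* reserve a scratch reg */
	if (r)
		return r;
	radeon_ring_lock(rdev, ring, 8);
	radeon_ring_write(ring, PACKET0(R300_CP_RESYNC_ADDR, 1));
	radeon_ring_write(ring, scratch);		/* where the CP writes */
	radeon_ring_write(ring, 0xDEADBEEF);		/* the value it writes */
	radeon_ring_unlock_commit(rdev, ring);
	/* ... later, radeon_scratch_free(rdev, scratch) releases it */
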
+ 23 - 2
drivers/gpu/drm/radeon/r520.c

@@ -187,6 +187,12 @@ static int r520_startup(struct radeon_device *rdev)
 	if (r)
 		return r;
 
+	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
+	if (r) {
+		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
+		return r;
+	}
+
 	/* Enable IRQ */
 	rs600_irq_set(rdev);
 	rdev->config.r300.hdp_cntl = RREG32(RADEON_HOST_PATH_CNTL);
@@ -196,9 +202,15 @@ static int r520_startup(struct radeon_device *rdev)
 		dev_err(rdev->dev, "failed initializing CP (%d).\n", r);
 		return r;
 	}
-	r = r100_ib_init(rdev);
+
+	r = radeon_ib_pool_start(rdev);
+	if (r)
+		return r;
+
+	r = r100_ib_test(rdev);
 	if (r) {
-		dev_err(rdev->dev, "failed initializing IB (%d).\n", r);
+		dev_err(rdev->dev, "failed testing IB (%d).\n", r);
+		rdev->accel_working = false;
 		return r;
 	}
 	return 0;
@@ -223,6 +235,8 @@ int r520_resume(struct radeon_device *rdev)
 	rv515_clock_startup(rdev);
 	/* Initialize surface registers */
 	radeon_surface_init(rdev);
+
+	rdev->accel_working = true;
 	return r520_startup(rdev);
 }
 
@@ -292,7 +306,14 @@ int r520_init(struct radeon_device *rdev)
 	if (r)
 		return r;
 	rv515_set_safe_registers(rdev);
+
+	r = radeon_ib_pool_init(rdev);
 	rdev->accel_working = true;
+	if (r) {
+		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
+		rdev->accel_working = false;
+	}
+
 	r = r520_startup(rdev);
 	if (r) {
		/* Something went wrong with the accel init, stop accel */

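The matching *_init() change is identical in every file above: the IB pool is allocated once at driver init, and accel_working is set before startup so a failure can downgrade to an unaccelerated driver instead of unwinding init. Condensed from the hunk above:

	r = radeon_ib_pool_init(rdev);		/* allocate the IB pool once */
	rdev->accel_working = true;
	if (r) {
		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
		rdev->accel_working = false;	/* keep modesetting, drop accel */
	}
	r = r520_startup(rdev);			/* may clear accel_working again */

Suspend and resume then pair radeon_ib_pool_suspend() with the radeon_ib_pool_start() call inside startup, so the pool's backing memory survives the cycle.
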
+ 136 - 137
drivers/gpu/drm/radeon/r600.c

@@ -1344,7 +1344,7 @@ int r600_gpu_soft_reset(struct radeon_device *rdev)
 	return 0;
 }
 
-bool r600_gpu_is_lockup(struct radeon_device *rdev)
+bool r600_gpu_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
 {
 	u32 srbm_status;
 	u32 grbm_status;
@@ -1361,19 +1361,19 @@ bool r600_gpu_is_lockup(struct radeon_device *rdev)
 	grbm_status = RREG32(R_008010_GRBM_STATUS);
 	grbm_status2 = RREG32(R_008014_GRBM_STATUS2);
 	if (!G_008010_GUI_ACTIVE(grbm_status)) {
-		r100_gpu_lockup_update(lockup, &rdev->cp);
+		r100_gpu_lockup_update(lockup, ring);
 		return false;
 	}
 	/* force CP activities */
-	r = radeon_ring_lock(rdev, 2);
+	r = radeon_ring_lock(rdev, ring, 2);
 	if (!r) {
 		/* PACKET2 NOP */
-		radeon_ring_write(rdev, 0x80000000);
-		radeon_ring_write(rdev, 0x80000000);
-		radeon_ring_unlock_commit(rdev);
+		radeon_ring_write(ring, 0x80000000);
+		radeon_ring_write(ring, 0x80000000);
+		radeon_ring_unlock_commit(rdev, ring);
 	}
-	rdev->cp.rptr = RREG32(R600_CP_RB_RPTR);
-	return r100_gpu_cp_is_lockup(rdev, lockup, &rdev->cp);
+	ring->rptr = RREG32(ring->rptr_reg);
+	return r100_gpu_cp_is_lockup(rdev, lockup, ring);
 }
 
 int r600_asic_reset(struct radeon_device *rdev)
@@ -2144,27 +2144,28 @@ static int r600_cp_load_microcode(struct radeon_device *rdev)
 
 int r600_cp_start(struct radeon_device *rdev)
 {
+	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
 	int r;
 	uint32_t cp_me;
 
-	r = radeon_ring_lock(rdev, 7);
+	r = radeon_ring_lock(rdev, ring, 7);
 	if (r) {
 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
 		return r;
 	}
-	radeon_ring_write(rdev, PACKET3(PACKET3_ME_INITIALIZE, 5));
-	radeon_ring_write(rdev, 0x1);
+	radeon_ring_write(ring, PACKET3(PACKET3_ME_INITIALIZE, 5));
+	radeon_ring_write(ring, 0x1);
 	if (rdev->family >= CHIP_RV770) {
-		radeon_ring_write(rdev, 0x0);
-		radeon_ring_write(rdev, rdev->config.rv770.max_hw_contexts - 1);
+		radeon_ring_write(ring, 0x0);
+		radeon_ring_write(ring, rdev->config.rv770.max_hw_contexts - 1);
 	} else {
-		radeon_ring_write(rdev, 0x3);
-		radeon_ring_write(rdev, rdev->config.r600.max_hw_contexts - 1);
+		radeon_ring_write(ring, 0x3);
+		radeon_ring_write(ring, rdev->config.r600.max_hw_contexts - 1);
 	}
-	radeon_ring_write(rdev, PACKET3_ME_INITIALIZE_DEVICE_ID(1));
-	radeon_ring_write(rdev, 0);
-	radeon_ring_write(rdev, 0);
-	radeon_ring_unlock_commit(rdev);
+	radeon_ring_write(ring, PACKET3_ME_INITIALIZE_DEVICE_ID(1));
+	radeon_ring_write(ring, 0);
+	radeon_ring_write(ring, 0);
+	radeon_ring_unlock_commit(rdev, ring);
 
 	cp_me = 0xff;
 	WREG32(R_0086D8_CP_ME_CNTL, cp_me);
@@ -2173,6 +2174,7 @@ int r600_cp_start(struct radeon_device *rdev)
 
 int r600_cp_resume(struct radeon_device *rdev)
 {
+	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
 	u32 tmp;
 	u32 rb_bufsz;
 	int r;
@@ -2184,13 +2186,13 @@ int r600_cp_resume(struct radeon_device *rdev)
 	WREG32(GRBM_SOFT_RESET, 0);
 
 	/* Set ring buffer size */
-	rb_bufsz = drm_order(rdev->cp.ring_size / 8);
+	rb_bufsz = drm_order(ring->ring_size / 8);
 	tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
 #ifdef __BIG_ENDIAN
 	tmp |= BUF_SWAP_32BIT;
 #endif
 	WREG32(CP_RB_CNTL, tmp);
-	WREG32(CP_SEM_WAIT_TIMER, 0x4);
+	WREG32(CP_SEM_WAIT_TIMER, 0x0);
 
 	/* Set the write pointer delay */
 	WREG32(CP_RB_WPTR_DELAY, 0);
@@ -2198,8 +2200,8 @@ int r600_cp_resume(struct radeon_device *rdev)
 	/* Initialize the ring buffer's read and write pointers */
 	WREG32(CP_RB_CNTL, tmp | RB_RPTR_WR_ENA);
 	WREG32(CP_RB_RPTR_WR, 0);
-	rdev->cp.wptr = 0;
-	WREG32(CP_RB_WPTR, rdev->cp.wptr);
+	ring->wptr = 0;
+	WREG32(CP_RB_WPTR, ring->wptr);
 
 	/* set the wb address whether it's enabled or not */
 	WREG32(CP_RB_RPTR_ADDR,
@@ -2217,42 +2219,36 @@ int r600_cp_resume(struct radeon_device *rdev)
 	mdelay(1);
 	WREG32(CP_RB_CNTL, tmp);
 
-	WREG32(CP_RB_BASE, rdev->cp.gpu_addr >> 8);
+	WREG32(CP_RB_BASE, ring->gpu_addr >> 8);
 	WREG32(CP_DEBUG, (1 << 27) | (1 << 28));
 
-	rdev->cp.rptr = RREG32(CP_RB_RPTR);
+	ring->rptr = RREG32(CP_RB_RPTR);
 
 	r600_cp_start(rdev);
-	rdev->cp.ready = true;
-	r = radeon_ring_test(rdev);
+	ring->ready = true;
+	r = radeon_ring_test(rdev, ring);
 	if (r) {
-		rdev->cp.ready = false;
+		ring->ready = false;
 		return r;
 	}
 	return 0;
 }
 
-void r600_cp_commit(struct radeon_device *rdev)
-{
-	WREG32(CP_RB_WPTR, rdev->cp.wptr);
-	(void)RREG32(CP_RB_WPTR);
-}
-
-void r600_ring_init(struct radeon_device *rdev, unsigned ring_size)
+void r600_ring_init(struct radeon_device *rdev, struct radeon_ring *ring, unsigned ring_size)
 {
 	u32 rb_bufsz;
 
 	/* Align ring size */
 	rb_bufsz = drm_order(ring_size / 8);
 	ring_size = (1 << (rb_bufsz + 1)) * 4;
-	rdev->cp.ring_size = ring_size;
-	rdev->cp.align_mask = 16 - 1;
+	ring->ring_size = ring_size;
+	ring->align_mask = 16 - 1;
 }
 
 void r600_cp_fini(struct radeon_device *rdev)
 {
 	r600_cp_stop(rdev);
-	radeon_ring_fini(rdev);
+	radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
 }
 
 
@@ -2271,11 +2267,11 @@ void r600_scratch_init(struct radeon_device *rdev)
 	}
 }
 
-int r600_ring_test(struct radeon_device *rdev)
+int r600_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
 {
 	uint32_t scratch;
 	uint32_t tmp = 0;
-	unsigned i;
+	unsigned i, ridx = radeon_ring_index(rdev, ring);
 	int r;
 
 	r = radeon_scratch_get(rdev, &scratch);
@@ -2284,16 +2280,16 @@ int r600_ring_test(struct radeon_device *rdev)
 		return r;
 	}
 	WREG32(scratch, 0xCAFEDEAD);
-	r = radeon_ring_lock(rdev, 3);
+	r = radeon_ring_lock(rdev, ring, 3);
 	if (r) {
-		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
+		DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ridx, r);
 		radeon_scratch_free(rdev, scratch);
 		return r;
 	}
-	radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 1));
-	radeon_ring_write(rdev, ((scratch - PACKET3_SET_CONFIG_REG_OFFSET) >> 2));
-	radeon_ring_write(rdev, 0xDEADBEEF);
-	radeon_ring_unlock_commit(rdev);
+	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
+	radeon_ring_write(ring, ((scratch - PACKET3_SET_CONFIG_REG_OFFSET) >> 2));
+	radeon_ring_write(ring, 0xDEADBEEF);
+	radeon_ring_unlock_commit(rdev, ring);
 	for (i = 0; i < rdev->usec_timeout; i++) {
 		tmp = RREG32(scratch);
 		if (tmp == 0xDEADBEEF)
@@ -2301,10 +2297,10 @@ int r600_ring_test(struct radeon_device *rdev)
 		DRM_UDELAY(1);
 	}
 	if (i < rdev->usec_timeout) {
-		DRM_INFO("ring test succeeded in %d usecs\n", i);
+		DRM_INFO("ring test on %d succeeded in %d usecs\n", ridx, i);
 	} else {
-		DRM_ERROR("radeon: ring test failed (scratch(0x%04X)=0x%08X)\n",
-			  scratch, tmp);
+		DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
+			  ridx, scratch, tmp);
 		r = -EINVAL;
 	}
 	radeon_scratch_free(rdev, scratch);
@@ -2314,49 +2310,63 @@ int r600_ring_test(struct radeon_device *rdev)
 void r600_fence_ring_emit(struct radeon_device *rdev,
 			  struct radeon_fence *fence)
 {
+	struct radeon_ring *ring = &rdev->ring[fence->ring];
+
 	if (rdev->wb.use_event) {
-		u64 addr = rdev->wb.gpu_addr + R600_WB_EVENT_OFFSET +
-			(u64)(rdev->fence_drv.scratch_reg - rdev->scratch.reg_base);
+		u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
 		/* flush read cache over gart */
-		radeon_ring_write(rdev, PACKET3(PACKET3_SURFACE_SYNC, 3));
-		radeon_ring_write(rdev, PACKET3_TC_ACTION_ENA |
+		radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
+		radeon_ring_write(ring, PACKET3_TC_ACTION_ENA |
 					PACKET3_VC_ACTION_ENA |
 					PACKET3_SH_ACTION_ENA);
-		radeon_ring_write(rdev, 0xFFFFFFFF);
-		radeon_ring_write(rdev, 0);
-		radeon_ring_write(rdev, 10); /* poll interval */
+		radeon_ring_write(ring, 0xFFFFFFFF);
+		radeon_ring_write(ring, 0);
+		radeon_ring_write(ring, 10); /* poll interval */
 		/* EVENT_WRITE_EOP - flush caches, send int */
-		radeon_ring_write(rdev, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
-		radeon_ring_write(rdev, EVENT_TYPE(CACHE_FLUSH_AND_INV_EVENT_TS) | EVENT_INDEX(5));
-		radeon_ring_write(rdev, addr & 0xffffffff);
-		radeon_ring_write(rdev, (upper_32_bits(addr) & 0xff) | DATA_SEL(1) | INT_SEL(2));
-		radeon_ring_write(rdev, fence->seq);
-		radeon_ring_write(rdev, 0);
+		radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
+		radeon_ring_write(ring, EVENT_TYPE(CACHE_FLUSH_AND_INV_EVENT_TS) | EVENT_INDEX(5));
+		radeon_ring_write(ring, addr & 0xffffffff);
+		radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | DATA_SEL(1) | INT_SEL(2));
+		radeon_ring_write(ring, fence->seq);
+		radeon_ring_write(ring, 0);
 	} else {
 		/* flush read cache over gart */
-		radeon_ring_write(rdev, PACKET3(PACKET3_SURFACE_SYNC, 3));
-		radeon_ring_write(rdev, PACKET3_TC_ACTION_ENA |
+		radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
+		radeon_ring_write(ring, PACKET3_TC_ACTION_ENA |
 					PACKET3_VC_ACTION_ENA |
 					PACKET3_SH_ACTION_ENA);
-		radeon_ring_write(rdev, 0xFFFFFFFF);
-		radeon_ring_write(rdev, 0);
-		radeon_ring_write(rdev, 10); /* poll interval */
-		radeon_ring_write(rdev, PACKET3(PACKET3_EVENT_WRITE, 0));
-		radeon_ring_write(rdev, EVENT_TYPE(CACHE_FLUSH_AND_INV_EVENT) | EVENT_INDEX(0));
+		radeon_ring_write(ring, 0xFFFFFFFF);
+		radeon_ring_write(ring, 0);
+		radeon_ring_write(ring, 10); /* poll interval */
+		radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
+		radeon_ring_write(ring, EVENT_TYPE(CACHE_FLUSH_AND_INV_EVENT) | EVENT_INDEX(0));
 		/* wait for 3D idle clean */
-		radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 1));
-		radeon_ring_write(rdev, (WAIT_UNTIL - PACKET3_SET_CONFIG_REG_OFFSET) >> 2);
-		radeon_ring_write(rdev, WAIT_3D_IDLE_bit | WAIT_3D_IDLECLEAN_bit);
+		radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
+		radeon_ring_write(ring, (WAIT_UNTIL - PACKET3_SET_CONFIG_REG_OFFSET) >> 2);
+		radeon_ring_write(ring, WAIT_3D_IDLE_bit | WAIT_3D_IDLECLEAN_bit);
 		/* Emit fence sequence & fire IRQ */
-		radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 1));
-		radeon_ring_write(rdev, ((rdev->fence_drv.scratch_reg - PACKET3_SET_CONFIG_REG_OFFSET) >> 2));
-		radeon_ring_write(rdev, fence->seq);
+		radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
+		radeon_ring_write(ring, ((rdev->fence_drv[fence->ring].scratch_reg - PACKET3_SET_CONFIG_REG_OFFSET) >> 2));
+		radeon_ring_write(ring, fence->seq);
 		/* CP_INTERRUPT packet 3 no longer exists, use packet 0 */
-		radeon_ring_write(rdev, PACKET0(CP_INT_STATUS, 0));
-		radeon_ring_write(rdev, RB_INT_STAT);
+		radeon_ring_write(ring, PACKET0(CP_INT_STATUS, 0));
+		radeon_ring_write(ring, RB_INT_STAT);
 	}
 }
 
+void r600_semaphore_ring_emit(struct radeon_device *rdev,
+			      struct radeon_ring *ring,
+			      struct radeon_semaphore *semaphore,
+			      bool emit_wait)
+{
+	uint64_t addr = semaphore->gpu_addr;
+	unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
+
+	radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
+	radeon_ring_write(ring, addr & 0xffffffff);
+	radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | sel);
+}
+
 int r600_copy_blit(struct radeon_device *rdev,
 		   uint64_t src_offset,
 		   uint64_t dst_offset,
@@ -2409,6 +2419,7 @@ void r600_clear_surface_reg(struct radeon_device *rdev, int reg)
 
 int r600_startup(struct radeon_device *rdev)
 {
+	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
 	int r;
 
 	/* enable pcie gen2 link */
@@ -2447,6 +2458,12 @@ int r600_startup(struct radeon_device *rdev)
 	if (r)
 		return r;
 
+	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
+	if (r) {
+		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
+		return r;
+	}
+
 	/* Enable IRQ */
 	r = r600_irq_init(rdev);
 	if (r) {
@@ -2456,7 +2473,10 @@ int r600_startup(struct radeon_device *rdev)
 	}
 	r600_irq_set(rdev);
 
-	r = radeon_ring_init(rdev, rdev->cp.ring_size);
+	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
+			     R600_CP_RB_RPTR, R600_CP_RB_WPTR,
+			     0, 0xfffff, RADEON_CP_PACKET2);
+
 	if (r)
 		return r;
 	r = r600_cp_load_microcode(rdev);
@@ -2466,6 +2486,17 @@ int r600_startup(struct radeon_device *rdev)
 	if (r)
 		return r;
 
+	r = radeon_ib_pool_start(rdev);
+	if (r)
+		return r;
+
+	r = r600_ib_test(rdev, RADEON_RING_TYPE_GFX_INDEX);
+	if (r) {
+		DRM_ERROR("radeon: failed testing IB (%d).\n", r);
+		rdev->accel_working = false;
+		return r;
+	}
+
 	return 0;
 }
 
@@ -2494,18 +2525,13 @@ int r600_resume(struct radeon_device *rdev)
 	/* post card */
 	atom_asic_init(rdev->mode_info.atom_context);
 
+	rdev->accel_working = true;
 	r = r600_startup(rdev);
 	if (r) {
 		DRM_ERROR("r600 startup failed on resume\n");
 		return r;
 	}
 
-	r = r600_ib_test(rdev);
-	if (r) {
-		DRM_ERROR("radeon: failed testing IB (%d).\n", r);
-		return r;
-	}
-
 	r = r600_audio_init(rdev);
 	if (r) {
 		DRM_ERROR("radeon: audio resume failed\n");
@@ -2518,13 +2544,14 @@ int r600_resume(struct radeon_device *rdev)
 int r600_suspend(struct radeon_device *rdev)
 {
 	r600_audio_fini(rdev);
+	radeon_ib_pool_suspend(rdev);
+	r600_blit_suspend(rdev);
 	/* FIXME: we should wait for ring to be empty */
 	r600_cp_stop(rdev);
-	rdev->cp.ready = false;
+	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
 	r600_irq_suspend(rdev);
 	radeon_wb_disable(rdev);
 	r600_pcie_gart_disable(rdev);
-	r600_blit_suspend(rdev);
 
 	return 0;
 }
@@ -2595,8 +2622,8 @@ int r600_init(struct radeon_device *rdev)
 	if (r)
 		return r;
 
-	rdev->cp.ring_obj = NULL;
-	r600_ring_init(rdev, 1024 * 1024);
+	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ring_obj = NULL;
+	r600_ring_init(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX], 1024 * 1024);
 
 	rdev->ih.ring_obj = NULL;
 	r600_ih_ring_init(rdev, 64 * 1024);
@@ -2605,30 +2632,24 @@ int r600_init(struct radeon_device *rdev)
 	if (r)
 		return r;
 
+	r = radeon_ib_pool_init(rdev);
 	rdev->accel_working = true;
+	if (r) {
+		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
+		rdev->accel_working = false;
+	}
+
 	r = r600_startup(rdev);
 	if (r) {
 		dev_err(rdev->dev, "disabling GPU acceleration\n");
 		r600_cp_fini(rdev);
 		r600_irq_fini(rdev);
 		radeon_wb_fini(rdev);
+		r100_ib_fini(rdev);
 		radeon_irq_kms_fini(rdev);
 		r600_pcie_gart_fini(rdev);
 		rdev->accel_working = false;
 	}
-	if (rdev->accel_working) {
-		r = radeon_ib_pool_init(rdev);
-		if (r) {
-			dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
-			rdev->accel_working = false;
-		} else {
-			r = r600_ib_test(rdev);
-			if (r) {
-				dev_err(rdev->dev, "IB test failed (%d).\n", r);
-				rdev->accel_working = false;
-			}
-		}
-	}
 
 	r = r600_audio_init(rdev);
 	if (r)
@@ -2643,12 +2664,13 @@ void r600_fini(struct radeon_device *rdev)
 	r600_cp_fini(rdev);
 	r600_irq_fini(rdev);
 	radeon_wb_fini(rdev);
-	radeon_ib_pool_fini(rdev);
+	r100_ib_fini(rdev);
 	radeon_irq_kms_fini(rdev);
 	r600_pcie_gart_fini(rdev);
 	r600_vram_scratch_fini(rdev);
 	radeon_agp_fini(rdev);
 	radeon_gem_fini(rdev);
+	radeon_semaphore_driver_fini(rdev);
 	radeon_fence_driver_fini(rdev);
 	radeon_bo_fini(rdev);
 	radeon_atombios_fini(rdev);
@@ -2662,18 +2684,20 @@ void r600_fini(struct radeon_device *rdev)
  */
 void r600_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
 {
+	struct radeon_ring *ring = &rdev->ring[ib->fence->ring];
+
 	/* FIXME: implement */
-	radeon_ring_write(rdev, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
-	radeon_ring_write(rdev,
+	radeon_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
+	radeon_ring_write(ring,
 #ifdef __BIG_ENDIAN
 			  (2 << 0) |
 #endif
 			  (ib->gpu_addr & 0xFFFFFFFC));
-	radeon_ring_write(rdev, upper_32_bits(ib->gpu_addr) & 0xFF);
-	radeon_ring_write(rdev, ib->length_dw);
+	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFF);
+	radeon_ring_write(ring, ib->length_dw);
 }
 
-int r600_ib_test(struct radeon_device *rdev)
+int r600_ib_test(struct radeon_device *rdev, int ring)
 {
 	struct radeon_ib *ib;
 	uint32_t scratch;
@@ -2687,7 +2711,7 @@ int r600_ib_test(struct radeon_device *rdev)
 		return r;
 	}
 	WREG32(scratch, 0xCAFEDEAD);
-	r = radeon_ib_get(rdev, &ib);
+	r = radeon_ib_get(rdev, ring, &ib);
 	if (r) {
 		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
 		return r;
@@ -2728,7 +2752,7 @@ int r600_ib_test(struct radeon_device *rdev)
 		DRM_UDELAY(1);
 	}
 	if (i < rdev->usec_timeout) {
-		DRM_INFO("ib test succeeded in %u usecs\n", i);
+		DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib->fence->ring, i);
 	} else {
 		DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
 			  scratch, tmp);
@@ -3075,7 +3099,7 @@ int r600_irq_set(struct radeon_device *rdev)
 		hpd3 = RREG32(DC_HOT_PLUG_DETECT3_INT_CONTROL) & ~DC_HPDx_INT_EN;
 	}
 
-	if (rdev->irq.sw_int) {
+	if (rdev->irq.sw_int[RADEON_RING_TYPE_GFX_INDEX]) {
 		DRM_DEBUG("r600_irq_set: sw int\n");
 		cp_int_cntl |= RB_INT_ENABLE;
 		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
@@ -3459,11 +3483,11 @@ restart_ih:
 		case 177: /* CP_INT in IB1 */
 		case 178: /* CP_INT in IB2 */
 			DRM_DEBUG("IH: CP int: 0x%08x\n", src_data);
-			radeon_fence_process(rdev);
+			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
 			break;
 		case 181: /* CP EOP event */
 			DRM_DEBUG("IH: CP EOP\n");
-			radeon_fence_process(rdev);
+			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
 			break;
 		case 233: /* GUI IDLE */
 			DRM_DEBUG("IH: GUI idle\n");
@@ -3496,30 +3520,6 @@ restart_ih:
  */
 #if defined(CONFIG_DEBUG_FS)
 
-static int r600_debugfs_cp_ring_info(struct seq_file *m, void *data)
-{
-	struct drm_info_node *node = (struct drm_info_node *) m->private;
-	struct drm_device *dev = node->minor->dev;
-	struct radeon_device *rdev = dev->dev_private;
-	unsigned count, i, j;
-
-	radeon_ring_free_size(rdev);
-	count = (rdev->cp.ring_size / 4) - rdev->cp.ring_free_dw;
-	seq_printf(m, "CP_STAT 0x%08x\n", RREG32(CP_STAT));
-	seq_printf(m, "CP_RB_WPTR 0x%08x\n", RREG32(CP_RB_WPTR));
-	seq_printf(m, "CP_RB_RPTR 0x%08x\n", RREG32(CP_RB_RPTR));
-	seq_printf(m, "driver's copy of the CP_RB_WPTR 0x%08x\n", rdev->cp.wptr);
-	seq_printf(m, "driver's copy of the CP_RB_RPTR 0x%08x\n", rdev->cp.rptr);
-	seq_printf(m, "%u free dwords in ring\n", rdev->cp.ring_free_dw);
-	seq_printf(m, "%u dwords in ring\n", count);
-	i = rdev->cp.rptr;
-	for (j = 0; j <= count; j++) {
-		seq_printf(m, "r[%04d]=0x%08x\n", i, rdev->cp.ring[i]);
-		i = (i + 1) & rdev->cp.ptr_mask;
-	}
-	return 0;
-}
-
 static int r600_debugfs_mc_info(struct seq_file *m, void *data)
 {
 	struct drm_info_node *node = (struct drm_info_node *) m->private;
@@ -3533,7 +3533,6 @@ static int r600_debugfs_mc_info(struct seq_file *m, void *data)
 
 static struct drm_info_list r600_mc_info_list[] = {
 	{"r600_mc_info", r600_debugfs_mc_info, 0, NULL},
-	{"r600_ring_info", r600_debugfs_cp_ring_info, 0, NULL},
 };
 #endif
 

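r600_semaphore_ring_emit() above is the primitive behind the new inter-ring synchronization: PACKET3_MEM_SEMAPHORE carries the semaphore's 40-bit GPU address plus a wait/signal select (PACKET3_SEM_SEL_WAIT or PACKET3_SEM_SEL_SIGNAL). A hedged sketch of pairing the two halves; sem, ring_a and ring_b are placeholders, and the allocator lives in the new radeon_semaphore.c, which is outside this excerpt:

	/* make ring_b stall until ring_a has passed this point; both
	 * rings are assumed to be locked with enough space already */
	r600_semaphore_ring_emit(rdev, ring_a, sem, false);	/* signal */
	r600_semaphore_ring_emit(rdev, ring_b, sem, true);	/* wait */

The fence side stays per-ring but independent of semaphores: with write-back events enabled, the EOP packet now takes its address from rdev->fence_drv[fence->ring].gpu_addr, precomputed once rather than re-derived from the scratch base on every emit.
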
+ 115 - 106
drivers/gpu/drm/radeon/r600_blit_kms.c

@@ -50,6 +50,7 @@ static void
 set_render_target(struct radeon_device *rdev, int format,
 		  int w, int h, u64 gpu_addr)
 {
+	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
 	u32 cb_color_info;
 	int pitch, slice;
 
@@ -63,38 +64,38 @@ set_render_target(struct radeon_device *rdev, int format,
 	pitch = (w / 8) - 1;
 	slice = ((w * h) / 64) - 1;
 
-	radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
-	radeon_ring_write(rdev, (CB_COLOR0_BASE - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
-	radeon_ring_write(rdev, gpu_addr >> 8);
+	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
+	radeon_ring_write(ring, (CB_COLOR0_BASE - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
+	radeon_ring_write(ring, gpu_addr >> 8);
 
 	if (rdev->family > CHIP_R600 && rdev->family < CHIP_RV770) {
-		radeon_ring_write(rdev, PACKET3(PACKET3_SURFACE_BASE_UPDATE, 0));
-		radeon_ring_write(rdev, 2 << 0);
+		radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_BASE_UPDATE, 0));
+		radeon_ring_write(ring, 2 << 0);
 	}
 
-	radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
-	radeon_ring_write(rdev, (CB_COLOR0_SIZE - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
-	radeon_ring_write(rdev, (pitch << 0) | (slice << 10));
+	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
+	radeon_ring_write(ring, (CB_COLOR0_SIZE - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
+	radeon_ring_write(ring, (pitch << 0) | (slice << 10));
 
-	radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
-	radeon_ring_write(rdev, (CB_COLOR0_VIEW - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
-	radeon_ring_write(rdev, 0);
+	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
+	radeon_ring_write(ring, (CB_COLOR0_VIEW - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
+	radeon_ring_write(ring, 0);
 
-	radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
-	radeon_ring_write(rdev, (CB_COLOR0_INFO - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
-	radeon_ring_write(rdev, cb_color_info);
+	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
+	radeon_ring_write(ring, (CB_COLOR0_INFO - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
+	radeon_ring_write(ring, cb_color_info);
 
-	radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
-	radeon_ring_write(rdev, (CB_COLOR0_TILE - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
-	radeon_ring_write(rdev, 0);
+	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
+	radeon_ring_write(ring, (CB_COLOR0_TILE - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
+	radeon_ring_write(ring, 0);
 
-	radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
-	radeon_ring_write(rdev, (CB_COLOR0_FRAG - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
-	radeon_ring_write(rdev, 0);
+	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
+	radeon_ring_write(ring, (CB_COLOR0_FRAG - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
+	radeon_ring_write(ring, 0);
 
-	radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
-	radeon_ring_write(rdev, (CB_COLOR0_MASK - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
-	radeon_ring_write(rdev, 0);
+	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
+	radeon_ring_write(ring, (CB_COLOR0_MASK - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
+	radeon_ring_write(ring, 0);
 }
 
 /* emits 5dw */
@@ -103,6 +104,7 @@ cp_set_surface_sync(struct radeon_device *rdev,
 		    u32 sync_type, u32 size,
 		    u64 mc_addr)
 {
+	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
 	u32 cp_coher_size;
 
 	if (size == 0xffffffff)
@@ -110,17 +112,18 @@ cp_set_surface_sync(struct radeon_device *rdev,
 	else
 		cp_coher_size = ((size + 255) >> 8);
 
-	radeon_ring_write(rdev, PACKET3(PACKET3_SURFACE_SYNC, 3));
-	radeon_ring_write(rdev, sync_type);
-	radeon_ring_write(rdev, cp_coher_size);
-	radeon_ring_write(rdev, mc_addr >> 8);
-	radeon_ring_write(rdev, 10); /* poll interval */
+	radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
+	radeon_ring_write(ring, sync_type);
+	radeon_ring_write(ring, cp_coher_size);
+	radeon_ring_write(ring, mc_addr >> 8);
+	radeon_ring_write(ring, 10); /* poll interval */
 }
 
 /* emits 21dw + 1 surface sync = 26dw */
 static void
 set_shaders(struct radeon_device *rdev)
 {
+	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
 	u64 gpu_addr;
 	u32 sq_pgm_resources;
 
@@ -129,35 +132,35 @@ set_shaders(struct radeon_device *rdev)
 
 	/* VS */
 	gpu_addr = rdev->r600_blit.shader_gpu_addr + rdev->r600_blit.vs_offset;
-	radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
-	radeon_ring_write(rdev, (SQ_PGM_START_VS - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
-	radeon_ring_write(rdev, gpu_addr >> 8);
+	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
+	radeon_ring_write(ring, (SQ_PGM_START_VS - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
+	radeon_ring_write(ring, gpu_addr >> 8);
 
-	radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
-	radeon_ring_write(rdev, (SQ_PGM_RESOURCES_VS - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
-	radeon_ring_write(rdev, sq_pgm_resources);
+	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
+	radeon_ring_write(ring, (SQ_PGM_RESOURCES_VS - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
+	radeon_ring_write(ring, sq_pgm_resources);
 
-	radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
-	radeon_ring_write(rdev, (SQ_PGM_CF_OFFSET_VS - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
-	radeon_ring_write(rdev, 0);
+	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
+	radeon_ring_write(ring, (SQ_PGM_CF_OFFSET_VS - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
+	radeon_ring_write(ring, 0);
 
 	/* PS */
 	gpu_addr = rdev->r600_blit.shader_gpu_addr + rdev->r600_blit.ps_offset;
-	radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
-	radeon_ring_write(rdev, (SQ_PGM_START_PS - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
-	radeon_ring_write(rdev, gpu_addr >> 8);
+	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
+	radeon_ring_write(ring, (SQ_PGM_START_PS - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
+	radeon_ring_write(ring, gpu_addr >> 8);
 
-	radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
-	radeon_ring_write(rdev, (SQ_PGM_RESOURCES_PS - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
-	radeon_ring_write(rdev, sq_pgm_resources | (1 << 28));
+	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
+	radeon_ring_write(ring, (SQ_PGM_RESOURCES_PS - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
+	radeon_ring_write(ring, sq_pgm_resources | (1 << 28));
 
-	radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
-	radeon_ring_write(rdev, (SQ_PGM_EXPORTS_PS - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
-	radeon_ring_write(rdev, 2);
+	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
+	radeon_ring_write(ring, (SQ_PGM_EXPORTS_PS - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
+	radeon_ring_write(ring, 2);
 
-	radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
-	radeon_ring_write(rdev, (SQ_PGM_CF_OFFSET_PS - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
-	radeon_ring_write(rdev, 0);
+	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
+	radeon_ring_write(ring, (SQ_PGM_CF_OFFSET_PS - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
+	radeon_ring_write(ring, 0);
 
 	gpu_addr = rdev->r600_blit.shader_gpu_addr + rdev->r600_blit.vs_offset;
 	cp_set_surface_sync(rdev, PACKET3_SH_ACTION_ENA, 512, gpu_addr);
@@ -167,6 +170,7 @@ set_shaders(struct radeon_device *rdev)
 static void
 set_vtx_resource(struct radeon_device *rdev, u64 gpu_addr)
 {
+	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
 	u32 sq_vtx_constant_word2;
 
 	sq_vtx_constant_word2 = SQ_VTXC_BASE_ADDR_HI(upper_32_bits(gpu_addr) & 0xff) |
@@ -175,15 +179,15 @@ set_vtx_resource(struct radeon_device *rdev, u64 gpu_addr)
 	sq_vtx_constant_word2 |=  SQ_VTXC_ENDIAN_SWAP(SQ_ENDIAN_8IN32);
 #endif
 
-	radeon_ring_write(rdev, PACKET3(PACKET3_SET_RESOURCE, 7));
-	radeon_ring_write(rdev, 0x460);
-	radeon_ring_write(rdev, gpu_addr & 0xffffffff);
-	radeon_ring_write(rdev, 48 - 1);
-	radeon_ring_write(rdev, sq_vtx_constant_word2);
-	radeon_ring_write(rdev, 1 << 0);
-	radeon_ring_write(rdev, 0);
-	radeon_ring_write(rdev, 0);
-	radeon_ring_write(rdev, SQ_TEX_VTX_VALID_BUFFER << 30);
+	radeon_ring_write(ring, PACKET3(PACKET3_SET_RESOURCE, 7));
+	radeon_ring_write(ring, 0x460);
+	radeon_ring_write(ring, gpu_addr & 0xffffffff);
+	radeon_ring_write(ring, 48 - 1);
+	radeon_ring_write(ring, sq_vtx_constant_word2);
+	radeon_ring_write(ring, 1 << 0);
+	radeon_ring_write(ring, 0);
+	radeon_ring_write(ring, 0);
+	radeon_ring_write(ring, SQ_TEX_VTX_VALID_BUFFER << 30);
 
 	if ((rdev->family == CHIP_RV610) ||
 	    (rdev->family == CHIP_RV620) ||
@@ -203,6 +207,7 @@ set_tex_resource(struct radeon_device *rdev,
 		 int format, int w, int h, int pitch,
 		 u64 gpu_addr, u32 size)
 {
+	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
 	uint32_t sq_tex_resource_word0, sq_tex_resource_word1, sq_tex_resource_word4;
 
 	if (h < 1)
@@ -225,15 +230,15 @@ set_tex_resource(struct radeon_device *rdev,
 	cp_set_surface_sync(rdev,
 			    PACKET3_TC_ACTION_ENA, size, gpu_addr);
 
-	radeon_ring_write(rdev, PACKET3(PACKET3_SET_RESOURCE, 7));
-	radeon_ring_write(rdev, 0);
-	radeon_ring_write(rdev, sq_tex_resource_word0);
-	radeon_ring_write(rdev, sq_tex_resource_word1);
-	radeon_ring_write(rdev, gpu_addr >> 8);
-	radeon_ring_write(rdev, gpu_addr >> 8);
-	radeon_ring_write(rdev, sq_tex_resource_word4);
-	radeon_ring_write(rdev, 0);
-	radeon_ring_write(rdev, SQ_TEX_VTX_VALID_TEXTURE << 30);
+	radeon_ring_write(ring, PACKET3(PACKET3_SET_RESOURCE, 7));
+	radeon_ring_write(ring, 0);
+	radeon_ring_write(ring, sq_tex_resource_word0);
+	radeon_ring_write(ring, sq_tex_resource_word1);
+	radeon_ring_write(ring, gpu_addr >> 8);
+	radeon_ring_write(ring, gpu_addr >> 8);
+	radeon_ring_write(ring, sq_tex_resource_word4);
+	radeon_ring_write(ring, 0);
+	radeon_ring_write(ring, SQ_TEX_VTX_VALID_TEXTURE << 30);
 }
 
 /* emits 12 */
@@ -241,43 +246,45 @@ static void
 set_scissors(struct radeon_device *rdev, int x1, int y1,
 	     int x2, int y2)
 {
-	radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
-	radeon_ring_write(rdev, (PA_SC_SCREEN_SCISSOR_TL - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
-	radeon_ring_write(rdev, (x1 << 0) | (y1 << 16));
-	radeon_ring_write(rdev, (x2 << 0) | (y2 << 16));
-
-	radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
-	radeon_ring_write(rdev, (PA_SC_GENERIC_SCISSOR_TL - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
-	radeon_ring_write(rdev, (x1 << 0) | (y1 << 16) | (1 << 31));
-	radeon_ring_write(rdev, (x2 << 0) | (y2 << 16));
-
-	radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
-	radeon_ring_write(rdev, (PA_SC_WINDOW_SCISSOR_TL - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
-	radeon_ring_write(rdev, (x1 << 0) | (y1 << 16) | (1 << 31));
-	radeon_ring_write(rdev, (x2 << 0) | (y2 << 16));
+	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
+	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
+	radeon_ring_write(ring, (PA_SC_SCREEN_SCISSOR_TL - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
+	radeon_ring_write(ring, (x1 << 0) | (y1 << 16));
+	radeon_ring_write(ring, (x2 << 0) | (y2 << 16));
+
+	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
+	radeon_ring_write(ring, (PA_SC_GENERIC_SCISSOR_TL - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
+	radeon_ring_write(ring, (x1 << 0) | (y1 << 16) | (1 << 31));
+	radeon_ring_write(ring, (x2 << 0) | (y2 << 16));
+
+	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
+	radeon_ring_write(ring, (PA_SC_WINDOW_SCISSOR_TL - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
+	radeon_ring_write(ring, (x1 << 0) | (y1 << 16) | (1 << 31));
+	radeon_ring_write(ring, (x2 << 0) | (y2 << 16));
 }
 
 /* emits 10 */
 static void
 draw_auto(struct radeon_device *rdev)
 {
-	radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 1));
-	radeon_ring_write(rdev, (VGT_PRIMITIVE_TYPE - PACKET3_SET_CONFIG_REG_OFFSET) >> 2);
-	radeon_ring_write(rdev, DI_PT_RECTLIST);
+	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
+	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
+	radeon_ring_write(ring, (VGT_PRIMITIVE_TYPE - PACKET3_SET_CONFIG_REG_OFFSET) >> 2);
+	radeon_ring_write(ring, DI_PT_RECTLIST);
 
-	radeon_ring_write(rdev, PACKET3(PACKET3_INDEX_TYPE, 0));
-	radeon_ring_write(rdev,
+	radeon_ring_write(ring, PACKET3(PACKET3_INDEX_TYPE, 0));
+	radeon_ring_write(ring,
 #ifdef __BIG_ENDIAN
 			  (2 << 2) |
 #endif
 			  DI_INDEX_SIZE_16_BIT);
 
-	radeon_ring_write(rdev, PACKET3(PACKET3_NUM_INSTANCES, 0));
-	radeon_ring_write(rdev, 1);
+	radeon_ring_write(ring, PACKET3(PACKET3_NUM_INSTANCES, 0));
+	radeon_ring_write(ring, 1);
 
-	radeon_ring_write(rdev, PACKET3(PACKET3_DRAW_INDEX_AUTO, 1));
-	radeon_ring_write(rdev, 3);
-	radeon_ring_write(rdev, DI_SRC_SEL_AUTO_INDEX);
+	radeon_ring_write(ring, PACKET3(PACKET3_DRAW_INDEX_AUTO, 1));
+	radeon_ring_write(ring, 3);
+	radeon_ring_write(ring, DI_SRC_SEL_AUTO_INDEX);
 
 }
 
@@ -285,6 +292,7 @@ draw_auto(struct radeon_device *rdev)
 static void
 set_default_state(struct radeon_device *rdev)
 {
+	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
 	u32 sq_config, sq_gpr_resource_mgmt_1, sq_gpr_resource_mgmt_2;
 	u32 sq_thread_resource_mgmt, sq_stack_resource_mgmt_1, sq_stack_resource_mgmt_2;
 	int num_ps_gprs, num_vs_gprs, num_temp_gprs, num_gs_gprs, num_es_gprs;
@@ -440,24 +448,24 @@ set_default_state(struct radeon_device *rdev)
 	/* emit an IB pointing at default state */
 	dwords = ALIGN(rdev->r600_blit.state_len, 0x10);
 	gpu_addr = rdev->r600_blit.shader_gpu_addr + rdev->r600_blit.state_offset;
-	radeon_ring_write(rdev, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
-	radeon_ring_write(rdev,
+	radeon_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
+	radeon_ring_write(ring,
 #ifdef __BIG_ENDIAN
 			  (2 << 0) |
 #endif
 			  (gpu_addr & 0xFFFFFFFC));
-	radeon_ring_write(rdev, upper_32_bits(gpu_addr) & 0xFF);
-	radeon_ring_write(rdev, dwords);
+	radeon_ring_write(ring, upper_32_bits(gpu_addr) & 0xFF);
+	radeon_ring_write(ring, dwords);
 
 	/* SQ config */
-	radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 6));
-	radeon_ring_write(rdev, (SQ_CONFIG - PACKET3_SET_CONFIG_REG_OFFSET) >> 2);
-	radeon_ring_write(rdev, sq_config);
-	radeon_ring_write(rdev, sq_gpr_resource_mgmt_1);
-	radeon_ring_write(rdev, sq_gpr_resource_mgmt_2);
-	radeon_ring_write(rdev, sq_thread_resource_mgmt);
-	radeon_ring_write(rdev, sq_stack_resource_mgmt_1);
-	radeon_ring_write(rdev, sq_stack_resource_mgmt_2);
+	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 6));
+	radeon_ring_write(ring, (SQ_CONFIG - PACKET3_SET_CONFIG_REG_OFFSET) >> 2);
+	radeon_ring_write(ring, sq_config);
+	radeon_ring_write(ring, sq_gpr_resource_mgmt_1);
+	radeon_ring_write(ring, sq_gpr_resource_mgmt_2);
+	radeon_ring_write(ring, sq_thread_resource_mgmt);
+	radeon_ring_write(ring, sq_stack_resource_mgmt_1);
+	radeon_ring_write(ring, sq_stack_resource_mgmt_2);
 }
 
 static uint32_t i2f(uint32_t input)
@@ -614,7 +622,7 @@ void r600_blit_fini(struct radeon_device *rdev)
 static int r600_vb_ib_get(struct radeon_device *rdev)
 {
 	int r;
-	r = radeon_ib_get(rdev, &rdev->r600_blit.vb_ib);
+	r = radeon_ib_get(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->r600_blit.vb_ib);
 	if (r) {
 		DRM_ERROR("failed to get IB for vertex buffer\n");
 		return r;
@@ -679,6 +687,7 @@ static unsigned r600_blit_create_rect(unsigned num_gpu_pages,
 
 int r600_blit_prepare_copy(struct radeon_device *rdev, unsigned num_gpu_pages)
 {
+	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
 	int r;
 	int ring_size;
 	int num_loops = 0;
@@ -699,7 +708,7 @@ int r600_blit_prepare_copy(struct radeon_device *rdev, unsigned num_gpu_pages)
 	/* calculate number of loops correctly */
 	ring_size = num_loops * dwords_per_loop;
 	ring_size += rdev->r600_blit.ring_size_common;
-	r = radeon_ring_lock(rdev, ring_size);
+	r = radeon_ring_lock(rdev, ring, ring_size);
 	if (r)
 		return r;
 
@@ -718,7 +727,7 @@ void r600_blit_done_copy(struct radeon_device *rdev, struct radeon_fence *fence)
 	if (fence)
 		r = radeon_fence_emit(rdev, fence);
 
-	radeon_ring_unlock_commit(rdev);
+	radeon_ring_unlock_commit(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
 }
 
 void r600_kms_blit_copy(struct radeon_device *rdev,

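The blit helpers above now target the GFX ring explicitly. A caller drives them roughly as follows — a sketch only, with the r600_blit mutex and error paths elided, and the fence assumed to come from radeon_fence_create() on RADEON_RING_TYPE_GFX_INDEX:

	/* sketch of the copy path built on the helpers above */
	int r;

	r = r600_blit_prepare_copy(rdev, num_gpu_pages);	/* locks the GFX ring */
	if (r)
		return r;
	r600_kms_blit_copy(rdev, src_gpu_addr, dst_gpu_addr, num_gpu_pages);
	r600_blit_done_copy(rdev, fence);	/* emits the fence, commits the GFX ring */
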
+ 1 - 1
drivers/gpu/drm/radeon/r600_cp.c

@@ -1815,7 +1815,7 @@ static void r600_cp_init_ring_buffer(struct drm_device *dev,
 		     dev_priv->ring.size_l2qw);
 #endif
 
-	RADEON_WRITE(R600_CP_SEM_WAIT_TIMER, 0x4);
+	RADEON_WRITE(R600_CP_SEM_WAIT_TIMER, 0x0);
 
 	/* Set the write pointer delay */
 	RADEON_WRITE(R600_CP_RB_WPTR_DELAY, 0);

+ 2 - 0
drivers/gpu/drm/radeon/r600d.h

@@ -831,6 +831,8 @@
 #define	PACKET3_STRMOUT_BUFFER_UPDATE			0x34
 #define	PACKET3_INDIRECT_BUFFER_MP			0x38
 #define	PACKET3_MEM_SEMAPHORE				0x39
+#              define PACKET3_SEM_SEL_SIGNAL	    (0x6 << 29)
+#              define PACKET3_SEM_SEL_WAIT	    (0x7 << 29)
 #define	PACKET3_MPEG_INDEX				0x3A
 #define	PACKET3_WAIT_REG_MEM				0x3C
 #define	PACKET3_MEM_WRITE				0x3D

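The two new selects feed PACKET3_MEM_SEMAPHORE. A signal or wait is a three-dword packet; a sketch of an emitter follows, with the dword layout assumed from the defines above (the commit's real emitter is r600_semaphore_ring_emit()):

	static void sem_emit_sketch(struct radeon_ring *ring,
				    uint64_t gpu_addr, bool emit_wait)
	{
		/* assumption: low 32 bits first, then high 8 bits | select */
		unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;

		radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
		radeon_ring_write(ring, gpu_addr & 0xffffffff);
		radeon_ring_write(ring, (upper_32_bits(gpu_addr) & 0xff) | sel);
	}
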
+ 165 - 56
drivers/gpu/drm/radeon/radeon.h

@@ -107,6 +107,17 @@ extern int radeon_msi;
 #define RADEONFB_CONN_LIMIT		4
 #define RADEON_BIOS_NUM_SCRATCH		8
 
+/* max number of rings */
+#define RADEON_NUM_RINGS 3
+
+/* internal ring indices */
+/* r1xx+ has gfx CP ring */
+#define RADEON_RING_TYPE_GFX_INDEX  0
+
+/* cayman has 2 compute CP rings */
+#define CAYMAN_RING_TYPE_CP1_INDEX 1
+#define CAYMAN_RING_TYPE_CP2_INDEX 2
+
 /*
  * Errata workarounds.
  */
@@ -192,14 +203,15 @@ extern int sumo_get_temp(struct radeon_device *rdev);
  */
 struct radeon_fence_driver {
 	uint32_t			scratch_reg;
+	uint64_t			gpu_addr;
+	volatile uint32_t		*cpu_addr;
 	atomic_t			seq;
 	uint32_t			last_seq;
 	unsigned long			last_jiffies;
 	unsigned long			last_timeout;
 	wait_queue_head_t		queue;
-	rwlock_t			lock;
 	struct list_head		created;
-	struct list_head		emited;
+	struct list_head		emitted;
 	struct list_head		signaled;
 	bool				initialized;
 };
@@ -210,21 +222,51 @@ struct radeon_fence {
 	struct list_head		list;
 	/* protected by radeon_fence.lock */
 	uint32_t			seq;
-	bool				emited;
+	bool				emitted;
 	bool				signaled;
+	/* RB, DMA, etc. */
+	int				ring;
 };
 
+int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring);
 int radeon_fence_driver_init(struct radeon_device *rdev);
 void radeon_fence_driver_fini(struct radeon_device *rdev);
-int radeon_fence_create(struct radeon_device *rdev, struct radeon_fence **fence);
+int radeon_fence_create(struct radeon_device *rdev, struct radeon_fence **fence, int ring);
 int radeon_fence_emit(struct radeon_device *rdev, struct radeon_fence *fence);
-void radeon_fence_process(struct radeon_device *rdev);
+void radeon_fence_process(struct radeon_device *rdev, int ring);
 bool radeon_fence_signaled(struct radeon_fence *fence);
 int radeon_fence_wait(struct radeon_fence *fence, bool interruptible);
-int radeon_fence_wait_next(struct radeon_device *rdev);
-int radeon_fence_wait_last(struct radeon_device *rdev);
+int radeon_fence_wait_next(struct radeon_device *rdev, int ring);
+int radeon_fence_wait_last(struct radeon_device *rdev, int ring);
 struct radeon_fence *radeon_fence_ref(struct radeon_fence *fence);
 void radeon_fence_unref(struct radeon_fence **fence);
+int radeon_fence_count_emitted(struct radeon_device *rdev, int ring);
+
+/*
+ * Semaphores.
+ */
+struct radeon_ring;
+
+struct radeon_semaphore_driver {
+	rwlock_t		lock;
+	struct list_head	free;
+};
+
+struct radeon_semaphore {
+	struct radeon_bo	*robj;
+	struct list_head	list;
+	uint64_t		gpu_addr;
+};
+
+void radeon_semaphore_driver_fini(struct radeon_device *rdev);
+int radeon_semaphore_create(struct radeon_device *rdev,
+			    struct radeon_semaphore **semaphore);
+void radeon_semaphore_emit_signal(struct radeon_device *rdev, int ring,
+				  struct radeon_semaphore *semaphore);
+void radeon_semaphore_emit_wait(struct radeon_device *rdev, int ring,
+				struct radeon_semaphore *semaphore);
+void radeon_semaphore_free(struct radeon_device *rdev,
+			   struct radeon_semaphore *semaphore);
 
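The intended use of this API is cross-ring synchronization: the producing ring signals, the consuming ring waits. A minimal sketch, assuming both rings are already locked and with error handling elided (radeon_test_ring_sync() added in radeon_test.c exercises exactly this pattern):

	struct radeon_semaphore *sem = NULL;

	r = radeon_semaphore_create(rdev, &sem);
	/* ring A releases the semaphore when it reaches this point ... */
	radeon_semaphore_emit_signal(rdev, ringA, sem);
	/* ... ring B stalls here until A has signaled */
	radeon_semaphore_emit_wait(rdev, ringB, sem);
	/* once both rings are known idle the semaphore can be recycled */
	radeon_semaphore_free(rdev, sem);
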
 /*
  * Tiling registers
@@ -274,6 +316,48 @@ struct radeon_bo_list {
 	u32			tiling_flags;
 };
 
+/* sub-allocation manager; it has to be protected by another lock.
+ * By design this is a helper for other parts of the driver,
+ * like the indirect buffer or semaphore, which both have their
+ * own locking.
+ *
+ * The principle is simple: we keep a list of sub-allocations in
+ * offset order (first entry has offset == 0, last entry has the
+ * highest offset).
+ *
+ * When allocating a new object we first check if there is room at
+ * the end: total_size - (last_object_offset + last_object_size) >=
+ * alloc_size. If so we allocate the new object there.
+ *
+ * When there is not enough room at the end, we start waiting on
+ * each sub-object until we reach object_offset + object_size >=
+ * alloc_size; that object then becomes the sub-object we return.
+ *
+ * Alignment can't be bigger than page size.
+ *
+ * Holes are not considered for allocation, to keep things simple.
+ * The assumption is that there won't be holes (all objects use the
+ * same alignment).
+ */
+struct radeon_sa_manager {
+	struct radeon_bo	*bo;
+	struct list_head	sa_bo;
+	unsigned		size;
+	uint64_t		gpu_addr;
+	void			*cpu_ptr;
+	uint32_t		domain;
+};
+
+struct radeon_sa_bo;
+
+/* sub-allocation buffer */
+struct radeon_sa_bo {
+	struct list_head		list;
+	struct radeon_sa_manager	*manager;
+	unsigned			offset;
+	unsigned			size;
+};
+
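The "room at the end" check from the comment reduces to a couple of comparisons over these fields. A sketch of the fast path only (the wait-on-sub-objects fallback and the list insertion are omitted):

	static bool sa_fits_at_end(struct radeon_sa_manager *sa_manager,
				   unsigned size, unsigned *offset)
	{
		struct radeon_sa_bo *last;

		if (list_empty(&sa_manager->sa_bo)) {
			*offset = 0;
			return true;
		}
		last = list_entry(sa_manager->sa_bo.prev,
				  struct radeon_sa_bo, list);
		if (sa_manager->size - (last->offset + last->size) >= size) {
			*offset = last->offset + last->size;
			return true;
		}
		return false;	/* no room at the end, caller falls back to waiting */
	}
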
 /*
  * GEM objects.
  */
@@ -433,7 +517,7 @@ union radeon_irq_stat_regs {
 
 struct radeon_irq {
 	bool		installed;
-	bool		sw_int;
+	bool		sw_int[RADEON_NUM_RINGS];
 	bool		crtc_vblank_int[RADEON_MAX_CRTCS];
 	bool		pflip[RADEON_MAX_CRTCS];
 	wait_queue_head_t	vblank_queue;
@@ -443,7 +527,7 @@ struct radeon_irq {
 	wait_queue_head_t	idle_queue;
 	bool		hdmi[RADEON_MAX_HDMI_BLOCKS];
 	spinlock_t sw_lock;
-	int sw_refcount;
+	int sw_refcount[RADEON_NUM_RINGS];
 	union radeon_irq_stat_regs stat_regs;
 	spinlock_t pflip_lock[RADEON_MAX_CRTCS];
 	int pflip_refcount[RADEON_MAX_CRTCS];
@@ -451,22 +535,22 @@ struct radeon_irq {
 
 int radeon_irq_kms_init(struct radeon_device *rdev);
 void radeon_irq_kms_fini(struct radeon_device *rdev);
-void radeon_irq_kms_sw_irq_get(struct radeon_device *rdev);
-void radeon_irq_kms_sw_irq_put(struct radeon_device *rdev);
+void radeon_irq_kms_sw_irq_get(struct radeon_device *rdev, int ring);
+void radeon_irq_kms_sw_irq_put(struct radeon_device *rdev, int ring);
 void radeon_irq_kms_pflip_irq_get(struct radeon_device *rdev, int crtc);
 void radeon_irq_kms_pflip_irq_put(struct radeon_device *rdev, int crtc);
 
 /*
- * CP & ring.
+ * CP & rings.
  */
+
 struct radeon_ib {
-	struct list_head	list;
+	struct radeon_sa_bo	sa_bo;
 	unsigned		idx;
+	uint32_t		length_dw;
 	uint64_t		gpu_addr;
-	struct radeon_fence	*fence;
 	uint32_t		*ptr;
-	uint32_t		length_dw;
-	bool			free;
+	struct radeon_fence	*fence;
 };
 
 /*
@@ -474,20 +558,22 @@ struct radeon_ib {
  * mutex protects scheduled_ibs, ready, alloc_bm
  */
 struct radeon_ib_pool {
-	struct mutex		mutex;
-	struct radeon_bo	*robj;
-	struct list_head	bogus_ib;
-	struct radeon_ib	ibs[RADEON_IB_POOL_SIZE];
-	bool			ready;
-	unsigned		head_id;
+	struct mutex			mutex;
+	struct radeon_sa_manager	sa_manager;
+	struct radeon_ib		ibs[RADEON_IB_POOL_SIZE];
+	bool				ready;
+	unsigned			head_id;
 };
 
-struct radeon_cp {
+struct radeon_ring {
 	struct radeon_bo	*ring_obj;
 	volatile uint32_t	*ring;
 	unsigned		rptr;
+	unsigned		rptr_offs;
+	unsigned		rptr_reg;
 	unsigned		wptr;
 	unsigned		wptr_old;
+	unsigned		wptr_reg;
 	unsigned		ring_size;
 	unsigned		ring_free_dw;
 	int			count_dw;
@@ -496,6 +582,9 @@ struct radeon_cp {
 	uint32_t		ptr_mask;
 	struct mutex		mutex;
 	bool			ready;
+	u32			ptr_reg_shift;
+	u32			ptr_reg_mask;
+	u32			nop;
 };
 
 /*
@@ -505,6 +594,7 @@ struct r600_ih {
 	struct radeon_bo	*ring_obj;
 	volatile uint32_t	*ring;
 	unsigned		rptr;
+	unsigned		rptr_offs;
 	unsigned		wptr;
 	unsigned		wptr_old;
 	unsigned		ring_size;
@@ -548,23 +638,27 @@ struct r600_blit {
 
 void r600_blit_suspend(struct radeon_device *rdev);
 
-int radeon_ib_get(struct radeon_device *rdev, struct radeon_ib **ib);
+int radeon_ib_get(struct radeon_device *rdev, int ring, struct radeon_ib **ib);
 void radeon_ib_free(struct radeon_device *rdev, struct radeon_ib **ib);
 int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib);
 int radeon_ib_pool_init(struct radeon_device *rdev);
 void radeon_ib_pool_fini(struct radeon_device *rdev);
+int radeon_ib_pool_start(struct radeon_device *rdev);
+int radeon_ib_pool_suspend(struct radeon_device *rdev);
 int radeon_ib_test(struct radeon_device *rdev);
-extern void radeon_ib_bogus_add(struct radeon_device *rdev, struct radeon_ib *ib);
 /* Ring access between begin & end cannot sleep */
-void radeon_ring_free_size(struct radeon_device *rdev);
-int radeon_ring_alloc(struct radeon_device *rdev, unsigned ndw);
-int radeon_ring_lock(struct radeon_device *rdev, unsigned ndw);
-void radeon_ring_commit(struct radeon_device *rdev);
-void radeon_ring_unlock_commit(struct radeon_device *rdev);
-void radeon_ring_unlock_undo(struct radeon_device *rdev);
-int radeon_ring_test(struct radeon_device *rdev);
-int radeon_ring_init(struct radeon_device *rdev, unsigned ring_size);
-void radeon_ring_fini(struct radeon_device *rdev);
+int radeon_ring_index(struct radeon_device *rdev, struct radeon_ring *cp);
+void radeon_ring_free_size(struct radeon_device *rdev, struct radeon_ring *cp);
+int radeon_ring_alloc(struct radeon_device *rdev, struct radeon_ring *cp, unsigned ndw);
+int radeon_ring_lock(struct radeon_device *rdev, struct radeon_ring *cp, unsigned ndw);
+void radeon_ring_commit(struct radeon_device *rdev, struct radeon_ring *cp);
+void radeon_ring_unlock_commit(struct radeon_device *rdev, struct radeon_ring *cp);
+void radeon_ring_unlock_undo(struct radeon_device *rdev, struct radeon_ring *cp);
+int radeon_ring_test(struct radeon_device *rdev, struct radeon_ring *cp);
+int radeon_ring_init(struct radeon_device *rdev, struct radeon_ring *cp, unsigned ring_size,
+		     unsigned rptr_offs, unsigned rptr_reg, unsigned wptr_reg,
+		     u32 ptr_reg_shift, u32 ptr_reg_mask, u32 nop);
+void radeon_ring_fini(struct radeon_device *rdev, struct radeon_ring *cp);
 
 
 /*
@@ -868,11 +962,20 @@ void radeon_benchmark(struct radeon_device *rdev, int test_number);
  * Testing
  */
 void radeon_test_moves(struct radeon_device *rdev);
+void radeon_test_ring_sync(struct radeon_device *rdev,
+			   struct radeon_ring *cpA,
+			   struct radeon_ring *cpB);
+void radeon_test_syncing(struct radeon_device *rdev);
 
 
 /*
  * Debugfs
  */
+struct radeon_debugfs {
+	struct drm_info_list	*files;
+	unsigned		num_files;
+};
+
 int radeon_debugfs_add_files(struct radeon_device *rdev,
 			     struct drm_info_list *files,
 			     unsigned nfiles);
@@ -888,21 +991,26 @@ struct radeon_asic {
 	int (*resume)(struct radeon_device *rdev);
 	int (*suspend)(struct radeon_device *rdev);
 	void (*vga_set_state)(struct radeon_device *rdev, bool state);
-	bool (*gpu_is_lockup)(struct radeon_device *rdev);
+	bool (*gpu_is_lockup)(struct radeon_device *rdev, struct radeon_ring *cp);
 	int (*asic_reset)(struct radeon_device *rdev);
 	void (*gart_tlb_flush)(struct radeon_device *rdev);
 	int (*gart_set_page)(struct radeon_device *rdev, int i, uint64_t addr);
 	int (*cp_init)(struct radeon_device *rdev, unsigned ring_size);
 	void (*cp_fini)(struct radeon_device *rdev);
 	void (*cp_disable)(struct radeon_device *rdev);
-	void (*cp_commit)(struct radeon_device *rdev);
 	void (*ring_start)(struct radeon_device *rdev);
-	int (*ring_test)(struct radeon_device *rdev);
-	void (*ring_ib_execute)(struct radeon_device *rdev, struct radeon_ib *ib);
+
+	struct {
+		void (*ib_execute)(struct radeon_device *rdev, struct radeon_ib *ib);
+		void (*emit_fence)(struct radeon_device *rdev, struct radeon_fence *fence);
+		void (*emit_semaphore)(struct radeon_device *rdev, struct radeon_ring *cp,
+				       struct radeon_semaphore *semaphore, bool emit_wait);
+	} ring[RADEON_NUM_RINGS];
+
+	int (*ring_test)(struct radeon_device *rdev, struct radeon_ring *cp);
 	int (*irq_set)(struct radeon_device *rdev);
 	int (*irq_process)(struct radeon_device *rdev);
 	u32 (*get_vblank_counter)(struct radeon_device *rdev, int crtc);
-	void (*fence_ring_emit)(struct radeon_device *rdev, struct radeon_fence *fence);
 	int (*cs_parse)(struct radeon_cs_parser *p);
 	int (*copy_blit)(struct radeon_device *rdev,
 			 uint64_t src_offset,
@@ -1230,11 +1338,10 @@ struct radeon_device {
 	struct radeon_mode_info		mode_info;
 	struct radeon_scratch		scratch;
 	struct radeon_mman		mman;
-	struct radeon_fence_driver	fence_drv;
-	struct radeon_cp		cp;
-	/* cayman compute rings */
-	struct radeon_cp		cp1;
-	struct radeon_cp		cp2;
+	rwlock_t			fence_lock;
+	struct radeon_fence_driver	fence_drv[RADEON_NUM_RINGS];
+	struct radeon_semaphore_driver	semaphore_drv;
+	struct radeon_ring		ring[RADEON_NUM_RINGS];
 	struct radeon_ib_pool		ib_pool;
 	struct radeon_irq		irq;
 	struct radeon_asic		*asic;
@@ -1278,6 +1385,9 @@ struct radeon_device {
 	struct drm_file *cmask_filp;
 	/* i2c buses */
 	struct radeon_i2c_chan *i2c_bus[RADEON_MAX_I2C_BUS];
+	/* debugfs */
+	struct radeon_debugfs	debugfs[RADEON_DEBUGFS_MAX_COMPONENTS];
+	unsigned 		debugfs_count;
 };
 
 int radeon_device_init(struct radeon_device *rdev,
@@ -1413,18 +1523,17 @@ void radeon_atombios_fini(struct radeon_device *rdev);
 /*
  * RING helpers.
  */
-
 #if DRM_DEBUG_CODE == 0
-static inline void radeon_ring_write(struct radeon_device *rdev, uint32_t v)
+static inline void radeon_ring_write(struct radeon_ring *ring, uint32_t v)
 {
-	rdev->cp.ring[rdev->cp.wptr++] = v;
-	rdev->cp.wptr &= rdev->cp.ptr_mask;
-	rdev->cp.count_dw--;
-	rdev->cp.ring_free_dw--;
+	ring->ring[ring->wptr++] = v;
+	ring->wptr &= ring->ptr_mask;
+	ring->count_dw--;
+	ring->ring_free_dw--;
 }
 #else
 /* With debugging this is just too big to inline */
-void radeon_ring_write(struct radeon_device *rdev, uint32_t v);
+void radeon_ring_write(struct radeon_ring *ring, uint32_t v);
 #endif
 
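With the ring now passed explicitly, every emission site follows the same lock/write/commit pattern; a sketch (SOME_REG and some_value are hypothetical):

	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	int r;

	r = radeon_ring_lock(rdev, ring, 2);	/* reserve 2 dwords, takes ring->mutex */
	if (r)
		return r;
	radeon_ring_write(ring, PACKET0(SOME_REG, 0));
	radeon_ring_write(ring, some_value);
	radeon_ring_unlock_commit(rdev, ring);	/* updates wptr, drops ring->mutex */
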
 /*
@@ -1436,18 +1545,18 @@ void radeon_ring_write(struct radeon_device *rdev, uint32_t v);
 #define radeon_suspend(rdev) (rdev)->asic->suspend((rdev))
 #define radeon_cs_parse(p) rdev->asic->cs_parse((p))
 #define radeon_vga_set_state(rdev, state) (rdev)->asic->vga_set_state((rdev), (state))
-#define radeon_gpu_is_lockup(rdev) (rdev)->asic->gpu_is_lockup((rdev))
+#define radeon_gpu_is_lockup(rdev, cp) (rdev)->asic->gpu_is_lockup((rdev), (cp))
 #define radeon_asic_reset(rdev) (rdev)->asic->asic_reset((rdev))
 #define radeon_gart_tlb_flush(rdev) (rdev)->asic->gart_tlb_flush((rdev))
 #define radeon_gart_set_page(rdev, i, p) (rdev)->asic->gart_set_page((rdev), (i), (p))
-#define radeon_cp_commit(rdev) (rdev)->asic->cp_commit((rdev))
 #define radeon_ring_start(rdev) (rdev)->asic->ring_start((rdev))
-#define radeon_ring_test(rdev) (rdev)->asic->ring_test((rdev))
-#define radeon_ring_ib_execute(rdev, ib) (rdev)->asic->ring_ib_execute((rdev), (ib))
+#define radeon_ring_test(rdev, cp) (rdev)->asic->ring_test((rdev), (cp))
+#define radeon_ring_ib_execute(rdev, r, ib) (rdev)->asic->ring[(r)].ib_execute((rdev), (ib))
 #define radeon_irq_set(rdev) (rdev)->asic->irq_set((rdev))
 #define radeon_irq_process(rdev) (rdev)->asic->irq_process((rdev))
 #define radeon_get_vblank_counter(rdev, crtc) (rdev)->asic->get_vblank_counter((rdev), (crtc))
-#define radeon_fence_ring_emit(rdev, fence) (rdev)->asic->fence_ring_emit((rdev), (fence))
+#define radeon_fence_ring_emit(rdev, r, fence) (rdev)->asic->ring[(r)].emit_fence((rdev), (fence))
+#define radeon_semaphore_ring_emit(rdev, r, cp, semaphore, emit_wait) (rdev)->asic->ring[(r)].emit_semaphore((rdev), (cp), (semaphore), (emit_wait))
 #define radeon_copy_blit(rdev, s, d, np, f) (rdev)->asic->copy_blit((rdev), (s), (d), (np), (f))
 #define radeon_copy_dma(rdev, s, d, np, f) (rdev)->asic->copy_dma((rdev), (s), (d), (np), (f))
 #define radeon_copy(rdev, s, d, np, f) (rdev)->asic->copy((rdev), (s), (d), (np), (f))

+ 129 - 51
drivers/gpu/drm/radeon/radeon_asic.c

@@ -138,14 +138,18 @@ static struct radeon_asic r100_asic = {
 	.asic_reset = &r100_asic_reset,
 	.gart_tlb_flush = &r100_pci_gart_tlb_flush,
 	.gart_set_page = &r100_pci_gart_set_page,
-	.cp_commit = &r100_cp_commit,
 	.ring_start = &r100_ring_start,
 	.ring_test = &r100_ring_test,
-	.ring_ib_execute = &r100_ring_ib_execute,
+	.ring = {
+		[RADEON_RING_TYPE_GFX_INDEX] = {
+			.ib_execute = &r100_ring_ib_execute,
+			.emit_fence = &r100_fence_ring_emit,
+			.emit_semaphore = &r100_semaphore_ring_emit,
+		}
+	},
 	.irq_set = &r100_irq_set,
 	.irq_process = &r100_irq_process,
 	.get_vblank_counter = &r100_get_vblank_counter,
-	.fence_ring_emit = &r100_fence_ring_emit,
 	.cs_parse = &r100_cs_parse,
 	.copy_blit = &r100_copy_blit,
 	.copy_dma = NULL,
@@ -186,14 +190,18 @@ static struct radeon_asic r200_asic = {
 	.asic_reset = &r100_asic_reset,
 	.gart_tlb_flush = &r100_pci_gart_tlb_flush,
 	.gart_set_page = &r100_pci_gart_set_page,
-	.cp_commit = &r100_cp_commit,
 	.ring_start = &r100_ring_start,
 	.ring_test = &r100_ring_test,
-	.ring_ib_execute = &r100_ring_ib_execute,
+	.ring = {
+		[RADEON_RING_TYPE_GFX_INDEX] = {
+			.ib_execute = &r100_ring_ib_execute,
+			.emit_fence = &r100_fence_ring_emit,
+			.emit_semaphore = &r100_semaphore_ring_emit,
+		}
+	},
 	.irq_set = &r100_irq_set,
 	.irq_process = &r100_irq_process,
 	.get_vblank_counter = &r100_get_vblank_counter,
-	.fence_ring_emit = &r100_fence_ring_emit,
 	.cs_parse = &r100_cs_parse,
 	.copy_blit = &r100_copy_blit,
 	.copy_dma = &r200_copy_dma,
@@ -233,14 +241,18 @@ static struct radeon_asic r300_asic = {
 	.asic_reset = &r300_asic_reset,
 	.gart_tlb_flush = &r100_pci_gart_tlb_flush,
 	.gart_set_page = &r100_pci_gart_set_page,
-	.cp_commit = &r100_cp_commit,
 	.ring_start = &r300_ring_start,
 	.ring_test = &r100_ring_test,
-	.ring_ib_execute = &r100_ring_ib_execute,
+	.ring = {
+		[RADEON_RING_TYPE_GFX_INDEX] = {
+			.ib_execute = &r100_ring_ib_execute,
+			.emit_fence = &r300_fence_ring_emit,
+			.emit_semaphore = &r100_semaphore_ring_emit,
+		}
+	},
 	.irq_set = &r100_irq_set,
 	.irq_process = &r100_irq_process,
 	.get_vblank_counter = &r100_get_vblank_counter,
-	.fence_ring_emit = &r300_fence_ring_emit,
 	.cs_parse = &r300_cs_parse,
 	.copy_blit = &r100_copy_blit,
 	.copy_dma = &r200_copy_dma,
@@ -281,14 +293,18 @@ static struct radeon_asic r300_asic_pcie = {
 	.asic_reset = &r300_asic_reset,
 	.gart_tlb_flush = &rv370_pcie_gart_tlb_flush,
 	.gart_set_page = &rv370_pcie_gart_set_page,
-	.cp_commit = &r100_cp_commit,
 	.ring_start = &r300_ring_start,
 	.ring_test = &r100_ring_test,
-	.ring_ib_execute = &r100_ring_ib_execute,
+	.ring = {
+		[RADEON_RING_TYPE_GFX_INDEX] = {
+			.ib_execute = &r100_ring_ib_execute,
+			.emit_fence = &r300_fence_ring_emit,
+			.emit_semaphore = &r100_semaphore_ring_emit,
+		}
+	},
 	.irq_set = &r100_irq_set,
 	.irq_process = &r100_irq_process,
 	.get_vblank_counter = &r100_get_vblank_counter,
-	.fence_ring_emit = &r300_fence_ring_emit,
 	.cs_parse = &r300_cs_parse,
 	.copy_blit = &r100_copy_blit,
 	.copy_dma = &r200_copy_dma,
@@ -328,14 +344,18 @@ static struct radeon_asic r420_asic = {
 	.asic_reset = &r300_asic_reset,
 	.gart_tlb_flush = &rv370_pcie_gart_tlb_flush,
 	.gart_set_page = &rv370_pcie_gart_set_page,
-	.cp_commit = &r100_cp_commit,
 	.ring_start = &r300_ring_start,
 	.ring_test = &r100_ring_test,
-	.ring_ib_execute = &r100_ring_ib_execute,
+	.ring = {
+		[RADEON_RING_TYPE_GFX_INDEX] = {
+			.ib_execute = &r100_ring_ib_execute,
+			.emit_fence = &r300_fence_ring_emit,
+			.emit_semaphore = &r100_semaphore_ring_emit,
+		}
+	},
 	.irq_set = &r100_irq_set,
 	.irq_process = &r100_irq_process,
 	.get_vblank_counter = &r100_get_vblank_counter,
-	.fence_ring_emit = &r300_fence_ring_emit,
 	.cs_parse = &r300_cs_parse,
 	.copy_blit = &r100_copy_blit,
 	.copy_dma = &r200_copy_dma,
@@ -376,14 +396,18 @@ static struct radeon_asic rs400_asic = {
 	.asic_reset = &r300_asic_reset,
 	.gart_tlb_flush = &rs400_gart_tlb_flush,
 	.gart_set_page = &rs400_gart_set_page,
-	.cp_commit = &r100_cp_commit,
 	.ring_start = &r300_ring_start,
 	.ring_test = &r100_ring_test,
-	.ring_ib_execute = &r100_ring_ib_execute,
+	.ring = {
+		[RADEON_RING_TYPE_GFX_INDEX] = {
+			.ib_execute = &r100_ring_ib_execute,
+			.emit_fence = &r300_fence_ring_emit,
+			.emit_semaphore = &r100_semaphore_ring_emit,
+		}
+	},
 	.irq_set = &r100_irq_set,
 	.irq_process = &r100_irq_process,
 	.get_vblank_counter = &r100_get_vblank_counter,
-	.fence_ring_emit = &r300_fence_ring_emit,
 	.cs_parse = &r300_cs_parse,
 	.copy_blit = &r100_copy_blit,
 	.copy_dma = &r200_copy_dma,
@@ -424,14 +448,18 @@ static struct radeon_asic rs600_asic = {
 	.asic_reset = &rs600_asic_reset,
 	.gart_tlb_flush = &rs600_gart_tlb_flush,
 	.gart_set_page = &rs600_gart_set_page,
-	.cp_commit = &r100_cp_commit,
 	.ring_start = &r300_ring_start,
 	.ring_test = &r100_ring_test,
-	.ring_ib_execute = &r100_ring_ib_execute,
+	.ring = {
+		[RADEON_RING_TYPE_GFX_INDEX] = {
+			.ib_execute = &r100_ring_ib_execute,
+			.emit_fence = &r300_fence_ring_emit,
+			.emit_semaphore = &r100_semaphore_ring_emit,
+		}
+	},
 	.irq_set = &rs600_irq_set,
 	.irq_process = &rs600_irq_process,
 	.get_vblank_counter = &rs600_get_vblank_counter,
-	.fence_ring_emit = &r300_fence_ring_emit,
 	.cs_parse = &r300_cs_parse,
 	.copy_blit = &r100_copy_blit,
 	.copy_dma = &r200_copy_dma,
@@ -472,14 +500,18 @@ static struct radeon_asic rs690_asic = {
 	.asic_reset = &rs600_asic_reset,
 	.gart_tlb_flush = &rs400_gart_tlb_flush,
 	.gart_set_page = &rs400_gart_set_page,
-	.cp_commit = &r100_cp_commit,
 	.ring_start = &r300_ring_start,
 	.ring_test = &r100_ring_test,
-	.ring_ib_execute = &r100_ring_ib_execute,
+	.ring = {
+		[RADEON_RING_TYPE_GFX_INDEX] = {
+			.ib_execute = &r100_ring_ib_execute,
+			.emit_fence = &r300_fence_ring_emit,
+			.emit_semaphore = &r100_semaphore_ring_emit,
+		}
+	},
 	.irq_set = &rs600_irq_set,
 	.irq_process = &rs600_irq_process,
 	.get_vblank_counter = &rs600_get_vblank_counter,
-	.fence_ring_emit = &r300_fence_ring_emit,
 	.cs_parse = &r300_cs_parse,
 	.copy_blit = &r100_copy_blit,
 	.copy_dma = &r200_copy_dma,
@@ -520,14 +552,18 @@ static struct radeon_asic rv515_asic = {
 	.asic_reset = &rs600_asic_reset,
 	.gart_tlb_flush = &rv370_pcie_gart_tlb_flush,
 	.gart_set_page = &rv370_pcie_gart_set_page,
-	.cp_commit = &r100_cp_commit,
 	.ring_start = &rv515_ring_start,
 	.ring_test = &r100_ring_test,
-	.ring_ib_execute = &r100_ring_ib_execute,
+	.ring = {
+		[RADEON_RING_TYPE_GFX_INDEX] = {
+			.ib_execute = &r100_ring_ib_execute,
+			.emit_fence = &r300_fence_ring_emit,
+			.emit_semaphore = &r100_semaphore_ring_emit,
+		}
+	},
 	.irq_set = &rs600_irq_set,
 	.irq_process = &rs600_irq_process,
 	.get_vblank_counter = &rs600_get_vblank_counter,
-	.fence_ring_emit = &r300_fence_ring_emit,
 	.cs_parse = &r300_cs_parse,
 	.copy_blit = &r100_copy_blit,
 	.copy_dma = &r200_copy_dma,
@@ -568,14 +604,18 @@ static struct radeon_asic r520_asic = {
 	.asic_reset = &rs600_asic_reset,
 	.gart_tlb_flush = &rv370_pcie_gart_tlb_flush,
 	.gart_set_page = &rv370_pcie_gart_set_page,
-	.cp_commit = &r100_cp_commit,
 	.ring_start = &rv515_ring_start,
 	.ring_test = &r100_ring_test,
-	.ring_ib_execute = &r100_ring_ib_execute,
+	.ring = {
+		[RADEON_RING_TYPE_GFX_INDEX] = {
+			.ib_execute = &r100_ring_ib_execute,
+			.emit_fence = &r300_fence_ring_emit,
+			.emit_semaphore = &r100_semaphore_ring_emit,
+		}
+	},
 	.irq_set = &rs600_irq_set,
 	.irq_process = &rs600_irq_process,
 	.get_vblank_counter = &rs600_get_vblank_counter,
-	.fence_ring_emit = &r300_fence_ring_emit,
 	.cs_parse = &r300_cs_parse,
 	.copy_blit = &r100_copy_blit,
 	.copy_dma = &r200_copy_dma,
@@ -611,18 +651,22 @@ static struct radeon_asic r600_asic = {
 	.fini = &r600_fini,
 	.suspend = &r600_suspend,
 	.resume = &r600_resume,
-	.cp_commit = &r600_cp_commit,
 	.vga_set_state = &r600_vga_set_state,
 	.gpu_is_lockup = &r600_gpu_is_lockup,
 	.asic_reset = &r600_asic_reset,
 	.gart_tlb_flush = &r600_pcie_gart_tlb_flush,
 	.gart_set_page = &rs600_gart_set_page,
 	.ring_test = &r600_ring_test,
-	.ring_ib_execute = &r600_ring_ib_execute,
+	.ring = {
+		[RADEON_RING_TYPE_GFX_INDEX] = {
+			.ib_execute = &r600_ring_ib_execute,
+			.emit_fence = &r600_fence_ring_emit,
+			.emit_semaphore = &r600_semaphore_ring_emit,
+		}
+	},
 	.irq_set = &r600_irq_set,
 	.irq_process = &r600_irq_process,
 	.get_vblank_counter = &rs600_get_vblank_counter,
-	.fence_ring_emit = &r600_fence_ring_emit,
 	.cs_parse = &r600_cs_parse,
 	.copy_blit = &r600_copy_blit,
 	.copy_dma = NULL,
@@ -658,18 +702,22 @@ static struct radeon_asic rs780_asic = {
 	.fini = &r600_fini,
 	.suspend = &r600_suspend,
 	.resume = &r600_resume,
-	.cp_commit = &r600_cp_commit,
 	.gpu_is_lockup = &r600_gpu_is_lockup,
 	.vga_set_state = &r600_vga_set_state,
 	.asic_reset = &r600_asic_reset,
 	.gart_tlb_flush = &r600_pcie_gart_tlb_flush,
 	.gart_set_page = &rs600_gart_set_page,
 	.ring_test = &r600_ring_test,
-	.ring_ib_execute = &r600_ring_ib_execute,
+	.ring = {
+		[RADEON_RING_TYPE_GFX_INDEX] = {
+			.ib_execute = &r600_ring_ib_execute,
+			.emit_fence = &r600_fence_ring_emit,
+			.emit_semaphore = &r600_semaphore_ring_emit,
+		}
+	},
 	.irq_set = &r600_irq_set,
 	.irq_process = &r600_irq_process,
 	.get_vblank_counter = &rs600_get_vblank_counter,
-	.fence_ring_emit = &r600_fence_ring_emit,
 	.cs_parse = &r600_cs_parse,
 	.copy_blit = &r600_copy_blit,
 	.copy_dma = NULL,
@@ -705,18 +753,22 @@ static struct radeon_asic rv770_asic = {
 	.fini = &rv770_fini,
 	.suspend = &rv770_suspend,
 	.resume = &rv770_resume,
-	.cp_commit = &r600_cp_commit,
 	.asic_reset = &r600_asic_reset,
 	.gpu_is_lockup = &r600_gpu_is_lockup,
 	.vga_set_state = &r600_vga_set_state,
 	.gart_tlb_flush = &r600_pcie_gart_tlb_flush,
 	.gart_set_page = &rs600_gart_set_page,
 	.ring_test = &r600_ring_test,
-	.ring_ib_execute = &r600_ring_ib_execute,
+	.ring = {
+		[RADEON_RING_TYPE_GFX_INDEX] = {
+			.ib_execute = &r600_ring_ib_execute,
+			.emit_fence = &r600_fence_ring_emit,
+			.emit_semaphore = &r600_semaphore_ring_emit,
+		}
+	},
 	.irq_set = &r600_irq_set,
 	.irq_process = &r600_irq_process,
 	.get_vblank_counter = &rs600_get_vblank_counter,
-	.fence_ring_emit = &r600_fence_ring_emit,
 	.cs_parse = &r600_cs_parse,
 	.copy_blit = &r600_copy_blit,
 	.copy_dma = NULL,
@@ -752,18 +804,22 @@ static struct radeon_asic evergreen_asic = {
 	.fini = &evergreen_fini,
 	.suspend = &evergreen_suspend,
 	.resume = &evergreen_resume,
-	.cp_commit = &r600_cp_commit,
 	.gpu_is_lockup = &evergreen_gpu_is_lockup,
 	.asic_reset = &evergreen_asic_reset,
 	.vga_set_state = &r600_vga_set_state,
 	.gart_tlb_flush = &evergreen_pcie_gart_tlb_flush,
 	.gart_set_page = &rs600_gart_set_page,
 	.ring_test = &r600_ring_test,
-	.ring_ib_execute = &evergreen_ring_ib_execute,
+	.ring = {
+		[RADEON_RING_TYPE_GFX_INDEX] = {
+			.ib_execute = &evergreen_ring_ib_execute,
+			.emit_fence = &r600_fence_ring_emit,
+			.emit_semaphore = &r600_semaphore_ring_emit,
+		}
+	},
 	.irq_set = &evergreen_irq_set,
 	.irq_process = &evergreen_irq_process,
 	.get_vblank_counter = &evergreen_get_vblank_counter,
-	.fence_ring_emit = &r600_fence_ring_emit,
 	.cs_parse = &evergreen_cs_parse,
 	.copy_blit = &r600_copy_blit,
 	.copy_dma = NULL,
@@ -799,18 +855,22 @@ static struct radeon_asic sumo_asic = {
 	.fini = &evergreen_fini,
 	.suspend = &evergreen_suspend,
 	.resume = &evergreen_resume,
-	.cp_commit = &r600_cp_commit,
 	.gpu_is_lockup = &evergreen_gpu_is_lockup,
 	.asic_reset = &evergreen_asic_reset,
 	.vga_set_state = &r600_vga_set_state,
 	.gart_tlb_flush = &evergreen_pcie_gart_tlb_flush,
 	.gart_set_page = &rs600_gart_set_page,
 	.ring_test = &r600_ring_test,
-	.ring_ib_execute = &evergreen_ring_ib_execute,
+	.ring = {
+		[RADEON_RING_TYPE_GFX_INDEX] = {
+			.ib_execute = &evergreen_ring_ib_execute,
+			.emit_fence = &r600_fence_ring_emit,
+			.emit_semaphore = &r600_semaphore_ring_emit,
+		}
+	},
 	.irq_set = &evergreen_irq_set,
 	.irq_process = &evergreen_irq_process,
 	.get_vblank_counter = &evergreen_get_vblank_counter,
-	.fence_ring_emit = &r600_fence_ring_emit,
 	.cs_parse = &evergreen_cs_parse,
 	.copy_blit = &r600_copy_blit,
 	.copy_dma = NULL,
@@ -846,18 +906,22 @@ static struct radeon_asic btc_asic = {
 	.fini = &evergreen_fini,
 	.suspend = &evergreen_suspend,
 	.resume = &evergreen_resume,
-	.cp_commit = &r600_cp_commit,
 	.gpu_is_lockup = &evergreen_gpu_is_lockup,
 	.asic_reset = &evergreen_asic_reset,
 	.vga_set_state = &r600_vga_set_state,
 	.gart_tlb_flush = &evergreen_pcie_gart_tlb_flush,
 	.gart_set_page = &rs600_gart_set_page,
 	.ring_test = &r600_ring_test,
-	.ring_ib_execute = &evergreen_ring_ib_execute,
+	.ring = {
+		[RADEON_RING_TYPE_GFX_INDEX] = {
+			.ib_execute = &evergreen_ring_ib_execute,
+			.emit_fence = &r600_fence_ring_emit,
+			.emit_semaphore = &r600_semaphore_ring_emit,
+		}
+	},
 	.irq_set = &evergreen_irq_set,
 	.irq_process = &evergreen_irq_process,
 	.get_vblank_counter = &evergreen_get_vblank_counter,
-	.fence_ring_emit = &r600_fence_ring_emit,
 	.cs_parse = &evergreen_cs_parse,
 	.copy_blit = &r600_copy_blit,
 	.copy_dma = NULL,
@@ -893,18 +957,32 @@ static struct radeon_asic cayman_asic = {
 	.fini = &cayman_fini,
 	.suspend = &cayman_suspend,
 	.resume = &cayman_resume,
-	.cp_commit = &r600_cp_commit,
 	.gpu_is_lockup = &cayman_gpu_is_lockup,
 	.asic_reset = &cayman_asic_reset,
 	.vga_set_state = &r600_vga_set_state,
 	.gart_tlb_flush = &cayman_pcie_gart_tlb_flush,
 	.gart_set_page = &rs600_gart_set_page,
 	.ring_test = &r600_ring_test,
-	.ring_ib_execute = &evergreen_ring_ib_execute,
+	.ring = {
+		[RADEON_RING_TYPE_GFX_INDEX] = {
+			.ib_execute = &evergreen_ring_ib_execute,
+			.emit_fence = &cayman_fence_ring_emit,
+			.emit_semaphore = &r600_semaphore_ring_emit,
+		},
+		[CAYMAN_RING_TYPE_CP1_INDEX] = {
+			.ib_execute = &r600_ring_ib_execute,
+			.emit_fence = &cayman_fence_ring_emit,
+			.emit_semaphore = &r600_semaphore_ring_emit,
+		},
+		[CAYMAN_RING_TYPE_CP2_INDEX] = {
+			.ib_execute = &r600_ring_ib_execute,
+			.emit_fence = &cayman_fence_ring_emit,
+			.emit_semaphore = &r600_semaphore_ring_emit,
+		}
+	},
 	.irq_set = &evergreen_irq_set,
 	.irq_process = &evergreen_irq_process,
 	.get_vblank_counter = &evergreen_get_vblank_counter,
-	.fence_ring_emit = &r600_fence_ring_emit,
 	.cs_parse = &evergreen_cs_parse,
 	.copy_blit = &r600_copy_blit,
 	.copy_dma = NULL,

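With these per-ring tables, the dispatch macros from radeon.h simply index by ring. For cayman, for example (illustration of the expansion only):

	/* radeon_ring_ib_execute(rdev, CAYMAN_RING_TYPE_CP1_INDEX, ib)
	 * expands to:
	 */
	rdev->asic->ring[CAYMAN_RING_TYPE_CP1_INDEX].ib_execute(rdev, ib);
	/* i.e. r600_ring_ib_execute(), whereas the GFX ring entry
	 * resolves to evergreen_ring_ib_execute()
	 */
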
+ 22 - 14
drivers/gpu/drm/radeon/radeon_asic.h

@@ -58,17 +58,20 @@ void r100_fini(struct radeon_device *rdev);
 int r100_suspend(struct radeon_device *rdev);
 int r100_resume(struct radeon_device *rdev);
 void r100_vga_set_state(struct radeon_device *rdev, bool state);
-bool r100_gpu_is_lockup(struct radeon_device *rdev);
+bool r100_gpu_is_lockup(struct radeon_device *rdev, struct radeon_ring *cp);
 int r100_asic_reset(struct radeon_device *rdev);
 u32 r100_get_vblank_counter(struct radeon_device *rdev, int crtc);
 void r100_pci_gart_tlb_flush(struct radeon_device *rdev);
 int r100_pci_gart_set_page(struct radeon_device *rdev, int i, uint64_t addr);
-void r100_cp_commit(struct radeon_device *rdev);
 void r100_ring_start(struct radeon_device *rdev);
 int r100_irq_set(struct radeon_device *rdev);
 int r100_irq_process(struct radeon_device *rdev);
 void r100_fence_ring_emit(struct radeon_device *rdev,
 			  struct radeon_fence *fence);
+void r100_semaphore_ring_emit(struct radeon_device *rdev,
+			      struct radeon_ring *cp,
+			      struct radeon_semaphore *semaphore,
+			      bool emit_wait);
 int r100_cs_parse(struct radeon_cs_parser *p);
 void r100_pll_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v);
 uint32_t r100_pll_rreg(struct radeon_device *rdev, uint32_t reg);
@@ -83,7 +86,7 @@ int r100_set_surface_reg(struct radeon_device *rdev, int reg,
 void r100_clear_surface_reg(struct radeon_device *rdev, int reg);
 void r100_bandwidth_update(struct radeon_device *rdev);
 void r100_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib);
-int r100_ring_test(struct radeon_device *rdev);
+int r100_ring_test(struct radeon_device *rdev, struct radeon_ring *cp);
 void r100_hpd_init(struct radeon_device *rdev);
 void r100_hpd_fini(struct radeon_device *rdev);
 bool r100_hpd_sense(struct radeon_device *rdev, enum radeon_hpd_id hpd);
@@ -101,12 +104,12 @@ void r100_pci_gart_disable(struct radeon_device *rdev);
 int r100_debugfs_mc_info_init(struct radeon_device *rdev);
 int r100_gui_wait_for_idle(struct radeon_device *rdev);
 void r100_gpu_lockup_update(struct r100_gpu_lockup *lockup,
-			    struct radeon_cp *cp);
+			    struct radeon_ring *cp);
 bool r100_gpu_cp_is_lockup(struct radeon_device *rdev,
 			   struct r100_gpu_lockup *lockup,
-			   struct radeon_cp *cp);
+			   struct radeon_ring *cp);
 void r100_ib_fini(struct radeon_device *rdev);
-int r100_ib_init(struct radeon_device *rdev);
+int r100_ib_test(struct radeon_device *rdev);
 void r100_irq_disable(struct radeon_device *rdev);
 void r100_mc_stop(struct radeon_device *rdev, struct r100_mc_save *save);
 void r100_mc_resume(struct radeon_device *rdev, struct r100_mc_save *save);
@@ -154,7 +157,7 @@ extern int r300_init(struct radeon_device *rdev);
 extern void r300_fini(struct radeon_device *rdev);
 extern int r300_suspend(struct radeon_device *rdev);
 extern int r300_resume(struct radeon_device *rdev);
-extern bool r300_gpu_is_lockup(struct radeon_device *rdev);
+extern bool r300_gpu_is_lockup(struct radeon_device *rdev, struct radeon_ring *cp);
 extern int r300_asic_reset(struct radeon_device *rdev);
 extern void r300_ring_start(struct radeon_device *rdev);
 extern void r300_fence_ring_emit(struct radeon_device *rdev,
@@ -293,22 +296,25 @@ int r600_resume(struct radeon_device *rdev);
 void r600_vga_set_state(struct radeon_device *rdev, bool state);
 int r600_wb_init(struct radeon_device *rdev);
 void r600_wb_fini(struct radeon_device *rdev);
-void r600_cp_commit(struct radeon_device *rdev);
 void r600_pcie_gart_tlb_flush(struct radeon_device *rdev);
 uint32_t r600_pciep_rreg(struct radeon_device *rdev, uint32_t reg);
 void r600_pciep_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v);
 int r600_cs_parse(struct radeon_cs_parser *p);
 void r600_fence_ring_emit(struct radeon_device *rdev,
 			  struct radeon_fence *fence);
-bool r600_gpu_is_lockup(struct radeon_device *rdev);
+void r600_semaphore_ring_emit(struct radeon_device *rdev,
+			      struct radeon_ring *cp,
+			      struct radeon_semaphore *semaphore,
+			      bool emit_wait);
+bool r600_gpu_is_lockup(struct radeon_device *rdev, struct radeon_ring *cp);
 int r600_asic_reset(struct radeon_device *rdev);
 int r600_set_surface_reg(struct radeon_device *rdev, int reg,
 			 uint32_t tiling_flags, uint32_t pitch,
 			 uint32_t offset, uint32_t obj_size);
 void r600_clear_surface_reg(struct radeon_device *rdev, int reg);
-int r600_ib_test(struct radeon_device *rdev);
+int r600_ib_test(struct radeon_device *rdev, int ring);
 void r600_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib);
-int r600_ring_test(struct radeon_device *rdev);
+int r600_ring_test(struct radeon_device *rdev, struct radeon_ring *cp);
 int r600_copy_blit(struct radeon_device *rdev,
 		   uint64_t src_offset, uint64_t dst_offset,
 		   unsigned num_gpu_pages, struct radeon_fence *fence);
@@ -328,7 +334,7 @@ extern int r600_get_pcie_lanes(struct radeon_device *rdev);
 bool r600_card_posted(struct radeon_device *rdev);
 void r600_cp_stop(struct radeon_device *rdev);
 int r600_cp_start(struct radeon_device *rdev);
-void r600_ring_init(struct radeon_device *rdev, unsigned ring_size);
+void r600_ring_init(struct radeon_device *rdev, struct radeon_ring *cp, unsigned ring_size);
 int r600_cp_resume(struct radeon_device *rdev);
 void r600_cp_fini(struct radeon_device *rdev);
 int r600_count_pipe_bits(uint32_t val);
@@ -397,7 +403,7 @@ int evergreen_init(struct radeon_device *rdev);
 void evergreen_fini(struct radeon_device *rdev);
 int evergreen_suspend(struct radeon_device *rdev);
 int evergreen_resume(struct radeon_device *rdev);
-bool evergreen_gpu_is_lockup(struct radeon_device *rdev);
+bool evergreen_gpu_is_lockup(struct radeon_device *rdev, struct radeon_ring *cp);
 int evergreen_asic_reset(struct radeon_device *rdev);
 void evergreen_bandwidth_update(struct radeon_device *rdev);
 void evergreen_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib);
@@ -423,12 +429,14 @@ int evergreen_blit_init(struct radeon_device *rdev);
 /*
  * cayman
  */
+void cayman_fence_ring_emit(struct radeon_device *rdev,
+			    struct radeon_fence *fence);
 void cayman_pcie_gart_tlb_flush(struct radeon_device *rdev);
 int cayman_init(struct radeon_device *rdev);
 void cayman_fini(struct radeon_device *rdev);
 int cayman_suspend(struct radeon_device *rdev);
 int cayman_resume(struct radeon_device *rdev);
-bool cayman_gpu_is_lockup(struct radeon_device *rdev);
+bool cayman_gpu_is_lockup(struct radeon_device *rdev, struct radeon_ring *cp);
 int cayman_asic_reset(struct radeon_device *rdev);
 
 #endif

+ 1 - 1
drivers/gpu/drm/radeon/radeon_benchmark.c

@@ -43,7 +43,7 @@ static int radeon_benchmark_do_move(struct radeon_device *rdev, unsigned size,
 
 	start_jiffies = jiffies;
 	for (i = 0; i < n; i++) {
-		r = radeon_fence_create(rdev, &fence);
+		r = radeon_fence_create(rdev, &fence, RADEON_RING_TYPE_GFX_INDEX);
 		if (r)
 			return r;
 

+ 4 - 3
drivers/gpu/drm/radeon/radeon_cs.c

@@ -58,7 +58,7 @@ int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
 
 		duplicate = false;
 		r = (struct drm_radeon_cs_reloc *)&chunk->kdata[i*4];
-		for (j = 0; j < p->nrelocs; j++) {
+		for (j = 0; j < i; j++) {
 			if (r->handle == p->relocs[j].handle) {
 				p->relocs_ptr[i] = &p->relocs[j];
 				duplicate = true;
@@ -84,7 +84,8 @@ int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
 			p->relocs[i].flags = r->flags;
 			radeon_bo_list_add_object(&p->relocs[i].lobj,
 						  &p->validated);
-		}
+		} else
+			p->relocs[i].handle = 0;
 	}
 	return radeon_bo_list_validate(&p->validated);
 }
@@ -245,7 +246,7 @@ int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 		radeon_mutex_unlock(&rdev->cs_mutex);
 		return r;
 	}
-	r =  radeon_ib_get(rdev, &parser.ib);
+	r =  radeon_ib_get(rdev, RADEON_RING_TYPE_GFX_INDEX, &parser.ib);
 	if (r) {
 		DRM_ERROR("Failed to get ib !\n");
 		radeon_cs_parser_fini(&parser, r);

+ 37 - 24
drivers/gpu/drm/radeon/radeon_device.c

@@ -718,17 +718,20 @@ int radeon_device_init(struct radeon_device *rdev,
 	 * can recall function without having locking issues */
 	radeon_mutex_init(&rdev->cs_mutex);
 	mutex_init(&rdev->ib_pool.mutex);
-	mutex_init(&rdev->cp.mutex);
+	for (i = 0; i < RADEON_NUM_RINGS; ++i)
+		mutex_init(&rdev->ring[i].mutex);
 	mutex_init(&rdev->dc_hw_i2c_mutex);
 	if (rdev->family >= CHIP_R600)
 		spin_lock_init(&rdev->ih.lock);
 	mutex_init(&rdev->gem.mutex);
 	mutex_init(&rdev->pm.mutex);
 	mutex_init(&rdev->vram_mutex);
-	rwlock_init(&rdev->fence_drv.lock);
+	rwlock_init(&rdev->fence_lock);
+	rwlock_init(&rdev->semaphore_drv.lock);
 	INIT_LIST_HEAD(&rdev->gem.objects);
 	init_waitqueue_head(&rdev->irq.vblank_queue);
 	init_waitqueue_head(&rdev->irq.idle_queue);
+	INIT_LIST_HEAD(&rdev->semaphore_drv.free);
 
 	/* Set asic functions */
 	r = radeon_asic_init(rdev);
@@ -820,15 +823,20 @@ int radeon_device_init(struct radeon_device *rdev,
 		if (r)
 			return r;
 	}
-	if (radeon_testing) {
+	if ((radeon_testing & 1)) {
 		radeon_test_moves(rdev);
 	}
+	if ((radeon_testing & 2)) {
+		radeon_test_syncing(rdev);
+	}
 	if (radeon_benchmarking) {
 		radeon_benchmark(rdev, radeon_benchmarking);
 	}
 	return 0;
 }
 
+static void radeon_debugfs_remove_files(struct radeon_device *rdev);
+
 void radeon_device_fini(struct radeon_device *rdev)
 {
 	DRM_INFO("radeon: finishing device.\n");
@@ -843,6 +851,7 @@ void radeon_device_fini(struct radeon_device *rdev)
 	rdev->rio_mem = NULL;
 	iounmap(rdev->rmmio);
 	rdev->rmmio = NULL;
+	radeon_debugfs_remove_files(rdev);
 }
 
 
@@ -854,7 +863,7 @@ int radeon_suspend_kms(struct drm_device *dev, pm_message_t state)
 	struct radeon_device *rdev;
 	struct drm_crtc *crtc;
 	struct drm_connector *connector;
-	int r;
+	int i, r;
 
 	if (dev == NULL || dev->dev_private == NULL) {
 		return -ENODEV;
@@ -893,7 +902,8 @@ int radeon_suspend_kms(struct drm_device *dev, pm_message_t state)
 	/* evict vram memory */
 	radeon_bo_evict_vram(rdev);
 	/* wait for gpu to finish processing current batch */
-	radeon_fence_wait_last(rdev);
+	for (i = 0; i < RADEON_NUM_RINGS; i++)
+		radeon_fence_wait_last(rdev, i);
 
 	radeon_save_bios_scratch_regs(rdev);
 
@@ -992,36 +1002,29 @@ int radeon_gpu_reset(struct radeon_device *rdev)
 /*
  * Debugfs
  */
-struct radeon_debugfs {
-	struct drm_info_list	*files;
-	unsigned		num_files;
-};
-static struct radeon_debugfs _radeon_debugfs[RADEON_DEBUGFS_MAX_COMPONENTS];
-static unsigned _radeon_debugfs_count = 0;
-
 int radeon_debugfs_add_files(struct radeon_device *rdev,
 			     struct drm_info_list *files,
 			     unsigned nfiles)
 {
 	unsigned i;
 
-	for (i = 0; i < _radeon_debugfs_count; i++) {
-		if (_radeon_debugfs[i].files == files) {
+	for (i = 0; i < rdev->debugfs_count; i++) {
+		if (rdev->debugfs[i].files == files) {
 			/* Already registered */
 			return 0;
 		}
 	}
 
-	i = _radeon_debugfs_count + 1;
+	i = rdev->debugfs_count + 1;
 	if (i > RADEON_DEBUGFS_MAX_COMPONENTS) {
 		DRM_ERROR("Reached maximum number of debugfs components.\n");
 		DRM_ERROR("Report so we increase "
 		          "RADEON_DEBUGFS_MAX_COMPONENTS.\n");
 		return -EINVAL;
 	}
-	_radeon_debugfs[_radeon_debugfs_count].files = files;
-	_radeon_debugfs[_radeon_debugfs_count].num_files = nfiles;
-	_radeon_debugfs_count = i;
+	rdev->debugfs[rdev->debugfs_count].files = files;
+	rdev->debugfs[rdev->debugfs_count].num_files = nfiles;
+	rdev->debugfs_count = i;
 #if defined(CONFIG_DEBUG_FS)
 	drm_debugfs_create_files(files, nfiles,
 				 rdev->ddev->control->debugfs_root,
@@ -1033,6 +1036,22 @@ int radeon_debugfs_add_files(struct radeon_device *rdev,
 	return 0;
 }
 
+static void radeon_debugfs_remove_files(struct radeon_device *rdev)
+{
+#if defined(CONFIG_DEBUG_FS)
+	unsigned i;
+
+	for (i = 0; i < rdev->debugfs_count; i++) {
+		drm_debugfs_remove_files(rdev->debugfs[i].files,
+					 rdev->debugfs[i].num_files,
+					 rdev->ddev->control);
+		drm_debugfs_remove_files(rdev->debugfs[i].files,
+					 rdev->debugfs[i].num_files,
+					 rdev->ddev->primary);
+	}
+#endif
+}
+
 #if defined(CONFIG_DEBUG_FS)
 int radeon_debugfs_init(struct drm_minor *minor)
 {
@@ -1041,11 +1060,5 @@ int radeon_debugfs_init(struct drm_minor *minor)
 
 void radeon_debugfs_cleanup(struct drm_minor *minor)
 {
-	unsigned i;
-
-	for (i = 0; i < _radeon_debugfs_count; i++) {
-		drm_debugfs_remove_files(_radeon_debugfs[i].files,
-					 _radeon_debugfs[i].num_files, minor);
-	}
 }
 #endif

+ 183 - 121
drivers/gpu/drm/radeon/radeon_fence.c

@@ -40,32 +40,24 @@
 #include "radeon.h"
 #include "radeon_trace.h"
 
-static void radeon_fence_write(struct radeon_device *rdev, u32 seq)
+static void radeon_fence_write(struct radeon_device *rdev, u32 seq, int ring)
 {
 	if (rdev->wb.enabled) {
-		u32 scratch_index;
-		if (rdev->wb.use_event)
-			scratch_index = R600_WB_EVENT_OFFSET + rdev->fence_drv.scratch_reg - rdev->scratch.reg_base;
-		else
-			scratch_index = RADEON_WB_SCRATCH_OFFSET + rdev->fence_drv.scratch_reg - rdev->scratch.reg_base;
-		rdev->wb.wb[scratch_index/4] = cpu_to_le32(seq);
-	} else
-		WREG32(rdev->fence_drv.scratch_reg, seq);
+		*rdev->fence_drv[ring].cpu_addr = cpu_to_le32(seq);
+	} else {
+		WREG32(rdev->fence_drv[ring].scratch_reg, seq);
+	}
 }
 
-static u32 radeon_fence_read(struct radeon_device *rdev)
+static u32 radeon_fence_read(struct radeon_device *rdev, int ring)
 {
-	u32 seq;
+	u32 seq = 0;
 
 	if (rdev->wb.enabled) {
-		u32 scratch_index;
-		if (rdev->wb.use_event)
-			scratch_index = R600_WB_EVENT_OFFSET + rdev->fence_drv.scratch_reg - rdev->scratch.reg_base;
-		else
-			scratch_index = RADEON_WB_SCRATCH_OFFSET + rdev->fence_drv.scratch_reg - rdev->scratch.reg_base;
-		seq = le32_to_cpu(rdev->wb.wb[scratch_index/4]);
-	} else
-		seq = RREG32(rdev->fence_drv.scratch_reg);
+		seq = le32_to_cpu(*rdev->fence_drv[ring].cpu_addr);
+	} else {
+		seq = RREG32(rdev->fence_drv[ring].scratch_reg);
+	}
 	return seq;
 }
 
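The per-ring cpu_addr/gpu_addr pair replaces the scratch-index arithmetic deleted above; it is computed once when the ring is started, roughly like this (scratch-register case shown, the use_event case picks a different write-back offset — a sketch, not the exact commit code):

	index = RADEON_WB_SCRATCH_OFFSET +
		rdev->fence_drv[ring].scratch_reg - rdev->scratch.reg_base;
	rdev->fence_drv[ring].cpu_addr = &rdev->wb.wb[index/4];
	rdev->fence_drv[ring].gpu_addr = rdev->wb.gpu_addr + index;
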
@@ -73,28 +65,28 @@ int radeon_fence_emit(struct radeon_device *rdev, struct radeon_fence *fence)
 {
 	unsigned long irq_flags;
 
-	write_lock_irqsave(&rdev->fence_drv.lock, irq_flags);
-	if (fence->emited) {
-		write_unlock_irqrestore(&rdev->fence_drv.lock, irq_flags);
+	write_lock_irqsave(&rdev->fence_lock, irq_flags);
+	if (fence->emitted) {
+		write_unlock_irqrestore(&rdev->fence_lock, irq_flags);
 		return 0;
 	}
-	fence->seq = atomic_add_return(1, &rdev->fence_drv.seq);
-	if (!rdev->cp.ready)
+	fence->seq = atomic_add_return(1, &rdev->fence_drv[fence->ring].seq);
+	if (!rdev->ring[fence->ring].ready)
 		/* FIXME: cp is not running, assume everything is done
 		 * right away
 		 */
-		radeon_fence_write(rdev, fence->seq);
+		radeon_fence_write(rdev, fence->seq, fence->ring);
 	else
-		radeon_fence_ring_emit(rdev, fence);
+		radeon_fence_ring_emit(rdev, fence->ring, fence);
 
 	trace_radeon_fence_emit(rdev->ddev, fence->seq);
-	fence->emited = true;
-	list_move_tail(&fence->list, &rdev->fence_drv.emited);
-	write_unlock_irqrestore(&rdev->fence_drv.lock, irq_flags);
+	fence->emitted = true;
+	list_move_tail(&fence->list, &rdev->fence_drv[fence->ring].emitted);
+	write_unlock_irqrestore(&rdev->fence_lock, irq_flags);
 	return 0;
 }
 
-static bool radeon_fence_poll_locked(struct radeon_device *rdev)
+static bool radeon_fence_poll_locked(struct radeon_device *rdev, int ring)
 {
 	struct radeon_fence *fence;
 	struct list_head *i, *n;
@@ -102,34 +94,34 @@ static bool radeon_fence_poll_locked(struct radeon_device *rdev)
 	bool wake = false;
 	unsigned long cjiffies;
 
-	seq = radeon_fence_read(rdev);
-	if (seq != rdev->fence_drv.last_seq) {
-		rdev->fence_drv.last_seq = seq;
-		rdev->fence_drv.last_jiffies = jiffies;
-		rdev->fence_drv.last_timeout = RADEON_FENCE_JIFFIES_TIMEOUT;
+	seq = radeon_fence_read(rdev, ring);
+	if (seq != rdev->fence_drv[ring].last_seq) {
+		rdev->fence_drv[ring].last_seq = seq;
+		rdev->fence_drv[ring].last_jiffies = jiffies;
+		rdev->fence_drv[ring].last_timeout = RADEON_FENCE_JIFFIES_TIMEOUT;
 	} else {
 		cjiffies = jiffies;
-		if (time_after(cjiffies, rdev->fence_drv.last_jiffies)) {
-			cjiffies -= rdev->fence_drv.last_jiffies;
-			if (time_after(rdev->fence_drv.last_timeout, cjiffies)) {
+		if (time_after(cjiffies, rdev->fence_drv[ring].last_jiffies)) {
+			cjiffies -= rdev->fence_drv[ring].last_jiffies;
+			if (time_after(rdev->fence_drv[ring].last_timeout, cjiffies)) {
 				/* update the timeout */
-				rdev->fence_drv.last_timeout -= cjiffies;
+				rdev->fence_drv[ring].last_timeout -= cjiffies;
 			} else {
 				/* the 500ms timeout is elapsed we should test
 				 * for GPU lockup
 				 */
-				rdev->fence_drv.last_timeout = 1;
+				rdev->fence_drv[ring].last_timeout = 1;
 			}
 		} else {
 			/* wrap around: update last jiffies, we will just wait
 			 * a little longer
 			 */
-			rdev->fence_drv.last_jiffies = cjiffies;
+			rdev->fence_drv[ring].last_jiffies = cjiffies;
 		}
 		return false;
 	}
 	n = NULL;
-	list_for_each(i, &rdev->fence_drv.emited) {
+	list_for_each(i, &rdev->fence_drv[ring].emitted) {
 		fence = list_entry(i, struct radeon_fence, list);
 		if (fence->seq == seq) {
 			n = i;
@@ -141,11 +133,11 @@ static bool radeon_fence_poll_locked(struct radeon_device *rdev)
 		i = n;
 		do {
 			n = i->prev;
-			list_move_tail(i, &rdev->fence_drv.signaled);
+			list_move_tail(i, &rdev->fence_drv[ring].signaled);
 			fence = list_entry(i, struct radeon_fence, list);
 			fence->signaled = true;
 			i = n;
-		} while (i != &rdev->fence_drv.emited);
+		} while (i != &rdev->fence_drv[ring].emitted);
 		wake = true;
 	}
 	return wake;
@@ -157,14 +149,16 @@ static void radeon_fence_destroy(struct kref *kref)
         struct radeon_fence *fence;
 
 	fence = container_of(kref, struct radeon_fence, kref);
-	write_lock_irqsave(&fence->rdev->fence_drv.lock, irq_flags);
+	write_lock_irqsave(&fence->rdev->fence_lock, irq_flags);
 	list_del(&fence->list);
-	fence->emited = false;
-	write_unlock_irqrestore(&fence->rdev->fence_drv.lock, irq_flags);
+	fence->emitted = false;
+	write_unlock_irqrestore(&fence->rdev->fence_lock, irq_flags);
 	kfree(fence);
 }
 
-int radeon_fence_create(struct radeon_device *rdev, struct radeon_fence **fence)
+int radeon_fence_create(struct radeon_device *rdev,
+			struct radeon_fence **fence,
+			int ring)
 {
 	unsigned long irq_flags;
 
@@ -174,18 +168,18 @@ int radeon_fence_create(struct radeon_device *rdev, struct radeon_fence **fence)
 	}
 	kref_init(&((*fence)->kref));
 	(*fence)->rdev = rdev;
-	(*fence)->emited = false;
+	(*fence)->emitted = false;
 	(*fence)->signaled = false;
 	(*fence)->seq = 0;
+	(*fence)->ring = ring;
 	INIT_LIST_HEAD(&(*fence)->list);
 
-	write_lock_irqsave(&rdev->fence_drv.lock, irq_flags);
-	list_add_tail(&(*fence)->list, &rdev->fence_drv.created);
-	write_unlock_irqrestore(&rdev->fence_drv.lock, irq_flags);
+	write_lock_irqsave(&rdev->fence_lock, irq_flags);
+	list_add_tail(&(*fence)->list, &rdev->fence_drv[ring].created);
+	write_unlock_irqrestore(&rdev->fence_lock, irq_flags);
 	return 0;
 }
 
-
 bool radeon_fence_signaled(struct radeon_fence *fence)
 {
 	unsigned long irq_flags;
@@ -197,21 +191,21 @@ bool radeon_fence_signaled(struct radeon_fence *fence)
 	if (fence->rdev->gpu_lockup)
 		return true;
 
-	write_lock_irqsave(&fence->rdev->fence_drv.lock, irq_flags);
+	write_lock_irqsave(&fence->rdev->fence_lock, irq_flags);
 	signaled = fence->signaled;
 	/* if we are shutting down report all fences as signaled */
 	if (fence->rdev->shutdown) {
 		signaled = true;
 	}
-	if (!fence->emited) {
-		WARN(1, "Querying an unemited fence : %p !\n", fence);
+	if (!fence->emitted) {
+		WARN(1, "Querying an unemitted fence: %p!\n", fence);
 		signaled = true;
 	}
 	if (!signaled) {
-		radeon_fence_poll_locked(fence->rdev);
+		radeon_fence_poll_locked(fence->rdev, fence->ring);
 		signaled = fence->signaled;
 	}
-	write_unlock_irqrestore(&fence->rdev->fence_drv.lock, irq_flags);
+	write_unlock_irqrestore(&fence->rdev->fence_lock, irq_flags);
 	return signaled;
 }
 
@@ -230,24 +224,24 @@ int radeon_fence_wait(struct radeon_fence *fence, bool intr)
 	if (radeon_fence_signaled(fence)) {
 		return 0;
 	}
-	timeout = rdev->fence_drv.last_timeout;
+	timeout = rdev->fence_drv[fence->ring].last_timeout;
 retry:
 	/* save current sequence used to check for GPU lockup */
-	seq = rdev->fence_drv.last_seq;
+	seq = rdev->fence_drv[fence->ring].last_seq;
 	trace_radeon_fence_wait_begin(rdev->ddev, seq);
 	if (intr) {
-		radeon_irq_kms_sw_irq_get(rdev);
-		r = wait_event_interruptible_timeout(rdev->fence_drv.queue,
+		radeon_irq_kms_sw_irq_get(rdev, fence->ring);
+		r = wait_event_interruptible_timeout(rdev->fence_drv[fence->ring].queue,
 				radeon_fence_signaled(fence), timeout);
-		radeon_irq_kms_sw_irq_put(rdev);
+		radeon_irq_kms_sw_irq_put(rdev, fence->ring);
 		if (unlikely(r < 0)) {
 			return r;
 		}
 	} else {
-		radeon_irq_kms_sw_irq_get(rdev);
-		r = wait_event_timeout(rdev->fence_drv.queue,
+		radeon_irq_kms_sw_irq_get(rdev, fence->ring);
+		r = wait_event_timeout(rdev->fence_drv[fence->ring].queue,
 			 radeon_fence_signaled(fence), timeout);
-		radeon_irq_kms_sw_irq_put(rdev);
+		radeon_irq_kms_sw_irq_put(rdev, fence->ring);
 	}
 	trace_radeon_fence_wait_end(rdev->ddev, seq);
 	if (unlikely(!radeon_fence_signaled(fence))) {
@@ -258,10 +252,11 @@ retry:
 			timeout = r;
 			goto retry;
 		}
-		/* don't protect read access to rdev->fence_drv.last_seq
+		/* don't protect read access to rdev->fence_drv[fence->ring].last_seq
 		 * if we're experiencing a lockup the value doesn't change
 		 */
-		if (seq == rdev->fence_drv.last_seq && radeon_gpu_is_lockup(rdev)) {
+		if (seq == rdev->fence_drv[fence->ring].last_seq &&
+		    radeon_gpu_is_lockup(rdev, &rdev->ring[fence->ring])) {
 			/* good news we believe it's a lockup */
 			printk(KERN_WARNING "GPU lockup (waiting for 0x%08X last fence id 0x%08X)\n",
 			     fence->seq, seq);
@@ -272,20 +267,20 @@ retry:
 			r = radeon_gpu_reset(rdev);
 			if (r)
 				return r;
-			radeon_fence_write(rdev, fence->seq);
+			radeon_fence_write(rdev, fence->seq, fence->ring);
 			rdev->gpu_lockup = false;
 		}
 		timeout = RADEON_FENCE_JIFFIES_TIMEOUT;
-		write_lock_irqsave(&rdev->fence_drv.lock, irq_flags);
-		rdev->fence_drv.last_timeout = RADEON_FENCE_JIFFIES_TIMEOUT;
-		rdev->fence_drv.last_jiffies = jiffies;
-		write_unlock_irqrestore(&rdev->fence_drv.lock, irq_flags);
+		write_lock_irqsave(&rdev->fence_lock, irq_flags);
+		rdev->fence_drv[fence->ring].last_timeout = RADEON_FENCE_JIFFIES_TIMEOUT;
+		rdev->fence_drv[fence->ring].last_jiffies = jiffies;
+		write_unlock_irqrestore(&rdev->fence_lock, irq_flags);
 		goto retry;
 	}
 	return 0;
 }
 
-int radeon_fence_wait_next(struct radeon_device *rdev)
+int radeon_fence_wait_next(struct radeon_device *rdev, int ring)
 {
 	unsigned long irq_flags;
 	struct radeon_fence *fence;
@@ -294,21 +289,21 @@ int radeon_fence_wait_next(struct radeon_device *rdev)
 	if (rdev->gpu_lockup) {
 		return 0;
 	}
-	write_lock_irqsave(&rdev->fence_drv.lock, irq_flags);
-	if (list_empty(&rdev->fence_drv.emited)) {
-		write_unlock_irqrestore(&rdev->fence_drv.lock, irq_flags);
+	write_lock_irqsave(&rdev->fence_lock, irq_flags);
+	if (list_empty(&rdev->fence_drv[ring].emitted)) {
+		write_unlock_irqrestore(&rdev->fence_lock, irq_flags);
 		return 0;
 	}
-	fence = list_entry(rdev->fence_drv.emited.next,
+	fence = list_entry(rdev->fence_drv[ring].emitted.next,
 			   struct radeon_fence, list);
 	radeon_fence_ref(fence);
-	write_unlock_irqrestore(&rdev->fence_drv.lock, irq_flags);
+	write_unlock_irqrestore(&rdev->fence_lock, irq_flags);
 	r = radeon_fence_wait(fence, false);
 	radeon_fence_unref(&fence);
 	return r;
 }
 
-int radeon_fence_wait_last(struct radeon_device *rdev)
+int radeon_fence_wait_last(struct radeon_device *rdev, int ring)
 {
 	unsigned long irq_flags;
 	struct radeon_fence *fence;
@@ -317,15 +312,15 @@ int radeon_fence_wait_last(struct radeon_device *rdev)
 	if (rdev->gpu_lockup) {
 		return 0;
 	}
-	write_lock_irqsave(&rdev->fence_drv.lock, irq_flags);
-	if (list_empty(&rdev->fence_drv.emited)) {
-		write_unlock_irqrestore(&rdev->fence_drv.lock, irq_flags);
+	write_lock_irqsave(&rdev->fence_lock, irq_flags);
+	if (list_empty(&rdev->fence_drv[ring].emitted)) {
+		write_unlock_irqrestore(&rdev->fence_lock, irq_flags);
 		return 0;
 	}
-	fence = list_entry(rdev->fence_drv.emited.prev,
+	fence = list_entry(rdev->fence_drv[ring].emitted.prev,
 			   struct radeon_fence, list);
 	radeon_fence_ref(fence);
-	write_unlock_irqrestore(&rdev->fence_drv.lock, irq_flags);
+	write_unlock_irqrestore(&rdev->fence_lock, irq_flags);
 	r = radeon_fence_wait(fence, false);
 	radeon_fence_unref(&fence);
 	return r;
@@ -347,39 +342,95 @@ void radeon_fence_unref(struct radeon_fence **fence)
 	}
 }
 
-void radeon_fence_process(struct radeon_device *rdev)
+void radeon_fence_process(struct radeon_device *rdev, int ring)
 {
 	unsigned long irq_flags;
 	bool wake;
 
-	write_lock_irqsave(&rdev->fence_drv.lock, irq_flags);
-	wake = radeon_fence_poll_locked(rdev);
-	write_unlock_irqrestore(&rdev->fence_drv.lock, irq_flags);
+	write_lock_irqsave(&rdev->fence_lock, irq_flags);
+	wake = radeon_fence_poll_locked(rdev, ring);
+	write_unlock_irqrestore(&rdev->fence_lock, irq_flags);
 	if (wake) {
-		wake_up_all(&rdev->fence_drv.queue);
+		wake_up_all(&rdev->fence_drv[ring].queue);
 	}
 }
 
-int radeon_fence_driver_init(struct radeon_device *rdev)
+int radeon_fence_count_emitted(struct radeon_device *rdev, int ring)
+{
+	unsigned long irq_flags;
+	int not_processed = 0;
+
+	read_lock_irqsave(&rdev->fence_lock, irq_flags);
+	if (!rdev->fence_drv[ring].initialized) {
+		read_unlock_irqrestore(&rdev->fence_lock, irq_flags);
+		return 0;
+	}
+
+	if (!list_empty(&rdev->fence_drv[ring].emitted)) {
+		struct list_head *ptr;
+		list_for_each(ptr, &rdev->fence_drv[ring].emitted) {
+			/* count up to 3, that's enough info */
+			if (++not_processed >= 3)
+				break;
+		}
+	}
+	read_unlock_irqrestore(&rdev->fence_lock, irq_flags);
+	return not_processed;
+}
+
+int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring)
 {
 	unsigned long irq_flags;
+	uint64_t index;
 	int r;
 
-	write_lock_irqsave(&rdev->fence_drv.lock, irq_flags);
-	r = radeon_scratch_get(rdev, &rdev->fence_drv.scratch_reg);
-	if (r) {
-		dev_err(rdev->dev, "fence failed to get scratch register\n");
-		write_unlock_irqrestore(&rdev->fence_drv.lock, irq_flags);
-		return r;
+	write_lock_irqsave(&rdev->fence_lock, irq_flags);
+	radeon_scratch_free(rdev, rdev->fence_drv[ring].scratch_reg);
+	if (rdev->wb.use_event) {
+		rdev->fence_drv[ring].scratch_reg = 0;
+		index = R600_WB_EVENT_OFFSET + ring * 4;
+	} else {
+		r = radeon_scratch_get(rdev, &rdev->fence_drv[ring].scratch_reg);
+		if (r) {
+			dev_err(rdev->dev, "fence failed to get scratch register\n");
+			write_unlock_irqrestore(&rdev->fence_lock, irq_flags);
+			return r;
+		}
+		index = RADEON_WB_SCRATCH_OFFSET +
+			rdev->fence_drv[ring].scratch_reg -
+			rdev->scratch.reg_base;
 	}
-	radeon_fence_write(rdev, 0);
-	atomic_set(&rdev->fence_drv.seq, 0);
-	INIT_LIST_HEAD(&rdev->fence_drv.created);
-	INIT_LIST_HEAD(&rdev->fence_drv.emited);
-	INIT_LIST_HEAD(&rdev->fence_drv.signaled);
-	init_waitqueue_head(&rdev->fence_drv.queue);
-	rdev->fence_drv.initialized = true;
-	write_unlock_irqrestore(&rdev->fence_drv.lock, irq_flags);
+	rdev->fence_drv[ring].cpu_addr = &rdev->wb.wb[index/4];
+	rdev->fence_drv[ring].gpu_addr = rdev->wb.gpu_addr + index;
+	radeon_fence_write(rdev, atomic_read(&rdev->fence_drv[ring].seq), ring);
+	rdev->fence_drv[ring].initialized = true;
+	DRM_INFO("fence driver on ring %d uses gpu addr 0x%08Lx and cpu addr 0x%p\n",
+		 ring, rdev->fence_drv[ring].gpu_addr, rdev->fence_drv[ring].cpu_addr);
+	write_unlock_irqrestore(&rdev->fence_lock, irq_flags);
+	return 0;
+}
+
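
The index math above gives each ring its own 4-byte slot in the shared
write-back page when fence events are in use. A stand-alone sketch of the
layout; the base offset here is a placeholder, the real value is
R600_WB_EVENT_OFFSET from r600d.h:

    #include <stdio.h>

    #define WB_EVENT_OFFSET 3072    /* placeholder, not the real value */

    int main(void)
    {
        for (int ring = 0; ring < 3; ring++) {
            unsigned index = WB_EVENT_OFFSET + ring * 4;
            /* cpu side indexes a u32 array, gpu side adds a byte offset */
            printf("ring %d -> wb[%u], gpu_addr + %u\n",
                   ring, index / 4, index);
        }
        return 0;
    }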
+static void radeon_fence_driver_init_ring(struct radeon_device *rdev, int ring)
+{
+	rdev->fence_drv[ring].scratch_reg = -1;
+	rdev->fence_drv[ring].cpu_addr = NULL;
+	rdev->fence_drv[ring].gpu_addr = 0;
+	atomic_set(&rdev->fence_drv[ring].seq, 0);
+	INIT_LIST_HEAD(&rdev->fence_drv[ring].created);
+	INIT_LIST_HEAD(&rdev->fence_drv[ring].emitted);
+	INIT_LIST_HEAD(&rdev->fence_drv[ring].signaled);
+	init_waitqueue_head(&rdev->fence_drv[ring].queue);
+	rdev->fence_drv[ring].initialized = false;
+}
+
+int radeon_fence_driver_init(struct radeon_device *rdev)
+{
+	unsigned long irq_flags;
+	int ring;
+
+	write_lock_irqsave(&rdev->fence_lock, irq_flags);
+	for (ring = 0; ring < RADEON_NUM_RINGS; ring++) {
+		radeon_fence_driver_init_ring(rdev, ring);
+	}
+	write_unlock_irqrestore(&rdev->fence_lock, irq_flags);
 	if (radeon_debugfs_fence_init(rdev)) {
 		dev_err(rdev->dev, "fence debugfs file creation failed\n");
 	}
@@ -389,14 +440,18 @@ int radeon_fence_driver_init(struct radeon_device *rdev)
 void radeon_fence_driver_fini(struct radeon_device *rdev)
 {
 	unsigned long irq_flags;
-
-	if (!rdev->fence_drv.initialized)
-		return;
-	wake_up_all(&rdev->fence_drv.queue);
-	write_lock_irqsave(&rdev->fence_drv.lock, irq_flags);
-	radeon_scratch_free(rdev, rdev->fence_drv.scratch_reg);
-	write_unlock_irqrestore(&rdev->fence_drv.lock, irq_flags);
-	rdev->fence_drv.initialized = false;
+	int ring;
+
+	for (ring = 0; ring < RADEON_NUM_RINGS; ring++) {
+		if (!rdev->fence_drv[ring].initialized)
+			continue;
+		radeon_fence_wait_last(rdev, ring);
+		wake_up_all(&rdev->fence_drv[ring].queue);
+		write_lock_irqsave(&rdev->fence_lock, irq_flags);
+		radeon_scratch_free(rdev, rdev->fence_drv[ring].scratch_reg);
+		write_unlock_irqrestore(&rdev->fence_lock, irq_flags);
+		rdev->fence_drv[ring].initialized = false;
+	}
 }
 
 
@@ -410,14 +465,21 @@ static int radeon_debugfs_fence_info(struct seq_file *m, void *data)
 	struct drm_device *dev = node->minor->dev;
 	struct radeon_device *rdev = dev->dev_private;
 	struct radeon_fence *fence;
-
-	seq_printf(m, "Last signaled fence 0x%08X\n",
-		   radeon_fence_read(rdev));
-	if (!list_empty(&rdev->fence_drv.emited)) {
-		   fence = list_entry(rdev->fence_drv.emited.prev,
-				      struct radeon_fence, list);
-		   seq_printf(m, "Last emited fence %p with 0x%08X\n",
-			      fence,  fence->seq);
+	int i;
+
+	for (i = 0; i < RADEON_NUM_RINGS; ++i) {
+		if (!rdev->fence_drv[i].initialized)
+			continue;
+
+		seq_printf(m, "--- ring %d ---\n", i);
+		seq_printf(m, "Last signaled fence 0x%08X\n",
+			   radeon_fence_read(rdev, i));
+		if (!list_empty(&rdev->fence_drv[i].emitted)) {
+			fence = list_entry(rdev->fence_drv[i].emitted.prev,
+					   struct radeon_fence, list);
+			seq_printf(m, "Last emitted fence %p with 0x%08X\n",
+				   fence,  fence->seq);
+		}
 	}
 	return 0;
 }

+ 4 - 2
drivers/gpu/drm/radeon/radeon_gem.c

@@ -152,6 +152,7 @@ int radeon_gem_info_ioctl(struct drm_device *dev, void *data,
 	struct radeon_device *rdev = dev->dev_private;
 	struct drm_radeon_gem_info *args = data;
 	struct ttm_mem_type_manager *man;
+	unsigned i;
 
 	man = &rdev->mman.bdev.man[TTM_PL_VRAM];
 
@@ -160,8 +161,9 @@ int radeon_gem_info_ioctl(struct drm_device *dev, void *data,
 	if (rdev->stollen_vga_memory)
 		args->vram_visible -= radeon_bo_size(rdev->stollen_vga_memory);
 	args->vram_visible -= radeon_fbdev_total_size(rdev);
-	args->gart_size = rdev->mc.gtt_size - rdev->cp.ring_size - 4096 -
-		RADEON_IB_POOL_SIZE*64*1024;
+	args->gart_size = rdev->mc.gtt_size - 4096 - RADEON_IB_POOL_SIZE*64*1024;
+	for (i = 0; i < RADEON_NUM_RINGS; ++i)
+		args->gart_size -= rdev->ring[i].ring_size;
 	return 0;
 }
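
The reported GART size now deducts every ring buffer rather than just the CP
ring. A rough stand-alone version of the accounting, with made-up sizes (the
real pool is RADEON_IB_POOL_SIZE buffers of 64 KiB):

    #include <stdio.h>

    int main(void)
    {
        /* made up: 512 MiB GTT, 16 IBs of 64 KiB, three rings */
        unsigned long gart = 512ul << 20;
        unsigned long ring_size[3] = { 1ul << 20, 64ul << 10, 64ul << 10 };

        gart -= 4096 + 16 * 64 * 1024;    /* one page + the IB pool */
        for (int i = 0; i < 3; i++)
            gart -= ring_size[i];
        printf("usable gart: %lu bytes\n", gart);
        return 0;
    }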
 

+ 14 - 10
drivers/gpu/drm/radeon/radeon_irq_kms.c

@@ -65,7 +65,8 @@ void radeon_driver_irq_preinstall_kms(struct drm_device *dev)
 	unsigned i;
 
 	/* Disable *all* interrupts */
-	rdev->irq.sw_int = false;
+	for (i = 0; i < RADEON_NUM_RINGS; i++)
+		rdev->irq.sw_int[i] = false;
 	rdev->irq.gui_idle = false;
 	for (i = 0; i < RADEON_MAX_HPD_PINS; i++)
 		rdev->irq.hpd[i] = false;
@@ -81,9 +82,11 @@ void radeon_driver_irq_preinstall_kms(struct drm_device *dev)
 int radeon_driver_irq_postinstall_kms(struct drm_device *dev)
 {
 	struct radeon_device *rdev = dev->dev_private;
+	unsigned i;
 
 	dev->max_vblank_count = 0x001fffff;
-	rdev->irq.sw_int = true;
+	for (i = 0; i < RADEON_NUM_RINGS; i++)
+		rdev->irq.sw_int[i] = true;
 	radeon_irq_set(rdev);
 	return 0;
 }
@@ -97,7 +100,8 @@ void radeon_driver_irq_uninstall_kms(struct drm_device *dev)
 		return;
 	}
 	/* Disable *all* interrupts */
-	rdev->irq.sw_int = false;
+	for (i = 0; i < RADEON_NUM_RINGS; i++)
+		rdev->irq.sw_int[i] = false;
 	rdev->irq.gui_idle = false;
 	for (i = 0; i < RADEON_MAX_HPD_PINS; i++)
 		rdev->irq.hpd[i] = false;
@@ -194,26 +198,26 @@ void radeon_irq_kms_fini(struct radeon_device *rdev)
 	flush_work_sync(&rdev->hotplug_work);
 }
 
-void radeon_irq_kms_sw_irq_get(struct radeon_device *rdev)
+void radeon_irq_kms_sw_irq_get(struct radeon_device *rdev, int ring)
 {
 	unsigned long irqflags;
 
 	spin_lock_irqsave(&rdev->irq.sw_lock, irqflags);
-	if (rdev->ddev->irq_enabled && (++rdev->irq.sw_refcount == 1)) {
-		rdev->irq.sw_int = true;
+	if (rdev->ddev->irq_enabled && (++rdev->irq.sw_refcount[ring] == 1)) {
+		rdev->irq.sw_int[ring] = true;
 		radeon_irq_set(rdev);
 	}
 	spin_unlock_irqrestore(&rdev->irq.sw_lock, irqflags);
 }
 
-void radeon_irq_kms_sw_irq_put(struct radeon_device *rdev)
+void radeon_irq_kms_sw_irq_put(struct radeon_device *rdev, int ring)
 {
 	unsigned long irqflags;
 
 	spin_lock_irqsave(&rdev->irq.sw_lock, irqflags);
-	BUG_ON(rdev->ddev->irq_enabled && rdev->irq.sw_refcount <= 0);
-	if (rdev->ddev->irq_enabled && (--rdev->irq.sw_refcount == 0)) {
-		rdev->irq.sw_int = false;
+	BUG_ON(rdev->ddev->irq_enabled && rdev->irq.sw_refcount[ring] <= 0);
+	if (rdev->ddev->irq_enabled && (--rdev->irq.sw_refcount[ring] == 0)) {
+		rdev->irq.sw_int[ring] = false;
 		radeon_irq_set(rdev);
 	}
 	spin_unlock_irqrestore(&rdev->irq.sw_lock, irqflags);

+ 20 - 0
drivers/gpu/drm/radeon/radeon_object.h

@@ -128,4 +128,24 @@ extern void radeon_bo_move_notify(struct ttm_buffer_object *bo,
 					struct ttm_mem_reg *mem);
 extern int radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo);
 extern int radeon_bo_get_surface_reg(struct radeon_bo *bo);
+
+/*
+ * sub allocation
+ */
+extern int radeon_sa_bo_manager_init(struct radeon_device *rdev,
+				     struct radeon_sa_manager *sa_manager,
+				     unsigned size, u32 domain);
+extern void radeon_sa_bo_manager_fini(struct radeon_device *rdev,
+				      struct radeon_sa_manager *sa_manager);
+extern int radeon_sa_bo_manager_start(struct radeon_device *rdev,
+				      struct radeon_sa_manager *sa_manager);
+extern int radeon_sa_bo_manager_suspend(struct radeon_device *rdev,
+					struct radeon_sa_manager *sa_manager);
+extern int radeon_sa_bo_new(struct radeon_device *rdev,
+			    struct radeon_sa_manager *sa_manager,
+			    struct radeon_sa_bo *sa_bo,
+			    unsigned size, unsigned align);
+extern void radeon_sa_bo_free(struct radeon_device *rdev,
+			      struct radeon_sa_bo *sa_bo);
+
 #endif
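
A sketch of the intended call sequence for the new sub allocator, using only
the functions declared above; error handling is omitted and the sizes mirror
what the IB pool passes in:

    struct radeon_sa_manager mgr;
    struct radeon_sa_bo sa_bo;

    radeon_sa_bo_manager_init(rdev, &mgr, RADEON_IB_POOL_SIZE * 64 * 1024,
                              RADEON_GEM_DOMAIN_GTT);
    radeon_sa_bo_manager_start(rdev, &mgr);   /* pin + map once the GART is up */
    radeon_sa_bo_new(rdev, &mgr, &sa_bo, 64 * 1024, 64);
    /* ... hand mgr.gpu_addr + sa_bo.offset to the hardware ... */
    radeon_sa_bo_free(rdev, &sa_bo);
    radeon_sa_bo_manager_suspend(rdev, &mgr); /* unpin around suspend */
    radeon_sa_bo_manager_fini(rdev, &mgr);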

+ 18 - 16
drivers/gpu/drm/radeon/radeon_pm.c

@@ -252,7 +252,10 @@ static void radeon_pm_set_clocks(struct radeon_device *rdev)
 
 	mutex_lock(&rdev->ddev->struct_mutex);
 	mutex_lock(&rdev->vram_mutex);
-	mutex_lock(&rdev->cp.mutex);
+	for (i = 0; i < RADEON_NUM_RINGS; ++i) {
+		if (rdev->ring[i].ring_obj)
+			mutex_lock(&rdev->ring[i].mutex);
+	}
 
 	/* gui idle int has issues on older chips it seems */
 	if (rdev->family >= CHIP_R600) {
@@ -268,12 +271,13 @@ static void radeon_pm_set_clocks(struct radeon_device *rdev)
 			radeon_irq_set(rdev);
 		}
 	} else {
-		if (rdev->cp.ready) {
+		struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
+		if (ring->ready) {
 			struct radeon_fence *fence;
-			radeon_ring_alloc(rdev, 64);
-			radeon_fence_create(rdev, &fence);
+			radeon_ring_alloc(rdev, ring, 64);
+			radeon_fence_create(rdev, &fence, radeon_ring_index(rdev, ring));
 			radeon_fence_emit(rdev, fence);
-			radeon_ring_commit(rdev);
+			radeon_ring_commit(rdev, ring);
 			radeon_fence_wait(fence, false);
 			radeon_fence_unref(&fence);
 		}
@@ -307,7 +311,10 @@ static void radeon_pm_set_clocks(struct radeon_device *rdev)
 
 	rdev->pm.dynpm_planned_action = DYNPM_ACTION_NONE;
 
-	mutex_unlock(&rdev->cp.mutex);
+	for (i = 0; i < RADEON_NUM_RINGS; ++i) {
+		if (rdev->ring[i].ring_obj)
+			mutex_unlock(&rdev->ring[i].mutex);
+	}
 	mutex_unlock(&rdev->vram_mutex);
 	mutex_unlock(&rdev->ddev->struct_mutex);
 }
@@ -795,19 +802,14 @@ static void radeon_dynpm_idle_work_handler(struct work_struct *work)
 	resched = ttm_bo_lock_delayed_workqueue(&rdev->mman.bdev);
 	mutex_lock(&rdev->pm.mutex);
 	if (rdev->pm.dynpm_state == DYNPM_STATE_ACTIVE) {
-		unsigned long irq_flags;
 		int not_processed = 0;
+		int i;
 
-		read_lock_irqsave(&rdev->fence_drv.lock, irq_flags);
-		if (!list_empty(&rdev->fence_drv.emited)) {
-			struct list_head *ptr;
-			list_for_each(ptr, &rdev->fence_drv.emited) {
-				/* count up to 3, that's enought info */
-				if (++not_processed >= 3)
-					break;
-			}
+		for (i = 0; i < RADEON_NUM_RINGS; ++i) {
+			not_processed += radeon_fence_count_emitted(rdev, i);
+			if (not_processed >= 3)
+				break;
 		}
-		read_unlock_irqrestore(&rdev->fence_drv.lock, irq_flags);
 
 		if (not_processed >= 3) { /* should upclock */
 			if (rdev->pm.dynpm_planned_action == DYNPM_ACTION_DOWNCLOCK) {

+ 240 - 222
drivers/gpu/drm/radeon/radeon_ring.c

@@ -34,6 +34,7 @@
 #include "atom.h"
 
 int radeon_debugfs_ib_init(struct radeon_device *rdev);
+int radeon_debugfs_ring_init(struct radeon_device *rdev);
 
 u32 radeon_get_ib_value(struct radeon_cs_parser *p, int idx)
 {
@@ -60,105 +61,103 @@ u32 radeon_get_ib_value(struct radeon_cs_parser *p, int idx)
 	return idx_value;
 }
 
-void radeon_ring_write(struct radeon_device *rdev, uint32_t v)
+void radeon_ring_write(struct radeon_ring *ring, uint32_t v)
 {
 #if DRM_DEBUG_CODE
-	if (rdev->cp.count_dw <= 0) {
+	if (ring->count_dw <= 0) {
 		DRM_ERROR("radeon: writing more dwords to ring than expected!\n");
 	}
 #endif
-	rdev->cp.ring[rdev->cp.wptr++] = v;
-	rdev->cp.wptr &= rdev->cp.ptr_mask;
-	rdev->cp.count_dw--;
-	rdev->cp.ring_free_dw--;
+	ring->ring[ring->wptr++] = v;
+	ring->wptr &= ring->ptr_mask;
+	ring->count_dw--;
+	ring->ring_free_dw--;
 }
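
Since ring_size/4 is a power of two, masking with ptr_mask is the only
wraparound handling the write path needs. A toy demonstration with a
16-dword ring:

    #include <stdio.h>

    int main(void)
    {
        unsigned ptr_mask = 16 - 1;    /* toy ring, real ones are larger */
        unsigned wptr = 14;

        for (int i = 0; i < 4; i++) {
            printf("dword written at slot %u\n", wptr);
            wptr = (wptr + 1) & ptr_mask;    /* 15 wraps to 0 */
        }
        return 0;
    }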
 
-void radeon_ib_bogus_cleanup(struct radeon_device *rdev)
-{
-	struct radeon_ib *ib, *n;
-
-	list_for_each_entry_safe(ib, n, &rdev->ib_pool.bogus_ib, list) {
-		list_del(&ib->list);
-		vfree(ib->ptr);
-		kfree(ib);
-	}
-}
-
-void radeon_ib_bogus_add(struct radeon_device *rdev, struct radeon_ib *ib)
+/*
+ * IB.
+ */
+static bool radeon_ib_try_free(struct radeon_device *rdev,
+			       struct radeon_ib *ib)
 {
-	struct radeon_ib *bib;
-
-	bib = kmalloc(sizeof(*bib), GFP_KERNEL);
-	if (bib == NULL)
-		return;
-	bib->ptr = vmalloc(ib->length_dw * 4);
-	if (bib->ptr == NULL) {
-		kfree(bib);
-		return;
+	bool done = false;
+
+	/* only free ibs which have been emitted */
+	if (ib->fence && ib->fence->emitted) {
+		if (radeon_fence_signaled(ib->fence)) {
+			radeon_fence_unref(&ib->fence);
+			radeon_sa_bo_free(rdev, &ib->sa_bo);
+			done = true;
+		}
 	}
-	memcpy(bib->ptr, ib->ptr, ib->length_dw * 4);
-	bib->length_dw = ib->length_dw;
-	mutex_lock(&rdev->ib_pool.mutex);
-	list_add_tail(&bib->list, &rdev->ib_pool.bogus_ib);
-	mutex_unlock(&rdev->ib_pool.mutex);
+	return done;
 }
 
-/*
- * IB.
- */
-int radeon_ib_get(struct radeon_device *rdev, struct radeon_ib **ib)
+int radeon_ib_get(struct radeon_device *rdev, int ring, struct radeon_ib **ib)
 {
 	struct radeon_fence *fence;
-	struct radeon_ib *nib;
-	int r = 0, i, c;
+	unsigned cretry = 0;
+	int r = 0, i, idx;
 
 	*ib = NULL;
-	r = radeon_fence_create(rdev, &fence);
+
+	r = radeon_fence_create(rdev, &fence, ring);
 	if (r) {
 		dev_err(rdev->dev, "failed to create fence for new IB\n");
 		return r;
 	}
+
 	mutex_lock(&rdev->ib_pool.mutex);
-	for (i = rdev->ib_pool.head_id, c = 0, nib = NULL; c < RADEON_IB_POOL_SIZE; c++, i++) {
-		i &= (RADEON_IB_POOL_SIZE - 1);
-		if (rdev->ib_pool.ibs[i].free) {
-			nib = &rdev->ib_pool.ibs[i];
-			break;
-		}
-	}
-	if (nib == NULL) {
-		/* This should never happen, it means we allocated all
-		 * IB and haven't scheduled one yet, return EBUSY to
-		 * userspace hoping that on ioctl recall we get better
-		 * luck
-		 */
-		dev_err(rdev->dev, "no free indirect buffer !\n");
+	idx = rdev->ib_pool.head_id;
+retry:
+	if (cretry > 5) {
+		dev_err(rdev->dev, "failed to get an ib after 5 retries\n");
 		mutex_unlock(&rdev->ib_pool.mutex);
 		radeon_fence_unref(&fence);
-		return -EBUSY;
+		return -ENOMEM;
 	}
-	rdev->ib_pool.head_id = (nib->idx + 1) & (RADEON_IB_POOL_SIZE - 1);
-	nib->free = false;
-	if (nib->fence) {
-		mutex_unlock(&rdev->ib_pool.mutex);
-		r = radeon_fence_wait(nib->fence, false);
-		if (r) {
-			dev_err(rdev->dev, "error waiting fence of IB(%u:0x%016lX:%u)\n",
-				nib->idx, (unsigned long)nib->gpu_addr, nib->length_dw);
-			mutex_lock(&rdev->ib_pool.mutex);
-			nib->free = true;
-			mutex_unlock(&rdev->ib_pool.mutex);
-			radeon_fence_unref(&fence);
-			return r;
+	cretry++;
+	for (i = 0; i < RADEON_IB_POOL_SIZE; i++) {
+		radeon_ib_try_free(rdev, &rdev->ib_pool.ibs[idx]);
+		if (rdev->ib_pool.ibs[idx].fence == NULL) {
+			r = radeon_sa_bo_new(rdev, &rdev->ib_pool.sa_manager,
+					     &rdev->ib_pool.ibs[idx].sa_bo,
+					     64*1024, 64);
+			if (!r) {
+				*ib = &rdev->ib_pool.ibs[idx];
+				(*ib)->ptr = rdev->ib_pool.sa_manager.cpu_ptr;
+				(*ib)->ptr += ((*ib)->sa_bo.offset >> 2);
+				(*ib)->gpu_addr = rdev->ib_pool.sa_manager.gpu_addr;
+				(*ib)->gpu_addr += (*ib)->sa_bo.offset;
+				(*ib)->fence = fence;
+				/* ibs are most likely to be allocated in a ring fashion
+				 * thus rdev->ib_pool.head_id should be the id of the
+				 * oldest ib
+				 */
+				rdev->ib_pool.head_id = (1 + idx);
+				rdev->ib_pool.head_id &= (RADEON_IB_POOL_SIZE - 1);
+				mutex_unlock(&rdev->ib_pool.mutex);
+				return 0;
+			}
 		}
-		mutex_lock(&rdev->ib_pool.mutex);
+		idx = (idx + 1) & (RADEON_IB_POOL_SIZE - 1);
+	}
+	/* this should be a rare event, i.e. all ibs are scheduled but none
+	 * has signaled yet.
+	 */
+	for (i = 0; i < RADEON_IB_POOL_SIZE; i++) {
+		if (rdev->ib_pool.ibs[idx].fence) {
+			r = radeon_fence_wait(rdev->ib_pool.ibs[idx].fence, false);
+			if (!r) {
+				goto retry;
+			}
+			/* an error happened */
+			break;
+		}
+		idx = (idx + 1) & (RADEON_IB_POOL_SIZE - 1);
 	}
-	radeon_fence_unref(&nib->fence);
-	nib->fence = fence;
-	nib->length_dw = 0;
 	mutex_unlock(&rdev->ib_pool.mutex);
-	*ib = nib;
-	return 0;
+	radeon_fence_unref(&fence);
+	return r;
 }
 
 void radeon_ib_free(struct radeon_device *rdev, struct radeon_ib **ib)
@@ -169,247 +168,255 @@ void radeon_ib_free(struct radeon_device *rdev, struct radeon_ib **ib)
 	if (tmp == NULL) {
 		return;
 	}
-	if (!tmp->fence->emited)
-		radeon_fence_unref(&tmp->fence);
 	mutex_lock(&rdev->ib_pool.mutex);
-	tmp->free = true;
+	if (tmp->fence && !tmp->fence->emitted) {
+		radeon_sa_bo_free(rdev, &tmp->sa_bo);
+		radeon_fence_unref(&tmp->fence);
+	}
 	mutex_unlock(&rdev->ib_pool.mutex);
 }
 
 int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib)
 {
+	struct radeon_ring *ring = &rdev->ring[ib->fence->ring];
 	int r = 0;
 
-	if (!ib->length_dw || !rdev->cp.ready) {
+	if (!ib->length_dw || !ring->ready) {
 		/* TODO: Nothing in the ib we should report. */
 		DRM_ERROR("radeon: couldn't schedule IB(%u).\n", ib->idx);
 		return -EINVAL;
 	}
 
 	/* 64 dwords should be enough for fence too */
-	r = radeon_ring_lock(rdev, 64);
+	r = radeon_ring_lock(rdev, ring, 64);
 	if (r) {
 		DRM_ERROR("radeon: scheduling IB failed (%d).\n", r);
 		return r;
 	}
-	radeon_ring_ib_execute(rdev, ib);
+	radeon_ring_ib_execute(rdev, ib->fence->ring, ib);
 	radeon_fence_emit(rdev, ib->fence);
-	mutex_lock(&rdev->ib_pool.mutex);
-	/* once scheduled IB is considered free and protected by the fence */
-	ib->free = true;
-	mutex_unlock(&rdev->ib_pool.mutex);
-	radeon_ring_unlock_commit(rdev);
+	radeon_ring_unlock_commit(rdev, ring);
 	return 0;
 }
 
 int radeon_ib_pool_init(struct radeon_device *rdev)
 {
-	void *ptr;
-	uint64_t gpu_addr;
-	int i;
-	int r = 0;
+	int i, r;
 
-	if (rdev->ib_pool.robj)
+	mutex_lock(&rdev->ib_pool.mutex);
+	if (rdev->ib_pool.ready) {
+		mutex_unlock(&rdev->ib_pool.mutex);
 		return 0;
-	INIT_LIST_HEAD(&rdev->ib_pool.bogus_ib);
-	/* Allocate 1M object buffer */
-	r = radeon_bo_create(rdev, RADEON_IB_POOL_SIZE*64*1024,
-			     PAGE_SIZE, true, RADEON_GEM_DOMAIN_GTT,
-			     &rdev->ib_pool.robj);
-	if (r) {
-		DRM_ERROR("radeon: failed to ib pool (%d).\n", r);
-		return r;
 	}
-	r = radeon_bo_reserve(rdev->ib_pool.robj, false);
-	if (unlikely(r != 0))
-		return r;
-	r = radeon_bo_pin(rdev->ib_pool.robj, RADEON_GEM_DOMAIN_GTT, &gpu_addr);
-	if (r) {
-		radeon_bo_unreserve(rdev->ib_pool.robj);
-		DRM_ERROR("radeon: failed to pin ib pool (%d).\n", r);
-		return r;
-	}
-	r = radeon_bo_kmap(rdev->ib_pool.robj, &ptr);
-	radeon_bo_unreserve(rdev->ib_pool.robj);
+
+	r = radeon_sa_bo_manager_init(rdev, &rdev->ib_pool.sa_manager,
+				      RADEON_IB_POOL_SIZE*64*1024,
+				      RADEON_GEM_DOMAIN_GTT);
 	if (r) {
-		DRM_ERROR("radeon: failed to map ib pool (%d).\n", r);
+		mutex_unlock(&rdev->ib_pool.mutex);
 		return r;
 	}
-	for (i = 0; i < RADEON_IB_POOL_SIZE; i++) {
-		unsigned offset;
 
-		offset = i * 64 * 1024;
-		rdev->ib_pool.ibs[i].gpu_addr = gpu_addr + offset;
-		rdev->ib_pool.ibs[i].ptr = ptr + offset;
+	for (i = 0; i < RADEON_IB_POOL_SIZE; i++) {
+		rdev->ib_pool.ibs[i].fence = NULL;
 		rdev->ib_pool.ibs[i].idx = i;
 		rdev->ib_pool.ibs[i].length_dw = 0;
-		rdev->ib_pool.ibs[i].free = true;
+		INIT_LIST_HEAD(&rdev->ib_pool.ibs[i].sa_bo.list);
 	}
 	rdev->ib_pool.head_id = 0;
 	rdev->ib_pool.ready = true;
 	DRM_INFO("radeon: ib pool ready.\n");
+
 	if (radeon_debugfs_ib_init(rdev)) {
 		DRM_ERROR("Failed to register debugfs file for IB !\n");
 	}
-	return r;
+	if (radeon_debugfs_ring_init(rdev)) {
+		DRM_ERROR("Failed to register debugfs file for rings!\n");
+	}
+	mutex_unlock(&rdev->ib_pool.mutex);
+	return 0;
 }
 
 void radeon_ib_pool_fini(struct radeon_device *rdev)
 {
-	int r;
-	struct radeon_bo *robj;
+	unsigned i;
 
-	if (!rdev->ib_pool.ready) {
-		return;
-	}
 	mutex_lock(&rdev->ib_pool.mutex);
-	radeon_ib_bogus_cleanup(rdev);
-	robj = rdev->ib_pool.robj;
-	rdev->ib_pool.robj = NULL;
-	mutex_unlock(&rdev->ib_pool.mutex);
-
-	if (robj) {
-		r = radeon_bo_reserve(robj, false);
-		if (likely(r == 0)) {
-			radeon_bo_kunmap(robj);
-			radeon_bo_unpin(robj);
-			radeon_bo_unreserve(robj);
+	if (rdev->ib_pool.ready) {
+		for (i = 0; i < RADEON_IB_POOL_SIZE; i++) {
+			radeon_sa_bo_free(rdev, &rdev->ib_pool.ibs[i].sa_bo);
+			radeon_fence_unref(&rdev->ib_pool.ibs[i].fence);
 		}
-		radeon_bo_unref(&robj);
+		radeon_sa_bo_manager_fini(rdev, &rdev->ib_pool.sa_manager);
+		rdev->ib_pool.ready = false;
 	}
+	mutex_unlock(&rdev->ib_pool.mutex);
 }
 
+int radeon_ib_pool_start(struct radeon_device *rdev)
+{
+	return radeon_sa_bo_manager_start(rdev, &rdev->ib_pool.sa_manager);
+}
+
+int radeon_ib_pool_suspend(struct radeon_device *rdev)
+{
+	return radeon_sa_bo_manager_suspend(rdev, &rdev->ib_pool.sa_manager);
+}
 
 /*
  * Ring.
  */
-void radeon_ring_free_size(struct radeon_device *rdev)
+int radeon_ring_index(struct radeon_device *rdev, struct radeon_ring *ring)
 {
-	if (rdev->wb.enabled)
-		rdev->cp.rptr = le32_to_cpu(rdev->wb.wb[RADEON_WB_CP_RPTR_OFFSET/4]);
-	else {
-		if (rdev->family >= CHIP_R600)
-			rdev->cp.rptr = RREG32(R600_CP_RB_RPTR);
-		else
-			rdev->cp.rptr = RREG32(RADEON_CP_RB_RPTR);
+	/* r1xx-r5xx only has CP ring */
+	if (rdev->family < CHIP_R600)
+		return RADEON_RING_TYPE_GFX_INDEX;
+
+	if (rdev->family >= CHIP_CAYMAN) {
+		if (ring == &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX])
+			return CAYMAN_RING_TYPE_CP1_INDEX;
+		else if (ring == &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX])
+			return CAYMAN_RING_TYPE_CP2_INDEX;
 	}
+	return RADEON_RING_TYPE_GFX_INDEX;
+}
+
+void radeon_ring_free_size(struct radeon_device *rdev, struct radeon_ring *ring)
+{
+	u32 rptr;
+
+	if (rdev->wb.enabled)
+		rptr = le32_to_cpu(rdev->wb.wb[ring->rptr_offs/4]);
+	else
+		rptr = RREG32(ring->rptr_reg);
+	ring->rptr = (rptr & ring->ptr_reg_mask) >> ring->ptr_reg_shift;
 	/* This works because ring_size is a power of 2 */
-	rdev->cp.ring_free_dw = (rdev->cp.rptr + (rdev->cp.ring_size / 4));
-	rdev->cp.ring_free_dw -= rdev->cp.wptr;
-	rdev->cp.ring_free_dw &= rdev->cp.ptr_mask;
-	if (!rdev->cp.ring_free_dw) {
-		rdev->cp.ring_free_dw = rdev->cp.ring_size / 4;
+	ring->ring_free_dw = (ring->rptr + (ring->ring_size / 4));
+	ring->ring_free_dw -= ring->wptr;
+	ring->ring_free_dw &= ring->ptr_mask;
+	if (!ring->ring_free_dw) {
+		ring->ring_free_dw = ring->ring_size / 4;
 	}
 }
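
A worked example of the free-space formula, again leaning on the power-of-two
size: with 1024 dwords, rptr at 10 and wptr at 1000 there are 34 free dwords.
A result of 0 is remapped to "completely empty" because the allocator below
always keeps at least one dword free, so rptr == wptr never means full:

    #include <stdio.h>

    int main(void)
    {
        unsigned dwords = 1024, ptr_mask = dwords - 1;
        unsigned rptr = 10, wptr = 1000;

        unsigned free_dw = (rptr + dwords - wptr) & ptr_mask;
        if (!free_dw)
            free_dw = dwords;    /* rptr == wptr means empty, not full */
        printf("%u free dwords\n", free_dw);    /* prints 34 */
        return 0;
    }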
 
-int radeon_ring_alloc(struct radeon_device *rdev, unsigned ndw)
+
+int radeon_ring_alloc(struct radeon_device *rdev, struct radeon_ring *ring, unsigned ndw)
 {
 	int r;
 
 	/* Align requested size with padding so unlock_commit can
 	 * pad safely */
-	ndw = (ndw + rdev->cp.align_mask) & ~rdev->cp.align_mask;
-	while (ndw > (rdev->cp.ring_free_dw - 1)) {
-		radeon_ring_free_size(rdev);
-		if (ndw < rdev->cp.ring_free_dw) {
+	ndw = (ndw + ring->align_mask) & ~ring->align_mask;
+	while (ndw > (ring->ring_free_dw - 1)) {
+		radeon_ring_free_size(rdev, ring);
+		if (ndw < ring->ring_free_dw) {
 			break;
 		}
-		r = radeon_fence_wait_next(rdev);
+		r = radeon_fence_wait_next(rdev, radeon_ring_index(rdev, ring));
 		if (r)
 			return r;
 	}
-	rdev->cp.count_dw = ndw;
-	rdev->cp.wptr_old = rdev->cp.wptr;
+	ring->count_dw = ndw;
+	ring->wptr_old = ring->wptr;
 	return 0;
 }
 
-int radeon_ring_lock(struct radeon_device *rdev, unsigned ndw)
+int radeon_ring_lock(struct radeon_device *rdev, struct radeon_ring *ring, unsigned ndw)
 {
 	int r;
 
-	mutex_lock(&rdev->cp.mutex);
-	r = radeon_ring_alloc(rdev, ndw);
+	mutex_lock(&ring->mutex);
+	r = radeon_ring_alloc(rdev, ring, ndw);
 	if (r) {
-		mutex_unlock(&rdev->cp.mutex);
+		mutex_unlock(&ring->mutex);
 		return r;
 	}
 	return 0;
 }
 
-void radeon_ring_commit(struct radeon_device *rdev)
+void radeon_ring_commit(struct radeon_device *rdev, struct radeon_ring *ring)
 {
 	unsigned count_dw_pad;
 	unsigned i;
 
 	/* We pad to match fetch size */
-	count_dw_pad = (rdev->cp.align_mask + 1) -
-		       (rdev->cp.wptr & rdev->cp.align_mask);
+	count_dw_pad = (ring->align_mask + 1) -
+		       (ring->wptr & ring->align_mask);
 	for (i = 0; i < count_dw_pad; i++) {
-		radeon_ring_write(rdev, 2 << 30);
+		radeon_ring_write(ring, ring->nop);
 	}
 	DRM_MEMORYBARRIER();
-	radeon_cp_commit(rdev);
+	WREG32(ring->wptr_reg, (ring->wptr << ring->ptr_reg_shift) & ring->ptr_reg_mask);
+	(void)RREG32(ring->wptr_reg);
 }
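
The nop padding rounds the tail up to the fetch granularity before wptr is
bumped, and the read-back of wptr_reg flushes the posted register write. Note
that an already aligned tail still gets a full fetch worth of nops, since
(align_mask + 1) - 0 is the whole fetch size. A quick check with made-up
values:

    #include <stdio.h>

    int main(void)
    {
        unsigned align_mask = 15, wptr = 1003;    /* made-up values */
        unsigned pad = (align_mask + 1) - (wptr & align_mask);

        printf("emit %u filler dwords before bumping wptr\n", pad);    /* 5 */
        return 0;
    }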
 
-void radeon_ring_unlock_commit(struct radeon_device *rdev)
+void radeon_ring_unlock_commit(struct radeon_device *rdev, struct radeon_ring *ring)
 {
-	radeon_ring_commit(rdev);
-	mutex_unlock(&rdev->cp.mutex);
+	radeon_ring_commit(rdev, ring);
+	mutex_unlock(&ring->mutex);
 }
 
-void radeon_ring_unlock_undo(struct radeon_device *rdev)
+void radeon_ring_unlock_undo(struct radeon_device *rdev, struct radeon_ring *ring)
 {
-	rdev->cp.wptr = rdev->cp.wptr_old;
-	mutex_unlock(&rdev->cp.mutex);
+	ring->wptr = ring->wptr_old;
+	mutex_unlock(&ring->mutex);
 }
 
-int radeon_ring_init(struct radeon_device *rdev, unsigned ring_size)
+int radeon_ring_init(struct radeon_device *rdev, struct radeon_ring *ring, unsigned ring_size,
+		     unsigned rptr_offs, unsigned rptr_reg, unsigned wptr_reg,
+		     u32 ptr_reg_shift, u32 ptr_reg_mask, u32 nop)
 {
 	int r;
 
-	rdev->cp.ring_size = ring_size;
+	ring->ring_size = ring_size;
+	ring->rptr_offs = rptr_offs;
+	ring->rptr_reg = rptr_reg;
+	ring->wptr_reg = wptr_reg;
+	ring->ptr_reg_shift = ptr_reg_shift;
+	ring->ptr_reg_mask = ptr_reg_mask;
+	ring->nop = nop;
 	/* Allocate ring buffer */
-	if (rdev->cp.ring_obj == NULL) {
-		r = radeon_bo_create(rdev, rdev->cp.ring_size, PAGE_SIZE, true,
+	if (ring->ring_obj == NULL) {
+		r = radeon_bo_create(rdev, ring->ring_size, PAGE_SIZE, true,
 					RADEON_GEM_DOMAIN_GTT,
-					&rdev->cp.ring_obj);
+					&ring->ring_obj);
 		if (r) {
 			dev_err(rdev->dev, "(%d) ring create failed\n", r);
 			return r;
 		}
-		r = radeon_bo_reserve(rdev->cp.ring_obj, false);
+		r = radeon_bo_reserve(ring->ring_obj, false);
 		if (unlikely(r != 0))
 			return r;
-		r = radeon_bo_pin(rdev->cp.ring_obj, RADEON_GEM_DOMAIN_GTT,
-					&rdev->cp.gpu_addr);
+		r = radeon_bo_pin(ring->ring_obj, RADEON_GEM_DOMAIN_GTT,
+					&ring->gpu_addr);
 		if (r) {
-			radeon_bo_unreserve(rdev->cp.ring_obj);
+			radeon_bo_unreserve(ring->ring_obj);
 			dev_err(rdev->dev, "(%d) ring pin failed\n", r);
 			return r;
 		}
-		r = radeon_bo_kmap(rdev->cp.ring_obj,
-				       (void **)&rdev->cp.ring);
-		radeon_bo_unreserve(rdev->cp.ring_obj);
+		r = radeon_bo_kmap(ring->ring_obj,
+				       (void **)&ring->ring);
+		radeon_bo_unreserve(ring->ring_obj);
 		if (r) {
 			dev_err(rdev->dev, "(%d) ring map failed\n", r);
 			return r;
 		}
 	}
-	rdev->cp.ptr_mask = (rdev->cp.ring_size / 4) - 1;
-	rdev->cp.ring_free_dw = rdev->cp.ring_size / 4;
+	ring->ptr_mask = (ring->ring_size / 4) - 1;
+	ring->ring_free_dw = ring->ring_size / 4;
 	return 0;
 }
 
-void radeon_ring_fini(struct radeon_device *rdev)
+void radeon_ring_fini(struct radeon_device *rdev, struct radeon_ring *ring)
 {
 	int r;
 	struct radeon_bo *ring_obj;
 
-	mutex_lock(&rdev->cp.mutex);
-	ring_obj = rdev->cp.ring_obj;
-	rdev->cp.ring = NULL;
-	rdev->cp.ring_obj = NULL;
-	mutex_unlock(&rdev->cp.mutex);
+	mutex_lock(&ring->mutex);
+	ring_obj = ring->ring_obj;
+	ring->ring = NULL;
+	ring->ring_obj = NULL;
+	mutex_unlock(&ring->mutex);
 
 	if (ring_obj) {
 		r = radeon_bo_reserve(ring_obj, false);
@@ -422,72 +429,83 @@ void radeon_ring_fini(struct radeon_device *rdev)
 	}
 }
 
-
 /*
  * Debugfs info
  */
 #if defined(CONFIG_DEBUG_FS)
-static int radeon_debugfs_ib_info(struct seq_file *m, void *data)
+
+static int radeon_debugfs_ring_info(struct seq_file *m, void *data)
 {
 	struct drm_info_node *node = (struct drm_info_node *) m->private;
-	struct radeon_ib *ib = node->info_ent->data;
-	unsigned i;
-
-	if (ib == NULL) {
-		return 0;
-	}
-	seq_printf(m, "IB %04u\n", ib->idx);
-	seq_printf(m, "IB fence %p\n", ib->fence);
-	seq_printf(m, "IB size %05u dwords\n", ib->length_dw);
-	for (i = 0; i < ib->length_dw; i++) {
-		seq_printf(m, "[%05u]=0x%08X\n", i, ib->ptr[i]);
+	struct drm_device *dev = node->minor->dev;
+	struct radeon_device *rdev = dev->dev_private;
+	int ridx = *(int*)node->info_ent->data;
+	struct radeon_ring *ring = &rdev->ring[ridx];
+	unsigned count, i, j;
+
+	radeon_ring_free_size(rdev, ring);
+	count = (ring->ring_size / 4) - ring->ring_free_dw;
+	seq_printf(m, "wptr(0x%04x): 0x%08x\n", ring->wptr_reg, RREG32(ring->wptr_reg));
+	seq_printf(m, "rptr(0x%04x): 0x%08x\n", ring->rptr_reg, RREG32(ring->rptr_reg));
+	seq_printf(m, "driver's copy of the wptr: 0x%08x\n", ring->wptr);
+	seq_printf(m, "driver's copy of the rptr: 0x%08x\n", ring->rptr);
+	seq_printf(m, "%u free dwords in ring\n", ring->ring_free_dw);
+	seq_printf(m, "%u dwords in ring\n", count);
+	i = ring->rptr;
+	for (j = 0; j <= count; j++) {
+		seq_printf(m, "r[%04d]=0x%08x\n", i, ring->ring[i]);
+		i = (i + 1) & ring->ptr_mask;
 	}
 	return 0;
 }
 
-static int radeon_debugfs_ib_bogus_info(struct seq_file *m, void *data)
+static int radeon_ring_type_gfx_index = RADEON_RING_TYPE_GFX_INDEX;
+static int cayman_ring_type_cp1_index = CAYMAN_RING_TYPE_CP1_INDEX;
+static int cayman_ring_type_cp2_index = CAYMAN_RING_TYPE_CP2_INDEX;
+
+static struct drm_info_list radeon_debugfs_ring_info_list[] = {
+	{"radeon_ring_gfx", radeon_debugfs_ring_info, 0, &radeon_ring_type_gfx_index},
+	{"radeon_ring_cp1", radeon_debugfs_ring_info, 0, &cayman_ring_type_cp1_index},
+	{"radeon_ring_cp2", radeon_debugfs_ring_info, 0, &cayman_ring_type_cp2_index},
+};
+
+static int radeon_debugfs_ib_info(struct seq_file *m, void *data)
 {
 	struct drm_info_node *node = (struct drm_info_node *) m->private;
-	struct radeon_device *rdev = node->info_ent->data;
-	struct radeon_ib *ib;
+	struct radeon_ib *ib = node->info_ent->data;
 	unsigned i;
 
-	mutex_lock(&rdev->ib_pool.mutex);
-	if (list_empty(&rdev->ib_pool.bogus_ib)) {
-		mutex_unlock(&rdev->ib_pool.mutex);
-		seq_printf(m, "no bogus IB recorded\n");
+	if (ib == NULL) {
 		return 0;
 	}
-	ib = list_first_entry(&rdev->ib_pool.bogus_ib, struct radeon_ib, list);
-	list_del_init(&ib->list);
-	mutex_unlock(&rdev->ib_pool.mutex);
+	seq_printf(m, "IB %04u\n", ib->idx);
+	seq_printf(m, "IB fence %p\n", ib->fence);
 	seq_printf(m, "IB size %05u dwords\n", ib->length_dw);
 	for (i = 0; i < ib->length_dw; i++) {
 		seq_printf(m, "[%05u]=0x%08X\n", i, ib->ptr[i]);
 	}
-	vfree(ib->ptr);
-	kfree(ib);
 	return 0;
 }
 
 static struct drm_info_list radeon_debugfs_ib_list[RADEON_IB_POOL_SIZE];
 static char radeon_debugfs_ib_names[RADEON_IB_POOL_SIZE][32];
+#endif
 
-static struct drm_info_list radeon_debugfs_ib_bogus_info_list[] = {
-	{"radeon_ib_bogus", radeon_debugfs_ib_bogus_info, 0, NULL},
-};
+int radeon_debugfs_ring_init(struct radeon_device *rdev)
+{
+#if defined(CONFIG_DEBUG_FS)
+	return radeon_debugfs_add_files(rdev, radeon_debugfs_ring_info_list,
+					ARRAY_SIZE(radeon_debugfs_ring_info_list));
+#else
+	return 0;
 #endif
+}
 
 int radeon_debugfs_ib_init(struct radeon_device *rdev)
 {
 #if defined(CONFIG_DEBUG_FS)
 	unsigned i;
-	int r;
 
-	radeon_debugfs_ib_bogus_info_list[0].data = rdev;
-	r = radeon_debugfs_add_files(rdev, radeon_debugfs_ib_bogus_info_list, 1);
-	if (r)
-		return r;
 	for (i = 0; i < RADEON_IB_POOL_SIZE; i++) {
 		sprintf(radeon_debugfs_ib_names[i], "radeon_ib_%04u", i);
 		radeon_debugfs_ib_list[i].name = radeon_debugfs_ib_names[i];

+ 189 - 0
drivers/gpu/drm/radeon/radeon_sa.c

@@ -0,0 +1,189 @@
+/*
+ * Copyright 2011 Red Hat Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ */
+/*
+ * Authors:
+ *    Jerome Glisse <glisse@freedesktop.org>
+ */
+#include "drmP.h"
+#include "drm.h"
+#include "radeon.h"
+
+int radeon_sa_bo_manager_init(struct radeon_device *rdev,
+			      struct radeon_sa_manager *sa_manager,
+			      unsigned size, u32 domain)
+{
+	int r;
+
+	sa_manager->bo = NULL;
+	sa_manager->size = size;
+	sa_manager->domain = domain;
+	INIT_LIST_HEAD(&sa_manager->sa_bo);
+
+	r = radeon_bo_create(rdev, size, RADEON_GPU_PAGE_SIZE, true,
+			     RADEON_GEM_DOMAIN_CPU, &sa_manager->bo);
+	if (r) {
+		dev_err(rdev->dev, "(%d) failed to allocate bo for manager\n", r);
+		return r;
+	}
+
+	return r;
+}
+
+void radeon_sa_bo_manager_fini(struct radeon_device *rdev,
+			       struct radeon_sa_manager *sa_manager)
+{
+	struct radeon_sa_bo *sa_bo, *tmp;
+
+	if (!list_empty(&sa_manager->sa_bo)) {
+		dev_err(rdev->dev, "sa_manager is not empty, clearing anyway\n");
+	}
+	list_for_each_entry_safe(sa_bo, tmp, &sa_manager->sa_bo, list) {
+		list_del_init(&sa_bo->list);
+	}
+	radeon_bo_unref(&sa_manager->bo);
+	sa_manager->size = 0;
+}
+
+int radeon_sa_bo_manager_start(struct radeon_device *rdev,
+			       struct radeon_sa_manager *sa_manager)
+{
+	int r;
+
+	if (sa_manager->bo == NULL) {
+		dev_err(rdev->dev, "no bo for sa manager\n");
+		return -EINVAL;
+	}
+
+	/* map the buffer */
+	r = radeon_bo_reserve(sa_manager->bo, false);
+	if (r) {
+		dev_err(rdev->dev, "(%d) failed to reserve manager bo\n", r);
+		return r;
+	}
+	r = radeon_bo_pin(sa_manager->bo, sa_manager->domain, &sa_manager->gpu_addr);
+	if (r) {
+		radeon_bo_unreserve(sa_manager->bo);
+		dev_err(rdev->dev, "(%d) failed to pin manager bo\n", r);
+		return r;
+	}
+	r = radeon_bo_kmap(sa_manager->bo, &sa_manager->cpu_ptr);
+	radeon_bo_unreserve(sa_manager->bo);
+	return r;
+}
+
+int radeon_sa_bo_manager_suspend(struct radeon_device *rdev,
+				 struct radeon_sa_manager *sa_manager)
+{
+	int r;
+
+	if (sa_manager->bo == NULL) {
+		dev_err(rdev->dev, "no bo for sa manager\n");
+		return -EINVAL;
+	}
+
+	r = radeon_bo_reserve(sa_manager->bo, false);
+	if (!r) {
+		radeon_bo_kunmap(sa_manager->bo);
+		radeon_bo_unpin(sa_manager->bo);
+		radeon_bo_unreserve(sa_manager->bo);
+	}
+	return r;
+}
+
+/*
+ * The principle is simple: we keep a list of sub allocations in
+ * offset order (first entry has offset == 0, last entry has the
+ * highest offset).
+ *
+ * When allocating a new object we first scan the list for a hole
+ * between two sub allocations that is big enough, taking the
+ * requested alignment into account, and place the new object there.
+ *
+ * If no hole fits, we check whether there is room at the end:
+ * total_size - (last_object_offset + last_object_size) >= alloc_size.
+ * If so the new object goes there, otherwise the allocation fails
+ * with -ENOMEM and the caller is expected to wait for fences and
+ * retry.
+ *
+ * Alignment can't be bigger than page size.
+ */
+int radeon_sa_bo_new(struct radeon_device *rdev,
+		     struct radeon_sa_manager *sa_manager,
+		     struct radeon_sa_bo *sa_bo,
+		     unsigned size, unsigned align)
+{
+	struct radeon_sa_bo *tmp;
+	struct list_head *head;
+	unsigned offset = 0, wasted = 0;
+
+	BUG_ON(align > RADEON_GPU_PAGE_SIZE);
+	BUG_ON(size > sa_manager->size);
+
+	/* no allocations yet? */
+	head = sa_manager->sa_bo.prev;
+	if (list_empty(&sa_manager->sa_bo)) {
+		goto out;
+	}
+
+	/* look for a hole big enough */
+	offset = 0;
+	list_for_each_entry(tmp, &sa_manager->sa_bo, list) {
+		/* room before this object ? */
+		if ((tmp->offset - offset) >= size) {
+			head = tmp->list.prev;
+			goto out;
+		}
+		offset = tmp->offset + tmp->size;
+		wasted = offset % align;
+		if (wasted) {
+			wasted = align - wasted;
+		}
+		offset += wasted;
+	}
+	/* room at the end ? */
+	head = sa_manager->sa_bo.prev;
+	tmp = list_entry(head, struct radeon_sa_bo, list);
+	offset = tmp->offset + tmp->size;
+	wasted = offset % align;
+	if (wasted) {
+		wasted = align - wasted;
+	}
+	offset += wasted;
+	if ((sa_manager->size - offset) < size) {
+		/* failed to find something big enough */
+		return -ENOMEM;
+	}
+
+out:
+	sa_bo->manager = sa_manager;
+	sa_bo->offset = offset;
+	sa_bo->size = size;
+	list_add(&sa_bo->list, head);
+	return 0;
+}
+
+void radeon_sa_bo_free(struct radeon_device *rdev, struct radeon_sa_bo *sa_bo)
+{
+	list_del_init(&sa_bo->list);
+}
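
A worked example of the first-fit scan in radeon_sa_bo_new, stripped of
locking and alignment: with a 64 KiB manager holding [0, 16K) and [16K, 48K),
an 8 KiB request finds no hole before either entry and lands at offset 48K
because 16 KiB remain at the end (all sizes made up):

    #include <stdio.h>

    struct sa { unsigned offset, size; };

    int main(void)
    {
        struct sa live[] = { { 0, 16 << 10 }, { 16 << 10, 32 << 10 } };
        unsigned mgr_size = 64 << 10, want = 8 << 10, offset = 0;
        int i, n = 2;

        for (i = 0; i < n; i++) {
            if (live[i].offset - offset >= want)    /* hole before entry i? */
                break;
            offset = live[i].offset + live[i].size;
        }
        if (i == n && mgr_size - offset < want)
            printf("-ENOMEM\n");
        else
            printf("new range at offset %u\n", offset);    /* 49152 */
        return 0;
    }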

+ 161 - 0
drivers/gpu/drm/radeon/radeon_semaphore.c

@@ -0,0 +1,161 @@
+/*
+ * Copyright 2011 Christian König.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ */
+/*
+ * Authors:
+ *    Christian König <deathsimple@vodafone.de>
+ */
+#include "drmP.h"
+#include "drm.h"
+#include "radeon.h"
+
+static int allocate_semaphores(struct radeon_device *rdev)
+{
+	const unsigned long bo_size = PAGE_SIZE * 4;
+
+	struct radeon_bo *bo;
+	struct list_head new_entrys;
+	unsigned long irq_flags;
+	uint64_t gpu_addr;
+	void *map;
+	int i, r;
+
+	r = radeon_bo_create(rdev, bo_size, RADEON_GPU_PAGE_SIZE, true,
+			     RADEON_GEM_DOMAIN_GTT, &bo);
+	if (r) {
+		dev_err(rdev->dev, "(%d) failed to allocate semaphore bo\n", r);
+		return r;
+	}
+
+	r = radeon_bo_reserve(bo, false);
+	if (r) {
+		radeon_bo_unref(&bo);
+		dev_err(rdev->dev, "(%d) failed to reserve semaphore bo\n", r);
+		return r;
+	}
+
+	r = radeon_bo_kmap(bo, &map);
+	if (r) {
+		radeon_bo_unreserve(bo);
+		radeon_bo_unref(&bo);
+		dev_err(rdev->dev, "(%d) semaphore map failed\n", r);
+		return r;
+	}
+	memset(map, 0, bo_size);
+	radeon_bo_kunmap(bo);
+
+	r = radeon_bo_pin(bo, RADEON_GEM_DOMAIN_VRAM, &gpu_addr);
+	if (r) {
+		radeon_bo_unreserve(bo);
+		radeon_bo_unref(&bo);
+		dev_err(rdev->dev, "(%d) semaphore pin failed\n", r);
+		return r;
+	}
+
+	INIT_LIST_HEAD(&new_entrys);
+	for (i = 0; i < bo_size/8; ++i) {
+		struct radeon_semaphore *sem = kmalloc(sizeof(struct radeon_semaphore), GFP_KERNEL);
+		if (!sem)
+			break;	/* keep whatever entries we already built */
+		ttm_bo_reference(&bo->tbo);
+		sem->robj = bo;
+		sem->gpu_addr = gpu_addr;
+		gpu_addr += 8;
+		list_add_tail(&sem->list, &new_entrys);
+	}
+
+	radeon_bo_unreserve(bo);
+	radeon_bo_unref(&bo);
+
+	write_lock_irqsave(&rdev->semaphore_drv.lock, irq_flags);
+	list_splice_tail(&new_entrys, &rdev->semaphore_drv.free);
+	write_unlock_irqrestore(&rdev->semaphore_drv.lock, irq_flags);
+
+	DRM_INFO("%d new semaphores allocated\n", (int)(bo_size/8));
+
+	return 0;
+}
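
Back-of-the-envelope on the refill size: every semaphore is one 8-byte
GPU-visible word, so assuming 4 KiB pages each refill adds 2048 entries to
the free list, which is exactly the count the DRM_INFO above prints:

    #include <stdio.h>

    int main(void)
    {
        unsigned long bo_size = 4096ul * 4;    /* assumes PAGE_SIZE == 4096 */
        printf("%lu semaphores per refill\n", bo_size / 8);    /* 2048 */
        return 0;
    }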
+
+int radeon_semaphore_create(struct radeon_device *rdev,
+			    struct radeon_semaphore **semaphore)
+{
+	unsigned long irq_flags;
+
+	write_lock_irqsave(&rdev->semaphore_drv.lock, irq_flags);
+	if (list_empty(&rdev->semaphore_drv.free)) {
+		int r;
+		write_unlock_irqrestore(&rdev->semaphore_drv.lock, irq_flags);
+		r = allocate_semaphores(rdev);
+		if (r)
+			return r;
+		write_lock_irqsave(&rdev->semaphore_drv.lock, irq_flags);
+	}
+
+	*semaphore = list_first_entry(&rdev->semaphore_drv.free, struct radeon_semaphore, list);
+	list_del(&(*semaphore)->list);
+
+	write_unlock_irqrestore(&rdev->semaphore_drv.lock, irq_flags);
+	return 0;
+}
+
+void radeon_semaphore_emit_signal(struct radeon_device *rdev, int ring,
+			          struct radeon_semaphore *semaphore)
+{
+	radeon_semaphore_ring_emit(rdev, ring, &rdev->ring[ring], semaphore, false);
+}
+
+void radeon_semaphore_emit_wait(struct radeon_device *rdev, int ring,
+			        struct radeon_semaphore *semaphore)
+{
+	radeon_semaphore_ring_emit(rdev, ring, &rdev->ring[ring], semaphore, true);
+}
+
+void radeon_semaphore_free(struct radeon_device *rdev,
+			  struct radeon_semaphore *semaphore)
+{
+	unsigned long irq_flags;
+
+	write_lock_irqsave(&rdev->semaphore_drv.lock, irq_flags);
+	list_add_tail(&semaphore->list, &rdev->semaphore_drv.free);
+	write_unlock_irqrestore(&rdev->semaphore_drv.lock, irq_flags);
+}
+
+void radeon_semaphore_driver_fini(struct radeon_device *rdev)
+{
+	struct radeon_semaphore *i, *n;
+	struct list_head entrys;
+	unsigned long irq_flags;
+
+	INIT_LIST_HEAD(&entrys);
+	write_lock_irqsave(&rdev->semaphore_drv.lock, irq_flags);
+	if (!list_empty(&rdev->semaphore_drv.free)) {
+		list_splice(&rdev->semaphore_drv.free, &entrys);
+	}
+	INIT_LIST_HEAD(&rdev->semaphore_drv.free);
+	write_unlock_irqrestore(&rdev->semaphore_drv.lock, irq_flags);
+
+	list_for_each_entry_safe(i, n, &entrys, list) {
+		radeon_bo_unref(&i->robj);
+		kfree(i);
+	}
+}

+ 264 - 3
drivers/gpu/drm/radeon/radeon_test.c

@@ -42,7 +42,9 @@ void radeon_test_moves(struct radeon_device *rdev)
 	/* Number of tests =
 	 * (Total GTT - IB pool - writeback page - ring buffers) / test size
 	 */
-	n = rdev->mc.gtt_size - RADEON_IB_POOL_SIZE*64*1024 - rdev->cp.ring_size;
+	n = rdev->mc.gtt_size - RADEON_IB_POOL_SIZE*64*1024;
+	for (i = 0; i < RADEON_NUM_RINGS; ++i)
+		n -= rdev->ring[i].ring_size;
 	if (rdev->wb.wb_obj)
 		n -= RADEON_GPU_PAGE_SIZE;
 	if (rdev->ih.ring_obj)
@@ -104,7 +106,7 @@ void radeon_test_moves(struct radeon_device *rdev)
 
 		radeon_bo_kunmap(gtt_obj[i]);
 
-		r = radeon_fence_create(rdev, &fence);
+		r = radeon_fence_create(rdev, &fence, RADEON_RING_TYPE_GFX_INDEX);
 		if (r) {
 			DRM_ERROR("Failed to create GTT->VRAM fence %d\n", i);
 			goto out_cleanup;
@@ -153,7 +155,7 @@ void radeon_test_moves(struct radeon_device *rdev)
 
 		radeon_bo_kunmap(vram_obj);
 
-		r = radeon_fence_create(rdev, &fence);
+		r = radeon_fence_create(rdev, &fence, RADEON_RING_TYPE_GFX_INDEX);
 		if (r) {
 			DRM_ERROR("Failed to create VRAM->GTT fence %d\n", i);
 			goto out_cleanup;
@@ -232,3 +234,262 @@ out_cleanup:
 		printk(KERN_WARNING "Error while testing BO move.\n");
 	}
 }
+
+void radeon_test_ring_sync(struct radeon_device *rdev,
+			   struct radeon_ring *ringA,
+			   struct radeon_ring *ringB)
+{
+	struct radeon_fence *fence1 = NULL, *fence2 = NULL;
+	struct radeon_semaphore *semaphore = NULL;
+	int ridxA = radeon_ring_index(rdev, ringA);
+	int ridxB = radeon_ring_index(rdev, ringB);
+	int r;
+
+	r = radeon_fence_create(rdev, &fence1, ridxA);
+	if (r) {
+		DRM_ERROR("Failed to create sync fence 1\n");
+		goto out_cleanup;
+	}
+	r = radeon_fence_create(rdev, &fence2, ridxA);
+	if (r) {
+		DRM_ERROR("Failed to create sync fence 2\n");
+		goto out_cleanup;
+	}
+
+	r = radeon_semaphore_create(rdev, &semaphore);
+	if (r) {
+		DRM_ERROR("Failed to create semaphore\n");
+		goto out_cleanup;
+	}
+
+	r = radeon_ring_lock(rdev, ringA, 64);
+	if (r) {
+		DRM_ERROR("Failed to lock ring A %d\n", ridxA);
+		goto out_cleanup;
+	}
+	radeon_semaphore_emit_wait(rdev, ridxA, semaphore);
+	radeon_fence_emit(rdev, fence1);
+	radeon_semaphore_emit_wait(rdev, ridxA, semaphore);
+	radeon_fence_emit(rdev, fence2);
+	radeon_ring_unlock_commit(rdev, ringA);
+
+	mdelay(1000);
+
+	if (radeon_fence_signaled(fence1)) {
+		DRM_ERROR("Fence 1 signaled without waiting for semaphore.\n");
+		goto out_cleanup;
+	}
+
+	r = radeon_ring_lock(rdev, ringB, 64);
+	if (r) {
+		DRM_ERROR("Failed to lock ring B %p\n", ringB);
+		goto out_cleanup;
+	}
+	radeon_semaphore_emit_signal(rdev, ridxB, semaphore);
+	radeon_ring_unlock_commit(rdev, ringB);
+
+	r = radeon_fence_wait(fence1, false);
+	if (r) {
+		DRM_ERROR("Failed to wait for sync fence 1\n");
+		goto out_cleanup;
+	}
+
+	mdelay(1000);
+
+	if (radeon_fence_signaled(fence2)) {
+		DRM_ERROR("Fence 2 signaled without waiting for semaphore.\n");
+		goto out_cleanup;
+	}
+
+	r = radeon_ring_lock(rdev, ringB, 64);
+	if (r) {
+		DRM_ERROR("Failed to lock ring B %p\n", ringB);
+		goto out_cleanup;
+	}
+	radeon_semaphore_emit_signal(rdev, ridxB, semaphore);
+	radeon_ring_unlock_commit(rdev, ringB);
+
+	r = radeon_fence_wait(fence2, false);
+	if (r) {
+		DRM_ERROR("Failed to wait for sync fence 2\n");
+		goto out_cleanup;
+	}
+
+out_cleanup:
+	if (semaphore)
+		radeon_semaphore_free(rdev, semaphore);
+
+	if (fence1)
+		radeon_fence_unref(&fence1);
+
+	if (fence2)
+		radeon_fence_unref(&fence2);
+
+	if (r)
+		printk(KERN_WARNING "Error while testing ring sync (%d).\n", r);
+}
+
+void radeon_test_ring_sync2(struct radeon_device *rdev,
+			    struct radeon_ring *ringA,
+			    struct radeon_ring *ringB,
+			    struct radeon_ring *ringC)
+{
+	struct radeon_fence *fenceA = NULL, *fenceB = NULL;
+	struct radeon_semaphore *semaphore = NULL;
+	int ridxA = radeon_ring_index(rdev, ringA);
+	int ridxB = radeon_ring_index(rdev, ringB);
+	int ridxC = radeon_ring_index(rdev, ringC);
+	bool sigA, sigB;
+	int i, r;
+
+	r = radeon_fence_create(rdev, &fenceA, ridxA);
+	if (r) {
+		DRM_ERROR("Failed to create sync fence 1\n");
+		goto out_cleanup;
+	}
+	r = radeon_fence_create(rdev, &fenceB, ridxB);
+	if (r) {
+		DRM_ERROR("Failed to create sync fence 2\n");
+		goto out_cleanup;
+	}
+
+	r = radeon_semaphore_create(rdev, &semaphore);
+	if (r) {
+		DRM_ERROR("Failed to create semaphore\n");
+		goto out_cleanup;
+	}
+
+	r = radeon_ring_lock(rdev, ringA, 64);
+	if (r) {
+		DRM_ERROR("Failed to lock ring A %d\n", ridxA);
+		goto out_cleanup;
+	}
+	radeon_semaphore_emit_wait(rdev, ridxA, semaphore);
+	radeon_fence_emit(rdev, fenceA);
+	radeon_ring_unlock_commit(rdev, ringA);
+
+	r = radeon_ring_lock(rdev, ringB, 64);
+	if (r) {
+		DRM_ERROR("Failed to lock ring B %d\n", ridxB);
+		goto out_cleanup;
+	}
+	radeon_semaphore_emit_wait(rdev, ridxB, semaphore);
+	radeon_fence_emit(rdev, fenceB);
+	radeon_ring_unlock_commit(rdev, ringB);
+
+	mdelay(1000);
+
+	if (radeon_fence_signaled(fenceA)) {
+		DRM_ERROR("Fence A signaled without waiting for semaphore.\n");
+		goto out_cleanup;
+	}
+	if (radeon_fence_signaled(fenceB)) {
+		DRM_ERROR("Fence B signaled without waiting for semaphore.\n");
+		goto out_cleanup;
+	}
+
+	r = radeon_ring_lock(rdev, ringC, 64);
+	if (r) {
+		DRM_ERROR("Failed to lock ring C %d\n", ridxC);
+		goto out_cleanup;
+	}
+	radeon_semaphore_emit_signal(rdev, ridxC, semaphore);
+	radeon_ring_unlock_commit(rdev, ringC);
+
+	for (i = 0; i < 30; ++i) {
+		mdelay(100);
+		sigA = radeon_fence_signaled(fenceA);
+		sigB = radeon_fence_signaled(fenceB);
+		if (sigA || sigB)
+			break;
+	}
+
+	if (!sigA && !sigB) {
+		DRM_ERROR("Neither fence A nor B has been signaled\n");
+		goto out_cleanup;
+	} else if (sigA && sigB) {
+		DRM_ERROR("Both fences A and B have been signaled\n");
+		goto out_cleanup;
+	}
+
+	DRM_INFO("Fence %c was first signaled\n", sigA ? 'A' : 'B');
+
+	r = radeon_ring_lock(rdev, ringC, 64);
+	if (r) {
+		DRM_ERROR("Failed to lock ring C %d\n", ridxC);
+		goto out_cleanup;
+	}
+	radeon_semaphore_emit_signal(rdev, ridxC, semaphore);
+	radeon_ring_unlock_commit(rdev, ringC);
+
+	mdelay(1000);
+
+	r = radeon_fence_wait(fenceA, false);
+	if (r) {
+		DRM_ERROR("Failed to wait for sync fence A\n");
+		goto out_cleanup;
+	}
+	r = radeon_fence_wait(fenceB, false);
+	if (r) {
+		DRM_ERROR("Failed to wait for sync fence B\n");
+		goto out_cleanup;
+	}
+
+out_cleanup:
+	if (semaphore)
+		radeon_semaphore_free(rdev, semaphore);
+
+	if (fenceA)
+		radeon_fence_unref(&fenceA);
+
+	if (fenceB)
+		radeon_fence_unref(&fenceB);
+
+	if (r)
+		printk(KERN_WARNING "Error while testing ring sync (%d).\n", r);
+}
+
+void radeon_test_syncing(struct radeon_device *rdev)
+{
+	int i, j, k;
+
+	for (i = 1; i < RADEON_NUM_RINGS; ++i) {
+		struct radeon_ring *ringA = &rdev->ring[i];
+		if (!ringA->ready)
+			continue;
+
+		for (j = 0; j < i; ++j) {
+			struct radeon_ring *ringB = &rdev->ring[j];
+			if (!ringB->ready)
+				continue;
+
+			DRM_INFO("Testing syncing between rings %d and %d...\n", i, j);
+			radeon_test_ring_sync(rdev, ringA, ringB);
+
+			DRM_INFO("Testing syncing between rings %d and %d...\n", j, i);
+			radeon_test_ring_sync(rdev, ringB, ringA);
+
+			for (k = 0; k < j; ++k) {
+				struct radeon_ring *ringC = &rdev->ring[k];
+
+				DRM_INFO("Testing syncing between rings %d, %d and %d...\n", i, j, k);
+				radeon_test_ring_sync2(rdev, ringA, ringB, ringC);
+
+				DRM_INFO("Testing syncing between rings %d, %d and %d...\n", i, k, j);
+				radeon_test_ring_sync2(rdev, ringA, ringC, ringB);
+
+				DRM_INFO("Testing syncing between rings %d, %d and %d...\n", j, i, k);
+				radeon_test_ring_sync2(rdev, ringB, ringA, ringC);
+
+				DRM_INFO("Testing syncing between rings %d, %d and %d...\n", j, k, i);
+				radeon_test_ring_sync2(rdev, ringB, ringC, ringA);
+
+				DRM_INFO("Testing syncing between rings %d, %d and %d...\n", k, i, j);
+				radeon_test_ring_sync2(rdev, ringC, ringA, ringB);
+
+				DRM_INFO("Testing syncing between rings %d, %d and %d...\n", k, j, i);
+				radeon_test_ring_sync2(rdev, ringC, ringB, ringA);
+			}
+		}
+	}
+}
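
The tests above exercise the new semaphore API as a plain produce/consume handshake. A minimal sketch of that handshake, using only the signatures visible in this series (error paths trimmed, so treat it as illustration rather than the driver's own code):

static int example_sync_two_rings(struct radeon_device *rdev,
				  struct radeon_ring *waiter,
				  struct radeon_ring *signaler)
{
	struct radeon_semaphore *sem = NULL;
	int widx = radeon_ring_index(rdev, waiter);
	int sidx = radeon_ring_index(rdev, signaler);
	int r;

	r = radeon_semaphore_create(rdev, &sem);
	if (r)
		return r;

	/* consumer: the waiting ring stalls until the semaphore fires */
	r = radeon_ring_lock(rdev, waiter, 64);
	if (r)
		goto out;
	radeon_semaphore_emit_wait(rdev, widx, sem);
	radeon_ring_unlock_commit(rdev, waiter);

	/* producer: the signaling ring releases the waiter */
	r = radeon_ring_lock(rdev, signaler, 64);
	if (r)
		goto out;
	radeon_semaphore_emit_signal(rdev, sidx, sem);
	radeon_ring_unlock_commit(rdev, signaler);
out:
	radeon_semaphore_free(rdev, sem);
	return r;
}

radeon_test_syncing() then covers every ordered pair of ready rings with radeon_test_ring_sync() and every ordered triple with radeon_test_ring_sync2(), so for N ready rings it runs N*(N-1) pairwise tests and N*(N-1)*(N-2) three-ring tests.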

+ 4 - 4
drivers/gpu/drm/radeon/radeon_ttm.c

@@ -188,7 +188,7 @@ static void radeon_evict_flags(struct ttm_buffer_object *bo,
 	rbo = container_of(bo, struct radeon_bo, tbo);
 	switch (bo->mem.mem_type) {
 	case TTM_PL_VRAM:
-		if (rbo->rdev->cp.ready == false)
+		if (rbo->rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready == false)
 			radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_CPU);
 		else
 			radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_GTT);
@@ -226,7 +226,7 @@ static int radeon_move_blit(struct ttm_buffer_object *bo,
 	int r;
 
 	rdev = radeon_get_rdev(bo->bdev);
-	r = radeon_fence_create(rdev, &fence);
+	r = radeon_fence_create(rdev, &fence, RADEON_RING_TYPE_GFX_INDEX);
 	if (unlikely(r)) {
 		return r;
 	}
@@ -255,7 +255,7 @@ static int radeon_move_blit(struct ttm_buffer_object *bo,
 		DRM_ERROR("Unknown placement %d\n", old_mem->mem_type);
 		return -EINVAL;
 	}
-	if (!rdev->cp.ready) {
+	if (!rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready) {
 		DRM_ERROR("Trying to move memory with CP turned off.\n");
 		return -EINVAL;
 	}
@@ -380,7 +380,7 @@ static int radeon_bo_move(struct ttm_buffer_object *bo,
 		radeon_move_null(bo, new_mem);
 		return 0;
 	}
-	if (!rdev->cp.ready || rdev->asic->copy == NULL) {
+	if (!rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready || rdev->asic->copy == NULL) {
 		/* use memcpy */
 		goto memcpy;
 	}
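
radeon_ttm.c now names the ring a fence belongs to instead of assuming the single CP, and keeps the memcpy fallback whenever the GFX ring is down. A minimal sketch of the per-ring fence pattern, assuming only the calls shown above (the emitted blit itself is elided):

static int example_gfx_fenced_copy(struct radeon_device *rdev)
{
	struct radeon_fence *fence = NULL;
	int r;

	if (!rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready)
		return -EINVAL;		/* caller falls back to memcpy */

	/* bind the fence to the ring that will signal it */
	r = radeon_fence_create(rdev, &fence, RADEON_RING_TYPE_GFX_INDEX);
	if (r)
		return r;
	/* ... emit the copy on the GFX ring, then radeon_fence_emit() ... */
	r = radeon_fence_wait(fence, false);
	radeon_fence_unref(&fence);
	return r;
}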

+ 25 - 2
drivers/gpu/drm/radeon/rs400.c

@@ -410,6 +410,12 @@ static int rs400_startup(struct radeon_device *rdev)
 	if (r)
 		return r;
 
+	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
+	if (r) {
+		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
+		return r;
+	}
+
 	/* Enable IRQ */
 	r100_irq_set(rdev);
 	rdev->config.r300.hdp_cntl = RREG32(RADEON_HOST_PATH_CNTL);
@@ -419,11 +425,18 @@ static int rs400_startup(struct radeon_device *rdev)
 		dev_err(rdev->dev, "failed initializing CP (%d).\n", r);
 		return r;
 	}
-	r = r100_ib_init(rdev);
+
+	r = radeon_ib_pool_start(rdev);
+	if (r)
+		return r;
+
+	r = r100_ib_test(rdev);
 	if (r) {
-		dev_err(rdev->dev, "failed initializing IB (%d).\n", r);
+		dev_err(rdev->dev, "failed testing IB (%d).\n", r);
+		rdev->accel_working = false;
 		return r;
 	}
+
 	return 0;
 }
 
@@ -447,11 +460,14 @@ int rs400_resume(struct radeon_device *rdev)
 	r300_clock_startup(rdev);
 	/* Initialize surface registers */
 	radeon_surface_init(rdev);
+
+	rdev->accel_working = true;
 	return rs400_startup(rdev);
 }
 
 int rs400_suspend(struct radeon_device *rdev)
 {
+	radeon_ib_pool_suspend(rdev);
 	r100_cp_disable(rdev);
 	radeon_wb_disable(rdev);
 	r100_irq_disable(rdev);
@@ -530,7 +546,14 @@ int rs400_init(struct radeon_device *rdev)
 	if (r)
 		return r;
 	r300_set_reg_safe(rdev);
+
+	r = radeon_ib_pool_init(rdev);
 	rdev->accel_working = true;
+	if (r) {
+		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
+		rdev->accel_working = false;
+	}
+
 	r = rs400_startup(rdev);
 	if (r) {
		/* Something went wrong with the accel init, stop accel */
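
The same startup reshuffle repeats for rs600, rs690, rv515 and rv770 below, so it is worth spelling out once. A condensed sketch of the new ordering, using only calls visible in this series (a placeholder comment stands in for the per-family CP bring-up):

static int example_startup(struct radeon_device *rdev)
{
	int r;

	/* 1) start the fence driver for each ring the CP will emit on */
	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
	if (r)
		return r;

	/* 2) enable interrupts, then bring up the command processor */
	r100_irq_set(rdev);
	/* ... per-family CP init goes here ... */

	/* 3) only once the CP runs, start the IB pool and prove it works */
	r = radeon_ib_pool_start(rdev);
	if (r)
		return r;
	r = r100_ib_test(rdev);
	if (r)
		rdev->accel_working = false;
	return r;
}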

+ 28 - 6
drivers/gpu/drm/radeon/rs600.c

@@ -549,7 +549,7 @@ int rs600_irq_set(struct radeon_device *rdev)
 		WREG32(R_000040_GEN_INT_CNTL, 0);
 		return -EINVAL;
 	}
-	if (rdev->irq.sw_int) {
+	if (rdev->irq.sw_int[RADEON_RING_TYPE_GFX_INDEX]) {
 		tmp |= S_000040_SW_INT_EN(1);
 	}
 	if (rdev->irq.gui_idle) {
@@ -642,7 +642,7 @@ int rs600_irq_process(struct radeon_device *rdev)
 	while (status || rdev->irq.stat_regs.r500.disp_int) {
 		/* SW interrupt */
 		if (G_000044_SW_INT(status)) {
-			radeon_fence_process(rdev);
+			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
 		}
 		/* GUI idle */
 		if (G_000040_GUI_IDLE(status)) {
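
rdev->irq.sw_int is now an array indexed by ring, and radeon_fence_process() takes the ring whose fence sequence it should re-read, since every ring keeps its own fence queue. On ASICs with several rings a handler would dispatch along these lines (sw_int_pending() is a hypothetical decode helper, not a function in this series):

	for (i = 0; i < RADEON_NUM_RINGS; ++i) {
		if (sw_int_pending(status, i))	/* hypothetical per-ring decode */
			radeon_fence_process(rdev, i);
	}

rs600 only wires up the GFX ring, so the hunks above hard-code RADEON_RING_TYPE_GFX_INDEX.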
@@ -849,6 +849,12 @@ static int rs600_startup(struct radeon_device *rdev)
 	if (r)
 		return r;
 
+	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
+	if (r) {
+		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
+		return r;
+	}
+
 	/* Enable IRQ */
 	rs600_irq_set(rdev);
 	rdev->config.r300.hdp_cntl = RREG32(RADEON_HOST_PATH_CNTL);
@@ -858,15 +864,21 @@ static int rs600_startup(struct radeon_device *rdev)
 		dev_err(rdev->dev, "failed initializing CP (%d).\n", r);
 		return r;
 	}
-	r = r100_ib_init(rdev);
+
+	r = r600_audio_init(rdev);
 	if (r) {
-		dev_err(rdev->dev, "failed initializing IB (%d).\n", r);
+		dev_err(rdev->dev, "failed initializing audio\n");
 		return r;
 	}
 
-	r = r600_audio_init(rdev);
+	r = radeon_ib_pool_start(rdev);
+	if (r)
+		return r;
+
+	r = r100_ib_test(rdev);
 	if (r) {
-		dev_err(rdev->dev, "failed initializing audio\n");
+		dev_err(rdev->dev, "failed testing IB (%d).\n", r);
+		rdev->accel_working = false;
 		return r;
 	}
 
@@ -891,11 +903,14 @@ int rs600_resume(struct radeon_device *rdev)
 	rv515_clock_startup(rdev);
 	/* Initialize surface registers */
 	radeon_surface_init(rdev);
+
+	rdev->accel_working = true;
 	return rs600_startup(rdev);
 }
 
 int rs600_suspend(struct radeon_device *rdev)
 {
+	radeon_ib_pool_suspend(rdev);
 	r600_audio_fini(rdev);
 	r100_cp_disable(rdev);
 	radeon_wb_disable(rdev);
@@ -976,7 +991,14 @@ int rs600_init(struct radeon_device *rdev)
 	if (r)
 		return r;
 	rs600_set_safe_registers(rdev);
+
+	r = radeon_ib_pool_init(rdev);
 	rdev->accel_working = true;
+	if (r) {
+		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
+		rdev->accel_working = false;
+	}
+
 	r = rs600_startup(rdev);
 	if (r) {
		/* Something went wrong with the accel init, stop accel */

+ 26 - 4
drivers/gpu/drm/radeon/rs690.c

@@ -621,6 +621,12 @@ static int rs690_startup(struct radeon_device *rdev)
 	if (r)
 		return r;
 
+	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
+	if (r) {
+		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
+		return r;
+	}
+
 	/* Enable IRQ */
 	rs600_irq_set(rdev);
 	rdev->config.r300.hdp_cntl = RREG32(RADEON_HOST_PATH_CNTL);
@@ -630,15 +636,21 @@ static int rs690_startup(struct radeon_device *rdev)
 		dev_err(rdev->dev, "failed initializing CP (%d).\n", r);
 		return r;
 	}
-	r = r100_ib_init(rdev);
+
+	r = r600_audio_init(rdev);
 	if (r) {
-		dev_err(rdev->dev, "failed initializing IB (%d).\n", r);
+		dev_err(rdev->dev, "failed initializing audio\n");
 		return r;
 	}
 
-	r = r600_audio_init(rdev);
+	r = radeon_ib_pool_start(rdev);
+	if (r)
+		return r;
+
+	r = r100_ib_test(rdev);
 	if (r) {
-		dev_err(rdev->dev, "failed initializing audio\n");
+		dev_err(rdev->dev, "failed testing IB (%d).\n", r);
+		rdev->accel_working = false;
 		return r;
 	}
 
@@ -663,11 +675,14 @@ int rs690_resume(struct radeon_device *rdev)
 	rv515_clock_startup(rdev);
 	/* Initialize surface registers */
 	radeon_surface_init(rdev);
+
+	rdev->accel_working = true;
 	return rs690_startup(rdev);
 }
 
 int rs690_suspend(struct radeon_device *rdev)
 {
+	radeon_ib_pool_suspend(rdev);
 	r600_audio_fini(rdev);
 	r100_cp_disable(rdev);
 	radeon_wb_disable(rdev);
@@ -749,7 +764,14 @@ int rs690_init(struct radeon_device *rdev)
 	if (r)
 		return r;
 	rs600_set_safe_registers(rdev);
+
+	r = radeon_ib_pool_init(rdev);
 	rdev->accel_working = true;
+	if (r) {
+		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
+		rdev->accel_working = false;
+	}
+
 	r = rs690_startup(rdev);
 	if (r) {
		/* Something went wrong with the accel init, stop accel */

+ 64 - 42
drivers/gpu/drm/radeon/rv515.c

@@ -55,44 +55,45 @@ void rv515_debugfs(struct radeon_device *rdev)
 
 void rv515_ring_start(struct radeon_device *rdev)
 {
+	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
 	int r;
 
-	r = radeon_ring_lock(rdev, 64);
+	r = radeon_ring_lock(rdev, ring, 64);
 	if (r) {
 		return;
 	}
-	radeon_ring_write(rdev, PACKET0(ISYNC_CNTL, 0));
-	radeon_ring_write(rdev,
+	radeon_ring_write(ring, PACKET0(ISYNC_CNTL, 0));
+	radeon_ring_write(ring,
 			  ISYNC_ANY2D_IDLE3D |
 			  ISYNC_ANY3D_IDLE2D |
 			  ISYNC_WAIT_IDLEGUI |
 			  ISYNC_CPSCRATCH_IDLEGUI);
-	radeon_ring_write(rdev, PACKET0(WAIT_UNTIL, 0));
-	radeon_ring_write(rdev, WAIT_2D_IDLECLEAN | WAIT_3D_IDLECLEAN);
-	radeon_ring_write(rdev, PACKET0(R300_DST_PIPE_CONFIG, 0));
-	radeon_ring_write(rdev, R300_PIPE_AUTO_CONFIG);
-	radeon_ring_write(rdev, PACKET0(GB_SELECT, 0));
-	radeon_ring_write(rdev, 0);
-	radeon_ring_write(rdev, PACKET0(GB_ENABLE, 0));
-	radeon_ring_write(rdev, 0);
-	radeon_ring_write(rdev, PACKET0(R500_SU_REG_DEST, 0));
-	radeon_ring_write(rdev, (1 << rdev->num_gb_pipes) - 1);
-	radeon_ring_write(rdev, PACKET0(VAP_INDEX_OFFSET, 0));
-	radeon_ring_write(rdev, 0);
-	radeon_ring_write(rdev, PACKET0(RB3D_DSTCACHE_CTLSTAT, 0));
-	radeon_ring_write(rdev, RB3D_DC_FLUSH | RB3D_DC_FREE);
-	radeon_ring_write(rdev, PACKET0(ZB_ZCACHE_CTLSTAT, 0));
-	radeon_ring_write(rdev, ZC_FLUSH | ZC_FREE);
-	radeon_ring_write(rdev, PACKET0(WAIT_UNTIL, 0));
-	radeon_ring_write(rdev, WAIT_2D_IDLECLEAN | WAIT_3D_IDLECLEAN);
-	radeon_ring_write(rdev, PACKET0(GB_AA_CONFIG, 0));
-	radeon_ring_write(rdev, 0);
-	radeon_ring_write(rdev, PACKET0(RB3D_DSTCACHE_CTLSTAT, 0));
-	radeon_ring_write(rdev, RB3D_DC_FLUSH | RB3D_DC_FREE);
-	radeon_ring_write(rdev, PACKET0(ZB_ZCACHE_CTLSTAT, 0));
-	radeon_ring_write(rdev, ZC_FLUSH | ZC_FREE);
-	radeon_ring_write(rdev, PACKET0(GB_MSPOS0, 0));
-	radeon_ring_write(rdev,
+	radeon_ring_write(ring, PACKET0(WAIT_UNTIL, 0));
+	radeon_ring_write(ring, WAIT_2D_IDLECLEAN | WAIT_3D_IDLECLEAN);
+	radeon_ring_write(ring, PACKET0(R300_DST_PIPE_CONFIG, 0));
+	radeon_ring_write(ring, R300_PIPE_AUTO_CONFIG);
+	radeon_ring_write(ring, PACKET0(GB_SELECT, 0));
+	radeon_ring_write(ring, 0);
+	radeon_ring_write(ring, PACKET0(GB_ENABLE, 0));
+	radeon_ring_write(ring, 0);
+	radeon_ring_write(ring, PACKET0(R500_SU_REG_DEST, 0));
+	radeon_ring_write(ring, (1 << rdev->num_gb_pipes) - 1);
+	radeon_ring_write(ring, PACKET0(VAP_INDEX_OFFSET, 0));
+	radeon_ring_write(ring, 0);
+	radeon_ring_write(ring, PACKET0(RB3D_DSTCACHE_CTLSTAT, 0));
+	radeon_ring_write(ring, RB3D_DC_FLUSH | RB3D_DC_FREE);
+	radeon_ring_write(ring, PACKET0(ZB_ZCACHE_CTLSTAT, 0));
+	radeon_ring_write(ring, ZC_FLUSH | ZC_FREE);
+	radeon_ring_write(ring, PACKET0(WAIT_UNTIL, 0));
+	radeon_ring_write(ring, WAIT_2D_IDLECLEAN | WAIT_3D_IDLECLEAN);
+	radeon_ring_write(ring, PACKET0(GB_AA_CONFIG, 0));
+	radeon_ring_write(ring, 0);
+	radeon_ring_write(ring, PACKET0(RB3D_DSTCACHE_CTLSTAT, 0));
+	radeon_ring_write(ring, RB3D_DC_FLUSH | RB3D_DC_FREE);
+	radeon_ring_write(ring, PACKET0(ZB_ZCACHE_CTLSTAT, 0));
+	radeon_ring_write(ring, ZC_FLUSH | ZC_FREE);
+	radeon_ring_write(ring, PACKET0(GB_MSPOS0, 0));
+	radeon_ring_write(ring,
 			  ((6 << MS_X0_SHIFT) |
 			   (6 << MS_Y0_SHIFT) |
 			   (6 << MS_X1_SHIFT) |
@@ -101,8 +102,8 @@ void rv515_ring_start(struct radeon_device *rdev)
 			   (6 << MS_Y2_SHIFT) |
 			   (6 << MSBD0_Y_SHIFT) |
 			   (6 << MSBD0_X_SHIFT)));
-	radeon_ring_write(rdev, PACKET0(GB_MSPOS1, 0));
-	radeon_ring_write(rdev,
+	radeon_ring_write(ring, PACKET0(GB_MSPOS1, 0));
+	radeon_ring_write(ring,
 			  ((6 << MS_X3_SHIFT) |
 			   (6 << MS_Y3_SHIFT) |
 			   (6 << MS_X4_SHIFT) |
@@ -110,15 +111,15 @@ void rv515_ring_start(struct radeon_device *rdev)
 			   (6 << MS_X5_SHIFT) |
 			   (6 << MS_Y5_SHIFT) |
 			   (6 << MSBD1_SHIFT)));
-	radeon_ring_write(rdev, PACKET0(GA_ENHANCE, 0));
-	radeon_ring_write(rdev, GA_DEADLOCK_CNTL | GA_FASTSYNC_CNTL);
-	radeon_ring_write(rdev, PACKET0(GA_POLY_MODE, 0));
-	radeon_ring_write(rdev, FRONT_PTYPE_TRIANGE | BACK_PTYPE_TRIANGE);
-	radeon_ring_write(rdev, PACKET0(GA_ROUND_MODE, 0));
-	radeon_ring_write(rdev, GEOMETRY_ROUND_NEAREST | COLOR_ROUND_NEAREST);
-	radeon_ring_write(rdev, PACKET0(0x20C8, 0));
-	radeon_ring_write(rdev, 0);
-	radeon_ring_unlock_commit(rdev);
+	radeon_ring_write(ring, PACKET0(GA_ENHANCE, 0));
+	radeon_ring_write(ring, GA_DEADLOCK_CNTL | GA_FASTSYNC_CNTL);
+	radeon_ring_write(ring, PACKET0(GA_POLY_MODE, 0));
+	radeon_ring_write(ring, FRONT_PTYPE_TRIANGE | BACK_PTYPE_TRIANGE);
+	radeon_ring_write(ring, PACKET0(GA_ROUND_MODE, 0));
+	radeon_ring_write(ring, GEOMETRY_ROUND_NEAREST | COLOR_ROUND_NEAREST);
+	radeon_ring_write(ring, PACKET0(0x20C8, 0));
+	radeon_ring_write(ring, 0);
+	radeon_ring_unlock_commit(rdev, ring);
 }
 
 int rv515_mc_wait_for_idle(struct radeon_device *rdev)
@@ -392,6 +393,12 @@ static int rv515_startup(struct radeon_device *rdev)
 	if (r)
 		return r;
 
+	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
+	if (r) {
+		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
+		return r;
+	}
+
 	/* Enable IRQ */
 	rs600_irq_set(rdev);
 	rdev->config.r300.hdp_cntl = RREG32(RADEON_HOST_PATH_CNTL);
@@ -401,9 +408,15 @@ static int rv515_startup(struct radeon_device *rdev)
 		dev_err(rdev->dev, "failed initializing CP (%d).\n", r);
 		return r;
 	}
-	r = r100_ib_init(rdev);
+
+	r = radeon_ib_pool_start(rdev);
+	if (r)
+		return r;
+
+	r = r100_ib_test(rdev);
 	if (r) {
-		dev_err(rdev->dev, "failed initializing IB (%d).\n", r);
+		dev_err(rdev->dev, "failed testing IB (%d).\n", r);
+		rdev->accel_working = false;
 		return r;
 	}
 	return 0;
@@ -428,6 +441,8 @@ int rv515_resume(struct radeon_device *rdev)
 	rv515_clock_startup(rdev);
 	/* Initialize surface registers */
 	radeon_surface_init(rdev);
+
+	rdev->accel_working = true;
 	return rv515_startup(rdev);
 }
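
Because the IB test now runs inside *_startup() and clears accel_working on failure, every resume path must re-arm the flag before calling startup; otherwise one transient failure would leave acceleration disabled across all later resumes. The resume contract, in short:

int example_resume(struct radeon_device *rdev)
{
	/* ... re-post the card, restart clocks, restore surfaces ... */
	rdev->accel_working = true;	/* startup's IB test may clear it */
	return rv515_startup(rdev);
}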
 
@@ -524,7 +539,14 @@ int rv515_init(struct radeon_device *rdev)
 	if (r)
 		return r;
 	rv515_set_safe_registers(rdev);
+
+	r = radeon_ib_pool_init(rdev);
 	rdev->accel_working = true;
+	if (r) {
+		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
+		rdev->accel_working = false;
+	}
+
 	r = rv515_startup(rdev);
 	if (r) {
		/* Something went wrong with the accel init, stop accel */

+ 37 - 26
drivers/gpu/drm/radeon/rv770.c

@@ -357,7 +357,7 @@ static int rv770_cp_load_microcode(struct radeon_device *rdev)
 void r700_cp_fini(struct radeon_device *rdev)
 {
 	r700_cp_stop(rdev);
-	radeon_ring_fini(rdev);
+	radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
 }
 
 /*
@@ -1043,6 +1043,7 @@ int rv770_mc_init(struct radeon_device *rdev)
 
 static int rv770_startup(struct radeon_device *rdev)
 {
+	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
 	int r;
 
 	/* enable pcie gen2 link */
@@ -1082,6 +1083,12 @@ static int rv770_startup(struct radeon_device *rdev)
 	if (r)
 		return r;
 
+	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
+	if (r) {
+		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
+		return r;
+	}
+
 	/* Enable IRQ */
 	r = r600_irq_init(rdev);
 	if (r) {
@@ -1091,7 +1098,9 @@ static int rv770_startup(struct radeon_device *rdev)
 	}
 	r600_irq_set(rdev);
 
-	r = radeon_ring_init(rdev, rdev->cp.ring_size);
+	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
+			     R600_CP_RB_RPTR, R600_CP_RB_WPTR,
+			     0, 0xfffff, RADEON_CP_PACKET2);
 	if (r)
 		return r;
 	r = rv770_cp_load_microcode(rdev);
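
radeon_ring_init() now carries the per-ring plumbing that used to be hard-coded for the single CP. The same call, annotated with the parameter meanings as inferred from this series (a best-effort reading, not documentation):

	r = radeon_ring_init(rdev, ring,
			     ring->ring_size,		/* ring size in bytes */
			     RADEON_WB_CP_RPTR_OFFSET,	/* rptr copy in the writeback page */
			     R600_CP_RB_RPTR,		/* rptr register */
			     R600_CP_RB_WPTR,		/* wptr register */
			     0, 0xfffff,		/* ptr register shift and mask */
			     RADEON_CP_PACKET2);	/* filler/nop opcode */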
@@ -1101,6 +1110,17 @@ static int rv770_startup(struct radeon_device *rdev)
 	if (r)
 		return r;
 
+	r = radeon_ib_pool_start(rdev);
+	if (r)
+		return r;
+
+	r = r600_ib_test(rdev, RADEON_RING_TYPE_GFX_INDEX);
+	if (r) {
+		dev_err(rdev->dev, "IB test failed (%d).\n", r);
+		rdev->accel_working = false;
+		return r;
+	}
+
 	return 0;
 }
 
@@ -1115,18 +1135,13 @@ int rv770_resume(struct radeon_device *rdev)
 	/* post card */
 	atom_asic_init(rdev->mode_info.atom_context);
 
+	rdev->accel_working = true;
 	r = rv770_startup(rdev);
 	if (r) {
 		DRM_ERROR("r600 startup failed on resume\n");
 		return r;
 	}
 
-	r = r600_ib_test(rdev);
-	if (r) {
-		DRM_ERROR("radeon: failed testing IB (%d).\n", r);
-		return r;
-	}
-
 	r = r600_audio_init(rdev);
 	if (r) {
 		dev_err(rdev->dev, "radeon: audio init failed\n");
@@ -1140,13 +1155,14 @@ int rv770_resume(struct radeon_device *rdev)
 int rv770_suspend(struct radeon_device *rdev)
 {
 	r600_audio_fini(rdev);
+	radeon_ib_pool_suspend(rdev);
+	r600_blit_suspend(rdev);
 	/* FIXME: we should wait for ring to be empty */
 	r700_cp_stop(rdev);
-	rdev->cp.ready = false;
+	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
 	r600_irq_suspend(rdev);
 	radeon_wb_disable(rdev);
 	rv770_pcie_gart_disable(rdev);
-	r600_blit_suspend(rdev);
 
 	return 0;
 }
@@ -1215,8 +1231,8 @@ int rv770_init(struct radeon_device *rdev)
 	if (r)
 		return r;
 
-	rdev->cp.ring_obj = NULL;
-	r600_ring_init(rdev, 1024 * 1024);
+	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ring_obj = NULL;
+	r600_ring_init(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX], 1024 * 1024);
 
 	rdev->ih.ring_obj = NULL;
 	r600_ih_ring_init(rdev, 64 * 1024);
@@ -1225,30 +1241,24 @@ int rv770_init(struct radeon_device *rdev)
 	if (r)
 		return r;
 
+	r = radeon_ib_pool_init(rdev);
 	rdev->accel_working = true;
+	if (r) {
+		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
+		rdev->accel_working = false;
+	}
+
 	r = rv770_startup(rdev);
 	if (r) {
 		dev_err(rdev->dev, "disabling GPU acceleration\n");
 		r700_cp_fini(rdev);
 		r600_irq_fini(rdev);
 		radeon_wb_fini(rdev);
+		r100_ib_fini(rdev);
 		radeon_irq_kms_fini(rdev);
 		rv770_pcie_gart_fini(rdev);
 		rdev->accel_working = false;
 	}
-	if (rdev->accel_working) {
-		r = radeon_ib_pool_init(rdev);
-		if (r) {
-			dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
-			rdev->accel_working = false;
-		} else {
-			r = r600_ib_test(rdev);
-			if (r) {
-				dev_err(rdev->dev, "IB test failed (%d).\n", r);
-				rdev->accel_working = false;
-			}
-		}
-	}
 
 	r = r600_audio_init(rdev);
 	if (r) {
@@ -1265,11 +1275,12 @@ void rv770_fini(struct radeon_device *rdev)
 	r700_cp_fini(rdev);
 	r600_irq_fini(rdev);
 	radeon_wb_fini(rdev);
-	radeon_ib_pool_fini(rdev);
+	r100_ib_fini(rdev);
 	radeon_irq_kms_fini(rdev);
 	rv770_pcie_gart_fini(rdev);
 	r600_vram_scratch_fini(rdev);
 	radeon_gem_fini(rdev);
+	radeon_semaphore_driver_fini(rdev);
 	radeon_fence_driver_fini(rdev);
 	radeon_agp_fini(rdev);
 	radeon_bo_fini(rdev);