
Merge branch 'drm-nouveau-next' of git://anongit.freedesktop.org/git/nouveau/linux-2.6 into drm-core-next

Ben was distracted:

"Apologies for being really late with this, feel free to bash me in the
future so I remember on time!

Overview:
- improvements to reclocking (especially memory) on nva3+
- kepler accel support (if you have blob ucode)
- better inter-channel synchronisation on nv84+
- async ttm buffer moves on nv84+ (earlier cards don't have a non-PGRAPH
engine that's useful)"

* 'drm-nouveau-next' of git://anongit.freedesktop.org/git/nouveau/linux-2.6: (60 commits)
  drm/nouveau/nvd9: Fix GPIO initialisation sequence.
  drm/nouveau: Unregister switcheroo client on exit
  drm/nouveau: Check dsm on switcheroo unregister
  drm/nouveau: fix a minor annoyance in an output string
  drm/nouveau: turn a BUG into a WARN
  drm/nv50: decode PGRAPH DATA_ERROR = 0x24
  drm/nouveau/disp: fix dithering not being enabled on some eDP macbooks
  drm/nvd9/copy: initialise copy engine, seems to work like nvc0
  drm/nvc0/ttm: use copy engines for async buffer moves
  drm/nva3/ttm: use copy engine for async buffer moves
  drm/nv98/ttm: add in a (disabled) crypto engine buffer copy method
  drm/nv84/ttm: use crypto engine for async buffer copies
  drm/nouveau/ttm: untangle code to support accelerated buffer moves
  drm/nouveau/fbcon: use fence for sync, rather than notifier
  drm/nv98/crypt: non-stub implementation of the engine hooks
  drm/nouveau/fifo: turn all fifo modules into engine modules
  drm/nv50/graph: remove ability to do interrupt-driven context switching
  drm/nv50: remove manual context unload on context destruction
  drm/nv50: remove execution engine context saves on suspend
  drm/nv50/fifo: use hardware channel kickoff functionality
  ...
Dave Airlie, 13 years ago
parent
commit 8c914028f5
76 changed files with 9520 additions and 3026 deletions
  1. drivers/gpu/drm/nouveau/Makefile (+6, -3)
  2. drivers/gpu/drm/nouveau/nouveau_acpi.c (+2, -1)
  3. drivers/gpu/drm/nouveau/nouveau_bios.c (+31, -20)
  4. drivers/gpu/drm/nouveau/nouveau_bo.c (+297, -44)
  5. drivers/gpu/drm/nouveau/nouveau_channel.c (+43, -43)
  6. drivers/gpu/drm/nouveau/nouveau_connector.c (+7, -1)
  7. drivers/gpu/drm/nouveau/nouveau_debugfs.c (+0, -2)
  8. drivers/gpu/drm/nouveau/nouveau_display.c (+25, -13)
  9. drivers/gpu/drm/nouveau/nouveau_dma.h (+27, -8)
  10. drivers/gpu/drm/nouveau/nouveau_drv.c (+5, -10)
  11. drivers/gpu/drm/nouveau/nouveau_drv.h (+17, -149)
  12. drivers/gpu/drm/nouveau/nouveau_fbcon.c (+2, -32)
  13. drivers/gpu/drm/nouveau/nouveau_fence.c (+88, -490)
  14. drivers/gpu/drm/nouveau/nouveau_fence.h (+52, -0)
  15. drivers/gpu/drm/nouveau/nouveau_fifo.h (+32, -0)
  16. drivers/gpu/drm/nouveau/nouveau_gem.c (+3, -2)
  17. drivers/gpu/drm/nouveau/nouveau_gpio.c (+1, -1)
  18. drivers/gpu/drm/nouveau/nouveau_grctx.h (+0, -4)
  19. drivers/gpu/drm/nouveau/nouveau_hw.c (+0, -5)
  20. drivers/gpu/drm/nouveau/nouveau_mem.c (+11, -6)
  21. drivers/gpu/drm/nouveau/nouveau_object.c (+19, -196)
  22. drivers/gpu/drm/nouveau/nouveau_perf.c (+1, -1)
  23. drivers/gpu/drm/nouveau/nouveau_pm.h (+3, -1)
  24. drivers/gpu/drm/nouveau/nouveau_sgdma.c (+2, -2)
  25. drivers/gpu/drm/nouveau/nouveau_software.h (+69, -0)
  26. drivers/gpu/drm/nouveau/nouveau_state.c (+93, -167)
  27. drivers/gpu/drm/nouveau/nv04_display.c (+11, -0)
  28. drivers/gpu/drm/nouveau/nv04_fbcon.c (+24, -24)
  29. drivers/gpu/drm/nouveau/nv04_fence.c (+140, -0)
  30. drivers/gpu/drm/nouveau/nv04_fifo.c (+191, -228)
  31. drivers/gpu/drm/nouveau/nv04_graph.c (+6, -33)
  32. drivers/gpu/drm/nouveau/nv04_instmem.c (+12, -11)
  33. drivers/gpu/drm/nouveau/nv04_software.c (+147, -0)
  34. drivers/gpu/drm/nouveau/nv10_fence.c (+214, -0)
  35. drivers/gpu/drm/nouveau/nv10_fifo.c (+85, -193)
  36. drivers/gpu/drm/nouveau/nv10_graph.c (+4, -9)
  37. drivers/gpu/drm/nouveau/nv17_fifo.c (+177, -0)
  38. drivers/gpu/drm/nouveau/nv20_graph.c (+1, -7)
  39. drivers/gpu/drm/nouveau/nv31_mpeg.c (+3, -1)
  40. drivers/gpu/drm/nouveau/nv40_fifo.c (+127, -224)
  41. drivers/gpu/drm/nouveau/nv40_graph.c (+7, -30)
  42. drivers/gpu/drm/nouveau/nv40_grctx.c (+30, -2)
  43. drivers/gpu/drm/nouveau/nv40_pm.c (+1, -0)
  44. drivers/gpu/drm/nouveau/nv50_crtc.c (+44, -54)
  45. drivers/gpu/drm/nouveau/nv50_cursor.c (+6, -6)
  46. drivers/gpu/drm/nouveau/nv50_dac.c (+3, -3)
  47. drivers/gpu/drm/nouveau/nv50_display.c (+33, -42)
  48. drivers/gpu/drm/nouveau/nv50_display.h (+1, -0)
  49. drivers/gpu/drm/nouveau/nv50_fb.c (+3, -1)
  50. drivers/gpu/drm/nouveau/nv50_fbcon.c (+29, -30)
  51. drivers/gpu/drm/nouveau/nv50_fifo.c (+192, -404)
  52. drivers/gpu/drm/nouveau/nv50_graph.c (+9, -220)
  53. drivers/gpu/drm/nouveau/nv50_grctx.c (+30, -3)
  54. drivers/gpu/drm/nouveau/nv50_instmem.c (+1, -1)
  55. drivers/gpu/drm/nouveau/nv50_mpeg.c (+2, -17)
  56. drivers/gpu/drm/nouveau/nv50_software.c (+214, -0)
  57. drivers/gpu/drm/nouveau/nv50_sor.c (+3, -3)
  58. drivers/gpu/drm/nouveau/nv50_vm.c (+0, -2)
  59. drivers/gpu/drm/nouveau/nv84_fence.c (+177, -0)
  60. drivers/gpu/drm/nouveau/nv84_fifo.c (+241, -0)
  61. drivers/gpu/drm/nouveau/nv98_crypt.c (+152, -14)
  62. drivers/gpu/drm/nouveau/nv98_crypt.fuc (+698, -0)
  63. drivers/gpu/drm/nouveau/nv98_crypt.fuc.h (+584, -0)
  64. drivers/gpu/drm/nouveau/nva3_copy.c (+4, -27)
  65. drivers/gpu/drm/nouveau/nva3_pm.c (+272, -18)
  66. drivers/gpu/drm/nouveau/nvc0_fbcon.c (+27, -27)
  67. drivers/gpu/drm/nouveau/nvc0_fence.c (+184, -0)
  68. drivers/gpu/drm/nouveau/nvc0_fifo.c (+125, -185)
  69. drivers/gpu/drm/nouveau/nvc0_graph.c (+3, -1)
  70. drivers/gpu/drm/nouveau/nvc0_pm.c (+189, -0)
  71. drivers/gpu/drm/nouveau/nvc0_software.c (+153, -0)
  72. drivers/gpu/drm/nouveau/nvd0_display.c (+5, -5)
  73. drivers/gpu/drm/nouveau/nve0_fifo.c (+423, -0)
  74. drivers/gpu/drm/nouveau/nve0_graph.c (+831, -0)
  75. drivers/gpu/drm/nouveau/nve0_graph.h (+89, -0)
  76. drivers/gpu/drm/nouveau/nve0_grctx.c (+2777, -0)

+ 6 - 3
drivers/gpu/drm/nouveau/Makefile

@@ -16,10 +16,13 @@ nouveau-y := nouveau_drv.o nouveau_state.o nouveau_channel.o nouveau_mem.o \
              nv04_mc.o nv40_mc.o nv50_mc.o \
              nv04_fb.o nv10_fb.o nv20_fb.o nv30_fb.o nv40_fb.o \
              nv50_fb.o nvc0_fb.o \
-             nv04_fifo.o nv10_fifo.o nv40_fifo.o nv50_fifo.o nvc0_fifo.o \
+             nv04_fifo.o nv10_fifo.o nv17_fifo.o nv40_fifo.o nv50_fifo.o \
+             nv84_fifo.o nvc0_fifo.o nve0_fifo.o \
+             nv04_fence.o nv10_fence.o nv84_fence.o nvc0_fence.o \
+             nv04_software.o nv50_software.o nvc0_software.o \
              nv04_graph.o nv10_graph.o nv20_graph.o \
-             nv40_graph.o nv50_graph.o nvc0_graph.o \
-             nv40_grctx.o nv50_grctx.o nvc0_grctx.o \
+             nv40_graph.o nv50_graph.o nvc0_graph.o nve0_graph.o \
+             nv40_grctx.o nv50_grctx.o nvc0_grctx.o nve0_grctx.o \
              nv84_crypt.o nv98_crypt.o \
              nva3_copy.o nvc0_copy.o \
              nv31_mpeg.o nv50_mpeg.o \

+ 2 - 1
drivers/gpu/drm/nouveau/nouveau_acpi.c

@@ -338,7 +338,8 @@ void nouveau_switcheroo_optimus_dsm(void)
 
 void nouveau_unregister_dsm_handler(void)
 {
-	vga_switcheroo_unregister_handler();
+	if (nouveau_dsm_priv.optimus_detected || nouveau_dsm_priv.dsm_detected)
+		vga_switcheroo_unregister_handler();
 }
 
 /* retrieve the ROM in 4k blocks */

+ 31 - 20
drivers/gpu/drm/nouveau/nouveau_bios.c

@@ -30,6 +30,7 @@
 #include "nouveau_gpio.h"
 
 #include <linux/io-mapping.h>
+#include <linux/firmware.h>
 
 /* these defines are made up */
 #define NV_CIO_CRE_44_HEADA 0x0
@@ -195,35 +196,24 @@ static void
 bios_shadow_acpi(struct nvbios *bios)
 {
 	struct pci_dev *pdev = bios->dev->pdev;
-	int ptr, len, ret;
-	u8 data[3];
+	int cnt = 65536 / ROM_BIOS_PAGE;
+	int ret;
 
 	if (!nouveau_acpi_rom_supported(pdev))
 		return;
 
-	ret = nouveau_acpi_get_bios_chunk(data, 0, sizeof(data));
-	if (ret != sizeof(data))
-		return;
-
-	bios->length = min(data[2] * 512, 65536);
-	bios->data = kmalloc(bios->length, GFP_KERNEL);
+	bios->data = kmalloc(cnt * ROM_BIOS_PAGE, GFP_KERNEL);
 	if (!bios->data)
 		return;
 
-	len = bios->length;
-	ptr = 0;
-	while (len) {
-		int size = (len > ROM_BIOS_PAGE) ? ROM_BIOS_PAGE : len;
-
-		ret = nouveau_acpi_get_bios_chunk(bios->data, ptr, size);
-		if (ret != size) {
-			kfree(bios->data);
-			bios->data = NULL;
+	bios->length = 0;
+	while (cnt--) {
+		ret = nouveau_acpi_get_bios_chunk(bios->data, bios->length,
+						  ROM_BIOS_PAGE);
+		if (ret != ROM_BIOS_PAGE)
 			return;
-		}
 
-		len -= size;
-		ptr += size;
+		bios->length += ROM_BIOS_PAGE;
 	}
 }
 
@@ -249,8 +239,12 @@ bios_shadow(struct drm_device *dev)
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
 	struct nvbios *bios = &dev_priv->vbios;
 	struct methods *mthd, *best;
+	const struct firmware *fw;
+	char fname[32];
+	int ret;
 
 	if (nouveau_vbios) {
+		/* try to match one of the built-in methods */
 		mthd = shadow_methods;
 		do {
 			if (strcasecmp(nouveau_vbios, mthd->desc))
@@ -263,6 +257,22 @@ bios_shadow(struct drm_device *dev)
 				return true;
 		} while ((++mthd)->shadow);
 
+		/* attempt to load firmware image */
+		snprintf(fname, sizeof(fname), "nouveau/%s", nouveau_vbios);
+		ret = request_firmware(&fw, fname, &dev->pdev->dev);
+		if (ret == 0) {
+			bios->length = fw->size;
+			bios->data   = kmemdup(fw->data, fw->size, GFP_KERNEL);
+			release_firmware(fw);
+
+			NV_INFO(dev, "VBIOS image: %s\n", nouveau_vbios);
+			if (score_vbios(bios, 1))
+				return true;
+
+			kfree(bios->data);
+			bios->data = NULL;
+		}
+
 		NV_ERROR(dev, "VBIOS source \'%s\' invalid\n", nouveau_vbios);
 	}
 
@@ -273,6 +283,7 @@ bios_shadow(struct drm_device *dev)
 		mthd->score = score_vbios(bios, mthd->rw);
 		mthd->size = bios->length;
 		mthd->data = bios->data;
+		bios->data = NULL;
 	} while (mthd->score != 3 && (++mthd)->shadow);
 
 	mthd = shadow_methods;
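
A usage note on the firmware fallback added to bios_shadow() above: when the
user-supplied nouveau_vbios string matches none of the built-in shadow
methods, the driver now also tries request_firmware("nouveau/<name>"), so an
image placed under the firmware search path (typically
/lib/firmware/nouveau/<name>) can be scored and used directly. A minimal
stand-alone sketch of the same request/duplicate/release idiom, with a
hypothetical helper name:

	#include <linux/device.h>
	#include <linux/firmware.h>
	#include <linux/slab.h>

	/* load_blob() is hypothetical, not part of this patch: it mirrors the
	 * request_firmware()/kmemdup()/release_firmware() sequence used in
	 * bios_shadow() above, returning a driver-owned copy of the image. */
	static void *load_blob(struct device *dev, const char *name, size_t *len)
	{
		const struct firmware *fw;
		void *data = NULL;

		if (request_firmware(&fw, name, dev) == 0) {
			data = kmemdup(fw->data, fw->size, GFP_KERNEL);
			if (data)
				*len = fw->size;
			release_firmware(fw);
		}
		return data;
	}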

+ 297 - 44
drivers/gpu/drm/nouveau/nouveau_bo.c

@@ -35,6 +35,8 @@
 #include "nouveau_dma.h"
 #include "nouveau_mm.h"
 #include "nouveau_vm.h"
+#include "nouveau_fence.h"
+#include "nouveau_ramht.h"
 
 #include <linux/log2.h>
 #include <linux/slab.h>
@@ -478,7 +480,7 @@ nouveau_bo_move_accel_cleanup(struct nouveau_channel *chan,
 	struct nouveau_fence *fence = NULL;
 	int ret;
 
-	ret = nouveau_fence_new(chan, &fence, true);
+	ret = nouveau_fence_new(chan, &fence);
 	if (ret)
 		return ret;
 
@@ -488,6 +490,76 @@ nouveau_bo_move_accel_cleanup(struct nouveau_channel *chan,
 	return ret;
 }
 
+static int
+nve0_bo_move_copy(struct nouveau_channel *chan, struct ttm_buffer_object *bo,
+		  struct ttm_mem_reg *old_mem, struct ttm_mem_reg *new_mem)
+{
+	struct nouveau_mem *node = old_mem->mm_node;
+	int ret = RING_SPACE(chan, 10);
+	if (ret == 0) {
+		BEGIN_NVC0(chan, NvSubCopy, 0x0400, 8);
+		OUT_RING  (chan, upper_32_bits(node->vma[0].offset));
+		OUT_RING  (chan, lower_32_bits(node->vma[0].offset));
+		OUT_RING  (chan, upper_32_bits(node->vma[1].offset));
+		OUT_RING  (chan, lower_32_bits(node->vma[1].offset));
+		OUT_RING  (chan, PAGE_SIZE);
+		OUT_RING  (chan, PAGE_SIZE);
+		OUT_RING  (chan, PAGE_SIZE);
+		OUT_RING  (chan, new_mem->num_pages);
+		BEGIN_IMC0(chan, NvSubCopy, 0x0300, 0x0386);
+	}
+	return ret;
+}
+
+static int
+nvc0_bo_move_init(struct nouveau_channel *chan, u32 handle)
+{
+	int ret = RING_SPACE(chan, 2);
+	if (ret == 0) {
+		BEGIN_NVC0(chan, NvSubCopy, 0x0000, 1);
+		OUT_RING  (chan, handle);
+	}
+	return ret;
+}
+
+static int
+nvc0_bo_move_copy(struct nouveau_channel *chan, struct ttm_buffer_object *bo,
+		  struct ttm_mem_reg *old_mem, struct ttm_mem_reg *new_mem)
+{
+	struct nouveau_mem *node = old_mem->mm_node;
+	u64 src_offset = node->vma[0].offset;
+	u64 dst_offset = node->vma[1].offset;
+	u32 page_count = new_mem->num_pages;
+	int ret;
+
+	page_count = new_mem->num_pages;
+	while (page_count) {
+		int line_count = (page_count > 8191) ? 8191 : page_count;
+
+		ret = RING_SPACE(chan, 11);
+		if (ret)
+			return ret;
+
+		BEGIN_NVC0(chan, NvSubCopy, 0x030c, 8);
+		OUT_RING  (chan, upper_32_bits(src_offset));
+		OUT_RING  (chan, lower_32_bits(src_offset));
+		OUT_RING  (chan, upper_32_bits(dst_offset));
+		OUT_RING  (chan, lower_32_bits(dst_offset));
+		OUT_RING  (chan, PAGE_SIZE);
+		OUT_RING  (chan, PAGE_SIZE);
+		OUT_RING  (chan, PAGE_SIZE);
+		OUT_RING  (chan, line_count);
+		BEGIN_NVC0(chan, NvSubCopy, 0x0300, 1);
+		OUT_RING  (chan, 0x00000110);
+
+		page_count -= line_count;
+		src_offset += (PAGE_SIZE * line_count);
+		dst_offset += (PAGE_SIZE * line_count);
+	}
+
+	return 0;
+}
+
 static int
 nvc0_bo_move_m2mf(struct nouveau_channel *chan, struct ttm_buffer_object *bo,
 		  struct ttm_mem_reg *old_mem, struct ttm_mem_reg *new_mem)
@@ -506,17 +578,17 @@ nvc0_bo_move_m2mf(struct nouveau_channel *chan, struct ttm_buffer_object *bo,
 		if (ret)
 			return ret;
 
-		BEGIN_NVC0(chan, 2, NvSubM2MF, 0x0238, 2);
+		BEGIN_NVC0(chan, NvSubCopy, 0x0238, 2);
 		OUT_RING  (chan, upper_32_bits(dst_offset));
 		OUT_RING  (chan, lower_32_bits(dst_offset));
-		BEGIN_NVC0(chan, 2, NvSubM2MF, 0x030c, 6);
+		BEGIN_NVC0(chan, NvSubCopy, 0x030c, 6);
 		OUT_RING  (chan, upper_32_bits(src_offset));
 		OUT_RING  (chan, lower_32_bits(src_offset));
 		OUT_RING  (chan, PAGE_SIZE); /* src_pitch */
 		OUT_RING  (chan, PAGE_SIZE); /* dst_pitch */
 		OUT_RING  (chan, PAGE_SIZE); /* line_length */
 		OUT_RING  (chan, line_count);
-		BEGIN_NVC0(chan, 2, NvSubM2MF, 0x0300, 1);
+		BEGIN_NVC0(chan, NvSubCopy, 0x0300, 1);
 		OUT_RING  (chan, 0x00100110);
 
 		page_count -= line_count;
@@ -527,6 +599,102 @@ nvc0_bo_move_m2mf(struct nouveau_channel *chan, struct ttm_buffer_object *bo,
 	return 0;
 }
 
+static int
+nva3_bo_move_copy(struct nouveau_channel *chan, struct ttm_buffer_object *bo,
+		  struct ttm_mem_reg *old_mem, struct ttm_mem_reg *new_mem)
+{
+	struct nouveau_mem *node = old_mem->mm_node;
+	u64 src_offset = node->vma[0].offset;
+	u64 dst_offset = node->vma[1].offset;
+	u32 page_count = new_mem->num_pages;
+	int ret;
+
+	page_count = new_mem->num_pages;
+	while (page_count) {
+		int line_count = (page_count > 8191) ? 8191 : page_count;
+
+		ret = RING_SPACE(chan, 11);
+		if (ret)
+			return ret;
+
+		BEGIN_NV04(chan, NvSubCopy, 0x030c, 8);
+		OUT_RING  (chan, upper_32_bits(src_offset));
+		OUT_RING  (chan, lower_32_bits(src_offset));
+		OUT_RING  (chan, upper_32_bits(dst_offset));
+		OUT_RING  (chan, lower_32_bits(dst_offset));
+		OUT_RING  (chan, PAGE_SIZE);
+		OUT_RING  (chan, PAGE_SIZE);
+		OUT_RING  (chan, PAGE_SIZE);
+		OUT_RING  (chan, line_count);
+		BEGIN_NV04(chan, NvSubCopy, 0x0300, 1);
+		OUT_RING  (chan, 0x00000110);
+
+		page_count -= line_count;
+		src_offset += (PAGE_SIZE * line_count);
+		dst_offset += (PAGE_SIZE * line_count);
+	}
+
+	return 0;
+}
+
+static int
+nv98_bo_move_exec(struct nouveau_channel *chan, struct ttm_buffer_object *bo,
+		  struct ttm_mem_reg *old_mem, struct ttm_mem_reg *new_mem)
+{
+	struct nouveau_mem *node = old_mem->mm_node;
+	int ret = RING_SPACE(chan, 7);
+	if (ret == 0) {
+		BEGIN_NV04(chan, NvSubCopy, 0x0320, 6);
+		OUT_RING  (chan, upper_32_bits(node->vma[0].offset));
+		OUT_RING  (chan, lower_32_bits(node->vma[0].offset));
+		OUT_RING  (chan, upper_32_bits(node->vma[1].offset));
+		OUT_RING  (chan, lower_32_bits(node->vma[1].offset));
+		OUT_RING  (chan, 0x00000000 /* COPY */);
+		OUT_RING  (chan, new_mem->num_pages << PAGE_SHIFT);
+	}
+	return ret;
+}
+
+static int
+nv84_bo_move_exec(struct nouveau_channel *chan, struct ttm_buffer_object *bo,
+		  struct ttm_mem_reg *old_mem, struct ttm_mem_reg *new_mem)
+{
+	struct nouveau_mem *node = old_mem->mm_node;
+	int ret = RING_SPACE(chan, 7);
+	if (ret == 0) {
+		BEGIN_NV04(chan, NvSubCopy, 0x0304, 6);
+		OUT_RING  (chan, new_mem->num_pages << PAGE_SHIFT);
+		OUT_RING  (chan, upper_32_bits(node->vma[0].offset));
+		OUT_RING  (chan, lower_32_bits(node->vma[0].offset));
+		OUT_RING  (chan, upper_32_bits(node->vma[1].offset));
+		OUT_RING  (chan, lower_32_bits(node->vma[1].offset));
+		OUT_RING  (chan, 0x00000000 /* MODE_COPY, QUERY_NONE */);
+	}
+	return ret;
+}
+
+static int
+nv50_bo_move_init(struct nouveau_channel *chan, u32 handle)
+{
+	int ret = nouveau_notifier_alloc(chan, NvNotify0, 32, 0xfe0, 0x1000,
+					 &chan->m2mf_ntfy);
+	if (ret == 0) {
+		ret = RING_SPACE(chan, 6);
+		if (ret == 0) {
+			BEGIN_NV04(chan, NvSubCopy, 0x0000, 1);
+			OUT_RING  (chan, handle);
+			BEGIN_NV04(chan, NvSubCopy, 0x0180, 3);
+			OUT_RING  (chan, NvNotify0);
+			OUT_RING  (chan, NvDmaFB);
+			OUT_RING  (chan, NvDmaFB);
+		} else {
+			nouveau_ramht_remove(chan, NvNotify0);
+		}
+	}
+
+	return ret;
+}
+
 static int
 nv50_bo_move_m2mf(struct nouveau_channel *chan, struct ttm_buffer_object *bo,
 		  struct ttm_mem_reg *old_mem, struct ttm_mem_reg *new_mem)
@@ -551,7 +719,7 @@ nv50_bo_move_m2mf(struct nouveau_channel *chan, struct ttm_buffer_object *bo,
 			if (ret)
 				return ret;
 
-			BEGIN_RING(chan, NvSubM2MF, 0x0200, 7);
+			BEGIN_NV04(chan, NvSubCopy, 0x0200, 7);
 			OUT_RING  (chan, 0);
 			OUT_RING  (chan, 0);
 			OUT_RING  (chan, stride);
@@ -564,7 +732,7 @@ nv50_bo_move_m2mf(struct nouveau_channel *chan, struct ttm_buffer_object *bo,
 			if (ret)
 				return ret;
 
-			BEGIN_RING(chan, NvSubM2MF, 0x0200, 1);
+			BEGIN_NV04(chan, NvSubCopy, 0x0200, 1);
 			OUT_RING  (chan, 1);
 		}
 		if (old_mem->mem_type == TTM_PL_VRAM &&
@@ -573,7 +741,7 @@ nv50_bo_move_m2mf(struct nouveau_channel *chan, struct ttm_buffer_object *bo,
 			if (ret)
 				return ret;
 
-			BEGIN_RING(chan, NvSubM2MF, 0x021c, 7);
+			BEGIN_NV04(chan, NvSubCopy, 0x021c, 7);
 			OUT_RING  (chan, 0);
 			OUT_RING  (chan, 0);
 			OUT_RING  (chan, stride);
@@ -586,7 +754,7 @@ nv50_bo_move_m2mf(struct nouveau_channel *chan, struct ttm_buffer_object *bo,
 			if (ret)
 				return ret;
 
-			BEGIN_RING(chan, NvSubM2MF, 0x021c, 1);
+			BEGIN_NV04(chan, NvSubCopy, 0x021c, 1);
 			OUT_RING  (chan, 1);
 		}
 
@@ -594,10 +762,10 @@ nv50_bo_move_m2mf(struct nouveau_channel *chan, struct ttm_buffer_object *bo,
 		if (ret)
 			return ret;
 
-		BEGIN_RING(chan, NvSubM2MF, 0x0238, 2);
+		BEGIN_NV04(chan, NvSubCopy, 0x0238, 2);
 		OUT_RING  (chan, upper_32_bits(src_offset));
 		OUT_RING  (chan, upper_32_bits(dst_offset));
-		BEGIN_RING(chan, NvSubM2MF, 0x030c, 8);
+		BEGIN_NV04(chan, NvSubCopy, 0x030c, 8);
 		OUT_RING  (chan, lower_32_bits(src_offset));
 		OUT_RING  (chan, lower_32_bits(dst_offset));
 		OUT_RING  (chan, stride);
@@ -606,7 +774,7 @@ nv50_bo_move_m2mf(struct nouveau_channel *chan, struct ttm_buffer_object *bo,
 		OUT_RING  (chan, height);
 		OUT_RING  (chan, 0x00000101);
 		OUT_RING  (chan, 0x00000000);
-		BEGIN_RING(chan, NvSubM2MF, NV_MEMORY_TO_MEMORY_FORMAT_NOP, 1);
+		BEGIN_NV04(chan, NvSubCopy, NV_MEMORY_TO_MEMORY_FORMAT_NOP, 1);
 		OUT_RING  (chan, 0);
 
 		length -= amount;
@@ -617,6 +785,24 @@ nv50_bo_move_m2mf(struct nouveau_channel *chan, struct ttm_buffer_object *bo,
 	return 0;
 }
 
+static int
+nv04_bo_move_init(struct nouveau_channel *chan, u32 handle)
+{
+	int ret = nouveau_notifier_alloc(chan, NvNotify0, 32, 0xfe0, 0x1000,
+					 &chan->m2mf_ntfy);
+	if (ret == 0) {
+		ret = RING_SPACE(chan, 4);
+		if (ret == 0) {
+			BEGIN_NV04(chan, NvSubCopy, 0x0000, 1);
+			OUT_RING  (chan, handle);
+			BEGIN_NV04(chan, NvSubCopy, 0x0180, 1);
+			OUT_RING  (chan, NvNotify0);
+		}
+	}
+
+	return ret;
+}
+
 static inline uint32_t
 nouveau_bo_mem_ctxdma(struct ttm_buffer_object *bo,
 		      struct nouveau_channel *chan, struct ttm_mem_reg *mem)
@@ -639,7 +825,7 @@ nv04_bo_move_m2mf(struct nouveau_channel *chan, struct ttm_buffer_object *bo,
 	if (ret)
 		return ret;
 
-	BEGIN_RING(chan, NvSubM2MF, NV_MEMORY_TO_MEMORY_FORMAT_DMA_SOURCE, 2);
+	BEGIN_NV04(chan, NvSubCopy, NV_MEMORY_TO_MEMORY_FORMAT_DMA_SOURCE, 2);
 	OUT_RING  (chan, nouveau_bo_mem_ctxdma(bo, chan, old_mem));
 	OUT_RING  (chan, nouveau_bo_mem_ctxdma(bo, chan, new_mem));
 
@@ -651,7 +837,7 @@ nv04_bo_move_m2mf(struct nouveau_channel *chan, struct ttm_buffer_object *bo,
 		if (ret)
 			return ret;
 
-		BEGIN_RING(chan, NvSubM2MF,
+		BEGIN_NV04(chan, NvSubCopy,
 				 NV_MEMORY_TO_MEMORY_FORMAT_OFFSET_IN, 8);
 		OUT_RING  (chan, src_offset);
 		OUT_RING  (chan, dst_offset);
@@ -661,7 +847,7 @@ nv04_bo_move_m2mf(struct nouveau_channel *chan, struct ttm_buffer_object *bo,
 		OUT_RING  (chan, line_count);
 		OUT_RING  (chan, 0x00000101);
 		OUT_RING  (chan, 0x00000000);
-		BEGIN_RING(chan, NvSubM2MF, NV_MEMORY_TO_MEMORY_FORMAT_NOP, 1);
+		BEGIN_NV04(chan, NvSubCopy, NV_MEMORY_TO_MEMORY_FORMAT_NOP, 1);
 		OUT_RING  (chan, 0);
 
 		page_count -= line_count;
@@ -721,13 +907,7 @@ nouveau_bo_move_m2mf(struct ttm_buffer_object *bo, int evict, bool intr,
 			goto out;
 	}
 
-	if (dev_priv->card_type < NV_50)
-		ret = nv04_bo_move_m2mf(chan, bo, &bo->mem, new_mem);
-	else
-	if (dev_priv->card_type < NV_C0)
-		ret = nv50_bo_move_m2mf(chan, bo, &bo->mem, new_mem);
-	else
-		ret = nvc0_bo_move_m2mf(chan, bo, &bo->mem, new_mem);
+	ret = dev_priv->ttm.move(chan, bo, &bo->mem, new_mem);
 	if (ret == 0) {
 		ret = nouveau_bo_move_accel_cleanup(chan, nvbo, evict,
 						    no_wait_reserve,
@@ -739,6 +919,49 @@ out:
 	return ret;
 }
 
+void
+nouveau_bo_move_init(struct nouveau_channel *chan)
+{
+	struct drm_nouveau_private *dev_priv = chan->dev->dev_private;
+	static const struct {
+		const char *name;
+		int engine;
+		u32 oclass;
+		int (*exec)(struct nouveau_channel *,
+			    struct ttm_buffer_object *,
+			    struct ttm_mem_reg *, struct ttm_mem_reg *);
+		int (*init)(struct nouveau_channel *, u32 handle);
+	} _methods[] = {
+		{  "COPY", 0, 0xa0b5, nve0_bo_move_copy, nvc0_bo_move_init },
+		{ "COPY1", 5, 0x90b8, nvc0_bo_move_copy, nvc0_bo_move_init },
+		{ "COPY0", 4, 0x90b5, nvc0_bo_move_copy, nvc0_bo_move_init },
+		{  "COPY", 0, 0x85b5, nva3_bo_move_copy, nv50_bo_move_init },
+		{ "CRYPT", 0, 0x74c1, nv84_bo_move_exec, nv50_bo_move_init },
+		{  "M2MF", 0, 0x9039, nvc0_bo_move_m2mf, nvc0_bo_move_init },
+		{  "M2MF", 0, 0x5039, nv50_bo_move_m2mf, nv50_bo_move_init },
+		{  "M2MF", 0, 0x0039, nv04_bo_move_m2mf, nv04_bo_move_init },
+		{},
+		{ "CRYPT", 0, 0x88b4, nv98_bo_move_exec, nv50_bo_move_init },
+	}, *mthd = _methods;
+	const char *name = "CPU";
+	int ret;
+
+	do {
+		u32 handle = (mthd->engine << 16) | mthd->oclass;
+		ret = nouveau_gpuobj_gr_new(chan, handle, mthd->oclass);
+		if (ret == 0) {
+			ret = mthd->init(chan, handle);
+			if (ret == 0) {
+				dev_priv->ttm.move = mthd->exec;
+				name = mthd->name;
+				break;
+			}
+		}
+	} while ((++mthd)->exec);
+
+	NV_INFO(chan->dev, "MM: using %s for buffer copies\n", name);
+}
+
 static int
 nouveau_bo_move_flipd(struct ttm_buffer_object *bo, bool evict, bool intr,
 		      bool no_wait_reserve, bool no_wait_gpu,
@@ -895,8 +1118,8 @@ nouveau_bo_move(struct ttm_buffer_object *bo, bool evict, bool intr,
 		goto out;
 	}
 
-	/* Software copy if the card isn't up and running yet. */
-	if (!dev_priv->channel) {
+	/* CPU copy if we have no accelerated method available */
+	if (!dev_priv->ttm.move) {
 		ret = ttm_bo_move_memcpy(bo, evict, no_wait_reserve, no_wait_gpu, new_mem);
 		goto out;
 	}
@@ -1044,22 +1267,6 @@ nouveau_ttm_fault_reserve_notify(struct ttm_buffer_object *bo)
 	return nouveau_bo_validate(nvbo, false, true, false);
 }
 
-void
-nouveau_bo_fence(struct nouveau_bo *nvbo, struct nouveau_fence *fence)
-{
-	struct nouveau_fence *old_fence;
-
-	if (likely(fence))
-		nouveau_fence_ref(fence);
-
-	spin_lock(&nvbo->bo.bdev->fence_lock);
-	old_fence = nvbo->bo.sync_obj;
-	nvbo->bo.sync_obj = fence;
-	spin_unlock(&nvbo->bo.bdev->fence_lock);
-
-	nouveau_fence_unref(&old_fence);
-}
-
 static int
 nouveau_ttm_tt_populate(struct ttm_tt *ttm)
 {
@@ -1157,6 +1364,52 @@ nouveau_ttm_tt_unpopulate(struct ttm_tt *ttm)
 	ttm_pool_unpopulate(ttm);
 }
 
+void
+nouveau_bo_fence(struct nouveau_bo *nvbo, struct nouveau_fence *fence)
+{
+	struct nouveau_fence *old_fence = NULL;
+
+	if (likely(fence))
+		nouveau_fence_ref(fence);
+
+	spin_lock(&nvbo->bo.bdev->fence_lock);
+	old_fence = nvbo->bo.sync_obj;
+	nvbo->bo.sync_obj = fence;
+	spin_unlock(&nvbo->bo.bdev->fence_lock);
+
+	nouveau_fence_unref(&old_fence);
+}
+
+static void
+nouveau_bo_fence_unref(void **sync_obj)
+{
+	nouveau_fence_unref((struct nouveau_fence **)sync_obj);
+}
+
+static void *
+nouveau_bo_fence_ref(void *sync_obj)
+{
+	return nouveau_fence_ref(sync_obj);
+}
+
+static bool
+nouveau_bo_fence_signalled(void *sync_obj, void *sync_arg)
+{
+	return nouveau_fence_done(sync_obj);
+}
+
+static int
+nouveau_bo_fence_wait(void *sync_obj, void *sync_arg, bool lazy, bool intr)
+{
+	return nouveau_fence_wait(sync_obj, lazy, intr);
+}
+
+static int
+nouveau_bo_fence_flush(void *sync_obj, void *sync_arg)
+{
+	return 0;
+}
+
 struct ttm_bo_driver nouveau_bo_driver = {
 	.ttm_tt_create = &nouveau_ttm_tt_create,
 	.ttm_tt_populate = &nouveau_ttm_tt_populate,
@@ -1167,11 +1420,11 @@ struct ttm_bo_driver nouveau_bo_driver = {
 	.move_notify = nouveau_bo_move_ntfy,
 	.move = nouveau_bo_move,
 	.verify_access = nouveau_bo_verify_access,
-	.sync_obj_signaled = __nouveau_fence_signalled,
-	.sync_obj_wait = __nouveau_fence_wait,
-	.sync_obj_flush = __nouveau_fence_flush,
-	.sync_obj_unref = __nouveau_fence_unref,
-	.sync_obj_ref = __nouveau_fence_ref,
+	.sync_obj_signaled = nouveau_bo_fence_signalled,
+	.sync_obj_wait = nouveau_bo_fence_wait,
+	.sync_obj_flush = nouveau_bo_fence_flush,
+	.sync_obj_unref = nouveau_bo_fence_unref,
+	.sync_obj_ref = nouveau_bo_fence_ref,
 	.fault_reserve_notify = &nouveau_ttm_fault_reserve_notify,
 	.io_mem_reserve = &nouveau_ttm_io_mem_reserve,
 	.io_mem_free = &nouveau_ttm_io_mem_free,
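
A note on nouveau_bo_move_init() above: the _methods[] table is ordered from
most to least preferred copy engine; the first entry whose object allocation
and init() both succeed becomes dev_priv->ttm.move, and the empty {} sentinel
ends the walk so the nv98 CRYPT entry below it stays disabled (matching the
"add in a (disabled) crypto engine buffer copy method" commit in the
shortlog). If nothing succeeds, ttm.move stays NULL and nouveau_bo_move()
falls back to ttm_bo_move_memcpy(), reported as "CPU". A minimal sketch of
the same sentinel-terminated probe pattern, with hypothetical names:

	/* Hypothetical stand-alone illustration of the probe loop: try table
	 * entries in order and keep the first one that initialises; a NULL
	 * exec pointer (the {} sentinel) terminates the walk. */
	struct copy_method {
		const char *name;
		int (*init)(void);	/* returns 0 on success */
		int (*exec)(void);	/* NULL marks the sentinel */
	};

	static const struct copy_method *
	pick_copy_method(const struct copy_method *mthd)
	{
		do {
			if (mthd->init() == 0)
				return mthd;	/* first working method wins */
		} while ((++mthd)->exec);
		return NULL;		/* caller falls back to CPU copies */
	}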

+ 43 - 43
drivers/gpu/drm/nouveau/nouveau_channel.c

@@ -27,7 +27,10 @@
 #include "nouveau_drv.h"
 #include "nouveau_drm.h"
 #include "nouveau_dma.h"
+#include "nouveau_fifo.h"
 #include "nouveau_ramht.h"
+#include "nouveau_fence.h"
+#include "nouveau_software.h"
 
 static int
 nouveau_channel_pushbuf_init(struct nouveau_channel *chan)
@@ -117,8 +120,9 @@ nouveau_channel_alloc(struct drm_device *dev, struct nouveau_channel **chan_ret,
 		      struct drm_file *file_priv,
 		      uint32_t vram_handle, uint32_t gart_handle)
 {
+	struct nouveau_exec_engine *fence = nv_engine(dev, NVOBJ_ENGINE_FENCE);
+	struct nouveau_fifo_priv *pfifo = nv_engine(dev, NVOBJ_ENGINE_FIFO);
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nouveau_fifo_engine *pfifo = &dev_priv->engine.fifo;
 	struct nouveau_fpriv *fpriv = nouveau_fpriv(file_priv);
 	struct nouveau_channel *chan;
 	unsigned long flags;
@@ -155,10 +159,6 @@ nouveau_channel_alloc(struct drm_device *dev, struct nouveau_channel **chan_ret,
 	}
 
 	NV_DEBUG(dev, "initialising channel %d\n", chan->id);
-	INIT_LIST_HEAD(&chan->nvsw.vbl_wait);
-	INIT_LIST_HEAD(&chan->nvsw.flip);
-	INIT_LIST_HEAD(&chan->fence.pending);
-	spin_lock_init(&chan->fence.lock);
 
 	/* setup channel's memory and vm */
 	ret = nouveau_gpuobj_channel_init(chan, vram_handle, gart_handle);
@@ -188,20 +188,15 @@ nouveau_channel_alloc(struct drm_device *dev, struct nouveau_channel **chan_ret,
 	chan->user_put = 0x40;
 	chan->user_get = 0x44;
 	if (dev_priv->card_type >= NV_50)
-                chan->user_get_hi = 0x60;
+		chan->user_get_hi = 0x60;
 
-	/* disable the fifo caches */
-	pfifo->reassign(dev, false);
-
-	/* Construct initial RAMFC for new channel */
-	ret = pfifo->create_context(chan);
+	/* create fifo context */
+	ret = pfifo->base.context_new(chan, NVOBJ_ENGINE_FIFO);
 	if (ret) {
 		nouveau_channel_put(&chan);
 		return ret;
 	}
 
-	pfifo->reassign(dev, true);
-
 	/* Insert NOPs for NOUVEAU_DMA_SKIPS */
 	ret = RING_SPACE(chan, NOUVEAU_DMA_SKIPS);
 	if (ret) {
@@ -211,9 +206,28 @@ nouveau_channel_alloc(struct drm_device *dev, struct nouveau_channel **chan_ret,
 
 	for (i = 0; i < NOUVEAU_DMA_SKIPS; i++)
 		OUT_RING  (chan, 0x00000000);
+
+	ret = nouveau_gpuobj_gr_new(chan, NvSw, nouveau_software_class(dev));
+	if (ret) {
+		nouveau_channel_put(&chan);
+		return ret;
+	}
+
+	if (dev_priv->card_type < NV_C0) {
+		ret = RING_SPACE(chan, 2);
+		if (ret) {
+			nouveau_channel_put(&chan);
+			return ret;
+		}
+
+		BEGIN_NV04(chan, NvSubSw, NV01_SUBCHAN_OBJECT, 1);
+		OUT_RING  (chan, NvSw);
+		FIRE_RING (chan);
+	}
+
 	FIRE_RING(chan);
 
-	ret = nouveau_fence_channel_init(chan);
+	ret = fence->context_new(chan, NVOBJ_ENGINE_FENCE);
 	if (ret) {
 		nouveau_channel_put(&chan);
 		return ret;
@@ -268,7 +282,6 @@ nouveau_channel_put_unlocked(struct nouveau_channel **pchan)
 	struct nouveau_channel *chan = *pchan;
 	struct drm_device *dev = chan->dev;
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nouveau_fifo_engine *pfifo = &dev_priv->engine.fifo;
 	unsigned long flags;
 	int i;
 
@@ -285,24 +298,12 @@ nouveau_channel_put_unlocked(struct nouveau_channel **pchan)
 	/* give it chance to idle */
 	nouveau_channel_idle(chan);
 
-	/* ensure all outstanding fences are signaled.  they should be if the
-	 * above attempts at idling were OK, but if we failed this'll tell TTM
-	 * we're done with the buffers.
-	 */
-	nouveau_fence_channel_fini(chan);
-
-	/* boot it off the hardware */
-	pfifo->reassign(dev, false);
-
 	/* destroy the engine specific contexts */
-	pfifo->destroy_context(chan);
-	for (i = 0; i < NVOBJ_ENGINE_NR; i++) {
+	for (i = NVOBJ_ENGINE_NR - 1; i >= 0; i--) {
 		if (chan->engctx[i])
 			dev_priv->eng[i]->context_del(chan, i);
 	}
 
-	pfifo->reassign(dev, true);
-
 	/* aside from its resources, the channel should now be dead,
 	 * remove it from the channel list
 	 */
@@ -354,38 +355,37 @@ nouveau_channel_ref(struct nouveau_channel *chan,
 	*pchan = chan;
 }
 
-void
+int
 nouveau_channel_idle(struct nouveau_channel *chan)
 {
 	struct drm_device *dev = chan->dev;
 	struct nouveau_fence *fence = NULL;
 	int ret;
 
-	nouveau_fence_update(chan);
-
-	if (chan->fence.sequence != chan->fence.sequence_ack) {
-		ret = nouveau_fence_new(chan, &fence, true);
-		if (!ret) {
-			ret = nouveau_fence_wait(fence, false, false);
-			nouveau_fence_unref(&fence);
-		}
-
-		if (ret)
-			NV_ERROR(dev, "Failed to idle channel %d.\n", chan->id);
+	ret = nouveau_fence_new(chan, &fence);
+	if (!ret) {
+		ret = nouveau_fence_wait(fence, false, false);
+		nouveau_fence_unref(&fence);
 	}
+
+	if (ret)
+		NV_ERROR(dev, "Failed to idle channel %d.\n", chan->id);
+	return ret;
 }
 
 /* cleans up all the fifos from file_priv */
 void
 nouveau_channel_cleanup(struct drm_device *dev, struct drm_file *file_priv)
 {
-	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nouveau_engine *engine = &dev_priv->engine;
+	struct nouveau_fifo_priv *pfifo = nv_engine(dev, NVOBJ_ENGINE_FIFO);
 	struct nouveau_channel *chan;
 	int i;
 
+	if (!pfifo)
+		return;
+
 	NV_DEBUG(dev, "clearing FIFO enables from file_priv\n");
-	for (i = 0; i < engine->fifo.channels; i++) {
+	for (i = 0; i < pfifo->channels; i++) {
 		chan = nouveau_channel_get(file_priv, i);
 		if (IS_ERR(chan))
 			continue;

+ 7 - 1
drivers/gpu/drm/nouveau/nouveau_connector.c

@@ -654,7 +654,13 @@ nouveau_connector_detect_depth(struct drm_connector *connector)
 	if (nv_connector->edid && connector->display_info.bpc)
 		return;
 
-	/* if not, we're out of options unless we're LVDS, default to 8bpc */
+	/* EDID 1.4 is *supposed* to be supported on eDP, but, Apple... */
+	if (nv_connector->type == DCB_CONNECTOR_eDP) {
+		connector->display_info.bpc = 6;
+		return;
+	}
+
+	/* we're out of options unless we're LVDS, default to 8bpc */
 	if (nv_encoder->dcb->type != OUTPUT_LVDS) {
 		connector->display_info.bpc = 8;
 		return;

+ 0 - 2
drivers/gpu/drm/nouveau/nouveau_debugfs.c

@@ -67,8 +67,6 @@ nouveau_debugfs_channel_info(struct seq_file *m, void *data)
 			   nvchan_rd32(chan, 0x8c));
 	}
 
-	seq_printf(m, "last fence    : %d\n", chan->fence.sequence);
-	seq_printf(m, "last signalled: %d\n", chan->fence.sequence_ack);
 	return 0;
 }
 

+ 25 - 13
drivers/gpu/drm/nouveau/nouveau_display.c

@@ -33,7 +33,9 @@
 #include "nouveau_crtc.h"
 #include "nouveau_dma.h"
 #include "nouveau_connector.h"
+#include "nouveau_software.h"
 #include "nouveau_gpio.h"
+#include "nouveau_fence.h"
 #include "nv50_display.h"
 
 static void
@@ -325,14 +327,21 @@ nouveau_display_create(struct drm_device *dev)
 
 	ret = disp->create(dev);
 	if (ret)
-		return ret;
+		goto disp_create_err;
 
 	if (dev->mode_config.num_crtc) {
 		ret = drm_vblank_init(dev, dev->mode_config.num_crtc);
 		if (ret)
-			return ret;
+			goto vblank_err;
 	}
 
+	return 0;
+
+vblank_err:
+	disp->destroy(dev);
+disp_create_err:
+	drm_kms_helper_poll_fini(dev);
+	drm_mode_config_cleanup(dev);
 	return ret;
 }
 
@@ -425,6 +434,7 @@ nouveau_page_flip_emit(struct nouveau_channel *chan,
 		       struct nouveau_page_flip_state *s,
 		       struct nouveau_fence **pfence)
 {
+	struct nouveau_software_chan *swch = chan->engctx[NVOBJ_ENGINE_SW];
 	struct drm_nouveau_private *dev_priv = chan->dev->dev_private;
 	struct drm_device *dev = chan->dev;
 	unsigned long flags;
@@ -432,7 +442,7 @@ nouveau_page_flip_emit(struct nouveau_channel *chan,
 
 	/* Queue it to the pending list */
 	spin_lock_irqsave(&dev->event_lock, flags);
-	list_add_tail(&s->head, &chan->nvsw.flip);
+	list_add_tail(&s->head, &swch->flip);
 	spin_unlock_irqrestore(&dev->event_lock, flags);
 
 	/* Synchronize with the old framebuffer */
@@ -446,17 +456,17 @@ nouveau_page_flip_emit(struct nouveau_channel *chan,
 		goto fail;
 
 	if (dev_priv->card_type < NV_C0) {
-		BEGIN_RING(chan, NvSubSw, NV_SW_PAGE_FLIP, 1);
+		BEGIN_NV04(chan, NvSubSw, NV_SW_PAGE_FLIP, 1);
 		OUT_RING  (chan, 0x00000000);
 		OUT_RING  (chan, 0x00000000);
 	} else {
-		BEGIN_NVC0(chan, 2, 0, NV10_SUBCHAN_REF_CNT, 1);
-		OUT_RING  (chan, ++chan->fence.sequence);
-		BEGIN_NVC0(chan, 8, 0, NVSW_SUBCHAN_PAGE_FLIP, 0x0000);
+		BEGIN_NVC0(chan, 0, NV10_SUBCHAN_REF_CNT, 1);
+		OUT_RING  (chan, 0);
+		BEGIN_IMC0(chan, 0, NVSW_SUBCHAN_PAGE_FLIP, 0x0000);
 	}
 	FIRE_RING (chan);
 
-	ret = nouveau_fence_new(chan, pfence, true);
+	ret = nouveau_fence_new(chan, pfence);
 	if (ret)
 		goto fail;
 
@@ -477,7 +487,7 @@ nouveau_crtc_page_flip(struct drm_crtc *crtc, struct drm_framebuffer *fb,
 	struct nouveau_bo *old_bo = nouveau_framebuffer(crtc->fb)->nvbo;
 	struct nouveau_bo *new_bo = nouveau_framebuffer(fb)->nvbo;
 	struct nouveau_page_flip_state *s;
-	struct nouveau_channel *chan;
+	struct nouveau_channel *chan = NULL;
 	struct nouveau_fence *fence;
 	int ret;
 
@@ -500,7 +510,9 @@ nouveau_crtc_page_flip(struct drm_crtc *crtc, struct drm_framebuffer *fb,
 		  new_bo->bo.offset };
 
 	/* Choose the channel the flip will be handled in */
-	chan = nouveau_fence_channel(new_bo->bo.sync_obj);
+	fence = new_bo->bo.sync_obj;
+	if (fence)
+		chan = nouveau_channel_get_unlocked(fence->channel);
 	if (!chan)
 		chan = nouveau_channel_get_unlocked(dev_priv->channel);
 	mutex_lock(&chan->mutex);
@@ -540,20 +552,20 @@ int
 nouveau_finish_page_flip(struct nouveau_channel *chan,
 			 struct nouveau_page_flip_state *ps)
 {
+	struct nouveau_software_chan *swch = chan->engctx[NVOBJ_ENGINE_SW];
 	struct drm_device *dev = chan->dev;
 	struct nouveau_page_flip_state *s;
 	unsigned long flags;
 
 	spin_lock_irqsave(&dev->event_lock, flags);
 
-	if (list_empty(&chan->nvsw.flip)) {
+	if (list_empty(&swch->flip)) {
 		NV_ERROR(dev, "Unexpected pageflip in channel %d.\n", chan->id);
 		spin_unlock_irqrestore(&dev->event_lock, flags);
 		return -EINVAL;
 	}
 
-	s = list_first_entry(&chan->nvsw.flip,
-			     struct nouveau_page_flip_state, head);
+	s = list_first_entry(&swch->flip, struct nouveau_page_flip_state, head);
 	if (s->event) {
 		struct drm_pending_vblank_event *e = s->event;
 		struct timeval now;

+ 27 - 8
drivers/gpu/drm/nouveau/nouveau_dma.h

@@ -48,12 +48,12 @@ void nv50_dma_push(struct nouveau_channel *, struct nouveau_bo *,
 
 /* Hardcoded object assignments to subchannels (subchannel id). */
 enum {
-	NvSubM2MF	= 0,
+	NvSubCtxSurf2D  = 0,
 	NvSubSw		= 1,
-	NvSub2D		= 2,
-	NvSubCtxSurf2D  = 2,
+	NvSubImageBlit  = 2,
+	NvSub2D		= 3,
 	NvSubGdiRect    = 3,
-	NvSubImageBlit  = 4
+	NvSubCopy	= 4,
 };
 
 /* Object handles. */
@@ -73,6 +73,7 @@ enum {
 	NvSema		= 0x8000000f,
 	NvEvoSema0	= 0x80000010,
 	NvEvoSema1	= 0x80000011,
+	NvNotify1       = 0x80000012,
 
 	/* G80+ display objects */
 	NvEvoVRAM	= 0x01000000,
@@ -127,15 +128,33 @@ extern void
 OUT_RINGp(struct nouveau_channel *chan, const void *data, unsigned nr_dwords);
 
 static inline void
-BEGIN_NVC0(struct nouveau_channel *chan, int op, int subc, int mthd, int size)
+BEGIN_NV04(struct nouveau_channel *chan, int subc, int mthd, int size)
 {
-	OUT_RING(chan, (op << 28) | (size << 16) | (subc << 13) | (mthd >> 2));
+	OUT_RING(chan, 0x00000000 | (subc << 13) | (size << 18) | mthd);
 }
 
 static inline void
-BEGIN_RING(struct nouveau_channel *chan, int subc, int mthd, int size)
+BEGIN_NI04(struct nouveau_channel *chan, int subc, int mthd, int size)
 {
-	OUT_RING(chan, (subc << 13) | (size << 18) | mthd);
+	OUT_RING(chan, 0x40000000 | (subc << 13) | (size << 18) | mthd);
+}
+
+static inline void
+BEGIN_NVC0(struct nouveau_channel *chan, int subc, int mthd, int size)
+{
+	OUT_RING(chan, 0x20000000 | (size << 16) | (subc << 13) | (mthd >> 2));
+}
+
+static inline void
+BEGIN_NIC0(struct nouveau_channel *chan, int subc, int mthd, int size)
+{
+	OUT_RING(chan, 0x60000000 | (size << 16) | (subc << 13) | (mthd >> 2));
+}
+
+static inline void
+BEGIN_IMC0(struct nouveau_channel *chan, int subc, int mthd, u16 data)
+{
+	OUT_RING(chan, 0x80000000 | (data << 16) | (subc << 13) | (mthd >> 2));
 }
 
 #define WRITE_PUT(val) do {                                                    \
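
The renamed helpers above make the command-stream header formats explicit:
BEGIN_NV04/BEGIN_NI04 emit the pre-Fermi incrementing and non-incrementing
headers, BEGIN_NVC0/BEGIN_NIC0 their Fermi equivalents, and BEGIN_IMC0 the
Fermi immediate form that packs a 16-bit data value into the header itself
(used by the new page-flip and nve0 buffer-move paths). A small stand-alone
sketch of the arithmetic (hypothetical helper names, plain user-space C),
using the NvSubCopy/0x030c/8 values the buffer-move code emits:

	#include <stdint.h>
	#include <stdio.h>

	/* Mirrors BEGIN_NV04(): word count in bits 28:18, subchannel in
	 * bits 15:13, method address in the low bits. */
	static uint32_t nv04_header(uint32_t subc, uint32_t mthd, uint32_t size)
	{
		return (size << 18) | (subc << 13) | mthd;
	}

	/* Mirrors BEGIN_NVC0(): mode in bits 31:29 (1 = incrementing), word
	 * count in bits 28:16, subchannel in bits 15:13, method >> 2. */
	static uint32_t nvc0_header(uint32_t subc, uint32_t mthd, uint32_t size)
	{
		return 0x20000000 | (size << 16) | (subc << 13) | (mthd >> 2);
	}

	int main(void)
	{
		printf("0x%08x\n", nv04_header(4, 0x030c, 8)); /* 0x0020830c */
		printf("0x%08x\n", nvc0_header(4, 0x030c, 8)); /* 0x200880c3 */
		return 0;
	}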

+ 5 - 10
drivers/gpu/drm/nouveau/nouveau_drv.c

@@ -33,6 +33,7 @@
 #include "nouveau_fb.h"
 #include "nouveau_fbcon.h"
 #include "nouveau_pm.h"
+#include "nouveau_fifo.h"
 #include "nv50_display.h"
 
 #include "drm_pciids.h"
@@ -175,7 +176,7 @@ nouveau_pci_suspend(struct pci_dev *pdev, pm_message_t pm_state)
 	struct drm_device *dev = pci_get_drvdata(pdev);
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
 	struct nouveau_instmem_engine *pinstmem = &dev_priv->engine.instmem;
-	struct nouveau_fifo_engine *pfifo = &dev_priv->engine.fifo;
+	struct nouveau_fifo_priv *pfifo = nv_engine(dev, NVOBJ_ENGINE_FIFO);
 	struct nouveau_channel *chan;
 	struct drm_crtc *crtc;
 	int ret, i, e;
@@ -214,17 +215,13 @@ nouveau_pci_suspend(struct pci_dev *pdev, pm_message_t pm_state)
 	ttm_bo_evict_mm(&dev_priv->ttm.bdev, TTM_PL_VRAM);
 
 	NV_INFO(dev, "Idling channels...\n");
-	for (i = 0; i < pfifo->channels; i++) {
+	for (i = 0; i < (pfifo ? pfifo->channels : 0); i++) {
 		chan = dev_priv->channels.ptr[i];
 
 		if (chan && chan->pushbuf_bo)
 			nouveau_channel_idle(chan);
 	}
 
-	pfifo->reassign(dev, false);
-	pfifo->disable(dev);
-	pfifo->unload_context(dev);
-
 	for (e = NVOBJ_ENGINE_NR - 1; e >= 0; e--) {
 		if (!dev_priv->eng[e])
 			continue;
@@ -265,8 +262,6 @@ out_abort:
 		if (dev_priv->eng[e])
 			dev_priv->eng[e]->init(dev, e);
 	}
-	pfifo->enable(dev);
-	pfifo->reassign(dev, true);
 	return ret;
 }
 
@@ -274,6 +269,7 @@ int
 nouveau_pci_resume(struct pci_dev *pdev)
 {
 	struct drm_device *dev = pci_get_drvdata(pdev);
+	struct nouveau_fifo_priv *pfifo = nv_engine(dev, NVOBJ_ENGINE_FIFO);
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
 	struct nouveau_engine *engine = &dev_priv->engine;
 	struct drm_crtc *crtc;
@@ -321,7 +317,6 @@ nouveau_pci_resume(struct pci_dev *pdev)
 		if (dev_priv->eng[i])
 			dev_priv->eng[i]->init(dev, i);
 	}
-	engine->fifo.init(dev);
 
 	nouveau_irq_postinstall(dev);
 
@@ -330,7 +325,7 @@ nouveau_pci_resume(struct pci_dev *pdev)
 		struct nouveau_channel *chan;
 		int j;
 
-		for (i = 0; i < dev_priv->engine.fifo.channels; i++) {
+		for (i = 0; i < (pfifo ? pfifo->channels : 0); i++) {
 			chan = dev_priv->channels.ptr[i];
 			if (!chan || !chan->pushbuf_bo)
 				continue;

+ 17 - 149
drivers/gpu/drm/nouveau/nouveau_drv.h

@@ -70,7 +70,7 @@ struct nouveau_mem;
 
 #define MAX_NUM_DCB_ENTRIES 16
 
-#define NOUVEAU_MAX_CHANNEL_NR 128
+#define NOUVEAU_MAX_CHANNEL_NR 4096
 #define NOUVEAU_MAX_TILE_NR 15
 
 struct nouveau_mem {
@@ -165,8 +165,10 @@ enum nouveau_flags {
 #define NVOBJ_ENGINE_PPP	NVOBJ_ENGINE_MPEG
 #define NVOBJ_ENGINE_BSP	6
 #define NVOBJ_ENGINE_VP		7
-#define NVOBJ_ENGINE_DISPLAY	15
+#define NVOBJ_ENGINE_FIFO	14
+#define NVOBJ_ENGINE_FENCE	15
 #define NVOBJ_ENGINE_NR		16
+#define NVOBJ_ENGINE_DISPLAY	(NVOBJ_ENGINE_NR + 0) /*XXX*/
 
 #define NVOBJ_FLAG_DONT_MAP             (1 << 0)
 #define NVOBJ_FLAG_ZERO_ALLOC		(1 << 1)
@@ -234,17 +236,6 @@ struct nouveau_channel {
 	uint32_t user_get_hi;
 	uint32_t user_put;
 
-	/* Fencing */
-	struct {
-		/* lock protects the pending list only */
-		spinlock_t lock;
-		struct list_head pending;
-		uint32_t sequence;
-		uint32_t sequence_ack;
-		atomic_t last_sequence_irq;
-		struct nouveau_vma vma;
-	} fence;
-
 	/* DMA push buffer */
 	struct nouveau_gpuobj *pushbuf;
 	struct nouveau_bo     *pushbuf_bo;
@@ -258,8 +249,6 @@ struct nouveau_channel {
 
 	/* PFIFO context */
 	struct nouveau_gpuobj *ramfc;
-	struct nouveau_gpuobj *cache;
-	void *fifo_priv;
 
 	/* Execution engine contexts */
 	void *engctx[NVOBJ_ENGINE_NR];
@@ -293,18 +282,6 @@ struct nouveau_channel {
 		int ib_put;
 	} dma;
 
-	uint32_t sw_subchannel[8];
-
-	struct nouveau_vma dispc_vma[4];
-	struct {
-		struct nouveau_gpuobj *vblsem;
-		uint32_t vblsem_head;
-		uint32_t vblsem_offset;
-		uint32_t vblsem_rval;
-		struct list_head vbl_wait;
-		struct list_head flip;
-	} nvsw;
-
 	struct {
 		bool active;
 		char name[32];
@@ -367,30 +344,6 @@ struct nouveau_fb_engine {
 	void (*free_tile_region)(struct drm_device *dev, int i);
 };
 
-struct nouveau_fifo_engine {
-	void *priv;
-	int  channels;
-
-	struct nouveau_gpuobj *playlist[2];
-	int cur_playlist;
-
-	int  (*init)(struct drm_device *);
-	void (*takedown)(struct drm_device *);
-
-	void (*disable)(struct drm_device *);
-	void (*enable)(struct drm_device *);
-	bool (*reassign)(struct drm_device *, bool enable);
-	bool (*cache_pull)(struct drm_device *dev, bool enable);
-
-	int  (*channel_id)(struct drm_device *);
-
-	int  (*create_context)(struct nouveau_channel *);
-	void (*destroy_context)(struct nouveau_channel *);
-	int  (*load_context)(struct nouveau_channel *);
-	int  (*unload_context)(struct drm_device *);
-	void (*tlb_flush)(struct drm_device *dev);
-};
-
 struct nouveau_display_engine {
 	void *priv;
 	int (*early_init)(struct drm_device *);
@@ -598,7 +551,6 @@ struct nouveau_engine {
 	struct nouveau_mc_engine      mc;
 	struct nouveau_timer_engine   timer;
 	struct nouveau_fb_engine      fb;
-	struct nouveau_fifo_engine    fifo;
 	struct nouveau_display_engine display;
 	struct nouveau_gpio_engine    gpio;
 	struct nouveau_pm_engine      pm;
@@ -741,6 +693,9 @@ struct drm_nouveau_private {
 		struct ttm_bo_global_ref bo_global_ref;
 		struct ttm_bo_device bdev;
 		atomic_t validate_sequence;
+		int (*move)(struct nouveau_channel *,
+			    struct ttm_buffer_object *,
+			    struct ttm_mem_reg *, struct ttm_mem_reg *);
 	} ttm;
 
 	struct {
@@ -978,7 +933,7 @@ extern void nouveau_channel_put_unlocked(struct nouveau_channel **);
 extern void nouveau_channel_put(struct nouveau_channel **);
 extern void nouveau_channel_ref(struct nouveau_channel *chan,
 				struct nouveau_channel **pchan);
-extern void nouveau_channel_idle(struct nouveau_channel *chan);
+extern int  nouveau_channel_idle(struct nouveau_channel *chan);
 
 /* nouveau_object.c */
 #define NVOBJ_ENGINE_ADD(d, e, p) do {                                         \
@@ -1210,56 +1165,6 @@ extern void nv50_fb_vm_trap(struct drm_device *, int display);
 extern int  nvc0_fb_init(struct drm_device *);
 extern void nvc0_fb_takedown(struct drm_device *);
 
-/* nv04_fifo.c */
-extern int  nv04_fifo_init(struct drm_device *);
-extern void nv04_fifo_fini(struct drm_device *);
-extern void nv04_fifo_disable(struct drm_device *);
-extern void nv04_fifo_enable(struct drm_device *);
-extern bool nv04_fifo_reassign(struct drm_device *, bool);
-extern bool nv04_fifo_cache_pull(struct drm_device *, bool);
-extern int  nv04_fifo_channel_id(struct drm_device *);
-extern int  nv04_fifo_create_context(struct nouveau_channel *);
-extern void nv04_fifo_destroy_context(struct nouveau_channel *);
-extern int  nv04_fifo_load_context(struct nouveau_channel *);
-extern int  nv04_fifo_unload_context(struct drm_device *);
-extern void nv04_fifo_isr(struct drm_device *);
-
-/* nv10_fifo.c */
-extern int  nv10_fifo_init(struct drm_device *);
-extern int  nv10_fifo_channel_id(struct drm_device *);
-extern int  nv10_fifo_create_context(struct nouveau_channel *);
-extern int  nv10_fifo_load_context(struct nouveau_channel *);
-extern int  nv10_fifo_unload_context(struct drm_device *);
-
-/* nv40_fifo.c */
-extern int  nv40_fifo_init(struct drm_device *);
-extern int  nv40_fifo_create_context(struct nouveau_channel *);
-extern int  nv40_fifo_load_context(struct nouveau_channel *);
-extern int  nv40_fifo_unload_context(struct drm_device *);
-
-/* nv50_fifo.c */
-extern int  nv50_fifo_init(struct drm_device *);
-extern void nv50_fifo_takedown(struct drm_device *);
-extern int  nv50_fifo_channel_id(struct drm_device *);
-extern int  nv50_fifo_create_context(struct nouveau_channel *);
-extern void nv50_fifo_destroy_context(struct nouveau_channel *);
-extern int  nv50_fifo_load_context(struct nouveau_channel *);
-extern int  nv50_fifo_unload_context(struct drm_device *);
-extern void nv50_fifo_tlb_flush(struct drm_device *dev);
-
-/* nvc0_fifo.c */
-extern int  nvc0_fifo_init(struct drm_device *);
-extern void nvc0_fifo_takedown(struct drm_device *);
-extern void nvc0_fifo_disable(struct drm_device *);
-extern void nvc0_fifo_enable(struct drm_device *);
-extern bool nvc0_fifo_reassign(struct drm_device *, bool);
-extern bool nvc0_fifo_cache_pull(struct drm_device *, bool);
-extern int  nvc0_fifo_channel_id(struct drm_device *);
-extern int  nvc0_fifo_create_context(struct nouveau_channel *);
-extern void nvc0_fifo_destroy_context(struct nouveau_channel *);
-extern int  nvc0_fifo_load_context(struct nouveau_channel *);
-extern int  nvc0_fifo_unload_context(struct drm_device *);
-
 /* nv04_graph.c */
 extern int  nv04_graph_create(struct drm_device *);
 extern int  nv04_graph_object_new(struct nouveau_channel *, int, u32, u16);
@@ -1278,18 +1183,23 @@ extern int  nv20_graph_create(struct drm_device *);
 
 /* nv40_graph.c */
 extern int  nv40_graph_create(struct drm_device *);
-extern void nv40_grctx_init(struct nouveau_grctx *);
+extern void nv40_grctx_init(struct drm_device *, u32 *size);
+extern void nv40_grctx_fill(struct drm_device *, struct nouveau_gpuobj *);
 
 /* nv50_graph.c */
 extern int  nv50_graph_create(struct drm_device *);
-extern int  nv50_grctx_init(struct nouveau_grctx *);
 extern struct nouveau_enum nv50_data_error_names[];
 extern int  nv50_graph_isr_chid(struct drm_device *dev, u64 inst);
+extern int  nv50_grctx_init(struct drm_device *, u32 *, u32, u32 *, u32 *);
+extern void nv50_grctx_fill(struct drm_device *, struct nouveau_gpuobj *);
 
 /* nvc0_graph.c */
 extern int  nvc0_graph_create(struct drm_device *);
 extern int  nvc0_graph_isr_chid(struct drm_device *dev, u64 inst);
 
+/* nve0_graph.c */
+extern int  nve0_graph_create(struct drm_device *);
+
 /* nv84_crypt.c */
 extern int  nv84_crypt_create(struct drm_device *);
 
@@ -1415,6 +1325,7 @@ extern int nv04_crtc_create(struct drm_device *, int index);
 
 /* nouveau_bo.c */
 extern struct ttm_bo_driver nouveau_bo_driver;
+extern void nouveau_bo_move_init(struct nouveau_channel *);
 extern int nouveau_bo_new(struct drm_device *, int size, int align,
 			  uint32_t flags, uint32_t tile_mode,
 			  uint32_t tile_flags,
@@ -1440,50 +1351,6 @@ extern int  nouveau_bo_vma_add(struct nouveau_bo *, struct nouveau_vm *,
 			       struct nouveau_vma *);
 extern void nouveau_bo_vma_del(struct nouveau_bo *, struct nouveau_vma *);
 
-/* nouveau_fence.c */
-struct nouveau_fence;
-extern int nouveau_fence_init(struct drm_device *);
-extern void nouveau_fence_fini(struct drm_device *);
-extern int nouveau_fence_channel_init(struct nouveau_channel *);
-extern void nouveau_fence_channel_fini(struct nouveau_channel *);
-extern void nouveau_fence_update(struct nouveau_channel *);
-extern int nouveau_fence_new(struct nouveau_channel *, struct nouveau_fence **,
-			     bool emit);
-extern int nouveau_fence_emit(struct nouveau_fence *);
-extern void nouveau_fence_work(struct nouveau_fence *fence,
-			       void (*work)(void *priv, bool signalled),
-			       void *priv);
-struct nouveau_channel *nouveau_fence_channel(struct nouveau_fence *);
-
-extern bool __nouveau_fence_signalled(void *obj, void *arg);
-extern int __nouveau_fence_wait(void *obj, void *arg, bool lazy, bool intr);
-extern int __nouveau_fence_flush(void *obj, void *arg);
-extern void __nouveau_fence_unref(void **obj);
-extern void *__nouveau_fence_ref(void *obj);
-
-static inline bool nouveau_fence_signalled(struct nouveau_fence *obj)
-{
-	return __nouveau_fence_signalled(obj, NULL);
-}
-static inline int
-nouveau_fence_wait(struct nouveau_fence *obj, bool lazy, bool intr)
-{
-	return __nouveau_fence_wait(obj, NULL, lazy, intr);
-}
-extern int nouveau_fence_sync(struct nouveau_fence *, struct nouveau_channel *);
-static inline int nouveau_fence_flush(struct nouveau_fence *obj)
-{
-	return __nouveau_fence_flush(obj, NULL);
-}
-static inline void nouveau_fence_unref(struct nouveau_fence **obj)
-{
-	__nouveau_fence_unref((void **)obj);
-}
-static inline struct nouveau_fence *nouveau_fence_ref(struct nouveau_fence *obj)
-{
-	return __nouveau_fence_ref(obj);
-}
-
 /* nouveau_gem.c */
 extern int nouveau_gem_new(struct drm_device *, int size, int align,
 			   uint32_t domain, uint32_t tile_mode,
@@ -1780,6 +1647,7 @@ nv44_graph_class(struct drm_device *dev)
 #define NV84_SUBCHAN_SEMAPHORE_TRIGGER_ACQUIRE_EQUAL                 0x00000001
 #define NV84_SUBCHAN_SEMAPHORE_TRIGGER_WRITE_LONG                    0x00000002
 #define NV84_SUBCHAN_SEMAPHORE_TRIGGER_ACQUIRE_GEQUAL                0x00000004
+#define NVC0_SUBCHAN_SEMAPHORE_TRIGGER_YIELD                         0x00001000
 #define NV84_SUBCHAN_NOTIFY_INTR                                     0x00000020
 #define NV84_SUBCHAN_WRCACHE_FLUSH                                   0x00000024
 #define NV10_SUBCHAN_REF_CNT                                         0x00000050

+ 2 - 32
drivers/gpu/drm/nouveau/nouveau_fbcon.c

@@ -153,7 +153,7 @@ nouveau_fbcon_sync(struct fb_info *info)
 	struct drm_device *dev = nfbdev->dev;
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
 	struct nouveau_channel *chan = dev_priv->channel;
-	int ret, i;
+	int ret;
 
 	if (!chan || !chan->accel_done || in_interrupt() ||
 	    info->state != FBINFO_STATE_RUNNING ||
@@ -163,38 +163,8 @@ nouveau_fbcon_sync(struct fb_info *info)
 	if (!mutex_trylock(&chan->mutex))
 		return 0;
 
-	ret = RING_SPACE(chan, 4);
-	if (ret) {
-		mutex_unlock(&chan->mutex);
-		nouveau_fbcon_gpu_lockup(info);
-		return 0;
-	}
-
-	if (dev_priv->card_type >= NV_C0) {
-		BEGIN_NVC0(chan, 2, NvSub2D, 0x010c, 1);
-		OUT_RING  (chan, 0);
-		BEGIN_NVC0(chan, 2, NvSub2D, 0x0100, 1);
-		OUT_RING  (chan, 0);
-	} else {
-		BEGIN_RING(chan, 0, 0x0104, 1);
-		OUT_RING  (chan, 0);
-		BEGIN_RING(chan, 0, 0x0100, 1);
-		OUT_RING  (chan, 0);
-	}
-
-	nouveau_bo_wr32(chan->notifier_bo, chan->m2mf_ntfy/4 + 3, 0xffffffff);
-	FIRE_RING(chan);
+	ret = nouveau_channel_idle(chan);
 	mutex_unlock(&chan->mutex);
-
-	ret = -EBUSY;
-	for (i = 0; i < 100000; i++) {
-		if (!nouveau_bo_rd32(chan->notifier_bo, chan->m2mf_ntfy/4 + 3)) {
-			ret = 0;
-			break;
-		}
-		DRM_UDELAY(1);
-	}
-
 	if (ret) {
 		nouveau_fbcon_gpu_lockup(info);
 		return 0;

+ 88 - 490
drivers/gpu/drm/nouveau/nouveau_fence.c

@@ -32,220 +32,100 @@
 
 #include "nouveau_drv.h"
 #include "nouveau_ramht.h"
+#include "nouveau_fence.h"
+#include "nouveau_software.h"
 #include "nouveau_dma.h"
 
-#define USE_REFCNT(dev) (nouveau_private(dev)->chipset >= 0x10)
-#define USE_SEMA(dev) (nouveau_private(dev)->chipset >= 0x17)
-
-struct nouveau_fence {
-	struct nouveau_channel *channel;
-	struct kref refcount;
-	struct list_head entry;
-
-	uint32_t sequence;
-	bool signalled;
-
-	void (*work)(void *priv, bool signalled);
-	void *priv;
-};
-
-struct nouveau_semaphore {
-	struct kref ref;
-	struct drm_device *dev;
-	struct drm_mm_node *mem;
-};
-
-static inline struct nouveau_fence *
-nouveau_fence(void *sync_obj)
+void
+nouveau_fence_context_del(struct nouveau_fence_chan *fctx)
 {
-	return (struct nouveau_fence *)sync_obj;
+	struct nouveau_fence *fence, *fnext;
+	spin_lock(&fctx->lock);
+	list_for_each_entry_safe(fence, fnext, &fctx->pending, head) {
+		if (fence->work)
+			fence->work(fence->priv, false);
+		fence->channel = NULL;
+		list_del(&fence->head);
+		nouveau_fence_unref(&fence);
+	}
+	spin_unlock(&fctx->lock);
 }
 
-static void
-nouveau_fence_del(struct kref *ref)
+void
+nouveau_fence_context_new(struct nouveau_fence_chan *fctx)
 {
-	struct nouveau_fence *fence =
-		container_of(ref, struct nouveau_fence, refcount);
-
-	nouveau_channel_ref(NULL, &fence->channel);
-	kfree(fence);
+	INIT_LIST_HEAD(&fctx->pending);
+	spin_lock_init(&fctx->lock);
 }
 
 void
 nouveau_fence_update(struct nouveau_channel *chan)
 {
 	struct drm_device *dev = chan->dev;
-	struct nouveau_fence *tmp, *fence;
-	uint32_t sequence;
+	struct nouveau_fence_priv *priv = nv_engine(dev, NVOBJ_ENGINE_FENCE);
+	struct nouveau_fence_chan *fctx = chan->engctx[NVOBJ_ENGINE_FENCE];
+	struct nouveau_fence *fence, *fnext;
 
-	spin_lock(&chan->fence.lock);
-
-	/* Fetch the last sequence if the channel is still up and running */
-	if (likely(!list_empty(&chan->fence.pending))) {
-		if (USE_REFCNT(dev))
-			sequence = nvchan_rd32(chan, 0x48);
-		else
-			sequence = atomic_read(&chan->fence.last_sequence_irq);
-
-		if (chan->fence.sequence_ack == sequence)
-			goto out;
-		chan->fence.sequence_ack = sequence;
-	}
-
-	list_for_each_entry_safe(fence, tmp, &chan->fence.pending, entry) {
-		if (fence->sequence > chan->fence.sequence_ack)
+	spin_lock(&fctx->lock);
+	list_for_each_entry_safe(fence, fnext, &fctx->pending, head) {
+		if (priv->read(chan) < fence->sequence)
 			break;
 
-		fence->signalled = true;
-		list_del(&fence->entry);
 		if (fence->work)
 			fence->work(fence->priv, true);
-
-		kref_put(&fence->refcount, nouveau_fence_del);
-	}
-
-out:
-	spin_unlock(&chan->fence.lock);
-}
-
-int
-nouveau_fence_new(struct nouveau_channel *chan, struct nouveau_fence **pfence,
-		  bool emit)
-{
-	struct nouveau_fence *fence;
-	int ret = 0;
-
-	fence = kzalloc(sizeof(*fence), GFP_KERNEL);
-	if (!fence)
-		return -ENOMEM;
-	kref_init(&fence->refcount);
-	nouveau_channel_ref(chan, &fence->channel);
-
-	if (emit)
-		ret = nouveau_fence_emit(fence);
-
-	if (ret)
+		fence->channel = NULL;
+		list_del(&fence->head);
 		nouveau_fence_unref(&fence);
-	*pfence = fence;
-	return ret;
-}
-
-struct nouveau_channel *
-nouveau_fence_channel(struct nouveau_fence *fence)
-{
-	return fence ? nouveau_channel_get_unlocked(fence->channel) : NULL;
+	}
+	spin_unlock(&fctx->lock);
 }
 
 int
-nouveau_fence_emit(struct nouveau_fence *fence)
+nouveau_fence_emit(struct nouveau_fence *fence, struct nouveau_channel *chan)
 {
-	struct nouveau_channel *chan = fence->channel;
 	struct drm_device *dev = chan->dev;
-	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nouveau_fence_priv *priv = nv_engine(dev, NVOBJ_ENGINE_FENCE);
+	struct nouveau_fence_chan *fctx = chan->engctx[NVOBJ_ENGINE_FENCE];
 	int ret;
 
-	ret = RING_SPACE(chan, 2);
-	if (ret)
-		return ret;
-
-	if (unlikely(chan->fence.sequence == chan->fence.sequence_ack - 1)) {
-		nouveau_fence_update(chan);
+	fence->channel  = chan;
+	fence->timeout  = jiffies + (3 * DRM_HZ);
+	fence->sequence = ++fctx->sequence;
 
-		BUG_ON(chan->fence.sequence ==
-		       chan->fence.sequence_ack - 1);
+	ret = priv->emit(fence);
+	if (!ret) {
+		kref_get(&fence->kref);
+		spin_lock(&fctx->lock);
+		list_add_tail(&fence->head, &fctx->pending);
+		spin_unlock(&fctx->lock);
 	}
 
-	fence->sequence = ++chan->fence.sequence;
-
-	kref_get(&fence->refcount);
-	spin_lock(&chan->fence.lock);
-	list_add_tail(&fence->entry, &chan->fence.pending);
-	spin_unlock(&chan->fence.lock);
-
-	if (USE_REFCNT(dev)) {
-		if (dev_priv->card_type < NV_C0)
-			BEGIN_RING(chan, 0, NV10_SUBCHAN_REF_CNT, 1);
-		else
-			BEGIN_NVC0(chan, 2, 0, NV10_SUBCHAN_REF_CNT, 1);
-	} else {
-		BEGIN_RING(chan, NvSubSw, 0x0150, 1);
-	}
-	OUT_RING (chan, fence->sequence);
-	FIRE_RING(chan);
-
-	return 0;
-}
-
-void
-nouveau_fence_work(struct nouveau_fence *fence,
-		   void (*work)(void *priv, bool signalled),
-		   void *priv)
-{
-	BUG_ON(fence->work);
-
-	spin_lock(&fence->channel->fence.lock);
-
-	if (fence->signalled) {
-		work(priv, true);
-	} else {
-		fence->work = work;
-		fence->priv = priv;
-	}
-
-	spin_unlock(&fence->channel->fence.lock);
-}
-
-void
-__nouveau_fence_unref(void **sync_obj)
-{
-	struct nouveau_fence *fence = nouveau_fence(*sync_obj);
-
-	if (fence)
-		kref_put(&fence->refcount, nouveau_fence_del);
-	*sync_obj = NULL;
-}
-
-void *
-__nouveau_fence_ref(void *sync_obj)
-{
-	struct nouveau_fence *fence = nouveau_fence(sync_obj);
-
-	kref_get(&fence->refcount);
-	return sync_obj;
+	return ret;
 }
 
 bool
-__nouveau_fence_signalled(void *sync_obj, void *sync_arg)
+nouveau_fence_done(struct nouveau_fence *fence)
 {
-	struct nouveau_fence *fence = nouveau_fence(sync_obj);
-	struct nouveau_channel *chan = fence->channel;
-
-	if (fence->signalled)
-		return true;
-
-	nouveau_fence_update(chan);
-	return fence->signalled;
+	if (fence->channel)
+		nouveau_fence_update(fence->channel);
+	return !fence->channel;
 }
 
 int
-__nouveau_fence_wait(void *sync_obj, void *sync_arg, bool lazy, bool intr)
+nouveau_fence_wait(struct nouveau_fence *fence, bool lazy, bool intr)
 {
-	unsigned long timeout = jiffies + (3 * DRM_HZ);
 	unsigned long sleep_time = NSEC_PER_MSEC / 1000;
 	ktime_t t;
 	int ret = 0;
 
-	while (1) {
-		if (__nouveau_fence_signalled(sync_obj, sync_arg))
-			break;
-
-		if (time_after_eq(jiffies, timeout)) {
+	while (!nouveau_fence_done(fence)) {
+		if (fence->timeout && time_after_eq(jiffies, fence->timeout)) {
 			ret = -EBUSY;
 			break;
 		}
 
-		__set_current_state(intr ? TASK_INTERRUPTIBLE
-			: TASK_UNINTERRUPTIBLE);
+		__set_current_state(intr ? TASK_INTERRUPTIBLE :
+					   TASK_UNINTERRUPTIBLE);
 		if (lazy) {
 			t = ktime_set(0, sleep_time);
 			schedule_hrtimeout(&t, HRTIMER_MODE_REL);
@@ -261,354 +141,72 @@ __nouveau_fence_wait(void *sync_obj, void *sync_arg, bool lazy, bool intr)
 	}
 
 	__set_current_state(TASK_RUNNING);
-
 	return ret;
 }
 
-static struct nouveau_semaphore *
-semaphore_alloc(struct drm_device *dev)
-{
-	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nouveau_semaphore *sema;
-	int size = (dev_priv->chipset < 0x84) ? 4 : 16;
-	int ret, i;
-
-	if (!USE_SEMA(dev))
-		return NULL;
-
-	sema = kmalloc(sizeof(*sema), GFP_KERNEL);
-	if (!sema)
-		goto fail;
-
-	ret = drm_mm_pre_get(&dev_priv->fence.heap);
-	if (ret)
-		goto fail;
-
-	spin_lock(&dev_priv->fence.lock);
-	sema->mem = drm_mm_search_free(&dev_priv->fence.heap, size, 0, 0);
-	if (sema->mem)
-		sema->mem = drm_mm_get_block_atomic(sema->mem, size, 0);
-	spin_unlock(&dev_priv->fence.lock);
-
-	if (!sema->mem)
-		goto fail;
-
-	kref_init(&sema->ref);
-	sema->dev = dev;
-	for (i = sema->mem->start; i < sema->mem->start + size; i += 4)
-		nouveau_bo_wr32(dev_priv->fence.bo, i / 4, 0);
-
-	return sema;
-fail:
-	kfree(sema);
-	return NULL;
-}
-
-static void
-semaphore_free(struct kref *ref)
-{
-	struct nouveau_semaphore *sema =
-		container_of(ref, struct nouveau_semaphore, ref);
-	struct drm_nouveau_private *dev_priv = sema->dev->dev_private;
-
-	spin_lock(&dev_priv->fence.lock);
-	drm_mm_put_block(sema->mem);
-	spin_unlock(&dev_priv->fence.lock);
-
-	kfree(sema);
-}
-
-static void
-semaphore_work(void *priv, bool signalled)
-{
-	struct nouveau_semaphore *sema = priv;
-	struct drm_nouveau_private *dev_priv = sema->dev->dev_private;
-
-	if (unlikely(!signalled))
-		nouveau_bo_wr32(dev_priv->fence.bo, sema->mem->start / 4, 1);
-
-	kref_put(&sema->ref, semaphore_free);
-}
-
-static int
-semaphore_acquire(struct nouveau_channel *chan, struct nouveau_semaphore *sema)
-{
-	struct drm_nouveau_private *dev_priv = chan->dev->dev_private;
-	struct nouveau_fence *fence = NULL;
-	u64 offset = chan->fence.vma.offset + sema->mem->start;
-	int ret;
-
-	if (dev_priv->chipset < 0x84) {
-		ret = RING_SPACE(chan, 4);
-		if (ret)
-			return ret;
-
-		BEGIN_RING(chan, 0, NV11_SUBCHAN_DMA_SEMAPHORE, 3);
-		OUT_RING  (chan, NvSema);
-		OUT_RING  (chan, offset);
-		OUT_RING  (chan, 1);
-	} else
-	if (dev_priv->chipset < 0xc0) {
-		ret = RING_SPACE(chan, 7);
-		if (ret)
-			return ret;
-
-		BEGIN_RING(chan, 0, NV11_SUBCHAN_DMA_SEMAPHORE, 1);
-		OUT_RING  (chan, chan->vram_handle);
-		BEGIN_RING(chan, 0, NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH, 4);
-		OUT_RING  (chan, upper_32_bits(offset));
-		OUT_RING  (chan, lower_32_bits(offset));
-		OUT_RING  (chan, 1);
-		OUT_RING  (chan, 1); /* ACQUIRE_EQ */
-	} else {
-		ret = RING_SPACE(chan, 5);
-		if (ret)
-			return ret;
-
-		BEGIN_NVC0(chan, 2, 0, NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH, 4);
-		OUT_RING  (chan, upper_32_bits(offset));
-		OUT_RING  (chan, lower_32_bits(offset));
-		OUT_RING  (chan, 1);
-		OUT_RING  (chan, 0x1001); /* ACQUIRE_EQ */
-	}
-
-	/* Delay semaphore destruction until its work is done */
-	ret = nouveau_fence_new(chan, &fence, true);
-	if (ret)
-		return ret;
-
-	kref_get(&sema->ref);
-	nouveau_fence_work(fence, semaphore_work, sema);
-	nouveau_fence_unref(&fence);
-	return 0;
-}
-
-static int
-semaphore_release(struct nouveau_channel *chan, struct nouveau_semaphore *sema)
-{
-	struct drm_nouveau_private *dev_priv = chan->dev->dev_private;
-	struct nouveau_fence *fence = NULL;
-	u64 offset = chan->fence.vma.offset + sema->mem->start;
-	int ret;
-
-	if (dev_priv->chipset < 0x84) {
-		ret = RING_SPACE(chan, 5);
-		if (ret)
-			return ret;
-
-		BEGIN_RING(chan, 0, NV11_SUBCHAN_DMA_SEMAPHORE, 2);
-		OUT_RING  (chan, NvSema);
-		OUT_RING  (chan, offset);
-		BEGIN_RING(chan, 0, NV11_SUBCHAN_SEMAPHORE_RELEASE, 1);
-		OUT_RING  (chan, 1);
-	} else
-	if (dev_priv->chipset < 0xc0) {
-		ret = RING_SPACE(chan, 7);
-		if (ret)
-			return ret;
-
-		BEGIN_RING(chan, 0, NV11_SUBCHAN_DMA_SEMAPHORE, 1);
-		OUT_RING  (chan, chan->vram_handle);
-		BEGIN_RING(chan, 0, NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH, 4);
-		OUT_RING  (chan, upper_32_bits(offset));
-		OUT_RING  (chan, lower_32_bits(offset));
-		OUT_RING  (chan, 1);
-		OUT_RING  (chan, 2); /* RELEASE */
-	} else {
-		ret = RING_SPACE(chan, 5);
-		if (ret)
-			return ret;
-
-		BEGIN_NVC0(chan, 2, 0, NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH, 4);
-		OUT_RING  (chan, upper_32_bits(offset));
-		OUT_RING  (chan, lower_32_bits(offset));
-		OUT_RING  (chan, 1);
-		OUT_RING  (chan, 0x1002); /* RELEASE */
-	}
-
-	/* Delay semaphore destruction until its work is done */
-	ret = nouveau_fence_new(chan, &fence, true);
-	if (ret)
-		return ret;
-
-	kref_get(&sema->ref);
-	nouveau_fence_work(fence, semaphore_work, sema);
-	nouveau_fence_unref(&fence);
-	return 0;
-}
-
 int
-nouveau_fence_sync(struct nouveau_fence *fence,
-		   struct nouveau_channel *wchan)
+nouveau_fence_sync(struct nouveau_fence *fence, struct nouveau_channel *chan)
 {
-	struct nouveau_channel *chan = nouveau_fence_channel(fence);
-	struct drm_device *dev = wchan->dev;
-	struct nouveau_semaphore *sema;
+	struct drm_device *dev = chan->dev;
+	struct nouveau_fence_priv *priv = nv_engine(dev, NVOBJ_ENGINE_FENCE);
+	struct nouveau_channel *prev;
 	int ret = 0;
 
-	if (likely(!chan || chan == wchan ||
-		   nouveau_fence_signalled(fence)))
-		goto out;
-
-	sema = semaphore_alloc(dev);
-	if (!sema) {
-		/* Early card or broken userspace, fall back to
-		 * software sync. */
-		ret = nouveau_fence_wait(fence, true, false);
-		goto out;
-	}
-
-	/* try to take chan's mutex, if we can't take it right away
-	 * we have to fallback to software sync to prevent locking
-	 * order issues
-	 */
-	if (!mutex_trylock(&chan->mutex)) {
-		ret = nouveau_fence_wait(fence, true, false);
-		goto out_unref;
+	prev = fence ? nouveau_channel_get_unlocked(fence->channel) : NULL;
+	if (prev) {
+		if (unlikely(prev != chan && !nouveau_fence_done(fence))) {
+			ret = priv->sync(fence, prev, chan);
+			if (unlikely(ret))
+				ret = nouveau_fence_wait(fence, true, false);
+		}
+		nouveau_channel_put_unlocked(&prev);
 	}
 
-	/* Make wchan wait until it gets signalled */
-	ret = semaphore_acquire(wchan, sema);
-	if (ret)
-		goto out_unlock;
-
-	/* Signal the semaphore from chan */
-	ret = semaphore_release(chan, sema);
-
-out_unlock:
-	mutex_unlock(&chan->mutex);
-out_unref:
-	kref_put(&sema->ref, semaphore_free);
-out:
-	if (chan)
-		nouveau_channel_put_unlocked(&chan);
 	return ret;
 }
 
-int
-__nouveau_fence_flush(void *sync_obj, void *sync_arg)
+static void
+nouveau_fence_del(struct kref *kref)
 {
-	return 0;
+	struct nouveau_fence *fence = container_of(kref, typeof(*fence), kref);
+	kfree(fence);
 }
 
-int
-nouveau_fence_channel_init(struct nouveau_channel *chan)
+void
+nouveau_fence_unref(struct nouveau_fence **pfence)
 {
-	struct drm_device *dev = chan->dev;
-	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nouveau_gpuobj *obj = NULL;
-	int ret;
-
-	if (dev_priv->card_type < NV_C0) {
-		/* Create an NV_SW object for various sync purposes */
-		ret = nouveau_gpuobj_gr_new(chan, NvSw, NV_SW);
-		if (ret)
-			return ret;
-
-		ret = RING_SPACE(chan, 2);
-		if (ret)
-			return ret;
-
-		BEGIN_RING(chan, NvSubSw, NV01_SUBCHAN_OBJECT, 1);
-		OUT_RING  (chan, NvSw);
-		FIRE_RING (chan);
-	}
-
-	/* Setup area of memory shared between all channels for x-chan sync */
-	if (USE_SEMA(dev) && dev_priv->chipset < 0x84) {
-		struct ttm_mem_reg *mem = &dev_priv->fence.bo->bo.mem;
-
-		ret = nouveau_gpuobj_dma_new(chan, NV_CLASS_DMA_FROM_MEMORY,
-					     mem->start << PAGE_SHIFT,
-					     mem->size, NV_MEM_ACCESS_RW,
-					     NV_MEM_TARGET_VRAM, &obj);
-		if (ret)
-			return ret;
-
-		ret = nouveau_ramht_insert(chan, NvSema, obj);
-		nouveau_gpuobj_ref(NULL, &obj);
-		if (ret)
-			return ret;
-	} else
-	if (USE_SEMA(dev)) {
-		/* map fence bo into channel's vm */
-		ret = nouveau_bo_vma_add(dev_priv->fence.bo, chan->vm,
-					 &chan->fence.vma);
-		if (ret)
-			return ret;
-	}
-
-	atomic_set(&chan->fence.last_sequence_irq, 0);
-	return 0;
+	if (*pfence)
+		kref_put(&(*pfence)->kref, nouveau_fence_del);
+	*pfence = NULL;
 }
 
-void
-nouveau_fence_channel_fini(struct nouveau_channel *chan)
+struct nouveau_fence *
+nouveau_fence_ref(struct nouveau_fence *fence)
 {
-	struct drm_nouveau_private *dev_priv = chan->dev->dev_private;
-	struct nouveau_fence *tmp, *fence;
-
-	spin_lock(&chan->fence.lock);
-	list_for_each_entry_safe(fence, tmp, &chan->fence.pending, entry) {
-		fence->signalled = true;
-		list_del(&fence->entry);
-
-		if (unlikely(fence->work))
-			fence->work(fence->priv, false);
-
-		kref_put(&fence->refcount, nouveau_fence_del);
-	}
-	spin_unlock(&chan->fence.lock);
-
-	nouveau_bo_vma_del(dev_priv->fence.bo, &chan->fence.vma);
+	kref_get(&fence->kref);
+	return fence;
 }
 
 int
-nouveau_fence_init(struct drm_device *dev)
+nouveau_fence_new(struct nouveau_channel *chan, struct nouveau_fence **pfence)
 {
-	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	int size = (dev_priv->chipset < 0x84) ? 4096 : 16384;
-	int ret;
-
-	/* Create a shared VRAM heap for cross-channel sync. */
-	if (USE_SEMA(dev)) {
-		ret = nouveau_bo_new(dev, size, 0, TTM_PL_FLAG_VRAM,
-				     0, 0, NULL, &dev_priv->fence.bo);
-		if (ret)
-			return ret;
+	struct nouveau_fence *fence;
+	int ret = 0;
 
-		ret = nouveau_bo_pin(dev_priv->fence.bo, TTM_PL_FLAG_VRAM);
-		if (ret)
-			goto fail;
+	if (unlikely(!chan->engctx[NVOBJ_ENGINE_FENCE]))
+		return -ENODEV;
 
-		ret = nouveau_bo_map(dev_priv->fence.bo);
-		if (ret)
-			goto fail;
+	fence = kzalloc(sizeof(*fence), GFP_KERNEL);
+	if (!fence)
+		return -ENOMEM;
+	kref_init(&fence->kref);
 
-		ret = drm_mm_init(&dev_priv->fence.heap, 0,
-				  dev_priv->fence.bo->bo.mem.size);
+	if (chan) {
+		ret = nouveau_fence_emit(fence, chan);
 		if (ret)
-			goto fail;
-
-		spin_lock_init(&dev_priv->fence.lock);
+			nouveau_fence_unref(&fence);
 	}
 
-	return 0;
-fail:
-	nouveau_bo_unmap(dev_priv->fence.bo);
-	nouveau_bo_ref(NULL, &dev_priv->fence.bo);
+	*pfence = fence;
 	return ret;
 }
-
-void
-nouveau_fence_fini(struct drm_device *dev)
-{
-	struct drm_nouveau_private *dev_priv = dev->dev_private;
-
-	if (USE_SEMA(dev)) {
-		drm_mm_takedown(&dev_priv->fence.heap);
-		nouveau_bo_unmap(dev_priv->fence.bo);
-		nouveau_bo_unpin(dev_priv->fence.bo);
-		nouveau_bo_ref(NULL, &dev_priv->fence.bo);
-	}
-}

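Note on the rewrite above: a fence is now a plain kref-counted object. nouveau_fence_new() allocates and immediately emits through the chipset's emit() hook, nouveau_fence_update() reaps the pending list by comparing the backend's read() value against each fence's sequence, and a signalled fence is simply one whose channel pointer has been cleared. A minimal caller-side sketch of the new lifecycle (error paths trimmed; chan is assumed to be a live channel whose fence engine context exists):

	struct nouveau_fence *fence = NULL;
	int ret;

	ret = nouveau_fence_new(chan, &fence);	/* allocates + emits */
	if (ret)
		return ret;

	/* lazy, hrtimer-backed poll, uninterruptible; -EBUSY on timeout */
	ret = nouveau_fence_wait(fence, true, false);
	nouveau_fence_unref(&fence);	/* the pending list holds its own ref */
	return ret;
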
+ 52 - 0
drivers/gpu/drm/nouveau/nouveau_fence.h

@@ -0,0 +1,52 @@
+#ifndef __NOUVEAU_FENCE_H__
+#define __NOUVEAU_FENCE_H__
+
+struct nouveau_fence {
+	struct list_head head;
+	struct kref kref;
+
+	struct nouveau_channel *channel;
+	unsigned long timeout;
+	u32 sequence;
+
+	void (*work)(void *priv, bool signalled);
+	void *priv;
+};
+
+int  nouveau_fence_new(struct nouveau_channel *, struct nouveau_fence **);
+struct nouveau_fence *
+nouveau_fence_ref(struct nouveau_fence *);
+void nouveau_fence_unref(struct nouveau_fence **);
+
+int  nouveau_fence_emit(struct nouveau_fence *, struct nouveau_channel *);
+bool nouveau_fence_done(struct nouveau_fence *);
+int  nouveau_fence_wait(struct nouveau_fence *, bool lazy, bool intr);
+int  nouveau_fence_sync(struct nouveau_fence *, struct nouveau_channel *);
+void nouveau_fence_idle(struct nouveau_channel *);
+void nouveau_fence_update(struct nouveau_channel *);
+
+struct nouveau_fence_chan {
+	struct list_head pending;
+	spinlock_t lock;
+	u32 sequence;
+};
+
+struct nouveau_fence_priv {
+	struct nouveau_exec_engine engine;
+	int (*emit)(struct nouveau_fence *);
+	int (*sync)(struct nouveau_fence *, struct nouveau_channel *,
+		    struct nouveau_channel *);
+	u32 (*read)(struct nouveau_channel *);
+};
+
+void nouveau_fence_context_new(struct nouveau_fence_chan *);
+void nouveau_fence_context_del(struct nouveau_fence_chan *);
+
+int nv04_fence_create(struct drm_device *dev);
+int nv04_fence_mthd(struct nouveau_channel *, u32, u32, u32);
+
+int nv10_fence_create(struct drm_device *dev);
+int nv84_fence_create(struct drm_device *dev);
+int nvc0_fence_create(struct drm_device *dev);
+
+#endif

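nouveau_fence_context_new()/_del() are declared in this header but their bodies fall outside the hunks shown above. A sketch of what they must do, inferred from the structures here and the reap loop in nouveau_fence_update() (an assumption, not the verbatim implementation):

	void
	nouveau_fence_context_new(struct nouveau_fence_chan *fctx)
	{
		INIT_LIST_HEAD(&fctx->pending);
		spin_lock_init(&fctx->lock);
	}

	void
	nouveau_fence_context_del(struct nouveau_fence_chan *fctx)
	{
		struct nouveau_fence *fence, *fnext;

		/* channel is going away: fail any work callbacks and drop
		 * the pending list's references */
		spin_lock(&fctx->lock);
		list_for_each_entry_safe(fence, fnext, &fctx->pending, head) {
			if (fence->work)
				fence->work(fence->priv, false);
			fence->channel = NULL;
			list_del(&fence->head);
			nouveau_fence_unref(&fence);
		}
		spin_unlock(&fctx->lock);
	}
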
+ 32 - 0
drivers/gpu/drm/nouveau/nouveau_fifo.h

@@ -0,0 +1,32 @@
+#ifndef __NOUVEAU_FIFO_H__
+#define __NOUVEAU_FIFO_H__
+
+struct nouveau_fifo_priv {
+	struct nouveau_exec_engine base;
+	u32 channels;
+};
+
+struct nouveau_fifo_chan {
+};
+
+bool nv04_fifo_cache_pull(struct drm_device *, bool);
+void nv04_fifo_context_del(struct nouveau_channel *, int);
+int  nv04_fifo_fini(struct drm_device *, int, bool);
+int  nv04_fifo_init(struct drm_device *, int);
+void nv04_fifo_isr(struct drm_device *);
+void nv04_fifo_destroy(struct drm_device *, int);
+
+void nv50_fifo_playlist_update(struct drm_device *);
+void nv50_fifo_destroy(struct drm_device *, int);
+void nv50_fifo_tlb_flush(struct drm_device *, int);
+
+int  nv04_fifo_create(struct drm_device *);
+int  nv10_fifo_create(struct drm_device *);
+int  nv17_fifo_create(struct drm_device *);
+int  nv40_fifo_create(struct drm_device *);
+int  nv50_fifo_create(struct drm_device *);
+int  nv84_fifo_create(struct drm_device *);
+int  nvc0_fifo_create(struct drm_device *);
+int  nve0_fifo_create(struct drm_device *);
+
+#endif

+ 3 - 2
drivers/gpu/drm/nouveau/nouveau_gem.c

@@ -30,6 +30,7 @@
 #include "nouveau_drv.h"
 #include "nouveau_drm.h"
 #include "nouveau_dma.h"
+#include "nouveau_fence.h"
 
 #define nouveau_gem_pushbuf_sync(chan) 0
 
@@ -708,7 +709,7 @@ nouveau_gem_ioctl_pushbuf(struct drm_device *dev, void *data,
 	}
 
 	if (chan->dma.ib_max) {
-		ret = nouveau_dma_wait(chan, req->nr_push + 1, 6);
+		ret = nouveau_dma_wait(chan, req->nr_push + 1, 16);
 		if (ret) {
 			NV_INFO(dev, "nv50cal_space: %d\n", ret);
 			goto out;
@@ -778,7 +779,7 @@ nouveau_gem_ioctl_pushbuf(struct drm_device *dev, void *data,
 		}
 	}
 
-	ret = nouveau_fence_new(chan, &fence, true);
+	ret = nouveau_fence_new(chan, &fence);
 	if (ret) {
 		NV_ERROR(dev, "error fencing pushbuf: %d\n", ret);
 		WIND_RING(chan);

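The bump from 6 to 16 reserved dwords is presumably headroom for fence emission at submit time: the per-chipset emit() can need considerably more ring space than the old 2-dword REF_CNT write. On nv84+ it uses the 7-dword semaphore-release form that the deleted semaphore_release() in nouveau_fence.c used. A hedged sketch of such an emit() (the offset into a shared fence buffer is a hypothetical per-channel slot; the real backend lives in nv84_fence.c, outside this excerpt):

	static int
	nv84_style_emit(struct nouveau_fence *fence, u64 offset)
	{
		struct nouveau_channel *chan = fence->channel;
		int ret = RING_SPACE(chan, 7);
		if (ret)
			return ret;

		BEGIN_NV04(chan, 0, NV11_SUBCHAN_DMA_SEMAPHORE, 1);
		OUT_RING  (chan, chan->vram_handle);
		BEGIN_NV04(chan, 0, NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH, 4);
		OUT_RING  (chan, upper_32_bits(offset));
		OUT_RING  (chan, lower_32_bits(offset));
		OUT_RING  (chan, fence->sequence);
		OUT_RING  (chan, 2); /* RELEASE */
		FIRE_RING (chan);
		return 0;
	}
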
+ 1 - 1
drivers/gpu/drm/nouveau/nouveau_gpio.c

@@ -387,7 +387,7 @@ nouveau_gpio_reset(struct drm_device *dev)
 		if (dev_priv->card_type >= NV_D0) {
 			nv_mask(dev, 0x00d610 + (line * 4), 0xff, unk0);
 			if (unk1--)
-				nv_mask(dev, 0x00d640 + (unk1 * 4), 0xff, line);
+				nv_mask(dev, 0x00d740 + (unk1 * 4), 0xff, line);
 		} else
 		if (dev_priv->card_type >= NV_50) {
 			static const u32 regs[] = { 0xe100, 0xe28c };

+ 0 - 4
drivers/gpu/drm/nouveau/nouveau_grctx.h

@@ -18,7 +18,6 @@ struct nouveau_grctx {
 	uint32_t ctxvals_base;
 };
 
-#ifdef CP_CTX
 static inline void
 cp_out(struct nouveau_grctx *ctx, uint32_t inst)
 {
@@ -88,10 +87,8 @@ _cp_bra(struct nouveau_grctx *ctx, u32 mod, int flag, int state, int name)
 		    (state ? 0 : CP_BRA_IF_CLEAR));
 }
 #define cp_bra(c, f, s, n) _cp_bra((c), 0, CP_FLAG_##f, CP_FLAG_##f##_##s, n)
-#ifdef CP_BRA_MOD
 #define cp_cal(c, f, s, n) _cp_bra((c), 1, CP_FLAG_##f, CP_FLAG_##f##_##s, n)
 #define cp_ret(c, f, s) _cp_bra((c), 2, CP_FLAG_##f, CP_FLAG_##f##_##s, 0)
-#endif
 
 static inline void
 _cp_wait(struct nouveau_grctx *ctx, int flag, int state)
@@ -128,6 +125,5 @@ gr_def(struct nouveau_grctx *ctx, uint32_t reg, uint32_t val)
 
 	nv_wo32(ctx->data, reg * 4, val);
 }
-#endif
 
 #endif

+ 0 - 5
drivers/gpu/drm/nouveau/nouveau_hw.c

@@ -1018,11 +1018,6 @@ nv_load_state_ext(struct drm_device *dev, int head,
 	}
 
 	NVWriteCRTC(dev, head, NV_PCRTC_START, regp->fb_start);
-
-	/* Enable vblank interrupts. */
-	NVWriteCRTC(dev, head, NV_PCRTC_INTR_EN_0,
-		    (dev->vblank_enabled[head] ? 1 : 0));
-	NVWriteCRTC(dev, head, NV_PCRTC_INTR_0, NV_PCRTC_INTR_0_VBLANK);
 }
 
 static void

+ 11 - 6
drivers/gpu/drm/nouveau/nouveau_mem.c

@@ -39,6 +39,8 @@
 #include "nouveau_pm.h"
 #include "nouveau_mm.h"
 #include "nouveau_vm.h"
+#include "nouveau_fifo.h"
+#include "nouveau_fence.h"
 
 /*
  * NV10-NV40 tiling helpers
@@ -50,7 +52,6 @@ nv10_mem_update_tile_region(struct drm_device *dev,
 			    uint32_t size, uint32_t pitch, uint32_t flags)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nouveau_fifo_engine *pfifo = &dev_priv->engine.fifo;
 	struct nouveau_fb_engine *pfb = &dev_priv->engine.fb;
 	int i = tile - dev_priv->tile.reg, j;
 	unsigned long save;
@@ -64,8 +65,8 @@ nv10_mem_update_tile_region(struct drm_device *dev,
 		pfb->init_tile_region(dev, i, addr, size, pitch, flags);
 
 	spin_lock_irqsave(&dev_priv->context_switch_lock, save);
-	pfifo->reassign(dev, false);
-	pfifo->cache_pull(dev, false);
+	nv_wr32(dev, NV03_PFIFO_CACHES, 0);
+	nv04_fifo_cache_pull(dev, false);
 
 	nouveau_wait_for_idle(dev);
 
@@ -75,8 +76,8 @@ nv10_mem_update_tile_region(struct drm_device *dev,
 			dev_priv->eng[j]->set_tile_region(dev, i);
 	}
 
-	pfifo->cache_pull(dev, true);
-	pfifo->reassign(dev, true);
+	nv04_fifo_cache_pull(dev, true);
+	nv_wr32(dev, NV03_PFIFO_CACHES, 1);
 	spin_unlock_irqrestore(&dev_priv->context_switch_lock, save);
 }
 
@@ -89,7 +90,7 @@ nv10_mem_get_tile_region(struct drm_device *dev, int i)
 	spin_lock(&dev_priv->tile.lock);
 
 	if (!tile->used &&
-	    (!tile->fence || nouveau_fence_signalled(tile->fence)))
+	    (!tile->fence || nouveau_fence_done(tile->fence)))
 		tile->used = true;
 	else
 		tile = NULL;
@@ -843,6 +844,7 @@ nouveau_mem_timing_calc(struct drm_device *dev, u32 freq,
 		ret = nv50_mem_timing_calc(dev, freq, e, len, boot, t);
 		break;
 	case NV_C0:
+	case NV_D0:
 		ret = nvc0_mem_timing_calc(dev, freq, e, len, boot, t);
 		break;
 	default:
@@ -977,6 +979,8 @@ nouveau_mem_exec(struct nouveau_mem_exec_func *exec,
 		break;
 	case NV_MEM_TYPE_DDR3:
 		tDLLK = 12000;
+		tCKSRE = 2000;
+		tXS = 1000;
 		mr1_dlloff = 0x00000001;
 		break;
 	case NV_MEM_TYPE_GDDR3:
@@ -1023,6 +1027,7 @@ nouveau_mem_exec(struct nouveau_mem_exec_func *exec,
 	exec->refresh_self(exec, false);
 	exec->refresh_auto(exec, true);
 	exec->wait(exec, tXS);
+	exec->wait(exec, tXS);
 
 	/* update MRs */
 	if (mr[2] != info->mr[2]) {

+ 19 - 196
drivers/gpu/drm/nouveau/nouveau_object.c

@@ -34,9 +34,10 @@
 #include "drm.h"
 #include "nouveau_drv.h"
 #include "nouveau_drm.h"
+#include "nouveau_fifo.h"
 #include "nouveau_ramht.h"
+#include "nouveau_software.h"
 #include "nouveau_vm.h"
-#include "nv50_display.h"
 
 struct nouveau_gpuobj_method {
 	struct list_head head;
@@ -120,12 +121,13 @@ nouveau_gpuobj_mthd_call2(struct drm_device *dev, int chid,
 			  u32 class, u32 mthd, u32 data)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nouveau_fifo_priv *pfifo = nv_engine(dev, NVOBJ_ENGINE_FIFO);
 	struct nouveau_channel *chan = NULL;
 	unsigned long flags;
 	int ret = -EINVAL;
 
 	spin_lock_irqsave(&dev_priv->channels.lock, flags);
-	if (chid >= 0 && chid < dev_priv->engine.fifo.channels)
+	if (chid >= 0 && chid < pfifo->channels)
 		chan = dev_priv->channels.ptr[chid];
 	if (chan)
 		ret = nouveau_gpuobj_mthd_call(chan, class, mthd, data);
@@ -133,37 +135,6 @@ nouveau_gpuobj_mthd_call2(struct drm_device *dev, int chid,
 	return ret;
 }
 
-/* NVidia uses context objects to drive drawing operations.
-
-   Context objects can be selected into 8 subchannels in the FIFO,
-   and then used via DMA command buffers.
-
-   A context object is referenced by a user defined handle (CARD32). The HW
-   looks up graphics objects in a hash table in the instance RAM.
-
-   An entry in the hash table consists of 2 CARD32. The first CARD32 contains
-   the handle, the second one a bitfield, that contains the address of the
-   object in instance RAM.
-
-   The format of the second CARD32 seems to be:
-
-   NV4 to NV30:
-
-   15: 0  instance_addr >> 4
-   17:16  engine (here uses 1 = graphics)
-   28:24  channel id (here uses 0)
-   31	  valid (use 1)
-
-   NV40:
-
-   15: 0  instance_addr >> 4   (maybe 19-0)
-   21:20  engine (here uses 1 = graphics)
-   I'm unsure about the other bits, but using 0 seems to work.
-
-   The key into the hash table depends on the object handle and channel id and
-   is given as:
-*/
-
 int
 nouveau_gpuobj_new(struct drm_device *dev, struct nouveau_channel *chan,
 		   uint32_t size, int align, uint32_t flags,
@@ -267,7 +238,7 @@ nouveau_gpuobj_takedown(struct drm_device *dev)
 		kfree(oc);
 	}
 
-	BUG_ON(!list_empty(&dev_priv->gpuobj_list));
+	WARN_ON(!list_empty(&dev_priv->gpuobj_list));
 }
 
 
@@ -361,34 +332,6 @@ nouveau_gpuobj_new_fake(struct drm_device *dev, u32 pinst, u64 vinst,
 	return 0;
 }
 
-/*
-   DMA objects are used to reference a piece of memory in the
-   framebuffer, PCI or AGP address space. Each object is 16 bytes big
-   and looks as follows:
-
-   entry[0]
-   11:0  class (seems like I can always use 0 here)
-   12    page table present?
-   13    page entry linear?
-   15:14 access: 0 rw, 1 ro, 2 wo
-   17:16 target: 0 NV memory, 1 NV memory tiled, 2 PCI, 3 AGP
-   31:20 dma adjust (bits 0-11 of the address)
-   entry[1]
-   dma limit (size of transfer)
-   entry[X]
-   1     0 readonly, 1 readwrite
-   31:12 dma frame address of the page (bits 12-31 of the address)
-   entry[N]
-   page table terminator, same value as the first pte, as does nvidia
-   rivatv uses 0xffffffff
-
-   Non linear page tables need a list of frame addresses afterwards,
-   the rivatv project has some info on this.
-
-   The method below creates a DMA object in instance RAM and returns a handle
-   to it that can be used to set up context objects.
-*/
-
 void
 nv50_gpuobj_dma_init(struct nouveau_gpuobj *obj, u32 offset, int class,
 		     u64 base, u64 size, int target, int access,
@@ -540,82 +483,6 @@ nouveau_gpuobj_dma_new(struct nouveau_channel *chan, int class, u64 base,
 	return 0;
 }
 
-/* Context objects in the instance RAM have the following structure.
- * On NV40 they are 32 byte long, on NV30 and smaller 16 bytes.
-
-   NV4 - NV30:
-
-   entry[0]
-   11:0 class
-   12   chroma key enable
-   13   user clip enable
-   14   swizzle enable
-   17:15 patch config:
-       scrcopy_and, rop_and, blend_and, scrcopy, srccopy_pre, blend_pre
-   18   synchronize enable
-   19   endian: 1 big, 0 little
-   21:20 dither mode
-   23    single step enable
-   24    patch status: 0 invalid, 1 valid
-   25    context_surface 0: 1 valid
-   26    context surface 1: 1 valid
-   27    context pattern: 1 valid
-   28    context rop: 1 valid
-   29,30 context beta, beta4
-   entry[1]
-   7:0   mono format
-   15:8  color format
-   31:16 notify instance address
-   entry[2]
-   15:0  dma 0 instance address
-   31:16 dma 1 instance address
-   entry[3]
-   dma method traps
-
-   NV40:
-   No idea what the exact format is. Here's what can be deducted:
-
-   entry[0]:
-   11:0  class  (maybe uses more bits here?)
-   17    user clip enable
-   21:19 patch config
-   25    patch status valid ?
-   entry[1]:
-   15:0  DMA notifier  (maybe 20:0)
-   entry[2]:
-   15:0  DMA 0 instance (maybe 20:0)
-   24    big endian
-   entry[3]:
-   15:0  DMA 1 instance (maybe 20:0)
-   entry[4]:
-   entry[5]:
-   set to 0?
-*/
-static int
-nouveau_gpuobj_sw_new(struct nouveau_channel *chan, u32 handle, u16 class)
-{
-	struct drm_nouveau_private *dev_priv = chan->dev->dev_private;
-	struct nouveau_gpuobj *gpuobj;
-	int ret;
-
-	gpuobj = kzalloc(sizeof(*gpuobj), GFP_KERNEL);
-	if (!gpuobj)
-		return -ENOMEM;
-	gpuobj->dev = chan->dev;
-	gpuobj->engine = NVOBJ_ENGINE_SW;
-	gpuobj->class = class;
-	kref_init(&gpuobj->refcount);
-	gpuobj->cinst = 0x40;
-
-	spin_lock(&dev_priv->ramin_lock);
-	list_add_tail(&gpuobj->list, &dev_priv->gpuobj_list);
-	spin_unlock(&dev_priv->ramin_lock);
-
-	ret = nouveau_ramht_insert(chan, handle, gpuobj);
-	nouveau_gpuobj_ref(NULL, &gpuobj);
-	return ret;
-}
-
 int
 nouveau_gpuobj_gr_new(struct nouveau_channel *chan, u32 handle, int class)
 {
@@ -632,9 +499,6 @@ nouveau_gpuobj_gr_new(struct nouveau_channel *chan, u32 handle, int class)
 		if (oc->id != class)
 			continue;
 
-		if (oc->engine == NVOBJ_ENGINE_SW)
-			return nouveau_gpuobj_sw_new(chan, handle, class);
-
 		if (!chan->engctx[oc->engine]) {
 			ret = eng->context_new(chan, oc->engine);
 			if (ret)
@@ -644,7 +508,6 @@ nouveau_gpuobj_gr_new(struct nouveau_channel *chan, u32 handle, int class)
 		return eng->object_new(chan, oc->engine, handle, class);
 	}
 
-	NV_ERROR(dev, "illegal object class: 0x%x\n", class);
 	return -EINVAL;
 }
 
@@ -693,11 +556,10 @@ nouveau_gpuobj_channel_init_pramin(struct nouveau_channel *chan)
 static int
 nvc0_gpuobj_channel_init(struct nouveau_channel *chan, struct nouveau_vm *vm)
 {
-	struct drm_nouveau_private *dev_priv = chan->dev->dev_private;
 	struct drm_device *dev = chan->dev;
 	struct nouveau_gpuobj *pgd = NULL;
 	struct nouveau_vm_pgd *vpgd;
-	int ret, i;
+	int ret;
 
 	ret = nouveau_gpuobj_new(dev, NULL, 4096, 0x1000, 0, &chan->ramin);
 	if (ret)
@@ -722,19 +584,6 @@ nvc0_gpuobj_channel_init(struct nouveau_channel *chan, struct nouveau_vm *vm)
 	nv_wo32(chan->ramin, 0x0208, 0xffffffff);
 	nv_wo32(chan->ramin, 0x020c, 0x000000ff);
 
-	/* map display semaphore buffers into channel's vm */
-	for (i = 0; i < dev->mode_config.num_crtc; i++) {
-		struct nouveau_bo *bo;
-		if (dev_priv->card_type >= NV_D0)
-			bo = nvd0_display_crtc_sema(dev, i);
-		else
-			bo = nv50_display(dev)->crtc[i].sem.bo;
-
-		ret = nouveau_bo_vma_add(bo, chan->vm, &chan->dispc_vma[i]);
-		if (ret)
-			return ret;
-	}
-
 	return 0;
 }
 
@@ -747,7 +596,7 @@ nouveau_gpuobj_channel_init(struct nouveau_channel *chan,
 	struct nouveau_fpriv *fpriv = nouveau_fpriv(chan->file_priv);
 	struct nouveau_vm *vm = fpriv ? fpriv->vm : dev_priv->chan_vm;
 	struct nouveau_gpuobj *vram = NULL, *tt = NULL;
-	int ret, i;
+	int ret;
 
 	NV_DEBUG(dev, "ch%d vram=0x%08x tt=0x%08x\n", chan->id, vram_h, tt_h);
 	if (dev_priv->card_type >= NV_C0)
@@ -795,25 +644,6 @@ nouveau_gpuobj_channel_init(struct nouveau_channel *chan,
 		nouveau_gpuobj_ref(NULL, &ramht);
 		if (ret)
 			return ret;
-
-		/* dma objects for display sync channel semaphore blocks */
-		for (i = 0; i < dev->mode_config.num_crtc; i++) {
-			struct nouveau_gpuobj *sem = NULL;
-			struct nv50_display_crtc *dispc =
-				&nv50_display(dev)->crtc[i];
-			u64 offset = dispc->sem.bo->bo.offset;
-
-			ret = nouveau_gpuobj_dma_new(chan, 0x3d, offset, 0xfff,
-						     NV_MEM_ACCESS_RW,
-						     NV_MEM_TARGET_VRAM, &sem);
-			if (ret)
-				return ret;
-
-			ret = nouveau_ramht_insert(chan, NvEvoSema0 + i, sem);
-			nouveau_gpuobj_ref(NULL, &sem);
-			if (ret)
-				return ret;
-		}
 	}
 
 	/* VRAM ctxdma */
@@ -873,25 +703,7 @@ nouveau_gpuobj_channel_init(struct nouveau_channel *chan,
 void
 nouveau_gpuobj_channel_takedown(struct nouveau_channel *chan)
 {
-	struct drm_device *dev = chan->dev;
-	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	int i;
-
-	NV_DEBUG(dev, "ch%d\n", chan->id);
-
-	if (dev_priv->card_type >= NV_D0) {
-		for (i = 0; i < dev->mode_config.num_crtc; i++) {
-			struct nouveau_bo *bo = nvd0_display_crtc_sema(dev, i);
-			nouveau_bo_vma_del(bo, &chan->dispc_vma[i]);
-		}
-	} else
-	if (dev_priv->card_type >= NV_50) {
-		struct nv50_display *disp = nv50_display(dev);
-		for (i = 0; i < dev->mode_config.num_crtc; i++) {
-			struct nv50_display_crtc *dispc = &disp->crtc[i];
-			nouveau_bo_vma_del(dispc->sem.bo, &chan->dispc_vma[i]);
-		}
-	}
+	NV_DEBUG(chan->dev, "ch%d\n", chan->id);
 
 	nouveau_vm_ref(NULL, &chan->vm, chan->vm_pd);
 	nouveau_gpuobj_ref(NULL, &chan->vm_pd);
@@ -956,6 +768,17 @@ int nouveau_ioctl_grobj_alloc(struct drm_device *dev, void *data,
 	if (init->handle == ~0)
 		return -EINVAL;
 
+	/* compatibility with userspace that assumes 506e for all chipsets */
+	if (init->class == 0x506e) {
+		init->class = nouveau_software_class(dev);
+		if (init->class == 0x906e)
+			return 0;
+	} else
+	if (init->class == 0x906e) {
+		NV_ERROR(dev, "906e not supported yet\n");
+		return -EINVAL;
+	}
+
 	chan = nouveau_channel_get(file_priv, init->channel);
 	if (IS_ERR(chan))
 		return PTR_ERR(chan);

+ 1 - 1
drivers/gpu/drm/nouveau/nouveau_perf.c

@@ -83,7 +83,7 @@ nouveau_perf_entry(struct drm_device *dev, int idx,
 	return NULL;
 }
 
-static u8 *
+u8 *
 nouveau_perf_rammap(struct drm_device *dev, u32 freq,
 		    u8 *ver, u8 *hdr, u8 *cnt, u8 *len)
 {

+ 3 - 1
drivers/gpu/drm/nouveau/nouveau_pm.h

@@ -61,8 +61,10 @@ int  nouveau_voltage_gpio_set(struct drm_device *, int voltage);
 /* nouveau_perf.c */
 void nouveau_perf_init(struct drm_device *);
 void nouveau_perf_fini(struct drm_device *);
-u8 *nouveau_perf_timing(struct drm_device *, u32 freq, u8 *ver, u8 *len);
+u8 *nouveau_perf_rammap(struct drm_device *, u32 freq, u8 *ver,
+			u8 *hdr, u8 *cnt, u8 *len);
 u8 *nouveau_perf_ramcfg(struct drm_device *, u32 freq, u8 *ver, u8 *len);
+u8 *nouveau_perf_timing(struct drm_device *, u32 freq, u8 *ver, u8 *len);
 
 /* nouveau_mem.c */
 void nouveau_mem_timing_init(struct drm_device *);

+ 2 - 2
drivers/gpu/drm/nouveau/nouveau_sgdma.c

@@ -341,10 +341,10 @@ nouveau_sgdma_init(struct drm_device *dev)
 	u32 aper_size, align;
 	int ret;
 
-	if (dev_priv->card_type >= NV_40 && pci_is_pcie(dev->pdev))
+	if (dev_priv->card_type >= NV_40)
 		aper_size = 512 * 1024 * 1024;
 	else
-		aper_size = 64 * 1024 * 1024;
+		aper_size = 128 * 1024 * 1024;
 
 	/* Dear NVIDIA, NV44+ would like proper present bits in PTEs for
 	 * christmas.  The cards before it have them, the cards after

+ 69 - 0
drivers/gpu/drm/nouveau/nouveau_software.h

@@ -0,0 +1,69 @@
+#ifndef __NOUVEAU_SOFTWARE_H__
+#define __NOUVEAU_SOFTWARE_H__
+
+struct nouveau_software_priv {
+	struct nouveau_exec_engine base;
+	struct list_head vblank;
+};
+
+struct nouveau_software_chan {
+	struct list_head flip;
+	struct {
+		struct list_head list;
+		struct nouveau_bo *bo;
+		u32 offset;
+		u32 value;
+		u32 head;
+	} vblank;
+};
+
+static inline void
+nouveau_software_vblank(struct drm_device *dev, int crtc)
+{
+	struct nouveau_software_priv *psw = nv_engine(dev, NVOBJ_ENGINE_SW);
+	struct nouveau_software_chan *pch, *tmp;
+
+	list_for_each_entry_safe(pch, tmp, &psw->vblank, vblank.list) {
+		if (pch->vblank.head != crtc)
+			continue;
+
+		nouveau_bo_wr32(pch->vblank.bo, pch->vblank.offset,
+						pch->vblank.value);
+		list_del(&pch->vblank.list);
+		drm_vblank_put(dev, crtc);
+	}
+}
+
+static inline void
+nouveau_software_context_new(struct nouveau_software_chan *pch)
+{
+	INIT_LIST_HEAD(&pch->flip);
+}
+
+static inline void
+nouveau_software_create(struct nouveau_software_priv *psw)
+{
+	INIT_LIST_HEAD(&psw->vblank);
+}
+
+static inline u16
+nouveau_software_class(struct drm_device *dev)
+{
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	if (dev_priv->card_type <= NV_04)
+		return 0x006e;
+	if (dev_priv->card_type <= NV_40)
+		return 0x016e;
+	if (dev_priv->card_type <= NV_50)
+		return 0x506e;
+	if (dev_priv->card_type <= NV_E0)
+		return 0x906e;
+	return 0x0000;
+}
+
+int nv04_software_create(struct drm_device *);
+int nv50_software_create(struct drm_device *);
+int nvc0_software_create(struct drm_device *);
+u64 nvc0_software_crtc(struct nouveau_channel *, int crtc);
+
+#endif

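The vblank list above is consumed from the display interrupt path via nouveau_software_vblank(); the per-channel side gets armed by the software object's semaphore-on-vblank methods. A hypothetical helper showing just the data flow (names and locking are assumptions; the real method handlers live in nv50_software.c, outside this excerpt):

	static void
	software_vblank_arm(struct drm_device *dev,
			    struct nouveau_software_chan *pch,
			    struct nouveau_bo *bo, u32 offset, u32 value,
			    int head)
	{
		struct nouveau_software_priv *psw =
			nv_engine(dev, NVOBJ_ENGINE_SW);

		pch->vblank.bo     = bo;	/* semaphore buffer */
		pch->vblank.offset = offset;	/* dword index written on vblank */
		pch->vblank.value  = value;
		pch->vblank.head   = head;

		drm_vblank_get(dev, head);	/* balanced by drm_vblank_put() */
		list_add(&pch->vblank.list, &psw->vblank);
	}
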
+ 93 - 167
drivers/gpu/drm/nouveau/nouveau_state.c

@@ -39,6 +39,9 @@
 #include "nouveau_gpio.h"
 #include "nouveau_pm.h"
 #include "nv50_display.h"
+#include "nouveau_fifo.h"
+#include "nouveau_fence.h"
+#include "nouveau_software.h"
 
 static void nouveau_stub_takedown(struct drm_device *dev) {}
 static int nouveau_stub_init(struct drm_device *dev) { return 0; }
@@ -66,18 +69,6 @@ static int nouveau_init_engine_ptrs(struct drm_device *dev)
 		engine->timer.takedown		= nv04_timer_takedown;
 		engine->fb.init			= nv04_fb_init;
 		engine->fb.takedown		= nv04_fb_takedown;
-		engine->fifo.channels		= 16;
-		engine->fifo.init		= nv04_fifo_init;
-		engine->fifo.takedown		= nv04_fifo_fini;
-		engine->fifo.disable		= nv04_fifo_disable;
-		engine->fifo.enable		= nv04_fifo_enable;
-		engine->fifo.reassign		= nv04_fifo_reassign;
-		engine->fifo.cache_pull		= nv04_fifo_cache_pull;
-		engine->fifo.channel_id		= nv04_fifo_channel_id;
-		engine->fifo.create_context	= nv04_fifo_create_context;
-		engine->fifo.destroy_context	= nv04_fifo_destroy_context;
-		engine->fifo.load_context	= nv04_fifo_load_context;
-		engine->fifo.unload_context	= nv04_fifo_unload_context;
 		engine->display.early_init	= nv04_display_early_init;
 		engine->display.late_takedown	= nv04_display_late_takedown;
 		engine->display.create		= nv04_display_create;
@@ -111,18 +102,6 @@ static int nouveau_init_engine_ptrs(struct drm_device *dev)
 		engine->fb.init_tile_region	= nv10_fb_init_tile_region;
 		engine->fb.set_tile_region	= nv10_fb_set_tile_region;
 		engine->fb.free_tile_region	= nv10_fb_free_tile_region;
-		engine->fifo.channels		= 32;
-		engine->fifo.init		= nv10_fifo_init;
-		engine->fifo.takedown		= nv04_fifo_fini;
-		engine->fifo.disable		= nv04_fifo_disable;
-		engine->fifo.enable		= nv04_fifo_enable;
-		engine->fifo.reassign		= nv04_fifo_reassign;
-		engine->fifo.cache_pull		= nv04_fifo_cache_pull;
-		engine->fifo.channel_id		= nv10_fifo_channel_id;
-		engine->fifo.create_context	= nv10_fifo_create_context;
-		engine->fifo.destroy_context	= nv04_fifo_destroy_context;
-		engine->fifo.load_context	= nv10_fifo_load_context;
-		engine->fifo.unload_context	= nv10_fifo_unload_context;
 		engine->display.early_init	= nv04_display_early_init;
 		engine->display.late_takedown	= nv04_display_late_takedown;
 		engine->display.create		= nv04_display_create;
@@ -162,18 +141,6 @@ static int nouveau_init_engine_ptrs(struct drm_device *dev)
 		engine->fb.init_tile_region	= nv20_fb_init_tile_region;
 		engine->fb.set_tile_region	= nv20_fb_set_tile_region;
 		engine->fb.free_tile_region	= nv20_fb_free_tile_region;
-		engine->fifo.channels		= 32;
-		engine->fifo.init		= nv10_fifo_init;
-		engine->fifo.takedown		= nv04_fifo_fini;
-		engine->fifo.disable		= nv04_fifo_disable;
-		engine->fifo.enable		= nv04_fifo_enable;
-		engine->fifo.reassign		= nv04_fifo_reassign;
-		engine->fifo.cache_pull		= nv04_fifo_cache_pull;
-		engine->fifo.channel_id		= nv10_fifo_channel_id;
-		engine->fifo.create_context	= nv10_fifo_create_context;
-		engine->fifo.destroy_context	= nv04_fifo_destroy_context;
-		engine->fifo.load_context	= nv10_fifo_load_context;
-		engine->fifo.unload_context	= nv10_fifo_unload_context;
 		engine->display.early_init	= nv04_display_early_init;
 		engine->display.late_takedown	= nv04_display_late_takedown;
 		engine->display.create		= nv04_display_create;
@@ -209,18 +176,6 @@ static int nouveau_init_engine_ptrs(struct drm_device *dev)
 		engine->fb.init_tile_region	= nv30_fb_init_tile_region;
 		engine->fb.set_tile_region	= nv10_fb_set_tile_region;
 		engine->fb.free_tile_region	= nv30_fb_free_tile_region;
-		engine->fifo.channels		= 32;
-		engine->fifo.init		= nv10_fifo_init;
-		engine->fifo.takedown		= nv04_fifo_fini;
-		engine->fifo.disable		= nv04_fifo_disable;
-		engine->fifo.enable		= nv04_fifo_enable;
-		engine->fifo.reassign		= nv04_fifo_reassign;
-		engine->fifo.cache_pull		= nv04_fifo_cache_pull;
-		engine->fifo.channel_id		= nv10_fifo_channel_id;
-		engine->fifo.create_context	= nv10_fifo_create_context;
-		engine->fifo.destroy_context	= nv04_fifo_destroy_context;
-		engine->fifo.load_context	= nv10_fifo_load_context;
-		engine->fifo.unload_context	= nv10_fifo_unload_context;
 		engine->display.early_init	= nv04_display_early_init;
 		engine->display.late_takedown	= nv04_display_late_takedown;
 		engine->display.create		= nv04_display_create;
@@ -259,18 +214,6 @@ static int nouveau_init_engine_ptrs(struct drm_device *dev)
 		engine->fb.init_tile_region	= nv30_fb_init_tile_region;
 		engine->fb.set_tile_region	= nv40_fb_set_tile_region;
 		engine->fb.free_tile_region	= nv30_fb_free_tile_region;
-		engine->fifo.channels		= 32;
-		engine->fifo.init		= nv40_fifo_init;
-		engine->fifo.takedown		= nv04_fifo_fini;
-		engine->fifo.disable		= nv04_fifo_disable;
-		engine->fifo.enable		= nv04_fifo_enable;
-		engine->fifo.reassign		= nv04_fifo_reassign;
-		engine->fifo.cache_pull		= nv04_fifo_cache_pull;
-		engine->fifo.channel_id		= nv10_fifo_channel_id;
-		engine->fifo.create_context	= nv40_fifo_create_context;
-		engine->fifo.destroy_context	= nv04_fifo_destroy_context;
-		engine->fifo.load_context	= nv40_fifo_load_context;
-		engine->fifo.unload_context	= nv40_fifo_unload_context;
 		engine->display.early_init	= nv04_display_early_init;
 		engine->display.late_takedown	= nv04_display_late_takedown;
 		engine->display.create		= nv04_display_create;
@@ -317,18 +260,6 @@ static int nouveau_init_engine_ptrs(struct drm_device *dev)
 		engine->timer.takedown		= nv04_timer_takedown;
 		engine->fb.init			= nv50_fb_init;
 		engine->fb.takedown		= nv50_fb_takedown;
-		engine->fifo.channels		= 128;
-		engine->fifo.init		= nv50_fifo_init;
-		engine->fifo.takedown		= nv50_fifo_takedown;
-		engine->fifo.disable		= nv04_fifo_disable;
-		engine->fifo.enable		= nv04_fifo_enable;
-		engine->fifo.reassign		= nv04_fifo_reassign;
-		engine->fifo.channel_id		= nv50_fifo_channel_id;
-		engine->fifo.create_context	= nv50_fifo_create_context;
-		engine->fifo.destroy_context	= nv50_fifo_destroy_context;
-		engine->fifo.load_context	= nv50_fifo_load_context;
-		engine->fifo.unload_context	= nv50_fifo_unload_context;
-		engine->fifo.tlb_flush		= nv50_fifo_tlb_flush;
 		engine->display.early_init	= nv50_display_early_init;
 		engine->display.late_takedown	= nv50_display_late_takedown;
 		engine->display.create		= nv50_display_create;
@@ -392,17 +323,6 @@ static int nouveau_init_engine_ptrs(struct drm_device *dev)
 		engine->timer.takedown		= nv04_timer_takedown;
 		engine->fb.init			= nvc0_fb_init;
 		engine->fb.takedown		= nvc0_fb_takedown;
-		engine->fifo.channels		= 128;
-		engine->fifo.init		= nvc0_fifo_init;
-		engine->fifo.takedown		= nvc0_fifo_takedown;
-		engine->fifo.disable		= nvc0_fifo_disable;
-		engine->fifo.enable		= nvc0_fifo_enable;
-		engine->fifo.reassign		= nvc0_fifo_reassign;
-		engine->fifo.channel_id		= nvc0_fifo_channel_id;
-		engine->fifo.create_context	= nvc0_fifo_create_context;
-		engine->fifo.destroy_context	= nvc0_fifo_destroy_context;
-		engine->fifo.load_context	= nvc0_fifo_load_context;
-		engine->fifo.unload_context	= nvc0_fifo_unload_context;
 		engine->display.early_init	= nv50_display_early_init;
 		engine->display.late_takedown	= nv50_display_late_takedown;
 		engine->display.create		= nv50_display_create;
@@ -445,17 +365,6 @@ static int nouveau_init_engine_ptrs(struct drm_device *dev)
 		engine->timer.takedown		= nv04_timer_takedown;
 		engine->fb.init			= nvc0_fb_init;
 		engine->fb.takedown		= nvc0_fb_takedown;
-		engine->fifo.channels		= 128;
-		engine->fifo.init		= nvc0_fifo_init;
-		engine->fifo.takedown		= nvc0_fifo_takedown;
-		engine->fifo.disable		= nvc0_fifo_disable;
-		engine->fifo.enable		= nvc0_fifo_enable;
-		engine->fifo.reassign		= nvc0_fifo_reassign;
-		engine->fifo.channel_id		= nvc0_fifo_channel_id;
-		engine->fifo.create_context	= nvc0_fifo_create_context;
-		engine->fifo.destroy_context	= nvc0_fifo_destroy_context;
-		engine->fifo.load_context	= nvc0_fifo_load_context;
-		engine->fifo.unload_context	= nvc0_fifo_unload_context;
 		engine->display.early_init	= nouveau_stub_init;
 		engine->display.late_takedown	= nouveau_stub_takedown;
 		engine->display.create		= nvd0_display_create;
@@ -496,13 +405,6 @@ static int nouveau_init_engine_ptrs(struct drm_device *dev)
 		engine->timer.takedown		= nv04_timer_takedown;
 		engine->fb.init			= nvc0_fb_init;
 		engine->fb.takedown		= nvc0_fb_takedown;
-		engine->fifo.channels		= 0;
-		engine->fifo.init		= nouveau_stub_init;
-		engine->fifo.takedown		= nouveau_stub_takedown;
-		engine->fifo.disable		= nvc0_fifo_disable;
-		engine->fifo.enable		= nvc0_fifo_enable;
-		engine->fifo.reassign		= nvc0_fifo_reassign;
-		engine->fifo.unload_context	= nouveau_stub_init;
 		engine->display.early_init	= nouveau_stub_init;
 		engine->display.late_takedown	= nouveau_stub_takedown;
 		engine->display.create		= nvd0_display_create;
@@ -607,59 +509,16 @@ nouveau_card_channel_init(struct drm_device *dev)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
 	struct nouveau_channel *chan;
-	int ret, oclass;
+	int ret;
 
 	ret = nouveau_channel_alloc(dev, &chan, NULL, NvDmaFB, NvDmaTT);
 	dev_priv->channel = chan;
 	if (ret)
 		return ret;
-
 	mutex_unlock(&dev_priv->channel->mutex);
 
-	if (dev_priv->card_type <= NV_50) {
-		if (dev_priv->card_type < NV_50)
-			oclass = 0x0039;
-		else
-			oclass = 0x5039;
-
-		ret = nouveau_gpuobj_gr_new(chan, NvM2MF, oclass);
-		if (ret)
-			goto error;
-
-		ret = nouveau_notifier_alloc(chan, NvNotify0, 32, 0xfe0, 0x1000,
-					     &chan->m2mf_ntfy);
-		if (ret)
-			goto error;
-
-		ret = RING_SPACE(chan, 6);
-		if (ret)
-			goto error;
-
-		BEGIN_RING(chan, NvSubM2MF, NV_MEMORY_TO_MEMORY_FORMAT_NAME, 1);
-		OUT_RING  (chan, NvM2MF);
-		BEGIN_RING(chan, NvSubM2MF, NV_MEMORY_TO_MEMORY_FORMAT_DMA_NOTIFY, 3);
-		OUT_RING  (chan, NvNotify0);
-		OUT_RING  (chan, chan->vram_handle);
-		OUT_RING  (chan, chan->gart_handle);
-	} else
-	if (dev_priv->card_type <= NV_D0) {
-		ret = nouveau_gpuobj_gr_new(chan, 0x9039, 0x9039);
-		if (ret)
-			goto error;
-
-		ret = RING_SPACE(chan, 2);
-		if (ret)
-			goto error;
-
-		BEGIN_NVC0(chan, 2, NvSubM2MF, 0x0000, 1);
-		OUT_RING  (chan, 0x00009039);
-	}
-
-	FIRE_RING (chan);
-error:
-	if (ret)
-		nouveau_card_channel_fini(dev);
-	return ret;
+	nouveau_bo_move_init(chan);
+	return 0;
 }
 
 static const struct vga_switcheroo_client_ops nouveau_switcheroo_ops = {
@@ -747,6 +606,81 @@ nouveau_card_init(struct drm_device *dev)
 		goto out_ttmvram;
 
 	if (!dev_priv->noaccel) {
+		switch (dev_priv->card_type) {
+		case NV_04:
+			nv04_fifo_create(dev);
+			break;
+		case NV_10:
+		case NV_20:
+		case NV_30:
+			if (dev_priv->chipset < 0x17)
+				nv10_fifo_create(dev);
+			else
+				nv17_fifo_create(dev);
+			break;
+		case NV_40:
+			nv40_fifo_create(dev);
+			break;
+		case NV_50:
+			if (dev_priv->chipset == 0x50)
+				nv50_fifo_create(dev);
+			else
+				nv84_fifo_create(dev);
+			break;
+		case NV_C0:
+		case NV_D0:
+			nvc0_fifo_create(dev);
+			break;
+		case NV_E0:
+			nve0_fifo_create(dev);
+			break;
+		default:
+			break;
+		}
+
+		switch (dev_priv->card_type) {
+		case NV_04:
+			nv04_fence_create(dev);
+			break;
+		case NV_10:
+		case NV_20:
+		case NV_30:
+		case NV_40:
+		case NV_50:
+			if (dev_priv->chipset < 0x84)
+				nv10_fence_create(dev);
+			else
+				nv84_fence_create(dev);
+			break;
+		case NV_C0:
+		case NV_D0:
+		case NV_E0:
+			nvc0_fence_create(dev);
+			break;
+		default:
+			break;
+		}
+
+		switch (dev_priv->card_type) {
+		case NV_04:
+		case NV_10:
+		case NV_20:
+		case NV_30:
+		case NV_40:
+			nv04_software_create(dev);
+			break;
+		case NV_50:
+			nv50_software_create(dev);
+			break;
+		case NV_C0:
+		case NV_D0:
+		case NV_E0:
+			nvc0_software_create(dev);
+			break;
+		default:
+			break;
+		}
+
 		switch (dev_priv->card_type) {
 		case NV_04:
 			nv04_graph_create(dev);
@@ -768,6 +702,9 @@ nouveau_card_init(struct drm_device *dev)
 		case NV_D0:
 			nvc0_graph_create(dev);
 			break;
+		case NV_E0:
+			nve0_graph_create(dev);
+			break;
 		default:
 			break;
 		}
@@ -800,8 +737,9 @@ nouveau_card_init(struct drm_device *dev)
 			}
 			break;
 		case NV_C0:
-			nvc0_copy_create(dev, 0);
 			nvc0_copy_create(dev, 1);
+		case NV_D0:
+			nvc0_copy_create(dev, 0);
 			break;
 		default:
 			break;
@@ -834,16 +772,11 @@ nouveau_card_init(struct drm_device *dev)
 					goto out_engine;
 			}
 		}
-
-		/* PFIFO */
-		ret = engine->fifo.init(dev);
-		if (ret)
-			goto out_engine;
 	}
 
 	ret = nouveau_irq_init(dev);
 	if (ret)
-		goto out_fifo;
+		goto out_engine;
 
 	ret = nouveau_display_create(dev);
 	if (ret)
@@ -852,14 +785,10 @@ nouveau_card_init(struct drm_device *dev)
 	nouveau_backlight_init(dev);
 	nouveau_pm_init(dev);
 
-	ret = nouveau_fence_init(dev);
-	if (ret)
-		goto out_pm;
-
 	if (dev_priv->eng[NVOBJ_ENGINE_GR]) {
 		ret = nouveau_card_channel_init(dev);
 		if (ret)
-			goto out_fence;
+			goto out_pm;
 	}
 
 	if (dev->mode_config.num_crtc) {
@@ -874,17 +803,12 @@ nouveau_card_init(struct drm_device *dev)
 
 out_chan:
 	nouveau_card_channel_fini(dev);
-out_fence:
-	nouveau_fence_fini(dev);
 out_pm:
 	nouveau_pm_fini(dev);
 	nouveau_backlight_exit(dev);
 	nouveau_display_destroy(dev);
 out_irq:
 	nouveau_irq_fini(dev);
-out_fifo:
-	if (!dev_priv->noaccel)
-		engine->fifo.takedown(dev);
 out_engine:
 	if (!dev_priv->noaccel) {
 		for (e = e - 1; e >= 0; e--) {
@@ -916,6 +840,7 @@ out_bios:
 out_display_early:
 	engine->display.late_takedown(dev);
 out:
+	vga_switcheroo_unregister_client(dev->pdev);
 	vga_client_register(dev->pdev, NULL, NULL, NULL);
 	return ret;
 }
@@ -932,13 +857,11 @@ static void nouveau_card_takedown(struct drm_device *dev)
 	}
 
 	nouveau_card_channel_fini(dev);
-	nouveau_fence_fini(dev);
 	nouveau_pm_fini(dev);
 	nouveau_backlight_exit(dev);
 	nouveau_display_destroy(dev);
 
 	if (!dev_priv->noaccel) {
-		engine->fifo.takedown(dev);
 		for (e = NVOBJ_ENGINE_NR - 1; e >= 0; e--) {
 			if (dev_priv->eng[e]) {
 				dev_priv->eng[e]->fini(dev, e, false);
@@ -973,6 +896,7 @@ static void nouveau_card_takedown(struct drm_device *dev)
 
 	nouveau_irq_fini(dev);
 
+	vga_switcheroo_unregister_client(dev->pdev);
 	vga_client_register(dev->pdev, NULL, NULL, NULL);
 }
 
@@ -1180,7 +1104,7 @@ int nouveau_load(struct drm_device *dev, unsigned long flags)
 		goto err_priv;
 	}
 
-	NV_INFO(dev, "Detected an NV%2x generation card (0x%08x)\n",
+	NV_INFO(dev, "Detected an NV%02x generation card (0x%08x)\n",
 		     dev_priv->card_type, reg0);
 
 	/* map the mmio regs, limiting the amount to preserve vmap space */
@@ -1223,6 +1147,8 @@ int nouveau_load(struct drm_device *dev, unsigned long flags)
 	if (nouveau_noaccel == -1) {
 		switch (dev_priv->chipset) {
 		case 0xd9: /* known broken */
+		case 0xe4: /* needs binary driver firmware */
+		case 0xe7: /* needs binary driver firmware */
 			NV_INFO(dev, "acceleration disabled by default, pass "
 				     "noaccel=0 to force enable\n");
 			dev_priv->noaccel = true;

+ 11 - 0
drivers/gpu/drm/nouveau/nv04_display.c

@@ -98,6 +98,13 @@ nv04_display_early_init(struct drm_device *dev)
 		NVSetOwner(dev, 0);
 	}
 
+	/* ensure vblank interrupts are off, they can't be enabled until
+	 * drm_vblank has been initialised
+	 */
+	NVWriteCRTC(dev, 0, NV_PCRTC_INTR_EN_0, 0);
+	if (nv_two_heads(dev))
+		NVWriteCRTC(dev, 1, NV_PCRTC_INTR_EN_0, 0);
+
 	return 0;
 }
 
@@ -246,6 +253,10 @@ nv04_display_init(struct drm_device *dev)
 void
 nv04_display_fini(struct drm_device *dev)
 {
+	/* disable vblank interrupts */
+	NVWriteCRTC(dev, 0, NV_PCRTC_INTR_EN_0, 0);
+	if (nv_two_heads(dev))
+		NVWriteCRTC(dev, 1, NV_PCRTC_INTR_EN_0, 0);
 }
 
 static void

+ 24 - 24
drivers/gpu/drm/nouveau/nv04_fbcon.c

@@ -41,7 +41,7 @@ nv04_fbcon_copyarea(struct fb_info *info, const struct fb_copyarea *region)
 	if (ret)
 		return ret;
 
-	BEGIN_RING(chan, NvSubImageBlit, 0x0300, 3);
+	BEGIN_NV04(chan, NvSubImageBlit, 0x0300, 3);
 	OUT_RING(chan, (region->sy << 16) | region->sx);
 	OUT_RING(chan, (region->dy << 16) | region->dx);
 	OUT_RING(chan, (region->height << 16) | region->width);
@@ -62,15 +62,15 @@ nv04_fbcon_fillrect(struct fb_info *info, const struct fb_fillrect *rect)
 	if (ret)
 		return ret;
 
-	BEGIN_RING(chan, NvSubGdiRect, 0x02fc, 1);
+	BEGIN_NV04(chan, NvSubGdiRect, 0x02fc, 1);
 	OUT_RING(chan, (rect->rop != ROP_COPY) ? 1 : 3);
-	BEGIN_RING(chan, NvSubGdiRect, 0x03fc, 1);
+	BEGIN_NV04(chan, NvSubGdiRect, 0x03fc, 1);
 	if (info->fix.visual == FB_VISUAL_TRUECOLOR ||
 	    info->fix.visual == FB_VISUAL_DIRECTCOLOR)
 		OUT_RING(chan, ((uint32_t *)info->pseudo_palette)[rect->color]);
 	else
 		OUT_RING(chan, rect->color);
-	BEGIN_RING(chan, NvSubGdiRect, 0x0400, 2);
+	BEGIN_NV04(chan, NvSubGdiRect, 0x0400, 2);
 	OUT_RING(chan, (rect->dx << 16) | rect->dy);
 	OUT_RING(chan, (rect->width << 16) | rect->height);
 	FIRE_RING(chan);
@@ -110,7 +110,7 @@ nv04_fbcon_imageblit(struct fb_info *info, const struct fb_image *image)
 		bg = image->bg_color;
 	}
 
-	BEGIN_RING(chan, NvSubGdiRect, 0x0be4, 7);
+	BEGIN_NV04(chan, NvSubGdiRect, 0x0be4, 7);
 	OUT_RING(chan, (image->dy << 16) | (image->dx & 0xffff));
 	OUT_RING(chan, ((image->dy + image->height) << 16) |
 			 ((image->dx + image->width) & 0xffff));
@@ -127,7 +127,7 @@ nv04_fbcon_imageblit(struct fb_info *info, const struct fb_image *image)
 		if (ret)
 			return ret;
 
-		BEGIN_RING(chan, NvSubGdiRect, 0x0c00, iter_len);
+		BEGIN_NV04(chan, NvSubGdiRect, 0x0c00, iter_len);
 		OUT_RINGp(chan, data, iter_len);
 		data += iter_len;
 		dsize -= iter_len;
@@ -209,25 +209,25 @@ nv04_fbcon_accel_init(struct fb_info *info)
 		return 0;
 	}
 
-	BEGIN_RING(chan, sub, 0x0000, 1);
+	BEGIN_NV04(chan, sub, 0x0000, 1);
 	OUT_RING(chan, NvCtxSurf2D);
-	BEGIN_RING(chan, sub, 0x0184, 2);
+	BEGIN_NV04(chan, sub, 0x0184, 2);
 	OUT_RING(chan, NvDmaFB);
 	OUT_RING(chan, NvDmaFB);
-	BEGIN_RING(chan, sub, 0x0300, 4);
+	BEGIN_NV04(chan, sub, 0x0300, 4);
 	OUT_RING(chan, surface_fmt);
 	OUT_RING(chan, info->fix.line_length | (info->fix.line_length << 16));
 	OUT_RING(chan, info->fix.smem_start - dev->mode_config.fb_base);
 	OUT_RING(chan, info->fix.smem_start - dev->mode_config.fb_base);
 
-	BEGIN_RING(chan, sub, 0x0000, 1);
+	BEGIN_NV04(chan, sub, 0x0000, 1);
 	OUT_RING(chan, NvRop);
-	BEGIN_RING(chan, sub, 0x0300, 1);
+	BEGIN_NV04(chan, sub, 0x0300, 1);
 	OUT_RING(chan, 0x55);
 
-	BEGIN_RING(chan, sub, 0x0000, 1);
+	BEGIN_NV04(chan, sub, 0x0000, 1);
 	OUT_RING(chan, NvImagePatt);
-	BEGIN_RING(chan, sub, 0x0300, 8);
+	BEGIN_NV04(chan, sub, 0x0300, 8);
 	OUT_RING(chan, pattern_fmt);
 #ifdef __BIG_ENDIAN
 	OUT_RING(chan, 2);
@@ -241,31 +241,31 @@ nv04_fbcon_accel_init(struct fb_info *info)
 	OUT_RING(chan, ~0);
 	OUT_RING(chan, ~0);
 
-	BEGIN_RING(chan, sub, 0x0000, 1);
+	BEGIN_NV04(chan, sub, 0x0000, 1);
 	OUT_RING(chan, NvClipRect);
-	BEGIN_RING(chan, sub, 0x0300, 2);
+	BEGIN_NV04(chan, sub, 0x0300, 2);
 	OUT_RING(chan, 0);
 	OUT_RING(chan, (info->var.yres_virtual << 16) | info->var.xres_virtual);
 
-	BEGIN_RING(chan, NvSubImageBlit, 0x0000, 1);
+	BEGIN_NV04(chan, NvSubImageBlit, 0x0000, 1);
 	OUT_RING(chan, NvImageBlit);
-	BEGIN_RING(chan, NvSubImageBlit, 0x019c, 1);
+	BEGIN_NV04(chan, NvSubImageBlit, 0x019c, 1);
 	OUT_RING(chan, NvCtxSurf2D);
-	BEGIN_RING(chan, NvSubImageBlit, 0x02fc, 1);
+	BEGIN_NV04(chan, NvSubImageBlit, 0x02fc, 1);
 	OUT_RING(chan, 3);
 
-	BEGIN_RING(chan, NvSubGdiRect, 0x0000, 1);
+	BEGIN_NV04(chan, NvSubGdiRect, 0x0000, 1);
 	OUT_RING(chan, NvGdiRect);
-	BEGIN_RING(chan, NvSubGdiRect, 0x0198, 1);
+	BEGIN_NV04(chan, NvSubGdiRect, 0x0198, 1);
 	OUT_RING(chan, NvCtxSurf2D);
-	BEGIN_RING(chan, NvSubGdiRect, 0x0188, 2);
+	BEGIN_NV04(chan, NvSubGdiRect, 0x0188, 2);
 	OUT_RING(chan, NvImagePatt);
 	OUT_RING(chan, NvRop);
-	BEGIN_RING(chan, NvSubGdiRect, 0x0304, 1);
+	BEGIN_NV04(chan, NvSubGdiRect, 0x0304, 1);
 	OUT_RING(chan, 1);
-	BEGIN_RING(chan, NvSubGdiRect, 0x0300, 1);
+	BEGIN_NV04(chan, NvSubGdiRect, 0x0300, 1);
 	OUT_RING(chan, rect_fmt);
-	BEGIN_RING(chan, NvSubGdiRect, 0x02fc, 1);
+	BEGIN_NV04(chan, NvSubGdiRect, 0x02fc, 1);
 	OUT_RING(chan, 3);
 
 	FIRE_RING(chan);

+ 140 - 0
drivers/gpu/drm/nouveau/nv04_fence.c

@@ -0,0 +1,140 @@
+/*
+ * Copyright 2012 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs
+ */
+
+#include "drmP.h"
+#include "nouveau_drv.h"
+#include "nouveau_dma.h"
+#include "nouveau_ramht.h"
+#include "nouveau_fence.h"
+
+struct nv04_fence_chan {
+	struct nouveau_fence_chan base;
+	atomic_t sequence;
+};
+
+struct nv04_fence_priv {
+	struct nouveau_fence_priv base;
+};
+
+static int
+nv04_fence_emit(struct nouveau_fence *fence)
+{
+	struct nouveau_channel *chan = fence->channel;
+	int ret = RING_SPACE(chan, 2);
+	if (ret == 0) {
+		BEGIN_NV04(chan, NvSubSw, 0x0150, 1);
+		OUT_RING  (chan, fence->sequence);
+		FIRE_RING (chan);
+	}
+	return ret;
+}
+
+static int
+nv04_fence_sync(struct nouveau_fence *fence,
+		struct nouveau_channel *prev, struct nouveau_channel *chan)
+{
+	return -ENODEV;
+}
+
+int
+nv04_fence_mthd(struct nouveau_channel *chan, u32 class, u32 mthd, u32 data)
+{
+	struct nv04_fence_chan *fctx = chan->engctx[NVOBJ_ENGINE_FENCE];
+	atomic_set(&fctx->sequence, data);
+	return 0;
+}
+
+static u32
+nv04_fence_read(struct nouveau_channel *chan)
+{
+	struct nv04_fence_chan *fctx = chan->engctx[NVOBJ_ENGINE_FENCE];
+	return atomic_read(&fctx->sequence);
+}
+
+static void
+nv04_fence_context_del(struct nouveau_channel *chan, int engine)
+{
+	struct nv04_fence_chan *fctx = chan->engctx[engine];
+	nouveau_fence_context_del(&fctx->base);
+	chan->engctx[engine] = NULL;
+	kfree(fctx);
+}
+
+static int
+nv04_fence_context_new(struct nouveau_channel *chan, int engine)
+{
+	struct nv04_fence_chan *fctx = kzalloc(sizeof(*fctx), GFP_KERNEL);
+	if (fctx) {
+		nouveau_fence_context_new(&fctx->base);
+		atomic_set(&fctx->sequence, 0);
+		chan->engctx[engine] = fctx;
+		return 0;
+	}
+	return -ENOMEM;
+}
+
+static int
+nv04_fence_fini(struct drm_device *dev, int engine, bool suspend)
+{
+	return 0;
+}
+
+static int
+nv04_fence_init(struct drm_device *dev, int engine)
+{
+	return 0;
+}
+
+static void
+nv04_fence_destroy(struct drm_device *dev, int engine)
+{
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nv04_fence_priv *priv = nv_engine(dev, engine);
+
+	dev_priv->eng[engine] = NULL;
+	kfree(priv);
+}
+
+int
+nv04_fence_create(struct drm_device *dev)
+{
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nv04_fence_priv *priv;
+	int ret = 0;
+
+	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	priv->base.engine.destroy = nv04_fence_destroy;
+	priv->base.engine.init = nv04_fence_init;
+	priv->base.engine.fini = nv04_fence_fini;
+	priv->base.engine.context_new = nv04_fence_context_new;
+	priv->base.engine.context_del = nv04_fence_context_del;
+	priv->base.emit = nv04_fence_emit;
+	priv->base.sync = nv04_fence_sync;
+	priv->base.read = nv04_fence_read;
+	dev_priv->eng[NVOBJ_ENGINE_FENCE] = &priv->base.engine;
+	return ret;
+}

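Design note on nv04_fence.c: NV04-class chips have no NV10_SUBCHAN_REF_CNT, so the sequence number makes a round trip through software. emit() writes it as method 0x0150 on the NvSw object, PFIFO traps the unhandled software method to the ISR, and the dispatcher lands in nv04_fence_mthd(), whose atomic is what nv04_fence_read() polls. The hookup this relies on looks roughly like the following (an assumption based on the driver's existing NVOBJ_CLASS/NVOBJ_MTHD convention; the actual registration sits in nv04_software.c, outside this excerpt):

	NVOBJ_CLASS(dev, 0x006e, SW);			/* NV04 software object */
	NVOBJ_MTHD (dev, 0x006e, 0x0150, nv04_fence_mthd); /* fence sequence */
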
+ 191 - 228
drivers/gpu/drm/nouveau/nv04_fifo.c

@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2007 Ben Skeggs.
+ * Copyright (C) 2012 Ben Skeggs.
  * All Rights Reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining
@@ -27,49 +27,38 @@
 #include "drmP.h"
 #include "drm.h"
 #include "nouveau_drv.h"
-#include "nouveau_ramht.h"
+#include "nouveau_fifo.h"
 #include "nouveau_util.h"
-
-#define NV04_RAMFC(c) (dev_priv->ramfc->pinst + ((c) * NV04_RAMFC__SIZE))
-#define NV04_RAMFC__SIZE 32
-#define NV04_RAMFC_DMA_PUT                                       0x00
-#define NV04_RAMFC_DMA_GET                                       0x04
-#define NV04_RAMFC_DMA_INSTANCE                                  0x08
-#define NV04_RAMFC_DMA_STATE                                     0x0C
-#define NV04_RAMFC_DMA_FETCH                                     0x10
-#define NV04_RAMFC_ENGINE                                        0x14
-#define NV04_RAMFC_PULL1_ENGINE                                  0x18
-
-#define RAMFC_WR(offset, val) nv_wo32(chan->ramfc, NV04_RAMFC_##offset, (val))
-#define RAMFC_RD(offset)      nv_ro32(chan->ramfc, NV04_RAMFC_##offset)
-
-void
-nv04_fifo_disable(struct drm_device *dev)
-{
-	uint32_t tmp;
-
-	tmp = nv_rd32(dev, NV04_PFIFO_CACHE1_DMA_PUSH);
-	nv_wr32(dev, NV04_PFIFO_CACHE1_DMA_PUSH, tmp & ~1);
-	nv_wr32(dev, NV03_PFIFO_CACHE1_PUSH0, 0);
-	tmp = nv_rd32(dev, NV03_PFIFO_CACHE1_PULL1);
-	nv_wr32(dev, NV04_PFIFO_CACHE1_PULL0, tmp & ~1);
-}
-
-void
-nv04_fifo_enable(struct drm_device *dev)
-{
-	nv_wr32(dev, NV03_PFIFO_CACHE1_PUSH0, 1);
-	nv_wr32(dev, NV04_PFIFO_CACHE1_PULL0, 1);
-}
-
-bool
-nv04_fifo_reassign(struct drm_device *dev, bool enable)
-{
-	uint32_t reassign = nv_rd32(dev, NV03_PFIFO_CACHES);
-
-	nv_wr32(dev, NV03_PFIFO_CACHES, enable ? 1 : 0);
-	return (reassign == 1);
-}
+#include "nouveau_ramht.h"
+#include "nouveau_software.h"
+
+static struct ramfc_desc {
+	unsigned bits:6;
+	unsigned ctxs:5;
+	unsigned ctxp:8;
+	unsigned regs:5;
+	unsigned regp;
+} nv04_ramfc[] = {
+	{ 32,  0, 0x00,  0, NV04_PFIFO_CACHE1_DMA_PUT },
+	{ 32,  0, 0x04,  0, NV04_PFIFO_CACHE1_DMA_GET },
+	{ 16,  0, 0x08,  0, NV04_PFIFO_CACHE1_DMA_INSTANCE },
+	{ 16, 16, 0x08,  0, NV04_PFIFO_CACHE1_DMA_DCOUNT },
+	{ 32,  0, 0x0c,  0, NV04_PFIFO_CACHE1_DMA_STATE },
+	{ 32,  0, 0x10,  0, NV04_PFIFO_CACHE1_DMA_FETCH },
+	{ 32,  0, 0x14,  0, NV04_PFIFO_CACHE1_ENGINE },
+	{ 32,  0, 0x18,  0, NV04_PFIFO_CACHE1_PULL1 },
+	{}
+};
+
+struct nv04_fifo_priv {
+	struct nouveau_fifo_priv base;
+	struct ramfc_desc *ramfc_desc;
+};
+
+struct nv04_fifo_chan {
+	struct nouveau_fifo_chan base;
+	struct nouveau_gpuobj *ramfc;
+};
 
 bool
 nv04_fifo_cache_pull(struct drm_device *dev, bool enable)
@@ -86,13 +75,13 @@ nv04_fifo_cache_pull(struct drm_device *dev, bool enable)
 		 * invalidate the most recently calculated instance.
 		 */
 		if (!nv_wait(dev, NV04_PFIFO_CACHE1_PULL0,
-			     NV04_PFIFO_CACHE1_PULL0_HASH_BUSY, 0))
+				  NV04_PFIFO_CACHE1_PULL0_HASH_BUSY, 0))
 			NV_ERROR(dev, "Timeout idling the PFIFO puller.\n");
 
 		if (nv_rd32(dev, NV04_PFIFO_CACHE1_PULL0) &
-		    NV04_PFIFO_CACHE1_PULL0_HASH_FAILED)
+				 NV04_PFIFO_CACHE1_PULL0_HASH_FAILED)
 			nv_wr32(dev, NV03_PFIFO_INTR_0,
-				NV_PFIFO_INTR_CACHE_ERROR);
+				     NV_PFIFO_INTR_CACHE_ERROR);
 
 		nv_wr32(dev, NV04_PFIFO_CACHE1_HASH, 0);
 	}
@@ -100,242 +89,182 @@ nv04_fifo_cache_pull(struct drm_device *dev, bool enable)
 	return pull & 1;
 }
 
-int
-nv04_fifo_channel_id(struct drm_device *dev)
-{
-	return nv_rd32(dev, NV03_PFIFO_CACHE1_PUSH1) &
-			NV03_PFIFO_CACHE1_PUSH1_CHID_MASK;
-}
-
-#ifdef __BIG_ENDIAN
-#define DMA_FETCH_ENDIANNESS NV_PFIFO_CACHE1_BIG_ENDIAN
-#else
-#define DMA_FETCH_ENDIANNESS 0
-#endif
-
-int
-nv04_fifo_create_context(struct nouveau_channel *chan)
+static int
+nv04_fifo_context_new(struct nouveau_channel *chan, int engine)
 {
 	struct drm_device *dev = chan->dev;
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nv04_fifo_priv *priv = nv_engine(dev, engine);
+	struct nv04_fifo_chan *fctx;
 	unsigned long flags;
 	int ret;
 
-	ret = nouveau_gpuobj_new_fake(dev, NV04_RAMFC(chan->id), ~0,
-						NV04_RAMFC__SIZE,
-						NVOBJ_FLAG_ZERO_ALLOC |
-						NVOBJ_FLAG_ZERO_FREE,
-						&chan->ramfc);
-	if (ret)
-		return ret;
+	fctx = chan->engctx[engine] = kzalloc(sizeof(*fctx), GFP_KERNEL);
+	if (!fctx)
+		return -ENOMEM;
 
+	/* map channel control registers */
 	chan->user = ioremap(pci_resource_start(dev->pdev, 0) +
 			     NV03_USER(chan->id), PAGE_SIZE);
-	if (!chan->user)
-		return -ENOMEM;
-
-	spin_lock_irqsave(&dev_priv->context_switch_lock, flags);
-
-	/* Setup initial state */
-	RAMFC_WR(DMA_PUT, chan->pushbuf_base);
-	RAMFC_WR(DMA_GET, chan->pushbuf_base);
-	RAMFC_WR(DMA_INSTANCE, chan->pushbuf->pinst >> 4);
-	RAMFC_WR(DMA_FETCH, (NV_PFIFO_CACHE1_DMA_FETCH_TRIG_128_BYTES |
-			     NV_PFIFO_CACHE1_DMA_FETCH_SIZE_128_BYTES |
-			     NV_PFIFO_CACHE1_DMA_FETCH_MAX_REQS_8 |
-			     DMA_FETCH_ENDIANNESS));
+	if (!chan->user) {
+		ret = -ENOMEM;
+		goto error;
+	}
 
-	/* enable the fifo dma operation */
-	nv_wr32(dev, NV04_PFIFO_MODE,
-		nv_rd32(dev, NV04_PFIFO_MODE) | (1 << chan->id));
+	/* initialise default fifo context */
+	ret = nouveau_gpuobj_new_fake(dev, dev_priv->ramfc->pinst +
+				      chan->id * 32, ~0, 32,
+				      NVOBJ_FLAG_ZERO_FREE, &fctx->ramfc);
+	if (ret)
+		goto error;
+
+	nv_wo32(fctx->ramfc, 0x00, chan->pushbuf_base);
+	nv_wo32(fctx->ramfc, 0x04, chan->pushbuf_base);
+	nv_wo32(fctx->ramfc, 0x08, chan->pushbuf->pinst >> 4);
+	nv_wo32(fctx->ramfc, 0x0c, 0x00000000);
+	nv_wo32(fctx->ramfc, 0x10, NV_PFIFO_CACHE1_DMA_FETCH_TRIG_128_BYTES |
+				   NV_PFIFO_CACHE1_DMA_FETCH_SIZE_128_BYTES |
+#ifdef __BIG_ENDIAN
+				   NV_PFIFO_CACHE1_BIG_ENDIAN |
+#endif
+				   NV_PFIFO_CACHE1_DMA_FETCH_MAX_REQS_8);
+	nv_wo32(fctx->ramfc, 0x14, 0x00000000);
+	nv_wo32(fctx->ramfc, 0x18, 0x00000000);
+	nv_wo32(fctx->ramfc, 0x1c, 0x00000000);
 
+	/* enable dma mode on the channel */
+	spin_lock_irqsave(&dev_priv->context_switch_lock, flags);
+	nv_mask(dev, NV04_PFIFO_MODE, (1 << chan->id), (1 << chan->id));
 	spin_unlock_irqrestore(&dev_priv->context_switch_lock, flags);
-	return 0;
+
+error:
+	if (ret)
+		priv->base.base.context_del(chan, engine);
+	return ret;
 }
 
 void
-nv04_fifo_destroy_context(struct nouveau_channel *chan)
+nv04_fifo_context_del(struct nouveau_channel *chan, int engine)
 {
 	struct drm_device *dev = chan->dev;
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nouveau_fifo_engine *pfifo = &dev_priv->engine.fifo;
+	struct nv04_fifo_priv *priv = nv_engine(chan->dev, engine);
+	struct nv04_fifo_chan *fctx = chan->engctx[engine];
+	struct ramfc_desc *c = priv->ramfc_desc;
 	unsigned long flags;
+	int chid;
 
+	/* prevent fifo context switches */
 	spin_lock_irqsave(&dev_priv->context_switch_lock, flags);
-	pfifo->reassign(dev, false);
-
-	/* Unload the context if it's the currently active one */
-	if (pfifo->channel_id(dev) == chan->id) {
-		pfifo->disable(dev);
-		pfifo->unload_context(dev);
-		pfifo->enable(dev);
+	nv_wr32(dev, NV03_PFIFO_CACHES, 0);
+
+	/* if this channel is active, replace it with a null context */
+	chid = nv_rd32(dev, NV03_PFIFO_CACHE1_PUSH1) & priv->base.channels;
+	if (chid == chan->id) {
+		nv_mask(dev, NV04_PFIFO_CACHE1_DMA_PUSH, 0x00000001, 0);
+		nv_wr32(dev, NV03_PFIFO_CACHE1_PUSH0, 0);
+		nv_mask(dev, NV04_PFIFO_CACHE1_PULL0, 0x00000001, 0);
+
+		do {
+			u32 mask = ((1ULL << c->bits) - 1) << c->regs;
+			nv_mask(dev, c->regp, mask, 0x00000000);
+		} while ((++c)->bits);
+
+		nv_wr32(dev, NV03_PFIFO_CACHE1_GET, 0);
+		nv_wr32(dev, NV03_PFIFO_CACHE1_PUT, 0);
+		nv_wr32(dev, NV03_PFIFO_CACHE1_PUSH1, priv->base.channels);
+		nv_wr32(dev, NV03_PFIFO_CACHE1_PUSH0, 1);
+		nv_wr32(dev, NV04_PFIFO_CACHE1_PULL0, 1);
 	}
 
-	/* Keep it from being rescheduled */
+	/* restore normal operation, after disabling dma mode */
 	nv_mask(dev, NV04_PFIFO_MODE, 1 << chan->id, 0);
-
-	pfifo->reassign(dev, true);
+	nv_wr32(dev, NV03_PFIFO_CACHES, 1);
 	spin_unlock_irqrestore(&dev_priv->context_switch_lock, flags);
 
-	/* Free the channel resources */
+	/* clean up */
+	nouveau_gpuobj_ref(NULL, &fctx->ramfc);
+	nouveau_gpuobj_ref(NULL, &chan->ramfc); /*XXX: nv40 */
 	if (chan->user) {
 		iounmap(chan->user);
 		chan->user = NULL;
 	}
-	nouveau_gpuobj_ref(NULL, &chan->ramfc);
-}
-
-static void
-nv04_fifo_do_load_context(struct drm_device *dev, int chid)
-{
-	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	uint32_t fc = NV04_RAMFC(chid), tmp;
-
-	nv_wr32(dev, NV04_PFIFO_CACHE1_DMA_PUT, nv_ri32(dev, fc + 0));
-	nv_wr32(dev, NV04_PFIFO_CACHE1_DMA_GET, nv_ri32(dev, fc + 4));
-	tmp = nv_ri32(dev, fc + 8);
-	nv_wr32(dev, NV04_PFIFO_CACHE1_DMA_INSTANCE, tmp & 0xFFFF);
-	nv_wr32(dev, NV04_PFIFO_CACHE1_DMA_DCOUNT, tmp >> 16);
-	nv_wr32(dev, NV04_PFIFO_CACHE1_DMA_STATE, nv_ri32(dev, fc + 12));
-	nv_wr32(dev, NV04_PFIFO_CACHE1_DMA_FETCH, nv_ri32(dev, fc + 16));
-	nv_wr32(dev, NV04_PFIFO_CACHE1_ENGINE, nv_ri32(dev, fc + 20));
-	nv_wr32(dev, NV04_PFIFO_CACHE1_PULL1, nv_ri32(dev, fc + 24));
-
-	nv_wr32(dev, NV03_PFIFO_CACHE1_GET, 0);
-	nv_wr32(dev, NV03_PFIFO_CACHE1_PUT, 0);
-}
-
-int
-nv04_fifo_load_context(struct nouveau_channel *chan)
-{
-	uint32_t tmp;
-
-	nv_wr32(chan->dev, NV03_PFIFO_CACHE1_PUSH1,
-			   NV03_PFIFO_CACHE1_PUSH1_DMA | chan->id);
-	nv04_fifo_do_load_context(chan->dev, chan->id);
-	nv_wr32(chan->dev, NV04_PFIFO_CACHE1_DMA_PUSH, 1);
-
-	/* Reset NV04_PFIFO_CACHE1_DMA_CTL_AT_INFO to INVALID */
-	tmp = nv_rd32(chan->dev, NV04_PFIFO_CACHE1_DMA_CTL) & ~(1 << 31);
-	nv_wr32(chan->dev, NV04_PFIFO_CACHE1_DMA_CTL, tmp);
-
-	return 0;
 }
 
 int
-nv04_fifo_unload_context(struct drm_device *dev)
+nv04_fifo_init(struct drm_device *dev, int engine)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nouveau_fifo_engine *pfifo = &dev_priv->engine.fifo;
-	struct nouveau_channel *chan = NULL;
-	uint32_t tmp;
-	int chid;
-
-	chid = pfifo->channel_id(dev);
-	if (chid < 0 || chid >= dev_priv->engine.fifo.channels)
-		return 0;
-
-	chan = dev_priv->channels.ptr[chid];
-	if (!chan) {
-		NV_ERROR(dev, "Inactive channel on PFIFO: %d\n", chid);
-		return -EINVAL;
-	}
-
-	RAMFC_WR(DMA_PUT, nv_rd32(dev, NV04_PFIFO_CACHE1_DMA_PUT));
-	RAMFC_WR(DMA_GET, nv_rd32(dev, NV04_PFIFO_CACHE1_DMA_GET));
-	tmp  = nv_rd32(dev, NV04_PFIFO_CACHE1_DMA_DCOUNT) << 16;
-	tmp |= nv_rd32(dev, NV04_PFIFO_CACHE1_DMA_INSTANCE);
-	RAMFC_WR(DMA_INSTANCE, tmp);
-	RAMFC_WR(DMA_STATE, nv_rd32(dev, NV04_PFIFO_CACHE1_DMA_STATE));
-	RAMFC_WR(DMA_FETCH, nv_rd32(dev, NV04_PFIFO_CACHE1_DMA_FETCH));
-	RAMFC_WR(ENGINE, nv_rd32(dev, NV04_PFIFO_CACHE1_ENGINE));
-	RAMFC_WR(PULL1_ENGINE, nv_rd32(dev, NV04_PFIFO_CACHE1_PULL1));
-
-	nv04_fifo_do_load_context(dev, pfifo->channels - 1);
-	nv_wr32(dev, NV03_PFIFO_CACHE1_PUSH1, pfifo->channels - 1);
-	return 0;
-}
+	struct nv04_fifo_priv *priv = nv_engine(dev, engine);
+	int i;
 
-static void
-nv04_fifo_init_reset(struct drm_device *dev)
-{
-	nv_wr32(dev, NV03_PMC_ENABLE,
-		nv_rd32(dev, NV03_PMC_ENABLE) & ~NV_PMC_ENABLE_PFIFO);
-	nv_wr32(dev, NV03_PMC_ENABLE,
-		nv_rd32(dev, NV03_PMC_ENABLE) |  NV_PMC_ENABLE_PFIFO);
-
-	nv_wr32(dev, 0x003224, 0x000f0078);
-	nv_wr32(dev, 0x002044, 0x0101ffff);
-	nv_wr32(dev, 0x002040, 0x000000ff);
-	nv_wr32(dev, 0x002500, 0x00000000);
-	nv_wr32(dev, 0x003000, 0x00000000);
-	nv_wr32(dev, 0x003050, 0x00000000);
-	nv_wr32(dev, 0x003200, 0x00000000);
-	nv_wr32(dev, 0x003250, 0x00000000);
-	nv_wr32(dev, 0x003220, 0x00000000);
-
-	nv_wr32(dev, 0x003250, 0x00000000);
-	nv_wr32(dev, 0x003270, 0x00000000);
-	nv_wr32(dev, 0x003210, 0x00000000);
-}
+	nv_mask(dev, NV03_PMC_ENABLE, NV_PMC_ENABLE_PFIFO, 0);
+	nv_mask(dev, NV03_PMC_ENABLE, NV_PMC_ENABLE_PFIFO, NV_PMC_ENABLE_PFIFO);
 
-static void
-nv04_fifo_init_ramxx(struct drm_device *dev)
-{
-	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	nv_wr32(dev, NV04_PFIFO_DELAY_0, 0x000000ff);
+	nv_wr32(dev, NV04_PFIFO_DMA_TIMESLICE, 0x0101ffff);
 
 	nv_wr32(dev, NV03_PFIFO_RAMHT, (0x03 << 24) /* search 128 */ |
 				       ((dev_priv->ramht->bits - 9) << 16) |
 				       (dev_priv->ramht->gpuobj->pinst >> 8));
 	nv_wr32(dev, NV03_PFIFO_RAMRO, dev_priv->ramro->pinst >> 8);
 	nv_wr32(dev, NV03_PFIFO_RAMFC, dev_priv->ramfc->pinst >> 8);
-}
 
-static void
-nv04_fifo_init_intr(struct drm_device *dev)
-{
-	nouveau_irq_register(dev, 8, nv04_fifo_isr);
-	nv_wr32(dev, 0x002100, 0xffffffff);
-	nv_wr32(dev, 0x002140, 0xffffffff);
+	nv_wr32(dev, NV03_PFIFO_CACHE1_PUSH1, priv->base.channels);
+
+	nv_wr32(dev, NV03_PFIFO_INTR_0, 0xffffffff);
+	nv_wr32(dev, NV03_PFIFO_INTR_EN_0, 0xffffffff);
+
+	nv_wr32(dev, NV03_PFIFO_CACHE1_PUSH0, 1);
+	nv_wr32(dev, NV04_PFIFO_CACHE1_PULL0, 1);
+	nv_wr32(dev, NV03_PFIFO_CACHES, 1);
+
+	for (i = 0; i < priv->base.channels; i++) {
+		if (dev_priv->channels.ptr[i])
+			nv_mask(dev, NV04_PFIFO_MODE, (1 << i), (1 << i));
+	}
+
+	return 0;
 }
 
 int
-nv04_fifo_init(struct drm_device *dev)
+nv04_fifo_fini(struct drm_device *dev, int engine, bool suspend)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nouveau_fifo_engine *pfifo = &dev_priv->engine.fifo;
-	int i;
-
-	nv04_fifo_init_reset(dev);
-	nv04_fifo_init_ramxx(dev);
-
-	nv04_fifo_do_load_context(dev, pfifo->channels - 1);
-	nv_wr32(dev, NV03_PFIFO_CACHE1_PUSH1, pfifo->channels - 1);
+	struct nv04_fifo_priv *priv = nv_engine(dev, engine);
+	struct nouveau_channel *chan;
+	int chid;
 
-	nv04_fifo_init_intr(dev);
-	pfifo->enable(dev);
-	pfifo->reassign(dev, true);
+	/* prevent context switches and halt fifo operation */
+	nv_wr32(dev, NV03_PFIFO_CACHES, 0);
+	nv_wr32(dev, NV04_PFIFO_CACHE1_DMA_PUSH, 0);
+	nv_wr32(dev, NV03_PFIFO_CACHE1_PUSH0, 0);
+	nv_wr32(dev, NV04_PFIFO_CACHE1_PULL0, 0);
 
-	for (i = 0; i < dev_priv->engine.fifo.channels; i++) {
-		if (dev_priv->channels.ptr[i]) {
-			uint32_t mode = nv_rd32(dev, NV04_PFIFO_MODE);
-			nv_wr32(dev, NV04_PFIFO_MODE, mode | (1 << i));
-		}
+	/* store current fifo context in ramfc */
+	chid = nv_rd32(dev, NV03_PFIFO_CACHE1_PUSH1) & priv->base.channels;
+	chan = dev_priv->channels.ptr[chid];
+	if (suspend && chid != priv->base.channels && chan) {
+		struct nv04_fifo_chan *fctx = chan->engctx[engine];
+		struct nouveau_gpuobj *ctx = fctx->ramfc;
+		struct ramfc_desc *c = priv->ramfc_desc;
+		do {
+			u32 rm = ((1ULL << c->bits) - 1) << c->regs;
+			u32 cm = ((1ULL << c->bits) - 1) << c->ctxs;
+			u32 rv = (nv_rd32(dev, c->regp) &  rm) >> c->regs;
+			u32 cv = (nv_ro32(ctx, c->ctxp) & ~cm);
+			nv_wo32(ctx, c->ctxp, cv | (rv << c->ctxs));
+		} while ((++c)->bits);
 	}
 
+	nv_wr32(dev, NV03_PFIFO_INTR_EN_0, 0x00000000);
 	return 0;
 }
 
-void
-nv04_fifo_fini(struct drm_device *dev)
-{
-	nv_wr32(dev, 0x2140, 0x00000000);
-	nouveau_irq_unregister(dev, 8);
-}
-
 static bool
 nouveau_fifo_swmthd(struct drm_device *dev, u32 chid, u32 addr, u32 data)
 {
+	struct nouveau_fifo_priv *pfifo = nv_engine(dev, NVOBJ_ENGINE_FIFO);
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
 	struct nouveau_channel *chan = NULL;
 	struct nouveau_gpuobj *obj;
@@ -346,7 +275,7 @@ nouveau_fifo_swmthd(struct drm_device *dev, u32 chid, u32 addr, u32 data)
 	u32 engine;
 
 	spin_lock_irqsave(&dev_priv->channels.lock, flags);
-	if (likely(chid >= 0 && chid < dev_priv->engine.fifo.channels))
+	if (likely(chid >= 0 && chid < pfifo->channels))
 		chan = dev_priv->channels.ptr[chid];
 	if (unlikely(!chan))
 		goto out;
@@ -357,7 +286,6 @@ nouveau_fifo_swmthd(struct drm_device *dev, u32 chid, u32 addr, u32 data)
 		if (unlikely(!obj || obj->engine != NVOBJ_ENGINE_SW))
 			break;
 
-		chan->sw_subchannel[subc] = obj->class;
 		engine = 0x0000000f << (subc * 4);
 
 		nv_mask(dev, NV04_PFIFO_CACHE1_ENGINE, engine, 0x00000000);
@@ -368,7 +296,7 @@ nouveau_fifo_swmthd(struct drm_device *dev, u32 chid, u32 addr, u32 data)
 		if (unlikely(((engine >> (subc * 4)) & 0xf) != 0))
 			break;
 
-		if (!nouveau_gpuobj_mthd_call(chan, chan->sw_subchannel[subc],
+		if (!nouveau_gpuobj_mthd_call(chan, nouveau_software_class(dev),
 					      mthd, data))
 			handled = true;
 		break;
@@ -391,8 +319,8 @@ static const char *nv_dma_state_err(u32 state)
 void
 nv04_fifo_isr(struct drm_device *dev)
 {
+	struct nouveau_fifo_priv *pfifo = nv_engine(dev, NVOBJ_ENGINE_FIFO);
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nouveau_engine *engine = &dev_priv->engine;
 	uint32_t status, reassign;
 	int cnt = 0;
 
@@ -402,7 +330,7 @@ nv04_fifo_isr(struct drm_device *dev)
 
 		nv_wr32(dev, NV03_PFIFO_CACHES, 0);
 
-		chid = engine->fifo.channel_id(dev);
+		chid = nv_rd32(dev, NV03_PFIFO_CACHE1_PUSH1) & pfifo->channels;
 		get  = nv_rd32(dev, NV03_PFIFO_CACHE1_GET);
 
 		if (status & NV_PFIFO_INTR_CACHE_ERROR) {
@@ -541,3 +469,38 @@ nv04_fifo_isr(struct drm_device *dev)
 
 	nv_wr32(dev, NV03_PMC_INTR_0, NV_PMC_INTR_0_PFIFO_PENDING);
 }
+
+void
+nv04_fifo_destroy(struct drm_device *dev, int engine)
+{
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nv04_fifo_priv *priv = nv_engine(dev, engine);
+
+	nouveau_irq_unregister(dev, 8);
+
+	dev_priv->eng[engine] = NULL;
+	kfree(priv);
+}
+
+int
+nv04_fifo_create(struct drm_device *dev)
+{
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nv04_fifo_priv *priv;
+
+	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	priv->base.base.destroy = nv04_fifo_destroy;
+	priv->base.base.init = nv04_fifo_init;
+	priv->base.base.fini = nv04_fifo_fini;
+	priv->base.base.context_new = nv04_fifo_context_new;
+	priv->base.base.context_del = nv04_fifo_context_del;
+	priv->base.channels = 15;
+	priv->ramfc_desc = nv04_ramfc;
+	dev_priv->eng[NVOBJ_ENGINE_FIFO] = &priv->base.base;
+
+	nouveau_irq_register(dev, 8, nv04_fifo_isr);
+	return 0;
+}

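The ramfc_desc[] table above is what lets one generic loop replace the hand-rolled load/unload paths: each entry describes a register field (regp, regs, bits) and its slot in the 32-byte per-channel RAMFC image (ctxp, ctxs). A standalone sketch of the save step from nv04_fifo_fini(), using two fields that share a context word, as DMA_INSTANCE and DMA_DCOUNT do:

#include <stdint.h>
#include <stdio.h>

struct ramfc_desc {
	unsigned bits:6;	/* field width in bits */
	unsigned ctxs:5;	/* shift inside the RAMFC word */
	unsigned ctxp:8;	/* byte offset of the RAMFC word */
	unsigned regs:5;	/* shift inside the register */
	unsigned regp;		/* register address (unused in this model) */
};

/* mirrors the read-modify-write in nv04_fifo_fini(); 1ULL keeps a
 * full-width field (bits == 32) from overflowing the shift */
static void save_field(const struct ramfc_desc *c, uint32_t regv, uint32_t *ctx)
{
	uint32_t rm = ((1ULL << c->bits) - 1) << c->regs;
	uint32_t cm = ((1ULL << c->bits) - 1) << c->ctxs;
	uint32_t rv = (regv & rm) >> c->regs;
	uint32_t cv = ctx[c->ctxp / 4] & ~cm;

	ctx[c->ctxp / 4] = cv | (rv << c->ctxs);
}

int main(void)
{
	/* DMA_INSTANCE and DMA_DCOUNT both land in RAMFC word 0x08 */
	struct ramfc_desc inst = { 16,  0, 0x08, 0, 0 };
	struct ramfc_desc dcnt = { 16, 16, 0x08, 0, 0 };
	uint32_t ctx[8] = { 0 };

	save_field(&inst, 0x1234, ctx);
	save_field(&dcnt, 0x0042, ctx);
	printf("ctx[0x08] = 0x%08x\n", ctx[2]);	/* 0x00421234 */
	return 0;
}
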
+ 6 - 33
drivers/gpu/drm/nouveau/nv04_graph.c

@@ -356,12 +356,12 @@ static struct nouveau_channel *
 nv04_graph_channel(struct drm_device *dev)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	int chid = dev_priv->engine.fifo.channels;
+	int chid = 15;
 
 	if (nv_rd32(dev, NV04_PGRAPH_CTX_CONTROL) & 0x00010000)
 		chid = nv_rd32(dev, NV04_PGRAPH_CTX_USER) >> 24;
 
-	if (chid >= dev_priv->engine.fifo.channels)
+	if (chid > 15)
 		return NULL;
 
 	return dev_priv->channels.ptr[chid];
@@ -404,7 +404,6 @@ nv04_graph_load_context(struct nouveau_channel *chan)
 static int
 nv04_graph_unload_context(struct drm_device *dev)
 {
-	struct drm_nouveau_private *dev_priv = dev->dev_private;
 	struct nouveau_channel *chan = NULL;
 	struct graph_state *ctx;
 	uint32_t tmp;
@@ -420,7 +419,7 @@ nv04_graph_unload_context(struct drm_device *dev)
 
 	nv_wr32(dev, NV04_PGRAPH_CTX_CONTROL, 0x10000000);
 	tmp  = nv_rd32(dev, NV04_PGRAPH_CTX_USER) & 0x00ffffff;
-	tmp |= (dev_priv->engine.fifo.channels - 1) << 24;
+	tmp |= 15 << 24;
 	nv_wr32(dev, NV04_PGRAPH_CTX_USER, tmp);
 	return 0;
 }
@@ -495,7 +494,6 @@ nv04_graph_object_new(struct nouveau_channel *chan, int engine,
 static int
 nv04_graph_init(struct drm_device *dev, int engine)
 {
-	struct drm_nouveau_private *dev_priv = dev->dev_private;
 	uint32_t tmp;
 
 	nv_wr32(dev, NV03_PMC_ENABLE, nv_rd32(dev, NV03_PMC_ENABLE) &
@@ -527,7 +525,7 @@ nv04_graph_init(struct drm_device *dev, int engine)
 	nv_wr32(dev, NV04_PGRAPH_STATE        , 0xFFFFFFFF);
 	nv_wr32(dev, NV04_PGRAPH_CTX_CONTROL  , 0x10000100);
 	tmp  = nv_rd32(dev, NV04_PGRAPH_CTX_USER) & 0x00ffffff;
-	tmp |= (dev_priv->engine.fifo.channels - 1) << 24;
+	tmp |= 15 << 24;
 	nv_wr32(dev, NV04_PGRAPH_CTX_USER, tmp);
 
 	/* These don't belong here, they're part of a per-channel context */
@@ -550,28 +548,6 @@ nv04_graph_fini(struct drm_device *dev, int engine, bool suspend)
 	return 0;
 }
 
-static int
-nv04_graph_mthd_set_ref(struct nouveau_channel *chan,
-			u32 class, u32 mthd, u32 data)
-{
-	atomic_set(&chan->fence.last_sequence_irq, data);
-	return 0;
-}
-
-int
-nv04_graph_mthd_page_flip(struct nouveau_channel *chan,
-			  u32 class, u32 mthd, u32 data)
-{
-	struct drm_device *dev = chan->dev;
-	struct nouveau_page_flip_state s;
-
-	if (!nouveau_finish_page_flip(chan, &s))
-		nv_set_crtc_base(dev, s.crtc,
-				 s.offset + s.y * s.pitch + s.x * s.bpp / 8);
-
-	return 0;
-}
-
 /*
  * Software methods, why they are needed, and how they all work:
  *
@@ -1020,7 +996,8 @@ nv04_graph_context_switch(struct drm_device *dev)
 	nv04_graph_unload_context(dev);
 
 	/* Load context for next channel */
-	chid = dev_priv->engine.fifo.channel_id(dev);
+	chid = nv_rd32(dev, NV03_PFIFO_CACHE1_PUSH1) &
+			    NV03_PFIFO_CACHE1_PUSH1_CHID_MASK;
 	chan = dev_priv->channels.ptr[chid];
 	if (chan)
 		nv04_graph_load_context(chan);
@@ -1345,9 +1322,5 @@ nv04_graph_create(struct drm_device *dev)
 	NVOBJ_MTHD (dev, 0x005e, 0x0198, nv04_graph_mthd_bind_surf2d);
 	NVOBJ_MTHD (dev, 0x005e, 0x02fc, nv04_graph_mthd_set_operation);
 
-	/* nvsw */
-	NVOBJ_CLASS(dev, 0x506e, SW);
-	NVOBJ_MTHD (dev, 0x506e, 0x0150, nv04_graph_mthd_set_ref);
-	NVOBJ_MTHD (dev, 0x506e, 0x0500, nv04_graph_mthd_page_flip);
 	return 0;
 }

+ 12 - 11
drivers/gpu/drm/nouveau/nv04_instmem.c

@@ -1,6 +1,8 @@
 #include "drmP.h"
 #include "drm.h"
+
 #include "nouveau_drv.h"
+#include "nouveau_fifo.h"
 #include "nouveau_ramht.h"
 
 /* returns the size of fifo context */
@@ -10,12 +12,15 @@ nouveau_fifo_ctx_size(struct drm_device *dev)
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
 
 	if (dev_priv->chipset >= 0x40)
-		return 128;
+		return 128 * 32;
 	else
 	if (dev_priv->chipset >= 0x17)
-		return 64;
+		return 64 * 32;
+	else
+	if (dev_priv->chipset >= 0x10)
+		return 32 * 32;
 
-	return 32;
+	return 32 * 16;
 }
 
 int nv04_instmem_init(struct drm_device *dev)
@@ -39,14 +44,10 @@ int nv04_instmem_init(struct drm_device *dev)
 		else if (nv44_graph_class(dev))	    rsvd = 0x4980 * vs;
 		else				    rsvd = 0x4a40 * vs;
 		rsvd += 16 * 1024;
-		rsvd *= dev_priv->engine.fifo.channels;
-
-		/* pciegart table */
-		if (pci_is_pcie(dev->pdev))
-			rsvd += 512 * 1024;
+		rsvd *= 32; /* per-channel */
 
-		/* object storage */
-		rsvd += 512 * 1024;
+		rsvd += 512 * 1024; /* pci(e)gart table */
+		rsvd += 512 * 1024; /* object storage */
 
 		dev_priv->ramin_rsvd_vram = round_up(rsvd, 4096);
 	} else {
@@ -71,7 +72,7 @@ int nv04_instmem_init(struct drm_device *dev)
 		return ret;
 
 	/* And RAMFC */
-	length = dev_priv->engine.fifo.channels * nouveau_fifo_ctx_size(dev);
+	length = nouveau_fifo_ctx_size(dev);
 	switch (dev_priv->card_type) {
 	case NV_40:
 		offset = 0x20000;

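nouveau_fifo_ctx_size() above now folds the channel count into its result, returning the size of the whole RAMFC block rather than one channel's slice (callers previously multiplied by the channel count themselves). Worked out per chipset, using the numbers in the hunk:

#include <stdio.h>

static int fifo_ctx_size(int chipset)
{
	if (chipset >= 0x40) return 128 * 32;	/* 4096: 128B/channel, 32 channels */
	if (chipset >= 0x17) return  64 * 32;	/* 2048 */
	if (chipset >= 0x10) return  32 * 32;	/* 1024 */
	return 32 * 16;				/*  512: nv04 only has 16 channels */
}

int main(void)
{
	printf("nv40: %d bytes\n", fifo_ctx_size(0x40));	/* 4096 */
	printf("nv17: %d bytes\n", fifo_ctx_size(0x17));	/* 2048 */
	printf("nv04: %d bytes\n", fifo_ctx_size(0x04));	/*  512 */
	return 0;
}
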
+ 146 - 0
drivers/gpu/drm/nouveau/nv04_software.c

@@ -0,0 +1,146 @@
+/*
+ * Copyright 2012 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs
+ */
+
+#include "drmP.h"
+
+#include "nouveau_drv.h"
+#include "nouveau_ramht.h"
+#include "nouveau_fence.h"
+#include "nouveau_software.h"
+#include "nouveau_hw.h"
+
+struct nv04_software_priv {
+	struct nouveau_software_priv base;
+};
+
+struct nv04_software_chan {
+	struct nouveau_software_chan base;
+};
+
+static int
+mthd_flip(struct nouveau_channel *chan, u32 class, u32 mthd, u32 data)
+{
+	struct nouveau_page_flip_state state;
+
+	if (!nouveau_finish_page_flip(chan, &state)) {
+		nv_set_crtc_base(chan->dev, state.crtc, state.offset +
+				 state.y * state.pitch +
+				 state.x * state.bpp / 8);
+	}
+
+	return 0;
+}
+
+static int
+nv04_software_context_new(struct nouveau_channel *chan, int engine)
+{
+	struct nv04_software_chan *pch;
+
+	pch = kzalloc(sizeof(*pch), GFP_KERNEL);
+	if (!pch)
+		return -ENOMEM;
+
+	nouveau_software_context_new(&pch->base);
+	chan->engctx[engine] = pch;
+	return 0;
+}
+
+static void
+nv04_software_context_del(struct nouveau_channel *chan, int engine)
+{
+	struct nv04_software_chan *pch = chan->engctx[engine];
+	chan->engctx[engine] = NULL;
+	kfree(pch);
+}
+
+static int
+nv04_software_object_new(struct nouveau_channel *chan, int engine,
+			 u32 handle, u16 class)
+{
+	struct drm_device *dev = chan->dev;
+	struct nouveau_gpuobj *obj = NULL;
+	int ret;
+
+	ret = nouveau_gpuobj_new(dev, chan, 16, 16, 0, &obj);
+	if (ret)
+		return ret;
+	obj->engine = 0;
+	obj->class  = class;
+
+	ret = nouveau_ramht_insert(chan, handle, obj);
+	nouveau_gpuobj_ref(NULL, &obj);
+	return ret;
+}
+
+static int
+nv04_software_init(struct drm_device *dev, int engine)
+{
+	return 0;
+}
+
+static int
+nv04_software_fini(struct drm_device *dev, int engine, bool suspend)
+{
+	return 0;
+}
+
+static void
+nv04_software_destroy(struct drm_device *dev, int engine)
+{
+	struct nv04_software_priv *psw = nv_engine(dev, engine);
+
+	NVOBJ_ENGINE_DEL(dev, SW);
+	kfree(psw);
+}
+
+int
+nv04_software_create(struct drm_device *dev)
+{
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nv04_software_priv *psw;
+
+	psw = kzalloc(sizeof(*psw), GFP_KERNEL);
+	if (!psw)
+		return -ENOMEM;
+
+	psw->base.base.destroy = nv04_software_destroy;
+	psw->base.base.init = nv04_software_init;
+	psw->base.base.fini = nv04_software_fini;
+	psw->base.base.context_new = nv04_software_context_new;
+	psw->base.base.context_del = nv04_software_context_del;
+	psw->base.base.object_new = nv04_software_object_new;
+	nouveau_software_create(&psw->base);
+
+	NVOBJ_ENGINE_ADD(dev, SW, &psw->base.base);
+	if (dev_priv->card_type <= NV_04) {
+		NVOBJ_CLASS(dev, 0x006e, SW);
+		NVOBJ_MTHD (dev, 0x006e, 0x0150, nv04_fence_mthd);
+		NVOBJ_MTHD (dev, 0x006e, 0x0500, mthd_flip);
+	} else {
+		NVOBJ_CLASS(dev, 0x016e, SW);
+		NVOBJ_MTHD (dev, 0x016e, 0x0500, mthd_flip);
+	}
+
+	return 0;
+}

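nv04_software.c above moves the software-object methods out of PGRAPH and into a standalone engine: NVOBJ_CLASS()/NVOBJ_MTHD() bind a handler to a (class, method) pair, and the PFIFO CACHE_ERROR path resolves it via nouveau_gpuobj_mthd_call(), as seen in the nv04_fifo.c hunk. A toy model of that dispatch (the flat table is illustrative, not the driver's actual lookup):

#include <stdint.h>
#include <stdio.h>

typedef int (*mthd_fn)(uint32_t data);

static int fence_mthd(uint32_t data) { printf("sequence <- %u\n", data); return 0; }
static int flip_mthd (uint32_t data) { printf("finish page flip\n");     return 0; }

static const struct { uint16_t class; uint16_t mthd; mthd_fn fn; } mthds[] = {
	{ 0x006e, 0x0150, fence_mthd },	/* nv04: fence sequence write */
	{ 0x006e, 0x0500, flip_mthd  },	/* nv04: page flip completion */
	{ 0x016e, 0x0500, flip_mthd  },	/* nv10+: flip only, no 0x0150 */
};

static int mthd_call(uint16_t class, uint16_t mthd, uint32_t data)
{
	unsigned i;

	for (i = 0; i < sizeof(mthds) / sizeof(mthds[0]); i++)
		if (mthds[i].class == class && mthds[i].mthd == mthd)
			return mthds[i].fn(data);
	return -1;	/* unhandled: the real ISR then reports CACHE_ERROR */
}

int main(void)
{
	mthd_call(0x006e, 0x0150, 42);
	return mthd_call(0x016e, 0x0500, 0);
}
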
+ 213 - 0
drivers/gpu/drm/nouveau/nv10_fence.c

@@ -0,0 +1,213 @@
+/*
+ * Copyright 2012 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs <bskeggs@redhat.com>
+ */
+
+#include "drmP.h"
+#include "nouveau_drv.h"
+#include "nouveau_dma.h"
+#include "nouveau_ramht.h"
+#include "nouveau_fence.h"
+
+struct nv10_fence_chan {
+	struct nouveau_fence_chan base;
+};
+
+struct nv10_fence_priv {
+	struct nouveau_fence_priv base;
+	struct nouveau_bo *bo;
+	spinlock_t lock;
+	u32 sequence;
+};
+
+static int
+nv10_fence_emit(struct nouveau_fence *fence)
+{
+	struct nouveau_channel *chan = fence->channel;
+	int ret = RING_SPACE(chan, 2);
+	if (ret == 0) {
+		BEGIN_NV04(chan, 0, NV10_SUBCHAN_REF_CNT, 1);
+		OUT_RING  (chan, fence->sequence);
+		FIRE_RING (chan);
+	}
+	return ret;
+}
+
+
+nv10_fence_sync(struct nouveau_fence *fence,
+		struct nouveau_channel *prev, struct nouveau_channel *chan)
+{
+	return -ENODEV;
+}
+
+static int
+nv17_fence_sync(struct nouveau_fence *fence,
+		struct nouveau_channel *prev, struct nouveau_channel *chan)
+{
+	struct nv10_fence_priv *priv = nv_engine(chan->dev, NVOBJ_ENGINE_FENCE);
+	u32 value;
+	int ret;
+
+	if (!mutex_trylock(&prev->mutex))
+		return -EBUSY;
+
+	spin_lock(&priv->lock);
+	value = priv->sequence;
+	priv->sequence += 2;
+	spin_unlock(&priv->lock);
+
+	ret = RING_SPACE(prev, 5);
+	if (!ret) {
+		BEGIN_NV04(prev, 0, NV11_SUBCHAN_DMA_SEMAPHORE, 4);
+		OUT_RING  (prev, NvSema);
+		OUT_RING  (prev, 0);
+		OUT_RING  (prev, value + 0);
+		OUT_RING  (prev, value + 1);
+		FIRE_RING (prev);
+	}
+
+	if (!ret && !(ret = RING_SPACE(chan, 5))) {
+		BEGIN_NV04(chan, 0, NV11_SUBCHAN_DMA_SEMAPHORE, 4);
+		OUT_RING  (chan, NvSema);
+		OUT_RING  (chan, 0);
+		OUT_RING  (chan, value + 1);
+		OUT_RING  (chan, value + 2);
+		FIRE_RING (chan);
+	}
+
+	mutex_unlock(&prev->mutex);
+	return ret;
+}
+
+static u32
+nv10_fence_read(struct nouveau_channel *chan)
+{
+	return nvchan_rd32(chan, 0x0048);
+}
+
+static void
+nv10_fence_context_del(struct nouveau_channel *chan, int engine)
+{
+	struct nv10_fence_chan *fctx = chan->engctx[engine];
+	nouveau_fence_context_del(&fctx->base);
+	chan->engctx[engine] = NULL;
+	kfree(fctx);
+}
+
+static int
+nv10_fence_context_new(struct nouveau_channel *chan, int engine)
+{
+	struct nv10_fence_priv *priv = nv_engine(chan->dev, engine);
+	struct nv10_fence_chan *fctx;
+	struct nouveau_gpuobj *obj;
+	int ret = 0;
+
+	fctx = chan->engctx[engine] = kzalloc(sizeof(*fctx), GFP_KERNEL);
+	if (!fctx)
+		return -ENOMEM;
+
+	nouveau_fence_context_new(&fctx->base);
+
+	if (priv->bo) {
+		struct ttm_mem_reg *mem = &priv->bo->bo.mem;
+
+		ret = nouveau_gpuobj_dma_new(chan, NV_CLASS_DMA_FROM_MEMORY,
+					     mem->start * PAGE_SIZE, mem->size,
+					     NV_MEM_ACCESS_RW,
+					     NV_MEM_TARGET_VRAM, &obj);
+		if (!ret) {
+			ret = nouveau_ramht_insert(chan, NvSema, obj);
+			nouveau_gpuobj_ref(NULL, &obj);
+		}
+	}
+
+	if (ret)
+		nv10_fence_context_del(chan, engine);
+	return ret;
+}
+
+static int
+nv10_fence_fini(struct drm_device *dev, int engine, bool suspend)
+{
+	return 0;
+}
+
+static int
+nv10_fence_init(struct drm_device *dev, int engine)
+{
+	return 0;
+}
+
+static void
+nv10_fence_destroy(struct drm_device *dev, int engine)
+{
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nv10_fence_priv *priv = nv_engine(dev, engine);
+
+	nouveau_bo_ref(NULL, &priv->bo);
+	dev_priv->eng[engine] = NULL;
+	kfree(priv);
+}
+
+int
+nv10_fence_create(struct drm_device *dev)
+{
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nv10_fence_priv *priv;
+	int ret = 0;
+
+	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	priv->base.engine.destroy = nv10_fence_destroy;
+	priv->base.engine.init = nv10_fence_init;
+	priv->base.engine.fini = nv10_fence_fini;
+	priv->base.engine.context_new = nv10_fence_context_new;
+	priv->base.engine.context_del = nv10_fence_context_del;
+	priv->base.emit = nv10_fence_emit;
+	priv->base.read = nv10_fence_read;
+	priv->base.sync = nv10_fence_sync;
+	dev_priv->eng[NVOBJ_ENGINE_FENCE] = &priv->base.engine;
+	spin_lock_init(&priv->lock);
+
+	if (dev_priv->chipset >= 0x17) {
+		ret = nouveau_bo_new(dev, 4096, 0x1000, TTM_PL_FLAG_VRAM,
+				     0, 0x0000, NULL, &priv->bo);
+		if (!ret) {
+			ret = nouveau_bo_pin(priv->bo, TTM_PL_FLAG_VRAM);
+			if (!ret)
+				ret = nouveau_bo_map(priv->bo);
+			if (ret)
+				nouveau_bo_ref(NULL, &priv->bo);
+		}
+
+		if (ret == 0) {
+			nouveau_bo_wr32(priv->bo, 0x000, 0x00000000);
+			priv->base.sync = nv17_fence_sync;
+		}
+	}
+
+	if (ret)
+		nv10_fence_destroy(dev, NVOBJ_ENGINE_FENCE);
+	return ret;
+}

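nv17_fence_sync() above chains two channels through one semaphore dword (NvSema, offset 0 in the pinned VRAM buffer): prev releases value+1 when it reaches the sync point, chan stalls acquiring value+1, then releases value+2 so the counter keeps advancing for the next pair. Reading the four dwords after NV11_SUBCHAN_DMA_SEMAPHORE as (object, offset, acquire value, release value) is an inference from the hunk; a timeline model under that assumption:

#include <stdint.h>
#include <stdio.h>

static uint32_t sema;	/* the NvSema dword in the pinned buffer */

static void gpu_release(const char *ch, uint32_t v)
{
	sema = v;
	printf("%s: release %u\n", ch, v);
}

static int gpu_acquire(const char *ch, uint32_t v)
{
	/* the real puller stalls the channel until the value matches */
	printf("%s: acquire %u (%s)\n", ch, v, sema == v ? "pass" : "stall");
	return sema == v;
}

int main(void)
{
	uint32_t value = 0;	/* priv->sequence, bumped by 2 per sync */

	gpu_acquire("prev", value + 0);	/* sema already 0: passes */
	gpu_release("prev", value + 1);	/* prev reached the sync point */

	if (gpu_acquire("chan", value + 1))	/* chan was blocked on this */
		gpu_release("chan", value + 2);

	return 0;
}
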
+ 85 - 193
drivers/gpu/drm/nouveau/nv10_fifo.c

@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2007 Ben Skeggs.
+ * Copyright (C) 2012 Ben Skeggs.
  * All Rights Reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining
@@ -27,220 +27,112 @@
 #include "drmP.h"
 #include "drm.h"
 #include "nouveau_drv.h"
+#include "nouveau_fifo.h"
+#include "nouveau_util.h"
 #include "nouveau_ramht.h"
 
-#define NV10_RAMFC(c) (dev_priv->ramfc->pinst + ((c) * NV10_RAMFC__SIZE))
-#define NV10_RAMFC__SIZE ((dev_priv->chipset) >= 0x17 ? 64 : 32)
-
-int
-nv10_fifo_channel_id(struct drm_device *dev)
-{
-	return nv_rd32(dev, NV03_PFIFO_CACHE1_PUSH1) &
-			NV10_PFIFO_CACHE1_PUSH1_CHID_MASK;
-}
-
-int
-nv10_fifo_create_context(struct nouveau_channel *chan)
+static struct ramfc_desc {
+	unsigned bits:6;
+	unsigned ctxs:5;
+	unsigned ctxp:8;
+	unsigned regs:5;
+	unsigned regp;
+} nv10_ramfc[] = {
+	{ 32,  0, 0x00,  0, NV04_PFIFO_CACHE1_DMA_PUT },
+	{ 32,  0, 0x04,  0, NV04_PFIFO_CACHE1_DMA_GET },
+	{ 32,  0, 0x08,  0, NV10_PFIFO_CACHE1_REF_CNT },
+	{ 16,  0, 0x0c,  0, NV04_PFIFO_CACHE1_DMA_INSTANCE },
+	{ 16, 16, 0x0c,  0, NV04_PFIFO_CACHE1_DMA_DCOUNT },
+	{ 32,  0, 0x10,  0, NV04_PFIFO_CACHE1_DMA_STATE },
+	{ 32,  0, 0x14,  0, NV04_PFIFO_CACHE1_DMA_FETCH },
+	{ 32,  0, 0x18,  0, NV04_PFIFO_CACHE1_ENGINE },
+	{ 32,  0, 0x1c,  0, NV04_PFIFO_CACHE1_PULL1 },
+	{}
+};
+
+struct nv10_fifo_priv {
+	struct nouveau_fifo_priv base;
+	struct ramfc_desc *ramfc_desc;
+};
+
+struct nv10_fifo_chan {
+	struct nouveau_fifo_chan base;
+	struct nouveau_gpuobj *ramfc;
+};
+
+static int
+nv10_fifo_context_new(struct nouveau_channel *chan, int engine)
 {
-	struct drm_nouveau_private *dev_priv = chan->dev->dev_private;
 	struct drm_device *dev = chan->dev;
-	uint32_t fc = NV10_RAMFC(chan->id);
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nv10_fifo_priv *priv = nv_engine(dev, engine);
+	struct nv10_fifo_chan *fctx;
+	unsigned long flags;
 	int ret;
 
-	ret = nouveau_gpuobj_new_fake(dev, NV10_RAMFC(chan->id), ~0,
-				      NV10_RAMFC__SIZE, NVOBJ_FLAG_ZERO_ALLOC |
-				      NVOBJ_FLAG_ZERO_FREE, &chan->ramfc);
-	if (ret)
-		return ret;
+	fctx = chan->engctx[engine] = kzalloc(sizeof(*fctx), GFP_KERNEL);
+	if (!fctx)
+		return -ENOMEM;
 
+	/* map channel control registers */
 	chan->user = ioremap(pci_resource_start(dev->pdev, 0) +
 			     NV03_USER(chan->id), PAGE_SIZE);
-	if (!chan->user)
-		return -ENOMEM;
+	if (!chan->user) {
+		ret = -ENOMEM;
+		goto error;
+	}
 
-	/* Fill entries that are seen filled in dumps of nvidia driver just
-	 * after channel's is put into DMA mode
-	 */
-	nv_wi32(dev, fc +  0, chan->pushbuf_base);
-	nv_wi32(dev, fc +  4, chan->pushbuf_base);
-	nv_wi32(dev, fc + 12, chan->pushbuf->pinst >> 4);
-	nv_wi32(dev, fc + 20, NV_PFIFO_CACHE1_DMA_FETCH_TRIG_128_BYTES |
-			      NV_PFIFO_CACHE1_DMA_FETCH_SIZE_128_BYTES |
-			      NV_PFIFO_CACHE1_DMA_FETCH_MAX_REQS_8 |
+	/* initialise default fifo context */
+	ret = nouveau_gpuobj_new_fake(dev, dev_priv->ramfc->pinst +
+				      chan->id * 32, ~0, 32,
+				      NVOBJ_FLAG_ZERO_FREE, &fctx->ramfc);
+	if (ret)
+		goto error;
+
+	nv_wo32(fctx->ramfc, 0x00, chan->pushbuf_base);
+	nv_wo32(fctx->ramfc, 0x04, chan->pushbuf_base);
+	nv_wo32(fctx->ramfc, 0x08, 0x00000000);
+	nv_wo32(fctx->ramfc, 0x0c, chan->pushbuf->pinst >> 4);
+	nv_wo32(fctx->ramfc, 0x10, 0x00000000);
+	nv_wo32(fctx->ramfc, 0x14, NV_PFIFO_CACHE1_DMA_FETCH_TRIG_128_BYTES |
+				   NV_PFIFO_CACHE1_DMA_FETCH_SIZE_128_BYTES |
 #ifdef __BIG_ENDIAN
-			      NV_PFIFO_CACHE1_BIG_ENDIAN |
+				   NV_PFIFO_CACHE1_BIG_ENDIAN |
 #endif
-			      0);
-
-	/* enable the fifo dma operation */
-	nv_wr32(dev, NV04_PFIFO_MODE,
-		nv_rd32(dev, NV04_PFIFO_MODE) | (1 << chan->id));
-	return 0;
-}
-
-static void
-nv10_fifo_do_load_context(struct drm_device *dev, int chid)
-{
-	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	uint32_t fc = NV10_RAMFC(chid), tmp;
-
-	nv_wr32(dev, NV04_PFIFO_CACHE1_DMA_PUT, nv_ri32(dev, fc + 0));
-	nv_wr32(dev, NV04_PFIFO_CACHE1_DMA_GET, nv_ri32(dev, fc + 4));
-	nv_wr32(dev, NV10_PFIFO_CACHE1_REF_CNT, nv_ri32(dev, fc + 8));
-
-	tmp = nv_ri32(dev, fc + 12);
-	nv_wr32(dev, NV04_PFIFO_CACHE1_DMA_INSTANCE, tmp & 0xFFFF);
-	nv_wr32(dev, NV04_PFIFO_CACHE1_DMA_DCOUNT, tmp >> 16);
-
-	nv_wr32(dev, NV04_PFIFO_CACHE1_DMA_STATE, nv_ri32(dev, fc + 16));
-	nv_wr32(dev, NV04_PFIFO_CACHE1_DMA_FETCH, nv_ri32(dev, fc + 20));
-	nv_wr32(dev, NV04_PFIFO_CACHE1_ENGINE, nv_ri32(dev, fc + 24));
-	nv_wr32(dev, NV04_PFIFO_CACHE1_PULL1, nv_ri32(dev, fc + 28));
-
-	if (dev_priv->chipset < 0x17)
-		goto out;
-
-	nv_wr32(dev, NV10_PFIFO_CACHE1_ACQUIRE_VALUE, nv_ri32(dev, fc + 32));
-	tmp = nv_ri32(dev, fc + 36);
-	nv_wr32(dev, NV10_PFIFO_CACHE1_ACQUIRE_TIMESTAMP, tmp);
-	nv_wr32(dev, NV10_PFIFO_CACHE1_ACQUIRE_TIMEOUT, nv_ri32(dev, fc + 40));
-	nv_wr32(dev, NV10_PFIFO_CACHE1_SEMAPHORE, nv_ri32(dev, fc + 44));
-	nv_wr32(dev, NV10_PFIFO_CACHE1_DMA_SUBROUTINE, nv_ri32(dev, fc + 48));
-
-out:
-	nv_wr32(dev, NV03_PFIFO_CACHE1_GET, 0);
-	nv_wr32(dev, NV03_PFIFO_CACHE1_PUT, 0);
-}
-
-int
-nv10_fifo_load_context(struct nouveau_channel *chan)
-{
-	struct drm_device *dev = chan->dev;
-	uint32_t tmp;
-
-	nv10_fifo_do_load_context(dev, chan->id);
+				   NV_PFIFO_CACHE1_DMA_FETCH_MAX_REQS_8);
+	nv_wo32(fctx->ramfc, 0x18, 0x00000000);
+	nv_wo32(fctx->ramfc, 0x1c, 0x00000000);
 
-	nv_wr32(dev, NV03_PFIFO_CACHE1_PUSH1,
-		     NV03_PFIFO_CACHE1_PUSH1_DMA | chan->id);
-	nv_wr32(dev, NV04_PFIFO_CACHE1_DMA_PUSH, 1);
+	/* enable dma mode on the channel */
+	spin_lock_irqsave(&dev_priv->context_switch_lock, flags);
+	nv_mask(dev, NV04_PFIFO_MODE, (1 << chan->id), (1 << chan->id));
+	spin_unlock_irqrestore(&dev_priv->context_switch_lock, flags);
 
-	/* Reset NV04_PFIFO_CACHE1_DMA_CTL_AT_INFO to INVALID */
-	tmp = nv_rd32(dev, NV04_PFIFO_CACHE1_DMA_CTL) & ~(1 << 31);
-	nv_wr32(dev, NV04_PFIFO_CACHE1_DMA_CTL, tmp);
-
-	return 0;
+error:
+	if (ret)
+		priv->base.base.context_del(chan, engine);
+	return ret;
 }
 
 int
-nv10_fifo_unload_context(struct drm_device *dev)
-{
-	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nouveau_fifo_engine *pfifo = &dev_priv->engine.fifo;
-	uint32_t fc, tmp;
-	int chid;
-
-	chid = pfifo->channel_id(dev);
-	if (chid < 0 || chid >= dev_priv->engine.fifo.channels)
-		return 0;
-	fc = NV10_RAMFC(chid);
-
-	nv_wi32(dev, fc +  0, nv_rd32(dev, NV04_PFIFO_CACHE1_DMA_PUT));
-	nv_wi32(dev, fc +  4, nv_rd32(dev, NV04_PFIFO_CACHE1_DMA_GET));
-	nv_wi32(dev, fc +  8, nv_rd32(dev, NV10_PFIFO_CACHE1_REF_CNT));
-	tmp  = nv_rd32(dev, NV04_PFIFO_CACHE1_DMA_INSTANCE) & 0xFFFF;
-	tmp |= (nv_rd32(dev, NV04_PFIFO_CACHE1_DMA_DCOUNT) << 16);
-	nv_wi32(dev, fc + 12, tmp);
-	nv_wi32(dev, fc + 16, nv_rd32(dev, NV04_PFIFO_CACHE1_DMA_STATE));
-	nv_wi32(dev, fc + 20, nv_rd32(dev, NV04_PFIFO_CACHE1_DMA_FETCH));
-	nv_wi32(dev, fc + 24, nv_rd32(dev, NV04_PFIFO_CACHE1_ENGINE));
-	nv_wi32(dev, fc + 28, nv_rd32(dev, NV04_PFIFO_CACHE1_PULL1));
-
-	if (dev_priv->chipset < 0x17)
-		goto out;
-
-	nv_wi32(dev, fc + 32, nv_rd32(dev, NV10_PFIFO_CACHE1_ACQUIRE_VALUE));
-	tmp = nv_rd32(dev, NV10_PFIFO_CACHE1_ACQUIRE_TIMESTAMP);
-	nv_wi32(dev, fc + 36, tmp);
-	nv_wi32(dev, fc + 40, nv_rd32(dev, NV10_PFIFO_CACHE1_ACQUIRE_TIMEOUT));
-	nv_wi32(dev, fc + 44, nv_rd32(dev, NV10_PFIFO_CACHE1_SEMAPHORE));
-	nv_wi32(dev, fc + 48, nv_rd32(dev, NV04_PFIFO_CACHE1_DMA_GET));
-
-out:
-	nv10_fifo_do_load_context(dev, pfifo->channels - 1);
-	nv_wr32(dev, NV03_PFIFO_CACHE1_PUSH1, pfifo->channels - 1);
-	return 0;
-}
-
-static void
-nv10_fifo_init_reset(struct drm_device *dev)
-{
-	nv_wr32(dev, NV03_PMC_ENABLE,
-		nv_rd32(dev, NV03_PMC_ENABLE) & ~NV_PMC_ENABLE_PFIFO);
-	nv_wr32(dev, NV03_PMC_ENABLE,
-		nv_rd32(dev, NV03_PMC_ENABLE) |  NV_PMC_ENABLE_PFIFO);
-
-	nv_wr32(dev, 0x003224, 0x000f0078);
-	nv_wr32(dev, 0x002044, 0x0101ffff);
-	nv_wr32(dev, 0x002040, 0x000000ff);
-	nv_wr32(dev, 0x002500, 0x00000000);
-	nv_wr32(dev, 0x003000, 0x00000000);
-	nv_wr32(dev, 0x003050, 0x00000000);
-
-	nv_wr32(dev, 0x003258, 0x00000000);
-	nv_wr32(dev, 0x003210, 0x00000000);
-	nv_wr32(dev, 0x003270, 0x00000000);
-}
-
-static void
-nv10_fifo_init_ramxx(struct drm_device *dev)
+nv10_fifo_create(struct drm_device *dev)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nv10_fifo_priv *priv;
 
-	nv_wr32(dev, NV03_PFIFO_RAMHT, (0x03 << 24) /* search 128 */ |
-				       ((dev_priv->ramht->bits - 9) << 16) |
-				       (dev_priv->ramht->gpuobj->pinst >> 8));
-	nv_wr32(dev, NV03_PFIFO_RAMRO, dev_priv->ramro->pinst >> 8);
+	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
 
-	if (dev_priv->chipset < 0x17) {
-		nv_wr32(dev, NV03_PFIFO_RAMFC, dev_priv->ramfc->pinst >> 8);
-	} else {
-		nv_wr32(dev, NV03_PFIFO_RAMFC, (dev_priv->ramfc->pinst >> 8) |
-					       (1 << 16) /* 64 Bytes entry*/);
-		/* XXX nvidia blob set bit 18, 21,23 for nv20 & nv30 */
-	}
-}
+	priv->base.base.destroy = nv04_fifo_destroy;
+	priv->base.base.init = nv04_fifo_init;
+	priv->base.base.fini = nv04_fifo_fini;
+	priv->base.base.context_new = nv10_fifo_context_new;
+	priv->base.base.context_del = nv04_fifo_context_del;
+	priv->base.channels = 31;
+	priv->ramfc_desc = nv10_ramfc;
+	dev_priv->eng[NVOBJ_ENGINE_FIFO] = &priv->base.base;
 
-static void
-nv10_fifo_init_intr(struct drm_device *dev)
-{
 	nouveau_irq_register(dev, 8, nv04_fifo_isr);
-	nv_wr32(dev, 0x002100, 0xffffffff);
-	nv_wr32(dev, 0x002140, 0xffffffff);
-}
-
-int
-nv10_fifo_init(struct drm_device *dev)
-{
-	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nouveau_fifo_engine *pfifo = &dev_priv->engine.fifo;
-	int i;
-
-	nv10_fifo_init_reset(dev);
-	nv10_fifo_init_ramxx(dev);
-
-	nv10_fifo_do_load_context(dev, pfifo->channels - 1);
-	nv_wr32(dev, NV03_PFIFO_CACHE1_PUSH1, pfifo->channels - 1);
-
-	nv10_fifo_init_intr(dev);
-	pfifo->enable(dev);
-	pfifo->reassign(dev, true);
-
-	for (i = 0; i < dev_priv->engine.fifo.channels; i++) {
-		if (dev_priv->channels.ptr[i]) {
-			uint32_t mode = nv_rd32(dev, NV04_PFIFO_MODE);
-			nv_wr32(dev, NV04_PFIFO_MODE, mode | (1 << i));
-		}
-	}
-
 	return 0;
 }

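nv10_fifo_create() above keeps only what differs from nv04: it reuses nv04's destroy/init/fini/context_del hooks verbatim and supplies its own context_new plus RAMFC layout table. A minimal model of that hook-sharing pattern (names illustrative):

#include <stdio.h>

struct fifo_ops {
	int (*init)(void);
	int (*context_new)(int chid);
};

static int common_init(void)      { puts("shared nv04 init path"); return 0; }
static int nv04_ctx_new(int chid) { printf("nv04 ctx %d: 32B RAMFC\n", chid); return 0; }
static int nv10_ctx_new(int chid) { printf("nv10 ctx %d: 32B RAMFC + REF_CNT\n", chid); return 0; }

static const struct fifo_ops nv04_fifo = { common_init, nv04_ctx_new };
static const struct fifo_ops nv10_fifo = { common_init, nv10_ctx_new };

int main(void)
{
	const struct fifo_ops *f = &nv10_fifo;	/* picked at probe time */

	f->init();
	return f->context_new(0);
}
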
+ 4 - 9
drivers/gpu/drm/nouveau/nv10_graph.c

@@ -759,7 +759,6 @@ static int
 nv10_graph_unload_context(struct drm_device *dev)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nouveau_fifo_engine *pfifo = &dev_priv->engine.fifo;
 	struct nouveau_channel *chan;
 	struct graph_state *ctx;
 	uint32_t tmp;
@@ -782,7 +781,7 @@ nv10_graph_unload_context(struct drm_device *dev)
 
 	nv_wr32(dev, NV10_PGRAPH_CTX_CONTROL, 0x10000000);
 	tmp  = nv_rd32(dev, NV10_PGRAPH_CTX_USER) & 0x00ffffff;
-	tmp |= (pfifo->channels - 1) << 24;
+	tmp |= 31 << 24;
 	nv_wr32(dev, NV10_PGRAPH_CTX_USER, tmp);
 	return 0;
 }
@@ -822,12 +821,12 @@ struct nouveau_channel *
 nv10_graph_channel(struct drm_device *dev)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	int chid = dev_priv->engine.fifo.channels;
+	int chid = 31;
 
 	if (nv_rd32(dev, NV10_PGRAPH_CTX_CONTROL) & 0x00010000)
 		chid = nv_rd32(dev, NV10_PGRAPH_CTX_USER) >> 24;
 
-	if (chid >= dev_priv->engine.fifo.channels)
+	if (chid >= 31)
 		return NULL;
 
 	return dev_priv->channels.ptr[chid];
@@ -948,7 +947,7 @@ nv10_graph_init(struct drm_device *dev, int engine)
 	nv_wr32(dev, NV10_PGRAPH_STATE, 0xFFFFFFFF);
 
 	tmp  = nv_rd32(dev, NV10_PGRAPH_CTX_USER) & 0x00ffffff;
-	tmp |= (dev_priv->engine.fifo.channels - 1) << 24;
+	tmp |= 31 << 24;
 	nv_wr32(dev, NV10_PGRAPH_CTX_USER, tmp);
 	nv_wr32(dev, NV10_PGRAPH_CTX_CONTROL, 0x10000100);
 	nv_wr32(dev, NV10_PGRAPH_FFINTFC_ST2, 0x08000000);
@@ -1153,10 +1152,6 @@ nv10_graph_create(struct drm_device *dev)
 	NVOBJ_ENGINE_ADD(dev, GR, &pgraph->base);
 	nouveau_irq_register(dev, 12, nv10_graph_isr);
 
-	/* nvsw */
-	NVOBJ_CLASS(dev, 0x506e, SW);
-	NVOBJ_MTHD (dev, 0x506e, 0x0500, nv04_graph_mthd_page_flip);
-
 	NVOBJ_CLASS(dev, 0x0030, GR); /* null */
 	NVOBJ_CLASS(dev, 0x0039, GR); /* m2mf */
 	NVOBJ_CLASS(dev, 0x004a, GR); /* gdirect */

+ 177 - 0
drivers/gpu/drm/nouveau/nv17_fifo.c

@@ -0,0 +1,177 @@
+/*
+ * Copyright (C) 2012 Ben Skeggs.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "drmP.h"
+#include "drm.h"
+#include "nouveau_drv.h"
+#include "nouveau_fifo.h"
+#include "nouveau_util.h"
+#include "nouveau_ramht.h"
+
+static struct ramfc_desc {
+	unsigned bits:6;
+	unsigned ctxs:5;
+	unsigned ctxp:8;
+	unsigned regs:5;
+	unsigned regp;
+} nv17_ramfc[] = {
+	{ 32,  0, 0x00,  0, NV04_PFIFO_CACHE1_DMA_PUT },
+	{ 32,  0, 0x04,  0, NV04_PFIFO_CACHE1_DMA_GET },
+	{ 32,  0, 0x08,  0, NV10_PFIFO_CACHE1_REF_CNT },
+	{ 16,  0, 0x0c,  0, NV04_PFIFO_CACHE1_DMA_INSTANCE },
+	{ 16, 16, 0x0c,  0, NV04_PFIFO_CACHE1_DMA_DCOUNT },
+	{ 32,  0, 0x10,  0, NV04_PFIFO_CACHE1_DMA_STATE },
+	{ 32,  0, 0x14,  0, NV04_PFIFO_CACHE1_DMA_FETCH },
+	{ 32,  0, 0x18,  0, NV04_PFIFO_CACHE1_ENGINE },
+	{ 32,  0, 0x1c,  0, NV04_PFIFO_CACHE1_PULL1 },
+	{ 32,  0, 0x20,  0, NV10_PFIFO_CACHE1_ACQUIRE_VALUE },
+	{ 32,  0, 0x24,  0, NV10_PFIFO_CACHE1_ACQUIRE_TIMESTAMP },
+	{ 32,  0, 0x28,  0, NV10_PFIFO_CACHE1_ACQUIRE_TIMEOUT },
+	{ 32,  0, 0x2c,  0, NV10_PFIFO_CACHE1_SEMAPHORE },
+	{ 32,  0, 0x30,  0, NV10_PFIFO_CACHE1_DMA_SUBROUTINE },
+	{}
+};
+
+struct nv17_fifo_priv {
+	struct nouveau_fifo_priv base;
+	struct ramfc_desc *ramfc_desc;
+};
+
+struct nv17_fifo_chan {
+	struct nouveau_fifo_chan base;
+	struct nouveau_gpuobj *ramfc;
+};
+
+static int
+nv17_fifo_context_new(struct nouveau_channel *chan, int engine)
+{
+	struct drm_device *dev = chan->dev;
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nv17_fifo_priv *priv = nv_engine(dev, engine);
+	struct nv17_fifo_chan *fctx;
+	unsigned long flags;
+	int ret;
+
+	fctx = chan->engctx[engine] = kzalloc(sizeof(*fctx), GFP_KERNEL);
+	if (!fctx)
+		return -ENOMEM;
+
+	/* map channel control registers */
+	chan->user = ioremap(pci_resource_start(dev->pdev, 0) +
+			     NV03_USER(chan->id), PAGE_SIZE);
+	if (!chan->user) {
+		ret = -ENOMEM;
+		goto error;
+	}
+
+	/* initialise default fifo context */
+	ret = nouveau_gpuobj_new_fake(dev, dev_priv->ramfc->pinst +
+				      chan->id * 64, ~0, 64,
+				      NVOBJ_FLAG_ZERO_ALLOC |
+				      NVOBJ_FLAG_ZERO_FREE, &fctx->ramfc);
+	if (ret)
+		goto error;
+
+	nv_wo32(fctx->ramfc, 0x00, chan->pushbuf_base);
+	nv_wo32(fctx->ramfc, 0x04, chan->pushbuf_base);
+	nv_wo32(fctx->ramfc, 0x0c, chan->pushbuf->pinst >> 4);
+	nv_wo32(fctx->ramfc, 0x14, NV_PFIFO_CACHE1_DMA_FETCH_TRIG_128_BYTES |
+				   NV_PFIFO_CACHE1_DMA_FETCH_SIZE_128_BYTES |
+#ifdef __BIG_ENDIAN
+				   NV_PFIFO_CACHE1_BIG_ENDIAN |
+#endif
+				   NV_PFIFO_CACHE1_DMA_FETCH_MAX_REQS_8);
+
+	/* enable dma mode on the channel */
+	spin_lock_irqsave(&dev_priv->context_switch_lock, flags);
+	nv_mask(dev, NV04_PFIFO_MODE, (1 << chan->id), (1 << chan->id));
+	spin_unlock_irqrestore(&dev_priv->context_switch_lock, flags);
+
+error:
+	if (ret)
+		priv->base.base.context_del(chan, engine);
+	return ret;
+}
+
+static int
+nv17_fifo_init(struct drm_device *dev, int engine)
+{
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nv17_fifo_priv *priv = nv_engine(dev, engine);
+	int i;
+
+	nv_mask(dev, NV03_PMC_ENABLE, NV_PMC_ENABLE_PFIFO, 0);
+	nv_mask(dev, NV03_PMC_ENABLE, NV_PMC_ENABLE_PFIFO, NV_PMC_ENABLE_PFIFO);
+
+	nv_wr32(dev, NV04_PFIFO_DELAY_0, 0x000000ff);
+	nv_wr32(dev, NV04_PFIFO_DMA_TIMESLICE, 0x0101ffff);
+
+	nv_wr32(dev, NV03_PFIFO_RAMHT, (0x03 << 24) /* search 128 */ |
+				       ((dev_priv->ramht->bits - 9) << 16) |
+				       (dev_priv->ramht->gpuobj->pinst >> 8));
+	nv_wr32(dev, NV03_PFIFO_RAMRO, dev_priv->ramro->pinst >> 8);
+	nv_wr32(dev, NV03_PFIFO_RAMFC, 0x00010000 |
+				       dev_priv->ramfc->pinst >> 8);
+
+	nv_wr32(dev, NV03_PFIFO_CACHE1_PUSH1, priv->base.channels);
+
+	nv_wr32(dev, NV03_PFIFO_INTR_0, 0xffffffff);
+	nv_wr32(dev, NV03_PFIFO_INTR_EN_0, 0xffffffff);
+
+	nv_wr32(dev, NV03_PFIFO_CACHE1_PUSH0, 1);
+	nv_wr32(dev, NV04_PFIFO_CACHE1_PULL0, 1);
+	nv_wr32(dev, NV03_PFIFO_CACHES, 1);
+
+	for (i = 0; i < priv->base.channels; i++) {
+		if (dev_priv->channels.ptr[i])
+			nv_mask(dev, NV04_PFIFO_MODE, (1 << i), (1 << i));
+	}
+
+	return 0;
+}
+
+int
+nv17_fifo_create(struct drm_device *dev)
+{
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nv17_fifo_priv *priv;
+
+	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	priv->base.base.destroy = nv04_fifo_destroy;
+	priv->base.base.init = nv17_fifo_init;
+	priv->base.base.fini = nv04_fifo_fini;
+	priv->base.base.context_new = nv17_fifo_context_new;
+	priv->base.base.context_del = nv04_fifo_context_del;
+	priv->base.channels = 31;
+	priv->ramfc_desc = nv17_ramfc;
+	dev_priv->eng[NVOBJ_ENGINE_FIFO] = &priv->base.base;
+
+	nouveau_irq_register(dev, 8, nv04_fifo_isr);
+	return 0;
+}

+ 1 - 7
drivers/gpu/drm/nouveau/nv20_graph.c

@@ -43,8 +43,6 @@ struct nv20_graph_engine {
 int
 nv20_graph_unload_context(struct drm_device *dev)
 {
-	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nouveau_fifo_engine *pfifo = &dev_priv->engine.fifo;
 	struct nouveau_channel *chan;
 	struct nouveau_gpuobj *grctx;
 	u32 tmp;
@@ -62,7 +60,7 @@ nv20_graph_unload_context(struct drm_device *dev)
 
 	nv_wr32(dev, NV10_PGRAPH_CTX_CONTROL, 0x10000000);
 	tmp  = nv_rd32(dev, NV10_PGRAPH_CTX_USER) & 0x00ffffff;
-	tmp |= (pfifo->channels - 1) << 24;
+	tmp |= 31 << 24;
 	nv_wr32(dev, NV10_PGRAPH_CTX_USER, tmp);
 	return 0;
 }
@@ -796,10 +794,6 @@ nv20_graph_create(struct drm_device *dev)
 	NVOBJ_ENGINE_ADD(dev, GR, &pgraph->base);
 	nouveau_irq_register(dev, 12, nv20_graph_isr);
 
-	/* nvsw */
-	NVOBJ_CLASS(dev, 0x506e, SW);
-	NVOBJ_MTHD (dev, 0x506e, 0x0500, nv04_graph_mthd_page_flip);
-
 	NVOBJ_CLASS(dev, 0x0030, GR); /* null */
 	NVOBJ_CLASS(dev, 0x0039, GR); /* m2mf */
 	NVOBJ_CLASS(dev, 0x004a, GR); /* gdirect */

+ 3 - 1
drivers/gpu/drm/nouveau/nv31_mpeg.c

@@ -24,6 +24,7 @@
 
 #include "drmP.h"
 #include "nouveau_drv.h"
+#include "nouveau_fifo.h"
 #include "nouveau_ramht.h"
 
 struct nv31_mpeg_engine {
@@ -208,6 +209,7 @@ nv31_mpeg_mthd_dma(struct nouveau_channel *chan, u32 class, u32 mthd, u32 data)
 static int
 nv31_mpeg_isr_chid(struct drm_device *dev, u32 inst)
 {
+	struct nouveau_fifo_priv *pfifo = nv_engine(dev, NVOBJ_ENGINE_FIFO);
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
 	struct nouveau_gpuobj *ctx;
 	unsigned long flags;
@@ -218,7 +220,7 @@ nv31_mpeg_isr_chid(struct drm_device *dev, u32 inst)
 		return 0;
 
 	spin_lock_irqsave(&dev_priv->channels.lock, flags);
-	for (i = 0; i < dev_priv->engine.fifo.channels; i++) {
+	for (i = 0; i < pfifo->channels; i++) {
 		if (!dev_priv->channels.ptr[i])
 			continue;
 

+ 127 - 224
drivers/gpu/drm/nouveau/nv40_fifo.c

@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2007 Ben Skeggs.
+ * Copyright (C) 2012 Ben Skeggs.
  * All Rights Reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining
@@ -25,215 +25,123 @@
  */
 
 #include "drmP.h"
+#include "drm.h"
 #include "nouveau_drv.h"
-#include "nouveau_drm.h"
+#include "nouveau_fifo.h"
+#include "nouveau_util.h"
 #include "nouveau_ramht.h"
 
-#define NV40_RAMFC(c) (dev_priv->ramfc->pinst + ((c) * NV40_RAMFC__SIZE))
-#define NV40_RAMFC__SIZE 128
-
-int
-nv40_fifo_create_context(struct nouveau_channel *chan)
+static struct ramfc_desc {
+	unsigned bits:6;
+	unsigned ctxs:5;
+	unsigned ctxp:8;
+	unsigned regs:5;
+	unsigned regp;
+} nv40_ramfc[] = {
+	{ 32,  0, 0x00,  0, NV04_PFIFO_CACHE1_DMA_PUT },
+	{ 32,  0, 0x04,  0, NV04_PFIFO_CACHE1_DMA_GET },
+	{ 32,  0, 0x08,  0, NV10_PFIFO_CACHE1_REF_CNT },
+	{ 32,  0, 0x0c,  0, NV04_PFIFO_CACHE1_DMA_INSTANCE },
+	{ 32,  0, 0x10,  0, NV04_PFIFO_CACHE1_DMA_DCOUNT },
+	{ 32,  0, 0x14,  0, NV04_PFIFO_CACHE1_DMA_STATE },
+	{ 28,  0, 0x18,  0, NV04_PFIFO_CACHE1_DMA_FETCH },
+	{  2, 28, 0x18, 28, 0x002058 },
+	{ 32,  0, 0x1c,  0, NV04_PFIFO_CACHE1_ENGINE },
+	{ 32,  0, 0x20,  0, NV04_PFIFO_CACHE1_PULL1 },
+	{ 32,  0, 0x24,  0, NV10_PFIFO_CACHE1_ACQUIRE_VALUE },
+	{ 32,  0, 0x28,  0, NV10_PFIFO_CACHE1_ACQUIRE_TIMESTAMP },
+	{ 32,  0, 0x2c,  0, NV10_PFIFO_CACHE1_ACQUIRE_TIMEOUT },
+	{ 32,  0, 0x30,  0, NV10_PFIFO_CACHE1_SEMAPHORE },
+	{ 32,  0, 0x34,  0, NV10_PFIFO_CACHE1_DMA_SUBROUTINE },
+	{ 32,  0, 0x38,  0, NV40_PFIFO_GRCTX_INSTANCE },
+	{ 17,  0, 0x3c,  0, NV04_PFIFO_DMA_TIMESLICE },
+	{ 32,  0, 0x40,  0, 0x0032e4 },
+	{ 32,  0, 0x44,  0, 0x0032e8 },
+	{ 32,  0, 0x4c,  0, 0x002088 },
+	{ 32,  0, 0x50,  0, 0x003300 },
+	{ 32,  0, 0x54,  0, 0x00330c },
+	{}
+};
+
+struct nv40_fifo_priv {
+	struct nouveau_fifo_priv base;
+	struct ramfc_desc *ramfc_desc;
+};
+
+struct nv40_fifo_chan {
+	struct nouveau_fifo_chan base;
+	struct nouveau_gpuobj *ramfc;
+};
+
+static int
+nv40_fifo_context_new(struct nouveau_channel *chan, int engine)
 {
 	struct drm_device *dev = chan->dev;
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	uint32_t fc = NV40_RAMFC(chan->id);
+	struct nv40_fifo_priv *priv = nv_engine(dev, engine);
+	struct nv40_fifo_chan *fctx;
 	unsigned long flags;
 	int ret;
 
-	ret = nouveau_gpuobj_new_fake(dev, NV40_RAMFC(chan->id), ~0,
-				      NV40_RAMFC__SIZE, NVOBJ_FLAG_ZERO_ALLOC |
-				      NVOBJ_FLAG_ZERO_FREE, &chan->ramfc);
-	if (ret)
-		return ret;
-
-	chan->user = ioremap(pci_resource_start(dev->pdev, 0) +
-			     NV40_USER(chan->id), PAGE_SIZE);
-	if (!chan->user)
+	fctx = chan->engctx[engine] = kzalloc(sizeof(*fctx), GFP_KERNEL);
+	if (!fctx)
 		return -ENOMEM;
 
-	spin_lock_irqsave(&dev_priv->context_switch_lock, flags);
+	/* map channel control registers */
+	chan->user = ioremap(pci_resource_start(dev->pdev, 0) +
+			     NV03_USER(chan->id), PAGE_SIZE);
+	if (!chan->user) {
+		ret = -ENOMEM;
+		goto error;
+	}
 
-	nv_wi32(dev, fc +  0, chan->pushbuf_base);
-	nv_wi32(dev, fc +  4, chan->pushbuf_base);
-	nv_wi32(dev, fc + 12, chan->pushbuf->pinst >> 4);
-	nv_wi32(dev, fc + 24, NV_PFIFO_CACHE1_DMA_FETCH_TRIG_128_BYTES |
-			      NV_PFIFO_CACHE1_DMA_FETCH_SIZE_128_BYTES |
-			      NV_PFIFO_CACHE1_DMA_FETCH_MAX_REQS_8 |
+	/* initialise default fifo context */
+	ret = nouveau_gpuobj_new_fake(dev, dev_priv->ramfc->pinst +
+				      chan->id * 128, ~0, 128,
+				      NVOBJ_FLAG_ZERO_ALLOC |
+				      NVOBJ_FLAG_ZERO_FREE, &fctx->ramfc);
+	if (ret)
+		goto error;
+
+	nv_wo32(fctx->ramfc, 0x00, chan->pushbuf_base);
+	nv_wo32(fctx->ramfc, 0x04, chan->pushbuf_base);
+	nv_wo32(fctx->ramfc, 0x0c, chan->pushbuf->pinst >> 4);
+	nv_wo32(fctx->ramfc, 0x18, 0x30000000 |
+				   NV_PFIFO_CACHE1_DMA_FETCH_TRIG_128_BYTES |
+				   NV_PFIFO_CACHE1_DMA_FETCH_SIZE_128_BYTES |
 #ifdef __BIG_ENDIAN
-			      NV_PFIFO_CACHE1_BIG_ENDIAN |
+				   NV_PFIFO_CACHE1_BIG_ENDIAN |
 #endif
-			      0x30000000 /* no idea.. */);
-	nv_wi32(dev, fc + 60, 0x0001FFFF);
-
-	/* enable the fifo dma operation */
-	nv_wr32(dev, NV04_PFIFO_MODE,
-		nv_rd32(dev, NV04_PFIFO_MODE) | (1 << chan->id));
+				   NV_PFIFO_CACHE1_DMA_FETCH_MAX_REQS_8);
+	nv_wo32(fctx->ramfc, 0x3c, 0x0001ffff);
 
+	/* enable dma mode on the channel */
+	spin_lock_irqsave(&dev_priv->context_switch_lock, flags);
+	nv_mask(dev, NV04_PFIFO_MODE, (1 << chan->id), (1 << chan->id));
 	spin_unlock_irqrestore(&dev_priv->context_switch_lock, flags);
-	return 0;
-}
-
-static void
-nv40_fifo_do_load_context(struct drm_device *dev, int chid)
-{
-	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	uint32_t fc = NV40_RAMFC(chid), tmp, tmp2;
-
-	nv_wr32(dev, NV04_PFIFO_CACHE1_DMA_PUT, nv_ri32(dev, fc + 0));
-	nv_wr32(dev, NV04_PFIFO_CACHE1_DMA_GET, nv_ri32(dev, fc + 4));
-	nv_wr32(dev, NV10_PFIFO_CACHE1_REF_CNT, nv_ri32(dev, fc + 8));
-	nv_wr32(dev, NV04_PFIFO_CACHE1_DMA_INSTANCE, nv_ri32(dev, fc + 12));
-	nv_wr32(dev, NV04_PFIFO_CACHE1_DMA_DCOUNT, nv_ri32(dev, fc + 16));
-	nv_wr32(dev, NV04_PFIFO_CACHE1_DMA_STATE, nv_ri32(dev, fc + 20));
-
-	/* No idea what 0x2058 is.. */
-	tmp   = nv_ri32(dev, fc + 24);
-	tmp2  = nv_rd32(dev, 0x2058) & 0xFFF;
-	tmp2 |= (tmp & 0x30000000);
-	nv_wr32(dev, 0x2058, tmp2);
-	tmp  &= ~0x30000000;
-	nv_wr32(dev, NV04_PFIFO_CACHE1_DMA_FETCH, tmp);
 
-	nv_wr32(dev, NV04_PFIFO_CACHE1_ENGINE, nv_ri32(dev, fc + 28));
-	nv_wr32(dev, NV04_PFIFO_CACHE1_PULL1, nv_ri32(dev, fc + 32));
-	nv_wr32(dev, NV10_PFIFO_CACHE1_ACQUIRE_VALUE, nv_ri32(dev, fc + 36));
-	tmp = nv_ri32(dev, fc + 40);
-	nv_wr32(dev, NV10_PFIFO_CACHE1_ACQUIRE_TIMESTAMP, tmp);
-	nv_wr32(dev, NV10_PFIFO_CACHE1_ACQUIRE_TIMEOUT, nv_ri32(dev, fc + 44));
-	nv_wr32(dev, NV10_PFIFO_CACHE1_SEMAPHORE, nv_ri32(dev, fc + 48));
-	nv_wr32(dev, NV10_PFIFO_CACHE1_DMA_SUBROUTINE, nv_ri32(dev, fc + 52));
-	nv_wr32(dev, NV40_PFIFO_GRCTX_INSTANCE, nv_ri32(dev, fc + 56));
+	/*XXX: remove this later, need fifo engine context commit hook */
+	nouveau_gpuobj_ref(fctx->ramfc, &chan->ramfc);
 
-	/* Don't clobber the TIMEOUT_ENABLED flag when restoring from RAMFC */
-	tmp  = nv_rd32(dev, NV04_PFIFO_DMA_TIMESLICE) & ~0x1FFFF;
-	tmp |= nv_ri32(dev, fc + 60) & 0x1FFFF;
-	nv_wr32(dev, NV04_PFIFO_DMA_TIMESLICE, tmp);
-
-	nv_wr32(dev, 0x32e4, nv_ri32(dev, fc + 64));
-	/* NVIDIA does this next line twice... */
-	nv_wr32(dev, 0x32e8, nv_ri32(dev, fc + 68));
-	nv_wr32(dev, 0x2088, nv_ri32(dev, fc + 76));
-	nv_wr32(dev, 0x3300, nv_ri32(dev, fc + 80));
-	nv_wr32(dev, 0x330c, nv_ri32(dev, fc + 84));
-
-	nv_wr32(dev, NV03_PFIFO_CACHE1_GET, 0);
-	nv_wr32(dev, NV03_PFIFO_CACHE1_PUT, 0);
-}
-
-int
-nv40_fifo_load_context(struct nouveau_channel *chan)
-{
-	struct drm_device *dev = chan->dev;
-	uint32_t tmp;
-
-	nv40_fifo_do_load_context(dev, chan->id);
-
-	/* Set channel active, and in DMA mode */
-	nv_wr32(dev, NV03_PFIFO_CACHE1_PUSH1,
-		     NV40_PFIFO_CACHE1_PUSH1_DMA | chan->id);
-	nv_wr32(dev, NV04_PFIFO_CACHE1_DMA_PUSH, 1);
-
-	/* Reset DMA_CTL_AT_INFO to INVALID */
-	tmp = nv_rd32(dev, NV04_PFIFO_CACHE1_DMA_CTL) & ~(1 << 31);
-	nv_wr32(dev, NV04_PFIFO_CACHE1_DMA_CTL, tmp);
-
-	return 0;
+error:
+	if (ret)
+		priv->base.base.context_del(chan, engine);
+	return ret;
 }
 
-int
-nv40_fifo_unload_context(struct drm_device *dev)
+static int
+nv40_fifo_init(struct drm_device *dev, int engine)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nouveau_fifo_engine *pfifo = &dev_priv->engine.fifo;
-	uint32_t fc, tmp;
-	int chid;
-
-	chid = pfifo->channel_id(dev);
-	if (chid < 0 || chid >= dev_priv->engine.fifo.channels)
-		return 0;
-	fc = NV40_RAMFC(chid);
-
-	nv_wi32(dev, fc + 0, nv_rd32(dev, NV04_PFIFO_CACHE1_DMA_PUT));
-	nv_wi32(dev, fc + 4, nv_rd32(dev, NV04_PFIFO_CACHE1_DMA_GET));
-	nv_wi32(dev, fc + 8, nv_rd32(dev, NV10_PFIFO_CACHE1_REF_CNT));
-	nv_wi32(dev, fc + 12, nv_rd32(dev, NV04_PFIFO_CACHE1_DMA_INSTANCE));
-	nv_wi32(dev, fc + 16, nv_rd32(dev, NV04_PFIFO_CACHE1_DMA_DCOUNT));
-	nv_wi32(dev, fc + 20, nv_rd32(dev, NV04_PFIFO_CACHE1_DMA_STATE));
-	tmp  = nv_rd32(dev, NV04_PFIFO_CACHE1_DMA_FETCH);
-	tmp |= nv_rd32(dev, 0x2058) & 0x30000000;
-	nv_wi32(dev, fc + 24, tmp);
-	nv_wi32(dev, fc + 28, nv_rd32(dev, NV04_PFIFO_CACHE1_ENGINE));
-	nv_wi32(dev, fc + 32, nv_rd32(dev, NV04_PFIFO_CACHE1_PULL1));
-	nv_wi32(dev, fc + 36, nv_rd32(dev, NV10_PFIFO_CACHE1_ACQUIRE_VALUE));
-	tmp = nv_rd32(dev, NV10_PFIFO_CACHE1_ACQUIRE_TIMESTAMP);
-	nv_wi32(dev, fc + 40, tmp);
-	nv_wi32(dev, fc + 44, nv_rd32(dev, NV10_PFIFO_CACHE1_ACQUIRE_TIMEOUT));
-	nv_wi32(dev, fc + 48, nv_rd32(dev, NV10_PFIFO_CACHE1_SEMAPHORE));
-	/* NVIDIA read 0x3228 first, then write DMA_GET here.. maybe something
-	 * more involved depending on the value of 0x3228?
-	 */
-	nv_wi32(dev, fc + 52, nv_rd32(dev, NV04_PFIFO_CACHE1_DMA_GET));
-	nv_wi32(dev, fc + 56, nv_rd32(dev, NV40_PFIFO_GRCTX_INSTANCE));
-	nv_wi32(dev, fc + 60, nv_rd32(dev, NV04_PFIFO_DMA_TIMESLICE) & 0x1ffff);
-	/* No idea what the below is for exactly, ripped from a mmio-trace */
-	nv_wi32(dev, fc + 64, nv_rd32(dev, NV40_PFIFO_UNK32E4));
-	/* NVIDIA do this next line twice.. bug? */
-	nv_wi32(dev, fc + 68, nv_rd32(dev, 0x32e8));
-	nv_wi32(dev, fc + 76, nv_rd32(dev, 0x2088));
-	nv_wi32(dev, fc + 80, nv_rd32(dev, 0x3300));
-#if 0 /* no real idea which is PUT/GET in UNK_48.. */
-	tmp  = nv_rd32(dev, NV04_PFIFO_CACHE1_GET);
-	tmp |= (nv_rd32(dev, NV04_PFIFO_CACHE1_PUT) << 16);
-	nv_wi32(dev, fc + 72, tmp);
-#endif
-	nv_wi32(dev, fc + 84, nv_rd32(dev, 0x330c));
-
-	nv40_fifo_do_load_context(dev, pfifo->channels - 1);
-	nv_wr32(dev, NV03_PFIFO_CACHE1_PUSH1,
-		     NV40_PFIFO_CACHE1_PUSH1_DMA | (pfifo->channels - 1));
-	return 0;
-}
-
-static void
-nv40_fifo_init_reset(struct drm_device *dev)
-{
+	struct nv40_fifo_priv *priv = nv_engine(dev, engine);
 	int i;
 
-	nv_wr32(dev, NV03_PMC_ENABLE,
-		nv_rd32(dev, NV03_PMC_ENABLE) & ~NV_PMC_ENABLE_PFIFO);
-	nv_wr32(dev, NV03_PMC_ENABLE,
-		nv_rd32(dev, NV03_PMC_ENABLE) |  NV_PMC_ENABLE_PFIFO);
+	nv_mask(dev, NV03_PMC_ENABLE, NV_PMC_ENABLE_PFIFO, 0);
+	nv_mask(dev, NV03_PMC_ENABLE, NV_PMC_ENABLE_PFIFO, NV_PMC_ENABLE_PFIFO);
 
-	nv_wr32(dev, 0x003224, 0x000f0078);
-	nv_wr32(dev, 0x003210, 0x00000000);
-	nv_wr32(dev, 0x003270, 0x00000000);
-	nv_wr32(dev, 0x003240, 0x00000000);
-	nv_wr32(dev, 0x003244, 0x00000000);
-	nv_wr32(dev, 0x003258, 0x00000000);
-	nv_wr32(dev, 0x002504, 0x00000000);
-	for (i = 0; i < 16; i++)
-		nv_wr32(dev, 0x002510 + (i * 4), 0x00000000);
-	nv_wr32(dev, 0x00250c, 0x0000ffff);
-	nv_wr32(dev, 0x002048, 0x00000000);
-	nv_wr32(dev, 0x003228, 0x00000000);
-	nv_wr32(dev, 0x0032e8, 0x00000000);
-	nv_wr32(dev, 0x002410, 0x00000000);
-	nv_wr32(dev, 0x002420, 0x00000000);
-	nv_wr32(dev, 0x002058, 0x00000001);
-	nv_wr32(dev, 0x00221c, 0x00000000);
-	/* something with 0x2084, read/modify/write, no change */
 	nv_wr32(dev, 0x002040, 0x000000ff);
-	nv_wr32(dev, 0x002500, 0x00000000);
-	nv_wr32(dev, 0x003200, 0x00000000);
-
-	nv_wr32(dev, NV04_PFIFO_DMA_TIMESLICE, 0x2101ffff);
-}
-
-static void
-nv40_fifo_init_ramxx(struct drm_device *dev)
-{
-	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	nv_wr32(dev, 0x002044, 0x2101ffff);
+	nv_wr32(dev, 0x002058, 0x00000001);
 
 	nv_wr32(dev, NV03_PFIFO_RAMHT, (0x03 << 24) /* search 128 */ |
 				       ((dev_priv->ramht->bits - 9) << 16) |
@@ -244,64 +152,59 @@ nv40_fifo_init_ramxx(struct drm_device *dev)
 	case 0x47:
 	case 0x49:
 	case 0x4b:
-		nv_wr32(dev, 0x2230, 1);
-		break;
-	default:
-		break;
-	}
-
-	switch (dev_priv->chipset) {
+		nv_wr32(dev, 0x002230, 0x00000001);
 	case 0x40:
 	case 0x41:
 	case 0x42:
 	case 0x43:
 	case 0x45:
-	case 0x47:
 	case 0x48:
-	case 0x49:
-	case 0x4b:
-		nv_wr32(dev, NV40_PFIFO_RAMFC, 0x30002);
+		nv_wr32(dev, 0x002220, 0x00030002);
 		break;
 	default:
-		nv_wr32(dev, 0x2230, 0);
-		nv_wr32(dev, NV40_PFIFO_RAMFC,
-			((dev_priv->vram_size - 512 * 1024 +
-			  dev_priv->ramfc->pinst) >> 16) | (3 << 16));
+		nv_wr32(dev, 0x002230, 0x00000000);
+		nv_wr32(dev, 0x002220, ((dev_priv->vram_size - 512 * 1024 +
+					 dev_priv->ramfc->pinst) >> 16) |
+				       0x00030000);
 		break;
 	}
-}
 
-static void
-nv40_fifo_init_intr(struct drm_device *dev)
-{
-	nouveau_irq_register(dev, 8, nv04_fifo_isr);
-	nv_wr32(dev, 0x002100, 0xffffffff);
-	nv_wr32(dev, 0x002140, 0xffffffff);
+	nv_wr32(dev, NV03_PFIFO_CACHE1_PUSH1, priv->base.channels);
+
+	nv_wr32(dev, NV03_PFIFO_INTR_0, 0xffffffff);
+	nv_wr32(dev, NV03_PFIFO_INTR_EN_0, 0xffffffff);
+
+	nv_wr32(dev, NV03_PFIFO_CACHE1_PUSH0, 1);
+	nv_wr32(dev, NV04_PFIFO_CACHE1_PULL0, 1);
+	nv_wr32(dev, NV03_PFIFO_CACHES, 1);
+
+	for (i = 0; i < priv->base.channels; i++) {
+		if (dev_priv->channels.ptr[i])
+			nv_mask(dev, NV04_PFIFO_MODE, (1 << i), (1 << i));
+	}
+
+	return 0;
 }
 
 int
-nv40_fifo_init(struct drm_device *dev)
+nv40_fifo_create(struct drm_device *dev)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nouveau_fifo_engine *pfifo = &dev_priv->engine.fifo;
-	int i;
-
-	nv40_fifo_init_reset(dev);
-	nv40_fifo_init_ramxx(dev);
+	struct nv40_fifo_priv *priv;
 
-	nv40_fifo_do_load_context(dev, pfifo->channels - 1);
-	nv_wr32(dev, NV03_PFIFO_CACHE1_PUSH1, pfifo->channels - 1);
-
-	nv40_fifo_init_intr(dev);
-	pfifo->enable(dev);
-	pfifo->reassign(dev, true);
+	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
 
-	for (i = 0; i < dev_priv->engine.fifo.channels; i++) {
-		if (dev_priv->channels.ptr[i]) {
-			uint32_t mode = nv_rd32(dev, NV04_PFIFO_MODE);
-			nv_wr32(dev, NV04_PFIFO_MODE, mode | (1 << i));
-		}
-	}
+	priv->base.base.destroy = nv04_fifo_destroy;
+	priv->base.base.init = nv40_fifo_init;
+	priv->base.base.fini = nv04_fifo_fini;
+	priv->base.base.context_new = nv40_fifo_context_new;
+	priv->base.base.context_del = nv04_fifo_context_del;
+	priv->base.channels = 31;
+	priv->ramfc_desc = nv40_ramfc;
+	dev_priv->eng[NVOBJ_ENGINE_FIFO] = &priv->base.base;
 
+	nouveau_irq_register(dev, 8, nv04_fifo_isr);
 	return 0;
 }
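
The nv40_ramfc[] table above replaces the old hand-rolled load/unload routines:
each entry maps one bitfield of the RAMFC image to a PFIFO register, with a
zero-bits entry terminating the list. The teardown path (the shared
nv04_fifo_context_del(), per the hooks set in nv40_fifo_create()) can then save
state generically by walking the table. A minimal sketch, assuming struct
ramfc_desc names its fields bits/ctxs/ctxp/regs/regp in initialiser order:

	/* sketch: fold live PFIFO register state back into a channel's
	 * RAMFC image by walking its ramfc_desc table */
	static void
	example_ramfc_save(struct drm_device *dev,
			   struct nouveau_gpuobj *ramfc, struct ramfc_desc *c)
	{
		do {
			u32 rm = ((1ULL << c->bits) - 1) << c->regs; /* register mask */
			u32 cm = ((1ULL << c->bits) - 1) << c->ctxs; /* context mask */
			u32 rv = (nv_rd32(dev, c->regp) & rm) >> c->regs;
			u32 cv = nv_ro32(ramfc, c->ctxp) & ~cm;
			nv_wo32(ramfc, c->ctxp, cv | (rv << c->ctxs));
		} while ((++c)->bits);
	}

The restore direction would be the same loop with the reads and writes swapped,
which is what makes one descriptor table enough for both.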

+ 7 - 30
drivers/gpu/drm/nouveau/nv40_graph.c

@@ -27,7 +27,7 @@
 #include "drmP.h"
 #include "drm.h"
 #include "nouveau_drv.h"
-#include "nouveau_grctx.h"
+#include "nouveau_fifo.h"
 #include "nouveau_ramht.h"
 
 struct nv40_graph_engine {
@@ -42,7 +42,6 @@ nv40_graph_context_new(struct nouveau_channel *chan, int engine)
 	struct drm_device *dev = chan->dev;
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
 	struct nouveau_gpuobj *grctx = NULL;
-	struct nouveau_grctx ctx = {};
 	unsigned long flags;
 	int ret;
 
@@ -52,11 +51,7 @@ nv40_graph_context_new(struct nouveau_channel *chan, int engine)
 		return ret;
 
 	/* Initialise default context values */
-	ctx.dev = chan->dev;
-	ctx.mode = NOUVEAU_GRCTX_VALS;
-	ctx.data = grctx;
-	nv40_grctx_init(&ctx);
-
+	nv40_grctx_fill(dev, grctx);
 	nv_wo32(grctx, 0, grctx->vinst);
 
 	/* init grctx pointer in ramfc, and on PFIFO if channel is
@@ -184,8 +179,7 @@ nv40_graph_init(struct drm_device *dev, int engine)
 	struct nv40_graph_engine *pgraph = nv_engine(dev, engine);
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
 	struct nouveau_fb_engine *pfb = &dev_priv->engine.fb;
-	struct nouveau_grctx ctx = {};
-	uint32_t vramsz, *cp;
+	uint32_t vramsz;
 	int i, j;
 
 	nv_wr32(dev, NV03_PMC_ENABLE, nv_rd32(dev, NV03_PMC_ENABLE) &
@@ -193,22 +187,8 @@ nv40_graph_init(struct drm_device *dev, int engine)
 	nv_wr32(dev, NV03_PMC_ENABLE, nv_rd32(dev, NV03_PMC_ENABLE) |
 			 NV_PMC_ENABLE_PGRAPH);
 
-	cp = kmalloc(sizeof(*cp) * 256, GFP_KERNEL);
-	if (!cp)
-		return -ENOMEM;
-
-	ctx.dev = dev;
-	ctx.mode = NOUVEAU_GRCTX_PROG;
-	ctx.data = cp;
-	ctx.ctxprog_max = 256;
-	nv40_grctx_init(&ctx);
-	pgraph->grctx_size = ctx.ctxvals_pos * 4;
-
-	nv_wr32(dev, NV40_PGRAPH_CTXCTL_UCODE_INDEX, 0);
-	for (i = 0; i < ctx.ctxprog_len; i++)
-		nv_wr32(dev, NV40_PGRAPH_CTXCTL_UCODE_DATA, cp[i]);
-
-	kfree(cp);
+	/* generate and upload context program */
+	nv40_grctx_init(dev, &pgraph->grctx_size);
 
 	/* No context present currently */
 	nv_wr32(dev, NV40_PGRAPH_CTXCTL_CUR, 0x00000000);
@@ -366,13 +346,14 @@ nv40_graph_fini(struct drm_device *dev, int engine, bool suspend)
 static int
 nv40_graph_isr_chid(struct drm_device *dev, u32 inst)
 {
+	struct nouveau_fifo_priv *pfifo = nv_engine(dev, NVOBJ_ENGINE_FIFO);
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
 	struct nouveau_gpuobj *grctx;
 	unsigned long flags;
 	int i;
 
 	spin_lock_irqsave(&dev_priv->channels.lock, flags);
-	for (i = 0; i < dev_priv->engine.fifo.channels; i++) {
+	for (i = 0; i < pfifo->channels; i++) {
 		if (!dev_priv->channels.ptr[i])
 			continue;
 		grctx = dev_priv->channels.ptr[i]->engctx[NVOBJ_ENGINE_GR];
@@ -460,7 +441,6 @@ nv40_graph_create(struct drm_device *dev)
 	NVOBJ_ENGINE_ADD(dev, GR, &pgraph->base);
 	nouveau_irq_register(dev, 12, nv40_graph_isr);
 
-	NVOBJ_CLASS(dev, 0x506e, SW); /* nvsw */
 	NVOBJ_CLASS(dev, 0x0030, GR); /* null */
 	NVOBJ_CLASS(dev, 0x0039, GR); /* m2mf */
 	NVOBJ_CLASS(dev, 0x004a, GR); /* gdirect */
@@ -483,8 +463,5 @@ nv40_graph_create(struct drm_device *dev)
 	else
 		NVOBJ_CLASS(dev, 0x4097, GR);
 
-	/* nvsw */
-	NVOBJ_CLASS(dev, 0x506e, SW);
-	NVOBJ_MTHD (dev, 0x506e, 0x0500, nv04_graph_mthd_page_flip);
 	return 0;
 }
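
nv40_graph_isr_chid() now gets the channel count from the FIFO engine object
via nv_engine() instead of the old dev_priv->engine.fifo struct. The lookup is
presumably just a typed read of the per-engine slot that nv40_fifo_create() and
friends fill in; a sketch under that assumption:

	/* assumed shape of nv_engine(): return the nouveau_exec_engine
	 * pointer stored in dev_priv->eng[], which callers cast to their
	 * container type (nouveau_fifo_priv embeds it as ".base") */
	static inline void *
	example_nv_engine(struct drm_device *dev, int engine)
	{
		struct drm_nouveau_private *dev_priv = dev->dev_private;
		return (void *)dev_priv->eng[engine];
	}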

+ 30 - 2
drivers/gpu/drm/nouveau/nv40_grctx.c

@@ -595,8 +595,8 @@ nv40_graph_construct_shader(struct nouveau_grctx *ctx)
 	}
 }
 
-void
-nv40_grctx_init(struct nouveau_grctx *ctx)
+static void
+nv40_grctx_generate(struct nouveau_grctx *ctx)
 {
 	/* decide whether we're loading/unloading the context */
 	cp_bra (ctx, AUTO_SAVE, PENDING, cp_setup_save);
@@ -660,3 +660,31 @@ nv40_grctx_init(struct nouveau_grctx *ctx)
 	cp_out (ctx, CP_END);
 }
 
+void
+nv40_grctx_fill(struct drm_device *dev, struct nouveau_gpuobj *mem)
+{
+	nv40_grctx_generate(&(struct nouveau_grctx) {
+			     .dev = dev,
+			     .mode = NOUVEAU_GRCTX_VALS,
+			     .data = mem,
+			   });
+}
+
+void
+nv40_grctx_init(struct drm_device *dev, u32 *size)
+{
+	u32 ctxprog[256], i;
+	struct nouveau_grctx ctx = {
+		.dev = dev,
+		.mode = NOUVEAU_GRCTX_PROG,
+		.data = ctxprog,
+		.ctxprog_max = ARRAY_SIZE(ctxprog)
+	};
+
+	nv40_grctx_generate(&ctx);
+
+	nv_wr32(dev, NV40_PGRAPH_CTXCTL_UCODE_INDEX, 0);
+	for (i = 0; i < ctx.ctxprog_len; i++)
+		nv_wr32(dev, NV40_PGRAPH_CTXCTL_UCODE_DATA, ctxprog[i]);
+	*size = ctx.ctxvals_pos * 4;
+}
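
nv40_grctx_fill() feeds the generator a C99 compound literal instead of
declaring a named context on the stack; the temporary has automatic storage for
the whole enclosing block, so passing its address into the call is safe. The
spelled-out equivalent:

	/* equivalent of the compound-literal call in nv40_grctx_fill() */
	void
	example_grctx_fill(struct drm_device *dev, struct nouveau_gpuobj *mem)
	{
		struct nouveau_grctx ctx = {
			.dev  = dev,
			.mode = NOUVEAU_GRCTX_VALS,
			.data = mem,
		};

		nv40_grctx_generate(&ctx);
	}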

+ 1 - 0
drivers/gpu/drm/nouveau/nv40_pm.c

@@ -27,6 +27,7 @@
 #include "nouveau_bios.h"
 #include "nouveau_pm.h"
 #include "nouveau_hw.h"
+#include "nouveau_fifo.h"
 
 #define min2(a,b) ((a) < (b) ? (a) : (b))
 

+ 44 - 54
drivers/gpu/drm/nouveau/nv50_crtc.c

@@ -79,15 +79,15 @@ nv50_crtc_blank(struct nouveau_crtc *nv_crtc, bool blanked)
 			NV_ERROR(dev, "no space while blanking crtc\n");
 			return ret;
 		}
-		BEGIN_RING(evo, 0, NV50_EVO_CRTC(index, CLUT_MODE), 2);
+		BEGIN_NV04(evo, 0, NV50_EVO_CRTC(index, CLUT_MODE), 2);
 		OUT_RING(evo, NV50_EVO_CRTC_CLUT_MODE_BLANK);
 		OUT_RING(evo, 0);
 		if (dev_priv->chipset != 0x50) {
-			BEGIN_RING(evo, 0, NV84_EVO_CRTC(index, CLUT_DMA), 1);
+			BEGIN_NV04(evo, 0, NV84_EVO_CRTC(index, CLUT_DMA), 1);
 			OUT_RING(evo, NV84_EVO_CRTC_CLUT_DMA_HANDLE_NONE);
 		}
 
-		BEGIN_RING(evo, 0, NV50_EVO_CRTC(index, FB_DMA), 1);
+		BEGIN_NV04(evo, 0, NV50_EVO_CRTC(index, FB_DMA), 1);
 		OUT_RING(evo, NV50_EVO_CRTC_FB_DMA_HANDLE_NONE);
 	} else {
 		if (nv_crtc->cursor.visible)
@@ -100,20 +100,20 @@ nv50_crtc_blank(struct nouveau_crtc *nv_crtc, bool blanked)
 			NV_ERROR(dev, "no space while unblanking crtc\n");
 			return ret;
 		}
-		BEGIN_RING(evo, 0, NV50_EVO_CRTC(index, CLUT_MODE), 2);
+		BEGIN_NV04(evo, 0, NV50_EVO_CRTC(index, CLUT_MODE), 2);
 		OUT_RING(evo, nv_crtc->lut.depth == 8 ?
 				NV50_EVO_CRTC_CLUT_MODE_OFF :
 				NV50_EVO_CRTC_CLUT_MODE_ON);
 		OUT_RING(evo, nv_crtc->lut.nvbo->bo.offset >> 8);
 		if (dev_priv->chipset != 0x50) {
-			BEGIN_RING(evo, 0, NV84_EVO_CRTC(index, CLUT_DMA), 1);
+			BEGIN_NV04(evo, 0, NV84_EVO_CRTC(index, CLUT_DMA), 1);
 			OUT_RING(evo, NvEvoVRAM);
 		}
 
-		BEGIN_RING(evo, 0, NV50_EVO_CRTC(index, FB_OFFSET), 2);
+		BEGIN_NV04(evo, 0, NV50_EVO_CRTC(index, FB_OFFSET), 2);
 		OUT_RING(evo, nv_crtc->fb.offset >> 8);
 		OUT_RING(evo, 0);
-		BEGIN_RING(evo, 0, NV50_EVO_CRTC(index, FB_DMA), 1);
+		BEGIN_NV04(evo, 0, NV50_EVO_CRTC(index, FB_DMA), 1);
 		if (dev_priv->chipset != 0x50)
 			if (nv_crtc->fb.tile_flags == 0x7a00 ||
 			    nv_crtc->fb.tile_flags == 0xfe00)
@@ -158,10 +158,10 @@ nv50_crtc_set_dither(struct nouveau_crtc *nv_crtc, bool update)
 
 	ret = RING_SPACE(evo, 2 + (update ? 2 : 0));
 	if (ret == 0) {
-		BEGIN_RING(evo, 0, NV50_EVO_CRTC(head, DITHER_CTRL), 1);
+		BEGIN_NV04(evo, 0, NV50_EVO_CRTC(head, DITHER_CTRL), 1);
 		OUT_RING  (evo, mode);
 		if (update) {
-			BEGIN_RING(evo, 0, NV50_EVO_UPDATE, 1);
+			BEGIN_NV04(evo, 0, NV50_EVO_UPDATE, 1);
 			OUT_RING  (evo, 0);
 			FIRE_RING (evo);
 		}
@@ -193,11 +193,11 @@ nv50_crtc_set_color_vibrance(struct nouveau_crtc *nv_crtc, bool update)
 
 	hue = ((nv_crtc->vibrant_hue * 2047) / 100) & 0xfff;
 
-	BEGIN_RING(evo, 0, NV50_EVO_CRTC(nv_crtc->index, COLOR_CTRL), 1);
+	BEGIN_NV04(evo, 0, NV50_EVO_CRTC(nv_crtc->index, COLOR_CTRL), 1);
 	OUT_RING  (evo, (hue << 20) | (vib << 8));
 
 	if (update) {
-		BEGIN_RING(evo, 0, NV50_EVO_UPDATE, 1);
+		BEGIN_NV04(evo, 0, NV50_EVO_UPDATE, 1);
 		OUT_RING  (evo, 0);
 		FIRE_RING (evo);
 	}
@@ -311,9 +311,9 @@ nv50_crtc_set_scale(struct nouveau_crtc *nv_crtc, bool update)
 	if (ret)
 		return ret;
 
-	BEGIN_RING(evo, 0, NV50_EVO_CRTC(nv_crtc->index, SCALE_CTRL), 1);
+	BEGIN_NV04(evo, 0, NV50_EVO_CRTC(nv_crtc->index, SCALE_CTRL), 1);
 	OUT_RING  (evo, ctrl);
-	BEGIN_RING(evo, 0, NV50_EVO_CRTC(nv_crtc->index, SCALE_RES1), 2);
+	BEGIN_NV04(evo, 0, NV50_EVO_CRTC(nv_crtc->index, SCALE_RES1), 2);
 	OUT_RING  (evo, oY << 16 | oX);
 	OUT_RING  (evo, oY << 16 | oX);
 
@@ -383,23 +383,15 @@ nv50_crtc_set_clock(struct drm_device *dev, int head, int pclk)
 static void
 nv50_crtc_destroy(struct drm_crtc *crtc)
 {
-	struct drm_device *dev;
-	struct nouveau_crtc *nv_crtc;
-
-	if (!crtc)
-		return;
-
-	dev = crtc->dev;
-	nv_crtc = nouveau_crtc(crtc);
-
-	NV_DEBUG_KMS(dev, "\n");
+	struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc);
 
-	drm_crtc_cleanup(&nv_crtc->base);
+	NV_DEBUG_KMS(crtc->dev, "\n");
 
 	nouveau_bo_unmap(nv_crtc->lut.nvbo);
 	nouveau_bo_ref(NULL, &nv_crtc->lut.nvbo);
 	nouveau_bo_unmap(nv_crtc->cursor.nvbo);
 	nouveau_bo_ref(NULL, &nv_crtc->cursor.nvbo);
+	drm_crtc_cleanup(&nv_crtc->base);
 	kfree(nv_crtc);
 }
 
@@ -593,7 +585,7 @@ nv50_crtc_do_mode_set_base(struct drm_crtc *crtc,
 		if (ret)
 			return ret;
 
-		BEGIN_RING(evo, 0, NV50_EVO_CRTC(nv_crtc->index, FB_DMA), 1);
+		BEGIN_NV04(evo, 0, NV50_EVO_CRTC(nv_crtc->index, FB_DMA), 1);
 		OUT_RING  (evo, fb->r_dma);
 	}
 
@@ -601,18 +593,18 @@ nv50_crtc_do_mode_set_base(struct drm_crtc *crtc,
 	if (ret)
 		return ret;
 
-	BEGIN_RING(evo, 0, NV50_EVO_CRTC(nv_crtc->index, FB_OFFSET), 5);
+	BEGIN_NV04(evo, 0, NV50_EVO_CRTC(nv_crtc->index, FB_OFFSET), 5);
 	OUT_RING  (evo, nv_crtc->fb.offset >> 8);
 	OUT_RING  (evo, 0);
 	OUT_RING  (evo, (drm_fb->height << 16) | drm_fb->width);
 	OUT_RING  (evo, fb->r_pitch);
 	OUT_RING  (evo, fb->r_format);
 
-	BEGIN_RING(evo, 0, NV50_EVO_CRTC(nv_crtc->index, CLUT_MODE), 1);
+	BEGIN_NV04(evo, 0, NV50_EVO_CRTC(nv_crtc->index, CLUT_MODE), 1);
 	OUT_RING  (evo, fb->base.depth == 8 ?
 		   NV50_EVO_CRTC_CLUT_MODE_OFF : NV50_EVO_CRTC_CLUT_MODE_ON);
 
-	BEGIN_RING(evo, 0, NV50_EVO_CRTC(nv_crtc->index, FB_POS), 1);
+	BEGIN_NV04(evo, 0, NV50_EVO_CRTC(nv_crtc->index, FB_POS), 1);
 	OUT_RING  (evo, (y << 16) | x);
 
 	if (nv_crtc->lut.depth != fb->base.depth) {
@@ -672,23 +664,23 @@ nv50_crtc_mode_set(struct drm_crtc *crtc, struct drm_display_mode *umode,
 
 	ret = RING_SPACE(evo, 18);
 	if (ret == 0) {
-		BEGIN_RING(evo, 0, 0x0804 + head, 2);
+		BEGIN_NV04(evo, 0, 0x0804 + head, 2);
 		OUT_RING  (evo, 0x00800000 | mode->clock);
 		OUT_RING  (evo, (ilace == 2) ? 2 : 0);
-		BEGIN_RING(evo, 0, 0x0810 + head, 6);
+		BEGIN_NV04(evo, 0, 0x0810 + head, 6);
 		OUT_RING  (evo, 0x00000000); /* border colour */
 		OUT_RING  (evo, (vactive << 16) | hactive);
 		OUT_RING  (evo, ( vsynce << 16) | hsynce);
 		OUT_RING  (evo, (vblanke << 16) | hblanke);
 		OUT_RING  (evo, (vblanks << 16) | hblanks);
 		OUT_RING  (evo, (vblan2e << 16) | vblan2s);
-		BEGIN_RING(evo, 0, 0x082c + head, 1);
+		BEGIN_NV04(evo, 0, 0x082c + head, 1);
 		OUT_RING  (evo, 0x00000000);
-		BEGIN_RING(evo, 0, 0x0900 + head, 1);
+		BEGIN_NV04(evo, 0, 0x0900 + head, 1);
 		OUT_RING  (evo, 0x00000311); /* makes sync channel work */
-		BEGIN_RING(evo, 0, 0x08c8 + head, 1);
+		BEGIN_NV04(evo, 0, 0x08c8 + head, 1);
 		OUT_RING  (evo, (umode->vdisplay << 16) | umode->hdisplay);
-		BEGIN_RING(evo, 0, 0x08d4 + head, 1);
+		BEGIN_NV04(evo, 0, 0x08d4 + head, 1);
 		OUT_RING  (evo, 0x00000000); /* screen position */
 	}
 
@@ -755,18 +747,22 @@ nv50_crtc_create(struct drm_device *dev, int index)
 	if (!nv_crtc)
 		return -ENOMEM;
 
+	nv_crtc->index = index;
+	nv_crtc->set_dither = nv50_crtc_set_dither;
+	nv_crtc->set_scale = nv50_crtc_set_scale;
+	nv_crtc->set_color_vibrance = nv50_crtc_set_color_vibrance;
 	nv_crtc->color_vibrance = 50;
 	nv_crtc->vibrant_hue = 0;
-
-	/* Default CLUT parameters, will be activated on the hw upon
-	 * first mode set.
-	 */
+	nv_crtc->lut.depth = 0;
 	for (i = 0; i < 256; i++) {
 		nv_crtc->lut.r[i] = i << 8;
 		nv_crtc->lut.g[i] = i << 8;
 		nv_crtc->lut.b[i] = i << 8;
 	}
-	nv_crtc->lut.depth = 0;
+
+	drm_crtc_init(dev, &nv_crtc->base, &nv50_crtc_funcs);
+	drm_crtc_helper_add(&nv_crtc->base, &nv50_crtc_helper_funcs);
+	drm_mode_crtc_set_gamma_size(&nv_crtc->base, 256);
 
 	ret = nouveau_bo_new(dev, 4096, 0x100, TTM_PL_FLAG_VRAM,
 			     0, 0x0000, NULL, &nv_crtc->lut.nvbo);
@@ -778,21 +774,9 @@ nv50_crtc_create(struct drm_device *dev, int index)
 			nouveau_bo_ref(NULL, &nv_crtc->lut.nvbo);
 	}
 
-	if (ret) {
-		kfree(nv_crtc);
-		return ret;
-	}
-
-	nv_crtc->index = index;
+	if (ret)
+		goto out;
 
-	/* set function pointers */
-	nv_crtc->set_dither = nv50_crtc_set_dither;
-	nv_crtc->set_scale = nv50_crtc_set_scale;
-	nv_crtc->set_color_vibrance = nv50_crtc_set_color_vibrance;
-
-	drm_crtc_init(dev, &nv_crtc->base, &nv50_crtc_funcs);
-	drm_crtc_helper_add(&nv_crtc->base, &nv50_crtc_helper_funcs);
-	drm_mode_crtc_set_gamma_size(&nv_crtc->base, 256);
 
 	ret = nouveau_bo_new(dev, 64*64*4, 0x100, TTM_PL_FLAG_VRAM,
 			     0, 0x0000, NULL, &nv_crtc->cursor.nvbo);
@@ -804,6 +788,12 @@ nv50_crtc_create(struct drm_device *dev, int index)
 			nouveau_bo_ref(NULL, &nv_crtc->cursor.nvbo);
 	}
 
+	if (ret)
+		goto out;
+
 	nv50_cursor_init(nv_crtc);
-	return 0;
+out:
+	if (ret)
+		nv50_crtc_destroy(&nv_crtc->base);
+	return ret;
 }
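
nv50_crtc_create() now registers the CRTC with DRM before allocating its buffer
objects, which lets every failure funnel through a single out: label that hands
the partially-built object to nv50_crtc_destroy() (whose cleanup order was
adjusted above to cope). The shape of that unwind, with example_alloc_lut() and
example_alloc_cursor() as hypothetical stand-ins for the two
nouveau_bo_new()-and-map sequences:

	static int
	example_crtc_create(struct drm_device *dev, struct nouveau_crtc *nv_crtc)
	{
		int ret;

		/* register with DRM first, so destroy() is valid from here on */
		drm_crtc_init(dev, &nv_crtc->base, &nv50_crtc_funcs);
		drm_crtc_helper_add(&nv_crtc->base, &nv50_crtc_helper_funcs);

		ret = example_alloc_lut(nv_crtc);	/* hypothetical helper */
		if (ret)
			goto out;

		ret = example_alloc_cursor(nv_crtc);	/* hypothetical helper */
	out:
		if (ret)
			nv50_crtc_destroy(&nv_crtc->base); /* one teardown path */
		return ret;
	}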

+ 6 - 6
drivers/gpu/drm/nouveau/nv50_cursor.c

@@ -53,15 +53,15 @@ nv50_cursor_show(struct nouveau_crtc *nv_crtc, bool update)
 	}
 
 	if (dev_priv->chipset != 0x50) {
-		BEGIN_RING(evo, 0, NV84_EVO_CRTC(nv_crtc->index, CURSOR_DMA), 1);
+		BEGIN_NV04(evo, 0, NV84_EVO_CRTC(nv_crtc->index, CURSOR_DMA), 1);
 		OUT_RING(evo, NvEvoVRAM);
 	}
-	BEGIN_RING(evo, 0, NV50_EVO_CRTC(nv_crtc->index, CURSOR_CTRL), 2);
+	BEGIN_NV04(evo, 0, NV50_EVO_CRTC(nv_crtc->index, CURSOR_CTRL), 2);
 	OUT_RING(evo, NV50_EVO_CRTC_CURSOR_CTRL_SHOW);
 	OUT_RING(evo, nv_crtc->cursor.offset >> 8);
 
 	if (update) {
-		BEGIN_RING(evo, 0, NV50_EVO_UPDATE, 1);
+		BEGIN_NV04(evo, 0, NV50_EVO_UPDATE, 1);
 		OUT_RING(evo, 0);
 		FIRE_RING(evo);
 		nv_crtc->cursor.visible = true;
@@ -86,16 +86,16 @@ nv50_cursor_hide(struct nouveau_crtc *nv_crtc, bool update)
 		NV_ERROR(dev, "no space while hiding cursor\n");
 		return;
 	}
-	BEGIN_RING(evo, 0, NV50_EVO_CRTC(nv_crtc->index, CURSOR_CTRL), 2);
+	BEGIN_NV04(evo, 0, NV50_EVO_CRTC(nv_crtc->index, CURSOR_CTRL), 2);
 	OUT_RING(evo, NV50_EVO_CRTC_CURSOR_CTRL_HIDE);
 	OUT_RING(evo, 0);
 	if (dev_priv->chipset != 0x50) {
-		BEGIN_RING(evo, 0, NV84_EVO_CRTC(nv_crtc->index, CURSOR_DMA), 1);
+		BEGIN_NV04(evo, 0, NV84_EVO_CRTC(nv_crtc->index, CURSOR_DMA), 1);
 		OUT_RING(evo, NV84_EVO_CRTC_CURSOR_DMA_HANDLE_NONE);
 	}
 
 	if (update) {
-		BEGIN_RING(evo, 0, NV50_EVO_UPDATE, 1);
+		BEGIN_NV04(evo, 0, NV50_EVO_UPDATE, 1);
 		OUT_RING(evo, 0);
 		FIRE_RING(evo);
 		nv_crtc->cursor.visible = false;

+ 3 - 3
drivers/gpu/drm/nouveau/nv50_dac.c

@@ -55,9 +55,9 @@ nv50_dac_disconnect(struct drm_encoder *encoder)
 		NV_ERROR(dev, "no space while disconnecting DAC\n");
 		return;
 	}
-	BEGIN_RING(evo, 0, NV50_EVO_DAC(nv_encoder->or, MODE_CTRL), 1);
+	BEGIN_NV04(evo, 0, NV50_EVO_DAC(nv_encoder->or, MODE_CTRL), 1);
 	OUT_RING  (evo, 0);
-	BEGIN_RING(evo, 0, NV50_EVO_UPDATE, 1);
+	BEGIN_NV04(evo, 0, NV50_EVO_UPDATE, 1);
 	OUT_RING  (evo, 0);
 
 	nv_encoder->crtc = NULL;
@@ -240,7 +240,7 @@ nv50_dac_mode_set(struct drm_encoder *encoder, struct drm_display_mode *mode,
 		NV_ERROR(dev, "no space while connecting DAC\n");
 		return;
 	}
-	BEGIN_RING(evo, 0, NV50_EVO_DAC(nv_encoder->or, MODE_CTRL), 2);
+	BEGIN_NV04(evo, 0, NV50_EVO_DAC(nv_encoder->or, MODE_CTRL), 2);
 	OUT_RING(evo, mode_ctl);
 	OUT_RING(evo, mode_ctl2);
 

+ 33 - 42
drivers/gpu/drm/nouveau/nv50_display.c

@@ -32,6 +32,7 @@
 #include "nouveau_fb.h"
 #include "nouveau_fbcon.h"
 #include "nouveau_ramht.h"
+#include "nouveau_software.h"
 #include "drm_crtc_helper.h"
 
 static void nv50_display_isr(struct drm_device *);
@@ -140,11 +141,11 @@ nv50_display_sync(struct drm_device *dev)
 
 	ret = RING_SPACE(evo, 6);
 	if (ret == 0) {
-		BEGIN_RING(evo, 0, 0x0084, 1);
+		BEGIN_NV04(evo, 0, 0x0084, 1);
 		OUT_RING  (evo, 0x80000000);
-		BEGIN_RING(evo, 0, 0x0080, 1);
+		BEGIN_NV04(evo, 0, 0x0080, 1);
 		OUT_RING  (evo, 0);
-		BEGIN_RING(evo, 0, 0x0084, 1);
+		BEGIN_NV04(evo, 0, 0x0084, 1);
 		OUT_RING  (evo, 0x00000000);
 
 		nv_wo32(disp->ntfy, 0x000, 0x00000000);
@@ -267,7 +268,7 @@ nv50_display_init(struct drm_device *dev)
 	ret = RING_SPACE(evo, 3);
 	if (ret)
 		return ret;
-	BEGIN_RING(evo, 0, NV50_EVO_UNK84, 2);
+	BEGIN_NV04(evo, 0, NV50_EVO_UNK84, 2);
 	OUT_RING  (evo, NV50_EVO_UNK84_NOTIFY_DISABLED);
 	OUT_RING  (evo, NvEvoSync);
 
@@ -292,7 +293,7 @@ nv50_display_fini(struct drm_device *dev)
 
 	ret = RING_SPACE(evo, 2);
 	if (ret == 0) {
-		BEGIN_RING(evo, 0, NV50_EVO_UPDATE, 1);
+		BEGIN_NV04(evo, 0, NV50_EVO_UPDATE, 1);
 		OUT_RING(evo, 0);
 	}
 	FIRE_RING(evo);
@@ -358,8 +359,11 @@ nv50_display_create(struct drm_device *dev)
 	dev_priv->engine.display.priv = priv;
 
 	/* Create CRTC objects */
-	for (i = 0; i < 2; i++)
-		nv50_crtc_create(dev, i);
+	for (i = 0; i < 2; i++) {
+		ret = nv50_crtc_create(dev, i);
+		if (ret)
+			return ret;
+	}
 
 	/* We setup the encoders from the BIOS table */
 	for (i = 0 ; i < dcb->entries; i++) {
@@ -438,13 +442,13 @@ nv50_display_flip_stop(struct drm_crtc *crtc)
 		return;
 	}
 
-	BEGIN_RING(evo, 0, 0x0084, 1);
+	BEGIN_NV04(evo, 0, 0x0084, 1);
 	OUT_RING  (evo, 0x00000000);
-	BEGIN_RING(evo, 0, 0x0094, 1);
+	BEGIN_NV04(evo, 0, 0x0094, 1);
 	OUT_RING  (evo, 0x00000000);
-	BEGIN_RING(evo, 0, 0x00c0, 1);
+	BEGIN_NV04(evo, 0, 0x00c0, 1);
 	OUT_RING  (evo, 0x00000000);
-	BEGIN_RING(evo, 0, 0x0080, 1);
+	BEGIN_NV04(evo, 0, 0x0080, 1);
 	OUT_RING  (evo, 0x00000000);
 	FIRE_RING (evo);
 }
@@ -474,28 +478,28 @@ nv50_display_flip_next(struct drm_crtc *crtc, struct drm_framebuffer *fb,
 		}
 
 		if (dev_priv->chipset < 0xc0) {
-			BEGIN_RING(chan, 0, 0x0060, 2);
+			BEGIN_NV04(chan, 0, 0x0060, 2);
 			OUT_RING  (chan, NvEvoSema0 + nv_crtc->index);
 			OUT_RING  (chan, dispc->sem.offset);
-			BEGIN_RING(chan, 0, 0x006c, 1);
+			BEGIN_NV04(chan, 0, 0x006c, 1);
 			OUT_RING  (chan, 0xf00d0000 | dispc->sem.value);
-			BEGIN_RING(chan, 0, 0x0064, 2);
+			BEGIN_NV04(chan, 0, 0x0064, 2);
 			OUT_RING  (chan, dispc->sem.offset ^ 0x10);
 			OUT_RING  (chan, 0x74b1e000);
-			BEGIN_RING(chan, 0, 0x0060, 1);
+			BEGIN_NV04(chan, 0, 0x0060, 1);
 			if (dev_priv->chipset < 0x84)
 				OUT_RING  (chan, NvSema);
 			else
 				OUT_RING  (chan, chan->vram_handle);
 		} else {
-			u64 offset = chan->dispc_vma[nv_crtc->index].offset;
+			u64 offset = nvc0_software_crtc(chan, nv_crtc->index);
 			offset += dispc->sem.offset;
-			BEGIN_NVC0(chan, 2, 0, 0x0010, 4);
+			BEGIN_NVC0(chan, 0, 0x0010, 4);
 			OUT_RING  (chan, upper_32_bits(offset));
 			OUT_RING  (chan, lower_32_bits(offset));
 			OUT_RING  (chan, 0xf00d0000 | dispc->sem.value);
 			OUT_RING  (chan, 0x1002);
-			BEGIN_NVC0(chan, 2, 0, 0x0010, 4);
+			BEGIN_NVC0(chan, 0, 0x0010, 4);
 			OUT_RING  (chan, upper_32_bits(offset));
 			OUT_RING  (chan, lower_32_bits(offset ^ 0x10));
 			OUT_RING  (chan, 0x74b1e000);
@@ -508,40 +512,40 @@ nv50_display_flip_next(struct drm_crtc *crtc, struct drm_framebuffer *fb,
 	}
 
 	/* queue the flip on the crtc's "display sync" channel */
-	BEGIN_RING(evo, 0, 0x0100, 1);
+	BEGIN_NV04(evo, 0, 0x0100, 1);
 	OUT_RING  (evo, 0xfffe0000);
 	if (chan) {
-		BEGIN_RING(evo, 0, 0x0084, 1);
+		BEGIN_NV04(evo, 0, 0x0084, 1);
 		OUT_RING  (evo, 0x00000100);
 	} else {
-		BEGIN_RING(evo, 0, 0x0084, 1);
+		BEGIN_NV04(evo, 0, 0x0084, 1);
 		OUT_RING  (evo, 0x00000010);
 		/* allows gamma somehow, PDISP will bitch at you if
 		 * you don't wait for vblank before changing this..
 		 */
-		BEGIN_RING(evo, 0, 0x00e0, 1);
+		BEGIN_NV04(evo, 0, 0x00e0, 1);
 		OUT_RING  (evo, 0x40000000);
 	}
-	BEGIN_RING(evo, 0, 0x0088, 4);
+	BEGIN_NV04(evo, 0, 0x0088, 4);
 	OUT_RING  (evo, dispc->sem.offset);
 	OUT_RING  (evo, 0xf00d0000 | dispc->sem.value);
 	OUT_RING  (evo, 0x74b1e000);
 	OUT_RING  (evo, NvEvoSync);
-	BEGIN_RING(evo, 0, 0x00a0, 2);
+	BEGIN_NV04(evo, 0, 0x00a0, 2);
 	OUT_RING  (evo, 0x00000000);
 	OUT_RING  (evo, 0x00000000);
-	BEGIN_RING(evo, 0, 0x00c0, 1);
+	BEGIN_NV04(evo, 0, 0x00c0, 1);
 	OUT_RING  (evo, nv_fb->r_dma);
-	BEGIN_RING(evo, 0, 0x0110, 2);
+	BEGIN_NV04(evo, 0, 0x0110, 2);
 	OUT_RING  (evo, 0x00000000);
 	OUT_RING  (evo, 0x00000000);
-	BEGIN_RING(evo, 0, 0x0800, 5);
+	BEGIN_NV04(evo, 0, 0x0800, 5);
 	OUT_RING  (evo, nv_fb->nvbo->bo.offset >> 8);
 	OUT_RING  (evo, 0);
 	OUT_RING  (evo, (fb->height << 16) | fb->width);
 	OUT_RING  (evo, nv_fb->r_pitch);
 	OUT_RING  (evo, nv_fb->r_format);
-	BEGIN_RING(evo, 0, 0x0080, 1);
+	BEGIN_NV04(evo, 0, 0x0080, 1);
 	OUT_RING  (evo, 0x00000000);
 	FIRE_RING (evo);
 
@@ -642,20 +646,7 @@ nv50_display_script_select(struct drm_device *dev, struct dcb_entry *dcb,
 static void
 nv50_display_vblank_crtc_handler(struct drm_device *dev, int crtc)
 {
-	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nouveau_channel *chan, *tmp;
-
-	list_for_each_entry_safe(chan, tmp, &dev_priv->vbl_waiting,
-				 nvsw.vbl_wait) {
-		if (chan->nvsw.vblsem_head != crtc)
-			continue;
-
-		nouveau_bo_wr32(chan->notifier_bo, chan->nvsw.vblsem_offset,
-						chan->nvsw.vblsem_rval);
-		list_del(&chan->nvsw.vbl_wait);
-		drm_vblank_put(dev, crtc);
-	}
-
+	nouveau_software_vblank(dev, crtc);
 	drm_handle_vblank(dev, crtc);
 }
 

+ 1 - 0
drivers/gpu/drm/nouveau/nv50_display.h

@@ -33,6 +33,7 @@
 #include "nouveau_dma.h"
 #include "nouveau_reg.h"
 #include "nouveau_crtc.h"
+#include "nouveau_software.h"
 #include "nv50_evo.h"
 
 struct nv50_display_crtc {

+ 3 - 1
drivers/gpu/drm/nouveau/nv50_fb.c

@@ -2,6 +2,7 @@
 #include "drm.h"
 #include "nouveau_drv.h"
 #include "nouveau_drm.h"
+#include "nouveau_fifo.h"
 
 struct nv50_fb_priv {
 	struct page *r100c08_page;
@@ -212,6 +213,7 @@ static struct nouveau_enum vm_fault[] = {
 void
 nv50_fb_vm_trap(struct drm_device *dev, int display)
 {
+	struct nouveau_fifo_priv *pfifo = nv_engine(dev, NVOBJ_ENGINE_FIFO);
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
 	const struct nouveau_enum *en, *cl;
 	unsigned long flags;
@@ -236,7 +238,7 @@ nv50_fb_vm_trap(struct drm_device *dev, int display)
 	/* lookup channel id */
 	chinst = (trap[2] << 16) | trap[1];
 	spin_lock_irqsave(&dev_priv->channels.lock, flags);
-	for (ch = 0; ch < dev_priv->engine.fifo.channels; ch++) {
+	for (ch = 0; ch < pfifo->channels; ch++) {
 		struct nouveau_channel *chan = dev_priv->channels.ptr[ch];
 
 		if (!chan || !chan->ramin)

+ 29 - 30
drivers/gpu/drm/nouveau/nv50_fbcon.c

@@ -43,22 +43,22 @@ nv50_fbcon_fillrect(struct fb_info *info, const struct fb_fillrect *rect)
 		return ret;
 
 	if (rect->rop != ROP_COPY) {
-		BEGIN_RING(chan, NvSub2D, 0x02ac, 1);
+		BEGIN_NV04(chan, NvSub2D, 0x02ac, 1);
 		OUT_RING(chan, 1);
 	}
-	BEGIN_RING(chan, NvSub2D, 0x0588, 1);
+	BEGIN_NV04(chan, NvSub2D, 0x0588, 1);
 	if (info->fix.visual == FB_VISUAL_TRUECOLOR ||
 	    info->fix.visual == FB_VISUAL_DIRECTCOLOR)
 		OUT_RING(chan, ((uint32_t *)info->pseudo_palette)[rect->color]);
 	else
 		OUT_RING(chan, rect->color);
-	BEGIN_RING(chan, NvSub2D, 0x0600, 4);
+	BEGIN_NV04(chan, NvSub2D, 0x0600, 4);
 	OUT_RING(chan, rect->dx);
 	OUT_RING(chan, rect->dy);
 	OUT_RING(chan, rect->dx + rect->width);
 	OUT_RING(chan, rect->dy + rect->height);
 	if (rect->rop != ROP_COPY) {
-		BEGIN_RING(chan, NvSub2D, 0x02ac, 1);
+		BEGIN_NV04(chan, NvSub2D, 0x02ac, 1);
 		OUT_RING(chan, 3);
 	}
 	FIRE_RING(chan);
@@ -78,14 +78,14 @@ nv50_fbcon_copyarea(struct fb_info *info, const struct fb_copyarea *region)
 	if (ret)
 		return ret;
 
-	BEGIN_RING(chan, NvSub2D, 0x0110, 1);
+	BEGIN_NV04(chan, NvSub2D, 0x0110, 1);
 	OUT_RING(chan, 0);
-	BEGIN_RING(chan, NvSub2D, 0x08b0, 4);
+	BEGIN_NV04(chan, NvSub2D, 0x08b0, 4);
 	OUT_RING(chan, region->dx);
 	OUT_RING(chan, region->dy);
 	OUT_RING(chan, region->width);
 	OUT_RING(chan, region->height);
-	BEGIN_RING(chan, NvSub2D, 0x08d0, 4);
+	BEGIN_NV04(chan, NvSub2D, 0x08d0, 4);
 	OUT_RING(chan, 0);
 	OUT_RING(chan, region->sx);
 	OUT_RING(chan, 0);
@@ -116,7 +116,7 @@ nv50_fbcon_imageblit(struct fb_info *info, const struct fb_image *image)
 	width = ALIGN(image->width, 32);
 	dwords = (width * image->height) >> 5;
 
-	BEGIN_RING(chan, NvSub2D, 0x0814, 2);
+	BEGIN_NV04(chan, NvSub2D, 0x0814, 2);
 	if (info->fix.visual == FB_VISUAL_TRUECOLOR ||
 	    info->fix.visual == FB_VISUAL_DIRECTCOLOR) {
 		OUT_RING(chan, palette[image->bg_color] | mask);
@@ -125,10 +125,10 @@ nv50_fbcon_imageblit(struct fb_info *info, const struct fb_image *image)
 		OUT_RING(chan, image->bg_color);
 		OUT_RING(chan, image->fg_color);
 	}
-	BEGIN_RING(chan, NvSub2D, 0x0838, 2);
+	BEGIN_NV04(chan, NvSub2D, 0x0838, 2);
 	OUT_RING(chan, image->width);
 	OUT_RING(chan, image->height);
-	BEGIN_RING(chan, NvSub2D, 0x0850, 4);
+	BEGIN_NV04(chan, NvSub2D, 0x0850, 4);
 	OUT_RING(chan, 0);
 	OUT_RING(chan, image->dx);
 	OUT_RING(chan, 0);
@@ -143,7 +143,7 @@ nv50_fbcon_imageblit(struct fb_info *info, const struct fb_image *image)
 
 		dwords -= push;
 
-		BEGIN_RING(chan, NvSub2D, 0x40000860, push);
+		BEGIN_NI04(chan, NvSub2D, 0x0860, push);
 		OUT_RINGp(chan, data, push);
 		data += push;
 	}
@@ -199,60 +199,59 @@ nv50_fbcon_accel_init(struct fb_info *info)
 		return ret;
 	}
 
-	BEGIN_RING(chan, NvSub2D, 0x0000, 1);
+	BEGIN_NV04(chan, NvSub2D, 0x0000, 1);
 	OUT_RING(chan, Nv2D);
-	BEGIN_RING(chan, NvSub2D, 0x0180, 4);
-	OUT_RING(chan, NvNotify0);
+	BEGIN_NV04(chan, NvSub2D, 0x0184, 3);
 	OUT_RING(chan, chan->vram_handle);
 	OUT_RING(chan, chan->vram_handle);
 	OUT_RING(chan, chan->vram_handle);
-	BEGIN_RING(chan, NvSub2D, 0x0290, 1);
+	BEGIN_NV04(chan, NvSub2D, 0x0290, 1);
 	OUT_RING(chan, 0);
-	BEGIN_RING(chan, NvSub2D, 0x0888, 1);
+	BEGIN_NV04(chan, NvSub2D, 0x0888, 1);
 	OUT_RING(chan, 1);
-	BEGIN_RING(chan, NvSub2D, 0x02ac, 1);
+	BEGIN_NV04(chan, NvSub2D, 0x02ac, 1);
 	OUT_RING(chan, 3);
-	BEGIN_RING(chan, NvSub2D, 0x02a0, 1);
+	BEGIN_NV04(chan, NvSub2D, 0x02a0, 1);
 	OUT_RING(chan, 0x55);
-	BEGIN_RING(chan, NvSub2D, 0x08c0, 4);
+	BEGIN_NV04(chan, NvSub2D, 0x08c0, 4);
 	OUT_RING(chan, 0);
 	OUT_RING(chan, 1);
 	OUT_RING(chan, 0);
 	OUT_RING(chan, 1);
-	BEGIN_RING(chan, NvSub2D, 0x0580, 2);
+	BEGIN_NV04(chan, NvSub2D, 0x0580, 2);
 	OUT_RING(chan, 4);
 	OUT_RING(chan, format);
-	BEGIN_RING(chan, NvSub2D, 0x02e8, 2);
+	BEGIN_NV04(chan, NvSub2D, 0x02e8, 2);
 	OUT_RING(chan, 2);
 	OUT_RING(chan, 1);
-	BEGIN_RING(chan, NvSub2D, 0x0804, 1);
+	BEGIN_NV04(chan, NvSub2D, 0x0804, 1);
 	OUT_RING(chan, format);
-	BEGIN_RING(chan, NvSub2D, 0x0800, 1);
+	BEGIN_NV04(chan, NvSub2D, 0x0800, 1);
 	OUT_RING(chan, 1);
-	BEGIN_RING(chan, NvSub2D, 0x0808, 3);
+	BEGIN_NV04(chan, NvSub2D, 0x0808, 3);
 	OUT_RING(chan, 0);
 	OUT_RING(chan, 0);
 	OUT_RING(chan, 1);
-	BEGIN_RING(chan, NvSub2D, 0x081c, 1);
+	BEGIN_NV04(chan, NvSub2D, 0x081c, 1);
 	OUT_RING(chan, 1);
-	BEGIN_RING(chan, NvSub2D, 0x0840, 4);
+	BEGIN_NV04(chan, NvSub2D, 0x0840, 4);
 	OUT_RING(chan, 0);
 	OUT_RING(chan, 1);
 	OUT_RING(chan, 0);
 	OUT_RING(chan, 1);
-	BEGIN_RING(chan, NvSub2D, 0x0200, 2);
+	BEGIN_NV04(chan, NvSub2D, 0x0200, 2);
 	OUT_RING(chan, format);
 	OUT_RING(chan, 1);
-	BEGIN_RING(chan, NvSub2D, 0x0214, 5);
+	BEGIN_NV04(chan, NvSub2D, 0x0214, 5);
 	OUT_RING(chan, info->fix.line_length);
 	OUT_RING(chan, info->var.xres_virtual);
 	OUT_RING(chan, info->var.yres_virtual);
 	OUT_RING(chan, upper_32_bits(fb->vma.offset));
 	OUT_RING(chan, lower_32_bits(fb->vma.offset));
-	BEGIN_RING(chan, NvSub2D, 0x0230, 2);
+	BEGIN_NV04(chan, NvSub2D, 0x0230, 2);
 	OUT_RING(chan, format);
 	OUT_RING(chan, 1);
-	BEGIN_RING(chan, NvSub2D, 0x0244, 5);
+	BEGIN_NV04(chan, NvSub2D, 0x0244, 5);
 	OUT_RING(chan, info->fix.line_length);
 	OUT_RING(chan, info->var.xres_virtual);
 	OUT_RING(chan, info->var.yres_virtual);
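
The imageblit change from BEGIN_RING(chan, NvSub2D, 0x40000860, push) to
BEGIN_NI04(chan, NvSub2D, 0x0860, push) is more than a rename: the 0x40000000
bit in the old method word selected a non-increasing method, and the new
helpers encode that distinction by name. A sketch, assuming they keep the usual
NV04 method-header layout (and that the real helpers also reserve ring space
first, elided here):

	/* BEGIN_NV04: each payload dword goes to mthd, mthd+4, ... */
	static inline void
	example_begin_nv04(struct nouveau_channel *chan, int subc, int mthd,
			   int size)
	{
		OUT_RING(chan, 0x00000000 | (subc << 13) | (size << 18) | mthd);
	}

	/* BEGIN_NI04: non-increasing, every dword lands on mthd itself,
	 * which is what the 2D object's 0x0860 data port wants */
	static inline void
	example_begin_ni04(struct nouveau_channel *chan, int subc, int mthd,
			   int size)
	{
		OUT_RING(chan, 0x40000000 | (subc << 13) | (size << 18) | mthd);
	}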

+ 192 - 404
drivers/gpu/drm/nouveau/nv50_fifo.c

@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2007 Ben Skeggs.
+ * Copyright (C) 2012 Ben Skeggs.
  * All Rights Reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining
@@ -27,480 +27,268 @@
 #include "drmP.h"
 #include "drm.h"
 #include "nouveau_drv.h"
+#include "nouveau_fifo.h"
 #include "nouveau_ramht.h"
 #include "nouveau_vm.h"
 
-static void
+struct nv50_fifo_priv {
+	struct nouveau_fifo_priv base;
+	struct nouveau_gpuobj *playlist[2];
+	int cur_playlist;
+};
+
+struct nv50_fifo_chan {
+	struct nouveau_fifo_chan base;
+};
+
+void
 nv50_fifo_playlist_update(struct drm_device *dev)
 {
+	struct nv50_fifo_priv *priv = nv_engine(dev, NVOBJ_ENGINE_FIFO);
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nouveau_fifo_engine *pfifo = &dev_priv->engine.fifo;
 	struct nouveau_gpuobj *cur;
-	int i, nr;
-
-	NV_DEBUG(dev, "\n");
+	int i, p;
 
-	cur = pfifo->playlist[pfifo->cur_playlist];
-	pfifo->cur_playlist = !pfifo->cur_playlist;
+	cur = priv->playlist[priv->cur_playlist];
+	priv->cur_playlist = !priv->cur_playlist;
 
-	/* We never schedule channel 0 or 127 */
-	for (i = 1, nr = 0; i < 127; i++) {
-		if (dev_priv->channels.ptr[i] &&
-		    dev_priv->channels.ptr[i]->ramfc) {
-			nv_wo32(cur, (nr * 4), i);
-			nr++;
-		}
+	for (i = 0, p = 0; i < priv->base.channels; i++) {
+		if (nv_rd32(dev, 0x002600 + (i * 4)) & 0x80000000)
+			nv_wo32(cur, p++ * 4, i);
 	}
-	dev_priv->engine.instmem.flush(dev);
-
-	nv_wr32(dev, 0x32f4, cur->vinst >> 12);
-	nv_wr32(dev, 0x32ec, nr);
-	nv_wr32(dev, 0x2500, 0x101);
-}
 
-static void
-nv50_fifo_channel_enable(struct drm_device *dev, int channel)
-{
-	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nouveau_channel *chan = dev_priv->channels.ptr[channel];
-	uint32_t inst;
-
-	NV_DEBUG(dev, "ch%d\n", channel);
-
-	if (dev_priv->chipset == 0x50)
-		inst = chan->ramfc->vinst >> 12;
-	else
-		inst = chan->ramfc->vinst >> 8;
+	dev_priv->engine.instmem.flush(dev);
 
-	nv_wr32(dev, NV50_PFIFO_CTX_TABLE(channel), inst |
-		     NV50_PFIFO_CTX_TABLE_CHANNEL_ENABLED);
+	nv_wr32(dev, 0x0032f4, cur->vinst >> 12);
+	nv_wr32(dev, 0x0032ec, p);
+	nv_wr32(dev, 0x002500, 0x00000101);
 }
 
-static void
-nv50_fifo_channel_disable(struct drm_device *dev, int channel)
+static int
+nv50_fifo_context_new(struct nouveau_channel *chan, int engine)
 {
+	struct nv50_fifo_priv *priv = nv_engine(chan->dev, engine);
+	struct nv50_fifo_chan *fctx;
+	struct drm_device *dev = chan->dev;
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	uint32_t inst;
-
-	NV_DEBUG(dev, "ch%d\n", channel);
+	u64 ib_offset = chan->pushbuf_base + chan->dma.ib_base * 4;
+	u64 instance = chan->ramin->vinst >> 12;
+	unsigned long flags;
+	int ret = 0, i;
 
-	if (dev_priv->chipset == 0x50)
-		inst = NV50_PFIFO_CTX_TABLE_INSTANCE_MASK_G80;
-	else
-		inst = NV50_PFIFO_CTX_TABLE_INSTANCE_MASK_G84;
-	nv_wr32(dev, NV50_PFIFO_CTX_TABLE(channel), inst);
-}
+	fctx = chan->engctx[engine] = kzalloc(sizeof(*fctx), GFP_KERNEL);
+	if (!fctx)
+		return -ENOMEM;
+	atomic_inc(&chan->vm->engref[engine]);
 
-static void
-nv50_fifo_init_reset(struct drm_device *dev)
-{
-	uint32_t pmc_e = NV_PMC_ENABLE_PFIFO;
+	chan->user = ioremap(pci_resource_start(dev->pdev, 0) +
+			     NV50_USER(chan->id), PAGE_SIZE);
+	if (!chan->user) {
+		ret = -ENOMEM;
+		goto error;
+	}
 
-	NV_DEBUG(dev, "\n");
+	for (i = 0; i < 0x100; i += 4)
+		nv_wo32(chan->ramin, i, 0x00000000);
+	nv_wo32(chan->ramin, 0x3c, 0x403f6078);
+	nv_wo32(chan->ramin, 0x40, 0x00000000);
+	nv_wo32(chan->ramin, 0x44, 0x01003fff);
+	nv_wo32(chan->ramin, 0x48, chan->pushbuf->cinst >> 4);
+	nv_wo32(chan->ramin, 0x50, lower_32_bits(ib_offset));
+	nv_wo32(chan->ramin, 0x54, upper_32_bits(ib_offset) |
+				   drm_order(chan->dma.ib_max + 1) << 16);
+	nv_wo32(chan->ramin, 0x60, 0x7fffffff);
+	nv_wo32(chan->ramin, 0x78, 0x00000000);
+	nv_wo32(chan->ramin, 0x7c, 0x30000001);
+	nv_wo32(chan->ramin, 0x80, ((chan->ramht->bits - 9) << 27) |
+				   (4 << 24) /* SEARCH_FULL */ |
+				   (chan->ramht->gpuobj->cinst >> 4));
 
-	nv_wr32(dev, NV03_PMC_ENABLE, nv_rd32(dev, NV03_PMC_ENABLE) & ~pmc_e);
-	nv_wr32(dev, NV03_PMC_ENABLE, nv_rd32(dev, NV03_PMC_ENABLE) |  pmc_e);
-}
+	dev_priv->engine.instmem.flush(dev);
 
-static void
-nv50_fifo_init_intr(struct drm_device *dev)
-{
-	NV_DEBUG(dev, "\n");
+	spin_lock_irqsave(&dev_priv->context_switch_lock, flags);
+	nv_wr32(dev, 0x002600 + (chan->id * 4), 0x80000000 | instance);
+	nv50_fifo_playlist_update(dev);
+	spin_unlock_irqrestore(&dev_priv->context_switch_lock, flags);
 
-	nouveau_irq_register(dev, 8, nv04_fifo_isr);
-	nv_wr32(dev, NV03_PFIFO_INTR_0, 0xFFFFFFFF);
-	nv_wr32(dev, NV03_PFIFO_INTR_EN_0, 0xFFFFFFFF);
+error:
+	if (ret)
+		priv->base.base.context_del(chan, engine);
+	return ret;
 }
 
-static void
-nv50_fifo_init_context_table(struct drm_device *dev)
+static bool
+nv50_fifo_kickoff(struct nouveau_channel *chan)
 {
-	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	int i;
-
-	NV_DEBUG(dev, "\n");
-
-	for (i = 0; i < NV50_PFIFO_CTX_TABLE__SIZE; i++) {
-		if (dev_priv->channels.ptr[i])
-			nv50_fifo_channel_enable(dev, i);
-		else
-			nv50_fifo_channel_disable(dev, i);
+	struct drm_device *dev = chan->dev;
+	bool done = true;
+	u32 me;
+
+	/* HW bug workaround:
+	 *
+	 * PFIFO will hang forever if the connected engines don't report
+	 * that they've processed the context switch request.
+	 *
+	 * In order for the kickoff to work, we need to ensure all the
+	 * connected engines are in a state where they can answer.
+	 *
+	 * Newer chipsets don't seem to suffer from this issue, and well,
+	 * there's also a "ignore these engines" bitmask reg we can use
+	 * if we hit the issue there..
+	 */
+
+	/* PME: make sure engine is enabled */
+	me = nv_mask(dev, 0x00b860, 0x00000001, 0x00000001);
+
+	/* do the kickoff... */
+	nv_wr32(dev, 0x0032fc, chan->ramin->vinst >> 12);
+	if (!nv_wait_ne(dev, 0x0032fc, 0xffffffff, 0xffffffff)) {
+		NV_INFO(dev, "PFIFO: channel %d unload timeout\n", chan->id);
+		done = false;
 	}
 
-	nv50_fifo_playlist_update(dev);
+	/* restore any engine states we changed, and exit */
+	nv_wr32(dev, 0x00b860, me);
+	return done;
 }
 
 static void
-nv50_fifo_init_regs__nv(struct drm_device *dev)
-{
-	NV_DEBUG(dev, "\n");
-
-	nv_wr32(dev, 0x250c, 0x6f3cfc34);
-}
-
-static void
-nv50_fifo_init_regs(struct drm_device *dev)
-{
-	NV_DEBUG(dev, "\n");
-
-	nv_wr32(dev, 0x2500, 0);
-	nv_wr32(dev, 0x3250, 0);
-	nv_wr32(dev, 0x3220, 0);
-	nv_wr32(dev, 0x3204, 0);
-	nv_wr32(dev, 0x3210, 0);
-	nv_wr32(dev, 0x3270, 0);
-	nv_wr32(dev, 0x2044, 0x01003fff);
-
-	/* Enable dummy channels setup by nv50_instmem.c */
-	nv50_fifo_channel_enable(dev, 0);
-	nv50_fifo_channel_enable(dev, 127);
-}
-
-int
-nv50_fifo_init(struct drm_device *dev)
+nv50_fifo_context_del(struct nouveau_channel *chan, int engine)
 {
+	struct nv50_fifo_chan *fctx = chan->engctx[engine];
+	struct drm_device *dev = chan->dev;
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nouveau_fifo_engine *pfifo = &dev_priv->engine.fifo;
-	int ret;
+	unsigned long flags;
 
-	NV_DEBUG(dev, "\n");
+	/* remove channel from playlist, will context switch if active */
+	spin_lock_irqsave(&dev_priv->context_switch_lock, flags);
+	nv_mask(dev, 0x002600 + (chan->id * 4), 0x80000000, 0x00000000);
+	nv50_fifo_playlist_update(dev);
 
-	if (pfifo->playlist[0]) {
-		pfifo->cur_playlist = !pfifo->cur_playlist;
-		goto just_reset;
-	}
+	/* tell any engines on this channel to unload their contexts */
+	nv50_fifo_kickoff(chan);
 
-	ret = nouveau_gpuobj_new(dev, NULL, 128*4, 0x1000,
-				 NVOBJ_FLAG_ZERO_ALLOC,
-				 &pfifo->playlist[0]);
-	if (ret) {
-		NV_ERROR(dev, "error creating playlist 0: %d\n", ret);
-		return ret;
-	}
+	nv_wr32(dev, 0x002600 + (chan->id * 4), 0x00000000);
+	spin_unlock_irqrestore(&dev_priv->context_switch_lock, flags);
 
-	ret = nouveau_gpuobj_new(dev, NULL, 128*4, 0x1000,
-				 NVOBJ_FLAG_ZERO_ALLOC,
-				 &pfifo->playlist[1]);
-	if (ret) {
-		nouveau_gpuobj_ref(NULL, &pfifo->playlist[0]);
-		NV_ERROR(dev, "error creating playlist 1: %d\n", ret);
-		return ret;
+	/* clean up */
+	if (chan->user) {
+		iounmap(chan->user);
+		chan->user = NULL;
 	}
 
-just_reset:
-	nv50_fifo_init_reset(dev);
-	nv50_fifo_init_intr(dev);
-	nv50_fifo_init_context_table(dev);
-	nv50_fifo_init_regs__nv(dev);
-	nv50_fifo_init_regs(dev);
-	dev_priv->engine.fifo.enable(dev);
-	dev_priv->engine.fifo.reassign(dev, true);
-
-	return 0;
+	atomic_dec(&chan->vm->engref[engine]);
+	chan->engctx[engine] = NULL;
+	kfree(fctx);
 }
 
-void
-nv50_fifo_takedown(struct drm_device *dev)
+static int
+nv50_fifo_init(struct drm_device *dev, int engine)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nouveau_fifo_engine *pfifo = &dev_priv->engine.fifo;
+	u32 instance;
+	int i;
 
-	NV_DEBUG(dev, "\n");
+	nv_mask(dev, 0x000200, 0x00000100, 0x00000000);
+	nv_mask(dev, 0x000200, 0x00000100, 0x00000100);
+	nv_wr32(dev, 0x00250c, 0x6f3cfc34);
+	nv_wr32(dev, 0x002044, 0x01003fff);
 
-	if (!pfifo->playlist[0])
-		return;
+	nv_wr32(dev, 0x002100, 0xffffffff);
+	nv_wr32(dev, 0x002140, 0xffffffff);
 
-	nv_wr32(dev, 0x2140, 0x00000000);
-	nouveau_irq_unregister(dev, 8);
+	for (i = 0; i < 128; i++) {
+		struct nouveau_channel *chan = dev_priv->channels.ptr[i];
+		if (chan && chan->engctx[engine])
+			instance = 0x80000000 | chan->ramin->vinst >> 12;
+		else
+			instance = 0x00000000;
+		nv_wr32(dev, 0x002600 + (i * 4), instance);
+	}
 
-	nouveau_gpuobj_ref(NULL, &pfifo->playlist[0]);
-	nouveau_gpuobj_ref(NULL, &pfifo->playlist[1]);
-}
+	nv50_fifo_playlist_update(dev);
 
-int
-nv50_fifo_channel_id(struct drm_device *dev)
-{
-	return nv_rd32(dev, NV03_PFIFO_CACHE1_PUSH1) &
-			NV50_PFIFO_CACHE1_PUSH1_CHID_MASK;
+	nv_wr32(dev, 0x003200, 1);
+	nv_wr32(dev, 0x003250, 1);
+	nv_wr32(dev, 0x002500, 1);
+	return 0;
 }
 
-int
-nv50_fifo_create_context(struct nouveau_channel *chan)
+static int
+nv50_fifo_fini(struct drm_device *dev, int engine, bool suspend)
 {
-	struct drm_device *dev = chan->dev;
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nouveau_gpuobj *ramfc = NULL;
-        uint64_t ib_offset = chan->pushbuf_base + chan->dma.ib_base * 4;
-	unsigned long flags;
-	int ret;
-
-	NV_DEBUG(dev, "ch%d\n", chan->id);
-
-	if (dev_priv->chipset == 0x50) {
-		ret = nouveau_gpuobj_new_fake(dev, chan->ramin->pinst,
-					      chan->ramin->vinst, 0x100,
-					      NVOBJ_FLAG_ZERO_ALLOC |
-					      NVOBJ_FLAG_ZERO_FREE,
-					      &chan->ramfc);
-		if (ret)
-			return ret;
-
-		ret = nouveau_gpuobj_new_fake(dev, chan->ramin->pinst + 0x0400,
-					      chan->ramin->vinst + 0x0400,
-					      4096, 0, &chan->cache);
-		if (ret)
-			return ret;
-	} else {
-		ret = nouveau_gpuobj_new(dev, chan, 0x100, 256,
-					 NVOBJ_FLAG_ZERO_ALLOC |
-					 NVOBJ_FLAG_ZERO_FREE, &chan->ramfc);
-		if (ret)
-			return ret;
-
-		ret = nouveau_gpuobj_new(dev, chan, 4096, 1024,
-					 0, &chan->cache);
-		if (ret)
-			return ret;
-	}
-	ramfc = chan->ramfc;
+	struct nv50_fifo_priv *priv = nv_engine(dev, engine);
+	int i;
 
-	chan->user = ioremap(pci_resource_start(dev->pdev, 0) +
-			     NV50_USER(chan->id), PAGE_SIZE);
-	if (!chan->user)
-		return -ENOMEM;
+	/* set playlist length to zero, fifo will unload context */
+	nv_wr32(dev, 0x0032ec, 0);
 
-	spin_lock_irqsave(&dev_priv->context_switch_lock, flags);
-
-	nv_wo32(ramfc, 0x48, chan->pushbuf->cinst >> 4);
-	nv_wo32(ramfc, 0x80, ((chan->ramht->bits - 9) << 27) |
-			     (4 << 24) /* SEARCH_FULL */ |
-			     (chan->ramht->gpuobj->cinst >> 4));
-	nv_wo32(ramfc, 0x44, 0x01003fff);
-	nv_wo32(ramfc, 0x60, 0x7fffffff);
-	nv_wo32(ramfc, 0x40, 0x00000000);
-	nv_wo32(ramfc, 0x7c, 0x30000001);
-	nv_wo32(ramfc, 0x78, 0x00000000);
-	nv_wo32(ramfc, 0x3c, 0x403f6078);
-	nv_wo32(ramfc, 0x50, lower_32_bits(ib_offset));
-	nv_wo32(ramfc, 0x54, upper_32_bits(ib_offset) |
-                drm_order(chan->dma.ib_max + 1) << 16);
-
-	if (dev_priv->chipset != 0x50) {
-		nv_wo32(chan->ramin, 0, chan->id);
-		nv_wo32(chan->ramin, 4, chan->ramfc->vinst >> 8);
-
-		nv_wo32(ramfc, 0x88, chan->cache->vinst >> 10);
-		nv_wo32(ramfc, 0x98, chan->ramin->vinst >> 12);
+	/* tell all connected engines to unload their contexts */
+	for (i = 0; i < priv->base.channels; i++) {
+		struct nouveau_channel *chan = dev_priv->channels.ptr[i];
+		if (chan && !nv50_fifo_kickoff(chan))
+			return -EBUSY;
 	}
 
-	dev_priv->engine.instmem.flush(dev);
-
-	nv50_fifo_channel_enable(dev, chan->id);
-	nv50_fifo_playlist_update(dev);
-	spin_unlock_irqrestore(&dev_priv->context_switch_lock, flags);
+	nv_wr32(dev, 0x002140, 0);
 	return 0;
 }
 
 void
-nv50_fifo_destroy_context(struct nouveau_channel *chan)
+nv50_fifo_tlb_flush(struct drm_device *dev, int engine)
 {
-	struct drm_device *dev = chan->dev;
-	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nouveau_fifo_engine *pfifo = &dev_priv->engine.fifo;
-	struct nouveau_gpuobj *ramfc = NULL;
-	unsigned long flags;
-
-	NV_DEBUG(dev, "ch%d\n", chan->id);
-
-	spin_lock_irqsave(&dev_priv->context_switch_lock, flags);
-	pfifo->reassign(dev, false);
-
-	/* Unload the context if it's the currently active one */
-	if (pfifo->channel_id(dev) == chan->id) {
-		pfifo->disable(dev);
-		pfifo->unload_context(dev);
-		pfifo->enable(dev);
-	}
-
-	/* This will ensure the channel is seen as disabled. */
-	nouveau_gpuobj_ref(chan->ramfc, &ramfc);
-	nouveau_gpuobj_ref(NULL, &chan->ramfc);
-	nv50_fifo_channel_disable(dev, chan->id);
-
-	/* Dummy channel, also used on ch 127 */
-	if (chan->id == 0)
-		nv50_fifo_channel_disable(dev, 127);
-	nv50_fifo_playlist_update(dev);
-
-	pfifo->reassign(dev, true);
-	spin_unlock_irqrestore(&dev_priv->context_switch_lock, flags);
-
-	/* Free the channel resources */
-	if (chan->user) {
-		iounmap(chan->user);
-		chan->user = NULL;
-	}
-	nouveau_gpuobj_ref(NULL, &ramfc);
-	nouveau_gpuobj_ref(NULL, &chan->cache);
+	nv50_vm_flush_engine(dev, 5);
 }
 
-int
-nv50_fifo_load_context(struct nouveau_channel *chan)
+void
+nv50_fifo_destroy(struct drm_device *dev, int engine)
 {
-	struct drm_device *dev = chan->dev;
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nouveau_gpuobj *ramfc = chan->ramfc;
-	struct nouveau_gpuobj *cache = chan->cache;
-	int ptr, cnt;
-
-	NV_DEBUG(dev, "ch%d\n", chan->id);
-
-	nv_wr32(dev, 0x3330, nv_ro32(ramfc, 0x00));
-	nv_wr32(dev, 0x3334, nv_ro32(ramfc, 0x04));
-	nv_wr32(dev, 0x3240, nv_ro32(ramfc, 0x08));
-	nv_wr32(dev, 0x3320, nv_ro32(ramfc, 0x0c));
-	nv_wr32(dev, 0x3244, nv_ro32(ramfc, 0x10));
-	nv_wr32(dev, 0x3328, nv_ro32(ramfc, 0x14));
-	nv_wr32(dev, 0x3368, nv_ro32(ramfc, 0x18));
-	nv_wr32(dev, 0x336c, nv_ro32(ramfc, 0x1c));
-	nv_wr32(dev, 0x3370, nv_ro32(ramfc, 0x20));
-	nv_wr32(dev, 0x3374, nv_ro32(ramfc, 0x24));
-	nv_wr32(dev, 0x3378, nv_ro32(ramfc, 0x28));
-	nv_wr32(dev, 0x337c, nv_ro32(ramfc, 0x2c));
-	nv_wr32(dev, 0x3228, nv_ro32(ramfc, 0x30));
-	nv_wr32(dev, 0x3364, nv_ro32(ramfc, 0x34));
-	nv_wr32(dev, 0x32a0, nv_ro32(ramfc, 0x38));
-	nv_wr32(dev, 0x3224, nv_ro32(ramfc, 0x3c));
-	nv_wr32(dev, 0x324c, nv_ro32(ramfc, 0x40));
-	nv_wr32(dev, 0x2044, nv_ro32(ramfc, 0x44));
-	nv_wr32(dev, 0x322c, nv_ro32(ramfc, 0x48));
-	nv_wr32(dev, 0x3234, nv_ro32(ramfc, 0x4c));
-	nv_wr32(dev, 0x3340, nv_ro32(ramfc, 0x50));
-	nv_wr32(dev, 0x3344, nv_ro32(ramfc, 0x54));
-	nv_wr32(dev, 0x3280, nv_ro32(ramfc, 0x58));
-	nv_wr32(dev, 0x3254, nv_ro32(ramfc, 0x5c));
-	nv_wr32(dev, 0x3260, nv_ro32(ramfc, 0x60));
-	nv_wr32(dev, 0x3264, nv_ro32(ramfc, 0x64));
-	nv_wr32(dev, 0x3268, nv_ro32(ramfc, 0x68));
-	nv_wr32(dev, 0x326c, nv_ro32(ramfc, 0x6c));
-	nv_wr32(dev, 0x32e4, nv_ro32(ramfc, 0x70));
-	nv_wr32(dev, 0x3248, nv_ro32(ramfc, 0x74));
-	nv_wr32(dev, 0x2088, nv_ro32(ramfc, 0x78));
-	nv_wr32(dev, 0x2058, nv_ro32(ramfc, 0x7c));
-	nv_wr32(dev, 0x2210, nv_ro32(ramfc, 0x80));
-
-	cnt = nv_ro32(ramfc, 0x84);
-	for (ptr = 0; ptr < cnt; ptr++) {
-		nv_wr32(dev, NV40_PFIFO_CACHE1_METHOD(ptr),
-			nv_ro32(cache, (ptr * 8) + 0));
-		nv_wr32(dev, NV40_PFIFO_CACHE1_DATA(ptr),
-			nv_ro32(cache, (ptr * 8) + 4));
-	}
-	nv_wr32(dev, NV03_PFIFO_CACHE1_PUT, cnt << 2);
-	nv_wr32(dev, NV03_PFIFO_CACHE1_GET, 0);
-
-	/* guessing that all the 0x34xx regs aren't on NV50 */
-	if (dev_priv->chipset != 0x50) {
-		nv_wr32(dev, 0x340c, nv_ro32(ramfc, 0x88));
-		nv_wr32(dev, 0x3400, nv_ro32(ramfc, 0x8c));
-		nv_wr32(dev, 0x3404, nv_ro32(ramfc, 0x90));
-		nv_wr32(dev, 0x3408, nv_ro32(ramfc, 0x94));
-		nv_wr32(dev, 0x3410, nv_ro32(ramfc, 0x98));
-	}
+	struct nv50_fifo_priv *priv = nv_engine(dev, engine);
 
-	nv_wr32(dev, NV03_PFIFO_CACHE1_PUSH1, chan->id | (1<<16));
-	return 0;
+	nouveau_irq_unregister(dev, 8);
+
+	nouveau_gpuobj_ref(NULL, &priv->playlist[0]);
+	nouveau_gpuobj_ref(NULL, &priv->playlist[1]);
+
+	dev_priv->eng[engine] = NULL;
+	kfree(priv);
 }
 
 int
-nv50_fifo_unload_context(struct drm_device *dev)
+nv50_fifo_create(struct drm_device *dev)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nouveau_fifo_engine *pfifo = &dev_priv->engine.fifo;
-	struct nouveau_gpuobj *ramfc, *cache;
-	struct nouveau_channel *chan = NULL;
-	int chid, get, put, ptr;
-
-	NV_DEBUG(dev, "\n");
-
-	chid = pfifo->channel_id(dev);
-	if (chid < 1 || chid >= dev_priv->engine.fifo.channels - 1)
-		return 0;
-
-	chan = dev_priv->channels.ptr[chid];
-	if (!chan) {
-		NV_ERROR(dev, "Inactive channel on PFIFO: %d\n", chid);
-		return -EINVAL;
-	}
-	NV_DEBUG(dev, "ch%d\n", chan->id);
-	ramfc = chan->ramfc;
-	cache = chan->cache;
-
-	nv_wo32(ramfc, 0x00, nv_rd32(dev, 0x3330));
-	nv_wo32(ramfc, 0x04, nv_rd32(dev, 0x3334));
-	nv_wo32(ramfc, 0x08, nv_rd32(dev, 0x3240));
-	nv_wo32(ramfc, 0x0c, nv_rd32(dev, 0x3320));
-	nv_wo32(ramfc, 0x10, nv_rd32(dev, 0x3244));
-	nv_wo32(ramfc, 0x14, nv_rd32(dev, 0x3328));
-	nv_wo32(ramfc, 0x18, nv_rd32(dev, 0x3368));
-	nv_wo32(ramfc, 0x1c, nv_rd32(dev, 0x336c));
-	nv_wo32(ramfc, 0x20, nv_rd32(dev, 0x3370));
-	nv_wo32(ramfc, 0x24, nv_rd32(dev, 0x3374));
-	nv_wo32(ramfc, 0x28, nv_rd32(dev, 0x3378));
-	nv_wo32(ramfc, 0x2c, nv_rd32(dev, 0x337c));
-	nv_wo32(ramfc, 0x30, nv_rd32(dev, 0x3228));
-	nv_wo32(ramfc, 0x34, nv_rd32(dev, 0x3364));
-	nv_wo32(ramfc, 0x38, nv_rd32(dev, 0x32a0));
-	nv_wo32(ramfc, 0x3c, nv_rd32(dev, 0x3224));
-	nv_wo32(ramfc, 0x40, nv_rd32(dev, 0x324c));
-	nv_wo32(ramfc, 0x44, nv_rd32(dev, 0x2044));
-	nv_wo32(ramfc, 0x48, nv_rd32(dev, 0x322c));
-	nv_wo32(ramfc, 0x4c, nv_rd32(dev, 0x3234));
-	nv_wo32(ramfc, 0x50, nv_rd32(dev, 0x3340));
-	nv_wo32(ramfc, 0x54, nv_rd32(dev, 0x3344));
-	nv_wo32(ramfc, 0x58, nv_rd32(dev, 0x3280));
-	nv_wo32(ramfc, 0x5c, nv_rd32(dev, 0x3254));
-	nv_wo32(ramfc, 0x60, nv_rd32(dev, 0x3260));
-	nv_wo32(ramfc, 0x64, nv_rd32(dev, 0x3264));
-	nv_wo32(ramfc, 0x68, nv_rd32(dev, 0x3268));
-	nv_wo32(ramfc, 0x6c, nv_rd32(dev, 0x326c));
-	nv_wo32(ramfc, 0x70, nv_rd32(dev, 0x32e4));
-	nv_wo32(ramfc, 0x74, nv_rd32(dev, 0x3248));
-	nv_wo32(ramfc, 0x78, nv_rd32(dev, 0x2088));
-	nv_wo32(ramfc, 0x7c, nv_rd32(dev, 0x2058));
-	nv_wo32(ramfc, 0x80, nv_rd32(dev, 0x2210));
-
-	put = (nv_rd32(dev, NV03_PFIFO_CACHE1_PUT) & 0x7ff) >> 2;
-	get = (nv_rd32(dev, NV03_PFIFO_CACHE1_GET) & 0x7ff) >> 2;
-	ptr = 0;
-	while (put != get) {
-		nv_wo32(cache, ptr + 0,
-			nv_rd32(dev, NV40_PFIFO_CACHE1_METHOD(get)));
-		nv_wo32(cache, ptr + 4,
-			nv_rd32(dev, NV40_PFIFO_CACHE1_DATA(get)));
-		get = (get + 1) & 0x1ff;
-		ptr += 8;
-	}
-
-	/* guessing that all the 0x34xx regs aren't on NV50 */
-	if (dev_priv->chipset != 0x50) {
-		nv_wo32(ramfc, 0x84, ptr >> 3);
-		nv_wo32(ramfc, 0x88, nv_rd32(dev, 0x340c));
-		nv_wo32(ramfc, 0x8c, nv_rd32(dev, 0x3400));
-		nv_wo32(ramfc, 0x90, nv_rd32(dev, 0x3404));
-		nv_wo32(ramfc, 0x94, nv_rd32(dev, 0x3408));
-		nv_wo32(ramfc, 0x98, nv_rd32(dev, 0x3410));
-	}
+	struct nv50_fifo_priv *priv;
+	int ret;
 
-	dev_priv->engine.instmem.flush(dev);
+	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
 
-	/*XXX: probably reload ch127 (NULL) state back too */
-	nv_wr32(dev, NV03_PFIFO_CACHE1_PUSH1, 127);
-	return 0;
-}
+	priv->base.base.destroy = nv50_fifo_destroy;
+	priv->base.base.init = nv50_fifo_init;
+	priv->base.base.fini = nv50_fifo_fini;
+	priv->base.base.context_new = nv50_fifo_context_new;
+	priv->base.base.context_del = nv50_fifo_context_del;
+	priv->base.base.tlb_flush = nv50_fifo_tlb_flush;
+	priv->base.channels = 127;
+	dev_priv->eng[NVOBJ_ENGINE_FIFO] = &priv->base.base;
+
+	ret = nouveau_gpuobj_new(dev, NULL, priv->base.channels * 4, 0x1000,
+				 NVOBJ_FLAG_ZERO_ALLOC, &priv->playlist[0]);
+	if (ret)
+		goto error;
+
+	ret = nouveau_gpuobj_new(dev, NULL, priv->base.channels * 4, 0x1000,
+				 NVOBJ_FLAG_ZERO_ALLOC, &priv->playlist[1]);
+	if (ret)
+		goto error;
 
-void
-nv50_fifo_tlb_flush(struct drm_device *dev)
-{
-	nv50_vm_flush_engine(dev, 5);
+	nouveau_irq_register(dev, 8, nv04_fifo_isr);
+error:
+	if (ret)
+		priv->base.base.destroy(dev, NVOBJ_ENGINE_FIFO);
+	return ret;
 }
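
A note for readers following the new module structure: the two playlist objects allocated here are double-buffered, so an update can be written into the idle copy before the hardware is repointed at it. The sketch below shows roughly what nv50_fifo_playlist_update() (called from the nv84 code later in this diff) has to do with them; the channel-enable table at 0x2600 and the 0x32f4/0x32ec/0x2500 writes are inferred from registers used elsewhere in this series, so treat the exact offsets as illustrative rather than authoritative.

/* Illustrative double-buffered playlist update: pack the ids of all
 * active channels (bit 31 of 0x2600 + chan*4) into the idle playlist,
 * flush it to VRAM, then point PFIFO at the new list. */
static void
playlist_update(struct drm_device *dev, struct nv50_fifo_priv *priv)
{
	struct drm_nouveau_private *dev_priv = dev->dev_private;
	struct nouveau_gpuobj *cur = priv->playlist[priv->cur_playlist];
	int i, nr;

	priv->cur_playlist = !priv->cur_playlist;

	for (i = 0, nr = 0; i < priv->base.channels; i++) {
		if (nv_rd32(dev, 0x002600 + (i * 4)) & 0x80000000)
			nv_wo32(cur, nr++ * 4, i);
	}
	dev_priv->engine.instmem.flush(dev);

	nv_wr32(dev, 0x0032f4, cur->vinst >> 12);	/* playlist address */
	nv_wr32(dev, 0x0032ec, nr);			/* playlist length */
	nv_wr32(dev, 0x002500, 0x00000101);		/* kick the scheduler */
}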

+ 9 - 220
drivers/gpu/drm/nouveau/nv50_graph.c

@@ -27,8 +27,8 @@
 #include "drmP.h"
 #include "drm.h"
 #include "nouveau_drv.h"
+#include "nouveau_fifo.h"
 #include "nouveau_ramht.h"
-#include "nouveau_grctx.h"
 #include "nouveau_dma.h"
 #include "nouveau_vm.h"
 #include "nv50_evo.h"
@@ -40,86 +40,6 @@ struct nv50_graph_engine {
 	u32 grctx_size;
 };
 
-static void
-nv50_graph_fifo_access(struct drm_device *dev, bool enabled)
-{
-	const uint32_t mask = 0x00010001;
-
-	if (enabled)
-		nv_wr32(dev, 0x400500, nv_rd32(dev, 0x400500) | mask);
-	else
-		nv_wr32(dev, 0x400500, nv_rd32(dev, 0x400500) & ~mask);
-}
-
-static struct nouveau_channel *
-nv50_graph_channel(struct drm_device *dev)
-{
-	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	uint32_t inst;
-	int i;
-
-	/* Be sure we're not in the middle of a context switch or bad things
-	 * will happen, such as unloading the wrong pgraph context.
-	 */
-	if (!nv_wait(dev, 0x400300, 0x00000001, 0x00000000))
-		NV_ERROR(dev, "Ctxprog is still running\n");
-
-	inst = nv_rd32(dev, NV50_PGRAPH_CTXCTL_CUR);
-	if (!(inst & NV50_PGRAPH_CTXCTL_CUR_LOADED))
-		return NULL;
-	inst = (inst & NV50_PGRAPH_CTXCTL_CUR_INSTANCE) << 12;
-
-	for (i = 0; i < dev_priv->engine.fifo.channels; i++) {
-		struct nouveau_channel *chan = dev_priv->channels.ptr[i];
-
-		if (chan && chan->ramin && chan->ramin->vinst == inst)
-			return chan;
-	}
-
-	return NULL;
-}
-
-static int
-nv50_graph_do_load_context(struct drm_device *dev, uint32_t inst)
-{
-	uint32_t fifo = nv_rd32(dev, 0x400500);
-
-	nv_wr32(dev, 0x400500, fifo & ~1);
-	nv_wr32(dev, 0x400784, inst);
-	nv_wr32(dev, 0x400824, nv_rd32(dev, 0x400824) | 0x40);
-	nv_wr32(dev, 0x400320, nv_rd32(dev, 0x400320) | 0x11);
-	nv_wr32(dev, 0x400040, 0xffffffff);
-	(void)nv_rd32(dev, 0x400040);
-	nv_wr32(dev, 0x400040, 0x00000000);
-	nv_wr32(dev, 0x400304, nv_rd32(dev, 0x400304) | 1);
-
-	if (nouveau_wait_for_idle(dev))
-		nv_wr32(dev, 0x40032c, inst | (1<<31));
-	nv_wr32(dev, 0x400500, fifo);
-
-	return 0;
-}
-
-static int
-nv50_graph_unload_context(struct drm_device *dev)
-{
-	uint32_t inst;
-
-	inst  = nv_rd32(dev, NV50_PGRAPH_CTXCTL_CUR);
-	if (!(inst & NV50_PGRAPH_CTXCTL_CUR_LOADED))
-		return 0;
-	inst &= NV50_PGRAPH_CTXCTL_CUR_INSTANCE;
-
-	nouveau_wait_for_idle(dev);
-	nv_wr32(dev, 0x400784, inst);
-	nv_wr32(dev, 0x400824, nv_rd32(dev, 0x400824) | 0x20);
-	nv_wr32(dev, 0x400304, nv_rd32(dev, 0x400304) | 0x01);
-	nouveau_wait_for_idle(dev);
-
-	nv_wr32(dev, NV50_PGRAPH_CTXCTL_CUR, inst);
-	return 0;
-}
-
 static int
 nv50_graph_init(struct drm_device *dev, int engine)
 {
@@ -211,12 +131,6 @@ nv50_graph_init(struct drm_device *dev, int engine)
 static int
 nv50_graph_fini(struct drm_device *dev, int engine, bool suspend)
 {
-	nv_mask(dev, 0x400500, 0x00010001, 0x00000000);
-	if (!nv_wait(dev, 0x400700, ~0, 0) && suspend) {
-		nv_mask(dev, 0x400500, 0x00010001, 0x00010001);
-		return -EBUSY;
-	}
-	nv50_graph_unload_context(dev);
 	nv_wr32(dev, 0x40013c, 0x00000000);
 	return 0;
 }
@@ -229,7 +143,6 @@ nv50_graph_context_new(struct nouveau_channel *chan, int engine)
 	struct nouveau_gpuobj *ramin = chan->ramin;
 	struct nouveau_gpuobj *grctx = NULL;
 	struct nv50_graph_engine *pgraph = nv_engine(dev, engine);
-	struct nouveau_grctx ctx = {};
 	int hdr, ret;
 
 	NV_DEBUG(dev, "ch%d\n", chan->id);
@@ -248,11 +161,7 @@ nv50_graph_context_new(struct nouveau_channel *chan, int engine)
 	nv_wo32(ramin, hdr + 0x10, 0);
 	nv_wo32(ramin, hdr + 0x14, 0x00010000);
 
-	ctx.dev = chan->dev;
-	ctx.mode = NOUVEAU_GRCTX_VALS;
-	ctx.data = grctx;
-	nv50_grctx_init(&ctx);
-
+	nv50_grctx_fill(dev, grctx);
 	nv_wo32(grctx, 0x00000, chan->ramin->vinst >> 12);
 
 	dev_priv->engine.instmem.flush(dev);
@@ -268,33 +177,14 @@ nv50_graph_context_del(struct nouveau_channel *chan, int engine)
 	struct nouveau_gpuobj *grctx = chan->engctx[engine];
 	struct drm_device *dev = chan->dev;
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nouveau_fifo_engine *pfifo = &dev_priv->engine.fifo;
 	int i, hdr = (dev_priv->chipset == 0x50) ? 0x200 : 0x20;
-	unsigned long flags;
-
-	NV_DEBUG(dev, "ch%d\n", chan->id);
-
-	if (!chan->ramin)
-		return;
-
-	spin_lock_irqsave(&dev_priv->context_switch_lock, flags);
-	pfifo->reassign(dev, false);
-	nv50_graph_fifo_access(dev, false);
-
-	if (nv50_graph_channel(dev) == chan)
-		nv50_graph_unload_context(dev);
 
 	for (i = hdr; i < hdr + 24; i += 4)
 		nv_wo32(chan->ramin, i, 0);
 	dev_priv->engine.instmem.flush(dev);
 
-	nv50_graph_fifo_access(dev, true);
-	pfifo->reassign(dev, true);
-	spin_unlock_irqrestore(&dev_priv->context_switch_lock, flags);
-
-	nouveau_gpuobj_ref(NULL, &grctx);
-
 	atomic_dec(&chan->vm->engref[engine]);
+	nouveau_gpuobj_ref(NULL, &grctx);
 	chan->engctx[engine] = NULL;
 }
 
@@ -324,85 +214,6 @@ nv50_graph_object_new(struct nouveau_channel *chan, int engine,
 	return ret;
 }
 
-static void
-nv50_graph_context_switch(struct drm_device *dev)
-{
-	uint32_t inst;
-
-	nv50_graph_unload_context(dev);
-
-	inst  = nv_rd32(dev, NV50_PGRAPH_CTXCTL_NEXT);
-	inst &= NV50_PGRAPH_CTXCTL_NEXT_INSTANCE;
-	nv50_graph_do_load_context(dev, inst);
-
-	nv_wr32(dev, NV40_PGRAPH_INTR_EN, nv_rd32(dev,
-		NV40_PGRAPH_INTR_EN) | NV_PGRAPH_INTR_CONTEXT_SWITCH);
-}
-
-static int
-nv50_graph_nvsw_dma_vblsem(struct nouveau_channel *chan,
-			   u32 class, u32 mthd, u32 data)
-{
-	struct nouveau_gpuobj *gpuobj;
-
-	gpuobj = nouveau_ramht_find(chan, data);
-	if (!gpuobj)
-		return -ENOENT;
-
-	if (nouveau_notifier_offset(gpuobj, NULL))
-		return -EINVAL;
-
-	chan->nvsw.vblsem = gpuobj;
-	chan->nvsw.vblsem_offset = ~0;
-	return 0;
-}
-
-static int
-nv50_graph_nvsw_vblsem_offset(struct nouveau_channel *chan,
-			      u32 class, u32 mthd, u32 data)
-{
-	if (nouveau_notifier_offset(chan->nvsw.vblsem, &data))
-		return -ERANGE;
-
-	chan->nvsw.vblsem_offset = data >> 2;
-	return 0;
-}
-
-static int
-nv50_graph_nvsw_vblsem_release_val(struct nouveau_channel *chan,
-				   u32 class, u32 mthd, u32 data)
-{
-	chan->nvsw.vblsem_rval = data;
-	return 0;
-}
-
-static int
-nv50_graph_nvsw_vblsem_release(struct nouveau_channel *chan,
-			       u32 class, u32 mthd, u32 data)
-{
-	struct drm_device *dev = chan->dev;
-	struct drm_nouveau_private *dev_priv = dev->dev_private;
-
-	if (!chan->nvsw.vblsem || chan->nvsw.vblsem_offset == ~0 || data > 1)
-		return -EINVAL;
-
-	drm_vblank_get(dev, data);
-
-	chan->nvsw.vblsem_head = data;
-	list_add(&chan->nvsw.vbl_wait, &dev_priv->vbl_waiting);
-
-	return 0;
-}
-
-static int
-nv50_graph_nvsw_mthd_page_flip(struct nouveau_channel *chan,
-			       u32 class, u32 mthd, u32 data)
-{
-	nouveau_finish_page_flip(chan, NULL);
-	return 0;
-}
-
-
 static void
 nv50_graph_tlb_flush(struct drm_device *dev, int engine)
 {
@@ -514,6 +325,7 @@ struct nouveau_enum nv50_data_error_names[] = {
 	{ 0x0000001f, "RT_BPP128_WITH_MS8", NULL },
 	{ 0x00000021, "Z_OUT_OF_BOUNDS", NULL },
 	{ 0x00000023, "XY_OUT_OF_BOUNDS", NULL },
+	{ 0x00000024, "VP_ZERO_INPUTS", NULL },
 	{ 0x00000027, "CP_MORE_PARAMS_THAN_SHARED", NULL },
 	{ 0x00000028, "CP_NO_REG_SPACE_STRIPED", NULL },
 	{ 0x00000029, "CP_NO_REG_SPACE_PACKED", NULL },
@@ -900,13 +712,14 @@ nv50_pgraph_trap_handler(struct drm_device *dev, u32 display, u64 inst, u32 chid
 int
 nv50_graph_isr_chid(struct drm_device *dev, u64 inst)
 {
+	struct nouveau_fifo_priv *pfifo = nv_engine(dev, NVOBJ_ENGINE_FIFO);
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
 	struct nouveau_channel *chan;
 	unsigned long flags;
 	int i;
 
 	spin_lock_irqsave(&dev_priv->channels.lock, flags);
-	for (i = 0; i < dev_priv->engine.fifo.channels; i++) {
+	for (i = 0; i < pfifo->channels; i++) {
 		chan = dev_priv->channels.ptr[i];
 		if (!chan || !chan->ramin)
 			continue;
@@ -939,15 +752,6 @@ nv50_graph_isr(struct drm_device *dev)
 				show &= ~0x00000010;
 		}
 
-		if (stat & 0x00001000) {
-			nv_wr32(dev, 0x400500, 0x00000000);
-			nv_wr32(dev, 0x400100, 0x00001000);
-			nv_mask(dev, 0x40013c, 0x00001000, 0x00000000);
-			nv50_graph_context_switch(dev);
-			stat &= ~0x00001000;
-			show &= ~0x00001000;
-		}
-
 		show = (show && nouveau_ratelimit()) ? show : 0;
 
 		if (show & 0x00100000) {
@@ -996,28 +800,21 @@ nv50_graph_create(struct drm_device *dev)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
 	struct nv50_graph_engine *pgraph;
-	struct nouveau_grctx ctx = {};
 	int ret;
 
 	pgraph = kzalloc(sizeof(*pgraph), GFP_KERNEL);
 	if (!pgraph)
 		return -ENOMEM;
 
-	ctx.dev = dev;
-	ctx.mode = NOUVEAU_GRCTX_PROG;
-	ctx.data = pgraph->ctxprog;
-	ctx.ctxprog_max = ARRAY_SIZE(pgraph->ctxprog);
-
-	ret = nv50_grctx_init(&ctx);
+	ret = nv50_grctx_init(dev, pgraph->ctxprog, ARRAY_SIZE(pgraph->ctxprog),
+				  &pgraph->ctxprog_size,
+				  &pgraph->grctx_size);
 	if (ret) {
 		NV_ERROR(dev, "PGRAPH: ctxprog build failed\n");
 		kfree(pgraph);
 		return 0;
 	}
 
-	pgraph->grctx_size = ctx.ctxvals_pos * 4;
-	pgraph->ctxprog_size = ctx.ctxprog_len;
-
 	pgraph->base.destroy = nv50_graph_destroy;
 	pgraph->base.init = nv50_graph_init;
 	pgraph->base.fini = nv50_graph_fini;
@@ -1031,14 +828,6 @@ nv50_graph_create(struct drm_device *dev)
 
 	nouveau_irq_register(dev, 12, nv50_graph_isr);
 
-	/* NVSW really doesn't live here... */
-	NVOBJ_CLASS(dev, 0x506e, SW); /* nvsw */
-	NVOBJ_MTHD (dev, 0x506e, 0x018c, nv50_graph_nvsw_dma_vblsem);
-	NVOBJ_MTHD (dev, 0x506e, 0x0400, nv50_graph_nvsw_vblsem_offset);
-	NVOBJ_MTHD (dev, 0x506e, 0x0404, nv50_graph_nvsw_vblsem_release_val);
-	NVOBJ_MTHD (dev, 0x506e, 0x0408, nv50_graph_nvsw_vblsem_release);
-	NVOBJ_MTHD (dev, 0x506e, 0x0500, nv50_graph_nvsw_mthd_page_flip);
-
 	NVOBJ_ENGINE_ADD(dev, GR, &pgraph->base);
 	NVOBJ_CLASS(dev, 0x0030, GR); /* null */
 	NVOBJ_CLASS(dev, 0x5039, GR); /* m2mf */

+ 30 - 3
drivers/gpu/drm/nouveau/nv50_grctx.c

@@ -172,8 +172,8 @@ static void nv50_graph_construct_xfer2(struct nouveau_grctx *ctx);
 
 /* Main function: construct the ctxprog skeleton, call the other functions. */
 
-int
-nv50_grctx_init(struct nouveau_grctx *ctx)
+static int
+nv50_grctx_generate(struct nouveau_grctx *ctx)
 {
 	struct drm_nouveau_private *dev_priv = ctx->dev->dev_private;
 
@@ -210,7 +210,7 @@ nv50_grctx_init(struct nouveau_grctx *ctx)
 	cp_name(ctx, cp_check_load);
 	cp_bra (ctx, AUTO_LOAD, PENDING, cp_setup_auto_load);
 	cp_bra (ctx, USER_LOAD, PENDING, cp_setup_load);
-	cp_bra (ctx, ALWAYS, TRUE, cp_exit);
+	cp_bra (ctx, ALWAYS, TRUE, cp_prepare_exit);
 
 	/* setup for context load */
 	cp_name(ctx, cp_setup_auto_load);
@@ -277,6 +277,33 @@ nv50_grctx_init(struct nouveau_grctx *ctx)
 	return 0;
 }
 
+void
+nv50_grctx_fill(struct drm_device *dev, struct nouveau_gpuobj *mem)
+{
+	nv50_grctx_generate(&(struct nouveau_grctx) {
+			     .dev = dev,
+			     .mode = NOUVEAU_GRCTX_VALS,
+			     .data = mem,
+			   });
+}
+
+int
+nv50_grctx_init(struct drm_device *dev, u32 *data, u32 max, u32 *len, u32 *cnt)
+{
+	struct nouveau_grctx ctx = {
+		.dev = dev,
+		.mode = NOUVEAU_GRCTX_PROG,
+		.data = data,
+		.ctxprog_max = max
+	};
+	int ret;
+
+	ret = nv50_grctx_generate(&ctx);
+	*cnt = ctx.ctxvals_pos * 4;
+	*len = ctx.ctxprog_len;
+	return ret;
+}
+
 /*
  * Constructs MMIO part of ctxprog and ctxvals. Just a matter of knowing which
  * registers to save/restore and the default values for them.
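
Taken together with the nv50_graph.c hunk above, the split leaves two clear entry points: assemble the ctxprog once when the engine is created, and stamp default values into each new channel's context object afterwards. A condensed sketch of the call pattern (buffer size and variable names illustrative):

u32 ctxprog[512];		/* illustrative ucode buffer */
u32 ctxprog_len, grctx_size;
struct nouveau_gpuobj *grctx;	/* per-channel ctx, grctx_size bytes */
int ret;

/* engine creation: build the ucode, learn the per-channel ctx size */
ret = nv50_grctx_init(dev, ctxprog, ARRAY_SIZE(ctxprog),
		      &ctxprog_len, &grctx_size);

/* context_new: fill a freshly allocated grctx with default values */
if (ret == 0)
	nv50_grctx_fill(dev, grctx);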

+ 1 - 1
drivers/gpu/drm/nouveau/nv50_instmem.c

@@ -83,7 +83,7 @@ nv50_channel_new(struct drm_device *dev, u32 size, struct nouveau_vm *vm,
 		return ret;
 	}
 
-	ret = drm_mm_init(&chan->ramin_heap, 0x6000, chan->ramin->size);
+	ret = drm_mm_init(&chan->ramin_heap, 0x6000, chan->ramin->size - 0x6000);
 	if (ret) {
 		nv50_channel_del(&chan);
 		return ret;

+ 2 - 17
drivers/gpu/drm/nouveau/nv50_mpeg.c

@@ -77,27 +77,13 @@ nv50_mpeg_context_new(struct nouveau_channel *chan, int engine)
 static void
 nv50_mpeg_context_del(struct nouveau_channel *chan, int engine)
 {
-	struct drm_nouveau_private *dev_priv = chan->dev->dev_private;
 	struct nouveau_gpuobj *ctx = chan->engctx[engine];
 	struct drm_device *dev = chan->dev;
-	unsigned long flags;
-	u32 inst, i;
-
-	if (!chan->ramin)
-		return;
-
-	inst  = chan->ramin->vinst >> 12;
-	inst |= 0x80000000;
-
-	spin_lock_irqsave(&dev_priv->context_switch_lock, flags);
-	nv_mask(dev, 0x00b32c, 0x00000001, 0x00000000);
-	if (nv_rd32(dev, 0x00b318) == inst)
-		nv_mask(dev, 0x00b318, 0x80000000, 0x00000000);
-	nv_mask(dev, 0x00b32c, 0x00000001, 0x00000001);
-	spin_unlock_irqrestore(&dev_priv->context_switch_lock, flags);
+	int i;
 
 	for (i = 0x00; i <= 0x14; i += 4)
 		nv_wo32(chan->ramin, CTX_PTR(dev, i), 0x00000000);
+
 	nouveau_gpuobj_ref(NULL, &ctx);
 	chan->engctx[engine] = NULL;
 }
@@ -162,7 +148,6 @@ nv50_mpeg_init(struct drm_device *dev, int engine)
 static int
 nv50_mpeg_fini(struct drm_device *dev, int engine, bool suspend)
 {
-	/*XXX: context save for s/r */
 	nv_mask(dev, 0x00b32c, 0x00000001, 0x00000000);
 	nv_wr32(dev, 0x00b140, 0x00000000);
 	return 0;

+ 214 - 0
drivers/gpu/drm/nouveau/nv50_software.c

@@ -0,0 +1,214 @@
+/*
+ * Copyright 2012 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs
+ */
+
+#include "drmP.h"
+
+#include "nouveau_drv.h"
+#include "nouveau_ramht.h"
+#include "nouveau_software.h"
+
+#include "nv50_display.h"
+
+struct nv50_software_priv {
+	struct nouveau_software_priv base;
+};
+
+struct nv50_software_chan {
+	struct nouveau_software_chan base;
+	struct {
+		struct nouveau_gpuobj *object;
+	} vblank;
+};
+
+static int
+mthd_dma_vblsem(struct nouveau_channel *chan, u32 class, u32 mthd, u32 data)
+{
+	struct nv50_software_chan *pch = chan->engctx[NVOBJ_ENGINE_SW];
+	struct nouveau_gpuobj *gpuobj;
+
+	gpuobj = nouveau_ramht_find(chan, data);
+	if (!gpuobj)
+		return -ENOENT;
+
+	if (nouveau_notifier_offset(gpuobj, NULL))
+		return -EINVAL;
+
+	pch->vblank.object = gpuobj;
+	pch->base.vblank.offset = ~0;
+	return 0;
+}
+
+static int
+mthd_vblsem_offset(struct nouveau_channel *chan, u32 class, u32 mthd, u32 data)
+{
+	struct nv50_software_chan *pch = chan->engctx[NVOBJ_ENGINE_SW];
+
+	if (nouveau_notifier_offset(pch->vblank.object, &data))
+		return -ERANGE;
+
+	pch->base.vblank.offset = data >> 2;
+	return 0;
+}
+
+static int
+mthd_vblsem_value(struct nouveau_channel *chan, u32 class, u32 mthd, u32 data)
+{
+	struct nv50_software_chan *pch = chan->engctx[NVOBJ_ENGINE_SW];
+	pch->base.vblank.value = data;
+	return 0;
+}
+
+static int
+mthd_vblsem_release(struct nouveau_channel *chan, u32 class, u32 mthd, u32 data)
+{
+	struct nv50_software_priv *psw = nv_engine(chan->dev, NVOBJ_ENGINE_SW);
+	struct nv50_software_chan *pch = chan->engctx[NVOBJ_ENGINE_SW];
+	struct drm_device *dev = chan->dev;
+
+	if (!pch->vblank.object || pch->base.vblank.offset == ~0 || data > 1)
+		return -EINVAL;
+
+	drm_vblank_get(dev, data);
+
+	pch->base.vblank.head = data;
+	list_add(&pch->base.vblank.list, &psw->base.vblank);
+	return 0;
+}
+
+static int
+mthd_flip(struct nouveau_channel *chan, u32 class, u32 mthd, u32 data)
+{
+	nouveau_finish_page_flip(chan, NULL);
+	return 0;
+}
+
+static int
+nv50_software_context_new(struct nouveau_channel *chan, int engine)
+{
+	struct nv50_software_priv *psw = nv_engine(chan->dev, NVOBJ_ENGINE_SW);
+	struct nv50_display *pdisp = nv50_display(chan->dev);
+	struct nv50_software_chan *pch;
+	int ret = 0, i;
+
+	pch = kzalloc(sizeof(*pch), GFP_KERNEL);
+	if (!pch)
+		return -ENOMEM;
+
+	nouveau_software_context_new(&pch->base);
+	pch->base.vblank.bo = chan->notifier_bo;
+	chan->engctx[engine] = pch;
+
+	/* dma objects for display sync channel semaphore blocks */
+	for (i = 0; i < chan->dev->mode_config.num_crtc; i++) {
+		struct nv50_display_crtc *dispc = &pdisp->crtc[i];
+		struct nouveau_gpuobj *obj = NULL;
+
+		ret = nouveau_gpuobj_dma_new(chan, NV_CLASS_DMA_IN_MEMORY,
+					     dispc->sem.bo->bo.offset, 0x1000,
+					     NV_MEM_ACCESS_RW,
+					     NV_MEM_TARGET_VRAM, &obj);
+		if (ret)
+			break;
+
+		ret = nouveau_ramht_insert(chan, NvEvoSema0 + i, obj);
+		nouveau_gpuobj_ref(NULL, &obj);
+	}
+
+	if (ret)
+		psw->base.base.context_del(chan, engine);
+	return ret;
+}
+
+static void
+nv50_software_context_del(struct nouveau_channel *chan, int engine)
+{
+	struct nv50_software_chan *pch = chan->engctx[engine];
+	chan->engctx[engine] = NULL;
+	kfree(pch);
+}
+
+static int
+nv50_software_object_new(struct nouveau_channel *chan, int engine,
+			 u32 handle, u16 class)
+{
+	struct drm_device *dev = chan->dev;
+	struct nouveau_gpuobj *obj = NULL;
+	int ret;
+
+	ret = nouveau_gpuobj_new(dev, chan, 16, 16, 0, &obj);
+	if (ret)
+		return ret;
+	obj->engine = 0;
+	obj->class  = class;
+
+	ret = nouveau_ramht_insert(chan, handle, obj);
+	nouveau_gpuobj_ref(NULL, &obj);
+	return ret;
+}
+
+static int
+nv50_software_init(struct drm_device *dev, int engine)
+{
+	return 0;
+}
+
+static int
+nv50_software_fini(struct drm_device *dev, int engine, bool suspend)
+{
+	return 0;
+}
+
+static void
+nv50_software_destroy(struct drm_device *dev, int engine)
+{
+	struct nv50_software_priv *psw = nv_engine(dev, engine);
+
+	NVOBJ_ENGINE_DEL(dev, SW);
+	kfree(psw);
+}
+
+int
+nv50_software_create(struct drm_device *dev)
+{
+	struct nv50_software_priv *psw = kzalloc(sizeof(*psw), GFP_KERNEL);
+	if (!psw)
+		return -ENOMEM;
+
+	psw->base.base.destroy = nv50_software_destroy;
+	psw->base.base.init = nv50_software_init;
+	psw->base.base.fini = nv50_software_fini;
+	psw->base.base.context_new = nv50_software_context_new;
+	psw->base.base.context_del = nv50_software_context_del;
+	psw->base.base.object_new = nv50_software_object_new;
+	nouveau_software_create(&psw->base);
+
+	NVOBJ_ENGINE_ADD(dev, SW, &psw->base.base);
+	NVOBJ_CLASS(dev, 0x506e, SW);
+	NVOBJ_MTHD (dev, 0x506e, 0x018c, mthd_dma_vblsem);
+	NVOBJ_MTHD (dev, 0x506e, 0x0400, mthd_vblsem_offset);
+	NVOBJ_MTHD (dev, 0x506e, 0x0404, mthd_vblsem_value);
+	NVOBJ_MTHD (dev, 0x506e, 0x0408, mthd_vblsem_release);
+	NVOBJ_MTHD (dev, 0x506e, 0x0500, mthd_flip);
+	return 0;
+}
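
The five methods registered here reproduce the old PGRAPH-resident NVSW behaviour: bind a notifier DMA object, set an offset and a release value, then arm the release against a CRTC head. A hypothetical pushbuf sequence exercising them could look like the following (NvSubSw and NvNotify0 are illustrative handles, not names from this patch; BEGIN_NV04/OUT_RING as in nouveau_dma.h):

int ret = RING_SPACE(chan, 8);
if (ret == 0) {
	BEGIN_NV04(chan, NvSubSw, 0x018c, 1);	/* DMA_VBLSEM */
	OUT_RING  (chan, NvNotify0);		/* notifier handle */
	BEGIN_NV04(chan, NvSubSw, 0x0400, 1);	/* VBLSEM_OFFSET */
	OUT_RING  (chan, 0x20);			/* byte offset in notifier */
	BEGIN_NV04(chan, NvSubSw, 0x0404, 1);	/* VBLSEM_RELEASE_VALUE */
	OUT_RING  (chan, 0xcafebabe);		/* written when vblank fires */
	BEGIN_NV04(chan, NvSubSw, 0x0408, 1);	/* VBLSEM_RELEASE */
	OUT_RING  (chan, 0);			/* head index, 0 or 1 */
	FIRE_RING (chan);
}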

+ 3 - 3
drivers/gpu/drm/nouveau/nv50_sor.c

@@ -242,9 +242,9 @@ nv50_sor_disconnect(struct drm_encoder *encoder)
 		NV_ERROR(dev, "no space while disconnecting SOR\n");
 		return;
 	}
-	BEGIN_RING(evo, 0, NV50_EVO_SOR(nv_encoder->or, MODE_CTRL), 1);
+	BEGIN_NV04(evo, 0, NV50_EVO_SOR(nv_encoder->or, MODE_CTRL), 1);
 	OUT_RING  (evo, 0);
-	BEGIN_RING(evo, 0, NV50_EVO_UPDATE, 1);
+	BEGIN_NV04(evo, 0, NV50_EVO_UPDATE, 1);
 	OUT_RING  (evo, 0);
 
 	nouveau_hdmi_mode_set(encoder, NULL);
@@ -430,7 +430,7 @@ nv50_sor_mode_set(struct drm_encoder *encoder, struct drm_display_mode *umode,
 		nv_encoder->crtc = NULL;
 		return;
 	}
-	BEGIN_RING(evo, 0, NV50_EVO_SOR(nv_encoder->or, MODE_CTRL), 1);
+	BEGIN_NV04(evo, 0, NV50_EVO_SOR(nv_encoder->or, MODE_CTRL), 1);
 	OUT_RING(evo, mode_ctl);
 }
 

+ 0 - 2
drivers/gpu/drm/nouveau/nv50_vm.c

@@ -147,7 +147,6 @@ nv50_vm_flush(struct nouveau_vm *vm)
 {
 	struct drm_nouveau_private *dev_priv = vm->dev->dev_private;
 	struct nouveau_instmem_engine *pinstmem = &dev_priv->engine.instmem;
-	struct nouveau_fifo_engine *pfifo = &dev_priv->engine.fifo;
 	int i;
 
 	pinstmem->flush(vm->dev);
@@ -158,7 +157,6 @@ nv50_vm_flush(struct nouveau_vm *vm)
 		return;
 	}
 
-	pfifo->tlb_flush(vm->dev);
 	for (i = 0; i < NVOBJ_ENGINE_NR; i++) {
 		if (atomic_read(&vm->engref[i]))
 			dev_priv->eng[i]->tlb_flush(vm->dev, i);

+ 177 - 0
drivers/gpu/drm/nouveau/nv84_fence.c

@@ -0,0 +1,177 @@
+/*
+ * Copyright 2012 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs
+ */
+
+#include "drmP.h"
+#include "nouveau_drv.h"
+#include "nouveau_dma.h"
+#include "nouveau_fifo.h"
+#include "nouveau_ramht.h"
+#include "nouveau_fence.h"
+
+struct nv84_fence_chan {
+	struct nouveau_fence_chan base;
+};
+
+struct nv84_fence_priv {
+	struct nouveau_fence_priv base;
+	struct nouveau_gpuobj *mem;
+};
+
+static int
+nv84_fence_emit(struct nouveau_fence *fence)
+{
+	struct nouveau_channel *chan = fence->channel;
+	int ret = RING_SPACE(chan, 7);
+	if (ret == 0) {
+		BEGIN_NV04(chan, 0, NV11_SUBCHAN_DMA_SEMAPHORE, 1);
+		OUT_RING  (chan, NvSema);
+		BEGIN_NV04(chan, 0, NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH, 4);
+		OUT_RING  (chan, upper_32_bits(chan->id * 16));
+		OUT_RING  (chan, lower_32_bits(chan->id * 16));
+		OUT_RING  (chan, fence->sequence);
+		OUT_RING  (chan, NV84_SUBCHAN_SEMAPHORE_TRIGGER_WRITE_LONG);
+		FIRE_RING (chan);
+	}
+	return ret;
+}
+
+static int
+nv84_fence_sync(struct nouveau_fence *fence,
+		struct nouveau_channel *prev, struct nouveau_channel *chan)
+{
+	int ret = RING_SPACE(chan, 7);
+	if (ret == 0) {
+		BEGIN_NV04(chan, 0, NV11_SUBCHAN_DMA_SEMAPHORE, 1);
+		OUT_RING  (chan, NvSema);
+		BEGIN_NV04(chan, 0, NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH, 4);
+		OUT_RING  (chan, upper_32_bits(prev->id * 16));
+		OUT_RING  (chan, lower_32_bits(prev->id * 16));
+		OUT_RING  (chan, fence->sequence);
+		OUT_RING  (chan, NV84_SUBCHAN_SEMAPHORE_TRIGGER_ACQUIRE_GEQUAL);
+		FIRE_RING (chan);
+	}
+	return ret;
+}
+
+static u32
+nv84_fence_read(struct nouveau_channel *chan)
+{
+	struct nv84_fence_priv *priv = nv_engine(chan->dev, NVOBJ_ENGINE_FENCE);
+	return nv_ro32(priv->mem, chan->id * 16);
+}
+
+static void
+nv84_fence_context_del(struct nouveau_channel *chan, int engine)
+{
+	struct nv84_fence_chan *fctx = chan->engctx[engine];
+	nouveau_fence_context_del(&fctx->base);
+	chan->engctx[engine] = NULL;
+	kfree(fctx);
+}
+
+static int
+nv84_fence_context_new(struct nouveau_channel *chan, int engine)
+{
+	struct nv84_fence_priv *priv = nv_engine(chan->dev, engine);
+	struct nv84_fence_chan *fctx;
+	struct nouveau_gpuobj *obj;
+	int ret;
+
+	fctx = chan->engctx[engine] = kzalloc(sizeof(*fctx), GFP_KERNEL);
+	if (!fctx)
+		return -ENOMEM;
+
+	nouveau_fence_context_new(&fctx->base);
+
+	ret = nouveau_gpuobj_dma_new(chan, NV_CLASS_DMA_FROM_MEMORY,
+				     priv->mem->vinst, priv->mem->size,
+				     NV_MEM_ACCESS_RW,
+				     NV_MEM_TARGET_VRAM, &obj);
+	if (ret == 0) {
+		ret = nouveau_ramht_insert(chan, NvSema, obj);
+		nouveau_gpuobj_ref(NULL, &obj);
+		nv_wo32(priv->mem, chan->id * 16, 0x00000000);
+	}
+
+	if (ret)
+		nv84_fence_context_del(chan, engine);
+	return ret;
+}
+
+static int
+nv84_fence_fini(struct drm_device *dev, int engine, bool suspend)
+{
+	return 0;
+}
+
+static int
+nv84_fence_init(struct drm_device *dev, int engine)
+{
+	return 0;
+}
+
+static void
+nv84_fence_destroy(struct drm_device *dev, int engine)
+{
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nv84_fence_priv *priv = nv_engine(dev, engine);
+
+	nouveau_gpuobj_ref(NULL, &priv->mem);
+	dev_priv->eng[engine] = NULL;
+	kfree(priv);
+}
+
+int
+nv84_fence_create(struct drm_device *dev)
+{
+	struct nouveau_fifo_priv *pfifo = nv_engine(dev, NVOBJ_ENGINE_FIFO);
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nv84_fence_priv *priv;
+	int ret;
+
+	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	priv->base.engine.destroy = nv84_fence_destroy;
+	priv->base.engine.init = nv84_fence_init;
+	priv->base.engine.fini = nv84_fence_fini;
+	priv->base.engine.context_new = nv84_fence_context_new;
+	priv->base.engine.context_del = nv84_fence_context_del;
+	priv->base.emit = nv84_fence_emit;
+	priv->base.sync = nv84_fence_sync;
+	priv->base.read = nv84_fence_read;
+	dev_priv->eng[NVOBJ_ENGINE_FENCE] = &priv->base.engine;
+
+	ret = nouveau_gpuobj_new(dev, NULL, 16 * pfifo->channels,
+				 0x1000, 0, &priv->mem);
+	if (ret)
+		goto out;
+
+out:
+	if (ret)
+		nv84_fence_destroy(dev, NVOBJ_ENGINE_FENCE);
+	return ret;
+}
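
One note on the semaphore scheme above: each channel owns a 16-byte slot at chan->id * 16 in the shared VRAM buffer, emit() writes the fence sequence there with WRITE_LONG, and sync() makes another channel ACQUIRE_GEQUAL on the emitting channel's slot. On the host side the same slot is read back through nv84_fence_read(); a wrap-safe "is this fence done" test over that value would look like the sketch below (illustrative only, the real test lives in the shared nouveau_fence.c code):

static bool
nv84_fence_done(struct nouveau_channel *chan, u32 sequence)
{
	/* signed subtraction keeps the comparison correct even if the
	 * 32-bit sequence counter wraps */
	return (int)(nv84_fence_read(chan) - sequence) >= 0;
}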

+ 241 - 0
drivers/gpu/drm/nouveau/nv84_fifo.c

@@ -0,0 +1,241 @@
+/*
+ * Copyright (C) 2012 Ben Skeggs.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "drmP.h"
+#include "drm.h"
+#include "nouveau_drv.h"
+#include "nouveau_fifo.h"
+#include "nouveau_ramht.h"
+#include "nouveau_vm.h"
+
+struct nv84_fifo_priv {
+	struct nouveau_fifo_priv base;
+	struct nouveau_gpuobj *playlist[2];
+	int cur_playlist;
+};
+
+struct nv84_fifo_chan {
+	struct nouveau_fifo_chan base;
+	struct nouveau_gpuobj *ramfc;
+	struct nouveau_gpuobj *cache;
+};
+
+static int
+nv84_fifo_context_new(struct nouveau_channel *chan, int engine)
+{
+	struct nv84_fifo_priv *priv = nv_engine(chan->dev, engine);
+	struct nv84_fifo_chan *fctx;
+	struct drm_device *dev = chan->dev;
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	u64 ib_offset = chan->pushbuf_base + chan->dma.ib_base * 4;
+	u64 instance;
+	unsigned long flags;
+	int ret;
+
+	fctx = chan->engctx[engine] = kzalloc(sizeof(*fctx), GFP_KERNEL);
+	if (!fctx)
+		return -ENOMEM;
+	atomic_inc(&chan->vm->engref[engine]);
+
+	chan->user = ioremap(pci_resource_start(dev->pdev, 0) +
+			     NV50_USER(chan->id), PAGE_SIZE);
+	if (!chan->user) {
+		ret = -ENOMEM;
+		goto error;
+	}
+
+	ret = nouveau_gpuobj_new(dev, chan, 256, 256, NVOBJ_FLAG_ZERO_ALLOC |
+				 NVOBJ_FLAG_ZERO_FREE, &fctx->ramfc);
+	if (ret)
+		goto error;
+
+	instance = fctx->ramfc->vinst >> 8;
+
+	ret = nouveau_gpuobj_new(dev, chan, 4096, 1024, 0, &fctx->cache);
+	if (ret)
+		goto error;
+
+	nv_wo32(fctx->ramfc, 0x3c, 0x403f6078);
+	nv_wo32(fctx->ramfc, 0x40, 0x00000000);
+	nv_wo32(fctx->ramfc, 0x44, 0x01003fff);
+	nv_wo32(fctx->ramfc, 0x48, chan->pushbuf->cinst >> 4);
+	nv_wo32(fctx->ramfc, 0x50, lower_32_bits(ib_offset));
+	nv_wo32(fctx->ramfc, 0x54, upper_32_bits(ib_offset) |
+				   drm_order(chan->dma.ib_max + 1) << 16);
+	nv_wo32(fctx->ramfc, 0x60, 0x7fffffff);
+	nv_wo32(fctx->ramfc, 0x78, 0x00000000);
+	nv_wo32(fctx->ramfc, 0x7c, 0x30000001);
+	nv_wo32(fctx->ramfc, 0x80, ((chan->ramht->bits - 9) << 27) |
+				   (4 << 24) /* SEARCH_FULL */ |
+				   (chan->ramht->gpuobj->cinst >> 4));
+	nv_wo32(fctx->ramfc, 0x88, fctx->cache->vinst >> 10);
+	nv_wo32(fctx->ramfc, 0x98, chan->ramin->vinst >> 12);
+
+	nv_wo32(chan->ramin, 0x00, chan->id);
+	nv_wo32(chan->ramin, 0x04, fctx->ramfc->vinst >> 8);
+
+	dev_priv->engine.instmem.flush(dev);
+
+	spin_lock_irqsave(&dev_priv->context_switch_lock, flags);
+	nv_wr32(dev, 0x002600 + (chan->id * 4), 0x80000000 | instance);
+	nv50_fifo_playlist_update(dev);
+	spin_unlock_irqrestore(&dev_priv->context_switch_lock, flags);
+
+error:
+	if (ret)
+		priv->base.base.context_del(chan, engine);
+	return ret;
+}
+
+static void
+nv84_fifo_context_del(struct nouveau_channel *chan, int engine)
+{
+	struct nv84_fifo_chan *fctx = chan->engctx[engine];
+	struct drm_device *dev = chan->dev;
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	unsigned long flags;
+
+	/* remove channel from playlist, will context switch if active */
+	spin_lock_irqsave(&dev_priv->context_switch_lock, flags);
+	nv_mask(dev, 0x002600 + (chan->id * 4), 0x80000000, 0x00000000);
+	nv50_fifo_playlist_update(dev);
+
+	/* tell any engines on this channel to unload their contexts */
+	nv_wr32(dev, 0x0032fc, chan->ramin->vinst >> 12);
+	if (!nv_wait_ne(dev, 0x0032fc, 0xffffffff, 0xffffffff))
+		NV_INFO(dev, "PFIFO: channel %d unload timeout\n", chan->id);
+
+	nv_wr32(dev, 0x002600 + (chan->id * 4), 0x00000000);
+	spin_unlock_irqrestore(&dev_priv->context_switch_lock, flags);
+
+	/* clean up */
+	if (chan->user) {
+		iounmap(chan->user);
+		chan->user = NULL;
+	}
+
+	nouveau_gpuobj_ref(NULL, &fctx->ramfc);
+	nouveau_gpuobj_ref(NULL, &fctx->cache);
+
+	atomic_dec(&chan->vm->engref[engine]);
+	chan->engctx[engine] = NULL;
+	kfree(fctx);
+}
+
+static int
+nv84_fifo_init(struct drm_device *dev, int engine)
+{
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nv84_fifo_chan *fctx;
+	u32 instance;
+	int i;
+
+	nv_mask(dev, 0x000200, 0x00000100, 0x00000000);
+	nv_mask(dev, 0x000200, 0x00000100, 0x00000100);
+	nv_wr32(dev, 0x00250c, 0x6f3cfc34);
+	nv_wr32(dev, 0x002044, 0x01003fff);
+
+	nv_wr32(dev, 0x002100, 0xffffffff);
+	nv_wr32(dev, 0x002140, 0xffffffff);
+
+	for (i = 0; i < 128; i++) {
+		struct nouveau_channel *chan = dev_priv->channels.ptr[i];
+		if (chan && (fctx = chan->engctx[engine]))
+			instance = 0x80000000 | fctx->ramfc->vinst >> 8;
+		else
+			instance = 0x00000000;
+		nv_wr32(dev, 0x002600 + (i * 4), instance);
+	}
+
+	nv50_fifo_playlist_update(dev);
+
+	nv_wr32(dev, 0x003200, 1);
+	nv_wr32(dev, 0x003250, 1);
+	nv_wr32(dev, 0x002500, 1);
+	return 0;
+}
+
+static int
+nv84_fifo_fini(struct drm_device *dev, int engine, bool suspend)
+{
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nv84_fifo_priv *priv = nv_engine(dev, engine);
+	int i;
+
+	/* set playlist length to zero, fifo will unload context */
+	nv_wr32(dev, 0x0032ec, 0);
+
+	/* tell all connected engines to unload their contexts */
+	for (i = 0; i < priv->base.channels; i++) {
+		struct nouveau_channel *chan = dev_priv->channels.ptr[i];
+		if (chan)
+			nv_wr32(dev, 0x0032fc, chan->ramin->vinst >> 12);
+		if (!nv_wait_ne(dev, 0x0032fc, 0xffffffff, 0xffffffff)) {
+			NV_INFO(dev, "PFIFO: channel %d unload timeout\n", i);
+			return -EBUSY;
+		}
+	}
+
+	nv_wr32(dev, 0x002140, 0);
+	return 0;
+}
+
+int
+nv84_fifo_create(struct drm_device *dev)
+{
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nv84_fifo_priv *priv;
+	int ret;
+
+	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	priv->base.base.destroy = nv50_fifo_destroy;
+	priv->base.base.init = nv84_fifo_init;
+	priv->base.base.fini = nv84_fifo_fini;
+	priv->base.base.context_new = nv84_fifo_context_new;
+	priv->base.base.context_del = nv84_fifo_context_del;
+	priv->base.base.tlb_flush = nv50_fifo_tlb_flush;
+	priv->base.channels = 127;
+	dev_priv->eng[NVOBJ_ENGINE_FIFO] = &priv->base.base;
+
+	ret = nouveau_gpuobj_new(dev, NULL, priv->base.channels * 4, 0x1000,
+				 NVOBJ_FLAG_ZERO_ALLOC, &priv->playlist[0]);
+	if (ret)
+		goto error;
+
+	ret = nouveau_gpuobj_new(dev, NULL, priv->base.channels * 4, 0x1000,
+				 NVOBJ_FLAG_ZERO_ALLOC, &priv->playlist[1]);
+	if (ret)
+		goto error;
+
+	nouveau_irq_register(dev, 8, nv04_fifo_isr);
+error:
+	if (ret)
+		priv->base.base.destroy(dev, NVOBJ_ENGINE_FIFO);
+	return ret;
+}

+ 152 - 14
drivers/gpu/drm/nouveau/nv98_crypt.c

@@ -23,21 +23,93 @@
  */
 
 #include "drmP.h"
+
 #include "nouveau_drv.h"
 #include "nouveau_util.h"
 #include "nouveau_vm.h"
 #include "nouveau_ramht.h"
 
-struct nv98_crypt_engine {
+#include "nv98_crypt.fuc.h"
+
+struct nv98_crypt_priv {
 	struct nouveau_exec_engine base;
 };
 
+struct nv98_crypt_chan {
+	struct nouveau_gpuobj *mem;
+};
+
 static int
-nv98_crypt_fini(struct drm_device *dev, int engine, bool suspend)
+nv98_crypt_context_new(struct nouveau_channel *chan, int engine)
+{
+	struct drm_device *dev = chan->dev;
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nv98_crypt_priv *priv = nv_engine(dev, engine);
+	struct nv98_crypt_chan *cctx;
+	int ret;
+
+	cctx = chan->engctx[engine] = kzalloc(sizeof(*cctx), GFP_KERNEL);
+	if (!cctx)
+		return -ENOMEM;
+
+	atomic_inc(&chan->vm->engref[engine]);
+
+	ret = nouveau_gpuobj_new(dev, chan, 256, 0, NVOBJ_FLAG_ZERO_ALLOC |
+				 NVOBJ_FLAG_ZERO_FREE, &cctx->mem);
+	if (ret)
+		goto error;
+
+	nv_wo32(chan->ramin, 0xa0, 0x00190000);
+	nv_wo32(chan->ramin, 0xa4, cctx->mem->vinst + cctx->mem->size - 1);
+	nv_wo32(chan->ramin, 0xa8, cctx->mem->vinst);
+	nv_wo32(chan->ramin, 0xac, 0x00000000);
+	nv_wo32(chan->ramin, 0xb0, 0x00000000);
+	nv_wo32(chan->ramin, 0xb4, 0x00000000);
+	dev_priv->engine.instmem.flush(dev);
+
+error:
+	if (ret)
+		priv->base.context_del(chan, engine);
+	return ret;
+}
+
+static void
+nv98_crypt_context_del(struct nouveau_channel *chan, int engine)
+{
+	struct nv98_crypt_chan *cctx = chan->engctx[engine];
+	int i;
+
+	for (i = 0xa0; i < 0xb4; i += 4)
+		nv_wo32(chan->ramin, i, 0x00000000);
+
+	nouveau_gpuobj_ref(NULL, &cctx->mem);
+
+	atomic_dec(&chan->vm->engref[engine]);
+	chan->engctx[engine] = NULL;
+	kfree(cctx);
+}
+
+static int
+nv98_crypt_object_new(struct nouveau_channel *chan, int engine,
+		     u32 handle, u16 class)
 {
-	if (!(nv_rd32(dev, 0x000200) & 0x00004000))
-		return 0;
+	struct nv98_crypt_chan *cctx = chan->engctx[engine];
+
+	/* fuc engine doesn't need an object, our ramht code does.. */
+	cctx->mem->engine = 5;
+	cctx->mem->class  = class;
+	return nouveau_ramht_insert(chan, handle, cctx->mem);
+}
 
+static void
+nv98_crypt_tlb_flush(struct drm_device *dev, int engine)
+{
+	nv50_vm_flush_engine(dev, 0x0a);
+}
+
+static int
+nv98_crypt_fini(struct drm_device *dev, int engine, bool suspend)
+{
 	nv_mask(dev, 0x000200, 0x00004000, 0x00000000);
 	return 0;
 }
@@ -45,34 +117,100 @@ nv98_crypt_fini(struct drm_device *dev, int engine, bool suspend)
 static int
 nv98_crypt_init(struct drm_device *dev, int engine)
 {
+	int i;
+
+	/* reset! */
 	nv_mask(dev, 0x000200, 0x00004000, 0x00000000);
 	nv_mask(dev, 0x000200, 0x00004000, 0x00004000);
+
+	/* wait for exit interrupt to signal */
+	nv_wait(dev, 0x087008, 0x00000010, 0x00000010);
+	nv_wr32(dev, 0x087004, 0x00000010);
+
+	/* upload microcode code and data segments */
+	nv_wr32(dev, 0x087ff8, 0x00100000);
+	for (i = 0; i < ARRAY_SIZE(nv98_pcrypt_code); i++)
+		nv_wr32(dev, 0x087ff4, nv98_pcrypt_code[i]);
+
+	nv_wr32(dev, 0x087ff8, 0x00000000);
+	for (i = 0; i < ARRAY_SIZE(nv98_pcrypt_data); i++)
+		nv_wr32(dev, 0x087ff4, nv98_pcrypt_data[i]);
+
+	/* start it running */
+	nv_wr32(dev, 0x08710c, 0x00000000);
+	nv_wr32(dev, 0x087104, 0x00000000); /* ENTRY */
+	nv_wr32(dev, 0x087100, 0x00000002); /* TRIGGER */
 	return 0;
 }
 
+static struct nouveau_enum nv98_crypt_isr_error_name[] = {
+	{ 0x0000, "ILLEGAL_MTHD" },
+	{ 0x0001, "INVALID_BITFIELD" },
+	{ 0x0002, "INVALID_ENUM" },
+	{ 0x0003, "QUERY" },
+	{}
+};
+
+static void
+nv98_crypt_isr(struct drm_device *dev)
+{
+	u32 disp = nv_rd32(dev, 0x08701c);
+	u32 stat = nv_rd32(dev, 0x087008) & disp & ~(disp >> 16);
+	u32 inst = nv_rd32(dev, 0x087050) & 0x3fffffff;
+	u32 ssta = nv_rd32(dev, 0x087040) & 0x0000ffff;
+	u32 addr = nv_rd32(dev, 0x087040) >> 16;
+	u32 mthd = (addr & 0x07ff) << 2;
+	u32 subc = (addr & 0x3800) >> 11;
+	u32 data = nv_rd32(dev, 0x087044);
+	int chid = nv50_graph_isr_chid(dev, inst);
+
+	if (stat & 0x00000040) {
+		NV_INFO(dev, "PCRYPT: DISPATCH_ERROR [");
+		nouveau_enum_print(nv98_crypt_isr_error_name, ssta);
+		printk("] ch %d [0x%08x] subc %d mthd 0x%04x data 0x%08x\n",
+			chid, inst, subc, mthd, data);
+		nv_wr32(dev, 0x087004, 0x00000040);
+		stat &= ~0x00000040;
+	}
+
+	if (stat) {
+		NV_INFO(dev, "PCRYPT: unhandled intr 0x%08x\n", stat);
+		nv_wr32(dev, 0x087004, stat);
+	}
+
+	nv50_fb_vm_trap(dev, 1);
+}
+
 static void
 nv98_crypt_destroy(struct drm_device *dev, int engine)
 {
-	struct nv98_crypt_engine *pcrypt = nv_engine(dev, engine);
+	struct nv98_crypt_priv *priv = nv_engine(dev, engine);
 
+	nouveau_irq_unregister(dev, 14);
 	NVOBJ_ENGINE_DEL(dev, CRYPT);
-
-	kfree(pcrypt);
+	kfree(priv);
 }
 
 int
 nv98_crypt_create(struct drm_device *dev)
 {
-	struct nv98_crypt_engine *pcrypt;
+	struct nv98_crypt_priv *priv;
 
-	pcrypt = kzalloc(sizeof(*pcrypt), GFP_KERNEL);
-	if (!pcrypt)
+	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+	if (!priv)
 		return -ENOMEM;
 
-	pcrypt->base.destroy = nv98_crypt_destroy;
-	pcrypt->base.init = nv98_crypt_init;
-	pcrypt->base.fini = nv98_crypt_fini;
+	priv->base.destroy = nv98_crypt_destroy;
+	priv->base.init = nv98_crypt_init;
+	priv->base.fini = nv98_crypt_fini;
+	priv->base.context_new = nv98_crypt_context_new;
+	priv->base.context_del = nv98_crypt_context_del;
+	priv->base.object_new = nv98_crypt_object_new;
+	priv->base.tlb_flush = nv98_crypt_tlb_flush;
+
+	nouveau_irq_register(dev, 14, nv98_crypt_isr);
 
-	NVOBJ_ENGINE_ADD(dev, CRYPT, &pcrypt->base);
+	NVOBJ_ENGINE_ADD(dev, CRYPT, &priv->base);
+	NVOBJ_CLASS(dev, 0x88b4, CRYPT);
 	return 0;
 }
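
The upload loop in nv98_crypt_init() above follows the usual falcon pattern: select a segment base through the transfer-base register, then stream 32-bit words through the auto-incrementing data port. Factored out as a helper purely for illustration (this function does not exist in the patch):

static void
pcrypt_upload(struct drm_device *dev, u32 base, const u32 *ucode, int words)
{
	int i;

	nv_wr32(dev, 0x087ff8, base);	/* 0x00100000 = code, 0 = data */
	for (i = 0; i < words; i++)
		nv_wr32(dev, 0x087ff4, ucode[i]);
}

pcrypt_upload(dev, 0x00100000, nv98_pcrypt_code, ARRAY_SIZE(nv98_pcrypt_code)) would then reproduce the code-segment loop, with the data segment following at base 0.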

+ 698 - 0
drivers/gpu/drm/nouveau/nv98_crypt.fuc

@@ -0,0 +1,698 @@
+/*
+ *  fuc microcode for nv98 pcrypt engine
+ *  Copyright (C) 2010  Marcin Kościelnicki
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+.section #nv98_pcrypt_data
+
+ctx_dma:
+ctx_dma_query:		.b32 0
+ctx_dma_src:		.b32 0
+ctx_dma_dst:		.b32 0
+.equ #dma_count 3
+ctx_query_address_high:	.b32 0
+ctx_query_address_low:	.b32 0
+ctx_query_counter:	.b32 0
+ctx_cond_address_high:	.b32 0
+ctx_cond_address_low:	.b32 0
+ctx_cond_off:		.b32 0
+ctx_src_address_high:	.b32 0
+ctx_src_address_low:	.b32 0
+ctx_dst_address_high:	.b32 0
+ctx_dst_address_low:	.b32 0
+ctx_mode:		.b32 0
+.align 16
+ctx_key:		.skip 16
+ctx_iv:			.skip 16
+
+.align 0x80
+swap:
+.skip 32
+
+.align 8
+common_cmd_dtable:
+.b32 #ctx_query_address_high + 0x20000 ~0xff
+.b32 #ctx_query_address_low + 0x20000 ~0xfffffff0
+.b32 #ctx_query_counter + 0x20000 ~0xffffffff
+.b32 #cmd_query_get + 0x00000 ~1
+.b32 #ctx_cond_address_high + 0x20000 ~0xff
+.b32 #ctx_cond_address_low + 0x20000 ~0xfffffff0
+.b32 #cmd_cond_mode + 0x00000 ~7
+.b32 #cmd_wrcache_flush + 0x00000 ~0
+.equ #common_cmd_max 0x88
+
+
+.align 8
+engine_cmd_dtable:
+.b32 #ctx_key + 0x0 + 0x20000 ~0xffffffff
+.b32 #ctx_key + 0x4 + 0x20000 ~0xffffffff
+.b32 #ctx_key + 0x8 + 0x20000 ~0xffffffff
+.b32 #ctx_key + 0xc + 0x20000 ~0xffffffff
+.b32 #ctx_iv + 0x0 + 0x20000 ~0xffffffff
+.b32 #ctx_iv + 0x4 + 0x20000 ~0xffffffff
+.b32 #ctx_iv + 0x8 + 0x20000 ~0xffffffff
+.b32 #ctx_iv + 0xc + 0x20000 ~0xffffffff
+.b32 #ctx_src_address_high + 0x20000 ~0xff
+.b32 #ctx_src_address_low + 0x20000 ~0xfffffff0
+.b32 #ctx_dst_address_high + 0x20000 ~0xff
+.b32 #ctx_dst_address_low + 0x20000 ~0xfffffff0
+.b32 #crypt_cmd_mode + 0x00000 ~0xf
+.b32 #crypt_cmd_length + 0x10000 ~0x0ffffff0
+.equ #engine_cmd_max 0xce
+
+.align 4
+crypt_dtable:
+.b16 #crypt_copy_prep #crypt_do_inout
+.b16 #crypt_store_prep #crypt_do_out
+.b16 #crypt_ecb_e_prep #crypt_do_inout
+.b16 #crypt_ecb_d_prep #crypt_do_inout
+.b16 #crypt_cbc_e_prep #crypt_do_inout
+.b16 #crypt_cbc_d_prep #crypt_do_inout
+.b16 #crypt_pcbc_e_prep #crypt_do_inout
+.b16 #crypt_pcbc_d_prep #crypt_do_inout
+.b16 #crypt_cfb_e_prep #crypt_do_inout
+.b16 #crypt_cfb_d_prep #crypt_do_inout
+.b16 #crypt_ofb_prep #crypt_do_inout
+.b16 #crypt_ctr_prep #crypt_do_inout
+.b16 #crypt_cbc_mac_prep #crypt_do_in
+.b16 #crypt_cmac_finish_complete_prep #crypt_do_in
+.b16 #crypt_cmac_finish_partial_prep #crypt_do_in
+
+.align 0x100
+
+.section #nv98_pcrypt_code
+
+	// $r0 is always set to 0 in our code - this allows some space savings.
+	clear b32 $r0
+
+	// set up the interrupt handler
+	mov $r1 #ih
+	mov $iv0 $r1
+
+	// init stack pointer
+	mov $sp $r0
+
+	// set interrupt dispatch - route timer, fifo, ctxswitch to i0, others to host
+	movw $r1 0xfff0
+	sethi $r1 0
+	mov $r2 0x400
+	iowr I[$r2 + 0x300] $r1
+
+	// enable the interrupts
+	or $r1 0xc
+	iowr I[$r2] $r1
+
+	// enable fifo access and context switching
+	mov $r1 3
+	mov $r2 0x1200
+	iowr I[$r2] $r1
+
+	// enable i0 delivery
+	bset $flags ie0
+
+	// sleep forever, waking only for interrupts.
+	bset $flags $p0
+	spin:
+	sleep $p0
+	bra #spin
+
+// i0 handler
+ih:
+	// see which interrupts we got
+	iord $r1 I[$r0 + 0x200]
+
+	and $r2 $r1 0x8
+	cmpu b32 $r2 0
+	bra e #noctx
+
+		// context switch... prepare the regs for xfer
+		mov $r2 0x7700
+		mov $xtargets $r2
+		mov $xdbase $r0
+		// 128-byte context.
+		mov $r2 0
+		sethi $r2 0x50000
+
+		// read current channel
+		mov $r3 0x1400
+		iord $r4 I[$r3]
+		// if bit 30 set, it's active, so we have to unload it first.
+		shl b32 $r5 $r4 1
+		cmps b32 $r5 0
+		bra nc #ctxload
+
+			// unload the current channel - save the context
+			xdst $r0 $r2
+			xdwait
+			// and clear bit 30, then write back
+			bclr $r4 0x1e
+			iowr I[$r3] $r4
+			// tell PFIFO we unloaded
+			mov $r4 1
+			iowr I[$r3 + 0x200] $r4
+
+		bra #noctx
+
+		ctxload:
+			// no channel loaded - perhaps we're requested to load one
+			iord $r4 I[$r3 + 0x100]
+			shl b32 $r15 $r4 1
+			cmps b32 $r15 0
+			// if bit 30 of next channel not set, probably PFIFO is just
+			// killing a context. do a faux load, without the active bit.
+			bra nc #dummyload
+
+				// ok, do a real context load.
+				xdld $r0 $r2
+				xdwait
+				mov $r5 #ctx_dma
+				mov $r6 #dma_count - 1
+				ctxload_dma_loop:
+					ld b32 $r7 D[$r5 + $r6 * 4]
+					add b32 $r8 $r6 0x180
+					shl b32 $r8 8
+					iowr I[$r8] $r7
+					sub b32 $r6 1
+				bra nc #ctxload_dma_loop
+
+			dummyload:
+			// tell PFIFO we're done
+			mov $r5 2
+			iowr I[$r3 + 0x200] $r5
+
+	noctx:
+	and $r2 $r1 0x4
+	cmpu b32 $r2 0
+	bra e #nocmd
+
+		// incoming fifo command.
+		mov $r3 0x1900
+		iord $r2 I[$r3 + 0x100]
+		iord $r3 I[$r3]
+		// extract the method
+		and $r4 $r2 0x7ff
+		// shift the addr to proper position if we need to interrupt later
+		shl b32 $r2 0x10
+
+		// mthd 0 and 0x100 [NAME, NOP]: ignore
+		and $r5 $r4 0x7bf
+		cmpu b32 $r5 0
+		bra e #cmddone
+
+		mov $r5 #engine_cmd_dtable - 0xc0 * 8
+		mov $r6 #engine_cmd_max
+		cmpu b32 $r4 0xc0
+		bra nc #dtable_cmd
+		mov $r5 #common_cmd_dtable - 0x80 * 8
+		mov $r6 #common_cmd_max
+		cmpu b32 $r4 0x80
+		bra nc #dtable_cmd
+		cmpu b32 $r4 0x60
+		bra nc #dma_cmd
+		cmpu b32 $r4 0x50
+		bra ne #illegal_mthd
+
+			// mthd 0x140: PM_TRIGGER
+			mov $r2 0x2200
+			clear b32 $r3
+			sethi $r3 0x20000
+			iowr I[$r2] $r3
+			bra #cmddone
+
+		dma_cmd:
+			// mthd 0x180...: DMA_*
+			cmpu b32 $r4 0x60+#dma_count
+			bra nc #illegal_mthd
+			shl b32 $r5 $r4 2
+			add b32 $r5 (#ctx_dma - 0x60 * 4) & 0xffff
+			bset $r3 0x1e
+			st b32 D[$r5] $r3
+			add b32 $r4 0x180 - 0x60
+			shl b32 $r4 8
+			iowr I[$r4] $r3
+			bra #cmddone
+
+		dtable_cmd:
+			cmpu b32 $r4 $r6
+			bra nc #illegal_mthd
+			shl b32 $r4 3
+			add b32 $r4 $r5
+			ld b32 $r5 D[$r4 + 4]
+			and $r5 $r3
+			cmpu b32 $r5 0
+			bra ne #invalid_bitfield
+			ld b16 $r5 D[$r4]
+			ld b16 $r6 D[$r4 + 2]
+			cmpu b32 $r6 2
+			bra e #cmd_setctx
+			ld b32 $r7 D[$r0 + #ctx_cond_off]
+			and $r6 $r7
+			cmpu b32 $r6 1
+			bra e #cmddone
+			call $r5
+			bra $p1 #dispatch_error
+			bra #cmddone
+
+		cmd_setctx:
+			st b32 D[$r5] $r3
+			bra #cmddone
+
+
+		invalid_bitfield:
+			or $r2 1
+		dispatch_error:
+		illegal_mthd:
+			mov $r4 0x1000
+			iowr I[$r4] $r2
+			iowr I[$r4 + 0x100] $r3
+			mov $r4 0x40
+			iowr I[$r0] $r4
+
+			im_loop:
+				iord $r4 I[$r0 + 0x200]
+				and $r4 0x40
+				cmpu b32 $r4 0
+			bra ne #im_loop
+
+		cmddone:
+		// remove the command from FIFO
+		mov $r3 0x1d00
+		mov $r4 1
+		iowr I[$r3] $r4
+
+	nocmd:
+	// ack the processed interrupts
+	and $r1 $r1 0xc
+	iowr I[$r0 + 0x100] $r1
+iret
+
+cmd_query_get:
+	// if bit 0 of param set, trigger interrupt afterwards.
+	setp $p1 $r3
+	or $r2 3
+
+	// read PTIMER, beware of races...
+	mov $r4 0xb00
+	ptimer_retry:
+		iord $r6 I[$r4 + 0x100]
+		iord $r5 I[$r4]
+		iord $r7 I[$r4 + 0x100]
+		cmpu b32 $r6 $r7
+	bra ne #ptimer_retry
+
+	// prepare the query structure
+	ld b32 $r4 D[$r0 + #ctx_query_counter]
+	st b32 D[$r0 + #swap + 0x0] $r4
+	st b32 D[$r0 + #swap + 0x4] $r0
+	st b32 D[$r0 + #swap + 0x8] $r5
+	st b32 D[$r0 + #swap + 0xc] $r6
+
+	// will use target 0, DMA_QUERY.
+	mov $xtargets $r0
+
+	ld b32 $r4 D[$r0 + #ctx_query_address_high]
+	shl b32 $r4 0x18
+	mov $xdbase $r4
+
+	ld b32 $r4 D[$r0 + #ctx_query_address_low]
+	mov $r5 #swap
+	sethi $r5 0x20000
+	xdst $r4 $r5
+	xdwait
+
+	ret
+
+cmd_cond_mode:
+	// if >= 5, INVALID_ENUM
+	bset $flags $p1
+	or $r2 2
+	cmpu b32 $r3 5
+	bra nc #return
+
+	// otherwise, no error.
+	bclr $flags $p1
+
+	// if < 2, no QUERY object is involved
+	cmpu b32 $r3 2
+	bra nc #cmd_cond_mode_queryful
+
+		xor $r3 1
+		st b32 D[$r0 + #ctx_cond_off] $r3
+	return:
+		ret
+
+	cmd_cond_mode_queryful:
+	// ok, will need to pull a QUERY object, prepare offsets
+	ld b32 $r4 D[$r0 + #ctx_cond_address_high]
+	ld b32 $r5 D[$r0 + #ctx_cond_address_low]
+	and $r6 $r5 0xff
+	shr b32 $r5 8
+	shl b32 $r4 0x18
+	or $r4 $r5
+	mov $xdbase $r4
+	mov $xtargets $r0
+
+	// pull the first one
+	mov $r5 #swap
+	sethi $r5 0x20000
+	xdld $r6 $r5
+
+	// if == 2, only a single QUERY is involved...
+	cmpu b32 $r3 2
+	bra ne #cmd_cond_mode_double
+
+		xdwait
+		ld b32 $r4 D[$r0 + #swap + 4]
+		cmpu b32 $r4 0
+		xbit $r4 $flags z
+		st b32 D[$r0 + #ctx_cond_off] $r4
+		ret
+
+	// ok, we'll need to pull second one too
+	cmd_cond_mode_double:
+	add b32 $r6 0x10
+	add b32 $r5 0x10
+	xdld $r6 $r5
+	xdwait
+
+	// compare COUNTERs
+	ld b32 $r5 D[$r0 + #swap + 0x00]
+	ld b32 $r6 D[$r0 + #swap + 0x10]
+	cmpu b32 $r5 $r6
+	xbit $r4 $flags z
+
+	// compare RESen
+	ld b32 $r5 D[$r0 + #swap + 0x04]
+	ld b32 $r6 D[$r0 + #swap + 0x14]
+	cmpu b32 $r5 $r6
+	xbit $r5 $flags z
+	and $r4 $r5
+
+	// and negate or not, depending on mode
+	cmpu b32 $r3 3
+	xbit $r5 $flags z
+	xor $r4 $r5
+	st b32 D[$r0 + #ctx_cond_off] $r4
+	ret
+
+cmd_wrcache_flush:
+	bclr $flags $p1
+	mov $r2 0x2200
+	clear b32 $r3
+	sethi $r3 0x10000
+	iowr I[$r2] $r3
+	ret
+
+crypt_cmd_mode:
+	// if >= 0xf, INVALID_ENUM
+	bset $flags $p1
+	or $r2 2
+	cmpu b32 $r3 0xf
+	bra nc #crypt_cmd_mode_return
+
+		bclr $flags $p1
+		st b32 D[$r0 + #ctx_mode] $r3
+
+	crypt_cmd_mode_return:
+	ret
+
+crypt_cmd_length:
+	// nop if length == 0
+	cmpu b32 $r3 0
+	bra e #crypt_cmd_mode_return
+
+	// init key, IV
+	cxset 3
+	mov $r4 #ctx_key
+	sethi $r4 0x70000
+	xdst $r0 $r4
+	mov $r4 #ctx_iv
+	sethi $r4 0x60000
+	xdst $r0 $r4
+	xdwait
+	ckeyreg $c7
+
+	// prepare the targets
+	mov $r4 0x2100
+	mov $xtargets $r4
+
+	// prepare src address
+	ld b32 $r4 D[$r0 + #ctx_src_address_high]
+	ld b32 $r5 D[$r0 + #ctx_src_address_low]
+	shr b32 $r8 $r5 8
+	shl b32 $r4 0x18
+	or $r4 $r8
+	and $r5 $r5 0xff
+
+	// prepare dst address
+	ld b32 $r6 D[$r0 + #ctx_dst_address_high]
+	ld b32 $r7 D[$r0 + #ctx_dst_address_low]
+	shr b32 $r8 $r7 8
+	shl b32 $r6 0x18
+	or $r6 $r8
+	and $r7 $r7 0xff
+
+	// find the proper prep & do functions
+	ld b32 $r8 D[$r0 + #ctx_mode]
+	shl b32 $r8 2
+
+	// run prep
+	ld b16 $r9 D[$r8 + #crypt_dtable]
+	call $r9
+
+	// do it
+	ld b16 $r9 D[$r8 + #crypt_dtable + 2]
+	call $r9
+	cxset 1
+	xdwait
+	cxset 0x61
+	xdwait
+	xdwait
+
+	// update src address
+	shr b32 $r8 $r4 0x18
+	shl b32 $r9 $r4 8
+	add b32 $r9 $r5
+	adc b32 $r8 0
+	st b32 D[$r0 + #ctx_src_address_high] $r8
+	st b32 D[$r0 + #ctx_src_address_low] $r9
+
+	// update dst address
+	shr b32 $r8 $r6 0x18
+	shl b32 $r9 $r6 8
+	add b32 $r9 $r7
+	adc b32 $r8 0
+	st b32 D[$r0 + #ctx_dst_address_high] $r8
+	st b32 D[$r0 + #ctx_dst_address_low] $r9
+
+	// pull updated IV
+	cxset 2
+	mov $r4 #ctx_iv
+	sethi $r4 0x60000
+	xdld $r0 $r4
+	xdwait
+
+	ret
+
+
+crypt_copy_prep:
+	cs0begin 2
+		cxsin $c0
+		cxsout $c0
+	ret
+
+crypt_store_prep:
+	cs0begin 1
+		cxsout $c6
+	ret
+
+crypt_ecb_e_prep:
+	cs0begin 3
+		cxsin $c0
+		cenc $c0 $c0
+		cxsout $c0
+	ret
+
+crypt_ecb_d_prep:
+	ckexp $c7 $c7
+	cs0begin 3
+		cxsin $c0
+		cdec $c0 $c0
+		cxsout $c0
+	ret
+
+crypt_cbc_e_prep:
+	cs0begin 4
+		cxsin $c0
+		cxor $c6 $c0
+		cenc $c6 $c6
+		cxsout $c6
+	ret
+
+crypt_cbc_d_prep:
+	ckexp $c7 $c7
+	cs0begin 5
+		cmov $c2 $c6
+		cxsin $c6
+		cdec $c0 $c6
+		cxor $c0 $c2
+		cxsout $c0
+	ret
+
+crypt_pcbc_e_prep:
+	cs0begin 5
+		cxsin $c0
+		cxor $c6 $c0
+		cenc $c6 $c6
+		cxsout $c6
+		cxor $c6 $c0
+	ret
+
+crypt_pcbc_d_prep:
+	ckexp $c7 $c7
+	cs0begin 5
+		cxsin $c0
+		cdec $c1 $c0
+		cxor $c6 $c1
+		cxsout $c6
+		cxor $c6 $c0
+	ret
+
+crypt_cfb_e_prep:
+	cs0begin 4
+		cenc $c6 $c6
+		cxsin $c0
+		cxor $c6 $c0
+		cxsout $c6
+	ret
+
+crypt_cfb_d_prep:
+	cs0begin 4
+		cenc $c0 $c6
+		cxsin $c6
+		cxor $c0 $c6
+		cxsout $c0
+	ret
+
+crypt_ofb_prep:
+	cs0begin 4
+		cenc $c6 $c6
+		cxsin $c0
+		cxor $c0 $c6
+		cxsout $c0
+	ret
+
+crypt_ctr_prep:
+	cs0begin 5
+		cenc $c1 $c6
+		cadd $c6 1
+		cxsin $c0
+		cxor $c0 $c1
+		cxsout $c0
+	ret
+
+crypt_cbc_mac_prep:
+	cs0begin 3
+		cxsin $c0
+		cxor $c6 $c0
+		cenc $c6 $c6
+	ret
+
+crypt_cmac_finish_complete_prep:
+	cs0begin 7
+		cxsin $c0
+		cxor $c6 $c0
+		cxor $c0 $c0
+		cenc $c0 $c0
+		cprecmac $c0 $c0
+		cxor $c6 $c0
+		cenc $c6 $c6
+	ret
+
+crypt_cmac_finish_partial_prep:
+	cs0begin 8
+		cxsin $c0
+		cxor $c6 $c0
+		cxor $c0 $c0
+		cenc $c0 $c0
+		cprecmac $c0 $c0
+		cprecmac $c0 $c0
+		cxor $c6 $c0
+		cenc $c6 $c6
+	ret
+
+// TODO
+crypt_do_in:
+	add b32 $r3 $r5
+	mov $xdbase $r4
+	mov $r9 #swap
+	sethi $r9 0x20000
+	crypt_do_in_loop:
+		xdld $r5 $r9
+		xdwait
+		cxset 0x22
+		xdst $r0 $r9
+		cs0exec 1
+		xdwait
+		add b32 $r5 0x10
+		cmpu b32 $r5 $r3
+	bra ne #crypt_do_in_loop
+	cxset 1
+	xdwait
+	ret
+
+crypt_do_out:
+	add b32 $r3 $r7
+	mov $xdbase $r6
+	mov $r9 #swap
+	sethi $r9 0x20000
+	crypt_do_out_loop:
+		cs0exec 1
+		cxset 0x61
+		xdld $r7 $r9
+		xdst $r7 $r9
+		cxset 1
+		xdwait
+		add b32 $r7 0x10
+		cmpu b32 $r7 $r3
+	bra ne #crypt_do_out_loop
+	ret
+
+crypt_do_inout:
+	add b32 $r3 $r5
+	mov $r9 #swap
+	sethi $r9 0x20000
+	crypt_do_inout_loop:
+		mov $xdbase $r4
+		xdld $r5 $r9
+		xdwait
+		cxset 0x21
+		xdst $r0 $r9
+		cs0exec 1
+		cxset 0x61
+		mov $xdbase $r6
+		xdld $r7 $r9
+		xdst $r7 $r9
+		cxset 1
+		xdwait
+		add b32 $r5 0x10
+		add b32 $r7 0x10
+		cmpu b32 $r5 $r3
+	bra ne #crypt_do_inout_loop
+	ret
+
+.align 0x100
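
A detail worth calling out in cmd_query_get above: the ptimer_retry loop reads the timer's high word, then the low word, then the high word again, and retries if the two high-word samples differ. That is the standard way to read a 64-bit counter exposed as two 32-bit registers without tearing; a C analogue, with read_hi()/read_lo() as hypothetical stand-ins for the two iord accesses:

static uint64_t
read_ptimer(void)
{
	uint32_t hi, lo;

	do {
		hi = read_hi();		/* first high-word sample */
		lo = read_lo();
	} while (hi != read_hi());	/* low word wrapped? sample again */

	return ((uint64_t)hi << 32) | lo;
}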

+ 584 - 0
drivers/gpu/drm/nouveau/nv98_crypt.fuc.h

@@ -0,0 +1,584 @@
+uint32_t nv98_pcrypt_data[] = {
+/* 0x0000: ctx_dma */
+/* 0x0000: ctx_dma_query */
+	0x00000000,
+/* 0x0004: ctx_dma_src */
+	0x00000000,
+/* 0x0008: ctx_dma_dst */
+	0x00000000,
+/* 0x000c: ctx_query_address_high */
+	0x00000000,
+/* 0x0010: ctx_query_address_low */
+	0x00000000,
+/* 0x0014: ctx_query_counter */
+	0x00000000,
+/* 0x0018: ctx_cond_address_high */
+	0x00000000,
+/* 0x001c: ctx_cond_address_low */
+	0x00000000,
+/* 0x0020: ctx_cond_off */
+	0x00000000,
+/* 0x0024: ctx_src_address_high */
+	0x00000000,
+/* 0x0028: ctx_src_address_low */
+	0x00000000,
+/* 0x002c: ctx_dst_address_high */
+	0x00000000,
+/* 0x0030: ctx_dst_address_low */
+	0x00000000,
+/* 0x0034: ctx_mode */
+	0x00000000,
+	0x00000000,
+	0x00000000,
+/* 0x0040: ctx_key */
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+/* 0x0050: ctx_iv */
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+/* 0x0080: swap */
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+/* 0x00a0: common_cmd_dtable */
+	0x0002000c,
+	0xffffff00,
+	0x00020010,
+	0x0000000f,
+	0x00020014,
+	0x00000000,
+	0x00000192,
+	0xfffffffe,
+	0x00020018,
+	0xffffff00,
+	0x0002001c,
+	0x0000000f,
+	0x000001d7,
+	0xfffffff8,
+	0x00000260,
+	0xffffffff,
+/* 0x00e0: engine_cmd_dtable */
+	0x00020040,
+	0x00000000,
+	0x00020044,
+	0x00000000,
+	0x00020048,
+	0x00000000,
+	0x0002004c,
+	0x00000000,
+	0x00020050,
+	0x00000000,
+	0x00020054,
+	0x00000000,
+	0x00020058,
+	0x00000000,
+	0x0002005c,
+	0x00000000,
+	0x00020024,
+	0xffffff00,
+	0x00020028,
+	0x0000000f,
+	0x0002002c,
+	0xffffff00,
+	0x00020030,
+	0x0000000f,
+	0x00000271,
+	0xfffffff0,
+	0x00010285,
+	0xf000000f,
+/* 0x0150: crypt_dtable */
+	0x04db0321,
+	0x04b1032f,
+	0x04db0339,
+	0x04db034b,
+	0x04db0361,
+	0x04db0377,
+	0x04db0395,
+	0x04db03af,
+	0x04db03cd,
+	0x04db03e3,
+	0x04db03f9,
+	0x04db040f,
+	0x04830429,
+	0x0483043b,
+	0x0483045d,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+};
+
+uint32_t nv98_pcrypt_code[] = {
+	0x17f004bd,
+	0x0010fe35,
+	0xf10004fe,
+	0xf0fff017,
+	0x27f10013,
+	0x21d00400,
+	0x0c15f0c0,
+	0xf00021d0,
+	0x27f10317,
+	0x21d01200,
+	0x1031f400,
+/* 0x002f: spin */
+	0xf40031f4,
+	0x0ef40028,
+/* 0x0035: ih */
+	0x8001cffd,
+	0xb00812c4,
+	0x0bf40024,
+	0x0027f167,
+	0x002bfe77,
+	0xf00007fe,
+	0x23f00027,
+	0x0037f105,
+	0x0034cf14,
+	0xb0014594,
+	0x18f40055,
+	0x0602fa17,
+	0x4af003f8,
+	0x0034d01e,
+	0xd00147f0,
+	0x0ef48034,
+/* 0x0075: ctxload */
+	0x4034cf33,
+	0xb0014f94,
+	0x18f400f5,
+	0x0502fa21,
+	0x57f003f8,
+	0x0267f000,
+/* 0x008c: ctxload_dma_loop */
+	0xa07856bc,
+	0xb6018068,
+	0x87d00884,
+	0x0162b600,
+/* 0x009f: dummyload */
+	0xf0f018f4,
+	0x35d00257,
+/* 0x00a5: noctx */
+	0x0412c480,
+	0xf50024b0,
+	0xf100df0b,
+	0xcf190037,
+	0x33cf4032,
+	0xff24e400,
+	0x1024b607,
+	0x07bf45e4,
+	0xf50054b0,
+	0xf100b90b,
+	0xf1fae057,
+	0xb000ce67,
+	0x18f4c044,
+	0xa057f14d,
+	0x8867f1fc,
+	0x8044b000,
+	0xb03f18f4,
+	0x18f46044,
+	0x5044b019,
+	0xf1741bf4,
+	0xbd220027,
+	0x0233f034,
+	0xf50023d0,
+/* 0x0103: dma_cmd */
+	0xb000810e,
+	0x18f46344,
+	0x0245945e,
+	0xfe8050b7,
+	0x801e39f0,
+	0x40b70053,
+	0x44b60120,
+	0x0043d008,
+/* 0x0123: dtable_cmd */
+	0xb8600ef4,
+	0x18f40446,
+	0x0344b63e,
+	0x980045bb,
+	0x53fd0145,
+	0x0054b004,
+	0x58291bf4,
+	0x46580045,
+	0x0264b001,
+	0x98170bf4,
+	0x67fd0807,
+	0x0164b004,
+	0xf9300bf4,
+	0x0f01f455,
+/* 0x015b: cmd_setctx */
+	0x80280ef4,
+	0x0ef40053,
+/* 0x0161: invalid_bitfield */
+	0x0125f022,
+/* 0x0164: dispatch_error */
+/* 0x0164: illegal_mthd */
+	0x100047f1,
+	0xd00042d0,
+	0x47f04043,
+	0x0004d040,
+/* 0x0174: im_loop */
+	0xf08004cf,
+	0x44b04044,
+	0xf71bf400,
+/* 0x0180: cmddone */
+	0x1d0037f1,
+	0xd00147f0,
+/* 0x018a: nocmd */
+	0x11c40034,
+	0x4001d00c,
+/* 0x0192: cmd_query_get */
+	0x38f201f8,
+	0x0325f001,
+	0x0b0047f1,
+/* 0x019c: ptimer_retry */
+	0xcf4046cf,
+	0x47cf0045,
+	0x0467b840,
+	0x98f41bf4,
+	0x04800504,
+	0x21008020,
+	0x80220580,
+	0x0bfe2306,
+	0x03049800,
+	0xfe1844b6,
+	0x04980047,
+	0x8057f104,
+	0x0253f000,
+	0xf80645fa,
+/* 0x01d7: cmd_cond_mode */
+	0xf400f803,
+	0x25f00131,
+	0x0534b002,
+	0xf41218f4,
+	0x34b00132,
+	0x0b18f402,
+	0x800136f0,
+/* 0x01f2: return */
+	0x00f80803,
+/* 0x01f4: cmd_cond_mode_queryful */
+	0x98060498,
+	0x56c40705,
+	0x0855b6ff,
+	0xfd1844b6,
+	0x47fe0545,
+	0x000bfe00,
+	0x008057f1,
+	0xfa0253f0,
+	0x34b00565,
+	0x131bf402,
+	0x049803f8,
+	0x0044b021,
+	0x800b4cf0,
+	0x00f80804,
+/* 0x022c: cmd_cond_mode_double */
+	0xb61060b6,
+	0x65fa1050,
+	0x9803f805,
+	0x06982005,
+	0x0456b824,
+	0x980b4cf0,
+	0x06982105,
+	0x0456b825,
+	0xfd0b5cf0,
+	0x34b00445,
+	0x0b5cf003,
+	0x800645fd,
+	0x00f80804,
+/* 0x0260: cmd_wrcache_flush */
+	0xf10132f4,
+	0xbd220027,
+	0x0133f034,
+	0xf80023d0,
+/* 0x0271: crypt_cmd_mode */
+	0x0131f400,
+	0xb00225f0,
+	0x18f40f34,
+	0x0132f409,
+/* 0x0283: crypt_cmd_mode_return */
+	0xf80d0380,
+/* 0x0285: crypt_cmd_length */
+	0x0034b000,
+	0xf4fb0bf4,
+	0x47f0033c,
+	0x0743f040,
+	0xf00604fa,
+	0x43f05047,
+	0x0604fa06,
+	0x3cf503f8,
+	0x47f1c407,
+	0x4bfe2100,
+	0x09049800,
+	0x950a0598,
+	0x44b60858,
+	0x0548fd18,
+	0x98ff55c4,
+	0x07980b06,
+	0x0878950c,
+	0xfd1864b6,
+	0x77c40568,
+	0x0d0898ff,
+	0x580284b6,
+	0x95f9a889,
+	0xf9a98958,
+	0x013cf495,
+	0x3cf403f8,
+	0xf803f861,
+	0x18489503,
+	0xbb084994,
+	0x81b60095,
+	0x09088000,
+	0x950a0980,
+	0x69941868,
+	0x0097bb08,
+	0x800081b6,
+	0x09800b08,
+	0x023cf40c,
+	0xf05047f0,
+	0x04fa0643,
+	0xf803f805,
+/* 0x0321: crypt_copy_prep */
+	0x203cf500,
+	0x003cf594,
+	0x003cf588,
+/* 0x032f: crypt_store_prep */
+	0xf500f88c,
+	0xf594103c,
+	0xf88c063c,
+/* 0x0339: crypt_ecb_e_prep */
+	0x303cf500,
+	0x003cf594,
+	0x003cf588,
+	0x003cf5d0,
+/* 0x034b: crypt_ecb_d_prep */
+	0xf500f88c,
+	0xf5c8773c,
+	0xf594303c,
+	0xf588003c,
+	0xf5d4003c,
+	0xf88c003c,
+/* 0x0361: crypt_cbc_e_prep */
+	0x403cf500,
+	0x003cf594,
+	0x063cf588,
+	0x663cf5ac,
+	0x063cf5d0,
+/* 0x0377: crypt_cbc_d_prep */
+	0xf500f88c,
+	0xf5c8773c,
+	0xf594503c,
+	0xf584623c,
+	0xf588063c,
+	0xf5d4603c,
+	0xf5ac203c,
+	0xf88c003c,
+/* 0x0395: crypt_pcbc_e_prep */
+	0x503cf500,
+	0x003cf594,
+	0x063cf588,
+	0x663cf5ac,
+	0x063cf5d0,
+	0x063cf58c,
+/* 0x03af: crypt_pcbc_d_prep */
+	0xf500f8ac,
+	0xf5c8773c,
+	0xf594503c,
+	0xf588003c,
+	0xf5d4013c,
+	0xf5ac163c,
+	0xf58c063c,
+	0xf8ac063c,
+/* 0x03cd: crypt_cfb_e_prep */
+	0x403cf500,
+	0x663cf594,
+	0x003cf5d0,
+	0x063cf588,
+	0x063cf5ac,
+/* 0x03e3: crypt_cfb_d_prep */
+	0xf500f88c,
+	0xf594403c,
+	0xf5d0603c,
+	0xf588063c,
+	0xf5ac603c,
+	0xf88c003c,
+/* 0x03f9: crypt_ofb_prep */
+	0x403cf500,
+	0x663cf594,
+	0x003cf5d0,
+	0x603cf588,
+	0x003cf5ac,
+/* 0x040f: crypt_ctr_prep */
+	0xf500f88c,
+	0xf594503c,
+	0xf5d0613c,
+	0xf5b0163c,
+	0xf588003c,
+	0xf5ac103c,
+	0xf88c003c,
+/* 0x0429: crypt_cbc_mac_prep */
+	0x303cf500,
+	0x003cf594,
+	0x063cf588,
+	0x663cf5ac,
+/* 0x043b: crypt_cmac_finish_complete_prep */
+	0xf500f8d0,
+	0xf594703c,
+	0xf588003c,
+	0xf5ac063c,
+	0xf5ac003c,
+	0xf5d0003c,
+	0xf5bc003c,
+	0xf5ac063c,
+	0xf8d0663c,
+/* 0x045d: crypt_cmac_finish_partial_prep */
+	0x803cf500,
+	0x003cf594,
+	0x063cf588,
+	0x003cf5ac,
+	0x003cf5ac,
+	0x003cf5d0,
+	0x003cf5bc,
+	0x063cf5bc,
+	0x663cf5ac,
+/* 0x0483: crypt_do_in */
+	0xbb00f8d0,
+	0x47fe0035,
+	0x8097f100,
+	0x0293f000,
+/* 0x0490: crypt_do_in_loop */
+	0xf80559fa,
+	0x223cf403,
+	0xf50609fa,
+	0xf898103c,
+	0x1050b603,
+	0xf40453b8,
+	0x3cf4e91b,
+	0xf803f801,
+/* 0x04b1: crypt_do_out */
+	0x0037bb00,
+	0xf10067fe,
+	0xf0008097,
+/* 0x04be: crypt_do_out_loop */
+	0x3cf50293,
+	0x3cf49810,
+	0x0579fa61,
+	0xf40679fa,
+	0x03f8013c,
+	0xb81070b6,
+	0x1bf40473,
+/* 0x04db: crypt_do_inout */
+	0xbb00f8e8,
+	0x97f10035,
+	0x93f00080,
+/* 0x04e5: crypt_do_inout_loop */
+	0x0047fe02,
+	0xf80559fa,
+	0x213cf403,
+	0xf50609fa,
+	0xf498103c,
+	0x67fe613c,
+	0x0579fa00,
+	0xf40679fa,
+	0x03f8013c,
+	0xb61050b6,
+	0x53b81070,
+	0xd41bf404,
+	0x000000f8,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+};
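Note: the data table above and the nv98_pcrypt_code[] array that follows are the assembled output of the PCRYPT falcon microcode: a data segment carrying the context layout and the command-dispatch tables, and a code segment holding the engine program itself. At init time the driver streams both blobs into the falcon through its upload ports. A minimal sketch of that upload, modelled on the fuc loader pattern nvc0_graph uses; treat the 0x180/0x184/0x188 (code) and 0x1c0/0x1c4 (data) offsets as an assumption for PCRYPT's falcon:

	static void
	fuc_upload(struct drm_device *dev, u32 base,
		   const u32 *data, int dwords, const u32 *code, int cwords)
	{
		int i;

		/* data segment: auto-incrementing index, then stream the words */
		nv_wr32(dev, base + 0x01c0, 0x01000000);
		for (i = 0; i < dwords; i++)
			nv_wr32(dev, base + 0x01c4, data[i]);

		/* code segment: tag each 64-word page, then stream the words */
		nv_wr32(dev, base + 0x0180, 0x01000000);
		for (i = 0; i < cwords; i++) {
			if ((i & 0x3f) == 0)
				nv_wr32(dev, base + 0x0188, i >> 6);
			nv_wr32(dev, base + 0x0184, code[i]);
		}
	}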

+ 4 - 27
drivers/gpu/drm/nouveau/nva3_copy.c

@@ -79,29 +79,13 @@ static void
 nva3_copy_context_del(struct nouveau_channel *chan, int engine)
 {
 	struct nouveau_gpuobj *ctx = chan->engctx[engine];
-	struct drm_device *dev = chan->dev;
-	u32 inst;
-
-	inst  = (chan->ramin->vinst >> 12);
-	inst |= 0x40000000;
-
-	/* disable fifo access */
-	nv_wr32(dev, 0x104048, 0x00000000);
-	/* mark channel as unloaded if it's currently active */
-	if (nv_rd32(dev, 0x104050) == inst)
-		nv_mask(dev, 0x104050, 0x40000000, 0x00000000);
-	/* mark next channel as invalid if it's about to be loaded */
-	if (nv_rd32(dev, 0x104054) == inst)
-		nv_mask(dev, 0x104054, 0x40000000, 0x00000000);
-	/* restore fifo access */
-	nv_wr32(dev, 0x104048, 0x00000003);
+	int i;
 
-	for (inst = 0xc0; inst <= 0xd4; inst += 4)
-		nv_wo32(chan->ramin, inst, 0x00000000);
-
-	nouveau_gpuobj_ref(NULL, &ctx);
+	for (i = 0xc0; i <= 0xd4; i += 4)
+		nv_wo32(chan->ramin, i, 0x00000000);
 
 	atomic_dec(&chan->vm->engref[engine]);
+	nouveau_gpuobj_ref(NULL, &ctx);
 	chan->engctx[engine] = ctx;
 }
 
@@ -143,13 +127,6 @@ static int
 nva3_copy_fini(struct drm_device *dev, int engine, bool suspend)
 {
 	nv_mask(dev, 0x104048, 0x00000003, 0x00000000);
-
-	/* trigger fuc context unload */
-	nv_wait(dev, 0x104008, 0x0000000c, 0x00000000);
-	nv_mask(dev, 0x104054, 0x40000000, 0x00000000);
-	nv_wr32(dev, 0x104000, 0x00000008);
-	nv_wait(dev, 0x104008, 0x00000008, 0x00000000);
-
 	nv_wr32(dev, 0x104014, 0xffffffff);
 	return 0;
 }

+ 272 - 18
drivers/gpu/drm/nouveau/nva3_pm.c

@@ -98,7 +98,9 @@ read_pll(struct drm_device *dev, int clk, u32 pll)
 		sclk = read_clk(dev, 0x10 + clk, false);
 	}
 
-	return sclk * N / (M * P);
+	if (M * P)
+		return sclk * N / (M * P);
+	return 0;
 }
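The added guard matters because read_pll() can be called while a PLL is powered down, in which case the coefficient register reads back M and P as zero and the old code divided by zero. The same arithmetic as a standalone restatement (hypothetical helper, for illustration only):

	/* PLL output = input clock * N / (M * P); 0 means "not running" */
	static u32
	pll_khz(u32 sclk, u32 N, u32 M, u32 P)
	{
		if (M * P == 0)
			return 0;
		return sclk * N / (M * P);
	}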
 
 struct creg {
@@ -182,23 +184,26 @@ prog_pll(struct drm_device *dev, int clk, u32 pll, struct creg *reg)
 	const u32 src1 = 0x004160 + (clk * 4);
 	const u32 ctrl = pll + 0;
 	const u32 coef = pll + 4;
-	u32 cntl;
 
 	if (!reg->clk && !reg->pll) {
 		NV_DEBUG(dev, "no clock for %02x\n", clk);
 		return;
 	}
 
-	cntl = nv_rd32(dev, ctrl) & 0xfffffff2;
 	if (reg->pll) {
 		nv_mask(dev, src0, 0x00000101, 0x00000101);
 		nv_wr32(dev, coef, reg->pll);
-		nv_wr32(dev, ctrl, cntl | 0x00000015);
+		nv_mask(dev, ctrl, 0x00000015, 0x00000015);
+		nv_mask(dev, ctrl, 0x00000010, 0x00000000);
+		nv_wait(dev, ctrl, 0x00020000, 0x00020000);
+		nv_mask(dev, ctrl, 0x00000010, 0x00000010);
+		nv_mask(dev, ctrl, 0x00000008, 0x00000000);
 		nv_mask(dev, src1, 0x00000100, 0x00000000);
 		nv_mask(dev, src1, 0x00000001, 0x00000000);
 	} else {
 		nv_mask(dev, src1, 0x003f3141, 0x00000101 | reg->clk);
-		nv_wr32(dev, ctrl, cntl | 0x0000001d);
+		nv_mask(dev, ctrl, 0x00000018, 0x00000018);
+		udelay(20);
 		nv_mask(dev, ctrl, 0x00000001, 0x00000000);
 		nv_mask(dev, src0, 0x00000100, 0x00000000);
 		nv_mask(dev, src0, 0x00000001, 0x00000000);
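Compared with the removed single write of cntl | 0x00000015, the PLL branch now steps the control register through an explicit lock sequence before the source mux is switched, and the divider branch gains a settle delay. A commented restatement of the new PLL sequence; the per-bit meanings are inferred from the ordering and are assumptions, not documented fact:

	nv_wr32(dev, coef, reg->pll);                /* load N/M/P coefficients */
	nv_mask(dev, ctrl, 0x00000015, 0x00000015);  /* power up and enable */
	nv_mask(dev, ctrl, 0x00000010, 0x00000000);  /* drop one enable bit... */
	nv_wait(dev, ctrl, 0x00020000, 0x00020000);  /* ...and poll the lock flag */
	nv_mask(dev, ctrl, 0x00000010, 0x00000010);  /* re-assert once locked */
	nv_mask(dev, ctrl, 0x00000008, 0x00000000);  /* take the PLL out of bypass */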
@@ -230,17 +235,28 @@ nva3_pm_clocks_get(struct drm_device *dev, struct nouveau_pm_level *perflvl)
 }
 
 struct nva3_pm_state {
+	struct nouveau_pm_level *perflvl;
+
 	struct creg nclk;
 	struct creg sclk;
-	struct creg mclk;
 	struct creg vdec;
 	struct creg unka0;
+
+	struct creg mclk;
+	u8 *rammap;
+	u8  rammap_ver;
+	u8  rammap_len;
+	u8 *ramcfg;
+	u8  ramcfg_len;
+	u32 r004018;
+	u32 r100760;
 };
 
 void *
 nva3_pm_clocks_pre(struct drm_device *dev, struct nouveau_pm_level *perflvl)
 {
 	struct nva3_pm_state *info;
+	u8 ramcfg_cnt;
 	int ret;
 
 	info = kzalloc(sizeof(*info), GFP_KERNEL);
@@ -267,6 +283,20 @@ nva3_pm_clocks_pre(struct drm_device *dev, struct nouveau_pm_level *perflvl)
 	if (ret < 0)
 		goto out;
 
+	info->rammap = nouveau_perf_rammap(dev, perflvl->memory,
+					   &info->rammap_ver,
+					   &info->rammap_len,
+					   &ramcfg_cnt, &info->ramcfg_len);
+	if (info->rammap_ver != 0x10 || info->rammap_len < 5)
+		info->rammap = NULL;
+
+	info->ramcfg = nouveau_perf_ramcfg(dev, perflvl->memory,
+					   &info->rammap_ver,
+					   &info->ramcfg_len);
+	if (info->rammap_ver != 0x10)
+		info->ramcfg = NULL;
+
+	info->perflvl = perflvl;
 out:
 	if (ret < 0) {
 		kfree(info);
@@ -287,6 +317,240 @@ nva3_pm_grcp_idle(void *data)
 	return false;
 }
 
+static void
+mclk_precharge(struct nouveau_mem_exec_func *exec)
+{
+	nv_wr32(exec->dev, 0x1002d4, 0x00000001);
+}
+
+static void
+mclk_refresh(struct nouveau_mem_exec_func *exec)
+{
+	nv_wr32(exec->dev, 0x1002d0, 0x00000001);
+}
+
+static void
+mclk_refresh_auto(struct nouveau_mem_exec_func *exec, bool enable)
+{
+	nv_wr32(exec->dev, 0x100210, enable ? 0x80000000 : 0x00000000);
+}
+
+static void
+mclk_refresh_self(struct nouveau_mem_exec_func *exec, bool enable)
+{
+	nv_wr32(exec->dev, 0x1002dc, enable ? 0x00000001 : 0x00000000);
+}
+
+static void
+mclk_wait(struct nouveau_mem_exec_func *exec, u32 nsec)
+{
+	udelay((nsec + 500) / 1000);
+}
+
+static u32
+mclk_mrg(struct nouveau_mem_exec_func *exec, int mr)
+{
+	if (mr <= 1)
+		return nv_rd32(exec->dev, 0x1002c0 + ((mr - 0) * 4));
+	if (mr <= 3)
+		return nv_rd32(exec->dev, 0x1002e0 + ((mr - 2) * 4));
+	return 0;
+}
+
+static void
+mclk_mrs(struct nouveau_mem_exec_func *exec, int mr, u32 data)
+{
+	struct drm_nouveau_private *dev_priv = exec->dev->dev_private;
+
+	if (mr <= 1) {
+		if (dev_priv->vram_rank_B)
+			nv_wr32(exec->dev, 0x1002c8 + ((mr - 0) * 4), data);
+		nv_wr32(exec->dev, 0x1002c0 + ((mr - 0) * 4), data);
+	} else
+	if (mr <= 3) {
+		if (dev_priv->vram_rank_B)
+			nv_wr32(exec->dev, 0x1002e8 + ((mr - 2) * 4), data);
+		nv_wr32(exec->dev, 0x1002e0 + ((mr - 2) * 4), data);
+	}
+}
+
+static void
+mclk_clock_set(struct nouveau_mem_exec_func *exec)
+{
+	struct drm_device *dev = exec->dev;
+	struct nva3_pm_state *info = exec->priv;
+	u32 ctrl;
+
+	ctrl = nv_rd32(dev, 0x004000);
+	if (!(ctrl & 0x00000008) && info->mclk.pll) {
+		nv_wr32(dev, 0x004000, (ctrl |=  0x00000008));
+		nv_mask(dev, 0x1110e0, 0x00088000, 0x00088000);
+		nv_wr32(dev, 0x004018, 0x00001000);
+		nv_wr32(dev, 0x004000, (ctrl &= ~0x00000001));
+		nv_wr32(dev, 0x004004, info->mclk.pll);
+		nv_wr32(dev, 0x004000, (ctrl |=  0x00000001));
+		udelay(64);
+		nv_wr32(dev, 0x004018, 0x00005000 | info->r004018);
+		udelay(20);
+	} else
+	if (!info->mclk.pll) {
+		nv_mask(dev, 0x004168, 0x003f3040, info->mclk.clk);
+		nv_wr32(dev, 0x004000, (ctrl |= 0x00000008));
+		nv_mask(dev, 0x1110e0, 0x00088000, 0x00088000);
+		nv_wr32(dev, 0x004018, 0x0000d000 | info->r004018);
+	}
+
+	if (info->rammap) {
+		if (info->ramcfg && (info->rammap[4] & 0x08)) {
+			u32 unk5a0 = (ROM16(info->ramcfg[5]) << 8) |
+				      info->ramcfg[5];
+			u32 unk5a4 = ROM16(info->ramcfg[7]);
+			u32 unk804 = (info->ramcfg[9] & 0xf0) << 16 |
+				     (info->ramcfg[3] & 0x0f) << 16 |
+				     (info->ramcfg[9] & 0x0f) |
+				     0x80000000;
+			nv_wr32(dev, 0x1005a0, unk5a0);
+			nv_wr32(dev, 0x1005a4, unk5a4);
+			nv_wr32(dev, 0x10f804, unk804);
+			nv_mask(dev, 0x10053c, 0x00001000, 0x00000000);
+		} else {
+			nv_mask(dev, 0x10053c, 0x00001000, 0x00001000);
+			nv_mask(dev, 0x10f804, 0x80000000, 0x00000000);
+			nv_mask(dev, 0x100760, 0x22222222, info->r100760);
+			nv_mask(dev, 0x1007a0, 0x22222222, info->r100760);
+			nv_mask(dev, 0x1007e0, 0x22222222, info->r100760);
+		}
+	}
+
+	if (info->mclk.pll) {
+		nv_mask(dev, 0x1110e0, 0x00088000, 0x00011000);
+		nv_wr32(dev, 0x004000, (ctrl &= ~0x00000008));
+	}
+}
+
+static void
+mclk_timing_set(struct nouveau_mem_exec_func *exec)
+{
+	struct drm_device *dev = exec->dev;
+	struct nva3_pm_state *info = exec->priv;
+	struct nouveau_pm_level *perflvl = info->perflvl;
+	int i;
+
+	for (i = 0; i < 9; i++)
+		nv_wr32(dev, 0x100220 + (i * 4), perflvl->timing.reg[i]);
+
+	if (info->ramcfg) {
+		u32 data = (info->ramcfg[2] & 0x08) ? 0x00000000 : 0x00001000;
+		nv_mask(dev, 0x100200, 0x00001000, data);
+	}
+
+	if (info->ramcfg) {
+		u32 unk714 = nv_rd32(dev, 0x100714) & ~0xf0000010;
+		u32 unk718 = nv_rd32(dev, 0x100718) & ~0x00000100;
+		u32 unk71c = nv_rd32(dev, 0x10071c) & ~0x00000100;
+		if ( (info->ramcfg[2] & 0x20))
+			unk714 |= 0xf0000000;
+		if (!(info->ramcfg[2] & 0x04))
+			unk714 |= 0x00000010;
+		nv_wr32(dev, 0x100714, unk714);
+
+		if (info->ramcfg[2] & 0x01)
+			unk71c |= 0x00000100;
+		nv_wr32(dev, 0x10071c, unk71c);
+
+		if (info->ramcfg[2] & 0x02)
+			unk718 |= 0x00000100;
+		nv_wr32(dev, 0x100718, unk718);
+
+		if (info->ramcfg[2] & 0x10)
+			nv_wr32(dev, 0x111100, 0x48000000); /*XXX*/
+	}
+}
+
+static void
+prog_mem(struct drm_device *dev, struct nva3_pm_state *info)
+{
+	struct nouveau_mem_exec_func exec = {
+		.dev = dev,
+		.precharge = mclk_precharge,
+		.refresh = mclk_refresh,
+		.refresh_auto = mclk_refresh_auto,
+		.refresh_self = mclk_refresh_self,
+		.wait = mclk_wait,
+		.mrg = mclk_mrg,
+		.mrs = mclk_mrs,
+		.clock_set = mclk_clock_set,
+		.timing_set = mclk_timing_set,
+		.priv = info
+	};
+	u32 ctrl;
+
+	/* XXX: where the fuck does 750MHz come from? */
+	if (info->perflvl->memory <= 750000) {
+		info->r004018 = 0x10000000;
+		info->r100760 = 0x22222222;
+	}
+
+	ctrl = nv_rd32(dev, 0x004000);
+	if (ctrl & 0x00000008) {
+		if (info->mclk.pll) {
+			nv_mask(dev, 0x004128, 0x00000101, 0x00000101);
+			nv_wr32(dev, 0x004004, info->mclk.pll);
+			nv_wr32(dev, 0x004000, (ctrl |= 0x00000001));
+			nv_wr32(dev, 0x004000, (ctrl &= 0xffffffef));
+			nv_wait(dev, 0x004000, 0x00020000, 0x00020000);
+			nv_wr32(dev, 0x004000, (ctrl |= 0x00000010));
+			nv_wr32(dev, 0x004018, 0x00005000 | info->r004018);
+			nv_wr32(dev, 0x004000, (ctrl |= 0x00000004));
+		}
+	} else {
+		u32 ssel = 0x00000101;
+		if (info->mclk.clk)
+			ssel |= info->mclk.clk;
+		else
+			ssel |= 0x00080000; /* 324MHz, shouldn't matter... */
+		nv_mask(dev, 0x004168, 0x003f3141, ssel);
+	}
+
+	if (info->ramcfg) {
+		if (info->ramcfg[2] & 0x10) {
+			nv_mask(dev, 0x111104, 0x00000600, 0x00000000);
+		} else {
+			nv_mask(dev, 0x111100, 0x40000000, 0x40000000);
+			nv_mask(dev, 0x111104, 0x00000180, 0x00000000);
+		}
+	}
+	if (info->rammap && !(info->rammap[4] & 0x02))
+		nv_mask(dev, 0x100200, 0x00000800, 0x00000000);
+	nv_wr32(dev, 0x611200, 0x00003300);
+	if (!(info->ramcfg[2] & 0x10))
+		nv_wr32(dev, 0x111100, 0x4c020000); /*XXX*/
+
+	nouveau_mem_exec(&exec, info->perflvl);
+
+	nv_wr32(dev, 0x611200, 0x00003330);
+	if (info->rammap && (info->rammap[4] & 0x02))
+		nv_mask(dev, 0x100200, 0x00000800, 0x00000800);
+	if (info->ramcfg) {
+		if (info->ramcfg[2] & 0x10) {
+			nv_mask(dev, 0x111104, 0x00000180, 0x00000180);
+			nv_mask(dev, 0x111100, 0x40000000, 0x00000000);
+		} else {
+			nv_mask(dev, 0x111104, 0x00000600, 0x00000600);
+		}
+	}
+
+	if (info->mclk.pll) {
+		nv_mask(dev, 0x004168, 0x00000001, 0x00000000);
+		nv_mask(dev, 0x004168, 0x00000100, 0x00000000);
+	} else {
+		nv_mask(dev, 0x004000, 0x00000001, 0x00000000);
+		nv_mask(dev, 0x004128, 0x00000001, 0x00000000);
+		nv_mask(dev, 0x004128, 0x00000100, 0x00000000);
+	}
+}
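prog_mem() packages the chipset-specific pokes above into a nouveau_mem_exec_func table and hands it to nouveau_mem_exec(), the shared reclocking sequencer this series adds to nouveau_mem.c. Roughly, the sequencer quiesces DRAM, switches the clock, reprograms timings and mode registers, then resumes refresh; a hedged sketch of that call shape (not the actual nouveau_mem_exec() body, whose exact ordering is driven by the VBIOS data):

	exec->refresh_auto(exec, false);  /* stop automatic refresh */
	exec->precharge(exec);            /* close all open banks */
	exec->refresh(exec);              /* manual refresh while quiesced */
	exec->refresh_self(exec, true);   /* park DRAM in self-refresh */
	exec->clock_set(exec);            /* switch the MCLK source / PLL */
	exec->timing_set(exec);           /* program the new timing set */
	exec->refresh_self(exec, false);  /* wake DRAM back up */
	exec->mrs(exec, mr, data);        /* rewrite the mode registers */
	exec->refresh_auto(exec, true);   /* resume automatic refresh */

with exec->wait() calls in between to satisfy the DRAM's minimum delays.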
+
 int
 nva3_pm_clocks_set(struct drm_device *dev, void *pre_state)
 {
@@ -316,18 +580,8 @@ nva3_pm_clocks_set(struct drm_device *dev, void *pre_state)
 	prog_clk(dev, 0x20, &info->unka0);
 	prog_clk(dev, 0x21, &info->vdec);
 
-	if (info->mclk.clk || info->mclk.pll) {
-		nv_wr32(dev, 0x100210, 0);
-		nv_wr32(dev, 0x1002dc, 1);
-		nv_wr32(dev, 0x004018, 0x00001000);
-		prog_pll(dev, 0x02, 0x004000, &info->mclk);
-		if (nv_rd32(dev, 0x4000) & 0x00000008)
-			nv_wr32(dev, 0x004018, 0x1000d000);
-		else
-			nv_wr32(dev, 0x004018, 0x10005000);
-		nv_wr32(dev, 0x1002dc, 0);
-		nv_wr32(dev, 0x100210, 0x80000000);
-	}
+	if (info->mclk.clk || info->mclk.pll)
+		prog_mem(dev, info);
 
 	ret = 0;
 

+ 27 - 27
drivers/gpu/drm/nouveau/nvc0_fbcon.c

@@ -43,22 +43,22 @@ nvc0_fbcon_fillrect(struct fb_info *info, const struct fb_fillrect *rect)
 		return ret;
 
 	if (rect->rop != ROP_COPY) {
-		BEGIN_NVC0(chan, 2, NvSub2D, 0x02ac, 1);
+		BEGIN_NVC0(chan, NvSub2D, 0x02ac, 1);
 		OUT_RING  (chan, 1);
 	}
-	BEGIN_NVC0(chan, 2, NvSub2D, 0x0588, 1);
+	BEGIN_NVC0(chan, NvSub2D, 0x0588, 1);
 	if (info->fix.visual == FB_VISUAL_TRUECOLOR ||
 	    info->fix.visual == FB_VISUAL_DIRECTCOLOR)
 		OUT_RING  (chan, ((uint32_t *)info->pseudo_palette)[rect->color]);
 	else
 		OUT_RING  (chan, rect->color);
-	BEGIN_NVC0(chan, 2, NvSub2D, 0x0600, 4);
+	BEGIN_NVC0(chan, NvSub2D, 0x0600, 4);
 	OUT_RING  (chan, rect->dx);
 	OUT_RING  (chan, rect->dy);
 	OUT_RING  (chan, rect->dx + rect->width);
 	OUT_RING  (chan, rect->dy + rect->height);
 	if (rect->rop != ROP_COPY) {
-		BEGIN_NVC0(chan, 2, NvSub2D, 0x02ac, 1);
+		BEGIN_NVC0(chan, NvSub2D, 0x02ac, 1);
 		OUT_RING  (chan, 3);
 	}
 	FIRE_RING(chan);
@@ -78,14 +78,14 @@ nvc0_fbcon_copyarea(struct fb_info *info, const struct fb_copyarea *region)
 	if (ret)
 		return ret;
 
-	BEGIN_NVC0(chan, 2, NvSub2D, 0x0110, 1);
+	BEGIN_NVC0(chan, NvSub2D, 0x0110, 1);
 	OUT_RING  (chan, 0);
-	BEGIN_NVC0(chan, 2, NvSub2D, 0x08b0, 4);
+	BEGIN_NVC0(chan, NvSub2D, 0x08b0, 4);
 	OUT_RING  (chan, region->dx);
 	OUT_RING  (chan, region->dy);
 	OUT_RING  (chan, region->width);
 	OUT_RING  (chan, region->height);
-	BEGIN_NVC0(chan, 2, NvSub2D, 0x08d0, 4);
+	BEGIN_NVC0(chan, NvSub2D, 0x08d0, 4);
 	OUT_RING  (chan, 0);
 	OUT_RING  (chan, region->sx);
 	OUT_RING  (chan, 0);
@@ -116,7 +116,7 @@ nvc0_fbcon_imageblit(struct fb_info *info, const struct fb_image *image)
 	width = ALIGN(image->width, 32);
 	dwords = (width * image->height) >> 5;
 
-	BEGIN_NVC0(chan, 2, NvSub2D, 0x0814, 2);
+	BEGIN_NVC0(chan, NvSub2D, 0x0814, 2);
 	if (info->fix.visual == FB_VISUAL_TRUECOLOR ||
 	    info->fix.visual == FB_VISUAL_DIRECTCOLOR) {
 		OUT_RING  (chan, palette[image->bg_color] | mask);
@@ -125,10 +125,10 @@ nvc0_fbcon_imageblit(struct fb_info *info, const struct fb_image *image)
 		OUT_RING  (chan, image->bg_color);
 		OUT_RING  (chan, image->fg_color);
 	}
-	BEGIN_NVC0(chan, 2, NvSub2D, 0x0838, 2);
+	BEGIN_NVC0(chan, NvSub2D, 0x0838, 2);
 	OUT_RING  (chan, image->width);
 	OUT_RING  (chan, image->height);
-	BEGIN_NVC0(chan, 2, NvSub2D, 0x0850, 4);
+	BEGIN_NVC0(chan, NvSub2D, 0x0850, 4);
 	OUT_RING  (chan, 0);
 	OUT_RING  (chan, image->dx);
 	OUT_RING  (chan, 0);
@@ -143,7 +143,7 @@ nvc0_fbcon_imageblit(struct fb_info *info, const struct fb_image *image)
 
 		dwords -= push;
 
-		BEGIN_NVC0(chan, 6, NvSub2D, 0x0860, push);
+		BEGIN_NIC0(chan, NvSub2D, 0x0860, push);
 		OUT_RINGp(chan, data, push);
 		data += push;
 	}
@@ -200,47 +200,47 @@ nvc0_fbcon_accel_init(struct fb_info *info)
 		return ret;
 	}
 
-	BEGIN_NVC0(chan, 2, NvSub2D, 0x0000, 1);
+	BEGIN_NVC0(chan, NvSub2D, 0x0000, 1);
 	OUT_RING  (chan, 0x0000902d);
-	BEGIN_NVC0(chan, 2, NvSub2D, 0x0104, 2);
+	BEGIN_NVC0(chan, NvSub2D, 0x0104, 2);
 	OUT_RING  (chan, upper_32_bits(chan->notifier_vma.offset));
 	OUT_RING  (chan, lower_32_bits(chan->notifier_vma.offset));
-	BEGIN_NVC0(chan, 2, NvSub2D, 0x0290, 1);
+	BEGIN_NVC0(chan, NvSub2D, 0x0290, 1);
 	OUT_RING  (chan, 0);
-	BEGIN_NVC0(chan, 2, NvSub2D, 0x0888, 1);
+	BEGIN_NVC0(chan, NvSub2D, 0x0888, 1);
 	OUT_RING  (chan, 1);
-	BEGIN_NVC0(chan, 2, NvSub2D, 0x02ac, 1);
+	BEGIN_NVC0(chan, NvSub2D, 0x02ac, 1);
 	OUT_RING  (chan, 3);
-	BEGIN_NVC0(chan, 2, NvSub2D, 0x02a0, 1);
+	BEGIN_NVC0(chan, NvSub2D, 0x02a0, 1);
 	OUT_RING  (chan, 0x55);
-	BEGIN_NVC0(chan, 2, NvSub2D, 0x08c0, 4);
+	BEGIN_NVC0(chan, NvSub2D, 0x08c0, 4);
 	OUT_RING  (chan, 0);
 	OUT_RING  (chan, 1);
 	OUT_RING  (chan, 0);
 	OUT_RING  (chan, 1);
-	BEGIN_NVC0(chan, 2, NvSub2D, 0x0580, 2);
+	BEGIN_NVC0(chan, NvSub2D, 0x0580, 2);
 	OUT_RING  (chan, 4);
 	OUT_RING  (chan, format);
-	BEGIN_NVC0(chan, 2, NvSub2D, 0x02e8, 2);
+	BEGIN_NVC0(chan, NvSub2D, 0x02e8, 2);
 	OUT_RING  (chan, 2);
 	OUT_RING  (chan, 1);
 
-	BEGIN_NVC0(chan, 2, NvSub2D, 0x0804, 1);
+	BEGIN_NVC0(chan, NvSub2D, 0x0804, 1);
 	OUT_RING  (chan, format);
-	BEGIN_NVC0(chan, 2, NvSub2D, 0x0800, 1);
+	BEGIN_NVC0(chan, NvSub2D, 0x0800, 1);
 	OUT_RING  (chan, 1);
-	BEGIN_NVC0(chan, 2, NvSub2D, 0x0808, 3);
+	BEGIN_NVC0(chan, NvSub2D, 0x0808, 3);
 	OUT_RING  (chan, 0);
 	OUT_RING  (chan, 0);
 	OUT_RING  (chan, 1);
-	BEGIN_NVC0(chan, 2, NvSub2D, 0x081c, 1);
+	BEGIN_NVC0(chan, NvSub2D, 0x081c, 1);
 	OUT_RING  (chan, 1);
-	BEGIN_NVC0(chan, 2, NvSub2D, 0x0840, 4);
+	BEGIN_NVC0(chan, NvSub2D, 0x0840, 4);
 	OUT_RING  (chan, 0);
 	OUT_RING  (chan, 1);
 	OUT_RING  (chan, 0);
 	OUT_RING  (chan, 1);
-	BEGIN_NVC0(chan, 2, NvSub2D, 0x0200, 10);
+	BEGIN_NVC0(chan, NvSub2D, 0x0200, 10);
 	OUT_RING  (chan, format);
 	OUT_RING  (chan, 1);
 	OUT_RING  (chan, 0);
@@ -251,7 +251,7 @@ nvc0_fbcon_accel_init(struct fb_info *info)
 	OUT_RING  (chan, info->var.yres_virtual);
 	OUT_RING  (chan, upper_32_bits(fb->vma.offset));
 	OUT_RING  (chan, lower_32_bits(fb->vma.offset));
-	BEGIN_NVC0(chan, 2, NvSub2D, 0x0230, 10);
+	BEGIN_NVC0(chan, NvSub2D, 0x0230, 10);
 	OUT_RING  (chan, format);
 	OUT_RING  (chan, 1);
 	OUT_RING  (chan, 0);
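All of the churn in this file comes from the ring-macro rework: BEGIN_NVC0() drops the explicit opcode argument (the 2 every caller passed) and always emits an incrementing-methods header, while the new BEGIN_NIC0() emits the non-incrementing form (previously opcode 6) used above to stream imageblit data at a single method. A sketch of what the new nouveau_dma.h macros expand to; the 0x2/0x6 header opcodes are inferred from the arguments the old callers passed, so treat the encoding as an assumption:

	static inline void
	BEGIN_NVC0(struct nouveau_channel *chan, int subc, int mthd, int size)
	{
		/* incrementing methods: opcode 1 in bits 31:29 */
		OUT_RING(chan, 0x20000000 | (size << 16) | (subc << 13) | (mthd >> 2));
	}

	static inline void
	BEGIN_NIC0(struct nouveau_channel *chan, int subc, int mthd, int size)
	{
		/* non-incrementing methods: opcode 3 in bits 31:29 */
		OUT_RING(chan, 0x60000000 | (size << 16) | (subc << 13) | (mthd >> 2));
	}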

+ 184 - 0
drivers/gpu/drm/nouveau/nvc0_fence.c

@@ -0,0 +1,184 @@
+/*
+ * Copyright 2012 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs
+ */
+
+#include "drmP.h"
+#include "nouveau_drv.h"
+#include "nouveau_dma.h"
+#include "nouveau_fifo.h"
+#include "nouveau_ramht.h"
+#include "nouveau_fence.h"
+
+struct nvc0_fence_priv {
+	struct nouveau_fence_priv base;
+	struct nouveau_bo *bo;
+};
+
+struct nvc0_fence_chan {
+	struct nouveau_fence_chan base;
+	struct nouveau_vma vma;
+};
+
+static int
+nvc0_fence_emit(struct nouveau_fence *fence)
+{
+	struct nouveau_channel *chan = fence->channel;
+	struct nvc0_fence_chan *fctx = chan->engctx[NVOBJ_ENGINE_FENCE];
+	u64 addr = fctx->vma.offset + chan->id * 16;
+	int ret;
+
+	ret = RING_SPACE(chan, 5);
+	if (ret == 0) {
+		BEGIN_NVC0(chan, 0, NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH, 4);
+		OUT_RING  (chan, upper_32_bits(addr));
+		OUT_RING  (chan, lower_32_bits(addr));
+		OUT_RING  (chan, fence->sequence);
+		OUT_RING  (chan, NV84_SUBCHAN_SEMAPHORE_TRIGGER_WRITE_LONG);
+		FIRE_RING (chan);
+	}
+
+	return ret;
+}
+
+static int
+nvc0_fence_sync(struct nouveau_fence *fence,
+		struct nouveau_channel *prev, struct nouveau_channel *chan)
+{
+	struct nvc0_fence_chan *fctx = chan->engctx[NVOBJ_ENGINE_FENCE];
+	u64 addr = fctx->vma.offset + prev->id * 16;
+	int ret;
+
+	ret = RING_SPACE(chan, 5);
+	if (ret == 0) {
+		BEGIN_NVC0(chan, 0, NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH, 4);
+		OUT_RING  (chan, upper_32_bits(addr));
+		OUT_RING  (chan, lower_32_bits(addr));
+		OUT_RING  (chan, fence->sequence);
+		OUT_RING  (chan, NV84_SUBCHAN_SEMAPHORE_TRIGGER_ACQUIRE_GEQUAL |
+				 NVC0_SUBCHAN_SEMAPHORE_TRIGGER_YIELD);
+		FIRE_RING (chan);
+	}
+
+	return ret;
+}
+
+static u32
+nvc0_fence_read(struct nouveau_channel *chan)
+{
+	struct nvc0_fence_priv *priv = nv_engine(chan->dev, NVOBJ_ENGINE_FENCE);
+	return nouveau_bo_rd32(priv->bo, chan->id * 16/4);
+}
+
+static void
+nvc0_fence_context_del(struct nouveau_channel *chan, int engine)
+{
+	struct nvc0_fence_priv *priv = nv_engine(chan->dev, engine);
+	struct nvc0_fence_chan *fctx = chan->engctx[engine];
+
+	nouveau_bo_vma_del(priv->bo, &fctx->vma);
+	nouveau_fence_context_del(&fctx->base);
+	chan->engctx[engine] = NULL;
+	kfree(fctx);
+}
+
+static int
+nvc0_fence_context_new(struct nouveau_channel *chan, int engine)
+{
+	struct nvc0_fence_priv *priv = nv_engine(chan->dev, engine);
+	struct nvc0_fence_chan *fctx;
+	int ret;
+
+	fctx = chan->engctx[engine] = kzalloc(sizeof(*fctx), GFP_KERNEL);
+	if (!fctx)
+		return -ENOMEM;
+
+	nouveau_fence_context_new(&fctx->base);
+
+	ret = nouveau_bo_vma_add(priv->bo, chan->vm, &fctx->vma);
+	if (ret)
+		nvc0_fence_context_del(chan, engine);
+
+	nouveau_bo_wr32(priv->bo, chan->id * 16/4, 0x00000000);
+	return ret;
+}
+
+static int
+nvc0_fence_fini(struct drm_device *dev, int engine, bool suspend)
+{
+	return 0;
+}
+
+static int
+nvc0_fence_init(struct drm_device *dev, int engine)
+{
+	return 0;
+}
+
+static void
+nvc0_fence_destroy(struct drm_device *dev, int engine)
+{
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nvc0_fence_priv *priv = nv_engine(dev, engine);
+
+	nouveau_bo_unmap(priv->bo);
+	nouveau_bo_ref(NULL, &priv->bo);
+	dev_priv->eng[engine] = NULL;
+	kfree(priv);
+}
+
+int
+nvc0_fence_create(struct drm_device *dev)
+{
+	struct nouveau_fifo_priv *pfifo = nv_engine(dev, NVOBJ_ENGINE_FIFO);
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nvc0_fence_priv *priv;
+	int ret;
+
+	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	priv->base.engine.destroy = nvc0_fence_destroy;
+	priv->base.engine.init = nvc0_fence_init;
+	priv->base.engine.fini = nvc0_fence_fini;
+	priv->base.engine.context_new = nvc0_fence_context_new;
+	priv->base.engine.context_del = nvc0_fence_context_del;
+	priv->base.emit = nvc0_fence_emit;
+	priv->base.sync = nvc0_fence_sync;
+	priv->base.read = nvc0_fence_read;
+	dev_priv->eng[NVOBJ_ENGINE_FENCE] = &priv->base.engine;
+
+	ret = nouveau_bo_new(dev, 16 * pfifo->channels, 0, TTM_PL_FLAG_VRAM,
+			     0, 0, NULL, &priv->bo);
+	if (ret == 0) {
+		ret = nouveau_bo_pin(priv->bo, TTM_PL_FLAG_VRAM);
+		if (ret == 0)
+			ret = nouveau_bo_map(priv->bo);
+		if (ret)
+			nouveau_bo_ref(NULL, &priv->bo);
+	}
+
+	if (ret)
+		nvc0_fence_destroy(dev, NVOBJ_ENGINE_FENCE);
+	return ret;
+}
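The new fence engine backs each channel with a 16-byte slot in one pinned VRAM buffer: emit() releases the channel's sequence number into its own slot with a semaphore WRITE_LONG, sync() makes the waiting channel ACQUIRE_GEQUAL on the emitting channel's slot, and the CPU polls the same memory through the BO mapping in nvc0_fence_read(). A worked example of the slot addressing for channel 3:

	u64 gpu = fctx->vma.offset + 3 * 16;             /* address the ring methods target */
	u32 seq = nouveau_bo_rd32(priv->bo, 3 * 16 / 4); /* CPU-side read, dword index 12 */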

+ 125 - 185
drivers/gpu/drm/nouveau/nvc0_fifo.c

@@ -26,10 +26,12 @@
 
 #include "nouveau_drv.h"
 #include "nouveau_mm.h"
+#include "nouveau_fifo.h"
 
 static void nvc0_fifo_isr(struct drm_device *);
 
 struct nvc0_fifo_priv {
+	struct nouveau_fifo_priv base;
 	struct nouveau_gpuobj *playlist[2];
 	int cur_playlist;
 	struct nouveau_vma user_vma;
@@ -37,8 +39,8 @@ struct nvc0_fifo_priv {
 };
 
 struct nvc0_fifo_chan {
+	struct nouveau_fifo_chan base;
 	struct nouveau_gpuobj *user;
-	struct nouveau_gpuobj *ramfc;
 };
 
 static void
@@ -46,8 +48,7 @@ nvc0_fifo_playlist_update(struct drm_device *dev)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
 	struct nouveau_instmem_engine *pinstmem = &dev_priv->engine.instmem;
-	struct nouveau_fifo_engine *pfifo = &dev_priv->engine.fifo;
-	struct nvc0_fifo_priv *priv = pfifo->priv;
+	struct nvc0_fifo_priv *priv = nv_engine(dev, NVOBJ_ENGINE_FIFO);
 	struct nouveau_gpuobj *cur;
 	int i, p;
 
@@ -69,59 +70,20 @@ nvc0_fifo_playlist_update(struct drm_device *dev)
 		NV_ERROR(dev, "PFIFO - playlist update failed\n");
 }
 
-void
-nvc0_fifo_disable(struct drm_device *dev)
-{
-}
-
-void
-nvc0_fifo_enable(struct drm_device *dev)
-{
-}
-
-bool
-nvc0_fifo_reassign(struct drm_device *dev, bool enable)
-{
-	return false;
-}
-
-bool
-nvc0_fifo_cache_pull(struct drm_device *dev, bool enable)
-{
-	return false;
-}
-
-int
-nvc0_fifo_channel_id(struct drm_device *dev)
-{
-	return 127;
-}
-
-int
-nvc0_fifo_create_context(struct nouveau_channel *chan)
+static int
+nvc0_fifo_context_new(struct nouveau_channel *chan, int engine)
 {
 	struct drm_device *dev = chan->dev;
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
 	struct nouveau_instmem_engine *pinstmem = &dev_priv->engine.instmem;
-	struct nouveau_fifo_engine *pfifo = &dev_priv->engine.fifo;
-	struct nvc0_fifo_priv *priv = pfifo->priv;
-	struct nvc0_fifo_chan *fifoch;
+	struct nvc0_fifo_priv *priv = nv_engine(dev, engine);
+	struct nvc0_fifo_chan *fctx;
 	u64 ib_virt = chan->pushbuf_base + chan->dma.ib_base * 4;
-	int ret;
+	int ret, i;
 
-	chan->fifo_priv = kzalloc(sizeof(*fifoch), GFP_KERNEL);
-	if (!chan->fifo_priv)
+	fctx = chan->engctx[engine] = kzalloc(sizeof(*fctx), GFP_KERNEL);
+	if (!fctx)
 		return -ENOMEM;
-	fifoch = chan->fifo_priv;
-
-	/* allocate vram for control regs, map into polling area */
-	ret = nouveau_gpuobj_new(dev, NULL, 0x1000, 0x1000,
-				 NVOBJ_FLAG_ZERO_ALLOC, &fifoch->user);
-	if (ret)
-		goto error;
-
-	nouveau_vm_map_at(&priv->user_vma, chan->id * 0x1000,
-			  *(struct nouveau_mem **)fifoch->user->node);
 
 	chan->user = ioremap_wc(pci_resource_start(dev->pdev, 1) +
 				priv->user_vma.offset + (chan->id * 0x1000),
@@ -131,176 +93,77 @@ nvc0_fifo_create_context(struct nouveau_channel *chan)
 		goto error;
 	}
 
-	/* ramfc */
-	ret = nouveau_gpuobj_new_fake(dev, chan->ramin->pinst,
-				      chan->ramin->vinst, 0x100,
-				      NVOBJ_FLAG_ZERO_ALLOC, &fifoch->ramfc);
+	/* allocate vram for control regs, map into polling area */
+	ret = nouveau_gpuobj_new(dev, NULL, 0x1000, 0x1000,
+				 NVOBJ_FLAG_ZERO_ALLOC, &fctx->user);
 	if (ret)
 		goto error;
 
-	nv_wo32(fifoch->ramfc, 0x08, lower_32_bits(fifoch->user->vinst));
-	nv_wo32(fifoch->ramfc, 0x0c, upper_32_bits(fifoch->user->vinst));
-	nv_wo32(fifoch->ramfc, 0x10, 0x0000face);
-	nv_wo32(fifoch->ramfc, 0x30, 0xfffff902);
-	nv_wo32(fifoch->ramfc, 0x48, lower_32_bits(ib_virt));
-	nv_wo32(fifoch->ramfc, 0x4c, drm_order(chan->dma.ib_max + 1) << 16 |
+	nouveau_vm_map_at(&priv->user_vma, chan->id * 0x1000,
+			  *(struct nouveau_mem **)fctx->user->node);
+
+	for (i = 0; i < 0x100; i += 4)
+		nv_wo32(chan->ramin, i, 0x00000000);
+	nv_wo32(chan->ramin, 0x08, lower_32_bits(fctx->user->vinst));
+	nv_wo32(chan->ramin, 0x0c, upper_32_bits(fctx->user->vinst));
+	nv_wo32(chan->ramin, 0x10, 0x0000face);
+	nv_wo32(chan->ramin, 0x30, 0xfffff902);
+	nv_wo32(chan->ramin, 0x48, lower_32_bits(ib_virt));
+	nv_wo32(chan->ramin, 0x4c, drm_order(chan->dma.ib_max + 1) << 16 |
 				   upper_32_bits(ib_virt));
-	nv_wo32(fifoch->ramfc, 0x54, 0x00000002);
-	nv_wo32(fifoch->ramfc, 0x84, 0x20400000);
-	nv_wo32(fifoch->ramfc, 0x94, 0x30000001);
-	nv_wo32(fifoch->ramfc, 0x9c, 0x00000100);
-	nv_wo32(fifoch->ramfc, 0xa4, 0x1f1f1f1f);
-	nv_wo32(fifoch->ramfc, 0xa8, 0x1f1f1f1f);
-	nv_wo32(fifoch->ramfc, 0xac, 0x0000001f);
-	nv_wo32(fifoch->ramfc, 0xb8, 0xf8000000);
-	nv_wo32(fifoch->ramfc, 0xf8, 0x10003080); /* 0x002310 */
-	nv_wo32(fifoch->ramfc, 0xfc, 0x10000010); /* 0x002350 */
+	nv_wo32(chan->ramin, 0x54, 0x00000002);
+	nv_wo32(chan->ramin, 0x84, 0x20400000);
+	nv_wo32(chan->ramin, 0x94, 0x30000001);
+	nv_wo32(chan->ramin, 0x9c, 0x00000100);
+	nv_wo32(chan->ramin, 0xa4, 0x1f1f1f1f);
+	nv_wo32(chan->ramin, 0xa8, 0x1f1f1f1f);
+	nv_wo32(chan->ramin, 0xac, 0x0000001f);
+	nv_wo32(chan->ramin, 0xb8, 0xf8000000);
+	nv_wo32(chan->ramin, 0xf8, 0x10003080); /* 0x002310 */
+	nv_wo32(chan->ramin, 0xfc, 0x10000010); /* 0x002350 */
 	pinstmem->flush(dev);
 
 	nv_wr32(dev, 0x003000 + (chan->id * 8), 0xc0000000 |
 						(chan->ramin->vinst >> 12));
 	nv_wr32(dev, 0x003004 + (chan->id * 8), 0x001f0001);
 	nvc0_fifo_playlist_update(dev);
-	return 0;
 
 error:
-	pfifo->destroy_context(chan);
+	if (ret)
+		priv->base.base.context_del(chan, engine);
 	return ret;
 }
 
-void
-nvc0_fifo_destroy_context(struct nouveau_channel *chan)
+static void
+nvc0_fifo_context_del(struct nouveau_channel *chan, int engine)
 {
+	struct nvc0_fifo_chan *fctx = chan->engctx[engine];
 	struct drm_device *dev = chan->dev;
-	struct nvc0_fifo_chan *fifoch;
 
 	nv_mask(dev, 0x003004 + (chan->id * 8), 0x00000001, 0x00000000);
 	nv_wr32(dev, 0x002634, chan->id);
 	if (!nv_wait(dev, 0x0002634, 0xffffffff, chan->id))
 		NV_WARN(dev, "0x2634 != chid: 0x%08x\n", nv_rd32(dev, 0x2634));
-
 	nvc0_fifo_playlist_update(dev);
-
 	nv_wr32(dev, 0x003000 + (chan->id * 8), 0x00000000);
 
+	nouveau_gpuobj_ref(NULL, &fctx->user);
 	if (chan->user) {
 		iounmap(chan->user);
 		chan->user = NULL;
 	}
 
-	fifoch = chan->fifo_priv;
-	chan->fifo_priv = NULL;
-	if (!fifoch)
-		return;
-
-	nouveau_gpuobj_ref(NULL, &fifoch->ramfc);
-	nouveau_gpuobj_ref(NULL, &fifoch->user);
-	kfree(fifoch);
-}
-
-int
-nvc0_fifo_load_context(struct nouveau_channel *chan)
-{
-	return 0;
-}
-
-int
-nvc0_fifo_unload_context(struct drm_device *dev)
-{
-	int i;
-
-	for (i = 0; i < 128; i++) {
-		if (!(nv_rd32(dev, 0x003004 + (i * 8)) & 1))
-			continue;
-
-		nv_mask(dev, 0x003004 + (i * 8), 0x00000001, 0x00000000);
-		nv_wr32(dev, 0x002634, i);
-		if (!nv_wait(dev, 0x002634, 0xffffffff, i)) {
-			NV_INFO(dev, "PFIFO: kick ch %d failed: 0x%08x\n",
-				i, nv_rd32(dev, 0x002634));
-			return -EBUSY;
-		}
-	}
-
-	return 0;
-}
-
-static void
-nvc0_fifo_destroy(struct drm_device *dev)
-{
-	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nouveau_fifo_engine *pfifo = &dev_priv->engine.fifo;
-	struct nvc0_fifo_priv *priv;
-
-	priv = pfifo->priv;
-	if (!priv)
-		return;
-
-	nouveau_vm_put(&priv->user_vma);
-	nouveau_gpuobj_ref(NULL, &priv->playlist[1]);
-	nouveau_gpuobj_ref(NULL, &priv->playlist[0]);
-	kfree(priv);
-}
-
-void
-nvc0_fifo_takedown(struct drm_device *dev)
-{
-	nv_wr32(dev, 0x002140, 0x00000000);
-	nvc0_fifo_destroy(dev);
+	chan->engctx[engine] = NULL;
+	kfree(fctx);
 }
 
 static int
-nvc0_fifo_create(struct drm_device *dev)
-{
-	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nouveau_fifo_engine *pfifo = &dev_priv->engine.fifo;
-	struct nvc0_fifo_priv *priv;
-	int ret;
-
-	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
-	if (!priv)
-		return -ENOMEM;
-	pfifo->priv = priv;
-
-	ret = nouveau_gpuobj_new(dev, NULL, 0x1000, 0x1000, 0,
-				 &priv->playlist[0]);
-	if (ret)
-		goto error;
-
-	ret = nouveau_gpuobj_new(dev, NULL, 0x1000, 0x1000, 0,
-				 &priv->playlist[1]);
-	if (ret)
-		goto error;
-
-	ret = nouveau_vm_get(dev_priv->bar1_vm, pfifo->channels * 0x1000,
-			     12, NV_MEM_ACCESS_RW, &priv->user_vma);
-	if (ret)
-		goto error;
-
-	nouveau_irq_register(dev, 8, nvc0_fifo_isr);
-	NVOBJ_CLASS(dev, 0x506e, SW); /* nvsw */
-	return 0;
-
-error:
-	nvc0_fifo_destroy(dev);
-	return ret;
-}
-
-int
-nvc0_fifo_init(struct drm_device *dev)
+nvc0_fifo_init(struct drm_device *dev, int engine)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nouveau_fifo_engine *pfifo = &dev_priv->engine.fifo;
+	struct nvc0_fifo_priv *priv = nv_engine(dev, engine);
 	struct nouveau_channel *chan;
-	struct nvc0_fifo_priv *priv;
-	int ret, i;
-
-	if (!pfifo->priv) {
-		ret = nvc0_fifo_create(dev);
-		if (ret)
-			return ret;
-	}
-	priv = pfifo->priv;
+	int i;
 
 	/* reset PFIFO, enable all available PSUBFIFO areas */
 	nv_mask(dev, 0x000200, 0x00000100, 0x00000000);
@@ -338,7 +201,7 @@ nvc0_fifo_init(struct drm_device *dev)
 	/* restore PFIFO context table */
 	for (i = 0; i < 128; i++) {
 		chan = dev_priv->channels.ptr[i];
-		if (!chan || !chan->fifo_priv)
+		if (!chan || !chan->engctx[engine])
 			continue;
 
 		nv_wr32(dev, 0x003000 + (i * 8), 0xc0000000 |
@@ -350,6 +213,29 @@ nvc0_fifo_init(struct drm_device *dev)
 	return 0;
 }
 
+static int
+nvc0_fifo_fini(struct drm_device *dev, int engine, bool suspend)
+{
+	int i;
+
+	for (i = 0; i < 128; i++) {
+		if (!(nv_rd32(dev, 0x003004 + (i * 8)) & 1))
+			continue;
+
+		nv_mask(dev, 0x003004 + (i * 8), 0x00000001, 0x00000000);
+		nv_wr32(dev, 0x002634, i);
+		if (!nv_wait(dev, 0x002634, 0xffffffff, i)) {
+			NV_INFO(dev, "PFIFO: kick ch %d failed: 0x%08x\n",
+				i, nv_rd32(dev, 0x002634));
+			return -EBUSY;
+		}
+	}
+
+	nv_wr32(dev, 0x002140, 0x00000000);
+	return 0;
+}
+
+
 struct nouveau_enum nvc0_fifo_fault_unit[] = {
 	{ 0x00, "PGRAPH" },
 	{ 0x03, "PEEPHOLE" },
@@ -439,13 +325,14 @@ nvc0_fifo_isr_vm_fault(struct drm_device *dev, int unit)
 static int
 nvc0_fifo_page_flip(struct drm_device *dev, u32 chid)
 {
+	struct nvc0_fifo_priv *priv = nv_engine(dev, NVOBJ_ENGINE_FIFO);
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
 	struct nouveau_channel *chan = NULL;
 	unsigned long flags;
 	int ret = -EINVAL;
 
 	spin_lock_irqsave(&dev_priv->channels.lock, flags);
-	if (likely(chid >= 0 && chid < dev_priv->engine.fifo.channels)) {
+	if (likely(chid >= 0 && chid < priv->base.channels)) {
 		chan = dev_priv->channels.ptr[chid];
 		if (likely(chan))
 			ret = nouveau_finish_page_flip(chan, NULL);
@@ -534,3 +421,56 @@ nvc0_fifo_isr(struct drm_device *dev)
 		nv_wr32(dev, 0x002140, 0);
 	}
 }
+
+static void
+nvc0_fifo_destroy(struct drm_device *dev, int engine)
+{
+	struct nvc0_fifo_priv *priv = nv_engine(dev, NVOBJ_ENGINE_FIFO);
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+
+	nouveau_vm_put(&priv->user_vma);
+	nouveau_gpuobj_ref(NULL, &priv->playlist[1]);
+	nouveau_gpuobj_ref(NULL, &priv->playlist[0]);
+
+	dev_priv->eng[engine] = NULL;
+	kfree(priv);
+}
+
+int
+nvc0_fifo_create(struct drm_device *dev)
+{
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nvc0_fifo_priv *priv;
+	int ret;
+
+	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	priv->base.base.destroy = nvc0_fifo_destroy;
+	priv->base.base.init = nvc0_fifo_init;
+	priv->base.base.fini = nvc0_fifo_fini;
+	priv->base.base.context_new = nvc0_fifo_context_new;
+	priv->base.base.context_del = nvc0_fifo_context_del;
+	priv->base.channels = 128;
+	dev_priv->eng[NVOBJ_ENGINE_FIFO] = &priv->base.base;
+
+	ret = nouveau_gpuobj_new(dev, NULL, 4096, 4096, 0, &priv->playlist[0]);
+	if (ret)
+		goto error;
+
+	ret = nouveau_gpuobj_new(dev, NULL, 4096, 4096, 0, &priv->playlist[1]);
+	if (ret)
+		goto error;
+
+	ret = nouveau_vm_get(dev_priv->bar1_vm, priv->base.channels * 0x1000,
+			     12, NV_MEM_ACCESS_RW, &priv->user_vma);
+	if (ret)
+		goto error;
+
+	nouveau_irq_register(dev, 8, nvc0_fifo_isr);
+error:
+	if (ret)
+		priv->base.base.destroy(dev, NVOBJ_ENGINE_FIFO);
+	return ret;
+}
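Both context_del() and the new fini() path retire channels through the same hardware kickoff interface: write the channel id to 0x002634 and wait for the register to read the id back, which indicates PFIFO has flushed the channel's state off the engine. The idiom as a helper-shaped sketch (hypothetical function, same registers as above):

	static int
	nvc0_fifo_kick(struct drm_device *dev, int chid)
	{
		nv_wr32(dev, 0x002634, chid);
		if (!nv_wait(dev, 0x002634, 0xffffffff, chid))
			return -EBUSY;	/* channel did not drain */
		return 0;
	}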

+ 3 - 1
drivers/gpu/drm/nouveau/nvc0_graph.c

@@ -29,6 +29,7 @@
 
 #include "nouveau_drv.h"
 #include "nouveau_mm.h"
+#include "nouveau_fifo.h"
 
 #include "nvc0_graph.h"
 #include "nvc0_grhub.fuc.h"
@@ -620,13 +621,14 @@ nvc0_graph_init(struct drm_device *dev, int engine)
 int
 nvc0_graph_isr_chid(struct drm_device *dev, u64 inst)
 {
+	struct nouveau_fifo_priv *pfifo = nv_engine(dev, NVOBJ_ENGINE_FIFO);
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
 	struct nouveau_channel *chan;
 	unsigned long flags;
 	int i;
 
 	spin_lock_irqsave(&dev_priv->channels.lock, flags);
-	for (i = 0; i < dev_priv->engine.fifo.channels; i++) {
+	for (i = 0; i < pfifo->channels; i++) {
 		chan = dev_priv->channels.ptr[i];
 		if (!chan || !chan->ramin)
 			continue;

+ 189 - 0
drivers/gpu/drm/nouveau/nvc0_pm.c

@@ -164,7 +164,9 @@ struct nvc0_pm_clock {
 };
 
 struct nvc0_pm_state {
+	struct nouveau_pm_level *perflvl;
 	struct nvc0_pm_clock eng[16];
+	struct nvc0_pm_clock mem;
 };
 
 static u32
@@ -303,6 +305,48 @@ calc_clk(struct drm_device *dev, int clk, struct nvc0_pm_clock *info, u32 freq)
 	return 0;
 }
 
+static int
+calc_mem(struct drm_device *dev, struct nvc0_pm_clock *info, u32 freq)
+{
+	struct pll_lims pll;
+	int N, M, P, ret;
+	u32 ctrl;
+
+	/* mclk pll input freq comes from another pll, make sure it's on */
+	ctrl = nv_rd32(dev, 0x132020);
+	if (!(ctrl & 0x00000001)) {
+		/* if not, program it to 567MHz.  nfi where this value comes
+		 * from - it looks like it's in the pll limits table for
+		 * 132000 but the binary driver ignores all my attempts to
+		 * change this value.
+		 */
+		nv_wr32(dev, 0x137320, 0x00000103);
+		nv_wr32(dev, 0x137330, 0x81200606);
+		nv_wait(dev, 0x132020, 0x00010000, 0x00010000);
+		nv_wr32(dev, 0x132024, 0x0001150f);
+		nv_mask(dev, 0x132020, 0x00000001, 0x00000001);
+		nv_wait(dev, 0x137390, 0x00020000, 0x00020000);
+		nv_mask(dev, 0x132020, 0x00000004, 0x00000004);
+	}
+
+	/* for the moment, until the clock tree is better understood, use
+	 * pll mode for all clock frequencies
+	 */
+	ret = get_pll_limits(dev, 0x132000, &pll);
+	if (ret == 0) {
+		pll.refclk = read_pll(dev, 0x132020);
+		if (pll.refclk) {
+			ret = nva3_calc_pll(dev, &pll, freq, &N, NULL, &M, &P);
+			if (ret > 0) {
+				info->coef = (P << 16) | (N << 8) | M;
+				return 0;
+			}
+		}
+	}
+
+	return -EINVAL;
+}
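calc_mem() packs the chosen coefficients as (P << 16) | (N << 8) | M, which mclk_clock_set() later writes straight into 0x132004. Decoding the word back into a frequency follows the usual PLL formula, as in read_pll() earlier in this series (hypothetical helper, for illustration):

	static u32
	mem_pll_khz(u32 refclk, u32 coef)
	{
		u32 P = (coef >> 16) & 0xff;
		u32 N = (coef >>  8) & 0xff;
		u32 M =  coef        & 0xff;

		return (M * P) ? refclk * N / (M * P) : 0;
	}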
+
 void *
 nvc0_pm_clocks_pre(struct drm_device *dev, struct nouveau_pm_level *perflvl)
 {
@@ -335,6 +379,15 @@ nvc0_pm_clocks_pre(struct drm_device *dev, struct nouveau_pm_level *perflvl)
 		return ERR_PTR(ret);
 	}
 
+	if (perflvl->memory) {
+		ret = calc_mem(dev, &info->mem, perflvl->memory);
+		if (ret) {
+			kfree(info);
+			return ERR_PTR(ret);
+		}
+	}
+
+	info->perflvl = perflvl;
 	return info;
 }
 
@@ -375,12 +428,148 @@ prog_clk(struct drm_device *dev, int clk, struct nvc0_pm_clock *info)
 	nv_mask(dev, 0x137250 + (clk * 0x04), 0x00003f3f, info->mdiv);
 }
 
+static void
+mclk_precharge(struct nouveau_mem_exec_func *exec)
+{
+}
+
+static void
+mclk_refresh(struct nouveau_mem_exec_func *exec)
+{
+}
+
+static void
+mclk_refresh_auto(struct nouveau_mem_exec_func *exec, bool enable)
+{
+	nv_wr32(exec->dev, 0x10f210, enable ? 0x80000000 : 0x00000000);
+}
+
+static void
+mclk_refresh_self(struct nouveau_mem_exec_func *exec, bool enable)
+{
+}
+
+static void
+mclk_wait(struct nouveau_mem_exec_func *exec, u32 nsec)
+{
+	udelay((nsec + 500) / 1000);
+}
+
+static u32
+mclk_mrg(struct nouveau_mem_exec_func *exec, int mr)
+{
+	struct drm_device *dev = exec->dev;
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	if (dev_priv->vram_type != NV_MEM_TYPE_GDDR5) {
+		if (mr <= 1)
+			return nv_rd32(dev, 0x10f300 + ((mr - 0) * 4));
+		return nv_rd32(dev, 0x10f320 + ((mr - 2) * 4));
+	} else {
+		if (mr == 0)
+			return nv_rd32(dev, 0x10f300 + (mr * 4));
+		else
+		if (mr <= 7)
+			return nv_rd32(dev, 0x10f32c + (mr * 4));
+		return nv_rd32(dev, 0x10f34c);
+	}
+}
+
+static void
+mclk_mrs(struct nouveau_mem_exec_func *exec, int mr, u32 data)
+{
+	struct drm_device *dev = exec->dev;
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	if (dev_priv->vram_type != NV_MEM_TYPE_GDDR5) {
+		if (mr <= 1) {
+			nv_wr32(dev, 0x10f300 + ((mr - 0) * 4), data);
+			if (dev_priv->vram_rank_B)
+				nv_wr32(dev, 0x10f308 + ((mr - 0) * 4), data);
+		} else
+		if (mr <= 3) {
+			nv_wr32(dev, 0x10f320 + ((mr - 2) * 4), data);
+			if (dev_priv->vram_rank_B)
+				nv_wr32(dev, 0x10f328 + ((mr - 2) * 4), data);
+		}
+	} else {
+		if      (mr ==  0) nv_wr32(dev, 0x10f300 + (mr * 4), data);
+		else if (mr <=  7) nv_wr32(dev, 0x10f32c + (mr * 4), data);
+		else if (mr == 15) nv_wr32(dev, 0x10f34c, data);
+	}
+}
+
+static void
+mclk_clock_set(struct nouveau_mem_exec_func *exec)
+{
+	struct nvc0_pm_state *info = exec->priv;
+	struct drm_device *dev = exec->dev;
+	u32 ctrl = nv_rd32(dev, 0x132000);
+
+	nv_wr32(dev, 0x137360, 0x00000001);
+	nv_wr32(dev, 0x137370, 0x00000000);
+	nv_wr32(dev, 0x137380, 0x00000000);
+	if (ctrl & 0x00000001)
+		nv_wr32(dev, 0x132000, (ctrl &= ~0x00000001));
+
+	nv_wr32(dev, 0x132004, info->mem.coef);
+	nv_wr32(dev, 0x132000, (ctrl |= 0x00000001));
+	nv_wait(dev, 0x137390, 0x00000002, 0x00000002);
+	nv_wr32(dev, 0x132018, 0x00005000);
+
+	nv_wr32(dev, 0x137370, 0x00000001);
+	nv_wr32(dev, 0x137380, 0x00000001);
+	nv_wr32(dev, 0x137360, 0x00000000);
+}
+
+static void
+mclk_timing_set(struct nouveau_mem_exec_func *exec)
+{
+	struct nvc0_pm_state *info = exec->priv;
+	struct nouveau_pm_level *perflvl = info->perflvl;
+	int i;
+
+	for (i = 0; i < 5; i++)
+		nv_wr32(exec->dev, 0x10f290 + (i * 4), perflvl->timing.reg[i]);
+}
+
+static void
+prog_mem(struct drm_device *dev, struct nvc0_pm_state *info)
+{
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nouveau_mem_exec_func exec = {
+		.dev = dev,
+		.precharge = mclk_precharge,
+		.refresh = mclk_refresh,
+		.refresh_auto = mclk_refresh_auto,
+		.refresh_self = mclk_refresh_self,
+		.wait = mclk_wait,
+		.mrg = mclk_mrg,
+		.mrs = mclk_mrs,
+		.clock_set = mclk_clock_set,
+		.timing_set = mclk_timing_set,
+		.priv = info
+	};
+
+	if (dev_priv->chipset < 0xd0)
+		nv_wr32(dev, 0x611200, 0x00003300);
+	else
+		nv_wr32(dev, 0x62c000, 0x03030000);
+
+	nouveau_mem_exec(&exec, info->perflvl);
+
+	if (dev_priv->chipset < 0xd0)
+		nv_wr32(dev, 0x611200, 0x00003330);
+	else
+		nv_wr32(dev, 0x62c000, 0x03030300);
+}
 int
 nvc0_pm_clocks_set(struct drm_device *dev, void *data)
 {
 	struct nvc0_pm_state *info = data;
 	int i;
 
+	if (info->mem.coef)
+		prog_mem(dev, info);
+
 	for (i = 0; i < 16; i++) {
 		if (!info->eng[i].freq)
 			continue;

+ 153 - 0
drivers/gpu/drm/nouveau/nvc0_software.c

@@ -0,0 +1,153 @@
+/*
+ * Copyright 2012 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs
+ */
+
+#include "drmP.h"
+
+#include "nouveau_drv.h"
+#include "nouveau_ramht.h"
+#include "nouveau_software.h"
+
+#include "nv50_display.h"
+
+struct nvc0_software_priv {
+	struct nouveau_software_priv base;
+};
+
+struct nvc0_software_chan {
+	struct nouveau_software_chan base;
+	struct nouveau_vma dispc_vma[4];
+};
+
+u64
+nvc0_software_crtc(struct nouveau_channel *chan, int crtc)
+{
+	struct nvc0_software_chan *pch = chan->engctx[NVOBJ_ENGINE_SW];
+	return pch->dispc_vma[crtc].offset;
+}
+
+static int
+nvc0_software_context_new(struct nouveau_channel *chan, int engine)
+{
+	struct drm_device *dev = chan->dev;
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nvc0_software_priv *psw = nv_engine(dev, NVOBJ_ENGINE_SW);
+	struct nvc0_software_chan *pch;
+	int ret = 0, i;
+
+	pch = kzalloc(sizeof(*pch), GFP_KERNEL);
+	if (!pch)
+		return -ENOMEM;
+
+	nouveau_software_context_new(&pch->base);
+	chan->engctx[engine] = pch;
+
+	/* map display semaphore buffers into channel's vm */
+	for (i = 0; !ret && i < dev->mode_config.num_crtc; i++) {
+		struct nouveau_bo *bo;
+		if (dev_priv->card_type >= NV_D0)
+			bo = nvd0_display_crtc_sema(dev, i);
+		else
+			bo = nv50_display(dev)->crtc[i].sem.bo;
+
+		ret = nouveau_bo_vma_add(bo, chan->vm, &pch->dispc_vma[i]);
+	}
+
+	if (ret)
+		psw->base.base.context_del(chan, engine);
+	return ret;
+}
+
+static void
+nvc0_software_context_del(struct nouveau_channel *chan, int engine)
+{
+	struct drm_device *dev = chan->dev;
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nvc0_software_chan *pch = chan->engctx[engine];
+	int i;
+
+	if (dev_priv->card_type >= NV_D0) {
+		for (i = 0; i < dev->mode_config.num_crtc; i++) {
+			struct nouveau_bo *bo = nvd0_display_crtc_sema(dev, i);
+			nouveau_bo_vma_del(bo, &pch->dispc_vma[i]);
+		}
+	} else
+	if (dev_priv->card_type >= NV_50) {
+		struct nv50_display *disp = nv50_display(dev);
+		for (i = 0; i < dev->mode_config.num_crtc; i++) {
+			struct nv50_display_crtc *dispc = &disp->crtc[i];
+			nouveau_bo_vma_del(dispc->sem.bo, &pch->dispc_vma[i]);
+		}
+	}
+
+	chan->engctx[engine] = NULL;
+	kfree(pch);
+}
+
+static int
+nvc0_software_object_new(struct nouveau_channel *chan, int engine,
+			 u32 handle, u16 class)
+{
+	return 0;
+}
+
+static int
+nvc0_software_init(struct drm_device *dev, int engine)
+{
+	return 0;
+}
+
+static int
+nvc0_software_fini(struct drm_device *dev, int engine, bool suspend)
+{
+	return 0;
+}
+
+static void
+nvc0_software_destroy(struct drm_device *dev, int engine)
+{
+	struct nvc0_software_priv *psw = nv_engine(dev, engine);
+
+	NVOBJ_ENGINE_DEL(dev, SW);
+	kfree(psw);
+}
+
+int
+nvc0_software_create(struct drm_device *dev)
+{
+	struct nvc0_software_priv *psw = kzalloc(sizeof(*psw), GFP_KERNEL);
+	if (!psw)
+		return -ENOMEM;
+
+	psw->base.base.destroy = nvc0_software_destroy;
+	psw->base.base.init = nvc0_software_init;
+	psw->base.base.fini = nvc0_software_fini;
+	psw->base.base.context_new = nvc0_software_context_new;
+	psw->base.base.context_del = nvc0_software_context_del;
+	psw->base.base.object_new = nvc0_software_object_new;
+	nouveau_software_create(&psw->base);
+
+	NVOBJ_ENGINE_ADD(dev, SW, &psw->base.base);
+	NVOBJ_CLASS(dev, 0x906e, SW);
+	return 0;
+}
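The software engine now owns the per-channel view of the display semaphore buffers: context_new() maps each CRTC's semaphore BO into the channel's VM, and nvc0_software_crtc() returns the resulting GPU virtual address, replacing the open-coded chan->dispc_vma[] lookup. Usage, as in the nvd0_display.c hunk below:

	u64 offset = nvc0_software_crtc(chan, nv_crtc->index);
	offset += evo->sem.offset;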

+ 5 - 5
drivers/gpu/drm/nouveau/nvd0_display.c

@@ -33,6 +33,7 @@
 #include "nouveau_crtc.h"
 #include "nouveau_dma.h"
 #include "nouveau_fb.h"
+#include "nouveau_software.h"
 #include "nv50_display.h"
 
 #define EVO_DMA_NR 9
@@ -284,8 +285,6 @@ nvd0_display_flip_next(struct drm_crtc *crtc, struct drm_framebuffer *fb,
 	u32 *push;
 	int ret;
 
-	evo_sync(crtc->dev, EVO_MASTER);
-
 	swap_interval <<= 4;
 	if (swap_interval == 0)
 		swap_interval |= 0x100;
@@ -300,15 +299,16 @@ nvd0_display_flip_next(struct drm_crtc *crtc, struct drm_framebuffer *fb,
 		if (ret)
 			return ret;
 
-		offset  = chan->dispc_vma[nv_crtc->index].offset;
+
+		offset  = nvc0_software_crtc(chan, nv_crtc->index);
 		offset += evo->sem.offset;
 
-		BEGIN_NVC0(chan, 2, 0, NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH, 4);
+		BEGIN_NVC0(chan, 0, NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH, 4);
 		OUT_RING  (chan, upper_32_bits(offset));
 		OUT_RING  (chan, lower_32_bits(offset));
 		OUT_RING  (chan, 0xf00d0000 | evo->sem.value);
 		OUT_RING  (chan, 0x1002);
-		BEGIN_NVC0(chan, 2, 0, NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH, 4);
+		BEGIN_NVC0(chan, 0, NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH, 4);
 		OUT_RING  (chan, upper_32_bits(offset));
 		OUT_RING  (chan, lower_32_bits(offset ^ 0x10));
 		OUT_RING  (chan, 0x74b1e000);
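The two semaphore packets above form the flip handshake with the display engine: the channel releases 0xf00d0000 | evo->sem.value at the semaphore offset to publish the queued flip, then acquires on the companion slot at offset ^ 0x10, which is written with 0x74b1e000 once the flip actually completes, so further rendering in the channel stalls until scanout has switched. The slot pairing, spelled out (values taken from the hunk above):

	u64 rel = offset;         /* channel writes 0xf00d0000 | evo->sem.value here */
	u64 acq = offset ^ 0x10;  /* completion value 0x74b1e000 lands here */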

+ 423 - 0
drivers/gpu/drm/nouveau/nve0_fifo.c

@@ -0,0 +1,423 @@
+/*
+ * Copyright 2010 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs
+ */
+
+#include "drmP.h"
+
+#include "nouveau_drv.h"
+#include "nouveau_mm.h"
+#include "nouveau_fifo.h"
+
+#define NVE0_FIFO_ENGINE_NUM 32
+
+static void nve0_fifo_isr(struct drm_device *);
+
+struct nve0_fifo_engine {
+	struct nouveau_gpuobj *playlist[2];
+	int cur_playlist;
+};
+
+struct nve0_fifo_priv {
+	struct nouveau_fifo_priv base;
+	struct nve0_fifo_engine engine[NVE0_FIFO_ENGINE_NUM];
+	struct {
+		struct nouveau_gpuobj *mem;
+		struct nouveau_vma bar;
+	} user;
+	int spoon_nr;
+};
+
+struct nve0_fifo_chan {
+	struct nouveau_fifo_chan base;
+	u32 engine;
+};
+
+static void
+nve0_fifo_playlist_update(struct drm_device *dev, u32 engine)
+{
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nouveau_instmem_engine *pinstmem = &dev_priv->engine.instmem;
+	struct nve0_fifo_priv *priv = nv_engine(dev, NVOBJ_ENGINE_FIFO);
+	struct nve0_fifo_engine *peng = &priv->engine[engine];
+	struct nouveau_gpuobj *cur;
+	u32 match = (engine << 16) | 0x00000001;
+	int ret, i, p;
+
+	cur = peng->playlist[peng->cur_playlist];
+	if (unlikely(cur == NULL)) {
+		ret = nouveau_gpuobj_new(dev, NULL, 0x8000, 0x1000, 0, &cur);
+		if (ret) {
+			NV_ERROR(dev, "PFIFO: playlist alloc failed\n");
+			return;
+		}
+
+		peng->playlist[peng->cur_playlist] = cur;
+	}
+
+	peng->cur_playlist = !peng->cur_playlist;
+
+	for (i = 0, p = 0; i < priv->base.channels; i++) {
+		u32 ctrl = nv_rd32(dev, 0x800004 + (i * 8)) & 0x001f0001;
+		if (ctrl != match)
+			continue;
+		nv_wo32(cur, p + 0, i);
+		nv_wo32(cur, p + 4, 0x00000000);
+		p += 8;
+	}
+	pinstmem->flush(dev);
+
+	nv_wr32(dev, 0x002270, cur->vinst >> 12);
+	nv_wr32(dev, 0x002274, (engine << 20) | (p >> 3));
+	if (!nv_wait(dev, 0x002284 + (engine * 4), 0x00100000, 0x00000000))
+		NV_ERROR(dev, "PFIFO: playlist %d update timeout\n", engine);
+}
+
+static int
+nve0_fifo_context_new(struct nouveau_channel *chan, int engine)
+{
+	struct drm_device *dev = chan->dev;
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nouveau_instmem_engine *pinstmem = &dev_priv->engine.instmem;
+	struct nve0_fifo_priv *priv = nv_engine(dev, engine);
+	struct nve0_fifo_chan *fctx;
+	u64 usermem = priv->user.mem->vinst + chan->id * 512;
+	u64 ib_virt = chan->pushbuf_base + chan->dma.ib_base * 4;
+	int ret = 0, i;
+
+	fctx = chan->engctx[engine] = kzalloc(sizeof(*fctx), GFP_KERNEL);
+	if (!fctx)
+		return -ENOMEM;
+
+	fctx->engine = 0; /* PGRAPH */
+
+	/* allocate vram for control regs, map into polling area */
+	chan->user = ioremap_wc(pci_resource_start(dev->pdev, 1) +
+				priv->user.bar.offset + (chan->id * 512), 512);
+	if (!chan->user) {
+		ret = -ENOMEM;
+		goto error;
+	}
+
+	for (i = 0; i < 0x100; i += 4)
+		nv_wo32(chan->ramin, i, 0x00000000);
+	nv_wo32(chan->ramin, 0x08, lower_32_bits(usermem));
+	nv_wo32(chan->ramin, 0x0c, upper_32_bits(usermem));
+	nv_wo32(chan->ramin, 0x10, 0x0000face);
+	nv_wo32(chan->ramin, 0x30, 0xfffff902);
+	nv_wo32(chan->ramin, 0x48, lower_32_bits(ib_virt));
+	nv_wo32(chan->ramin, 0x4c, drm_order(chan->dma.ib_max + 1) << 16 |
+				     upper_32_bits(ib_virt));
+	nv_wo32(chan->ramin, 0x84, 0x20400000);
+	nv_wo32(chan->ramin, 0x94, 0x30000001);
+	nv_wo32(chan->ramin, 0x9c, 0x00000100);
+	nv_wo32(chan->ramin, 0xac, 0x0000001f);
+	nv_wo32(chan->ramin, 0xe4, 0x00000000);
+	nv_wo32(chan->ramin, 0xe8, chan->id);
+	nv_wo32(chan->ramin, 0xf8, 0x10003080); /* 0x002310 */
+	nv_wo32(chan->ramin, 0xfc, 0x10000010); /* 0x002350 */
+	pinstmem->flush(dev);
+
+	nv_wr32(dev, 0x800000 + (chan->id * 8), 0x80000000 |
+						(chan->ramin->vinst >> 12));
+	nv_mask(dev, 0x800004 + (chan->id * 8), 0x00000400, 0x00000400);
+	nve0_fifo_playlist_update(dev, fctx->engine);
+	nv_mask(dev, 0x800004 + (chan->id * 8), 0x00000400, 0x00000400);
+
+error:
+	if (ret)
+		priv->base.base.context_del(chan, engine);
+	return ret;
+}
+
+static void
+nve0_fifo_context_del(struct nouveau_channel *chan, int engine)
+{
+	struct nve0_fifo_chan *fctx = chan->engctx[engine];
+	struct drm_device *dev = chan->dev;
+
+	nv_mask(dev, 0x800004 + (chan->id * 8), 0x00000800, 0x00000800);
+	nv_wr32(dev, 0x002634, chan->id);
+	if (!nv_wait(dev, 0x002634, 0xffffffff, chan->id))
+		NV_WARN(dev, "0x2634 != chid: 0x%08x\n", nv_rd32(dev, 0x2634));
+	nve0_fifo_playlist_update(dev, fctx->engine);
+	nv_wr32(dev, 0x800000 + (chan->id * 8), 0x00000000);
+
+	if (chan->user) {
+		iounmap(chan->user);
+		chan->user = NULL;
+	}
+
+	chan->engctx[NVOBJ_ENGINE_FIFO] = NULL;
+	kfree(fctx);
+}
+
+static int
+nve0_fifo_init(struct drm_device *dev, int engine)
+{
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nve0_fifo_priv *priv = nv_engine(dev, engine);
+	struct nve0_fifo_chan *fctx;
+	int i;
+
+	/* reset PFIFO, enable all available PSUBFIFO areas */
+	nv_mask(dev, 0x000200, 0x00000100, 0x00000000);
+	nv_mask(dev, 0x000200, 0x00000100, 0x00000100);
+	nv_wr32(dev, 0x000204, 0xffffffff);
+
+	priv->spoon_nr = hweight32(nv_rd32(dev, 0x000204));
+	NV_DEBUG(dev, "PFIFO: %d subfifo(s)\n", priv->spoon_nr);
+
+	/* PSUBFIFO[n] */
+	for (i = 0; i < priv->spoon_nr; i++) {
+		nv_mask(dev, 0x04013c + (i * 0x2000), 0x10000100, 0x00000000);
+		nv_wr32(dev, 0x040108 + (i * 0x2000), 0xffffffff); /* INTR */
+		nv_wr32(dev, 0x04010c + (i * 0x2000), 0xfffffeff); /* INTR_EN */
+	}
+
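+	/* point the USERD aperture at our BAR1 allocation */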
+	nv_wr32(dev, 0x002254, 0x10000000 | priv->user.bar.offset >> 12);
+
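+	/* ack anything pending and enable PFIFO interrupts */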
+	nv_wr32(dev, 0x002a00, 0xffffffff);
+	nv_wr32(dev, 0x002100, 0xffffffff);
+	nv_wr32(dev, 0x002140, 0xbfffffff);
+
+	/* restore PFIFO context table */
+	for (i = 0; i < priv->base.channels; i++) {
+		struct nouveau_channel *chan = dev_priv->channels.ptr[i];
+		if (!chan || !(fctx = chan->engctx[engine]))
+			continue;
+
+		nv_wr32(dev, 0x800000 + (i * 8), 0x80000000 |
+						 (chan->ramin->vinst >> 12));
+		nv_mask(dev, 0x800004 + (i * 8), 0x00000400, 0x00000400);
+		nve0_fifo_playlist_update(dev, fctx->engine);
+		nv_mask(dev, 0x800004 + (i * 8), 0x00000400, 0x00000400);
+	}
+
+	return 0;
+}
+
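+/* kick every active channel off the hardware and wait for each to be
+ * acknowledged before the engine is suspended
+ */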
+static int
+nve0_fifo_fini(struct drm_device *dev, int engine, bool suspend)
+{
+	struct nve0_fifo_priv *priv = nv_engine(dev, engine);
+	int i;
+
+	for (i = 0; i < priv->base.channels; i++) {
+		if (!(nv_rd32(dev, 0x800004 + (i * 8)) & 1))
+			continue;
+
+		nv_mask(dev, 0x800004 + (i * 8), 0x00000800, 0x00000800);
+		nv_wr32(dev, 0x002634, i);
+		if (!nv_wait(dev, 0x002634, 0xffffffff, i)) {
+			NV_INFO(dev, "PFIFO: kick ch %d failed: 0x%08x\n",
+				i, nv_rd32(dev, 0x002634));
+			return -EBUSY;
+		}
+	}
+
+	nv_wr32(dev, 0x002140, 0x00000000);
+	return 0;
+}
+
+struct nouveau_enum nve0_fifo_fault_unit[] = {
+	{}
+};
+
+struct nouveau_enum nve0_fifo_fault_reason[] = {
+	{ 0x00, "PT_NOT_PRESENT" },
+	{ 0x01, "PT_TOO_SHORT" },
+	{ 0x02, "PAGE_NOT_PRESENT" },
+	{ 0x03, "VM_LIMIT_EXCEEDED" },
+	{ 0x04, "NO_CHANNEL" },
+	{ 0x05, "PAGE_SYSTEM_ONLY" },
+	{ 0x06, "PAGE_READ_ONLY" },
+	{ 0x0a, "COMPRESSED_SYSRAM" },
+	{ 0x0c, "INVALID_STORAGE_TYPE" },
+	{}
+};
+
+struct nouveau_enum nve0_fifo_fault_hubclient[] = {
+	{}
+};
+
+struct nouveau_enum nve0_fifo_fault_gpcclient[] = {
+	{}
+};
+
+struct nouveau_bitfield nve0_fifo_subfifo_intr[] = {
+	{ 0x00200000, "ILLEGAL_MTHD" },
+	{ 0x00800000, "EMPTY_SUBC" },
+	{}
+};
+
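+/* decode and report an MMU fault; the unit/client enum tables above are
+ * still empty for kepler, so only the fault reason is symbolic for now
+ */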
+static void
+nve0_fifo_isr_vm_fault(struct drm_device *dev, int unit)
+{
+	u32 inst = nv_rd32(dev, 0x2800 + (unit * 0x10));
+	u32 valo = nv_rd32(dev, 0x2804 + (unit * 0x10));
+	u32 vahi = nv_rd32(dev, 0x2808 + (unit * 0x10));
+	u32 stat = nv_rd32(dev, 0x280c + (unit * 0x10));
+	u32 client = (stat & 0x00001f00) >> 8;
+
+	NV_INFO(dev, "PFIFO: %s fault at 0x%010llx [",
+		(stat & 0x00000080) ? "write" : "read", (u64)vahi << 32 | valo);
+	nouveau_enum_print(nve0_fifo_fault_reason, stat & 0x0000000f);
+	printk("] from ");
+	nouveau_enum_print(nve0_fifo_fault_unit, unit);
+	if (stat & 0x00000040) {
+		printk("/");
+		nouveau_enum_print(nve0_fifo_fault_hubclient, client);
+	} else {
+		printk("/GPC%d/", (stat & 0x1f000000) >> 24);
+		nouveau_enum_print(nve0_fifo_fault_gpcclient, client);
+	}
+	printk(" on channel 0x%010llx\n", (u64)inst << 12);
+}
+
+static void
+nve0_fifo_isr_subfifo_intr(struct drm_device *dev, int unit)
+{
+	u32 stat = nv_rd32(dev, 0x040108 + (unit * 0x2000));
+	u32 addr = nv_rd32(dev, 0x0400c0 + (unit * 0x2000));
+	u32 data = nv_rd32(dev, 0x0400c4 + (unit * 0x2000));
+	u32 chid = nv_rd32(dev, 0x040120 + (unit * 0x2000)) & 0x7f;
+	u32 subc = (addr & 0x00070000) >> 16;
+	u32 mthd = (addr & 0x00003ffc);
+
+	NV_INFO(dev, "PSUBFIFO %d:", unit);
+	nouveau_bitfield_print(nve0_fifo_subfifo_intr, stat);
+	printk("\n");
+	NV_INFO(dev, "PSUBFIFO %d: ch %d subc %d mthd 0x%04x data 0x%08x\n",
+		unit, chid, subc, mthd, data);
+
+	nv_wr32(dev, 0x0400c0 + (unit * 0x2000), 0x80600008);
+	nv_wr32(dev, 0x040108 + (unit * 0x2000), stat);
+}
+
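+/* top-level PFIFO interrupt handler; status bits that aren't understood
+ * yet are reported and then masked so they can't storm
+ */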
+static void
+nve0_fifo_isr(struct drm_device *dev)
+{
+	u32 stat = nv_rd32(dev, 0x002100);
+
+	if (stat & 0x00000100) {
+		NV_INFO(dev, "PFIFO: unknown status 0x00000100\n");
+		nv_wr32(dev, 0x002100, 0x00000100);
+		stat &= ~0x00000100;
+	}
+
+	if (stat & 0x10000000) {
+		u32 units = nv_rd32(dev, 0x00259c);
+		u32 u = units;
+
+		while (u) {
+			int i = ffs(u) - 1;
+			nve0_fifo_isr_vm_fault(dev, i);
+			u &= ~(1 << i);
+		}
+
+		nv_wr32(dev, 0x00259c, units);
+		stat &= ~0x10000000;
+	}
+
+	if (stat & 0x20000000) {
+		u32 units = nv_rd32(dev, 0x0025a0);
+		u32 u = units;
+
+		while (u) {
+			int i = ffs(u) - 1;
+			nve0_fifo_isr_subfifo_intr(dev, i);
+			u &= ~(1 << i);
+		}
+
+		nv_wr32(dev, 0x0025a0, units);
+		stat &= ~0x20000000;
+	}
+
+	if (stat & 0x40000000) {
+		NV_INFO(dev, "PFIFO: unknown status 0x40000000\n");
+		nv_mask(dev, 0x002a00, 0x00000000, 0x00000000);
+		stat &= ~0x40000000;
+	}
+
+	if (stat) {
+		NV_INFO(dev, "PFIFO: unhandled status 0x%08x\n", stat);
+		nv_wr32(dev, 0x002100, stat);
+		nv_wr32(dev, 0x002140, 0);
+	}
+}
+
+static void
+nve0_fifo_destroy(struct drm_device *dev, int engine)
+{
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nve0_fifo_priv *priv = nv_engine(dev, engine);
+	int i;
+
+	nouveau_vm_put(&priv->user.bar);
+	nouveau_gpuobj_ref(NULL, &priv->user.mem);
+
+	for (i = 0; i < NVE0_FIFO_ENGINE_NUM; i++) {
+		nouveau_gpuobj_ref(NULL, &priv->engine[i].playlist[0]);
+		nouveau_gpuobj_ref(NULL, &priv->engine[i].playlist[1]);
+	}
+
+	dev_priv->eng[engine] = NULL;
+	kfree(priv);
+}
+
+int
+nve0_fifo_create(struct drm_device *dev)
+{
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nve0_fifo_priv *priv;
+	int ret;
+
+	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	priv->base.base.destroy = nve0_fifo_destroy;
+	priv->base.base.init = nve0_fifo_init;
+	priv->base.base.fini = nve0_fifo_fini;
+	priv->base.base.context_new = nve0_fifo_context_new;
+	priv->base.base.context_del = nve0_fifo_context_del;
+	priv->base.channels = 4096;
+	dev_priv->eng[NVOBJ_ENGINE_FIFO] = &priv->base.base;
+
+	ret = nouveau_gpuobj_new(dev, NULL, priv->base.channels * 512, 0x1000,
+				 NVOBJ_FLAG_ZERO_ALLOC, &priv->user.mem);
+	if (ret)
+		goto error;
+
+	ret = nouveau_vm_get(dev_priv->bar1_vm, priv->user.mem->size,
+			     12, NV_MEM_ACCESS_RW, &priv->user.bar);
+	if (ret)
+		goto error;
+
+	nouveau_vm_map(&priv->user.bar, *(struct nouveau_mem **)priv->user.mem->node);
+
+	nouveau_irq_register(dev, 8, nve0_fifo_isr);
+error:
+	if (ret)
+		priv->base.base.destroy(dev, NVOBJ_ENGINE_FIFO);
+	return ret;
+}

+ 831 - 0
drivers/gpu/drm/nouveau/nve0_graph.c

@@ -0,0 +1,831 @@
+/*
+ * Copyright 2010 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs
+ */
+
+#include <linux/firmware.h>
+#include <linux/module.h>
+
+#include "drmP.h"
+
+#include "nouveau_drv.h"
+#include "nouveau_mm.h"
+#include "nouveau_fifo.h"
+
+#include "nve0_graph.h"
+
+static void
+nve0_graph_ctxctl_debug_unit(struct drm_device *dev, u32 base)
+{
+	NV_INFO(dev, "PGRAPH: %06x - done 0x%08x\n", base,
+		nv_rd32(dev, base + 0x400));
+	NV_INFO(dev, "PGRAPH: %06x - stat 0x%08x 0x%08x 0x%08x 0x%08x\n", base,
+		nv_rd32(dev, base + 0x800), nv_rd32(dev, base + 0x804),
+		nv_rd32(dev, base + 0x808), nv_rd32(dev, base + 0x80c));
+	NV_INFO(dev, "PGRAPH: %06x - stat 0x%08x 0x%08x 0x%08x 0x%08x\n", base,
+		nv_rd32(dev, base + 0x810), nv_rd32(dev, base + 0x814),
+		nv_rd32(dev, base + 0x818), nv_rd32(dev, base + 0x81c));
+}
+
+static void
+nve0_graph_ctxctl_debug(struct drm_device *dev)
+{
+	u32 gpcnr = nv_rd32(dev, 0x409604) & 0xffff;
+	u32 gpc;
+
+	nve0_graph_ctxctl_debug_unit(dev, 0x409000);
+	for (gpc = 0; gpc < gpcnr; gpc++)
+		nve0_graph_ctxctl_debug_unit(dev, 0x502000 + (gpc * 0x8000));
+}
+
+static int
+nve0_graph_load_context(struct nouveau_channel *chan)
+{
+	struct drm_device *dev = chan->dev;
+
+	nv_wr32(dev, 0x409840, 0x00000030);
+	nv_wr32(dev, 0x409500, 0x80000000 | chan->ramin->vinst >> 12);
+	nv_wr32(dev, 0x409504, 0x00000003);
+	if (!nv_wait(dev, 0x409800, 0x00000010, 0x00000010))
+		NV_ERROR(dev, "PGRAPH: load_ctx timeout\n");
+
+	return 0;
+}
+
+static int
+nve0_graph_unload_context_to(struct drm_device *dev, u64 chan)
+{
+	nv_wr32(dev, 0x409840, 0x00000003);
+	nv_wr32(dev, 0x409500, 0x80000000 | chan >> 12);
+	nv_wr32(dev, 0x409504, 0x00000009);
+	if (!nv_wait(dev, 0x409800, 0x00000001, 0x00000000)) {
+		NV_ERROR(dev, "PGRAPH: unload_ctx timeout\n");
+		return -EBUSY;
+	}
+
+	return 0;
+}
+
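+/* Construct the initial "golden" context image: load the new channel's
+ * context, have the ctxctl ucode generate default state, save it back to
+ * memory, and cache a copy that all later channels are initialised from.
+ */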
+static int
+nve0_graph_construct_context(struct nouveau_channel *chan)
+{
+	struct drm_nouveau_private *dev_priv = chan->dev->dev_private;
+	struct nve0_graph_priv *priv = nv_engine(chan->dev, NVOBJ_ENGINE_GR);
+	struct nve0_graph_chan *grch = chan->engctx[NVOBJ_ENGINE_GR];
+	struct drm_device *dev = chan->dev;
+	int ret, i;
+	u32 *ctx;
+
+	ctx = kmalloc(priv->grctx_size, GFP_KERNEL);
+	if (!ctx)
+		return -ENOMEM;
+
+	nve0_graph_load_context(chan);
+
+	nv_wo32(grch->grctx, 0x1c, 1);
+	nv_wo32(grch->grctx, 0x20, 0);
+	nv_wo32(grch->grctx, 0x28, 0);
+	nv_wo32(grch->grctx, 0x2c, 0);
+	dev_priv->engine.instmem.flush(dev);
+
+	ret = nve0_grctx_generate(chan);
+	if (ret)
+		goto err;
+
+	ret = nve0_graph_unload_context_to(dev, chan->ramin->vinst);
+	if (ret)
+		goto err;
+
+	for (i = 0; i < priv->grctx_size; i += 4)
+		ctx[i / 4] = nv_ro32(grch->grctx, i);
+
+	priv->grctx_vals = ctx;
+	return 0;
+
+err:
+	kfree(ctx);
+	return ret;
+}
+
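+/* Build the per-channel list of register/value pairs the ctxctl ucode
+ * replays on context load.  It points several per-channel buffers (of
+ * mostly unknown purpose, hence the unk names) at this channel's VM.
+ */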
+static int
+nve0_graph_create_context_mmio_list(struct nouveau_channel *chan)
+{
+	struct nve0_graph_priv *priv = nv_engine(chan->dev, NVOBJ_ENGINE_GR);
+	struct nve0_graph_chan *grch = chan->engctx[NVOBJ_ENGINE_GR];
+	struct drm_device *dev = chan->dev;
+	u32 magic[GPC_MAX][2];
+	u16 offset = 0x0000;
+	int gpc;
+	int ret;
+
+	ret = nouveau_gpuobj_new(dev, chan, 0x3000, 256, NVOBJ_FLAG_VM,
+				 &grch->unk408004);
+	if (ret)
+		return ret;
+
+	ret = nouveau_gpuobj_new(dev, chan, 0x8000, 256, NVOBJ_FLAG_VM,
+				 &grch->unk40800c);
+	if (ret)
+		return ret;
+
+	ret = nouveau_gpuobj_new(dev, chan, 384 * 1024, 4096,
+				 NVOBJ_FLAG_VM | NVOBJ_FLAG_VM_USER,
+				 &grch->unk418810);
+	if (ret)
+		return ret;
+
+	ret = nouveau_gpuobj_new(dev, chan, 0x1000, 0, NVOBJ_FLAG_VM,
+				 &grch->mmio);
+	if (ret)
+		return ret;
+
+#define mmio(r,v) do {                                                         \
+	nv_wo32(grch->mmio, (grch->mmio_nr * 8) + 0, (r));                     \
+	nv_wo32(grch->mmio, (grch->mmio_nr * 8) + 4, (v));                     \
+	grch->mmio_nr++;                                                       \
+} while (0)
+	mmio(0x40800c, grch->unk40800c->linst >> 8);
+	mmio(0x408010, 0x80000000);
+	mmio(0x419004, grch->unk40800c->linst >> 8);
+	mmio(0x419008, 0x00000000);
+	mmio(0x4064cc, 0x80000000);
+	mmio(0x408004, grch->unk408004->linst >> 8);
+	mmio(0x408008, 0x80000030);
+	mmio(0x418808, grch->unk408004->linst >> 8);
+	mmio(0x41880c, 0x80000030);
+	mmio(0x4064c8, 0x01800600);
+	mmio(0x418810, 0x80000000 | grch->unk418810->linst >> 12);
+	mmio(0x419848, 0x10000000 | grch->unk418810->linst >> 12);
+	mmio(0x405830, 0x02180648);
+	mmio(0x4064c4, 0x0192ffff);
+
+	for (gpc = 0; gpc < priv->gpc_nr; gpc++) {
+		u16 magic0 = 0x0218 * priv->tpc_nr[gpc];
+		u16 magic1 = 0x0648 * priv->tpc_nr[gpc];
+		magic[gpc][0]  = 0x10000000 | (magic0 << 16) | offset;
+		magic[gpc][1]  = 0x00000000 | (magic1 << 16);
+		offset += 0x0324 * priv->tpc_nr[gpc];
+	}
+
+	for (gpc = 0; gpc < priv->gpc_nr; gpc++) {
+		mmio(GPC_UNIT(gpc, 0x30c0), magic[gpc][0]);
+		mmio(GPC_UNIT(gpc, 0x30e4), magic[gpc][1] | offset);
+		offset += 0x07ff * priv->tpc_nr[gpc];
+	}
+
+	mmio(0x17e91c, 0x06060609);
+	mmio(0x17e920, 0x00090a05);
+#undef mmio
+	return 0;
+}
+
+static int
+nve0_graph_context_new(struct nouveau_channel *chan, int engine)
+{
+	struct drm_device *dev = chan->dev;
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nouveau_instmem_engine *pinstmem = &dev_priv->engine.instmem;
+	struct nve0_graph_priv *priv = nv_engine(dev, engine);
+	struct nve0_graph_chan *grch;
+	struct nouveau_gpuobj *grctx;
+	int ret, i;
+
+	grch = kzalloc(sizeof(*grch), GFP_KERNEL);
+	if (!grch)
+		return -ENOMEM;
+	chan->engctx[NVOBJ_ENGINE_GR] = grch;
+
+	ret = nouveau_gpuobj_new(dev, chan, priv->grctx_size, 256,
+				 NVOBJ_FLAG_VM | NVOBJ_FLAG_ZERO_ALLOC,
+				 &grch->grctx);
+	if (ret)
+		goto error;
+	grctx = grch->grctx;
+
+	ret = nve0_graph_create_context_mmio_list(chan);
+	if (ret)
+		goto error;
+
+	nv_wo32(chan->ramin, 0x0210, lower_32_bits(grctx->linst) | 4);
+	nv_wo32(chan->ramin, 0x0214, upper_32_bits(grctx->linst));
+	pinstmem->flush(dev);
+
+	if (!priv->grctx_vals) {
+		ret = nve0_graph_construct_context(chan);
+		if (ret)
+			goto error;
+	}
+
+	for (i = 0; i < priv->grctx_size; i += 4)
+		nv_wo32(grctx, i, priv->grctx_vals[i / 4]);
+	nv_wo32(grctx, 0xf4, 0);
+	nv_wo32(grctx, 0xf8, 0);
+	nv_wo32(grctx, 0x10, grch->mmio_nr);
+	nv_wo32(grctx, 0x14, lower_32_bits(grch->mmio->linst));
+	nv_wo32(grctx, 0x18, upper_32_bits(grch->mmio->linst));
+	nv_wo32(grctx, 0x1c, 1);
+	nv_wo32(grctx, 0x20, 0);
+	nv_wo32(grctx, 0x28, 0);
+	nv_wo32(grctx, 0x2c, 0);
+
+	pinstmem->flush(dev);
+	return 0;
+
+error:
+	priv->base.context_del(chan, engine);
+	return ret;
+}
+
+static void
+nve0_graph_context_del(struct nouveau_channel *chan, int engine)
+{
+	struct nve0_graph_chan *grch = chan->engctx[engine];
+
+	nouveau_gpuobj_ref(NULL, &grch->mmio);
+	nouveau_gpuobj_ref(NULL, &grch->unk418810);
+	nouveau_gpuobj_ref(NULL, &grch->unk40800c);
+	nouveau_gpuobj_ref(NULL, &grch->unk408004);
+	nouveau_gpuobj_ref(NULL, &grch->grctx);
+	chan->engctx[engine] = NULL;
+}
+
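+/* no per-object state appears to be needed on kepler; classes are bound
+ * entirely through the graphics context
+ */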
+static int
+nve0_graph_object_new(struct nouveau_channel *chan, int engine,
+		      u32 handle, u16 class)
+{
+	return 0;
+}
+
+static int
+nve0_graph_fini(struct drm_device *dev, int engine, bool suspend)
+{
+	return 0;
+}
+
+static void
+nve0_graph_init_obj418880(struct drm_device *dev)
+{
+	struct nve0_graph_priv *priv = nv_engine(dev, NVOBJ_ENGINE_GR);
+	int i;
+
+	nv_wr32(dev, GPC_BCAST(0x0880), 0x00000000);
+	nv_wr32(dev, GPC_BCAST(0x08a4), 0x00000000);
+	for (i = 0; i < 4; i++)
+		nv_wr32(dev, GPC_BCAST(0x0888) + (i * 4), 0x00000000);
+	nv_wr32(dev, GPC_BCAST(0x08b4), priv->unk4188b4->vinst >> 8);
+	nv_wr32(dev, GPC_BCAST(0x08b8), priv->unk4188b8->vinst >> 8);
+}
+
+static void
+nve0_graph_init_regs(struct drm_device *dev)
+{
+	nv_wr32(dev, 0x400080, 0x003083c2);
+	nv_wr32(dev, 0x400088, 0x0001ffe7);
+	nv_wr32(dev, 0x40008c, 0x00000000);
+	nv_wr32(dev, 0x400090, 0x00000030);
+	nv_wr32(dev, 0x40013c, 0x003901f7);
+	nv_wr32(dev, 0x400140, 0x00000100);
+	nv_wr32(dev, 0x400144, 0x00000000);
+	nv_wr32(dev, 0x400148, 0x00000110);
+	nv_wr32(dev, 0x400138, 0x00000000);
+	nv_wr32(dev, 0x400130, 0x00000000);
+	nv_wr32(dev, 0x400134, 0x00000000);
+	nv_wr32(dev, 0x400124, 0x00000002);
+}
+
+static void
+nve0_graph_init_units(struct drm_device *dev)
+{
+	nv_wr32(dev, 0x409ffc, 0x00000000);
+	nv_wr32(dev, 0x409c14, 0x00003e3e);
+	nv_wr32(dev, 0x409c24, 0x000f0000);
+
+	nv_wr32(dev, 0x404000, 0xc0000000);
+	nv_wr32(dev, 0x404600, 0xc0000000);
+	nv_wr32(dev, 0x408030, 0xc0000000);
+	nv_wr32(dev, 0x404490, 0xc0000000);
+	nv_wr32(dev, 0x406018, 0xc0000000);
+	nv_wr32(dev, 0x407020, 0xc0000000);
+	nv_wr32(dev, 0x405840, 0xc0000000);
+	nv_wr32(dev, 0x405844, 0x00ffffff);
+
+	nv_mask(dev, 0x419cc0, 0x00000008, 0x00000008);
+	nv_mask(dev, 0x419eb4, 0x00001000, 0x00001000);
+}
+
+static void
+nve0_graph_init_gpc_0(struct drm_device *dev)
+{
+	struct nve0_graph_priv *priv = nv_engine(dev, NVOBJ_ENGINE_GR);
+	const u32 magicgpc918 = DIV_ROUND_UP(0x00800000, priv->tpc_total);
+	u32 data[TPC_MAX / 8];
+	u8  tpcnr[GPC_MAX];
+	int i, gpc, tpc;
+
+	nv_wr32(dev, GPC_UNIT(0, 0x3018), 0x00000001);
+
+	memset(data, 0x00, sizeof(data));
+	memcpy(tpcnr, priv->tpc_nr, sizeof(priv->tpc_nr));
+	for (i = 0, gpc = -1; i < priv->tpc_total; i++) {
+		do {
+			gpc = (gpc + 1) % priv->gpc_nr;
+		} while (!tpcnr[gpc]);
+		tpc = priv->tpc_nr[gpc] - tpcnr[gpc]--;
+
+		data[i / 8] |= tpc << ((i % 8) * 4);
+	}
+
+	nv_wr32(dev, GPC_BCAST(0x0980), data[0]);
+	nv_wr32(dev, GPC_BCAST(0x0984), data[1]);
+	nv_wr32(dev, GPC_BCAST(0x0988), data[2]);
+	nv_wr32(dev, GPC_BCAST(0x098c), data[3]);
+
+	for (gpc = 0; gpc < priv->gpc_nr; gpc++) {
+		nv_wr32(dev, GPC_UNIT(gpc, 0x0914), priv->magic_not_rop_nr << 8 |
+						  priv->tpc_nr[gpc]);
+		nv_wr32(dev, GPC_UNIT(gpc, 0x0910), 0x00040000 | priv->tpc_total);
+		nv_wr32(dev, GPC_UNIT(gpc, 0x0918), magicgpc918);
+	}
+
+	nv_wr32(dev, GPC_BCAST(0x1bd4), magicgpc918);
+	nv_wr32(dev, GPC_BCAST(0x08ac), nv_rd32(dev, 0x100800));
+}
+
+static void
+nve0_graph_init_gpc_1(struct drm_device *dev)
+{
+	struct nve0_graph_priv *priv = nv_engine(dev, NVOBJ_ENGINE_GR);
+	int gpc, tpc;
+
+	for (gpc = 0; gpc < priv->gpc_nr; gpc++) {
+		nv_wr32(dev, GPC_UNIT(gpc, 0x3038), 0xc0000000);
+		nv_wr32(dev, GPC_UNIT(gpc, 0x0420), 0xc0000000);
+		nv_wr32(dev, GPC_UNIT(gpc, 0x0900), 0xc0000000);
+		nv_wr32(dev, GPC_UNIT(gpc, 0x1028), 0xc0000000);
+		nv_wr32(dev, GPC_UNIT(gpc, 0x0824), 0xc0000000);
+		for (tpc = 0; tpc < priv->tpc_nr[gpc]; tpc++) {
+			nv_wr32(dev, TPC_UNIT(gpc, tpc, 0x508), 0xffffffff);
+			nv_wr32(dev, TPC_UNIT(gpc, tpc, 0x50c), 0xffffffff);
+			nv_wr32(dev, TPC_UNIT(gpc, tpc, 0x224), 0xc0000000);
+			nv_wr32(dev, TPC_UNIT(gpc, tpc, 0x48c), 0xc0000000);
+			nv_wr32(dev, TPC_UNIT(gpc, tpc, 0x084), 0xc0000000);
+			nv_wr32(dev, TPC_UNIT(gpc, tpc, 0x644), 0x001ffffe);
+			nv_wr32(dev, TPC_UNIT(gpc, tpc, 0x64c), 0x0000000f);
+		}
+		nv_wr32(dev, GPC_UNIT(gpc, 0x2c90), 0xffffffff);
+		nv_wr32(dev, GPC_UNIT(gpc, 0x2c94), 0xffffffff);
+	}
+}
+
+static void
+nve0_graph_init_rop(struct drm_device *dev)
+{
+	struct nve0_graph_priv *priv = nv_engine(dev, NVOBJ_ENGINE_GR);
+	int rop;
+
+	for (rop = 0; rop < priv->rop_nr; rop++) {
+		nv_wr32(dev, ROP_UNIT(rop, 0x144), 0xc0000000);
+		nv_wr32(dev, ROP_UNIT(rop, 0x070), 0xc0000000);
+		nv_wr32(dev, ROP_UNIT(rop, 0x204), 0xffffffff);
+		nv_wr32(dev, ROP_UNIT(rop, 0x208), 0xffffffff);
+	}
+}
+
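+/* upload falcon ucode: the data segment first, then code in 64-word
+ * (0x100 byte) pages selected through the 0x188 window register
+ */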
+static void
+nve0_graph_init_fuc(struct drm_device *dev, u32 fuc_base,
+		    struct nve0_graph_fuc *code, struct nve0_graph_fuc *data)
+{
+	int i;
+
+	nv_wr32(dev, fuc_base + 0x01c0, 0x01000000);
+	for (i = 0; i < data->size / 4; i++)
+		nv_wr32(dev, fuc_base + 0x01c4, data->data[i]);
+
+	nv_wr32(dev, fuc_base + 0x0180, 0x01000000);
+	for (i = 0; i < code->size / 4; i++) {
+		if ((i & 0x3f) == 0)
+			nv_wr32(dev, fuc_base + 0x0188, i >> 6);
+		nv_wr32(dev, fuc_base + 0x0184, code->data[i]);
+	}
+}
+
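+/* Boot the two context-control falcons (0x409000 and 0x41a000) and run
+ * the initial handshake: a request is issued by writing a method number
+ * to 0x409504 with its argument in 0x409500, and completion is signalled
+ * through 0x409800.  The method numbers (0x10 queries the context size,
+ * etc) come from the blob ucode's interface and are only partly
+ * understood.
+ */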
+static int
+nve0_graph_init_ctxctl(struct drm_device *dev)
+{
+	struct nve0_graph_priv *priv = nv_engine(dev, NVOBJ_ENGINE_GR);
+	u32 r000260;
+
+	/* load fuc microcode */
+	r000260 = nv_mask(dev, 0x000260, 0x00000001, 0x00000000);
+	nve0_graph_init_fuc(dev, 0x409000, &priv->fuc409c, &priv->fuc409d);
+	nve0_graph_init_fuc(dev, 0x41a000, &priv->fuc41ac, &priv->fuc41ad);
+	nv_wr32(dev, 0x000260, r000260);
+
+	/* start both of them running */
+	nv_wr32(dev, 0x409840, 0xffffffff);
+	nv_wr32(dev, 0x41a10c, 0x00000000);
+	nv_wr32(dev, 0x40910c, 0x00000000);
+	nv_wr32(dev, 0x41a100, 0x00000002);
+	nv_wr32(dev, 0x409100, 0x00000002);
+	if (!nv_wait(dev, 0x409800, 0x00000001, 0x00000001))
+		NV_INFO(dev, "0x409800 wait failed\n");
+
+	nv_wr32(dev, 0x409840, 0xffffffff);
+	nv_wr32(dev, 0x409500, 0x7fffffff);
+	nv_wr32(dev, 0x409504, 0x00000021);
+
+	nv_wr32(dev, 0x409840, 0xffffffff);
+	nv_wr32(dev, 0x409500, 0x00000000);
+	nv_wr32(dev, 0x409504, 0x00000010);
+	if (!nv_wait_ne(dev, 0x409800, 0xffffffff, 0x00000000)) {
+		NV_ERROR(dev, "fuc09 req 0x10 timeout\n");
+		return -EBUSY;
+	}
+	priv->grctx_size = nv_rd32(dev, 0x409800);
+
+	nv_wr32(dev, 0x409840, 0xffffffff);
+	nv_wr32(dev, 0x409500, 0x00000000);
+	nv_wr32(dev, 0x409504, 0x00000016);
+	if (!nv_wait_ne(dev, 0x409800, 0xffffffff, 0x00000000)) {
+		NV_ERROR(dev, "fuc09 req 0x16 timeout\n");
+		return -EBUSY;
+	}
+
+	nv_wr32(dev, 0x409840, 0xffffffff);
+	nv_wr32(dev, 0x409500, 0x00000000);
+	nv_wr32(dev, 0x409504, 0x00000025);
+	if (!nv_wait_ne(dev, 0x409800, 0xffffffff, 0x00000000)) {
+		NV_ERROR(dev, "fuc09 req 0x25 timeout\n");
+		return -EBUSY;
+	}
+
+	nv_wr32(dev, 0x409800, 0x00000000);
+	nv_wr32(dev, 0x409500, 0x00000001);
+	nv_wr32(dev, 0x409504, 0x00000030);
+	if (!nv_wait_ne(dev, 0x409800, 0xffffffff, 0x00000000)) {
+		NV_ERROR(dev, "fuc09 req 0x30 timeout\n");
+		return -EBUSY;
+	}
+
+	nv_wr32(dev, 0x409810, 0xb00095c8);
+	nv_wr32(dev, 0x409800, 0x00000000);
+	nv_wr32(dev, 0x409500, 0x00000001);
+	nv_wr32(dev, 0x409504, 0x00000031);
+	if (!nv_wait_ne(dev, 0x409800, 0xffffffff, 0x00000000)) {
+		NV_ERROR(dev, "fuc09 req 0x31 timeout\n");
+		return -EBUSY;
+	}
+
+	nv_wr32(dev, 0x409810, 0x00080420);
+	nv_wr32(dev, 0x409800, 0x00000000);
+	nv_wr32(dev, 0x409500, 0x00000001);
+	nv_wr32(dev, 0x409504, 0x00000032);
+	if (!nv_wait_ne(dev, 0x409800, 0xffffffff, 0x00000000)) {
+		NV_ERROR(dev, "fuc09 req 0x32 timeout\n");
+		return -EBUSY;
+	}
+
+	nv_wr32(dev, 0x409614, 0x00000070);
+	nv_wr32(dev, 0x409614, 0x00000770);
+	nv_wr32(dev, 0x40802c, 0x00000001);
+	return 0;
+}
+
+static int
+nve0_graph_init(struct drm_device *dev, int engine)
+{
+	int ret;
+
+	nv_mask(dev, 0x000200, 0x18001000, 0x00000000);
+	nv_mask(dev, 0x000200, 0x18001000, 0x18001000);
+
+	nve0_graph_init_obj418880(dev);
+	nve0_graph_init_regs(dev);
+	nve0_graph_init_gpc_0(dev);
+
+	nv_wr32(dev, 0x400500, 0x00010001);
+	nv_wr32(dev, 0x400100, 0xffffffff);
+	nv_wr32(dev, 0x40013c, 0xffffffff);
+
+	nve0_graph_init_units(dev);
+	nve0_graph_init_gpc_1(dev);
+	nve0_graph_init_rop(dev);
+
+	nv_wr32(dev, 0x400108, 0xffffffff);
+	nv_wr32(dev, 0x400138, 0xffffffff);
+	nv_wr32(dev, 0x400118, 0xffffffff);
+	nv_wr32(dev, 0x400130, 0xffffffff);
+	nv_wr32(dev, 0x40011c, 0xffffffff);
+	nv_wr32(dev, 0x400134, 0xffffffff);
+	nv_wr32(dev, 0x400054, 0x34ce3464);
+
+	ret = nve0_graph_init_ctxctl(dev);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
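+/* translate the channel instance address reported by the hardware back
+ * into a channel id by scanning the channel table
+ */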
+int
+nve0_graph_isr_chid(struct drm_device *dev, u64 inst)
+{
+	struct nouveau_fifo_priv *pfifo = nv_engine(dev, NVOBJ_ENGINE_FIFO);
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nouveau_channel *chan;
+	unsigned long flags;
+	int i;
+
+	spin_lock_irqsave(&dev_priv->channels.lock, flags);
+	for (i = 0; i < pfifo->channels; i++) {
+		chan = dev_priv->channels.ptr[i];
+		if (!chan || !chan->ramin)
+			continue;
+
+		if (inst == chan->ramin->vinst)
+			break;
+	}
+	spin_unlock_irqrestore(&dev_priv->channels.lock, flags);
+	return i;
+}
+
+static void
+nve0_graph_ctxctl_isr(struct drm_device *dev)
+{
+	u32 ustat = nv_rd32(dev, 0x409c18);
+
+	if (ustat & 0x00000001)
+		NV_INFO(dev, "PGRAPH: CTXCTRL ucode error\n");
+	if (ustat & 0x00080000)
+		NV_INFO(dev, "PGRAPH: CTXCTRL watchdog timeout\n");
+	if (ustat & ~0x00080001)
+		NV_INFO(dev, "PGRAPH: CTXCTRL 0x%08x\n", ustat);
+
+	nve0_graph_ctxctl_debug(dev);
+	nv_wr32(dev, 0x409c20, ustat);
+}
+
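+/* decode PGRAPH trap status; DISPATCH, SHADER and the ROPs are handled
+ * individually, anything else is reported raw
+ */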
+static void
+nve0_graph_trap_isr(struct drm_device *dev, int chid)
+{
+	struct nve0_graph_priv *priv = nv_engine(dev, NVOBJ_ENGINE_GR);
+	u32 trap = nv_rd32(dev, 0x400108);
+	int rop;
+
+	if (trap & 0x00000001) {
+		u32 stat = nv_rd32(dev, 0x404000);
+		NV_INFO(dev, "PGRAPH: DISPATCH ch %d 0x%08x\n", chid, stat);
+		nv_wr32(dev, 0x404000, 0xc0000000);
+		nv_wr32(dev, 0x400108, 0x00000001);
+		trap &= ~0x00000001;
+	}
+
+	if (trap & 0x00000010) {
+		u32 stat = nv_rd32(dev, 0x405840);
+		NV_INFO(dev, "PGRAPH: SHADER ch %d 0x%08x\n", chid, stat);
+		nv_wr32(dev, 0x405840, 0xc0000000);
+		nv_wr32(dev, 0x400108, 0x00000010);
+		trap &= ~0x00000010;
+	}
+
+	if (trap & 0x02000000) {
+		for (rop = 0; rop < priv->rop_nr; rop++) {
+			u32 statz = nv_rd32(dev, ROP_UNIT(rop, 0x070));
+			u32 statc = nv_rd32(dev, ROP_UNIT(rop, 0x144));
+			NV_INFO(dev, "PGRAPH: ROP%d ch %d 0x%08x 0x%08x\n",
+				     rop, chid, statz, statc);
+			nv_wr32(dev, ROP_UNIT(rop, 0x070), 0xc0000000);
+			nv_wr32(dev, ROP_UNIT(rop, 0x144), 0xc0000000);
+		}
+		nv_wr32(dev, 0x400108, 0x02000000);
+		trap &= ~0x02000000;
+	}
+
+	if (trap) {
+		NV_INFO(dev, "PGRAPH: TRAP ch %d 0x%08x\n", chid, trap);
+		nv_wr32(dev, 0x400108, trap);
+	}
+}
+
+static void
+nve0_graph_isr(struct drm_device *dev)
+{
+	u64 inst = (u64)(nv_rd32(dev, 0x409b00) & 0x0fffffff) << 12;
+	u32 chid = nve0_graph_isr_chid(dev, inst);
+	u32 stat = nv_rd32(dev, 0x400100);
+	u32 addr = nv_rd32(dev, 0x400704);
+	u32 mthd = (addr & 0x00003ffc);
+	u32 subc = (addr & 0x00070000) >> 16;
+	u32 data = nv_rd32(dev, 0x400708);
+	u32 code = nv_rd32(dev, 0x400110);
+	u32 class = nv_rd32(dev, 0x404200 + (subc * 4));
+
+	if (stat & 0x00000010) {
+		if (nouveau_gpuobj_mthd_call2(dev, chid, class, mthd, data)) {
+			NV_INFO(dev, "PGRAPH: ILLEGAL_MTHD ch %d [0x%010llx] "
+				     "subc %d class 0x%04x mthd 0x%04x "
+				     "data 0x%08x\n",
+				chid, inst, subc, class, mthd, data);
+		}
+		nv_wr32(dev, 0x400100, 0x00000010);
+		stat &= ~0x00000010;
+	}
+
+	if (stat & 0x00000020) {
+		NV_INFO(dev, "PGRAPH: ILLEGAL_CLASS ch %d [0x%010llx] subc %d "
+			     "class 0x%04x mthd 0x%04x data 0x%08x\n",
+			chid, inst, subc, class, mthd, data);
+		nv_wr32(dev, 0x400100, 0x00000020);
+		stat &= ~0x00000020;
+	}
+
+	if (stat & 0x00100000) {
+		NV_INFO(dev, "PGRAPH: DATA_ERROR [");
+		nouveau_enum_print(nv50_data_error_names, code);
+		printk("] ch %d [0x%010llx] subc %d class 0x%04x "
+		       "mthd 0x%04x data 0x%08x\n",
+		       chid, inst, subc, class, mthd, data);
+		nv_wr32(dev, 0x400100, 0x00100000);
+		stat &= ~0x00100000;
+	}
+
+	if (stat & 0x00200000) {
+		nve0_graph_trap_isr(dev, chid);
+		nv_wr32(dev, 0x400100, 0x00200000);
+		stat &= ~0x00200000;
+	}
+
+	if (stat & 0x00080000) {
+		nve0_graph_ctxctl_isr(dev);
+		nv_wr32(dev, 0x400100, 0x00080000);
+		stat &= ~0x00080000;
+	}
+
+	if (stat) {
+		NV_INFO(dev, "PGRAPH: unknown stat 0x%08x\n", stat);
+		nv_wr32(dev, 0x400100, stat);
+	}
+
+	nv_wr32(dev, 0x400500, 0x00010001);
+}
+
+static int
+nve0_graph_create_fw(struct drm_device *dev, const char *fwname,
+		     struct nve0_graph_fuc *fuc)
+{
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	const struct firmware *fw;
+	char f[32];
+	int ret;
+
+	snprintf(f, sizeof(f), "nouveau/nv%02x_%s", dev_priv->chipset, fwname);
+	ret = request_firmware(&fw, f, &dev->pdev->dev);
+	if (ret) {
+		NV_ERROR(dev, "PGRAPH: failed to load %s\n", f);
+		return ret;
+	}
+
+	fuc->size = fw->size;
+	fuc->data = kmemdup(fw->data, fuc->size, GFP_KERNEL);
+	release_firmware(fw);
+	return (fuc->data != NULL) ? 0 : -ENOMEM;
+}
+
+static void
+nve0_graph_destroy_fw(struct nve0_graph_fuc *fuc)
+{
+	kfree(fuc->data);
+	fuc->data = NULL;
+}
+
+static void
+nve0_graph_destroy(struct drm_device *dev, int engine)
+{
+	struct nve0_graph_priv *priv = nv_engine(dev, engine);
+
+	nve0_graph_destroy_fw(&priv->fuc409c);
+	nve0_graph_destroy_fw(&priv->fuc409d);
+	nve0_graph_destroy_fw(&priv->fuc41ac);
+	nve0_graph_destroy_fw(&priv->fuc41ad);
+
+	nouveau_irq_unregister(dev, 12);
+
+	nouveau_gpuobj_ref(NULL, &priv->unk4188b8);
+	nouveau_gpuobj_ref(NULL, &priv->unk4188b4);
+
+	kfree(priv->grctx_vals);
+
+	NVOBJ_ENGINE_DEL(dev, GR);
+	kfree(priv);
+}
+
+int
+nve0_graph_create(struct drm_device *dev)
+{
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nve0_graph_priv *priv;
+	int ret, gpc, i;
+	u32 kepler;
+
+	kepler = nve0_graph_class(dev);
+	if (!kepler) {
+		NV_ERROR(dev, "PGRAPH: unsupported chipset, please report!\n");
+		return 0;
+	}
+
+	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	priv->base.destroy = nve0_graph_destroy;
+	priv->base.init = nve0_graph_init;
+	priv->base.fini = nve0_graph_fini;
+	priv->base.context_new = nve0_graph_context_new;
+	priv->base.context_del = nve0_graph_context_del;
+	priv->base.object_new = nve0_graph_object_new;
+
+	NVOBJ_ENGINE_ADD(dev, GR, &priv->base);
+	nouveau_irq_register(dev, 12, nve0_graph_isr);
+
+	NV_INFO(dev, "PGRAPH: using external firmware\n");
+	if (nve0_graph_create_fw(dev, "fuc409c", &priv->fuc409c) ||
+	    nve0_graph_create_fw(dev, "fuc409d", &priv->fuc409d) ||
+	    nve0_graph_create_fw(dev, "fuc41ac", &priv->fuc41ac) ||
+	    nve0_graph_create_fw(dev, "fuc41ad", &priv->fuc41ad)) {
+		ret = 0;
+		goto error;
+	}
+
+	ret = nouveau_gpuobj_new(dev, NULL, 0x1000, 256, 0, &priv->unk4188b4);
+	if (ret)
+		goto error;
+
+	ret = nouveau_gpuobj_new(dev, NULL, 0x1000, 256, 0, &priv->unk4188b8);
+	if (ret)
+		goto error;
+
+	for (i = 0; i < 0x1000; i += 4) {
+		nv_wo32(priv->unk4188b4, i, 0x00000010);
+		nv_wo32(priv->unk4188b8, i, 0x00000010);
+	}
+
+	priv->gpc_nr  =  nv_rd32(dev, 0x409604) & 0x0000001f;
+	priv->rop_nr = (nv_rd32(dev, 0x409604) & 0x001f0000) >> 16;
+	for (gpc = 0; gpc < priv->gpc_nr; gpc++) {
+		priv->tpc_nr[gpc] = nv_rd32(dev, GPC_UNIT(gpc, 0x2608));
+		priv->tpc_total += priv->tpc_nr[gpc];
+	}
+
+	switch (dev_priv->chipset) {
+	case 0xe4:
+		if (priv->tpc_total == 8)
+			priv->magic_not_rop_nr = 3;
+		else
+		if (priv->tpc_total == 7)
+			priv->magic_not_rop_nr = 1;
+		break;
+	case 0xe7:
+		priv->magic_not_rop_nr = 1;
+		break;
+	default:
+		break;
+	}
+
+	if (!priv->magic_not_rop_nr) {
+		NV_ERROR(dev, "PGRAPH: unknown config: %d/%d/%d/%d, %d\n",
+			 priv->tpc_nr[0], priv->tpc_nr[1], priv->tpc_nr[2],
+			 priv->tpc_nr[3], priv->rop_nr);
+		priv->magic_not_rop_nr = 0x00;
+	}
+
+	NVOBJ_CLASS(dev, 0xa097, GR); /* subc 0: 3D */
+	NVOBJ_CLASS(dev, 0xa0c0, GR); /* subc 1: COMPUTE */
+	NVOBJ_CLASS(dev, 0xa040, GR); /* subc 2: P2MF */
+	NVOBJ_CLASS(dev, 0x902d, GR); /* subc 3: 2D */
+	NVOBJ_CLASS(dev, 0xa0b5, GR); /* subc 4: COPY */
+	return 0;
+
+error:
+	nve0_graph_destroy(dev, NVOBJ_ENGINE_GR);
+	return ret;
+}

+ 89 - 0
drivers/gpu/drm/nouveau/nve0_graph.h

@@ -0,0 +1,89 @@
+/*
+ * Copyright 2012 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs
+ */
+
+#ifndef __NVE0_GRAPH_H__
+#define __NVE0_GRAPH_H__
+
+#define GPC_MAX 4
+#define TPC_MAX 32
+
+#define ROP_BCAST(r)     (0x408800 + (r))
+#define ROP_UNIT(u, r)   (0x410000 + (u) * 0x400 + (r))
+#define GPC_BCAST(r)     (0x418000 + (r))
+#define GPC_UNIT(t, r)   (0x500000 + (t) * 0x8000 + (r))
+#define TPC_UNIT(t, m, r) (0x504000 + (t) * 0x8000 + (m) * 0x800 + (r))
+
+struct nve0_graph_fuc {
+	u32 *data;
+	u32  size;
+};
+
+struct nve0_graph_priv {
+	struct nouveau_exec_engine base;
+
+	struct nve0_graph_fuc fuc409c;
+	struct nve0_graph_fuc fuc409d;
+	struct nve0_graph_fuc fuc41ac;
+	struct nve0_graph_fuc fuc41ad;
+
+	u8 gpc_nr;
+	u8 rop_nr;
+	u8 tpc_nr[GPC_MAX];
+	u8 tpc_total;
+
+	u32  grctx_size;
+	u32 *grctx_vals;
+	struct nouveau_gpuobj *unk4188b4;
+	struct nouveau_gpuobj *unk4188b8;
+
+	u8 magic_not_rop_nr;
+};
+
+struct nve0_graph_chan {
+	struct nouveau_gpuobj *grctx;
+	struct nouveau_gpuobj *unk408004; /* 0x418810 too */
+	struct nouveau_gpuobj *unk40800c; /* 0x419004 too */
+	struct nouveau_gpuobj *unk418810; /* 0x419848 too */
+	struct nouveau_gpuobj *mmio;
+	int mmio_nr;
+};
+
+int nve0_grctx_generate(struct nouveau_channel *);
+
+/* nve0_graph.c uses this also to determine supported chipsets */
+static inline u32
+nve0_graph_class(struct drm_device *dev)
+{
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+
+	switch (dev_priv->chipset) {
+	case 0xe4:
+	case 0xe7:
+		return 0xa097;
+	default:
+		return 0;
+	}
+}
+
+#endif

+ 2777 - 0
drivers/gpu/drm/nouveau/nve0_grctx.c

@@ -0,0 +1,2777 @@
+/*
+ * Copyright 2010 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs
+ */
+
+#include "drmP.h"
+#include "nouveau_drv.h"
+#include "nouveau_mm.h"
+#include "nve0_graph.h"
+
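+/* poke a single init command into what appears to be PGRAPH's method
+ * injector (0x400200/0x400204), polling 0x400700 until it's consumed
+ */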
+static void
+nv_icmd(struct drm_device *dev, u32 icmd, u32 data)
+{
+	nv_wr32(dev, 0x400204, data);
+	nv_wr32(dev, 0x400200, icmd);
+	while (nv_rd32(dev, 0x400700) & 0x00000002) {}
+}
+
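+/* enormous table of default method state, presumably captured from the
+ * binary driver; the values are stamped into the golden context verbatim
+ * via the ICMD interface above
+ */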
+static void
+nve0_grctx_generate_icmd(struct drm_device *dev)
+{
+	nv_wr32(dev, 0x400208, 0x80000000);
+	nv_icmd(dev, 0x001000, 0x00000004);
+	nv_icmd(dev, 0x000039, 0x00000000);
+	nv_icmd(dev, 0x00003a, 0x00000000);
+	nv_icmd(dev, 0x00003b, 0x00000000);
+	nv_icmd(dev, 0x0000a9, 0x0000ffff);
+	nv_icmd(dev, 0x000038, 0x0fac6881);
+	nv_icmd(dev, 0x00003d, 0x00000001);
+	nv_icmd(dev, 0x0000e8, 0x00000400);
+	nv_icmd(dev, 0x0000e9, 0x00000400);
+	nv_icmd(dev, 0x0000ea, 0x00000400);
+	nv_icmd(dev, 0x0000eb, 0x00000400);
+	nv_icmd(dev, 0x0000ec, 0x00000400);
+	nv_icmd(dev, 0x0000ed, 0x00000400);
+	nv_icmd(dev, 0x0000ee, 0x00000400);
+	nv_icmd(dev, 0x0000ef, 0x00000400);
+	nv_icmd(dev, 0x000078, 0x00000300);
+	nv_icmd(dev, 0x000079, 0x00000300);
+	nv_icmd(dev, 0x00007a, 0x00000300);
+	nv_icmd(dev, 0x00007b, 0x00000300);
+	nv_icmd(dev, 0x00007c, 0x00000300);
+	nv_icmd(dev, 0x00007d, 0x00000300);
+	nv_icmd(dev, 0x00007e, 0x00000300);
+	nv_icmd(dev, 0x00007f, 0x00000300);
+	nv_icmd(dev, 0x000050, 0x00000011);
+	nv_icmd(dev, 0x000058, 0x00000008);
+	nv_icmd(dev, 0x000059, 0x00000008);
+	nv_icmd(dev, 0x00005a, 0x00000008);
+	nv_icmd(dev, 0x00005b, 0x00000008);
+	nv_icmd(dev, 0x00005c, 0x00000008);
+	nv_icmd(dev, 0x00005d, 0x00000008);
+	nv_icmd(dev, 0x00005e, 0x00000008);
+	nv_icmd(dev, 0x00005f, 0x00000008);
+	nv_icmd(dev, 0x000208, 0x00000001);
+	nv_icmd(dev, 0x000209, 0x00000001);
+	nv_icmd(dev, 0x00020a, 0x00000001);
+	nv_icmd(dev, 0x00020b, 0x00000001);
+	nv_icmd(dev, 0x00020c, 0x00000001);
+	nv_icmd(dev, 0x00020d, 0x00000001);
+	nv_icmd(dev, 0x00020e, 0x00000001);
+	nv_icmd(dev, 0x00020f, 0x00000001);
+	nv_icmd(dev, 0x000081, 0x00000001);
+	nv_icmd(dev, 0x000085, 0x00000004);
+	nv_icmd(dev, 0x000088, 0x00000400);
+	nv_icmd(dev, 0x000090, 0x00000300);
+	nv_icmd(dev, 0x000098, 0x00001001);
+	nv_icmd(dev, 0x0000e3, 0x00000001);
+	nv_icmd(dev, 0x0000da, 0x00000001);
+	nv_icmd(dev, 0x0000f8, 0x00000003);
+	nv_icmd(dev, 0x0000fa, 0x00000001);
+	nv_icmd(dev, 0x00009f, 0x0000ffff);
+	nv_icmd(dev, 0x0000a0, 0x0000ffff);
+	nv_icmd(dev, 0x0000a1, 0x0000ffff);
+	nv_icmd(dev, 0x0000a2, 0x0000ffff);
+	nv_icmd(dev, 0x0000b1, 0x00000001);
+	nv_icmd(dev, 0x0000ad, 0x0000013e);
+	nv_icmd(dev, 0x0000e1, 0x00000010);
+	nv_icmd(dev, 0x000290, 0x00000000);
+	nv_icmd(dev, 0x000291, 0x00000000);
+	nv_icmd(dev, 0x000292, 0x00000000);
+	nv_icmd(dev, 0x000293, 0x00000000);
+	nv_icmd(dev, 0x000294, 0x00000000);
+	nv_icmd(dev, 0x000295, 0x00000000);
+	nv_icmd(dev, 0x000296, 0x00000000);
+	nv_icmd(dev, 0x000297, 0x00000000);
+	nv_icmd(dev, 0x000298, 0x00000000);
+	nv_icmd(dev, 0x000299, 0x00000000);
+	nv_icmd(dev, 0x00029a, 0x00000000);
+	nv_icmd(dev, 0x00029b, 0x00000000);
+	nv_icmd(dev, 0x00029c, 0x00000000);
+	nv_icmd(dev, 0x00029d, 0x00000000);
+	nv_icmd(dev, 0x00029e, 0x00000000);
+	nv_icmd(dev, 0x00029f, 0x00000000);
+	nv_icmd(dev, 0x0003b0, 0x00000000);
+	nv_icmd(dev, 0x0003b1, 0x00000000);
+	nv_icmd(dev, 0x0003b2, 0x00000000);
+	nv_icmd(dev, 0x0003b3, 0x00000000);
+	nv_icmd(dev, 0x0003b4, 0x00000000);
+	nv_icmd(dev, 0x0003b5, 0x00000000);
+	nv_icmd(dev, 0x0003b6, 0x00000000);
+	nv_icmd(dev, 0x0003b7, 0x00000000);
+	nv_icmd(dev, 0x0003b8, 0x00000000);
+	nv_icmd(dev, 0x0003b9, 0x00000000);
+	nv_icmd(dev, 0x0003ba, 0x00000000);
+	nv_icmd(dev, 0x0003bb, 0x00000000);
+	nv_icmd(dev, 0x0003bc, 0x00000000);
+	nv_icmd(dev, 0x0003bd, 0x00000000);
+	nv_icmd(dev, 0x0003be, 0x00000000);
+	nv_icmd(dev, 0x0003bf, 0x00000000);
+	nv_icmd(dev, 0x0002a0, 0x00000000);
+	nv_icmd(dev, 0x0002a1, 0x00000000);
+	nv_icmd(dev, 0x0002a2, 0x00000000);
+	nv_icmd(dev, 0x0002a3, 0x00000000);
+	nv_icmd(dev, 0x0002a4, 0x00000000);
+	nv_icmd(dev, 0x0002a5, 0x00000000);
+	nv_icmd(dev, 0x0002a6, 0x00000000);
+	nv_icmd(dev, 0x0002a7, 0x00000000);
+	nv_icmd(dev, 0x0002a8, 0x00000000);
+	nv_icmd(dev, 0x0002a9, 0x00000000);
+	nv_icmd(dev, 0x0002aa, 0x00000000);
+	nv_icmd(dev, 0x0002ab, 0x00000000);
+	nv_icmd(dev, 0x0002ac, 0x00000000);
+	nv_icmd(dev, 0x0002ad, 0x00000000);
+	nv_icmd(dev, 0x0002ae, 0x00000000);
+	nv_icmd(dev, 0x0002af, 0x00000000);
+	nv_icmd(dev, 0x000420, 0x00000000);
+	nv_icmd(dev, 0x000421, 0x00000000);
+	nv_icmd(dev, 0x000422, 0x00000000);
+	nv_icmd(dev, 0x000423, 0x00000000);
+	nv_icmd(dev, 0x000424, 0x00000000);
+	nv_icmd(dev, 0x000425, 0x00000000);
+	nv_icmd(dev, 0x000426, 0x00000000);
+	nv_icmd(dev, 0x000427, 0x00000000);
+	nv_icmd(dev, 0x000428, 0x00000000);
+	nv_icmd(dev, 0x000429, 0x00000000);
+	nv_icmd(dev, 0x00042a, 0x00000000);
+	nv_icmd(dev, 0x00042b, 0x00000000);
+	nv_icmd(dev, 0x00042c, 0x00000000);
+	nv_icmd(dev, 0x00042d, 0x00000000);
+	nv_icmd(dev, 0x00042e, 0x00000000);
+	nv_icmd(dev, 0x00042f, 0x00000000);
+	nv_icmd(dev, 0x0002b0, 0x00000000);
+	nv_icmd(dev, 0x0002b1, 0x00000000);
+	nv_icmd(dev, 0x0002b2, 0x00000000);
+	nv_icmd(dev, 0x0002b3, 0x00000000);
+	nv_icmd(dev, 0x0002b4, 0x00000000);
+	nv_icmd(dev, 0x0002b5, 0x00000000);
+	nv_icmd(dev, 0x0002b6, 0x00000000);
+	nv_icmd(dev, 0x0002b7, 0x00000000);
+	nv_icmd(dev, 0x0002b8, 0x00000000);
+	nv_icmd(dev, 0x0002b9, 0x00000000);
+	nv_icmd(dev, 0x0002ba, 0x00000000);
+	nv_icmd(dev, 0x0002bb, 0x00000000);
+	nv_icmd(dev, 0x0002bc, 0x00000000);
+	nv_icmd(dev, 0x0002bd, 0x00000000);
+	nv_icmd(dev, 0x0002be, 0x00000000);
+	nv_icmd(dev, 0x0002bf, 0x00000000);
+	nv_icmd(dev, 0x000430, 0x00000000);
+	nv_icmd(dev, 0x000431, 0x00000000);
+	nv_icmd(dev, 0x000432, 0x00000000);
+	nv_icmd(dev, 0x000433, 0x00000000);
+	nv_icmd(dev, 0x000434, 0x00000000);
+	nv_icmd(dev, 0x000435, 0x00000000);
+	nv_icmd(dev, 0x000436, 0x00000000);
+	nv_icmd(dev, 0x000437, 0x00000000);
+	nv_icmd(dev, 0x000438, 0x00000000);
+	nv_icmd(dev, 0x000439, 0x00000000);
+	nv_icmd(dev, 0x00043a, 0x00000000);
+	nv_icmd(dev, 0x00043b, 0x00000000);
+	nv_icmd(dev, 0x00043c, 0x00000000);
+	nv_icmd(dev, 0x00043d, 0x00000000);
+	nv_icmd(dev, 0x00043e, 0x00000000);
+	nv_icmd(dev, 0x00043f, 0x00000000);
+	nv_icmd(dev, 0x0002c0, 0x00000000);
+	nv_icmd(dev, 0x0002c1, 0x00000000);
+	nv_icmd(dev, 0x0002c2, 0x00000000);
+	nv_icmd(dev, 0x0002c3, 0x00000000);
+	nv_icmd(dev, 0x0002c4, 0x00000000);
+	nv_icmd(dev, 0x0002c5, 0x00000000);
+	nv_icmd(dev, 0x0002c6, 0x00000000);
+	nv_icmd(dev, 0x0002c7, 0x00000000);
+	nv_icmd(dev, 0x0002c8, 0x00000000);
+	nv_icmd(dev, 0x0002c9, 0x00000000);
+	nv_icmd(dev, 0x0002ca, 0x00000000);
+	nv_icmd(dev, 0x0002cb, 0x00000000);
+	nv_icmd(dev, 0x0002cc, 0x00000000);
+	nv_icmd(dev, 0x0002cd, 0x00000000);
+	nv_icmd(dev, 0x0002ce, 0x00000000);
+	nv_icmd(dev, 0x0002cf, 0x00000000);
+	nv_icmd(dev, 0x0004d0, 0x00000000);
+	nv_icmd(dev, 0x0004d1, 0x00000000);
+	nv_icmd(dev, 0x0004d2, 0x00000000);
+	nv_icmd(dev, 0x0004d3, 0x00000000);
+	nv_icmd(dev, 0x0004d4, 0x00000000);
+	nv_icmd(dev, 0x0004d5, 0x00000000);
+	nv_icmd(dev, 0x0004d6, 0x00000000);
+	nv_icmd(dev, 0x0004d7, 0x00000000);
+	nv_icmd(dev, 0x0004d8, 0x00000000);
+	nv_icmd(dev, 0x0004d9, 0x00000000);
+	nv_icmd(dev, 0x0004da, 0x00000000);
+	nv_icmd(dev, 0x0004db, 0x00000000);
+	nv_icmd(dev, 0x0004dc, 0x00000000);
+	nv_icmd(dev, 0x0004dd, 0x00000000);
+	nv_icmd(dev, 0x0004de, 0x00000000);
+	nv_icmd(dev, 0x0004df, 0x00000000);
+	nv_icmd(dev, 0x000720, 0x00000000);
+	nv_icmd(dev, 0x000721, 0x00000000);
+	nv_icmd(dev, 0x000722, 0x00000000);
+	nv_icmd(dev, 0x000723, 0x00000000);
+	nv_icmd(dev, 0x000724, 0x00000000);
+	nv_icmd(dev, 0x000725, 0x00000000);
+	nv_icmd(dev, 0x000726, 0x00000000);
+	nv_icmd(dev, 0x000727, 0x00000000);
+	nv_icmd(dev, 0x000728, 0x00000000);
+	nv_icmd(dev, 0x000729, 0x00000000);
+	nv_icmd(dev, 0x00072a, 0x00000000);
+	nv_icmd(dev, 0x00072b, 0x00000000);
+	nv_icmd(dev, 0x00072c, 0x00000000);
+	nv_icmd(dev, 0x00072d, 0x00000000);
+	nv_icmd(dev, 0x00072e, 0x00000000);
+	nv_icmd(dev, 0x00072f, 0x00000000);
+	nv_icmd(dev, 0x0008c0, 0x00000000);
+	nv_icmd(dev, 0x0008c1, 0x00000000);
+	nv_icmd(dev, 0x0008c2, 0x00000000);
+	nv_icmd(dev, 0x0008c3, 0x00000000);
+	nv_icmd(dev, 0x0008c4, 0x00000000);
+	nv_icmd(dev, 0x0008c5, 0x00000000);
+	nv_icmd(dev, 0x0008c6, 0x00000000);
+	nv_icmd(dev, 0x0008c7, 0x00000000);
+	nv_icmd(dev, 0x0008c8, 0x00000000);
+	nv_icmd(dev, 0x0008c9, 0x00000000);
+	nv_icmd(dev, 0x0008ca, 0x00000000);
+	nv_icmd(dev, 0x0008cb, 0x00000000);
+	nv_icmd(dev, 0x0008cc, 0x00000000);
+	nv_icmd(dev, 0x0008cd, 0x00000000);
+	nv_icmd(dev, 0x0008ce, 0x00000000);
+	nv_icmd(dev, 0x0008cf, 0x00000000);
+	nv_icmd(dev, 0x000890, 0x00000000);
+	nv_icmd(dev, 0x000891, 0x00000000);
+	nv_icmd(dev, 0x000892, 0x00000000);
+	nv_icmd(dev, 0x000893, 0x00000000);
+	nv_icmd(dev, 0x000894, 0x00000000);
+	nv_icmd(dev, 0x000895, 0x00000000);
+	nv_icmd(dev, 0x000896, 0x00000000);
+	nv_icmd(dev, 0x000897, 0x00000000);
+	nv_icmd(dev, 0x000898, 0x00000000);
+	nv_icmd(dev, 0x000899, 0x00000000);
+	nv_icmd(dev, 0x00089a, 0x00000000);
+	nv_icmd(dev, 0x00089b, 0x00000000);
+	nv_icmd(dev, 0x00089c, 0x00000000);
+	nv_icmd(dev, 0x00089d, 0x00000000);
+	nv_icmd(dev, 0x00089e, 0x00000000);
+	nv_icmd(dev, 0x00089f, 0x00000000);
+	nv_icmd(dev, 0x0008e0, 0x00000000);
+	nv_icmd(dev, 0x0008e1, 0x00000000);
+	nv_icmd(dev, 0x0008e2, 0x00000000);
+	nv_icmd(dev, 0x0008e3, 0x00000000);
+	nv_icmd(dev, 0x0008e4, 0x00000000);
+	nv_icmd(dev, 0x0008e5, 0x00000000);
+	nv_icmd(dev, 0x0008e6, 0x00000000);
+	nv_icmd(dev, 0x0008e7, 0x00000000);
+	nv_icmd(dev, 0x0008e8, 0x00000000);
+	nv_icmd(dev, 0x0008e9, 0x00000000);
+	nv_icmd(dev, 0x0008ea, 0x00000000);
+	nv_icmd(dev, 0x0008eb, 0x00000000);
+	nv_icmd(dev, 0x0008ec, 0x00000000);
+	nv_icmd(dev, 0x0008ed, 0x00000000);
+	nv_icmd(dev, 0x0008ee, 0x00000000);
+	nv_icmd(dev, 0x0008ef, 0x00000000);
+	nv_icmd(dev, 0x0008a0, 0x00000000);
+	nv_icmd(dev, 0x0008a1, 0x00000000);
+	nv_icmd(dev, 0x0008a2, 0x00000000);
+	nv_icmd(dev, 0x0008a3, 0x00000000);
+	nv_icmd(dev, 0x0008a4, 0x00000000);
+	nv_icmd(dev, 0x0008a5, 0x00000000);
+	nv_icmd(dev, 0x0008a6, 0x00000000);
+	nv_icmd(dev, 0x0008a7, 0x00000000);
+	nv_icmd(dev, 0x0008a8, 0x00000000);
+	nv_icmd(dev, 0x0008a9, 0x00000000);
+	nv_icmd(dev, 0x0008aa, 0x00000000);
+	nv_icmd(dev, 0x0008ab, 0x00000000);
+	nv_icmd(dev, 0x0008ac, 0x00000000);
+	nv_icmd(dev, 0x0008ad, 0x00000000);
+	nv_icmd(dev, 0x0008ae, 0x00000000);
+	nv_icmd(dev, 0x0008af, 0x00000000);
+	nv_icmd(dev, 0x0008f0, 0x00000000);
+	nv_icmd(dev, 0x0008f1, 0x00000000);
+	nv_icmd(dev, 0x0008f2, 0x00000000);
+	nv_icmd(dev, 0x0008f3, 0x00000000);
+	nv_icmd(dev, 0x0008f4, 0x00000000);
+	nv_icmd(dev, 0x0008f5, 0x00000000);
+	nv_icmd(dev, 0x0008f6, 0x00000000);
+	nv_icmd(dev, 0x0008f7, 0x00000000);
+	nv_icmd(dev, 0x0008f8, 0x00000000);
+	nv_icmd(dev, 0x0008f9, 0x00000000);
+	nv_icmd(dev, 0x0008fa, 0x00000000);
+	nv_icmd(dev, 0x0008fb, 0x00000000);
+	nv_icmd(dev, 0x0008fc, 0x00000000);
+	nv_icmd(dev, 0x0008fd, 0x00000000);
+	nv_icmd(dev, 0x0008fe, 0x00000000);
+	nv_icmd(dev, 0x0008ff, 0x00000000);
+	nv_icmd(dev, 0x00094c, 0x000000ff);
+	nv_icmd(dev, 0x00094d, 0xffffffff);
+	nv_icmd(dev, 0x00094e, 0x00000002);
+	nv_icmd(dev, 0x0002ec, 0x00000001);
+	nv_icmd(dev, 0x000303, 0x00000001);
+	nv_icmd(dev, 0x0002e6, 0x00000001);
+	nv_icmd(dev, 0x000466, 0x00000052);
+	nv_icmd(dev, 0x000301, 0x3f800000);
+	nv_icmd(dev, 0x000304, 0x30201000);
+	nv_icmd(dev, 0x000305, 0x70605040);
+	nv_icmd(dev, 0x000306, 0xb8a89888);
+	nv_icmd(dev, 0x000307, 0xf8e8d8c8);
+	nv_icmd(dev, 0x00030a, 0x00ffff00);
+	nv_icmd(dev, 0x00030b, 0x0000001a);
+	nv_icmd(dev, 0x00030c, 0x00000001);
+	nv_icmd(dev, 0x000318, 0x00000001);
+	nv_icmd(dev, 0x000340, 0x00000000);
+	nv_icmd(dev, 0x000375, 0x00000001);
+	nv_icmd(dev, 0x00037d, 0x00000006);
+	nv_icmd(dev, 0x0003a0, 0x00000002);
+	nv_icmd(dev, 0x0003aa, 0x00000001);
+	nv_icmd(dev, 0x0003a9, 0x00000001);
+	nv_icmd(dev, 0x000380, 0x00000001);
+	nv_icmd(dev, 0x000383, 0x00000011);
+	nv_icmd(dev, 0x000360, 0x00000040);
+	nv_icmd(dev, 0x000366, 0x00000000);
+	nv_icmd(dev, 0x000367, 0x00000000);
+	nv_icmd(dev, 0x000368, 0x00000fff);
+	nv_icmd(dev, 0x000370, 0x00000000);
+	nv_icmd(dev, 0x000371, 0x00000000);
+	nv_icmd(dev, 0x000372, 0x000fffff);
+	nv_icmd(dev, 0x00037a, 0x00000012);
+	nv_icmd(dev, 0x000619, 0x00000003);
+	nv_icmd(dev, 0x000811, 0x00000003);
+	nv_icmd(dev, 0x000812, 0x00000004);
+	nv_icmd(dev, 0x000813, 0x00000006);
+	nv_icmd(dev, 0x000814, 0x00000008);
+	nv_icmd(dev, 0x000815, 0x0000000b);
+	nv_icmd(dev, 0x000800, 0x00000001);
+	nv_icmd(dev, 0x000801, 0x00000001);
+	nv_icmd(dev, 0x000802, 0x00000001);
+	nv_icmd(dev, 0x000803, 0x00000001);
+	nv_icmd(dev, 0x000804, 0x00000001);
+	nv_icmd(dev, 0x000805, 0x00000001);
+	nv_icmd(dev, 0x000632, 0x00000001);
+	nv_icmd(dev, 0x000633, 0x00000002);
+	nv_icmd(dev, 0x000634, 0x00000003);
+	nv_icmd(dev, 0x000635, 0x00000004);
+	nv_icmd(dev, 0x000654, 0x3f800000);
+	nv_icmd(dev, 0x000657, 0x3f800000);
+	nv_icmd(dev, 0x000655, 0x3f800000);
+	nv_icmd(dev, 0x000656, 0x3f800000);
+	nv_icmd(dev, 0x0006cd, 0x3f800000);
+	nv_icmd(dev, 0x0007f5, 0x3f800000);
+	nv_icmd(dev, 0x0007dc, 0x39291909);
+	nv_icmd(dev, 0x0007dd, 0x79695949);
+	nv_icmd(dev, 0x0007de, 0xb9a99989);
+	nv_icmd(dev, 0x0007df, 0xf9e9d9c9);
+	nv_icmd(dev, 0x0007e8, 0x00003210);
+	nv_icmd(dev, 0x0007e9, 0x00007654);
+	nv_icmd(dev, 0x0007ea, 0x00000098);
+	nv_icmd(dev, 0x0007ec, 0x39291909);
+	nv_icmd(dev, 0x0007ed, 0x79695949);
+	nv_icmd(dev, 0x0007ee, 0xb9a99989);
+	nv_icmd(dev, 0x0007ef, 0xf9e9d9c9);
+	nv_icmd(dev, 0x0007f0, 0x00003210);
+	nv_icmd(dev, 0x0007f1, 0x00007654);
+	nv_icmd(dev, 0x0007f2, 0x00000098);
+	nv_icmd(dev, 0x0005a5, 0x00000001);
+	nv_icmd(dev, 0x000980, 0x00000000);
+	nv_icmd(dev, 0x000981, 0x00000000);
+	nv_icmd(dev, 0x000982, 0x00000000);
+	nv_icmd(dev, 0x000983, 0x00000000);
+	nv_icmd(dev, 0x000984, 0x00000000);
+	nv_icmd(dev, 0x000985, 0x00000000);
+	nv_icmd(dev, 0x000986, 0x00000000);
+	nv_icmd(dev, 0x000987, 0x00000000);
+	nv_icmd(dev, 0x000988, 0x00000000);
+	nv_icmd(dev, 0x000989, 0x00000000);
+	nv_icmd(dev, 0x00098a, 0x00000000);
+	nv_icmd(dev, 0x00098b, 0x00000000);
+	nv_icmd(dev, 0x00098c, 0x00000000);
+	nv_icmd(dev, 0x00098d, 0x00000000);
+	nv_icmd(dev, 0x00098e, 0x00000000);
+	nv_icmd(dev, 0x00098f, 0x00000000);
+	nv_icmd(dev, 0x000990, 0x00000000);
+	nv_icmd(dev, 0x000991, 0x00000000);
+	nv_icmd(dev, 0x000992, 0x00000000);
+	nv_icmd(dev, 0x000993, 0x00000000);
+	nv_icmd(dev, 0x000994, 0x00000000);
+	nv_icmd(dev, 0x000995, 0x00000000);
+	nv_icmd(dev, 0x000996, 0x00000000);
+	nv_icmd(dev, 0x000997, 0x00000000);
+	nv_icmd(dev, 0x000998, 0x00000000);
+	nv_icmd(dev, 0x000999, 0x00000000);
+	nv_icmd(dev, 0x00099a, 0x00000000);
+	nv_icmd(dev, 0x00099b, 0x00000000);
+	nv_icmd(dev, 0x00099c, 0x00000000);
+	nv_icmd(dev, 0x00099d, 0x00000000);
+	nv_icmd(dev, 0x00099e, 0x00000000);
+	nv_icmd(dev, 0x00099f, 0x00000000);
+	nv_icmd(dev, 0x0009a0, 0x00000000);
+	nv_icmd(dev, 0x0009a1, 0x00000000);
+	nv_icmd(dev, 0x0009a2, 0x00000000);
+	nv_icmd(dev, 0x0009a3, 0x00000000);
+	nv_icmd(dev, 0x0009a4, 0x00000000);
+	nv_icmd(dev, 0x0009a5, 0x00000000);
+	nv_icmd(dev, 0x0009a6, 0x00000000);
+	nv_icmd(dev, 0x0009a7, 0x00000000);
+	nv_icmd(dev, 0x0009a8, 0x00000000);
+	nv_icmd(dev, 0x0009a9, 0x00000000);
+	nv_icmd(dev, 0x0009aa, 0x00000000);
+	nv_icmd(dev, 0x0009ab, 0x00000000);
+	nv_icmd(dev, 0x0009ac, 0x00000000);
+	nv_icmd(dev, 0x0009ad, 0x00000000);
+	nv_icmd(dev, 0x0009ae, 0x00000000);
+	nv_icmd(dev, 0x0009af, 0x00000000);
+	nv_icmd(dev, 0x0009b0, 0x00000000);
+	nv_icmd(dev, 0x0009b1, 0x00000000);
+	nv_icmd(dev, 0x0009b2, 0x00000000);
+	nv_icmd(dev, 0x0009b3, 0x00000000);
+	nv_icmd(dev, 0x0009b4, 0x00000000);
+	nv_icmd(dev, 0x0009b5, 0x00000000);
+	nv_icmd(dev, 0x0009b6, 0x00000000);
+	nv_icmd(dev, 0x0009b7, 0x00000000);
+	nv_icmd(dev, 0x0009b8, 0x00000000);
+	nv_icmd(dev, 0x0009b9, 0x00000000);
+	nv_icmd(dev, 0x0009ba, 0x00000000);
+	nv_icmd(dev, 0x0009bb, 0x00000000);
+	nv_icmd(dev, 0x0009bc, 0x00000000);
+	nv_icmd(dev, 0x0009bd, 0x00000000);
+	nv_icmd(dev, 0x0009be, 0x00000000);
+	nv_icmd(dev, 0x0009bf, 0x00000000);
+	nv_icmd(dev, 0x0009c0, 0x00000000);
+	nv_icmd(dev, 0x0009c1, 0x00000000);
+	nv_icmd(dev, 0x0009c2, 0x00000000);
+	nv_icmd(dev, 0x0009c3, 0x00000000);
+	nv_icmd(dev, 0x0009c4, 0x00000000);
+	nv_icmd(dev, 0x0009c5, 0x00000000);
+	nv_icmd(dev, 0x0009c6, 0x00000000);
+	nv_icmd(dev, 0x0009c7, 0x00000000);
+	nv_icmd(dev, 0x0009c8, 0x00000000);
+	nv_icmd(dev, 0x0009c9, 0x00000000);
+	nv_icmd(dev, 0x0009ca, 0x00000000);
+	nv_icmd(dev, 0x0009cb, 0x00000000);
+	nv_icmd(dev, 0x0009cc, 0x00000000);
+	nv_icmd(dev, 0x0009cd, 0x00000000);
+	nv_icmd(dev, 0x0009ce, 0x00000000);
+	nv_icmd(dev, 0x0009cf, 0x00000000);
+	nv_icmd(dev, 0x0009d0, 0x00000000);
+	nv_icmd(dev, 0x0009d1, 0x00000000);
+	nv_icmd(dev, 0x0009d2, 0x00000000);
+	nv_icmd(dev, 0x0009d3, 0x00000000);
+	nv_icmd(dev, 0x0009d4, 0x00000000);
+	nv_icmd(dev, 0x0009d5, 0x00000000);
+	nv_icmd(dev, 0x0009d6, 0x00000000);
+	nv_icmd(dev, 0x0009d7, 0x00000000);
+	nv_icmd(dev, 0x0009d8, 0x00000000);
+	nv_icmd(dev, 0x0009d9, 0x00000000);
+	nv_icmd(dev, 0x0009da, 0x00000000);
+	nv_icmd(dev, 0x0009db, 0x00000000);
+	nv_icmd(dev, 0x0009dc, 0x00000000);
+	nv_icmd(dev, 0x0009dd, 0x00000000);
+	nv_icmd(dev, 0x0009de, 0x00000000);
+	nv_icmd(dev, 0x0009df, 0x00000000);
+	nv_icmd(dev, 0x0009e0, 0x00000000);
+	nv_icmd(dev, 0x0009e1, 0x00000000);
+	nv_icmd(dev, 0x0009e2, 0x00000000);
+	nv_icmd(dev, 0x0009e3, 0x00000000);
+	nv_icmd(dev, 0x0009e4, 0x00000000);
+	nv_icmd(dev, 0x0009e5, 0x00000000);
+	nv_icmd(dev, 0x0009e6, 0x00000000);
+	nv_icmd(dev, 0x0009e7, 0x00000000);
+	nv_icmd(dev, 0x0009e8, 0x00000000);
+	nv_icmd(dev, 0x0009e9, 0x00000000);
+	nv_icmd(dev, 0x0009ea, 0x00000000);
+	nv_icmd(dev, 0x0009eb, 0x00000000);
+	nv_icmd(dev, 0x0009ec, 0x00000000);
+	nv_icmd(dev, 0x0009ed, 0x00000000);
+	nv_icmd(dev, 0x0009ee, 0x00000000);
+	nv_icmd(dev, 0x0009ef, 0x00000000);
+	nv_icmd(dev, 0x0009f0, 0x00000000);
+	nv_icmd(dev, 0x0009f1, 0x00000000);
+	nv_icmd(dev, 0x0009f2, 0x00000000);
+	nv_icmd(dev, 0x0009f3, 0x00000000);
+	nv_icmd(dev, 0x0009f4, 0x00000000);
+	nv_icmd(dev, 0x0009f5, 0x00000000);
+	nv_icmd(dev, 0x0009f6, 0x00000000);
+	nv_icmd(dev, 0x0009f7, 0x00000000);
+	nv_icmd(dev, 0x0009f8, 0x00000000);
+	nv_icmd(dev, 0x0009f9, 0x00000000);
+	nv_icmd(dev, 0x0009fa, 0x00000000);
+	nv_icmd(dev, 0x0009fb, 0x00000000);
+	nv_icmd(dev, 0x0009fc, 0x00000000);
+	nv_icmd(dev, 0x0009fd, 0x00000000);
+	nv_icmd(dev, 0x0009fe, 0x00000000);
+	nv_icmd(dev, 0x0009ff, 0x00000000);
+	nv_icmd(dev, 0x000468, 0x00000004);
+	nv_icmd(dev, 0x00046c, 0x00000001);
+	nv_icmd(dev, 0x000470, 0x00000000);
+	nv_icmd(dev, 0x000471, 0x00000000);
+	nv_icmd(dev, 0x000472, 0x00000000);
+	nv_icmd(dev, 0x000473, 0x00000000);
+	nv_icmd(dev, 0x000474, 0x00000000);
+	nv_icmd(dev, 0x000475, 0x00000000);
+	nv_icmd(dev, 0x000476, 0x00000000);
+	nv_icmd(dev, 0x000477, 0x00000000);
+	nv_icmd(dev, 0x000478, 0x00000000);
+	nv_icmd(dev, 0x000479, 0x00000000);
+	nv_icmd(dev, 0x00047a, 0x00000000);
+	nv_icmd(dev, 0x00047b, 0x00000000);
+	nv_icmd(dev, 0x00047c, 0x00000000);
+	nv_icmd(dev, 0x00047d, 0x00000000);
+	nv_icmd(dev, 0x00047e, 0x00000000);
+	nv_icmd(dev, 0x00047f, 0x00000000);
+	nv_icmd(dev, 0x000480, 0x00000000);
+	nv_icmd(dev, 0x000481, 0x00000000);
+	nv_icmd(dev, 0x000482, 0x00000000);
+	nv_icmd(dev, 0x000483, 0x00000000);
+	nv_icmd(dev, 0x000484, 0x00000000);
+	nv_icmd(dev, 0x000485, 0x00000000);
+	nv_icmd(dev, 0x000486, 0x00000000);
+	nv_icmd(dev, 0x000487, 0x00000000);
+	nv_icmd(dev, 0x000488, 0x00000000);
+	nv_icmd(dev, 0x000489, 0x00000000);
+	nv_icmd(dev, 0x00048a, 0x00000000);
+	nv_icmd(dev, 0x00048b, 0x00000000);
+	nv_icmd(dev, 0x00048c, 0x00000000);
+	nv_icmd(dev, 0x00048d, 0x00000000);
+	nv_icmd(dev, 0x00048e, 0x00000000);
+	nv_icmd(dev, 0x00048f, 0x00000000);
+	nv_icmd(dev, 0x000490, 0x00000000);
+	nv_icmd(dev, 0x000491, 0x00000000);
+	nv_icmd(dev, 0x000492, 0x00000000);
+	nv_icmd(dev, 0x000493, 0x00000000);
+	nv_icmd(dev, 0x000494, 0x00000000);
+	nv_icmd(dev, 0x000495, 0x00000000);
+	nv_icmd(dev, 0x000496, 0x00000000);
+	nv_icmd(dev, 0x000497, 0x00000000);
+	nv_icmd(dev, 0x000498, 0x00000000);
+	nv_icmd(dev, 0x000499, 0x00000000);
+	nv_icmd(dev, 0x00049a, 0x00000000);
+	nv_icmd(dev, 0x00049b, 0x00000000);
+	nv_icmd(dev, 0x00049c, 0x00000000);
+	nv_icmd(dev, 0x00049d, 0x00000000);
+	nv_icmd(dev, 0x00049e, 0x00000000);
+	nv_icmd(dev, 0x00049f, 0x00000000);
+	nv_icmd(dev, 0x0004a0, 0x00000000);
+	nv_icmd(dev, 0x0004a1, 0x00000000);
+	nv_icmd(dev, 0x0004a2, 0x00000000);
+	nv_icmd(dev, 0x0004a3, 0x00000000);
+	nv_icmd(dev, 0x0004a4, 0x00000000);
+	nv_icmd(dev, 0x0004a5, 0x00000000);
+	nv_icmd(dev, 0x0004a6, 0x00000000);
+	nv_icmd(dev, 0x0004a7, 0x00000000);
+	nv_icmd(dev, 0x0004a8, 0x00000000);
+	nv_icmd(dev, 0x0004a9, 0x00000000);
+	nv_icmd(dev, 0x0004aa, 0x00000000);
+	nv_icmd(dev, 0x0004ab, 0x00000000);
+	nv_icmd(dev, 0x0004ac, 0x00000000);
+	nv_icmd(dev, 0x0004ad, 0x00000000);
+	nv_icmd(dev, 0x0004ae, 0x00000000);
+	nv_icmd(dev, 0x0004af, 0x00000000);
+	nv_icmd(dev, 0x0004b0, 0x00000000);
+	nv_icmd(dev, 0x0004b1, 0x00000000);
+	nv_icmd(dev, 0x0004b2, 0x00000000);
+	nv_icmd(dev, 0x0004b3, 0x00000000);
+	nv_icmd(dev, 0x0004b4, 0x00000000);
+	nv_icmd(dev, 0x0004b5, 0x00000000);
+	nv_icmd(dev, 0x0004b6, 0x00000000);
+	nv_icmd(dev, 0x0004b7, 0x00000000);
+	nv_icmd(dev, 0x0004b8, 0x00000000);
+	nv_icmd(dev, 0x0004b9, 0x00000000);
+	nv_icmd(dev, 0x0004ba, 0x00000000);
+	nv_icmd(dev, 0x0004bb, 0x00000000);
+	nv_icmd(dev, 0x0004bc, 0x00000000);
+	nv_icmd(dev, 0x0004bd, 0x00000000);
+	nv_icmd(dev, 0x0004be, 0x00000000);
+	nv_icmd(dev, 0x0004bf, 0x00000000);
+	nv_icmd(dev, 0x0004c0, 0x00000000);
+	nv_icmd(dev, 0x0004c1, 0x00000000);
+	nv_icmd(dev, 0x0004c2, 0x00000000);
+	nv_icmd(dev, 0x0004c3, 0x00000000);
+	nv_icmd(dev, 0x0004c4, 0x00000000);
+	nv_icmd(dev, 0x0004c5, 0x00000000);
+	nv_icmd(dev, 0x0004c6, 0x00000000);
+	nv_icmd(dev, 0x0004c7, 0x00000000);
+	nv_icmd(dev, 0x0004c8, 0x00000000);
+	nv_icmd(dev, 0x0004c9, 0x00000000);
+	nv_icmd(dev, 0x0004ca, 0x00000000);
+	nv_icmd(dev, 0x0004cb, 0x00000000);
+	nv_icmd(dev, 0x0004cc, 0x00000000);
+	nv_icmd(dev, 0x0004cd, 0x00000000);
+	nv_icmd(dev, 0x0004ce, 0x00000000);
+	nv_icmd(dev, 0x0004cf, 0x00000000);
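+	/* 0x3f800000 in the next block is the IEEE-754 encoding of 1.0f */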
+	nv_icmd(dev, 0x000510, 0x3f800000);
+	nv_icmd(dev, 0x000511, 0x3f800000);
+	nv_icmd(dev, 0x000512, 0x3f800000);
+	nv_icmd(dev, 0x000513, 0x3f800000);
+	nv_icmd(dev, 0x000514, 0x3f800000);
+	nv_icmd(dev, 0x000515, 0x3f800000);
+	nv_icmd(dev, 0x000516, 0x3f800000);
+	nv_icmd(dev, 0x000517, 0x3f800000);
+	nv_icmd(dev, 0x000518, 0x3f800000);
+	nv_icmd(dev, 0x000519, 0x3f800000);
+	nv_icmd(dev, 0x00051a, 0x3f800000);
+	nv_icmd(dev, 0x00051b, 0x3f800000);
+	nv_icmd(dev, 0x00051c, 0x3f800000);
+	nv_icmd(dev, 0x00051d, 0x3f800000);
+	nv_icmd(dev, 0x00051e, 0x3f800000);
+	nv_icmd(dev, 0x00051f, 0x3f800000);
+	nv_icmd(dev, 0x000520, 0x000002b6);
+	nv_icmd(dev, 0x000529, 0x00000001);
+	nv_icmd(dev, 0x000530, 0xffff0000);
+	nv_icmd(dev, 0x000531, 0xffff0000);
+	nv_icmd(dev, 0x000532, 0xffff0000);
+	nv_icmd(dev, 0x000533, 0xffff0000);
+	nv_icmd(dev, 0x000534, 0xffff0000);
+	nv_icmd(dev, 0x000535, 0xffff0000);
+	nv_icmd(dev, 0x000536, 0xffff0000);
+	nv_icmd(dev, 0x000537, 0xffff0000);
+	nv_icmd(dev, 0x000538, 0xffff0000);
+	nv_icmd(dev, 0x000539, 0xffff0000);
+	nv_icmd(dev, 0x00053a, 0xffff0000);
+	nv_icmd(dev, 0x00053b, 0xffff0000);
+	nv_icmd(dev, 0x00053c, 0xffff0000);
+	nv_icmd(dev, 0x00053d, 0xffff0000);
+	nv_icmd(dev, 0x00053e, 0xffff0000);
+	nv_icmd(dev, 0x00053f, 0xffff0000);
+	nv_icmd(dev, 0x000585, 0x0000003f);
+	nv_icmd(dev, 0x000576, 0x00000003);
+	nv_icmd(dev, 0x00057b, 0x00000059);
+	nv_icmd(dev, 0x000586, 0x00000040);
+	nv_icmd(dev, 0x000582, 0x00000080);
+	nv_icmd(dev, 0x000583, 0x00000080);
+	nv_icmd(dev, 0x0005c2, 0x00000001);
+	nv_icmd(dev, 0x000638, 0x00000001);
+	nv_icmd(dev, 0x000639, 0x00000001);
+	nv_icmd(dev, 0x00063a, 0x00000002);
+	nv_icmd(dev, 0x00063b, 0x00000001);
+	nv_icmd(dev, 0x00063c, 0x00000001);
+	nv_icmd(dev, 0x00063d, 0x00000002);
+	nv_icmd(dev, 0x00063e, 0x00000001);
+	nv_icmd(dev, 0x0008b8, 0x00000001);
+	nv_icmd(dev, 0x0008b9, 0x00000001);
+	nv_icmd(dev, 0x0008ba, 0x00000001);
+	nv_icmd(dev, 0x0008bb, 0x00000001);
+	nv_icmd(dev, 0x0008bc, 0x00000001);
+	nv_icmd(dev, 0x0008bd, 0x00000001);
+	nv_icmd(dev, 0x0008be, 0x00000001);
+	nv_icmd(dev, 0x0008bf, 0x00000001);
+	nv_icmd(dev, 0x000900, 0x00000001);
+	nv_icmd(dev, 0x000901, 0x00000001);
+	nv_icmd(dev, 0x000902, 0x00000001);
+	nv_icmd(dev, 0x000903, 0x00000001);
+	nv_icmd(dev, 0x000904, 0x00000001);
+	nv_icmd(dev, 0x000905, 0x00000001);
+	nv_icmd(dev, 0x000906, 0x00000001);
+	nv_icmd(dev, 0x000907, 0x00000001);
+	nv_icmd(dev, 0x000908, 0x00000002);
+	nv_icmd(dev, 0x000909, 0x00000002);
+	nv_icmd(dev, 0x00090a, 0x00000002);
+	nv_icmd(dev, 0x00090b, 0x00000002);
+	nv_icmd(dev, 0x00090c, 0x00000002);
+	nv_icmd(dev, 0x00090d, 0x00000002);
+	nv_icmd(dev, 0x00090e, 0x00000002);
+	nv_icmd(dev, 0x00090f, 0x00000002);
+	nv_icmd(dev, 0x000910, 0x00000001);
+	nv_icmd(dev, 0x000911, 0x00000001);
+	nv_icmd(dev, 0x000912, 0x00000001);
+	nv_icmd(dev, 0x000913, 0x00000001);
+	nv_icmd(dev, 0x000914, 0x00000001);
+	nv_icmd(dev, 0x000915, 0x00000001);
+	nv_icmd(dev, 0x000916, 0x00000001);
+	nv_icmd(dev, 0x000917, 0x00000001);
+	nv_icmd(dev, 0x000918, 0x00000001);
+	nv_icmd(dev, 0x000919, 0x00000001);
+	nv_icmd(dev, 0x00091a, 0x00000001);
+	nv_icmd(dev, 0x00091b, 0x00000001);
+	nv_icmd(dev, 0x00091c, 0x00000001);
+	nv_icmd(dev, 0x00091d, 0x00000001);
+	nv_icmd(dev, 0x00091e, 0x00000001);
+	nv_icmd(dev, 0x00091f, 0x00000001);
+	nv_icmd(dev, 0x000920, 0x00000002);
+	nv_icmd(dev, 0x000921, 0x00000002);
+	nv_icmd(dev, 0x000922, 0x00000002);
+	nv_icmd(dev, 0x000923, 0x00000002);
+	nv_icmd(dev, 0x000924, 0x00000002);
+	nv_icmd(dev, 0x000925, 0x00000002);
+	nv_icmd(dev, 0x000926, 0x00000002);
+	nv_icmd(dev, 0x000927, 0x00000002);
+	nv_icmd(dev, 0x000928, 0x00000001);
+	nv_icmd(dev, 0x000929, 0x00000001);
+	nv_icmd(dev, 0x00092a, 0x00000001);
+	nv_icmd(dev, 0x00092b, 0x00000001);
+	nv_icmd(dev, 0x00092c, 0x00000001);
+	nv_icmd(dev, 0x00092d, 0x00000001);
+	nv_icmd(dev, 0x00092e, 0x00000001);
+	nv_icmd(dev, 0x00092f, 0x00000001);
+	nv_icmd(dev, 0x000648, 0x00000001);
+	nv_icmd(dev, 0x000649, 0x00000001);
+	nv_icmd(dev, 0x00064a, 0x00000001);
+	nv_icmd(dev, 0x00064b, 0x00000001);
+	nv_icmd(dev, 0x00064c, 0x00000001);
+	nv_icmd(dev, 0x00064d, 0x00000001);
+	nv_icmd(dev, 0x00064e, 0x00000001);
+	nv_icmd(dev, 0x00064f, 0x00000001);
+	nv_icmd(dev, 0x000650, 0x00000001);
+	nv_icmd(dev, 0x000658, 0x0000000f);
+	nv_icmd(dev, 0x0007ff, 0x0000000a);
+	nv_icmd(dev, 0x00066a, 0x40000000);
+	nv_icmd(dev, 0x00066b, 0x10000000);
+	nv_icmd(dev, 0x00066c, 0xffff0000);
+	nv_icmd(dev, 0x00066d, 0xffff0000);
+	nv_icmd(dev, 0x0007af, 0x00000008);
+	nv_icmd(dev, 0x0007b0, 0x00000008);
+	nv_icmd(dev, 0x0007f6, 0x00000001);
+	nv_icmd(dev, 0x0006b2, 0x00000055);
+	nv_icmd(dev, 0x0007ad, 0x00000003);
+	nv_icmd(dev, 0x000937, 0x00000001);
+	nv_icmd(dev, 0x000971, 0x00000008);
+	nv_icmd(dev, 0x000972, 0x00000040);
+	nv_icmd(dev, 0x000973, 0x0000012c);
+	nv_icmd(dev, 0x00097c, 0x00000040);
+	nv_icmd(dev, 0x000979, 0x00000003);
+	nv_icmd(dev, 0x000975, 0x00000020);
+	nv_icmd(dev, 0x000976, 0x00000001);
+	nv_icmd(dev, 0x000977, 0x00000020);
+	nv_icmd(dev, 0x000978, 0x00000001);
+	nv_icmd(dev, 0x000957, 0x00000003);
+	nv_icmd(dev, 0x00095e, 0x20164010);
+	nv_icmd(dev, 0x00095f, 0x00000020);
+	nv_icmd(dev, 0x00097d, 0x00000020);
+	nv_icmd(dev, 0x000683, 0x00000006);
+	nv_icmd(dev, 0x000685, 0x003fffff);
+	nv_icmd(dev, 0x000687, 0x003fffff);
+	nv_icmd(dev, 0x0006a0, 0x00000005);
+	nv_icmd(dev, 0x000840, 0x00400008);
+	nv_icmd(dev, 0x000841, 0x08000080);
+	nv_icmd(dev, 0x000842, 0x00400008);
+	nv_icmd(dev, 0x000843, 0x08000080);
+	nv_icmd(dev, 0x000818, 0x00000000);
+	nv_icmd(dev, 0x000819, 0x00000000);
+	nv_icmd(dev, 0x00081a, 0x00000000);
+	nv_icmd(dev, 0x00081b, 0x00000000);
+	nv_icmd(dev, 0x00081c, 0x00000000);
+	nv_icmd(dev, 0x00081d, 0x00000000);
+	nv_icmd(dev, 0x00081e, 0x00000000);
+	nv_icmd(dev, 0x00081f, 0x00000000);
+	nv_icmd(dev, 0x000848, 0x00000000);
+	nv_icmd(dev, 0x000849, 0x00000000);
+	nv_icmd(dev, 0x00084a, 0x00000000);
+	nv_icmd(dev, 0x00084b, 0x00000000);
+	nv_icmd(dev, 0x00084c, 0x00000000);
+	nv_icmd(dev, 0x00084d, 0x00000000);
+	nv_icmd(dev, 0x00084e, 0x00000000);
+	nv_icmd(dev, 0x00084f, 0x00000000);
+	nv_icmd(dev, 0x000850, 0x00000000);
+	nv_icmd(dev, 0x000851, 0x00000000);
+	nv_icmd(dev, 0x000852, 0x00000000);
+	nv_icmd(dev, 0x000853, 0x00000000);
+	nv_icmd(dev, 0x000854, 0x00000000);
+	nv_icmd(dev, 0x000855, 0x00000000);
+	nv_icmd(dev, 0x000856, 0x00000000);
+	nv_icmd(dev, 0x000857, 0x00000000);
+	nv_icmd(dev, 0x000738, 0x00000000);
+	nv_icmd(dev, 0x0006aa, 0x00000001);
+	nv_icmd(dev, 0x0006ab, 0x00000002);
+	nv_icmd(dev, 0x0006ac, 0x00000080);
+	nv_icmd(dev, 0x0006ad, 0x00000100);
+	nv_icmd(dev, 0x0006ae, 0x00000100);
+	nv_icmd(dev, 0x0006b1, 0x00000011);
+	nv_icmd(dev, 0x0006bb, 0x000000cf);
+	nv_icmd(dev, 0x0006ce, 0x2a712488);
+	nv_icmd(dev, 0x000739, 0x4085c000);
+	nv_icmd(dev, 0x00073a, 0x00000080);
+	nv_icmd(dev, 0x000786, 0x80000100);
+	nv_icmd(dev, 0x00073c, 0x00010100);
+	nv_icmd(dev, 0x00073d, 0x02800000);
+	nv_icmd(dev, 0x000787, 0x000000cf);
+	nv_icmd(dev, 0x00078c, 0x00000008);
+	nv_icmd(dev, 0x000792, 0x00000001);
+	nv_icmd(dev, 0x000794, 0x00000001);
+	nv_icmd(dev, 0x000795, 0x00000001);
+	nv_icmd(dev, 0x000796, 0x00000001);
+	nv_icmd(dev, 0x000797, 0x000000cf);
+	nv_icmd(dev, 0x000836, 0x00000001);
+	nv_icmd(dev, 0x00079a, 0x00000002);
+	nv_icmd(dev, 0x000833, 0x04444480);
+	nv_icmd(dev, 0x0007a1, 0x00000001);
+	nv_icmd(dev, 0x0007a3, 0x00000001);
+	nv_icmd(dev, 0x0007a4, 0x00000001);
+	nv_icmd(dev, 0x0007a5, 0x00000001);
+	nv_icmd(dev, 0x000831, 0x00000004);
+	nv_icmd(dev, 0x000b07, 0x00000002);
+	nv_icmd(dev, 0x000b08, 0x00000100);
+	nv_icmd(dev, 0x000b09, 0x00000100);
+	nv_icmd(dev, 0x000b0a, 0x00000001);
+	nv_icmd(dev, 0x000a04, 0x000000ff);
+	nv_icmd(dev, 0x000a0b, 0x00000040);
+	nv_icmd(dev, 0x00097f, 0x00000100);
+	nv_icmd(dev, 0x000a02, 0x00000001);
+	nv_icmd(dev, 0x000809, 0x00000007);
+	nv_icmd(dev, 0x00c221, 0x00000040);
+	nv_icmd(dev, 0x00c1b0, 0x0000000f);
+	nv_icmd(dev, 0x00c1b1, 0x0000000f);
+	nv_icmd(dev, 0x00c1b2, 0x0000000f);
+	nv_icmd(dev, 0x00c1b3, 0x0000000f);
+	nv_icmd(dev, 0x00c1b4, 0x0000000f);
+	nv_icmd(dev, 0x00c1b5, 0x0000000f);
+	nv_icmd(dev, 0x00c1b6, 0x0000000f);
+	nv_icmd(dev, 0x00c1b7, 0x0000000f);
+	nv_icmd(dev, 0x00c1b8, 0x0fac6881);
+	nv_icmd(dev, 0x00c1b9, 0x00fac688);
+	nv_icmd(dev, 0x00c401, 0x00000001);
+	nv_icmd(dev, 0x00c402, 0x00010001);
+	nv_icmd(dev, 0x00c403, 0x00000001);
+	nv_icmd(dev, 0x00c404, 0x00000001);
+	nv_icmd(dev, 0x00c40e, 0x00000020);
+	nv_icmd(dev, 0x00c500, 0x00000003);
+	nv_icmd(dev, 0x01e100, 0x00000001);
+	nv_icmd(dev, 0x001000, 0x00000002);
+	nv_icmd(dev, 0x0006aa, 0x00000001);
+	nv_icmd(dev, 0x0006ad, 0x00000100);
+	nv_icmd(dev, 0x0006ae, 0x00000100);
+	nv_icmd(dev, 0x0006b1, 0x00000011);
+	nv_icmd(dev, 0x00078c, 0x00000008);
+	nv_icmd(dev, 0x000792, 0x00000001);
+	nv_icmd(dev, 0x000794, 0x00000001);
+	nv_icmd(dev, 0x000795, 0x00000001);
+	nv_icmd(dev, 0x000796, 0x00000001);
+	nv_icmd(dev, 0x000797, 0x000000cf);
+	nv_icmd(dev, 0x00079a, 0x00000002);
+	nv_icmd(dev, 0x000833, 0x04444480);
+	nv_icmd(dev, 0x0007a1, 0x00000001);
+	nv_icmd(dev, 0x0007a3, 0x00000001);
+	nv_icmd(dev, 0x0007a4, 0x00000001);
+	nv_icmd(dev, 0x0007a5, 0x00000001);
+	nv_icmd(dev, 0x000831, 0x00000004);
+	nv_icmd(dev, 0x01e100, 0x00000001);
+	nv_icmd(dev, 0x001000, 0x00000008);
+	nv_icmd(dev, 0x000039, 0x00000000);
+	nv_icmd(dev, 0x00003a, 0x00000000);
+	nv_icmd(dev, 0x00003b, 0x00000000);
+	nv_icmd(dev, 0x000380, 0x00000001);
+	nv_icmd(dev, 0x000366, 0x00000000);
+	nv_icmd(dev, 0x000367, 0x00000000);
+	nv_icmd(dev, 0x000368, 0x00000fff);
+	nv_icmd(dev, 0x000370, 0x00000000);
+	nv_icmd(dev, 0x000371, 0x00000000);
+	nv_icmd(dev, 0x000372, 0x000fffff);
+	nv_icmd(dev, 0x000813, 0x00000006);
+	nv_icmd(dev, 0x000814, 0x00000008);
+	nv_icmd(dev, 0x000957, 0x00000003);
+	nv_icmd(dev, 0x000818, 0x00000000);
+	nv_icmd(dev, 0x000819, 0x00000000);
+	nv_icmd(dev, 0x00081a, 0x00000000);
+	nv_icmd(dev, 0x00081b, 0x00000000);
+	nv_icmd(dev, 0x00081c, 0x00000000);
+	nv_icmd(dev, 0x00081d, 0x00000000);
+	nv_icmd(dev, 0x00081e, 0x00000000);
+	nv_icmd(dev, 0x00081f, 0x00000000);
+	nv_icmd(dev, 0x000848, 0x00000000);
+	nv_icmd(dev, 0x000849, 0x00000000);
+	nv_icmd(dev, 0x00084a, 0x00000000);
+	nv_icmd(dev, 0x00084b, 0x00000000);
+	nv_icmd(dev, 0x00084c, 0x00000000);
+	nv_icmd(dev, 0x00084d, 0x00000000);
+	nv_icmd(dev, 0x00084e, 0x00000000);
+	nv_icmd(dev, 0x00084f, 0x00000000);
+	nv_icmd(dev, 0x000850, 0x00000000);
+	nv_icmd(dev, 0x000851, 0x00000000);
+	nv_icmd(dev, 0x000852, 0x00000000);
+	nv_icmd(dev, 0x000853, 0x00000000);
+	nv_icmd(dev, 0x000854, 0x00000000);
+	nv_icmd(dev, 0x000855, 0x00000000);
+	nv_icmd(dev, 0x000856, 0x00000000);
+	nv_icmd(dev, 0x000857, 0x00000000);
+	nv_icmd(dev, 0x000738, 0x00000000);
+	nv_icmd(dev, 0x000b07, 0x00000002);
+	nv_icmd(dev, 0x000b08, 0x00000100);
+	nv_icmd(dev, 0x000b09, 0x00000100);
+	nv_icmd(dev, 0x000b0a, 0x00000001);
+	nv_icmd(dev, 0x000a04, 0x000000ff);
+	nv_icmd(dev, 0x00097f, 0x00000100);
+	nv_icmd(dev, 0x000a02, 0x00000001);
+	nv_icmd(dev, 0x000809, 0x00000007);
+	nv_icmd(dev, 0x00c221, 0x00000040);
+	nv_icmd(dev, 0x00c401, 0x00000001);
+	nv_icmd(dev, 0x00c402, 0x00010001);
+	nv_icmd(dev, 0x00c403, 0x00000001);
+	nv_icmd(dev, 0x00c404, 0x00000001);
+	nv_icmd(dev, 0x00c40e, 0x00000020);
+	nv_icmd(dev, 0x00c500, 0x00000003);
+	nv_icmd(dev, 0x01e100, 0x00000001);
+	nv_icmd(dev, 0x001000, 0x00000001);
+	nv_icmd(dev, 0x000b07, 0x00000002);
+	nv_icmd(dev, 0x000b08, 0x00000100);
+	nv_icmd(dev, 0x000b09, 0x00000100);
+	nv_icmd(dev, 0x000b0a, 0x00000001);
+	nv_icmd(dev, 0x01e100, 0x00000001);
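+	/* end of icmd upload; presumably clears the icmd mode enabled before
+	 * the writes above (as in the matching nvc0 sequence)
+	 */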
+	nv_wr32(dev, 0x400208, 0x00000000);
+}
+
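+/*
+ * Submit a single method/data pair to a graphics class: the data word is
+ * staged in 0x40448c, then 0x404488 is written with bit 31 set as the
+ * trigger, the method packed into bits 14 and up, and the class in the
+ * low bits.
+ */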
+static void
+nv_mthd(struct drm_device *dev, u32 class, u32 mthd, u32 data)
+{
+	nv_wr32(dev, 0x40448c, data);
+	nv_wr32(dev, 0x404488, 0x80000000 | (mthd << 14) | class);
+}
+
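+/* Initial method state for the 0xa097 object (the Kepler 3D class),
+ * posted one method/data pair at a time via nv_mthd() above.
+ */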
+static void
+nve0_grctx_generate_a097(struct drm_device *dev)
+{
+	nv_mthd(dev, 0xa097, 0x0800, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0840, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0880, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x08c0, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0900, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0940, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0980, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x09c0, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0804, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0844, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0884, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x08c4, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0904, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0944, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0984, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x09c4, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0808, 0x00000400);
+	nv_mthd(dev, 0xa097, 0x0848, 0x00000400);
+	nv_mthd(dev, 0xa097, 0x0888, 0x00000400);
+	nv_mthd(dev, 0xa097, 0x08c8, 0x00000400);
+	nv_mthd(dev, 0xa097, 0x0908, 0x00000400);
+	nv_mthd(dev, 0xa097, 0x0948, 0x00000400);
+	nv_mthd(dev, 0xa097, 0x0988, 0x00000400);
+	nv_mthd(dev, 0xa097, 0x09c8, 0x00000400);
+	nv_mthd(dev, 0xa097, 0x080c, 0x00000300);
+	nv_mthd(dev, 0xa097, 0x084c, 0x00000300);
+	nv_mthd(dev, 0xa097, 0x088c, 0x00000300);
+	nv_mthd(dev, 0xa097, 0x08cc, 0x00000300);
+	nv_mthd(dev, 0xa097, 0x090c, 0x00000300);
+	nv_mthd(dev, 0xa097, 0x094c, 0x00000300);
+	nv_mthd(dev, 0xa097, 0x098c, 0x00000300);
+	nv_mthd(dev, 0xa097, 0x09cc, 0x00000300);
+	nv_mthd(dev, 0xa097, 0x0810, 0x000000cf);
+	nv_mthd(dev, 0xa097, 0x0850, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0890, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x08d0, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0910, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0950, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0990, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x09d0, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0814, 0x00000040);
+	nv_mthd(dev, 0xa097, 0x0854, 0x00000040);
+	nv_mthd(dev, 0xa097, 0x0894, 0x00000040);
+	nv_mthd(dev, 0xa097, 0x08d4, 0x00000040);
+	nv_mthd(dev, 0xa097, 0x0914, 0x00000040);
+	nv_mthd(dev, 0xa097, 0x0954, 0x00000040);
+	nv_mthd(dev, 0xa097, 0x0994, 0x00000040);
+	nv_mthd(dev, 0xa097, 0x09d4, 0x00000040);
+	nv_mthd(dev, 0xa097, 0x0818, 0x00000001);
+	nv_mthd(dev, 0xa097, 0x0858, 0x00000001);
+	nv_mthd(dev, 0xa097, 0x0898, 0x00000001);
+	nv_mthd(dev, 0xa097, 0x08d8, 0x00000001);
+	nv_mthd(dev, 0xa097, 0x0918, 0x00000001);
+	nv_mthd(dev, 0xa097, 0x0958, 0x00000001);
+	nv_mthd(dev, 0xa097, 0x0998, 0x00000001);
+	nv_mthd(dev, 0xa097, 0x09d8, 0x00000001);
+	nv_mthd(dev, 0xa097, 0x081c, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x085c, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x089c, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x08dc, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x091c, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x095c, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x099c, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x09dc, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0820, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0860, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x08a0, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x08e0, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0920, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0960, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x09a0, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x09e0, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1c00, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1c10, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1c20, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1c30, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1c40, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1c50, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1c60, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1c70, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1c80, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1c90, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1ca0, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1cb0, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1cc0, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1cd0, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1ce0, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1cf0, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1c04, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1c14, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1c24, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1c34, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1c44, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1c54, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1c64, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1c74, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1c84, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1c94, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1ca4, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1cb4, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1cc4, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1cd4, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1ce4, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1cf4, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1c08, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1c18, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1c28, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1c38, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1c48, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1c58, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1c68, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1c78, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1c88, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1c98, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1ca8, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1cb8, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1cc8, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1cd8, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1ce8, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1cf8, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1c0c, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1c1c, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1c2c, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1c3c, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1c4c, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1c5c, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1c6c, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1c7c, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1c8c, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1c9c, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1cac, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1cbc, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1ccc, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1cdc, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1cec, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1cfc, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1d00, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1d10, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1d20, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1d30, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1d40, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1d50, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1d60, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1d70, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1d80, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1d90, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1da0, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1db0, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1dc0, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1dd0, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1de0, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1df0, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1d04, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1d14, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1d24, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1d34, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1d44, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1d54, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1d64, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1d74, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1d84, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1d94, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1da4, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1db4, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1dc4, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1dd4, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1de4, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1df4, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1d08, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1d18, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1d28, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1d38, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1d48, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1d58, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1d68, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1d78, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1d88, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1d98, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1da8, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1db8, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1dc8, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1dd8, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1de8, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1df8, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1d0c, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1d1c, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1d2c, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1d3c, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1d4c, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1d5c, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1d6c, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1d7c, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1d8c, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1d9c, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1dac, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1dbc, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1dcc, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1ddc, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1dec, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1dfc, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1f00, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1f08, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1f10, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1f18, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1f20, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1f28, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1f30, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1f38, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1f40, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1f48, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1f50, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1f58, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1f60, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1f68, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1f70, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1f78, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1f04, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1f0c, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1f14, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1f1c, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1f24, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1f2c, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1f34, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1f3c, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1f44, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1f4c, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1f54, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1f5c, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1f64, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1f6c, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1f74, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1f7c, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1f80, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1f88, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1f90, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1f98, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1fa0, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1fa8, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1fb0, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1fb8, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1fc0, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1fc8, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1fd0, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1fd8, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1fe0, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1fe8, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1ff0, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1ff8, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1f84, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1f8c, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1f94, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1f9c, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1fa4, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1fac, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1fb4, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1fbc, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1fc4, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1fcc, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1fd4, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1fdc, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1fe4, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1fec, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1ff4, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1ffc, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x2000, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x2040, 0x00000011);
+	nv_mthd(dev, 0xa097, 0x2080, 0x00000020);
+	nv_mthd(dev, 0xa097, 0x20c0, 0x00000030);
+	nv_mthd(dev, 0xa097, 0x2100, 0x00000040);
+	nv_mthd(dev, 0xa097, 0x2140, 0x00000051);
+	nv_mthd(dev, 0xa097, 0x200c, 0x00000001);
+	nv_mthd(dev, 0xa097, 0x204c, 0x00000001);
+	nv_mthd(dev, 0xa097, 0x208c, 0x00000001);
+	nv_mthd(dev, 0xa097, 0x20cc, 0x00000001);
+	nv_mthd(dev, 0xa097, 0x210c, 0x00000001);
+	nv_mthd(dev, 0xa097, 0x214c, 0x00000001);
+	nv_mthd(dev, 0xa097, 0x2010, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x2050, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x2090, 0x00000001);
+	nv_mthd(dev, 0xa097, 0x20d0, 0x00000002);
+	nv_mthd(dev, 0xa097, 0x2110, 0x00000003);
+	nv_mthd(dev, 0xa097, 0x2150, 0x00000004);
+	nv_mthd(dev, 0xa097, 0x0380, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x03a0, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x03c0, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x03e0, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0384, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x03a4, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x03c4, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x03e4, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0388, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x03a8, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x03c8, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x03e8, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x038c, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x03ac, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x03cc, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x03ec, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0700, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0710, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0720, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0730, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0704, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0714, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0724, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0734, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0708, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0718, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0728, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0738, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x2800, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x2804, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x2808, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x280c, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x2810, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x2814, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x2818, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x281c, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x2820, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x2824, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x2828, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x282c, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x2830, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x2834, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x2838, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x283c, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x2840, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x2844, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x2848, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x284c, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x2850, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x2854, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x2858, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x285c, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x2860, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x2864, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x2868, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x286c, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x2870, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x2874, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x2878, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x287c, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x2880, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x2884, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x2888, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x288c, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x2890, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x2894, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x2898, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x289c, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x28a0, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x28a4, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x28a8, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x28ac, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x28b0, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x28b4, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x28b8, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x28bc, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x28c0, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x28c4, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x28c8, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x28cc, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x28d0, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x28d4, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x28d8, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x28dc, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x28e0, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x28e4, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x28e8, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x28ec, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x28f0, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x28f4, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x28f8, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x28fc, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x2900, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x2904, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x2908, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x290c, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x2910, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x2914, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x2918, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x291c, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x2920, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x2924, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x2928, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x292c, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x2930, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x2934, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x2938, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x293c, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x2940, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x2944, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x2948, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x294c, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x2950, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x2954, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x2958, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x295c, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x2960, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x2964, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x2968, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x296c, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x2970, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x2974, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x2978, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x297c, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x2980, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x2984, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x2988, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x298c, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x2990, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x2994, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x2998, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x299c, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x29a0, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x29a4, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x29a8, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x29ac, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x29b0, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x29b4, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x29b8, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x29bc, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x29c0, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x29c4, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x29c8, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x29cc, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x29d0, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x29d4, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x29d8, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x29dc, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x29e0, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x29e4, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x29e8, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x29ec, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x29f0, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x29f4, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x29f8, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x29fc, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0a00, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0a20, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0a40, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0a60, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0a80, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0aa0, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0ac0, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0ae0, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0b00, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0b20, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0b40, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0b60, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0b80, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0ba0, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0bc0, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0be0, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0a04, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0a24, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0a44, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0a64, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0a84, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0aa4, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0ac4, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0ae4, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0b04, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0b24, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0b44, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0b64, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0b84, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0ba4, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0bc4, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0be4, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0a08, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0a28, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0a48, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0a68, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0a88, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0aa8, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0ac8, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0ae8, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0b08, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0b28, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0b48, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0b68, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0b88, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0ba8, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0bc8, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0be8, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0a0c, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0a2c, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0a4c, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0a6c, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0a8c, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0aac, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0acc, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0aec, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0b0c, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0b2c, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0b4c, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0b6c, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0b8c, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0bac, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0bcc, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0bec, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0a10, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0a30, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0a50, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0a70, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0a90, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0ab0, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0ad0, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0af0, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0b10, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0b30, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0b50, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0b70, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0b90, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0bb0, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0bd0, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0bf0, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0a14, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0a34, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0a54, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0a74, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0a94, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0ab4, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0ad4, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0af4, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0b14, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0b34, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0b54, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0b74, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0b94, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0bb4, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0bd4, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0bf4, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0c00, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0c10, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0c20, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0c30, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0c40, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0c50, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0c60, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0c70, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0c80, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0c90, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0ca0, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0cb0, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0cc0, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0cd0, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0ce0, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0cf0, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0c04, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0c14, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0c24, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0c34, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0c44, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0c54, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0c64, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0c74, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0c84, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0c94, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0ca4, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0cb4, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0cc4, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0cd4, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0ce4, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0cf4, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0c08, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0c18, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0c28, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0c38, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0c48, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0c58, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0c68, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0c78, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0c88, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0c98, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0ca8, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0cb8, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0cc8, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0cd8, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0ce8, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0cf8, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0c0c, 0x3f800000);
+	nv_mthd(dev, 0xa097, 0x0c1c, 0x3f800000);
+	nv_mthd(dev, 0xa097, 0x0c2c, 0x3f800000);
+	nv_mthd(dev, 0xa097, 0x0c3c, 0x3f800000);
+	nv_mthd(dev, 0xa097, 0x0c4c, 0x3f800000);
+	nv_mthd(dev, 0xa097, 0x0c5c, 0x3f800000);
+	nv_mthd(dev, 0xa097, 0x0c6c, 0x3f800000);
+	nv_mthd(dev, 0xa097, 0x0c7c, 0x3f800000);
+	nv_mthd(dev, 0xa097, 0x0c8c, 0x3f800000);
+	nv_mthd(dev, 0xa097, 0x0c9c, 0x3f800000);
+	nv_mthd(dev, 0xa097, 0x0cac, 0x3f800000);
+	nv_mthd(dev, 0xa097, 0x0cbc, 0x3f800000);
+	nv_mthd(dev, 0xa097, 0x0ccc, 0x3f800000);
+	nv_mthd(dev, 0xa097, 0x0cdc, 0x3f800000);
+	nv_mthd(dev, 0xa097, 0x0cec, 0x3f800000);
+	nv_mthd(dev, 0xa097, 0x0cfc, 0x3f800000);
+	nv_mthd(dev, 0xa097, 0x0d00, 0xffff0000);
+	nv_mthd(dev, 0xa097, 0x0d08, 0xffff0000);
+	nv_mthd(dev, 0xa097, 0x0d10, 0xffff0000);
+	nv_mthd(dev, 0xa097, 0x0d18, 0xffff0000);
+	nv_mthd(dev, 0xa097, 0x0d20, 0xffff0000);
+	nv_mthd(dev, 0xa097, 0x0d28, 0xffff0000);
+	nv_mthd(dev, 0xa097, 0x0d30, 0xffff0000);
+	nv_mthd(dev, 0xa097, 0x0d38, 0xffff0000);
+	nv_mthd(dev, 0xa097, 0x0d04, 0xffff0000);
+	nv_mthd(dev, 0xa097, 0x0d0c, 0xffff0000);
+	nv_mthd(dev, 0xa097, 0x0d14, 0xffff0000);
+	nv_mthd(dev, 0xa097, 0x0d1c, 0xffff0000);
+	nv_mthd(dev, 0xa097, 0x0d24, 0xffff0000);
+	nv_mthd(dev, 0xa097, 0x0d2c, 0xffff0000);
+	nv_mthd(dev, 0xa097, 0x0d34, 0xffff0000);
+	nv_mthd(dev, 0xa097, 0x0d3c, 0xffff0000);
+	nv_mthd(dev, 0xa097, 0x0e00, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0e10, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0e20, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0e30, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0e40, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0e50, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0e60, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0e70, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0e80, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0e90, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0ea0, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0eb0, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0ec0, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0ed0, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0ee0, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0ef0, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0e04, 0xffff0000);
+	nv_mthd(dev, 0xa097, 0x0e14, 0xffff0000);
+	nv_mthd(dev, 0xa097, 0x0e24, 0xffff0000);
+	nv_mthd(dev, 0xa097, 0x0e34, 0xffff0000);
+	nv_mthd(dev, 0xa097, 0x0e44, 0xffff0000);
+	nv_mthd(dev, 0xa097, 0x0e54, 0xffff0000);
+	nv_mthd(dev, 0xa097, 0x0e64, 0xffff0000);
+	nv_mthd(dev, 0xa097, 0x0e74, 0xffff0000);
+	nv_mthd(dev, 0xa097, 0x0e84, 0xffff0000);
+	nv_mthd(dev, 0xa097, 0x0e94, 0xffff0000);
+	nv_mthd(dev, 0xa097, 0x0ea4, 0xffff0000);
+	nv_mthd(dev, 0xa097, 0x0eb4, 0xffff0000);
+	nv_mthd(dev, 0xa097, 0x0ec4, 0xffff0000);
+	nv_mthd(dev, 0xa097, 0x0ed4, 0xffff0000);
+	nv_mthd(dev, 0xa097, 0x0ee4, 0xffff0000);
+	nv_mthd(dev, 0xa097, 0x0ef4, 0xffff0000);
+	nv_mthd(dev, 0xa097, 0x0e08, 0xffff0000);
+	nv_mthd(dev, 0xa097, 0x0e18, 0xffff0000);
+	nv_mthd(dev, 0xa097, 0x0e28, 0xffff0000);
+	nv_mthd(dev, 0xa097, 0x0e38, 0xffff0000);
+	nv_mthd(dev, 0xa097, 0x0e48, 0xffff0000);
+	nv_mthd(dev, 0xa097, 0x0e58, 0xffff0000);
+	nv_mthd(dev, 0xa097, 0x0e68, 0xffff0000);
+	nv_mthd(dev, 0xa097, 0x0e78, 0xffff0000);
+	nv_mthd(dev, 0xa097, 0x0e88, 0xffff0000);
+	nv_mthd(dev, 0xa097, 0x0e98, 0xffff0000);
+	nv_mthd(dev, 0xa097, 0x0ea8, 0xffff0000);
+	nv_mthd(dev, 0xa097, 0x0eb8, 0xffff0000);
+	nv_mthd(dev, 0xa097, 0x0ec8, 0xffff0000);
+	nv_mthd(dev, 0xa097, 0x0ed8, 0xffff0000);
+	nv_mthd(dev, 0xa097, 0x0ee8, 0xffff0000);
+	nv_mthd(dev, 0xa097, 0x0ef8, 0xffff0000);
+	nv_mthd(dev, 0xa097, 0x0d40, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0d48, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0d50, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0d58, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0d44, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0d4c, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0d54, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0d5c, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1e00, 0x00000001);
+	nv_mthd(dev, 0xa097, 0x1e20, 0x00000001);
+	nv_mthd(dev, 0xa097, 0x1e40, 0x00000001);
+	nv_mthd(dev, 0xa097, 0x1e60, 0x00000001);
+	nv_mthd(dev, 0xa097, 0x1e80, 0x00000001);
+	nv_mthd(dev, 0xa097, 0x1ea0, 0x00000001);
+	nv_mthd(dev, 0xa097, 0x1ec0, 0x00000001);
+	nv_mthd(dev, 0xa097, 0x1ee0, 0x00000001);
+	nv_mthd(dev, 0xa097, 0x1e04, 0x00000001);
+	nv_mthd(dev, 0xa097, 0x1e24, 0x00000001);
+	nv_mthd(dev, 0xa097, 0x1e44, 0x00000001);
+	nv_mthd(dev, 0xa097, 0x1e64, 0x00000001);
+	nv_mthd(dev, 0xa097, 0x1e84, 0x00000001);
+	nv_mthd(dev, 0xa097, 0x1ea4, 0x00000001);
+	nv_mthd(dev, 0xa097, 0x1ec4, 0x00000001);
+	nv_mthd(dev, 0xa097, 0x1ee4, 0x00000001);
+	nv_mthd(dev, 0xa097, 0x1e08, 0x00000002);
+	nv_mthd(dev, 0xa097, 0x1e28, 0x00000002);
+	nv_mthd(dev, 0xa097, 0x1e48, 0x00000002);
+	nv_mthd(dev, 0xa097, 0x1e68, 0x00000002);
+	nv_mthd(dev, 0xa097, 0x1e88, 0x00000002);
+	nv_mthd(dev, 0xa097, 0x1ea8, 0x00000002);
+	nv_mthd(dev, 0xa097, 0x1ec8, 0x00000002);
+	nv_mthd(dev, 0xa097, 0x1ee8, 0x00000002);
+	nv_mthd(dev, 0xa097, 0x1e0c, 0x00000001);
+	nv_mthd(dev, 0xa097, 0x1e2c, 0x00000001);
+	nv_mthd(dev, 0xa097, 0x1e4c, 0x00000001);
+	nv_mthd(dev, 0xa097, 0x1e6c, 0x00000001);
+	nv_mthd(dev, 0xa097, 0x1e8c, 0x00000001);
+	nv_mthd(dev, 0xa097, 0x1eac, 0x00000001);
+	nv_mthd(dev, 0xa097, 0x1ecc, 0x00000001);
+	nv_mthd(dev, 0xa097, 0x1eec, 0x00000001);
+	nv_mthd(dev, 0xa097, 0x1e10, 0x00000001);
+	nv_mthd(dev, 0xa097, 0x1e30, 0x00000001);
+	nv_mthd(dev, 0xa097, 0x1e50, 0x00000001);
+	nv_mthd(dev, 0xa097, 0x1e70, 0x00000001);
+	nv_mthd(dev, 0xa097, 0x1e90, 0x00000001);
+	nv_mthd(dev, 0xa097, 0x1eb0, 0x00000001);
+	nv_mthd(dev, 0xa097, 0x1ed0, 0x00000001);
+	nv_mthd(dev, 0xa097, 0x1ef0, 0x00000001);
+	nv_mthd(dev, 0xa097, 0x1e14, 0x00000002);
+	nv_mthd(dev, 0xa097, 0x1e34, 0x00000002);
+	nv_mthd(dev, 0xa097, 0x1e54, 0x00000002);
+	nv_mthd(dev, 0xa097, 0x1e74, 0x00000002);
+	nv_mthd(dev, 0xa097, 0x1e94, 0x00000002);
+	nv_mthd(dev, 0xa097, 0x1eb4, 0x00000002);
+	nv_mthd(dev, 0xa097, 0x1ed4, 0x00000002);
+	nv_mthd(dev, 0xa097, 0x1ef4, 0x00000002);
+	nv_mthd(dev, 0xa097, 0x1e18, 0x00000001);
+	nv_mthd(dev, 0xa097, 0x1e38, 0x00000001);
+	nv_mthd(dev, 0xa097, 0x1e58, 0x00000001);
+	nv_mthd(dev, 0xa097, 0x1e78, 0x00000001);
+	nv_mthd(dev, 0xa097, 0x1e98, 0x00000001);
+	nv_mthd(dev, 0xa097, 0x1eb8, 0x00000001);
+	nv_mthd(dev, 0xa097, 0x1ed8, 0x00000001);
+	nv_mthd(dev, 0xa097, 0x1ef8, 0x00000001);
+	nv_mthd(dev, 0xa097, 0x3400, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x3404, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x3408, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x340c, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x3410, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x3414, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x3418, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x341c, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x3420, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x3424, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x3428, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x342c, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x3430, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x3434, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x3438, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x343c, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x3440, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x3444, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x3448, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x344c, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x3450, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x3454, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x3458, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x345c, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x3460, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x3464, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x3468, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x346c, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x3470, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x3474, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x3478, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x347c, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x3480, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x3484, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x3488, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x348c, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x3490, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x3494, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x3498, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x349c, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x34a0, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x34a4, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x34a8, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x34ac, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x34b0, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x34b4, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x34b8, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x34bc, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x34c0, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x34c4, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x34c8, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x34cc, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x34d0, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x34d4, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x34d8, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x34dc, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x34e0, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x34e4, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x34e8, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x34ec, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x34f0, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x34f4, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x34f8, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x34fc, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x3500, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x3504, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x3508, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x350c, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x3510, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x3514, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x3518, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x351c, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x3520, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x3524, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x3528, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x352c, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x3530, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x3534, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x3538, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x353c, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x3540, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x3544, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x3548, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x354c, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x3550, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x3554, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x3558, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x355c, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x3560, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x3564, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x3568, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x356c, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x3570, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x3574, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x3578, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x357c, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x3580, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x3584, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x3588, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x358c, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x3590, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x3594, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x3598, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x359c, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x35a0, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x35a4, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x35a8, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x35ac, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x35b0, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x35b4, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x35b8, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x35bc, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x35c0, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x35c4, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x35c8, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x35cc, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x35d0, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x35d4, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x35d8, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x35dc, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x35e0, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x35e4, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x35e8, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x35ec, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x35f0, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x35f4, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x35f8, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x35fc, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x030c, 0x00000001);
+	nv_mthd(dev, 0xa097, 0x1944, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1514, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0d68, 0x0000ffff);
+	nv_mthd(dev, 0xa097, 0x121c, 0x0fac6881);
+	nv_mthd(dev, 0xa097, 0x0fac, 0x00000001);
+	nv_mthd(dev, 0xa097, 0x1538, 0x00000001);
+	nv_mthd(dev, 0xa097, 0x0fe0, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0fe4, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0fe8, 0x00000014);
+	nv_mthd(dev, 0xa097, 0x0fec, 0x00000040);
+	nv_mthd(dev, 0xa097, 0x0ff0, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x179c, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1228, 0x00000400);
+	nv_mthd(dev, 0xa097, 0x122c, 0x00000300);
+	nv_mthd(dev, 0xa097, 0x1230, 0x00010001);
+	nv_mthd(dev, 0xa097, 0x07f8, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x15b4, 0x00000001);
+	nv_mthd(dev, 0xa097, 0x15cc, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1534, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0fb0, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x15d0, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x153c, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x16b4, 0x00000003);
+	nv_mthd(dev, 0xa097, 0x0fbc, 0x0000ffff);
+	nv_mthd(dev, 0xa097, 0x0fc0, 0x0000ffff);
+	nv_mthd(dev, 0xa097, 0x0fc4, 0x0000ffff);
+	nv_mthd(dev, 0xa097, 0x0fc8, 0x0000ffff);
+	nv_mthd(dev, 0xa097, 0x0df8, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0dfc, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1948, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1970, 0x00000001);
+	nv_mthd(dev, 0xa097, 0x161c, 0x000009f0);
+	nv_mthd(dev, 0xa097, 0x0dcc, 0x00000010);
+	nv_mthd(dev, 0xa097, 0x163c, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x15e4, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1160, 0x25e00040);
+	nv_mthd(dev, 0xa097, 0x1164, 0x25e00040);
+	nv_mthd(dev, 0xa097, 0x1168, 0x25e00040);
+	nv_mthd(dev, 0xa097, 0x116c, 0x25e00040);
+	nv_mthd(dev, 0xa097, 0x1170, 0x25e00040);
+	nv_mthd(dev, 0xa097, 0x1174, 0x25e00040);
+	nv_mthd(dev, 0xa097, 0x1178, 0x25e00040);
+	nv_mthd(dev, 0xa097, 0x117c, 0x25e00040);
+	nv_mthd(dev, 0xa097, 0x1180, 0x25e00040);
+	nv_mthd(dev, 0xa097, 0x1184, 0x25e00040);
+	nv_mthd(dev, 0xa097, 0x1188, 0x25e00040);
+	nv_mthd(dev, 0xa097, 0x118c, 0x25e00040);
+	nv_mthd(dev, 0xa097, 0x1190, 0x25e00040);
+	nv_mthd(dev, 0xa097, 0x1194, 0x25e00040);
+	nv_mthd(dev, 0xa097, 0x1198, 0x25e00040);
+	nv_mthd(dev, 0xa097, 0x119c, 0x25e00040);
+	nv_mthd(dev, 0xa097, 0x11a0, 0x25e00040);
+	nv_mthd(dev, 0xa097, 0x11a4, 0x25e00040);
+	nv_mthd(dev, 0xa097, 0x11a8, 0x25e00040);
+	nv_mthd(dev, 0xa097, 0x11ac, 0x25e00040);
+	nv_mthd(dev, 0xa097, 0x11b0, 0x25e00040);
+	nv_mthd(dev, 0xa097, 0x11b4, 0x25e00040);
+	nv_mthd(dev, 0xa097, 0x11b8, 0x25e00040);
+	nv_mthd(dev, 0xa097, 0x11bc, 0x25e00040);
+	nv_mthd(dev, 0xa097, 0x11c0, 0x25e00040);
+	nv_mthd(dev, 0xa097, 0x11c4, 0x25e00040);
+	nv_mthd(dev, 0xa097, 0x11c8, 0x25e00040);
+	nv_mthd(dev, 0xa097, 0x11cc, 0x25e00040);
+	nv_mthd(dev, 0xa097, 0x11d0, 0x25e00040);
+	nv_mthd(dev, 0xa097, 0x11d4, 0x25e00040);
+	nv_mthd(dev, 0xa097, 0x11d8, 0x25e00040);
+	nv_mthd(dev, 0xa097, 0x11dc, 0x25e00040);
+	nv_mthd(dev, 0xa097, 0x1880, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1884, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1888, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x188c, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1890, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1894, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1898, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x189c, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x18a0, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x18a4, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x18a8, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x18ac, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x18b0, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x18b4, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x18b8, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x18bc, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x18c0, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x18c4, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x18c8, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x18cc, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x18d0, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x18d4, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x18d8, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x18dc, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x18e0, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x18e4, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x18e8, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x18ec, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x18f0, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x18f4, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x18f8, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x18fc, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0f84, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0f88, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x17c8, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x17cc, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x17d0, 0x000000ff);
+	nv_mthd(dev, 0xa097, 0x17d4, 0xffffffff);
+	nv_mthd(dev, 0xa097, 0x17d8, 0x00000002);
+	nv_mthd(dev, 0xa097, 0x17dc, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x15f4, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x15f8, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1434, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1438, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0d74, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0dec, 0x00000001);
+	nv_mthd(dev, 0xa097, 0x13a4, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1318, 0x00000001);
+	nv_mthd(dev, 0xa097, 0x1644, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0748, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0de8, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1648, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x12a4, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1120, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1124, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1128, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x112c, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1118, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x164c, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1658, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1910, 0x00000290);
+	nv_mthd(dev, 0xa097, 0x1518, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x165c, 0x00000001);
+	nv_mthd(dev, 0xa097, 0x1520, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1604, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1570, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x13b0, 0x3f800000);
+	nv_mthd(dev, 0xa097, 0x13b4, 0x3f800000);
+	nv_mthd(dev, 0xa097, 0x020c, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1670, 0x30201000);
+	nv_mthd(dev, 0xa097, 0x1674, 0x70605040);
+	nv_mthd(dev, 0xa097, 0x1678, 0xb8a89888);
+	nv_mthd(dev, 0xa097, 0x167c, 0xf8e8d8c8);
+	nv_mthd(dev, 0xa097, 0x166c, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1680, 0x00ffff00);
+	nv_mthd(dev, 0xa097, 0x12d0, 0x00000003);
+	nv_mthd(dev, 0xa097, 0x12d4, 0x00000002);
+	nv_mthd(dev, 0xa097, 0x1684, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1688, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0dac, 0x00001b02);
+	nv_mthd(dev, 0xa097, 0x0db0, 0x00001b02);
+	nv_mthd(dev, 0xa097, 0x0db4, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x168c, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x15bc, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x156c, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x187c, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1110, 0x00000001);
+	nv_mthd(dev, 0xa097, 0x0dc0, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0dc4, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0dc8, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1234, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1690, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x12ac, 0x00000001);
+	nv_mthd(dev, 0xa097, 0x0790, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0794, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0798, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x079c, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x07a0, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x077c, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1000, 0x00000010);
+	nv_mthd(dev, 0xa097, 0x10fc, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1290, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0218, 0x00000010);
+	nv_mthd(dev, 0xa097, 0x12d8, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x12dc, 0x00000010);
+	nv_mthd(dev, 0xa097, 0x0d94, 0x00000001);
+	nv_mthd(dev, 0xa097, 0x155c, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1560, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1564, 0x00000fff);
+	nv_mthd(dev, 0xa097, 0x1574, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1578, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x157c, 0x000fffff);
+	nv_mthd(dev, 0xa097, 0x1354, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1610, 0x00000012);
+	nv_mthd(dev, 0xa097, 0x1608, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x160c, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x260c, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x07ac, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x162c, 0x00000003);
+	nv_mthd(dev, 0xa097, 0x0210, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0320, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0324, 0x3f800000);
+	nv_mthd(dev, 0xa097, 0x0328, 0x3f800000);
+	nv_mthd(dev, 0xa097, 0x032c, 0x3f800000);
+	nv_mthd(dev, 0xa097, 0x0330, 0x3f800000);
+	nv_mthd(dev, 0xa097, 0x0334, 0x3f800000);
+	nv_mthd(dev, 0xa097, 0x0338, 0x3f800000);
+	nv_mthd(dev, 0xa097, 0x0750, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0760, 0x39291909);
+	nv_mthd(dev, 0xa097, 0x0764, 0x79695949);
+	nv_mthd(dev, 0xa097, 0x0768, 0xb9a99989);
+	nv_mthd(dev, 0xa097, 0x076c, 0xf9e9d9c9);
+	nv_mthd(dev, 0xa097, 0x0770, 0x30201000);
+	nv_mthd(dev, 0xa097, 0x0774, 0x70605040);
+	nv_mthd(dev, 0xa097, 0x0778, 0x00009080);
+	nv_mthd(dev, 0xa097, 0x0780, 0x39291909);
+	nv_mthd(dev, 0xa097, 0x0784, 0x79695949);
+	nv_mthd(dev, 0xa097, 0x0788, 0xb9a99989);
+	nv_mthd(dev, 0xa097, 0x078c, 0xf9e9d9c9);
+	nv_mthd(dev, 0xa097, 0x07d0, 0x30201000);
+	nv_mthd(dev, 0xa097, 0x07d4, 0x70605040);
+	nv_mthd(dev, 0xa097, 0x07d8, 0x00009080);
+	nv_mthd(dev, 0xa097, 0x037c, 0x00000001);
+	nv_mthd(dev, 0xa097, 0x0740, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0744, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x2600, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1918, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x191c, 0x00000900);
+	nv_mthd(dev, 0xa097, 0x1920, 0x00000405);
+	nv_mthd(dev, 0xa097, 0x1308, 0x00000001);
+	nv_mthd(dev, 0xa097, 0x1924, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x13ac, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x192c, 0x00000001);
+	nv_mthd(dev, 0xa097, 0x193c, 0x00002c1c);
+	nv_mthd(dev, 0xa097, 0x0d7c, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0f8c, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x02c0, 0x00000001);
+	nv_mthd(dev, 0xa097, 0x1510, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1940, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0ff4, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0ff8, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x194c, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1950, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1968, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1590, 0x0000003f);
+	nv_mthd(dev, 0xa097, 0x07e8, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x07ec, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x07f0, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x07f4, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x196c, 0x00000011);
+	nv_mthd(dev, 0xa097, 0x02e4, 0x0000b001);
+	nv_mthd(dev, 0xa097, 0x036c, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0370, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x197c, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0fcc, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0fd0, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x02d8, 0x00000040);
+	nv_mthd(dev, 0xa097, 0x1980, 0x00000080);
+	nv_mthd(dev, 0xa097, 0x1504, 0x00000080);
+	nv_mthd(dev, 0xa097, 0x1984, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0300, 0x00000001);
+	nv_mthd(dev, 0xa097, 0x13a8, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x12ec, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1310, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1314, 0x00000001);
+	nv_mthd(dev, 0xa097, 0x1380, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1384, 0x00000001);
+	nv_mthd(dev, 0xa097, 0x1388, 0x00000001);
+	nv_mthd(dev, 0xa097, 0x138c, 0x00000001);
+	nv_mthd(dev, 0xa097, 0x1390, 0x00000001);
+	nv_mthd(dev, 0xa097, 0x1394, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x139c, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1398, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1594, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1598, 0x00000001);
+	nv_mthd(dev, 0xa097, 0x159c, 0x00000001);
+	nv_mthd(dev, 0xa097, 0x15a0, 0x00000001);
+	nv_mthd(dev, 0xa097, 0x15a4, 0x00000001);
+	nv_mthd(dev, 0xa097, 0x0f54, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0f58, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0f5c, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x19bc, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0f9c, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0fa0, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x12cc, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x12e8, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x130c, 0x00000001);
+	nv_mthd(dev, 0xa097, 0x1360, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1364, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1368, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x136c, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1370, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1374, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1378, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x137c, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x133c, 0x00000001);
+	nv_mthd(dev, 0xa097, 0x1340, 0x00000001);
+	nv_mthd(dev, 0xa097, 0x1344, 0x00000002);
+	nv_mthd(dev, 0xa097, 0x1348, 0x00000001);
+	nv_mthd(dev, 0xa097, 0x134c, 0x00000001);
+	nv_mthd(dev, 0xa097, 0x1350, 0x00000002);
+	nv_mthd(dev, 0xa097, 0x1358, 0x00000001);
+	nv_mthd(dev, 0xa097, 0x12e4, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x131c, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1320, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1324, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1328, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x19c0, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1140, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x19c4, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x19c8, 0x00001500);
+	nv_mthd(dev, 0xa097, 0x135c, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0f90, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x19e0, 0x00000001);
+	nv_mthd(dev, 0xa097, 0x19e4, 0x00000001);
+	nv_mthd(dev, 0xa097, 0x19e8, 0x00000001);
+	nv_mthd(dev, 0xa097, 0x19ec, 0x00000001);
+	nv_mthd(dev, 0xa097, 0x19f0, 0x00000001);
+	nv_mthd(dev, 0xa097, 0x19f4, 0x00000001);
+	nv_mthd(dev, 0xa097, 0x19f8, 0x00000001);
+	nv_mthd(dev, 0xa097, 0x19fc, 0x00000001);
+	nv_mthd(dev, 0xa097, 0x19cc, 0x00000001);
+	nv_mthd(dev, 0xa097, 0x15b8, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1a00, 0x00001111);
+	nv_mthd(dev, 0xa097, 0x1a04, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1a08, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1a0c, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1a10, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1a14, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1a18, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1a1c, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0d6c, 0xffff0000);
+	nv_mthd(dev, 0xa097, 0x0d70, 0xffff0000);
+	nv_mthd(dev, 0xa097, 0x10f8, 0x00001010);
+	nv_mthd(dev, 0xa097, 0x0d80, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0d84, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0d88, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0d8c, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0d90, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0da0, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x07a4, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x07a8, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1508, 0x80000000);
+	nv_mthd(dev, 0xa097, 0x150c, 0x40000000);
+	nv_mthd(dev, 0xa097, 0x1668, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0318, 0x00000008);
+	nv_mthd(dev, 0xa097, 0x031c, 0x00000008);
+	nv_mthd(dev, 0xa097, 0x0d9c, 0x00000001);
+	nv_mthd(dev, 0xa097, 0x0374, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0378, 0x00000020);
+	nv_mthd(dev, 0xa097, 0x07dc, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x074c, 0x00000055);
+	nv_mthd(dev, 0xa097, 0x1420, 0x00000003);
+	nv_mthd(dev, 0xa097, 0x17bc, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x17c0, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x17c4, 0x00000001);
+	nv_mthd(dev, 0xa097, 0x1008, 0x00000008);
+	nv_mthd(dev, 0xa097, 0x100c, 0x00000040);
+	nv_mthd(dev, 0xa097, 0x1010, 0x0000012c);
+	nv_mthd(dev, 0xa097, 0x0d60, 0x00000040);
+	nv_mthd(dev, 0xa097, 0x075c, 0x00000003);
+	nv_mthd(dev, 0xa097, 0x1018, 0x00000020);
+	nv_mthd(dev, 0xa097, 0x101c, 0x00000001);
+	nv_mthd(dev, 0xa097, 0x1020, 0x00000020);
+	nv_mthd(dev, 0xa097, 0x1024, 0x00000001);
+	nv_mthd(dev, 0xa097, 0x1444, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x1448, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x144c, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0360, 0x20164010);
+	nv_mthd(dev, 0xa097, 0x0364, 0x00000020);
+	nv_mthd(dev, 0xa097, 0x0368, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0de4, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0204, 0x00000006);
+	nv_mthd(dev, 0xa097, 0x0208, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x02cc, 0x003fffff);
+	nv_mthd(dev, 0xa097, 0x02d0, 0x003fffff);
+	nv_mthd(dev, 0xa097, 0x1220, 0x00000005);
+	nv_mthd(dev, 0xa097, 0x0fdc, 0x00000000);
+	nv_mthd(dev, 0xa097, 0x0f98, 0x00400008);
+	nv_mthd(dev, 0xa097, 0x1284, 0x08000080);
+	nv_mthd(dev, 0xa097, 0x1450, 0x00400008);
+	nv_mthd(dev, 0xa097, 0x1454, 0x08000080);
+	nv_mthd(dev, 0xa097, 0x0214, 0x00000000);
+}
+
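+/* default method state for the 0x902d (2D) object class */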
+static void
+nve0_grctx_generate_902d(struct drm_device *dev)
+{
+	nv_mthd(dev, 0x902d, 0x0200, 0x000000cf);
+	nv_mthd(dev, 0x902d, 0x0204, 0x00000001);
+	nv_mthd(dev, 0x902d, 0x0208, 0x00000020);
+	nv_mthd(dev, 0x902d, 0x020c, 0x00000001);
+	nv_mthd(dev, 0x902d, 0x0210, 0x00000000);
+	nv_mthd(dev, 0x902d, 0x0214, 0x00000080);
+	nv_mthd(dev, 0x902d, 0x0218, 0x00000100);
+	nv_mthd(dev, 0x902d, 0x021c, 0x00000100);
+	nv_mthd(dev, 0x902d, 0x0220, 0x00000000);
+	nv_mthd(dev, 0x902d, 0x0224, 0x00000000);
+	nv_mthd(dev, 0x902d, 0x0230, 0x000000cf);
+	nv_mthd(dev, 0x902d, 0x0234, 0x00000001);
+	nv_mthd(dev, 0x902d, 0x0238, 0x00000020);
+	nv_mthd(dev, 0x902d, 0x023c, 0x00000001);
+	nv_mthd(dev, 0x902d, 0x0244, 0x00000080);
+	nv_mthd(dev, 0x902d, 0x0248, 0x00000100);
+	nv_mthd(dev, 0x902d, 0x024c, 0x00000100);
+	nv_mthd(dev, 0x902d, 0x3410, 0x00000000);
+}
+
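+/* golden values for the global PGRAPH register blocks; the unkNNxx
+ * names reflect that these units aren't yet understood */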
+static void
+nve0_graph_generate_unk40xx(struct drm_device *dev)
+{
+	nv_wr32(dev, 0x404010, 0x0);
+	nv_wr32(dev, 0x404014, 0x0);
+	nv_wr32(dev, 0x404018, 0x0);
+	nv_wr32(dev, 0x40401c, 0x0);
+	nv_wr32(dev, 0x404020, 0x0);
+	nv_wr32(dev, 0x404024, 0xe000);
+	nv_wr32(dev, 0x404028, 0x0);
+	nv_wr32(dev, 0x4040a8, 0x0);
+	nv_wr32(dev, 0x4040ac, 0x0);
+	nv_wr32(dev, 0x4040b0, 0x0);
+	nv_wr32(dev, 0x4040b4, 0x0);
+	nv_wr32(dev, 0x4040b8, 0x0);
+	nv_wr32(dev, 0x4040bc, 0x0);
+	nv_wr32(dev, 0x4040c0, 0x0);
+	nv_wr32(dev, 0x4040c4, 0x0);
+	nv_wr32(dev, 0x4040c8, 0xf800008f);
+	nv_wr32(dev, 0x4040d0, 0x0);
+	nv_wr32(dev, 0x4040d4, 0x0);
+	nv_wr32(dev, 0x4040d8, 0x0);
+	nv_wr32(dev, 0x4040dc, 0x0);
+	nv_wr32(dev, 0x4040e0, 0x0);
+	nv_wr32(dev, 0x4040e4, 0x0);
+	nv_wr32(dev, 0x4040e8, 0x1000);
+	nv_wr32(dev, 0x4040f8, 0x0);
+	nv_wr32(dev, 0x404130, 0x0);
+	nv_wr32(dev, 0x404134, 0x0);
+	nv_wr32(dev, 0x404138, 0x20000040);
+	nv_wr32(dev, 0x404150, 0x2e);
+	nv_wr32(dev, 0x404154, 0x400);
+	nv_wr32(dev, 0x404158, 0x200);
+	nv_wr32(dev, 0x404164, 0x55);
+	nv_wr32(dev, 0x4041a0, 0x0);
+	nv_wr32(dev, 0x4041a4, 0x0);
+	nv_wr32(dev, 0x4041a8, 0x0);
+	nv_wr32(dev, 0x4041ac, 0x0);
+	nv_wr32(dev, 0x404200, 0x0);
+	nv_wr32(dev, 0x404204, 0x0);
+	nv_wr32(dev, 0x404208, 0x0);
+	nv_wr32(dev, 0x40420c, 0x0);
+}
+
+static void
+nve0_graph_generate_unk44xx(struct drm_device *dev)
+{
+	nv_wr32(dev, 0x404404, 0x0);
+	nv_wr32(dev, 0x404408, 0x0);
+	nv_wr32(dev, 0x40440c, 0x0);
+	nv_wr32(dev, 0x404410, 0x0);
+	nv_wr32(dev, 0x404414, 0x0);
+	nv_wr32(dev, 0x404418, 0x0);
+	nv_wr32(dev, 0x40441c, 0x0);
+	nv_wr32(dev, 0x404420, 0x0);
+	nv_wr32(dev, 0x404424, 0x0);
+	nv_wr32(dev, 0x404428, 0x0);
+	nv_wr32(dev, 0x40442c, 0x0);
+	nv_wr32(dev, 0x404430, 0x0);
+	nv_wr32(dev, 0x404434, 0x0);
+	nv_wr32(dev, 0x404438, 0x0);
+	nv_wr32(dev, 0x404460, 0x0);
+	nv_wr32(dev, 0x404464, 0x0);
+	nv_wr32(dev, 0x404468, 0xffffff);
+	nv_wr32(dev, 0x40446c, 0x0);
+	nv_wr32(dev, 0x404480, 0x1);
+	nv_wr32(dev, 0x404498, 0x1);
+}
+
+static void
+nve0_graph_generate_unk46xx(struct drm_device *dev)
+{
+	nv_wr32(dev, 0x404604, 0x14);
+	nv_wr32(dev, 0x404608, 0x0);
+	nv_wr32(dev, 0x40460c, 0x3fff);
+	nv_wr32(dev, 0x404610, 0x100);
+	nv_wr32(dev, 0x404618, 0x0);
+	nv_wr32(dev, 0x40461c, 0x0);
+	nv_wr32(dev, 0x404620, 0x0);
+	nv_wr32(dev, 0x404624, 0x0);
+	nv_wr32(dev, 0x40462c, 0x0);
+	nv_wr32(dev, 0x404630, 0x0);
+	nv_wr32(dev, 0x404640, 0x0);
+	nv_wr32(dev, 0x404654, 0x0);
+	nv_wr32(dev, 0x404660, 0x0);
+	nv_wr32(dev, 0x404678, 0x0);
+	nv_wr32(dev, 0x40467c, 0x2);
+	nv_wr32(dev, 0x404680, 0x0);
+	nv_wr32(dev, 0x404684, 0x0);
+	nv_wr32(dev, 0x404688, 0x0);
+	nv_wr32(dev, 0x40468c, 0x0);
+	nv_wr32(dev, 0x404690, 0x0);
+	nv_wr32(dev, 0x404694, 0x0);
+	nv_wr32(dev, 0x404698, 0x0);
+	nv_wr32(dev, 0x40469c, 0x0);
+	nv_wr32(dev, 0x4046a0, 0x7f0080);
+	nv_wr32(dev, 0x4046a4, 0x0);
+	nv_wr32(dev, 0x4046a8, 0x0);
+	nv_wr32(dev, 0x4046ac, 0x0);
+	nv_wr32(dev, 0x4046b0, 0x0);
+	nv_wr32(dev, 0x4046b4, 0x0);
+	nv_wr32(dev, 0x4046b8, 0x0);
+	nv_wr32(dev, 0x4046bc, 0x0);
+	nv_wr32(dev, 0x4046c0, 0x0);
+	nv_wr32(dev, 0x4046c8, 0x0);
+	nv_wr32(dev, 0x4046cc, 0x0);
+	nv_wr32(dev, 0x4046d0, 0x0);
+}
+
+static void
+nve0_graph_generate_unk47xx(struct drm_device *dev)
+{
+	nv_wr32(dev, 0x404700, 0x0);
+	nv_wr32(dev, 0x404704, 0x0);
+	nv_wr32(dev, 0x404708, 0x0);
+	nv_wr32(dev, 0x404718, 0x0);
+	nv_wr32(dev, 0x40471c, 0x0);
+	nv_wr32(dev, 0x404720, 0x0);
+	nv_wr32(dev, 0x404724, 0x0);
+	nv_wr32(dev, 0x404728, 0x0);
+	nv_wr32(dev, 0x40472c, 0x0);
+	nv_wr32(dev, 0x404730, 0x0);
+	nv_wr32(dev, 0x404734, 0x100);
+	nv_wr32(dev, 0x404738, 0x0);
+	nv_wr32(dev, 0x40473c, 0x0);
+	nv_wr32(dev, 0x404744, 0x0);
+	nv_wr32(dev, 0x404748, 0x0);
+	nv_wr32(dev, 0x404754, 0x0);
+}
+
+static void
+nve0_graph_generate_unk58xx(struct drm_device *dev)
+{
+	nv_wr32(dev, 0x405800, 0xf8000bf);
+	nv_wr32(dev, 0x405830, 0x2180648);
+	nv_wr32(dev, 0x405834, 0x8000000);
+	nv_wr32(dev, 0x405838, 0x0);
+	nv_wr32(dev, 0x405854, 0x0);
+	nv_wr32(dev, 0x405870, 0x1);
+	nv_wr32(dev, 0x405874, 0x1);
+	nv_wr32(dev, 0x405878, 0x1);
+	nv_wr32(dev, 0x40587c, 0x1);
+	nv_wr32(dev, 0x405a00, 0x0);
+	nv_wr32(dev, 0x405a04, 0x0);
+	nv_wr32(dev, 0x405a18, 0x0);
+	nv_wr32(dev, 0x405b00, 0x0);
+	nv_wr32(dev, 0x405b10, 0x1000);
+}
+
+static void
+nve0_graph_generate_unk60xx(struct drm_device *dev)
+{
+	nv_wr32(dev, 0x406020, 0x4103c1);
+	nv_wr32(dev, 0x406028, 0x1);
+	nv_wr32(dev, 0x40602c, 0x1);
+	nv_wr32(dev, 0x406030, 0x1);
+	nv_wr32(dev, 0x406034, 0x1);
+}
+
+static void
+nve0_graph_generate_unk64xx(struct drm_device *dev)
+{
+	nv_wr32(dev, 0x4064a8, 0x0);
+	nv_wr32(dev, 0x4064ac, 0x3fff);
+	nv_wr32(dev, 0x4064b4, 0x0);
+	nv_wr32(dev, 0x4064b8, 0x0);
+	nv_wr32(dev, 0x4064c0, 0x801a00f0);
+	nv_wr32(dev, 0x4064c4, 0x192ffff);
+	nv_wr32(dev, 0x4064c8, 0x1800600);
+	nv_wr32(dev, 0x4064cc, 0x0);
+	nv_wr32(dev, 0x4064d0, 0x0);
+	nv_wr32(dev, 0x4064d4, 0x0);
+	nv_wr32(dev, 0x4064d8, 0x0);
+	nv_wr32(dev, 0x4064dc, 0x0);
+	nv_wr32(dev, 0x4064e0, 0x0);
+	nv_wr32(dev, 0x4064e4, 0x0);
+	nv_wr32(dev, 0x4064e8, 0x0);
+	nv_wr32(dev, 0x4064ec, 0x0);
+	nv_wr32(dev, 0x4064fc, 0x22a);
+}
+
+static void
+nve0_graph_generate_unk70xx(struct drm_device *dev)
+{
+	nv_wr32(dev, 0x407040, 0x0);
+}
+
+static void
+nve0_graph_generate_unk78xx(struct drm_device *dev)
+{
+	nv_wr32(dev, 0x407804, 0x23);
+	nv_wr32(dev, 0x40780c, 0xa418820);
+	nv_wr32(dev, 0x407810, 0x62080e6);
+	nv_wr32(dev, 0x407814, 0x20398a4);
+	nv_wr32(dev, 0x407818, 0xe629062);
+	nv_wr32(dev, 0x40781c, 0xa418820);
+	nv_wr32(dev, 0x407820, 0xe6);
+	nv_wr32(dev, 0x4078bc, 0x103);
+}
+
+static void
+nve0_graph_generate_unk80xx(struct drm_device *dev)
+{
+	nv_wr32(dev, 0x408000, 0x0);
+	nv_wr32(dev, 0x408004, 0x0);
+	nv_wr32(dev, 0x408008, 0x30);
+	nv_wr32(dev, 0x40800c, 0x0);
+	nv_wr32(dev, 0x408010, 0x0);
+	nv_wr32(dev, 0x408014, 0x69);
+	nv_wr32(dev, 0x408018, 0xe100e100);
+	nv_wr32(dev, 0x408064, 0x0);
+}
+
+static void
+nve0_graph_generate_unk88xx(struct drm_device *dev)
+{
+	nv_wr32(dev, 0x408800, 0x2802a3c);
+	nv_wr32(dev, 0x408804, 0x40);
+	nv_wr32(dev, 0x408808, 0x1043e005);
+	nv_wr32(dev, 0x408840, 0xb);
+	nv_wr32(dev, 0x408900, 0x3080b801);
+	nv_wr32(dev, 0x408904, 0x62000001);
+	nv_wr32(dev, 0x408908, 0xc8102f);
+	nv_wr32(dev, 0x408980, 0x11d);
+}
+
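+/* golden values for the GPC broadcast (0x418xxx) register range */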
+static void
+nve0_graph_generate_gpc(struct drm_device *dev)
+{
+	nv_wr32(dev, 0x418380, 0x16);
+	nv_wr32(dev, 0x418400, 0x38004e00);
+	nv_wr32(dev, 0x418404, 0x71e0ffff);
+	nv_wr32(dev, 0x41840c, 0x1008);
+	nv_wr32(dev, 0x418410, 0xfff0fff);
+	nv_wr32(dev, 0x418414, 0x2200fff);
+	nv_wr32(dev, 0x418450, 0x0);
+	nv_wr32(dev, 0x418454, 0x0);
+	nv_wr32(dev, 0x418458, 0x0);
+	nv_wr32(dev, 0x41845c, 0x0);
+	nv_wr32(dev, 0x418460, 0x0);
+	nv_wr32(dev, 0x418464, 0x0);
+	nv_wr32(dev, 0x418468, 0x1);
+	nv_wr32(dev, 0x41846c, 0x0);
+	nv_wr32(dev, 0x418470, 0x0);
+	nv_wr32(dev, 0x418600, 0x1f);
+	nv_wr32(dev, 0x418684, 0xf);
+	nv_wr32(dev, 0x418700, 0x2);
+	nv_wr32(dev, 0x418704, 0x80);
+	nv_wr32(dev, 0x418708, 0x0);
+	nv_wr32(dev, 0x41870c, 0x0);
+	nv_wr32(dev, 0x418710, 0x0);
+	nv_wr32(dev, 0x418800, 0x7006860a);
+	nv_wr32(dev, 0x418808, 0x0);
+	nv_wr32(dev, 0x41880c, 0x0);
+	nv_wr32(dev, 0x418810, 0x0);
+	nv_wr32(dev, 0x418828, 0x44);
+	nv_wr32(dev, 0x418830, 0x10000001);
+	nv_wr32(dev, 0x4188d8, 0x8);
+	nv_wr32(dev, 0x4188e0, 0x1000000);
+	nv_wr32(dev, 0x4188e8, 0x0);
+	nv_wr32(dev, 0x4188ec, 0x0);
+	nv_wr32(dev, 0x4188f0, 0x0);
+	nv_wr32(dev, 0x4188f4, 0x0);
+	nv_wr32(dev, 0x4188f8, 0x0);
+	nv_wr32(dev, 0x4188fc, 0x20100018);
+	nv_wr32(dev, 0x41891c, 0xff00ff);
+	nv_wr32(dev, 0x418924, 0x0);
+	nv_wr32(dev, 0x418928, 0xffff00);
+	nv_wr32(dev, 0x41892c, 0xff00);
+	nv_wr32(dev, 0x418a00, 0x0);
+	nv_wr32(dev, 0x418a04, 0x0);
+	nv_wr32(dev, 0x418a08, 0x0);
+	nv_wr32(dev, 0x418a0c, 0x10000);
+	nv_wr32(dev, 0x418a10, 0x0);
+	nv_wr32(dev, 0x418a14, 0x0);
+	nv_wr32(dev, 0x418a18, 0x0);
+	nv_wr32(dev, 0x418a20, 0x0);
+	nv_wr32(dev, 0x418a24, 0x0);
+	nv_wr32(dev, 0x418a28, 0x0);
+	nv_wr32(dev, 0x418a2c, 0x10000);
+	nv_wr32(dev, 0x418a30, 0x0);
+	nv_wr32(dev, 0x418a34, 0x0);
+	nv_wr32(dev, 0x418a38, 0x0);
+	nv_wr32(dev, 0x418a40, 0x0);
+	nv_wr32(dev, 0x418a44, 0x0);
+	nv_wr32(dev, 0x418a48, 0x0);
+	nv_wr32(dev, 0x418a4c, 0x10000);
+	nv_wr32(dev, 0x418a50, 0x0);
+	nv_wr32(dev, 0x418a54, 0x0);
+	nv_wr32(dev, 0x418a58, 0x0);
+	nv_wr32(dev, 0x418a60, 0x0);
+	nv_wr32(dev, 0x418a64, 0x0);
+	nv_wr32(dev, 0x418a68, 0x0);
+	nv_wr32(dev, 0x418a6c, 0x10000);
+	nv_wr32(dev, 0x418a70, 0x0);
+	nv_wr32(dev, 0x418a74, 0x0);
+	nv_wr32(dev, 0x418a78, 0x0);
+	nv_wr32(dev, 0x418a80, 0x0);
+	nv_wr32(dev, 0x418a84, 0x0);
+	nv_wr32(dev, 0x418a88, 0x0);
+	nv_wr32(dev, 0x418a8c, 0x10000);
+	nv_wr32(dev, 0x418a90, 0x0);
+	nv_wr32(dev, 0x418a94, 0x0);
+	nv_wr32(dev, 0x418a98, 0x0);
+	nv_wr32(dev, 0x418aa0, 0x0);
+	nv_wr32(dev, 0x418aa4, 0x0);
+	nv_wr32(dev, 0x418aa8, 0x0);
+	nv_wr32(dev, 0x418aac, 0x10000);
+	nv_wr32(dev, 0x418ab0, 0x0);
+	nv_wr32(dev, 0x418ab4, 0x0);
+	nv_wr32(dev, 0x418ab8, 0x0);
+	nv_wr32(dev, 0x418ac0, 0x0);
+	nv_wr32(dev, 0x418ac4, 0x0);
+	nv_wr32(dev, 0x418ac8, 0x0);
+	nv_wr32(dev, 0x418acc, 0x10000);
+	nv_wr32(dev, 0x418ad0, 0x0);
+	nv_wr32(dev, 0x418ad4, 0x0);
+	nv_wr32(dev, 0x418ad8, 0x0);
+	nv_wr32(dev, 0x418ae0, 0x0);
+	nv_wr32(dev, 0x418ae4, 0x0);
+	nv_wr32(dev, 0x418ae8, 0x0);
+	nv_wr32(dev, 0x418aec, 0x10000);
+	nv_wr32(dev, 0x418af0, 0x0);
+	nv_wr32(dev, 0x418af4, 0x0);
+	nv_wr32(dev, 0x418af8, 0x0);
+	nv_wr32(dev, 0x418b00, 0x6);
+	nv_wr32(dev, 0x418b08, 0xa418820);
+	nv_wr32(dev, 0x418b0c, 0x62080e6);
+	nv_wr32(dev, 0x418b10, 0x20398a4);
+	nv_wr32(dev, 0x418b14, 0xe629062);
+	nv_wr32(dev, 0x418b18, 0xa418820);
+	nv_wr32(dev, 0x418b1c, 0xe6);
+	nv_wr32(dev, 0x418bb8, 0x103);
+	nv_wr32(dev, 0x418c08, 0x1);
+	nv_wr32(dev, 0x418c10, 0x0);
+	nv_wr32(dev, 0x418c14, 0x0);
+	nv_wr32(dev, 0x418c18, 0x0);
+	nv_wr32(dev, 0x418c1c, 0x0);
+	nv_wr32(dev, 0x418c20, 0x0);
+	nv_wr32(dev, 0x418c24, 0x0);
+	nv_wr32(dev, 0x418c28, 0x0);
+	nv_wr32(dev, 0x418c2c, 0x0);
+	nv_wr32(dev, 0x418c40, 0xffffffff);
+	nv_wr32(dev, 0x418c6c, 0x1);
+	nv_wr32(dev, 0x418c80, 0x20200004);
+	nv_wr32(dev, 0x418c8c, 0x1);
+	nv_wr32(dev, 0x419000, 0x780);
+	nv_wr32(dev, 0x419004, 0x0);
+	nv_wr32(dev, 0x419008, 0x0);
+	nv_wr32(dev, 0x419014, 0x4);
+}
+
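+/* golden values for the TPC broadcast (0x419xxx) register range */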
+static void
+nve0_graph_generate_tpc(struct drm_device *dev)
+{
+	nv_wr32(dev, 0x419848, 0x0);
+	nv_wr32(dev, 0x419864, 0x129);
+	nv_wr32(dev, 0x419888, 0x0);
+	nv_wr32(dev, 0x419a00, 0xf0);
+	nv_wr32(dev, 0x419a04, 0x1);
+	nv_wr32(dev, 0x419a08, 0x21);
+	nv_wr32(dev, 0x419a0c, 0x20000);
+	nv_wr32(dev, 0x419a10, 0x0);
+	nv_wr32(dev, 0x419a14, 0x200);
+	nv_wr32(dev, 0x419a1c, 0xc000);
+	nv_wr32(dev, 0x419a20, 0x800);
+	nv_wr32(dev, 0x419a30, 0x1);
+	nv_wr32(dev, 0x419ac4, 0x37f440);
+	nv_wr32(dev, 0x419c00, 0xa);
+	nv_wr32(dev, 0x419c04, 0x80000006);
+	nv_wr32(dev, 0x419c08, 0x2);
+	nv_wr32(dev, 0x419c20, 0x0);
+	nv_wr32(dev, 0x419c24, 0x84210);
+	nv_wr32(dev, 0x419c28, 0x3efbefbe);
+	nv_wr32(dev, 0x419ce8, 0x0);
+	nv_wr32(dev, 0x419cf4, 0x3203);
+	nv_wr32(dev, 0x419e04, 0x0);
+	nv_wr32(dev, 0x419e08, 0x0);
+	nv_wr32(dev, 0x419e0c, 0x0);
+	nv_wr32(dev, 0x419e10, 0x402);
+	nv_wr32(dev, 0x419e44, 0x13eff2);
+	nv_wr32(dev, 0x419e48, 0x0);
+	nv_wr32(dev, 0x419e4c, 0x7f);
+	nv_wr32(dev, 0x419e50, 0x0);
+	nv_wr32(dev, 0x419e54, 0x0);
+	nv_wr32(dev, 0x419e58, 0x0);
+	nv_wr32(dev, 0x419e5c, 0x0);
+	nv_wr32(dev, 0x419e60, 0x0);
+	nv_wr32(dev, 0x419e64, 0x0);
+	nv_wr32(dev, 0x419e68, 0x0);
+	nv_wr32(dev, 0x419e6c, 0x0);
+	nv_wr32(dev, 0x419e70, 0x0);
+	nv_wr32(dev, 0x419e74, 0x0);
+	nv_wr32(dev, 0x419e78, 0x0);
+	nv_wr32(dev, 0x419e7c, 0x0);
+	nv_wr32(dev, 0x419e80, 0x0);
+	nv_wr32(dev, 0x419e84, 0x0);
+	nv_wr32(dev, 0x419e88, 0x0);
+	nv_wr32(dev, 0x419e8c, 0x0);
+	nv_wr32(dev, 0x419e90, 0x0);
+	nv_wr32(dev, 0x419e94, 0x0);
+	nv_wr32(dev, 0x419e98, 0x0);
+	nv_wr32(dev, 0x419eac, 0x1fcf);
+	nv_wr32(dev, 0x419eb0, 0xd3f);
+	nv_wr32(dev, 0x419ec8, 0x1304f);
+	nv_wr32(dev, 0x419f30, 0x0);
+	nv_wr32(dev, 0x419f34, 0x0);
+	nv_wr32(dev, 0x419f38, 0x0);
+	nv_wr32(dev, 0x419f3c, 0x0);
+	nv_wr32(dev, 0x419f40, 0x0);
+	nv_wr32(dev, 0x419f44, 0x0);
+	nv_wr32(dev, 0x419f48, 0x0);
+	nv_wr32(dev, 0x419f4c, 0x0);
+	nv_wr32(dev, 0x419f58, 0x0);
+	nv_wr32(dev, 0x419f78, 0xb);
+}
+
+static void
+nve0_graph_generate_tpcunk(struct drm_device *dev)
+{
+	nv_wr32(dev, 0x41be24, 0x6);
+	nv_wr32(dev, 0x41bec0, 0x12180000);
+	nv_wr32(dev, 0x41bec4, 0x37f7f);
+	nv_wr32(dev, 0x41bee4, 0x6480430);
+	nv_wr32(dev, 0x41bf00, 0xa418820);
+	nv_wr32(dev, 0x41bf04, 0x62080e6);
+	nv_wr32(dev, 0x41bf08, 0x20398a4);
+	nv_wr32(dev, 0x41bf0c, 0xe629062);
+	nv_wr32(dev, 0x41bf10, 0xa418820);
+	nv_wr32(dev, 0x41bf14, 0xe6);
+	nv_wr32(dev, 0x41bfd0, 0x900103);
+	nv_wr32(dev, 0x41bfe0, 0x400001);
+	nv_wr32(dev, 0x41bfe4, 0x0);
+}
+
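+/*
+ * Build the initial ("golden") PGRAPH context for a channel: program the
+ * global, GPC and TPC register state, derive the TPC<->GPC distribution
+ * tables, and replay the default object-class method state.
+ */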
+int
+nve0_grctx_generate(struct nouveau_channel *chan)
+{
+	struct nve0_graph_priv *priv = nv_engine(chan->dev, NVOBJ_ENGINE_GR);
+	struct nve0_graph_chan *grch = chan->engctx[NVOBJ_ENGINE_GR];
+	struct drm_device *dev = chan->dev;
+	u32 data[6] = {}, data2[2] = {}, tmp;
+	u32 tpc_set = 0, tpc_mask = 0;
+	u8 tpcnr[GPC_MAX], a, b;
+	u8 shift, ntpcv;
+	int i, gpc, tpc, id;
+
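+	/* 0x260 bit 0 is dropped here and restored at the end of this
+	 * function; presumably some form of gating that must be off while
+	 * the golden state is programmed */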
+	nv_mask(dev, 0x000260, 0x00000001, 0x00000000);
+	nv_wr32(dev, 0x400204, 0x00000000);
+	nv_wr32(dev, 0x400208, 0x00000000);
+
+	nve0_graph_generate_unk40xx(dev);
+	nve0_graph_generate_unk44xx(dev);
+	nve0_graph_generate_unk46xx(dev);
+	nve0_graph_generate_unk47xx(dev);
+	nve0_graph_generate_unk58xx(dev);
+	nve0_graph_generate_unk60xx(dev);
+	nve0_graph_generate_unk64xx(dev);
+	nve0_graph_generate_unk70xx(dev);
+	nve0_graph_generate_unk78xx(dev);
+	nve0_graph_generate_unk80xx(dev);
+	nve0_graph_generate_unk88xx(dev);
+	nve0_graph_generate_gpc(dev);
+	nve0_graph_generate_tpc(dev);
+	nve0_graph_generate_tpcunk(dev);
+
+	nv_wr32(dev, 0x404154, 0x0);
+
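+	/* replay the channel's mmio list; each 8-byte entry is a
+	 * (register, value) pair */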
+	for (i = 0; i < grch->mmio_nr * 8; i += 8) {
+		u32 reg = nv_ro32(grch->mmio, i + 0);
+		u32 val = nv_ro32(grch->mmio, i + 4);
+		nv_wr32(dev, reg, val);
+	}
+
+	nv_wr32(dev, 0x418c6c, 0x1);
+	nv_wr32(dev, 0x41980c, 0x10);
+	nv_wr32(dev, 0x41be08, 0x4);
+	nv_wr32(dev, 0x4064c0, 0x801a00f0);
+	nv_wr32(dev, 0x405800, 0xf8000bf);
+	nv_wr32(dev, 0x419c00, 0xa);
+
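+	/* hand out sequential IDs to each present TPC, walking TPC-major
+	 * across the GPCs, and tell every GPC how many TPCs it owns */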
+	for (tpc = 0, id = 0; tpc < 4; tpc++) {
+		for (gpc = 0; gpc < priv->gpc_nr; gpc++) {
+			if (tpc < priv->tpc_nr[gpc]) {
+				nv_wr32(dev, TPC_UNIT(gpc, tpc, 0x0698), id);
+				nv_wr32(dev, TPC_UNIT(gpc, tpc, 0x04e8), id);
+				nv_wr32(dev, GPC_UNIT(gpc, 0x0c10 + tpc * 4), id);
+				nv_wr32(dev, TPC_UNIT(gpc, tpc, 0x0088), id++);
+			}
+
+			nv_wr32(dev, GPC_UNIT(gpc, 0x0c08), priv->tpc_nr[gpc]);
+			nv_wr32(dev, GPC_UNIT(gpc, 0x0c8c), priv->tpc_nr[gpc]);
+		}
+	}
+
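+	/* pack the per-GPC TPC counts, four bits per GPC */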
+	tmp = 0;
+	for (i = 0; i < priv->gpc_nr; i++)
+		tmp |= priv->tpc_nr[i] << (i * 4);
+	nv_wr32(dev, 0x406028, tmp);
+	nv_wr32(dev, 0x405870, tmp);
+
+	nv_wr32(dev, 0x40602c, 0x0);
+	nv_wr32(dev, 0x405874, 0x0);
+	nv_wr32(dev, 0x406030, 0x0);
+	nv_wr32(dev, 0x405878, 0x0);
+	nv_wr32(dev, 0x406034, 0x0);
+	nv_wr32(dev, 0x40587c, 0x0);
+
+	/* calculate first set of magics: round-robin a GPC onto each TPC,
+	 * packed five bits per TPC, six TPCs per word */
+	memcpy(tpcnr, priv->tpc_nr, sizeof(priv->tpc_nr));
+
+	gpc = -1;
+	for (tpc = 0; tpc < priv->tpc_total; tpc++) {
+		do {
+			gpc = (gpc + 1) % priv->gpc_nr;
+		} while (!tpcnr[gpc]);
+		tpcnr[gpc]--;
+
+		data[tpc / 6] |= gpc << ((tpc % 6) * 5);
+	}
+
+	for (; tpc < 32; tpc++)
+		data[tpc / 6] |= 7 << ((tpc % 6) * 5);
+
+	/* and the second: left-shift tpc_total until bit 4 is set,
+	 * remembering the shift */
+	shift = 0;
+	ntpcv = priv->tpc_total;
+	while (!(ntpcv & (1 << 4))) {
+		ntpcv <<= 1;
+		shift++;
+	}
+
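+	/* pack the scaled count and shift, along with a small table of
+	 * (1 << n) % ntpcv remainders */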
+	data2[0]  = ntpcv << 16;
+	data2[0] |= shift << 21;
+	data2[0] |= (((1 << (0 + 5)) % ntpcv) << 24);
+	data2[0] |= priv->tpc_total << 8;
+	data2[0] |= priv->magic_not_rop_nr;
+	for (i = 1; i < 7; i++)
+		data2[1] |= ((1 << (i + 5)) % ntpcv) << ((i - 1) * 5);
+
+	/* and write it all to the various parts of PGRAPH */
+	nv_wr32(dev, 0x418bb8, (priv->tpc_total << 8) | priv->magic_not_rop_nr);
+	for (i = 0; i < 6; i++)
+		nv_wr32(dev, 0x418b08 + (i * 4), data[i]);
+
+	nv_wr32(dev, 0x41bfd0, data2[0]);
+	nv_wr32(dev, 0x41bfe4, data2[1]);
+	for (i = 0; i < 6; i++)
+		nv_wr32(dev, 0x41bf00 + (i * 4), data[i]);
+
+	nv_wr32(dev, 0x4078bc, (priv->tpc_total << 8) | priv->magic_not_rop_nr);
+	for (i = 0; i < 6; i++)
+		nv_wr32(dev, 0x40780c + (i * 4), data[i]);
+
+	memcpy(tpcnr, priv->tpc_nr, sizeof(priv->tpc_nr));
+	for (gpc = 0; gpc < priv->gpc_nr; gpc++)
+		tpc_mask |= ((1 << priv->tpc_nr[gpc]) - 1) << (gpc * 8);
+
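+	/* spread the TPCs as evenly as possible across the 32 mask slots,
+	 * writing each slot's enable set and its complement within tpc_mask */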
+	for (i = 0, gpc = -1, b = -1; i < 32; i++) {
+		a = (i * (priv->tpc_total - 1)) / 32;
+		if (a != b) {
+			b = a;
+			do {
+				gpc = (gpc + 1) % priv->gpc_nr;
+			} while (!tpcnr[gpc]);
+			tpc = priv->tpc_nr[gpc] - tpcnr[gpc]--;
+
+			tpc_set |= 1 << ((gpc * 8) + tpc);
+		}
+
+		nv_wr32(dev, 0x406800 + (i * 0x20), tpc_set);
+		nv_wr32(dev, 0x406c00 + (i * 0x20), tpc_set ^ tpc_mask);
+	}
+
+	for (i = 0; i < 8; i++)
+		nv_wr32(dev, 0x4064d0 + (i * 0x04), 0x00000000);
+
+	nv_wr32(dev, 0x405b00, 0x201);
+	nv_wr32(dev, 0x408850, 0x2);
+	nv_wr32(dev, 0x408958, 0x2);
+	nv_wr32(dev, 0x419f78, 0xa);
+
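+	/* replay the default object-class state recorded above (icmd,
+	 * 0xa097 3D and 0x902d 2D) */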
+	nve0_grctx_generate_icmd(dev);
+	nve0_grctx_generate_a097(dev);
+	nve0_grctx_generate_902d(dev);
+
+	nv_mask(dev, 0x000260, 0x00000001, 0x00000001);
+	nv_wr32(dev, 0x418800, 0x7026860a); /* XXX: differs from the 0x7006860a golden value above */
+	nv_wr32(dev, 0x41be10, 0x00bb8bc7); /* XXX */
+	return 0;
+}