Browse Source

Merge branch 'drm-nouveau-next' of git://anongit.freedesktop.org/git/nouveau/linux-2.6 into drm-next

- Various fixes that make it more feasible to survive concurrent piglit runs.
- Buffer object deletion is no longer synchronous
- Context/register initialisation updates that have been reported to
solve some stability issues (particularly on some problematic GF119
chips)
- Kernel side support for VP2 video decoding engines

* 'drm-nouveau-next' of git://anongit.freedesktop.org/git/nouveau/linux-2.6: (44 commits)
  drm/nvd0-/disp: handle case where display engine is missing/disabled
  drm/gr/nvc0-: merge nvc0/nve0 ucode, and use cpp instead of m4
  drm/nouveau/bsp/nv84: initial vp2 engine implementation
  drm/nouveau/vp/nv84: initial vp2 engine implementation
  drm/nouveau/core: xtensa engine base class implementation
  drm/nouveau/vdec: fork vp3 implementations from vp2
  drm/nouveau/core: move falcon class to engine/
  drm/nouveau/kms: don't fail if there's no dcb table entries
  drm/nouveau: remove limit on gart
  drm/nouveau/vm: perform a bar flush when flushing vm
  drm/nvc0/gr: cleanup register lists, and add nvce/nvcf to switches
  drm/nvc8/gr: update initial register/context values
  drm/nvc4/gr: update initial register/context values
  drm/nvc1/gr: update initial register/context values
  drm/nvc3/gr: update initial register/context values
  drm/nvc0/gr: update initial register/context values
  drm/nvd9/gr: update initial register/context values
  drm/nve4/gr: update initial register/context values
  drm/nvc0-/gr: bump maximum gpc/tpc limits
  drm/nvf0/gr: initial register/context setup
  ...
Dave Airlie 12 years ago
parent
commit
f7d452f4fd
100 changed files with 5695 additions and 4940 deletions
  1. 17 1
      drivers/gpu/drm/nouveau/Makefile
  2. 0 1
      drivers/gpu/drm/nouveau/core/core/mm.c
  3. 13 14
      drivers/gpu/drm/nouveau/core/engine/bsp/nv84.c
  4. 93 0
      drivers/gpu/drm/nouveau/core/engine/bsp/nv98.c
  5. 1 2
      drivers/gpu/drm/nouveau/core/engine/bsp/nvc0.c
  6. 1 2
      drivers/gpu/drm/nouveau/core/engine/bsp/nve0.c
  7. 2 2
      drivers/gpu/drm/nouveau/core/engine/copy/fuc/nva3.fuc.h
  8. 2 2
      drivers/gpu/drm/nouveau/core/engine/copy/fuc/nvc0.fuc.h
  9. 7 14
      drivers/gpu/drm/nouveau/core/engine/copy/nva3.c
  10. 6 4
      drivers/gpu/drm/nouveau/core/engine/copy/nvc0.c
  11. 47 0
      drivers/gpu/drm/nouveau/core/engine/copy/nve0.c
  12. 2 2
      drivers/gpu/drm/nouveau/core/engine/crypt/fuc/nv98.fuc.h
  13. 0 8
      drivers/gpu/drm/nouveau/core/engine/crypt/nv84.c
  14. 1 9
      drivers/gpu/drm/nouveau/core/engine/crypt/nv98.c
  15. 18 18
      drivers/gpu/drm/nouveau/core/engine/device/nv50.c
  16. 8 8
      drivers/gpu/drm/nouveau/core/engine/device/nvc0.c
  17. 7 4
      drivers/gpu/drm/nouveau/core/engine/device/nve0.c
  18. 3 3
      drivers/gpu/drm/nouveau/core/engine/disp/nv50.c
  19. 8 6
      drivers/gpu/drm/nouveau/core/engine/disp/nvd0.c
  20. 3 0
      drivers/gpu/drm/nouveau/core/engine/disp/nve0.c
  21. 3 0
      drivers/gpu/drm/nouveau/core/engine/disp/nvf0.c
  22. 1 2
      drivers/gpu/drm/nouveau/core/engine/falcon.c
  23. 1 1
      drivers/gpu/drm/nouveau/core/engine/fifo/nv40.c
  24. 4 0
      drivers/gpu/drm/nouveau/core/engine/fifo/nv84.c
  25. 30 23
      drivers/gpu/drm/nouveau/core/engine/fifo/nve0.c
  26. 521 80
      drivers/gpu/drm/nouveau/core/engine/graph/ctxnvc0.c
  27. 266 135
      drivers/gpu/drm/nouveau/core/engine/graph/ctxnve0.c
  28. 26 47
      drivers/gpu/drm/nouveau/core/engine/graph/fuc/com.fuc
  29. 369 0
      drivers/gpu/drm/nouveau/core/engine/graph/fuc/gpc.fuc
  30. 27 417
      drivers/gpu/drm/nouveau/core/engine/graph/fuc/gpcnvc0.fuc
  31. 35 81
      drivers/gpu/drm/nouveau/core/engine/graph/fuc/gpcnvc0.fuc.h
  32. 74 353
      drivers/gpu/drm/nouveau/core/engine/graph/fuc/gpcnve0.fuc
  33. 68 12
      drivers/gpu/drm/nouveau/core/engine/graph/fuc/gpcnve0.fuc.h
  34. 755 0
      drivers/gpu/drm/nouveau/core/engine/graph/fuc/hub.fuc
  35. 20 770
      drivers/gpu/drm/nouveau/core/engine/graph/fuc/hubnvc0.fuc
  36. 540 480
      drivers/gpu/drm/nouveau/core/engine/graph/fuc/hubnvc0.fuc.h
  37. 71 699
      drivers/gpu/drm/nouveau/core/engine/graph/fuc/hubnve0.fuc
  38. 536 423
      drivers/gpu/drm/nouveau/core/engine/graph/fuc/hubnve0.fuc.h
  39. 53 0
      drivers/gpu/drm/nouveau/core/engine/graph/fuc/macros.fuc
  40. 0 400
      drivers/gpu/drm/nouveau/core/engine/graph/fuc/nve0.fuc
  41. 7 0
      drivers/gpu/drm/nouveau/core/engine/graph/fuc/os.h
  42. 6 12
      drivers/gpu/drm/nouveau/core/engine/graph/nv50.c
  43. 586 7
      drivers/gpu/drm/nouveau/core/engine/graph/nvc0.c
  44. 4 2
      drivers/gpu/drm/nouveau/core/engine/graph/nvc0.h
  45. 341 43
      drivers/gpu/drm/nouveau/core/engine/graph/nve0.c
  46. 0 8
      drivers/gpu/drm/nouveau/core/engine/mpeg/nv50.c
  47. 0 1
      drivers/gpu/drm/nouveau/core/engine/mpeg/nv84.c
  48. 1 2
      drivers/gpu/drm/nouveau/core/engine/ppp/nvc0.c
  49. 13 14
      drivers/gpu/drm/nouveau/core/engine/vp/nv84.c
  50. 93 0
      drivers/gpu/drm/nouveau/core/engine/vp/nv98.c
  51. 1 2
      drivers/gpu/drm/nouveau/core/engine/vp/nvc0.c
  52. 1 2
      drivers/gpu/drm/nouveau/core/engine/vp/nve0.c
  53. 170 0
      drivers/gpu/drm/nouveau/core/engine/xtensa.c
  54. 3 2
      drivers/gpu/drm/nouveau/core/include/core/device.h
  55. 0 2
      drivers/gpu/drm/nouveau/core/include/core/mm.h
  56. 1 0
      drivers/gpu/drm/nouveau/core/include/engine/bsp.h
  57. 1 0
      drivers/gpu/drm/nouveau/core/include/engine/copy.h
  58. 0 0
      drivers/gpu/drm/nouveau/core/include/engine/falcon.h
  59. 0 1
      drivers/gpu/drm/nouveau/core/include/engine/mpeg.h
  60. 1 0
      drivers/gpu/drm/nouveau/core/include/engine/vp.h
  61. 38 0
      drivers/gpu/drm/nouveau/core/include/engine/xtensa.h
  62. 0 2
      drivers/gpu/drm/nouveau/core/include/subdev/clock.h
  63. 15 6
      drivers/gpu/drm/nouveau/core/include/subdev/devinit.h
  64. 25 70
      drivers/gpu/drm/nouveau/core/include/subdev/fb.h
  65. 1 4
      drivers/gpu/drm/nouveau/core/include/subdev/vm.h
  66. 9 4
      drivers/gpu/drm/nouveau/core/subdev/bar/nv50.c
  67. 4 6
      drivers/gpu/drm/nouveau/core/subdev/bar/nvc0.c
  68. 5 1
      drivers/gpu/drm/nouveau/core/subdev/bios/base.c
  69. 3 4
      drivers/gpu/drm/nouveau/core/subdev/bios/init.c
  70. 7 267
      drivers/gpu/drm/nouveau/core/subdev/clock/nv04.c
  71. 0 1
      drivers/gpu/drm/nouveau/core/subdev/clock/nv40.c
  72. 0 45
      drivers/gpu/drm/nouveau/core/subdev/clock/nv50.c
  73. 1 36
      drivers/gpu/drm/nouveau/core/subdev/clock/nva3.c
  74. 0 36
      drivers/gpu/drm/nouveau/core/subdev/clock/nvc0.c
  75. 2 2
      drivers/gpu/drm/nouveau/core/subdev/clock/pll.h
  76. 8 9
      drivers/gpu/drm/nouveau/core/subdev/clock/pllnv04.c
  77. 13 5
      drivers/gpu/drm/nouveau/core/subdev/clock/pllnva3.c
  78. 13 10
      drivers/gpu/drm/nouveau/core/subdev/devinit/base.c
  79. 304 25
      drivers/gpu/drm/nouveau/core/subdev/devinit/nv04.c
  80. 2 1
      drivers/gpu/drm/nouveau/core/subdev/devinit/nv05.c
  81. 2 1
      drivers/gpu/drm/nouveau/core/subdev/devinit/nv10.c
  82. 2 2
      drivers/gpu/drm/nouveau/core/subdev/devinit/nv1a.c
  83. 2 3
      drivers/gpu/drm/nouveau/core/subdev/devinit/nv20.c
  84. 53 25
      drivers/gpu/drm/nouveau/core/subdev/devinit/nv50.c
  85. 87 0
      drivers/gpu/drm/nouveau/core/subdev/devinit/nva3.c
  86. 90 0
      drivers/gpu/drm/nouveau/core/subdev/devinit/nvc0.c
  87. 25 0
      drivers/gpu/drm/nouveau/core/subdev/devinit/priv.h
  88. 74 51
      drivers/gpu/drm/nouveau/core/subdev/fb/base.c
  89. 3 51
      drivers/gpu/drm/nouveau/core/subdev/fb/nv04.c
  90. 3 17
      drivers/gpu/drm/nouveau/core/subdev/fb/nv10.c
  91. 3 29
      drivers/gpu/drm/nouveau/core/subdev/fb/nv1a.c
  92. 4 22
      drivers/gpu/drm/nouveau/core/subdev/fb/nv20.c
  93. 4 5
      drivers/gpu/drm/nouveau/core/subdev/fb/nv25.c
  94. 4 5
      drivers/gpu/drm/nouveau/core/subdev/fb/nv30.c
  95. 4 5
      drivers/gpu/drm/nouveau/core/subdev/fb/nv35.c
  96. 4 5
      drivers/gpu/drm/nouveau/core/subdev/fb/nv36.c
  97. 4 21
      drivers/gpu/drm/nouveau/core/subdev/fb/nv40.c
  98. 3 20
      drivers/gpu/drm/nouveau/core/subdev/fb/nv41.c
  99. 3 19
      drivers/gpu/drm/nouveau/core/subdev/fb/nv44.c
  100. 3 4
      drivers/gpu/drm/nouveau/core/subdev/fb/nv46.c

+ 17 - 1
drivers/gpu/drm/nouveau/Makefile

@@ -12,7 +12,6 @@ nouveau-y += core/core/engctx.o
 nouveau-y += core/core/engine.o
 nouveau-y += core/core/enum.o
 nouveau-y += core/core/event.o
-nouveau-y += core/core/falcon.o
 nouveau-y += core/core/gpuobj.o
 nouveau-y += core/core/handle.o
 nouveau-y += core/core/mm.o
@@ -60,6 +59,8 @@ nouveau-y += core/subdev/devinit/nv10.o
 nouveau-y += core/subdev/devinit/nv1a.o
 nouveau-y += core/subdev/devinit/nv20.o
 nouveau-y += core/subdev/devinit/nv50.o
+nouveau-y += core/subdev/devinit/nva3.o
+nouveau-y += core/subdev/devinit/nvc0.o
 nouveau-y += core/subdev/fb/base.o
 nouveau-y += core/subdev/fb/nv04.o
 nouveau-y += core/subdev/fb/nv10.o
@@ -78,6 +79,17 @@ nouveau-y += core/subdev/fb/nv49.o
 nouveau-y += core/subdev/fb/nv4e.o
 nouveau-y += core/subdev/fb/nv50.o
 nouveau-y += core/subdev/fb/nvc0.o
+nouveau-y += core/subdev/fb/ramnv04.o
+nouveau-y += core/subdev/fb/ramnv10.o
+nouveau-y += core/subdev/fb/ramnv1a.o
+nouveau-y += core/subdev/fb/ramnv20.o
+nouveau-y += core/subdev/fb/ramnv40.o
+nouveau-y += core/subdev/fb/ramnv41.o
+nouveau-y += core/subdev/fb/ramnv44.o
+nouveau-y += core/subdev/fb/ramnv49.o
+nouveau-y += core/subdev/fb/ramnv4e.o
+nouveau-y += core/subdev/fb/ramnv50.o
+nouveau-y += core/subdev/fb/ramnvc0.o
 nouveau-y += core/subdev/gpio/base.o
 nouveau-y += core/subdev/gpio/nv10.o
 nouveau-y += core/subdev/gpio/nv50.o
@@ -129,12 +141,15 @@ nouveau-y += core/subdev/vm/nv44.o
 nouveau-y += core/subdev/vm/nv50.o
 nouveau-y += core/subdev/vm/nvc0.o
 
+nouveau-y += core/engine/falcon.o
+nouveau-y += core/engine/xtensa.o
 nouveau-y += core/engine/dmaobj/base.o
 nouveau-y += core/engine/dmaobj/nv04.o
 nouveau-y += core/engine/dmaobj/nv50.o
 nouveau-y += core/engine/dmaobj/nvc0.o
 nouveau-y += core/engine/dmaobj/nvd0.o
 nouveau-y += core/engine/bsp/nv84.o
+nouveau-y += core/engine/bsp/nv98.o
 nouveau-y += core/engine/bsp/nvc0.o
 nouveau-y += core/engine/bsp/nve0.o
 nouveau-y += core/engine/copy/nva3.o
@@ -209,6 +224,7 @@ nouveau-y += core/engine/software/nv10.o
 nouveau-y += core/engine/software/nv50.o
 nouveau-y += core/engine/software/nvc0.o
 nouveau-y += core/engine/vp/nv84.o
+nouveau-y += core/engine/vp/nv98.o
 nouveau-y += core/engine/vp/nvc0.o
 nouveau-y += core/engine/vp/nve0.o
 

+ 0 - 1
drivers/gpu/drm/nouveau/core/core/mm.c

@@ -208,7 +208,6 @@ nouveau_mm_init(struct nouveau_mm *mm, u32 offset, u32 length, u32 block)
 	struct nouveau_mm_node *node;
 
 	if (block) {
-		mutex_init(&mm->mutex);
 		INIT_LIST_HEAD(&mm->nodes);
 		INIT_LIST_HEAD(&mm->free);
 		mm->block_size = block;

+ 13 - 14
drivers/gpu/drm/nouveau/core/engine/bsp/nv84.c

@@ -19,24 +19,19 @@
  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  * OTHER DEALINGS IN THE SOFTWARE.
  *
- * Authors: Ben Skeggs
+ * Authors: Ben Skeggs, Ilia Mirkin
  */
 
-#include <core/engctx.h>
-#include <core/class.h>
-
+#include <engine/xtensa.h>
 #include <engine/bsp.h>
 
-struct nv84_bsp_priv {
-	struct nouveau_engine base;
-};
-
 /*******************************************************************************
  * BSP object classes
  ******************************************************************************/
 
 static struct nouveau_oclass
 nv84_bsp_sclass[] = {
+	{ 0x74b0, &nouveau_object_ofuncs },
 	{},
 };
 
@@ -48,7 +43,7 @@ static struct nouveau_oclass
 nv84_bsp_cclass = {
 	.handle = NV_ENGCTX(BSP, 0x84),
 	.ofuncs = &(struct nouveau_ofuncs) {
-		.ctor = _nouveau_engctx_ctor,
+		.ctor = _nouveau_xtensa_engctx_ctor,
 		.dtor = _nouveau_engctx_dtor,
 		.init = _nouveau_engctx_init,
 		.fini = _nouveau_engctx_fini,
@@ -66,10 +61,10 @@ nv84_bsp_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
 	      struct nouveau_oclass *oclass, void *data, u32 size,
 	      struct nouveau_object **pobject)
 {
-	struct nv84_bsp_priv *priv;
+	struct nouveau_xtensa *priv;
 	int ret;
 
-	ret = nouveau_engine_create(parent, engine, oclass, true,
+	ret = nouveau_xtensa_create(parent, engine, oclass, 0x103000, true,
 				    "PBSP", "bsp", &priv);
 	*pobject = nv_object(priv);
 	if (ret)
@@ -78,6 +73,8 @@ nv84_bsp_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
 	nv_subdev(priv)->unit = 0x04008000;
 	nv_engine(priv)->cclass = &nv84_bsp_cclass;
 	nv_engine(priv)->sclass = nv84_bsp_sclass;
+	priv->fifo_val = 0x1111;
+	priv->unkd28 = 0x90044;
 	return 0;
 }
 
@@ -86,8 +83,10 @@ nv84_bsp_oclass = {
 	.handle = NV_ENGINE(BSP, 0x84),
 	.ofuncs = &(struct nouveau_ofuncs) {
 		.ctor = nv84_bsp_ctor,
-		.dtor = _nouveau_engine_dtor,
-		.init = _nouveau_engine_init,
-		.fini = _nouveau_engine_fini,
+		.dtor = _nouveau_xtensa_dtor,
+		.init = _nouveau_xtensa_init,
+		.fini = _nouveau_xtensa_fini,
+		.rd32 = _nouveau_xtensa_rd32,
+		.wr32 = _nouveau_xtensa_wr32,
 	},
 };

+ 93 - 0
drivers/gpu/drm/nouveau/core/engine/bsp/nv98.c

@@ -0,0 +1,93 @@
+/*
+ * Copyright 2012 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs
+ */
+
+#include <core/engctx.h>
+#include <core/class.h>
+
+#include <engine/bsp.h>
+
+struct nv98_bsp_priv {
+	struct nouveau_engine base;
+};
+
+/*******************************************************************************
+ * BSP object classes
+ ******************************************************************************/
+
+static struct nouveau_oclass
+nv98_bsp_sclass[] = {
+	{},
+};
+
+/*******************************************************************************
+ * BSP context
+ ******************************************************************************/
+
+static struct nouveau_oclass
+nv98_bsp_cclass = {
+	.handle = NV_ENGCTX(BSP, 0x98),
+	.ofuncs = &(struct nouveau_ofuncs) {
+		.ctor = _nouveau_engctx_ctor,
+		.dtor = _nouveau_engctx_dtor,
+		.init = _nouveau_engctx_init,
+		.fini = _nouveau_engctx_fini,
+		.rd32 = _nouveau_engctx_rd32,
+		.wr32 = _nouveau_engctx_wr32,
+	},
+};
+
+/*******************************************************************************
+ * BSP engine/subdev functions
+ ******************************************************************************/
+
+static int
+nv98_bsp_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
+	      struct nouveau_oclass *oclass, void *data, u32 size,
+	      struct nouveau_object **pobject)
+{
+	struct nv98_bsp_priv *priv;
+	int ret;
+
+	ret = nouveau_engine_create(parent, engine, oclass, true,
+				    "PBSP", "bsp", &priv);
+	*pobject = nv_object(priv);
+	if (ret)
+		return ret;
+
+	nv_subdev(priv)->unit = 0x04008000;
+	nv_engine(priv)->cclass = &nv98_bsp_cclass;
+	nv_engine(priv)->sclass = nv98_bsp_sclass;
+	return 0;
+}
+
+struct nouveau_oclass
+nv98_bsp_oclass = {
+	.handle = NV_ENGINE(BSP, 0x98),
+	.ofuncs = &(struct nouveau_ofuncs) {
+		.ctor = nv98_bsp_ctor,
+		.dtor = _nouveau_engine_dtor,
+		.init = _nouveau_engine_init,
+		.fini = _nouveau_engine_fini,
+	},
+};

+ 1 - 2
drivers/gpu/drm/nouveau/core/engine/bsp/nvc0.c

@@ -22,8 +22,7 @@
  * Authors: Maarten Lankhorst
  */
 
-#include <core/falcon.h>
-
+#include <engine/falcon.h>
 #include <engine/bsp.h>
 
 struct nvc0_bsp_priv {

+ 1 - 2
drivers/gpu/drm/nouveau/core/engine/bsp/nve0.c

@@ -22,8 +22,7 @@
  * Authors: Ben Skeggs
  */
 
-#include <core/falcon.h>
-
+#include <engine/falcon.h>
 #include <engine/bsp.h>
 
 struct nve0_bsp_priv {

+ 2 - 2
drivers/gpu/drm/nouveau/core/engine/copy/fuc/nva3.fuc.h

@@ -1,4 +1,4 @@
-static u32 nva3_pcopy_data[] = {
+uint32_t nva3_pcopy_data[] = {
 /* 0x0000: ctx_object */
 	0x00000000,
 /* 0x0004: ctx_dma */
@@ -183,7 +183,7 @@ static u32 nva3_pcopy_data[] = {
 	0x00000800,
 };
 
-static u32 nva3_pcopy_code[] = {
+uint32_t nva3_pcopy_code[] = {
 /* 0x0000: main */
 	0x04fe04bd,
 	0x3517f000,

+ 2 - 2
drivers/gpu/drm/nouveau/core/engine/copy/fuc/nvc0.fuc.h

@@ -1,4 +1,4 @@
-static u32 nvc0_pcopy_data[] = {
+uint32_t nvc0_pcopy_data[] = {
 /* 0x0000: ctx_object */
 	0x00000000,
 /* 0x0004: ctx_query_address_high */
@@ -171,7 +171,7 @@ static u32 nvc0_pcopy_data[] = {
 	0x00000800,
 };
 
-static u32 nvc0_pcopy_code[] = {
+uint32_t nvc0_pcopy_code[] = {
 /* 0x0000: main */
 	0x04fe04bd,
 	0x3517f000,

+ 7 - 14
drivers/gpu/drm/nouveau/core/engine/copy/nva3.c

@@ -22,16 +22,17 @@
  * Authors: Ben Skeggs
  */
 
-#include <core/client.h>
-#include <core/falcon.h>
-#include <core/class.h>
-#include <core/enum.h>
+#include <engine/falcon.h>
+#include <engine/fifo.h>
+#include <engine/copy.h>
 
 #include <subdev/fb.h>
 #include <subdev/vm.h>
 
-#include <engine/fifo.h>
-#include <engine/copy.h>
+#include <core/client.h>
+#include <core/class.h>
+#include <core/enum.h>
+
 
 #include "fuc/nva3.fuc.h"
 
@@ -116,13 +117,6 @@ nva3_copy_intr(struct nouveau_subdev *subdev)
 	nouveau_engctx_put(engctx);
 }
 
-static int
-nva3_copy_tlb_flush(struct nouveau_engine *engine)
-{
-	nv50_vm_flush_engine(&engine->base, 0x0d);
-	return 0;
-}
-
 static int
 nva3_copy_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
 	       struct nouveau_oclass *oclass, void *data, u32 size,
@@ -142,7 +136,6 @@ nva3_copy_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
 	nv_subdev(priv)->intr = nva3_copy_intr;
 	nv_engine(priv)->cclass = &nva3_copy_cclass;
 	nv_engine(priv)->sclass = nva3_copy_sclass;
-	nv_engine(priv)->tlb_flush = nva3_copy_tlb_flush;
 	nv_falcon(priv)->code.data = nva3_pcopy_code;
 	nv_falcon(priv)->code.size = sizeof(nva3_pcopy_code);
 	nv_falcon(priv)->data.data = nva3_pcopy_data;

+ 6 - 4
drivers/gpu/drm/nouveau/core/engine/copy/nvc0.c

@@ -22,13 +22,15 @@
  * Authors: Ben Skeggs
  */
 
-#include <core/falcon.h>
-#include <core/class.h>
-#include <core/enum.h>
-
+#include <engine/falcon.h>
 #include <engine/fifo.h>
 #include <engine/copy.h>
 
+#include <core/class.h>
+#include <core/enum.h>
+#include <core/class.h>
+#include <core/enum.h>
+
 #include "fuc/nvc0.fuc.h"
 
 struct nvc0_copy_priv {

+ 47 - 0
drivers/gpu/drm/nouveau/core/engine/copy/nve0.c

@@ -67,6 +67,19 @@ nve0_copy_cclass = {
  * PCOPY engine/subdev functions
  ******************************************************************************/
 
+static void
+nve0_copy_intr(struct nouveau_subdev *subdev)
+{
+	const int ce = nv_subidx(nv_object(subdev)) - NVDEV_ENGINE_COPY0;
+	struct nve0_copy_priv *priv = (void *)subdev;
+	u32 stat = nv_rd32(priv, 0x104908 + (ce * 0x1000));
+
+	if (stat) {
+		nv_warn(priv, "unhandled intr 0x%08x\n", stat);
+		nv_wr32(priv, 0x104908 + (ce * 0x1000), stat);
+	}
+}
+
 static int
 nve0_copy0_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
 		struct nouveau_oclass *oclass, void *data, u32 size,
@@ -85,6 +98,7 @@ nve0_copy0_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
 		return ret;
 
 	nv_subdev(priv)->unit = 0x00000040;
+	nv_subdev(priv)->intr = nve0_copy_intr;
 	nv_engine(priv)->cclass = &nve0_copy_cclass;
 	nv_engine(priv)->sclass = nve0_copy_sclass;
 	return 0;
@@ -108,6 +122,28 @@ nve0_copy1_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
 		return ret;
 
 	nv_subdev(priv)->unit = 0x00000080;
+	nv_subdev(priv)->intr = nve0_copy_intr;
+	nv_engine(priv)->cclass = &nve0_copy_cclass;
+	nv_engine(priv)->sclass = nve0_copy_sclass;
+	return 0;
+}
+
+static int
+nve0_copy2_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
+		struct nouveau_oclass *oclass, void *data, u32 size,
+		struct nouveau_object **pobject)
+{
+	struct nve0_copy_priv *priv;
+	int ret;
+
+	ret = nouveau_engine_create(parent, engine, oclass, true,
+				    "PCE2", "copy2", &priv);
+	*pobject = nv_object(priv);
+	if (ret)
+		return ret;
+
+	nv_subdev(priv)->unit = 0x00200000;
+	nv_subdev(priv)->intr = nve0_copy_intr;
 	nv_engine(priv)->cclass = &nve0_copy_cclass;
 	nv_engine(priv)->sclass = nve0_copy_sclass;
 	return 0;
@@ -134,3 +170,14 @@ nve0_copy1_oclass = {
 		.fini = _nouveau_engine_fini,
 	},
 };
+
+struct nouveau_oclass
+nve0_copy2_oclass = {
+	.handle = NV_ENGINE(COPY2, 0xe0),
+	.ofuncs = &(struct nouveau_ofuncs) {
+		.ctor = nve0_copy2_ctor,
+		.dtor = _nouveau_engine_dtor,
+		.init = _nouveau_engine_init,
+		.fini = _nouveau_engine_fini,
+	},
+};

+ 2 - 2
drivers/gpu/drm/nouveau/core/engine/crypt/fuc/nv98.fuc.h

@@ -1,4 +1,4 @@
-static uint32_t nv98_pcrypt_data[] = {
+uint32_t nv98_pcrypt_data[] = {
 /* 0x0000: ctx_dma */
 /* 0x0000: ctx_dma_query */
 	0x00000000,
@@ -150,7 +150,7 @@ static uint32_t nv98_pcrypt_data[] = {
 	0x00000000,
 };
 
-static uint32_t nv98_pcrypt_code[] = {
+uint32_t nv98_pcrypt_code[] = {
 	0x17f004bd,
 	0x0010fe35,
 	0xf10004fe,

+ 0 - 8
drivers/gpu/drm/nouveau/core/engine/crypt/nv84.c

@@ -140,13 +140,6 @@ nv84_crypt_intr(struct nouveau_subdev *subdev)
 	nouveau_engctx_put(engctx);
 }
 
-static int
-nv84_crypt_tlb_flush(struct nouveau_engine *engine)
-{
-	nv50_vm_flush_engine(&engine->base, 0x0a);
-	return 0;
-}
-
 static int
 nv84_crypt_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
 	       struct nouveau_oclass *oclass, void *data, u32 size,
@@ -165,7 +158,6 @@ nv84_crypt_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
 	nv_subdev(priv)->intr = nv84_crypt_intr;
 	nv_engine(priv)->cclass = &nv84_crypt_cclass;
 	nv_engine(priv)->sclass = nv84_crypt_sclass;
-	nv_engine(priv)->tlb_flush = nv84_crypt_tlb_flush;
 	return 0;
 }
 

+ 1 - 9
drivers/gpu/drm/nouveau/core/engine/crypt/nv98.c

@@ -27,11 +27,11 @@
 #include <core/enum.h>
 #include <core/class.h>
 #include <core/engctx.h>
-#include <core/falcon.h>
 
 #include <subdev/timer.h>
 #include <subdev/fb.h>
 
+#include <engine/falcon.h>
 #include <engine/fifo.h>
 #include <engine/crypt.h>
 
@@ -118,13 +118,6 @@ nv98_crypt_intr(struct nouveau_subdev *subdev)
 	nouveau_engctx_put(engctx);
 }
 
-static int
-nv98_crypt_tlb_flush(struct nouveau_engine *engine)
-{
-	nv50_vm_flush_engine(&engine->base, 0x0a);
-	return 0;
-}
-
 static int
 nv98_crypt_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
 	       struct nouveau_oclass *oclass, void *data, u32 size,
@@ -143,7 +136,6 @@ nv98_crypt_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
 	nv_subdev(priv)->intr = nv98_crypt_intr;
 	nv_engine(priv)->cclass = &nv98_crypt_cclass;
 	nv_engine(priv)->sclass = nv98_crypt_sclass;
-	nv_engine(priv)->tlb_flush = nv98_crypt_tlb_flush;
 	nv_falcon(priv)->code.data = nv98_pcrypt_code;
 	nv_falcon(priv)->code.size = sizeof(nv98_pcrypt_code);
 	nv_falcon(priv)->data.data = nv98_pcrypt_data;

+ 18 - 18
drivers/gpu/drm/nouveau/core/engine/device/nv50.c

@@ -227,9 +227,9 @@ nv50_identify(struct nouveau_device *device)
 		device->oclass[NVDEV_ENGINE_FIFO   ] = &nv84_fifo_oclass;
 		device->oclass[NVDEV_ENGINE_SW     ] = &nv50_software_oclass;
 		device->oclass[NVDEV_ENGINE_GR     ] = &nv50_graph_oclass;
-		device->oclass[NVDEV_ENGINE_VP     ] = &nv84_vp_oclass;
+		device->oclass[NVDEV_ENGINE_VP     ] = &nv98_vp_oclass;
 		device->oclass[NVDEV_ENGINE_CRYPT  ] = &nv98_crypt_oclass;
-		device->oclass[NVDEV_ENGINE_BSP    ] = &nv84_bsp_oclass;
+		device->oclass[NVDEV_ENGINE_BSP    ] = &nv98_bsp_oclass;
 		device->oclass[NVDEV_ENGINE_PPP    ] = &nv98_ppp_oclass;
 		device->oclass[NVDEV_ENGINE_DISP   ] = &nv94_disp_oclass;
 		break;
@@ -279,9 +279,9 @@ nv50_identify(struct nouveau_device *device)
 		device->oclass[NVDEV_ENGINE_FIFO   ] = &nv84_fifo_oclass;
 		device->oclass[NVDEV_ENGINE_SW     ] = &nv50_software_oclass;
 		device->oclass[NVDEV_ENGINE_GR     ] = &nv50_graph_oclass;
-		device->oclass[NVDEV_ENGINE_VP     ] = &nv84_vp_oclass;
+		device->oclass[NVDEV_ENGINE_VP     ] = &nv98_vp_oclass;
 		device->oclass[NVDEV_ENGINE_CRYPT  ] = &nv98_crypt_oclass;
-		device->oclass[NVDEV_ENGINE_BSP    ] = &nv84_bsp_oclass;
+		device->oclass[NVDEV_ENGINE_BSP    ] = &nv98_bsp_oclass;
 		device->oclass[NVDEV_ENGINE_PPP    ] = &nv98_ppp_oclass;
 		device->oclass[NVDEV_ENGINE_DISP   ] = &nv94_disp_oclass;
 		break;
@@ -305,9 +305,9 @@ nv50_identify(struct nouveau_device *device)
 		device->oclass[NVDEV_ENGINE_FIFO   ] = &nv84_fifo_oclass;
 		device->oclass[NVDEV_ENGINE_SW     ] = &nv50_software_oclass;
 		device->oclass[NVDEV_ENGINE_GR     ] = &nv50_graph_oclass;
-		device->oclass[NVDEV_ENGINE_VP     ] = &nv84_vp_oclass;
+		device->oclass[NVDEV_ENGINE_VP     ] = &nv98_vp_oclass;
 		device->oclass[NVDEV_ENGINE_CRYPT  ] = &nv98_crypt_oclass;
-		device->oclass[NVDEV_ENGINE_BSP    ] = &nv84_bsp_oclass;
+		device->oclass[NVDEV_ENGINE_BSP    ] = &nv98_bsp_oclass;
 		device->oclass[NVDEV_ENGINE_PPP    ] = &nv98_ppp_oclass;
 		device->oclass[NVDEV_ENGINE_DISP   ] = &nv94_disp_oclass;
 		break;
@@ -319,7 +319,7 @@ nv50_identify(struct nouveau_device *device)
 		device->oclass[NVDEV_SUBDEV_CLOCK  ] = &nva3_clock_oclass;
 		device->oclass[NVDEV_SUBDEV_THERM  ] = &nva3_therm_oclass;
 		device->oclass[NVDEV_SUBDEV_MXM    ] = &nv50_mxm_oclass;
-		device->oclass[NVDEV_SUBDEV_DEVINIT] = &nv50_devinit_oclass;
+		device->oclass[NVDEV_SUBDEV_DEVINIT] = &nva3_devinit_oclass;
 		device->oclass[NVDEV_SUBDEV_MC     ] = &nv98_mc_oclass;
 		device->oclass[NVDEV_SUBDEV_BUS    ] = &nv50_bus_oclass;
 		device->oclass[NVDEV_SUBDEV_TIMER  ] = &nv04_timer_oclass;
@@ -332,8 +332,8 @@ nv50_identify(struct nouveau_device *device)
 		device->oclass[NVDEV_ENGINE_SW     ] = &nv50_software_oclass;
 		device->oclass[NVDEV_ENGINE_GR     ] = &nv50_graph_oclass;
 		device->oclass[NVDEV_ENGINE_MPEG   ] = &nv84_mpeg_oclass;
-		device->oclass[NVDEV_ENGINE_VP     ] = &nv84_vp_oclass;
-		device->oclass[NVDEV_ENGINE_BSP    ] = &nv84_bsp_oclass;
+		device->oclass[NVDEV_ENGINE_VP     ] = &nv98_vp_oclass;
+		device->oclass[NVDEV_ENGINE_BSP    ] = &nv98_bsp_oclass;
 		device->oclass[NVDEV_ENGINE_PPP    ] = &nv98_ppp_oclass;
 		device->oclass[NVDEV_ENGINE_COPY0  ] = &nva3_copy_oclass;
 		device->oclass[NVDEV_ENGINE_DISP   ] = &nva3_disp_oclass;
@@ -346,7 +346,7 @@ nv50_identify(struct nouveau_device *device)
 		device->oclass[NVDEV_SUBDEV_CLOCK  ] = &nva3_clock_oclass;
 		device->oclass[NVDEV_SUBDEV_THERM  ] = &nva3_therm_oclass;
 		device->oclass[NVDEV_SUBDEV_MXM    ] = &nv50_mxm_oclass;
-		device->oclass[NVDEV_SUBDEV_DEVINIT] = &nv50_devinit_oclass;
+		device->oclass[NVDEV_SUBDEV_DEVINIT] = &nva3_devinit_oclass;
 		device->oclass[NVDEV_SUBDEV_MC     ] = &nv98_mc_oclass;
 		device->oclass[NVDEV_SUBDEV_BUS    ] = &nv50_bus_oclass;
 		device->oclass[NVDEV_SUBDEV_TIMER  ] = &nv04_timer_oclass;
@@ -358,8 +358,8 @@ nv50_identify(struct nouveau_device *device)
 		device->oclass[NVDEV_ENGINE_FIFO   ] = &nv84_fifo_oclass;
 		device->oclass[NVDEV_ENGINE_SW     ] = &nv50_software_oclass;
 		device->oclass[NVDEV_ENGINE_GR     ] = &nv50_graph_oclass;
-		device->oclass[NVDEV_ENGINE_VP     ] = &nv84_vp_oclass;
-		device->oclass[NVDEV_ENGINE_BSP    ] = &nv84_bsp_oclass;
+		device->oclass[NVDEV_ENGINE_VP     ] = &nv98_vp_oclass;
+		device->oclass[NVDEV_ENGINE_BSP    ] = &nv98_bsp_oclass;
 		device->oclass[NVDEV_ENGINE_PPP    ] = &nv98_ppp_oclass;
 		device->oclass[NVDEV_ENGINE_COPY0  ] = &nva3_copy_oclass;
 		device->oclass[NVDEV_ENGINE_DISP   ] = &nva3_disp_oclass;
@@ -372,7 +372,7 @@ nv50_identify(struct nouveau_device *device)
 		device->oclass[NVDEV_SUBDEV_CLOCK  ] = &nva3_clock_oclass;
 		device->oclass[NVDEV_SUBDEV_THERM  ] = &nva3_therm_oclass;
 		device->oclass[NVDEV_SUBDEV_MXM    ] = &nv50_mxm_oclass;
-		device->oclass[NVDEV_SUBDEV_DEVINIT] = &nv50_devinit_oclass;
+		device->oclass[NVDEV_SUBDEV_DEVINIT] = &nva3_devinit_oclass;
 		device->oclass[NVDEV_SUBDEV_MC     ] = &nv98_mc_oclass;
 		device->oclass[NVDEV_SUBDEV_BUS    ] = &nv50_bus_oclass;
 		device->oclass[NVDEV_SUBDEV_TIMER  ] = &nv04_timer_oclass;
@@ -384,8 +384,8 @@ nv50_identify(struct nouveau_device *device)
 		device->oclass[NVDEV_ENGINE_FIFO   ] = &nv84_fifo_oclass;
 		device->oclass[NVDEV_ENGINE_SW     ] = &nv50_software_oclass;
 		device->oclass[NVDEV_ENGINE_GR     ] = &nv50_graph_oclass;
-		device->oclass[NVDEV_ENGINE_VP     ] = &nv84_vp_oclass;
-		device->oclass[NVDEV_ENGINE_BSP    ] = &nv84_bsp_oclass;
+		device->oclass[NVDEV_ENGINE_VP     ] = &nv98_vp_oclass;
+		device->oclass[NVDEV_ENGINE_BSP    ] = &nv98_bsp_oclass;
 		device->oclass[NVDEV_ENGINE_PPP    ] = &nv98_ppp_oclass;
 		device->oclass[NVDEV_ENGINE_COPY0  ] = &nva3_copy_oclass;
 		device->oclass[NVDEV_ENGINE_DISP   ] = &nva3_disp_oclass;
@@ -398,7 +398,7 @@ nv50_identify(struct nouveau_device *device)
 		device->oclass[NVDEV_SUBDEV_CLOCK  ] = &nva3_clock_oclass;
 		device->oclass[NVDEV_SUBDEV_THERM  ] = &nva3_therm_oclass;
 		device->oclass[NVDEV_SUBDEV_MXM    ] = &nv50_mxm_oclass;
-		device->oclass[NVDEV_SUBDEV_DEVINIT] = &nv50_devinit_oclass;
+		device->oclass[NVDEV_SUBDEV_DEVINIT] = &nva3_devinit_oclass;
 		device->oclass[NVDEV_SUBDEV_MC     ] = &nv98_mc_oclass;
 		device->oclass[NVDEV_SUBDEV_BUS    ] = &nv50_bus_oclass;
 		device->oclass[NVDEV_SUBDEV_TIMER  ] = &nv04_timer_oclass;
@@ -410,8 +410,8 @@ nv50_identify(struct nouveau_device *device)
 		device->oclass[NVDEV_ENGINE_FIFO   ] = &nv84_fifo_oclass;
 		device->oclass[NVDEV_ENGINE_SW     ] = &nv50_software_oclass;
 		device->oclass[NVDEV_ENGINE_GR     ] = &nv50_graph_oclass;
-		device->oclass[NVDEV_ENGINE_VP     ] = &nv84_vp_oclass;
-		device->oclass[NVDEV_ENGINE_BSP    ] = &nv84_bsp_oclass;
+		device->oclass[NVDEV_ENGINE_VP     ] = &nv98_vp_oclass;
+		device->oclass[NVDEV_ENGINE_BSP    ] = &nv98_bsp_oclass;
 		device->oclass[NVDEV_ENGINE_PPP    ] = &nv98_ppp_oclass;
 		device->oclass[NVDEV_ENGINE_COPY0  ] = &nva3_copy_oclass;
 		device->oclass[NVDEV_ENGINE_DISP   ] = &nva3_disp_oclass;

+ 8 - 8
drivers/gpu/drm/nouveau/core/engine/device/nvc0.c

@@ -62,7 +62,7 @@ nvc0_identify(struct nouveau_device *device)
 		device->oclass[NVDEV_SUBDEV_CLOCK  ] = &nvc0_clock_oclass;
 		device->oclass[NVDEV_SUBDEV_THERM  ] = &nva3_therm_oclass;
 		device->oclass[NVDEV_SUBDEV_MXM    ] = &nv50_mxm_oclass;
-		device->oclass[NVDEV_SUBDEV_DEVINIT] = &nv50_devinit_oclass;
+		device->oclass[NVDEV_SUBDEV_DEVINIT] = &nvc0_devinit_oclass;
 		device->oclass[NVDEV_SUBDEV_MC     ] = &nvc0_mc_oclass;
 		device->oclass[NVDEV_SUBDEV_BUS    ] = &nvc0_bus_oclass;
 		device->oclass[NVDEV_SUBDEV_TIMER  ] = &nv04_timer_oclass;
@@ -91,7 +91,7 @@ nvc0_identify(struct nouveau_device *device)
 		device->oclass[NVDEV_SUBDEV_CLOCK  ] = &nvc0_clock_oclass;
 		device->oclass[NVDEV_SUBDEV_THERM  ] = &nva3_therm_oclass;
 		device->oclass[NVDEV_SUBDEV_MXM    ] = &nv50_mxm_oclass;
-		device->oclass[NVDEV_SUBDEV_DEVINIT] = &nv50_devinit_oclass;
+		device->oclass[NVDEV_SUBDEV_DEVINIT] = &nvc0_devinit_oclass;
 		device->oclass[NVDEV_SUBDEV_MC     ] = &nvc0_mc_oclass;
 		device->oclass[NVDEV_SUBDEV_BUS    ] = &nvc0_bus_oclass;
 		device->oclass[NVDEV_SUBDEV_TIMER  ] = &nv04_timer_oclass;
@@ -120,7 +120,7 @@ nvc0_identify(struct nouveau_device *device)
 		device->oclass[NVDEV_SUBDEV_CLOCK  ] = &nvc0_clock_oclass;
 		device->oclass[NVDEV_SUBDEV_THERM  ] = &nva3_therm_oclass;
 		device->oclass[NVDEV_SUBDEV_MXM    ] = &nv50_mxm_oclass;
-		device->oclass[NVDEV_SUBDEV_DEVINIT] = &nv50_devinit_oclass;
+		device->oclass[NVDEV_SUBDEV_DEVINIT] = &nvc0_devinit_oclass;
 		device->oclass[NVDEV_SUBDEV_MC     ] = &nvc0_mc_oclass;
 		device->oclass[NVDEV_SUBDEV_BUS    ] = &nvc0_bus_oclass;
 		device->oclass[NVDEV_SUBDEV_TIMER  ] = &nv04_timer_oclass;
@@ -148,7 +148,7 @@ nvc0_identify(struct nouveau_device *device)
 		device->oclass[NVDEV_SUBDEV_CLOCK  ] = &nvc0_clock_oclass;
 		device->oclass[NVDEV_SUBDEV_THERM  ] = &nva3_therm_oclass;
 		device->oclass[NVDEV_SUBDEV_MXM    ] = &nv50_mxm_oclass;
-		device->oclass[NVDEV_SUBDEV_DEVINIT] = &nv50_devinit_oclass;
+		device->oclass[NVDEV_SUBDEV_DEVINIT] = &nvc0_devinit_oclass;
 		device->oclass[NVDEV_SUBDEV_MC     ] = &nvc0_mc_oclass;
 		device->oclass[NVDEV_SUBDEV_BUS    ] = &nvc0_bus_oclass;
 		device->oclass[NVDEV_SUBDEV_TIMER  ] = &nv04_timer_oclass;
@@ -177,7 +177,7 @@ nvc0_identify(struct nouveau_device *device)
 		device->oclass[NVDEV_SUBDEV_CLOCK  ] = &nvc0_clock_oclass;
 		device->oclass[NVDEV_SUBDEV_THERM  ] = &nva3_therm_oclass;
 		device->oclass[NVDEV_SUBDEV_MXM    ] = &nv50_mxm_oclass;
-		device->oclass[NVDEV_SUBDEV_DEVINIT] = &nv50_devinit_oclass;
+		device->oclass[NVDEV_SUBDEV_DEVINIT] = &nvc0_devinit_oclass;
 		device->oclass[NVDEV_SUBDEV_MC     ] = &nvc0_mc_oclass;
 		device->oclass[NVDEV_SUBDEV_BUS    ] = &nvc0_bus_oclass;
 		device->oclass[NVDEV_SUBDEV_TIMER  ] = &nv04_timer_oclass;
@@ -206,7 +206,7 @@ nvc0_identify(struct nouveau_device *device)
 		device->oclass[NVDEV_SUBDEV_CLOCK  ] = &nvc0_clock_oclass;
 		device->oclass[NVDEV_SUBDEV_THERM  ] = &nva3_therm_oclass;
 		device->oclass[NVDEV_SUBDEV_MXM    ] = &nv50_mxm_oclass;
-		device->oclass[NVDEV_SUBDEV_DEVINIT] = &nv50_devinit_oclass;
+		device->oclass[NVDEV_SUBDEV_DEVINIT] = &nvc0_devinit_oclass;
 		device->oclass[NVDEV_SUBDEV_MC     ] = &nvc0_mc_oclass;
 		device->oclass[NVDEV_SUBDEV_BUS    ] = &nvc0_bus_oclass;
 		device->oclass[NVDEV_SUBDEV_TIMER  ] = &nv04_timer_oclass;
@@ -234,7 +234,7 @@ nvc0_identify(struct nouveau_device *device)
 		device->oclass[NVDEV_SUBDEV_CLOCK  ] = &nvc0_clock_oclass;
 		device->oclass[NVDEV_SUBDEV_THERM  ] = &nva3_therm_oclass;
 		device->oclass[NVDEV_SUBDEV_MXM    ] = &nv50_mxm_oclass;
-		device->oclass[NVDEV_SUBDEV_DEVINIT] = &nv50_devinit_oclass;
+		device->oclass[NVDEV_SUBDEV_DEVINIT] = &nvc0_devinit_oclass;
 		device->oclass[NVDEV_SUBDEV_MC     ] = &nvc0_mc_oclass;
 		device->oclass[NVDEV_SUBDEV_BUS    ] = &nvc0_bus_oclass;
 		device->oclass[NVDEV_SUBDEV_TIMER  ] = &nv04_timer_oclass;
@@ -263,7 +263,7 @@ nvc0_identify(struct nouveau_device *device)
 		device->oclass[NVDEV_SUBDEV_CLOCK  ] = &nvc0_clock_oclass;
 		device->oclass[NVDEV_SUBDEV_THERM  ] = &nvd0_therm_oclass;
 		device->oclass[NVDEV_SUBDEV_MXM    ] = &nv50_mxm_oclass;
-		device->oclass[NVDEV_SUBDEV_DEVINIT] = &nv50_devinit_oclass;
+		device->oclass[NVDEV_SUBDEV_DEVINIT] = &nvc0_devinit_oclass;
 		device->oclass[NVDEV_SUBDEV_MC     ] = &nvc0_mc_oclass;
 		device->oclass[NVDEV_SUBDEV_BUS    ] = &nvc0_bus_oclass;
 		device->oclass[NVDEV_SUBDEV_TIMER  ] = &nv04_timer_oclass;

+ 7 - 4
drivers/gpu/drm/nouveau/core/engine/device/nve0.c

@@ -62,7 +62,7 @@ nve0_identify(struct nouveau_device *device)
 		device->oclass[NVDEV_SUBDEV_CLOCK  ] = &nvc0_clock_oclass;
 		device->oclass[NVDEV_SUBDEV_THERM  ] = &nvd0_therm_oclass;
 		device->oclass[NVDEV_SUBDEV_MXM    ] = &nv50_mxm_oclass;
-		device->oclass[NVDEV_SUBDEV_DEVINIT] = &nv50_devinit_oclass;
+		device->oclass[NVDEV_SUBDEV_DEVINIT] = &nvc0_devinit_oclass;
 		device->oclass[NVDEV_SUBDEV_MC     ] = &nvc0_mc_oclass;
 		device->oclass[NVDEV_SUBDEV_BUS    ] = &nvc0_bus_oclass;
 		device->oclass[NVDEV_SUBDEV_TIMER  ] = &nv04_timer_oclass;
@@ -79,6 +79,7 @@ nve0_identify(struct nouveau_device *device)
 		device->oclass[NVDEV_ENGINE_DISP   ] = &nve0_disp_oclass;
 		device->oclass[NVDEV_ENGINE_COPY0  ] = &nve0_copy0_oclass;
 		device->oclass[NVDEV_ENGINE_COPY1  ] = &nve0_copy1_oclass;
+		device->oclass[NVDEV_ENGINE_COPY2  ] = &nve0_copy2_oclass;
 		device->oclass[NVDEV_ENGINE_BSP    ] = &nve0_bsp_oclass;
 		device->oclass[NVDEV_ENGINE_VP     ] = &nve0_vp_oclass;
 		device->oclass[NVDEV_ENGINE_PPP    ] = &nvc0_ppp_oclass;
@@ -91,7 +92,7 @@ nve0_identify(struct nouveau_device *device)
 		device->oclass[NVDEV_SUBDEV_CLOCK  ] = &nvc0_clock_oclass;
 		device->oclass[NVDEV_SUBDEV_THERM  ] = &nvd0_therm_oclass;
 		device->oclass[NVDEV_SUBDEV_MXM    ] = &nv50_mxm_oclass;
-		device->oclass[NVDEV_SUBDEV_DEVINIT] = &nv50_devinit_oclass;
+		device->oclass[NVDEV_SUBDEV_DEVINIT] = &nvc0_devinit_oclass;
 		device->oclass[NVDEV_SUBDEV_MC     ] = &nvc0_mc_oclass;
 		device->oclass[NVDEV_SUBDEV_BUS    ] = &nvc0_bus_oclass;
 		device->oclass[NVDEV_SUBDEV_TIMER  ] = &nv04_timer_oclass;
@@ -108,6 +109,7 @@ nve0_identify(struct nouveau_device *device)
 		device->oclass[NVDEV_ENGINE_DISP   ] = &nve0_disp_oclass;
 		device->oclass[NVDEV_ENGINE_COPY0  ] = &nve0_copy0_oclass;
 		device->oclass[NVDEV_ENGINE_COPY1  ] = &nve0_copy1_oclass;
+		device->oclass[NVDEV_ENGINE_COPY2  ] = &nve0_copy2_oclass;
 		device->oclass[NVDEV_ENGINE_BSP    ] = &nve0_bsp_oclass;
 		device->oclass[NVDEV_ENGINE_VP     ] = &nve0_vp_oclass;
 		device->oclass[NVDEV_ENGINE_PPP    ] = &nvc0_ppp_oclass;
@@ -120,7 +122,7 @@ nve0_identify(struct nouveau_device *device)
 		device->oclass[NVDEV_SUBDEV_CLOCK  ] = &nvc0_clock_oclass;
 		device->oclass[NVDEV_SUBDEV_THERM  ] = &nvd0_therm_oclass;
 		device->oclass[NVDEV_SUBDEV_MXM    ] = &nv50_mxm_oclass;
-		device->oclass[NVDEV_SUBDEV_DEVINIT] = &nv50_devinit_oclass;
+		device->oclass[NVDEV_SUBDEV_DEVINIT] = &nvc0_devinit_oclass;
 		device->oclass[NVDEV_SUBDEV_MC     ] = &nvc0_mc_oclass;
 		device->oclass[NVDEV_SUBDEV_BUS    ] = &nvc0_bus_oclass;
 		device->oclass[NVDEV_SUBDEV_TIMER  ] = &nv04_timer_oclass;
@@ -137,6 +139,7 @@ nve0_identify(struct nouveau_device *device)
 		device->oclass[NVDEV_ENGINE_DISP   ] = &nve0_disp_oclass;
 		device->oclass[NVDEV_ENGINE_COPY0  ] = &nve0_copy0_oclass;
 		device->oclass[NVDEV_ENGINE_COPY1  ] = &nve0_copy1_oclass;
+		device->oclass[NVDEV_ENGINE_COPY2  ] = &nve0_copy2_oclass;
 		device->oclass[NVDEV_ENGINE_BSP    ] = &nve0_bsp_oclass;
 		device->oclass[NVDEV_ENGINE_VP     ] = &nve0_vp_oclass;
 		device->oclass[NVDEV_ENGINE_PPP    ] = &nvc0_ppp_oclass;
@@ -149,7 +152,7 @@ nve0_identify(struct nouveau_device *device)
 		device->oclass[NVDEV_SUBDEV_CLOCK  ] = &nvc0_clock_oclass;
 		device->oclass[NVDEV_SUBDEV_THERM  ] = &nvd0_therm_oclass;
 		device->oclass[NVDEV_SUBDEV_MXM    ] = &nv50_mxm_oclass;
-		device->oclass[NVDEV_SUBDEV_DEVINIT] = &nv50_devinit_oclass;
+		device->oclass[NVDEV_SUBDEV_DEVINIT] = &nvc0_devinit_oclass;
 		device->oclass[NVDEV_SUBDEV_MC     ] = &nvc0_mc_oclass;
 		device->oclass[NVDEV_SUBDEV_BUS    ] = &nvc0_bus_oclass;
 		device->oclass[NVDEV_SUBDEV_TIMER  ] = &nv04_timer_oclass;

+ 3 - 3
drivers/gpu/drm/nouveau/core/engine/disp/nv50.c

@@ -34,9 +34,9 @@
 #include <subdev/bios/disp.h>
 #include <subdev/bios/init.h>
 #include <subdev/bios/pll.h>
+#include <subdev/devinit.h>
 #include <subdev/timer.h>
 #include <subdev/fb.h>
-#include <subdev/clock.h>
 
 #include "nv50.h"
 
@@ -987,10 +987,10 @@ nv50_disp_intr_unk20_0(struct nv50_disp_priv *priv, int head)
 static void
 nv50_disp_intr_unk20_1(struct nv50_disp_priv *priv, int head)
 {
-	struct nouveau_clock *clk = nouveau_clock(priv);
+	struct nouveau_devinit *devinit = nouveau_devinit(priv);
 	u32 pclk = nv_rd32(priv, 0x610ad0 + (head * 0x540)) & 0x3fffff;
 	if (pclk)
-		clk->pll_set(clk, PLL_VPLL0 + head, pclk);
+		devinit->pll_set(devinit, PLL_VPLL0 + head, pclk);
 }
 
 static void

+ 8 - 6
drivers/gpu/drm/nouveau/core/engine/disp/nvd0.c

@@ -29,15 +29,14 @@
 
 #include <engine/disp.h>
 
-#include <subdev/timer.h>
-#include <subdev/fb.h>
-#include <subdev/clock.h>
-
 #include <subdev/bios.h>
 #include <subdev/bios/dcb.h>
 #include <subdev/bios/disp.h>
 #include <subdev/bios/init.h>
 #include <subdev/bios/pll.h>
+#include <subdev/devinit.h>
+#include <subdev/fb.h>
+#include <subdev/timer.h>
 
 #include "nv50.h"
 
@@ -738,10 +737,10 @@ nvd0_disp_intr_unk2_0(struct nv50_disp_priv *priv, int head)
 static void
 nvd0_disp_intr_unk2_1(struct nv50_disp_priv *priv, int head)
 {
-	struct nouveau_clock *clk = nouveau_clock(priv);
+	struct nouveau_devinit *devinit = nouveau_devinit(priv);
 	u32 pclk = nv_rd32(priv, 0x660450 + (head * 0x300)) / 1000;
 	if (pclk)
-		clk->pll_set(clk, PLL_VPLL0 + head, pclk);
+		devinit->pll_set(devinit, PLL_VPLL0 + head, pclk);
 	nv_wr32(priv, 0x612200 + (head * 0x800), 0x00000000);
 }
 
@@ -959,6 +958,9 @@ nvd0_disp_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
 	int heads = nv_rd32(parent, 0x022448);
 	int ret;
 
+	if (nv_rd32(parent, 0x022500) & 0x00000001)
+		return -ENODEV;
+
 	ret = nouveau_disp_create(parent, engine, oclass, heads,
 				  "PDISP", "display", &priv);
 	*pobject = nv_object(priv);

+ 3 - 0
drivers/gpu/drm/nouveau/core/engine/disp/nve0.c

@@ -54,6 +54,9 @@ nve0_disp_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
 	int heads = nv_rd32(parent, 0x022448);
 	int ret;
 
+	if (nv_rd32(parent, 0x022500) & 0x00000001)
+		return -ENODEV;
+
 	ret = nouveau_disp_create(parent, engine, oclass, heads,
 				  "PDISP", "display", &priv);
 	*pobject = nv_object(priv);

+ 3 - 0
drivers/gpu/drm/nouveau/core/engine/disp/nvf0.c

@@ -54,6 +54,9 @@ nvf0_disp_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
 	int heads = nv_rd32(parent, 0x022448);
 	int ret;
 
+	if (nv_rd32(parent, 0x022500) & 0x00000001)
+		return -ENODEV;
+
 	ret = nouveau_disp_create(parent, engine, oclass, heads,
 				  "PDISP", "display", &priv);
 	*pobject = nv_object(priv);

+ 1 - 2
drivers/gpu/drm/nouveau/core/core/falcon.c → drivers/gpu/drm/nouveau/core/engine/falcon.c

@@ -20,8 +20,7 @@
  * OTHER DEALINGS IN THE SOFTWARE.
  */
 
-#include <core/falcon.h>
-
+#include <engine/falcon.h>
 #include <subdev/timer.h>
 
 u32

+ 1 - 1
drivers/gpu/drm/nouveau/core/engine/fifo/nv40.c

@@ -320,7 +320,7 @@ nv40_fifo_init(struct nouveau_object *object)
 		break;
 	default:
 		nv_wr32(priv, 0x002230, 0x00000000);
-		nv_wr32(priv, 0x002220, ((pfb->ram.size - 512 * 1024 +
+		nv_wr32(priv, 0x002220, ((pfb->ram->size - 512 * 1024 +
 					 priv->ramfc->addr) >> 16) |
 					0x00030000);
 		break;

+ 4 - 0
drivers/gpu/drm/nouveau/core/engine/fifo/nv84.c

@@ -56,7 +56,9 @@ nv84_fifo_context_attach(struct nouveau_object *parent,
 	switch (nv_engidx(object->engine)) {
 	case NVDEV_ENGINE_SW   : return 0;
 	case NVDEV_ENGINE_GR   : addr = 0x0020; break;
+	case NVDEV_ENGINE_VP   : addr = 0x0040; break;
 	case NVDEV_ENGINE_MPEG : addr = 0x0060; break;
+	case NVDEV_ENGINE_BSP  : addr = 0x0080; break;
 	case NVDEV_ENGINE_CRYPT: addr = 0x00a0; break;
 	case NVDEV_ENGINE_COPY0: addr = 0x00c0; break;
 	default:
@@ -89,7 +91,9 @@ nv84_fifo_context_detach(struct nouveau_object *parent, bool suspend,
 	switch (nv_engidx(object->engine)) {
 	case NVDEV_ENGINE_SW   : return 0;
 	case NVDEV_ENGINE_GR   : engn = 0; addr = 0x0020; break;
+	case NVDEV_ENGINE_VP   : engn = 3; addr = 0x0040; break;
 	case NVDEV_ENGINE_MPEG : engn = 1; addr = 0x0060; break;
+	case NVDEV_ENGINE_BSP  : engn = 5; addr = 0x0080; break;
 	case NVDEV_ENGINE_CRYPT: engn = 4; addr = 0x00a0; break;
 	case NVDEV_ENGINE_COPY0: engn = 2; addr = 0x00c0; break;
 	default:

+ 30 - 23
drivers/gpu/drm/nouveau/core/engine/fifo/nve0.c

@@ -44,7 +44,8 @@ static const struct {
 	u64 subdev;
 	u64 mask;
 } fifo_engine[] = {
-	_(NVDEV_ENGINE_GR      , (1ULL << NVDEV_ENGINE_SW)),
+	_(NVDEV_ENGINE_GR      , (1ULL << NVDEV_ENGINE_SW) |
+				 (1ULL << NVDEV_ENGINE_COPY2)),
 	_(NVDEV_ENGINE_VP      , 0),
 	_(NVDEV_ENGINE_PPP     , 0),
 	_(NVDEV_ENGINE_BSP     , 0),
@@ -96,18 +97,6 @@ nve0_fifo_playlist_update(struct nve0_fifo_priv *priv, u32 engine)
 
 	mutex_lock(&nv_subdev(priv)->mutex);
 	cur = engn->playlist[engn->cur_playlist];
-	if (unlikely(cur == NULL)) {
-		int ret = nouveau_gpuobj_new(nv_object(priv), NULL,
-					     0x8000, 0x1000, 0, &cur);
-		if (ret) {
-			mutex_unlock(&nv_subdev(priv)->mutex);
-			nv_error(priv, "playlist alloc failed\n");
-			return;
-		}
-
-		engn->playlist[engn->cur_playlist] = cur;
-	}
-
 	engn->cur_playlist = !engn->cur_playlist;
 
 	for (i = 0, p = 0; i < priv->base.max; i++) {
@@ -138,10 +127,12 @@ nve0_fifo_context_attach(struct nouveau_object *parent,
 	int ret;
 
 	switch (nv_engidx(object->engine)) {
-	case NVDEV_ENGINE_SW   : return 0;
-	case NVDEV_ENGINE_GR   :
+	case NVDEV_ENGINE_SW   :
 	case NVDEV_ENGINE_COPY0:
-	case NVDEV_ENGINE_COPY1: addr = 0x0210; break;
+	case NVDEV_ENGINE_COPY1:
+	case NVDEV_ENGINE_COPY2:
+		return 0;
+	case NVDEV_ENGINE_GR   : addr = 0x0210; break;
 	case NVDEV_ENGINE_BSP  : addr = 0x0270; break;
 	case NVDEV_ENGINE_VP   : addr = 0x0250; break;
 	case NVDEV_ENGINE_PPP  : addr = 0x0260; break;
@@ -176,9 +167,10 @@ nve0_fifo_context_detach(struct nouveau_object *parent, bool suspend,
 
 	switch (nv_engidx(object->engine)) {
 	case NVDEV_ENGINE_SW   : return 0;
-	case NVDEV_ENGINE_GR   :
 	case NVDEV_ENGINE_COPY0:
-	case NVDEV_ENGINE_COPY1: addr = 0x0210; break;
+	case NVDEV_ENGINE_COPY1:
+	case NVDEV_ENGINE_COPY2: addr = 0x0000; break;
+	case NVDEV_ENGINE_GR   : addr = 0x0210; break;
 	case NVDEV_ENGINE_BSP  : addr = 0x0270; break;
 	case NVDEV_ENGINE_VP   : addr = 0x0250; break;
 	case NVDEV_ENGINE_PPP  : addr = 0x0260; break;
@@ -194,9 +186,12 @@ nve0_fifo_context_detach(struct nouveau_object *parent, bool suspend,
 			return -EBUSY;
 	}
 
-	nv_wo32(base, addr + 0x00, 0x00000000);
-	nv_wo32(base, addr + 0x04, 0x00000000);
-	bar->flush(bar);
+	if (addr) {
+		nv_wo32(base, addr + 0x00, 0x00000000);
+		nv_wo32(base, addr + 0x04, 0x00000000);
+		bar->flush(bar);
+	}
+
 	return 0;
 }
 
@@ -592,13 +587,25 @@ nve0_fifo_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
 	       struct nouveau_object **pobject)
 {
 	struct nve0_fifo_priv *priv;
-	int ret;
+	int ret, i;
 
 	ret = nouveau_fifo_create(parent, engine, oclass, 0, 4095, &priv);
 	*pobject = nv_object(priv);
 	if (ret)
 		return ret;
 
+	for (i = 0; i < FIFO_ENGINE_NR; i++) {
+		ret = nouveau_gpuobj_new(nv_object(priv), NULL, 0x8000, 0x1000,
+					 0, &priv->engine[i].playlist[0]);
+		if (ret)
+			return ret;
+
+		ret = nouveau_gpuobj_new(nv_object(priv), NULL, 0x8000, 0x1000,
+					 0, &priv->engine[i].playlist[1]);
+		if (ret)
+			return ret;
+	}
+
 	ret = nouveau_gpuobj_new(nv_object(priv), NULL, 4096 * 0x200, 0x1000,
 				 NVOBJ_FLAG_ZERO_ALLOC, &priv->user.mem);
 	if (ret)
@@ -629,7 +636,7 @@ nve0_fifo_dtor(struct nouveau_object *object)
 	nouveau_gpuobj_unmap(&priv->user.bar);
 	nouveau_gpuobj_ref(NULL, &priv->user.mem);
 
-	for (i = 0; i < ARRAY_SIZE(priv->engine); i++) {
+	for (i = 0; i < FIFO_ENGINE_NR; i++) {
 		nouveau_gpuobj_ref(NULL, &priv->engine[i].playlist[1]);
 		nouveau_gpuobj_ref(NULL, &priv->engine[i].playlist[0]);
 	}

+ 521 - 80
drivers/gpu/drm/nouveau/core/engine/graph/ctxnvc0.c

@@ -1323,8 +1323,6 @@ nvc0_grctx_generate_9097(struct nvc0_graph_priv *priv)
 	nv_mthd(priv, 0x9097, 0x1450, 0x00300008);
 	nv_mthd(priv, 0x9097, 0x1454, 0x04000080);
 	nv_mthd(priv, 0x9097, 0x0214, 0x00000000);
-	/* in trace, right after 0x90c0, not here */
-	nv_mthd(priv, 0x9097, 0x3410, 0x80002006);
 }
 
 static void
@@ -1417,6 +1415,8 @@ nvc0_grctx_generate_90c0(struct nvc0_graph_priv *priv)
 	for (i = 0; nv_device(priv)->chipset >= 0xd0 && i < 4; i++) {
 		nv_mthd(priv, 0x90c0, 0x2710 + (i * 0x40), 0x00014000);
 		nv_mthd(priv, 0x90c0, 0x2730 + (i * 0x40), 0x00014000);
+	}
+	for (i = 0; nv_device(priv)->chipset >= 0xd0 && i < 4; i++) {
 		nv_mthd(priv, 0x90c0, 0x2714 + (i * 0x40), 0x00000040);
 		nv_mthd(priv, 0x90c0, 0x2734 + (i * 0x40), 0x00000040);
 	}
@@ -1456,7 +1456,23 @@ nvc0_grctx_generate_dispatch(struct nvc0_graph_priv *priv)
 	nv_wr32(priv, 0x404020, 0x00000000);
 	nv_wr32(priv, 0x404024, 0x00000000);
 	nv_wr32(priv, 0x404028, 0x00000000);
-	nv_wr32(priv, 0x40402c, 0x00000000);
+	switch (nv_device(priv)->chipset) {
+	case 0xd9:
+	case 0xd7:
+		nv_wr32(priv, 0x40402c, 0x00000000);
+		break;
+	case 0xc0:
+	case 0xc3:
+	case 0xc4:
+	case 0xc1:
+	case 0xc8:
+	case 0xce:
+	case 0xcf:
+		break;
+	default:
+		BUG_ON(1);
+		break;
+	}
 	nv_wr32(priv, 0x404044, 0x00000000);
 	nv_wr32(priv, 0x404094, 0x00000000);
 	nv_wr32(priv, 0x404098, 0x00000000);
@@ -1472,6 +1488,7 @@ nvc0_grctx_generate_dispatch(struct nvc0_graph_priv *priv)
 	nv_wr32(priv, 0x4040c0, 0x00000000);
 	nv_wr32(priv, 0x4040c4, 0x00000000);
 	nv_wr32(priv, 0x4040c8, 0xf0000087);
+	nv_wr32(priv, 0x4040d0, 0x00000000);
 	nv_wr32(priv, 0x4040d4, 0x00000000);
 	nv_wr32(priv, 0x4040d8, 0x00000000);
 	nv_wr32(priv, 0x4040dc, 0x00000000);
@@ -1487,7 +1504,23 @@ nvc0_grctx_generate_dispatch(struct nvc0_graph_priv *priv)
 	nv_wr32(priv, 0x404158, 0x00000200);
 	nv_wr32(priv, 0x404164, 0x00000055);
 	nv_wr32(priv, 0x404168, 0x00000000);
-	nv_wr32(priv, 0x404174, 0x00000000);
+	switch (nv_device(priv)->chipset) {
+	case 0xd9:
+	case 0xd7:
+		break;
+	case 0xc0:
+	case 0xc3:
+	case 0xc4:
+	case 0xc1:
+	case 0xc8:
+	case 0xce:
+	case 0xcf:
+		nv_wr32(priv, 0x404174, 0x00000000);
+		break;
+	default:
+		BUG_ON(1);
+		break;
+	}
 	nv_wr32(priv, 0x404178, 0x00000000);
 	nv_wr32(priv, 0x40417c, 0x00000000);
 	for (i = 0; i < 8; i++)
@@ -1613,20 +1646,31 @@ nvc0_grctx_generate_unk47xx(struct nvc0_graph_priv *priv)
 static void
 nvc0_grctx_generate_shaders(struct nvc0_graph_priv *priv)
 {
-
-	if (nv_device(priv)->chipset >= 0xd0) {
+	switch (nv_device(priv)->chipset) {
+	case 0xc1:
 		nv_wr32(priv, 0x405800, 0x0f8000bf);
 		nv_wr32(priv, 0x405830, 0x02180218);
-		nv_wr32(priv, 0x405834, 0x08000000);
-	} else
-	if (nv_device(priv)->chipset == 0xc1) {
+		nv_wr32(priv, 0x405834, 0x00000000);
+		break;
+	case 0xd9:
+	case 0xd7:
 		nv_wr32(priv, 0x405800, 0x0f8000bf);
 		nv_wr32(priv, 0x405830, 0x02180218);
-		nv_wr32(priv, 0x405834, 0x00000000);
-	} else {
+		nv_wr32(priv, 0x405834, 0x08000000);
+		break;
+	case 0xc0:
+	case 0xc3:
+	case 0xc4:
+	case 0xc8:
+	case 0xce:
+	case 0xcf:
 		nv_wr32(priv, 0x405800, 0x078000bf);
 		nv_wr32(priv, 0x405830, 0x02180000);
 		nv_wr32(priv, 0x405834, 0x00000000);
+		break;
+	default:
+		BUG_ON(1);
+		break;
 	}
 	nv_wr32(priv, 0x405838, 0x00000000);
 	nv_wr32(priv, 0x405854, 0x00000000);
@@ -1657,12 +1701,24 @@ nvc0_grctx_generate_unk64xx(struct nvc0_graph_priv *priv)
 	nv_wr32(priv, 0x4064ac, 0x00003fff);
 	nv_wr32(priv, 0x4064b4, 0x00000000);
 	nv_wr32(priv, 0x4064b8, 0x00000000);
-	if (nv_device(priv)->chipset >= 0xd0)
+	switch (nv_device(priv)->chipset) {
+	case 0xd9:
+	case 0xd7:
 		nv_wr32(priv, 0x4064bc, 0x00000000);
-	if (nv_device(priv)->chipset == 0xc1 ||
-	    nv_device(priv)->chipset >= 0xd0) {
 		nv_wr32(priv, 0x4064c0, 0x80140078);
 		nv_wr32(priv, 0x4064c4, 0x0086ffff);
+		break;
+	case 0xc0:
+	case 0xc3:
+	case 0xc4:
+	case 0xc1:
+	case 0xc8:
+	case 0xce:
+	case 0xcf:
+		break;
+	default:
+		BUG_ON(1);
+		break;
 	}
 }
 
@@ -1695,46 +1751,87 @@ nvc0_grctx_generate_ccache(struct nvc0_graph_priv *priv)
 static void
 nvc0_grctx_generate_rop(struct nvc0_graph_priv *priv)
 {
-	int chipset = nv_device(priv)->chipset;
-
 	/* ROPC_BROADCAST */
 	nv_wr32(priv, 0x408800, 0x02802a3c);
 	nv_wr32(priv, 0x408804, 0x00000040);
-	if (chipset >= 0xd0) {
-		nv_wr32(priv, 0x408808, 0x1043e005);
+	switch (nv_device(priv)->chipset) {
+	case 0xc0:
+	case 0xc3:
+	case 0xc4:
+	case 0xc8:
+	case 0xce:
+	case 0xcf:
+		nv_wr32(priv, 0x408808, 0x0003e00d);
 		nv_wr32(priv, 0x408900, 0x3080b801);
-		nv_wr32(priv, 0x408904, 0x1043e005);
-		nv_wr32(priv, 0x408908, 0x00c8102f);
-	} else
-	if (chipset == 0xc1) {
+		nv_wr32(priv, 0x408904, 0x02000001);
+		nv_wr32(priv, 0x408908, 0x00c80929);
+		break;
+	case 0xc1:
 		nv_wr32(priv, 0x408808, 0x1003e005);
 		nv_wr32(priv, 0x408900, 0x3080b801);
 		nv_wr32(priv, 0x408904, 0x62000001);
 		nv_wr32(priv, 0x408908, 0x00c80929);
-	} else {
-		nv_wr32(priv, 0x408808, 0x0003e00d);
+		break;
+	case 0xd9:
+	case 0xd7:
+		nv_wr32(priv, 0x408808, 0x1043e005);
 		nv_wr32(priv, 0x408900, 0x3080b801);
-		nv_wr32(priv, 0x408904, 0x02000001);
-		nv_wr32(priv, 0x408908, 0x00c80929);
+		nv_wr32(priv, 0x408904, 0x1043e005);
+		nv_wr32(priv, 0x408908, 0x00c8102f);
+		break;
+	default:
+		BUG_ON(1);
+		break;
 	}
-	nv_wr32(priv, 0x40890c, 0x00000000);
 	nv_wr32(priv, 0x408980, 0x0000011d);
 }
 
 static void
 nvc0_grctx_generate_gpc(struct nvc0_graph_priv *priv)
 {
-	int chipset = nv_device(priv)->chipset;
 	int i;
 
 	/* GPC_BROADCAST */
 	nv_wr32(priv, 0x418380, 0x00000016);
 	nv_wr32(priv, 0x418400, 0x38004e00);
 	nv_wr32(priv, 0x418404, 0x71e0ffff);
-	nv_wr32(priv, 0x418408, 0x00000000);
+	switch (nv_device(priv)->chipset) {
+	case 0xd9:
+	case 0xd7:
+		break;
+	case 0xc0:
+	case 0xc3:
+	case 0xc4:
+	case 0xc1:
+	case 0xc8:
+	case 0xce:
+	case 0xcf:
+		nv_wr32(priv, 0x418408, 0x00000000);
+		break;
+	default:
+		BUG_ON(1);
+		break;
+	}
 	nv_wr32(priv, 0x41840c, 0x00001008);
 	nv_wr32(priv, 0x418410, 0x0fff0fff);
-	nv_wr32(priv, 0x418414, chipset < 0xd0 ? 0x00200fff : 0x02200fff);
+	switch (nv_device(priv)->chipset) {
+	case 0xd9:
+	case 0xd7:
+		nv_wr32(priv, 0x418414, 0x02200fff);
+		break;
+	case 0xc0:
+	case 0xc3:
+	case 0xc4:
+	case 0xc1:
+	case 0xc8:
+	case 0xce:
+	case 0xcf:
+		nv_wr32(priv, 0x418414, 0x00200fff);
+		break;
+	default:
+		BUG_ON(1);
+		break;
+	}
 	nv_wr32(priv, 0x418450, 0x00000000);
 	nv_wr32(priv, 0x418454, 0x00000000);
 	nv_wr32(priv, 0x418458, 0x00000000);
@@ -1749,17 +1846,65 @@ nvc0_grctx_generate_gpc(struct nvc0_graph_priv *priv)
 	nv_wr32(priv, 0x418700, 0x00000002);
 	nv_wr32(priv, 0x418704, 0x00000080);
 	nv_wr32(priv, 0x418708, 0x00000000);
-	nv_wr32(priv, 0x41870c, chipset < 0xd0 ? 0x07c80000 : 0x00000000);
+	switch (nv_device(priv)->chipset) {
+	case 0xd9:
+	case 0xd7:
+		nv_wr32(priv, 0x41870c, 0x00000000);
+		break;
+	case 0xc0:
+	case 0xc3:
+	case 0xc4:
+	case 0xc1:
+	case 0xc8:
+	case 0xce:
+	case 0xcf:
+		nv_wr32(priv, 0x41870c, 0x07c80000);
+		break;
+	default:
+		BUG_ON(1);
+		break;
+	}
 	nv_wr32(priv, 0x418710, 0x00000000);
-	nv_wr32(priv, 0x418800, chipset < 0xd0 ? 0x0006860a : 0x7006860a);
+	switch (nv_device(priv)->chipset) {
+	case 0xd9:
+	case 0xd7:
+		nv_wr32(priv, 0x418800, 0x7006860a);
+		break;
+	case 0xc0:
+	case 0xc3:
+	case 0xc4:
+	case 0xc1:
+	case 0xc8:
+	case 0xce:
+	case 0xcf:
+		nv_wr32(priv, 0x418800, 0x0006860a);
+		break;
+	default:
+		BUG_ON(1);
+		break;
+	}
 	nv_wr32(priv, 0x418808, 0x00000000);
 	nv_wr32(priv, 0x41880c, 0x00000000);
 	nv_wr32(priv, 0x418810, 0x00000000);
 	nv_wr32(priv, 0x418828, 0x00008442);
-	if (chipset == 0xc1 || chipset >= 0xd0)
+	switch (nv_device(priv)->chipset) {
+	case 0xc1:
+	case 0xd9:
+	case 0xd7:
 		nv_wr32(priv, 0x418830, 0x10000001);
-	else
+		break;
+	case 0xc0:
+	case 0xc3:
+	case 0xc4:
+	case 0xc8:
+	case 0xce:
+	case 0xcf:
 		nv_wr32(priv, 0x418830, 0x00000001);
+		break;
+	default:
+		BUG_ON(1);
+		break;
+	}
 	nv_wr32(priv, 0x4188d8, 0x00000008);
 	nv_wr32(priv, 0x4188e0, 0x01000000);
 	nv_wr32(priv, 0x4188e8, 0x00000000);
@@ -1767,12 +1912,26 @@ nvc0_grctx_generate_gpc(struct nvc0_graph_priv *priv)
 	nv_wr32(priv, 0x4188f0, 0x00000000);
 	nv_wr32(priv, 0x4188f4, 0x00000000);
 	nv_wr32(priv, 0x4188f8, 0x00000000);
-	if (chipset >= 0xd0)
-		nv_wr32(priv, 0x4188fc, 0x20100008);
-	else if (chipset == 0xc1)
+	switch (nv_device(priv)->chipset) {
+	case 0xc1:
 		nv_wr32(priv, 0x4188fc, 0x00100018);
-	else
+		break;
+	case 0xd9:
+	case 0xd7:
+		nv_wr32(priv, 0x4188fc, 0x20100008);
+		break;
+	case 0xc0:
+	case 0xc3:
+	case 0xc4:
+	case 0xc8:
+	case 0xce:
+	case 0xcf:
 		nv_wr32(priv, 0x4188fc, 0x00100000);
+		break;
+	default:
+		BUG_ON(1);
+		break;
+	}
 	nv_wr32(priv, 0x41891c, 0x00ff00ff);
 	nv_wr32(priv, 0x418924, 0x00000000);
 	nv_wr32(priv, 0x418928, 0x00ffff00);
@@ -1786,7 +1945,24 @@ nvc0_grctx_generate_gpc(struct nvc0_graph_priv *priv)
 		nv_wr32(priv, 0x418a14 + (i * 0x20), 0x00000000);
 		nv_wr32(priv, 0x418a18 + (i * 0x20), 0x00000000);
 	}
-	nv_wr32(priv, 0x418b00, chipset < 0xd0 ? 0x00000000 : 0x00000006);
+	switch (nv_device(priv)->chipset) {
+	case 0xd9:
+	case 0xd7:
+		nv_wr32(priv, 0x418b00, 0x00000006);
+		break;
+	case 0xc0:
+	case 0xc3:
+	case 0xc4:
+	case 0xc1:
+	case 0xc8:
+	case 0xce:
+	case 0xcf:
+		nv_wr32(priv, 0x418b00, 0x00000000);
+		break;
+	default:
+		BUG_ON(1);
+		break;
+	}
 	nv_wr32(priv, 0x418b08, 0x0a418820);
 	nv_wr32(priv, 0x418b0c, 0x062080e6);
 	nv_wr32(priv, 0x418b10, 0x020398a4);
@@ -1803,8 +1979,23 @@ nvc0_grctx_generate_gpc(struct nvc0_graph_priv *priv)
 	nv_wr32(priv, 0x418c24, 0x00000000);
 	nv_wr32(priv, 0x418c28, 0x00000000);
 	nv_wr32(priv, 0x418c2c, 0x00000000);
-	if (chipset == 0xc1 || chipset >= 0xd0)
+	switch (nv_device(priv)->chipset) {
+	case 0xc1:
+	case 0xd9:
+	case 0xd7:
 		nv_wr32(priv, 0x418c6c, 0x00000001);
+		break;
+	case 0xc0:
+	case 0xc3:
+	case 0xc4:
+	case 0xc8:
+	case 0xce:
+	case 0xcf:
+		break;
+	default:
+		BUG_ON(1);
+		break;
+	}
 	nv_wr32(priv, 0x418c80, 0x20200004);
 	nv_wr32(priv, 0x418c8c, 0x00000001);
 	nv_wr32(priv, 0x419000, 0x00000780);
@@ -1816,16 +2007,28 @@ nvc0_grctx_generate_gpc(struct nvc0_graph_priv *priv)
 static void
 nvc0_grctx_generate_tp(struct nvc0_graph_priv *priv)
 {
-	int chipset = nv_device(priv)->chipset;
-
 	/* GPC_BROADCAST.TP_BROADCAST */
 	nv_wr32(priv, 0x419818, 0x00000000);
 	nv_wr32(priv, 0x41983c, 0x00038bc7);
 	nv_wr32(priv, 0x419848, 0x00000000);
-	if (chipset == 0xc1 || chipset >= 0xd0)
+	switch (nv_device(priv)->chipset) {
+	case 0xc1:
+	case 0xd9:
+	case 0xd7:
 		nv_wr32(priv, 0x419864, 0x00000129);
-	else
+		break;
+	case 0xc0:
+	case 0xc3:
+	case 0xc4:
+	case 0xc8:
+	case 0xce:
+	case 0xcf:
 		nv_wr32(priv, 0x419864, 0x0000012a);
+		break;
+	default:
+		BUG_ON(1);
+		break;
+	}
 	nv_wr32(priv, 0x419888, 0x00000000);
 	nv_wr32(priv, 0x419a00, 0x000001f0);
 	nv_wr32(priv, 0x419a04, 0x00000001);
@@ -1833,12 +2036,43 @@ nvc0_grctx_generate_tp(struct nvc0_graph_priv *priv)
 	nv_wr32(priv, 0x419a0c, 0x00020000);
 	nv_wr32(priv, 0x419a10, 0x00000000);
 	nv_wr32(priv, 0x419a14, 0x00000200);
-	nv_wr32(priv, 0x419a1c, 0x00000000);
-	nv_wr32(priv, 0x419a20, 0x00000800);
-	if (chipset >= 0xd0)
+	switch (nv_device(priv)->chipset) {
+	case 0xc0:
+		break;
+	case 0xc3:
+	case 0xc4:
+	case 0xc1:
+	case 0xc8:
+	case 0xce:
+	case 0xcf:
+	case 0xd9:
+	case 0xd7:
+		nv_wr32(priv, 0x419a1c, 0x00000000);
+		nv_wr32(priv, 0x419a20, 0x00000800);
+		break;
+	default:
+		BUG_ON(1);
+		break;
+	}
+	switch (nv_device(priv)->chipset) {
+	case 0xc0:
+	case 0xc8:
+		break;
+	case 0xd9:
+	case 0xd7:
 		nv_wr32(priv, 0x00419ac4, 0x0017f440);
-	else if (chipset != 0xc0 && chipset != 0xc8)
+		break;
+	case 0xc3:
+	case 0xc4:
+	case 0xc1:
+	case 0xce:
+	case 0xcf:
 		nv_wr32(priv, 0x00419ac4, 0x0007f440);
+		break;
+	default:
+		BUG_ON(1);
+		break;
+	}
 	nv_wr32(priv, 0x419b00, 0x0a418820);
 	nv_wr32(priv, 0x419b04, 0x062080e6);
 	nv_wr32(priv, 0x419b08, 0x020398a4);
@@ -1846,34 +2080,106 @@ nvc0_grctx_generate_tp(struct nvc0_graph_priv *priv)
 	nv_wr32(priv, 0x419b10, 0x0a418820);
 	nv_wr32(priv, 0x419b14, 0x000000e6);
 	nv_wr32(priv, 0x419bd0, 0x00900103);
-	if (chipset == 0xc1 || chipset >= 0xd0)
+	switch (nv_device(priv)->chipset) {
+	case 0xc1:
+	case 0xd9:
+	case 0xd7:
 		nv_wr32(priv, 0x419be0, 0x00400001);
-	else
+		break;
+	case 0xc0:
+	case 0xc3:
+	case 0xc4:
+	case 0xc8:
+	case 0xce:
+	case 0xcf:
 		nv_wr32(priv, 0x419be0, 0x00000001);
+		break;
+	default:
+		BUG_ON(1);
+		break;
+	}
 	nv_wr32(priv, 0x419be4, 0x00000000);
-	nv_wr32(priv, 0x419c00, chipset < 0xd0 ? 0x00000002 : 0x0000000a);
+	switch (nv_device(priv)->chipset) {
+	case 0xd9:
+	case 0xd7:
+		nv_wr32(priv, 0x419c00, 0x0000000a);
+		break;
+	case 0xc0:
+	case 0xc3:
+	case 0xc4:
+	case 0xc1:
+	case 0xc8:
+	case 0xce:
+	case 0xcf:
+		nv_wr32(priv, 0x419c00, 0x00000002);
+		break;
+	default:
+		BUG_ON(1);
+		break;
+	}
 	nv_wr32(priv, 0x419c04, 0x00000006);
 	nv_wr32(priv, 0x419c08, 0x00000002);
 	nv_wr32(priv, 0x419c20, 0x00000000);
-	if (nv_device(priv)->chipset >= 0xd0) {
+	switch (nv_device(priv)->chipset) {
+	case 0xc3:
+	case 0xc4:
+	case 0xc1:
+	case 0xce:
+	case 0xcf:
+		nv_wr32(priv, 0x419cb0, 0x00020048);
+		break;
+	case 0xd9:
+	case 0xd7:
 		nv_wr32(priv, 0x419c24, 0x00084210);
 		nv_wr32(priv, 0x419c28, 0x3cf3cf3c);
 		nv_wr32(priv, 0x419cb0, 0x00020048);
-	} else
-	if (chipset == 0xce || chipset == 0xcf) {
-		nv_wr32(priv, 0x419cb0, 0x00020048);
-	} else {
+		break;
+	case 0xc0:
+	case 0xc8:
 		nv_wr32(priv, 0x419cb0, 0x00060048);
+		break;
+	default:
+		BUG_ON(1);
+		break;
 	}
 	nv_wr32(priv, 0x419ce8, 0x00000000);
 	nv_wr32(priv, 0x419cf4, 0x00000183);
-	if (chipset == 0xc1 || chipset >= 0xd0)
+	switch (nv_device(priv)->chipset) {
+	case 0xc1:
+	case 0xd9:
+	case 0xd7:
 		nv_wr32(priv, 0x419d20, 0x12180000);
-	else
+		break;
+	case 0xc0:
+	case 0xc3:
+	case 0xc4:
+	case 0xc8:
+	case 0xce:
+	case 0xcf:
 		nv_wr32(priv, 0x419d20, 0x02180000);
+		break;
+	default:
+		BUG_ON(1);
+		break;
+	}
 	nv_wr32(priv, 0x419d24, 0x00001fff);
-	if (chipset == 0xc1 || chipset >= 0xd0)
+	switch (nv_device(priv)->chipset) {
+	case 0xc1:
+	case 0xd9:
+	case 0xd7:
 		nv_wr32(priv, 0x419d44, 0x02180218);
+		break;
+	case 0xc0:
+	case 0xc3:
+	case 0xc4:
+	case 0xc8:
+	case 0xce:
+	case 0xcf:
+		break;
+	default:
+		BUG_ON(1);
+		break;
+	}
 	nv_wr32(priv, 0x419e04, 0x00000000);
 	nv_wr32(priv, 0x419e08, 0x00000000);
 	nv_wr32(priv, 0x419e0c, 0x00000000);
@@ -1899,12 +2205,55 @@ nvc0_grctx_generate_tp(struct nvc0_graph_priv *priv)
 	nv_wr32(priv, 0x419e8c, 0x00000000);
 	nv_wr32(priv, 0x419e90, 0x00000000);
 	nv_wr32(priv, 0x419e98, 0x00000000);
-	if (chipset != 0xc0 && chipset != 0xc8)
+	switch (nv_device(priv)->chipset) {
+	case 0xc0:
+	case 0xc8:
+		break;
+	case 0xd9:
+	case 0xd7:
+		nv_wr32(priv, 0x419ee0, 0x00010110);
+		break;
+	case 0xc3:
+	case 0xc4:
+	case 0xc1:
+	case 0xce:
+	case 0xcf:
 		nv_wr32(priv, 0x419ee0, 0x00011110);
-	nv_wr32(priv, 0x419f50, 0x00000000);
-	nv_wr32(priv, 0x419f54, 0x00000000);
-	if (chipset != 0xc0 && chipset != 0xc8)
+		break;
+	default:
+		BUG_ON(1);
+		break;
+	}
+	switch (nv_device(priv)->chipset) {
+	case 0xc0:
+	case 0xc8:
+		nv_wr32(priv, 0x419f50, 0x00000000);
+		nv_wr32(priv, 0x419f54, 0x00000000);
+		break;
+	case 0xc3:
+	case 0xc4:
+	case 0xc1:
+	case 0xce:
+	case 0xcf:
+	case 0xd9:
+	case 0xd7:
+		nv_wr32(priv, 0x419f30, 0x00000000);
+		nv_wr32(priv, 0x419f34, 0x00000000);
+		nv_wr32(priv, 0x419f38, 0x00000000);
+		nv_wr32(priv, 0x419f3c, 0x00000000);
+		nv_wr32(priv, 0x419f40, 0x00000000);
+		nv_wr32(priv, 0x419f44, 0x00000000);
+		nv_wr32(priv, 0x419f48, 0x00000000);
+		nv_wr32(priv, 0x419f4c, 0x00000000);
+		nv_wr32(priv, 0x419f50, 0x00000000);
+		nv_wr32(priv, 0x419f54, 0x00000000);
 		nv_wr32(priv, 0x419f58, 0x00000000);
+		break;
+		break;
+	default:
+		BUG_ON(1);
+		break;
+	}
 }
 
 int
@@ -1952,32 +2301,46 @@ nvc0_grctx_generate(struct nvc0_graph_priv *priv)
 	mmio_list(0x419008, 0x00000000,  0, 0);
 	mmio_list(0x418808, 0x00000000,  8, 0);
 	mmio_list(0x41880c, 0x80000018,  0, 0);
-	if (nv_device(priv)->chipset != 0xc1) {
+	switch (nv_device(priv)->chipset) {
+	case 0xc1:
+	case 0xd9:
+	case 0xd7:
 		tmp = 0x02180000;
-		mmio_list(0x405830, tmp, 0, 0);
+		mmio_list(0x405830, 0x00000218 | tmp, 0, 0);
+		mmio_list(0x4064c4, 0x0086ffff, 0, 0);
 		for (gpc = 0; gpc < priv->gpc_nr; gpc++) {
 			for (tpc = 0; tpc < priv->tpc_nr[gpc]; tpc++) {
 				u32 reg = TPC_UNIT(gpc, tpc, 0x0520);
+				mmio_list(reg, 0x10000000 | tmp, 0, 0);
+				tmp += 0x0324;
+			}
+			for (tpc = 0; tpc < priv->tpc_nr[gpc]; tpc++) {
+				u32 reg = TPC_UNIT(gpc, tpc, 0x0544);
 				mmio_list(reg, tmp, 0, 0);
 				tmp += 0x0324;
 			}
 		}
-	} else {
+		break;
+		break;
+	case 0xc0:
+	case 0xc3:
+	case 0xc4:
+	case 0xc8:
+	case 0xce:
+	case 0xcf:
 		tmp = 0x02180000;
-		mmio_list(0x405830, 0x00000218 | tmp, 0, 0);
-		mmio_list(0x4064c4, 0x0086ffff, 0, 0);
+		mmio_list(0x405830, tmp, 0, 0);
 		for (gpc = 0; gpc < priv->gpc_nr; gpc++) {
 			for (tpc = 0; tpc < priv->tpc_nr[gpc]; tpc++) {
 				u32 reg = TPC_UNIT(gpc, tpc, 0x0520);
-				mmio_list(reg, 0x10000000 | tmp, 0, 0);
-				tmp += 0x0324;
-			}
-			for (tpc = 0; tpc < priv->tpc_nr[gpc]; tpc++) {
-				u32 reg = TPC_UNIT(gpc, tpc, 0x0544);
 				mmio_list(reg, tmp, 0, 0);
 				tmp += 0x0324;
 			}
 		}
+		break;
+	default:
+		BUG_ON(1);
+		break;
 	}
 
 	for (tpc = 0, id = 0; tpc < 4; tpc++) {
@@ -2209,9 +2572,23 @@ nvc0_grctx_generate(struct nvc0_graph_priv *priv)
 	nv_icmd(priv, 0x00000215, 0x00000040);
 	nv_icmd(priv, 0x00000216, 0x00000040);
 	nv_icmd(priv, 0x00000217, 0x00000040);
-	if (nv_device(priv)->chipset >= 0xd0) {
-		for (i = 0x0400; i <= 0x0417; i++)
+	switch (nv_device(priv)->chipset) {
+	case 0xd9:
+	case 0xd7:
+		for (i = 0x400; i <= 0x417; i++)
 			nv_icmd(priv, i, 0x00000040);
+		break;
+	case 0xc0:
+	case 0xc3:
+	case 0xc4:
+	case 0xc1:
+	case 0xc8:
+	case 0xce:
+	case 0xcf:
+		break;
+	default:
+		BUG_ON(1);
+		break;
 	}
 	nv_icmd(priv, 0x00000218, 0x0000c080);
 	nv_icmd(priv, 0x00000219, 0x0000c080);
@@ -2221,9 +2598,23 @@ nvc0_grctx_generate(struct nvc0_graph_priv *priv)
 	nv_icmd(priv, 0x0000021d, 0x0000c080);
 	nv_icmd(priv, 0x0000021e, 0x0000c080);
 	nv_icmd(priv, 0x0000021f, 0x0000c080);
-	if (nv_device(priv)->chipset >= 0xd0) {
-		for (i = 0x0440; i <= 0x0457; i++)
+	switch (nv_device(priv)->chipset) {
+	case 0xd9:
+	case 0xd7:
+		for (i = 0x440; i <= 0x457; i++)
 			nv_icmd(priv, i, 0x0000c080);
+		break;
+	case 0xc0:
+	case 0xc3:
+	case 0xc4:
+	case 0xc1:
+	case 0xc8:
+	case 0xce:
+	case 0xcf:
+		break;
+	default:
+		BUG_ON(1);
+		break;
 	}
 	nv_icmd(priv, 0x000000ad, 0x0000013e);
 	nv_icmd(priv, 0x000000e1, 0x00000010);
@@ -2787,9 +3178,23 @@ nvc0_grctx_generate(struct nvc0_graph_priv *priv)
 	nv_icmd(priv, 0x0000053f, 0xffff0000);
 	nv_icmd(priv, 0x00000585, 0x0000003f);
 	nv_icmd(priv, 0x00000576, 0x00000003);
-	if (nv_device(priv)->chipset == 0xc1 ||
-	    nv_device(priv)->chipset >= 0xd0)
+	switch (nv_device(priv)->chipset) {
+	case 0xc1:
+	case 0xc8:
+	case 0xd9:
+	case 0xd7:
 		nv_icmd(priv, 0x0000057b, 0x00000059);
+		break;
+	case 0xc0:
+	case 0xc3:
+	case 0xc4:
+	case 0xce:
+	case 0xcf:
+		break;
+	default:
+		BUG_ON(1);
+		break;
+	}
 	nv_icmd(priv, 0x00000586, 0x00000040);
 	nv_icmd(priv, 0x00000582, 0x00000080);
 	nv_icmd(priv, 0x00000583, 0x00000080);
@@ -2890,8 +3295,23 @@ nvc0_grctx_generate(struct nvc0_graph_priv *priv)
 	nv_icmd(priv, 0x00000957, 0x00000003);
 	nv_icmd(priv, 0x0000095e, 0x20164010);
 	nv_icmd(priv, 0x0000095f, 0x00000020);
-	if (nv_device(priv)->chipset >= 0xd0)
+	switch (nv_device(priv)->chipset) {
+	case 0xd9:
+	case 0xd7:
+	case 0xc8:
 		nv_icmd(priv, 0x0000097d, 0x00000020);
+		break;
+	case 0xc0:
+	case 0xc3:
+	case 0xc4:
+	case 0xc1:
+	case 0xce:
+	case 0xcf:
+		break;
+	default:
+		BUG_ON(1);
+		break;
+	}
 	nv_icmd(priv, 0x00000683, 0x00000006);
 	nv_icmd(priv, 0x00000685, 0x003fffff);
 	nv_icmd(priv, 0x00000687, 0x00000c48);
@@ -3020,6 +3440,8 @@ nvc0_grctx_generate(struct nvc0_graph_priv *priv)
 	nv_icmd(priv, 0x00000825, 0x00000100);
 	nv_icmd(priv, 0x00000826, 0x00000001);
 	nv_icmd(priv, 0x0001e100, 0x00000001);
+
+
 	nv_wr32(priv, 0x400208, 0x00000000);
 	nv_wr32(priv, 0x404154, 0x00000400);
 
@@ -3032,6 +3454,25 @@ nvc0_grctx_generate(struct nvc0_graph_priv *priv)
 	nvc0_grctx_generate_9039(priv);
 	nvc0_grctx_generate_90c0(priv);
 
+	switch (nv_device(priv)->chipset) {
+	case 0xc0:
+	case 0xc3:
+	case 0xc4:
+	case 0xc1:
+	case 0xc8:
+	case 0xce:
+	case 0xcf:
+		nv_mthd(priv, 0x902d, 0x3410, 0x00000000);
+		break;
+	case 0xd9:
+	case 0xd7:
+		nv_mthd(priv, 0x902d, 0x3410, 0x80002006);
+		break;
+	default:
+		BUG_ON(1);
+		break;
+	}
+
 	nv_wr32(priv, 0x000260, r000260);
 
 	return nvc0_grctx_fini(&info);

+ 266 - 135
drivers/gpu/drm/nouveau/core/engine/graph/ctxnve0.c

@@ -749,31 +749,6 @@ nve0_grctx_generate_icmd(struct nvc0_graph_priv *priv)
 	nv_icmd(priv, 0x000841, 0x08000080);
 	nv_icmd(priv, 0x000842, 0x00400008);
 	nv_icmd(priv, 0x000843, 0x08000080);
-	nv_icmd(priv, 0x000818, 0x00000000);
-	nv_icmd(priv, 0x000819, 0x00000000);
-	nv_icmd(priv, 0x00081a, 0x00000000);
-	nv_icmd(priv, 0x00081b, 0x00000000);
-	nv_icmd(priv, 0x00081c, 0x00000000);
-	nv_icmd(priv, 0x00081d, 0x00000000);
-	nv_icmd(priv, 0x00081e, 0x00000000);
-	nv_icmd(priv, 0x00081f, 0x00000000);
-	nv_icmd(priv, 0x000848, 0x00000000);
-	nv_icmd(priv, 0x000849, 0x00000000);
-	nv_icmd(priv, 0x00084a, 0x00000000);
-	nv_icmd(priv, 0x00084b, 0x00000000);
-	nv_icmd(priv, 0x00084c, 0x00000000);
-	nv_icmd(priv, 0x00084d, 0x00000000);
-	nv_icmd(priv, 0x00084e, 0x00000000);
-	nv_icmd(priv, 0x00084f, 0x00000000);
-	nv_icmd(priv, 0x000850, 0x00000000);
-	nv_icmd(priv, 0x000851, 0x00000000);
-	nv_icmd(priv, 0x000852, 0x00000000);
-	nv_icmd(priv, 0x000853, 0x00000000);
-	nv_icmd(priv, 0x000854, 0x00000000);
-	nv_icmd(priv, 0x000855, 0x00000000);
-	nv_icmd(priv, 0x000856, 0x00000000);
-	nv_icmd(priv, 0x000857, 0x00000000);
-	nv_icmd(priv, 0x000738, 0x00000000);
 	nv_icmd(priv, 0x0006aa, 0x00000001);
 	nv_icmd(priv, 0x0006ab, 0x00000002);
 	nv_icmd(priv, 0x0006ac, 0x00000080);
@@ -862,31 +837,6 @@ nve0_grctx_generate_icmd(struct nvc0_graph_priv *priv)
 	nv_icmd(priv, 0x000813, 0x00000006);
 	nv_icmd(priv, 0x000814, 0x00000008);
 	nv_icmd(priv, 0x000957, 0x00000003);
-	nv_icmd(priv, 0x000818, 0x00000000);
-	nv_icmd(priv, 0x000819, 0x00000000);
-	nv_icmd(priv, 0x00081a, 0x00000000);
-	nv_icmd(priv, 0x00081b, 0x00000000);
-	nv_icmd(priv, 0x00081c, 0x00000000);
-	nv_icmd(priv, 0x00081d, 0x00000000);
-	nv_icmd(priv, 0x00081e, 0x00000000);
-	nv_icmd(priv, 0x00081f, 0x00000000);
-	nv_icmd(priv, 0x000848, 0x00000000);
-	nv_icmd(priv, 0x000849, 0x00000000);
-	nv_icmd(priv, 0x00084a, 0x00000000);
-	nv_icmd(priv, 0x00084b, 0x00000000);
-	nv_icmd(priv, 0x00084c, 0x00000000);
-	nv_icmd(priv, 0x00084d, 0x00000000);
-	nv_icmd(priv, 0x00084e, 0x00000000);
-	nv_icmd(priv, 0x00084f, 0x00000000);
-	nv_icmd(priv, 0x000850, 0x00000000);
-	nv_icmd(priv, 0x000851, 0x00000000);
-	nv_icmd(priv, 0x000852, 0x00000000);
-	nv_icmd(priv, 0x000853, 0x00000000);
-	nv_icmd(priv, 0x000854, 0x00000000);
-	nv_icmd(priv, 0x000855, 0x00000000);
-	nv_icmd(priv, 0x000856, 0x00000000);
-	nv_icmd(priv, 0x000857, 0x00000000);
-	nv_icmd(priv, 0x000738, 0x00000000);
 	nv_icmd(priv, 0x000b07, 0x00000002);
 	nv_icmd(priv, 0x000b08, 0x00000100);
 	nv_icmd(priv, 0x000b09, 0x00000100);
@@ -2162,12 +2112,30 @@ nve0_grctx_generate_902d(struct nvc0_graph_priv *priv)
 	nv_mthd(priv, 0x902d, 0x0244, 0x00000080);
 	nv_mthd(priv, 0x902d, 0x0248, 0x00000100);
 	nv_mthd(priv, 0x902d, 0x024c, 0x00000100);
-	nv_mthd(priv, 0x902d, 0x3410, 0x00000000);
+	switch (nv_device(priv)->chipset) {
+	case 0xe6:
+		nv_mthd(priv, 0x902d, 0x3410, 0x80002006);
+		break;
+	case 0xe4:
+	case 0xe7:
+	default:
+		nv_mthd(priv, 0x902d, 0x3410, 0x00000000);
+		break;
+	}
 }
 
 static void
 nve0_graph_generate_unk40xx(struct nvc0_graph_priv *priv)
 {
+	switch (nv_device(priv)->chipset) {
+	case 0xf0:
+		nv_wr32(priv, 0x404004, 0x00000000);
+		nv_wr32(priv, 0x404008, 0x00000000);
+		nv_wr32(priv, 0x40400c, 0x00000000);
+		break;
+	default:
+		break;
+	}
 	nv_wr32(priv, 0x404010, 0x0);
 	nv_wr32(priv, 0x404014, 0x0);
 	nv_wr32(priv, 0x404018, 0x0);
@@ -2175,6 +2143,19 @@ nve0_graph_generate_unk40xx(struct nvc0_graph_priv *priv)
 	nv_wr32(priv, 0x404020, 0x0);
 	nv_wr32(priv, 0x404024, 0xe000);
 	nv_wr32(priv, 0x404028, 0x0);
+	switch (nv_device(priv)->chipset) {
+	case 0xf0:
+		nv_wr32(priv, 0x40402c, 0x00000000);
+		nv_wr32(priv, 0x404030, 0x00000000);
+		nv_wr32(priv, 0x404034, 0x00000000);
+		nv_wr32(priv, 0x404038, 0x00000000);
+		nv_wr32(priv, 0x40403c, 0x00000000);
+		nv_wr32(priv, 0x404040, 0x00000000);
+		nv_wr32(priv, 0x404044, 0x00000000);
+		break;
+	default:
+		break;
+	}
 	nv_wr32(priv, 0x4040a8, 0x0);
 	nv_wr32(priv, 0x4040ac, 0x0);
 	nv_wr32(priv, 0x4040b0, 0x0);
@@ -2192,6 +2173,22 @@ nve0_graph_generate_unk40xx(struct nvc0_graph_priv *priv)
 	nv_wr32(priv, 0x4040e4, 0x0);
 	nv_wr32(priv, 0x4040e8, 0x1000);
 	nv_wr32(priv, 0x4040f8, 0x0);
+	switch (nv_device(priv)->chipset) {
+	case 0xf0:
+		nv_wr32(priv, 0x404100, 0x00000000);
+		nv_wr32(priv, 0x404104, 0x00000000);
+		nv_wr32(priv, 0x404108, 0x00000000);
+		nv_wr32(priv, 0x40410c, 0x00000000);
+		nv_wr32(priv, 0x404110, 0x00000000);
+		nv_wr32(priv, 0x404114, 0x00000000);
+		nv_wr32(priv, 0x404118, 0x00000000);
+		nv_wr32(priv, 0x40411c, 0x00000000);
+		nv_wr32(priv, 0x404120, 0x00000000);
+		nv_wr32(priv, 0x404124, 0x00000000);
+		break;
+	default:
+		break;
+	}
 	nv_wr32(priv, 0x404130, 0x0);
 	nv_wr32(priv, 0x404134, 0x0);
 	nv_wr32(priv, 0x404138, 0x20000040);
@@ -2199,14 +2196,32 @@ nve0_graph_generate_unk40xx(struct nvc0_graph_priv *priv)
 	nv_wr32(priv, 0x404154, 0x400);
 	nv_wr32(priv, 0x404158, 0x200);
 	nv_wr32(priv, 0x404164, 0x55);
+	switch (nv_device(priv)->chipset) {
+	case 0xf0:
+		nv_wr32(priv, 0x40417c, 0x00000000);
+		nv_wr32(priv, 0x404180, 0x00000000);
+		break;
+	default:
+		break;
+	}
 	nv_wr32(priv, 0x4041a0, 0x0);
 	nv_wr32(priv, 0x4041a4, 0x0);
 	nv_wr32(priv, 0x4041a8, 0x0);
 	nv_wr32(priv, 0x4041ac, 0x0);
-	nv_wr32(priv, 0x404200, 0x0);
-	nv_wr32(priv, 0x404204, 0x0);
-	nv_wr32(priv, 0x404208, 0x0);
-	nv_wr32(priv, 0x40420c, 0x0);
+	switch (nv_device(priv)->chipset) {
+	case 0xf0:
+		nv_wr32(priv, 0x404200, 0xa197);
+		nv_wr32(priv, 0x404204, 0xa1c0);
+		nv_wr32(priv, 0x404208, 0xa140);
+		nv_wr32(priv, 0x40420c, 0x902d);
+		break;
+	default:
+		nv_wr32(priv, 0x404200, 0x0);
+		nv_wr32(priv, 0x404204, 0x0);
+		nv_wr32(priv, 0x404208, 0x0);
+		nv_wr32(priv, 0x40420c, 0x0);
+		break;
+	}
 }
 
 static void
@@ -2224,7 +2239,13 @@ nve0_graph_generate_unk44xx(struct nvc0_graph_priv *priv)
 	nv_wr32(priv, 0x404428, 0x0);
 	nv_wr32(priv, 0x40442c, 0x0);
 	nv_wr32(priv, 0x404430, 0x0);
-	nv_wr32(priv, 0x404434, 0x0);
+	switch (nv_device(priv)->chipset) {
+	case 0xf0:
+		break;
+	default:
+		nv_wr32(priv, 0x404434, 0x0);
+		break;
+	}
 	nv_wr32(priv, 0x404438, 0x0);
 	nv_wr32(priv, 0x404460, 0x0);
 	nv_wr32(priv, 0x404464, 0x0);
@@ -2310,14 +2331,33 @@ nve0_graph_generate_unk58xx(struct nvc0_graph_priv *priv)
 	nv_wr32(priv, 0x405a00, 0x0);
 	nv_wr32(priv, 0x405a04, 0x0);
 	nv_wr32(priv, 0x405a18, 0x0);
+}
+
+static void
+nve0_graph_generate_unk5bxx(struct nvc0_graph_priv *priv)
+{
 	nv_wr32(priv, 0x405b00, 0x0);
 	nv_wr32(priv, 0x405b10, 0x1000);
+	switch (nv_device(priv)->chipset) {
+	case 0xf0:
+		nv_wr32(priv, 0x405b20, 0x04000000);
+		break;
+	default:
+		break;
+	}
 }
 
 static void
 nve0_graph_generate_unk60xx(struct nvc0_graph_priv *priv)
 {
-	nv_wr32(priv, 0x406020, 0x4103c1);
+	switch (nv_device(priv)->chipset) {
+	case 0xf0:
+		nv_wr32(priv, 0x406020, 0x34103c1);
+		break;
+	default:
+		nv_wr32(priv, 0x406020, 0x4103c1);
+		break;
+	}
 	nv_wr32(priv, 0x406028, 0x1);
 	nv_wr32(priv, 0x40602c, 0x1);
 	nv_wr32(priv, 0x406030, 0x1);
@@ -2329,11 +2369,27 @@ nve0_graph_generate_unk64xx(struct nvc0_graph_priv *priv)
 {
 	nv_wr32(priv, 0x4064a8, 0x0);
 	nv_wr32(priv, 0x4064ac, 0x3fff);
+	switch (nv_device(priv)->chipset) {
+	case 0xf0:
+		nv_wr32(priv, 0x4064b0, 0x0);
+		break;
+	default:
+		break;
+	}
 	nv_wr32(priv, 0x4064b4, 0x0);
 	nv_wr32(priv, 0x4064b8, 0x0);
-	nv_wr32(priv, 0x4064c0, 0x801a00f0);
-	nv_wr32(priv, 0x4064c4, 0x192ffff);
-	nv_wr32(priv, 0x4064c8, 0x1800600);
+	switch (nv_device(priv)->chipset) {
+	case 0xf0:
+		nv_wr32(priv, 0x4064c0, 0x802000f0);
+		nv_wr32(priv, 0x4064c4, 0x192ffff);
+		nv_wr32(priv, 0x4064c8, 0x18007c0);
+		break;
+	default:
+		nv_wr32(priv, 0x4064c0, 0x801a00f0);
+		nv_wr32(priv, 0x4064c4, 0x192ffff);
+		nv_wr32(priv, 0x4064c8, 0x1800600);
+		break;
+	}
 	nv_wr32(priv, 0x4064cc, 0x0);
 	nv_wr32(priv, 0x4064d0, 0x0);
 	nv_wr32(priv, 0x4064d4, 0x0);
@@ -2349,7 +2405,13 @@ nve0_graph_generate_unk64xx(struct nvc0_graph_priv *priv)
 static void
 nve0_graph_generate_unk70xx(struct nvc0_graph_priv *priv)
 {
-	nv_wr32(priv, 0x407040, 0x0);
+	switch (nv_device(priv)->chipset) {
+	case 0xf0:
+		break;
+	default:
+		nv_wr32(priv, 0x407040, 0x0);
+		break;
+	}
 }
 
 static void
@@ -2381,9 +2443,23 @@ nve0_graph_generate_unk80xx(struct nvc0_graph_priv *priv)
 static void
 nve0_graph_generate_unk88xx(struct nvc0_graph_priv *priv)
 {
-	nv_wr32(priv, 0x408800, 0x2802a3c);
+	switch (nv_device(priv)->chipset) {
+	case 0xf0:
+		nv_wr32(priv, 0x408800, 0x12802a3c);
+		break;
+	default:
+		nv_wr32(priv, 0x408800, 0x2802a3c);
+		break;
+	}
 	nv_wr32(priv, 0x408804, 0x40);
-	nv_wr32(priv, 0x408808, 0x1043e005);
+	switch (nv_device(priv)->chipset) {
+	case 0xf0:
+		nv_wr32(priv, 0x408808, 0x1003e005);
+		break;
+	default:
+		nv_wr32(priv, 0x408808, 0x1043e005);
+		break;
+	}
 	nv_wr32(priv, 0x408840, 0xb);
 	nv_wr32(priv, 0x408900, 0x3080b801);
 	nv_wr32(priv, 0x408904, 0x62000001);
@@ -2394,6 +2470,8 @@ nve0_graph_generate_unk88xx(struct nvc0_graph_priv *priv)
 static void
 nve0_graph_generate_gpc(struct nvc0_graph_priv *priv)
 {
+	int i;
+
 	nv_wr32(priv, 0x418380, 0x16);
 	nv_wr32(priv, 0x418400, 0x38004e00);
 	nv_wr32(priv, 0x418404, 0x71e0ffff);
@@ -2418,7 +2496,14 @@ nve0_graph_generate_gpc(struct nvc0_graph_priv *priv)
 	nv_wr32(priv, 0x418710, 0x0);
 	nv_wr32(priv, 0x418800, 0x7006860a);
 	nv_wr32(priv, 0x418808, 0x0);
-	nv_wr32(priv, 0x41880c, 0x0);
+	switch (nv_device(priv)->chipset) {
+	case 0xf0:
+		nv_wr32(priv, 0x41880c, 0x30);
+		break;
+	default:
+		nv_wr32(priv, 0x41880c, 0x0);
+		break;
+	}
 	nv_wr32(priv, 0x418810, 0x0);
 	nv_wr32(priv, 0x418828, 0x44);
 	nv_wr32(priv, 0x418830, 0x10000001);
@@ -2434,62 +2519,15 @@ nve0_graph_generate_gpc(struct nvc0_graph_priv *priv)
 	nv_wr32(priv, 0x418924, 0x0);
 	nv_wr32(priv, 0x418928, 0xffff00);
 	nv_wr32(priv, 0x41892c, 0xff00);
-	nv_wr32(priv, 0x418a00, 0x0);
-	nv_wr32(priv, 0x418a04, 0x0);
-	nv_wr32(priv, 0x418a08, 0x0);
-	nv_wr32(priv, 0x418a0c, 0x10000);
-	nv_wr32(priv, 0x418a10, 0x0);
-	nv_wr32(priv, 0x418a14, 0x0);
-	nv_wr32(priv, 0x418a18, 0x0);
-	nv_wr32(priv, 0x418a20, 0x0);
-	nv_wr32(priv, 0x418a24, 0x0);
-	nv_wr32(priv, 0x418a28, 0x0);
-	nv_wr32(priv, 0x418a2c, 0x10000);
-	nv_wr32(priv, 0x418a30, 0x0);
-	nv_wr32(priv, 0x418a34, 0x0);
-	nv_wr32(priv, 0x418a38, 0x0);
-	nv_wr32(priv, 0x418a40, 0x0);
-	nv_wr32(priv, 0x418a44, 0x0);
-	nv_wr32(priv, 0x418a48, 0x0);
-	nv_wr32(priv, 0x418a4c, 0x10000);
-	nv_wr32(priv, 0x418a50, 0x0);
-	nv_wr32(priv, 0x418a54, 0x0);
-	nv_wr32(priv, 0x418a58, 0x0);
-	nv_wr32(priv, 0x418a60, 0x0);
-	nv_wr32(priv, 0x418a64, 0x0);
-	nv_wr32(priv, 0x418a68, 0x0);
-	nv_wr32(priv, 0x418a6c, 0x10000);
-	nv_wr32(priv, 0x418a70, 0x0);
-	nv_wr32(priv, 0x418a74, 0x0);
-	nv_wr32(priv, 0x418a78, 0x0);
-	nv_wr32(priv, 0x418a80, 0x0);
-	nv_wr32(priv, 0x418a84, 0x0);
-	nv_wr32(priv, 0x418a88, 0x0);
-	nv_wr32(priv, 0x418a8c, 0x10000);
-	nv_wr32(priv, 0x418a90, 0x0);
-	nv_wr32(priv, 0x418a94, 0x0);
-	nv_wr32(priv, 0x418a98, 0x0);
-	nv_wr32(priv, 0x418aa0, 0x0);
-	nv_wr32(priv, 0x418aa4, 0x0);
-	nv_wr32(priv, 0x418aa8, 0x0);
-	nv_wr32(priv, 0x418aac, 0x10000);
-	nv_wr32(priv, 0x418ab0, 0x0);
-	nv_wr32(priv, 0x418ab4, 0x0);
-	nv_wr32(priv, 0x418ab8, 0x0);
-	nv_wr32(priv, 0x418ac0, 0x0);
-	nv_wr32(priv, 0x418ac4, 0x0);
-	nv_wr32(priv, 0x418ac8, 0x0);
-	nv_wr32(priv, 0x418acc, 0x10000);
-	nv_wr32(priv, 0x418ad0, 0x0);
-	nv_wr32(priv, 0x418ad4, 0x0);
-	nv_wr32(priv, 0x418ad8, 0x0);
-	nv_wr32(priv, 0x418ae0, 0x0);
-	nv_wr32(priv, 0x418ae4, 0x0);
-	nv_wr32(priv, 0x418ae8, 0x0);
-	nv_wr32(priv, 0x418aec, 0x10000);
-	nv_wr32(priv, 0x418af0, 0x0);
-	nv_wr32(priv, 0x418af4, 0x0);
-	nv_wr32(priv, 0x418af8, 0x0);
+	for (i = 0; i < 8; i++) {
+		nv_wr32(priv, 0x418a00 + (i * 0x20), 0x0);
+		nv_wr32(priv, 0x418a04 + (i * 0x20), 0x0);
+		nv_wr32(priv, 0x418a08 + (i * 0x20), 0x0);
+		nv_wr32(priv, 0x418a0c + (i * 0x20), 0x10000);
+		nv_wr32(priv, 0x418a10 + (i * 0x20), 0x0);
+		nv_wr32(priv, 0x418a14 + (i * 0x20), 0x0);
+		nv_wr32(priv, 0x418a18 + (i * 0x20), 0x0);
+	}
 	nv_wr32(priv, 0x418b00, 0x6);
 	nv_wr32(priv, 0x418b08, 0xa418820);
 	nv_wr32(priv, 0x418b0c, 0x62080e6);
@@ -2511,6 +2549,13 @@ nve0_graph_generate_gpc(struct nvc0_graph_priv *priv)
 	nv_wr32(priv, 0x418c6c, 0x1);
 	nv_wr32(priv, 0x418c80, 0x20200004);
 	nv_wr32(priv, 0x418c8c, 0x1);
+	switch (nv_device(priv)->chipset) {
+	case 0xf0:
+		nv_wr32(priv, 0x418d24, 0x0);
+		break;
+	default:
+		break;
+	}
 	nv_wr32(priv, 0x419000, 0x780);
 	nv_wr32(priv, 0x419004, 0x0);
 	nv_wr32(priv, 0x419008, 0x0);
@@ -2530,31 +2575,71 @@ nve0_graph_generate_tpc(struct nvc0_graph_priv *priv)
 	nv_wr32(priv, 0x419a10, 0x0);
 	nv_wr32(priv, 0x419a14, 0x200);
 	nv_wr32(priv, 0x419a1c, 0xc000);
-	nv_wr32(priv, 0x419a20, 0x800);
+	switch (nv_device(priv)->chipset) {
+	case 0xf0:
+		nv_wr32(priv, 0x419a20, 0x20800);
+		break;
+	default:
+		nv_wr32(priv, 0x419a20, 0x800);
+		break;
+	}
 	nv_wr32(priv, 0x419a30, 0x1);
 	nv_wr32(priv, 0x419ac4, 0x37f440);
-	nv_wr32(priv, 0x419c00, 0xa);
+	switch (nv_device(priv)->chipset) {
+	case 0xf0:
+		nv_wr32(priv, 0x419c00, 0x1a);
+		break;
+	default:
+		nv_wr32(priv, 0x419c00, 0xa);
+		break;
+	}
 	nv_wr32(priv, 0x419c04, 0x80000006);
 	nv_wr32(priv, 0x419c08, 0x2);
 	nv_wr32(priv, 0x419c20, 0x0);
 	nv_wr32(priv, 0x419c24, 0x84210);
 	nv_wr32(priv, 0x419c28, 0x3efbefbe);
 	nv_wr32(priv, 0x419ce8, 0x0);
-	nv_wr32(priv, 0x419cf4, 0x3203);
-	nv_wr32(priv, 0x419e04, 0x0);
-	nv_wr32(priv, 0x419e08, 0x0);
-	nv_wr32(priv, 0x419e0c, 0x0);
-	nv_wr32(priv, 0x419e10, 0x402);
+	switch (nv_device(priv)->chipset) {
+	case 0xf0:
+		nv_wr32(priv, 0x419cf4, 0x203);
+		nv_wr32(priv, 0x419e04, 0x0);
+		nv_wr32(priv, 0x419e08, 0x1d);
+		nv_wr32(priv, 0x419e0c, 0x0);
+		nv_wr32(priv, 0x419e10, 0x1c02);
+
+		break;
+	default:
+		nv_wr32(priv, 0x419cf4, 0x3203);
+		nv_wr32(priv, 0x419e04, 0x0);
+		nv_wr32(priv, 0x419e08, 0x0);
+		nv_wr32(priv, 0x419e0c, 0x0);
+		nv_wr32(priv, 0x419e10, 0x402);
+		break;
+	}
 	nv_wr32(priv, 0x419e44, 0x13eff2);
 	nv_wr32(priv, 0x419e48, 0x0);
 	nv_wr32(priv, 0x419e4c, 0x7f);
 	nv_wr32(priv, 0x419e50, 0x0);
 	nv_wr32(priv, 0x419e54, 0x0);
-	nv_wr32(priv, 0x419e58, 0x0);
+	switch (nv_device(priv)->chipset) {
+	case 0xf0:
+		nv_wr32(priv, 0x419e58, 0x1);
+		break;
+	default:
+		nv_wr32(priv, 0x419e58, 0x0);
+		break;
+	}
 	nv_wr32(priv, 0x419e5c, 0x0);
 	nv_wr32(priv, 0x419e60, 0x0);
 	nv_wr32(priv, 0x419e64, 0x0);
-	nv_wr32(priv, 0x419e68, 0x0);
+	switch (nv_device(priv)->chipset) {
+	case 0xf0:
+		nv_wr32(priv, 0x419e68, 0x2);
+		break;
+	default:
+		nv_wr32(priv, 0x419e68, 0x0);
+		break;
+	}
 	nv_wr32(priv, 0x419e6c, 0x0);
 	nv_wr32(priv, 0x419e70, 0x0);
 	nv_wr32(priv, 0x419e74, 0x0);
@@ -2567,28 +2652,73 @@ nve0_graph_generate_tpc(struct nvc0_graph_priv *priv)
 	nv_wr32(priv, 0x419e90, 0x0);
 	nv_wr32(priv, 0x419e94, 0x0);
 	nv_wr32(priv, 0x419e98, 0x0);
-	nv_wr32(priv, 0x419eac, 0x1fcf);
-	nv_wr32(priv, 0x419eb0, 0xd3f);
+	switch (nv_device(priv)->chipset) {
+	case 0xe4:
+	case 0xe7:
+	case 0xe6:
+		nv_wr32(priv, 0x419eac, 0x1f8f);
+		nv_wr32(priv, 0x419eb0, 0xd3f);
+		break;
+	case 0xf0:
+		nv_wr32(priv, 0x419eac, 0x1fcf);
+		nv_wr32(priv, 0x419eb0, 0xdb00da0);
+		nv_wr32(priv, 0x419eb8, 0x0);
+		break;
+	}
 	nv_wr32(priv, 0x419ec8, 0x1304f);
 	nv_wr32(priv, 0x419f30, 0x0);
 	nv_wr32(priv, 0x419f34, 0x0);
 	nv_wr32(priv, 0x419f38, 0x0);
 	nv_wr32(priv, 0x419f3c, 0x0);
-	nv_wr32(priv, 0x419f40, 0x0);
+	switch (nv_device(priv)->chipset) {
+	case 0xf0:
+		nv_wr32(priv, 0x419f40, 0x18);
+		break;
+	default:
+		nv_wr32(priv, 0x419f40, 0x0);
+		break;
+	}
 	nv_wr32(priv, 0x419f44, 0x0);
 	nv_wr32(priv, 0x419f48, 0x0);
 	nv_wr32(priv, 0x419f4c, 0x0);
 	nv_wr32(priv, 0x419f58, 0x0);
-	nv_wr32(priv, 0x419f78, 0xb);
+	switch (nv_device(priv)->chipset) {
+	case 0xe4:
+	case 0xe7:
+	case 0xe6:
+		nv_wr32(priv, 0x419f70, 0x0);
+		nv_wr32(priv, 0x419f78, 0xb);
+		nv_wr32(priv, 0x419f7c, 0x27a);
+		break;
+	case 0xf0:
+		nv_wr32(priv, 0x419f70, 0x7300);
+		nv_wr32(priv, 0x419f78, 0xeb);
+		nv_wr32(priv, 0x419f7c, 0x404);
+		break;
+	}
 }
 
 static void
 nve0_graph_generate_tpcunk(struct nvc0_graph_priv *priv)
 {
 	nv_wr32(priv, 0x41be24, 0x6);
-	nv_wr32(priv, 0x41bec0, 0x12180000);
+	switch (nv_device(priv)->chipset) {
+	case 0xf0:
+		nv_wr32(priv, 0x41bec0, 0x10000000);
+		break;
+	default:
+		nv_wr32(priv, 0x41bec0, 0x12180000);
+		break;
+	}
 	nv_wr32(priv, 0x41bec4, 0x37f7f);
-	nv_wr32(priv, 0x41bee4, 0x6480430);
+	switch (nv_device(priv)->chipset) {
+	case 0xf0:
+		nv_wr32(priv, 0x41bee4, 0x0);
+		break;
+	default:
+		nv_wr32(priv, 0x41bee4, 0x6480430);
+		break;
+	}
 	nv_wr32(priv, 0x41bf00, 0xa418820);
 	nv_wr32(priv, 0x41bf04, 0x62080e6);
 	nv_wr32(priv, 0x41bf08, 0x20398a4);
@@ -2624,6 +2754,7 @@ nve0_grctx_generate(struct nvc0_graph_priv *priv)
 	nve0_graph_generate_unk46xx(priv);
 	nve0_graph_generate_unk47xx(priv);
 	nve0_graph_generate_unk58xx(priv);
+	nve0_graph_generate_unk5bxx(priv);
 	nve0_graph_generate_unk60xx(priv);
 	nve0_graph_generate_unk64xx(priv);
 	nve0_graph_generate_unk70xx(priv);

+ 26 - 47
drivers/gpu/drm/nouveau/core/engine/graph/fuc/nvc0.fuc → drivers/gpu/drm/nouveau/core/engine/graph/fuc/com.fuc

@@ -23,42 +23,7 @@
  * Authors: Ben Skeggs
  */
 
-define(`mmctx_data', `.b32 eval((($2 - 1) << 26) | $1)')
-define(`queue_init', `.skip eval((2 * 4) + ((8 * 4) * 2))')
-
-ifdef(`include_code', `
-// Error codes
-define(`E_BAD_COMMAND', 0x01)
-define(`E_CMD_OVERFLOW', 0x02)
-
-// Util macros to help with debugging ucode hangs etc
-define(`T_WAIT', 0)
-define(`T_MMCTX', 1)
-define(`T_STRWAIT', 2)
-define(`T_STRINIT', 3)
-define(`T_AUTO', 4)
-define(`T_CHAN', 5)
-define(`T_LOAD', 6)
-define(`T_SAVE', 7)
-define(`T_LCHAN', 8)
-define(`T_LCTXH', 9)
-
-define(`trace_set', `
-	mov $r8 0x83c
-	shl b32 $r8 6
-	clear b32 $r9
-	bset $r9 $1
-	iowr I[$r8 + 0x000] $r9		// CC_SCRATCH[7]
-')
-
-define(`trace_clr', `
-	mov $r8 0x85c
-	shl b32 $r8 6
-	clear b32 $r9
-	bset $r9 $1
-	iowr I[$r8 + 0x000] $r9		// CC_SCRATCH[7]
-')
-
+#ifdef INCLUDE_CODE
 // queue_put - add request to queue
 //
 // In : $r13 queue pointer
@@ -178,27 +143,41 @@ watchdog_clear:
 	iowr I[$r8 + 0x000] $r0
 	ret
 
-// wait_done{z,o} - wait on FUC_DONE bit to become clear/set
+// wait_donez - wait on FUC_DONE bit to become clear
+//
+// In : $r10 bit to wait on
+//
+wait_donez:
+	trace_set(T_WAIT);
+	mov $r8 0x818
+	shl b32 $r8 6
+	iowr I[$r8 + 0x000] $r10
+	wait_donez_ne:
+		mov $r8 0x400
+		shl b32 $r8 6
+		iord $r8 I[$r8 + 0x000]
+		xbit $r8 $r8 $r10
+		bra ne #wait_donez_ne
+	trace_clr(T_WAIT)
+	ret
+
+// wait_doneo - wait on FUC_DONE bit to become set
 //
 // In : $r10 bit to wait on
 //
-define(`wait_done', `
-$1:
+wait_doneo:
 	trace_set(T_WAIT);
 	mov $r8 0x818
 	shl b32 $r8 6
-	iowr I[$r8 + 0x000] $r10	// CC_SCRATCH[6] = wait bit
-	wait_done_$1:
+	iowr I[$r8 + 0x000] $r10
+	wait_doneo_e:
 		mov $r8 0x400
 		shl b32 $r8 6
-		iord $r8 I[$r8 + 0x000]	// DONE
+		iord $r8 I[$r8 + 0x000]
 		xbit $r8 $r8 $r10
-		bra $2 #wait_done_$1
+		bra e #wait_doneo_e
 	trace_clr(T_WAIT)
 	ret
-')
-wait_done(wait_donez, ne)
-wait_done(wait_doneo, e)
 
 // mmctx_size - determine size of a mmio list transfer
 //
@@ -397,4 +376,4 @@ strand_ctx_init:
 	sub b32 $r15 $r14 $r15
 	trace_clr(T_STRINIT)
 	ret
-')
+#endif

+ 369 - 0
drivers/gpu/drm/nouveau/core/engine/graph/fuc/gpc.fuc

@@ -0,0 +1,369 @@
+/* fuc microcode for nvc0 PGRAPH/GPC
+ *
+ * Copyright 2011 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs
+ */
+
+/* TODO
+ * - bracket certain functions with scratch writes, useful for debugging
+ * - watchdog timer around ctx operations
+ */
+
+#ifdef INCLUDE_DATA
+gpc_id:			.b32 0
+gpc_mmio_list_head:	.b32 0
+gpc_mmio_list_tail:	.b32 0
+
+tpc_count:		.b32 0
+tpc_mask:		.b32 0
+tpc_mmio_list_head:	.b32 0
+tpc_mmio_list_tail:	.b32 0
+
+cmd_queue:		queue_init
+#endif
+
+#ifdef INCLUDE_CODE
+// reports an exception to the host
+//
+// In: $r15 error code (E_* constants, e.g. E_BAD_COMMAND)
+//
+error:
+	push $r14
+	mov $r14 -0x67ec 	// 0x9814
+	sethi $r14 0x400000
+	call #nv_wr32		// HUB_CTXCTL_CC_SCRATCH[5] = error code
+	add b32 $r14 0x41c
+	mov $r15 1
+	call #nv_wr32		// HUB_CTXCTL_INTR_UP_SET
+	pop $r14
+	ret
+
+// GPC fuc initialisation, executed by triggering ucode start, will
+// fall through to main loop after completion.
+//
+// Input:
+//   CC_SCRATCH[0]: chipset (PMC_BOOT_0 read returns 0x0bad0bad... sigh)
+//   CC_SCRATCH[1]: context base
+//
+// Output:
+//   CC_SCRATCH[0]:
+//	     31:31: set to signal completion
+//   CC_SCRATCH[1]:
+//	      31:0: GPC context size
+//
+init:
+	clear b32 $r0
+	mov $sp $r0
+
+	// enable fifo access
+	mov $r1 0x1200
+	mov $r2 2
+	iowr I[$r1 + 0x000] $r2		// FIFO_ENABLE
+
+	// setup i0 handler, and route all interrupts to it
+	mov $r1 #ih
+	mov $iv0 $r1
+	mov $r1 0x400
+	iowr I[$r1 + 0x300] $r0		// INTR_DISPATCH
+
+	// enable fifo interrupt
+	mov $r2 4
+	iowr I[$r1 + 0x000] $r2		// INTR_EN_SET
+
+	// enable interrupts
+	bset $flags ie0
+
+	// figure out which GPC we are, and how many TPCs we have
+	mov $r1 0x608
+	shl b32 $r1 6
+	iord $r2 I[$r1 + 0x000]		// UNITS
+	mov $r3 1
+	and $r2 0x1f
+	shl b32 $r3 $r2
+	sub b32 $r3 1
+	st b32 D[$r0 + #tpc_count] $r2
+	st b32 D[$r0 + #tpc_mask] $r3
+	add b32 $r1 0x400
+	iord $r2 I[$r1 + 0x000]		// MYINDEX
+	st b32 D[$r0 + #gpc_id] $r2
+
+	// find context data for this chipset
+	mov $r2 0x800
+	shl b32 $r2 6
+	iord $r2 I[$r2 + 0x000]		// CC_SCRATCH[0]
+	mov $r1 #chipsets - 12
+	init_find_chipset:
+		add b32 $r1 12
+		ld b32 $r3 D[$r1 + 0x00]
+		cmpu b32 $r3 $r2
+		bra e #init_context
+		cmpu b32 $r3 0
+		bra ne #init_find_chipset
+		// unknown chipset
+		ret
+
+	// initialise context base, and size tracking
+	init_context:
+	mov $r2 0x800
+	shl b32 $r2 6
+	iord $r2 I[$r2 + 0x100]	// CC_SCRATCH[1], initial base
+	clear b32 $r3		// track GPC context size here
+
+	// set mmctx base addresses now so we don't have to do it later,
+	// they don't currently ever change
+	mov $r4 0x700
+	shl b32 $r4 6
+	shr b32 $r5 $r2 8
+	iowr I[$r4 + 0x000] $r5		// MMCTX_SAVE_SWBASE
+	iowr I[$r4 + 0x100] $r5		// MMCTX_LOAD_SWBASE
+
+	// calculate GPC mmio context size, store the chipset-specific
+	// mmio list pointers somewhere we can get at them later without
+	// re-parsing the chipset list
+	clear b32 $r14
+	clear b32 $r15
+	ld b16 $r14 D[$r1 + 4]
+	ld b16 $r15 D[$r1 + 6]
+	st b16 D[$r0 + #gpc_mmio_list_head] $r14
+	st b16 D[$r0 + #gpc_mmio_list_tail] $r15
+	call #mmctx_size
+	add b32 $r2 $r15
+	add b32 $r3 $r15
+
+	// calculate per-TPC mmio context size, store the list pointers
+	ld b16 $r14 D[$r1 + 8]
+	ld b16 $r15 D[$r1 + 10]
+	st b16 D[$r0 + #tpc_mmio_list_head] $r14
+	st b16 D[$r0 + #tpc_mmio_list_tail] $r15
+	call #mmctx_size
+	ld b32 $r14 D[$r0 + #tpc_count]
+	mulu $r14 $r15
+	add b32 $r2 $r14
+	add b32 $r3 $r14
+
+	// round up base/size to 256 byte boundary (for strand SWBASE)
+	add b32 $r4 0x1300
+	shr b32 $r3 2
+	iowr I[$r4 + 0x000] $r3		// MMCTX_LOAD_COUNT, wtf for?!?
+	shr b32 $r2 8
+	shr b32 $r3 6
+	add b32 $r2 1
+	add b32 $r3 1
+	shl b32 $r2 8
+	shl b32 $r3 8
+
+	// calculate size of strand context data
+	mov b32 $r15 $r2
+	call #strand_ctx_init
+	add b32 $r3 $r15
+
+	// save context size, and tell HUB we're done
+	mov $r1 0x800
+	shl b32 $r1 6
+	iowr I[$r1 + 0x100] $r3		// CC_SCRATCH[1]  = context size
+	add b32 $r1 0x800
+	clear b32 $r2
+	bset $r2 31
+	iowr I[$r1 + 0x000] $r2		// CC_SCRATCH[0] |= 0x80000000
+
+// Main program loop, very simple, sleeps until woken up by the interrupt
+// handler, pulls a command from the queue and executes its handler
+//
+main:
+	bset $flags $p0
+	sleep $p0
+	mov $r13 #cmd_queue
+	call #queue_get
+	bra $p1 #main
+
+	// 0x0000-0x0003 are all context transfers
+	cmpu b32 $r14 0x04
+	bra nc #main_not_ctx_xfer
+		// fetch $flags and mask off $p1/$p2
+		mov $r1 $flags
+		mov $r2 0x0006
+		not b32 $r2
+		and $r1 $r2
+		// set $p1/$p2 according to transfer type
+		shl b32 $r14 1
+		or $r1 $r14
+		mov $flags $r1
+		// transfer context data
+		call #ctx_xfer
+		bra #main
+
+	main_not_ctx_xfer:
+	shl b32 $r15 $r14 16
+	or $r15 E_BAD_COMMAND
+	call #error
+	bra #main
+
+// interrupt handler
+ih:
+	push $r8
+	mov $r8 $flags
+	push $r8
+	push $r9
+	push $r10
+	push $r11
+	push $r13
+	push $r14
+	push $r15
+
+	// incoming fifo command?
+	iord $r10 I[$r0 + 0x200]	// INTR
+	and $r11 $r10 0x00000004
+	bra e #ih_no_fifo
+		// queue incoming fifo command for later processing
+		mov $r11 0x1900
+		mov $r13 #cmd_queue
+		iord $r14 I[$r11 + 0x100]	// FIFO_CMD
+		iord $r15 I[$r11 + 0x000]	// FIFO_DATA
+		call #queue_put
+		add b32 $r11 0x400
+		mov $r14 1
+		iowr I[$r11 + 0x000] $r14	// FIFO_ACK
+
+	// ack, and wake up main()
+	ih_no_fifo:
+	iowr I[$r0 + 0x100] $r10	// INTR_ACK
+
+	pop $r15
+	pop $r14
+	pop $r13
+	pop $r11
+	pop $r10
+	pop $r9
+	pop $r8
+	mov $flags $r8
+	pop $r8
+	bclr $flags $p0
+	iret
+
+// Set this GPC's bit in HUB_BAR, used to signal completion of various
+// activities to the HUB fuc
+//
+hub_barrier_done:
+	mov $r15 1
+	ld b32 $r14 D[$r0 + #gpc_id]
+	shl b32 $r15 $r14
+	mov $r14 -0x6be8 	// 0x409418 - HUB_BAR_SET
+	sethi $r14 0x400000
+	call #nv_wr32
+	ret
+
+// Disables various things, waits a bit, and re-enables them..
+//
+// Not sure how exactly this helps, perhaps "ENABLE" is not such a
+// good description for the bits we turn off?  Anyways, without this,
+// funny things happen.
+//
+ctx_redswitch:
+	mov $r14 0x614
+	shl b32 $r14 6
+	mov $r15 0x020
+	iowr I[$r14] $r15	// GPC_RED_SWITCH = POWER
+	mov $r15 8
+	ctx_redswitch_delay:
+		sub b32 $r15 1
+		bra ne #ctx_redswitch_delay
+	mov $r15 0xa20
+	iowr I[$r14] $r15	// GPC_RED_SWITCH = UNK11, ENABLE, POWER
+	ret
+
+// Transfer GPC context data between GPU and storage area
+//
+// In: $r15 context base address
+//     $p1 clear on save, set on load
+//     $p2 set if opposite direction done/will be done, so:
+//		on save it means: "a load will follow this save"
+//		on load it means: "a save preceded this load"
+//
+ctx_xfer:
+	// set context base address
+	mov $r1 0xa04
+	shl b32 $r1 6
+	iowr I[$r1 + 0x000] $r15// MEM_BASE
+	bra not $p1 #ctx_xfer_not_load
+		call #ctx_redswitch
+	ctx_xfer_not_load:
+
+	// strands
+	mov $r1 0x4afc
+	sethi $r1 0x20000
+	mov $r2 0xc
+	iowr I[$r1] $r2		// STRAND_CMD(0x3f) = 0x0c
+	call #strand_wait
+	mov $r2 0x47fc
+	sethi $r2 0x20000
+	iowr I[$r2] $r0		// STRAND_FIRST_GENE(0x3f) = 0x00
+	xbit $r2 $flags $p1
+	add b32 $r2 3
+	iowr I[$r1] $r2		// STRAND_CMD(0x3f) = 0x03/0x04 (SAVE/LOAD)
+
+	// mmio context
+	xbit $r10 $flags $p1	// direction
+	or $r10 2		// first
+	mov $r11 0x0000
+	sethi $r11 0x500000
+	ld b32 $r12 D[$r0 + #gpc_id]
+	shl b32 $r12 15
+	add b32 $r11 $r12	// base = NV_PGRAPH_GPCn
+	ld b32 $r12 D[$r0 + #gpc_mmio_list_head]
+	ld b32 $r13 D[$r0 + #gpc_mmio_list_tail]
+	mov $r14 0		// not multi
+	call #mmctx_xfer
+
+	// per-TPC mmio context
+	xbit $r10 $flags $p1	// direction
+	or $r10 4		// last
+	mov $r11 0x4000
+	sethi $r11 0x500000	// base = NV_PGRAPH_GPC0_TPC0
+	ld b32 $r12 D[$r0 + #gpc_id]
+	shl b32 $r12 15
+	add b32 $r11 $r12	// base = NV_PGRAPH_GPCn_TPC0
+	ld b32 $r12 D[$r0 + #tpc_mmio_list_head]
+	ld b32 $r13 D[$r0 + #tpc_mmio_list_tail]
+	ld b32 $r15 D[$r0 + #tpc_mask]
+	mov $r14 0x800		// stride = 0x800
+	call #mmctx_xfer
+
+	// wait for strands to finish
+	call #strand_wait
+
+	// if load, or a save without a load following, do some
+	// unknown stuff that's done after finishing a block of
+	// strand commands
+	bra $p1 #ctx_xfer_post
+	bra not $p2 #ctx_xfer_done
+	ctx_xfer_post:
+		mov $r1 0x4afc
+		sethi $r1 0x20000
+		mov $r2 0xd
+		iowr I[$r1] $r2		// STRAND_CMD(0x3f) = 0x0d
+		call #strand_wait
+
+	// mark completion in HUB's barrier
+	ctx_xfer_done:
+	call #hub_barrier_done
+	ret
+#endif

+ 27 - 417
drivers/gpu/drm/nouveau/core/engine/graph/fuc/gpcnvc0.fuc

@@ -1,6 +1,5 @@
-/* fuc microcode for nvc0 PGRAPH/GPC
- *
- * Copyright 2011 Red Hat Inc.
+/*
+ * Copyright 2013 Red Hat Inc.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -20,32 +19,17 @@
  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  * OTHER DEALINGS IN THE SOFTWARE.
  *
- * Authors: Ben Skeggs
- */
-
-/* To build:
- *    m4 gpcnvc0.fuc | envyas -a -w -m fuc -V fuc3 -o gpcnvc0.fuc.h
+ * Authors: Ben Skeggs <bskeggs@redhat.com>
  */
 
-/* TODO
- * - bracket certain functions with scratch writes, useful for debugging
- * - watchdog timer around ctx operations
- */
+#define NVGF
+#include "macros.fuc"
 
 .section #nvc0_grgpc_data
-include(`nvc0.fuc')
-gpc_id:			.b32 0
-gpc_mmio_list_head:	.b32 0
-gpc_mmio_list_tail:	.b32 0
-
-tpc_count:		.b32 0
-tpc_mask:		.b32 0
-tpc_mmio_list_head:	.b32 0
-tpc_mmio_list_tail:	.b32 0
-
-cmd_queue:		queue_init
+#define INCLUDE_DATA
+#include "com.fuc"
+#include "gpc.fuc"
 
-// chipset descriptions
 chipsets:
 .b8  0xc0 0 0 0
 .b16 #nvc0_gpc_mmio_head
@@ -81,23 +65,26 @@ chipsets:
 .b16 #nvc0_gpc_mmio_head
 .b16 #nvc0_gpc_mmio_tail
 .b16 #nvc0_tpc_mmio_head
-.b16 #nvcf_tpc_mmio_tail
+.b16 #nvc3_tpc_mmio_tail
 .b8  0xd9 0 0 0
 .b16 #nvd9_gpc_mmio_head
-.b16 #nvd9_gpc_mmio_tail
-.b16 #nvd9_tpc_mmio_head
+.b16 #nvc1_gpc_mmio_tail
+.b16 #nvc0_tpc_mmio_head
 .b16 #nvd9_tpc_mmio_tail
 .b8  0xd7 0 0 0
 .b16 #nvd9_gpc_mmio_head
-.b16 #nvd9_gpc_mmio_tail
-.b16 #nvd9_tpc_mmio_head
+.b16 #nvc1_gpc_mmio_tail
+.b16 #nvc0_tpc_mmio_head
 .b16 #nvd9_tpc_mmio_tail
 .b8  0 0 0 0
 
 // GPC mmio lists
 nvc0_gpc_mmio_head:
+mmctx_data(0x000408, 1)
+nvd9_gpc_mmio_head:
 mmctx_data(0x000380, 1)
-mmctx_data(0x000400, 6)
+mmctx_data(0x000400, 2);
+mmctx_data(0x00040c, 3);
 mmctx_data(0x000450, 9)
 mmctx_data(0x000600, 1)
 mmctx_data(0x000684, 1)
@@ -124,35 +111,6 @@ nvc0_gpc_mmio_tail:
 mmctx_data(0x000c6c, 1);
 nvc1_gpc_mmio_tail:
 
-nvd9_gpc_mmio_head:
-mmctx_data(0x000380, 1)
-mmctx_data(0x000400, 2)
-mmctx_data(0x00040c, 3)
-mmctx_data(0x000450, 9)
-mmctx_data(0x000600, 1)
-mmctx_data(0x000684, 1)
-mmctx_data(0x000700, 5)
-mmctx_data(0x000800, 1)
-mmctx_data(0x000808, 3)
-mmctx_data(0x000828, 1)
-mmctx_data(0x000830, 1)
-mmctx_data(0x0008d8, 1)
-mmctx_data(0x0008e0, 1)
-mmctx_data(0x0008e8, 6)
-mmctx_data(0x00091c, 1)
-mmctx_data(0x000924, 3)
-mmctx_data(0x000b00, 1)
-mmctx_data(0x000b08, 6)
-mmctx_data(0x000bb8, 1)
-mmctx_data(0x000c08, 1)
-mmctx_data(0x000c10, 8)
-mmctx_data(0x000c6c, 1)
-mmctx_data(0x000c80, 1)
-mmctx_data(0x000c8c, 1)
-mmctx_data(0x001000, 3)
-mmctx_data(0x001014, 1)
-nvd9_gpc_mmio_tail:
-
 // TPC mmio lists
 nvc0_tpc_mmio_head:
 mmctx_data(0x000018, 1)
@@ -161,7 +119,6 @@ mmctx_data(0x000048, 1)
 mmctx_data(0x000064, 1)
 mmctx_data(0x000088, 1)
 mmctx_data(0x000200, 6)
-mmctx_data(0x00021c, 2)
 mmctx_data(0x000300, 6)
 mmctx_data(0x0003d0, 1)
 mmctx_data(0x0003e0, 2)
@@ -176,369 +133,22 @@ mmctx_data(0x000644, 20)
 mmctx_data(0x000698, 1)
 mmctx_data(0x000750, 2)
 nvc0_tpc_mmio_tail:
-mmctx_data(0x000758, 1)
+mmctx_data(0x00021c, 2)
 mmctx_data(0x0002c4, 1)
-mmctx_data(0x0006e0, 1)
-nvcf_tpc_mmio_tail:
-mmctx_data(0x0004bc, 1)
+mmctx_data(0x000730, 8)
+mmctx_data(0x000758, 1)
 nvc3_tpc_mmio_tail:
 mmctx_data(0x000544, 1)
 nvc1_tpc_mmio_tail:
-
-nvd9_tpc_mmio_head:
-mmctx_data(0x000018, 1)
-mmctx_data(0x00003c, 1)
-mmctx_data(0x000048, 1)
-mmctx_data(0x000064, 1)
-mmctx_data(0x000088, 1)
-mmctx_data(0x000200, 6)
-mmctx_data(0x00021c, 2)
-mmctx_data(0x0002c4, 1)
-mmctx_data(0x000300, 6)
-mmctx_data(0x0003d0, 1)
-mmctx_data(0x0003e0, 2)
-mmctx_data(0x000400, 3)
-mmctx_data(0x000420, 3)
-mmctx_data(0x0004b0, 1)
-mmctx_data(0x0004e8, 1)
-mmctx_data(0x0004f4, 1)
-mmctx_data(0x000520, 2)
-mmctx_data(0x000544, 1)
-mmctx_data(0x000604, 4)
-mmctx_data(0x000644, 20)
-mmctx_data(0x000698, 1)
-mmctx_data(0x0006e0, 1)
-mmctx_data(0x000750, 3)
+mmctx_data(0x000424, 2);
+mmctx_data(0x0006e0, 1);
 nvd9_tpc_mmio_tail:
+#undef INCLUDE_DATA
 
 .section #nvc0_grgpc_code
+#define INCLUDE_CODE
 bra #init
-define(`include_code')
-include(`nvc0.fuc')
-
-// reports an exception to the host
-//
-// In: $r15 error code (see nvc0.fuc)
-//
-error:
-	push $r14
-	mov $r14 -0x67ec 	// 0x9814
-	sethi $r14 0x400000
-	call #nv_wr32		// HUB_CTXCTL_CC_SCRATCH[5] = error code
-	add b32 $r14 0x41c
-	mov $r15 1
-	call #nv_wr32		// HUB_CTXCTL_INTR_UP_SET
-	pop $r14
-	ret
-
-// GPC fuc initialisation, executed by triggering ucode start, will
-// fall through to main loop after completion.
-//
-// Input:
-//   CC_SCRATCH[0]: chipset (PMC_BOOT_0 read returns 0x0bad0bad... sigh)
-//   CC_SCRATCH[1]: context base
-//
-// Output:
-//   CC_SCRATCH[0]:
-//	     31:31: set to signal completion
-//   CC_SCRATCH[1]:
-//	      31:0: GPC context size
-//
-init:
-	clear b32 $r0
-	mov $sp $r0
-
-	// enable fifo access
-	mov $r1 0x1200
-	mov $r2 2
-	iowr I[$r1 + 0x000] $r2		// FIFO_ENABLE
-
-	// setup i0 handler, and route all interrupts to it
-	mov $r1 #ih
-	mov $iv0 $r1
-	mov $r1 0x400
-	iowr I[$r1 + 0x300] $r0		// INTR_DISPATCH
-
-	// enable fifo interrupt
-	mov $r2 4
-	iowr I[$r1 + 0x000] $r2		// INTR_EN_SET
-
-	// enable interrupts
-	bset $flags ie0
-
-	// figure out which GPC we are, and how many TPCs we have
-	mov $r1 0x608
-	shl b32 $r1 6
-	iord $r2 I[$r1 + 0x000]		// UNITS
-	mov $r3 1
-	and $r2 0x1f
-	shl b32 $r3 $r2
-	sub b32 $r3 1
-	st b32 D[$r0 + #tpc_count] $r2
-	st b32 D[$r0 + #tpc_mask] $r3
-	add b32 $r1 0x400
-	iord $r2 I[$r1 + 0x000]		// MYINDEX
-	st b32 D[$r0 + #gpc_id] $r2
-
-	// find context data for this chipset
-	mov $r2 0x800
-	shl b32 $r2 6
-	iord $r2 I[$r2 + 0x000]		// CC_SCRATCH[0]
-	mov $r1 #chipsets - 12
-	init_find_chipset:
-		add b32 $r1 12
-		ld b32 $r3 D[$r1 + 0x00]
-		cmpu b32 $r3 $r2
-		bra e #init_context
-		cmpu b32 $r3 0
-		bra ne #init_find_chipset
-		// unknown chipset
-		ret
-
-	// initialise context base, and size tracking
-	init_context:
-	mov $r2 0x800
-	shl b32 $r2 6
-	iord $r2 I[$r2 + 0x100]	// CC_SCRATCH[1], initial base
-	clear b32 $r3		// track GPC context size here
-
-	// set mmctx base addresses now so we don't have to do it later,
-	// they don't currently ever change
-	mov $r4 0x700
-	shl b32 $r4 6
-	shr b32 $r5 $r2 8
-	iowr I[$r4 + 0x000] $r5		// MMCTX_SAVE_SWBASE
-	iowr I[$r4 + 0x100] $r5		// MMCTX_LOAD_SWBASE
-
-	// calculate GPC mmio context size, store the chipset-specific
-	// mmio list pointers somewhere we can get at them later without
-	// re-parsing the chipset list
-	clear b32 $r14
-	clear b32 $r15
-	ld b16 $r14 D[$r1 + 4]
-	ld b16 $r15 D[$r1 + 6]
-	st b16 D[$r0 + #gpc_mmio_list_head] $r14
-	st b16 D[$r0 + #gpc_mmio_list_tail] $r15
-	call #mmctx_size
-	add b32 $r2 $r15
-	add b32 $r3 $r15
-
-	// calculate per-TPC mmio context size, store the list pointers
-	ld b16 $r14 D[$r1 + 8]
-	ld b16 $r15 D[$r1 + 10]
-	st b16 D[$r0 + #tpc_mmio_list_head] $r14
-	st b16 D[$r0 + #tpc_mmio_list_tail] $r15
-	call #mmctx_size
-	ld b32 $r14 D[$r0 + #tpc_count]
-	mulu $r14 $r15
-	add b32 $r2 $r14
-	add b32 $r3 $r14
-
-	// round up base/size to 256 byte boundary (for strand SWBASE)
-	add b32 $r4 0x1300
-	shr b32 $r3 2
-	iowr I[$r4 + 0x000] $r3		// MMCTX_LOAD_COUNT, wtf for?!?
-	shr b32 $r2 8
-	shr b32 $r3 6
-	add b32 $r2 1
-	add b32 $r3 1
-	shl b32 $r2 8
-	shl b32 $r3 8
-
-	// calculate size of strand context data
-	mov b32 $r15 $r2
-	call #strand_ctx_init
-	add b32 $r3 $r15
-
-	// save context size, and tell HUB we're done
-	mov $r1 0x800
-	shl b32 $r1 6
-	iowr I[$r1 + 0x100] $r3		// CC_SCRATCH[1]  = context size
-	add b32 $r1 0x800
-	clear b32 $r2
-	bset $r2 31
-	iowr I[$r1 + 0x000] $r2		// CC_SCRATCH[0] |= 0x80000000
-
-// Main program loop, very simple, sleeps until woken up by the interrupt
-// handler, pulls a command from the queue and executes its handler
-//
-main:
-	bset $flags $p0
-	sleep $p0
-	mov $r13 #cmd_queue
-	call #queue_get
-	bra $p1 #main
-
-	// 0x0000-0x0003 are all context transfers
-	cmpu b32 $r14 0x04
-	bra nc #main_not_ctx_xfer
-		// fetch $flags and mask off $p1/$p2
-		mov $r1 $flags
-		mov $r2 0x0006
-		not b32 $r2
-		and $r1 $r2
-		// set $p1/$p2 according to transfer type
-		shl b32 $r14 1
-		or $r1 $r14
-		mov $flags $r1
-		// transfer context data
-		call #ctx_xfer
-		bra #main
-
-	main_not_ctx_xfer:
-	shl b32 $r15 $r14 16
-	or $r15 E_BAD_COMMAND
-	call #error
-	bra #main
-
-// interrupt handler
-ih:
-	push $r8
-	mov $r8 $flags
-	push $r8
-	push $r9
-	push $r10
-	push $r11
-	push $r13
-	push $r14
-	push $r15
-
-	// incoming fifo command?
-	iord $r10 I[$r0 + 0x200]	// INTR
-	and $r11 $r10 0x00000004
-	bra e #ih_no_fifo
-		// queue incoming fifo command for later processing
-		mov $r11 0x1900
-		mov $r13 #cmd_queue
-		iord $r14 I[$r11 + 0x100]	// FIFO_CMD
-		iord $r15 I[$r11 + 0x000]	// FIFO_DATA
-		call #queue_put
-		add b32 $r11 0x400
-		mov $r14 1
-		iowr I[$r11 + 0x000] $r14	// FIFO_ACK
-
-	// ack, and wake up main()
-	ih_no_fifo:
-	iowr I[$r0 + 0x100] $r10	// INTR_ACK
-
-	pop $r15
-	pop $r14
-	pop $r13
-	pop $r11
-	pop $r10
-	pop $r9
-	pop $r8
-	mov $flags $r8
-	pop $r8
-	bclr $flags $p0
-	iret
-
-// Set this GPC's bit in HUB_BAR, used to signal completion of various
-// activities to the HUB fuc
-//
-hub_barrier_done:
-	mov $r15 1
-	ld b32 $r14 D[$r0 + #gpc_id]
-	shl b32 $r15 $r14
-	mov $r14 -0x6be8 	// 0x409418 - HUB_BAR_SET
-	sethi $r14 0x400000
-	call #nv_wr32
-	ret
-
-// Disables various things, waits a bit, and re-enables them..
-//
-// Not sure how exactly this helps, perhaps "ENABLE" is not such a
-// good description for the bits we turn off?  Anyways, without this,
-// funny things happen.
-//
-ctx_redswitch:
-	mov $r14 0x614
-	shl b32 $r14 6
-	mov $r15 0x020
-	iowr I[$r14] $r15	// GPC_RED_SWITCH = POWER
-	mov $r15 8
-	ctx_redswitch_delay:
-		sub b32 $r15 1
-		bra ne #ctx_redswitch_delay
-	mov $r15 0xa20
-	iowr I[$r14] $r15	// GPC_RED_SWITCH = UNK11, ENABLE, POWER
-	ret
-
-// Transfer GPC context data between GPU and storage area
-//
-// In: $r15 context base address
-//     $p1 clear on save, set on load
-//     $p2 set if opposite direction done/will be done, so:
-//		on save it means: "a load will follow this save"
-//		on load it means: "a save preceeded this load"
-//
-ctx_xfer:
-	// set context base address
-	mov $r1 0xa04
-	shl b32 $r1 6
-	iowr I[$r1 + 0x000] $r15// MEM_BASE
-	bra not $p1 #ctx_xfer_not_load
-		call #ctx_redswitch
-	ctx_xfer_not_load:
-
-	// strands
-	mov $r1 0x4afc
-	sethi $r1 0x20000
-	mov $r2 0xc
-	iowr I[$r1] $r2		// STRAND_CMD(0x3f) = 0x0c
-	call #strand_wait
-	mov $r2 0x47fc
-	sethi $r2 0x20000
-	iowr I[$r2] $r0		// STRAND_FIRST_GENE(0x3f) = 0x00
-	xbit $r2 $flags $p1
-	add b32 $r2 3
-	iowr I[$r1] $r2		// STRAND_CMD(0x3f) = 0x03/0x04 (SAVE/LOAD)
-
-	// mmio context
-	xbit $r10 $flags $p1	// direction
-	or $r10 2		// first
-	mov $r11 0x0000
-	sethi $r11 0x500000
-	ld b32 $r12 D[$r0 + #gpc_id]
-	shl b32 $r12 15
-	add b32 $r11 $r12	// base = NV_PGRAPH_GPCn
-	ld b32 $r12 D[$r0 + #gpc_mmio_list_head]
-	ld b32 $r13 D[$r0 + #gpc_mmio_list_tail]
-	mov $r14 0		// not multi
-	call #mmctx_xfer
-
-	// per-TPC mmio context
-	xbit $r10 $flags $p1	// direction
-	or $r10 4		// last
-	mov $r11 0x4000
-	sethi $r11 0x500000	// base = NV_PGRAPH_GPC0_TPC0
-	ld b32 $r12 D[$r0 + #gpc_id]
-	shl b32 $r12 15
-	add b32 $r11 $r12	// base = NV_PGRAPH_GPCn_TPC0
-	ld b32 $r12 D[$r0 + #tpc_mmio_list_head]
-	ld b32 $r13 D[$r0 + #tpc_mmio_list_tail]
-	ld b32 $r15 D[$r0 + #tpc_mask]
-	mov $r14 0x800		// stride = 0x800
-	call #mmctx_xfer
-
-	// wait for strands to finish
-	call #strand_wait
-
-	// if load, or a save without a load following, do some
-	// unknown stuff that's done after finishing a block of
-	// strand commands
-	bra $p1 #ctx_xfer_post
-	bra not $p2 #ctx_xfer_done
-	ctx_xfer_post:
-		mov $r1 0x4afc
-		sethi $r1 0x20000
-		mov $r2 0xd
-		iowr I[$r1] $r2		// STRAND_CMD(0x3f) = 0x0d
-		call #strand_wait
-
-	// mark completion in HUB's barrier
-	ctx_xfer_done:
-	call #hub_barrier_done
-	ret
-
+#include "com.fuc"
+#include "gpc.fuc"
 .align 256
+#undef INCLUDE_CODE

+ 35 - 81
drivers/gpu/drm/nouveau/core/engine/graph/fuc/gpcnvc0.fuc.h

@@ -34,59 +34,36 @@ uint32_t nvc0_grgpc_data[] = {
 	0x00000000,
 /* 0x0064: chipsets */
 	0x000000c0,
-	0x012800c8,
-	0x01e40194,
+	0x013c00d4,
+	0x018c0140,
 	0x000000c1,
-	0x012c00c8,
-	0x01f80194,
+	0x014000d4,
+	0x01a00140,
 	0x000000c3,
-	0x012800c8,
-	0x01f40194,
+	0x013c00d4,
+	0x019c0140,
 	0x000000c4,
-	0x012800c8,
-	0x01f40194,
+	0x013c00d4,
+	0x019c0140,
 	0x000000c8,
-	0x012800c8,
-	0x01e40194,
+	0x013c00d4,
+	0x018c0140,
 	0x000000ce,
-	0x012800c8,
-	0x01f40194,
+	0x013c00d4,
+	0x019c0140,
 	0x000000cf,
-	0x012800c8,
-	0x01f00194,
+	0x013c00d4,
+	0x019c0140,
 	0x000000d9,
-	0x0194012c,
-	0x025401f8,
-	0x00000000,
-/* 0x00c8: nvc0_gpc_mmio_head */
-	0x00000380,
-	0x14000400,
-	0x20000450,
-	0x00000600,
-	0x00000684,
-	0x10000700,
-	0x00000800,
-	0x08000808,
-	0x00000828,
-	0x00000830,
-	0x000008d8,
-	0x000008e0,
-	0x140008e8,
-	0x0000091c,
-	0x08000924,
-	0x00000b00,
-	0x14000b08,
-	0x00000bb8,
-	0x00000c08,
-	0x1c000c10,
-	0x00000c80,
-	0x00000c8c,
-	0x08001000,
-	0x00001014,
-/* 0x0128: nvc0_gpc_mmio_tail */
-	0x00000c6c,
-/* 0x012c: nvc1_gpc_mmio_tail */
-/* 0x012c: nvd9_gpc_mmio_head */
+	0x014000d8,
+	0x01a80140,
+	0x000000d7,
+	0x014000d8,
+	0x01a80140,
+	0x00000000,
+/* 0x00d4: nvc0_gpc_mmio_head */
+	0x00000408,
+/* 0x00d8: nvd9_gpc_mmio_head */
 	0x00000380,
 	0x04000400,
 	0x0800040c,
@@ -108,20 +85,20 @@ uint32_t nvc0_grgpc_data[] = {
 	0x00000bb8,
 	0x00000c08,
 	0x1c000c10,
-	0x00000c6c,
 	0x00000c80,
 	0x00000c8c,
 	0x08001000,
 	0x00001014,
-/* 0x0194: nvd9_gpc_mmio_tail */
-/* 0x0194: nvc0_tpc_mmio_head */
+/* 0x013c: nvc0_gpc_mmio_tail */
+	0x00000c6c,
+/* 0x0140: nvc1_gpc_mmio_tail */
+/* 0x0140: nvc0_tpc_mmio_head */
 	0x00000018,
 	0x0000003c,
 	0x00000048,
 	0x00000064,
 	0x00000088,
 	0x14000200,
-	0x0400021c,
 	0x14000300,
 	0x000003d0,
 	0x040003e0,
@@ -135,39 +112,16 @@ uint32_t nvc0_grgpc_data[] = {
 	0x4c000644,
 	0x00000698,
 	0x04000750,
-/* 0x01e4: nvc0_tpc_mmio_tail */
-	0x00000758,
-	0x000002c4,
-	0x000006e0,
-/* 0x01f0: nvcf_tpc_mmio_tail */
-	0x000004bc,
-/* 0x01f4: nvc3_tpc_mmio_tail */
-	0x00000544,
-/* 0x01f8: nvc1_tpc_mmio_tail */
-/* 0x01f8: nvd9_tpc_mmio_head */
-	0x00000018,
-	0x0000003c,
-	0x00000048,
-	0x00000064,
-	0x00000088,
-	0x14000200,
+/* 0x018c: nvc0_tpc_mmio_tail */
 	0x0400021c,
 	0x000002c4,
-	0x14000300,
-	0x000003d0,
-	0x040003e0,
-	0x08000400,
-	0x08000420,
-	0x000004b0,
-	0x000004e8,
-	0x000004f4,
-	0x04000520,
+	0x1c000730,
+	0x00000758,
+/* 0x019c: nvc3_tpc_mmio_tail */
 	0x00000544,
-	0x0c000604,
-	0x4c000644,
-	0x00000698,
+/* 0x01a0: nvc1_tpc_mmio_tail */
+	0x04000424,
 	0x000006e0,
-	0x08000750,
 };
 
 uint32_t nvc0_grgpc_code[] = {
@@ -238,7 +192,7 @@ uint32_t nvc0_grgpc_code[] = {
 	0x0089d000,
 	0x081887f1,
 	0xd00684b6,
-/* 0x00e2: wait_done_wait_donez */
+/* 0x00e2: wait_donez_ne */
 	0x87f1008a,
 	0x84b60400,
 	0x0088cf06,
@@ -255,7 +209,7 @@ uint32_t nvc0_grgpc_code[] = {
 	0x87f10089,
 	0x84b60818,
 	0x008ad006,
-/* 0x011c: wait_done_wait_doneo */
+/* 0x011c: wait_doneo_e */
 	0x040087f1,
 	0xcf0684b6,
 	0x8aff0088,

+ 74 - 353
drivers/gpu/drm/nouveau/core/engine/graph/fuc/gpcnve0.fuc

@@ -1,6 +1,5 @@
-/* fuc microcode for nve0 PGRAPH/GPC
- *
- * Copyright 2011 Red Hat Inc.
+/*
+ * Copyright 2013 Red Hat Inc.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -20,32 +19,17 @@
  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  * OTHER DEALINGS IN THE SOFTWARE.
  *
- * Authors: Ben Skeggs
- */
-
-/* To build:
- *    m4 nve0_grgpc.fuc | envyas -a -w -m fuc -V nva3 -o nve0_grgpc.fuc.h
+ * Authors: Ben Skeggs <bskeggs@redhat.com>
  */
 
-/* TODO
- * - bracket certain functions with scratch writes, useful for debugging
- * - watchdog timer around ctx operations
- */
+#define NVGK
+#include "macros.fuc"
 
 .section #nve0_grgpc_data
-include(`nve0.fuc')
-gpc_id:			.b32 0
-gpc_mmio_list_head:	.b32 0
-gpc_mmio_list_tail:	.b32 0
-
-tpc_count:		.b32 0
-tpc_mask:		.b32 0
-tpc_mmio_list_head:	.b32 0
-tpc_mmio_list_tail:	.b32 0
-
-cmd_queue:		queue_init
+#define INCLUDE_DATA
+#include "com.fuc"
+#include "gpc.fuc"
 
-// chipset descriptions
 chipsets:
 .b8  0xe4 0 0 0
 .b16 #nve4_gpc_mmio_head
@@ -62,6 +46,11 @@ chipsets:
 .b16 #nve4_gpc_mmio_tail
 .b16 #nve4_tpc_mmio_head
 .b16 #nve4_tpc_mmio_tail
+.b8  0xf0 0 0 0
+.b16 #nvf0_gpc_mmio_head
+.b16 #nvf0_gpc_mmio_tail
+.b16 #nvf0_tpc_mmio_head
+.b16 #nvf0_tpc_mmio_tail
 .b8  0 0 0 0
 
 // GPC mmio lists
@@ -101,6 +90,37 @@ mmctx_data(0x0031d0, 1)
 mmctx_data(0x0031e0, 2)
 nve4_gpc_mmio_tail:
 
+nvf0_gpc_mmio_head:
+mmctx_data(0x000380, 1)
+mmctx_data(0x000400, 2)
+mmctx_data(0x00040c, 3)
+mmctx_data(0x000450, 9)
+mmctx_data(0x000600, 1)
+mmctx_data(0x000684, 1)
+mmctx_data(0x000700, 5)
+mmctx_data(0x000800, 1)
+mmctx_data(0x000808, 3)
+mmctx_data(0x000828, 1)
+mmctx_data(0x000830, 1)
+mmctx_data(0x0008d8, 1)
+mmctx_data(0x0008e0, 1)
+mmctx_data(0x0008e8, 6)
+mmctx_data(0x00091c, 1)
+mmctx_data(0x000924, 3)
+mmctx_data(0x000b00, 1)
+mmctx_data(0x000b08, 6)
+mmctx_data(0x000bb8, 1)
+mmctx_data(0x000c08, 1)
+mmctx_data(0x000c10, 8)
+mmctx_data(0x000c40, 1)
+mmctx_data(0x000c6c, 1)
+mmctx_data(0x000c80, 1)
+mmctx_data(0x000c8c, 1)
+mmctx_data(0x000d24, 1)
+mmctx_data(0x001000, 3)
+mmctx_data(0x001014, 1)
+nvf0_gpc_mmio_tail:
+
 // TPC mmio lists
 nve4_tpc_mmio_head:
 mmctx_data(0x000048, 1)
@@ -120,337 +140,38 @@ mmctx_data(0x0006ac, 2)
 mmctx_data(0x0006c8, 1)
 mmctx_data(0x000730, 8)
 mmctx_data(0x000758, 1)
-mmctx_data(0x000778, 1)
+mmctx_data(0x000770, 1)
+mmctx_data(0x000778, 2)
 nve4_tpc_mmio_tail:
 
+nvf0_tpc_mmio_head:
+mmctx_data(0x000048, 1)
+mmctx_data(0x000064, 1)
+mmctx_data(0x000088, 1)
+mmctx_data(0x000200, 6)
+mmctx_data(0x00021c, 2)
+mmctx_data(0x000230, 1)
+mmctx_data(0x0002c4, 1)
+mmctx_data(0x000400, 3)
+mmctx_data(0x000420, 3)
+mmctx_data(0x0004e8, 1)
+mmctx_data(0x0004f4, 1)
+mmctx_data(0x000604, 4)
+mmctx_data(0x000644, 22)
+mmctx_data(0x0006ac, 2)
+mmctx_data(0x0006b8, 1)
+mmctx_data(0x0006c8, 1)
+mmctx_data(0x000730, 8)
+mmctx_data(0x000758, 1)
+mmctx_data(0x000770, 1)
+mmctx_data(0x000778, 2)
+nvf0_tpc_mmio_tail:
+#undef INCLUDE_DATA
+
 .section #nve0_grgpc_code
+#define INCLUDE_CODE
 bra #init
-define(`include_code')
-include(`nve0.fuc')
-
-// reports an exception to the host
-//
-// In: $r15 error code (see nve0.fuc)
-//
-error:
-	push $r14
-	mov $r14 -0x67ec 	// 0x9814
-	sethi $r14 0x400000
-	call #nv_wr32		// HUB_CTXCTL_CC_SCRATCH[5] = error code
-	add b32 $r14 0x41c
-	mov $r15 1
-	call #nv_wr32		// HUB_CTXCTL_INTR_UP_SET
-	pop $r14
-	ret
-
-// GPC fuc initialisation, executed by triggering ucode start, will
-// fall through to main loop after completion.
-//
-// Input:
-//   CC_SCRATCH[0]: chipset (PMC_BOOT_0 read returns 0x0bad0bad... sigh)
-//   CC_SCRATCH[1]: context base
-//
-// Output:
-//   CC_SCRATCH[0]:
-//	     31:31: set to signal completion
-//   CC_SCRATCH[1]:
-//	      31:0: GPC context size
-//
-init:
-	clear b32 $r0
-	mov $sp $r0
-
-	// enable fifo access
-	mov $r1 0x1200
-	mov $r2 2
-	iowr I[$r1 + 0x000] $r2		// FIFO_ENABLE
-
-	// setup i0 handler, and route all interrupts to it
-	mov $r1 #ih
-	mov $iv0 $r1
-	mov $r1 0x400
-	iowr I[$r1 + 0x300] $r0		// INTR_DISPATCH
-
-	// enable fifo interrupt
-	mov $r2 4
-	iowr I[$r1 + 0x000] $r2		// INTR_EN_SET
-
-	// enable interrupts
-	bset $flags ie0
-
-	// figure out which GPC we are, and how many TPCs we have
-	mov $r1 0x608
-	shl b32 $r1 6
-	iord $r2 I[$r1 + 0x000]		// UNITS
-	mov $r3 1
-	and $r2 0x1f
-	shl b32 $r3 $r2
-	sub b32 $r3 1
-	st b32 D[$r0 + #tpc_count] $r2
-	st b32 D[$r0 + #tpc_mask] $r3
-	add b32 $r1 0x400
-	iord $r2 I[$r1 + 0x000]		// MYINDEX
-	st b32 D[$r0 + #gpc_id] $r2
-
-	// find context data for this chipset
-	mov $r2 0x800
-	shl b32 $r2 6
-	iord $r2 I[$r2 + 0x000]		// CC_SCRATCH[0]
-	mov $r1 #chipsets - 12
-	init_find_chipset:
-		add b32 $r1 12
-		ld b32 $r3 D[$r1 + 0x00]
-		cmpu b32 $r3 $r2
-		bra e #init_context
-		cmpu b32 $r3 0
-		bra ne #init_find_chipset
-		// unknown chipset
-		ret
-
-	// initialise context base, and size tracking
-	init_context:
-	mov $r2 0x800
-	shl b32 $r2 6
-	iord $r2 I[$r2 + 0x100]	// CC_SCRATCH[1], initial base
-	clear b32 $r3		// track GPC context size here
-
-	// set mmctx base addresses now so we don't have to do it later,
-	// they don't currently ever change
-	mov $r4 0x700
-	shl b32 $r4 6
-	shr b32 $r5 $r2 8
-	iowr I[$r4 + 0x000] $r5		// MMCTX_SAVE_SWBASE
-	iowr I[$r4 + 0x100] $r5		// MMCTX_LOAD_SWBASE
-
-	// calculate GPC mmio context size, store the chipset-specific
-	// mmio list pointers somewhere we can get at them later without
-	// re-parsing the chipset list
-	clear b32 $r14
-	clear b32 $r15
-	ld b16 $r14 D[$r1 + 4]
-	ld b16 $r15 D[$r1 + 6]
-	st b16 D[$r0 + #gpc_mmio_list_head] $r14
-	st b16 D[$r0 + #gpc_mmio_list_tail] $r15
-	call #mmctx_size
-	add b32 $r2 $r15
-	add b32 $r3 $r15
-
-	// calculate per-TPC mmio context size, store the list pointers
-	ld b16 $r14 D[$r1 + 8]
-	ld b16 $r15 D[$r1 + 10]
-	st b16 D[$r0 + #tpc_mmio_list_head] $r14
-	st b16 D[$r0 + #tpc_mmio_list_tail] $r15
-	call #mmctx_size
-	ld b32 $r14 D[$r0 + #tpc_count]
-	mulu $r14 $r15
-	add b32 $r2 $r14
-	add b32 $r3 $r14
-
-	// round up base/size to 256 byte boundary (for strand SWBASE)
-	add b32 $r4 0x1300
-	shr b32 $r3 2
-	iowr I[$r4 + 0x000] $r3		// MMCTX_LOAD_COUNT, wtf for?!?
-	shr b32 $r2 8
-	shr b32 $r3 6
-	add b32 $r2 1
-	add b32 $r3 1
-	shl b32 $r2 8
-	shl b32 $r3 8
-
-	// calculate size of strand context data
-	mov b32 $r15 $r2
-	call #strand_ctx_init
-	add b32 $r3 $r15
-
-	// save context size, and tell HUB we're done
-	mov $r1 0x800
-	shl b32 $r1 6
-	iowr I[$r1 + 0x100] $r3		// CC_SCRATCH[1]  = context size
-	add b32 $r1 0x800
-	clear b32 $r2
-	bset $r2 31
-	iowr I[$r1 + 0x000] $r2		// CC_SCRATCH[0] |= 0x80000000
-
-// Main program loop, very simple, sleeps until woken up by the interrupt
-// handler, pulls a command from the queue and executes its handler
-//
-main:
-	bset $flags $p0
-	sleep $p0
-	mov $r13 #cmd_queue
-	call #queue_get
-	bra $p1 #main
-
-	// 0x0000-0x0003 are all context transfers
-	cmpu b32 $r14 0x04
-	bra nc #main_not_ctx_xfer
-		// fetch $flags and mask off $p1/$p2
-		mov $r1 $flags
-		mov $r2 0x0006
-		not b32 $r2
-		and $r1 $r2
-		// set $p1/$p2 according to transfer type
-		shl b32 $r14 1
-		or $r1 $r14
-		mov $flags $r1
-		// transfer context data
-		call #ctx_xfer
-		bra #main
-
-	main_not_ctx_xfer:
-	shl b32 $r15 $r14 16
-	or $r15 E_BAD_COMMAND
-	call #error
-	bra #main
-
-// interrupt handler
-ih:
-	push $r8
-	mov $r8 $flags
-	push $r8
-	push $r9
-	push $r10
-	push $r11
-	push $r13
-	push $r14
-	push $r15
-
-	// incoming fifo command?
-	iord $r10 I[$r0 + 0x200]	// INTR
-	and $r11 $r10 0x00000004
-	bra e #ih_no_fifo
-		// queue incoming fifo command for later processing
-		mov $r11 0x1900
-		mov $r13 #cmd_queue
-		iord $r14 I[$r11 + 0x100]	// FIFO_CMD
-		iord $r15 I[$r11 + 0x000]	// FIFO_DATA
-		call #queue_put
-		add b32 $r11 0x400
-		mov $r14 1
-		iowr I[$r11 + 0x000] $r14	// FIFO_ACK
-
-	// ack, and wake up main()
-	ih_no_fifo:
-	iowr I[$r0 + 0x100] $r10	// INTR_ACK
-
-	pop $r15
-	pop $r14
-	pop $r13
-	pop $r11
-	pop $r10
-	pop $r9
-	pop $r8
-	mov $flags $r8
-	pop $r8
-	bclr $flags $p0
-	iret
-
-// Set this GPC's bit in HUB_BAR, used to signal completion of various
-// activities to the HUB fuc
-//
-hub_barrier_done:
-	mov $r15 1
-	ld b32 $r14 D[$r0 + #gpc_id]
-	shl b32 $r15 $r14
-	mov $r14 -0x6be8 	// 0x409418 - HUB_BAR_SET
-	sethi $r14 0x400000
-	call #nv_wr32
-	ret
-
-// Disables various things, waits a bit, and re-enables them..
-//
-// Not sure how exactly this helps, perhaps "ENABLE" is not such a
-// good description for the bits we turn off?  Anyways, without this,
-// funny things happen.
-//
-ctx_redswitch:
-	mov $r14 0x614
-	shl b32 $r14 6
-	mov $r15 0x020
-	iowr I[$r14] $r15	// GPC_RED_SWITCH = POWER
-	mov $r15 8
-	ctx_redswitch_delay:
-		sub b32 $r15 1
-		bra ne #ctx_redswitch_delay
-	mov $r15 0xa20
-	iowr I[$r14] $r15	// GPC_RED_SWITCH = UNK11, ENABLE, POWER
-	ret
-
-// Transfer GPC context data between GPU and storage area
-//
-// In: $r15 context base address
-//     $p1 clear on save, set on load
-//     $p2 set if opposite direction done/will be done, so:
-//		on save it means: "a load will follow this save"
-//		on load it means: "a save preceeded this load"
-//
-ctx_xfer:
-	// set context base address
-	mov $r1 0xa04
-	shl b32 $r1 6
-	iowr I[$r1 + 0x000] $r15// MEM_BASE
-	bra not $p1 #ctx_xfer_not_load
-		call #ctx_redswitch
-	ctx_xfer_not_load:
-
-	// strands
-	mov $r1 0x4afc
-	sethi $r1 0x20000
-	mov $r2 0xc
-	iowr I[$r1] $r2		// STRAND_CMD(0x3f) = 0x0c
-	call #strand_wait
-	mov $r2 0x47fc
-	sethi $r2 0x20000
-	iowr I[$r2] $r0		// STRAND_FIRST_GENE(0x3f) = 0x00
-	xbit $r2 $flags $p1
-	add b32 $r2 3
-	iowr I[$r1] $r2		// STRAND_CMD(0x3f) = 0x03/0x04 (SAVE/LOAD)
-
-	// mmio context
-	xbit $r10 $flags $p1	// direction
-	or $r10 2		// first
-	mov $r11 0x0000
-	sethi $r11 0x500000
-	ld b32 $r12 D[$r0 + #gpc_id]
-	shl b32 $r12 15
-	add b32 $r11 $r12	// base = NV_PGRAPH_GPCn
-	ld b32 $r12 D[$r0 + #gpc_mmio_list_head]
-	ld b32 $r13 D[$r0 + #gpc_mmio_list_tail]
-	mov $r14 0		// not multi
-	call #mmctx_xfer
-
-	// per-TPC mmio context
-	xbit $r10 $flags $p1	// direction
-	or $r10 4		// last
-	mov $r11 0x4000
-	sethi $r11 0x500000	// base = NV_PGRAPH_GPC0_TPC0
-	ld b32 $r12 D[$r0 + #gpc_id]
-	shl b32 $r12 15
-	add b32 $r11 $r12	// base = NV_PGRAPH_GPCn_TPC0
-	ld b32 $r12 D[$r0 + #tpc_mmio_list_head]
-	ld b32 $r13 D[$r0 + #tpc_mmio_list_tail]
-	ld b32 $r15 D[$r0 + #tpc_mask]
-	mov $r14 0x800		// stride = 0x800
-	call #mmctx_xfer
-
-	// wait for strands to finish
-	call #strand_wait
-
-	// if load, or a save without a load following, do some
-	// unknown stuff that's done after finishing a block of
-	// strand commands
-	bra $p1 #ctx_xfer_post
-	bra not $p2 #ctx_xfer_done
-	ctx_xfer_post:
-		mov $r1 0x4afc
-		sethi $r1 0x20000
-		mov $r2 0xd
-		iowr I[$r1] $r2		// STRAND_CMD(0x3f) = 0x0d
-		call #strand_wait
-
-	// mark completion in HUB's barrier
-	ctx_xfer_done:
-	call #hub_barrier_done
-	ret
-
+#include "com.fuc"
+#include "gpc.fuc"
 .align 256
+#undef INCLUDE_CODE

+ 68 - 12
drivers/gpu/drm/nouveau/core/engine/graph/fuc/gpcnve0.fuc.h

@@ -34,16 +34,19 @@ uint32_t nve0_grgpc_data[] = {
 	0x00000000,
 /* 0x0064: chipsets */
 	0x000000e4,
-	0x0110008c,
-	0x01580110,
+	0x011c0098,
+	0x01d8018c,
 	0x000000e7,
-	0x0110008c,
-	0x01580110,
+	0x011c0098,
+	0x01d8018c,
 	0x000000e6,
-	0x0110008c,
-	0x01580110,
+	0x011c0098,
+	0x01d8018c,
+	0x000000f0,
+	0x018c011c,
+	0x022801d8,
 	0x00000000,
-/* 0x008c: nve4_gpc_mmio_head */
+/* 0x0098: nve4_gpc_mmio_head */
 	0x00000380,
 	0x04000400,
 	0x0800040c,
@@ -77,8 +80,59 @@ uint32_t nve0_grgpc_data[] = {
 	0x14003100,
 	0x000031d0,
 	0x040031e0,
-/* 0x0110: nve4_gpc_mmio_tail */
-/* 0x0110: nve4_tpc_mmio_head */
+/* 0x011c: nve4_gpc_mmio_tail */
+/* 0x011c: nvf0_gpc_mmio_head */
+	0x00000380,
+	0x04000400,
+	0x0800040c,
+	0x20000450,
+	0x00000600,
+	0x00000684,
+	0x10000700,
+	0x00000800,
+	0x08000808,
+	0x00000828,
+	0x00000830,
+	0x000008d8,
+	0x000008e0,
+	0x140008e8,
+	0x0000091c,
+	0x08000924,
+	0x00000b00,
+	0x14000b08,
+	0x00000bb8,
+	0x00000c08,
+	0x1c000c10,
+	0x00000c40,
+	0x00000c6c,
+	0x00000c80,
+	0x00000c8c,
+	0x00000d24,
+	0x08001000,
+	0x00001014,
+/* 0x018c: nvf0_gpc_mmio_tail */
+/* 0x018c: nve4_tpc_mmio_head */
+	0x00000048,
+	0x00000064,
+	0x00000088,
+	0x14000200,
+	0x0400021c,
+	0x00000230,
+	0x000002c4,
+	0x08000400,
+	0x08000420,
+	0x000004e8,
+	0x000004f4,
+	0x0c000604,
+	0x54000644,
+	0x040006ac,
+	0x000006c8,
+	0x1c000730,
+	0x00000758,
+	0x00000770,
+	0x04000778,
+/* 0x01d8: nve4_tpc_mmio_tail */
+/* 0x01d8: nvf0_tpc_mmio_head */
 	0x00000048,
 	0x00000064,
 	0x00000088,
@@ -93,10 +147,12 @@ uint32_t nve0_grgpc_data[] = {
 	0x0c000604,
 	0x54000644,
 	0x040006ac,
+	0x000006b8,
 	0x000006c8,
 	0x1c000730,
 	0x00000758,
-	0x00000778,
+	0x00000770,
+	0x04000778,
 };
 
 uint32_t nve0_grgpc_code[] = {
@@ -167,7 +223,7 @@ uint32_t nve0_grgpc_code[] = {
 	0x0089d000,
 	0x081887f1,
 	0xd00684b6,
-/* 0x00e2: wait_done_wait_donez */
+/* 0x00e2: wait_donez_ne */
 	0x87f1008a,
 	0x84b60400,
 	0x0088cf06,
@@ -184,7 +240,7 @@ uint32_t nve0_grgpc_code[] = {
 	0x87f10089,
 	0x84b60818,
 	0x008ad006,
-/* 0x011c: wait_done_wait_doneo */
+/* 0x011c: wait_doneo_e */
 	0x040087f1,
 	0xcf0684b6,
 	0x8aff0088,

+ 755 - 0
drivers/gpu/drm/nouveau/core/engine/graph/fuc/hub.fuc

@@ -0,0 +1,755 @@
+/* fuc microcode for nvc0 PGRAPH/HUB
+ *
+ * Copyright 2011 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs
+ */
+
+#ifdef INCLUDE_DATA
+gpc_count:		.b32 0	// GPC count, filled in by init
+rop_count:		.b32 0	// ROP count, filled in by init
+cmd_queue:		queue_init	// filled by ih, drained by main
+hub_mmio_list_head:	.b32 0	// from the matching chipsets entry (init)
+hub_mmio_list_tail:	.b32 0	// from the matching chipsets entry (init)
+
+ctx_current:		.b32 0	// context pointer computed by ctx_load
+
+.align 256
+chan_data:			// 256 bytes read by ctx_load, rewritten by ctx_mmio_exec
+chan_mmio_count:	.b32 0	// non-zero: ctx_xfer runs ctx_mmio_exec on load
+chan_mmio_address:	.b32 0	// used as MEM_BASE by ctx_mmio_exec
+
+.align 256
+xfer_data: 		.skip 256	// transfer scratch (channel header, mmio list)
+
+#endif
+
+#ifdef INCLUDE_CODE
+// reports an exception to the host: the error code is stored in
+// CC_SCRATCH[5], then bit 0 is written to INTR_UP_SET
+//
+// In: $r15 error code (E_* values, e.g. E_BAD_COMMAND; not defined in this
+//     file - the old reference to nvc0.fuc is presumably stale, confirm)
+//
+// $r14 is preserved; $r15 is clobbered (holds 1 on return)
+//
+error:
+	push $r14
+	mov $r14 0x814
+	shl b32 $r14 6
+	iowr I[$r14 + 0x000] $r15	// CC_SCRATCH[5] = error code
+	mov $r14 0xc1c
+	shl b32 $r14 6
+	mov $r15 1
+	iowr I[$r14 + 0x000] $r15	// INTR_UP_SET
+	pop $r14
+	ret
+
+// HUB fuc initialisation, executed by triggering ucode start, will
+// fall through to main loop after completion.
+//
+// Input:
+//   CC_SCRATCH[0]: chipset (PMC_BOOT_0 read returns 0x0bad0bad... sigh)
+//
+// Output:
+//   CC_SCRATCH[0]:
+//	     31:31: set to signal completion
+//   CC_SCRATCH[1]:
+//	      31:0: total PGRAPH context size
+//
+// $r0 is cleared here and is used as a zero register by the rest of
+// this file.
+//
+init:
+	clear b32 $r0
+	mov $sp $r0
+	mov $xdbase $r0
+
+	// enable fifo access
+	mov $r1 0x1200
+	mov $r2 2
+	iowr I[$r1 + 0x000] $r2	// FIFO_ENABLE
+
+	// setup i0 handler, and route all interrupts to it
+	mov $r1 #ih
+	mov $iv0 $r1
+	mov $r1 0x400
+	iowr I[$r1 + 0x300] $r0	// INTR_DISPATCH
+
+	// route HUB_CHANNEL_SWITCH to fuc interrupt 8
+	mov $r3 0x404
+	shl b32 $r3 6
+	mov $r2 0x2003		// { HUB_CHANNEL_SWITCH, ZERO } -> intr 8
+	iowr I[$r3 + 0x000] $r2
+
+	// not sure what these are, route them because NVIDIA does, and
+	// the IRQ handler will signal the host if we ever get one.. we
+	// may find out if/why we need to handle these if so..
+	//
+	mov $r2 0x2004
+	iowr I[$r3 + 0x004] $r2 // { 0x04, ZERO } -> intr 9
+	mov $r2 0x200b
+	iowr I[$r3 + 0x008] $r2 // { 0x0b, ZERO } -> intr 10
+	mov $r2 0x200c
+	iowr I[$r3 + 0x01c] $r2 // { 0x0c, ZERO } -> intr 15
+
+	// enable all INTR_UP interrupts
+	mov $r2 0xc24
+	shl b32 $r2 6
+	not b32 $r3 $r0
+	iowr I[$r2] $r3
+
+	// enable fifo, ctxsw, 9, 10, 15 interrupts
+	mov $r2 -0x78fc		// 0x8704
+	sethi $r2 0
+	iowr I[$r1 + 0x000] $r2	// INTR_EN_SET
+
+	// fifo level triggered, rest edge
+	sub b32 $r1 0x100
+	mov $r2 4
+	iowr I[$r1] $r2
+
+	// enable interrupts
+	bset $flags ie0
+
+	// fetch enabled GPC/ROP counts
+	mov $r14 -0x69fc	// 0x409604
+	sethi $r14 0x400000
+	call #nv_rd32
+	extr $r1 $r15 16:20
+	st b32 D[$r0 + #rop_count] $r1
+	and $r15 0x1f
+	st b32 D[$r0 + #gpc_count] $r15
+
+	// set BAR_REQMASK to GPC mask, i.e. (1 << gpc_count) - 1
+	mov $r1 1
+	shl b32 $r1 $r15
+	sub b32 $r1 1
+	mov $r2 0x40c
+	shl b32 $r2 6
+	iowr I[$r2 + 0x000] $r1
+	iowr I[$r2 + 0x100] $r1
+
+	// find context data for this chipset; each chipsets entry is 8
+	// bytes: b32 chipset id, b16 mmio list head, b16 mmio list tail,
+	// and an entry with id 0 terminates the table
+	mov $r2 0x800
+	shl b32 $r2 6
+	iord $r2 I[$r2 + 0x000]		// CC_SCRATCH[0]
+	mov $r15 #chipsets - 8		// the loop adds 8 before each read
+	init_find_chipset:
+		add b32 $r15 8
+		ld b32 $r3 D[$r15 + 0x00]
+		cmpu b32 $r3 $r2
+		bra e #init_context
+		cmpu b32 $r3 0
+		bra ne #init_find_chipset
+		// unknown chipset
+		// NOTE(review): $sp was zeroed above and hubnvc0.fuc enters
+		// init with "bra #init", so there looks to be no valid
+		// return address for this ret - confirm intended behaviour
+		ret
+
+	// context size calculation, reserve first 256 bytes for use by fuc
+	init_context:
+	mov $r1 256
+
+	// calculate size of mmio context data
+	ld b16 $r14 D[$r15 + 4]
+	ld b16 $r15 D[$r15 + 6]
+	sethi $r14 0
+	st b32 D[$r0 + #hub_mmio_list_head] $r14
+	st b32 D[$r0 + #hub_mmio_list_tail] $r15
+	call #mmctx_size	// presumably returns the size in $r15 - confirm
+
+	// set mmctx base addresses now so we don't have to do it later,
+	// they don't (currently) ever change
+	mov $r3 0x700
+	shl b32 $r3 6
+	shr b32 $r4 $r1 8
+	iowr I[$r3 + 0x000] $r4		// MMCTX_SAVE_SWBASE
+	iowr I[$r3 + 0x100] $r4		// MMCTX_LOAD_SWBASE
+	add b32 $r3 0x1300
+	add b32 $r1 $r15
+	shr b32 $r15 2
+	iowr I[$r3 + 0x000] $r15	// MMCTX_LOAD_COUNT, wtf for?!?
+
+	// strands, base offset needs to be aligned to 256 bytes (this
+	// always moves to the next 256 byte boundary, even if the offset
+	// was already aligned)
+	shr b32 $r1 8
+	add b32 $r1 1
+	shl b32 $r1 8
+	mov b32 $r15 $r1
+	call #strand_ctx_init
+	add b32 $r1 $r15
+
+	// initialise each GPC in sequence by passing in the offset of its
+	// context data in GPCn_CC_SCRATCH[1], and starting its FUC (which
+	// has previously been uploaded by the host) running.
+	//
+	// the GPC fuc init sequence will set GPCn_CC_SCRATCH[0] bit 31
+	// when it has completed, and return the size of its context data
+	// in GPCn_CC_SCRATCH[1]
+	//
+	// the count is only tested at the bottom of the loop, so this
+	// assumes gpc_count is at least 1
+	//
+	ld b32 $r3 D[$r0 + #gpc_count]
+	mov $r4 0x2000
+	sethi $r4 0x500000		// $r4 = 0x502000, stepped by 0x8000 per GPC
+	init_gpc:
+		// setup, and start GPC ucode running
+		add b32 $r14 $r4 0x804
+		mov b32 $r15 $r1
+		call #nv_wr32			// CC_SCRATCH[1] = ctx offset
+		add b32 $r14 $r4 0x800
+		mov b32 $r15 $r2
+		call #nv_wr32			// CC_SCRATCH[0] = chipset
+		add b32 $r14 $r4 0x10c
+		clear b32 $r15
+		call #nv_wr32
+		add b32 $r14 $r4 0x104
+		call #nv_wr32			// ENTRY
+		add b32 $r14 $r4 0x100
+		mov $r15 2			// CTRL_START_TRIGGER
+		call #nv_wr32			// CTRL
+
+		// wait for it to complete, and adjust context size
+		add b32 $r14 $r4 0x800
+		init_gpc_wait:
+			call #nv_rd32
+			xbit $r15 $r15 31
+			bra e #init_gpc_wait
+		add b32 $r14 $r4 0x804
+		call #nv_rd32
+		add b32 $r1 $r15
+
+		// next!
+		add b32 $r4 0x8000
+		sub b32 $r3 1
+		bra ne #init_gpc
+
+	// save context size, and tell host we're ready
+	mov $r2 0x800
+	shl b32 $r2 6
+	iowr I[$r2 + 0x100] $r1		// CC_SCRATCH[1]  = context size
+	add b32 $r2 0x800
+	clear b32 $r1
+	bset $r1 31
+	iowr I[$r2 + 0x000] $r1		// CC_SCRATCH[0] |= 0x80000000
+
+// Main program loop, very simple, sleeps until woken up by the interrupt
+// handler, pulls a command from the queue and executes its handler
+//
+// Commands handled (command in $r14, data in $r15 after queue_get):
+//   0x4001: context switch request (queued by ih)
+//   0x0001: set current channel, $r15 = channel address
+//   0x0002: save current channel context
+// Anything else is reported to the host through error().
+//
+main:
+	// sleep until we have something to do
+	bset $flags $p0
+	sleep $p0
+	mov $r13 #cmd_queue
+	call #queue_get
+	bra $p1 #main		// presumably $p1 = queue was empty - confirm
+
+	// context switch, requested by GPU?
+	cmpu b32 $r14 0x4001
+	bra ne #main_not_ctx_switch
+		trace_set(T_AUTO)
+		mov $r1 0xb00
+		shl b32 $r1 6
+		iord $r2 I[$r1 + 0x100]		// CHAN_NEXT
+		iord $r1 I[$r1 + 0x000]		// CHAN_CUR
+
+		// bit 31 of CHAN_CUR / CHAN_NEXT selects which of the three
+		// cases below runs
+		xbit $r3 $r1 31
+		bra e #chsw_no_prev
+			xbit $r3 $r2 31
+			bra e #chsw_prev_no_next
+				// save CHAN_CUR ($p2: a load follows), then
+				// load CHAN_NEXT ($p2 still set: a save preceded)
+				push $r2
+				mov b32 $r2 $r1
+				trace_set(T_SAVE)
+				bclr $flags $p1
+				bset $flags $p2
+				call #ctx_xfer
+				trace_clr(T_SAVE);
+				pop $r2
+				trace_set(T_LOAD);
+				bset $flags $p1
+				call #ctx_xfer
+				trace_clr(T_LOAD);
+				bra #chsw_done
+			chsw_prev_no_next:
+				// save only, then copy the CHAN_NEXT value
+				// read above into CHAN_CUR
+				push $r2
+				mov b32 $r2 $r1
+				bclr $flags $p1
+				bclr $flags $p2
+				call #ctx_xfer
+				pop $r2
+				mov $r1 0xb00
+				shl b32 $r1 6
+				iowr I[$r1] $r2
+				bra #chsw_done
+		chsw_no_prev:
+			// load only ($r2 still holds CHAN_NEXT), and only if
+			// its bit 31 is set
+			xbit $r3 $r2 31
+			bra e #chsw_done
+				bset $flags $p1
+				bclr $flags $p2
+				call #ctx_xfer
+
+		// ack the context switch request
+		chsw_done:
+		mov $r1 0xb0c
+		shl b32 $r1 6
+		mov $r2 1
+		iowr I[$r1 + 0x000] $r2		// 0x409b0c
+		trace_clr(T_AUTO)
+		bra #main
+
+	// request to set current channel? (*not* a context switch)
+	main_not_ctx_switch:
+	cmpu b32 $r14 0x0001
+	bra ne #main_not_ctx_chan
+		mov b32 $r2 $r15
+		call #ctx_chan
+		bra #main_done
+
+	// request to store current channel context?
+	main_not_ctx_chan:
+	cmpu b32 $r14 0x0002
+	bra ne #main_not_ctx_save
+		trace_set(T_SAVE)
+		bclr $flags $p1
+		bclr $flags $p2
+		call #ctx_xfer
+		trace_clr(T_SAVE)
+		bra #main_done
+
+	// unknown command: report (command << 16) | E_BAD_COMMAND to the
+	// host, and return to the loop without going through main_done
+	main_not_ctx_save:
+		shl b32 $r15 $r14 16
+		or $r15 E_BAD_COMMAND
+		call #error
+		bra #main
+
+	// signal completion of a host-issued command
+	main_done:
+	mov $r1 0x820
+	shl b32 $r1 6
+	clear b32 $r2
+	bset $r2 31
+	iowr I[$r1 + 0x000] $r2		// CC_SCRATCH[0] |= 0x80000000
+	bra #main
+
+// interrupt handler
+//
+// Queues incoming fifo commands and context switch requests on cmd_queue
+// for main() to process, forwards any other pending interrupt bits to the
+// host through INTR_UP_SET, then acks everything that was read from INTR
+// and clears $p0 so that main()'s sleep ends.
+//
+// $flags and every register touched here are saved on entry and restored
+// before returning ($r8/$r9 are presumably saved on behalf of queue_put -
+// confirm against its definition).
+//
+ih:
+	push $r8
+	mov $r8 $flags
+	push $r8
+	push $r9
+	push $r10
+	push $r11
+	push $r13
+	push $r14
+	push $r15
+
+	// incoming fifo command?
+	iord $r10 I[$r0 + 0x200]	// INTR
+	and $r11 $r10 0x00000004
+	bra e #ih_no_fifo
+		// queue incoming fifo command for later processing
+		mov $r11 0x1900
+		mov $r13 #cmd_queue
+		iord $r14 I[$r11 + 0x100]	// FIFO_CMD
+		iord $r15 I[$r11 + 0x000]	// FIFO_DATA
+		call #queue_put
+		add b32 $r11 0x400
+		mov $r14 1
+		iowr I[$r11 + 0x000] $r14	// FIFO_ACK
+
+	// context switch request?
+	ih_no_fifo:
+	and $r11 $r10 0x00000100
+	bra e #ih_no_ctxsw
+		// enqueue a context switch for later processing
+		mov $r13 #cmd_queue
+		mov $r14 0x4001
+		call #queue_put
+
+	// anything we didn't handle, bring it to the host's attention
+	ih_no_ctxsw:
+	mov $r11 0x104
+	not b32 $r11
+	and $r11 $r10 $r11
+	bra e #ih_no_other
+		// build the register address in $r9 (saved above, and not
+		// otherwise used at this point) so that $r10 still holds the
+		// INTR value when it is written to INTR_ACK below
+		mov $r9 0xc1c
+		shl b32 $r9 6
+		iowr I[$r9] $r11	// INTR_UP_SET
+
+	// ack, and wake up main()
+	ih_no_other:
+	iowr I[$r0 + 0x100] $r10	// INTR_ACK
+
+	pop $r15
+	pop $r14
+	pop $r13
+	pop $r11
+	pop $r10
+	pop $r9
+	pop $r8
+	mov $flags $r8
+	pop $r8
+	bclr $flags $p0		// done after restoring $flags so it isn't lost
+	iret
+
+#ifdef NVGF
+// Not real sure, but, MEM_CMD 7 will hang forever if this isn't done
+//
+// Writes 1 to 0x404160, then polls the same register until bit 4 reads
+// back set.  Uses $r14/$r15.
+ctx_4160s:
+	mov $r14 0x4160
+	sethi $r14 0x400000
+	mov $r15 1
+	call #nv_wr32
+	ctx_4160s_wait:
+		call #nv_rd32
+		xbit $r15 $r15 4
+		bra e #ctx_4160s_wait
+	ret
+
+// Without clearing again at end of xfer, some things cause PGRAPH
+// to hang with STATUS=0x00000007 until it's cleared.. fbcon can
+// still function with it set however...
+//
+// Writes 0 to 0x404160, without waiting.  Uses $r14/$r15.
+ctx_4160c:
+	mov $r14 0x4160
+	sethi $r14 0x400000
+	clear b32 $r15
+	call #nv_wr32
+	ret
+#endif
+
+// Again, not real sure
+//
+// In: $r15 value to set 0x404170 to (bit 4 is always ORed in before the
+//     write; ctx_4170w waits for that bit to read back as clear)
+//
+ctx_4170s:
+	mov $r14 0x4170
+	sethi $r14 0x400000
+	or $r15 0x10
+	call #nv_wr32
+	ret
+
+// Waits for a ctx_4170s() call to complete, by re-reading 0x404170 until
+// bit 4 (0x10) is clear.  Uses $r14/$r15.
+//
+ctx_4170w:
+	mov $r14 0x4170
+	sethi $r14 0x400000
+	call #nv_rd32
+	and $r15 0x10
+	bra ne #ctx_4170w
+	ret
+
+// Disables various things, waits a bit, and re-enables them..
+//
+// Not sure how exactly this helps, perhaps "ENABLE" is not such a
+// good description for the bits we turn off?  Anyways, without this,
+// funny things happen.
+//
+// Uses $r14/$r15; the wait is a fixed 8 iteration countdown.
+//
+ctx_redswitch:
+	mov $r14 0x614
+	shl b32 $r14 6
+	mov $r15 0x270
+	iowr I[$r14] $r15	// HUB_RED_SWITCH = ENABLE_GPC, POWER_ALL
+	mov $r15 8
+	ctx_redswitch_delay:
+		sub b32 $r15 1
+		bra ne #ctx_redswitch_delay
+	mov $r15 0x770
+	iowr I[$r14] $r15	// HUB_RED_SWITCH = ENABLE_ALL, POWER_ALL
+	ret
+
+// Not a clue what this is for, except that unless the value is 0x10, the
+// strand context is saved (and presumably restored) incorrectly..
+//
+// In: $r15 value to set to (0x00/0x10 are used)
+//
+// The same $r15 is written three times, so this relies on nv_wr32 leaving
+// $r15 intact.  $r14 is clobbered.
+//
+ctx_86c:
+	mov $r14 0x86c
+	shl b32 $r14 6
+	iowr I[$r14] $r15	// HUB(0x86c) = val
+	mov $r14 -0x75ec	// 0x408a14
+	sethi $r14 0x400000
+	call #nv_wr32		// ROP(0xa14) = val
+	mov $r14 -0x5794	// 0x41a86c
+	sethi $r14 0x410000
+	call #nv_wr32		// GPC(0x86c) = val
+	ret
+
+// ctx_load - loads a channel's ctxctl data, and selects its vm
+//
+// In: $r2 channel address
+//
+// On return ctx_current holds the channel's context pointer and
+// chan_data holds the first 256 bytes of that context.  Uses $r1-$r4 and
+// $r10 directly, plus whatever wait_donez and the trace macros use.
+//
+ctx_load:
+	trace_set(T_CHAN)
+
+	// switch to channel, somewhat magic in parts..
+	mov $r10 12		// DONE_UNK12
+	call #wait_donez
+	mov $r1 0xa24
+	shl b32 $r1 6
+	iowr I[$r1 + 0x000] $r0	// 0x409a24
+	mov $r3 0xb00
+	shl b32 $r3 6
+	iowr I[$r3 + 0x100] $r2	// CHAN_NEXT
+	mov $r1 0xa0c
+	shl b32 $r1 6
+	mov $r4 7
+	iowr I[$r1 + 0x000] $r2 // MEM_CHAN
+	iowr I[$r1 + 0x100] $r4	// MEM_CMD
+	// wait for the low 5 bits of MEM_CMD to read back as zero
+	ctx_chan_wait_0:
+		iord $r4 I[$r1 + 0x100]
+		and $r4 0x1f
+		bra ne #ctx_chan_wait_0
+	iowr I[$r3 + 0x000] $r2	// CHAN_CUR
+
+	// load channel header, fetch PGRAPH context pointer
+	mov $xtargets $r0
+	bclr $r2 31
+	shl b32 $r2 4
+	add b32 $r2 2
+
+	trace_set(T_LCHAN)
+	mov $r1 0xa04
+	shl b32 $r1 6
+	iowr I[$r1 + 0x000] $r2		// MEM_BASE
+	mov $r1 0xa20
+	shl b32 $r1 6
+	mov $r2 0x0002
+	sethi $r2 0x80000000
+	iowr I[$r1 + 0x000] $r2		// MEM_TARGET = vram
+	mov $r1 0x10			// chan + 0x0210
+	mov $r2 #xfer_data
+	sethi $r2 0x00020000		// 16 bytes
+	xdld $r1 $r2
+	xdwait
+	trace_clr(T_LCHAN)
+
+	// update current context:
+	//   ctx_current = (xfer_data[4] << 24) | (xfer_data[0] >> 8)
+	ld b32 $r1 D[$r0 + #xfer_data + 4]
+	shl b32 $r1 24
+	ld b32 $r2 D[$r0 + #xfer_data + 0]
+	shr b32 $r2 8
+	or $r1 $r2
+	st b32 D[$r0 + #ctx_current] $r1
+
+	// set transfer base to start of context, and fetch context header
+	trace_set(T_LCTXH)
+	mov $r2 0xa04
+	shl b32 $r2 6
+	iowr I[$r2 + 0x000] $r1		// MEM_BASE
+	mov $r2 1
+	mov $r1 0xa20
+	shl b32 $r1 6
+	iowr I[$r1 + 0x000] $r2		// MEM_TARGET = vm
+	mov $r1 #chan_data
+	sethi $r1 0x00060000		// 256 bytes
+	xdld $r0 $r1
+	xdwait
+	trace_clr(T_LCTXH)
+
+	trace_clr(T_CHAN)
+	ret
+
+// ctx_chan - handler for HUB_SET_CHAN command, will set a channel as
+//            the active channel for ctxctl, but not actually transfer
+//            any context data.  intended for use only during initial
+//            context construction.
+//
+// In: $r2 channel address
+//
+// When NVGF is defined the sequence is bracketed by ctx_4160s/ctx_4160c.
+//
+ctx_chan:
+#ifdef NVGF
+	call #ctx_4160s
+#endif
+	call #ctx_load
+	mov $r10 12			// DONE_UNK12
+	call #wait_donez
+	mov $r1 0xa10
+	shl b32 $r1 6
+	mov $r2 5
+	iowr I[$r1 + 0x000] $r2		// MEM_CMD = 5 (???)
+	// wait for MEM_CMD to read back as zero
+	ctx_chan_wait:
+		iord $r2 I[$r1 + 0x000]
+		or $r2 $r2
+		bra ne #ctx_chan_wait
+#ifdef NVGF
+	call #ctx_4160c
+#endif
+	ret
+
+// Execute per-context state overrides list
+//
+// Only executed on the first load of a channel.  Might want to look into
+// removing this and having the host directly modify the channel's context
+// to change this state...  The nouveau DRM already builds this list as
+// it's definitely needed for NVIDIA's, so we may as well use it for now
+//
+// Input: $r1 mmio list length, in entries of 8 bytes (address, value).
+//        Must be non-zero, the count is only tested after an entry has
+//        been executed (ctx_xfer checks this before calling).
+//
+// Uses $r1-$r5, $r14 and $r15.  $r2 keeps the MEM_BASE register address
+// across the nv_wr32 calls, so this relies on nv_wr32 preserving $r2.
+//
+ctx_mmio_exec:
+	// set transfer base to be the mmio list
+	ld b32 $r3 D[$r0 + #chan_mmio_address]
+	mov $r2 0xa04
+	shl b32 $r2 6
+	iowr I[$r2 + 0x000] $r3		// MEM_BASE
+
+	clear b32 $r3			// $r3 = byte offset into the list
+	ctx_mmio_loop:
+		// fetch next 256 bytes of mmio list if necessary
+		and $r4 $r3 0xff	// $r4 = offset within xfer_data
+		bra ne #ctx_mmio_pull
+			mov $r5 #xfer_data
+			sethi $r5 0x00060000	// 256 bytes
+			xdld $r3 $r5
+			xdwait
+
+		// execute a single list entry
+		ctx_mmio_pull:
+		ld b32 $r14 D[$r4 + #xfer_data + 0x00]
+		ld b32 $r15 D[$r4 + #xfer_data + 0x04]
+		call #nv_wr32
+
+		// next!
+		add b32 $r3 8
+		sub b32 $r1 1
+		bra ne #ctx_mmio_loop
+
+	// set transfer base back to the current context
+	ctx_mmio_done:
+	ld b32 $r3 D[$r0 + #ctx_current]
+	iowr I[$r2 + 0x000] $r3		// MEM_BASE
+
+	// disable the mmio list now, we don't need/want to execute it again
+	// (zero the count in chan_data, then write chan_data back out)
+	st b32 D[$r0 + #chan_mmio_count] $r0
+	mov $r1 #chan_data
+	sethi $r1 0x00060000		// 256 bytes
+	xdst $r0 $r1
+	xdwait
+	ret
+
+// Transfer HUB context data between GPU and storage area
+//
+// In: $r2 channel address (within this routine it is only consumed by
+//         ctx_load, i.e. on a load)
+//     $p1 clear on save, set on load
+//     $p2 set if opposite direction done/will be done, so:
+//		on save it means: "a load will follow this save"
+//		on load it means: "a save preceded this load"
+//
+// Steps taken for each combination:
+//     save, no load follows: pre, xfer, post-save, post
+//     save, load follows:    pre, xfer, post-save
+//     load, no save before:  pre, pre_load, xfer, post
+//     load, save before:     pre_load, xfer
+//
+ctx_xfer:
+	// according to mwk, some kind of wait for idle
+	mov $r15 0xc00
+	shl b32 $r15 6
+	mov $r14 4
+	iowr I[$r15 + 0x200] $r14
+	ctx_xfer_idle:
+		iord $r14 I[$r15 + 0x000]
+		and $r14 0x2000
+		bra ne #ctx_xfer_idle
+
+	// "pre" is skipped only for a load that follows a save
+	bra not $p1 #ctx_xfer_pre
+	bra $p2 #ctx_xfer_pre_load
+	ctx_xfer_pre:
+		mov $r15 0x10
+		call #ctx_86c
+#ifdef NVGF
+		call #ctx_4160s
+#endif
+		bra not $p1 #ctx_xfer_exec
+
+	// load only: switch to the new channel before transferring
+	ctx_xfer_pre_load:
+		mov $r15 2
+		call #ctx_4170s
+		call #ctx_4170w
+		call #ctx_redswitch
+		clear b32 $r15
+		call #ctx_4170s
+		call #ctx_load
+
+	// fetch context pointer, and initiate xfer on all GPCs
+	ctx_xfer_exec:
+	ld b32 $r1 D[$r0 + #ctx_current]
+	mov $r2 0x414
+	shl b32 $r2 6
+	iowr I[$r2 + 0x000] $r0	// BAR_STATUS = reset
+	mov $r14 -0x5b00	// 0x41a500
+	sethi $r14 0x410000
+	mov b32 $r15 $r1
+	call #nv_wr32		// GPC_BCAST_WRCMD_DATA = ctx pointer
+	add b32 $r14 4
+	xbit $r15 $flags $p1	// type = $p1 | ($p2 << 1)
+	xbit $r2 $flags $p2
+	shl b32 $r2 1
+	or $r15 $r2
+	call #nv_wr32		// GPC_BCAST_WRCMD_CMD = GPC_XFER(type)
+
+	// strands
+	mov $r1 0x4afc
+	sethi $r1 0x20000
+	mov $r2 0xc
+	iowr I[$r1] $r2		// STRAND_CMD(0x3f) = 0x0c
+	call #strand_wait
+	mov $r2 0x47fc
+	sethi $r2 0x20000
+	iowr I[$r2] $r0		// STRAND_FIRST_GENE(0x3f) = 0x00
+	xbit $r2 $flags $p1
+	add b32 $r2 3
+	iowr I[$r1] $r2		// STRAND_CMD(0x3f) = 0x03/0x04 (SAVE/LOAD)
+
+	// mmio context
+	xbit $r10 $flags $p1	// direction
+	or $r10 6		// first, last
+	mov $r11 0		// base = 0
+	ld b32 $r12 D[$r0 + #hub_mmio_list_head]
+	ld b32 $r13 D[$r0 + #hub_mmio_list_tail]
+	mov $r14 0		// not multi
+	call #mmctx_xfer
+
+	// wait for GPCs to all complete
+	mov $r10 8		// DONE_BAR
+	call #wait_doneo
+
+	// wait for strand xfer to complete
+	call #strand_wait
+
+	// post-op, the MEM_CMD 5 step below is done on save only
+	bra $p1 #ctx_xfer_post
+		mov $r10 12		// DONE_UNK12
+		call #wait_donez
+		mov $r1 0xa10
+		shl b32 $r1 6
+		mov $r2 5
+		iowr I[$r1] $r2		// MEM_CMD
+		ctx_xfer_post_save_wait:
+			iord $r2 I[$r1]
+			or $r2 $r2
+			bra ne #ctx_xfer_post_save_wait
+
+	// a save that will be followed by a load is finished at this point
+	bra $p2 #ctx_xfer_done
+	ctx_xfer_post:
+		mov $r15 2
+		call #ctx_4170s
+		clear b32 $r15
+		call #ctx_86c
+		call #strand_post
+		call #ctx_4170w
+		clear b32 $r15
+		call #ctx_4170s
+
+		// on load, run the channel's mmio list if it has one
+		bra not $p1 #ctx_xfer_no_post_mmio
+		ld b32 $r1 D[$r0 + #chan_mmio_count]
+		or $r1 $r1
+		bra e #ctx_xfer_no_post_mmio
+			call #ctx_mmio_exec
+
+		ctx_xfer_no_post_mmio:
+#ifdef NVGF
+		call #ctx_4160c
+#endif
+
+	ctx_xfer_done:
+	ret
+#endif

+ 20 - 770
drivers/gpu/drm/nouveau/core/engine/graph/fuc/hubnvc0.fuc

@@ -1,6 +1,5 @@
-/* fuc microcode for nvc0 PGRAPH/HUB
- *
- * Copyright 2011 Red Hat Inc.
+/*
+ * Copyright 2013 Red Hat Inc.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -20,22 +19,16 @@
  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  * OTHER DEALINGS IN THE SOFTWARE.
  *
- * Authors: Ben Skeggs
+ * Authors: Ben Skeggs <bskeggs@redhat.com>
  */
 
-/* To build:
- *    m4 hubnvc0.fuc | envyas -a -w -m fuc -V fuc3 -o hubnvc0.fuc.h
- */
+#define NVGF
+#include "macros.fuc"
 
 .section #nvc0_grhub_data
-include(`nvc0.fuc')
-gpc_count:		.b32 0
-rop_count:		.b32 0
-cmd_queue:		queue_init
-hub_mmio_list_head:	.b32 0
-hub_mmio_list_tail:	.b32 0
-
-ctx_current:		.b32 0
+#define INCLUDE_DATA
+#include "com.fuc"
+#include "hub.fuc"
 
 chipsets:
 .b8  0xc0 0 0 0
@@ -68,9 +61,12 @@ chipsets:
 .b8  0 0 0 0
 
 nvc0_hub_mmio_head:
+mmctx_data(0x40402c, 1)
+mmctx_data(0x404174, 1)
+nvd9_hub_mmio_head:
 mmctx_data(0x17e91c, 2)
 mmctx_data(0x400204, 2)
-mmctx_data(0x404004, 11)
+mmctx_data(0x404004, 10)
 mmctx_data(0x404044, 1)
 mmctx_data(0x404094, 14)
 mmctx_data(0x4040d0, 7)
@@ -78,7 +74,7 @@ mmctx_data(0x4040f8, 1)
 mmctx_data(0x404130, 3)
 mmctx_data(0x404150, 3)
 mmctx_data(0x404164, 2)
-mmctx_data(0x404174, 3)
+mmctx_data(0x404178, 2)
 mmctx_data(0x404200, 8)
 mmctx_data(0x404404, 14)
 mmctx_data(0x404460, 4)
@@ -105,765 +101,19 @@ mmctx_data(0x4078bc, 1)
 mmctx_data(0x408000, 7)
 mmctx_data(0x408064, 1)
 mmctx_data(0x408800, 3)
-mmctx_data(0x408900, 4)
+mmctx_data(0x408900, 3)
 mmctx_data(0x408980, 1)
 nvc0_hub_mmio_tail:
 mmctx_data(0x4064c0, 2)
 nvc1_hub_mmio_tail:
-
-nvd9_hub_mmio_head:
-mmctx_data(0x17e91c, 2)
-mmctx_data(0x400204, 2)
-mmctx_data(0x404004, 10)
-mmctx_data(0x404044, 1)
-mmctx_data(0x404094, 14)
-mmctx_data(0x4040d0, 7)
-mmctx_data(0x4040f8, 1)
-mmctx_data(0x404130, 3)
-mmctx_data(0x404150, 3)
-mmctx_data(0x404164, 2)
-mmctx_data(0x404178, 2)
-mmctx_data(0x404200, 8)
-mmctx_data(0x404404, 14)
-mmctx_data(0x404460, 4)
-mmctx_data(0x404480, 1)
-mmctx_data(0x404498, 1)
-mmctx_data(0x404604, 4)
-mmctx_data(0x404618, 32)
-mmctx_data(0x404698, 21)
-mmctx_data(0x4046f0, 2)
-mmctx_data(0x404700, 22)
-mmctx_data(0x405800, 1)
-mmctx_data(0x405830, 3)
-mmctx_data(0x405854, 1)
-mmctx_data(0x405870, 4)
-mmctx_data(0x405a00, 2)
-mmctx_data(0x405a18, 1)
-mmctx_data(0x406020, 1)
-mmctx_data(0x406028, 4)
-mmctx_data(0x4064a8, 2)
-mmctx_data(0x4064b4, 5)
-mmctx_data(0x407804, 1)
-mmctx_data(0x40780c, 6)
-mmctx_data(0x4078bc, 1)
-mmctx_data(0x408000, 7)
-mmctx_data(0x408064, 1)
-mmctx_data(0x408800, 3)
-mmctx_data(0x408900, 4)
-mmctx_data(0x408980, 1)
+mmctx_data(0x4064bc, 3)
 nvd9_hub_mmio_tail:
-
-.align 256
-chan_data:
-chan_mmio_count:	.b32 0
-chan_mmio_address:	.b32 0
-
-.align 256
-xfer_data: 		.b32 0
+#undef INCLUDE_DATA
 
 .section #nvc0_grhub_code
+#define INCLUDE_CODE
 bra #init
-define(`include_code')
-include(`nvc0.fuc')
-
-// reports an exception to the host
-//
-// In: $r15 error code (see nvc0.fuc)
-//
-error:
-	push $r14
-	mov $r14 0x814
-	shl b32 $r14 6
-	iowr I[$r14 + 0x000] $r15	// CC_SCRATCH[5] = error code
-	mov $r14 0xc1c
-	shl b32 $r14 6
-	mov $r15 1
-	iowr I[$r14 + 0x000] $r15	// INTR_UP_SET
-	pop $r14
-	ret
-
-// HUB fuc initialisation, executed by triggering ucode start, will
-// fall through to main loop after completion.
-//
-// Input:
-//   CC_SCRATCH[0]: chipset (PMC_BOOT_0 read returns 0x0bad0bad... sigh)
-//
-// Output:
-//   CC_SCRATCH[0]:
-//	     31:31: set to signal completion
-//   CC_SCRATCH[1]:
-//	      31:0: total PGRAPH context size
-//
-init:
-	clear b32 $r0
-	mov $sp $r0
-	mov $xdbase $r0
-
-	// enable fifo access
-	mov $r1 0x1200
-	mov $r2 2
-	iowr I[$r1 + 0x000] $r2	// FIFO_ENABLE
-
-	// setup i0 handler, and route all interrupts to it
-	mov $r1 #ih
-	mov $iv0 $r1
-	mov $r1 0x400
-	iowr I[$r1 + 0x300] $r0	// INTR_DISPATCH
-
-	// route HUB_CHANNEL_SWITCH to fuc interrupt 8
-	mov $r3 0x404
-	shl b32 $r3 6
-	mov $r2 0x2003		// { HUB_CHANNEL_SWITCH, ZERO } -> intr 8
-	iowr I[$r3 + 0x000] $r2
-
-	// not sure what these are, route them because NVIDIA does, and
-	// the IRQ handler will signal the host if we ever get one.. we
-	// may find out if/why we need to handle these if so..
-	//
-	mov $r2 0x2004
-	iowr I[$r3 + 0x004] $r2 // { 0x04, ZERO } -> intr 9
-	mov $r2 0x200b
-	iowr I[$r3 + 0x008] $r2 // { 0x0b, ZERO } -> intr 10
-	mov $r2 0x200c
-	iowr I[$r3 + 0x01c] $r2 // { 0x0c, ZERO } -> intr 15
-
-	// enable all INTR_UP interrupts
-	mov $r2 0xc24
-	shl b32 $r2 6
-	not b32 $r3 $r0
-	iowr I[$r2] $r3
-
-	// enable fifo, ctxsw, 9, 10, 15 interrupts
-	mov $r2 -0x78fc		// 0x8704
-	sethi $r2 0
-	iowr I[$r1 + 0x000] $r2	// INTR_EN_SET
-
-	// fifo level triggered, rest edge
-	sub b32 $r1 0x100
-	mov $r2 4
-	iowr I[$r1] $r2
-
-	// enable interrupts
-	bset $flags ie0
-
-	// fetch enabled GPC/ROP counts
-	mov $r14 -0x69fc	// 0x409604
-	sethi $r14 0x400000
-	call #nv_rd32
-	extr $r1 $r15 16:20
-	st b32 D[$r0 + #rop_count] $r1
-	and $r15 0x1f
-	st b32 D[$r0 + #gpc_count] $r15
-
-	// set BAR_REQMASK to GPC mask
-	mov $r1 1
-	shl b32 $r1 $r15
-	sub b32 $r1 1
-	mov $r2 0x40c
-	shl b32 $r2 6
-	iowr I[$r2 + 0x000] $r1
-	iowr I[$r2 + 0x100] $r1
-
-	// find context data for this chipset
-	mov $r2 0x800
-	shl b32 $r2 6
-	iord $r2 I[$r2 + 0x000]		// CC_SCRATCH[0]
-	mov $r15 #chipsets - 8
-	init_find_chipset:
-		add b32 $r15 8
-		ld b32 $r3 D[$r15 + 0x00]
-		cmpu b32 $r3 $r2
-		bra e #init_context
-		cmpu b32 $r3 0
-		bra ne #init_find_chipset
-		// unknown chipset
-		ret
-
-	// context size calculation, reserve first 256 bytes for use by fuc
-	init_context:
-	mov $r1 256
-
-	// calculate size of mmio context data
-	ld b16 $r14 D[$r15 + 4]
-	ld b16 $r15 D[$r15 + 6]
-	sethi $r14 0
-	st b32 D[$r0 + #hub_mmio_list_head] $r14
-	st b32 D[$r0 + #hub_mmio_list_tail] $r15
-	call #mmctx_size
-
-	// set mmctx base addresses now so we don't have to do it later,
-	// they don't (currently) ever change
-	mov $r3 0x700
-	shl b32 $r3 6
-	shr b32 $r4 $r1 8
-	iowr I[$r3 + 0x000] $r4		// MMCTX_SAVE_SWBASE
-	iowr I[$r3 + 0x100] $r4		// MMCTX_LOAD_SWBASE
-	add b32 $r3 0x1300
-	add b32 $r1 $r15
-	shr b32 $r15 2
-	iowr I[$r3 + 0x000] $r15	// MMCTX_LOAD_COUNT, wtf for?!?
-
-	// strands, base offset needs to be aligned to 256 bytes
-	shr b32 $r1 8
-	add b32 $r1 1
-	shl b32 $r1 8
-	mov b32 $r15 $r1
-	call #strand_ctx_init
-	add b32 $r1 $r15
-
-	// initialise each GPC in sequence by passing in the offset of its
-	// context data in GPCn_CC_SCRATCH[1], and starting its FUC (which
-	// has previously been uploaded by the host) running.
-	//
-	// the GPC fuc init sequence will set GPCn_CC_SCRATCH[0] bit 31
-	// when it has completed, and return the size of its context data
-	// in GPCn_CC_SCRATCH[1]
-	//
-	ld b32 $r3 D[$r0 + #gpc_count]
-	mov $r4 0x2000
-	sethi $r4 0x500000
-	init_gpc:
-		// setup, and start GPC ucode running
-		add b32 $r14 $r4 0x804
-		mov b32 $r15 $r1
-		call #nv_wr32			// CC_SCRATCH[1] = ctx offset
-		add b32 $r14 $r4 0x800
-		mov b32 $r15 $r2
-		call #nv_wr32			// CC_SCRATCH[0] = chipset
-		add b32 $r14 $r4 0x10c
-		clear b32 $r15
-		call #nv_wr32
-		add b32 $r14 $r4 0x104
-		call #nv_wr32			// ENTRY
-		add b32 $r14 $r4 0x100
-		mov $r15 2			// CTRL_START_TRIGGER
-		call #nv_wr32			// CTRL
-
-		// wait for it to complete, and adjust context size
-		add b32 $r14 $r4 0x800
-		init_gpc_wait:
-			call #nv_rd32
-			xbit $r15 $r15 31
-			bra e #init_gpc_wait
-		add b32 $r14 $r4 0x804
-		call #nv_rd32
-		add b32 $r1 $r15
-
-		// next!
-		add b32 $r4 0x8000
-		sub b32 $r3 1
-		bra ne #init_gpc
-
-	// save context size, and tell host we're ready
-	mov $r2 0x800
-	shl b32 $r2 6
-	iowr I[$r2 + 0x100] $r1		// CC_SCRATCH[1]  = context size
-	add b32 $r2 0x800
-	clear b32 $r1
-	bset $r1 31
-	iowr I[$r2 + 0x000] $r1		// CC_SCRATCH[0] |= 0x80000000
-
-// Main program loop, very simple, sleeps until woken up by the interrupt
-// handler, pulls a command from the queue and executes its handler
-//
-main:
-	// sleep until we have something to do
-	bset $flags $p0
-	sleep $p0
-	mov $r13 #cmd_queue
-	call #queue_get
-	bra $p1 #main
-
-	// context switch, requested by GPU?
-	cmpu b32 $r14 0x4001
-	bra ne #main_not_ctx_switch
-		trace_set(T_AUTO)
-		mov $r1 0xb00
-		shl b32 $r1 6
-		iord $r2 I[$r1 + 0x100]		// CHAN_NEXT
-		iord $r1 I[$r1 + 0x000]		// CHAN_CUR
-
-		xbit $r3 $r1 31
-		bra e #chsw_no_prev
-			xbit $r3 $r2 31
-			bra e #chsw_prev_no_next
-				push $r2
-				mov b32 $r2 $r1
-				trace_set(T_SAVE)
-				bclr $flags $p1
-				bset $flags $p2
-				call #ctx_xfer
-				trace_clr(T_SAVE);
-				pop $r2
-				trace_set(T_LOAD);
-				bset $flags $p1
-				call #ctx_xfer
-				trace_clr(T_LOAD);
-				bra #chsw_done
-			chsw_prev_no_next:
-				push $r2
-				mov b32 $r2 $r1
-				bclr $flags $p1
-				bclr $flags $p2
-				call #ctx_xfer
-				pop $r2
-				mov $r1 0xb00
-				shl b32 $r1 6
-				iowr I[$r1] $r2
-				bra #chsw_done
-		chsw_no_prev:
-			xbit $r3 $r2 31
-			bra e #chsw_done
-				bset $flags $p1
-				bclr $flags $p2
-				call #ctx_xfer
-
-		// ack the context switch request
-		chsw_done:
-		mov $r1 0xb0c
-		shl b32 $r1 6
-		mov $r2 1
-		iowr I[$r1 + 0x000] $r2		// 0x409b0c
-		trace_clr(T_AUTO)
-		bra #main
-
-	// request to set current channel? (*not* a context switch)
-	main_not_ctx_switch:
-	cmpu b32 $r14 0x0001
-	bra ne #main_not_ctx_chan
-		mov b32 $r2 $r15
-		call #ctx_chan
-		bra #main_done
-
-	// request to store current channel context?
-	main_not_ctx_chan:
-	cmpu b32 $r14 0x0002
-	bra ne #main_not_ctx_save
-		trace_set(T_SAVE)
-		bclr $flags $p1
-		bclr $flags $p2
-		call #ctx_xfer
-		trace_clr(T_SAVE)
-		bra #main_done
-
-	main_not_ctx_save:
-		shl b32 $r15 $r14 16
-		or $r15 E_BAD_COMMAND
-		call #error
-		bra #main
-
-	main_done:
-	mov $r1 0x820
-	shl b32 $r1 6
-	clear b32 $r2
-	bset $r2 31
-	iowr I[$r1 + 0x000] $r2		// CC_SCRATCH[0] |= 0x80000000
-	bra #main
-
-// interrupt handler
-ih:
-	push $r8
-	mov $r8 $flags
-	push $r8
-	push $r9
-	push $r10
-	push $r11
-	push $r13
-	push $r14
-	push $r15
-
-	// incoming fifo command?
-	iord $r10 I[$r0 + 0x200]	// INTR
-	and $r11 $r10 0x00000004
-	bra e #ih_no_fifo
-		// queue incoming fifo command for later processing
-		mov $r11 0x1900
-		mov $r13 #cmd_queue
-		iord $r14 I[$r11 + 0x100]	// FIFO_CMD
-		iord $r15 I[$r11 + 0x000]	// FIFO_DATA
-		call #queue_put
-		add b32 $r11 0x400
-		mov $r14 1
-		iowr I[$r11 + 0x000] $r14	// FIFO_ACK
-
-	// context switch request?
-	ih_no_fifo:
-	and $r11 $r10 0x00000100
-	bra e #ih_no_ctxsw
-		// enqueue a context switch for later processing
-		mov $r13 #cmd_queue
-		mov $r14 0x4001
-		call #queue_put
-
-	// anything we didn't handle, bring it to the host's attention
-	ih_no_ctxsw:
-	mov $r11 0x104
-	not b32 $r11
-	and $r11 $r10 $r11
-	bra e #ih_no_other
-		mov $r10 0xc1c
-		shl b32 $r10 6
-		iowr I[$r10] $r11	// INTR_UP_SET
-
-	// ack, and wake up main()
-	ih_no_other:
-	iowr I[$r0 + 0x100] $r10	// INTR_ACK
-
-	pop $r15
-	pop $r14
-	pop $r13
-	pop $r11
-	pop $r10
-	pop $r9
-	pop $r8
-	mov $flags $r8
-	pop $r8
-	bclr $flags $p0
-	iret
-
-// Not real sure, but, MEM_CMD 7 will hang forever if this isn't done
-ctx_4160s:
-	mov $r14 0x4160
-	sethi $r14 0x400000
-	mov $r15 1
-	call #nv_wr32
-	ctx_4160s_wait:
-		call #nv_rd32
-		xbit $r15 $r15 4
-		bra e #ctx_4160s_wait
-	ret
-
-// Without clearing again at end of xfer, some things cause PGRAPH
-// to hang with STATUS=0x00000007 until it's cleared.. fbcon can
-// still function with it set however...
-ctx_4160c:
-	mov $r14 0x4160
-	sethi $r14 0x400000
-	clear b32 $r15
-	call #nv_wr32
-	ret
-
-// Again, not real sure
-//
-// In: $r15 value to set 0x404170 to
-//
-ctx_4170s:
-	mov $r14 0x4170
-	sethi $r14 0x400000
-	or $r15 0x10
-	call #nv_wr32
-	ret
-
-// Waits for a ctx_4170s() call to complete
-//
-ctx_4170w:
-	mov $r14 0x4170
-	sethi $r14 0x400000
-	call #nv_rd32
-	and $r15 0x10
-	bra ne #ctx_4170w
-	ret
-
-// Disables various things, waits a bit, and re-enables them..
-//
-// Not sure how exactly this helps, perhaps "ENABLE" is not such a
-// good description for the bits we turn off?  Anyways, without this,
-// funny things happen.
-//
-ctx_redswitch:
-	mov $r14 0x614
-	shl b32 $r14 6
-	mov $r15 0x270
-	iowr I[$r14] $r15	// HUB_RED_SWITCH = ENABLE_GPC, POWER_ALL
-	mov $r15 8
-	ctx_redswitch_delay:
-		sub b32 $r15 1
-		bra ne #ctx_redswitch_delay
-	mov $r15 0x770
-	iowr I[$r14] $r15	// HUB_RED_SWITCH = ENABLE_ALL, POWER_ALL
-	ret
-
-// Not a clue what this is for, except that unless the value is 0x10, the
-// strand context is saved (and presumably restored) incorrectly..
-//
-// In: $r15 value to set to (0x00/0x10 are used)
-//
-ctx_86c:
-	mov $r14 0x86c
-	shl b32 $r14 6
-	iowr I[$r14] $r15	// HUB(0x86c) = val
-	mov $r14 -0x75ec
-	sethi $r14 0x400000
-	call #nv_wr32		// ROP(0xa14) = val
-	mov $r14 -0x5794
-	sethi $r14 0x410000
-	call #nv_wr32		// GPC(0x86c) = val
-	ret
-
-// ctx_load - load's a channel's ctxctl data, and selects its vm
-//
-// In: $r2 channel address
-//
-ctx_load:
-	trace_set(T_CHAN)
-
-	// switch to channel, somewhat magic in parts..
-	mov $r10 12		// DONE_UNK12
-	call #wait_donez
-	mov $r1 0xa24
-	shl b32 $r1 6
-	iowr I[$r1 + 0x000] $r0	// 0x409a24
-	mov $r3 0xb00
-	shl b32 $r3 6
-	iowr I[$r3 + 0x100] $r2	// CHAN_NEXT
-	mov $r1 0xa0c
-	shl b32 $r1 6
-	mov $r4 7
-	iowr I[$r1 + 0x000] $r2 // MEM_CHAN
-	iowr I[$r1 + 0x100] $r4	// MEM_CMD
-	ctx_chan_wait_0:
-		iord $r4 I[$r1 + 0x100]
-		and $r4 0x1f
-		bra ne #ctx_chan_wait_0
-	iowr I[$r3 + 0x000] $r2	// CHAN_CUR
-
-	// load channel header, fetch PGRAPH context pointer
-	mov $xtargets $r0
-	bclr $r2 31
-	shl b32 $r2 4
-	add b32 $r2 2
-
-	trace_set(T_LCHAN)
-	mov $r1 0xa04
-	shl b32 $r1 6
-	iowr I[$r1 + 0x000] $r2		// MEM_BASE
-	mov $r1 0xa20
-	shl b32 $r1 6
-	mov $r2 0x0002
-	sethi $r2 0x80000000
-	iowr I[$r1 + 0x000] $r2		// MEM_TARGET = vram
-	mov $r1 0x10			// chan + 0x0210
-	mov $r2 #xfer_data
-	sethi $r2 0x00020000		// 16 bytes
-	xdld $r1 $r2
-	xdwait
-	trace_clr(T_LCHAN)
-
-	// update current context
-	ld b32 $r1 D[$r0 + #xfer_data + 4]
-	shl b32 $r1 24
-	ld b32 $r2 D[$r0 + #xfer_data + 0]
-	shr b32 $r2 8
-	or $r1 $r2
-	st b32 D[$r0 + #ctx_current] $r1
-
-	// set transfer base to start of context, and fetch context header
-	trace_set(T_LCTXH)
-	mov $r2 0xa04
-	shl b32 $r2 6
-	iowr I[$r2 + 0x000] $r1		// MEM_BASE
-	mov $r2 1
-	mov $r1 0xa20
-	shl b32 $r1 6
-	iowr I[$r1 + 0x000] $r2		// MEM_TARGET = vm
-	mov $r1 #chan_data
-	sethi $r1 0x00060000		// 256 bytes
-	xdld $r0 $r1
-	xdwait
-	trace_clr(T_LCTXH)
-
-	trace_clr(T_CHAN)
-	ret
-
-// ctx_chan - handler for HUB_SET_CHAN command, will set a channel as
-//            the active channel for ctxctl, but not actually transfer
-//            any context data.  intended for use only during initial
-//            context construction.
-//
-// In: $r2 channel address
-//
-ctx_chan:
-	call #ctx_4160s
-	call #ctx_load
-	mov $r10 12			// DONE_UNK12
-	call #wait_donez
-	mov $r1 0xa10
-	shl b32 $r1 6
-	mov $r2 5
-	iowr I[$r1 + 0x000] $r2		// MEM_CMD = 5 (???)
-	ctx_chan_wait:
-		iord $r2 I[$r1 + 0x000]
-		or $r2 $r2
-		bra ne #ctx_chan_wait
-	call #ctx_4160c
-	ret
-
-// Execute per-context state overrides list
-//
-// Only executed on the first load of a channel.  Might want to look into
-// removing this and having the host directly modify the channel's context
-// to change this state...  The nouveau DRM already builds this list as
-// it's definitely needed for NVIDIA's, so we may as well use it for now
-//
-// Input: $r1 mmio list length
-//
-ctx_mmio_exec:
-	// set transfer base to be the mmio list
-	ld b32 $r3 D[$r0 + #chan_mmio_address]
-	mov $r2 0xa04
-	shl b32 $r2 6
-	iowr I[$r2 + 0x000] $r3		// MEM_BASE
-
-	clear b32 $r3
-	ctx_mmio_loop:
-		// fetch next 256 bytes of mmio list if necessary
-		and $r4 $r3 0xff
-		bra ne #ctx_mmio_pull
-			mov $r5 #xfer_data
-			sethi $r5 0x00060000	// 256 bytes
-			xdld $r3 $r5
-			xdwait
-
-		// execute a single list entry
-		ctx_mmio_pull:
-		ld b32 $r14 D[$r4 + #xfer_data + 0x00]
-		ld b32 $r15 D[$r4 + #xfer_data + 0x04]
-		call #nv_wr32
-
-		// next!
-		add b32 $r3 8
-		sub b32 $r1 1
-		bra ne #ctx_mmio_loop
-
-	// set transfer base back to the current context
-	ctx_mmio_done:
-	ld b32 $r3 D[$r0 + #ctx_current]
-	iowr I[$r2 + 0x000] $r3		// MEM_BASE
-
-	// disable the mmio list now, we don't need/want to execute it again
-	st b32 D[$r0 + #chan_mmio_count] $r0
-	mov $r1 #chan_data
-	sethi $r1 0x00060000		// 256 bytes
-	xdst $r0 $r1
-	xdwait
-	ret
-
-// Transfer HUB context data between GPU and storage area
-//
-// In: $r2 channel address
-//     $p1 clear on save, set on load
-//     $p2 set if opposite direction done/will be done, so:
-//		on save it means: "a load will follow this save"
-//		on load it means: "a save preceded this load"
-//
-ctx_xfer:
-	// according to mwk, some kind of wait for idle
-	mov $r15 0xc00
-	shl b32 $r15 6
-	mov $r14 4
-	iowr I[$r15 + 0x200] $r14
-	ctx_xfer_idle:
-		iord $r14 I[$r15 + 0x000]
-		and $r14 0x2000
-		bra ne #ctx_xfer_idle
-
-	bra not $p1 #ctx_xfer_pre
-	bra $p2 #ctx_xfer_pre_load
-	ctx_xfer_pre:
-		mov $r15 0x10
-		call #ctx_86c
-		call #ctx_4160s
-		bra not $p1 #ctx_xfer_exec
-
-	ctx_xfer_pre_load:
-		mov $r15 2
-		call #ctx_4170s
-		call #ctx_4170w
-		call #ctx_redswitch
-		clear b32 $r15
-		call #ctx_4170s
-		call #ctx_load
-
-	// fetch context pointer, and initiate xfer on all GPCs
-	ctx_xfer_exec:
-	ld b32 $r1 D[$r0 + #ctx_current]
-	mov $r2 0x414
-	shl b32 $r2 6
-	iowr I[$r2 + 0x000] $r0	// BAR_STATUS = reset
-	mov $r14 -0x5b00
-	sethi $r14 0x410000
-	mov b32 $r15 $r1
-	call #nv_wr32		// GPC_BCAST_WRCMD_DATA = ctx pointer
-	add b32 $r14 4
-	xbit $r15 $flags $p1
-	xbit $r2 $flags $p2
-	shl b32 $r2 1
-	or $r15 $r2
-	call #nv_wr32		// GPC_BCAST_WRCMD_CMD = GPC_XFER(type)
-
-	// strands
-	mov $r1 0x4afc
-	sethi $r1 0x20000
-	mov $r2 0xc
-	iowr I[$r1] $r2		// STRAND_CMD(0x3f) = 0x0c
-	call #strand_wait
-	mov $r2 0x47fc
-	sethi $r2 0x20000
-	iowr I[$r2] $r0		// STRAND_FIRST_GENE(0x3f) = 0x00
-	xbit $r2 $flags $p1
-	add b32 $r2 3
-	iowr I[$r1] $r2		// STRAND_CMD(0x3f) = 0x03/0x04 (SAVE/LOAD)
-
-	// mmio context
-	xbit $r10 $flags $p1	// direction
-	or $r10 6		// first, last
-	mov $r11 0		// base = 0
-	ld b32 $r12 D[$r0 + #hub_mmio_list_head]
-	ld b32 $r13 D[$r0 + #hub_mmio_list_tail]
-	mov $r14 0		// not multi
-	call #mmctx_xfer
-
-	// wait for GPCs to all complete
-	mov $r10 8		// DONE_BAR
-	call #wait_doneo
-
-	// wait for strand xfer to complete
-	call #strand_wait
-
-	// post-op
-	bra $p1 #ctx_xfer_post
-		mov $r10 12		// DONE_UNK12
-		call #wait_donez
-		mov $r1 0xa10
-		shl b32 $r1 6
-		mov $r2 5
-		iowr I[$r1] $r2		// MEM_CMD
-		ctx_xfer_post_save_wait:
-			iord $r2 I[$r1]
-			or $r2 $r2
-			bra ne #ctx_xfer_post_save_wait
-
-	bra $p2 #ctx_xfer_done
-	ctx_xfer_post:
-		mov $r15 2
-		call #ctx_4170s
-		clear b32 $r15
-		call #ctx_86c
-		call #strand_post
-		call #ctx_4170w
-		clear b32 $r15
-		call #ctx_4170s
-
-		bra not $p1 #ctx_xfer_no_post_mmio
-		ld b32 $r1 D[$r0 + #chan_mmio_count]
-		or $r1 $r1
-		bra e #ctx_xfer_no_post_mmio
-			call #ctx_mmio_exec
-
-		ctx_xfer_no_post_mmio:
-		call #ctx_4160c
-
-	ctx_xfer_done:
-	ret
-
+#include "com.fuc"
+#include "hub.fuc"
 .align 256
+#undef INCLUDE_CODE

+ 540 - 480
drivers/gpu/drm/nouveau/core/engine/graph/fuc/hubnvc0.fuc.h

@@ -28,108 +28,7 @@ uint32_t nvc0_grhub_data[] = {
 	0x00000000,
 /* 0x0058: ctx_current */
 	0x00000000,
-/* 0x005c: chipsets */
-	0x000000c0,
-	0x013c00a0,
-	0x000000c1,
-	0x014000a0,
-	0x000000c3,
-	0x013c00a0,
-	0x000000c4,
-	0x013c00a0,
-	0x000000c8,
-	0x013c00a0,
-	0x000000ce,
-	0x013c00a0,
-	0x000000cf,
-	0x013c00a0,
-	0x000000d9,
-	0x01dc0140,
 	0x00000000,
-/* 0x00a0: nvc0_hub_mmio_head */
-	0x0417e91c,
-	0x04400204,
-	0x28404004,
-	0x00404044,
-	0x34404094,
-	0x184040d0,
-	0x004040f8,
-	0x08404130,
-	0x08404150,
-	0x04404164,
-	0x08404174,
-	0x1c404200,
-	0x34404404,
-	0x0c404460,
-	0x00404480,
-	0x00404498,
-	0x0c404604,
-	0x7c404618,
-	0x50404698,
-	0x044046f0,
-	0x54404700,
-	0x00405800,
-	0x08405830,
-	0x00405854,
-	0x0c405870,
-	0x04405a00,
-	0x00405a18,
-	0x00406020,
-	0x0c406028,
-	0x044064a8,
-	0x044064b4,
-	0x00407804,
-	0x1440780c,
-	0x004078bc,
-	0x18408000,
-	0x00408064,
-	0x08408800,
-	0x0c408900,
-	0x00408980,
-/* 0x013c: nvc0_hub_mmio_tail */
-	0x044064c0,
-/* 0x0140: nvc1_hub_mmio_tail */
-/* 0x0140: nvd9_hub_mmio_head */
-	0x0417e91c,
-	0x04400204,
-	0x24404004,
-	0x00404044,
-	0x34404094,
-	0x184040d0,
-	0x004040f8,
-	0x08404130,
-	0x08404150,
-	0x04404164,
-	0x04404178,
-	0x1c404200,
-	0x34404404,
-	0x0c404460,
-	0x00404480,
-	0x00404498,
-	0x0c404604,
-	0x7c404618,
-	0x50404698,
-	0x044046f0,
-	0x54404700,
-	0x00405800,
-	0x08405830,
-	0x00405854,
-	0x0c405870,
-	0x04405a00,
-	0x00405a18,
-	0x00406020,
-	0x0c406028,
-	0x044064a8,
-	0x104064b4,
-	0x00407804,
-	0x1440780c,
-	0x004078bc,
-	0x18408000,
-	0x00408064,
-	0x08408800,
-	0x0c408900,
-	0x00408980,
-/* 0x01dc: nvd9_hub_mmio_tail */
 	0x00000000,
 	0x00000000,
 	0x00000000,
@@ -139,10 +38,7 @@ uint32_t nvc0_grhub_data[] = {
 	0x00000000,
 	0x00000000,
 	0x00000000,
-/* 0x0200: chan_data */
-/* 0x0200: chan_mmio_count */
 	0x00000000,
-/* 0x0204: chan_mmio_address */
 	0x00000000,
 	0x00000000,
 	0x00000000,
@@ -173,6 +69,75 @@ uint32_t nvc0_grhub_data[] = {
 	0x00000000,
 	0x00000000,
 	0x00000000,
+/* 0x0100: chan_data */
+/* 0x0100: chan_mmio_count */
+	0x00000000,
+/* 0x0104: chan_mmio_address */
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+/* 0x0200: xfer_data */
+	0x00000000,
 	0x00000000,
 	0x00000000,
 	0x00000000,
@@ -206,8 +171,103 @@ uint32_t nvc0_grhub_data[] = {
 	0x00000000,
 	0x00000000,
 	0x00000000,
-/* 0x0300: xfer_data */
 	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+/* 0x0300: chipsets */
+	0x000000c0,
+	0x03f0034c,
+	0x000000c1,
+	0x03f4034c,
+	0x000000c3,
+	0x03f0034c,
+	0x000000c4,
+	0x03f0034c,
+	0x000000c8,
+	0x03f0034c,
+	0x000000ce,
+	0x03f0034c,
+	0x000000cf,
+	0x03f0034c,
+	0x000000d9,
+	0x03f80354,
+	0x000000d7,
+	0x03f80354,
+	0x00000000,
+/* 0x034c: nvc0_hub_mmio_head */
+	0x0040402c,
+	0x00404174,
+/* 0x0354: nvd9_hub_mmio_head */
+	0x0417e91c,
+	0x04400204,
+	0x24404004,
+	0x00404044,
+	0x34404094,
+	0x184040d0,
+	0x004040f8,
+	0x08404130,
+	0x08404150,
+	0x04404164,
+	0x04404178,
+	0x1c404200,
+	0x34404404,
+	0x0c404460,
+	0x00404480,
+	0x00404498,
+	0x0c404604,
+	0x7c404618,
+	0x50404698,
+	0x044046f0,
+	0x54404700,
+	0x00405800,
+	0x08405830,
+	0x00405854,
+	0x0c405870,
+	0x04405a00,
+	0x00405a18,
+	0x00406020,
+	0x0c406028,
+	0x044064a8,
+	0x044064b4,
+	0x00407804,
+	0x1440780c,
+	0x004078bc,
+	0x18408000,
+	0x00408064,
+	0x08408800,
+	0x08408900,
+	0x00408980,
+/* 0x03f0: nvc0_hub_mmio_tail */
+	0x044064c0,
+/* 0x03f4: nvc1_hub_mmio_tail */
+	0x084064bc,
 };
 
 uint32_t nvc0_grhub_code[] = {
@@ -278,7 +338,7 @@ uint32_t nvc0_grhub_code[] = {
 	0x0089d000,
 	0x081887f1,
 	0xd00684b6,
-/* 0x00e2: wait_done_wait_donez */
+/* 0x00e2: wait_donez_ne */
 	0x87f1008a,
 	0x84b60400,
 	0x0088cf06,
@@ -295,7 +355,7 @@ uint32_t nvc0_grhub_code[] = {
 	0x87f10089,
 	0x84b60818,
 	0x008ad006,
-/* 0x011c: wait_done_wait_doneo */
+/* 0x011c: wait_doneo_e */
 	0x040087f1,
 	0xcf0684b6,
 	0x8aff0088,
@@ -443,7 +503,7 @@ uint32_t nvc0_grhub_code[] = {
 	0x0017f100,
 	0x0227f012,
 	0xf10012d0,
-	0xfe05b917,
+	0xfe05ba17,
 	0x17f10010,
 	0x10d00400,
 	0x0437f1c0,
@@ -477,403 +537,403 @@ uint32_t nvc0_grhub_code[] = {
 	0x4021d000,
 	0x080027f1,
 	0xcf0624b6,
-	0xf7f00022,
-/* 0x03a9: init_find_chipset */
-	0x08f0b654,
-	0xb800f398,
-	0x0bf40432,
-	0x0034b00b,
-	0xf8f11bf4,
-/* 0x03bd: init_context */
-	0x0017f100,
-	0x02fe5801,
-	0xf003ff58,
-	0x0e8000e3,
-	0x150f8014,
-	0x013d21f5,
-	0x070037f1,
-	0x950634b6,
-	0x34d00814,
-	0x4034d000,
-	0x130030b7,
-	0xb6001fbb,
-	0x3fd002f5,
-	0x0815b600,
-	0xb60110b6,
-	0x1fb90814,
-	0x6321f502,
-	0x001fbb02,
-	0xf1000398,
-	0xf0200047,
-/* 0x040e: init_gpc */
-	0x4ea05043,
-	0x1fb90804,
-	0x8d21f402,
-	0x08004ea0,
-	0xf4022fb9,
-	0x4ea08d21,
-	0xf4bd010c,
+	0xf7f10022,
+/* 0x03aa: init_find_chipset */
+	0xf0b602f8,
+	0x00f39808,
+	0xf40432b8,
+	0x34b00b0b,
+	0xf11bf400,
+/* 0x03be: init_context */
+	0x17f100f8,
+	0xfe580100,
+	0x03ff5802,
+	0x8000e3f0,
+	0x0f80140e,
+	0x3d21f515,
+	0x0037f101,
+	0x0634b607,
+	0xd0081495,
+	0x34d00034,
+	0x0030b740,
+	0x001fbb13,
+	0xd002f5b6,
+	0x15b6003f,
+	0x0110b608,
+	0xb90814b6,
+	0x21f5021f,
+	0x1fbb0263,
+	0x00039800,
+	0x200047f1,
+/* 0x040f: init_gpc */
+	0xa05043f0,
+	0xb908044e,
+	0x21f4021f,
+	0x004ea08d,
+	0x022fb908,
 	0xa08d21f4,
-	0xf401044e,
-	0x4ea08d21,
-	0xf7f00100,
-	0x8d21f402,
-	0x08004ea0,
-/* 0x0440: init_gpc_wait */
-	0xc86821f4,
-	0x0bf41fff,
-	0x044ea0fa,
+	0xbd010c4e,
+	0x8d21f4f4,
+	0x01044ea0,
+	0xa08d21f4,
+	0xf001004e,
+	0x21f402f7,
+	0x004ea08d,
+/* 0x0441: init_gpc_wait */
 	0x6821f408,
-	0xb7001fbb,
-	0xb6800040,
-	0x1bf40132,
-	0x0027f1b4,
-	0x0624b608,
-	0xb74021d0,
-	0xbd080020,
-	0x1f19f014,
-/* 0x0473: main */
-	0xf40021d0,
-	0x28f40031,
-	0x08d7f000,
-	0xf43921f4,
-	0xe4b1f401,
-	0x1bf54001,
-	0x87f100d1,
-	0x84b6083c,
-	0xf094bd06,
-	0x89d00499,
-	0x0017f100,
-	0x0614b60b,
-	0xcf4012cf,
-	0x13c80011,
-	0x7e0bf41f,
-	0xf41f23c8,
-	0x20f95a0b,
-	0xf10212b9,
+	0xf41fffc8,
+	0x4ea0fa0b,
+	0x21f40804,
+	0x001fbb68,
+	0x800040b7,
+	0xf40132b6,
+	0x27f1b41b,
+	0x24b60800,
+	0x4021d006,
+	0x080020b7,
+	0x19f014bd,
+	0x0021d01f,
+/* 0x0474: main */
+	0xf40031f4,
+	0xd7f00028,
+	0x3921f408,
+	0xb1f401f4,
+	0xf54001e4,
+	0xf100d11b,
 	0xb6083c87,
 	0x94bd0684,
-	0xd00799f0,
-	0x32f40089,
-	0x0231f401,
-	0x082921f5,
-	0x085c87f1,
+	0xd00499f0,
+	0x17f10089,
+	0x14b60b00,
+	0x4012cf06,
+	0xc80011cf,
+	0x0bf41f13,
+	0x1f23c87e,
+	0xf95a0bf4,
+	0x0212b920,
+	0x083c87f1,
 	0xbd0684b6,
 	0x0799f094,
-	0xfc0089d0,
-	0x3c87f120,
+	0xf40089d0,
+	0x31f40132,
+	0x2a21f502,
+	0x5c87f108,
 	0x0684b608,
 	0x99f094bd,
-	0x0089d006,
-	0xf50131f4,
-	0xf1082921,
-	0xb6085c87,
-	0x94bd0684,
-	0xd00699f0,
-	0x0ef40089,
-/* 0x0509: chsw_prev_no_next */
-	0xb920f931,
-	0x32f40212,
-	0x0232f401,
-	0x082921f5,
-	0x17f120fc,
-	0x14b60b00,
-	0x0012d006,
-/* 0x0527: chsw_no_prev */
-	0xc8130ef4,
-	0x0bf41f23,
-	0x0131f40d,
-	0xf50232f4,
-/* 0x0537: chsw_done */
-	0xf1082921,
-	0xb60b0c17,
-	0x27f00614,
-	0x0012d001,
+	0x0089d007,
+	0x87f120fc,
+	0x84b6083c,
+	0xf094bd06,
+	0x89d00699,
+	0x0131f400,
+	0x082a21f5,
 	0x085c87f1,
 	0xbd0684b6,
-	0x0499f094,
-	0xf50089d0,
-/* 0x0557: main_not_ctx_switch */
-	0xb0ff200e,
-	0x1bf401e4,
-	0x02f2b90d,
-	0x07b521f5,
-/* 0x0567: main_not_ctx_chan */
-	0xb0420ef4,
-	0x1bf402e4,
-	0x3c87f12e,
+	0x0699f094,
+	0xf40089d0,
+/* 0x050a: chsw_prev_no_next */
+	0x20f9310e,
+	0xf40212b9,
+	0x32f40132,
+	0x2a21f502,
+	0xf120fc08,
+	0xb60b0017,
+	0x12d00614,
+	0x130ef400,
+/* 0x0528: chsw_no_prev */
+	0xf41f23c8,
+	0x31f40d0b,
+	0x0232f401,
+	0x082a21f5,
+/* 0x0538: chsw_done */
+	0x0b0c17f1,
+	0xf00614b6,
+	0x12d00127,
+	0x5c87f100,
 	0x0684b608,
 	0x99f094bd,
-	0x0089d007,
-	0xf40132f4,
-	0x21f50232,
-	0x87f10829,
-	0x84b6085c,
+	0x0089d004,
+	0xff200ef5,
+/* 0x0558: main_not_ctx_switch */
+	0xf401e4b0,
+	0xf2b90d1b,
+	0xb621f502,
+	0x420ef407,
+/* 0x0568: main_not_ctx_chan */
+	0xf402e4b0,
+	0x87f12e1b,
+	0x84b6083c,
 	0xf094bd06,
 	0x89d00799,
-	0x110ef400,
-/* 0x0598: main_not_ctx_save */
-	0xf010ef94,
-	0x21f501f5,
-	0x0ef502ec,
-/* 0x05a6: main_done */
-	0x17f1fed1,
-	0x14b60820,
-	0xf024bd06,
-	0x12d01f29,
-	0xbe0ef500,
-/* 0x05b9: ih */
-	0xfe80f9fe,
-	0x80f90188,
-	0xa0f990f9,
-	0xd0f9b0f9,
-	0xf0f9e0f9,
-	0xc4800acf,
-	0x0bf404ab,
-	0x00b7f11d,
-	0x08d7f019,
-	0xcf40becf,
-	0x21f400bf,
-	0x00b0b704,
-	0x01e7f004,
-/* 0x05ef: ih_no_fifo */
-	0xe400bed0,
-	0xf40100ab,
-	0xd7f00d0b,
-	0x01e7f108,
-	0x0421f440,
-/* 0x0600: ih_no_ctxsw */
-	0x0104b7f1,
-	0xabffb0bd,
-	0x0d0bf4b4,
-	0x0c1ca7f1,
-	0xd006a4b6,
-/* 0x0616: ih_no_other */
-	0x0ad000ab,
-	0xfcf0fc40,
-	0xfcd0fce0,
-	0xfca0fcb0,
-	0xfe80fc90,
-	0x80fc0088,
-	0xf80032f4,
-/* 0x0631: ctx_4160s */
-	0x60e7f101,
-	0x40e3f041,
-	0xf401f7f0,
-/* 0x063e: ctx_4160s_wait */
-	0x21f48d21,
-	0x04ffc868,
-	0xf8fa0bf4,
-/* 0x0649: ctx_4160c */
-	0x60e7f100,
-	0x40e3f041,
-	0x21f4f4bd,
-/* 0x0657: ctx_4170s */
-	0xf100f88d,
-	0xf04170e7,
-	0xf5f040e3,
-	0x8d21f410,
-/* 0x0666: ctx_4170w */
+	0x0132f400,
+	0xf50232f4,
+	0xf1082a21,
+	0xb6085c87,
+	0x94bd0684,
+	0xd00799f0,
+	0x0ef40089,
+/* 0x0599: main_not_ctx_save */
+	0x10ef9411,
+	0xf501f5f0,
+	0xf502ec21,
+/* 0x05a7: main_done */
+	0xf1fed10e,
+	0xb6082017,
+	0x24bd0614,
+	0xd01f29f0,
+	0x0ef50012,
+/* 0x05ba: ih */
+	0x80f9febe,
+	0xf90188fe,
+	0xf990f980,
+	0xf9b0f9a0,
+	0xf9e0f9d0,
+	0x800acff0,
+	0xf404abc4,
+	0xb7f11d0b,
+	0xd7f01900,
+	0x40becf08,
+	0xf400bfcf,
+	0xb0b70421,
+	0xe7f00400,
+	0x00bed001,
+/* 0x05f0: ih_no_fifo */
+	0x0100abe4,
+	0xf00d0bf4,
+	0xe7f108d7,
+	0x21f44001,
+/* 0x0601: ih_no_ctxsw */
+	0x04b7f104,
+	0xffb0bd01,
+	0x0bf4b4ab,
+	0x1ca7f10d,
+	0x06a4b60c,
+/* 0x0617: ih_no_other */
+	0xd000abd0,
+	0xf0fc400a,
+	0xd0fce0fc,
+	0xa0fcb0fc,
+	0x80fc90fc,
+	0xfc0088fe,
+	0x0032f480,
+/* 0x0632: ctx_4160s */
+	0xe7f101f8,
+	0xe3f04160,
+	0x01f7f040,
+/* 0x063f: ctx_4160s_wait */
+	0xf48d21f4,
+	0xffc86821,
+	0xfa0bf404,
+/* 0x064a: ctx_4160c */
 	0xe7f100f8,
-	0xe3f04170,
-	0x6821f440,
-	0xf410f4f0,
-	0x00f8f31b,
-/* 0x0678: ctx_redswitch */
-	0x0614e7f1,
-	0xf106e4b6,
-	0xd00270f7,
-	0xf7f000ef,
-/* 0x0689: ctx_redswitch_delay */
-	0x01f2b608,
-	0xf1fd1bf4,
-	0xd00770f7,
-	0x00f800ef,
-/* 0x0698: ctx_86c */
-	0x086ce7f1,
-	0xd006e4b6,
-	0xe7f100ef,
-	0xe3f08a14,
-	0x8d21f440,
-	0xa86ce7f1,
-	0xf441e3f0,
+	0xe3f04160,
+	0xf4f4bd40,
 	0x00f88d21,
-/* 0x06b8: ctx_load */
-	0x083c87f1,
-	0xbd0684b6,
-	0x0599f094,
-	0xf00089d0,
-	0x21f40ca7,
-	0x2417f1c9,
-	0x0614b60a,
-	0xf10010d0,
-	0xb60b0037,
-	0x32d00634,
-	0x0c17f140,
-	0x0614b60a,
-	0xd00747f0,
-	0x14d00012,
-/* 0x06f1: ctx_chan_wait_0 */
-	0x4014cf40,
-	0xf41f44f0,
-	0x32d0fa1b,
-	0x000bfe00,
-	0xb61f2af0,
-	0x20b60424,
-	0x3c87f102,
+/* 0x0658: ctx_4170s */
+	0x4170e7f1,
+	0xf040e3f0,
+	0x21f410f5,
+/* 0x0667: ctx_4170w */
+	0xf100f88d,
+	0xf04170e7,
+	0x21f440e3,
+	0x10f4f068,
+	0xf8f31bf4,
+/* 0x0679: ctx_redswitch */
+	0x14e7f100,
+	0x06e4b606,
+	0x0270f7f1,
+	0xf000efd0,
+/* 0x068a: ctx_redswitch_delay */
+	0xf2b608f7,
+	0xfd1bf401,
+	0x0770f7f1,
+	0xf800efd0,
+/* 0x0699: ctx_86c */
+	0x6ce7f100,
+	0x06e4b608,
+	0xf100efd0,
+	0xf08a14e7,
+	0x21f440e3,
+	0x6ce7f18d,
+	0x41e3f0a8,
+	0xf88d21f4,
+/* 0x06b9: ctx_load */
+	0x3c87f100,
 	0x0684b608,
 	0x99f094bd,
-	0x0089d008,
-	0x0a0417f1,
+	0x0089d005,
+	0xf40ca7f0,
+	0x17f1c921,
+	0x14b60a24,
+	0x0010d006,
+	0x0b0037f1,
+	0xd00634b6,
+	0x17f14032,
+	0x14b60a0c,
+	0x0747f006,
+	0xd00012d0,
+/* 0x06f2: ctx_chan_wait_0 */
+	0x14cf4014,
+	0x1f44f040,
+	0xd0fa1bf4,
+	0x0bfe0032,
+	0x1f2af000,
+	0xb60424b6,
+	0x87f10220,
+	0x84b6083c,
+	0xf094bd06,
+	0x89d00899,
+	0x0417f100,
+	0x0614b60a,
+	0xf10012d0,
+	0xb60a2017,
+	0x27f00614,
+	0x0023f102,
+	0x0012d080,
+	0xf11017f0,
+	0xf0020027,
+	0x12fa0223,
+	0xf103f805,
+	0xb6085c87,
+	0x94bd0684,
+	0xd00899f0,
+	0x01980089,
+	0x1814b681,
+	0xb6800298,
+	0x12fd0825,
+	0x16018005,
+	0x083c87f1,
+	0xbd0684b6,
+	0x0999f094,
+	0xf10089d0,
+	0xb60a0427,
+	0x21d00624,
+	0x0127f000,
+	0x0a2017f1,
 	0xd00614b6,
 	0x17f10012,
-	0x14b60a20,
-	0x0227f006,
-	0x800023f1,
-	0xf00012d0,
-	0x27f11017,
-	0x23f00300,
-	0x0512fa02,
+	0x13f00100,
+	0x0501fa06,
 	0x87f103f8,
 	0x84b6085c,
 	0xf094bd06,
-	0x89d00899,
-	0xc1019800,
-	0x981814b6,
-	0x25b6c002,
-	0x0512fd08,
-	0xf1160180,
-	0xb6083c87,
-	0x94bd0684,
-	0xd00999f0,
-	0x27f10089,
-	0x24b60a04,
-	0x0021d006,
-	0xf10127f0,
-	0xb60a2017,
-	0x12d00614,
-	0x0017f100,
-	0x0613f002,
-	0xf80501fa,
-	0x5c87f103,
+	0x89d00999,
+	0x5c87f100,
 	0x0684b608,
 	0x99f094bd,
-	0x0089d009,
-	0x085c87f1,
-	0xbd0684b6,
-	0x0599f094,
-	0xf80089d0,
-/* 0x07b5: ctx_chan */
-	0x3121f500,
-	0xb821f506,
-	0x0ca7f006,
-	0xf1c921f4,
-	0xb60a1017,
-	0x27f00614,
-	0x0012d005,
-/* 0x07d0: ctx_chan_wait */
-	0xfd0012cf,
-	0x1bf40522,
-	0x4921f5fa,
-/* 0x07df: ctx_mmio_exec */
-	0x9800f806,
-	0x27f18103,
-	0x24b60a04,
-	0x0023d006,
-/* 0x07ee: ctx_mmio_loop */
-	0x34c434bd,
-	0x0f1bf4ff,
-	0x030057f1,
-	0xfa0653f0,
-	0x03f80535,
-/* 0x0800: ctx_mmio_pull */
-	0x98c04e98,
-	0x21f4c14f,
-	0x0830b68d,
-	0xf40112b6,
-/* 0x0812: ctx_mmio_done */
-	0x0398df1b,
-	0x0023d016,
-	0xf1800080,
-	0xf0020017,
-	0x01fa0613,
-	0xf803f806,
-/* 0x0829: ctx_xfer */
-	0x00f7f100,
-	0x06f4b60c,
-	0xd004e7f0,
-/* 0x0836: ctx_xfer_idle */
-	0xfecf80fe,
-	0x00e4f100,
-	0xf91bf420,
-	0xf40611f4,
-/* 0x0846: ctx_xfer_pre */
-	0xf7f01102,
-	0x9821f510,
-	0x3121f506,
-	0x1c11f406,
-/* 0x0854: ctx_xfer_pre_load */
-	0xf502f7f0,
-	0xf5065721,
-	0xf5066621,
-	0xbd067821,
-	0x5721f5f4,
-	0xb821f506,
-/* 0x086d: ctx_xfer_exec */
-	0x16019806,
-	0x041427f1,
-	0xd00624b6,
-	0xe7f10020,
-	0xe3f0a500,
-	0x021fb941,
-	0xb68d21f4,
-	0xfcf004e0,
-	0x022cf001,
-	0xfd0124b6,
-	0x21f405f2,
-	0xfc17f18d,
-	0x0213f04a,
-	0xd00c27f0,
-	0x21f50012,
-	0x27f10207,
-	0x23f047fc,
-	0x0020d002,
-	0xb6012cf0,
-	0x12d00320,
-	0x01acf000,
-	0xf006a5f0,
-	0x0c9800b7,
-	0x150d9814,
-	0xf500e7f0,
-	0xf0015c21,
-	0x21f508a7,
-	0x21f50103,
-	0x01f40207,
-	0x0ca7f022,
-	0xf1c921f4,
-	0xb60a1017,
-	0x27f00614,
-	0x0012d005,
-/* 0x08f4: ctx_xfer_post_save_wait */
-	0xfd0012cf,
-	0x1bf40522,
-	0x3202f4fa,
-/* 0x0900: ctx_xfer_post */
-	0xf502f7f0,
-	0xbd065721,
-	0x9821f5f4,
-	0x2621f506,
-	0x6621f502,
-	0xf5f4bd06,
-	0xf4065721,
-	0x01981011,
-	0x0511fd80,
-	0xf5070bf4,
-/* 0x092b: ctx_xfer_no_post_mmio */
-	0xf507df21,
-/* 0x092f: ctx_xfer_done */
-	0xf8064921,
-	0x00000000,
+	0x0089d005,
+/* 0x07b6: ctx_chan */
+	0x21f500f8,
+	0x21f50632,
+	0xa7f006b9,
+	0xc921f40c,
+	0x0a1017f1,
+	0xf00614b6,
+	0x12d00527,
+/* 0x07d1: ctx_chan_wait */
+	0x0012cf00,
+	0xf40522fd,
+	0x21f5fa1b,
+	0x00f8064a,
+/* 0x07e0: ctx_mmio_exec */
+	0xf1410398,
+	0xb60a0427,
+	0x23d00624,
+/* 0x07ef: ctx_mmio_loop */
+	0xc434bd00,
+	0x1bf4ff34,
+	0x0057f10f,
+	0x0653f002,
+	0xf80535fa,
+/* 0x0801: ctx_mmio_pull */
+	0x804e9803,
+	0xf4814f98,
+	0x30b68d21,
+	0x0112b608,
+/* 0x0813: ctx_mmio_done */
+	0x98df1bf4,
+	0x23d01603,
+	0x40008000,
+	0x010017f1,
+	0xfa0613f0,
+	0x03f80601,
+/* 0x082a: ctx_xfer */
+	0xf7f100f8,
+	0xf4b60c00,
+	0x04e7f006,
+/* 0x0837: ctx_xfer_idle */
+	0xcf80fed0,
+	0xe4f100fe,
+	0x1bf42000,
+	0x0611f4f9,
+/* 0x0847: ctx_xfer_pre */
+	0xf01102f4,
+	0x21f510f7,
+	0x21f50699,
+	0x11f40632,
+/* 0x0855: ctx_xfer_pre_load */
+	0x02f7f01c,
+	0x065821f5,
+	0x066721f5,
+	0x067921f5,
+	0x21f5f4bd,
+	0x21f50658,
+/* 0x086e: ctx_xfer_exec */
+	0x019806b9,
+	0x1427f116,
+	0x0624b604,
+	0xf10020d0,
+	0xf0a500e7,
+	0x1fb941e3,
+	0x8d21f402,
+	0xf004e0b6,
+	0x2cf001fc,
+	0x0124b602,
+	0xf405f2fd,
+	0x17f18d21,
+	0x13f04afc,
+	0x0c27f002,
+	0xf50012d0,
+	0xf1020721,
+	0xf047fc27,
+	0x20d00223,
+	0x012cf000,
+	0xd00320b6,
+	0xacf00012,
+	0x06a5f001,
+	0x9800b7f0,
+	0x0d98140c,
+	0x00e7f015,
+	0x015c21f5,
+	0xf508a7f0,
+	0xf5010321,
+	0xf4020721,
+	0xa7f02201,
+	0xc921f40c,
+	0x0a1017f1,
+	0xf00614b6,
+	0x12d00527,
+/* 0x08f5: ctx_xfer_post_save_wait */
+	0x0012cf00,
+	0xf40522fd,
+	0x02f4fa1b,
+/* 0x0901: ctx_xfer_post */
+	0x02f7f032,
+	0x065821f5,
+	0x21f5f4bd,
+	0x21f50699,
+	0x21f50226,
+	0xf4bd0667,
+	0x065821f5,
+	0x981011f4,
+	0x11fd4001,
+	0x070bf405,
+	0x07e021f5,
+/* 0x092c: ctx_xfer_no_post_mmio */
+	0x064a21f5,
+/* 0x0930: ctx_xfer_done */
+	0x000000f8,
 	0x00000000,
 	0x00000000,
 	0x00000000,

+ 71 - 699
drivers/gpu/drm/nouveau/core/engine/graph/fuc/hubnve0.fuc

@@ -1,6 +1,5 @@
-/* fuc microcode for nve0 PGRAPH/HUB
- *
- * Copyright 2011 Red Hat Inc.
+/*
+ * Copyright 2013 Red Hat Inc.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -20,22 +19,16 @@
  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  * OTHER DEALINGS IN THE SOFTWARE.
  *
- * Authors: Ben Skeggs
+ * Authors: Ben Skeggs <bskeggs@redhat.com>
  */
 
-/* To build:
- *    m4 nve0_grhub.fuc | envyas -a -w -m fuc -V nva3 -o nve0_grhub.fuc.h
- */
+#define NVGK
+#include "macros.fuc"
 
 .section #nve0_grhub_data
-include(`nve0.fuc')
-gpc_count:		.b32 0
-rop_count:		.b32 0
-cmd_queue:		queue_init
-hub_mmio_list_head:	.b32 0
-hub_mmio_list_tail:	.b32 0
-
-ctx_current:		.b32 0
+#define INCLUDE_DATA
+#include "com.fuc"
+#include "hub.fuc"
 
 chipsets:
 .b8  0xe4 0 0 0
@@ -47,6 +40,9 @@ chipsets:
 .b8  0xe6 0 0 0
 .b16 #nve4_hub_mmio_head
 .b16 #nve4_hub_mmio_tail
+.b8  0xf0 0 0 0
+.b16 #nvf0_hub_mmio_head
+.b16 #nvf0_hub_mmio_tail
 .b8  0 0 0 0
 
 nve4_hub_mmio_head:
@@ -103,691 +99,67 @@ mmctx_data(0x408900, 3)
 mmctx_data(0x408980, 1)
 nve4_hub_mmio_tail:
 
-.align 256
-chan_data:
-chan_mmio_count:	.b32 0
-chan_mmio_address:	.b32 0
-
-.align 256
-xfer_data: 		.b32 0
+nvf0_hub_mmio_head:
+mmctx_data(0x17e91c, 2)
+mmctx_data(0x400204, 2)
+mmctx_data(0x404004, 17)
+mmctx_data(0x4040a8, 9)
+mmctx_data(0x4040d0, 7)
+mmctx_data(0x4040f8, 1)
+mmctx_data(0x404100, 10)
+mmctx_data(0x404130, 3)
+mmctx_data(0x404150, 3)
+mmctx_data(0x404164, 1)
+mmctx_data(0x40417c, 2)
+mmctx_data(0x4041a0, 4)
+mmctx_data(0x404200, 4)
+mmctx_data(0x404404, 12)
+mmctx_data(0x404438, 1)
+mmctx_data(0x404460, 4)
+mmctx_data(0x404480, 1)
+mmctx_data(0x404498, 1)
+mmctx_data(0x404604, 4)
+mmctx_data(0x404618, 4)
+mmctx_data(0x40462c, 2)
+mmctx_data(0x404640, 1)
+mmctx_data(0x404654, 1)
+mmctx_data(0x404660, 1)
+mmctx_data(0x404678, 19)
+mmctx_data(0x4046c8, 3)
+mmctx_data(0x404700, 3)
+mmctx_data(0x404718, 10)
+mmctx_data(0x404744, 2)
+mmctx_data(0x404754, 1)
+mmctx_data(0x405800, 1)
+mmctx_data(0x405830, 3)
+mmctx_data(0x405854, 1)
+mmctx_data(0x405870, 4)
+mmctx_data(0x405a00, 2)
+mmctx_data(0x405a18, 1)
+mmctx_data(0x405b00, 1)
+mmctx_data(0x405b10, 1)
+mmctx_data(0x405b20, 1)
+mmctx_data(0x406020, 1)
+mmctx_data(0x406028, 4)
+mmctx_data(0x4064a8, 5)
+mmctx_data(0x4064c0, 12)
+mmctx_data(0x4064fc, 1)
+mmctx_data(0x407804, 1)
+mmctx_data(0x40780c, 6)
+mmctx_data(0x4078bc, 1)
+mmctx_data(0x408000, 7)
+mmctx_data(0x408064, 1)
+mmctx_data(0x408800, 3)
+mmctx_data(0x408840, 1)
+mmctx_data(0x408900, 3)
+mmctx_data(0x408980, 1)
+nvf0_hub_mmio_tail:
+#undef INCLUDE_DATA
 
 .section #nve0_grhub_code
+#define INCLUDE_CODE
 bra #init
-define(`include_code')
-include(`nve0.fuc')
-
-// reports an exception to the host
-//
-// In: $r15 error code (see nve0.fuc)
-//
-error:
-	push $r14
-	mov $r14 0x814
-	shl b32 $r14 6
-	iowr I[$r14 + 0x000] $r15	// CC_SCRATCH[5] = error code
-	mov $r14 0xc1c
-	shl b32 $r14 6
-	mov $r15 1
-	iowr I[$r14 + 0x000] $r15	// INTR_UP_SET
-	pop $r14
-	ret
-
-// HUB fuc initialisation, executed by triggering ucode start, will
-// fall through to main loop after completion.
-//
-// Input:
-//   CC_SCRATCH[0]: chipset (PMC_BOOT_0 read returns 0x0bad0bad... sigh)
-//
-// Output:
-//   CC_SCRATCH[0]:
-//	     31:31: set to signal completion
-//   CC_SCRATCH[1]:
-//	      31:0: total PGRAPH context size
-//
-init:
-	clear b32 $r0
-	mov $sp $r0
-	mov $xdbase $r0
-
-	// enable fifo access
-	mov $r1 0x1200
-	mov $r2 2
-	iowr I[$r1 + 0x000] $r2	// FIFO_ENABLE
-
-	// setup i0 handler, and route all interrupts to it
-	mov $r1 #ih
-	mov $iv0 $r1
-	mov $r1 0x400
-	iowr I[$r1 + 0x300] $r0	// INTR_DISPATCH
-
-	// route HUB_CHANNEL_SWITCH to fuc interrupt 8
-	mov $r3 0x404
-	shl b32 $r3 6
-	mov $r2 0x2003		// { HUB_CHANNEL_SWITCH, ZERO } -> intr 8
-	iowr I[$r3 + 0x000] $r2
-
-	// not sure what these are, route them because NVIDIA does, and
-	// the IRQ handler will signal the host if we ever get one.. we
-	// may find out if/why we need to handle these if so..
-	//
-	mov $r2 0x2004
-	iowr I[$r3 + 0x004] $r2 // { 0x04, ZERO } -> intr 9
-	mov $r2 0x200b
-	iowr I[$r3 + 0x008] $r2 // { 0x0b, ZERO } -> intr 10
-	mov $r2 0x200c
-	iowr I[$r3 + 0x01c] $r2 // { 0x0c, ZERO } -> intr 15
-
-	// enable all INTR_UP interrupts
-	mov $r2 0xc24
-	shl b32 $r2 6
-	not b32 $r3 $r0
-	iowr I[$r2] $r3
-
-	// enable fifo, ctxsw, 9, 10, 15 interrupts
-	mov $r2 -0x78fc		// 0x8704
-	sethi $r2 0
-	iowr I[$r1 + 0x000] $r2	// INTR_EN_SET
-
-	// fifo level triggered, rest edge
-	sub b32 $r1 0x100
-	mov $r2 4
-	iowr I[$r1] $r2
-
-	// enable interrupts
-	bset $flags ie0
-
-	// fetch enabled GPC/ROP counts
-	mov $r14 -0x69fc	// 0x409604
-	sethi $r14 0x400000
-	call #nv_rd32
-	extr $r1 $r15 16:20
-	st b32 D[$r0 + #rop_count] $r1
-	and $r15 0x1f
-	st b32 D[$r0 + #gpc_count] $r15
-
-	// set BAR_REQMASK to GPC mask
-	mov $r1 1
-	shl b32 $r1 $r15
-	sub b32 $r1 1
-	mov $r2 0x40c
-	shl b32 $r2 6
-	iowr I[$r2 + 0x000] $r1
-	iowr I[$r2 + 0x100] $r1
-
-	// find context data for this chipset
-	mov $r2 0x800
-	shl b32 $r2 6
-	iord $r2 I[$r2 + 0x000]		// CC_SCRATCH[0]
-	mov $r15 #chipsets - 8
-	init_find_chipset:
-		add b32 $r15 8
-		ld b32 $r3 D[$r15 + 0x00]
-		cmpu b32 $r3 $r2
-		bra e #init_context
-		cmpu b32 $r3 0
-		bra ne #init_find_chipset
-		// unknown chipset
-		ret
-
-	// context size calculation, reserve first 256 bytes for use by fuc
-	init_context:
-	mov $r1 256
-
-	// calculate size of mmio context data
-	ld b16 $r14 D[$r15 + 4]
-	ld b16 $r15 D[$r15 + 6]
-	sethi $r14 0
-	st b32 D[$r0 + #hub_mmio_list_head] $r14
-	st b32 D[$r0 + #hub_mmio_list_tail] $r15
-	call #mmctx_size
-
-	// set mmctx base addresses now so we don't have to do it later,
-	// they don't (currently) ever change
-	mov $r3 0x700
-	shl b32 $r3 6
-	shr b32 $r4 $r1 8
-	iowr I[$r3 + 0x000] $r4		// MMCTX_SAVE_SWBASE
-	iowr I[$r3 + 0x100] $r4		// MMCTX_LOAD_SWBASE
-	add b32 $r3 0x1300
-	add b32 $r1 $r15
-	shr b32 $r15 2
-	iowr I[$r3 + 0x000] $r15	// MMCTX_LOAD_COUNT, wtf for?!?
-
-	// strands, base offset needs to be aligned to 256 bytes
-	shr b32 $r1 8
-	add b32 $r1 1
-	shl b32 $r1 8
-	mov b32 $r15 $r1
-	call #strand_ctx_init
-	add b32 $r1 $r15
-
-	// initialise each GPC in sequence by passing in the offset of its
-	// context data in GPCn_CC_SCRATCH[1], and starting its FUC (which
-	// has previously been uploaded by the host) running.
-	//
-	// the GPC fuc init sequence will set GPCn_CC_SCRATCH[0] bit 31
-	// when it has completed, and return the size of its context data
-	// in GPCn_CC_SCRATCH[1]
-	//
-	ld b32 $r3 D[$r0 + #gpc_count]
-	mov $r4 0x2000
-	sethi $r4 0x500000
-	init_gpc:
-		// setup, and start GPC ucode running
-		add b32 $r14 $r4 0x804
-		mov b32 $r15 $r1
-		call #nv_wr32			// CC_SCRATCH[1] = ctx offset
-		add b32 $r14 $r4 0x800
-		mov b32 $r15 $r2
-		call #nv_wr32			// CC_SCRATCH[0] = chipset
-		add b32 $r14 $r4 0x10c
-		clear b32 $r15
-		call #nv_wr32
-		add b32 $r14 $r4 0x104
-		call #nv_wr32			// ENTRY
-		add b32 $r14 $r4 0x100
-		mov $r15 2			// CTRL_START_TRIGGER
-		call #nv_wr32			// CTRL
-
-		// wait for it to complete, and adjust context size
-		add b32 $r14 $r4 0x800
-		init_gpc_wait:
-			call #nv_rd32
-			xbit $r15 $r15 31
-			bra e #init_gpc_wait
-		add b32 $r14 $r4 0x804
-		call #nv_rd32
-		add b32 $r1 $r15
-
-		// next!
-		add b32 $r4 0x8000
-		sub b32 $r3 1
-		bra ne #init_gpc
-
-	// save context size, and tell host we're ready
-	mov $r2 0x800
-	shl b32 $r2 6
-	iowr I[$r2 + 0x100] $r1		// CC_SCRATCH[1]  = context size
-	add b32 $r2 0x800
-	clear b32 $r1
-	bset $r1 31
-	iowr I[$r2 + 0x000] $r1		// CC_SCRATCH[0] |= 0x80000000
-
-// main - command dispatch loop: sleeps until woken up by the interrupt
-// handler, pulls one command from cmd_queue and executes its handler
-//
-main:
-	// sleep until we have something to do
-	bset $flags $p0
-	sleep $p0
-	mov $r13 #cmd_queue
-	call #queue_get
-	bra $p1 #main	// presumably $p1 means nothing was dequeued - confirm against queue_get
-
-	// context switch, requested by GPU?
-	cmpu b32 $r14 0x4001	// 0x4001 is the command ih queues for a ctxsw interrupt
-	bra ne #main_not_ctx_switch
-		trace_set(T_AUTO)
-		mov $r1 0xb00
-		shl b32 $r1 6
-		iord $r2 I[$r1 + 0x100]		// CHAN_NEXT
-		iord $r1 I[$r1 + 0x000]		// CHAN_CUR
-
-		xbit $r3 $r1 31	// bit 31 of CHAN_CUR clear: skip the save
-		bra e #chsw_no_prev
-			xbit $r3 $r2 31	// bit 31 of CHAN_NEXT clear: save only
-			bra e #chsw_prev_no_next
-				push $r2
-				mov b32 $r2 $r1
-				trace_set(T_SAVE)
-				bclr $flags $p1	// $p1 clear = save
-				bset $flags $p2	// $p2 set = a load follows this save
-				call #ctx_xfer
-				trace_clr(T_SAVE);
-				pop $r2
-				trace_set(T_LOAD);
-				bset $flags $p1	// $p1 set = load ($p2 still set: a save preceded it)
-				call #ctx_xfer
-				trace_clr(T_LOAD);
-				bra #chsw_done
-			chsw_prev_no_next:
-				push $r2
-				mov b32 $r2 $r1
-				bclr $flags $p1
-				bclr $flags $p2
-				call #ctx_xfer
-				pop $r2
-				mov $r1 0xb00
-				shl b32 $r1 6
-				iowr I[$r1] $r2	// CHAN_CUR = the CHAN_NEXT value read above
-				bra #chsw_done
-		chsw_no_prev:
-			xbit $r3 $r2 31
-			bra e #chsw_done
-				bset $flags $p1
-				bclr $flags $p2
-				call #ctx_xfer
-
-		// ack the context switch request
-		chsw_done:
-		mov $r1 0xb0c
-		shl b32 $r1 6
-		mov $r2 1
-		iowr I[$r1 + 0x000] $r2		// 0x409b0c
-		trace_clr(T_AUTO)
-		bra #main
-
-	// request to set current channel? (*not* a context switch)
-	main_not_ctx_switch:
-	cmpu b32 $r14 0x0001
-	bra ne #main_not_ctx_chan
-		mov b32 $r2 $r15	// command data is the channel address ctx_chan expects in $r2
-		call #ctx_chan
-		bra #main_done
-
-	// request to store current channel context?
-	main_not_ctx_chan:
-	cmpu b32 $r14 0x0002
-	bra ne #main_not_ctx_save
-		trace_set(T_SAVE)
-		bclr $flags $p1
-		bclr $flags $p2
-		call #ctx_xfer
-		trace_clr(T_SAVE)
-		bra #main_done
-
-	main_not_ctx_save:
-		shl b32 $r15 $r14 16	// unknown command: report (cmd << 16) | E_BAD_COMMAND
-		or $r15 E_BAD_COMMAND
-		call #error
-		bra #main
-
-	main_done:
-	mov $r1 0x820
-	shl b32 $r1 6
-	clear b32 $r2
-	bset $r2 31
-	iowr I[$r1 + 0x000] $r2		// CC_SCRATCH[0] |= 0x80000000
-	bra #main
-
-// ih - interrupt handler: queues fifo/ctxsw commands for main(), forwards any other interrupt bits to the host
-ih:
-	push $r8
-	mov $r8 $flags
-	push $r8	// second push saves $flags (copied into $r8 above)
-	push $r9
-	push $r10
-	push $r11
-	push $r13
-	push $r14
-	push $r15
-
-	// incoming fifo command?
-	iord $r10 I[$r0 + 0x200]	// INTR
-	and $r11 $r10 0x00000004
-	bra e #ih_no_fifo
-		// queue incoming fifo command for later processing
-		mov $r11 0x1900
-		mov $r13 #cmd_queue
-		iord $r14 I[$r11 + 0x100]	// FIFO_CMD
-		iord $r15 I[$r11 + 0x000]	// FIFO_DATA
-		call #queue_put
-		add b32 $r11 0x400
-		mov $r14 1
-		iowr I[$r11 + 0x000] $r14	// FIFO_ACK
-
-	// context switch request?
-	ih_no_fifo:
-	and $r11 $r10 0x00000100
-	bra e #ih_no_ctxsw
-		// enqueue a context switch for later processing
-		mov $r13 #cmd_queue
-		mov $r14 0x4001	// command code main() tests for; $r15 (data) is not set on this path
-		call #queue_put
-
-	// anything we didn't handle, bring it to the host's attention
-	ih_no_ctxsw:
-	mov $r11 0x104	// 0x104 = fifo (0x004) | ctxsw (0x100), the bits handled above
-	not b32 $r11
-	and $r11 $r10 $r11
-	bra e #ih_no_other
-		mov $r10 0xc1c	// NOTE(review): overwrites the INTR value that is written to INTR_ACK below - confirm intended
-		shl b32 $r10 6
-		iowr I[$r10] $r11	// INTR_UP_SET
-
-	// ack, and wake up main()
-	ih_no_other:
-	iowr I[$r0 + 0x100] $r10	// INTR_ACK
-
-	pop $r15
-	pop $r14
-	pop $r13
-	pop $r11
-	pop $r10
-	pop $r9
-	pop $r8
-	mov $flags $r8	// restore the saved $flags before touching $p0
-	pop $r8
-	bclr $flags $p0	// main() sleeps on $p0; clearing it lets main() run
-	iret
-
-// ctx_4170s - ORs 0x10 into $r15 and writes the result to 0x404170 via nv_wr32; purpose not understood
-//
-// In: $r15 value to set 0x404170 to (bit 0x10 is forced on before the write)
-//
-ctx_4170s:
-	mov $r14 0x4170
-	sethi $r14 0x400000
-	or $r15 0x10	// ctx_4170w polls this same bit until it reads back clear
-	call #nv_wr32
-	ret
-
-// ctx_4170w - waits for a ctx_4170s() call to complete: re-reads 0x404170 until bit 0x10 is clear
-//
-ctx_4170w:
-	mov $r14 0x4170
-	sethi $r14 0x400000
-	call #nv_rd32
-	and $r15 0x10
-	bra ne #ctx_4170w	// still set: restart from the top and read again
-	ret
-
-// ctx_redswitch - writes 0x270 to HUB_RED_SWITCH (some enables off), spins
-// briefly, then writes 0x770 (everything back on).  Uses $r14 and $r15.
-//
-// Not sure how exactly this helps, perhaps "ENABLE" is not such a good
-// description for the bits we turn off?  Without this, funny things happen.
-//
-ctx_redswitch:
-	mov $r14 0x614
-	shl b32 $r14 6
-	mov $r15 0x270
-	iowr I[$r14] $r15	// HUB_RED_SWITCH = ENABLE_GPC, POWER_ALL
-	mov $r15 8	// delay loop counter: 8 iterations
-	ctx_redswitch_delay:
-		sub b32 $r15 1
-		bra ne #ctx_redswitch_delay
-	mov $r15 0x770
-	iowr I[$r14] $r15	// HUB_RED_SWITCH = ENABLE_ALL, POWER_ALL
-	ret
-
-// ctx_86c - writes the same value to HUB(0x86c), ROP(0xa14) and GPC(0x86c).  Purpose unknown, except that
-// unless the value is 0x10 the strand context is saved (and presumably restored) incorrectly.
-//
-// In: $r15 value to set to (0x00/0x10 are used)
-//
-ctx_86c:
-	mov $r14 0x86c
-	shl b32 $r14 6
-	iowr I[$r14] $r15	// HUB(0x86c) = val
-	mov $r14 -0x75ec	// negative immediate; presumably sign-extended so the low 16 bits are 0x8a14 - confirm
-	sethi $r14 0x400000
-	call #nv_wr32		// ROP(0xa14) = val
-	mov $r14 -0x5794	// as above; presumably low 16 bits 0xa86c - confirm
-	sethi $r14 0x410000
-	call #nv_wr32		// GPC(0x86c) = val
-	ret
-
-// ctx_load - loads a channel's ctxctl data, and selects its vm
-//
-// In: $r2 channel address (modified by this routine)
-//
-ctx_load:
-	trace_set(T_CHAN)
-
-	// switch to channel, somewhat magic in parts..
-	mov $r10 12		// DONE_UNK12
-	call #wait_donez
-	mov $r1 0xa24
-	shl b32 $r1 6
-	iowr I[$r1 + 0x000] $r0	// 0x409a24
-	mov $r3 0xb00
-	shl b32 $r3 6
-	iowr I[$r3 + 0x100] $r2	// CHAN_NEXT
-	mov $r1 0xa0c
-	shl b32 $r1 6
-	mov $r4 7
-	iowr I[$r1 + 0x000] $r2 // MEM_CHAN
-	iowr I[$r1 + 0x100] $r4	// MEM_CMD
-	ctx_chan_wait_0:
-		iord $r4 I[$r1 + 0x100]
-		and $r4 0x1f
-		bra ne #ctx_chan_wait_0	// poll MEM_CMD until its low 5 bits read back zero
-	iowr I[$r3 + 0x000] $r2	// CHAN_CUR
-
-	// load channel header, fetch PGRAPH context pointer
-	mov $xtargets $r0
-	bclr $r2 31
-	shl b32 $r2 4
-	add b32 $r2 2
-
-	trace_set(T_LCHAN)
-	mov $r1 0xa04
-	shl b32 $r1 6
-	iowr I[$r1 + 0x000] $r2		// MEM_BASE
-	mov $r1 0xa20
-	shl b32 $r1 6
-	mov $r2 0x0002
-	sethi $r2 0x80000000
-	iowr I[$r1 + 0x000] $r2		// MEM_TARGET = vram
-	mov $r1 0x10			// chan + 0x0210
-	mov $r2 #xfer_data
-	sethi $r2 0x00020000		// 16 bytes
-	xdld $r1 $r2
-	xdwait
-	trace_clr(T_LCHAN)
-
-	// update current context: ctx_current = (xfer_data[4] << 24) | (xfer_data[0] >> 8)
-	ld b32 $r1 D[$r0 + #xfer_data + 4]
-	shl b32 $r1 24
-	ld b32 $r2 D[$r0 + #xfer_data + 0]
-	shr b32 $r2 8
-	or $r1 $r2
-	st b32 D[$r0 + #ctx_current] $r1
-
-	// set transfer base to start of context, and fetch context header
-	trace_set(T_LCTXH)
-	mov $r2 0xa04
-	shl b32 $r2 6
-	iowr I[$r2 + 0x000] $r1		// MEM_BASE
-	mov $r2 1
-	mov $r1 0xa20
-	shl b32 $r1 6
-	iowr I[$r1 + 0x000] $r2		// MEM_TARGET = vm
-	mov $r1 #chan_data
-	sethi $r1 0x00060000		// 256 bytes
-	xdld $r0 $r1
-	xdwait
-	trace_clr(T_LCTXH)
-
-	trace_clr(T_CHAN)
-	ret
-
-// ctx_chan - handler for HUB_SET_CHAN command, will set a channel as
-//            the active channel for ctxctl (via ctx_load), but not
-//            transfer any context data.  Intended for use only during
-//            initial context construction.
-//
-// In: $r2 channel address
-//
-ctx_chan:
-	call #ctx_load
-	mov $r10 12			// DONE_UNK12
-	call #wait_donez
-	mov $r1 0xa10
-	shl b32 $r1 6
-	mov $r2 5
-	iowr I[$r1 + 0x000] $r2		// MEM_CMD = 5 (???)
-	ctx_chan_wait:
-		iord $r2 I[$r1 + 0x000]
-		or $r2 $r2	// OR with itself only to set the flags for the branch below
-		bra ne #ctx_chan_wait	// loop until the register reads back zero
-	ret
-
-// ctx_mmio_exec - execute per-context state overrides list
-//
-// Only executed on the first load of a channel.  Might want to look into
-// removing this and having the host directly modify the channel's context
-// to change this state...  The nouveau DRM already builds this list as
-// it's definitely needed for NVIDIA's, so we may as well use it for now
-//
-// Input: $r1 mmio list length in entries (ctx_xfer only calls this with a non-zero count)
-//
-ctx_mmio_exec:
-	// set transfer base to be the mmio list
-	ld b32 $r3 D[$r0 + #chan_mmio_address]
-	mov $r2 0xa04
-	shl b32 $r2 6
-	iowr I[$r2 + 0x000] $r3		// MEM_BASE
-
-	clear b32 $r3
-	ctx_mmio_loop:
-		// fetch next 256 bytes of mmio list if necessary
-		and $r4 $r3 0xff	// $r4 = byte offset of this entry within the 256-byte buffer
-		bra ne #ctx_mmio_pull
-			mov $r5 #xfer_data
-			sethi $r5 0x00060000	// 256 bytes
-			xdld $r3 $r5
-			xdwait
-
-		// execute a single list entry
-		ctx_mmio_pull:
-		ld b32 $r14 D[$r4 + #xfer_data + 0x00]
-		ld b32 $r15 D[$r4 + #xfer_data + 0x04]
-		call #nv_wr32
-
-		// next!
-		add b32 $r3 8	// each entry is 8 bytes: the $r14 word then the $r15 word
-		sub b32 $r1 1
-		bra ne #ctx_mmio_loop
-
-	// set transfer base back to the current context
-	ctx_mmio_done:
-	ld b32 $r3 D[$r0 + #ctx_current]
-	iowr I[$r2 + 0x000] $r3		// MEM_BASE
-
-	// disable the mmio list now, we don't need/want to execute it again
-	st b32 D[$r0 + #chan_mmio_count] $r0
-	mov $r1 #chan_data
-	sethi $r1 0x00060000		// 256 bytes
-	xdst $r0 $r1
-	xdwait
-	ret
-
-// ctx_xfer - transfer HUB context data between GPU and storage area
-//
-// In: $r2 channel address
-//     $p1 clear on save, set on load
-//     $p2 set if opposite direction done/will be done, so:
-//		on save it means: "a load will follow this save"
-//		on load it means: "a save preceded this load"
-//
-ctx_xfer:
-	// according to mwk, some kind of wait for idle
-	mov $r15 0xc00
-	shl b32 $r15 6
-	mov $r14 4
-	iowr I[$r15 + 0x200] $r14
-	ctx_xfer_idle:
-		iord $r14 I[$r15 + 0x000]
-		and $r14 0x2000
-		bra ne #ctx_xfer_idle
-
-	bra not $p1 #ctx_xfer_pre	// save: always run the pre step
-	bra $p2 #ctx_xfer_pre_load	// load after a save: skip ctx_86c(0x10)
-	ctx_xfer_pre:
-		mov $r15 0x10
-		call #ctx_86c
-		bra not $p1 #ctx_xfer_exec	// save: nothing to load first
-
-	ctx_xfer_pre_load:
-		mov $r15 2
-		call #ctx_4170s
-		call #ctx_4170w
-		call #ctx_redswitch
-		clear b32 $r15
-		call #ctx_4170s
-		call #ctx_load
-
-	// fetch context pointer, and initiate xfer on all GPCs
-	ctx_xfer_exec:
-	ld b32 $r1 D[$r0 + #ctx_current]
-	mov $r2 0x414
-	shl b32 $r2 6
-	iowr I[$r2 + 0x000] $r0	// BAR_STATUS = reset
-	mov $r14 -0x5b00
-	sethi $r14 0x410000
-	mov b32 $r15 $r1
-	call #nv_wr32		// GPC_BCAST_WRCMD_DATA = ctx pointer
-	add b32 $r14 4
-	xbit $r15 $flags $p1
-	xbit $r2 $flags $p2
-	shl b32 $r2 1
-	or $r15 $r2	// type = $p1 | ($p2 << 1)
-	call #nv_wr32		// GPC_BCAST_WRCMD_CMD = GPC_XFER(type)
-
-	// strands
-	mov $r1 0x4afc
-	sethi $r1 0x20000
-	mov $r2 0xc
-	iowr I[$r1] $r2		// STRAND_CMD(0x3f) = 0x0c
-	call #strand_wait
-	mov $r2 0x47fc
-	sethi $r2 0x20000
-	iowr I[$r2] $r0		// STRAND_FIRST_GENE(0x3f) = 0x00
-	xbit $r2 $flags $p1
-	add b32 $r2 3
-	iowr I[$r1] $r2		// STRAND_CMD(0x3f) = 0x03/0x04 (SAVE/LOAD)
-
-	// mmio context
-	xbit $r10 $flags $p1	// direction
-	or $r10 6		// first, last
-	mov $r11 0		// base = 0
-	ld b32 $r12 D[$r0 + #hub_mmio_list_head]
-	ld b32 $r13 D[$r0 + #hub_mmio_list_tail]
-	mov $r14 0		// not multi
-	call #mmctx_xfer
-
-	// wait for GPCs to all complete
-	mov $r10 8		// DONE_BAR
-	call #wait_doneo
-
-	// wait for strand xfer to complete
-	call #strand_wait
-
-	// post-op
-	bra $p1 #ctx_xfer_post	// load: skip the save-only MEM_CMD step
-		mov $r10 12		// DONE_UNK12
-		call #wait_donez
-		mov $r1 0xa10
-		shl b32 $r1 6
-		mov $r2 5
-		iowr I[$r1] $r2		// MEM_CMD
-		ctx_xfer_post_save_wait:
-			iord $r2 I[$r1]
-			or $r2 $r2
-			bra ne #ctx_xfer_post_save_wait
-
-	bra $p2 #ctx_xfer_done	// save with a load to follow: skip the post step
-	ctx_xfer_post:
-		mov $r15 2
-		call #ctx_4170s
-		clear b32 $r15
-		call #ctx_86c
-		call #strand_post
-		call #ctx_4170w
-		clear b32 $r15
-		call #ctx_4170s
-
-		bra not $p1 #ctx_xfer_no_post_mmio	// the mmio list is only run on load
-		ld b32 $r1 D[$r0 + #chan_mmio_count]
-		or $r1 $r1
-		bra e #ctx_xfer_no_post_mmio	// count is zero: nothing to execute
-			call #ctx_mmio_exec
-
-		ctx_xfer_no_post_mmio:
-
-	ctx_xfer_done:
-	ret
-
+#include "com.fuc"
+#include "hub.fuc"
 .align 256
+#undef INCLUDE_CODE

+ 536 - 423
drivers/gpu/drm/nouveau/core/engine/graph/fuc/hubnve0.fuc.h

@@ -28,67 +28,7 @@ uint32_t nve0_grhub_data[] = {
 	0x00000000,
 /* 0x0058: ctx_current */
 	0x00000000,
-/* 0x005c: chipsets */
-	0x000000e4,
-	0x01440078,
-	0x000000e7,
-	0x01440078,
-	0x000000e6,
-	0x01440078,
 	0x00000000,
-/* 0x0078: nve4_hub_mmio_head */
-	0x0417e91c,
-	0x04400204,
-	0x18404010,
-	0x204040a8,
-	0x184040d0,
-	0x004040f8,
-	0x08404130,
-	0x08404150,
-	0x00404164,
-	0x0c4041a0,
-	0x0c404200,
-	0x34404404,
-	0x0c404460,
-	0x00404480,
-	0x00404498,
-	0x0c404604,
-	0x0c404618,
-	0x0440462c,
-	0x00404640,
-	0x00404654,
-	0x00404660,
-	0x48404678,
-	0x084046c8,
-	0x08404700,
-	0x24404718,
-	0x04404744,
-	0x00404754,
-	0x00405800,
-	0x08405830,
-	0x00405854,
-	0x0c405870,
-	0x04405a00,
-	0x00405a18,
-	0x00405b00,
-	0x00405b10,
-	0x00406020,
-	0x0c406028,
-	0x044064a8,
-	0x044064b4,
-	0x2c4064c0,
-	0x004064fc,
-	0x00407040,
-	0x00407804,
-	0x1440780c,
-	0x004078bc,
-	0x18408000,
-	0x00408064,
-	0x08408800,
-	0x00408840,
-	0x08408900,
-	0x00408980,
-/* 0x0144: nve4_hub_mmio_tail */
 	0x00000000,
 	0x00000000,
 	0x00000000,
@@ -129,6 +69,26 @@ uint32_t nve0_grhub_data[] = {
 	0x00000000,
 	0x00000000,
 	0x00000000,
+/* 0x0100: chan_data */
+/* 0x0100: chan_mmio_count */
+	0x00000000,
+/* 0x0104: chan_mmio_address */
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
 	0x00000000,
 	0x00000000,
 	0x00000000,
@@ -136,10 +96,7 @@ uint32_t nve0_grhub_data[] = {
 	0x00000000,
 	0x00000000,
 	0x00000000,
-/* 0x0200: chan_data */
-/* 0x0200: chan_mmio_count */
 	0x00000000,
-/* 0x0204: chan_mmio_address */
 	0x00000000,
 	0x00000000,
 	0x00000000,
@@ -179,6 +136,7 @@ uint32_t nve0_grhub_data[] = {
 	0x00000000,
 	0x00000000,
 	0x00000000,
+/* 0x0200: xfer_data */
 	0x00000000,
 	0x00000000,
 	0x00000000,
@@ -203,8 +161,163 @@ uint32_t nve0_grhub_data[] = {
 	0x00000000,
 	0x00000000,
 	0x00000000,
-/* 0x0300: xfer_data */
 	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+/* 0x0300: chipsets */
+	0x000000e4,
+	0x03f00324,
+	0x000000e7,
+	0x03f00324,
+	0x000000e6,
+	0x03f00324,
+	0x000000f0,
+	0x04c403f0,
+	0x00000000,
+/* 0x0324: nve4_hub_mmio_head */
+	0x0417e91c,
+	0x04400204,
+	0x18404010,
+	0x204040a8,
+	0x184040d0,
+	0x004040f8,
+	0x08404130,
+	0x08404150,
+	0x00404164,
+	0x0c4041a0,
+	0x0c404200,
+	0x34404404,
+	0x0c404460,
+	0x00404480,
+	0x00404498,
+	0x0c404604,
+	0x0c404618,
+	0x0440462c,
+	0x00404640,
+	0x00404654,
+	0x00404660,
+	0x48404678,
+	0x084046c8,
+	0x08404700,
+	0x24404718,
+	0x04404744,
+	0x00404754,
+	0x00405800,
+	0x08405830,
+	0x00405854,
+	0x0c405870,
+	0x04405a00,
+	0x00405a18,
+	0x00405b00,
+	0x00405b10,
+	0x00406020,
+	0x0c406028,
+	0x044064a8,
+	0x044064b4,
+	0x2c4064c0,
+	0x004064fc,
+	0x00407040,
+	0x00407804,
+	0x1440780c,
+	0x004078bc,
+	0x18408000,
+	0x00408064,
+	0x08408800,
+	0x00408840,
+	0x08408900,
+	0x00408980,
+/* 0x03f0: nve4_hub_mmio_tail */
+/* 0x03f0: nvf0_hub_mmio_head */
+	0x0417e91c,
+	0x04400204,
+	0x40404004,
+	0x204040a8,
+	0x184040d0,
+	0x004040f8,
+	0x24404100,
+	0x08404130,
+	0x08404150,
+	0x00404164,
+	0x0440417c,
+	0x0c4041a0,
+	0x0c404200,
+	0x2c404404,
+	0x00404438,
+	0x0c404460,
+	0x00404480,
+	0x00404498,
+	0x0c404604,
+	0x0c404618,
+	0x0440462c,
+	0x00404640,
+	0x00404654,
+	0x00404660,
+	0x48404678,
+	0x084046c8,
+	0x08404700,
+	0x24404718,
+	0x04404744,
+	0x00404754,
+	0x00405800,
+	0x08405830,
+	0x00405854,
+	0x0c405870,
+	0x04405a00,
+	0x00405a18,
+	0x00405b00,
+	0x00405b10,
+	0x00405b20,
+	0x00406020,
+	0x0c406028,
+	0x104064a8,
+	0x2c4064c0,
+	0x004064fc,
+	0x00407804,
+	0x1440780c,
+	0x004078bc,
+	0x18408000,
+	0x00408064,
+	0x08408800,
+	0x00408840,
+	0x08408900,
+	0x00408980,
 };
 
 uint32_t nve0_grhub_code[] = {
@@ -275,7 +388,7 @@ uint32_t nve0_grhub_code[] = {
 	0x0089d000,
 	0x081887f1,
 	0xd00684b6,
-/* 0x00e2: wait_done_wait_donez */
+/* 0x00e2: wait_donez_ne */
 	0x87f1008a,
 	0x84b60400,
 	0x0088cf06,
@@ -292,7 +405,7 @@ uint32_t nve0_grhub_code[] = {
 	0x87f10089,
 	0x84b60818,
 	0x008ad006,
-/* 0x011c: wait_done_wait_doneo */
+/* 0x011c: wait_doneo_e */
 	0x040087f1,
 	0xcf0684b6,
 	0x8aff0088,
@@ -440,7 +553,7 @@ uint32_t nve0_grhub_code[] = {
 	0x0017f100,
 	0x0227f012,
 	0xf10012d0,
-	0xfe05b917,
+	0xfe05ba17,
 	0x17f10010,
 	0x10d00400,
 	0x0437f1c0,
@@ -474,385 +587,385 @@ uint32_t nve0_grhub_code[] = {
 	0x4021d000,
 	0x080027f1,
 	0xcf0624b6,
-	0xf7f00022,
-/* 0x03a9: init_find_chipset */
-	0x08f0b654,
-	0xb800f398,
-	0x0bf40432,
-	0x0034b00b,
-	0xf8f11bf4,
-/* 0x03bd: init_context */
-	0x0017f100,
-	0x02fe5801,
-	0xf003ff58,
-	0x0e8000e3,
-	0x150f8014,
-	0x013d21f5,
-	0x070037f1,
-	0x950634b6,
-	0x34d00814,
-	0x4034d000,
-	0x130030b7,
-	0xb6001fbb,
-	0x3fd002f5,
-	0x0815b600,
-	0xb60110b6,
-	0x1fb90814,
-	0x6321f502,
-	0x001fbb02,
-	0xf1000398,
-	0xf0200047,
-/* 0x040e: init_gpc */
-	0x4ea05043,
-	0x1fb90804,
-	0x8d21f402,
-	0x08004ea0,
-	0xf4022fb9,
-	0x4ea08d21,
-	0xf4bd010c,
+	0xf7f10022,
+/* 0x03aa: init_find_chipset */
+	0xf0b602f8,
+	0x00f39808,
+	0xf40432b8,
+	0x34b00b0b,
+	0xf11bf400,
+/* 0x03be: init_context */
+	0x17f100f8,
+	0xfe580100,
+	0x03ff5802,
+	0x8000e3f0,
+	0x0f80140e,
+	0x3d21f515,
+	0x0037f101,
+	0x0634b607,
+	0xd0081495,
+	0x34d00034,
+	0x0030b740,
+	0x001fbb13,
+	0xd002f5b6,
+	0x15b6003f,
+	0x0110b608,
+	0xb90814b6,
+	0x21f5021f,
+	0x1fbb0263,
+	0x00039800,
+	0x200047f1,
+/* 0x040f: init_gpc */
+	0xa05043f0,
+	0xb908044e,
+	0x21f4021f,
+	0x004ea08d,
+	0x022fb908,
 	0xa08d21f4,
-	0xf401044e,
-	0x4ea08d21,
-	0xf7f00100,
-	0x8d21f402,
-	0x08004ea0,
-/* 0x0440: init_gpc_wait */
-	0xc86821f4,
-	0x0bf41fff,
-	0x044ea0fa,
+	0xbd010c4e,
+	0x8d21f4f4,
+	0x01044ea0,
+	0xa08d21f4,
+	0xf001004e,
+	0x21f402f7,
+	0x004ea08d,
+/* 0x0441: init_gpc_wait */
 	0x6821f408,
-	0xb7001fbb,
-	0xb6800040,
-	0x1bf40132,
-	0x0027f1b4,
-	0x0624b608,
-	0xb74021d0,
-	0xbd080020,
-	0x1f19f014,
-/* 0x0473: main */
-	0xf40021d0,
-	0x28f40031,
-	0x08d7f000,
-	0xf43921f4,
-	0xe4b1f401,
-	0x1bf54001,
-	0x87f100d1,
-	0x84b6083c,
-	0xf094bd06,
-	0x89d00499,
-	0x0017f100,
-	0x0614b60b,
-	0xcf4012cf,
-	0x13c80011,
-	0x7e0bf41f,
-	0xf41f23c8,
-	0x20f95a0b,
-	0xf10212b9,
+	0xf41fffc8,
+	0x4ea0fa0b,
+	0x21f40804,
+	0x001fbb68,
+	0x800040b7,
+	0xf40132b6,
+	0x27f1b41b,
+	0x24b60800,
+	0x4021d006,
+	0x080020b7,
+	0x19f014bd,
+	0x0021d01f,
+/* 0x0474: main */
+	0xf40031f4,
+	0xd7f00028,
+	0x3921f408,
+	0xb1f401f4,
+	0xf54001e4,
+	0xf100d11b,
 	0xb6083c87,
 	0x94bd0684,
-	0xd00799f0,
-	0x32f40089,
-	0x0231f401,
-	0x07fb21f5,
-	0x085c87f1,
+	0xd00499f0,
+	0x17f10089,
+	0x14b60b00,
+	0x4012cf06,
+	0xc80011cf,
+	0x0bf41f13,
+	0x1f23c87e,
+	0xf95a0bf4,
+	0x0212b920,
+	0x083c87f1,
 	0xbd0684b6,
 	0x0799f094,
-	0xfc0089d0,
-	0x3c87f120,
+	0xf40089d0,
+	0x31f40132,
+	0xfc21f502,
+	0x5c87f107,
 	0x0684b608,
 	0x99f094bd,
-	0x0089d006,
-	0xf50131f4,
-	0xf107fb21,
-	0xb6085c87,
-	0x94bd0684,
-	0xd00699f0,
-	0x0ef40089,
-/* 0x0509: chsw_prev_no_next */
-	0xb920f931,
-	0x32f40212,
-	0x0232f401,
-	0x07fb21f5,
-	0x17f120fc,
-	0x14b60b00,
-	0x0012d006,
-/* 0x0527: chsw_no_prev */
-	0xc8130ef4,
-	0x0bf41f23,
-	0x0131f40d,
-	0xf50232f4,
-/* 0x0537: chsw_done */
-	0xf107fb21,
-	0xb60b0c17,
-	0x27f00614,
-	0x0012d001,
+	0x0089d007,
+	0x87f120fc,
+	0x84b6083c,
+	0xf094bd06,
+	0x89d00699,
+	0x0131f400,
+	0x07fc21f5,
 	0x085c87f1,
 	0xbd0684b6,
-	0x0499f094,
-	0xf50089d0,
-/* 0x0557: main_not_ctx_switch */
-	0xb0ff200e,
-	0x1bf401e4,
-	0x02f2b90d,
-	0x078f21f5,
-/* 0x0567: main_not_ctx_chan */
-	0xb0420ef4,
-	0x1bf402e4,
-	0x3c87f12e,
+	0x0699f094,
+	0xf40089d0,
+/* 0x050a: chsw_prev_no_next */
+	0x20f9310e,
+	0xf40212b9,
+	0x32f40132,
+	0xfc21f502,
+	0xf120fc07,
+	0xb60b0017,
+	0x12d00614,
+	0x130ef400,
+/* 0x0528: chsw_no_prev */
+	0xf41f23c8,
+	0x31f40d0b,
+	0x0232f401,
+	0x07fc21f5,
+/* 0x0538: chsw_done */
+	0x0b0c17f1,
+	0xf00614b6,
+	0x12d00127,
+	0x5c87f100,
 	0x0684b608,
 	0x99f094bd,
-	0x0089d007,
-	0xf40132f4,
-	0x21f50232,
-	0x87f107fb,
-	0x84b6085c,
+	0x0089d004,
+	0xff200ef5,
+/* 0x0558: main_not_ctx_switch */
+	0xf401e4b0,
+	0xf2b90d1b,
+	0x9021f502,
+	0x420ef407,
+/* 0x0568: main_not_ctx_chan */
+	0xf402e4b0,
+	0x87f12e1b,
+	0x84b6083c,
 	0xf094bd06,
 	0x89d00799,
-	0x110ef400,
-/* 0x0598: main_not_ctx_save */
-	0xf010ef94,
-	0x21f501f5,
-	0x0ef502ec,
-/* 0x05a6: main_done */
-	0x17f1fed1,
-	0x14b60820,
-	0xf024bd06,
-	0x12d01f29,
-	0xbe0ef500,
-/* 0x05b9: ih */
-	0xfe80f9fe,
-	0x80f90188,
-	0xa0f990f9,
-	0xd0f9b0f9,
-	0xf0f9e0f9,
-	0xc4800acf,
-	0x0bf404ab,
-	0x00b7f11d,
-	0x08d7f019,
-	0xcf40becf,
-	0x21f400bf,
-	0x00b0b704,
-	0x01e7f004,
-/* 0x05ef: ih_no_fifo */
-	0xe400bed0,
-	0xf40100ab,
-	0xd7f00d0b,
-	0x01e7f108,
-	0x0421f440,
-/* 0x0600: ih_no_ctxsw */
-	0x0104b7f1,
-	0xabffb0bd,
-	0x0d0bf4b4,
-	0x0c1ca7f1,
-	0xd006a4b6,
-/* 0x0616: ih_no_other */
-	0x0ad000ab,
-	0xfcf0fc40,
-	0xfcd0fce0,
-	0xfca0fcb0,
-	0xfe80fc90,
-	0x80fc0088,
-	0xf80032f4,
-/* 0x0631: ctx_4170s */
-	0x70e7f101,
+	0x0132f400,
+	0xf50232f4,
+	0xf107fc21,
+	0xb6085c87,
+	0x94bd0684,
+	0xd00799f0,
+	0x0ef40089,
+/* 0x0599: main_not_ctx_save */
+	0x10ef9411,
+	0xf501f5f0,
+	0xf502ec21,
+/* 0x05a7: main_done */
+	0xf1fed10e,
+	0xb6082017,
+	0x24bd0614,
+	0xd01f29f0,
+	0x0ef50012,
+/* 0x05ba: ih */
+	0x80f9febe,
+	0xf90188fe,
+	0xf990f980,
+	0xf9b0f9a0,
+	0xf9e0f9d0,
+	0x800acff0,
+	0xf404abc4,
+	0xb7f11d0b,
+	0xd7f01900,
+	0x40becf08,
+	0xf400bfcf,
+	0xb0b70421,
+	0xe7f00400,
+	0x00bed001,
+/* 0x05f0: ih_no_fifo */
+	0x0100abe4,
+	0xf00d0bf4,
+	0xe7f108d7,
+	0x21f44001,
+/* 0x0601: ih_no_ctxsw */
+	0x04b7f104,
+	0xffb0bd01,
+	0x0bf4b4ab,
+	0x1ca7f10d,
+	0x06a4b60c,
+/* 0x0617: ih_no_other */
+	0xd000abd0,
+	0xf0fc400a,
+	0xd0fce0fc,
+	0xa0fcb0fc,
+	0x80fc90fc,
+	0xfc0088fe,
+	0x0032f480,
+/* 0x0632: ctx_4170s */
+	0xe7f101f8,
+	0xe3f04170,
+	0x10f5f040,
+	0xf88d21f4,
+/* 0x0641: ctx_4170w */
+	0x70e7f100,
 	0x40e3f041,
-	0xf410f5f0,
-	0x00f88d21,
-/* 0x0640: ctx_4170w */
-	0x4170e7f1,
-	0xf440e3f0,
-	0xf4f06821,
-	0xf31bf410,
-/* 0x0652: ctx_redswitch */
-	0xe7f100f8,
-	0xe4b60614,
-	0x70f7f106,
-	0x00efd002,
-/* 0x0663: ctx_redswitch_delay */
-	0xb608f7f0,
-	0x1bf401f2,
-	0x70f7f1fd,
-	0x00efd007,
-/* 0x0672: ctx_86c */
-	0xe7f100f8,
-	0xe4b6086c,
-	0x00efd006,
-	0x8a14e7f1,
-	0xf440e3f0,
-	0xe7f18d21,
-	0xe3f0a86c,
-	0x8d21f441,
-/* 0x0692: ctx_load */
-	0x87f100f8,
-	0x84b6083c,
-	0xf094bd06,
-	0x89d00599,
-	0x0ca7f000,
-	0xf1c921f4,
-	0xb60a2417,
-	0x10d00614,
-	0x0037f100,
-	0x0634b60b,
-	0xf14032d0,
-	0xb60a0c17,
-	0x47f00614,
-	0x0012d007,
-/* 0x06cb: ctx_chan_wait_0 */
-	0xcf4014d0,
-	0x44f04014,
-	0xfa1bf41f,
-	0xfe0032d0,
-	0x2af0000b,
-	0x0424b61f,
-	0xf10220b6,
+	0xf06821f4,
+	0x1bf410f4,
+/* 0x0653: ctx_redswitch */
+	0xf100f8f3,
+	0xb60614e7,
+	0xf7f106e4,
+	0xefd00270,
+	0x08f7f000,
+/* 0x0664: ctx_redswitch_delay */
+	0xf401f2b6,
+	0xf7f1fd1b,
+	0xefd00770,
+/* 0x0673: ctx_86c */
+	0xf100f800,
+	0xb6086ce7,
+	0xefd006e4,
+	0x14e7f100,
+	0x40e3f08a,
+	0xf18d21f4,
+	0xf0a86ce7,
+	0x21f441e3,
+/* 0x0693: ctx_load */
+	0xf100f88d,
 	0xb6083c87,
 	0x94bd0684,
-	0xd00899f0,
-	0x17f10089,
-	0x14b60a04,
-	0x0012d006,
-	0x0a2017f1,
+	0xd00599f0,
+	0xa7f00089,
+	0xc921f40c,
+	0x0a2417f1,
+	0xd00614b6,
+	0x37f10010,
+	0x34b60b00,
+	0x4032d006,
+	0x0a0c17f1,
 	0xf00614b6,
-	0x23f10227,
-	0x12d08000,
-	0x1017f000,
-	0x030027f1,
-	0xfa0223f0,
-	0x03f80512,
-	0x085c87f1,
+	0x12d00747,
+	0x4014d000,
+/* 0x06cc: ctx_chan_wait_0 */
+	0xf04014cf,
+	0x1bf41f44,
+	0x0032d0fa,
+	0xf0000bfe,
+	0x24b61f2a,
+	0x0220b604,
+	0x083c87f1,
 	0xbd0684b6,
 	0x0899f094,
-	0x980089d0,
-	0x14b6c101,
-	0xc0029818,
-	0xfd0825b6,
-	0x01800512,
-	0x3c87f116,
+	0xf10089d0,
+	0xb60a0417,
+	0x12d00614,
+	0x2017f100,
+	0x0614b60a,
+	0xf10227f0,
+	0xd0800023,
+	0x17f00012,
+	0x0027f110,
+	0x0223f002,
+	0xf80512fa,
+	0x5c87f103,
 	0x0684b608,
 	0x99f094bd,
-	0x0089d009,
-	0x0a0427f1,
-	0xd00624b6,
-	0x27f00021,
-	0x2017f101,
-	0x0614b60a,
-	0xf10012d0,
-	0xf0020017,
-	0x01fa0613,
-	0xf103f805,
+	0x0089d008,
+	0xb6810198,
+	0x02981814,
+	0x0825b680,
+	0x800512fd,
+	0x87f11601,
+	0x84b6083c,
+	0xf094bd06,
+	0x89d00999,
+	0x0427f100,
+	0x0624b60a,
+	0xf00021d0,
+	0x17f10127,
+	0x14b60a20,
+	0x0012d006,
+	0x010017f1,
+	0xfa0613f0,
+	0x03f80501,
+	0x085c87f1,
+	0xbd0684b6,
+	0x0999f094,
+	0xf10089d0,
 	0xb6085c87,
 	0x94bd0684,
-	0xd00999f0,
-	0x87f10089,
-	0x84b6085c,
-	0xf094bd06,
-	0x89d00599,
-/* 0x078f: ctx_chan */
-	0xf500f800,
-	0xf0069221,
-	0x21f40ca7,
-	0x1017f1c9,
-	0x0614b60a,
-	0xd00527f0,
-/* 0x07a6: ctx_chan_wait */
-	0x12cf0012,
-	0x0522fd00,
-	0xf8fa1bf4,
-/* 0x07b1: ctx_mmio_exec */
-	0x81039800,
-	0x0a0427f1,
-	0xd00624b6,
-	0x34bd0023,
-/* 0x07c0: ctx_mmio_loop */
-	0xf4ff34c4,
-	0x57f10f1b,
-	0x53f00300,
-	0x0535fa06,
-/* 0x07d2: ctx_mmio_pull */
-	0x4e9803f8,
-	0xc14f98c0,
-	0xb68d21f4,
-	0x12b60830,
-	0xdf1bf401,
-/* 0x07e4: ctx_mmio_done */
-	0xd0160398,
-	0x00800023,
-	0x0017f180,
-	0x0613f002,
-	0xf80601fa,
-/* 0x07fb: ctx_xfer */
-	0xf100f803,
-	0xb60c00f7,
-	0xe7f006f4,
-	0x80fed004,
-/* 0x0808: ctx_xfer_idle */
-	0xf100fecf,
-	0xf42000e4,
-	0x11f4f91b,
-	0x0d02f406,
-/* 0x0818: ctx_xfer_pre */
-	0xf510f7f0,
-	0xf4067221,
-/* 0x0822: ctx_xfer_pre_load */
-	0xf7f01c11,
-	0x3121f502,
-	0x4021f506,
-	0x5221f506,
-	0xf5f4bd06,
-	0xf5063121,
-/* 0x083b: ctx_xfer_exec */
-	0x98069221,
-	0x27f11601,
-	0x24b60414,
-	0x0020d006,
-	0xa500e7f1,
-	0xb941e3f0,
-	0x21f4021f,
-	0x04e0b68d,
-	0xf001fcf0,
-	0x24b6022c,
-	0x05f2fd01,
-	0xf18d21f4,
-	0xf04afc17,
-	0x27f00213,
-	0x0012d00c,
-	0x020721f5,
-	0x47fc27f1,
-	0xd00223f0,
-	0x2cf00020,
-	0x0320b601,
-	0xf00012d0,
-	0xa5f001ac,
-	0x00b7f006,
-	0x98140c98,
-	0xe7f0150d,
-	0x5c21f500,
-	0x08a7f001,
-	0x010321f5,
-	0x020721f5,
-	0xf02201f4,
-	0x21f40ca7,
-	0x1017f1c9,
-	0x0614b60a,
-	0xd00527f0,
-/* 0x08c2: ctx_xfer_post_save_wait */
-	0x12cf0012,
-	0x0522fd00,
-	0xf4fa1bf4,
-/* 0x08ce: ctx_xfer_post */
-	0xf7f02e02,
-	0x3121f502,
-	0xf5f4bd06,
-	0xf5067221,
-	0xf5022621,
-	0xbd064021,
-	0x3121f5f4,
-	0x1011f406,
-	0xfd800198,
-	0x0bf40511,
-	0xb121f507,
-/* 0x08f9: ctx_xfer_no_post_mmio */
-/* 0x08f9: ctx_xfer_done */
-	0x0000f807,
+	0xd00599f0,
+	0x00f80089,
+/* 0x0790: ctx_chan */
+	0x069321f5,
+	0xf40ca7f0,
+	0x17f1c921,
+	0x14b60a10,
+	0x0527f006,
+/* 0x07a7: ctx_chan_wait */
+	0xcf0012d0,
+	0x22fd0012,
+	0xfa1bf405,
+/* 0x07b2: ctx_mmio_exec */
+	0x039800f8,
+	0x0427f141,
+	0x0624b60a,
+	0xbd0023d0,
+/* 0x07c1: ctx_mmio_loop */
+	0xff34c434,
+	0xf10f1bf4,
+	0xf0020057,
+	0x35fa0653,
+/* 0x07d3: ctx_mmio_pull */
+	0x9803f805,
+	0x4f98804e,
+	0x8d21f481,
+	0xb60830b6,
+	0x1bf40112,
+/* 0x07e5: ctx_mmio_done */
+	0x160398df,
+	0x800023d0,
+	0x17f14000,
+	0x13f00100,
+	0x0601fa06,
+	0x00f803f8,
+/* 0x07fc: ctx_xfer */
+	0x0c00f7f1,
+	0xf006f4b6,
+	0xfed004e7,
+/* 0x0809: ctx_xfer_idle */
+	0x00fecf80,
+	0x2000e4f1,
+	0xf4f91bf4,
+	0x02f40611,
+/* 0x0819: ctx_xfer_pre */
+	0x10f7f00d,
+	0x067321f5,
+/* 0x0823: ctx_xfer_pre_load */
+	0xf01c11f4,
+	0x21f502f7,
+	0x21f50632,
+	0x21f50641,
+	0xf4bd0653,
+	0x063221f5,
+	0x069321f5,
+/* 0x083c: ctx_xfer_exec */
+	0xf1160198,
+	0xb6041427,
+	0x20d00624,
+	0x00e7f100,
+	0x41e3f0a5,
+	0xf4021fb9,
+	0xe0b68d21,
+	0x01fcf004,
+	0xb6022cf0,
+	0xf2fd0124,
+	0x8d21f405,
+	0x4afc17f1,
+	0xf00213f0,
+	0x12d00c27,
+	0x0721f500,
+	0xfc27f102,
+	0x0223f047,
+	0xf00020d0,
+	0x20b6012c,
+	0x0012d003,
+	0xf001acf0,
+	0xb7f006a5,
+	0x140c9800,
+	0xf0150d98,
+	0x21f500e7,
+	0xa7f0015c,
+	0x0321f508,
+	0x0721f501,
+	0x2201f402,
+	0xf40ca7f0,
+	0x17f1c921,
+	0x14b60a10,
+	0x0527f006,
+/* 0x08c3: ctx_xfer_post_save_wait */
+	0xcf0012d0,
+	0x22fd0012,
+	0xfa1bf405,
+/* 0x08cf: ctx_xfer_post */
+	0xf02e02f4,
+	0x21f502f7,
+	0xf4bd0632,
+	0x067321f5,
+	0x022621f5,
+	0x064121f5,
+	0x21f5f4bd,
+	0x11f40632,
+	0x40019810,
+	0xf40511fd,
+	0x21f5070b,
+/* 0x08fa: ctx_xfer_no_post_mmio */
+/* 0x08fa: ctx_xfer_done */
+	0x00f807b2,
 	0x00000000,
 };

+ 53 - 0
drivers/gpu/drm/nouveau/core/engine/graph/fuc/macros.fuc

@@ -0,0 +1,53 @@
+/*
+ * Copyright 2013 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs <bskeggs@redhat.com>
+ */
+
+#include "os.h"
+
+#define mmctx_data(r,c) .b32 (((c - 1) << 26) | r) // one 32-bit word: (c - 1) shifted to bit 26, OR'd with r
+#define queue_init      .skip 72 // (2 * 4) + ((8 * 4) * 2)
+
+#define T_WAIT    0 // T_* are the bit numbers passed to trace_set()/trace_clr()
+#define T_MMCTX   1
+#define T_STRWAIT 2
+#define T_STRINIT 3
+#define T_AUTO    4
+#define T_CHAN    5
+#define T_LOAD    6
+#define T_SAVE    7
+#define T_LCHAN   8
+#define T_LCTXH   9
+
+#define trace_set(bit) /* writes the mask (1 << bit) to I[0x83c << 6]; overwrites $r8 and $r9
+*/	mov $r8 0x83c /* each comment spans a newline so the #define continues onto the next line
+*/	shl b32 $r8 6 /*
+*/	clear b32 $r9 /*
+*/	bset $r9 bit /*
+*/	iowr I[$r8 + 0x000] $r9
+
+#define trace_clr(bit) /* writes the mask (1 << bit) to I[0x85c << 6]; overwrites $r8 and $r9
+*/	mov $r8 0x85c /* each comment spans a newline so the #define continues onto the next line
+*/	shl b32 $r8 6 /*
+*/	clear b32 $r9 /*
+*/	bset $r9 bit /*
+*/	iowr I[$r8 + 0x000] $r9

+ 0 - 400
drivers/gpu/drm/nouveau/core/engine/graph/fuc/nve0.fuc

@@ -1,400 +0,0 @@
-/* fuc microcode util functions for nve0 PGRAPH
- *
- * Copyright 2011 Red Hat Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors: Ben Skeggs
- */
-
-define(`mmctx_data', `.b32 eval((($2 - 1) << 26) | $1)')
-define(`queue_init', `.skip eval((2 * 4) + ((8 * 4) * 2))')
-
-ifdef(`include_code', `
-// Error codes
-define(`E_BAD_COMMAND', 0x01)
-define(`E_CMD_OVERFLOW', 0x02)
-
-// Util macros to help with debugging ucode hangs etc
-define(`T_WAIT', 0)
-define(`T_MMCTX', 1)
-define(`T_STRWAIT', 2)
-define(`T_STRINIT', 3)
-define(`T_AUTO', 4)
-define(`T_CHAN', 5)
-define(`T_LOAD', 6)
-define(`T_SAVE', 7)
-define(`T_LCHAN', 8)
-define(`T_LCTXH', 9)
-
-define(`trace_set', `
-	mov $r8 0x83c
-	shl b32 $r8 6
-	clear b32 $r9
-	bset $r9 $1
-	iowr I[$r8 + 0x000] $r9		// CC_SCRATCH[7]
-')
-
-define(`trace_clr', `
-	mov $r8 0x85c
-	shl b32 $r8 6
-	clear b32 $r9
-	bset $r9 $1
-	iowr I[$r8 + 0x000] $r9		// CC_SCRATCH[7]
-')
-
-// queue_put - add request to queue
-//
-// In : $r13 queue pointer
-//	$r14 command
-//	$r15 data
-//
-queue_put:
-	// make sure we have space..
-	ld b32 $r8 D[$r13 + 0x0]	// GET
-	ld b32 $r9 D[$r13 + 0x4]	// PUT
-	xor $r8 8
-	cmpu b32 $r8 $r9
-	bra ne #queue_put_next
-		mov $r15 E_CMD_OVERFLOW
-		call #error
-		ret
-
-	// store cmd/data on queue
-	queue_put_next:
-	and $r8 $r9 7
-	shl b32 $r8 3
-	add b32 $r8 $r13
-	add b32 $r8 8
-	st b32 D[$r8 + 0x0] $r14
-	st b32 D[$r8 + 0x4] $r15
-
-	// update PUT
-	add b32 $r9 1
-	and $r9 0xf
-	st b32 D[$r13 + 0x4] $r9
-	ret
-
-// queue_get - fetch request from queue
-//
-// In : $r13 queue pointer
-//
-// Out:	$p1  clear on success (data available)
-//	$r14 command
-// 	$r15 data
-//
-queue_get:
-	bset $flags $p1
-	ld b32 $r8 D[$r13 + 0x0]	// GET
-	ld b32 $r9 D[$r13 + 0x4]	// PUT
-	cmpu b32 $r8 $r9
-	bra e #queue_get_done
-		// fetch first cmd/data pair
-		and $r9 $r8 7
-		shl b32 $r9 3
-		add b32 $r9 $r13
-		add b32 $r9 8
-		ld b32 $r14 D[$r9 + 0x0]
-		ld b32 $r15 D[$r9 + 0x4]
-
-		// update GET
-		add b32 $r8 1
-		and $r8 0xf
-		st b32 D[$r13 + 0x0] $r8
-		bclr $flags $p1
-queue_get_done:
-	ret
-
-// nv_rd32 - read 32-bit value from nv register
-//
-// In : $r14 register
-// Out: $r15 value
-//
-nv_rd32:
-	mov $r11 0x728
-	shl b32 $r11 6
-	mov b32 $r12 $r14
-	bset $r12 31			// MMIO_CTRL_PENDING
-	iowr I[$r11 + 0x000] $r12	// MMIO_CTRL
-	nv_rd32_wait:
-		iord $r12 I[$r11 + 0x000]
-		xbit $r12 $r12 31
-		bra ne #nv_rd32_wait
-	mov $r10 6			// DONE_MMIO_RD
-	call #wait_doneo
-	iord $r15 I[$r11 + 0x100]	// MMIO_RDVAL
-	ret
-
-// nv_wr32 - write 32-bit value to nv register
-//
-// In : $r14 register
-//      $r15 value
-//
-nv_wr32:
-	mov $r11 0x728
-	shl b32 $r11 6
-	iowr I[$r11 + 0x200] $r15	// MMIO_WRVAL
-	mov b32 $r12 $r14
-	bset $r12 31			// MMIO_CTRL_PENDING
-	bset $r12 30			// MMIO_CTRL_WRITE
-	iowr I[$r11 + 0x000] $r12	// MMIO_CTRL
-	nv_wr32_wait:
-		iord $r12 I[$r11 + 0x000]
-		xbit $r12 $r12 31
-		bra ne #nv_wr32_wait
-	ret
-
-// (re)set watchdog timer
-//
-// In : $r15 timeout
-//
-watchdog_reset:
-	mov $r8 0x430
-	shl b32 $r8 6
-	bset $r15 31
-	iowr I[$r8 + 0x000] $r15
-	ret
-
-// clear watchdog timer
-watchdog_clear:
-	mov $r8 0x430
-	shl b32 $r8 6
-	iowr I[$r8 + 0x000] $r0
-	ret
-
-// wait_done{z,o} - wait on FUC_DONE bit to become clear/set
-//
-// In : $r10 bit to wait on
-//
-define(`wait_done', `
-$1:
-	trace_set(T_WAIT);
-	mov $r8 0x818
-	shl b32 $r8 6
-	iowr I[$r8 + 0x000] $r10	// CC_SCRATCH[6] = wait bit
-	wait_done_$1:
-		mov $r8 0x400
-		shl b32 $r8 6
-		iord $r8 I[$r8 + 0x000]	// DONE
-		xbit $r8 $r8 $r10
-		bra $2 #wait_done_$1
-	trace_clr(T_WAIT)
-	ret
-')
-wait_done(wait_donez, ne)
-wait_done(wait_doneo, e)
-
-// mmctx_size - determine size of a mmio list transfer
-//
-// In : $r14 mmio list head
-//      $r15 mmio list tail
-// Out: $r15 transfer size (in bytes)
-//
-mmctx_size:
-	clear b32 $r9
-	nv_mmctx_size_loop:
-		ld b32 $r8 D[$r14]
-		shr b32 $r8 26
-		add b32 $r8 1
-		shl b32 $r8 2
-		add b32 $r9 $r8
-		add b32 $r14 4
-		cmpu b32 $r14 $r15
-		bra ne #nv_mmctx_size_loop
-	mov b32 $r15 $r9
-	ret
-
-// mmctx_xfer - execute a list of mmio transfers
-//
-// In : $r10 flags
-//		bit 0: direction (0 = save, 1 = load)
-//		bit 1: set if first transfer
-//		bit 2: set if last transfer
-//	$r11 base
-//	$r12 mmio list head
-//	$r13 mmio list tail
-//	$r14 multi_stride
-//	$r15 multi_mask
-//
-mmctx_xfer:
-	trace_set(T_MMCTX)
-	mov $r8 0x710
-	shl b32 $r8 6
-	clear b32 $r9
-	or $r11 $r11
-	bra e #mmctx_base_disabled
-		iowr I[$r8 + 0x000] $r11	// MMCTX_BASE
-		bset $r9 0			// BASE_EN
-	mmctx_base_disabled:
-	or $r14 $r14
-	bra e #mmctx_multi_disabled
-		iowr I[$r8 + 0x200] $r14 	// MMCTX_MULTI_STRIDE
-		iowr I[$r8 + 0x300] $r15 	// MMCTX_MULTI_MASK
-		bset $r9 1			// MULTI_EN
-	mmctx_multi_disabled:
-	add b32 $r8 0x100
-
-	xbit $r11 $r10 0
-	shl b32 $r11 16			// DIR
-	bset $r11 12			// QLIMIT = 0x10
-	xbit $r14 $r10 1
-	shl b32 $r14 17
-	or $r11 $r14			// START_TRIGGER
-	iowr I[$r8 + 0x000] $r11	// MMCTX_CTRL
-
-	// loop over the mmio list, and send requests to the hw
-	mmctx_exec_loop:
-		// wait for space in mmctx queue
-		mmctx_wait_free:
-			iord $r14 I[$r8 + 0x000] // MMCTX_CTRL
-			and $r14 0x1f
-			bra e #mmctx_wait_free
-
-		// queue up an entry
-		ld b32 $r14 D[$r12]
-		or $r14 $r9
-		iowr I[$r8 + 0x300] $r14
-		add b32 $r12 4
-		cmpu b32 $r12 $r13
-		bra ne #mmctx_exec_loop
-
-	xbit $r11 $r10 2
-	bra ne #mmctx_stop
-		// wait for queue to empty
-		mmctx_fini_wait:
-			iord $r11 I[$r8 + 0x000]	// MMCTX_CTRL
-			and $r11 0x1f
-			cmpu b32 $r11 0x10
-			bra ne #mmctx_fini_wait
-		mov $r10 2				// DONE_MMCTX
-		call #wait_donez
-		bra #mmctx_done
-	mmctx_stop:
-		xbit $r11 $r10 0
-		shl b32 $r11 16			// DIR
-		bset $r11 12			// QLIMIT = 0x10
-		bset $r11 18			// STOP_TRIGGER
-		iowr I[$r8 + 0x000] $r11	// MMCTX_CTRL
-		mmctx_stop_wait:
-			// wait for STOP_TRIGGER to clear
-			iord $r11 I[$r8 + 0x000] // MMCTX_CTRL
-			xbit $r11 $r11 18
-			bra ne #mmctx_stop_wait
-	mmctx_done:
-	trace_clr(T_MMCTX)
-	ret
-
-// Wait for DONE_STRAND
-//
-strand_wait:
-	push $r10
-	mov $r10 2
-	call #wait_donez
-	pop $r10
-	ret
-
-// unknown - call before issuing strand commands
-//
-strand_pre:
-	mov $r8 0x4afc
-	sethi $r8 0x20000
-	mov $r9 0xc
-	iowr I[$r8] $r9
-	call #strand_wait
-	ret
-
-// unknown - call after issuing strand commands
-//
-strand_post:
-	mov $r8 0x4afc
-	sethi $r8 0x20000
-	mov $r9 0xd
-	iowr I[$r8] $r9
-	call #strand_wait
-	ret
-
-// Selects strand set?!
-//
-// In: $r14 id
-//
-strand_set:
-	mov $r10 0x4ffc
-	sethi $r10 0x20000
-	sub b32 $r11 $r10 0x500
-	mov $r12 0xf
-	iowr I[$r10 + 0x000] $r12		// 0x93c = 0xf
-	mov $r12 0xb
-	iowr I[$r11 + 0x000] $r12		// 0x928 = 0xb
-	call #strand_wait
-	iowr I[$r10 + 0x000] $r14		// 0x93c = <id>
-	mov $r12 0xa
-	iowr I[$r11 + 0x000] $r12		// 0x928 = 0xa
-	call #strand_wait
-	ret
-
-// Initialise strand context data
-//
-// In : $r15 context base
-// Out: $r15 context size (in bytes)
-//
-// Strandset(?) 3 hardcoded currently
-//
-strand_ctx_init:
-	trace_set(T_STRINIT)
-	call #strand_pre
-	mov $r14 3
-	call #strand_set
-	mov $r10 0x46fc
-	sethi $r10 0x20000
-	add b32 $r11 $r10 0x400
-	iowr I[$r10 + 0x100] $r0	// STRAND_FIRST_GENE = 0
-	mov $r12 1
-	iowr I[$r11 + 0x000] $r12	// STRAND_CMD = LATCH_FIRST_GENE
-	call #strand_wait
-	sub b32 $r12 $r0 1
-	iowr I[$r10 + 0x000] $r12	// STRAND_GENE_CNT = 0xffffffff
-	mov $r12 2
-	iowr I[$r11 + 0x000] $r12	// STRAND_CMD = LATCH_GENE_CNT
-	call #strand_wait
-	call #strand_post
-
-	// read the size of each strand, poke the context offset of
-	// each into STRAND_{SAVE,LOAD}_SWBASE now, no need to worry
-	// about it later then.
-	mov $r8 0x880
-	shl b32 $r8 6
-	iord $r9 I[$r8 + 0x000]		// STRANDS
-	add b32 $r8 0x2200
-	shr b32 $r14 $r15 8
-	ctx_init_strand_loop:
-		iowr I[$r8 + 0x000] $r14	// STRAND_SAVE_SWBASE
-		iowr I[$r8 + 0x100] $r14	// STRAND_LOAD_SWBASE
-		iord $r10 I[$r8 + 0x200]	// STRAND_SIZE
-		shr b32 $r10 6
-		add b32 $r10 1
-		add b32 $r14 $r10
-		add b32 $r8 4
-		sub b32 $r9 1
-		bra ne #ctx_init_strand_loop
-
-	shl b32 $r14 8
-	sub b32 $r15 $r14 $r15
-	trace_clr(T_STRINIT)
-	ret
-')

+ 7 - 0
drivers/gpu/drm/nouveau/core/engine/graph/fuc/os.h

@@ -0,0 +1,7 @@
+#ifndef __NVKM_GRAPH_OS_H__
+#define __NVKM_GRAPH_OS_H__
+
+#define E_BAD_COMMAND  0x00000001 /* error code; NOTE(review): presumably an unrecognised command - confirm */
+#define E_CMD_OVERFLOW 0x00000002 /* error code; the m4-era queue_put raised it when the queue had no space */
+
+#endif /* __NVKM_GRAPH_OS_H__ */

+ 6 - 12
drivers/gpu/drm/nouveau/core/engine/graph/nv50.c

@@ -186,13 +186,6 @@ nv50_graph_cclass = {
  * PGRAPH engine/subdev functions
  ******************************************************************************/
 
-static int
-nv50_graph_tlb_flush(struct nouveau_engine *engine)
-{
-	nv50_vm_flush_engine(&engine->base, 0x00);
-	return 0;
-}
-
 static const struct nouveau_bitfield nv50_pgraph_status[] = {
 	{ 0x00000001, "BUSY" }, /* set when any bit is set */
 	{ 0x00000002, "DISPATCH" },
@@ -302,8 +295,10 @@ nv84_graph_tlb_flush(struct nouveau_engine *engine)
 				nv_rd32(priv, 0x400388));
 	}
 
-	nv50_vm_flush_engine(&engine->base, 0x00);
 
+	nv_wr32(priv, 0x100c80, 0x00000001);
+	if (!nv_wait(priv, 0x100c80, 0x00000001, 0x00000000))
+		nv_error(priv, "vm flush timeout\n");
 	nv_mask(priv, 0x400500, 0x00000001, 0x00000001);
 	spin_unlock_irqrestore(&priv->lock, flags);
 	return timeout ? -EBUSY : 0;
@@ -857,10 +852,9 @@ nv50_graph_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
 
 	};
 
-	if (nv_device(priv)->chipset == 0x50 ||
-	    nv_device(priv)->chipset == 0xac)
-		nv_engine(priv)->tlb_flush = nv50_graph_tlb_flush;
-	else
+	/* unfortunate hw bug workaround... */
+	if (nv_device(priv)->chipset != 0x50 &&
+	    nv_device(priv)->chipset != 0xac)
 		nv_engine(priv)->tlb_flush = nv84_graph_tlb_flush;
 
 	spin_lock_init(&priv->lock);

+ 586 - 7
drivers/gpu/drm/nouveau/core/engine/graph/nvc0.c

@@ -237,6 +237,43 @@ nvc0_graph_ctxctl_isr(struct nvc0_graph_priv *priv)
 	nv_wr32(priv, 0x409c20, ustat);
 }
 
+static const struct nouveau_enum nvc0_mp_warp_error[] = { /* names for (TPC reg 0x648 & 0xffff), see nvc0_graph_trap_mp() */
+	{ 0x00, "NO_ERROR" },
+	{ 0x01, "STACK_MISMATCH" },
+	{ 0x05, "MISALIGNED_PC" },
+	{ 0x08, "MISALIGNED_GPR" },
+	{ 0x09, "INVALID_OPCODE" },
+	{ 0x0d, "GPR_OUT_OF_BOUNDS" },
+	{ 0x0e, "MEM_OUT_OF_BOUNDS" },
+	{ 0x0f, "UNALIGNED_MEM_ACCESS" },
+	{ 0x11, "INVALID_PARAM" },
+	{} /* empty terminator entry */
+};
+
+static const struct nouveau_bitfield nvc0_mp_global_error[] = { /* bit masks applied to TPC reg 0x650, see nvc0_graph_trap_mp() */
+	{ 0x00000004, "MULTIPLE_WARP_ERRORS" }, /* bit 2 */
+	{ 0x00000008, "OUT_OF_STACK_SPACE" }, /* bit 3 */
+	{} /* empty terminator entry */
+};
+
+static void
+nvc0_graph_trap_mp(struct nvc0_graph_priv *priv, int gpc, int tpc) /* log one MP trap, then write 0x648/0x650 back */
+{
+	u32 werr = nv_rd32(priv, TPC_UNIT(gpc, tpc, 0x648)); /* warp error; low 16 bits decoded below */
+	u32 gerr = nv_rd32(priv, TPC_UNIT(gpc, tpc, 0x650)); /* global error bits */
+
+	nv_error(priv, "GPC%i/TPC%i/MP trap:", gpc, tpc); /* no newline yet: the line is continued below */
+	nouveau_bitfield_print(nvc0_mp_global_error, gerr);
+	if (werr) { /* a zero warp error is not printed at all */
+		pr_cont(" ");
+		nouveau_enum_print(nvc0_mp_warp_error, werr & 0xffff);
+	}
+	pr_cont("\n"); /* terminates the line started by nv_error() */
+
+	nv_wr32(priv, TPC_UNIT(gpc, tpc, 0x648), 0x00000000);
+	nv_wr32(priv, TPC_UNIT(gpc, tpc, 0x650), gerr); /* writes back the bits just read; presumably acks them - confirm */
+}
+
 static void
 nvc0_graph_trap_tpc(struct nvc0_graph_priv *priv, int gpc, int tpc)
 {
@@ -251,12 +288,7 @@ nvc0_graph_trap_tpc(struct nvc0_graph_priv *priv, int gpc, int tpc)
 	}
 
 	if (stat & 0x00000002) {
-		u32 trap0 = nv_rd32(priv, TPC_UNIT(gpc, tpc, 0x0644));
-		u32 trap1 = nv_rd32(priv, TPC_UNIT(gpc, tpc, 0x064c));
-		nv_error(priv, "GPC%d/TPC%d/MP: 0x%08x 0x%08x\n",
-			       gpc, tpc, trap0, trap1);
-		nv_wr32(priv, TPC_UNIT(gpc, tpc, 0x0644), 0x001ffffe);
-		nv_wr32(priv, TPC_UNIT(gpc, tpc, 0x064c), 0x0000000f);
+		nvc0_graph_trap_mp(priv, gpc, tpc);
 		nv_wr32(priv, TPC_UNIT(gpc, tpc, 0x0508), 0x00000002);
 		stat &= ~0x00000002;
 	}
@@ -684,6 +716,544 @@ nvc0_graph_init_regs(struct nvc0_graph_priv *priv)
 	nv_wr32(priv, 0x400124, 0x00000002);
 }
 
+static void
+nvc0_graph_init_unk40xx(struct nvc0_graph_priv *priv) /* zeroes two registers in the 0x4041xx range */
+{
+	nv_wr32(priv, 0x40415c, 0x00000000);
+	nv_wr32(priv, 0x404170, 0x00000000);
+}
+
+static void
+nvc0_graph_init_unk44xx(struct nvc0_graph_priv *priv) /* zeroes 0x404488 and 0x40448c */
+{
+	nv_wr32(priv, 0x404488, 0x00000000);
+	nv_wr32(priv, 0x40448c, 0x00000000);
+}
+
+static void
+nvc0_graph_init_unk78xx(struct nvc0_graph_priv *priv) /* zeroes 0x407808 */
+{
+	nv_wr32(priv, 0x407808, 0x00000000);
+}
+
+static void
+nvc0_graph_init_unk60xx(struct nvc0_graph_priv *priv) /* zeroes 0x406024 */
+{
+	nv_wr32(priv, 0x406024, 0x00000000);
+}
+
+static void
+nvc0_graph_init_unk64xx(struct nvc0_graph_priv *priv) /* chipset-dependent writes to 0x4064f0..0x4064f8 */
+{
+	switch (nv_device(priv)->chipset) {
+	case 0xd9:
+	case 0xd7: /* 0xd9/0xd7 only: zero the three registers */
+		nv_wr32(priv, 0x4064f0, 0x00000000);
+		nv_wr32(priv, 0x4064f4, 0x00000000);
+		nv_wr32(priv, 0x4064f8, 0x00000000);
+		break;
+	case 0xc0:
+	case 0xc3:
+	case 0xc4:
+	case 0xc1:
+	case 0xc8:
+	case 0xce:
+	case 0xcf: /* other listed chipsets: nothing is written */
+		break;
+	default: /* chipset missing from both lists above */
+		BUG_ON(1);
+		break;
+	}
+}
+
+static void
+nvc0_graph_init_unk58xx(struct nvc0_graph_priv *priv) /* fixed writes to 0x4058xx/0x4059xx, some chipset-dependent */
+{
+	nv_wr32(priv, 0x405844, 0x00ffffff);
+	nv_wr32(priv, 0x405850, 0x00000000);
+	switch (nv_device(priv)->chipset) { /* 0x405900: written on every listed chipset except 0xc0/0xc8 */
+	case 0xc3:
+	case 0xc4:
+	case 0xc1:
+	case 0xce:
+	case 0xcf:
+	case 0xd9:
+	case 0xd7:
+		nv_wr32(priv, 0x405900, 0x00002834);
+		break;
+	case 0xc0:
+	case 0xc8:
+		break;
+	default: /* chipset missing from both lists above */
+		BUG_ON(1);
+		break;
+	}
+	nv_wr32(priv, 0x405908, 0x00000000);
+	switch (nv_device(priv)->chipset) { /* 0x405928/0x40592c: written on 0xd9/0xd7 only */
+	case 0xd9:
+	case 0xd7:
+		nv_wr32(priv, 0x405928, 0x00000000);
+		nv_wr32(priv, 0x40592c, 0x00000000);
+		break;
+	case 0xc0:
+	case 0xc3:
+	case 0xc4:
+	case 0xc1:
+	case 0xc8:
+	case 0xce:
+	case 0xcf:
+		break;
+	default:
+		BUG_ON(1);
+		break;
+	}
+}
+
+static void
+nvc0_graph_init_unk80xx(struct nvc0_graph_priv *priv) /* zeroes 0x40803c */
+{
+	nv_wr32(priv, 0x40803c, 0x00000000);
+}
+
+static void
+nvc0_graph_init_gpc(struct nvc0_graph_priv *priv) /* fixed writes to 0x418xxx/0x419xxx; any unlisted chipset hits BUG_ON */
+{
+	switch (nv_device(priv)->chipset) { /* 0x418408: written on 0xd9/0xd7 only */
+	case 0xd9:
+	case 0xd7:
+		nv_wr32(priv, 0x418408, 0x00000000);
+		break;
+	case 0xc0:
+	case 0xc3:
+	case 0xc4:
+	case 0xc1:
+	case 0xc8:
+	case 0xce:
+	case 0xcf:
+		break;
+	default: /* chipset missing from both lists above */
+		BUG_ON(1);
+		break;
+	}
+	nv_wr32(priv, 0x4184a0, 0x00000000);
+	switch (nv_device(priv)->chipset) { /* 0x4184a4/0x4184a8: written on 0xd9/0xd7 only */
+	case 0xd9:
+	case 0xd7:
+		nv_wr32(priv, 0x4184a4, 0x00000000);
+		nv_wr32(priv, 0x4184a8, 0x00000000);
+		break;
+	case 0xc0:
+	case 0xc3:
+	case 0xc4:
+	case 0xc1:
+	case 0xc8:
+	case 0xce:
+	case 0xcf:
+		break;
+	default:
+		BUG_ON(1);
+		break;
+	}
+	nv_wr32(priv, 0x418604, 0x00000000);
+	nv_wr32(priv, 0x418680, 0x00000000);
+	switch (nv_device(priv)->chipset) { /* 0x418714: 0 on 0xd9/0xd7/0xc1, 0x80000000 on the other listed chipsets */
+	case 0xd9:
+	case 0xd7:
+	case 0xc1:
+		nv_wr32(priv, 0x418714, 0x00000000);
+		break;
+	case 0xc0:
+	case 0xc3:
+	case 0xc4:
+	case 0xc8:
+	case 0xce:
+	case 0xcf:
+		nv_wr32(priv, 0x418714, 0x80000000);
+		break;
+	default:
+		BUG_ON(1);
+		break;
+	}
+	nv_wr32(priv, 0x418384, 0x00000000);
+	nv_wr32(priv, 0x418814, 0x00000000);
+	nv_wr32(priv, 0x418818, 0x00000000);
+	nv_wr32(priv, 0x41881c, 0x00000000);
+	nv_wr32(priv, 0x418b04, 0x00000000);
+	switch (nv_device(priv)->chipset) { /* 0x4188c8: 0 on 0xd9/0xd7/0xc1/0xc8, 0x80000000 on the other listed chipsets */
+	case 0xd9:
+	case 0xd7:
+	case 0xc1:
+	case 0xc8:
+		nv_wr32(priv, 0x4188c8, 0x00000000);
+		break;
+	case 0xc0:
+	case 0xc3:
+	case 0xc4:
+	case 0xce:
+	case 0xcf:
+		nv_wr32(priv, 0x4188c8, 0x80000000);
+		break;
+	default:
+		BUG_ON(1);
+		break;
+	}
+	nv_wr32(priv, 0x4188cc, 0x00000000);
+	nv_wr32(priv, 0x4188d0, 0x00010000);
+	nv_wr32(priv, 0x4188d4, 0x00000001);
+	nv_wr32(priv, 0x418910, 0x00010001);
+	nv_wr32(priv, 0x418914, 0x00000301);
+	nv_wr32(priv, 0x418918, 0x00800000);
+	nv_wr32(priv, 0x418980, 0x77777770);
+	nv_wr32(priv, 0x418984, 0x77777777);
+	nv_wr32(priv, 0x418988, 0x77777777);
+	nv_wr32(priv, 0x41898c, 0x77777777);
+	nv_wr32(priv, 0x418c04, 0x00000000);
+	switch (nv_device(priv)->chipset) { /* 0x418c64/0x418c68: written on 0xd9/0xd7 only */
+	case 0xd9:
+	case 0xd7:
+		nv_wr32(priv, 0x418c64, 0x00000000);
+		nv_wr32(priv, 0x418c68, 0x00000000);
+		break;
+	case 0xc0:
+	case 0xc3:
+	case 0xc4:
+	case 0xc1:
+	case 0xc8:
+	case 0xce:
+	case 0xcf:
+		break;
+	default:
+		BUG_ON(1);
+		break;
+	}
+	nv_wr32(priv, 0x418c88, 0x00000000);
+	switch (nv_device(priv)->chipset) { /* 0x418cb4/0x418cb8: written on 0xd9/0xd7 only */
+	case 0xd9:
+	case 0xd7:
+		nv_wr32(priv, 0x418cb4, 0x00000000);
+		nv_wr32(priv, 0x418cb8, 0x00000000);
+		break;
+	case 0xc0:
+	case 0xc3:
+	case 0xc4:
+	case 0xc1:
+	case 0xc8:
+	case 0xce:
+	case 0xcf:
+		break;
+	default:
+		BUG_ON(1);
+		break;
+	}
+	nv_wr32(priv, 0x418d00, 0x00000000);
+	switch (nv_device(priv)->chipset) { /* 0x418d28/0x418d2c/0x418f00: written on 0xd9/0xd7 only */
+	case 0xd9:
+	case 0xd7:
+		nv_wr32(priv, 0x418d28, 0x00000000);
+		nv_wr32(priv, 0x418d2c, 0x00000000);
+		nv_wr32(priv, 0x418f00, 0x00000000);
+		break;
+	case 0xc0:
+	case 0xc3:
+	case 0xc4:
+	case 0xc1:
+	case 0xc8:
+	case 0xce:
+	case 0xcf:
+		break;
+	default:
+		BUG_ON(1);
+		break;
+	}
+	nv_wr32(priv, 0x418f08, 0x00000000);
+	switch (nv_device(priv)->chipset) { /* 0x418e00: 3 on 0xd9/0xd7/0xc1, 0x50 on the other listed chipsets */
+	case 0xd9:
+	case 0xd7:
+		nv_wr32(priv, 0x418f20, 0x00000000);
+		nv_wr32(priv, 0x418f24, 0x00000000);
+		/*fall-through: 0xd9/0xd7 also take the 0xc1 write to 0x418e00*/
+	case 0xc1:
+		nv_wr32(priv, 0x418e00, 0x00000003);
+		break;
+	case 0xc0:
+	case 0xc3:
+	case 0xc4:
+	case 0xc8:
+	case 0xce:
+	case 0xcf:
+		nv_wr32(priv, 0x418e00, 0x00000050);
+		break;
+	default:
+		BUG_ON(1);
+		break;
+	}
+	nv_wr32(priv, 0x418e08, 0x00000000);
+	switch (nv_device(priv)->chipset) { /* 0x418e1c/0x418e20: written on 0xd9/0xd7 only */
+	case 0xd9:
+	case 0xd7:
+		nv_wr32(priv, 0x418e1c, 0x00000000);
+		nv_wr32(priv, 0x418e20, 0x00000000);
+		break;
+	case 0xc0:
+	case 0xc3:
+	case 0xc4:
+	case 0xc1:
+	case 0xc8:
+	case 0xce:
+	case 0xcf:
+		break;
+	default:
+		BUG_ON(1);
+		break;
+	}
+	nv_wr32(priv, 0x41900c, 0x00000000);
+	nv_wr32(priv, 0x419018, 0x00000000);
+}
+
+static void
+nvc0_graph_init_tpc(struct nvc0_graph_priv *priv) /* fixed writes to 0x4198xx..0x419fxx; any unlisted chipset hits BUG_ON */
+{
+	nv_wr32(priv, 0x419d08, 0x00000000);
+	nv_wr32(priv, 0x419d0c, 0x00000000);
+	nv_wr32(priv, 0x419d10, 0x00000014);
+	nv_wr32(priv, 0x419ab0, 0x00000000);
+	switch (nv_device(priv)->chipset) { /* 0x419ac8: written on every listed chipset except 0xc0/0xc8 */
+	case 0xc3:
+	case 0xc4:
+	case 0xc1:
+	case 0xce:
+	case 0xcf:
+	case 0xd9:
+	case 0xd7:
+		nv_wr32(priv, 0x419ac8, 0x00000000);
+		break;
+	case 0xc0:
+	case 0xc8:
+		break;
+	default: /* chipset missing from both lists above */
+		BUG_ON(1);
+		break;
+	}
+	nv_wr32(priv, 0x419ab8, 0x000000e7);
+	nv_wr32(priv, 0x419abc, 0x00000000);
+	nv_wr32(priv, 0x419ac0, 0x00000000);
+	switch (nv_device(priv)->chipset) { /* 0x419ab4: 0xd9/0xd7 only; 0x41980c: 0x10 there, 0 on the other listed chipsets */
+	case 0xd9:
+	case 0xd7:
+		nv_wr32(priv, 0x419ab4, 0x00000000);
+		nv_wr32(priv, 0x41980c, 0x00000010);
+		break;
+	case 0xc0:
+	case 0xc3:
+	case 0xc4:
+	case 0xc1:
+	case 0xc8:
+	case 0xce:
+	case 0xcf:
+		nv_wr32(priv, 0x41980c, 0x00000000);
+		break;
+	default:
+		BUG_ON(1);
+		break;
+	}
+	nv_wr32(priv, 0x419810, 0x00000000);
+	switch (nv_device(priv)->chipset) { /* 0x419814: 4 on 0xd9/0xd7/0xc1, 0 on the other listed chipsets */
+	case 0xd9:
+	case 0xd7:
+	case 0xc1:
+		nv_wr32(priv, 0x419814, 0x00000004);
+		break;
+	case 0xc0:
+	case 0xc3:
+	case 0xc4:
+	case 0xc8:
+	case 0xce:
+	case 0xcf:
+		nv_wr32(priv, 0x419814, 0x00000000);
+		break;
+	default:
+		BUG_ON(1);
+		break;
+	}
+	nv_wr32(priv, 0x419844, 0x00000000);
+	switch (nv_device(priv)->chipset) { /* 0x41984c: 0xa918 on 0xd9/0xd7, 0x5bc5 on the other listed chipsets */
+	case 0xd9:
+	case 0xd7:
+		nv_wr32(priv, 0x41984c, 0x0000a918);
+		break;
+	case 0xc0:
+	case 0xc3:
+	case 0xc4:
+	case 0xc1:
+	case 0xc8:
+	case 0xce:
+	case 0xcf:
+		nv_wr32(priv, 0x41984c, 0x00005bc5);
+		break;
+	default:
+		BUG_ON(1);
+		break;
+	}
+	nv_wr32(priv, 0x419850, 0x00000000);
+	nv_wr32(priv, 0x419854, 0x00000000);
+	nv_wr32(priv, 0x419858, 0x00000000);
+	nv_wr32(priv, 0x41985c, 0x00000000);
+	switch (nv_device(priv)->chipset) { /* 0x419880: written on every listed chipset except 0xc0/0xc8 */
+	case 0xc3:
+	case 0xc4:
+	case 0xc1:
+	case 0xce:
+	case 0xcf:
+	case 0xd9:
+	case 0xd7:
+		nv_wr32(priv, 0x419880, 0x00000002);
+		break;
+	case 0xc0:
+	case 0xc8:
+		break;
+	default:
+		BUG_ON(1);
+		break;
+	}
+	nv_wr32(priv, 0x419c98, 0x00000000);
+	nv_wr32(priv, 0x419ca8, 0x80000000);
+	nv_wr32(priv, 0x419cb4, 0x00000000);
+	nv_wr32(priv, 0x419cb8, 0x00008bf4);
+	nv_wr32(priv, 0x419cbc, 0x28137606);
+	nv_wr32(priv, 0x419cc0, 0x00000000);
+	nv_wr32(priv, 0x419cc4, 0x00000000);
+	nv_wr32(priv, 0x419bd4, 0x00800000);
+	nv_wr32(priv, 0x419bdc, 0x00000000);
+	switch (nv_device(priv)->chipset) { /* 0x419bf8/0x419bfc: written on 0xd9/0xd7 only */
+	case 0xd9:
+	case 0xd7:
+		nv_wr32(priv, 0x419bf8, 0x00000000);
+		nv_wr32(priv, 0x419bfc, 0x00000000);
+		break;
+	case 0xc0:
+	case 0xc3:
+	case 0xc4:
+	case 0xc1:
+	case 0xc8:
+	case 0xce:
+	case 0xcf:
+		break;
+	default:
+		BUG_ON(1);
+		break;
+	}
+	nv_wr32(priv, 0x419d2c, 0x00000000);
+	switch (nv_device(priv)->chipset) { /* 0x419d48/0x419d4c: written on 0xd9/0xd7 only */
+	case 0xd9:
+	case 0xd7:
+		nv_wr32(priv, 0x419d48, 0x00000000);
+		nv_wr32(priv, 0x419d4c, 0x00000000);
+		break;
+	case 0xc0:
+	case 0xc3:
+	case 0xc4:
+	case 0xc1:
+	case 0xc8:
+	case 0xce:
+	case 0xcf:
+		break;
+	default:
+		BUG_ON(1);
+		break;
+	}
+	nv_wr32(priv, 0x419c0c, 0x00000000);
+	nv_wr32(priv, 0x419e00, 0x00000000);
+	nv_wr32(priv, 0x419ea0, 0x00000000);
+	nv_wr32(priv, 0x419ea4, 0x00000100);
+	switch (nv_device(priv)->chipset) { /* 0x419ea8: 0x02001100 on 0xd9/0xd7, 0x00001100 on the other listed chipsets */
+	case 0xd9:
+	case 0xd7:
+		nv_wr32(priv, 0x419ea8, 0x02001100);
+		break;
+	case 0xc0:
+	case 0xc3:
+	case 0xc4:
+	case 0xc1:
+	case 0xc8:
+	case 0xce:
+	case 0xcf:
+		nv_wr32(priv, 0x419ea8, 0x00001100);
+		break;
+	default:
+		BUG_ON(1);
+		break;
+	}
+
+	switch (nv_device(priv)->chipset) { /* 0x419eac: 0x11100f02 on 0xc8, 0x11100702 on the other listed chipsets */
+	case 0xc8:
+		nv_wr32(priv, 0x419eac, 0x11100f02);
+		break;
+	case 0xc0:
+	case 0xc3:
+	case 0xc4:
+	case 0xc1:
+	case 0xce:
+	case 0xcf:
+	case 0xd9:
+	case 0xd7:
+		nv_wr32(priv, 0x419eac, 0x11100702);
+		break;
+	default:
+		BUG_ON(1);
+		break;
+	}
+	nv_wr32(priv, 0x419eb0, 0x00000003);
+	nv_wr32(priv, 0x419eb4, 0x00000000);
+	nv_wr32(priv, 0x419eb8, 0x00000000);
+	nv_wr32(priv, 0x419ebc, 0x00000000);
+	nv_wr32(priv, 0x419ec0, 0x00000000);
+	switch (nv_device(priv)->chipset) { /* 0x419ec8..0x419ed0: values differ between the two chipset groups */
+	case 0xc3:
+	case 0xc4:
+	case 0xc1:
+	case 0xce:
+	case 0xcf:
+	case 0xd9:
+	case 0xd7:
+		nv_wr32(priv, 0x419ec8, 0x0e063818);
+		nv_wr32(priv, 0x419ecc, 0x0e060e06);
+		nv_wr32(priv, 0x419ed0, 0x00003818);
+		break;
+	case 0xc0:
+	case 0xc8: /* this group writes only 0x419ec8 and 0x419ed0; 0x419ecc is not touched */
+		nv_wr32(priv, 0x419ec8, 0x06060618);
+		nv_wr32(priv, 0x419ed0, 0x0eff0e38);
+		break;
+	default:
+		BUG_ON(1);
+		break;
+	}
+	nv_wr32(priv, 0x419ed4, 0x011104f1);
+	nv_wr32(priv, 0x419edc, 0x00000000);
+	nv_wr32(priv, 0x419f00, 0x00000000);
+	nv_wr32(priv, 0x419f2c, 0x00000000);
+}
+
+static void
+nvc0_graph_init_unk88xx(struct nvc0_graph_priv *priv) /* fixed writes to 0x4088xx/0x4089xx, identical on every chipset */
+{
+	nv_wr32(priv, 0x40880c, 0x00000000);
+	nv_wr32(priv, 0x408910, 0x00000000);
+	nv_wr32(priv, 0x408914, 0x00000000);
+	nv_wr32(priv, 0x408918, 0x00000000);
+	nv_wr32(priv, 0x40891c, 0x00000000);
+	nv_wr32(priv, 0x408920, 0x00000000);
+	nv_wr32(priv, 0x408924, 0x00000000);
+	nv_wr32(priv, 0x408928, 0x00000000);
+	nv_wr32(priv, 0x40892c, 0x00000000);
+	nv_wr32(priv, 0x408930, 0x00000000);
+	nv_wr32(priv, 0x408950, 0x00000000);
+	nv_wr32(priv, 0x408954, 0x0000ffff);
+	nv_wr32(priv, 0x408984, 0x00000000);
+	nv_wr32(priv, 0x408988, 0x08040201); /* bytes are 1 << 0 .. 1 << 3 */
+	nv_wr32(priv, 0x40898c, 0x80402010); /* bytes are 1 << 4 .. 1 << 7 */
+}
+
 static void
 nvc0_graph_init_gpc_0(struct nvc0_graph_priv *priv)
 {
@@ -925,7 +1495,16 @@ nvc0_graph_init(struct nouveau_object *object)
 
 	nvc0_graph_init_obj418880(priv);
 	nvc0_graph_init_regs(priv);
-	/*nvc0_graph_init_unitplemented_magics(priv);*/
+	nvc0_graph_init_unk40xx(priv);
+	nvc0_graph_init_unk44xx(priv);
+	nvc0_graph_init_unk78xx(priv);
+	nvc0_graph_init_unk60xx(priv);
+	nvc0_graph_init_unk64xx(priv);
+	nvc0_graph_init_unk58xx(priv);
+	nvc0_graph_init_unk80xx(priv);
+	nvc0_graph_init_gpc(priv);
+	nvc0_graph_init_tpc(priv);
+	nvc0_graph_init_unk88xx(priv);
 	nvc0_graph_init_gpc_0(priv);
 	/*nvc0_graph_init_unitplemented_c242(priv);*/
 

+ 4 - 2
drivers/gpu/drm/nouveau/core/engine/graph/nvc0.h

@@ -38,8 +38,8 @@
 #include <engine/fifo.h>
 #include <engine/graph.h>
 
-#define GPC_MAX 4
-#define TPC_MAX 32
+#define GPC_MAX 32 /* raised from 4 */
+#define TPC_MAX (GPC_MAX * 8) /* now derived from GPC_MAX; NOTE(review): 8 presumably = TPCs per GPC - confirm */
 
 #define ROP_BCAST(r)      (0x408800 + (r))
 #define ROP_UNIT(u, r)    (0x410000 + (u) * 0x400 + (r))
@@ -124,6 +124,8 @@ nvc0_graph_class(void *obj)
 	case 0xe7:
 	case 0xe6:
 		return 0xa097;
+	case 0xf0:
+		return 0xa197;
 	default:
 		return 0;
 	}

+ 341 - 43
drivers/gpu/drm/nouveau/core/engine/graph/nve0.c

@@ -36,7 +36,6 @@ nve0_graph_sclass[] = {
 	{ 0xa040, &nouveau_object_ofuncs },
 	{ 0xa097, &nouveau_object_ofuncs },
 	{ 0xa0c0, &nouveau_object_ofuncs },
-	{ 0xa0b5, &nouveau_object_ofuncs },
 	{}
 };
 
@@ -90,9 +89,9 @@ static const struct nouveau_enum nve0_mp_warp_error[] = {
 	{}
 };
 
-static const struct nouveau_enum nve0_mp_global_error[] = {
-	{ 2, "MULTIPLE_WARP_ERRORS" },
-	{ 3, "OUT_OF_STACK_SPACE" },
+static const struct nouveau_bitfield nve0_mp_global_error[] = { /* now bit masks, not bit indices */
+	{ 0x00000004, "MULTIPLE_WARP_ERRORS" }, /* bit 2 */
+	{ 0x00000008, "OUT_OF_STACK_SPACE" }, /* bit 3 */
 	{}
 };
 
@@ -122,72 +121,63 @@ static const struct nouveau_enum nve0_sked_error[] = {
 };
 
 static void
-nve0_graph_mp_trap(struct nvc0_graph_priv *priv, int gpc, int tp)
+nve0_graph_mp_trap(struct nvc0_graph_priv *priv, int gpc, int tpc) /* log one MP trap, then write 0x648/0x650 back */
 {
-	int i;
-	u32 werr = nv_rd32(priv, TPC_UNIT(gpc, tp, 0x648));
-	u32 gerr = nv_rd32(priv, TPC_UNIT(gpc, tp, 0x650));
-
-	nv_error(priv, "GPC%i/TP%i/MP trap:", gpc, tp);
+	u32 werr = nv_rd32(priv, TPC_UNIT(gpc, tpc, 0x648)); /* warp error; low 16 bits decoded below */
+	u32 gerr = nv_rd32(priv, TPC_UNIT(gpc, tpc, 0x650)); /* global error bits */
 
-	for (i = 0; i <= 31; ++i) {
-		if (!(gerr & (1 << i)))
-			continue;
-		pr_cont(" ");
-		nouveau_enum_print(nve0_mp_global_error, i);
-	}
+	nv_error(priv, "GPC%i/TPC%i/MP trap:", gpc, tpc); /* no newline yet: the line is continued below */
+	nouveau_bitfield_print(nve0_mp_global_error, gerr); /* replaces the per-bit enum loop */
 	if (werr) {
 		pr_cont(" ");
 		nouveau_enum_print(nve0_mp_warp_error, werr & 0xffff);
 	}
 	pr_cont("\n");
 
-	/* disable MP trap to avoid spam */
-	nv_mask(priv, TPC_UNIT(gpc, tp, 0x50c), 0x2, 0x0);
-
-	/* TODO: figure out how to resume after an MP trap */
+	nv_wr32(priv, TPC_UNIT(gpc, tpc, 0x648), 0x00000000);
+	nv_wr32(priv, TPC_UNIT(gpc, tpc, 0x650), gerr); /* writes back the bits just read; presumably acks them - confirm */
 }
 
 static void
-nve0_graph_tp_trap(struct nvc0_graph_priv *priv, int gpc, int tp)
+nve0_graph_tpc_trap(struct nvc0_graph_priv *priv, int gpc, int tpc) /* report each trap source flagged in TPC reg 0x508 */
 {
-	u32 stat = nv_rd32(priv, TPC_UNIT(gpc, tp, 0x508));
+	u32 stat = nv_rd32(priv, TPC_UNIT(gpc, tpc, 0x508)); /* handled bits are cleared from this local copy as we go */
 
 	if (stat & 0x1) {
-		u32 trap = nv_rd32(priv, TPC_UNIT(gpc, tp, 0x224));
-		nv_error(priv, "GPC%i/TP%i/TEX trap: %08x\n",
-			 gpc, tp, trap);
+		u32 trap = nv_rd32(priv, TPC_UNIT(gpc, tpc, 0x224));
+		nv_error(priv, "GPC%i/TPC%i/TEX trap: %08x\n",
+			 gpc, tpc, trap);
 
-		nv_wr32(priv, TPC_UNIT(gpc, tp, 0x224), 0xc0000000);
+		nv_wr32(priv, TPC_UNIT(gpc, tpc, 0x224), 0xc0000000);
 		stat &= ~0x1;
 	}
 
 	if (stat & 0x2) {
-		nve0_graph_mp_trap(priv, gpc, tp);
+		nve0_graph_mp_trap(priv, gpc, tpc); /* MP traps are decoded by the helper */
 		stat &= ~0x2;
 	}
 
 	if (stat & 0x4) {
-		u32 trap = nv_rd32(priv, TPC_UNIT(gpc, tp, 0x084));
-		nv_error(priv, "GPC%i/TP%i/POLY trap: %08x\n",
-			 gpc, tp, trap);
+		u32 trap = nv_rd32(priv, TPC_UNIT(gpc, tpc, 0x084));
+		nv_error(priv, "GPC%i/TPC%i/POLY trap: %08x\n",
+			 gpc, tpc, trap);
 
-		nv_wr32(priv, TPC_UNIT(gpc, tp, 0x084), 0xc0000000);
+		nv_wr32(priv, TPC_UNIT(gpc, tpc, 0x084), 0xc0000000);
 		stat &= ~0x4;
 	}
 
 	if (stat & 0x8) {
-		u32 trap = nv_rd32(priv, TPC_UNIT(gpc, tp, 0x48c));
-		nv_error(priv, "GPC%i/TP%i/L1C trap: %08x\n",
-			 gpc, tp, trap);
+		u32 trap = nv_rd32(priv, TPC_UNIT(gpc, tpc, 0x48c));
+		nv_error(priv, "GPC%i/TPC%i/L1C trap: %08x\n",
+			 gpc, tpc, trap);
 
-		nv_wr32(priv, TPC_UNIT(gpc, tp, 0x48c), 0xc0000000);
+		nv_wr32(priv, TPC_UNIT(gpc, tpc, 0x48c), 0xc0000000);
 		stat &= ~0x8;
 	}
 
 	if (stat) {
-		nv_error(priv, "GPC%i/TP%i: unknown stat %08x\n",
-			 gpc, tp, stat);
+		nv_error(priv, "GPC%i/TPC%i: unknown stat %08x\n",
+			 gpc, tpc, stat); /* any bits left over were not handled above */
 	}
 }
 
@@ -199,7 +189,7 @@ nve0_graph_gpc_trap(struct nvc0_graph_priv *priv)
 
 	for (gpc = 0; gpc < 4; ++gpc) {
 		u32 stat;
-		int tp;
+		int tpc;
 
 		if (!(mask & (1 << gpc)))
 			continue;
@@ -258,9 +248,9 @@ nve0_graph_gpc_trap(struct nvc0_graph_priv *priv)
 			stat &= ~0x0008;
 		}
 
-		for (tp = 0; tp < 8; ++tp) {
-			if (stat & (1 << (16 + tp)))
-				nve0_graph_tp_trap(priv, gpc, tp);
+		for (tpc = 0; tpc < 8; ++tpc) {
+			if (stat & (1 << (16 + tpc)))
+				nve0_graph_tpc_trap(priv, gpc, tpc);
 		}
 		stat &= ~0xff0000;
 
@@ -485,6 +475,7 @@ nve0_graph_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
 	case 0xe6:
 		priv->magic_not_rop_nr = 1;
 		break;
+	case 0xf0:
 	default:
 		break;
 	}
@@ -522,19 +513,313 @@ nve0_graph_init_regs(struct nvc0_graph_priv *priv)
 	nv_wr32(priv, 0x400124, 0x00000002);
 }
 
+static void
+nve0_graph_init_unk40xx(struct nvc0_graph_priv *priv)
+{	/* Zero 0x40415c and 0x404170, plus 0x4041b4 on chipset 0xf0. */
+	nv_wr32(priv, 0x40415c, 0x00000000);
+	nv_wr32(priv, 0x404170, 0x00000000);
+	switch (nv_device(priv)->chipset) {
+	case 0xf0:	/* only 0xf0 gets the extra 0x4041b4 write */
+		nv_wr32(priv, 0x4041b4, 0x00000000);
+		break;
+	default:	/* every other chipset: nothing more to program */
+		break;
+	}
+}
+/* Zero registers 0x404488 and 0x40448c (same values on every chipset). */
+static void
+nve0_graph_init_unk44xx(struct nvc0_graph_priv *priv)
+{
+	nv_wr32(priv, 0x404488, 0x00000000);
+	nv_wr32(priv, 0x40448c, 0x00000000);
+}
+/* Zero register 0x407808 (same value on every chipset). */
+static void
+nve0_graph_init_unk78xx(struct nvc0_graph_priv *priv)
+{
+	nv_wr32(priv, 0x407808, 0x00000000);
+}
+/* Zero register 0x406024 (same value on every chipset). */
+static void
+nve0_graph_init_unk60xx(struct nvc0_graph_priv *priv)
+{
+	nv_wr32(priv, 0x406024, 0x00000000);
+}
+/* Zero the three consecutive registers 0x4064f0, 0x4064f4 and 0x4064f8. */
+static void
+nve0_graph_init_unk64xx(struct nvc0_graph_priv *priv)
+{
+	nv_wr32(priv, 0x4064f0, 0x00000000);
+	nv_wr32(priv, 0x4064f4, 0x00000000);
+	nv_wr32(priv, 0x4064f8, 0x00000000);
+}
+/* Write the 0x4058xx/0x4059xx defaults; only 0x405900 is chipset-specific. */
+static void
+nve0_graph_init_unk58xx(struct nvc0_graph_priv *priv)
+{
+	nv_wr32(priv, 0x405844, 0x00ffffff);
+	nv_wr32(priv, 0x405850, 0x00000000);
+	switch (nv_device(priv)->chipset) {
+	case 0xf0:	/* 0xf0 programs 0x405900 with 0x0000ff00 */
+		nv_wr32(priv, 0x405900, 0x0000ff00);
+		break;
+	default:	/* all other chipsets program 0x0000ff34 */
+		nv_wr32(priv, 0x405900, 0x0000ff34);
+		break;
+	}
+	nv_wr32(priv, 0x405908, 0x00000000);
+	nv_wr32(priv, 0x405928, 0x00000000);
+	nv_wr32(priv, 0x40592c, 0x00000000);
+}
+/* Zero register 0x40803c (same value on every chipset). */
+static void
+nve0_graph_init_unk80xx(struct nvc0_graph_priv *priv)
+{
+	nv_wr32(priv, 0x40803c, 0x00000000);
+}
+/* Zero 0x407010; chipset 0xf0 additionally programs 0x407040 and 0x407048. */
+static void
+nve0_graph_init_unk70xx(struct nvc0_graph_priv *priv)
+{
+	nv_wr32(priv, 0x407010, 0x00000000);
+	switch (nv_device(priv)->chipset) {
+	case 0xf0:	/* two extra writes that only 0xf0 receives */
+		nv_wr32(priv, 0x407040, 0x80440424);
+		nv_wr32(priv, 0x407048, 0x0000000a);
+		break;
+	default:	/* every other chipset: nothing more to program */
+		break;
+	}
+}
+/* Zero 0x405b50 and, on chipset 0xf0 only, 0x405b44 (both in 0x405bxx). */
+static void
+nve0_graph_init_unk5bxx(struct nvc0_graph_priv *priv)
+{
+	switch (nv_device(priv)->chipset) {
+	case 0xf0:	/* was 0x505b44: outside the 0x405bxx group this helper covers */
+		nv_wr32(priv, 0x405b44, 0x00000000);
+		break;
+	default:	/* every other chipset: only the common write below */
+		break;
+	}
+	nv_wr32(priv, 0x405b50, 0x00000000);
+}
+/* Write 0x418xxx/0x4190xx defaults; 0x418f00 and 0x418e00 differ on 0xf0. */
+static void
+nve0_graph_init_gpc(struct nvc0_graph_priv *priv)
+{
+	nv_wr32(priv, 0x418408, 0x00000000);
+	nv_wr32(priv, 0x4184a0, 0x00000000);
+	nv_wr32(priv, 0x4184a4, 0x00000000);
+	nv_wr32(priv, 0x4184a8, 0x00000000);
+	nv_wr32(priv, 0x418604, 0x00000000);
+	nv_wr32(priv, 0x418680, 0x00000000);
+	nv_wr32(priv, 0x418714, 0x00000000);
+	nv_wr32(priv, 0x418384, 0x00000000);
+	nv_wr32(priv, 0x418814, 0x00000000);
+	nv_wr32(priv, 0x418818, 0x00000000);
+	nv_wr32(priv, 0x41881c, 0x00000000);
+	nv_wr32(priv, 0x418b04, 0x00000000);
+	nv_wr32(priv, 0x4188c8, 0x00000000);
+	nv_wr32(priv, 0x4188cc, 0x00000000);
+	nv_wr32(priv, 0x4188d0, 0x00010000);
+	nv_wr32(priv, 0x4188d4, 0x00000001);
+	nv_wr32(priv, 0x418910, 0x00010001);
+	nv_wr32(priv, 0x418914, 0x00000301);
+	nv_wr32(priv, 0x418918, 0x00800000);
+	nv_wr32(priv, 0x418980, 0x77777770);
+	nv_wr32(priv, 0x418984, 0x77777777);
+	nv_wr32(priv, 0x418988, 0x77777777);
+	nv_wr32(priv, 0x41898c, 0x77777777);
+	nv_wr32(priv, 0x418c04, 0x00000000);
+	nv_wr32(priv, 0x418c64, 0x00000000);
+	nv_wr32(priv, 0x418c68, 0x00000000);
+	nv_wr32(priv, 0x418c88, 0x00000000);
+	nv_wr32(priv, 0x418cb4, 0x00000000);
+	nv_wr32(priv, 0x418cb8, 0x00000000);
+	nv_wr32(priv, 0x418d00, 0x00000000);
+	nv_wr32(priv, 0x418d28, 0x00000000);
+	nv_wr32(priv, 0x418d2c, 0x00000000);
+	switch (nv_device(priv)->chipset) {
+	case 0xf0:	/* 0xf0 programs 0x418f00 with 0x00000400 */
+		nv_wr32(priv, 0x418f00, 0x00000400);
+		break;
+	default:	/* all other chipsets clear 0x418f00 */
+		nv_wr32(priv, 0x418f00, 0x00000000);
+		break;
+	}
+	nv_wr32(priv, 0x418f08, 0x00000000);
+	nv_wr32(priv, 0x418f20, 0x00000000);
+	nv_wr32(priv, 0x418f24, 0x00000000);
+	switch (nv_device(priv)->chipset) {
+	case 0xf0:	/* 0xf0 clears 0x418e00 */
+		nv_wr32(priv, 0x418e00, 0x00000000);
+		break;
+	default:	/* all other chipsets program 0x00000060 */
+		nv_wr32(priv, 0x418e00, 0x00000060);
+		break;
+	}
+	nv_wr32(priv, 0x418e08, 0x00000000);
+	nv_wr32(priv, 0x418e1c, 0x00000000);
+	nv_wr32(priv, 0x418e20, 0x00000000);
+	nv_wr32(priv, 0x41900c, 0x00000000);
+	nv_wr32(priv, 0x419018, 0x00000000);
+}
+/* Write 0x4198xx-0x419fxx defaults; several registers are 0xf0-specific. */
+static void
+nve0_graph_init_tpc(struct nvc0_graph_priv *priv)
+{
+	nv_wr32(priv, 0x419d0c, 0x00000000);
+	nv_wr32(priv, 0x419d10, 0x00000014);
+	nv_wr32(priv, 0x419ab0, 0x00000000);
+	nv_wr32(priv, 0x419ac8, 0x00000000);
+	nv_wr32(priv, 0x419ab8, 0x000000e7);
+	switch (nv_device(priv)->chipset) {
+	case 0xf0:	/* 0x419aec is written on 0xf0 only */
+		nv_wr32(priv, 0x419aec, 0x00000000);
+		break;
+	default:
+		break;
+	}
+	nv_wr32(priv, 0x419abc, 0x00000000);
+	nv_wr32(priv, 0x419ac0, 0x00000000);
+	nv_wr32(priv, 0x419ab4, 0x00000000);
+	switch (nv_device(priv)->chipset) {
+	case 0xf0:	/* 0x419aa8/0x419aac are written on 0xf0 only */
+		nv_wr32(priv, 0x419aa8, 0x00000000);
+		nv_wr32(priv, 0x419aac, 0x00000000);
+		break;
+	default:
+		break;
+	}
+	nv_wr32(priv, 0x41980c, 0x00000010);
+	nv_wr32(priv, 0x419844, 0x00000000);
+	nv_wr32(priv, 0x419850, 0x00000004);
+	nv_wr32(priv, 0x419854, 0x00000000);
+	nv_wr32(priv, 0x419858, 0x00000000);
+	nv_wr32(priv, 0x419c98, 0x00000000);
+	nv_wr32(priv, 0x419ca8, 0x00000000);
+	nv_wr32(priv, 0x419cb0, 0x01000000);
+	nv_wr32(priv, 0x419cb4, 0x00000000);
+	nv_wr32(priv, 0x419cb8, 0x00b08bea);
+	nv_wr32(priv, 0x419c84, 0x00010384);
+	switch (nv_device(priv)->chipset) {
+	case 0xf0:	/* 0x419cbc value differs between 0xf0 and the rest */
+		nv_wr32(priv, 0x419cbc, 0x281b3646);
+		break;
+	default:
+		nv_wr32(priv, 0x419cbc, 0x28137646);
+		break;
+	}
+	nv_wr32(priv, 0x419cc0, 0x00000000);
+	nv_wr32(priv, 0x419cc4, 0x00000000);
+	switch (nv_device(priv)->chipset) {
+	case 0xf0:	/* 0xf0: different 0x419c80/0x419e00 values, plus 0x419ccc/0x419cd0 */
+		nv_wr32(priv, 0x419c80, 0x00020230);
+		nv_wr32(priv, 0x419ccc, 0x00000000);
+		nv_wr32(priv, 0x419cd0, 0x00000000);
+		nv_wr32(priv, 0x419c0c, 0x00000000);
+		nv_wr32(priv, 0x419e00, 0x00000080);
+		break;
+	default:
+		nv_wr32(priv, 0x419c80, 0x00020232);
+		nv_wr32(priv, 0x419c0c, 0x00000000);
+		nv_wr32(priv, 0x419e00, 0x00000000);
+		break;
+	}
+	nv_wr32(priv, 0x419ea0, 0x00000000);
+	nv_wr32(priv, 0x419ee4, 0x00000000);
+	nv_wr32(priv, 0x419ea4, 0x00000100);
+	nv_wr32(priv, 0x419ea8, 0x00000000);
+	nv_wr32(priv, 0x419eb4, 0x00000000);
+	switch (nv_device(priv)->chipset) {
+	case 0xf0:	/* 0xf0 skips the 0x419eb8 write */
+		break;
+	default:
+		nv_wr32(priv, 0x419eb8, 0x00000000);
+		break;
+	}
+	nv_wr32(priv, 0x419ebc, 0x00000000);
+	nv_wr32(priv, 0x419ec0, 0x00000000);
+	nv_wr32(priv, 0x419edc, 0x00000000);
+	nv_wr32(priv, 0x419f00, 0x00000000);
+	switch (nv_device(priv)->chipset) {
+	case 0xf0:	/* 0xf0: wider 0x419f74 value and five extra registers */
+		nv_wr32(priv, 0x419ed0, 0x00003234);
+		nv_wr32(priv, 0x419f74, 0x00015555);
+		nv_wr32(priv, 0x419f80, 0x00000000);
+		nv_wr32(priv, 0x419f84, 0x00000000);
+		nv_wr32(priv, 0x419f88, 0x00000000);
+		nv_wr32(priv, 0x419f8c, 0x00000000);
+		break;
+	default:
+		nv_wr32(priv, 0x419f74, 0x00000555);
+		break;
+	}
+}
+/* Write the 0x41bexx/0x41bfxx defaults (same values on every chipset). */
+static void
+nve0_graph_init_tpcunk(struct nvc0_graph_priv *priv)
+{
+	nv_wr32(priv, 0x41be04, 0x00000000);
+	nv_wr32(priv, 0x41be08, 0x00000004);
+	nv_wr32(priv, 0x41be0c, 0x00000000);
+	nv_wr32(priv, 0x41be10, 0x003b8bc7);
+	nv_wr32(priv, 0x41be14, 0x00000000);
+	nv_wr32(priv, 0x41be18, 0x00000000);
+	nv_wr32(priv, 0x41bfd4, 0x00800000);
+	nv_wr32(priv, 0x41bfdc, 0x00000000);
+	nv_wr32(priv, 0x41bff8, 0x00000000);
+	nv_wr32(priv, 0x41bffc, 0x00000000);
+	nv_wr32(priv, 0x41becc, 0x00000000);
+	nv_wr32(priv, 0x41bee8, 0x00000000);
+	nv_wr32(priv, 0x41beec, 0x00000000);
+}
+/* Write the 0x4088xx/0x4089xx defaults (same values on every chipset). */
+static void
+nve0_graph_init_unk88xx(struct nvc0_graph_priv *priv)
+{
+	nv_wr32(priv, 0x40880c, 0x00000000);
+	nv_wr32(priv, 0x408850, 0x00000004);
+	nv_wr32(priv, 0x408910, 0x00000000);
+	nv_wr32(priv, 0x408914, 0x00000000);
+	nv_wr32(priv, 0x408918, 0x00000000);
+	nv_wr32(priv, 0x40891c, 0x00000000);
+	nv_wr32(priv, 0x408920, 0x00000000);
+	nv_wr32(priv, 0x408924, 0x00000000);
+	nv_wr32(priv, 0x408928, 0x00000000);
+	nv_wr32(priv, 0x40892c, 0x00000000);
+	nv_wr32(priv, 0x408930, 0x00000000);
+	nv_wr32(priv, 0x408950, 0x00000000);
+	nv_wr32(priv, 0x408954, 0x0000ffff);
+	nv_wr32(priv, 0x408958, 0x00000034);
+	nv_wr32(priv, 0x408984, 0x00000000);
+	nv_wr32(priv, 0x408988, 0x08040201);
+	nv_wr32(priv, 0x40898c, 0x80402010);
+}
+
 static void
 nve0_graph_init_units(struct nvc0_graph_priv *priv)
 {
 	nv_wr32(priv, 0x409ffc, 0x00000000);
 	nv_wr32(priv, 0x409c14, 0x00003e3e);
-	nv_wr32(priv, 0x409c24, 0x000f0000);
+	switch (nv_device(priv)->chipset) {
+	case 0xe4:
+	case 0xe7:
+	case 0xe6:
+		nv_wr32(priv, 0x409c24, 0x000f0001);
+		break;
+	case 0xf0:
+		nv_wr32(priv, 0x409c24, 0x000f0000);
+		break;
+	}
 
 	nv_wr32(priv, 0x404000, 0xc0000000);
 	nv_wr32(priv, 0x404600, 0xc0000000);
 	nv_wr32(priv, 0x408030, 0xc0000000);
 	nv_wr32(priv, 0x404490, 0xc0000000);
 	nv_wr32(priv, 0x406018, 0xc0000000);
-	nv_wr32(priv, 0x407020, 0xc0000000);
+	nv_wr32(priv, 0x407020, 0x40000000);
 	nv_wr32(priv, 0x405840, 0xc0000000);
 	nv_wr32(priv, 0x405844, 0x00ffffff);
 
@@ -770,6 +1055,19 @@ nve0_graph_init(struct nouveau_object *object)
 
 	nve0_graph_init_obj418880(priv);
 	nve0_graph_init_regs(priv);
+	nve0_graph_init_unk40xx(priv);
+	nve0_graph_init_unk44xx(priv);
+	nve0_graph_init_unk78xx(priv);
+	nve0_graph_init_unk60xx(priv);
+	nve0_graph_init_unk64xx(priv);
+	nve0_graph_init_unk58xx(priv);
+	nve0_graph_init_unk80xx(priv);
+	nve0_graph_init_unk70xx(priv);
+	nve0_graph_init_unk5bxx(priv);
+	nve0_graph_init_gpc(priv);
+	nve0_graph_init_tpc(priv);
+	nve0_graph_init_tpcunk(priv);
+	nve0_graph_init_unk88xx(priv);
 	nve0_graph_init_gpc_0(priv);
 
 	nv_wr32(priv, 0x400500, 0x00010001);

+ 0 - 8
drivers/gpu/drm/nouveau/core/engine/mpeg/nv50.c

@@ -125,13 +125,6 @@ nv50_mpeg_cclass = {
  * PMPEG engine/subdev functions
  ******************************************************************************/
 
-int
-nv50_mpeg_tlb_flush(struct nouveau_engine *engine)
-{
-	nv50_vm_flush_engine(&engine->base, 0x08);
-	return 0;
-}
-
 void
 nv50_mpeg_intr(struct nouveau_subdev *subdev)
 {
@@ -191,7 +184,6 @@ nv50_mpeg_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
 	nv_subdev(priv)->intr = nv50_vpe_intr;
 	nv_engine(priv)->cclass = &nv50_mpeg_cclass;
 	nv_engine(priv)->sclass = nv50_mpeg_sclass;
-	nv_engine(priv)->tlb_flush = nv50_mpeg_tlb_flush;
 	return 0;
 }
 

+ 0 - 1
drivers/gpu/drm/nouveau/core/engine/mpeg/nv84.c

@@ -88,7 +88,6 @@ nv84_mpeg_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
 	nv_subdev(priv)->intr = nv50_mpeg_intr;
 	nv_engine(priv)->cclass = &nv84_mpeg_cclass;
 	nv_engine(priv)->sclass = nv84_mpeg_sclass;
-	nv_engine(priv)->tlb_flush = nv50_mpeg_tlb_flush;
 	return 0;
 }
 

+ 1 - 2
drivers/gpu/drm/nouveau/core/engine/ppp/nvc0.c

@@ -22,8 +22,7 @@
  * Authors: Maarten Lankhorst
  */
 
-#include <core/falcon.h>
-
+#include <engine/falcon.h>
 #include <engine/ppp.h>
 
 struct nvc0_ppp_priv {

+ 13 - 14
drivers/gpu/drm/nouveau/core/engine/vp/nv84.c

@@ -19,24 +19,19 @@
  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  * OTHER DEALINGS IN THE SOFTWARE.
  *
- * Authors: Ben Skeggs
+ * Authors: Ben Skeggs, Ilia Mirkin
  */
 
-#include <core/engctx.h>
-#include <core/class.h>
-
+#include <engine/xtensa.h>
 #include <engine/vp.h>
 
-struct nv84_vp_priv {
-	struct nouveau_engine base;
-};
-
 /*******************************************************************************
  * VP object classes
  ******************************************************************************/
 
 static struct nouveau_oclass
 nv84_vp_sclass[] = {
+	{ 0x7476, &nouveau_object_ofuncs },
 	{},
 };
 
@@ -48,7 +43,7 @@ static struct nouveau_oclass
 nv84_vp_cclass = {
 	.handle = NV_ENGCTX(VP, 0x84),
 	.ofuncs = &(struct nouveau_ofuncs) {
-		.ctor = _nouveau_engctx_ctor,
+		.ctor = _nouveau_xtensa_engctx_ctor,
 		.dtor = _nouveau_engctx_dtor,
 		.init = _nouveau_engctx_init,
 		.fini = _nouveau_engctx_fini,
@@ -66,10 +61,10 @@ nv84_vp_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
 	     struct nouveau_oclass *oclass, void *data, u32 size,
 	     struct nouveau_object **pobject)
 {
-	struct nv84_vp_priv *priv;
+	struct nouveau_xtensa *priv;
 	int ret;
 
-	ret = nouveau_engine_create(parent, engine, oclass, true,
+	ret = nouveau_xtensa_create(parent, engine, oclass, 0xf000, true,
 				    "PVP", "vp", &priv);
 	*pobject = nv_object(priv);
 	if (ret)
@@ -78,6 +73,8 @@ nv84_vp_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
 	nv_subdev(priv)->unit = 0x01020000;
 	nv_engine(priv)->cclass = &nv84_vp_cclass;
 	nv_engine(priv)->sclass = nv84_vp_sclass;
+	priv->fifo_val = 0x111;
+	priv->unkd28 = 0x9c544;
 	return 0;
 }
 
@@ -86,8 +83,10 @@ nv84_vp_oclass = {
 	.handle = NV_ENGINE(VP, 0x84),
 	.ofuncs = &(struct nouveau_ofuncs) {
 		.ctor = nv84_vp_ctor,
-		.dtor = _nouveau_engine_dtor,
-		.init = _nouveau_engine_init,
-		.fini = _nouveau_engine_fini,
+		.dtor = _nouveau_xtensa_dtor,
+		.init = _nouveau_xtensa_init,
+		.fini = _nouveau_xtensa_fini,
+		.rd32 = _nouveau_xtensa_rd32,
+		.wr32 = _nouveau_xtensa_wr32,
 	},
 };

+ 93 - 0
drivers/gpu/drm/nouveau/core/engine/vp/nv98.c

@@ -0,0 +1,93 @@
+/*
+ * Copyright 2012 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs
+ */
+
+#include <core/engctx.h>
+#include <core/class.h>
+
+#include <engine/vp.h>
+/* Private state of the nv98 VP engine: nothing beyond the base engine. */
+struct nv98_vp_priv {
+	struct nouveau_engine base;
+};
+
+/*******************************************************************************
+ * VP object classes
+ ******************************************************************************/
+/* Object class list for nv98 VP: holds only the empty terminating entry. */
+static struct nouveau_oclass
+nv98_vp_sclass[] = {
+	{},
+};
+
+/*******************************************************************************
+ * PVP context
+ ******************************************************************************/
+/* Context class: every operation is the generic _nouveau_engctx_* hook. */
+static struct nouveau_oclass
+nv98_vp_cclass = {
+	.handle = NV_ENGCTX(VP, 0x98),
+	.ofuncs = &(struct nouveau_ofuncs) {
+		.ctor = _nouveau_engctx_ctor,
+		.dtor = _nouveau_engctx_dtor,
+		.init = _nouveau_engctx_init,
+		.fini = _nouveau_engctx_fini,
+		.rd32 = _nouveau_engctx_rd32,
+		.wr32 = _nouveau_engctx_wr32,
+	},
+};
+
+/*******************************************************************************
+ * PVP engine/subdev functions
+ ******************************************************************************/
+/* Constructor: create the "PVP" engine and attach its context/object classes. */
+static int
+nv98_vp_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
+	     struct nouveau_oclass *oclass, void *data, u32 size,
+	     struct nouveau_object **pobject)
+{
+	struct nv98_vp_priv *priv;
+	int ret;
+
+	ret = nouveau_engine_create(parent, engine, oclass, true,
+				    "PVP", "vp", &priv);
+	*pobject = nv_object(priv);	/* stored before ret is checked, so also on failure */
+	if (ret)
+		return ret;
+
+	nv_subdev(priv)->unit = 0x01020000;
+	nv_engine(priv)->cclass = &nv98_vp_cclass;
+	nv_engine(priv)->sclass = nv98_vp_sclass;
+	return 0;
+}
+/* Engine class exported for nv98 VP; dtor/init/fini are the generic engine hooks. */
+struct nouveau_oclass
+nv98_vp_oclass = {
+	.handle = NV_ENGINE(VP, 0x98),
+	.ofuncs = &(struct nouveau_ofuncs) {
+		.ctor = nv98_vp_ctor,
+		.dtor = _nouveau_engine_dtor,
+		.init = _nouveau_engine_init,
+		.fini = _nouveau_engine_fini,
+	},
+};

+ 1 - 2
drivers/gpu/drm/nouveau/core/engine/vp/nvc0.c

@@ -22,8 +22,7 @@
  * Authors: Maarten Lankhorst
  */
 
-#include <core/falcon.h>
-
+#include <engine/falcon.h>
 #include <engine/vp.h>
 
 struct nvc0_vp_priv {

+ 1 - 2
drivers/gpu/drm/nouveau/core/engine/vp/nve0.c

@@ -22,8 +22,7 @@
  * Authors: Ben Skeggs
  */
 
-#include <core/falcon.h>
-
+#include <engine/falcon.h>
 #include <engine/vp.h>
 
 struct nve0_vp_priv {

+ 170 - 0
drivers/gpu/drm/nouveau/core/engine/xtensa.c

@@ -0,0 +1,170 @@
+/*
+ * Copyright 2013 Ilia Mirkin
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <engine/xtensa.h>
+/* Object rd32 hook: nv_rd32() of @addr offset by the engine's xtensa->addr. */
+u32
+_nouveau_xtensa_rd32(struct nouveau_object *object, u64 addr)
+{
+	struct nouveau_xtensa *xtensa = (void *)object;
+	return nv_rd32(xtensa, xtensa->addr + addr);
+}
+/* Object wr32 hook: nv_wr32() of @data to @addr offset by xtensa->addr. */
+void
+_nouveau_xtensa_wr32(struct nouveau_object *object, u64 addr, u32 data)
+{
+	struct nouveau_xtensa *xtensa = (void *)object;
+	nv_wr32(xtensa, xtensa->addr + addr, data);
+}
+/* Context constructor: thin wrapper around nouveau_engctx_create(). */
+int
+_nouveau_xtensa_engctx_ctor(struct nouveau_object *parent,
+			    struct nouveau_object *engine,
+			    struct nouveau_oclass *oclass, void *data, u32 size,
+			    struct nouveau_object **pobject)
+{
+	struct nouveau_engctx *engctx;
+	int ret;
+
+	ret = nouveau_engctx_create(parent, engine, oclass, NULL,
+				    0x10000, 0x1000,	/* presumably size and alignment — TODO confirm */
+				    NVOBJ_FLAG_ZERO_ALLOC, &engctx);
+	*pobject = nv_object(engctx);	/* stored whether or not creation succeeded */
+	return ret;
+}
+/* Interrupt handler: warn on watchdog, write status back, maybe set FIFO_CTRL. */
+void
+_nouveau_xtensa_intr(struct nouveau_subdev *subdev)
+{
+	struct nouveau_xtensa *xtensa = (void *)subdev;
+	u32 unk104 = nv_ro32(xtensa, 0xd04);
+	u32 intr = nv_ro32(xtensa, 0xc20);
+	u32 chan = nv_ro32(xtensa, 0xc28);
+	u32 unk10c = nv_ro32(xtensa, 0xd0c);
+
+	if (intr & 0x10)
+		nv_warn(xtensa, "Watchdog interrupt, engine hung.\n");
+	nv_wo32(xtensa, 0xc20, intr);	/* presumably acks the bits just read — TODO confirm */
+	intr = nv_ro32(xtensa, 0xc20);	/* re-read: must now be 0 for the check below */
+	if (unk104 == 0x10001 && unk10c == 0x200 && chan && !intr) {
+		nv_debug(xtensa, "Enabling FIFO_CTRL\n");
+		nv_mask(xtensa, xtensa->addr + 0xd94, 0, xtensa->fifo_val);
+	}
+}
+/* Create the base engine, then install the xtensa intr handler and @addr. */
+int
+nouveau_xtensa_create_(struct nouveau_object *parent,
+		       struct nouveau_object *engine,
+		       struct nouveau_oclass *oclass, u32 addr, bool enable,
+		       const char *iname, const char *fname,
+		       int length, void **pobject)
+{
+	struct nouveau_xtensa *xtensa;
+	int ret;
+
+	ret = nouveau_engine_create_(parent, engine, oclass, enable, iname,
+				     fname, length, pobject);
+	xtensa = *pobject;	/* only dereferenced after the ret check below */
+	if (ret)
+		return ret;
+
+	nv_subdev(xtensa)->intr = _nouveau_xtensa_intr;
+
+	xtensa->addr = addr;	/* offset later applied by _nouveau_xtensa_rd32/wr32 */
+
+	return 0;
+}
+/* Init hook: upload firmware if not yet present, then program the engine. */
+int
+_nouveau_xtensa_init(struct nouveau_object *object)
+{
+	struct nouveau_device *device = nv_device(object);
+	struct nouveau_xtensa *xtensa = (void *)object;
+	const struct firmware *fw;
+	char name[32];
+	int i, ret;
+	u32 tmp;
+
+	ret = nouveau_engine_init(&xtensa->base);
+	if (ret)
+		return ret;
+
+	if (!xtensa->gpu_fw) {	/* no firmware object yet: fetch and upload it */
+		snprintf(name, sizeof(name), "nouveau/nv84_xuc%03x",
+			 xtensa->addr >> 12);	/* file name is derived from the register base */
+
+		ret = request_firmware(&fw, name, &device->pdev->dev);
+		if (ret) {
+			nv_warn(xtensa, "unable to load firmware %s\n", name);
+			return ret;
+		}
+
+		ret = nouveau_gpuobj_new(object, NULL, fw->size, 0x1000, 0,
+					 &xtensa->gpu_fw);
+		if (ret) {
+			release_firmware(fw);	/* do not leak the image on allocation failure */
+			return ret;
+		}
+
+		nv_debug(xtensa, "Loading firmware to address: 0x%llx\n",
+			 xtensa->gpu_fw->addr);
+
+		for (i = 0; i < fw->size / 4; i++)	/* whole 32-bit words only; a partial tail is not copied */
+			nv_wo32(xtensa->gpu_fw, i * 4, *((u32 *)fw->data + i));
+		release_firmware(fw);
+	}
+
+	nv_wo32(xtensa, 0xd10, 0x1fffffff); /* ?? */
+	nv_wo32(xtensa, 0xd08, 0x0fffffff); /* ?? */
+
+	nv_wo32(xtensa, 0xd28, xtensa->unkd28); /* ?? */
+	nv_wo32(xtensa, 0xc20, 0x3f); /* INTR */
+	nv_wo32(xtensa, 0xd84, 0x3f); /* INTR_EN */
+
+	nv_wo32(xtensa, 0xcc0, xtensa->gpu_fw->addr >> 8); /* XT_REGION_BASE */
+	nv_wo32(xtensa, 0xcc4, 0x1c); /* XT_REGION_SETUP */
+	nv_wo32(xtensa, 0xcc8, xtensa->gpu_fw->size >> 8); /* XT_REGION_LIMIT */
+
+	tmp = nv_rd32(xtensa, 0x0);	/* NOTE(review): nv_rd32, not nv_ro32 — presumably an absolute register; confirm */
+	nv_wo32(xtensa, 0xde0, tmp); /* SCRATCH_H2X */
+
+	nv_wo32(xtensa, 0xce8, 0xf); /* XT_REGION_SETUP */
+
+	nv_wo32(xtensa, 0xc20, 0x3f); /* INTR */
+	nv_wo32(xtensa, 0xd84, 0x3f); /* INTR_EN */
+
+	return 0;
+}
+/* Fini hook: zero INTR_EN and FIFO_CTRL, then run the base engine fini. */
+int
+_nouveau_xtensa_fini(struct nouveau_object *object, bool suspend)
+{
+	struct nouveau_xtensa *xtensa = (void *)object;
+
+	nv_wo32(xtensa, 0xd84, 0); /* INTR_EN */
+	nv_wo32(xtensa, 0xd94, 0); /* FIFO_CTRL */
+
+	if (!suspend)	/* firmware object is kept across suspend, released otherwise */
+		nouveau_gpuobj_ref(NULL, &xtensa->gpu_fw);
+
+	return nouveau_engine_fini(&xtensa->base, suspend);
+}

+ 3 - 2
drivers/gpu/drm/nouveau/core/include/core/device.h

@@ -17,8 +17,7 @@ enum nv_subdev_type {
 	NVDEV_SUBDEV_DEVINIT,
 	NVDEV_SUBDEV_GPIO,
 	NVDEV_SUBDEV_I2C,
-	NVDEV_SUBDEV_CLOCK,
-	NVDEV_SUBDEV_DEVINIT_LAST = NVDEV_SUBDEV_CLOCK,
+	NVDEV_SUBDEV_DEVINIT_LAST = NVDEV_SUBDEV_I2C,
 
 	/* This grouping of subdevs are initialised right after they've
 	 * been created, and are allowed to assume any subdevs in the
@@ -35,6 +34,7 @@ enum nv_subdev_type {
 	NVDEV_SUBDEV_VM,
 	NVDEV_SUBDEV_BAR,
 	NVDEV_SUBDEV_VOLT,
+	NVDEV_SUBDEV_CLOCK,
 	NVDEV_SUBDEV_THERM,
 
 	NVDEV_ENGINE_DMAOBJ,
@@ -49,6 +49,7 @@ enum nv_subdev_type {
 	NVDEV_ENGINE_PPP,
 	NVDEV_ENGINE_COPY0,
 	NVDEV_ENGINE_COPY1,
+	NVDEV_ENGINE_COPY2,
 	NVDEV_ENGINE_UNK1C1,
 	NVDEV_ENGINE_VENC,
 	NVDEV_ENGINE_DISP,

+ 0 - 2
drivers/gpu/drm/nouveau/core/include/core/mm.h

@@ -15,8 +15,6 @@ struct nouveau_mm {
 	struct list_head nodes;
 	struct list_head free;
 
-	struct mutex mutex;
-
 	u32 block_size;
 	int heap_nodes;
 };

+ 1 - 0
drivers/gpu/drm/nouveau/core/include/engine/bsp.h

@@ -2,6 +2,7 @@
 #define __NOUVEAU_BSP_H__
 
 extern struct nouveau_oclass nv84_bsp_oclass;
+extern struct nouveau_oclass nv98_bsp_oclass;
 extern struct nouveau_oclass nvc0_bsp_oclass;
 extern struct nouveau_oclass nve0_bsp_oclass;
 

+ 1 - 0
drivers/gpu/drm/nouveau/core/include/engine/copy.h

@@ -8,5 +8,6 @@ extern struct nouveau_oclass nvc0_copy0_oclass;
 extern struct nouveau_oclass nvc0_copy1_oclass;
 extern struct nouveau_oclass nve0_copy0_oclass;
 extern struct nouveau_oclass nve0_copy1_oclass;
+extern struct nouveau_oclass nve0_copy2_oclass;
 
 #endif

+ 0 - 0
drivers/gpu/drm/nouveau/core/include/core/falcon.h → drivers/gpu/drm/nouveau/core/include/engine/falcon.h


+ 0 - 1
drivers/gpu/drm/nouveau/core/include/engine/mpeg.h

@@ -54,7 +54,6 @@ extern struct nouveau_ofuncs nv50_mpeg_ofuncs;
 int  nv50_mpeg_context_ctor(struct nouveau_object *, struct nouveau_object *,
 			    struct nouveau_oclass *, void *, u32,
 			    struct nouveau_object **);
-int  nv50_mpeg_tlb_flush(struct nouveau_engine *);
 void nv50_mpeg_intr(struct nouveau_subdev *);
 int  nv50_mpeg_init(struct nouveau_object *);
 

+ 1 - 0
drivers/gpu/drm/nouveau/core/include/engine/vp.h

@@ -2,6 +2,7 @@
 #define __NOUVEAU_VP_H__
 
 extern struct nouveau_oclass nv84_vp_oclass;
+extern struct nouveau_oclass nv98_vp_oclass;
 extern struct nouveau_oclass nvc0_vp_oclass;
 extern struct nouveau_oclass nve0_vp_oclass;
 

+ 38 - 0
drivers/gpu/drm/nouveau/core/include/engine/xtensa.h

@@ -0,0 +1,38 @@
+#ifndef __NOUVEAU_XTENSA_H__
+#define __NOUVEAU_XTENSA_H__
+
+#include <core/engine.h>
+#include <core/engctx.h>
+#include <core/gpuobj.h>
+/* Engine state used by the nouveau_xtensa_* / _nouveau_xtensa_* functions. */
+struct nouveau_xtensa {
+	struct nouveau_engine base;
+
+	u32 addr;	/* register offset given to nouveau_xtensa_create_() */
+	struct nouveau_gpuobj *gpu_fw;	/* firmware object created by _nouveau_xtensa_init() */
+	u32 fifo_val;	/* data passed to nv_mask() on addr + 0xd94 by the intr handler */
+	u32 unkd28;	/* value written to offset 0xd28 by _nouveau_xtensa_init() */
+};
+/* Typed wrapper: passes sizeof(**r) as length and r as the void ** result. */
+#define nouveau_xtensa_create(p,e,c,b,d,i,f,r)				\
+	nouveau_xtensa_create_((p), (e), (c), (b), (d), (i), (f),	\
+			       sizeof(**r),(void **)r)
+
+int _nouveau_xtensa_engctx_ctor(struct nouveau_object *,
+				struct nouveau_object *,
+				struct nouveau_oclass *, void *, u32,
+				struct nouveau_object **);
+
+void _nouveau_xtensa_intr(struct nouveau_subdev *);
+int nouveau_xtensa_create_(struct nouveau_object *,
+			   struct nouveau_object *,
+			   struct nouveau_oclass *, u32, bool,
+			   const char *, const char *,
+			   int, void **);
+#define _nouveau_xtensa_dtor _nouveau_engine_dtor
+int _nouveau_xtensa_init(struct nouveau_object *);
+int _nouveau_xtensa_fini(struct nouveau_object *, bool);
+u32  _nouveau_xtensa_rd32(struct nouveau_object *, u64);
+void _nouveau_xtensa_wr32(struct nouveau_object *, u64, u32);
+
+#endif

+ 0 - 2
drivers/gpu/drm/nouveau/core/include/subdev/clock.h

@@ -10,8 +10,6 @@ struct nvbios_pll;
 struct nouveau_clock {
 	struct nouveau_subdev base;
 
-	int (*pll_set)(struct nouveau_clock *, u32 type, u32 freq);
-
 	/*XXX: die, these are here *only* to support the completely
 	 *     bat-shit insane what-was-nouveau_hw.c code
 	 */

+ 15 - 6
drivers/gpu/drm/nouveau/core/include/subdev/devinit.h

@@ -8,6 +8,8 @@ struct nouveau_devinit {
 	struct nouveau_subdev base;
 	bool post;
 	void (*meminit)(struct nouveau_devinit *);
+	int  (*pll_set)(struct nouveau_devinit *, u32 type, u32 freq);
+
 };
 
 static inline struct nouveau_devinit *
@@ -20,11 +22,20 @@ nouveau_devinit(void *obj)
 	nouveau_devinit_create_((p), (e), (o), sizeof(**d), (void **)d)
 #define nouveau_devinit_destroy(p)                                             \
 	nouveau_subdev_destroy(&(p)->base)
+#define nouveau_devinit_init(p) ({                                             \
+	struct nouveau_devinit *d = (p);                                       \
+	_nouveau_devinit_init(nv_object(d));                                   \
+})
+#define nouveau_devinit_fini(p,s) ({                                           \
+	struct nouveau_devinit *d = (p);                                       \
+	_nouveau_devinit_fini(nv_object(d), (s));                              \
+})
 
 int nouveau_devinit_create_(struct nouveau_object *, struct nouveau_object *,
 			    struct nouveau_oclass *, int, void **);
-int nouveau_devinit_init(struct nouveau_devinit *);
-int nouveau_devinit_fini(struct nouveau_devinit *, bool suspend);
+#define _nouveau_devinit_dtor _nouveau_subdev_dtor
+int _nouveau_devinit_init(struct nouveau_object *);
+int _nouveau_devinit_fini(struct nouveau_object *, bool suspend);
 
 extern struct nouveau_oclass nv04_devinit_oclass;
 extern struct nouveau_oclass nv05_devinit_oclass;
@@ -32,9 +43,7 @@ extern struct nouveau_oclass nv10_devinit_oclass;
 extern struct nouveau_oclass nv1a_devinit_oclass;
 extern struct nouveau_oclass nv20_devinit_oclass;
 extern struct nouveau_oclass nv50_devinit_oclass;
-
-void nv04_devinit_dtor(struct nouveau_object *);
-int  nv04_devinit_init(struct nouveau_object *);
-int  nv04_devinit_fini(struct nouveau_object *, bool);
+extern struct nouveau_oclass nva3_devinit_oclass;
+extern struct nouveau_oclass nvc0_devinit_oclass;
 
 #endif

+ 25 - 70
drivers/gpu/drm/nouveau/core/include/subdev/fb.h

@@ -53,31 +53,7 @@ struct nouveau_fb {
 
 	bool (*memtype_valid)(struct nouveau_fb *, u32 memtype);
 
-	struct {
-		enum {
-			NV_MEM_TYPE_UNKNOWN = 0,
-			NV_MEM_TYPE_STOLEN,
-			NV_MEM_TYPE_SGRAM,
-			NV_MEM_TYPE_SDRAM,
-			NV_MEM_TYPE_DDR1,
-			NV_MEM_TYPE_DDR2,
-			NV_MEM_TYPE_DDR3,
-			NV_MEM_TYPE_GDDR2,
-			NV_MEM_TYPE_GDDR3,
-			NV_MEM_TYPE_GDDR4,
-			NV_MEM_TYPE_GDDR5
-		} type;
-		u64 stolen;
-		u64 size;
-
-		int ranks;
-		int parts;
-
-		int  (*init)(struct nouveau_fb *);
-		int  (*get)(struct nouveau_fb *, u64 size, u32 align,
-			    u32 size_nc, u32 type, struct nouveau_mem **);
-		void (*put)(struct nouveau_fb *, struct nouveau_mem **);
-	} ram;
+	struct nouveau_ram *ram;
 
 	struct nouveau_mm vram;
 	struct nouveau_mm tags;
@@ -102,18 +78,6 @@ nouveau_fb(void *obj)
 	return (void *)nv_device(obj)->subdev[NVDEV_SUBDEV_FB];
 }
 
-#define nouveau_fb_create(p,e,c,d)                                             \
-	nouveau_subdev_create((p), (e), (c), 0, "PFB", "fb", (d))
-int  nouveau_fb_preinit(struct nouveau_fb *);
-void nouveau_fb_destroy(struct nouveau_fb *);
-int  nouveau_fb_init(struct nouveau_fb *);
-#define nouveau_fb_fini(p,s)                                                   \
-	nouveau_subdev_fini(&(p)->base, (s))
-
-void _nouveau_fb_dtor(struct nouveau_object *);
-int  _nouveau_fb_init(struct nouveau_object *);
-#define _nouveau_fb_fini _nouveau_subdev_fini
-
 extern struct nouveau_oclass nv04_fb_oclass;
 extern struct nouveau_oclass nv10_fb_oclass;
 extern struct nouveau_oclass nv1a_fb_oclass;
@@ -132,40 +96,31 @@ extern struct nouveau_oclass nv4e_fb_oclass;
 extern struct nouveau_oclass nv50_fb_oclass;
 extern struct nouveau_oclass nvc0_fb_oclass;
 
-struct nouveau_bios;
-int  nouveau_fb_bios_memtype(struct nouveau_bios *);
-
-bool nv04_fb_memtype_valid(struct nouveau_fb *, u32 memtype);
-
-void nv10_fb_tile_init(struct nouveau_fb *, int i, u32 addr, u32 size,
-		       u32 pitch, u32 flags, struct nouveau_fb_tile *);
-void nv10_fb_tile_fini(struct nouveau_fb *, int i, struct nouveau_fb_tile *);
-void nv10_fb_tile_prog(struct nouveau_fb *, int, struct nouveau_fb_tile *);
-
-int  nv20_fb_vram_init(struct nouveau_fb *);
-void nv20_fb_tile_init(struct nouveau_fb *, int i, u32 addr, u32 size,
-		       u32 pitch, u32 flags, struct nouveau_fb_tile *);
-void nv20_fb_tile_fini(struct nouveau_fb *, int i, struct nouveau_fb_tile *);
-void nv20_fb_tile_prog(struct nouveau_fb *, int, struct nouveau_fb_tile *);
-
-int  nv30_fb_init(struct nouveau_object *);
-void nv30_fb_tile_init(struct nouveau_fb *, int i, u32 addr, u32 size,
-		       u32 pitch, u32 flags, struct nouveau_fb_tile *);
-
-void nv40_fb_tile_comp(struct nouveau_fb *, int i, u32 size, u32 flags,
-		       struct nouveau_fb_tile *);
-
-int  nv41_fb_vram_init(struct nouveau_fb *);
-int  nv41_fb_init(struct nouveau_object *);
-void nv41_fb_tile_prog(struct nouveau_fb *, int, struct nouveau_fb_tile *);
-
-int  nv44_fb_vram_init(struct nouveau_fb *);
-int  nv44_fb_init(struct nouveau_object *);
-void nv44_fb_tile_prog(struct nouveau_fb *, int, struct nouveau_fb_tile *);
+struct nouveau_ram {
+	struct nouveau_object base;
+	enum {
+		NV_MEM_TYPE_UNKNOWN = 0,
+		NV_MEM_TYPE_STOLEN,
+		NV_MEM_TYPE_SGRAM,
+		NV_MEM_TYPE_SDRAM,
+		NV_MEM_TYPE_DDR1,
+		NV_MEM_TYPE_DDR2,
+		NV_MEM_TYPE_DDR3,
+		NV_MEM_TYPE_GDDR2,
+		NV_MEM_TYPE_GDDR3,
+		NV_MEM_TYPE_GDDR4,
+		NV_MEM_TYPE_GDDR5
+	} type;
+	u64 stolen;
+	u64 size;
+	u32 tags;
 
-void nv46_fb_tile_init(struct nouveau_fb *, int i, u32 addr, u32 size,
-		       u32 pitch, u32 flags, struct nouveau_fb_tile *);
+	int ranks;
+	int parts;
 
-void nv50_fb_vram_del(struct nouveau_fb *, struct nouveau_mem **);
+	int  (*get)(struct nouveau_fb *, u64 size, u32 align,
+		    u32 size_nc, u32 type, struct nouveau_mem **);
+	void (*put)(struct nouveau_fb *, struct nouveau_mem **);
+};
 
 #endif

+ 1 - 4
drivers/gpu/drm/nouveau/core/include/subdev/vm.h

@@ -58,7 +58,7 @@ struct nouveau_vm {
 	int refcount;
 
 	struct list_head pgd_list;
-	atomic_t engref[64]; //NVDEV_SUBDEV_NR];
+	atomic_t engref[NVDEV_SUBDEV_NR];
 
 	struct nouveau_vm_pgt *pgt;
 	u32 fpde;
@@ -117,9 +117,6 @@ int  nv04_vm_create(struct nouveau_vmmgr *, u64, u64, u64,
 		    struct nouveau_vm **);
 void nv04_vmmgr_dtor(struct nouveau_object *);
 
-void nv50_vm_flush_engine(struct nouveau_subdev *, int engine);
-void nvc0_vm_flush_engine(struct nouveau_subdev *, u64 addr, int type);
-
 /* nouveau_vm.c */
 int  nouveau_vm_create(struct nouveau_vmmgr *, u64 offset, u64 length,
 		       u64 mm_offset, u32 block, struct nouveau_vm **);

+ 9 - 4
drivers/gpu/drm/nouveau/core/subdev/bar/nv50.c

@@ -53,7 +53,6 @@ nv50_bar_kmap(struct nouveau_bar *bar, struct nouveau_mem *mem,
 		return ret;
 
 	nouveau_vm_map(vma, mem);
-	nv50_vm_flush_engine(nv_subdev(bar), 6);
 	return 0;
 }
 
@@ -69,7 +68,6 @@ nv50_bar_umap(struct nouveau_bar *bar, struct nouveau_mem *mem,
 		return ret;
 
 	nouveau_vm_map(vma, mem);
-	nv50_vm_flush_engine(nv_subdev(bar), 6);
 	return 0;
 }
 
@@ -77,7 +75,6 @@ static void
 nv50_bar_unmap(struct nouveau_bar *bar, struct nouveau_vma *vma)
 {
 	nouveau_vm_unmap(vma);
-	nv50_vm_flush_engine(nv_subdev(bar), 6);
 	nouveau_vm_put(vma);
 }
 
@@ -147,6 +144,8 @@ nv50_bar_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
 	if (ret)
 		return ret;
 
+	atomic_inc(&vm->engref[NVDEV_SUBDEV_BAR]);
+
 	ret = nouveau_gpuobj_new(nv_object(priv), heap,
 				 ((limit-- - start) >> 12) * 8, 0x1000,
 				 NVOBJ_FLAG_ZERO_ALLOC, &vm->pgt[0].obj[0]);
@@ -179,6 +178,8 @@ nv50_bar_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
 	if (ret)
 		return ret;
 
+	atomic_inc(&vm->engref[NVDEV_SUBDEV_BAR]);
+
 	ret = nouveau_vm_ref(vm, &priv->bar1_vm, priv->pgd);
 	nouveau_vm_ref(NULL, &vm, NULL);
 	if (ret)
@@ -237,7 +238,11 @@ nv50_bar_init(struct nouveau_object *object)
 
 	nv_mask(priv, 0x000200, 0x00000100, 0x00000000);
 	nv_mask(priv, 0x000200, 0x00000100, 0x00000100);
-	nv50_vm_flush_engine(nv_subdev(priv), 6);
+	nv_wr32(priv, 0x100c80, 0x00060001);
+	if (!nv_wait(priv, 0x100c80, 0x00000001, 0x00000000)) {
+		nv_error(priv, "vm flush timeout\n");
+		return -EBUSY;
+	}
 
 	nv_wr32(priv, 0x001704, 0x00000000 | priv->mem->addr >> 12);
 	nv_wr32(priv, 0x001704, 0x40000000 | priv->mem->addr >> 12);

+ 4 - 6
drivers/gpu/drm/nouveau/core/subdev/bar/nvc0.c

@@ -51,7 +51,6 @@ nvc0_bar_kmap(struct nouveau_bar *bar, struct nouveau_mem *mem,
 		return ret;
 
 	nouveau_vm_map(vma, mem);
-	nvc0_vm_flush_engine(nv_subdev(bar), priv->bar[0].pgd->addr, 5);
 	return 0;
 }
 
@@ -68,18 +67,13 @@ nvc0_bar_umap(struct nouveau_bar *bar, struct nouveau_mem *mem,
 		return ret;
 
 	nouveau_vm_map(vma, mem);
-	nvc0_vm_flush_engine(nv_subdev(bar), priv->bar[1].pgd->addr, 5);
 	return 0;
 }
 
 static void
 nvc0_bar_unmap(struct nouveau_bar *bar, struct nouveau_vma *vma)
 {
-	struct nvc0_bar_priv *priv = (void *)bar;
-	int i = !(vma->vm == priv->bar[0].vm);
-
 	nouveau_vm_unmap(vma);
-	nvc0_vm_flush_engine(nv_subdev(bar), priv->bar[i].pgd->addr, 5);
 	nouveau_vm_put(vma);
 }
 
@@ -116,6 +110,8 @@ nvc0_bar_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
 	if (ret)
 		return ret;
 
+	atomic_inc(&vm->engref[NVDEV_SUBDEV_BAR]);
+
 	ret = nouveau_gpuobj_new(nv_object(priv), NULL,
 				 (pci_resource_len(pdev, 3) >> 12) * 8,
 				 0x1000, NVOBJ_FLAG_ZERO_ALLOC,
@@ -150,6 +146,8 @@ nvc0_bar_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
 	if (ret)
 		return ret;
 
+	atomic_inc(&vm->engref[NVDEV_SUBDEV_BAR]);
+
 	ret = nouveau_vm_ref(vm, &priv->bar[1].vm, priv->bar[1].pgd);
 	nouveau_vm_ref(NULL, &vm, NULL);
 	if (ret)

+ 5 - 1
drivers/gpu/drm/nouveau/core/subdev/bios/base.c

@@ -85,11 +85,15 @@ static void
 nouveau_bios_shadow_pramin(struct nouveau_bios *bios)
 {
 	struct nouveau_device *device = nv_device(bios);
+	u64 addr = 0;
 	u32 bar0 = 0;
 	int i;
 
 	if (device->card_type >= NV_50) {
-		u64 addr = (u64)(nv_rd32(bios, 0x619f04) & 0xffffff00) << 8;
+		if (  device->card_type < NV_C0 ||
+		    !(nv_rd32(bios, 0x022500) & 0x00000001))
+			addr = (u64)(nv_rd32(bios, 0x619f04) & 0xffffff00) << 8;
+
 		if (!addr) {
 			addr  = (u64)nv_rd32(bios, 0x001700) << 16;
 			addr += 0xf0000;

+ 3 - 4
drivers/gpu/drm/nouveau/core/subdev/bios/init.c

@@ -10,7 +10,6 @@
 #include <subdev/bios/gpio.h>
 #include <subdev/bios/init.h>
 #include <subdev/devinit.h>
-#include <subdev/clock.h>
 #include <subdev/i2c.h>
 #include <subdev/vga.h>
 #include <subdev/gpio.h>
@@ -300,9 +299,9 @@ init_wrauxr(struct nvbios_init *init, u32 addr, u8 data)
 static void
 init_prog_pll(struct nvbios_init *init, u32 id, u32 freq)
 {
-	struct nouveau_clock *clk = nouveau_clock(init->bios);
-	if (clk && clk->pll_set && init_exec(init)) {
-		int ret = clk->pll_set(clk, id, freq);
+	struct nouveau_devinit *devinit = nouveau_devinit(init->bios);
+	if (devinit->pll_set && init_exec(init)) {
+		int ret = devinit->pll_set(devinit, id, freq);
 		if (ret)
 			warn("failed to prog pll 0x%08x to %dkHz\n", id, freq);
 	}

+ 7 - 267
drivers/gpu/drm/nouveau/core/subdev/clock/nv04.c

@@ -22,9 +22,10 @@
  * Authors: Ben Skeggs
  */
 
-#include <subdev/clock.h>
 #include <subdev/bios.h>
 #include <subdev/bios/pll.h>
+#include <subdev/clock.h>
+#include <subdev/devinit/priv.h>
 
 #include "pll.h"
 
@@ -32,272 +33,12 @@ struct nv04_clock_priv {
 	struct nouveau_clock base;
 };
 
-static int
-powerctrl_1_shift(int chip_version, int reg)
-{
-	int shift = -4;
-
-	if (chip_version < 0x17 || chip_version == 0x1a || chip_version == 0x20)
-		return shift;
-
-	switch (reg) {
-	case 0x680520:
-		shift += 4;
-	case 0x680508:
-		shift += 4;
-	case 0x680504:
-		shift += 4;
-	case 0x680500:
-		shift += 4;
-	}
-
-	/*
-	 * the shift for vpll regs is only used for nv3x chips with a single
-	 * stage pll
-	 */
-	if (shift > 4 && (chip_version < 0x32 || chip_version == 0x35 ||
-			  chip_version == 0x36 || chip_version >= 0x40))
-		shift = -4;
-
-	return shift;
-}
-
-static void
-setPLL_single(struct nv04_clock_priv *priv, u32 reg,
-	      struct nouveau_pll_vals *pv)
-{
-	int chip_version = nouveau_bios(priv)->version.chip;
-	uint32_t oldpll = nv_rd32(priv, reg);
-	int oldN = (oldpll >> 8) & 0xff, oldM = oldpll & 0xff;
-	uint32_t pll = (oldpll & 0xfff80000) | pv->log2P << 16 | pv->NM1;
-	uint32_t saved_powerctrl_1 = 0;
-	int shift_powerctrl_1 = powerctrl_1_shift(chip_version, reg);
-
-	if (oldpll == pll)
-		return;	/* already set */
-
-	if (shift_powerctrl_1 >= 0) {
-		saved_powerctrl_1 = nv_rd32(priv, 0x001584);
-		nv_wr32(priv, 0x001584,
-			(saved_powerctrl_1 & ~(0xf << shift_powerctrl_1)) |
-			1 << shift_powerctrl_1);
-	}
-
-	if (oldM && pv->M1 && (oldN / oldM < pv->N1 / pv->M1))
-		/* upclock -- write new post divider first */
-		nv_wr32(priv, reg, pv->log2P << 16 | (oldpll & 0xffff));
-	else
-		/* downclock -- write new NM first */
-		nv_wr32(priv, reg, (oldpll & 0xffff0000) | pv->NM1);
-
-	if (chip_version < 0x17 && chip_version != 0x11)
-		/* wait a bit on older chips */
-		msleep(64);
-	nv_rd32(priv, reg);
-
-	/* then write the other half as well */
-	nv_wr32(priv, reg, pll);
-
-	if (shift_powerctrl_1 >= 0)
-		nv_wr32(priv, 0x001584, saved_powerctrl_1);
-}
-
-static uint32_t
-new_ramdac580(uint32_t reg1, bool ss, uint32_t ramdac580)
-{
-	bool head_a = (reg1 == 0x680508);
-
-	if (ss)	/* single stage pll mode */
-		ramdac580 |= head_a ? 0x00000100 : 0x10000000;
-	else
-		ramdac580 &= head_a ? 0xfffffeff : 0xefffffff;
-
-	return ramdac580;
-}
-
-static void
-setPLL_double_highregs(struct nv04_clock_priv *priv, u32 reg1,
-		       struct nouveau_pll_vals *pv)
-{
-	int chip_version = nouveau_bios(priv)->version.chip;
-	bool nv3035 = chip_version == 0x30 || chip_version == 0x35;
-	uint32_t reg2 = reg1 + ((reg1 == 0x680520) ? 0x5c : 0x70);
-	uint32_t oldpll1 = nv_rd32(priv, reg1);
-	uint32_t oldpll2 = !nv3035 ? nv_rd32(priv, reg2) : 0;
-	uint32_t pll1 = (oldpll1 & 0xfff80000) | pv->log2P << 16 | pv->NM1;
-	uint32_t pll2 = (oldpll2 & 0x7fff0000) | 1 << 31 | pv->NM2;
-	uint32_t oldramdac580 = 0, ramdac580 = 0;
-	bool single_stage = !pv->NM2 || pv->N2 == pv->M2;	/* nv41+ only */
-	uint32_t saved_powerctrl_1 = 0, savedc040 = 0;
-	int shift_powerctrl_1 = powerctrl_1_shift(chip_version, reg1);
-
-	/* model specific additions to generic pll1 and pll2 set up above */
-	if (nv3035) {
-		pll1 = (pll1 & 0xfcc7ffff) | (pv->N2 & 0x18) << 21 |
-		       (pv->N2 & 0x7) << 19 | 8 << 4 | (pv->M2 & 7) << 4;
-		pll2 = 0;
-	}
-	if (chip_version > 0x40 && reg1 >= 0x680508) { /* !nv40 */
-		oldramdac580 = nv_rd32(priv, 0x680580);
-		ramdac580 = new_ramdac580(reg1, single_stage, oldramdac580);
-		if (oldramdac580 != ramdac580)
-			oldpll1 = ~0;	/* force mismatch */
-		if (single_stage)
-			/* magic value used by nvidia in single stage mode */
-			pll2 |= 0x011f;
-	}
-	if (chip_version > 0x70)
-		/* magic bits set by the blob (but not the bios) on g71-73 */
-		pll1 = (pll1 & 0x7fffffff) | (single_stage ? 0x4 : 0xc) << 28;
-
-	if (oldpll1 == pll1 && oldpll2 == pll2)
-		return;	/* already set */
-
-	if (shift_powerctrl_1 >= 0) {
-		saved_powerctrl_1 = nv_rd32(priv, 0x001584);
-		nv_wr32(priv, 0x001584,
-			(saved_powerctrl_1 & ~(0xf << shift_powerctrl_1)) |
-			1 << shift_powerctrl_1);
-	}
-
-	if (chip_version >= 0x40) {
-		int shift_c040 = 14;
-
-		switch (reg1) {
-		case 0x680504:
-			shift_c040 += 2;
-		case 0x680500:
-			shift_c040 += 2;
-		case 0x680520:
-			shift_c040 += 2;
-		case 0x680508:
-			shift_c040 += 2;
-		}
-
-		savedc040 = nv_rd32(priv, 0xc040);
-		if (shift_c040 != 14)
-			nv_wr32(priv, 0xc040, savedc040 & ~(3 << shift_c040));
-	}
-
-	if (oldramdac580 != ramdac580)
-		nv_wr32(priv, 0x680580, ramdac580);
-
-	if (!nv3035)
-		nv_wr32(priv, reg2, pll2);
-	nv_wr32(priv, reg1, pll1);
-
-	if (shift_powerctrl_1 >= 0)
-		nv_wr32(priv, 0x001584, saved_powerctrl_1);
-	if (chip_version >= 0x40)
-		nv_wr32(priv, 0xc040, savedc040);
-}
-
-static void
-setPLL_double_lowregs(struct nv04_clock_priv *priv, u32 NMNMreg,
-		      struct nouveau_pll_vals *pv)
-{
-	/* When setting PLLs, there is a merry game of disabling and enabling
-	 * various bits of hardware during the process. This function is a
-	 * synthesis of six nv4x traces, nearly each card doing a subtly
-	 * different thing. With luck all the necessary bits for each card are
-	 * combined herein. Without luck it deviates from each card's formula
-	 * so as to not work on any :)
-	 */
-
-	uint32_t Preg = NMNMreg - 4;
-	bool mpll = Preg == 0x4020;
-	uint32_t oldPval = nv_rd32(priv, Preg);
-	uint32_t NMNM = pv->NM2 << 16 | pv->NM1;
-	uint32_t Pval = (oldPval & (mpll ? ~(0x77 << 16) : ~(7 << 16))) |
-			0xc << 28 | pv->log2P << 16;
-	uint32_t saved4600 = 0;
-	/* some cards have different maskc040s */
-	uint32_t maskc040 = ~(3 << 14), savedc040;
-	bool single_stage = !pv->NM2 || pv->N2 == pv->M2;
-
-	if (nv_rd32(priv, NMNMreg) == NMNM && (oldPval & 0xc0070000) == Pval)
-		return;
-
-	if (Preg == 0x4000)
-		maskc040 = ~0x333;
-	if (Preg == 0x4058)
-		maskc040 = ~(0xc << 24);
-
-	if (mpll) {
-		struct nvbios_pll info;
-		uint8_t Pval2;
-
-		if (nvbios_pll_parse(nouveau_bios(priv), Preg, &info))
-			return;
-
-		Pval2 = pv->log2P + info.bias_p;
-		if (Pval2 > info.max_p)
-			Pval2 = info.max_p;
-		Pval |= 1 << 28 | Pval2 << 20;
-
-		saved4600 = nv_rd32(priv, 0x4600);
-		nv_wr32(priv, 0x4600, saved4600 | 8 << 28);
-	}
-	if (single_stage)
-		Pval |= mpll ? 1 << 12 : 1 << 8;
-
-	nv_wr32(priv, Preg, oldPval | 1 << 28);
-	nv_wr32(priv, Preg, Pval & ~(4 << 28));
-	if (mpll) {
-		Pval |= 8 << 20;
-		nv_wr32(priv, 0x4020, Pval & ~(0xc << 28));
-		nv_wr32(priv, 0x4038, Pval & ~(0xc << 28));
-	}
-
-	savedc040 = nv_rd32(priv, 0xc040);
-	nv_wr32(priv, 0xc040, savedc040 & maskc040);
-
-	nv_wr32(priv, NMNMreg, NMNM);
-	if (NMNMreg == 0x4024)
-		nv_wr32(priv, 0x403c, NMNM);
-
-	nv_wr32(priv, Preg, Pval);
-	if (mpll) {
-		Pval &= ~(8 << 20);
-		nv_wr32(priv, 0x4020, Pval);
-		nv_wr32(priv, 0x4038, Pval);
-		nv_wr32(priv, 0x4600, saved4600);
-	}
-
-	nv_wr32(priv, 0xc040, savedc040);
-
-	if (mpll) {
-		nv_wr32(priv, 0x4020, Pval & ~(1 << 28));
-		nv_wr32(priv, 0x4038, Pval & ~(1 << 28));
-	}
-}
-
-int
-nv04_clock_pll_set(struct nouveau_clock *clk, u32 type, u32 freq)
-{
-	struct nv04_clock_priv *priv = (void *)clk;
-	struct nouveau_pll_vals pv;
-	struct nvbios_pll info;
-	int ret;
-
-	ret = nvbios_pll_parse(nouveau_bios(priv), type > 0x405c ?
-			       type : type - 4, &info);
-	if (ret)
-		return ret;
-
-	ret = clk->pll_calc(clk, &info, freq, &pv);
-	if (!ret)
-		return ret;
-
-	return clk->pll_prog(clk, type, &pv);
-}
-
 int
 nv04_clock_pll_calc(struct nouveau_clock *clock, struct nvbios_pll *info,
 		    int clk, struct nouveau_pll_vals *pv)
 {
 	int N1, M1, N2, M2, P;
-	int ret = nv04_pll_calc(clock, info, clk, &N1, &M1, &N2, &M2, &P);
+	int ret = nv04_pll_calc(nv_subdev(clock), info, clk, &N1, &M1, &N2, &M2, &P);
 	if (ret) {
 		pv->refclk = info->refclk;
 		pv->N1 = N1;
@@ -313,17 +54,17 @@ int
 nv04_clock_pll_prog(struct nouveau_clock *clk, u32 reg1,
 		    struct nouveau_pll_vals *pv)
 {
-	struct nv04_clock_priv *priv = (void *)clk;
+	struct nouveau_devinit *devinit = nouveau_devinit(clk);
 	int cv = nouveau_bios(clk)->version.chip;
 
 	if (cv == 0x30 || cv == 0x31 || cv == 0x35 || cv == 0x36 ||
 	    cv >= 0x40) {
 		if (reg1 > 0x405c)
-			setPLL_double_highregs(priv, reg1, pv);
+			setPLL_double_highregs(devinit, reg1, pv);
 		else
-			setPLL_double_lowregs(priv, reg1, pv);
+			setPLL_double_lowregs(devinit, reg1, pv);
 	} else
-		setPLL_single(priv, reg1, pv);
+		setPLL_single(devinit, reg1, pv);
 
 	return 0;
 }
@@ -341,7 +82,6 @@ nv04_clock_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
 	if (ret)
 		return ret;
 
-	priv->base.pll_set = nv04_clock_pll_set;
 	priv->base.pll_calc = nv04_clock_pll_calc;
 	priv->base.pll_prog = nv04_clock_pll_prog;
 	return 0;

+ 0 - 1
drivers/gpu/drm/nouveau/core/subdev/clock/nv40.c

@@ -41,7 +41,6 @@ nv40_clock_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
 	if (ret)
 		return ret;
 
-	priv->base.pll_set = nv04_clock_pll_set;
 	priv->base.pll_calc = nv04_clock_pll_calc;
 	priv->base.pll_prog = nv04_clock_pll_prog;
 	return 0;

+ 0 - 45
drivers/gpu/drm/nouveau/core/subdev/clock/nv50.c

@@ -32,50 +32,6 @@ struct nv50_clock_priv {
 	struct nouveau_clock base;
 };
 
-static int
-nv50_clock_pll_set(struct nouveau_clock *clk, u32 type, u32 freq)
-{
-	struct nv50_clock_priv *priv = (void *)clk;
-	struct nouveau_bios *bios = nouveau_bios(priv);
-	struct nvbios_pll info;
-	int N1, M1, N2, M2, P;
-	int ret;
-
-	ret = nvbios_pll_parse(bios, type, &info);
-	if (ret) {
-		nv_error(clk, "failed to retrieve pll data, %d\n", ret);
-		return ret;
-	}
-
-	ret = nv04_pll_calc(clk, &info, freq, &N1, &M1, &N2, &M2, &P);
-	if (!ret) {
-		nv_error(clk, "failed pll calculation\n");
-		return ret;
-	}
-
-	switch (info.type) {
-	case PLL_VPLL0:
-	case PLL_VPLL1:
-		nv_wr32(priv, info.reg + 0, 0x10000611);
-		nv_mask(priv, info.reg + 4, 0x00ff00ff, (M1 << 16) | N1);
-		nv_mask(priv, info.reg + 8, 0x7fff00ff, (P  << 28) |
-							(M2 << 16) | N2);
-		break;
-	case PLL_MEMORY:
-		nv_mask(priv, info.reg + 0, 0x01ff0000, (P << 22) |
-						        (info.bias_p << 19) |
-							(P << 16));
-		nv_wr32(priv, info.reg + 4, (N1 << 8) | M1);
-		break;
-	default:
-		nv_mask(priv, info.reg + 0, 0x00070000, (P << 16));
-		nv_wr32(priv, info.reg + 4, (N1 << 8) | M1);
-		break;
-	}
-
-	return 0;
-}
-
 static int
 nv50_clock_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
 		struct nouveau_oclass *oclass, void *data, u32 size,
@@ -89,7 +45,6 @@ nv50_clock_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
 	if (ret)
 		return ret;
 
-	priv->base.pll_set = nv50_clock_pll_set;
 	priv->base.pll_calc = nv04_clock_pll_calc;
 	return 0;
 }

+ 1 - 36
drivers/gpu/drm/nouveau/core/subdev/clock/nva3.c

@@ -32,47 +32,13 @@ struct nva3_clock_priv {
 	struct nouveau_clock base;
 };
 
-static int
-nva3_clock_pll_set(struct nouveau_clock *clk, u32 type, u32 freq)
-{
-	struct nva3_clock_priv *priv = (void *)clk;
-	struct nouveau_bios *bios = nouveau_bios(priv);
-	struct nvbios_pll info;
-	int N, fN, M, P;
-	int ret;
-
-	ret = nvbios_pll_parse(bios, type, &info);
-	if (ret)
-		return ret;
-
-	ret = nva3_pll_calc(clk, &info, freq, &N, &fN, &M, &P);
-	if (ret < 0)
-		return ret;
-
-	switch (info.type) {
-	case PLL_VPLL0:
-	case PLL_VPLL1:
-		nv_wr32(priv, info.reg + 0, 0x50000610);
-		nv_mask(priv, info.reg + 4, 0x003fffff,
-					    (P << 16) | (M << 8) | N);
-		nv_wr32(priv, info.reg + 8, fN);
-		break;
-	default:
-		nv_warn(priv, "0x%08x/%dKhz unimplemented\n", type, freq);
-		ret = -EINVAL;
-		break;
-	}
-
-	return ret;
-}
-
 int
 nva3_clock_pll_calc(struct nouveau_clock *clock, struct nvbios_pll *info,
 		    int clk, struct nouveau_pll_vals *pv)
 {
 	int ret, N, M, P;
 
-	ret = nva3_pll_calc(clock, info, clk, &N, NULL, &M, &P);
+	ret = nva3_pll_calc(nv_subdev(clock), info, clk, &N, NULL, &M, &P);
 
 	if (ret > 0) {
 		pv->refclk = info->refclk;
@@ -97,7 +63,6 @@ nva3_clock_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
 	if (ret)
 		return ret;
 
-	priv->base.pll_set = nva3_clock_pll_set;
 	priv->base.pll_calc = nva3_clock_pll_calc;
 	return 0;
 }

+ 0 - 36
drivers/gpu/drm/nouveau/core/subdev/clock/nvc0.c

@@ -32,41 +32,6 @@ struct nvc0_clock_priv {
 	struct nouveau_clock base;
 };
 
-static int
-nvc0_clock_pll_set(struct nouveau_clock *clk, u32 type, u32 freq)
-{
-	struct nvc0_clock_priv *priv = (void *)clk;
-	struct nouveau_bios *bios = nouveau_bios(priv);
-	struct nvbios_pll info;
-	int N, fN, M, P;
-	int ret;
-
-	ret = nvbios_pll_parse(bios, type, &info);
-	if (ret)
-		return ret;
-
-	ret = nva3_pll_calc(clk, &info, freq, &N, &fN, &M, &P);
-	if (ret < 0)
-		return ret;
-
-	switch (info.type) {
-	case PLL_VPLL0:
-	case PLL_VPLL1:
-	case PLL_VPLL2:
-	case PLL_VPLL3:
-		nv_mask(priv, info.reg + 0x0c, 0x00000000, 0x00000100);
-		nv_wr32(priv, info.reg + 0x04, (P << 16) | (N << 8) | M);
-		nv_wr32(priv, info.reg + 0x10, fN << 16);
-		break;
-	default:
-		nv_warn(priv, "0x%08x/%dKhz unimplemented\n", type, freq);
-		ret = -EINVAL;
-		break;
-	}
-
-	return ret;
-}
-
 static int
 nvc0_clock_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
 		struct nouveau_oclass *oclass, void *data, u32 size,
@@ -80,7 +45,6 @@ nvc0_clock_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
 	if (ret)
 		return ret;
 
-	priv->base.pll_set = nvc0_clock_pll_set;
 	priv->base.pll_calc = nva3_clock_pll_calc;
 	return 0;
 }

+ 2 - 2
drivers/gpu/drm/nouveau/core/subdev/clock/pll.h

@@ -1,9 +1,9 @@
 #ifndef __NOUVEAU_PLL_H__
 #define __NOUVEAU_PLL_H__
 
-int nv04_pll_calc(struct nouveau_clock *, struct nvbios_pll *, u32 freq,
+int nv04_pll_calc(struct nouveau_subdev *, struct nvbios_pll *, u32 freq,
 		  int *N1, int *M1, int *N2, int *M2, int *P);
-int nva3_pll_calc(struct nouveau_clock *, struct nvbios_pll *, u32 freq,
+int nva3_pll_calc(struct nouveau_subdev *, struct nvbios_pll *, u32 freq,
 		  int *N, int *fN, int *M, int *P);
 
 #endif

+ 8 - 9
drivers/gpu/drm/nouveau/core/subdev/clock/pllnv04.c

@@ -21,14 +21,13 @@
  * SOFTWARE.
  */
 
-#include <subdev/clock.h>
 #include <subdev/bios.h>
 #include <subdev/bios/pll.h>
 
 #include "pll.h"
 
 static int
-getMNP_single(struct nouveau_clock *clock, struct nvbios_pll *info, int clk,
+getMNP_single(struct nouveau_subdev *subdev, struct nvbios_pll *info, int clk,
 	      int *pN, int *pM, int *pP)
 {
 	/* Find M, N and P for a single stage PLL
@@ -39,7 +38,7 @@ getMNP_single(struct nouveau_clock *clock, struct nvbios_pll *info, int clk,
 	 * "clk" parameter in kHz
 	 * returns calculated clock
 	 */
-	int cv = nouveau_bios(clock)->version.chip;
+	int cv = nouveau_bios(subdev)->version.chip;
 	int minvco = info->vco1.min_freq, maxvco = info->vco1.max_freq;
 	int minM = info->vco1.min_m, maxM = info->vco1.max_m;
 	int minN = info->vco1.min_n, maxN = info->vco1.max_n;
@@ -124,7 +123,7 @@ getMNP_single(struct nouveau_clock *clock, struct nvbios_pll *info, int clk,
 }
 
 static int
-getMNP_double(struct nouveau_clock *clock, struct nvbios_pll *info, int clk,
+getMNP_double(struct nouveau_subdev *subdev, struct nvbios_pll *info, int clk,
 	      int *pN1, int *pM1, int *pN2, int *pM2, int *pP)
 {
 	/* Find M, N and P for a two stage PLL
@@ -135,7 +134,7 @@ getMNP_double(struct nouveau_clock *clock, struct nvbios_pll *info, int clk,
 	 * "clk" parameter in kHz
 	 * returns calculated clock
 	 */
-	int chip_version = nouveau_bios(clock)->version.chip;
+	int chip_version = nouveau_bios(subdev)->version.chip;
 	int minvco1 = info->vco1.min_freq, maxvco1 = info->vco1.max_freq;
 	int minvco2 = info->vco2.min_freq, maxvco2 = info->vco2.max_freq;
 	int minU1 = info->vco1.min_inputfreq, minU2 = info->vco2.min_inputfreq;
@@ -223,20 +222,20 @@ getMNP_double(struct nouveau_clock *clock, struct nvbios_pll *info, int clk,
 }
 
 int
-nv04_pll_calc(struct nouveau_clock *clk, struct nvbios_pll *info, u32 freq,
+nv04_pll_calc(struct nouveau_subdev *subdev, struct nvbios_pll *info, u32 freq,
 	      int *N1, int *M1, int *N2, int *M2, int *P)
 {
 	int ret;
 
 	if (!info->vco2.max_freq) {
-		ret = getMNP_single(clk, info, freq, N1, M1, P);
+		ret = getMNP_single(subdev, info, freq, N1, M1, P);
 		*N2 = 1;
 		*M2 = 1;
 	} else {
-		ret = getMNP_double(clk, info, freq, N1, M1, N2, M2, P);
+		ret = getMNP_double(subdev, info, freq, N1, M1, N2, M2, P);
 	}
 
 	if (!ret)
-		nv_error(clk, "unable to compute acceptable pll values\n");
+		nv_error(subdev, "unable to compute acceptable pll values\n");
 	return ret;
 }

+ 13 - 5
drivers/gpu/drm/nouveau/core/subdev/clock/pllnva3.c

@@ -29,7 +29,7 @@
 #include "pll.h"
 
 int
-nva3_pll_calc(struct nouveau_clock *clock, struct nvbios_pll *info,
+nva3_pll_calc(struct nouveau_subdev *subdev, struct nvbios_pll *info,
 	      u32 freq, int *pN, int *pfN, int *pM, int *P)
 {
 	u32 best_err = ~0, err;
@@ -50,8 +50,15 @@ nva3_pll_calc(struct nouveau_clock *clock, struct nvbios_pll *info,
 		u32 tmp = freq * *P * M;
 		N  = tmp / info->refclk;
 		fN = tmp % info->refclk;
-		if (!pfN && fN >= info->refclk / 2)
-			N++;
+
+		if (!pfN) {
+			if (fN >= info->refclk / 2)
+				N++;
+		} else {
+			if (fN <  info->refclk / 2)
+				N--;
+			fN = tmp - (N * info->refclk);
+		}
 
 		if (N < info->vco1.min_n)
 			continue;
@@ -66,13 +73,14 @@ nva3_pll_calc(struct nouveau_clock *clock, struct nvbios_pll *info,
 		}
 
 		if (pfN) {
-			*pfN = (((fN << 13) / info->refclk) - 4096) & 0xffff;
+			*pfN = ((fN << 13) + info->refclk / 2) / info->refclk;
+			*pfN = (*pfN - 4096) & 0xffff;
 			return freq;
 		}
 	}
 
 	if (unlikely(best_err == ~0)) {
-		nv_error(clock, "unable to find matching pll values\n");
+		nv_error(subdev, "unable to find matching pll values\n");
 		return -EINVAL;
 	}
 

+ 13 - 10
drivers/gpu/drm/nouveau/core/subdev/devinit/base.c

@@ -29,18 +29,10 @@
 #include <subdev/bios/init.h>
 
 int
-nouveau_devinit_init(struct nouveau_devinit *devinit)
+_nouveau_devinit_fini(struct nouveau_object *object, bool suspend)
 {
-	int ret = nouveau_subdev_init(&devinit->base);
-	if (ret)
-		return ret;
+	struct nouveau_devinit *devinit = (void *)object;
 
-	return nvbios_init(&devinit->base, devinit->post);
-}
-
-int
-nouveau_devinit_fini(struct nouveau_devinit *devinit, bool suspend)
-{
 	/* force full reinit on resume */
 	if (suspend)
 		devinit->post = true;
@@ -48,6 +40,17 @@ nouveau_devinit_fini(struct nouveau_devinit *devinit, bool suspend)
 	return nouveau_subdev_fini(&devinit->base, suspend);
 }
 
+int
+_nouveau_devinit_init(struct nouveau_object *object)
+{
+	struct nouveau_devinit *devinit = (void *)object;
+	int ret = nouveau_subdev_init(&devinit->base);
+	if (ret)
+		return ret;
+
+	return nvbios_init(&devinit->base, devinit->post);
+}
+
 int
 nouveau_devinit_create_(struct nouveau_object *parent,
 			struct nouveau_object *engine,

+ 304 - 25
drivers/gpu/drm/nouveau/core/subdev/devinit/nv04.c

@@ -24,10 +24,10 @@
  *
  */
 
-#include <subdev/devinit.h>
 #include <subdev/vga.h>
 
 #include "fbmem.h"
+#include "priv.h"
 
 struct nv04_devinit_priv {
 	struct nouveau_devinit base;
@@ -111,33 +111,298 @@ nv04_devinit_meminit(struct nouveau_devinit *devinit)
 }
 
 static int
-nv04_devinit_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
-		  struct nouveau_oclass *oclass, void *data, u32 size,
-		  struct nouveau_object **pobject)
+powerctrl_1_shift(int chip_version, int reg)
 {
-	struct nv04_devinit_priv *priv;
+	int shift = -4;
+
+	if (chip_version < 0x17 || chip_version == 0x1a || chip_version == 0x20)
+		return shift;
+
+	switch (reg) {
+	case 0x680520:
+		shift += 4;
+	case 0x680508:
+		shift += 4;
+	case 0x680504:
+		shift += 4;
+	case 0x680500:
+		shift += 4;
+	}
+
+	/*
+	 * the shift for vpll regs is only used for nv3x chips with a single
+	 * stage pll
+	 */
+	if (shift > 4 && (chip_version < 0x32 || chip_version == 0x35 ||
+			  chip_version == 0x36 || chip_version >= 0x40))
+		shift = -4;
+
+	return shift;
+}
+
+void
+setPLL_single(struct nouveau_devinit *devinit, u32 reg,
+	      struct nouveau_pll_vals *pv)
+{
+	int chip_version = nouveau_bios(devinit)->version.chip;
+	uint32_t oldpll = nv_rd32(devinit, reg);
+	int oldN = (oldpll >> 8) & 0xff, oldM = oldpll & 0xff;
+	uint32_t pll = (oldpll & 0xfff80000) | pv->log2P << 16 | pv->NM1;
+	uint32_t saved_powerctrl_1 = 0;
+	int shift_powerctrl_1 = powerctrl_1_shift(chip_version, reg);
+
+	if (oldpll == pll)
+		return;	/* already set */
+
+	if (shift_powerctrl_1 >= 0) {
+		saved_powerctrl_1 = nv_rd32(devinit, 0x001584);
+		nv_wr32(devinit, 0x001584,
+			(saved_powerctrl_1 & ~(0xf << shift_powerctrl_1)) |
+			1 << shift_powerctrl_1);
+	}
+
+	if (oldM && pv->M1 && (oldN / oldM < pv->N1 / pv->M1))
+		/* upclock -- write new post divider first */
+		nv_wr32(devinit, reg, pv->log2P << 16 | (oldpll & 0xffff));
+	else
+		/* downclock -- write new NM first */
+		nv_wr32(devinit, reg, (oldpll & 0xffff0000) | pv->NM1);
+
+	if (chip_version < 0x17 && chip_version != 0x11)
+		/* wait a bit on older chips */
+		msleep(64);
+	nv_rd32(devinit, reg);
+
+	/* then write the other half as well */
+	nv_wr32(devinit, reg, pll);
+
+	if (shift_powerctrl_1 >= 0)
+		nv_wr32(devinit, 0x001584, saved_powerctrl_1);
+}
+
+static uint32_t
+new_ramdac580(uint32_t reg1, bool ss, uint32_t ramdac580)
+{
+	bool head_a = (reg1 == 0x680508);
+
+	if (ss)	/* single stage pll mode */
+		ramdac580 |= head_a ? 0x00000100 : 0x10000000;
+	else
+		ramdac580 &= head_a ? 0xfffffeff : 0xefffffff;
+
+	return ramdac580;
+}
+
+void
+setPLL_double_highregs(struct nouveau_devinit *devinit, u32 reg1,
+		       struct nouveau_pll_vals *pv)
+{
+	int chip_version = nouveau_bios(devinit)->version.chip;
+	bool nv3035 = chip_version == 0x30 || chip_version == 0x35;
+	uint32_t reg2 = reg1 + ((reg1 == 0x680520) ? 0x5c : 0x70);
+	uint32_t oldpll1 = nv_rd32(devinit, reg1);
+	uint32_t oldpll2 = !nv3035 ? nv_rd32(devinit, reg2) : 0;
+	uint32_t pll1 = (oldpll1 & 0xfff80000) | pv->log2P << 16 | pv->NM1;
+	uint32_t pll2 = (oldpll2 & 0x7fff0000) | 1 << 31 | pv->NM2;
+	uint32_t oldramdac580 = 0, ramdac580 = 0;
+	bool single_stage = !pv->NM2 || pv->N2 == pv->M2;	/* nv41+ only */
+	uint32_t saved_powerctrl_1 = 0, savedc040 = 0;
+	int shift_powerctrl_1 = powerctrl_1_shift(chip_version, reg1);
+
+	/* model specific additions to generic pll1 and pll2 set up above */
+	if (nv3035) {
+		pll1 = (pll1 & 0xfcc7ffff) | (pv->N2 & 0x18) << 21 |
+		       (pv->N2 & 0x7) << 19 | 8 << 4 | (pv->M2 & 7) << 4;
+		pll2 = 0;
+	}
+	if (chip_version > 0x40 && reg1 >= 0x680508) { /* !nv40 */
+		oldramdac580 = nv_rd32(devinit, 0x680580);
+		ramdac580 = new_ramdac580(reg1, single_stage, oldramdac580);
+		if (oldramdac580 != ramdac580)
+			oldpll1 = ~0;	/* force mismatch */
+		if (single_stage)
+			/* magic value used by nvidia in single stage mode */
+			pll2 |= 0x011f;
+	}
+	if (chip_version > 0x70)
+		/* magic bits set by the blob (but not the bios) on g71-73 */
+		pll1 = (pll1 & 0x7fffffff) | (single_stage ? 0x4 : 0xc) << 28;
+
+	if (oldpll1 == pll1 && oldpll2 == pll2)
+		return;	/* already set */
+
+	if (shift_powerctrl_1 >= 0) {
+		saved_powerctrl_1 = nv_rd32(devinit, 0x001584);
+		nv_wr32(devinit, 0x001584,
+			(saved_powerctrl_1 & ~(0xf << shift_powerctrl_1)) |
+			1 << shift_powerctrl_1);
+	}
+
+	if (chip_version >= 0x40) {
+		int shift_c040 = 14;
+
+		switch (reg1) {
+		case 0x680504:
+			shift_c040 += 2;
+		case 0x680500:
+			shift_c040 += 2;
+		case 0x680520:
+			shift_c040 += 2;
+		case 0x680508:
+			shift_c040 += 2;
+		}
+
+		savedc040 = nv_rd32(devinit, 0xc040);
+		if (shift_c040 != 14)
+			nv_wr32(devinit, 0xc040, savedc040 & ~(3 << shift_c040));
+	}
+
+	if (oldramdac580 != ramdac580)
+		nv_wr32(devinit, 0x680580, ramdac580);
+
+	if (!nv3035)
+		nv_wr32(devinit, reg2, pll2);
+	nv_wr32(devinit, reg1, pll1);
+
+	if (shift_powerctrl_1 >= 0)
+		nv_wr32(devinit, 0x001584, saved_powerctrl_1);
+	if (chip_version >= 0x40)
+		nv_wr32(devinit, 0xc040, savedc040);
+}
+
+void
+setPLL_double_lowregs(struct nouveau_devinit *devinit, u32 NMNMreg,
+		      struct nouveau_pll_vals *pv)
+{
+	/* When setting PLLs, there is a merry game of disabling and enabling
+	 * various bits of hardware during the process. This function is a
+	 * synthesis of six nv4x traces, nearly each card doing a subtly
+	 * different thing. With luck all the necessary bits for each card are
+	 * combined herein. Without luck it deviates from each card's formula
+	 * so as to not work on any :)
+	 */
+
+	uint32_t Preg = NMNMreg - 4;
+	bool mpll = Preg == 0x4020;
+	uint32_t oldPval = nv_rd32(devinit, Preg);
+	uint32_t NMNM = pv->NM2 << 16 | pv->NM1;
+	uint32_t Pval = (oldPval & (mpll ? ~(0x77 << 16) : ~(7 << 16))) |
+			0xc << 28 | pv->log2P << 16;
+	uint32_t saved4600 = 0;
+	/* some cards have different maskc040s */
+	uint32_t maskc040 = ~(3 << 14), savedc040;
+	bool single_stage = !pv->NM2 || pv->N2 == pv->M2;
+
+	if (nv_rd32(devinit, NMNMreg) == NMNM && (oldPval & 0xc0070000) == Pval)
+		return;
+
+	if (Preg == 0x4000)
+		maskc040 = ~0x333;
+	if (Preg == 0x4058)
+		maskc040 = ~(0xc << 24);
+
+	if (mpll) {
+		struct nvbios_pll info;
+		uint8_t Pval2;
+
+		if (nvbios_pll_parse(nouveau_bios(devinit), Preg, &info))
+			return;
+
+		Pval2 = pv->log2P + info.bias_p;
+		if (Pval2 > info.max_p)
+			Pval2 = info.max_p;
+		Pval |= 1 << 28 | Pval2 << 20;
+
+		saved4600 = nv_rd32(devinit, 0x4600);
+		nv_wr32(devinit, 0x4600, saved4600 | 8 << 28);
+	}
+	if (single_stage)
+		Pval |= mpll ? 1 << 12 : 1 << 8;
+
+	nv_wr32(devinit, Preg, oldPval | 1 << 28);
+	nv_wr32(devinit, Preg, Pval & ~(4 << 28));
+	if (mpll) {
+		Pval |= 8 << 20;
+		nv_wr32(devinit, 0x4020, Pval & ~(0xc << 28));
+		nv_wr32(devinit, 0x4038, Pval & ~(0xc << 28));
+	}
+
+	savedc040 = nv_rd32(devinit, 0xc040);
+	nv_wr32(devinit, 0xc040, savedc040 & maskc040);
+
+	nv_wr32(devinit, NMNMreg, NMNM);
+	if (NMNMreg == 0x4024)
+		nv_wr32(devinit, 0x403c, NMNM);
+
+	nv_wr32(devinit, Preg, Pval);
+	if (mpll) {
+		Pval &= ~(8 << 20);
+		nv_wr32(devinit, 0x4020, Pval);
+		nv_wr32(devinit, 0x4038, Pval);
+		nv_wr32(devinit, 0x4600, saved4600);
+	}
+
+	nv_wr32(devinit, 0xc040, savedc040);
+
+	if (mpll) {
+		nv_wr32(devinit, 0x4020, Pval & ~(1 << 28));
+		nv_wr32(devinit, 0x4038, Pval & ~(1 << 28));
+	}
+}
+
+int
+nv04_devinit_pll_set(struct nouveau_devinit *devinit, u32 type, u32 freq)
+{
+	struct nouveau_bios *bios = nouveau_bios(devinit);
+	struct nouveau_pll_vals pv;
+	struct nvbios_pll info;
+	int cv = bios->version.chip;
+	int N1, M1, N2, M2, P;
 	int ret;
 
-	ret = nouveau_devinit_create(parent, engine, oclass, &priv);
-	*pobject = nv_object(priv);
+	ret = nvbios_pll_parse(bios, type > 0x405c ? type : type - 4, &info);
 	if (ret)
 		return ret;
 
-	priv->base.meminit = nv04_devinit_meminit;
-	priv->owner = -1;
+	ret = nv04_pll_calc(nv_subdev(devinit), &info, freq,
+			   &N1, &M1, &N2, &M2, &P);
+	if (!ret)
+		return -EINVAL;
+
+	pv.refclk = info.refclk;
+	pv.N1 = N1;
+	pv.M1 = M1;
+	pv.N2 = N2;
+	pv.M2 = M2;
+	pv.log2P = P;
+
+	if (cv == 0x30 || cv == 0x31 || cv == 0x35 || cv == 0x36 ||
+	    cv >= 0x40) {
+		if (type > 0x405c)
+			setPLL_double_highregs(devinit, type, &pv);
+		else
+			setPLL_double_lowregs(devinit, type, &pv);
+	} else
+		setPLL_single(devinit, type, &pv);
+
 	return 0;
 }
 
-void
-nv04_devinit_dtor(struct nouveau_object *object)
+int
+nv04_devinit_fini(struct nouveau_object *object, bool suspend)
 {
 	struct nv04_devinit_priv *priv = (void *)object;
 
-	/* restore vga owner saved at first init, and lock crtc regs  */
-	nv_wrvgaowner(priv, priv->owner);
-	nv_lockvgac(priv, true);
+	/* make i2c busses accessible */
+	nv_mask(priv, 0x000200, 0x00000001, 0x00000001);
 
-	nouveau_devinit_destroy(&priv->base);
+	/* unlock extended vga crtc regs, and unslave crtcs */
+	nv_lockvgac(priv, false);
+	if (priv->owner < 0)
+		priv->owner = nv_rdvgaowner(priv);
+	nv_wrvgaowner(priv, 0);
+
+	return nouveau_devinit_fini(&priv->base, suspend);
 }
 
 int
@@ -160,21 +425,35 @@ nv04_devinit_init(struct nouveau_object *object)
 	return nouveau_devinit_init(&priv->base);
 }
 
-int
-nv04_devinit_fini(struct nouveau_object *object, bool suspend)
+/*
+ * Object destructor: writes priv->owner back as the VGA owner, re-locks the
+ * VGA CRTC registers, then destroys the base devinit object.
+ */
+void
+nv04_devinit_dtor(struct nouveau_object *object)
 {
 	struct nv04_devinit_priv *priv = (void *)object;
 
-	/* make i2c busses accessible */
-	nv_mask(priv, 0x000200, 0x00000001, 0x00000001);
+	/* restore vga owner saved at first init, and lock crtc regs  */
+	nv_wrvgaowner(priv, priv->owner);
+	nv_lockvgac(priv, true);
 
-	/* unlock extended vga crtc regs, and unslave crtcs */
-	nv_lockvgac(priv, false);
-	if (priv->owner < 0)
-		priv->owner = nv_rdvgaowner(priv);
-	nv_wrvgaowner(priv, 0);
+	nouveau_devinit_destroy(&priv->base);
+}
 
-	return nouveau_devinit_fini(&priv->base, suspend);
+/*
+ * Object constructor: creates the base devinit object, installs the nv04
+ * meminit and pll_set hooks, and marks the VGA owner as not yet recorded.
+ *
+ * *pobject is assigned before the error check, so it is set even when
+ * nouveau_devinit_create() fails.  Returns 0 or that function's error code.
+ */
+static int
+nv04_devinit_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
+		  struct nouveau_oclass *oclass, void *data, u32 size,
+		  struct nouveau_object **pobject)
+{
+	struct nv04_devinit_priv *priv;
+	int ret;
+
+	ret = nouveau_devinit_create(parent, engine, oclass, &priv);
+	*pobject = nv_object(priv);
+	if (ret)
+		return ret;
+
+	priv->base.meminit = nv04_devinit_meminit;
+	priv->base.pll_set = nv04_devinit_pll_set;
+	/* negative owner == "not saved yet"; nv04_devinit_fini() fills it in */
+	priv->owner = -1;
+	return 0;
 }
 
 struct nouveau_oclass

+ 2 - 1
drivers/gpu/drm/nouveau/core/subdev/devinit/nv05.c

@@ -24,12 +24,12 @@
  *
  */
 
-#include <subdev/devinit.h>
 #include <subdev/bios.h>
 #include <subdev/bios/bmp.h>
 #include <subdev/vga.h>
 
 #include "fbmem.h"
+#include "priv.h"
 
 struct nv05_devinit_priv {
 	struct nouveau_devinit base;
@@ -144,6 +144,7 @@ nv05_devinit_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
 		return ret;
 
 	priv->base.meminit = nv05_devinit_meminit;
+	priv->base.pll_set = nv04_devinit_pll_set;
 	return 0;
 }
 

+ 2 - 1
drivers/gpu/drm/nouveau/core/subdev/devinit/nv10.c

@@ -24,10 +24,10 @@
  *
  */
 
-#include <subdev/devinit.h>
 #include <subdev/vga.h>
 
 #include "fbmem.h"
+#include "priv.h"
 
 struct nv10_devinit_priv {
 	struct nouveau_devinit base;
@@ -109,6 +109,7 @@ nv10_devinit_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
 		return ret;
 
 	priv->base.meminit = nv10_devinit_meminit;
+	priv->base.pll_set = nv04_devinit_pll_set;
 	return 0;
 }
 

+ 2 - 2
drivers/gpu/drm/nouveau/core/subdev/devinit/nv1a.c

@@ -22,8 +22,7 @@
  * Authors: Ben Skeggs
  */
 
-#include <subdev/devinit.h>
-#include <subdev/vga.h>
+#include "priv.h"
 
 struct nv1a_devinit_priv {
 	struct nouveau_devinit base;
@@ -43,6 +42,7 @@ nv1a_devinit_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
 	if (ret)
 		return ret;
 
+	priv->base.pll_set = nv04_devinit_pll_set;
 	return 0;
 }
 

+ 2 - 3
drivers/gpu/drm/nouveau/core/subdev/devinit/nv20.c

@@ -24,9 +24,7 @@
  *
  */
 
-#include <subdev/devinit.h>
-#include <subdev/vga.h>
-
+#include "priv.h"
 #include "fbmem.h"
 
 struct nv20_devinit_priv {
@@ -81,6 +79,7 @@ nv20_devinit_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
 		return ret;
 
 	priv->base.meminit = nv20_devinit_meminit;
+	priv->base.pll_set = nv04_devinit_pll_set;
 	return 0;
 }
 

+ 53 - 25
drivers/gpu/drm/nouveau/core/subdev/devinit/nv50.c

@@ -1,5 +1,5 @@
 /*
- * Copyright 2012 Red Hat Inc.
+ * Copyright 2013 Red Hat Inc.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -26,37 +26,55 @@
 #include <subdev/bios/dcb.h>
 #include <subdev/bios/disp.h>
 #include <subdev/bios/init.h>
-#include <subdev/devinit.h>
 #include <subdev/vga.h>
 
-struct nv50_devinit_priv {
-	struct nouveau_devinit base;
-};
+#include "priv.h"
 
 static int
-nv50_devinit_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
-		  struct nouveau_oclass *oclass, void *data, u32 size,
-		  struct nouveau_object **pobject)
+nv50_devinit_pll_set(struct nouveau_devinit *devinit, u32 type, u32 freq)
 {
-	struct nv50_devinit_priv *priv;
+	struct nv50_devinit_priv *priv = (void *)devinit;
+	struct nouveau_bios *bios = nouveau_bios(priv);
+	struct nvbios_pll info;
+	int N1, M1, N2, M2, P;
 	int ret;
 
-	ret = nouveau_devinit_create(parent, engine, oclass, &priv);
-	*pobject = nv_object(priv);
-	if (ret)
+	ret = nvbios_pll_parse(bios, type, &info);
+	if (ret) {
+		nv_error(devinit, "failed to retrieve pll data, %d\n", ret);
 		return ret;
+	}
 
-	return 0;
-}
+	ret = nv04_pll_calc(nv_subdev(devinit), &info, freq, &N1, &M1, &N2, &M2, &P);
+	if (!ret) {
+		nv_error(devinit, "failed pll calculation\n");
+		return ret;
+	}
 
-static void
-nv50_devinit_dtor(struct nouveau_object *object)
-{
-	struct nv50_devinit_priv *priv = (void *)object;
-	nouveau_devinit_destroy(&priv->base);
+	switch (info.type) {
+	case PLL_VPLL0:
+	case PLL_VPLL1:
+		nv_wr32(priv, info.reg + 0, 0x10000611);
+		nv_mask(priv, info.reg + 4, 0x00ff00ff, (M1 << 16) | N1);
+		nv_mask(priv, info.reg + 8, 0x7fff00ff, (P  << 28) |
+							(M2 << 16) | N2);
+		break;
+	case PLL_MEMORY:
+		nv_mask(priv, info.reg + 0, 0x01ff0000, (P << 22) |
+						        (info.bias_p << 19) |
+							(P << 16));
+		nv_wr32(priv, info.reg + 4, (N1 << 8) | M1);
+		break;
+	default:
+		nv_mask(priv, info.reg + 0, 0x00070000, (P << 16));
+		nv_wr32(priv, info.reg + 4, (N1 << 8) | M1);
+		break;
+	}
+
+	return 0;
 }
 
-static int
+int
 nv50_devinit_init(struct nouveau_object *object)
 {
 	struct nouveau_bios *bios = nouveau_bios(object);
@@ -103,10 +121,20 @@ nv50_devinit_init(struct nouveau_object *object)
 }
 
+/*
+ * Object constructor: creates the base devinit object and installs
+ * nv50_devinit_pll_set() as its pll_set hook.
+ *
+ * *pobject is assigned before the error check, so it is set even when
+ * nouveau_devinit_create() fails.  Returns 0 or that function's error code.
+ */
 static int
-nv50_devinit_fini(struct nouveau_object *object, bool suspend)
+nv50_devinit_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
+		  struct nouveau_oclass *oclass, void *data, u32 size,
+		  struct nouveau_object **pobject)
 {
-	struct nv50_devinit_priv *priv = (void *)object;
-	return nouveau_devinit_fini(&priv->base, suspend);
+	struct nv50_devinit_priv *priv;
+	int ret;
+
+	ret = nouveau_devinit_create(parent, engine, oclass, &priv);
+	*pobject = nv_object(priv);
+	if (ret)
+		return ret;
+
+	priv->base.pll_set = nv50_devinit_pll_set;
+	return 0;
 }
 
 struct nouveau_oclass
@@ -114,8 +142,8 @@ nv50_devinit_oclass = {
 	.handle = NV_SUBDEV(DEVINIT, 0x50),
 	.ofuncs = &(struct nouveau_ofuncs) {
 		.ctor = nv50_devinit_ctor,
-		.dtor = nv50_devinit_dtor,
+		.dtor = _nouveau_devinit_dtor,
 		.init = nv50_devinit_init,
-		.fini = nv50_devinit_fini,
+		.fini = _nouveau_devinit_fini,
 	},
 };

+ 87 - 0
drivers/gpu/drm/nouveau/core/subdev/devinit/nva3.c

@@ -0,0 +1,87 @@
+/*
+ * Copyright 2013 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs
+ */
+
+#include "priv.h"
+
+/*
+ * Program the PLL identified by 'type' to 'freq'.
+ *
+ * Only PLL_VPLL0 and PLL_VPLL1 are handled; any other info.type logs a
+ * warning and returns -EINVAL.  Lookup or calculation failures return the
+ * error from nvbios_pll_parse() / nva3_pll_calc().
+ *
+ * NOTE(review): on the VPLL path the function returns the non-negative value
+ * left in 'ret' by nva3_pll_calc(), not necessarily 0 - confirm that callers
+ * do not treat any non-zero return as failure.
+ */
+static int
+nva3_devinit_pll_set(struct nouveau_devinit *devinit, u32 type, u32 freq)
+{
+	struct nva3_devinit_priv *priv = (void *)devinit;
+	struct nouveau_bios *bios = nouveau_bios(priv);
+	struct nvbios_pll info;
+	int N, fN, M, P;
+	int ret;
+
+	ret = nvbios_pll_parse(bios, type, &info);
+	if (ret)
+		return ret;
+
+	/* only negative results are treated as errors here */
+	ret = nva3_pll_calc(nv_subdev(devinit), &info, freq, &N, &fN, &M, &P);
+	if (ret < 0)
+		return ret;
+
+	switch (info.type) {
+	case PLL_VPLL0:
+	case PLL_VPLL1:
+		nv_wr32(priv, info.reg + 0, 0x50000610);
+		/* P, M and N are packed into the low 22 bits of reg + 4 */
+		nv_mask(priv, info.reg + 4, 0x003fffff,
+					    (P << 16) | (M << 8) | N);
+		nv_wr32(priv, info.reg + 8, fN);
+		break;
+	default:
+		nv_warn(priv, "0x%08x/%dKhz unimplemented\n", type, freq);
+		ret = -EINVAL;
+		break;
+	}
+
+	return ret;
+}
+
+/*
+ * Object constructor: creates the base devinit object (using the shared
+ * struct nv50_devinit_priv layout) and installs nva3_devinit_pll_set() as
+ * its pll_set hook.
+ *
+ * *pobject is assigned before the error check, so it is set even when
+ * nouveau_devinit_create() fails.  Returns 0 or that function's error code.
+ */
+static int
+nva3_devinit_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
+		  struct nouveau_oclass *oclass, void *data, u32 size,
+		  struct nouveau_object **pobject)
+{
+	struct nv50_devinit_priv *priv;
+	int ret;
+
+	ret = nouveau_devinit_create(parent, engine, oclass, &priv);
+	*pobject = nv_object(priv);
+	if (ret)
+		return ret;
+
+	priv->base.pll_set = nva3_devinit_pll_set;
+	return 0;
+}
+
+/*
+ * devinit object class for nva3: own constructor, generic devinit
+ * destructor/fini, and the nv50 init routine.
+ */
+struct nouveau_oclass
+nva3_devinit_oclass = {
+	.handle = NV_SUBDEV(DEVINIT, 0xa3),
+	.ofuncs = &(struct nouveau_ofuncs) {
+		.ctor = nva3_devinit_ctor,
+		.dtor = _nouveau_devinit_dtor,
+		.init = nv50_devinit_init,
+		.fini = _nouveau_devinit_fini,
+	},
+};

+ 90 - 0
drivers/gpu/drm/nouveau/core/subdev/devinit/nvc0.c

@@ -0,0 +1,90 @@
+/*
+ * Copyright 2013 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs
+ */
+
+#include "priv.h"
+
+/*
+ * Program the PLL identified by 'type' to 'freq'.
+ *
+ * Only PLL_VPLL0..PLL_VPLL3 are handled; any other info.type logs a warning
+ * and returns -EINVAL.  Lookup or calculation failures return the error from
+ * nvbios_pll_parse() / nva3_pll_calc().
+ *
+ * NOTE(review): on the VPLL path the function returns the non-negative value
+ * left in 'ret' by nva3_pll_calc(), not necessarily 0 - confirm that callers
+ * do not treat any non-zero return as failure.
+ */
+static int
+nvc0_devinit_pll_set(struct nouveau_devinit *devinit, u32 type, u32 freq)
+{
+	struct nvc0_devinit_priv *priv = (void *)devinit;
+	struct nouveau_bios *bios = nouveau_bios(priv);
+	struct nvbios_pll info;
+	int N, fN, M, P;
+	int ret;
+
+	ret = nvbios_pll_parse(bios, type, &info);
+	if (ret)
+		return ret;
+
+	/* only negative results are treated as errors here */
+	ret = nva3_pll_calc(nv_subdev(devinit), &info, freq, &N, &fN, &M, &P);
+	if (ret < 0)
+		return ret;
+
+	switch (info.type) {
+	case PLL_VPLL0:
+	case PLL_VPLL1:
+	case PLL_VPLL2:
+	case PLL_VPLL3:
+		/* mask of 0: presumably keeps existing bits and only sets 0x100 - TODO confirm nv_mask semantics */
+		nv_mask(priv, info.reg + 0x0c, 0x00000000, 0x00000100);
+		/* note the N/M byte order differs from the nva3 layout */
+		nv_wr32(priv, info.reg + 0x04, (P << 16) | (N << 8) | M);
+		nv_wr32(priv, info.reg + 0x10, fN << 16);
+		break;
+	default:
+		nv_warn(priv, "0x%08x/%dKhz unimplemented\n", type, freq);
+		ret = -EINVAL;
+		break;
+	}
+
+	return ret;
+}
+
+/*
+ * Object constructor: creates the base devinit object (using the shared
+ * struct nv50_devinit_priv layout), installs nvc0_devinit_pll_set() as its
+ * pll_set hook, and sets base.post when bit 0 of register 0x022500 reads 1.
+ *
+ * *pobject is assigned before the error check, so it is set even when
+ * nouveau_devinit_create() fails.  Returns 0 or that function's error code.
+ */
+static int
+nvc0_devinit_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
+		  struct nouveau_oclass *oclass, void *data, u32 size,
+		  struct nouveau_object **pobject)
+{
+	struct nv50_devinit_priv *priv;
+	int ret;
+
+	ret = nouveau_devinit_create(parent, engine, oclass, &priv);
+	*pobject = nv_object(priv);
+	if (ret)
+		return ret;
+
+	priv->base.pll_set = nvc0_devinit_pll_set;
+	/* NOTE(review): meaning of 0x022500 bit 0 is not visible here - confirm */
+	if (nv_rd32(priv, 0x022500) & 0x00000001)
+		priv->base.post = true;
+	return 0;
+}
+
+/*
+ * devinit object class for nvc0: own constructor, generic devinit
+ * destructor/fini, and the nv50 init routine.  The handle carries this
+ * implementation's own identifier (0xc0) rather than reusing nva3's 0xa3.
+ */
+struct nouveau_oclass
+nvc0_devinit_oclass = {
+	.handle = NV_SUBDEV(DEVINIT, 0xc0),
+	.ofuncs = &(struct nouveau_ofuncs) {
+		.ctor = nvc0_devinit_ctor,
+		.dtor = _nouveau_devinit_dtor,
+		.init = nv50_devinit_init,
+		.fini = _nouveau_devinit_fini,
+	},
+};

+ 25 - 0
drivers/gpu/drm/nouveau/core/subdev/devinit/priv.h

@@ -0,0 +1,25 @@
+#ifndef __NVKM_DEVINIT_PRIV_H__
+#define __NVKM_DEVINIT_PRIV_H__
+
+#include <subdev/bios.h>
+#include <subdev/bios/pll.h>
+#include <subdev/clock/pll.h>
+#include <subdev/devinit.h>
+
+/* nv04 object methods and PLL entry point, shared with other devinit files */
+void nv04_devinit_dtor(struct nouveau_object *);
+int  nv04_devinit_init(struct nouveau_object *);
+int  nv04_devinit_fini(struct nouveau_object *, bool);
+int  nv04_devinit_pll_set(struct nouveau_devinit *, u32, u32);
+
+/* register-writing helpers called by nv04_devinit_pll_set() */
+void setPLL_single(struct nouveau_devinit *, u32, struct nouveau_pll_vals *);
+void setPLL_double_highregs(struct nouveau_devinit *, u32, struct nouveau_pll_vals *);
+void setPLL_double_lowregs(struct nouveau_devinit *, u32, struct nouveau_pll_vals *);
+
+
+/* private object layout used by the nv50, nva3 and nvc0 constructors */
+struct nv50_devinit_priv {
+	struct nouveau_devinit base;
+};
+
+/* also used as the .init method of the nva3 and nvc0 object classes */
+int  nv50_devinit_init(struct nouveau_object *);
+
+#endif

+ 74 - 51
drivers/gpu/drm/nouveau/core/subdev/fb/base.c

@@ -57,7 +57,57 @@ nouveau_fb_bios_memtype(struct nouveau_bios *bios)
 }
 
+/*
+ * Object fini method for the fb subdev: runs the ram object's fini first,
+ * then nouveau_subdev_fini().  A ram fini error aborts (and is returned) only
+ * when suspending; otherwise it is ignored and teardown continues.
+ */
 int
-nouveau_fb_preinit(struct nouveau_fb *pfb)
+_nouveau_fb_fini(struct nouveau_object *object, bool suspend)
+{
+	struct nouveau_fb *pfb = (void *)object;
+	int ret;
+
+	ret = nv_ofuncs(pfb->ram)->fini(nv_object(pfb->ram), suspend);
+	if (ret && suspend)
+		return ret;
+
+	return nouveau_subdev_fini(&pfb->base, suspend);
+}
+
+/*
+ * Object init method for the fb subdev: initialises the base subdev, then
+ * the ram object, then calls tile.prog for every tile region.
+ *
+ * Returns 0 on success or the first error from the subdev/ram init calls.
+ */
+int
+_nouveau_fb_init(struct nouveau_object *object)
+{
+	struct nouveau_fb *pfb = (void *)object;
+	int ret, i;
+
+	ret = nouveau_subdev_init(&pfb->base);
+	if (ret)
+		return ret;
+
+	ret = nv_ofuncs(pfb->ram)->init(nv_object(pfb->ram));
+	if (ret)
+		return ret;
+
+	/* program each tile region from its stored state */
+	for (i = 0; i < pfb->tile.regions; i++)
+		pfb->tile.prog(pfb, i, &pfb->tile.region[i]);
+
+	return 0;
+}
+
+/*
+ * Object destructor for the fb subdev: calls tile.fini for every tile
+ * region, tears down the tags and vram allocators, drops the reference held
+ * in pfb->ram, and finally destroys the base subdev.
+ */
+void
+_nouveau_fb_dtor(struct nouveau_object *object)
+{
+	struct nouveau_fb *pfb = (void *)object;
+	int i;
+
+	for (i = 0; i < pfb->tile.regions; i++)
+		pfb->tile.fini(pfb, i, &pfb->tile.region[i]);
+	nouveau_mm_fini(&pfb->tags);
+	nouveau_mm_fini(&pfb->vram);
+
+	/* passing NULL as the new object releases the ram reference */
+	nouveau_object_ref(NULL, (struct nouveau_object **)&pfb->ram);
+	nouveau_subdev_destroy(&pfb->base);
+}
+
+int
+nouveau_fb_create_(struct nouveau_object *parent, struct nouveau_object *engine,
+		   struct nouveau_oclass *oclass, struct nouveau_oclass *ramcls,
+		   int length, void **pobject)
 {
 	static const char *name[] = {
 		[NV_MEM_TYPE_UNKNOWN] = "unknown",
@@ -72,69 +122,42 @@ nouveau_fb_preinit(struct nouveau_fb *pfb)
 		[NV_MEM_TYPE_GDDR4  ] = "GDDR4",
 		[NV_MEM_TYPE_GDDR5  ] = "GDDR5",
 	};
-	int ret, tags;
+	struct nouveau_object *ram;
+	struct nouveau_fb *pfb;
+	int ret;
 
-	tags = pfb->ram.init(pfb);
-	if (tags < 0 || !pfb->ram.size) {
+	ret = nouveau_subdev_create_(parent, engine, oclass, 0, "PFB", "fb",
+				     length, pobject);
+	pfb = *pobject;
+	if (ret)
+		return ret;
+
+	ret = nouveau_object_ctor(nv_object(pfb), nv_object(pfb),
+				  ramcls, NULL, 0, &ram);
+	if (ret) {
 		nv_fatal(pfb, "error detecting memory configuration!!\n");
-		return (tags < 0) ? tags : -ERANGE;
+		return ret;
 	}
 
+	atomic_dec(&ram->parent->refcount);
+	atomic_dec(&ram->engine->refcount);
+	pfb->ram = (void *)ram;
+
 	if (!nouveau_mm_initialised(&pfb->vram)) {
-		ret = nouveau_mm_init(&pfb->vram, 0, pfb->ram.size >> 12, 1);
+		ret = nouveau_mm_init(&pfb->vram, 0, pfb->ram->size >> 12, 1);
 		if (ret)
 			return ret;
 	}
 
 	if (!nouveau_mm_initialised(&pfb->tags)) {
-		ret = nouveau_mm_init(&pfb->tags, 0, tags ? ++tags : 0, 1);
+		ret = nouveau_mm_init(&pfb->tags, 0, pfb->ram->tags ?
+				     ++pfb->ram->tags : 0, 1);
 		if (ret)
 			return ret;
 	}
 
-	nv_info(pfb, "RAM type: %s\n", name[pfb->ram.type]);
-	nv_info(pfb, "RAM size: %d MiB\n", (int)(pfb->ram.size >> 20));
-	nv_info(pfb, "   ZCOMP: %d tags\n", tags);
+	nv_info(pfb, "RAM type: %s\n", name[pfb->ram->type]);
+	nv_info(pfb, "RAM size: %d MiB\n", (int)(pfb->ram->size >> 20));
+	nv_info(pfb, "   ZCOMP: %d tags\n", pfb->ram->tags);
 	return 0;
 }
-
-void
-nouveau_fb_destroy(struct nouveau_fb *pfb)
-{
-	int i;
-
-	for (i = 0; i < pfb->tile.regions; i++)
-		pfb->tile.fini(pfb, i, &pfb->tile.region[i]);
-	nouveau_mm_fini(&pfb->tags);
-	nouveau_mm_fini(&pfb->vram);
-
-	nouveau_subdev_destroy(&pfb->base);
-}
-
-void
-_nouveau_fb_dtor(struct nouveau_object *object)
-{
-	struct nouveau_fb *pfb = (void *)object;
-	nouveau_fb_destroy(pfb);
-}
-int
-nouveau_fb_init(struct nouveau_fb *pfb)
-{
-	int ret, i;
-
-	ret = nouveau_subdev_init(&pfb->base);
-	if (ret)
-		return ret;
-
-	for (i = 0; i < pfb->tile.regions; i++)
-		pfb->tile.prog(pfb, i, &pfb->tile.region[i]);
-
-	return 0;
-}
-
-int
-_nouveau_fb_init(struct nouveau_object *object)
-{
-	struct nouveau_fb *pfb = (void *)object;
-	return nouveau_fb_init(pfb);
-}

+ 3 - 51
drivers/gpu/drm/nouveau/core/subdev/fb/nv04.c

@@ -22,24 +22,8 @@
  * Authors: Ben Skeggs
  */
 
-#include <subdev/fb.h>
+#include "priv.h"
 
-#define NV04_PFB_BOOT_0						0x00100000
-#	define NV04_PFB_BOOT_0_RAM_AMOUNT			0x00000003
-#	define NV04_PFB_BOOT_0_RAM_AMOUNT_32MB			0x00000000
-#	define NV04_PFB_BOOT_0_RAM_AMOUNT_4MB			0x00000001
-#	define NV04_PFB_BOOT_0_RAM_AMOUNT_8MB			0x00000002
-#	define NV04_PFB_BOOT_0_RAM_AMOUNT_16MB			0x00000003
-#	define NV04_PFB_BOOT_0_RAM_WIDTH_128			0x00000004
-#	define NV04_PFB_BOOT_0_RAM_TYPE				0x00000028
-#	define NV04_PFB_BOOT_0_RAM_TYPE_SGRAM_8MBIT		0x00000000
-#	define NV04_PFB_BOOT_0_RAM_TYPE_SGRAM_16MBIT		0x00000008
-#	define NV04_PFB_BOOT_0_RAM_TYPE_SGRAM_16MBIT_4BANK	0x00000010
-#	define NV04_PFB_BOOT_0_RAM_TYPE_SDRAM_16MBIT		0x00000018
-#	define NV04_PFB_BOOT_0_RAM_TYPE_SDRAM_64MBIT		0x00000020
-#	define NV04_PFB_BOOT_0_RAM_TYPE_SDRAM_64MBITX16		0x00000028
-#	define NV04_PFB_BOOT_0_UMA_ENABLE			0x00000100
-#	define NV04_PFB_BOOT_0_UMA_SIZE				0x0000f000
 #define NV04_PFB_CFG0						0x00100200
 
 struct nv04_fb_priv {
@@ -55,37 +39,6 @@ nv04_fb_memtype_valid(struct nouveau_fb *pfb, u32 tile_flags)
 	return false;
 }
 
-static int
-nv04_fb_vram_init(struct nouveau_fb *pfb)
-{
-	u32 boot0 = nv_rd32(pfb, NV04_PFB_BOOT_0);
-	if (boot0 & 0x00000100) {
-		pfb->ram.size  = ((boot0 >> 12) & 0xf) * 2 + 2;
-		pfb->ram.size *= 1024 * 1024;
-	} else {
-		switch (boot0 & NV04_PFB_BOOT_0_RAM_AMOUNT) {
-		case NV04_PFB_BOOT_0_RAM_AMOUNT_32MB:
-			pfb->ram.size = 32 * 1024 * 1024;
-			break;
-		case NV04_PFB_BOOT_0_RAM_AMOUNT_16MB:
-			pfb->ram.size = 16 * 1024 * 1024;
-			break;
-		case NV04_PFB_BOOT_0_RAM_AMOUNT_8MB:
-			pfb->ram.size = 8 * 1024 * 1024;
-			break;
-		case NV04_PFB_BOOT_0_RAM_AMOUNT_4MB:
-			pfb->ram.size = 4 * 1024 * 1024;
-			break;
-		}
-	}
-
-	if ((boot0 & 0x00000038) <= 0x10)
-		pfb->ram.type = NV_MEM_TYPE_SGRAM;
-	else
-		pfb->ram.type = NV_MEM_TYPE_SDRAM;
-	return 0;
-}
-
 static int
 nv04_fb_init(struct nouveau_object *object)
 {
@@ -112,14 +65,13 @@ nv04_fb_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
 	struct nv04_fb_priv *priv;
 	int ret;
 
-	ret = nouveau_fb_create(parent, engine, oclass, &priv);
+	ret = nouveau_fb_create(parent, engine, oclass, &nv04_ram_oclass, &priv);
 	*pobject = nv_object(priv);
 	if (ret)
 		return ret;
 
 	priv->base.memtype_valid = nv04_fb_memtype_valid;
-	priv->base.ram.init = nv04_fb_vram_init;
-	return nouveau_fb_preinit(&priv->base);
+	return 0;
 }
 
 struct nouveau_oclass

+ 3 - 17
drivers/gpu/drm/nouveau/core/subdev/fb/nv10.c

@@ -24,25 +24,12 @@
  *
  */
 
-#include <subdev/fb.h>
+#include "priv.h"
 
 struct nv10_fb_priv {
 	struct nouveau_fb base;
 };
 
-static int
-nv10_fb_vram_init(struct nouveau_fb *pfb)
-{
-	u32 cfg0 = nv_rd32(pfb, 0x100200);
-	if (cfg0 & 0x00000001)
-		pfb->ram.type = NV_MEM_TYPE_DDR1;
-	else
-		pfb->ram.type = NV_MEM_TYPE_SDRAM;
-
-	pfb->ram.size = nv_rd32(pfb, 0x10020c) & 0xff000000;
-	return 0;
-}
-
 void
 nv10_fb_tile_init(struct nouveau_fb *pfb, int i, u32 addr, u32 size, u32 pitch,
 		  u32 flags, struct nouveau_fb_tile *tile)
@@ -78,18 +65,17 @@ nv10_fb_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
 	struct nv10_fb_priv *priv;
 	int ret;
 
-	ret = nouveau_fb_create(parent, engine, oclass, &priv);
+	ret = nouveau_fb_create(parent, engine, oclass, &nv10_ram_oclass, &priv);
 	*pobject = nv_object(priv);
 	if (ret)
 		return ret;
 
 	priv->base.memtype_valid = nv04_fb_memtype_valid;
-	priv->base.ram.init = nv10_fb_vram_init;
 	priv->base.tile.regions = 8;
 	priv->base.tile.init = nv10_fb_tile_init;
 	priv->base.tile.fini = nv10_fb_tile_fini;
 	priv->base.tile.prog = nv10_fb_tile_prog;
-	return nouveau_fb_preinit(&priv->base);
+	return 0;
 }
 
 struct nouveau_oclass

+ 3 - 29
drivers/gpu/drm/nouveau/core/subdev/fb/nv1a.c

@@ -24,37 +24,12 @@
  *
  */
 
-#include <subdev/fb.h>
+#include "priv.h"
 
 struct nv1a_fb_priv {
 	struct nouveau_fb base;
 };
 
-static int
-nv1a_fb_vram_init(struct nouveau_fb *pfb)
-{
-	struct pci_dev *bridge;
-	u32 mem, mib;
-
-	bridge = pci_get_bus_and_slot(0, PCI_DEVFN(0, 1));
-	if (!bridge) {
-		nv_fatal(pfb, "no bridge device\n");
-		return -ENODEV;
-	}
-
-	if (nv_device(pfb)->chipset == 0x1a) {
-		pci_read_config_dword(bridge, 0x7c, &mem);
-		mib = ((mem >> 6) & 31) + 1;
-	} else {
-		pci_read_config_dword(bridge, 0x84, &mem);
-		mib = ((mem >> 4) & 127) + 1;
-	}
-
-	pfb->ram.type = NV_MEM_TYPE_STOLEN;
-	pfb->ram.size = mib * 1024 * 1024;
-	return 0;
-}
-
 static int
 nv1a_fb_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
 	     struct nouveau_oclass *oclass, void *data, u32 size,
@@ -63,18 +38,17 @@ nv1a_fb_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
 	struct nv1a_fb_priv *priv;
 	int ret;
 
-	ret = nouveau_fb_create(parent, engine, oclass, &priv);
+	ret = nouveau_fb_create(parent, engine, oclass, &nv1a_ram_oclass, &priv);
 	*pobject = nv_object(priv);
 	if (ret)
 		return ret;
 
 	priv->base.memtype_valid = nv04_fb_memtype_valid;
-	priv->base.ram.init = nv1a_fb_vram_init;
 	priv->base.tile.regions = 8;
 	priv->base.tile.init = nv10_fb_tile_init;
 	priv->base.tile.fini = nv10_fb_tile_fini;
 	priv->base.tile.prog = nv10_fb_tile_prog;
-	return nouveau_fb_preinit(&priv->base);
+	return 0;
 }
 
 struct nouveau_oclass

+ 4 - 22
drivers/gpu/drm/nouveau/core/subdev/fb/nv20.c

@@ -24,29 +24,12 @@
  *
  */
 
-#include <subdev/fb.h>
+#include "priv.h"
 
 struct nv20_fb_priv {
 	struct nouveau_fb base;
 };
 
-int
-nv20_fb_vram_init(struct nouveau_fb *pfb)
-{
-	u32 pbus1218 = nv_rd32(pfb, 0x001218);
-
-	switch (pbus1218 & 0x00000300) {
-	case 0x00000000: pfb->ram.type = NV_MEM_TYPE_SDRAM; break;
-	case 0x00000100: pfb->ram.type = NV_MEM_TYPE_DDR1; break;
-	case 0x00000200: pfb->ram.type = NV_MEM_TYPE_GDDR3; break;
-	case 0x00000300: pfb->ram.type = NV_MEM_TYPE_GDDR2; break;
-	}
-	pfb->ram.size  = (nv_rd32(pfb, 0x10020c) & 0xff000000);
-	pfb->ram.parts = (nv_rd32(pfb, 0x100200) & 0x00000003) + 1;
-
-	return nv_rd32(pfb, 0x100320);
-}
-
 void
 nv20_fb_tile_init(struct nouveau_fb *pfb, int i, u32 addr, u32 size, u32 pitch,
 		  u32 flags, struct nouveau_fb_tile *tile)
@@ -65,7 +48,7 @@ nv20_fb_tile_comp(struct nouveau_fb *pfb, int i, u32 size, u32 flags,
 		  struct nouveau_fb_tile *tile)
 {
 	u32 tiles = DIV_ROUND_UP(size, 0x40);
-	u32 tags  = round_up(tiles / pfb->ram.parts, 0x40);
+	u32 tags  = round_up(tiles / pfb->ram->parts, 0x40);
 	if (!nouveau_mm_head(&pfb->tags, 1, tags, tags, 1, &tile->tag)) {
 		if (!(flags & 2)) tile->zcomp = 0x00000000; /* Z16 */
 		else              tile->zcomp = 0x04000000; /* Z24S8 */
@@ -105,19 +88,18 @@ nv20_fb_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
 	struct nv20_fb_priv *priv;
 	int ret;
 
-	ret = nouveau_fb_create(parent, engine, oclass, &priv);
+	ret = nouveau_fb_create(parent, engine, oclass, &nv20_ram_oclass, &priv);
 	*pobject = nv_object(priv);
 	if (ret)
 		return ret;
 
 	priv->base.memtype_valid = nv04_fb_memtype_valid;
-	priv->base.ram.init = nv20_fb_vram_init;
 	priv->base.tile.regions = 8;
 	priv->base.tile.init = nv20_fb_tile_init;
 	priv->base.tile.comp = nv20_fb_tile_comp;
 	priv->base.tile.fini = nv20_fb_tile_fini;
 	priv->base.tile.prog = nv20_fb_tile_prog;
-	return nouveau_fb_preinit(&priv->base);
+	return 0;
 }
 
 struct nouveau_oclass

+ 4 - 5
drivers/gpu/drm/nouveau/core/subdev/fb/nv25.c

@@ -24,7 +24,7 @@
  *
  */
 
-#include <subdev/fb.h>
+#include "priv.h"
 
 struct nv25_fb_priv {
 	struct nouveau_fb base;
@@ -35,7 +35,7 @@ nv25_fb_tile_comp(struct nouveau_fb *pfb, int i, u32 size, u32 flags,
 		  struct nouveau_fb_tile *tile)
 {
 	u32 tiles = DIV_ROUND_UP(size, 0x40);
-	u32 tags  = round_up(tiles / pfb->ram.parts, 0x40);
+	u32 tags  = round_up(tiles / pfb->ram->parts, 0x40);
 	if (!nouveau_mm_head(&pfb->tags, 1, tags, tags, 1, &tile->tag)) {
 		if (!(flags & 2)) tile->zcomp = 0x00100000; /* Z16 */
 		else              tile->zcomp = 0x00200000; /* Z24S8 */
@@ -54,19 +54,18 @@ nv25_fb_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
 	struct nv25_fb_priv *priv;
 	int ret;
 
-	ret = nouveau_fb_create(parent, engine, oclass, &priv);
+	ret = nouveau_fb_create(parent, engine, oclass, &nv20_ram_oclass, &priv);
 	*pobject = nv_object(priv);
 	if (ret)
 		return ret;
 
 	priv->base.memtype_valid = nv04_fb_memtype_valid;
-	priv->base.ram.init = nv20_fb_vram_init;
 	priv->base.tile.regions = 8;
 	priv->base.tile.init = nv20_fb_tile_init;
 	priv->base.tile.comp = nv25_fb_tile_comp;
 	priv->base.tile.fini = nv20_fb_tile_fini;
 	priv->base.tile.prog = nv20_fb_tile_prog;
-	return nouveau_fb_preinit(&priv->base);
+	return 0;
 }
 
 struct nouveau_oclass

+ 4 - 5
drivers/gpu/drm/nouveau/core/subdev/fb/nv30.c

@@ -24,7 +24,7 @@
  *
  */
 
-#include <subdev/fb.h>
+#include "priv.h"
 
 struct nv30_fb_priv {
 	struct nouveau_fb base;
@@ -54,7 +54,7 @@ nv30_fb_tile_comp(struct nouveau_fb *pfb, int i, u32 size, u32 flags,
 		  struct nouveau_fb_tile *tile)
 {
 	u32 tiles = DIV_ROUND_UP(size, 0x40);
-	u32 tags  = round_up(tiles / pfb->ram.parts, 0x40);
+	u32 tags  = round_up(tiles / pfb->ram->parts, 0x40);
 	if (!nouveau_mm_head(&pfb->tags, 1, tags, tags, 1, &tile->tag)) {
 		if (flags & 2) tile->zcomp |= 0x01000000; /* Z16 */
 		else           tile->zcomp |= 0x02000000; /* Z24S8 */
@@ -132,19 +132,18 @@ nv30_fb_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
 	struct nv30_fb_priv *priv;
 	int ret;
 
-	ret = nouveau_fb_create(parent, engine, oclass, &priv);
+	ret = nouveau_fb_create(parent, engine, oclass, &nv20_ram_oclass, &priv);
 	*pobject = nv_object(priv);
 	if (ret)
 		return ret;
 
 	priv->base.memtype_valid = nv04_fb_memtype_valid;
-	priv->base.ram.init = nv20_fb_vram_init;
 	priv->base.tile.regions = 8;
 	priv->base.tile.init = nv30_fb_tile_init;
 	priv->base.tile.comp = nv30_fb_tile_comp;
 	priv->base.tile.fini = nv20_fb_tile_fini;
 	priv->base.tile.prog = nv20_fb_tile_prog;
-	return nouveau_fb_preinit(&priv->base);
+	return 0;
 }
 
 struct nouveau_oclass

+ 4 - 5
drivers/gpu/drm/nouveau/core/subdev/fb/nv35.c

@@ -24,7 +24,7 @@
  *
  */
 
-#include <subdev/fb.h>
+#include "priv.h"
 
 struct nv35_fb_priv {
 	struct nouveau_fb base;
@@ -35,7 +35,7 @@ nv35_fb_tile_comp(struct nouveau_fb *pfb, int i, u32 size, u32 flags,
 		  struct nouveau_fb_tile *tile)
 {
 	u32 tiles = DIV_ROUND_UP(size, 0x40);
-	u32 tags  = round_up(tiles / pfb->ram.parts, 0x40);
+	u32 tags  = round_up(tiles / pfb->ram->parts, 0x40);
 	if (!nouveau_mm_head(&pfb->tags, 1, tags, tags, 1, &tile->tag)) {
 		if (flags & 2) tile->zcomp |= 0x04000000; /* Z16 */
 		else           tile->zcomp |= 0x08000000; /* Z24S8 */
@@ -55,19 +55,18 @@ nv35_fb_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
 	struct nv35_fb_priv *priv;
 	int ret;
 
-	ret = nouveau_fb_create(parent, engine, oclass, &priv);
+	ret = nouveau_fb_create(parent, engine, oclass, &nv20_ram_oclass, &priv);
 	*pobject = nv_object(priv);
 	if (ret)
 		return ret;
 
 	priv->base.memtype_valid = nv04_fb_memtype_valid;
-	priv->base.ram.init = nv20_fb_vram_init;
 	priv->base.tile.regions = 8;
 	priv->base.tile.init = nv30_fb_tile_init;
 	priv->base.tile.comp = nv35_fb_tile_comp;
 	priv->base.tile.fini = nv20_fb_tile_fini;
 	priv->base.tile.prog = nv20_fb_tile_prog;
-	return nouveau_fb_preinit(&priv->base);
+	return 0;
 }
 
 struct nouveau_oclass

+ 4 - 5
drivers/gpu/drm/nouveau/core/subdev/fb/nv36.c

@@ -24,7 +24,7 @@
  *
  */
 
-#include <subdev/fb.h>
+#include "priv.h"
 
 struct nv36_fb_priv {
 	struct nouveau_fb base;
@@ -35,7 +35,7 @@ nv36_fb_tile_comp(struct nouveau_fb *pfb, int i, u32 size, u32 flags,
 		  struct nouveau_fb_tile *tile)
 {
 	u32 tiles = DIV_ROUND_UP(size, 0x40);
-	u32 tags  = round_up(tiles / pfb->ram.parts, 0x40);
+	u32 tags  = round_up(tiles / pfb->ram->parts, 0x40);
 	if (!nouveau_mm_head(&pfb->tags, 1, tags, tags, 1, &tile->tag)) {
 		if (flags & 2) tile->zcomp |= 0x10000000; /* Z16 */
 		else           tile->zcomp |= 0x20000000; /* Z24S8 */
@@ -55,19 +55,18 @@ nv36_fb_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
 	struct nv36_fb_priv *priv;
 	int ret;
 
-	ret = nouveau_fb_create(parent, engine, oclass, &priv);
+	ret = nouveau_fb_create(parent, engine, oclass, &nv20_ram_oclass, &priv);
 	*pobject = nv_object(priv);
 	if (ret)
 		return ret;
 
 	priv->base.memtype_valid = nv04_fb_memtype_valid;
-	priv->base.ram.init = nv20_fb_vram_init;
 	priv->base.tile.regions = 8;
 	priv->base.tile.init = nv30_fb_tile_init;
 	priv->base.tile.comp = nv36_fb_tile_comp;
 	priv->base.tile.fini = nv20_fb_tile_fini;
 	priv->base.tile.prog = nv20_fb_tile_prog;
-	return nouveau_fb_preinit(&priv->base);
+	return 0;
 }
 
 struct nouveau_oclass

+ 4 - 21
drivers/gpu/drm/nouveau/core/subdev/fb/nv40.c

@@ -24,34 +24,18 @@
  *
  */
 
-#include <subdev/fb.h>
+#include "priv.h"
 
 struct nv40_fb_priv {
 	struct nouveau_fb base;
 };
 
-static int
-nv40_fb_vram_init(struct nouveau_fb *pfb)
-{
-	u32 pbus1218 = nv_rd32(pfb, 0x001218);
-	switch (pbus1218 & 0x00000300) {
-	case 0x00000000: pfb->ram.type = NV_MEM_TYPE_SDRAM; break;
-	case 0x00000100: pfb->ram.type = NV_MEM_TYPE_DDR1; break;
-	case 0x00000200: pfb->ram.type = NV_MEM_TYPE_GDDR3; break;
-	case 0x00000300: pfb->ram.type = NV_MEM_TYPE_DDR2; break;
-	}
-
-	pfb->ram.size  =  nv_rd32(pfb, 0x10020c) & 0xff000000;
-	pfb->ram.parts = (nv_rd32(pfb, 0x100200) & 0x00000003) + 1;
-	return nv_rd32(pfb, 0x100320);
-}
-
 void
 nv40_fb_tile_comp(struct nouveau_fb *pfb, int i, u32 size, u32 flags,
 		  struct nouveau_fb_tile *tile)
 {
 	u32 tiles = DIV_ROUND_UP(size, 0x80);
-	u32 tags  = round_up(tiles / pfb->ram.parts, 0x100);
+	u32 tags  = round_up(tiles / pfb->ram->parts, 0x100);
 	if ( (flags & 2) &&
 	    !nouveau_mm_head(&pfb->tags, 1, tags, tags, 1, &tile->tag)) {
 		tile->zcomp  = 0x28000000; /* Z24S8_SPLIT_GRAD */
@@ -85,19 +69,18 @@ nv40_fb_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
 	struct nv40_fb_priv *priv;
 	int ret;
 
-	ret = nouveau_fb_create(parent, engine, oclass, &priv);
+	ret = nouveau_fb_create(parent, engine, oclass, &nv40_ram_oclass, &priv);
 	*pobject = nv_object(priv);
 	if (ret)
 		return ret;
 
 	priv->base.memtype_valid = nv04_fb_memtype_valid;
-	priv->base.ram.init = nv40_fb_vram_init;
 	priv->base.tile.regions = 8;
 	priv->base.tile.init = nv30_fb_tile_init;
 	priv->base.tile.comp = nv40_fb_tile_comp;
 	priv->base.tile.fini = nv20_fb_tile_fini;
 	priv->base.tile.prog = nv20_fb_tile_prog;
-	return nouveau_fb_preinit(&priv->base);
+	return 0;
 }
 
 

+ 3 - 20
drivers/gpu/drm/nouveau/core/subdev/fb/nv41.c

@@ -24,28 +24,12 @@
  *
  */
 
-#include <subdev/fb.h>
+#include "priv.h"
 
 struct nv41_fb_priv {
 	struct nouveau_fb base;
 };
 
-int
-nv41_fb_vram_init(struct nouveau_fb *pfb)
-{
-	u32 pfb474 = nv_rd32(pfb, 0x100474);
-	if (pfb474 & 0x00000004)
-		pfb->ram.type = NV_MEM_TYPE_GDDR3;
-	if (pfb474 & 0x00000002)
-		pfb->ram.type = NV_MEM_TYPE_DDR2;
-	if (pfb474 & 0x00000001)
-		pfb->ram.type = NV_MEM_TYPE_DDR1;
-
-	pfb->ram.size =   nv_rd32(pfb, 0x10020c) & 0xff000000;
-	pfb->ram.parts = (nv_rd32(pfb, 0x100200) & 0x00000003) + 1;
-	return nv_rd32(pfb, 0x100320);
-}
-
 void
 nv41_fb_tile_prog(struct nouveau_fb *pfb, int i, struct nouveau_fb_tile *tile)
 {
@@ -78,19 +62,18 @@ nv41_fb_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
 	struct nv41_fb_priv *priv;
 	int ret;
 
-	ret = nouveau_fb_create(parent, engine, oclass, &priv);
+	ret = nouveau_fb_create(parent, engine, oclass, &nv41_ram_oclass, &priv);
 	*pobject = nv_object(priv);
 	if (ret)
 		return ret;
 
 	priv->base.memtype_valid = nv04_fb_memtype_valid;
-	priv->base.ram.init = nv41_fb_vram_init;
 	priv->base.tile.regions = 12;
 	priv->base.tile.init = nv30_fb_tile_init;
 	priv->base.tile.comp = nv40_fb_tile_comp;
 	priv->base.tile.fini = nv20_fb_tile_fini;
 	priv->base.tile.prog = nv41_fb_tile_prog;
-	return nouveau_fb_preinit(&priv->base);
+	return 0;
 }
 
 

+ 3 - 19
drivers/gpu/drm/nouveau/core/subdev/fb/nv44.c

@@ -24,27 +24,12 @@
  *
  */
 
-#include <subdev/fb.h>
+#include "priv.h"
 
 struct nv44_fb_priv {
 	struct nouveau_fb base;
 };
 
-int
-nv44_fb_vram_init(struct nouveau_fb *pfb)
-{
-	u32 pfb474 = nv_rd32(pfb, 0x100474);
-	if (pfb474 & 0x00000004)
-		pfb->ram.type = NV_MEM_TYPE_GDDR3;
-	if (pfb474 & 0x00000002)
-		pfb->ram.type = NV_MEM_TYPE_DDR2;
-	if (pfb474 & 0x00000001)
-		pfb->ram.type = NV_MEM_TYPE_DDR1;
-
-	pfb->ram.size = nv_rd32(pfb, 0x10020c) & 0xff000000;
-	return 0;
-}
-
 static void
 nv44_fb_tile_init(struct nouveau_fb *pfb, int i, u32 addr, u32 size, u32 pitch,
 		  u32 flags, struct nouveau_fb_tile *tile)
@@ -87,18 +72,17 @@ nv44_fb_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
 	struct nv44_fb_priv *priv;
 	int ret;
 
-	ret = nouveau_fb_create(parent, engine, oclass, &priv);
+	ret = nouveau_fb_create(parent, engine, oclass, &nv44_ram_oclass, &priv);
 	*pobject = nv_object(priv);
 	if (ret)
 		return ret;
 
 	priv->base.memtype_valid = nv04_fb_memtype_valid;
-	priv->base.ram.init = nv44_fb_vram_init;
 	priv->base.tile.regions = 12;
 	priv->base.tile.init = nv44_fb_tile_init;
 	priv->base.tile.fini = nv20_fb_tile_fini;
 	priv->base.tile.prog = nv44_fb_tile_prog;
-	return nouveau_fb_preinit(&priv->base);
+	return 0;
 }
 
 

+ 3 - 4
drivers/gpu/drm/nouveau/core/subdev/fb/nv46.c

@@ -24,7 +24,7 @@
  *
  */
 
-#include <subdev/fb.h>
+#include "priv.h"
 
 struct nv46_fb_priv {
 	struct nouveau_fb base;
@@ -52,18 +52,17 @@ nv46_fb_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
 	struct nv46_fb_priv *priv;
 	int ret;
 
-	ret = nouveau_fb_create(parent, engine, oclass, &priv);
+	ret = nouveau_fb_create(parent, engine, oclass, &nv44_ram_oclass, &priv);
 	*pobject = nv_object(priv);
 	if (ret)
 		return ret;
 
 	priv->base.memtype_valid = nv04_fb_memtype_valid;
-	priv->base.ram.init = nv44_fb_vram_init;
 	priv->base.tile.regions = 15;
 	priv->base.tile.init = nv46_fb_tile_init;
 	priv->base.tile.fini = nv20_fb_tile_fini;
 	priv->base.tile.prog = nv44_fb_tile_prog;
-	return nouveau_fb_preinit(&priv->base);
+	return 0;
 }
 
 

Some files were not shown because too many files changed in this diff