Browse Source

drm/nv98/crypt: non-stub implementation of the engine hooks

fuc is from pscnv driver.

Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
Ben Skeggs 13 years ago
parent
commit
b355096992

+ 152 - 14
drivers/gpu/drm/nouveau/nv98_crypt.c

@@ -23,21 +23,93 @@
  */
 
 #include "drmP.h"
+
 #include "nouveau_drv.h"
 #include "nouveau_util.h"
 #include "nouveau_vm.h"
 #include "nouveau_ramht.h"
 
-struct nv98_crypt_engine {
+#include "nv98_crypt.fuc.h"
+
+struct nv98_crypt_priv {
 	struct nouveau_exec_engine base;
 };
 
+struct nv98_crypt_chan {
+	struct nouveau_gpuobj *mem;
+};
+
 static int
-nv98_crypt_fini(struct drm_device *dev, int engine, bool suspend)
+nv98_crypt_context_new(struct nouveau_channel *chan, int engine)
+{
+	struct drm_device *dev = chan->dev;
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nv98_crypt_priv *priv = nv_engine(dev, engine);
+	struct nv98_crypt_chan *cctx;
+	int ret;
+
+	cctx = chan->engctx[engine] = kzalloc(sizeof(*cctx), GFP_KERNEL);
+	if (!cctx)
+		return -ENOMEM;
+
+	atomic_inc(&chan->vm->engref[engine]);
+
+	ret = nouveau_gpuobj_new(dev, chan, 256, 0, NVOBJ_FLAG_ZERO_ALLOC |
+				 NVOBJ_FLAG_ZERO_FREE, &cctx->mem);
+	if (ret)
+		goto error;
+
+	nv_wo32(chan->ramin, 0xa0, 0x00190000);
+	nv_wo32(chan->ramin, 0xa4, cctx->mem->vinst + cctx->mem->size - 1);
+	nv_wo32(chan->ramin, 0xa8, cctx->mem->vinst);
+	nv_wo32(chan->ramin, 0xac, 0x00000000);
+	nv_wo32(chan->ramin, 0xb0, 0x00000000);
+	nv_wo32(chan->ramin, 0xb4, 0x00000000);
+	dev_priv->engine.instmem.flush(dev);
+
+error:
+	if (ret)
+		priv->base.context_del(chan, engine);
+	return ret;
+}
+
+static void
+nv98_crypt_context_del(struct nouveau_channel *chan, int engine)
+{
+	struct nv98_crypt_chan *cctx = chan->engctx[engine];
+	int i;
+
+	for (i = 0xa0; i < 0xb4; i += 4)
+		nv_wo32(chan->ramin, i, 0x00000000);
+
+	nouveau_gpuobj_ref(NULL, &cctx->mem);
+
+	atomic_dec(&chan->vm->engref[engine]);
+	chan->engctx[engine] = NULL;
+	kfree(cctx);
+}
+
+static int
+nv98_crypt_object_new(struct nouveau_channel *chan, int engine,
+		     u32 handle, u16 class)
 {
-	if (!(nv_rd32(dev, 0x000200) & 0x00004000))
-		return 0;
+	struct nv98_crypt_chan *cctx = chan->engctx[engine];
+
+	/* fuc engine doesn't need an object, our ramht code does.. */
+	cctx->mem->engine = 5;
+	cctx->mem->class  = class;
+	return nouveau_ramht_insert(chan, handle, cctx->mem);
+}
 
+static void
+nv98_crypt_tlb_flush(struct drm_device *dev, int engine)
+{
+	nv50_vm_flush_engine(dev, 0x0a);
+}
+
+static int
+nv98_crypt_fini(struct drm_device *dev, int engine, bool suspend)
+{
 	nv_mask(dev, 0x000200, 0x00004000, 0x00000000);
 	return 0;
 }
@@ -45,34 +117,100 @@ nv98_crypt_fini(struct drm_device *dev, int engine, bool suspend)
 static int
 nv98_crypt_init(struct drm_device *dev, int engine)
 {
+	int i;
+
+	/* reset! */
 	nv_mask(dev, 0x000200, 0x00004000, 0x00000000);
 	nv_mask(dev, 0x000200, 0x00004000, 0x00004000);
+
+	/* wait for exit interrupt to signal */
+	nv_wait(dev, 0x087008, 0x00000010, 0x00000010);
+	nv_wr32(dev, 0x087004, 0x00000010);
+
+	/* upload microcode code and data segments */
+	nv_wr32(dev, 0x087ff8, 0x00100000);
+	for (i = 0; i < ARRAY_SIZE(nv98_pcrypt_code); i++)
+		nv_wr32(dev, 0x087ff4, nv98_pcrypt_code[i]);
+
+	nv_wr32(dev, 0x087ff8, 0x00000000);
+	for (i = 0; i < ARRAY_SIZE(nv98_pcrypt_data); i++)
+		nv_wr32(dev, 0x087ff4, nv98_pcrypt_data[i]);
+
+	/* start it running */
+	nv_wr32(dev, 0x08710c, 0x00000000);
+	nv_wr32(dev, 0x087104, 0x00000000); /* ENTRY */
+	nv_wr32(dev, 0x087100, 0x00000002); /* TRIGGER */
 	return 0;
 }
 
+static struct nouveau_enum nv98_crypt_isr_error_name[] = {
+	{ 0x0000, "ILLEGAL_MTHD" },
+	{ 0x0001, "INVALID_BITFIELD" },
+	{ 0x0002, "INVALID_ENUM" },
+	{ 0x0003, "QUERY" },
+	{}
+};
+
+static void
+nv98_crypt_isr(struct drm_device *dev)
+{
+	u32 disp = nv_rd32(dev, 0x08701c);
+	u32 stat = nv_rd32(dev, 0x087008) & disp & ~(disp >> 16);
+	u32 inst = nv_rd32(dev, 0x087050) & 0x3fffffff;
+	u32 ssta = nv_rd32(dev, 0x087040) & 0x0000ffff;
+	u32 addr = nv_rd32(dev, 0x087040) >> 16;
+	u32 mthd = (addr & 0x07ff) << 2;
+	u32 subc = (addr & 0x3800) >> 11;
+	u32 data = nv_rd32(dev, 0x087044);
+	int chid = nv50_graph_isr_chid(dev, inst);
+
+	if (stat & 0x00000040) {
+		NV_INFO(dev, "PCRYPT: DISPATCH_ERROR [");
+		nouveau_enum_print(nv98_crypt_isr_error_name, ssta);
+		printk("] ch %d [0x%08x] subc %d mthd 0x%04x data 0x%08x\n",
+			chid, inst, subc, mthd, data);
+		nv_wr32(dev, 0x087004, 0x00000040);
+		stat &= ~0x00000040;
+	}
+
+	if (stat) {
+		NV_INFO(dev, "PCRYPT: unhandled intr 0x%08x\n", stat);
+		nv_wr32(dev, 0x087004, stat);
+	}
+
+	nv50_fb_vm_trap(dev, 1);
+}
+
 static void
 nv98_crypt_destroy(struct drm_device *dev, int engine)
 {
-	struct nv98_crypt_engine *pcrypt = nv_engine(dev, engine);
+	struct nv98_crypt_priv *priv = nv_engine(dev, engine);
 
+	nouveau_irq_unregister(dev, 14);
 	NVOBJ_ENGINE_DEL(dev, CRYPT);
-
-	kfree(pcrypt);
+	kfree(priv);
 }
 
 int
 nv98_crypt_create(struct drm_device *dev)
 {
-	struct nv98_crypt_engine *pcrypt;
+	struct nv98_crypt_priv *priv;
 
-	pcrypt = kzalloc(sizeof(*pcrypt), GFP_KERNEL);
-	if (!pcrypt)
+	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+	if (!priv)
 		return -ENOMEM;
 
-	pcrypt->base.destroy = nv98_crypt_destroy;
-	pcrypt->base.init = nv98_crypt_init;
-	pcrypt->base.fini = nv98_crypt_fini;
+	priv->base.destroy = nv98_crypt_destroy;
+	priv->base.init = nv98_crypt_init;
+	priv->base.fini = nv98_crypt_fini;
+	priv->base.context_new = nv98_crypt_context_new;
+	priv->base.context_del = nv98_crypt_context_del;
+	priv->base.object_new = nv98_crypt_object_new;
+	priv->base.tlb_flush = nv98_crypt_tlb_flush;
+
+	nouveau_irq_register(dev, 14, nv98_crypt_isr);
 
-	NVOBJ_ENGINE_ADD(dev, CRYPT, &pcrypt->base);
+	NVOBJ_ENGINE_ADD(dev, CRYPT, &priv->base);
+	NVOBJ_CLASS(dev, 0x88b4, CRYPT);
 	return 0;
 }

+ 698 - 0
drivers/gpu/drm/nouveau/nv98_crypt.fuc

@@ -0,0 +1,698 @@
+/*
+ *  fuc microcode for nv98 pcrypt engine
+ *  Copyright (C) 2010  Marcin Kościelnicki
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+.section #nv98_pcrypt_data
+
+ctx_dma:
+ctx_dma_query:		.b32 0
+ctx_dma_src:		.b32 0
+ctx_dma_dst:		.b32 0
+.equ #dma_count 3
+ctx_query_address_high:	.b32 0
+ctx_query_address_low:	.b32 0
+ctx_query_counter:	.b32 0
+ctx_cond_address_high:	.b32 0
+ctx_cond_address_low:	.b32 0
+ctx_cond_off:		.b32 0
+ctx_src_address_high:	.b32 0
+ctx_src_address_low:	.b32 0
+ctx_dst_address_high:	.b32 0
+ctx_dst_address_low:	.b32 0
+ctx_mode:		.b32 0
+.align 16
+ctx_key:		.skip 16
+ctx_iv:			.skip 16
+
+.align 0x80
+swap:
+.skip 32
+
+.align 8
+common_cmd_dtable:
+.b32 #ctx_query_address_high + 0x20000 ~0xff
+.b32 #ctx_query_address_low + 0x20000 ~0xfffffff0
+.b32 #ctx_query_counter + 0x20000 ~0xffffffff
+.b32 #cmd_query_get + 0x00000 ~1
+.b32 #ctx_cond_address_high + 0x20000 ~0xff
+.b32 #ctx_cond_address_low + 0x20000 ~0xfffffff0
+.b32 #cmd_cond_mode + 0x00000 ~7
+.b32 #cmd_wrcache_flush + 0x00000 ~0
+.equ #common_cmd_max 0x88
+
+
+.align 8
+engine_cmd_dtable:
+.b32 #ctx_key + 0x0 + 0x20000 ~0xffffffff
+.b32 #ctx_key + 0x4 + 0x20000 ~0xffffffff
+.b32 #ctx_key + 0x8 + 0x20000 ~0xffffffff
+.b32 #ctx_key + 0xc + 0x20000 ~0xffffffff
+.b32 #ctx_iv + 0x0 + 0x20000 ~0xffffffff
+.b32 #ctx_iv + 0x4 + 0x20000 ~0xffffffff
+.b32 #ctx_iv + 0x8 + 0x20000 ~0xffffffff
+.b32 #ctx_iv + 0xc + 0x20000 ~0xffffffff
+.b32 #ctx_src_address_high + 0x20000 ~0xff
+.b32 #ctx_src_address_low + 0x20000 ~0xfffffff0
+.b32 #ctx_dst_address_high + 0x20000 ~0xff
+.b32 #ctx_dst_address_low + 0x20000 ~0xfffffff0
+.b32 #crypt_cmd_mode + 0x00000 ~0xf
+.b32 #crypt_cmd_length + 0x10000 ~0x0ffffff0
+.equ #engine_cmd_max 0xce
+
+.align 4
+crypt_dtable:
+.b16 #crypt_copy_prep #crypt_do_inout
+.b16 #crypt_store_prep #crypt_do_out
+.b16 #crypt_ecb_e_prep #crypt_do_inout
+.b16 #crypt_ecb_d_prep #crypt_do_inout
+.b16 #crypt_cbc_e_prep #crypt_do_inout
+.b16 #crypt_cbc_d_prep #crypt_do_inout
+.b16 #crypt_pcbc_e_prep #crypt_do_inout
+.b16 #crypt_pcbc_d_prep #crypt_do_inout
+.b16 #crypt_cfb_e_prep #crypt_do_inout
+.b16 #crypt_cfb_d_prep #crypt_do_inout
+.b16 #crypt_ofb_prep #crypt_do_inout
+.b16 #crypt_ctr_prep #crypt_do_inout
+.b16 #crypt_cbc_mac_prep #crypt_do_in
+.b16 #crypt_cmac_finish_complete_prep #crypt_do_in
+.b16 #crypt_cmac_finish_partial_prep #crypt_do_in
+
+.align 0x100
+
+.section #nv98_pcrypt_code
+
+	// $r0 is always set to 0 in our code - this allows some space savings.
+	clear b32 $r0
+
+	// set up the interrupt handler
+	mov $r1 #ih
+	mov $iv0 $r1
+
+	// init stack pointer
+	mov $sp $r0
+
+	// set interrupt dispatch - route timer, fifo, ctxswitch to i0, others to host
+	movw $r1 0xfff0
+	sethi $r1 0
+	mov $r2 0x400
+	iowr I[$r2 + 0x300] $r1
+
+	// enable the interrupts
+	or $r1 0xc
+	iowr I[$r2] $r1
+
+	// enable fifo access and context switching
+	mov $r1 3
+	mov $r2 0x1200
+	iowr I[$r2] $r1
+
+	// enable i0 delivery
+	bset $flags ie0
+
+	// sleep forver, waking only for interrupts.
+	bset $flags $p0
+	spin:
+	sleep $p0
+	bra #spin
+
+// i0 handler
+ih:
+	// see which interrupts we got
+	iord $r1 I[$r0 + 0x200]
+
+	and $r2 $r1 0x8
+	cmpu b32 $r2 0
+	bra e #noctx
+
+		// context switch... prepare the regs for xfer
+		mov $r2 0x7700
+		mov $xtargets $r2
+		mov $xdbase $r0
+		// 128-byte context.
+		mov $r2 0
+		sethi $r2 0x50000
+
+		// read current channel
+		mov $r3 0x1400
+		iord $r4 I[$r3]
+		// if bit 30 set, it's active, so we have to unload it first.
+		shl b32 $r5 $r4 1
+		cmps b32 $r5 0
+		bra nc #ctxload
+
+			// unload the current channel - save the context
+			xdst $r0 $r2
+			xdwait
+			// and clear bit 30, then write back
+			bclr $r4 0x1e
+			iowr I[$r3] $r4
+			// tell PFIFO we unloaded
+			mov $r4 1
+			iowr I[$r3 + 0x200] $r4
+
+		bra #noctx
+
+		ctxload:
+			// no channel loaded - perhaps we're requested to load one
+			iord $r4 I[$r3 + 0x100]
+			shl b32 $r15 $r4 1
+			cmps b32 $r15 0
+			// if bit 30 of next channel not set, probably PFIFO is just
+			// killing a context. do a faux load, without the active bit.
+			bra nc #dummyload
+
+				// ok, do a real context load.
+				xdld $r0 $r2
+				xdwait
+				mov $r5 #ctx_dma
+				mov $r6 #dma_count - 1
+				ctxload_dma_loop:
+					ld b32 $r7 D[$r5 + $r6 * 4]
+					add b32 $r8 $r6 0x180
+					shl b32 $r8 8
+					iowr I[$r8] $r7
+					sub b32 $r6 1
+				bra nc #ctxload_dma_loop
+
+			dummyload:
+			// tell PFIFO we're done
+			mov $r5 2
+			iowr I[$r3 + 0x200] $r5
+
+	noctx:
+	and $r2 $r1 0x4
+	cmpu b32 $r2 0
+	bra e #nocmd
+
+		// incoming fifo command.
+		mov $r3 0x1900
+		iord $r2 I[$r3 + 0x100]
+		iord $r3 I[$r3]
+		// extract the method
+		and $r4 $r2 0x7ff
+		// shift the addr to proper position if we need to interrupt later
+		shl b32 $r2 0x10
+
+		// mthd 0 and 0x100 [NAME, NOP]: ignore
+		and $r5 $r4 0x7bf
+		cmpu b32 $r5 0
+		bra e #cmddone
+
+		mov $r5 #engine_cmd_dtable - 0xc0 * 8
+		mov $r6 #engine_cmd_max
+		cmpu b32 $r4 0xc0
+		bra nc #dtable_cmd
+		mov $r5 #common_cmd_dtable - 0x80 * 8
+		mov $r6 #common_cmd_max
+		cmpu b32 $r4 0x80
+		bra nc #dtable_cmd
+		cmpu b32 $r4 0x60
+		bra nc #dma_cmd
+		cmpu b32 $r4 0x50
+		bra ne #illegal_mthd
+
+			// mthd 0x140: PM_TRIGGER
+			mov $r2 0x2200
+			clear b32 $r3
+			sethi $r3 0x20000
+			iowr I[$r2] $r3
+			bra #cmddone
+
+		dma_cmd:
+			// mthd 0x180...: DMA_*
+			cmpu b32 $r4 0x60+#dma_count
+			bra nc #illegal_mthd
+			shl b32 $r5 $r4 2
+			add b32 $r5 (#ctx_dma - 0x60 * 4) & 0xffff
+			bset $r3 0x1e
+			st b32 D[$r5] $r3
+			add b32 $r4 0x180 - 0x60
+			shl b32 $r4 8
+			iowr I[$r4] $r3
+			bra #cmddone
+
+		dtable_cmd:
+			cmpu b32 $r4 $r6
+			bra nc #illegal_mthd
+			shl b32 $r4 3
+			add b32 $r4 $r5
+			ld b32 $r5 D[$r4 + 4]
+			and $r5 $r3
+			cmpu b32 $r5 0
+			bra ne #invalid_bitfield
+			ld b16 $r5 D[$r4]
+			ld b16 $r6 D[$r4 + 2]
+			cmpu b32 $r6 2
+			bra e #cmd_setctx
+			ld b32 $r7 D[$r0 + #ctx_cond_off]
+			and $r6 $r7
+			cmpu b32 $r6 1
+			bra e #cmddone
+			call $r5
+			bra $p1 #dispatch_error
+			bra #cmddone
+
+		cmd_setctx:
+			st b32 D[$r5] $r3
+			bra #cmddone
+
+
+		invalid_bitfield:
+			or $r2 1
+		dispatch_error:
+		illegal_mthd:
+			mov $r4 0x1000
+			iowr I[$r4] $r2
+			iowr I[$r4 + 0x100] $r3
+			mov $r4 0x40
+			iowr I[$r0] $r4
+
+			im_loop:
+				iord $r4 I[$r0 + 0x200]
+				and $r4 0x40
+				cmpu b32 $r4 0
+			bra ne #im_loop
+
+		cmddone:
+		// remove the command from FIFO
+		mov $r3 0x1d00
+		mov $r4 1
+		iowr I[$r3] $r4
+
+	nocmd:
+	// ack the processed interrupts
+	and $r1 $r1 0xc
+	iowr I[$r0 + 0x100] $r1
+iret
+
+cmd_query_get:
+	// if bit 0 of param set, trigger interrupt afterwards.
+	setp $p1 $r3
+	or $r2 3
+
+	// read PTIMER, beware of races...
+	mov $r4 0xb00
+	ptimer_retry:
+		iord $r6 I[$r4 + 0x100]
+		iord $r5 I[$r4]
+		iord $r7 I[$r4 + 0x100]
+		cmpu b32 $r6 $r7
+	bra ne #ptimer_retry
+
+	// prepare the query structure
+	ld b32 $r4 D[$r0 + #ctx_query_counter]
+	st b32 D[$r0 + #swap + 0x0] $r4
+	st b32 D[$r0 + #swap + 0x4] $r0
+	st b32 D[$r0 + #swap + 0x8] $r5
+	st b32 D[$r0 + #swap + 0xc] $r6
+
+	// will use target 0, DMA_QUERY.
+	mov $xtargets $r0
+
+	ld b32 $r4 D[$r0 + #ctx_query_address_high]
+	shl b32 $r4 0x18
+	mov $xdbase $r4
+
+	ld b32 $r4 D[$r0 + #ctx_query_address_low]
+	mov $r5 #swap
+	sethi $r5 0x20000
+	xdst $r4 $r5
+	xdwait
+
+	ret
+
+cmd_cond_mode:
+	// if >= 5, INVALID_ENUM
+	bset $flags $p1
+	or $r2 2
+	cmpu b32 $r3 5
+	bra nc #return
+
+	// otherwise, no error.
+	bclr $flags $p1
+
+	// if < 2, no QUERY object is involved
+	cmpu b32 $r3 2
+	bra nc #cmd_cond_mode_queryful
+
+		xor $r3 1
+		st b32 D[$r0 + #ctx_cond_off] $r3
+	return:
+		ret
+
+	cmd_cond_mode_queryful:
+	// ok, will need to pull a QUERY object, prepare offsets
+	ld b32 $r4 D[$r0 + #ctx_cond_address_high]
+	ld b32 $r5 D[$r0 + #ctx_cond_address_low]
+	and $r6 $r5 0xff
+	shr b32 $r5 8
+	shl b32 $r4 0x18
+	or $r4 $r5
+	mov $xdbase $r4
+	mov $xtargets $r0
+
+	// pull the first one
+	mov $r5 #swap
+	sethi $r5 0x20000
+	xdld $r6 $r5
+
+	// if == 2, only a single QUERY is involved...
+	cmpu b32 $r3 2
+	bra ne #cmd_cond_mode_double
+
+		xdwait
+		ld b32 $r4 D[$r0 + #swap + 4]
+		cmpu b32 $r4 0
+		xbit $r4 $flags z
+		st b32 D[$r0 + #ctx_cond_off] $r4
+		ret
+
+	// ok, we'll need to pull second one too
+	cmd_cond_mode_double:
+	add b32 $r6 0x10
+	add b32 $r5 0x10
+	xdld $r6 $r5
+	xdwait
+
+	// compare COUNTERs
+	ld b32 $r5 D[$r0 + #swap + 0x00]
+	ld b32 $r6 D[$r0 + #swap + 0x10]
+	cmpu b32 $r5 $r6
+	xbit $r4 $flags z
+
+	// compare RESen
+	ld b32 $r5 D[$r0 + #swap + 0x04]
+	ld b32 $r6 D[$r0 + #swap + 0x14]
+	cmpu b32 $r5 $r6
+	xbit $r5 $flags z
+	and $r4 $r5
+
+	// and negate or not, depending on mode
+	cmpu b32 $r3 3
+	xbit $r5 $flags z
+	xor $r4 $r5
+	st b32 D[$r0 + #ctx_cond_off] $r4
+	ret
+
+cmd_wrcache_flush:
+	bclr $flags $p1
+	mov $r2 0x2200
+	clear b32 $r3
+	sethi $r3 0x10000
+	iowr I[$r2] $r3
+	ret
+
+crypt_cmd_mode:
+	// if >= 0xf, INVALID_ENUM
+	bset $flags $p1
+	or $r2 2
+	cmpu b32 $r3 0xf
+	bra nc #crypt_cmd_mode_return
+
+		bclr $flags $p1
+		st b32 D[$r0 + #ctx_mode] $r3
+
+	crypt_cmd_mode_return:
+	ret
+
+crypt_cmd_length:
+	// nop if length == 0
+	cmpu b32 $r3 0
+	bra e #crypt_cmd_mode_return
+
+	// init key, IV
+	cxset 3
+	mov $r4 #ctx_key
+	sethi $r4 0x70000
+	xdst $r0 $r4
+	mov $r4 #ctx_iv
+	sethi $r4 0x60000
+	xdst $r0 $r4
+	xdwait
+	ckeyreg $c7
+
+	// prepare the targets
+	mov $r4 0x2100
+	mov $xtargets $r4
+
+	// prepare src address
+	ld b32 $r4 D[$r0 + #ctx_src_address_high]
+	ld b32 $r5 D[$r0 + #ctx_src_address_low]
+	shr b32 $r8 $r5 8
+	shl b32 $r4 0x18
+	or $r4 $r8
+	and $r5 $r5 0xff
+
+	// prepare dst address
+	ld b32 $r6 D[$r0 + #ctx_dst_address_high]
+	ld b32 $r7 D[$r0 + #ctx_dst_address_low]
+	shr b32 $r8 $r7 8
+	shl b32 $r6 0x18
+	or $r6 $r8
+	and $r7 $r7 0xff
+
+	// find the proper prep & do functions
+	ld b32 $r8 D[$r0 + #ctx_mode]
+	shl b32 $r8 2
+
+	// run prep
+	ld b16 $r9 D[$r8 + #crypt_dtable]
+	call $r9
+
+	// do it
+	ld b16 $r9 D[$r8 + #crypt_dtable + 2]
+	call $r9
+	cxset 1
+	xdwait
+	cxset 0x61
+	xdwait
+	xdwait
+
+	// update src address
+	shr b32 $r8 $r4 0x18
+	shl b32 $r9 $r4 8
+	add b32 $r9 $r5
+	adc b32 $r8 0
+	st b32 D[$r0 + #ctx_src_address_high] $r8
+	st b32 D[$r0 + #ctx_src_address_low] $r9
+
+	// update dst address
+	shr b32 $r8 $r6 0x18
+	shl b32 $r9 $r6 8
+	add b32 $r9 $r7
+	adc b32 $r8 0
+	st b32 D[$r0 + #ctx_dst_address_high] $r8
+	st b32 D[$r0 + #ctx_dst_address_low] $r9
+
+	// pull updated IV
+	cxset 2
+	mov $r4 #ctx_iv
+	sethi $r4 0x60000
+	xdld $r0 $r4
+	xdwait
+
+	ret
+
+
+crypt_copy_prep:
+	cs0begin 2
+		cxsin $c0
+		cxsout $c0
+	ret
+
+crypt_store_prep:
+	cs0begin 1
+		cxsout $c6
+	ret
+
+crypt_ecb_e_prep:
+	cs0begin 3
+		cxsin $c0
+		cenc $c0 $c0
+		cxsout $c0
+	ret
+
+crypt_ecb_d_prep:
+	ckexp $c7 $c7
+	cs0begin 3
+		cxsin $c0
+		cdec $c0 $c0
+		cxsout $c0
+	ret
+
+crypt_cbc_e_prep:
+	cs0begin 4
+		cxsin $c0
+		cxor $c6 $c0
+		cenc $c6 $c6
+		cxsout $c6
+	ret
+
+crypt_cbc_d_prep:
+	ckexp $c7 $c7
+	cs0begin 5
+		cmov $c2 $c6
+		cxsin $c6
+		cdec $c0 $c6
+		cxor $c0 $c2
+		cxsout $c0
+	ret
+
+crypt_pcbc_e_prep:
+	cs0begin 5
+		cxsin $c0
+		cxor $c6 $c0
+		cenc $c6 $c6
+		cxsout $c6
+		cxor $c6 $c0
+	ret
+
+crypt_pcbc_d_prep:
+	ckexp $c7 $c7
+	cs0begin 5
+		cxsin $c0
+		cdec $c1 $c0
+		cxor $c6 $c1
+		cxsout $c6
+		cxor $c6 $c0
+	ret
+
+crypt_cfb_e_prep:
+	cs0begin 4
+		cenc $c6 $c6
+		cxsin $c0
+		cxor $c6 $c0
+		cxsout $c6
+	ret
+
+crypt_cfb_d_prep:
+	cs0begin 4
+		cenc $c0 $c6
+		cxsin $c6
+		cxor $c0 $c6
+		cxsout $c0
+	ret
+
+crypt_ofb_prep:
+	cs0begin 4
+		cenc $c6 $c6
+		cxsin $c0
+		cxor $c0 $c6
+		cxsout $c0
+	ret
+
+crypt_ctr_prep:
+	cs0begin 5
+		cenc $c1 $c6
+		cadd $c6 1
+		cxsin $c0
+		cxor $c0 $c1
+		cxsout $c0
+	ret
+
+crypt_cbc_mac_prep:
+	cs0begin 3
+		cxsin $c0
+		cxor $c6 $c0
+		cenc $c6 $c6
+	ret
+
+crypt_cmac_finish_complete_prep:
+	cs0begin 7
+		cxsin $c0
+		cxor $c6 $c0
+		cxor $c0 $c0
+		cenc $c0 $c0
+		cprecmac $c0 $c0
+		cxor $c6 $c0
+		cenc $c6 $c6
+	ret
+
+crypt_cmac_finish_partial_prep:
+	cs0begin 8
+		cxsin $c0
+		cxor $c6 $c0
+		cxor $c0 $c0
+		cenc $c0 $c0
+		cprecmac $c0 $c0
+		cprecmac $c0 $c0
+		cxor $c6 $c0
+		cenc $c6 $c6
+	ret
+
+// TODO
+crypt_do_in:
+	add b32 $r3 $r5
+	mov $xdbase $r4
+	mov $r9 #swap
+	sethi $r9 0x20000
+	crypt_do_in_loop:
+		xdld $r5 $r9
+		xdwait
+		cxset 0x22
+		xdst $r0 $r9
+		cs0exec 1
+		xdwait
+		add b32 $r5 0x10
+		cmpu b32 $r5 $r3
+	bra ne #crypt_do_in_loop
+	cxset 1
+	xdwait
+	ret
+
+crypt_do_out:
+	add b32 $r3 $r7
+	mov $xdbase $r6
+	mov $r9 #swap
+	sethi $r9 0x20000
+	crypt_do_out_loop:
+		cs0exec 1
+		cxset 0x61
+		xdld $r7 $r9
+		xdst $r7 $r9
+		cxset 1
+		xdwait
+		add b32 $r7 0x10
+		cmpu b32 $r7 $r3
+	bra ne #crypt_do_out_loop
+	ret
+
+crypt_do_inout:
+	add b32 $r3 $r5
+	mov $r9 #swap
+	sethi $r9 0x20000
+	crypt_do_inout_loop:
+		mov $xdbase $r4
+		xdld $r5 $r9
+		xdwait
+		cxset 0x21
+		xdst $r0 $r9
+		cs0exec 1
+		cxset 0x61
+		mov $xdbase $r6
+		xdld $r7 $r9
+		xdst $r7 $r9
+		cxset 1
+		xdwait
+		add b32 $r5 0x10
+		add b32 $r7 0x10
+		cmpu b32 $r5 $r3
+	bra ne #crypt_do_inout_loop
+	ret
+
+.align 0x100

+ 584 - 0
drivers/gpu/drm/nouveau/nv98_crypt.fuc.h

@@ -0,0 +1,584 @@
+uint32_t nv98_pcrypt_data[] = {
+/* 0x0000: ctx_dma */
+/* 0x0000: ctx_dma_query */
+	0x00000000,
+/* 0x0004: ctx_dma_src */
+	0x00000000,
+/* 0x0008: ctx_dma_dst */
+	0x00000000,
+/* 0x000c: ctx_query_address_high */
+	0x00000000,
+/* 0x0010: ctx_query_address_low */
+	0x00000000,
+/* 0x0014: ctx_query_counter */
+	0x00000000,
+/* 0x0018: ctx_cond_address_high */
+	0x00000000,
+/* 0x001c: ctx_cond_address_low */
+	0x00000000,
+/* 0x0020: ctx_cond_off */
+	0x00000000,
+/* 0x0024: ctx_src_address_high */
+	0x00000000,
+/* 0x0028: ctx_src_address_low */
+	0x00000000,
+/* 0x002c: ctx_dst_address_high */
+	0x00000000,
+/* 0x0030: ctx_dst_address_low */
+	0x00000000,
+/* 0x0034: ctx_mode */
+	0x00000000,
+	0x00000000,
+	0x00000000,
+/* 0x0040: ctx_key */
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+/* 0x0050: ctx_iv */
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+/* 0x0080: swap */
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+/* 0x00a0: common_cmd_dtable */
+	0x0002000c,
+	0xffffff00,
+	0x00020010,
+	0x0000000f,
+	0x00020014,
+	0x00000000,
+	0x00000192,
+	0xfffffffe,
+	0x00020018,
+	0xffffff00,
+	0x0002001c,
+	0x0000000f,
+	0x000001d7,
+	0xfffffff8,
+	0x00000260,
+	0xffffffff,
+/* 0x00e0: engine_cmd_dtable */
+	0x00020040,
+	0x00000000,
+	0x00020044,
+	0x00000000,
+	0x00020048,
+	0x00000000,
+	0x0002004c,
+	0x00000000,
+	0x00020050,
+	0x00000000,
+	0x00020054,
+	0x00000000,
+	0x00020058,
+	0x00000000,
+	0x0002005c,
+	0x00000000,
+	0x00020024,
+	0xffffff00,
+	0x00020028,
+	0x0000000f,
+	0x0002002c,
+	0xffffff00,
+	0x00020030,
+	0x0000000f,
+	0x00000271,
+	0xfffffff0,
+	0x00010285,
+	0xf000000f,
+/* 0x0150: crypt_dtable */
+	0x04db0321,
+	0x04b1032f,
+	0x04db0339,
+	0x04db034b,
+	0x04db0361,
+	0x04db0377,
+	0x04db0395,
+	0x04db03af,
+	0x04db03cd,
+	0x04db03e3,
+	0x04db03f9,
+	0x04db040f,
+	0x04830429,
+	0x0483043b,
+	0x0483045d,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+};
+
+uint32_t nv98_pcrypt_code[] = {
+	0x17f004bd,
+	0x0010fe35,
+	0xf10004fe,
+	0xf0fff017,
+	0x27f10013,
+	0x21d00400,
+	0x0c15f0c0,
+	0xf00021d0,
+	0x27f10317,
+	0x21d01200,
+	0x1031f400,
+/* 0x002f: spin */
+	0xf40031f4,
+	0x0ef40028,
+/* 0x0035: ih */
+	0x8001cffd,
+	0xb00812c4,
+	0x0bf40024,
+	0x0027f167,
+	0x002bfe77,
+	0xf00007fe,
+	0x23f00027,
+	0x0037f105,
+	0x0034cf14,
+	0xb0014594,
+	0x18f40055,
+	0x0602fa17,
+	0x4af003f8,
+	0x0034d01e,
+	0xd00147f0,
+	0x0ef48034,
+/* 0x0075: ctxload */
+	0x4034cf33,
+	0xb0014f94,
+	0x18f400f5,
+	0x0502fa21,
+	0x57f003f8,
+	0x0267f000,
+/* 0x008c: ctxload_dma_loop */
+	0xa07856bc,
+	0xb6018068,
+	0x87d00884,
+	0x0162b600,
+/* 0x009f: dummyload */
+	0xf0f018f4,
+	0x35d00257,
+/* 0x00a5: noctx */
+	0x0412c480,
+	0xf50024b0,
+	0xf100df0b,
+	0xcf190037,
+	0x33cf4032,
+	0xff24e400,
+	0x1024b607,
+	0x07bf45e4,
+	0xf50054b0,
+	0xf100b90b,
+	0xf1fae057,
+	0xb000ce67,
+	0x18f4c044,
+	0xa057f14d,
+	0x8867f1fc,
+	0x8044b000,
+	0xb03f18f4,
+	0x18f46044,
+	0x5044b019,
+	0xf1741bf4,
+	0xbd220027,
+	0x0233f034,
+	0xf50023d0,
+/* 0x0103: dma_cmd */
+	0xb000810e,
+	0x18f46344,
+	0x0245945e,
+	0xfe8050b7,
+	0x801e39f0,
+	0x40b70053,
+	0x44b60120,
+	0x0043d008,
+/* 0x0123: dtable_cmd */
+	0xb8600ef4,
+	0x18f40446,
+	0x0344b63e,
+	0x980045bb,
+	0x53fd0145,
+	0x0054b004,
+	0x58291bf4,
+	0x46580045,
+	0x0264b001,
+	0x98170bf4,
+	0x67fd0807,
+	0x0164b004,
+	0xf9300bf4,
+	0x0f01f455,
+/* 0x015b: cmd_setctx */
+	0x80280ef4,
+	0x0ef40053,
+/* 0x0161: invalid_bitfield */
+	0x0125f022,
+/* 0x0164: dispatch_error */
+/* 0x0164: illegal_mthd */
+	0x100047f1,
+	0xd00042d0,
+	0x47f04043,
+	0x0004d040,
+/* 0x0174: im_loop */
+	0xf08004cf,
+	0x44b04044,
+	0xf71bf400,
+/* 0x0180: cmddone */
+	0x1d0037f1,
+	0xd00147f0,
+/* 0x018a: nocmd */
+	0x11c40034,
+	0x4001d00c,
+/* 0x0192: cmd_query_get */
+	0x38f201f8,
+	0x0325f001,
+	0x0b0047f1,
+/* 0x019c: ptimer_retry */
+	0xcf4046cf,
+	0x47cf0045,
+	0x0467b840,
+	0x98f41bf4,
+	0x04800504,
+	0x21008020,
+	0x80220580,
+	0x0bfe2306,
+	0x03049800,
+	0xfe1844b6,
+	0x04980047,
+	0x8057f104,
+	0x0253f000,
+	0xf80645fa,
+/* 0x01d7: cmd_cond_mode */
+	0xf400f803,
+	0x25f00131,
+	0x0534b002,
+	0xf41218f4,
+	0x34b00132,
+	0x0b18f402,
+	0x800136f0,
+/* 0x01f2: return */
+	0x00f80803,
+/* 0x01f4: cmd_cond_mode_queryful */
+	0x98060498,
+	0x56c40705,
+	0x0855b6ff,
+	0xfd1844b6,
+	0x47fe0545,
+	0x000bfe00,
+	0x008057f1,
+	0xfa0253f0,
+	0x34b00565,
+	0x131bf402,
+	0x049803f8,
+	0x0044b021,
+	0x800b4cf0,
+	0x00f80804,
+/* 0x022c: cmd_cond_mode_double */
+	0xb61060b6,
+	0x65fa1050,
+	0x9803f805,
+	0x06982005,
+	0x0456b824,
+	0x980b4cf0,
+	0x06982105,
+	0x0456b825,
+	0xfd0b5cf0,
+	0x34b00445,
+	0x0b5cf003,
+	0x800645fd,
+	0x00f80804,
+/* 0x0260: cmd_wrcache_flush */
+	0xf10132f4,
+	0xbd220027,
+	0x0133f034,
+	0xf80023d0,
+/* 0x0271: crypt_cmd_mode */
+	0x0131f400,
+	0xb00225f0,
+	0x18f40f34,
+	0x0132f409,
+/* 0x0283: crypt_cmd_mode_return */
+	0xf80d0380,
+/* 0x0285: crypt_cmd_length */
+	0x0034b000,
+	0xf4fb0bf4,
+	0x47f0033c,
+	0x0743f040,
+	0xf00604fa,
+	0x43f05047,
+	0x0604fa06,
+	0x3cf503f8,
+	0x47f1c407,
+	0x4bfe2100,
+	0x09049800,
+	0x950a0598,
+	0x44b60858,
+	0x0548fd18,
+	0x98ff55c4,
+	0x07980b06,
+	0x0878950c,
+	0xfd1864b6,
+	0x77c40568,
+	0x0d0898ff,
+	0x580284b6,
+	0x95f9a889,
+	0xf9a98958,
+	0x013cf495,
+	0x3cf403f8,
+	0xf803f861,
+	0x18489503,
+	0xbb084994,
+	0x81b60095,
+	0x09088000,
+	0x950a0980,
+	0x69941868,
+	0x0097bb08,
+	0x800081b6,
+	0x09800b08,
+	0x023cf40c,
+	0xf05047f0,
+	0x04fa0643,
+	0xf803f805,
+/* 0x0321: crypt_copy_prep */
+	0x203cf500,
+	0x003cf594,
+	0x003cf588,
+/* 0x032f: crypt_store_prep */
+	0xf500f88c,
+	0xf594103c,
+	0xf88c063c,
+/* 0x0339: crypt_ecb_e_prep */
+	0x303cf500,
+	0x003cf594,
+	0x003cf588,
+	0x003cf5d0,
+/* 0x034b: crypt_ecb_d_prep */
+	0xf500f88c,
+	0xf5c8773c,
+	0xf594303c,
+	0xf588003c,
+	0xf5d4003c,
+	0xf88c003c,
+/* 0x0361: crypt_cbc_e_prep */
+	0x403cf500,
+	0x003cf594,
+	0x063cf588,
+	0x663cf5ac,
+	0x063cf5d0,
+/* 0x0377: crypt_cbc_d_prep */
+	0xf500f88c,
+	0xf5c8773c,
+	0xf594503c,
+	0xf584623c,
+	0xf588063c,
+	0xf5d4603c,
+	0xf5ac203c,
+	0xf88c003c,
+/* 0x0395: crypt_pcbc_e_prep */
+	0x503cf500,
+	0x003cf594,
+	0x063cf588,
+	0x663cf5ac,
+	0x063cf5d0,
+	0x063cf58c,
+/* 0x03af: crypt_pcbc_d_prep */
+	0xf500f8ac,
+	0xf5c8773c,
+	0xf594503c,
+	0xf588003c,
+	0xf5d4013c,
+	0xf5ac163c,
+	0xf58c063c,
+	0xf8ac063c,
+/* 0x03cd: crypt_cfb_e_prep */
+	0x403cf500,
+	0x663cf594,
+	0x003cf5d0,
+	0x063cf588,
+	0x063cf5ac,
+/* 0x03e3: crypt_cfb_d_prep */
+	0xf500f88c,
+	0xf594403c,
+	0xf5d0603c,
+	0xf588063c,
+	0xf5ac603c,
+	0xf88c003c,
+/* 0x03f9: crypt_ofb_prep */
+	0x403cf500,
+	0x663cf594,
+	0x003cf5d0,
+	0x603cf588,
+	0x003cf5ac,
+/* 0x040f: crypt_ctr_prep */
+	0xf500f88c,
+	0xf594503c,
+	0xf5d0613c,
+	0xf5b0163c,
+	0xf588003c,
+	0xf5ac103c,
+	0xf88c003c,
+/* 0x0429: crypt_cbc_mac_prep */
+	0x303cf500,
+	0x003cf594,
+	0x063cf588,
+	0x663cf5ac,
+/* 0x043b: crypt_cmac_finish_complete_prep */
+	0xf500f8d0,
+	0xf594703c,
+	0xf588003c,
+	0xf5ac063c,
+	0xf5ac003c,
+	0xf5d0003c,
+	0xf5bc003c,
+	0xf5ac063c,
+	0xf8d0663c,
+/* 0x045d: crypt_cmac_finish_partial_prep */
+	0x803cf500,
+	0x003cf594,
+	0x063cf588,
+	0x003cf5ac,
+	0x003cf5ac,
+	0x003cf5d0,
+	0x003cf5bc,
+	0x063cf5bc,
+	0x663cf5ac,
+/* 0x0483: crypt_do_in */
+	0xbb00f8d0,
+	0x47fe0035,
+	0x8097f100,
+	0x0293f000,
+/* 0x0490: crypt_do_in_loop */
+	0xf80559fa,
+	0x223cf403,
+	0xf50609fa,
+	0xf898103c,
+	0x1050b603,
+	0xf40453b8,
+	0x3cf4e91b,
+	0xf803f801,
+/* 0x04b1: crypt_do_out */
+	0x0037bb00,
+	0xf10067fe,
+	0xf0008097,
+/* 0x04be: crypt_do_out_loop */
+	0x3cf50293,
+	0x3cf49810,
+	0x0579fa61,
+	0xf40679fa,
+	0x03f8013c,
+	0xb81070b6,
+	0x1bf40473,
+/* 0x04db: crypt_do_inout */
+	0xbb00f8e8,
+	0x97f10035,
+	0x93f00080,
+/* 0x04e5: crypt_do_inout_loop */
+	0x0047fe02,
+	0xf80559fa,
+	0x213cf403,
+	0xf50609fa,
+	0xf498103c,
+	0x67fe613c,
+	0x0579fa00,
+	0xf40679fa,
+	0x03f8013c,
+	0xb61050b6,
+	0x53b81070,
+	0xd41bf404,
+	0x000000f8,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+};