nvc0_grhub.fuc 18 KB


  1. /* fuc microcode for nvc0 PGRAPH/HUB
  2. *
  3. * Copyright 2011 Red Hat Inc.
  4. *
  5. * Permission is hereby granted, free of charge, to any person obtaining a
  6. * copy of this software and associated documentation files (the "Software"),
  7. * to deal in the Software without restriction, including without limitation
  8. * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  9. * and/or sell copies of the Software, and to permit persons to whom the
  10. * Software is furnished to do so, subject to the following conditions:
  11. *
  12. * The above copyright notice and this permission notice shall be included in
  13. * all copies or substantial portions of the Software.
  14. *
  15. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  18. * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
  19. * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  20. * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  21. * OTHER DEALINGS IN THE SOFTWARE.
  22. *
  23. * Authors: Ben Skeggs
  24. */
  25. /* To build:
  26. * m4 nvc0_grhub.fuc | envyas -a -w -m fuc -V nva3 -o nvc0_grhub.fuc.h
  27. */
  28. .section nvc0_grhub_data // HUB data segment
  29. include(`nvc0_graph.fuc')
  30. gpc_count: .b32 0 // bits 4:0 of the 0x409604 value read in init
  31. rop_count: .b32 0 // bits 20:16 of the same value
  32. cmd_queue: queue_init
  33. hub_mmio_list_head: .b32 0 // start of the mmio list chosen for the detected chipset
  34. hub_mmio_list_tail: .b32 0 // end of that list
  35. ctx_current: .b32 0 // context pointer computed by ctx_load, used as MEM_BASE for transfers
  36. chipsets: // 8-byte entries: 32-bit chipset id, 16-bit list head, 16-bit list tail
  37. .b8 0xc0 0 0 0 // id word, compared by init against the chipset in CC_SCRATCH[0]
  38. .b16 nvc0_hub_mmio_head
  39. .b16 nvc0_hub_mmio_tail
  40. .b8 0xc1 0 0 0 // the only entry that uses the longer nvc1 tail
  41. .b16 nvc0_hub_mmio_head
  42. .b16 nvc1_hub_mmio_tail
  43. .b8 0xc3 0 0 0
  44. .b16 nvc0_hub_mmio_head
  45. .b16 nvc0_hub_mmio_tail
  46. .b8 0xc4 0 0 0
  47. .b16 nvc0_hub_mmio_head
  48. .b16 nvc0_hub_mmio_tail
  49. .b8 0xc8 0 0 0
  50. .b16 nvc0_hub_mmio_head
  51. .b16 nvc0_hub_mmio_tail
  52. .b8 0xce 0 0 0
  53. .b16 nvc0_hub_mmio_head
  54. .b16 nvc0_hub_mmio_tail
  55. .b8 0xcf 0 0 0
  56. .b16 nvc0_hub_mmio_head
  57. .b16 nvc0_hub_mmio_tail
  58. .b8 0 0 0 0 // a zero id ends the table; init stops searching when it reads it
  59. nvc0_hub_mmio_head: // register list shared by every chipset in the table
  60. mmctx_data(0x17e91c, 2)
  61. mmctx_data(0x400204, 2)
  62. mmctx_data(0x404004, 11)
  63. mmctx_data(0x404044, 1)
  64. mmctx_data(0x404094, 14)
  65. mmctx_data(0x4040d0, 7)
  66. mmctx_data(0x4040f8, 1)
  67. mmctx_data(0x404130, 3)
  68. mmctx_data(0x404150, 3)
  69. mmctx_data(0x404164, 2)
  70. mmctx_data(0x404174, 3)
  71. mmctx_data(0x404200, 8)
  72. mmctx_data(0x404404, 14)
  73. mmctx_data(0x404460, 4)
  74. mmctx_data(0x404480, 1)
  75. mmctx_data(0x404498, 1)
  76. mmctx_data(0x404604, 4)
  77. mmctx_data(0x404618, 32)
  78. mmctx_data(0x404698, 21)
  79. mmctx_data(0x4046f0, 2)
  80. mmctx_data(0x404700, 22)
  81. mmctx_data(0x405800, 1)
  82. mmctx_data(0x405830, 3)
  83. mmctx_data(0x405854, 1)
  84. mmctx_data(0x405870, 4)
  85. mmctx_data(0x405a00, 2)
  86. mmctx_data(0x405a18, 1)
  87. mmctx_data(0x406020, 1)
  88. mmctx_data(0x406028, 4)
  89. mmctx_data(0x4064a8, 2)
  90. mmctx_data(0x4064b4, 2)
  91. mmctx_data(0x407804, 1)
  92. mmctx_data(0x40780c, 6)
  93. mmctx_data(0x4078bc, 1)
  94. mmctx_data(0x408000, 7)
  95. mmctx_data(0x408064, 1)
  96. mmctx_data(0x408800, 3)
  97. mmctx_data(0x408900, 4)
  98. mmctx_data(0x408980, 1)
  99. nvc0_hub_mmio_tail: // end of the list for every chipset except 0xc1
  100. mmctx_data(0x4064c0, 2)
  101. nvc1_hub_mmio_tail: // end of the list for 0xc1, which adds the 0x4064c0 entry
  102. .align 256
  103. chan_data: // buffer that ctx_load fills with the first 256 bytes of the context
  104. chan_mmio_count: .b32 0 // when nonzero after a load, ctx_xfer runs ctx_mmio_exec with it
  105. chan_mmio_address: .b32 0 // MEM_BASE value ctx_mmio_exec uses to fetch the override list
  106. .align 256
  107. xfer_data: .b32 0 // xdld scratch buffer; NOTE(review): 256-byte transfers land here although only one word is declared - confirm nothing is placed after it
  108. .section nvc0_grhub_code // HUB code segment
  109. bra init // first instruction of the segment: jump to init, past whatever code nvc0_graph.fuc contributes below
  110. define(`include_code')
  111. include(`nvc0_graph.fuc')
  112. // reports an exception to the host
  113. //
  114. // In: $r15 error code (see nvc0_graph.fuc); holds 1 on return
  115. // $r14 is saved and restored around the two register writes
  116. error:
  117. push $r14
  118. mov $r14 0x814
  119. shl b32 $r14 6 // I/O addresses in this file are formed as (register << 6)
  120. iowr I[$r14 + 0x000] $r15 // CC_SCRATCH[5] = error code
  121. mov $r14 0xc1c
  122. shl b32 $r14 6
  123. mov $r15 1 // the error code has been written out, $r15 is reused for the bit to raise
  124. iowr I[$r14 + 0x000] $r15 // INTR_UP_SET
  125. pop $r14
  126. ret
  127. // HUB fuc initialisation, executed by triggering ucode start, will
  128. // fall through to main loop after completion.
  129. //
  130. // Input:
  131. // CC_SCRATCH[0]: chipset (PMC_BOOT_0 read returns 0x0bad0bad... sigh)
  132. //
  133. // Output:
  134. // CC_SCRATCH[0]:
  135. // 31:31: set to signal completion
  136. // CC_SCRATCH[1]:
  137. // 31:0: total PGRAPH context size
  138. //
  139. init:
  140. clear b32 $r0 // $r0 stays zero; the rest of the file uses it as the zero base/value
  141. mov $sp $r0
  142. mov $xdbase $r0
  143. // enable fifo access
  144. mov $r1 0x1200
  145. mov $r2 2
  146. iowr I[$r1 + 0x000] $r2 // FIFO_ENABLE
  147. // setup i0 handler, and route all interrupts to it
  148. mov $r1 ih
  149. mov $iv0 $r1
  150. mov $r1 0x400 // $r1 is kept for INTR_EN_SET and the edge/level write further down
  151. iowr I[$r1 + 0x300] $r0 // INTR_DISPATCH
  152. // route HUB_CHANNEL_SWITCH to fuc interrupt 8
  153. mov $r3 0x404
  154. shl b32 $r3 6
  155. mov $r2 0x2003 // { HUB_CHANNEL_SWITCH, ZERO } -> intr 8
  156. iowr I[$r3 + 0x000] $r2
  157. // not sure what these are, route them because NVIDIA does, and
  158. // the IRQ handler will signal the host if we ever get one.. we
  159. // may find out if/why we need to handle these if so..
  160. //
  161. mov $r2 0x2004
  162. iowr I[$r3 + 0x004] $r2 // { 0x04, ZERO } -> intr 9
  163. mov $r2 0x200b
  164. iowr I[$r3 + 0x008] $r2 // { 0x0b, ZERO } -> intr 10
  165. mov $r2 0x200c
  166. iowr I[$r3 + 0x01c] $r2 // { 0x0c, ZERO } -> intr 15
  167. // enable all INTR_UP interrupts
  168. mov $r2 0xc24
  169. shl b32 $r2 6
  170. not b32 $r3 $r0 // $r3 = ~0, every bit set
  171. iowr I[$r2] $r3
  172. // enable fifo, ctxsw, 9, 10, 15 interrupts
  173. mov $r2 -0x78fc // 0x8704
  174. sethi $r2 0 // zero the upper half so the value is exactly 0x00008704
  175. iowr I[$r1 + 0x000] $r2 // INTR_EN_SET
  176. // fifo level triggered, rest edge
  177. sub b32 $r1 0x100 // 0x400 - 0x100 = 0x300
  178. mov $r2 4 // bit 2 is the fifo interrupt tested by ih
  179. iowr I[$r1] $r2
  180. // enable interrupts
  181. bset $flags ie0
  182. // fetch enabled GPC/ROP counts
  183. mov $r14 -0x69fc // 0x409604
  184. sethi $r14 0x400000
  185. call nv_rd32 // value comes back in $r15
  186. extr $r1 $r15 16:20 // ROP count field
  187. st b32 D[$r0 + rop_count] $r1
  188. and $r15 0x1f // GPC count field
  189. st b32 D[$r0 + gpc_count] $r15
  190. // set BAR_REQMASK to GPC mask
  191. mov $r1 1
  192. shl b32 $r1 $r15
  193. sub b32 $r1 1 // (1 << gpc_count) - 1, one bit per GPC
  194. mov $r2 0x40c
  195. shl b32 $r2 6
  196. iowr I[$r2 + 0x000] $r1
  197. iowr I[$r2 + 0x100] $r1
  198. // find context data for this chipset
  199. mov $r2 0x800
  200. shl b32 $r2 6
  201. iord $r2 I[$r2 + 0x000] // CC_SCRATCH[0]
  202. mov $r15 chipsets - 8 // start one entry early, the loop advances before it reads
  203. init_find_chipset:
  204. add b32 $r15 8 // step to the next 8-byte table entry
  205. ld b32 $r3 D[$r15 + 0x00] // chipset id of this entry
  206. cmpu b32 $r3 $r2
  207. bra e init_context
  208. cmpu b32 $r3 0 // a zero id is the end-of-table marker
  209. bra ne init_find_chipset
  210. // unknown chipset; NOTE(review): init is entered with bra and $sp was zeroed above, so no return address is visible for this ret - confirm the intended behaviour
  211. ret
  212. // context size calculation, reserve first 256 bytes for use by fuc
  213. init_context:
  214. mov $r1 256 // $r1 accumulates the total context size from here on
  215. // calculate size of mmio context data
  216. ld b16 $r14 D[$r15 + 4] // list head from the matching table entry
  217. ld b16 $r15 D[$r15 + 6] // list tail from the same entry
  218. sethi $r14 0
  219. st b32 D[$r0 + hub_mmio_list_head] $r14
  220. st b32 D[$r0 + hub_mmio_list_tail] $r15
  221. call mmctx_size // presumably returns the list's context size in $r15, which is added to $r1 below - confirm in nvc0_graph.fuc
  222. // set mmctx base addresses now so we don't have to do it later,
  223. // they don't (currently) ever change
  224. mov $r3 0x700
  225. shl b32 $r3 6
  226. shr b32 $r4 $r1 8 // current offset (256) expressed in 256-byte units
  227. iowr I[$r3 + 0x000] $r4 // MMCTX_SAVE_SWBASE
  228. iowr I[$r3 + 0x100] $r4 // MMCTX_LOAD_SWBASE
  229. add b32 $r3 0x1300
  230. add b32 $r1 $r15
  231. shr b32 $r15 2 // divide by 4 for the count register
  232. iowr I[$r3 + 0x000] $r15 // MMCTX_LOAD_COUNT, wtf for?!?
  233. // strands, base offset needs to be aligned to 256 bytes
  234. shr b32 $r1 8
  235. add b32 $r1 1 // always advances to the following 256-byte boundary, even if already aligned
  236. shl b32 $r1 8
  237. mov b32 $r15 $r1
  238. call strand_ctx_init
  239. add b32 $r1 $r15 // presumably the strand context size is returned in $r15 - confirm in nvc0_graph.fuc
  240. // initialise each GPC in sequence by passing in the offset of its
  241. // context data in GPCn_CC_SCRATCH[1], and starting its FUC (which
  242. // has previously been uploaded by the host) running.
  243. //
  244. // the GPC fuc init sequence will set GPCn_CC_SCRATCH[0] bit 31
  245. // when it has completed, and return the size of its context data
  246. // in GPCn_CC_SCRATCH[1]
  247. //
  248. ld b32 $r3 D[$r0 + gpc_count] // loop counter; NOTE(review): the loop tests after the body, so a count of 0 would not terminate promptly
  249. mov $r4 0x2000
  250. sethi $r4 0x500000 // $r4 = 0x502000, register base used for the first GPC
  251. init_gpc:
  252. // setup, and start GPC ucode running
  253. add b32 $r14 $r4 0x804
  254. mov b32 $r15 $r1
  255. call nv_wr32 // CC_SCRATCH[1] = ctx offset
  256. add b32 $r14 $r4 0x800
  257. mov b32 $r15 $r2 // $r2 still holds the chipset read from CC_SCRATCH[0]
  258. call nv_wr32 // CC_SCRATCH[0] = chipset
  259. add b32 $r14 $r4 0x10c
  260. clear b32 $r15
  261. call nv_wr32
  262. add b32 $r14 $r4 0x104 // $r15 is not reloaded here: the zero from above is written again
  263. call nv_wr32 // ENTRY
  264. add b32 $r14 $r4 0x100
  265. mov $r15 2 // CTRL_START_TRIGGER
  266. call nv_wr32 // CTRL
  267. // wait for it to complete, and adjust context size
  268. add b32 $r14 $r4 0x800
  269. init_gpc_wait:
  270. call nv_rd32
  271. xbit $r15 $r15 31
  272. bra e init_gpc_wait // keep polling while bit 31 reads as zero
  273. add b32 $r14 $r4 0x804
  274. call nv_rd32
  275. add b32 $r1 $r15 // add this GPC's context size to the total
  276. // next!
  277. add b32 $r4 0x8000 // register base of the following GPC
  278. sub b32 $r3 1
  279. bra ne init_gpc
  280. // save context size, and tell host we're ready
  281. mov $r2 0x800
  282. shl b32 $r2 6
  283. iowr I[$r2 + 0x100] $r1 // CC_SCRATCH[1] = context size
  284. add b32 $r2 0x800 // same address main_done builds as 0x820 << 6
  285. clear b32 $r1
  286. bset $r1 31
  287. iowr I[$r2 + 0x000] $r1 // CC_SCRATCH[0] |= 0x80000000
  288. // Main program loop, very simple, sleeps until woken up by the interrupt
  289. // handler, pulls a command from the queue and executes its handler
  290. // Commands handled: 0x4001 context switch (queued by ih), 0x0001 set channel, 0x0002 save context; anything else is reported through error
  291. main:
  292. // sleep until we have something to do
  293. bset $flags $p0 // ih clears $p0 before it returns
  294. sleep $p0
  295. mov $r13 cmd_queue
  296. call queue_get // command arrives in $r14, its data in $r15
  297. bra $p1 main // presumably $p1 set means nothing was fetched - confirm in nvc0_graph.fuc
  298. // context switch, requested by GPU?
  299. cmpu b32 $r14 0x4001
  300. bra ne main_not_ctx_switch
  301. trace_set(T_AUTO)
  302. mov $r1 0xb00
  303. shl b32 $r1 6
  304. iord $r2 I[$r1 + 0x100] // CHAN_NEXT
  305. iord $r1 I[$r1 + 0x000] // CHAN_CUR
  306. xbit $r3 $r1 31 // bit 31 of CHAN_CUR clear: there is no previous channel to save
  307. bra e chsw_no_prev
  308. xbit $r3 $r2 31 // bit 31 of CHAN_NEXT clear: there is no next channel to load
  309. bra e chsw_prev_no_next
  310. push $r2 // keep CHAN_NEXT across the save
  311. mov b32 $r2 $r1 // ctx_xfer takes the channel in $r2
  312. trace_set(T_SAVE)
  313. bclr $flags $p1 // $p1 clear = save
  314. bset $flags $p2 // $p2 set = a load follows this save
  315. call ctx_xfer
  316. trace_clr(T_SAVE);
  317. pop $r2 // back to CHAN_NEXT
  318. trace_set(T_LOAD);
  319. bset $flags $p1 // $p1 set = load; $p2 is still set from the save
  320. call ctx_xfer
  321. trace_clr(T_LOAD);
  322. bra chsw_done
  323. chsw_prev_no_next:
  324. push $r2
  325. mov b32 $r2 $r1
  326. bclr $flags $p1 // save only
  327. bclr $flags $p2 // no load follows
  328. call ctx_xfer
  329. pop $r2
  330. mov $r1 0xb00
  331. shl b32 $r1 6
  332. iowr I[$r1] $r2 // CHAN_CUR = the CHAN_NEXT value read above
  333. bra chsw_done
  334. chsw_no_prev:
  335. xbit $r3 $r2 31
  336. bra e chsw_done // neither a previous nor a next channel: only ack
  337. bset $flags $p1 // load only
  338. bclr $flags $p2 // no save preceded it
  339. call ctx_xfer
  340. // ack the context switch request
  341. chsw_done:
  342. mov $r1 0xb0c
  343. shl b32 $r1 6
  344. mov $r2 1
  345. iowr I[$r1 + 0x000] $r2 // 0x409b0c
  346. trace_clr(T_AUTO)
  347. bra main
  348. // request to set current channel? (*not* a context switch)
  349. main_not_ctx_switch:
  350. cmpu b32 $r14 0x0001
  351. bra ne main_not_ctx_chan
  352. mov b32 $r2 $r15 // the command data is the channel address
  353. call ctx_chan
  354. bra main_done
  355. // request to store current channel context?
  356. main_not_ctx_chan:
  357. cmpu b32 $r14 0x0002
  358. bra ne main_not_ctx_save
  359. trace_set(T_SAVE)
  360. bclr $flags $p1 // save
  361. bclr $flags $p2 // standalone, no load follows
  362. call ctx_xfer
  363. trace_clr(T_SAVE)
  364. bra main_done
  365. main_not_ctx_save:
  366. shl b32 $r15 $r14 16 // unknown command number goes in the upper half of the error code
  367. or $r15 E_BAD_COMMAND
  368. call error
  369. bra main // no completion flag is raised for a rejected command
  370. main_done:
  371. mov $r1 0x820
  372. shl b32 $r1 6
  373. clear b32 $r2
  374. bset $r2 31
  375. iowr I[$r1 + 0x000] $r2 // CC_SCRATCH[0] |= 0x80000000
  376. bra main
  377. // interrupt handler: queues fifo commands and context switch requests on cmd_queue, passes every other pending interrupt up to the host, acks what was pending, and clears $p0 so main() stops sleeping. $r8-$r11, $r13-$r15 and $flags are saved on entry and restored on exit.
  378. ih:
  379. push $r8
  380. mov $r8 $flags
  381. push $r8
  382. push $r9
  383. push $r10
  384. push $r11
  385. push $r13
  386. push $r14
  387. push $r15
  388. // incoming fifo command?
  389. iord $r10 I[$r0 + 0x200] // INTR; $r10 must keep this value until the ack at the end
  390. and $r11 $r10 0x00000004
  391. bra e ih_no_fifo
  392. // queue incoming fifo command for later processing
  393. mov $r11 0x1900
  394. mov $r13 cmd_queue
  395. iord $r14 I[$r11 + 0x100] // FIFO_CMD
  396. iord $r15 I[$r11 + 0x000] // FIFO_DATA
  397. call queue_put
  398. add b32 $r11 0x400
  399. mov $r14 1
  400. iowr I[$r11 + 0x000] $r14 // FIFO_ACK
  401. // context switch request?
  402. ih_no_fifo:
  403. and $r11 $r10 0x00000100
  404. bra e ih_no_ctxsw
  405. // enqueue a context switch for later processing
  406. mov $r13 cmd_queue
  407. mov $r14 0x4001 // command number main() treats as a context switch; $r15 is not set for it
  408. call queue_put
  409. // anything we didn't handle, bring it to the host's attention
  410. ih_no_ctxsw:
  411. mov $r11 0x104 // the two sources handled above: fifo (0x004) and ctxsw (0x100)
  412. not b32 $r11
  413. and $r11 $r10 $r11 // pending bits other than those two
  414. bra e ih_no_other
  415. mov $r14 0xc1c // address goes in $r14, not $r10: overwriting $r10 made the ack below write the register address instead of the pending bits
  416. shl b32 $r14 6
  417. iowr I[$r14] $r11 // INTR_UP_SET
  418. // ack, and wake up main()
  419. ih_no_other:
  420. iowr I[$r0 + 0x100] $r10 // INTR_ACK, with the INTR value read on entry
  421. pop $r15
  422. pop $r14
  423. pop $r13
  424. pop $r11
  425. pop $r10
  426. pop $r9
  427. pop $r8
  428. mov $flags $r8
  429. pop $r8
  430. bclr $flags $p0 // done after $flags is restored so the wake-up is not undone
  431. iret
  432. // Not real sure, but, MEM_CMD 7 will hang forever if this isn't done
  433. ctx_4160s:
  434. mov $r14 0x4160
  435. sethi $r14 0x400000 // $r14 = 0x404160
  436. mov $r15 1
  437. call nv_wr32 // write 1 to 0x404160
  438. ctx_4160s_wait:
  439. call nv_rd32 // $r14 is not reloaded, so this relies on the helpers leaving it intact
  440. xbit $r15 $r15 4
  441. bra e ctx_4160s_wait // poll until bit 4 reads back as set
  442. ret
  443. // Without clearing again at end of xfer, some things cause PGRAPH
  444. // to hang with STATUS=0x00000007 until it's cleared.. fbcon can
  445. // still function with it set however...
  446. ctx_4160c:
  447. mov $r14 0x4160
  448. sethi $r14 0x400000 // $r14 = 0x404160, the register ctx_4160s set
  449. clear b32 $r15
  450. call nv_wr32 // write 0; unlike ctx_4160s there is no wait afterwards
  451. ret
  452. // Again, not real sure
  453. //
  454. // In: $r15 value to set 0x404170 to
  455. // bit 4 (0x10) is always added to the value; ctx_4170w waits for it to read back clear
  456. ctx_4170s:
  457. mov $r14 0x4170
  458. sethi $r14 0x400000 // $r14 = 0x404170
  459. or $r15 0x10
  460. call nv_wr32
  461. ret
  462. // Waits for a ctx_4170s() call to complete
  463. // by polling 0x404170 until bit 4 (0x10) reads as zero
  464. ctx_4170w:
  465. mov $r14 0x4170
  466. sethi $r14 0x400000 // $r14 = 0x404170, rebuilt on every poll
  467. call nv_rd32
  468. and $r15 0x10
  469. bra ne ctx_4170w // bit still set: read again
  470. ret
  471. // Disables various things, waits a bit, and re-enables them..
  472. //
  473. // Not sure how exactly this helps, perhaps "ENABLE" is not such a
  474. // good description for the bits we turn off? Anyways, without this,
  475. // funny things happen.
  476. // Uses $r14 and $r15 as scratch without saving them.
  477. ctx_redswitch:
  478. mov $r14 0x614
  479. shl b32 $r14 6
  480. mov $r15 0x270
  481. iowr I[$r14] $r15 // HUB_RED_SWITCH = ENABLE_GPC, POWER_ALL
  482. mov $r15 8 // the "wait a bit": eight passes of the countdown below
  483. ctx_redswitch_delay:
  484. sub b32 $r15 1
  485. bra ne ctx_redswitch_delay
  486. mov $r15 0x770
  487. iowr I[$r14] $r15 // HUB_RED_SWITCH = ENABLE_ALL, POWER_ALL
  488. ret
  489. // Not a clue what this is for, except that unless the value is 0x10, the
  490. // strand context is saved (and presumably restored) incorrectly..
  491. //
  492. // In: $r15 value to set to (0x00/0x10 are used)
  493. // The same $r15 is written to all three registers, so this relies on nv_wr32 leaving it intact.
  494. ctx_86c:
  495. mov $r14 0x86c
  496. shl b32 $r14 6
  497. iowr I[$r14] $r15 // HUB(0x86c) = val
  498. mov $r14 -0x75ec
  499. sethi $r14 0x400000 // $r14 = 0x408a14
  500. call nv_wr32 // ROP(0xa14) = val
  501. mov $r14 -0x5794
  502. sethi $r14 0x410000 // $r14 = 0x41a86c
  503. call nv_wr32 // GPC(0x86c) = val
  504. ret
  505. // ctx_load - loads a channel's ctxctl data, and selects its vm
  506. //
  507. // In: $r2 channel address
  508. // Out: ctx_current holds the context pointer and chan_data the first 256 bytes of the context; $r1-$r4 and $r10 are overwritten
  509. ctx_load:
  510. trace_set(T_CHAN)
  511. // switch to channel, somewhat magic in parts..
  512. mov $r10 12 // DONE_UNK12
  513. call wait_donez
  514. mov $r1 0xa24
  515. shl b32 $r1 6
  516. iowr I[$r1 + 0x000] $r0 // 0x409a24
  517. mov $r3 0xb00
  518. shl b32 $r3 6
  519. iowr I[$r3 + 0x100] $r2 // CHAN_NEXT
  520. mov $r1 0xa0c
  521. shl b32 $r1 6
  522. mov $r4 7
  523. iowr I[$r1 + 0x000] $r2 // MEM_CHAN
  524. iowr I[$r1 + 0x100] $r4 // MEM_CMD
  525. ctx_chan_wait_0:
  526. iord $r4 I[$r1 + 0x100]
  527. and $r4 0x1f
  528. bra ne ctx_chan_wait_0 // poll until the low five bits of MEM_CMD read zero
  529. iowr I[$r3 + 0x000] $r2 // CHAN_CUR
  530. // load channel header, fetch PGRAPH context pointer
  531. mov $xtargets $r0
  532. bclr $r2 31 // drop the flag bit, leaving the address part of the channel value
  533. shl b32 $r2 4
  534. add b32 $r2 2 // with the 0x10 transfer offset below this makes up the "chan + 0x0210" location
  535. trace_set(T_LCHAN)
  536. mov $r1 0xa04
  537. shl b32 $r1 6
  538. iowr I[$r1 + 0x000] $r2 // MEM_BASE
  539. mov $r1 0xa20
  540. shl b32 $r1 6
  541. mov $r2 0x0002
  542. sethi $r2 0x80000000
  543. iowr I[$r1 + 0x000] $r2 // MEM_TARGET = vram
  544. mov $r1 0x10 // chan + 0x0210
  545. mov $r2 xfer_data
  546. sethi $r2 0x00020000 // 16 bytes
  547. xdld $r1 $r2
  548. xdwait
  549. trace_clr(T_LCHAN)
  550. // update current context
  551. ld b32 $r1 D[$r0 + xfer_data + 4] // second word fetched from the channel header
  552. shl b32 $r1 24
  553. ld b32 $r2 D[$r0 + xfer_data + 0] // first word fetched from the channel header
  554. shr b32 $r2 8
  555. or $r1 $r2 // (second << 24) | (first >> 8): the two-word pointer in 256-byte units
  556. st b32 D[$r0 + ctx_current] $r1
  557. // set transfer base to start of context, and fetch context header
  558. trace_set(T_LCTXH)
  559. mov $r2 0xa04
  560. shl b32 $r2 6
  561. iowr I[$r2 + 0x000] $r1 // MEM_BASE
  562. mov $r2 1
  563. mov $r1 0xa20
  564. shl b32 $r1 6
  565. iowr I[$r1 + 0x000] $r2 // MEM_TARGET = vm
  566. mov $r1 chan_data
  567. sethi $r1 0x00060000 // 256 bytes
  568. xdld $r0 $r1 // offset 0 of the context into chan_data
  569. xdwait
  570. trace_clr(T_LCTXH)
  571. trace_clr(T_CHAN)
  572. ret
  573. // ctx_chan - handler for HUB_SET_CHAN command, will set a channel as
  574. // the active channel for ctxctl, but not actually transfer
  575. // any context data. intended for use only during initial
  576. // context construction.
  577. //
  578. // In: $r2 channel address
  579. // Sequence: ctx_4160s, ctx_load, MEM_CMD 5 and wait, ctx_4160c
  580. ctx_chan:
  581. call ctx_4160s
  582. call ctx_load // consumes the channel address in $r2
  583. mov $r10 12 // DONE_UNK12
  584. call wait_donez
  585. mov $r1 0xa10
  586. shl b32 $r1 6
  587. mov $r2 5
  588. iowr I[$r1 + 0x000] $r2 // MEM_CMD = 5 (???)
  589. ctx_chan_wait:
  590. iord $r2 I[$r1 + 0x000]
  591. or $r2 $r2 // sets the flags from the value just read
  592. bra ne ctx_chan_wait // poll until MEM_CMD reads zero
  593. call ctx_4160c
  594. ret
  595. // Execute per-context state overrides list
  596. //
  597. // Only executed on the first load of a channel. Might want to look into
  598. // removing this and having the host directly modify the channel's context
  599. // to change this state... The nouveau DRM already builds this list as
  600. // it's definitely needed for NVIDIA's, so we may as well use it for now
  601. //
  602. // Input: $r1 mmio list length, counted in 8-byte (address, value) entries; must be nonzero because the loop tests after the body
  603. //
  604. ctx_mmio_exec:
  605. // set transfer base to be the mmio list
  606. ld b32 $r3 D[$r0 + chan_mmio_address]
  607. mov $r2 0xa04
  608. shl b32 $r2 6 // $r2 is used again after the loop, so it relies on nv_wr32 leaving it intact
  609. iowr I[$r2 + 0x000] $r3 // MEM_BASE
  610. clear b32 $r3 // $r3 = byte offset into the list
  611. ctx_mmio_loop:
  612. // fetch next 256 bytes of mmio list if necessary
  613. and $r4 $r3 0xff // $r4 = offset inside the current 256-byte chunk
  614. bra ne ctx_mmio_pull // not at a chunk boundary: the entry is already in xfer_data
  615. mov $r5 xfer_data
  616. sethi $r5 0x00060000 // 256 bytes
  617. xdld $r3 $r5
  618. xdwait
  619. // execute a single list entry
  620. ctx_mmio_pull:
  621. ld b32 $r14 D[$r4 + xfer_data + 0x00] // register address
  622. ld b32 $r15 D[$r4 + xfer_data + 0x04] // value to write
  623. call nv_wr32
  624. // next!
  625. add b32 $r3 8
  626. sub b32 $r1 1
  627. bra ne ctx_mmio_loop
  628. // set transfer base back to the current context
  629. ctx_mmio_done:
  630. ld b32 $r3 D[$r0 + ctx_current]
  631. iowr I[$r2 + 0x000] $r3 // MEM_BASE
  632. // disable the mmio list now, we don't need/want to execute it again
  633. st b32 D[$r0 + chan_mmio_count] $r0
  634. mov $r1 chan_data
  635. sethi $r1 0x00060000 // 256 bytes
  636. xdst $r0 $r1 // write chan_data, with the zeroed count, back to offset 0 of the context
  637. xdwait
  638. ret
  639. // Transfer HUB context data between GPU and storage area
  640. //
  641. // In: $r2 channel address
  642. // $p1 clear on save, set on load
  643. // $p2 set if opposite direction done/will be done, so:
  644. // on save it means: "a load will follow this save"
  645. // on load it means: "a save preceded this load"
  646. //
  647. ctx_xfer:
  648. bra not $p1 ctx_xfer_pre // a save always runs the pre sequence
  649. bra $p2 ctx_xfer_pre_load // a load that follows a save skips it
  650. ctx_xfer_pre:
  651. mov $r15 0x10
  652. call ctx_86c
  653. call ctx_4160s
  654. bra not $p1 ctx_xfer_exec // a save does not need the load-only steps below
  655. ctx_xfer_pre_load:
  656. mov $r15 2
  657. call ctx_4170s
  658. call ctx_4170w
  659. call ctx_redswitch
  660. clear b32 $r15
  661. call ctx_4170s
  662. call ctx_load // switches to the channel in $r2 and updates ctx_current
  663. // fetch context pointer, and initiate xfer on all GPCs
  664. ctx_xfer_exec:
  665. ld b32 $r1 D[$r0 + ctx_current]
  666. mov $r2 0x414
  667. shl b32 $r2 6
  668. iowr I[$r2 + 0x000] $r0 // BAR_STATUS = reset
  669. mov $r14 -0x5b00
  670. sethi $r14 0x410000 // $r14 = 0x41a500
  671. mov b32 $r15 $r1
  672. call nv_wr32 // GPC_BCAST_WRCMD_DATA = ctx pointer
  673. add b32 $r14 4
  674. xbit $r15 $flags $p1
  675. xbit $r2 $flags $p2
  676. shl b32 $r2 1
  677. or $r15 $r2 // type = $p1 | ($p2 << 1)
  678. call nv_wr32 // GPC_BCAST_WRCMD_CMD = GPC_XFER(type)
  679. // strands
  680. mov $r1 0x4afc
  681. sethi $r1 0x20000
  682. mov $r2 0xc
  683. iowr I[$r1] $r2 // STRAND_CMD(0x3f) = 0x0c
  684. call strand_wait
  685. mov $r2 0x47fc
  686. sethi $r2 0x20000
  687. iowr I[$r2] $r0 // STRAND_FIRST_GENE(0x3f) = 0x00
  688. xbit $r2 $flags $p1
  689. add b32 $r2 3 // 3 when saving, 4 when loading
  690. iowr I[$r1] $r2 // STRAND_CMD(0x3f) = 0x03/0x04 (SAVE/LOAD)
  691. // mmio context
  692. xbit $r10 $flags $p1 // direction
  693. or $r10 6 // first, last
  694. mov $r11 0 // base = 0
  695. ld b32 $r12 D[$r0 + hub_mmio_list_head]
  696. ld b32 $r13 D[$r0 + hub_mmio_list_tail]
  697. mov $r14 0 // not multi
  698. call mmctx_xfer
  699. // wait for GPCs to all complete
  700. mov $r10 8 // DONE_BAR
  701. call wait_doneo
  702. // wait for strand xfer to complete
  703. call strand_wait
  704. // post-op
  705. bra $p1 ctx_xfer_post // a load goes straight to the post sequence
  706. mov $r10 12 // DONE_UNK12
  707. call wait_donez
  708. mov $r1 0xa10
  709. shl b32 $r1 6
  710. mov $r2 5
  711. iowr I[$r1] $r2 // MEM_CMD
  712. ctx_xfer_post_save_wait:
  713. iord $r2 I[$r1]
  714. or $r2 $r2
  715. bra ne ctx_xfer_post_save_wait // poll until MEM_CMD reads zero
  716. bra $p2 ctx_xfer_done // a load follows this save: leave the post sequence to it
  717. ctx_xfer_post:
  718. mov $r15 2
  719. call ctx_4170s
  720. clear b32 $r15
  721. call ctx_86c
  722. call strand_post
  723. call ctx_4170w
  724. clear b32 $r15
  725. call ctx_4170s
  726. bra not $p1 ctx_xfer_no_post_mmio // the override list only applies to loads
  727. ld b32 $r1 D[$r0 + chan_mmio_count]
  728. or $r1 $r1
  729. bra e ctx_xfer_no_post_mmio // zero count: nothing to execute
  730. call ctx_mmio_exec // $r1 = number of entries
  731. ctx_xfer_no_post_mmio:
  732. call ctx_4160c
  733. ctx_xfer_done:
  734. ret
  735. .align 256