nvc0_grhub.fuc 18 KB


  1. /* fuc microcode for nvc0 PGRAPH/HUB
  2. *
  3. * Copyright 2011 Red Hat Inc.
  4. *
  5. * Permission is hereby granted, free of charge, to any person obtaining a
  6. * copy of this software and associated documentation files (the "Software"),
  7. * to deal in the Software without restriction, including without limitation
  8. * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  9. * and/or sell copies of the Software, and to permit persons to whom the
  10. * Software is furnished to do so, subject to the following conditions:
  11. *
  12. * The above copyright notice and this permission notice shall be included in
  13. * all copies or substantial portions of the Software.
  14. *
  15. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  18. * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
  19. * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  20. * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  21. * OTHER DEALINGS IN THE SOFTWARE.
  22. *
  23. * Authors: Ben Skeggs
  24. */
  25. /* To build:
  26. * m4 nvc0_grhub.fuc | envyas -a -w -m fuc -V nva3 -o nvc0_grhub.fuc.h
  27. */
  28. .section nvc0_grhub_data
  29. include(`nvc0_graph.fuc')
  30. gpc_count: .b32 0 // GPC count, stored by init from bits 4:0 of the 0x409604 read
  31. rop_count: .b32 0 // ROP count, stored by init from bits 20:16 of the same read
  32. cmd_queue: queue_init
  33. hub_mmio_list_head: .b32 0 // start of the mmio list init picked from the chipsets table
  34. hub_mmio_list_tail: .b32 0 // end of that list
  35. ctx_current: .b32 0 // context pointer computed by ctx_load, read by ctx_xfer and ctx_mmio_exec
  36. chipsets: // table of 8-byte entries searched by init: 32-bit chipset id, 16-bit list head, 16-bit list tail
  37. .b8 0xc0 0 0 0
  38. .b16 nvc0_hub_mmio_head
  39. .b16 nvc0_hub_mmio_tail
  40. .b8 0xc1 0 0 0
  41. .b16 nvc0_hub_mmio_head
  42. .b16 nvc1_hub_mmio_tail // only 0xc1 uses the longer list that ends one entry later
  43. .b8 0xc3 0 0 0
  44. .b16 nvc0_hub_mmio_head
  45. .b16 nvc0_hub_mmio_tail
  46. .b8 0xc4 0 0 0
  47. .b16 nvc0_hub_mmio_head
  48. .b16 nvc0_hub_mmio_tail
  49. .b8 0xc8 0 0 0
  50. .b16 nvc0_hub_mmio_head
  51. .b16 nvc0_hub_mmio_tail
  52. .b8 0xce 0 0 0
  53. .b16 nvc0_hub_mmio_head
  54. .b16 nvc0_hub_mmio_tail
  55. .b8 0 0 0 0 // terminator: an id of 0 ends the search in init (unknown chipset)
  56. nvc0_hub_mmio_head: // start of the hub mmio list; every chipsets entry uses this head
  57. mmctx_data(0x17e91c, 2)
  58. mmctx_data(0x400204, 2)
  59. mmctx_data(0x404004, 11)
  60. mmctx_data(0x404044, 1)
  61. mmctx_data(0x404094, 14)
  62. mmctx_data(0x4040d0, 7)
  63. mmctx_data(0x4040f8, 1)
  64. mmctx_data(0x404130, 3)
  65. mmctx_data(0x404150, 3)
  66. mmctx_data(0x404164, 2)
  67. mmctx_data(0x404174, 3)
  68. mmctx_data(0x404200, 8)
  69. mmctx_data(0x404404, 14)
  70. mmctx_data(0x404460, 4)
  71. mmctx_data(0x404480, 1)
  72. mmctx_data(0x404498, 1)
  73. mmctx_data(0x404604, 4)
  74. mmctx_data(0x404618, 32)
  75. mmctx_data(0x404698, 21)
  76. mmctx_data(0x4046f0, 2)
  77. mmctx_data(0x404700, 22)
  78. mmctx_data(0x405800, 1)
  79. mmctx_data(0x405830, 3)
  80. mmctx_data(0x405854, 1)
  81. mmctx_data(0x405870, 4)
  82. mmctx_data(0x405a00, 2)
  83. mmctx_data(0x405a18, 1)
  84. mmctx_data(0x406020, 1)
  85. mmctx_data(0x406028, 4)
  86. mmctx_data(0x4064a8, 2)
  87. mmctx_data(0x4064b4, 2)
  88. mmctx_data(0x407804, 1)
  89. mmctx_data(0x40780c, 6)
  90. mmctx_data(0x4078bc, 1)
  91. mmctx_data(0x408000, 7)
  92. mmctx_data(0x408064, 1)
  93. mmctx_data(0x408800, 3)
  94. mmctx_data(0x408900, 4)
  95. mmctx_data(0x408980, 1)
  96. nvc0_hub_mmio_tail: // list end for every chipset in the table except 0xc1
  97. mmctx_data(0x4064c0, 2)
  98. nvc1_hub_mmio_tail: // list end for 0xc1, which also gets the entry just above
  99. .align 256
  100. chan_data: // buffer ctx_load fills with a 256-byte xdld; ctx_mmio_exec writes it back with xdst
  101. chan_mmio_count: .b32 0 // tested by ctx_xfer after a load; non-zero triggers ctx_mmio_exec, which then zeroes it
  102. chan_mmio_address: .b32 0 // written to MEM_BASE by ctx_mmio_exec before it fetches the list
  103. .align 256
  104. xfer_data: .b32 0 // xdld target: 16 bytes in ctx_load, 256 bytes in ctx_mmio_exec. NOTE(review): only 4 bytes are reserved here, so this relies on being the last data item - confirm
  105. .section nvc0_grhub_code
  106. bra init // first instruction of the code section: jump over the included code to init
  107. define(`include_code')
  108. include(`nvc0_graph.fuc')
  109. // reports an exception to the host: stores the code in CC_SCRATCH[5], then writes 1 to INTR_UP_SET
  110. //
  111. // In: $r15 error code (see nvc0_graph.fuc)
  112. // Clobbers: $r15 (left holding 1); $r14 is saved and restored
  113. error:
  114. push $r14
  115. mov $r14 0x814
  116. shl b32 $r14 6 // I/O addresses in this file are built as (offset << 6)
  117. iowr I[$r14 + 0x000] $r15 // CC_SCRATCH[5] = error code
  118. mov $r14 0xc1c
  119. shl b32 $r14 6
  120. mov $r15 1
  121. iowr I[$r14 + 0x000] $r15 // INTR_UP_SET
  122. pop $r14
  123. ret
  124. // HUB fuc initialisation, executed by triggering ucode start, will
  125. // fall through to main loop after completion.
  126. //
  127. // Input:
  128. // CC_SCRATCH[0]: chipset (PMC_BOOT_0 read returns 0x0bad0bad... sigh)
  129. //
  130. // Output:
  131. // CC_SCRATCH[0]:
  132. // 31:31: set to signal completion
  133. // CC_SCRATCH[1]:
  134. // 31:0: total PGRAPH context size
  135. //
  136. init:
  137. clear b32 $r0 // $r0 stays zero and is used as the zero base/value below
  138. mov $sp $r0 // stack pointer = 0
  139. mov $xdbase $r0
  140. // enable fifo access
  141. mov $r1 0x1200
  142. mov $r2 2
  143. iowr I[$r1 + 0x000] $r2 // FIFO_ENABLE
  144. // setup i0 handler, and route all interrupts to it
  145. mov $r1 ih
  146. mov $iv0 $r1
  147. mov $r1 0x400 // $r1 = 0x400 is reused as the base for INTR_EN_SET further down
  148. iowr I[$r1 + 0x300] $r0 // INTR_DISPATCH
  149. // route HUB_CHANNEL_SWITCH to fuc interrupt 8
  150. mov $r3 0x404
  151. shl b32 $r3 6
  152. mov $r2 0x2003 // { HUB_CHANNEL_SWITCH, ZERO } -> intr 8
  153. iowr I[$r3 + 0x000] $r2
  154. // not sure what these are, route them because NVIDIA does, and
  155. // the IRQ handler will signal the host if we ever get one.. we
  156. // may find out if/why we need to handle these if so..
  157. //
  158. mov $r2 0x2004
  159. iowr I[$r3 + 0x004] $r2 // { 0x04, ZERO } -> intr 9
  160. mov $r2 0x200b
  161. iowr I[$r3 + 0x008] $r2 // { 0x0b, ZERO } -> intr 10
  162. mov $r2 0x200c
  163. iowr I[$r3 + 0x01c] $r2 // { 0x0c, ZERO } -> intr 15
  164. // enable all INTR_UP interrupts
  165. mov $r2 0xc24
  166. shl b32 $r2 6
  167. not b32 $r3 $r0 // $r3 = 0xffffffff
  168. iowr I[$r2] $r3
  169. // enable fifo, ctxsw, 9, 10, 15 interrupts
  170. mov $r2 -0x78fc // 0x8704
  171. sethi $r2 0
  172. iowr I[$r1 + 0x000] $r2 // INTR_EN_SET
  173. // fifo level triggered, rest edge
  174. sub b32 $r1 0x100 // $r1 = 0x300
  175. mov $r2 4 // bit 2 is the fifo interrupt tested by ih
  176. iowr I[$r1] $r2
  177. // enable interrupts
  178. bset $flags ie0
  179. // fetch enabled GPC/ROP counts
  180. mov $r14 -0x69fc // 0x409604
  181. sethi $r14 0x400000
  182. call nv_rd32
  183. extr $r1 $r15 16:20 // ROP count field
  184. st b32 D[$r0 + rop_count] $r1
  185. and $r15 0x1f // GPC count field
  186. st b32 D[$r0 + gpc_count] $r15
  187. // set BAR_REQMASK to GPC mask
  188. mov $r1 1
  189. shl b32 $r1 $r15
  190. sub b32 $r1 1 // $r1 = (1 << gpc_count) - 1
  191. mov $r2 0x40c
  192. shl b32 $r2 6
  193. iowr I[$r2 + 0x000] $r1
  194. iowr I[$r2 + 0x100] $r1
  195. // find context data for this chipset
  196. mov $r2 0x800
  197. shl b32 $r2 6
  198. iord $r2 I[$r2 + 0x000] // CC_SCRATCH[0]
  199. mov $r15 chipsets - 8 // one entry before the table, the loop adds 8 first
  200. init_find_chipset:
  201. add b32 $r15 8 // advance to the next 8-byte table entry
  202. ld b32 $r3 D[$r15 + 0x00] // entry chipset id
  203. cmpu b32 $r3 $r2
  204. bra e init_context
  205. cmpu b32 $r3 0 // id 0 terminates the table
  206. bra ne init_find_chipset
  207. // unknown chipset
  208. ret // NOTE(review): init is entered by bra with $sp = 0, so this ret has no return address pushed - confirm intended behaviour
  209. // context size calculation, reserve first 256 bytes for use by fuc
  210. init_context:
  211. mov $r1 256 // $r1 accumulates the total context size
  212. // calculate size of mmio context data
  213. ld b16 $r14 D[$r15 + 4] // list head from the table entry
  214. ld b16 $r15 D[$r15 + 6] // list tail from the table entry
  215. sethi $r14 0 // clear the upper half left over from the 0x409604 address
  216. st b32 D[$r0 + hub_mmio_list_head] $r14
  217. st b32 D[$r0 + hub_mmio_list_tail] $r15
  218. call mmctx_size
  219. // set mmctx base addresses now so we don't have to do it later,
  220. // they don't (currently) ever change
  221. mov $r3 0x700
  222. shl b32 $r3 6
  223. shr b32 $r4 $r1 8 // $r4 = $r1 / 256
  224. iowr I[$r3 + 0x000] $r4 // MMCTX_SAVE_SWBASE
  225. iowr I[$r3 + 0x100] $r4 // MMCTX_LOAD_SWBASE
  226. add b32 $r3 0x1300
  227. add b32 $r1 $r15 // presumably $r15 is the size returned by mmctx_size - confirm in nvc0_graph.fuc
  228. shr b32 $r15 2 // size / 4
  229. iowr I[$r3 + 0x000] $r15 // MMCTX_LOAD_COUNT, wtf for?!?
  230. // strands, base offset needs to be aligned to 256 bytes
  231. shr b32 $r1 8
  232. add b32 $r1 1 // always moves to the next 256-byte boundary, even when already aligned
  233. shl b32 $r1 8
  234. mov b32 $r15 $r1
  235. call strand_ctx_init
  236. add b32 $r1 $r15 // presumably $r15 is the strand context size returned by the call - confirm
  237. // initialise each GPC in sequence by passing in the offset of its
  238. // context data in GPCn_CC_SCRATCH[1], and starting its FUC (which
  239. // has previously been uploaded by the host) running.
  240. //
  241. // the GPC fuc init sequence will set GPCn_CC_SCRATCH[0] bit 31
  242. // when it has completed, and return the size of its context data
  243. // in GPCn_CC_SCRATCH[1]
  244. //
  245. ld b32 $r3 D[$r0 + gpc_count] // $r3 = GPCs left to start
  246. mov $r4 0x2000
  247. sethi $r4 0x500000 // $r4 = 0x502000, register base used for the first GPC
  248. init_gpc:
  249. // setup, and start GPC ucode running
  250. add b32 $r14 $r4 0x804
  251. mov b32 $r15 $r1
  252. call nv_wr32 // CC_SCRATCH[1] = ctx offset
  253. add b32 $r14 $r4 0x800
  254. mov b32 $r15 $r2 // $r2 still holds the chipset read from CC_SCRATCH[0]
  255. call nv_wr32 // CC_SCRATCH[0] = chipset
  256. add b32 $r14 $r4 0x10c
  257. clear b32 $r15
  258. call nv_wr32 // base + 0x10c = 0
  259. add b32 $r14 $r4 0x104
  260. call nv_wr32 // ENTRY
  261. add b32 $r14 $r4 0x100
  262. mov $r15 2 // CTRL_START_TRIGGER
  263. call nv_wr32 // CTRL
  264. // wait for it to complete, and adjust context size
  265. add b32 $r14 $r4 0x800
  266. init_gpc_wait:
  267. call nv_rd32
  268. xbit $r15 $r15 31 // completion bit of GPCn_CC_SCRATCH[0]
  269. bra e init_gpc_wait
  270. add b32 $r14 $r4 0x804
  271. call nv_rd32
  272. add b32 $r1 $r15 // add the size this GPC reported
  273. // next!
  274. add b32 $r4 0x8000 // step to the next GPC register base
  275. sub b32 $r3 1
  276. bra ne init_gpc
  277. // save context size, and tell host we're ready
  278. mov $r2 0x800
  279. shl b32 $r2 6
  280. iowr I[$r2 + 0x100] $r1 // CC_SCRATCH[1] = context size
  281. add b32 $r2 0x800 // now (0x820 << 6), the same address main_done writes
  282. clear b32 $r1
  283. bset $r1 31
  284. iowr I[$r2 + 0x000] $r1 // CC_SCRATCH[0] |= 0x80000000
  285. // Main program loop, very simple, sleeps until woken up by the interrupt
  286. // handler, pulls a command from the queue and executes its handler
  287. //
  288. main:
  289. // sleep until we have something to do
  290. bset $flags $p0 // ih clears $p0 just before its iret
  291. sleep $p0
  292. mov $r13 cmd_queue
  293. call queue_get
  294. bra $p1 main // presumably queue_get sets $p1 when nothing was dequeued - confirm in nvc0_graph.fuc
  295. // context switch, requested by GPU?
  296. cmpu b32 $r14 0x4001 // 0x4001 is the command ih queues for a context switch request
  297. bra ne main_not_ctx_switch
  298. trace_set(T_AUTO)
  299. mov $r1 0xb00
  300. shl b32 $r1 6
  301. iord $r2 I[$r1 + 0x100] // CHAN_NEXT
  302. iord $r1 I[$r1 + 0x000] // CHAN_CUR
  303. xbit $r3 $r1 31 // bit 31 of CHAN_CUR clear -> no previous channel
  304. bra e chsw_no_prev
  305. xbit $r3 $r2 31 // bit 31 of CHAN_NEXT clear -> no next channel
  306. bra e chsw_prev_no_next
  307. push $r2 // keep CHAN_NEXT across the save
  308. mov b32 $r2 $r1 // ctx_xfer takes the channel in $r2
  309. trace_set(T_SAVE)
  310. bclr $flags $p1 // $p1 clear = save
  311. bset $flags $p2 // $p2 set = a load will follow this save
  312. call ctx_xfer
  313. trace_clr(T_SAVE);
  314. pop $r2
  315. trace_set(T_LOAD);
  316. bset $flags $p1 // $p1 set = load; $p2 is not touched again here
  317. call ctx_xfer
  318. trace_clr(T_LOAD);
  319. bra chsw_done
  320. chsw_prev_no_next:
  321. push $r2
  322. mov b32 $r2 $r1
  323. bclr $flags $p1 // save...
  324. bclr $flags $p2 // ...with no load to follow
  325. call ctx_xfer
  326. pop $r2
  327. mov $r1 0xb00
  328. shl b32 $r1 6
  329. iowr I[$r1] $r2 // CHAN_CUR = the CHAN_NEXT value read above
  330. bra chsw_done
  331. chsw_no_prev:
  332. xbit $r3 $r2 31
  333. bra e chsw_done // neither previous nor next channel: just ack
  334. bset $flags $p1 // load...
  335. bclr $flags $p2 // ...with no save before it
  336. call ctx_xfer
  337. // ack the context switch request
  338. chsw_done:
  339. mov $r1 0xb0c
  340. shl b32 $r1 6
  341. mov $r2 1
  342. iowr I[$r1 + 0x000] $r2 // 0x409b0c
  343. trace_clr(T_AUTO)
  344. bra main
  345. // request to set current channel? (*not* a context switch)
  346. main_not_ctx_switch:
  347. cmpu b32 $r14 0x0001
  348. bra ne main_not_ctx_chan
  349. mov b32 $r2 $r15 // command data is the channel address for ctx_chan
  350. call ctx_chan
  351. bra main_done
  352. // request to store current channel context?
  353. main_not_ctx_chan:
  354. cmpu b32 $r14 0x0002
  355. bra ne main_not_ctx_save
  356. trace_set(T_SAVE)
  357. bclr $flags $p1 // save...
  358. bclr $flags $p2 // ...with no load to follow
  359. call ctx_xfer
  360. trace_clr(T_SAVE)
  361. bra main_done
  362. main_not_ctx_save:
  363. shl b32 $r15 $r14 16 // unknown command number goes in the upper 16 bits of the error code
  364. or $r15 E_BAD_COMMAND
  365. call error
  366. bra main
  367. main_done:
  368. mov $r1 0x820
  369. shl b32 $r1 6
  370. clear b32 $r2
  371. bset $r2 31
  372. iowr I[$r1 + 0x000] $r2 // CC_SCRATCH[0] |= 0x80000000
  373. bra main
  374. // interrupt handler: queues fifo commands and context switch requests for main, reports any other interrupt to the host through INTR_UP_SET, then acks what was read and wakes main
  375. ih:
  376. push $r8
  377. mov $r8 $flags
  378. push $r8 // saved copy of $flags, restored before iret
  379. push $r9
  380. push $r10
  381. push $r11
  382. push $r13
  383. push $r14
  384. push $r15
  385. // incoming fifo command?
  386. iord $r10 I[$r0 + 0x200] // INTR
  387. and $r11 $r10 0x00000004
  388. bra e ih_no_fifo
  389. // queue incoming fifo command for later processing
  390. mov $r11 0x1900
  391. mov $r13 cmd_queue
  392. iord $r14 I[$r11 + 0x100] // FIFO_CMD
  393. iord $r15 I[$r11 + 0x000] // FIFO_DATA
  394. call queue_put
  395. add b32 $r11 0x400
  396. mov $r14 1
  397. iowr I[$r11 + 0x000] $r14 // FIFO_ACK
  398. // context switch request?
  399. ih_no_fifo:
  400. and $r11 $r10 0x00000100
  401. bra e ih_no_ctxsw
  402. // enqueue a context switch for later processing
  403. mov $r13 cmd_queue
  404. mov $r14 0x4001 // command number main treats as a context switch
  405. call queue_put
  406. // anything we didn't handle, bring it to the host's attention
  407. ih_no_ctxsw:
  408. mov $r11 0x104
  409. not b32 $r11
  410. and $r11 $r10 $r11 // INTR bits other than fifo (0x004) and ctxsw (0x100)
  411. bra e ih_no_other
  412. mov $r14 0xc1c // address goes in $r14 (saved above) so $r10 keeps the INTR value for the ack
  413. shl b32 $r14 6
  414. iowr I[$r14] $r11 // INTR_UP_SET
  415. // ack, and wake up main()
  416. ih_no_other:
  417. iowr I[$r0 + 0x100] $r10 // INTR_ACK, with the INTR bits read at entry
  418. pop $r15
  419. pop $r14
  420. pop $r13
  421. pop $r11
  422. pop $r10
  423. pop $r9
  424. pop $r8
  425. mov $flags $r8
  426. pop $r8
  427. bclr $flags $p0 // main sleeps on $p0; clearing it after the restore wakes main
  428. iret
  429. // Not real sure, but, MEM_CMD 7 will hang forever if this isn't done. Writes 1 to 0x404160, then polls until bit 4 reads back set. Clobbers $r14, $r15
  430. ctx_4160s:
  431. mov $r14 0x4160
  432. sethi $r14 0x400000 // $r14 = 0x404160
  433. mov $r15 1
  434. call nv_wr32
  435. ctx_4160s_wait:
  436. call nv_rd32 // reuses $r14; presumably nv_wr32 and nv_rd32 leave it intact - confirm in nvc0_graph.fuc
  437. xbit $r15 $r15 4
  438. bra e ctx_4160s_wait // loop while bit 4 is still clear
  439. ret
  440. // Without clearing again at end of xfer, some things cause PGRAPH
  441. // to hang with STATUS=0x00000007 until it's cleared.. fbcon can
  442. // still function with it set however...
  443. ctx_4160c:
  444. mov $r14 0x4160
  445. sethi $r14 0x400000 // $r14 = 0x404160, the register ctx_4160s set to 1
  446. clear b32 $r15
  447. call nv_wr32 // write 0, no wait afterwards
  448. ret
  449. // Again, not real sure
  450. //
  451. // In: $r15 value to set 0x404170 to
  452. // Bit 4 (0x10) is always or-ed into the value written; ctx_4170w polls that bit
  453. ctx_4170s:
  454. mov $r14 0x4170
  455. sethi $r14 0x400000 // $r14 = 0x404170
  456. or $r15 0x10
  457. call nv_wr32
  458. ret
  459. // Waits for a ctx_4170s() call to complete
  460. // by polling 0x404170 until bit 4 (0x10) reads back clear. Clobbers $r14, $r15
  461. ctx_4170w:
  462. mov $r14 0x4170
  463. sethi $r14 0x400000 // $r14 = 0x404170
  464. call nv_rd32
  465. and $r15 0x10
  466. bra ne ctx_4170w // bit 4 still set: read again
  467. ret
  468. // Disables various things, waits a bit, and re-enables them..
  469. //
  470. // Not sure how exactly this helps, perhaps "ENABLE" is not such a
  471. // good description for the bits we turn off? Anyways, without this,
  472. // funny things happen.
  473. // Clobbers $r14, $r15
  474. ctx_redswitch:
  475. mov $r14 0x614
  476. shl b32 $r14 6
  477. mov $r15 0x270
  478. iowr I[$r14] $r15 // HUB_RED_SWITCH = ENABLE_GPC, POWER_ALL
  479. mov $r15 8 // delay loop count
  480. ctx_redswitch_delay:
  481. sub b32 $r15 1
  482. bra ne ctx_redswitch_delay // spin until the counter reaches 0
  483. mov $r15 0x770
  484. iowr I[$r14] $r15 // HUB_RED_SWITCH = ENABLE_ALL, POWER_ALL
  485. ret
  486. // Not a clue what this is for, except that unless the value is 0x10, the
  487. // strand context is saved (and presumably restored) incorrectly..
  488. //
  489. // In: $r15 value to set to (0x00/0x10 are used)
  490. // The same value is written to three places; clobbers $r14
  491. ctx_86c:
  492. mov $r14 0x86c
  493. shl b32 $r14 6
  494. iowr I[$r14] $r15 // HUB(0x86c) = val
  495. mov $r14 -0x75ec // 0x408a14
  496. sethi $r14 0x400000
  497. call nv_wr32 // ROP(0xa14) = val
  498. mov $r14 -0x5794 // 0x41a86c
  499. sethi $r14 0x410000
  500. call nv_wr32 // GPC(0x86c) = val
  501. ret
  502. // ctx_load - loads a channel's ctxctl data, and selects its vm
  503. //
  504. // In: $r2 channel address
  505. // Out: ctx_current and the 256-byte chan_data buffer are updated; $r1-$r4 and $r10 are overwritten
  506. ctx_load:
  507. trace_set(T_CHAN)
  508. // switch to channel, somewhat magic in parts..
  509. mov $r10 12 // DONE_UNK12
  510. call wait_donez
  511. mov $r1 0xa24
  512. shl b32 $r1 6
  513. iowr I[$r1 + 0x000] $r0 // 0x409a24
  514. mov $r3 0xb00
  515. shl b32 $r3 6
  516. iowr I[$r3 + 0x100] $r2 // CHAN_NEXT
  517. mov $r1 0xa0c
  518. shl b32 $r1 6
  519. mov $r4 7
  520. iowr I[$r1 + 0x000] $r2 // MEM_CHAN
  521. iowr I[$r1 + 0x100] $r4 // MEM_CMD
  522. ctx_chan_wait_0:
  523. iord $r4 I[$r1 + 0x100] // read MEM_CMD back
  524. and $r4 0x1f
  525. bra ne ctx_chan_wait_0 // wait until the low 5 bits read 0
  526. iowr I[$r3 + 0x000] $r2 // CHAN_CUR
  527. // load channel header, fetch PGRAPH context pointer
  528. mov $xtargets $r0
  529. bclr $r2 31 // drop bit 31 of the channel value
  530. shl b32 $r2 4
  531. add b32 $r2 2 // with the 0x10 offset below this addresses chan + 0x0210
  532. trace_set(T_LCHAN)
  533. mov $r1 0xa04
  534. shl b32 $r1 6
  535. iowr I[$r1 + 0x000] $r2 // MEM_BASE
  536. mov $r1 0xa20
  537. shl b32 $r1 6
  538. mov $r2 0x0002
  539. sethi $r2 0x80000000 // $r2 = 0x80000002
  540. iowr I[$r1 + 0x000] $r2 // MEM_TARGET = vram
  541. mov $r1 0x10 // chan + 0x0210
  542. mov $r2 xfer_data
  543. sethi $r2 0x00020000 // 16 bytes
  544. xdld $r1 $r2
  545. xdwait
  546. trace_clr(T_LCHAN)
  547. // update current context
  548. ld b32 $r1 D[$r0 + xfer_data + 4]
  549. shl b32 $r1 24
  550. ld b32 $r2 D[$r0 + xfer_data + 0]
  551. shr b32 $r2 8
  552. or $r1 $r2 // $r1 = (second word << 24) | (first word >> 8)
  553. st b32 D[$r0 + ctx_current] $r1
  554. // set transfer base to start of context, and fetch context header
  555. trace_set(T_LCTXH)
  556. mov $r2 0xa04
  557. shl b32 $r2 6
  558. iowr I[$r2 + 0x000] $r1 // MEM_BASE
  559. mov $r2 1
  560. mov $r1 0xa20
  561. shl b32 $r1 6
  562. iowr I[$r1 + 0x000] $r2 // MEM_TARGET = vm
  563. mov $r1 chan_data
  564. sethi $r1 0x00060000 // 256 bytes
  565. xdld $r0 $r1 // offset 0 of the context into chan_data
  566. xdwait
  567. trace_clr(T_LCTXH)
  568. trace_clr(T_CHAN)
  569. ret
  570. // ctx_chan - handler for HUB_SET_CHAN command, will set a channel as
  571. // the active channel for ctxctl, but not actually transfer
  572. // any context data. intended for use only during initial
  573. // context construction.
  574. //
  575. // In: $r2 channel address
  576. //
  577. ctx_chan:
  578. call ctx_4160s // set 0x404160 before the MEM_CMD 7 issued inside ctx_load
  579. call ctx_load
  580. mov $r10 12 // DONE_UNK12
  581. call wait_donez
  582. mov $r1 0xa10
  583. shl b32 $r1 6
  584. mov $r2 5
  585. iowr I[$r1 + 0x000] $r2 // MEM_CMD = 5 (???)
  586. ctx_chan_wait:
  587. iord $r2 I[$r1 + 0x000]
  588. or $r2 $r2 // or with itself only sets the flags for the branch
  589. bra ne ctx_chan_wait // wait until MEM_CMD reads back 0
  590. call ctx_4160c
  591. ret
  592. // Execute per-context state overrides list
  593. //
  594. // Only executed on the first load of a channel. Might want to look into
  595. // removing this and having the host directly modify the channel's context
  596. // to change this state... The nouveau DRM already builds this list as
  597. // it's definitely needed for NVIDIA's, so we may as well use it for now
  598. //
  599. // Input: $r1 mmio list length, counted in 8-byte entries; must be non-zero (ctx_xfer checks before calling)
  600. //
  601. ctx_mmio_exec:
  602. // set transfer base to be the mmio list
  603. ld b32 $r3 D[$r0 + chan_mmio_address]
  604. mov $r2 0xa04
  605. shl b32 $r2 6
  606. iowr I[$r2 + 0x000] $r3 // MEM_BASE
  607. clear b32 $r3 // $r3 = byte offset into the list
  608. ctx_mmio_loop:
  609. // fetch next 256 bytes of mmio list if necessary
  610. and $r4 $r3 0xff // $r4 = offset inside the current 256-byte chunk
  611. bra ne ctx_mmio_pull // non-zero: the chunk is already in xfer_data
  612. mov $r5 xfer_data
  613. sethi $r5 0x00060000 // 256 bytes
  614. xdld $r3 $r5
  615. xdwait
  616. // execute a single list entry
  617. ctx_mmio_pull:
  618. ld b32 $r14 D[$r4 + xfer_data + 0x00] // first word of the entry, passed as the nv_wr32 address
  619. ld b32 $r15 D[$r4 + xfer_data + 0x04] // second word, passed as the value
  620. call nv_wr32
  621. // next!
  622. add b32 $r3 8 // entries are 8 bytes each
  623. sub b32 $r1 1
  624. bra ne ctx_mmio_loop
  625. // set transfer base back to the current context
  626. ctx_mmio_done:
  627. ld b32 $r3 D[$r0 + ctx_current]
  628. iowr I[$r2 + 0x000] $r3 // MEM_BASE
  629. // disable the mmio list now, we don't need/want to execute it again
  630. st b32 D[$r0 + chan_mmio_count] $r0
  631. mov $r1 chan_data
  632. sethi $r1 0x00060000 // 256 bytes
  633. xdst $r0 $r1 // write chan_data back at offset 0 so the zeroed count is stored with the context
  634. xdwait
  635. ret
  636. // Transfer HUB context data between GPU and storage area
  637. //
  638. // In: $r2 channel address
  639. // $p1 clear on save, set on load
  640. // $p2 set if opposite direction done/will be done, so:
  641. // on save it means: "a load will follow this save"
  642. // on load it means: "a save preceded this load"
  643. //
  644. ctx_xfer:
  645. bra not $p1 ctx_xfer_pre // save: always run the full pre-op
  646. bra $p2 ctx_xfer_pre_load // load right after a save: skip the 0x86c/0x4160 setup
  647. ctx_xfer_pre:
  648. mov $r15 0x10
  649. call ctx_86c
  650. call ctx_4160s
  651. bra not $p1 ctx_xfer_exec // save: nothing to load first
  652. ctx_xfer_pre_load:
  653. mov $r15 2
  654. call ctx_4170s
  655. call ctx_4170w
  656. call ctx_redswitch
  657. clear b32 $r15
  658. call ctx_4170s
  659. call ctx_load // uses the channel address in $r2, sets ctx_current
  660. // fetch context pointer, and initiate xfer on all GPCs
  661. ctx_xfer_exec:
  662. ld b32 $r1 D[$r0 + ctx_current]
  663. mov $r2 0x414
  664. shl b32 $r2 6
  665. iowr I[$r2 + 0x000] $r0 // BAR_STATUS = reset
  666. mov $r14 -0x5b00 // 0x41a500
  667. sethi $r14 0x410000
  668. mov b32 $r15 $r1
  669. call nv_wr32 // GPC_BCAST_WRCMD_DATA = ctx pointer
  670. add b32 $r14 4
  671. xbit $r15 $flags $p1
  672. xbit $r2 $flags $p2
  673. shl b32 $r2 1
  674. or $r15 $r2 // type = $p1 | ($p2 << 1)
  675. call nv_wr32 // GPC_BCAST_WRCMD_CMD = GPC_XFER(type)
  676. // strands
  677. mov $r1 0x4afc
  678. sethi $r1 0x20000 // $r1 = 0x24afc
  679. mov $r2 0xc
  680. iowr I[$r1] $r2 // STRAND_CMD(0x3f) = 0x0c
  681. call strand_wait
  682. mov $r2 0x47fc
  683. sethi $r2 0x20000 // $r2 = 0x247fc
  684. iowr I[$r2] $r0 // STRAND_FIRST_GENE(0x3f) = 0x00
  685. xbit $r2 $flags $p1
  686. add b32 $r2 3 // 3 on save, 4 on load
  687. iowr I[$r1] $r2 // STRAND_CMD(0x3f) = 0x03/0x04 (SAVE/LOAD)
  688. // mmio context
  689. xbit $r10 $flags $p1 // direction
  690. or $r10 6 // first, last
  691. mov $r11 0 // base = 0
  692. ld b32 $r12 D[$r0 + hub_mmio_list_head]
  693. ld b32 $r13 D[$r0 + hub_mmio_list_tail]
  694. mov $r14 0 // not multi
  695. call mmctx_xfer
  696. // wait for GPCs to all complete
  697. mov $r10 8 // DONE_BAR
  698. call wait_doneo
  699. // wait for strand xfer to complete
  700. call strand_wait
  701. // post-op
  702. bra $p1 ctx_xfer_post // load: go straight to the common post-op
  703. mov $r10 12 // DONE_UNK12
  704. call wait_donez
  705. mov $r1 0xa10
  706. shl b32 $r1 6
  707. mov $r2 5
  708. iowr I[$r1] $r2 // MEM_CMD
  709. ctx_xfer_post_save_wait:
  710. iord $r2 I[$r1]
  711. or $r2 $r2 // or with itself only sets the flags for the branch
  712. bra ne ctx_xfer_post_save_wait // wait until MEM_CMD reads back 0
  713. bra $p2 ctx_xfer_done // save followed by a load: leave the post-op to that load
  714. ctx_xfer_post:
  715. mov $r15 2
  716. call ctx_4170s
  717. clear b32 $r15
  718. call ctx_86c
  719. call strand_post
  720. call ctx_4170w
  721. clear b32 $r15
  722. call ctx_4170s
  723. bra not $p1 ctx_xfer_no_post_mmio // the mmio list is only considered after a load
  724. ld b32 $r1 D[$r0 + chan_mmio_count]
  725. or $r1 $r1
  726. bra e ctx_xfer_no_post_mmio // count is 0: no list to run
  727. call ctx_mmio_exec // $r1 = list length
  728. ctx_xfer_no_post_mmio:
  729. call ctx_4160c
  730. ctx_xfer_done:
  731. ret
  732. .align 256