nvc0_grhub.fuc 19 KB


  1. /* fuc microcode for nvc0 PGRAPH/HUB
  2. *
  3. * Copyright 2011 Red Hat Inc.
  4. *
  5. * Permission is hereby granted, free of charge, to any person obtaining a
  6. * copy of this software and associated documentation files (the "Software"),
  7. * to deal in the Software without restriction, including without limitation
  8. * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  9. * and/or sell copies of the Software, and to permit persons to whom the
  10. * Software is furnished to do so, subject to the following conditions:
  11. *
  12. * The above copyright notice and this permission notice shall be included in
  13. * all copies or substantial portions of the Software.
  14. *
  15. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  18. * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
  19. * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  20. * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  21. * OTHER DEALINGS IN THE SOFTWARE.
  22. *
  23. * Authors: Ben Skeggs
  24. */
  25. /* To build:
  26. * m4 nvc0_grhub.fuc | envyas -a -w -m fuc -V nva3 -o nvc0_grhub.fuc.h
  27. */
  // Data segment of the HUB microcode: shared definitions first, then the
  // HUB's own state variables.
  28. .section #nvc0_grhub_data
  // first pass over the shared file, inside the data segment (it is pulled
  // in a second time from the code segment further down)
  29. include(`nvc0_graph.fuc')
  30. gpc_count: .b32 0 // low 5 bits of the 0x409604 readback, stored by init
  31. rop_count: .b32 0 // bits 16:20 of the 0x409604 readback, stored by init
  32. cmd_queue: queue_init // commands queued by ih, consumed by main
  33. hub_mmio_list_head: .b32 0 // start of this chipset's mmio list, stored by init
  34. hub_mmio_list_tail: .b32 0 // end of this chipset's mmio list, stored by init
  35. ctx_current: .b32 0 // context pointer computed by ctx_load
  // Chipset lookup table, searched by init.
  //
  // Every entry is 8 bytes: a 32-bit chipset id (compared against the value
  // the host left in CC_SCRATCH[0]), then two 16-bit data offsets giving the
  // head and tail of the mmio list to use for that chipset. init reads the
  // id at +0, the head at +4 and the tail at +6.
  //
  // The table ends with an all-zero id word; init gives up when it reads it.
  // Only 0xc1 uses the nvc1 tail label, which lies one entry past the nvc0
  // tail, and only 0xd9 uses the separate nvd9 list.
  36. chipsets:
  37. .b8 0xc0 0 0 0
  38. .b16 #nvc0_hub_mmio_head
  39. .b16 #nvc0_hub_mmio_tail
  40. .b8 0xc1 0 0 0
  41. .b16 #nvc0_hub_mmio_head
  42. .b16 #nvc1_hub_mmio_tail
  43. .b8 0xc3 0 0 0
  44. .b16 #nvc0_hub_mmio_head
  45. .b16 #nvc0_hub_mmio_tail
  46. .b8 0xc4 0 0 0
  47. .b16 #nvc0_hub_mmio_head
  48. .b16 #nvc0_hub_mmio_tail
  49. .b8 0xc8 0 0 0
  50. .b16 #nvc0_hub_mmio_head
  51. .b16 #nvc0_hub_mmio_tail
  52. .b8 0xce 0 0 0
  53. .b16 #nvc0_hub_mmio_head
  54. .b16 #nvc0_hub_mmio_tail
  55. .b8 0xcf 0 0 0
  56. .b16 #nvc0_hub_mmio_head
  57. .b16 #nvc0_hub_mmio_tail
  58. .b8 0xd9 0 0 0
  59. .b16 #nvd9_hub_mmio_head
  60. .b16 #nvd9_hub_mmio_tail
  // terminator: zero chipset id (only the id word is present)
  61. .b8 0 0 0 0
  // HUB mmio context list shared by every chipset in the table above except
  // 0xd9. The head and tail offsets are handed to the size calculation in
  // init and to the mmio transfer in ctx_xfer.
  //
  // The entry macro comes from the shared file; its two arguments are
  // presumably a register address and a count of consecutive 32-bit
  // registers - TODO confirm against nvc0_graph.fuc.
  62. nvc0_hub_mmio_head:
  63. mmctx_data(0x17e91c, 2)
  64. mmctx_data(0x400204, 2)
  65. mmctx_data(0x404004, 11)
  66. mmctx_data(0x404044, 1)
  67. mmctx_data(0x404094, 14)
  68. mmctx_data(0x4040d0, 7)
  69. mmctx_data(0x4040f8, 1)
  70. mmctx_data(0x404130, 3)
  71. mmctx_data(0x404150, 3)
  72. mmctx_data(0x404164, 2)
  73. mmctx_data(0x404174, 3)
  74. mmctx_data(0x404200, 8)
  75. mmctx_data(0x404404, 14)
  76. mmctx_data(0x404460, 4)
  77. mmctx_data(0x404480, 1)
  78. mmctx_data(0x404498, 1)
  79. mmctx_data(0x404604, 4)
  80. mmctx_data(0x404618, 32)
  81. mmctx_data(0x404698, 21)
  82. mmctx_data(0x4046f0, 2)
  83. mmctx_data(0x404700, 22)
  84. mmctx_data(0x405800, 1)
  85. mmctx_data(0x405830, 3)
  86. mmctx_data(0x405854, 1)
  87. mmctx_data(0x405870, 4)
  88. mmctx_data(0x405a00, 2)
  89. mmctx_data(0x405a18, 1)
  90. mmctx_data(0x406020, 1)
  91. mmctx_data(0x406028, 4)
  92. mmctx_data(0x4064a8, 2)
  93. mmctx_data(0x4064b4, 2)
  94. mmctx_data(0x407804, 1)
  95. mmctx_data(0x40780c, 6)
  96. mmctx_data(0x4078bc, 1)
  97. mmctx_data(0x408000, 7)
  98. mmctx_data(0x408064, 1)
  99. mmctx_data(0x408800, 3)
  100. mmctx_data(0x408900, 4)
  101. mmctx_data(0x408980, 1)
  // end of the list for all of these chipsets except 0xc1
  102. nvc0_hub_mmio_tail:
  // one additional entry, covered only by the 0xc1 table entry
  103. mmctx_data(0x4064c0, 2)
  104. nvc1_hub_mmio_tail:
  // HUB mmio context list for chipset 0xd9.
  //
  // Same layout as the nvc0 list; the entries that differ from it are
  // 0x404004 (10 instead of 11), 0x404178/2 (instead of 0x404174/3) and
  // 0x4064b4 (5 instead of 2).
  105. nvd9_hub_mmio_head:
  106. mmctx_data(0x17e91c, 2)
  107. mmctx_data(0x400204, 2)
  108. mmctx_data(0x404004, 10)
  109. mmctx_data(0x404044, 1)
  110. mmctx_data(0x404094, 14)
  111. mmctx_data(0x4040d0, 7)
  112. mmctx_data(0x4040f8, 1)
  113. mmctx_data(0x404130, 3)
  114. mmctx_data(0x404150, 3)
  115. mmctx_data(0x404164, 2)
  116. mmctx_data(0x404178, 2)
  117. mmctx_data(0x404200, 8)
  118. mmctx_data(0x404404, 14)
  119. mmctx_data(0x404460, 4)
  120. mmctx_data(0x404480, 1)
  121. mmctx_data(0x404498, 1)
  122. mmctx_data(0x404604, 4)
  123. mmctx_data(0x404618, 32)
  124. mmctx_data(0x404698, 21)
  125. mmctx_data(0x4046f0, 2)
  126. mmctx_data(0x404700, 22)
  127. mmctx_data(0x405800, 1)
  128. mmctx_data(0x405830, 3)
  129. mmctx_data(0x405854, 1)
  130. mmctx_data(0x405870, 4)
  131. mmctx_data(0x405a00, 2)
  132. mmctx_data(0x405a18, 1)
  133. mmctx_data(0x406020, 1)
  134. mmctx_data(0x406028, 4)
  135. mmctx_data(0x4064a8, 2)
  136. mmctx_data(0x4064b4, 5)
  137. mmctx_data(0x407804, 1)
  138. mmctx_data(0x40780c, 6)
  139. mmctx_data(0x4078bc, 1)
  140. mmctx_data(0x408000, 7)
  141. mmctx_data(0x408064, 1)
  142. mmctx_data(0x408800, 3)
  143. mmctx_data(0x408900, 4)
  144. mmctx_data(0x408980, 1)
  145. nvd9_hub_mmio_tail:
  // Transfer buffers, each starting on a 256-byte boundary.
  //
  // chan_data receives the first 256 bytes of the channel's context
  // (ctx_load) and is written back from ctx_mmio_exec; its first two words
  // are the mmio override list length and address.
  146. .align 256
  147. chan_data:
  148. chan_mmio_count: .b32 0 // number of entries in the channel's mmio list
  149. chan_mmio_address: .b32 0 // written to MEM_BASE by ctx_mmio_exec
  // xfer_data is the scratch target for the 16-byte channel header read in
  // ctx_load and the 256-byte mmio list reads in ctx_mmio_exec. Only one
  // word is reserved here - NOTE(review): the larger transfers presumably
  // rely on this being the last object in the data segment; confirm.
  150. .align 256
  151. xfer_data: .b32 0
  // Code segment. The first instruction branches to init, skipping over
  // whatever the shared file emits below.
  152. .section #nvc0_grhub_code
  153. bra #init
  // second pass over the shared file, this time with the selector macro
  // defined - presumably this makes it emit its code rather than its data;
  // confirm against nvc0_graph.fuc
  154. define(`include_code')
  155. include(`nvc0_graph.fuc')
  156. // reports an exception to the host
  157. //
  158. // In: $r15 error code (see nvc0_graph.fuc)
  159. //
  // The error code is stored in CC_SCRATCH[5], then 1 is written to
  // INTR_UP_SET to raise the host interrupt.
  //
  // Clobbers: $r15 (holds 1 on return); $r14 is saved and restored.
  //
  160. error:
  161. push $r14
  162. mov $r14 0x814
  163. shl b32 $r14 6
  164. iowr I[$r14 + 0x000] $r15 // CC_SCRATCH[5] = error code
  165. mov $r14 0xc1c
  166. shl b32 $r14 6
  167. mov $r15 1
  168. iowr I[$r14 + 0x000] $r15 // INTR_UP_SET
  169. pop $r14
  170. ret
  171. // HUB fuc initialisation, executed by triggering ucode start, will
  172. // fall through to main loop after completion.
  173. //
  174. // Input:
  175. // CC_SCRATCH[0]: chipset (PMC_BOOT_0 read returns 0x0bad0bad... sigh)
  176. //
  177. // Output:
  178. // CC_SCRATCH[0]:
  179. // 31:31: set to signal completion
  180. // CC_SCRATCH[1]:
  181. // 31:0: total PGRAPH context size
  182. //
  // Register use within init:
  // $r0 zero (cleared first, then used as the zero/base register)
  // $r1 running context size, in bytes (from init_context onwards)
  // $r2 chipset id read from CC_SCRATCH[0] (from the chipset search
  // until the GPC loop has finished)
  // $r3 number of GPCs still to start (GPC loop)
  // $r4 register base of the GPC being started (GPC loop)
  //
  183. init:
  184. clear b32 $r0
  185. mov $sp $r0
  186. mov $xdbase $r0
  187. // enable fifo access
  188. mov $r1 0x1200
  189. mov $r2 2
  190. iowr I[$r1 + 0x000] $r2 // FIFO_ENABLE
  191. // setup i0 handler, and route all interrupts to it
  192. mov $r1 #ih
  193. mov $iv0 $r1
  194. mov $r1 0x400
  195. iowr I[$r1 + 0x300] $r0 // INTR_DISPATCH
  196. // route HUB_CHANNEL_SWITCH to fuc interrupt 8
  197. mov $r3 0x404
  198. shl b32 $r3 6
  199. mov $r2 0x2003 // { HUB_CHANNEL_SWITCH, ZERO } -> intr 8
  200. iowr I[$r3 + 0x000] $r2
  201. // not sure what these are, route them because NVIDIA does, and
  202. // the IRQ handler will signal the host if we ever get one.. we
  203. // may find out if/why we need to handle these if so..
  204. //
  205. mov $r2 0x2004
  206. iowr I[$r3 + 0x004] $r2 // { 0x04, ZERO } -> intr 9
  207. mov $r2 0x200b
  208. iowr I[$r3 + 0x008] $r2 // { 0x0b, ZERO } -> intr 10
  209. mov $r2 0x200c
  210. iowr I[$r3 + 0x01c] $r2 // { 0x0c, ZERO } -> intr 15
  211. // enable all INTR_UP interrupts
  212. mov $r2 0xc24
  213. shl b32 $r2 6
  214. not b32 $r3 $r0 // $r3 = ~0
  215. iowr I[$r2] $r3
  216. // enable fifo, ctxsw, 9, 10, 15 interrupts
  217. mov $r2 -0x78fc // 0x8704
  218. sethi $r2 0
  219. iowr I[$r1 + 0x000] $r2 // INTR_EN_SET
  220. // fifo level triggered, rest edge
  221. sub b32 $r1 0x100 // $r1 = 0x300
  222. mov $r2 4
  223. iowr I[$r1] $r2
  224. // enable interrupts
  225. bset $flags ie0
  226. // fetch enabled GPC/ROP counts
  227. mov $r14 -0x69fc // 0x409604
  228. sethi $r14 0x400000
  229. call #nv_rd32
  230. extr $r1 $r15 16:20
  231. st b32 D[$r0 + #rop_count] $r1
  232. and $r15 0x1f
  233. st b32 D[$r0 + #gpc_count] $r15
  234. // set BAR_REQMASK to GPC mask
  // mask = (1 << gpc_count) - 1, written to two registers 0x100 apart
  235. mov $r1 1
  236. shl b32 $r1 $r15
  237. sub b32 $r1 1
  238. mov $r2 0x40c
  239. shl b32 $r2 6
  240. iowr I[$r2 + 0x000] $r1
  241. iowr I[$r2 + 0x100] $r1
  242. // find context data for this chipset
  243. mov $r2 0x800
  244. shl b32 $r2 6
  245. iord $r2 I[$r2 + 0x000] // CC_SCRATCH[0]
  // start one entry before the table; the loop pre-increments by the
  // 8-byte entry size
  246. mov $r15 #chipsets - 8
  247. init_find_chipset:
  248. add b32 $r15 8
  249. ld b32 $r3 D[$r15 + 0x00]
  250. cmpu b32 $r3 $r2
  251. bra e #init_context
  252. cmpu b32 $r3 0 // zero id marks the end of the table
  253. bra ne #init_find_chipset
  254. // unknown chipset
  // NOTE(review): init is reached with a branch and $sp was set to zero
  // above, so no return address has been pushed for this ret - confirm
  // what is expected to happen here.
  255. ret
  256. // context size calculation, reserve first 256 bytes for use by fuc
  257. init_context:
  258. mov $r1 256
  259. // calculate size of mmio context data
  // $r15 still points at the matching table entry: head at +4, tail at +6
  260. ld b16 $r14 D[$r15 + 4]
  261. ld b16 $r15 D[$r15 + 6]
  262. sethi $r14 0
  263. st b32 D[$r0 + #hub_mmio_list_head] $r14
  264. st b32 D[$r0 + #hub_mmio_list_tail] $r15
  // the size is used from $r15 below (added to $r1)
  265. call #mmctx_size
  266. // set mmctx base addresses now so we don't have to do it later,
  267. // they don't (currently) ever change
  268. mov $r3 0x700
  269. shl b32 $r3 6
  270. shr b32 $r4 $r1 8 // base is programmed in units of 256 bytes
  271. iowr I[$r3 + 0x000] $r4 // MMCTX_SAVE_SWBASE
  272. iowr I[$r3 + 0x100] $r4 // MMCTX_LOAD_SWBASE
  273. add b32 $r3 0x1300
  274. add b32 $r1 $r15
  275. shr b32 $r15 2 // bytes -> 32-bit words
  276. iowr I[$r3 + 0x000] $r15 // MMCTX_LOAD_COUNT, wtf for?!?
  277. // strands, base offset needs to be aligned to 256 bytes
  // note: this always advances to the next 256-byte boundary, even when
  // $r1 is already aligned
  278. shr b32 $r1 8
  279. add b32 $r1 1
  280. shl b32 $r1 8
  281. mov b32 $r15 $r1
  282. call #strand_ctx_init
  283. add b32 $r1 $r15
  284. // initialise each GPC in sequence by passing in the offset of its
  285. // context data in GPCn_CC_SCRATCH[1], and starting its FUC (which
  286. // has previously been uploaded by the host) running.
  287. //
  288. // the GPC fuc init sequence will set GPCn_CC_SCRATCH[0] bit 31
  289. // when it has completed, and return the size of its context data
  290. // in GPCn_CC_SCRATCH[1]
  291. //
  292. ld b32 $r3 D[$r0 + #gpc_count]
  // $r4 = 0x502000, advanced by 0x8000 for each further GPC
  293. mov $r4 0x2000
  294. sethi $r4 0x500000
  295. init_gpc:
  296. // setup, and start GPC ucode running
  297. add b32 $r14 $r4 0x804
  298. mov b32 $r15 $r1
  299. call #nv_wr32 // CC_SCRATCH[1] = ctx offset
  300. add b32 $r14 $r4 0x800
  301. mov b32 $r15 $r2
  302. call #nv_wr32 // CC_SCRATCH[0] = chipset
  303. add b32 $r14 $r4 0x10c
  304. clear b32 $r15
  305. call #nv_wr32
  // $r15 is not reloaded here, so this relies on it still being zero
  // after the previous write
  306. add b32 $r14 $r4 0x104
  307. call #nv_wr32 // ENTRY
  308. add b32 $r14 $r4 0x100
  309. mov $r15 2 // CTRL_START_TRIGGER
  310. call #nv_wr32 // CTRL
  311. // wait for it to complete, and adjust context size
  312. add b32 $r14 $r4 0x800
  313. init_gpc_wait:
  314. call #nv_rd32
  315. xbit $r15 $r15 31
  316. bra e #init_gpc_wait
  317. add b32 $r14 $r4 0x804
  318. call #nv_rd32
  319. add b32 $r1 $r15
  320. // next!
  321. add b32 $r4 0x8000
  322. sub b32 $r3 1
  323. bra ne #init_gpc
  324. // save context size, and tell host we're ready
  325. mov $r2 0x800
  326. shl b32 $r2 6
  327. iowr I[$r2 + 0x100] $r1 // CC_SCRATCH[1] = context size
  328. add b32 $r2 0x800 // same port that main_done writes (0x820 << 6)
  329. clear b32 $r1
  330. bset $r1 31
  331. iowr I[$r2 + 0x000] $r1 // CC_SCRATCH[0] |= 0x80000000
  // execution continues straight into main below
  332. // Main program loop, very simple, sleeps until woken up by the interrupt
  333. // handler, pulls a command from the queue and executes its handler
  334. //
  // Commands are taken from cmd_queue with the command in $r14 and its data
  // in $r15; when $p1 is set after the queue fetch the loop goes back to
  // sleep without doing anything. Commands handled:
  // 0x4001 context switch request (queued by ih)
  // 0x0001 set current channel, channel address in the data word
  // 0x0002 save the current channel's context
  // Anything else is reported to the host through error().
  //
  335. main:
  336. // sleep until we have something to do
  337. bset $flags $p0
  338. sleep $p0
  339. mov $r13 #cmd_queue
  340. call #queue_get
  341. bra $p1 #main
  342. // context switch, requested by GPU?
  343. cmpu b32 $r14 0x4001
  344. bra ne #main_not_ctx_switch
  345. trace_set(T_AUTO)
  346. mov $r1 0xb00
  347. shl b32 $r1 6
  348. iord $r2 I[$r1 + 0x100] // CHAN_NEXT
  349. iord $r1 I[$r1 + 0x000] // CHAN_CUR
  // bit 31 of each channel value selects the path: both set -> save the
  // current channel then load the next; only current -> save only;
  // only next -> load only; neither -> just ack
  350. xbit $r3 $r1 31
  351. bra e #chsw_no_prev
  352. xbit $r3 $r2 31
  353. bra e #chsw_prev_no_next
  // save current ($p1 clear, $p2 set), then load next ($p1 set, $p2
  // still set); the next channel is kept on the stack across the save
  354. push $r2
  355. mov b32 $r2 $r1
  356. trace_set(T_SAVE)
  357. bclr $flags $p1
  358. bset $flags $p2
  359. call #ctx_xfer
  360. trace_clr(T_SAVE);
  361. pop $r2
  362. trace_set(T_LOAD);
  363. bset $flags $p1
  364. call #ctx_xfer
  365. trace_clr(T_LOAD);
  366. bra #chsw_done
  367. chsw_prev_no_next:
  // save only ($p1 and $p2 clear), then store the next-channel value
  // into CHAN_CUR
  368. push $r2
  369. mov b32 $r2 $r1
  370. bclr $flags $p1
  371. bclr $flags $p2
  372. call #ctx_xfer
  373. pop $r2
  374. mov $r1 0xb00
  375. shl b32 $r1 6
  376. iowr I[$r1] $r2
  377. bra #chsw_done
  378. chsw_no_prev:
  379. xbit $r3 $r2 31
  380. bra e #chsw_done
  // load only ($p1 set, $p2 clear); $r2 already holds the next channel
  381. bset $flags $p1
  382. bclr $flags $p2
  383. call #ctx_xfer
  384. // ack the context switch request
  385. chsw_done:
  386. mov $r1 0xb0c
  387. shl b32 $r1 6
  388. mov $r2 1
  389. iowr I[$r1 + 0x000] $r2 // 0x409b0c
  390. trace_clr(T_AUTO)
  391. bra #main
  392. // request to set current channel? (*not* a context switch)
  393. main_not_ctx_switch:
  394. cmpu b32 $r14 0x0001
  395. bra ne #main_not_ctx_chan
  396. mov b32 $r2 $r15
  397. call #ctx_chan
  398. bra #main_done
  399. // request to store current channel context?
  400. main_not_ctx_chan:
  401. cmpu b32 $r14 0x0002
  402. bra ne #main_not_ctx_save
  403. trace_set(T_SAVE)
  404. bclr $flags $p1
  405. bclr $flags $p2
  406. call #ctx_xfer
  407. trace_clr(T_SAVE)
  408. bra #main_done
  409. main_not_ctx_save:
  // unknown command: error code = (command << 16) | bad-command code;
  // no completion is signalled on this path
  410. shl b32 $r15 $r14 16
  411. or $r15 E_BAD_COMMAND
  412. call #error
  413. bra #main
  // signal completion of a host command (0x0001 / 0x0002) to the host
  414. main_done:
  415. mov $r1 0x820
  416. shl b32 $r1 6
  417. clear b32 $r2
  418. bset $r2 31
  419. iowr I[$r1 + 0x000] $r2 // CC_SCRATCH[0] |= 0x80000000
  420. bra #main
  421. // interrupt handler
  //
  // Installed in $iv0 by init. Queues incoming fifo commands and context
  // switch requests on cmd_queue for main() to process, forwards any other
  // pending interrupt bits to the host through INTR_UP_SET, acks every bit
  // that was pending, and clears $p0 to wake main().
  //
  // $r8-$r11, $r13-$r15 and $flags are saved on entry and restored on exit.
  // $r10 holds the pending interrupt bits from the INTR read until the ack.
  //
  422. ih:
  423. push $r8
  424. mov $r8 $flags
  425. push $r8
  426. push $r9
  427. push $r10
  428. push $r11
  429. push $r13
  430. push $r14
  431. push $r15
  432. // incoming fifo command?
  433. iord $r10 I[$r0 + 0x200] // INTR
  434. and $r11 $r10 0x00000004
  435. bra e #ih_no_fifo
  436. // queue incoming fifo command for later processing
  437. mov $r11 0x1900
  438. mov $r13 #cmd_queue
  439. iord $r14 I[$r11 + 0x100] // FIFO_CMD
  440. iord $r15 I[$r11 + 0x000] // FIFO_DATA
  441. call #queue_put
  442. add b32 $r11 0x400
  443. mov $r14 1
  444. iowr I[$r11 + 0x000] $r14 // FIFO_ACK
  445. // context switch request?
  446. ih_no_fifo:
  447. and $r11 $r10 0x00000100
  448. bra e #ih_no_ctxsw
  449. // enqueue a context switch for later processing
  450. mov $r13 #cmd_queue
  451. mov $r14 0x4001
  452. call #queue_put
  453. // anything we didn't handle, bring it to the host's attention
  454. ih_no_ctxsw:
  455. mov $r11 0x104
  456. not b32 $r11
  457. and $r11 $r10 $r11
  458. bra e #ih_no_other
  // build the INTR_UP_SET address in $r13 (saved above and no longer needed
  // as the queue pointer) rather than in $r10: $r10 must still hold the
  // pending interrupt bits for the ack below, otherwise the address value
  // would be written to INTR_ACK in their place
  459. mov $r13 0xc1c
  460. shl b32 $r13 6
  461. iowr I[$r13] $r11 // INTR_UP_SET
  462. // ack, and wake up main()
  463. ih_no_other:
  464. iowr I[$r0 + 0x100] $r10 // INTR_ACK
  465. pop $r15
  466. pop $r14
  467. pop $r13
  468. pop $r11
  469. pop $r10
  470. pop $r9
  471. pop $r8
  472. mov $flags $r8
  473. pop $r8
  // done after $flags has been restored so that the cleared $p0 survives
  474. bclr $flags $p0
  475. iret
  476. // Not real sure, but, MEM_CMD 7 will hang forever if this isn't done
  //
  // Writes 1 to 0x404160, then polls the same register until bit 4 reads
  // back as set. $r14 is not reloaded between the write and the reads, so
  // this relies on the mmio helpers leaving the address in it usable.
  //
  // Clobbers: $r14, $r15 (plus whatever the mmio helpers clobber)
  //
  477. ctx_4160s:
  478. mov $r14 0x4160
  479. sethi $r14 0x400000
  480. mov $r15 1
  481. call #nv_wr32
  482. ctx_4160s_wait:
  483. call #nv_rd32
  484. xbit $r15 $r15 4
  485. bra e #ctx_4160s_wait
  486. ret
  487. // Without clearing again at end of xfer, some things cause PGRAPH
  488. // to hang with STATUS=0x00000007 until it's cleared.. fbcon can
  489. // still function with it set however...
  //
  // Writes 0 to 0x404160 (the register set by ctx_4160s); does not wait.
  //
  // Clobbers: $r14, $r15 (plus whatever the mmio write helper clobbers)
  //
  490. ctx_4160c:
  491. mov $r14 0x4160
  492. sethi $r14 0x400000
  493. clear b32 $r15
  494. call #nv_wr32
  495. ret
  496. // Again, not real sure
  497. //
  498. // In: $r15 value to set 0x404170 to
  499. //
  // Bit 4 (0x10) is always ORed into the value before it is written; that
  // is the bit ctx_4170w polls on.
  //
  // Clobbers: $r14, $r15 (plus whatever the mmio write helper clobbers)
  //
  500. ctx_4170s:
  501. mov $r14 0x4170
  502. sethi $r14 0x400000
  503. or $r15 0x10
  504. call #nv_wr32
  505. ret
  506. // Waits for a ctx_4170s() call to complete
  507. //
  // Re-reads 0x404170 until bit 4 (0x10) reads back as clear. The loop
  // branches back to the entry label, so the address is rebuilt each time.
  //
  // Clobbers: $r14, $r15 (plus whatever the mmio read helper clobbers)
  //
  508. ctx_4170w:
  509. mov $r14 0x4170
  510. sethi $r14 0x400000
  511. call #nv_rd32
  512. and $r15 0x10
  513. bra ne #ctx_4170w
  514. ret
  515. // Disables various things, waits a bit, and re-enables them..
  516. //
  517. // Not sure how exactly this helps, perhaps "ENABLE" is not such a
  518. // good description for the bits we turn off? Anyways, without this,
  519. // funny things happen.
  520. //
  // Writes 0x270 to HUB_RED_SWITCH, spins through an 8-iteration countdown,
  // then writes 0x770.
  //
  // Clobbers: $r14, $r15
  //
  521. ctx_redswitch:
  522. mov $r14 0x614
  523. shl b32 $r14 6
  524. mov $r15 0x270
  525. iowr I[$r14] $r15 // HUB_RED_SWITCH = ENABLE_GPC, POWER_ALL
  526. mov $r15 8 // delay loop count
  527. ctx_redswitch_delay:
  528. sub b32 $r15 1
  529. bra ne #ctx_redswitch_delay
  530. mov $r15 0x770
  531. iowr I[$r14] $r15 // HUB_RED_SWITCH = ENABLE_ALL, POWER_ALL
  532. ret
  533. // Not a clue what this is for, except that unless the value is 0x10, the
  534. // strand context is saved (and presumably restored) incorrectly..
  535. //
  536. // In: $r15 value to set to (0x00/0x10 are used)
  537. //
  // The same value is written to three places: the HUB I/O port 0x86c and,
  // through the mmio write helper, registers 0x408a14 and 0x41a86c. $r15 is
  // not reloaded between the writes, so this relies on the helper leaving
  // it intact.
  //
  // Clobbers: $r14 (plus whatever the mmio write helper clobbers)
  //
  538. ctx_86c:
  539. mov $r14 0x86c
  540. shl b32 $r14 6
  541. iowr I[$r14] $r15 // HUB(0x86c) = val
  542. mov $r14 -0x75ec // 0x408a14
  543. sethi $r14 0x400000
  544. call #nv_wr32 // ROP(0xa14) = val
  545. mov $r14 -0x5794 // 0x41a86c
  546. sethi $r14 0x410000
  547. call #nv_wr32 // GPC(0x86c) = val
  548. ret
  549. // ctx_load - loads a channel's ctxctl data, and selects its vm
  550. //
  551. // In: $r2 channel address
  552. //
  // Steps:
  // 1. make the channel current (CHAN_NEXT, MEM_CHAN, MEM_CMD 7, CHAN_CUR)
  // 2. read 16 bytes of the channel header into xfer_data and derive the
  // context pointer from them, stored in ctx_current
  // 3. point MEM_BASE at the context and read its first 256 bytes into
  // chan_data
  //
  // Clobbers: $r1-$r4, $r10 (plus whatever the called helpers clobber)
  //
  553. ctx_load:
  554. trace_set(T_CHAN)
  555. // switch to channel, somewhat magic in parts..
  556. mov $r10 12 // DONE_UNK12
  557. call #wait_donez
  558. mov $r1 0xa24
  559. shl b32 $r1 6
  560. iowr I[$r1 + 0x000] $r0 // 0x409a24
  561. mov $r3 0xb00
  562. shl b32 $r3 6
  563. iowr I[$r3 + 0x100] $r2 // CHAN_NEXT
  564. mov $r1 0xa0c
  565. shl b32 $r1 6
  566. mov $r4 7
  567. iowr I[$r1 + 0x000] $r2 // MEM_CHAN
  568. iowr I[$r1 + 0x100] $r4 // MEM_CMD
  // poll MEM_CMD until its low 5 bits read back as zero
  569. ctx_chan_wait_0:
  570. iord $r4 I[$r1 + 0x100]
  571. and $r4 0x1f
  572. bra ne #ctx_chan_wait_0
  573. iowr I[$r3 + 0x000] $r2 // CHAN_CUR
  574. // load channel header, fetch PGRAPH context pointer
  575. mov $xtargets $r0
  // MEM_BASE value = ((channel address with bit 31 cleared) << 4) + 2
  576. bclr $r2 31
  577. shl b32 $r2 4
  578. add b32 $r2 2
  579. trace_set(T_LCHAN)
  580. mov $r1 0xa04
  581. shl b32 $r1 6
  582. iowr I[$r1 + 0x000] $r2 // MEM_BASE
  583. mov $r1 0xa20
  584. shl b32 $r1 6
  585. mov $r2 0x0002
  586. sethi $r2 0x80000000
  587. iowr I[$r1 + 0x000] $r2 // MEM_TARGET = vram
  588. mov $r1 0x10 // chan + 0x0210
  589. mov $r2 #xfer_data
  590. sethi $r2 0x00020000 // 16 bytes
  591. xdld $r1 $r2
  592. xdwait
  593. trace_clr(T_LCHAN)
  594. // update current context
  // ctx_current = (second header word << 24) | (first header word >> 8)
  595. ld b32 $r1 D[$r0 + #xfer_data + 4]
  596. shl b32 $r1 24
  597. ld b32 $r2 D[$r0 + #xfer_data + 0]
  598. shr b32 $r2 8
  599. or $r1 $r2
  600. st b32 D[$r0 + #ctx_current] $r1
  601. // set transfer base to start of context, and fetch context header
  602. trace_set(T_LCTXH)
  603. mov $r2 0xa04
  604. shl b32 $r2 6
  605. iowr I[$r2 + 0x000] $r1 // MEM_BASE
  606. mov $r2 1
  607. mov $r1 0xa20
  608. shl b32 $r1 6
  609. iowr I[$r1 + 0x000] $r2 // MEM_TARGET = vm
  610. mov $r1 #chan_data
  611. sethi $r1 0x00060000 // 256 bytes
  612. xdld $r0 $r1 // from offset 0 of the context
  613. xdwait
  614. trace_clr(T_LCTXH)
  615. trace_clr(T_CHAN)
  616. ret
  617. // ctx_chan - handler for HUB_SET_CHAN command, will set a channel as
  618. // the active channel for ctxctl, but not actually transfer
  619. // any context data. intended for use only during initial
  620. // context construction.
  621. //
  622. // In: $r2 channel address
  623. //
  // Sequence: ctx_4160s, ctx_load, wait on DONE_UNK12, issue MEM_CMD 5 and
  // poll until the register reads back as zero, then ctx_4160c.
  //
  624. ctx_chan:
  625. call #ctx_4160s
  626. call #ctx_load
  627. mov $r10 12 // DONE_UNK12
  628. call #wait_donez
  629. mov $r1 0xa10
  630. shl b32 $r1 6
  631. mov $r2 5
  632. iowr I[$r1 + 0x000] $r2 // MEM_CMD = 5 (???)
  633. ctx_chan_wait:
  634. iord $r2 I[$r1 + 0x000]
  635. or $r2 $r2 // value unchanged; used to test for zero
  636. bra ne #ctx_chan_wait
  637. call #ctx_4160c
  638. ret
  639. // Execute per-context state overrides list
  640. //
  641. // Only executed on the first load of a channel. Might want to look into
  642. // removing this and having the host directly modify the channel's context
  643. // to change this state... The nouveau DRM already builds this list as
  644. // it's definitely needed for NVIDIA's, so we may as well use it for now
  645. //
  646. // Input: $r1 mmio list length
  647. //
  // The length is a count of 8-byte entries (register address, then value)
  // and must be non-zero: the loop decrements before testing, and the only
  // caller in this file checks for zero first.
  //
  // Register use:
  // $r1 entries remaining
  // $r2 MEM_BASE port address
  // $r3 byte offset of the current entry within the list
  // $r4 byte offset of the current entry within the 256-byte buffer
  //
  // Clobbers: $r1-$r5, $r14, $r15 (plus whatever the mmio write helper
  // clobbers)
  //
  648. ctx_mmio_exec:
  649. // set transfer base to be the mmio list
  650. ld b32 $r3 D[$r0 + #chan_mmio_address]
  651. mov $r2 0xa04
  652. shl b32 $r2 6
  653. iowr I[$r2 + 0x000] $r3 // MEM_BASE
  654. clear b32 $r3
  655. ctx_mmio_loop:
  656. // fetch next 256 bytes of mmio list if necessary
  657. and $r4 $r3 0xff
  658. bra ne #ctx_mmio_pull
  659. mov $r5 #xfer_data
  660. sethi $r5 0x00060000 // 256 bytes
  661. xdld $r3 $r5
  662. xdwait
  663. // execute a single list entry
  664. ctx_mmio_pull:
  665. ld b32 $r14 D[$r4 + #xfer_data + 0x00]
  666. ld b32 $r15 D[$r4 + #xfer_data + 0x04]
  667. call #nv_wr32
  668. // next!
  669. add b32 $r3 8
  670. sub b32 $r1 1
  671. bra ne #ctx_mmio_loop
  672. // set transfer base back to the current context
  673. ctx_mmio_done:
  674. ld b32 $r3 D[$r0 + #ctx_current]
  675. iowr I[$r2 + 0x000] $r3 // MEM_BASE
  676. // disable the mmio list now, we don't need/want to execute it again
  // the count is zeroed in chan_data, then the 256-byte block is written
  // back to offset 0 of the context
  677. st b32 D[$r0 + #chan_mmio_count] $r0
  678. mov $r1 #chan_data
  679. sethi $r1 0x00060000 // 256 bytes
  680. xdst $r0 $r1
  681. xdwait
  682. ret
  683. // Transfer HUB context data between GPU and storage area
  684. //
  685. // In: $r2 channel address
  686. // $p1 clear on save, set on load
  687. // $p2 set if opposite direction done/will be done, so:
  688. // on save it means: "a load will follow this save"
  689. // on load it means: "a save preceded this load"
  690. //
  // Paths through the routine:
  // save: pre -> exec -> MEM_CMD 5 wait -> post, unless $p2 is set
  // load, $p2 clear: pre -> pre_load -> exec -> post
  // load, $p2 set: pre_load -> exec -> post
  // $r2 is only consumed on the load paths, where it is passed to ctx_load.
  //
  691. ctx_xfer:
  692. bra not $p1 #ctx_xfer_pre
  693. bra $p2 #ctx_xfer_pre_load
  694. ctx_xfer_pre:
  695. mov $r15 0x10
  696. call #ctx_86c
  697. call #ctx_4160s
  698. bra not $p1 #ctx_xfer_exec
  699. ctx_xfer_pre_load:
  700. mov $r15 2
  701. call #ctx_4170s
  702. call #ctx_4170w
  703. call #ctx_redswitch
  704. clear b32 $r15
  705. call #ctx_4170s
  706. call #ctx_load
  707. // fetch context pointer, and initiate xfer on all GPCs
  708. ctx_xfer_exec:
  709. ld b32 $r1 D[$r0 + #ctx_current]
  710. mov $r2 0x414
  711. shl b32 $r2 6
  712. iowr I[$r2 + 0x000] $r0 // BAR_STATUS = reset
  713. mov $r14 -0x5b00 // 0x41a500
  714. sethi $r14 0x410000
  715. mov b32 $r15 $r1
  716. call #nv_wr32 // GPC_BCAST_WRCMD_DATA = ctx pointer
  717. add b32 $r14 4
  // command word = $p1 | ($p2 << 1)
  718. xbit $r15 $flags $p1
  719. xbit $r2 $flags $p2
  720. shl b32 $r2 1
  721. or $r15 $r2
  722. call #nv_wr32 // GPC_BCAST_WRCMD_CMD = GPC_XFER(type)
  723. // strands
  724. mov $r1 0x4afc
  725. sethi $r1 0x20000
  726. mov $r2 0xc
  727. iowr I[$r1] $r2 // STRAND_CMD(0x3f) = 0x0c
  728. call #strand_wait
  729. mov $r2 0x47fc
  730. sethi $r2 0x20000
  731. iowr I[$r2] $r0 // STRAND_FIRST_GENE(0x3f) = 0x00
  // 3 + $p1: 3 on save, 4 on load
  732. xbit $r2 $flags $p1
  733. add b32 $r2 3
  734. iowr I[$r1] $r2 // STRAND_CMD(0x3f) = 0x03/0x04 (SAVE/LOAD)
  735. // mmio context
  736. xbit $r10 $flags $p1 // direction
  737. or $r10 6 // first, last
  738. mov $r11 0 // base = 0
  739. ld b32 $r12 D[$r0 + #hub_mmio_list_head]
  740. ld b32 $r13 D[$r0 + #hub_mmio_list_tail]
  741. mov $r14 0 // not multi
  742. call #mmctx_xfer
  743. // wait for GPCs to all complete
  744. mov $r10 8 // DONE_BAR
  745. call #wait_doneo
  746. // wait for strand xfer to complete
  747. call #strand_wait
  748. // post-op
  749. bra $p1 #ctx_xfer_post
  // save only: wait on DONE_UNK12, issue MEM_CMD 5 and poll until the
  // register reads back as zero
  750. mov $r10 12 // DONE_UNK12
  751. call #wait_donez
  752. mov $r1 0xa10
  753. shl b32 $r1 6
  754. mov $r2 5
  755. iowr I[$r1] $r2 // MEM_CMD
  756. ctx_xfer_post_save_wait:
  757. iord $r2 I[$r1]
  758. or $r2 $r2
  759. bra ne #ctx_xfer_post_save_wait
  // a load will follow this save: skip the post sequence
  760. bra $p2 #ctx_xfer_done
  761. ctx_xfer_post:
  762. mov $r15 2
  763. call #ctx_4170s
  764. clear b32 $r15
  765. call #ctx_86c
  766. call #strand_post
  767. call #ctx_4170w
  768. clear b32 $r15
  769. call #ctx_4170s
  // on load only, run the channel's mmio override list if its count in
  // the freshly loaded chan_data is non-zero
  770. bra not $p1 #ctx_xfer_no_post_mmio
  771. ld b32 $r1 D[$r0 + #chan_mmio_count]
  772. or $r1 $r1
  773. bra e #ctx_xfer_no_post_mmio
  774. call #ctx_mmio_exec
  775. ctx_xfer_no_post_mmio:
  776. call #ctx_4160c
  777. ctx_xfer_done:
  778. ret
  // pad the end of the code to a 256-byte boundary
  779. .align 256