nvc0_grgpc.fuc 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474
  1. /* fuc microcode for nvc0 PGRAPH/GPC
  2. *
  3. * Copyright 2011 Red Hat Inc.
  4. *
  5. * Permission is hereby granted, free of charge, to any person obtaining a
  6. * copy of this software and associated documentation files (the "Software"),
  7. * to deal in the Software without restriction, including without limitation
  8. * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  9. * and/or sell copies of the Software, and to permit persons to whom the
  10. * Software is furnished to do so, subject to the following conditions:
  11. *
  12. * The above copyright notice and this permission notice shall be included in
  13. * all copies or substantial portions of the Software.
  14. *
  15. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  18. * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
  19. * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  20. * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  21. * OTHER DEALINGS IN THE SOFTWARE.
  22. *
  23. * Authors: Ben Skeggs
  24. */
  25. /* To build:
  26. * m4 nvc0_grgpc.fuc | envyas -a -w -m fuc -V nva3 -o nvc0_grgpc.fuc.h
  27. */
  28. /* TODO
  29. * - bracket certain functions with scratch writes, useful for debugging
  30. * - watchdog timer around ctx operations
  31. */
  // Data segment. The cells below all start out as zero and are filled in by
  // init; ctx_xfer and hub_barrier_done read them back as D[$r0 + label].
  32. .section nvc0_grgpc_data
  33. include(`nvc0_graph.fuc')
  34. gpc_id: .b32 0 // init stores the MYINDEX register read here
  35. gpc_mmio_list_head: .b32 0 // init stores the GPC list start here with a 16-bit store; ctx_xfer loads it as 32 bits
  36. gpc_mmio_list_tail: .b32 0 // init stores the GPC list end here with a 16-bit store; ctx_xfer loads it as 32 bits
  37. tpc_count: .b32 0 // init stores the low 5 bits of the UNITS register read here
  38. tpc_mask: .b32 0 // init stores (1 << tpc_count) - 1 here
  39. tpc_mmio_list_head: .b32 0 // init stores the TPC list start here with a 16-bit store; ctx_xfer loads it as 32 bits
  40. tpc_mmio_list_tail: .b32 0 // init stores the TPC list end here with a 16-bit store; ctx_xfer loads it as 32 bits
  41. cmd_queue: queue_init // handed to queue_put by ih and to queue_get by main
  42. // chipset descriptions
  // Each entry is 12 bytes, which is the step init uses when walking this
  // table. Bytes 0-3 hold the chipset id; init loads them as one 32-bit word
  // and compares that word with the value read from CC_SCRATCH[0]. Four
  // 16-bit data addresses follow, loaded by init from offsets 4, 6, 8 and 10:
  // GPC list head, GPC list tail, TPC list head, TPC list tail.
  // An entry whose id word is zero ends the table.
  43. chipsets:
  44. .b8 0xc0 0 0 0
  45. .b16 nvc0_gpc_mmio_head
  46. .b16 nvc0_gpc_mmio_tail
  47. .b16 nvc0_tpc_mmio_head
  48. .b16 nvc0_tpc_mmio_tail
  49. .b8 0xc1 0 0 0
  50. .b16 nvc0_gpc_mmio_head
  51. .b16 nvc1_gpc_mmio_tail
  52. .b16 nvc0_tpc_mmio_head
  53. .b16 nvc1_tpc_mmio_tail
  54. .b8 0xc3 0 0 0
  55. .b16 nvc0_gpc_mmio_head
  56. .b16 nvc0_gpc_mmio_tail
  57. .b16 nvc0_tpc_mmio_head
  58. .b16 nvc3_tpc_mmio_tail
  59. .b8 0xc4 0 0 0
  60. .b16 nvc0_gpc_mmio_head
  61. .b16 nvc0_gpc_mmio_tail
  62. .b16 nvc0_tpc_mmio_head
  63. .b16 nvc3_tpc_mmio_tail
  64. .b8 0xc8 0 0 0
  65. .b16 nvc0_gpc_mmio_head
  66. .b16 nvc0_gpc_mmio_tail
  67. .b16 nvc0_tpc_mmio_head
  68. .b16 nvc0_tpc_mmio_tail
  69. .b8 0xce 0 0 0
  70. .b16 nvc0_gpc_mmio_head
  71. .b16 nvc0_gpc_mmio_tail
  72. .b16 nvc0_tpc_mmio_head
  73. .b16 nvc3_tpc_mmio_tail
  74. .b8 0 0 0 0 // terminator: zero id word stops the search in init
  75. // GPC mmio lists
  // One list entry per line, emitted through the list macro that comes from
  // nvc0_graph.fuc. The two arguments look like (register offset, count) -
  // confirm against the macro definition. ctx_xfer passes the head and tail
  // addresses of this list to mmctx_xfer together with the GPC base address.
  // The tail labels mark where the list ends for a given chipset entry: a
  // list ending at nvc0_gpc_mmio_tail stops before the 0x000c6c entry, while
  // one ending at nvc1_gpc_mmio_tail includes it.
  76. nvc0_gpc_mmio_head:
  77. mmctx_data(0x000380, 1)
  78. mmctx_data(0x000400, 6)
  79. mmctx_data(0x000450, 9)
  80. mmctx_data(0x000600, 1)
  81. mmctx_data(0x000684, 1)
  82. mmctx_data(0x000700, 5)
  83. mmctx_data(0x000800, 1)
  84. mmctx_data(0x000808, 3)
  85. mmctx_data(0x000828, 1)
  86. mmctx_data(0x000830, 1)
  87. mmctx_data(0x0008d8, 1)
  88. mmctx_data(0x0008e0, 1)
  89. mmctx_data(0x0008e8, 6)
  90. mmctx_data(0x00091c, 1)
  91. mmctx_data(0x000924, 3)
  92. mmctx_data(0x000b00, 1)
  93. mmctx_data(0x000b08, 6)
  94. mmctx_data(0x000bb8, 1)
  95. mmctx_data(0x000c08, 1)
  96. mmctx_data(0x000c10, 8)
  97. mmctx_data(0x000c80, 1)
  98. mmctx_data(0x000c8c, 1)
  99. mmctx_data(0x001000, 3)
  100. mmctx_data(0x001014, 1)
  101. nvc0_gpc_mmio_tail:
  102. mmctx_data(0x000c6c, 1)
  103. nvc1_gpc_mmio_tail:
  104. // TPC mmio lists
  // Same entry format as the GPC list. ctx_xfer passes the head and tail
  // addresses of this list to mmctx_xfer together with the TPC0 base address
  // of this GPC, the TPC mask and a stride of 0x800.
  // Three tail labels give three list lengths: a list ending at
  // nvc0_tpc_mmio_tail stops after the 0x000750 entry, one ending at
  // nvc3_tpc_mmio_tail adds the next four entries, and one ending at
  // nvc1_tpc_mmio_tail additionally includes the 0x000544 entry.
  105. nvc0_tpc_mmio_head:
  106. mmctx_data(0x000018, 1)
  107. mmctx_data(0x00003c, 1)
  108. mmctx_data(0x000048, 1)
  109. mmctx_data(0x000064, 1)
  110. mmctx_data(0x000088, 1)
  111. mmctx_data(0x000200, 6)
  112. mmctx_data(0x00021c, 2)
  113. mmctx_data(0x000300, 6)
  114. mmctx_data(0x0003d0, 1)
  115. mmctx_data(0x0003e0, 2)
  116. mmctx_data(0x000400, 3)
  117. mmctx_data(0x000420, 1)
  118. mmctx_data(0x0004b0, 1)
  119. mmctx_data(0x0004e8, 1)
  120. mmctx_data(0x0004f4, 1)
  121. mmctx_data(0x000520, 2)
  122. mmctx_data(0x000604, 4)
  123. mmctx_data(0x000644, 20)
  124. mmctx_data(0x000698, 1)
  125. mmctx_data(0x000750, 2)
  126. nvc0_tpc_mmio_tail:
  127. mmctx_data(0x000758, 1)
  128. mmctx_data(0x0002c4, 1)
  129. mmctx_data(0x0004bc, 1)
  130. mmctx_data(0x0006e0, 1)
  131. nvc3_tpc_mmio_tail:
  132. mmctx_data(0x000544, 1)
  133. nvc1_tpc_mmio_tail:
  // Code segment. Its first instruction branches to init, so whatever code
  // the inclusion below emits sits after that branch and is only reached
  // through calls.
  134. .section nvc0_grgpc_code
  135. bra init
  // nvc0_graph.fuc is pulled in a second time here, now with the macro on the
  // next line defined. Presumably that macro selects the code portion of the
  // file (nv_wr32, queue_put, queue_get, mmctx_size, mmctx_xfer, strand_wait
  // and strand_ctx_init are called below but not defined in this file) -
  // confirm in nvc0_graph.fuc.
  136. define(`include_code')
  137. include(`nvc0_graph.fuc')
  138. // reports an exception to the host
  139. // by writing the code to 0x409814 and then writing 1 to 0x409c30
  140. // In: $r15 error code (see nvc0_graph.fuc)
  141. // Out: $r14 preserved (pushed/popped here), $r15 overwritten with 1
  142. error:
  143. push $r14
  144. mov $r14 -0x67ec // 0x9814
  145. sethi $r14 0x400000 // $r14 = 0x409814
  146. call nv_wr32 // HUB_CTXCTL_CC_SCRATCH[5] = error code
  147. add b32 $r14 0x41c // 0x409c30; relies on $r14 still holding the address after the call
  148. mov $r15 1
  149. call nv_wr32 // HUB_CTXCTL_INTR_UP_SET
  150. pop $r14
  151. ret
  152. // GPC fuc initialisation, executed by triggering ucode start, will
  153. // fall through to main loop after completion.
  154. //
  155. // Input:
  156. // CC_SCRATCH[0]: chipset (PMC_BOOT_0 read returns 0x0bad0bad... sigh)
  157. // CC_SCRATCH[1]: context base
  158. //
  159. // Output:
  160. // CC_SCRATCH[0]:
  161. // 31:31: set to signal completion
  162. // CC_SCRATCH[1]:
  163. // 31:0: GPC context size
  164. //
  // Also fills in gpc_id, tpc_count, tpc_mask and the four mmio list cells in
  // the data segment, and leaves $r0 = 0; the other routines in this file
  // address data as D[$r0 + label] without clearing $r0 again.
  165. init:
  166. clear b32 $r0
  167. mov $sp $r0 // stack pointer starts at 0
  168. // enable fifo access
  169. mov $r1 0x1200
  170. mov $r2 2
  171. iowr I[$r1 + 0x000] $r2 // FIFO_ENABLE
  172. // setup i0 handler, and route all interrupts to it
  173. mov $r1 ih
  174. mov $iv0 $r1
  175. mov $r1 0x400
  176. iowr I[$r1 + 0x300] $r0 // INTR_DISPATCH
  177. // enable fifo interrupt
  178. mov $r2 4 // bit 2, the same bit ih tests in INTR
  179. iowr I[$r1 + 0x000] $r2 // INTR_EN_SET
  180. // enable interrupts
  181. bset $flags ie0
  182. // figure out which GPC we are, and how many TPCs we have
  183. mov $r1 0x608
  184. shl b32 $r1 6 // I/O address 0x18200
  185. iord $r2 I[$r1 + 0x000] // UNITS
  186. mov $r3 1
  187. and $r2 0x1f // TPC count = low 5 bits
  188. shl b32 $r3 $r2
  189. sub b32 $r3 1 // mask = (1 << count) - 1
  190. st b32 D[$r0 + tpc_count] $r2
  191. st b32 D[$r0 + tpc_mask] $r3
  192. add b32 $r1 0x400 // I/O address 0x18600
  193. iord $r2 I[$r1 + 0x000] // MYINDEX
  194. st b32 D[$r0 + gpc_id] $r2
  195. // find context data for this chipset
  196. mov $r2 0x800
  197. shl b32 $r2 6 // I/O address 0x20000
  198. iord $r2 I[$r2 + 0x000] // CC_SCRATCH[0]
  199. mov $r1 chipsets - 12 // one entry before the table; the loop adds 12 first
  200. init_find_chipset:
  201. add b32 $r1 12 // step to the next 12-byte entry
  202. ld b32 $r3 D[$r1 + 0x00] // id word of this entry
  203. cmpu b32 $r3 $r2
  204. bra e init_context // match: $r1 points at the entry
  205. cmpu b32 $r3 0
  206. bra ne init_find_chipset // a zero id word ends the table
  207. // unknown chipset
  // NOTE(review): this ret runs with $sp = 0 and without any call frame
  // pushed by this file (init is entered with bra), and the completion bit in
  // CC_SCRATCH[0] is never set on this path. Confirm what is intended here.
  208. ret
  209. // initialise context base, and size tracking
  210. init_context:
  211. mov $r2 0x800
  212. shl b32 $r2 6
  213. iord $r2 I[$r2 + 0x100] // CC_SCRATCH[1], initial base
  214. clear b32 $r3 // track GPC context size here
  215. // set mmctx base addresses now so we don't have to do it later,
  216. // they don't currently ever change
  217. mov $r4 0x700
  218. shl b32 $r4 6 // I/O address 0x1c000
  219. shr b32 $r5 $r2 8 // base is programmed in units of 256 bytes
  220. iowr I[$r4 + 0x000] $r5 // MMCTX_SAVE_SWBASE
  221. iowr I[$r4 + 0x100] $r5 // MMCTX_LOAD_SWBASE
  222. // calculate GPC mmio context size, store the chipset-specific
  223. // mmio list pointers somewhere we can get at them later without
  224. // re-parsing the chipset list
  225. clear b32 $r14
  226. clear b32 $r15
  227. ld b16 $r14 D[$r1 + 4] // GPC list head from the chipset entry
  228. ld b16 $r15 D[$r1 + 6] // GPC list tail from the chipset entry
  229. st b16 D[$r0 + gpc_mmio_list_head] $r14
  230. st b16 D[$r0 + gpc_mmio_list_tail] $r15
  231. call mmctx_size // as used here: head in $r14, tail in $r15, size back in $r15
  232. add b32 $r2 $r15 // advance the running base
  233. add b32 $r3 $r15 // and the running size
  234. // calculate per-TPC mmio context size, store the list pointers
  235. ld b16 $r14 D[$r1 + 8] // TPC list head from the chipset entry
  236. ld b16 $r15 D[$r1 + 10] // TPC list tail from the chipset entry
  237. st b16 D[$r0 + tpc_mmio_list_head] $r14
  238. st b16 D[$r0 + tpc_mmio_list_tail] $r15
  239. call mmctx_size
  240. ld b32 $r14 D[$r0 + tpc_count]
  241. mulu $r14 $r15 // $r14 = tpc_count * per-TPC size
  242. add b32 $r2 $r14
  243. add b32 $r3 $r14
  244. // round up base/size to 256 byte boundary (for strand SWBASE)
  // As written, both values are truncated to a multiple of 256 and then
  // bumped by 256, so a value that is already aligned also grows by 256.
  245. add b32 $r4 0x1300 // I/O address 0x1d300
  246. shr b32 $r3 2 // size in 32-bit words
  247. iowr I[$r4 + 0x000] $r3 // MMCTX_LOAD_COUNT, wtf for?!?
  248. shr b32 $r2 8
  249. shr b32 $r3 6 // together with the shift by 2 above: size >> 8
  250. add b32 $r2 1
  251. add b32 $r3 1
  252. shl b32 $r2 8
  253. shl b32 $r3 8
  254. // calculate size of strand context data
  255. mov b32 $r15 $r2 // pass the aligned base in $r15
  256. call strand_ctx_init // as used here: returns the strand data size in $r15
  257. add b32 $r3 $r15
  258. // save context size, and tell HUB we're done
  259. mov $r1 0x800
  260. shl b32 $r1 6
  261. iowr I[$r1 + 0x100] $r3 // CC_SCRATCH[1] = context size
  262. add b32 $r1 0x800 // I/O address 0x20800
  263. clear b32 $r2
  264. bset $r2 31
  265. iowr I[$r1 + 0x000] $r2 // CC_SCRATCH[0] |= 0x80000000
  266. // Main program loop, very simple, sleeps until woken up by the interrupt
  267. // handler, pulls a command from the queue and executes its handler
  268. // (never returns; init falls through into it)
  269. main:
  270. bset $flags $p0 // armed here; ih clears $p0 on its way out
  271. sleep $p0
  272. mov $r13 cmd_queue
  273. call queue_get // as used below: command in $r14, data in $r15
  274. bra $p1 main // $p1 set after the call: nothing to run (presumably the queue was empty), sleep again
  275. // 0x0000-0x0003 are all context transfers
  276. cmpu b32 $r14 0x04
  277. bra nc main_not_ctx_xfer // commands 4 and above are rejected below
  278. // fetch $flags and mask off $p1/$p2
  279. mov $r1 $flags
  280. mov $r2 0x0006
  281. not b32 $r2
  282. and $r1 $r2 // clear bits 1 and 2 of the flags copy
  283. // set $p1/$p2 according to transfer type
  284. shl b32 $r14 1 // command bit 0 lands on $p1, command bit 1 on $p2
  285. or $r1 $r14
  286. mov $flags $r1
  287. // transfer context data
  288. call ctx_xfer // $r15 still holds the data word from the queue (context base)
  289. bra main
  290. main_not_ctx_xfer:
  291. shl b32 $r15 $r14 16 // command number goes into the upper half of the error word
  292. or $r15 E_BAD_COMMAND
  293. call error
  294. bra main
  295. // interrupt handler
  // Installed as the $iv0 vector by init. When the fifo interrupt (INTR bit 2)
  // is pending, one FIFO_CMD/FIFO_DATA pair is moved into cmd_queue and the
  // fifo is acked. Every pending bit that was read from INTR is then acked,
  // and $p0 is cleared so the sleep in main ends.
  // Saves and restores $r8-$r11, $r13-$r15 and $flags.
  296. ih:
  297. push $r8
  298. mov $r8 $flags
  299. push $r8 // saved copy of $flags
  300. push $r9
  301. push $r10
  302. push $r11
  303. push $r13
  304. push $r14
  305. push $r15
  306. // incoming fifo command?
  307. iord $r10 I[$r0 + 0x200] // INTR
  308. and $r11 $r10 0x00000004
  309. bra e ih_no_fifo // bit 2 clear: nothing from the fifo
  310. // queue incoming fifo command for later processing
  311. mov $r11 0x1900
  312. mov $r13 cmd_queue
  313. iord $r14 I[$r11 + 0x100] // FIFO_CMD
  314. iord $r15 I[$r11 + 0x000] // FIFO_DATA
  315. call queue_put // as used here: queue in $r13, command in $r14, data in $r15
  316. add b32 $r11 0x400 // I/O address 0x1d00; relies on $r11 surviving the call
  317. mov $r14 1
  318. iowr I[$r11 + 0x000] $r14 // FIFO_ACK
  319. // ack, and wake up main()
  320. ih_no_fifo:
  321. iowr I[$r0 + 0x100] $r10 // INTR_ACK
  322. pop $r15
  323. pop $r14
  324. pop $r13
  325. pop $r11
  326. pop $r10
  327. pop $r9
  328. pop $r8
  329. mov $flags $r8 // restore the interrupted $flags
  330. pop $r8
  331. bclr $flags $p0 // done after the restore so the clear is not overwritten
  332. iret
  333. // Tell the HUB fuc that this GPC has finished an activity, by writing
  334. // the bit for this GPC (1 << gpc_id) to HUB_BAR_SET.
  335. // Overwrites $r14 and $r15.
  336. hub_barrier_done:
  337. ld b32 $r14 D[$r0 + gpc_id]
  338. mov $r15 1
  339. shl b32 $r15 $r14 // value to write: 1 << gpc_id
  340. mov $r14 -0x6be8 // low half of 0x409418, sign-extended
  341. sethi $r14 0x400000 // address to write: 0x409418 - HUB_BAR_SET
  342. call nv_wr32
  343. ret
  344. // Disables various things, waits a bit, and re-enables them..
  345. //
  346. // Not sure how exactly this helps, perhaps "ENABLE" is not such a
  347. // good description for the bits we turn off? Anyways, without this,
  348. // funny things happen.
  349. //
  // Called by ctx_xfer before a load. Overwrites $r14 and $r15.
  350. ctx_redswitch:
  351. mov $r14 0x614
  352. shl b32 $r14 6 // I/O address 0x18500
  353. mov $r15 0x020
  354. iowr I[$r14] $r15 // GPC_RED_SWITCH = POWER
  355. mov $r15 8 // delay loop count
  356. ctx_redswitch_delay:
  357. sub b32 $r15 1
  358. bra ne ctx_redswitch_delay // spin until the counter reaches zero
  359. mov $r15 0xa20
  360. iowr I[$r14] $r15 // GPC_RED_SWITCH = UNK11, ENABLE, POWER
  361. ret
  362. // Transfer GPC context data between GPU and storage area
  363. //
  364. // In: $r15 context base address
  365. // $p1 clear on save, set on load
  366. // $p2 set if opposite direction done/will be done, so:
  367. // on save it means: "a load will follow this save"
  368. // on load it means: "a save preceded this load"
  369. //
  // Steps: program MEM_BASE, run ctx_redswitch on a load, start the strand
  // save/load, transfer the GPC mmio list, transfer the TPC mmio list for
  // every TPC in tpc_mask, wait for the strands, optionally issue strand
  // command 0x0d, then set the bit for this GPC in the HUB barrier.
  // Overwrites $r1, $r2 and $r10-$r15.
  370. ctx_xfer:
  371. // set context base address
  372. mov $r1 0xa04
  373. shl b32 $r1 6 // I/O address 0x28100
  374. iowr I[$r1 + 0x000] $r15// MEM_BASE
  375. bra not $p1 ctx_xfer_not_load // the redswitch sequence runs on loads only
  376. call ctx_redswitch
  377. ctx_xfer_not_load:
  378. // strands
  379. mov $r1 0x4afc
  380. sethi $r1 0x20000
  381. mov $r2 0xc
  382. iowr I[$r1] $r2 // STRAND_CMD(0x3f) = 0x0c
  383. call strand_wait
  384. mov $r2 0x47fc
  385. sethi $r2 0x20000
  386. iowr I[$r2] $r0 // STRAND_FIRST_GENE(0x3f) = 0x00
  387. xbit $r2 $flags $p1 // 0 on save, 1 on load
  388. add b32 $r2 3
  389. iowr I[$r1] $r2 // STRAND_CMD(0x3f) = 0x03/0x04 (SAVE/LOAD)
  390. // mmio context
  391. xbit $r10 $flags $p1 // direction
  392. or $r10 2 // first
  393. mov $r11 0x0000
  394. sethi $r11 0x500000
  395. ld b32 $r12 D[$r0 + gpc_id]
  396. shl b32 $r12 15 // 0x8000 bytes of register space per GPC
  397. add b32 $r11 $r12 // base = NV_PGRAPH_GPCn
  398. ld b32 $r12 D[$r0 + gpc_mmio_list_head]
  399. ld b32 $r13 D[$r0 + gpc_mmio_list_tail]
  400. mov $r14 0 // not multi
  401. call mmctx_xfer // as used here: $r10 flags, $r11 base, $r12 head, $r13 tail, $r14 stride
  402. // per-TPC mmio context
  403. xbit $r10 $flags $p1 // direction
  404. or $r10 4 // last
  405. mov $r11 0x4000
  406. sethi $r11 0x500000 // base = NV_PGRAPH_GPC0_TPC0
  407. ld b32 $r12 D[$r0 + gpc_id]
  408. shl b32 $r12 15
  409. add b32 $r11 $r12 // base = NV_PGRAPH_GPCn_TPC0
  410. ld b32 $r12 D[$r0 + tpc_mmio_list_head]
  411. ld b32 $r13 D[$r0 + tpc_mmio_list_tail]
  412. ld b32 $r15 D[$r0 + tpc_mask] // one bit per TPC, computed by init
  413. mov $r14 0x800 // stride = 0x800
  414. call mmctx_xfer
  415. // wait for strands to finish
  416. call strand_wait
  417. // post-transfer strand command: as written, it runs on a load ($p1 set)
  418. // or on a save with $p2 set, and is skipped on a save with $p2 clear.
  419. // NOTE(review): the earlier wording here said "a save without a load following" - confirm which is intended
  420. bra $p1 ctx_xfer_post
  421. bra not $p2 ctx_xfer_done
  422. ctx_xfer_post:
  423. mov $r1 0x4afc
  424. sethi $r1 0x20000
  425. mov $r2 0xd
  426. iowr I[$r1] $r2 // STRAND_CMD(0x3f) = 0x0d
  427. call strand_wait
  428. // mark completion in HUB's barrier
  429. ctx_xfer_done:
  430. call hub_barrier_done
  431. ret
  432. .align 256