nvc0_grgpc.fuc 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480
  1. /* fuc microcode for nvc0 PGRAPH/GPC
  2. *
  3. * Copyright 2011 Red Hat Inc.
  4. *
  5. * Permission is hereby granted, free of charge, to any person obtaining a
  6. * copy of this software and associated documentation files (the "Software"),
  7. * to deal in the Software without restriction, including without limitation
  8. * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  9. * and/or sell copies of the Software, and to permit persons to whom the
  10. * Software is furnished to do so, subject to the following conditions:
  11. *
  12. * The above copyright notice and this permission notice shall be included in
  13. * all copies or substantial portions of the Software.
  14. *
  15. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  18. * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
  19. * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  20. * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  21. * OTHER DEALINGS IN THE SOFTWARE.
  22. *
  23. * Authors: Ben Skeggs
  24. */
  25. /* To build:
  26. * m4 nvc0_grgpc.fuc | envyas -a -w -m fuc -V nva3 -o nvc0_grgpc.fuc.h
  27. */
  28. /* TODO
  29. * - bracket certain functions with scratch writes, useful for debugging
  30. * - watchdog timer around ctx operations
  31. */
  32. .section nvc0_grgpc_data
  33. include(`nvc0_graph.fuc')
      // Per-GPC state. Every word below starts as zero and is filled in by
      // init; ctx_xfer and hub_barrier_done read the values back later.
  34. gpc_id: .b32 0 // MYINDEX register value read by init
      // 16-bit list pointers copied by init from the matching chipsets entry
  35. gpc_mmio_list_head: .b32 0
  36. gpc_mmio_list_tail: .b32 0
  37. tpc_count: .b32 0 // UNITS register value masked with 0x1f
  38. tpc_mask: .b32 0 // (1 << tpc_count) - 1
      // 16-bit list pointers copied by init from the matching chipsets entry
  39. tpc_mmio_list_head: .b32 0
  40. tpc_mmio_list_tail: .b32 0
      // command queue: the interrupt handler puts entries, the main loop
      // takes them out again
  41. cmd_queue: queue_init
  42. // chipset descriptions
      //
      // One 12-byte entry per supported chipset, walked by init in steps
      // of 12:
      //   +0  chipset id, read as one 32-bit word and compared against the
      //       value found in CC_SCRATCH[0]
      //   +4  16-bit pointer to the head of the GPC mmio list
      //   +6  16-bit pointer to the tail of the GPC mmio list
      //   +8  16-bit pointer to the head of the TPC mmio list
      //   +10 16-bit pointer to the tail of the TPC mmio list
      // The table ends with an all-zero id word; init stops searching when
      // it reads an id of 0.
  43. chipsets:
  44. .b8 0xc0 0 0 0
  45. .b16 nvc0_gpc_mmio_head
  46. .b16 nvc0_gpc_mmio_tail
  47. .b16 nvc0_tpc_mmio_head
  48. .b16 nvc0_tpc_mmio_tail
  49. .b8 0xc1 0 0 0
  50. .b16 nvc0_gpc_mmio_head
  51. .b16 nvc1_gpc_mmio_tail
  52. .b16 nvc0_tpc_mmio_head
  53. .b16 nvc1_tpc_mmio_tail
  54. .b8 0xc3 0 0 0
  55. .b16 nvc0_gpc_mmio_head
  56. .b16 nvc0_gpc_mmio_tail
  57. .b16 nvc0_tpc_mmio_head
  58. .b16 nvc3_tpc_mmio_tail
  59. .b8 0xc4 0 0 0
  60. .b16 nvc0_gpc_mmio_head
  61. .b16 nvc0_gpc_mmio_tail
  62. .b16 nvc0_tpc_mmio_head
  63. .b16 nvc3_tpc_mmio_tail
  64. .b8 0xc8 0 0 0
  65. .b16 nvc0_gpc_mmio_head
  66. .b16 nvc0_gpc_mmio_tail
  67. .b16 nvc0_tpc_mmio_head
  68. .b16 nvc0_tpc_mmio_tail
  69. .b8 0xce 0 0 0
  70. .b16 nvc0_gpc_mmio_head
  71. .b16 nvc0_gpc_mmio_tail
  72. .b16 nvc0_tpc_mmio_head
  73. .b16 nvc3_tpc_mmio_tail
  74. .b8 0xcf 0 0 0
  75. .b16 nvc0_gpc_mmio_head
  76. .b16 nvc0_gpc_mmio_tail
  77. .b16 nvc0_tpc_mmio_head
  78. .b16 nvcf_tpc_mmio_tail
  79. .b8 0 0 0 0 // terminator: id word of zero ends the search in init
  80. // GPC mmio lists
      //
      // One list shared by all chipsets, with two possible end points. A
      // chipsets entry selects how much of the list it uses by pointing at
      // one of the tail labels; everything from the head label up to that
      // tail label belongs to the list. Each entry is produced by the
      // list-entry macro from the shared graph source; the two arguments
      // look like a register offset and a register count - TODO confirm
      // against the macro definition.
  81. nvc0_gpc_mmio_head:
  82. mmctx_data(0x000380, 1)
  83. mmctx_data(0x000400, 6)
  84. mmctx_data(0x000450, 9)
  85. mmctx_data(0x000600, 1)
  86. mmctx_data(0x000684, 1)
  87. mmctx_data(0x000700, 5)
  88. mmctx_data(0x000800, 1)
  89. mmctx_data(0x000808, 3)
  90. mmctx_data(0x000828, 1)
  91. mmctx_data(0x000830, 1)
  92. mmctx_data(0x0008d8, 1)
  93. mmctx_data(0x0008e0, 1)
  94. mmctx_data(0x0008e8, 6)
  95. mmctx_data(0x00091c, 1)
  96. mmctx_data(0x000924, 3)
  97. mmctx_data(0x000b00, 1)
  98. mmctx_data(0x000b08, 6)
  99. mmctx_data(0x000bb8, 1)
  100. mmctx_data(0x000c08, 1)
  101. mmctx_data(0x000c10, 8)
  102. mmctx_data(0x000c80, 1)
  103. mmctx_data(0x000c8c, 1)
  104. mmctx_data(0x001000, 3)
  105. mmctx_data(0x001014, 1)
      // end of the list for every chipset except 0xc1
  106. nvc0_gpc_mmio_tail:
  107. mmctx_data(0x000c6c, 1)
      // end of the list for chipset 0xc1, which also gets the entry above
  108. nvc1_gpc_mmio_tail:
  109. // TPC mmio lists
      //
      // One cumulative list with four possible end points. A chipsets entry
      // picks a tail label, and the list for that chipset is everything
      // from the head label up to the chosen tail, so each later tail label
      // adds the entries between it and the previous one.
  110. nvc0_tpc_mmio_head:
  111. mmctx_data(0x000018, 1)
  112. mmctx_data(0x00003c, 1)
  113. mmctx_data(0x000048, 1)
  114. mmctx_data(0x000064, 1)
  115. mmctx_data(0x000088, 1)
  116. mmctx_data(0x000200, 6)
  117. mmctx_data(0x00021c, 2)
  118. mmctx_data(0x000300, 6)
  119. mmctx_data(0x0003d0, 1)
  120. mmctx_data(0x0003e0, 2)
  121. mmctx_data(0x000400, 3)
  122. mmctx_data(0x000420, 1)
  123. mmctx_data(0x0004b0, 1)
  124. mmctx_data(0x0004e8, 1)
  125. mmctx_data(0x0004f4, 1)
  126. mmctx_data(0x000520, 2)
  127. mmctx_data(0x000604, 4)
  128. mmctx_data(0x000644, 20)
  129. mmctx_data(0x000698, 1)
  130. mmctx_data(0x000750, 2)
      // shortest list: used by the 0xc0 and 0xc8 entries
  131. nvc0_tpc_mmio_tail:
  132. mmctx_data(0x000758, 1)
  133. mmctx_data(0x0002c4, 1)
  134. mmctx_data(0x0006e0, 1)
      // used by the 0xcf entry
  135. nvcf_tpc_mmio_tail:
  136. mmctx_data(0x0004bc, 1)
      // used by the 0xc3, 0xc4 and 0xce entries
  137. nvc3_tpc_mmio_tail:
  138. mmctx_data(0x000544, 1)
      // longest list: used by the 0xc1 entry
  139. nvc1_tpc_mmio_tail:
      // Start of the code section. The first instruction jumps over the
      // shared helper routines and the error routine to the init label.
  140. .section nvc0_grgpc_code
  141. bra init
      // The shared graph source is pulled in a second time, now with the
      // macro below defined; presumably that switches it from emitting
      // data to emitting its helper routines - TODO confirm in
      // nvc0_graph.fuc.
  142. define(`include_code')
  143. include(`nvc0_graph.fuc')
  144. // reports an exception to the host
  145. //
  146. // In: $r15 error code (see nvc0_graph.fuc)
  147. //
      // $r14 is saved and restored. $r15 is overwritten with 1 and is not
      // restored, so the error code is lost on return.
      // Assumes nv_wr32 takes the address in $r14 and the value in $r15 and
      // leaves $r14 unchanged, since $r14 is reused after the first call -
      // TODO confirm in nvc0_graph.fuc.
  148. error:
  149. push $r14
  150. mov $r14 -0x67ec // 0x9814
  151. sethi $r14 0x400000 // high half set: $r14 = 0x409814
  152. call nv_wr32 // HUB_CTXCTL_CC_SCRATCH[5] = error code
  153. add b32 $r14 0x41c // $r14 = 0x409c30
  154. mov $r15 1
  155. call nv_wr32 // HUB_CTXCTL_INTR_UP_SET
  156. pop $r14
  157. ret
  158. // GPC fuc initialisation, executed by triggering ucode start, will
  159. // fall through to main loop after completion.
  160. //
  161. // Input:
  162. // CC_SCRATCH[0]: chipset (PMC_BOOT_0 read returns 0x0bad0bad... sigh)
  163. // CC_SCRATCH[1]: context base
  164. //
  165. // Output:
  166. // CC_SCRATCH[0]:
  167. // 31:31: set to signal completion
  168. // CC_SCRATCH[1]:
  169. // 31:0: GPC context size
  170. //
      // Register use inside this routine:
      //   $r0  zero, used as the base for all D[] accesses to the variables
      //   $r1  pointer to the matching chipsets entry once it is found
      //   $r2  running context base address
      //   $r3  running context size
  171. init:
  172. clear b32 $r0
  173. mov $sp $r0 // stack pointer = 0
  174. // enable fifo access
  175. mov $r1 0x1200
  176. mov $r2 2
  177. iowr I[$r1 + 0x000] $r2 // FIFO_ENABLE
  178. // setup i0 handler, and route all interrupts to it
  179. mov $r1 ih
  180. mov $iv0 $r1
  181. mov $r1 0x400
  182. iowr I[$r1 + 0x300] $r0 // INTR_DISPATCH
  183. // enable fifo interrupt
  184. mov $r2 4 // same bit the handler tests in INTR
  185. iowr I[$r1 + 0x000] $r2 // INTR_EN_SET
  186. // enable interrupts
  187. bset $flags ie0
  188. // figure out which GPC we are, and how many TPCs we have
  189. mov $r1 0x608
  190. shl b32 $r1 6 // $r1 = 0x608 << 6
  191. iord $r2 I[$r1 + 0x000] // UNITS
  192. mov $r3 1
  193. and $r2 0x1f // keep the low five bits as the TPC count
  194. shl b32 $r3 $r2
  195. sub b32 $r3 1 // $r3 = (1 << count) - 1, one bit per TPC
  196. st b32 D[$r0 + tpc_count] $r2
  197. st b32 D[$r0 + tpc_mask] $r3
  198. add b32 $r1 0x400
  199. iord $r2 I[$r1 + 0x000] // MYINDEX
  200. st b32 D[$r0 + gpc_id] $r2
  201. // find context data for this chipset
  202. mov $r2 0x800
  203. shl b32 $r2 6
  204. iord $r2 I[$r2 + 0x000] // CC_SCRATCH[0]
      // start one entry before the table so the add at the top of the loop
      // lands on the first entry
  205. mov $r1 chipsets - 12
  206. init_find_chipset:
  207. add b32 $r1 12 // entries are 12 bytes apart
  208. ld b32 $r3 D[$r1 + 0x00] // id word of this entry
  209. cmpu b32 $r3 $r2
  210. bra e init_context
  211. cmpu b32 $r3 0 // an id of zero marks the end of the table
  212. bra ne init_find_chipset
  213. // unknown chipset
      // NOTE(review): this ret has no matching call in this file; the
      // routine is entered with a bra and the stack pointer was set to 0
      // above. Confirm this is the intended way to stop here.
  214. ret
  215. // initialise context base, and size tracking
  216. init_context:
  217. mov $r2 0x800
  218. shl b32 $r2 6
  219. iord $r2 I[$r2 + 0x100] // CC_SCRATCH[1], initial base
  220. clear b32 $r3 // track GPC context size here
  221. // set mmctx base addresses now so we don't have to do it later,
  222. // they don't currently ever change
  223. mov $r4 0x700
  224. shl b32 $r4 6
  225. shr b32 $r5 $r2 8 // the base is written in units of 256 bytes
  226. iowr I[$r4 + 0x000] $r5 // MMCTX_SAVE_SWBASE
  227. iowr I[$r4 + 0x100] $r5 // MMCTX_LOAD_SWBASE
  228. // calculate GPC mmio context size, store the chipset-specific
  229. // mmio list pointers somewhere we can get at them later without
  230. // re-parsing the chipset list
  231. clear b32 $r14
  232. clear b32 $r15
  233. ld b16 $r14 D[$r1 + 4] // GPC list head pointer from the entry
  234. ld b16 $r15 D[$r1 + 6] // GPC list tail pointer from the entry
  235. st b16 D[$r0 + gpc_mmio_list_head] $r14
  236. st b16 D[$r0 + gpc_mmio_list_tail] $r15
      // presumably takes the list head/tail in $r14/$r15 and returns the
      // size in $r15 - TODO confirm in nvc0_graph.fuc
  237. call mmctx_size
  238. add b32 $r2 $r15 // advance the base past the GPC mmio data
  239. add b32 $r3 $r15 // and count it in the total size
  240. // calculate per-TPC mmio context size, store the list pointers
  241. ld b16 $r14 D[$r1 + 8] // TPC list head pointer from the entry
  242. ld b16 $r15 D[$r1 + 10] // TPC list tail pointer from the entry
  243. st b16 D[$r0 + tpc_mmio_list_head] $r14
  244. st b16 D[$r0 + tpc_mmio_list_tail] $r15
  245. call mmctx_size
  246. ld b32 $r14 D[$r0 + tpc_count]
  247. mulu $r14 $r15 // size for one TPC times the number of TPCs
  248. add b32 $r2 $r14
  249. add b32 $r3 $r14
  250. // round up base/size to 256 byte boundary (for strand SWBASE)
  251. add b32 $r4 0x1300 // $r4 = (0x700 << 6) + 0x1300
  252. shr b32 $r3 2 // size divided by 4
  253. iowr I[$r4 + 0x000] $r3 // MMCTX_LOAD_COUNT, wtf for?!?
      // Both values become ((x >> 8) + 1) << 8; $r3 was already shifted
      // right by 2 above, so 6 more completes the shift by 8. Note this
      // always moves to the next multiple of 256, even when the value is
      // already a multiple of 256.
  254. shr b32 $r2 8
  255. shr b32 $r3 6
  256. add b32 $r2 1
  257. add b32 $r3 1
  258. shl b32 $r2 8
  259. shl b32 $r3 8
  260. // calculate size of strand context data
  261. mov b32 $r15 $r2 // pass the rounded base in $r15
      // presumably returns the strand context size in $r15 - TODO confirm
      // in nvc0_graph.fuc
  262. call strand_ctx_init
  263. add b32 $r3 $r15
  264. // save context size, and tell HUB we're done
  265. mov $r1 0x800
  266. shl b32 $r1 6
  267. iowr I[$r1 + 0x100] $r3 // CC_SCRATCH[1] = context size
  268. add b32 $r1 0x800
  269. clear b32 $r2
  270. bset $r2 31 // $r2 = 0x80000000
      // Only bit 31 is written here; the comment below implies this address
      // sets bits in CC_SCRATCH[0] rather than replacing it - TODO confirm.
  271. iowr I[$r1 + 0x000] $r2 // CC_SCRATCH[0] |= 0x80000000
  272. // Main program loop, very simple, sleeps until woken up by the interrupt
  273. // handler, pulls a command from the queue and executes its handler
  274. //
      // Entered by falling through from the end of init. Never returns.
  275. main:
  276. bset $flags $p0 // the interrupt handler clears $p0 to end the sleep
  277. sleep $p0
  278. mov $r13 cmd_queue
      // presumably returns the command in $r14 and its data in $r15, and
      // sets $p1 when nothing was queued - TODO confirm in nvc0_graph.fuc
  279. call queue_get
  280. bra $p1 main
  281. // 0x0000-0x0003 are all context transfers
  282. cmpu b32 $r14 0x04
  283. bra nc main_not_ctx_xfer // anything else is reported as a bad command
  284. // fetch $flags and mask off $p1/$p2
  285. mov $r1 $flags
  286. mov $r2 0x0006 // flag bits 1 and 2
  287. not b32 $r2
  288. and $r1 $r2
  289. // set $p1/$p2 according to transfer type
  290. shl b32 $r14 1 // command bits 0/1 move to flag bits 1/2
  291. or $r1 $r14
  292. mov $flags $r1
  293. // transfer context data
  294. call ctx_xfer // $r15 still holds the value taken from the queue
  295. bra main
  296. main_not_ctx_xfer:
  297. shl b32 $r15 $r14 16 // command number goes in the upper half
  298. or $r15 E_BAD_COMMAND
  299. call error
  300. bra main
  301. // interrupt handler
      //
      // Installed in $iv0 by init. Saves and restores $flags and every
      // register it pushes below, queues an incoming fifo command if one is
      // pending, acknowledges the interrupt and clears $p0 so the sleep in
      // the main loop ends. Relies on $r0 still being zero for the I[]
      // accesses that use it as a base.
  302. ih:
  303. push $r8
  304. mov $r8 $flags
  305. push $r8 // saved copy of $flags
  306. push $r9 // not used directly here; presumably for queue_put - confirm
  307. push $r10
  308. push $r11
  309. push $r13
  310. push $r14
  311. push $r15
  312. // incoming fifo command?
  313. iord $r10 I[$r0 + 0x200] // INTR
  314. and $r11 $r10 0x00000004 // same bit init enabled in INTR_EN_SET
  315. bra e ih_no_fifo
  316. // queue incoming fifo command for later processing
  317. mov $r11 0x1900
  318. mov $r13 cmd_queue
  319. iord $r14 I[$r11 + 0x100] // FIFO_CMD
  320. iord $r15 I[$r11 + 0x000] // FIFO_DATA
  321. call queue_put
  322. add b32 $r11 0x400 // $r11 = 0x1d00
  323. mov $r14 1
  324. iowr I[$r11 + 0x000] $r14 // FIFO_ACK
  325. // ack, and wake up main()
  326. ih_no_fifo:
  327. iowr I[$r0 + 0x100] $r10 // INTR_ACK
  328. pop $r15
  329. pop $r14
  330. pop $r13
  331. pop $r11
  332. pop $r10
  333. pop $r9
  334. pop $r8
  335. mov $flags $r8 // restore the saved $flags
  336. pop $r8
      // done after the restore above so the cleared $p0 is not overwritten
  337. bclr $flags $p0
  338. iret
  339. // Set this GPC's bit in HUB_BAR, used to signal completion of various
  340. // activities to the HUB fuc
  341. //
      // Takes no register arguments; reads gpc_id from data memory.
      // Overwrites $r14 and $r15 without restoring them.
  342. hub_barrier_done:
  343. mov $r15 1
  344. ld b32 $r14 D[$r0 + gpc_id]
  345. shl b32 $r15 $r14 // $r15 = 1 << gpc_id
  346. mov $r14 -0x6be8 // 0x409418 - HUB_BAR_SET
  347. sethi $r14 0x400000
  348. call nv_wr32
  349. ret
  350. // Disables various things, waits a bit, and re-enables them..
  351. //
  352. // Not sure how exactly this helps, perhaps "ENABLE" is not such a
  353. // good description for the bits we turn off? Anyways, without this,
  354. // funny things happen.
  355. //
      // Takes no register arguments. Overwrites $r14 and $r15 without
      // restoring them.
  356. ctx_redswitch:
  357. mov $r14 0x614
  358. shl b32 $r14 6 // $r14 = 0x614 << 6
  359. mov $r15 0x020
  360. iowr I[$r14] $r15 // GPC_RED_SWITCH = POWER
  361. mov $r15 8 // short busy-wait: count down from 8 to 0
  362. ctx_redswitch_delay:
  363. sub b32 $r15 1
  364. bra ne ctx_redswitch_delay
  365. mov $r15 0xa20
  366. iowr I[$r14] $r15 // GPC_RED_SWITCH = UNK11, ENABLE, POWER
  367. ret
  368. // Transfer GPC context data between GPU and storage area
  369. //
  370. // In: $r15 context base address
  371. // $p1 clear on save, set on load
  372. // $p2 set if opposite direction done/will be done, so:
  373. // on save it means: "a load will follow this save"
  374. // on load it means: "a save preceded this load"
  375. //
      // Overwrites $r1, $r2 and $r10-$r15 without restoring them; the
      // helper routines it calls may touch more.
  376. ctx_xfer:
  377. // set context base address
  378. mov $r1 0xa04
  379. shl b32 $r1 6
  380. iowr I[$r1 + 0x000] $r15// MEM_BASE
  381. bra not $p1 ctx_xfer_not_load
  382. call ctx_redswitch // loads only
  383. ctx_xfer_not_load:
  384. // strands
  385. mov $r1 0x4afc
  386. sethi $r1 0x20000 // $r1 = 0x24afc
  387. mov $r2 0xc
  388. iowr I[$r1] $r2 // STRAND_CMD(0x3f) = 0x0c
  389. call strand_wait
  390. mov $r2 0x47fc
  391. sethi $r2 0x20000 // $r2 = 0x247fc
  392. iowr I[$r2] $r0 // STRAND_FIRST_GENE(0x3f) = 0x00
  393. xbit $r2 $flags $p1 // $r2 = 0 on save, 1 on load
  394. add b32 $r2 3
  395. iowr I[$r1] $r2 // STRAND_CMD(0x3f) = 0x03/0x04 (SAVE/LOAD)
  396. // mmio context
  397. xbit $r10 $flags $p1 // direction
  398. or $r10 2 // first
  399. mov $r11 0x0000
  400. sethi $r11 0x500000
  401. ld b32 $r12 D[$r0 + gpc_id]
  402. shl b32 $r12 15 // GPC register blocks are 0x8000 apart
  403. add b32 $r11 $r12 // base = NV_PGRAPH_GPCn
  404. ld b32 $r12 D[$r0 + gpc_mmio_list_head]
  405. ld b32 $r13 D[$r0 + gpc_mmio_list_tail]
  406. mov $r14 0 // not multi
  407. call mmctx_xfer
  408. // per-TPC mmio context
  409. xbit $r10 $flags $p1 // direction
  410. or $r10 4 // last
  411. mov $r11 0x4000
  412. sethi $r11 0x500000 // base = NV_PGRAPH_GPC0_TPC0
  413. ld b32 $r12 D[$r0 + gpc_id]
  414. shl b32 $r12 15
  415. add b32 $r11 $r12 // base = NV_PGRAPH_GPCn_TPC0
  416. ld b32 $r12 D[$r0 + tpc_mmio_list_head]
  417. ld b32 $r13 D[$r0 + tpc_mmio_list_tail]
  418. ld b32 $r15 D[$r0 + tpc_mask] // one bit per TPC, computed by init
  419. mov $r14 0x800 // stride = 0x800
  420. call mmctx_xfer
  421. // wait for strands to finish
  422. call strand_wait
  423. // if load, or a save without a load following, do some
  424. // unknown stuff that's done after finishing a block of
  425. // strand commands
      // NOTE(review): as coded, a save with $p2 clear branches straight to
      // ctx_xfer_done, while a load, or a save with $p2 set, runs the 0x0d
      // step. With the meaning of $p2 given in the routine header that is
      // a save WITH a load following, which does not match the wording
      // above - confirm which one is intended.
  426. bra $p1 ctx_xfer_post
  427. bra not $p2 ctx_xfer_done
  428. ctx_xfer_post:
  429. mov $r1 0x4afc
  430. sethi $r1 0x20000
  431. mov $r2 0xd
  432. iowr I[$r1] $r2 // STRAND_CMD(0x3f) = 0x0d
  433. call strand_wait
  434. // mark completion in HUB's barrier
  435. ctx_xfer_done:
  436. call hub_barrier_done
  437. ret
  438. .align 256