nv98_crypt.fuc 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698
  1. /*
  2. * fuc microcode for nv98 pcrypt engine
  3. * Copyright (C) 2010 Marcin Kościelnicki
  4. *
  5. * This program is free software; you can redistribute it and/or modify
  6. * it under the terms of the GNU General Public License as published by
  7. * the Free Software Foundation; either version 2 of the License, or
  8. * (at your option) any later version.
  9. *
  10. * This program is distributed in the hope that it will be useful,
  11. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. * GNU General Public License for more details.
  14. *
  15. * You should have received a copy of the GNU General Public License
  16. * along with this program; if not, write to the Free Software
  17. * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
  18. */
  19. .section #nv98_pcrypt_data
       // Engine state kept in data memory. On a context switch the interrupt
       // handler transfers a block starting at data offset 0 (its own comment
       // calls it a "128-byte context"), so the ctx_* words below are presumably
       // the per-channel state; the swap buffer is aligned to 0x80, past it.
  20. ctx_dma: // base of the DMA object array indexed by ctxload_dma_loop and dma_cmd
  21. ctx_dma_query: .b32 0 // ctx_dma[0]
  22. ctx_dma_src: .b32 0 // ctx_dma[1]
  23. ctx_dma_dst: .b32 0 // ctx_dma[2]
  24. .equ #dma_count 3 // number of ctx_dma entries above
  25. ctx_query_address_high: .b32 0 // read by cmd_query_get
  26. ctx_query_address_low: .b32 0 // read by cmd_query_get
  27. ctx_query_counter: .b32 0 // first word of the structure written by cmd_query_get
  28. ctx_cond_address_high: .b32 0 // read by cmd_cond_mode
  29. ctx_cond_address_low: .b32 0 // read by cmd_cond_mode
  30. ctx_cond_off: .b32 0 // written by cmd_cond_mode, tested by dtable_cmd before calling a handler
  31. ctx_src_address_high: .b32 0 // read and advanced by crypt_cmd_length
  32. ctx_src_address_low: .b32 0
  33. ctx_dst_address_high: .b32 0 // read and advanced by crypt_cmd_length
  34. ctx_dst_address_low: .b32 0
  35. ctx_mode: .b32 0 // index into crypt_dtable, validated (< 0xf) by crypt_cmd_mode
  36. .align 16
  37. ctx_key: .skip 16 // four words, set through engine_cmd_dtable entries 0-3
  38. ctx_iv: .skip 16 // four words, set through entries 4-7; reloaded at the end of crypt_cmd_length
  39. .align 0x80
  40. swap: // 32-byte scratch buffer used as the local end of the xdld/xdst transfers
  41. .skip 32
  42. .align 8
  43. common_cmd_dtable:
       // Dispatch table consumed by dtable_cmd in the interrupt handler.
       // Each entry is 8 bytes: a word "target + (type << 16)" followed by a mask
       // word. dtable_cmd reads the target with ld b16 at +0, the type with
       // ld b16 at +2 and the mask with ld b32 at +4:
       //   - any parameter bit that is set in the mask word -> invalid_bitfield
       //   - type 2 (0x20000): the parameter is stored to D[target]
       //   - otherwise target is called as a handler; the call is skipped when
       //     (type & ctx_cond_off) == 1
       // The handler biases the table base by 0x80 * 8, so entry 0 is method
       // index 0x80.
  44. .b32 #ctx_query_address_high + 0x20000 ~0xff
  45. .b32 #ctx_query_address_low + 0x20000 ~0xfffffff0
  46. .b32 #ctx_query_counter + 0x20000 ~0xffffffff
  47. .b32 #cmd_query_get + 0x00000 ~1
  48. .b32 #ctx_cond_address_high + 0x20000 ~0xff
  49. .b32 #ctx_cond_address_low + 0x20000 ~0xfffffff0
  50. .b32 #cmd_cond_mode + 0x00000 ~7
  51. .b32 #cmd_wrcache_flush + 0x00000 ~0
  52. .equ #common_cmd_max 0x88 // one past the last valid index (0x80 + 8 entries)
  53. .align 8
  54. engine_cmd_dtable:
       // Same 8-byte entry layout as common_cmd_dtable (target + type << 16,
       // then a mask of parameter bits that must be zero). The handler biases the
       // table base by 0xc0 * 8, so entry 0 is method index 0xc0.
  55. .b32 #ctx_key + 0x0 + 0x20000 ~0xffffffff
  56. .b32 #ctx_key + 0x4 + 0x20000 ~0xffffffff
  57. .b32 #ctx_key + 0x8 + 0x20000 ~0xffffffff
  58. .b32 #ctx_key + 0xc + 0x20000 ~0xffffffff
  59. .b32 #ctx_iv + 0x0 + 0x20000 ~0xffffffff
  60. .b32 #ctx_iv + 0x4 + 0x20000 ~0xffffffff
  61. .b32 #ctx_iv + 0x8 + 0x20000 ~0xffffffff
  62. .b32 #ctx_iv + 0xc + 0x20000 ~0xffffffff
  63. .b32 #ctx_src_address_high + 0x20000 ~0xff
  64. .b32 #ctx_src_address_low + 0x20000 ~0xfffffff0
  65. .b32 #ctx_dst_address_high + 0x20000 ~0xff
  66. .b32 #ctx_dst_address_low + 0x20000 ~0xfffffff0
  67. .b32 #crypt_cmd_mode + 0x00000 ~0xf // type 0: always called
  68. .b32 #crypt_cmd_length + 0x10000 ~0x0ffffff0 // type 1: skipped when ctx_cond_off is 1; mask forces a multiple of 16
  69. .equ #engine_cmd_max 0xce // one past the last valid index (0xc0 + 14 entries)
  70. .align 4
  71. crypt_dtable:
       // Indexed by ctx_mode (crypt_cmd_length uses ctx_mode << 2 as the byte
       // offset). Each 4-byte entry holds two 16-bit code addresses: the "prep"
       // routine called first and the "do" routine called second.
       // There are 15 entries, matching the ctx_mode < 0xf check in crypt_cmd_mode.
  72. .b16 #crypt_copy_prep #crypt_do_inout // mode 0
  73. .b16 #crypt_store_prep #crypt_do_out // mode 1
  74. .b16 #crypt_ecb_e_prep #crypt_do_inout // mode 2
  75. .b16 #crypt_ecb_d_prep #crypt_do_inout // mode 3
  76. .b16 #crypt_cbc_e_prep #crypt_do_inout // mode 4
  77. .b16 #crypt_cbc_d_prep #crypt_do_inout // mode 5
  78. .b16 #crypt_pcbc_e_prep #crypt_do_inout // mode 6
  79. .b16 #crypt_pcbc_d_prep #crypt_do_inout // mode 7
  80. .b16 #crypt_cfb_e_prep #crypt_do_inout // mode 8
  81. .b16 #crypt_cfb_d_prep #crypt_do_inout // mode 9
  82. .b16 #crypt_ofb_prep #crypt_do_inout // mode 10
  83. .b16 #crypt_ctr_prep #crypt_do_inout // mode 11
  84. .b16 #crypt_cbc_mac_prep #crypt_do_in // mode 12
  85. .b16 #crypt_cmac_finish_complete_prep #crypt_do_in // mode 13
  86. .b16 #crypt_cmac_finish_partial_prep #crypt_do_in // mode 14
  87. .align 0x100
  88. .section #nv98_pcrypt_code
       // Entry code: runs from the start of the code section, installs the
       // interrupt handler, programs interrupt routing/enables, then idles.
       // All real work happens in ih.
  89. // $r0 is always set to 0 in our code - this allows some space savings.
  90. clear b32 $r0
  91. // set up the interrupt handler
  92. mov $r1 #ih
  93. mov $iv0 $r1
  94. // init stack pointer
  95. mov $sp $r0
  96. // set interrupt dispatch - route timer, fifo, ctxswitch to i0, others to host
  97. movw $r1 0xfff0
  98. sethi $r1 0
  99. mov $r2 0x400
  100. iowr I[$r2 + 0x300] $r1
  101. // enable the interrupts
  102. or $r1 0xc // adds bits 2 and 3 - the same two bits ih tests (0x4 fifo command, 0x8 context switch)
  103. iowr I[$r2] $r1
  104. // enable fifo access and context switching
  105. mov $r1 3
  106. mov $r2 0x1200
  107. iowr I[$r2] $r1
  108. // enable i0 delivery
  109. bset $flags ie0
  110. // sleep forever, waking only for interrupts.
  111. bset $flags $p0
  112. spin:
  113. sleep $p0
  114. bra #spin // nothing in this file clears $p0, so control never leaves this loop except via interrupts
  115. // i0 handler
  116. ih:
       // Handles two interrupt sources, tested in this order:
       //   bit 3 (0x8) of the status read from I[0x200] - context switch request
       //   bit 2 (0x4)                                  - incoming fifo command
       // Register use while dispatching a command:
       //   $r1 = interrupt status (kept until the final ack)
       //   $r2 = method word shifted left by 16; handlers OR an error code into
       //         the low bits before signalling an error
       //   $r3 = command parameter
       //   $r4 = method index (low 11 bits of the method word)
       // Handlers are entered with "call" and report an error by returning with
       // $p1 set.
  117. // see which interrupts we got
  118. iord $r1 I[$r0 + 0x200]
  119. and $r2 $r1 0x8
  120. cmpu b32 $r2 0
  121. bra e #noctx
  122. // context switch... prepare the regs for xfer
  123. mov $r2 0x7700
  124. mov $xtargets $r2
  125. mov $xdbase $r0
  126. // 128-byte context.
  127. mov $r2 0
  128. sethi $r2 0x50000 // $r2 = local data offset 0 plus the transfer size field in the high half
  129. // read current channel
  130. mov $r3 0x1400
  131. iord $r4 I[$r3]
  132. // if bit 30 set, it's active, so we have to unload it first.
  133. shl b32 $r5 $r4 1 // move bit 30 into the sign bit
  134. cmps b32 $r5 0
  135. bra nc #ctxload
  136. // unload the current channel - save the context
  137. xdst $r0 $r2
  138. xdwait
  139. // and clear bit 30, then write back
  140. bclr $r4 0x1e
  141. iowr I[$r3] $r4
  142. // tell PFIFO we unloaded
  143. mov $r4 1
  144. iowr I[$r3 + 0x200] $r4
  145. bra #noctx
  146. ctxload:
  147. // no channel loaded - perhaps we're requested to load one
  148. iord $r4 I[$r3 + 0x100]
  149. shl b32 $r15 $r4 1 // same bit-30 test as above, on the next-channel word
  150. cmps b32 $r15 0
  151. // if bit 30 of next channel not set, probably PFIFO is just
  152. // killing a context. do a faux load, without the active bit.
  153. bra nc #dummyload
  154. // ok, do a real context load.
  155. xdld $r0 $r2
  156. xdwait
       // re-program the DMA object registers from the freshly loaded context:
       // for i = dma_count-1 down to 0, write ctx_dma[i] to I[(0x180 + i) << 8]
  157. mov $r5 #ctx_dma
  158. mov $r6 #dma_count - 1
  159. ctxload_dma_loop:
  160. ld b32 $r7 D[$r5 + $r6 * 4]
  161. add b32 $r8 $r6 0x180
  162. shl b32 $r8 8
  163. iowr I[$r8] $r7
  164. sub b32 $r6 1
  165. bra nc #ctxload_dma_loop // keep looping until the decrement borrows (i went below 0)
  166. dummyload:
  167. // tell PFIFO we're done
  168. mov $r5 2
  169. iowr I[$r3 + 0x200] $r5
  170. noctx:
  171. and $r2 $r1 0x4
  172. cmpu b32 $r2 0
  173. bra e #nocmd
  174. // incoming fifo command.
  175. mov $r3 0x1900
  176. iord $r2 I[$r3 + 0x100] // method word
  177. iord $r3 I[$r3] // parameter
  178. // extract the method
  179. and $r4 $r2 0x7ff
  180. // shift the addr to proper position if we need to interrupt later
  181. shl b32 $r2 0x10
  182. // mthd 0 and 0x100 [NAME, NOP]: ignore
  183. and $r5 $r4 0x7bf // zero only for index 0 and index 0x40
  184. cmpu b32 $r5 0
  185. bra e #cmddone
       // pick a dispatch table by index range; the table base is pre-biased so
       // that (index * 8 + base) lands on the right entry
  186. mov $r5 #engine_cmd_dtable - 0xc0 * 8
  187. mov $r6 #engine_cmd_max
  188. cmpu b32 $r4 0xc0
  189. bra nc #dtable_cmd // index >= 0xc0: engine table
  190. mov $r5 #common_cmd_dtable - 0x80 * 8
  191. mov $r6 #common_cmd_max
  192. cmpu b32 $r4 0x80
  193. bra nc #dtable_cmd // index >= 0x80: common table
  194. cmpu b32 $r4 0x60
  195. bra nc #dma_cmd // index >= 0x60: DMA object methods
  196. cmpu b32 $r4 0x50
  197. bra ne #illegal_mthd
  198. // mthd 0x140: PM_TRIGGER
  199. mov $r2 0x2200
  200. clear b32 $r3
  201. sethi $r3 0x20000
  202. iowr I[$r2] $r3
  203. bra #cmddone
  204. dma_cmd:
  205. // mthd 0x180...: DMA_*
  206. cmpu b32 $r4 0x60+#dma_count
  207. bra nc #illegal_mthd
  208. shl b32 $r5 $r4 2
  209. add b32 $r5 (#ctx_dma - 0x60 * 4) & 0xffff // $r5 = address of ctx_dma[index - 0x60]
  210. bset $r3 0x1e // set bit 30 of the parameter before storing it
  211. st b32 D[$r5] $r3
  212. add b32 $r4 0x180 - 0x60
  213. shl b32 $r4 8 // same I[(0x180 + i) << 8] register that ctxload_dma_loop writes
  214. iowr I[$r4] $r3
  215. bra #cmddone
  216. dtable_cmd:
       // $r5 = biased table base, $r6 = one past the highest valid index
  217. cmpu b32 $r4 $r6
  218. bra nc #illegal_mthd
  219. shl b32 $r4 3 // 8 bytes per table entry
  220. add b32 $r4 $r5
  221. ld b32 $r5 D[$r4 + 4] // mask of parameter bits that must be zero
  222. and $r5 $r3
  223. cmpu b32 $r5 0
  224. bra ne #invalid_bitfield
  225. ld b16 $r5 D[$r4] // target: data address or handler address
  226. ld b16 $r6 D[$r4 + 2] // entry type
  227. cmpu b32 $r6 2
  228. bra e #cmd_setctx
  229. ld b32 $r7 D[$r0 + #ctx_cond_off]
  230. and $r6 $r7
  231. cmpu b32 $r6 1
  232. bra e #cmddone // type-1 entry while ctx_cond_off is 1: drop the command silently
  233. call $r5
  234. bra $p1 #dispatch_error // handler returned with $p1 set; $r2 carries the code it ORed in
  235. bra #cmddone
  236. cmd_setctx:
  237. st b32 D[$r5] $r3
  238. bra #cmddone
  239. invalid_bitfield:
  240. or $r2 1 // error code 1 in the low bits of the report word
  241. dispatch_error:
  242. illegal_mthd:
       // report: write the method/error word and the parameter, raise
       // interrupt bit 0x40, then wait until that bit reads back as clear
  243. mov $r4 0x1000
  244. iowr I[$r4] $r2
  245. iowr I[$r4 + 0x100] $r3
  246. mov $r4 0x40
  247. iowr I[$r0] $r4
  248. im_loop:
  249. iord $r4 I[$r0 + 0x200]
  250. and $r4 0x40
  251. cmpu b32 $r4 0
  252. bra ne #im_loop
  253. cmddone:
  254. // remove the command from FIFO
  255. mov $r3 0x1d00
  256. mov $r4 1
  257. iowr I[$r3] $r4
  258. nocmd:
  259. // ack the processed interrupts
  260. and $r1 $r1 0xc // only the two bits this handler services
  261. iowr I[$r0 + 0x100] $r1
  262. iret
  263. cmd_query_get:
       // Handler called from dtable_cmd. In: $r2 = method/error word,
       // $r3 = parameter. Builds a 16-byte structure in swap
       // { ctx_query_counter, 0, timer word read from I[0xb00],
       //   timer word read from I[0xc00] } and writes it out at the query address.
       // Returns with $p1 = bit 0 of the parameter and code 3 ORed into $r2, so
       // dtable_cmd takes its dispatch_error path when that bit is set.
       // Clobbers $r4-$r7, $xtargets, $xdbase.
  264. // if bit 0 of param set, trigger interrupt afterwards.
  265. setp $p1 $r3
  266. or $r2 3
  267. // read PTIMER, beware of races...
  268. mov $r4 0xb00
  269. ptimer_retry:
  270. iord $r6 I[$r4 + 0x100]
  271. iord $r5 I[$r4]
  272. iord $r7 I[$r4 + 0x100]
  273. cmpu b32 $r6 $r7
  274. bra ne #ptimer_retry // the +0x100 word changed while reading the other one: read again
  275. // prepare the query structure
  276. ld b32 $r4 D[$r0 + #ctx_query_counter]
  277. st b32 D[$r0 + #swap + 0x0] $r4
  278. st b32 D[$r0 + #swap + 0x4] $r0
  279. st b32 D[$r0 + #swap + 0x8] $r5
  280. st b32 D[$r0 + #swap + 0xc] $r6
  281. // will use target 0, DMA_QUERY.
  282. mov $xtargets $r0
  283. ld b32 $r4 D[$r0 + #ctx_query_address_high]
  284. shl b32 $r4 0x18
  285. mov $xdbase $r4 // base = high address bits << 24; the low word is used whole as the offset below
  286. ld b32 $r4 D[$r0 + #ctx_query_address_low]
  287. mov $r5 #swap
  288. sethi $r5 0x20000 // local address = swap, size field 2 (a 16-byte transfer, judging by the 0x10 strides used elsewhere)
  289. xdst $r4 $r5
  290. xdwait
  291. ret
  292. cmd_cond_mode:
       // Handler called from dtable_cmd. In: $r3 = mode parameter (the table mask
       // already limits it to 0..7), $r2 = method/error word.
       // Computes ctx_cond_off, which dtable_cmd later uses to skip type-1
       // commands:
       //   mode 0 -> 1, mode 1 -> 0 (mode ^ 1)
       //   mode 2 -> 1 if the word at +4 of the fetched query object is zero
       //   mode 3 -> 1 unless two consecutive query objects match in words +0 and +4
       //   mode 4 -> 1 if they do match
       //   mode >= 5 -> error: returns with $p1 set and code 2 ORed into $r2
       // Clobbers $r3 (modes 0/1), $r4-$r6, $xdbase, $xtargets, and swap.
  293. // if >= 5, INVALID_ENUM
  294. bset $flags $p1
  295. or $r2 2
  296. cmpu b32 $r3 5
  297. bra nc #return
  298. // otherwise, no error.
  299. bclr $flags $p1
  300. // if < 2, no QUERY object is involved
  301. cmpu b32 $r3 2
  302. bra nc #cmd_cond_mode_queryful
  303. xor $r3 1
  304. st b32 D[$r0 + #ctx_cond_off] $r3
  305. return:
  306. ret
  307. cmd_cond_mode_queryful:
  308. // ok, will need to pull a QUERY object, prepare offsets
  309. ld b32 $r4 D[$r0 + #ctx_cond_address_high]
  310. ld b32 $r5 D[$r0 + #ctx_cond_address_low]
  311. and $r6 $r5 0xff // $r6 = low 8 address bits, used as the transfer offset
  312. shr b32 $r5 8
  313. shl b32 $r4 0x18
  314. or $r4 $r5 // $r4 = (high << 24) | (low >> 8), used as the transfer base
  315. mov $xdbase $r4
  316. mov $xtargets $r0
  317. // pull the first one
  318. mov $r5 #swap
  319. sethi $r5 0x20000
  320. xdld $r6 $r5
  321. // if == 2, only a single QUERY is involved...
  322. cmpu b32 $r3 2
  323. bra ne #cmd_cond_mode_double
  324. xdwait
  325. ld b32 $r4 D[$r0 + #swap + 4]
  326. cmpu b32 $r4 0
  327. xbit $r4 $flags z // $r4 = zero flag of the compare above
  328. st b32 D[$r0 + #ctx_cond_off] $r4
  329. ret
  330. // ok, we'll need to pull second one too
  331. cmd_cond_mode_double:
  332. add b32 $r6 0x10 // second object: next 16 bytes at the source...
  333. add b32 $r5 0x10 // ...landing in the second half of swap
  334. xdld $r6 $r5
  335. xdwait
  336. // compare COUNTERs
  337. ld b32 $r5 D[$r0 + #swap + 0x00]
  338. ld b32 $r6 D[$r0 + #swap + 0x10]
  339. cmpu b32 $r5 $r6
  340. xbit $r4 $flags z
  341. // compare RESen
  342. ld b32 $r5 D[$r0 + #swap + 0x04]
  343. ld b32 $r6 D[$r0 + #swap + 0x14]
  344. cmpu b32 $r5 $r6
  345. xbit $r5 $flags z
  346. and $r4 $r5 // $r4 = 1 only if both pairs of words are equal
  347. // and negate or not, depending on mode
  348. cmpu b32 $r3 3
  349. xbit $r5 $flags z // $r5 = 1 when mode == 3
  350. xor $r4 $r5
  351. st b32 D[$r0 + #ctx_cond_off] $r4
  352. ret
  353. cmd_wrcache_flush:
       // Handler called from dtable_cmd. Writes 0x10000 to I[0x2200] - the same
       // register the PM_TRIGGER path in ih writes 0x20000 to - and returns with
       // $p1 clear (no error). Clobbers $r2 and $r3.
  354. bclr $flags $p1
  355. mov $r2 0x2200
  356. clear b32 $r3
  357. sethi $r3 0x10000
  358. iowr I[$r2] $r3
  359. ret
  360. crypt_cmd_mode:
       // Handler called from dtable_cmd. In: $r3 = requested mode.
       // Stores the mode to ctx_mode when it is below 0xf (the number of
       // crypt_dtable entries); otherwise returns with $p1 set and code 2 ORed
       // into $r2. Note that $r2 gets the code on the success path too; it is
       // only looked at when $p1 is set.
  361. // if >= 0xf, INVALID_ENUM
  362. bset $flags $p1
  363. or $r2 2
  364. cmpu b32 $r3 0xf
  365. bra nc #crypt_cmd_mode_return
  366. bclr $flags $p1
  367. st b32 D[$r0 + #ctx_mode] $r3
  368. crypt_cmd_mode_return: // shared "ret"; crypt_cmd_length also branches here for length 0
  369. ret
  370. crypt_cmd_length:
       // Handler called from dtable_cmd; this is the method that actually runs
       // a crypto operation. In: $r3 = byte length (the engine_cmd_dtable mask
       // only lets multiples of 16 through).
       // Steps: load key and IV into the crypto unit, split the src/dst
       // addresses into base + 8-bit offset, call the prep and do routines
       // selected by ctx_mode, write the advanced addresses back to the context,
       // and read the updated IV back into ctx_iv.
       // Registers handed to the prep/do routines:
       //   $r3 = length, $r4 = src base, $r5 = src offset,
       //   $r6 = dst base, $r7 = dst offset, $r8 = ctx_mode * 4
       // NOTE(review): this routine never writes $p1, yet dtable_cmd tests $p1
       // right after the call. Unless $p1 is restored elsewhere (e.g. by iret -
       // not visible here), a value left by an earlier handler would be seen.
       // Confirm against the Falcon interrupt documentation.
  371. // nop if length == 0
  372. cmpu b32 $r3 0
  373. bra e #crypt_cmd_mode_return
  374. // init key, IV
  375. cxset 3
  376. mov $r4 #ctx_key
  377. sethi $r4 0x70000
  378. xdst $r0 $r4
  379. mov $r4 #ctx_iv
  380. sethi $r4 0x60000
  381. xdst $r0 $r4
  382. xdwait
  383. ckeyreg $c7 // presumably selects $c7 (loaded from ctx_key above) as the key register - confirm
  384. // prepare the targets
  385. mov $r4 0x2100
  386. mov $xtargets $r4
  387. // prepare src address
  388. ld b32 $r4 D[$r0 + #ctx_src_address_high]
  389. ld b32 $r5 D[$r0 + #ctx_src_address_low]
  390. shr b32 $r8 $r5 8
  391. shl b32 $r4 0x18
  392. or $r4 $r8 // $r4 = (high << 24) | (low >> 8): transfer base
  393. and $r5 $r5 0xff // $r5 = low 8 bits: running offset
  394. // prepare dst address
  395. ld b32 $r6 D[$r0 + #ctx_dst_address_high]
  396. ld b32 $r7 D[$r0 + #ctx_dst_address_low]
  397. shr b32 $r8 $r7 8
  398. shl b32 $r6 0x18
  399. or $r6 $r8 // $r6 = transfer base, same split as for src
  400. and $r7 $r7 0xff // $r7 = running offset
  401. // find the proper prep & do functions
  402. ld b32 $r8 D[$r0 + #ctx_mode]
  403. shl b32 $r8 2 // 4 bytes per crypt_dtable entry
  404. // run prep
  405. ld b16 $r9 D[$r8 + #crypt_dtable]
  406. call $r9
  407. // do it
  408. ld b16 $r9 D[$r8 + #crypt_dtable + 2]
  409. call $r9
  410. cxset 1
  411. xdwait
  412. cxset 0x61
  413. xdwait
  414. xdwait
  415. // update src address
  416. shr b32 $r8 $r4 0x18 // high word back out of the base
  417. shl b32 $r9 $r4 8 // low word with its bottom 8 bits cleared
  418. add b32 $r9 $r5 // add the offset as advanced by the do routine
  419. adc b32 $r8 0 // carry out of the low word goes into the high word
  420. st b32 D[$r0 + #ctx_src_address_high] $r8
  421. st b32 D[$r0 + #ctx_src_address_low] $r9
  422. // update dst address
  423. shr b32 $r8 $r6 0x18
  424. shl b32 $r9 $r6 8
  425. add b32 $r9 $r7
  426. adc b32 $r8 0
  427. st b32 D[$r0 + #ctx_dst_address_high] $r8
  428. st b32 D[$r0 + #ctx_dst_address_low] $r9
  429. // pull updated IV
  430. cxset 2
  431. mov $r4 #ctx_iv
  432. sethi $r4 0x60000 // same high half as the IV upload above
  433. xdld $r0 $r4
  434. xdwait
  435. ret
  436. crypt_copy_prep: // prep routine for mode 0 in crypt_dtable; run later by crypt_do_inout via cs0exec
  437. cs0begin 2 // the 2 crypto instructions that follow form the script (count matches)
  438. cxsin $c0 // presumably: stream one input block into $c0 - confirm opcode semantics
  439. cxsout $c0 // presumably: stream $c0 back out unchanged
  440. ret
  441. crypt_store_prep: // prep routine for mode 1 in crypt_dtable; paired with crypt_do_out (no input stream)
  442. cs0begin 1 // one-instruction script
  443. cxsout $c6 // presumably: stream out $c6, the register crypt_cmd_length loads from ctx_iv - confirm
  444. ret
  445. crypt_ecb_e_prep: // prep routine for mode 2 in crypt_dtable; paired with crypt_do_inout
  446. cs0begin 3 // three-instruction script follows
  447. cxsin $c0 // presumably: input block -> $c0
  448. cenc $c0 $c0 // presumably: encrypt $c0 in place with the selected key
  449. cxsout $c0 // presumably: $c0 -> output
  450. ret
  451. crypt_ecb_d_prep: // prep routine for mode 3 in crypt_dtable; paired with crypt_do_inout
  452. ckexp $c7 $c7 // executed once, outside the script; presumably derives the decryption key from the key in $c7 - confirm
  453. cs0begin 3 // three-instruction script follows
  454. cxsin $c0 // presumably: input block -> $c0
  455. cdec $c0 $c0 // presumably: decrypt $c0 in place
  456. cxsout $c0 // presumably: $c0 -> output
  457. ret
  458. crypt_cbc_e_prep: // prep routine for mode 4 in crypt_dtable; paired with crypt_do_inout
  459. cs0begin 4 // four-instruction script follows
  460. cxsin $c0 // presumably: input block -> $c0
  461. cxor $c6 $c0 // presumably: $c6 (chaining value, initially the IV) ^= input
  462. cenc $c6 $c6 // presumably: encrypt; $c6 becomes the next chaining value
  463. cxsout $c6 // presumably: $c6 -> output
  464. ret
  465. crypt_cbc_d_prep: // prep routine for mode 5 in crypt_dtable; paired with crypt_do_inout
  466. ckexp $c7 $c7 // executed once, outside the script; presumably prepares the decryption key - confirm
  467. cs0begin 5 // five-instruction script follows
  468. cmov $c2 $c6 // presumably: save the previous chaining value
  469. cxsin $c6 // presumably: input block -> $c6 (it becomes the next chaining value)
  470. cdec $c0 $c6 // presumably: $c0 = decrypt(input)
  471. cxor $c0 $c2 // presumably: $c0 ^= previous chaining value
  472. cxsout $c0 // presumably: $c0 -> output
  473. ret
  474. crypt_pcbc_e_prep: // prep routine for mode 6 in crypt_dtable; paired with crypt_do_inout
  475. cs0begin 5 // five-instruction script follows
  476. cxsin $c0 // presumably: input block -> $c0
  477. cxor $c6 $c0 // presumably: chaining value ^= input
  478. cenc $c6 $c6 // presumably: encrypt in place
  479. cxsout $c6 // presumably: result -> output
  480. cxor $c6 $c0 // presumably: fold the input back in, so the next chaining value is output ^ input
  481. ret
  482. crypt_pcbc_d_prep: // prep routine for mode 7 in crypt_dtable; paired with crypt_do_inout
  483. ckexp $c7 $c7 // executed once, outside the script; presumably prepares the decryption key - confirm
  484. cs0begin 5 // five-instruction script follows
  485. cxsin $c0 // presumably: input block -> $c0
  486. cdec $c1 $c0 // presumably: $c1 = decrypt(input)
  487. cxor $c6 $c1 // presumably: chaining value ^= decrypted block, giving the output block
  488. cxsout $c6 // presumably: $c6 -> output
  489. cxor $c6 $c0 // presumably: next chaining value = output ^ input
  490. ret
  491. crypt_cfb_e_prep: // prep routine for mode 8 in crypt_dtable; paired with crypt_do_inout
  492. cs0begin 4 // four-instruction script follows
  493. cenc $c6 $c6 // presumably: encrypt the chaining value in place
  494. cxsin $c0 // presumably: input block -> $c0
  495. cxor $c6 $c0 // presumably: $c6 ^= input; $c6 is both the output and the next chaining value
  496. cxsout $c6 // presumably: $c6 -> output
  497. ret
  498. crypt_cfb_d_prep: // prep routine for mode 9 in crypt_dtable; paired with crypt_do_inout. No ckexp here, unlike the ecb/cbc/pcbc decrypt preps
  499. cs0begin 4 // four-instruction script follows
  500. cenc $c0 $c6 // presumably: $c0 = encrypt(chaining value)
  501. cxsin $c6 // presumably: input block -> $c6 (it becomes the next chaining value)
  502. cxor $c0 $c6 // presumably: $c0 ^= input
  503. cxsout $c0 // presumably: $c0 -> output
  504. ret
  505. crypt_ofb_prep: // prep routine for mode 10 in crypt_dtable; paired with crypt_do_inout
  506. cs0begin 4 // four-instruction script follows
  507. cenc $c6 $c6 // presumably: advance the keystream by encrypting $c6 in place
  508. cxsin $c0 // presumably: input block -> $c0
  509. cxor $c0 $c6 // presumably: $c0 ^= keystream; $c6 itself is left untouched by the data
  510. cxsout $c0 // presumably: $c0 -> output
  511. ret
  512. crypt_ctr_prep: // prep routine for mode 11 in crypt_dtable; paired with crypt_do_inout
  513. cs0begin 5 // five-instruction script follows
  514. cenc $c1 $c6 // presumably: $c1 = encrypt(counter in $c6)
  515. cadd $c6 1 // presumably: counter += 1
  516. cxsin $c0 // presumably: input block -> $c0
  517. cxor $c0 $c1 // presumably: $c0 ^= keystream block
  518. cxsout $c0 // presumably: $c0 -> output
  519. ret
  520. crypt_cbc_mac_prep: // prep routine for mode 12 in crypt_dtable; paired with crypt_do_in, so the script has no cxsout
  521. cs0begin 3 // three-instruction script follows
  522. cxsin $c0 // presumably: input block -> $c0
  523. cxor $c6 $c0 // presumably: running value in $c6 ^= input
  524. cenc $c6 $c6 // presumably: encrypt in place; crypt_cmd_length reads the result back into ctx_iv afterwards
  525. ret
  526. crypt_cmac_finish_complete_prep: // prep routine for mode 13 in crypt_dtable; paired with crypt_do_in (input only)
  527. cs0begin 7 // seven-instruction script follows
  528. cxsin $c0 // presumably: input block -> $c0
  529. cxor $c6 $c0 // presumably: running value ^= input
  530. cxor $c0 $c0 // presumably: $c0 = 0
  531. cenc $c0 $c0 // presumably: $c0 = encrypt(0)
  532. cprecmac $c0 $c0 // applied once here, twice in the "partial" variant; presumably one CMAC subkey derivation step - confirm
  533. cxor $c6 $c0 // presumably: mix the derived value into the running value
  534. cenc $c6 $c6 // presumably: final encryption; result stays in $c6
  535. ret
  536. crypt_cmac_finish_partial_prep: // prep routine for mode 14 in crypt_dtable; paired with crypt_do_in (input only)
  537. cs0begin 8 // eight-instruction script follows
  538. cxsin $c0 // presumably: input block -> $c0
  539. cxor $c6 $c0 // presumably: running value ^= input
  540. cxor $c0 $c0 // presumably: $c0 = 0
  541. cenc $c0 $c0 // presumably: $c0 = encrypt(0)
  542. cprecmac $c0 $c0 // first application
  543. cprecmac $c0 $c0 // second application - the only difference from crypt_cmac_finish_complete_prep
  544. cxor $c6 $c0 // presumably: mix the derived value into the running value
  545. cenc $c6 $c6 // presumably: final encryption; result stays in $c6
  546. ret
  547. // TODO
  548. crypt_do_in:
       // "do" routine for the input-only modes (12-14 in crypt_dtable).
       // In: $r3 = byte length, $r4 = src base, $r5 = src offset.
       // Walks the source in 16-byte steps, running the recorded script once per
       // block. Out: $r5 advanced by the length; $r3 and $r9 clobbered.
       // The loop exits only on exact equality, so it relies on the length being
       // a non-zero multiple of 16 (enforced by the engine_cmd_dtable mask and
       // the zero check in crypt_cmd_length).
  549. add b32 $r3 $r5 // $r3 = end offset
  550. mov $xdbase $r4
  551. mov $r9 #swap
  552. sethi $r9 0x20000
  553. crypt_do_in_loop:
  554. xdld $r5 $r9 // fetch the next source block into swap
  555. xdwait
  556. cxset 0x22
  557. xdst $r0 $r9
  558. cs0exec 1 // run the script recorded by the prep routine, once
  559. xdwait
  560. add b32 $r5 0x10
  561. cmpu b32 $r5 $r3
  562. bra ne #crypt_do_in_loop
  563. cxset 1
  564. xdwait
  565. ret
  566. crypt_do_out:
       // "do" routine for the output-only mode (1 in crypt_dtable).
       // In: $r3 = byte length, $r6 = dst base, $r7 = dst offset.
       // Runs the recorded script once per 16-byte block and transfers at the
       // destination offset. Out: $r7 advanced by the length; $r3 and $r9
       // clobbered. The exit test is exact equality, so the length must be a
       // non-zero multiple of 16.
  567. add b32 $r3 $r7 // $r3 = end offset
  568. mov $xdbase $r6
  569. mov $r9 #swap
  570. sethi $r9 0x20000
  571. crypt_do_out_loop:
  572. cs0exec 1 // run the script recorded by the prep routine, once
  573. cxset 0x61
  574. xdld $r7 $r9
  575. xdst $r7 $r9
  576. cxset 1
  577. xdwait
  578. add b32 $r7 0x10
  579. cmpu b32 $r7 $r3
  580. bra ne #crypt_do_out_loop
  581. ret
  582. crypt_do_inout:
       // "do" routine for every mode that both reads and writes data.
       // In: $r3 = byte length, $r4/$r5 = src base/offset,
       //     $r6/$r7 = dst base/offset.
       // Per 16-byte block: fetch from the source, run the recorded script once,
       // then transfer at the destination. $xdbase is switched between the two
       // bases inside the loop. Out: $r5 and $r7 advanced by the length; $r3 and
       // $r9 clobbered. The exit test is exact equality on the src offset, so the
       // length must be a non-zero multiple of 16.
  583. add b32 $r3 $r5 // $r3 = end offset on the source side
  584. mov $r9 #swap
  585. sethi $r9 0x20000
  586. crypt_do_inout_loop:
  587. mov $xdbase $r4 // source base
  588. xdld $r5 $r9
  589. xdwait
  590. cxset 0x21
  591. xdst $r0 $r9
  592. cs0exec 1 // run the script recorded by the prep routine, once
  593. cxset 0x61
  594. mov $xdbase $r6 // destination base
  595. xdld $r7 $r9
  596. xdst $r7 $r9
  597. cxset 1
  598. xdwait
  599. add b32 $r5 0x10
  600. add b32 $r7 0x10
  601. cmpu b32 $r5 $r3
  602. bra ne #crypt_do_inout_loop
  603. ret
  604. .align 0x100