123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698 |
- /*
- * fuc microcode for nv98 pcrypt engine
- * Copyright (C) 2010 Marcin Kościelnicki
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
- .section #nv98_pcrypt_data // data segment; the interrupt handler saves/restores a 128-byte context starting at data offset 0, i.e. everything up to #swap
- ctx_dma: // base of the DMA object slots; the context-load loop and the DMA_* methods index it as #ctx_dma + n*4
- ctx_dma_query: .b32 0 // slot 0 (the query code refers to transfer target 0 as DMA_QUERY)
- ctx_dma_src: .b32 0 // slot 1
- ctx_dma_dst: .b32 0 // slot 2
- .equ #dma_count 3 // number of slots above
- ctx_query_address_high: .b32 0 // stored by method index 0x80 (see common_cmd_dtable)
- ctx_query_address_low: .b32 0 // stored by method index 0x81
- ctx_query_counter: .b32 0 // stored by method index 0x82; copied into word 0 of the query record by cmd_query_get
- ctx_cond_address_high: .b32 0 // stored by method index 0x84
- ctx_cond_address_low: .b32 0 // stored by method index 0x85
- ctx_cond_off: .b32 0 // written by cmd_cond_mode; while it is 1 the dispatcher skips table entries whose flag word is 1
- ctx_src_address_high: .b32 0 // stored by method index 0xc8; rewritten by crypt_cmd_length after a run
- ctx_src_address_low: .b32 0 // stored by method index 0xc9
- ctx_dst_address_high: .b32 0 // stored by method index 0xca
- ctx_dst_address_low: .b32 0 // stored by method index 0xcb
- ctx_mode: .b32 0 // written by crypt_cmd_mode (0..0xe); index into crypt_dtable
- .align 16
- ctx_key: .skip 16 // 16 bytes, stored word by word by method indices 0xc0..0xc3
- ctx_iv: .skip 16 // 16 bytes, stored by method indices 0xc4..0xc7; reloaded from the crypto unit at the end of crypt_cmd_length
- .align 0x80
- swap: // 32-byte scratch area: query records are built/fetched here and the crypt_do_* loops use it as the transfer buffer operand
- .skip 32
- .align 8
- common_cmd_dtable: // dispatch table for method indices 0x80..0x87, 8 bytes per entry: low 16 bits = target address, high 16 bits = flags (2 = store parameter at target, 0 = always call target, 1 = call unless ctx_cond_off is 1), second word = parameter bits that must be zero
- .b32 #ctx_query_address_high + 0x20000 ~0xff // index 0x80
- .b32 #ctx_query_address_low + 0x20000 ~0xfffffff0 // index 0x81
- .b32 #ctx_query_counter + 0x20000 ~0xffffffff // index 0x82
- .b32 #cmd_query_get + 0x00000 ~1 // index 0x83
- .b32 #ctx_cond_address_high + 0x20000 ~0xff // index 0x84
- .b32 #ctx_cond_address_low + 0x20000 ~0xfffffff0 // index 0x85
- .b32 #cmd_cond_mode + 0x00000 ~7 // index 0x86
- .b32 #cmd_wrcache_flush + 0x00000 ~0 // index 0x87; any parameter bit set is rejected as an invalid bitfield
- .equ #common_cmd_max 0x88 // first method index past this table; the dispatcher rejects indices >= this value
- .align 8
- engine_cmd_dtable: // dispatch table for method indices 0xc0..0xcd; same 8-byte entry layout the dispatcher reads for common_cmd_dtable (address, flags in the high half, must-be-zero mask)
- .b32 #ctx_key + 0x0 + 0x20000 ~0xffffffff // index 0xc0
- .b32 #ctx_key + 0x4 + 0x20000 ~0xffffffff // index 0xc1
- .b32 #ctx_key + 0x8 + 0x20000 ~0xffffffff // index 0xc2
- .b32 #ctx_key + 0xc + 0x20000 ~0xffffffff // index 0xc3
- .b32 #ctx_iv + 0x0 + 0x20000 ~0xffffffff // index 0xc4
- .b32 #ctx_iv + 0x4 + 0x20000 ~0xffffffff // index 0xc5
- .b32 #ctx_iv + 0x8 + 0x20000 ~0xffffffff // index 0xc6
- .b32 #ctx_iv + 0xc + 0x20000 ~0xffffffff // index 0xc7
- .b32 #ctx_src_address_high + 0x20000 ~0xff // index 0xc8
- .b32 #ctx_src_address_low + 0x20000 ~0xfffffff0 // index 0xc9; low 4 bits must be zero
- .b32 #ctx_dst_address_high + 0x20000 ~0xff // index 0xca
- .b32 #ctx_dst_address_low + 0x20000 ~0xfffffff0 // index 0xcb; low 4 bits must be zero
- .b32 #crypt_cmd_mode + 0x00000 ~0xf // index 0xcc; always called
- .b32 #crypt_cmd_length + 0x10000 ~0x0ffffff0 // index 0xcd; flag 1: skipped while ctx_cond_off is 1; length must be a multiple of 16
- .equ #engine_cmd_max 0xce // first method index past this table
- .align 4
- crypt_dtable: // indexed by ctx_mode * 4 in crypt_cmd_length: first halfword = prep routine, second halfword = transfer loop routine; both are called in that order
- .b16 #crypt_copy_prep #crypt_do_inout // mode 0
- .b16 #crypt_store_prep #crypt_do_out // mode 1
- .b16 #crypt_ecb_e_prep #crypt_do_inout // mode 2
- .b16 #crypt_ecb_d_prep #crypt_do_inout // mode 3
- .b16 #crypt_cbc_e_prep #crypt_do_inout // mode 4
- .b16 #crypt_cbc_d_prep #crypt_do_inout // mode 5
- .b16 #crypt_pcbc_e_prep #crypt_do_inout // mode 6
- .b16 #crypt_pcbc_d_prep #crypt_do_inout // mode 7
- .b16 #crypt_cfb_e_prep #crypt_do_inout // mode 8
- .b16 #crypt_cfb_d_prep #crypt_do_inout // mode 9
- .b16 #crypt_ofb_prep #crypt_do_inout // mode 0xa
- .b16 #crypt_ctr_prep #crypt_do_inout // mode 0xb
- .b16 #crypt_cbc_mac_prep #crypt_do_in // mode 0xc
- .b16 #crypt_cmac_finish_complete_prep #crypt_do_in // mode 0xd
- .b16 #crypt_cmac_finish_partial_prep #crypt_do_in // mode 0xe; crypt_cmd_mode rejects anything >= 0xf
- .align 0x100
- .section #nv98_pcrypt_code // start of the code segment: one-time setup followed by an idle loop; all real work happens in the #ih interrupt handler
- // $r0 is always set to 0 in our code - this allows some space savings.
- clear b32 $r0
- // set up the interrupt handler
- mov $r1 #ih
- mov $iv0 $r1
- // init stack pointer
- mov $sp $r0
- // set interrupt dispatch - route timer, fifo, ctxswitch to i0, others to host
- movw $r1 0xfff0
- sethi $r1 0
- mov $r2 0x400
- iowr I[$r2 + 0x300] $r1 // dispatch register at I[0x700]
- // enable the interrupts
- or $r1 0xc // adds bits 2 and 3 - the same two bits #ih tests and acks
- iowr I[$r2] $r1 // enable register at I[0x400]
- // enable fifo access and context switching
- mov $r1 3
- mov $r2 0x1200
- iowr I[$r2] $r1
- // enable i0 delivery
- bset $flags ie0
- // sleep forever, waking only for interrupts.
- bset $flags $p0 // $p0 is never cleared in this file, so the loop below never exits
- spin:
- sleep $p0
- bra #spin
- // i0 handler: services the context-switch (bit 3) and FIFO-command (bit 2) interrupts, acks both and returns with iret
- ih:
- // see which interrupts we got
- iord $r1 I[$r0 + 0x200] // $r1 keeps the pending bits until the ack at #nocmd
- and $r2 $r1 0x8
- cmpu b32 $r2 0
- bra e #noctx
- // context switch... prepare the regs for xfer
- mov $r2 0x7700
- mov $xtargets $r2
- mov $xdbase $r0
- // 128-byte context.
- mov $r2 0
- sethi $r2 0x50000 // $r2 = transfer descriptor: data offset 0, size field 5
- // read current channel
- mov $r3 0x1400
- iord $r4 I[$r3]
- // if bit 30 set, it's active, so we have to unload it first.
- shl b32 $r5 $r4 1 // move bit 30 into the sign bit
- cmps b32 $r5 0
- bra nc #ctxload
- // unload the current channel - save the context
- xdst $r0 $r2
- xdwait
- // and clear bit 30, then write back
- bclr $r4 0x1e
- iowr I[$r3] $r4
- // tell PFIFO we unloaded
- mov $r4 1
- iowr I[$r3 + 0x200] $r4
- bra #noctx
- ctxload:
- // no channel loaded - perhaps we're requested to load one
- iord $r4 I[$r3 + 0x100] // next channel register at I[0x1500]
- shl b32 $r15 $r4 1 // same bit-30 test as above
- cmps b32 $r15 0
- // if bit 30 of next channel not set, probably PFIFO is just
- // killing a context. do a faux load, without the active bit.
- bra nc #dummyload
- // ok, do a real context load.
- xdld $r0 $r2
- xdwait
- mov $r5 #ctx_dma
- mov $r6 #dma_count - 1
- ctxload_dma_loop: // for n = dma_count-1 down to 0: write the saved DMA slot n to I[(0x180 + n) << 8]
- ld b32 $r7 D[$r5 + $r6 * 4]
- add b32 $r8 $r6 0x180
- shl b32 $r8 8
- iowr I[$r8] $r7
- sub b32 $r6 1
- bra nc #ctxload_dma_loop // loop until the subtraction borrows (n went below 0)
- dummyload:
- // tell PFIFO we're done
- mov $r5 2
- iowr I[$r3 + 0x200] $r5
- noctx:
- and $r2 $r1 0x4
- cmpu b32 $r2 0
- bra e #nocmd
- // incoming fifo command.
- mov $r3 0x1900
- iord $r2 I[$r3 + 0x100] // $r2 = method address word from I[0x1a00]
- iord $r3 I[$r3] // $r3 = method parameter from I[0x1900]
- // extract the method
- and $r4 $r2 0x7ff // $r4 = method index (method offset / 4, going by the mthd numbers quoted in the comments below)
- // shift the addr to proper position if we need to interrupt later
- shl b32 $r2 0x10
- // mthd 0 and 0x100 [NAME, NOP]: ignore
- and $r5 $r4 0x7bf // masks out bit 6, so index 0 and index 0x40 both give 0
- cmpu b32 $r5 0
- bra e #cmddone
- mov $r5 #engine_cmd_dtable - 0xc0 * 8 // table base biased so that index * 8 can be added directly
- mov $r6 #engine_cmd_max
- cmpu b32 $r4 0xc0
- bra nc #dtable_cmd // index >= 0xc0: engine table
- mov $r5 #common_cmd_dtable - 0x80 * 8
- mov $r6 #common_cmd_max
- cmpu b32 $r4 0x80
- bra nc #dtable_cmd // 0x80 <= index < 0xc0: common table
- cmpu b32 $r4 0x60
- bra nc #dma_cmd // 0x60 <= index < 0x80: DMA object binding
- cmpu b32 $r4 0x50
- bra ne #illegal_mthd
- // mthd 0x140: PM_TRIGGER
- mov $r2 0x2200
- clear b32 $r3
- sethi $r3 0x20000
- iowr I[$r2] $r3
- bra #cmddone
- dma_cmd:
- // mthd 0x180...: DMA_*
- cmpu b32 $r4 0x60+#dma_count
- bra nc #illegal_mthd
- shl b32 $r5 $r4 2
- add b32 $r5 (#ctx_dma - 0x60 * 4) & 0xffff // $r5 = address of the context slot for this DMA index
- bset $r3 0x1e // bit 30 is set in the value that is stored and written out
- st b32 D[$r5] $r3
- add b32 $r4 0x180 - 0x60
- shl b32 $r4 8 // same I[(0x180 + n) << 8] register the context-load loop writes
- iowr I[$r4] $r3
- bra #cmddone
- dtable_cmd: // in: $r4 = method index, $r5 = biased table base, $r6 = first invalid index, $r3 = parameter
- cmpu b32 $r4 $r6
- bra nc #illegal_mthd
- shl b32 $r4 3
- add b32 $r4 $r5 // $r4 = address of the 8-byte table entry
- ld b32 $r5 D[$r4 + 4] // second word: bits that must be zero in the parameter
- and $r5 $r3
- cmpu b32 $r5 0
- bra ne #invalid_bitfield
- ld b16 $r5 D[$r4] // low halfword: ctx variable or handler address
- ld b16 $r6 D[$r4 + 2] // high halfword: flags
- cmpu b32 $r6 2
- bra e #cmd_setctx
- ld b32 $r7 D[$r0 + #ctx_cond_off]
- and $r6 $r7
- cmpu b32 $r6 1
- bra e #cmddone // flag 1 entry while ctx_cond_off is 1: silently drop the method
- call $r5 // handler sees $r3 = parameter, $r2 = address word << 16; it reports an error by returning with $p1 set
- bra $p1 #dispatch_error
- bra #cmddone
- cmd_setctx:
- st b32 D[$r5] $r3
- bra #cmddone
- invalid_bitfield:
- or $r2 1 // error code 1 in the low bits of the reported address word
- dispatch_error:
- illegal_mthd: // report path: write $r2 to I[0x1000] and $r3 to I[0x1100], raise 0x40 through I[0], then wait for bit 6 of I[0x200] to clear
- mov $r4 0x1000
- iowr I[$r4] $r2
- iowr I[$r4 + 0x100] $r3
- mov $r4 0x40
- iowr I[$r0] $r4
- im_loop:
- iord $r4 I[$r0 + 0x200]
- and $r4 0x40
- cmpu b32 $r4 0
- bra ne #im_loop // presumably the host clears this bit when it has handled the error - confirm
- cmddone:
- // remove the command from FIFO
- mov $r3 0x1d00
- mov $r4 1
- iowr I[$r3] $r4
- nocmd:
- // ack the processed interrupts
- and $r1 $r1 0xc // only bits 2 and 3 are acked, whatever else was pending
- iowr I[$r0 + 0x100] $r1
- iret
- cmd_query_get: // method handler (index 0x83). In: $r3 = parameter, $r2 = address word << 16. Builds a 4-word record in #swap and stores it through transfer target 0 at the query address. Out: $p1 = bit 0 of the parameter
- // if bit 0 of param set, trigger interrupt afterwards.
- setp $p1 $r3
- or $r2 3 // code 3 is reported only if the dispatcher takes the $p1 path
- // read PTIMER, beware of races...
- mov $r4 0xb00
- ptimer_retry: // read I[0xc00], I[0xb00], I[0xc00] again; retry until both I[0xc00] reads match
- iord $r6 I[$r4 + 0x100]
- iord $r5 I[$r4]
- iord $r7 I[$r4 + 0x100]
- cmpu b32 $r6 $r7
- bra ne #ptimer_retry
- // prepare the query structure
- ld b32 $r4 D[$r0 + #ctx_query_counter]
- st b32 D[$r0 + #swap + 0x0] $r4 // word 0: counter
- st b32 D[$r0 + #swap + 0x4] $r0 // word 1: zero
- st b32 D[$r0 + #swap + 0x8] $r5 // word 2: value read from I[0xb00]
- st b32 D[$r0 + #swap + 0xc] $r6 // word 3: value read from I[0xc00]
- // will use target 0, DMA_QUERY.
- mov $xtargets $r0
- ld b32 $r4 D[$r0 + #ctx_query_address_high]
- shl b32 $r4 0x18
- mov $xdbase $r4 // base carries only the high address part; the whole low word is used as the offset below
- ld b32 $r4 D[$r0 + #ctx_query_address_low]
- mov $r5 #swap
- sethi $r5 0x20000 // size field 2; presumably a 16-byte transfer, matching the 4 words written above - confirm
- xdst $r4 $r5
- xdwait
- ret
- cmd_cond_mode: // method handler (index 0x86). In: $r3 = mode (0..4), $r2 = address word << 16. Computes ctx_cond_off, fetching one or two query records for modes 2..4. Out: $p1 set (and $r2 |= 2) only for mode >= 5
- // if >= 5, INVALID_ENUM
- bset $flags $p1
- or $r2 2
- cmpu b32 $r3 5
- bra nc #return
- // otherwise, no error.
- bclr $flags $p1
- // if < 2, no QUERY object is involved
- cmpu b32 $r3 2
- bra nc #cmd_cond_mode_queryful
- xor $r3 1 // mode 0 -> ctx_cond_off = 1, mode 1 -> ctx_cond_off = 0
- st b32 D[$r0 + #ctx_cond_off] $r3
- return:
- ret
- cmd_cond_mode_queryful:
- // ok, will need to pull a QUERY object, prepare offsets
- ld b32 $r4 D[$r0 + #ctx_cond_address_high]
- ld b32 $r5 D[$r0 + #ctx_cond_address_low]
- and $r6 $r5 0xff // $r6 = low 8 address bits, used as the transfer offset
- shr b32 $r5 8
- shl b32 $r4 0x18
- or $r4 $r5 // $r4 = (high << 24) | (low >> 8), i.e. the address in 256-byte units
- mov $xdbase $r4
- mov $xtargets $r0
- // pull the first one
- mov $r5 #swap
- sethi $r5 0x20000
- xdld $r6 $r5
- // if == 2, only a single QUERY is involved...
- cmpu b32 $r3 2
- bra ne #cmd_cond_mode_double
- xdwait
- ld b32 $r4 D[$r0 + #swap + 4]
- cmpu b32 $r4 0
- xbit $r4 $flags z // $r4 = 1 when the word at offset 4 of the record is zero
- st b32 D[$r0 + #ctx_cond_off] $r4
- ret
- // ok, we'll need to pull second one too
- cmd_cond_mode_double:
- add b32 $r6 0x10 // second record: 0x10 further in memory...
- add b32 $r5 0x10 // ...and 0x10 further into #swap
- xdld $r6 $r5
- xdwait
- // compare COUNTERs
- ld b32 $r5 D[$r0 + #swap + 0x00]
- ld b32 $r6 D[$r0 + #swap + 0x10]
- cmpu b32 $r5 $r6
- xbit $r4 $flags z
- // compare the words at offset 4 of both records
- ld b32 $r5 D[$r0 + #swap + 0x04]
- ld b32 $r6 D[$r0 + #swap + 0x14]
- cmpu b32 $r5 $r6
- xbit $r5 $flags z
- and $r4 $r5 // $r4 = 1 only when both word pairs are equal
- // and negate or not, depending on mode
- cmpu b32 $r3 3
- xbit $r5 $flags z
- xor $r4 $r5 // mode 3 inverts the result, mode 4 keeps it
- st b32 D[$r0 + #ctx_cond_off] $r4
- ret
- cmd_wrcache_flush: // method handler (index 0x87): writes 0x10000 to I[0x2200]. Clobbers $r2 and $r3; returns with $p1 clear so the dispatcher reports no error
- bclr $flags $p1
- mov $r2 0x2200 // same register the PM_TRIGGER method writes 0x20000 to
- clear b32 $r3
- sethi $r3 0x10000
- iowr I[$r2] $r3
- ret
- crypt_cmd_mode: // method handler (index 0xcc). In: $r3 = mode. Stores it in ctx_mode when < 0xf; otherwise returns with $p1 set and $r2 |= 2
- // if >= 0xf, INVALID_ENUM
- bset $flags $p1
- or $r2 2
- cmpu b32 $r3 0xf // crypt_dtable has 0xf entries (modes 0..0xe)
- bra nc #crypt_cmd_mode_return
- bclr $flags $p1
- st b32 D[$r0 + #ctx_mode] $r3
- crypt_cmd_mode_return: // shared bare ret; crypt_cmd_length also branches here for a zero length
- ret
- // crypt_cmd_length: method handler (index 0xcd) that runs one crypto pass.
- // In: $r3 = length in bytes (the dispatch mask only admits multiples of 16),
- //     $r2 = address word << 16 (not modified here).
- // Out: $p1 clear; ctx_src/ctx_dst addresses rewritten from the working
- //      registers; ctx_iv reloaded from the crypto unit.
- // Clobbers $r3..$r9.
- crypt_cmd_length:
- // this handler has no error path, but the dispatcher tests $p1 right after
- // the call and nothing else resets it, so a value left behind by an earlier
- // method would be reported as an error. clear it like cmd_wrcache_flush does.
- bclr $flags $p1
- // nop if length == 0
- cmpu b32 $r3 0
- bra e #crypt_cmd_mode_return
- // init key, IV
- cxset 3
- mov $r4 #ctx_key
- sethi $r4 0x70000
- xdst $r0 $r4
- mov $r4 #ctx_iv
- sethi $r4 0x60000
- xdst $r0 $r4
- xdwait
- ckeyreg $c7
- // prepare the targets
- mov $r4 0x2100
- mov $xtargets $r4
- // prepare src address: $r4 = (high << 24) | (low >> 8), $r5 = low & 0xff
- ld b32 $r4 D[$r0 + #ctx_src_address_high]
- ld b32 $r5 D[$r0 + #ctx_src_address_low]
- shr b32 $r8 $r5 8
- shl b32 $r4 0x18
- or $r4 $r8
- and $r5 $r5 0xff
- // prepare dst address: $r6 / $r7, same split as src
- ld b32 $r6 D[$r0 + #ctx_dst_address_high]
- ld b32 $r7 D[$r0 + #ctx_dst_address_low]
- shr b32 $r8 $r7 8
- shl b32 $r6 0x18
- or $r6 $r8
- and $r7 $r7 0xff
- // find the proper prep & do functions (4 bytes per crypt_dtable entry)
- ld b32 $r8 D[$r0 + #ctx_mode]
- shl b32 $r8 2
- // run prep
- ld b16 $r9 D[$r8 + #crypt_dtable]
- call $r9
- // do it
- ld b16 $r9 D[$r8 + #crypt_dtable + 2]
- call $r9
- cxset 1
- xdwait
- cxset 0x61
- xdwait
- xdwait
- // update src address: undo the base/offset split, carrying from the low
- // word into the high word
- shr b32 $r8 $r4 0x18
- shl b32 $r9 $r4 8
- add b32 $r9 $r5
- adc b32 $r8 0
- st b32 D[$r0 + #ctx_src_address_high] $r8
- st b32 D[$r0 + #ctx_src_address_low] $r9
- // update dst address
- shr b32 $r8 $r6 0x18
- shl b32 $r9 $r6 8
- add b32 $r9 $r7
- adc b32 $r8 0
- st b32 D[$r0 + #ctx_dst_address_high] $r8
- st b32 D[$r0 + #ctx_dst_address_low] $r9
- // pull updated IV
- cxset 2
- mov $r4 #ctx_iv
- sethi $r4 0x60000
- xdld $r0 $r4
- xdwait
- ret
- crypt_copy_prep: // mode 0 prep; the cs0begin count equals the number of crypto instructions that follow (presumably recorded for replay by cs0exec in the crypt_do_* loops - confirm)
- cs0begin 2
- cxsin $c0 // presumably: next input block -> $c0
- cxsout $c0 // presumably: $c0 -> output, i.e. a plain copy
- ret
- crypt_store_prep: // mode 1 prep, paired with crypt_do_out (no input stream); one-instruction script
- cs0begin 1
- cxsout $c6 // presumably emits $c6 for every block; $c6 is the register slot the IV is moved through in crypt_cmd_length (0x60000) - confirm
- ret
- crypt_ecb_e_prep: // mode 2 prep; three-instruction script (operand order assumed destination-first)
- cs0begin 3
- cxsin $c0 // block in
- cenc $c0 $c0 // encrypt in place
- cxsout $c0 // block out
- ret
- crypt_ecb_d_prep: // mode 3 prep; three-instruction script (operand order assumed destination-first)
- ckexp $c7 $c7 // runs once, outside the script; presumably derives the decryption key from the key in $c7 - confirm
- cs0begin 3
- cxsin $c0 // block in
- cdec $c0 $c0 // decrypt in place
- cxsout $c0 // block out
- ret
- crypt_cbc_e_prep: // mode 4 prep; $c6 carries the chaining value between blocks (operand order assumed destination-first)
- cs0begin 4
- cxsin $c0 // block in
- cxor $c6 $c0 // chain ^= input
- cenc $c6 $c6 // chain = enc(chain)
- cxsout $c6 // output the new chain value
- ret
- crypt_cbc_d_prep: // mode 5 prep; $c6 carries the chaining value between blocks (operand order assumed destination-first)
- ckexp $c7 $c7 // runs once, outside the script; presumably prepares the decryption key - confirm
- cs0begin 5
- cmov $c2 $c6 // keep the previous chain value
- cxsin $c6 // input block becomes the next chain value
- cdec $c0 $c6 // $c0 = dec(input)
- cxor $c0 $c2 // ^= previous chain value
- cxsout $c0
- ret
- crypt_pcbc_e_prep: // mode 6 prep; $c6 carries the chaining value between blocks (operand order assumed destination-first)
- cs0begin 5
- cxsin $c0 // block in
- cxor $c6 $c0 // chain ^= input
- cenc $c6 $c6
- cxsout $c6 // output the encrypted block
- cxor $c6 $c0 // next chain value = output ^ input
- ret
- crypt_pcbc_d_prep: // mode 7 prep; $c6 carries the chaining value between blocks (operand order assumed destination-first)
- ckexp $c7 $c7 // runs once, outside the script; presumably prepares the decryption key - confirm
- cs0begin 5
- cxsin $c0 // block in
- cdec $c1 $c0 // $c1 = dec(input)
- cxor $c6 $c1 // chain ^= dec(input): this is the output block
- cxsout $c6
- cxor $c6 $c0 // next chain value = output ^ input
- ret
- crypt_cfb_e_prep: // mode 8 prep; $c6 carries the chaining value between blocks (operand order assumed destination-first)
- cs0begin 4
- cenc $c6 $c6 // chain = enc(chain)
- cxsin $c0 // block in
- cxor $c6 $c0 // chain ^= input: this is both the output and the next chain value
- cxsout $c6
- ret
- crypt_cfb_d_prep: // mode 9 prep; uses cenc only, so no ckexp; $c6 carries the chaining value (operand order assumed destination-first)
- cs0begin 4
- cenc $c0 $c6 // $c0 = enc(chain)
- cxsin $c6 // input block becomes the next chain value
- cxor $c0 $c6 // $c0 ^= input
- cxsout $c0
- ret
- crypt_ofb_prep: // mode 0xa prep, one routine for both directions; $c6 carries the chaining value (operand order assumed destination-first)
- cs0begin 4
- cenc $c6 $c6 // chain = enc(chain), independent of the data
- cxsin $c0 // block in
- cxor $c0 $c6 // input ^= chain
- cxsout $c0
- ret
- crypt_ctr_prep: // mode 0xb prep, one routine for both directions; $c6 holds the counter block (operand order assumed destination-first)
- cs0begin 5
- cenc $c1 $c6 // $c1 = enc(counter)
- cadd $c6 1 // advance the counter for the next block
- cxsin $c0 // block in
- cxor $c0 $c1 // input ^= enc(counter)
- cxsout $c0
- ret
- crypt_cbc_mac_prep: // mode 0xc prep, paired with crypt_do_in: nothing is streamed out, the result stays in $c6 (operand order assumed destination-first)
- cs0begin 3
- cxsin $c0 // block in
- cxor $c6 $c0 // chain ^= input
- cenc $c6 $c6 // chain = enc(chain)
- ret
- crypt_cmac_finish_complete_prep: // mode 0xd prep, paired with crypt_do_in; result stays in $c6 (operand order assumed destination-first)
- cs0begin 7
- cxsin $c0 // block in
- cxor $c6 $c0 // chain ^= input
- cxor $c0 $c0 // $c0 = 0
- cenc $c0 $c0 // $c0 = enc(0)
- cprecmac $c0 $c0 // applied once here, twice in the partial variant; presumably the CMAC subkey derivation step - confirm
- cxor $c6 $c0 // chain ^= derived value
- cenc $c6 $c6 // final encryption
- ret
- crypt_cmac_finish_partial_prep: // mode 0xe prep, paired with crypt_do_in; identical to the complete variant except cprecmac is applied twice (operand order assumed destination-first)
- cs0begin 8
- cxsin $c0 // block in
- cxor $c6 $c0 // chain ^= input
- cxor $c0 $c0 // $c0 = 0
- cenc $c0 $c0 // $c0 = enc(0)
- cprecmac $c0 $c0 // presumably the CMAC subkey derivation step - confirm
- cprecmac $c0 $c0 // second application
- cxor $c6 $c0 // chain ^= derived value
- cenc $c6 $c6 // final encryption
- ret
- // TODO
- crypt_do_in: // input-only transfer loop (MAC modes). In: $r3 = length, $r4 = src base for $xdbase, $r5 = src offset. Out: $r5 advanced by the length, $r3 = end offset; clobbers $r9
- add b32 $r3 $r5 // $r3 = offset at which the loop stops
- mov $xdbase $r4
- mov $r9 #swap
- sethi $r9 0x20000 // transfer descriptor: #swap with size field 2
- crypt_do_in_loop: // one iteration per 0x10 bytes of source
- xdld $r5 $r9
- xdwait
- cxset 0x22
- xdst $r0 $r9
- cs0exec 1 // presumably runs the script set up by the prep routine once - confirm
- xdwait
- add b32 $r5 0x10
- cmpu b32 $r5 $r3
- bra ne #crypt_do_in_loop
- cxset 1
- xdwait
- ret
- crypt_do_out: // output-only transfer loop (store mode). In: $r3 = length, $r6 = dst base for $xdbase, $r7 = dst offset. Out: $r7 advanced by the length, $r3 = end offset; clobbers $r9
- add b32 $r3 $r7 // $r3 = offset at which the loop stops
- mov $xdbase $r6
- mov $r9 #swap
- sethi $r9 0x20000 // transfer descriptor: #swap with size field 2
- crypt_do_out_loop: // one iteration per 0x10 bytes of destination
- cs0exec 1 // presumably runs the script set up by the prep routine once - confirm
- cxset 0x61
- xdld $r7 $r9
- xdst $r7 $r9
- cxset 1
- xdwait
- add b32 $r7 0x10
- cmpu b32 $r7 $r3
- bra ne #crypt_do_out_loop
- ret
- crypt_do_inout: // combined transfer loop. In: $r3 = length, $r4/$r5 = src base/offset, $r6/$r7 = dst base/offset. Out: $r5 and $r7 advanced by the length, $r3 = src end offset; clobbers $r9
- add b32 $r3 $r5 // $r3 = source offset at which the loop stops
- mov $r9 #swap
- sethi $r9 0x20000 // transfer descriptor: #swap with size field 2
- crypt_do_inout_loop: // one iteration per 0x10 bytes; $xdbase is switched between src and dst inside the loop
- mov $xdbase $r4 // source side
- xdld $r5 $r9
- xdwait
- cxset 0x21
- xdst $r0 $r9
- cs0exec 1 // presumably runs the script set up by the prep routine once - confirm
- cxset 0x61
- mov $xdbase $r6 // destination side
- xdld $r7 $r9
- xdst $r7 $r9
- cxset 1
- xdwait
- add b32 $r5 0x10
- add b32 $r7 0x10
- cmpu b32 $r5 $r3 // termination is tested on the source offset only
- bra ne #crypt_do_inout_loop
- ret
- .align 0x100
|