aes-i586-asm.S 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376
  1. // -------------------------------------------------------------------------
  2. // Copyright (c) 2001, Dr Brian Gladman < >, Worcester, UK.
  3. // All rights reserved.
  4. //
  5. // LICENSE TERMS
  6. //
  7. // The free distribution and use of this software in both source and binary
  8. // form is allowed (with or without changes) provided that:
  9. //
  10. // 1. distributions of this source code include the above copyright
  11. // notice, this list of conditions and the following disclaimer;
  12. //
  13. // 2. distributions in binary form include the above copyright
  14. // notice, this list of conditions and the following disclaimer
  15. // in the documentation and/or other associated materials;
  16. //
  17. // 3. the copyright holder's name is not used to endorse products
  18. // built using this software without specific written permission.
  19. //
  20. //
  21. // ALTERNATIVELY, provided that this notice is retained in full, this product
  22. // may be distributed under the terms of the GNU General Public License (GPL),
  23. // in which case the provisions of the GPL apply INSTEAD OF those given above.
  24. //
  25. // Copyright (c) 2004 Linus Torvalds <torvalds@osdl.org>
  26. // Copyright (c) 2004 Red Hat, Inc., James Morris <jmorris@redhat.com>
  27. // DISCLAIMER
  28. //
  29. // This software is provided 'as is' with no explicit or implied warranties
  30. // in respect of its properties including, but not limited to, correctness
  31. // and fitness for purpose.
  32. // -------------------------------------------------------------------------
  33. // Issue Date: 29/07/2002
  34. .file "aes-i586-asm.S"
  35. .text
  // C prototypes of the two entry points implemented in this file:
  36. // aes_rval aes_enc_blk(const unsigned char in_blk[], unsigned char out_blk[], const aes_ctx cx[1]);
  37. // aes_rval aes_dec_blk(const unsigned char in_blk[], unsigned char out_blk[], const aes_ctx cx[1]);
  // tlen is the size in bytes of one lookup table. The column macros address
  // the four tables of a set as table, table+tlen, table+2*tlen and
  // table+3*tlen, so the four tables of a set must be contiguous in memory.
  38. #define tlen 1024 // length of each of 4 'xor' arrays (256 32-bit words)
  39. // offsets to parameters with one register pushed onto stack
  // (0(%esp) = the pushed register, 4(%esp) = return address). Code that has
  // pushed more registers adds 4 per extra push, e.g. in_blk+4(%esp).
  40. #define in_blk 8 // input byte array address parameter
  41. #define out_blk 12 // output byte array address parameter
  42. #define ctx 16 // AES context structure
  43. // offsets in context structure
  // NOTE(review): these byte offsets presumably mirror the layout of the C
  // aes_ctx structure, which is not visible in this file - confirm against
  // its definition before changing either side.
  44. #define ekey 0 // encryption key schedule base address
  45. #define nrnd 256 // number of rounds
  46. #define dkey 260 // decryption key schedule base address
  47. // register mapping for encrypt and decrypt subroutines
  // Only r0-r3 map to registers for which low/high byte names are defined
  // below, so only they can be passed as the 'idx' argument of the column
  // macros; r4 and r5 (esi, edi) have no l()/h() form here.
  48. #define r0 eax
  49. #define r1 ebx
  50. #define r2 ecx
  51. #define r3 edx
  52. #define r4 esi
  53. #define r5 edi
  // Byte-register names spelled as <32-bit register name> + 'l' / 'h', so
  // that they can be produced by token pasting in _l()/_h().
  54. #define eaxl al
  55. #define eaxh ah
  56. #define ebxl bl
  57. #define ebxh bh
  58. #define ecxl cl
  59. #define ecxh ch
  60. #define edxl dl
  61. #define edxh dh
  // l(reg) / h(reg) name the register holding bits 0-7 / bits 8-15 of reg.
  // The extra macro level lets an argument such as r0 be expanded to eax
  // before ## pastes the suffix (r0 -> eax -> eaxl -> al); pasting directly
  // would produce r0l, which is not defined.
  62. #define _h(reg) reg##h
  63. #define h(reg) _h(reg)
  64. #define _l(reg) reg##l
  65. #define l(reg) _l(reg)
  66. // This macro takes a 32-bit word representing a column and uses
  67. // each of its four bytes to index into four tables of 256 32-bit
  68. // words to obtain values that are then xored into the four output
  69. // registers a1..a4 (one table lookup per output register).
  70. // Parameters:
  71. // table base address of the first of the four contiguous tables
  72. // a1 register xored with the table entry selected by bits 0-7 of idx
  73. // a2 register xored with the table+tlen entry selected by bits 8-15
  74. // a3 register xored with the table+2*tlen entry selected by bits 16-23
  75. // a4 register xored with the table+3*tlen entry selected by bits 24-31
  76. // idx input register for the round (destroyed)
  // must be a register with l()/h() byte names: eax, ebx, ecx or edx
  77. // tmp scratch register for the round
  78. // (do_col has no key schedule parameter; only do_fcol and do_icol take one)
  // The xor and shr instructions also overwrite the flags.
  79. #define do_col(table, a1,a2,a3,a4, idx, tmp) \
  80. movzx %l(idx),%tmp; /* tmp = bits 0-7 of idx */ \
  81. xor table(,%tmp,4),%a1; \
  82. movzx %h(idx),%tmp; /* tmp = bits 8-15 of idx */ \
  83. shr $16,%idx; /* bring the upper half into the byte registers */ \
  84. xor table+tlen(,%tmp,4),%a2; \
  85. movzx %l(idx),%tmp; /* tmp = original bits 16-23 */ \
  86. movzx %h(idx),%idx; /* idx = original bits 24-31 */ \
  87. xor table+2*tlen(,%tmp,4),%a3; \
  88. xor table+3*tlen(,%idx,4),%a4;
  89. // initialise output registers from the key schedule
  90. // NB1: original value of a3 is in idx on exit
  91. // NB2: original values of a1,a2,a4 aren't used
  // do_fcol performs the same four table lookups on idx as do_col, but each
  // output register is first loaded from the round key instead of being
  // accumulated into:
  //   a1 = word at sched+0,  a2 = word at sched+12,
  //   a3 = word at sched+8,  a4 = word at sched+4
  // a3 is copied into idx before it is reloaded, so a3 must still hold live
  // input on entry and idx returns that value to the caller.
  // sched is pasted straight after a literal offset ("12 sched"), so it has
  // to be a memory operand whose displacement can follow a number, such as
  // (%ebp), +16(%ebp) or -64(%ebp); the assembler then sums the displacement.
  // idx must be eax, ebx, ecx or edx (it is used through l()/h()); tmp is a
  // scratch register.
  92. #define do_fcol(table, a1,a2,a3,a4, idx, tmp, sched) \
  93. mov 0 sched,%a1; \
  94. movzx %l(idx),%tmp; \
  95. mov 12 sched,%a2; \
  96. xor table(,%tmp,4),%a1; \
  97. mov 4 sched,%a4; \
  98. movzx %h(idx),%tmp; \
  99. shr $16,%idx; \
  100. xor table+tlen(,%tmp,4),%a2; \
  101. movzx %l(idx),%tmp; \
  102. movzx %h(idx),%idx; \
  103. xor table+3*tlen(,%idx,4),%a4; \
  104. mov %a3,%idx; \
  105. mov 8 sched,%a3; \
  106. xor table+2*tlen(,%tmp,4),%a3;
  107. // initialise output registers from the key schedule
  108. // NB1: original value of a3 is in idx on exit
  109. // NB2: original values of a1,a2,a4 aren't used
  // do_icol is the counterpart of do_fcol used by the inverse rounds. The
  // instruction sequence is the same; only the key words loaded into a2 and
  // a4 are swapped:
  //   a1 = word at sched+0,  a2 = word at sched+4,
  //   a3 = word at sched+8,  a4 = word at sched+12
  // As in do_fcol, sched is pasted after a literal offset ("12 sched") and so
  // must be a memory operand such as (%ebp), +16(%ebp) or -64(%ebp). idx must
  // be eax, ebx, ecx or edx; tmp is a scratch register.
  110. #define do_icol(table, a1,a2,a3,a4, idx, tmp, sched) \
  111. mov 0 sched,%a1; \
  112. movzx %l(idx),%tmp; \
  113. mov 4 sched,%a2; \
  114. xor table(,%tmp,4),%a1; \
  115. mov 12 sched,%a4; \
  116. movzx %h(idx),%tmp; \
  117. shr $16,%idx; \
  118. xor table+tlen(,%tmp,4),%a2; \
  119. movzx %l(idx),%tmp; \
  120. movzx %h(idx),%idx; \
  121. xor table+3*tlen(,%idx,4),%a4; \
  122. mov %a3,%idx; \
  123. mov 8 sched,%a3; \
  124. xor table+2*tlen(,%tmp,4),%a3;
  125. // original Gladman had conditional saves to MMX regs.
  // Here the saves always go to 32-bit slots on the stack, addressed from
  // %esp; the caller must have reserved the slots below the saved registers.
  // Note the mirrored argument order of the two macros:
  //   save(slot, reg)    stores reg into slot number 'slot'
  //   restore(reg, slot) loads reg from slot number 'slot'
  126. #define save(a1, a2) \
  127. mov %a2,4*a1(%esp)
  128. #define restore(a1, a2) \
  129. mov 4*a2(%esp),%a1
  130. // These macros each perform one forward encryption round. fwd_rnd1 is
  131. // entered with the previous round column values in r0,r1,r4,r5 and
  132. // exits with the new values in r2,r1,r4,r5, using stack
  133. // for temporary storage.
  // fwd_rnd2 takes r2,r1,r4,r5 and puts the state back in r0,r1,r4,r5, so the
  // two macros are used alternately.
  // Parameters:
  //   arg    memory operand of the round key, passed on as 'sched' to do_fcol
  //   table  base address of the four lookup tables for this round
  // r3 is used as the scratch register; stack slots 0 and 1 are overwritten.
  // Each "idx=rN" note names the input column that the index register holds
  // for that step; r1 and r5 are saved first because their registers are
  // overwritten with new column values before they have been consumed.
  134. // round column values
  135. // on entry: r0,r1,r4,r5
  136. // on exit: r2,r1,r4,r5
  137. #define fwd_rnd1(arg, table) \
  138. save (0,r1); \
  139. save (1,r5); \
  140. \
  141. /* compute new column values */ \
  142. do_fcol(table, r2,r5,r4,r1, r0,r3, arg); /* idx=r0 */ \
  143. do_col (table, r4,r1,r2,r5, r0,r3); /* idx=r4 */ \
  144. restore(r0,0); \
  145. do_col (table, r1,r2,r5,r4, r0,r3); /* idx=r1 */ \
  146. restore(r0,1); \
  147. do_col (table, r5,r4,r1,r2, r0,r3); /* idx=r5 */
  // One forward encryption round with the first column held in r2 on entry
  // and delivered in r0 on exit; otherwise the same sequence as fwd_rnd1
  // with the roles of r0 and r2 exchanged.
  //   arg    memory operand of the round key, passed on as 'sched' to do_fcol
  //   table  base address of the four lookup tables for this round
  // r3 is used as the scratch register; stack slots 0 and 1 are overwritten.
  148. // round column values
  149. // on entry: r2,r1,r4,r5
  150. // on exit: r0,r1,r4,r5
  151. #define fwd_rnd2(arg, table) \
  152. save (0,r1); \
  153. save (1,r5); \
  154. \
  155. /* compute new column values */ \
  156. do_fcol(table, r0,r5,r4,r1, r2,r3, arg); /* idx=r2 */ \
  157. do_col (table, r4,r1,r0,r5, r2,r3); /* idx=r4 */ \
  158. restore(r2,0); \
  159. do_col (table, r1,r0,r5,r4, r2,r3); /* idx=r1 */ \
  160. restore(r2,1); \
  161. do_col (table, r5,r4,r1,r0, r2,r3); /* idx=r5 */
  162. // These macros each perform one inverse (decryption) round. inv_rnd1 is
  163. // entered with the previous round column values in r0,r1,r4,r5 and
  164. // exits with the new values in r2,r1,r4,r5, using stack
  165. // for temporary storage
  // inv_rnd2 takes r2,r1,r4,r5 and puts the state back in r0,r1,r4,r5, so the
  // two macros are used alternately.
  // Parameters:
  //   arg    memory operand of the round key, passed on as 'sched' to do_icol
  //   table  base address of the four lookup tables for this round
  // r3 is used as the scratch register; stack slots 0 and 1 are overwritten.
  // Compared with the forward rounds, the second and fourth output registers
  // of every column step are exchanged.
  166. // round column values
  167. // on entry: r0,r1,r4,r5
  168. // on exit: r2,r1,r4,r5
  169. #define inv_rnd1(arg, table) \
  170. save (0,r1); \
  171. save (1,r5); \
  172. \
  173. /* compute new column values */ \
  174. do_icol(table, r2,r1,r4,r5, r0,r3, arg); /* idx=r0 */ \
  175. do_col (table, r4,r5,r2,r1, r0,r3); /* idx=r4 */ \
  176. restore(r0,0); \
  177. do_col (table, r1,r4,r5,r2, r0,r3); /* idx=r1 */ \
  178. restore(r0,1); \
  179. do_col (table, r5,r2,r1,r4, r0,r3); /* idx=r5 */
  // One inverse (decryption) round with the first column held in r2 on entry
  // and delivered in r0 on exit; otherwise the same sequence as inv_rnd1
  // with the roles of r0 and r2 exchanged.
  //   arg    memory operand of the round key, passed on as 'sched' to do_icol
  //   table  base address of the four lookup tables for this round
  // r3 is used as the scratch register; stack slots 0 and 1 are overwritten.
  180. // round column values
  181. // on entry: r2,r1,r4,r5
  182. // on exit: r0,r1,r4,r5
  183. #define inv_rnd2(arg, table) \
  184. save (0,r1); \
  185. save (1,r5); \
  186. \
  187. /* compute new column values */ \
  188. do_icol(table, r0,r1,r4,r5, r2,r3, arg); /* idx=r2 */ \
  189. do_col (table, r4,r5,r0,r1, r2,r3); /* idx=r4 */ \
  190. restore(r2,0); \
  191. do_col (table, r1,r4,r5,r0, r2,r3); /* idx=r1 */ \
  192. restore(r2,1); \
  193. do_col (table, r5,r0,r1,r4, r2,r3); /* idx=r5 */
  194. // AES (Rijndael) Encryption Subroutine
  //
  // aes_rval aes_enc_blk(const unsigned char in_blk[], unsigned char out_blk[], const aes_ctx cx[1])
  //
  // Encrypts one 16-byte block.
  // In:     all three arguments are read from the stack (in_blk/out_blk/ctx
  //         offsets); 16 bytes are read at in_blk, the round count at
  //         nrnd(cx) and the round keys starting at ekey(cx)
  // Out:    16 bytes written at out_blk; eax = 1
  // Saved:  ebp, ebx, esi, edi are pushed and popped
  // Clobb:  eax, ecx, edx, flags
  // Stack:  8 bytes of scratch below the saved registers for save()/restore()
  // NOTE:   the round count is not validated: 10 and 12 select the 10- and
  //         12-round paths, every other value runs the 14-round path.
  195. .global aes_enc_blk
  196. .extern ft_tab
  197. .extern fl_tab
  198. .align 4
  199. aes_enc_blk:
  200. push %ebp
  201. mov ctx(%esp),%ebp // pointer to context
  202. // CAUTION: the order and the values used in these assigns
  203. // rely on the register mappings
  204. 1: push %ebx
  205. mov in_blk+4(%esp),%r2 // two registers pushed so far, hence +4
  206. push %esi
  207. mov nrnd(%ebp),%r3 // number of rounds
  208. push %edi
  209. #if ekey != 0
  210. lea ekey(%ebp),%ebp // key pointer
  211. #endif
  212. // input four columns and xor in first round key
  213. mov (%r2),%r0
  214. mov 4(%r2),%r1
  215. mov 8(%r2),%r4
  216. mov 12(%r2),%r5
  217. xor (%ebp),%r0
  218. xor 4(%ebp),%r1
  219. xor 8(%ebp),%r4
  220. xor 12(%ebp),%r5
  221. sub $8,%esp // space for register saves on stack
  222. add $16,%ebp // increment to next round key
  // ebp is advanced by a further 32 bytes for each extra pair of rounds, so
  // that the last ten rounds always use offsets 0..+144 from ebp and the
  // additional leading rounds of longer keys use negative offsets.
  223. sub $10,%r3
  224. je 4f // 10 rounds for 128-bit key
  225. add $32,%ebp
  226. sub $2,%r3
  227. je 3f // 12 rounds for 192-bit key
  228. add $32,%ebp
  229. 2: fwd_rnd1( -64(%ebp) ,ft_tab) // 14 rounds for 256-bit key
  230. fwd_rnd2( -48(%ebp) ,ft_tab)
  231. 3: fwd_rnd1( -32(%ebp) ,ft_tab) // 12 rounds for 192-bit key
  232. fwd_rnd2( -16(%ebp) ,ft_tab)
  233. 4: fwd_rnd1( (%ebp) ,ft_tab) // 10 rounds for 128-bit key
  234. fwd_rnd2( +16(%ebp) ,ft_tab)
  235. fwd_rnd1( +32(%ebp) ,ft_tab)
  236. fwd_rnd2( +48(%ebp) ,ft_tab)
  237. fwd_rnd1( +64(%ebp) ,ft_tab)
  238. fwd_rnd2( +80(%ebp) ,ft_tab)
  239. fwd_rnd1( +96(%ebp) ,ft_tab)
  240. fwd_rnd2(+112(%ebp) ,ft_tab)
  241. fwd_rnd1(+128(%ebp) ,ft_tab)
  242. fwd_rnd2(+144(%ebp) ,fl_tab) // last round uses a different table
  243. // move final values to the output array. CAUTION: the
  244. // order of these assigns rely on the register mappings
  // (ebp is reused as the output pointer; the stores are interleaved with the
  // pops so that each register is written out before it is restored)
  245. add $8,%esp
  246. mov out_blk+12(%esp),%ebp // four registers still pushed, hence +12
  247. mov %r5,12(%ebp)
  248. pop %edi
  249. mov %r4,8(%ebp)
  250. pop %esi
  251. mov %r1,4(%ebp)
  252. pop %ebx
  253. mov %r0,(%ebp)
  254. pop %ebp
  255. mov $1,%eax
  256. ret
  257. // AES (Rijndael) Decryption Subroutine
  //
  // aes_rval aes_dec_blk(const unsigned char in_blk[], unsigned char out_blk[], const aes_ctx cx[1])
  //
  // Decrypts one 16-byte block.
  // In:     all three arguments are read from the stack (in_blk/out_blk/ctx
  //         offsets); 16 bytes are read at in_blk, the round count at
  //         nrnd(cx) and the round keys from the schedule at dkey(cx)
  // Out:    16 bytes written at out_blk; eax = 1
  // Saved:  ebp, ebx, esi, edi are pushed and popped
  // Clobb:  eax, ecx, edx, flags
  // Stack:  8 bytes of scratch below the saved registers for save()/restore()
  // The key schedule is consumed from its end: the first key used is at
  // dkey + 16*nrnd and the last one at dkey + 0.
  // NOTE:   the round count is not validated: 10 and 12 select the 10- and
  //         12-round paths, every other value runs the 14-round path.
  258. .global aes_dec_blk
  259. .extern it_tab
  260. .extern il_tab
  261. .align 4
  262. aes_dec_blk:
  263. push %ebp
  264. mov ctx(%esp),%ebp // pointer to context
  265. // CAUTION: the order and the values used in these assigns
  266. // rely on the register mappings
  267. 1: push %ebx
  268. mov in_blk+4(%esp),%r2 // two registers pushed so far, hence +4
  269. push %esi
  270. mov nrnd(%ebp),%r3 // number of rounds
  271. push %edi
  272. #if dkey != 0
  273. lea dkey(%ebp),%ebp // key pointer
  274. #endif
  // point ebp at the last round key: ebp += 16 * number of rounds
  275. mov %r3,%r0
  276. shl $4,%r0
  277. add %r0,%ebp
  278. // input four columns and xor in first round key
  279. mov (%r2),%r0
  280. mov 4(%r2),%r1
  281. mov 8(%r2),%r4
  282. mov 12(%r2),%r5
  283. xor (%ebp),%r0
  284. xor 4(%ebp),%r1
  285. xor 8(%ebp),%r4
  286. xor 12(%ebp),%r5
  287. sub $8,%esp // space for register saves on stack
  288. sub $16,%ebp // step down to next round key (schedule is walked backwards)
  // ebp is moved down by a further 32 bytes for each extra pair of rounds, so
  // that the last ten rounds always use offsets 0..-144 from ebp and the
  // additional leading rounds of longer keys use positive offsets.
  289. sub $10,%r3
  290. je 4f // 10 rounds for 128-bit key
  291. sub $32,%ebp
  292. sub $2,%r3
  293. je 3f // 12 rounds for 192-bit key
  294. sub $32,%ebp
  295. 2: inv_rnd1( +64(%ebp), it_tab) // 14 rounds for 256-bit key
  296. inv_rnd2( +48(%ebp), it_tab)
  297. 3: inv_rnd1( +32(%ebp), it_tab) // 12 rounds for 192-bit key
  298. inv_rnd2( +16(%ebp), it_tab)
  299. 4: inv_rnd1( (%ebp), it_tab) // 10 rounds for 128-bit key
  300. inv_rnd2( -16(%ebp), it_tab)
  301. inv_rnd1( -32(%ebp), it_tab)
  302. inv_rnd2( -48(%ebp), it_tab)
  303. inv_rnd1( -64(%ebp), it_tab)
  304. inv_rnd2( -80(%ebp), it_tab)
  305. inv_rnd1( -96(%ebp), it_tab)
  306. inv_rnd2(-112(%ebp), it_tab)
  307. inv_rnd1(-128(%ebp), it_tab)
  308. inv_rnd2(-144(%ebp), il_tab) // last round uses a different table
  309. // move final values to the output array. CAUTION: the
  310. // order of these assigns rely on the register mappings
  // (ebp is reused as the output pointer; the stores are interleaved with the
  // pops so that each register is written out before it is restored)
  311. add $8,%esp
  312. mov out_blk+12(%esp),%ebp // four registers still pushed, hence +12
  313. mov %r5,12(%ebp)
  314. pop %edi
  315. mov %r4,8(%ebp)
  316. pop %esi
  317. mov %r1,4(%ebp)
  318. pop %ebx
  319. mov %r0,(%ebp)
  320. pop %ebp
  321. mov $1,%eax
  322. ret