/* File: aes-x86_64-asm_64.S */

  /* AES (Rijndael) implementation (FIPS PUB 197) for x86_64
   *
   * Copyright (C) 2005 Andreas Steinmetz, <ast@domdv.de>
   *
   * License:
   * This code can be distributed under the terms of the GNU General Public
   * License (GPL) Version 2 provided that the above header down to and
   * including this sentence is retained in full.
   */
  /*
   * Lookup tables used as the TAB operand of the round macro.  They are
   * declared external: nothing in this file defines them.
   */
  .extern crypto_ft_tab
  .extern crypto_it_tab
  .extern crypto_fl_tab
  .extern crypto_il_tab

  .text

  #include <asm/asm-offsets.h>

  /*
   * BASE is the displacement the prologue adds to its context-pointer
   * operand (bound to %rdi by entry()) before reading the 32-bit value at
   * BASE+0 and the key words at BASE+KEY+4 onwards.
   * crypto_tfm_ctx_offset is not defined in this file; presumably it is
   * supplied by <asm/asm-offsets.h> (TODO confirm).
   */
  #define BASE crypto_tfm_ctx_offset
  /*
   * Register aliases.
   *
   * Rn is the 64-bit register.  The suffixed names are the narrower views
   * of the same register: nE = 32-bit, nX = 16-bit, nH = bits 8..15,
   * nL = bits 0..7.  The macros below build these names by token pasting
   * (for example "r2 ## H"), so every suffix a macro pastes onto an alias
   * must be defined here.  R1..R4 have all four views, R5..R7 only the
   * 32-bit view, and R8..R11 are used as full 64-bit registers only.
   */
  #define R1 %rax
  #define R1E %eax
  #define R1X %ax
  #define R1H %ah
  #define R1L %al

  #define R2 %rbx
  #define R2E %ebx
  #define R2X %bx
  #define R2H %bh
  #define R2L %bl

  #define R3 %rcx
  #define R3E %ecx
  #define R3X %cx
  #define R3H %ch
  #define R3L %cl

  #define R4 %rdx
  #define R4E %edx
  #define R4X %dx
  #define R4H %dh
  #define R4L %dl

  #define R5 %rsi
  #define R5E %esi

  #define R6 %rdi
  #define R6E %edi

  #define R7 %rbp
  #define R7E %ebp

  #define R8 %r8
  #define R9 %r9
  #define R10 %r10
  #define R11 %r11
  /*
   * prologue - define the global function symbol FUNC and emit the common
   * entry sequence.
   *
   *   FUNC        symbol to define (.global, @function, aligned to 8)
   *   KEY         extra displacement used when forming the key pointer
   *   B128, B192  branch targets selected by the value read from BASE+0(r8)
   *   r1..r11     register aliases; r1, r5, r6, r7 and r10 are also pasted
   *               with E, so they need a 32-bit alias (R1..R7)
   *
   * Sequence:
   *   - copy r1 to r2 and r3 to r4 (entry()/return use this to preserve
   *     %rbx and %rbp in %r8 and %r9);
   *   - r9 = r8 + BASE + KEY + 48 + 4;
   *   - copy r10 to r11;
   *   - load the four 32-bit words at 0, 4, 8, 12(r7) into r5, r1, r6, r7
   *     (r7 is loaded last because it is also the source pointer);
   *   - XOR those words with the 32-bit words at -48, -44, -40, -36(r9);
   *   - load the 32-bit value at BASE+0(r8) into r10 and compare it with 24
   *     (presumably the key length in bytes - confirm against the context
   *     layout):
   *         below 24  -> jump to B128, r9 unchanged
   *         equal 24  -> r9 += 32, jump to B192
   *         above 24  -> r9 += 64, fall through
   *
   * The "je" is placed after a leaq on purpose: leaq does not modify the
   * flags, so the branch still tests the result of the cmpl.
   */
  #define prologue(FUNC,KEY,B128,B192,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11) \
          .global FUNC; \
          .type FUNC,@function; \
          .align 8; \
  FUNC:   movq r1,r2; \
          movq r3,r4; \
          leaq BASE+KEY+48+4(r8),r9; \
          movq r10,r11; \
          movl (r7),r5 ## E; \
          movl 4(r7),r1 ## E; \
          movl 8(r7),r6 ## E; \
          movl 12(r7),r7 ## E; \
          movl BASE+0(r8),r10 ## E; \
          xorl -48(r9),r5 ## E; \
          xorl -44(r9),r1 ## E; \
          xorl -40(r9),r6 ## E; \
          xorl -36(r9),r7 ## E; \
          cmpl $24,r10 ## E; \
          jb B128; \
          leaq 32(r9),r9; \
          je B192; \
          leaq 32(r9),r9;
  /*
   * epilogue - common exit sequence.
   *
   *   r1 -> r2, r3 -> r4   64-bit register copies (the "return" wrapper uses
   *                        them to put %r8 and %r9 back into %rbx and %rbp)
   *   r5, r6, r7, r8       registers whose low 32 bits are stored, in that
   *                        order, at 0, 4, 8 and 12(r9); each is pasted
   *                        with E and therefore needs a 32-bit alias
   *   r9                   destination pointer
   *
   * Ends with ret, so nothing may follow an expansion of this macro on the
   * same control-flow path.
   */
  #define epilogue(r1,r2,r3,r4,r5,r6,r7,r8,r9) \
          movq r1,r2; \
          movq r3,r4; \
          movl r5 ## E,(r9); \
          movl r6 ## E,4(r9); \
          movl r7 ## E,8(r9); \
          movl r8 ## E,12(r9); \
          ret;
  /*
   * round - one table-lookup round over four 32-bit words.
   *
   *   TAB          base of the lookup table.  It is indexed with a
   *                zero-extended byte scaled by 4, at displacements TAB,
   *                TAB+1024, TAB+2048 and TAB+3072.
   *   OFFSET       displacement from r8 of four consecutive 32-bit words
   *   r1..r4       input words.  All four are consumed; their H, L, X and E
   *                views are pasted, so they must be aliases that define
   *                all of those (R1..R4).
   *   r5, r6, r7   working registers (only the E view is pasted)
   *   r8           base pointer for the words at OFFSET .. OFFSET+12
   *   ra,rb,rc,rd  registers XORed with the words at OFFSET, OFFSET+4,
   *                OFFSET+8 and OFFSET+12(r8)
   *
   * On exit the four result words are in r5, r6, r3 and r4: each of them is
   * first loaded from one table entry and then XORed with three more.
   * r1, r2 and r7 are left holding scratch values.
   *
   * ra/rb are XORed early and rc/rd late.  For the XOR not to be
   * overwritten, ra and rb must name registers that already hold their
   * first table entry at that point (r5 and r6), and rc and rd must name
   * r3 and r4.  Every wrapper in this file passes R5, R6, R3, R4.
   *
   * The instruction order is deliberate interleaving; do not reorder
   * without re-checking every register lifetime.
   */
  #define round(TAB,OFFSET,r1,r2,r3,r4,r5,r6,r7,r8,ra,rb,rc,rd) \
          movzbl r2 ## H,r5 ## E; \
          movzbl r2 ## L,r6 ## E; \
          movl TAB+1024(,r5,4),r5 ## E; \
          movw r4 ## X,r2 ## X; \
          movl TAB(,r6,4),r6 ## E; \
          roll $16,r2 ## E; \
          shrl $16,r4 ## E; \
          movzbl r4 ## H,r7 ## E; \
          movzbl r4 ## L,r4 ## E; \
          xorl OFFSET(r8),ra ## E; \
          xorl OFFSET+4(r8),rb ## E; \
          xorl TAB+3072(,r7,4),r5 ## E; \
          xorl TAB+2048(,r4,4),r6 ## E; \
          movzbl r1 ## L,r7 ## E; \
          movzbl r1 ## H,r4 ## E; \
          movl TAB+1024(,r4,4),r4 ## E; \
          movw r3 ## X,r1 ## X; \
          roll $16,r1 ## E; \
          shrl $16,r3 ## E; \
          xorl TAB(,r7,4),r5 ## E; \
          movzbl r3 ## H,r7 ## E; \
          movzbl r3 ## L,r3 ## E; \
          xorl TAB+3072(,r7,4),r4 ## E; \
          xorl TAB+2048(,r3,4),r5 ## E; \
          movzbl r1 ## H,r7 ## E; \
          movzbl r1 ## L,r3 ## E; \
          shrl $16,r1 ## E; \
          xorl TAB+3072(,r7,4),r6 ## E; \
          movl TAB+2048(,r3,4),r3 ## E; \
          movzbl r1 ## H,r7 ## E; \
          movzbl r1 ## L,r1 ## E; \
          xorl TAB+1024(,r7,4),r6 ## E; \
          xorl TAB(,r1,4),r3 ## E; \
          movzbl r2 ## H,r1 ## E; \
          movzbl r2 ## L,r7 ## E; \
          shrl $16,r2 ## E; \
          xorl TAB+3072(,r1,4),r3 ## E; \
          xorl TAB+2048(,r7,4),r4 ## E; \
          movzbl r2 ## H,r1 ## E; \
          movzbl r2 ## L,r2 ## E; \
          xorl OFFSET+8(r8),rc ## E; \
          xorl OFFSET+12(r8),rd ## E; \
          xorl TAB+1024(,r1,4),r3 ## E; \
          xorl TAB(,r2,4),r4 ## E;
  /*
   * move_regs - 32-bit copies r3 -> r1 and r4 -> r2.
   *
   * The round wrappers use it to move the two results that round leaves in
   * its working registers back into the first two input registers, ready
   * for the next round.  All four operands are pasted with E.
   */
  #define move_regs(r1,r2,r3,r4) \
          movl r3 ## E,r1 ## E; \
          movl r4 ## E,r2 ## E;
  /*
   * entry - prologue bound to concrete registers:
   *
   *   %rbx is copied to %r8 and %rbp to %r9 (both are overwritten later and
   *   put back by "return");
   *   %rdi is the context pointer, %r10 receives the key pointer;
   *   %rsi is copied to %r11 and then reused for the value that is
   *   compared with 24;
   *   the four input words are read through %rdx into
   *   %eax, %ebx, %ecx, %edx (R1..R4).
   */
  #define entry(FUNC,KEY,B128,B192) \
          prologue(FUNC,KEY,B128,B192,R2,R8,R7,R9,R1,R3,R4,R6,R10,R5,R11)

  /*
   * return - epilogue bound to concrete registers: copies %r8 back to %rbx
   * and %r9 back to %rbp, stores %esi, %edi, %ecx, %edx at 0, 4, 8 and
   * 12(%r11), then executes ret.
   */
  #define return epilogue(R8,R2,R9,R7,R5,R6,R3,R4,R11)
  /*
   * Round wrappers.  All of them use %r10 as the key pointer, %rbp (R7) as
   * the third working register, and XOR the key words into R5, R6, R3, R4,
   * which are the registers holding the round results.
   *
   * The *_round variants follow the round with move_regs(R1,R2,R5,R6) so
   * that the results are back in R1..R4 for the next round.  The *_final
   * variants leave the results in R5, R6, R3, R4, which is where "return"
   * stores them from.
   *
   * The decrypt variants pass R1/R2, R3/R4 and R5/R6 to round in swapped
   * order compared with the encrypt variants.
   */
  #define encrypt_round(TAB,OFFSET) \
          round(TAB,OFFSET,R1,R2,R3,R4,R5,R6,R7,R10,R5,R6,R3,R4) \
          move_regs(R1,R2,R5,R6)

  #define encrypt_final(TAB,OFFSET) \
          round(TAB,OFFSET,R1,R2,R3,R4,R5,R6,R7,R10,R5,R6,R3,R4)

  #define decrypt_round(TAB,OFFSET) \
          round(TAB,OFFSET,R2,R1,R4,R3,R6,R5,R7,R10,R5,R6,R3,R4) \
          move_regs(R1,R2,R5,R6)

  #define decrypt_final(TAB,OFFSET) \
          round(TAB,OFFSET,R2,R1,R4,R3,R6,R5,R7,R10,R5,R6,R3,R4)
  /*
   * void aes_enc_blk(struct crypto_tfm *tfm, u8 *out, const u8 *in)
   *
   * Register use on entry, as read by entry(): %rdi = tfm, %rsi = out,
   * %rdx = in.  Four 32-bit words are read from in and four are written
   * to out.
   *
   * entry() is instantiated with KEY = 0.  It branches on the 32-bit value
   * at BASE+0(%rdi):
   *     above 24  -> falls through: 13 encrypt_round + encrypt_final
   *     equal 24  -> enc192:        11 encrypt_round + encrypt_final
   *     below 24  -> enc128:         9 encrypt_round + encrypt_final
   * It also advances the key pointer by 64, 32 or 0 bytes respectively, so
   * each path reads its first round's key words from the same address.
   *
   * Regular rounds use crypto_ft_tab; the final round uses crypto_fl_tab.
   */
  entry(aes_enc_blk,0,enc128,enc192)
          encrypt_round(crypto_ft_tab,-96)
          encrypt_round(crypto_ft_tab,-80)
  enc192: encrypt_round(crypto_ft_tab,-64)
          encrypt_round(crypto_ft_tab,-48)
  enc128: encrypt_round(crypto_ft_tab,-32)
          encrypt_round(crypto_ft_tab,-16)
          encrypt_round(crypto_ft_tab, 0)
          encrypt_round(crypto_ft_tab, 16)
          encrypt_round(crypto_ft_tab, 32)
          encrypt_round(crypto_ft_tab, 48)
          encrypt_round(crypto_ft_tab, 64)
          encrypt_round(crypto_ft_tab, 80)
          encrypt_round(crypto_ft_tab, 96)
          encrypt_final(crypto_fl_tab,112)
          return
  /*
   * void aes_dec_blk(struct crypto_tfm *tfm, u8 *out, const u8 *in)
   *
   * Register use on entry, as read by entry(): %rdi = tfm, %rsi = out,
   * %rdx = in.  Four 32-bit words are read from in and four are written
   * to out.
   *
   * entry() is instantiated with KEY = 240, so the key words are read 240
   * bytes further into the context than in aes_enc_blk.  It branches on
   * the 32-bit value at BASE+0(%rdi):
   *     above 24  -> falls through: 13 decrypt_round + decrypt_final
   *     equal 24  -> dec192:        11 decrypt_round + decrypt_final
   *     below 24  -> dec128:         9 decrypt_round + decrypt_final
   * It also advances the key pointer by 64, 32 or 0 bytes respectively, so
   * each path reads its first round's key words from the same address.
   *
   * Regular rounds use crypto_it_tab; the final round uses crypto_il_tab.
   */
  entry(aes_dec_blk,240,dec128,dec192)
          decrypt_round(crypto_it_tab,-96)
          decrypt_round(crypto_it_tab,-80)
  dec192: decrypt_round(crypto_it_tab,-64)
          decrypt_round(crypto_it_tab,-48)
  dec128: decrypt_round(crypto_it_tab,-32)
          decrypt_round(crypto_it_tab,-16)
          decrypt_round(crypto_it_tab, 0)
          decrypt_round(crypto_it_tab, 16)
          decrypt_round(crypto_it_tab, 32)
          decrypt_round(crypto_it_tab, 48)
          decrypt_round(crypto_it_tab, 64)
          decrypt_round(crypto_it_tab, 80)
          decrypt_round(crypto_it_tab, 96)
          decrypt_final(crypto_il_tab,112)
          return