aesni-intel_asm.S 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896
  1. /*
  2. * Implement AES algorithm in Intel AES-NI instructions.
  3. *
  4. * The white paper of AES-NI instructions can be downloaded from:
  5. * http://softwarecommunity.intel.com/isn/downloads/intelavx/AES-Instructions-Set_WP.pdf
  6. *
  7. * Copyright (C) 2008, Intel Corp.
  8. * Author: Huang Ying <ying.huang@intel.com>
  9. * Vinodh Gopal <vinodh.gopal@intel.com>
  10. * Kahraman Akdemir
  11. *
  12. * This program is free software; you can redistribute it and/or modify
  13. * it under the terms of the GNU General Public License as published by
  14. * the Free Software Foundation; either version 2 of the License, or
  15. * (at your option) any later version.
  16. */
  17. #include <linux/linkage.h>
  18. .text
  19. #define STATE1 %xmm0
  20. #define STATE2 %xmm4
  21. #define STATE3 %xmm5
  22. #define STATE4 %xmm6
  23. #define STATE STATE1
  24. #define IN1 %xmm1
  25. #define IN2 %xmm7
  26. #define IN3 %xmm8
  27. #define IN4 %xmm9
  28. #define IN IN1
  29. #define KEY %xmm2
  30. #define IV %xmm3
  31. #define KEYP %rdi
  32. #define OUTP %rsi
  33. #define INP %rdx
  34. #define LEN %rcx
  35. #define IVP %r8
  36. #define KLEN %r9d
  37. #define T1 %r10
  38. #define TKEYP T1
  39. #define T2 %r11
  40. _key_expansion_128:
  41. _key_expansion_256a:
  42. pshufd $0b11111111, %xmm1, %xmm1
  43. shufps $0b00010000, %xmm0, %xmm4
  44. pxor %xmm4, %xmm0
  45. shufps $0b10001100, %xmm0, %xmm4
  46. pxor %xmm4, %xmm0
  47. pxor %xmm1, %xmm0
  48. movaps %xmm0, (%rcx)
  49. add $0x10, %rcx
  50. ret
  51. _key_expansion_192a:
  52. pshufd $0b01010101, %xmm1, %xmm1
  53. shufps $0b00010000, %xmm0, %xmm4
  54. pxor %xmm4, %xmm0
  55. shufps $0b10001100, %xmm0, %xmm4
  56. pxor %xmm4, %xmm0
  57. pxor %xmm1, %xmm0
  58. movaps %xmm2, %xmm5
  59. movaps %xmm2, %xmm6
  60. pslldq $4, %xmm5
  61. pshufd $0b11111111, %xmm0, %xmm3
  62. pxor %xmm3, %xmm2
  63. pxor %xmm5, %xmm2
  64. movaps %xmm0, %xmm1
  65. shufps $0b01000100, %xmm0, %xmm6
  66. movaps %xmm6, (%rcx)
  67. shufps $0b01001110, %xmm2, %xmm1
  68. movaps %xmm1, 16(%rcx)
  69. add $0x20, %rcx
  70. ret
  71. _key_expansion_192b:
  72. pshufd $0b01010101, %xmm1, %xmm1
  73. shufps $0b00010000, %xmm0, %xmm4
  74. pxor %xmm4, %xmm0
  75. shufps $0b10001100, %xmm0, %xmm4
  76. pxor %xmm4, %xmm0
  77. pxor %xmm1, %xmm0
  78. movaps %xmm2, %xmm5
  79. pslldq $4, %xmm5
  80. pshufd $0b11111111, %xmm0, %xmm3
  81. pxor %xmm3, %xmm2
  82. pxor %xmm5, %xmm2
  83. movaps %xmm0, (%rcx)
  84. add $0x10, %rcx
  85. ret
  86. _key_expansion_256b:
  87. pshufd $0b10101010, %xmm1, %xmm1
  88. shufps $0b00010000, %xmm2, %xmm4
  89. pxor %xmm4, %xmm2
  90. shufps $0b10001100, %xmm2, %xmm4
  91. pxor %xmm4, %xmm2
  92. pxor %xmm1, %xmm2
  93. movaps %xmm2, (%rcx)
  94. add $0x10, %rcx
  95. ret
  96. /*
  97. * int aesni_set_key(struct crypto_aes_ctx *ctx, const u8 *in_key,
  98. * unsigned int key_len)
  99. */
  100. ENTRY(aesni_set_key)
  101. movups (%rsi), %xmm0 # user key (first 16 bytes)
  102. movaps %xmm0, (%rdi)
  103. lea 0x10(%rdi), %rcx # key addr
  104. movl %edx, 480(%rdi)
  105. pxor %xmm4, %xmm4 # xmm4 is assumed 0 in _key_expansion_x
  106. cmp $24, %dl
  107. jb .Lenc_key128
  108. je .Lenc_key192
  109. movups 0x10(%rsi), %xmm2 # other user key
  110. movaps %xmm2, (%rcx)
  111. add $0x10, %rcx
  112. # aeskeygenassist $0x1, %xmm2, %xmm1 # round 1
  113. .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x01
  114. call _key_expansion_256a
  115. # aeskeygenassist $0x1, %xmm0, %xmm1
  116. .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x01
  117. call _key_expansion_256b
  118. # aeskeygenassist $0x2, %xmm2, %xmm1 # round 2
  119. .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x02
  120. call _key_expansion_256a
  121. # aeskeygenassist $0x2, %xmm0, %xmm1
  122. .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x02
  123. call _key_expansion_256b
  124. # aeskeygenassist $0x4, %xmm2, %xmm1 # round 3
  125. .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x04
  126. call _key_expansion_256a
  127. # aeskeygenassist $0x4, %xmm0, %xmm1
  128. .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x04
  129. call _key_expansion_256b
  130. # aeskeygenassist $0x8, %xmm2, %xmm1 # round 4
  131. .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x08
  132. call _key_expansion_256a
  133. # aeskeygenassist $0x8, %xmm0, %xmm1
  134. .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x08
  135. call _key_expansion_256b
  136. # aeskeygenassist $0x10, %xmm2, %xmm1 # round 5
  137. .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x10
  138. call _key_expansion_256a
  139. # aeskeygenassist $0x10, %xmm0, %xmm1
  140. .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x10
  141. call _key_expansion_256b
  142. # aeskeygenassist $0x20, %xmm2, %xmm1 # round 6
  143. .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x20
  144. call _key_expansion_256a
  145. # aeskeygenassist $0x20, %xmm0, %xmm1
  146. .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x20
  147. call _key_expansion_256b
  148. # aeskeygenassist $0x40, %xmm2, %xmm1 # round 7
  149. .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x40
  150. call _key_expansion_256a
  151. jmp .Ldec_key
  152. .Lenc_key192:
  153. movq 0x10(%rsi), %xmm2 # other user key
  154. # aeskeygenassist $0x1, %xmm2, %xmm1 # round 1
  155. .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x01
  156. call _key_expansion_192a
  157. # aeskeygenassist $0x2, %xmm2, %xmm1 # round 2
  158. .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x02
  159. call _key_expansion_192b
  160. # aeskeygenassist $0x4, %xmm2, %xmm1 # round 3
  161. .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x04
  162. call _key_expansion_192a
  163. # aeskeygenassist $0x8, %xmm2, %xmm1 # round 4
  164. .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x08
  165. call _key_expansion_192b
  166. # aeskeygenassist $0x10, %xmm2, %xmm1 # round 5
  167. .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x10
  168. call _key_expansion_192a
  169. # aeskeygenassist $0x20, %xmm2, %xmm1 # round 6
  170. .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x20
  171. call _key_expansion_192b
  172. # aeskeygenassist $0x40, %xmm2, %xmm1 # round 7
  173. .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x40
  174. call _key_expansion_192a
  175. # aeskeygenassist $0x80, %xmm2, %xmm1 # round 8
  176. .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x80
  177. call _key_expansion_192b
  178. jmp .Ldec_key
  179. .Lenc_key128:
  180. # aeskeygenassist $0x1, %xmm0, %xmm1 # round 1
  181. .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x01
  182. call _key_expansion_128
  183. # aeskeygenassist $0x2, %xmm0, %xmm1 # round 2
  184. .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x02
  185. call _key_expansion_128
  186. # aeskeygenassist $0x4, %xmm0, %xmm1 # round 3
  187. .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x04
  188. call _key_expansion_128
  189. # aeskeygenassist $0x8, %xmm0, %xmm1 # round 4
  190. .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x08
  191. call _key_expansion_128
  192. # aeskeygenassist $0x10, %xmm0, %xmm1 # round 5
  193. .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x10
  194. call _key_expansion_128
  195. # aeskeygenassist $0x20, %xmm0, %xmm1 # round 6
  196. .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x20
  197. call _key_expansion_128
  198. # aeskeygenassist $0x40, %xmm0, %xmm1 # round 7
  199. .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x40
  200. call _key_expansion_128
  201. # aeskeygenassist $0x80, %xmm0, %xmm1 # round 8
  202. .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x80
  203. call _key_expansion_128
  204. # aeskeygenassist $0x1b, %xmm0, %xmm1 # round 9
  205. .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x1b
  206. call _key_expansion_128
  207. # aeskeygenassist $0x36, %xmm0, %xmm1 # round 10
  208. .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x36
  209. call _key_expansion_128
  210. .Ldec_key:
  211. sub $0x10, %rcx
  212. movaps (%rdi), %xmm0
  213. movaps (%rcx), %xmm1
  214. movaps %xmm0, 240(%rcx)
  215. movaps %xmm1, 240(%rdi)
  216. add $0x10, %rdi
  217. lea 240-16(%rcx), %rsi
  218. .align 4
  219. .Ldec_key_loop:
  220. movaps (%rdi), %xmm0
  221. # aesimc %xmm0, %xmm1
  222. .byte 0x66, 0x0f, 0x38, 0xdb, 0xc8
  223. movaps %xmm1, (%rsi)
  224. add $0x10, %rdi
  225. sub $0x10, %rsi
  226. cmp %rcx, %rdi
  227. jb .Ldec_key_loop
  228. xor %rax, %rax
  229. ret
  230. /*
  231. * void aesni_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src)
  232. */
  233. ENTRY(aesni_enc)
  234. movl 480(KEYP), KLEN # key length
  235. movups (INP), STATE # input
  236. call _aesni_enc1
  237. movups STATE, (OUTP) # output
  238. ret
  239. /*
  240. * _aesni_enc1: internal ABI
  241. * input:
  242. * KEYP: key struct pointer
  243. * KLEN: round count
  244. * STATE: initial state (input)
  245. * output:
  246. * STATE: finial state (output)
  247. * changed:
  248. * KEY
  249. * TKEYP (T1)
  250. */
  251. _aesni_enc1:
  252. movaps (KEYP), KEY # key
  253. mov KEYP, TKEYP
  254. pxor KEY, STATE # round 0
  255. add $0x30, TKEYP
  256. cmp $24, KLEN
  257. jb .Lenc128
  258. lea 0x20(TKEYP), TKEYP
  259. je .Lenc192
  260. add $0x20, TKEYP
  261. movaps -0x60(TKEYP), KEY
  262. # aesenc KEY, STATE
  263. .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
  264. movaps -0x50(TKEYP), KEY
  265. # aesenc KEY, STATE
  266. .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
  267. .align 4
  268. .Lenc192:
  269. movaps -0x40(TKEYP), KEY
  270. # aesenc KEY, STATE
  271. .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
  272. movaps -0x30(TKEYP), KEY
  273. # aesenc KEY, STATE
  274. .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
  275. .align 4
  276. .Lenc128:
  277. movaps -0x20(TKEYP), KEY
  278. # aesenc KEY, STATE
  279. .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
  280. movaps -0x10(TKEYP), KEY
  281. # aesenc KEY, STATE
  282. .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
  283. movaps (TKEYP), KEY
  284. # aesenc KEY, STATE
  285. .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
  286. movaps 0x10(TKEYP), KEY
  287. # aesenc KEY, STATE
  288. .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
  289. movaps 0x20(TKEYP), KEY
  290. # aesenc KEY, STATE
  291. .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
  292. movaps 0x30(TKEYP), KEY
  293. # aesenc KEY, STATE
  294. .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
  295. movaps 0x40(TKEYP), KEY
  296. # aesenc KEY, STATE
  297. .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
  298. movaps 0x50(TKEYP), KEY
  299. # aesenc KEY, STATE
  300. .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
  301. movaps 0x60(TKEYP), KEY
  302. # aesenc KEY, STATE
  303. .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
  304. movaps 0x70(TKEYP), KEY
  305. # aesenclast KEY, STATE # last round
  306. .byte 0x66, 0x0f, 0x38, 0xdd, 0xc2
  307. ret
  308. /*
  309. * _aesni_enc4: internal ABI
  310. * input:
  311. * KEYP: key struct pointer
  312. * KLEN: round count
  313. * STATE1: initial state (input)
  314. * STATE2
  315. * STATE3
  316. * STATE4
  317. * output:
  318. * STATE1: finial state (output)
  319. * STATE2
  320. * STATE3
  321. * STATE4
  322. * changed:
  323. * KEY
  324. * TKEYP (T1)
  325. */
  326. _aesni_enc4:
  327. movaps (KEYP), KEY # key
  328. mov KEYP, TKEYP
  329. pxor KEY, STATE1 # round 0
  330. pxor KEY, STATE2
  331. pxor KEY, STATE3
  332. pxor KEY, STATE4
  333. add $0x30, TKEYP
  334. cmp $24, KLEN
  335. jb .L4enc128
  336. lea 0x20(TKEYP), TKEYP
  337. je .L4enc192
  338. add $0x20, TKEYP
  339. movaps -0x60(TKEYP), KEY
  340. # aesenc KEY, STATE1
  341. .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
  342. # aesenc KEY, STATE2
  343. .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2
  344. # aesenc KEY, STATE3
  345. .byte 0x66, 0x0f, 0x38, 0xdc, 0xea
  346. # aesenc KEY, STATE4
  347. .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
  348. movaps -0x50(TKEYP), KEY
  349. # aesenc KEY, STATE1
  350. .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
  351. # aesenc KEY, STATE2
  352. .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2
  353. # aesenc KEY, STATE3
  354. .byte 0x66, 0x0f, 0x38, 0xdc, 0xea
  355. # aesenc KEY, STATE4
  356. .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
  357. #.align 4
  358. .L4enc192:
  359. movaps -0x40(TKEYP), KEY
  360. # aesenc KEY, STATE1
  361. .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
  362. # aesenc KEY, STATE2
  363. .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2
  364. # aesenc KEY, STATE3
  365. .byte 0x66, 0x0f, 0x38, 0xdc, 0xea
  366. # aesenc KEY, STATE4
  367. .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
  368. movaps -0x30(TKEYP), KEY
  369. # aesenc KEY, STATE1
  370. .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
  371. # aesenc KEY, STATE2
  372. .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2
  373. # aesenc KEY, STATE3
  374. .byte 0x66, 0x0f, 0x38, 0xdc, 0xea
  375. # aesenc KEY, STATE4
  376. .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
  377. #.align 4
  378. .L4enc128:
  379. movaps -0x20(TKEYP), KEY
  380. # aesenc KEY, STATE1
  381. .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
  382. # aesenc KEY, STATE2
  383. .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2
  384. # aesenc KEY, STATE3
  385. .byte 0x66, 0x0f, 0x38, 0xdc, 0xea
  386. # aesenc KEY, STATE4
  387. .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
  388. movaps -0x10(TKEYP), KEY
  389. # aesenc KEY, STATE1
  390. .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
  391. # aesenc KEY, STATE2
  392. .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2
  393. # aesenc KEY, STATE3
  394. .byte 0x66, 0x0f, 0x38, 0xdc, 0xea
  395. # aesenc KEY, STATE4
  396. .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
  397. movaps (TKEYP), KEY
  398. # aesenc KEY, STATE1
  399. .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
  400. # aesenc KEY, STATE2
  401. .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2
  402. # aesenc KEY, STATE3
  403. .byte 0x66, 0x0f, 0x38, 0xdc, 0xea
  404. # aesenc KEY, STATE4
  405. .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
  406. movaps 0x10(TKEYP), KEY
  407. # aesenc KEY, STATE1
  408. .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
  409. # aesenc KEY, STATE2
  410. .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2
  411. # aesenc KEY, STATE3
  412. .byte 0x66, 0x0f, 0x38, 0xdc, 0xea
  413. # aesenc KEY, STATE4
  414. .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
  415. movaps 0x20(TKEYP), KEY
  416. # aesenc KEY, STATE1
  417. .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
  418. # aesenc KEY, STATE2
  419. .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2
  420. # aesenc KEY, STATE3
  421. .byte 0x66, 0x0f, 0x38, 0xdc, 0xea
  422. # aesenc KEY, STATE4
  423. .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
  424. movaps 0x30(TKEYP), KEY
  425. # aesenc KEY, STATE1
  426. .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
  427. # aesenc KEY, STATE2
  428. .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2
  429. # aesenc KEY, STATE3
  430. .byte 0x66, 0x0f, 0x38, 0xdc, 0xea
  431. # aesenc KEY, STATE4
  432. .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
  433. movaps 0x40(TKEYP), KEY
  434. # aesenc KEY, STATE1
  435. .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
  436. # aesenc KEY, STATE2
  437. .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2
  438. # aesenc KEY, STATE3
  439. .byte 0x66, 0x0f, 0x38, 0xdc, 0xea
  440. # aesenc KEY, STATE4
  441. .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
  442. movaps 0x50(TKEYP), KEY
  443. # aesenc KEY, STATE1
  444. .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
  445. # aesenc KEY, STATE2
  446. .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2
  447. # aesenc KEY, STATE3
  448. .byte 0x66, 0x0f, 0x38, 0xdc, 0xea
  449. # aesenc KEY, STATE4
  450. .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
  451. movaps 0x60(TKEYP), KEY
  452. # aesenc KEY, STATE1
  453. .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
  454. # aesenc KEY, STATE2
  455. .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2
  456. # aesenc KEY, STATE3
  457. .byte 0x66, 0x0f, 0x38, 0xdc, 0xea
  458. # aesenc KEY, STATE4
  459. .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
  460. movaps 0x70(TKEYP), KEY
  461. # aesenclast KEY, STATE1 # last round
  462. .byte 0x66, 0x0f, 0x38, 0xdd, 0xc2
  463. # aesenclast KEY, STATE2
  464. .byte 0x66, 0x0f, 0x38, 0xdd, 0xe2
  465. # aesenclast KEY, STATE3
  466. .byte 0x66, 0x0f, 0x38, 0xdd, 0xea
  467. # aesenclast KEY, STATE4
  468. .byte 0x66, 0x0f, 0x38, 0xdd, 0xf2
  469. ret
  470. /*
  471. * void aesni_dec (struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src)
  472. */
  473. ENTRY(aesni_dec)
  474. mov 480(KEYP), KLEN # key length
  475. add $240, KEYP
  476. movups (INP), STATE # input
  477. call _aesni_dec1
  478. movups STATE, (OUTP) #output
  479. ret
  480. /*
  481. * _aesni_dec1: internal ABI
  482. * input:
  483. * KEYP: key struct pointer
  484. * KLEN: key length
  485. * STATE: initial state (input)
  486. * output:
  487. * STATE: finial state (output)
  488. * changed:
  489. * KEY
  490. * TKEYP (T1)
  491. */
  492. _aesni_dec1:
  493. movaps (KEYP), KEY # key
  494. mov KEYP, TKEYP
  495. pxor KEY, STATE # round 0
  496. add $0x30, TKEYP
  497. cmp $24, KLEN
  498. jb .Ldec128
  499. lea 0x20(TKEYP), TKEYP
  500. je .Ldec192
  501. add $0x20, TKEYP
  502. movaps -0x60(TKEYP), KEY
  503. # aesdec KEY, STATE
  504. .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
  505. movaps -0x50(TKEYP), KEY
  506. # aesdec KEY, STATE
  507. .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
  508. .align 4
  509. .Ldec192:
  510. movaps -0x40(TKEYP), KEY
  511. # aesdec KEY, STATE
  512. .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
  513. movaps -0x30(TKEYP), KEY
  514. # aesdec KEY, STATE
  515. .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
  516. .align 4
  517. .Ldec128:
  518. movaps -0x20(TKEYP), KEY
  519. # aesdec KEY, STATE
  520. .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
  521. movaps -0x10(TKEYP), KEY
  522. # aesdec KEY, STATE
  523. .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
  524. movaps (TKEYP), KEY
  525. # aesdec KEY, STATE
  526. .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
  527. movaps 0x10(TKEYP), KEY
  528. # aesdec KEY, STATE
  529. .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
  530. movaps 0x20(TKEYP), KEY
  531. # aesdec KEY, STATE
  532. .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
  533. movaps 0x30(TKEYP), KEY
  534. # aesdec KEY, STATE
  535. .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
  536. movaps 0x40(TKEYP), KEY
  537. # aesdec KEY, STATE
  538. .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
  539. movaps 0x50(TKEYP), KEY
  540. # aesdec KEY, STATE
  541. .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
  542. movaps 0x60(TKEYP), KEY
  543. # aesdec KEY, STATE
  544. .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
  545. movaps 0x70(TKEYP), KEY
  546. # aesdeclast KEY, STATE # last round
  547. .byte 0x66, 0x0f, 0x38, 0xdf, 0xc2
  548. ret
  549. /*
  550. * _aesni_dec4: internal ABI
  551. * input:
  552. * KEYP: key struct pointer
  553. * KLEN: key length
  554. * STATE1: initial state (input)
  555. * STATE2
  556. * STATE3
  557. * STATE4
  558. * output:
  559. * STATE1: finial state (output)
  560. * STATE2
  561. * STATE3
  562. * STATE4
  563. * changed:
  564. * KEY
  565. * TKEYP (T1)
  566. */
  567. _aesni_dec4:
  568. movaps (KEYP), KEY # key
  569. mov KEYP, TKEYP
  570. pxor KEY, STATE1 # round 0
  571. pxor KEY, STATE2
  572. pxor KEY, STATE3
  573. pxor KEY, STATE4
  574. add $0x30, TKEYP
  575. cmp $24, KLEN
  576. jb .L4dec128
  577. lea 0x20(TKEYP), TKEYP
  578. je .L4dec192
  579. add $0x20, TKEYP
  580. movaps -0x60(TKEYP), KEY
  581. # aesdec KEY, STATE1
  582. .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
  583. # aesdec KEY, STATE2
  584. .byte 0x66, 0x0f, 0x38, 0xde, 0xe2
  585. # aesdec KEY, STATE3
  586. .byte 0x66, 0x0f, 0x38, 0xde, 0xea
  587. # aesdec KEY, STATE4
  588. .byte 0x66, 0x0f, 0x38, 0xde, 0xf2
  589. movaps -0x50(TKEYP), KEY
  590. # aesdec KEY, STATE1
  591. .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
  592. # aesdec KEY, STATE2
  593. .byte 0x66, 0x0f, 0x38, 0xde, 0xe2
  594. # aesdec KEY, STATE3
  595. .byte 0x66, 0x0f, 0x38, 0xde, 0xea
  596. # aesdec KEY, STATE4
  597. .byte 0x66, 0x0f, 0x38, 0xde, 0xf2
  598. .align 4
  599. .L4dec192:
  600. movaps -0x40(TKEYP), KEY
  601. # aesdec KEY, STATE1
  602. .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
  603. # aesdec KEY, STATE2
  604. .byte 0x66, 0x0f, 0x38, 0xde, 0xe2
  605. # aesdec KEY, STATE3
  606. .byte 0x66, 0x0f, 0x38, 0xde, 0xea
  607. # aesdec KEY, STATE4
  608. .byte 0x66, 0x0f, 0x38, 0xde, 0xf2
  609. movaps -0x30(TKEYP), KEY
  610. # aesdec KEY, STATE1
  611. .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
  612. # aesdec KEY, STATE2
  613. .byte 0x66, 0x0f, 0x38, 0xde, 0xe2
  614. # aesdec KEY, STATE3
  615. .byte 0x66, 0x0f, 0x38, 0xde, 0xea
  616. # aesdec KEY, STATE4
  617. .byte 0x66, 0x0f, 0x38, 0xde, 0xf2
  618. .align 4
  619. .L4dec128:
  620. movaps -0x20(TKEYP), KEY
  621. # aesdec KEY, STATE1
  622. .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
  623. # aesdec KEY, STATE2
  624. .byte 0x66, 0x0f, 0x38, 0xde, 0xe2
  625. # aesdec KEY, STATE3
  626. .byte 0x66, 0x0f, 0x38, 0xde, 0xea
  627. # aesdec KEY, STATE4
  628. .byte 0x66, 0x0f, 0x38, 0xde, 0xf2
  629. movaps -0x10(TKEYP), KEY
  630. # aesdec KEY, STATE1
  631. .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
  632. # aesdec KEY, STATE2
  633. .byte 0x66, 0x0f, 0x38, 0xde, 0xe2
  634. # aesdec KEY, STATE3
  635. .byte 0x66, 0x0f, 0x38, 0xde, 0xea
  636. # aesdec KEY, STATE4
  637. .byte 0x66, 0x0f, 0x38, 0xde, 0xf2
  638. movaps (TKEYP), KEY
  639. # aesdec KEY, STATE1
  640. .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
  641. # aesdec KEY, STATE2
  642. .byte 0x66, 0x0f, 0x38, 0xde, 0xe2
  643. # aesdec KEY, STATE3
  644. .byte 0x66, 0x0f, 0x38, 0xde, 0xea
  645. # aesdec KEY, STATE4
  646. .byte 0x66, 0x0f, 0x38, 0xde, 0xf2
  647. movaps 0x10(TKEYP), KEY
  648. # aesdec KEY, STATE1
  649. .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
  650. # aesdec KEY, STATE2
  651. .byte 0x66, 0x0f, 0x38, 0xde, 0xe2
  652. # aesdec KEY, STATE3
  653. .byte 0x66, 0x0f, 0x38, 0xde, 0xea
  654. # aesdec KEY, STATE4
  655. .byte 0x66, 0x0f, 0x38, 0xde, 0xf2
  656. movaps 0x20(TKEYP), KEY
  657. # aesdec KEY, STATE1
  658. .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
  659. # aesdec KEY, STATE2
  660. .byte 0x66, 0x0f, 0x38, 0xde, 0xe2
  661. # aesdec KEY, STATE3
  662. .byte 0x66, 0x0f, 0x38, 0xde, 0xea
  663. # aesdec KEY, STATE4
  664. .byte 0x66, 0x0f, 0x38, 0xde, 0xf2
  665. movaps 0x30(TKEYP), KEY
  666. # aesdec KEY, STATE1
  667. .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
  668. # aesdec KEY, STATE2
  669. .byte 0x66, 0x0f, 0x38, 0xde, 0xe2
  670. # aesdec KEY, STATE3
  671. .byte 0x66, 0x0f, 0x38, 0xde, 0xea
  672. # aesdec KEY, STATE4
  673. .byte 0x66, 0x0f, 0x38, 0xde, 0xf2
  674. movaps 0x40(TKEYP), KEY
  675. # aesdec KEY, STATE1
  676. .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
  677. # aesdec KEY, STATE2
  678. .byte 0x66, 0x0f, 0x38, 0xde, 0xe2
  679. # aesdec KEY, STATE3
  680. .byte 0x66, 0x0f, 0x38, 0xde, 0xea
  681. # aesdec KEY, STATE4
  682. .byte 0x66, 0x0f, 0x38, 0xde, 0xf2
  683. movaps 0x50(TKEYP), KEY
  684. # aesdec KEY, STATE1
  685. .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
  686. # aesdec KEY, STATE2
  687. .byte 0x66, 0x0f, 0x38, 0xde, 0xe2
  688. # aesdec KEY, STATE3
  689. .byte 0x66, 0x0f, 0x38, 0xde, 0xea
  690. # aesdec KEY, STATE4
  691. .byte 0x66, 0x0f, 0x38, 0xde, 0xf2
  692. movaps 0x60(TKEYP), KEY
  693. # aesdec KEY, STATE1
  694. .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
  695. # aesdec KEY, STATE2
  696. .byte 0x66, 0x0f, 0x38, 0xde, 0xe2
  697. # aesdec KEY, STATE3
  698. .byte 0x66, 0x0f, 0x38, 0xde, 0xea
  699. # aesdec KEY, STATE4
  700. .byte 0x66, 0x0f, 0x38, 0xde, 0xf2
  701. movaps 0x70(TKEYP), KEY
  702. # aesdeclast KEY, STATE1 # last round
  703. .byte 0x66, 0x0f, 0x38, 0xdf, 0xc2
  704. # aesdeclast KEY, STATE2
  705. .byte 0x66, 0x0f, 0x38, 0xdf, 0xe2
  706. # aesdeclast KEY, STATE3
  707. .byte 0x66, 0x0f, 0x38, 0xdf, 0xea
  708. # aesdeclast KEY, STATE4
  709. .byte 0x66, 0x0f, 0x38, 0xdf, 0xf2
  710. ret
  711. /*
  712. * void aesni_ecb_enc(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src,
  713. * size_t len)
  714. */
  715. ENTRY(aesni_ecb_enc)
  716. test LEN, LEN # check length
  717. jz .Lecb_enc_ret
  718. mov 480(KEYP), KLEN
  719. cmp $16, LEN
  720. jb .Lecb_enc_ret
  721. cmp $64, LEN
  722. jb .Lecb_enc_loop1
  723. .align 4
  724. .Lecb_enc_loop4:
  725. movups (INP), STATE1
  726. movups 0x10(INP), STATE2
  727. movups 0x20(INP), STATE3
  728. movups 0x30(INP), STATE4
  729. call _aesni_enc4
  730. movups STATE1, (OUTP)
  731. movups STATE2, 0x10(OUTP)
  732. movups STATE3, 0x20(OUTP)
  733. movups STATE4, 0x30(OUTP)
  734. sub $64, LEN
  735. add $64, INP
  736. add $64, OUTP
  737. cmp $64, LEN
  738. jge .Lecb_enc_loop4
  739. cmp $16, LEN
  740. jb .Lecb_enc_ret
  741. .align 4
  742. .Lecb_enc_loop1:
  743. movups (INP), STATE1
  744. call _aesni_enc1
  745. movups STATE1, (OUTP)
  746. sub $16, LEN
  747. add $16, INP
  748. add $16, OUTP
  749. cmp $16, LEN
  750. jge .Lecb_enc_loop1
  751. .Lecb_enc_ret:
  752. ret
  753. /*
  754. * void aesni_ecb_dec(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src,
  755. * size_t len);
  756. */
  757. ENTRY(aesni_ecb_dec)
  758. test LEN, LEN
  759. jz .Lecb_dec_ret
  760. mov 480(KEYP), KLEN
  761. add $240, KEYP
  762. cmp $16, LEN
  763. jb .Lecb_dec_ret
  764. cmp $64, LEN
  765. jb .Lecb_dec_loop1
  766. .align 4
  767. .Lecb_dec_loop4:
  768. movups (INP), STATE1
  769. movups 0x10(INP), STATE2
  770. movups 0x20(INP), STATE3
  771. movups 0x30(INP), STATE4
  772. call _aesni_dec4
  773. movups STATE1, (OUTP)
  774. movups STATE2, 0x10(OUTP)
  775. movups STATE3, 0x20(OUTP)
  776. movups STATE4, 0x30(OUTP)
  777. sub $64, LEN
  778. add $64, INP
  779. add $64, OUTP
  780. cmp $64, LEN
  781. jge .Lecb_dec_loop4
  782. cmp $16, LEN
  783. jb .Lecb_dec_ret
  784. .align 4
  785. .Lecb_dec_loop1:
  786. movups (INP), STATE1
  787. call _aesni_dec1
  788. movups STATE1, (OUTP)
  789. sub $16, LEN
  790. add $16, INP
  791. add $16, OUTP
  792. cmp $16, LEN
  793. jge .Lecb_dec_loop1
  794. .Lecb_dec_ret:
  795. ret
  796. /*
  797. * void aesni_cbc_enc(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src,
  798. * size_t len, u8 *iv)
  799. */
  800. ENTRY(aesni_cbc_enc)
  801. cmp $16, LEN
  802. jb .Lcbc_enc_ret
  803. mov 480(KEYP), KLEN
  804. movups (IVP), STATE # load iv as initial state
  805. .align 4
  806. .Lcbc_enc_loop:
  807. movups (INP), IN # load input
  808. pxor IN, STATE
  809. call _aesni_enc1
  810. movups STATE, (OUTP) # store output
  811. sub $16, LEN
  812. add $16, INP
  813. add $16, OUTP
  814. cmp $16, LEN
  815. jge .Lcbc_enc_loop
  816. movups STATE, (IVP)
  817. .Lcbc_enc_ret:
  818. ret
  819. /*
  820. * void aesni_cbc_dec(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src,
  821. * size_t len, u8 *iv)
  822. */
  823. ENTRY(aesni_cbc_dec)
  824. cmp $16, LEN
  825. jb .Lcbc_dec_ret
  826. mov 480(KEYP), KLEN
  827. add $240, KEYP
  828. movups (IVP), IV
  829. cmp $64, LEN
  830. jb .Lcbc_dec_loop1
  831. .align 4
  832. .Lcbc_dec_loop4:
  833. movups (INP), IN1
  834. movaps IN1, STATE1
  835. movups 0x10(INP), IN2
  836. movaps IN2, STATE2
  837. movups 0x20(INP), IN3
  838. movaps IN3, STATE3
  839. movups 0x30(INP), IN4
  840. movaps IN4, STATE4
  841. call _aesni_dec4
  842. pxor IV, STATE1
  843. pxor IN1, STATE2
  844. pxor IN2, STATE3
  845. pxor IN3, STATE4
  846. movaps IN4, IV
  847. movups STATE1, (OUTP)
  848. movups STATE2, 0x10(OUTP)
  849. movups STATE3, 0x20(OUTP)
  850. movups STATE4, 0x30(OUTP)
  851. sub $64, LEN
  852. add $64, INP
  853. add $64, OUTP
  854. cmp $64, LEN
  855. jge .Lcbc_dec_loop4
  856. cmp $16, LEN
  857. jb .Lcbc_dec_ret
  858. .align 4
  859. .Lcbc_dec_loop1:
  860. movups (INP), IN
  861. movaps IN, STATE
  862. call _aesni_dec1
  863. pxor IV, STATE
  864. movups STATE, (OUTP)
  865. movaps IN, IV
  866. sub $16, LEN
  867. add $16, INP
  868. add $16, OUTP
  869. cmp $16, LEN
  870. jge .Lcbc_dec_loop1
  871. movups IV, (IVP)
  872. .Lcbc_dec_ret:
  873. ret