/* File: aesni-intel_asm.S */
  1. /*
  2. * Implement AES algorithm in Intel AES-NI instructions.
  3. *
  4. * The white paper of AES-NI instructions can be downloaded from:
  5. * http://softwarecommunity.intel.com/isn/downloads/intelavx/AES-Instructions-Set_WP.pdf
  6. *
  7. * Copyright (C) 2008, Intel Corp.
  8. * Author: Huang Ying <ying.huang@intel.com>
  9. * Vinodh Gopal <vinodh.gopal@intel.com>
  10. * Kahraman Akdemir
  11. *
  12. * This program is free software; you can redistribute it and/or modify
  13. * it under the terms of the GNU General Public License as published by
  14. * the Free Software Foundation; either version 2 of the License, or
  15. * (at your option) any later version.
  16. */
  17. #include <linux/linkage.h>
  18. #include <asm/inst.h>
  19. .text
  /*
   * Register aliases shared by the routines in this file.
   */

  /* Block state: up to four blocks in flight; STATE is the single-block alias. */
  #define STATE1          %xmm0
  #define STATE2          %xmm4
  #define STATE3          %xmm5
  #define STATE4          %xmm6
  #define STATE           STATE1

  /* Input blocks kept next to the state (CBC decrypt, CTR); IN aliases IN1. */
  #define IN1             %xmm1
  #define IN2             %xmm7
  #define IN3             %xmm8
  #define IN4             %xmm9
  #define IN              IN1

  /* Current round key and the chaining / counter block. */
  #define KEY             %xmm2
  #define IV              %xmm3

  /* Counter-mode helpers, set up by _aesni_inc_init. */
  #define BSWAP_MASK      %xmm10
  #define CTR             %xmm11
  #define INC             %xmm12

  /* Integer arguments of the exported functions, in argument order. */
  #define KEYP            %rdi
  #define OUTP            %rsi
  #define INP             %rdx
  #define LEN             %rcx
  #define IVP             %r8

  /* Scratch integer registers and their role-specific names. */
  #define KLEN            %r9d
  #define T1              %r10
  #define TKEYP           T1
  #define T2              %r11
  #define TCTR_LOW        T2
  /*
   * _key_expansion_128 / _key_expansion_256a: internal helper
   * input:
   *   %xmm0: round key that is updated in place
   *   %xmm1: key-generation assist value; dword 3 is broadcast and mixed in
   *   %xmm4: scratch, expected to be zero on the first call
   *   %rcx:  address at which the new round key is stored (16-byte aligned)
   * output:
   *   %xmm0: new round key, also written to (%rcx)
   *   %rcx:  advanced by 0x10
   * changed:
   *   %xmm1, %xmm4
   */
  _key_expansion_128:
  _key_expansion_256a:
          shufps  $0x10, %xmm0, %xmm4
          pxor    %xmm4, %xmm0
          shufps  $0x8c, %xmm0, %xmm4
          pxor    %xmm4, %xmm0
          pshufd  $0xff, %xmm1, %xmm1     # broadcast dword 3 of the assist value
          pxor    %xmm1, %xmm0
          movaps  %xmm0, (%rcx)           # emit the round key
          add     $0x10, %rcx
          ret
  /*
   * _key_expansion_192a: internal helper
   * input:
   *   %xmm0, %xmm2: running key material, both updated in place
   *   %xmm1: key-generation assist value; dword 1 is broadcast and mixed in
   *   %xmm4: scratch, expected to be zero on the first call
   *   %rcx:  address at which the output is stored (16-byte aligned)
   * output:
   *   (%rcx), 0x10(%rcx): two 16-byte round-key words
   *   %rcx: advanced by 0x20
   * changed:
   *   %xmm0 - %xmm6
   */
  _key_expansion_192a:
          movaps  %xmm2, %xmm5            # two copies of the incoming %xmm2
          movaps  %xmm2, %xmm6
          shufps  $0x10, %xmm0, %xmm4
          pxor    %xmm4, %xmm0
          shufps  $0x8c, %xmm0, %xmm4
          pxor    %xmm4, %xmm0
          pshufd  $0x55, %xmm1, %xmm1     # broadcast dword 1 of the assist value
          pxor    %xmm1, %xmm0
          pslldq  $4, %xmm5
          pshufd  $0xff, %xmm0, %xmm3     # broadcast dword 3 of the updated %xmm0
          pxor    %xmm3, %xmm2
          pxor    %xmm5, %xmm2
          movaps  %xmm0, %xmm1
          shufps  $0x44, %xmm0, %xmm6     # old %xmm2 low half : new %xmm0 low half
          movaps  %xmm6, (%rcx)
          shufps  $0x4e, %xmm2, %xmm1     # new %xmm0 high half : new %xmm2 low half
          movaps  %xmm1, 0x10(%rcx)
          add     $0x20, %rcx
          ret
  /*
   * _key_expansion_192b: internal helper
   * input:
   *   %xmm0, %xmm2: running key material, both updated in place
   *   %xmm1: key-generation assist value; dword 1 is broadcast and mixed in
   *   %xmm4: scratch, expected to be zero on the first call
   *   %rcx:  address at which the output is stored (16-byte aligned)
   * output:
   *   (%rcx): the updated %xmm0
   *   %rcx: advanced by 0x10
   * changed:
   *   %xmm0 - %xmm5
   */
  _key_expansion_192b:
          movaps  %xmm2, %xmm5            # copy of the incoming %xmm2
          shufps  $0x10, %xmm0, %xmm4
          pxor    %xmm4, %xmm0
          shufps  $0x8c, %xmm0, %xmm4
          pxor    %xmm4, %xmm0
          pshufd  $0x55, %xmm1, %xmm1     # broadcast dword 1 of the assist value
          pxor    %xmm1, %xmm0
          pslldq  $4, %xmm5
          pshufd  $0xff, %xmm0, %xmm3     # broadcast dword 3 of the updated %xmm0
          pxor    %xmm3, %xmm2
          pxor    %xmm5, %xmm2
          movaps  %xmm0, (%rcx)
          add     $0x10, %rcx
          ret
  /*
   * _key_expansion_256b: internal helper
   * input:
   *   %xmm2: round key that is updated in place
   *   %xmm1: key-generation assist value; dword 2 is broadcast and mixed in
   *   %xmm4: scratch shared with the other key expansion helpers
   *   %rcx:  address at which the new round key is stored (16-byte aligned)
   * output:
   *   %xmm2: new round key, also written to (%rcx)
   *   %rcx:  advanced by 0x10
   * changed:
   *   %xmm1, %xmm4
   */
  _key_expansion_256b:
          shufps  $0x10, %xmm2, %xmm4
          pxor    %xmm4, %xmm2
          shufps  $0x8c, %xmm2, %xmm4
          pxor    %xmm4, %xmm2
          pshufd  $0xaa, %xmm1, %xmm1     # broadcast dword 2 of the assist value
          pxor    %xmm1, %xmm2
          movaps  %xmm2, (%rcx)           # emit the round key
          add     $0x10, %rcx
          ret
  /*
   * int aesni_set_key(struct crypto_aes_ctx *ctx, const u8 *in_key,
   *                   unsigned int key_len)
   *
   * input:
   *   %rdi: ctx      %rsi: in_key      %edx: key_len
   *
   * Writes the encryption round keys starting at ctx + 0, stores key_len at
   * ctx + 480, and then derives the decryption round keys starting at
   * ctx + 240: the first and last encryption keys are swapped across, and the
   * keys in between are passed through AESIMC and stored in reverse order.
   *
   * The key size is selected by comparing the low byte of key_len with 24:
   * below selects the 128-bit path, equal the 192-bit path, above the 256-bit
   * path.  Always returns 0.
   *
   * changed:
   *   %rax, %rcx, %rsi, %rdi, %xmm0 - %xmm6
   */

  /* One 192-bit expansion step pair: rcon_a feeds _192a, rcon_b feeds _192b. */
  .macro aesni_set_key_192_pair rcon_a rcon_b
          AESKEYGENASSIST \rcon_a %xmm2 %xmm1
          call    _key_expansion_192a
          AESKEYGENASSIST \rcon_b %xmm2 %xmm1
          call    _key_expansion_192b
  .endm

  ENTRY(aesni_set_key)
          pxor    %xmm4, %xmm4            # xmm4 is assumed 0 in _key_expansion_x
          movups  (%rsi), %xmm0           # user key (first 16 bytes)
          movaps  %xmm0, (%rdi)
          movl    %edx, 480(%rdi)         # remember the key length
          lea     0x10(%rdi), %rcx        # key addr
          cmp     $24, %dl
          jb      .Lenc_key128
          je      .Lenc_key192

          /* 256-bit key: rounds 1-6 run both helpers, round 7 only the first. */
          movups  0x10(%rsi), %xmm2       # other user key
          movaps  %xmm2, (%rcx)
          add     $0x10, %rcx
  .irp rcon,0x1,0x2,0x4,0x8,0x10,0x20
          AESKEYGENASSIST \rcon %xmm2 %xmm1
          call    _key_expansion_256a
          AESKEYGENASSIST \rcon %xmm0 %xmm1
          call    _key_expansion_256b
  .endr
          AESKEYGENASSIST 0x40 %xmm2 %xmm1
          call    _key_expansion_256a
          jmp     .Ldec_key

  .Lenc_key192:
          movq    0x10(%rsi), %xmm2       # other user key
          aesni_set_key_192_pair 0x1 0x2
          aesni_set_key_192_pair 0x4 0x8
          aesni_set_key_192_pair 0x10 0x20
          aesni_set_key_192_pair 0x40 0x80
          jmp     .Ldec_key

  .Lenc_key128:
  .irp rcon,0x1,0x2,0x4,0x8,0x10,0x20,0x40,0x80,0x1b,0x36
          AESKEYGENASSIST \rcon %xmm0 %xmm1
          call    _key_expansion_128
  .endr

  .Ldec_key:
          sub     $0x10, %rcx             # %rcx -> last encryption round key
          movaps  (%rdi), %xmm0           # first encryption round key
          movaps  (%rcx), %xmm1           # last encryption round key
          movaps  %xmm0, 240(%rcx)        # becomes the last decryption key
          movaps  %xmm1, 240(%rdi)        # becomes the first decryption key
          add     $0x10, %rdi
          lea     0xe0(%rcx), %rsi        # 240 - 16: slot just below the last one
  .align 4
  .Ldec_key_loop:
          movaps  (%rdi), %xmm0
          AESIMC  %xmm0 %xmm1
          movaps  %xmm1, (%rsi)
          add     $0x10, %rdi             # walk the encryption keys upwards
          sub     $0x10, %rsi             # and fill the decryption keys downwards
          cmp     %rcx, %rdi
          jb      .Ldec_key_loop
          xor     %eax, %eax              # return 0
          ret
  /*
   * void aesni_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src)
   *
   * Encrypt the single 16-byte block at src and store the result at dst.
   * Neither pointer has to be aligned.
   */
  ENTRY(aesni_enc)
          movups  (INP), STATE            # input
          movl    480(KEYP), KLEN         # key length
          call    _aesni_enc1
          movups  STATE, (OUTP)           # output
          ret
  /*
   * _aesni_enc1: internal ABI
   * input:
   *   KEYP:  key struct pointer
   *   KLEN:  key length; compared with 24 to pick the 128/192/256-bit path
   *   STATE: initial state (input)
   * output:
   *   STATE: final state (output)
   * changed:
   *   KEY
   *   TKEYP (T1)
   *
   * TKEYP is positioned so that the rounds shared by every key size use the
   * same offsets; longer keys enter earlier and fall through.
   */
  _aesni_enc1:
          movaps  (KEYP), KEY             # key
          lea     0x30(KEYP), TKEYP
          pxor    KEY, STATE              # round 0
          cmp     $24, KLEN
          jb      .Lenc128
          lea     0x20(TKEYP), TKEYP      # lea leaves the cmp flags alone for je
          je      .Lenc192
          add     $0x20, TKEYP
  .irp off,-0x60,-0x50
          movaps  \off(TKEYP), KEY
          AESENC  KEY STATE
  .endr
  .align 4
  .Lenc192:
  .irp off,-0x40,-0x30
          movaps  \off(TKEYP), KEY
          AESENC  KEY STATE
  .endr
  .align 4
  .Lenc128:
  .irp off,-0x20,-0x10,0x00,0x10,0x20,0x30,0x40,0x50,0x60
          movaps  \off(TKEYP), KEY
          AESENC  KEY STATE
  .endr
          movaps  0x70(TKEYP), KEY
          AESENCLAST KEY STATE            # last round
          ret
  /*
   * _aesni_enc4: internal ABI
   * input:
   *   KEYP:   key struct pointer
   *   KLEN:   key length; compared with 24 to pick the 128/192/256-bit path
   *   STATE1: initial state (input)
   *   STATE2
   *   STATE3
   *   STATE4
   * output:
   *   STATE1: final state (output)
   *   STATE2
   *   STATE3
   *   STATE4
   * changed:
   *   KEY
   *   TKEYP (T1)
   */

  /* One encryption round over all four states with the key at off(TKEYP). */
  .macro aesni_enc4_round off
          movaps  \off(TKEYP), KEY
          AESENC  KEY STATE1
          AESENC  KEY STATE2
          AESENC  KEY STATE3
          AESENC  KEY STATE4
  .endm

  _aesni_enc4:
          movaps  (KEYP), KEY             # key
          lea     0x30(KEYP), TKEYP
          pxor    KEY, STATE1             # round 0
          pxor    KEY, STATE2
          pxor    KEY, STATE3
          pxor    KEY, STATE4
          cmp     $24, KLEN
          jb      .L4enc128
          lea     0x20(TKEYP), TKEYP      # lea leaves the cmp flags alone for je
          je      .L4enc192
          add     $0x20, TKEYP
          aesni_enc4_round -0x60
          aesni_enc4_round -0x50
  .L4enc192:
          aesni_enc4_round -0x40
          aesni_enc4_round -0x30
  .L4enc128:
  .irp off,-0x20,-0x10,0x00,0x10,0x20,0x30,0x40,0x50,0x60
          aesni_enc4_round \off
  .endr
          movaps  0x70(TKEYP), KEY
          AESENCLAST KEY STATE1           # last round
          AESENCLAST KEY STATE2
          AESENCLAST KEY STATE3
          AESENCLAST KEY STATE4
          ret
  /*
   * void aesni_dec(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src)
   *
   * Decrypt the single 16-byte block at src and store the result at dst.
   * The round keys used start 240 bytes into ctx.
   */
  ENTRY(aesni_dec)
          movups  (INP), STATE            # input
          mov     480(KEYP), KLEN         # key length, read before KEYP moves
          add     $240, KEYP              # switch to the decryption round keys
          call    _aesni_dec1
          movups  STATE, (OUTP)           # output
          ret
  /*
   * _aesni_dec1: internal ABI
   * input:
   *   KEYP:  key struct pointer
   *   KLEN:  key length; compared with 24 to pick the 128/192/256-bit path
   *   STATE: initial state (input)
   * output:
   *   STATE: final state (output)
   * changed:
   *   KEY
   *   TKEYP (T1)
   *
   * TKEYP is positioned so that the rounds shared by every key size use the
   * same offsets; longer keys enter earlier and fall through.
   */
  _aesni_dec1:
          movaps  (KEYP), KEY             # key
          lea     0x30(KEYP), TKEYP
          pxor    KEY, STATE              # round 0
          cmp     $24, KLEN
          jb      .Ldec128
          lea     0x20(TKEYP), TKEYP      # lea leaves the cmp flags alone for je
          je      .Ldec192
          add     $0x20, TKEYP
  .irp off,-0x60,-0x50
          movaps  \off(TKEYP), KEY
          AESDEC  KEY STATE
  .endr
  .align 4
  .Ldec192:
  .irp off,-0x40,-0x30
          movaps  \off(TKEYP), KEY
          AESDEC  KEY STATE
  .endr
  .align 4
  .Ldec128:
  .irp off,-0x20,-0x10,0x00,0x10,0x20,0x30,0x40,0x50,0x60
          movaps  \off(TKEYP), KEY
          AESDEC  KEY STATE
  .endr
          movaps  0x70(TKEYP), KEY
          AESDECLAST KEY STATE            # last round
          ret
  /*
   * _aesni_dec4: internal ABI
   * input:
   *   KEYP:   key struct pointer
   *   KLEN:   key length; compared with 24 to pick the 128/192/256-bit path
   *   STATE1: initial state (input)
   *   STATE2
   *   STATE3
   *   STATE4
   * output:
   *   STATE1: final state (output)
   *   STATE2
   *   STATE3
   *   STATE4
   * changed:
   *   KEY
   *   TKEYP (T1)
   */

  /* One decryption round over all four states with the key at off(TKEYP). */
  .macro aesni_dec4_round off
          movaps  \off(TKEYP), KEY
          AESDEC  KEY STATE1
          AESDEC  KEY STATE2
          AESDEC  KEY STATE3
          AESDEC  KEY STATE4
  .endm

  _aesni_dec4:
          movaps  (KEYP), KEY             # key
          lea     0x30(KEYP), TKEYP
          pxor    KEY, STATE1             # round 0
          pxor    KEY, STATE2
          pxor    KEY, STATE3
          pxor    KEY, STATE4
          cmp     $24, KLEN
          jb      .L4dec128
          lea     0x20(TKEYP), TKEYP      # lea leaves the cmp flags alone for je
          je      .L4dec192
          add     $0x20, TKEYP
          aesni_dec4_round -0x60
          aesni_dec4_round -0x50
  .align 4
  .L4dec192:
          aesni_dec4_round -0x40
          aesni_dec4_round -0x30
  .align 4
  .L4dec128:
  .irp off,-0x20,-0x10,0x00,0x10,0x20,0x30,0x40,0x50,0x60
          aesni_dec4_round \off
  .endr
          movaps  0x70(TKEYP), KEY
          AESDECLAST KEY STATE1           # last round
          AESDECLAST KEY STATE2
          AESDECLAST KEY STATE3
          AESDECLAST KEY STATE4
          ret
  /*
   * void aesni_ecb_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src,
   *                    size_t len)
   *
   * ECB-encrypt len bytes from src to dst, four blocks at a time while at
   * least 64 bytes remain and one block at a time afterwards.  A trailing
   * partial block (fewer than 16 bytes) is left untouched.
   *
   * LEN is an unsigned size_t, so every length comparison uses the unsigned
   * condition codes (jb / jae).
   */
  ENTRY(aesni_ecb_enc)
          test    LEN, LEN                # check length
          jz      .Lecb_enc_ret
          mov     480(KEYP), KLEN         # key length
          cmp     $16, LEN
          jb      .Lecb_enc_ret           # not even one full block
          cmp     $64, LEN
          jb      .Lecb_enc_loop1
  .align 4
  .Lecb_enc_loop4:
          movups  (INP), STATE1
          movups  0x10(INP), STATE2
          movups  0x20(INP), STATE3
          movups  0x30(INP), STATE4
          call    _aesni_enc4
          movups  STATE1, (OUTP)
          movups  STATE2, 0x10(OUTP)
          movups  STATE3, 0x20(OUTP)
          movups  STATE4, 0x30(OUTP)
          sub     $64, LEN
          add     $64, INP
          add     $64, OUTP
          cmp     $64, LEN
          jae     .Lecb_enc_loop4         # unsigned: at least 64 bytes remain
          cmp     $16, LEN
          jb      .Lecb_enc_ret
  .align 4
  .Lecb_enc_loop1:
          movups  (INP), STATE1
          call    _aesni_enc1
          movups  STATE1, (OUTP)
          sub     $16, LEN
          add     $16, INP
          add     $16, OUTP
          cmp     $16, LEN
          jae     .Lecb_enc_loop1         # unsigned: at least 16 bytes remain
  .Lecb_enc_ret:
          ret
  /*
   * void aesni_ecb_dec(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src,
   *                    size_t len);
   *
   * ECB-decrypt len bytes from src to dst using the round keys that start
   * 240 bytes into ctx, four blocks at a time while at least 64 bytes remain
   * and one block at a time afterwards.  A trailing partial block (fewer
   * than 16 bytes) is left untouched.
   *
   * LEN is an unsigned size_t, so every length comparison uses the unsigned
   * condition codes (jb / jae).
   */
  ENTRY(aesni_ecb_dec)
          test    LEN, LEN                # check length
          jz      .Lecb_dec_ret
          mov     480(KEYP), KLEN         # key length, read before KEYP moves
          add     $240, KEYP              # switch to the decryption round keys
          cmp     $16, LEN
          jb      .Lecb_dec_ret           # not even one full block
          cmp     $64, LEN
          jb      .Lecb_dec_loop1
  .align 4
  .Lecb_dec_loop4:
          movups  (INP), STATE1
          movups  0x10(INP), STATE2
          movups  0x20(INP), STATE3
          movups  0x30(INP), STATE4
          call    _aesni_dec4
          movups  STATE1, (OUTP)
          movups  STATE2, 0x10(OUTP)
          movups  STATE3, 0x20(OUTP)
          movups  STATE4, 0x30(OUTP)
          sub     $64, LEN
          add     $64, INP
          add     $64, OUTP
          cmp     $64, LEN
          jae     .Lecb_dec_loop4         # unsigned: at least 64 bytes remain
          cmp     $16, LEN
          jb      .Lecb_dec_ret
  .align 4
  .Lecb_dec_loop1:
          movups  (INP), STATE1
          call    _aesni_dec1
          movups  STATE1, (OUTP)
          sub     $16, LEN
          add     $16, INP
          add     $16, OUTP
          cmp     $16, LEN
          jae     .Lecb_dec_loop1         # unsigned: at least 16 bytes remain
  .Lecb_dec_ret:
          ret
  /*
   * void aesni_cbc_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src,
   *                    size_t len, u8 *iv)
   *
   * CBC-encrypt every full 16-byte block of src into dst.  The chaining value
   * is read from iv on entry and the last ciphertext block is written back to
   * iv on exit.  When len is below 16 nothing is read or written, iv included.
   *
   * LEN is an unsigned size_t, so every length comparison uses the unsigned
   * condition codes (jb / jae).
   */
  ENTRY(aesni_cbc_enc)
          cmp     $16, LEN
          jb      .Lcbc_enc_ret
          mov     480(KEYP), KLEN         # key length
          movups  (IVP), STATE            # load iv as initial state
  .align 4
  .Lcbc_enc_loop:
          movups  (INP), IN               # load input
          pxor    IN, STATE               # chain with the previous ciphertext
          call    _aesni_enc1
          movups  STATE, (OUTP)           # store output
          sub     $16, LEN
          add     $16, INP
          add     $16, OUTP
          cmp     $16, LEN
          jae     .Lcbc_enc_loop          # unsigned: at least 16 bytes remain
          movups  STATE, (IVP)            # hand the chaining value back
  .Lcbc_enc_ret:
          ret
  /*
   * void aesni_cbc_dec(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src,
   *                    size_t len, u8 *iv)
   *
   * CBC-decrypt every full 16-byte block of src into dst using the round keys
   * that start 240 bytes into ctx, four blocks at a time while at least 64
   * bytes remain and one block at a time afterwards.  The chaining value is
   * read from iv on entry and the last ciphertext block is written back to iv
   * on exit.  When len is below 16 nothing is read or written, iv included.
   *
   * LEN is an unsigned size_t, so every length comparison uses the unsigned
   * condition codes (jb / jae).
   */
  ENTRY(aesni_cbc_dec)
          cmp     $16, LEN
          jb      .Lcbc_dec_just_ret
          mov     480(KEYP), KLEN         # key length, read before KEYP moves
          add     $240, KEYP              # switch to the decryption round keys
          movups  (IVP), IV
          cmp     $64, LEN
          jb      .Lcbc_dec_loop1
  .align 4
  .Lcbc_dec_loop4:
          /* Keep each ciphertext block in INn: it chains into the next block. */
          movups  (INP), IN1
          movaps  IN1, STATE1
          movups  0x10(INP), IN2
          movaps  IN2, STATE2
          movups  0x20(INP), IN3
          movaps  IN3, STATE3
          movups  0x30(INP), IN4
          movaps  IN4, STATE4
          call    _aesni_dec4
          pxor    IV, STATE1
          pxor    IN1, STATE2
          pxor    IN2, STATE3
          pxor    IN3, STATE4
          movaps  IN4, IV                 # last ciphertext block is the next IV
          movups  STATE1, (OUTP)
          movups  STATE2, 0x10(OUTP)
          movups  STATE3, 0x20(OUTP)
          movups  STATE4, 0x30(OUTP)
          sub     $64, LEN
          add     $64, INP
          add     $64, OUTP
          cmp     $64, LEN
          jae     .Lcbc_dec_loop4         # unsigned: at least 64 bytes remain
          cmp     $16, LEN
          jb      .Lcbc_dec_ret
  .align 4
  .Lcbc_dec_loop1:
          movups  (INP), IN
          movaps  IN, STATE
          call    _aesni_dec1
          pxor    IV, STATE
          movups  STATE, (OUTP)
          movaps  IN, IV                  # this ciphertext block is the next IV
          sub     $16, LEN
          add     $16, INP
          add     $16, OUTP
          cmp     $16, LEN
          jae     .Lcbc_dec_loop1         # unsigned: at least 16 bytes remain
  .Lcbc_dec_ret:
          movups  IV, (IVP)               # hand the chaining value back
  .Lcbc_dec_just_ret:
          ret
  /*
   * Byte-shuffle control that reverses the order of all 16 bytes; loaded with
   * movaps, hence the 16-byte alignment.
   */
  .balign 16
  .Lbswap_mask:
          .byte   0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08
          .byte   0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00
  /*
   * _aesni_inc_init: internal ABI
   *   set up the registers used by _aesni_inc
   * input:
   *   IV
   * output:
   *   CTR:        == IV, in little endian
   *   TCTR_LOW:   == lower qword of CTR
   *   INC:        == 1, in little endian
   *   BSWAP_MASK: == endian swapping mask
   *
   * The mask is addressed relative to %rip so that the load does not depend
   * on the absolute address the code is linked at.
   */
  _aesni_inc_init:
          movaps  .Lbswap_mask(%rip), BSWAP_MASK
          movaps  IV, CTR
          PSHUFB_XMM BSWAP_MASK CTR       # big endian IV -> little endian CTR
          mov     $1, TCTR_LOW
          MOVQ_R64_XMM TCTR_LOW INC       # INC = 1
          MOVQ_R64_XMM CTR TCTR_LOW       # TCTR_LOW = low qword of CTR
          ret
  /*
   * _aesni_inc: internal ABI
   *   Increase IV by 1; IV is kept in big endian
   * input:
   *   IV
   *   CTR:        == IV, in little endian
   *   TCTR_LOW:   == lower qword of CTR
   *   INC:        == 1, in little endian
   *   BSWAP_MASK: == endian swapping mask
   * output:
   *   IV:         increased by 1
   * changed:
   *   CTR:        == output IV, in little endian
   *   TCTR_LOW:   == lower qword of CTR
   *
   * The integer copy in TCTR_LOW exists only to detect a carry out of the low
   * qword; paddq adds the two qwords independently and reports no carry.
   */
  _aesni_inc:
          add     $1, TCTR_LOW            # sets CF when the low qword wraps
          paddq   INC, CTR                # vector add, does not touch the flags
          jnc     .Linc_low
          pslldq  $8, INC                 # move the 1 into the high qword
          paddq   INC, CTR                # propagate the carry
          psrldq  $8, INC                 # restore INC == 1
  .Linc_low:
          movaps  CTR, IV
          PSHUFB_XMM BSWAP_MASK IV        # back to big endian
          ret
  /*
   * void aesni_ctr_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src,
   *                    size_t len, u8 *iv)
   *
   * CTR-mode encrypt every full 16-byte block of src into dst, four blocks at
   * a time while at least 64 bytes remain and one block at a time afterwards.
   * The counter block is read from iv on entry, incremented once per block,
   * and written back to iv on exit.  When len is below 16 nothing is read or
   * written, iv included.
   *
   * LEN is an unsigned size_t, so every length comparison uses the unsigned
   * condition codes (jb / jae).
   */
  ENTRY(aesni_ctr_enc)
          cmp     $16, LEN
          jb      .Lctr_enc_just_ret
          mov     480(KEYP), KLEN         # key length
          movups  (IVP), IV
          call    _aesni_inc_init
          cmp     $64, LEN
          jb      .Lctr_enc_loop1
  .align 4
  .Lctr_enc_loop4:
          /* Each state takes the current counter block before it is bumped. */
          movaps  IV, STATE1
          call    _aesni_inc
          movups  (INP), IN1
          movaps  IV, STATE2
          call    _aesni_inc
          movups  0x10(INP), IN2
          movaps  IV, STATE3
          call    _aesni_inc
          movups  0x20(INP), IN3
          movaps  IV, STATE4
          call    _aesni_inc
          movups  0x30(INP), IN4
          call    _aesni_enc4
          pxor    IN1, STATE1             # keystream xor input
          movups  STATE1, (OUTP)
          pxor    IN2, STATE2
          movups  STATE2, 0x10(OUTP)
          pxor    IN3, STATE3
          movups  STATE3, 0x20(OUTP)
          pxor    IN4, STATE4
          movups  STATE4, 0x30(OUTP)
          sub     $64, LEN
          add     $64, INP
          add     $64, OUTP
          cmp     $64, LEN
          jae     .Lctr_enc_loop4         # unsigned: at least 64 bytes remain
          cmp     $16, LEN
          jb      .Lctr_enc_ret
  .align 4
  .Lctr_enc_loop1:
          movaps  IV, STATE
          call    _aesni_inc
          movups  (INP), IN
          call    _aesni_enc1
          pxor    IN, STATE               # keystream xor input
          movups  STATE, (OUTP)
          sub     $16, LEN
          add     $16, INP
          add     $16, OUTP
          cmp     $16, LEN
          jae     .Lctr_enc_loop1         # unsigned: at least 16 bytes remain
  .Lctr_enc_ret:
          movups  IV, (IVP)               # hand the next counter block back
  .Lctr_enc_just_ret:
          ret