aesni-intel_asm.S 15 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726
  1. /*
  2. * Implement AES algorithm in Intel AES-NI instructions.
  3. *
  4. * The white paper of AES-NI instructions can be downloaded from:
  5. * http://softwarecommunity.intel.com/isn/downloads/intelavx/AES-Instructions-Set_WP.pdf
  6. *
  7. * Copyright (C) 2008, Intel Corp.
  8. * Author: Huang Ying <ying.huang@intel.com>
  9. * Vinodh Gopal <vinodh.gopal@intel.com>
  10. * Kahraman Akdemir
  11. *
  12. * This program is free software; you can redistribute it and/or modify
  13. * it under the terms of the GNU General Public License as published by
  14. * the Free Software Foundation; either version 2 of the License, or
  15. * (at your option) any later version.
  16. */
  17. #include <linux/linkage.h>
  18. #include <asm/inst.h>
  19. .text
  /*
   * Register aliases used by the routines below.  All code in this file
   * uses AT&T operand order (source first, destination last).
   */

  /* Pointer/length arguments, in integer-argument order */
  #define KEYP    %rdi            /* key context / key schedule pointer */
  #define OUTP    %rsi            /* destination buffer */
  #define INP     %rdx            /* source buffer */
  #define LEN     %rcx            /* byte count */
  #define IVP     %r8             /* IV buffer (CBC routines) */

  /* General-purpose scratch */
  #define KLEN    %r9d            /* key length read from 480(KEYP) */
  #define T1      %r10
  #define TKEYP   T1              /* cursor into the round keys */
  #define T2      %r11            /* not referenced in the code visible here */

  /* Cipher state: one block, or four blocks side by side */
  #define STATE1  %xmm0
  #define STATE2  %xmm4
  #define STATE3  %xmm5
  #define STATE4  %xmm6
  #define STATE   STATE1

  /* Copies of input blocks (kept for CBC chaining) */
  #define IN1     %xmm1
  #define IN2     %xmm7
  #define IN3     %xmm8
  #define IN4     %xmm9
  #define IN      IN1

  /* Current round key and running IV */
  #define KEY     %xmm2
  #define IV      %xmm3
  /*
   * _key_expansion_128 / _key_expansion_256a: internal ABI
   * input:
   *   %xmm0: key material to be advanced to the next round key
   *   %xmm1: AESKEYGENASSIST result for this round
   *   %xmm4: scratch; its low dword must be zero on entry
   *   %rcx:  address at which the new round key is stored
   * output:
   *   %xmm0: new round key (also written to (%rcx))
   *   %rcx:  advanced by 16
   * changed:
   *   %xmm1, %xmm4 (the low dword of %xmm4 is zero again on return)
   */
  _key_expansion_128:
  _key_expansion_256a:
          shufps  $0x10, %xmm0, %xmm4
          pxor    %xmm4, %xmm0
          shufps  $0x8c, %xmm0, %xmm4
          pxor    %xmm4, %xmm0            # dword i = XOR of original dwords 0..i
          pshufd  $0xff, %xmm1, %xmm1     # broadcast dword 3 of the assist value
          pxor    %xmm1, %xmm0
          movaps  %xmm0, (%rcx)
          add     $0x10, %rcx
          ret
  /*
   * _key_expansion_192a: internal ABI
   * input:
   *   %xmm0: first 128 bits of the current key material
   *   %xmm2: remaining key material (the caller loads 64 bits into it)
   *   %xmm1: AESKEYGENASSIST result for this round
   *   %xmm4: scratch; its low dword must be zero on entry
   *   %rcx:  address at which round-key data is stored
   * output:
   *   %xmm0, %xmm2: advanced key material
   *   32 bytes written at (%rcx): old %xmm2 low half, new %xmm0,
   *   new %xmm2 low half
   *   %rcx:  advanced by 32
   * changed:
   *   %xmm1, %xmm3, %xmm4, %xmm5, %xmm6
   */
  _key_expansion_192a:
          shufps  $0x10, %xmm0, %xmm4
          pxor    %xmm4, %xmm0
          shufps  $0x8c, %xmm0, %xmm4
          pxor    %xmm4, %xmm0            # dword i = XOR of original dwords 0..i
          pshufd  $0x55, %xmm1, %xmm1     # broadcast dword 1 of the assist value
          pxor    %xmm1, %xmm0

          movaps  %xmm2, %xmm5
          movaps  %xmm2, %xmm6            # keep the old %xmm2 for the first store
          pslldq  $4, %xmm5
          pshufd  $0xff, %xmm0, %xmm3     # broadcast dword 3 of the new %xmm0
          pxor    %xmm3, %xmm2
          pxor    %xmm5, %xmm2

          movaps  %xmm0, %xmm1
          shufps  $0x44, %xmm0, %xmm6     # old %xmm2 low half | new %xmm0 low half
          movaps  %xmm6, (%rcx)
          shufps  $0x4e, %xmm2, %xmm1     # new %xmm0 high half | new %xmm2 low half
          movaps  %xmm1, 16(%rcx)
          add     $0x20, %rcx
          ret
  /*
   * _key_expansion_192b: internal ABI
   * input:
   *   %xmm0: first 128 bits of the current key material
   *   %xmm2: remaining key material
   *   %xmm1: AESKEYGENASSIST result for this round
   *   %xmm4: scratch; its low dword must be zero on entry
   *   %rcx:  address at which the round key is stored
   * output:
   *   %xmm0, %xmm2: advanced key material
   *   16 bytes written at (%rcx): the new %xmm0
   *   %rcx:  advanced by 16
   * changed:
   *   %xmm1, %xmm3, %xmm4, %xmm5
   */
  _key_expansion_192b:
          shufps  $0x10, %xmm0, %xmm4
          pxor    %xmm4, %xmm0
          shufps  $0x8c, %xmm0, %xmm4
          pxor    %xmm4, %xmm0            # dword i = XOR of original dwords 0..i
          pshufd  $0x55, %xmm1, %xmm1     # broadcast dword 1 of the assist value
          pxor    %xmm1, %xmm0

          movaps  %xmm2, %xmm5
          pslldq  $4, %xmm5
          pshufd  $0xff, %xmm0, %xmm3     # broadcast dword 3 of the new %xmm0
          pxor    %xmm3, %xmm2
          pxor    %xmm5, %xmm2

          movaps  %xmm0, (%rcx)
          add     $0x10, %rcx
          ret
  /*
   * _key_expansion_256b: internal ABI
   * input:
   *   %xmm2: key material to be advanced to the next round key
   *   %xmm1: AESKEYGENASSIST result for this round
   *   %xmm4: scratch; its low dword must be zero on entry
   *   %rcx:  address at which the new round key is stored
   * output:
   *   %xmm2: new round key (also written to (%rcx))
   *   %rcx:  advanced by 16
   * changed:
   *   %xmm1, %xmm4 (the low dword of %xmm4 is zero again on return)
   */
  _key_expansion_256b:
          shufps  $0x10, %xmm2, %xmm4
          pxor    %xmm4, %xmm2
          shufps  $0x8c, %xmm2, %xmm4
          pxor    %xmm4, %xmm2            # dword i = XOR of original dwords 0..i
          pshufd  $0xaa, %xmm1, %xmm1     # broadcast dword 2 of the assist value
          pxor    %xmm1, %xmm2
          movaps  %xmm2, (%rcx)
          add     $0x10, %rcx
          ret
  /*
   * int aesni_set_key(struct crypto_aes_ctx *ctx, const u8 *in_key,
   *                   unsigned int key_len)
   *
   * Expand the user key into the context:
   *   ctx + 0   : encryption round keys
   *   ctx + 240 : decryption round keys
   *   ctx + 480 : key_len
   *
   * key_len is not validated here.  Only its low byte is compared with 24:
   * below 24 takes the 128-bit path, exactly 24 the 192-bit path, anything
   * else the 256-bit path.  Always returns 0.
   *
   * changed: %rcx, %rdi, %rsi, and the xmm registers used by the
   *          _key_expansion_* helpers
   */
  ENTRY(aesni_set_key)
          movups  (%rsi), %xmm0           # first 16 bytes of the user key
          movaps  %xmm0, (%rdi)           # stored as the first round key
          lea     0x10(%rdi), %rcx        # rcx = slot for the next round key
          movl    %edx, 480(%rdi)         # remember key_len in the context
          pxor    %xmm4, %xmm4            # helpers expect xmm4 to start as zero
          cmp     $24, %dl
          jb      .Lsetkey_128
          je      .Lsetkey_192

          /* 256-bit key: second 16 bytes form the next round key */
          movups  0x10(%rsi), %xmm2
          movaps  %xmm2, (%rcx)
          add     $0x10, %rcx
  .irp rcon, 0x1, 0x2, 0x4, 0x8, 0x10, 0x20
          AESKEYGENASSIST \rcon %xmm2 %xmm1
          call    _key_expansion_256a
          AESKEYGENASSIST \rcon %xmm0 %xmm1
          call    _key_expansion_256b
  .endr
          AESKEYGENASSIST 0x40 %xmm2 %xmm1
          call    _key_expansion_256a
          jmp     .Lsetkey_dec

  .Lsetkey_192:
          /* 192-bit key: 8 more key bytes go into the low half of xmm2 */
          movq    0x10(%rsi), %xmm2
          AESKEYGENASSIST 0x1 %xmm2 %xmm1
          call    _key_expansion_192a
          AESKEYGENASSIST 0x2 %xmm2 %xmm1
          call    _key_expansion_192b
          AESKEYGENASSIST 0x4 %xmm2 %xmm1
          call    _key_expansion_192a
          AESKEYGENASSIST 0x8 %xmm2 %xmm1
          call    _key_expansion_192b
          AESKEYGENASSIST 0x10 %xmm2 %xmm1
          call    _key_expansion_192a
          AESKEYGENASSIST 0x20 %xmm2 %xmm1
          call    _key_expansion_192b
          AESKEYGENASSIST 0x40 %xmm2 %xmm1
          call    _key_expansion_192a
          AESKEYGENASSIST 0x80 %xmm2 %xmm1
          call    _key_expansion_192b
          jmp     .Lsetkey_dec

  .Lsetkey_128:
          /* 128-bit key: ten rounds, one round constant each */
  .irp rcon, 0x1, 0x2, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36
          AESKEYGENASSIST \rcon %xmm0 %xmm1
          call    _key_expansion_128
  .endr

  .Lsetkey_dec:
          /*
           * Build the decryption schedule at ctx + 240: the first and last
           * encryption round keys are copied across swapped, the ones in
           * between are passed through AESIMC and stored in reverse order.
           */
          sub     $0x10, %rcx             # rcx = last encryption round key
          movaps  (%rdi), %xmm0
          movaps  (%rcx), %xmm1
          movaps  %xmm0, 240(%rcx)        # first enc key -> last dec slot
          movaps  %xmm1, 240(%rdi)        # last enc key -> first dec slot
          add     $0x10, %rdi             # rdi = second encryption round key
          lea     240-16(%rcx), %rsi      # rsi = second-to-last dec slot
  .align 4
  .Lsetkey_imc_loop:
          movaps  (%rdi), %xmm0
          AESIMC %xmm0 %xmm1
          movaps  %xmm1, (%rsi)
          add     $0x10, %rdi
          sub     $0x10, %rsi
          cmp     %rcx, %rdi
          jb      .Lsetkey_imc_loop
          xor     %eax, %eax              # return 0
          ret
  /*
   * void aesni_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src)
   *
   * Encrypt the single 16-byte block at src and store the result at dst.
   * Neither buffer needs to be 16-byte aligned (unaligned moves are used).
   */
  ENTRY(aesni_enc)
          movups  (INP), STATE            # fetch the input block
          movl    480(KEYP), KLEN         # key length kept at ctx + 480
          call    _aesni_enc1
          movups  STATE, (OUTP)           # write the result
          ret
  /*
   * _aesni_enc1: internal ABI
   * input:
   *   KEYP:  pointer to the encryption round keys (16-byte aligned loads)
   *   KLEN:  key length in bytes (compared against 24 to pick the
   *          number of rounds)
   *   STATE: initial state (input block)
   * output:
   *   STATE: final state (output block)
   * changed:
   *   KEY
   *   TKEYP (T1)
   */
  _aesni_enc1:
          movaps  (KEYP), KEY             # first round key
          lea     0x30(KEYP), TKEYP       # cursor position for a 128-bit key
          pxor    KEY, STATE              # round 0
          cmp     $24, KLEN
          jb      .Lenc1_k128
          lea     0x20(TKEYP), TKEYP      # lea leaves the flags for the je below
          je      .Lenc1_k192
          add     $0x20, TKEYP

          /* two rounds that only a 256-bit key has */
          movaps  -0x60(TKEYP), KEY
          AESENC KEY STATE
          movaps  -0x50(TKEYP), KEY
          AESENC KEY STATE
  .align 4
  .Lenc1_k192:
          /* two rounds shared by 192- and 256-bit keys */
          movaps  -0x40(TKEYP), KEY
          AESENC KEY STATE
          movaps  -0x30(TKEYP), KEY
          AESENC KEY STATE
  .align 4
  .Lenc1_k128:
          /* ten rounds common to every key size */
          movaps  -0x20(TKEYP), KEY
          AESENC KEY STATE
          movaps  -0x10(TKEYP), KEY
          AESENC KEY STATE
          movaps  (TKEYP), KEY
          AESENC KEY STATE
          movaps  0x10(TKEYP), KEY
          AESENC KEY STATE
          movaps  0x20(TKEYP), KEY
          AESENC KEY STATE
          movaps  0x30(TKEYP), KEY
          AESENC KEY STATE
          movaps  0x40(TKEYP), KEY
          AESENC KEY STATE
          movaps  0x50(TKEYP), KEY
          AESENC KEY STATE
          movaps  0x60(TKEYP), KEY
          AESENC KEY STATE
          movaps  0x70(TKEYP), KEY
          AESENCLAST KEY STATE            # last round
          ret
  /*
   * _aesni_enc4: internal ABI
   * input:
   *   KEYP:   pointer to the encryption round keys (16-byte aligned loads)
   *   KLEN:   key length in bytes (compared against 24 to pick the
   *           number of rounds)
   *   STATE1: initial state (input block)
   *   STATE2
   *   STATE3
   *   STATE4
   * output:
   *   STATE1: final state (output block)
   *   STATE2
   *   STATE3
   *   STATE4
   * changed:
   *   KEY
   *   TKEYP (T1)
   */

  /* One AESENC round on all four states, round key taken from off(TKEYP). */
  .macro _aesni_enc4_round off
          movaps  \off(TKEYP), KEY
          AESENC KEY STATE1
          AESENC KEY STATE2
          AESENC KEY STATE3
          AESENC KEY STATE4
  .endm

  _aesni_enc4:
          movaps  (KEYP), KEY             # first round key
          lea     0x30(KEYP), TKEYP       # cursor position for a 128-bit key
          pxor    KEY, STATE1             # round 0
          pxor    KEY, STATE2
          pxor    KEY, STATE3
          pxor    KEY, STATE4
          cmp     $24, KLEN
          jb      .Lenc4_k128
          lea     0x20(TKEYP), TKEYP      # lea leaves the flags for the je below
          je      .Lenc4_k192
          add     $0x20, TKEYP

          /* two rounds that only a 256-bit key has */
          _aesni_enc4_round -0x60
          _aesni_enc4_round -0x50
          /* .align 4 is disabled at this label */
          /* NOTE(review): _aesni_dec4 keeps its .align 4 enabled - confirm intent */
  .Lenc4_k192:
          /* two rounds shared by 192- and 256-bit keys */
          _aesni_enc4_round -0x40
          _aesni_enc4_round -0x30
          /* .align 4 is disabled at this label */
  .Lenc4_k128:
          /* ten rounds common to every key size */
          _aesni_enc4_round -0x20
          _aesni_enc4_round -0x10
          _aesni_enc4_round 0
          _aesni_enc4_round 0x10
          _aesni_enc4_round 0x20
          _aesni_enc4_round 0x30
          _aesni_enc4_round 0x40
          _aesni_enc4_round 0x50
          _aesni_enc4_round 0x60
          movaps  0x70(TKEYP), KEY
          AESENCLAST KEY STATE1           # last round
          AESENCLAST KEY STATE2
          AESENCLAST KEY STATE3
          AESENCLAST KEY STATE4
          ret
  /*
   * void aesni_dec(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src)
   *
   * Decrypt the single 16-byte block at src and store the result at dst.
   * The decryption round keys are taken from ctx + 240.
   */
  ENTRY(aesni_dec)
          movups  (INP), STATE            # fetch the input block
          mov     480(KEYP), KLEN         # key length kept at ctx + 480
          add     $240, KEYP              # switch to the decryption round keys
          call    _aesni_dec1
          movups  STATE, (OUTP)           # write the result
          ret
  /*
   * _aesni_dec1: internal ABI
   * input:
   *   KEYP:  pointer to the decryption round keys (16-byte aligned loads)
   *   KLEN:  key length in bytes (compared against 24 to pick the
   *          number of rounds)
   *   STATE: initial state (input block)
   * output:
   *   STATE: final state (output block)
   * changed:
   *   KEY
   *   TKEYP (T1)
   */
  _aesni_dec1:
          movaps  (KEYP), KEY             # first round key
          lea     0x30(KEYP), TKEYP       # cursor position for a 128-bit key
          pxor    KEY, STATE              # round 0
          cmp     $24, KLEN
          jb      .Ldec1_k128
          lea     0x20(TKEYP), TKEYP      # lea leaves the flags for the je below
          je      .Ldec1_k192
          add     $0x20, TKEYP

          /* two rounds that only a 256-bit key has */
          movaps  -0x60(TKEYP), KEY
          AESDEC KEY STATE
          movaps  -0x50(TKEYP), KEY
          AESDEC KEY STATE
  .align 4
  .Ldec1_k192:
          /* two rounds shared by 192- and 256-bit keys */
          movaps  -0x40(TKEYP), KEY
          AESDEC KEY STATE
          movaps  -0x30(TKEYP), KEY
          AESDEC KEY STATE
  .align 4
  .Ldec1_k128:
          /* ten rounds common to every key size */
          movaps  -0x20(TKEYP), KEY
          AESDEC KEY STATE
          movaps  -0x10(TKEYP), KEY
          AESDEC KEY STATE
          movaps  (TKEYP), KEY
          AESDEC KEY STATE
          movaps  0x10(TKEYP), KEY
          AESDEC KEY STATE
          movaps  0x20(TKEYP), KEY
          AESDEC KEY STATE
          movaps  0x30(TKEYP), KEY
          AESDEC KEY STATE
          movaps  0x40(TKEYP), KEY
          AESDEC KEY STATE
          movaps  0x50(TKEYP), KEY
          AESDEC KEY STATE
          movaps  0x60(TKEYP), KEY
          AESDEC KEY STATE
          movaps  0x70(TKEYP), KEY
          AESDECLAST KEY STATE            # last round
          ret
  /*
   * _aesni_dec4: internal ABI
   * input:
   *   KEYP:   pointer to the decryption round keys (16-byte aligned loads)
   *   KLEN:   key length in bytes (compared against 24 to pick the
   *           number of rounds)
   *   STATE1: initial state (input block)
   *   STATE2
   *   STATE3
   *   STATE4
   * output:
   *   STATE1: final state (output block)
   *   STATE2
   *   STATE3
   *   STATE4
   * changed:
   *   KEY
   *   TKEYP (T1)
   */

  /* One AESDEC round on all four states, round key taken from off(TKEYP). */
  .macro _aesni_dec4_round off
          movaps  \off(TKEYP), KEY
          AESDEC KEY STATE1
          AESDEC KEY STATE2
          AESDEC KEY STATE3
          AESDEC KEY STATE4
  .endm

  _aesni_dec4:
          movaps  (KEYP), KEY             # first round key
          lea     0x30(KEYP), TKEYP       # cursor position for a 128-bit key
          pxor    KEY, STATE1             # round 0
          pxor    KEY, STATE2
          pxor    KEY, STATE3
          pxor    KEY, STATE4
          cmp     $24, KLEN
          jb      .Ldec4_k128
          lea     0x20(TKEYP), TKEYP      # lea leaves the flags for the je below
          je      .Ldec4_k192
          add     $0x20, TKEYP

          /* two rounds that only a 256-bit key has */
          _aesni_dec4_round -0x60
          _aesni_dec4_round -0x50
  .align 4
  .Ldec4_k192:
          /* two rounds shared by 192- and 256-bit keys */
          _aesni_dec4_round -0x40
          _aesni_dec4_round -0x30
  .align 4
  .Ldec4_k128:
          /* ten rounds common to every key size */
          _aesni_dec4_round -0x20
          _aesni_dec4_round -0x10
          _aesni_dec4_round 0
          _aesni_dec4_round 0x10
          _aesni_dec4_round 0x20
          _aesni_dec4_round 0x30
          _aesni_dec4_round 0x40
          _aesni_dec4_round 0x50
          _aesni_dec4_round 0x60
          movaps  0x70(TKEYP), KEY
          AESDECLAST KEY STATE1           # last round
          AESDECLAST KEY STATE2
          AESDECLAST KEY STATE3
          AESDECLAST KEY STATE4
          ret
  /*
   * void aesni_ecb_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src,
   *                    size_t len)
   *
   * ECB-encrypt the whole 16-byte blocks of src into dst: four blocks per
   * iteration while at least 64 bytes remain, then one block at a time.
   * A trailing partial block (len % 16 bytes) is left untouched.  Returns
   * immediately when len < 16.
   *
   * len is unsigned, so every length test uses an unsigned condition.
   *
   * changed: KEYP is preserved; OUTP, INP, LEN, KLEN, TKEYP, KEY and
   *          STATE1..STATE4 are modified
   */
  ENTRY(aesni_ecb_enc)
          cmp     $16, LEN
          jb      .Lecb_enc_ret           # also covers len == 0
          mov     480(KEYP), KLEN         # key length kept at ctx + 480
          cmp     $64, LEN
          jb      .Lecb_enc_loop1
  .align 4
  .Lecb_enc_loop4:
          /* invariant: at least 64 bytes remain */
          movups  (INP), STATE1
          movups  0x10(INP), STATE2
          movups  0x20(INP), STATE3
          movups  0x30(INP), STATE4
          call    _aesni_enc4
          movups  STATE1, (OUTP)
          movups  STATE2, 0x10(OUTP)
          movups  STATE3, 0x20(OUTP)
          movups  STATE4, 0x30(OUTP)
          sub     $64, LEN
          add     $64, INP
          add     $64, OUTP
          cmp     $64, LEN
          jae     .Lecb_enc_loop4
          cmp     $16, LEN
          jb      .Lecb_enc_ret
  .align 4
  .Lecb_enc_loop1:
          /* invariant: at least 16 bytes remain */
          movups  (INP), STATE1
          call    _aesni_enc1
          movups  STATE1, (OUTP)
          sub     $16, LEN
          add     $16, INP
          add     $16, OUTP
          cmp     $16, LEN
          jae     .Lecb_enc_loop1
  .Lecb_enc_ret:
          ret
  /*
   * void aesni_ecb_dec(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src,
   *                    size_t len);
   *
   * ECB-decrypt the whole 16-byte blocks of src into dst: four blocks per
   * iteration while at least 64 bytes remain, then one block at a time.
   * A trailing partial block (len % 16 bytes) is left untouched.  Returns
   * immediately when len < 16.
   *
   * len is unsigned, so every length test uses an unsigned condition.
   *
   * changed: KEYP (advanced by 240 to the decryption round keys), OUTP,
   *          INP, LEN, KLEN, TKEYP, KEY and STATE1..STATE4
   */
  ENTRY(aesni_ecb_dec)
          cmp     $16, LEN
          jb      .Lecb_dec_ret           # also covers len == 0
          mov     480(KEYP), KLEN         # key length kept at ctx + 480
          add     $240, KEYP              # switch to the decryption round keys
          cmp     $64, LEN
          jb      .Lecb_dec_loop1
  .align 4
  .Lecb_dec_loop4:
          /* invariant: at least 64 bytes remain */
          movups  (INP), STATE1
          movups  0x10(INP), STATE2
          movups  0x20(INP), STATE3
          movups  0x30(INP), STATE4
          call    _aesni_dec4
          movups  STATE1, (OUTP)
          movups  STATE2, 0x10(OUTP)
          movups  STATE3, 0x20(OUTP)
          movups  STATE4, 0x30(OUTP)
          sub     $64, LEN
          add     $64, INP
          add     $64, OUTP
          cmp     $64, LEN
          jae     .Lecb_dec_loop4
          cmp     $16, LEN
          jb      .Lecb_dec_ret
  .align 4
  .Lecb_dec_loop1:
          /* invariant: at least 16 bytes remain */
          movups  (INP), STATE1
          call    _aesni_dec1
          movups  STATE1, (OUTP)
          sub     $16, LEN
          add     $16, INP
          add     $16, OUTP
          cmp     $16, LEN
          jae     .Lecb_dec_loop1
  .Lecb_dec_ret:
          ret
  /*
   * void aesni_cbc_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src,
   *                    size_t len, u8 *iv)
   *
   * CBC-encrypt the whole 16-byte blocks of src into dst, one block at a
   * time.  The chaining value is read from iv on entry and the last
   * ciphertext block is written back to iv on exit.  A trailing partial
   * block (len % 16 bytes) is left untouched.  When len < 16 nothing is
   * processed and iv is not modified.
   *
   * len is unsigned, so every length test uses an unsigned condition.
   *
   * changed: OUTP, INP, LEN, KLEN, TKEYP, KEY, IN and STATE
   */
  ENTRY(aesni_cbc_enc)
          cmp     $16, LEN
          jb      .Lcbc_enc_ret
          mov     480(KEYP), KLEN         # key length kept at ctx + 480
          movups  (IVP), STATE            # chaining value starts as the IV
  .align 4
  .Lcbc_enc_loop:
          /* invariant: at least 16 bytes remain; STATE = previous ciphertext */
          movups  (INP), IN
          pxor    IN, STATE               # plaintext XOR chaining value
          call    _aesni_enc1
          movups  STATE, (OUTP)
          sub     $16, LEN
          add     $16, INP
          add     $16, OUTP
          cmp     $16, LEN
          jae     .Lcbc_enc_loop
          movups  STATE, (IVP)            # hand the last ciphertext back as IV
  .Lcbc_enc_ret:
          ret
  /*
   * void aesni_cbc_dec(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src,
   *                    size_t len, u8 *iv)
   *
   * CBC-decrypt the whole 16-byte blocks of src into dst: four blocks per
   * iteration while at least 64 bytes remain, then one block at a time.
   * The chaining value is read from iv on entry and the last ciphertext
   * block consumed is written back to iv on exit.  A trailing partial
   * block (len % 16 bytes) is left untouched.  When len < 16 nothing is
   * processed and iv is not modified.
   *
   * len is unsigned, so every length test uses an unsigned condition.
   *
   * changed: KEYP (advanced by 240 to the decryption round keys), OUTP,
   *          INP, LEN, KLEN, TKEYP, KEY, IV, IN1..IN4 and STATE1..STATE4
   */
  ENTRY(aesni_cbc_dec)
          cmp     $16, LEN
          jb      .Lcbc_dec_just_ret
          mov     480(KEYP), KLEN         # key length kept at ctx + 480
          add     $240, KEYP              # switch to the decryption round keys
          movups  (IVP), IV
          cmp     $64, LEN
          jb      .Lcbc_dec_loop1
  .align 4
  .Lcbc_dec_loop4:
          /* invariant: at least 64 bytes remain; IV = previous ciphertext */
          movups  (INP), IN1
          movaps  IN1, STATE1
          movups  0x10(INP), IN2
          movaps  IN2, STATE2
          movups  0x20(INP), IN3
          movaps  IN3, STATE3
          movups  0x30(INP), IN4
          movaps  IN4, STATE4
          call    _aesni_dec4
          pxor    IV, STATE1              # each block XOR the ciphertext before it
          pxor    IN1, STATE2
          pxor    IN2, STATE3
          pxor    IN3, STATE4
          movaps  IN4, IV                 # last ciphertext chains into the next pass
          movups  STATE1, (OUTP)
          movups  STATE2, 0x10(OUTP)
          movups  STATE3, 0x20(OUTP)
          movups  STATE4, 0x30(OUTP)
          sub     $64, LEN
          add     $64, INP
          add     $64, OUTP
          cmp     $64, LEN
          jae     .Lcbc_dec_loop4
          cmp     $16, LEN
          jb      .Lcbc_dec_ret
  .align 4
  .Lcbc_dec_loop1:
          /* invariant: at least 16 bytes remain; IV = previous ciphertext */
          movups  (INP), IN
          movaps  IN, STATE
          call    _aesni_dec1
          pxor    IV, STATE
          movups  STATE, (OUTP)
          movaps  IN, IV
          sub     $16, LEN
          add     $16, INP
          add     $16, OUTP
          cmp     $16, LEN
          jae     .Lcbc_dec_loop1
  .Lcbc_dec_ret:
          movups  IV, (IVP)               # hand the last ciphertext back as IV
  .Lcbc_dec_just_ret:
          ret