salsa20-i586-asm_32.S

# salsa20_pm.s version 20051229
# D. J. Bernstein
# Public domain.
# enter ECRYPT_encrypt_bytes
.text
.p2align 5
.globl ECRYPT_encrypt_bytes
ECRYPT_encrypt_bytes:
mov %esp,%eax
and $31,%eax
add $256,%eax
sub %eax,%esp
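# The four instructions above reserve the stack frame: eax =
# (esp & 31) + 256, so the new esp is 32-byte aligned with at least
# 256 bytes of scratch space, and eax keeps the adjustment that
# "add %eax,%esp" undoes before each ret.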
# eax_stack = eax
movl %eax,80(%esp)
# ebx_stack = ebx
movl %ebx,84(%esp)
# esi_stack = esi
movl %esi,88(%esp)
# edi_stack = edi
movl %edi,92(%esp)
# ebp_stack = ebp
movl %ebp,96(%esp)
# x = arg1
movl 4(%esp,%eax),%edx
# m = arg2
movl 8(%esp,%eax),%esi
# out = arg3
movl 12(%esp,%eax),%edi
# bytes = arg4
movl 16(%esp,%eax),%ebx
# bytes -= 0
sub $0,%ebx
# goto done if unsigned<=
jbe ._done
._start:
# in0 = *(uint32 *) (x + 0)
movl 0(%edx),%eax
# in1 = *(uint32 *) (x + 4)
movl 4(%edx),%ecx
# in2 = *(uint32 *) (x + 8)
movl 8(%edx),%ebp
# j0 = in0
movl %eax,164(%esp)
# in3 = *(uint32 *) (x + 12)
movl 12(%edx),%eax
# j1 = in1
movl %ecx,168(%esp)
# in4 = *(uint32 *) (x + 16)
movl 16(%edx),%ecx
# j2 = in2
movl %ebp,172(%esp)
# in5 = *(uint32 *) (x + 20)
movl 20(%edx),%ebp
# j3 = in3
movl %eax,176(%esp)
# in6 = *(uint32 *) (x + 24)
movl 24(%edx),%eax
# j4 = in4
movl %ecx,180(%esp)
# in7 = *(uint32 *) (x + 28)
movl 28(%edx),%ecx
# j5 = in5
movl %ebp,184(%esp)
# in8 = *(uint32 *) (x + 32)
movl 32(%edx),%ebp
# j6 = in6
movl %eax,188(%esp)
# in9 = *(uint32 *) (x + 36)
movl 36(%edx),%eax
# j7 = in7
movl %ecx,192(%esp)
# in10 = *(uint32 *) (x + 40)
movl 40(%edx),%ecx
# j8 = in8
movl %ebp,196(%esp)
# in11 = *(uint32 *) (x + 44)
movl 44(%edx),%ebp
# j9 = in9
movl %eax,200(%esp)
# in12 = *(uint32 *) (x + 48)
movl 48(%edx),%eax
# j10 = in10
movl %ecx,204(%esp)
# in13 = *(uint32 *) (x + 52)
movl 52(%edx),%ecx
# j11 = in11
movl %ebp,208(%esp)
# in14 = *(uint32 *) (x + 56)
movl 56(%edx),%ebp
# j12 = in12
movl %eax,212(%esp)
# in15 = *(uint32 *) (x + 60)
movl 60(%edx),%eax
# j13 = in13
movl %ecx,216(%esp)
# j14 = in14
movl %ebp,220(%esp)
# j15 = in15
movl %eax,224(%esp)
# x_backup = x
movl %edx,64(%esp)
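# Stack frame layout from here on: 0..63 is a 64-byte tmp buffer for
# partial blocks, 64..96 hold saved registers and pointer backups,
# 100..160 the working state x0..x15, 164..224 the input copy
# j0..j15, and 228 the saved output pointer ctarget.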
._bytesatleast1:
# bytes - 64
cmp $64,%ebx
# goto nocopy if unsigned>=
jae ._nocopy
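# Fewer than 64 bytes remain: encrypt into the 64-byte tmp buffer at
# 0(%esp) instead, after copying the partial input block there; the
# real output pointer is saved in ctarget and the result is copied
# back just before ._bytesatleast64.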
# ctarget = out
movl %edi,228(%esp)
# out = &tmp
leal 0(%esp),%edi
# i = bytes
mov %ebx,%ecx
# while (i) { *out++ = *m++; --i }
rep movsb
# out = &tmp
leal 0(%esp),%edi
# m = &tmp
leal 0(%esp),%esi
._nocopy:
# out_backup = out
movl %edi,72(%esp)
# m_backup = m
movl %esi,68(%esp)
# bytes_backup = bytes
movl %ebx,76(%esp)
# in0 = j0
movl 164(%esp),%eax
# in1 = j1
movl 168(%esp),%ecx
# in2 = j2
movl 172(%esp),%edx
# in3 = j3
movl 176(%esp),%ebx
# x0 = in0
movl %eax,100(%esp)
# x1 = in1
movl %ecx,104(%esp)
# x2 = in2
movl %edx,108(%esp)
# x3 = in3
movl %ebx,112(%esp)
# in4 = j4
movl 180(%esp),%eax
# in5 = j5
movl 184(%esp),%ecx
# in6 = j6
movl 188(%esp),%edx
# in7 = j7
movl 192(%esp),%ebx
# x4 = in4
movl %eax,116(%esp)
# x5 = in5
movl %ecx,120(%esp)
# x6 = in6
movl %edx,124(%esp)
# x7 = in7
movl %ebx,128(%esp)
# in8 = j8
movl 196(%esp),%eax
# in9 = j9
movl 200(%esp),%ecx
# in10 = j10
movl 204(%esp),%edx
# in11 = j11
movl 208(%esp),%ebx
# x8 = in8
movl %eax,132(%esp)
# x9 = in9
movl %ecx,136(%esp)
# x10 = in10
movl %edx,140(%esp)
# x11 = in11
movl %ebx,144(%esp)
# in12 = j12
movl 212(%esp),%eax
# in13 = j13
movl 216(%esp),%ecx
# in14 = j14
movl 220(%esp),%edx
# in15 = j15
movl 224(%esp),%ebx
# x12 = in12
movl %eax,148(%esp)
# x13 = in13
movl %ecx,152(%esp)
# x14 = in14
movl %edx,156(%esp)
# x15 = in15
movl %ebx,160(%esp)
# i = 20
mov $20,%ebp
# p = x0
movl 100(%esp),%eax
# s = x5
movl 120(%esp),%ecx
# t = x10
movl 140(%esp),%edx
# w = x15
movl 160(%esp),%ebx
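# Each pass through ._mainloop performs four Salsa20 rounds (two
# double rounds); i counts down from 20 by 4, giving 20 rounds.  The
# four quarterrounds of each round are interleaved, with the diagonal
# words x0, x5, x10, x15 kept live in registers as p, s, t, w across
# iterations.  As an illustrative C-style sketch of one quarterround
# (pseudocode only, not part of the original source; rol32 stands
# for a 32-bit left rotate):
#
#   b ^= rol32(a + d, 7);
#   c ^= rol32(b + a, 9);
#   d ^= rol32(c + b, 13);
#   a ^= rol32(d + c, 18);
#
# The column rounds apply this to (x0,x4,x8,x12), (x5,x9,x13,x1),
# (x10,x14,x2,x6), (x15,x3,x7,x11); the row rounds to (x0,x1,x2,x3),
# (x5,x6,x7,x4), (x10,x11,x8,x9), (x15,x12,x13,x14).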
._mainloop:
# x0 = p
movl %eax,100(%esp)
# x10 = t
movl %edx,140(%esp)
# p += x12
addl 148(%esp),%eax
# x5 = s
movl %ecx,120(%esp)
# t += x6
addl 124(%esp),%edx
# x15 = w
movl %ebx,160(%esp)
# r = x1
movl 104(%esp),%esi
# r += s
add %ecx,%esi
# v = x11
movl 144(%esp),%edi
# v += w
add %ebx,%edi
# p <<<= 7
rol $7,%eax
# p ^= x4
xorl 116(%esp),%eax
# t <<<= 7
rol $7,%edx
# t ^= x14
xorl 156(%esp),%edx
# r <<<= 7
rol $7,%esi
# r ^= x9
xorl 136(%esp),%esi
# v <<<= 7
rol $7,%edi
# v ^= x3
xorl 112(%esp),%edi
# x4 = p
movl %eax,116(%esp)
# x14 = t
movl %edx,156(%esp)
# p += x0
addl 100(%esp),%eax
# x9 = r
movl %esi,136(%esp)
# t += x10
addl 140(%esp),%edx
# x3 = v
movl %edi,112(%esp)
# p <<<= 9
rol $9,%eax
# p ^= x8
xorl 132(%esp),%eax
# t <<<= 9
rol $9,%edx
# t ^= x2
xorl 108(%esp),%edx
# s += r
add %esi,%ecx
# s <<<= 9
rol $9,%ecx
# s ^= x13
xorl 152(%esp),%ecx
# w += v
add %edi,%ebx
# w <<<= 9
rol $9,%ebx
# w ^= x7
xorl 128(%esp),%ebx
# x8 = p
movl %eax,132(%esp)
# x2 = t
movl %edx,108(%esp)
# p += x4
addl 116(%esp),%eax
# x13 = s
movl %ecx,152(%esp)
# t += x14
addl 156(%esp),%edx
# x7 = w
movl %ebx,128(%esp)
# p <<<= 13
rol $13,%eax
# p ^= x12
xorl 148(%esp),%eax
# t <<<= 13
rol $13,%edx
# t ^= x6
xorl 124(%esp),%edx
# r += s
add %ecx,%esi
# r <<<= 13
rol $13,%esi
# r ^= x1
xorl 104(%esp),%esi
# v += w
add %ebx,%edi
# v <<<= 13
rol $13,%edi
# v ^= x11
xorl 144(%esp),%edi
# x12 = p
movl %eax,148(%esp)
# x6 = t
movl %edx,124(%esp)
# p += x8
addl 132(%esp),%eax
# x1 = r
movl %esi,104(%esp)
# t += x2
addl 108(%esp),%edx
# x11 = v
movl %edi,144(%esp)
# p <<<= 18
rol $18,%eax
# p ^= x0
xorl 100(%esp),%eax
# t <<<= 18
rol $18,%edx
# t ^= x10
xorl 140(%esp),%edx
# s += r
add %esi,%ecx
# s <<<= 18
rol $18,%ecx
# s ^= x5
xorl 120(%esp),%ecx
# w += v
add %edi,%ebx
# w <<<= 18
rol $18,%ebx
# w ^= x15
xorl 160(%esp),%ebx
# x0 = p
movl %eax,100(%esp)
# x10 = t
movl %edx,140(%esp)
# p += x3
addl 112(%esp),%eax
# p <<<= 7
rol $7,%eax
# x5 = s
movl %ecx,120(%esp)
# t += x9
addl 136(%esp),%edx
# x15 = w
movl %ebx,160(%esp)
# r = x4
movl 116(%esp),%esi
# r += s
add %ecx,%esi
# v = x14
movl 156(%esp),%edi
# v += w
add %ebx,%edi
# p ^= x1
xorl 104(%esp),%eax
# t <<<= 7
rol $7,%edx
# t ^= x11
xorl 144(%esp),%edx
# r <<<= 7
rol $7,%esi
# r ^= x6
xorl 124(%esp),%esi
# v <<<= 7
rol $7,%edi
# v ^= x12
xorl 148(%esp),%edi
# x1 = p
movl %eax,104(%esp)
# x11 = t
movl %edx,144(%esp)
# p += x0
addl 100(%esp),%eax
# x6 = r
movl %esi,124(%esp)
# t += x10
addl 140(%esp),%edx
# x12 = v
movl %edi,148(%esp)
# p <<<= 9
rol $9,%eax
# p ^= x2
xorl 108(%esp),%eax
# t <<<= 9
rol $9,%edx
# t ^= x8
xorl 132(%esp),%edx
# s += r
add %esi,%ecx
# s <<<= 9
rol $9,%ecx
# s ^= x7
xorl 128(%esp),%ecx
# w += v
add %edi,%ebx
# w <<<= 9
rol $9,%ebx
# w ^= x13
xorl 152(%esp),%ebx
# x2 = p
movl %eax,108(%esp)
# x8 = t
movl %edx,132(%esp)
# p += x1
addl 104(%esp),%eax
# x7 = s
movl %ecx,128(%esp)
# t += x11
addl 144(%esp),%edx
# x13 = w
movl %ebx,152(%esp)
# p <<<= 13
rol $13,%eax
# p ^= x3
xorl 112(%esp),%eax
# t <<<= 13
rol $13,%edx
# t ^= x9
xorl 136(%esp),%edx
# r += s
add %ecx,%esi
# r <<<= 13
rol $13,%esi
# r ^= x4
xorl 116(%esp),%esi
# v += w
add %ebx,%edi
# v <<<= 13
rol $13,%edi
# v ^= x14
xorl 156(%esp),%edi
# x3 = p
movl %eax,112(%esp)
# x9 = t
movl %edx,136(%esp)
# p += x2
addl 108(%esp),%eax
# x4 = r
movl %esi,116(%esp)
# t += x8
addl 132(%esp),%edx
# x14 = v
movl %edi,156(%esp)
# p <<<= 18
rol $18,%eax
# p ^= x0
xorl 100(%esp),%eax
# t <<<= 18
rol $18,%edx
# t ^= x10
xorl 140(%esp),%edx
# s += r
add %esi,%ecx
# s <<<= 18
rol $18,%ecx
# s ^= x5
xorl 120(%esp),%ecx
# w += v
add %edi,%ebx
# w <<<= 18
rol $18,%ebx
# w ^= x15
xorl 160(%esp),%ebx
# x0 = p
movl %eax,100(%esp)
# x10 = t
movl %edx,140(%esp)
# p += x12
addl 148(%esp),%eax
# x5 = s
movl %ecx,120(%esp)
# t += x6
addl 124(%esp),%edx
# x15 = w
movl %ebx,160(%esp)
# r = x1
movl 104(%esp),%esi
# r += s
add %ecx,%esi
# v = x11
movl 144(%esp),%edi
# v += w
add %ebx,%edi
# p <<<= 7
rol $7,%eax
# p ^= x4
xorl 116(%esp),%eax
# t <<<= 7
rol $7,%edx
# t ^= x14
xorl 156(%esp),%edx
# r <<<= 7
rol $7,%esi
# r ^= x9
xorl 136(%esp),%esi
# v <<<= 7
rol $7,%edi
# v ^= x3
xorl 112(%esp),%edi
# x4 = p
movl %eax,116(%esp)
# x14 = t
movl %edx,156(%esp)
# p += x0
addl 100(%esp),%eax
# x9 = r
movl %esi,136(%esp)
# t += x10
addl 140(%esp),%edx
# x3 = v
movl %edi,112(%esp)
# p <<<= 9
rol $9,%eax
# p ^= x8
xorl 132(%esp),%eax
# t <<<= 9
rol $9,%edx
# t ^= x2
xorl 108(%esp),%edx
# s += r
add %esi,%ecx
# s <<<= 9
rol $9,%ecx
# s ^= x13
xorl 152(%esp),%ecx
# w += v
add %edi,%ebx
# w <<<= 9
rol $9,%ebx
# w ^= x7
xorl 128(%esp),%ebx
# x8 = p
movl %eax,132(%esp)
# x2 = t
movl %edx,108(%esp)
# p += x4
addl 116(%esp),%eax
# x13 = s
movl %ecx,152(%esp)
# t += x14
addl 156(%esp),%edx
# x7 = w
movl %ebx,128(%esp)
# p <<<= 13
rol $13,%eax
# p ^= x12
xorl 148(%esp),%eax
# t <<<= 13
rol $13,%edx
# t ^= x6
xorl 124(%esp),%edx
# r += s
add %ecx,%esi
# r <<<= 13
rol $13,%esi
# r ^= x1
xorl 104(%esp),%esi
# v += w
add %ebx,%edi
# v <<<= 13
rol $13,%edi
# v ^= x11
xorl 144(%esp),%edi
# x12 = p
movl %eax,148(%esp)
# x6 = t
movl %edx,124(%esp)
# p += x8
addl 132(%esp),%eax
# x1 = r
movl %esi,104(%esp)
# t += x2
addl 108(%esp),%edx
# x11 = v
movl %edi,144(%esp)
# p <<<= 18
rol $18,%eax
# p ^= x0
xorl 100(%esp),%eax
# t <<<= 18
rol $18,%edx
# t ^= x10
xorl 140(%esp),%edx
# s += r
add %esi,%ecx
# s <<<= 18
rol $18,%ecx
# s ^= x5
xorl 120(%esp),%ecx
# w += v
add %edi,%ebx
# w <<<= 18
rol $18,%ebx
# w ^= x15
xorl 160(%esp),%ebx
# x0 = p
movl %eax,100(%esp)
# x10 = t
movl %edx,140(%esp)
# p += x3
addl 112(%esp),%eax
# p <<<= 7
rol $7,%eax
# x5 = s
movl %ecx,120(%esp)
# t += x9
addl 136(%esp),%edx
# x15 = w
movl %ebx,160(%esp)
# r = x4
movl 116(%esp),%esi
# r += s
add %ecx,%esi
# v = x14
movl 156(%esp),%edi
# v += w
add %ebx,%edi
# p ^= x1
xorl 104(%esp),%eax
# t <<<= 7
rol $7,%edx
# t ^= x11
xorl 144(%esp),%edx
# r <<<= 7
rol $7,%esi
# r ^= x6
xorl 124(%esp),%esi
# v <<<= 7
rol $7,%edi
# v ^= x12
xorl 148(%esp),%edi
# x1 = p
movl %eax,104(%esp)
# x11 = t
movl %edx,144(%esp)
# p += x0
addl 100(%esp),%eax
# x6 = r
movl %esi,124(%esp)
# t += x10
addl 140(%esp),%edx
# x12 = v
movl %edi,148(%esp)
# p <<<= 9
rol $9,%eax
# p ^= x2
xorl 108(%esp),%eax
# t <<<= 9
rol $9,%edx
# t ^= x8
xorl 132(%esp),%edx
# s += r
add %esi,%ecx
# s <<<= 9
rol $9,%ecx
# s ^= x7
xorl 128(%esp),%ecx
# w += v
add %edi,%ebx
# w <<<= 9
rol $9,%ebx
# w ^= x13
xorl 152(%esp),%ebx
# x2 = p
movl %eax,108(%esp)
# x8 = t
movl %edx,132(%esp)
# p += x1
addl 104(%esp),%eax
# x7 = s
movl %ecx,128(%esp)
# t += x11
addl 144(%esp),%edx
# x13 = w
movl %ebx,152(%esp)
# p <<<= 13
rol $13,%eax
# p ^= x3
xorl 112(%esp),%eax
# t <<<= 13
rol $13,%edx
# t ^= x9
xorl 136(%esp),%edx
# r += s
add %ecx,%esi
# r <<<= 13
rol $13,%esi
# r ^= x4
xorl 116(%esp),%esi
# v += w
add %ebx,%edi
# v <<<= 13
rol $13,%edi
# v ^= x14
xorl 156(%esp),%edi
# x3 = p
movl %eax,112(%esp)
# x9 = t
movl %edx,136(%esp)
# p += x2
addl 108(%esp),%eax
# x4 = r
movl %esi,116(%esp)
# t += x8
addl 132(%esp),%edx
# x14 = v
movl %edi,156(%esp)
# p <<<= 18
rol $18,%eax
# p ^= x0
xorl 100(%esp),%eax
# t <<<= 18
rol $18,%edx
# t ^= x10
xorl 140(%esp),%edx
# s += r
add %esi,%ecx
# s <<<= 18
rol $18,%ecx
# s ^= x5
xorl 120(%esp),%ecx
# w += v
add %edi,%ebx
# w <<<= 18
rol $18,%ebx
# w ^= x15
xorl 160(%esp),%ebx
# i -= 4
sub $4,%ebp
# goto mainloop if unsigned >
ja ._mainloop
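# 20 rounds are complete.  The code below performs the Salsa20
# feedforward (x[i] += j[i]), XORs the resulting 64-byte keystream
# block with the message at m, and writes it to out.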
# x0 = p
movl %eax,100(%esp)
# x5 = s
movl %ecx,120(%esp)
# x10 = t
movl %edx,140(%esp)
# x15 = w
movl %ebx,160(%esp)
# out = out_backup
movl 72(%esp),%edi
# m = m_backup
movl 68(%esp),%esi
# in0 = x0
movl 100(%esp),%eax
# in1 = x1
movl 104(%esp),%ecx
# in0 += j0
addl 164(%esp),%eax
# in1 += j1
addl 168(%esp),%ecx
# in0 ^= *(uint32 *) (m + 0)
xorl 0(%esi),%eax
# in1 ^= *(uint32 *) (m + 4)
xorl 4(%esi),%ecx
# *(uint32 *) (out + 0) = in0
movl %eax,0(%edi)
# *(uint32 *) (out + 4) = in1
movl %ecx,4(%edi)
# in2 = x2
movl 108(%esp),%eax
# in3 = x3
movl 112(%esp),%ecx
# in2 += j2
addl 172(%esp),%eax
# in3 += j3
addl 176(%esp),%ecx
# in2 ^= *(uint32 *) (m + 8)
xorl 8(%esi),%eax
# in3 ^= *(uint32 *) (m + 12)
xorl 12(%esi),%ecx
# *(uint32 *) (out + 8) = in2
movl %eax,8(%edi)
# *(uint32 *) (out + 12) = in3
movl %ecx,12(%edi)
# in4 = x4
movl 116(%esp),%eax
# in5 = x5
movl 120(%esp),%ecx
# in4 += j4
addl 180(%esp),%eax
# in5 += j5
addl 184(%esp),%ecx
# in4 ^= *(uint32 *) (m + 16)
xorl 16(%esi),%eax
# in5 ^= *(uint32 *) (m + 20)
xorl 20(%esi),%ecx
# *(uint32 *) (out + 16) = in4
movl %eax,16(%edi)
# *(uint32 *) (out + 20) = in5
movl %ecx,20(%edi)
# in6 = x6
movl 124(%esp),%eax
# in7 = x7
movl 128(%esp),%ecx
# in6 += j6
addl 188(%esp),%eax
# in7 += j7
addl 192(%esp),%ecx
# in6 ^= *(uint32 *) (m + 24)
xorl 24(%esi),%eax
# in7 ^= *(uint32 *) (m + 28)
xorl 28(%esi),%ecx
# *(uint32 *) (out + 24) = in6
movl %eax,24(%edi)
# *(uint32 *) (out + 28) = in7
movl %ecx,28(%edi)
# in8 = x8
movl 132(%esp),%eax
# in9 = x9
movl 136(%esp),%ecx
# in8 += j8
addl 196(%esp),%eax
# in9 += j9
addl 200(%esp),%ecx
# in8 ^= *(uint32 *) (m + 32)
xorl 32(%esi),%eax
# in9 ^= *(uint32 *) (m + 36)
xorl 36(%esi),%ecx
# *(uint32 *) (out + 32) = in8
movl %eax,32(%edi)
# *(uint32 *) (out + 36) = in9
movl %ecx,36(%edi)
# in10 = x10
movl 140(%esp),%eax
# in11 = x11
movl 144(%esp),%ecx
# in10 += j10
addl 204(%esp),%eax
# in11 += j11
addl 208(%esp),%ecx
# in10 ^= *(uint32 *) (m + 40)
xorl 40(%esi),%eax
# in11 ^= *(uint32 *) (m + 44)
xorl 44(%esi),%ecx
# *(uint32 *) (out + 40) = in10
movl %eax,40(%edi)
# *(uint32 *) (out + 44) = in11
movl %ecx,44(%edi)
# in12 = x12
movl 148(%esp),%eax
# in13 = x13
movl 152(%esp),%ecx
# in12 += j12
addl 212(%esp),%eax
# in13 += j13
addl 216(%esp),%ecx
# in12 ^= *(uint32 *) (m + 48)
xorl 48(%esi),%eax
# in13 ^= *(uint32 *) (m + 52)
xorl 52(%esi),%ecx
# *(uint32 *) (out + 48) = in12
movl %eax,48(%edi)
# *(uint32 *) (out + 52) = in13
movl %ecx,52(%edi)
# in14 = x14
movl 156(%esp),%eax
# in15 = x15
movl 160(%esp),%ecx
# in14 += j14
addl 220(%esp),%eax
# in15 += j15
addl 224(%esp),%ecx
# in14 ^= *(uint32 *) (m + 56)
xorl 56(%esi),%eax
# in15 ^= *(uint32 *) (m + 60)
xorl 60(%esi),%ecx
# *(uint32 *) (out + 56) = in14
movl %eax,56(%edi)
# *(uint32 *) (out + 60) = in15
movl %ecx,60(%edi)
# bytes = bytes_backup
movl 76(%esp),%ebx
# in8 = j8
movl 196(%esp),%eax
# in9 = j9
movl 200(%esp),%ecx
# in8 += 1
add $1,%eax
# in9 += 0 + carry
adc $0,%ecx
# j8 = in8
movl %eax,196(%esp)
# j9 = in9
movl %ecx,200(%esp)
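# j8/j9 are the 64-bit block counter (state words 8 and 9); the
# add/adc pair above increments it once per 64-byte block, carrying
# into the high word.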
# bytes - 64
cmp $64,%ebx
# goto bytesatleast65 if unsigned>
ja ._bytesatleast65
# goto bytesatleast64 if unsigned>=
jae ._bytesatleast64
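# Fall through only when bytes < 64: the final block was encrypted
# into the tmp buffer, so copy just the bytes requested back to the
# real destination saved in ctarget.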
# m = out
mov %edi,%esi
# out = ctarget
movl 228(%esp),%edi
# i = bytes
mov %ebx,%ecx
# while (i) { *out++ = *m++; --i }
rep movsb
._bytesatleast64:
# x = x_backup
movl 64(%esp),%eax
# in8 = j8
movl 196(%esp),%ecx
# in9 = j9
movl 200(%esp),%edx
# *(uint32 *) (x + 32) = in8
movl %ecx,32(%eax)
# *(uint32 *) (x + 36) = in9
movl %edx,36(%eax)
._done:
# eax = eax_stack
movl 80(%esp),%eax
# ebx = ebx_stack
movl 84(%esp),%ebx
# esi = esi_stack
movl 88(%esp),%esi
# edi = edi_stack
movl 92(%esp),%edi
# ebp = ebp_stack
movl 96(%esp),%ebp
# leave
add %eax,%esp
ret
._bytesatleast65:
# bytes -= 64
sub $64,%ebx
# out += 64
add $64,%edi
# m += 64
add $64,%esi
# goto bytesatleast1
jmp ._bytesatleast1
# enter ECRYPT_keysetup
.text
.p2align 5
.globl ECRYPT_keysetup
ECRYPT_keysetup:
mov %esp,%eax
and $31,%eax
add $256,%eax
sub %eax,%esp
# eax_stack = eax
movl %eax,64(%esp)
# ebx_stack = ebx
movl %ebx,68(%esp)
# esi_stack = esi
movl %esi,72(%esp)
# edi_stack = edi
movl %edi,76(%esp)
# ebp_stack = ebp
movl %ebp,80(%esp)
# k = arg2
movl 8(%esp,%eax),%ecx
# kbits = arg3
movl 12(%esp,%eax),%edx
# x = arg1
movl 4(%esp,%eax),%eax
# in1 = *(uint32 *) (k + 0)
movl 0(%ecx),%ebx
# in2 = *(uint32 *) (k + 4)
movl 4(%ecx),%esi
# in3 = *(uint32 *) (k + 8)
movl 8(%ecx),%edi
# in4 = *(uint32 *) (k + 12)
movl 12(%ecx),%ebp
# *(uint32 *) (x + 4) = in1
movl %ebx,4(%eax)
# *(uint32 *) (x + 8) = in2
movl %esi,8(%eax)
# *(uint32 *) (x + 12) = in3
movl %edi,12(%eax)
# *(uint32 *) (x + 16) = in4
movl %ebp,16(%eax)
# kbits - 256
cmp $256,%edx
# goto kbits128 if unsigned<
jb ._kbits128
._kbits256:
# in11 = *(uint32 *) (k + 16)
movl 16(%ecx),%edx
# in12 = *(uint32 *) (k + 20)
movl 20(%ecx),%ebx
# in13 = *(uint32 *) (k + 24)
movl 24(%ecx),%esi
# in14 = *(uint32 *) (k + 28)
movl 28(%ecx),%ecx
# *(uint32 *) (x + 44) = in11
movl %edx,44(%eax)
# *(uint32 *) (x + 48) = in12
movl %ebx,48(%eax)
# *(uint32 *) (x + 52) = in13
movl %esi,52(%eax)
# *(uint32 *) (x + 56) = in14
movl %ecx,56(%eax)
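# The constants below are the Salsa20 "sigma" constants for 256-bit
# keys: the little-endian words of the ASCII string
# "expand 32-byte k", stored in state words 0, 5, 10 and 15.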
# in0 = 1634760805
mov $1634760805,%ecx
# in5 = 857760878
mov $857760878,%edx
# in10 = 2036477234
mov $2036477234,%ebx
# in15 = 1797285236
mov $1797285236,%esi
# *(uint32 *) (x + 0) = in0
movl %ecx,0(%eax)
# *(uint32 *) (x + 20) = in5
movl %edx,20(%eax)
# *(uint32 *) (x + 40) = in10
movl %ebx,40(%eax)
# *(uint32 *) (x + 60) = in15
movl %esi,60(%eax)
# goto keysetupdone
jmp ._keysetupdone
._kbits128:
# in11 = *(uint32 *) (k + 0)
movl 0(%ecx),%edx
# in12 = *(uint32 *) (k + 4)
movl 4(%ecx),%ebx
# in13 = *(uint32 *) (k + 8)
movl 8(%ecx),%esi
# in14 = *(uint32 *) (k + 12)
movl 12(%ecx),%ecx
# *(uint32 *) (x + 44) = in11
movl %edx,44(%eax)
# *(uint32 *) (x + 48) = in12
movl %ebx,48(%eax)
# *(uint32 *) (x + 52) = in13
movl %esi,52(%eax)
# *(uint32 *) (x + 56) = in14
movl %ecx,56(%eax)
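# For 128-bit keys the 16 key bytes fill both word ranges 1..4 and
# 11..14 above, and the constants below are the "tau" variant: the
# little-endian words of "expand 16-byte k".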
# in0 = 1634760805
mov $1634760805,%ecx
# in5 = 824206446
mov $824206446,%edx
# in10 = 2036477238
mov $2036477238,%ebx
# in15 = 1797285236
mov $1797285236,%esi
# *(uint32 *) (x + 0) = in0
movl %ecx,0(%eax)
# *(uint32 *) (x + 20) = in5
movl %edx,20(%eax)
# *(uint32 *) (x + 40) = in10
movl %ebx,40(%eax)
# *(uint32 *) (x + 60) = in15
movl %esi,60(%eax)
._keysetupdone:
# eax = eax_stack
movl 64(%esp),%eax
# ebx = ebx_stack
movl 68(%esp),%ebx
# esi = esi_stack
movl 72(%esp),%esi
# edi = edi_stack
movl 76(%esp),%edi
# ebp = ebp_stack
movl 80(%esp),%ebp
# leave
add %eax,%esp
ret
# enter ECRYPT_ivsetup
.text
.p2align 5
.globl ECRYPT_ivsetup
ECRYPT_ivsetup:
mov %esp,%eax
and $31,%eax
add $256,%eax
sub %eax,%esp
# eax_stack = eax
movl %eax,64(%esp)
# ebx_stack = ebx
movl %ebx,68(%esp)
# esi_stack = esi
movl %esi,72(%esp)
# edi_stack = edi
movl %edi,76(%esp)
# ebp_stack = ebp
movl %ebp,80(%esp)
# iv = arg2
movl 8(%esp,%eax),%ecx
# x = arg1
movl 4(%esp,%eax),%eax
# in6 = *(uint32 *) (iv + 0)
movl 0(%ecx),%edx
# in7 = *(uint32 *) (iv + 4)
movl 4(%ecx),%ecx
# in8 = 0
mov $0,%ebx
# in9 = 0
mov $0,%esi
# *(uint32 *) (x + 24) = in6
movl %edx,24(%eax)
# *(uint32 *) (x + 28) = in7
movl %ecx,28(%eax)
# *(uint32 *) (x + 32) = in8
movl %ebx,32(%eax)
# *(uint32 *) (x + 36) = in9
movl %esi,36(%eax)
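# The 64-bit IV fills state words 6 and 7, and the block counter
# (words 8 and 9) is reset to zero so the keystream for this
# (key, IV) pair starts at block 0.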
# eax = eax_stack
movl 64(%esp),%eax
# ebx = ebx_stack
movl 68(%esp),%ebx
# esi = esi_stack
movl 72(%esp),%esi
# edi = edi_stack
movl 76(%esp),%edi
# ebp = ebp_stack
movl 80(%esp),%ebp
# leave
add %eax,%esp
ret