@ sha1-armv4-large.S
#define __ARM_ARCH__ __LINUX_ARM_ARCH__
@ ====================================================================
@ Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
@ project. The module is, however, dual licensed under OpenSSL and
@ CRYPTOGAMS licenses depending on where you obtain it. For further
@ details see http://www.openssl.org/~appro/cryptogams/.
@ ====================================================================

@ sha1_block procedure for ARMv4.
@
@ January 2007.

@ Size/performance trade-off
@ ====================================================================
@ impl          size in bytes   comp cycles[*]  measured performance
@ ====================================================================
@ thumb         304             3212            4420
@ armv4-small   392/+29%        1958/+64%       2250/+96%
@ armv4-compact 740/+89%        1552/+26%       1840/+22%
@ armv4-large   1420/+92%       1307/+19%       1370/+34%[***]
@ full unroll   ~5100/+260%     ~1260/+4%       ~1300/+5%
@ ====================================================================
@ thumb         = same as 'small' but in Thumb instructions[**] and
@                 with recurring code in two private functions;
@ small         = detached Xload/update, loops are folded;
@ compact       = detached Xload/update, 5x unroll;
@ large         = interleaved Xload/update, 5x unroll;
@ full unroll   = interleaved Xload/update, full unroll, estimated[!];
@
@ [*]   Manually counted instructions in "grand" loop body. Measured
@       performance is affected by prologue and epilogue overhead,
@       i-cache availability, branch penalties, etc.
@ [**]  While each Thumb instruction is twice smaller, they are not as
@       diverse as ARM ones: e.g., there are only two arithmetic
@       instructions with 3 arguments, no [fixed] rotate, addressing
@       modes are limited. As a result it takes more instructions to do
@       the same job in Thumb, therefore the code is never twice as
@       small and always slower.
@ [***] which is also ~35% better than compiler generated code. Dual-
@       issue Cortex A8 core was measured to process input block in
@       ~990 cycles.

@ August 2010.
@
@ Rescheduling for dual-issue pipeline resulted in 13% improvement on
@ Cortex A8 core and in absolute terms ~870 cycles per input block
@ [or 13.6 cycles per byte].

@ February 2011.
@
@ Profiler-assisted and platform-specific optimization resulted in 10%
@ improvement on Cortex A8 core and 12.2 cycles per byte.
.text

.global	sha1_block_data_order
.type	sha1_block_data_order,%function

.align	2
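@ sha1_block_data_order(ctx, inp, num)
@
@ Register use, as inferred from the code below:
@   r0    - pointer to the five-word SHA-1 state
@   r1    - input pointer; r2 - number of 64-byte blocks on entry,
@           converted to an end-of-input pointer in the prologue
@   r3-r7 - working variables a,b,c,d,e
@   r8    - round constant K; r9-r12 - scratch and X[] words
@   r14   - frame pointer into the on-stack X[] buffer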
sha1_block_data_order:
	stmdb sp!,{r4-r12,lr}
	add r2,r1,r2,lsl#6 @ r2 to point at the end of r1
	ldmia r0,{r3,r4,r5,r6,r7}
.Lloop:
	ldr r8,.LK_00_19
	mov r14,sp
	sub sp,sp,#15*4
	mov r5,r5,ror#30
	mov r6,r6,ror#30
	mov r7,r7,ror#30 @ [6]
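@ Per-block setup: r8 holds K_00_19 and r14 marks the top of the X[]
@ area that is filled with "str ...,[r14,#-4]!" as the schedule is
@ built (80 words end up reserved in total, see .L_done). c, d and e
@ are seemingly kept pre-rotated by 2 bits and only straightened out
@ at the end, which the recurring ror#2/ror#30 operands account for.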
.L_00_15:
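@ Rounds 0..14, five rounds per loop iteration. Each round fetches one
@ big-endian message word X[i] (byte by byte on pre-ARMv7 cores, which
@ lack unaligned ldr; via a single ldr, plus rev on little-endian, on
@ ARMv7+), stores it into the X[] frame, and applies
@ F_00_19(B,C,D) = (B & (C ^ D)) ^ D, i.e. the usual "choose" function
@ expressed with one and and two eors.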
#if __ARM_ARCH__<7
	ldrb r10,[r1,#2]
	ldrb r9,[r1,#3]
	ldrb r11,[r1,#1]
	add r7,r8,r7,ror#2 @ E+=K_00_19
	ldrb r12,[r1],#4
	orr r9,r9,r10,lsl#8
	eor r10,r5,r6 @ F_xx_xx
	orr r9,r9,r11,lsl#16
	add r7,r7,r3,ror#27 @ E+=ROR(A,27)
	orr r9,r9,r12,lsl#24
#else
	ldr r9,[r1],#4 @ handles unaligned
	add r7,r8,r7,ror#2 @ E+=K_00_19
	eor r10,r5,r6 @ F_xx_xx
	add r7,r7,r3,ror#27 @ E+=ROR(A,27)
#ifdef __ARMEL__
	rev r9,r9 @ byte swap
#endif
#endif
	and r10,r4,r10,ror#2
	add r7,r7,r9 @ E+=X[i]
	eor r10,r10,r6,ror#2 @ F_00_19(B,C,D)
	str r9,[r14,#-4]!
	add r7,r7,r10 @ E+=F_00_19(B,C,D)
#if __ARM_ARCH__<7
	ldrb r10,[r1,#2]
	ldrb r9,[r1,#3]
	ldrb r11,[r1,#1]
	add r6,r8,r6,ror#2 @ E+=K_00_19
	ldrb r12,[r1],#4
	orr r9,r9,r10,lsl#8
	eor r10,r4,r5 @ F_xx_xx
	orr r9,r9,r11,lsl#16
	add r6,r6,r7,ror#27 @ E+=ROR(A,27)
	orr r9,r9,r12,lsl#24
#else
	ldr r9,[r1],#4 @ handles unaligned
	add r6,r8,r6,ror#2 @ E+=K_00_19
	eor r10,r4,r5 @ F_xx_xx
	add r6,r6,r7,ror#27 @ E+=ROR(A,27)
#ifdef __ARMEL__
	rev r9,r9 @ byte swap
#endif
#endif
	and r10,r3,r10,ror#2
	add r6,r6,r9 @ E+=X[i]
	eor r10,r10,r5,ror#2 @ F_00_19(B,C,D)
	str r9,[r14,#-4]!
	add r6,r6,r10 @ E+=F_00_19(B,C,D)
#if __ARM_ARCH__<7
	ldrb r10,[r1,#2]
	ldrb r9,[r1,#3]
	ldrb r11,[r1,#1]
	add r5,r8,r5,ror#2 @ E+=K_00_19
	ldrb r12,[r1],#4
	orr r9,r9,r10,lsl#8
	eor r10,r3,r4 @ F_xx_xx
	orr r9,r9,r11,lsl#16
	add r5,r5,r6,ror#27 @ E+=ROR(A,27)
	orr r9,r9,r12,lsl#24
#else
	ldr r9,[r1],#4 @ handles unaligned
	add r5,r8,r5,ror#2 @ E+=K_00_19
	eor r10,r3,r4 @ F_xx_xx
	add r5,r5,r6,ror#27 @ E+=ROR(A,27)
#ifdef __ARMEL__
	rev r9,r9 @ byte swap
#endif
#endif
	and r10,r7,r10,ror#2
	add r5,r5,r9 @ E+=X[i]
	eor r10,r10,r4,ror#2 @ F_00_19(B,C,D)
	str r9,[r14,#-4]!
	add r5,r5,r10 @ E+=F_00_19(B,C,D)
#if __ARM_ARCH__<7
	ldrb r10,[r1,#2]
	ldrb r9,[r1,#3]
	ldrb r11,[r1,#1]
	add r4,r8,r4,ror#2 @ E+=K_00_19
	ldrb r12,[r1],#4
	orr r9,r9,r10,lsl#8
	eor r10,r7,r3 @ F_xx_xx
	orr r9,r9,r11,lsl#16
	add r4,r4,r5,ror#27 @ E+=ROR(A,27)
	orr r9,r9,r12,lsl#24
#else
	ldr r9,[r1],#4 @ handles unaligned
	add r4,r8,r4,ror#2 @ E+=K_00_19
	eor r10,r7,r3 @ F_xx_xx
	add r4,r4,r5,ror#27 @ E+=ROR(A,27)
#ifdef __ARMEL__
	rev r9,r9 @ byte swap
#endif
#endif
	and r10,r6,r10,ror#2
	add r4,r4,r9 @ E+=X[i]
	eor r10,r10,r3,ror#2 @ F_00_19(B,C,D)
	str r9,[r14,#-4]!
	add r4,r4,r10 @ E+=F_00_19(B,C,D)
#if __ARM_ARCH__<7
	ldrb r10,[r1,#2]
	ldrb r9,[r1,#3]
	ldrb r11,[r1,#1]
	add r3,r8,r3,ror#2 @ E+=K_00_19
	ldrb r12,[r1],#4
	orr r9,r9,r10,lsl#8
	eor r10,r6,r7 @ F_xx_xx
	orr r9,r9,r11,lsl#16
	add r3,r3,r4,ror#27 @ E+=ROR(A,27)
	orr r9,r9,r12,lsl#24
#else
	ldr r9,[r1],#4 @ handles unaligned
	add r3,r8,r3,ror#2 @ E+=K_00_19
	eor r10,r6,r7 @ F_xx_xx
	add r3,r3,r4,ror#27 @ E+=ROR(A,27)
#ifdef __ARMEL__
	rev r9,r9 @ byte swap
#endif
#endif
	and r10,r5,r10,ror#2
	add r3,r3,r9 @ E+=X[i]
	eor r10,r10,r7,ror#2 @ F_00_19(B,C,D)
	str r9,[r14,#-4]!
	add r3,r3,r10 @ E+=F_00_19(B,C,D)
	teq r14,sp
	bne .L_00_15 @ [((11+4)*5+2)*3]
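@ The loop above has covered rounds 0..14 (three passes of five
@ rounds). Round 15 follows unrolled below, and rounds 16..19 are the
@ first to use the schedule recurrence instead of loading input.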
#if __ARM_ARCH__<7
	ldrb r10,[r1,#2]
	ldrb r9,[r1,#3]
	ldrb r11,[r1,#1]
	add r7,r8,r7,ror#2 @ E+=K_00_19
	ldrb r12,[r1],#4
	orr r9,r9,r10,lsl#8
	eor r10,r5,r6 @ F_xx_xx
	orr r9,r9,r11,lsl#16
	add r7,r7,r3,ror#27 @ E+=ROR(A,27)
	orr r9,r9,r12,lsl#24
#else
	ldr r9,[r1],#4 @ handles unaligned
	add r7,r8,r7,ror#2 @ E+=K_00_19
	eor r10,r5,r6 @ F_xx_xx
	add r7,r7,r3,ror#27 @ E+=ROR(A,27)
#ifdef __ARMEL__
	rev r9,r9 @ byte swap
#endif
#endif
	and r10,r4,r10,ror#2
	add r7,r7,r9 @ E+=X[i]
	eor r10,r10,r6,ror#2 @ F_00_19(B,C,D)
	str r9,[r14,#-4]!
	add r7,r7,r10 @ E+=F_00_19(B,C,D)
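@ From here on each round derives its message word with the SHA-1
@ schedule recurrence X[i] = ROL(X[i-3]^X[i-8]^X[i-14]^X[i-16],1).
@ Relative to r14, which points at the most recently stored word,
@ those terms live at offsets #2*4, #7*4, #13*4 and #15*4, and the
@ rotate-left-by-one shows up as the pair of ror#31 operands.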
	ldr r9,[r14,#15*4]
	ldr r10,[r14,#13*4]
	ldr r11,[r14,#7*4]
	add r6,r8,r6,ror#2 @ E+=K_xx_xx
	ldr r12,[r14,#2*4]
	eor r9,r9,r10
	eor r11,r11,r12 @ 1 cycle stall
	eor r10,r4,r5 @ F_xx_xx
	mov r9,r9,ror#31
	add r6,r6,r7,ror#27 @ E+=ROR(A,27)
	eor r9,r9,r11,ror#31
	str r9,[r14,#-4]!
	and r10,r3,r10,ror#2 @ F_xx_xx
	@ F_xx_xx
	add r6,r6,r9 @ E+=X[i]
	eor r10,r10,r5,ror#2 @ F_00_19(B,C,D)
	add r6,r6,r10 @ E+=F_00_19(B,C,D)
	ldr r9,[r14,#15*4]
	ldr r10,[r14,#13*4]
	ldr r11,[r14,#7*4]
	add r5,r8,r5,ror#2 @ E+=K_xx_xx
	ldr r12,[r14,#2*4]
	eor r9,r9,r10
	eor r11,r11,r12 @ 1 cycle stall
	eor r10,r3,r4 @ F_xx_xx
	mov r9,r9,ror#31
	add r5,r5,r6,ror#27 @ E+=ROR(A,27)
	eor r9,r9,r11,ror#31
	str r9,[r14,#-4]!
	and r10,r7,r10,ror#2 @ F_xx_xx
	@ F_xx_xx
	add r5,r5,r9 @ E+=X[i]
	eor r10,r10,r4,ror#2 @ F_00_19(B,C,D)
	add r5,r5,r10 @ E+=F_00_19(B,C,D)
	ldr r9,[r14,#15*4]
	ldr r10,[r14,#13*4]
	ldr r11,[r14,#7*4]
	add r4,r8,r4,ror#2 @ E+=K_xx_xx
	ldr r12,[r14,#2*4]
	eor r9,r9,r10
	eor r11,r11,r12 @ 1 cycle stall
	eor r10,r7,r3 @ F_xx_xx
	mov r9,r9,ror#31
	add r4,r4,r5,ror#27 @ E+=ROR(A,27)
	eor r9,r9,r11,ror#31
	str r9,[r14,#-4]!
	and r10,r6,r10,ror#2 @ F_xx_xx
	@ F_xx_xx
	add r4,r4,r9 @ E+=X[i]
	eor r10,r10,r3,ror#2 @ F_00_19(B,C,D)
	add r4,r4,r10 @ E+=F_00_19(B,C,D)
	ldr r9,[r14,#15*4]
	ldr r10,[r14,#13*4]
	ldr r11,[r14,#7*4]
	add r3,r8,r3,ror#2 @ E+=K_xx_xx
	ldr r12,[r14,#2*4]
	eor r9,r9,r10
	eor r11,r11,r12 @ 1 cycle stall
	eor r10,r6,r7 @ F_xx_xx
	mov r9,r9,ror#31
	add r3,r3,r4,ror#27 @ E+=ROR(A,27)
	eor r9,r9,r11,ror#31
	str r9,[r14,#-4]!
	and r10,r5,r10,ror#2 @ F_xx_xx
	@ F_xx_xx
	add r3,r3,r9 @ E+=X[i]
	eor r10,r10,r7,ror#2 @ F_00_19(B,C,D)
	add r3,r3,r10 @ E+=F_00_19(B,C,D)
	ldr r8,.LK_20_39 @ [+15+16*4]
	sub sp,sp,#25*4
	cmn sp,#0 @ [+3], clear carry to denote 20_39
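@ Rounds 20..39 and 60..79 share the loop below: both use
@ F_20_39(B,C,D) = B^C^D and differ only in the K constant in r8.
@ The carry flag tells the two passes apart: "cmn sp,#0" above clears
@ it for 20..39, "cmp sp,#0" before the second entry sets it for
@ 60..79, "teq" at the loop bottom preserves it, and "bcs .L_done"
@ exits once the 60..79 pass has completed.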
.L_20_39_or_60_79:
	ldr r9,[r14,#15*4]
	ldr r10,[r14,#13*4]
	ldr r11,[r14,#7*4]
	add r7,r8,r7,ror#2 @ E+=K_xx_xx
	ldr r12,[r14,#2*4]
	eor r9,r9,r10
	eor r11,r11,r12 @ 1 cycle stall
	eor r10,r5,r6 @ F_xx_xx
	mov r9,r9,ror#31
	add r7,r7,r3,ror#27 @ E+=ROR(A,27)
	eor r9,r9,r11,ror#31
	str r9,[r14,#-4]!
	eor r10,r4,r10,ror#2 @ F_xx_xx
	@ F_xx_xx
	add r7,r7,r9 @ E+=X[i]
	add r7,r7,r10 @ E+=F_20_39(B,C,D)
	ldr r9,[r14,#15*4]
	ldr r10,[r14,#13*4]
	ldr r11,[r14,#7*4]
	add r6,r8,r6,ror#2 @ E+=K_xx_xx
	ldr r12,[r14,#2*4]
	eor r9,r9,r10
	eor r11,r11,r12 @ 1 cycle stall
	eor r10,r4,r5 @ F_xx_xx
	mov r9,r9,ror#31
	add r6,r6,r7,ror#27 @ E+=ROR(A,27)
	eor r9,r9,r11,ror#31
	str r9,[r14,#-4]!
	eor r10,r3,r10,ror#2 @ F_xx_xx
	@ F_xx_xx
	add r6,r6,r9 @ E+=X[i]
	add r6,r6,r10 @ E+=F_20_39(B,C,D)
	ldr r9,[r14,#15*4]
	ldr r10,[r14,#13*4]
	ldr r11,[r14,#7*4]
	add r5,r8,r5,ror#2 @ E+=K_xx_xx
	ldr r12,[r14,#2*4]
	eor r9,r9,r10
	eor r11,r11,r12 @ 1 cycle stall
	eor r10,r3,r4 @ F_xx_xx
	mov r9,r9,ror#31
	add r5,r5,r6,ror#27 @ E+=ROR(A,27)
	eor r9,r9,r11,ror#31
	str r9,[r14,#-4]!
	eor r10,r7,r10,ror#2 @ F_xx_xx
	@ F_xx_xx
	add r5,r5,r9 @ E+=X[i]
	add r5,r5,r10 @ E+=F_20_39(B,C,D)
	ldr r9,[r14,#15*4]
	ldr r10,[r14,#13*4]
	ldr r11,[r14,#7*4]
	add r4,r8,r4,ror#2 @ E+=K_xx_xx
	ldr r12,[r14,#2*4]
	eor r9,r9,r10
	eor r11,r11,r12 @ 1 cycle stall
	eor r10,r7,r3 @ F_xx_xx
	mov r9,r9,ror#31
	add r4,r4,r5,ror#27 @ E+=ROR(A,27)
	eor r9,r9,r11,ror#31
	str r9,[r14,#-4]!
	eor r10,r6,r10,ror#2 @ F_xx_xx
	@ F_xx_xx
	add r4,r4,r9 @ E+=X[i]
	add r4,r4,r10 @ E+=F_20_39(B,C,D)
	ldr r9,[r14,#15*4]
	ldr r10,[r14,#13*4]
	ldr r11,[r14,#7*4]
	add r3,r8,r3,ror#2 @ E+=K_xx_xx
	ldr r12,[r14,#2*4]
	eor r9,r9,r10
	eor r11,r11,r12 @ 1 cycle stall
	eor r10,r6,r7 @ F_xx_xx
	mov r9,r9,ror#31
	add r3,r3,r4,ror#27 @ E+=ROR(A,27)
	eor r9,r9,r11,ror#31
	str r9,[r14,#-4]!
	eor r10,r5,r10,ror#2 @ F_xx_xx
	@ F_xx_xx
	add r3,r3,r9 @ E+=X[i]
	add r3,r3,r10 @ E+=F_20_39(B,C,D)
	teq r14,sp @ preserve carry
	bne .L_20_39_or_60_79 @ [+((12+3)*5+2)*4]
	bcs .L_done @ [+((12+3)*5+2)*4], spare 300 bytes
	ldr r8,.LK_40_59
	sub sp,sp,#20*4 @ [+2]
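@ Rounds 40..59 use the majority function, evaluated here as
@ F_40_59(B,C,D) = (B & (C ^ D)) + (C & D). This equals
@ (B&C)|(B&D)|(C&D) because the two terms never have a set bit in
@ common, so the addition cannot carry between them.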
.L_40_59:
	ldr r9,[r14,#15*4]
	ldr r10,[r14,#13*4]
	ldr r11,[r14,#7*4]
	add r7,r8,r7,ror#2 @ E+=K_xx_xx
	ldr r12,[r14,#2*4]
	eor r9,r9,r10
	eor r11,r11,r12 @ 1 cycle stall
	eor r10,r5,r6 @ F_xx_xx
	mov r9,r9,ror#31
	add r7,r7,r3,ror#27 @ E+=ROR(A,27)
	eor r9,r9,r11,ror#31
	str r9,[r14,#-4]!
	and r10,r4,r10,ror#2 @ F_xx_xx
	and r11,r5,r6 @ F_xx_xx
	add r7,r7,r9 @ E+=X[i]
	add r7,r7,r10 @ E+=F_40_59(B,C,D)
	add r7,r7,r11,ror#2
	ldr r9,[r14,#15*4]
	ldr r10,[r14,#13*4]
	ldr r11,[r14,#7*4]
	add r6,r8,r6,ror#2 @ E+=K_xx_xx
	ldr r12,[r14,#2*4]
	eor r9,r9,r10
	eor r11,r11,r12 @ 1 cycle stall
	eor r10,r4,r5 @ F_xx_xx
	mov r9,r9,ror#31
	add r6,r6,r7,ror#27 @ E+=ROR(A,27)
	eor r9,r9,r11,ror#31
	str r9,[r14,#-4]!
	and r10,r3,r10,ror#2 @ F_xx_xx
	and r11,r4,r5 @ F_xx_xx
	add r6,r6,r9 @ E+=X[i]
	add r6,r6,r10 @ E+=F_40_59(B,C,D)
	add r6,r6,r11,ror#2
	ldr r9,[r14,#15*4]
	ldr r10,[r14,#13*4]
	ldr r11,[r14,#7*4]
	add r5,r8,r5,ror#2 @ E+=K_xx_xx
	ldr r12,[r14,#2*4]
	eor r9,r9,r10
	eor r11,r11,r12 @ 1 cycle stall
	eor r10,r3,r4 @ F_xx_xx
	mov r9,r9,ror#31
	add r5,r5,r6,ror#27 @ E+=ROR(A,27)
	eor r9,r9,r11,ror#31
	str r9,[r14,#-4]!
	and r10,r7,r10,ror#2 @ F_xx_xx
	and r11,r3,r4 @ F_xx_xx
	add r5,r5,r9 @ E+=X[i]
	add r5,r5,r10 @ E+=F_40_59(B,C,D)
	add r5,r5,r11,ror#2
	ldr r9,[r14,#15*4]
	ldr r10,[r14,#13*4]
	ldr r11,[r14,#7*4]
	add r4,r8,r4,ror#2 @ E+=K_xx_xx
	ldr r12,[r14,#2*4]
	eor r9,r9,r10
	eor r11,r11,r12 @ 1 cycle stall
	eor r10,r7,r3 @ F_xx_xx
	mov r9,r9,ror#31
	add r4,r4,r5,ror#27 @ E+=ROR(A,27)
	eor r9,r9,r11,ror#31
	str r9,[r14,#-4]!
	and r10,r6,r10,ror#2 @ F_xx_xx
	and r11,r7,r3 @ F_xx_xx
	add r4,r4,r9 @ E+=X[i]
	add r4,r4,r10 @ E+=F_40_59(B,C,D)
	add r4,r4,r11,ror#2
	ldr r9,[r14,#15*4]
	ldr r10,[r14,#13*4]
	ldr r11,[r14,#7*4]
	add r3,r8,r3,ror#2 @ E+=K_xx_xx
	ldr r12,[r14,#2*4]
	eor r9,r9,r10
	eor r11,r11,r12 @ 1 cycle stall
	eor r10,r6,r7 @ F_xx_xx
	mov r9,r9,ror#31
	add r3,r3,r4,ror#27 @ E+=ROR(A,27)
	eor r9,r9,r11,ror#31
	str r9,[r14,#-4]!
	and r10,r5,r10,ror#2 @ F_xx_xx
	and r11,r6,r7 @ F_xx_xx
	add r3,r3,r9 @ E+=X[i]
	add r3,r3,r10 @ E+=F_40_59(B,C,D)
	add r3,r3,r11,ror#2
	teq r14,sp
	bne .L_40_59 @ [+((12+5)*5+2)*4]
	ldr r8,.LK_60_79
	sub sp,sp,#20*4
	cmp sp,#0 @ set carry to denote 60_79
	b .L_20_39_or_60_79 @ [+4], spare 300 bytes
.L_done:
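@ One 64-byte block is done: pop the 80-word X[] frame, reload the
@ saved state, add in the working variables (c, d and e still carry
@ the deferred ror#2), store the result and loop while input remains.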
	add sp,sp,#80*4 @ "deallocate" stack frame
	ldmia r0,{r8,r9,r10,r11,r12}
	add r3,r8,r3
	add r4,r9,r4
	add r5,r10,r5,ror#2
	add r6,r11,r6,ror#2
	add r7,r12,r7,ror#2
	stmia r0,{r3,r4,r5,r6,r7}
	teq r1,r2
	bne .Lloop @ [+18], total 1307
#if __ARM_ARCH__>=5
	ldmia sp!,{r4-r12,pc}
#else
	ldmia sp!,{r4-r12,lr}
	tst lr,#1
	moveq pc,lr @ be binary compatible with V4, yet
	.word 0xe12fff1e @ interoperable with Thumb ISA:-)
#endif
.align 2
.LK_00_19: .word 0x5a827999
.LK_20_39: .word 0x6ed9eba1
.LK_40_59: .word 0x8f1bbcdc
.LK_60_79: .word 0xca62c1d6
.size sha1_block_data_order,.-sha1_block_data_order
.asciz "SHA1 block transform for ARMv4, CRYPTOGAMS by <appro@openssl.org>"
.align 2