#!/usr/bin/env perl
# sha512-armv4.pl
# ====================================================================
# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================
# SHA512 block procedure for ARMv4. September 2007.
# This code is ~4.5 (four and a half) times faster than code generated
# by gcc 3.4 and it spends ~72 clock cycles per byte.
# Byte order [in]dependence. =========================================
#
# Caller is expected to maintain a specific *dword* order in h[0-7],
# namely with the most significant dword at the *lower* address, which
# is reflected in the two parameters below. *Byte* order within these
# dwords is, in turn, whatever the *native* byte order of the current
# platform is.
  17. $hi=0;
  18. $lo=4;
  19. # ====================================================================
  20. $output=shift;
  21. open STDOUT,">$output";
  22. $ctx="r0";
  23. $inp="r1";
  24. $len="r2";
  25. $Tlo="r3";
  26. $Thi="r4";
  27. $Alo="r5";
  28. $Ahi="r6";
  29. $Elo="r7";
  30. $Ehi="r8";
  31. $t0="r9";
  32. $t1="r10";
  33. $t2="r11";
  34. $t3="r12";
  35. ############ r13 is stack pointer
  36. $Ktbl="r14";
  37. ############ r15 is program counter
  38. $Aoff=8*0;
  39. $Boff=8*1;
  40. $Coff=8*2;
  41. $Doff=8*3;
  42. $Eoff=8*4;
  43. $Foff=8*5;
  44. $Goff=8*6;
  45. $Hoff=8*7;
  46. $Xoff=8*8;
  47. sub BODY_00_15() {
  48. my $magic = shift;
  49. $code.=<<___;
  50. ldr $t2,[sp,#$Hoff+0] @ h.lo
  51. ldr $t3,[sp,#$Hoff+4] @ h.hi
  52. @ Sigma1(x) (ROTR((x),14) ^ ROTR((x),18) ^ ROTR((x),41))
  53. @ LO lo>>14^hi<<18 ^ lo>>18^hi<<14 ^ hi>>9^lo<<23
  54. @ HI hi>>14^lo<<18 ^ hi>>18^lo<<14 ^ lo>>9^hi<<23
  55. mov $t0,$Elo,lsr#14
  56. mov $t1,$Ehi,lsr#14
  57. eor $t0,$t0,$Ehi,lsl#18
  58. eor $t1,$t1,$Elo,lsl#18
  59. eor $t0,$t0,$Elo,lsr#18
  60. eor $t1,$t1,$Ehi,lsr#18
  61. eor $t0,$t0,$Ehi,lsl#14
  62. eor $t1,$t1,$Elo,lsl#14
  63. eor $t0,$t0,$Ehi,lsr#9
  64. eor $t1,$t1,$Elo,lsr#9
  65. eor $t0,$t0,$Elo,lsl#23
  66. eor $t1,$t1,$Ehi,lsl#23 @ Sigma1(e)
  67. adds $Tlo,$Tlo,$t0
  68. adc $Thi,$Thi,$t1 @ T += Sigma1(e)
  69. adds $Tlo,$Tlo,$t2
  70. adc $Thi,$Thi,$t3 @ T += h
  71. ldr $t0,[sp,#$Foff+0] @ f.lo
  72. ldr $t1,[sp,#$Foff+4] @ f.hi
  73. ldr $t2,[sp,#$Goff+0] @ g.lo
  74. ldr $t3,[sp,#$Goff+4] @ g.hi
  75. str $Elo,[sp,#$Eoff+0]
  76. str $Ehi,[sp,#$Eoff+4]
  77. str $Alo,[sp,#$Aoff+0]
  78. str $Ahi,[sp,#$Aoff+4]
  79. eor $t0,$t0,$t2
  80. eor $t1,$t1,$t3
  81. and $t0,$t0,$Elo
  82. and $t1,$t1,$Ehi
  83. eor $t0,$t0,$t2
  84. eor $t1,$t1,$t3 @ Ch(e,f,g)
  85. ldr $t2,[$Ktbl,#4] @ K[i].lo
  86. ldr $t3,[$Ktbl,#0] @ K[i].hi
  87. ldr $Elo,[sp,#$Doff+0] @ d.lo
  88. ldr $Ehi,[sp,#$Doff+4] @ d.hi
  89. adds $Tlo,$Tlo,$t0
  90. adc $Thi,$Thi,$t1 @ T += Ch(e,f,g)
  91. adds $Tlo,$Tlo,$t2
  92. adc $Thi,$Thi,$t3 @ T += K[i]
  93. adds $Elo,$Elo,$Tlo
  94. adc $Ehi,$Ehi,$Thi @ d += T
  95. and $t0,$t2,#0xff
  96. teq $t0,#$magic
  97. orreq $Ktbl,$Ktbl,#1
  98. ldr $t2,[sp,#$Boff+0] @ b.lo
  99. ldr $t3,[sp,#$Coff+0] @ c.lo
  100. @ Sigma0(x) (ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39))
  101. @ LO lo>>28^hi<<4 ^ hi>>2^lo<<30 ^ hi>>7^lo<<25
  102. @ HI hi>>28^lo<<4 ^ lo>>2^hi<<30 ^ lo>>7^hi<<25
  103. mov $t0,$Alo,lsr#28
  104. mov $t1,$Ahi,lsr#28
  105. eor $t0,$t0,$Ahi,lsl#4
  106. eor $t1,$t1,$Alo,lsl#4
  107. eor $t0,$t0,$Ahi,lsr#2
  108. eor $t1,$t1,$Alo,lsr#2
  109. eor $t0,$t0,$Alo,lsl#30
  110. eor $t1,$t1,$Ahi,lsl#30
  111. eor $t0,$t0,$Ahi,lsr#7
  112. eor $t1,$t1,$Alo,lsr#7
  113. eor $t0,$t0,$Alo,lsl#25
  114. eor $t1,$t1,$Ahi,lsl#25 @ Sigma0(a)
  115. adds $Tlo,$Tlo,$t0
  116. adc $Thi,$Thi,$t1 @ T += Sigma0(a)
  117. and $t0,$Alo,$t2
  118. orr $Alo,$Alo,$t2
  119. ldr $t1,[sp,#$Boff+4] @ b.hi
  120. ldr $t2,[sp,#$Coff+4] @ c.hi
  121. and $Alo,$Alo,$t3
  122. orr $Alo,$Alo,$t0 @ Maj(a,b,c).lo
  123. and $t3,$Ahi,$t1
  124. orr $Ahi,$Ahi,$t1
  125. and $Ahi,$Ahi,$t2
  126. orr $Ahi,$Ahi,$t3 @ Maj(a,b,c).hi
  127. adds $Alo,$Alo,$Tlo
  128. adc $Ahi,$Ahi,$Thi @ h += T
  129. sub sp,sp,#8
  130. add $Ktbl,$Ktbl,#8
  131. ___
  132. }
  133. $code=<<___;
  134. .text
  135. .code 32
  136. .type K512,%object
  137. .align 5
  138. K512:
  139. .word 0x428a2f98,0xd728ae22, 0x71374491,0x23ef65cd
  140. .word 0xb5c0fbcf,0xec4d3b2f, 0xe9b5dba5,0x8189dbbc
  141. .word 0x3956c25b,0xf348b538, 0x59f111f1,0xb605d019
  142. .word 0x923f82a4,0xaf194f9b, 0xab1c5ed5,0xda6d8118
  143. .word 0xd807aa98,0xa3030242, 0x12835b01,0x45706fbe
  144. .word 0x243185be,0x4ee4b28c, 0x550c7dc3,0xd5ffb4e2
  145. .word 0x72be5d74,0xf27b896f, 0x80deb1fe,0x3b1696b1
  146. .word 0x9bdc06a7,0x25c71235, 0xc19bf174,0xcf692694
  147. .word 0xe49b69c1,0x9ef14ad2, 0xefbe4786,0x384f25e3
  148. .word 0x0fc19dc6,0x8b8cd5b5, 0x240ca1cc,0x77ac9c65
  149. .word 0x2de92c6f,0x592b0275, 0x4a7484aa,0x6ea6e483
  150. .word 0x5cb0a9dc,0xbd41fbd4, 0x76f988da,0x831153b5
  151. .word 0x983e5152,0xee66dfab, 0xa831c66d,0x2db43210
  152. .word 0xb00327c8,0x98fb213f, 0xbf597fc7,0xbeef0ee4
  153. .word 0xc6e00bf3,0x3da88fc2, 0xd5a79147,0x930aa725
  154. .word 0x06ca6351,0xe003826f, 0x14292967,0x0a0e6e70
  155. .word 0x27b70a85,0x46d22ffc, 0x2e1b2138,0x5c26c926
  156. .word 0x4d2c6dfc,0x5ac42aed, 0x53380d13,0x9d95b3df
  157. .word 0x650a7354,0x8baf63de, 0x766a0abb,0x3c77b2a8
  158. .word 0x81c2c92e,0x47edaee6, 0x92722c85,0x1482353b
  159. .word 0xa2bfe8a1,0x4cf10364, 0xa81a664b,0xbc423001
  160. .word 0xc24b8b70,0xd0f89791, 0xc76c51a3,0x0654be30
  161. .word 0xd192e819,0xd6ef5218, 0xd6990624,0x5565a910
  162. .word 0xf40e3585,0x5771202a, 0x106aa070,0x32bbd1b8
  163. .word 0x19a4c116,0xb8d2d0c8, 0x1e376c08,0x5141ab53
  164. .word 0x2748774c,0xdf8eeb99, 0x34b0bcb5,0xe19b48a8
  165. .word 0x391c0cb3,0xc5c95a63, 0x4ed8aa4a,0xe3418acb
  166. .word 0x5b9cca4f,0x7763e373, 0x682e6ff3,0xd6b2b8a3
  167. .word 0x748f82ee,0x5defb2fc, 0x78a5636f,0x43172f60
  168. .word 0x84c87814,0xa1f0ab72, 0x8cc70208,0x1a6439ec
  169. .word 0x90befffa,0x23631e28, 0xa4506ceb,0xde82bde9
  170. .word 0xbef9a3f7,0xb2c67915, 0xc67178f2,0xe372532b
  171. .word 0xca273ece,0xea26619c, 0xd186b8c7,0x21c0c207
  172. .word 0xeada7dd6,0xcde0eb1e, 0xf57d4f7f,0xee6ed178
  173. .word 0x06f067aa,0x72176fba, 0x0a637dc5,0xa2c898a6
  174. .word 0x113f9804,0xbef90dae, 0x1b710b35,0x131c471b
  175. .word 0x28db77f5,0x23047d84, 0x32caab7b,0x40c72493
  176. .word 0x3c9ebe0a,0x15c9bebc, 0x431d67c4,0x9c100d4c
  177. .word 0x4cc5d4be,0xcb3e42b6, 0x597f299c,0xfc657e2a
  178. .word 0x5fcb6fab,0x3ad6faec, 0x6c44198c,0x4a475817
  179. .size K512,.-K512
  180. .global sha512_block_data_order
  181. .type sha512_block_data_order,%function
  182. sha512_block_data_order:
  183. sub r3,pc,#8 @ sha512_block_data_order
  184. add $len,$inp,$len,lsl#7 @ len to point at the end of inp
  185. stmdb sp!,{r4-r12,lr}
  186. sub $Ktbl,r3,#640 @ K512
  187. sub sp,sp,#9*8
  188. ldr $Elo,[$ctx,#$Eoff+$lo]
  189. ldr $Ehi,[$ctx,#$Eoff+$hi]
  190. ldr $t0, [$ctx,#$Goff+$lo]
  191. ldr $t1, [$ctx,#$Goff+$hi]
  192. ldr $t2, [$ctx,#$Hoff+$lo]
  193. ldr $t3, [$ctx,#$Hoff+$hi]
  194. .Loop:
  195. str $t0, [sp,#$Goff+0]
  196. str $t1, [sp,#$Goff+4]
  197. str $t2, [sp,#$Hoff+0]
  198. str $t3, [sp,#$Hoff+4]
  199. ldr $Alo,[$ctx,#$Aoff+$lo]
  200. ldr $Ahi,[$ctx,#$Aoff+$hi]
  201. ldr $Tlo,[$ctx,#$Boff+$lo]
  202. ldr $Thi,[$ctx,#$Boff+$hi]
  203. ldr $t0, [$ctx,#$Coff+$lo]
  204. ldr $t1, [$ctx,#$Coff+$hi]
  205. ldr $t2, [$ctx,#$Doff+$lo]
  206. ldr $t3, [$ctx,#$Doff+$hi]
  207. str $Tlo,[sp,#$Boff+0]
  208. str $Thi,[sp,#$Boff+4]
  209. str $t0, [sp,#$Coff+0]
  210. str $t1, [sp,#$Coff+4]
  211. str $t2, [sp,#$Doff+0]
  212. str $t3, [sp,#$Doff+4]
  213. ldr $Tlo,[$ctx,#$Foff+$lo]
  214. ldr $Thi,[$ctx,#$Foff+$hi]
  215. str $Tlo,[sp,#$Foff+0]
  216. str $Thi,[sp,#$Foff+4]
  217. .L00_15:
  218. ldrb $Tlo,[$inp,#7]
  219. ldrb $t0, [$inp,#6]
  220. ldrb $t1, [$inp,#5]
  221. ldrb $t2, [$inp,#4]
  222. ldrb $Thi,[$inp,#3]
  223. ldrb $t3, [$inp,#2]
  224. orr $Tlo,$Tlo,$t0,lsl#8
  225. ldrb $t0, [$inp,#1]
  226. orr $Tlo,$Tlo,$t1,lsl#16
  227. ldrb $t1, [$inp],#8
  228. orr $Tlo,$Tlo,$t2,lsl#24
  229. orr $Thi,$Thi,$t3,lsl#8
  230. orr $Thi,$Thi,$t0,lsl#16
  231. orr $Thi,$Thi,$t1,lsl#24
  232. str $Tlo,[sp,#$Xoff+0]
  233. str $Thi,[sp,#$Xoff+4]
  234. ___
  235. &BODY_00_15(0x94);
  236. $code.=<<___;
  237. tst $Ktbl,#1
  238. beq .L00_15
  239. bic $Ktbl,$Ktbl,#1
  240. .L16_79:
  241. ldr $t0,[sp,#`$Xoff+8*(16-1)`+0]
  242. ldr $t1,[sp,#`$Xoff+8*(16-1)`+4]
  243. ldr $t2,[sp,#`$Xoff+8*(16-14)`+0]
  244. ldr $t3,[sp,#`$Xoff+8*(16-14)`+4]
  245. @ sigma0(x) (ROTR((x),1) ^ ROTR((x),8) ^ ((x)>>7))
  246. @ LO lo>>1^hi<<31 ^ lo>>8^hi<<24 ^ lo>>7^hi<<25
  247. @ HI hi>>1^lo<<31 ^ hi>>8^lo<<24 ^ hi>>7
  248. mov $Tlo,$t0,lsr#1
  249. mov $Thi,$t1,lsr#1
  250. eor $Tlo,$Tlo,$t1,lsl#31
  251. eor $Thi,$Thi,$t0,lsl#31
  252. eor $Tlo,$Tlo,$t0,lsr#8
  253. eor $Thi,$Thi,$t1,lsr#8
  254. eor $Tlo,$Tlo,$t1,lsl#24
  255. eor $Thi,$Thi,$t0,lsl#24
  256. eor $Tlo,$Tlo,$t0,lsr#7
  257. eor $Thi,$Thi,$t1,lsr#7
  258. eor $Tlo,$Tlo,$t1,lsl#25
  259. @ sigma1(x) (ROTR((x),19) ^ ROTR((x),61) ^ ((x)>>6))
  260. @ LO lo>>19^hi<<13 ^ hi>>29^lo<<3 ^ lo>>6^hi<<26
  261. @ HI hi>>19^lo<<13 ^ lo>>29^hi<<3 ^ hi>>6
  262. mov $t0,$t2,lsr#19
  263. mov $t1,$t3,lsr#19
  264. eor $t0,$t0,$t3,lsl#13
  265. eor $t1,$t1,$t2,lsl#13
  266. eor $t0,$t0,$t3,lsr#29
  267. eor $t1,$t1,$t2,lsr#29
  268. eor $t0,$t0,$t2,lsl#3
  269. eor $t1,$t1,$t3,lsl#3
  270. eor $t0,$t0,$t2,lsr#6
  271. eor $t1,$t1,$t3,lsr#6
  272. eor $t0,$t0,$t3,lsl#26
  273. ldr $t2,[sp,#`$Xoff+8*(16-9)`+0]
  274. ldr $t3,[sp,#`$Xoff+8*(16-9)`+4]
  275. adds $Tlo,$Tlo,$t0
  276. adc $Thi,$Thi,$t1
  277. ldr $t0,[sp,#`$Xoff+8*16`+0]
  278. ldr $t1,[sp,#`$Xoff+8*16`+4]
  279. adds $Tlo,$Tlo,$t2
  280. adc $Thi,$Thi,$t3
  281. adds $Tlo,$Tlo,$t0
  282. adc $Thi,$Thi,$t1
  283. str $Tlo,[sp,#$Xoff+0]
  284. str $Thi,[sp,#$Xoff+4]
  285. ___
  286. &BODY_00_15(0x17);
  287. $code.=<<___;
  288. tst $Ktbl,#1
  289. beq .L16_79
  290. bic $Ktbl,$Ktbl,#1
  291. ldr $Tlo,[sp,#$Boff+0]
  292. ldr $Thi,[sp,#$Boff+4]
  293. ldr $t0, [$ctx,#$Aoff+$lo]
  294. ldr $t1, [$ctx,#$Aoff+$hi]
  295. ldr $t2, [$ctx,#$Boff+$lo]
  296. ldr $t3, [$ctx,#$Boff+$hi]
  297. adds $t0,$Alo,$t0
  298. adc $t1,$Ahi,$t1
  299. adds $t2,$Tlo,$t2
  300. adc $t3,$Thi,$t3
  301. str $t0, [$ctx,#$Aoff+$lo]
  302. str $t1, [$ctx,#$Aoff+$hi]
  303. str $t2, [$ctx,#$Boff+$lo]
  304. str $t3, [$ctx,#$Boff+$hi]
  305. ldr $Alo,[sp,#$Coff+0]
  306. ldr $Ahi,[sp,#$Coff+4]
  307. ldr $Tlo,[sp,#$Doff+0]
  308. ldr $Thi,[sp,#$Doff+4]
  309. ldr $t0, [$ctx,#$Coff+$lo]
  310. ldr $t1, [$ctx,#$Coff+$hi]
  311. ldr $t2, [$ctx,#$Doff+$lo]
  312. ldr $t3, [$ctx,#$Doff+$hi]
  313. adds $t0,$Alo,$t0
  314. adc $t1,$Ahi,$t1
  315. adds $t2,$Tlo,$t2
  316. adc $t3,$Thi,$t3
  317. str $t0, [$ctx,#$Coff+$lo]
  318. str $t1, [$ctx,#$Coff+$hi]
  319. str $t2, [$ctx,#$Doff+$lo]
  320. str $t3, [$ctx,#$Doff+$hi]
  321. ldr $Tlo,[sp,#$Foff+0]
  322. ldr $Thi,[sp,#$Foff+4]
  323. ldr $t0, [$ctx,#$Eoff+$lo]
  324. ldr $t1, [$ctx,#$Eoff+$hi]
  325. ldr $t2, [$ctx,#$Foff+$lo]
  326. ldr $t3, [$ctx,#$Foff+$hi]
  327. adds $Elo,$Elo,$t0
  328. adc $Ehi,$Ehi,$t1
  329. adds $t2,$Tlo,$t2
  330. adc $t3,$Thi,$t3
  331. str $Elo,[$ctx,#$Eoff+$lo]
  332. str $Ehi,[$ctx,#$Eoff+$hi]
  333. str $t2, [$ctx,#$Foff+$lo]
  334. str $t3, [$ctx,#$Foff+$hi]
  335. ldr $Alo,[sp,#$Goff+0]
  336. ldr $Ahi,[sp,#$Goff+4]
  337. ldr $Tlo,[sp,#$Hoff+0]
  338. ldr $Thi,[sp,#$Hoff+4]
  339. ldr $t0, [$ctx,#$Goff+$lo]
  340. ldr $t1, [$ctx,#$Goff+$hi]
  341. ldr $t2, [$ctx,#$Hoff+$lo]
  342. ldr $t3, [$ctx,#$Hoff+$hi]
  343. adds $t0,$Alo,$t0
  344. adc $t1,$Ahi,$t1
  345. adds $t2,$Tlo,$t2
  346. adc $t3,$Thi,$t3
  347. str $t0, [$ctx,#$Goff+$lo]
  348. str $t1, [$ctx,#$Goff+$hi]
  349. str $t2, [$ctx,#$Hoff+$lo]
  350. str $t3, [$ctx,#$Hoff+$hi]
  351. add sp,sp,#640
  352. sub $Ktbl,$Ktbl,#640
  353. teq $inp,$len
  354. bne .Loop
  355. add sp,sp,#8*9 @ destroy frame
  356. ldmia sp!,{r4-r12,lr}
  357. tst lr,#1
  358. moveq pc,lr @ be binary compatible with V4, yet
  359. bx lr @ interoperable with Thumb ISA:-)
  360. .size sha512_block_data_order,.-sha512_block_data_order
  361. .asciz "SHA512 block transform for ARMv4, CRYPTOGAMS by <appro\@openssl.org>"
  362. ___
  363. $code =~ s/\`([^\`]*)\`/eval $1/gem;
  364. $code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm; # make it possible to compile with -march=armv4
  365. print $code;
  366. close STDOUT; # enforce flush