sha512-parisc.pl 22 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810
  1. #! /usr/bin/env perl
  2. # Copyright 2009-2020 The OpenSSL Project Authors. All Rights Reserved.
  3. #
  4. # Licensed under the Apache License 2.0 (the "License"). You may not use
  5. # this file except in compliance with the License. You can obtain a copy
  6. # in the file LICENSE in the source distribution or at
  7. # https://www.openssl.org/source/license.html
  8. # ====================================================================
  9. # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
  10. # project. The module is, however, dual licensed under OpenSSL and
  11. # CRYPTOGAMS licenses depending on where you obtain it. For further
  12. # details see http://www.openssl.org/~appro/cryptogams/.
  13. # ====================================================================
  14. # SHA256/512 block procedure for PA-RISC.
  15. # June 2009.
  16. #
  17. # SHA256 performance is >75% better than gcc 3.2 generated code on
  18. # PA-7100LC. Compared to code generated by vendor compiler this
  19. # implementation is almost 70% faster in 64-bit build, but delivers
  20. # virtually same performance in 32-bit build on PA-8600.
  21. #
  22. # SHA512 performance is >2.9x better than gcc 3.2 generated code on
  23. # PA-7100LC, PA-RISC 1.1 processor. Then implementation detects if the
  24. # code is executed on PA-RISC 2.0 processor and switches to 64-bit
  25. # code path delivering adequate performance even in "blended" 32-bit
  26. # build. Though 64-bit code is not any faster than code generated by
  27. # vendor compiler on PA-8600...
  28. #
  29. # Special thanks to polarhome.com for providing HP-UX account.
  30. # $output is the last argument if it looks like a file (it has an extension)
  31. # $flavour is the first argument if it doesn't look like a file
  # Command-line handling.
  #   $output  - last argument, taken only if it looks like a file name
  #              (ends in ".<word>"); otherwise undef.
  #   $flavour - first remaining argument, taken only if it contains no dot;
  #              otherwise undef.
  $output  = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
  $flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;

  # Redirect STDOUT to the output file.  Three-argument open keeps the file
  # name from being interpreted as part of the mode, and a failure is fatal
  # instead of silently leaving the generated code on the original STDOUT.
  if ($output) {
      open STDOUT, '>', $output
          or die "can't open $output: $!";
  }
  # ABI-dependent parameters.  A flavour containing "64" selects the wide
  # (2.0W) settings with 8-byte size_t and doubleword push/pop mnemonics;
  # anything else (including no flavour at all) selects the 32-bit settings.
  ($LEVEL, $SIZE_T, $FRAME_MARKER, $SAVED_RP,
   $PUSH,  $PUSHMA, $POP,          $POPMB) =
      ($flavour =~ /64/)
      ? ("2.0W", 8, 80, 16, "std", "std,ma", "ldd", "ldd,mb")
      : ("1.0",  4, 48, 20, "stw", "stwm",   "ldw", "ldwm");
  # Hash-variant parameters, chosen from the output file name: a name
  # containing "512" generates SHA-512, anything else SHA-256.
  #   $SZ         - word size in bytes
  #   @Sigma*/@sigma* - rotate/shift amounts used by the round subs
  #   $rounds     - number of rounds
  #   $LAST10BITS - low 10 bits of the final table word (end-of-table marker)
  #   $LD/$LDM/$ST - load, load-with-modify and store mnemonics for one word
  if ($output =~ /512/) {
      ($func, $SZ, $rounds, $LAST10BITS) =
          ("sha512_block_data_order", 8, 80, 0x017);
      @Sigma0 = (28,34,39);
      @Sigma1 = (14,18,41);
      @sigma0 = (1, 8, 7);
      @sigma1 = (19,61, 6);
      ($LD, $LDM, $ST) = ("ldd", "ldd,ma", "std");
  } else {
      ($func, $SZ, $rounds, $LAST10BITS) =
          ("sha256_block_data_order", 4, 64, 0x0f2);
      @Sigma0 = ( 2,13,22);
      @Sigma1 = ( 6,11,25);
      @sigma0 = ( 7,18, 3);
      @sigma1 = (17,19,10);
      ($LD, $LDM, $ST) = ("ldw", "ldwm", "stw");
  }
  # Stack frame layout.
  $XOFF  = 16*$SZ + 32;                         # local variables
  $FRAME = 16*$SIZE_T + $FRAME_MARKER + $XOFF;  # 16 saved regs + frame marker
                                                # [+ argument transfer] + locals
  $XOFF += $FRAME_MARKER;       # distance between %sp and local variables

  # Argument registers; each is reused as a scratch register further down.
  ($ctx, $inp, $num) = ("%r26", "%r25", "%r24");  # zapped by $a0, $a1, $t0
  ($a0,  $a1,  $t0)  = ("%r26", "%r25", "%r24");
  $t1  = "%r29";
  $Tbl = "%r31";

  # Working variables a..h, also reachable by name as $A..$H.
  ($A,$B,$C,$D,$E,$F,$G,$H) =
      ("%r17","%r18","%r19","%r20","%r21","%r22","%r23","%r28");
  @V = ($A,$B,$C,$D,$E,$F,$G,$H);

  # Message schedule registers %r1..%r16, plus $inp as a 17th slot.
  @X = ((map { "%r$_" } 1 .. 16), $inp);
  # ROUND_00_15($i, $a,$b,$c,$d,$e,$f,$g,$h)
  # Appends the instruction sequence for one round to $code.
  #   $i      - round index; the message word used is @X[$i%16]
  #   $a..$h  - names of the registers holding working variables a..h
  # The emitted code adds $t1, X[i], Ch(e,f,g) and Sigma1(e) to h, adds h to d,
  # then adds Sigma0(a) and Maj(a,b,c) to h.  $t1 is expected to already hold
  # the table word fetched by an earlier $LDM; for $i<15 this sub emits the
  # $LDM that fetches the next one.  Backtick expressions and the made-up
  # _ror mnemonic are resolved later by the output loop at the end of the file.
  95. sub ROUND_00_15 {
  96. my ($i,$a,$b,$c,$d,$e,$f,$g,$h)=@_;
  97. $code.=<<___;
  98. _ror $e,$Sigma1[0],$a0
  99. and $f,$e,$t0
  100. _ror $e,$Sigma1[1],$a1
  101. addl $t1,$h,$h
  102. andcm $g,$e,$t1
  103. xor $a1,$a0,$a0
  104. _ror $a1,`$Sigma1[2]-$Sigma1[1]`,$a1
  105. or $t0,$t1,$t1 ; Ch(e,f,g)
  106. addl @X[$i%16],$h,$h
  107. xor $a0,$a1,$a1 ; Sigma1(e)
  108. addl $t1,$h,$h
  109. _ror $a,$Sigma0[0],$a0
  110. addl $a1,$h,$h
  111. _ror $a,$Sigma0[1],$a1
  112. and $a,$b,$t0
  113. and $a,$c,$t1
  114. xor $a1,$a0,$a0
  115. _ror $a1,`$Sigma0[2]-$Sigma0[1]`,$a1
  116. xor $t1,$t0,$t0
  117. and $b,$c,$t1
  118. xor $a0,$a1,$a1 ; Sigma0(a)
  119. addl $h,$d,$d
  120. xor $t1,$t0,$t0 ; Maj(a,b,c)
  121. `"$LDM $SZ($Tbl),$t1" if ($i<15)`
  122. addl $a1,$h,$h
  123. addl $t0,$h,$h
  124. ___
  125. }
  # ROUND_16_xx($i, $a,$b,$c,$d,$e,$f,$g,$h)
  # Appends one round with message-schedule expansion to $code.
  #   $i      - round index (16 or above); reduced by 16 before indexing @X
  #   $a..$h  - names of the registers holding working variables a..h
  # The emitted code updates X[i] in place with
  #   X[i] += X[i+9] + sigma0(X[i+1]) + sigma1(X[i+14])   (indices mod 16),
  # fetches the next table word into $t1, and then emits the round itself
  # through ROUND_00_15.  When the reduced index is 15 it additionally emits a
  # comparison of the low 10 bits of the fetched word with $LAST10BITS and an
  # "ldo 1($Tbl),$Tbl" that marks the end of the table.
  # NOTE(review): which outcome of comiclr,<> nullifies the ldo is not
  # visible from this file - confirm against the PA-RISC manual.
  126. sub ROUND_16_xx {
  127. my ($i,$a,$b,$c,$d,$e,$f,$g,$h)=@_;
  128. $i-=16;
  129. $code.=<<___;
  130. _ror @X[($i+1)%16],$sigma0[0],$a0
  131. _ror @X[($i+1)%16],$sigma0[1],$a1
  132. addl @X[($i+9)%16],@X[$i],@X[$i]
  133. _ror @X[($i+14)%16],$sigma1[0],$t0
  134. _ror @X[($i+14)%16],$sigma1[1],$t1
  135. xor $a1,$a0,$a0
  136. _shr @X[($i+1)%16],$sigma0[2],$a1
  137. xor $t1,$t0,$t0
  138. _shr @X[($i+14)%16],$sigma1[2],$t1
  139. xor $a1,$a0,$a0 ; sigma0(X[(i+1)&0x0f])
  140. xor $t1,$t0,$t0 ; sigma1(X[(i+14)&0x0f])
  141. $LDM $SZ($Tbl),$t1
  142. addl $a0,@X[$i],@X[$i]
  143. addl $t0,@X[$i],@X[$i]
  144. ___
  145. $code.=<<___ if ($i==15);
  146. extru $t1,31,10,$a1
  147. comiclr,<> $LAST10BITS,$a1,%r0
  148. ldo 1($Tbl),$Tbl ; signal end of $Tbl
  149. ___
  150. &ROUND_00_15($i+16,$a,$b,$c,$d,$e,$f,$g,$h);
  151. }
  # Start the generated assembly: architecture level, text section directives
  # and the 64-byte aligned L$table label that the constant table follows.
  # This assignment (re)initialises $code; everything below appends to it.
  152. $code=<<___;
  153. .LEVEL $LEVEL
  154. .SPACE \$TEXT\$
  155. .SUBSPA \$CODE\$,QUAD=0,ALIGN=8,ACCESS=0x2C,CODE_ONLY
  156. .ALIGN 64
  157. L\$table
  158. ___
  # Constant table for the 8-byte word variant (SHA-512 build).  Each table
  # entry is written as two consecutive 32-bit words, high half first; the
  # low 10 bits of the very last word equal the SHA-512 $LAST10BITS (0x017).
  159. $code.=<<___ if ($SZ==8);
  160. .WORD 0x428a2f98,0xd728ae22,0x71374491,0x23ef65cd
  161. .WORD 0xb5c0fbcf,0xec4d3b2f,0xe9b5dba5,0x8189dbbc
  162. .WORD 0x3956c25b,0xf348b538,0x59f111f1,0xb605d019
  163. .WORD 0x923f82a4,0xaf194f9b,0xab1c5ed5,0xda6d8118
  164. .WORD 0xd807aa98,0xa3030242,0x12835b01,0x45706fbe
  165. .WORD 0x243185be,0x4ee4b28c,0x550c7dc3,0xd5ffb4e2
  166. .WORD 0x72be5d74,0xf27b896f,0x80deb1fe,0x3b1696b1
  167. .WORD 0x9bdc06a7,0x25c71235,0xc19bf174,0xcf692694
  168. .WORD 0xe49b69c1,0x9ef14ad2,0xefbe4786,0x384f25e3
  169. .WORD 0x0fc19dc6,0x8b8cd5b5,0x240ca1cc,0x77ac9c65
  170. .WORD 0x2de92c6f,0x592b0275,0x4a7484aa,0x6ea6e483
  171. .WORD 0x5cb0a9dc,0xbd41fbd4,0x76f988da,0x831153b5
  172. .WORD 0x983e5152,0xee66dfab,0xa831c66d,0x2db43210
  173. .WORD 0xb00327c8,0x98fb213f,0xbf597fc7,0xbeef0ee4
  174. .WORD 0xc6e00bf3,0x3da88fc2,0xd5a79147,0x930aa725
  175. .WORD 0x06ca6351,0xe003826f,0x14292967,0x0a0e6e70
  176. .WORD 0x27b70a85,0x46d22ffc,0x2e1b2138,0x5c26c926
  177. .WORD 0x4d2c6dfc,0x5ac42aed,0x53380d13,0x9d95b3df
  178. .WORD 0x650a7354,0x8baf63de,0x766a0abb,0x3c77b2a8
  179. .WORD 0x81c2c92e,0x47edaee6,0x92722c85,0x1482353b
  180. .WORD 0xa2bfe8a1,0x4cf10364,0xa81a664b,0xbc423001
  181. .WORD 0xc24b8b70,0xd0f89791,0xc76c51a3,0x0654be30
  182. .WORD 0xd192e819,0xd6ef5218,0xd6990624,0x5565a910
  183. .WORD 0xf40e3585,0x5771202a,0x106aa070,0x32bbd1b8
  184. .WORD 0x19a4c116,0xb8d2d0c8,0x1e376c08,0x5141ab53
  185. .WORD 0x2748774c,0xdf8eeb99,0x34b0bcb5,0xe19b48a8
  186. .WORD 0x391c0cb3,0xc5c95a63,0x4ed8aa4a,0xe3418acb
  187. .WORD 0x5b9cca4f,0x7763e373,0x682e6ff3,0xd6b2b8a3
  188. .WORD 0x748f82ee,0x5defb2fc,0x78a5636f,0x43172f60
  189. .WORD 0x84c87814,0xa1f0ab72,0x8cc70208,0x1a6439ec
  190. .WORD 0x90befffa,0x23631e28,0xa4506ceb,0xde82bde9
  191. .WORD 0xbef9a3f7,0xb2c67915,0xc67178f2,0xe372532b
  192. .WORD 0xca273ece,0xea26619c,0xd186b8c7,0x21c0c207
  193. .WORD 0xeada7dd6,0xcde0eb1e,0xf57d4f7f,0xee6ed178
  194. .WORD 0x06f067aa,0x72176fba,0x0a637dc5,0xa2c898a6
  195. .WORD 0x113f9804,0xbef90dae,0x1b710b35,0x131c471b
  196. .WORD 0x28db77f5,0x23047d84,0x32caab7b,0x40c72493
  197. .WORD 0x3c9ebe0a,0x15c9bebc,0x431d67c4,0x9c100d4c
  198. .WORD 0x4cc5d4be,0xcb3e42b6,0x597f299c,0xfc657e2a
  199. .WORD 0x5fcb6fab,0x3ad6faec,0x6c44198c,0x4a475817
  200. ___
  # Constant table for the 4-byte word variant (SHA-256 build), one 32-bit
  # word per entry; the low 10 bits of the last word equal the SHA-256
  # $LAST10BITS (0x0f2).
  201. $code.=<<___ if ($SZ==4);
  202. .WORD 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
  203. .WORD 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
  204. .WORD 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
  205. .WORD 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
  206. .WORD 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
  207. .WORD 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
  208. .WORD 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
  209. .WORD 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
  210. .WORD 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
  211. .WORD 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
  212. .WORD 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
  213. .WORD 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
  214. .WORD 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
  215. .WORD 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
  216. .WORD 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
  217. .WORD 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
  218. ___
  # Function entry: export $func, save the return pointer and %r3..%r18 in a
  # new frame of $FRAME bytes, turn $num (block count) into a pointer to the
  # end of the input, stash the three arguments below the frame marker, and
  # compute the address of L$table relative to L$pic into $Tbl.
  219. $code.=<<___;
  220. .EXPORT $func,ENTRY,ARGW0=GR,ARGW1=GR,ARGW2=GR
  221. .ALIGN 64
  222. $func
  223. .PROC
  224. .CALLINFO FRAME=`$FRAME-16*$SIZE_T`,NO_CALLS,SAVE_RP,ENTRY_GR=18
  225. .ENTRY
  226. $PUSH %r2,-$SAVED_RP(%sp) ; standard prologue
  227. $PUSHMA %r3,$FRAME(%sp)
  228. $PUSH %r4,`-$FRAME+1*$SIZE_T`(%sp)
  229. $PUSH %r5,`-$FRAME+2*$SIZE_T`(%sp)
  230. $PUSH %r6,`-$FRAME+3*$SIZE_T`(%sp)
  231. $PUSH %r7,`-$FRAME+4*$SIZE_T`(%sp)
  232. $PUSH %r8,`-$FRAME+5*$SIZE_T`(%sp)
  233. $PUSH %r9,`-$FRAME+6*$SIZE_T`(%sp)
  234. $PUSH %r10,`-$FRAME+7*$SIZE_T`(%sp)
  235. $PUSH %r11,`-$FRAME+8*$SIZE_T`(%sp)
  236. $PUSH %r12,`-$FRAME+9*$SIZE_T`(%sp)
  237. $PUSH %r13,`-$FRAME+10*$SIZE_T`(%sp)
  238. $PUSH %r14,`-$FRAME+11*$SIZE_T`(%sp)
  239. $PUSH %r15,`-$FRAME+12*$SIZE_T`(%sp)
  240. $PUSH %r16,`-$FRAME+13*$SIZE_T`(%sp)
  241. $PUSH %r17,`-$FRAME+14*$SIZE_T`(%sp)
  242. $PUSH %r18,`-$FRAME+15*$SIZE_T`(%sp)
  243. _shl $num,`log(16*$SZ)/log(2)`,$num
  244. addl $inp,$num,$num ; $num to point at the end of $inp
  245. $PUSH $num,`-$FRAME_MARKER-4*$SIZE_T`(%sp) ; save arguments
  246. $PUSH $inp,`-$FRAME_MARKER-3*$SIZE_T`(%sp)
  247. $PUSH $ctx,`-$FRAME_MARKER-2*$SIZE_T`(%sp)
  248. blr %r0,$Tbl
  249. ldi 3,$t1
  250. L\$pic
  251. andcm $Tbl,$t1,$Tbl ; wipe privilege level
  252. ldo L\$table-L\$pic($Tbl),$Tbl
  253. ___
  # Run-time processor probe, emitted only for SHA-512 in a 32-bit build.
  # It loads 31 into %cr11 (%sar) and issues an extrd; per the inline note
  # that instruction also executes on PA-RISC 1.0, and the following branch
  # sends execution to the 32-bit-only implementation at L$parisc1.
  # NOTE(review): how the extrd,u,*= result lets PA-RISC 2.0 skip the branch
  # is not spelled out in this file - confirm against the architecture manual.
  254. $code.=<<___ if ($SZ==8 && $SIZE_T==4);
  255. ldi 31,$t1
  256. mtctl $t1,%cr11
  257. extrd,u,*= $t1,%sar,1,$t1 ; executes on PA-RISC 1.0
  258. b L\$parisc1
  259. nop
  260. ___
  # Load the eight context words into $A..$H, derive the alignment factor
  # from the low bits of $inp into %sar, and open the per-block loop at L$oop,
  # where the first table word is fetched into $t1 and $t0 receives $inp
  # rounded down to a multiple of $SZ.
  261. $code.=<<___;
  262. $LD `0*$SZ`($ctx),$A ; load context
  263. $LD `1*$SZ`($ctx),$B
  264. $LD `2*$SZ`($ctx),$C
  265. $LD `3*$SZ`($ctx),$D
  266. $LD `4*$SZ`($ctx),$E
  267. $LD `5*$SZ`($ctx),$F
  268. $LD `6*$SZ`($ctx),$G
  269. $LD `7*$SZ`($ctx),$H
  270. extru $inp,31,`log($SZ)/log(2)`,$t0
  271. sh3addl $t0,%r0,$t0
  272. subi `8*$SZ`,$t0,$t0
  273. mtctl $t0,%cr11 ; load %sar with align factor
  274. L\$oop
  275. ldi `$SZ-1`,$t0
  276. $LDM $SZ($Tbl),$t1
  277. andcm $inp,$t0,$t0 ; align $inp
  278. ___
  # Words 0..14 of the block are loaded unconditionally from the aligned
  # address in $t0.
  279. for ($i=0;$i<15;$i++) { # load input block
  280. $code.="\t$LD `$SZ*$i`($t0),@X[$i]\n"; }
  # Word 15 is loaded in the line following the compare-and-branch; a 17th
  # word goes into @X[16] (which is $inp) for the unaligned case.
  281. $code.=<<___;
  282. cmpb,*= $inp,$t0,L\$aligned
  283. $LD `$SZ*15`($t0),@X[15]
  284. $LD `$SZ*16`($t0),@X[16]
  285. ___
  # Unaligned input: merge each pair of neighbouring words with _align.
  286. for ($i=0;$i<16;$i++) { # align data
  287. $code.="\t_align @X[$i],@X[$i+1],@X[$i]\n"; }
  288. $code.=<<___;
  289. L\$aligned
  290. nop ; otherwise /usr/ccs/bin/as is confused by below .WORD
  291. ___
  # Rounds 0..15; @V is rotated by one position after every round so the
  # register names shift roles instead of the values being moved.
  292. for($i=0;$i<16;$i++) { &ROUND_00_15($i,@V); unshift(@V,pop(@V)); }
  293. $code.=<<___;
  294. L\$rounds
  295. nop ; otherwise /usr/ccs/bin/as is confused by below .WORD
  296. ___
  # Sixteen expansion rounds form the body of the L$rounds loop; $i carries
  # over from the preceding loop, so this emits rounds 16..31 once.
  297. for(;$i<32;$i++) { &ROUND_16_xx($i,@V); unshift(@V,pop(@V)); }
  # Loop back to L$rounds until the end-of-table mark (low bit of $Tbl, set by
  # ROUND_16_xx) is seen, then restore the arguments, rewind $Tbl, add the
  # working variables into the context, advance $inp by one block and repeat
  # from L$oop until $inp reaches $num.
  298. $code.=<<___;
  299. bb,>= $Tbl,31,L\$rounds ; end of $Tbl signalled?
  300. nop
  301. $POP `-$FRAME_MARKER-2*$SIZE_T`(%sp),$ctx ; restore arguments
  302. $POP `-$FRAME_MARKER-3*$SIZE_T`(%sp),$inp
  303. $POP `-$FRAME_MARKER-4*$SIZE_T`(%sp),$num
  304. ldo `-$rounds*$SZ-1`($Tbl),$Tbl ; rewind $Tbl
  305. $LD `0*$SZ`($ctx),@X[0] ; load context
  306. $LD `1*$SZ`($ctx),@X[1]
  307. $LD `2*$SZ`($ctx),@X[2]
  308. $LD `3*$SZ`($ctx),@X[3]
  309. $LD `4*$SZ`($ctx),@X[4]
  310. $LD `5*$SZ`($ctx),@X[5]
  311. addl @X[0],$A,$A
  312. $LD `6*$SZ`($ctx),@X[6]
  313. addl @X[1],$B,$B
  314. $LD `7*$SZ`($ctx),@X[7]
  315. ldo `16*$SZ`($inp),$inp ; advance $inp
  316. $ST $A,`0*$SZ`($ctx) ; save context
  317. addl @X[2],$C,$C
  318. $ST $B,`1*$SZ`($ctx)
  319. addl @X[3],$D,$D
  320. $ST $C,`2*$SZ`($ctx)
  321. addl @X[4],$E,$E
  322. $ST $D,`3*$SZ`($ctx)
  323. addl @X[5],$F,$F
  324. $ST $E,`4*$SZ`($ctx)
  325. addl @X[6],$G,$G
  326. $ST $F,`5*$SZ`($ctx)
  327. addl @X[7],$H,$H
  328. $ST $G,`6*$SZ`($ctx)
  329. $ST $H,`7*$SZ`($ctx)
  330. cmpb,*<>,n $inp,$num,L\$oop
  331. $PUSH $inp,`-$FRAME_MARKER-3*$SIZE_T`(%sp) ; save $inp
  332. ___
  # Everything down to the matching "}}" is generated only for SHA-512 in a
  # 32-bit build.  It first emits a branch to L$done that ends the code path
  # above, then opens the L$parisc1 implementation, which keeps every 64-bit
  # value as a hi/lo pair of 32-bit registers.
  333. if ($SZ==8 && $SIZE_T==4) # SHA512 for 32-bit PA-RISC 1.0
  334. {{
  335. $code.=<<___;
  336. b L\$done
  337. nop
  338. .ALIGN 64
  339. L\$parisc1
  340. ___
  # Register map for this path: working variables a..h occupy %r1..%r16 as
  # hi/lo pairs, scratch registers are reassigned, and @X shrinks to two
  # hi/lo pairs because the message schedule itself is kept on the stack.
  341. @V=( $Ahi, $Alo, $Bhi, $Blo, $Chi, $Clo, $Dhi, $Dlo,
  342. $Ehi, $Elo, $Fhi, $Flo, $Ghi, $Glo, $Hhi, $Hlo) =
  343. ( "%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7", "%r8",
  344. "%r9","%r10","%r11","%r12","%r13","%r14","%r15","%r16");
  345. $a0 ="%r17";
  346. $a1 ="%r18";
  347. $a2 ="%r19";
  348. $a3 ="%r20";
  349. $t0 ="%r21";
  350. $t1 ="%r22";
  351. $t2 ="%r28";
  352. $t3 ="%r29";
  353. $Tbl="%r31";
  354. @X=("%r23","%r24","%r25","%r26"); # zaps $num,$inp,$ctx
  # ROUND_00_15_pa1($i, <16 register names: a..h as hi,lo pairs>, $flag)
  # Appends one SHA-512 round for the 32-bit-only path to $code, using
  # add/addc pairs for 64-bit additions and shd pairs for 64-bit rotations.
  #   $i    - round index (0..15)
  #   $flag - true when called from ROUND_16_xx_pa1, which has already
  #           emitted the load of X[i+1]; when false this sub emits that load
  #           from the stack itself.
  # The current message word is taken from the first register pair of @X and
  # the same pair is then reused to fetch the table entry K[i].  With $i==15
  # and $flag set, the emitted code compares the low 10 bits of K's low word
  # with $LAST10BITS and branches back to L$rounds_pa1 while more table
  # entries remain.  On return @X is rotated by one pair, so the pair that
  # held X[i+1] becomes the current one for the next round.
  355. sub ROUND_00_15_pa1 {
  356. my ($i,$ahi,$alo,$bhi,$blo,$chi,$clo,$dhi,$dlo,
  357. $ehi,$elo,$fhi,$flo,$ghi,$glo,$hhi,$hlo,$flag)=@_;
  358. my ($Xhi,$Xlo,$Xnhi,$Xnlo) = @X;
  359. $code.=<<___ if (!$flag);
  360. ldw `-$XOFF+8*(($i+1)%16)`(%sp),$Xnhi
  361. ldw `-$XOFF+8*(($i+1)%16)+4`(%sp),$Xnlo ; load X[i+1]
  362. ___
  363. $code.=<<___;
  364. shd $ehi,$elo,$Sigma1[0],$t0
  365. add $Xlo,$hlo,$hlo
  366. shd $elo,$ehi,$Sigma1[0],$t1
  367. addc $Xhi,$hhi,$hhi ; h += X[i]
  368. shd $ehi,$elo,$Sigma1[1],$t2
  369. ldwm 8($Tbl),$Xhi
  370. shd $elo,$ehi,$Sigma1[1],$t3
  371. ldw -4($Tbl),$Xlo ; load K[i]
  372. xor $t2,$t0,$t0
  373. xor $t3,$t1,$t1
  374. and $flo,$elo,$a0
  375. and $fhi,$ehi,$a1
  376. shd $ehi,$elo,$Sigma1[2],$t2
  377. andcm $glo,$elo,$a2
  378. shd $elo,$ehi,$Sigma1[2],$t3
  379. andcm $ghi,$ehi,$a3
  380. xor $t2,$t0,$t0
  381. xor $t3,$t1,$t1 ; Sigma1(e)
  382. add $Xlo,$hlo,$hlo
  383. xor $a2,$a0,$a0
  384. addc $Xhi,$hhi,$hhi ; h += K[i]
  385. xor $a3,$a1,$a1 ; Ch(e,f,g)
  386. add $t0,$hlo,$hlo
  387. shd $ahi,$alo,$Sigma0[0],$t0
  388. addc $t1,$hhi,$hhi ; h += Sigma1(e)
  389. shd $alo,$ahi,$Sigma0[0],$t1
  390. add $a0,$hlo,$hlo
  391. shd $ahi,$alo,$Sigma0[1],$t2
  392. addc $a1,$hhi,$hhi ; h += Ch(e,f,g)
  393. shd $alo,$ahi,$Sigma0[1],$t3
  394. xor $t2,$t0,$t0
  395. xor $t3,$t1,$t1
  396. shd $ahi,$alo,$Sigma0[2],$t2
  397. and $alo,$blo,$a0
  398. shd $alo,$ahi,$Sigma0[2],$t3
  399. and $ahi,$bhi,$a1
  400. xor $t2,$t0,$t0
  401. xor $t3,$t1,$t1 ; Sigma0(a)
  402. and $alo,$clo,$a2
  403. and $ahi,$chi,$a3
  404. xor $a2,$a0,$a0
  405. add $hlo,$dlo,$dlo
  406. xor $a3,$a1,$a1
  407. addc $hhi,$dhi,$dhi ; d += h
  408. and $blo,$clo,$a2
  409. add $t0,$hlo,$hlo
  410. and $bhi,$chi,$a3
  411. addc $t1,$hhi,$hhi ; h += Sigma0(a)
  412. xor $a2,$a0,$a0
  413. add $a0,$hlo,$hlo
  414. xor $a3,$a1,$a1 ; Maj(a,b,c)
  415. addc $a1,$hhi,$hhi ; h += Maj(a,b,c)
  416. ___
  417. $code.=<<___ if ($i==15 && $flag);
  418. extru $Xlo,31,10,$Xlo
  419. comiclr,= $LAST10BITS,$Xlo,%r0
  420. b L\$rounds_pa1
  421. nop
  422. ___
  423. push(@X,shift(@X)); push(@X,shift(@X));
  424. }
  # ROUND_16_xx_pa1($i, <16 register names: a..h as hi,lo pairs>)
  # Appends one SHA-512 round with message-schedule expansion for the
  # 32-bit-only path.  $i is the round index (16 or above) and is reduced by
  # 16 before use.  The emitted code loads X[i+1], X[i+9] and X[i+14] from the
  # stack area at -$XOFF(%sp), adds X[i+9] and the two small-sigma terms to
  # the current X[i] pair, stores the result back to the X[i] slot, and then
  # emits the round through ROUND_00_15_pa1 with its flag argument set.
  # The 64-bit right shifts are built from an shd for the low word and an
  # extru for the high word.
  # NOTE(review): the second inline "sigma0" remark (on the X[i+14] term)
  # appears to mean sigma1, since the code uses @sigma1 there.
  425. sub ROUND_16_xx_pa1 {
  426. my ($Xhi,$Xlo,$Xnhi,$Xnlo) = @X;
  427. my ($i)=shift;
  428. $i-=16;
  429. $code.=<<___;
  430. ldw `-$XOFF+8*(($i+1)%16)`(%sp),$Xnhi
  431. ldw `-$XOFF+8*(($i+1)%16)+4`(%sp),$Xnlo ; load X[i+1]
  432. ldw `-$XOFF+8*(($i+9)%16)`(%sp),$a1
  433. ldw `-$XOFF+8*(($i+9)%16)+4`(%sp),$a0 ; load X[i+9]
  434. ldw `-$XOFF+8*(($i+14)%16)`(%sp),$a3
  435. ldw `-$XOFF+8*(($i+14)%16)+4`(%sp),$a2 ; load X[i+14]
  436. shd $Xnhi,$Xnlo,$sigma0[0],$t0
  437. shd $Xnlo,$Xnhi,$sigma0[0],$t1
  438. add $a0,$Xlo,$Xlo
  439. shd $Xnhi,$Xnlo,$sigma0[1],$t2
  440. addc $a1,$Xhi,$Xhi
  441. shd $Xnlo,$Xnhi,$sigma0[1],$t3
  442. xor $t2,$t0,$t0
  443. shd $Xnhi,$Xnlo,$sigma0[2],$t2
  444. xor $t3,$t1,$t1
  445. extru $Xnhi,`31-$sigma0[2]`,`32-$sigma0[2]`,$t3
  446. xor $t2,$t0,$t0
  447. shd $a3,$a2,$sigma1[0],$a0
  448. xor $t3,$t1,$t1 ; sigma0(X[i+1)&0x0f])
  449. shd $a2,$a3,$sigma1[0],$a1
  450. add $t0,$Xlo,$Xlo
  451. shd $a3,$a2,$sigma1[1],$t2
  452. addc $t1,$Xhi,$Xhi
  453. shd $a2,$a3,$sigma1[1],$t3
  454. xor $t2,$a0,$a0
  455. shd $a3,$a2,$sigma1[2],$t2
  456. xor $t3,$a1,$a1
  457. extru $a3,`31-$sigma1[2]`,`32-$sigma1[2]`,$t3
  458. xor $t2,$a0,$a0
  459. xor $t3,$a1,$a1 ; sigma0(X[i+14)&0x0f])
  460. add $a0,$Xlo,$Xlo
  461. addc $a1,$Xhi,$Xhi
  462. stw $Xhi,`-$XOFF+8*($i%16)`(%sp)
  463. stw $Xlo,`-$XOFF+8*($i%16)+4`(%sp)
  464. ___
  465. &ROUND_00_15_pa1($i,@_,1);
  466. }
  # 32-bit-only path: load the context as sixteen 32-bit halves, program %sar
  # with the alignment factor, and open the per-block loop at L$oop_pa1.
  # If $inp is word-aligned, control goes to L$aligned_pa1; otherwise $inp is
  # rounded down and the block is assembled from neighbouring words with vshd
  # and stored to the X[] area at -$XOFF(%sp).
  467. $code.=<<___;
  468. ldw `0*4`($ctx),$Ahi ; load context
  469. ldw `1*4`($ctx),$Alo
  470. ldw `2*4`($ctx),$Bhi
  471. ldw `3*4`($ctx),$Blo
  472. ldw `4*4`($ctx),$Chi
  473. ldw `5*4`($ctx),$Clo
  474. ldw `6*4`($ctx),$Dhi
  475. ldw `7*4`($ctx),$Dlo
  476. ldw `8*4`($ctx),$Ehi
  477. ldw `9*4`($ctx),$Elo
  478. ldw `10*4`($ctx),$Fhi
  479. ldw `11*4`($ctx),$Flo
  480. ldw `12*4`($ctx),$Ghi
  481. ldw `13*4`($ctx),$Glo
  482. ldw `14*4`($ctx),$Hhi
  483. ldw `15*4`($ctx),$Hlo
  484. extru $inp,31,2,$t0
  485. sh3addl $t0,%r0,$t0
  486. subi 32,$t0,$t0
  487. mtctl $t0,%cr11 ; load %sar with align factor
  488. L\$oop_pa1
  489. extru $inp,31,2,$a3
  490. comib,= 0,$a3,L\$aligned_pa1
  491. sub $inp,$a3,$inp
  492. ldw `0*4`($inp),$X[0]
  493. ldw `1*4`($inp),$X[1]
  494. ldw `2*4`($inp),$t2
  495. ldw `3*4`($inp),$t3
  496. ldw `4*4`($inp),$a0
  497. ldw `5*4`($inp),$a1
  498. ldw `6*4`($inp),$a2
  499. ldw `7*4`($inp),$a3
  500. vshd $X[0],$X[1],$X[0]
  501. vshd $X[1],$t2,$X[1]
  502. stw $X[0],`-$XOFF+0*4`(%sp)
  503. ldw `8*4`($inp),$t0
  504. vshd $t2,$t3,$t2
  505. stw $X[1],`-$XOFF+1*4`(%sp)
  506. ldw `9*4`($inp),$t1
  507. vshd $t3,$a0,$t3
  508. ___
  # Software-pipelined remainder of the unaligned copy.  @t is a rotating
  # window of eight scratch registers: each step stores the oldest merged
  # word, refills that register from the input and merges the next pair.
  # The first loop runs through $i == 128/4-8 inclusive, so input words up to
  # index 32 are fetched (one more than in the aligned case); the second loop
  # only drains the window, and the last store is emitted in the line after
  # the branch to L$collected_pa1.
  509. {
  510. my @t=($t2,$t3,$a0,$a1,$a2,$a3,$t0,$t1);
  511. for ($i=2;$i<=(128/4-8);$i++) {
  512. $code.=<<___;
  513. stw $t[0],`-$XOFF+$i*4`(%sp)
  514. ldw `(8+$i)*4`($inp),$t[0]
  515. vshd $t[1],$t[2],$t[1]
  516. ___
  517. push(@t,shift(@t));
  518. }
  519. for (;$i<(128/4-1);$i++) {
  520. $code.=<<___;
  521. stw $t[0],`-$XOFF+$i*4`(%sp)
  522. vshd $t[1],$t[2],$t[1]
  523. ___
  524. push(@t,shift(@t));
  525. }
  526. $code.=<<___;
  527. b L\$collected_pa1
  528. stw $t[0],`-$XOFF+$i*4`(%sp)
  529. ___
  530. }
  # Aligned input on the 32-bit-only path: copy the 32 input words straight
  # to the X[] area at -$XOFF(%sp), with loads running ahead of the stores.
  531. $code.=<<___;
  532. L\$aligned_pa1
  533. ldw `0*4`($inp),$X[0]
  534. ldw `1*4`($inp),$X[1]
  535. ldw `2*4`($inp),$t2
  536. ldw `3*4`($inp),$t3
  537. ldw `4*4`($inp),$a0
  538. ldw `5*4`($inp),$a1
  539. ldw `6*4`($inp),$a2
  540. ldw `7*4`($inp),$a3
  541. stw $X[0],`-$XOFF+0*4`(%sp)
  542. ldw `8*4`($inp),$t0
  543. stw $X[1],`-$XOFF+1*4`(%sp)
  544. ldw `9*4`($inp),$t1
  545. ___
  # @t rotates through the eight scratch registers: the first loop stores a
  # word and refills its register (loading input words 10..31), the second
  # loop stores what is still in flight.  Both the aligned and the unaligned
  # path meet at L$collected_pa1.
  546. {
  547. my @t=($t2,$t3,$a0,$a1,$a2,$a3,$t0,$t1);
  548. for ($i=2;$i<(128/4-8);$i++) {
  549. $code.=<<___;
  550. stw $t[0],`-$XOFF+$i*4`(%sp)
  551. ldw `(8+$i)*4`($inp),$t[0]
  552. ___
  553. push(@t,shift(@t));
  554. }
  555. for (;$i<128/4;$i++) {
  556. $code.=<<___;
  557. stw $t[0],`-$XOFF+$i*4`(%sp)
  558. ___
  559. push(@t,shift(@t));
  560. }
  561. $code.="L\$collected_pa1\n";
  562. }
  # Rounds for the 32-bit-only path.  @V holds hi/lo pairs, so it is rotated
  # by two entries after every round.  Rounds 0..15 are emitted once; rounds
  # 16..31 form the body of the L$rounds_pa1 loop (the branch back is emitted
  # by ROUND_00_15_pa1 in the last of them).
  563. for($i=0;$i<16;$i++) { &ROUND_00_15_pa1($i,@V); unshift(@V,pop(@V)); unshift(@V,pop(@V)); }
  564. $code.="L\$rounds_pa1\n";
  565. for(;$i<32;$i++) { &ROUND_16_xx_pa1($i,@V); unshift(@V,pop(@V)); unshift(@V,pop(@V)); }
  # After the last round: restore the arguments, rewind $Tbl, add the working
  # variables into the context with add/addc pairs (low word first), advance
  # $inp by one block and either fall out to L$done or loop to L$oop_pa1.
  566. $code.=<<___;
  567. $POP `-$FRAME_MARKER-2*$SIZE_T`(%sp),$ctx ; restore arguments
  568. $POP `-$FRAME_MARKER-3*$SIZE_T`(%sp),$inp
  569. $POP `-$FRAME_MARKER-4*$SIZE_T`(%sp),$num
  570. ldo `-$rounds*$SZ`($Tbl),$Tbl ; rewind $Tbl
  571. ldw `0*4`($ctx),$t1 ; update context
  572. ldw `1*4`($ctx),$t0
  573. ldw `2*4`($ctx),$t3
  574. ldw `3*4`($ctx),$t2
  575. ldw `4*4`($ctx),$a1
  576. ldw `5*4`($ctx),$a0
  577. ldw `6*4`($ctx),$a3
  578. add $t0,$Alo,$Alo
  579. ldw `7*4`($ctx),$a2
  580. addc $t1,$Ahi,$Ahi
  581. ldw `8*4`($ctx),$t1
  582. add $t2,$Blo,$Blo
  583. ldw `9*4`($ctx),$t0
  584. addc $t3,$Bhi,$Bhi
  585. ldw `10*4`($ctx),$t3
  586. add $a0,$Clo,$Clo
  587. ldw `11*4`($ctx),$t2
  588. addc $a1,$Chi,$Chi
  589. ldw `12*4`($ctx),$a1
  590. add $a2,$Dlo,$Dlo
  591. ldw `13*4`($ctx),$a0
  592. addc $a3,$Dhi,$Dhi
  593. ldw `14*4`($ctx),$a3
  594. add $t0,$Elo,$Elo
  595. ldw `15*4`($ctx),$a2
  596. addc $t1,$Ehi,$Ehi
  597. stw $Ahi,`0*4`($ctx)
  598. add $t2,$Flo,$Flo
  599. stw $Alo,`1*4`($ctx)
  600. addc $t3,$Fhi,$Fhi
  601. stw $Bhi,`2*4`($ctx)
  602. add $a0,$Glo,$Glo
  603. stw $Blo,`3*4`($ctx)
  604. addc $a1,$Ghi,$Ghi
  605. stw $Chi,`4*4`($ctx)
  606. add $a2,$Hlo,$Hlo
  607. stw $Clo,`5*4`($ctx)
  608. addc $a3,$Hhi,$Hhi
  609. stw $Dhi,`6*4`($ctx)
  610. ldo `16*$SZ`($inp),$inp ; advance $inp
  611. stw $Dlo,`7*4`($ctx)
  612. stw $Ehi,`8*4`($ctx)
  613. stw $Elo,`9*4`($ctx)
  614. stw $Fhi,`10*4`($ctx)
  615. stw $Flo,`11*4`($ctx)
  616. stw $Ghi,`12*4`($ctx)
  617. stw $Glo,`13*4`($ctx)
  618. stw $Hhi,`14*4`($ctx)
  619. comb,= $inp,$num,L\$done
  620. stw $Hlo,`15*4`($ctx)
  621. b L\$oop_pa1
  622. $PUSH $inp,`-$FRAME_MARKER-3*$SIZE_T`(%sp) ; save $inp
  623. L\$done
  624. ___
  625. }}
  # Function exit, shared by all code paths: reload the return pointer and
  # %r4..%r18 from the frame, return through %r2, and pop the frame while
  # restoring %r3 in the line after the return branch.  A .STRINGZ
  # identification string closes the module.
  626. $code.=<<___;
  627. $POP `-$FRAME-$SAVED_RP`(%sp),%r2 ; standard epilogue
  628. $POP `-$FRAME+1*$SIZE_T`(%sp),%r4
  629. $POP `-$FRAME+2*$SIZE_T`(%sp),%r5
  630. $POP `-$FRAME+3*$SIZE_T`(%sp),%r6
  631. $POP `-$FRAME+4*$SIZE_T`(%sp),%r7
  632. $POP `-$FRAME+5*$SIZE_T`(%sp),%r8
  633. $POP `-$FRAME+6*$SIZE_T`(%sp),%r9
  634. $POP `-$FRAME+7*$SIZE_T`(%sp),%r10
  635. $POP `-$FRAME+8*$SIZE_T`(%sp),%r11
  636. $POP `-$FRAME+9*$SIZE_T`(%sp),%r12
  637. $POP `-$FRAME+10*$SIZE_T`(%sp),%r13
  638. $POP `-$FRAME+11*$SIZE_T`(%sp),%r14
  639. $POP `-$FRAME+12*$SIZE_T`(%sp),%r15
  640. $POP `-$FRAME+13*$SIZE_T`(%sp),%r16
  641. $POP `-$FRAME+14*$SIZE_T`(%sp),%r17
  642. $POP `-$FRAME+15*$SIZE_T`(%sp),%r18
  643. bv (%r2)
  644. .EXIT
  645. $POPMB -$FRAME(%sp),%r3
  646. .PROCEND
  647. .STRINGZ "SHA`64*$SZ` block transform for PA-RISC, CRYPTOGAMS by <appro\@openssl.org>"
  648. ___
  649. # Explicitly encode PA-RISC 2.0 instructions used in this module, so
  650. # that it can be compiled with .LEVEL 1.0. It should be noted that I
  651. # wouldn't have to do this, if GNU assembler understood .ALLOW 2.0
  652. # directive...
  # $ldd->($mod, $args): hand-encode an "ldd" instruction as a .WORD.
  #   $mod  - completer text following the mnemonic (e.g. ",ma"), may be empty
  #   $args - operand text
  # Returns the .WORD line with the original instruction as a trailing
  # comment when the operands have the form "disp(%rB),%rT"; otherwise
  # returns the instruction unchanged as assembler text.
  my $ldd = sub {
      my ($mod,$args) = @_;
      my $text = "ldd$mod\t$args";

      my ($disp,$base,$target) =
          ($args =~ /(\-?[0-9]+)\(%r([0-9]+)\),%r([0-9]+)/);  # format 3 suffices
      return "\t".$text unless defined $target;

      my $word = (0x14<<26) | ($base<<21) | ($target<<16)
               | (($disp&0x1FF8)<<1) | (($disp>>13)&1);
      $word |= (1<<3) if ($mod =~ /^,m/);     # any modify completer
      $word |= (1<<2) if ($mod =~ /^,mb/);    # modify-before
      return sprintf "\t.WORD\t0x%08x\t; %s",$word,$text;
  };
  # $std->($mod, $args): hand-encode an "std" instruction as a .WORD.
  #   $mod  - completer text following the mnemonic; it is echoed in the
  #           trailing comment but does not influence the encoding
  #   $args - operand text
  # Operands of the form "%rS,disp(%rB)" are encoded; anything else is
  # returned unchanged as assembler text.
  my $std = sub {
      my ($mod,$args) = @_;
      my $text = "std$mod\t$args";

      my ($source,$disp,$base) =
          ($args =~ /%r([0-9]+),(\-?[0-9]+)\(%r([0-9]+)\)/);  # format 3 suffices
      return "\t".$text unless defined $base;

      my $word = (0x1c<<26) | ($base<<21) | ($source<<16)
               | (($disp&0x1FF8)<<1) | (($disp>>13)&1);
      return sprintf "\t.WORD\t0x%08x\t; %s",$word,$text;
  };
  # $extrd->($mod, $args): hand-encode an "extrd" instruction as a .WORD.
  #   $mod  - completer text following the mnemonic
  #   $args - operand text
  # Two operand forms are encoded: "%rS,pos,len,%rT" (format 15) and
  # "%rS,%sar,len,%rT" (format 12).  Anything else is returned unchanged as
  # assembler text.
  my $extrd = sub {
      my ($mod,$args) = @_;
      my $text = "extrd$mod\t$args";

      # I only have ",u" completer, it's implicitly encoded...
      if (my ($src,$pos,$width,$dst) =
              ($args =~ /%r([0-9]+),([0-9]+),([0-9]+),%r([0-9]+)/)) { # format 15
          my $word = (0x36<<26) | ($src<<21) | ($dst<<16);
          my $len  = 32-$width;
          $word |= (($pos&0x20)<<6) | (($pos&0x1f)<<5);   # encode pos
          $word |= (($len&0x20)<<7) | ($len&0x1f);        # encode len
          return sprintf "\t.WORD\t0x%08x\t; %s",$word,$text;
      }

      if (my ($src,$width,$dst) =
              ($args =~ /%r([0-9]+),%sar,([0-9]+),%r([0-9]+)/)) {     # format 12
          my $word = (0x34<<26) | ($src<<21) | ($dst<<16) | (2<<11) | (1<<9);
          my $len  = 32-$width;
          $word |= (($len&0x20)<<3) | ($len&0x1f);        # encode len
          $word |= (1<<13) if ($mod =~ /,\**=/);
          return sprintf "\t.WORD\t0x%08x\t; %s",$word,$text;
      }

      return "\t".$text;
  };
  # $shrpd->($mod, $args): hand-encode a "shrpd" instruction as a .WORD.
  #   $mod  - completer text following the mnemonic (only echoed in the comment)
  #   $args - operand text
  # Two operand forms are encoded: "%rA,%rB,count,%rT" (format 14) and
  # "%rA,%rB,%sar,%rT" (format 11).  Anything else is returned unchanged as
  # assembler text.
  my $shrpd = sub {
      my ($mod,$args) = @_;
      my $text = "shrpd$mod\t$args";

      if (my ($first,$second,$count,$dst) =
              ($args =~ /%r([0-9]+),%r([0-9]+),([0-9]+),%r([0-9]+)/)) { # format 14
          my $word = (0x34<<26) | ($second<<21) | ($first<<16) | (1<<10) | $dst;
          my $cpos = 63-$count;
          $word |= (($cpos&0x20)<<6) | (($cpos&0x1f)<<5);   # encode sa
          return sprintf "\t.WORD\t0x%08x\t; %s",$word,$text;
      }

      if (my ($first,$second,$dst) =
              ($args =~ /%r([0-9]+),%r([0-9]+),%sar,%r([0-9]+)/)) {     # format 11
          return sprintf "\t.WORD\t0x%08x\t; %s",
              (0x34<<26) | ($second<<21) | ($first<<16) | (1<<9) | $dst, $text;
      }

      return "\t".$text;
  };
  # assemble($mnemonic, $mod, $args)
  # Returns the text for one instruction.  If a variable named after the
  # mnemonic holds a code reference (the $ldd, $std, $extrd and $shrpd
  # encoders above), that encoder produces the line; every other instruction
  # is returned as "\tmnemonic<mod>\targs".
  sub assemble {
      my ($mnemonic,$mod,$args)=@_;

      # Look the encoder up by name; yields undef when no such variable exists.
      my $encoder = eval("\$$mnemonic");

      return $encoder->($mod,$args) if (ref($encoder) eq 'CODE');
      return "\t$mnemonic$mod\t$args";
  }
  # Detect the GNU assembler: run the compiler named by $ENV{CC} on an empty
  # input with the assembler in verbose mode and look for its banner in the
  # combined output.  $gnuas is left unset when the banner is not seen.
  $gnuas = 1
      if (`$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1`
          =~ /GNU assembler/);
  # Post-process the generated text line by line and print it to STDOUT.
  717. foreach (split("\n",$code)) {
  # Replace every backtick-quoted Perl expression with its value.
  718. s/\`([^\`]*)\`/eval $1/ge;
  # One statement, first matching substitution wins: normalise an shd whose
  # shift count exceeds 31 by swapping its two source registers, or else
  # expand one of the made-up mnemonics (_ror, _shr, _align, _shl) into the
  # real instruction for the selected word size / ABI.
  719. s/shd\s+(%r[0-9]+),(%r[0-9]+),([0-9]+)/
  720. $3>31 ? sprintf("shd\t%$2,%$1,%d",$3-32) # rotation for >=32
  721. : sprintf("shd\t%$1,%$2,%d",$3)/e or
  722. # translate made up instructions: _ror, _shr, _align, _shl
  723. s/_ror(\s+)(%r[0-9]+),/
  724. ($SZ==4 ? "shd" : "shrpd")."$1$2,$2,"/e or
  725. s/_shr(\s+%r[0-9]+),([0-9]+),/
  726. $SZ==4 ? sprintf("extru%s,%d,%d,",$1,31-$2,32-$2)
  727. : sprintf("extrd,u%s,%d,%d,",$1,63-$2,64-$2)/e or
  728. s/_align(\s+%r[0-9]+,%r[0-9]+),/
  729. ($SZ==4 ? "vshd$1," : "shrpd$1,%sar,")/e or
  730. s/_shl(\s+%r[0-9]+),([0-9]+),/
  731. $SIZE_T==4 ? sprintf("zdep%s,%d,%d,",$1,31-$2,32-$2)
  732. : sprintf("depd,z%s,%d,%d,",$1,63-$2,64-$2)/e;
  # 32-bit builds: pass every indented instruction line through assemble() so
  # that ldd/std/extrd/shrpd are emitted as pre-encoded .WORDs.
  733. s/^\s+([a-z]+)([\S]*)\s+([\S]*)/&assemble($1,$2,$3)/e if ($SIZE_T==4);
  # 64-bit builds with the GNU assembler: adjust directive spelling.
  734. s/(\.LEVEL\s+2\.0)W/$1w/ if ($gnuas && $SIZE_T==8);
  735. s/\.SPACE\s+\$TEXT\$/.text/ if ($gnuas && $SIZE_T==8);
  736. s/\.SUBSPA.*// if ($gnuas && $SIZE_T==8);
  # 32-bit builds use comb in place of the cmpb,* form.
  737. s/cmpb,\*/comb,/ if ($SIZE_T==4);
  # 64-bit builds use bve in place of bv.
  738. s/\bbv\b/bve/ if ($SIZE_T==8);
  739. print $_,"\n";
  740. }
  # Closing STDOUT explicitly surfaces buffered write errors as a fatal error.
  741. close STDOUT or die "error closing STDOUT: $!";