sha512-parisc.pl 21 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807
  1. #! /usr/bin/env perl
  2. # Copyright 2009-2018 The OpenSSL Project Authors. All Rights Reserved.
  3. #
  4. # Licensed under the OpenSSL license (the "License"). You may not use
  5. # this file except in compliance with the License. You can obtain a copy
  6. # in the file LICENSE in the source distribution or at
  7. # https://www.openssl.org/source/license.html
  8. # ====================================================================
  9. # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
  10. # project. The module is, however, dual licensed under OpenSSL and
  11. # CRYPTOGAMS licenses depending on where you obtain it. For further
  12. # details see http://www.openssl.org/~appro/cryptogams/.
  13. # ====================================================================
  14. # SHA256/512 block procedure for PA-RISC.
  15. # June 2009.
  16. #
  17. # SHA256 performance is >75% better than gcc 3.2 generated code on
  18. # PA-7100LC. Compared to code generated by vendor compiler this
  19. # implementation is almost 70% faster in 64-bit build, but delivers
  20. # virtually same performance in 32-bit build on PA-8600.
  21. #
  22. # SHA512 performance is >2.9x better than gcc 3.2 generated code on
# PA-7100LC, PA-RISC 1.1 processor. The implementation detects if the
  24. # code is executed on PA-RISC 2.0 processor and switches to 64-bit
  25. # code path delivering adequate performance even in "blended" 32-bit
  26. # build. Though 64-bit code is not any faster than code generated by
  27. # vendor compiler on PA-8600...
  28. #
  29. # Special thanks to polarhome.com for providing HP-UX account.
  30. $flavour = shift;
  31. $output = shift;
  32. open STDOUT,">$output";
  33. if ($flavour =~ /64/) {
  34. $LEVEL ="2.0W";
  35. $SIZE_T =8;
  36. $FRAME_MARKER =80;
  37. $SAVED_RP =16;
  38. $PUSH ="std";
  39. $PUSHMA ="std,ma";
  40. $POP ="ldd";
  41. $POPMB ="ldd,mb";
  42. } else {
  43. $LEVEL ="1.0";
  44. $SIZE_T =4;
  45. $FRAME_MARKER =48;
  46. $SAVED_RP =20;
  47. $PUSH ="stw";
  48. $PUSHMA ="stwm";
  49. $POP ="ldw";
  50. $POPMB ="ldwm";
  51. }
  52. if ($output =~ /512/) {
  53. $func="sha512_block_data_order";
  54. $SZ=8;
  55. @Sigma0=(28,34,39);
  56. @Sigma1=(14,18,41);
  57. @sigma0=(1, 8, 7);
  58. @sigma1=(19,61, 6);
  59. $rounds=80;
  60. $LAST10BITS=0x017;
  61. $LD="ldd";
  62. $LDM="ldd,ma";
  63. $ST="std";
  64. } else {
  65. $func="sha256_block_data_order";
  66. $SZ=4;
  67. @Sigma0=( 2,13,22);
  68. @Sigma1=( 6,11,25);
  69. @sigma0=( 7,18, 3);
  70. @sigma1=(17,19,10);
  71. $rounds=64;
  72. $LAST10BITS=0x0f2;
  73. $LD="ldw";
  74. $LDM="ldwm";
  75. $ST="stw";
  76. }
  77. $FRAME=16*$SIZE_T+$FRAME_MARKER;# 16 saved regs + frame marker
  78. # [+ argument transfer]
  79. $XOFF=16*$SZ+32; # local variables
  80. $FRAME+=$XOFF;
  81. $XOFF+=$FRAME_MARKER; # distance between %sp and local variables
  82. $ctx="%r26"; # zapped by $a0
  83. $inp="%r25"; # zapped by $a1
  84. $num="%r24"; # zapped by $t0
  85. $a0 ="%r26";
  86. $a1 ="%r25";
  87. $t0 ="%r24";
  88. $t1 ="%r29";
  89. $Tbl="%r31";
  90. @V=($A,$B,$C,$D,$E,$F,$G,$H)=("%r17","%r18","%r19","%r20","%r21","%r22","%r23","%r28");
  91. @X=("%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7", "%r8",
  92. "%r9", "%r10","%r11","%r12","%r13","%r14","%r15","%r16",$inp);
# Append the assembly for one round to $code.
#
#   $i     - round index; the message word is taken from @X[$i%16]
#   $a..$h - registers holding the eight working variables for this round
#
# The first addition folds $t1 into $h, so $t1 must already hold the
# value fetched from the constant table ($Tbl) by the caller or by the
# previous round. When $i<15 the last backtick expression emits the
# post-incrementing load that fetches the next table entry into $t1.
# $a0, $a1, $t0 and $t1 are scratch. _ror is a made-up mnemonic and the
# backtick expressions are Perl; both are resolved by the output loop at
# the end of the file.
sub ROUND_00_15 {
my ($i,$a,$b,$c,$d,$e,$f,$g,$h)=@_;
$code.=<<___;
	_ror $e,$Sigma1[0],$a0
	and $f,$e,$t0
	_ror $e,$Sigma1[1],$a1
	addl $t1,$h,$h
	andcm $g,$e,$t1
	xor $a1,$a0,$a0
	_ror $a1,`$Sigma1[2]-$Sigma1[1]`,$a1
	or $t0,$t1,$t1 ; Ch(e,f,g)
	addl @X[$i%16],$h,$h
	xor $a0,$a1,$a1 ; Sigma1(e)
	addl $t1,$h,$h
	_ror $a,$Sigma0[0],$a0
	addl $a1,$h,$h
	_ror $a,$Sigma0[1],$a1
	and $a,$b,$t0
	and $a,$c,$t1
	xor $a1,$a0,$a0
	_ror $a1,`$Sigma0[2]-$Sigma0[1]`,$a1
	xor $t1,$t0,$t0
	and $b,$c,$t1
	xor $a0,$a1,$a1 ; Sigma0(a)
	addl $h,$d,$d
	xor $t1,$t0,$t0 ; Maj(a,b,c)
	`"$LDM $SZ($Tbl),$t1" if ($i<15)`
	addl $a1,$h,$h
	addl $t0,$h,$h
___
}
# Append the assembly for one round of the message-schedule phase to
# $code: update the message word in place, fetch the next table entry
# into $t1, then emit the common round body via ROUND_00_15.
#
#   $i     - round index, 16..31 within the unrolled group; it is reduced
#            by 16 before being used to index @X
#   $a..$h - registers holding the eight working variables for this round
#
# @X[$i] becomes @X[$i] + @X[($i+9)%16] + sigma0(@X[($i+1)%16])
# + sigma1(@X[($i+14)%16]). _ror and _shr are made-up mnemonics that the
# output loop at the end of the file translates.
sub ROUND_16_xx {
my ($i,$a,$b,$c,$d,$e,$f,$g,$h)=@_;
$i-=16;
$code.=<<___;
	_ror @X[($i+1)%16],$sigma0[0],$a0
	_ror @X[($i+1)%16],$sigma0[1],$a1
	addl @X[($i+9)%16],@X[$i],@X[$i]
	_ror @X[($i+14)%16],$sigma1[0],$t0
	_ror @X[($i+14)%16],$sigma1[1],$t1
	xor $a1,$a0,$a0
	_shr @X[($i+1)%16],$sigma0[2],$a1
	xor $t1,$t0,$t0
	_shr @X[($i+14)%16],$sigma1[2],$t1
	xor $a1,$a0,$a0 ; sigma0(X[(i+1)&0x0f])
	xor $t1,$t0,$t0 ; sigma1(X[(i+14)&0x0f])
	$LDM $SZ($Tbl),$t1
	addl $a0,@X[$i],@X[$i]
	addl $t0,@X[$i],@X[$i]
___
# In the last round of the unrolled group the low 10 bits of the table
# entry just loaded are compared with $LAST10BITS, and the ldo that adds
# one to $Tbl serves as the end-of-table signal tested after the group.
# NOTE(review): the ldo is presumably nullified by comiclr unless the
# bits match - confirm against the PA-RISC instruction reference.
$code.=<<___ if ($i==15);
	extru $t1,31,10,$a1
	comiclr,<> $LAST10BITS,$a1,%r0
	ldo 1($Tbl),$Tbl ; signal end of $Tbl
___
&ROUND_00_15($i+16,$a,$b,$c,$d,$e,$f,$g,$h);
}
# Start of the generated code: assembler level, text section directives
# and the label of the constant table that $Tbl is pointed at later.
$code=<<___;
	.LEVEL $LEVEL
	.SPACE \$TEXT\$
	.SUBSPA \$CODE\$,QUAD=0,ALIGN=8,ACCESS=0x2C,CODE_ONLY
	.ALIGN 64
L\$table
___
# Table for $SZ==8 (SHA-512): 160 32-bit words, i.e. two words per
# 8-byte entry for each of the 80 rounds. The low 10 bits of the final
# word (0x4a475817) are 0x017, the $LAST10BITS value for this variant.
$code.=<<___ if ($SZ==8);
	.WORD 0x428a2f98,0xd728ae22,0x71374491,0x23ef65cd
	.WORD 0xb5c0fbcf,0xec4d3b2f,0xe9b5dba5,0x8189dbbc
	.WORD 0x3956c25b,0xf348b538,0x59f111f1,0xb605d019
	.WORD 0x923f82a4,0xaf194f9b,0xab1c5ed5,0xda6d8118
	.WORD 0xd807aa98,0xa3030242,0x12835b01,0x45706fbe
	.WORD 0x243185be,0x4ee4b28c,0x550c7dc3,0xd5ffb4e2
	.WORD 0x72be5d74,0xf27b896f,0x80deb1fe,0x3b1696b1
	.WORD 0x9bdc06a7,0x25c71235,0xc19bf174,0xcf692694
	.WORD 0xe49b69c1,0x9ef14ad2,0xefbe4786,0x384f25e3
	.WORD 0x0fc19dc6,0x8b8cd5b5,0x240ca1cc,0x77ac9c65
	.WORD 0x2de92c6f,0x592b0275,0x4a7484aa,0x6ea6e483
	.WORD 0x5cb0a9dc,0xbd41fbd4,0x76f988da,0x831153b5
	.WORD 0x983e5152,0xee66dfab,0xa831c66d,0x2db43210
	.WORD 0xb00327c8,0x98fb213f,0xbf597fc7,0xbeef0ee4
	.WORD 0xc6e00bf3,0x3da88fc2,0xd5a79147,0x930aa725
	.WORD 0x06ca6351,0xe003826f,0x14292967,0x0a0e6e70
	.WORD 0x27b70a85,0x46d22ffc,0x2e1b2138,0x5c26c926
	.WORD 0x4d2c6dfc,0x5ac42aed,0x53380d13,0x9d95b3df
	.WORD 0x650a7354,0x8baf63de,0x766a0abb,0x3c77b2a8
	.WORD 0x81c2c92e,0x47edaee6,0x92722c85,0x1482353b
	.WORD 0xa2bfe8a1,0x4cf10364,0xa81a664b,0xbc423001
	.WORD 0xc24b8b70,0xd0f89791,0xc76c51a3,0x0654be30
	.WORD 0xd192e819,0xd6ef5218,0xd6990624,0x5565a910
	.WORD 0xf40e3585,0x5771202a,0x106aa070,0x32bbd1b8
	.WORD 0x19a4c116,0xb8d2d0c8,0x1e376c08,0x5141ab53
	.WORD 0x2748774c,0xdf8eeb99,0x34b0bcb5,0xe19b48a8
	.WORD 0x391c0cb3,0xc5c95a63,0x4ed8aa4a,0xe3418acb
	.WORD 0x5b9cca4f,0x7763e373,0x682e6ff3,0xd6b2b8a3
	.WORD 0x748f82ee,0x5defb2fc,0x78a5636f,0x43172f60
	.WORD 0x84c87814,0xa1f0ab72,0x8cc70208,0x1a6439ec
	.WORD 0x90befffa,0x23631e28,0xa4506ceb,0xde82bde9
	.WORD 0xbef9a3f7,0xb2c67915,0xc67178f2,0xe372532b
	.WORD 0xca273ece,0xea26619c,0xd186b8c7,0x21c0c207
	.WORD 0xeada7dd6,0xcde0eb1e,0xf57d4f7f,0xee6ed178
	.WORD 0x06f067aa,0x72176fba,0x0a637dc5,0xa2c898a6
	.WORD 0x113f9804,0xbef90dae,0x1b710b35,0x131c471b
	.WORD 0x28db77f5,0x23047d84,0x32caab7b,0x40c72493
	.WORD 0x3c9ebe0a,0x15c9bebc,0x431d67c4,0x9c100d4c
	.WORD 0x4cc5d4be,0xcb3e42b6,0x597f299c,0xfc657e2a
	.WORD 0x5fcb6fab,0x3ad6faec,0x6c44198c,0x4a475817
___
# Table for $SZ==4 (SHA-256): 64 32-bit words, one per round. The low
# 10 bits of the final word (0xc67178f2) are 0x0f2, the $LAST10BITS value
# for this variant.
$code.=<<___ if ($SZ==4);
	.WORD 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
	.WORD 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
	.WORD 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
	.WORD 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
	.WORD 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
	.WORD 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
	.WORD 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
	.WORD 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
	.WORD 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
	.WORD 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
	.WORD 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
	.WORD 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
	.WORD 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
	.WORD 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
	.WORD 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
	.WORD 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
___
# Entry point and prologue. The emitted code saves the return pointer
# and %r3-%r18, turns $num from a block count into a pointer to the end
# of the input ($inp + $num*16*$SZ), stores the three arguments in the
# frame (their registers are reused as scratch), and points $Tbl at
# L$table relative to the address obtained with blr.
$code.=<<___;
	.EXPORT $func,ENTRY,ARGW0=GR,ARGW1=GR,ARGW2=GR
	.ALIGN 64
$func
	.PROC
	.CALLINFO FRAME=`$FRAME-16*$SIZE_T`,NO_CALLS,SAVE_RP,ENTRY_GR=18
	.ENTRY
	$PUSH %r2,-$SAVED_RP(%sp) ; standard prologue
	$PUSHMA %r3,$FRAME(%sp)
	$PUSH %r4,`-$FRAME+1*$SIZE_T`(%sp)
	$PUSH %r5,`-$FRAME+2*$SIZE_T`(%sp)
	$PUSH %r6,`-$FRAME+3*$SIZE_T`(%sp)
	$PUSH %r7,`-$FRAME+4*$SIZE_T`(%sp)
	$PUSH %r8,`-$FRAME+5*$SIZE_T`(%sp)
	$PUSH %r9,`-$FRAME+6*$SIZE_T`(%sp)
	$PUSH %r10,`-$FRAME+7*$SIZE_T`(%sp)
	$PUSH %r11,`-$FRAME+8*$SIZE_T`(%sp)
	$PUSH %r12,`-$FRAME+9*$SIZE_T`(%sp)
	$PUSH %r13,`-$FRAME+10*$SIZE_T`(%sp)
	$PUSH %r14,`-$FRAME+11*$SIZE_T`(%sp)
	$PUSH %r15,`-$FRAME+12*$SIZE_T`(%sp)
	$PUSH %r16,`-$FRAME+13*$SIZE_T`(%sp)
	$PUSH %r17,`-$FRAME+14*$SIZE_T`(%sp)
	$PUSH %r18,`-$FRAME+15*$SIZE_T`(%sp)
	_shl $num,`log(16*$SZ)/log(2)`,$num
	addl $inp,$num,$num ; $num to point at the end of $inp
	$PUSH $num,`-$FRAME_MARKER-4*$SIZE_T`(%sp) ; save arguments
	$PUSH $inp,`-$FRAME_MARKER-3*$SIZE_T`(%sp)
	$PUSH $ctx,`-$FRAME_MARKER-2*$SIZE_T`(%sp)
	blr %r0,$Tbl
	ldi 3,$t1
L\$pic
	andcm $Tbl,$t1,$Tbl ; wipe privilege level
	ldo L\$table-L\$pic($Tbl),$Tbl
___
# SHA-512 in a 32-bit build only: run-time selection between the 64-bit
# register code that follows and the 32-bit code at L$parisc1. The extrd
# is a PA-RISC 2.0 instruction that the output loop emits as a raw .WORD.
# NOTE(review): presumably on a 2.0 CPU it nullifies the branch, while an
# older CPU executes the word as something harmless and takes the branch
# - confirm against the architecture manuals.
$code.=<<___ if ($SZ==8 && $SIZE_T==4);
	ldi 31,$t1
	mtctl $t1,%cr11
	extrd,u,*= $t1,%sar,1,$t1 ; executes on PA-RISC 1.0
	b L\$parisc1
	nop
___
# Main code path, one $SZ-byte word per register. Load the eight chaining
# values from the context and set %sar (%cr11) to 8*$SZ minus eight times
# the misalignment of $inp, for the funnel shifts that realign the input.
# At L$oop the first table entry is fetched into $t1 and $t0 is set to
# $inp rounded down to a multiple of $SZ.
$code.=<<___;
	$LD `0*$SZ`($ctx),$A ; load context
	$LD `1*$SZ`($ctx),$B
	$LD `2*$SZ`($ctx),$C
	$LD `3*$SZ`($ctx),$D
	$LD `4*$SZ`($ctx),$E
	$LD `5*$SZ`($ctx),$F
	$LD `6*$SZ`($ctx),$G
	$LD `7*$SZ`($ctx),$H
	extru $inp,31,`log($SZ)/log(2)`,$t0
	sh3addl $t0,%r0,$t0
	subi `8*$SZ`,$t0,$t0
	mtctl $t0,%cr11 ; load %sar with align factor
L\$oop
	ldi `$SZ-1`,$t0
	$LDM $SZ($Tbl),$t1
	andcm $inp,$t0,$t0 ; align $inp
___
# Words 0..14 are loaded unconditionally. Word 15 is loaded by the
# instruction following the alignment test; the 17th word (into @X[16],
# which is $inp) and the _align merges are only reached for unaligned
# input.
for ($i=0;$i<15;$i++) { # load input block
$code.="\t$LD `$SZ*$i`($t0),@X[$i]\n"; }
$code.=<<___;
	cmpb,*= $inp,$t0,L\$aligned
	$LD `$SZ*15`($t0),@X[15]
	$LD `$SZ*16`($t0),@X[16]
___
for ($i=0;$i<16;$i++) { # align data
$code.="\t_align @X[$i],@X[$i+1],@X[$i]\n"; }
$code.=<<___;
L\$aligned
	nop ; otherwise /usr/ccs/bin/as is confused by below .WORD
___
# Rounds 0..15, unrolled. Rotating @V after each round renames the
# working variables instead of moving register contents.
for($i=0;$i<16;$i++) { &ROUND_00_15($i,@V); unshift(@V,pop(@V)); }
$code.=<<___;
L\$rounds
	nop ; otherwise /usr/ccs/bin/as is confused by below .WORD
___
# One group of 16 message-schedule rounds; $i carries on from the loop
# above. The emitted code repeats the group until the end-of-table signal
# (low bit of $Tbl) is set.
for(;$i<32;$i++) { &ROUND_16_xx($i,@V); unshift(@V,pop(@V)); }
# After the last group: restore the saved arguments, rewind $Tbl to the
# start of the table (undoing the +1 signal as well), add the working
# variables to the context values and store them back, advance $inp by
# one block and loop until it reaches $num.
$code.=<<___;
	bb,>= $Tbl,31,L\$rounds ; end of $Tbl signalled?
	nop
	$POP `-$FRAME_MARKER-2*$SIZE_T`(%sp),$ctx ; restore arguments
	$POP `-$FRAME_MARKER-3*$SIZE_T`(%sp),$inp
	$POP `-$FRAME_MARKER-4*$SIZE_T`(%sp),$num
	ldo `-$rounds*$SZ-1`($Tbl),$Tbl ; rewind $Tbl
	$LD `0*$SZ`($ctx),@X[0] ; load context
	$LD `1*$SZ`($ctx),@X[1]
	$LD `2*$SZ`($ctx),@X[2]
	$LD `3*$SZ`($ctx),@X[3]
	$LD `4*$SZ`($ctx),@X[4]
	$LD `5*$SZ`($ctx),@X[5]
	addl @X[0],$A,$A
	$LD `6*$SZ`($ctx),@X[6]
	addl @X[1],$B,$B
	$LD `7*$SZ`($ctx),@X[7]
	ldo `16*$SZ`($inp),$inp ; advance $inp
	$ST $A,`0*$SZ`($ctx) ; save context
	addl @X[2],$C,$C
	$ST $B,`1*$SZ`($ctx)
	addl @X[3],$D,$D
	$ST $C,`2*$SZ`($ctx)
	addl @X[4],$E,$E
	$ST $D,`3*$SZ`($ctx)
	addl @X[5],$F,$F
	$ST $E,`4*$SZ`($ctx)
	addl @X[6],$G,$G
	$ST $F,`5*$SZ`($ctx)
	addl @X[7],$H,$H
	$ST $G,`6*$SZ`($ctx)
	$ST $H,`7*$SZ`($ctx)
	cmpb,*<>,n $inp,$num,L\$oop
	$PUSH $inp,`-$FRAME_MARKER-3*$SIZE_T`(%sp) ; save $inp
___
  331. if ($SZ==8 && $SIZE_T==4) # SHA512 for 32-bit PA-RISC 1.0
  332. {{
# The preceding code path ends here by branching to the common epilogue
# at L$done; L$parisc1 starts the alternative path that uses 32-bit
# registers only.
$code.=<<___;
	b L\$done
	nop
	.ALIGN 64
L\$parisc1
___
# Register map for this path: every 64-bit working variable occupies a
# high/low pair of 32-bit registers, so @V has 16 entries.
@V=( $Ahi, $Alo, $Bhi, $Blo, $Chi, $Clo, $Dhi, $Dlo,
     $Ehi, $Elo, $Fhi, $Flo, $Ghi, $Glo, $Hhi, $Hlo) =
   ( "%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7", "%r8",
     "%r9","%r10","%r11","%r12","%r13","%r14","%r15","%r16");
# Scratch registers; these assignments replace the values used by the
# code generated above.
$a0 ="%r17";
$a1 ="%r18";
$a2 ="%r19";
$a3 ="%r20";
$t0 ="%r21";
$t1 ="%r22";
$t2 ="%r28";
$t3 ="%r29";
$Tbl="%r31";
# Only two message words (two hi/lo pairs) are kept in registers on this
# path; the round subroutines below read and write the remaining words in
# the stack frame.
@X=("%r23","%r24","%r25","%r26"); # zaps $num,$inp,$ctx
# Append the assembly for one round of the 32-bit-register path to $code.
#
#   $i            - round index
#   $ahi..$hlo    - high/low register pairs of the eight working variables
#   $flag         - true when called from ROUND_16_xx_pa1, which has
#                   already loaded X[i+1] itself
#
# The current message word is in the first register pair of @X; unless
# $flag is set, the next word is prefetched from the stack frame into the
# second pair. After "h += X[i]" the same pair is reused to hold the
# table entry. 64-bit additions are add/addc pairs and 64-bit rotations
# are pairs of shd instructions (rotation counts above 31 are fixed up by
# the output loop at the end of the file).
sub ROUND_00_15_pa1 {
my ($i,$ahi,$alo,$bhi,$blo,$chi,$clo,$dhi,$dlo,
    $ehi,$elo,$fhi,$flo,$ghi,$glo,$hhi,$hlo,$flag)=@_;
my ($Xhi,$Xlo,$Xnhi,$Xnlo) = @X;
$code.=<<___ if (!$flag);
	ldw `-$XOFF+8*(($i+1)%16)`(%sp),$Xnhi
	ldw `-$XOFF+8*(($i+1)%16)+4`(%sp),$Xnlo ; load X[i+1]
___
$code.=<<___;
	shd $ehi,$elo,$Sigma1[0],$t0
	add $Xlo,$hlo,$hlo
	shd $elo,$ehi,$Sigma1[0],$t1
	addc $Xhi,$hhi,$hhi ; h += X[i]
	shd $ehi,$elo,$Sigma1[1],$t2
	ldwm 8($Tbl),$Xhi
	shd $elo,$ehi,$Sigma1[1],$t3
	ldw -4($Tbl),$Xlo ; load K[i]
	xor $t2,$t0,$t0
	xor $t3,$t1,$t1
	and $flo,$elo,$a0
	and $fhi,$ehi,$a1
	shd $ehi,$elo,$Sigma1[2],$t2
	andcm $glo,$elo,$a2
	shd $elo,$ehi,$Sigma1[2],$t3
	andcm $ghi,$ehi,$a3
	xor $t2,$t0,$t0
	xor $t3,$t1,$t1 ; Sigma1(e)
	add $Xlo,$hlo,$hlo
	xor $a2,$a0,$a0
	addc $Xhi,$hhi,$hhi ; h += K[i]
	xor $a3,$a1,$a1 ; Ch(e,f,g)
	add $t0,$hlo,$hlo
	shd $ahi,$alo,$Sigma0[0],$t0
	addc $t1,$hhi,$hhi ; h += Sigma1(e)
	shd $alo,$ahi,$Sigma0[0],$t1
	add $a0,$hlo,$hlo
	shd $ahi,$alo,$Sigma0[1],$t2
	addc $a1,$hhi,$hhi ; h += Ch(e,f,g)
	shd $alo,$ahi,$Sigma0[1],$t3
	xor $t2,$t0,$t0
	xor $t3,$t1,$t1
	shd $ahi,$alo,$Sigma0[2],$t2
	and $alo,$blo,$a0
	shd $alo,$ahi,$Sigma0[2],$t3
	and $ahi,$bhi,$a1
	xor $t2,$t0,$t0
	xor $t3,$t1,$t1 ; Sigma0(a)
	and $alo,$clo,$a2
	and $ahi,$chi,$a3
	xor $a2,$a0,$a0
	add $hlo,$dlo,$dlo
	xor $a3,$a1,$a1
	addc $hhi,$dhi,$dhi ; d += h
	and $blo,$clo,$a2
	add $t0,$hlo,$hlo
	and $bhi,$chi,$a3
	addc $t1,$hhi,$hhi ; h += Sigma0(a)
	xor $a2,$a0,$a0
	add $a0,$hlo,$hlo
	xor $a3,$a1,$a1 ; Maj(a,b,c)
	addc $a1,$hhi,$hhi ; h += Maj(a,b,c)
___
# Last round of a message-schedule group: compare the low 10 bits of the
# table word just used with $LAST10BITS and branch back to L$rounds_pa1.
# NOTE(review): the branch is presumably nullified once the last table
# entry has been consumed - confirm the comiclr completer semantics.
$code.=<<___ if ($i==15 && $flag);
	extru $Xlo,31,10,$Xlo
	comiclr,= $LAST10BITS,$Xlo,%r0
	b L\$rounds_pa1
	nop
___
# Rotate @X by one register pair so that the prefetched word becomes the
# current one for the next round.
push(@X,shift(@X)); push(@X,shift(@X));
}
  423. sub ROUND_16_xx_pa1 {
  424. my ($Xhi,$Xlo,$Xnhi,$Xnlo) = @X;
  425. my ($i)=shift;
  426. $i-=16;
  427. $code.=<<___;
  428. ldw `-$XOFF+8*(($i+1)%16)`(%sp),$Xnhi
  429. ldw `-$XOFF+8*(($i+1)%16)+4`(%sp),$Xnlo ; load X[i+1]
  430. ldw `-$XOFF+8*(($i+9)%16)`(%sp),$a1
  431. ldw `-$XOFF+8*(($i+9)%16)+4`(%sp),$a0 ; load X[i+9]
  432. ldw `-$XOFF+8*(($i+14)%16)`(%sp),$a3
  433. ldw `-$XOFF+8*(($i+14)%16)+4`(%sp),$a2 ; load X[i+14]
  434. shd $Xnhi,$Xnlo,$sigma0[0],$t0
  435. shd $Xnlo,$Xnhi,$sigma0[0],$t1
  436. add $a0,$Xlo,$Xlo
  437. shd $Xnhi,$Xnlo,$sigma0[1],$t2
  438. addc $a1,$Xhi,$Xhi
  439. shd $Xnlo,$Xnhi,$sigma0[1],$t3
  440. xor $t2,$t0,$t0
  441. shd $Xnhi,$Xnlo,$sigma0[2],$t2
  442. xor $t3,$t1,$t1
  443. extru $Xnhi,`31-$sigma0[2]`,`32-$sigma0[2]`,$t3
  444. xor $t2,$t0,$t0
  445. shd $a3,$a2,$sigma1[0],$a0
  446. xor $t3,$t1,$t1 ; sigma0(X[i+1)&0x0f])
  447. shd $a2,$a3,$sigma1[0],$a1
  448. add $t0,$Xlo,$Xlo
  449. shd $a3,$a2,$sigma1[1],$t2
  450. addc $t1,$Xhi,$Xhi
  451. shd $a2,$a3,$sigma1[1],$t3
  452. xor $t2,$a0,$a0
  453. shd $a3,$a2,$sigma1[2],$t2
  454. xor $t3,$a1,$a1
  455. extru $a3,`31-$sigma1[2]`,`32-$sigma1[2]`,$t3
  456. xor $t2,$a0,$a0
  457. xor $t3,$a1,$a1 ; sigma0(X[i+14)&0x0f])
  458. add $a0,$Xlo,$Xlo
  459. addc $a1,$Xhi,$Xhi
  460. stw $Xhi,`-$XOFF+8*($i%16)`(%sp)
  461. stw $Xlo,`-$XOFF+8*($i%16)+4`(%sp)
  462. ___
  463. &ROUND_00_15_pa1($i,@_,1);
  464. }
# 32-bit-register path: load the context as 16 words (high word first for
# each 64-bit value) and set %sar (%cr11) to 32 minus eight times the
# misalignment of $inp. At L$oop_pa1 the input pointer is rounded down to
# a word boundary; aligned input branches to L$aligned_pa1. Otherwise the
# words are loaded eight at a time through the scratch registers, merged
# pairwise with vshd and stored into the frame at -$XOFF(%sp).
$code.=<<___;
	ldw `0*4`($ctx),$Ahi ; load context
	ldw `1*4`($ctx),$Alo
	ldw `2*4`($ctx),$Bhi
	ldw `3*4`($ctx),$Blo
	ldw `4*4`($ctx),$Chi
	ldw `5*4`($ctx),$Clo
	ldw `6*4`($ctx),$Dhi
	ldw `7*4`($ctx),$Dlo
	ldw `8*4`($ctx),$Ehi
	ldw `9*4`($ctx),$Elo
	ldw `10*4`($ctx),$Fhi
	ldw `11*4`($ctx),$Flo
	ldw `12*4`($ctx),$Ghi
	ldw `13*4`($ctx),$Glo
	ldw `14*4`($ctx),$Hhi
	ldw `15*4`($ctx),$Hlo
	extru $inp,31,2,$t0
	sh3addl $t0,%r0,$t0
	subi 32,$t0,$t0
	mtctl $t0,%cr11 ; load %sar with align factor
L\$oop_pa1
	extru $inp,31,2,$a3
	comib,= 0,$a3,L\$aligned_pa1
	sub $inp,$a3,$inp
	ldw `0*4`($inp),$X[0]
	ldw `1*4`($inp),$X[1]
	ldw `2*4`($inp),$t2
	ldw `3*4`($inp),$t3
	ldw `4*4`($inp),$a0
	ldw `5*4`($inp),$a1
	ldw `6*4`($inp),$a2
	ldw `7*4`($inp),$a3
	vshd $X[0],$X[1],$X[0]
	vshd $X[1],$t2,$X[1]
	stw $X[0],`-$XOFF+0*4`(%sp)
	ldw `8*4`($inp),$t0
	vshd $t2,$t3,$t2
	stw $X[1],`-$XOFF+1*4`(%sp)
	ldw `9*4`($inp),$t1
	vshd $t3,$a0,$t3
___
{
# Unaligned input: @t is a rotating window of eight scratch registers.
# The first loop stores a merged word, reloads the freed register with
# the word eight positions ahead and merges the next pair; it runs up to
# i=24 so that input word 32 (one past the 128-byte block) is loaded as
# well. The second loop only drains the window, and the final store
# follows the branch to L$collected_pa1.
my @t=($t2,$t3,$a0,$a1,$a2,$a3,$t0,$t1);
for ($i=2;$i<=(128/4-8);$i++) {
$code.=<<___;
	stw $t[0],`-$XOFF+$i*4`(%sp)
	ldw `(8+$i)*4`($inp),$t[0]
	vshd $t[1],$t[2],$t[1]
___
push(@t,shift(@t));
}
for (;$i<(128/4-1);$i++) {
$code.=<<___;
	stw $t[0],`-$XOFF+$i*4`(%sp)
	vshd $t[1],$t[2],$t[1]
___
push(@t,shift(@t));
}
$code.=<<___;
	b L\$collected_pa1
	stw $t[0],`-$XOFF+$i*4`(%sp)
___
}
# Aligned input: the same copy into the frame without the vshd merges.
$code.=<<___;
L\$aligned_pa1
	ldw `0*4`($inp),$X[0]
	ldw `1*4`($inp),$X[1]
	ldw `2*4`($inp),$t2
	ldw `3*4`($inp),$t3
	ldw `4*4`($inp),$a0
	ldw `5*4`($inp),$a1
	ldw `6*4`($inp),$a2
	ldw `7*4`($inp),$a3
	stw $X[0],`-$XOFF+0*4`(%sp)
	ldw `8*4`($inp),$t0
	stw $X[1],`-$XOFF+1*4`(%sp)
	ldw `9*4`($inp),$t1
___
{
my @t=($t2,$t3,$a0,$a1,$a2,$a3,$t0,$t1);
for ($i=2;$i<(128/4-8);$i++) {
$code.=<<___;
	stw $t[0],`-$XOFF+$i*4`(%sp)
	ldw `(8+$i)*4`($inp),$t[0]
___
push(@t,shift(@t));
}
for (;$i<128/4;$i++) {
$code.=<<___;
	stw $t[0],`-$XOFF+$i*4`(%sp)
___
push(@t,shift(@t));
}
$code.="L\$collected_pa1\n";
}
# Rounds 0..15 followed by one group of 16 message-schedule rounds that
# the emitted code repeats from L$rounds_pa1. @V is rotated by one
# register pair (two entries) per round; $i carries over between loops.
for($i=0;$i<16;$i++) { &ROUND_00_15_pa1($i,@V); unshift(@V,pop(@V)); unshift(@V,pop(@V)); }
$code.="L\$rounds_pa1\n";
for(;$i<32;$i++) { &ROUND_16_xx_pa1($i,@V); unshift(@V,pop(@V)); unshift(@V,pop(@V)); }
# End of a block on the 32-bit-register path: restore the saved
# arguments, rewind $Tbl to the start of the table, add the previous
# context values to the working variables (add/addc on the low/high
# halves) and store the results back. $inp is advanced by one block; when
# it equals $num control falls to L$done, otherwise the new $inp is saved
# and the code loops to L$oop_pa1.
$code.=<<___;
	$POP `-$FRAME_MARKER-2*$SIZE_T`(%sp),$ctx ; restore arguments
	$POP `-$FRAME_MARKER-3*$SIZE_T`(%sp),$inp
	$POP `-$FRAME_MARKER-4*$SIZE_T`(%sp),$num
	ldo `-$rounds*$SZ`($Tbl),$Tbl ; rewind $Tbl
	ldw `0*4`($ctx),$t1 ; update context
	ldw `1*4`($ctx),$t0
	ldw `2*4`($ctx),$t3
	ldw `3*4`($ctx),$t2
	ldw `4*4`($ctx),$a1
	ldw `5*4`($ctx),$a0
	ldw `6*4`($ctx),$a3
	add $t0,$Alo,$Alo
	ldw `7*4`($ctx),$a2
	addc $t1,$Ahi,$Ahi
	ldw `8*4`($ctx),$t1
	add $t2,$Blo,$Blo
	ldw `9*4`($ctx),$t0
	addc $t3,$Bhi,$Bhi
	ldw `10*4`($ctx),$t3
	add $a0,$Clo,$Clo
	ldw `11*4`($ctx),$t2
	addc $a1,$Chi,$Chi
	ldw `12*4`($ctx),$a1
	add $a2,$Dlo,$Dlo
	ldw `13*4`($ctx),$a0
	addc $a3,$Dhi,$Dhi
	ldw `14*4`($ctx),$a3
	add $t0,$Elo,$Elo
	ldw `15*4`($ctx),$a2
	addc $t1,$Ehi,$Ehi
	stw $Ahi,`0*4`($ctx)
	add $t2,$Flo,$Flo
	stw $Alo,`1*4`($ctx)
	addc $t3,$Fhi,$Fhi
	stw $Bhi,`2*4`($ctx)
	add $a0,$Glo,$Glo
	stw $Blo,`3*4`($ctx)
	addc $a1,$Ghi,$Ghi
	stw $Chi,`4*4`($ctx)
	add $a2,$Hlo,$Hlo
	stw $Clo,`5*4`($ctx)
	addc $a3,$Hhi,$Hhi
	stw $Dhi,`6*4`($ctx)
	ldo `16*$SZ`($inp),$inp ; advance $inp
	stw $Dlo,`7*4`($ctx)
	stw $Ehi,`8*4`($ctx)
	stw $Elo,`9*4`($ctx)
	stw $Fhi,`10*4`($ctx)
	stw $Flo,`11*4`($ctx)
	stw $Ghi,`12*4`($ctx)
	stw $Glo,`13*4`($ctx)
	stw $Hhi,`14*4`($ctx)
	comb,= $inp,$num,L\$done
	stw $Hlo,`15*4`($ctx)
	b L\$oop_pa1
	$PUSH $inp,`-$FRAME_MARKER-3*$SIZE_T`(%sp) ; save $inp
L\$done
___
  623. }}
# Common epilogue: reload the return pointer and %r4-%r18 from the
# frame, then return through %r2. %r3 is restored by the last load, which
# also releases the frame (it mirrors the $PUSHMA in the prologue). The
# trailing .STRINGZ embeds an identification string whose digest size is
# derived from $SZ.
$code.=<<___;
	$POP `-$FRAME-$SAVED_RP`(%sp),%r2 ; standard epilogue
	$POP `-$FRAME+1*$SIZE_T`(%sp),%r4
	$POP `-$FRAME+2*$SIZE_T`(%sp),%r5
	$POP `-$FRAME+3*$SIZE_T`(%sp),%r6
	$POP `-$FRAME+4*$SIZE_T`(%sp),%r7
	$POP `-$FRAME+5*$SIZE_T`(%sp),%r8
	$POP `-$FRAME+6*$SIZE_T`(%sp),%r9
	$POP `-$FRAME+7*$SIZE_T`(%sp),%r10
	$POP `-$FRAME+8*$SIZE_T`(%sp),%r11
	$POP `-$FRAME+9*$SIZE_T`(%sp),%r12
	$POP `-$FRAME+10*$SIZE_T`(%sp),%r13
	$POP `-$FRAME+11*$SIZE_T`(%sp),%r14
	$POP `-$FRAME+12*$SIZE_T`(%sp),%r15
	$POP `-$FRAME+13*$SIZE_T`(%sp),%r16
	$POP `-$FRAME+14*$SIZE_T`(%sp),%r17
	$POP `-$FRAME+15*$SIZE_T`(%sp),%r18
	bv (%r2)
	.EXIT
	$POPMB -$FRAME(%sp),%r3
	.PROCEND
	.STRINGZ "SHA`64*$SZ` block transform for PA-RISC, CRYPTOGAMS by <appro\@openssl.org>"
___
  647. # Explicitly encode PA-RISC 2.0 instructions used in this module, so
  648. # that it can be compiled with .LEVEL 1.0. It should be noted that I
  649. # wouldn't have to do this, if GNU assembler understood .ALLOW 2.0
  650. # directive...
  651. my $ldd = sub {
  652. my ($mod,$args) = @_;
  653. my $orig = "ldd$mod\t$args";
  654. if ($args =~ /(\-?[0-9]+)\(%r([0-9]+)\),%r([0-9]+)/) # format 3 suffices
  655. { my $opcode=(0x14<<26)|($2<<21)|($3<<16)|(($1&0x1FF8)<<1)|(($1>>13)&1);
  656. $opcode|=(1<<3) if ($mod =~ /^,m/);
  657. $opcode|=(1<<2) if ($mod =~ /^,mb/);
  658. sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
  659. }
  660. else { "\t".$orig; }
  661. };
  662. my $std = sub {
  663. my ($mod,$args) = @_;
  664. my $orig = "std$mod\t$args";
  665. if ($args =~ /%r([0-9]+),(\-?[0-9]+)\(%r([0-9]+)\)/) # format 3 suffices
  666. { my $opcode=(0x1c<<26)|($3<<21)|($1<<16)|(($2&0x1FF8)<<1)|(($2>>13)&1);
  667. sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
  668. }
  669. else { "\t".$orig; }
  670. };
  671. my $extrd = sub {
  672. my ($mod,$args) = @_;
  673. my $orig = "extrd$mod\t$args";
  674. # I only have ",u" completer, it's implicitly encoded...
  675. if ($args =~ /%r([0-9]+),([0-9]+),([0-9]+),%r([0-9]+)/) # format 15
  676. { my $opcode=(0x36<<26)|($1<<21)|($4<<16);
  677. my $len=32-$3;
  678. $opcode |= (($2&0x20)<<6)|(($2&0x1f)<<5); # encode pos
  679. $opcode |= (($len&0x20)<<7)|($len&0x1f); # encode len
  680. sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
  681. }
  682. elsif ($args =~ /%r([0-9]+),%sar,([0-9]+),%r([0-9]+)/) # format 12
  683. { my $opcode=(0x34<<26)|($1<<21)|($3<<16)|(2<<11)|(1<<9);
  684. my $len=32-$2;
  685. $opcode |= (($len&0x20)<<3)|($len&0x1f); # encode len
  686. $opcode |= (1<<13) if ($mod =~ /,\**=/);
  687. sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
  688. }
  689. else { "\t".$orig; }
  690. };
  691. my $shrpd = sub {
  692. my ($mod,$args) = @_;
  693. my $orig = "shrpd$mod\t$args";
  694. if ($args =~ /%r([0-9]+),%r([0-9]+),([0-9]+),%r([0-9]+)/) # format 14
  695. { my $opcode=(0x34<<26)|($2<<21)|($1<<16)|(1<<10)|$4;
  696. my $cpos=63-$3;
  697. $opcode |= (($cpos&0x20)<<6)|(($cpos&0x1f)<<5); # encode sa
  698. sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
  699. }
  700. elsif ($args =~ /%r([0-9]+),%r([0-9]+),%sar,%r([0-9]+)/) # format 11
  701. { sprintf "\t.WORD\t0x%08x\t; %s",
  702. (0x34<<26)|($2<<21)|($1<<16)|(1<<9)|$3,$orig;
  703. }
  704. else { "\t".$orig; }
  705. };
  706. sub assemble {
  707. my ($mnemonic,$mod,$args)=@_;
  708. my $opcode = eval("\$$mnemonic");
  709. ref($opcode) eq 'CODE' ? &$opcode($mod,$args) : "\t$mnemonic$mod\t$args";
  710. }
  711. if (`$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1`
  712. =~ /GNU assembler/) {
  713. $gnuas = 1;
  714. }
  715. foreach (split("\n",$code)) {
  716. s/\`([^\`]*)\`/eval $1/ge;
  717. s/shd\s+(%r[0-9]+),(%r[0-9]+),([0-9]+)/
  718. $3>31 ? sprintf("shd\t%$2,%$1,%d",$3-32) # rotation for >=32
  719. : sprintf("shd\t%$1,%$2,%d",$3)/e or
  720. # translate made up instructions: _ror, _shr, _align, _shl
  721. s/_ror(\s+)(%r[0-9]+),/
  722. ($SZ==4 ? "shd" : "shrpd")."$1$2,$2,"/e or
  723. s/_shr(\s+%r[0-9]+),([0-9]+),/
  724. $SZ==4 ? sprintf("extru%s,%d,%d,",$1,31-$2,32-$2)
  725. : sprintf("extrd,u%s,%d,%d,",$1,63-$2,64-$2)/e or
  726. s/_align(\s+%r[0-9]+,%r[0-9]+),/
  727. ($SZ==4 ? "vshd$1," : "shrpd$1,%sar,")/e or
  728. s/_shl(\s+%r[0-9]+),([0-9]+),/
  729. $SIZE_T==4 ? sprintf("zdep%s,%d,%d,",$1,31-$2,32-$2)
  730. : sprintf("depd,z%s,%d,%d,",$1,63-$2,64-$2)/e;
  731. s/^\s+([a-z]+)([\S]*)\s+([\S]*)/&assemble($1,$2,$3)/e if ($SIZE_T==4);
  732. s/(\.LEVEL\s+2\.0)W/$1w/ if ($gnuas && $SIZE_T==8);
  733. s/\.SPACE\s+\$TEXT\$/.text/ if ($gnuas && $SIZE_T==8);
  734. s/\.SUBSPA.*// if ($gnuas && $SIZE_T==8);
  735. s/cmpb,\*/comb,/ if ($SIZE_T==4);
  736. s/\bbv\b/bve/ if ($SIZE_T==8);
  737. print $_,"\n";
  738. }
  739. close STDOUT;