sha512-parisc.pl 21 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791
  1. #!/usr/bin/env perl
  2. # ====================================================================
  3. # Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
  4. # project. The module is, however, dual licensed under OpenSSL and
  5. # CRYPTOGAMS licenses depending on where you obtain it. For further
  6. # details see http://www.openssl.org/~appro/cryptogams/.
  7. # ====================================================================
  8. # SHA256/512 block procedure for PA-RISC.
  9. # June 2009.
  10. #
  11. # SHA256 performance is >75% better than gcc 3.2 generated code on
  12. # PA-7100LC. Compared to code generated by vendor compiler this
  13. # implementation is almost 70% faster in 64-bit build, but delivers
  14. # virtually same performance in 32-bit build on PA-8600.
  15. #
  16. # SHA512 performance is >2.9x better than gcc 3.2 generated code on
  17. # PA-7100LC, PA-RISC 1.1 processor. The implementation then detects if
  18. # the code is executed on a PA-RISC 2.0 processor and switches to a 64-bit
  19. # code path delivering adequate performance even in "blended" 32-bit
  20. # build. Though 64-bit code is not any faster than code generated by
  21. # vendor compiler on PA-8600...
  22. #
  23. # Special thanks to polarhome.com for providing HP-UX account.
  # Command line: <flavour> <output file>.
  #   $flavour - ABI selector; a value containing "64" selects the 64-bit
  #              settings in the flavour test that follows.
  #   $output  - name of the assembly file to generate; the name is also
  #              tested for "512" to choose between SHA-512 and SHA-256.
  $flavour = shift;
  $output  = shift;

  # All generated text is emitted with plain print, so point STDOUT at the
  # output file.  Fail loudly if the file cannot be created instead of
  # silently throwing the generated code away; when no output name is
  # given, keep writing to the inherited STDOUT.
  if (defined($output) && length($output)) {
      open(STDOUT, '>', $output) or die "can't open $output: $!";
  }
  # ABI-dependent parameters.  A flavour containing "64" selects the first
  # row (64-bit), anything else the second row (32-bit):
  #   $LEVEL         argument of the .LEVEL directive
  #   $SIZE_T        size of a saved register / pointer in bytes
  #   $FRAME_MARKER  size of the frame marker area
  #   $SAVED_RP      offset (below %sp) at which the return pointer is saved
  #   $PUSH/$PUSHMA  store / store-and-modify mnemonics used in the prologue
  #   $POP/$POPMB    load / load-and-modify mnemonics used in the epilogue
  ($LEVEL, $SIZE_T, $FRAME_MARKER, $SAVED_RP, $PUSH, $PUSHMA, $POP, $POPMB) =
      ($flavour =~ /64/)
          ? ("2.0W", 8, 80, 16, "std", "std,ma", "ldd", "ldd,mb")
          : ("1.0",  4, 48, 20, "stw", "stwm",   "ldw", "ldwm");
  # Hash-variant parameters, selected by the output file name: a name
  # containing "512" generates sha512_block_data_order, anything else
  # sha256_block_data_order.
  #   $SZ                  size of one hash word in bytes
  #   @Sigma0/@Sigma1      rotate counts of the round functions
  #   @sigma0/@sigma1      rotate/shift counts of the message schedule
  #   $rounds              number of rounds
  #   $LAST10BITS          low 10 bits of the last table constant; the
  #                        generated code compares against it to detect
  #                        the end of the table
  #   $LD/$LDM/$ST         load, load-and-modify and store mnemonics for
  #                        one hash word
  {
      my $sha512 = ($output =~ /512/) ? 1 : 0;

      ($func, $SZ, $rounds, $LAST10BITS, $LD, $LDM, $ST) = $sha512
          ? ("sha512_block_data_order", 8, 80, 0x017, "ldd", "ldd,ma", "std")
          : ("sha256_block_data_order", 4, 64, 0x0f2, "ldw", "ldwm",   "stw");

      @Sigma0 = $sha512 ? (28,34,39) : ( 2,13,22);
      @Sigma1 = $sha512 ? (14,18,41) : ( 6,11,25);
      @sigma0 = $sha512 ? ( 1, 8, 7) : ( 7,18, 3);
      @sigma1 = $sha512 ? (19,61, 6) : (17,19,10);
  }
  # Stack frame layout.
  $XOFF   = 16*$SZ + 32;                  # local variables
  $FRAME  = 16*$SIZE_T + $FRAME_MARKER    # 16 saved regs + frame marker
                                          #                 [+ argument transfer]
          + $XOFF;                        # ... plus the local variables
  $XOFF  += $FRAME_MARKER;                # distance between %sp and local variables

  # Register allocation.  The three incoming arguments share registers
  # with scratch values, so each is zapped once its scratch alias is used.
  ($ctx, $inp, $num) = ("%r26", "%r25", "%r24");
  ($a0,  $a1,  $t0)  = ("%r26", "%r25", "%r24");   # zap $ctx, $inp, $num
  $t1  = "%r29";
  $Tbl = "%r31";

  # Working variables a..h, and the 16 message-schedule registers followed
  # by $inp as a 17th entry.
  @V = ($A,$B,$C,$D,$E,$F,$G,$H) = map { "%r$_" } (17 .. 23, 28);
  @X = ((map { "%r$_" } 1 .. 16), $inp);
  # ROUND_00_15($i, $a,$b,$c,$d,$e,$f,$g,$h)
  #
  # Appends the assembly template for one compression round to $code.
  # $i is the round index; the remaining arguments are the register names
  # currently playing the roles of the working variables a..h.
  #
  # Following the inline assembler comments, the emitted code adds $t1,
  # X[i%16], Ch(e,f,g) and Sigma1(e) into h, adds that h into d, and then
  # adds Sigma0(a) and Maj(a,b,c) into h.  $t1 is consumed on entry, so it
  # must already hold the table word fetched by an earlier $LDM; for $i<15
  # the template itself fetches the next table word into $t1.
  #
  # _ror is a made-up mnemonic and the `...` spans are Perl expressions;
  # both are resolved by the post-processing loop at the end of the file.
  87. sub ROUND_00_15 {
  88. my ($i,$a,$b,$c,$d,$e,$f,$g,$h)=@_;
  89. $code.=<<___;
  90. _ror $e,$Sigma1[0],$a0
  91. and $f,$e,$t0
  92. _ror $e,$Sigma1[1],$a1
  93. addl $t1,$h,$h
  94. andcm $g,$e,$t1
  95. xor $a1,$a0,$a0
  96. _ror $a1,`$Sigma1[2]-$Sigma1[1]`,$a1
  97. or $t0,$t1,$t1 ; Ch(e,f,g)
  98. addl @X[$i%16],$h,$h
  99. xor $a0,$a1,$a1 ; Sigma1(e)
  100. addl $t1,$h,$h
  101. _ror $a,$Sigma0[0],$a0
  102. addl $a1,$h,$h
  103. _ror $a,$Sigma0[1],$a1
  104. and $a,$b,$t0
  105. and $a,$c,$t1
  106. xor $a1,$a0,$a0
  107. _ror $a1,`$Sigma0[2]-$Sigma0[1]`,$a1
  108. xor $t1,$t0,$t0
  109. and $b,$c,$t1
  110. xor $a0,$a1,$a1 ; Sigma0(a)
  111. addl $h,$d,$d
  112. xor $t1,$t0,$t0 ; Maj(a,b,c)
  113. `"$LDM $SZ($Tbl),$t1" if ($i<15)`
  114. addl $a1,$h,$h
  115. addl $t0,$h,$h
  116. ___
  117. }
  # ROUND_16_xx($i, $a,$b,$c,$d,$e,$f,$g,$h)
  #
  # Appends one round with message-schedule expansion to $code.  $i is
  # reduced by 16 first, so all @X indices below are relative to the
  # 16-entry schedule window.
  #
  # The emitted code updates X[i] in place:
  #     X[i] += X[i+9] + sigma0(X[i+1]) + sigma1(X[i+14])
  # and fetches the next table word into $t1 with $LDM.  For the last
  # round of the unrolled group (reduced $i == 15) it additionally extracts
  # the low 10 bits of that table word and compares them with $LAST10BITS;
  # the conditional "ldo 1($Tbl),$Tbl" then flags the end of the table by
  # making $Tbl odd (presumably executed only on a match -- confirm
  # against the comiclr completer semantics).  Finally the regular round
  # body is emitted via ROUND_00_15 with the original index restored.
  #
  # _ror/_shr are made-up mnemonics translated by the post-processing loop.
  118. sub ROUND_16_xx {
  119. my ($i,$a,$b,$c,$d,$e,$f,$g,$h)=@_;
  120. $i-=16;
  121. $code.=<<___;
  122. _ror @X[($i+1)%16],$sigma0[0],$a0
  123. _ror @X[($i+1)%16],$sigma0[1],$a1
  124. addl @X[($i+9)%16],@X[$i],@X[$i]
  125. _ror @X[($i+14)%16],$sigma1[0],$t0
  126. _ror @X[($i+14)%16],$sigma1[1],$t1
  127. xor $a1,$a0,$a0
  128. _shr @X[($i+1)%16],$sigma0[2],$a1
  129. xor $t1,$t0,$t0
  130. _shr @X[($i+14)%16],$sigma1[2],$t1
  131. xor $a1,$a0,$a0 ; sigma0(X[(i+1)&0x0f])
  132. xor $t1,$t0,$t0 ; sigma1(X[(i+14)&0x0f])
  133. $LDM $SZ($Tbl),$t1
  134. addl $a0,@X[$i],@X[$i]
  135. addl $t0,@X[$i],@X[$i]
  136. ___
  137. $code.=<<___ if ($i==15);
  138. extru $t1,31,10,$a1
  139. comiclr,<> $LAST10BITS,$a1,%r0
  140. ldo 1($Tbl),$Tbl ; signal end of $Tbl
  141. ___
  142. &ROUND_00_15($i+16,$a,$b,$c,$d,$e,$f,$g,$h);
  143. }
  # Start of the generated text ($code is initialised here, everything
  # later is appended): assembler level, code space/subspace directives
  # and the 64-byte aligned L$table label.  The constant table words that
  # the round code reads through $Tbl are appended right after this.
  144. $code=<<___;
  145. .LEVEL $LEVEL
  146. .SPACE \$TEXT\$
  147. .SUBSPA \$CODE\$,QUAD=0,ALIGN=8,ACCESS=0x2C,CODE_ONLY
  148. .ALIGN 64
  149. L\$table
  150. ___
  151. $code.=<<___ if ($SZ==8);
  152. .WORD 0x428a2f98,0xd728ae22,0x71374491,0x23ef65cd
  153. .WORD 0xb5c0fbcf,0xec4d3b2f,0xe9b5dba5,0x8189dbbc
  154. .WORD 0x3956c25b,0xf348b538,0x59f111f1,0xb605d019
  155. .WORD 0x923f82a4,0xaf194f9b,0xab1c5ed5,0xda6d8118
  156. .WORD 0xd807aa98,0xa3030242,0x12835b01,0x45706fbe
  157. .WORD 0x243185be,0x4ee4b28c,0x550c7dc3,0xd5ffb4e2
  158. .WORD 0x72be5d74,0xf27b896f,0x80deb1fe,0x3b1696b1
  159. .WORD 0x9bdc06a7,0x25c71235,0xc19bf174,0xcf692694
  160. .WORD 0xe49b69c1,0x9ef14ad2,0xefbe4786,0x384f25e3
  161. .WORD 0x0fc19dc6,0x8b8cd5b5,0x240ca1cc,0x77ac9c65
  162. .WORD 0x2de92c6f,0x592b0275,0x4a7484aa,0x6ea6e483
  163. .WORD 0x5cb0a9dc,0xbd41fbd4,0x76f988da,0x831153b5
  164. .WORD 0x983e5152,0xee66dfab,0xa831c66d,0x2db43210
  165. .WORD 0xb00327c8,0x98fb213f,0xbf597fc7,0xbeef0ee4
  166. .WORD 0xc6e00bf3,0x3da88fc2,0xd5a79147,0x930aa725
  167. .WORD 0x06ca6351,0xe003826f,0x14292967,0x0a0e6e70
  168. .WORD 0x27b70a85,0x46d22ffc,0x2e1b2138,0x5c26c926
  169. .WORD 0x4d2c6dfc,0x5ac42aed,0x53380d13,0x9d95b3df
  170. .WORD 0x650a7354,0x8baf63de,0x766a0abb,0x3c77b2a8
  171. .WORD 0x81c2c92e,0x47edaee6,0x92722c85,0x1482353b
  172. .WORD 0xa2bfe8a1,0x4cf10364,0xa81a664b,0xbc423001
  173. .WORD 0xc24b8b70,0xd0f89791,0xc76c51a3,0x0654be30
  174. .WORD 0xd192e819,0xd6ef5218,0xd6990624,0x5565a910
  175. .WORD 0xf40e3585,0x5771202a,0x106aa070,0x32bbd1b8
  176. .WORD 0x19a4c116,0xb8d2d0c8,0x1e376c08,0x5141ab53
  177. .WORD 0x2748774c,0xdf8eeb99,0x34b0bcb5,0xe19b48a8
  178. .WORD 0x391c0cb3,0xc5c95a63,0x4ed8aa4a,0xe3418acb
  179. .WORD 0x5b9cca4f,0x7763e373,0x682e6ff3,0xd6b2b8a3
  180. .WORD 0x748f82ee,0x5defb2fc,0x78a5636f,0x43172f60
  181. .WORD 0x84c87814,0xa1f0ab72,0x8cc70208,0x1a6439ec
  182. .WORD 0x90befffa,0x23631e28,0xa4506ceb,0xde82bde9
  183. .WORD 0xbef9a3f7,0xb2c67915,0xc67178f2,0xe372532b
  184. .WORD 0xca273ece,0xea26619c,0xd186b8c7,0x21c0c207
  185. .WORD 0xeada7dd6,0xcde0eb1e,0xf57d4f7f,0xee6ed178
  186. .WORD 0x06f067aa,0x72176fba,0x0a637dc5,0xa2c898a6
  187. .WORD 0x113f9804,0xbef90dae,0x1b710b35,0x131c471b
  188. .WORD 0x28db77f5,0x23047d84,0x32caab7b,0x40c72493
  189. .WORD 0x3c9ebe0a,0x15c9bebc,0x431d67c4,0x9c100d4c
  190. .WORD 0x4cc5d4be,0xcb3e42b6,0x597f299c,0xfc657e2a
  191. .WORD 0x5fcb6fab,0x3ad6faec,0x6c44198c,0x4a475817
  192. ___
  193. $code.=<<___ if ($SZ==4);
  194. .WORD 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
  195. .WORD 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
  196. .WORD 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
  197. .WORD 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
  198. .WORD 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
  199. .WORD 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
  200. .WORD 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
  201. .WORD 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
  202. .WORD 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
  203. .WORD 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
  204. .WORD 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
  205. .WORD 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
  206. .WORD 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
  207. .WORD 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
  208. .WORD 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
  209. .WORD 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
  210. ___
  # Function entry.  The template exports $func, saves %r2 (return
  # pointer) and %r3..%r18 in a frame of $FRAME bytes, turns $num from a
  # block count into a pointer to the end of the input (shift by
  # log2(16*$SZ), then add $inp), spills the three arguments below the
  # frame marker because their registers are reused as scratch, and
  # derives the address of L$table relative to L$pic after clearing the
  # privilege-level bits obtained from blr.
  211. $code.=<<___;
  212. .EXPORT $func,ENTRY,ARGW0=GR,ARGW1=GR,ARGW2=GR
  213. .ALIGN 64
  214. $func
  215. .PROC
  216. .CALLINFO FRAME=`$FRAME-16*$SIZE_T`,NO_CALLS,SAVE_RP,ENTRY_GR=18
  217. .ENTRY
  218. $PUSH %r2,-$SAVED_RP(%sp) ; standard prologue
  219. $PUSHMA %r3,$FRAME(%sp)
  220. $PUSH %r4,`-$FRAME+1*$SIZE_T`(%sp)
  221. $PUSH %r5,`-$FRAME+2*$SIZE_T`(%sp)
  222. $PUSH %r6,`-$FRAME+3*$SIZE_T`(%sp)
  223. $PUSH %r7,`-$FRAME+4*$SIZE_T`(%sp)
  224. $PUSH %r8,`-$FRAME+5*$SIZE_T`(%sp)
  225. $PUSH %r9,`-$FRAME+6*$SIZE_T`(%sp)
  226. $PUSH %r10,`-$FRAME+7*$SIZE_T`(%sp)
  227. $PUSH %r11,`-$FRAME+8*$SIZE_T`(%sp)
  228. $PUSH %r12,`-$FRAME+9*$SIZE_T`(%sp)
  229. $PUSH %r13,`-$FRAME+10*$SIZE_T`(%sp)
  230. $PUSH %r14,`-$FRAME+11*$SIZE_T`(%sp)
  231. $PUSH %r15,`-$FRAME+12*$SIZE_T`(%sp)
  232. $PUSH %r16,`-$FRAME+13*$SIZE_T`(%sp)
  233. $PUSH %r17,`-$FRAME+14*$SIZE_T`(%sp)
  234. $PUSH %r18,`-$FRAME+15*$SIZE_T`(%sp)
  235. _shl $num,`log(16*$SZ)/log(2)`,$num
  236. addl $inp,$num,$num ; $num to point at the end of $inp
  237. $PUSH $num,`-$FRAME_MARKER-4*$SIZE_T`(%sp) ; save arguments
  238. $PUSH $inp,`-$FRAME_MARKER-3*$SIZE_T`(%sp)
  239. $PUSH $ctx,`-$FRAME_MARKER-2*$SIZE_T`(%sp)
  240. blr %r0,$Tbl
  241. ldi 3,$t1
  242. L\$pic
  243. andcm $Tbl,$t1,$Tbl ; wipe privilege level
  244. ldo L\$table-L\$pic($Tbl),$Tbl
  245. ___
  # SHA-512 in a 32-bit build only: run-time processor probe.  The emitted
  # sequence ends in a branch to L$parisc1, the 32-bit register-pair code
  # path generated further down.  NOTE(review): the extrd is hand-encoded
  # by the $extrd helper; per its inline comment it "executes on PA-RISC
  # 1.0", i.e. it is presumably what lets 2.0 processors skip the branch
  # and fall through to the 64-bit path -- confirm against the ISA manual.
  246. $code.=<<___ if ($SZ==8 && $SIZE_T==4);
  247. ldi 31,$t1
  248. mtctl $t1,%cr11
  249. extrd,u,*= $t1,%sar,1,$t1 ; executes on PA-RISC 1.0
  250. b L\$parisc1
  251. nop
  252. ___
  # Main code path: load the eight context words into the working
  # registers, derive the shift amount for unaligned input from the low
  # bits of $inp and put it in %cr11 (%sar), then open the per-block loop
  # at L$oop, where the first table word is fetched into $t1 and an
  # aligned copy of $inp is formed in $t0.
  253. $code.=<<___;
  254. $LD `0*$SZ`($ctx),$A ; load context
  255. $LD `1*$SZ`($ctx),$B
  256. $LD `2*$SZ`($ctx),$C
  257. $LD `3*$SZ`($ctx),$D
  258. $LD `4*$SZ`($ctx),$E
  259. $LD `5*$SZ`($ctx),$F
  260. $LD `6*$SZ`($ctx),$G
  261. $LD `7*$SZ`($ctx),$H
  262. extru $inp,31,`log($SZ)/log(2)`,$t0
  263. sh3addl $t0,%r0,$t0
  264. subi `8*$SZ`,$t0,$t0
  265. mtctl $t0,%cr11 ; load %sar with align factor
  266. L\$oop
  267. ldi `$SZ-1`,$t0
  268. $LDM $SZ($Tbl),$t1
  269. andcm $inp,$t0,$t0 ; align $inp
  270. ___
  # Words 0..14 of the block are loaded here; word 15 is loaded by the
  # template that follows, on the line right after the aligned-input
  # branch.
  271. for ($i=0;$i<15;$i++) { # load input block
  272. $code.="\t$LD `$SZ*$i`($t0),@X[$i]\n"; }
  # If $inp was already aligned skip to L$aligned; otherwise a 17th word
  # is loaded into @X[16] (which is $inp's register, hence the argument
  # spill in the prologue) for the realignment below.
  273. $code.=<<___;
  274. cmpb,*= $inp,$t0,L\$aligned
  275. $LD `$SZ*15`($t0),@X[15]
  276. $LD `$SZ*16`($t0),@X[16]
  277. ___
  # Realign: each X[i] is rebuilt from the pair X[i],X[i+1] with the
  # made-up _align instruction (translated by the post-processing loop).
  278. for ($i=0;$i<16;$i++) { # align data
  279. $code.="\t_align @X[$i],@X[$i+1],@X[$i]\n"; }
  280. $code.=<<___;
  281. L\$aligned
  282. nop ; otherwise /usr/ccs/bin/as is confused by below .WORD
  283. ___
  # Rounds 0..15, fully unrolled.  @V is rotated after every round so the
  # register roles a..h shift instead of the data moving between registers.
  284. for($i=0;$i<16;$i++) { &ROUND_00_15($i,@V); unshift(@V,pop(@V)); }
  285. $code.=<<___;
  286. L\$rounds
  287. nop ; otherwise /usr/ccs/bin/as is confused by below .WORD
  288. ___
  # One group of 16 schedule-expanding rounds; the generated code loops
  # back to L$rounds until the end of the table is signalled.
  289. for(;$i<32;$i++) { &ROUND_16_xx($i,@V); unshift(@V,pop(@V)); }
  # Tail of the per-block loop.  The template loops back to L$rounds until
  # bit 31 of $Tbl is set (the end-of-table flag added by ROUND_16_xx),
  # reloads the spilled arguments, rewinds $Tbl by the table size plus the
  # flag increment, adds the previous context into the working variables,
  # stores the result back to the context, advances $inp by one block and
  # repeats from L$oop until $inp reaches $num.  The updated $inp is
  # written back to its stack slot by the last template line.
  290. $code.=<<___;
  291. bb,>= $Tbl,31,L\$rounds ; end of $Tbl signalled?
  292. nop
  293. $POP `-$FRAME_MARKER-2*$SIZE_T`(%sp),$ctx ; restore arguments
  294. $POP `-$FRAME_MARKER-3*$SIZE_T`(%sp),$inp
  295. $POP `-$FRAME_MARKER-4*$SIZE_T`(%sp),$num
  296. ldo `-$rounds*$SZ-1`($Tbl),$Tbl ; rewind $Tbl
  297. $LD `0*$SZ`($ctx),@X[0] ; load context
  298. $LD `1*$SZ`($ctx),@X[1]
  299. $LD `2*$SZ`($ctx),@X[2]
  300. $LD `3*$SZ`($ctx),@X[3]
  301. $LD `4*$SZ`($ctx),@X[4]
  302. $LD `5*$SZ`($ctx),@X[5]
  303. addl @X[0],$A,$A
  304. $LD `6*$SZ`($ctx),@X[6]
  305. addl @X[1],$B,$B
  306. $LD `7*$SZ`($ctx),@X[7]
  307. ldo `16*$SZ`($inp),$inp ; advance $inp
  308. $ST $A,`0*$SZ`($ctx) ; save context
  309. addl @X[2],$C,$C
  310. $ST $B,`1*$SZ`($ctx)
  311. addl @X[3],$D,$D
  312. $ST $C,`2*$SZ`($ctx)
  313. addl @X[4],$E,$E
  314. $ST $D,`3*$SZ`($ctx)
  315. addl @X[5],$F,$F
  316. $ST $E,`4*$SZ`($ctx)
  317. addl @X[6],$G,$G
  318. $ST $F,`5*$SZ`($ctx)
  319. addl @X[7],$H,$H
  320. $ST $G,`6*$SZ`($ctx)
  321. $ST $H,`7*$SZ`($ctx)
  322. cmpb,*<>,n $inp,$num,L\$oop
  323. $PUSH $inp,`-$FRAME_MARKER-3*$SIZE_T`(%sp) ; save $inp
  324. ___
  325. if ($SZ==8 && $SIZE_T==4) # SHA512 for 32-bit PA-RISC 1.0
  326. {{
  327. $code.=<<___;
  328. b L\$done
  329. nop
  330. .ALIGN 64
  331. L\$parisc1
  332. ___
  333. @V=( $Ahi, $Alo, $Bhi, $Blo, $Chi, $Clo, $Dhi, $Dlo,
  334. $Ehi, $Elo, $Fhi, $Flo, $Ghi, $Glo, $Hhi, $Hlo) =
  335. ( "%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7", "%r8",
  336. "%r9","%r10","%r11","%r12","%r13","%r14","%r15","%r16");
  337. $a0 ="%r17";
  338. $a1 ="%r18";
  339. $a2 ="%r19";
  340. $a3 ="%r20";
  341. $t0 ="%r21";
  342. $t1 ="%r22";
  343. $t2 ="%r28";
  344. $t3 ="%r29";
  345. $Tbl="%r31";
  346. @X=("%r23","%r24","%r25","%r26"); # zaps $num,$inp,$ctx
  # ROUND_00_15_pa1($i, <16 register names>, $flag)
  #
  # 32-bit register-pair variant of the round body: every 64-bit working
  # variable a..h is passed as a hi/lo pair of register names, and 64-bit
  # additions are emitted as add/addc pairs.
  #
  # The first two entries of @X name the registers holding the current
  # X[i] (hi, lo), the next two receive X[i+1].  Unless $flag is true the
  # template first loads X[i+1] from its stack slot at
  # -$XOFF+8*((i+1)%16); ROUND_16_xx_pa1 passes $flag=1 because it has
  # already loaded that word itself.  Once X[i] has been added to h its
  # registers are reused to fetch K[i] from the table, advancing $Tbl by 8.
  #
  # When called with $i==15 and $flag set, the template ends with a
  # check of the low 10 bits of the K[i] low word against $LAST10BITS and
  # a branch back to L$rounds_pa1 (presumably skipped when they match,
  # i.e. at the end of the table -- confirm against comiclr semantics).
  #
  # On return @X has been rotated by two entries, so the registers that
  # received X[i+1] become the "current X" registers of the next call.
  347. sub ROUND_00_15_pa1 {
  348. my ($i,$ahi,$alo,$bhi,$blo,$chi,$clo,$dhi,$dlo,
  349. $ehi,$elo,$fhi,$flo,$ghi,$glo,$hhi,$hlo,$flag)=@_;
  350. my ($Xhi,$Xlo,$Xnhi,$Xnlo) = @X;
  351. $code.=<<___ if (!$flag);
  352. ldw `-$XOFF+8*(($i+1)%16)`(%sp),$Xnhi
  353. ldw `-$XOFF+8*(($i+1)%16)+4`(%sp),$Xnlo ; load X[i+1]
  354. ___
  355. $code.=<<___;
  356. shd $ehi,$elo,$Sigma1[0],$t0
  357. add $Xlo,$hlo,$hlo
  358. shd $elo,$ehi,$Sigma1[0],$t1
  359. addc $Xhi,$hhi,$hhi ; h += X[i]
  360. shd $ehi,$elo,$Sigma1[1],$t2
  361. ldwm 8($Tbl),$Xhi
  362. shd $elo,$ehi,$Sigma1[1],$t3
  363. ldw -4($Tbl),$Xlo ; load K[i]
  364. xor $t2,$t0,$t0
  365. xor $t3,$t1,$t1
  366. and $flo,$elo,$a0
  367. and $fhi,$ehi,$a1
  368. shd $ehi,$elo,$Sigma1[2],$t2
  369. andcm $glo,$elo,$a2
  370. shd $elo,$ehi,$Sigma1[2],$t3
  371. andcm $ghi,$ehi,$a3
  372. xor $t2,$t0,$t0
  373. xor $t3,$t1,$t1 ; Sigma1(e)
  374. add $Xlo,$hlo,$hlo
  375. xor $a2,$a0,$a0
  376. addc $Xhi,$hhi,$hhi ; h += K[i]
  377. xor $a3,$a1,$a1 ; Ch(e,f,g)
  378. add $t0,$hlo,$hlo
  379. shd $ahi,$alo,$Sigma0[0],$t0
  380. addc $t1,$hhi,$hhi ; h += Sigma1(e)
  381. shd $alo,$ahi,$Sigma0[0],$t1
  382. add $a0,$hlo,$hlo
  383. shd $ahi,$alo,$Sigma0[1],$t2
  384. addc $a1,$hhi,$hhi ; h += Ch(e,f,g)
  385. shd $alo,$ahi,$Sigma0[1],$t3
  386. xor $t2,$t0,$t0
  387. xor $t3,$t1,$t1
  388. shd $ahi,$alo,$Sigma0[2],$t2
  389. and $alo,$blo,$a0
  390. shd $alo,$ahi,$Sigma0[2],$t3
  391. and $ahi,$bhi,$a1
  392. xor $t2,$t0,$t0
  393. xor $t3,$t1,$t1 ; Sigma0(a)
  394. and $alo,$clo,$a2
  395. and $ahi,$chi,$a3
  396. xor $a2,$a0,$a0
  397. add $hlo,$dlo,$dlo
  398. xor $a3,$a1,$a1
  399. addc $hhi,$dhi,$dhi ; d += h
  400. and $blo,$clo,$a2
  401. add $t0,$hlo,$hlo
  402. and $bhi,$chi,$a3
  403. addc $t1,$hhi,$hhi ; h += Sigma0(a)
  404. xor $a2,$a0,$a0
  405. add $a0,$hlo,$hlo
  406. xor $a3,$a1,$a1 ; Maj(a,b,c)
  407. addc $a1,$hhi,$hhi ; h += Maj(a,b,c)
  408. ___
  409. $code.=<<___ if ($i==15 && $flag);
  410. extru $Xlo,31,10,$Xlo
  411. comiclr,= $LAST10BITS,$Xlo,%r0
  412. b L\$rounds_pa1
  413. nop
  414. ___
  415. push(@X,shift(@X)); push(@X,shift(@X));
  416. }
  # ROUND_16_xx_pa1($i, <16 register names>)
  #
  # 32-bit register-pair variant of the schedule-expanding round.  $i is
  # shifted off @_ and reduced by 16; the remaining 16 hi/lo register
  # names are forwarded unchanged to ROUND_00_15_pa1.
  #
  # The template loads X[i+1], X[i+9] and X[i+14] from their stack slots,
  # computes
  #     X[i] += X[i+9] + sigma0(X[i+1]) + sigma1(X[i+14])
  # on the hi/lo halves (shd for the rotates/shifts of the low result,
  # extru for the plain shift of the high half), and stores the new X[i]
  # back to its stack slot.  The round body is then emitted with $flag=1
  # so that X[i+1] is not loaded a second time.
  #
  # NOTE(review): the last emitted assembler comment labels the X[i+14]
  # term "sigma0" although it is computed with the @sigma1 counts; the
  # label in the generated output looks like a copy/paste slip.
  417. sub ROUND_16_xx_pa1 {
  418. my ($Xhi,$Xlo,$Xnhi,$Xnlo) = @X;
  419. my ($i)=shift;
  420. $i-=16;
  421. $code.=<<___;
  422. ldw `-$XOFF+8*(($i+1)%16)`(%sp),$Xnhi
  423. ldw `-$XOFF+8*(($i+1)%16)+4`(%sp),$Xnlo ; load X[i+1]
  424. ldw `-$XOFF+8*(($i+9)%16)`(%sp),$a1
  425. ldw `-$XOFF+8*(($i+9)%16)+4`(%sp),$a0 ; load X[i+9]
  426. ldw `-$XOFF+8*(($i+14)%16)`(%sp),$a3
  427. ldw `-$XOFF+8*(($i+14)%16)+4`(%sp),$a2 ; load X[i+14]
  428. shd $Xnhi,$Xnlo,$sigma0[0],$t0
  429. shd $Xnlo,$Xnhi,$sigma0[0],$t1
  430. add $a0,$Xlo,$Xlo
  431. shd $Xnhi,$Xnlo,$sigma0[1],$t2
  432. addc $a1,$Xhi,$Xhi
  433. shd $Xnlo,$Xnhi,$sigma0[1],$t3
  434. xor $t2,$t0,$t0
  435. shd $Xnhi,$Xnlo,$sigma0[2],$t2
  436. xor $t3,$t1,$t1
  437. extru $Xnhi,`31-$sigma0[2]`,`32-$sigma0[2]`,$t3
  438. xor $t2,$t0,$t0
  439. shd $a3,$a2,$sigma1[0],$a0
  440. xor $t3,$t1,$t1 ; sigma0(X[i+1)&0x0f])
  441. shd $a2,$a3,$sigma1[0],$a1
  442. add $t0,$Xlo,$Xlo
  443. shd $a3,$a2,$sigma1[1],$t2
  444. addc $t1,$Xhi,$Xhi
  445. shd $a2,$a3,$sigma1[1],$t3
  446. xor $t2,$a0,$a0
  447. shd $a3,$a2,$sigma1[2],$t2
  448. xor $t3,$a1,$a1
  449. extru $a3,`31-$sigma1[2]`,`32-$sigma1[2]`,$t3
  450. xor $t2,$a0,$a0
  451. xor $t3,$a1,$a1 ; sigma0(X[i+14)&0x0f])
  452. add $a0,$Xlo,$Xlo
  453. addc $a1,$Xhi,$Xhi
  454. stw $Xhi,`-$XOFF+8*($i%16)`(%sp)
  455. stw $Xlo,`-$XOFF+8*($i%16)+4`(%sp)
  456. ___
  457. &ROUND_00_15_pa1($i,@_,1);
  458. }
  459. $code.=<<___;
  460. ldw `0*4`($ctx),$Ahi ; load context
  461. ldw `1*4`($ctx),$Alo
  462. ldw `2*4`($ctx),$Bhi
  463. ldw `3*4`($ctx),$Blo
  464. ldw `4*4`($ctx),$Chi
  465. ldw `5*4`($ctx),$Clo
  466. ldw `6*4`($ctx),$Dhi
  467. ldw `7*4`($ctx),$Dlo
  468. ldw `8*4`($ctx),$Ehi
  469. ldw `9*4`($ctx),$Elo
  470. ldw `10*4`($ctx),$Fhi
  471. ldw `11*4`($ctx),$Flo
  472. ldw `12*4`($ctx),$Ghi
  473. ldw `13*4`($ctx),$Glo
  474. ldw `14*4`($ctx),$Hhi
  475. ldw `15*4`($ctx),$Hlo
  476. extru $inp,31,2,$t0
  477. sh3addl $t0,%r0,$t0
  478. subi 32,$t0,$t0
  479. mtctl $t0,%cr11 ; load %sar with align factor
  480. L\$oop_pa1
  481. extru $inp,31,2,$a3
  482. comib,= 0,$a3,L\$aligned_pa1
  483. sub $inp,$a3,$inp
  484. ldw `0*4`($inp),$X[0]
  485. ldw `1*4`($inp),$X[1]
  486. ldw `2*4`($inp),$t2
  487. ldw `3*4`($inp),$t3
  488. ldw `4*4`($inp),$a0
  489. ldw `5*4`($inp),$a1
  490. ldw `6*4`($inp),$a2
  491. ldw `7*4`($inp),$a3
  492. vshd $X[0],$X[1],$X[0]
  493. vshd $X[1],$t2,$X[1]
  494. stw $X[0],`-$XOFF+0*4`(%sp)
  495. ldw `8*4`($inp),$t0
  496. vshd $t2,$t3,$t2
  497. stw $X[1],`-$XOFF+1*4`(%sp)
  498. ldw `9*4`($inp),$t1
  499. vshd $t3,$a0,$t3
  500. ___
  501. {
  502. my @t=($t2,$t3,$a0,$a1,$a2,$a3,$t0,$t1);
  503. for ($i=2;$i<=(128/4-8);$i++) {
  504. $code.=<<___;
  505. stw $t[0],`-$XOFF+$i*4`(%sp)
  506. ldw `(8+$i)*4`($inp),$t[0]
  507. vshd $t[1],$t[2],$t[1]
  508. ___
  509. push(@t,shift(@t));
  510. }
  511. for (;$i<(128/4-1);$i++) {
  512. $code.=<<___;
  513. stw $t[0],`-$XOFF+$i*4`(%sp)
  514. vshd $t[1],$t[2],$t[1]
  515. ___
  516. push(@t,shift(@t));
  517. }
  518. $code.=<<___;
  519. b L\$collected_pa1
  520. stw $t[0],`-$XOFF+$i*4`(%sp)
  521. ___
  522. }
  523. $code.=<<___;
  524. L\$aligned_pa1
  525. ldw `0*4`($inp),$X[0]
  526. ldw `1*4`($inp),$X[1]
  527. ldw `2*4`($inp),$t2
  528. ldw `3*4`($inp),$t3
  529. ldw `4*4`($inp),$a0
  530. ldw `5*4`($inp),$a1
  531. ldw `6*4`($inp),$a2
  532. ldw `7*4`($inp),$a3
  533. stw $X[0],`-$XOFF+0*4`(%sp)
  534. ldw `8*4`($inp),$t0
  535. stw $X[1],`-$XOFF+1*4`(%sp)
  536. ldw `9*4`($inp),$t1
  537. ___
  538. {
  539. my @t=($t2,$t3,$a0,$a1,$a2,$a3,$t0,$t1);
  540. for ($i=2;$i<(128/4-8);$i++) {
  541. $code.=<<___;
  542. stw $t[0],`-$XOFF+$i*4`(%sp)
  543. ldw `(8+$i)*4`($inp),$t[0]
  544. ___
  545. push(@t,shift(@t));
  546. }
  547. for (;$i<128/4;$i++) {
  548. $code.=<<___;
  549. stw $t[0],`-$XOFF+$i*4`(%sp)
  550. ___
  551. push(@t,shift(@t));
  552. }
  553. $code.="L\$collected_pa1\n";
  554. }
  555. for($i=0;$i<16;$i++) { &ROUND_00_15_pa1($i,@V); unshift(@V,pop(@V)); unshift(@V,pop(@V)); }
  556. $code.="L\$rounds_pa1\n";
  557. for(;$i<32;$i++) { &ROUND_16_xx_pa1($i,@V); unshift(@V,pop(@V)); unshift(@V,pop(@V)); }
  558. $code.=<<___;
  559. $POP `-$FRAME_MARKER-2*$SIZE_T`(%sp),$ctx ; restore arguments
  560. $POP `-$FRAME_MARKER-3*$SIZE_T`(%sp),$inp
  561. $POP `-$FRAME_MARKER-4*$SIZE_T`(%sp),$num
  562. ldo `-$rounds*$SZ`($Tbl),$Tbl ; rewind $Tbl
  563. ldw `0*4`($ctx),$t1 ; update context
  564. ldw `1*4`($ctx),$t0
  565. ldw `2*4`($ctx),$t3
  566. ldw `3*4`($ctx),$t2
  567. ldw `4*4`($ctx),$a1
  568. ldw `5*4`($ctx),$a0
  569. ldw `6*4`($ctx),$a3
  570. add $t0,$Alo,$Alo
  571. ldw `7*4`($ctx),$a2
  572. addc $t1,$Ahi,$Ahi
  573. ldw `8*4`($ctx),$t1
  574. add $t2,$Blo,$Blo
  575. ldw `9*4`($ctx),$t0
  576. addc $t3,$Bhi,$Bhi
  577. ldw `10*4`($ctx),$t3
  578. add $a0,$Clo,$Clo
  579. ldw `11*4`($ctx),$t2
  580. addc $a1,$Chi,$Chi
  581. ldw `12*4`($ctx),$a1
  582. add $a2,$Dlo,$Dlo
  583. ldw `13*4`($ctx),$a0
  584. addc $a3,$Dhi,$Dhi
  585. ldw `14*4`($ctx),$a3
  586. add $t0,$Elo,$Elo
  587. ldw `15*4`($ctx),$a2
  588. addc $t1,$Ehi,$Ehi
  589. stw $Ahi,`0*4`($ctx)
  590. add $t2,$Flo,$Flo
  591. stw $Alo,`1*4`($ctx)
  592. addc $t3,$Fhi,$Fhi
  593. stw $Bhi,`2*4`($ctx)
  594. add $a0,$Glo,$Glo
  595. stw $Blo,`3*4`($ctx)
  596. addc $a1,$Ghi,$Ghi
  597. stw $Chi,`4*4`($ctx)
  598. add $a2,$Hlo,$Hlo
  599. stw $Clo,`5*4`($ctx)
  600. addc $a3,$Hhi,$Hhi
  601. stw $Dhi,`6*4`($ctx)
  602. ldo `16*$SZ`($inp),$inp ; advance $inp
  603. stw $Dlo,`7*4`($ctx)
  604. stw $Ehi,`8*4`($ctx)
  605. stw $Elo,`9*4`($ctx)
  606. stw $Fhi,`10*4`($ctx)
  607. stw $Flo,`11*4`($ctx)
  608. stw $Ghi,`12*4`($ctx)
  609. stw $Glo,`13*4`($ctx)
  610. stw $Hhi,`14*4`($ctx)
  611. comb,= $inp,$num,L\$done
  612. stw $Hlo,`15*4`($ctx)
  613. b L\$oop_pa1
  614. $PUSH $inp,`-$FRAME_MARKER-3*$SIZE_T`(%sp) ; save $inp
  615. L\$done
  616. ___
  617. }}
  # Function exit, shared by all code paths: reload the return pointer
  # and %r4..%r18 from the frame, return through %r2, and restore %r3
  # with $POPMB, which also releases the $FRAME-byte frame.  The
  # identification string is emitted after the procedure end.
  618. $code.=<<___;
  619. $POP `-$FRAME-$SAVED_RP`(%sp),%r2 ; standard epilogue
  620. $POP `-$FRAME+1*$SIZE_T`(%sp),%r4
  621. $POP `-$FRAME+2*$SIZE_T`(%sp),%r5
  622. $POP `-$FRAME+3*$SIZE_T`(%sp),%r6
  623. $POP `-$FRAME+4*$SIZE_T`(%sp),%r7
  624. $POP `-$FRAME+5*$SIZE_T`(%sp),%r8
  625. $POP `-$FRAME+6*$SIZE_T`(%sp),%r9
  626. $POP `-$FRAME+7*$SIZE_T`(%sp),%r10
  627. $POP `-$FRAME+8*$SIZE_T`(%sp),%r11
  628. $POP `-$FRAME+9*$SIZE_T`(%sp),%r12
  629. $POP `-$FRAME+10*$SIZE_T`(%sp),%r13
  630. $POP `-$FRAME+11*$SIZE_T`(%sp),%r14
  631. $POP `-$FRAME+12*$SIZE_T`(%sp),%r15
  632. $POP `-$FRAME+13*$SIZE_T`(%sp),%r16
  633. $POP `-$FRAME+14*$SIZE_T`(%sp),%r17
  634. $POP `-$FRAME+15*$SIZE_T`(%sp),%r18
  635. bv (%r2)
  636. .EXIT
  637. $POPMB -$FRAME(%sp),%r3
  638. .PROCEND
  639. .STRINGZ "SHA`64*$SZ` block transform for PA-RISC, CRYPTOGAMS by <appro\@openssl.org>"
  640. ___
  641. # Explicitly encode PA-RISC 2.0 instructions used in this module, so
  642. # that it can be compiled with .LEVEL 1.0. It should be noted that I
  643. # wouldn't have to do this, if GNU assembler understood .ALLOW 2.0
  644. # directive...
  # Hand-encoder for "ldd".
  #   $mod   completer text following the mnemonic ("" or e.g. ",ma")
  #   $args  operand text, expected as  disp(%rB),%rT
  # Returns a tab-indented ".WORD 0x........" line with the original
  # instruction as a trailing comment, or the instruction unchanged when
  # the operands are not in the expected form.
  my $ldd = sub {
      my ($mod, $args) = @_;
      my $orig = "ldd$mod\t$args";

      return "\t" . $orig
          unless $args =~ /(\-?[0-9]+)\(%r([0-9]+)\),%r([0-9]+)/;   # format 3 suffices

      my ($disp, $base, $target) = ($1, $2, $3);
      my $opcode = (0x14 << 26) | ($base << 21) | ($target << 16)
                 | (($disp & 0x1FF8) << 1) | (($disp >> 13) & 1);
      $opcode |= (1 << 3) if ($mod =~ /^,m/);     # any modify completer
      $opcode |= (1 << 2) if ($mod =~ /^,mb/);    # modify-before form
      return sprintf "\t.WORD\t0x%08x\t; %s", $opcode, $orig;
  };
  # Hand-encoder for "std".
  #   $mod   completer text following the mnemonic (only echoed in the
  #          trailing comment, it does not influence the encoding)
  #   $args  operand text, expected as  %rS,disp(%rB)
  # Returns a ".WORD" line carrying the encoded instruction, or the
  # instruction unchanged when the operands are not in the expected form.
  my $std = sub {
      my ($mod, $args) = @_;
      my $orig = "std$mod\t$args";

      return "\t" . $orig
          unless $args =~ /%r([0-9]+),(\-?[0-9]+)\(%r([0-9]+)\)/;   # format 3 suffices

      my ($source, $disp, $base) = ($1, $2, $3);
      my $opcode = (0x1c << 26) | ($base << 21) | ($source << 16)
                 | (($disp & 0x1FF8) << 1) | (($disp >> 13) & 1);
      return sprintf "\t.WORD\t0x%08x\t; %s", $opcode, $orig;
  };
  # Hand-encoder for "extrd".  Two operand forms are recognised:
  #   %rS,pos,len,%rT     fixed position    (format 15)
  #   %rS,%sar,len,%rT    position in %sar  (format 12)
  # Anything else is returned unchanged.  In the %sar form a completer
  # containing "=" (optionally preceded by "*") sets the condition bit.
  my $extrd = sub {
      my ($mod, $args) = @_;
      my $orig = "extrd$mod\t$args";

      # I only have ",u" completer, it's implicitly encoded...
      if ($args =~ /%r([0-9]+),([0-9]+),([0-9]+),%r([0-9]+)/) {    # format 15
          my ($source, $pos, $width, $target) = ($1, $2, $3, $4);
          my $len = 32 - $width;
          my $opcode = (0x36 << 26) | ($source << 21) | ($target << 16);
          $opcode |= (($pos & 0x20) << 6) | (($pos & 0x1f) << 5);   # encode pos
          $opcode |= (($len & 0x20) << 7) | ($len & 0x1f);          # encode len
          return sprintf "\t.WORD\t0x%08x\t; %s", $opcode, $orig;
      }

      if ($args =~ /%r([0-9]+),%sar,([0-9]+),%r([0-9]+)/) {        # format 12
          my ($source, $width, $target) = ($1, $2, $3);
          my $len = 32 - $width;
          my $opcode = (0x34 << 26) | ($source << 21) | ($target << 16)
                     | (2 << 11) | (1 << 9);
          $opcode |= (($len & 0x20) << 3) | ($len & 0x1f);          # encode len
          $opcode |= (1 << 13) if ($mod =~ /,\**=/);
          return sprintf "\t.WORD\t0x%08x\t; %s", $opcode, $orig;
      }

      return "\t" . $orig;
  };
  # Hand-encoder for "shrpd".  Two operand forms are recognised:
  #   %rA,%rB,count,%rT   fixed shift count    (format 14)
  #   %rA,%rB,%sar,%rT    shift count in %sar  (format 11)
  # Anything else is returned unchanged.
  my $shrpd = sub {
      my ($mod, $args) = @_;
      my $orig = "shrpd$mod\t$args";

      if ($args =~ /%r([0-9]+),%r([0-9]+),([0-9]+),%r([0-9]+)/) {   # format 14
          my ($first, $second, $count, $target) = ($1, $2, $3, $4);
          my $cpos = 63 - $count;
          my $opcode = (0x34 << 26) | ($second << 21) | ($first << 16)
                     | (1 << 10) | $target;
          $opcode |= (($cpos & 0x20) << 6) | (($cpos & 0x1f) << 5);  # encode sa
          return sprintf "\t.WORD\t0x%08x\t; %s", $opcode, $orig;
      }

      if ($args =~ /%r([0-9]+),%r([0-9]+),%sar,%r([0-9]+)/) {       # format 11
          my ($first, $second, $target) = ($1, $2, $3);
          my $opcode = (0x34 << 26) | ($second << 21) | ($first << 16)
                     | (1 << 9) | $target;
          return sprintf "\t.WORD\t0x%08x\t; %s", $opcode, $orig;
      }

      return "\t" . $orig;
  };
  # assemble($mnemonic, $mod, $args)
  #
  # Looks for a variable named after the mnemonic; if it holds a code
  # reference (one of the hand-encoders above) the instruction is handed
  # to it, otherwise the instruction text is reassembled unchanged as
  # "<tab>mnemonic+completer<tab>operands".
  sub assemble {
      my ($mnemonic, $mod, $args) = @_;
      my $encoder = eval("\$$mnemonic");

      return $encoder->($mod, $args) if ref($encoder) eq 'CODE';
      return "\t$mnemonic$mod\t$args";
  }
  # Post-process the accumulated template text line by line and print the
  # final assembly:
  #   1. evaluate the `...` Perl expressions embedded in the templates;
  #   2. fix up shd rotate counts above 31, or else translate one made-up
  #      instruction (at most one of these rewrites applies per line);
  #   3. in 32-bit builds, pass every instruction through assemble() so
  #      that the PA-RISC 2.0 instructions are emitted as .WORD constants,
  #      and rewrite "cmpb,*" to "comb,".
  foreach (split("\n",$code)) {
      s/\`([^\`]*)\`/eval $1/ge;

      s/shd\s+(%r[0-9]+),(%r[0-9]+),([0-9]+)/
          $3>31 ? sprintf("shd\t%$2,%$1,%d",$3-32)    # rotation for >=32
          :       sprintf("shd\t%$1,%$2,%d",$3)/e             or
      # translate made up instructions: _ror, _shr, _align, _shl
      s/_ror(\s+)(%r[0-9]+),/
          ($SZ==4 ? "shd" : "shrpd")."$1$2,$2,"/e             or
      s/_shr(\s+%r[0-9]+),([0-9]+),/
          $SZ==4 ? sprintf("extru%s,%d,%d,",$1,31-$2,32-$2)
                 : sprintf("extrd,u%s,%d,%d,",$1,63-$2,64-$2)/e    or
      s/_align(\s+%r[0-9]+,%r[0-9]+),/
          ($SZ==4 ? "vshd$1," : "shrpd$1,%sar,")/e            or
      s/_shl(\s+%r[0-9]+),([0-9]+),/
          $SIZE_T==4 ? sprintf("zdep%s,%d,%d,",$1,31-$2,32-$2)
                     : sprintf("depd,z%s,%d,%d,",$1,63-$2,64-$2)/e;

      s/^\s+([a-z]+)([\S]*)\s+([\S]*)/&assemble($1,$2,$3)/e if ($SIZE_T==4);
      s/cmpb,\*/comb,/ if ($SIZE_T==4);

      print $_,"\n";
  }

  # Buffered write errors (e.g. a full disk) only surface at close time;
  # make them fatal so a truncated assembly file is never mistaken for a
  # successful run.
  close STDOUT or die "error closing STDOUT: $!";