sha1-c64x-large.pl 5.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230
  1. #!/usr/bin/env perl
  2. #
  3. # ====================================================================
  4. # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
  5. # project. The module is, however, dual licensed under OpenSSL and
  6. # CRYPTOGAMS licenses depending on where you obtain it. For further
  7. # details see http://www.openssl.org/~appro/cryptogams/.
  8. # ====================================================================
  9. #
  10. # SHA1 for C64x.
  11. #
  12. # November 2016
  13. #
  14. # This is a fully-unrolled SHA1 implementation. It's 25% faster than
  15. # the one with compact loops, doesn't use an in-memory ring buffer, as
  16. # everything is accommodated in registers, and has "perfect" interrupt
  17. # agility. The drawback is obviously the code size...
  18. while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {}
  19. open STDOUT,">$output";
  20. ($CTX,$INP,$NUM) = ("A4","B4","A6"); # arguments
  21. ($A,$B,$C,$D,$E, $Arot,$F,$F0,$K) = map("A$_",(16..20, 21..24));
  22. @V = ($A,$B,$C,$D,$E);
  23. @X = map("B$_",(16..31));
  24. ($Actx,$Bctx,$Cctx,$Dctx,$Ectx) = map("A$_",(3,6..9)); # zaps $NUM
  25. sub BODY_00_19 {
  26. my ($i,$a,$b,$c,$d,$e) = @_;
  27. my $j = ($i+1)&15;
  28. $code.=<<___ if ($i<14);
  29. ROTL $a,5,$Arot ;; $i
  30. || AND $c,$b,$F
  31. || ANDN $d,$b,$F0
  32. || ADD $K,$e,$e ; E+=K
  33. || LDNW *${INP}++,@X[$i+2]
  34. OR $F0,$F,$F ; F_00_19(B,C,D)
  35. || ROTL $b,30,$b
  36. || SWAP2 @X[$i+1],@X[$i+1]
  37. || ADD @X[$i],$e,$e ; E+=X[i]
  38. ADD $Arot,$e,$e ; E+=rot(A,5)
  39. || SWAP4 @X[$i+1],@X[$i+1]
  40. ADD $F,$e,$e ; E+=F_00_19(B,C,D)
  41. ___
  42. $code.=<<___ if ($i==14);
  43. ROTL $a,5,$Arot ;; $i
  44. || AND $c,$b,$F
  45. || ANDN $d,$b,$F0
  46. || ADD $K,$e,$e ; E+=K
  47. OR $F0,$F,$F ; F_00_19(B,C,D)
  48. || ROTL $b,30,$b
  49. || ADD @X[$i],$e,$e ; E+=X[i]
  50. || SWAP2 @X[$i+1],@X[$i+1]
  51. ADD $Arot,$e,$e ; E+=rot(A,5)
  52. || SWAP4 @X[$i+1],@X[$i+1]
  53. ADD $F,$e,$e ; E+=F_00_19(B,C,D)
  54. ___
  55. $code.=<<___ if ($i==15);
  56. || XOR @X[($j+2)&15],@X[$j],@X[$j]
  57. ROTL $a,5,$Arot ;; $i
  58. || AND $c,$b,$F
  59. || ANDN $d,$b,$F0
  60. || ADD $K,$e,$e ; E+=K
  61. || XOR @X[($j+8)&15],@X[$j],@X[$j]
  62. OR $F0,$F,$F ; F_00_19(B,C,D)
  63. || ROTL $b,30,$b
  64. || ADD @X[$i],$e,$e ; E+=X[i]
  65. || XOR @X[($j+13)&15],@X[$j],@X[$j]
  66. ADD $Arot,$e,$e ; E+=rot(A,5)
  67. || ROTL @X[$j],1,@X[$j]
  68. ADD $F,$e,$e ; E+=F_00_19(B,C,D)
  69. ___
  70. $code.=<<___ if ($i>15);
  71. || XOR @X[($j+2)&15],@X[$j],@X[$j]
  72. ROTL $a,5,$Arot ;; $i
  73. || AND $c,$b,$F
  74. || ANDN $d,$b,$F0
  75. || ADD $K,$e,$e ; E+=K
  76. || XOR @X[($j+8)&15],@X[$j],@X[$j]
  77. OR $F0,$F,$F ; F_00_19(B,C,D)
  78. || ROTL $b,30,$b
  79. || ADD @X[$i&15],$e,$e ; E+=X[i]
  80. || XOR @X[($j+13)&15],@X[$j],@X[$j]
  81. ADD $Arot,$e,$e ; E+=rot(A,5)
  82. || ROTL @X[$j],1,@X[$j]
  83. ADD $F,$e,$e ; E+=F_00_19(B,C,D)
  84. ___
  85. }
  86. sub BODY_20_39 {
  87. my ($i,$a,$b,$c,$d,$e) = @_;
  88. my $j = ($i+1)&15;
  89. $code.=<<___ if ($i<79);
  90. || XOR @X[($j+2)&15],@X[$j],@X[$j]
  91. ROTL $a,5,$Arot ;; $i
  92. || XOR $c,$b,$F
  93. || ADD $K,$e,$e ; E+=K
  94. || XOR @X[($j+8)&15],@X[$j],@X[$j]
  95. XOR $d,$F,$F ; F_20_39(B,C,D)
  96. || ROTL $b,30,$b
  97. || ADD @X[$i&15],$e,$e ; E+=X[i]
  98. || XOR @X[($j+13)&15],@X[$j],@X[$j]
  99. ADD $Arot,$e,$e ; E+=rot(A,5)
  100. || ROTL @X[$j],1,@X[$j]
  101. ADD $F,$e,$e ; E+=F_20_39(B,C,D)
  102. ___
  103. $code.=<<___ if ($i==79);
  104. || [A0] B loop?
  105. || [A0] LDNW *${INP}++,@X[0] ; pre-fetch input
  106. ROTL $a,5,$Arot ;; $i
  107. || XOR $c,$b,$F
  108. || ADD $K,$e,$e ; E+=K
  109. || [A0] LDNW *${INP}++,@X[1]
  110. XOR $d,$F,$F ; F_20_39(B,C,D)
  111. || ROTL $b,30,$b
  112. || ADD @X[$i&15],$e,$e ; E+=X[i]
  113. ADD $Arot,$e,$e ; E+=rot(A,5)
  114. ADD $F,$e,$e ; E+=F_20_39(B,C,D)
  115. || ADD $Bctx,$a,$a ; accumulate context
  116. || ADD $Cctx,$b,$b
  117. ADD $Dctx,$c,$c
  118. || ADD $Ectx,$d,$d
  119. || ADD $Actx,$e,$e
  120. ;;===== branch to loop? is taken here
  121. ___
  122. }
  123. sub BODY_40_59 {
  124. my ($i,$a,$b,$c,$d,$e) = @_;
  125. my $j = ($i+1)&15;
  126. $code.=<<___;
  127. || XOR @X[($j+2)&15],@X[$j],@X[$j]
  128. ROTL $a,5,$Arot ;; $i
  129. || AND $c,$b,$F
  130. || AND $d,$b,$F0
  131. || ADD $K,$e,$e ; E+=K
  132. || XOR @X[($j+8)&15],@X[$j],@X[$j]
  133. XOR $F0,$F,$F
  134. || AND $c,$d,$F0
  135. || ROTL $b,30,$b
  136. || XOR @X[($j+13)&15],@X[$j],@X[$j]
  137. || ADD @X[$i&15],$e,$e ; E+=X[i]
  138. XOR $F0,$F,$F ; F_40_59(B,C,D)
  139. || ADD $Arot,$e,$e ; E+=rot(A,5)
  140. || ROTL @X[$j],1,@X[$j]
  141. ADD $F,$e,$e ; E+=F_20_39(B,C,D)
  142. ___
  143. }
  144. $code=<<___;
  145. .text
  146. .if .ASSEMBLER_VERSION<7000000
  147. .asg 0,__TI_EABI__
  148. .endif
  149. .if __TI_EABI__
  150. .asg sha1_block_data_order,_sha1_block_data_order
  151. .endif
  152. .asg B3,RA
  153. .asg A15,FP
  154. .asg B15,SP
  155. .if .BIG_ENDIAN
  156. .asg MV,SWAP2
  157. .asg MV,SWAP4
  158. .endif
  159. .global _sha1_block_data_order
  160. _sha1_block_data_order:
  161. .asmfunc
  162. MV $NUM,A0 ; reassign $NUM
  163. [!A0] BNOP RA ; if ($NUM==0) return;
  164. || [A0] LDW *${CTX}[0],$A ; load A-E...
  165. [A0] LDW *${CTX}[1],$B
  166. [A0] LDW *${CTX}[2],$C
  167. [A0] LDW *${CTX}[3],$D
  168. [A0] LDW *${CTX}[4],$E
  169. [A0] LDNW *${INP}++,@X[0] ; pre-fetch input
  170. [A0] LDNW *${INP}++,@X[1]
  171. NOP 3
  172. loop?:
  173. SUB A0,1,A0
  174. || MV $A,$Actx
  175. || MVD $B,$Bctx
  176. || SWAP2 @X[0],@X[0]
  177. || MVKL 0x5a827999,$K
  178. MVKH 0x5a827999,$K ; K_00_19
  179. || MV $C,$Cctx
  180. || MV $D,$Dctx
  181. || MVD $E,$Ectx
  182. || SWAP4 @X[0],@X[0]
  183. ___
  184. for ($i=0;$i<20;$i++) { &BODY_00_19($i,@V); unshift(@V,pop(@V)); }
  185. $code.=<<___;
  186. || MVKL 0x6ed9eba1,$K
  187. MVKH 0x6ed9eba1,$K ; K_20_39
  188. ___
  189. for (;$i<40;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); }
  190. $code.=<<___;
  191. || MVKL 0x8f1bbcdc,$K
  192. MVKH 0x8f1bbcdc,$K ; K_40_59
  193. ___
  194. for (;$i<60;$i++) { &BODY_40_59($i,@V); unshift(@V,pop(@V)); }
  195. $code.=<<___;
  196. || MVKL 0xca62c1d6,$K
  197. MVKH 0xca62c1d6,$K ; K_60_79
  198. ___
  199. for (;$i<80;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); }
  200. $code.=<<___;
  201. BNOP RA ; return
  202. STW $A,*${CTX}[0] ; emit A-E...
  203. STW $B,*${CTX}[1]
  204. STW $C,*${CTX}[2]
  205. STW $D,*${CTX}[3]
  206. STW $E,*${CTX}[4]
  207. .endasmfunc
  208. .sect .const
  209. .cstring "SHA1 block transform for C64x, CRYPTOGAMS by <appro\@openssl.org>"
  210. .align 4
  211. ___
  212. print $code;
  213. close STDOUT;