rc4-c64xplus.pl 4.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191
  1. #! /usr/bin/env perl
  2. # Copyright 2014-2020 The OpenSSL Project Authors. All Rights Reserved.
  3. #
  4. # Licensed under the Apache License 2.0 (the "License"). You may not use
  5. # this file except in compliance with the License. You can obtain a copy
  6. # in the file LICENSE in the source distribution or at
  7. # https://www.openssl.org/source/license.html
  8. #
  9. # ====================================================================
  10. # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
  11. # project. The module is, however, dual licensed under OpenSSL and
  12. # CRYPTOGAMS licenses depending on where you obtain it. For further
  13. # details see http://www.openssl.org/~appro/cryptogams/.
  14. # ====================================================================
  15. #
  16. # RC4 for C64x+.
  17. #
  18. # April 2014
  19. #
  20. # The RC4 subroutine processes one byte in 7.0 cycles, which is 3x faster
  21. # than TI CGT-generated code. The loop is scheduled in such a way that
  22. # there is only one reference to memory in each cycle. This is done
  23. # to avoid L1D memory banking conflicts; see TI publication SPRU871
  24. # for further details. Otherwise it should be possible to schedule
  25. # the loop for an iteration interval of 6...
  # Register names substituted into the assembly template below.
  #
  # Registers the routines read their arguments from at entry.
  ($KEY, $LEN) = ("A4", "B4");
  ($INP, $OUT) = ("A6", "B6");

  # Working registers.  Each A-side name on the first line is paired with the
  # B-side register of the same number on the second line.
  ($KEYA, $XX, $TY, $xx, $ONE, $ret) = ("A5", "A7", "A8", "A9", "A1", "A2");
  ($KEYB, $YY, $TX, $tx, $SUM, $dat) = ("B5", "B7", "B8", "B9", "B1", "B2");
  29. $code.=<<___;
  ; Assembler preamble.  The text from here down to the closing heredoc
  ; marker is the C64x+ assembly that this script prints.
  30. .text
  ; When the assembler reports a version below 7000000, give __TI_EABI__
  ; the value 0 so that the .if tests on it always have something to
  ; evaluate.
  31. .if .ASSEMBLER_VERSION<7000000
  32. .asg 0,__TI_EABI__
  33. .endif
  ; Under EABI, make the underscore-prefixed labels used in this file stand
  ; for the plain names RC4, RC4_set_key and RC4_options.
  ; NOTE(review): .nocmp presumably turns off compact instruction encoding
  ; for this section - confirm in the assembler manual.
  34. .if __TI_EABI__
  35. .nocmp
  36. .asg RC4,_RC4
  37. .asg RC4_set_key,_RC4_set_key
  38. .asg RC4_options,_RC4_options
  39. .endif
  ;-----------------------------------------------------------------------
  ; _RC4 - RC4 stream routine: XORs a byte stream with the RC4 keystream.
  ;
  ; Registers read at entry:
  ;   A4 = key state, B4 = byte count, A6 = input pointer,
  ;   B6 = output pointer.  Returns through B3.
  ; Key state layout as accessed here: x at byte offset 0, y at byte
  ;   offset 1, S-box bytes from offset 2.  KEYA and KEYB both hold
  ;   key+2, so x and y are reached with indices -2 and -1 and the S-box
  ;   with plain byte indices.
  ; Register roles inside the loop:
  ;   XX = x index, kept one step ahead     YY = y index
  ;   TX = S[x]   TY = S[y]   tx = prefetched S[next x]
  ;   xx = copy of x used for the store     SUM = TX+TY
  ;   dat = input byte                      ret = keystream/output byte
  ; Index arithmetic uses ADD4/SUB4 (packed byte add/subtract), presumably
  ; so that the low byte wraps modulo 256 - see the instruction set guide.
  ;-----------------------------------------------------------------------
  40. .global _RC4
  41. .align 16
  42. _RC4:
  43. .asmfunc
  ; B0 = byte count; every setup instruction below is predicated on it so
  ; that nothing touches the key state when the count is zero.
  44. MV $LEN,B0
  45. [!B0] BNOP B3 ; if (len==0) return;
  46. ||[B0] ADD $KEY,2,$KEYA
  47. ||[B0] ADD $KEY,2,$KEYB
  48. [B0] MVK 1,$ONE
  49. ||[B0] LDBU *${KEYA}[-2],$XX ; key->x
  50. [B0] LDBU *${KEYB}[-1],$YY ; key->y
  51. || NOP 4
  ; Pre-increment x and fetch the first S[x]; the loop counter ILC is set
  ; to the byte count while that load is in flight.
  52. ADD4 $ONE,$XX,$XX
  53. LDBU *${KEYA}[$XX],$TX
  54. || MVC $LEN,ILC
  55. NOP 4
  56. ;;==================================================
  ; Software-pipelined loop with an iteration interval of 7 cycles.
  ; NOTE(review): the ADD4 paired with SPLOOP (first y += S[x]) presumably
  ; runs once, before the loop body; later iterations get their y update
  ; from the predicated ADD4s at the end of the previous iteration.
  57. SPLOOP 7
  58. || ADD4 $TX,$YY,$YY
  ; Fetch S[y]; remember the current x in xx and step XX to the next x.
  59. LDBU *${KEYB}[$YY],$TY
  60. || MVD $XX,$xx
  61. || ADD4 $ONE,$XX,$XX
  ; Prefetch S[next x] into tx ahead of the swap stores below.
  62. LDBU *${KEYA}[$XX],$tx
  ; B0 = (y == next x).  In that case the store to S[y] below overwrites
  ; the byte tx was fetched from, so tx is stale and TX (the value being
  ; stored there) is kept as the next S[x] instead.
  63. CMPEQ $YY,$XX,B0
  64. || NOP 3
  ; Swap: S[y] = old S[x], S[x] = old S[y].  In parallel, add the next
  ; S[x] to y ahead of time (TX when B0 is set, tx otherwise).
  65. STB $TX,*${KEYB}[$YY]
  66. ||[B0] ADD4 $TX,$YY,$YY
  67. STB $TY,*${KEYA}[$xx]
  68. ||[!B0] ADD4 $tx,$YY,$YY
  69. ||[!B0] MVD $tx,$TX
  ; Keystream index = S[x] + S[y] for the current iteration.
  70. ADD4 $TY,$TX,$SUM ; [0,0] $TX is not replaced by $tx yet!
  71. || NOP 2
  ; Fetch the next input byte and the keystream byte S[SUM].
  72. LDBU *$INP++,$dat
  73. || NOP 2
  74. LDBU *${KEYB}[$SUM],$ret
  75. || NOP 5
  ; Output byte = input byte XOR keystream byte.
  76. XOR.L $dat,$ret,$ret
  77. SPKERNEL
  78. || STB $ret,*$OUT++
  79. ;;==================================================
  ; Epilogue: undo the look-ahead before saving the state.  XX was
  ; pre-incremented and YY already had the next S[x] (now in TX) added.
  80. SUB4 $XX,$ONE,$XX
  81. || NOP 5
  82. STB $XX,*${KEYA}[-2] ; key->x
  83. || SUB4 $YY,$TX,$YY
  84. || BNOP B3
  85. STB $YY,*${KEYB}[-1] ; key->y
  86. || NOP 5
  87. .endasmfunc
  ;-----------------------------------------------------------------------
  ; _RC4_set_key - initialise the RC4 state from key material.
  ;
  ; Registers read at entry:
  ;   A4 = key state, B4 = length of the key material in bytes,
  ;   A6 = pointer to the key material.  Returns through B3.
  ; Writes x = y = 0 at offset 0 of the state, fills the 256 S-box bytes
  ; that follow with 0..255, then runs 256 swap steps that mix in the key
  ; material, wrapping back to its start whenever its end is reached.
  ; NOTE(review): no guard for a zero length is visible here; with a
  ; length of 0 the end-of-input compare below would never match, so
  ; callers presumably never pass 0 - confirm.
  ;-----------------------------------------------------------------------
  88. .global _RC4_set_key
  89. .align 16
  90. _RC4_set_key:
  91. .asmfunc
  ; ONE = 0x04040404 (per-byte increment of 4).  B0 = the four bytes
  ; 0,1,2,3 arranged for the target byte order, so that a word store puts
  ; them in memory in ascending order.
  92. .if .BIG_ENDIAN
  93. MVK 0x00000404,$ONE
  94. || MVK 0x00000203,B0
  95. MVKH 0x04040000,$ONE
  96. || MVKH 0x00010000,B0
  97. .else
  98. MVK 0x00000404,$ONE
  99. || MVK 0x00000100,B0
  100. MVKH 0x04040000,$ONE
  101. || MVKH 0x03020000,B0
  102. .endif
  ; KEYA and KEYB = S-box base (state + 2); ret = one past the last key
  ; byte.
  103. ADD $KEY,2,$KEYA
  104. || ADD $KEY,2,$KEYB
  105. || ADD $INP,$LEN,$ret ; end of input
  ; Fetch the first key byte.  TX = 0 serves both as the x/y initial value
  ; stored next and as S[0] for the first mixing step.
  106. LDBU *${INP}++,$dat
  107. || MVK 0,$TX
  ; The halfword store clears x and y and advances the state pointer to
  ; the S-box.  A0 takes over the 0,1,2,3 pattern; B0 becomes the loop
  ; count.
  ; NOTE(review): the count is written as 64-4 (and 256-1 further down),
  ; presumably to compensate for how SPLOOPD samples ILC late, giving 64
  ; and 256 iterations - confirm against the instruction set guide.
  108. STH $TX,*${KEY}++ ; key->x=key->y=0
  109. || MV B0,A0
  110. || MVK 64-4,B0
  111. ;;==================================================
  ; Identity fill: one word (four S-box bytes) per cycle, then add 4 to
  ; each byte of the pattern.  STNW is used for the store, presumably
  ; because state + 2 need not be word aligned.
  112. SPLOOPD 1
  113. || MVC B0,ILC
  114. STNW A0,*${KEY}++
  115. || ADD4 $ONE,A0,A0
  116. SPKERNEL
  117. ;;==================================================
  ; Reset for the mixing pass: x = y = 0 and a per-step increment of 1.
  118. MVK 0,$YY
  119. || MVK 0,$XX
  120. MVK 1,$ONE
  121. || MVK 256-1,B0
  122. ;;==================================================
  ; Mixing loop, iteration interval of 8 cycles.
  123. SPLOOPD 8
  124. || MVC B0,ILC
  ; y += key byte (S[x] was already added at the end of the previous
  ; iteration; for the first one both y and TX are 0).  A0 flags that the
  ; key pointer has reached the end of the key material.
  125. ADD4 $dat,$YY,$YY
  126. || CMPEQ $INP,$ret,A0 ; end of input?
  ; Fetch S[y]; remember the current x in xx and step XX to the next x.
  127. LDBU *${KEYB}[$YY],$TY
  128. || MVD $XX,$xx
  129. || ADD4 $ONE,$XX,$XX
  ; Prefetch S[next x]; wrap the key pointer back to the start if needed.
  130. LDBU *${KEYA}[$XX],$tx
  131. ||[A0] SUB $INP,$LEN,$INP ; rewind
  ; Fetch the next key byte.  B0 = (y == next x): the store to S[y] below
  ; would then overwrite the byte tx was fetched from, so TX is kept as
  ; the next S[x] instead of the stale tx.
  132. LDBU *${INP}++,$dat
  133. || CMPEQ $YY,$XX,B0
  134. || NOP 3
  ; Swap S[x] and S[y], and add the next S[x] to y ahead of time.
  135. STB $TX,*${KEYB}[$YY]
  136. ||[B0] ADD4 $TX,$YY,$YY
  137. STB $TY,*${KEYA}[$xx]
  138. ||[!B0] ADD4 $tx,$YY,$YY
  139. ||[!B0] MV $tx,$TX
  140. SPKERNEL
  141. ;;==================================================
  ; Return to the caller.
  142. BNOP B3,5
  143. .endasmfunc
  ;-----------------------------------------------------------------------
  ; _RC4_options - leaves the address of the rc4_options string in A4 and
  ; returns through B3.  Takes no inputs.
  ; The address is formed relative to the code itself rather than as an
  ; absolute constant: B4 receives the address of the _rc4_options label,
  ; A4 the distance from that label to the string, and the final ADD
  ; combines the two.
  ; NOTE(review): A4 is presumably the return-value register of the
  ; calling convention in use, and the instructions after the BNOP
  ; presumably execute in the delay slots of the branch - confirm.
  ;-----------------------------------------------------------------------
  144. .global _RC4_options
  145. .align 16
  146. _RC4_options:
  147. _rc4_options:
  148. .asmfunc
  149. BNOP B3,1
  150. ADDKPC _rc4_options,B4
  ; The two halves of the 32-bit label-to-string offset are loaded with
  ; MVKL/MVKH; only the spelling of the offset expression differs between
  ; the EABI and non-EABI branches.
  151. .if __TI_EABI__
  152. MVKL \$PCR_OFFSET(rc4_options,_rc4_options),A4
  153. MVKH \$PCR_OFFSET(rc4_options,_rc4_options),A4
  154. .else
  155. MVKL (rc4_options-_rc4_options),A4
  156. MVKH (rc4_options-_rc4_options),A4
  157. .endif
  158. ADD B4,A4,A4
  159. .endasmfunc
  ; Read-only strings.  The section is named differently under EABI and
  ; non-EABI builds.
  160. .if __TI_EABI__
  161. .sect ".text:rc4_options.const"
  162. .else
  163. .sect ".const:rc4_options"
  164. .endif
  165. .align 4
  ; rc4_options labels the first string; _RC4_options computes the address
  ; of this label.  The second string is an identification banner.
  166. rc4_options:
  167. .cstring "rc4(sploop,char)"
  168. .cstring "RC4 for C64+, CRYPTOGAMS by <appro\@openssl.org>"
  169. .align 4
  170. ___
  # Emit the generated assembly.  The last command-line argument, when
  # present, names the output file; without one the code is written to the
  # inherited STDOUT.
  $output = pop;
  if ($output) {
      # Three-argument open keeps characters in the file name from being
      # interpreted as part of the mode.  A failed open is reported here
      # rather than surfacing later as a misleading "error closing STDOUT".
      open(STDOUT, ">", $output) or die "can't open $output: $!";
  }
  print $code;
  close STDOUT or die "error closing STDOUT: $!";