co-586.pl 5.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287
  1. #!/usr/local/bin/perl
  # Locate the directory this script was started from so that x86asm.pl can
  # be found either next to the script or in ../../perlasm relative to it.
  # When $0 carries no directory part the pattern does not match; fall back
  # to "./" instead of reading an unset $1, which would push an empty
  # string onto @INC.
  $dir = ($0 =~ m/(.*[\/\\])[^\/\\]+$/) ? $1 : "./";
  push(@INC,"${dir}","${dir}../../perlasm");
  require "x86asm.pl";

  # The first command line argument and the script name are handed to the
  # assembler back end.
  &asm_init($ARGV[0],$0);

  # Emit the 8-word and 4-word multiply routines, then the 8-word and
  # 4-word squaring routines.
  &bn_mul_comba("bn_mul_comba8",8);
  &bn_mul_comba("bn_mul_comba4",4);
  &bn_sqr_comba("bn_sqr_comba8",8);
  &bn_sqr_comba("bn_sqr_comba4",4);

  &asm_finish();
  # mul_add_c($a,$ai,$b,$bi,$c0,$c1,$c2,$pos,$i,$na,$nb)
  #
  # Emits, through the x86asm helpers, one multiply-accumulate step:
  # "mul edx" followed by add/adc of eax, edx and the carry into the
  # accumulator registers $c0, $c1, $c2.  Nothing here loads a[$ai] or
  # b[$bi]; both operands must already be in eax and edx, and the two
  # indices only appear in the emitted comment.
  #
  #   $a, $b         registers used as base for the a[] and b[] loads
  #   $ai, $bi       word indices of the current product (comment only)
  #   $c0, $c1, $c2  accumulator registers, low word first
  #   $pos           0    also load a[$na] into eax and b[$nb] into edx
  #                  1    load wparam(0) into eax, store $c0 to word $i
  #                       through it, and load the next operands
  #                  > 1  load wparam(0) and store $c0 only
  #                  < 0  arithmetic only
  #   $i             index of the stored word
  #   $na, $nb       indices of the next a[] and b[] words
  #
  # The loads are placed between the arithmetic instructions, so the order
  # of the calls below is significant.
  sub mul_add_c
  {
      local($a,$ai,$b,$bi,$c0,$c1,$c2,$pos,$i,$na,$nb)=@_;

      &comment("mul a[$ai]*b[$bi]");

      &mul("edx");
      &add($c0,"eax");
      if ($pos == 0) {
          # eax is free again: fetch the next a[] word
          &mov("eax",&DWP(($na)*4,$a,"",0));
      }
      elsif ($pos > 0) {
          # eax is free again: fetch the first function parameter (r[])
          &mov("eax",&wparam(0));
      }

      &adc($c1,"edx");
      if ($pos == 0 || $pos == 1) {
          # edx is free again: fetch the next b[] word
          &mov("edx",&DWP(($nb)*4,$b,"",0));
      }

      &adc($c2,0);
      if ($pos > 0) {
          # store the finished word through the pointer held in eax
          &mov(&DWP($i*4,"eax","",0),$c0);
          # only when more words follow: fetch the next a[] word
          &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 1;
      }
  }
  # sqr_add_c($r,$a,$ai,$bi,$c0,$c1,$c2,$pos,$i,$na,$nb)
  #
  # Emits, through the x86asm helpers, one accumulate step of a squaring
  # in which the product is added once: "mul eax" when $ai == $bi,
  # otherwise "mul edx", followed by add/adc of eax, edx and the carry
  # into $c0, $c1, $c2.  The operands must already be in eax (and in edx
  # when the indices differ); no load of a[$ai] or a[$bi] is emitted here.
  #
  #   $r             register used as base when storing the result word
  #   $a             register used as base for the a[] loads
  #   $ai, $bi       word indices of the current product
  #   $c0, $c1, $c2  accumulator registers, low word first
  #   $pos           0    also load a[$na] into eax
  #                  1    store $c0 to word $i of $r, load a[$na] into eax
  #                       and, if $na != $nb, a[$nb] into edx
  #                  > 1  store $c0 only
  #   $i             index of the stored word
  #   $na, $nb       indices of the next two operand words
  #
  # The loads are placed between the arithmetic instructions, so the order
  # of the calls below is significant.
  sub sqr_add_c
  {
      local($r,$a,$ai,$bi,$c0,$c1,$c2,$pos,$i,$na,$nb)=@_;

      &comment("sqr a[$ai]*a[$bi]");

      # equal indices: only eax is needed as operand
      &mul(($ai == $bi) ? "eax" : "edx");
      &add($c0,"eax");
      if ($pos == 0) {
          &mov("eax",&DWP(($na)*4,$a,"",0));
      }

      &adc($c1,"edx");
      if ($pos == 1) {
          # a second operand register is only needed for unequal indices
          &mov("edx",&DWP(($nb)*4,$a,"",0)) unless $na == $nb;
      }

      &adc($c2,0);
      if ($pos > 0) {
          &mov(&DWP($i*4,$r,"",0),$c0);
          if ($pos == 1) {
              &mov("eax",&DWP(($na)*4,$a,"",0));
          }
      }
  }
  # sqr_add_c2($r,$a,$ai,$bi,$c0,$c1,$c2,$pos,$i,$na,$nb)
  #
  # Emits, through the x86asm helpers, one accumulate step of a squaring
  # in which the product is added twice: after the mul, eax and edx are
  # doubled with add/adc (the carry out goes into $c2) and the doubled
  # value is then added into $c0, $c1, $c2.  The operands must already be
  # in eax and edx; no load of a[$ai] or a[$bi] is emitted here.
  #
  #   $r             register used as base when storing the result word
  #   $a             register used as base for the a[] loads
  #   $ai, $bi       word indices of the current product
  #   $c0, $c1, $c2  accumulator registers, low word first
  #   $pos           0    load a[$na] into eax
  #                  1    as 0, and store $c0 to word $i of $r
  #                  > 1  store $c0 only
  #                  a[$nb] is loaded into edx whenever $pos <= 1 and
  #                  $na != $nb
  #   $i             index of the stored word
  #   $na, $nb       indices of the next two operand words
  #
  # The loads are placed between the arithmetic instructions, so the order
  # of the calls below is significant.
  sub sqr_add_c2
  {
      local($r,$a,$ai,$bi,$c0,$c1,$c2,$pos,$i,$na,$nb)=@_;

      &comment("sqr a[$ai]*a[$bi]");

      &mul(($ai == $bi) ? "eax" : "edx");

      # double the 64-bit product; the bit shifted out lands in $c2
      &add("eax","eax");
      &adc("edx","edx");
      &adc($c2,0);

      # accumulate the doubled product
      &add($c0,"eax");
      &adc($c1,"edx");
      if ($pos == 0 || $pos == 1) {
          &mov("eax",&DWP(($na)*4,$a,"",0));
      }
      &adc($c2,0);

      if ($pos > 0) {
          &mov(&DWP($i*4,$r,"",0),$c0);
      }
      if ($pos <= 1) {
          &mov("edx",&DWP(($nb)*4,$a,"",0)) unless $na == $nb;
      }
  }
  # bn_mul_comba($name,$num)
  #
  # Emits a function called $name that multiplies two $num-word operands
  # column by column: result word $i accumulates every product
  # a[$ai]*b[$bi] with $ai + $bi == $i.  In the generated code wparam(1)
  # is loaded into esi as the a[] base and wparam(2) into edi as the b[]
  # base; wparam(0) is reloaded into eax by mul_add_c whenever a word is
  # stored.  2*$num words are stored in total.
  #
  # ebx, ecx and ebp form a three-word accumulator.  After each column the
  # roles rotate, so the old middle word becomes the new low word and the
  # register that was just stored is cleared and reused as the high word.
  #
  # All working variables, including the loop temporaries $j, $v, $na and
  # $nb, are localized so that nothing leaks into package globals.
  #
  # Helpers are called with a leading "&" because several of them (push,
  # pop, xor) share their names with Perl builtins or operators.
  sub bn_mul_comba
  {
      local($name,$num)=@_;
      local($a,$b,$c0,$c1,$c2);
      local($i,$j,$as,$bs,$be,$ai,$bi);
      local($tot,$end,$v,$na,$nb);

      &function_begin_B($name,"");

      $c0="ebx";
      $c1="ecx";
      $c2="ebp";
      $a="esi";
      $b="edi";

      # $as/$bs: a[] and b[] index of the first product of a column,
      # $be:     b[] index of the last product of a column.
      $as=0;
      $bs=0;
      $be=0;
      $tot=$num+$num-1;       # number of columns

      # prologue; the saves are interleaved with the parameter loads
      &push("esi");
      &mov($a,&wparam(1));
      &push("edi");
      &mov($b,&wparam(2));
      &push("ebp");
      &push("ebx");

      &xor($c0,$c0);
      &mov("eax",&DWP(0,$a,"",0));    # preload a[0]
      &xor($c1,$c1);
      &mov("edx",&DWP(0,$b,"",0));    # preload b[0]

      for ($i=0; $i<$tot; $i++) {
          $ai=$as;
          $bi=$bs;
          $end=$be+1;

          &comment("################## Calculate word $i");

          for ($j=$bs; $j<$end; $j++) {
              # the high accumulator word starts each column at zero
              &xor($c2,$c2) if ($j == $bs);

              if (($j+1) == $end) {
                  # Last product of this column: have the word stored
                  # ($v == 1), or stored without further operand loads
                  # on the very last column ($v == 2).  The next operands
                  # are the first ones of the following column.
                  $v=1;
                  $v=2 if (($i+1) == $tot);
                  $na=$as+($i < ($num-1));
                  $nb=$bs+($i >= ($num-1));
              }
              else {
                  # More products follow in this column: step down in
                  # a[] and up in b[].
                  $v=0;
                  $na=($ai-1);
                  $nb=($bi+1);
              }

              &mul_add_c($a,$ai,$b,$bi,$c0,$c1,$c2,$v,$i,$na,$nb);
              if ($v) {
                  &comment("saved r[$i]");
                  # rotate the accumulator roles for the next column
                  ($c0,$c1,$c2)=($c1,$c2,$c0);
              }
              $ai--;
              $bi++;
          }

          # Columns below $num-1 start one word further into a[] and end
          # one word further into b[]; from then on they start one word
          # further into b[].
          $as++ if ($i < ($num-1));
          $bs++ if ($i >= ($num-1));
          $be++ if ($i < ($num-1));
      }

      # $i == $tot here: store the remaining top word.  eax still holds
      # the pointer loaded from wparam(0) by the last mul_add_c call.
      &comment("save r[$i]");
      &mov(&DWP($i*4,"eax","",0),$c0);

      &pop("ebx");
      &pop("ebp");
      &pop("edi");
      &pop("esi");
      &ret();
      &function_end_B($name);
  }
  # bn_sqr_comba($name,$num)
  #
  # Emits a function called $name that squares a $num-word operand column
  # by column.  In the generated code wparam(0) is loaded into edi as the
  # result base and wparam(1) into esi as the a[] base.  For result word
  # $i the products a[$ai]*a[$bi] with $ai + $bi == $i are visited with
  # $ai counting down and $bi counting up, stopping as soon as the indices
  # would cross: a product with $ai == $bi is added once (sqr_add_c), any
  # other product is added twice (sqr_add_c2).  2*$num words are stored in
  # total.
  #
  # ebx, ecx and ebp form a three-word accumulator whose roles rotate
  # after each column.
  #
  # All working variables, including the loop temporaries $j, $v, $na and
  # $nb, are localized so that nothing leaks into package globals, and $r
  # and $a are no longer initialised from the argument list.
  #
  # Helpers are called with a leading "&" because several of them (push,
  # pop, xor) share their names with Perl builtins or operators.
  sub bn_sqr_comba
  {
      local($name,$num)=@_;
      local($r,$a,$c0,$c1,$c2);
      local($i,$j,$as,$bs,$be,$ai,$bi);
      local($tot,$end,$v,$na,$nb);

      &function_begin_B($name,"");

      $c0="ebx";
      $c1="ecx";
      $c2="ebp";
      $a="esi";
      $r="edi";

      &push("esi");
      &push("edi");
      &push("ebp");
      &push("ebx");
      &mov($r,&wparam(0));
      &mov($a,&wparam(1));
      &xor($c0,$c0);
      &xor($c1,$c1);
      &mov("eax",&DWP(0,$a,"",0));    # preload a[0]

      # $as/$bs: the two a[] indices of the first product of a column,
      # $be:     upper bound for the second index within a column.
      $as=0;
      $bs=0;
      $be=0;
      $tot=$num+$num-1;       # number of columns

      for ($i=0; $i<$tot; $i++) {
          $ai=$as;
          $bi=$bs;
          $end=$be+1;

          &comment("############### Calculate word $i");

          for ($j=$bs; $j<$end; $j++) {
              # the high accumulator word starts each column at zero
              &xor($c2,$c2) if ($j == $bs);

              if (($ai-1) < ($bi+1)) {
                  # The next pair would cross, so this is the last product
                  # of the column: have the word stored ($v == 1), or use
                  # $v == 2 on the very last column.  The next operands
                  # are the first ones of the following column.
                  $v=1;
                  $v=2 if ($i+1) == $tot;
                  $na=$as+($i < ($num-1));
                  $nb=$bs+($i >= ($num-1));
              }
              else {
                  # More products follow in this column.
                  $v=0;
                  $na=$ai-1;
                  $nb=$bi+1;
              }

              if ($ai == $bi) {
                  &sqr_add_c($r,$a,$ai,$bi,
                      $c0,$c1,$c2,$v,$i,$na,$nb);
              }
              else {
                  &sqr_add_c2($r,$a,$ai,$bi,
                      $c0,$c1,$c2,$v,$i,$na,$nb);
              }

              if ($v) {
                  &comment("saved r[$i]");
                  # rotate the accumulator roles for the next column
                  ($c0,$c1,$c2)=($c1,$c2,$c0);
                  last;
              }
              $ai--;
              $bi++;
          }

          # Columns below $num-1 start one word higher in the first index;
          # from then on they start one word higher in the second index.
          $as++ if ($i < ($num-1));
          $bs++ if ($i >= ($num-1));
          $be++ if ($i < ($num-1));
      }

      # $i == $tot here: store the remaining top word
      &mov(&DWP($i*4,$r,"",0),$c0);

      &pop("ebx");
      &pop("ebp");
      &pop("edi");
      &pop("esi");
      &ret();
      &function_end_B($name);
  }