co-586.pl 5.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297
  1. #! /usr/bin/env perl
  2. # Copyright 1995-2016 The OpenSSL Project Authors. All Rights Reserved.
  3. #
  4. # Licensed under the Apache License 2.0 (the "License"). You may not use
  5. # this file except in compliance with the License. You can obtain a copy
  6. # in the file LICENSE in the source distribution or at
  7. # https://www.openssl.org/source/license.html
  # Driver: locate the perlasm support code, redirect STDOUT to the requested
  # output file, and emit the four comba routines.

  # Directory part of this script's path (including the trailing separator),
  # or the empty string when $0 contains no directory component.  Testing the
  # match explicitly avoids picking up a stale $1.
  $dir = ($0 =~ m/(.*[\/\\])[^\/\\]+$/) ? $1 : "";
  push(@INC,"${dir}","${dir}../../perlasm");
  require "x86asm.pl";

  # The last command-line argument, when present, names the output file.
  # Use three-argument open so the name is never parsed for mode characters,
  # and fail loudly instead of silently writing to the original STDOUT.
  $output = pop;
  if ($output) {
      open(STDOUT, ">", $output) or die "can't open $output: $!";
  }

  # NOTE(review): $ARGV[0] is handed to asm_init() unchanged; presumably it
  # selects the assembler flavour -- confirm against x86asm.pl.
  &asm_init($ARGV[0]);

  &bn_mul_comba("bn_mul_comba8",8);
  &bn_mul_comba("bn_mul_comba4",4);
  &bn_sqr_comba("bn_sqr_comba8",8);
  &bn_sqr_comba("bn_sqr_comba4",4);

  &asm_finish();

  # Buffered write errors only surface at close time, so check it.
  close STDOUT or die "error closing STDOUT";
  # mul_add_c(a_reg, a_idx, b_reg, b_idx, c0, c1, c2, pos, word, next_a, next_b)
  #
  # Emit one multiply-accumulate step: "mul edx", then an add/adc/adc chain
  # that folds edx:eax into the accumulator registers c0 (low), c1 and c2
  # (high).  eax and edx are expected to hold the two operand words already;
  # nothing here loads the current operands.  a_idx and b_idx are used only
  # in the emitted comment.
  #
  # Loads and stores are interleaved with the carry chain according to pos:
  #   pos == 0  load a[next_a] into eax and b[next_b] into edx
  #   pos == 1  load wparam(0) (r[]) into eax, load b[next_b] into edx,
  #             store c0 to word `word' through eax, then load a[next_a]
  #   pos >  1  load wparam(0) into eax and store c0; no operand reload
  #             (used by the caller for the last column)
  sub mul_add_c
  {
      my ($a_reg, $a_idx, $b_reg, $b_idx,
          $acc_lo, $acc_mid, $acc_hi,
          $pos, $word, $next_a, $next_b) = @_;

      my $store_result = ($pos > 0);
      my $fetch_b      = ($pos == 0 || $pos == 1);

      &comment("mul a[$a_idx]*b[$b_idx]");

      &mul("edx");
      &add($acc_lo, "eax");

      # eax is free once the low product word has been added: reuse it either
      # for the next a word or for the result pointer.
      if ($store_result) {
          &mov("eax", &wparam(0));                          # load r[]
      }
      elsif ($pos == 0) {
          &mov("eax", &DWP(($next_a)*4, $a_reg, "", 0));    # load next a
      }

      &adc($acc_mid, "edx");

      if ($fetch_b) {
          &mov("edx", &DWP(($next_b)*4, $b_reg, "", 0));    # load next b
      }

      &adc($acc_hi, 0);

      if ($store_result) {
          &mov(&DWP($word*4, "eax", "", 0), $acc_lo);       # save r[]
      }
      if ($pos == 1) {
          &mov("eax", &DWP(($next_a)*4, $a_reg, "", 0));    # load next a
      }
  }
  # sqr_add_c(r_reg, a_reg, a_idx, b_idx, c0, c1, c2, pos, word, next_a, next_b)
  #
  # Emit one accumulate step of a squaring: multiply ("mul eax" when
  # a_idx == b_idx, otherwise "mul edx") and add edx:eax once into the
  # accumulator registers c0 (low), c1 and c2 (high).  eax (and edx when
  # the indexes differ) are expected to be loaded by the caller.
  #
  # Loads and stores are interleaved with the carry chain according to pos:
  #   pos == 0  load a[next_a] into eax
  #   pos == 1  load a[next_b] into edx (only when next_a != next_b), store
  #             c0 to word `word' of r_reg, then load a[next_a] into eax
  #   pos >  1  store c0 to word `word' of r_reg; no operand reload
  sub sqr_add_c
  {
      my ($r_reg, $a_reg, $a_idx, $b_idx,
          $acc_lo, $acc_mid, $acc_hi,
          $pos, $word, $next_a, $next_b) = @_;

      my $column_done = ($pos == 1);

      &comment("sqr a[$a_idx]*a[$b_idx]");

      # A diagonal term squares eax; otherwise the second operand is in edx.
      &mul($a_idx == $b_idx ? "eax" : "edx");
      &add($acc_lo, "eax");

      if ($pos == 0) {
          &mov("eax", &DWP(($next_a)*4, $a_reg, "", 0));    # load next a
      }

      &adc($acc_mid, "edx");

      # edx is only needed when the next product is not a diagonal term.
      if ($column_done && $next_a != $next_b) {
          &mov("edx", &DWP(($next_b)*4, $a_reg, "", 0));
      }

      &adc($acc_hi, 0);

      if ($pos > 0) {
          &mov(&DWP($word*4, $r_reg, "", 0), $acc_lo);      # save r[]
      }
      if ($column_done) {
          &mov("eax", &DWP(($next_a)*4, $a_reg, "", 0));    # load next a
      }
  }
  # sqr_add_c2(r_reg, a_reg, a_idx, b_idx, c0, c1, c2, pos, word, next_a, next_b)
  #
  # Emit one accumulate step of a squaring in which the product is counted
  # twice: multiply ("mul eax" when a_idx == b_idx, otherwise "mul edx"),
  # double edx:eax with add/adc (the carry out goes into c2), then add the
  # doubled value into the accumulator registers c0 (low), c1 and c2 (high).
  # The operands are expected to be in eax/edx on entry.
  #
  # Loads and stores are interleaved with the carry chain according to pos:
  #   pos == 0 or 1  load a[next_a] into eax
  #   pos >  0       store c0 to word `word' of r_reg
  #   pos <= 1       load a[next_b] into edx, but only when next_a != next_b
  sub sqr_add_c2
  {
      my ($r_reg, $a_reg, $a_idx, $b_idx,
          $acc_lo, $acc_mid, $acc_hi,
          $pos, $word, $next_a, $next_b) = @_;

      my $fetch_a = ($pos == 0 || $pos == 1);
      my $fetch_b = ($pos <= 1 && $next_a != $next_b);

      &comment("sqr a[$a_idx]*a[$b_idx]");

      &mul($a_idx == $b_idx ? "eax" : "edx");

      # Double the 64-bit product in place; the bit shifted out lands in c2.
      &add("eax", "eax");
      &adc("edx", "edx");
      &adc($acc_hi, 0);

      # Accumulate the doubled product.
      &add($acc_lo, "eax");
      &adc($acc_mid, "edx");

      if ($fetch_a) {
          &mov("eax", &DWP(($next_a)*4, $a_reg, "", 0));    # load next a
      }

      &adc($acc_hi, 0);

      if ($pos > 0) {
          &mov(&DWP($word*4, $r_reg, "", 0), $acc_lo);      # save r[]
      }
      if ($fetch_b) {
          &mov("edx", &DWP(($next_b)*4, $a_reg, "", 0));
      }
  }
  # bn_mul_comba(name, num)
  #
  # Emit a function called `name' that multiplies two num-word operands
  # column by column.  For every result word i (0 .. 2*num-2) it emits one
  # mul_add_c step per partial product a[ai]*b[bi] with ai+bi == i; the
  # step that finishes a column also stores the word through wparam(0)
  # (see mul_add_c).  The top word (index 2*num-1) is stored after the loop.
  #
  # The accumulator lives in ebx/ecx/ebp and is rotated after every column,
  # so the register holding the low word keeps changing.  esi and edi are
  # loaded from wparam(1) and wparam(2) and used as the a and b pointers.
  #
  # All working variables are lexical: nothing is left behind in package
  # globals, and the sort variables $a/$b are not touched.
  sub bn_mul_comba
  {
      my ($name, $num) = @_;

      my ($c0, $c1, $c2) = ("ebx", "ecx", "ebp");
      my $ap = "esi";
      my $bp = "edi";

      # $as/$bs: a and b indexes of the first product in the current column;
      # $be: highest loop index reached in the current column.
      my ($as, $bs, $be) = (0, 0, 0);
      my $tot = $num + $num - 1;
      my $i;                  # kept outside the loop: read again afterwards

      &function_begin_B($name, "");

      &push("esi");
      &mov($ap, &wparam(1));
      &push("edi");
      &mov($bp, &wparam(2));
      &push("ebp");
      &push("ebx");

      &xor($c0, $c0);
      &mov("eax", &DWP(0, $ap, "", 0));     # load the first word of a
      &xor($c1, $c1);
      &mov("edx", &DWP(0, $bp, "", 0));     # load the first word of b

      for ($i = 0; $i < $tot; $i++) {
          my $ai  = $as;
          my $bi  = $bs;
          my $end = $be + 1;

          # True while the columns are still getting longer.
          my $growing = ($i < ($num - 1));

          &comment("################## Calculate word $i");

          for (my $j = $bs; $j < $end; $j++) {
              &xor($c2, $c2) if ($j == $bs);

              # $v: 0 inside a column, 1 on its last product, 2 on the last
              # product of the last column.  ($na,$nb) are the operand
              # indexes the step should preload for the following product.
              my ($v, $na, $nb);
              if (($j + 1) == $end) {
                  $v  = (($i + 1) == $tot) ? 2 : 1;
                  $na = $as + ($growing ? 1 : 0);
                  $nb = $bs + ($growing ? 0 : 1);
              }
              else {
                  $v  = 0;
                  $na = $ai - 1;
                  $nb = $bi + 1;
              }

              &mul_add_c($ap, $ai, $bp, $bi, $c0, $c1, $c2, $v, $i, $na, $nb);

              if ($v) {
                  &comment("saved r[$i]");
                  # Column finished: the old middle word becomes the new
                  # low word, and the stored register is recycled as high.
                  ($c0, $c1, $c2) = ($c1, $c2, $c0);
              }

              $ai--;
              $bi++;
          }

          if ($growing) {
              $as++;
              $be++;
          }
          else {
              $bs++;
          }
      }

      # $i == $tot here.  eax still holds the result pointer loaded by the
      # final mul_add_c step, so the remaining word is stored through it.
      &comment("save r[$i]");
      &mov(&DWP($i*4, "eax", "", 0), $c0);

      &pop("ebx");
      &pop("ebp");
      &pop("edi");
      &pop("esi");
      &ret();
      &function_end_B($name);
  }
  # bn_sqr_comba(name, num)
  #
  # Emit a function called `name' that squares a num-word operand column by
  # column.  For every result word i (0 .. 2*num-2) the products a[ai]*a[bi]
  # with ai+bi == i are walked from (as,bs) inwards until the indexes meet
  # or cross: a diagonal term (ai == bi) is emitted with sqr_add_c, every
  # other term with sqr_add_c2, which adds the product twice.  The step
  # that ends a column also stores the word (see those subs).  The top
  # word (index 2*num-1) is stored after the loop.
  #
  # The accumulator lives in ebx/ecx/ebp and is rotated after every column.
  # edi and esi are loaded from wparam(0) and wparam(1) and used as the
  # result and operand pointers.
  #
  # All working variables are lexical and start out from their intended
  # values only; nothing is seeded from @_ beyond name and num, and no
  # package globals are left behind.
  sub bn_sqr_comba
  {
      my ($name, $num) = @_;

      my ($c0, $c1, $c2) = ("ebx", "ecx", "ebp");
      my $ap = "esi";
      my $rp = "edi";

      # $as/$bs: indexes of the outermost product of the current column;
      # $be: highest loop index allowed in the current column.
      my ($as, $bs, $be) = (0, 0, 0);
      my $tot = $num + $num - 1;
      my $i;                  # kept outside the loop: read again afterwards

      &function_begin_B($name, "");

      &push("esi");
      &push("edi");
      &push("ebp");
      &push("ebx");
      &mov($rp, &wparam(0));
      &mov($ap, &wparam(1));
      &xor($c0, $c0);
      &xor($c1, $c1);
      &mov("eax", &DWP(0, $ap, "", 0));     # load the first word

      for ($i = 0; $i < $tot; $i++) {
          my $ai  = $as;
          my $bi  = $bs;
          my $end = $be + 1;

          # True while the columns are still getting longer.
          my $growing = ($i < ($num - 1));

          &comment("############### Calculate word $i");

          for (my $j = $bs; $j < $end; $j++) {
              &xor($c2, $c2) if ($j == $bs);

              # $v: 0 while more products remain in this column, 1 on the
              # product that ends it (the next pair would cross over), 2
              # when that is also the last column.  ($na,$nb) are the
              # operand indexes the step should preload.
              my ($v, $na, $nb);
              if (($ai - 1) < ($bi + 1)) {
                  $v  = (($i + 1) == $tot) ? 2 : 1;
                  $na = $as + ($growing ? 1 : 0);
                  $nb = $bs + ($growing ? 0 : 1);
              }
              else {
                  $v  = 0;
                  $na = $ai - 1;
                  $nb = $bi + 1;
              }

              if ($ai == $bi) {
                  &sqr_add_c($rp, $ap, $ai, $bi,
                             $c0, $c1, $c2, $v, $i, $na, $nb);
              }
              else {
                  &sqr_add_c2($rp, $ap, $ai, $bi,
                              $c0, $c1, $c2, $v, $i, $na, $nb);
              }

              if ($v) {
                  &comment("saved r[$i]");
                  # Column finished: rotate the accumulator and skip the
                  # mirrored half of the column.
                  ($c0, $c1, $c2) = ($c1, $c2, $c0);
                  last;
              }

              $ai--;
              $bi++;
          }

          if ($growing) {
              $as++;
              $be++;
          }
          else {
              $bs++;
          }
      }

      # $i == $tot here: store the remaining top word.
      &mov(&DWP($i*4, $rp, "", 0), $c0);

      &pop("ebx");
      &pop("ebp");
      &pop("edi");
      &pop("esi");
      &ret();
      &function_end_B($name);
  }
  257. }