bn-586.pl 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593
  #!/usr/local/bin/perl
  #
  # Driver: loads the perlasm x86 helper library, emits every bignum word
  # routine defined further down in this file, then finalizes the output.

  # x86asm.pl is looked up in ./perlasm and ../../perlasm (appended to @INC).
  push @INC, "perlasm", "../../perlasm";
  require "x86asm.pl";

  # The first command-line argument and this script's path go to asm_init.
  &asm_init($ARGV[0], $0);

  # Each generator receives the symbol name to emit; emission order matters
  # for the layout of the generated file, so it is kept as is.
  bn_mul_add_words("bn_mul_add_words");
  bn_mul_words("bn_mul_words");
  bn_sqr_words("bn_sqr_words");
  bn_div_words("bn_div_words");
  bn_add_words("bn_add_words");
  bn_sub_words("bn_sub_words");
  bn_sub_part_words("bn_sub_part_words");

  &asm_finish();
  # bn_mul_add_words(NAME)
  #
  # Emits, through the perlasm helpers, an x86 routine called NAME.  The
  # generated routine reads four stack arguments -- wparam(0)=r, wparam(1)=a,
  # wparam(2)=num, wparam(3)=w -- and for every word computes
  # a[i]*w + carry + r[i], stores the low half back into r[i] and keeps the
  # high half as the next carry.  The final carry is left in eax.
  #
  # Generated layout: a loop unrolled 8 words per pass covers num & ~7 words,
  # then an unrolled tail of at most 7 rounds covers the remaining num & 7.
  #
  # All register names are lexicals, so this sub neither reads nor clobbers
  # package globals (in particular sort's $a).
  sub bn_mul_add_words
  {
      my ($name) = @_;

      my $lo    = "eax";    # low half of the product
      my $hi    = "edx";    # high half of the product
      my $a_ptr = "ebx";    # walks a[]
      my $word  = "ebp";    # the multiplier w
      my $r_ptr = "edi";    # walks r[]
      my $carry = "esi";    # running carry; doubles as scratch for *r
      my $cnt   = "ecx";    # word counter

      &function_begin($name, "");
      &comment("");

      &xor($carry, $carry);                   # carry = 0
      &mov($r_ptr, &wparam(0));
      &mov($cnt, &wparam(2));
      &mov($a_ptr, &wparam(1));
      &and($cnt, 0xfffffff8);                 # num rounded down to a multiple of 8
      &mov($word, &wparam(3));

      # mov and push do not alter the flags, so the jz still tests the AND.
      &push($cnt);                            # reserve a stack slot for the counter
      &jz(&label("maw_finish"));

      &set_label("maw_loop", 0);
      &mov(&swtmp(0), $cnt);                  # park the counter in the stack slot
      for (my $off = 0; $off < 32; $off += 4) {
          &comment("Round $off");
          &mov($lo, &DWP($off, $a_ptr, "", 0));       # *a
          &mul($word);                                # *a * w
          &add($lo, $carry);                          # L(t) += c
          &mov($carry, &DWP($off, $r_ptr, "", 0));    # fetch *r
          &adc($hi, 0);                               # H(t) += carry flag
          &add($lo, $carry);                          # L(t) += *r
          &adc($hi, 0);                               # H(t) += carry flag
          &mov(&DWP($off, $r_ptr, "", 0), $lo);       # *r = L(t)
          &mov($carry, $hi);                          # c  = H(t)
      }
      &comment("");
      &mov($cnt, &swtmp(0));                  # reload the counter
      &add($a_ptr, 32);
      &add($r_ptr, 32);
      &sub($cnt, 8);
      &jnz(&label("maw_loop"));

      &set_label("maw_finish", 0);
      &mov($cnt, &wparam(2));                 # get num again
      &and($cnt, 7);                          # words left over
      &jnz(&label("maw_finish2"));            # helps branch prediction
      &jmp(&label("maw_end"));

      &set_label("maw_finish2", 1);
      foreach my $i (0 .. 6) {
          my $off  = $i * 4;
          my $more = ($i != 6);               # the 7th tail round needs no exit test
          &comment("Tail Round $i");
          &mov($lo, &DWP($off, $a_ptr, "", 0));       # *a
          &mul($word);                                # *a * w
          &add($lo, $carry);                          # L(t) += c
          &mov($carry, &DWP($off, $r_ptr, "", 0));    # fetch *r
          &adc($hi, 0);                               # H(t) += carry flag
          &add($lo, $carry);                          # L(t) += *r
          &adc($hi, 0);                               # H(t) += carry flag
          &dec($cnt) if $more;                        # sets ZF for the jz below
          &mov(&DWP($off, $r_ptr, "", 0), $lo);       # *r = L(t) (flags untouched)
          &mov($carry, $hi);                          # c  = H(t)
          &jz(&label("maw_end")) if $more;
      }

      &set_label("maw_end", 0);
      &mov($lo, $carry);                      # return the carry in eax
      &pop($cnt);                             # drop the counter slot
      &function_end($name);
  }
  # bn_mul_words(NAME)
  #
  # Emits an x86 routine called NAME.  The generated routine reads
  # wparam(0)=r, wparam(1)=a, wparam(2)=num, wparam(3)=w and for every word
  # computes a[i]*w + carry, stores the low half into r[i] and keeps the high
  # half as the next carry.  The final carry is left in eax.
  #
  # Generated layout: an 8-way unrolled loop for num & ~7 words followed by an
  # unrolled tail of at most 7 rounds for num & 7.
  #
  # Register names are lexicals; no package globals are read or written.
  sub bn_mul_words
  {
      my ($name) = @_;

      my $lo    = "eax";    # low half of the product
      my $hi    = "edx";    # high half of the product
      my $a_ptr = "ebx";    # walks a[]
      my $word  = "ecx";    # the multiplier w
      my $r_ptr = "edi";    # walks r[]
      my $carry = "esi";    # running carry
      my $cnt   = "ebp";    # word counter

      &function_begin($name, "");
      &comment("");

      &xor($carry, $carry);                   # carry = 0
      &mov($r_ptr, &wparam(0));
      &mov($a_ptr, &wparam(1));
      &mov($cnt, &wparam(2));
      &mov($word, &wparam(3));
      &and($cnt, 0xfffffff8);                 # num rounded down to a multiple of 8
      &jz(&label("mw_finish"));

      &set_label("mw_loop", 0);
      for (my $off = 0; $off < 32; $off += 4) {
          &comment("Round $off");
          &mov($lo, &DWP($off, $a_ptr, "", 0));   # *a
          &mul($word);                            # *a * w
          &add($lo, $carry);                      # L(t) += c
          &adc($hi, 0);                           # H(t) += carry flag
          &mov(&DWP($off, $r_ptr, "", 0), $lo);   # *r = L(t)
          &mov($carry, $hi);                      # c  = H(t)
      }
      &comment("");
      &add($a_ptr, 32);
      &add($r_ptr, 32);
      &sub($cnt, 8);
      &jz(&label("mw_finish"));
      &jmp(&label("mw_loop"));

      &set_label("mw_finish", 0);
      &mov($cnt, &wparam(2));                 # get num again
      &and($cnt, 7);                          # words left over
      &jnz(&label("mw_finish2"));
      &jmp(&label("mw_end"));

      &set_label("mw_finish2", 1);
      foreach my $i (0 .. 6) {
          my $off = $i * 4;
          &comment("Tail Round $i");
          &mov($lo, &DWP($off, $a_ptr, "", 0));   # *a
          &mul($word);                            # *a * w
          &add($lo, $carry);                      # L(t) += c
          &adc($hi, 0);                           # H(t) += carry flag
          &mov(&DWP($off, $r_ptr, "", 0), $lo);   # *r = L(t)
          &mov($carry, $hi);                      # c  = H(t)
          if ($i != 6) {                          # the 7th round falls through
              &dec($cnt);
              &jz(&label("mw_end"));
          }
      }

      &set_label("mw_end", 0);
      &mov($lo, $carry);                      # return the carry in eax
      &function_end($name);
  }
  # bn_sqr_words(NAME)
  #
  # Emits an x86 routine called NAME.  The generated routine reads
  # wparam(0)=r, wparam(1)=a, wparam(2)=num and, for every input word a[i],
  # multiplies it by itself and stores the two result words at r[2*i]
  # (from eax) and r[2*i+1] (from edx).
  #
  # Generated layout: an 8-way unrolled loop for num & ~7 words (a advances
  # by 32 bytes, r by 64) followed by an unrolled tail of at most 7 rounds.
  #
  # Register names are lexicals; no package globals are read or written.
  sub bn_sqr_words
  {
      my ($name) = @_;

      my $r_ptr = "esi";    # walks r[] (two words written per input word)
      my $a_ptr = "edi";    # walks a[]
      my $cnt   = "ebx";    # word counter

      &function_begin($name, "");
      &comment("");

      &mov($r_ptr, &wparam(0));
      &mov($a_ptr, &wparam(1));
      &mov($cnt, &wparam(2));
      &and($cnt, 0xfffffff8);                 # num rounded down to a multiple of 8
      &jz(&label("sw_finish"));

      &set_label("sw_loop", 0);
      for (my $off = 0; $off < 32; $off += 4) {
          &comment("Round $off");
          &mov("eax", &DWP($off, $a_ptr, "", 0));         # *a
          &mul("eax");                                    # *a * *a
          &mov(&DWP($off * 2, $r_ptr, "", 0), "eax");     # result word from eax
          &mov(&DWP($off * 2 + 4, $r_ptr, "", 0), "edx"); # result word from edx
      }
      &comment("");
      &add($a_ptr, 32);
      &add($r_ptr, 64);
      &sub($cnt, 8);
      &jnz(&label("sw_loop"));

      &set_label("sw_finish", 0);
      &mov($cnt, &wparam(2));                 # get num again
      &and($cnt, 7);                          # words left over
      &jz(&label("sw_end"));

      foreach my $i (0 .. 6) {
          my $more = ($i != 6);               # the 7th tail round needs no exit test
          &comment("Tail Round $i");
          &mov("eax", &DWP($i * 4, $a_ptr, "", 0));       # *a
          &mul("eax");                                    # *a * *a
          &mov(&DWP($i * 8, $r_ptr, "", 0), "eax");       # result word from eax
          &dec($cnt) if $more;                            # sets ZF for the jz below
          &mov(&DWP($i * 8 + 4, $r_ptr, "", 0), "edx");   # result word from edx
          &jz(&label("sw_end")) if $more;
      }

      &set_label("sw_end", 0);
      &function_end($name);
  }
  # bn_div_words(NAME)
  #
  # Emits an x86 routine called NAME that loads edx from wparam(0), eax from
  # wparam(1) and ebx from wparam(2), then executes "div ebx".  On x86 that
  # divides the 64-bit value edx:eax by ebx, leaving the quotient in eax and
  # the remainder in edx.
  #
  # NOTE(review): no guard is emitted around the div.  The instruction raises
  # a divide error when ebx is zero or the quotient does not fit in 32 bits
  # (i.e. wparam(0) >= wparam(2)); callers of the generated routine are
  # presumably expected to guarantee this -- confirm against the C callers.
  sub bn_div_words
  {
      my ($name) = @_;    # lexical: not exposed to the helpers via dynamic scope

      &function_begin($name, "");
      &mov("edx", &wparam(0));    # high word of the dividend
      &mov("eax", &wparam(1));    # low word of the dividend
      &mov("ebx", &wparam(2));    # divisor
      &div("ebx");
      &function_end($name);
  }
  # bn_add_words(NAME)
  #
  # Emits an x86 routine called NAME.  The generated routine reads
  # wparam(0)=r, wparam(1)=a, wparam(2)=b, wparam(3)=num and stores
  # a[i] + b[i] + carry into r[i] for every word, propagating the carry from
  # word to word.  The final carry (0 or 1) is left in eax.
  #
  # Generated layout: an 8-way unrolled loop for num & ~7 words followed by an
  # unrolled tail of at most 7 rounds for num & 7.
  #
  # Register names are lexicals; no package globals (notably sort's $a/$b)
  # are read or written.
  sub bn_add_words
  {
      my ($name) = @_;

      my $a_ptr  = "esi";   # walks a[]
      my $b_ptr  = "edi";   # walks b[]
      my $carry  = "eax";   # carry between words; also the return value
      my $r_ptr  = "ebx";   # walks r[]
      my $sum    = "ecx";   # a[i], then the running sum
      my $addend = "edx";   # b[i]
      my $cnt    = "ebp";   # word counter

      # One word of the addition at byte offset $off.  With $count_down set
      # (tail rounds) the counter is decremented and the routine exits early
      # once it reaches zero.
      my $emit_round = sub {
          my ($off, $count_down) = @_;
          &mov($sum, &DWP($off, $a_ptr, "", 0));      # *a
          &mov($addend, &DWP($off, $b_ptr, "", 0));   # *b
          &add($sum, $carry);                         # add incoming carry
          &mov($carry, 0);                            # mov leaves CF intact
          &adc($carry, $carry);                       # carry = CF of first add
          &add($sum, $addend);
          &adc($carry, 0);                            # carry += CF of second add
          &dec($cnt) if $count_down;                  # sets ZF for the jz below
          &mov(&DWP($off, $r_ptr, "", 0), $sum);      # *r (flags untouched)
          &jz(&label("aw_end")) if $count_down;
      };

      &function_begin($name, "");
      &comment("");

      &mov($r_ptr, &wparam(0));               # get r
      &mov($a_ptr, &wparam(1));               # get a
      &mov($b_ptr, &wparam(2));               # get b
      &mov($cnt, &wparam(3));                 # get num
      &xor($carry, $carry);                   # clear carry
      &and($cnt, 0xfffffff8);                 # num rounded down to a multiple of 8
      &jz(&label("aw_finish"));

      &set_label("aw_loop", 0);
      foreach my $i (0 .. 7) {
          &comment("Round $i");
          $emit_round->($i * 4, 0);
      }
      &comment("");
      &add($a_ptr, 32);
      &add($b_ptr, 32);
      &add($r_ptr, 32);
      &sub($cnt, 8);
      &jnz(&label("aw_loop"));

      &set_label("aw_finish", 0);
      &mov($cnt, &wparam(3));                 # get num again
      &and($cnt, 7);                          # words left over
      &jz(&label("aw_end"));

      foreach my $i (0 .. 6) {
          &comment("Tail Round $i");
          $emit_round->($i * 4, $i != 6);     # the 7th round falls through
      }

      &set_label("aw_end", 0);
      # The carry already lives in eax, so no final move is needed.
      &function_end($name);
  }
  # bn_sub_words(NAME)
  #
  # Emits an x86 routine called NAME.  The generated routine reads
  # wparam(0)=r, wparam(1)=a, wparam(2)=b, wparam(3)=num and stores
  # a[i] - b[i] - borrow into r[i] for every word, propagating the borrow
  # from word to word.  The final borrow (0 or 1) is left in eax.
  #
  # Generated layout: an 8-way unrolled loop for num & ~7 words followed by an
  # unrolled tail of at most 7 rounds for num & 7.
  #
  # The label names keep their "aw_" prefix so the emitted text stays exactly
  # what it was.  Register names are lexicals; no package globals (notably
  # sort's $a/$b) are read or written.
  sub bn_sub_words
  {
      my ($name) = @_;

      my $a_ptr  = "esi";   # walks a[]
      my $b_ptr  = "edi";   # walks b[]
      my $borrow = "eax";   # borrow between words; also the return value
      my $r_ptr  = "ebx";   # walks r[]
      my $diff   = "ecx";   # a[i], then the running difference
      my $subtr  = "edx";   # b[i]
      my $cnt    = "ebp";   # word counter

      # One word of the subtraction at byte offset $off.  With $count_down
      # set (tail rounds) the counter is decremented and the routine exits
      # early once it reaches zero.
      my $emit_round = sub {
          my ($off, $count_down) = @_;
          &mov($diff, &DWP($off, $a_ptr, "", 0));     # *a
          &mov($subtr, &DWP($off, $b_ptr, "", 0));    # *b
          &sub($diff, $borrow);                       # take incoming borrow
          &mov($borrow, 0);                           # mov leaves CF intact
          &adc($borrow, $borrow);                     # borrow = CF of first sub
          &sub($diff, $subtr);
          &adc($borrow, 0);                           # borrow += CF of second sub
          &dec($cnt) if $count_down;                  # sets ZF for the jz below
          &mov(&DWP($off, $r_ptr, "", 0), $diff);     # *r (flags untouched)
          &jz(&label("aw_end")) if $count_down;
      };

      &function_begin($name, "");
      &comment("");

      &mov($r_ptr, &wparam(0));               # get r
      &mov($a_ptr, &wparam(1));               # get a
      &mov($b_ptr, &wparam(2));               # get b
      &mov($cnt, &wparam(3));                 # get num
      &xor($borrow, $borrow);                 # clear borrow
      &and($cnt, 0xfffffff8);                 # num rounded down to a multiple of 8
      &jz(&label("aw_finish"));

      &set_label("aw_loop", 0);
      foreach my $i (0 .. 7) {
          &comment("Round $i");
          $emit_round->($i * 4, 0);
      }
      &comment("");
      &add($a_ptr, 32);
      &add($b_ptr, 32);
      &add($r_ptr, 32);
      &sub($cnt, 8);
      &jnz(&label("aw_loop"));

      &set_label("aw_finish", 0);
      &mov($cnt, &wparam(3));                 # get num again
      &and($cnt, 7);                          # words left over
      &jz(&label("aw_end"));

      foreach my $i (0 .. 6) {
          &comment("Tail Round $i");
          $emit_round->($i * 4, $i != 6);     # the 7th round falls through
      }

      &set_label("aw_end", 0);
      # The borrow already lives in eax, so no final move is needed.
      &function_end($name);
  }
  # bn_sub_part_words(NAME)
  #
  # Emits an x86 routine called NAME.  The generated routine reads
  # wparam(0)=r, wparam(1)=a, wparam(2)=b, wparam(3)=num and wparam(4)=dl,
  # and works in two phases:
  #
  #   1. Common part: r[i] = a[i] - b[i] - borrow for num words (8-way
  #      unrolled loop plus a tail of at most 7 rounds).  Unlike the other
  #      routines the tail advances a, b and r by 4 bytes per round, so all
  #      three pointers sit just past the common part afterwards.
  #   2. Extra part, selected by the sign of dl:
  #        dl == 0  nothing more to do;
  #        dl <  0  r[i] = 0 - b[i] - borrow for -dl further words;
  #        dl >  0  r[i] = a[i] - borrow for dl further words.  As soon as a
  #                 subtraction produces no borrow the code jumps into the
  #                 matching position of a plain copy loop ("pw_nc*" labels)
  #                 which copies the remaining words and ends with eax = 0.
  #
  # The borrow (0 or 1) is left in eax.  The "aw_" label names are kept so the
  # emitted text stays exactly what it was.  Register names are lexicals; no
  # package globals (notably sort's $a/$b) are read or written.
  sub bn_sub_part_words
  {
      my ($name) = @_;

      my $a_ptr  = "esi";   # walks a[]
      my $b_ptr  = "edi";   # walks b[]
      my $borrow = "eax";   # borrow between words; also the return value
      my $r_ptr  = "ebx";   # walks r[]
      my $diff   = "ecx";   # minuend, then the running difference
      my $subtr  = "edx";   # subtrahend word; scratch while negating dl
      my $cnt    = "ebp";   # word counter

      # diff = diff - borrow - subtr, leaving the new borrow (0/1) in $borrow.
      my $borrow_chain = sub {
          &sub($diff, $borrow);       # take incoming borrow
          &mov($borrow, 0);           # mov leaves CF intact
          &adc($borrow, $borrow);     # borrow = CF of first sub
          &sub($diff, $subtr);
          &adc($borrow, 0);           # borrow += CF of second sub
      };

      &function_begin($name, "");
      &comment("");

      &mov($r_ptr, &wparam(0));               # get r
      &mov($a_ptr, &wparam(1));               # get a
      &mov($b_ptr, &wparam(2));               # get b
      &mov($cnt, &wparam(3));                 # get num
      &xor($borrow, $borrow);                 # clear borrow
      &and($cnt, 0xfffffff8);                 # num rounded down to a multiple of 8
      &jz(&label("aw_finish"));

      # ---- common part: r = a - b -------------------------------------
      &set_label("aw_loop", 0);
      foreach my $i (0 .. 7) {
          my $off = $i * 4;
          &comment("Round $i");
          &mov($diff, &DWP($off, $a_ptr, "", 0));     # *a
          &mov($subtr, &DWP($off, $b_ptr, "", 0));    # *b
          $borrow_chain->();
          &mov(&DWP($off, $r_ptr, "", 0), $diff);     # *r
      }
      &comment("");
      &add($a_ptr, 32);
      &add($b_ptr, 32);
      &add($r_ptr, 32);
      &sub($cnt, 8);
      &jnz(&label("aw_loop"));

      &set_label("aw_finish", 0);
      &mov($cnt, &wparam(3));                 # get num again
      &and($cnt, 7);                          # words left over
      &jz(&label("aw_end"));

      foreach my $i (0 .. 6) {
          &comment("Tail Round $i");
          &mov($diff, &DWP(0, $a_ptr, "", 0));        # *a
          &mov($subtr, &DWP(0, $b_ptr, "", 0));       # *b
          $borrow_chain->();
          &mov(&DWP(0, $r_ptr, "", 0), $diff);        # *r
          # Step the pointers so the dl phase continues where this stops.
          &add($a_ptr, 4);
          &add($b_ptr, 4);
          &add($r_ptr, 4);
          if ($i != 6) {                              # the 7th round falls through
              &dec($cnt);
              &jz(&label("aw_end"));
          }
      }

      # ---- dispatch on dl ----------------------------------------------
      &set_label("aw_end", 0);
      &cmp(&wparam(4), 0);
      &je(&label("pw_end"));
      &mov($cnt, &wparam(4));                 # get dl
      &cmp($cnt, 0);
      &je(&label("pw_end"));
      &jge(&label("pw_pos"));

      # ---- dl < 0: r = 0 - b for -dl words -----------------------------
      &comment("pw_neg");
      &mov($subtr, 0);
      &sub($subtr, $cnt);                     # -dl
      &mov($cnt, $subtr);
      &and($cnt, 0xfffffff8);                 # -dl rounded down to a multiple of 8
      &jz(&label("pw_neg_finish"));

      &set_label("pw_neg_loop", 0);
      foreach my $i (0 .. 7) {
          my $off = $i * 4;
          &comment("dl<0 Round $i");
          &mov($diff, 0);                             # minuend is zero
          &mov($subtr, &DWP($off, $b_ptr, "", 0));    # *b
          $borrow_chain->();
          &mov(&DWP($off, $r_ptr, "", 0), $diff);     # *r
      }
      &comment("");
      &add($b_ptr, 32);
      &add($r_ptr, 32);
      &sub($cnt, 8);
      &jnz(&label("pw_neg_loop"));

      &set_label("pw_neg_finish", 0);
      &mov($subtr, &wparam(4));               # get dl
      &mov($cnt, 0);
      &sub($cnt, $subtr);                     # -dl
      &and($cnt, 7);                          # words left over
      &jz(&label("pw_end"));

      foreach my $i (0 .. 6) {
          my $off  = $i * 4;
          my $more = ($i != 6);               # the 7th round falls through
          &comment("dl<0 Tail Round $i");
          &mov($diff, 0);                             # minuend is zero
          &mov($subtr, &DWP($off, $b_ptr, "", 0));    # *b
          $borrow_chain->();
          &dec($cnt) if $more;                        # sets ZF for the jz below
          &mov(&DWP($off, $r_ptr, "", 0), $diff);     # *r (flags untouched)
          &jz(&label("pw_end")) if $more;
      }
      &jmp(&label("pw_end"));

      # ---- dl > 0: r = a - borrow for dl words -------------------------
      &set_label("pw_pos", 0);
      &and($cnt, 0xfffffff8);                 # dl rounded down to a multiple of 8
      &jz(&label("pw_pos_finish"));

      &set_label("pw_pos_loop", 0);
      foreach my $i (0 .. 7) {
          my $off = $i * 4;
          &comment("dl>0 Round $i");
          &mov($diff, &DWP($off, $a_ptr, "", 0));     # *a
          &sub($diff, $borrow);
          &mov(&DWP($off, $r_ptr, "", 0), $diff);     # *r (flags untouched)
          &jnc(&label("pw_nc" . $i));                 # borrow gone: plain copy
      }
      &comment("");
      &add($a_ptr, 32);
      &add($r_ptr, 32);
      &sub($cnt, 8);
      &jnz(&label("pw_pos_loop"));

      &set_label("pw_pos_finish", 0);
      &mov($cnt, &wparam(4));                 # get dl
      &and($cnt, 7);                          # words left over
      &jz(&label("pw_end"));

      foreach my $i (0 .. 6) {
          my $off = $i * 4;
          &comment("dl>0 Tail Round $i");
          &mov($diff, &DWP($off, $a_ptr, "", 0));     # *a
          &sub($diff, $borrow);
          &mov(&DWP($off, $r_ptr, "", 0), $diff);     # *r (flags untouched)
          &jnc(&label("pw_tail_nc" . $i));            # borrow gone: plain copy
          if ($i != 6) {                              # the 7th round falls through
              &dec($cnt);
              &jz(&label("pw_end"));
          }
      }
      &mov($borrow, 1);                       # borrow survived every word
      &jmp(&label("pw_end"));

      # ---- dl > 0 with no borrow left: copy a to r ---------------------
      # Each pw_nc<i> label sits right after the copy of word i, which is
      # where the borrow loop above jumps once word i produced no borrow.
      &set_label("pw_nc_loop", 0);
      foreach my $i (0 .. 7) {
          my $off = $i * 4;
          &mov($diff, &DWP($off, $a_ptr, "", 0));     # *a
          &mov(&DWP($off, $r_ptr, "", 0), $diff);     # *r
          &set_label("pw_nc" . $i, 0);
      }
      &comment("");
      &add($a_ptr, 32);
      &add($r_ptr, 32);
      &sub($cnt, 8);
      &jnz(&label("pw_nc_loop"));

      &mov($cnt, &wparam(4));                 # get dl
      &and($cnt, 7);                          # words left over
      &jz(&label("pw_nc_end"));

      foreach my $i (0 .. 6) {
          my $off = $i * 4;
          &mov($diff, &DWP($off, $a_ptr, "", 0));     # *a
          &mov(&DWP($off, $r_ptr, "", 0), $diff);     # *r
          &set_label("pw_tail_nc" . $i, 0);
          if ($i != 6) {                              # the 7th round falls through
              &dec($cnt);
              &jz(&label("pw_nc_end"));
          }
      }

      &set_label("pw_nc_end", 0);
      &mov($borrow, 0);                       # no borrow to report

      &set_label("pw_end", 0);
      # The borrow already lives in eax, so no final move is needed.
      &function_end($name);
  }