bn-586.pl 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675
  #!/usr/local/bin/perl
  #
  # Generator script: loads the perlasm x86 back end and then emits, in a fixed
  # order, the word-level bignum routines whose generator subs are defined
  # further down in this file.

  push(@INC, "perlasm", "../../perlasm");
  require "x86asm.pl";

  # First command-line argument and this script's own name go to the back end.
  &asm_init($ARGV[0], $0);

  # The SSE2 path of bn_mul_add_words is generated only when the
  # -DOPENSSL_IA32_SSE2 flag appears somewhere on the command line.
  $sse2 = (grep { /-DOPENSSL_IA32_SSE2/ } @ARGV) ? 1 : 0;
  &external_label("OPENSSL_ia32cap_P") if ($sse2);

  # Each generator sub is handed its own name as the symbol to emit.
  foreach my $routine (qw(
      bn_mul_add_words
      bn_mul_words
      bn_sqr_words
      bn_div_words
      bn_add_words
      bn_sub_words
      bn_sub_part_words
  ))
  {
      my $generate = \&{$routine};
      &$generate($routine);
  }

  &asm_finish();
  # bn_mul_add_words($name)
  #
  # Emits a routine taking (r, a, num, w) as stack parameters 0..3.  For each of
  # the num words it computes a[i]*w + r[i] + carry, stores the low 32 bits
  # back to r[i] and carries the high 32 bits into the next word.  The final
  # carry is left in eax.
  #
  # Layout of the generated code:
  #   - groups of 8 words, either with MMX/SSE2 instructions (only generated
  #     when $sse2 is set, and only taken at run time when bit 26 of the
  #     OPENSSL_ia32cap_P word is set) or with the integer mul loop;
  #   - then up to 7 remaining words in an unrolled tail.
  sub bn_mul_add_words
  {
      local($name) = @_;

      my $extern = $sse2 ? "EXTRN\t_OPENSSL_ia32cap_P:DWORD" : "";
      &function_begin($name, $extern);
      &comment("");

      # Register roles.  $Low/$High are assigned but not referenced below.
      ($Low, $High)    = ("eax", "edx");
      ($a, $w, $r, $c) = ("ebx", "ebp", "edi", "esi");

      # Memory operand builders: byte offset from the a / r base register.
      my $a_at = sub { &DWP($_[0], $a, "", 0) };
      my $r_at = sub { &DWP($_[0], $r, "", 0) };

      &xor($c, $c);                   # carry = 0
      &mov($r, &wparam(0));           # r
      &mov("ecx", &wparam(2));        # num
      &mov($a, &wparam(1));           # a
      &and("ecx", 0xfffffff8);        # ecx = num rounded down to a multiple of 8
      &mov($w, &wparam(3));           # w
      &push("ecx");                   # reserve one stack word, addressed as swtmp(0)
      &jz(&label("maw_finish"));      # ZF still comes from the AND: no full group

      if ($sse2)
      {
          # Run-time dispatch on bit 26 of the capability word.
          &picmeup("eax", "OPENSSL_ia32cap_P");
          &bt(&DWP(0, "eax"), 26);
          &jnc(&label("maw_loop"));

          &movd("mm0", $w);               # mm0 = w
          &pxor("mm1", "mm1");            # mm1 = running carry, starts at 0

          &set_label("maw_sse2_loop", 0);

          # Products for words 0..3 are started while r[0..3] are loaded.
          &movd("mm3", $r_at->(0));       # mm3 = r[0]
          &paddq("mm1", "mm3");           # mm1 = carry + r[0]
          &movd("mm2", $a_at->(0));       # mm2 = a[0]
          &pmuludq("mm2", "mm0");         # mm2 = w*a[0]
          &movd("mm4", $a_at->(4));       # mm4 = a[1]
          &pmuludq("mm4", "mm0");         # mm4 = w*a[1]
          &movd("mm6", $a_at->(8));       # mm6 = a[2]
          &pmuludq("mm6", "mm0");         # mm6 = w*a[2]
          &movd("mm7", $a_at->(12));      # mm7 = a[3]
          &pmuludq("mm7", "mm0");         # mm7 = w*a[3]
          &paddq("mm1", "mm2");           # mm1 = carry + r[0] + w*a[0]
          &movd("mm3", $r_at->(4));       # mm3 = r[1]
          &paddq("mm3", "mm4");           # mm3 = r[1] + w*a[1]
          &movd("mm5", $r_at->(8));       # mm5 = r[2]
          &paddq("mm5", "mm6");           # mm5 = r[2] + w*a[2]
          &movd("mm4", $r_at->(12));      # mm4 = r[3]
          &paddq("mm7", "mm4");           # mm7 = r[3] + w*a[3]

          # Word 0 out; start products for words 4 and 5.
          &movd($r_at->(0), "mm1");
          &movd("mm2", $a_at->(16));      # mm2 = a[4]
          &pmuludq("mm2", "mm0");         # mm2 = w*a[4]
          &psrlq("mm1", 32);              # mm1 = carry out of word 0
          &movd("mm4", $a_at->(20));      # mm4 = a[5]
          &pmuludq("mm4", "mm0");         # mm4 = w*a[5]
          &paddq("mm1", "mm3");           # mm1 = carry + r[1] + w*a[1]
          &movd("mm6", $a_at->(24));      # mm6 = a[6]
          &pmuludq("mm6", "mm0");         # mm6 = w*a[6]

          # Word 1 out; last load from a, so a can advance here.
          &movd($r_at->(4), "mm1");
          &psrlq("mm1", 32);              # mm1 = carry out of word 1
          &movd("mm3", $a_at->(28));      # mm3 = a[7]
          &add($a, 32);
          &pmuludq("mm3", "mm0");         # mm3 = w*a[7]
          &paddq("mm1", "mm5");           # mm1 = carry + r[2] + w*a[2]
          &movd("mm5", $r_at->(16));      # mm5 = r[4]
          &paddq("mm2", "mm5");           # mm2 = r[4] + w*a[4]

          # Word 2 out.
          &movd($r_at->(8), "mm1");
          &psrlq("mm1", 32);              # mm1 = carry out of word 2
          &paddq("mm1", "mm7");           # mm1 = carry + r[3] + w*a[3]
          &movd("mm5", $r_at->(20));      # mm5 = r[5]
          &paddq("mm4", "mm5");           # mm4 = r[5] + w*a[5]

          # Word 3 out.
          &movd($r_at->(12), "mm1");
          &psrlq("mm1", 32);              # mm1 = carry out of word 3
          &paddq("mm1", "mm2");           # mm1 = carry + r[4] + w*a[4]
          &movd("mm5", $r_at->(24));      # mm5 = r[6]
          &paddq("mm6", "mm5");           # mm6 = r[6] + w*a[6]

          # Word 4 out.
          &movd($r_at->(16), "mm1");
          &psrlq("mm1", 32);              # mm1 = carry out of word 4
          &paddq("mm1", "mm4");           # mm1 = carry + r[5] + w*a[5]
          &movd("mm5", $r_at->(28));      # mm5 = r[7]
          &paddq("mm3", "mm5");           # mm3 = r[7] + w*a[7]

          # Words 5, 6 and 7 out.
          &movd($r_at->(20), "mm1");
          &psrlq("mm1", 32);              # mm1 = carry out of word 5
          &paddq("mm1", "mm6");           # mm1 = carry + r[6] + w*a[6]
          &movd($r_at->(24), "mm1");
          &psrlq("mm1", 32);              # mm1 = carry out of word 6
          &paddq("mm1", "mm3");           # mm1 = carry + r[7] + w*a[7]
          &movd($r_at->(28), "mm1");
          &add($r, 32);
          &psrlq("mm1", 32);              # mm1 = carry out of the whole group

          &sub("ecx", 8);
          &jnz(&label("maw_sse2_loop"));

          &movd($c, "mm1");               # hand the carry to the integer tail
          &emms();
          &jmp(&label("maw_finish"));
      }

      # Integer path: 8 words per iteration.
      &set_label("maw_loop", 0);
      &mov(&swtmp(0), "ecx");             # park the remaining count in the stack word

      foreach my $off (map { 4 * $_ } 0 .. 7)
      {
          &comment("Round $off");
          &mov("eax", $a_at->($off));     # eax = a word
          &mul($w);                       # edx:eax = a word * w
          &add("eax", $c);                # low  += carry in
          &mov($c, $r_at->($off));        # fetch r word (mov keeps CF for the adc)
          &adc("edx", 0);                 # high += CF
          &add("eax", $c);                # low  += r word
          &adc("edx", 0);                 # high += CF
          &mov($r_at->($off), "eax");     # r word = low
          &mov($c, "edx");                # carry out = high
      }

      &comment("");
      &mov("ecx", &swtmp(0));             # reload the remaining count
      &add($a, 32);
      &add($r, 32);
      &sub("ecx", 8);
      &jnz(&label("maw_loop"));

      # Tail: num & 7 words, one unrolled step each.
      &set_label("maw_finish", 0);
      &mov("ecx", &wparam(2));            # num again
      &and("ecx", 7);
      &jnz(&label("maw_finish2"));        # helps branch prediction
      &jmp(&label("maw_end"));

      &set_label("maw_finish2", 1);
      foreach my $k (0 .. 6)
      {
          my $off  = 4 * $k;
          my $more = ($k != 6);           # the seventh tail word needs no count test

          &comment("Tail Round $k");
          &mov("eax", $a_at->($off));     # eax = a word
          &mul($w);                       # edx:eax = a word * w
          &add("eax", $c);                # low  += carry in
          &mov($c, $r_at->($off));        # fetch r word
          &adc("edx", 0);                 # high += CF
          &add("eax", $c);                # low  += r word
          &adc("edx", 0);                 # high += CF
          &dec("ecx") if ($more);         # ZF set here is consumed by the jz below
          &mov($r_at->($off), "eax");     # r word = low
          &mov($c, "edx");                # carry out = high
          &jz(&label("maw_end")) if ($more);
      }

      &set_label("maw_end", 0);
      &mov("eax", $c);                    # final carry
      &pop("ecx");                        # release the temporary stack word
      &function_end($name);
  }
  # bn_mul_words($name)
  #
  # Emits a routine taking (r, a, num, w) as stack parameters 0..3.  For each of
  # the num words it computes a[i]*w + carry, stores the low 32 bits to r[i]
  # and carries the high 32 bits forward.  The final carry is left in eax.
  # Words are handled 8 per loop iteration, then up to 7 in an unrolled tail.
  sub bn_mul_words
  {
      local($name) = @_;

      &function_begin($name, "");
      &comment("");

      # Register roles.  $Low/$High are assigned but not referenced below.
      ($Low, $High)          = ("eax", "edx");
      ($a, $w, $r, $c, $num) = ("ebx", "ecx", "edi", "esi", "ebp");

      # One multiply step on the word at byte offset $off.
      my $mul_step = sub {
          my ($off) = @_;
          &mov("eax", &DWP($off, $a, "", 0));     # eax = a word
          &mul($w);                               # edx:eax = a word * w
          &add("eax", $c);                        # low  += carry in
          &adc("edx", 0);                         # high += CF
          &mov(&DWP($off, $r, "", 0), "eax");     # r word = low
          &mov($c, "edx");                        # carry out = high
      };

      &xor($c, $c);                   # carry = 0
      &mov($r, &wparam(0));           # r
      &mov($a, &wparam(1));           # a
      &mov($num, &wparam(2));         # num
      &mov($w, &wparam(3));           # w
      &and($num, 0xfffffff8);         # num rounded down to a multiple of 8
      &jz(&label("mw_finish"));

      &set_label("mw_loop", 0);
      foreach my $off (map { 4 * $_ } 0 .. 7)
      {
          &comment("Round $off");
          $mul_step->($off);
      }

      &comment("");
      &add($a, 32);
      &add($r, 32);
      &sub($num, 8);
      &jz(&label("mw_finish"));
      &jmp(&label("mw_loop"));

      # Tail: num & 7 words.
      &set_label("mw_finish", 0);
      &mov($num, &wparam(2));         # num again
      &and($num, 7);
      &jnz(&label("mw_finish2"));
      &jmp(&label("mw_end"));

      &set_label("mw_finish2", 1);
      foreach my $k (0 .. 6)
      {
          &comment("Tail Round $k");
          $mul_step->(4 * $k);

          # The seventh tail word falls straight through to mw_end.
          if ($k != 6)
          {
              &dec($num);
              &jz(&label("mw_end"));
          }
      }

      &set_label("mw_end", 0);
      &mov("eax", $c);                # final carry
      &function_end($name);
  }
  # bn_sqr_words($name)
  #
  # Emits a routine taking (r, a, num) as stack parameters 0..2.  Each input
  # word a[i] is squared and the 64-bit result is written as two words,
  # r[2*i] (low) and r[2*i+1] (high).  No carry is propagated between words.
  # Words are handled 8 per loop iteration, then up to 7 in an unrolled tail.
  sub bn_sqr_words
  {
      local($name) = @_;

      &function_begin($name, "");
      &comment("");

      ($r, $a, $num) = ("esi", "edi", "ebx");

      &mov($r, &wparam(0));           # r
      &mov($a, &wparam(1));           # a
      &mov($num, &wparam(2));         # num
      &and($num, 0xfffffff8);         # num rounded down to a multiple of 8
      &jz(&label("sw_finish"));

      &set_label("sw_loop", 0);
      foreach my $off (map { 4 * $_ } 0 .. 7)
      {
          &comment("Round $off");
          &mov("eax", &DWP($off, $a, "", 0));             # eax = a word
          &mul("eax");                                    # edx:eax = a word squared
          &mov(&DWP($off * 2, $r, "", 0), "eax");         # low half
          &mov(&DWP($off * 2 + 4, $r, "", 0), "edx");     # high half
      }

      &comment("");
      &add($a, 32);                   # 8 input words consumed
      &add($r, 64);                   # 16 output words produced
      &sub($num, 8);
      &jnz(&label("sw_loop"));

      # Tail: num & 7 words.
      &set_label("sw_finish", 0);
      &mov($num, &wparam(2));         # num again
      &and($num, 7);
      &jz(&label("sw_end"));

      foreach my $k (0 .. 6)
      {
          my $more = ($k != 6);       # the seventh tail word needs no count test

          &comment("Tail Round $k");
          &mov("eax", &DWP($k * 4, $a, "", 0));           # eax = a word
          &mul("eax");                                    # edx:eax = a word squared
          &mov(&DWP($k * 8, $r, "", 0), "eax");           # low half
          &dec($num) if ($more);                          # ZF consumed by the jz below
          &mov(&DWP($k * 8 + 4, $r, "", 0), "edx");       # high half
          &jz(&label("sw_end")) if ($more);
      }

      &set_label("sw_end", 0);
      &function_end($name);
  }
  # bn_div_words($name)
  #
  # Emits a routine that loads stack parameter 0 into edx, parameter 1 into eax
  # and parameter 2 into ebx, then executes a single unsigned `div ebx`.  The
  # x86 div leaves the quotient of edx:eax / ebx in eax and the remainder in edx.
  # NOTE(review): nothing here guards against a zero divisor or a quotient that
  # does not fit in 32 bits; the generated code relies on the caller for that.
  sub bn_div_words
  {
      local($name) = @_;

      &function_begin($name, "");

      # [ destination register, stack parameter index ]
      my @operand_loads = (
          [ "edx", 0 ],       # high word of the dividend
          [ "eax", 1 ],       # low word of the dividend
          [ "ebx", 2 ],       # divisor
      );
      foreach my $load (@operand_loads)
      {
          &mov($load->[0], &wparam($load->[1]));
      }

      &div("ebx");
      &function_end($name);
  }
  # bn_add_words($name)
  #
  # Emits a routine taking (r, a, b, num) as stack parameters 0..3.  For each of
  # the num words it stores a[i] + b[i] + carry to r[i]; the carry (0 or 1)
  # lives in eax between words and is what remains in eax at the end.
  # Words are handled 8 per loop iteration, then up to 7 in an unrolled tail.
  sub bn_add_words
  {
      local($name) = @_;

      &function_begin($name, "");
      &comment("");

      ($a, $b, $c, $r)      = ("esi", "edi", "eax", "ebx");
      ($tmp1, $tmp2, $num)  = ("ecx", "edx", "ebp");

      # One add-with-carry step on the word at byte offset $off.  With
      # $count_down set, the remaining-word count is decremented and the step
      # ends with an exit branch once it reaches zero.
      my $add_step = sub {
          my ($off, $count_down) = @_;
          &mov($tmp1, &DWP($off, $a, "", 0));     # a word
          &mov($tmp2, &DWP($off, $b, "", 0));     # b word
          &add($tmp1, $c);                        # + carry in
          &mov($c, 0);                            # mov, so CF from the add survives
          &adc($c, $c);                           # carry = CF of (a + carry in)
          &add($tmp1, $tmp2);                     # + b word
          &adc($c, 0);                            # carry += CF of that add
          &dec($num) if ($count_down);            # ZF consumed by the jz below
          &mov(&DWP($off, $r, "", 0), $tmp1);     # r word
          &jz(&label("aw_end")) if ($count_down);
      };

      &mov($r, &wparam(0));           # r
      &mov($a, &wparam(1));           # a
      &mov($b, &wparam(2));           # b
      &mov($num, &wparam(3));         # num
      &xor($c, $c);                   # carry = 0
      &and($num, 0xfffffff8);         # num rounded down to a multiple of 8
      &jz(&label("aw_finish"));

      &set_label("aw_loop", 0);
      foreach my $k (0 .. 7)
      {
          &comment("Round $k");
          $add_step->($k * 4, 0);
      }

      &comment("");
      &add($a, 32);
      &add($b, 32);
      &add($r, 32);
      &sub($num, 8);
      &jnz(&label("aw_loop"));

      # Tail: num & 7 words.
      &set_label("aw_finish", 0);
      &mov($num, &wparam(3));         # num again
      &and($num, 7);
      &jz(&label("aw_end"));

      foreach my $k (0 .. 6)
      {
          &comment("Tail Round $k");
          $add_step->($k * 4, $k != 6);   # the seventh tail word needs no count test
      }

      &set_label("aw_end", 0);
      # The carry is already in eax ($c), so no final move is needed.
      &function_end($name);
  }
  # bn_sub_words($name)
  #
  # Emits a routine taking (r, a, b, num) as stack parameters 0..3.  For each of
  # the num words it stores a[i] - b[i] - borrow to r[i]; the borrow (0 or 1)
  # lives in eax between words and is what remains in eax at the end.
  # Words are handled 8 per loop iteration, then up to 7 in an unrolled tail.
  # The labels use the same aw_ names as bn_add_words.
  sub bn_sub_words
  {
      local($name) = @_;

      &function_begin($name, "");
      &comment("");

      ($a, $b, $c, $r)      = ("esi", "edi", "eax", "ebx");
      ($tmp1, $tmp2, $num)  = ("ecx", "edx", "ebp");

      # One subtract-with-borrow step on the word at byte offset $off.  With
      # $count_down set, the remaining-word count is decremented and the step
      # ends with an exit branch once it reaches zero.
      my $sub_step = sub {
          my ($off, $count_down) = @_;
          &mov($tmp1, &DWP($off, $a, "", 0));     # a word
          &mov($tmp2, &DWP($off, $b, "", 0));     # b word
          &sub($tmp1, $c);                        # - borrow in
          &mov($c, 0);                            # mov, so CF from the sub survives
          &adc($c, $c);                           # borrow = CF of (a - borrow in)
          &sub($tmp1, $tmp2);                     # - b word
          &adc($c, 0);                            # borrow += CF of that sub
          &dec($num) if ($count_down);            # ZF consumed by the jz below
          &mov(&DWP($off, $r, "", 0), $tmp1);     # r word
          &jz(&label("aw_end")) if ($count_down);
      };

      &mov($r, &wparam(0));           # r
      &mov($a, &wparam(1));           # a
      &mov($b, &wparam(2));           # b
      &mov($num, &wparam(3));         # num
      &xor($c, $c);                   # borrow = 0
      &and($num, 0xfffffff8);         # num rounded down to a multiple of 8
      &jz(&label("aw_finish"));

      &set_label("aw_loop", 0);
      foreach my $k (0 .. 7)
      {
          &comment("Round $k");
          $sub_step->($k * 4, 0);
      }

      &comment("");
      &add($a, 32);
      &add($b, 32);
      &add($r, 32);
      &sub($num, 8);
      &jnz(&label("aw_loop"));

      # Tail: num & 7 words.
      &set_label("aw_finish", 0);
      &mov($num, &wparam(3));         # num again
      &and($num, 7);
      &jz(&label("aw_end"));

      foreach my $k (0 .. 6)
      {
          &comment("Tail Round $k");
          $sub_step->($k * 4, $k != 6);   # the seventh tail word needs no count test
      }

      &set_label("aw_end", 0);
      # The borrow is already in eax ($c), so no final move is needed.
      &function_end($name);
  }
  # bn_sub_part_words($name)
  #
  # Emits a routine taking (r, a, b, num, dl) as stack parameters 0..4.
  #
  # Phase 1 (common part): for num words, r[i] = a[i] - b[i] - borrow.  The
  #   tail of this phase advances a, b and r by one word per step, so all three
  #   pointers sit just past the common part when phase 2 starts.
  # Phase 2 depends on the sign of dl:
  #   dl == 0  nothing more to do;
  #   dl <  0  for -dl further words, r[i] = 0 - b[i] - borrow;
  #   dl >  0  for  dl further words, r[i] = a[i] - borrow.  As soon as one of
  #            these subtractions produces no borrow, the code switches to the
  #            plain copy loop (pw_nc_*) for the remaining words and finishes
  #            with borrow = 0.
  # The borrow (0 or 1) is kept in eax and is what remains in eax at the end.
  #
  # Change from the earlier revision: dl used to be tested for zero twice in a
  # row (once in memory, once after loading it).  Only the register test is
  # kept; the branches taken are the same.
  sub bn_sub_part_words
  {
      local($name) = @_;

      &function_begin($name, "");
      &comment("");

      ($a, $b, $c, $r)      = ("esi", "edi", "eax", "ebx");
      ($tmp1, $tmp2, $num)  = ("ecx", "edx", "ebp");

      # Shared tail of every subtract step: fold the borrow in, subtract the
      # second operand, and leave the new borrow (0 or 1) in $c.  mov is used
      # to zero $c because it does not disturb CF.
      my $borrow_sub = sub {
          &sub($tmp1, $c);            # - borrow in
          &mov($c, 0);
          &adc($c, $c);               # borrow = CF of that sub
          &sub($tmp1, $tmp2);         # - second operand
          &adc($c, 0);                # borrow += CF of that sub
      };

      &mov($r, &wparam(0));           # r
      &mov($a, &wparam(1));           # a
      &mov($b, &wparam(2));           # b
      &mov($num, &wparam(3));         # num
      &xor($c, $c);                   # borrow = 0
      &and($num, 0xfffffff8);         # num rounded down to a multiple of 8
      &jz(&label("aw_finish"));

      # ---- Phase 1: common part, 8 words per iteration -------------------
      &set_label("aw_loop", 0);
      foreach my $k (0 .. 7)
      {
          &comment("Round $k");
          &mov($tmp1, &DWP($k * 4, $a, "", 0));       # a word
          &mov($tmp2, &DWP($k * 4, $b, "", 0));       # b word
          $borrow_sub->();
          &mov(&DWP($k * 4, $r, "", 0), $tmp1);       # r word
      }

      &comment("");
      &add($a, 32);
      &add($b, 32);
      &add($r, 32);
      &sub($num, 8);
      &jnz(&label("aw_loop"));

      # ---- Phase 1 tail: num & 7 words, pointers bumped per word ---------
      &set_label("aw_finish", 0);
      &mov($num, &wparam(3));         # num again
      &and($num, 7);
      &jz(&label("aw_end"));

      foreach my $k (0 .. 6)
      {
          &comment("Tail Round $k");
          &mov($tmp1, &DWP(0, $a, "", 0));            # a word
          &mov($tmp2, &DWP(0, $b, "", 0));            # b word
          $borrow_sub->();
          &mov(&DWP(0, $r, "", 0), $tmp1);            # r word
          &add($a, 4);
          &add($b, 4);
          &add($r, 4);

          # The seventh tail word falls straight through to aw_end.
          if ($k != 6)
          {
              &dec($num);
              &jz(&label("aw_end"));
          }
      }

      # ---- Dispatch on dl -----------------------------------------------
      &set_label("aw_end", 0);
      &mov($num, &wparam(4));         # dl
      &cmp($num, 0);
      &je(&label("pw_end"));          # dl == 0
      &jge(&label("pw_pos"));         # dl  > 0 (flags still from the cmp)

      # ---- dl < 0: r = 0 - b - borrow for -dl words ---------------------
      &comment("pw_neg");
      &mov($tmp2, 0);
      &sub($tmp2, $num);
      &mov($num, $tmp2);              # num = -dl
      &and($num, 0xfffffff8);         # rounded down to a multiple of 8
      &jz(&label("pw_neg_finish"));

      &set_label("pw_neg_loop", 0);
      foreach my $k (0 .. 7)
      {
          &comment("dl<0 Round $k");
          &mov($tmp1, 0);                             # minuend is zero
          &mov($tmp2, &DWP($k * 4, $b, "", 0));       # b word
          $borrow_sub->();
          &mov(&DWP($k * 4, $r, "", 0), $tmp1);       # r word
      }

      &comment("");
      &add($b, 32);
      &add($r, 32);
      &sub($num, 8);
      &jnz(&label("pw_neg_loop"));

      &set_label("pw_neg_finish", 0);
      &mov($tmp2, &wparam(4));        # dl
      &mov($num, 0);
      &sub($num, $tmp2);              # num = -dl
      &and($num, 7);
      &jz(&label("pw_end"));

      foreach my $k (0 .. 6)
      {
          my $more = ($k != 6);       # the seventh tail word needs no count test

          &comment("dl<0 Tail Round $k");
          &mov($tmp1, 0);                             # minuend is zero
          &mov($tmp2, &DWP($k * 4, $b, "", 0));       # b word
          $borrow_sub->();
          &dec($num) if ($more);                      # ZF consumed by the jz below
          &mov(&DWP($k * 4, $r, "", 0), $tmp1);       # r word
          &jz(&label("pw_end")) if ($more);
      }
      &jmp(&label("pw_end"));

      # ---- dl > 0: r = a - borrow while a borrow is still pending --------
      &set_label("pw_pos", 0);
      &and($num, 0xfffffff8);         # dl rounded down to a multiple of 8
      &jz(&label("pw_pos_finish"));

      &set_label("pw_pos_loop", 0);
      foreach my $k (0 .. 7)
      {
          &comment("dl>0 Round $k");
          &mov($tmp1, &DWP($k * 4, $a, "", 0));       # a word
          &sub($tmp1, $c);                            # - borrow
          &mov(&DWP($k * 4, $r, "", 0), $tmp1);       # r word
          &jnc(&label("pw_nc" . $k));                 # no borrow left: go copy the rest
      }

      &comment("");
      &add($a, 32);
      &add($r, 32);
      &sub($num, 8);
      &jnz(&label("pw_pos_loop"));

      &set_label("pw_pos_finish", 0);
      &mov($num, &wparam(4));         # dl
      &and($num, 7);
      &jz(&label("pw_end"));

      foreach my $k (0 .. 6)
      {
          &comment("dl>0 Tail Round $k");
          &mov($tmp1, &DWP($k * 4, $a, "", 0));       # a word
          &sub($tmp1, $c);                            # - borrow
          &mov(&DWP($k * 4, $r, "", 0), $tmp1);       # r word
          &jnc(&label("pw_tail_nc" . $k));            # no borrow left: go copy the rest

          if ($k != 6)
          {
              &dec($num);
              &jz(&label("pw_end"));
          }
      }
      &mov($c, 1);                    # every word borrowed: report a borrow
      &jmp(&label("pw_end"));

      # ---- dl > 0, borrow cleared: plain copy of the remaining words -----
      # The pw_nc<k> / pw_tail_nc<k> labels sit after word k's copy, because
      # the subtract path has already stored that word before jumping here.
      &set_label("pw_nc_loop", 0);
      foreach my $k (0 .. 7)
      {
          &mov($tmp1, &DWP($k * 4, $a, "", 0));       # a word
          &mov(&DWP($k * 4, $r, "", 0), $tmp1);       # r word
          &set_label("pw_nc" . $k, 0);
      }

      &comment("");
      &add($a, 32);
      &add($r, 32);
      &sub($num, 8);
      &jnz(&label("pw_nc_loop"));

      &mov($num, &wparam(4));         # dl
      &and($num, 7);
      &jz(&label("pw_nc_end"));

      foreach my $k (0 .. 6)
      {
          &mov($tmp1, &DWP($k * 4, $a, "", 0));       # a word
          &mov(&DWP($k * 4, $r, "", 0), $tmp1);       # r word
          &set_label("pw_tail_nc" . $k, 0);

          if ($k != 6)
          {
              &dec($num);
              &jz(&label("pw_nc_end"));
          }
      }

      &set_label("pw_nc_end", 0);
      &mov($c, 0);                    # copy path always ends without a borrow

      &set_label("pw_end", 0);
      # The borrow is already in eax ($c), so no final move is needed.
      &function_end($name);
  }