123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675 |
- #!/usr/local/bin/perl
- # Driver: loads the perlasm helper library, initialises the assembler
- # output from the command-line arguments, emits each bignum routine in
- # turn and then finalises the output.
- push(@INC,"perlasm","../../perlasm");
- require "x86asm.pl";
- &asm_init($ARGV[0],$0);
- # SSE2 code is generated only when -DOPENSSL_IA32_SSE2 appears among the
- # arguments.
- $sse2=(grep(/-DOPENSSL_IA32_SSE2/,@ARGV) ? 1 : 0);
- if ($sse2)
-     {
-     &external_label("OPENSSL_ia32cap_P");
-     }
- # Every generator sub is named after the routine it emits and receives
- # that name as its only argument; the order below is the emission order.
- foreach $routine ("bn_mul_add_words",
-                   "bn_mul_words",
-                   "bn_sqr_words",
-                   "bn_div_words",
-                   "bn_add_words",
-                   "bn_sub_words",
-                   "bn_sub_part_words")
-     {
-     &$routine($routine);
-     }
- &asm_finish();
- sub bn_mul_add_words # generator: emits a routine computing r[i] += a[i]*w with carry propagation, leaving the final carry word in eax
- { # layout: 8x-unrolled main loop (MMX/SSE2 variant when $sse2 is set), then an unrolled tail for up to 7 leftover words
- local($name)=@_; # $name: symbol name of the routine to emit
- &function_begin($name,$sse2?"EXTRN\t_OPENSSL_ia32cap_P:DWORD":""); # the EXTRN text is passed only when SSE2 code is generated
- &comment("");
- $Low="eax"; # not referenced again inside this sub
- $High="edx"; # not referenced again inside this sub
- $a="ebx"; # a: pointer to the source words
- $w="ebp"; # w: the multiplier word
- $r="edi"; # r: pointer to the words that are read, accumulated into and written back
- $c="esi"; # c: running carry word (also reused as scratch inside each round)
- &xor($c,$c); # carry = 0
- &mov($r,&wparam(0)); # r = parameter 0
- &mov("ecx",&wparam(2)); # ecx = parameter 2 (num)
- &mov($a,&wparam(1)); # a = parameter 1
- &and("ecx",0xfffffff8); # ecx = num with its low 3 bits cleared, i.e. the word count for the unrolled loop
- &mov($w,&wparam(3)); # w = parameter 3
- &push("ecx"); # reserve one stack slot; maw_loop accesses it through &swtmp(0)
- &jz(&label("maw_finish")); # fewer than 8 words: go to the tail (mov/push above leave the flags from the and intact)
- if ($sse2) {
- &picmeup("eax","OPENSSL_ia32cap_P"); # presumably loads the address of OPENSSL_ia32cap_P into eax - helper lives outside this file
- &bt(&DWP(0,"eax"),26); # CF = bit 26 of the dword at that address (assumed to be the SSE2 capability bit - confirm)
- &jnc(&label("maw_loop")); # bit clear: fall back to the integer loop
- &movd("mm0",$w); # mm0 = w
- &pxor("mm1","mm1"); # mm1 = carry_in
- &set_label("maw_sse2_loop",0);
- &movd("mm3",&DWP(0,$r,"",0)); # mm3 = r[0]
- &paddq("mm1","mm3"); # mm1 = carry_in + r[0]
- &movd("mm2",&DWP(0,$a,"",0)); # mm2 = a[0]
- &pmuludq("mm2","mm0"); # mm2 = w*a[0]
- &movd("mm4",&DWP(4,$a,"",0)); # mm4 = a[1]
- &pmuludq("mm4","mm0"); # mm4 = w*a[1]
- &movd("mm6",&DWP(8,$a,"",0)); # mm6 = a[2]
- &pmuludq("mm6","mm0"); # mm6 = w*a[2]
- &movd("mm7",&DWP(12,$a,"",0)); # mm7 = a[3]
- &pmuludq("mm7","mm0"); # mm7 = w*a[3]
- &paddq("mm1","mm2"); # mm1 = carry_in + r[0] + w*a[0]
- &movd("mm3",&DWP(4,$r,"",0)); # mm3 = r[1]
- &paddq("mm3","mm4"); # mm3 = r[1] + w*a[1]
- &movd("mm5",&DWP(8,$r,"",0)); # mm5 = r[2]
- &paddq("mm5","mm6"); # mm5 = r[2] + w*a[2]
- &movd("mm4",&DWP(12,$r,"",0)); # mm4 = r[3]
- &paddq("mm7","mm4"); # mm7 = r[3] + w*a[3]
- &movd(&DWP(0,$r,"",0),"mm1"); # r[0] = low 32 bits of the sum
- &movd("mm2",&DWP(16,$a,"",0)); # mm2 = a[4]
- &pmuludq("mm2","mm0"); # mm2 = w*a[4]
- &psrlq("mm1",32); # mm1 = carry0
- &movd("mm4",&DWP(20,$a,"",0)); # mm4 = a[5]
- &pmuludq("mm4","mm0"); # mm4 = w*a[5]
- &paddq("mm1","mm3"); # mm1 = carry0 + r[1] + w*a[1]
- &movd("mm6",&DWP(24,$a,"",0)); # mm6 = a[6]
- &pmuludq("mm6","mm0"); # mm6 = w*a[6]
- &movd(&DWP(4,$r,"",0),"mm1"); # r[1] = low 32 bits
- &psrlq("mm1",32); # mm1 = carry1
- &movd("mm3",&DWP(28,$a,"",0)); # mm3 = a[7]
- &add($a,32); # all eight a words are loaded: advance a by 8 words
- &pmuludq("mm3","mm0"); # mm3 = w*a[7]
- &paddq("mm1","mm5"); # mm1 = carry1 + r[2] + w*a[2]
- &movd("mm5",&DWP(16,$r,"",0)); # mm5 = r[4]
- &paddq("mm2","mm5"); # mm2 = r[4] + w*a[4]
- &movd(&DWP(8,$r,"",0),"mm1"); # r[2] = low 32 bits
- &psrlq("mm1",32); # mm1 = carry2
- &paddq("mm1","mm7"); # mm1 = carry2 + r[3] + w*a[3]
- &movd("mm5",&DWP(20,$r,"",0)); # mm5 = r[5]
- &paddq("mm4","mm5"); # mm4 = r[5] + w*a[5]
- &movd(&DWP(12,$r,"",0),"mm1"); # r[3] = low 32 bits
- &psrlq("mm1",32); # mm1 = carry3
- &paddq("mm1","mm2"); # mm1 = carry3 + r[4] + w*a[4]
- &movd("mm5",&DWP(24,$r,"",0)); # mm5 = r[6]
- &paddq("mm6","mm5"); # mm6 = r[6] + w*a[6]
- &movd(&DWP(16,$r,"",0),"mm1"); # r[4] = low 32 bits
- &psrlq("mm1",32); # mm1 = carry4
- &paddq("mm1","mm4"); # mm1 = carry4 + r[5] + w*a[5]
- &movd("mm5",&DWP(28,$r,"",0)); # mm5 = r[7]
- &paddq("mm3","mm5"); # mm3 = r[7] + w*a[7]
- &movd(&DWP(20,$r,"",0),"mm1"); # r[5] = low 32 bits
- &psrlq("mm1",32); # mm1 = carry5
- &paddq("mm1","mm6"); # mm1 = carry5 + r[6] + w*a[6]
- &movd(&DWP(24,$r,"",0),"mm1"); # r[6] = low 32 bits
- &psrlq("mm1",32); # mm1 = carry6
- &paddq("mm1","mm3"); # mm1 = carry6 + r[7] + w*a[7]
- &movd(&DWP(28,$r,"",0),"mm1"); # r[7] = low 32 bits
- &add($r,32); # advance r by 8 words
- &psrlq("mm1",32); # mm1 = carry_out
- &sub("ecx",8); # 8 words done
- &jnz(&label("maw_sse2_loop")); # repeat while full groups of 8 remain
- &movd($c,"mm1"); # c = carry_out
- &emms(); # leave MMX state before returning to integer code
- &jmp(&label("maw_finish")); # leftover words are handled by the integer tail
- }
- &set_label("maw_loop",0);
- &mov(&swtmp(0),"ecx"); # store the remaining count in the tmp stack slot
- for ($i=0; $i<32; $i+=4) # $i is a byte offset here: 8 words per pass
- {
- &comment("Round $i");
- &mov("eax",&DWP($i,$a,"",0)); # eax = *a
- &mul($w); # edx:eax = *a * w
- &add("eax",$c); # L(t)+=c
- &mov($c,&DWP($i,$r,"",0)); # c is reused as scratch: c = *r
- &adc("edx",0); # H(t)+=carry
- &add("eax",$c); # L(t)+= *r
- &adc("edx",0); # H(t)+=carry
- &mov(&DWP($i,$r,"",0),"eax"); # *r= L(t);
- &mov($c,"edx"); # c= H(t);
- }
- &comment("");
- &mov("ecx",&swtmp(0)); # reload the remaining count
- &add($a,32); # advance a by 8 words
- &add($r,32); # advance r by 8 words
- &sub("ecx",8); # 8 words done
- &jnz(&label("maw_loop")); # repeat while full groups of 8 remain
- &set_label("maw_finish",0);
- &mov("ecx",&wparam(2)); # get num
- &and("ecx",7); # leftover word count (low 3 bits of num)
- &jnz(&label("maw_finish2")); # helps branch prediction
- &jmp(&label("maw_end")); # no leftover words
- &set_label("maw_finish2",1);
- for ($i=0; $i<7; $i++) # at most 7 leftover words, one unrolled round each
- {
- &comment("Tail Round $i");
- &mov("eax",&DWP($i*4,$a,"",0));# eax = *a
- &mul($w); # edx:eax = *a * w
- &add("eax",$c); # L(t)+=c
- &mov($c,&DWP($i*4,$r,"",0)); # c is reused as scratch: c = *r
- &adc("edx",0); # H(t)+=carry
- &add("eax",$c); # L(t)+= *r
- &adc("edx",0); # H(t)+=carry
- &dec("ecx") if ($i != 7-1); # count down; the two movs below do not disturb ZF for the jz
- &mov(&DWP($i*4,$r,"",0),"eax"); # *r= L(t);
- &mov($c,"edx"); # c= H(t);
- &jz(&label("maw_end")) if ($i != 7-1); # stop once no words remain (no check is emitted after the 7th round)
- }
- &set_label("maw_end",0);
- &mov("eax",$c); # final carry word goes to eax
- &pop("ecx"); # discard the tmp stack slot pushed at entry
- &function_end($name);
- }
- sub bn_mul_words
-     {
-     # Generator for the routine named $name, taking (r, a, num, w).
-     # For each of num words it stores the low word of a[i]*w + carry into
-     # r[i] and keeps the high word as the next carry; the last carry is
-     # left in eax.  Words are processed eight at a time, then an unrolled
-     # tail covers the remaining 0..7 words.
-     local($name)=@_;
-     local($off);
-     &function_begin($name,"");
-     &comment("");
-     # Register roles.
-     ($Low,$High)=("eax","edx");
-     ($a,$w,$r,$c,$num)=("ebx","ecx","edi","esi","ebp");
-     &xor($c,$c);                            # carry = 0
-     &mov($r,&wparam(0));
-     &mov($a,&wparam(1));
-     &mov($num,&wparam(2));
-     &mov($w,&wparam(3));
-     &and($num,0xfffffff8);                  # drop the low 3 bits of num
-     &jz(&label("mw_finish"));
-     # Main loop body: eight rounds, labelled by byte offset.
-     &set_label("mw_loop",0);
-     for ($i=0; $i<8; $i++)
-         {
-         $off=4*$i;
-         &comment("Round $off");
-         &mov("eax",&DWP($off,$a,"",0));     # eax = a word
-         &mul($w);                           # edx:eax = a word * w
-         &add("eax",$c);                     # low += carry
-         &adc("edx",0);                      # propagate into high
-         &mov(&DWP($off,$r,"",0),"eax");     # store low word
-         &mov($c,"edx");                     # high word is the new carry
-         }
-     &comment("");
-     &add($a,32);
-     &add($r,32);
-     &sub($num,8);
-     &jz(&label("mw_finish"));
-     &jmp(&label("mw_loop"));
-     # Tail: num & 7 leftover words.
-     &set_label("mw_finish",0);
-     &mov($num,&wparam(2));
-     &and($num,7);
-     &jnz(&label("mw_finish2"));
-     &jmp(&label("mw_end"));
-     &set_label("mw_finish2",1);
-     for ($i=0; $i<7; $i++)
-         {
-         $off=4*$i;
-         &comment("Tail Round $i");
-         &mov("eax",&DWP($off,$a,"",0));
-         &mul($w);
-         &add("eax",$c);
-         &adc("edx",0);
-         &mov(&DWP($off,$r,"",0),"eax");
-         &mov($c,"edx");
-         # The seventh round is the last possible one, so it needs no
-         # countdown or exit test.
-         next if ($i == 6);
-         &dec($num);
-         &jz(&label("mw_end"));
-         }
-     &set_label("mw_end",0);
-     &mov("eax",$c);                         # carry word goes to eax
-     &function_end($name);
-     }
- sub bn_sqr_words
-     {
-     # Generator for the routine named $name, taking (r, a, num).
-     # Each word of a is squared with a one-operand mul; the low and high
-     # words of the product are stored in two consecutive slots of r, so r
-     # advances twice as fast as a.
-     local($name)=@_;
-     local($src,$dst,$more);
-     &function_begin($name,"");
-     &comment("");
-     ($r,$a,$num)=("esi","edi","ebx");
-     &mov($r,&wparam(0));
-     &mov($a,&wparam(1));
-     &mov($num,&wparam(2));
-     &and($num,0xfffffff8);                  # drop the low 3 bits of num
-     &jz(&label("sw_finish"));
-     # Main loop body: eight rounds, labelled by source byte offset.
-     &set_label("sw_loop",0);
-     for ($i=0; $i<8; $i++)
-         {
-         $src=4*$i;
-         $dst=8*$i;
-         &comment("Round $src");
-         &mov("eax",&DWP($src,$a,"",0));     # eax = a word
-         &mul("eax");                        # edx:eax = a word squared
-         &mov(&DWP($dst,$r,"",0),"eax");     # low word
-         &mov(&DWP($dst+4,$r,"",0),"edx");   # high word
-         }
-     &comment("");
-     &add($a,32);
-     &add($r,64);
-     &sub($num,8);
-     &jnz(&label("sw_loop"));
-     # Tail: num & 7 leftover words.
-     &set_label("sw_finish",0);
-     &mov($num,&wparam(2));
-     &and($num,7);
-     &jz(&label("sw_end"));
-     for ($i=0; $i<7; $i++)
-         {
-         $src=4*$i;
-         $dst=8*$i;
-         $more=($i != 6);                    # no countdown after round 7
-         &comment("Tail Round $i");
-         &mov("eax",&DWP($src,$a,"",0));
-         &mul("eax");
-         &mov(&DWP($dst,$r,"",0),"eax");
-         &dec($num) if ($more);
-         &mov(&DWP($dst+4,$r,"",0),"edx");   # mov keeps the flags from dec
-         &jz(&label("sw_end")) if ($more);
-         }
-     &set_label("sw_end",0);
-     &function_end($name);
-     }
- sub bn_div_words
-     {
-     # Generator for the routine named $name: loads parameter 0 into edx,
-     # parameter 1 into eax and parameter 2 into ebx, then emits a single
-     # div by ebx (which divides edx:eax, leaving the quotient in eax).
-     local($name)=@_;
-     local($slot);
-     &function_begin($name,"");
-     foreach $slot (0..2)
-         {
-         &mov(("edx","eax","ebx")[$slot],&wparam($slot));
-         }
-     &div("ebx");
-     &function_end($name);
-     }
- sub bn_add_words
-     {
-     # Generator for the routine named $name, taking (r, a, b, num).
-     # Emits r[i] = a[i] + b[i] + carry for num words.  The carry lives in
-     # eax, so it is already in place when the routine ends.  Words are
-     # processed eight at a time, then an unrolled tail covers the
-     # remaining 0..7 words.
-     local($name)=@_;
-     local($off,$more);
-     &function_begin($name,"");
-     &comment("");
-     # Register roles.
-     ($a,$b,$r)=("esi","edi","ebx");
-     ($c,$num)=("eax","ebp");
-     ($tmp1,$tmp2)=("ecx","edx");
-     &mov($r,&wparam(0));
-     &mov($a,&wparam(1));
-     &mov($b,&wparam(2));
-     &mov($num,&wparam(3));
-     &xor($c,$c);                            # carry = 0
-     &and($num,0xfffffff8);                  # drop the low 3 bits of num
-     &jz(&label("aw_finish"));
-     # Main loop body: eight rounds.
-     &set_label("aw_loop",0);
-     for ($i=0; $i<8; $i++)
-         {
-         $off=4*$i;
-         &comment("Round $i");
-         &mov($tmp1,&DWP($off,$a,"",0));     # tmp1 = a word
-         &mov($tmp2,&DWP($off,$b,"",0));     # tmp2 = b word
-         &add($tmp1,$c);                     # add carry-in
-         &mov($c,0);                         # mov, so CF survives
-         &adc($c,$c);                        # c = CF of first add
-         &add($tmp1,$tmp2);                  # add b word
-         &adc($c,0);                         # c += CF of second add
-         &mov(&DWP($off,$r,"",0),$tmp1);     # store result word
-         }
-     &comment("");
-     &add($a,32);
-     &add($b,32);
-     &add($r,32);
-     &sub($num,8);
-     &jnz(&label("aw_loop"));
-     # Tail: num & 7 leftover words.
-     &set_label("aw_finish",0);
-     &mov($num,&wparam(3));
-     &and($num,7);
-     &jz(&label("aw_end"));
-     for ($i=0; $i<7; $i++)
-         {
-         $off=4*$i;
-         $more=($i != 6);                    # no countdown after round 7
-         &comment("Tail Round $i");
-         &mov($tmp1,&DWP($off,$a,"",0));
-         &mov($tmp2,&DWP($off,$b,"",0));
-         &add($tmp1,$c);
-         &mov($c,0);
-         &adc($c,$c);
-         &add($tmp1,$tmp2);
-         &adc($c,0);
-         &dec($num) if ($more);
-         &mov(&DWP($off,$r,"",0),$tmp1);     # mov keeps the flags from dec
-         &jz(&label("aw_end")) if ($more);
-         }
-     &set_label("aw_end",0);
-     # No final move is needed: $c is "eax".
-     &function_end($name);
-     }
- sub bn_sub_words
-     {
-     # Generator for the routine named $name, taking (r, a, b, num).
-     # Emits r[i] = a[i] - b[i] - borrow for num words.  The borrow lives in
-     # eax, so it is already in place when the routine ends.  The labels
-     # keep the "aw_" prefix also used by the add generator.
-     local($name)=@_;
-     local($off,$more);
-     &function_begin($name,"");
-     &comment("");
-     # Register roles.
-     ($a,$b,$r)=("esi","edi","ebx");
-     ($c,$num)=("eax","ebp");
-     ($tmp1,$tmp2)=("ecx","edx");
-     &mov($r,&wparam(0));
-     &mov($a,&wparam(1));
-     &mov($b,&wparam(2));
-     &mov($num,&wparam(3));
-     &xor($c,$c);                            # borrow = 0
-     &and($num,0xfffffff8);                  # drop the low 3 bits of num
-     &jz(&label("aw_finish"));
-     # Main loop body: eight rounds.
-     &set_label("aw_loop",0);
-     for ($i=0; $i<8; $i++)
-         {
-         $off=4*$i;
-         &comment("Round $i");
-         &mov($tmp1,&DWP($off,$a,"",0));     # tmp1 = a word
-         &mov($tmp2,&DWP($off,$b,"",0));     # tmp2 = b word
-         &sub($tmp1,$c);                     # subtract borrow-in
-         &mov($c,0);                         # mov, so CF survives
-         &adc($c,$c);                        # c = CF of first subtract
-         &sub($tmp1,$tmp2);                  # subtract b word
-         &adc($c,0);                         # c += CF of second subtract
-         &mov(&DWP($off,$r,"",0),$tmp1);     # store result word
-         }
-     &comment("");
-     &add($a,32);
-     &add($b,32);
-     &add($r,32);
-     &sub($num,8);
-     &jnz(&label("aw_loop"));
-     # Tail: num & 7 leftover words.
-     &set_label("aw_finish",0);
-     &mov($num,&wparam(3));
-     &and($num,7);
-     &jz(&label("aw_end"));
-     for ($i=0; $i<7; $i++)
-         {
-         $off=4*$i;
-         $more=($i != 6);                    # no countdown after round 7
-         &comment("Tail Round $i");
-         &mov($tmp1,&DWP($off,$a,"",0));
-         &mov($tmp2,&DWP($off,$b,"",0));
-         &sub($tmp1,$c);
-         &mov($c,0);
-         &adc($c,$c);
-         &sub($tmp1,$tmp2);
-         &adc($c,0);
-         &dec($num) if ($more);
-         &mov(&DWP($off,$r,"",0),$tmp1);     # mov keeps the flags from dec
-         &jz(&label("aw_end")) if ($more);
-         }
-     &set_label("aw_end",0);
-     # No final move is needed: $c is "eax".
-     &function_end($name);
-     }
- sub bn_sub_part_words # generator: emits r = a - b over the common words (parameter 3), then processes a further run of words selected by parameter 4 ("dl")
- { # the borrow is kept in eax ($c) throughout, so it is already in eax when the routine ends
- local($name)=@_; # $name: symbol name of the routine to emit
- &function_begin($name,"");
- &comment("");
- $a="esi"; # a: minuend pointer
- $b="edi"; # b: subtrahend pointer
- $c="eax"; # c: running borrow
- $r="ebx"; # r: result pointer
- $tmp1="ecx"; # scratch: word being computed
- $tmp2="edx"; # scratch: b word / temporary for negation
- $num="ebp"; # remaining word count of the current phase
- &mov($r,&wparam(0)); # get r
- &mov($a,&wparam(1)); # get a
- &mov($b,&wparam(2)); # get b
- &mov($num,&wparam(3)); # get num
- &xor($c,$c); # borrow = 0
- &and($num,0xfffffff8); # num with its low 3 bits cleared: word count for the unrolled loop
- &jz(&label("aw_finish")); # fewer than 8 common words (labels keep the "aw_" prefix used by the add generator)
- &set_label("aw_loop",0);
- for ($i=0; $i<8; $i++)
- {
- &comment("Round $i");
- &mov($tmp1,&DWP($i*4,$a,"",0)); # tmp1 = *a
- &mov($tmp2,&DWP($i*4,$b,"",0)); # tmp2 = *b
- &sub($tmp1,$c); # subtract the borrow-in
- &mov($c,0); # zero c with mov so CF is preserved
- &adc($c,$c); # c = CF of the first subtract
- &sub($tmp1,$tmp2); # subtract *b
- &adc($c,0); # c += CF of the second subtract
- &mov(&DWP($i*4,$r,"",0),$tmp1); # *r = result word
- }
- &comment("");
- &add($a,32); # advance a by 8 words
- &add($b,32); # advance b by 8 words
- &add($r,32); # advance r by 8 words
- &sub($num,8); # 8 words done
- &jnz(&label("aw_loop")); # repeat while full groups of 8 remain
- &set_label("aw_finish",0);
- &mov($num,&wparam(3)); # get num
- &and($num,7); # leftover common words
- &jz(&label("aw_end"));
- for ($i=0; $i<7; $i++) # at most 7 leftover words, one unrolled round each
- {
- &comment("Tail Round $i");
- &mov($tmp1,&DWP(0,$a,"",0)); # tmp1 = *a (offset 0: the pointers are stepped below)
- &mov($tmp2,&DWP(0,$b,"",0));# tmp2 = *b
- &sub($tmp1,$c); # subtract the borrow-in
- &mov($c,0); # zero c with mov so CF is preserved
- &adc($c,$c); # c = CF of the first subtract
- &sub($tmp1,$tmp2); # subtract *b
- &adc($c,0); # c += CF of the second subtract
- &mov(&DWP(0,$r,"",0),$tmp1); # *r = result word
- &add($a, 4); # step a by one word so the dl phase below starts right after the common part
- &add($b, 4); # step b by one word
- &add($r, 4); # step r by one word
- &dec($num) if ($i != 6); # count down; emitted after the adds so the jz sees the flags of dec
- &jz(&label("aw_end")) if ($i != 6); # stop once no common words remain
- }
- &set_label("aw_end",0);
- &cmp(&wparam(4),0); # dl == 0 ?
- &je(&label("pw_end")); # yes: nothing beyond the common part
- &mov($num,&wparam(4)); # get dl
- &cmp($num,0); # same test repeated on the register copy; its flags also feed the jge below
- &je(&label("pw_end"));
- &jge(&label("pw_pos")); # signed: dl > 0 goes to pw_pos, dl < 0 falls through
- &comment("pw_neg");
- &mov($tmp2,0); # tmp2 = 0
- &sub($tmp2,$num); # tmp2 = -dl
- &mov($num,$tmp2); # num = -dl (positive count)
- &and($num,0xfffffff8); # count with its low 3 bits cleared
- &jz(&label("pw_neg_finish"));
- &set_label("pw_neg_loop",0);
- for ($i=0; $i<8; $i++)
- {
- &comment("dl<0 Round $i");
- &mov($tmp1,0); # the minuend is 0 in this phase
- &mov($tmp2,&DWP($i*4,$b,"",0)); # tmp2 = *b
- &sub($tmp1,$c); # subtract the borrow-in
- &mov($c,0); # zero c with mov so CF is preserved
- &adc($c,$c); # c = CF of the first subtract
- &sub($tmp1,$tmp2); # subtract *b
- &adc($c,0); # c += CF of the second subtract
- &mov(&DWP($i*4,$r,"",0),$tmp1); # *r = 0 - *b - borrow
- }
- 
- &comment("");
- &add($b,32); # advance b by 8 words
- &add($r,32); # advance r by 8 words
- &sub($num,8); # 8 words done
- &jnz(&label("pw_neg_loop"));
- 
- &set_label("pw_neg_finish",0);
- &mov($tmp2,&wparam(4)); # get dl
- &mov($num,0);
- &sub($num,$tmp2); # num = -dl
- &and($num,7); # leftover words of the dl<0 phase
- &jz(&label("pw_end"));
- 
- for ($i=0; $i<7; $i++)
- {
- &comment("dl<0 Tail Round $i");
- &mov($tmp1,0); # the minuend is 0 in this phase
- &mov($tmp2,&DWP($i*4,$b,"",0));# tmp2 = *b
- &sub($tmp1,$c); # subtract the borrow-in
- &mov($c,0); # zero c with mov so CF is preserved
- &adc($c,$c); # c = CF of the first subtract
- &sub($tmp1,$tmp2); # subtract *b
- &adc($c,0); # c += CF of the second subtract
- &dec($num) if ($i != 6); # count down; the mov below keeps the flags for the jz
- &mov(&DWP($i*4,$r,"",0),$tmp1); # *r = 0 - *b - borrow
- &jz(&label("pw_end")) if ($i != 6);
- }
- &jmp(&label("pw_end"));
- 
- &set_label("pw_pos",0);
- 
- &and($num,0xfffffff8); # num (= dl here) with its low 3 bits cleared
- &jz(&label("pw_pos_finish"));
- &set_label("pw_pos_loop",0);
- for ($i=0; $i<8; $i++)
- {
- &comment("dl>0 Round $i");
- &mov($tmp1,&DWP($i*4,$a,"",0)); # tmp1 = *a
- &sub($tmp1,$c); # subtract the borrow
- &mov(&DWP($i*4,$r,"",0),$tmp1); # *r = *a - borrow
- &jnc(&label("pw_nc".$i)); # no borrow out: continue in the plain-copy loop, entering right after its round $i
- }
- 
- &comment("");
- &add($a,32); # advance a by 8 words
- &add($r,32); # advance r by 8 words
- &sub($num,8); # 8 words done
- &jnz(&label("pw_pos_loop"));
- 
- &set_label("pw_pos_finish",0);
- &mov($num,&wparam(4)); # get dl
- &and($num,7); # leftover words of the dl>0 phase
- &jz(&label("pw_end"));
- 
- for ($i=0; $i<7; $i++)
- {
- &comment("dl>0 Tail Round $i");
- &mov($tmp1,&DWP($i*4,$a,"",0)); # tmp1 = *a
- &sub($tmp1,$c); # subtract the borrow
- &mov(&DWP($i*4,$r,"",0),$tmp1); # *r = *a - borrow
- &jnc(&label("pw_tail_nc".$i)); # no borrow out: continue in the plain-copy tail after its round $i
- &dec($num) if ($i != 6);
- &jz(&label("pw_end")) if ($i != 6);
- }
- &mov($c,1); # reached only when the 7th tail word still produced a borrow
- &jmp(&label("pw_end"));
- &set_label("pw_nc_loop",0);
- for ($i=0; $i<8; $i++) # plain copy of a to r, used once the borrow has been absorbed
- {
- &mov($tmp1,&DWP($i*4,$a,"",0)); # tmp1 = *a
- &mov(&DWP($i*4,$r,"",0),$tmp1); # *r = *a
- &set_label("pw_nc".$i,0); # entry point for the jnc of "dl>0 Round $i"
- }
- 
- &comment("");
- &add($a,32); # advance a by 8 words
- &add($r,32); # advance r by 8 words
- &sub($num,8); # 8 words done
- &jnz(&label("pw_nc_loop"));
- 
- &mov($num,&wparam(4)); # get dl
- &and($num,7); # leftover words to copy
- &jz(&label("pw_nc_end"));
- 
- for ($i=0; $i<7; $i++)
- {
- &mov($tmp1,&DWP($i*4,$a,"",0)); # tmp1 = *a
- &mov(&DWP($i*4,$r,"",0),$tmp1); # *r = *a
- &set_label("pw_tail_nc".$i,0); # entry point for the jnc of "dl>0 Tail Round $i"
- &dec($num) if ($i != 6);
- &jz(&label("pw_nc_end")) if ($i != 6);
- }
- &set_label("pw_nc_end",0);
- &mov($c,0); # the copy path always ends with borrow = 0
- &set_label("pw_end",0);
- # &mov("eax",$c); # $c is "eax"
- &function_end($name);
- }
|