123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436 |
- #! /usr/bin/env perl
- # Copyright 2021 The OpenSSL Project Authors. All Rights Reserved.
- #
- # Licensed under the OpenSSL license (the "License"). You may not use
- # this file except in compliance with the License. You can obtain a copy
- # in the file LICENSE in the source distribution or at
- # https://www.openssl.org/source/license.html
- #
- # ====================================================================
- # Written by Amitay Isaacs <amitay@ozlabs.org> and Martin Schwenke
- # <martin@meltin.net> for the OpenSSL project.
- # ====================================================================
- #
- # p521 lower-level primitives for PPC64 using vector instructions.
- #
- use strict;
- use warnings;
# Command line: <flavour> [other arguments ...] <output>
#
# $flavour is not interpreted here; it is handed to ppc-xlate.pl unchanged.
my $flavour = shift;

# Consume arguments until one looks like a file name with an extension
# (for example "foo.s") or the argument list runs out.  In the latter case
# "-" is passed to ppc-xlate.pl as the output name.
my $output = "";
while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {}
if (!$output) {
    $output = "-";
}

# Directory part of the path this script was started with, including the
# trailing separator.  When $0 carries no directory component the match
# fails, so fall back to the empty string instead of reading a stale or
# undefined $1 (which produced "uninitialized value" warnings).
my ($xlate, $dir);
$dir = ($0 =~ m/(.*[\/\\])[^\/\\]+$/) ? $1 : "";

# ppc-xlate.pl is looked for next to this script first, then in
# ../../perlasm relative to it.
( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
die "can't locate ppc-xlate.pl";

# Everything printed to STDOUT from here on is piped through ppc-xlate.pl.
# The open is checked so that a translator that cannot be started is
# reported immediately rather than only when STDOUT is closed, and the
# output name is quoted so that it survives the shell unchanged.
open OUT,"| \"$^X\" $xlate $flavour \"$output\""
    or die "can't call $xlate: $!";
*STDOUT=*OUT;

# Assembly text is accumulated here and printed at the end of the file.
my $code = "";

# General purpose registers used by the helpers below: stack pointer,
# output pointer, and scratch for the saved stack pointer.  $savelr is not
# referenced anywhere in the visible part of this file.
my ($sp, $outp, $savelr, $savesp) = ("r1", "r3", "r10", "r12");

# Register that the generated code clears and uses as the zero accumulator.
my $vzero = "v32";
# startproc(NAME)
#
# Append the opening lines for the global symbol NAME to $code: a .globl
# directive, the "NAME:" label and .cfi_startproc.
sub startproc($)
{
    my $name = shift;

    $code .= join("\n",
        ".globl ${name}",
        "${name}:",
        ".cfi_startproc",
        "");
}
# endproc(NAME)
#
# Append the closing lines for the symbol NAME to $code: the blr return
# instruction, .cfi_endproc and a .size directive covering the distance
# from the NAME label to the current location.
sub endproc($)
{
    my $name = shift;

    $code .= "blr\n"
           . ".cfi_endproc\n"
           . ".size ${name},.-${name}\n";
}
# push_vrs(MIN, MAX)
#
# Append code that opens a stack frame and stores registers MIN..MAX
# (given to stxv as bare numbers) into it.  With COUNT = MAX - MIN + 1:
#   - $sp is first copied to $savesp;
#   - stdu then lowers $sp by 16 * (COUNT + 1) bytes;
#   - register MIN + k is stored at $savesp - 16 * (COUNT - k), which puts
#     MAX in the 16-byte slot directly below the old stack pointer.
# pop_vrs() with the same arguments emits the matching restore sequence.
sub push_vrs($$)
{
    my ($first, $last) = @_;
    my $nregs = $last - $first + 1;

    # The backquoted "N+1" is emitted literally here; the substitution
    # pass at the end of this file evaluates it before the text is printed.
    $code .= "mr $savesp,$sp\n"
           . "stdu $sp,-16*`$nregs+1`($sp)\n";

    $code .= join "", map {
        sprintf "stxv %s,-16*%s(%s)\n", $_, $last - $_ + 1, $savesp
    } $first .. $last;
}
# pop_vrs(MIN, MAX)
#
# Append code that undoes push_vrs(MIN, MAX): the doubleword at 0($sp) is
# loaded into $savesp, registers MIN..MAX are reloaded with lxv from the
# same $savesp-relative offsets push_vrs() stored them at, and finally
# $savesp is copied back into $sp.
sub pop_vrs($$)
{
    my ($first, $last) = @_;

    $code .= "ld $savesp,0($sp)\n";

    $code .= join "", map {
        sprintf "lxv %s,-16*%s(%s)\n", $_, $last - $_ + 1, $savesp
    } $first .. $last;

    $code .= "mr $sp,$savesp\n";
}
# load_vrs(POINTER, REG_LIST)
#
# Append nine lxsd instructions to $code.  Instruction number i (0..8)
# loads into REG_LIST->[i] from byte offset 8 * i relative to the register
# named by POINTER.  REG_LIST is an array reference; only its first nine
# entries are used.
sub load_vrs($$)
{
    my ($pointer, $reg_list) = @_;

    $code .= join "", map {
        sprintf "lxsd %s,%s(%s)\n", $reg_list->[$_], 8 * $_, $pointer
    } 0 .. 8;
}
# store_vrs(POINTER, REG_LIST)
#
# Append nine stxv instructions to $code.  Instruction number i (0..8)
# stores REG_LIST->[i] at byte offset 16 * i relative to the register
# named by POINTER.  REG_LIST is an array reference; only its first nine
# entries are used.
sub store_vrs($$)
{
    my ($pointer, $reg_list) = @_;

    $code .= join "", map {
        sprintf "stxv %s,%s(%s)\n", $reg_list->[$_], 16 * $_, $pointer
    } 0 .. 8;
}
# All generated routines go into the .text section.
$code .= ".text\n";
- {
- # mul/square common
- my ($t1, $t2, $t3, $t4) = ("v33", "v34", "v44", "v54");
- my ($zero, $one) = ("r8", "r9");
- my @out = map("v$_",(55..63));
{
    #
    # p521_felem_mul
    #
    # Generates the routine p521_felem_mul.  As emitted here it loads nine
    # 64-bit words through each of r4 and r5, and stores nine 16-byte
    # results through $outp.
    #
    # Notation for the comments below.  Instruction behaviour is taken from
    # the Power ISA rather than from this file -- confirm there if in doubt:
    #   a[i], b[j]   word i / j loaded through r4 / r5
    #   (x, y)       register built by "xxpermdi dst,x,y,0b00"
    #   vmsumudm     adds the two doubleword products of its sources to
    #                its last (accumulator) operand
    #
    my ($in1p, $in2p) = ("r4", "r5");
    my @in1 = map { "v$_" } 45 .. 53;
    my @in2 = map { "v$_" } 35 .. 43;

    startproc("p521_felem_mul");
    push_vrs(52, 63);

    # Clear the register used as the initial accumulator.
    $code.=<<___;
vspltisw $vzero,0
___

    load_vrs($in1p, \@in1);
    load_vrs($in2p, \@in2);

    # out[0] .. out[4]: every product a[i]*b[j] with i + j = k.
    # $t1 = (a[0], a[1]) and $t3 = (a[2], a[3]) stay live into the next
    # section.
    $code.=<<___;
vmsumudm $out[0],$in1[0],$in2[0],$vzero
xxpermdi $t1,$in1[0],$in1[1],0b00
xxpermdi $t2,$in2[1],$in2[0],0b00
vmsumudm $out[1],$t1,$t2,$vzero
xxpermdi $t2,$in2[2],$in2[1],0b00
vmsumudm $out[2],$t1,$t2,$vzero
vmsumudm $out[2],$in1[2],$in2[0],$out[2]
xxpermdi $t2,$in2[3],$in2[2],0b00
vmsumudm $out[3],$t1,$t2,$vzero
xxpermdi $t3,$in1[2],$in1[3],0b00
xxpermdi $t4,$in2[1],$in2[0],0b00
vmsumudm $out[3],$t3,$t4,$out[3]
xxpermdi $t2,$in2[4],$in2[3],0b00
vmsumudm $out[4],$t1,$t2,$vzero
xxpermdi $t4,$in2[2],$in2[1],0b00
vmsumudm $out[4],$t3,$t4,$out[4]
vmsumudm $out[4],$in1[4],$in2[0],$out[4]
___

    # out[5] .. out[8], first part: the products that involve a[0..3].
    $code.=<<___;
xxpermdi $t2,$in2[5],$in2[4],0b00
vmsumudm $out[5],$t1,$t2,$vzero
xxpermdi $t4,$in2[3],$in2[2],0b00
vmsumudm $out[5],$t3,$t4,$out[5]
xxpermdi $t2,$in2[6],$in2[5],0b00
vmsumudm $out[6],$t1,$t2,$vzero
xxpermdi $t4,$in2[4],$in2[3],0b00
vmsumudm $out[6],$t3,$t4,$out[6]
xxpermdi $t2,$in2[7],$in2[6],0b00
vmsumudm $out[7],$t1,$t2,$vzero
xxpermdi $t4,$in2[5],$in2[4],0b00
vmsumudm $out[7],$t3,$t4,$out[7]
xxpermdi $t2,$in2[8],$in2[7],0b00
vmsumudm $out[8],$t1,$t2,$vzero
xxpermdi $t4,$in2[6],$in2[5],0b00
vmsumudm $out[8],$t3,$t4,$out[8]
___

    # out[5] .. out[8], second part: the products that involve a[4..8].
    # After this section each out[k] has received every a[i]*b[j] with
    # i + j = k.
    $code.=<<___;
xxpermdi $t1,$in1[4],$in1[5],0b00
xxpermdi $t2,$in2[1],$in2[0],0b00
vmsumudm $out[5],$t1,$t2,$out[5]
xxpermdi $t2,$in2[2],$in2[1],0b00
vmsumudm $out[6],$t1,$t2,$out[6]
vmsumudm $out[6],$in1[6],$in2[0],$out[6]
xxpermdi $t2,$in2[3],$in2[2],0b00
vmsumudm $out[7],$t1,$t2,$out[7]
xxpermdi $t3,$in1[6],$in1[7],0b00
xxpermdi $t4,$in2[1],$in2[0],0b00
vmsumudm $out[7],$t3,$t4,$out[7]
xxpermdi $t2,$in2[4],$in2[3],0b00
vmsumudm $out[8],$t1,$t2,$out[8]
xxpermdi $t4,$in2[2],$in2[1],0b00
vmsumudm $out[8],$t3,$t4,$out[8]
vmsumudm $out[8],$in1[8],$in2[0],$out[8]
___

    # Build a shift-count register from the constants 1 and 0, then shift
    # all nine b[j] registers left by it in place.
    # NOTE(review): this appears to double each b[j] (shift count 1 in the
    # doubleword that lxsd filled) -- confirm against the ISA.
    $code.=<<___;
li $zero,0
li $one,1
mtvsrdd $t1,$one,$zero
___

    $code .= join "", map {
        sprintf "vsld %s,%s,%s\n", $in2[$_], $in2[$_], $t1
    } 0 .. 8;

    # Add the products a[i]*b[j] with i + j = k + 9 to out[k], using the
    # shifted b[j].  This first batch covers the pairs taken from b[5..8]
    # (held in $t2 / $t4) plus the single-register terms with a[8].
    # NOTE(review): presumably the wrap-around step of the reduction
    # modulo 2^521 - 1 -- confirm against the C reference implementation.
    $code.=<<___;
vmsumudm $out[7],$in1[8],$in2[8],$out[7]
xxpermdi $t2,$in2[8],$in2[7],0b00
xxpermdi $t1,$in1[7],$in1[8],0b00
vmsumudm $out[6],$t1,$t2,$out[6]
xxpermdi $t1,$in1[6],$in1[7],0b00
vmsumudm $out[5],$t1,$t2,$out[5]
vmsumudm $out[5],$in1[8],$in2[6],$out[5]
xxpermdi $t1,$in1[5],$in1[6],0b00
vmsumudm $out[4],$t1,$t2,$out[4]
xxpermdi $t4,$in2[6],$in2[5],0b00
xxpermdi $t3,$in1[7],$in1[8],0b00
vmsumudm $out[4],$t3,$t4,$out[4]
xxpermdi $t1,$in1[4],$in1[5],0b00
vmsumudm $out[3],$t1,$t2,$out[3]
xxpermdi $t3,$in1[6],$in1[7],0b00
vmsumudm $out[3],$t3,$t4,$out[3]
vmsumudm $out[3],$in1[8],$in2[4],$out[3]
xxpermdi $t1,$in1[3],$in1[4],0b00
vmsumudm $out[2],$t1,$t2,$out[2]
xxpermdi $t3,$in1[5],$in1[6],0b00
vmsumudm $out[2],$t3,$t4,$out[2]
xxpermdi $t1,$in1[2],$in1[3],0b00
vmsumudm $out[1],$t1,$t2,$out[1]
xxpermdi $t3,$in1[4],$in1[5],0b00
vmsumudm $out[1],$t3,$t4,$out[1]
xxpermdi $t1,$in1[1],$in1[2],0b00
vmsumudm $out[0],$t1,$t2,$out[0]
xxpermdi $t3,$in1[3],$in1[4],0b00
vmsumudm $out[0],$t3,$t4,$out[0]
___

    # Second batch of i + j = k + 9 terms: the pairs taken from the shifted
    # b[1..4], which only reach out[0], out[1] and out[2].
    $code.=<<___;
xxpermdi $t2,$in2[4],$in2[3],0b00
xxpermdi $t1,$in1[7],$in1[8],0b00
vmsumudm $out[2],$t1,$t2,$out[2]
xxpermdi $t1,$in1[6],$in1[7],0b00
vmsumudm $out[1],$t1,$t2,$out[1]
vmsumudm $out[1],$in1[8],$in2[2],$out[1]
xxpermdi $t1,$in1[5],$in1[6],0b00
vmsumudm $out[0],$t1,$t2,$out[0]
xxpermdi $t4,$in2[2],$in2[1],0b00
xxpermdi $t3,$in1[7],$in1[8],0b00
vmsumudm $out[0],$t3,$t4,$out[0]
___

    store_vrs($outp, \@out);
    pop_vrs(52, 63);
    endproc("p521_felem_mul");
}
{
    #
    # p521_felem_square
    #
    # Generates the routine p521_felem_square.  As emitted here it loads
    # nine 64-bit words through r4 and stores nine 16-byte results through
    # $outp.
    #
    # Notation for the comments below.  Instruction behaviour is taken from
    # the Power ISA rather than from this file -- confirm there if in doubt:
    #   a[i]         word i loaded through r4 (@in)
    #   s[i]         shifted copy of a[i] (@inx2)
    #   (x, y)       register built by "xxpermdi dst,x,y,0b00"
    #   vmsumudm     adds the two doubleword products of its sources to
    #                its last (accumulator) operand
    #
    my ($inp) = ("r4");
    my @in = map { "v$_" } 45 .. 53;
    my @inx2 = map { "v$_" } 35 .. 43;

    startproc("p521_felem_square");
    push_vrs(52, 63);

    # Clear the register used as the initial accumulator.
    $code.=<<___;
vspltisw $vzero,0
___

    load_vrs($inp, \@in);

    # Build a shift-count register from the constants 1 and 0 and fill
    # s[0..8] with the shifted a[0..8]; @in itself is left untouched.
    # NOTE(review): this appears to make s[i] = 2 * a[i] -- confirm
    # against the ISA.
    $code.=<<___;
li $zero,0
li $one,1
mtvsrdd $t1,$one,$zero
___

    $code .= join "", map {
        sprintf "vsld %s,%s,%s\n", $inx2[$_], $in[$_], $t1
    } 0 .. 8;

    # out[0] .. out[8] from the index pairs i + j = k.  Each cross product
    # is taken once against s[j]; the squares a[i]*a[i] use the unshifted
    # register on both sides.  $t1 = (a[0], a[1]) and $t3 = (a[2], a[3])
    # are reused across several outputs.
    $code.=<<___;
vmsumudm $out[0],$in[0],$in[0],$vzero
vmsumudm $out[1],$in[0],$inx2[1],$vzero
xxpermdi $t1,$in[0],$in[1],0b00
xxpermdi $t2,$inx2[2],$in[1],0b00
vmsumudm $out[2],$t1,$t2,$vzero
xxpermdi $t2,$inx2[3],$inx2[2],0b00
vmsumudm $out[3],$t1,$t2,$vzero
xxpermdi $t2,$inx2[4],$inx2[3],0b00
vmsumudm $out[4],$t1,$t2,$vzero
vmsumudm $out[4],$in[2],$in[2],$out[4]
xxpermdi $t2,$inx2[5],$inx2[4],0b00
vmsumudm $out[5],$t1,$t2,$vzero
vmsumudm $out[5],$in[2],$inx2[3],$out[5]
xxpermdi $t2,$inx2[6],$inx2[5],0b00
vmsumudm $out[6],$t1,$t2,$vzero
xxpermdi $t3,$in[2],$in[3],0b00
xxpermdi $t4,$inx2[4],$in[3],0b00
vmsumudm $out[6],$t3,$t4,$out[6]
xxpermdi $t2,$inx2[7],$inx2[6],0b00
vmsumudm $out[7],$t1,$t2,$vzero
xxpermdi $t4,$inx2[5],$inx2[4],0b00
vmsumudm $out[7],$t3,$t4,$out[7]
xxpermdi $t2,$inx2[8],$inx2[7],0b00
vmsumudm $out[8],$t1,$t2,$vzero
xxpermdi $t4,$inx2[6],$inx2[5],0b00
vmsumudm $out[8],$t3,$t4,$out[8]
vmsumudm $out[8],$in[4],$in[4],$out[8]
___

    # Terms a[i]*s[i] for i = 5..8, added to out[2*i - 9].  These must be
    # emitted before s[5..8] are shifted a second time below.
    # NOTE(review): presumably the squared-limb part of the wrap-around
    # for the reduction modulo 2^521 - 1 -- confirm against the C
    # reference implementation.
    $code.=<<___;
vmsumudm $out[1],$in[5],$inx2[5],$out[1]
vmsumudm $out[3],$in[6],$inx2[6],$out[3]
vmsumudm $out[5],$in[7],$inx2[7],$out[5]
vmsumudm $out[7],$in[8],$inx2[8],$out[7]
___

    # $t1 was overwritten above, so rebuild the shift count and shift
    # s[5..8] once more in place.
    $code.=<<___;
mtvsrdd $t1,$one,$zero
___

    $code .= join "", map {
        sprintf "vsld %s,%s,%s\n", $inx2[$_], $inx2[$_], $t1
    } 5 .. 8;

    # Cross terms a[i]*s[j] with i < j and i + j = k + 9, added to out[k]
    # using the twice-shifted s[5..8].
    $code.=<<___;
vmsumudm $out[6],$in[7],$inx2[8],$out[6]
vmsumudm $out[5],$in[6],$inx2[8],$out[5]
xxpermdi $t2,$inx2[8],$inx2[7],0b00
xxpermdi $t1,$in[5],$in[6],0b00
vmsumudm $out[4],$t1,$t2,$out[4]
xxpermdi $t1,$in[4],$in[5],0b00
vmsumudm $out[3],$t1,$t2,$out[3]
xxpermdi $t1,$in[3],$in[4],0b00
vmsumudm $out[2],$t1,$t2,$out[2]
vmsumudm $out[2],$in[5],$inx2[6],$out[2]
xxpermdi $t1,$in[2],$in[3],0b00
vmsumudm $out[1],$t1,$t2,$out[1]
vmsumudm $out[1],$in[4],$inx2[6],$out[1]
xxpermdi $t1,$in[1],$in[2],0b00
vmsumudm $out[0],$t1,$t2,$out[0]
xxpermdi $t2,$inx2[6],$inx2[5],0b00
xxpermdi $t1,$in[3],$in[4],0b00
vmsumudm $out[0],$t1,$t2,$out[0]
___

    store_vrs($outp, \@out);
    pop_vrs(52, 63);
    endproc("p521_felem_square");
}
- }
# Replace every backquoted expression in the accumulated text with the
# result of evaluating it as Perl (push_vrs() relies on this for its frame
# size), then write the text to STDOUT.
$code =~ s/\`([^\`]*)\`/eval $1/gem;
print $code;

# STDOUT is attached to the ppc-xlate.pl pipe in the preamble, so a failed
# close is fatal.
unless (close STDOUT) {
    die "error closing STDOUT: $!";
}
|