12 years ago · c616200172
--- a/Configure
+++ b/Configure
@@ -610,12 +610,14 @@ my %table=(
 
				 "uClinux-dist","$ENV{'CC'}:\$(CFLAGS)::-D_REENTRANT::\$(LDFLAGS) \$(LDLIBS):BN_LLONG:${no_asm}:$ENV{'LIBSSL_dlfcn'}:linux-shared:-fPIC:-shared:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):$ENV{'RANLIB'}::",
			
 
				 "uClinux-dist64","$ENV{'CC'}:\$(CFLAGS)::-D_REENTRANT::\$(LDFLAGS) \$(LDLIBS):SIXTY_FOUR_BIT_LONG:${no_asm}:$ENV{'LIBSSL_dlfcn'}:linux-shared:-fPIC:-shared:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):$ENV{'RANLIB'}::",
			
 
				 
			
 
				+"c64xplus","cl6x:-mv6400+ -o2 -ox -ms -pden -DNO_SYS_TYPES_H -DGETPID_IS_MEANINGLESS -DMD32_REG_T=int -DOPENSSL_SMALL_FOOTPRINT:<c6x.h>::DSPBIOS::BN_LLONG:c64xpluscpuid.o:bn-c64xplus.o c64xplus-gf2m.o::aes-c64xplus.o aes_cbc.o aes_ctr.o:::sha1-c64xplus.o sha256-c64xplus.o sha512-c64xplus.o:::::::ghash-c64xplus.o::void:",
			
 
				+
			
 
				 );
			
 
				 
			
 
				 my @MK1MF_Builds=qw(VC-WIN64I VC-WIN64A
			
 
				 		    debug-VC-WIN64I debug-VC-WIN64A
			
 
				 		    VC-NT VC-CE VC-WIN32 debug-VC-WIN32
			
 
				-		    BC-32 
			
 
				+		    BC-32 c64xplus
			
 
				 		    netware-clib netware-clib-bsdsock
			
 
				 		    netware-libc netware-libc-bsdsock);
			
 
				 
			
--- a/Makefile.fips
+++ b/Makefile.fips
@@ -186,7 +186,7 @@ SHARED_LDFLAGS=
 
				 GENERAL=        Makefile
			
 
				 BASENAME=       openssl
			
 
				 NAME=           $(BASENAME)-$(VERSION)
			
 
				-TARFILE=        openssl-fips-2.0-test.tar
			
 
				+TARFILE=        openssl-fips-2.0.tar
			
 
				 WTARFILE=       $(NAME)-win.tar
			
 
				 EXHEADER=       e_os2.h
			
 
				 HEADER=         e_os.h
			
--- a/c6x/do_fips
+++ b/c6x/do_fips
@@ -0,0 +1,7 @@
 
				+#!/bin/sh
			
 
				+
			
 
				+perl Configure c64xplus fipscanisteronly no-engine
			
 
				+perl util/mkfiles.pl > MINFO
			
 
				+perl util/mk1mf.pl auto > c6x/fips.mak
			
 
				+make -f c6x/fips.mak
			
 
				+make -f c6x/fips_algvs.mak
			
--- a/c6x/env
+++ b/c6x/env
@@ -0,0 +1,7 @@
 
				+# MSYS-style PATH
			
 
				+export PATH=/c/CCStudio_v3.3/c6000/cgtools/bin:/c/Program\ Files/ActivePerl58/bin:$PATH
			
 
				+
			
 
				+# Windows-style variables
			
 
				+export C6X_C_DIR='C:\CCStudio_v3.3\c6000\cgtools\include;C:\CCStudio_v3.3\c6000\cgtools\lib'
			
 
				+
			
 
				+export PERL5LIB=C:/CCStudio_v3.3/bin/utilities/ccs_scripting
			
--- a/c6x/fips_standalone_sha1
+++ b/c6x/fips_standalone_sha1
@@ -0,0 +1,32 @@
 
				+#!/usr/bin/env perl
			
 
				+#
			
 
				+$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
			
 
				+
			
 
				+unshift(@INC,$dir);
			
 
				+require "hmac_sha1.pl";
			
 
				+
			
 
				+(@ARGV && -f $ARGV[$#ARGV]) || die "usage: $0 [-verify] file";	# need a file as last argument
			
 
				+
			
 
				+$verify=shift	if (@ARGV[0] eq "-verify");
			
 
				+
			
 
				+sysopen(FD,@ARGV[0],0) || die "$!";
			
 
				+binmode(FD);
			
 
				+
			
 
				+my $ctx = HMAC->Init("etaonrishdlcupfm");
			
 
				+
			
 
				+while (read(FD,$blob,4*1024)) { $ctx->Update($blob); }
			
 
				+
			
 
				+close(FD);
			
 
				+
			
 
				+my $signature = unpack("H*",$ctx->Final());
			
 
				+
			
 
				+print "HMAC-SHA1(@ARGV[0])= $signature\n";
			
 
				+
			
 
				+if ($verify) {
			
 
				+	open(FD,"<@ARGV[0].sha1") || die "$!";
			
 
				+	$line = <FD>;
			
 
				+	close(FD);
			
 
				+	exit(0)	if ($line =~ /HMAC\-SHA1\([^\)]*\)=\s*([0-9a-f]+)/i &&
			
 
				+				$1 eq $signature);
			
 
				+	die "signature mismatch";
			
 
				+}
			
--- a/c6x/fipscanister.cmd
+++ b/c6x/fipscanister.cmd
@@ -0,0 +1,19 @@
 
				+SECTIONS
			
 
				+{
			
 
				+    .text:
			
 
				+    {
			
 
				+	*(.fips_text:start)
			
 
				+	*(.text)
			
 
				+	*(.const:aes_asm)
			
 
				+	*(.const:sha_asm)
			
 
				+	*(.const:des_sptrans)
			
 
				+	*(.switch)
			
 
				+	*(.fips_text:end)
			
 
				+    }
			
 
				+    .const:
			
 
				+    {
			
 
				+	*(.fips_const:start)
			
 
				+	*(.const)
			
 
				+	*(.fips_const:end)
			
 
				+    }
			
 
				+}
			
--- a/c6x/hmac_sha1.pl
+++ b/c6x/hmac_sha1.pl
@@ -0,0 +1,196 @@
 
				+#!/usr/bin/env perl
			
 
				+#
			
 
				+# Copyright (c) 2011 The OpenSSL Project.
			
 
				+#
			
 
				+######################################################################
			
 
				+#
			
 
				+# SHA1 and HMAC in Perl by <appro@openssl.org>.
			
 
				+#
			
 
				+{ package SHA1;
			
 
				+  use integer;
			
 
				+
			
 
				+    {
			
 
				+    ################################### SHA1 block code generator
			
 
				+    my @V = ('$A','$B','$C','$D','$E');
			
 
				+    my $i;
			
 
				+
			
 
				+    sub XUpdate {
			
 
				+      my $ret;
			
 
				+	$ret="(\$T=\$W[($i-16)%16]^\$W[($i-14)%16]^\$W[($i-8)%16]^\$W[($i-3)%16],\n\t";
			
 
				+	if ((1<<31)<<1) {
			
 
				+	    $ret.="    \$W[$i%16]=((\$T<<1)|(\$T>>31))&0xffffffff)\n\t  ";
			
 
				+	} else {
			
 
				+	    $ret.="    \$W[$i%16]=(\$T<<1)|((\$T>>31)&1))\n\t  ";
			
 
				+	}
			
 
				+    }
			
 
				+    sub tail {
			
 
				+      my ($a,$b,$c,$d,$e)=@V;
			
 
				+      my $ret;
			
 
				+	if ((1<<31)<<1) {
			
 
				+	    $ret.="(($a<<5)|($a>>27));\n\t";
			
 
				+	    $ret.="$b=($b<<30)|($b>>2);	$e&=0xffffffff;	#$b&=0xffffffff;\n\t";
			
 
				+	} else {
			
 
				+	    $ret.="(($a<<5)|($a>>27)&0x1f);\n\t";
			
 
				+	    $ret.="$b=($b<<30)|($b>>2)&0x3fffffff;\n\t";
			
 
				+	}
			
 
				+      $ret;
			
 
				+    }
			
 
				+    sub BODY_00_15 {
			
 
				+	my ($a,$b,$c,$d,$e)=@V;
			
 
				+	"$e+=\$W[$i]+0x5a827999+((($c^$d)&$b)^$d)+".tail();
			
 
				+    }
			
 
				+    sub BODY_16_19 {
			
 
				+	my ($a,$b,$c,$d,$e)=@V;
			
 
				+	"$e+=".XUpdate()."+0x5a827999+((($c^$d)&$b)^$d)+".tail();
			
 
				+    }
			
 
				+    sub BODY_20_39 {
			
 
				+	my ($a,$b,$c,$d,$e)=@V;
			
 
				+	"$e+=".XUpdate()."+0x6ed9eba1+($b^$c^$d)+".tail();
			
 
				+    }
			
 
				+    sub BODY_40_59 {
			
 
				+	my ($a,$b,$c,$d,$e)=@V;
			
 
				+	"$e+=".XUpdate()."+0x8f1bbcdc+(($b&$c)|(($b|$c)&$d))+".tail();
			
 
				+    }
			
 
				+    sub BODY_60_79 {
			
 
				+	my ($a,$b,$c,$d,$e)=@V;
			
 
				+	"$e+=".XUpdate()."+0xca62c1d6+($b^$c^$d)+".tail();
			
 
				+    }
			
 
				+
			
 
				+    my $sha1_impl =
			
 
				+    'sub block {
			
 
				+	my $self = @_[0];
			
 
				+	my @W    = unpack("N16",@_[1]);
			
 
				+	my ($A,$B,$C,$D,$E,$T) = @{$self->{H}};
			
 
				+	';
			
 
				+
			
 
				+	$sha1_impl.='
			
 
				+	$A &= 0xffffffff;
			
 
				+	$B &= 0xffffffff;
			
 
				+	' if ((1<<31)<<1);
			
 
				+
			
 
				+	for($i=0;$i<16;$i++){ $sha1_impl.=BODY_00_15(); unshift(@V,pop(@V)); }
			
 
				+	for(;$i<20;$i++)    { $sha1_impl.=BODY_16_19(); unshift(@V,pop(@V)); }
			
 
				+	for(;$i<40;$i++)    { $sha1_impl.=BODY_20_39(); unshift(@V,pop(@V)); }
			
 
				+	for(;$i<60;$i++)    { $sha1_impl.=BODY_40_59(); unshift(@V,pop(@V)); }
			
 
				+	for(;$i<80;$i++)    { $sha1_impl.=BODY_60_79(); unshift(@V,pop(@V)); }
			
 
				+
			
 
				+	$sha1_impl.='
			
 
				+	$self->{H}[0]+=$A;	$self->{H}[1]+=$B;	$self->{H}[2]+=$C;
			
 
				+	$self->{H}[3]+=$D;	$self->{H}[4]+=$E;	}';
			
 
				+
			
 
				+    #print $sha1_impl,"\n";
			
 
				+    eval($sha1_impl);		# generate code
			
 
				+    }
			
 
				+
			
 
				+    sub Init {
			
 
				+	my $class = shift;	# multiple instances...
			
 
				+	my $self  = {};
			
 
				+
			
 
				+	bless $self,$class;
			
 
				+	$self->{H} = [0x67452301,0xefcdab89,0x98badcfe,0x10325476,0xc3d2e1f0];
			
 
				+	$self->{N} = 0;
			
 
				+	return $self;
			
 
				+    }
			
 
				+
			
 
				+    sub Update {
			
 
				+	my $self = shift;
			
 
				+	my $msg;
			
 
				+
			
 
				+	foreach $msg (@_) {
			
 
				+	    my $len  = length($msg);
			
 
				+	    my $num  = length($self->{buf});
			
 
				+	    my $off  = 0;
			
 
				+
			
 
				+	    $self->{N} += $len;
			
 
				+
			
 
				+	    if (($num+$len)<64)
			
 
				+	    {	$self->{buf} .= $msg; next;	}
			
 
				+	    elsif ($num)
			
 
				+	    {	$self->{buf} .= substr($msg,0,($off=64-$num));
			
 
				+		$self->block($self->{buf});
			
 
				+	    }
			
 
				+
			
 
				+	    while(($off+64) <= $len)
			
 
				+	    {	$self->block(substr($msg,$off,64));
			
 
				+		$off += 64;
			
 
				+	    }
			
 
				+
			
 
				+	    $self->{buf} = substr($msg,$off);
			
 
				+	}
			
 
				+	return $self;
			
 
				+    }
			
 
				+
			
 
				+    sub Final {
			
 
				+	my $self = shift;
			
 
				+	my $num  = length($self->{buf});
			
 
				+
			
 
				+	$self->{buf} .= chr(0x80); $num++;
			
 
				+	if ($num>56)
			
 
				+	{   $self->{buf} .= chr(0)x(64-$num);
			
 
				+	    $self->block($self->{buf});
			
 
				+	    $self->{buf}=undef;
			
 
				+	    $num=0;
			
 
				+	}
			
 
				+	$self->{buf} .= chr(0)x(56-$num);
			
 
				+	$self->{buf} .= pack("N2",($self->{N}>>29)&0x7,$self->{N}<<3);
			
 
				+	$self->block($self->{buf});
			
 
				+
			
 
				+	return pack("N*",@{$self->{H}});
			
 
				+    }
			
 
				+
			
 
				+    sub Selftest {
			
 
				+	my $hash;
			
 
				+
			
 
				+	$hash=SHA1->Init()->Update('abc')->Final();
			
 
				+	die "SHA1 test#1" if (unpack("H*",$hash) ne 'a9993e364706816aba3e25717850c26c9cd0d89d');
			
 
				+
			
 
				+	$hash=SHA1->Init()->Update('abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq')->Final();
			
 
				+	die "SHA1 test#2" if (unpack("H*",$hash) ne '84983e441c3bd26ebaae4aa1f95129e5e54670f1');
			
 
				+
			
 
				+	#$hash=SHA1->Init()->Update('a'x1000000)->Final();
			
 
				+	#die "SHA1 test#3" if (unpack("H*",$hash) ne '34aa973cd4c4daa4f61eeb2bdbad27316534016f');
			
 
				+    }
			
 
				+}
			
 
				+
			
 
				+{ package HMAC;
			
 
				+
			
 
				+    sub Init {
			
 
				+	my $class = shift;
			
 
				+	my $key   = shift;
			
 
				+	my $self  = {};
			
 
				+
			
 
				+	bless $self,$class;
			
 
				+
			
 
				+	if (length($key)>64) {
			
 
				+	    $key = SHA1->Init()->Update($key)->Final();
			
 
				+	}
			
 
				+	$key .= chr(0x00)x(64-length($key));
			
 
				+
			
 
				+	my @ikey = map($_^=0x36,unpack("C*",$key));
			
 
				+	($self->{hash} = SHA1->Init())->Update(pack("C*",@ikey));
			
 
				+	 $self->{okey} = pack("C*",map($_^=0x36^0x5c,@ikey));
			
 
				+
			
 
				+	return $self;
			
 
				+    }
			
 
				+
			
 
				+    sub Update {
			
 
				+	my $self = shift;
			
 
				+	$self->{hash}->Update(@_);
			
 
				+	return $self;
			
 
				+    }
			
 
				+
			
 
				+    sub Final {
			
 
				+	my $self  = shift;
			
 
				+	my $ihash = $self->{hash}->Final();
			
 
				+	return SHA1->Init()->Update($self->{okey},$ihash)->Final();
			
 
				+    }
			
 
				+
			
 
				+    sub Selftest {
			
 
				+	my $hmac;
			
 
				+
			
 
				+	$hmac = HMAC->Init('0123456789:;<=>?@ABC')->Update('Sample #2')->Final();
			
 
				+	die "HMAC test" if (unpack("H*",$hmac) ne '0922d3405faa3d194f82a45830737d5cc6c75d24');
			
 
				+    }
			
 
				+}
			
 
				+
			
 
				+1;
			
--- a/c6x/incore6x
+++ b/c6x/incore6x
@@ -0,0 +1,241 @@
 
				+#!/usr/bin/env perl
			
 
				+#
			
 
				+# Copyright (c) 2011 The OpenSSL Project.
			
 
				+#
			
 
				+# The script embeds fingerprint into TI-COFF executable object.
			
 
				+
			
 
				+$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
			
 
				+
			
 
				+unshift(@INC,$dir);
			
 
				+require "hmac_sha1.pl";
			
 
				+
			
 
				+######################################################################
			
 
				+#
			
 
				+# COFF symbol table parser by <appro@openssl.org>. The table entries
			
 
				+# are extended with offset within executable file...
			
 
				+#
			
 
				+{ package COFF;
			
 
				+  use FileHandle;
			
 
				+
			
 
				+    sub dup  { my %copy=map {$_} @_; return \%copy; }
			
 
				+
			
 
				+    sub Load {
			
 
				+	my $class = shift;
			
 
				+	my $self  = {};
			
 
				+	my $FD    = FileHandle->new();	# autoclose
			
 
				+
			
 
				+	bless $self,$class;
			
 
				+
			
 
				+	sysopen($FD,shift,0) or die "$!";
			
 
				+	binmode($FD);
			
 
				+
			
 
				+	#################################################
			
 
				+	# read and parse COFF header...
			
 
				+	#
			
 
				+	read($FD,my $coff,22) or die "$!";
			
 
				+
			
 
				+	my %coff_header;
			
 
				+	@coff_header{version,nsects,date,syms_off,nsyms,opt,flags,magic}=
			
 
				+		unpack("v2V3v3",$coff);
			
 
				+
			
 
				+	$!=42;		# signal fipsld to revert to two-step link
			
 
				+	die "not TI-COFF file" if ($coff_header{version} != 0xC2);
			
 
				+
			
 
				+	my $big_endian = ($coff_header{flags}>>9)&1;	# 0 or 1
			
 
				+
			
 
				+	my $strings;
			
 
				+	my $symsize;
			
 
				+
			
 
				+	#################################################
			
 
				+	# load strings table
			
 
				+	#
			
 
				+	seek($FD,$coff_header{syms_off}+18*$coff_header{nsyms},0) or die "$!";
			
 
				+	read($FD,$strings,4) or die "$!";
			
 
				+	$symsize = unpack("V",$strings);
			
 
				+	read($FD,$strings,$symsize,4) or die "$!";
			
 
				+
			
 
				+	#################################################
			
 
				+	# read sections
			
 
				+	#
			
 
				+	my $i;
			
 
				+	my @sections;
			
 
				+
			
 
				+	# seek to section headers
			
 
				+	seek($FD,22+@coff_header{opt},0) or die "$!";
			
 
				+	for ($i=0;$i<$coff_header{nsects};$i++) {
			
 
				+	    my %coff_shdr;
			
 
				+	    my $name;
			
 
				+
			
 
				+	    read($FD,my $section,48) or die "$!";
			
 
				+
			
 
				+	    @coff_shdr{sh_name,sh_phaddr,sh_vaddr,
			
 
				+			sh_size,sh_offset,sh_relocs,sh_reserved,
			
 
				+			sh_relocoff,sh_lines,sh_flags} =
			
 
				+		unpack("a8V9",$section);
			
 
				+
			
 
				+	    $name = $coff_shdr{sh_name};
			
 
				+	    # see if sh_name is an offset in $strings
			
 
				+	    my ($hi,$lo) = unpack("V2",$name);
			
 
				+	    if ($hi==0 && $lo<$symsize) {
			
 
				+		$name = substr($strings,$lo,64);
			
 
				+	    }
			
 
				+	    $coff_shdr{sh_name} = (split(chr(0),$name))[0];
			
 
				+
			
 
				+	    push(@sections,dup(%coff_shdr));
			
 
				+	}
			
 
				+
			
 
				+	#################################################
			
 
				+	# load symbols table
			
 
				+	#
			
 
				+	seek($FD,$coff_header{syms_off},0) or die "$!";
			
 
				+	for ($i=0;$i<$coff_header{nsyms};$i++) {
			
 
				+	    my %coff_sym;
			
 
				+	    my $name;
			
 
				+
			
 
				+	    read($FD,my $blob,18) or die "$!";
			
 
				+
			
 
				+	    @coff_sym{st_name,st_value,st_shndx,reserved,class,aux} =
			
 
				+		unpack("a8Vv2C2",$blob);
			
 
				+
			
 
				+	    # skip aux entries
			
 
				+	    if ($coff_sym{aux}) {
			
 
				+		seek($FD,18*$coff_sym{aux},1) or die "$!";
			
 
				+		$i+=$coff_sym{aux};
			
 
				+	    }
			
 
				+
			
 
				+	    $name = $coff_sym{st_name};
			
 
				+	    # see if st_name is an offset in $strings
			
 
				+	    my ($hi,$lo) = unpack("V2",$name);
			
 
				+	    if ($hi==0 && $lo<$symsize) {
			
 
				+		$name = substr($strings,$lo,64);
			
 
				+	    }
			
 
				+	    $coff_sym{st_name} = $name = (split(chr(0),$name))[0];
			
 
				+
			
 
				+	    my $st_secn = $coff_sym{st_shndx}-1;
			
 
				+	    if ($st_secn>=0 && $st_secn<=$#sections
			
 
				+		&& @sections[$st_secn]->{sh_offset}
			
 
				+		&& $name =~ m/^_[a-z]+/i) {
			
 
				+		# synthesize st_offset, ...
			
 
				+		$coff_sym{st_offset} = $coff_sym{st_value}
			
 
				+				- @sections[$st_secn]->{sh_vaddr}
			
 
				+				+ @sections[$st_secn]->{sh_offset};
			
 
				+		$coff_sym{st_section} = @sections[$st_secn]->{sh_name};
			
 
				+		# ... and add to lookup table
			
 
				+		$self->{symbols}{$name} = dup(%coff_sym);
			
 
				+	    }
			
 
				+	}
			
 
				+
			
 
				+	return $self;
			
 
				+    }
			
 
				+
			
 
				+    sub Lookup {
			
 
				+	my $self = shift;
			
 
				+	my $name = shift;
			
 
				+	return $self->{symbols}{"_$name"};
			
 
				+    }
			
 
				+
			
 
				+    sub Traverse {
			
 
				+	my $self = shift;
			
 
				+	my $code = shift;
			
 
				+
			
 
				+	if (ref($code) eq 'CODE') {
			
 
				+	    for (keys(%{$self->{symbols}})) { &$code($self->{symbols}{$_}); }
			
 
				+	}
			
 
				+    }
			
 
				+}
			
 
				+
			
 
				+######################################################################
			
 
				+#
			
 
				+# main()
			
 
				+#
			
 
				+my $legacy_mode;
			
 
				+
			
 
				+if ($#ARGV<0 || ($#ARGV>0 && !($legacy_mode=(@ARGV[0] =~ /^\-(dso|exe)$/)))) {
			
 
				+	print STDERR "usage: $0 [-dso|-exe] ti-coff-binary\n";
			
 
				+	exit(1);
			
 
				+}
			
 
				+
			
 
				+$exe = COFF->Load(@ARGV[$#ARGV]);
			
 
				+
			
 
				+$FIPS_text_start	= $exe->Lookup("FIPS_text_start")		or die;
			
 
				+$FIPS_text_end		= $exe->Lookup("FIPS_text_end")			or die;
			
 
				+$FIPS_rodata_start	= $exe->Lookup("FIPS_rodata_start")		or die;
			
 
				+$FIPS_rodata_end	= $exe->Lookup("FIPS_rodata_end")		or die;
			
 
				+$FIPS_signature		= $exe->Lookup("FIPS_signature")		or die;
			
 
				+
			
 
				+# new cross-compile support
			
 
				+$FIPS_text_startX	= $exe->Lookup("FIPS_text_startX");
			
 
				+$FIPS_text_endX		= $exe->Lookup("FIPS_text_endX");
			
 
				+
			
 
				+if (!$legacy_mode) {
			
 
				+    if (!$FIPS_text_startX || !$FIPS_text_endX) {
			
 
				+	print STDERR "@ARGV[$#ARGV] is not cross-compiler aware.\n";
			
 
				+	exit(42);	# signal fipsld to revert to two-step link
			
 
				+    }
			
 
				+
			
 
				+    $FINGERPRINT_ascii_value
			
 
				+			= $exe->Lookup("FINGERPRINT_ascii_value");
			
 
				+}
			
 
				+if ($FIPS_text_startX && $FIPS_text_endX) {
			
 
				+    $FIPS_text_start = $FIPS_text_startX;
			
 
				+    $FIPS_text_end   = $FIPS_text_endX;
			
 
				+}
			
 
				+
			
 
				+sysopen(FD,@ARGV[$#ARGV],$legacy_mode?0:2) or die "$!";	# 2 is read/write
			
 
				+binmode(FD);
			
 
				+
			
 
				+sub HMAC_Update {
			
 
				+  my ($hmac,$off,$len) = @_;
			
 
				+  my $blob;
			
 
				+
			
 
				+    seek(FD,$off,0)	or die "$!";
			
 
				+    $len==0 or read(FD,$blob,$len)	or die "$!";	# read() returns 0 for an empty range, which is not an error
			
 
				+    $$hmac->Update($blob);
			
 
				+}
			
 
				+
			
 
				+# fips/fips.c:FIPS_incore_fingerprint's Perl twin
			
 
				+#
			
 
				+sub FIPS_incore_fingerprint {
			
 
				+  my $p1  = $FIPS_text_start->{st_offset};
			
 
				+  my $p2  = $FIPS_text_end->{st_offset};
			
 
				+  my $p3  = $FIPS_rodata_start->{st_offset};
			
 
				+  my $p4  = $FIPS_rodata_end->{st_offset};
			
 
				+  my $sig = $FIPS_signature->{st_offset};
			
 
				+  my $ctx = HMAC->Init("etaonrishdlcupfm");
			
 
				+
			
 
				+    # detect overlapping regions
			
 
				+    if ($p1<=$p3 && $p2>=$p3) {
			
 
				+	$p3 = $p1; $p4 = $p2>$p4?$p2:$p4; $p1 = 0; $p2 = 0;
			
 
				+    } elsif ($p3<=$p1 && $p4>=$p1) {
			
 
				+	$p3 = $p3; $p4 = $p2>$p4?$p2:$p4; $p1 = 0; $p2 = 0;
			
 
				+    }
			
 
				+
			
 
				+    if ($p1) {
			
 
				+	HMAC_Update (\$ctx,$p1,$p2-$p1);
			
 
				+    }
			
 
				+
			
 
				+    if ($sig>=$p3 && $sig<$p4) {
			
 
				+	# "punch" hole
			
 
				+	HMAC_Update(\$ctx,$p3,$sig-$p3);
			
 
				+	$p3 = $sig+20;
			
 
				+	HMAC_Update(\$ctx,$p3,$p4-$p3);
			
 
				+    } else {
			
 
				+	HMAC_Update(\$ctx,$p3,$p4-$p3);
			
 
				+    }
			
 
				+
			
 
				+    return $ctx->Final();
			
 
				+}
			
 
				+
			
 
				+$fingerprint = FIPS_incore_fingerprint();
			
 
				+
			
 
				+if ($legacy_mode) {
			
 
				+    print unpack("H*",$fingerprint);
			
 
				+} elsif ($FINGERPRINT_ascii_value) {
			
 
				+    seek(FD,$FINGERPRINT_ascii_value->{st_offset},0)	or die "$!";
			
 
				+    print FD unpack("H*",$fingerprint)			or die "$!";
			
 
				+} else {
			
 
				+    seek(FD,$FIPS_signature->{st_offset},0)		or die "$!";
			
 
				+    print FD $fingerprint				or die "$!";
			
 
				+}
			
 
				+
			
 
				+close (FD)	or die "$!";	# buffered write errors only surface at close
			
--- a/c6x/run6x
+++ b/c6x/run6x
@@ -0,0 +1,43 @@
 
				+#!/usr/bin/env perl
			
 
				+
			
 
				+$exe  = @ARGV[0];
			
 
				+$exe .= ".out" if (! -f $exe);
			
 
				+die if (! -f $exe);
			
 
				+
			
 
				+use CCS_SCRIPTING_PERL;
			
 
				+
			
 
				+my $studio=new CCS_SCRIPTING_PERL::CCS_Scripting();
			
 
				+
			
 
				+$studio->CCSOpenNamed("*","*",1);	# connect to board
			
 
				+$studio->TargetReset();
			
 
				+
			
 
				+print "loading $exe\n";
			
 
				+$studio->ProgramLoad($exe);
			
 
				+
			
 
				+sub write_string {
			
 
				+    my ($studio,$addr,$str) = @_;
			
 
				+    my $len = length($str);
			
 
				+    my $i;
			
 
				+
			
 
				+    for ($i=0; $i<$len; $i++) {
			
 
				+	$studio->MemoryWrite($CCS_SCRIPTING_PERL::PAGE_DATA,$addr+$i,8,vec($str,$i,8));
			
 
				+    }
			
 
				+    $studio->MemoryWrite($CCS_SCRIPTING_PERL::PAGE_DATA,$addr+$i,8,0);
			
 
				+
			
 
				+    return $i+1;
			
 
				+}
			
 
				+
			
 
				+$addr= $studio->SymbolGetAddress("__c_args");
			
 
				+printf "setting up __c_args at 0x%X\n",$addr;#\n";
			
 
				+
			
 
				+$studio->MemoryWrite($CCS_SCRIPTING_PERL::PAGE_DATA,$addr,32,$#ARGV+1);
			
 
				+
			
 
				+for ($i=0,$strings=$addr+($#ARGV+3)*4; $i<=$#ARGV; $i++) {
			
 
				+    $off = write_string($studio,$strings,@ARGV[$i]);
			
 
				+    $studio->MemoryWrite($CCS_SCRIPTING_PERL::PAGE_DATA,$addr+4*($i+1),32,$strings);
			
 
				+    $strings += $off;
			
 
				+}
			
 
				+$studio->MemoryWrite($CCS_SCRIPTING_PERL::PAGE_DATA,$addr+4*($i+1),32,0);	# zero word after the string pointers
			
 
				+
			
 
				+print "running...\n";
			
 
				+$studio->TargetRun();
			
--- a/crypto/aes/asm/aes-c64xplus.pl
+++ b/crypto/aes/asm/aes-c64xplus.pl
@@ -0,0 +1,1329 @@
 
				+#!/usr/bin/env perl
			
 
				+#
			
 
				+# ====================================================================
			
 
				+# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
			
 
				+# project. The module is, however, dual licensed under OpenSSL and
			
 
				+# CRYPTOGAMS licenses depending on where you obtain it. For further
			
 
				+# details see http://www.openssl.org/~appro/cryptogams/.
			
 
				+# ====================================================================
			
 
				+#
			
 
				+# [Endian-neutral] AES for C64x+.
			
 
				+#
			
 
				+# Even though SPLOOPs are scheduled for 13 cycles, and thus expected
			
 
				+# performance is ~8.5 cycles per byte processed with 128-bit key,
			
 
				+# measured performance turned out to be ~10 cycles per byte. Discrepancy
			
 
				+# must be caused by limitations of L1D memory banking(*), see SPRU871
			
 
				+# TI publication for further details. If it's any consolation it's still
			
 
				+# ~20% faster than TI's linear assembly module anyway... Compared to
			
 
				+# aes_core.c compiled with cl6x 6.0 with -mv6400+ -o2 options this
			
 
				+# code is 3.75x faster and almost 3x smaller (tables included).
			
 
				+#
			
 
				+# (*)	This means that there might be subtle correlation between data
			
 
				+#	and timing and one can wonder if it can be ... attacked:-(
			
 
				+#	On the other hand this also means that *if* one chooses to
			
 
				+#	implement *4* T-tables variant [instead of 1 T-table as in
			
 
				+#	this implementation, or in addition to], then one ought to
			
 
				+#	*interleave* them. Even though it complicates addressing,
			
 
				+#	references to interleaved tables would be guaranteed not to
			
 
				+#	clash. I reckon that it should be possible to break 8 cycles
			
 
				+#	per byte "barrier," i.e. improve by ~20%, naturally at the
			
 
				+#	cost of 8x increased pressure on L1D. 8x because you'd have
			
 
				+#	to interleave both Te and Td tables...
			
 
				+
			
 
				+while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {}
			
 
				+open STDOUT,">$output";
			
 
				+
			
 
				+($TEA,$TEB)=("A5","B5");
			
 
				+($KPA,$KPB)=("A3","B1");
			
 
				+@K=("A6","B6","A7","B7");
			
 
				+@s=("A8","B8","A9","B9");
			
 
				+@Te0=@Td0=("A16","B16","A17","B17");
			
 
				+@Te1=@Td1=("A18","B18","A19","B19");
			
 
				+@Te2=@Td2=("A20","B20","A21","B21");
			
 
				+@Te3=@Td3=("A22","B22","A23","B23");
			
 
				+
			
 
				+$code=<<___;
			
 
				+	.text
			
 
				+
			
 
				+	.asg	B3,RA
			
 
				+	.asg	A4,INP
			
 
				+	.asg	B4,OUT
			
 
				+	.asg	A6,KEY
			
 
				+	.asg	A4,RET
			
 
				+	.asg	B15,SP
			
 
				+
			
 
				+	.eval	24,EXT0
			
 
				+	.eval	16,EXT1
			
 
				+	.eval	8,EXT2
			
 
				+	.eval	0,EXT3
			
 
				+	.eval	8,TBL1
			
 
				+	.eval	16,TBL2
			
 
				+	.eval	24,TBL3
			
 
				+
			
 
				+	.if	.BIG_ENDIAN
			
 
				+	.eval	24-EXT0,EXT0
			
 
				+	.eval	24-EXT1,EXT1
			
 
				+	.eval	24-EXT2,EXT2
			
 
				+	.eval	24-EXT3,EXT3
			
 
				+	.eval	32-TBL1,TBL1
			
 
				+	.eval	32-TBL2,TBL2
			
 
				+	.eval	32-TBL3,TBL3
			
 
				+	.endif
			
 
				+
			
 
				+	.global	_AES_encrypt
			
 
				+_AES_encrypt:
			
 
				+	.asmfunc
			
 
				+	MVK	1,B2
			
 
				+__encrypt:
			
 
				+   [B2]	LDNDW	*INP++,A9:A8			; load input
			
 
				+||	MVKL	(AES_Te-_AES_encrypt),$TEA
			
 
				+||	ADDKPC	_AES_encrypt,B0
			
 
				+   [B2]	LDNDW	*INP++,B9:B8
			
 
				+||	MVKH	(AES_Te-_AES_encrypt),$TEA
			
 
				+||	ADD	0,KEY,$KPA
			
 
				+||	ADD	4,KEY,$KPB
			
 
				+	LDW	*$KPA++[2],$Te0[0]		; zero round key
			
 
				+||	LDW	*$KPB++[2],$Te0[1]
			
 
				+||	MVK	60,A0
			
 
				+||	ADD	B0,$TEA,$TEA			; AES_Te
			
 
				+	LDW	*KEY[A0],B0			; rounds
			
 
				+||	MVK	1024,A0				; sizeof(AES_Te)
			
 
				+	LDW	*$KPA++[2],$Te0[2]
			
 
				+||	LDW	*$KPB++[2],$Te0[3]
			
 
				+||	MV	$TEA,$TEB
			
 
				+	NOP
			
 
				+	.if	.BIG_ENDIAN
			
 
				+	MV	A9,$s[0]
			
 
				+||	MV	A8,$s[1]
			
 
				+||	MV	B9,$s[2]
			
 
				+||	MV	B8,$s[3]
			
 
				+	.else
			
 
				+	MV	A8,$s[0]
			
 
				+||	MV	A9,$s[1]
			
 
				+||	MV	B8,$s[2]
			
 
				+||	MV	B9,$s[3]
			
 
				+	.endif
			
 
				+	XOR	$Te0[0],$s[0],$s[0]
			
 
				+||	XOR	$Te0[1],$s[1],$s[1]
			
 
				+||	LDW	*$KPA++[2],$K[0]		; 1st round key
			
 
				+||	LDW	*$KPB++[2],$K[1]
			
 
				+	SUB	B0,2,B0
			
 
				+
			
 
				+	SPLOOPD	13
			
 
				+||	MVC	B0,ILC
			
 
				+||	LDW	*$KPA++[2],$K[2]
			
 
				+||	LDW	*$KPB++[2],$K[3]
			
 
				+;;====================================================================
			
 
				+	EXTU	$s[1],EXT1,24,$Te1[1]
			
 
				+||	EXTU	$s[0],EXT3,24,$Te3[0]
			
 
				+	LDW	*${TEB}[$Te1[1]],$Te1[1]	; Te1[s1>>8],	t0
			
 
				+||	LDW	*${TEA}[$Te3[0]],$Te3[0]	; Te3[s0>>24],	t1
			
 
				+||	XOR	$s[2],$Te0[2],$s[2]		; modulo-scheduled
			
 
				+||	XOR	$s[3],$Te0[3],$s[3]		; modulo-scheduled
			
 
				+||	EXTU	$s[1],EXT3,24,$Te3[1]
			
 
				+||	EXTU	$s[0],EXT1,24,$Te1[0]
			
 
				+	LDW	*${TEB}[$Te3[1]],$Te3[1]	; Te3[s1>>24],	t2
			
 
				+||	LDW	*${TEA}[$Te1[0]],$Te1[0]	; Te1[s0>>8],	t3
			
 
				+||	EXTU	$s[2],EXT2,24,$Te2[2]
			
 
				+||	EXTU	$s[3],EXT2,24,$Te2[3]
			
 
				+	LDW	*${TEA}[$Te2[2]],$Te2[2]	; Te2[s2>>16],	t0
			
 
				+||	LDW	*${TEB}[$Te2[3]],$Te2[3]	; Te2[s3>>16],	t1
			
 
				+||	EXTU	$s[3],EXT3,24,$Te3[3]
			
 
				+||	EXTU	$s[2],EXT1,24,$Te1[2]
			
 
				+	LDW	*${TEB}[$Te3[3]],$Te3[3]	; Te3[s3>>24],	t0
			
 
				+||	LDW	*${TEA}[$Te1[2]],$Te1[2]	; Te1[s2>>8],	t1
			
 
				+||	EXTU	$s[0],EXT2,24,$Te2[0]
			
 
				+||	EXTU	$s[1],EXT2,24,$Te2[1]
			
 
				+	LDW	*${TEA}[$Te2[0]],$Te2[0]	; Te2[s0>>16],	t2
			
 
				+||	LDW	*${TEB}[$Te2[1]],$Te2[1]	; Te2[s1>>16],	t3
			
 
				+||	EXTU	$s[3],EXT1,24,$Te1[3]
			
 
				+||	EXTU	$s[2],EXT3,24,$Te3[2]
			
 
				+	LDW	*${TEB}[$Te1[3]],$Te1[3]	; Te1[s3>>8],	t2
			
 
				+||	LDW	*${TEA}[$Te3[2]],$Te3[2]	; Te3[s2>>24],	t3
			
 
				+||	ROTL	$Te1[1],TBL1,$Te3[0]		; t0
			
 
				+||	ROTL	$Te3[0],TBL3,$Te1[1]		; t1
			
 
				+||	EXTU	$s[0],EXT0,24,$Te0[0]
			
 
				+||	EXTU	$s[1],EXT0,24,$Te0[1]
			
 
				+	LDW	*${TEA}[$Te0[0]],$Te0[0]	; Te0[s0],	t0
			
 
				+||	LDW	*${TEB}[$Te0[1]],$Te0[1]	; Te0[s1],	t1
			
 
				+||	ROTL	$Te3[1],TBL3,$Te1[0]		; t2
			
 
				+||	ROTL	$Te1[0],TBL1,$Te3[1]		; t3
			
 
				+||	EXTU	$s[2],EXT0,24,$Te0[2]
			
 
				+||	EXTU	$s[3],EXT0,24,$Te0[3]
			
 
				+	LDW	*${TEA}[$Te0[2]],$Te0[2]	; Te0[s2],	t2
			
 
				+||	LDW	*${TEB}[$Te0[3]],$Te0[3]	; Te0[s3],	t3
			
 
				+||	ROTL	$Te2[2],TBL2,$Te2[2]		; t0
			
 
				+||	ROTL	$Te2[3],TBL2,$Te2[3]		; t1
			
 
				+||	XOR	$K[0],$Te3[0],$s[0]
			
 
				+||	XOR	$K[1],$Te1[1],$s[1]
			
 
				+	ROTL	$Te3[3],TBL3,$Te1[2]		; t0
			
 
				+||	ROTL	$Te1[2],TBL1,$Te3[3]		; t1
			
 
				+||	XOR	$K[2],$Te1[0],$s[2]
			
 
				+||	XOR	$K[3],$Te3[1],$s[3]
			
 
				+||	LDW	*$KPA++[2],$K[0]		; next round key
			
 
				+||	LDW	*$KPB++[2],$K[1]
			
 
				+	ROTL	$Te2[0],TBL2,$Te2[0]		; t2
			
 
				+||	ROTL	$Te2[1],TBL2,$Te2[1]		; t3
			
 
				+||	XOR	$s[0],$Te2[2],$s[0]
			
 
				+||	XOR	$s[1],$Te2[3],$s[1]
			
 
				+||	LDW	*$KPA++[2],$K[2]
			
 
				+||	LDW	*$KPB++[2],$K[3]
			
 
				+	ROTL	$Te1[3],TBL1,$Te3[2]		; t2
			
 
				+||	ROTL	$Te3[2],TBL3,$Te1[3]		; t3
			
 
				+||	XOR	$s[0],$Te1[2],$s[0]
			
 
				+||	XOR	$s[1],$Te3[3],$s[1]
			
 
				+	XOR	$s[2],$Te2[0],$s[2]
			
 
				+||	XOR	$s[3],$Te2[1],$s[3]
			
 
				+||	XOR	$s[0],$Te0[0],$s[0]
			
 
				+||	XOR	$s[1],$Te0[1],$s[1]
			
 
				+	SPKERNEL
			
 
				+||	XOR.L	$s[2],$Te3[2],$s[2]
			
 
				+||	XOR.L	$s[3],$Te1[3],$s[3]
			
 
				+;;====================================================================
			
 
				+	ADD.D	${TEA},A0,${TEA}		; point to Te4
			
 
				+||	ADD.D	${TEB},A0,${TEB}
			
 
				+||	EXTU	$s[1],EXT1,24,$Te1[1]
			
 
				+||	EXTU	$s[0],EXT3,24,$Te3[0]
			
 
				+	LDBU	*${TEB}[$Te1[1]],$Te1[1]	; Te1[s1>>8],	t0
			
 
				+||	LDBU	*${TEA}[$Te3[0]],$Te3[0]	; Te3[s0>>24],	t1
			
 
				+||	XOR	$s[2],$Te0[2],$s[2]		; modulo-scheduled
			
 
				+||	XOR	$s[3],$Te0[3],$s[3]		; modulo-scheduled
			
 
				+||	EXTU	$s[0],EXT0,24,$Te0[0]
			
 
				+||	EXTU	$s[1],EXT0,24,$Te0[1]
			
 
				+	LDBU	*${TEA}[$Te0[0]],$Te0[0]	; Te0[s0],	t0
			
 
				+||	LDBU	*${TEB}[$Te0[1]],$Te0[1]	; Te0[s1],	t1
			
 
				+||	EXTU	$s[3],EXT3,24,$Te3[3]
			
 
				+||	EXTU	$s[2],EXT1,24,$Te1[2]
			
 
				+	LDBU	*${TEB}[$Te3[3]],$Te3[3]	; Te3[s3>>24],	t0
			
 
				+||	LDBU	*${TEA}[$Te1[2]],$Te1[2]	; Te1[s2>>8],	t1
			
 
				+||	EXTU	$s[2],EXT2,24,$Te2[2]
			
 
				+||	EXTU	$s[3],EXT2,24,$Te2[3]
			
 
				+	LDBU	*${TEA}[$Te2[2]],$Te2[2]	; Te2[s2>>16],	t0
			
 
				+||	LDBU	*${TEB}[$Te2[3]],$Te2[3]	; Te2[s3>>16],	t1
			
 
				+||	EXTU	$s[1],EXT3,24,$Te3[1]
			
 
				+||	EXTU	$s[0],EXT1,24,$Te1[0]
			
 
				+	LDBU	*${TEB}[$Te3[1]],$Te3[1]	; Te3[s1>>24],	t2
			
 
				+||	LDBU	*${TEA}[$Te1[0]],$Te1[0]	; Te1[s0>>8],	t3
			
 
				+||	EXTU	$s[3],EXT1,24,$Te1[3]
			
 
				+||	EXTU	$s[2],EXT3,24,$Te3[2]
			
 
				+	LDBU	*${TEB}[$Te1[3]],$Te1[3]	; Te1[s3>>8],	t2
			
 
				+||	LDBU	*${TEA}[$Te3[2]],$Te3[2]	; Te3[s2>>24],	t3
			
 
				+||	EXTU	$s[2],EXT0,24,$Te0[2]
			
 
				+||	EXTU	$s[3],EXT0,24,$Te0[3]
			
 
				+	LDBU	*${TEA}[$Te0[2]],$Te0[2]	; Te0[s2],	t2
			
 
				+||	LDBU	*${TEB}[$Te0[3]],$Te0[3]	; Te0[s3],	t3
			
 
				+||	EXTU	$s[0],EXT2,24,$Te2[0]
			
 
				+||	EXTU	$s[1],EXT2,24,$Te2[1]
			
 
				+	LDBU	*${TEA}[$Te2[0]],$Te2[0]	; Te2[s0>>16],	t2
			
 
				+||	LDBU	*${TEB}[$Te2[1]],$Te2[1]	; Te2[s1>>16],	t3
			
 
				+
			
 
				+	.if	.BIG_ENDIAN
			
 
				+	PACK2	$Te0[0],$Te1[1],$Te0[0]
			
 
				+||	PACK2	$Te0[1],$Te1[2],$Te0[1]
			
 
				+	PACK2	$Te2[2],$Te3[3],$Te2[2]
			
 
				+||	PACK2	$Te2[3],$Te3[0],$Te2[3]
			
 
				+	PACKL4	$Te0[0],$Te2[2],$Te0[0]
			
 
				+||	PACKL4	$Te0[1],$Te2[3],$Te0[1]
			
 
				+	XOR	$K[0],$Te0[0],$Te0[0]		; s[0]
			
 
				+||	XOR	$K[1],$Te0[1],$Te0[1]		; s[1]
			
 
				+
			
 
				+	PACK2	$Te0[2],$Te1[3],$Te0[2]
			
 
				+||	PACK2	$Te0[3],$Te1[0],$Te0[3]
			
 
				+	PACK2	$Te2[0],$Te3[1],$Te2[0]
			
 
				+||	PACK2	$Te2[1],$Te3[2],$Te2[1]
			
 
				+||	BNOP	RA
			
 
				+	PACKL4	$Te0[2],$Te2[0],$Te0[2]
			
 
				+||	PACKL4	$Te0[3],$Te2[1],$Te0[3]
			
 
				+	XOR	$K[2],$Te0[2],$Te0[2]		; s[2]
			
 
				+||	XOR	$K[3],$Te0[3],$Te0[3]		; s[3]
			
 
				+
			
 
				+	MV	$Te0[0],A9
			
 
				+||	MV	$Te0[1],A8
			
 
				+	MV	$Te0[2],B9
			
 
				+||	MV	$Te0[3],B8
			
 
				+|| [B2]	STNDW	A9:A8,*OUT++
			
 
				+   [B2]	STNDW	B9:B8,*OUT++
			
 
				+	.else
			
 
				+	PACK2	$Te1[1],$Te0[0],$Te1[1]
			
 
				+||	PACK2	$Te1[2],$Te0[1],$Te1[2]
			
 
				+	PACK2	$Te3[3],$Te2[2],$Te3[3]
			
 
				+||	PACK2	$Te3[0],$Te2[3],$Te3[0]
			
 
				+	PACKL4	$Te3[3],$Te1[1],$Te1[1]
			
 
				+||	PACKL4	$Te3[0],$Te1[2],$Te1[2]
			
 
				+	XOR	$K[0],$Te1[1],$Te1[1]		; s[0]
			
 
				+||	XOR	$K[1],$Te1[2],$Te1[2]		; s[1]
			
 
				+
			
 
				+	PACK2	$Te1[3],$Te0[2],$Te1[3]
			
 
				+||	PACK2	$Te1[0],$Te0[3],$Te1[0]
			
 
				+	PACK2	$Te3[1],$Te2[0],$Te3[1]
			
 
				+||	PACK2	$Te3[2],$Te2[1],$Te3[2]
			
 
				+||	BNOP	RA
			
 
				+	PACKL4	$Te3[1],$Te1[3],$Te1[3]
			
 
				+||	PACKL4	$Te3[2],$Te1[0],$Te1[0]
			
 
				+	XOR	$K[2],$Te1[3],$Te1[3]		; s[2]
			
 
				+||	XOR	$K[3],$Te1[0],$Te1[0]		; s[3]
			
 
				+
			
 
				+	MV	$Te1[1],A8
			
 
				+||	MV	$Te1[2],A9
			
 
				+	MV	$Te1[3],B8
			
 
				+||	MV	$Te1[0],B9
			
 
				+|| [B2]	STNDW	A9:A8,*OUT++
			
 
				+   [B2]	STNDW	B9:B8,*OUT++
			
 
				+	.endif
			
 
				+	.endasmfunc
			
 
				+
			
 
				+	.global	_AES_decrypt
			
 
				+_AES_decrypt:
			
 
				+	.asmfunc
			
 
				+	MVK	1,B2
			
 
				+__decrypt:
			
 
				+   [B2]	LDNDW	*INP++,A9:A8			; load input
			
 
				+||	MVKL	(AES_Td-_AES_decrypt),$TEA
			
 
				+||	ADDKPC	_AES_decrypt,B0
			
 
				+   [B2]	LDNDW	*INP++,B9:B8
			
 
				+||	MVKH	(AES_Td-_AES_decrypt),$TEA
			
 
				+||	ADD	0,KEY,$KPA
			
 
				+||	ADD	4,KEY,$KPB
			
 
				+	LDW	*$KPA++[2],$Td0[0]		; zero round key
			
 
				+||	LDW	*$KPB++[2],$Td0[1]
			
 
				+||	MVK	60,A0
			
 
				+||	ADD	B0,$TEA,$TEA			; AES_Td
			
 
				+	LDW	*KEY[A0],B0			; rounds
			
 
				+||	MVK	1024,A0				; sizeof(AES_Td)
			
 
				+	LDW	*$KPA++[2],$Td0[2]
			
 
				+||	LDW	*$KPB++[2],$Td0[3]
			
 
				+||	MV	$TEA,$TEB
			
 
				+	NOP
			
 
				+	.if	.BIG_ENDIAN
			
 
				+	MV	A9,$s[0]
			
 
				+||	MV	A8,$s[1]
			
 
				+||	MV	B9,$s[2]
			
 
				+||	MV	B8,$s[3]
			
 
				+	.else
			
 
				+	MV	A8,$s[0]
			
 
				+||	MV	A9,$s[1]
			
 
				+||	MV	B8,$s[2]
			
 
				+||	MV	B9,$s[3]
			
 
				+	.endif
			
 
				+	XOR	$Td0[0],$s[0],$s[0]
			
 
				+||	XOR	$Td0[1],$s[1],$s[1]
			
 
				+||	LDW	*$KPA++[2],$K[0]		; 1st round key
			
 
				+||	LDW	*$KPB++[2],$K[1]
			
 
				+	SUB	B0,2,B0
			
 
				+
			
 
				+	SPLOOPD	13
			
 
				+||	MVC	B0,ILC
			
 
				+||	LDW	*$KPA++[2],$K[2]
			
 
				+||	LDW	*$KPB++[2],$K[3]
			
 
				+;;====================================================================
			
 
				+	EXTU	$s[1],EXT3,24,$Td3[1]
			
 
				+||	EXTU	$s[0],EXT1,24,$Td1[0]
			
 
				+	LDW	*${TEB}[$Td3[1]],$Td3[1]	; Td3[s1>>24],	t0
			
 
				+||	LDW	*${TEA}[$Td1[0]],$Td1[0]	; Td1[s0>>8],	t1
			
 
				+||	XOR	$s[2],$Td0[2],$s[2]		; modulo-scheduled
			
 
				+||	XOR	$s[3],$Td0[3],$s[3]		; modulo-scheduled
			
 
				+||	EXTU	$s[1],EXT1,24,$Td1[1]
			
 
				+||	EXTU	$s[0],EXT3,24,$Td3[0]
			
 
				+	LDW	*${TEB}[$Td1[1]],$Td1[1]	; Td1[s1>>8],	t2
			
 
				+||	LDW	*${TEA}[$Td3[0]],$Td3[0]	; Td3[s0>>24],	t3
			
 
				+||	EXTU	$s[2],EXT2,24,$Td2[2]
			
 
				+||	EXTU	$s[3],EXT2,24,$Td2[3]
			
 
				+	LDW	*${TEA}[$Td2[2]],$Td2[2]	; Td2[s2>>16],	t0
			
 
				+||	LDW	*${TEB}[$Td2[3]],$Td2[3]	; Td2[s3>>16],	t1
			
 
				+||	EXTU	$s[3],EXT1,24,$Td1[3]
			
 
				+||	EXTU	$s[2],EXT3,24,$Td3[2]
			
 
				+	LDW	*${TEB}[$Td1[3]],$Td1[3]	; Td1[s3>>8],	t0
			
 
				+||	LDW	*${TEA}[$Td3[2]],$Td3[2]	; Td3[s2>>24],	t1
			
 
				+||	EXTU	$s[0],EXT2,24,$Td2[0]
			
 
				+||	EXTU	$s[1],EXT2,24,$Td2[1]
			
 
				+	LDW	*${TEA}[$Td2[0]],$Td2[0]	; Td2[s0>>16],	t2
			
 
				+||	LDW	*${TEB}[$Td2[1]],$Td2[1]	; Td2[s1>>16],	t3
			
 
				+||	EXTU	$s[3],EXT3,24,$Td3[3]
			
 
				+||	EXTU	$s[2],EXT1,24,$Td1[2]
			
 
				+	LDW	*${TEB}[$Td3[3]],$Td3[3]	; Td3[s3>>24],	t2
			
 
				+||	LDW	*${TEA}[$Td1[2]],$Td1[2]	; Td1[s2>>8],	t3
			
 
				+||	ROTL	$Td3[1],TBL3,$Td1[0]		; t0
			
 
				+||	ROTL	$Td1[0],TBL1,$Td3[1]		; t1
			
 
				+||	EXTU	$s[0],EXT0,24,$Td0[0]
			
 
				+||	EXTU	$s[1],EXT0,24,$Td0[1]
			
 
				+	LDW	*${TEA}[$Td0[0]],$Td0[0]	; Td0[s0],	t0
			
 
				+||	LDW	*${TEB}[$Td0[1]],$Td0[1]	; Td0[s1],	t1
			
 
				+||	ROTL	$Td1[1],TBL1,$Td3[0]		; t2
			
 
				+||	ROTL	$Td3[0],TBL3,$Td1[1]		; t3
			
 
				+||	EXTU	$s[2],EXT0,24,$Td0[2]
			
 
				+||	EXTU	$s[3],EXT0,24,$Td0[3]
			
 
				+	LDW	*${TEA}[$Td0[2]],$Td0[2]	; Td0[s2],	t2
			
 
				+||	LDW	*${TEB}[$Td0[3]],$Td0[3]	; Td0[s3],	t3
			
 
				+||	ROTL	$Td2[2],TBL2,$Td2[2]		; t0
			
 
				+||	ROTL	$Td2[3],TBL2,$Td2[3]		; t1
			
 
				+||	XOR	$K[0],$Td1[0],$s[0]
			
 
				+||	XOR	$K[1],$Td3[1],$s[1]
			
 
				+	ROTL	$Td1[3],TBL1,$Td3[2]		; t0
			
 
				+||	ROTL	$Td3[2],TBL3,$Td1[3]		; t1
			
 
				+||	XOR	$K[2],$Td3[0],$s[2]
			
 
				+||	XOR	$K[3],$Td1[1],$s[3]
			
 
				+||	LDW	*$KPA++[2],$K[0]		; next round key
			
 
				+||	LDW	*$KPB++[2],$K[1]
			
 
				+	ROTL	$Td2[0],TBL2,$Td2[0]		; t2
			
 
				+||	ROTL	$Td2[1],TBL2,$Td2[1]		; t3
			
 
				+||	XOR	$s[0],$Td2[2],$s[0]
			
 
				+||	XOR	$s[1],$Td2[3],$s[1]
			
 
				+||	LDW	*$KPA++[2],$K[2]
			
 
				+||	LDW	*$KPB++[2],$K[3]
			
 
				+	ROTL	$Td3[3],TBL3,$Td1[2]		; t2
			
 
				+||	ROTL	$Td1[2],TBL1,$Td3[3]		; t3
			
 
				+||	XOR	$s[0],$Td3[2],$s[0]
			
 
				+||	XOR	$s[1],$Td1[3],$s[1]
			
 
				+	XOR	$s[2],$Td2[0],$s[2]
			
 
				+||	XOR	$s[3],$Td2[1],$s[3]
			
 
				+||	XOR	$s[0],$Td0[0],$s[0]
			
 
				+||	XOR	$s[1],$Td0[1],$s[1]
			
 
				+	SPKERNEL
			
 
				+||	XOR.L	$s[2],$Td1[2],$s[2]
			
 
				+||	XOR.L	$s[3],$Td3[3],$s[3]
			
 
				+;;====================================================================
			
 
				+	ADD.D	${TEA},A0,${TEA}		; point to Td4
			
 
				+||	ADD.D	${TEB},A0,${TEB}
			
 
				+||	EXTU	$s[1],EXT3,24,$Td3[1]
			
 
				+||	EXTU	$s[0],EXT1,24,$Td1[0]
			
 
				+	LDBU	*${TEB}[$Td3[1]],$Td3[1]	; Td3[s1>>24],	t0
			
 
				+||	LDBU	*${TEA}[$Td1[0]],$Td1[0]	; Td1[s0>>8],	t1
			
 
				+||	XOR	$s[2],$Td0[2],$s[2]		; modulo-scheduled
			
 
				+||	XOR	$s[3],$Td0[3],$s[3]		; modulo-scheduled
			
 
				+||	EXTU	$s[0],EXT0,24,$Td0[0]
			
 
				+||	EXTU	$s[1],EXT0,24,$Td0[1]
			
 
				+	LDBU	*${TEA}[$Td0[0]],$Td0[0]	; Td0[s0],	t0
			
 
				+||	LDBU	*${TEB}[$Td0[1]],$Td0[1]	; Td0[s1],	t1
			
 
				+||	EXTU	$s[2],EXT2,24,$Td2[2]
			
 
				+||	EXTU	$s[3],EXT2,24,$Td2[3]
			
 
				+	LDBU	*${TEA}[$Td2[2]],$Td2[2]	; Td2[s2>>16],	t0
			
 
				+||	LDBU	*${TEB}[$Td2[3]],$Td2[3]	; Td2[s3>>16],	t1
			
 
				+||	EXTU	$s[3],EXT1,24,$Td1[3]
			
 
				+||	EXTU	$s[2],EXT3,24,$Td3[2]
			
 
				+	LDBU	*${TEB}[$Td1[3]],$Td1[3]	; Td1[s3>>8],	t0
			
 
				+||	LDBU	*${TEA}[$Td3[2]],$Td3[2]	; Td3[s2>>24],	t1
			
 
				+||	EXTU	$s[1],EXT1,24,$Td1[1]
			
 
				+||	EXTU	$s[0],EXT3,24,$Td3[0]
			
 
				+	LDBU	*${TEB}[$Td1[1]],$Td1[1]	; Td1[s1>>8],	t2
			
 
				+||	LDBU	*${TEA}[$Td3[0]],$Td3[0]	; Td3[s0>>24],	t3
			
 
				+||	EXTU	$s[0],EXT2,24,$Td2[0]
			
 
				+||	EXTU	$s[1],EXT2,24,$Td2[1]
			
 
				+	LDBU	*${TEA}[$Td2[0]],$Td2[0]	; Td2[s0>>16],	t2
			
 
				+||	LDBU	*${TEB}[$Td2[1]],$Td2[1]	; Td2[s1>>16],	t3
			
 
				+||	EXTU	$s[3],EXT3,24,$Td3[3]
			
 
				+||	EXTU	$s[2],EXT1,24,$Td1[2]
			
 
				+	LDBU	*${TEB}[$Td3[3]],$Td3[3]	; Td3[s3>>24],	t2
			
 
				+||	LDBU	*${TEA}[$Td1[2]],$Td1[2]	; Td1[s2>>8],	t3
			
 
				+||	EXTU	$s[2],EXT0,24,$Td0[2]
			
 
				+||	EXTU	$s[3],EXT0,24,$Td0[3]
			
 
				+	LDBU	*${TEA}[$Td0[2]],$Td0[2]	; Td0[s2],	t2
			
 
				+||	LDBU	*${TEB}[$Td0[3]],$Td0[3]	; Td0[s3],	t3
			
 
				+
			
 
				+	.if	.BIG_ENDIAN
			
 
				+	PACK2	$Td0[0],$Td1[3],$Td0[0]
			
 
				+||	PACK2	$Td0[1],$Td1[0],$Td0[1]
			
 
				+	PACK2	$Td2[2],$Td3[1],$Td2[2]
			
 
				+||	PACK2	$Td2[3],$Td3[2],$Td2[3]
			
 
				+	PACKL4	$Td0[0],$Td2[2],$Td0[0]
			
 
				+||	PACKL4	$Td0[1],$Td2[3],$Td0[1]
			
 
				+	XOR	$K[0],$Td0[0],$Td0[0]		; s[0]
			
 
				+||	XOR	$K[1],$Td0[1],$Td0[1]		; s[1]
			
 
				+
			
 
				+	PACK2	$Td0[2],$Td1[1],$Td0[2]
			
 
				+||	PACK2	$Td0[3],$Td1[2],$Td0[3]
			
 
				+	PACK2	$Td2[0],$Td3[3],$Td2[0]
			
 
				+||	PACK2	$Td2[1],$Td3[0],$Td2[1]
			
 
				+||	BNOP	RA
			
 
				+	PACKL4	$Td0[2],$Td2[0],$Td0[2]
			
 
				+||	PACKL4	$Td0[3],$Td2[1],$Td0[3]
			
 
				+	XOR	$K[2],$Td0[2],$Td0[2]		; s[2]
			
 
				+||	XOR	$K[3],$Td0[3],$Td0[3]		; s[3]
			
 
				+
			
 
				+	MV	$Td0[0],A9
			
 
				+||	MV	$Td0[1],A8
			
 
				+	MV	$Td0[2],B9
			
 
				+||	MV	$Td0[3],B8
			
 
				+|| [B2]	STNDW	A9:A8,*OUT++
			
 
				+   [B2]	STNDW	B9:B8,*OUT++
			
 
				+	.else
			
 
				+	PACK2	$Td1[3],$Td0[0],$Td1[3]
			
 
				+||	PACK2	$Td1[0],$Td0[1],$Td1[0]
			
 
				+	PACK2	$Td3[1],$Td2[2],$Td3[1]
			
 
				+||	PACK2	$Td3[2],$Td2[3],$Td3[2]
			
 
				+	PACKL4	$Td3[1],$Td1[3],$Td1[3]
			
 
				+||	PACKL4	$Td3[2],$Td1[0],$Td1[0]
			
 
				+	XOR	$K[0],$Td1[3],$Td1[3]		; s[0]
			
 
				+||	XOR	$K[1],$Td1[0],$Td1[0]		; s[1]
			
 
				+
			
 
				+	PACK2	$Td1[1],$Td0[2],$Td1[1]
			
 
				+||	PACK2	$Td1[2],$Td0[3],$Td1[2]
			
 
				+	PACK2	$Td3[3],$Td2[0],$Td3[3]
			
 
				+||	PACK2	$Td3[0],$Td2[1],$Td3[0]
			
 
				+||	BNOP	RA
			
 
				+	PACKL4	$Td3[3],$Td1[1],$Td1[1]
			
 
				+||	PACKL4	$Td3[0],$Td1[2],$Td1[2]
			
 
				+	XOR	$K[2],$Td1[1],$Td1[1]		; s[2]
			
 
				+||	XOR	$K[3],$Td1[2],$Td1[2]		; s[3]
			
 
				+
			
 
				+	MV	$Td1[3],A8
			
 
				+||	MV	$Td1[0],A9
			
 
				+	MV	$Td1[1],B8
			
 
				+||	MV	$Td1[2],B9
			
 
				+|| [B2]	STNDW	A9:A8,*OUT++
			
 
				+   [B2]	STNDW	B9:B8,*OUT++
			
 
				+	.endif
			
 
				+	.endasmfunc
			
 
				+___
			
 
				+{
			
 
				+my @K=(@K,@s);			# extended key: @s is appended, so indices past the original @K (e.g. $K[4]..$K[7] in the 192/256-bit paths) resolve to the @s registers
			
 
				+my @Te4=map("B$_",(16..19));	# B16..B19: index/result scratch for the AES_Te4 byte lookups in the key schedule
			
 
				+
			
 
				+my @Kx9=@Te0;			# used in AES_set_decrypt_key: GMPY4 products by 0x09 (reuses the @Te0 registers)
			
 
				+my @KxB=@Te1;			# AES_set_decrypt_key: GMPY4 products by 0x0B (reuses the @Te1 registers)
			
 
				+my @KxD=@Te2;			# AES_set_decrypt_key: GMPY4 products by 0x0D (reuses the @Te2 registers)
			
 
				+my @KxE=@Te3;			# AES_set_decrypt_key: GMPY4 products by 0x0E; also accumulates the value stored back to the key schedule
			
 
				+
			
 
				+$code.=<<___;
			
 
				+	.asg	OUT,BITS
			
 
				+
			
 
				+	.global	_AES_set_encrypt_key
			
 
				+_AES_set_encrypt_key:
			
 
				+__set_encrypt_key:
			
 
				+	.asmfunc
			
 
				+	MV	INP,A0
			
 
				+||	SHRU	BITS,5,BITS			; 128-192-256 -> 4-6-8
			
 
				+||	MV	KEY,A1
			
 
				+  [!A0]	B	RA
			
 
				+||[!A0]	MVK	-1,RET
			
 
				+||[!A0]	MVK	1,A1				; only one B RA
			
 
				+  [!A1]	B	RA
			
 
				+||[!A1]	MVK	-1,RET
			
 
				+||[!A1]	MVK	0,A0
			
 
				+||	MVK	0,B0
			
 
				+||	MVK	0,A1
			
 
				+   [A0]	LDNDW	*INP++,A9:A8
			
 
				+|| [A0]	CMPEQ	4,BITS,B0
			
 
				+|| [A0]	CMPLT	3,BITS,A1
			
 
				+   [B0]	B	key128?
			
 
				+|| [A1]	LDNDW	*INP++,B9:B8
			
 
				+|| [A0]	CMPEQ	6,BITS,B0
			
 
				+|| [A0]	CMPLT	5,BITS,A1
			
 
				+   [B0]	B	key192?
			
 
				+|| [A1]	LDNDW	*INP++,B17:B16
			
 
				+|| [A0]	CMPEQ	8,BITS,B0
			
 
				+|| [A0]	CMPLT	7,BITS,A1
			
 
				+   [B0]	B	key256?
			
 
				+|| [A1]	LDNDW	*INP++,B19:B18
			
 
				+
			
 
				+   [A0]	ADD	0,KEY,$KPA
			
 
				+|| [A0]	ADD	4,KEY,$KPB
			
 
				+|| [A0]	MVKL	(AES_Te4-_AES_set_encrypt_key),$TEA
			
 
				+|| [A0]	ADDKPC	_AES_set_encrypt_key,B6
			
 
				+   [A0]	MVKH	(AES_Te4-_AES_set_encrypt_key),$TEA
			
 
				+   [A0]	ADD	B6,$TEA,$TEA			; AES_Te4
			
 
				+	NOP
			
 
				+	NOP
			
 
				+
			
 
				+	BNOP	RA,5
			
 
				+||	MVK	-2,RET				; unknown bit length
			
 
				+||	MVK	0,B0				; redundant
			
 
				+;;====================================================================
			
 
				+;;====================================================================
			
 
				+key128?:
			
 
				+	.if	.BIG_ENDIAN
			
 
				+	MV	A9,$K[0]
			
 
				+||	MV	A8,$K[1]
			
 
				+||	MV	B9,$Te4[2]
			
 
				+||	MV	B8,$K[3]
			
 
				+	.else
			
 
				+	MV	A8,$K[0]
			
 
				+||	MV	A9,$K[1]
			
 
				+||	MV	B8,$Te4[2]
			
 
				+||	MV	B9,$K[3]
			
 
				+	.endif
			
 
				+
			
 
				+	MVK	256,A0
			
 
				+||	MVK	9,B0
			
 
				+
			
 
				+	SPLOOPD	14
			
 
				+||	MVC	B0,ILC
			
 
				+||	MV	$TEA,$TEB
			
 
				+||	ADD	$TEA,A0,A30			; rcon
			
 
				+;;====================================================================
			
 
				+	LDW	*A30++[1],A31			; rcon[i]
			
 
				+||	MV	$Te4[2],$K[2]
			
 
				+||	EXTU	$K[3],EXT1,24,$Te4[0]
			
 
				+	LDBU	*${TEB}[$Te4[0]],$Te4[0]
			
 
				+||	MV	$K[3],A0
			
 
				+||	EXTU	$K[3],EXT2,24,$Te4[1]
			
 
				+	LDBU	*${TEB}[$Te4[1]],$Te4[1]
			
 
				+||	EXTU	A0,EXT3,24,A0
			
 
				+||	EXTU	$K[3],EXT0,24,$Te4[3]
			
 
				+	.if	.BIG_ENDIAN
			
 
				+	LDBU	*${TEA}[A0],$Te4[3]
			
 
				+||	LDBU	*${TEB}[$Te4[3]],A0
			
 
				+	.else
			
 
				+	LDBU	*${TEA}[A0],A0
			
 
				+||	LDBU	*${TEB}[$Te4[3]],$Te4[3]
			
 
				+	.endif
			
 
				+
			
 
				+	STW	$K[0],*$KPA++[2]
			
 
				+||	STW	$K[1],*$KPB++[2]
			
 
				+	STW	$K[2],*$KPA++[2]
			
 
				+||	STW	$K[3],*$KPB++[2]
			
 
				+
			
 
				+	XOR	A31,$K[0],$K[0]			; ^=rcon[i]
			
 
				+	.if	.BIG_ENDIAN
			
 
				+	PACK2	$Te4[0],$Te4[1],$Te4[1]
			
 
				+	PACK2	$Te4[3],A0,$Te4[3]
			
 
				+	PACKL4	$Te4[1],$Te4[3],$Te4[3]
			
 
				+	.else
			
 
				+	PACK2	$Te4[1],$Te4[0],$Te4[1]
			
 
				+	PACK2	$Te4[3],A0,$Te4[3]
			
 
				+	PACKL4	$Te4[3],$Te4[1],$Te4[3]
			
 
				+	.endif
			
 
				+	XOR	$Te4[3],$K[0],$Te4[0]		; K[0]
			
 
				+	XOR	$Te4[0],$K[1],$K[1]		; K[1]
			
 
				+	MV	$Te4[0],$K[0]
			
 
				+||	XOR	$K[1],$K[2],$Te4[2]		; K[2]
			
 
				+	XOR	$Te4[2],$K[3],$K[3]		; K[3]
			
 
				+	SPKERNEL
			
 
				+;;====================================================================
			
 
				+	BNOP	RA
			
 
				+	MV	$Te4[2],$K[2]
			
 
				+||	STW	$K[0],*$KPA++[2]
			
 
				+||	STW	$K[1],*$KPB++[2]
			
 
				+	STW	$K[2],*$KPA++[2]
			
 
				+||	STW	$K[3],*$KPB++[2]
			
 
				+	MVK	10,B0				; rounds
			
 
				+	STW	B0,*++${KPB}[15]
			
 
				+	MVK	0,RET
			
 
				+;;====================================================================
			
 
				+;;====================================================================
			
 
				+key192?:
			
 
				+	.if	.BIG_ENDIAN
			
 
				+	MV	A9,$K[0]
			
 
				+||	MV	A8,$K[1]
			
 
				+||	MV	B9,$K[2]
			
 
				+||	MV	B8,$K[3]
			
 
				+	MV	B17,$Te4[2]
			
 
				+||	MV	B16,$K[5]
			
 
				+	.else
			
 
				+	MV	A8,$K[0]
			
 
				+||	MV	A9,$K[1]
			
 
				+||	MV	B8,$K[2]
			
 
				+||	MV	B9,$K[3]
			
 
				+	MV	B16,$Te4[2]
			
 
				+||	MV	B17,$K[5]
			
 
				+	.endif
			
 
				+
			
 
				+	MVK	256,A0
			
 
				+||	MVK	6,B0
			
 
				+	MV	$TEA,$TEB
			
 
				+||	ADD	$TEA,A0,A30			; rcon
			
 
				+;;====================================================================
			
 
				+loop192?:
			
 
				+	LDW	*A30++[1],A31			; rcon[i]
			
 
				+||	MV	$Te4[2],$K[4]
			
 
				+||	EXTU	$K[5],EXT1,24,$Te4[0]
			
 
				+	LDBU	*${TEB}[$Te4[0]],$Te4[0]
			
 
				+||	MV	$K[5],A0
			
 
				+||	EXTU	$K[5],EXT2,24,$Te4[1]
			
 
				+	LDBU	*${TEB}[$Te4[1]],$Te4[1]
			
 
				+||	EXTU	A0,EXT3,24,A0
			
 
				+||	EXTU	$K[5],EXT0,24,$Te4[3]
			
 
				+	.if	.BIG_ENDIAN
			
 
				+	LDBU	*${TEA}[A0],$Te4[3]
			
 
				+||	LDBU	*${TEB}[$Te4[3]],A0
			
 
				+	.else
			
 
				+	LDBU	*${TEA}[A0],A0
			
 
				+||	LDBU	*${TEB}[$Te4[3]],$Te4[3]
			
 
				+	.endif
			
 
				+
			
 
				+	STW	$K[0],*$KPA++[2]
			
 
				+||	STW	$K[1],*$KPB++[2]
			
 
				+	STW	$K[2],*$KPA++[2]
			
 
				+||	STW	$K[3],*$KPB++[2]
			
 
				+	STW	$K[4],*$KPA++[2]
			
 
				+||	STW	$K[5],*$KPB++[2]
			
 
				+
			
 
				+	XOR	A31,$K[0],$K[0]			; ^=rcon[i]
			
 
				+	.if	.BIG_ENDIAN
			
 
				+	PACK2	$Te4[0],$Te4[1],$Te4[1]
			
 
				+||	PACK2	$Te4[3],A0,$Te4[3]
			
 
				+	PACKL4	$Te4[1],$Te4[3],$Te4[3]
			
 
				+	.else
			
 
				+	PACK2	$Te4[1],$Te4[0],$Te4[1]
			
 
				+||	PACK2	$Te4[3],A0,$Te4[3]
			
 
				+	PACKL4	$Te4[3],$Te4[1],$Te4[3]
			
 
				+	.endif
			
 
				+	BDEC	loop192?,B0
			
 
				+||	XOR	$Te4[3],$K[0],$Te4[0]		; K[0]
			
 
				+	XOR	$Te4[0],$K[1],$K[1]		; K[1]
			
 
				+	MV	$Te4[0],$K[0]
			
 
				+||	XOR	$K[1],$K[2],$Te4[2]		; K[2]
			
 
				+	XOR	$Te4[2],$K[3],$K[3]		; K[3]
			
 
				+	MV	$Te4[2],$K[2]
			
 
				+||	XOR	$K[3],$K[4],$Te4[2]		; K[4]
			
 
				+	XOR	$Te4[2],$K[5],$K[5]		; K[5]
			
 
				+;;====================================================================
			
 
				+	BNOP	RA
			
 
				+	STW	$K[0],*$KPA++[2]
			
 
				+||	STW	$K[1],*$KPB++[2]
			
 
				+	STW	$K[2],*$KPA++[2]
			
 
				+||	STW	$K[3],*$KPB++[2]
			
 
				+	MVK	12,B0				; rounds
			
 
				+	STW	B0,*++${KPB}[7]
			
 
				+	MVK	0,RET
			
 
				+;;====================================================================
			
 
				+;;====================================================================
			
 
				+key256?:
			
 
				+	.if	.BIG_ENDIAN
			
 
				+	MV	A9,$K[0]
			
 
				+||	MV	A8,$K[1]
			
 
				+||	MV	B9,$K[2]
			
 
				+||	MV	B8,$K[3]
			
 
				+	MV	B17,$K[4]
			
 
				+||	MV	B16,$K[5]
			
 
				+||	MV	B19,$Te4[2]
			
 
				+||	MV	B18,$K[7]
			
 
				+	.else
			
 
				+	MV	A8,$K[0]
			
 
				+||	MV	A9,$K[1]
			
 
				+||	MV	B8,$K[2]
			
 
				+||	MV	B9,$K[3]
			
 
				+	MV	B16,$K[4]
			
 
				+||	MV	B17,$K[5]
			
 
				+||	MV	B18,$Te4[2]
			
 
				+||	MV	B19,$K[7]
			
 
				+	.endif
			
 
				+
			
 
				+	MVK	256,A0
			
 
				+||	MVK	6,B0
			
 
				+	MV	$TEA,$TEB
			
 
				+||	ADD	$TEA,A0,A30			; rcon
			
 
				+;;====================================================================
			
 
				+loop256?:
			
 
				+	LDW	*A30++[1],A31			; rcon[i]
			
 
				+||	MV	$Te4[2],$K[6]
			
 
				+||	EXTU	$K[7],EXT1,24,$Te4[0]
			
 
				+	LDBU	*${TEB}[$Te4[0]],$Te4[0]
			
 
				+||	MV	$K[7],A0
			
 
				+||	EXTU	$K[7],EXT2,24,$Te4[1]
			
 
				+	LDBU	*${TEB}[$Te4[1]],$Te4[1]
			
 
				+||	EXTU	A0,EXT3,24,A0
			
 
				+||	EXTU	$K[7],EXT0,24,$Te4[3]
			
 
				+	.if	.BIG_ENDIAN
			
 
				+	LDBU	*${TEA}[A0],$Te4[3]
			
 
				+||	LDBU	*${TEB}[$Te4[3]],A0
			
 
				+	.else
			
 
				+	LDBU	*${TEA}[A0],A0
			
 
				+||	LDBU	*${TEB}[$Te4[3]],$Te4[3]
			
 
				+	.endif
			
 
				+
			
 
				+	STW	$K[0],*$KPA++[2]
			
 
				+||	STW	$K[1],*$KPB++[2]
			
 
				+	STW	$K[2],*$KPA++[2]
			
 
				+||	STW	$K[3],*$KPB++[2]
			
 
				+	STW	$K[4],*$KPA++[2]
			
 
				+||	STW	$K[5],*$KPB++[2]
			
 
				+	STW	$K[6],*$KPA++[2]
			
 
				+||	STW	$K[7],*$KPB++[2]
			
 
				+||	XOR	A31,$K[0],$K[0]			; ^=rcon[i]
			
 
				+	.if	.BIG_ENDIAN
			
 
				+	PACK2	$Te4[0],$Te4[1],$Te4[1]
			
 
				+||	PACK2	$Te4[3],A0,$Te4[3]
			
 
				+	PACKL4	$Te4[1],$Te4[3],$Te4[3]
			
 
				+||[!B0]	B	done256?
			
 
				+	.else
			
 
				+	PACK2	$Te4[1],$Te4[0],$Te4[1]
			
 
				+||	PACK2	$Te4[3],A0,$Te4[3]
			
 
				+	PACKL4	$Te4[3],$Te4[1],$Te4[3]
			
 
				+||[!B0]	B	done256?
			
 
				+	.endif
			
 
				+	XOR	$Te4[3],$K[0],$Te4[0]		; K[0]
			
 
				+	XOR	$Te4[0],$K[1],$K[1]		; K[1]
			
 
				+	MV	$Te4[0],$K[0]
			
 
				+||	XOR	$K[1],$K[2],$Te4[2]		; K[2]
			
 
				+	XOR	$Te4[2],$K[3],$K[3]		; K[3]
			
 
				+
			
 
				+	MV	$Te4[2],$K[2]
			
 
				+|| [B0]	EXTU	$K[3],EXT0,24,$Te4[0]
			
 
				+|| [B0]	SUB	B0,1,B0
			
 
				+	LDBU	*${TEB}[$Te4[0]],$Te4[0]
			
 
				+||	MV	$K[3],A0
			
 
				+||	EXTU	$K[3],EXT1,24,$Te4[1]
			
 
				+	LDBU	*${TEB}[$Te4[1]],$Te4[1]
			
 
				+||	EXTU	A0,EXT2,24,A0
			
 
				+||	EXTU	$K[3],EXT3,24,$Te4[3]
			
 
				+
			
 
				+	.if	.BIG_ENDIAN
			
 
				+	LDBU	*${TEA}[A0],$Te4[3]
			
 
				+||	LDBU	*${TEB}[$Te4[3]],A0
			
 
				+	NOP	3
			
 
				+	PACK2	$Te4[0],$Te4[1],$Te4[1]
			
 
				+	PACK2	$Te4[3],A0,$Te4[3]
			
 
				+||	B	loop256?
			
 
				+	PACKL4	$Te4[1],$Te4[3],$Te4[3]
			
 
				+	.else
			
 
				+	LDBU	*${TEA}[A0],A0
			
 
				+||	LDBU	*${TEB}[$Te4[3]],$Te4[3]
			
 
				+	NOP	3
			
 
				+	PACK2	$Te4[1],$Te4[0],$Te4[1]
			
 
				+	PACK2	$Te4[3],A0,$Te4[3]
			
 
				+||	B	loop256?
			
 
				+	PACKL4	$Te4[3],$Te4[1],$Te4[3]
			
 
				+	.endif
			
 
				+
			
 
				+	XOR	$Te4[3],$K[4],$Te4[0]		; K[4]
			
 
				+	XOR	$Te4[0],$K[5],$K[5]		; K[5]
			
 
				+	MV	$Te4[0],$K[4]
			
 
				+||	XOR	$K[5],$K[6],$Te4[2]		; K[6]
			
 
				+	XOR	$Te4[2],$K[7],$K[7]		; K[7]
			
 
				+;;====================================================================
			
 
				+done256?:
			
 
				+	BNOP	RA
			
 
				+	STW	$K[0],*$KPA++[2]
			
 
				+||	STW	$K[1],*$KPB++[2]
			
 
				+	STW	$K[2],*$KPA++[2]
			
 
				+||	STW	$K[3],*$KPB++[2]
			
 
				+	MVK	14,B0				; rounds
			
 
				+	STW	B0,*--${KPB}[1]
			
 
				+	MVK	0,RET
			
 
				+	.endasmfunc
			
 
				+
			
 
				+	.global	_AES_set_decrypt_key
			
 
				+_AES_set_decrypt_key:
			
 
				+	.asmfunc
			
 
				+	B	__set_encrypt_key		; guarantee local call
			
 
				+	MV	KEY,B30				; B30 is not modified
			
 
				+	MV	RA, B31				; B31 is not modified
			
 
				+	ADDKPC	ret?,RA,2
			
 
				+ret?:						; B0 holds rounds or zero
			
 
				+  [!B0]	BNOP	B31				; return if zero
			
 
				+   [B0]	SHL	B0,4,A0				; offset to last round key
			
 
				+   [B0]	SHRU	B0,1,B1
			
 
				+   [B0]	SUB	B1,1,B1
			
 
				+   [B0]	MVK	0x0000001B,B3			; AES polynomial
			
 
				+   [B0]	MVKH	0x07000000,B3
			
 
				+
			
 
				+	SPLOOPD	9				; flip round keys
			
 
				+||	MVC	B1,ILC
			
 
				+||	MV	B30,$KPA
			
 
				+||	ADD	B30,A0,$KPB
			
 
				+||	MVK	16,A0				; sizeof(round key)
			
 
				+;;====================================================================
			
 
				+	LDW	*${KPA}[0],A16
			
 
				+||	LDW	*${KPB}[0],B16
			
 
				+	LDW	*${KPA}[1],A17
			
 
				+||	LDW	*${KPB}[1],B17
			
 
				+	LDW	*${KPA}[2],A18
			
 
				+||	LDW	*${KPB}[2],B18
			
 
				+	LDW	*${KPA}[3],A19
			
 
				+||	ADD	$KPA,A0,$KPA
			
 
				+||	LDW	*${KPB}[3],B19
			
 
				+||	SUB	$KPB,A0,$KPB
			
 
				+	NOP
			
 
				+	STW	B16,*${KPA}[-4]
			
 
				+||	STW	A16,*${KPB}[4]
			
 
				+	STW	B17,*${KPA}[-3]
			
 
				+||	STW	A17,*${KPB}[5]
			
 
				+	STW	B18,*${KPA}[-2]
			
 
				+||	STW	A18,*${KPB}[6]
			
 
				+	STW	B19,*${KPA}[-1]
			
 
				+||	STW	A19,*${KPB}[7]
			
 
				+	SPKERNEL
			
 
				+;;====================================================================
			
 
				+	SUB	B0,1,B0				; skip last round
			
 
				+||	ADD	B30,A0,$KPA			; skip first round
			
 
				+||	ADD	B30,A0,$KPB
			
 
				+||	MVC	GFPGFR,B30			; save GFPGFR
			
 
				+	LDW	*${KPA}[0],$K[0]
			
 
				+||	LDW	*${KPB}[1],$K[1]
			
 
				+||	MVC	B3,GFPGFR
			
 
				+	LDW	*${KPA}[2],$K[2]
			
 
				+||	LDW	*${KPB}[3],$K[3]
			
 
				+	MVK	0x00000909,A24
			
 
				+||	MVK	0x00000B0B,B24
			
 
				+	MVKH	0x09090000,A24
			
 
				+||	MVKH	0x0B0B0000,B24
			
 
				+	MVC	B0,ILC
			
 
				+||	SUB	B0,1,B0
			
 
				+
			
 
				+	GMPY4	$K[0],A24,$Kx9[0]		; ·0x09
			
 
				+||	GMPY4	$K[1],A24,$Kx9[1]
			
 
				+||	MVK	0x00000D0D,A25
			
 
				+||	MVK	0x00000E0E,B25
			
 
				+	GMPY4	$K[2],A24,$Kx9[2]
			
 
				+||	GMPY4	$K[3],A24,$Kx9[3]
			
 
				+||	MVKH	0x0D0D0000,A25
			
 
				+||	MVKH	0x0E0E0000,B25
			
 
				+
			
 
				+	GMPY4	$K[0],B24,$KxB[0]		; ·0x0B
			
 
				+||	GMPY4	$K[1],B24,$KxB[1]
			
 
				+	GMPY4	$K[2],B24,$KxB[2]
			
 
				+||	GMPY4	$K[3],B24,$KxB[3]
			
 
				+
			
 
				+	SPLOOP	11				; InvMixColumns
			
 
				+;;====================================================================
			
 
				+	GMPY4	$K[0],A25,$KxD[0]		; ·0x0D
			
 
				+||	GMPY4	$K[1],A25,$KxD[1]
			
 
				+||	SWAP2	$Kx9[0],$Kx9[0]			; rotate by 16
			
 
				+||	SWAP2	$Kx9[1],$Kx9[1]
			
 
				+||	MV	$K[0],$s[0]			; this or DINT
			
 
				+||	MV	$K[1],$s[1]
			
 
				+|| [B0]	LDW	*${KPA}[4],$K[0]
			
 
				+|| [B0]	LDW	*${KPB}[5],$K[1]
			
 
				+	GMPY4	$K[2],A25,$KxD[2]
			
 
				+||	GMPY4	$K[3],A25,$KxD[3]
			
 
				+||	SWAP2	$Kx9[2],$Kx9[2]
			
 
				+||	SWAP2	$Kx9[3],$Kx9[3]
			
 
				+||	MV	$K[2],$s[2]
			
 
				+||	MV	$K[3],$s[3]
			
 
				+|| [B0]	LDW	*${KPA}[6],$K[2]
			
 
				+|| [B0]	LDW	*${KPB}[7],$K[3]
			
 
				+
			
 
				+	GMPY4	$s[0],B25,$KxE[0]		; ·0x0E
			
 
				+||	GMPY4	$s[1],B25,$KxE[1]
			
 
				+||	XOR	$Kx9[0],$KxB[0],$KxB[0]
			
 
				+||	XOR	$Kx9[1],$KxB[1],$KxB[1]
			
 
				+	GMPY4	$s[2],B25,$KxE[2]
			
 
				+||	GMPY4	$s[3],B25,$KxE[3]
			
 
				+||	XOR	$Kx9[2],$KxB[2],$KxB[2]
			
 
				+||	XOR	$Kx9[3],$KxB[3],$KxB[3]
			
 
				+
			
 
				+	ROTL	$KxB[0],TBL3,$KxB[0]
			
 
				+||	ROTL	$KxB[1],TBL3,$KxB[1]
			
 
				+||	SWAP2	$KxD[0],$KxD[0]			; rotate by 16
			
 
				+||	SWAP2	$KxD[1],$KxD[1]
			
 
				+	ROTL	$KxB[2],TBL3,$KxB[2]
			
 
				+||	ROTL	$KxB[3],TBL3,$KxB[3]
			
 
				+||	SWAP2	$KxD[2],$KxD[2]
			
 
				+||	SWAP2	$KxD[3],$KxD[3]
			
 
				+
			
 
				+	XOR	$KxE[0],$KxD[0],$KxE[0]
			
 
				+||	XOR	$KxE[1],$KxD[1],$KxE[1]
			
 
				+|| [B0]	GMPY4	$K[0],A24,$Kx9[0]		; ·0x09
			
 
				+|| [B0]	GMPY4	$K[1],A24,$Kx9[1]
			
 
				+||	ADDAW	$KPA,4,$KPA
			
 
				+	XOR	$KxE[2],$KxD[2],$KxE[2]
			
 
				+||	XOR	$KxE[3],$KxD[3],$KxE[3]
			
 
				+|| [B0]	GMPY4	$K[2],A24,$Kx9[2]
			
 
				+|| [B0]	GMPY4	$K[3],A24,$Kx9[3]
			
 
				+||	ADDAW	$KPB,4,$KPB
			
 
				+
			
 
				+	XOR	$KxB[0],$KxE[0],$KxE[0]
			
 
				+||	XOR	$KxB[1],$KxE[1],$KxE[1]
			
 
				+|| [B0]	GMPY4	$K[0],B24,$KxB[0]		; ·0x0B
			
 
				+|| [B0]	GMPY4	$K[1],B24,$KxB[1]
			
 
				+	XOR	$KxB[2],$KxE[2],$KxE[2]
			
 
				+||	XOR	$KxB[3],$KxE[3],$KxE[3]
			
 
				+|| [B0]	GMPY4	$K[2],B24,$KxB[2]
			
 
				+|| [B0]	GMPY4	$K[3],B24,$KxB[3]
			
 
				+||	STW	$KxE[0],*${KPA}[-4]
			
 
				+||	STW	$KxE[1],*${KPB}[-3]
			
 
				+	STW	$KxE[2],*${KPA}[-2]
			
 
				+||	STW	$KxE[3],*${KPB}[-1]
			
 
				+|| [B0]	SUB	B0,1,B0
			
 
				+	SPKERNEL
			
 
				+;;====================================================================
			
 
				+	BNOP	B31,3
			
 
				+	MVC	B30,GFPGFR			; restore GFPGFR(*)
			
 
				+	MVK	0,RET
			
 
				+	.endasmfunc
			
 
				+___
			
 
				+# (*)	Even though ABI doesn't specify GFPGFR as non-volatile, there
			
 
				+#	are code samples out there that *assume* its default value.
			
 
				+}
			
 
				+{
			
 
				+my ($inp,$out,$blocks,$key,$ivp)=("A4","B4","A6","B6","A8");	# register names interpolated into the heredoc below (presumably the five incoming argument registers -- confirm against the C6x ABI); NOTE(review): $inp/$out are never interpolated, the body uses the INP/OUT assembler symbols instead
			
 
				+$code.=<<___;
			
 
				+	.global	_AES_ctr32_encrypt
			
 
				+_AES_ctr32_encrypt:
			
 
				+	.asmfunc
			
 
				+	LDNDW	*${ivp}[0],A31:A30	; load counter value
			
 
				+||	MV	$blocks,A2		; reassign $blocks
			
 
				+||	DMV	RA,$key,B27:B26		; reassign RA and $key
			
 
				+	LDNDW	*${ivp}[1],B31:B30
			
 
				+||	MVK	0,B2			; don't let __encrypt load input
			
 
				+||	MVK	0,A1			; and postpone writing output
			
 
				+	.if	.BIG_ENDIAN
			
 
				+	NOP
			
 
				+	.else
			
 
				+	NOP	4
			
 
				+	SWAP2	B31,B31			; keep least significant 32 bits
			
 
				+	SWAP4	B31,B31			; in host byte order
			
 
				+	.endif
			
 
				+ctr32_loop?:
			
 
				+   [A2]	BNOP	__encrypt
			
 
				+|| [A1]	XOR	A29,A9,A9		; input^Ek(counter)
			
 
				+|| [A1]	XOR	A28,A8,A8
			
 
				+|| [A2]	LDNDW	*INP++,A29:A28		; load input
			
 
				+  [!A2]	BNOP	B27			; return
			
 
				+|| [A1]	XOR	B29,B9,B9
			
 
				+|| [A1]	XOR	B28,B8,B8
			
 
				+|| [A2]	LDNDW	*INP++,B29:B28
			
 
				+	.if	.BIG_ENDIAN
			
 
				+   [A1]	STNDW	A9:A8,*OUT++		; save output
			
 
				+|| [A2]	DMV	A31,A30,A9:A8		; pass counter value to __encrypt
			
 
				+   [A1]	STNDW	B9:B8,*OUT++
			
 
				+|| [A2]	DMV	B31,B30,B9:B8
			
 
				+|| [A2]	ADD	B30,1,B30		; counter++
			
 
				+	.else
			
 
				+   [A1]	STNDW	A9:A8,*OUT++		; save output
			
 
				+|| [A2]	DMV	A31,A30,A9:A8
			
 
				+|| [A2]	SWAP2	B31,B0
			
 
				+|| [A2]	ADD	B31,1,B31		; counter++
			
 
				+   [A1]	STNDW	B9:B8,*OUT++
			
 
				+|| [A2]	MV	B30,B8
			
 
				+|| [A2]	SWAP4	B0,B9
			
 
				+	.endif
			
 
				+   [A2]	ADDKPC	ctr32_loop?,RA		; return to ctr32_loop?
			
 
				+|| [A2]	MV	B26,KEY			; pass $key
			
 
				+|| [A2]	SUB	A2,1,A2			; $blocks--
			
 
				+||[!A1]	MVK	1,A1
			
 
				+	NOP
			
 
				+	NOP
			
 
				+	.endasmfunc
			
 
				+___
			
 
				+}
			
 
				+# Tables are kept in an endian-neutral manner, i.e. emitted as explicit .byte sequences
			
 
				+$code.=<<___;
			
 
				+	.sect	".const:aes_asm"
			
 
				+	.align	128
			
 
				+AES_Te:
			
 
				+	.byte	0xc6,0x63,0x63,0xa5,	0xf8,0x7c,0x7c,0x84
			
 
				+	.byte	0xee,0x77,0x77,0x99,	0xf6,0x7b,0x7b,0x8d
			
 
				+	.byte	0xff,0xf2,0xf2,0x0d,	0xd6,0x6b,0x6b,0xbd
			
 
				+	.byte	0xde,0x6f,0x6f,0xb1,	0x91,0xc5,0xc5,0x54
			
 
				+	.byte	0x60,0x30,0x30,0x50,	0x02,0x01,0x01,0x03
			
 
				+	.byte	0xce,0x67,0x67,0xa9,	0x56,0x2b,0x2b,0x7d
			
 
				+	.byte	0xe7,0xfe,0xfe,0x19,	0xb5,0xd7,0xd7,0x62
			
 
				+	.byte	0x4d,0xab,0xab,0xe6,	0xec,0x76,0x76,0x9a
			
 
				+	.byte	0x8f,0xca,0xca,0x45,	0x1f,0x82,0x82,0x9d
			
 
				+	.byte	0x89,0xc9,0xc9,0x40,	0xfa,0x7d,0x7d,0x87
			
 
				+	.byte	0xef,0xfa,0xfa,0x15,	0xb2,0x59,0x59,0xeb
			
 
				+	.byte	0x8e,0x47,0x47,0xc9,	0xfb,0xf0,0xf0,0x0b
			
 
				+	.byte	0x41,0xad,0xad,0xec,	0xb3,0xd4,0xd4,0x67
			
 
				+	.byte	0x5f,0xa2,0xa2,0xfd,	0x45,0xaf,0xaf,0xea
			
 
				+	.byte	0x23,0x9c,0x9c,0xbf,	0x53,0xa4,0xa4,0xf7
			
 
				+	.byte	0xe4,0x72,0x72,0x96,	0x9b,0xc0,0xc0,0x5b
			
 
				+	.byte	0x75,0xb7,0xb7,0xc2,	0xe1,0xfd,0xfd,0x1c
			
 
				+	.byte	0x3d,0x93,0x93,0xae,	0x4c,0x26,0x26,0x6a
			
 
				+	.byte	0x6c,0x36,0x36,0x5a,	0x7e,0x3f,0x3f,0x41
			
 
				+	.byte	0xf5,0xf7,0xf7,0x02,	0x83,0xcc,0xcc,0x4f
			
 
				+	.byte	0x68,0x34,0x34,0x5c,	0x51,0xa5,0xa5,0xf4
			
 
				+	.byte	0xd1,0xe5,0xe5,0x34,	0xf9,0xf1,0xf1,0x08
			
 
				+	.byte	0xe2,0x71,0x71,0x93,	0xab,0xd8,0xd8,0x73
			
 
				+	.byte	0x62,0x31,0x31,0x53,	0x2a,0x15,0x15,0x3f
			
 
				+	.byte	0x08,0x04,0x04,0x0c,	0x95,0xc7,0xc7,0x52
			
 
				+	.byte	0x46,0x23,0x23,0x65,	0x9d,0xc3,0xc3,0x5e
			
 
				+	.byte	0x30,0x18,0x18,0x28,	0x37,0x96,0x96,0xa1
			
 
				+	.byte	0x0a,0x05,0x05,0x0f,	0x2f,0x9a,0x9a,0xb5
			
 
				+	.byte	0x0e,0x07,0x07,0x09,	0x24,0x12,0x12,0x36
			
 
				+	.byte	0x1b,0x80,0x80,0x9b,	0xdf,0xe2,0xe2,0x3d
			
 
				+	.byte	0xcd,0xeb,0xeb,0x26,	0x4e,0x27,0x27,0x69
			
 
				+	.byte	0x7f,0xb2,0xb2,0xcd,	0xea,0x75,0x75,0x9f
			
 
				+	.byte	0x12,0x09,0x09,0x1b,	0x1d,0x83,0x83,0x9e
			
 
				+	.byte	0x58,0x2c,0x2c,0x74,	0x34,0x1a,0x1a,0x2e
			
 
				+	.byte	0x36,0x1b,0x1b,0x2d,	0xdc,0x6e,0x6e,0xb2
			
 
				+	.byte	0xb4,0x5a,0x5a,0xee,	0x5b,0xa0,0xa0,0xfb
			
 
				+	.byte	0xa4,0x52,0x52,0xf6,	0x76,0x3b,0x3b,0x4d
			
 
				+	.byte	0xb7,0xd6,0xd6,0x61,	0x7d,0xb3,0xb3,0xce
			
 
				+	.byte	0x52,0x29,0x29,0x7b,	0xdd,0xe3,0xe3,0x3e
			
 
				+	.byte	0x5e,0x2f,0x2f,0x71,	0x13,0x84,0x84,0x97
			
 
				+	.byte	0xa6,0x53,0x53,0xf5,	0xb9,0xd1,0xd1,0x68
			
 
				+	.byte	0x00,0x00,0x00,0x00,	0xc1,0xed,0xed,0x2c
			
 
				+	.byte	0x40,0x20,0x20,0x60,	0xe3,0xfc,0xfc,0x1f
			
 
				+	.byte	0x79,0xb1,0xb1,0xc8,	0xb6,0x5b,0x5b,0xed
			
 
				+	.byte	0xd4,0x6a,0x6a,0xbe,	0x8d,0xcb,0xcb,0x46
			
 
				+	.byte	0x67,0xbe,0xbe,0xd9,	0x72,0x39,0x39,0x4b
			
 
				+	.byte	0x94,0x4a,0x4a,0xde,	0x98,0x4c,0x4c,0xd4
			
 
				+	.byte	0xb0,0x58,0x58,0xe8,	0x85,0xcf,0xcf,0x4a
			
 
				+	.byte	0xbb,0xd0,0xd0,0x6b,	0xc5,0xef,0xef,0x2a
			
 
				+	.byte	0x4f,0xaa,0xaa,0xe5,	0xed,0xfb,0xfb,0x16
			
 
				+	.byte	0x86,0x43,0x43,0xc5,	0x9a,0x4d,0x4d,0xd7
			
 
				+	.byte	0x66,0x33,0x33,0x55,	0x11,0x85,0x85,0x94
			
 
				+	.byte	0x8a,0x45,0x45,0xcf,	0xe9,0xf9,0xf9,0x10
			
 
				+	.byte	0x04,0x02,0x02,0x06,	0xfe,0x7f,0x7f,0x81
			
 
				+	.byte	0xa0,0x50,0x50,0xf0,	0x78,0x3c,0x3c,0x44
			
 
				+	.byte	0x25,0x9f,0x9f,0xba,	0x4b,0xa8,0xa8,0xe3
			
 
				+	.byte	0xa2,0x51,0x51,0xf3,	0x5d,0xa3,0xa3,0xfe
			
 
				+	.byte	0x80,0x40,0x40,0xc0,	0x05,0x8f,0x8f,0x8a
			
 
				+	.byte	0x3f,0x92,0x92,0xad,	0x21,0x9d,0x9d,0xbc
			
 
				+	.byte	0x70,0x38,0x38,0x48,	0xf1,0xf5,0xf5,0x04
			
 
				+	.byte	0x63,0xbc,0xbc,0xdf,	0x77,0xb6,0xb6,0xc1
			
 
				+	.byte	0xaf,0xda,0xda,0x75,	0x42,0x21,0x21,0x63
			
 
				+	.byte	0x20,0x10,0x10,0x30,	0xe5,0xff,0xff,0x1a
			
 
				+	.byte	0xfd,0xf3,0xf3,0x0e,	0xbf,0xd2,0xd2,0x6d
			
 
				+	.byte	0x81,0xcd,0xcd,0x4c,	0x18,0x0c,0x0c,0x14
			
 
				+	.byte	0x26,0x13,0x13,0x35,	0xc3,0xec,0xec,0x2f
			
 
				+	.byte	0xbe,0x5f,0x5f,0xe1,	0x35,0x97,0x97,0xa2
			
 
				+	.byte	0x88,0x44,0x44,0xcc,	0x2e,0x17,0x17,0x39
			
 
				+	.byte	0x93,0xc4,0xc4,0x57,	0x55,0xa7,0xa7,0xf2
			
 
				+	.byte	0xfc,0x7e,0x7e,0x82,	0x7a,0x3d,0x3d,0x47
			
 
				+	.byte	0xc8,0x64,0x64,0xac,	0xba,0x5d,0x5d,0xe7
			
 
				+	.byte	0x32,0x19,0x19,0x2b,	0xe6,0x73,0x73,0x95
			
 
				+	.byte	0xc0,0x60,0x60,0xa0,	0x19,0x81,0x81,0x98
			
 
				+	.byte	0x9e,0x4f,0x4f,0xd1,	0xa3,0xdc,0xdc,0x7f
			
 
				+	.byte	0x44,0x22,0x22,0x66,	0x54,0x2a,0x2a,0x7e
			
 
				+	.byte	0x3b,0x90,0x90,0xab,	0x0b,0x88,0x88,0x83
			
 
				+	.byte	0x8c,0x46,0x46,0xca,	0xc7,0xee,0xee,0x29
			
 
				+	.byte	0x6b,0xb8,0xb8,0xd3,	0x28,0x14,0x14,0x3c
			
 
				+	.byte	0xa7,0xde,0xde,0x79,	0xbc,0x5e,0x5e,0xe2
			
 
				+	.byte	0x16,0x0b,0x0b,0x1d,	0xad,0xdb,0xdb,0x76
			
 
				+	.byte	0xdb,0xe0,0xe0,0x3b,	0x64,0x32,0x32,0x56
			
 
				+	.byte	0x74,0x3a,0x3a,0x4e,	0x14,0x0a,0x0a,0x1e
			
 
				+	.byte	0x92,0x49,0x49,0xdb,	0x0c,0x06,0x06,0x0a
			
 
				+	.byte	0x48,0x24,0x24,0x6c,	0xb8,0x5c,0x5c,0xe4
			
 
				+	.byte	0x9f,0xc2,0xc2,0x5d,	0xbd,0xd3,0xd3,0x6e
			
 
				+	.byte	0x43,0xac,0xac,0xef,	0xc4,0x62,0x62,0xa6
			
 
				+	.byte	0x39,0x91,0x91,0xa8,	0x31,0x95,0x95,0xa4
			
 
				+	.byte	0xd3,0xe4,0xe4,0x37,	0xf2,0x79,0x79,0x8b
			
 
				+	.byte	0xd5,0xe7,0xe7,0x32,	0x8b,0xc8,0xc8,0x43
			
 
				+	.byte	0x6e,0x37,0x37,0x59,	0xda,0x6d,0x6d,0xb7
			
 
				+	.byte	0x01,0x8d,0x8d,0x8c,	0xb1,0xd5,0xd5,0x64
			
 
				+	.byte	0x9c,0x4e,0x4e,0xd2,	0x49,0xa9,0xa9,0xe0
			
 
				+	.byte	0xd8,0x6c,0x6c,0xb4,	0xac,0x56,0x56,0xfa
			
 
				+	.byte	0xf3,0xf4,0xf4,0x07,	0xcf,0xea,0xea,0x25
			
 
				+	.byte	0xca,0x65,0x65,0xaf,	0xf4,0x7a,0x7a,0x8e
			
 
				+	.byte	0x47,0xae,0xae,0xe9,	0x10,0x08,0x08,0x18
			
 
				+	.byte	0x6f,0xba,0xba,0xd5,	0xf0,0x78,0x78,0x88
			
 
				+	.byte	0x4a,0x25,0x25,0x6f,	0x5c,0x2e,0x2e,0x72
			
 
				+	.byte	0x38,0x1c,0x1c,0x24,	0x57,0xa6,0xa6,0xf1
			
 
				+	.byte	0x73,0xb4,0xb4,0xc7,	0x97,0xc6,0xc6,0x51
			
 
				+	.byte	0xcb,0xe8,0xe8,0x23,	0xa1,0xdd,0xdd,0x7c
			
 
				+	.byte	0xe8,0x74,0x74,0x9c,	0x3e,0x1f,0x1f,0x21
			
 
				+	.byte	0x96,0x4b,0x4b,0xdd,	0x61,0xbd,0xbd,0xdc
			
 
				+	.byte	0x0d,0x8b,0x8b,0x86,	0x0f,0x8a,0x8a,0x85
			
 
				+	.byte	0xe0,0x70,0x70,0x90,	0x7c,0x3e,0x3e,0x42
			
 
				+	.byte	0x71,0xb5,0xb5,0xc4,	0xcc,0x66,0x66,0xaa
			
 
				+	.byte	0x90,0x48,0x48,0xd8,	0x06,0x03,0x03,0x05
			
 
				+	.byte	0xf7,0xf6,0xf6,0x01,	0x1c,0x0e,0x0e,0x12
			
 
				+	.byte	0xc2,0x61,0x61,0xa3,	0x6a,0x35,0x35,0x5f
			
 
				+	.byte	0xae,0x57,0x57,0xf9,	0x69,0xb9,0xb9,0xd0
			
 
				+	.byte	0x17,0x86,0x86,0x91,	0x99,0xc1,0xc1,0x58
			
 
				+	.byte	0x3a,0x1d,0x1d,0x27,	0x27,0x9e,0x9e,0xb9
			
 
				+	.byte	0xd9,0xe1,0xe1,0x38,	0xeb,0xf8,0xf8,0x13
			
 
				+	.byte	0x2b,0x98,0x98,0xb3,	0x22,0x11,0x11,0x33
			
 
				+	.byte	0xd2,0x69,0x69,0xbb,	0xa9,0xd9,0xd9,0x70
			
 
				+	.byte	0x07,0x8e,0x8e,0x89,	0x33,0x94,0x94,0xa7
			
 
				+	.byte	0x2d,0x9b,0x9b,0xb6,	0x3c,0x1e,0x1e,0x22
			
 
				+	.byte	0x15,0x87,0x87,0x92,	0xc9,0xe9,0xe9,0x20
			
 
				+	.byte	0x87,0xce,0xce,0x49,	0xaa,0x55,0x55,0xff
			
 
				+	.byte	0x50,0x28,0x28,0x78,	0xa5,0xdf,0xdf,0x7a
			
 
				+	.byte	0x03,0x8c,0x8c,0x8f,	0x59,0xa1,0xa1,0xf8
			
 
				+	.byte	0x09,0x89,0x89,0x80,	0x1a,0x0d,0x0d,0x17
			
 
				+	.byte	0x65,0xbf,0xbf,0xda,	0xd7,0xe6,0xe6,0x31
			
 
				+	.byte	0x84,0x42,0x42,0xc6,	0xd0,0x68,0x68,0xb8
			
 
				+	.byte	0x82,0x41,0x41,0xc3,	0x29,0x99,0x99,0xb0
			
 
				+	.byte	0x5a,0x2d,0x2d,0x77,	0x1e,0x0f,0x0f,0x11
			
 
				+	.byte	0x7b,0xb0,0xb0,0xcb,	0xa8,0x54,0x54,0xfc
			
 
				+	.byte	0x6d,0xbb,0xbb,0xd6,	0x2c,0x16,0x16,0x3a
			
 
				+AES_Te4:
			
 
				+	.byte	0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
			
 
				+	.byte	0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
			
 
				+	.byte	0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
			
 
				+	.byte	0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
			
 
				+	.byte	0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
			
 
				+	.byte	0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
			
 
				+	.byte	0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
			
 
				+	.byte	0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
			
 
				+	.byte	0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
			
 
				+	.byte	0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
			
 
				+	.byte	0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
			
 
				+	.byte	0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
			
 
				+	.byte	0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
			
 
				+	.byte	0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
			
 
				+	.byte	0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
			
 
				+	.byte	0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
			
 
				+	.byte	0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
			
 
				+	.byte	0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
			
 
				+	.byte	0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
			
 
				+	.byte	0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
			
 
				+	.byte	0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
			
 
				+	.byte	0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
			
 
				+	.byte	0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
			
 
				+	.byte	0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
			
 
				+	.byte	0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
			
 
				+	.byte	0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
			
 
				+	.byte	0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
			
 
				+	.byte	0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
			
 
				+	.byte	0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
			
 
				+	.byte	0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
			
 
				+	.byte	0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
			
 
				+	.byte	0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
			
 
				+rcon:
			
 
				+	.byte	0x01,0x00,0x00,0x00,	0x02,0x00,0x00,0x00
			
 
				+	.byte	0x04,0x00,0x00,0x00,	0x08,0x00,0x00,0x00
			
 
				+	.byte	0x10,0x00,0x00,0x00,	0x20,0x00,0x00,0x00
			
 
				+	.byte	0x40,0x00,0x00,0x00,	0x80,0x00,0x00,0x00
			
 
				+	.byte	0x1B,0x00,0x00,0x00,	0x36,0x00,0x00,0x00
			
 
				+	.align	128
			
 
				+AES_Td:
			
 
				+	.byte	0x51,0xf4,0xa7,0x50,	0x7e,0x41,0x65,0x53
			
 
				+	.byte	0x1a,0x17,0xa4,0xc3,	0x3a,0x27,0x5e,0x96
			
 
				+	.byte	0x3b,0xab,0x6b,0xcb,	0x1f,0x9d,0x45,0xf1
			
 
				+	.byte	0xac,0xfa,0x58,0xab,	0x4b,0xe3,0x03,0x93
			
 
				+	.byte	0x20,0x30,0xfa,0x55,	0xad,0x76,0x6d,0xf6
			
 
				+	.byte	0x88,0xcc,0x76,0x91,	0xf5,0x02,0x4c,0x25
			
 
				+	.byte	0x4f,0xe5,0xd7,0xfc,	0xc5,0x2a,0xcb,0xd7
			
 
				+	.byte	0x26,0x35,0x44,0x80,	0xb5,0x62,0xa3,0x8f
			
 
				+	.byte	0xde,0xb1,0x5a,0x49,	0x25,0xba,0x1b,0x67
			
 
				+	.byte	0x45,0xea,0x0e,0x98,	0x5d,0xfe,0xc0,0xe1
			
 
				+	.byte	0xc3,0x2f,0x75,0x02,	0x81,0x4c,0xf0,0x12
			
 
				+	.byte	0x8d,0x46,0x97,0xa3,	0x6b,0xd3,0xf9,0xc6
			
 
				+	.byte	0x03,0x8f,0x5f,0xe7,	0x15,0x92,0x9c,0x95
			
 
				+	.byte	0xbf,0x6d,0x7a,0xeb,	0x95,0x52,0x59,0xda
			
 
				+	.byte	0xd4,0xbe,0x83,0x2d,	0x58,0x74,0x21,0xd3
			
 
				+	.byte	0x49,0xe0,0x69,0x29,	0x8e,0xc9,0xc8,0x44
			
 
				+	.byte	0x75,0xc2,0x89,0x6a,	0xf4,0x8e,0x79,0x78
			
 
				+	.byte	0x99,0x58,0x3e,0x6b,	0x27,0xb9,0x71,0xdd
			
 
				+	.byte	0xbe,0xe1,0x4f,0xb6,	0xf0,0x88,0xad,0x17
			
 
				+	.byte	0xc9,0x20,0xac,0x66,	0x7d,0xce,0x3a,0xb4
			
 
				+	.byte	0x63,0xdf,0x4a,0x18,	0xe5,0x1a,0x31,0x82
			
 
				+	.byte	0x97,0x51,0x33,0x60,	0x62,0x53,0x7f,0x45
			
 
				+	.byte	0xb1,0x64,0x77,0xe0,	0xbb,0x6b,0xae,0x84
			
 
				+	.byte	0xfe,0x81,0xa0,0x1c,	0xf9,0x08,0x2b,0x94
			
 
				+	.byte	0x70,0x48,0x68,0x58,	0x8f,0x45,0xfd,0x19
			
 
				+	.byte	0x94,0xde,0x6c,0x87,	0x52,0x7b,0xf8,0xb7
			
 
				+	.byte	0xab,0x73,0xd3,0x23,	0x72,0x4b,0x02,0xe2
			
 
				+	.byte	0xe3,0x1f,0x8f,0x57,	0x66,0x55,0xab,0x2a
			
 
				+	.byte	0xb2,0xeb,0x28,0x07,	0x2f,0xb5,0xc2,0x03
			
 
				+	.byte	0x86,0xc5,0x7b,0x9a,	0xd3,0x37,0x08,0xa5
			
 
				+	.byte	0x30,0x28,0x87,0xf2,	0x23,0xbf,0xa5,0xb2
			
 
				+	.byte	0x02,0x03,0x6a,0xba,	0xed,0x16,0x82,0x5c
			
 
				+	.byte	0x8a,0xcf,0x1c,0x2b,	0xa7,0x79,0xb4,0x92
			
 
				+	.byte	0xf3,0x07,0xf2,0xf0,	0x4e,0x69,0xe2,0xa1
			
 
				+	.byte	0x65,0xda,0xf4,0xcd,	0x06,0x05,0xbe,0xd5
			
 
				+	.byte	0xd1,0x34,0x62,0x1f,	0xc4,0xa6,0xfe,0x8a
			
 
				+	.byte	0x34,0x2e,0x53,0x9d,	0xa2,0xf3,0x55,0xa0
			
 
				+	.byte	0x05,0x8a,0xe1,0x32,	0xa4,0xf6,0xeb,0x75
			
 
				+	.byte	0x0b,0x83,0xec,0x39,	0x40,0x60,0xef,0xaa
			
 
				+	.byte	0x5e,0x71,0x9f,0x06,	0xbd,0x6e,0x10,0x51
			
 
				+	.byte	0x3e,0x21,0x8a,0xf9,	0x96,0xdd,0x06,0x3d
			
 
				+	.byte	0xdd,0x3e,0x05,0xae,	0x4d,0xe6,0xbd,0x46
			
 
				+	.byte	0x91,0x54,0x8d,0xb5,	0x71,0xc4,0x5d,0x05
			
 
				+	.byte	0x04,0x06,0xd4,0x6f,	0x60,0x50,0x15,0xff
			
 
				+	.byte	0x19,0x98,0xfb,0x24,	0xd6,0xbd,0xe9,0x97
			
 
				+	.byte	0x89,0x40,0x43,0xcc,	0x67,0xd9,0x9e,0x77
			
 
				+	.byte	0xb0,0xe8,0x42,0xbd,	0x07,0x89,0x8b,0x88
			
 
				+	.byte	0xe7,0x19,0x5b,0x38,	0x79,0xc8,0xee,0xdb
			
 
				+	.byte	0xa1,0x7c,0x0a,0x47,	0x7c,0x42,0x0f,0xe9
			
 
				+	.byte	0xf8,0x84,0x1e,0xc9,	0x00,0x00,0x00,0x00
			
 
				+	.byte	0x09,0x80,0x86,0x83,	0x32,0x2b,0xed,0x48
			
 
				+	.byte	0x1e,0x11,0x70,0xac,	0x6c,0x5a,0x72,0x4e
			
 
				+	.byte	0xfd,0x0e,0xff,0xfb,	0x0f,0x85,0x38,0x56
			
 
				+	.byte	0x3d,0xae,0xd5,0x1e,	0x36,0x2d,0x39,0x27
			
 
				+	.byte	0x0a,0x0f,0xd9,0x64,	0x68,0x5c,0xa6,0x21
			
 
				+	.byte	0x9b,0x5b,0x54,0xd1,	0x24,0x36,0x2e,0x3a
			
 
				+	.byte	0x0c,0x0a,0x67,0xb1,	0x93,0x57,0xe7,0x0f
			
 
				+	.byte	0xb4,0xee,0x96,0xd2,	0x1b,0x9b,0x91,0x9e
			
 
				+	.byte	0x80,0xc0,0xc5,0x4f,	0x61,0xdc,0x20,0xa2
			
 
				+	.byte	0x5a,0x77,0x4b,0x69,	0x1c,0x12,0x1a,0x16
			
 
				+	.byte	0xe2,0x93,0xba,0x0a,	0xc0,0xa0,0x2a,0xe5
			
 
				+	.byte	0x3c,0x22,0xe0,0x43,	0x12,0x1b,0x17,0x1d
			
 
				+	.byte	0x0e,0x09,0x0d,0x0b,	0xf2,0x8b,0xc7,0xad
			
 
				+	.byte	0x2d,0xb6,0xa8,0xb9,	0x14,0x1e,0xa9,0xc8
			
 
				+	.byte	0x57,0xf1,0x19,0x85,	0xaf,0x75,0x07,0x4c
			
 
				+	.byte	0xee,0x99,0xdd,0xbb,	0xa3,0x7f,0x60,0xfd
			
 
				+	.byte	0xf7,0x01,0x26,0x9f,	0x5c,0x72,0xf5,0xbc
			
 
				+	.byte	0x44,0x66,0x3b,0xc5,	0x5b,0xfb,0x7e,0x34
			
 
				+	.byte	0x8b,0x43,0x29,0x76,	0xcb,0x23,0xc6,0xdc
			
 
				+	.byte	0xb6,0xed,0xfc,0x68,	0xb8,0xe4,0xf1,0x63
			
 
				+	.byte	0xd7,0x31,0xdc,0xca,	0x42,0x63,0x85,0x10
			
 
				+	.byte	0x13,0x97,0x22,0x40,	0x84,0xc6,0x11,0x20
			
 
				+	.byte	0x85,0x4a,0x24,0x7d,	0xd2,0xbb,0x3d,0xf8
			
 
				+	.byte	0xae,0xf9,0x32,0x11,	0xc7,0x29,0xa1,0x6d
			
 
				+	.byte	0x1d,0x9e,0x2f,0x4b,	0xdc,0xb2,0x30,0xf3
			
 
				+	.byte	0x0d,0x86,0x52,0xec,	0x77,0xc1,0xe3,0xd0
			
 
				+	.byte	0x2b,0xb3,0x16,0x6c,	0xa9,0x70,0xb9,0x99
			
 
				+	.byte	0x11,0x94,0x48,0xfa,	0x47,0xe9,0x64,0x22
			
 
				+	.byte	0xa8,0xfc,0x8c,0xc4,	0xa0,0xf0,0x3f,0x1a
			
 
				+	.byte	0x56,0x7d,0x2c,0xd8,	0x22,0x33,0x90,0xef
			
 
				+	.byte	0x87,0x49,0x4e,0xc7,	0xd9,0x38,0xd1,0xc1
			
 
				+	.byte	0x8c,0xca,0xa2,0xfe,	0x98,0xd4,0x0b,0x36
			
 
				+	.byte	0xa6,0xf5,0x81,0xcf,	0xa5,0x7a,0xde,0x28
			
 
				+	.byte	0xda,0xb7,0x8e,0x26,	0x3f,0xad,0xbf,0xa4
			
 
				+	.byte	0x2c,0x3a,0x9d,0xe4,	0x50,0x78,0x92,0x0d
			
 
				+	.byte	0x6a,0x5f,0xcc,0x9b,	0x54,0x7e,0x46,0x62
			
 
				+	.byte	0xf6,0x8d,0x13,0xc2,	0x90,0xd8,0xb8,0xe8
			
 
				+	.byte	0x2e,0x39,0xf7,0x5e,	0x82,0xc3,0xaf,0xf5
			
 
				+	.byte	0x9f,0x5d,0x80,0xbe,	0x69,0xd0,0x93,0x7c
			
 
				+	.byte	0x6f,0xd5,0x2d,0xa9,	0xcf,0x25,0x12,0xb3
			
 
				+	.byte	0xc8,0xac,0x99,0x3b,	0x10,0x18,0x7d,0xa7
			
 
				+	.byte	0xe8,0x9c,0x63,0x6e,	0xdb,0x3b,0xbb,0x7b
			
 
				+	.byte	0xcd,0x26,0x78,0x09,	0x6e,0x59,0x18,0xf4
			
 
				+	.byte	0xec,0x9a,0xb7,0x01,	0x83,0x4f,0x9a,0xa8
			
 
				+	.byte	0xe6,0x95,0x6e,0x65,	0xaa,0xff,0xe6,0x7e
			
 
				+	.byte	0x21,0xbc,0xcf,0x08,	0xef,0x15,0xe8,0xe6
			
 
				+	.byte	0xba,0xe7,0x9b,0xd9,	0x4a,0x6f,0x36,0xce
			
 
				+	.byte	0xea,0x9f,0x09,0xd4,	0x29,0xb0,0x7c,0xd6
			
 
				+	.byte	0x31,0xa4,0xb2,0xaf,	0x2a,0x3f,0x23,0x31
			
 
				+	.byte	0xc6,0xa5,0x94,0x30,	0x35,0xa2,0x66,0xc0
			
 
				+	.byte	0x74,0x4e,0xbc,0x37,	0xfc,0x82,0xca,0xa6
			
 
				+	.byte	0xe0,0x90,0xd0,0xb0,	0x33,0xa7,0xd8,0x15
			
 
				+	.byte	0xf1,0x04,0x98,0x4a,	0x41,0xec,0xda,0xf7
			
 
				+	.byte	0x7f,0xcd,0x50,0x0e,	0x17,0x91,0xf6,0x2f
			
 
				+	.byte	0x76,0x4d,0xd6,0x8d,	0x43,0xef,0xb0,0x4d
			
 
				+	.byte	0xcc,0xaa,0x4d,0x54,	0xe4,0x96,0x04,0xdf
			
 
				+	.byte	0x9e,0xd1,0xb5,0xe3,	0x4c,0x6a,0x88,0x1b
			
 
				+	.byte	0xc1,0x2c,0x1f,0xb8,	0x46,0x65,0x51,0x7f
			
 
				+	.byte	0x9d,0x5e,0xea,0x04,	0x01,0x8c,0x35,0x5d
			
 
				+	.byte	0xfa,0x87,0x74,0x73,	0xfb,0x0b,0x41,0x2e
			
 
				+	.byte	0xb3,0x67,0x1d,0x5a,	0x92,0xdb,0xd2,0x52
			
 
				+	.byte	0xe9,0x10,0x56,0x33,	0x6d,0xd6,0x47,0x13
			
 
				+	.byte	0x9a,0xd7,0x61,0x8c,	0x37,0xa1,0x0c,0x7a
			
 
				+	.byte	0x59,0xf8,0x14,0x8e,	0xeb,0x13,0x3c,0x89
			
 
				+	.byte	0xce,0xa9,0x27,0xee,	0xb7,0x61,0xc9,0x35
			
 
				+	.byte	0xe1,0x1c,0xe5,0xed,	0x7a,0x47,0xb1,0x3c
			
 
				+	.byte	0x9c,0xd2,0xdf,0x59,	0x55,0xf2,0x73,0x3f
			
 
				+	.byte	0x18,0x14,0xce,0x79,	0x73,0xc7,0x37,0xbf
			
 
				+	.byte	0x53,0xf7,0xcd,0xea,	0x5f,0xfd,0xaa,0x5b
			
 
				+	.byte	0xdf,0x3d,0x6f,0x14,	0x78,0x44,0xdb,0x86
			
 
				+	.byte	0xca,0xaf,0xf3,0x81,	0xb9,0x68,0xc4,0x3e
			
 
				+	.byte	0x38,0x24,0x34,0x2c,	0xc2,0xa3,0x40,0x5f
			
 
				+	.byte	0x16,0x1d,0xc3,0x72,	0xbc,0xe2,0x25,0x0c
			
 
				+	.byte	0x28,0x3c,0x49,0x8b,	0xff,0x0d,0x95,0x41
			
 
				+	.byte	0x39,0xa8,0x01,0x71,	0x08,0x0c,0xb3,0xde
			
 
				+	.byte	0xd8,0xb4,0xe4,0x9c,	0x64,0x56,0xc1,0x90
			
 
				+	.byte	0x7b,0xcb,0x84,0x61,	0xd5,0x32,0xb6,0x70
			
 
				+	.byte	0x48,0x6c,0x5c,0x74,	0xd0,0xb8,0x57,0x42
			
 
				+AES_Td4:
			
 
				+	.byte	0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
			
 
				+	.byte	0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
			
 
				+	.byte	0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
			
 
				+	.byte	0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
			
 
				+	.byte	0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
			
 
				+	.byte	0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
			
 
				+	.byte	0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
			
 
				+	.byte	0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
			
 
				+	.byte	0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
			
 
				+	.byte	0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
			
 
				+	.byte	0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
			
 
				+	.byte	0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
			
 
				+	.byte	0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
			
 
				+	.byte	0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
			
 
				+	.byte	0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
			
 
				+	.byte	0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
			
 
				+	.byte	0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
			
 
				+	.byte	0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
			
 
				+	.byte	0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
			
 
				+	.byte	0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
			
 
				+	.byte	0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
			
 
				+	.byte	0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
			
 
				+	.byte	0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
			
 
				+	.byte	0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
			
 
				+	.byte	0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
			
 
				+	.byte	0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
			
 
				+	.byte	0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
			
 
				+	.byte	0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
			
 
				+	.byte	0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
			
 
				+	.byte	0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
			
 
				+	.byte	0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
			
 
				+	.byte	0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
			
 
				+	.cstring "AES for C64x+, CRYPTOGAMS by <appro\@openssl.org>"
			
 
				+	.align	4
			
 
				+___
			
 
				+
			
 
				+print $code;
			
--- a/crypto/bn/asm/bn-c64xplus.asm
+++ b/crypto/bn/asm/bn-c64xplus.asm
@@ -0,0 +1,333 @@
 
				+;;====================================================================
			
 
				+;; Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
			
 
				+;; project.
			
 
				+;;
			
 
				+;; Rights for redistribution and usage in source and binary forms are
			
 
				+;; granted according to the OpenSSL license. Warranty of any kind is
			
 
				+;; disclaimed.
			
 
				+;;====================================================================
			
 
				+;; Compiler-generated multiply-n-add SPLOOP runs at 12*n cycles, n
			
 
				+;; being the number of 32-bit words, addition - 8*n. Corresponding 4x
			
 
				+;; unrolled SPLOOP-free loops - at ~8*n and ~5*n. Below assembler
			
 
				+;; SPLOOPs spin at ... 2*n cycles [plus epilogue].
			
 
				+;;====================================================================
			
 
				+	.text
			
 
				+
			
 
				+	.asg	B3,RA
			
 
				+	.asg	A4,ARG0
			
 
				+	.asg	B4,ARG1
			
 
				+	.asg	A6,ARG2
			
 
				+	.asg	B6,ARG3
			
 
				+	.asg	A8,ARG4
			
 
				+	.asg	B8,ARG5
			
 
				+	.asg	A4,RET
			
 
				+	.asg	A15,FP
			
 
				+	.asg	B14,DP
			
 
				+	.asg	B15,SP
			
 
				+
			
 
				+;; bn_mul_add_words: rp (ARG0), ap (ARG1), word count (ARG2), w (ARG3).
			
 
				+;; For each word: rp[i] += ap[i]*w, with the high part of every partial
			
 
				+;; sum carried into the next word. The last carry is returned in RET;
			
 
				+;; a zero word count returns 0.
			
 
				+	.global	_bn_mul_add_words
			
 
				+_bn_mul_add_words:
			
 
				+	.asmfunc
			
 
				+	MV	ARG2,B0		; word count, doubles as predicate
			
 
				+  [!B0]	BNOP	RA
			
 
				+||[!B0]	MVK	0,RET
			
 
				+   [B0]	MVC	B0,ILC
			
 
				+   [B0]	ZERO	A19		; high part of accumulator
			
 
				+|| [B0]	MV	ARG0,A2		; A2 is the rp store pointer
			
 
				+|| [B0]	MV	ARG3,A3		; A3 is the multiplier w
			
 
				+	NOP	3
			
 
				+
			
 
				+	SPLOOP	2		; 2*n+10
			
 
				+;;====================================================================
			
 
				+	LDW	*ARG1++,B7	; ap[i]
			
 
				+	NOP	3
			
 
				+	LDW	*ARG0++,A7	; rp[i]
			
 
				+	MPY32U	B7,A3,A17:A16	; ap[i]*w
			
 
				+	NOP	3		; [2,0] in epilogue
			
 
				+	ADDU	A16,A7,A21:A20
			
 
				+	ADDU	A19,A21:A20,A19:A18
			
 
				+||	MV.S	A17,A23
			
 
				+	SPKERNEL 2,1		; leave slot for "return value"
			
 
				+||	STW	A18,*A2++	; rp[i]
			
 
				+||	ADD	A19,A23,A19
			
 
				+;;====================================================================
			
 
				+	BNOP	RA,4
			
 
				+	MV	A19,RET		; return value
			
 
				+	.endasmfunc
			
 
				+
			
 
				+;; bn_mul_words: rp (ARG0), ap (ARG1), word count (ARG2), w (ARG3).
			
 
				+;; For each word: rp[i] = low word of ap[i]*w plus the carry from the
			
 
				+;; previous word. The last carry is returned in RET; a zero word count
			
 
				+;; returns 0.
			
 
				+	.global	_bn_mul_words
			
 
				+_bn_mul_words:
			
 
				+	.asmfunc
			
 
				+	MV	ARG2,B0		; word count, doubles as predicate
			
 
				+  [!B0]	BNOP	RA
			
 
				+||[!B0]	MVK	0,RET
			
 
				+   [B0]	MVC	B0,ILC
			
 
				+   [B0]	ZERO	A19		; high part of accumulator
			
 
				+	NOP	3
			
 
				+
			
 
				+	SPLOOP	2		; 2*n+10
			
 
				+;;====================================================================
			
 
				+	LDW	*ARG1++,A7	; ap[i]
			
 
				+	NOP	4
			
 
				+	MPY32U	A7,ARG3,A17:A16
			
 
				+	NOP	4		; [2,0] in epilogue
			
 
				+	ADDU	A19,A16,A19:A18
			
 
				+||	MV.S	A17,A21
			
 
				+	SPKERNEL 2,1		; leave slot for "return value"
			
 
				+||	STW	A18,*ARG0++	; rp[i]
			
 
				+||	ADD.L	A19,A21,A19
			
 
				+;;====================================================================
			
 
				+	BNOP	RA,4
			
 
				+	MV	A19,RET		; return value
			
 
				+	.endasmfunc
			
 
				+
			
 
				+;; bn_sqr_words: rp (ARG0), ap (ARG1), word count (ARG2).
			
 
				+;; For each word the product ap[i]*ap[i] is stored as rp[2*i] (low
			
 
				+;; word) and rp[2*i+1] (high word). Two store pointers are kept: B2
			
 
				+;; for the even words and ARG0, advanced by 4 up front, for the odd
			
 
				+;; words.
			
 
				+	.global	_bn_sqr_words
			
 
				+_bn_sqr_words:
			
 
				+	.asmfunc
			
 
				+	MV	ARG2,B0		; word count, doubles as predicate
			
 
				+  [!B0]	BNOP	RA
			
 
				+||[!B0]	MVK	0,RET
			
 
				+   [B0]	MVC	B0,ILC
			
 
				+   [B0]	MV	ARG0,B2
			
 
				+|| [B0]	ADD	4,ARG0,ARG0
			
 
				+	NOP	3
			
 
				+
			
 
				+	SPLOOP	2		; 2*n+10
			
 
				+;;====================================================================
			
 
				+	LDW	*ARG1++,B7	; ap[i]
			
 
				+	NOP	4
			
 
				+	MPY32U	B7,B7,B1:B0
			
 
				+	NOP	3		; [2,0] in epilogue
			
 
				+	STW	B0,*B2++(8)	; rp[2*i]
			
 
				+	MV	B1,A1
			
 
				+	SPKERNEL 2,0		; fully overlap BNOP RA,5
			
 
				+||	STW	A1,*ARG0++(8)	; rp[2*i+1]
			
 
				+;;====================================================================
			
 
				+	BNOP	RA,5
			
 
				+	.endasmfunc
			
 
				+
			
 
				+;; bn_add_words: rp (ARG0), ap (ARG1), bp (ARG2), word count (ARG3).
			
 
				+;; For each word: rp[i] = ap[i] + bp[i] + carry. The carry lives in A1
			
 
				+;; and is copied to RET on every iteration, so RET holds the final
			
 
				+;; carry on return; a zero word count returns 0.
			
 
				+	.global	_bn_add_words
			
 
				+_bn_add_words:
			
 
				+	.asmfunc
			
 
				+	MV	ARG3,B0		; word count, doubles as predicate
			
 
				+  [!B0]	BNOP	RA
			
 
				+||[!B0]	MVK	0,RET
			
 
				+   [B0]	MVC	B0,ILC
			
 
				+   [B0]	ZERO	A1		; carry flag
			
 
				+|| [B0]	MV	ARG0,A3		; A3 is the rp store pointer
			
 
				+	NOP	3
			
 
				+
			
 
				+	SPLOOP	2		; 2*n+6
			
 
				+;;====================================================================
			
 
				+	LDW	*ARG2++,A7	; bp[i]
			
 
				+||	LDW	*ARG1++,B7	; ap[i]
			
 
				+	NOP	4
			
 
				+	ADDU	A7,B7,A9:A8
			
 
				+	ADDU	A1,A9:A8,A1:A0	; add incoming carry
			
 
				+	SPKERNEL 0,0		; fully overlap BNOP RA,5
			
 
				+||	STW	A0,*A3++	; write result
			
 
				+||	MV	A1,RET		; keep carry flag in RET
			
 
				+;;====================================================================
			
 
				+	BNOP	RA,5
			
 
				+	.endasmfunc
			
 
				+
			
 
				+;; bn_sub_words: rp (ARG0), ap (ARG1), bp (ARG2), word count (ARG3).
			
 
				+;; For each word: rp[i] = ap[i] - bp[i] - borrow. The borrow is taken
			
 
				+;; from bit 0 of the high part (A1) of the difference and is passed to
			
 
				+;; the next iteration in A2. The final borrow is returned in RET; a
			
 
				+;; zero word count returns 0.
			
 
				+	.global	_bn_sub_words
			
 
				+_bn_sub_words:
			
 
				+	.asmfunc
			
 
				+	MV	ARG3,B0		; word count, doubles as predicate
			
 
				+  [!B0]	BNOP	RA
			
 
				+||[!B0]	MVK	0,RET
			
 
				+   [B0]	MVC	B0,ILC
			
 
				+   [B0]	ZERO	A2		; borrow flag
			
 
				+|| [B0]	MV	ARG0,A3		; A3 is the rp store pointer
			
 
				+	NOP	3
			
 
				+
			
 
				+	SPLOOP	2		; 2*n+6
			
 
				+;;====================================================================
			
 
				+	LDW	*ARG2++,A7	; bp[i]
			
 
				+||	LDW	*ARG1++,B7	; ap[i]
			
 
				+	NOP	4
			
 
				+	SUBU	B7,A7,A1:A0	; ap[i]-bp[i]
			
 
				+  [A2]	SUB	A1:A0,1,A1:A0	; subtract incoming borrow
			
 
				+	SPKERNEL 0,1		; leave slot for "return borrow flag"
			
 
				+||	STW	A0,*A3++	; write result
			
 
				+||	AND	1,A1,A2		; pass on borrow flag
			
 
				+;;====================================================================
			
 
				+	BNOP	RA,4
			
 
				+	AND	1,A1,RET	; return borrow flag
			
 
				+	.endasmfunc
			
 
				+
			
 
				+;; bn_div_words: forwards to __divull from the run-time library.
			
 
				+;; The arguments are shuffled in parallel with the call: A5 <- ARG0,
			
 
				+;; ARG0 (A4) <- ARG1, ARG1 (B4) <- ARG2 and B5 <- 0.
			
 
				+;; NOTE(review): presumably this forms the dividend in A5:A4 and the
			
 
				+;; divisor in B5:B4 for an unsigned 64-bit division -- confirm against
			
 
				+;; the TI run-time ABI.
			
 
				+;; NOTE(review): no return sequence follows CALLP; it looks like
			
 
				+;; __divull is expected to return straight to our caller through RA,
			
 
				+;; which is left untouched here -- confirm.
			
 
				+	.global	_bn_div_words
			
 
				+	.global	__divull
			
 
				+_bn_div_words:
			
 
				+	.asmfunc
			
 
				+	CALLP	__divull,A3	; jump to rts64plus.lib
			
 
				+||	MV	ARG0,A5
			
 
				+||	MV	ARG1,ARG0
			
 
				+||	MV	ARG2,ARG1
			
 
				+||	ZERO	B5
			
 
				+	.endasmfunc
			
 
				+
			
 
				+;;====================================================================
			
 
				+;; Not really Comba algorithm, just straightforward NxM... Dedicated
			
 
				+;; fully unrolled real Comba implementations are asymptotically 2x
			
 
				+;; faster, but naturally larger undertaking. Purpose of this exercise
			
 
				+;; was rather to learn to master nested SPLOOPs...
			
 
				+;;====================================================================
			
 
				+;; bn_sqr_comba8 / bn_mul_comba8: rp (ARG0), ap (ARG1), bp (ARG2).
			
 
				+;; bn_sqr_comba8 copies ap into the bp argument and falls through, so
			
 
				+;; squaring is done as ap*ap by the multiplication code. The product is
			
 
				+;; computed with N=M=8 by a nested loop: the inner SPLOOP walks bp[]
			
 
				+;; with the multiply-and-add kernel, the outer loop steps through ap[].
			
 
				+	.global	_bn_sqr_comba8
			
 
				+	.global	_bn_mul_comba8
			
 
				+_bn_sqr_comba8:
			
 
				+	MV	ARG1,ARG2	; bp = ap, then fall through
			
 
				+_bn_mul_comba8:
			
 
				+	.asmfunc
			
 
				+	MVK	8,B0		; N, RILC
			
 
				+||	MVK	8,A0		; M, outer loop counter
			
 
				+||	MV	ARG1,A5		; copy ap
			
 
				+||	MV	ARG0,B4		; copy rp
			
 
				+||	ZERO	B19		; high part of accumulator
			
 
				+	MVC	B0,RILC
			
 
				+||	SUB	B0,2,B1		; N-2, initial ILC
			
 
				+||	SUB	B0,1,B2		; const B2=N-1
			
 
				+||	LDW	*A5++,B6	; ap[0]
			
 
				+||	MV	A0,A3		; const A3=M
			
 
				+sploopNxM?:			; for best performance arrange M<=N
			
 
				+   [A0]	SPLOOPD	2		; 2*n+10
			
 
				+||	MVC	B1,ILC
			
 
				+||	ADDAW	B4,B0,B5
			
 
				+||	ZERO	B7
			
 
				+||	LDW	*A5++,A9	; pre-fetch ap[1]
			
 
				+||	ZERO	A1
			
 
				+||	SUB	A0,1,A0
			
 
				+;;====================================================================
			
 
				+;; SPLOOP from bn_mul_add_words, but with flipped A<>B register files.
			
 
				+;; This is because of Advisory 15 from TI publication SPRZ247I.
			
 
				+	LDW	*ARG2++,A7	; bp[i]
			
 
				+	NOP	3
			
 
				+   [A1]	LDW	*B5++,B7	; rp[i]
			
 
				+	MPY32U	A7,B6,B17:B16
			
 
				+	NOP	3
			
 
				+	ADDU	B16,B7,B21:B20
			
 
				+	ADDU	B19,B21:B20,B19:B18
			
 
				+||	MV.S	B17,B23
			
 
				+	SPKERNEL
			
 
				+||	STW	B18,*B4++	; rp[i]
			
 
				+||	ADD.S	B19,B23,B19
			
 
				+;;====================================================================
			
 
				+outer?:				; m*2*(n+1)+10
			
 
				+	SUBAW	ARG2,A3,ARG2	; rewind bp to bp[0]
			
 
				+	SPMASKR
			
 
				+||	CMPGT	A0,1,A2		; done pre-fetching ap[i+1]?
			
 
				+	MVD	A9,B6		; move through .M unit(*)
			
 
				+   [A2]	LDW	*A5++,A9	; pre-fetch ap[i+1]
			
 
				+	SUBAW	B5,B2,B5	; rewind rp to rp[1]
			
 
				+	MVK	1,A1
			
 
				+   [A0]	BNOP.S1	outer?,4
			
 
				+|| [A0]	SUB.L	A0,1,A0
			
 
				+	STW	B19,*B4--[B2]	; rewind rp to rp[1]
			
 
				+||	ZERO.S	B19		; high part of accumulator
			
 
				+;; end of outer?
			
 
				+	BNOP	RA,5		; return
			
 
				+	.endasmfunc
			
 
				+;; (*)	It should be noted that B6 is used as input to MPY32U in
			
 
				+;;	chronologically next cycle in *preceding* SPLOOP iteration.
			
 
				+;;	Normally such arrangement would require DINT, but at this
			
 
				+;;	point SPLOOP is draining and interrupts are disabled
			
 
				+;;	implicitly.
			
 
				+
			
 
				+;; bn_sqr_comba4 / bn_mul_comba4: rp (ARG0), ap (ARG1), bp (ARG2).
			
 
				+;; bn_sqr_comba4 copies ap into the bp argument and falls through.
			
 
				+;; The ".if 0" branch is disabled: it would set up N=M=4 and branch to
			
 
				+;; the sploopNxM? label. The ".else" branch, the one that is actually
			
 
				+;; assembled, is a fully unrolled 4x4 multiplication that stores the
			
 
				+;; eight result words rp[0..7] as they become final.
			
 
				+	.global	_bn_sqr_comba4
			
 
				+	.global	_bn_mul_comba4
			
 
				+_bn_sqr_comba4:
			
 
				+	MV	ARG1,ARG2	; bp = ap, then fall through
			
 
				+_bn_mul_comba4:
			
 
				+	.asmfunc
			
 
				+	.if	0
			
 
				+	BNOP	sploopNxM?,3
			
 
				+	;; Above mentioned m*2*(n+1)+10 does not apply in n=m=4 case,
			
 
				+	;; because of read-after-write penalties, it's rather
			
 
				+	;; n*2*(n+3)+10, or 66 cycles [plus various overheads]...
			
 
				+	MVK	4,B0		; N, RILC
			
 
				+||	MVK	4,A0		; M, outer loop counter
			
 
				+||	MV	ARG1,A5		; copy ap
			
 
				+||	MV	ARG0,B4		; copy rp
			
 
				+||	ZERO	B19		; high part of accumulator
			
 
				+	MVC	B0,RILC
			
 
				+||	SUB	B0,2,B1		; first ILC
			
 
				+||	SUB	B0,1,B2		; const B2=N-1
			
 
				+||	LDW	*A5++,B6	; ap[0]
			
 
				+||	MV	A0,A3		; const A3=M
			
 
				+	.else
			
 
				+	;; This alternative is exercise in fully unrolled Comba
			
 
				+	;; algorithm implementation that operates at n*(n+1)+12, or
			
 
				+	;; as little as 32 cycles...
			
 
				+	LDW	*ARG1[0],B16	; a[0]
			
 
				+||	LDW	*ARG2[0],A16	; b[0]
			
 
				+	LDW	*ARG1[1],B17	; a[1]
			
 
				+||	LDW	*ARG2[1],A17	; b[1]
			
 
				+	LDW	*ARG1[2],B18	; a[2]
			
 
				+||	LDW	*ARG2[2],A18	; b[2]
			
 
				+	LDW	*ARG1[3],B19	; a[3]
			
 
				+||	LDW	*ARG2[3],A19	; b[3]
			
 
				+	NOP
			
 
				+	MPY32U	A16,B16,A1:A0	; a[0]*b[0]
			
 
				+	MPY32U	A17,B16,A23:A22	; a[0]*b[1]
			
 
				+	MPY32U	A16,B17,A25:A24	; a[1]*b[0]
			
 
				+	MPY32U	A16,B18,A27:A26	; a[2]*b[0]
			
 
				+	STW	A0,*ARG0[0]
			
 
				+||	MPY32U	A17,B17,A29:A28	; a[1]*b[1]
			
 
				+	MPY32U	A18,B16,A31:A30	; a[0]*b[2]
			
 
				+||	ADDU	A22,A1,A1:A0
			
 
				+	MV	A23,B0
			
 
				+||	MPY32U	A19,B16,A21:A20	; a[3]*b[0]
			
 
				+||	ADDU	A24,A1:A0,A1:A0
			
 
				+	ADDU	A25,B0,B1:B0
			
 
				+||	STW	A0,*ARG0[1]
			
 
				+||	MPY32U	A18,B17,A23:A22	; a[2]*b[1]
			
 
				+||	ADDU	A26,A1,A9:A8
			
 
				+	ADDU	A27,B1,B9:B8
			
 
				+||	MPY32U	A17,B18,A25:A24	; a[1]*b[2]
			
 
				+||	ADDU	A28,A9:A8,A9:A8
			
 
				+	ADDU	A29,B9:B8,B9:B8
			
 
				+||	MPY32U	A16,B19,A27:A26	; a[0]*b[3]
			
 
				+||	ADDU	A30,A9:A8,A9:A8
			
 
				+	ADDU	A31,B9:B8,B9:B8
			
 
				+||	ADDU	B0,A9:A8,A9:A8
			
 
				+	STW	A8,*ARG0[2]
			
 
				+||	ADDU	A20,A9,A1:A0
			
 
				+	ADDU	A21,B9,B1:B0
			
 
				+||	MPY32U	A19,B17,A21:A20	; a[3]*b[1]
			
 
				+||	ADDU	A22,A1:A0,A1:A0
			
 
				+	ADDU	A23,B1:B0,B1:B0
			
 
				+||	MPY32U	A18,B18,A23:A22	; a[2]*b[2]
			
 
				+||	ADDU	A24,A1:A0,A1:A0
			
 
				+	ADDU	A25,B1:B0,B1:B0
			
 
				+||	MPY32U	A17,B19,A25:A24	; a[1]*b[3]
			
 
				+||	ADDU	A26,A1:A0,A1:A0
			
 
				+	ADDU	A27,B1:B0,B1:B0
			
 
				+||	ADDU	B8,A1:A0,A1:A0
			
 
				+	STW	A0,*ARG0[3]
			
 
				+||	MPY32U	A19,B18,A27:A26	; a[3]*b[2]
			
 
				+||	ADDU	A20,A1,A9:A8
			
 
				+	ADDU	A21,B1,B9:B8
			
 
				+||	MPY32U	A18,B19,A29:A28	; a[2]*b[3]
			
 
				+||	ADDU	A22,A9:A8,A9:A8
			
 
				+	ADDU	A23,B9:B8,B9:B8
			
 
				+||	MPY32U	A19,B19,A31:A30	; a[3]*b[3]
			
 
				+||	ADDU	A24,A9:A8,A9:A8
			
 
				+	ADDU	A25,B9:B8,B9:B8
			
 
				+||	ADDU	B0,A9:A8,A9:A8
			
 
				+	STW	A8,*ARG0[4]
			
 
				+||	ADDU	A26,A9,A1:A0
			
 
				+	ADDU	A27,B9,B1:B0
			
 
				+||	ADDU	A28,A1:A0,A1:A0
			
 
				+	ADDU	A29,B1:B0,B1:B0
			
 
				+||	BNOP	RA
			
 
				+||	ADDU	B8,A1:A0,A1:A0
			
 
				+	STW	A0,*ARG0[5]
			
 
				+||	ADDU	A30,A1,A9:A8
			
 
				+	ADD	A31,B1,B8
			
 
				+	ADDU	B0,A9:A8,A9:A8	; removed || to avoid cross-path stall below
			
 
				+	ADD	B8,A9,A9
			
 
				+||	STW	A8,*ARG0[6]
			
 
				+	STW	A9,*ARG0[7]
			
 
				+	.endif
			
 
				+	.endasmfunc
			
--- a/crypto/bn/asm/c64xplus-gf2m.pl
+++ b/crypto/bn/asm/c64xplus-gf2m.pl
@@ -0,0 +1,146 @@
 
				+#!/usr/bin/env perl
			
 
				+#
			
 
				+# ====================================================================
			
 
				+# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
			
 
				+# project. The module is, however, dual licensed under OpenSSL and
			
 
				+# CRYPTOGAMS licenses depending on where you obtain it. For further
			
 
				+# details see http://www.openssl.org/~appro/cryptogams/.
			
 
				+# ====================================================================
			
 
				+#
			
 
				+# February 2012
			
 
				+#
			
 
				+# The module implements bn_GF2m_mul_2x2 polynomial multiplication
			
 
				+# used in bn_gf2m.c. It's kind of low-hanging mechanical port from
			
 
				+# C for the time being... The subroutine runs in 37 cycles, which is
			
 
				+# 4.5x faster than compiler-generated code. Though comparison is
			
 
				+# totally unfair, because this module utilizes Galois Field Multiply
			
 
				+# instruction.
			
 
				+
			
 
				+# Consume arguments until one looks like a file name (name, dot,
			
 
				+# extension) and redirect STDOUT to it.
			
 
				+# NOTE(review): the result of open is not checked here.
			
 
				+while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {}
			
 
				+open STDOUT,">$output";
			
 
				+
			
 
				+($rp,$a1,$a0,$b1,$b0)=("A4","B4","A6","B6","A8");   # argument vector
			
 
				+
			
 
				+# Registers for the two halfwords of one operand ($Alo,$Ahi) and for
			
 
				+# their products with the four bytes of the other operand.
			
 
				+($Alo,$Alox0,$Alox1,$Alox2,$Alox3)=map("A$_",(16..20));
			
 
				+($Ahi,$Ahix0,$Ahix1,$Ahix2,$Ahix3)=map("B$_",(16..20));
			
 
				+# Registers for the four bytes of the other operand.
			
 
				+($B_0,$B_1,$B_2,$B_3)=("B5","A5","A7","B7");
			
 
				+# Working copies of the operands share registers with $Alo and $B_1.
			
 
				+($A,$B)=($Alo,$B_1);
			
 
				+# Register that is loaded with the 0xFF byte mask.
			
 
				+$xFF="B1";
			
 
				+
			
 
				+# Append the opening part of a 1x1 word multiplication to $code:
			
 
				+# split $B into four bytes and $A into two halfwords, then issue the
			
 
				+# eight XORMPY halfword-by-byte products. The products are left in
			
 
				+# $Alox0..3 and $Ahix0..3; combining them is done by the code that
			
 
				+# mul_1x1_merged or mul_1x1_lower emits.
			
 
				+# Arguments: ($A,$B) - registers holding the two operands.
			
 
				+sub mul_1x1_upper {
			
 
				+my ($A,$B)=@_;
			
 
				+$code.=<<___;
			
 
				+	EXTU	$B,8,24,$B_2		; smash $B to 4 bytes
			
 
				+||	AND	$B,$xFF,$B_0
			
 
				+||	SHRU	$B,24,$B_3
			
 
				+	SHRU	$A,16,   $Ahi		; smash $A to two halfwords
			
 
				+||	EXTU	$A,16,16,$Alo
			
 
				+
			
 
				+	XORMPY	$Alo,$B_2,$Alox2	; 16x8 bits muliplication
			
 
				+||	XORMPY	$Ahi,$B_2,$Ahix2
			
 
				+||	EXTU	$B,16,24,$B_1
			
 
				+	XORMPY	$Alo,$B_0,$Alox0
			
 
				+||	XORMPY	$Ahi,$B_0,$Ahix0
			
 
				+	XORMPY	$Alo,$B_3,$Alox3
			
 
				+||	XORMPY	$Ahi,$B_3,$Ahix3
			
 
				+	XORMPY	$Alo,$B_1,$Alox1
			
 
				+||	XORMPY	$Ahi,$B_1,$Ahix1
			
 
				+___
			
 
				+}
			
 
				+# Append code that combines the products of the previous 1x1
			
 
				+# multiplication into $OUTlo/$OUThi while starting the next
			
 
				+# multiplication on $A and $B. Instructions that belong to the new
			
 
				+# multiplication carry an extra leading space in the template. The
			
 
				+# new $Alox0 product is parked in A1 and moved into place at the end,
			
 
				+# because the old $Alox0 value is still read in between.
			
 
				+# Arguments: ($OUTlo,$OUThi) - result registers for the previous
			
 
				+# product; ($A,$B) - operand registers of the next multiplication.
			
 
				+sub mul_1x1_merged {
			
 
				+my ($OUTlo,$OUThi,$A,$B)=@_;
			
 
				+$code.=<<___;
			
 
				+	 EXTU	$B,8,24,$B_2		; smash $B to 4 bytes
			
 
				+||	 AND	$B,$xFF,$B_0
			
 
				+||	 SHRU	$B,24,$B_3
			
 
				+	 SHRU	$A,16,   $Ahi		; smash $A to two halfwords
			
 
				+||	 EXTU	$A,16,16,$Alo
			
 
				+
			
 
				+	XOR	$Ahix0,$Alox2,$Ahix0
			
 
				+||	MV	$Ahix2,$OUThi
			
 
				+||	 XORMPY	$Alo,$B_2,$Alox2
			
 
				+	 XORMPY	$Ahi,$B_2,$Ahix2
			
 
				+||	 EXTU	$B,16,24,$B_1
			
 
				+||	 XORMPY	$Alo,$B_0,A1		; $Alox0
			
 
				+	XOR	$Ahix1,$Alox3,$Ahix1
			
 
				+||	SHL	$Ahix0,16,$OUTlo
			
 
				+||	SHRU	$Ahix0,16,$Ahix0
			
 
				+	XOR	$Alox0,$OUTlo,$OUTlo
			
 
				+||	XOR	$Ahix0,$OUThi,$OUThi
			
 
				+||	 XORMPY	$Ahi,$B_0,$Ahix0
			
 
				+||	 XORMPY	$Alo,$B_3,$Alox3
			
 
				+||	SHL	$Alox1,8,$Alox1
			
 
				+||	SHL	$Ahix3,8,$Ahix3
			
 
				+	XOR	$Alox1,$OUTlo,$OUTlo
			
 
				+||	XOR	$Ahix3,$OUThi,$OUThi
			
 
				+||	 XORMPY	$Ahi,$B_3,$Ahix3
			
 
				+||	SHL	$Ahix1,24,$Alox1
			
 
				+||	SHRU	$Ahix1,8, $Ahix1
			
 
				+	XOR	$Alox1,$OUTlo,$OUTlo
			
 
				+||	XOR	$Ahix1,$OUThi,$OUThi
			
 
				+||	 XORMPY	$Alo,$B_1,$Alox1
			
 
				+||	 XORMPY	$Ahi,$B_1,$Ahix1
			
 
				+||	 MV	A1,$Alox0
			
 
				+___
			
 
				+}
			
 
				+# Append the closing part of a 1x1 multiplication to $code: shift and
			
 
				+# XOR the products left in $Alox0..3 and $Ahix0..3 together into the
			
 
				+# result pair $OUTlo/$OUThi. No new multiplication is started.
			
 
				+# Arguments: ($OUTlo,$OUThi) - result registers.
			
 
				+sub mul_1x1_lower {
			
 
				+my ($OUTlo,$OUThi)=@_;
			
 
				+$code.=<<___;
			
 
				+	;NOP
			
 
				+	XOR	$Ahix0,$Alox2,$Ahix0
			
 
				+||	MV	$Ahix2,$OUThi
			
 
				+	NOP
			
 
				+	XOR	$Ahix1,$Alox3,$Ahix1
			
 
				+||	SHL	$Ahix0,16,$OUTlo
			
 
				+||	SHRU	$Ahix0,16,$Ahix0
			
 
				+	XOR	$Alox0,$OUTlo,$OUTlo
			
 
				+||	XOR	$Ahix0,$OUThi,$OUThi
			
 
				+||	SHL	$Alox1,8,$Alox1
			
 
				+||	SHL	$Ahix3,8,$Ahix3
			
 
				+	XOR	$Alox1,$OUTlo,$OUTlo
			
 
				+||	XOR	$Ahix3,$OUThi,$OUThi
			
 
				+||	SHL	$Ahix1,24,$Alox1
			
 
				+||	SHRU	$Ahix1,8, $Ahix1
			
 
				+	XOR	$Alox1,$OUTlo,$OUTlo
			
 
				+||	XOR	$Ahix1,$OUThi,$OUThi
			
 
				+___
			
 
				+}
			
 
				+# Generate _bn_GF2m_mul_2x2. Three 1x1 multiplications are chained,
			
 
				+# a0·b0, a1·b1 and (a0+a1)·(b0+b1), and their results are XORed
			
 
				+# together into the four words stored at rp[0..3]. The template
			
 
				+# pieces between the calls start with "||" and therefore continue the
			
 
				+# last execute packet emitted by the preceding subroutine.
			
 
				+$code.=<<___;
			
 
				+	.text
			
 
				+
			
 
				+	.global	_bn_GF2m_mul_2x2
			
 
				+_bn_GF2m_mul_2x2:
			
 
				+	.asmfunc
			
 
				+	MVK	0xFF,$xFF
			
 
				+___
			
 
				+	&mul_1x1_upper($a0,$b0);		# a0·b0
			
 
				+# Load the operands of the second multiplication.
			
 
				+$code.=<<___;
			
 
				+||	MV	$b1,$B
			
 
				+	MV	$a1,$A
			
 
				+___
			
 
				+	&mul_1x1_merged("A28","B28",$A,$B);	# a0·b0/a1·b1
			
 
				+# Form the operands of the third multiplication.
			
 
				+$code.=<<___;
			
 
				+||	XOR	$b0,$b1,$B
			
 
				+	XOR	$a0,$a1,$A
			
 
				+___
			
 
				+	&mul_1x1_merged("A31","B31",$A,$B);	# a1·b1/(a0+a1)·(b0+b1)
			
 
				+$code.=<<___;
			
 
				+	XOR	A28,A31,A29
			
 
				+||	XOR	B28,B31,B29			; a0·b0+a1·b1
			
 
				+___
			
 
				+	&mul_1x1_lower("A30","B30");		# (a0+a1)·(b0+b1)
			
 
				+# Issue the return branch, fold the middle term into the result and
			
 
				+# store the four result words.
			
 
				+$code.=<<___;
			
 
				+||	BNOP	B3
			
 
				+	XOR	A29,A30,A30
			
 
				+||	XOR	B29,B30,B30			; (a0+a1)·(b0+b1)-a0·b0-a1·b1
			
 
				+	XOR	B28,A30,A30
			
 
				+||	STW	A28,*${rp}[0]
			
 
				+	XOR	B30,A31,A31
			
 
				+||	STW	A30,*${rp}[1]
			
 
				+	STW	A31,*${rp}[2]
			
 
				+	STW	B31,*${rp}[3]
			
 
				+	.endasmfunc
			
 
				+___
			
 
				+
			
 
				+# Emit the generated assembly, then close STDOUT explicitly and check
			
 
				+# the result: buffered write errors (e.g. disk full) only surface at
			
 
				+# close, and without the check the build would carry on with a
			
 
				+# truncated output file.
			
 
				+print $code;
			
 
				+close STDOUT or die "error closing STDOUT: $!";
			
--- a/crypto/bn/bn_nist.c
+++ b/crypto/bn/bn_nist.c
@@ -366,6 +366,10 @@ static void nist_cp_bn(BN_ULONG *buf, BN_ULONG *a, int top)
 
				 # endif
			
 
				 #endif /* BN_BITS2 != 64 */
			
 
				 
			
 
				+#if defined(_TMS320C6X) && defined(NIST_INT64)
			
 
				+# undef NIST_INT64     /* compiler bug */
			
 
				+# pragma diag_suppress 177
			
 
				+#endif
			
 
				 
			
 
				 #define nist_set_192(to, from, a1, a2, a3) \
			
 
				 	{ \
			
@@ -1047,6 +1051,11 @@ int BN_nist_mod_384(BIGNUM *r, const BIGNUM *a, const BIGNUM *field,
 
				 	return 1;
			
 
				 	}
			
 
				 
			
 
				+#ifdef _WIN32_WCE
			
 
				+/* Workaround for compiler bug under CE */
			
 
				+#pragma optimize( "", off )
			
 
				+#endif
			
 
				+
			
 
				 #define BN_NIST_521_RSHIFT	(521%BN_BITS2)
			
 
				 #define BN_NIST_521_LSHIFT	(BN_BITS2-BN_NIST_521_RSHIFT)
			
 
				 #define BN_NIST_521_TOP_MASK	((BN_ULONG)BN_MASK2>>BN_NIST_521_LSHIFT)
			
@@ -1113,6 +1122,10 @@ int BN_nist_mod_521(BIGNUM *r, const BIGNUM *a, const BIGNUM *field,
 
				 	return 1;
			
 
				 	}
			
 
				 
			
 
				+#ifdef _WIN32_WCE
			
 
				+#pragma optimize( "", on )
			
 
				+#endif
			
 
				+
			
 
				 int (*BN_nist_mod_func(const BIGNUM *p))(BIGNUM *r, const BIGNUM *a, const BIGNUM *field, BN_CTX *ctx)
			
 
				 	{
			
 
				 	if (BN_ucmp(&_bignum_nist_p_192, p) == 0)
			
--- a/crypto/c64xpluscpuid.pl
+++ b/crypto/c64xpluscpuid.pl
@@ -0,0 +1,246 @@
 
				+#!/usr/bin/env perl
			
 
				+#
			
 
				+
			
 
				+while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {}	# skip arguments until one looks like a file name (stem.ext)
			
 
				+$output and (open(STDOUT,'>',$output) or die "can't open $output: $!");	# no file name: keep the inherited STDOUT; a failed open is fatal
			
 
				+
			
 
				+$code.=<<___;
			
 
				+	.text
			
 
				+
			
 
				+	.asg	B3,RA
			
 
				+
			
 
				+	.global	_OPENSSL_rdtsc
			
 
				+_OPENSSL_rdtsc:
			
 
				+	.asmfunc
			
 
				+	B	RA
			
 
				+	MVC	TSCL,B0
			
 
				+	MVC	TSCH,B1
			
 
				+  [!B0]	MVC	B0,TSCL		; start TSC
			
 
				+	MV	B0,A4
			
 
				+	MV	B1,A5
			
 
				+	.endasmfunc
			
 
				+
			
 
				+	.global	_OPENSSL_cleanse
			
 
				+_OPENSSL_cleanse:
			
 
				+	.asmfunc
			
 
				+	ZERO	A3:A2
			
 
				+||	ZERO	B2
			
 
				+||	SHRU	B4,3,B0		; is length >= 8
			
 
				+||	ADD	1,A4,B6
			
 
				+  [!B0]	BNOP	RA
			
 
				+||	ZERO	A1
			
 
				+||	ZERO	B1
			
 
				+   [B0]	MVC	B0,ILC
			
 
				+||[!B0]	CMPLT	0,B4,A1
			
 
				+||[!B0]	CMPLT	1,B4,B1
			
 
				+   [A1]	STB	A2,*A4++[2]
			
 
				+|| [B1] STB	B2,*B6++[2]
			
 
				+||[!B0]	CMPLT	2,B4,A1
			
 
				+||[!B0]	CMPLT	3,B4,B1
			
 
				+   [A1]	STB	A2,*A4++[2]
			
 
				+|| [B1] STB	B2,*B6++[2]
			
 
				+||[!B0]	CMPLT	4,B4,A1
			
 
				+||[!B0]	CMPLT	5,B4,B1
			
 
				+   [A1]	STB	A2,*A4++[2]
			
 
				+|| [B1] STB	B2,*B6++[2]
			
 
				+||[!B0]	CMPLT	6,B4,A1
			
 
				+   [A1]	STB	A2,*A4++[2]
			
 
				+
			
 
				+	SPLOOP	1
			
 
				+	STNDW	A3:A2,*A4++
			
 
				+||	SUB	B4,8,B4
			
 
				+	SPKERNEL
			
 
				+
			
 
				+	MV	B4,B0		; remaining bytes
			
 
				+||	ADD	1,A4,B6
			
 
				+||	BNOP	RA
			
 
				+   [B0]	CMPLT	0,B0,A1
			
 
				+|| [B0]	CMPLT	1,B0,B1
			
 
				+   [A1]	STB	A2,*A4++[2]
			
 
				+|| [B1] STB	B2,*B6++[2]
			
 
				+|| [B0]	CMPLT	2,B0,A1
			
 
				+|| [B0]	CMPLT	3,B0,B1
			
 
				+   [A1]	STB	A2,*A4++[2]
			
 
				+|| [B1] STB	B2,*B6++[2]
			
 
				+|| [B0]	CMPLT	4,B0,A1
			
 
				+|| [B0]	CMPLT	5,B0,B1
			
 
				+   [A1]	STB	A2,*A4++[2]
			
 
				+|| [B1] STB	B2,*B6++[2]
			
 
				+|| [B0]	CMPLT	6,B0,A1
			
 
				+   [A1]	STB	A2,*A4++[2]
			
 
				+	.endasmfunc
			
 
				+
			
 
				+	.global	_OPENSSL_atomic_add
			
 
				+_OPENSSL_atomic_add:
			
 
				+	.asmfunc
			
 
				+	MV	A4,B0
			
 
				+atomic_add?:
			
 
				+	LL	*B0,B5
			
 
				+	NOP	4
			
 
				+	ADD	B4,B5,B5
			
 
				+	SL	B5,*B0
			
 
				+	CMTL	*B0,B1
			
 
				+	NOP	4
			
 
				+  [!B1]	B	atomic_add?
			
 
				+   [B1]	BNOP	RA,4
			
 
				+	MV	B5,A4
			
 
				+	.endasmfunc
			
 
				+
			
 
				+	.global	_OPENSSL_wipe_cpu
			
 
				+_OPENSSL_wipe_cpu:
			
 
				+	.asmfunc
			
 
				+	ZERO	A0
			
 
				+||	ZERO	B0
			
 
				+||	ZERO	A1
			
 
				+||	ZERO	B1
			
 
				+	ZERO	A3:A2
			
 
				+||	MVD	B0,B2
			
 
				+||	ZERO	A4
			
 
				+||	ZERO	B4
			
 
				+||	ZERO	A5
			
 
				+||	ZERO	B5
			
 
				+||	BNOP	RA
			
 
				+	ZERO	A7:A6
			
 
				+||	ZERO	B7:B6
			
 
				+||	ZERO	A8
			
 
				+||	ZERO	B8
			
 
				+||	ZERO	A9
			
 
				+||	ZERO	B9
			
 
				+	ZERO	A17:A16
			
 
				+||	ZERO	B17:B16
			
 
				+||	ZERO	A18
			
 
				+||	ZERO	B18
			
 
				+||	ZERO	A19
			
 
				+||	ZERO	B19
			
 
				+	ZERO	A21:A20
			
 
				+||	ZERO	B21:B20
			
 
				+||	ZERO	A22
			
 
				+||	ZERO	B22
			
 
				+||	ZERO	A23
			
 
				+||	ZERO	B23
			
 
				+	ZERO	A25:A24
			
 
				+||	ZERO	B25:B24
			
 
				+||	ZERO	A26
			
 
				+||	ZERO	B26
			
 
				+||	ZERO	A27
			
 
				+||	ZERO	B27
			
 
				+	ZERO	A29:A28
			
 
				+||	ZERO	B29:B28
			
 
				+||	ZERO	A30
			
 
				+||	ZERO	B30
			
 
				+||	ZERO	A31
			
 
				+||	ZERO	B31
			
 
				+	.endasmfunc
			
 
				+
			
 
				+CLFLUSH	.macro	CONTROL,ADDR,LEN
			
 
				+	B	passthrough?
			
 
				+||	STW	ADDR,*CONTROL[0]
			
 
				+	STW	LEN,*CONTROL[1]
			
 
				+spinlock?:
			
 
				+	LDW	*CONTROL[1],A0
			
 
				+	NOP	3
			
 
				+passthrough?:
			
 
				+	NOP
			
 
				+  [A0]	BNOP	spinlock?,5
			
 
				+	.endm
			
 
				+
			
 
				+	.global	_OPENSSL_instrument_bus
			
 
				+_OPENSSL_instrument_bus:
			
 
				+	.asmfunc
			
 
				+	MV	B4,B0			; reassign sizeof(output)
			
 
				+||	MV	A4,B4			; reassign output
			
 
				+||	MVK	0x00004030,A3
			
 
				+	MV	B0,A4			; return value
			
 
				+||	MVK	1,A1
			
 
				+||	MVKH	0x01840000,A3		; L1DWIBAR
			
 
				+	MVC	TSCL,B8			; collect 1st tick
			
 
				+||	MVK	0x00004010,A5
			
 
				+	MV	B8,B9			; lasttick = tick
			
 
				+||	MVK	0,B7			; lastdiff = 0
			
 
				+||	MVKH	0x01840000,A5		; L2WIBAR
			
 
				+	CLFLUSH	A3,B4,A1		; write-back and invalidate L1D line
			
 
				+	CLFLUSH	A5,B4,A1		; write-back and invalidate L2 line
			
 
				+	LL	*B4,B5
			
 
				+	NOP	4
			
 
				+	ADD	B7,B5,B5
			
 
				+	SL	B5,*B4
			
 
				+	CMTL	*B4,B1
			
 
				+	NOP	4
			
 
				+	STW	B5,*B4
			
 
				+bus_loop1?:
			
 
				+	MVC	TSCL,B8
			
 
				+|| [B0]	SUB	B0,1,B0
			
 
				+	SUB	B8,B9,B7		; lastdiff = tick - lasttick
			
 
				+||	MV	B8,B9			; lasttick = tick
			
 
				+	CLFLUSH	A3,B4,A1		; write-back and invalidate L1D line
			
 
				+	CLFLUSH	A5,B4,A1		; write-back and invalidate L2 line
			
 
				+	LL	*B4,B5
			
 
				+	NOP	4
			
 
				+	ADD	B7,B5,B5
			
 
				+	SL	B5,*B4
			
 
				+	CMTL	*B4,B1
			
 
				+	STW	B5,*B4			; [!B1] is removed to flatten samples
			
 
				+||	ADDK	4,B4
			
 
				+|| [B0]	BNOP	bus_loop1?,5
			
 
				+
			
 
				+	BNOP	RA,5
			
 
				+	.endasmfunc
			
 
				+
			
 
				+	.global	_OPENSSL_instrument_bus2
			
 
				+_OPENSSL_instrument_bus2:
			
 
				+	.asmfunc
			
 
				+	MV	A6,B0			; reassign max
			
 
				+||	MV	B4,A6			; reassign sizeof(output)
			
 
				+||	MVK	0x00004030,A3
			
 
				+	MV	A4,B4			; reassign output
			
 
				+||	MVK	0,A4			; return value
			
 
				+||	MVK	1,A1
			
 
				+||	MVKH	0x01840000,A3		; L1DWIBAR
			
 
				+
			
 
				+	MVC	TSCL,B8			; collect 1st tick
			
 
				+||	MVK	0x00004010,A5
			
 
				+	MV	B8,B9			; lasttick = tick
			
 
				+||	MVK	0,B7			; lastdiff = 0
			
 
				+||	MVKH	0x01840000,A5		; L2WIBAR
			
 
				+	CLFLUSH	A3,B4,A1		; write-back and invalidate L1D line
			
 
				+	CLFLUSH	A5,B4,A1		; write-back and invalidate L2 line
			
 
				+	LL	*B4,B5
			
 
				+	NOP	4
			
 
				+	ADD	B7,B5,B5
			
 
				+	SL	B5,*B4
			
 
				+	CMTL	*B4,B1
			
 
				+	NOP	4
			
 
				+	STW	B5,*B4
			
 
				+
			
 
				+	MVC	TSCL,B8			; collect 1st diff
			
 
				+	SUB	B8,B9,B7		; lastdiff = tick - lasttick
			
 
				+||	MV	B8,B9			; lasttick = tick
			
 
				+||	SUB	B0,1,B0
			
 
				+bus_loop2?:
			
 
				+	CLFLUSH	A3,B4,A1		; write-back and invalidate L1D line
			
 
				+	CLFLUSH	A5,B4,A1		; write-back and invalidate L2 line
			
 
				+	LL	*B4,B5
			
 
				+	NOP	4
			
 
				+	ADD	B7,B5,B5
			
 
				+	SL	B5,*B4
			
 
				+	CMTL	*B4,B1
			
 
				+	STW	B5,*B4			; [!B1] is removed to flatten samples
			
 
				+||[!B0]	BNOP	bus_loop2_done?,2
			
 
				+||	SUB	B0,1,B0
			
 
				+	MVC	TSCL,B8
			
 
				+	SUB	B8,B9,B8
			
 
				+||	MV	B8,B9
			
 
				+	CMPEQ	B8,B7,B2
			
 
				+||	MV	B8,B7
			
 
				+  [!B2]	ADDAW	B4,1,B4
			
 
				+||[!B2]	ADDK	1,A4
			
 
				+	CMPEQ	A4,A6,A2
			
 
				+  [!A2]	BNOP	bus_loop2?,5
			
 
				+
			
 
				+bus_loop2_done?:
			
 
				+	BNOP	RA,5
			
 
				+	.endasmfunc
			
 
				+___
			
 
				+
			
 
				+print $code;	# write the accumulated assembly text to STDOUT (possibly redirected to the output file)
			
 
				+close STDOUT or die "error closing STDOUT: $!";	# buffered write errors (e.g. disk full) only surface at close
			
--- a/crypto/cmac/cmac.c
+++ b/crypto/cmac/cmac.c
@@ -143,7 +143,8 @@ int CMAC_CTX_copy(CMAC_CTX *out, const CMAC_CTX *in)
 
				 int CMAC_Init(CMAC_CTX *ctx, const void *key, size_t keylen, 
			
 
				 			const EVP_CIPHER *cipher, ENGINE *impl)
			
 
				 	{
			
 
				-	static unsigned char zero_iv[EVP_MAX_BLOCK_LENGTH];
			
 
				+	__fips_constseg
			
 
				+	static const unsigned char zero_iv[EVP_MAX_BLOCK_LENGTH] = {0};
			
 
				 	/* All zeros means restart */
			
 
				 	if (!key && !cipher && !impl && keylen == 0)
			
 
				 		{
			
--- a/crypto/cryptlib.c
+++ b/crypto/cryptlib.c
@@ -382,7 +382,9 @@ void OpenSSLDie(const char *file,int line,const char *assertion)
 
				 	abort();
			
 
				 #else
			
 
				 	/* Win32 abort() customarily shows a dialog, but we just did that... */
			
 
				+#ifdef SIGABRT
			
 
				 	raise(SIGABRT);
			
 
				+#endif
			
 
				 	_exit(3);
			
 
				 #endif
			
 
				 	}
			
--- a/crypto/des/spr.h
+++ b/crypto/des/spr.h
@@ -56,6 +56,9 @@
 
				  * [including the GNU Public Licence.]
			
 
				  */
			
 
				 
			
 
				+#ifdef _TMS320C6X
			
 
				+#  pragma DATA_SECTION(DES_SPtrans,".const:des_sptrans")
			
 
				+#endif
			
 
				 __fips_constseg
			
 
				 OPENSSL_GLOBAL const DES_LONG DES_SPtrans[8][64]={
			
 
				 {
			
--- a/crypto/modes/asm/ghash-c64xplus.pl
+++ b/crypto/modes/asm/ghash-c64xplus.pl
@@ -0,0 +1,231 @@
 
				+#!/usr/bin/env perl
			
 
				+#
			
 
				+# ====================================================================
			
 
				+# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
			
 
				+# project. The module is, however, dual licensed under OpenSSL and
			
 
				+# CRYPTOGAMS licenses depending on where you obtain it. For further
			
 
				+# details see http://www.openssl.org/~appro/cryptogams/.
			
 
				+# ====================================================================
			
 
				+#
			
 
				+# December 2011
			
 
				+#
			
 
				+# The module implements GCM GHASH function and underlying single
			
 
				+# multiplication operation in GF(2^128). Even though subroutines
			
 
				+# have _4bit suffix, they are not using any tables, but rely on
			
 
				+# hardware Galois Field Multiply support. Streamed GHASH processes
			
 
				+# byte in ~7 cycles, which is >6x faster than "4-bit" table-driven
			
 
				+# code compiled with TI's cl6x 6.0 with -mv6400+ -o2 flags. We are
			
 
				+# comparing apples vs. oranges, but compiler surely could have done
			
 
				+# better, because theoretical [though not necessarily achievable]
			
 
				+# estimate for "4-bit" table-driven implementation is ~12 cycles.
			
 
				+
			
 
				+while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {}	# skip arguments until one looks like a file name (stem.ext)
			
 
				+$output and (open(STDOUT,'>',$output) or die "can't open $output: $!");	# no file name: keep the inherited STDOUT; a failed open is fatal
			
 
				+
			
 
				+($Xip,$Htable,$inp,$len)=("A4","B4","A6","B6");	# arguments
			
 
				+
			
 
				+($Z0,$Z1,$Z2,$Z3,	$H0, $H1, $H2, $H3,
			
 
				+			$H0x,$H1x,$H2x,$H3x)=map("A$_",(16..27));
			
 
				+($H01u,$H01y,$H2u,$H3u,	$H0y,$H1y,$H2y,$H3y,
			
 
				+			$H0z,$H1z,$H2z,$H3z)=map("B$_",(16..27));
			
 
				+($FF000000,$E10000)=("B30","B31");
			
 
				+($xip,$x0,$x1,$xib)=map("B$_",(6..9));	# $xip zaps $len
			
 
				+ $xia="A9";
			
 
				+($rem,$res)=("B4","B5");		# $rem zaps $Htable
			
 
				+
			
 
				+$code.=<<___;
			
 
				+	.text
			
 
				+
			
 
				+	.asg	B3,RA
			
 
				+
			
 
				+	.if	0
			
 
				+	.global	_gcm_gmult_1bit
			
 
				+_gcm_gmult_1bit:
			
 
				+	ADDAD	$Htable,2,$Htable
			
 
				+	.endif
			
 
				+	.global	_gcm_gmult_4bit
			
 
				+_gcm_gmult_4bit:
			
 
				+	.asmfunc
			
 
				+	LDDW	*${Htable}[-1],$H1:$H0	; H.lo
			
 
				+	LDDW	*${Htable}[-2],$H3:$H2	; H.hi
			
 
				+||	MV	$Xip,${xip}		; reassign Xi
			
 
				+||	MVK	15,B1			; SPLOOPD constant
			
 
				+
			
 
				+	MVK	0xE1,$E10000
			
 
				+||	LDBU	*++${xip}[15],$x1	; Xi[15]
			
 
				+	MVK	0xFF,$FF000000
			
 
				+||	LDBU	*--${xip},$x0		; Xi[14]
			
 
				+	SHL	$E10000,16,$E10000	; [pre-shifted] reduction polynomial
			
 
				+	SHL	$FF000000,24,$FF000000	; upper byte mask
			
 
				+||	BNOP	ghash_loop?
			
 
				+||	MVK	1,B0			; take a single spin
			
 
				+
			
 
				+	PACKH2	$H0,$H1,$xia		; pack H0' and H1's upper bytes
			
 
				+	AND	$H2,$FF000000,$H2u	; H2's upper byte
			
 
				+	AND	$H3,$FF000000,$H3u	; H3's upper byte
			
 
				+||	SHRU	$H2u,8,$H2u
			
 
				+	SHRU	$H3u,8,$H3u
			
 
				+||	ZERO	$Z1:$Z0
			
 
				+	SHRU2	$xia,8,$H01u
			
 
				+||	ZERO	$Z3:$Z2
			
 
				+	.endasmfunc
			
 
				+
			
 
				+	.global	_gcm_ghash_4bit
			
 
				+_gcm_ghash_4bit:
			
 
				+	.asmfunc
			
 
				+	LDDW	*${Htable}[-1],$H1:$H0	; H.lo
			
 
				+||	SHRU	$len,4,B0		; reassign len
			
 
				+	LDDW	*${Htable}[-2],$H3:$H2	; H.hi
			
 
				+||	MV	$Xip,${xip}		; reassign Xi
			
 
				+||	MVK	15,B1			; SPLOOPD constant
			
 
				+
			
 
				+	MVK	0xE1,$E10000
			
 
				+|| [B0]	LDNDW	*${inp}[1],$H1x:$H0x
			
 
				+	MVK	0xFF,$FF000000
			
 
				+|| [B0]	LDNDW	*${inp}++[2],$H3x:$H2x
			
 
				+	SHL	$E10000,16,$E10000	; [pre-shifted] reduction polynomial
			
 
				+||	LDDW	*${xip}[1],$Z1:$Z0
			
 
				+	SHL	$FF000000,24,$FF000000	; upper byte mask
			
 
				+||	LDDW	*${xip}[0],$Z3:$Z2
			
 
				+
			
 
				+	PACKH2	$H0,$H1,$xia		; pack H0' and H1's upper bytes
			
 
				+	AND	$H2,$FF000000,$H2u	; H2's upper byte
			
 
				+	AND	$H3,$FF000000,$H3u	; H3's upper byte
			
 
				+||	SHRU	$H2u,8,$H2u
			
 
				+	SHRU	$H3u,8,$H3u
			
 
				+	SHRU2	$xia,8,$H01u
			
 
				+
			
 
				+|| [B0]	XOR	$H0x,$Z0,$Z0		; Xi^=inp
			
 
				+|| [B0]	XOR	$H1x,$Z1,$Z1
			
 
				+	.if	.LITTLE_ENDIAN
			
 
				+   [B0]	XOR	$H2x,$Z2,$Z2
			
 
				+|| [B0]	XOR	$H3x,$Z3,$Z3
			
 
				+|| [B0]	SHRU	$Z1,24,$xia		; Xi[15], avoid cross-path stall
			
 
				+	STDW	$Z1:$Z0,*${xip}[1]
			
 
				+|| [B0]	SHRU	$Z1,16,$x0		; Xi[14]
			
 
				+|| [B0]	ZERO	$Z1:$Z0
			
 
				+	.else
			
 
				+   [B0]	XOR	$H2x,$Z2,$Z2
			
 
				+|| [B0]	XOR	$H3x,$Z3,$Z3
			
 
				+|| [B0]	MV	$Z0,$xia		; Xi[15], avoid cross-path stall
			
 
				+	STDW	$Z1:$Z0,*${xip}[1]
			
 
				+|| [B0] SHRU	$Z0,8,$x0		; Xi[14]
			
 
				+|| [B0]	ZERO	$Z1:$Z0
			
 
				+	.endif
			
 
				+	STDW	$Z3:$Z2,*${xip}[0]
			
 
				+|| [B0]	ZERO	$Z3:$Z2
			
 
				+|| [B0]	MV	$xia,$x1
			
 
				+   [B0]	ADDK	14,${xip}
			
 
				+
			
 
				+ghash_loop?:
			
 
				+	SPLOOPD	6			; 6*16+7
			
 
				+||	MVC	B1,ILC
			
 
				+|| [B0]	SUB	B0,1,B0
			
 
				+||	ZERO	A0
			
 
				+||	ADD	$x1,$x1,$xib		; SHL	$x1,1,$xib
			
 
				+||	SHL	$x1,1,$xia
			
 
				+___
			
 
				+
			
 
				+########____________________________
			
 
				+#  0    D2.     M1          M2      |
			
 
				+#  1            M1                  |
			
 
				+#  2            M1          M2      |
			
 
				+#  3        D1. M1          M2      |
			
 
				+#  4        S1. L1                  |
			
 
				+#  5    S2  S1x L1          D2  L2  |____________________________
			
 
				+#  6/0          L1  S1      L2  S2x |D2.     M1          M2      |
			
 
				+#  7/1          L1  S1  D1x S2  M2  |        M1                  |
			
 
				+#  8/2              S1  L1x S2      |        M1          M2      |
			
 
				+#  9/3              S1  L1x         |    D1. M1          M2      |
			
 
				+# 10/4                  D1x         |    S1. L1                  |
			
 
				+# 11/5                              |S2  S1x L1          D2  L2  |____________
			
 
				+# 12/6/0                D1x       __|        L1  S1      L2  S2x |D2.     ....
			
 
				+#    7/1                                     L1  S1  D1x S2  M2  |        ....
			
 
				+#    8/2                                         S1  L1x S2      |        ....
			
 
				+#####...                                         ................|............
			
 
				+$code.=<<___;
			
 
				+	XORMPY	$H0,$xia,$H0x		; 0	; H·Xi[i]
			
 
				+||	XORMPY	$H01u,$xib,$H01y
			
 
				+|| [A0]	LDBU	*--${xip},$x0
			
 
				+	XORMPY	$H1,$xia,$H1x		; 1
			
 
				+	XORMPY	$H2,$xia,$H2x		; 2
			
 
				+||	XORMPY	$H2u,$xib,$H2y
			
 
				+	XORMPY	$H3,$xia,$H3x		; 3
			
 
				+||	XORMPY	$H3u,$xib,$H3y
			
 
				+||[!A0]	MVK.D	15,A0				; *--${xip} counter
			
 
				+	XOR.L	$H0x,$Z0,$Z0		; 4	; Z^=H·Xi[i]
			
 
				+|| [A0]	SUB.S	A0,1,A0
			
 
				+	XOR.L	$H1x,$Z1,$Z1		; 5
			
 
				+||	AND.D	$H01y,$FF000000,$H0z
			
 
				+||	SWAP2.L	$H01y,$H1y		;	; SHL	$H01y,16,$H1y
			
 
				+||	SHL	$x0,1,$xib
			
 
				+||	SHL	$x0,1,$xia
			
 
				+
			
 
				+	XOR.L	$H2x,$Z2,$Z2		; 6/0	; [0,0] in epilogue
			
 
				+||	SHL	$Z0,1,$rem		;	; rem=Z<<1
			
 
				+||	SHRMB.S	$Z1,$Z0,$Z0		;	; Z>>=8
			
 
				+||	AND.L	$H1y,$FF000000,$H1z
			
 
				+	XOR.L	$H3x,$Z3,$Z3		; 7/1
			
 
				+||	SHRMB.S	$Z2,$Z1,$Z1
			
 
				+||	XOR.D	$H0z,$Z0,$Z0			; merge upper byte products
			
 
				+||	AND.S	$H2y,$FF000000,$H2z
			
 
				+||	XORMPY	$E10000,$rem,$res	;	; implicit rem&0x1FE
			
 
				+	XOR.L	$H1z,$Z1,$Z1		; 8/2
			
 
				+||	SHRMB.S	$Z3,$Z2,$Z2
			
 
				+||	AND.S	$H3y,$FF000000,$H3z
			
 
				+	XOR.L	$H2z,$Z2,$Z2		; 9/3
			
 
				+||	SHRU	$Z3,8,$Z3
			
 
				+	XOR.D	$H3z,$Z3,$Z3		; 10/4
			
 
				+	NOP				; 11/5
			
 
				+
			
 
				+	SPKERNEL 0,2
			
 
				+||	XOR.D	$res,$Z3,$Z3		; 12/6/0; Z^=res
			
 
				+
			
 
				+	; input pre-fetch is possible where D1 slot is available...
			
 
				+   [B0]	LDNDW	*${inp}[1],$H1x:$H0x	; 8/-
			
 
				+   [B0]	LDNDW	*${inp}++[2],$H3x:$H2x	; 9/-
			
 
				+	NOP				; 10/-
			
 
				+	.if	.LITTLE_ENDIAN
			
 
				+	SWAP2	$Z0,$Z1			; 11/-
			
 
				+||	SWAP4	$Z1,$Z0
			
 
				+	SWAP4	$Z1,$Z1			; 12/-
			
 
				+||	SWAP2	$Z0,$Z0
			
 
				+	SWAP2	$Z2,$Z3
			
 
				+||	SWAP4	$Z3,$Z2
			
 
				+||[!B0]	BNOP	RA
			
 
				+	SWAP4	$Z3,$Z3
			
 
				+||	SWAP2	$Z2,$Z2
			
 
				+|| [B0]	BNOP	ghash_loop?
			
 
				+   [B0]	XOR	$H0x,$Z0,$Z0		; Xi^=inp
			
 
				+|| [B0]	XOR	$H1x,$Z1,$Z1
			
 
				+   [B0]	XOR	$H2x,$Z2,$Z2
			
 
				+|| [B0]	XOR	$H3x,$Z3,$Z3
			
 
				+|| [B0]	SHRU	$Z1,24,$xia		; Xi[15], avoid cross-path stall
			
 
				+	STDW	$Z1:$Z0,*${xip}[1]
			
 
				+|| [B0]	SHRU	$Z1,16,$x0		; Xi[14]
			
 
				+|| [B0]	ZERO	$Z1:$Z0
			
 
				+	.else
			
 
				+  [!B0]	BNOP	RA			; 11/-
			
 
				+   [B0]	BNOP	ghash_loop?		; 12/-
			
 
				+   [B0]	XOR	$H0x,$Z0,$Z0		; Xi^=inp
			
 
				+|| [B0]	XOR	$H1x,$Z1,$Z1
			
 
				+   [B0]	XOR	$H2x,$Z2,$Z2
			
 
				+|| [B0]	XOR	$H3x,$Z3,$Z3
			
 
				+|| [B0]	MV	$Z0,$xia		; Xi[15], avoid cross-path stall
			
 
				+	STDW	$Z1:$Z0,*${xip}[1]
			
 
				+|| [B0] SHRU	$Z0,8,$x0		; Xi[14]
			
 
				+|| [B0]	ZERO	$Z1:$Z0
			
 
				+	.endif
			
 
				+	STDW	$Z3:$Z2,*${xip}[0]
			
 
				+|| [B0]	ZERO	$Z3:$Z2
			
 
				+|| [B0]	MV	$xia,$x1
			
 
				+   [B0]	ADDK	14,${xip}
			
 
				+	.endasmfunc
			
 
				+
			
 
				+	.sect	.const
			
 
				+	.cstring "GHASH for C64x+, CRYPTOGAMS by <appro\@openssl.org>"
			
 
				+	.align	4
			
 
				+___
			
 
				+
			
 
				+print $code;	# write the accumulated assembly text to STDOUT (possibly redirected to the output file)
			
 
				+close STDOUT or die "error closing STDOUT: $!";	# buffered write errors (e.g. disk full) only surface at close
			
--- a/crypto/modes/gcm128.c
+++ b/crypto/modes/gcm128.c
@@ -674,6 +674,8 @@ void gcm_ghash_4bit_x86(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len
 
				 void gcm_gmult_neon(u64 Xi[2],const u128 Htable[16]);
			
 
				 void gcm_ghash_neon(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
			
 
				 #  endif
			
 
				+# elif defined(_TMS320C6400_PLUS)
			
 
				+#   define GHASH_ASM_C64Xplus
			
 
				 # endif
			
 
				 #endif
			
 
				 
			
@@ -746,6 +748,10 @@ void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx,void *key,block128_f block)
 
				 		ctx->gmult = gcm_gmult_4bit;
			
 
				 		ctx->ghash = gcm_ghash_4bit;
			
 
				 	}
			
 
				+# elif defined(GHASH_ASM_C64Xplus)
			
 
				+	/* C64x+ assembler doesn't use tables, skip gcm_init_4bit.
			
 
				+	 * This is likely to trigger "function never referenced"
			
 
				+	 * warning and code being eliminated. */
			
 
				 # else
			
 
				 	gcm_init_4bit(ctx->Htable,ctx->H.u);
			
 
				 # endif
			
--- a/crypto/sha/asm/sha1-c64xplus.pl
+++ b/crypto/sha/asm/sha1-c64xplus.pl
@@ -0,0 +1,323 @@
 
				+#!/usr/bin/env perl
			
 
				+#
			
 
				+# ====================================================================
			
 
				+# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
			
 
				+# project. The module is, however, dual licensed under OpenSSL and
			
 
				+# CRYPTOGAMS licenses depending on where you obtain it. For further
			
 
				+# details see http://www.openssl.org/~appro/cryptogams/.
			
 
				+# ====================================================================
			
 
				+#
			
 
				+# SHA1 for C64x+.
			
 
				+#
			
 
				+# November 2011
			
 
				+#
			
 
				+# If compared to compiler-generated code with similar characteristics,
			
 
				+# i.e. compiled with OPENSSL_SMALL_FOOTPRINT and utilizing SPLOOPs,
			
 
				+# this implementation is 25% smaller and >2x faster. In absolute terms
			
 
				+# performance is (quite impressive) ~6.5 cycles per processed byte.
			
 
				+# Fully unrolled assembler would be ~5x larger and is likely to be
			
 
				+# ~15% faster. It would be free from references to intermediate ring
			
 
				+# buffer, but put more pressure on L1P [both because the code would be
			
 
				+# larger and won't be using SPLOOP buffer]. There are no plans to
			
 
				+# realize fully unrolled variant though...
			
 
				+#
			
 
				+# !!! Note that this module uses AMR, which means that all interrupt
			
 
				+# service routines are expected to preserve it and, for their own well-being,
			
 
				+# zero it upon entry.
			
 
				+
			
 
				+while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {}	# skip arguments until one looks like a file name (stem.ext)
			
 
				+$output and (open(STDOUT,'>',$output) or die "can't open $output: $!");	# no file name: keep the inherited STDOUT; a failed open is fatal
			
 
				+
			
 
				+($CTX,$INP,$NUM) = ("A4","B4","A6");		# arguments
			
 
				+
			
 
				+($A,$B,$C,$D,$E, $Arot,$F,$F0,$T,$K) = map("A$_",(16..20, 21..25));
			
 
				+($X0,$X2,$X8,$X13) = ("A26","B26","A27","B27");
			
 
				+($TX0,$TX1,$TX2,$TX3) = map("B$_",(28..31));
			
 
				+($XPA,$XPB) = ("A5","B5");			# X circular buffer
			
 
				+($Actx,$Bctx,$Cctx,$Dctx,$Ectx) = map("A$_",(3,6..9));	# zaps $NUM
			
 
				+
			
 
				+$code=<<___;
			
 
				+	.text
			
 
				+
			
 
				+	.asg	B3,RA
			
 
				+	.asg	A15,FP
			
 
				+	.asg	B15,SP
			
 
				+
			
 
				+	.if	.BIG_ENDIAN
			
 
				+	.asg	MV,SWAP2
			
 
				+	.asg	MV,SWAP4
			
 
				+	.endif
			
 
				+
			
 
				+	.global	_sha1_block_data_order
			
 
				+_sha1_block_data_order:
			
 
				+	.asmfunc stack_usage(64)
			
 
				+	MV	$NUM,A0			; reassign $NUM
			
 
				+||	MVK	-64,B0
			
 
				+  [!A0]	BNOP	RA			; if ($NUM==0) return;
			
 
				+|| [A0]	STW	FP,*SP--[16]		; save frame pointer and alloca(64)
			
 
				+|| [A0]	MV	SP,FP
			
 
				+   [A0]	LDW	*${CTX}[0],$A		; load A-E...
			
 
				+|| [A0]	AND	B0,SP,SP		; align stack at 64 bytes
			
 
				+   [A0]	LDW	*${CTX}[1],$B
			
 
				+|| [A0]	SUBAW	SP,2,SP			; reserve two words above buffer
			
 
				+   [A0]	LDW	*${CTX}[2],$C
			
 
				+|| [A0]	MVK	0x00404,B0
			
 
				+   [A0]	LDW	*${CTX}[3],$D
			
 
				+|| [A0]	MVKH	0x50000,B0		; 0x050404, 64 bytes for $XP[AB]
			
 
				+   [A0]	LDW	*${CTX}[4],$E
			
 
				+|| [A0]	MVC	B0,AMR			; setup circular addressing
			
 
				+	LDNW	*${INP}++,$TX1		; pre-fetch input
			
 
				+	NOP	1
			
 
				+
			
 
				+loop?:
			
 
				+	MVK	0x00007999,$K
			
 
				+||	ADDAW	SP,2,$XPA
			
 
				+||	SUB	A0,1,A0
			
 
				+||	MVK	13,B0
			
 
				+	MVKH	0x5a820000,$K		; K_00_19
			
 
				+||	ADDAW	SP,2,$XPB
			
 
				+||	MV	$A,$Actx
			
 
				+||	MV	$B,$Bctx
			
 
				+;;==================================================
			
 
				+	SPLOOPD	5			; BODY_00_13
			
 
				+||	MV	$C,$Cctx
			
 
				+||	MV	$D,$Dctx
			
 
				+||	MV	$E,$Ectx
			
 
				+||	MVC	B0,ILC
			
 
				+
			
 
				+	ROTL	$A,5,$Arot
			
 
				+||	AND	$C,$B,$F
			
 
				+||	ANDN	$D,$B,$F0
			
 
				+||	ADD	$K,$E,$T		; T=E+K
			
 
				+
			
 
				+	XOR	$F0,$F,$F		; F_00_19(B,C,D)
			
 
				+||	MV	$D,$E			; E=D
			
 
				+||	MV	$C,$D			; D=C
			
 
				+||	SWAP2	$TX1,$TX2
			
 
				+||	LDNW	*${INP}++,$TX1
			
 
				+
			
 
				+	ADD	$F,$T,$T		; T+=F_00_19(B,C,D)
			
 
				+||	ROTL	$B,30,$C		; C=ROL(B,30)
			
 
				+||	SWAP4	$TX2,$TX3		; byte swap
			
 
				+
			
 
				+	ADD	$Arot,$T,$T		; T+=ROL(A,5)
			
 
				+||	MV	$A,$B			; B=A
			
 
				+
			
 
				+	ADD	$TX3,$T,$A		; A=T+Xi
			
 
				+||	STW	$TX3,*${XPB}++
			
 
				+	SPKERNEL
			
 
				+;;==================================================
			
 
				+	ROTL	$A,5,$Arot		; BODY_14
			
 
				+||	AND	$C,$B,$F
			
 
				+||	ANDN	$D,$B,$F0
			
 
				+||	ADD	$K,$E,$T		; T=E+K
			
 
				+
			
 
				+	XOR	$F0,$F,$F		; F_00_19(B,C,D)
			
 
				+||	MV	$D,$E			; E=D
			
 
				+||	MV	$C,$D			; D=C
			
 
				+||	SWAP2	$TX1,$TX2
			
 
				+||	LDNW	*${INP}++,$TX1
			
 
				+
			
 
				+	ADD	$F,$T,$T		; T+=F_00_19(B,C,D)
			
 
				+||	ROTL	$B,30,$C		; C=ROL(B,30)
			
 
				+||	SWAP4	$TX2,$TX2		; byte swap
			
 
				+||	LDW	*${XPA}++,$X0		; fetches from X ring buffer are
			
 
				+||	LDW	*${XPB}[4],$X2		; 2 iterations ahead
			
 
				+
			
 
				+	ADD	$Arot,$T,$T		; T+=ROL(A,5)
			
 
				+||	MV	$A,$B			; B=A
			
 
				+||	LDW	*${XPA}[7],$X8
			
 
				+||	MV	$TX3,$X13		; ||	LDW	*${XPB}[15],$X13
			
 
				+||	MV	$TX2,$TX3
			
 
				+
			
 
				+	ADD	$TX2,$T,$A		; A=T+Xi
			
 
				+||	STW	$TX2,*${XPB}++
			
 
				+;;==================================================
			
 
				+	ROTL	$A,5,$Arot		; BODY_15
			
 
				+||	AND	$C,$B,$F
			
 
				+||	ANDN	$D,$B,$F0
			
 
				+||	ADD	$K,$E,$T		; T=E+K
			
 
				+
			
 
				+	XOR	$F0,$F,$F		; F_00_19(B,C,D)
			
 
				+||	MV	$D,$E			; E=D
			
 
				+||	MV	$C,$D			; D=C
			
 
				+||	SWAP2	$TX1,$TX2
			
 
				+
			
 
				+	ADD	$F,$T,$T		; T+=F_00_19(B,C,D)
			
 
				+||	ROTL	$B,30,$C		; C=ROL(B,30)
			
 
				+||	SWAP4	$TX2,$TX2		; byte swap
			
 
				+||	XOR	$X0,$X2,$TX0		; Xupdate XORs are 1 iteration ahead
			
 
				+||	LDW	*${XPA}++,$X0
			
 
				+||	LDW	*${XPB}[4],$X2
			
 
				+
			
 
				+	ADD	$Arot,$T,$T		; T+=ROL(A,5)
			
 
				+||	MV	$A,$B			; B=A
			
 
				+||	XOR	$X8,$X13,$TX1
			
 
				+||	LDW	*${XPA}[7],$X8
			
 
				+||	MV	$TX3,$X13		; ||	LDW	*${XPB}[15],$X13
			
 
				+||	MV	$TX2,$TX3
			
 
				+
			
 
				+	ADD	$TX2,$T,$A		; A=T+Xi
			
 
				+||	STW	$TX2,*${XPB}++
			
 
				+||	XOR	$TX0,$TX1,$TX1
			
 
				+||	MVK	3,B0
			
 
				+;;==================================================
			
 
				+	SPLOOPD	5			; BODY_16_19
			
 
				+||	MVC	B0,ILC
			
 
				+
			
 
				+	ROTL	$A,5,$Arot
			
 
				+||	AND	$C,$B,$F
			
 
				+||	ANDN	$D,$B,$F0
			
 
				+||	ADD	$K,$E,$T		; T=E+K
			
 
				+||	ROTL	$TX1,1,$TX2		; Xupdate output
			
 
				+
			
 
				+	XOR	$F0,$F,$F		; F_00_19(B,C,D)
			
 
				+||	MV	$D,$E			; E=D
			
 
				+||	MV	$C,$D			; D=C
			
 
				+
			
 
				+	ADD	$F,$T,$T		; T+=F_00_19(B,C,D)
			
 
				+||	ROTL	$B,30,$C		; C=ROL(B,30)
			
 
				+||	XOR	$X0,$X2,$TX0
			
 
				+||	LDW	*${XPA}++,$X0
			
 
				+||	LDW	*${XPB}[4],$X2
			
 
				+
			
 
				+	ADD	$Arot,$T,$T		; T+=ROL(A,5)
			
 
				+||	MV	$A,$B			; B=A
			
 
				+||	XOR	$X8,$X13,$TX1
			
 
				+||	LDW	*${XPA}[7],$X8
			
 
				+||	MV	$TX3,$X13		; ||	LDW	*${XPB}[15],$X13
			
 
				+||	MV	$TX2,$TX3
			
 
				+
			
 
				+	ADD	$TX2,$T,$A		; A=T+Xi
			
 
				+||	STW	$TX2,*${XPB}++
			
 
				+||	XOR	$TX0,$TX1,$TX1
			
 
				+	SPKERNEL
			
 
				+
			
 
				+	MVK	0xffffeba1,$K
			
 
				+||	MVK	19,B0
			
 
				+	MVKH	0x6ed90000,$K		; K_20_39
			
 
				+___
			
 
				+sub BODY_20_39 {	# append the SPLOOPD round kernel used for rounds 20-39 and, per the later call's comment, 60-78
			
 
				+$code.=<<___;	# kernel proper; the iteration count is whatever the caller left in B0 (moved to ILC below)
			
 
				+;;==================================================
			
 
				+	SPLOOPD	5			; BODY_20_39
			
 
				+||	MVC	B0,ILC
			
 
				+
			
 
				+	ROTL	$A,5,$Arot
			
 
				+||	XOR	$B,$C,$F
			
 
				+||	ADD	$K,$E,$T		; T=E+K
			
 
				+||	ROTL	$TX1,1,$TX2		; Xupdate output
			
 
				+
			
 
				+	XOR	$D,$F,$F		; F_20_39(B,C,D)
			
 
				+||	MV	$D,$E			; E=D
			
 
				+||	MV	$C,$D			; D=C
			
 
				+
			
 
				+	ADD	$F,$T,$T		; T+=F_20_39(B,C,D)
			
 
				+||	ROTL	$B,30,$C		; C=ROL(B,30)
			
 
				+||	XOR	$X0,$X2,$TX0
			
 
				+||	LDW	*${XPA}++,$X0
			
 
				+||	LDW	*${XPB}[4],$X2
			
 
				+
			
 
				+	ADD	$Arot,$T,$T		; T+=ROL(A,5)
			
 
				+||	MV	$A,$B			; B=A
			
 
				+||	XOR	$X8,$X13,$TX1
			
 
				+||	LDW	*${XPA}[7],$X8
			
 
				+||	MV	$TX3,$X13		; ||	LDW	*${XPB}[15],$X13
			
 
				+||	MV	$TX2,$TX3
			
 
				+
			
 
				+	ADD	$TX2,$T,$A		; A=T+Xi
			
 
				+||	STW	$TX2,*${XPB}++		; last one is redundant
			
 
				+||	XOR	$TX0,$TX1,$TX1
			
 
				+	SPKERNEL
			
 
				+___
			
 
				+$code.=<<___ if (!shift);	# called without (or with a false) argument: also load the K_40_59 constant into $K
			
 
				+	MVK	0xffffbcdc,$K
			
 
				+	MVKH	0x8f1b0000,$K		; K_40_59
			
 
				+___
			
 
				+}	&BODY_20_39();	# first expansion: no argument, so the K_40_59 load is emitted after the kernel
			
 
				+$code.=<<___;
			
 
				+;;==================================================
			
 
				+	SPLOOPD	5			; BODY_40_59
			
 
				+||	MVC	B0,ILC
			
 
				+||	AND	$B,$C,$F
			
 
				+||	AND	$B,$D,$F0
			
 
				+
			
 
				+	ROTL	$A,5,$Arot
			
 
				+||	XOR	$F0,$F,$F
			
 
				+||	AND	$C,$D,$F0
			
 
				+||	ADD	$K,$E,$T		; T=E+K
			
 
				+||	ROTL	$TX1,1,$TX2		; Xupdate output
			
 
				+
			
 
				+	XOR	$F0,$F,$F		; F_40_59(B,C,D)
			
 
				+||	MV	$D,$E			; E=D
			
 
				+||	MV	$C,$D			; D=C
			
 
				+
			
 
				+	ADD	$F,$T,$T		; T+=F_40_59(B,C,D)
			
 
				+||	ROTL	$B,30,$C		; C=ROL(B,30)
			
 
				+||	XOR	$X0,$X2,$TX0
			
 
				+||	LDW	*${XPA}++,$X0
			
 
				+||	LDW	*${XPB}[4],$X2
			
 
				+
			
 
				+	ADD	$Arot,$T,$T		; T+=ROL(A,5)
			
 
				+||	MV	$A,$B			; B=A
			
 
				+||	XOR	$X8,$X13,$TX1
			
 
				+||	LDW	*${XPA}[7],$X8
			
 
				+||	MV	$TX3,$X13		; ||	LDW	*${XPB}[15],$X13
			
 
				+||	MV	$TX2,$TX3
			
 
				+
			
 
				+	ADD	$TX2,$T,$A		; A=T+Xi
			
 
				+||	STW	$TX2,*${XPB}++
			
 
				+||	XOR	$TX0,$TX1,$TX1
			
 
				+||	AND	$B,$C,$F
			
 
				+||	AND	$B,$D,$F0
			
 
				+	SPKERNEL
			
 
				+
			
 
				+	MVK	0xffffc1d6,$K
			
 
				+||	MVK	18,B0
			
 
				+	MVKH	0xca620000,$K		; K_60_79
			
 
				+___
			
 
				+	&BODY_20_39(-1);		# BODY_60_78
			
 
				+$code.=<<___;
			
 
				+;;==================================================
			
 
				+   [A0]	B	loop?
			
 
				+||	ROTL	$A,5,$Arot		; BODY_79
			
 
				+||	XOR	$B,$C,$F
			
 
				+||	ROTL	$TX1,1,$TX2		; Xupdate output
			
 
				+
			
 
				+   [A0]	LDNW	*${INP}++,$TX1		; pre-fetch input
			
 
				+||	ADD	$K,$E,$T		; T=E+K
			
 
				+||	XOR	$D,$F,$F		; F_20_39(B,C,D)
			
 
				+
			
 
				+	ADD	$F,$T,$T		; T+=F_20_39(B,C,D)
			
 
				+||	ADD	$Ectx,$D,$E		; E=D,E+=Ectx
			
 
				+||	ADD	$Dctx,$C,$D		; D=C,D+=Dctx
			
 
				+||	ROTL	$B,30,$C		; C=ROL(B,30)
			
 
				+
			
 
				+	ADD	$Arot,$T,$T		; T+=ROL(A,5)
			
 
				+||	ADD	$Bctx,$A,$B		; B=A,B+=Bctx
			
 
				+
			
 
				+	ADD	$TX2,$T,$A		; A=T+Xi
			
 
				+
			
 
				+	ADD	$Actx,$A,$A		; A+=Actx
			
 
				+||	ADD	$Cctx,$C,$C		; C+=Cctx
			
 
				+;; end of loop?
			
 
				+
			
 
				+	BNOP	RA			; return
			
 
				+||	MV	FP,SP			; restore stack pointer
			
 
				+||	LDW	*FP[0],FP		; restore frame pointer
			
 
				+	STW	$A,*${CTX}[0]		; emit A-E...
			
 
				+||	MVK	0,B0
			
 
				+	STW	$B,*${CTX}[1]
			
 
				+||	MVC	B0,AMR			; clear AMR
			
 
				+	STW	$C,*${CTX}[2]
			
 
				+	STW	$D,*${CTX}[3]
			
 
				+	STW	$E,*${CTX}[4]
			
 
				+	.endasmfunc
			
 
				+
			
 
				+	.sect	.const
			
 
				+	.cstring "SHA1 block transform for C64x+, CRYPTOGAMS by <appro\@openssl.org>"
			
 
				+	.align	4
			
 
				+___
			
 
				+
			
 
				+print $code;	# write the accumulated assembly text to STDOUT (possibly redirected to the output file)
			
 
				+close STDOUT or die "error closing STDOUT: $!";	# buffered write errors (e.g. disk full) only surface at close
			
--- a/crypto/sha/asm/sha256-c64xplus.pl
+++ b/crypto/sha/asm/sha256-c64xplus.pl
@@ -0,0 +1,292 @@
 
				+#!/usr/bin/env perl
			
 
				+#
			
 
				+# ====================================================================
			
 
				+# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
			
 
				+# project. The module is, however, dual licensed under OpenSSL and
			
 
				+# CRYPTOGAMS licenses depending on where you obtain it. For further
			
 
				+# details see http://www.openssl.org/~appro/cryptogams/.
			
 
				+# ====================================================================
			
 
				+#
			
 
				+# SHA256 for C64x+.
			
 
				+#
			
 
				+# January 2012
			
 
				+#
			
 
				+# Performance is just below 10 cycles per processed byte, which is
			
 
				+# almost 40% faster than compiler-generated code. Unroll is unlikely
			
 
				+# to give more than ~8% improvement...
			
 
				+#
			
 
				+# !!! Note that this module uses AMR, which means that all interrupt
			
 
				+# service routines are expected to preserve it and for their own well-being
			
 
				+# zero it upon entry.
			
 
				+
			
 
				+while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {}	# skip args until one looks like name.ext
			
 
				+open STDOUT,">$output";	# redirect STDOUT, where $code is printed, to that file
			
 
				+
			
 
				+($CTXA,$INP,$NUM) = ("A4","B4","A6");            # arguments: ctx, input, number of blocks
			
 
				+ $K256="A3";	# pointer into the K256 constant table
			
 
				+
			
 
				+($A,$Actx,$B,$Bctx,$C,$Cctx,$D,$Dctx,$T2,$S0,$s1,$t0a,$t1a,$t2a,$X9,$X14)
			
 
				+	=map("A$_",(16..31));	# A16..A31: a-d, their saved ctx copies, A-side temporaries
			
 
				+($E,$Ectx,$F,$Fctx,$G,$Gctx,$H,$Hctx,$T1,$S1,$s0,$t0e,$t1e,$t2e,$X1,$X15)
			
 
				+	=map("B$_",(16..31));	# B16..B31: e-h, their saved ctx copies, B-side temporaries
			
 
				+
			
 
				+($Xia,$Xib)=("A5","B5");			# circular/ring buffer
			
 
				+ $CTXB=$t2e;	# B-side copy of the ctx pointer; shares its register with $t2e
			
 
				+
			
 
				+($Xn,$X0,$K)=("B7","B8","B9");	# pre-fetched input word, current X[i], current K256[i]
			
 
				+($Maj,$Ch)=($T2,"B6");	# Maj is built in the same register as $T2
			
 
				+
			
 
				+$code.=<<___;
			
 
				+	.text
			
 
				+
			
 
				+	.asg	B3,RA
			
 
				+	.asg	A15,FP
			
 
				+	.asg	B15,SP
			
 
				+
			
 
				+	.if	.BIG_ENDIAN
			
 
				+	.asg	SWAP2,MV
			
 
				+	.asg	SWAP4,MV
			
 
				+	.endif
			
 
				+
			
 
				+	.global	_sha256_block_data_order
			
 
				+_sha256_block_data_order:
			
 
				+	.asmfunc stack_usage(64)
			
 
				+	MV	$NUM,A0				; reassign $NUM
			
 
				+||	MVK	-64,B0
			
 
				+  [!A0]	BNOP	RA				; if ($NUM==0) return;
			
 
				+|| [A0]	STW	FP,*SP--[16]			; save frame pointer and alloca(64)
			
 
				+|| [A0]	MV	SP,FP
			
 
				+   [A0]	ADDKPC	_sha256_block_data_order,B2
			
 
				+|| [A0]	AND	B0,SP,SP			; align stack at 64 bytes
			
 
				+   [A0]	MVK	0x00404,B1
			
 
				+|| [A0]	MVKL	(K256-_sha256_block_data_order),$K256
			
 
				+   [A0]	MVKH	0x50000,B1
			
 
				+|| [A0]	MVKH	(K256-_sha256_block_data_order),$K256
			
 
				+   [A0]	MVC	B1,AMR				; setup circular addressing
			
 
				+|| [A0]	MV	SP,$Xia
			
 
				+   [A0]	MV	SP,$Xib
			
 
				+|| [A0]	ADD	B2,$K256,$K256
			
 
				+|| [A0]	MV	$CTXA,$CTXB
			
 
				+|| [A0]	SUBAW	SP,2,SP				; reserve two words above buffer
			
 
				+	LDW	*${CTXA}[0],$A			; load ctx
			
 
				+||	LDW	*${CTXB}[4],$E
			
 
				+	LDW	*${CTXA}[1],$B
			
 
				+||	LDW	*${CTXB}[5],$F
			
 
				+	LDW	*${CTXA}[2],$C
			
 
				+||	LDW	*${CTXB}[6],$G
			
 
				+	LDW	*${CTXA}[3],$D
			
 
				+||	LDW	*${CTXB}[7],$H
			
 
				+
			
 
				+	LDNW	*$INP++,$Xn			; pre-fetch input
			
 
				+	LDW	*$K256++,$K			; pre-fetch K256[0]
			
 
				+	MVK	14,B0				; loop counters
			
 
				+	MVK	47,B1
			
 
				+||	ADDAW	$Xia,9,$Xia
			
 
				+outerloop?:
			
 
				+	SUB	A0,1,A0
			
 
				+||	MV	$A,$Actx
			
 
				+||	MV	$E,$Ectx
			
 
				+||	MVD	$B,$Bctx
			
 
				+||	MVD	$F,$Fctx
			
 
				+	MV	$C,$Cctx
			
 
				+||	MV	$G,$Gctx
			
 
				+||	MVD	$D,$Dctx
			
 
				+||	MVD	$H,$Hctx
			
 
				+||	SWAP4	$Xn,$X0
			
 
				+
			
 
				+	SPLOOPD	8				; BODY_00_14
			
 
				+||	MVC	B0,ILC
			
 
				+||	SWAP2	$X0,$X0
			
 
				+
			
 
				+	LDNW	*$INP++,$Xn
			
 
				+||	ROTL	$A,30,$S0
			
 
				+||	OR	$A,$B,$Maj
			
 
				+||	AND	$A,$B,$t2a
			
 
				+||	ROTL	$E,26,$S1
			
 
				+||	AND	$F,$E,$Ch
			
 
				+||	ANDN	$G,$E,$t2e
			
 
				+	ROTL	$A,19,$t0a
			
 
				+||	AND	$C,$Maj,$Maj
			
 
				+||	ROTL	$E,21,$t0e
			
 
				+||	XOR	$t2e,$Ch,$Ch			; Ch(e,f,g) = (e&f)^(~e&g)
			
 
				+	ROTL	$A,10,$t1a
			
 
				+||	OR	$t2a,$Maj,$Maj			; Maj(a,b,c) = ((a|b)&c)|(a&b)
			
 
				+||	ROTL	$E,7,$t1e
			
 
				+||	ADD	$K,$H,$T1			; T1 = h + K256[i]
			
 
				+	ADD	$X0,$T1,$T1			; T1 += X[i];
			
 
				+||	STW	$X0,*$Xib++
			
 
				+||	XOR	$t0a,$S0,$S0
			
 
				+||	XOR	$t0e,$S1,$S1
			
 
				+	XOR	$t1a,$S0,$S0			; Sigma0(a)
			
 
				+||	XOR	$t1e,$S1,$S1			; Sigma1(e)
			
 
				+||	LDW	*$K256++,$K			; pre-fetch K256[i+1]
			
 
				+||	ADD	$Ch,$T1,$T1			; T1 += Ch(e,f,g)
			
 
				+	ADD	$S1,$T1,$T1			; T1 += Sigma1(e)
			
 
				+||	ADD	$S0,$Maj,$T2			; T2 = Sigma0(a) + Maj(a,b,c)
			
 
				+||	ROTL	$G,0,$H				; h = g
			
 
				+||	MV	$F,$G				; g = f
			
 
				+||	MV	$X0,$X14
			
 
				+||	SWAP4	$Xn,$X0
			
 
				+	SWAP2	$X0,$X0
			
 
				+||	MV	$E,$F				; f = e
			
 
				+||	ADD	$D,$T1,$E			; e = d + T1
			
 
				+||	MV	$C,$D				; d = c
			
 
				+	MV	$B,$C				; c = b
			
 
				+||	MV	$A,$B				; b = a
			
 
				+||	ADD	$T1,$T2,$A			; a = T1 + T2
			
 
				+	SPKERNEL
			
 
				+
			
 
				+	ROTL	$A,30,$S0			; BODY_15
			
 
				+||	OR	$A,$B,$Maj
			
 
				+||	AND	$A,$B,$t2a
			
 
				+||	ROTL	$E,26,$S1
			
 
				+||	AND	$F,$E,$Ch
			
 
				+||	ANDN	$G,$E,$t2e
			
 
				+||	LDW	*${Xib}[1],$Xn			; modulo-scheduled
			
 
				+	ROTL	$A,19,$t0a
			
 
				+||	AND	$C,$Maj,$Maj
			
 
				+||	ROTL	$E,21,$t0e
			
 
				+||	XOR	$t2e,$Ch,$Ch			; Ch(e,f,g) = (e&f)^(~e&g)
			
 
				+||	LDW	*${Xib}[2],$X1			; modulo-scheduled
			
 
				+	ROTL	$A,10,$t1a
			
 
				+||	OR	$t2a,$Maj,$Maj			; Maj(a,b,c) = ((a|b)&c)|(a&b)
			
 
				+||	ROTL	$E,7,$t1e
			
 
				+||	ADD	$K,$H,$T1			; T1 = h + K256[i]
			
 
				+	ADD	$X0,$T1,$T1			; T1 += X[i];
			
 
				+||	STW	$X0,*$Xib++
			
 
				+||	XOR	$t0a,$S0,$S0
			
 
				+||	XOR	$t0e,$S1,$S1
			
 
				+	XOR	$t1a,$S0,$S0			; Sigma0(a)
			
 
				+||	XOR	$t1e,$S1,$S1			; Sigma1(e)
			
 
				+||	LDW	*$K256++,$K			; pre-fetch K256[i+1]
			
 
				+||	ADD	$Ch,$T1,$T1			; T1 += Ch(e,f,g)
			
 
				+	ADD	$S1,$T1,$T1			; T1 += Sigma1(e)
			
 
				+||	ADD	$S0,$Maj,$T2			; T2 = Sigma0(a) + Maj(a,b,c)
			
 
				+||	ROTL	$G,0,$H				; h = g
			
 
				+||	MV	$F,$G				; g = f
			
 
				+||	MV	$X0,$X15
			
 
				+	MV	$E,$F				; f = e
			
 
				+||	ADD	$D,$T1,$E			; e = d + T1
			
 
				+||	MV	$C,$D				; d = c
			
 
				+||	MV	$Xn,$X0				; modulo-scheduled
			
 
				+||	LDW	*$Xia,$X9			; modulo-scheduled
			
 
				+||	ROTL	$X1,25,$t0e			; modulo-scheduled
			
 
				+||	ROTL	$X14,15,$t0a			; modulo-scheduled
			
 
				+	SHRU	$X1,3,$s0			; modulo-scheduled
			
 
				+||	SHRU	$X14,10,$s1			; modulo-scheduled
			
 
				+||	ROTL	$B,0,$C				; c = b
			
 
				+||	MV	$A,$B				; b = a
			
 
				+||	ADD	$T1,$T2,$A			; a = T1 + T2
			
 
				+
			
 
				+	SPLOOPD	10				; BODY_16_63
			
 
				+||	MVC	B1,ILC
			
 
				+||	ROTL	$X1,14,$t1e			; modulo-scheduled
			
 
				+||	ROTL	$X14,13,$t1a			; modulo-scheduled
			
 
				+
			
 
				+	XOR	$t0e,$s0,$s0
			
 
				+||	XOR	$t0a,$s1,$s1
			
 
				+||	MV	$X15,$X14
			
 
				+||	MV	$X1,$Xn
			
 
				+	XOR	$t1e,$s0,$s0			; sigma0(X[i+1])
			
 
				+||	XOR	$t1a,$s1,$s1			; sigma1(X[i+14])
			
 
				+||	LDW	*${Xib}[2],$X1			; modulo-scheduled
			
 
				+	ROTL	$A,30,$S0
			
 
				+||	OR	$A,$B,$Maj
			
 
				+||	AND	$A,$B,$t2a
			
 
				+||	ROTL	$E,26,$S1
			
 
				+||	AND	$F,$E,$Ch
			
 
				+||	ANDN	$G,$E,$t2e
			
 
				+||	ADD	$X9,$X0,$X0			; X[i] += X[i+9]
			
 
				+	ROTL	$A,19,$t0a
			
 
				+||	AND	$C,$Maj,$Maj
			
 
				+||	ROTL	$E,21,$t0e
			
 
				+||	XOR	$t2e,$Ch,$Ch			; Ch(e,f,g) = (e&f)^(~e&g)
			
 
				+||	ADD	$s0,$X0,$X0			; X[i] += sigma0(X[i+1])
			
 
				+	ROTL	$A,10,$t1a
			
 
				+||	OR	$t2a,$Maj,$Maj			; Maj(a,b,c) = ((a|b)&c)|(a&b)
			
 
				+||	ROTL	$E,7,$t1e
			
 
				+||	ADD	$H,$K,$T1			; T1 = h + K256[i]
			
 
				+||	ADD	$s1,$X0,$X0			; X[i] += sigma1(X[i+14])
			
 
				+	XOR	$t0a,$S0,$S0
			
 
				+||	XOR	$t0e,$S1,$S1
			
 
				+||	ADD	$X0,$T1,$T1			; T1 += X[i]
			
 
				+||	STW	$X0,*$Xib++
			
 
				+	XOR	$t1a,$S0,$S0			; Sigma0(a)
			
 
				+||	XOR	$t1e,$S1,$S1			; Sigma1(e)
			
 
				+||	ADD	$Ch,$T1,$T1			; T1 += Ch(e,f,g)
			
 
				+||	MV	$X0,$X15
			
 
				+||	ROTL	$G,0,$H				; h = g
			
 
				+||	LDW	*$K256++,$K			; pre-fetch K256[i+1]
			
 
				+	ADD	$S1,$T1,$T1			; T1 += Sigma1(e)
			
 
				+||	ADD	$S0,$Maj,$T2			; T2 = Sigma0(a) + Maj(a,b,c)
			
 
				+||	MV	$F,$G				; g = f
			
 
				+||	MV	$Xn,$X0				; modulo-scheduled
			
 
				+||	LDW	*++$Xia,$X9			; modulo-scheduled
			
 
				+||	ROTL	$X1,25,$t0e			; modulo-scheduled
			
 
				+||	ROTL	$X14,15,$t0a			; modulo-scheduled
			
 
				+	ROTL	$X1,14,$t1e			; modulo-scheduled
			
 
				+||	ROTL	$X14,13,$t1a			; modulo-scheduled
			
 
				+||	MV	$E,$F				; f = e
			
 
				+||	ADD	$D,$T1,$E			; e = d + T1
			
 
				+||	MV	$C,$D				; d = c
			
 
				+||	MV	$B,$C				; c = b
			
 
				+	MV	$A,$B				; b = a
			
 
				+||	ADD	$T1,$T2,$A			; a = T1 + T2
			
 
				+||	SHRU	$X1,3,$s0			; modulo-scheduled
			
 
				+||	SHRU	$X14,10,$s1			; modulo-scheduled
			
 
				+	SPKERNEL
			
 
				+
			
 
				+   [A0]	B	outerloop?
			
 
				+|| [A0]	LDNW	*$INP++,$Xn			; pre-fetch input
			
 
				+|| [A0]	ADDK	-260,$K256			; rewind K256
			
 
				+||	ADD	$Actx,$A,$A			; accumulate ctx
			
 
				+||	ADD	$Ectx,$E,$E
			
 
				+||	ADD	$Bctx,$B,$B
			
 
				+	ADD	$Fctx,$F,$F
			
 
				+||	ADD	$Cctx,$C,$C
			
 
				+||	ADD	$Gctx,$G,$G
			
 
				+||	ADD	$Dctx,$D,$D
			
 
				+||	ADD	$Hctx,$H,$H
			
 
				+|| [A0]	LDW	*$K256++,$K			; pre-fetch K256[0]
			
 
				+
			
 
				+  [!A0]	BNOP	RA
			
 
				+||[!A0]	MV	$CTXA,$CTXB
			
 
				+  [!A0]	MV	FP,SP				; restore stack pointer
			
 
				+||[!A0]	LDW	*FP[0],FP			; restore frame pointer
			
 
				+  [!A0]	STW	$A,*${CTXA}[0]  		; save ctx
			
 
				+||[!A0]	STW	$E,*${CTXB}[4]
			
 
				+||[!A0]	MVK	0,B0
			
 
				+  [!A0]	STW	$B,*${CTXA}[1]
			
 
				+||[!A0]	STW	$F,*${CTXB}[5]
			
 
				+||[!A0]	MVC	B0,AMR				; clear AMR
			
 
				+	STW	$C,*${CTXA}[2]
			
 
				+||	STW	$G,*${CTXB}[6]
			
 
				+	STW	$D,*${CTXA}[3]
			
 
				+||	STW	$H,*${CTXB}[7]
			
 
				+	.endasmfunc
			
 
				+
			
 
				+	.sect	".const:sha_asm"
			
 
				+	.align	128
			
 
				+K256:
			
 
				+	.uword	0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5
			
 
				+	.uword	0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5
			
 
				+	.uword	0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3
			
 
				+	.uword	0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174
			
 
				+	.uword	0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc
			
 
				+	.uword	0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da
			
 
				+	.uword	0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7
			
 
				+	.uword	0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967
			
 
				+	.uword	0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13
			
 
				+	.uword	0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85
			
 
				+	.uword	0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3
			
 
				+	.uword	0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070
			
 
				+	.uword	0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5
			
 
				+	.uword	0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3
			
 
				+	.uword	0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208
			
 
				+	.uword	0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
			
 
				+	.cstring "SHA256 block transform for C64x+, CRYPTOGAMS by <appro\@openssl.org>"
			
 
				+	.align	4
			
 
				+
			
 
				+___
			
 
				+
			
 
				+print $code;
			
--- a/crypto/sha/asm/sha512-c64xplus.pl
+++ b/crypto/sha/asm/sha512-c64xplus.pl
@@ -0,0 +1,410 @@
 
				+#!/usr/bin/env perl
			
 
				+#
			
 
				+# ====================================================================
			
 
				+# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
			
 
				+# project. The module is, however, dual licensed under OpenSSL and
			
 
				+# CRYPTOGAMS licenses depending on where you obtain it. For further
			
 
				+# details see http://www.openssl.org/~appro/cryptogams/.
			
 
				+# ====================================================================
			
 
				+#
			
 
				+# SHA512 for C64x+.
			
 
				+#
			
 
				+# January 2012
			
 
				+#
			
 
				+# Performance is 19 cycles per processed byte. Compared to block
			
 
				+# transform function from sha512.c compiled with cl6x with -mv6400+
			
 
				+# -o2 -DOPENSSL_SMALL_FOOTPRINT it's almost 7x faster and 2x smaller.
			
 
				+# Loop unrolling won't make this implementation any faster, because
			
 
				+# it's effectively dominated by SHRU||SHL pairs and you can't schedule
			
 
				+# more of them.
			
 
				+#
			
 
				+# !!! Note that this module uses AMR, which means that all interrupt
			
 
				+# service routines are expected to preserve it and for their own well-being
			
 
				+# zero it upon entry.
			
 
				+
			
 
				+while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {}	# skip args until one looks like name.ext
			
 
				+open STDOUT,">$output";	# redirect STDOUT, where $code is printed, to that file
			
 
				+
			
 
				+($CTXA,$INP,$NUM) = ("A4","B4","A6");            # arguments: ctx, input, number of blocks
			
 
				+ $K512="A3";	# pointer into the K512 constant table
			
 
				+
			
 
				+($Ahi,$Actxhi,$Bhi,$Bctxhi,$Chi,$Cctxhi,$Dhi,$Dctxhi,
			
 
				+ $Ehi,$Ectxhi,$Fhi,$Fctxhi,$Ghi,$Gctxhi,$Hhi,$Hctxhi)=map("A$_",(16..31));	# high 32-bit halves in A16..A31
			
 
				+($Alo,$Actxlo,$Blo,$Bctxlo,$Clo,$Cctxlo,$Dlo,$Dctxlo,
			
 
				+ $Elo,$Ectxlo,$Flo,$Fctxlo,$Glo,$Gctxlo,$Hlo,$Hctxlo)=map("B$_",(16..31));	# low 32-bit halves in B16..B31
			
 
				+
			
 
				+($S1hi,$CHhi,$S0hi,$t0hi)=map("A$_",(10..13));	# A10..A13 are saved on the stack by the prologue
			
 
				+($S1lo,$CHlo,$S0lo,$t0lo)=map("B$_",(10..13));	# B10..B13 are saved on the stack by the prologue
			
 
				+($T1hi,         $T2hi)=         ("A6","A7");	# A6 held $NUM on entry; $NUM is moved to A0 first
			
 
				+($T1lo,$T1carry,$T2lo,$T2carry)=("B6","B7","B8","B9");	# ADDU writes carry:low register pairs
			
 
				+($Khi,$Klo)=("A9","A8");	# register pair loaded with the current K512[i]
			
 
				+($MAJhi,$MAJlo)=($T2hi,$T2lo);	# Maj is built in the same registers as T2
			
 
				+($t1hi,$t1lo)=($Khi,"B2");	# $t1hi shares its register with $Khi
			
 
				+ $CTXB=$t1lo;	# B-side copy of the ctx pointer; shares its register with $t1lo
			
 
				+
			
 
				+($Xihi,$Xilo)=("A5","B5");			# circular/ring buffer
			
 
				+
			
 
				+$code.=<<___;
			
 
				+	.text
			
 
				+
			
 
				+	.asg	B3,RA
			
 
				+	.asg	A15,FP
			
 
				+	.asg	B15,SP
			
 
				+
			
 
				+	.if	.BIG_ENDIAN
			
 
				+	.asg	$Khi,KHI
			
 
				+	.asg	$Klo,KLO
			
 
				+	.else
			
 
				+	.asg	$Khi,KLO
			
 
				+	.asg	$Klo,KHI
			
 
				+	.endif
			
 
				+
			
 
				+	.global	_sha512_block_data_order
			
 
				+_sha512_block_data_order:
			
 
				+	.asmfunc stack_usage(40+128)
			
 
				+	MV	$NUM,A0				; reassign $NUM
			
 
				+||	MVK	-128,B0
			
 
				+  [!A0]	BNOP	RA				; if ($NUM==0) return;
			
 
				+|| [A0]	STW	FP,*SP--(40)			; save frame pointer
			
 
				+|| [A0]	MV	SP,FP
			
 
				+   [A0]	STDW	B13:B12,*SP[4]
			
 
				+|| [A0]	MVK	0x00404,B1
			
 
				+   [A0]	STDW	B11:B10,*SP[3]
			
 
				+|| [A0]	STDW	A13:A12,*FP[-3]
			
 
				+|| [A0]	MVKH	0x60000,B1
			
 
				+   [A0]	STDW	A11:A10,*SP[1]
			
 
				+|| [A0]	MVC	B1,AMR				; setup circular addressing
			
 
				+|| [A0]	ADD	B0,SP,SP			; alloca(128)
			
 
				+   [A0]	AND	B0,SP,SP			; align stack at 128 bytes
			
 
				+|| [A0]	ADDKPC	_sha512_block_data_order,B1
			
 
				+|| [A0]	MVKL	(K512-_sha512_block_data_order),$K512
			
 
				+   [A0]	MVKH	(K512-_sha512_block_data_order),$K512
			
 
				+|| [A0]	SUBAW	SP,2,SP				; reserve two words above buffer
			
 
				+	ADDAW	SP,3,$Xilo
			
 
				+	ADDAW	SP,2,$Xihi
			
 
				+
			
 
				+||	MV	$CTXA,$CTXB
			
 
				+	LDW	*${CTXA}[0^.LITTLE_ENDIAN],$Ahi	; load ctx
			
 
				+||	LDW	*${CTXB}[1^.LITTLE_ENDIAN],$Alo
			
 
				+||	ADD	B1,$K512,$K512
			
 
				+	LDW	*${CTXA}[2^.LITTLE_ENDIAN],$Bhi
			
 
				+||	LDW	*${CTXB}[3^.LITTLE_ENDIAN],$Blo
			
 
				+	LDW	*${CTXA}[4^.LITTLE_ENDIAN],$Chi
			
 
				+||	LDW	*${CTXB}[5^.LITTLE_ENDIAN],$Clo
			
 
				+	LDW	*${CTXA}[6^.LITTLE_ENDIAN],$Dhi
			
 
				+||	LDW	*${CTXB}[7^.LITTLE_ENDIAN],$Dlo
			
 
				+	LDW	*${CTXA}[8^.LITTLE_ENDIAN],$Ehi
			
 
				+||	LDW	*${CTXB}[9^.LITTLE_ENDIAN],$Elo
			
 
				+	LDW	*${CTXA}[10^.LITTLE_ENDIAN],$Fhi
			
 
				+||	LDW	*${CTXB}[11^.LITTLE_ENDIAN],$Flo
			
 
				+	LDW	*${CTXA}[12^.LITTLE_ENDIAN],$Ghi
			
 
				+||	LDW	*${CTXB}[13^.LITTLE_ENDIAN],$Glo
			
 
				+	LDW	*${CTXA}[14^.LITTLE_ENDIAN],$Hhi
			
 
				+||	LDW	*${CTXB}[15^.LITTLE_ENDIAN],$Hlo
			
 
				+
			
 
				+	LDNDW	*$INP++,B11:B10			; pre-fetch input
			
 
				+	LDDW	*$K512++,$Khi:$Klo		; pre-fetch K512[0]
			
 
				+outerloop?:
			
 
				+	MVK	15,B0				; loop counters
			
 
				+||	MVK	64,B1
			
 
				+||	SUB	A0,1,A0
			
 
				+	MV	$Ahi,$Actxhi
			
 
				+||	MV	$Alo,$Actxlo
			
 
				+||	MV	$Bhi,$Bctxhi
			
 
				+||	MV	$Blo,$Bctxlo
			
 
				+||	MV	$Chi,$Cctxhi
			
 
				+||	MV	$Clo,$Cctxlo
			
 
				+||	MVD	$Dhi,$Dctxhi
			
 
				+||	MVD	$Dlo,$Dctxlo
			
 
				+	MV	$Ehi,$Ectxhi
			
 
				+||	MV	$Elo,$Ectxlo
			
 
				+||	MV	$Fhi,$Fctxhi
			
 
				+||	MV	$Flo,$Fctxlo
			
 
				+||	MV	$Ghi,$Gctxhi
			
 
				+||	MV	$Glo,$Gctxlo
			
 
				+||	MVD	$Hhi,$Hctxhi
			
 
				+||	MVD	$Hlo,$Hctxlo
			
 
				+loop0_15?:
			
 
				+	.if	.BIG_ENDIAN
			
 
				+	MV	B11,$T1hi
			
 
				+||	MV	B10,$T1lo
			
 
				+	.else
			
 
				+	SWAP4	B10,$T1hi
			
 
				+||	SWAP4	B11,$T1lo
			
 
				+	SWAP2	$T1hi,$T1hi
			
 
				+||	SWAP2	$T1lo,$T1lo
			
 
				+	.endif
			
 
				+loop16_79?:
			
 
				+	STW	$T1hi,*$Xihi++[2]
			
 
				+||	STW	$T1lo,*$Xilo++[2]			; X[i] = T1
			
 
				+||	ADD	$Hhi,$T1hi,$T1hi
			
 
				+||	ADDU	$Hlo,$T1lo,$T1carry:$T1lo		; T1 += h
			
 
				+||	SHRU	$Ehi,14,$S1hi
			
 
				+||	SHL	$Ehi,32-14,$S1lo
			
 
				+	XOR	$Fhi,$Ghi,$CHhi
			
 
				+||	XOR	$Flo,$Glo,$CHlo
			
 
				+||	ADD	KHI,$T1hi,$T1hi
			
 
				+||	ADDU	KLO,$T1carry:$T1lo,$T1carry:$T1lo	; T1 += K512[i]
			
 
				+||	SHRU	$Elo,14,$t0lo
			
 
				+||	SHL	$Elo,32-14,$t0hi
			
 
				+	XOR	$t0hi,$S1hi,$S1hi
			
 
				+||	XOR	$t0lo,$S1lo,$S1lo
			
 
				+||	AND	$Ehi,$CHhi,$CHhi
			
 
				+||	AND	$Elo,$CHlo,$CHlo
			
 
				+||	ROTL	$Ghi,0,$Hhi
			
 
				+||	ROTL	$Glo,0,$Hlo				; h = g
			
 
				+||	SHRU	$Ehi,18,$t0hi
			
 
				+||	SHL	$Ehi,32-18,$t0lo
			
 
				+	XOR	$t0hi,$S1hi,$S1hi
			
 
				+||	XOR	$t0lo,$S1lo,$S1lo
			
 
				+||	XOR	$Ghi,$CHhi,$CHhi
			
 
				+||	XOR	$Glo,$CHlo,$CHlo			; Ch(e,f,g) = ((f^g)&e)^g
			
 
				+||	ROTL	$Fhi,0,$Ghi
			
 
				+||	ROTL	$Flo,0,$Glo				; g = f
			
 
				+||	SHRU	$Elo,18,$t0lo
			
 
				+||	SHL	$Elo,32-18,$t0hi
			
 
				+	XOR	$t0hi,$S1hi,$S1hi
			
 
				+||	XOR	$t0lo,$S1lo,$S1lo
			
 
				+||	OR	$Ahi,$Bhi,$MAJhi
			
 
				+||	OR	$Alo,$Blo,$MAJlo
			
 
				+||	ROTL	$Ehi,0,$Fhi
			
 
				+||	ROTL	$Elo,0,$Flo				; f = e
			
 
				+||	SHRU	$Ehi,41-32,$t0lo
			
 
				+||	SHL	$Ehi,64-41,$t0hi
			
 
				+	XOR	$t0hi,$S1hi,$S1hi
			
 
				+||	XOR	$t0lo,$S1lo,$S1lo
			
 
				+||	AND	$Chi,$MAJhi,$MAJhi
			
 
				+||	AND	$Clo,$MAJlo,$MAJlo
			
 
				+||	ROTL	$Dhi,0,$Ehi
			
 
				+||	ROTL	$Dlo,0,$Elo				; e = d
			
 
				+||	SHRU	$Elo,41-32,$t0hi
			
 
				+||	SHL	$Elo,64-41,$t0lo
			
 
				+	XOR	$t0hi,$S1hi,$S1hi
			
 
				+||	XOR	$t0lo,$S1lo,$S1lo			; Sigma1(e)
			
 
				+||	AND	$Ahi,$Bhi,$t1hi
			
 
				+||	AND	$Alo,$Blo,$t1lo
			
 
				+||	ROTL	$Chi,0,$Dhi
			
 
				+||	ROTL	$Clo,0,$Dlo				; d = c
			
 
				+||	SHRU	$Ahi,28,$S0hi
			
 
				+||	SHL	$Ahi,32-28,$S0lo
			
 
				+	OR	$t1hi,$MAJhi,$MAJhi
			
 
				+||	OR	$t1lo,$MAJlo,$MAJlo			; Maj(a,b,c) = ((a|b)&c)|(a&b)
			
 
				+||	ADD	$CHhi,$T1hi,$T1hi
			
 
				+||	ADDU	$CHlo,$T1carry:$T1lo,$T1carry:$T1lo	; T1 += Ch(e,f,g)
			
 
				+||	ROTL	$Bhi,0,$Chi
			
 
				+||	ROTL	$Blo,0,$Clo				; c = b
			
 
				+||	SHRU	$Alo,28,$t0lo
			
 
				+||	SHL	$Alo,32-28,$t0hi
			
 
				+	XOR	$t0hi,$S0hi,$S0hi
			
 
				+||	XOR	$t0lo,$S0lo,$S0lo
			
 
				+||	ADD	$S1hi,$T1hi,$T1hi
			
 
				+||	ADDU	$S1lo,$T1carry:$T1lo,$T1carry:$T1lo	; T1 += Sigma1(e)
			
 
				+||	ROTL	$Ahi,0,$Bhi
			
 
				+||	ROTL	$Alo,0,$Blo				; b = a
			
 
				+||	SHRU	$Ahi,34-32,$t0lo
			
 
				+||	SHL	$Ahi,64-34,$t0hi
			
 
				+	XOR	$t0hi,$S0hi,$S0hi
			
 
				+||	XOR	$t0lo,$S0lo,$S0lo
			
 
				+||	ADD	$MAJhi,$T1hi,$T2hi
			
 
				+||	ADDU	$MAJlo,$T1carry:$T1lo,$T2carry:$T2lo	; T2 = T1+Maj(a,b,c)
			
 
				+||	SHRU	$Alo,34-32,$t0hi
			
 
				+||	SHL	$Alo,64-34,$t0lo
			
 
				+	XOR	$t0hi,$S0hi,$S0hi
			
 
				+||	XOR	$t0lo,$S0lo,$S0lo
			
 
				+||	ADD	$Ehi,$T1hi,$T1hi
			
 
				+||	ADDU	$Elo,$T1carry:$T1lo,$T1carry:$T1lo	; T1 += e
			
 
				+|| [B0]	BNOP	loop0_15?
			
 
				+||	SHRU	$Ahi,39-32,$t0lo
			
 
				+||	SHL	$Ahi,64-39,$t0hi
			
 
				+	XOR	$t0hi,$S0hi,$S0hi
			
 
				+||	XOR	$t0lo,$S0lo,$S0lo
			
 
				+|| [B0]	LDNDW	*$INP++,B11:B10				; pre-fetch input
			
 
				+||[!B1]	BNOP	break?
			
 
				+||	SHRU	$Alo,39-32,$t0hi
			
 
				+||	SHL	$Alo,64-39,$t0lo
			
 
				+	XOR	$t0hi,$S0hi,$S0hi
			
 
				+||	XOR	$t0lo,$S0lo,$S0lo			; Sigma0(a)
			
 
				+||	ADD	$T1carry,$T1hi,$Ehi
			
 
				+||	MV	$T1lo,$Elo				; e = T1
			
 
				+||[!B0]	LDW	*${Xihi}[28],$T1hi
			
 
				+||[!B0]	LDW	*${Xilo}[28],$T1lo			; X[i+14]
			
 
				+	ADD	$S0hi,$T2hi,$T2hi
			
 
				+||	ADDU	$S0lo,$T2carry:$T2lo,$T2carry:$T2lo	; T2 += Sigma0(a)
			
 
				+|| [B1]	LDDW	*$K512++,$Khi:$Klo			; pre-fetch K512[i]
			
 
				+	NOP						; avoid cross-path stall
			
 
				+	ADD	$T2carry,$T2hi,$Ahi
			
 
				+||	MV	$T2lo,$Alo				; a = T2
			
 
				+|| [B0]	SUB	B0,1,B0
			
 
				+;;===== branch to loop0_15? is taken here
			
 
				+	NOP
			
 
				+;;===== branch to break? is taken here
			
 
				+	LDW	*${Xihi}[2],$T2hi
			
 
				+||	LDW	*${Xilo}[2],$T2lo			; X[i+1]
			
 
				+||	SHRU	$T1hi,19,$S1hi
			
 
				+||	SHL	$T1hi,32-19,$S1lo
			
 
				+	SHRU	$T1lo,19,$t0lo
			
 
				+||	SHL	$T1lo,32-19,$t0hi
			
 
				+	XOR	$t0hi,$S1hi,$S1hi
			
 
				+||	XOR	$t0lo,$S1lo,$S1lo
			
 
				+||	SHRU	$T1hi,61-32,$t0lo
			
 
				+||	SHL	$T1hi,64-61,$t0hi
			
 
				+	XOR	$t0hi,$S1hi,$S1hi
			
 
				+||	XOR	$t0lo,$S1lo,$S1lo
			
 
				+||	SHRU	$T1lo,61-32,$t0hi
			
 
				+||	SHL	$T1lo,64-61,$t0lo
			
 
				+	XOR	$t0hi,$S1hi,$S1hi
			
 
				+||	XOR	$t0lo,$S1lo,$S1lo
			
 
				+||	SHRU	$T1hi,6,$t0hi
			
 
				+||	SHL	$T1hi,32-6,$t0lo
			
 
				+	XOR	$t0hi,$S1hi,$S1hi
			
 
				+||	XOR	$t0lo,$S1lo,$S1lo
			
 
				+||	SHRU	$T1lo,6,$t0lo
			
 
				+||	LDW	*${Xihi}[18],$T1hi
			
 
				+||	LDW	*${Xilo}[18],$T1lo			; X[i+9]
			
 
				+	XOR	$t0lo,$S1lo,$S1lo			; sigma1(X[i+14])
			
 
				+
			
 
				+||	LDW	*${Xihi}[0],$CHhi
			
 
				+||	LDW	*${Xilo}[0],$CHlo			; X[i]
			
 
				+||	SHRU	$T2hi,1,$S0hi
			
 
				+||	SHL	$T2hi,32-1,$S0lo
			
 
				+	SHRU	$T2lo,1,$t0lo
			
 
				+||	SHL	$T2lo,32-1,$t0hi
			
 
				+	XOR	$t0hi,$S0hi,$S0hi
			
 
				+||	XOR	$t0lo,$S0lo,$S0lo
			
 
				+||	SHRU	$T2hi,8,$t0hi
			
 
				+||	SHL	$T2hi,32-8,$t0lo
			
 
				+	XOR	$t0hi,$S0hi,$S0hi
			
 
				+||	XOR	$t0lo,$S0lo,$S0lo
			
 
				+||	SHRU	$T2lo,8,$t0lo
			
 
				+||	SHL	$T2lo,32-8,$t0hi
			
 
				+	XOR	$t0hi,$S0hi,$S0hi
			
 
				+||	XOR	$t0lo,$S0lo,$S0lo
			
 
				+||	ADD	$S1hi,$T1hi,$T1hi
			
 
				+||	ADDU	$S1lo,$T1lo,$T1carry:$T1lo		; T1 = X[i+9]+sigma1()
			
 
				+|| [B1]	BNOP	loop16_79?
			
 
				+||	SHRU	$T2hi,7,$t0hi
			
 
				+||	SHL	$T2hi,32-7,$t0lo
			
 
				+	XOR	$t0hi,$S0hi,$S0hi
			
 
				+||	XOR	$t0lo,$S0lo,$S0lo
			
 
				+||	ADD	$CHhi,$T1hi,$T1hi
			
 
				+||	ADDU	$CHlo,$T1carry:$T1lo,$T1carry:$T1lo	; T1 += X[i]
			
 
				+||	SHRU	$T2lo,7,$t0lo
			
 
				+	XOR	$t0lo,$S0lo,$S0lo			; sigma0(X[i+1])
			
 
				+
			
 
				+	ADD	$S0hi,$T1hi,$T1hi
			
 
				+||	ADDU	$S0lo,$T1carry:$T1lo,$T1carry:$T1lo	; T1 += sigma0()
			
 
				+|| [B1]	SUB	B1,1,B1
			
 
				+	NOP						; avoid cross-path stall
			
 
				+	ADD	$T1carry,$T1hi,$T1hi
			
 
				+;;===== branch to loop16_79? is taken here
			
 
				+
			
 
				+break?:
			
 
				+	ADD	$Ahi,$Actxhi,$Ahi		; accumulate ctx
			
 
				+||	ADDU	$Alo,$Actxlo,$Actxlo:$Alo
			
 
				+|| [A0]	LDNDW	*$INP++,B11:B10			; pre-fetch input
			
 
				+|| [A0]	ADDK	-640,$K512			; rewind pointer to K512
			
 
				+	ADD	$Bhi,$Bctxhi,$Bhi
			
 
				+||	ADDU	$Blo,$Bctxlo,$Bctxlo:$Blo
			
 
				+|| [A0]	LDDW	*$K512++,$Khi:$Klo		; pre-fetch K512[0]
			
 
				+	ADD	$Chi,$Cctxhi,$Chi
			
 
				+||	ADDU	$Clo,$Cctxlo,$Cctxlo:$Clo
			
 
				+||	ADD	$Actxlo,$Ahi,$Ahi
			
 
				+||[!A0]	MV	$CTXA,$CTXB
			
 
				+	ADD	$Dhi,$Dctxhi,$Dhi
			
 
				+||	ADDU	$Dlo,$Dctxlo,$Dctxlo:$Dlo
			
 
				+||	ADD	$Bctxlo,$Bhi,$Bhi
			
 
				+||[!A0]	STW	$Ahi,*${CTXA}[0^.LITTLE_ENDIAN]	; save ctx
			
 
				+||[!A0]	STW	$Alo,*${CTXB}[1^.LITTLE_ENDIAN]
			
 
				+	ADD	$Ehi,$Ectxhi,$Ehi
			
 
				+||	ADDU	$Elo,$Ectxlo,$Ectxlo:$Elo
			
 
				+||	ADD	$Cctxlo,$Chi,$Chi
			
 
				+|| [A0]	BNOP	outerloop?
			
 
				+||[!A0]	STW	$Bhi,*${CTXA}[2^.LITTLE_ENDIAN]
			
 
				+||[!A0]	STW	$Blo,*${CTXB}[3^.LITTLE_ENDIAN]
			
 
				+	ADD	$Fhi,$Fctxhi,$Fhi
			
 
				+||	ADDU	$Flo,$Fctxlo,$Fctxlo:$Flo
			
 
				+||	ADD	$Dctxlo,$Dhi,$Dhi
			
 
				+||[!A0]	STW	$Chi,*${CTXA}[4^.LITTLE_ENDIAN]
			
 
				+||[!A0]	STW	$Clo,*${CTXB}[5^.LITTLE_ENDIAN]
			
 
				+	ADD	$Ghi,$Gctxhi,$Ghi
			
 
				+||	ADDU	$Glo,$Gctxlo,$Gctxlo:$Glo
			
 
				+||	ADD	$Ectxlo,$Ehi,$Ehi
			
 
				+||[!A0]	STW	$Dhi,*${CTXA}[6^.LITTLE_ENDIAN]
			
 
				+||[!A0]	STW	$Dlo,*${CTXB}[7^.LITTLE_ENDIAN]
			
 
				+	ADD	$Hhi,$Hctxhi,$Hhi
			
 
				+||	ADDU	$Hlo,$Hctxlo,$Hctxlo:$Hlo
			
 
				+||	ADD	$Fctxlo,$Fhi,$Fhi
			
 
				+||[!A0]	STW	$Ehi,*${CTXA}[8^.LITTLE_ENDIAN]
			
 
				+||[!A0]	STW	$Elo,*${CTXB}[9^.LITTLE_ENDIAN]
			
 
				+	ADD	$Gctxlo,$Ghi,$Ghi
			
 
				+||[!A0]	STW	$Fhi,*${CTXA}[10^.LITTLE_ENDIAN]
			
 
				+||[!A0]	STW	$Flo,*${CTXB}[11^.LITTLE_ENDIAN]
			
 
				+	ADD	$Hctxlo,$Hhi,$Hhi
			
 
				+||[!A0]	STW	$Ghi,*${CTXA}[12^.LITTLE_ENDIAN]
			
 
				+||[!A0]	STW	$Glo,*${CTXB}[13^.LITTLE_ENDIAN]
			
 
				+;;===== branch to outerloop? is taken here
			
 
				+
			
 
				+	STW	$Hhi,*${CTXA}[14^.LITTLE_ENDIAN]
			
 
				+||	STW	$Hlo,*${CTXB}[15^.LITTLE_ENDIAN]
			
 
				+||	MVK	-40,B0
			
 
				+	ADD	FP,B0,SP			; destroy circular buffer
			
 
				+||	LDDW	*FP[-4],A11:A10
			
 
				+	LDDW	*SP[2],A13:A12
			
 
				+||	LDDW	*FP[-2],B11:B10
			
 
				+	LDDW	*SP[4],B13:B12
			
 
				+||	BNOP	RA
			
 
				+	LDW	*++SP(40),FP			; restore frame pointer
			
 
				+	MVK	0,B0
			
 
				+	MVC	B0,AMR				; clear AMR
			
 
				+	NOP	2				; wait till FP is committed
			
 
				+	.endasmfunc
			
 
				+
			
 
				+	.sect	".const:sha_asm"
			
 
				+	.align	128
			
 
				+K512:
			
 
				+	.uword	0x428a2f98,0xd728ae22, 0x71374491,0x23ef65cd
			
 
				+	.uword	0xb5c0fbcf,0xec4d3b2f, 0xe9b5dba5,0x8189dbbc
			
 
				+	.uword	0x3956c25b,0xf348b538, 0x59f111f1,0xb605d019
			
 
				+	.uword	0x923f82a4,0xaf194f9b, 0xab1c5ed5,0xda6d8118
			
 
				+	.uword	0xd807aa98,0xa3030242, 0x12835b01,0x45706fbe
			
 
				+	.uword	0x243185be,0x4ee4b28c, 0x550c7dc3,0xd5ffb4e2
			
 
				+	.uword	0x72be5d74,0xf27b896f, 0x80deb1fe,0x3b1696b1
			
 
				+	.uword	0x9bdc06a7,0x25c71235, 0xc19bf174,0xcf692694
			
 
				+	.uword	0xe49b69c1,0x9ef14ad2, 0xefbe4786,0x384f25e3
			
 
				+	.uword	0x0fc19dc6,0x8b8cd5b5, 0x240ca1cc,0x77ac9c65
			
 
				+	.uword	0x2de92c6f,0x592b0275, 0x4a7484aa,0x6ea6e483
			
 
				+	.uword	0x5cb0a9dc,0xbd41fbd4, 0x76f988da,0x831153b5
			
 
				+	.uword	0x983e5152,0xee66dfab, 0xa831c66d,0x2db43210
			
 
				+	.uword	0xb00327c8,0x98fb213f, 0xbf597fc7,0xbeef0ee4
			
 
				+	.uword	0xc6e00bf3,0x3da88fc2, 0xd5a79147,0x930aa725
			
 
				+	.uword	0x06ca6351,0xe003826f, 0x14292967,0x0a0e6e70
			
 
				+	.uword	0x27b70a85,0x46d22ffc, 0x2e1b2138,0x5c26c926
			
 
				+	.uword	0x4d2c6dfc,0x5ac42aed, 0x53380d13,0x9d95b3df
			
 
				+	.uword	0x650a7354,0x8baf63de, 0x766a0abb,0x3c77b2a8
			
 
				+	.uword	0x81c2c92e,0x47edaee6, 0x92722c85,0x1482353b
			
 
				+	.uword	0xa2bfe8a1,0x4cf10364, 0xa81a664b,0xbc423001
			
 
				+	.uword	0xc24b8b70,0xd0f89791, 0xc76c51a3,0x0654be30
			
 
				+	.uword	0xd192e819,0xd6ef5218, 0xd6990624,0x5565a910
			
 
				+	.uword	0xf40e3585,0x5771202a, 0x106aa070,0x32bbd1b8
			
 
				+	.uword	0x19a4c116,0xb8d2d0c8, 0x1e376c08,0x5141ab53
			
 
				+	.uword	0x2748774c,0xdf8eeb99, 0x34b0bcb5,0xe19b48a8
			
 
				+	.uword	0x391c0cb3,0xc5c95a63, 0x4ed8aa4a,0xe3418acb
			
 
				+	.uword	0x5b9cca4f,0x7763e373, 0x682e6ff3,0xd6b2b8a3
			
 
				+	.uword	0x748f82ee,0x5defb2fc, 0x78a5636f,0x43172f60
			
 
				+	.uword	0x84c87814,0xa1f0ab72, 0x8cc70208,0x1a6439ec
			
 
				+	.uword	0x90befffa,0x23631e28, 0xa4506ceb,0xde82bde9
			
 
				+	.uword	0xbef9a3f7,0xb2c67915, 0xc67178f2,0xe372532b
			
 
				+	.uword	0xca273ece,0xea26619c, 0xd186b8c7,0x21c0c207
			
 
				+	.uword	0xeada7dd6,0xcde0eb1e, 0xf57d4f7f,0xee6ed178
			
 
				+	.uword	0x06f067aa,0x72176fba, 0x0a637dc5,0xa2c898a6
			
 
				+	.uword	0x113f9804,0xbef90dae, 0x1b710b35,0x131c471b
			
 
				+	.uword	0x28db77f5,0x23047d84, 0x32caab7b,0x40c72493
			
 
				+	.uword	0x3c9ebe0a,0x15c9bebc, 0x431d67c4,0x9c100d4c
			
 
				+	.uword	0x4cc5d4be,0xcb3e42b6, 0x597f299c,0xfc657e2a
			
 
				+	.uword	0x5fcb6fab,0x3ad6faec, 0x6c44198c,0x4a475817
			
 
				+	.cstring "SHA512 block transform for C64x+, CRYPTOGAMS by <appro\@openssl.org>"
			
 
				+	.align	4
			
 
				+___
			
 
				+
			
 
				+print $code;
			
 
				+close STDOUT;
			
--- a/crypto/uid.c
+++ b/crypto/uid.c
@@ -65,7 +65,7 @@ int OPENSSL_issetugid(void)
 
				 	return issetugid();
			
 
				 	}
			
 
				 
			
 
				-#elif defined(OPENSSL_SYS_WIN32) || defined(OPENSSL_SYS_VXWORKS) || defined(OPENSSL_SYS_NETWARE)
			
 
				+#elif defined(OPENSSL_SYS_WIN32) || defined(OPENSSL_SYS_VXWORKS) || defined(OPENSSL_SYS_NETWARE) || defined(_TMS320C6X)
			
 
				 
			
 
				 int OPENSSL_issetugid(void)
			
 
				 	{
			
--- a/e_os.h
+++ b/e_os.h
@@ -668,7 +668,7 @@ extern char *sys_errlist[]; extern int sys_nerr;
 
				 #if defined(OPENSSL_SYS_WINDOWS)
			
 
				 #  define strcasecmp _stricmp
			
 
				 #  define strncasecmp _strnicmp
			
 
				-#elif defined(OPENSSL_SYS_VMS)
			
 
				+#elif defined(OPENSSL_SYS_VMS) || defined(OPENSSL_SYS_DSPBIOS)
			
 
				 /* VMS below version 7.0 doesn't have strcasecmp() */
			
 
				 #  include "o_str.h"
			
 
				 #  define strcasecmp OPENSSL_strcasecmp
			
--- a/fips/aes/fips_aesavs.c
+++ b/fips/aes/fips_aesavs.c
@@ -99,7 +99,7 @@ static int AESTest(EVP_CIPHER_CTX *ctx,
 
				     {
			
 
				     const EVP_CIPHER *cipher = NULL;
			
 
				 
			
 
				-    if (strcasecmp(amode, "CBC") == 0)
			
 
				+    if (fips_strcasecmp(amode, "CBC") == 0)
			
 
				 	{
			
 
				 	switch (akeysz)
			
 
				 		{
			
@@ -117,7 +117,7 @@ static int AESTest(EVP_CIPHER_CTX *ctx,
 
				 		}
			
 
				 
			
 
				 	}
			
 
				-    else if (strcasecmp(amode, "ECB") == 0)
			
 
				+    else if (fips_strcasecmp(amode, "ECB") == 0)
			
 
				 	{
			
 
				 	switch (akeysz)
			
 
				 		{
			
@@ -134,7 +134,7 @@ static int AESTest(EVP_CIPHER_CTX *ctx,
 
				 		break;
			
 
				 		}
			
 
				 	}
			
 
				-    else if (strcasecmp(amode, "CFB128") == 0)
			
 
				+    else if (fips_strcasecmp(amode, "CFB128") == 0)
			
 
				 	{
			
 
				 	switch (akeysz)
			
 
				 		{
			
@@ -169,7 +169,7 @@ static int AESTest(EVP_CIPHER_CTX *ctx,
 
				 		break;
			
 
				 		}
			
 
				 	}
			
 
				-    else if(!strcasecmp(amode,"CFB1"))
			
 
				+    else if(!fips_strcasecmp(amode,"CFB1"))
			
 
				 	{
			
 
				 	switch (akeysz)
			
 
				 		{
			
@@ -186,7 +186,7 @@ static int AESTest(EVP_CIPHER_CTX *ctx,
 
				 		break;
			
 
				 		}
			
 
				 	}
			
 
				-    else if(!strcasecmp(amode,"CFB8"))
			
 
				+    else if(!fips_strcasecmp(amode,"CFB8"))
			
 
				 	{
			
 
				 	switch (akeysz)
			
 
				 		{
			
@@ -215,7 +215,7 @@ static int AESTest(EVP_CIPHER_CTX *ctx,
 
				 	}
			
 
				     if (FIPS_cipherinit(ctx, cipher, aKey, iVec, dir) <= 0)
			
 
				 	return 0;
			
 
				-    if(!strcasecmp(amode,"CFB1"))
			
 
				+    if(!fips_strcasecmp(amode,"CFB1"))
			
 
				 	M_EVP_CIPHER_CTX_set_flags(ctx, EVP_CIPH_FLAG_LENGTH_BITS);
			
 
				     if (dir)
			
 
				 		FIPS_cipher(ctx, ciphertext, plaintext, len);
			
@@ -635,10 +635,8 @@ static int proc_file(char *rqfile, char *rspfile)
 
				 		char *xp, *pp = ibuf+2;
			
 
				 		int n;
			
 
				 		if (akeysz)
			
 
				-		    { /* insert current time & date */
			
 
				-		    time_t rtim = time(0);
			
 
				-		    fputs("# ", rfp);
			
 
				-		    copy_line(ctime(&rtim), rfp);
			
 
				+		    {
			
 
				+		    copy_line(ibuf, rfp);
			
 
				 		    }
			
 
				 		else
			
 
				 		    {
			
@@ -876,11 +874,11 @@ int main(int argc, char **argv)
 
				 
			
 
				     if (argc > 1)
			
 
				 	{
			
 
				-	if (strcasecmp(argv[1], "-d") == 0)
			
 
				+	if (fips_strcasecmp(argv[1], "-d") == 0)
			
 
				 	    {
			
 
				 	    d_opt = 1;
			
 
				 	    }
			
 
				-	else if (strcasecmp(argv[1], "-f") == 0)
			
 
				+	else if (fips_strcasecmp(argv[1], "-f") == 0)
			
 
				 	    {
			
 
				 	    d_opt = 0;
			
 
				 	    }
			
--- a/fips/aes/fips_gcmtest.c
+++ b/fips/aes/fips_gcmtest.c
@@ -75,10 +75,11 @@ int main(int argc, char **argv)
 
				 
			
 
				 #include "fips_utl.h"
			
 
				 
			
 
				+static char buf[204800];
			
 
				+static char lbuf[204800];
			
 
				+
			
 
				 static void gcmtest(FILE *in, FILE *out, int encrypt)
			
 
				 	{
			
 
				-	char buf[2048];
			
 
				-	char lbuf[2048];
			
 
				 	char *keyword, *value;
			
 
				 	int keylen = -1, ivlen = -1, aadlen = -1, taglen = -1, ptlen = -1;
			
 
				 	int rv;
			
@@ -266,8 +267,6 @@ static void gcmtest(FILE *in, FILE *out, int encrypt)
 
				 
			
 
				 static void xtstest(FILE *in, FILE *out)
			
 
				 	{
			
 
				-	char buf[204800];
			
 
				-	char lbuf[204800];
			
 
				 	char *keyword, *value;
			
 
				 	int inlen = 0;
			
 
				 	int encrypt = 0;
			
@@ -340,8 +339,6 @@ static void xtstest(FILE *in, FILE *out)
 
				 
			
 
				 static void ccmtest(FILE *in, FILE *out)
			
 
				 	{
			
 
				-	char buf[200048];
			
 
				-	char lbuf[200048];
			
 
				 	char *keyword, *value;
			
 
				 	long l;
			
 
				 	unsigned char *Key = NULL, *Nonce = NULL;
			
--- a/fips/des/fips_desmovs.c
+++ b/fips/des/fips_desmovs.c
@@ -356,10 +356,8 @@ static int tproc_file(char *rqfile, char *rspfile)
 
				 		char *xp, *pp = ibuf+2;
			
 
				 		int n;
			
 
				 		if(*amode)
			
 
				-		    { /* insert current time & date */
			
 
				-		    time_t rtim = time(0);
			
 
				-		    fputs("# ", rfp);
			
 
				-		    copy_line(ctime(&rtim), rfp);
			
 
				+		    {
			
 
				+		    copy_line(ibuf, rfp);
			
 
				 		    }
			
 
				 		else
			
 
				 		    {
			
--- a/fips/dh/fips_dhvs.c
+++ b/fips/dh/fips_dhvs.c
@@ -286,7 +286,7 @@ int main(int argc, char **argv)
 
				 	return 0;
			
 
				 	parse_error:
			
 
				 	fprintf(stderr, "Error Parsing request file\n");
			
 
				-	exit(1);
			
 
				+	return 1;
			
 
				 	}
			
 
				 
			
 
				 #endif
			
--- a/fips/dsa/fips_dsatest.c
+++ b/fips/dsa/fips_dsatest.c
@@ -62,8 +62,10 @@
 
				 #include <stdlib.h>
			
 
				 #include <string.h>
			
 
				 #include <ctype.h>
			
 
				+#ifndef NO_SYS_TYPES_H
			
 
				 #include <sys/types.h>
			
 
				 #include <sys/stat.h>
			
 
				+#endif
			
 
				 
			
 
				 #include "e_os.h"
			
 
				 
			
--- a/fips/fips.c
+++ b/fips/fips.c
@@ -81,7 +81,7 @@ static int fips_started = 0;
 
				 static int fips_is_owning_thread(void);
			
 
				 static int fips_set_owning_thread(void);
			
 
				 static int fips_clear_owning_thread(void);
			
 
				-static unsigned char *fips_signature_witness(void);
			
 
				+static const unsigned char *fips_signature_witness(void);
			
 
				 
			
 
				 #define fips_w_lock()	CRYPTO_w_lock(CRYPTO_LOCK_FIPS)
			
 
				 #define fips_w_unlock()	CRYPTO_w_unlock(CRYPTO_LOCK_FIPS)
			
@@ -148,6 +148,9 @@ void fips_set_selftest_fail(void)
 
				 
			
 
				 extern const void         *FIPS_text_start(),  *FIPS_text_end();
			
 
				 extern const unsigned char FIPS_rodata_start[], FIPS_rodata_end[];
			
 
				+#ifdef _TMS320C6X
			
 
				+const
			
 
				+#endif
			
 
				 unsigned char              FIPS_signature [20] = { 0 };
			
 
				 __fips_constseg
			
 
				 static const char          FIPS_hmac_key[]="etaonrishdlcupfm";
			
@@ -413,9 +416,8 @@ int fips_clear_owning_thread(void)
 
				 	return ret;
			
 
				 	}
			
 
				 
			
 
				-unsigned char *fips_signature_witness(void)
			
 
				+const unsigned char *fips_signature_witness(void)
			
 
				 	{
			
 
				-	extern unsigned char FIPS_signature[];
			
 
				 	return FIPS_signature;
			
 
				 	}
			
 
				 
			
--- a/fips/fips_canister.c
+++ b/fips/fips_canister.c
@@ -35,6 +35,7 @@ const void         *FIPS_text_end(void);
 
				 	(defined(__linux)     && ((defined(__PPC__) && !defined(__PPC64__)) || \
			
 
				 				  defined(__arm__) || defined(__arm)))	|| \
			
 
				 	(defined(__APPLE__) /* verified on all MacOS X & iOS flavors */)|| \
			
 
				+	(defined(_TMS320C6X))						|| \
			
 
				 	(defined(_WIN32)      && defined(_MSC_VER))
			
 
				 #  define FIPS_REF_POINT_IS_CROSS_COMPILER_AWARE
			
 
				 # endif
			
@@ -70,6 +71,10 @@ const unsigned int FIPS_text_startX[]=
 
				 #  pragma const_seg("fipsro$a")
			
 
				 #  pragma const_seg()
			
 
				    __declspec(allocate("fipsro$a"))
			
 
				+# elif defined(_TMS320C6X)
			
 
				+#  pragma CODE_SECTION(instruction_pointer,".fips_text:start")
			
 
				+#  pragma CODE_SECTION(FIPS_ref_point,".fips_text:start")
			
 
				+#  pragma DATA_SECTION(FIPS_rodata_start,".fips_const:start")
			
 
				 # endif
			
 
				 const unsigned int FIPS_rodata_start[]=
			
 
				 	{ 0x46495053, 0x5f726f64, 0x6174615f, 0x73746172 };
			
@@ -87,6 +92,10 @@ const unsigned int FIPS_text_endX[]=
 
				 #  pragma const_seg("fipsro$z")
			
 
				 #  pragma const_seg()
			
 
				    __declspec(allocate("fipsro$z"))
			
 
				+# elif defined(_TMS320C6X)
			
 
				+#  pragma CODE_SECTION(instruction_pointer,".fips_text:end")
			
 
				+#  pragma CODE_SECTION(FIPS_ref_point,".fips_text:end")
			
 
				+#  pragma DATA_SECTION(FIPS_rodata_end,".fips_const:end")
			
 
				 # endif
			
 
				 const unsigned int FIPS_rodata_end[]=
			
 
				 	{ 0x46495053, 0x5f726f64, 0x6174615f, 0x656e645b };
			
--- a/fips/fips_premain.c
+++ b/fips/fips_premain.c
@@ -53,6 +53,12 @@
 
				   int lib$initialize();
			
 
				   globaldef int (*lib_init_ref)() = lib$initialize;
			
 
				 # pragma __standard
			
 
				+#elif defined(_TMS320C6X)
			
 
				+# if defined(__TI_EABI__)
			
 
				+  asm("\t.sect \".init_array\"\n\t.align 4\n\t.field FINGERPRINT_premain,32");
			
 
				+# else
			
 
				+  asm("\t.sect \".pinit\"\n\t.align 4\n\t.field _FINGERPRINT_premain,32");
			
 
				+# endif
			
 
				 #elif 0
			
 
				   The rest has to be taken care of through command line:
			
 
				 
			
--- a/fips/fips_premain.c.sha1
+++ b/fips/fips_premain.c.sha1
@@ -1 +1 @@
 
				-HMAC-SHA1(fips_premain.c)= 1eaf66f76187877ff403708a2948d240f92736a0
			
 
				+HMAC-SHA1(fips_premain.c)= 65b20c3cec235cec85af848e1cd2dfdfa101804a
			
--- a/fips/fipsalgtest.pl
+++ b/fips/fipsalgtest.pl
@@ -495,6 +495,7 @@ my $onedir = 0;
 
				 my $filter = "";
			
 
				 my $tvdir;
			
 
				 my $tprefix;
			
 
				+my $sfprefix = "";
			
 
				 my $debug          = 0;
			
 
				 my $quiet          = 0;
			
 
				 my $notest         = 0;
			
@@ -615,6 +616,9 @@ foreach (@ARGV) {
 
				     elsif (/--script-tprefix=(.*)$/) {
			
 
				         $stprefix = $1;
			
 
				     }
			
 
				+    elsif (/--script-fprefix=(.*)$/) {
			
 
				+        $sfprefix = $1;
			
 
				+    }
			
 
				     elsif (/--mkdir=(.*)$/) {
			
 
				         $mkcmd = $1;
			
 
				     }
			
@@ -1017,6 +1021,10 @@ END
 
				             $out =~ s|/req/(\S+)\.req|/$rspdir/$1.rsp|;
			
 
				             my $outdir = $out;
			
 
				             $outdir =~ s|/[^/]*$||;
			
 
				+            if ( !-d $outdir  && ($outfile eq "" || $minimal_script)) {
			
 
				+                print STDERR "DEBUG: Creating directory $outdir\n" if $debug;
			
 
				+                mkdir($outdir) || die "Can't create directory $outdir";
			
 
				+            }
			
 
				 	    if ($outfile ne "") {
			
 
				 	    	if ($win32) {
			
 
				 		    $outdir =~ tr|/|\\|;
			
@@ -1039,12 +1047,9 @@ END
 
				 		    }
			
 
				 		$lastdir = $outdir;
			
 
				 		}
			
 
				-            } elsif ( !-d $outdir ) {
			
 
				-                print STDERR "DEBUG: Creating directory $outdir\n" if $debug;
			
 
				-                mkdir($outdir) || die "Can't create directory $outdir";
			
 
				             }
			
 
				         }
			
 
				-        my $cmd = "$tcmd \"$req\" \"$out\"";
			
 
				+        my $cmd = "$tcmd \"$sfprefix$req\" \"$sfprefix$out\"";
			
 
				         print STDERR "DEBUG: running test $tname\n" if ( $debug && !$verify );
			
 
				 	if ($outfile ne "") {
			
 
				 	    if ($minimal_script) {
			
--- a/fips/fipssyms.h
+++ b/fips/fipssyms.h
@@ -589,6 +589,7 @@
 
				 #define AES_encrypt fips_aes_encrypt
			
 
				 #define AES_set_decrypt_key fips_aes_set_decrypt_key
			
 
				 #define AES_set_encrypt_key fips_aes_set_encrypt_key
			
 
				+#define AES_ctr32_encrypt fips_aes_ctr32_encrypt
			
 
				 #define BN_from_montgomery fips_bn_from_montgomery
			
 
				 #define BN_num_bits_word FIPS_bn_num_bits_word
			
 
				 #define DES_SPtrans fips_des_sptrans
			
--- a/fips/rand/fips_rand.c
+++ b/fips/rand/fips_rand.c
@@ -66,7 +66,7 @@
 
				 #include <openssl/aes.h>
			
 
				 #include <openssl/err.h>
			
 
				 #include <openssl/fips_rand.h>
			
 
				-#if !(defined(OPENSSL_SYS_WIN32) || defined(OPENSSL_SYS_VXWORKS))
			
 
				+#if !(defined(OPENSSL_SYS_WIN32) || defined(OPENSSL_SYS_VXWORKS) || defined(OPENSSL_SYSNAME_DSPBIOS))
			
 
				 # include <sys/time.h>
			
 
				 #endif
			
 
				 #if defined(OPENSSL_SYS_VXWORKS)
			
@@ -232,8 +232,13 @@ void FIPS_get_timevec(unsigned char *buf, unsigned long *pctr)
 
				 	{
			
 
				 #ifdef OPENSSL_SYS_WIN32
			
 
				 	FILETIME ft;
			
 
				+#ifdef _WIN32_WCE
			
 
				+	SYSTEMTIME t;
			
 
				+#endif
			
 
				 #elif defined(OPENSSL_SYS_VXWORKS)
			
 
				         struct timespec ts;
			
 
				+#elif defined(OPENSSL_SYSNAME_DSPBIOS)
			
 
				+	unsigned long long TSC, OPENSSL_rdtsc();
			
 
				 #else
			
 
				 	struct timeval tv;
			
 
				 #endif
			
@@ -243,7 +248,12 @@ void FIPS_get_timevec(unsigned char *buf, unsigned long *pctr)
 
				 #endif
			
 
				 
			
 
				 #ifdef OPENSSL_SYS_WIN32
			
 
				+#ifdef _WIN32_WCE
			
 
				+	GetSystemTime(&t);
			
 
				+	SystemTimeToFileTime(&t, &ft);
			
 
				+#else
			
 
				 	GetSystemTimeAsFileTime(&ft);
			
 
				+#endif
			
 
				 	buf[0] = (unsigned char) (ft.dwHighDateTime & 0xff);
			
 
				 	buf[1] = (unsigned char) ((ft.dwHighDateTime >> 8) & 0xff);
			
 
				 	buf[2] = (unsigned char) ((ft.dwHighDateTime >> 16) & 0xff);
			
@@ -262,6 +272,16 @@ void FIPS_get_timevec(unsigned char *buf, unsigned long *pctr)
 
				 	buf[5] = (unsigned char) ((ts.tv_nsec >> 8) & 0xff);
			
 
				 	buf[6] = (unsigned char) ((ts.tv_nsec >> 16) & 0xff);
			
 
				 	buf[7] = (unsigned char) ((ts.tv_nsec >> 24) & 0xff);
			
 
				+#elif defined(OPENSSL_SYSNAME_DSPBIOS)
			
 
				+	TSC = OPENSSL_rdtsc();
			
 
				+	buf[0] = (unsigned char) (TSC & 0xff);
			
 
				+	buf[1] = (unsigned char) ((TSC >> 8) & 0xff);
			
 
				+	buf[2] = (unsigned char) ((TSC >> 16) & 0xff);
			
 
				+	buf[3] = (unsigned char) ((TSC >> 24) & 0xff);
			
 
				+	buf[4] = (unsigned char) ((TSC >> 32) & 0xff);
			
 
				+	buf[5] = (unsigned char) ((TSC >> 40) & 0xff);
			
 
				+	buf[6] = (unsigned char) ((TSC >> 48) & 0xff);
			
 
				+	buf[7] = (unsigned char) ((TSC >> 56) & 0xff);
			
 
				 #else
			
 
				 	gettimeofday(&tv,NULL);
			
 
				 	buf[0] = (unsigned char) (tv.tv_sec & 0xff);
			
--- a/ms/do_fips.bat
+++ b/ms/do_fips.bat
@@ -1,7 +1,10 @@
 
				-@echo off
			
 
				+rem @echo off
			
 
				 
			
 
				 SET ASM=%1
			
 
				 SET EXARG=
			
 
				+SET MFILE=ntdll.mak
			
 
				+
			
 
				+if NOT X%OSVERSION% == X goto wince
			
 
				 
			
 
				 if NOT X%PROCESSOR_ARCHITECTURE% == X goto defined 
			
 
				 
			
@@ -42,6 +45,14 @@ SET TARGET=VC-WIN64A
 
				 if x%ASM% == xno-asm goto compile
			
 
				 SET ASM=nasm
			
 
				 
			
 
				+goto compile
			
 
				+
			
 
				+:wince
			
 
				+
			
 
				+echo Auto Configuring for WinCE
			
 
				+SET TARGET=VC-CE
			
 
				+SET MFILE=cedll.mak
			
 
				+
			
 
				 :compile
			
 
				 
			
 
				 if x%ASM% == xno-asm SET EXARG=no-asm
			
@@ -52,13 +63,13 @@ echo on
 
				 
			
 
				 perl util\mkfiles.pl >MINFO
			
 
				 @if ERRORLEVEL 1 goto error
			
 
				-perl util\mk1mf.pl dll %ASM% %TARGET% >ms\ntdll.mak
			
 
				+perl util\mk1mf.pl dll %ASM% %TARGET% >ms\%MFILE%
			
 
				 @if ERRORLEVEL 1 goto error
			
 
				 
			
 
				-nmake -f ms\ntdll.mak clean
			
 
				-nmake -f ms\ntdll.mak
			
 
				+nmake -f ms\%MFILE% clean
			
 
				+nmake -f ms\%MFILE%
			
 
				 @if ERRORLEVEL 1 goto error
			
 
				-nmake -f ms\ntdll.mak install
			
 
				+nmake -f ms\%MFILE% install
			
 
				 @if ERRORLEVEL 1 goto error
			
 
				 
			
 
				 @echo.
			
--- a/test/fips_algvs.c
+++ b/test/fips_algvs.c
@@ -89,6 +89,7 @@ extern int fips_rsavtest_main(int argc, char **argv);
 
				 extern int fips_shatest_main(int argc, char **argv);
			
 
				 extern int fips_test_suite_main(int argc, char **argv);
			
 
				 
			
 
				+#if !defined(_TMS320C6400_PLUS)
			
 
				 #include "fips_aesavs.c"
			
 
				 #include "fips_cmactest.c"
			
 
				 #include "fips_desmovs.c"
			
@@ -106,6 +107,28 @@ extern int fips_test_suite_main(int argc, char **argv);
 
				 #include "fips_shatest.c"
			
 
				 #include "fips_test_suite.c"
			
 
				 
			
 
				+#else
			
 
				+#include "aes/fips_aesavs.c"
			
 
				+#include "cmac/fips_cmactest.c"
			
 
				+#include "des/fips_desmovs.c"
			
 
				+#include "dh/fips_dhvs.c"
			
 
				+#include "rand/fips_drbgvs.c"
			
 
				+#include "dsa/fips_dssvs.c"
			
 
				+#include "ecdh/fips_ecdhvs.c"
			
 
				+#include "ecdsa/fips_ecdsavs.c"
			
 
				+#include "aes/fips_gcmtest.c"
			
 
				+#include "hmac/fips_hmactest.c"
			
 
				+#include "rand/fips_rngvs.c"
			
 
				+#include "rsa/fips_rsagtest.c"
			
 
				+#include "rsa/fips_rsastest.c"
			
 
				+#include "rsa/fips_rsavtest.c"
			
 
				+#include "sha/fips_shatest.c"
			
 
				+#include "fips_test_suite.c"
			
 
				+
			
 
				+#pragma DATA_SECTION(aucCmBootDspLoad, "BootDspSection");
			
 
				+volatile unsigned char aucCmBootDspLoad[8*1024];
			
 
				+#endif
			
 
				+
			
 
				 typedef struct
			
 
				 	{
			
 
				 	const char *name;
			
@@ -221,7 +244,7 @@ static int run_prg(int argc, char **argv)
 
				 
			
 
				 int main(int argc, char **argv)
			
 
				 	{
			
 
				-	char buf[1024];
			
 
				+	static char buf[1024];
			
 
				 	char **args = argv + 1;
			
 
				 	const char *sname = "fipstests.sh";
			
 
				 	ARGS arg;
			
@@ -238,6 +261,10 @@ int main(int argc, char **argv)
 
				 	CRYPTO_mem_ctrl(CRYPTO_MEM_CHECK_ON);
			
 
				 #endif
			
 
				 
			
 
				+#if defined(_TMS320C6400_PLUS)
			
 
				+	SysInit();
			
 
				+#endif
			
 
				+
			
 
				 	if (*args && *args[0] != '-')
			
 
				 		{
			
 
				 		rv = run_prg(argc - 1, args);
			
--- a/util/fips_standalone_sha1
+++ b/util/fips_standalone_sha1
@@ -0,0 +1,32 @@
 
				+#!/usr/bin/env perl
			
 
				+#
			
 
				+$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
			
 
				+
			
 
				+unshift(@INC,$dir);
			
 
				+require "hmac_sha1.pl";
			
 
				+
			
 
				+(!@ARV[0] && -f @ARGV[$#ARGV]) || die "usage: $0 [-verify] file";
			
 
				+
			
 
				+$verify=shift	if (@ARGV[0] eq "-verify");
			
 
				+
			
 
				+sysopen(FD,@ARGV[0],0) || die "$!";
			
 
				+binmode(FD);
			
 
				+
			
 
				+my $ctx = HMAC->Init("etaonrishdlcupfm");
			
 
				+
			
 
				+while (read(FD,$blob,4*1024)) { $ctx->Update($blob); }
			
 
				+
			
 
				+close(FD);
			
 
				+
			
 
				+my $signature = unpack("H*",$ctx->Final());
			
 
				+
			
 
				+print "HMAC-SHA1(@ARGV[0])= $signature\n";
			
 
				+
			
 
				+if ($verify) {
			
 
				+	open(FD,"<@ARGV[0].sha1") || die "$!";
			
 
				+	$line = <FD>;
			
 
				+	close(FD);
			
 
				+	exit(0)	if ($line =~ /HMAC\-SHA1\([^\)]*\)=\s*([0-9a-f]+)/i &&
			
 
				+				$1 eq $signature);
			
 
				+	die "signature mismatch";
			
 
				+}
			
--- a/util/fipsas.pl
+++ b/util/fipsas.pl
@@ -8,9 +8,6 @@ my @ARGS = @ARGV;
 
				 
			
 
				 my $top = shift @ARGS;
			
 
				 my $target = shift @ARGS;
			
 
				-my $tmptarg = $target;
			
 
				-
			
 
				-$tmptarg =~ s/\.[^\\\/\.]+$/.tmp/;
			
 
				 
			
 
				 my $runasm = 1;
			
 
				 
			
@@ -40,43 +37,31 @@ while (<IN>)
 
				 	last if (/assembler/)
			
 
				 	}
			
 
				 
			
 
				-# Store all renames.
			
 
				+# Store all renames [noting minimal length].
			
 
				+my $minlen=0x10000;
			
 
				 while (<IN>)
			
 
				 	{
			
 
				-	if (/^#define\s+(\w+)\s+(\w+)\b/)
			
 
				+	if (/^#define\s+_?(\w+)\s+_?(\w+)\b/)
			
 
				 		{
			
 
				 		$edits{$1} = $2;
			
 
				+		my $len = length($1);
			
 
				+		$minlen = $len if ($len<$minlen);
			
 
				 		}
			
 
				 	}
			
 
				 
			
 
				-my ($from, $to);
			
 
				-
			
 
				-#delete any temp file lying around
			
 
				+open(IN,"$target") || die "Can't open $target for reading";
			
 
				 
			
 
				-unlink $tmptarg;
			
 
				+@code = <IN>;	# suck in whole file
			
 
				 
			
 
				-#rename target temporarily
			
 
				-my $rencnt = 0;
			
 
				-# On windows the previous file doesn't always close straight away
			
 
				-# so retry the rename operation a few times if it fails.
			
 
				-while (!rename($target, $tmptarg))
			
 
				-        {
			
 
				-        sleep 2;
			
 
				-        die "Can't rename $target" if ($rencnt++ > 10);
			
 
				-        }
			
 
				+close IN;
			
 
				 
			
 
				-#edit target
			
 
				-open(IN,$tmptarg) || die "Can't open temporary file";
			
 
				-open(OUT, ">$target") || die "Can't open output file $target";
			
 
				+open(OUT,">$target") || die "Can't open $target for writing";
			
 
				 
			
 
				-while (<IN>)
			
 
				-{
			
 
				-	while (($from, $to) = each %edits)
			
 
				-		{
			
 
				-		s/(\b_*)$from(\b)/$1$to$2/g;
			
 
				-		}
			
 
				-	print OUT $_;
			
 
				-}
			
 
				+foreach $line (@code)
			
 
				+	{
			
 
				+	$line =~ s/\b(_?)(\w{$minlen,})\b/$1.($edits{$2} or $2)/geo;
			
 
				+	print OUT $line;
			
 
				+	}
			
 
				 
			
 
				 close OUT;
			
 
				 
			
@@ -87,14 +72,5 @@ if ($runasm)
 
				 
			
 
				 	my $rv = $?;
			
 
				 
			
 
				-	# restore target
			
 
				-	unlink $target;
			
 
				-	rename $tmptarg, $target;
			
 
				-
			
 
				 	die "Error executing assembler!" if $rv != 0;
			
 
				 	}
			
 
				-else
			
 
				-	{
			
 
				-	# Don't care about target
			
 
				-	unlink $tmptarg;
			
 
				-	}
			
--- a/util/fipsdist.pl
+++ b/util/fipsdist.pl
@@ -58,7 +58,7 @@ while (<STDIN>)
 
				 		}
			
 
				 	else
			
 
				 		{
			
 
				-		next unless (/^(fips\/|crypto|util|test|include|ms)/);
			
 
				+		next unless (/^(fips\/|crypto|util|test|include|ms|c6x)/);
			
 
				 		}
			
 
				 	if (/^crypto\/([^\/]+)/)
			
 
				 		{
			
--- a/util/fipslink.pl
+++ b/util/fipslink.pl
@@ -27,6 +27,19 @@ if (exists $ENV{"PREMAIN_DSO_EXE"})
 
				 	$fips_premain_dso = "";
			
 
				 	}
			
 
				 
			
 
				+my $fips_sig = $ENV{"FIPS_SIG"};
			
 
				+if (defined $fips_sig)
			
 
				+	{
			
 
				+	if ($fips_premain_dso ne "")
			
 
				+		{
			
 
				+		$fips_premain_dso = "$fips_sig -dso";
			
 
				+		}
			
 
				+	else
			
 
				+		{
			
 
				+		$fips_premain_dso = "$fips_sig -exe";
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				 check_hash($sha1_exe, "fips_premain.c");
			
 
				 check_hash($sha1_exe, "fipscanister.lib");
			
 
				 
			
--- a/util/hmac_sha1.pl
+++ b/util/hmac_sha1.pl
@@ -0,0 +1,196 @@
 
				+#!/usr/bin/env perl
			
 
				+#
			
 
				+# Copyright (c) 2011 The OpenSSL Project.
			
 
				+#
			
 
				+######################################################################
			
 
				+#
			
 
				+# SHA1 and HMAC in Perl by <appro@openssl.org>.
			
 
				+#
			
 
				+{ package SHA1;
			
 
				+  use integer;
			
 
				+
			
 
				+    {
			
 
				+    ################################### SHA1 block code generator
			
 
				+    my @V = ('$A','$B','$C','$D','$E');
			
 
				+    my $i;
			
 
				+
			
 
				+    sub XUpdate {
			
 
				+      my $ret;
			
 
				+	$ret="(\$T=\$W[($i-16)%16]^\$W[($i-14)%16]^\$W[($i-8)%16]^\$W[($i-3)%16],\n\t";
			
 
				+	if ((1<<31)<<1) {
			
 
				+	    $ret.="    \$W[$i%16]=((\$T<<1)|(\$T>>31))&0xffffffff)\n\t  ";
			
 
				+	} else {
			
 
				+	    $ret.="    \$W[$i%16]=(\$T<<1)|((\$T>>31)&1))\n\t  ";
			
 
				+	}
			
 
				+    }
			
 
				+    sub tail {
			
 
				+      my ($a,$b,$c,$d,$e)=@V;
			
 
				+      my $ret;
			
 
				+	if ((1<<31)<<1) {
			
 
				+	    $ret.="(($a<<5)|($a>>27));\n\t";
			
 
				+	    $ret.="$b=($b<<30)|($b>>2);	$e&=0xffffffff;	#$b&=0xffffffff;\n\t";
			
 
				+	} else {
			
 
				+	    $ret.="(($a<<5)|($a>>27)&0x1f);\n\t";
			
 
				+	    $ret.="$b=($b<<30)|($b>>2)&0x3fffffff;\n\t";
			
 
				+	}
			
 
				+      $ret;
			
 
				+    }
			
 
				+    sub BODY_00_15 {
			
 
				+	my ($a,$b,$c,$d,$e)=@V;
			
 
				+	"$e+=\$W[$i]+0x5a827999+((($c^$d)&$b)^$d)+".tail();
			
 
				+    }
			
 
				+    sub BODY_16_19 {
			
 
				+	my ($a,$b,$c,$d,$e)=@V;
			
 
				+	"$e+=".XUpdate()."+0x5a827999+((($c^$d)&$b)^$d)+".tail();
			
 
				+    }
			
 
				+    sub BODY_20_39 {
			
 
				+	my ($a,$b,$c,$d,$e)=@V;
			
 
				+	"$e+=".XUpdate()."+0x6ed9eba1+($b^$c^$d)+".tail();
			
 
				+    }
			
 
				+    sub BODY_40_59 {
			
 
				+	my ($a,$b,$c,$d,$e)=@V;
			
 
				+	"$e+=".XUpdate()."+0x8f1bbcdc+(($b&$c)|(($b|$c)&$d))+".tail();
			
 
				+    }
			
 
				+    sub BODY_60_79 {
			
 
				+	my ($a,$b,$c,$d,$e)=@V;
			
 
				+	"$e+=".XUpdate()."+0xca62c1d6+($b^$c^$d)+".tail();
			
 
				+    }
			
 
				+
			
 
				+    my $sha1_impl =
			
 
				+    'sub block {
			
 
				+	my $self = @_[0];
			
 
				+	my @W    = unpack("N16",@_[1]);
			
 
				+	my ($A,$B,$C,$D,$E,$T) = @{$self->{H}};
			
 
				+	';
			
 
				+
			
 
				+	$sha1_impl.='
			
 
				+	$A &= 0xffffffff;
			
 
				+	$B &= 0xffffffff;
			
 
				+	' if ((1<<31)<<1);
			
 
				+
			
 
				+	for($i=0;$i<16;$i++){ $sha1_impl.=BODY_00_15(); unshift(@V,pop(@V)); }
			
 
				+	for(;$i<20;$i++)    { $sha1_impl.=BODY_16_19(); unshift(@V,pop(@V)); }
			
 
				+	for(;$i<40;$i++)    { $sha1_impl.=BODY_20_39(); unshift(@V,pop(@V)); }
			
 
				+	for(;$i<60;$i++)    { $sha1_impl.=BODY_40_59(); unshift(@V,pop(@V)); }
			
 
				+	for(;$i<80;$i++)    { $sha1_impl.=BODY_60_79(); unshift(@V,pop(@V)); }
			
 
				+
			
 
				+	$sha1_impl.='
			
 
				+	$self->{H}[0]+=$A;	$self->{H}[1]+=$B;	$self->{H}[2]+=$C;
			
 
				+	$self->{H}[3]+=$D;	$self->{H}[4]+=$E;	}';
			
 
				+
			
 
				+    #print $sha1_impl,"\n";
			
 
				+    eval($sha1_impl);		# generate code
			
 
				+    }
			
 
				+
			
 
				+    sub Init {
			
 
				+	my $class = shift;	# multiple instances...
			
 
				+	my $self  = {};
			
 
				+
			
 
				+	bless $self,$class;
			
 
				+	$self->{H} = [0x67452301,0xefcdab89,0x98badcfe,0x10325476,0xc3d2e1f0];
			
 
				+	$self->{N} = 0;
			
 
				+	return $self;
			
 
				+    }
			
 
				+
			
 
				+    sub Update {
			
 
				+	my $self = shift;
			
 
				+	my $msg;
			
 
				+
			
 
				+	foreach $msg (@_) {
			
 
				+	    my $len  = length($msg);
			
 
				+	    my $num  = length($self->{buf});
			
 
				+	    my $off  = 0;
			
 
				+
			
 
				+	    $self->{N} += $len;
			
 
				+
			
 
				+	    if (($num+$len)<64)
			
 
				+	    {	$self->{buf} .= $msg; next;	}
			
 
				+	    elsif ($num)
			
 
				+	    {	$self->{buf} .= substr($msg,0,($off=64-$num));
			
 
				+		$self->block($self->{buf});
			
 
				+	    }
			
 
				+
			
 
				+	    while(($off+64) <= $len)
			
 
				+	    {	$self->block(substr($msg,$off,64));
			
 
				+		$off += 64;
			
 
				+	    }
			
 
				+
			
 
				+	    $self->{buf} = substr($msg,$off);
			
 
				+	}
			
 
				+	return $self;
			
 
				+    }
			
 
				+
			
 
				+    sub Final {
			
 
				+	my $self = shift;
			
 
				+	my $num  = length($self->{buf});
			
 
				+
			
 
				+	$self->{buf} .= chr(0x80); $num++;
			
 
				+	if ($num>56)
			
 
				+	{   $self->{buf} .= chr(0)x(64-$num);
			
 
				+	    $self->block($self->{buf});
			
 
				+	    $self->{buf}=undef;
			
 
				+	    $num=0;
			
 
				+	}
			
 
				+	$self->{buf} .= chr(0)x(56-$num);
			
 
				+	$self->{buf} .= pack("N2",($self->{N}>>29)&0x7,$self->{N}<<3);
			
 
				+	$self->block($self->{buf});
			
 
				+
			
 
				+	return pack("N*",@{$self->{H}});
			
 
				+    }
			
 
				+
			
 
				+    sub Selftest {
			
 
				+	my $hash;
			
 
				+
			
 
				+	$hash=SHA1->Init()->Update('abc')->Final();
			
 
				+	die "SHA1 test#1" if (unpack("H*",$hash) ne 'a9993e364706816aba3e25717850c26c9cd0d89d');
			
 
				+
			
 
				+	$hash=SHA1->Init()->Update('abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq')->Final();
			
 
				+	die "SHA1 test#2" if (unpack("H*",$hash) ne '84983e441c3bd26ebaae4aa1f95129e5e54670f1');
			
 
				+
			
 
				+	#$hash=SHA1->Init()->Update('a'x1000000)->Final();
			
 
				+	#die "SHA1 test#3" if (unpack("H*",$hash) ne '34aa973cd4c4daa4f61eeb2bdbad27316534016f');
			
 
				+    }
			
 
				+}
			
 
				+
			
 
				+{ package HMAC;
			
 
				+
			
 
				+    sub Init {
			
 
				+	my $class = shift;
			
 
				+	my $key   = shift;
			
 
				+	my $self  = {};
			
 
				+
			
 
				+	bless $self,$class;
			
 
				+
			
 
				+	if (length($key)>64) {
			
 
				+	    $key = SHA1->Init()->Update($key)->Final();
			
 
				+	}
			
 
				+	$key .= chr(0x00)x(64-length($key));
			
 
				+
			
 
				+	my @ikey = map($_^=0x36,unpack("C*",$key));
			
 
				+	($self->{hash} = SHA1->Init())->Update(pack("C*",@ikey));
			
 
				+	 $self->{okey} = pack("C*",map($_^=0x36^0x5c,@ikey));
			
 
				+
			
 
				+	return $self;
			
 
				+    }
			
 
				+
			
 
				+    sub Update {
			
 
				+	my $self = shift;
			
 
				+	$self->{hash}->Update(@_);
			
 
				+	return $self;
			
 
				+    }
			
 
				+
			
 
				+    sub Final {
			
 
				+	my $self  = shift;
			
 
				+	my $ihash = $self->{hash}->Final();
			
 
				+	return SHA1->Init()->Update($self->{okey},$ihash)->Final();
			
 
				+    }
			
 
				+
			
 
				+    sub Selftest {
			
 
				+	my $hmac;
			
 
				+
			
 
				+	$hmac = HMAC->Init('0123456789:;<=>?@ABC')->Update('Sample #2')->Final();
			
 
				+	die "HMAC test" if (unpack("H*",$hmac) ne '0922d3405faa3d194f82a45830737d5cc6c75d24');
			
 
				+    }
			
 
				+}
			
 
				+
			
 
				+1;
			
--- a/util/mk1mf.pl
+++ b/util/mk1mf.pl
@@ -23,6 +23,7 @@ local $fips_canister_path = "";
 
				 my $fips_premain_dso_exe_path = "";
			
 
				 my $fips_premain_c_path = "";
			
 
				 my $fips_sha1_exe_path = "";
			
 
				+my $fips_sha1_exe_build = 1;
			
 
				 
			
 
				 local $fipscanisterbuild = 0;
			
 
				 
			
@@ -248,6 +249,10 @@ elsif (($platform eq "netware-clib") || ($platform eq "netware-libc") ||
 
				 	$BSDSOCK=1 if ($platform eq "netware-libc-bsdsock") || ($platform eq "netware-clib-bsdsock");
			
 
				 	require 'netware.pl';
			
 
				 	}
			
 
				+elsif ($platform eq "c64xplus")
			
 
				+	{
			
 
				+	require "TI_CGTOOLS.pl";
			
 
				+	}
			
 
				 else
			
 
				 	{
			
 
				 	require "unix.pl";
			
@@ -500,8 +505,16 @@ if ($fips)
 
				 	{
			
 
				 	if ($fips_sha1_exe_path eq "")
			
 
				 		{
			
 
				-		$fips_sha1_exe_path =
			
 
				-			"\$(BIN_D)${o}fips_standalone_sha1$exep";
			
 
				+		$fips_sha1_exe_path = $ENV{"FIPS_SHA1_PATH"};
			
 
				+		if (defined $fips_sha1_exe_path)
			
 
				+			{
			
 
				+			$fips_sha1_exe_build = 0;
			
 
				+			}
			
 
				+		else
			
 
				+			{
			
 
				+			$fips_sha1_exe_path =
			
 
				+				"\$(BIN_D)${o}fips_standalone_sha1$exep";
			
 
				+			}
			
 
				 		}
			
 
				 	}
			
 
				 	else
			
@@ -545,7 +558,7 @@ if ($fips)
 
				 
			
 
				 if ($fipscanisteronly)
			
 
				 	{
			
 
				-	$build_targets = "\$(O_FIPSCANISTER) \$(T_EXE)";
			
 
				+	$build_targets = "\$(O_FIPSCANISTER)";
			
 
				 	$libs_dep = "";
			
 
				 	}
			
 
				 
			
@@ -567,9 +580,14 @@ if ($fipscanisteronly)
 
				 	\$(CP) \"fips${o}fips_premain.c.sha1\" \"\$(INSTALLTOP)${o}lib\"
			
 
				 	\$(CP) \"\$(INCO_D)${o}fips.h\" \"\$(INSTALLTOP)${o}include${o}openssl\"
			
 
				 	\$(CP) \"\$(INCO_D)${o}fips_rand.h\" \"\$(INSTALLTOP)${o}include${o}openssl\"
			
 
				-	\$(CP) "\$(BIN_D)${o}fips_standalone_sha1$exep" \"\$(INSTALLTOP)${o}bin\"
			
 
				 	\$(CP) \"util${o}fipslink.pl\" \"\$(INSTALLTOP)${o}bin\"
			
 
				 EOF
			
 
				+	if ($fips_sha1_exe_build)
			
 
				+		{
			
 
				+		$extra_install .= <<"EOF";
			
 
				+	\$(CP) "\$(BIN_D)${o}fips_standalone_sha1$exep" \"\$(INSTALLTOP)${o}bin\"
			
 
				+EOF
			
 
				+		}
			
 
				 	}
			
 
				 elsif ($shlib)
			
 
				 	{
			
@@ -716,7 +734,7 @@ LIBS_DEP=$libs_dep
 
				 EOF
			
 
				 
			
 
				 $rules=<<"EOF";
			
 
				-all: banner \$(TMP_D) \$(BIN_D) \$(TEST_D) \$(LIB_D) \$(INCO_D) headers \$(FIPS_SHA1_EXE) $build_targets
			
 
				+all: banner \$(TMP_D) \$(BIN_D) \$(TEST_D) \$(LIB_D) \$(INCO_D) headers $build_targets
			
 
				 
			
 
				 banner:
			
 
				 $banner
			
@@ -744,7 +762,11 @@ headers: \$(HEADER) \$(EXHEADER)
 
				 
			
 
				 lib: \$(LIBS_DEP) \$(E_SHLIB)
			
 
				 
			
 
				-exe: \$(T_EXE) \$(BIN_D)$o\$(E_EXE)$exep
			
 
				+exe: \$(BIN_D)$o\$(E_EXE)$exep
			
 
				+
			
 
				+build_tests: \$(T_EXE)
			
 
				+
			
 
				+build_algvs: \$(T_SRC) \$(BIN_D)${o}fips_algvs$exep
			
 
				 
			
 
				 install: all
			
 
				 	\$(MKDIR) \"\$(INSTALLTOP)\"
			
@@ -846,6 +868,9 @@ if ($fips)
 
				 	$rules.=&cc_compile_target("\$(OBJ_D)${o}\$(E_PREMAIN_DSO)$obj",
			
 
				 		"fips${o}fips_premain.c",
			
 
				 		"-DFINGERPRINT_PREMAIN_DSO_LOAD \$(SHLIB_CFLAGS)");
			
 
				+	$rules.=&cc_compile_target("\$(OBJ_D)${o}fips_algvs$obj",
			
 
				+		"test${o}fips_algvs.c",
			
 
				+		"\$(SHLIB_CFLAGS)");
			
 
				 	}
			
 
				 
			
 
				 foreach (values %lib_nam)
			
@@ -878,6 +903,7 @@ EOF
 
				 }
			
 
				 
			
 
				 $defs.=&do_defs("T_EXE",$test,"\$(TEST_D)",$exep);
			
 
				+$defs.=&do_defs("T_SRC",$test,"\$(TMP_D)",".c");
			
 
				 foreach (split(/\s+/,$test))
			
 
				 	{
			
 
				 	my $t_libs;
			
@@ -899,8 +925,11 @@ foreach (split(/\s+/,$test))
 
				 
			
 
				 	$tt="\$(OBJ_D)${o}$t${obj}";
			
 
				 	$rules.=&do_link_rule("\$(TEST_D)$o$t$exep",$tt,"\$(LIBS_DEP)","$t_libs \$(EX_LIBS)", $ltype);
			
 
				+	$rules.=&do_copy_rule("\$(TMP_D)",$_,".c");
			
 
				 	}
			
 
				 
			
 
				+	$rules.=&do_link_rule("\$(TEST_D)${o}fips_algvs$exep","\$(OBJ_D)${o}fips_algvs$obj","\$(LIBS_DEP)","\$(O_FIPSCANISTER) \$(EX_LIBS)", 2) if $fips;
			
 
				+
			
 
				 $defs.=&do_defs("E_SHLIB",$engines . $otherlibs,"\$(ENG_D)",$shlibp);
			
 
				 
			
 
				 foreach (split(/\s+/,$engines))
			
@@ -955,20 +984,20 @@ if ($fips)
 
				 					"\$(OBJ_D)${o}fips_start$obj",
			
 
				 					"\$(FIPSOBJ)",
			
 
				 					"\$(OBJ_D)${o}fips_end$obj",
			
 
				-					"\$(FIPS_SHA1_EXE)", "");
			
 
				+					"");
			
 
				 		# FIXME
			
 
				 		$rules.=&do_link_rule("\$(FIPS_SHA1_EXE)",
			
 
				 					"\$(OBJ_D)${o}fips_standalone_sha1$obj \$(OBJ_D)${o}sha1dgst$obj $sha1_asm_obj",
			
 
				-					"","\$(EX_LIBS)", 1);
			
 
				+					"","\$(EX_LIBS)", 1) if $fips_sha1_exe_build;
			
 
				 		}
			
 
				 	else
			
 
				 		{
			
 
				 		$rules.=&do_link_rule("\$(FIPS_SHA1_EXE)",
			
 
				 					"\$(OBJ_D)${o}fips_standalone_sha1$obj \$(O_FIPSCANISTER)",
			
 
				-					"","", 1);
			
 
				+					"","", 1) if $fips_sha1_exe_build;
			
 
				 
			
 
				 		}
			
 
				-	$rules.=&do_link_rule("\$(PREMAIN_DSO_EXE)","\$(OBJ_D)${o}\$(E_PREMAIN_DSO)$obj \$(CRYPTOOBJ) \$(O_FIPSCANISTER)","","\$(EX_LIBS)", 1);
			
 
				+	$rules.=&do_link_rule("\$(PREMAIN_DSO_EXE)","\$(OBJ_D)${o}\$(E_PREMAIN_DSO)$obj \$(CRYPTOOBJ) \$(O_FIPSCANISTER)","","\$(EX_LIBS)", 1) unless defined $ENV{"FIPS_SIG"};
			
 
				 	
			
 
				 	}
			
 
				 
			
@@ -1192,6 +1221,10 @@ sub do_compile_rule
 
				 			{
			
 
				 			$ret.=&Sasm_compile_target("$to${o}$n$obj",$s,$n);
			
 
				 			}
			
 
				+		elsif (-f ($s="${d}${o}asm${o}${n}.asm"))
			
 
				+			{
			
 
				+			$ret.=&cc_compile_target("$to${o}$n$obj","$s",$ex);
			
 
				+			}
			
 
				 		else	{ die "no rule for $_"; }
			
 
				 		}
			
 
				 	return($ret);
			
--- a/util/msincore
+++ b/util/msincore
@@ -0,0 +1,169 @@
 
				+#!/usr/bin/env perl
			
 
				+#
			
 
				+# Copyright (c) 2012 The OpenSSL Project.
			
 
				+#
			
 
				+# The script embeds fingerprint into Microsoft PE-COFF executable object.
			
 
				+
			
 
				+# Directory part of this script's path (trailing separator included), put at
				+# the front of @INC so that hmac_sha1.pl is looked up there first.  When $0
				+# has no directory component the match fails; fall back to the current
				+# directory instead of pushing a stale or undefined $1 into @INC.
				+$dir = ($0 =~ m/(.*[\/\\])[^\/\\]+$/) ? $1 : "./";
				+
				+unshift(@INC,$dir);
				+require "hmac_sha1.pl";
			
 
				+
			
 
				+######################################################################
			
 
				+#
			
 
				+# PE-COFF segment table parser by <appro@openssl.org>.
			
 
				+#
			
 
				+{ package PECOFF;
				+  use FileHandle;
				+
				+    # Copy the key/value list in @_ into a fresh hash and return a
				+    # reference to it.
				+    sub dup  { my %copy=@_; return \%copy; }
				+
				+    # PECOFF->Load($file)
				+    #
				+    # Opens $file read-only and parses, in order, the 64-byte DOS header,
				+    # the PE signature, the 20-byte COFF header, the string table (only
				+    # when the COFF header carries a symbol table offset) and the section
				+    # headers.  Returns an object whose {sections} hash maps a section
				+    # name to a hash of that section's header fields (sh_name, sh_vsize,
				+    # sh_vaddr, sh_rawsize, sh_offset, sh_relocs, sh_lines, sh_nrelocls,
				+    # sh_nlines, sh_flags).  Dies when a seek or read fails or when either
				+    # signature check fails.
				+    sub Load {
				+	my $class = shift;
				+	my $self  = {};
				+	my $FD    = FileHandle->new();	# autoclose
				+	my $file  = shift;
				+
				+	bless $self,$class;
				+
				+	sysopen($FD,$file,0)		or die "$!";
				+	binmode($FD);
				+
				+	#################################################
				+	# read IMAGE_DOS_HEADER
				+	#
				+	read($FD,my $mz,64) or die "$!";
				+	my @dos_header=unpack("a2C58V",$mz);
				+
				+	$!=42;		# signal fipsld to revert to two-step link
				+	die "$file is not PE-COFF image" if ($dos_header[0] ne "MZ");
				+
				+	# the last unpacked field is the file offset of the PE signature
				+	my $e_lfanew=pop(@dos_header);
				+	seek($FD,$e_lfanew,0)		or die "$!";
				+	read($FD,my $magic,4)		or die "$!";
				+
				+	$!=42;		# signal fipsld to revert to two-step link
				+	die "$file is not PE-COFF image" if (unpack("V",$magic)!=0x4550);
				+
				+	#################################################
				+	# read and parse COFF header...
				+	#
				+	read($FD,my $coff,20) or die "$!";
				+
				+	my %coff_header;
				+	@coff_header{qw(machine nsects date syms_off nsyms opt flags)}=
				+		unpack("v2V3v2",$coff);
				+
				+	my $strings;
				+	my $symsize;
				+
				+	#################################################
				+	# load strings table
				+	#
				+	if ($coff_header{syms_off}) {
				+	    # the table follows the symbol records, 18 bytes each
				+	    seek($FD,$coff_header{syms_off}+18*$coff_header{nsyms},0) or die "$!";
				+	    read($FD,$strings,4) or die "$!";
				+	    $symsize = unpack("V",$strings);
				+	    # The size field counts its own 4 bytes, so only $symsize-4
				+	    # further bytes belong to the table; an empty table (size 4)
				+	    # leaves nothing to read and must not be treated as an error.
				+	    if ($symsize>4) {
				+		read($FD,$strings,$symsize-4,4) or die "$!";
				+	    }
				+	}
				+
				+	#################################################
				+	# read sections
				+	#
				+
				+	# seek to section headers: signature (4) + COFF header (20) +
				+	# optional header of the size recorded in the COFF header
				+	seek($FD,$e_lfanew+24+$coff_header{opt},0) or die "$!";
				+
				+	for (my $i=0;$i<$coff_header{nsects};$i++) {
				+	    my %coff_shdr;
				+	    my $name;
				+
				+	    read($FD,my $section,40) or die "$!";
				+
				+	    @coff_shdr{qw(sh_name sh_vsize sh_vaddr
				+			sh_rawsize sh_offset sh_relocs sh_lines
				+			sh_nrelocls sh_nlines sh_flags)} =
				+		unpack("a8V6v2V",$section);
				+
				+	    $name = $coff_shdr{sh_name};
				+	    # see if sh_name is an offset in $strings
				+	    my ($hi,$lo) = unpack("V2",$name);
				+	    if ($hi==0 && $lo<$symsize) {
				+		$name = substr($strings,$lo,64);
				+	    }
				+	    # keep only the text before the first NUL byte
				+	    $name = (split(chr(0),$name))[0];
				+	    $coff_shdr{sh_name} = $name;
				+
				+	    $self->{sections}{$name} = dup(%coff_shdr);
				+	}
				+
				+	return $self;
				+    }
				+
				+    # $obj->Lookup($name)
				+    #
				+    # Returns the header hash stored for section $name by Load, or undef
				+    # when no such section was recorded.
				+    sub Lookup {
				+	my $self = shift;
				+	my $name = shift;
				+	return $self->{sections}{$name};
				+    }
				+}
			
 
				+
			
 
				+######################################################################
			
 
				+#
			
 
				+# main()
			
 
				+#
			
 
				+# True when more than one argument is given and the first one is -dso or
				+# -exe.  In that mode the binary is opened read-only below.
				+my $legacy_mode;
				+
				+# Accept either a single file name, or a -dso/-exe switch followed by the
				+# file name; anything else prints the usage line and exits with status 1.
				+if ($#ARGV<0 || ($#ARGV>0 && !($legacy_mode=($ARGV[0] =~ /^\-(dso|exe)$/)))) {
				+	print STDERR "usage: $0 [-dso|-exe] pe-coff-binary\n";
				+	exit(1);
				+}
				+
				+# The binary to process is always the last argument.
				+$exe = PECOFF->Load($ARGV[$#ARGV]);
				+
				+sysopen(FD,$ARGV[$#ARGV],$legacy_mode?0:2) or die "$!";	# 2 is read/write
				+binmode(FD);
			
 
				+
			
 
				+# Feed the marker-delimited parts of the "fipstx" and "fipsro" sections of
				+# the file open on FD into an HMAC context and return the context's final
				+# value.  For each section the data hashed runs from the first occurrence
				+# of the start marker up to (not including) the last occurrence of the end
				+# marker.  Dies when a section or a marker is missing or a seek/read fails.
				+sub FIPS_incore_fingerprint {
				+  my $hmac = HMAC->Init("etaonrishdlcupfm");
				+
				+    # One row per section: section name, missing-section message, start
				+    # marker, position to start searching for it, missing-start message,
				+    # end marker, missing-end message.
				+    my @spans = (
				+	["fipstx","no fipstx section",
				+	 "SPIFxet_ts_tXtra",0,"no FIPS_text_startX",
				+	 "SPIFxet_ne_t][Xd","no FIPS_text_endX"],
				+	["fipsro","no fipsro section",
				+	 "SPIFdor__atarats",40,"no FIPS_rodata_start",
				+	 "SPIFdor__ata[dne","no FIPS_rodata_end"],
				+    );
				+
				+    foreach my $span (@spans) {
				+	my ($secname,$nosect,$head,$skip,$nohead,$tail,$notail) = @$span;
				+
				+	my $shdr = $exe->Lookup($secname);
				+	die $nosect unless $shdr;
				+
				+	# pull the section's contents into $blob
				+	seek(FD,$shdr->{sh_offset},0)		or die "$!";
				+	read(FD,$blob,$shdr->{sh_vsize})	or die "$!";
				+
				+	my $from = index($blob,$head,$skip);
				+	die $nohead if ($from < 0);
				+	my $to = rindex($blob,$tail);
				+	die $notail if ($to < 0);
				+
				+	$hmac->Update(substr($blob,$from,$to-$from));
				+    }
				+
				+    return $hmac->Final();
				+}
			
 
				+
			
 
				+# Compute the fingerprint, then either print it in hex on standard output
				+# (legacy mode) or write its hex form at the start of the fipsro section.
				+$fingerprint = FIPS_incore_fingerprint();
				+
				+if ($legacy_mode) {
				+    print unpack("H*",$fingerprint);
				+} else {
				+    my $sect = $exe->Lookup("fipsro");
				+    seek(FD,$sect->{sh_offset},0)		or die "$!";
				+    print FD unpack("H*",$fingerprint)		or die "$!";
				+}
				+
				+# Output through FD is buffered, so a failed write may only be reported
				+# when the handle is closed; check it rather than exiting successfully.
				+close (FD)					or die "$!";
			
--- a/util/pl/TI_CGTOOLS.pl
+++ b/util/pl/TI_CGTOOLS.pl
@@ -0,0 +1,274 @@
 
				+#!/usr/local/bin/perl
			
 
				+#
			
 
				+# TI_CGTOOLS.pl, Texas Instruments CGTOOLS under Unix or MSYS.
			
 
				+#
			
 
				+
			
 
				+$ssl=	"ssl";
				+
				+# $crypto is "fips" for a FIPS build without shared libraries (which also
				+# sets $crypto_compat), and "crypto" in every other case.
				+if ($fips && !$shlib)
				+	{
				+	$crypto="fips";
				+	$crypto_compat = "cryptocompat.lib";
				+	}
				+else
				+	{
				+	$crypto="crypto";
				+	}
				+
				+if ($fipscanisterbuild)
				+	{
				+	$fips_canister_path = "\$(LIB_D)/fipscanister.obj";
				+	}
				+
				+# path separator and file-handling commands used in the generated makefile
				+$o='/';
				+$cp='cp';
				+$cp2='$(PERL) util/copy.pl -stripcr';
				+$mkdir='$(PERL) util/mkdir-p.pl';
				+$rm='rm -f';
				+
				+$zlib_lib="zlib1.lib";
				+
				+# Sanitize -L options for ms link
				+# NOTE(review): the first pattern has an escaped '[' followed by '^', so it
				+# looks unable to match anything; the second pattern rewrites every -L
				+# option.  Whether /libpath: is meaningful to this toolchain is not
				+# visible here -- confirm before relying on it.
				+$l_flags =~ s/-L("\[^"]+")/\/libpath:$1/g;
				+$l_flags =~ s/-L(\S+)/\/libpath:$1/g;
				+
				+# C compiler stuff
				+$cc='cl6x';
				+$base_cflags= " $mf_cflag";
				+$opt_cflags='';
				+$dbg_cflags=' -g -DDEBUG -D_DEBUG';
				+$lflags='';
			
 
				+
			
 
				+# cc_compile_target($target,$source,$ex_flags)
				+#
				+# Returns the makefile rule that builds object $target from
				+# $(SRC_D)/$source with $ex_flags.  The compiler is told to write into
				+# $(OBJ_D); when the object name derived from the source differs from the
				+# base name of $target, a trailing "mv" renames it.  In a FIPS canister
				+# build, .asm sources are first passed through util/fipsas.pl.
				+*::cc_compile_target = sub {
				+	my ($target,$source,$ex_flags)=@_;
				+
				+	if ($source =~/cversion/)
				+		{
				+		$ex_flags.=" -DMK1MF_BUILD";
				+		}
				+
				+	my $rule = "$target: \$(SRC_D)$o$source\n\t";
				+	$rule .= "\$(PERL) util${o}fipsas.pl . \$< norunasm \$(CFLAG)\n\t"
				+		if ($fipscanisterbuild && $source=~/\.asm$/);
				+	$rule .= "\$(CC) --obj_directory=\$(OBJ_D) $ex_flags -c \$(SRC_D)$o$source\n";
				+
				+	# base name of the requested object ...
				+	(my $wanted = $target) =~ s/.*${o}([^${o}]+)/$1/;
				+	# ... and the object name derived from the source file
				+	(my $made = $source) =~ s/.*${o}([^${o}\.]+)\..*/$1${obj}/;
				+	unless ($wanted eq $made)
				+		{
				+		$rule .= "\tmv \$(OBJ_D)${o}$made \$(OBJ_D)${o}$wanted\n";
				+		}
				+
				+	return($rule."\n");
				+};
			
 
				+# perlasm_compile_target($target,$source,$bname)
				+#
				+# Returns two makefile rules: one that runs the perl script $source to
				+# produce $(TMP_D)/$bname.asm (followed by a util/fipsas.pl pass in a
				+# FIPS canister build), and one that assembles that file into $target.
				+*::perlasm_compile_target = sub {
				+	my ($target,$source,$bname)=@_;
				+
				+	# drop a trailing one-character extension from the base name, if any
				+	$bname =~ s/(.*)\.[^\.]$/$1/;
				+
				+	my @lines = (
				+		"\$(TMP_D)$o$bname.asm: $source\n",
				+		"\t\$(PERL) $source \$\@\n",
				+	);
				+	if ($fipscanisterbuild)
				+		{
				+		push(@lines,"\t\$(PERL) util${o}fipsas.pl . \$@ norunasm \$(CFLAG)\n");
				+		}
				+	push(@lines,
				+		"\n",
				+		"$target: \$(TMP_D)$o$bname.asm\n",
				+		"\t\$(ASM) --obj_directory=\$(OBJ_D) \$(TMP_D)$o$bname.asm\n",
				+		"\n");
				+
				+	return(join('',@lines));
				+};
			
 
				+
			
 
				+$mlflags='';
				+
				+# default output, temporary and include directories
				+$out_def ="c6x";
				+$tmp_def ="$out_def/tmp";
				+$inc_def="$out_def/inc";
				+
				+# debug builds take the debug flag set, all others the optimising one
				+$cflags=($debug ? $dbg_cflags : $opt_cflags).$base_cflags;
				+
				+$obj='.obj';
				+$asm_suffix='.asm';
				+$ofile="";
				+
				+# EXE linking stuff
				+$link='$(CC) -z';
				+$efile="-o ";
				+$exep='.out';
				+$ex_libs='';
				+
				+# static library stuff
				+$mklib='ar6x';
				+$ranlib='';
				+$plib="";
				+$libp=".lib";
				+if ($shlib)
				+	{
				+	$shlibp=".dll";
				+	}
				+else
				+	{
				+	$shlibp=".lib";
				+	}
				+$lfile='-o ';
				+
				+$shlib_ex_obj="";
				+$asm='$(CC) $(CFLAG) -c';
				+
				+# start with empty assembler object/source lists for bn, des and bf
				+$bn_asm_obj=$bn_asm_src='';
				+$des_enc_obj=$des_enc_src='';
				+$bf_enc_obj=$bf_enc_src='';
				+
				+unless ($no_asm)
				+	{
				+	# each row: makefile asm list, crypto subdirectory, and references to
				+	# the object-list and source-list variables import_asm fills in
				+	my @asm_imports = (
				+		[$mf_bn_asm,	"bn",		\$bn_asm_obj,		\$bn_asm_src],
				+		[$mf_aes_asm,	"aes",		\$aes_asm_obj,		\$aes_asm_src],
				+		[$mf_des_asm,	"des",		\$des_enc_obj,		\$des_enc_src],
				+		[$mf_bf_asm,	"bf",		\$bf_enc_obj,		\$bf_enc_src],
				+		[$mf_cast_asm,	"cast",		\$cast_enc_obj,		\$cast_enc_src],
				+		[$mf_rc4_asm,	"rc4",		\$rc4_enc_obj,		\$rc4_enc_src],
				+		[$mf_rc5_asm,	"rc5",		\$rc5_enc_obj,		\$rc5_enc_src],
				+		[$mf_md5_asm,	"md5",		\$md5_asm_obj,		\$md5_asm_src],
				+		[$mf_sha_asm,	"sha",		\$sha1_asm_obj,		\$sha1_asm_src],
				+		[$mf_rmd_asm,	"ripemd",	\$rmd160_asm_obj,	\$rmd160_asm_src],
				+		[$mf_wp_asm,	"whrlpool",	\$whirlpool_asm_obj,	\$whirlpool_asm_src],
				+		[$mf_modes_asm,	"modes",	\$modes_asm_obj,	\$modes_asm_src],
				+		[$mf_cpuid_asm,	"",		\$cpuid_asm_obj,	\$cpuid_asm_src],
				+	);
				+	foreach my $args (@asm_imports)
				+		{
				+		import_asm(@$args);
				+		}
				+	$perl_asm = 1;
				+	}
			
 
				+
			
 
				+# do_lib_rule($objs,$target,$name,$shlib,$ign,$base_addr)
				+#
				+# Returns the makefile rule that builds library $target from $objs.
				+# Without $shlib the objects are archived with $(MKLIB).  With $shlib a
				+# link command is emitted instead, going through $(FIPSLINK) when $fips
				+# is set and the target is the crypto library; $name (if non-empty) is
				+# turned into a /def: argument and $base_addr into a /base: argument.
				+# $ign is accepted but not used.
				+sub do_lib_rule
				+	{
				+	my($objs,$target,$name,$shlib,$ign,$base_addr) = @_;
				+	local($ret);
				+
				+	# use the platform path separator in the target name
				+	$target =~ s/\//$o/g if $o ne '/';
				+	my $base_arg;
				+	if ($base_addr ne "")
				+		{
				+		$base_arg= " /base:$base_addr";
				+		}
				+	else
				+		{
				+		$base_arg = "";
				+		}
				+	if ($name ne "")
				+		{
				+		$name =~ tr/a-z/A-Z/;
				+		$name = "/def:ms/${name}.def";
				+		}
				+
				+	if (!$shlib)
				+		{
				+		$ret.="$target: $objs\n";
				+		$ret.="\t\$(MKLIB) $lfile$target $objs\n";
				+		}
				+	else
				+		{
				+		# everything except the crypto library itself links against it
				+		local($ex)=($target =~ /O_CRYPTO/)?'':' $(L_CRYPTO)';
				+		$ex.=" $zlib_lib" if $zlib_opt == 1 && $target =~ /O_CRYPTO/;
				+
				+ 		if ($fips && $target =~ /O_CRYPTO/)
				+			{
				+			$ret.="$target: $objs \$(PREMAIN_DSO_EXE)";
				+			$ret.="\n\tFIPS_LINK=\"\$(LINK)\" \\\n";
				+			$ret.="\tFIPS_CC=\$(CC)\\\n";
				+			$ret.="\tFIPS_CC_ARGS=/Fo\$(OBJ_D)${o}fips_premain.obj \$(SHLIB_CFLAGS) -c\\\n";
				+			$ret.="\tPREMAIN_DSO_EXE=\$(PREMAIN_DSO_EXE)\\\n";
				+			$ret.="\tFIPS_SHA1_EXE=\$(FIPS_SHA1_EXE)\\\n";
				+			$ret.="\tFIPS_TARGET=$target\\\n";
				+			$ret.="\tFIPSLIB_D=\$(FIPSLIB_D)\\\n";
				+			$ret.="\t\$(FIPSLINK) \$(MLFLAGS) /map $base_arg $efile$target ";
				+			$ret.="$name \$(SHLIB_EX_OBJ) $objs \$(EX_LIBS) ";
				+			$ret.="\$(OBJ_D)${o}fips_premain.obj $ex\n";
				+			}
				+		else
				+			{
				+			$ret.="$target: $objs";
				+			$ret.="\n\t\$(LINK) \$(MLFLAGS) $efile$target $name \$(SHLIB_EX_OBJ) $objs $ex \$(EX_LIBS)\n";
				+			}
				+
				+		$ret.="\tIF EXIST \$@.manifest mt -nologo -manifest \$@.manifest -outputresource:\$@;2\n\n";
				+		}
				+	$ret.="\n";
				+	return($ret);
				+	}
			
 
				+
			
 
				+# do_link_rule($target,$files,$dep_libs,$libs,$standalone)
				+#
				+# Returns the makefile rule that links $target from $files, with
				+# $dep_libs as additional prerequisites.  $standalone selects the recipe:
				+#   1      link $files and $libs, putting $(EX_LIBS) first when $files
				+#          names $(O_FIPSCANISTER) outside a canister build;
				+#   2      link $files with $(O_FIPSCANISTER) and application.cmd, then
				+#          run incore6x on the result ($libs is not used);
				+#   other  link $(APP_EX_OBJ), $files and $libs.
				+# As a side effect the global $n is set to the base name of $target.
				+sub do_link_rule
				+	{
				+	my($target,$files,$dep_libs,$libs,$standalone)=@_;
				+	local($ret,$_);
				+
				+	# use the platform path separator in the object list
				+	$files =~ s/\//$o/g if $o ne '/';
				+	$n=&bname($target);
				+	$ret.="$target: $files $dep_libs\n";
				+	if ($standalone == 1)
				+		{
				+		$ret.="	\$(LINK) \$(LFLAGS) $efile$target ";
				+		$ret.= "\$(EX_LIBS) " if ($files =~ /O_FIPSCANISTER/ && !$fipscanisterbuild);
				+		$ret.="$files $libs\n";
				+		}
				+	elsif ($standalone == 2)
				+		{
				+		$ret.="\t\$(LINK) \$(LFLAGS) $efile$target $files \$(O_FIPSCANISTER) $out_def/application.cmd\n";
				+		$ret.="\t$out_def/incore6x $target\n\n";
				+		}
				+	else
				+		{
				+		$ret.="\t\$(LINK) \$(LFLAGS) $efile$target ";
				+		$ret.="\t\$(APP_EX_OBJ) $files $libs\n";
				+		}
				+	return($ret);
				+	}
			
 
				+
			
 
				+# do_rlink_rule($target,$rl_start,$rl_mid,$rl_end,$dep_libs,$libs)
				+#
				+# Returns the makefile rule that partially links ("-r") the three object
				+# groups, in the order given, into $target using fipscanister.cmd, writes
				+# the output of fips_standalone_sha1 for $target to $target.sha1, and
				+# copies fips_premain.c and its .sha1 file into $(LIB_D).  $dep_libs are
				+# extra prerequisites; $libs is accepted but not used.
				+# As a side effect the global $n is set to the base name of $target.
				+sub do_rlink_rule
				+	{
				+	my($target,$rl_start, $rl_mid, $rl_end,$dep_libs,$libs)=@_;
				+	local($ret,$_);
				+	my $files = "$rl_start $rl_mid $rl_end";
				+
				+	# use the platform path separator in the object list
				+	$files =~ s/\//$o/g if $o ne '/';
				+	$n=&bname($target);
				+	$ret.="$target: $files $dep_libs\n";
				+	$ret.="\t\$(LINK) -r $lfile$target $files $out_def/fipscanister.cmd\n";
				+	$ret.="\t\$(PERL) $out_def${o}fips_standalone_sha1 $target > ${target}.sha1\n";
				+	$ret.="\t\$(PERL) util${o}copy.pl -stripcr fips${o}fips_premain.c \$(LIB_D)${o}fips_premain.c\n";
				+	$ret.="\t\$(CP) fips${o}fips_premain.c.sha1 \$(LIB_D)${o}fips_premain.c.sha1\n";
				+	$ret.="\n";
				+	return($ret);
				+	}
			
 
				+
			
 
				+# import_asm($mf_var,$asm_name,$oref,$sref)
				+#
				+# $mf_var is a space-separated list of assembler object names ending in
				+# ".o".  Stores into $$sref the matching list of .asm source paths under
				+# crypto/<asm_name>/asm/ (or directly under crypto/ when $asm_name is
				+# empty), and into $$oref the matching list of .obj files in $(TMP_D).
				+# Both lists are space-separated with no trailing space.
				+sub import_asm
				+	{
				+	my ($mf_var, $asm_name, $oref, $sref) = @_;
				+	my $asm_dir;
				+	if ($asm_name eq "")
				+		{
				+		$asm_dir = "crypto$o";
				+		}
				+	else
				+		{
				+		# braces keep "asm" a literal path component rather than
				+		# part of a variable name
				+		$asm_dir = "crypto$o$asm_name${o}asm$o";
				+		}
				+
				+	$$oref = "";
				+	$$sref = "";
				+	# strip the ".o" suffixes, leaving bare module names
				+	$mf_var =~ s/\.o//g;
				+
				+	foreach (split(/ /, $mf_var))
				+		{
				+		$$sref .= $asm_dir . $_ . ".asm ";
				+		}
				+	foreach (split(/ /, $mf_var))
				+		{
				+		# NOTE(review): the separator here is a hard-coded backslash
				+		# although $o is '/' in this file -- confirm this is intended.
				+		$$oref .= "\$(TMP_D)\\" . $_ . ".obj ";
				+		}
				+	$$oref =~ s/ $//;
				+	$$sref =~ s/ $//;
				+
				+	}
			
 
				+
			
 
				+
			
 
				+1;
			
--- a/util/pl/VC-32.pl
+++ b/util/pl/VC-32.pl
@@ -123,7 +123,7 @@ elsif ($FLAVOR =~ /CE/)
 
				     }
			
 
				 
			
 
				     $cc='$(CC)';
			
 
				-    $base_cflags=' /W3 /WX /GF /Gy /nologo -DUNICODE -D_UNICODE -DOPENSSL_SYSNAME_WINCE -DWIN32_LEAN_AND_MEAN -DL_ENDIAN -DDSO_WIN32 -DNO_CHMOD -DOPENSSL_SMALL_FOOTPRINT';
			
 
				+    $base_cflags=' /W3 /GF /Gy /nologo -DUNICODE -D_UNICODE -DOPENSSL_SYSNAME_WINCE -DWIN32_LEAN_AND_MEAN -DL_ENDIAN -DDSO_WIN32 -DNO_CHMOD -DOPENSSL_SMALL_FOOTPRINT';
			
 
				     $base_cflags.=" $wcecdefs";
			
 
				     $base_cflags.=' -I$(WCECOMPAT)/include'		if (defined($ENV{'WCECOMPAT'}));
			
 
				     $base_cflags.=' -I$(PORTSDK_LIBPATH)/../../include'	if (defined($ENV{'PORTSDK_LIBPATH'}));
			
@@ -174,12 +174,12 @@ $rsc="rc";
 
				 $efile="/out:";
			
 
				 $exep='.exe';
			
 
				 if ($no_sock)		{ $ex_libs=''; }
			
 
				-elsif ($FLAVOR =~ /CE/)	{ $ex_libs='winsock.lib'; }
			
 
				+elsif ($FLAVOR =~ /CE/)	{ $ex_libs='ws2.lib'; }
			
 
				 else			{ $ex_libs='ws2_32.lib'; }
			
 
				 
			
 
				 if ($FLAVOR =~ /CE/)
			
 
				 	{
			
 
				-	$ex_libs.=' $(WCECOMPAT)/lib/wcecompatex.lib'	if (defined($ENV{'WCECOMPAT'}));
			
 
				+	$ex_libs.=' $(WCECOMPAT)/lib/wcecompatex.lib crypt32.lib coredll.lib corelibc.lib'	if (defined($ENV{'WCECOMPAT'}));
			
 
				 	$ex_libs.=' $(PORTSDK_LIBPATH)/portlib.lib'	if (defined($ENV{'PORTSDK_LIBPATH'}));
			
 
				 	$ex_libs.=' /nodefaultlib:oldnames.lib coredll.lib corelibc.lib' if ($ENV{'TARGETCPU'} eq "X86");
			
 
				 	}
			
@@ -389,8 +389,9 @@ sub do_rlink_rule
 
				 
			
 
				 	$file =~ s/\//$o/g if $o ne '/';
			
 
				 	$n=&bname($targer);
			
 
				-	$ret.="$target: $files $dep_libs \$(FIPS_SHA1_EXE)\n";
			
 
				-	$ret.="\t\$(PERL) ms\\segrenam.pl \$\$a $rl_start\n";
			
 
				+	$ret.="$target: $files $dep_libs";
			
 
				+	$ret.=" \$(FIPS_SHA1_EXE)" unless defined $ENV{"FIPS_SHA1_PATH"};
			
 
				+  	$ret.="\n\t\$(PERL) ms\\segrenam.pl \$\$a $rl_start\n";
			
 
				 	$ret.="\t\$(PERL) ms\\segrenam.pl \$\$b $rl_mid\n";
			
 
				 	$ret.="\t\$(PERL) ms\\segrenam.pl \$\$c $rl_end\n";
			
 
				 	$ret.="\t\$(MKLIB) $lfile$target @<<\n\t$files\n<<\n";