Browse Source

sys/src/libsec/amd64: import md5block and sha1block functions (thanks Erik Quanstrom)

David du Colombier 3 years ago
parent
commit
be9994e3b9

+ 13 - 2
sys/src/ape/lib/sec/amd64/mkfile

@@ -3,10 +3,21 @@ APE=/sys/src/ape
 
 LIB=/$objtype/lib/ape/libsec.a
 
-OFILES=	\
+FILES=\
+	md5block\
+	sha1block\
 
 HFILES=/sys/include/ape/libsec.h
 
-UPDATE=mkfile
+SFILES=${FILES:%=%.s}
+
+OFILES=${SFILES:%.s=%.$O}
+
+UPDATE=mkfile\
+	$HFILES\
+	$SFILES\
 
 </sys/src/cmd/mksyslib
+
+%.$O:	../../../../libsec/amd64/%.s
+	$AS ../../../../libsec/amd64/$stem.s

+ 214 - 0
sys/src/libsec/amd64/md5block.s

@@ -0,0 +1,214 @@
+/*
+ *  rfc1321 requires that I include this.  The code is new.  The constants
+ *  all come from the rfc (hence the copyright).  We trade a table for the
+ *  macros in rfc.  The total size is a lot less. -- presotto
+ *
+ *	Copyright (C) 1991-2, RSA Data Security, Inc. Created 1991. All
+ *	rights reserved.
+ *
+ *	License to copy and use this software is granted provided that it
+ *	is identified as the "RSA Data Security, Inc. MD5 Message-Digest
+ *	Algorithm" in all material mentioning or referencing this software
+ *	or this function.
+ *
+ *	License is also granted to make and use derivative works provided
+ *	that such works are identified as "derived from the RSA Data
+ *	Security, Inc. MD5 Message-Digest Algorithm" in all material
+ *	mentioning or referencing the derived work.
+ *
+ *	RSA Data Security, Inc. makes no representations concerning either
+ *	the merchantability of this software or the suitability of this
+ *	software forany particular purpose. It is provided "as is"
+ *	without express or implied warranty of any kind.
+ *	These notices must be retained in any copies of any part of this
+ *	documentation and/or software.
+ */
+#define S11 7
+#define S12 12
+#define S13 17
+#define S14 22
+
+#define S21 5
+#define S22 9
+#define S23 14
+#define S24 20
+
+#define S31 4
+#define S32 11
+#define S33 16
+#define S34 23
+
+#define S41 6
+#define S42 10
+#define S43 15
+#define S44 21
+
+/*
+ * SI is data
+ *	a += FN(B,C,D);
+ *	a += x[sh] + t[sh];
+ *	a = (a << S11) | (a >> (32 - S11));
+ *	a += b;
+ */
+
+#define BODY1(off,V,FN,SH,A,B,C,D)\
+	FN(B,C,D)\
+	LEAL V(A)(DI*1),A;\
+	ADDL (off)(BP),A;\
+	ROLL $SH,A;\
+	ADDL B,A;\
+
+#define BODY(off,V,FN,SH,A,B,C,D)\
+	FN(B,C,D)\
+	LEAL V(A)(DI*1),A;\
+	ADDL (off)(BP),A;\
+	ROLL $SH,A;\
+	ADDL B,A;\
+
+/*
+ * fn1 = ((c ^ d) & b) ^ d
+ */
+#define FN1(B,C,D)\
+	MOVL C,DI;\
+	XORL D,DI;\
+	ANDL B,DI;\
+	XORL D,DI;\
+
+/*
+ * fn2 = ((b ^ c) & d) ^ c;
+ */
+#define FN2(B,C,D)\
+	MOVL B,DI;\
+	XORL C,DI;\
+	ANDL D,DI;\
+	XORL C,DI;\
+
+/*
+ * fn3 = b ^ c ^ d;
+ */
+#define FN3(B,C,D)\
+	MOVL B,DI;\
+	XORL C,DI;\
+	XORL D,DI;\
+
+/*
+ * fn4 = c ^ (b | ~d);
+ */
+#define FN4(B,C,D)\
+	MOVL D,DI;\
+	XORL $-1,DI;\
+	ORL B,DI;\
+	XORL C,DI;\
+
+#define	LEN	8
+#define	STATE	16
+
+TEXT	_md5block+0(SB), $0
+
+	MOVQ	RARG,R8
+	MOVLQZX len+LEN(FP),BX
+	ADDQ	BX,R8
+
+mainloop:
+	MOVQ state+STATE(FP),SI
+	MOVL (SI),AX
+	MOVL 4(SI),BX
+	MOVL 8(SI),CX
+	MOVL 12(SI),DX
+
+	BODY1( 0*4,0xd76aa478,FN1,S11,AX,BX,CX,DX)
+	BODY1( 1*4,0xe8c7b756,FN1,S12,DX,AX,BX,CX)
+	BODY1( 2*4,0x242070db,FN1,S13,CX,DX,AX,BX)
+	BODY1( 3*4,0xc1bdceee,FN1,S14,BX,CX,DX,AX)
+
+	BODY1( 4*4,0xf57c0faf,FN1,S11,AX,BX,CX,DX)
+	BODY1( 5*4,0x4787c62a,FN1,S12,DX,AX,BX,CX)
+	BODY1( 6*4,0xa8304613,FN1,S13,CX,DX,AX,BX)
+	BODY1( 7*4,0xfd469501,FN1,S14,BX,CX,DX,AX)
+
+	BODY1( 8*4,0x698098d8,FN1,S11,AX,BX,CX,DX)
+	BODY1( 9*4,0x8b44f7af,FN1,S12,DX,AX,BX,CX)
+	BODY1(10*4,0xffff5bb1,FN1,S13,CX,DX,AX,BX)
+	BODY1(11*4,0x895cd7be,FN1,S14,BX,CX,DX,AX)
+
+	BODY1(12*4,0x6b901122,FN1,S11,AX,BX,CX,DX)
+	BODY1(13*4,0xfd987193,FN1,S12,DX,AX,BX,CX)
+	BODY1(14*4,0xa679438e,FN1,S13,CX,DX,AX,BX)
+	BODY1(15*4,0x49b40821,FN1,S14,BX,CX,DX,AX)
+
+
+	BODY( 1*4,0xf61e2562,FN2,S21,AX,BX,CX,DX)
+	BODY( 6*4,0xc040b340,FN2,S22,DX,AX,BX,CX)
+	BODY(11*4,0x265e5a51,FN2,S23,CX,DX,AX,BX)
+	BODY( 0*4,0xe9b6c7aa,FN2,S24,BX,CX,DX,AX)
+
+	BODY( 5*4,0xd62f105d,FN2,S21,AX,BX,CX,DX)
+	BODY(10*4,0x02441453,FN2,S22,DX,AX,BX,CX)
+	BODY(15*4,0xd8a1e681,FN2,S23,CX,DX,AX,BX)
+	BODY( 4*4,0xe7d3fbc8,FN2,S24,BX,CX,DX,AX)
+
+	BODY( 9*4,0x21e1cde6,FN2,S21,AX,BX,CX,DX)
+	BODY(14*4,0xc33707d6,FN2,S22,DX,AX,BX,CX)
+	BODY( 3*4,0xf4d50d87,FN2,S23,CX,DX,AX,BX)
+	BODY( 8*4,0x455a14ed,FN2,S24,BX,CX,DX,AX)
+
+	BODY(13*4,0xa9e3e905,FN2,S21,AX,BX,CX,DX)
+	BODY( 2*4,0xfcefa3f8,FN2,S22,DX,AX,BX,CX)
+	BODY( 7*4,0x676f02d9,FN2,S23,CX,DX,AX,BX)
+	BODY(12*4,0x8d2a4c8a,FN2,S24,BX,CX,DX,AX)
+
+
+	BODY( 5*4,0xfffa3942,FN3,S31,AX,BX,CX,DX)
+	BODY( 8*4,0x8771f681,FN3,S32,DX,AX,BX,CX)
+	BODY(11*4,0x6d9d6122,FN3,S33,CX,DX,AX,BX)
+	BODY(14*4,0xfde5380c,FN3,S34,BX,CX,DX,AX)
+
+	BODY( 1*4,0xa4beea44,FN3,S31,AX,BX,CX,DX)
+	BODY( 4*4,0x4bdecfa9,FN3,S32,DX,AX,BX,CX)
+	BODY( 7*4,0xf6bb4b60,FN3,S33,CX,DX,AX,BX)
+	BODY(10*4,0xbebfbc70,FN3,S34,BX,CX,DX,AX)
+
+	BODY(13*4,0x289b7ec6,FN3,S31,AX,BX,CX,DX)
+	BODY( 0*4,0xeaa127fa,FN3,S32,DX,AX,BX,CX)
+	BODY( 3*4,0xd4ef3085,FN3,S33,CX,DX,AX,BX)
+	BODY( 6*4,0x04881d05,FN3,S34,BX,CX,DX,AX)
+
+	BODY( 9*4,0xd9d4d039,FN3,S31,AX,BX,CX,DX)
+	BODY(12*4,0xe6db99e5,FN3,S32,DX,AX,BX,CX)
+	BODY(15*4,0x1fa27cf8,FN3,S33,CX,DX,AX,BX)
+	BODY( 2*4,0xc4ac5665,FN3,S34,BX,CX,DX,AX)
+
+
+	BODY( 0*4,0xf4292244,FN4,S41,AX,BX,CX,DX)
+	BODY( 7*4,0x432aff97,FN4,S42,DX,AX,BX,CX)
+	BODY(14*4,0xab9423a7,FN4,S43,CX,DX,AX,BX)
+	BODY( 5*4,0xfc93a039,FN4,S44,BX,CX,DX,AX)
+
+	BODY(12*4,0x655b59c3,FN4,S41,AX,BX,CX,DX)
+	BODY( 3*4,0x8f0ccc92,FN4,S42,DX,AX,BX,CX)
+	BODY(10*4,0xffeff47d,FN4,S43,CX,DX,AX,BX)
+	BODY( 1*4,0x85845dd1,FN4,S44,BX,CX,DX,AX)
+
+	BODY( 8*4,0x6fa87e4f,FN4,S41,AX,BX,CX,DX)
+	BODY(15*4,0xfe2ce6e0,FN4,S42,DX,AX,BX,CX)
+	BODY( 6*4,0xa3014314,FN4,S43,CX,DX,AX,BX)
+	BODY(13*4,0x4e0811a1,FN4,S44,BX,CX,DX,AX)
+
+	BODY( 4*4,0xf7537e82,FN4,S41,AX,BX,CX,DX)
+	BODY(11*4,0xbd3af235,FN4,S42,DX,AX,BX,CX)
+	BODY( 2*4,0x2ad7d2bb,FN4,S43,CX,DX,AX,BX)
+	BODY( 9*4,0xeb86d391,FN4,S44,BX,CX,DX,AX)
+
+	ADDQ $(16*4),BP
+	MOVQ state+STATE(FP),DI
+	ADDL AX,0(DI)
+	ADDL BX,4(DI)
+	ADDL CX,8(DI)
+	ADDL DX,12(DI)
+
+	CMPQ BP,R8
+	JCS mainloop
+
+	RET
+
+	END

+ 10 - 2
sys/src/libsec/amd64/mkfile

@@ -2,10 +2,18 @@ objtype=amd64
 </$objtype/mkfile
 
 LIB=/$objtype/lib/libsec.a
-OFILES=	\
+FILES=\
+	md5block\
+	sha1block\
 
 HFILES=/sys/include/libsec.h
 
-UPDATE=mkfile
+SFILES=${FILES:%=%.s}
+
+OFILES=${FILES:%=%.$O}
+
+UPDATE=mkfile\
+	$HFILES\
+	$SFILES\
 
 </sys/src/cmd/mksyslib

+ 197 - 0
sys/src/libsec/amd64/sha1block.s

@@ -0,0 +1,197 @@
+/* x = (wp[off-f] ^ wp[off-8] ^ wp[off-14] ^ wp[off-16]) <<< 1;
+ * wp[off] = x;
+ * x += A <<< 5;
+ * E += 0xca62c1d6 + x;
+ * x = FN(B,C,D);
+ * E += x;
+ * B >>> 2
+ */
+#define BSWAPDI	BYTE $0x0f; BYTE $0xcf;
+
+#define BODY(off,FN,V,A,B,C,D,E)\
+	MOVL (off-64)(BP),DI;\
+	XORL (off-56)(BP),DI;\
+	XORL (off-32)(BP),DI;\
+	XORL (off-12)(BP),DI;\
+	ROLL $1,DI;\
+	MOVL DI,off(BP);\
+	LEAL V(DI)(E*1),E;\
+	MOVL A,DI;\
+	ROLL $5,DI;\
+	ADDL DI,E;\
+	FN(B,C,D)\
+	ADDL DI,E;\
+	RORL $2,B;\
+
+#define BODY0(off,FN,V,A,B,C,D,E)\
+	MOVLQZX off(BX),DI;\
+	BSWAPDI;\
+	MOVL DI,off(BP);\
+	LEAL V(DI)(E*1),E;\
+	MOVL A,DI;\
+	ROLL $5,DI;\
+	ADDL DI,E;\
+	FN(B,C,D)\
+	ADDL DI,E;\
+	RORL $2,B;\
+
+/*
+ * fn1 = (((C^D)&B)^D);
+ */
+#define FN1(B,C,D)\
+	MOVL C,DI;\
+	XORL D,DI;\
+	ANDL B,DI;\
+	XORL D,DI;\
+
+/*
+ * fn24 = B ^ C ^ D
+ */
+#define FN24(B,C,D)\
+	MOVL B,DI;\
+	XORL C,DI;\
+	XORL D,DI;\
+
+/*
+ * fn3 = ((B ^ C) & (D ^= B)) ^ B
+ * D ^= B to restore D
+ */
+#define FN3(B,C,D)\
+	MOVL B,DI;\
+	XORL C,DI;\
+	XORL B,D;\
+	ANDL D,DI;\
+	XORL B,DI;\
+	XORL B,D;\
+
+/*
+ * stack offsets
+ * void sha1block(uchar *DATA, int LEN, ulong *STATE)
+ */
+#define	DATA	0
+#define	LEN	8
+#define	STATE	16
+
+/*
+ * stack offsets for locals
+ * ulong w[80];
+ * uchar *edata;
+ * ulong *w15, *w40, *w60, *w80;
+ * register local
+ * ulong *wp = BP
+ * ulong a = eax, b = ebx, c = ecx, d = edx, e = esi
+ * ulong tmp = edi
+ */
+#define	Rpdata	R8
+#define WARRAY	(-8-(80*4))
+#define TMP1	(-16-(80*4))
+#define TMP2	(-24-(80*4))
+#define W15	(-32-(80*4))
+#define W40	(-40-(80*4))
+#define W60	(-48-(80*4))
+#define W80	(-56-(80*4))
+#define EDATA	(-64-(80*4))
+
+TEXT	_sha1block+0(SB),$384
+
+	MOVQ RARG, Rpdata
+	MOVLQZX len+LEN(FP),BX
+	ADDQ BX, RARG
+	MOVQ RARG,edata+EDATA(SP)
+
+	LEAQ aw15+(WARRAY+15*4)(SP),DI
+	MOVQ DI,w15+W15(SP)
+	LEAQ aw40+(WARRAY+40*4)(SP),DX
+	MOVQ DX,w40+W40(SP)
+	LEAQ aw60+(WARRAY+60*4)(SP),CX
+	MOVQ CX,w60+W60(SP)
+	LEAQ aw80+(WARRAY+80*4)(SP),DI
+	MOVQ DI,w80+W80(SP)
+
+mainloop:
+	LEAQ warray+WARRAY(SP),BP
+
+	MOVQ state+STATE(FP),DI
+	MOVL (DI),AX
+	MOVL 4(DI),BX
+	MOVL BX,tmp1+TMP1(SP)
+	MOVL 8(DI),CX
+	MOVL 12(DI),DX
+	MOVL 16(DI),SI
+
+	MOVQ Rpdata,BX
+
+loop1:
+	BODY0(0,FN1,0x5a827999,AX,tmp1+TMP1(SP),CX,DX,SI)
+	MOVL SI,tmp2+TMP2(SP)
+	BODY0(4,FN1,0x5a827999,SI,AX,tmp1+TMP1(SP),CX,DX)
+	MOVL tmp1+TMP1(SP),SI
+	BODY0(8,FN1,0x5a827999,DX,tmp2+TMP2(SP),AX,SI,CX)
+	BODY0(12,FN1,0x5a827999,CX,DX,tmp2+TMP2(SP),AX,SI)
+	MOVL SI,tmp1+TMP1(SP)
+	BODY0(16,FN1,0x5a827999,SI,CX,DX,tmp2+TMP2(SP),AX)
+	MOVL tmp2+TMP2(SP),SI
+
+	ADDQ $20,BX
+	ADDQ $20,BP
+	CMPQ BP,w15+W15(SP)
+	JCS loop1
+
+	BODY0(0,FN1,0x5a827999,AX,tmp1+TMP1(SP),CX,DX,SI)
+	ADDQ $4,BX
+	MOVQ BX,R8
+	MOVQ tmp1+TMP1(SP),BX
+
+	BODY(4,FN1,0x5a827999,SI,AX,BX,CX,DX)
+	BODY(8,FN1,0x5a827999,DX,SI,AX,BX,CX)
+	BODY(12,FN1,0x5a827999,CX,DX,SI,AX,BX)
+	BODY(16,FN1,0x5a827999,BX,CX,DX,SI,AX)
+
+	ADDQ $20,BP
+
+loop2:
+	BODY(0,FN24,0x6ed9eba1,AX,BX,CX,DX,SI)
+	BODY(4,FN24,0x6ed9eba1,SI,AX,BX,CX,DX)
+	BODY(8,FN24,0x6ed9eba1,DX,SI,AX,BX,CX)
+	BODY(12,FN24,0x6ed9eba1,CX,DX,SI,AX,BX)
+	BODY(16,FN24,0x6ed9eba1,BX,CX,DX,SI,AX)
+
+	ADDQ $20,BP
+	CMPQ BP,w40+W40(SP)
+	JCS loop2
+
+loop3:
+	BODY(0,FN3,0x8f1bbcdc,AX,BX,CX,DX,SI)
+	BODY(4,FN3,0x8f1bbcdc,SI,AX,BX,CX,DX)
+	BODY(8,FN3,0x8f1bbcdc,DX,SI,AX,BX,CX)
+	BODY(12,FN3,0x8f1bbcdc,CX,DX,SI,AX,BX)
+	BODY(16,FN3,0x8f1bbcdc,BX,CX,DX,SI,AX)
+
+	ADDQ $20,BP
+	CMPQ BP,w60+W60(SP)
+	JCS loop3
+
+loop4:
+	BODY(0,FN24,0xca62c1d6,AX,BX,CX,DX,SI)
+	BODY(4,FN24,0xca62c1d6,SI,AX,BX,CX,DX)
+	BODY(8,FN24,0xca62c1d6,DX,SI,AX,BX,CX)
+	BODY(12,FN24,0xca62c1d6,CX,DX,SI,AX,BX)
+	BODY(16,FN24,0xca62c1d6,BX,CX,DX,SI,AX)
+
+	ADDQ $20,BP
+	CMPQ BP,w80+W80(SP)
+	JCS loop4
+
+	MOVQ state+STATE(FP),DI
+	ADDL AX,0(DI)
+	ADDL BX,4(DI)
+	ADDL CX,8(DI)
+	ADDL DX,12(DI)
+	ADDL SI,16(DI)
+
+	MOVQ edata+EDATA(SP),DI
+	CMPQ Rpdata,DI
+	JCS mainloop
+
+	RET
+	END