/*
 * Copyright (C) 2017 Denys Vlasenko
 *
 * Licensed under GPLv2, see file LICENSE in this source tree.
 */
#include "tls.h"

/* The file is taken almost verbatim from matrixssl-3-7-2b-open/crypto/math/.
 * Changes are flagged with //bbox
 */

/**
 * @file pstm_montgomery_reduce.c
 * @version 33ef80f (HEAD, tag: MATRIXSSL-3-7-2-OPEN, tag: MATRIXSSL-3-7-2-COMM, origin/master, origin/HEAD, master)
 *
 * Multiprecision Montgomery Reduction.
 */
/*
 * Copyright (c) 2013-2015 INSIDE Secure Corporation
 * Copyright (c) PeerSec Networks, 2002-2011
 * All Rights Reserved
 *
 * The latest version of this code is available at http://www.matrixssl.org
 *
 * This software is open source; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This General Public License does NOT permit incorporating this software
 * into proprietary programs. If you are unable to comply with the GPL, a
 * commercial license for this software may be purchased from INSIDE at
 * http://www.insidesecure.com/eng/Company/Locations
 *
 * This program is distributed WITHOUT ANY WARRANTY; without even the
 * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 * See the GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 * http://www.gnu.org/copyleft/gpl.html
 */
/******************************************************************************/

//bbox
//#include "../cryptoApi.h"
#ifndef DISABLE_PSTM

/******************************************************************************/
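/* Every arch-specific branch below defines the same small set of macro
 * hooks consumed by pstm_montgomery_reduce() at the bottom of this file:
 *   MONT_START / MONT_FINI / LOOP_END - setup/teardown (empty everywhere)
 *   LOOP_START - computes this round's factor mu = c[x] * mp
 *                (truncation to a pstm_digit gives the mod-b reduction)
 *   INNERMUL   - one multiply-accumulate step:
 *                _c[0] += mu * (*tmpm++) + cy, new carry -> cy
 *   PROPCARRY  - folds cy into _c[0] and regenerates cy
 */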
#if defined(PSTM_X86)
/* x86-32 optimized for 32 bit platforms. For 64 bit mode use X86_64 instead */

#if !defined(__GNUC__) || !defined(__i386__) || !defined(PSTM_32BIT)
#error "PSTM_X86 option requires GCC and 32 bit mode x86 processor"
#endif

//#pragma message ("Using 32 bit x86 Assembly Optimizations")
#define MONT_START
#define MONT_FINI
#define LOOP_END
#define LOOP_START \
	mu = c[x] * mp
#if 0
#define INNERMUL \
asm( \
	"movl %5,%%eax \n\t" \
	"mull %4 \n\t" \
	"addl %1,%%eax \n\t" \
	"adcl $0,%%edx \n\t" \
	"addl %%eax,%0 \n\t" \
	"adcl $0,%%edx \n\t" \
	"movl %%edx,%1 \n\t" \
	:"=g"(_c[LO]), "=r"(cy) \
	:"0"(_c[LO]), "1"(cy), "g"(mu), "g"(*tmpm++) \
	: "%eax", "%edx", "cc")
/*
 * The above generated "error: 'asm' operand has impossible constraints" on Android.
 * Does their ABI reserve a register for something, leaving too few
 * to satisfy these constraints?
 */
#else
/* Avoid the two explicit "movl" insns by telling the compiler to put the
 * input value of *tmpm++ into EAX, and to expect the cy result in EDX:
 */
#define INNERMUL \
asm( \
	"mull %4 \n\t" \
	"addl %3,%%eax \n\t" \
	"adcl $0,%%edx \n\t" \
	"addl %%eax,%0 \n\t" \
	"adcl $0,%%edx \n\t" \
	:"=g"(_c[LO]), "=&d"(cy) \
	:"0"(_c[LO]), "g"(cy), "g"(mu), "a"(*tmpm++) \
	:"cc")
/* This doesn't tell the compiler that we clobber EAX, but it probably
 * won't need the value of *tmpm anyway, and thus won't try to reuse the
 * EAX contents.
 * TODO: fix it with a dummy "=a"(clobbered_eax) output?
 */
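/* A possible shape for that TODO - an untested sketch, not part of the
 * build: add a dummy "=a" output (clobbered_eax would be a local
 * pstm_digit) so the compiler knows EAX dies here. Note the operand
 * numbers in the template shift by one because of the extra output.
 */
#if 0
#define INNERMUL \
asm( \
	"mull %5 \n\t" \
	"addl %4,%%eax \n\t" \
	"adcl $0,%%edx \n\t" \
	"addl %%eax,%0 \n\t" \
	"adcl $0,%%edx \n\t" \
	:"=g"(_c[LO]), "=&d"(cy), "=a"(clobbered_eax) \
	:"0"(_c[LO]), "g"(cy), "g"(mu), "2"(*tmpm++) \
	:"cc")
#endif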
#endif
#define PROPCARRY \
asm( \
	"addl %1,%0 \n\t" \
	"sbb %1,%1 \n\t" \
	"neg %1 \n\t" \
	:"=g"(_c[LO]), "=r"(cy) \
	:"0"(_c[LO]), "1"(cy) \
	:"cc")
/******************************************************************************/
#elif defined(PSTM_X86_64)
/* x86-64 optimized */

#if !defined(__GNUC__) || !defined(__x86_64__) || !defined(PSTM_64BIT)
#error "PSTM_X86_64 option requires PSTM_64BIT, GCC and 64 bit mode x86 processor"
#endif

//#pragma message ("Using 64 bit x86_64 Assembly Optimizations")
#define MONT_START
#define MONT_FINI
#define LOOP_END
#define LOOP_START \
	mu = c[x] * mp
#define INNERMUL \
asm( \
	"movq %5,%%rax \n\t" \
	"mulq %4 \n\t" \
	"addq %1,%%rax \n\t" \
	"adcq $0,%%rdx \n\t" \
	"addq %%rax,%0 \n\t" \
	"adcq $0,%%rdx \n\t" \
	"movq %%rdx,%1 \n\t" \
	:"=g"(_c[LO]), "=r"(cy) \
	:"0"(_c[LO]), "1"(cy), "r"(mu), "r"(*tmpm++) \
	: "%rax", "%rdx", "cc")
#define INNERMUL8 \
asm( \
	"movq 0(%5),%%rax \n\t" \
	"movq 0(%2),%%r10 \n\t" \
	"movq 0x8(%5),%%r11 \n\t" \
	"mulq %4 \n\t" \
	"addq %%r10,%%rax \n\t" \
	"adcq $0,%%rdx \n\t" \
	"movq 0x8(%2),%%r10 \n\t" \
	"addq %3,%%rax \n\t" \
	"adcq $0,%%rdx \n\t" \
	"movq %%rax,0(%0) \n\t" \
	"movq %%rdx,%1 \n\t" \
\
	"movq %%r11,%%rax \n\t" \
	"movq 0x10(%5),%%r11 \n\t" \
	"mulq %4 \n\t" \
	"addq %%r10,%%rax \n\t" \
	"adcq $0,%%rdx \n\t" \
	"movq 0x10(%2),%%r10 \n\t" \
	"addq %3,%%rax \n\t" \
	"adcq $0,%%rdx \n\t" \
	"movq %%rax,0x8(%0) \n\t" \
	"movq %%rdx,%1 \n\t" \
\
	"movq %%r11,%%rax \n\t" \
	"movq 0x18(%5),%%r11 \n\t" \
	"mulq %4 \n\t" \
	"addq %%r10,%%rax \n\t" \
	"adcq $0,%%rdx \n\t" \
	"movq 0x18(%2),%%r10 \n\t" \
	"addq %3,%%rax \n\t" \
	"adcq $0,%%rdx \n\t" \
	"movq %%rax,0x10(%0) \n\t" \
	"movq %%rdx,%1 \n\t" \
\
	"movq %%r11,%%rax \n\t" \
	"movq 0x20(%5),%%r11 \n\t" \
	"mulq %4 \n\t" \
	"addq %%r10,%%rax \n\t" \
	"adcq $0,%%rdx \n\t" \
	"movq 0x20(%2),%%r10 \n\t" \
	"addq %3,%%rax \n\t" \
	"adcq $0,%%rdx \n\t" \
	"movq %%rax,0x18(%0) \n\t" \
	"movq %%rdx,%1 \n\t" \
\
	"movq %%r11,%%rax \n\t" \
	"movq 0x28(%5),%%r11 \n\t" \
	"mulq %4 \n\t" \
	"addq %%r10,%%rax \n\t" \
	"adcq $0,%%rdx \n\t" \
	"movq 0x28(%2),%%r10 \n\t" \
	"addq %3,%%rax \n\t" \
	"adcq $0,%%rdx \n\t" \
	"movq %%rax,0x20(%0) \n\t" \
	"movq %%rdx,%1 \n\t" \
\
	"movq %%r11,%%rax \n\t" \
	"movq 0x30(%5),%%r11 \n\t" \
	"mulq %4 \n\t" \
	"addq %%r10,%%rax \n\t" \
	"adcq $0,%%rdx \n\t" \
	"movq 0x30(%2),%%r10 \n\t" \
	"addq %3,%%rax \n\t" \
	"adcq $0,%%rdx \n\t" \
	"movq %%rax,0x28(%0) \n\t" \
	"movq %%rdx,%1 \n\t" \
\
	"movq %%r11,%%rax \n\t" \
	"movq 0x38(%5),%%r11 \n\t" \
	"mulq %4 \n\t" \
	"addq %%r10,%%rax \n\t" \
	"adcq $0,%%rdx \n\t" \
	"movq 0x38(%2),%%r10 \n\t" \
	"addq %3,%%rax \n\t" \
	"adcq $0,%%rdx \n\t" \
	"movq %%rax,0x30(%0) \n\t" \
	"movq %%rdx,%1 \n\t" \
\
	"movq %%r11,%%rax \n\t" \
	"mulq %4 \n\t" \
	"addq %%r10,%%rax \n\t" \
	"adcq $0,%%rdx \n\t" \
	"addq %3,%%rax \n\t" \
	"adcq $0,%%rdx \n\t" \
	"movq %%rax,0x38(%0) \n\t" \
	"movq %%rdx,%1 \n\t" \
\
	:"=r"(_c), "=r"(cy) \
	: "0"(_c), "1"(cy), "g"(mu), "r"(tmpm)\
	: "%rax", "%rdx", "%r10", "%r11", "cc")
#define PROPCARRY \
asm( \
	"addq %1,%0 \n\t" \
	"setb %%al \n\t" \
	"movzbq %%al,%1 \n\t" \
	:"=g"(_c[LO]), "=r"(cy) \
	:"0"(_c[LO]), "1"(cy) \
	: "%rax", "cc")
/******************************************************************************/
#elif defined(PSTM_ARM)

#define MONT_START
#define MONT_FINI
#define LOOP_END
#define LOOP_START \
	mu = c[x] * mp
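/* Both ARM variants lean on UMLAL (unsigned multiply-accumulate long):
 * the digit from _c plus the incoming carry is formed in r0 first, with
 * the ADDS/MOVCS/MOVCC sequence seeding %0 with that add's carry-out;
 * UMLAL then computes {%0:r0} += mu * (*tmpm), leaving the new carry in
 * %0. Thumb2 differs only in needing an IT block before the conditional
 * MOVs.
 */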
#ifdef __thumb2__
//#pragma message ("Using 32 bit ARM Thumb2 Assembly Optimizations")
#define INNERMUL \
asm( \
	" LDR r0,%1 \n\t" \
	" ADDS r0,r0,%0 \n\t" \
	" ITE CS \n\t" \
	" MOVCS %0,#1 \n\t" \
	" MOVCC %0,#0 \n\t" \
	" UMLAL r0,%0,%3,%4 \n\t" \
	" STR r0,%1 \n\t" \
	:"=r"(cy),"=m"(_c[0])\
	:"0"(cy),"r"(mu),"r"(*tmpm++),"m"(_c[0])\
	:"r0","cc");
#define PROPCARRY \
asm( \
	" LDR r0,%1 \n\t" \
	" ADDS r0,r0,%0 \n\t" \
	" STR r0,%1 \n\t" \
	" ITE CS \n\t" \
	" MOVCS %0,#1 \n\t" \
	" MOVCC %0,#0 \n\t" \
	:"=r"(cy),"=m"(_c[0])\
	:"0"(cy),"m"(_c[0])\
	:"r0","cc");
#else /* Non-Thumb2 code */
//#pragma message ("Using 32 bit ARM Assembly Optimizations")
#define INNERMUL \
asm( \
	" LDR r0,%1 \n\t" \
	" ADDS r0,r0,%0 \n\t" \
	" MOVCS %0,#1 \n\t" \
	" MOVCC %0,#0 \n\t" \
	" UMLAL r0,%0,%3,%4 \n\t" \
	" STR r0,%1 \n\t" \
	:"=r"(cy),"=m"(_c[0])\
	:"0"(cy),"r"(mu),"r"(*tmpm++),"m"(_c[0])\
	:"r0","cc");
#define PROPCARRY \
asm( \
	" LDR r0,%1 \n\t" \
	" ADDS r0,r0,%0 \n\t" \
	" STR r0,%1 \n\t" \
	" MOVCS %0,#1 \n\t" \
	" MOVCC %0,#0 \n\t" \
	:"=r"(cy),"=m"(_c[0])\
	:"0"(cy),"m"(_c[0])\
	:"r0","cc");
#endif /* __thumb2__ */
/******************************************************************************/
#elif defined(PSTM_MIPS)
/* MIPS32 */

//#pragma message ("Using 32 bit MIPS Assembly Optimizations")
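/* MIPS has no flags register, so carries are recomputed with sltu
 * ("set on less than, unsigned"): after an add, (sum < addend) is 1
 * exactly when the add wrapped around.
 */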
#define MONT_START
#define MONT_FINI
#define LOOP_END
#define LOOP_START \
	mu = c[x] * mp

#define INNERMUL \
asm( \
	" multu %3,%4 \n\t" \
	" mflo $12 \n\t" \
	" mfhi $13 \n\t" \
	" addu $12,$12,%0 \n\t" \
	" sltu $10,$12,%0 \n\t" \
	" addu $13,$13,$10 \n\t" \
	" lw $10,%1 \n\t" \
	" addu $12,$12,$10 \n\t" \
	" sltu $10,$12,$10 \n\t" \
	" addu %0,$13,$10 \n\t" \
	" sw $12,%1 \n\t" \
	:"=r"(cy),"=m"(_c[0])\
	:"r"(cy),"r"(mu),"r"(tmpm[0]),"r"(_c[0])\
	:"$10","$12","$13")\
	; ++tmpm;
#define PROPCARRY \
asm( \
	" lw $10,%1 \n\t" \
	" addu $10,$10,%0 \n\t" \
	" sw $10,%1 \n\t" \
	" sltu %0,$10,%0 \n\t" \
	:"=r"(cy),"=m"(_c[0])\
	:"r"(cy),"r"(_c[0])\
	:"$10");
/******************************************************************************/
#else

/* ISO C code */
#define MONT_START
#define MONT_FINI
#define LOOP_END
#define LOOP_START \
	mu = c[x] * mp
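/* Portable fallback: pstm_word is twice as wide as pstm_digit, so
 * digit * digit + digit + digit cannot overflow; the low half of t is
 * the new digit and the high half (>> DIGIT_BIT) is the carry.
 */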
#define INNERMUL \
do { pstm_word t; \
	t = ((pstm_word)_c[0] + (pstm_word)cy) + \
		(((pstm_word)mu) * ((pstm_word)*tmpm++)); \
	_c[0] = (pstm_digit)t; \
	cy = (pstm_digit)(t >> DIGIT_BIT); \
} while (0)
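/* Fold the pending carry into the digit; with unsigned wraparound the
 * sum is smaller than the addend exactly when a carry-out occurred.
 */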
#define PROPCARRY \
do { pstm_digit t = _c[0] += cy; cy = (t < cy); } while (0)

#endif

/******************************************************************************/

#define LO 0

/* computes x/R == x (mod N) via Montgomery Reduction */
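/* Digit-serial Montgomery reduction (cf. HAC, Algorithm 14.32): mp is
 * -1/m mod b (b = 1 << DIGIT_BIT), so mu = c[x] * mp makes c[x] + mu * m
 * divisible by b, i.e. each round zeroes one low digit of c. After pa
 * rounds the pa low digits are all zero, and the "copy out" loop below
 * shifts them away, which is the division by R = b^pa. A final
 * conditional subtraction brings the result under m.
 */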
int32 FAST_FUNC pstm_montgomery_reduce(psPool_t *pool, pstm_int *a, pstm_int *m,
		pstm_digit mp, pstm_digit *paD, uint32 paDlen)
{
	pstm_digit *c, *_c, *tmpm, mu;
	int32 oldused, x, y;
	int pa; //bbox: was int16

	pa = m->used;
	if (pa > a->alloc) {
		/* Sanity test for bad numbers. This will confirm no buffer overruns */
		return PS_LIMIT_FAIL;
	}
	if (paD && paDlen >= (uint32)2*pa+1) {
		c = paD;
		memset(c, 0x0, paDlen);
	} else {
		/* scratch buffer of 2*pa+1 digits (not bytes) */
		c = xzalloc((2*pa+1) * sizeof(pstm_digit));//bbox
	}
	/* copy the input */
	oldused = a->used;
	for (x = 0; x < oldused; x++) {
		c[x] = a->dp[x];
	}

	MONT_START;

	for (x = 0; x < pa; x++) {
		pstm_digit cy = 0;
		/* get Mu for this round */
		LOOP_START;
		_c = c + x;
		tmpm = m->dp;
		y = 0;
#ifdef PSTM_X86_64
		for (; y < (pa & ~7); y += 8) {
			INNERMUL8;
			_c += 8;
			tmpm += 8;
		}
#endif /* PSTM_X86_64 */
		for (; y < pa; y++) {
			INNERMUL;
			++_c;
		}
		LOOP_END;
		while (cy) {
			PROPCARRY;
			++_c;
		}
	}

	/* now copy out */
	_c = c + pa;
	tmpm = a->dp;
	for (x = 0; x < pa+1; x++) {
		*tmpm++ = *_c++;
	}

	for (; x < oldused; x++) {
		*tmpm++ = 0;
	}

	MONT_FINI;

	a->used = pa+1;
	pstm_clamp(a);

	/* reuse x as return code */
	x = PSTM_OKAY;

	/* if A >= m then A = A - m */
	if (pstm_cmp_mag (a, m) != PSTM_LT) {
		if (s_pstm_sub (a, m, a) != PSTM_OKAY) {
			x = PS_MEM_FAIL;
		}
	}
	/* free the scratch buffer only if we allocated it ourselves
	 * (c == paD means the caller's buffer was used)
	 */
	if (c != paD) {
		psFree(c, pool);
	}
	return x;
}
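/* Rough usage sketch (not compiled here; assumes the companion
 * pstm_montgomery_setup() from tls_pstm_montgomery_setup.c, which derives
 * mp = -1/m mod b from an odd modulus m):
 *
 *	pstm_digit mp;
 *	pstm_montgomery_setup(&m, &mp);
 *	// ...bring a into Montgomery form, multiply...
 *	pstm_montgomery_reduce(pool, &a, &m, mp, paD, paDlen);
 */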
#endif /* !DISABLE_PSTM */

/******************************************************************************/