123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113 |
- /*
- * Copyright (C) 2017 Denys Vlasenko
- *
- * Licensed under GPLv2, see file LICENSE in this source tree.
- */
- #include "tls.h"
- /* The file is taken almost verbatim from matrixssl-3-7-2b-open/crypto/math/.
- * Changes are flagged with //bbox
- */
- /**
- * @file pstm_sqr_comba.c
- * @version 33ef80f (HEAD, tag: MATRIXSSL-3-7-2-OPEN, tag: MATRIXSSL-3-7-2-COMM, origin/master, origin/HEAD, master)
- *
- * Multiprecision Squaring with Comba technique.
- */
- /*
- * Copyright (c) 2013-2015 INSIDE Secure Corporation
- * Copyright (c) PeerSec Networks, 2002-2011
- * All Rights Reserved
- *
- * The latest version of this code is available at http://www.matrixssl.org
- *
- * This software is open source; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This General Public License does NOT permit incorporating this software
- * into proprietary programs. If you are unable to comply with the GPL, a
- * commercial license for this software may be purchased from INSIDE at
- * http://www.insidesecure.com/eng/Company/Locations
- *
- * This program is distributed in WITHOUT ANY WARRANTY; without even the
- * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- * See the GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- * http://www.gnu.org/copyleft/gpl.html
- */
- /******************************************************************************/
- //bbox
- //#include "../cryptoApi.h"
- #ifndef DISABLE_PSTM
- /******************************************************************************/
- #if defined(PSTM_X86)
- /* x86-32 optimized for 32 bit platforms. For 64 bit mode use X86_64 instead */
- #if !defined(__GNUC__) || !defined(__i386__)
- #error "PSTM_X86 option requires GCC and 32 bit mode x86 processor"
- #endif
- //#pragma message ("Using 32 bit x86 Assembly Optimizations")
- #define COMBA_START
- #define CLEAR_CARRY \
- c0 = c1 = c2 = 0;
- #define COMBA_STORE(x) \
- x = c0;
- #define COMBA_STORE2(x) \
- x = c1;
- #define CARRY_FORWARD \
- do { c0 = c1; c1 = c2; c2 = 0; } while (0);
- #define COMBA_FINI
- #define SQRADD(i, j) \
- asm( \
- "movl %6,%%eax \n\t" \
- "mull %%eax \n\t" \
- "addl %%eax,%0 \n\t" \
- "adcl %%edx,%1 \n\t" \
- "adcl $0,%2 \n\t" \
- :"=rm"(c0), "=rm"(c1), "=rm"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i) :"%eax","%edx","%cc");
- //bbox: ^^^ replaced "=r" with "=rm": %ebx is not available on shared build
- #define SQRADD2(i, j) \
- asm( \
- "movl %6,%%eax \n\t" \
- "mull %7 \n\t" \
- "addl %%eax,%0 \n\t" \
- "adcl %%edx,%1 \n\t" \
- "adcl $0,%2 \n\t" \
- "addl %%eax,%0 \n\t" \
- "adcl %%edx,%1 \n\t" \
- "adcl $0,%2 \n\t" \
- :"=rm"(c0), "=rm"(c1), "=rm"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i), "m"(j) :"%eax","%edx","%cc");
- //bbox: ^^^ replaced "=r" with "=rm": %ebx is not available on shared build
- #define SQRADDSC(i, j) \
- asm( \
- "movl %6,%%eax \n\t" \
- "mull %7 \n\t" \
- "movl %%eax,%0 \n\t" \
- "movl %%edx,%1 \n\t" \
- "xorl %2,%2 \n\t" \
- :"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "g"(i), "g"(j) :"%eax","%edx","%cc");
- #define SQRADDAC(i, j) \
- asm( \
- "movl %6,%%eax \n\t" \
- "mull %7 \n\t" \
- "addl %%eax,%0 \n\t" \
- "adcl %%edx,%1 \n\t" \
- "adcl $0,%2 \n\t" \
- :"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "g"(i), "g"(j) :"%eax","%edx","%cc");
- #define SQRADDDB \
- asm( \
- "addl %6,%0 \n\t" \
- "adcl %7,%1 \n\t" \
- "adcl %8,%2 \n\t" \
- "addl %6,%0 \n\t" \
- "adcl %7,%1 \n\t" \
- "adcl %8,%2 \n\t" \
- :"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(sc0), "r"(sc1), "r"(sc2) : "%cc");
- /******************************************************************************/
- #elif defined(PSTM_X86_64)
- /* x86-64 optimized */
- #if !defined(__GNUC__) || !defined(__x86_64__) || !defined(PSTM_64BIT)
- #error "PSTM_X86_64 option requires PSTM_64BIT, GCC and 64 bit mode x86 processor"
- #endif
- //#pragma message ("Using 64 bit x86_64 Assembly Optimizations")
- #define COMBA_START
- #define CLEAR_CARRY \
- c0 = c1 = c2 = 0;
- #define COMBA_STORE(x) \
- x = c0;
- #define COMBA_STORE2(x) \
- x = c1;
- #define CARRY_FORWARD \
- do { c0 = c1; c1 = c2; c2 = 0; } while (0);
- #define COMBA_FINI
- #define SQRADD(i, j) \
- asm( \
- "movq %6,%%rax \n\t" \
- "mulq %%rax \n\t" \
- "addq %%rax,%0 \n\t" \
- "adcq %%rdx,%1 \n\t" \
- "adcq $0,%2 \n\t" \
- :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "g"(i) :"%rax","%rdx","cc");
- #define SQRADD2(i, j) \
- asm( \
- "movq %6,%%rax \n\t" \
- "mulq %7 \n\t" \
- "addq %%rax,%0 \n\t" \
- "adcq %%rdx,%1 \n\t" \
- "adcq $0,%2 \n\t" \
- "addq %%rax,%0 \n\t" \
- "adcq %%rdx,%1 \n\t" \
- "adcq $0,%2 \n\t" \
- :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "g"(i), "g"(j) :"%rax","%rdx","cc");
- #define SQRADDSC(i, j) \
- asm( \
- "movq %6,%%rax \n\t" \
- "mulq %7 \n\t" \
- "movq %%rax,%0 \n\t" \
- "movq %%rdx,%1 \n\t" \
- "xorq %2,%2 \n\t" \
- :"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "g"(i), "g"(j) :"%rax","%rdx","cc");
- #define SQRADDAC(i, j) \
- asm( \
- "movq %6,%%rax \n\t" \
- "mulq %7 \n\t" \
- "addq %%rax,%0 \n\t" \
- "adcq %%rdx,%1 \n\t" \
- "adcq $0,%2 \n\t" \
- :"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "g"(i), "g"(j) :"%rax","%rdx","cc");
- #define SQRADDDB \
- asm( \
- "addq %6,%0 \n\t" \
- "adcq %7,%1 \n\t" \
- "adcq %8,%2 \n\t" \
- "addq %6,%0 \n\t" \
- "adcq %7,%1 \n\t" \
- "adcq %8,%2 \n\t" \
- :"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(sc0), "r"(sc1), "r"(sc2) : "cc");
- /******************************************************************************/
- #elif defined(PSTM_ARM)
- /* ARM code */
- //#pragma message ("Using 32 bit ARM Assembly Optimizations")
- #define COMBA_START
- #define CLEAR_CARRY \
- c0 = c1 = c2 = 0;
- #define COMBA_STORE(x) \
- x = c0;
- #define COMBA_STORE2(x) \
- x = c1;
- #define CARRY_FORWARD \
- do { c0 = c1; c1 = c2; c2 = 0; } while (0);
- #define COMBA_FINI
- /* multiplies point i and j, updates carry "c1" and digit c2 */
- #define SQRADD(i, j) \
- asm( \
- " UMULL r0,r1,%6,%6 \n\t" \
- " ADDS %0,%0,r0 \n\t" \
- " ADCS %1,%1,r1 \n\t" \
- " ADC %2,%2,#0 \n\t" \
- :"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(i) : "r0", "r1", "%cc");
- /* for squaring some of the terms are doubled... */
- #define SQRADD2(i, j) \
- asm( \
- " UMULL r0,r1,%6,%7 \n\t" \
- " ADDS %0,%0,r0 \n\t" \
- " ADCS %1,%1,r1 \n\t" \
- " ADC %2,%2,#0 \n\t" \
- " ADDS %0,%0,r0 \n\t" \
- " ADCS %1,%1,r1 \n\t" \
- " ADC %2,%2,#0 \n\t" \
- :"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j) : "r0", "r1", "%cc");
- #define SQRADDSC(i, j) \
- asm( \
- " UMULL %0,%1,%6,%7 \n\t" \
- " SUB %2,%2,%2 \n\t" \
- :"=r"(sc0), "=r"(sc1), "=r"(sc2) : "0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j) : "%cc");
- #define SQRADDAC(i, j) \
- asm( \
- " UMULL r0,r1,%6,%7 \n\t" \
- " ADDS %0,%0,r0 \n\t" \
- " ADCS %1,%1,r1 \n\t" \
- " ADC %2,%2,#0 \n\t" \
- :"=r"(sc0), "=r"(sc1), "=r"(sc2) : "0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j) : "r0", "r1", "%cc");
- #define SQRADDDB \
- asm( \
- " ADDS %0,%0,%3 \n\t" \
- " ADCS %1,%1,%4 \n\t" \
- " ADC %2,%2,%5 \n\t" \
- " ADDS %0,%0,%3 \n\t" \
- " ADCS %1,%1,%4 \n\t" \
- " ADC %2,%2,%5 \n\t" \
- :"=r"(c0), "=r"(c1), "=r"(c2) : "r"(sc0), "r"(sc1), "r"(sc2), "0"(c0), "1"(c1), "2"(c2) : "%cc");
- /******************************************************************************/
- #elif defined(PSTM_MIPS)
- /* MIPS32 */
- //#pragma message ("Using 32 bit MIPS Assembly Optimizations")
- #define COMBA_START
- #define CLEAR_CARRY \
- c0 = c1 = c2 = 0;
- #define COMBA_STORE(x) \
- x = c0;
- #define COMBA_STORE2(x) \
- x = c1;
- #define CARRY_FORWARD \
- do { c0 = c1; c1 = c2; c2 = 0; } while (0);
- #define COMBA_FINI
- /* multiplies point i and j, updates carry "c1" and digit c2 */
- #define SQRADD(i, j) \
- asm( \
- " multu %6,%6 \n\t" \
- " mflo $12 \n\t" \
- " mfhi $13 \n\t" \
- " addu %0,%0,$12 \n\t" \
- " sltu $12,%0,$12 \n\t" \
- " addu %1,%1,$13 \n\t" \
- " sltu $13,%1,$13 \n\t" \
- " addu %1,%1,$12 \n\t" \
- " sltu $12,%1,$12 \n\t" \
- " addu %2,%2,$13 \n\t" \
- " addu %2,%2,$12 \n\t" \
- :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i):"$12","$13");
- /* for squaring some of the terms are doubled... */
- #define SQRADD2(i, j) \
- asm( \
- " multu %6,%7 \n\t" \
- " mflo $12 \n\t" \
- " mfhi $13 \n\t" \
- \
- " addu %0,%0,$12 \n\t" \
- " sltu $14,%0,$12 \n\t" \
- " addu %1,%1,$13 \n\t" \
- " sltu $15,%1,$13 \n\t" \
- " addu %1,%1,$14 \n\t" \
- " sltu $14,%1,$14 \n\t" \
- " addu %2,%2,$15 \n\t" \
- " addu %2,%2,$14 \n\t" \
- \
- " addu %0,%0,$12 \n\t" \
- " sltu $14,%0,$12 \n\t" \
- " addu %1,%1,$13 \n\t" \
- " sltu $15,%1,$13 \n\t" \
- " addu %1,%1,$14 \n\t" \
- " sltu $14,%1,$14 \n\t" \
- " addu %2,%2,$15 \n\t" \
- " addu %2,%2,$14 \n\t" \
- :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"$12", "$13", "$14", "$15");
- #define SQRADDSC(i, j) \
- asm( \
- " multu %6,%7 \n\t" \
- " mflo %0 \n\t" \
- " mfhi %1 \n\t" \
- " xor %2,%2,%2 \n\t" \
- :"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i),"r"(j) : "%cc");
- #define SQRADDAC(i, j) \
- asm( \
- " multu %6,%7 \n\t" \
- " mflo $12 \n\t" \
- " mfhi $13 \n\t" \
- " addu %0,%0,$12 \n\t" \
- " sltu $12,%0,$12 \n\t" \
- " addu %1,%1,$13 \n\t" \
- " sltu $13,%1,$13 \n\t" \
- " addu %1,%1,$12 \n\t" \
- " sltu $12,%1,$12 \n\t" \
- " addu %2,%2,$13 \n\t" \
- " addu %2,%2,$12 \n\t" \
- :"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j):"$12", "$13", "$14");
- #define SQRADDDB \
- asm( \
- " addu %0,%0,%3 \n\t" \
- " sltu $10,%0,%3 \n\t" \
- " addu %1,%1,$10 \n\t" \
- " sltu $10,%1,$10 \n\t" \
- " addu %1,%1,%4 \n\t" \
- " sltu $11,%1,%4 \n\t" \
- " addu %2,%2,$10 \n\t" \
- " addu %2,%2,$11 \n\t" \
- " addu %2,%2,%5 \n\t" \
- \
- " addu %0,%0,%3 \n\t" \
- " sltu $10,%0,%3 \n\t" \
- " addu %1,%1,$10 \n\t" \
- " sltu $10,%1,$10 \n\t" \
- " addu %1,%1,%4 \n\t" \
- " sltu $11,%1,%4 \n\t" \
- " addu %2,%2,$10 \n\t" \
- " addu %2,%2,$11 \n\t" \
- " addu %2,%2,%5 \n\t" \
- :"=r"(c0), "=r"(c1), "=r"(c2) : "r"(sc0), "r"(sc1), "r"(sc2), "0"(c0), "1"(c1), "2"(c2) : "$10", "$11");
- #else
- /******************************************************************************/
- #define PSTM_ISO
- /* ISO C portable code */
- #define COMBA_START
- #define CLEAR_CARRY \
- c0 = c1 = c2 = 0;
- #define COMBA_STORE(x) \
- x = c0;
- #define COMBA_STORE2(x) \
- x = c1;
- #define CARRY_FORWARD \
- do { c0 = c1; c1 = c2; c2 = 0; } while (0);
- #define COMBA_FINI
- /* multiplies point i and j, updates carry "c1" and digit c2 */
- #define SQRADD(i, j) \
- do { pstm_word t; \
- t = c0 + ((pstm_word)i) * ((pstm_word)j); c0 = (pstm_digit)t; \
- t = c1 + (t >> DIGIT_BIT); \
- c1 = (pstm_digit)t; c2 += (pstm_digit)(t >> DIGIT_BIT); \
- } while (0);
- /* for squaring some of the terms are doubled... */
- #define SQRADD2(i, j) \
- do { pstm_word t; \
- t = ((pstm_word)i) * ((pstm_word)j); \
- tt = (pstm_word)c0 + t; c0 = (pstm_digit)tt; \
- tt = (pstm_word)c1 + (tt >> DIGIT_BIT); \
- c1 = (pstm_digit)tt; c2 += (pstm_digit)(tt >> DIGIT_BIT); \
- tt = (pstm_word)c0 + t; c0 = (pstm_digit)tt; \
- tt = (pstm_word)c1 + (tt >> DIGIT_BIT); \
- c1 = (pstm_digit)tt; c2 += (pstm_digit)(tt >> DIGIT_BIT); \
- } while (0);
- #define SQRADDSC(i, j) \
- do { pstm_word t; \
- t = ((pstm_word)i) * ((pstm_word)j); \
- sc0 = (pstm_digit)t; sc1 = (pstm_digit)(t >> DIGIT_BIT); sc2 = 0; \
- } while (0);
- #define SQRADDAC(i, j) \
- do { pstm_word t; \
- t = ((pstm_word)sc0) + ((pstm_word)i) * ((pstm_word)j); \
- sc0 = (pstm_digit)t; \
- t = ((pstm_word)sc1) + (t >> DIGIT_BIT); sc1 = (pstm_digit)t; \
- sc2 += (pstm_digit)(t >> DIGIT_BIT); \
- } while (0);
- #define SQRADDDB \
- do { pstm_word t; \
- t = ((pstm_word)sc0) + ((pstm_word)sc0) + ((pstm_word)c0); \
- c0 = (pstm_digit)t; \
- t = ((pstm_word)sc1) + ((pstm_word)sc1) + c1 + (t >> DIGIT_BIT); \
- c1 = (pstm_digit)t; \
- c2 = c2 + sc2 + sc2 + (pstm_digit)(t >> DIGIT_BIT); \
- } while (0);
- #endif /* ISO_C */
- /******************************************************************************/
- /*
- Non-unrolled comba squarer
- */
- //bbox: pool unused
- #define pstm_sqr_comba_gen(pool, A, B, paD, paDlen) \
- pstm_sqr_comba_gen( A, B, paD, paDlen)
- static int32 pstm_sqr_comba_gen(psPool_t *pool, pstm_int *A, pstm_int *B,
- pstm_digit *paD, uint32 paDlen)
- {
- int16 paDfail, pa;
- int32 ix, iz;
- pstm_digit c0, c1, c2, *dst;
- #ifdef PSTM_ISO
- pstm_word tt;
- #endif
- paDfail = 0;
- /* get size of output and trim */
- pa = A->used + A->used;
- /* number of output digits to produce */
- COMBA_START;
- CLEAR_CARRY;
- /*
- If b is not large enough grow it and continue
- */
- if (B->alloc < pa) {
- if (pstm_grow(B, pa) != PSTM_OKAY) {
- return PS_MEM_FAIL;
- }
- }
- if (paD != NULL) {
- if (paDlen < (sizeof(pstm_digit) * pa)) {
- paDfail = 1; /* have a paD, but it's not big enough */
- dst = xzalloc(sizeof(pstm_digit) * pa);//bbox
- } else {
- dst = paD;
- memset(dst, 0x0, paDlen);
- }
- } else {
- dst = xzalloc(sizeof(pstm_digit) * pa);//bbox
- }
- for (ix = 0; ix < pa; ix++) {
- int32 tx, ty, iy;
- pstm_digit *tmpy, *tmpx;
- /* get offsets into the two bignums */
- ty = min(A->used-1, ix);
- tx = ix - ty;
- /* setup temp aliases */
- tmpx = A->dp + tx;
- tmpy = A->dp + ty;
- /*
- This is the number of times the loop will iterate,
- while (tx++ < a->used && ty-- >= 0) { ... }
- */
- iy = min(A->used-tx, ty+1);
- /*
- now for squaring tx can never equal ty. We halve the distance since
- they approach at a rate of 2x and we have to round because odd cases
- need to be executed
- */
- iy = min(iy, (ty-tx+1)>>1);
- /* forward carries */
- CARRY_FORWARD;
- /* execute loop */
- for (iz = 0; iz < iy; iz++) {
- SQRADD2(*tmpx++, *tmpy--);
- }
- /* even columns have the square term in them */
- if ((ix&1) == 0) {
- SQRADD(A->dp[ix>>1], A->dp[ix>>1]);
- }
- /* store it */
- COMBA_STORE(dst[ix]);
- }
- COMBA_FINI;
- /*
- setup dest
- */
- iz = B->used;
- B->used = pa;
- {
- pstm_digit *tmpc;
- tmpc = B->dp;
- for (ix = 0; ix < pa; ix++) {
- *tmpc++ = dst[ix];
- }
- /* clear unused digits (that existed in the old copy of c) */
- for (; ix < iz; ix++) {
- *tmpc++ = 0;
- }
- }
- pstm_clamp(B);
- if ((paD == NULL) || paDfail == 1) {
- psFree(dst, pool);
- }
- return PS_SUCCESS;
- }
- /******************************************************************************/
- /*
- Unrolled Comba loop for 1024 bit keys
- */
- #ifdef USE_1024_KEY_SPEED_OPTIMIZATIONS
- static int32 pstm_sqr_comba16(pstm_int *A, pstm_int *B)
- {
- pstm_digit *a, b[32], c0, c1, c2, sc0, sc1, sc2;
- #ifdef PSTM_ISO
- pstm_word tt;
- #endif
- if (B->alloc < 32) {
- if (pstm_grow(B, 32) != PSTM_OKAY) {
- return PS_MEM_FAIL;
- }
- }
- a = A->dp;
- sc0 = sc1 = sc2 = 0;
- COMBA_START;
- /* clear carries */
- CLEAR_CARRY;
- /* output 0 */
- SQRADD(a[0],a[0]);
- COMBA_STORE(b[0]);
- /* output 1 */
- CARRY_FORWARD;
- SQRADD2(a[0], a[1]);
- COMBA_STORE(b[1]);
- /* output 2 */
- CARRY_FORWARD;
- SQRADD2(a[0], a[2]); SQRADD(a[1], a[1]);
- COMBA_STORE(b[2]);
- /* output 3 */
- CARRY_FORWARD;
- SQRADD2(a[0], a[3]); SQRADD2(a[1], a[2]);
- COMBA_STORE(b[3]);
- /* output 4 */
- CARRY_FORWARD;
- SQRADD2(a[0], a[4]); SQRADD2(a[1], a[3]); SQRADD(a[2], a[2]);
- COMBA_STORE(b[4]);
- /* output 5 */
- CARRY_FORWARD;
- SQRADDSC(a[0], a[5]); SQRADDAC(a[1], a[4]); SQRADDAC(a[2], a[3]); SQRADDDB;
- COMBA_STORE(b[5]);
- /* output 6 */
- CARRY_FORWARD;
- SQRADDSC(a[0], a[6]); SQRADDAC(a[1], a[5]); SQRADDAC(a[2], a[4]); SQRADDDB; SQRADD(a[3], a[3]);
- COMBA_STORE(b[6]);
- /* output 7 */
- CARRY_FORWARD;
- SQRADDSC(a[0], a[7]); SQRADDAC(a[1], a[6]); SQRADDAC(a[2], a[5]); SQRADDAC(a[3], a[4]); SQRADDDB;
- COMBA_STORE(b[7]);
- /* output 8 */
- CARRY_FORWARD;
- SQRADDSC(a[0], a[8]); SQRADDAC(a[1], a[7]); SQRADDAC(a[2], a[6]); SQRADDAC(a[3], a[5]); SQRADDDB; SQRADD(a[4], a[4]);
- COMBA_STORE(b[8]);
- /* output 9 */
- CARRY_FORWARD;
- SQRADDSC(a[0], a[9]); SQRADDAC(a[1], a[8]); SQRADDAC(a[2], a[7]); SQRADDAC(a[3], a[6]); SQRADDAC(a[4], a[5]); SQRADDDB;
- COMBA_STORE(b[9]);
- /* output 10 */
- CARRY_FORWARD;
- SQRADDSC(a[0], a[10]); SQRADDAC(a[1], a[9]); SQRADDAC(a[2], a[8]); SQRADDAC(a[3], a[7]); SQRADDAC(a[4], a[6]); SQRADDDB; SQRADD(a[5], a[5]);
- COMBA_STORE(b[10]);
- /* output 11 */
- CARRY_FORWARD;
- SQRADDSC(a[0], a[11]); SQRADDAC(a[1], a[10]); SQRADDAC(a[2], a[9]); SQRADDAC(a[3], a[8]); SQRADDAC(a[4], a[7]); SQRADDAC(a[5], a[6]); SQRADDDB;
- COMBA_STORE(b[11]);
- /* output 12 */
- CARRY_FORWARD;
- SQRADDSC(a[0], a[12]); SQRADDAC(a[1], a[11]); SQRADDAC(a[2], a[10]); SQRADDAC(a[3], a[9]); SQRADDAC(a[4], a[8]); SQRADDAC(a[5], a[7]); SQRADDDB; SQRADD(a[6], a[6]);
- COMBA_STORE(b[12]);
- /* output 13 */
- CARRY_FORWARD;
- SQRADDSC(a[0], a[13]); SQRADDAC(a[1], a[12]); SQRADDAC(a[2], a[11]); SQRADDAC(a[3], a[10]); SQRADDAC(a[4], a[9]); SQRADDAC(a[5], a[8]); SQRADDAC(a[6], a[7]); SQRADDDB;
- COMBA_STORE(b[13]);
- /* output 14 */
- CARRY_FORWARD;
- SQRADDSC(a[0], a[14]); SQRADDAC(a[1], a[13]); SQRADDAC(a[2], a[12]); SQRADDAC(a[3], a[11]); SQRADDAC(a[4], a[10]); SQRADDAC(a[5], a[9]); SQRADDAC(a[6], a[8]); SQRADDDB; SQRADD(a[7], a[7]);
- COMBA_STORE(b[14]);
- /* output 15 */
- CARRY_FORWARD;
- SQRADDSC(a[0], a[15]); SQRADDAC(a[1], a[14]); SQRADDAC(a[2], a[13]); SQRADDAC(a[3], a[12]); SQRADDAC(a[4], a[11]); SQRADDAC(a[5], a[10]); SQRADDAC(a[6], a[9]); SQRADDAC(a[7], a[8]); SQRADDDB;
- COMBA_STORE(b[15]);
- /* output 16 */
- CARRY_FORWARD;
- SQRADDSC(a[1], a[15]); SQRADDAC(a[2], a[14]); SQRADDAC(a[3], a[13]); SQRADDAC(a[4], a[12]); SQRADDAC(a[5], a[11]); SQRADDAC(a[6], a[10]); SQRADDAC(a[7], a[9]); SQRADDDB; SQRADD(a[8], a[8]);
- COMBA_STORE(b[16]);
- /* output 17 */
- CARRY_FORWARD;
- SQRADDSC(a[2], a[15]); SQRADDAC(a[3], a[14]); SQRADDAC(a[4], a[13]); SQRADDAC(a[5], a[12]); SQRADDAC(a[6], a[11]); SQRADDAC(a[7], a[10]); SQRADDAC(a[8], a[9]); SQRADDDB;
- COMBA_STORE(b[17]);
- /* output 18 */
- CARRY_FORWARD;
- SQRADDSC(a[3], a[15]); SQRADDAC(a[4], a[14]); SQRADDAC(a[5], a[13]); SQRADDAC(a[6], a[12]); SQRADDAC(a[7], a[11]); SQRADDAC(a[8], a[10]); SQRADDDB; SQRADD(a[9], a[9]);
- COMBA_STORE(b[18]);
- /* output 19 */
- CARRY_FORWARD;
- SQRADDSC(a[4], a[15]); SQRADDAC(a[5], a[14]); SQRADDAC(a[6], a[13]); SQRADDAC(a[7], a[12]); SQRADDAC(a[8], a[11]); SQRADDAC(a[9], a[10]); SQRADDDB;
- COMBA_STORE(b[19]);
- /* output 20 */
- CARRY_FORWARD;
- SQRADDSC(a[5], a[15]); SQRADDAC(a[6], a[14]); SQRADDAC(a[7], a[13]); SQRADDAC(a[8], a[12]); SQRADDAC(a[9], a[11]); SQRADDDB; SQRADD(a[10], a[10]);
- COMBA_STORE(b[20]);
- /* output 21 */
- CARRY_FORWARD;
- SQRADDSC(a[6], a[15]); SQRADDAC(a[7], a[14]); SQRADDAC(a[8], a[13]); SQRADDAC(a[9], a[12]); SQRADDAC(a[10], a[11]); SQRADDDB;
- COMBA_STORE(b[21]);
- /* output 22 */
- CARRY_FORWARD;
- SQRADDSC(a[7], a[15]); SQRADDAC(a[8], a[14]); SQRADDAC(a[9], a[13]); SQRADDAC(a[10], a[12]); SQRADDDB; SQRADD(a[11], a[11]);
- COMBA_STORE(b[22]);
- /* output 23 */
- CARRY_FORWARD;
- SQRADDSC(a[8], a[15]); SQRADDAC(a[9], a[14]); SQRADDAC(a[10], a[13]); SQRADDAC(a[11], a[12]); SQRADDDB;
- COMBA_STORE(b[23]);
- /* output 24 */
- CARRY_FORWARD;
- SQRADDSC(a[9], a[15]); SQRADDAC(a[10], a[14]); SQRADDAC(a[11], a[13]); SQRADDDB; SQRADD(a[12], a[12]);
- COMBA_STORE(b[24]);
- /* output 25 */
- CARRY_FORWARD;
- SQRADDSC(a[10], a[15]); SQRADDAC(a[11], a[14]); SQRADDAC(a[12], a[13]); SQRADDDB;
- COMBA_STORE(b[25]);
- /* output 26 */
- CARRY_FORWARD;
- SQRADD2(a[11], a[15]); SQRADD2(a[12], a[14]); SQRADD(a[13], a[13]);
- COMBA_STORE(b[26]);
- /* output 27 */
- CARRY_FORWARD;
- SQRADD2(a[12], a[15]); SQRADD2(a[13], a[14]);
- COMBA_STORE(b[27]);
- /* output 28 */
- CARRY_FORWARD;
- SQRADD2(a[13], a[15]); SQRADD(a[14], a[14]);
- COMBA_STORE(b[28]);
- /* output 29 */
- CARRY_FORWARD;
- SQRADD2(a[14], a[15]);
- COMBA_STORE(b[29]);
- /* output 30 */
- CARRY_FORWARD;
- SQRADD(a[15], a[15]);
- COMBA_STORE(b[30]);
- COMBA_STORE2(b[31]);
- COMBA_FINI;
- B->used = 32;
- B->sign = PSTM_ZPOS;
- memcpy(B->dp, b, 32 * sizeof(pstm_digit));
- pstm_clamp(B);
- return PSTM_OKAY;
- }
- #endif /* USE_1024_KEY_SPEED_OPTIMIZATIONS */
- #ifdef USE_2048_KEY_SPEED_OPTIMIZATIONS
- static int32 pstm_sqr_comba32(pstm_int *A, pstm_int *B)
- {
- pstm_digit *a, b[64], c0, c1, c2, sc0, sc1, sc2;
- #ifdef PSTM_ISO
- pstm_word tt;
- #endif
- if (B->alloc < 64) {
- if (pstm_grow(B, 64) != PSTM_OKAY) {
- return PS_MEM_FAIL;
- }
- }
- sc0 = sc1 = sc2 = 0;
- a = A->dp;
- COMBA_START;
- /* clear carries */
- CLEAR_CARRY;
- /* output 0 */
- SQRADD(a[0],a[0]);
- COMBA_STORE(b[0]);
- /* output 1 */
- CARRY_FORWARD;
- SQRADD2(a[0], a[1]);
- COMBA_STORE(b[1]);
- /* output 2 */
- CARRY_FORWARD;
- SQRADD2(a[0], a[2]); SQRADD(a[1], a[1]);
- COMBA_STORE(b[2]);
- /* output 3 */
- CARRY_FORWARD;
- SQRADD2(a[0], a[3]); SQRADD2(a[1], a[2]);
- COMBA_STORE(b[3]);
- /* output 4 */
- CARRY_FORWARD;
- SQRADD2(a[0], a[4]); SQRADD2(a[1], a[3]); SQRADD(a[2], a[2]);
- COMBA_STORE(b[4]);
- /* output 5 */
- CARRY_FORWARD;
- SQRADDSC(a[0], a[5]); SQRADDAC(a[1], a[4]); SQRADDAC(a[2], a[3]); SQRADDDB;
- COMBA_STORE(b[5]);
- /* output 6 */
- CARRY_FORWARD;
- SQRADDSC(a[0], a[6]); SQRADDAC(a[1], a[5]); SQRADDAC(a[2], a[4]); SQRADDDB; SQRADD(a[3], a[3]);
- COMBA_STORE(b[6]);
- /* output 7 */
- CARRY_FORWARD;
- SQRADDSC(a[0], a[7]); SQRADDAC(a[1], a[6]); SQRADDAC(a[2], a[5]); SQRADDAC(a[3], a[4]); SQRADDDB;
- COMBA_STORE(b[7]);
- /* output 8 */
- CARRY_FORWARD;
- SQRADDSC(a[0], a[8]); SQRADDAC(a[1], a[7]); SQRADDAC(a[2], a[6]); SQRADDAC(a[3], a[5]); SQRADDDB; SQRADD(a[4], a[4]);
- COMBA_STORE(b[8]);
- /* output 9 */
- CARRY_FORWARD;
- SQRADDSC(a[0], a[9]); SQRADDAC(a[1], a[8]); SQRADDAC(a[2], a[7]); SQRADDAC(a[3], a[6]); SQRADDAC(a[4], a[5]); SQRADDDB;
- COMBA_STORE(b[9]);
- /* output 10 */
- CARRY_FORWARD;
- SQRADDSC(a[0], a[10]); SQRADDAC(a[1], a[9]); SQRADDAC(a[2], a[8]); SQRADDAC(a[3], a[7]); SQRADDAC(a[4], a[6]); SQRADDDB; SQRADD(a[5], a[5]);
- COMBA_STORE(b[10]);
- /* output 11 */
- CARRY_FORWARD;
- SQRADDSC(a[0], a[11]); SQRADDAC(a[1], a[10]); SQRADDAC(a[2], a[9]); SQRADDAC(a[3], a[8]); SQRADDAC(a[4], a[7]); SQRADDAC(a[5], a[6]); SQRADDDB;
- COMBA_STORE(b[11]);
- /* output 12 */
- CARRY_FORWARD;
- SQRADDSC(a[0], a[12]); SQRADDAC(a[1], a[11]); SQRADDAC(a[2], a[10]); SQRADDAC(a[3], a[9]); SQRADDAC(a[4], a[8]); SQRADDAC(a[5], a[7]); SQRADDDB; SQRADD(a[6], a[6]);
- COMBA_STORE(b[12]);
- /* output 13 */
- CARRY_FORWARD;
- SQRADDSC(a[0], a[13]); SQRADDAC(a[1], a[12]); SQRADDAC(a[2], a[11]); SQRADDAC(a[3], a[10]); SQRADDAC(a[4], a[9]); SQRADDAC(a[5], a[8]); SQRADDAC(a[6], a[7]); SQRADDDB;
- COMBA_STORE(b[13]);
- /* output 14 */
- CARRY_FORWARD;
- SQRADDSC(a[0], a[14]); SQRADDAC(a[1], a[13]); SQRADDAC(a[2], a[12]); SQRADDAC(a[3], a[11]); SQRADDAC(a[4], a[10]); SQRADDAC(a[5], a[9]); SQRADDAC(a[6], a[8]); SQRADDDB; SQRADD(a[7], a[7]);
- COMBA_STORE(b[14]);
- /* output 15 */
- CARRY_FORWARD;
- SQRADDSC(a[0], a[15]); SQRADDAC(a[1], a[14]); SQRADDAC(a[2], a[13]); SQRADDAC(a[3], a[12]); SQRADDAC(a[4], a[11]); SQRADDAC(a[5], a[10]); SQRADDAC(a[6], a[9]); SQRADDAC(a[7], a[8]); SQRADDDB;
- COMBA_STORE(b[15]);
- /* output 16 */
- CARRY_FORWARD;
- SQRADDSC(a[0], a[16]); SQRADDAC(a[1], a[15]); SQRADDAC(a[2], a[14]); SQRADDAC(a[3], a[13]); SQRADDAC(a[4], a[12]); SQRADDAC(a[5], a[11]); SQRADDAC(a[6], a[10]); SQRADDAC(a[7], a[9]); SQRADDDB; SQRADD(a[8], a[8]);
- COMBA_STORE(b[16]);
- /* output 17 */
- CARRY_FORWARD;
- SQRADDSC(a[0], a[17]); SQRADDAC(a[1], a[16]); SQRADDAC(a[2], a[15]); SQRADDAC(a[3], a[14]); SQRADDAC(a[4], a[13]); SQRADDAC(a[5], a[12]); SQRADDAC(a[6], a[11]); SQRADDAC(a[7], a[10]); SQRADDAC(a[8], a[9]); SQRADDDB;
- COMBA_STORE(b[17]);
- /* output 18 */
- CARRY_FORWARD;
- SQRADDSC(a[0], a[18]); SQRADDAC(a[1], a[17]); SQRADDAC(a[2], a[16]); SQRADDAC(a[3], a[15]); SQRADDAC(a[4], a[14]); SQRADDAC(a[5], a[13]); SQRADDAC(a[6], a[12]); SQRADDAC(a[7], a[11]); SQRADDAC(a[8], a[10]); SQRADDDB; SQRADD(a[9], a[9]);
- COMBA_STORE(b[18]);
- /* output 19 */
- CARRY_FORWARD;
- SQRADDSC(a[0], a[19]); SQRADDAC(a[1], a[18]); SQRADDAC(a[2], a[17]); SQRADDAC(a[3], a[16]); SQRADDAC(a[4], a[15]); SQRADDAC(a[5], a[14]); SQRADDAC(a[6], a[13]); SQRADDAC(a[7], a[12]); SQRADDAC(a[8], a[11]); SQRADDAC(a[9], a[10]); SQRADDDB;
- COMBA_STORE(b[19]);
- /* output 20 */
- CARRY_FORWARD;
- SQRADDSC(a[0], a[20]); SQRADDAC(a[1], a[19]); SQRADDAC(a[2], a[18]); SQRADDAC(a[3], a[17]); SQRADDAC(a[4], a[16]); SQRADDAC(a[5], a[15]); SQRADDAC(a[6], a[14]); SQRADDAC(a[7], a[13]); SQRADDAC(a[8], a[12]); SQRADDAC(a[9], a[11]); SQRADDDB; SQRADD(a[10], a[10]);
- COMBA_STORE(b[20]);
- /* output 21 */
- CARRY_FORWARD;
- SQRADDSC(a[0], a[21]); SQRADDAC(a[1], a[20]); SQRADDAC(a[2], a[19]); SQRADDAC(a[3], a[18]); SQRADDAC(a[4], a[17]); SQRADDAC(a[5], a[16]); SQRADDAC(a[6], a[15]); SQRADDAC(a[7], a[14]); SQRADDAC(a[8], a[13]); SQRADDAC(a[9], a[12]); SQRADDAC(a[10], a[11]); SQRADDDB;
- COMBA_STORE(b[21]);
- /* output 22 */
- CARRY_FORWARD;
- SQRADDSC(a[0], a[22]); SQRADDAC(a[1], a[21]); SQRADDAC(a[2], a[20]); SQRADDAC(a[3], a[19]); SQRADDAC(a[4], a[18]); SQRADDAC(a[5], a[17]); SQRADDAC(a[6], a[16]); SQRADDAC(a[7], a[15]); SQRADDAC(a[8], a[14]); SQRADDAC(a[9], a[13]); SQRADDAC(a[10], a[12]); SQRADDDB; SQRADD(a[11], a[11]);
- COMBA_STORE(b[22]);
- /* output 23 */
- CARRY_FORWARD;
- SQRADDSC(a[0], a[23]); SQRADDAC(a[1], a[22]); SQRADDAC(a[2], a[21]); SQRADDAC(a[3], a[20]); SQRADDAC(a[4], a[19]); SQRADDAC(a[5], a[18]); SQRADDAC(a[6], a[17]); SQRADDAC(a[7], a[16]); SQRADDAC(a[8], a[15]); SQRADDAC(a[9], a[14]); SQRADDAC(a[10], a[13]); SQRADDAC(a[11], a[12]); SQRADDDB;
- COMBA_STORE(b[23]);
- /* output 24 */
- CARRY_FORWARD;
- SQRADDSC(a[0], a[24]); SQRADDAC(a[1], a[23]); SQRADDAC(a[2], a[22]); SQRADDAC(a[3], a[21]); SQRADDAC(a[4], a[20]); SQRADDAC(a[5], a[19]); SQRADDAC(a[6], a[18]); SQRADDAC(a[7], a[17]); SQRADDAC(a[8], a[16]); SQRADDAC(a[9], a[15]); SQRADDAC(a[10], a[14]); SQRADDAC(a[11], a[13]); SQRADDDB; SQRADD(a[12], a[12]);
- COMBA_STORE(b[24]);
- /* output 25 */
- CARRY_FORWARD;
- SQRADDSC(a[0], a[25]); SQRADDAC(a[1], a[24]); SQRADDAC(a[2], a[23]); SQRADDAC(a[3], a[22]); SQRADDAC(a[4], a[21]); SQRADDAC(a[5], a[20]); SQRADDAC(a[6], a[19]); SQRADDAC(a[7], a[18]); SQRADDAC(a[8], a[17]); SQRADDAC(a[9], a[16]); SQRADDAC(a[10], a[15]); SQRADDAC(a[11], a[14]); SQRADDAC(a[12], a[13]); SQRADDDB;
- COMBA_STORE(b[25]);
- /* output 26 */
- CARRY_FORWARD;
- SQRADDSC(a[0], a[26]); SQRADDAC(a[1], a[25]); SQRADDAC(a[2], a[24]); SQRADDAC(a[3], a[23]); SQRADDAC(a[4], a[22]); SQRADDAC(a[5], a[21]); SQRADDAC(a[6], a[20]); SQRADDAC(a[7], a[19]); SQRADDAC(a[8], a[18]); SQRADDAC(a[9], a[17]); SQRADDAC(a[10], a[16]); SQRADDAC(a[11], a[15]); SQRADDAC(a[12], a[14]); SQRADDDB; SQRADD(a[13], a[13]);
- COMBA_STORE(b[26]);
- /* output 27 */
- CARRY_FORWARD;
- SQRADDSC(a[0], a[27]); SQRADDAC(a[1], a[26]); SQRADDAC(a[2], a[25]); SQRADDAC(a[3], a[24]); SQRADDAC(a[4], a[23]); SQRADDAC(a[5], a[22]); SQRADDAC(a[6], a[21]); SQRADDAC(a[7], a[20]); SQRADDAC(a[8], a[19]); SQRADDAC(a[9], a[18]); SQRADDAC(a[10], a[17]); SQRADDAC(a[11], a[16]); SQRADDAC(a[12], a[15]); SQRADDAC(a[13], a[14]); SQRADDDB;
- COMBA_STORE(b[27]);
- /* output 28 */
- CARRY_FORWARD;
- SQRADDSC(a[0], a[28]); SQRADDAC(a[1], a[27]); SQRADDAC(a[2], a[26]); SQRADDAC(a[3], a[25]); SQRADDAC(a[4], a[24]); SQRADDAC(a[5], a[23]); SQRADDAC(a[6], a[22]); SQRADDAC(a[7], a[21]); SQRADDAC(a[8], a[20]); SQRADDAC(a[9], a[19]); SQRADDAC(a[10], a[18]); SQRADDAC(a[11], a[17]); SQRADDAC(a[12], a[16]); SQRADDAC(a[13], a[15]); SQRADDDB; SQRADD(a[14], a[14]);
- COMBA_STORE(b[28]);
- /* output 29 */
- CARRY_FORWARD;
- SQRADDSC(a[0], a[29]); SQRADDAC(a[1], a[28]); SQRADDAC(a[2], a[27]); SQRADDAC(a[3], a[26]); SQRADDAC(a[4], a[25]); SQRADDAC(a[5], a[24]); SQRADDAC(a[6], a[23]); SQRADDAC(a[7], a[22]); SQRADDAC(a[8], a[21]); SQRADDAC(a[9], a[20]); SQRADDAC(a[10], a[19]); SQRADDAC(a[11], a[18]); SQRADDAC(a[12], a[17]); SQRADDAC(a[13], a[16]); SQRADDAC(a[14], a[15]); SQRADDDB;
- COMBA_STORE(b[29]);
- /* output 30 */
- CARRY_FORWARD;
- SQRADDSC(a[0], a[30]); SQRADDAC(a[1], a[29]); SQRADDAC(a[2], a[28]); SQRADDAC(a[3], a[27]); SQRADDAC(a[4], a[26]); SQRADDAC(a[5], a[25]); SQRADDAC(a[6], a[24]); SQRADDAC(a[7], a[23]); SQRADDAC(a[8], a[22]); SQRADDAC(a[9], a[21]); SQRADDAC(a[10], a[20]); SQRADDAC(a[11], a[19]); SQRADDAC(a[12], a[18]); SQRADDAC(a[13], a[17]); SQRADDAC(a[14], a[16]); SQRADDDB; SQRADD(a[15], a[15]);
- COMBA_STORE(b[30]);
- /* output 31 */
- CARRY_FORWARD;
- SQRADDSC(a[0], a[31]); SQRADDAC(a[1], a[30]); SQRADDAC(a[2], a[29]); SQRADDAC(a[3], a[28]); SQRADDAC(a[4], a[27]); SQRADDAC(a[5], a[26]); SQRADDAC(a[6], a[25]); SQRADDAC(a[7], a[24]); SQRADDAC(a[8], a[23]); SQRADDAC(a[9], a[22]); SQRADDAC(a[10], a[21]); SQRADDAC(a[11], a[20]); SQRADDAC(a[12], a[19]); SQRADDAC(a[13], a[18]); SQRADDAC(a[14], a[17]); SQRADDAC(a[15], a[16]); SQRADDDB;
- COMBA_STORE(b[31]);
- /* output 32 */
- CARRY_FORWARD;
- SQRADDSC(a[1], a[31]); SQRADDAC(a[2], a[30]); SQRADDAC(a[3], a[29]); SQRADDAC(a[4], a[28]); SQRADDAC(a[5], a[27]); SQRADDAC(a[6], a[26]); SQRADDAC(a[7], a[25]); SQRADDAC(a[8], a[24]); SQRADDAC(a[9], a[23]); SQRADDAC(a[10], a[22]); SQRADDAC(a[11], a[21]); SQRADDAC(a[12], a[20]); SQRADDAC(a[13], a[19]); SQRADDAC(a[14], a[18]); SQRADDAC(a[15], a[17]); SQRADDDB; SQRADD(a[16], a[16]);
- COMBA_STORE(b[32]);
- /* output 33 */
- CARRY_FORWARD;
- SQRADDSC(a[2], a[31]); SQRADDAC(a[3], a[30]); SQRADDAC(a[4], a[29]); SQRADDAC(a[5], a[28]); SQRADDAC(a[6], a[27]); SQRADDAC(a[7], a[26]); SQRADDAC(a[8], a[25]); SQRADDAC(a[9], a[24]); SQRADDAC(a[10], a[23]); SQRADDAC(a[11], a[22]); SQRADDAC(a[12], a[21]); SQRADDAC(a[13], a[20]); SQRADDAC(a[14], a[19]); SQRADDAC(a[15], a[18]); SQRADDAC(a[16], a[17]); SQRADDDB;
- COMBA_STORE(b[33]);
- /* output 34 */
- CARRY_FORWARD;
- SQRADDSC(a[3], a[31]); SQRADDAC(a[4], a[30]); SQRADDAC(a[5], a[29]); SQRADDAC(a[6], a[28]); SQRADDAC(a[7], a[27]); SQRADDAC(a[8], a[26]); SQRADDAC(a[9], a[25]); SQRADDAC(a[10], a[24]); SQRADDAC(a[11], a[23]); SQRADDAC(a[12], a[22]); SQRADDAC(a[13], a[21]); SQRADDAC(a[14], a[20]); SQRADDAC(a[15], a[19]); SQRADDAC(a[16], a[18]); SQRADDDB; SQRADD(a[17], a[17]);
- COMBA_STORE(b[34]);
- /* output 35 */
- CARRY_FORWARD;
- SQRADDSC(a[4], a[31]); SQRADDAC(a[5], a[30]); SQRADDAC(a[6], a[29]); SQRADDAC(a[7], a[28]); SQRADDAC(a[8], a[27]); SQRADDAC(a[9], a[26]); SQRADDAC(a[10], a[25]); SQRADDAC(a[11], a[24]); SQRADDAC(a[12], a[23]); SQRADDAC(a[13], a[22]); SQRADDAC(a[14], a[21]); SQRADDAC(a[15], a[20]); SQRADDAC(a[16], a[19]); SQRADDAC(a[17], a[18]); SQRADDDB;
- COMBA_STORE(b[35]);
- /* output 36 */
- CARRY_FORWARD;
- SQRADDSC(a[5], a[31]); SQRADDAC(a[6], a[30]); SQRADDAC(a[7], a[29]); SQRADDAC(a[8], a[28]); SQRADDAC(a[9], a[27]); SQRADDAC(a[10], a[26]); SQRADDAC(a[11], a[25]); SQRADDAC(a[12], a[24]); SQRADDAC(a[13], a[23]); SQRADDAC(a[14], a[22]); SQRADDAC(a[15], a[21]); SQRADDAC(a[16], a[20]); SQRADDAC(a[17], a[19]); SQRADDDB; SQRADD(a[18], a[18]);
- COMBA_STORE(b[36]);
- /* output 37 */
- CARRY_FORWARD;
- SQRADDSC(a[6], a[31]); SQRADDAC(a[7], a[30]); SQRADDAC(a[8], a[29]); SQRADDAC(a[9], a[28]); SQRADDAC(a[10], a[27]); SQRADDAC(a[11], a[26]); SQRADDAC(a[12], a[25]); SQRADDAC(a[13], a[24]); SQRADDAC(a[14], a[23]); SQRADDAC(a[15], a[22]); SQRADDAC(a[16], a[21]); SQRADDAC(a[17], a[20]); SQRADDAC(a[18], a[19]); SQRADDDB;
- COMBA_STORE(b[37]);
- /* output 38 */
- CARRY_FORWARD;
- SQRADDSC(a[7], a[31]); SQRADDAC(a[8], a[30]); SQRADDAC(a[9], a[29]); SQRADDAC(a[10], a[28]); SQRADDAC(a[11], a[27]); SQRADDAC(a[12], a[26]); SQRADDAC(a[13], a[25]); SQRADDAC(a[14], a[24]); SQRADDAC(a[15], a[23]); SQRADDAC(a[16], a[22]); SQRADDAC(a[17], a[21]); SQRADDAC(a[18], a[20]); SQRADDDB; SQRADD(a[19], a[19]);
- COMBA_STORE(b[38]);
- /* output 39 */
- CARRY_FORWARD;
- SQRADDSC(a[8], a[31]); SQRADDAC(a[9], a[30]); SQRADDAC(a[10], a[29]); SQRADDAC(a[11], a[28]); SQRADDAC(a[12], a[27]); SQRADDAC(a[13], a[26]); SQRADDAC(a[14], a[25]); SQRADDAC(a[15], a[24]); SQRADDAC(a[16], a[23]); SQRADDAC(a[17], a[22]); SQRADDAC(a[18], a[21]); SQRADDAC(a[19], a[20]); SQRADDDB;
- COMBA_STORE(b[39]);
- /* output 40 */
- CARRY_FORWARD;
- SQRADDSC(a[9], a[31]); SQRADDAC(a[10], a[30]); SQRADDAC(a[11], a[29]); SQRADDAC(a[12], a[28]); SQRADDAC(a[13], a[27]); SQRADDAC(a[14], a[26]); SQRADDAC(a[15], a[25]); SQRADDAC(a[16], a[24]); SQRADDAC(a[17], a[23]); SQRADDAC(a[18], a[22]); SQRADDAC(a[19], a[21]); SQRADDDB; SQRADD(a[20], a[20]);
- COMBA_STORE(b[40]);
- /* output 41 */
- CARRY_FORWARD;
- SQRADDSC(a[10], a[31]); SQRADDAC(a[11], a[30]); SQRADDAC(a[12], a[29]); SQRADDAC(a[13], a[28]); SQRADDAC(a[14], a[27]); SQRADDAC(a[15], a[26]); SQRADDAC(a[16], a[25]); SQRADDAC(a[17], a[24]); SQRADDAC(a[18], a[23]); SQRADDAC(a[19], a[22]); SQRADDAC(a[20], a[21]); SQRADDDB;
- COMBA_STORE(b[41]);
- /* output 42 */
- CARRY_FORWARD;
- SQRADDSC(a[11], a[31]); SQRADDAC(a[12], a[30]); SQRADDAC(a[13], a[29]); SQRADDAC(a[14], a[28]); SQRADDAC(a[15], a[27]); SQRADDAC(a[16], a[26]); SQRADDAC(a[17], a[25]); SQRADDAC(a[18], a[24]); SQRADDAC(a[19], a[23]); SQRADDAC(a[20], a[22]); SQRADDDB; SQRADD(a[21], a[21]);
- COMBA_STORE(b[42]);
- /* output 43 */
- CARRY_FORWARD;
- SQRADDSC(a[12], a[31]); SQRADDAC(a[13], a[30]); SQRADDAC(a[14], a[29]); SQRADDAC(a[15], a[28]); SQRADDAC(a[16], a[27]); SQRADDAC(a[17], a[26]); SQRADDAC(a[18], a[25]); SQRADDAC(a[19], a[24]); SQRADDAC(a[20], a[23]); SQRADDAC(a[21], a[22]); SQRADDDB;
- COMBA_STORE(b[43]);
- /* output 44 */
- CARRY_FORWARD;
- SQRADDSC(a[13], a[31]); SQRADDAC(a[14], a[30]); SQRADDAC(a[15], a[29]); SQRADDAC(a[16], a[28]); SQRADDAC(a[17], a[27]); SQRADDAC(a[18], a[26]); SQRADDAC(a[19], a[25]); SQRADDAC(a[20], a[24]); SQRADDAC(a[21], a[23]); SQRADDDB; SQRADD(a[22], a[22]);
- COMBA_STORE(b[44]);
- /* output 45 */
- CARRY_FORWARD;
- SQRADDSC(a[14], a[31]); SQRADDAC(a[15], a[30]); SQRADDAC(a[16], a[29]); SQRADDAC(a[17], a[28]); SQRADDAC(a[18], a[27]); SQRADDAC(a[19], a[26]); SQRADDAC(a[20], a[25]); SQRADDAC(a[21], a[24]); SQRADDAC(a[22], a[23]); SQRADDDB;
- COMBA_STORE(b[45]);
- /* output 46 */
- CARRY_FORWARD;
- SQRADDSC(a[15], a[31]); SQRADDAC(a[16], a[30]); SQRADDAC(a[17], a[29]); SQRADDAC(a[18], a[28]); SQRADDAC(a[19], a[27]); SQRADDAC(a[20], a[26]); SQRADDAC(a[21], a[25]); SQRADDAC(a[22], a[24]); SQRADDDB; SQRADD(a[23], a[23]);
- COMBA_STORE(b[46]);
- /* output 47 */
- CARRY_FORWARD;
- SQRADDSC(a[16], a[31]); SQRADDAC(a[17], a[30]); SQRADDAC(a[18], a[29]); SQRADDAC(a[19], a[28]); SQRADDAC(a[20], a[27]); SQRADDAC(a[21], a[26]); SQRADDAC(a[22], a[25]); SQRADDAC(a[23], a[24]); SQRADDDB;
- COMBA_STORE(b[47]);
- /* output 48 */
- CARRY_FORWARD;
- SQRADDSC(a[17], a[31]); SQRADDAC(a[18], a[30]); SQRADDAC(a[19], a[29]); SQRADDAC(a[20], a[28]); SQRADDAC(a[21], a[27]); SQRADDAC(a[22], a[26]); SQRADDAC(a[23], a[25]); SQRADDDB; SQRADD(a[24], a[24]);
- COMBA_STORE(b[48]);
- /* output 49 */
- CARRY_FORWARD;
- SQRADDSC(a[18], a[31]); SQRADDAC(a[19], a[30]); SQRADDAC(a[20], a[29]); SQRADDAC(a[21], a[28]); SQRADDAC(a[22], a[27]); SQRADDAC(a[23], a[26]); SQRADDAC(a[24], a[25]); SQRADDDB;
- COMBA_STORE(b[49]);
- /* output 50 */
- CARRY_FORWARD;
- SQRADDSC(a[19], a[31]); SQRADDAC(a[20], a[30]); SQRADDAC(a[21], a[29]); SQRADDAC(a[22], a[28]); SQRADDAC(a[23], a[27]); SQRADDAC(a[24], a[26]); SQRADDDB; SQRADD(a[25], a[25]);
- COMBA_STORE(b[50]);
- /* output 51 */
- CARRY_FORWARD;
- SQRADDSC(a[20], a[31]); SQRADDAC(a[21], a[30]); SQRADDAC(a[22], a[29]); SQRADDAC(a[23], a[28]); SQRADDAC(a[24], a[27]); SQRADDAC(a[25], a[26]); SQRADDDB;
- COMBA_STORE(b[51]);
- /* output 52 */
- CARRY_FORWARD;
- SQRADDSC(a[21], a[31]); SQRADDAC(a[22], a[30]); SQRADDAC(a[23], a[29]); SQRADDAC(a[24], a[28]); SQRADDAC(a[25], a[27]); SQRADDDB; SQRADD(a[26], a[26]);
- COMBA_STORE(b[52]);
- /* output 53 */
- CARRY_FORWARD;
- SQRADDSC(a[22], a[31]); SQRADDAC(a[23], a[30]); SQRADDAC(a[24], a[29]); SQRADDAC(a[25], a[28]); SQRADDAC(a[26], a[27]); SQRADDDB;
- COMBA_STORE(b[53]);
- /* output 54 */
- CARRY_FORWARD;
- SQRADDSC(a[23], a[31]); SQRADDAC(a[24], a[30]); SQRADDAC(a[25], a[29]); SQRADDAC(a[26], a[28]); SQRADDDB; SQRADD(a[27], a[27]);
- COMBA_STORE(b[54]);
- /* output 55 */
- CARRY_FORWARD;
- SQRADDSC(a[24], a[31]); SQRADDAC(a[25], a[30]); SQRADDAC(a[26], a[29]); SQRADDAC(a[27], a[28]); SQRADDDB;
- COMBA_STORE(b[55]);
- /* output 56 */
- CARRY_FORWARD;
- SQRADDSC(a[25], a[31]); SQRADDAC(a[26], a[30]); SQRADDAC(a[27], a[29]); SQRADDDB; SQRADD(a[28], a[28]);
- COMBA_STORE(b[56]);
- /* output 57 */
- CARRY_FORWARD;
- SQRADDSC(a[26], a[31]); SQRADDAC(a[27], a[30]); SQRADDAC(a[28], a[29]); SQRADDDB;
- COMBA_STORE(b[57]);
- /* output 58 */
- CARRY_FORWARD;
- SQRADD2(a[27], a[31]); SQRADD2(a[28], a[30]); SQRADD(a[29], a[29]);
- COMBA_STORE(b[58]);
- /* output 59 */
- CARRY_FORWARD;
- SQRADD2(a[28], a[31]); SQRADD2(a[29], a[30]);
- COMBA_STORE(b[59]);
- /* output 60 */
- CARRY_FORWARD;
- SQRADD2(a[29], a[31]); SQRADD(a[30], a[30]);
- COMBA_STORE(b[60]);
- /* output 61 */
- CARRY_FORWARD;
- SQRADD2(a[30], a[31]);
- COMBA_STORE(b[61]);
- /* output 62 */
- CARRY_FORWARD;
- SQRADD(a[31], a[31]);
- COMBA_STORE(b[62]);
- COMBA_STORE2(b[63]);
- COMBA_FINI;
- B->used = 64;
- B->sign = PSTM_ZPOS;
- memcpy(B->dp, b, 64 * sizeof(pstm_digit));
- pstm_clamp(B);
- return PSTM_OKAY;
- }
- #endif /* USE_2048_KEY_SPEED_OPTIMIZATIONS */
- /******************************************************************************/
- /*
- */
- int32 pstm_sqr_comba(psPool_t *pool, pstm_int *A, pstm_int *B, pstm_digit *paD,
- uint32 paDlen)
- {
- #ifdef USE_1024_KEY_SPEED_OPTIMIZATIONS
- if (A->used == 16) {
- return pstm_sqr_comba16(A, B);
- } else {
- #ifdef USE_2048_KEY_SPEED_OPTIMIZATIONS
- if (A->used == 32) {
- return pstm_sqr_comba32(A, B);
- }
- #endif /* USE_2048_KEY_SPEED_OPTIMIZATIONS */
- return pstm_sqr_comba_gen(pool, A, B, paD, paDlen);
- }
- #else
- #ifdef USE_2048_KEY_SPEED_OPTIMIZATIONS
- if (A->used == 32) {
- return pstm_sqr_comba32(A, B);
- }
- #endif /* USE_2048_KEY_SPEED_OPTIMIZATIONS */
- return pstm_sqr_comba_gen(pool, A, B, paD, paDlen);
- #endif
- }
- #endif /* DISABLE_PSTM */
- /******************************************************************************/
|