123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460 |
- /* chacha.c
- *
- * Copyright (C) 2006-2024 wolfSSL Inc.
- *
- * This file is part of wolfSSL.
- *
- * wolfSSL is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * wolfSSL is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
- */
/*

DESCRIPTION
This library contains an implementation of the ChaCha20 stream cipher.

Based on chacha-ref.c version 20080118
D. J. Bernstein
Public domain.

*/
- #ifdef HAVE_CONFIG_H
- #include <config.h>
- #endif
- #include <wolfssl/wolfcrypt/settings.h>
- #ifdef HAVE_CHACHA
- #include <wolfssl/wolfcrypt/chacha.h>
- #include <wolfssl/wolfcrypt/error-crypt.h>
- #ifdef NO_INLINE
- #include <wolfssl/wolfcrypt/misc.h>
- #else
- #define WOLFSSL_MISC_INCLUDED
- #include <wolfcrypt/src/misc.c>
- #endif
/* Convert a 32-bit word between host byte order and little-endian order:
 * a byte swap when BIG_ENDIAN_ORDER is defined, otherwise a no-op. */
#ifdef BIG_ENDIAN_ORDER
    #define LITTLE32(x) ByteReverseWord32(x)
#else
    #define LITTLE32(x) (x)
#endif

/* Number of rounds */
#define ROUNDS 20

/* Append the unsigned suffix to an integer literal. */
#define U32C(v) (v##U)
/* Truncate a value to 32 bits. */
#define U32V(v) ((word32)(v) & U32C(0xFFFFFFFF))

/* Read four bytes starting at p as one little-endian 32-bit word.
 *
 * The word is assembled one byte at a time, so p may have any alignment, the
 * result does not depend on the host byte order, and no pointer of a
 * different type is formed from p (const-ness is kept).
 *
 * NOTE: p is evaluated four times - do not pass an expression with side
 * effects. */
#define U8TO32_LITTLE(p)                                   \
    ( ((word32)((const unsigned char*)(p))[0])           | \
     (((word32)((const unsigned char*)(p))[1]) <<  8)    | \
     (((word32)((const unsigned char*)(p))[2]) << 16)    | \
     (((word32)((const unsigned char*)(p))[3]) << 24))

#define ROTATE(v,c) rotlFixed(v, c)
#define XOR(v,w)    ((v) ^ (w))
/* Addition modulo 2^32. */
#define PLUS(v,w)   (U32V((v) + (w)))
#define PLUSONE(v)  (PLUS((v),1))

/* One quarter round on words a, b, c, d of a local array that must be named
 * `x` at the point of use. Expands to a sequence of complete statements
 * (including the final ';'), so call sites are written without a trailing
 * semicolon and must not be used as the unbraced body of an if/for/while. */
#define QUARTERROUND(a,b,c,d) \
  x[a] = PLUS(x[a],x[b]); x[d] = ROTATE(XOR(x[d],x[a]),16); \
  x[c] = PLUS(x[c],x[d]); x[b] = ROTATE(XOR(x[b],x[c]),12); \
  x[a] = PLUS(x[a],x[b]); x[d] = ROTATE(XOR(x[d],x[a]), 8); \
  x[c] = PLUS(x[c],x[d]); x[b] = ROTATE(XOR(x[b],x[c]), 7);
- #endif /* HAVE_CHACHA */
- #if defined(WOLFSSL_ARMASM)
- /* implementation is located in wolfcrypt/src/port/arm/armv8-chacha.c */
- #elif defined(WOLFSSL_RISCV_ASM)
- /* implementation located in wolfcrypt/src/port/riscv/riscv-64-chacha.c */
- #else
- /* BEGIN ChaCha C implementation */
- #if defined(HAVE_CHACHA)
- #include <wolfssl/wolfcrypt/logging.h>
- #include <wolfssl/wolfcrypt/cpuid.h>
- #ifdef CHACHA_AEAD_TEST
- #include <stdio.h>
- #endif
#if defined(USE_INTEL_CHACHA_SPEEDUP)
    #include <emmintrin.h>
    #include <immintrin.h>

    /* GCC up to and including 4.8 is treated as lacking AVX2 support. */
    #if defined(__GNUC__) && ((__GNUC__ < 4) || \
                              (__GNUC__ == 4 && __GNUC_MINOR__ <= 8))
        #undef  NO_AVX2_SUPPORT
        #define NO_AVX2_SUPPORT
    #endif

    /* clang up to and including 3.5 is treated the same way. For any newer
     * clang, a NO_AVX2_SUPPORT that is defined at this point (for instance by
     * the GCC version check above) is removed again. */
    #if defined(__clang__) && ((__clang_major__ < 3) || \
                               (__clang_major__ == 3 && __clang_minor__ <= 5))
        #undef  NO_AVX2_SUPPORT
        #define NO_AVX2_SUPPORT
    #elif defined(__clang__) && defined(NO_AVX2_SUPPORT)
        #undef  NO_AVX2_SUPPORT
    #endif

    /* MSVC with _MSC_VER of 1900 or lower is treated as lacking AVX2
     * support. */
    #if defined(_MSC_VER) && (_MSC_VER <= 1900)
        #undef  NO_AVX2_SUPPORT
        #define NO_AVX2_SUPPORT
    #endif

    /* AVX2 code path is compiled in unless one of the checks above (or the
     * build configuration) ruled it out. */
    #if !defined(NO_AVX2_SUPPORT)
        #define HAVE_INTEL_AVX2
    #endif

    /* CPU feature flags; fetched on first use by wc_Chacha_Process(), which
     * then sets cpuidFlagsSet so the query is not repeated. */
    static int    cpuidFlagsSet = 0;
    static word32 cpuidFlags    = 0;
#endif /* USE_INTEL_CHACHA_SPEEDUP */
- /**
- * Set up iv(nonce). Earlier versions used 64 bits instead of 96, this version
- * uses the typical AEAD 96 bit nonce and can do record sizes of 256 GB.
- */
- int wc_Chacha_SetIV(ChaCha* ctx, const byte* inIv, word32 counter)
- {
- word32 temp[CHACHA_IV_WORDS];/* used for alignment of memory */
- if (ctx == NULL || inIv == NULL)
- return BAD_FUNC_ARG;
- XMEMCPY(temp, inIv, CHACHA_IV_BYTES);
- ctx->left = 0; /* resets state */
- ctx->X[CHACHA_MATRIX_CNT_IV+0] = counter; /* block counter */
- ctx->X[CHACHA_MATRIX_CNT_IV+1] = LITTLE32(temp[0]); /* fixed variable from nonce */
- ctx->X[CHACHA_MATRIX_CNT_IV+2] = LITTLE32(temp[1]); /* counter from nonce */
- ctx->X[CHACHA_MATRIX_CNT_IV+3] = LITTLE32(temp[2]); /* counter from nonce */
- return 0;
- }
- /* "expand 32-byte k" as unsigned 32 byte */
- static const word32 sigma[4] = {0x61707865, 0x3320646e, 0x79622d32, 0x6b206574};
- /* "expand 16-byte k" as unsigned 16 byte */
- static const word32 tau[4] = {0x61707865, 0x3120646e, 0x79622d36, 0x6b206574};
- /**
- * Key setup. 8 word iv (nonce)
- */
- int wc_Chacha_SetKey(ChaCha* ctx, const byte* key, word32 keySz)
- {
- const word32* constants;
- const byte* k;
- #ifdef XSTREAM_ALIGN
- word32 alignKey[8];
- #endif
- if (ctx == NULL || key == NULL)
- return BAD_FUNC_ARG;
- if (keySz != (CHACHA_MAX_KEY_SZ/2) && keySz != CHACHA_MAX_KEY_SZ)
- return BAD_FUNC_ARG;
- #ifdef XSTREAM_ALIGN
- if ((wc_ptr_t)key % 4) {
- WOLFSSL_MSG("wc_ChachaSetKey unaligned key");
- XMEMCPY(alignKey, key, keySz);
- k = (byte*)alignKey;
- }
- else {
- k = key;
- }
- #else
- k = key;
- #endif /* XSTREAM_ALIGN */
- #ifdef CHACHA_AEAD_TEST
- word32 i;
- printf("ChaCha key used :\n");
- for (i = 0; i < keySz; i++) {
- printf("%02x", key[i]);
- if ((i + 1) % 8 == 0)
- printf("\n");
- }
- printf("\n\n");
- #endif
- ctx->X[4] = U8TO32_LITTLE(k + 0);
- ctx->X[5] = U8TO32_LITTLE(k + 4);
- ctx->X[6] = U8TO32_LITTLE(k + 8);
- ctx->X[7] = U8TO32_LITTLE(k + 12);
- if (keySz == CHACHA_MAX_KEY_SZ) {
- k += 16;
- constants = sigma;
- }
- else {
- constants = tau;
- }
- ctx->X[ 8] = U8TO32_LITTLE(k + 0);
- ctx->X[ 9] = U8TO32_LITTLE(k + 4);
- ctx->X[10] = U8TO32_LITTLE(k + 8);
- ctx->X[11] = U8TO32_LITTLE(k + 12);
- ctx->X[ 0] = constants[0];
- ctx->X[ 1] = constants[1];
- ctx->X[ 2] = constants[2];
- ctx->X[ 3] = constants[3];
- ctx->left = 0; /* resets state */
- return 0;
- }
- #ifndef USE_INTEL_CHACHA_SPEEDUP
- /**
- * Converts word into bytes with rotations having been done.
- */
- static WC_INLINE void wc_Chacha_wordtobyte(word32 x[CHACHA_CHUNK_WORDS],
- word32 state[CHACHA_CHUNK_WORDS])
- {
- word32 i;
- XMEMCPY(x, state, CHACHA_CHUNK_BYTES);
- for (i = (ROUNDS); i > 0; i -= 2) {
- QUARTERROUND(0, 4, 8, 12)
- QUARTERROUND(1, 5, 9, 13)
- QUARTERROUND(2, 6, 10, 14)
- QUARTERROUND(3, 7, 11, 15)
- QUARTERROUND(0, 5, 10, 15)
- QUARTERROUND(1, 6, 11, 12)
- QUARTERROUND(2, 7, 8, 13)
- QUARTERROUND(3, 4, 9, 14)
- }
- for (i = 0; i < CHACHA_CHUNK_WORDS; i++) {
- x[i] = PLUS(x[i], state[i]);
- #ifdef BIG_ENDIAN_ORDER
- x[i] = LITTLE32(x[i]);
- #endif
- }
- }
- #endif /* !USE_INTEL_CHACHA_SPEEDUP */
- #ifdef __cplusplus
- extern "C" {
- #endif
- extern void chacha_encrypt_x64(ChaCha* ctx, const byte* m, byte* c,
- word32 bytes);
- extern void chacha_encrypt_avx1(ChaCha* ctx, const byte* m, byte* c,
- word32 bytes);
- extern void chacha_encrypt_avx2(ChaCha* ctx, const byte* m, byte* c,
- word32 bytes);
- #ifdef __cplusplus
- } /* extern "C" */
- #endif
- #ifndef USE_INTEL_CHACHA_SPEEDUP
- /**
- * Encrypt a stream of bytes
- */
- static void wc_Chacha_encrypt_bytes(ChaCha* ctx, const byte* m, byte* c,
- word32 bytes)
- {
- union {
- byte state[CHACHA_CHUNK_BYTES];
- word32 state32[CHACHA_CHUNK_WORDS];
- wolfssl_word align_word; /* align for xorbufout */
- } tmp;
- /* handle left overs */
- if (bytes > 0 && ctx->left > 0) {
- word32 processed = min(bytes, ctx->left);
- wc_Chacha_wordtobyte(tmp.state32, ctx->X); /* recreate the stream */
- xorbufout(c, m, tmp.state + CHACHA_CHUNK_BYTES - ctx->left, processed);
- ctx->left -= processed;
- /* Used up all of the stream that was left, increment the counter */
- if (ctx->left == 0) {
- ctx->X[CHACHA_MATRIX_CNT_IV] =
- PLUSONE(ctx->X[CHACHA_MATRIX_CNT_IV]);
- }
- bytes -= processed;
- c += processed;
- m += processed;
- }
- while (bytes >= CHACHA_CHUNK_BYTES) {
- wc_Chacha_wordtobyte(tmp.state32, ctx->X);
- ctx->X[CHACHA_MATRIX_CNT_IV] = PLUSONE(ctx->X[CHACHA_MATRIX_CNT_IV]);
- xorbufout(c, m, tmp.state, CHACHA_CHUNK_BYTES);
- bytes -= CHACHA_CHUNK_BYTES;
- c += CHACHA_CHUNK_BYTES;
- m += CHACHA_CHUNK_BYTES;
- }
- if (bytes) {
- /* in this case there will always be some left over since bytes is less
- * than CHACHA_CHUNK_BYTES, so do not increment counter after getting
- * stream in order for the stream to be recreated on next call */
- wc_Chacha_wordtobyte(tmp.state32, ctx->X);
- xorbufout(c, m, tmp.state, bytes);
- ctx->left = CHACHA_CHUNK_BYTES - bytes;
- }
- }
- #endif /* !USE_INTEL_CHACHA_SPEEDUP */
- /**
- * API to encrypt/decrypt a message of any size.
- */
- int wc_Chacha_Process(ChaCha* ctx, byte* output, const byte* input,
- word32 msglen)
- {
- if (ctx == NULL || input == NULL || output == NULL)
- return BAD_FUNC_ARG;
- #ifdef USE_INTEL_CHACHA_SPEEDUP
- /* handle left overs */
- if (msglen > 0 && ctx->left > 0) {
- byte* out;
- word32 processed = min(msglen, ctx->left);
- out = (byte*)ctx->over + CHACHA_CHUNK_BYTES - ctx->left;
- xorbufout(output, input, out, processed);
- ctx->left -= processed;
- msglen -= processed;
- output += processed;
- input += processed;
- }
- if (msglen == 0) {
- return 0;
- }
- if (!cpuidFlagsSet) {
- cpuidFlags = cpuid_get_flags();
- cpuidFlagsSet = 1;
- }
- #ifdef HAVE_INTEL_AVX2
- if (IS_INTEL_AVX2(cpuidFlags)) {
- SAVE_VECTOR_REGISTERS(return _svr_ret;);
- chacha_encrypt_avx2(ctx, input, output, msglen);
- RESTORE_VECTOR_REGISTERS();
- return 0;
- }
- #endif
- if (IS_INTEL_AVX1(cpuidFlags)) {
- SAVE_VECTOR_REGISTERS(return _svr_ret;);
- chacha_encrypt_avx1(ctx, input, output, msglen);
- RESTORE_VECTOR_REGISTERS();
- return 0;
- }
- else {
- chacha_encrypt_x64(ctx, input, output, msglen);
- return 0;
- }
- #else
- wc_Chacha_encrypt_bytes(ctx, input, output, msglen);
- return 0;
- #endif
- }
- #endif /* HAVE_CHACHA */
- #endif /* END ChaCha C implementation */
- #if defined(HAVE_CHACHA) && defined(HAVE_XCHACHA)
- void wc_Chacha_purge_current_block(ChaCha* ctx)
- {
- if (ctx->left > 0) {
- byte scratch[CHACHA_CHUNK_BYTES];
- XMEMSET(scratch, 0, sizeof(scratch));
- (void)wc_Chacha_Process(ctx, scratch, scratch, CHACHA_CHUNK_BYTES - ctx->left);
- }
- }
- /*
- * wc_HChacha_block - half a ChaCha block, for XChaCha
- *
- * see https://tools.ietf.org/html/draft-arciszewski-xchacha-03
- */
- static WC_INLINE void wc_HChacha_block(ChaCha* ctx,
- word32 stream[CHACHA_CHUNK_WORDS/2], word32 nrounds)
- {
- word32 x[CHACHA_CHUNK_WORDS];
- word32 i;
- for (i = 0; i < CHACHA_CHUNK_WORDS; i++) {
- x[i] = ctx->X[i];
- }
- for (i = nrounds; i > 0; i -= 2) {
- QUARTERROUND(0, 4, 8, 12)
- QUARTERROUND(1, 5, 9, 13)
- QUARTERROUND(2, 6, 10, 14)
- QUARTERROUND(3, 7, 11, 15)
- QUARTERROUND(0, 5, 10, 15)
- QUARTERROUND(1, 6, 11, 12)
- QUARTERROUND(2, 7, 8, 13)
- QUARTERROUND(3, 4, 9, 14)
- }
- for (i = 0; i < CHACHA_CHUNK_WORDS/4; ++i)
- stream[i] = x[i];
- for (i = CHACHA_CHUNK_WORDS/4; i < CHACHA_CHUNK_WORDS/2; ++i)
- stream[i] = x[i + CHACHA_CHUNK_WORDS/2];
- }
- /* XChaCha -- https://tools.ietf.org/html/draft-arciszewski-xchacha-03 */
- int wc_XChacha_SetKey(ChaCha *ctx,
- const byte *key, word32 keySz,
- const byte *nonce, word32 nonceSz,
- word32 counter)
- {
- int ret;
- word32 k[CHACHA_MAX_KEY_SZ];
- byte iv[CHACHA_IV_BYTES];
- if (nonceSz != XCHACHA_NONCE_BYTES)
- return BAD_FUNC_ARG;
- if ((ret = wc_Chacha_SetKey(ctx, key, keySz)) < 0)
- return ret;
- /* form a first chacha IV from the first 16 bytes of the nonce.
- * the first word is supplied in the "counter" arg, and
- * the result is a full 128 bit nonceful IV for the one-time block
- * crypto op that follows.
- */
- if ((ret = wc_Chacha_SetIV(ctx, nonce + 4, U8TO32_LITTLE(nonce))) < 0)
- return ret;
- wc_HChacha_block(ctx, k, 20); /* 20 rounds, but keeping half the output. */
- /* the HChacha output is used as a 256 bit key for the main cipher. */
- XMEMCPY(&ctx->X[4], k, 8 * sizeof(word32));
- /* use 8 bytes from the end of the 24 byte nonce, padded up to 12 bytes,
- * to form the IV for the main cipher.
- */
- XMEMSET(iv, 0, 4);
- XMEMCPY(iv + 4, nonce + 16, 8);
- if ((ret = wc_Chacha_SetIV(ctx, iv, counter)) < 0)
- return ret;
- ForceZero(k, sizeof k);
- ForceZero(iv, sizeof iv);
- return 0;
- }
- #endif /* HAVE_CHACHA && HAVE_XCHACHA */
|