chacha.c 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460
  1. /* chacha.c
  2. *
  3. * Copyright (C) 2006-2024 wolfSSL Inc.
  4. *
  5. * This file is part of wolfSSL.
  6. *
  7. * wolfSSL is free software; you can redistribute it and/or modify
  8. * it under the terms of the GNU General Public License as published by
  9. * the Free Software Foundation; either version 2 of the License, or
  10. * (at your option) any later version.
  11. *
  12. * wolfSSL is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15. * GNU General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU General Public License
  18. * along with this program; if not, write to the Free Software
  19. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
  20. */
  21. /*
  22. DESCRIPTION
  23. This file contains an implementation of the ChaCha20 stream cipher.
  24. Based on chacha-ref.c version 20080118
  25. D. J. Bernstein
  26. Public domain.
  27. */
  28. #ifdef HAVE_CONFIG_H
  29. #include <config.h>
  30. #endif
  31. #include <wolfssl/wolfcrypt/settings.h>
  32. #ifdef HAVE_CHACHA
  33. #include <wolfssl/wolfcrypt/chacha.h>
  34. #include <wolfssl/wolfcrypt/error-crypt.h>
  35. #ifdef NO_INLINE
  36. #include <wolfssl/wolfcrypt/misc.h>
  37. #else
  38. #define WOLFSSL_MISC_INCLUDED
  39. #include <wolfcrypt/src/misc.c>
  40. #endif
  41. #ifdef BIG_ENDIAN_ORDER
  42. #define LITTLE32(x) ByteReverseWord32(x)
  43. #else
  44. #define LITTLE32(x) (x)
  45. #endif
  46. /* Number of rounds */
  47. #define ROUNDS 20
  48. #define U32C(v) (v##U)
  49. #define U32V(v) ((word32)(v) & U32C(0xFFFFFFFF))
  50. #define U8TO32_LITTLE(p) LITTLE32(((word32*)(p))[0])
  51. #define ROTATE(v,c) rotlFixed(v, c)
  52. #define XOR(v,w) ((v) ^ (w))
  53. #define PLUS(v,w) (U32V((v) + (w)))
  54. #define PLUSONE(v) (PLUS((v),1))
  55. #define QUARTERROUND(a,b,c,d) \
  56. x[a] = PLUS(x[a],x[b]); x[d] = ROTATE(XOR(x[d],x[a]),16); \
  57. x[c] = PLUS(x[c],x[d]); x[b] = ROTATE(XOR(x[b],x[c]),12); \
  58. x[a] = PLUS(x[a],x[b]); x[d] = ROTATE(XOR(x[d],x[a]), 8); \
  59. x[c] = PLUS(x[c],x[d]); x[b] = ROTATE(XOR(x[b],x[c]), 7);
  60. #endif /* HAVE_CHACHA */
  61. #if defined(WOLFSSL_ARMASM)
  62. /* implementation is located in wolfcrypt/src/port/arm/armv8-chacha.c */
  63. #elif defined(WOLFSSL_RISCV_ASM)
  64. /* implementation located in wolfcrypt/src/port/riscv/riscv-64-chacha.c */
  65. #else
  66. /* BEGIN ChaCha C implementation */
  67. #if defined(HAVE_CHACHA)
  68. #include <wolfssl/wolfcrypt/logging.h>
  69. #include <wolfssl/wolfcrypt/cpuid.h>
  70. #ifdef CHACHA_AEAD_TEST
  71. #include <stdio.h>
  72. #endif
  73. #ifdef USE_INTEL_CHACHA_SPEEDUP
  74. #include <emmintrin.h>
  75. #include <immintrin.h>
  76. #if defined(__GNUC__) && ((__GNUC__ < 4) || \
  77. (__GNUC__ == 4 && __GNUC_MINOR__ <= 8))
  78. #undef NO_AVX2_SUPPORT
  79. #define NO_AVX2_SUPPORT
  80. #endif
  81. #if defined(__clang__) && ((__clang_major__ < 3) || \
  82. (__clang_major__ == 3 && __clang_minor__ <= 5))
  83. #undef NO_AVX2_SUPPORT
  84. #define NO_AVX2_SUPPORT
  85. #elif defined(__clang__) && defined(NO_AVX2_SUPPORT)
  86. #undef NO_AVX2_SUPPORT
  87. #endif
  88. #if defined(_MSC_VER) && (_MSC_VER <= 1900)
  89. #undef NO_AVX2_SUPPORT
  90. #define NO_AVX2_SUPPORT
  91. #endif
  92. #ifndef NO_AVX2_SUPPORT
  93. #define HAVE_INTEL_AVX2
  94. #endif
  95. static int cpuidFlagsSet = 0;
  96. static word32 cpuidFlags = 0;
  97. #endif
  98. /**
  99. * Set up iv(nonce). Earlier versions used 64 bits instead of 96, this version
  100. * uses the typical AEAD 96 bit nonce and can do record sizes of 256 GB.
  101. */
  102. int wc_Chacha_SetIV(ChaCha* ctx, const byte* inIv, word32 counter)
  103. {
  104. word32 temp[CHACHA_IV_WORDS];/* used for alignment of memory */
  105. if (ctx == NULL || inIv == NULL)
  106. return BAD_FUNC_ARG;
  107. XMEMCPY(temp, inIv, CHACHA_IV_BYTES);
  108. ctx->left = 0; /* resets state */
  109. ctx->X[CHACHA_MATRIX_CNT_IV+0] = counter; /* block counter */
  110. ctx->X[CHACHA_MATRIX_CNT_IV+1] = LITTLE32(temp[0]); /* fixed variable from nonce */
  111. ctx->X[CHACHA_MATRIX_CNT_IV+2] = LITTLE32(temp[1]); /* counter from nonce */
  112. ctx->X[CHACHA_MATRIX_CNT_IV+3] = LITTLE32(temp[2]); /* counter from nonce */
  113. return 0;
  114. }
  115. /* "expand 32-byte k" as unsigned 32 byte */
  116. static const word32 sigma[4] = {0x61707865, 0x3320646e, 0x79622d32, 0x6b206574};
  117. /* "expand 16-byte k" as unsigned 16 byte */
  118. static const word32 tau[4] = {0x61707865, 0x3120646e, 0x79622d36, 0x6b206574};
  119. /**
  120. * Key setup. 8 word iv (nonce)
  121. */
  122. int wc_Chacha_SetKey(ChaCha* ctx, const byte* key, word32 keySz)
  123. {
  124. const word32* constants;
  125. const byte* k;
  126. #ifdef XSTREAM_ALIGN
  127. word32 alignKey[8];
  128. #endif
  129. if (ctx == NULL || key == NULL)
  130. return BAD_FUNC_ARG;
  131. if (keySz != (CHACHA_MAX_KEY_SZ/2) && keySz != CHACHA_MAX_KEY_SZ)
  132. return BAD_FUNC_ARG;
  133. #ifdef XSTREAM_ALIGN
  134. if ((wc_ptr_t)key % 4) {
  135. WOLFSSL_MSG("wc_ChachaSetKey unaligned key");
  136. XMEMCPY(alignKey, key, keySz);
  137. k = (byte*)alignKey;
  138. }
  139. else {
  140. k = key;
  141. }
  142. #else
  143. k = key;
  144. #endif /* XSTREAM_ALIGN */
  145. #ifdef CHACHA_AEAD_TEST
  146. word32 i;
  147. printf("ChaCha key used :\n");
  148. for (i = 0; i < keySz; i++) {
  149. printf("%02x", key[i]);
  150. if ((i + 1) % 8 == 0)
  151. printf("\n");
  152. }
  153. printf("\n\n");
  154. #endif
  155. ctx->X[4] = U8TO32_LITTLE(k + 0);
  156. ctx->X[5] = U8TO32_LITTLE(k + 4);
  157. ctx->X[6] = U8TO32_LITTLE(k + 8);
  158. ctx->X[7] = U8TO32_LITTLE(k + 12);
  159. if (keySz == CHACHA_MAX_KEY_SZ) {
  160. k += 16;
  161. constants = sigma;
  162. }
  163. else {
  164. constants = tau;
  165. }
  166. ctx->X[ 8] = U8TO32_LITTLE(k + 0);
  167. ctx->X[ 9] = U8TO32_LITTLE(k + 4);
  168. ctx->X[10] = U8TO32_LITTLE(k + 8);
  169. ctx->X[11] = U8TO32_LITTLE(k + 12);
  170. ctx->X[ 0] = constants[0];
  171. ctx->X[ 1] = constants[1];
  172. ctx->X[ 2] = constants[2];
  173. ctx->X[ 3] = constants[3];
  174. ctx->left = 0; /* resets state */
  175. return 0;
  176. }
  177. #ifndef USE_INTEL_CHACHA_SPEEDUP
  178. /**
  179. * Converts word into bytes with rotations having been done.
  180. */
  181. static WC_INLINE void wc_Chacha_wordtobyte(word32 x[CHACHA_CHUNK_WORDS],
  182. word32 state[CHACHA_CHUNK_WORDS])
  183. {
  184. word32 i;
  185. XMEMCPY(x, state, CHACHA_CHUNK_BYTES);
  186. for (i = (ROUNDS); i > 0; i -= 2) {
  187. QUARTERROUND(0, 4, 8, 12)
  188. QUARTERROUND(1, 5, 9, 13)
  189. QUARTERROUND(2, 6, 10, 14)
  190. QUARTERROUND(3, 7, 11, 15)
  191. QUARTERROUND(0, 5, 10, 15)
  192. QUARTERROUND(1, 6, 11, 12)
  193. QUARTERROUND(2, 7, 8, 13)
  194. QUARTERROUND(3, 4, 9, 14)
  195. }
  196. for (i = 0; i < CHACHA_CHUNK_WORDS; i++) {
  197. x[i] = PLUS(x[i], state[i]);
  198. #ifdef BIG_ENDIAN_ORDER
  199. x[i] = LITTLE32(x[i]);
  200. #endif
  201. }
  202. }
  203. #endif /* !USE_INTEL_CHACHA_SPEEDUP */
  204. #ifdef __cplusplus
  205. extern "C" {
  206. #endif
  207. extern void chacha_encrypt_x64(ChaCha* ctx, const byte* m, byte* c,
  208. word32 bytes);
  209. extern void chacha_encrypt_avx1(ChaCha* ctx, const byte* m, byte* c,
  210. word32 bytes);
  211. extern void chacha_encrypt_avx2(ChaCha* ctx, const byte* m, byte* c,
  212. word32 bytes);
  213. #ifdef __cplusplus
  214. } /* extern "C" */
  215. #endif
  216. #ifndef USE_INTEL_CHACHA_SPEEDUP
  217. /**
  218. * Encrypt a stream of bytes
  219. */
  220. static void wc_Chacha_encrypt_bytes(ChaCha* ctx, const byte* m, byte* c,
  221. word32 bytes)
  222. {
  223. union {
  224. byte state[CHACHA_CHUNK_BYTES];
  225. word32 state32[CHACHA_CHUNK_WORDS];
  226. wolfssl_word align_word; /* align for xorbufout */
  227. } tmp;
  228. /* handle left overs */
  229. if (bytes > 0 && ctx->left > 0) {
  230. word32 processed = min(bytes, ctx->left);
  231. wc_Chacha_wordtobyte(tmp.state32, ctx->X); /* recreate the stream */
  232. xorbufout(c, m, tmp.state + CHACHA_CHUNK_BYTES - ctx->left, processed);
  233. ctx->left -= processed;
  234. /* Used up all of the stream that was left, increment the counter */
  235. if (ctx->left == 0) {
  236. ctx->X[CHACHA_MATRIX_CNT_IV] =
  237. PLUSONE(ctx->X[CHACHA_MATRIX_CNT_IV]);
  238. }
  239. bytes -= processed;
  240. c += processed;
  241. m += processed;
  242. }
  243. while (bytes >= CHACHA_CHUNK_BYTES) {
  244. wc_Chacha_wordtobyte(tmp.state32, ctx->X);
  245. ctx->X[CHACHA_MATRIX_CNT_IV] = PLUSONE(ctx->X[CHACHA_MATRIX_CNT_IV]);
  246. xorbufout(c, m, tmp.state, CHACHA_CHUNK_BYTES);
  247. bytes -= CHACHA_CHUNK_BYTES;
  248. c += CHACHA_CHUNK_BYTES;
  249. m += CHACHA_CHUNK_BYTES;
  250. }
  251. if (bytes) {
  252. /* in this case there will always be some left over since bytes is less
  253. * than CHACHA_CHUNK_BYTES, so do not increment counter after getting
  254. * stream in order for the stream to be recreated on next call */
  255. wc_Chacha_wordtobyte(tmp.state32, ctx->X);
  256. xorbufout(c, m, tmp.state, bytes);
  257. ctx->left = CHACHA_CHUNK_BYTES - bytes;
  258. }
  259. }
  260. #endif /* !USE_INTEL_CHACHA_SPEEDUP */
  261. /**
  262. * API to encrypt/decrypt a message of any size.
  263. */
  264. int wc_Chacha_Process(ChaCha* ctx, byte* output, const byte* input,
  265. word32 msglen)
  266. {
  267. if (ctx == NULL || input == NULL || output == NULL)
  268. return BAD_FUNC_ARG;
  269. #ifdef USE_INTEL_CHACHA_SPEEDUP
  270. /* handle left overs */
  271. if (msglen > 0 && ctx->left > 0) {
  272. byte* out;
  273. word32 processed = min(msglen, ctx->left);
  274. out = (byte*)ctx->over + CHACHA_CHUNK_BYTES - ctx->left;
  275. xorbufout(output, input, out, processed);
  276. ctx->left -= processed;
  277. msglen -= processed;
  278. output += processed;
  279. input += processed;
  280. }
  281. if (msglen == 0) {
  282. return 0;
  283. }
  284. if (!cpuidFlagsSet) {
  285. cpuidFlags = cpuid_get_flags();
  286. cpuidFlagsSet = 1;
  287. }
  288. #ifdef HAVE_INTEL_AVX2
  289. if (IS_INTEL_AVX2(cpuidFlags)) {
  290. SAVE_VECTOR_REGISTERS(return _svr_ret;);
  291. chacha_encrypt_avx2(ctx, input, output, msglen);
  292. RESTORE_VECTOR_REGISTERS();
  293. return 0;
  294. }
  295. #endif
  296. if (IS_INTEL_AVX1(cpuidFlags)) {
  297. SAVE_VECTOR_REGISTERS(return _svr_ret;);
  298. chacha_encrypt_avx1(ctx, input, output, msglen);
  299. RESTORE_VECTOR_REGISTERS();
  300. return 0;
  301. }
  302. else {
  303. chacha_encrypt_x64(ctx, input, output, msglen);
  304. return 0;
  305. }
  306. #else
  307. wc_Chacha_encrypt_bytes(ctx, input, output, msglen);
  308. return 0;
  309. #endif
  310. }
  311. #endif /* HAVE_CHACHA */
  312. #endif /* END ChaCha C implementation */
  313. #if defined(HAVE_CHACHA) && defined(HAVE_XCHACHA)
  314. void wc_Chacha_purge_current_block(ChaCha* ctx)
  315. {
  316. if (ctx->left > 0) {
  317. byte scratch[CHACHA_CHUNK_BYTES];
  318. XMEMSET(scratch, 0, sizeof(scratch));
  319. (void)wc_Chacha_Process(ctx, scratch, scratch, CHACHA_CHUNK_BYTES - ctx->left);
  320. }
  321. }
  322. /*
  323. * wc_HChacha_block - half a ChaCha block, for XChaCha
  324. *
  325. * see https://tools.ietf.org/html/draft-arciszewski-xchacha-03
  326. */
  327. static WC_INLINE void wc_HChacha_block(ChaCha* ctx,
  328. word32 stream[CHACHA_CHUNK_WORDS/2], word32 nrounds)
  329. {
  330. word32 x[CHACHA_CHUNK_WORDS];
  331. word32 i;
  332. for (i = 0; i < CHACHA_CHUNK_WORDS; i++) {
  333. x[i] = ctx->X[i];
  334. }
  335. for (i = nrounds; i > 0; i -= 2) {
  336. QUARTERROUND(0, 4, 8, 12)
  337. QUARTERROUND(1, 5, 9, 13)
  338. QUARTERROUND(2, 6, 10, 14)
  339. QUARTERROUND(3, 7, 11, 15)
  340. QUARTERROUND(0, 5, 10, 15)
  341. QUARTERROUND(1, 6, 11, 12)
  342. QUARTERROUND(2, 7, 8, 13)
  343. QUARTERROUND(3, 4, 9, 14)
  344. }
  345. for (i = 0; i < CHACHA_CHUNK_WORDS/4; ++i)
  346. stream[i] = x[i];
  347. for (i = CHACHA_CHUNK_WORDS/4; i < CHACHA_CHUNK_WORDS/2; ++i)
  348. stream[i] = x[i + CHACHA_CHUNK_WORDS/2];
  349. }
  350. /* XChaCha -- https://tools.ietf.org/html/draft-arciszewski-xchacha-03 */
  351. int wc_XChacha_SetKey(ChaCha *ctx,
  352. const byte *key, word32 keySz,
  353. const byte *nonce, word32 nonceSz,
  354. word32 counter)
  355. {
  356. int ret;
  357. word32 k[CHACHA_MAX_KEY_SZ];
  358. byte iv[CHACHA_IV_BYTES];
  359. if (nonceSz != XCHACHA_NONCE_BYTES)
  360. return BAD_FUNC_ARG;
  361. if ((ret = wc_Chacha_SetKey(ctx, key, keySz)) < 0)
  362. return ret;
  363. /* form a first chacha IV from the first 16 bytes of the nonce.
  364. * the first word is supplied in the "counter" arg, and
  365. * the result is a full 128 bit nonceful IV for the one-time block
  366. * crypto op that follows.
  367. */
  368. if ((ret = wc_Chacha_SetIV(ctx, nonce + 4, U8TO32_LITTLE(nonce))) < 0)
  369. return ret;
  370. wc_HChacha_block(ctx, k, 20); /* 20 rounds, but keeping half the output. */
  371. /* the HChacha output is used as a 256 bit key for the main cipher. */
  372. XMEMCPY(&ctx->X[4], k, 8 * sizeof(word32));
  373. /* use 8 bytes from the end of the 24 byte nonce, padded up to 12 bytes,
  374. * to form the IV for the main cipher.
  375. */
  376. XMEMSET(iv, 0, 4);
  377. XMEMCPY(iv + 4, nonce + 16, 8);
  378. if ((ret = wc_Chacha_SetIV(ctx, iv, counter)) < 0)
  379. return ret;
  380. ForceZero(k, sizeof k);
  381. ForceZero(iv, sizeof iv);
  382. return 0;
  383. }
  384. #endif /* HAVE_CHACHA && HAVE_XCHACHA */