chacha.c 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453
  1. /* chacha.c
  2. *
  3. * Copyright (C) 2006-2020 wolfSSL Inc.
  4. *
  5. * This file is part of wolfSSL.
  6. *
  7. * wolfSSL is free software; you can redistribute it and/or modify
  8. * it under the terms of the GNU General Public License as published by
  9. * the Free Software Foundation; either version 2 of the License, or
  10. * (at your option) any later version.
  11. *
  12. * wolfSSL is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15. * GNU General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU General Public License
  18. * along with this program; if not, write to the Free Software
  19. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
  20. */
  21. /*
  22. DESCRIPTION
  23. This library contains an implementation of the ChaCha20 stream cipher.
  24. Based on chacha-ref.c version 20080118
  25. D. J. Bernstein
  26. Public domain.
  27. */
  28. #ifdef WOLFSSL_ARMASM
  29. /* implementation is located in wolfcrypt/src/port/arm/armv8-chacha.c */
  30. #else
  31. #ifdef HAVE_CONFIG_H
  32. #include <config.h>
  33. #endif
  34. #include <wolfssl/wolfcrypt/settings.h>
  35. #if defined(HAVE_CHACHA) && !defined(WOLFSSL_ARMASM)
  36. #include <wolfssl/wolfcrypt/chacha.h>
  37. #include <wolfssl/wolfcrypt/error-crypt.h>
  38. #include <wolfssl/wolfcrypt/logging.h>
  39. #include <wolfssl/wolfcrypt/cpuid.h>
  40. #ifdef NO_INLINE
  41. #include <wolfssl/wolfcrypt/misc.h>
  42. #else
  43. #define WOLFSSL_MISC_INCLUDED
  44. #include <wolfcrypt/src/misc.c>
  45. #endif
  46. #ifdef CHACHA_AEAD_TEST
  47. #include <stdio.h>
  48. #endif
  /* Optional Intel acceleration: pulls in the intrinsic headers, decides at
   * compile time whether the AVX2 code path may be built (HAVE_INTEL_AVX2),
   * and declares the cached CPU feature flags used for run-time dispatch. */
  49. #ifdef USE_INTEL_CHACHA_SPEEDUP
  50. #include <emmintrin.h>
  51. #include <immintrin.h>
  /* GCC 4.8 and older: mark AVX2 as unsupported. */
  52. #if defined(__GNUC__) && ((__GNUC__ < 4) || \
  53. (__GNUC__ == 4 && __GNUC_MINOR__ <= 8))
  54. #undef NO_AVX2_SUPPORT
  55. #define NO_AVX2_SUPPORT
  56. #endif
  /* clang 3.5 and older: mark AVX2 as unsupported.  Any other clang: clear
   * NO_AVX2_SUPPORT if it is defined at this point.
   * NOTE(review): the #elif also clears a NO_AVX2_SUPPORT that was supplied by
   * the build configuration; presumably it exists to undo the GCC version
   * check above when clang reports an old __GNUC__ version -- confirm. */
  57. #if defined(__clang__) && ((__clang_major__ < 3) || \
  58. (__clang_major__ == 3 && __clang_minor__ <= 5))
  59. #undef NO_AVX2_SUPPORT
  60. #define NO_AVX2_SUPPORT
  61. #elif defined(__clang__) && defined(NO_AVX2_SUPPORT)
  62. #undef NO_AVX2_SUPPORT
  63. #endif
  /* HAVE_INTEL_AVX2 gates the AVX2 branch in wc_Chacha_Process(). */
  64. #ifndef NO_AVX2_SUPPORT
  65. #define HAVE_INTEL_AVX2
  66. #endif
  /* cpuidFlagsSet becomes 1 once cpuidFlags has been filled from
   * cpuid_get_flags(); wc_Chacha_Process() does this lazily on first use. */
  67. static int cpuidFlagsSet = 0;
  68. static int cpuidFlags = 0;
  69. #endif
  70. #ifdef BIG_ENDIAN_ORDER
  71. #define LITTLE32(x) ByteReverseWord32(x)
  72. #else
  73. #define LITTLE32(x) (x)
  74. #endif
  75. /* Number of rounds */
  76. #define ROUNDS 20
  77. #define U32C(v) (v##U)
  78. #define U32V(v) ((word32)(v) & U32C(0xFFFFFFFF))
  79. #define U8TO32_LITTLE(p) LITTLE32(((word32*)(p))[0])
  80. #define ROTATE(v,c) rotlFixed(v, c)
  81. #define XOR(v,w) ((v) ^ (w))
  82. #define PLUS(v,w) (U32V((v) + (w)))
  83. #define PLUSONE(v) (PLUS((v),1))
  84. #define QUARTERROUND(a,b,c,d) \
  85. x[a] = PLUS(x[a],x[b]); x[d] = ROTATE(XOR(x[d],x[a]),16); \
  86. x[c] = PLUS(x[c],x[d]); x[b] = ROTATE(XOR(x[b],x[c]),12); \
  87. x[a] = PLUS(x[a],x[b]); x[d] = ROTATE(XOR(x[d],x[a]), 8); \
  88. x[c] = PLUS(x[c],x[d]); x[b] = ROTATE(XOR(x[b],x[c]), 7);
  89. /**
  90. * Set up iv(nonce). Earlier versions used 64 bits instead of 96, this version
  91. * uses the typical AEAD 96 bit nonce and can do record sizes of 256 GB.
  92. */
  93. int wc_Chacha_SetIV(ChaCha* ctx, const byte* inIv, word32 counter)
  94. {
  95. word32 temp[CHACHA_IV_WORDS];/* used for alignment of memory */
  96. if (ctx == NULL || inIv == NULL)
  97. return BAD_FUNC_ARG;
  98. XMEMCPY(temp, inIv, CHACHA_IV_BYTES);
  99. ctx->left = 0; /* resets state */
  100. ctx->X[CHACHA_MATRIX_CNT_IV+0] = counter; /* block counter */
  101. ctx->X[CHACHA_MATRIX_CNT_IV+1] = LITTLE32(temp[0]); /* fixed variable from nonce */
  102. ctx->X[CHACHA_MATRIX_CNT_IV+2] = LITTLE32(temp[1]); /* counter from nonce */
  103. ctx->X[CHACHA_MATRIX_CNT_IV+3] = LITTLE32(temp[2]); /* counter from nonce */
  104. return 0;
  105. }
  106. /* "expand 32-byte k" as unsigned 32 byte */
  107. static const word32 sigma[4] = {0x61707865, 0x3320646e, 0x79622d32, 0x6b206574};
  108. /* "expand 16-byte k" as unsigned 16 byte */
  109. static const word32 tau[4] = {0x61707865, 0x3120646e, 0x79622d36, 0x6b206574};
  110. /**
  111. * Key setup. 8 word iv (nonce)
  112. */
  113. int wc_Chacha_SetKey(ChaCha* ctx, const byte* key, word32 keySz)
  114. {
  115. const word32* constants;
  116. const byte* k;
  117. #ifdef XSTREAM_ALIGN
  118. word32 alignKey[8];
  119. #endif
  120. if (ctx == NULL || key == NULL)
  121. return BAD_FUNC_ARG;
  122. if (keySz != (CHACHA_MAX_KEY_SZ/2) && keySz != CHACHA_MAX_KEY_SZ)
  123. return BAD_FUNC_ARG;
  124. #ifdef XSTREAM_ALIGN
  125. if ((wolfssl_word)key % 4) {
  126. WOLFSSL_MSG("wc_ChachaSetKey unaligned key");
  127. XMEMCPY(alignKey, key, keySz);
  128. k = (byte*)alignKey;
  129. }
  130. else {
  131. k = key;
  132. }
  133. #else
  134. k = key;
  135. #endif /* XSTREAM_ALIGN */
  136. #ifdef CHACHA_AEAD_TEST
  137. word32 i;
  138. printf("ChaCha key used :\n");
  139. for (i = 0; i < keySz; i++) {
  140. printf("%02x", key[i]);
  141. if ((i + 1) % 8 == 0)
  142. printf("\n");
  143. }
  144. printf("\n\n");
  145. #endif
  146. ctx->X[4] = U8TO32_LITTLE(k + 0);
  147. ctx->X[5] = U8TO32_LITTLE(k + 4);
  148. ctx->X[6] = U8TO32_LITTLE(k + 8);
  149. ctx->X[7] = U8TO32_LITTLE(k + 12);
  150. if (keySz == CHACHA_MAX_KEY_SZ) {
  151. k += 16;
  152. constants = sigma;
  153. }
  154. else {
  155. constants = tau;
  156. }
  157. ctx->X[ 8] = U8TO32_LITTLE(k + 0);
  158. ctx->X[ 9] = U8TO32_LITTLE(k + 4);
  159. ctx->X[10] = U8TO32_LITTLE(k + 8);
  160. ctx->X[11] = U8TO32_LITTLE(k + 12);
  161. ctx->X[ 0] = constants[0];
  162. ctx->X[ 1] = constants[1];
  163. ctx->X[ 2] = constants[2];
  164. ctx->X[ 3] = constants[3];
  165. ctx->left = 0; /* resets state */
  166. return 0;
  167. }
  168. /**
  169. * Converts word into bytes with rotations having been done.
  170. */
  171. static WC_INLINE void wc_Chacha_wordtobyte(word32 output[CHACHA_CHUNK_WORDS],
  172. const word32 input[CHACHA_CHUNK_WORDS])
  173. {
  174. word32 x[CHACHA_CHUNK_WORDS];
  175. word32 i;
  176. for (i = 0; i < CHACHA_CHUNK_WORDS; i++) {
  177. x[i] = input[i];
  178. }
  179. for (i = (ROUNDS); i > 0; i -= 2) {
  180. QUARTERROUND(0, 4, 8, 12)
  181. QUARTERROUND(1, 5, 9, 13)
  182. QUARTERROUND(2, 6, 10, 14)
  183. QUARTERROUND(3, 7, 11, 15)
  184. QUARTERROUND(0, 5, 10, 15)
  185. QUARTERROUND(1, 6, 11, 12)
  186. QUARTERROUND(2, 7, 8, 13)
  187. QUARTERROUND(3, 4, 9, 14)
  188. }
  189. for (i = 0; i < CHACHA_CHUNK_WORDS; i++) {
  190. x[i] = PLUS(x[i], input[i]);
  191. }
  192. for (i = 0; i < CHACHA_CHUNK_WORDS; i++) {
  193. output[i] = LITTLE32(x[i]);
  194. }
  195. }
  196. #ifdef HAVE_XCHACHA
  197. /*
  198. * wc_HChacha_block - half a ChaCha block, for XChaCha
  199. *
  200. * see https://tools.ietf.org/html/draft-arciszewski-xchacha-03
  201. */
  202. static WC_INLINE void wc_HChacha_block(ChaCha* ctx, word32 stream[CHACHA_CHUNK_WORDS/2], int nrounds)
  203. {
  204. word32 x[CHACHA_CHUNK_WORDS];
  205. word32 i;
  206. for (i = 0; i < CHACHA_CHUNK_WORDS; i++) {
  207. x[i] = ctx->X[i];
  208. }
  209. for (i = nrounds; i > 0; i -= 2) {
  210. QUARTERROUND(0, 4, 8, 12)
  211. QUARTERROUND(1, 5, 9, 13)
  212. QUARTERROUND(2, 6, 10, 14)
  213. QUARTERROUND(3, 7, 11, 15)
  214. QUARTERROUND(0, 5, 10, 15)
  215. QUARTERROUND(1, 6, 11, 12)
  216. QUARTERROUND(2, 7, 8, 13)
  217. QUARTERROUND(3, 4, 9, 14)
  218. }
  219. for (i = 0; i < CHACHA_CHUNK_WORDS/4; ++i)
  220. stream[i] = x[i];
  221. for (i = CHACHA_CHUNK_WORDS/4; i < CHACHA_CHUNK_WORDS/2; ++i)
  222. stream[i] = x[i + CHACHA_CHUNK_WORDS/2];
  223. }
  224. /* XChaCha -- https://tools.ietf.org/html/draft-arciszewski-xchacha-03 */
  225. int wc_XChacha_SetKey(ChaCha *ctx,
  226. const byte *key, word32 keySz,
  227. const byte *nonce, word32 nonceSz,
  228. word32 counter) {
  229. word32 k[CHACHA_MAX_KEY_SZ];
  230. byte iv[CHACHA_IV_BYTES];
  231. int ret;
  232. if (nonceSz != XCHACHA_NONCE_BYTES)
  233. return BAD_FUNC_ARG;
  234. if ((ret = wc_Chacha_SetKey(ctx, key, keySz)) < 0)
  235. return ret;
  236. /* form a first chacha IV from the first 16 bytes of the nonce.
  237. * the first word is supplied in the "counter" arg, and
  238. * the result is a full 128 bit nonceful IV for the one-time block
  239. * crypto op that follows.
  240. */
  241. if ((ret = wc_Chacha_SetIV(ctx, nonce + 4, U8TO32_LITTLE(nonce))) < 0)
  242. return ret;
  243. wc_HChacha_block(ctx, k, 20); /* 20 rounds, but keeping half the output. */
  244. /* the HChacha output is used as a 256 bit key for the main cipher. */
  245. XMEMCPY(&ctx->X[4], k, 8 * sizeof(word32));
  246. /* use 8 bytes from the end of the 24 byte nonce, padded up to 12 bytes,
  247. * to form the IV for the main cipher.
  248. */
  249. XMEMSET(iv, 0, 4);
  250. XMEMCPY(iv + 4, nonce + 16, 8);
  251. if ((ret = wc_Chacha_SetIV(ctx, iv, counter)) < 0)
  252. return ret;
  253. ForceZero(k, sizeof k);
  254. ForceZero(iv, sizeof iv);
  255. return 0;
  256. }
  257. #endif /* HAVE_XCHACHA */
  258. #ifdef __cplusplus
  259. extern "C" {
  260. #endif
  261. extern void chacha_encrypt_x64(ChaCha* ctx, const byte* m, byte* c,
  262. word32 bytes);
  263. extern void chacha_encrypt_avx1(ChaCha* ctx, const byte* m, byte* c,
  264. word32 bytes);
  265. extern void chacha_encrypt_avx2(ChaCha* ctx, const byte* m, byte* c,
  266. word32 bytes);
  267. #ifdef __cplusplus
  268. } /* extern "C" */
  269. #endif
  270. /**
  271. * Encrypt a stream of bytes
  272. */
  273. static void wc_Chacha_encrypt_bytes(ChaCha* ctx, const byte* m, byte* c,
  274. word32 bytes)
  275. {
  276. byte* output;
  277. word32 temp[CHACHA_CHUNK_WORDS]; /* used to make sure aligned */
  278. word32 i;
  279. /* handle left overs */
  280. if (bytes > 0 && ctx->left > 0) {
  281. wc_Chacha_wordtobyte(temp, ctx->X); /* recreate the stream */
  282. output = (byte*)temp + CHACHA_CHUNK_BYTES - ctx->left;
  283. for (i = 0; i < bytes && i < ctx->left; i++) {
  284. c[i] = (byte)(m[i] ^ output[i]);
  285. }
  286. ctx->left -= i;
  287. /* Used up all of the stream that was left, increment the counter */
  288. if (ctx->left == 0) {
  289. ctx->X[CHACHA_MATRIX_CNT_IV] =
  290. PLUSONE(ctx->X[CHACHA_MATRIX_CNT_IV]);
  291. }
  292. bytes -= i;
  293. c += i;
  294. m += i;
  295. }
  296. output = (byte*)temp;
  297. while (bytes >= CHACHA_CHUNK_BYTES) {
  298. wc_Chacha_wordtobyte(temp, ctx->X);
  299. ctx->X[CHACHA_MATRIX_CNT_IV] = PLUSONE(ctx->X[CHACHA_MATRIX_CNT_IV]);
  300. for (i = 0; i < CHACHA_CHUNK_BYTES; ++i) {
  301. c[i] = (byte)(m[i] ^ output[i]);
  302. }
  303. bytes -= CHACHA_CHUNK_BYTES;
  304. c += CHACHA_CHUNK_BYTES;
  305. m += CHACHA_CHUNK_BYTES;
  306. }
  307. if (bytes) {
  308. /* in this case there will always be some left over since bytes is less
  309. * than CHACHA_CHUNK_BYTES, so do not increment counter after getting
  310. * stream in order for the stream to be recreated on next call */
  311. wc_Chacha_wordtobyte(temp, ctx->X);
  312. for (i = 0; i < bytes; ++i) {
  313. c[i] = m[i] ^ output[i];
  314. }
  315. ctx->left = CHACHA_CHUNK_BYTES - i;
  316. }
  317. }
  318. /**
  319. * API to encrypt/decrypt a message of any size.
  320. */
  321. int wc_Chacha_Process(ChaCha* ctx, byte* output, const byte* input,
  322. word32 msglen)
  323. {
  324. if (ctx == NULL || input == NULL || output == NULL)
  325. return BAD_FUNC_ARG;
  326. #ifdef USE_INTEL_CHACHA_SPEEDUP
  327. /* handle left overs */
  328. if (msglen > 0 && ctx->left > 0) {
  329. byte* out;
  330. word32 i;
  331. out = (byte*)ctx->over + CHACHA_CHUNK_BYTES - ctx->left;
  332. for (i = 0; i < msglen && i < ctx->left; i++) {
  333. output[i] = (byte)(input[i] ^ out[i]);
  334. }
  335. ctx->left -= i;
  336. msglen -= i;
  337. output += i;
  338. input += i;
  339. }
  340. if (msglen == 0) {
  341. return 0;
  342. }
  343. if (!cpuidFlagsSet) {
  344. cpuidFlags = cpuid_get_flags();
  345. cpuidFlagsSet = 1;
  346. }
  347. #ifdef HAVE_INTEL_AVX2
  348. if (IS_INTEL_AVX2(cpuidFlags)) {
  349. SAVE_VECTOR_REGISTERS();
  350. chacha_encrypt_avx2(ctx, input, output, msglen);
  351. RESTORE_VECTOR_REGISTERS();
  352. return 0;
  353. }
  354. #endif
  355. if (IS_INTEL_AVX1(cpuidFlags)) {
  356. SAVE_VECTOR_REGISTERS();
  357. chacha_encrypt_avx1(ctx, input, output, msglen);
  358. RESTORE_VECTOR_REGISTERS();
  359. return 0;
  360. }
  361. else {
  362. chacha_encrypt_x64(ctx, input, output, msglen);
  363. return 0;
  364. }
  365. #endif
  366. wc_Chacha_encrypt_bytes(ctx, input, output, msglen);
  367. return 0;
  368. }
  369. void wc_Chacha_purge_current_block(ChaCha* ctx) {
  370. if (ctx->left > 0) {
  371. byte scratch[CHACHA_CHUNK_BYTES];
  372. XMEMSET(scratch, 0, sizeof(scratch));
  373. (void)wc_Chacha_Process(ctx, scratch, scratch, CHACHA_CHUNK_BYTES - ctx->left);
  374. }
  375. }
  376. #endif /* HAVE_CHACHA*/
  377. #endif /* WOLFSSL_ARMASM */