/* tls_aesgcm.c */
/*
 * Copyright (C) 2018 Denys Vlasenko
 *
 * Licensed under GPLv2, see file LICENSE in this source tree.
 */
  6. #include "tls.h"
  7. typedef uint8_t byte;
  8. typedef uint32_t word32;
  9. #define XMEMSET memset
  10. #define XMEMCPY memcpy
  11. /* from wolfssl-3.15.3/wolfcrypt/src/aes.c */
  12. static ALWAYS_INLINE void FlattenSzInBits(byte* buf, word32 sz)
  13. {
  14. /* Multiply the sz by 8 */
  15. //bbox: these sizes are never even close to 2^32/8
  16. // word32 szHi = (sz >> (8*sizeof(sz) - 3));
  17. sz <<= 3;
  18. /* copy over the words of the sz into the destination buffer */
  19. // buf[0] = (szHi >> 24) & 0xff;
  20. // buf[1] = (szHi >> 16) & 0xff;
  21. // buf[2] = (szHi >> 8) & 0xff;
  22. // buf[3] = szHi & 0xff;
  23. *(uint32_t*)(buf + 0) = 0;
  24. // buf[4] = (sz >> 24) & 0xff;
  25. // buf[5] = (sz >> 16) & 0xff;
  26. // buf[6] = (sz >> 8) & 0xff;
  27. // buf[7] = sz & 0xff;
  28. *(uint32_t*)(buf + 4) = SWAP_BE32(sz);
  29. }
/* Shift the 128-bit GHASH field element x right by one bit and reduce
 * modulo the GCM polynomial: if a 1 bit falls off the low end (lsb of
 * x[15]), xor the reduction constant R = 0xE1 into the top byte.
 * x is treated as a 128-bit big-endian value.
 * NOTE(review): the word-sized variants access x through an
 * unsigned long* (macro 'l' below), so x must be long-aligned —
 * the callers in this file use ALIGNED_long buffers; confirm if
 * reusing elsewhere.
 */
static void RIGHTSHIFTX(byte* x)
{
#define l ((unsigned long*)x)
#if 0
	// Generic byte-at-a-time algorithm
	int i;
	byte carryIn = (x[15] & 0x01) ? 0xE1 : 0;
	for (i = 0; i < AES_BLOCK_SIZE; i++) {
		byte carryOut = (x[i] << 7); // zero, or 0x80
		x[i] = (x[i] >> 1) ^ carryIn;
		carryIn = carryOut;
	}
#elif BB_BIG_ENDIAN
	// Big-endian can shift-right in larger than byte chunks
	// (we use the fact that 'x' is long-aligned)
	// carryIn starts as the reduction constant (or 0), applied to the
	// most significant word; each word's shifted-out lsb becomes the
	// next word's incoming msb.
	unsigned long carryIn = (x[15] & 0x01)
		? ((unsigned long)0xE1 << (LONG_BIT-8))
		: 0;
# if ULONG_MAX <= 0xffffffff
	int i;
	for (i = 0; i < AES_BLOCK_SIZE/sizeof(long); i++) {
		unsigned long carryOut = l[i] << (LONG_BIT-1); // zero, or 0x800..00
		l[i] = (l[i] >> 1) ^ carryIn;
		carryIn = carryOut;
	}
# else
	// 64-bit code: need to process only 2 words
	unsigned long carryOut = l[0] << (LONG_BIT-1); // zero, or 0x800..00
	l[0] = (l[0] >> 1) ^ carryIn;
	l[1] = (l[1] >> 1) ^ carryOut;
# endif
#else /* LITTLE_ENDIAN */
	// In order to use word-sized ops, little-endian needs to byteswap.
	// On x86, code size increase is ~10 bytes compared to byte-by-byte.
	unsigned long carryIn = (x[15] & 0x01)
		? ((unsigned long)0xE1 << (LONG_BIT-8))
		: 0;
# if ULONG_MAX <= 0xffffffff
	int i;
	for (i = 0; i < AES_BLOCK_SIZE/sizeof(long); i++) {
		// swap to big-endian, shift, swap back
		unsigned long ti = SWAP_BE32(l[i]);
		unsigned long carryOut = ti << (LONG_BIT-1); // zero, or 0x800..00
		ti = (ti >> 1) ^ carryIn;
		l[i] = SWAP_BE32(ti);
		carryIn = carryOut;
	}
# else
	// 64-bit code: need to process only 2 words
	unsigned long tt = SWAP_BE64(l[0]);
	unsigned long carryOut = tt << (LONG_BIT-1); // zero, or 0x800..00
	tt = (tt >> 1) ^ carryIn; l[0] = SWAP_BE64(tt);
	tt = SWAP_BE64(l[1]);
	tt = (tt >> 1) ^ carryOut; l[1] = SWAP_BE64(tt);
# endif
#endif /* LITTLE_ENDIAN */
#undef l
}
  87. // Caller guarantees X is aligned
  88. static void GMULT(byte* X, byte* Y)
  89. {
  90. byte Z[AES_BLOCK_SIZE] ALIGNED_long;
  91. //byte V[AES_BLOCK_SIZE] ALIGNED_long;
  92. int i;
  93. XMEMSET(Z, 0, AES_BLOCK_SIZE);
  94. //XMEMCPY(V, X, AES_BLOCK_SIZE);
  95. for (i = 0; i < AES_BLOCK_SIZE; i++) {
  96. uint32_t y = 0x800000 | Y[i];
  97. for (;;) { // for every bit in Y[i], from msb to lsb
  98. if (y & 0x80) {
  99. xorbuf_aligned_AES_BLOCK_SIZE(Z, X); // was V, not X
  100. }
  101. RIGHTSHIFTX(X); // was V, not X
  102. y = y << 1;
  103. if ((int32_t)y < 0) // if bit 0x80000000 set = if 8 iterations done
  104. break;
  105. }
  106. }
  107. XMEMCPY(X, Z, AES_BLOCK_SIZE);
  108. }
//bbox:
// for TLS AES-GCM, a (which is AAD) is always 13 bytes long, and bbox code provides
// extra 3 zeroed bytes, making it a[16], or a[AES_BLOCK_SIZE].
// Resulting auth tag in s[] is also always AES_BLOCK_SIZE bytes.
//
// This allows some simplifications.
#define aSz 13
#define sSz AES_BLOCK_SIZE
/* GHASH(H, A, C): compute the GCM authentication value over the AAD
 * 'a' and the cSz-byte ciphertext 'c', writing the 16-byte result
 * to s.
 * h: the hash key H (16 bytes). Must be long-aligned since it is
 *    passed to GMULT.
 * a: the AAD; always 13 bytes in TLS, and the caller pads it with
 *    3 zero bytes to a full AES block (see bbox note above).
 * c: ciphertext, cSz bytes; need not be aligned.
 * s: receives the AES_BLOCK_SIZE-byte authentication value.
 */
void FAST_FUNC aesgcm_GHASH(byte* h,
		const byte* a, //unsigned aSz,
		const byte* c, unsigned cSz,
		byte* s //, unsigned sSz
)
{
	byte x[AES_BLOCK_SIZE] ALIGNED_long;
//	byte scratch[AES_BLOCK_SIZE] ALIGNED_long;
	unsigned blocks, partial;
	//was: byte* h = aes->H;

	//XMEMSET(x, 0, AES_BLOCK_SIZE);

	/* Hash in A, the Additional Authentication Data */
	// The general multi-block AAD loop is not needed: aSz is a
	// compile-time 13 and the caller zero-pads a[] to one full block.
//	if (aSz != 0 && a != NULL) {
//		blocks = aSz / AES_BLOCK_SIZE;
//		partial = aSz % AES_BLOCK_SIZE;
//		while (blocks--) {
			//xorbuf(x, a, AES_BLOCK_SIZE);
			XMEMCPY(x, a, AES_BLOCK_SIZE);// memcpy(x,a) = memset(x,0)+xorbuf(x,a)
			GMULT(x, h);
//			a += AES_BLOCK_SIZE;
//		}
//		if (partial != 0) {
//			XMEMSET(scratch, 0, AES_BLOCK_SIZE);
//			XMEMCPY(scratch, a, partial);
//			xorbuf(x, scratch, AES_BLOCK_SIZE);
//			GMULT(x, h);
//		}
//	}

	/* Hash in C, the Ciphertext */
	if (cSz != 0 /*&& c != NULL*/) {
		blocks = cSz / AES_BLOCK_SIZE;
		partial = cSz % AES_BLOCK_SIZE;
		while (blocks--) {
			if (BB_UNALIGNED_MEMACCESS_OK) // c is not guaranteed to be aligned
				xorbuf_aligned_AES_BLOCK_SIZE(x, c);
			else
				xorbuf(x, c, AES_BLOCK_SIZE);
			GMULT(x, h);
			c += AES_BLOCK_SIZE;
		}
		if (partial != 0) {
			// xoring only the first 'partial' bytes is the same as
			// zero-padding c to a full block and xoring all 16 bytes:
			//XMEMSET(scratch, 0, AES_BLOCK_SIZE);
			//XMEMCPY(scratch, c, partial);
			//xorbuf(x, scratch, AES_BLOCK_SIZE);
			xorbuf(x, c, partial);//same result as above
			GMULT(x, h);
		}
	}

	/* Hash in the lengths of A and C in bits */
	// The length block is { be64(aSz*8), be64(cSz*8) }; both high
	// words are zero (sizes are far below 2^32/8), so only the two
	// low words need to be xored in. 'x' is long-aligned, so the
	// word-sized accesses below are safe.
	//FlattenSzInBits(&scratch[0], aSz);
	//FlattenSzInBits(&scratch[8], cSz);
	//xorbuf_aligned_AES_BLOCK_SIZE(x, scratch);
	// simpler:
#define P32(v) ((uint32_t*)v)
	//P32(x)[0] ^= 0;
	P32(x)[1] ^= SWAP_BE32(aSz * 8);
	//P32(x)[2] ^= 0;
	P32(x)[3] ^= SWAP_BE32(cSz * 8);
#undef P32
	GMULT(x, h);

	/* Copy the result into s. */
	XMEMCPY(s, x, sSz);
}
  179. }