1
0

poly1305-donna-16.h 6.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201
  1. /*
  2. poly1305 implementation using 16 bit * 16 bit = 32 bit multiplication and 32 bit addition
  3. */
  /*
   * POLY1305_NOINLINE marks a function as "never inline" using the syntax of
   * the compiler in use. MSVC is checked first, then GCC-compatible compilers;
   * on anything else the macro expands to nothing.
   */
  #ifdef _MSC_VER
  #  define POLY1305_NOINLINE __declspec(noinline)
  #else
  #  ifdef __GNUC__
  #    define POLY1305_NOINLINE __attribute__((noinline))
  #  else
  #    define POLY1305_NOINLINE
  #  endif
  #endif

  /* Number of message bytes consumed per block. */
  #define poly1305_block_size 16
  12. /* 17 + sizeof(size_t) + 18*sizeof(unsigned short) */
  13. typedef struct poly1305_state_internal_t {
  14. unsigned char buffer[poly1305_block_size];
  15. size_t leftover;
  16. unsigned short r[10];
  17. unsigned short h[10];
  18. unsigned short pad[8];
  19. unsigned char final;
  20. } poly1305_state_internal_t;
  /* Read the two bytes at p as one little-endian 16-bit unsigned value. */
  static unsigned short
  U8TO16(const unsigned char *p) {
      const unsigned short low_byte  = (unsigned short)(p[0] & 0xff);
      const unsigned short high_byte = (unsigned short)(p[1] & 0xff);

      return (unsigned short)(low_byte | (high_byte << 8));
  }
  /* Write the low 16 bits of v to p[0] and p[1], least significant byte first. */
  static void
  U16TO8(unsigned char *p, unsigned short v) {
      const unsigned char low_byte  = (unsigned char)(v & 0xff);
      const unsigned char high_byte = (unsigned char)((v >> 8) & 0xff);

      p[0] = low_byte;
      p[1] = high_byte;
  }
  34. void
  35. poly1305_init(poly1305_context *ctx, const unsigned char key[32]) {
  36. poly1305_state_internal_t *st = (poly1305_state_internal_t *)ctx;
  37. unsigned short t0,t1,t2,t3,t4,t5,t6,t7;
  38. size_t i;
  39. /* r &= 0xffffffc0ffffffc0ffffffc0fffffff */
  40. t0 = U8TO16(&key[ 0]); st->r[0] = ( t0 ) & 0x1fff;
  41. t1 = U8TO16(&key[ 2]); st->r[1] = ((t0 >> 13) | (t1 << 3)) & 0x1fff;
  42. t2 = U8TO16(&key[ 4]); st->r[2] = ((t1 >> 10) | (t2 << 6)) & 0x1f03;
  43. t3 = U8TO16(&key[ 6]); st->r[3] = ((t2 >> 7) | (t3 << 9)) & 0x1fff;
  44. t4 = U8TO16(&key[ 8]); st->r[4] = ((t3 >> 4) | (t4 << 12)) & 0x00ff;
  45. st->r[5] = ((t4 >> 1) ) & 0x1ffe;
  46. t5 = U8TO16(&key[10]); st->r[6] = ((t4 >> 14) | (t5 << 2)) & 0x1fff;
  47. t6 = U8TO16(&key[12]); st->r[7] = ((t5 >> 11) | (t6 << 5)) & 0x1f81;
  48. t7 = U8TO16(&key[14]); st->r[8] = ((t6 >> 8) | (t7 << 8)) & 0x1fff;
  49. st->r[9] = ((t7 >> 5) ) & 0x007f;
  50. /* h = 0 */
  51. for (i = 0; i < 10; i++)
  52. st->h[i] = 0;
  53. /* save pad for later */
  54. for (i = 0; i < 8; i++)
  55. st->pad[i] = U8TO16(&key[16 + (2 * i)]);
  56. st->leftover = 0;
  57. st->final = 0;
  58. }
  59. static void
  60. poly1305_blocks(poly1305_state_internal_t *st, const unsigned char *m, size_t bytes) {
  61. const unsigned short hibit = (st->final) ? 0 : (1 << 11); /* 1 << 128 */
  62. unsigned short t0,t1,t2,t3,t4,t5,t6,t7;
  63. unsigned long d[10];
  64. unsigned long c;
  65. while (bytes >= poly1305_block_size) {
  66. size_t i, j;
  67. /* h += m[i] */
  68. t0 = U8TO16(&m[ 0]); st->h[0] += ( t0 ) & 0x1fff;
  69. t1 = U8TO16(&m[ 2]); st->h[1] += ((t0 >> 13) | (t1 << 3)) & 0x1fff;
  70. t2 = U8TO16(&m[ 4]); st->h[2] += ((t1 >> 10) | (t2 << 6)) & 0x1fff;
  71. t3 = U8TO16(&m[ 6]); st->h[3] += ((t2 >> 7) | (t3 << 9)) & 0x1fff;
  72. t4 = U8TO16(&m[ 8]); st->h[4] += ((t3 >> 4) | (t4 << 12)) & 0x1fff;
  73. st->h[5] += ((t4 >> 1) ) & 0x1fff;
  74. t5 = U8TO16(&m[10]); st->h[6] += ((t4 >> 14) | (t5 << 2)) & 0x1fff;
  75. t6 = U8TO16(&m[12]); st->h[7] += ((t5 >> 11) | (t6 << 5)) & 0x1fff;
  76. t7 = U8TO16(&m[14]); st->h[8] += ((t6 >> 8) | (t7 << 8)) & 0x1fff;
  77. st->h[9] += ((t7 >> 5) ) | hibit;
  78. /* h *= r, (partial) h %= p */
  79. for (i = 0, c = 0; i < 10; i++) {
  80. d[i] = c;
  81. for (j = 0; j < 10; j++) {
  82. d[i] += (unsigned long)st->h[j] * ((j <= i) ? st->r[i - j] : (5 * st->r[i + 10 - j]));
  83. /* Sum(h[i] * r[i] * 5) will overflow slightly above 6 products with an unclamped r, so carry at 5 */
  84. if (j == 4) {
  85. c = (d[i] >> 13);
  86. d[i] &= 0x1fff;
  87. }
  88. }
  89. c += (d[i] >> 13);
  90. d[i] &= 0x1fff;
  91. }
  92. c = ((c << 2) + c); /* c *= 5 */
  93. c += d[0];
  94. d[0] = ((unsigned short)c & 0x1fff);
  95. c = (c >> 13);
  96. d[1] += c;
  97. for (i = 0; i < 10; i++)
  98. st->h[i] = (unsigned short)d[i];
  99. m += poly1305_block_size;
  100. bytes -= poly1305_block_size;
  101. }
  102. }
  103. POLY1305_NOINLINE void
  104. poly1305_finish(poly1305_context *ctx, unsigned char mac[16]) {
  105. poly1305_state_internal_t *st = (poly1305_state_internal_t *)ctx;
  106. unsigned short c;
  107. unsigned short g[10];
  108. unsigned short mask;
  109. unsigned long f;
  110. size_t i;
  111. /* process the remaining block */
  112. if (st->leftover) {
  113. size_t i = st->leftover;
  114. st->buffer[i++] = 1;
  115. for (; i < poly1305_block_size; i++)
  116. st->buffer[i] = 0;
  117. st->final = 1;
  118. poly1305_blocks(st, st->buffer, poly1305_block_size);
  119. }
  120. /* fully carry h */
  121. c = st->h[1] >> 13;
  122. st->h[1] &= 0x1fff;
  123. for (i = 2; i < 10; i++) {
  124. st->h[i] += c;
  125. c = st->h[i] >> 13;
  126. st->h[i] &= 0x1fff;
  127. }
  128. st->h[0] += (c * 5);
  129. c = st->h[0] >> 13;
  130. st->h[0] &= 0x1fff;
  131. st->h[1] += c;
  132. c = st->h[1] >> 13;
  133. st->h[1] &= 0x1fff;
  134. st->h[2] += c;
  135. /* compute h + -p */
  136. g[0] = st->h[0] + 5;
  137. c = g[0] >> 13;
  138. g[0] &= 0x1fff;
  139. for (i = 1; i < 10; i++) {
  140. g[i] = st->h[i] + c;
  141. c = g[i] >> 13;
  142. g[i] &= 0x1fff;
  143. }
  144. /* select h if h < p, or h + -p if h >= p */
  145. mask = (c ^ 1) - 1;
  146. for (i = 0; i < 10; i++)
  147. g[i] &= mask;
  148. mask = ~mask;
  149. for (i = 0; i < 10; i++)
  150. st->h[i] = (st->h[i] & mask) | g[i];
  151. /* h = h % (2^128) */
  152. st->h[0] = ((st->h[0] ) | (st->h[1] << 13) ) & 0xffff;
  153. st->h[1] = ((st->h[1] >> 3) | (st->h[2] << 10) ) & 0xffff;
  154. st->h[2] = ((st->h[2] >> 6) | (st->h[3] << 7) ) & 0xffff;
  155. st->h[3] = ((st->h[3] >> 9) | (st->h[4] << 4) ) & 0xffff;
  156. st->h[4] = ((st->h[4] >> 12) | (st->h[5] << 1) | (st->h[6] << 14)) & 0xffff;
  157. st->h[5] = ((st->h[6] >> 2) | (st->h[7] << 11) ) & 0xffff;
  158. st->h[6] = ((st->h[7] >> 5) | (st->h[8] << 8) ) & 0xffff;
  159. st->h[7] = ((st->h[8] >> 8) | (st->h[9] << 5) ) & 0xffff;
  160. /* mac = (h + pad) % (2^128) */
  161. f = (unsigned long)st->h[0] + st->pad[0];
  162. st->h[0] = (unsigned short)f;
  163. for (i = 1; i < 8; i++) {
  164. f = (unsigned long)st->h[i] + st->pad[i] + (f >> 16);
  165. st->h[i] = (unsigned short)f;
  166. }
  167. for (i = 0; i < 8; i++)
  168. U16TO8(mac + (i * 2), st->h[i]);
  169. /* zero out the state */
  170. for (i = 0; i < 10; i++)
  171. st->h[i] = 0;
  172. for (i = 0; i < 10; i++)
  173. st->r[i] = 0;
  174. for (i = 0; i < 8; i++)
  175. st->pad[i] = 0;
  176. }