/* auth.c -- Poly1305 one-time authenticator, two-lane NEON variant (crypto_onetimeauth_poly1305_neon2). */
#include "crypto_onetimeauth.h"
#include "crypto_uint32.h"
#include <stdint.h>
  3. #define addmulmod crypto_onetimeauth_poly1305_neon2_addmulmod
  4. #define blocks crypto_onetimeauth_poly1305_neon2_blocks
  5. typedef struct {
  6. crypto_uint32 v[12]; /* for alignment; only using 10 */
  7. } fe1305x2;
  8. extern void addmulmod(fe1305x2 *r, const fe1305x2 *x, const fe1305x2 *y, const fe1305x2 *c);
  9. extern int blocks(fe1305x2 *h, const fe1305x2 *precomp, const unsigned char *in, unsigned int inlen);
  10. static void freeze(fe1305x2 *r)
  11. {
  12. int i;
  13. crypto_uint32 x0 = r->v[0];
  14. crypto_uint32 x1 = r->v[2];
  15. crypto_uint32 x2 = r->v[4];
  16. crypto_uint32 x3 = r->v[6];
  17. crypto_uint32 x4 = r->v[8];
  18. crypto_uint32 y0;
  19. crypto_uint32 y1;
  20. crypto_uint32 y2;
  21. crypto_uint32 y3;
  22. crypto_uint32 y4;
  23. crypto_uint32 swap;
  24. for (i = 0;i < 3;++i) {
  25. x1 += x0 >> 26; x0 &= 0x3ffffff;
  26. x2 += x1 >> 26; x1 &= 0x3ffffff;
  27. x3 += x2 >> 26; x2 &= 0x3ffffff;
  28. x4 += x3 >> 26; x3 &= 0x3ffffff;
  29. x0 += 5*(x4 >> 26); x4 &= 0x3ffffff;
  30. }
  31. y0 = x0 + 5;
  32. y1 = x1 + (y0 >> 26); y0 &= 0x3ffffff;
  33. y2 = x2 + (y1 >> 26); y1 &= 0x3ffffff;
  34. y3 = x3 + (y2 >> 26); y2 &= 0x3ffffff;
  35. y4 = x4 + (y3 >> 26); y3 &= 0x3ffffff;
  36. swap = -(y4 >> 26); y4 &= 0x3ffffff;
  37. y0 ^= x0;
  38. y1 ^= x1;
  39. y2 ^= x2;
  40. y3 ^= x3;
  41. y4 ^= x4;
  42. y0 &= swap;
  43. y1 &= swap;
  44. y2 &= swap;
  45. y3 &= swap;
  46. y4 &= swap;
  47. y0 ^= x0;
  48. y1 ^= x1;
  49. y2 ^= x2;
  50. y3 ^= x3;
  51. y4 ^= x4;
  52. r->v[0] = y0;
  53. r->v[2] = y1;
  54. r->v[4] = y2;
  55. r->v[6] = y3;
  56. r->v[8] = y4;
  57. }
  58. static void fe1305x2_tobytearray(unsigned char *r, fe1305x2 *x)
  59. {
  60. crypto_uint32 x0 = x->v[0];
  61. crypto_uint32 x1 = x->v[2];
  62. crypto_uint32 x2 = x->v[4];
  63. crypto_uint32 x3 = x->v[6];
  64. crypto_uint32 x4 = x->v[8];
  65. x1 += x0 >> 26;
  66. x0 &= 0x3ffffff;
  67. x2 += x1 >> 26;
  68. x1 &= 0x3ffffff;
  69. x3 += x2 >> 26;
  70. x2 &= 0x3ffffff;
  71. x4 += x3 >> 26;
  72. x3 &= 0x3ffffff;
  73. *(crypto_uint32 *) r = x0 + (x1 << 26);
  74. *(crypto_uint32 *) (r + 4) = (x1 >> 6) + (x2 << 20);
  75. *(crypto_uint32 *) (r + 8) = (x2 >> 12) + (x3 << 14);
  76. *(crypto_uint32 *) (r + 12) = (x3 >> 18) + (x4 << 8);
  77. }
  78. static void fe1305x2_frombytearray(fe1305x2 *r, const unsigned char *x, unsigned long long xlen)
  79. {
  80. int i;
  81. unsigned char t[17];
  82. for (i = 0;(i < 16) && (i < xlen);i++) t[i] = x[i];
  83. xlen -= i;
  84. x += i;
  85. t[i++] = 1;
  86. for (;i<17;i++) t[i] = 0;
  87. r->v[0] = 0x3ffffff & *(crypto_uint32 *) t;
  88. r->v[2] = 0x3ffffff & ((*(crypto_uint32 *) (t + 3)) >> 2);
  89. r->v[4] = 0x3ffffff & ((*(crypto_uint32 *) (t + 6)) >> 4);
  90. r->v[6] = 0x3ffffff & ((*(crypto_uint32 *) (t + 9)) >> 6);
  91. r->v[8] = *(crypto_uint32 *) (t + 13);
  92. if (xlen) {
  93. for (i = 0;(i < 16) && (i < xlen);i++) t[i] = x[i];
  94. t[i++] = 1;
  95. for (;i<17;i++) t[i] = 0;
  96. r->v[1] = 0x3ffffff & *(crypto_uint32 *) t;
  97. r->v[3] = 0x3ffffff & ((*(crypto_uint32 *) (t + 3)) >> 2);
  98. r->v[5] = 0x3ffffff & ((*(crypto_uint32 *) (t + 6)) >> 4);
  99. r->v[7] = 0x3ffffff & ((*(crypto_uint32 *) (t + 9)) >> 6);
  100. r->v[9] = *(crypto_uint32 *) (t + 13);
  101. }
  102. else
  103. r->v[1] = r->v[3] = r->v[5] = r->v[7] = r->v[9] = 0;
  104. }
  105. static const fe1305x2 zero __attribute__ ((aligned (16)));
  106. int crypto_onetimeauth(unsigned char *out,const unsigned char *in,unsigned long long inlen,const unsigned char *k)
  107. {
  108. unsigned int j;
  109. unsigned int tlen;
  110. unsigned char data[sizeof(fe1305x2[5]) + 128];
  111. fe1305x2 *const r = (fe1305x2 *) (data + (15 & (-(int) data)));
  112. fe1305x2 *const h = r + 1;
  113. fe1305x2 *const c = h + 1;
  114. fe1305x2 *const precomp = c + 1;
  115. r->v[1] = r->v[0] = 0x3ffffff & *(crypto_uint32 *) k;
  116. r->v[3] = r->v[2] = 0x3ffff03 & ((*(crypto_uint32 *) (k + 3)) >> 2);
  117. r->v[5] = r->v[4] = 0x3ffc0ff & ((*(crypto_uint32 *) (k + 6)) >> 4);
  118. r->v[7] = r->v[6] = 0x3f03fff & ((*(crypto_uint32 *) (k + 9)) >> 6);
  119. r->v[9] = r->v[8] = 0x00fffff & ((*(crypto_uint32 *) (k + 12)) >> 8);
  120. for (j = 0;j < 10;j++) h->v[j] = 0; /* XXX: should fast-forward a bit */
  121. addmulmod(precomp,r,r,&zero); /* precompute r^2 */
  122. if (inlen >= 64)
  123. addmulmod(precomp + 1,precomp,precomp,&zero); /* precompute r^4 */
  124. while (inlen > 32) {
  125. tlen = 1048576;
  126. if (inlen < 1048576) tlen = inlen;
  127. tlen -= blocks(h, precomp, in, tlen);
  128. inlen -= tlen;
  129. in += tlen;
  130. }
  131. addmulmod(h,h,precomp,&zero);
  132. if (inlen > 16) {
  133. fe1305x2_frombytearray(c,in,inlen);
  134. precomp->v[1] = r->v[1];
  135. precomp->v[3] = r->v[3];
  136. precomp->v[5] = r->v[5];
  137. precomp->v[7] = r->v[7];
  138. precomp->v[9] = r->v[9];
  139. addmulmod(h,h,precomp,c);
  140. } else if (inlen > 0) {
  141. fe1305x2_frombytearray(c,in,inlen);
  142. r->v[1] = 1;
  143. r->v[3] = 0;
  144. r->v[5] = 0;
  145. r->v[7] = 0;
  146. r->v[9] = 0;
  147. addmulmod(h,h,r,c);
  148. }
  149. h->v[0] += h->v[1];
  150. h->v[2] += h->v[3];
  151. h->v[4] += h->v[5];
  152. h->v[6] += h->v[7];
  153. h->v[8] += h->v[9];
  154. freeze(h);
  155. fe1305x2_frombytearray(c,k+16,16);
  156. c->v[8] ^= (1 << 24);
  157. h->v[0] += c->v[0];
  158. h->v[2] += c->v[2];
  159. h->v[4] += c->v[4];
  160. h->v[6] += c->v[6];
  161. h->v[8] += c->v[8];
  162. fe1305x2_tobytearray(out,h);
  163. return 0;
  164. }