hash_md5_sha.c 56 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929
  /* vi: set sw=4 ts=4: */
  /*
   * Utility routines.
   *
   * Copyright (C) 2010 Denys Vlasenko
   *
   * Licensed under GPLv2 or later, see file LICENSE in this source tree.
   */
  9. #include "libbb.h"
  /*
   * Two-level stringification:
   * STR1(s) turns its argument into a string literal verbatim,
   * STR(s) macro-expands the argument first and then stringifies the result.
   */
  #define STR1(s) #s
  #define STR(s) STR1(s)
  /* True when ENABLE_SHA512SUM or ENABLE_USE_BB_CRYPT_SHA is enabled */
  #define NEED_SHA512 (ENABLE_SHA512SUM || ENABLE_USE_BB_CRYPT_SHA)
  13. #if ENABLE_SHA1_HWACCEL || ENABLE_SHA256_HWACCEL
  14. # if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
  /*
   * Execute the x86 CPUID instruction.
   *
   * On entry *eax, *ebx and *ecx supply the register values handed to
   * the instruction (the caller in this file passes the leaf in *eax and
   * the subleaf in *ecx). On return *eax, *ebx, *ecx and *edx hold the
   * values CPUID left in EAX, EBX, ECX and EDX. *edx is output-only.
   */
  static void cpuid_eax_ebx_ecx(unsigned *eax, unsigned *ebx, unsigned *ecx, unsigned *edx)
  {
      /* __asm__ rather than asm: also accepted in strict ISO C modes */
      __asm__ ("cpuid"
          : "=a"(*eax), "=b"(*ebx), "=c"(*ecx), "=d"(*edx)
          : "0" (*eax), "1" (*ebx), "2" (*ecx)
      );
  }
  22. static smallint shaNI;
  23. static NOINLINE int get_shaNI(void)
  24. {
  25. /* Get leaf 7 subleaf 0. Exists on all CPUs since Merom (2006).
  26. * "If a value entered for CPUID.EAX is higher than the maximum
  27. * input value for basic or extended function for that processor
  28. * then the data for the highest basic information leaf is returned".
  29. * This means that Pentiums 4 would return leaf 5 or 6 instead of 7,
  30. * which happen to have zero in EBX bit 29. Thus they should work too.
  31. */
  32. unsigned eax = 7;
  33. unsigned ecx = 0;
  34. unsigned ebx = 0; /* should not be needed, paranoia */
  35. unsigned edx;
  36. cpuid_eax_ebx_ecx(&eax, &ebx, &ecx, &edx);
  37. ebx = ((ebx >> 28) & 2) - 1; /* bit 29 -> 1 or -1 */
  38. shaNI = (int)ebx;
  39. return (int)ebx;
  40. }
  41. void FAST_FUNC sha1_process_block64_shaNI(sha1_ctx_t *ctx);
  42. void FAST_FUNC sha256_process_block64_shaNI(sha256_ctx_t *ctx);
  43. # if defined(__i386__)
  44. struct ASM_expects_76_shaNI { char t[1 - 2*(offsetof(sha256_ctx_t, hash) != 76)]; };
  45. # endif
  46. # if defined(__x86_64__)
  47. struct ASM_expects_80_shaNI { char t[1 - 2*(offsetof(sha256_ctx_t, hash) != 80)]; };
  48. # endif
  49. # endif
  50. #endif
  /* gcc 4.2.1 optimizes rotr64 better with inline than with macro
   * (for rotX32, there is no difference). Why? My guess is that
   * macro requires clever common subexpression elimination heuristics
   * in gcc, while inline basically forces it to happen.
   */
  /*
   * Rotate the 32-bit value x left by n bits.
   * Both shift counts are reduced modulo 32, so n == 0 (or any multiple
   * of 32) returns x unchanged instead of shifting by the full width of
   * the type, which is undefined behavior in C.
   */
  static ALWAYS_INLINE uint32_t rotl32(uint32_t x, unsigned n)
  {
      return (x << (n & 31)) | (x >> (-n & 31));
  }
  /*
   * Rotate the 32-bit value x right by n bits.
   * Both shift counts are reduced modulo 32, so n == 0 (or any multiple
   * of 32) returns x unchanged instead of shifting by the full width of
   * the type, which is undefined behavior in C.
   */
  static ALWAYS_INLINE uint32_t rotr32(uint32_t x, unsigned n)
  {
      return (x >> (n & 31)) | (x << (-n & 31));
  }
  /* rotr64 is needed for sha512 only: */
  /*
   * Rotate the 64-bit value x right by n bits.
   * Both shift counts are reduced modulo 64, so n == 0 (or any multiple
   * of 64) returns x unchanged instead of shifting by the full width of
   * the type, which is undefined behavior in C.
   */
  static ALWAYS_INLINE uint64_t rotr64(uint64_t x, unsigned n)
  {
      return (x >> (n & 63)) | (x << (-n & 63));
  }
  /* rotl64 only used for sha3 currently */
  /*
   * Rotate the 64-bit value x left by n bits.
   * Both shift counts are reduced modulo 64, so n == 0 (or any multiple
   * of 64) returns x unchanged instead of shifting by the full width of
   * the type, which is undefined behavior in C.
   */
  static ALWAYS_INLINE uint64_t rotl64(uint64_t x, unsigned n)
  {
      return (x << (n & 63)) | (x >> (-n & 63));
  }
  77. /* Process the remaining bytes in the buffer */
  78. static void FAST_FUNC common64_end(md5_ctx_t *ctx, int swap_needed)
  79. {
  80. unsigned bufpos = ctx->total64 & 63;
  81. /* Pad the buffer to the next 64-byte boundary with 0x80,0,0,0... */
  82. ctx->wbuffer[bufpos++] = 0x80;
  83. /* This loop iterates either once or twice, no more, no less */
  84. while (1) {
  85. unsigned remaining = 64 - bufpos;
  86. memset(ctx->wbuffer + bufpos, 0, remaining);
  87. /* Do we have enough space for the length count? */
  88. if (remaining >= 8) {
  89. /* Store the 64-bit counter of bits in the buffer */
  90. uint64_t t = ctx->total64 << 3;
  91. if (swap_needed)
  92. t = bb_bswap_64(t);
  93. /* wbuffer is suitably aligned for this */
  94. *(bb__aliased_uint64_t *) (&ctx->wbuffer[64 - 8]) = t;
  95. }
  96. ctx->process_block(ctx);
  97. if (remaining >= 8)
  98. break;
  99. bufpos = 0;
  100. }
  101. }
  /*
   * Compute MD5 checksum of strings according to the
   * definition of MD5 in RFC 1321 from April 1992.
   *
   * Written by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1995.
   *
   * Copyright (C) 1995-1999 Free Software Foundation, Inc.
   * Copyright (C) 2001 Manuel Novoa III
   * Copyright (C) 2003 Glenn L. McGrath
   * Copyright (C) 2003 Erik Andersen
   *
   * Licensed under GPLv2 or later, see file LICENSE in this source tree.
   */
  /* 0: fastest, 3: smallest */
  /* MD5_SMALL is CONFIG_MD5_SMALL clamped to the range 0..3 */
  #if CONFIG_MD5_SMALL < 0
  # define MD5_SMALL 0
  #elif CONFIG_MD5_SMALL > 3
  # define MD5_SMALL 3
  #else
  # define MD5_SMALL CONFIG_MD5_SMALL
  #endif
  /* These are the four functions used in the four steps of the MD5 algorithm
   * and defined in the RFC 1321. The first function is a little bit optimized
   * (as found in Colin Plumb's public domain implementation).
   * #define FF(b, c, d) ((b & c) | (~b & d))
   *
   * Every argument is parenthesized so that compound expressions can be
   * passed safely. Arguments may be evaluated more than once.
   */
  #undef FF
  #undef FG
  #undef FH
  #undef FI
  #define FF(b, c, d) ((d) ^ ((b) & ((c) ^ (d))))
  #define FG(b, c, d) FF(d, b, c)
  #define FH(b, c, d) ((b) ^ (c) ^ (d))
  #define FI(b, c, d) ((c) ^ ((b) | ~(d)))
  136. /* Hash a single block, 64 bytes long and 4-byte aligned */
  137. static void FAST_FUNC md5_process_block64(md5_ctx_t *ctx)
  138. {
  139. #if MD5_SMALL > 0
  140. /* Before we start, one word to the strange constants.
  141. They are defined in RFC 1321 as
  142. T[i] = (int)(2^32 * fabs(sin(i))), i=1..64
  143. */
  144. static const uint32_t C_array[] ALIGN4 = {
  145. /* round 1 */
  146. 0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee,
  147. 0xf57c0faf, 0x4787c62a, 0xa8304613, 0xfd469501,
  148. 0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be,
  149. 0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821,
  150. /* round 2 */
  151. 0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa,
  152. 0xd62f105d, 0x02441453, 0xd8a1e681, 0xe7d3fbc8,
  153. 0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed,
  154. 0xa9e3e905, 0xfcefa3f8, 0x676f02d9, 0x8d2a4c8a,
  155. /* round 3 */
  156. 0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c,
  157. 0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70,
  158. 0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x4881d05,
  159. 0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665,
  160. /* round 4 */
  161. 0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039,
  162. 0x655b59c3, 0x8f0ccc92, 0xffeff47d, 0x85845dd1,
  163. 0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1,
  164. 0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391
  165. };
  166. static const char P_array[] ALIGN1 = {
  167. # if MD5_SMALL > 1
  168. 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, /* 1 */
  169. # endif
  170. 1, 6, 11, 0, 5, 10, 15, 4, 9, 14, 3, 8, 13, 2, 7, 12, /* 2 */
  171. 5, 8, 11, 14, 1, 4, 7, 10, 13, 0, 3, 6, 9, 12, 15, 2, /* 3 */
  172. 0, 7, 14, 5, 12, 3, 10, 1, 8, 15, 6, 13, 4, 11, 2, 9 /* 4 */
  173. };
  174. #endif
  175. uint32_t *words = (void*) ctx->wbuffer;
  176. uint32_t A = ctx->hash[0];
  177. uint32_t B = ctx->hash[1];
  178. uint32_t C = ctx->hash[2];
  179. uint32_t D = ctx->hash[3];
  180. #if MD5_SMALL >= 2 /* 2 or 3 */
  181. static const char S_array[] ALIGN1 = {
  182. 7, 12, 17, 22,
  183. 5, 9, 14, 20,
  184. 4, 11, 16, 23,
  185. 6, 10, 15, 21
  186. };
  187. const uint32_t *pc;
  188. const char *pp;
  189. const char *ps;
  190. int i;
  191. uint32_t temp;
  192. if (BB_BIG_ENDIAN)
  193. for (i = 0; i < 16; i++)
  194. words[i] = SWAP_LE32(words[i]);
  195. # if MD5_SMALL == 3
  196. pc = C_array;
  197. pp = P_array;
  198. ps = S_array - 4;
  199. for (i = 0; i < 64; i++) {
  200. if ((i & 0x0f) == 0)
  201. ps += 4;
  202. temp = A;
  203. switch (i >> 4) {
  204. case 0:
  205. temp += FF(B, C, D);
  206. break;
  207. case 1:
  208. temp += FG(B, C, D);
  209. break;
  210. case 2:
  211. temp += FH(B, C, D);
  212. break;
  213. default: /* case 3 */
  214. temp += FI(B, C, D);
  215. }
  216. temp += words[(int) (*pp++)] + *pc++;
  217. temp = rotl32(temp, ps[i & 3]);
  218. temp += B;
  219. A = D;
  220. D = C;
  221. C = B;
  222. B = temp;
  223. }
  224. # else /* MD5_SMALL == 2 */
  225. pc = C_array;
  226. pp = P_array;
  227. ps = S_array;
  228. for (i = 0; i < 16; i++) {
  229. temp = A + FF(B, C, D) + words[(int) (*pp++)] + *pc++;
  230. temp = rotl32(temp, ps[i & 3]);
  231. temp += B;
  232. A = D;
  233. D = C;
  234. C = B;
  235. B = temp;
  236. }
  237. ps += 4;
  238. for (i = 0; i < 16; i++) {
  239. temp = A + FG(B, C, D) + words[(int) (*pp++)] + *pc++;
  240. temp = rotl32(temp, ps[i & 3]);
  241. temp += B;
  242. A = D;
  243. D = C;
  244. C = B;
  245. B = temp;
  246. }
  247. ps += 4;
  248. for (i = 0; i < 16; i++) {
  249. temp = A + FH(B, C, D) + words[(int) (*pp++)] + *pc++;
  250. temp = rotl32(temp, ps[i & 3]);
  251. temp += B;
  252. A = D;
  253. D = C;
  254. C = B;
  255. B = temp;
  256. }
  257. ps += 4;
  258. for (i = 0; i < 16; i++) {
  259. temp = A + FI(B, C, D) + words[(int) (*pp++)] + *pc++;
  260. temp = rotl32(temp, ps[i & 3]);
  261. temp += B;
  262. A = D;
  263. D = C;
  264. C = B;
  265. B = temp;
  266. }
  267. # endif
  268. /* Add checksum to the starting values */
  269. ctx->hash[0] += A;
  270. ctx->hash[1] += B;
  271. ctx->hash[2] += C;
  272. ctx->hash[3] += D;
  273. #else /* MD5_SMALL == 0 or 1 */
  274. # if MD5_SMALL == 1
  275. const uint32_t *pc;
  276. const char *pp;
  277. int i;
  278. # endif
  279. /* First round: using the given function, the context and a constant
  280. the next context is computed. Because the algorithm's processing
  281. unit is a 32-bit word and it is determined to work on words in
  282. little endian byte order we perhaps have to change the byte order
  283. before the computation. To reduce the work for the next steps
  284. we save swapped words in WORDS array. */
  285. # undef OP
  286. # define OP(a, b, c, d, s, T) \
  287. do { \
  288. a += FF(b, c, d) + (*words IF_BIG_ENDIAN(= SWAP_LE32(*words))) + T; \
  289. words++; \
  290. a = rotl32(a, s); \
  291. a += b; \
  292. } while (0)
  293. /* Round 1 */
  294. # if MD5_SMALL == 1
  295. pc = C_array;
  296. for (i = 0; i < 4; i++) {
  297. OP(A, B, C, D, 7, *pc++);
  298. OP(D, A, B, C, 12, *pc++);
  299. OP(C, D, A, B, 17, *pc++);
  300. OP(B, C, D, A, 22, *pc++);
  301. }
  302. # else
  303. OP(A, B, C, D, 7, 0xd76aa478);
  304. OP(D, A, B, C, 12, 0xe8c7b756);
  305. OP(C, D, A, B, 17, 0x242070db);
  306. OP(B, C, D, A, 22, 0xc1bdceee);
  307. OP(A, B, C, D, 7, 0xf57c0faf);
  308. OP(D, A, B, C, 12, 0x4787c62a);
  309. OP(C, D, A, B, 17, 0xa8304613);
  310. OP(B, C, D, A, 22, 0xfd469501);
  311. OP(A, B, C, D, 7, 0x698098d8);
  312. OP(D, A, B, C, 12, 0x8b44f7af);
  313. OP(C, D, A, B, 17, 0xffff5bb1);
  314. OP(B, C, D, A, 22, 0x895cd7be);
  315. OP(A, B, C, D, 7, 0x6b901122);
  316. OP(D, A, B, C, 12, 0xfd987193);
  317. OP(C, D, A, B, 17, 0xa679438e);
  318. OP(B, C, D, A, 22, 0x49b40821);
  319. # endif
  320. words -= 16;
  321. /* For the second to fourth round we have the possibly swapped words
  322. in WORDS. Redefine the macro to take an additional first
  323. argument specifying the function to use. */
  324. # undef OP
  325. # define OP(f, a, b, c, d, k, s, T) \
  326. do { \
  327. a += f(b, c, d) + words[k] + T; \
  328. a = rotl32(a, s); \
  329. a += b; \
  330. } while (0)
  331. /* Round 2 */
  332. # if MD5_SMALL == 1
  333. pp = P_array;
  334. for (i = 0; i < 4; i++) {
  335. OP(FG, A, B, C, D, (int) (*pp++), 5, *pc++);
  336. OP(FG, D, A, B, C, (int) (*pp++), 9, *pc++);
  337. OP(FG, C, D, A, B, (int) (*pp++), 14, *pc++);
  338. OP(FG, B, C, D, A, (int) (*pp++), 20, *pc++);
  339. }
  340. # else
  341. OP(FG, A, B, C, D, 1, 5, 0xf61e2562);
  342. OP(FG, D, A, B, C, 6, 9, 0xc040b340);
  343. OP(FG, C, D, A, B, 11, 14, 0x265e5a51);
  344. OP(FG, B, C, D, A, 0, 20, 0xe9b6c7aa);
  345. OP(FG, A, B, C, D, 5, 5, 0xd62f105d);
  346. OP(FG, D, A, B, C, 10, 9, 0x02441453);
  347. OP(FG, C, D, A, B, 15, 14, 0xd8a1e681);
  348. OP(FG, B, C, D, A, 4, 20, 0xe7d3fbc8);
  349. OP(FG, A, B, C, D, 9, 5, 0x21e1cde6);
  350. OP(FG, D, A, B, C, 14, 9, 0xc33707d6);
  351. OP(FG, C, D, A, B, 3, 14, 0xf4d50d87);
  352. OP(FG, B, C, D, A, 8, 20, 0x455a14ed);
  353. OP(FG, A, B, C, D, 13, 5, 0xa9e3e905);
  354. OP(FG, D, A, B, C, 2, 9, 0xfcefa3f8);
  355. OP(FG, C, D, A, B, 7, 14, 0x676f02d9);
  356. OP(FG, B, C, D, A, 12, 20, 0x8d2a4c8a);
  357. # endif
  358. /* Round 3 */
  359. # if MD5_SMALL == 1
  360. for (i = 0; i < 4; i++) {
  361. OP(FH, A, B, C, D, (int) (*pp++), 4, *pc++);
  362. OP(FH, D, A, B, C, (int) (*pp++), 11, *pc++);
  363. OP(FH, C, D, A, B, (int) (*pp++), 16, *pc++);
  364. OP(FH, B, C, D, A, (int) (*pp++), 23, *pc++);
  365. }
  366. # else
  367. OP(FH, A, B, C, D, 5, 4, 0xfffa3942);
  368. OP(FH, D, A, B, C, 8, 11, 0x8771f681);
  369. OP(FH, C, D, A, B, 11, 16, 0x6d9d6122);
  370. OP(FH, B, C, D, A, 14, 23, 0xfde5380c);
  371. OP(FH, A, B, C, D, 1, 4, 0xa4beea44);
  372. OP(FH, D, A, B, C, 4, 11, 0x4bdecfa9);
  373. OP(FH, C, D, A, B, 7, 16, 0xf6bb4b60);
  374. OP(FH, B, C, D, A, 10, 23, 0xbebfbc70);
  375. OP(FH, A, B, C, D, 13, 4, 0x289b7ec6);
  376. OP(FH, D, A, B, C, 0, 11, 0xeaa127fa);
  377. OP(FH, C, D, A, B, 3, 16, 0xd4ef3085);
  378. OP(FH, B, C, D, A, 6, 23, 0x04881d05);
  379. OP(FH, A, B, C, D, 9, 4, 0xd9d4d039);
  380. OP(FH, D, A, B, C, 12, 11, 0xe6db99e5);
  381. OP(FH, C, D, A, B, 15, 16, 0x1fa27cf8);
  382. OP(FH, B, C, D, A, 2, 23, 0xc4ac5665);
  383. # endif
  384. /* Round 4 */
  385. # if MD5_SMALL == 1
  386. for (i = 0; i < 4; i++) {
  387. OP(FI, A, B, C, D, (int) (*pp++), 6, *pc++);
  388. OP(FI, D, A, B, C, (int) (*pp++), 10, *pc++);
  389. OP(FI, C, D, A, B, (int) (*pp++), 15, *pc++);
  390. OP(FI, B, C, D, A, (int) (*pp++), 21, *pc++);
  391. }
  392. # else
  393. OP(FI, A, B, C, D, 0, 6, 0xf4292244);
  394. OP(FI, D, A, B, C, 7, 10, 0x432aff97);
  395. OP(FI, C, D, A, B, 14, 15, 0xab9423a7);
  396. OP(FI, B, C, D, A, 5, 21, 0xfc93a039);
  397. OP(FI, A, B, C, D, 12, 6, 0x655b59c3);
  398. OP(FI, D, A, B, C, 3, 10, 0x8f0ccc92);
  399. OP(FI, C, D, A, B, 10, 15, 0xffeff47d);
  400. OP(FI, B, C, D, A, 1, 21, 0x85845dd1);
  401. OP(FI, A, B, C, D, 8, 6, 0x6fa87e4f);
  402. OP(FI, D, A, B, C, 15, 10, 0xfe2ce6e0);
  403. OP(FI, C, D, A, B, 6, 15, 0xa3014314);
  404. OP(FI, B, C, D, A, 13, 21, 0x4e0811a1);
  405. OP(FI, A, B, C, D, 4, 6, 0xf7537e82);
  406. OP(FI, D, A, B, C, 11, 10, 0xbd3af235);
  407. OP(FI, C, D, A, B, 2, 15, 0x2ad7d2bb);
  408. OP(FI, B, C, D, A, 9, 21, 0xeb86d391);
  409. # endif
  410. /* Add checksum to the starting values */
  411. ctx->hash[0] += A;
  412. ctx->hash[1] += B;
  413. ctx->hash[2] += C;
  414. ctx->hash[3] += D;
  415. #endif
  416. }
  417. #undef OP
  418. #undef FF
  419. #undef FG
  420. #undef FH
  421. #undef FI
  422. /* Initialize structure containing state of computation.
  423. * (RFC 1321, 3.3: Step 3)
  424. */
  425. void FAST_FUNC md5_begin(md5_ctx_t *ctx)
  426. {
  427. ctx->hash[0] = 0x67452301;
  428. ctx->hash[1] = 0xefcdab89;
  429. ctx->hash[2] = 0x98badcfe;
  430. ctx->hash[3] = 0x10325476;
  431. ctx->total64 = 0;
  432. ctx->process_block = md5_process_block64;
  433. }
  434. /* Used also for sha1 and sha256 */
  435. void FAST_FUNC md5_hash(md5_ctx_t *ctx, const void *buffer, size_t len)
  436. {
  437. unsigned bufpos = ctx->total64 & 63;
  438. ctx->total64 += len;
  439. while (1) {
  440. unsigned remaining = 64 - bufpos;
  441. if (remaining > len)
  442. remaining = len;
  443. /* Copy data into aligned buffer */
  444. memcpy(ctx->wbuffer + bufpos, buffer, remaining);
  445. len -= remaining;
  446. buffer = (const char *)buffer + remaining;
  447. bufpos += remaining;
  448. /* Clever way to do "if (bufpos != N) break; ... ; bufpos = 0;" */
  449. bufpos -= 64;
  450. if (bufpos != 0)
  451. break;
  452. /* Buffer is filled up, process it */
  453. ctx->process_block(ctx);
  454. /*bufpos = 0; - already is */
  455. }
  456. }
  457. /* Process the remaining bytes in the buffer and put result from CTX
  458. * in first 16 bytes following RESBUF. The result is always in little
  459. * endian byte order, so that a byte-wise output yields to the wanted
  460. * ASCII representation of the message digest.
  461. */
  462. unsigned FAST_FUNC md5_end(md5_ctx_t *ctx, void *resbuf)
  463. {
  464. /* MD5 stores total in LE, need to swap on BE arches: */
  465. common64_end(ctx, /*swap_needed:*/ BB_BIG_ENDIAN);
  466. /* The MD5 result is in little endian byte order */
  467. if (BB_BIG_ENDIAN) {
  468. ctx->hash[0] = SWAP_LE32(ctx->hash[0]);
  469. ctx->hash[1] = SWAP_LE32(ctx->hash[1]);
  470. ctx->hash[2] = SWAP_LE32(ctx->hash[2]);
  471. ctx->hash[3] = SWAP_LE32(ctx->hash[3]);
  472. }
  473. memcpy(resbuf, ctx->hash, sizeof(ctx->hash[0]) * 4);
  474. return sizeof(ctx->hash[0]) * 4;
  475. }
  /*
   * SHA1 part is:
   * Copyright 2007 Rob Landley <rob@landley.net>
   *
   * Based on the public domain SHA-1 in C by Steve Reid <steve@edmweb.com>
   * from http://www.mirrors.wiretapped.net/security/cryptography/hashes/sha1/
   *
   * Licensed under GPLv2, see file LICENSE in this source tree.
   *
   * ---------------------------------------------------------------------------
   *
   * SHA256 and SHA512 parts are:
   * Released into the Public Domain by Ulrich Drepper <drepper@redhat.com>.
   * Shrank by Denys Vlasenko.
   *
   * ---------------------------------------------------------------------------
   *
   * The best way to test random blocksizes is to go to coreutils/md5_sha1_sum.c
   * and replace "4096" with something like "2000 + time(NULL) % 2097",
   * then rebuild and compare "shaNNNsum bigfile" results.
   */
  497. #if CONFIG_SHA1_SMALL == 0
  498. # if defined(__GNUC__) && defined(__i386__)
  499. static void FAST_FUNC sha1_process_block64(sha1_ctx_t *ctx UNUSED_PARAM)
  500. {
  501. BUILD_BUG_ON(offsetof(sha1_ctx_t, hash) != 76);
  502. asm(
  503. "\n\
  504. pushl %ebp # \n\
  505. pushl %edi # \n\
  506. pushl %esi # \n\
  507. pushl %ebx # \n\
  508. pushl %eax \n\
  509. movl $15, %edi \n\
  510. 1: \n\
  511. movl (%eax,%edi,4), %esi \n\
  512. bswap %esi \n\
  513. pushl %esi \n\
  514. decl %edi \n\
  515. jns 1b \n\
  516. movl 80(%eax), %ebx # b = ctx->hash[1] \n\
  517. movl 84(%eax), %ecx # c = ctx->hash[2] \n\
  518. movl 88(%eax), %edx # d = ctx->hash[3] \n\
  519. movl 92(%eax), %ebp # e = ctx->hash[4] \n\
  520. movl 76(%eax), %eax # a = ctx->hash[0] \n\
  521. #Register and stack use: \n\
  522. # eax..edx: a..d \n\
  523. # ebp: e \n\
  524. # esi,edi: temps \n\
  525. # 4*n(%esp): W[n] \n\
  526. "
  527. #define RD1As(a,b,c,d,e, n, RCONST) \
  528. "\n\
  529. ##movl 4*"n"(%esp), %esi # n=0, W[0] already in %esi \n\
  530. movl "c", %edi # c \n\
  531. xorl "d", %edi # ^d \n\
  532. andl "b", %edi # &b \n\
  533. xorl "d", %edi # (((c ^ d) & b) ^ d) \n\
  534. leal "RCONST"("e",%esi), "e" # e += RCONST + W[n] \n\
  535. addl %edi, "e" # e += (((c ^ d) & b) ^ d) \n\
  536. movl "a", %esi # \n\
  537. roll $5, %esi # rotl32(a,5) \n\
  538. addl %esi, "e" # e += rotl32(a,5) \n\
  539. rorl $2, "b" # b = rotl32(b,30) \n\
  540. "
  541. #define RD1Bs(a,b,c,d,e, n, RCONST) \
  542. "\n\
  543. movl 4*"n"(%esp), %esi # W[n] \n\
  544. movl "c", %edi # c \n\
  545. xorl "d", %edi # ^d \n\
  546. andl "b", %edi # &b \n\
  547. xorl "d", %edi # (((c ^ d) & b) ^ d) \n\
  548. leal "RCONST"("e",%esi), "e" # e += RCONST + W[n] \n\
  549. addl %edi, "e" # e += (((c ^ d) & b) ^ d) \n\
  550. movl "a", %esi # \n\
  551. roll $5, %esi # rotl32(a,5) \n\
  552. addl %esi, "e" # e += rotl32(a,5) \n\
  553. rorl $2, "b" # b = rotl32(b,30) \n\
  554. "
  555. #define RD1Cs(a,b,c,d,e, n13,n8,n2,n, RCONST) \
  556. "\n\
  557. movl 4*"n13"(%esp), %esi # W[(n+13) & 15] \n\
  558. xorl 4*"n8"(%esp), %esi # ^W[(n+8) & 15] \n\
  559. xorl 4*"n2"(%esp), %esi # ^W[(n+2) & 15] \n\
  560. xorl 4*"n"(%esp), %esi # ^W[n & 15] \n\
  561. roll %esi # \n\
  562. movl %esi, 4*"n"(%esp) # store to W[n & 15] \n\
  563. movl "c", %edi # c \n\
  564. xorl "d", %edi # ^d \n\
  565. andl "b", %edi # &b \n\
  566. xorl "d", %edi # (((c ^ d) & b) ^ d) \n\
  567. leal "RCONST"("e",%esi), "e" # e += RCONST + mixed_W \n\
  568. addl %edi, "e" # e += (((c ^ d) & b) ^ d) \n\
  569. movl "a", %esi # \n\
  570. roll $5, %esi # rotl32(a,5) \n\
  571. addl %esi, "e" # e += rotl32(a,5) \n\
  572. rorl $2, "b" # b = rotl32(b,30) \n\
  573. "
  574. #define RD1A(a,b,c,d,e, n) RD1As("%e"STR(a),"%e"STR(b),"%e"STR(c),"%e"STR(d),"%e"STR(e), STR((n)), STR(RCONST))
  575. #define RD1B(a,b,c,d,e, n) RD1Bs("%e"STR(a),"%e"STR(b),"%e"STR(c),"%e"STR(d),"%e"STR(e), STR((n)), STR(RCONST))
  576. #define RD1C(a,b,c,d,e, n) RD1Cs("%e"STR(a),"%e"STR(b),"%e"STR(c),"%e"STR(d),"%e"STR(e), STR(((n+13)&15)), STR(((n+8)&15)), STR(((n+2)&15)), STR(((n)&15)), STR(RCONST))
  577. #undef RCONST
  578. #define RCONST 0x5A827999
  579. RD1A(ax,bx,cx,dx,bp, 0) RD1B(bp,ax,bx,cx,dx, 1) RD1B(dx,bp,ax,bx,cx, 2) RD1B(cx,dx,bp,ax,bx, 3) RD1B(bx,cx,dx,bp,ax, 4)
  580. RD1B(ax,bx,cx,dx,bp, 5) RD1B(bp,ax,bx,cx,dx, 6) RD1B(dx,bp,ax,bx,cx, 7) RD1B(cx,dx,bp,ax,bx, 8) RD1B(bx,cx,dx,bp,ax, 9)
  581. RD1B(ax,bx,cx,dx,bp,10) RD1B(bp,ax,bx,cx,dx,11) RD1B(dx,bp,ax,bx,cx,12) RD1B(cx,dx,bp,ax,bx,13) RD1B(bx,cx,dx,bp,ax,14)
  582. RD1B(ax,bx,cx,dx,bp,15) RD1C(bp,ax,bx,cx,dx,16) RD1C(dx,bp,ax,bx,cx,17) RD1C(cx,dx,bp,ax,bx,18) RD1C(bx,cx,dx,bp,ax,19)
  583. #define RD2s(a,b,c,d,e, n13,n8,n2,n, RCONST) \
  584. "\n\
  585. movl 4*"n13"(%esp), %esi # W[(n+13) & 15] \n\
  586. xorl 4*"n8"(%esp), %esi # ^W[(n+8) & 15] \n\
  587. xorl 4*"n2"(%esp), %esi # ^W[(n+2) & 15] \n\
  588. xorl 4*"n"(%esp), %esi # ^W[n & 15] \n\
  589. roll %esi # \n\
  590. movl %esi, 4*"n"(%esp) # store to W[n & 15] \n\
  591. movl "c", %edi # c \n\
  592. xorl "d", %edi # ^d \n\
  593. xorl "b", %edi # ^b \n\
  594. leal "RCONST"("e",%esi), "e" # e += RCONST + mixed_W \n\
  595. addl %edi, "e" # e += (c ^ d ^ b) \n\
  596. movl "a", %esi # \n\
  597. roll $5, %esi # rotl32(a,5) \n\
  598. addl %esi, "e" # e += rotl32(a,5) \n\
  599. rorl $2, "b" # b = rotl32(b,30) \n\
  600. "
  601. #define RD2(a,b,c,d,e, n) RD2s("%e"STR(a),"%e"STR(b),"%e"STR(c),"%e"STR(d),"%e"STR(e), STR(((20+n+13)&15)), STR(((20+n+8)&15)), STR(((20+n+2)&15)), STR(((20+n)&15)), STR(RCONST))
  602. #undef RCONST
  603. #define RCONST 0x6ED9EBA1
  604. RD2(ax,bx,cx,dx,bp, 0) RD2(bp,ax,bx,cx,dx, 1) RD2(dx,bp,ax,bx,cx, 2) RD2(cx,dx,bp,ax,bx, 3) RD2(bx,cx,dx,bp,ax, 4)
  605. RD2(ax,bx,cx,dx,bp, 5) RD2(bp,ax,bx,cx,dx, 6) RD2(dx,bp,ax,bx,cx, 7) RD2(cx,dx,bp,ax,bx, 8) RD2(bx,cx,dx,bp,ax, 9)
  606. RD2(ax,bx,cx,dx,bp,10) RD2(bp,ax,bx,cx,dx,11) RD2(dx,bp,ax,bx,cx,12) RD2(cx,dx,bp,ax,bx,13) RD2(bx,cx,dx,bp,ax,14)
  607. RD2(ax,bx,cx,dx,bp,15) RD2(bp,ax,bx,cx,dx,16) RD2(dx,bp,ax,bx,cx,17) RD2(cx,dx,bp,ax,bx,18) RD2(bx,cx,dx,bp,ax,19)
  608. #define RD3s(a,b,c,d,e, n13,n8,n2,n, RCONST) \
  609. "\n\
  610. movl "b", %edi # di: b \n\
  611. movl "b", %esi # si: b \n\
  612. orl "c", %edi # di: b | c \n\
  613. andl "c", %esi # si: b & c \n\
  614. andl "d", %edi # di: (b | c) & d \n\
  615. orl %esi, %edi # ((b | c) & d) | (b & c) \n\
  616. movl 4*"n13"(%esp), %esi # W[(n+13) & 15] \n\
  617. xorl 4*"n8"(%esp), %esi # ^W[(n+8) & 15] \n\
  618. xorl 4*"n2"(%esp), %esi # ^W[(n+2) & 15] \n\
  619. xorl 4*"n"(%esp), %esi # ^W[n & 15] \n\
  620. roll %esi # \n\
  621. movl %esi, 4*"n"(%esp) # store to W[n & 15] \n\
  622. addl %edi, "e" # += ((b | c) & d) | (b & c)\n\
  623. leal "RCONST"("e",%esi), "e" # e += RCONST + mixed_W \n\
  624. movl "a", %esi # \n\
  625. roll $5, %esi # rotl32(a,5) \n\
  626. addl %esi, "e" # e += rotl32(a,5) \n\
  627. rorl $2, "b" # b = rotl32(b,30) \n\
  628. "
  629. #define RD3(a,b,c,d,e, n) RD3s("%e"STR(a),"%e"STR(b),"%e"STR(c),"%e"STR(d),"%e"STR(e), STR(((40+n+13)&15)), STR(((40+n+8)&15)), STR(((40+n+2)&15)), STR(((40+n)&15)), STR(RCONST))
  630. #undef RCONST
  631. #define RCONST 0x8F1BBCDC
  632. RD3(ax,bx,cx,dx,bp, 0) RD3(bp,ax,bx,cx,dx, 1) RD3(dx,bp,ax,bx,cx, 2) RD3(cx,dx,bp,ax,bx, 3) RD3(bx,cx,dx,bp,ax, 4)
  633. RD3(ax,bx,cx,dx,bp, 5) RD3(bp,ax,bx,cx,dx, 6) RD3(dx,bp,ax,bx,cx, 7) RD3(cx,dx,bp,ax,bx, 8) RD3(bx,cx,dx,bp,ax, 9)
  634. RD3(ax,bx,cx,dx,bp,10) RD3(bp,ax,bx,cx,dx,11) RD3(dx,bp,ax,bx,cx,12) RD3(cx,dx,bp,ax,bx,13) RD3(bx,cx,dx,bp,ax,14)
  635. RD3(ax,bx,cx,dx,bp,15) RD3(bp,ax,bx,cx,dx,16) RD3(dx,bp,ax,bx,cx,17) RD3(cx,dx,bp,ax,bx,18) RD3(bx,cx,dx,bp,ax,19)
  636. #define RD4As(a,b,c,d,e, n13,n8,n2,n, RCONST) \
  637. "\n\
  638. movl 4*"n13"(%esp), %esi # W[(n+13) & 15] \n\
  639. xorl 4*"n8"(%esp), %esi # ^W[(n+8) & 15] \n\
  640. xorl 4*"n2"(%esp), %esi # ^W[(n+2) & 15] \n\
  641. xorl 4*"n"(%esp), %esi # ^W[n & 15] \n\
  642. roll %esi # \n\
  643. movl %esi, 4*"n"(%esp) # store to W[n & 15] \n\
  644. movl "c", %edi # c \n\
  645. xorl "d", %edi # ^d \n\
  646. xorl "b", %edi # ^b \n\
  647. leal "RCONST"("e",%esi), "e" # e += RCONST + mixed_W \n\
  648. addl %edi, "e" # e += (c ^ d ^ b) \n\
  649. movl "a", %esi # \n\
  650. roll $5, %esi # rotl32(a,5) \n\
  651. addl %esi, "e" # e += rotl32(a,5) \n\
  652. rorl $2, "b" # b = rotl32(b,30) \n\
  653. "
  654. #define RD4Bs(a,b,c,d,e, n13,n8,n2,n, RCONST) \
  655. "\n\
  656. movl 4*"n13"(%esp), %esi # W[(n+13) & 15] \n\
  657. xorl 4*"n8"(%esp), %esi # ^W[(n+8) & 15] \n\
  658. xorl 4*"n2"(%esp), %esi # ^W[(n+2) & 15] \n\
  659. xorl 4*"n"(%esp), %esi # ^W[n & 15] \n\
  660. roll %esi # \n\
  661. ##movl %esi, 4*"n"(%esp) # store to W[n & 15] elided \n\
  662. movl "c", %edi # c \n\
  663. xorl "d", %edi # ^d \n\
  664. xorl "b", %edi # ^b \n\
  665. leal "RCONST"("e",%esi), "e" # e += RCONST + mixed_W \n\
  666. addl %edi, "e" # e += (c ^ d ^ b) \n\
  667. movl "a", %esi # \n\
  668. roll $5, %esi # rotl32(a,5) \n\
  669. addl %esi, "e" # e += rotl32(a,5) \n\
  670. rorl $2, "b" # b = rotl32(b,30) \n\
  671. "
  672. #define RD4A(a,b,c,d,e, n) RD4As("%e"STR(a),"%e"STR(b),"%e"STR(c),"%e"STR(d),"%e"STR(e), STR(((60+n+13)&15)), STR(((60+n+8)&15)), STR(((60+n+2)&15)), STR(((60+n)&15)), STR(RCONST))
  673. #define RD4B(a,b,c,d,e, n) RD4Bs("%e"STR(a),"%e"STR(b),"%e"STR(c),"%e"STR(d),"%e"STR(e), STR(((60+n+13)&15)), STR(((60+n+8)&15)), STR(((60+n+2)&15)), STR(((60+n)&15)), STR(RCONST))
  674. #undef RCONST
  675. #define RCONST 0xCA62C1D6
  676. RD4A(ax,bx,cx,dx,bp, 0) RD4A(bp,ax,bx,cx,dx, 1) RD4A(dx,bp,ax,bx,cx, 2) RD4A(cx,dx,bp,ax,bx, 3) RD4A(bx,cx,dx,bp,ax, 4)
  677. RD4A(ax,bx,cx,dx,bp, 5) RD4A(bp,ax,bx,cx,dx, 6) RD4A(dx,bp,ax,bx,cx, 7) RD4A(cx,dx,bp,ax,bx, 8) RD4A(bx,cx,dx,bp,ax, 9)
  678. RD4A(ax,bx,cx,dx,bp,10) RD4A(bp,ax,bx,cx,dx,11) RD4A(dx,bp,ax,bx,cx,12) RD4A(cx,dx,bp,ax,bx,13) RD4A(bx,cx,dx,bp,ax,14)
  679. RD4A(ax,bx,cx,dx,bp,15) RD4A(bp,ax,bx,cx,dx,16) RD4B(dx,bp,ax,bx,cx,17) RD4B(cx,dx,bp,ax,bx,18) RD4B(bx,cx,dx,bp,ax,19)
  680. "\n\
  681. movl 4*16(%esp), %esi # \n\
  682. addl $4*(16+1), %esp # \n\
  683. addl %eax, 76(%esi) # ctx->hash[0] += a \n\
  684. addl %ebx, 80(%esi) # ctx->hash[1] += b \n\
  685. addl %ecx, 84(%esi) # ctx->hash[2] += c \n\
  686. addl %edx, 88(%esi) # ctx->hash[3] += d \n\
  687. addl %ebp, 92(%esi) # ctx->hash[4] += e \n\
  688. popl %ebx # \n\
  689. popl %esi # \n\
  690. popl %edi # \n\
  691. popl %ebp # \n\
  692. "
  693. ); /* asm */
  694. #undef RCONST
  695. }
  696. # elif defined(__GNUC__) && defined(__x86_64__)
/* The x86-64 implementation is in hash_md5_sha_x86-64.S */
/* Build-time layout check: the array size below is 1 when
 * offsetof(sha1_ctx_t, hash) == 80 and -1 (a compile error) otherwise.
 * Presumably the assembly addresses ctx->hash at that fixed offset -
 * verify against the .S file before changing sha1_ctx_t.
 */
struct ASM_expects_80 { char t[1 - 2*(offsetof(sha1_ctx_t, hash) != 80)]; };
/* Prototype only: the body is provided by the assembly file named above */
void FAST_FUNC sha1_process_block64(sha1_ctx_t *ctx);
  700. # else
  701. /* Fast, fully-unrolled SHA1. +3800 bytes of code on x86.
  702. * It seems further speedup can be achieved by handling more than
  703. * 64 bytes per one function call (coreutils does that).
  704. */
  705. static void FAST_FUNC sha1_process_block64(sha1_ctx_t *ctx)
  706. {
  707. static const uint32_t rconsts[] ALIGN4 = {
  708. 0x5A827999, 0x6ED9EBA1, 0x8F1BBCDC, 0xCA62C1D6
  709. };
  710. uint32_t W[16];
  711. uint32_t a, b, c, d, e;
  712. a = ctx->hash[0];
  713. b = ctx->hash[1];
  714. c = ctx->hash[2];
  715. d = ctx->hash[3];
  716. e = ctx->hash[4];
  717. /* From kernel source comments:
  718. * """
  719. * If you have 32 registers or more, the compiler can (and should)
  720. * try to change the array[] accesses into registers. However, on
  721. * machines with less than ~25 registers, that won't really work,
  722. * and at least gcc will make an unholy mess of it.
  723. *
  724. * So to avoid that mess which just slows things down, we force
  725. * the stores to memory to actually happen (we might be better off
  726. * with a 'W(t)=(val);asm("":"+m" (W(t))' there instead, as
  727. * suggested by Artur Skawina - that will also make gcc unable to
  728. * try to do the silly "optimize away loads" part because it won't
  729. * see what the value will be).
  730. * """
  731. */
  732. #if defined(__GNUC__) && defined(__i386__)
  733. # define DO_NOT_TRY_PROPAGATING(m) asm("":"+m"(m))
  734. #else
  735. # define DO_NOT_TRY_PROPAGATING(m) ((void)0)
  736. #endif
  737. #undef OP
  738. #define OP(A,B,C,D,E, n) \
  739. do { \
  740. uint32_t work = EXPR(B, C, D); \
  741. if (n <= 15) \
  742. work += W[n & 15] = SWAP_BE32(((uint32_t*)ctx->wbuffer)[n]); \
  743. if (n >= 16) \
  744. work += W[n & 15] = rotl32(W[(n+13) & 15] ^ W[(n+8) & 15] ^ W[(n+2) & 15] ^ W[n & 15], 1); \
  745. DO_NOT_TRY_PROPAGATING(W[n & 15]); \
  746. E += work + rotl32(A, 5) + rconsts[n / 20]; \
  747. B = rotl32(B, 30); \
  748. } while (0)
  749. #define OP20(n) \
  750. OP(a,b,c,d,e, (n+ 0)); OP(e,a,b,c,d, (n+ 1)); OP(d,e,a,b,c, (n+ 2)); OP(c,d,e,a,b, (n+ 3)); OP(b,c,d,e,a, (n+ 4)); \
  751. OP(a,b,c,d,e, (n+ 5)); OP(e,a,b,c,d, (n+ 6)); OP(d,e,a,b,c, (n+ 7)); OP(c,d,e,a,b, (n+ 8)); OP(b,c,d,e,a, (n+ 9)); \
  752. OP(a,b,c,d,e, (n+10)); OP(e,a,b,c,d, (n+11)); OP(d,e,a,b,c, (n+12)); OP(c,d,e,a,b, (n+13)); OP(b,c,d,e,a, (n+14)); \
  753. OP(a,b,c,d,e, (n+15)); OP(e,a,b,c,d, (n+16)); OP(d,e,a,b,c, (n+17)); OP(c,d,e,a,b, (n+18)); OP(b,c,d,e,a, (n+19))
  754. /* 4 rounds of 20 operations each */
  755. #define EXPR(b,c,d) (((c ^ d) & b) ^ d)
  756. OP20(0);
  757. #undef EXPR
  758. #define EXPR(b,c,d) (c ^ d ^ b)
  759. OP20(20);
  760. #undef EXPR
  761. #define EXPR(b,c,d) (((b | c) & d) | (b & c))
  762. OP20(40);
  763. #undef EXPR
  764. #define EXPR(b,c,d) (c ^ d ^ b)
  765. OP20(60);
  766. #undef EXPR
  767. #undef OP
  768. #undef OP20
  769. ctx->hash[0] += a;
  770. ctx->hash[1] += b;
  771. ctx->hash[2] += c;
  772. ctx->hash[3] += d;
  773. ctx->hash[4] += e;
  774. }
  775. # endif
  776. #elif CONFIG_SHA1_SMALL == 1
  777. /* Middle-sized version, +300 bytes of code on x86. */
  778. static void FAST_FUNC sha1_process_block64(sha1_ctx_t *ctx)
  779. {
  780. static const uint32_t rconsts[] ALIGN4 = {
  781. 0x5A827999, 0x6ED9EBA1, 0x8F1BBCDC, 0xCA62C1D6
  782. };
  783. int j;
  784. int n;
  785. uint32_t W[16+16];
  786. uint32_t a, b, c, d, e;
  787. a = ctx->hash[0];
  788. b = ctx->hash[1];
  789. c = ctx->hash[2];
  790. d = ctx->hash[3];
  791. e = ctx->hash[4];
  792. /* 1st round of 20 operations */
  793. n = 0;
  794. do {
  795. uint32_t work = ((c ^ d) & b) ^ d;
  796. W[n] = W[n+16] = SWAP_BE32(((uint32_t*)ctx->wbuffer)[n]);
  797. work += W[n];
  798. work += e + rotl32(a, 5) + rconsts[0];
  799. /* Rotate by one for next time */
  800. e = d;
  801. d = c;
  802. c = rotl32(b, 30);
  803. b = a;
  804. a = work;
  805. n = (n + 1) & 15;
  806. } while (n != 0);
  807. do {
  808. uint32_t work = ((c ^ d) & b) ^ d;
  809. W[n] = W[n+16] = rotl32(W[n+13] ^ W[n+8] ^ W[n+2] ^ W[n], 1);
  810. work += W[n];
  811. work += e + rotl32(a, 5) + rconsts[0];
  812. e = d;
  813. d = c;
  814. c = rotl32(b, 30);
  815. b = a;
  816. a = work;
  817. n = (n + 1) /* & 15*/;
  818. } while (n != 4);
  819. /* 2nd round of 20 operations */
  820. j = 19;
  821. do {
  822. uint32_t work = c ^ d ^ b;
  823. W[n] = W[n+16] = rotl32(W[n+13] ^ W[n+8] ^ W[n+2] ^ W[n], 1);
  824. work += W[n];
  825. work += e + rotl32(a, 5) + rconsts[1];
  826. e = d;
  827. d = c;
  828. c = rotl32(b, 30);
  829. b = a;
  830. a = work;
  831. n = (n + 1) & 15;
  832. } while (--j >= 0);
  833. /* 3rd round */
  834. j = 19;
  835. do {
  836. uint32_t work = ((b | c) & d) | (b & c);
  837. W[n] = W[n+16] = rotl32(W[n+13] ^ W[n+8] ^ W[n+2] ^ W[n], 1);
  838. work += W[n];
  839. work += e + rotl32(a, 5) + rconsts[2];
  840. e = d;
  841. d = c;
  842. c = rotl32(b, 30);
  843. b = a;
  844. a = work;
  845. n = (n + 1) & 15;
  846. } while (--j >= 0);
  847. /* 4th round */
  848. j = 19;
  849. do {
  850. uint32_t work = c ^ d ^ b;
  851. W[n] = W[n+16] = rotl32(W[n+13] ^ W[n+8] ^ W[n+2] ^ W[n], 1);
  852. work += W[n];
  853. work += e + rotl32(a, 5) + rconsts[3];
  854. e = d;
  855. d = c;
  856. c = rotl32(b, 30);
  857. b = a;
  858. a = work;
  859. n = (n + 1) & 15;
  860. } while (--j >= 0);
  861. ctx->hash[0] += a;
  862. ctx->hash[1] += b;
  863. ctx->hash[2] += c;
  864. ctx->hash[3] += d;
  865. ctx->hash[4] += e;
  866. }
  867. #else
  868. /* Compact version, almost twice as slow as fully unrolled */
  869. static void FAST_FUNC sha1_process_block64(sha1_ctx_t *ctx)
  870. {
  871. static const uint32_t rconsts[] ALIGN4 = {
  872. 0x5A827999, 0x6ED9EBA1, 0x8F1BBCDC, 0xCA62C1D6
  873. };
  874. int i, j;
  875. int n;
  876. uint32_t W[16+16];
  877. uint32_t a, b, c, d, e;
  878. /* On-stack work buffer frees up one register in the main loop
  879. * which otherwise will be needed to hold ctx pointer.
  880. *
  881. * The compiler is not smart enough to realize it, though. :(
  882. * If __attribute__((optimize("2"))) is added to the function,
  883. * only then gcc-9.3.1 spills "ctx" to stack and uses the freed
  884. * register (making code 6 bytes smaller, not just faster).
  885. */
  886. for (i = 0; i < 16; i++)
  887. W[i] = W[i+16] = SWAP_BE32(((uint32_t*)ctx->wbuffer)[i]);
  888. a = ctx->hash[0];
  889. b = ctx->hash[1];
  890. c = ctx->hash[2];
  891. d = ctx->hash[3];
  892. e = ctx->hash[4];
  893. /* 4 rounds of 20 operations each */
  894. n = 0;
  895. for (i = 0; i < 4; i++) {
  896. j = 19;
  897. do {
  898. uint32_t work;
  899. work = c ^ d;
  900. if (i == 0) {
  901. work = (work & b) ^ d;
  902. if (j <= 3)
  903. goto ge16;
  904. } else {
  905. if (i == 2)
  906. work = ((b | c) & d) | (b & c);
  907. else /* i = 1 or 3 */
  908. work ^= b;
  909. ge16:
  910. W[n] = W[n+16] = rotl32(W[n+13] ^ W[n+8] ^ W[n+2] ^ W[n], 1);
  911. }
  912. work += W[n];
  913. work += e + rotl32(a, 5) + rconsts[i];
  914. /* Rotate by one for next time */
  915. e = d;
  916. d = c;
  917. c = rotl32(b, 30);
  918. b = a;
  919. a = work;
  920. n = (n + 1) & 15;
  921. } while (--j >= 0);
  922. }
  923. ctx->hash[0] += a;
  924. ctx->hash[1] += b;
  925. ctx->hash[2] += c;
  926. ctx->hash[3] += d;
  927. ctx->hash[4] += e;
  928. }
  929. #endif
  930. /* Constants for SHA512 from FIPS 180-2:4.2.3.
  931. * SHA256 constants from FIPS 180-2:4.2.2
  932. * are the most significant half of first 64 elements
  933. * of the same array.
  934. */
  935. #undef K
  936. #if NEED_SHA512
  937. typedef uint64_t sha_K_int;
  938. # define K(v) v
  939. #else
  940. typedef uint32_t sha_K_int;
  941. # define K(v) (uint32_t)(v >> 32)
  942. #endif
  943. static const sha_K_int sha_K[] ALIGN8 = {
  944. K(0x428a2f98d728ae22ULL), K(0x7137449123ef65cdULL),
  945. K(0xb5c0fbcfec4d3b2fULL), K(0xe9b5dba58189dbbcULL),
  946. K(0x3956c25bf348b538ULL), K(0x59f111f1b605d019ULL),
  947. K(0x923f82a4af194f9bULL), K(0xab1c5ed5da6d8118ULL),
  948. K(0xd807aa98a3030242ULL), K(0x12835b0145706fbeULL),
  949. K(0x243185be4ee4b28cULL), K(0x550c7dc3d5ffb4e2ULL),
  950. K(0x72be5d74f27b896fULL), K(0x80deb1fe3b1696b1ULL),
  951. K(0x9bdc06a725c71235ULL), K(0xc19bf174cf692694ULL),
  952. K(0xe49b69c19ef14ad2ULL), K(0xefbe4786384f25e3ULL),
  953. K(0x0fc19dc68b8cd5b5ULL), K(0x240ca1cc77ac9c65ULL),
  954. K(0x2de92c6f592b0275ULL), K(0x4a7484aa6ea6e483ULL),
  955. K(0x5cb0a9dcbd41fbd4ULL), K(0x76f988da831153b5ULL),
  956. K(0x983e5152ee66dfabULL), K(0xa831c66d2db43210ULL),
  957. K(0xb00327c898fb213fULL), K(0xbf597fc7beef0ee4ULL),
  958. K(0xc6e00bf33da88fc2ULL), K(0xd5a79147930aa725ULL),
  959. K(0x06ca6351e003826fULL), K(0x142929670a0e6e70ULL),
  960. K(0x27b70a8546d22ffcULL), K(0x2e1b21385c26c926ULL),
  961. K(0x4d2c6dfc5ac42aedULL), K(0x53380d139d95b3dfULL),
  962. K(0x650a73548baf63deULL), K(0x766a0abb3c77b2a8ULL),
  963. K(0x81c2c92e47edaee6ULL), K(0x92722c851482353bULL),
  964. K(0xa2bfe8a14cf10364ULL), K(0xa81a664bbc423001ULL),
  965. K(0xc24b8b70d0f89791ULL), K(0xc76c51a30654be30ULL),
  966. K(0xd192e819d6ef5218ULL), K(0xd69906245565a910ULL),
  967. K(0xf40e35855771202aULL), K(0x106aa07032bbd1b8ULL),
  968. K(0x19a4c116b8d2d0c8ULL), K(0x1e376c085141ab53ULL),
  969. K(0x2748774cdf8eeb99ULL), K(0x34b0bcb5e19b48a8ULL),
  970. K(0x391c0cb3c5c95a63ULL), K(0x4ed8aa4ae3418acbULL),
  971. K(0x5b9cca4f7763e373ULL), K(0x682e6ff3d6b2b8a3ULL),
  972. K(0x748f82ee5defb2fcULL), K(0x78a5636f43172f60ULL),
  973. K(0x84c87814a1f0ab72ULL), K(0x8cc702081a6439ecULL),
  974. K(0x90befffa23631e28ULL), K(0xa4506cebde82bde9ULL),
  975. K(0xbef9a3f7b2c67915ULL), K(0xc67178f2e372532bULL),
  976. #if NEED_SHA512 /* [64]+ are used for sha512 only */
  977. K(0xca273eceea26619cULL), K(0xd186b8c721c0c207ULL),
  978. K(0xeada7dd6cde0eb1eULL), K(0xf57d4f7fee6ed178ULL),
  979. K(0x06f067aa72176fbaULL), K(0x0a637dc5a2c898a6ULL),
  980. K(0x113f9804bef90daeULL), K(0x1b710b35131c471bULL),
  981. K(0x28db77f523047d84ULL), K(0x32caab7b40c72493ULL),
  982. K(0x3c9ebe0a15c9bebcULL), K(0x431d67c49c100d4cULL),
  983. K(0x4cc5d4becb3e42b6ULL), K(0x597f299cfc657e2aULL),
  984. K(0x5fcb6fab3ad6faecULL), K(0x6c44198c4a475817ULL),
  985. #endif
  986. };
  987. #undef K
  988. #undef Ch
  989. #undef Maj
  990. #undef S0
  991. #undef S1
  992. #undef R0
  993. #undef R1
  994. static void FAST_FUNC sha256_process_block64(sha256_ctx_t *ctx)
  995. {
  996. unsigned t;
  997. uint32_t W[64], a, b, c, d, e, f, g, h;
  998. const uint32_t *words = (uint32_t*) ctx->wbuffer;
  999. /* Operators defined in FIPS 180-2:4.1.2. */
  1000. #define Ch(x, y, z) ((x & y) ^ (~x & z))
  1001. #define Maj(x, y, z) ((x & y) ^ (x & z) ^ (y & z))
  1002. #define S0(x) (rotr32(x, 2) ^ rotr32(x, 13) ^ rotr32(x, 22))
  1003. #define S1(x) (rotr32(x, 6) ^ rotr32(x, 11) ^ rotr32(x, 25))
  1004. #define R0(x) (rotr32(x, 7) ^ rotr32(x, 18) ^ (x >> 3))
  1005. #define R1(x) (rotr32(x, 17) ^ rotr32(x, 19) ^ (x >> 10))
  1006. /* Compute the message schedule according to FIPS 180-2:6.2.2 step 2. */
  1007. for (t = 0; t < 16; ++t)
  1008. W[t] = SWAP_BE32(words[t]);
  1009. for (/*t = 16*/; t < 64; ++t)
  1010. W[t] = R1(W[t - 2]) + W[t - 7] + R0(W[t - 15]) + W[t - 16];
  1011. a = ctx->hash[0];
  1012. b = ctx->hash[1];
  1013. c = ctx->hash[2];
  1014. d = ctx->hash[3];
  1015. e = ctx->hash[4];
  1016. f = ctx->hash[5];
  1017. g = ctx->hash[6];
  1018. h = ctx->hash[7];
  1019. /* The actual computation according to FIPS 180-2:6.2.2 step 3. */
  1020. for (t = 0; t < 64; ++t) {
  1021. /* Need to fetch upper half of sha_K[t]
  1022. * (I hope compiler is clever enough to just fetch
  1023. * upper half)
  1024. */
  1025. uint32_t K_t = NEED_SHA512 ? (sha_K[t] >> 32) : sha_K[t];
  1026. uint32_t T1 = h + S1(e) + Ch(e, f, g) + K_t + W[t];
  1027. uint32_t T2 = S0(a) + Maj(a, b, c);
  1028. h = g;
  1029. g = f;
  1030. f = e;
  1031. e = d + T1;
  1032. d = c;
  1033. c = b;
  1034. b = a;
  1035. a = T1 + T2;
  1036. }
  1037. #undef Ch
  1038. #undef Maj
  1039. #undef S0
  1040. #undef S1
  1041. #undef R0
  1042. #undef R1
  1043. /* Add the starting values of the context according to FIPS 180-2:6.2.2
  1044. step 4. */
  1045. ctx->hash[0] += a;
  1046. ctx->hash[1] += b;
  1047. ctx->hash[2] += c;
  1048. ctx->hash[3] += d;
  1049. ctx->hash[4] += e;
  1050. ctx->hash[5] += f;
  1051. ctx->hash[6] += g;
  1052. ctx->hash[7] += h;
  1053. }
  1054. #if NEED_SHA512
  1055. static void FAST_FUNC sha512_process_block128(sha512_ctx_t *ctx)
  1056. {
  1057. unsigned t;
  1058. uint64_t W[80];
  1059. /* On i386, having assignments here (not later as sha256 does)
  1060. * produces 99 bytes smaller code with gcc 4.3.1
  1061. */
  1062. uint64_t a = ctx->hash[0];
  1063. uint64_t b = ctx->hash[1];
  1064. uint64_t c = ctx->hash[2];
  1065. uint64_t d = ctx->hash[3];
  1066. uint64_t e = ctx->hash[4];
  1067. uint64_t f = ctx->hash[5];
  1068. uint64_t g = ctx->hash[6];
  1069. uint64_t h = ctx->hash[7];
  1070. const uint64_t *words = (uint64_t*) ctx->wbuffer;
  1071. /* Operators defined in FIPS 180-2:4.1.2. */
  1072. #define Ch(x, y, z) ((x & y) ^ (~x & z))
  1073. #define Maj(x, y, z) ((x & y) ^ (x & z) ^ (y & z))
  1074. #define S0(x) (rotr64(x, 28) ^ rotr64(x, 34) ^ rotr64(x, 39))
  1075. #define S1(x) (rotr64(x, 14) ^ rotr64(x, 18) ^ rotr64(x, 41))
  1076. #define R0(x) (rotr64(x, 1) ^ rotr64(x, 8) ^ (x >> 7))
  1077. #define R1(x) (rotr64(x, 19) ^ rotr64(x, 61) ^ (x >> 6))
  1078. /* Compute the message schedule according to FIPS 180-2:6.3.2 step 2. */
  1079. for (t = 0; t < 16; ++t)
  1080. W[t] = SWAP_BE64(words[t]);
  1081. for (/*t = 16*/; t < 80; ++t)
  1082. W[t] = R1(W[t - 2]) + W[t - 7] + R0(W[t - 15]) + W[t - 16];
  1083. /* The actual computation according to FIPS 180-2:6.3.2 step 3. */
  1084. for (t = 0; t < 80; ++t) {
  1085. uint64_t T1 = h + S1(e) + Ch(e, f, g) + sha_K[t] + W[t];
  1086. uint64_t T2 = S0(a) + Maj(a, b, c);
  1087. h = g;
  1088. g = f;
  1089. f = e;
  1090. e = d + T1;
  1091. d = c;
  1092. c = b;
  1093. b = a;
  1094. a = T1 + T2;
  1095. }
  1096. #undef Ch
  1097. #undef Maj
  1098. #undef S0
  1099. #undef S1
  1100. #undef R0
  1101. #undef R1
  1102. /* Add the starting values of the context according to FIPS 180-2:6.3.2
  1103. step 4. */
  1104. ctx->hash[0] += a;
  1105. ctx->hash[1] += b;
  1106. ctx->hash[2] += c;
  1107. ctx->hash[3] += d;
  1108. ctx->hash[4] += e;
  1109. ctx->hash[5] += f;
  1110. ctx->hash[6] += g;
  1111. ctx->hash[7] += h;
  1112. }
  1113. #endif /* NEED_SHA512 */
  1114. void FAST_FUNC sha1_begin(sha1_ctx_t *ctx)
  1115. {
  1116. ctx->hash[0] = 0x67452301;
  1117. ctx->hash[1] = 0xefcdab89;
  1118. ctx->hash[2] = 0x98badcfe;
  1119. ctx->hash[3] = 0x10325476;
  1120. ctx->hash[4] = 0xc3d2e1f0;
  1121. ctx->total64 = 0;
  1122. ctx->process_block = sha1_process_block64;
  1123. #if ENABLE_SHA1_HWACCEL
  1124. # if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
  1125. {
  1126. int ni = shaNI;
  1127. if (!ni)
  1128. ni = get_shaNI();
  1129. if (ni > 0)
  1130. ctx->process_block = sha1_process_block64_shaNI;
  1131. }
  1132. # endif
  1133. #endif
  1134. }
  1135. static const uint32_t init256[] ALIGN4 = {
  1136. 0,
  1137. 0,
  1138. 0x6a09e667,
  1139. 0xbb67ae85,
  1140. 0x3c6ef372,
  1141. 0xa54ff53a,
  1142. 0x510e527f,
  1143. 0x9b05688c,
  1144. 0x1f83d9ab,
  1145. 0x5be0cd19,
  1146. };
  1147. #if NEED_SHA512
  1148. static const uint32_t init512_lo[] ALIGN4 = {
  1149. 0,
  1150. 0,
  1151. 0xf3bcc908,
  1152. 0x84caa73b,
  1153. 0xfe94f82b,
  1154. 0x5f1d36f1,
  1155. 0xade682d1,
  1156. 0x2b3e6c1f,
  1157. 0xfb41bd6b,
  1158. 0x137e2179,
  1159. };
  1160. #endif /* NEED_SHA512 */
  1161. // Note: SHA-384 is identical to SHA-512, except that initial hash values are
  1162. // 0xcbbb9d5dc1059ed8, 0x629a292a367cd507, 0x9159015a3070dd17, 0x152fecd8f70e5939,
  1163. // 0x67332667ffc00b31, 0x8eb44a8768581511, 0xdb0c2e0d64f98fa7, 0x47b5481dbefa4fa4,
  1164. // and the output is constructed by omitting last two 64-bit words of it.
  1165. /* Initialize structure containing state of computation.
  1166. (FIPS 180-2:5.3.2) */
  1167. void FAST_FUNC sha256_begin(sha256_ctx_t *ctx)
  1168. {
  1169. memcpy(&ctx->total64, init256, sizeof(init256));
  1170. /*ctx->total64 = 0; - done by prepending two 32-bit zeros to init256 */
  1171. ctx->process_block = sha256_process_block64;
  1172. #if ENABLE_SHA256_HWACCEL
  1173. # if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
  1174. {
  1175. int ni = shaNI;
  1176. if (!ni)
  1177. ni = get_shaNI();
  1178. if (ni > 0)
  1179. ctx->process_block = sha256_process_block64_shaNI;
  1180. }
  1181. # endif
  1182. #endif
  1183. }
  1184. #if NEED_SHA512
  1185. /* Initialize structure containing state of computation.
  1186. (FIPS 180-2:5.3.3) */
  1187. void FAST_FUNC sha512_begin(sha512_ctx_t *ctx)
  1188. {
  1189. int i;
  1190. /* Two extra iterations zero out ctx->total64[2] */
  1191. uint64_t *tp = ctx->total64;
  1192. for (i = 0; i < 8 + 2; i++)
  1193. tp[i] = ((uint64_t)(init256[i]) << 32) + init512_lo[i];
  1194. /*ctx->total64[0] = ctx->total64[1] = 0; - already done */
  1195. }
  1196. void FAST_FUNC sha512_hash(sha512_ctx_t *ctx, const void *buffer, size_t len)
  1197. {
  1198. unsigned bufpos = ctx->total64[0] & 127;
  1199. unsigned remaining;
  1200. /* First increment the byte count. FIPS 180-2 specifies the possible
  1201. length of the file up to 2^128 _bits_.
  1202. We compute the number of _bytes_ and convert to bits later. */
  1203. ctx->total64[0] += len;
  1204. if (ctx->total64[0] < len)
  1205. ctx->total64[1]++;
  1206. while (1) {
  1207. remaining = 128 - bufpos;
  1208. if (remaining > len)
  1209. remaining = len;
  1210. /* Copy data into aligned buffer */
  1211. memcpy(ctx->wbuffer + bufpos, buffer, remaining);
  1212. len -= remaining;
  1213. buffer = (const char *)buffer + remaining;
  1214. bufpos += remaining;
  1215. /* Clever way to do "if (bufpos != N) break; ... ; bufpos = 0;" */
  1216. bufpos -= 128;
  1217. if (bufpos != 0)
  1218. break;
  1219. /* Buffer is filled up, process it */
  1220. sha512_process_block128(ctx);
  1221. /*bufpos = 0; - already is */
  1222. }
  1223. }
  1224. #endif /* NEED_SHA512 */
  1225. /* Used also for sha256 */
  1226. unsigned FAST_FUNC sha1_end(sha1_ctx_t *ctx, void *resbuf)
  1227. {
  1228. unsigned hash_size;
  1229. /* SHA stores total in BE, need to swap on LE arches: */
  1230. common64_end(ctx, /*swap_needed:*/ BB_LITTLE_ENDIAN);
  1231. hash_size = 8;
  1232. if (ctx->process_block == sha1_process_block64
  1233. #if ENABLE_SHA1_HWACCEL
  1234. || ctx->process_block == sha1_process_block64_shaNI
  1235. #endif
  1236. ) {
  1237. hash_size = 5;
  1238. }
  1239. /* This way we do not impose alignment constraints on resbuf: */
  1240. if (BB_LITTLE_ENDIAN) {
  1241. unsigned i;
  1242. for (i = 0; i < hash_size; ++i)
  1243. ctx->hash[i] = SWAP_BE32(ctx->hash[i]);
  1244. }
  1245. hash_size *= sizeof(ctx->hash[0]);
  1246. memcpy(resbuf, ctx->hash, hash_size);
  1247. return hash_size;
  1248. }
  1249. #if NEED_SHA512
  1250. unsigned FAST_FUNC sha512_end(sha512_ctx_t *ctx, void *resbuf)
  1251. {
  1252. unsigned bufpos = ctx->total64[0] & 127;
  1253. /* Pad the buffer to the next 128-byte boundary with 0x80,0,0,0... */
  1254. ctx->wbuffer[bufpos++] = 0x80;
  1255. while (1) {
  1256. unsigned remaining = 128 - bufpos;
  1257. memset(ctx->wbuffer + bufpos, 0, remaining);
  1258. if (remaining >= 16) {
  1259. /* Store the 128-bit counter of bits in the buffer in BE format */
  1260. uint64_t t;
  1261. t = ctx->total64[0] << 3;
  1262. t = SWAP_BE64(t);
  1263. *(bb__aliased_uint64_t *) (&ctx->wbuffer[128 - 8]) = t;
  1264. t = (ctx->total64[1] << 3) | (ctx->total64[0] >> 61);
  1265. t = SWAP_BE64(t);
  1266. *(bb__aliased_uint64_t *) (&ctx->wbuffer[128 - 16]) = t;
  1267. }
  1268. sha512_process_block128(ctx);
  1269. if (remaining >= 16)
  1270. break;
  1271. bufpos = 0;
  1272. }
  1273. if (BB_LITTLE_ENDIAN) {
  1274. unsigned i;
  1275. for (i = 0; i < ARRAY_SIZE(ctx->hash); ++i)
  1276. ctx->hash[i] = SWAP_BE64(ctx->hash[i]);
  1277. }
  1278. memcpy(resbuf, ctx->hash, sizeof(ctx->hash));
  1279. return sizeof(ctx->hash);
  1280. }
  1281. #endif /* NEED_SHA512 */
  1282. /*
  1283. * The Keccak sponge function, designed by Guido Bertoni, Joan Daemen,
  1284. * Michael Peeters and Gilles Van Assche. For more information, feedback or
  1285. * questions, please refer to our website: http://keccak.noekeon.org/
  1286. *
  1287. * Implementation by Ronny Van Keer,
  1288. * hereby denoted as "the implementer".
  1289. *
  1290. * To the extent possible under law, the implementer has waived all copyright
  1291. * and related or neighboring rights to the source code in this file.
  1292. * http://creativecommons.org/publicdomain/zero/1.0/
  1293. *
  1294. * Busybox modifications (C) Lauri Kasanen, under the GPLv2.
  1295. */
/* Clamp CONFIG_SHA3_SMALL into the range 0..1: SHA3_SMALL is used
 * below as a plain true/false switch.
 */
#if CONFIG_SHA3_SMALL < 0
# define SHA3_SMALL 0
#elif CONFIG_SHA3_SMALL > 1
# define SHA3_SMALL 1
#else
# define SHA3_SMALL CONFIG_SHA3_SMALL
#endif

/* Default: use the native 64-bit code path */
#define OPTIMIZE_SHA3_FOR_32 0
/*
 * SHA3 can be optimized for 32-bit CPUs with bit-slicing:
 * every 64-bit word of state[] can be split into two 32-bit words
 * by even/odd bits. In this form, all rotations of sha3 round
 * are 32-bit - and there are lots of them.
 * However, it requires either splitting/combining state words
 * before/after sha3 round (code does this now)
 * or shuffling bits before xor'ing them into state and in sha3_end.
 * Without shuffling, bit-slicing results in -130 bytes of code
 * and marginal speedup (but of course it gives wrong result).
 * With shuffling it works, but +260 code bytes, and slower.
 * Disabled for now:
 */
/* The real condition is kept in the comment; "#if 0" keeps the
 * override below from ever taking effect.
 */
#if 0 /* LONG_MAX == 0x7fffffff */
# undef OPTIMIZE_SHA3_FOR_32
# define OPTIMIZE_SHA3_FOR_32 1
#endif
  1321. #if OPTIMIZE_SHA3_FOR_32
  1322. /* This splits every 64-bit word into a pair of 32-bit words,
  1323. * even bits go into first word, odd bits go to second one.
  1324. * The conversion is done in-place.
  1325. */
  1326. static void split_halves(uint64_t *state)
  1327. {
  1328. /* Credit: Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 */
  1329. uint32_t *s32 = (uint32_t*)state;
  1330. uint32_t t, x0, x1;
  1331. int i;
  1332. for (i = 24; i >= 0; --i) {
  1333. x0 = s32[0];
  1334. t = (x0 ^ (x0 >> 1)) & 0x22222222; x0 = x0 ^ t ^ (t << 1);
  1335. t = (x0 ^ (x0 >> 2)) & 0x0C0C0C0C; x0 = x0 ^ t ^ (t << 2);
  1336. t = (x0 ^ (x0 >> 4)) & 0x00F000F0; x0 = x0 ^ t ^ (t << 4);
  1337. t = (x0 ^ (x0 >> 8)) & 0x0000FF00; x0 = x0 ^ t ^ (t << 8);
  1338. x1 = s32[1];
  1339. t = (x1 ^ (x1 >> 1)) & 0x22222222; x1 = x1 ^ t ^ (t << 1);
  1340. t = (x1 ^ (x1 >> 2)) & 0x0C0C0C0C; x1 = x1 ^ t ^ (t << 2);
  1341. t = (x1 ^ (x1 >> 4)) & 0x00F000F0; x1 = x1 ^ t ^ (t << 4);
  1342. t = (x1 ^ (x1 >> 8)) & 0x0000FF00; x1 = x1 ^ t ^ (t << 8);
  1343. *s32++ = (x0 & 0x0000FFFF) | (x1 << 16);
  1344. *s32++ = (x0 >> 16) | (x1 & 0xFFFF0000);
  1345. }
  1346. }
  1347. /* The reverse operation */
  1348. static void combine_halves(uint64_t *state)
  1349. {
  1350. uint32_t *s32 = (uint32_t*)state;
  1351. uint32_t t, x0, x1;
  1352. int i;
  1353. for (i = 24; i >= 0; --i) {
  1354. x0 = s32[0];
  1355. x1 = s32[1];
  1356. t = (x0 & 0x0000FFFF) | (x1 << 16);
  1357. x1 = (x0 >> 16) | (x1 & 0xFFFF0000);
  1358. x0 = t;
  1359. t = (x0 ^ (x0 >> 8)) & 0x0000FF00; x0 = x0 ^ t ^ (t << 8);
  1360. t = (x0 ^ (x0 >> 4)) & 0x00F000F0; x0 = x0 ^ t ^ (t << 4);
  1361. t = (x0 ^ (x0 >> 2)) & 0x0C0C0C0C; x0 = x0 ^ t ^ (t << 2);
  1362. t = (x0 ^ (x0 >> 1)) & 0x22222222; x0 = x0 ^ t ^ (t << 1);
  1363. *s32++ = x0;
  1364. t = (x1 ^ (x1 >> 8)) & 0x0000FF00; x1 = x1 ^ t ^ (t << 8);
  1365. t = (x1 ^ (x1 >> 4)) & 0x00F000F0; x1 = x1 ^ t ^ (t << 4);
  1366. t = (x1 ^ (x1 >> 2)) & 0x0C0C0C0C; x1 = x1 ^ t ^ (t << 2);
  1367. t = (x1 ^ (x1 >> 1)) & 0x22222222; x1 = x1 ^ t ^ (t << 1);
  1368. *s32++ = x1;
  1369. }
  1370. }
  1371. #endif
  1372. /*
  1373. * In the crypto literature this function is usually called Keccak-f().
  1374. */
  1375. static void sha3_process_block72(uint64_t *state)
  1376. {
  1377. enum { NROUNDS = 24 };
  1378. #if OPTIMIZE_SHA3_FOR_32
  1379. /*
  1380. static const uint32_t IOTA_CONST_0[NROUNDS] ALIGN4 = {
  1381. 0x00000001UL,
  1382. 0x00000000UL,
  1383. 0x00000000UL,
  1384. 0x00000000UL,
  1385. 0x00000001UL,
  1386. 0x00000001UL,
  1387. 0x00000001UL,
  1388. 0x00000001UL,
  1389. 0x00000000UL,
  1390. 0x00000000UL,
  1391. 0x00000001UL,
  1392. 0x00000000UL,
  1393. 0x00000001UL,
  1394. 0x00000001UL,
  1395. 0x00000001UL,
  1396. 0x00000001UL,
  1397. 0x00000000UL,
  1398. 0x00000000UL,
  1399. 0x00000000UL,
  1400. 0x00000000UL,
  1401. 0x00000001UL,
  1402. 0x00000000UL,
  1403. 0x00000001UL,
  1404. 0x00000000UL,
  1405. };
  1406. ** bits are in lsb: 0101 0000 1111 0100 1111 0001
  1407. */
  1408. uint32_t IOTA_CONST_0bits = (uint32_t)(0x0050f4f1);
  1409. static const uint32_t IOTA_CONST_1[NROUNDS] ALIGN4 = {
  1410. 0x00000000UL,
  1411. 0x00000089UL,
  1412. 0x8000008bUL,
  1413. 0x80008080UL,
  1414. 0x0000008bUL,
  1415. 0x00008000UL,
  1416. 0x80008088UL,
  1417. 0x80000082UL,
  1418. 0x0000000bUL,
  1419. 0x0000000aUL,
  1420. 0x00008082UL,
  1421. 0x00008003UL,
  1422. 0x0000808bUL,
  1423. 0x8000000bUL,
  1424. 0x8000008aUL,
  1425. 0x80000081UL,
  1426. 0x80000081UL,
  1427. 0x80000008UL,
  1428. 0x00000083UL,
  1429. 0x80008003UL,
  1430. 0x80008088UL,
  1431. 0x80000088UL,
  1432. 0x00008000UL,
  1433. 0x80008082UL,
  1434. };
  1435. uint32_t *const s32 = (uint32_t*)state;
  1436. unsigned round;
  1437. split_halves(state);
  1438. for (round = 0; round < NROUNDS; round++) {
  1439. unsigned x;
  1440. /* Theta */
  1441. {
  1442. uint32_t BC[20];
  1443. for (x = 0; x < 10; ++x) {
  1444. BC[x+10] = BC[x] = s32[x]^s32[x+10]^s32[x+20]^s32[x+30]^s32[x+40];
  1445. }
  1446. for (x = 0; x < 10; x += 2) {
  1447. uint32_t ta, tb;
  1448. ta = BC[x+8] ^ rotl32(BC[x+3], 1);
  1449. tb = BC[x+9] ^ BC[x+2];
  1450. s32[x+0] ^= ta;
  1451. s32[x+1] ^= tb;
  1452. s32[x+10] ^= ta;
  1453. s32[x+11] ^= tb;
  1454. s32[x+20] ^= ta;
  1455. s32[x+21] ^= tb;
  1456. s32[x+30] ^= ta;
  1457. s32[x+31] ^= tb;
  1458. s32[x+40] ^= ta;
  1459. s32[x+41] ^= tb;
  1460. }
  1461. }
  1462. /* RhoPi */
  1463. {
  1464. uint32_t t0a,t0b, t1a,t1b;
  1465. t1a = s32[1*2+0];
  1466. t1b = s32[1*2+1];
  1467. #define RhoPi(PI_LANE, ROT_CONST) \
  1468. t0a = s32[PI_LANE*2+0];\
  1469. t0b = s32[PI_LANE*2+1];\
  1470. if (ROT_CONST & 1) {\
  1471. s32[PI_LANE*2+0] = rotl32(t1b, ROT_CONST/2+1);\
  1472. s32[PI_LANE*2+1] = ROT_CONST == 1 ? t1a : rotl32(t1a, ROT_CONST/2+0);\
  1473. } else {\
  1474. s32[PI_LANE*2+0] = rotl32(t1a, ROT_CONST/2);\
  1475. s32[PI_LANE*2+1] = rotl32(t1b, ROT_CONST/2);\
  1476. }\
  1477. t1a = t0a; t1b = t0b;
  1478. RhoPi(10, 1)
  1479. RhoPi( 7, 3)
  1480. RhoPi(11, 6)
  1481. RhoPi(17,10)
  1482. RhoPi(18,15)
  1483. RhoPi( 3,21)
  1484. RhoPi( 5,28)
  1485. RhoPi(16,36)
  1486. RhoPi( 8,45)
  1487. RhoPi(21,55)
  1488. RhoPi(24, 2)
  1489. RhoPi( 4,14)
  1490. RhoPi(15,27)
  1491. RhoPi(23,41)
  1492. RhoPi(19,56)
  1493. RhoPi(13, 8)
  1494. RhoPi(12,25)
  1495. RhoPi( 2,43)
  1496. RhoPi(20,62)
  1497. RhoPi(14,18)
  1498. RhoPi(22,39)
  1499. RhoPi( 9,61)
  1500. RhoPi( 6,20)
  1501. RhoPi( 1,44)
  1502. #undef RhoPi
  1503. }
  1504. /* Chi */
  1505. for (x = 0; x <= 40;) {
  1506. uint32_t BC0, BC1, BC2, BC3, BC4;
  1507. BC0 = s32[x + 0*2];
  1508. BC1 = s32[x + 1*2];
  1509. BC2 = s32[x + 2*2];
  1510. s32[x + 0*2] = BC0 ^ ((~BC1) & BC2);
  1511. BC3 = s32[x + 3*2];
  1512. s32[x + 1*2] = BC1 ^ ((~BC2) & BC3);
  1513. BC4 = s32[x + 4*2];
  1514. s32[x + 2*2] = BC2 ^ ((~BC3) & BC4);
  1515. s32[x + 3*2] = BC3 ^ ((~BC4) & BC0);
  1516. s32[x + 4*2] = BC4 ^ ((~BC0) & BC1);
  1517. x++;
  1518. BC0 = s32[x + 0*2];
  1519. BC1 = s32[x + 1*2];
  1520. BC2 = s32[x + 2*2];
  1521. s32[x + 0*2] = BC0 ^ ((~BC1) & BC2);
  1522. BC3 = s32[x + 3*2];
  1523. s32[x + 1*2] = BC1 ^ ((~BC2) & BC3);
  1524. BC4 = s32[x + 4*2];
  1525. s32[x + 2*2] = BC2 ^ ((~BC3) & BC4);
  1526. s32[x + 3*2] = BC3 ^ ((~BC4) & BC0);
  1527. s32[x + 4*2] = BC4 ^ ((~BC0) & BC1);
  1528. x += 9;
  1529. }
  1530. /* Iota */
  1531. s32[0] ^= IOTA_CONST_0bits & 1;
  1532. IOTA_CONST_0bits >>= 1;
  1533. s32[1] ^= IOTA_CONST_1[round];
  1534. }
  1535. combine_halves(state);
  1536. #else
  1537. /* Native 64-bit algorithm */
  1538. static const uint16_t IOTA_CONST[NROUNDS] ALIGN2 = {
  1539. /* Elements should be 64-bit, but top half is always zero
  1540. * or 0x80000000. We encode 63rd bits in a separate word below.
  1541. * Same is true for 31th bits, which lets us use 16-bit table
  1542. * instead of 64-bit. The speed penalty is lost in the noise.
  1543. */
  1544. 0x0001,
  1545. 0x8082,
  1546. 0x808a,
  1547. 0x8000,
  1548. 0x808b,
  1549. 0x0001,
  1550. 0x8081,
  1551. 0x8009,
  1552. 0x008a,
  1553. 0x0088,
  1554. 0x8009,
  1555. 0x000a,
  1556. 0x808b,
  1557. 0x008b,
  1558. 0x8089,
  1559. 0x8003,
  1560. 0x8002,
  1561. 0x0080,
  1562. 0x800a,
  1563. 0x000a,
  1564. 0x8081,
  1565. 0x8080,
  1566. 0x0001,
  1567. 0x8008,
  1568. };
  1569. /* bit for CONST[0] is in msb: 0011 0011 0000 0111 1101 1101 */
  1570. const uint32_t IOTA_CONST_bit63 = (uint32_t)(0x3307dd00);
  1571. /* bit for CONST[0] is in msb: 0001 0110 0011 1000 0001 1011 */
  1572. const uint32_t IOTA_CONST_bit31 = (uint32_t)(0x16381b00);
  1573. static const uint8_t ROT_CONST[24] ALIGN1 = {
  1574. 1, 3, 6, 10, 15, 21, 28, 36, 45, 55, 2, 14,
  1575. 27, 41, 56, 8, 25, 43, 62, 18, 39, 61, 20, 44,
  1576. };
  1577. static const uint8_t PI_LANE[24] ALIGN1 = {
  1578. 10, 7, 11, 17, 18, 3, 5, 16, 8, 21, 24, 4,
  1579. 15, 23, 19, 13, 12, 2, 20, 14, 22, 9, 6, 1,
  1580. };
  1581. /*static const uint8_t MOD5[10] ALIGN1 = { 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, };*/
  1582. unsigned x;
  1583. unsigned round;
  1584. if (BB_BIG_ENDIAN) {
  1585. for (x = 0; x < 25; x++) {
  1586. state[x] = SWAP_LE64(state[x]);
  1587. }
  1588. }
  1589. for (round = 0; round < NROUNDS; ++round) {
  1590. /* Theta */
  1591. {
  1592. uint64_t BC[10];
  1593. for (x = 0; x < 5; ++x) {
  1594. BC[x + 5] = BC[x] = state[x]
  1595. ^ state[x + 5] ^ state[x + 10]
  1596. ^ state[x + 15] ^ state[x + 20];
  1597. }
  1598. /* Using 2x5 vector above eliminates the need to use
  1599. * BC[MOD5[x+N]] trick below to fetch BC[(x+N) % 5],
  1600. * and the code is a bit _smaller_.
  1601. */
  1602. for (x = 0; x < 5; ++x) {
  1603. uint64_t temp = BC[x + 4] ^ rotl64(BC[x + 1], 1);
  1604. state[x] ^= temp;
  1605. state[x + 5] ^= temp;
  1606. state[x + 10] ^= temp;
  1607. state[x + 15] ^= temp;
  1608. state[x + 20] ^= temp;
  1609. }
  1610. }
  1611. /* Rho Pi */
  1612. if (SHA3_SMALL) {
  1613. uint64_t t1 = state[1];
  1614. for (x = 0; x < 24; ++x) {
  1615. uint64_t t0 = state[PI_LANE[x]];
  1616. state[PI_LANE[x]] = rotl64(t1, ROT_CONST[x]);
  1617. t1 = t0;
  1618. }
  1619. } else {
  1620. /* Especially large benefit for 32-bit arch (75% faster):
  1621. * 64-bit rotations by non-constant usually are SLOW on those.
  1622. * We resort to unrolling here.
  1623. * This optimizes out PI_LANE[] and ROT_CONST[],
  1624. * but generates 300-500 more bytes of code.
  1625. */
  1626. uint64_t t0;
  1627. uint64_t t1 = state[1];
  1628. #define RhoPi_twice(x) \
  1629. t0 = state[PI_LANE[x ]]; \
  1630. state[PI_LANE[x ]] = rotl64(t1, ROT_CONST[x ]); \
  1631. t1 = state[PI_LANE[x+1]]; \
  1632. state[PI_LANE[x+1]] = rotl64(t0, ROT_CONST[x+1]);
  1633. RhoPi_twice(0); RhoPi_twice(2);
  1634. RhoPi_twice(4); RhoPi_twice(6);
  1635. RhoPi_twice(8); RhoPi_twice(10);
  1636. RhoPi_twice(12); RhoPi_twice(14);
  1637. RhoPi_twice(16); RhoPi_twice(18);
  1638. RhoPi_twice(20); RhoPi_twice(22);
  1639. #undef RhoPi_twice
  1640. }
  1641. /* Chi */
  1642. # if LONG_MAX > 0x7fffffff
  1643. for (x = 0; x <= 20; x += 5) {
  1644. uint64_t BC0, BC1, BC2, BC3, BC4;
  1645. BC0 = state[x + 0];
  1646. BC1 = state[x + 1];
  1647. BC2 = state[x + 2];
  1648. state[x + 0] = BC0 ^ ((~BC1) & BC2);
  1649. BC3 = state[x + 3];
  1650. state[x + 1] = BC1 ^ ((~BC2) & BC3);
  1651. BC4 = state[x + 4];
  1652. state[x + 2] = BC2 ^ ((~BC3) & BC4);
  1653. state[x + 3] = BC3 ^ ((~BC4) & BC0);
  1654. state[x + 4] = BC4 ^ ((~BC0) & BC1);
  1655. }
  1656. # else
  1657. /* Reduced register pressure version
  1658. * for register-starved 32-bit arches
  1659. * (i386: -95 bytes, and it is _faster_)
  1660. */
  1661. for (x = 0; x <= 40;) {
  1662. uint32_t BC0, BC1, BC2, BC3, BC4;
  1663. uint32_t *const s32 = (uint32_t*)state;
  1664. # if SHA3_SMALL
  1665. do_half:
  1666. # endif
  1667. BC0 = s32[x + 0*2];
  1668. BC1 = s32[x + 1*2];
  1669. BC2 = s32[x + 2*2];
  1670. s32[x + 0*2] = BC0 ^ ((~BC1) & BC2);
  1671. BC3 = s32[x + 3*2];
  1672. s32[x + 1*2] = BC1 ^ ((~BC2) & BC3);
  1673. BC4 = s32[x + 4*2];
  1674. s32[x + 2*2] = BC2 ^ ((~BC3) & BC4);
  1675. s32[x + 3*2] = BC3 ^ ((~BC4) & BC0);
  1676. s32[x + 4*2] = BC4 ^ ((~BC0) & BC1);
  1677. x++;
  1678. # if SHA3_SMALL
  1679. if (x & 1)
  1680. goto do_half;
  1681. x += 8;
  1682. # else
  1683. BC0 = s32[x + 0*2];
  1684. BC1 = s32[x + 1*2];
  1685. BC2 = s32[x + 2*2];
  1686. s32[x + 0*2] = BC0 ^ ((~BC1) & BC2);
  1687. BC3 = s32[x + 3*2];
  1688. s32[x + 1*2] = BC1 ^ ((~BC2) & BC3);
  1689. BC4 = s32[x + 4*2];
  1690. s32[x + 2*2] = BC2 ^ ((~BC3) & BC4);
  1691. s32[x + 3*2] = BC3 ^ ((~BC4) & BC0);
  1692. s32[x + 4*2] = BC4 ^ ((~BC0) & BC1);
  1693. x += 9;
  1694. # endif
  1695. }
  1696. # endif /* long is 32-bit */
  1697. /* Iota */
  1698. state[0] ^= IOTA_CONST[round]
  1699. | (uint32_t)((IOTA_CONST_bit31 << round) & 0x80000000)
  1700. | (uint64_t)((IOTA_CONST_bit63 << round) & 0x80000000) << 32;
  1701. }
  1702. if (BB_BIG_ENDIAN) {
  1703. for (x = 0; x < 25; x++) {
  1704. state[x] = SWAP_LE64(state[x]);
  1705. }
  1706. }
  1707. #endif
  1708. }
  1709. void FAST_FUNC sha3_begin(sha3_ctx_t *ctx)
  1710. {
  1711. memset(ctx, 0, sizeof(*ctx));
  1712. /* SHA3-512, user can override */
  1713. ctx->input_block_bytes = (1600 - 512*2) / 8; /* 72 bytes */
  1714. }
  1715. void FAST_FUNC sha3_hash(sha3_ctx_t *ctx, const void *buffer, size_t len)
  1716. {
  1717. #if SHA3_SMALL
  1718. const uint8_t *data = buffer;
  1719. unsigned bufpos = ctx->bytes_queued;
  1720. while (1) {
  1721. unsigned remaining = ctx->input_block_bytes - bufpos;
  1722. if (remaining > len)
  1723. remaining = len;
  1724. len -= remaining;
  1725. /* XOR data into buffer */
  1726. while (remaining != 0) {
  1727. uint8_t *buf = (uint8_t*)ctx->state;
  1728. buf[bufpos] ^= *data++;
  1729. bufpos++;
  1730. remaining--;
  1731. }
  1732. /* Clever way to do "if (bufpos != N) break; ... ; bufpos = 0;" */
  1733. bufpos -= ctx->input_block_bytes;
  1734. if (bufpos != 0)
  1735. break;
  1736. /* Buffer is filled up, process it */
  1737. sha3_process_block72(ctx->state);
  1738. /*bufpos = 0; - already is */
  1739. }
  1740. ctx->bytes_queued = bufpos + ctx->input_block_bytes;
  1741. #else
  1742. /* +50 bytes code size, but a bit faster because of long-sized XORs */
  1743. const uint8_t *data = buffer;
  1744. unsigned bufpos = ctx->bytes_queued;
  1745. unsigned iblk_bytes = ctx->input_block_bytes;
  1746. /* If already data in queue, continue queuing first */
  1747. if (bufpos != 0) {
  1748. while (len != 0) {
  1749. uint8_t *buf = (uint8_t*)ctx->state;
  1750. buf[bufpos] ^= *data++;
  1751. len--;
  1752. bufpos++;
  1753. if (bufpos == iblk_bytes) {
  1754. bufpos = 0;
  1755. goto do_block;
  1756. }
  1757. }
  1758. }
  1759. /* Absorb complete blocks */
  1760. while (len >= iblk_bytes) {
  1761. /* XOR data onto beginning of state[].
  1762. * We try to be efficient - operate one word at a time, not byte.
  1763. * Careful wrt unaligned access: can't just use "*(long*)data"!
  1764. */
  1765. unsigned count = iblk_bytes / sizeof(long);
  1766. long *buf = (long*)ctx->state;
  1767. do {
  1768. long v;
  1769. move_from_unaligned_long(v, (long*)data);
  1770. *buf++ ^= v;
  1771. data += sizeof(long);
  1772. } while (--count);
  1773. len -= iblk_bytes;
  1774. do_block:
  1775. sha3_process_block72(ctx->state);
  1776. }
  1777. /* Queue remaining data bytes */
  1778. while (len != 0) {
  1779. uint8_t *buf = (uint8_t*)ctx->state;
  1780. buf[bufpos] ^= *data++;
  1781. bufpos++;
  1782. len--;
  1783. }
  1784. ctx->bytes_queued = bufpos;
  1785. #endif
  1786. }
  1787. unsigned FAST_FUNC sha3_end(sha3_ctx_t *ctx, void *resbuf)
  1788. {
  1789. /* Padding */
  1790. uint8_t *buf = (uint8_t*)ctx->state;
  1791. /*
  1792. * Keccak block padding is: add 1 bit after last bit of input,
  1793. * then add zero bits until the end of block, and add the last 1 bit
  1794. * (the last bit in the block) - the "10*1" pattern.
  1795. * SHA3 standard appends additional two bits, 01, before that padding:
  1796. *
  1797. * SHA3-224(M) = KECCAK[448](M||01, 224)
  1798. * SHA3-256(M) = KECCAK[512](M||01, 256)
  1799. * SHA3-384(M) = KECCAK[768](M||01, 384)
  1800. * SHA3-512(M) = KECCAK[1024](M||01, 512)
  1801. * (M is the input, || is bit concatenation)
  1802. *
  1803. * The 6 below contains 01 "SHA3" bits and the first 1 "Keccak" bit:
  1804. */
  1805. buf[ctx->bytes_queued] ^= 6; /* bit pattern 00000110 */
  1806. buf[ctx->input_block_bytes - 1] ^= 0x80;
  1807. sha3_process_block72(ctx->state);
  1808. /* Output */
  1809. memcpy(resbuf, ctx->state, 64);
  1810. return 64;
  1811. }