/* armv8-sha256.c
 *
 * Copyright (C) 2006-2020 wolfSSL Inc.
 *
 * This file is part of wolfSSL.
 *
 * wolfSSL is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * wolfSSL is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
 */

#ifdef HAVE_CONFIG_H
    #include <config.h>
#endif

#include <wolfssl/wolfcrypt/settings.h>

#ifdef WOLFSSL_ARMASM
#if !defined(NO_SHA256) || defined(WOLFSSL_SHA224)

#include <wolfssl/wolfcrypt/sha256.h>
#include <wolfssl/wolfcrypt/logging.h>
#include <wolfssl/wolfcrypt/error-crypt.h>

#ifdef NO_INLINE
    #include <wolfssl/wolfcrypt/misc.h>
#else
    #define WOLFSSL_MISC_INCLUDED
    #include <wolfcrypt/src/misc.c>
#endif

static const ALIGN32 word32 K[64] = {
    0x428A2F98L, 0x71374491L, 0xB5C0FBCFL, 0xE9B5DBA5L, 0x3956C25BL,
    0x59F111F1L, 0x923F82A4L, 0xAB1C5ED5L, 0xD807AA98L, 0x12835B01L,
    0x243185BEL, 0x550C7DC3L, 0x72BE5D74L, 0x80DEB1FEL, 0x9BDC06A7L,
    0xC19BF174L, 0xE49B69C1L, 0xEFBE4786L, 0x0FC19DC6L, 0x240CA1CCL,
    0x2DE92C6FL, 0x4A7484AAL, 0x5CB0A9DCL, 0x76F988DAL, 0x983E5152L,
    0xA831C66DL, 0xB00327C8L, 0xBF597FC7L, 0xC6E00BF3L, 0xD5A79147L,
    0x06CA6351L, 0x14292967L, 0x27B70A85L, 0x2E1B2138L, 0x4D2C6DFCL,
    0x53380D13L, 0x650A7354L, 0x766A0ABBL, 0x81C2C92EL, 0x92722C85L,
    0xA2BFE8A1L, 0xA81A664BL, 0xC24B8B70L, 0xC76C51A3L, 0xD192E819L,
    0xD6990624L, 0xF40E3585L, 0x106AA070L, 0x19A4C116L, 0x1E376C08L,
    0x2748774CL, 0x34B0BCB5L, 0x391C0CB3L, 0x4ED8AA4AL, 0x5B9CCA4FL,
    0x682E6FF3L, 0x748F82EEL, 0x78A5636FL, 0x84C87814L, 0x8CC70208L,
    0x90BEFFFAL, 0xA4506CEBL, 0xBEF9A3F7L, 0xC67178F2L
};
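
/* The table above holds the 64 SHA-256 round constants from FIPS 180-4:
 * the first 32 bits of the fractional parts of the cube roots of the first
 * 64 primes. ALIGN32 keeps the table aligned for the vector loads below. */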

static int InitSha256(wc_Sha256* sha256)
{
    int ret = 0;

    if (sha256 == NULL) {
        return BAD_FUNC_ARG;
    }

    sha256->digest[0] = 0x6A09E667L;
    sha256->digest[1] = 0xBB67AE85L;
    sha256->digest[2] = 0x3C6EF372L;
    sha256->digest[3] = 0xA54FF53AL;
    sha256->digest[4] = 0x510E527FL;
    sha256->digest[5] = 0x9B05688CL;
    sha256->digest[6] = 0x1F83D9ABL;
    sha256->digest[7] = 0x5BE0CD19L;

    sha256->buffLen = 0;
    sha256->loLen   = 0;
    sha256->hiLen   = 0;

    return ret;
}
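
/* The initial digest values above are H(0) from FIPS 180-4: the first 32
 * bits of the fractional parts of the square roots of the first 8 primes. */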

static WC_INLINE void AddLength(wc_Sha256* sha256, word32 len)
{
    word32 tmp = sha256->loLen;
    if ((sha256->loLen += len) < tmp)
        sha256->hiLen++;                       /* carry low to high */
}
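
/* AddLength keeps the 64-bit message length as two 32-bit words: if the
 * unsigned addition to loLen wraps around, the new value is smaller than
 * the old one, and the overflow is carried into hiLen. */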

#ifdef __aarch64__

/* ARMv8 hardware acceleration */
static WC_INLINE int Sha256Update(wc_Sha256* sha256, const byte* data, word32 len)
{
    word32 add;
    word32 numBlocks;

    /* only perform actions if a buffer is passed in */
    if (len > 0) {
        /* fill leftover buffer with data */
        add = min(len, WC_SHA256_BLOCK_SIZE - sha256->buffLen);
        XMEMCPY((byte*)(sha256->buffer) + sha256->buffLen, data, add);
        sha256->buffLen += add;
        data            += add;
        len             -= add;

        /* number of blocks in a row to complete */
        numBlocks = (len + sha256->buffLen) / WC_SHA256_BLOCK_SIZE;

        if (numBlocks > 0) {
            word32* k = (word32*)K;

            /* get leftover amount after blocks */
            add = (len + sha256->buffLen) - numBlocks * WC_SHA256_BLOCK_SIZE;
            __asm__ volatile (
            "#load leftover data\n"
            "LD1 {v0.2d-v3.2d}, %[buffer] \n"

            "#load current digest\n"
            "LD1 {v12.2d-v13.2d}, %[digest] \n"
            "MOV w8, %w[blocks] \n"
            "REV32 v0.16b, v0.16b \n"
            "REV32 v1.16b, v1.16b \n"
            "REV32 v2.16b, v2.16b \n"
            "REV32 v3.16b, v3.16b \n"

            "#load K values in \n"
            "LD1 {v16.4s-v19.4s}, [%[k]], #64 \n"
            "LD1 {v20.4s-v23.4s}, [%[k]], #64 \n"
            "MOV v14.16b, v12.16b \n" /* store digest for add at the end */
            "MOV v15.16b, v13.16b \n"
            "LD1 {v24.4s-v27.4s}, [%[k]], #64 \n"
            "LD1 {v28.4s-v31.4s}, [%[k]], #64 \n"

            /* beginning of SHA256 block operation */
            "1:\n"
            /* Round 1 */
            "MOV v4.16b, v0.16b \n"
            "ADD v0.4s, v0.4s, v16.4s \n"
            "MOV v11.16b, v12.16b \n"
            "SHA256H q12, q13, v0.4s \n"
            "SHA256H2 q13, q11, v0.4s \n"

            /* Round 2 */
            "SHA256SU0 v4.4s, v1.4s \n"
            "ADD v0.4s, v1.4s, v17.4s \n"
            "MOV v11.16b, v12.16b \n"
            "SHA256SU1 v4.4s, v2.4s, v3.4s \n"
            "SHA256H q12, q13, v0.4s \n"
            "SHA256H2 q13, q11, v0.4s \n"

            /* Round 3 */
            "SHA256SU0 v1.4s, v2.4s \n"
            "ADD v0.4s, v2.4s, v18.4s \n"
            "MOV v11.16b, v12.16b \n"
            "SHA256SU1 v1.4s, v3.4s, v4.4s \n"
            "SHA256H q12, q13, v0.4s \n"
            "SHA256H2 q13, q11, v0.4s \n"

            /* Round 4 */
            "SHA256SU0 v2.4s, v3.4s \n"
            "ADD v0.4s, v3.4s, v19.4s \n"
            "MOV v11.16b, v12.16b \n"
            "SHA256SU1 v2.4s, v4.4s, v1.4s \n"
            "SHA256H q12, q13, v0.4s \n"
            "SHA256H2 q13, q11, v0.4s \n"

            /* Round 5 */
            "SHA256SU0 v3.4s, v4.4s \n"
            "ADD v0.4s, v4.4s, v20.4s \n"
            "MOV v11.16b, v12.16b \n"
            "SHA256SU1 v3.4s, v1.4s, v2.4s \n"
            "SHA256H q12, q13, v0.4s \n"
            "SHA256H2 q13, q11, v0.4s \n"

            /* Round 6 */
            "SHA256SU0 v4.4s, v1.4s \n"
            "ADD v0.4s, v1.4s, v21.4s \n"
            "MOV v11.16b, v12.16b \n"
            "SHA256SU1 v4.4s, v2.4s, v3.4s \n"
            "SHA256H q12, q13, v0.4s \n"
            "SHA256H2 q13, q11, v0.4s \n"

            /* Round 7 */
            "SHA256SU0 v1.4s, v2.4s \n"
            "ADD v0.4s, v2.4s, v22.4s \n"
            "MOV v11.16b, v12.16b \n"
            "SHA256SU1 v1.4s, v3.4s, v4.4s \n"
            "SHA256H q12, q13, v0.4s \n"
            "SHA256H2 q13, q11, v0.4s \n"

            /* Round 8 */
            "SHA256SU0 v2.4s, v3.4s \n"
            "ADD v0.4s, v3.4s, v23.4s \n"
            "MOV v11.16b, v12.16b \n"
            "SHA256SU1 v2.4s, v4.4s, v1.4s \n"
            "SHA256H q12, q13, v0.4s \n"
            "SHA256H2 q13, q11, v0.4s \n"

            /* Round 9 */
            "SHA256SU0 v3.4s, v4.4s \n"
            "ADD v0.4s, v4.4s, v24.4s \n"
            "MOV v11.16b, v12.16b \n"
            "SHA256SU1 v3.4s, v1.4s, v2.4s \n"
            "SHA256H q12, q13, v0.4s \n"
            "SHA256H2 q13, q11, v0.4s \n"

            /* Round 10 */
            "SHA256SU0 v4.4s, v1.4s \n"
            "ADD v0.4s, v1.4s, v25.4s \n"
            "MOV v11.16b, v12.16b \n"
            "SHA256SU1 v4.4s, v2.4s, v3.4s \n"
            "SHA256H q12, q13, v0.4s \n"
            "SHA256H2 q13, q11, v0.4s \n"

            /* Round 11 */
            "SHA256SU0 v1.4s, v2.4s \n"
            "ADD v0.4s, v2.4s, v26.4s \n"
            "MOV v11.16b, v12.16b \n"
            "SHA256SU1 v1.4s, v3.4s, v4.4s \n"
            "SHA256H q12, q13, v0.4s \n"
            "SHA256H2 q13, q11, v0.4s \n"

            /* Round 12 */
            "SHA256SU0 v2.4s, v3.4s \n"
            "ADD v0.4s, v3.4s, v27.4s \n"
            "MOV v11.16b, v12.16b \n"
            "SHA256SU1 v2.4s, v4.4s, v1.4s \n"
            "SHA256H q12, q13, v0.4s \n"
            "SHA256H2 q13, q11, v0.4s \n"

            /* Round 13 */
            "SHA256SU0 v3.4s, v4.4s \n"
            "ADD v0.4s, v4.4s, v28.4s \n"
            "MOV v11.16b, v12.16b \n"
            "SHA256SU1 v3.4s, v1.4s, v2.4s \n"
            "SHA256H q12, q13, v0.4s \n"
            "SHA256H2 q13, q11, v0.4s \n"

            /* Round 14 */
            "ADD v0.4s, v1.4s, v29.4s \n"
            "MOV v11.16b, v12.16b \n"
            "SHA256H q12, q13, v0.4s \n"
            "SHA256H2 q13, q11, v0.4s \n"

            /* Round 15 */
            "ADD v0.4s, v2.4s, v30.4s \n"
            "MOV v11.16b, v12.16b \n"
            "SHA256H q12, q13, v0.4s \n"
            "SHA256H2 q13, q11, v0.4s \n"

            /* Round 16 */
            "ADD v0.4s, v3.4s, v31.4s \n"
            "MOV v11.16b, v12.16b \n"
            "SHA256H q12, q13, v0.4s \n"
            "SHA256H2 q13, q11, v0.4s \n"

            "#Add working vars back into digest state \n"
            "SUB w8, w8, #1 \n"
            "ADD v12.4s, v12.4s, v14.4s \n"
            "ADD v13.4s, v13.4s, v15.4s \n"

            "#check if more blocks should be done\n"
            "CBZ w8, 2f \n"

            "#load in message and schedule updates \n"
            "LD1 {v0.2d-v3.2d}, [%[dataIn]], #64 \n"
            "MOV v14.16b, v12.16b \n"
            "MOV v15.16b, v13.16b \n"
            "REV32 v0.16b, v0.16b \n"
            "REV32 v1.16b, v1.16b \n"
            "REV32 v2.16b, v2.16b \n"
            "REV32 v3.16b, v3.16b \n"
            "B 1b \n" /* do another block */

            "2:\n"
            "STP q12, q13, %[out] \n"

            : [out] "=m" (sha256->digest), "=m" (sha256->buffer), "=r" (numBlocks),
              "=r" (data), "=r" (k)
            : [k] "4" (k), [digest] "m" (sha256->digest), [buffer] "m" (sha256->buffer),
              [blocks] "2" (numBlocks), [dataIn] "3" (data)
            : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
              "v8",  "v9",  "v10", "v11", "v12", "v13", "v14",
              "v15", "v16", "v17", "v18", "v19", "v20", "v21",
              "v22", "v23", "v24", "v25", "v26", "v27", "v28",
              "v29", "v30", "v31", "w8"
            );

            AddLength(sha256, WC_SHA256_BLOCK_SIZE * numBlocks);

            /* copy over any remaining data leftover */
            XMEMCPY(sha256->buffer, data, add);
            sha256->buffLen = add;
        }
    }

    /* account for possibility of not used if len = 0 */
    (void)add;
    (void)numBlocks;

    return 0;
}
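
/* In outline, the AArch64 block loop above keeps the whole working set in
 * NEON registers: v0-v3 hold the 16 message words (byte-reversed to big
 * endian), v16-v31 hold all 64 K constants, v12/v13 hold the running state
 * with a copy saved in v14/v15 for the feed-forward add, and each of the 16
 * "rounds" performs 4 SHA-256 rounds via one SHA256SU0/SHA256SU1 schedule
 * update plus a SHA256H/SHA256H2 pair. */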

static WC_INLINE int Sha256Final(wc_Sha256* sha256, byte* hash)
{
    byte* local;

    local = (byte*)sha256->buffer;
    AddLength(sha256, sha256->buffLen);  /* before adding pads */
    local[sha256->buffLen++] = 0x80;     /* add 1 */

    /* pad with zeros */
    if (sha256->buffLen > WC_SHA256_PAD_SIZE) {
        XMEMSET(&local[sha256->buffLen], 0, WC_SHA256_BLOCK_SIZE - sha256->buffLen);
        sha256->buffLen += WC_SHA256_BLOCK_SIZE - sha256->buffLen;

        __asm__ volatile (
        "LD1 {v4.2d-v7.2d}, %[buffer] \n"
        "MOV v0.16b, v4.16b \n"
        "MOV v1.16b, v5.16b \n"
        "REV32 v0.16b, v0.16b \n"
        "REV32 v1.16b, v1.16b \n"
        "MOV v2.16b, v6.16b \n"
        "MOV v3.16b, v7.16b \n"
        "REV32 v2.16b, v2.16b \n"
        "REV32 v3.16b, v3.16b \n"
        "MOV v4.16b, v0.16b \n"
        "MOV v5.16b, v1.16b \n"
        "LD1 {v20.2d-v21.2d}, %[digest] \n"

        "#SHA256 operation on updated message \n"
        "MOV v16.16b, v20.16b \n"
        "MOV v17.16b, v21.16b \n"
        "LD1 {v22.16b-v25.16b}, [%[k]], #64 \n"
        "SHA256SU0 v4.4s, v1.4s \n"
        "ADD v0.4s, v0.4s, v22.4s \n"
        "MOV v6.16b, v2.16b \n"
        "MOV v18.16b, v16.16b \n"
        "SHA256SU1 v4.4s, v2.4s, v3.4s \n"
        "SHA256H q16, q17, v0.4s \n"
        "SHA256H2 q17, q18, v0.4s \n"
        "SHA256SU0 v5.4s, v2.4s \n"
        "ADD v1.4s, v1.4s, v23.4s \n"
        "MOV v18.16b, v16.16b \n"
        "MOV v7.16b, v3.16b \n"
        "SHA256SU1 v5.4s, v3.4s, v4.4s \n"
        "SHA256H q16, q17, v1.4s \n"
        "SHA256H2 q17, q18, v1.4s \n"
        "SHA256SU0 v6.4s, v3.4s \n"
        "ADD v2.4s, v2.4s, v24.4s \n"
        "MOV v18.16b, v16.16b \n"
        "MOV v8.16b, v4.16b \n"
        "SHA256SU1 v6.4s, v4.4s, v5.4s \n"
        "SHA256H q16, q17, v2.4s \n"
        "SHA256H2 q17, q18, v2.4s \n"
        "SHA256SU0 v7.4s, v4.4s \n"
        "ADD v3.4s, v3.4s, v25.4s \n"
        "MOV v18.16b, v16.16b \n"
        "MOV v9.16b, v5.16b \n"
        "SHA256SU1 v7.4s, v5.4s, v6.4s \n"
        "SHA256H q16, q17, v3.4s \n"
        "SHA256H2 q17, q18, v3.4s \n"
        "LD1 {v22.16b-v25.16b}, [%[k]], #64 \n"
        "SHA256SU0 v8.4s, v5.4s \n"
        "ADD v4.4s, v4.4s, v22.4s \n"
        "MOV v18.16b, v16.16b \n"
        "MOV v10.16b, v6.16b \n"
        "SHA256SU1 v8.4s, v6.4s, v7.4s \n"
        "SHA256H q16, q17, v4.4s \n"
        "SHA256H2 q17, q18, v4.4s \n"
        "SHA256SU0 v9.4s, v6.4s \n"
        "ADD v5.4s, v5.4s, v23.4s \n"
        "MOV v18.16b, v16.16b \n"
        "MOV v11.16b, v7.16b \n"
        "SHA256SU1 v9.4s, v7.4s, v8.4s \n"
        "SHA256H q16, q17, v5.4s \n"
        "SHA256H2 q17, q18, v5.4s \n"
        "SHA256SU0 v10.4s, v7.4s \n"
        "ADD v6.4s, v6.4s, v24.4s \n"
        "MOV v18.16b, v16.16b \n"
        "MOV v12.16b, v8.16b \n"
        "SHA256SU1 v10.4s, v8.4s, v9.4s \n"
        "SHA256H q16, q17, v6.4s \n"
        "SHA256H2 q17, q18, v6.4s \n"
        "SHA256SU0 v11.4s, v8.4s \n"
        "ADD v7.4s, v7.4s, v25.4s \n"
        "MOV v18.16b, v16.16b \n"
        "MOV v13.16b, v9.16b \n"
        "SHA256SU1 v11.4s, v9.4s, v10.4s \n"
        "SHA256H q16, q17, v7.4s \n"
        "SHA256H2 q17, q18, v7.4s \n"
        "LD1 {v22.16b-v25.16b}, [%[k]], #64 \n"
        "SHA256SU0 v12.4s, v9.4s \n"
        "ADD v8.4s, v8.4s, v22.4s \n"
        "MOV v18.16b, v16.16b \n"
        "MOV v14.16b, v10.16b \n"
        "SHA256SU1 v12.4s, v10.4s, v11.4s \n"
        "SHA256H q16, q17, v8.4s \n"
        "SHA256H2 q17, q18, v8.4s \n"
        "SHA256SU0 v13.4s, v10.4s \n"
        "ADD v9.4s, v9.4s, v23.4s \n"
        "MOV v18.16b, v16.16b \n"
        "MOV v15.16b, v11.16b \n"
        "SHA256SU1 v13.4s, v11.4s, v12.4s \n"
        "SHA256H q16, q17, v9.4s \n"
        "SHA256H2 q17, q18, v9.4s \n"
        "SHA256SU0 v14.4s, v11.4s \n"
        "ADD v10.4s, v10.4s, v24.4s \n"
        "MOV v18.16b, v16.16b \n"
        "SHA256SU1 v14.4s, v12.4s, v13.4s \n"
        "SHA256H q16, q17, v10.4s \n"
        "SHA256H2 q17, q18, v10.4s \n"
        "SHA256SU0 v15.4s, v12.4s \n"
        "ADD v11.4s, v11.4s, v25.4s \n"
        "MOV v18.16b, v16.16b \n"
        "SHA256SU1 v15.4s, v13.4s, v14.4s \n"
        "SHA256H q16, q17, v11.4s \n"
        "SHA256H2 q17, q18, v11.4s \n"
        "LD1 {v22.16b-v25.16b}, [%[k]] \n"
        "ADD v12.4s, v12.4s, v22.4s \n"
        "MOV v18.16b, v16.16b \n"
        "SHA256H q16, q17, v12.4s \n"
        "SHA256H2 q17, q18, v12.4s \n"
        "ADD v13.4s, v13.4s, v23.4s \n"
        "MOV v18.16b, v16.16b \n"
        "SHA256H q16, q17, v13.4s \n"
        "SHA256H2 q17, q18, v13.4s \n"
        "ADD v14.4s, v14.4s, v24.4s \n"
        "MOV v18.16b, v16.16b \n"
        "SHA256H q16, q17, v14.4s \n"
        "SHA256H2 q17, q18, v14.4s \n"
        "ADD v15.4s, v15.4s, v25.4s \n"
        "MOV v18.16b, v16.16b \n"
        "SHA256H q16, q17, v15.4s \n"
        "SHA256H2 q17, q18, v15.4s \n"

        "#Add working vars back into digest state \n"
        "ADD v16.4s, v16.4s, v20.4s \n"
        "ADD v17.4s, v17.4s, v21.4s \n"
        "STP q16, q17, %[out] \n"
        : [out] "=m" (sha256->digest)
        : [k] "r" (K), [digest] "m" (sha256->digest),
          [buffer] "m" (sha256->buffer)
        : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7"
        , "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15"
        , "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23"
        , "v24", "v25"
        );

        sha256->buffLen = 0;
    }
    XMEMSET(&local[sha256->buffLen], 0, WC_SHA256_PAD_SIZE - sha256->buffLen);

    /* put lengths in bits */
    sha256->hiLen = (sha256->loLen >> (8*sizeof(sha256->loLen) - 3)) +
                    (sha256->hiLen << 3);
    sha256->loLen = sha256->loLen << 3;

    /* store lengths */
#if defined(LITTLE_ENDIAN_ORDER)
    __asm__ volatile (
    "LD1 {v0.2d-v3.2d}, %[in] \n"
    "REV32 v0.16b, v0.16b \n"
    "REV32 v1.16b, v1.16b \n"
    "REV32 v2.16b, v2.16b \n"
    "REV32 v3.16b, v3.16b \n"
    "ST1 {v0.2d-v3.2d}, %[out] \n"
    : [out] "=m" (sha256->buffer)
    : [in] "m" (sha256->buffer)
    : "cc", "memory", "v0", "v1", "v2", "v3"
    );
#endif

    /* ! length ordering dependent on digest endian type ! */
    XMEMCPY(&local[WC_SHA256_PAD_SIZE], &sha256->hiLen, sizeof(word32));
    XMEMCPY(&local[WC_SHA256_PAD_SIZE + sizeof(word32)], &sha256->loLen,
            sizeof(word32));

    __asm__ volatile (
    "#load in message and schedule updates \n"
    "LD1 {v4.2d-v7.2d}, %[buffer] \n"
    "MOV v0.16b, v4.16b \n"
    "MOV v1.16b, v5.16b \n"
    "MOV v2.16b, v6.16b \n"
    "MOV v3.16b, v7.16b \n"
    "LD1 {v20.2d-v21.2d}, %[digest] \n"
    "MOV v16.16b, v20.16b \n"
    "MOV v17.16b, v21.16b \n"
    "LD1 {v22.16b-v25.16b}, [%[k]], #64 \n"
    "SHA256SU0 v4.4s, v1.4s \n"
    "ADD v0.4s, v0.4s, v22.4s \n"
    "MOV v6.16b, v2.16b \n"
    "MOV v18.16b, v16.16b \n"
    "SHA256SU1 v4.4s, v2.4s, v3.4s \n"
    "SHA256H q16, q17, v0.4s \n"
    "SHA256H2 q17, q18, v0.4s \n"
    "SHA256SU0 v5.4s, v2.4s \n"
    "ADD v1.4s, v1.4s, v23.4s \n"
    "MOV v7.16b, v3.16b \n"
    "MOV v18.16b, v16.16b \n"
    "SHA256SU1 v5.4s, v3.4s, v4.4s \n"
    "SHA256H q16, q17, v1.4s \n"
    "SHA256H2 q17, q18, v1.4s \n"
    "SHA256SU0 v6.4s, v3.4s \n"
    "ADD v2.4s, v2.4s, v24.4s \n"
    "MOV v18.16b, v16.16b \n"
    "MOV v8.16b, v4.16b \n"
    "SHA256SU1 v6.4s, v4.4s, v5.4s \n"
    "SHA256H q16, q17, v2.4s \n"
    "SHA256H2 q17, q18, v2.4s \n"
    "SHA256SU0 v7.4s, v4.4s \n"
    "ADD v3.4s, v3.4s, v25.4s \n"
    "MOV v18.16b, v16.16b \n"
    "MOV v9.16b, v5.16b \n"
    "SHA256SU1 v7.4s, v5.4s, v6.4s \n"
    "SHA256H q16, q17, v3.4s \n"
    "SHA256H2 q17, q18, v3.4s \n"
    "LD1 {v22.16b-v25.16b}, [%[k]], #64 \n"
    "SHA256SU0 v8.4s, v5.4s \n"
    "ADD v4.4s, v4.4s, v22.4s \n"
    "MOV v18.16b, v16.16b \n"
    "MOV v10.16b, v6.16b \n"
    "SHA256SU1 v8.4s, v6.4s, v7.4s \n"
    "SHA256H q16, q17, v4.4s \n"
    "SHA256H2 q17, q18, v4.4s \n"
    "SHA256SU0 v9.4s, v6.4s \n"
    "ADD v5.4s, v5.4s, v23.4s \n"
    "MOV v18.16b, v16.16b \n"
    "MOV v11.16b, v7.16b \n"
    "SHA256SU1 v9.4s, v7.4s, v8.4s \n"
    "SHA256H q16, q17, v5.4s \n"
    "SHA256H2 q17, q18, v5.4s \n"
    "SHA256SU0 v10.4s, v7.4s \n"
    "ADD v6.4s, v6.4s, v24.4s \n"
    "MOV v18.16b, v16.16b \n"
    "MOV v12.16b, v8.16b \n"
    "SHA256SU1 v10.4s, v8.4s, v9.4s \n"
    "SHA256H q16, q17, v6.4s \n"
    "SHA256H2 q17, q18, v6.4s \n"
    "SHA256SU0 v11.4s, v8.4s \n"
    "ADD v7.4s, v7.4s, v25.4s \n"
    "MOV v18.16b, v16.16b \n"
    "MOV v13.16b, v9.16b \n"
    "SHA256SU1 v11.4s, v9.4s, v10.4s \n"
    "SHA256H q16, q17, v7.4s \n"
    "SHA256H2 q17, q18, v7.4s \n"
    "LD1 {v22.16b-v25.16b}, [%[k]], #64 \n"
    "SHA256SU0 v12.4s, v9.4s \n"
    "ADD v8.4s, v8.4s, v22.4s \n"
    "MOV v18.16b, v16.16b \n"
    "MOV v14.16b, v10.16b \n"
    "SHA256SU1 v12.4s, v10.4s, v11.4s \n"
    "SHA256H q16, q17, v8.4s \n"
    "SHA256H2 q17, q18, v8.4s \n"
    "SHA256SU0 v13.4s, v10.4s \n"
    "ADD v9.4s, v9.4s, v23.4s \n"
    "MOV v18.16b, v16.16b \n"
    "MOV v15.16b, v11.16b \n"
    "SHA256SU1 v13.4s, v11.4s, v12.4s \n"
    "SHA256H q16, q17, v9.4s \n"
    "SHA256H2 q17, q18, v9.4s \n"
    "SHA256SU0 v14.4s, v11.4s \n"
    "ADD v10.4s, v10.4s, v24.4s \n"
    "MOV v18.16b, v16.16b \n"
    "SHA256SU1 v14.4s, v12.4s, v13.4s \n"
    "SHA256H q16, q17, v10.4s \n"
    "SHA256H2 q17, q18, v10.4s \n"
    "SHA256SU0 v15.4s, v12.4s \n"
    "ADD v11.4s, v11.4s, v25.4s \n"
    "MOV v18.16b, v16.16b \n"
    "SHA256SU1 v15.4s, v13.4s, v14.4s \n"
    "SHA256H q16, q17, v11.4s \n"
    "SHA256H2 q17, q18, v11.4s \n"
    "LD1 {v22.16b-v25.16b}, [%[k]] \n"
    "ADD v12.4s, v12.4s, v22.4s \n"
    "MOV v18.16b, v16.16b \n"
    "SHA256H q16, q17, v12.4s \n"
    "SHA256H2 q17, q18, v12.4s \n"
    "ADD v13.4s, v13.4s, v23.4s \n"
    "MOV v18.16b, v16.16b \n"
    "SHA256H q16, q17, v13.4s \n"
    "SHA256H2 q17, q18, v13.4s \n"
    "ADD v14.4s, v14.4s, v24.4s \n"
    "MOV v18.16b, v16.16b \n"
    "SHA256H q16, q17, v14.4s \n"
    "SHA256H2 q17, q18, v14.4s \n"
    "ADD v15.4s, v15.4s, v25.4s \n"
    "MOV v18.16b, v16.16b \n"
    "SHA256H q16, q17, v15.4s \n"
    "SHA256H2 q17, q18, v15.4s \n"

    "#Add working vars back into digest state \n"
    "ADD v16.4s, v16.4s, v20.4s \n"
    "ADD v17.4s, v17.4s, v21.4s \n"

    "#Store value as hash output \n"
#if defined(LITTLE_ENDIAN_ORDER)
    "REV32 v16.16b, v16.16b \n"
#endif
    "ST1 {v16.16b}, [%[hashOut]], #16 \n"
#if defined(LITTLE_ENDIAN_ORDER)
    "REV32 v17.16b, v17.16b \n"
#endif
    "ST1 {v17.16b}, [%[hashOut]] \n"
    : [hashOut] "=r" (hash)
    : [k] "r" (K), [digest] "m" (sha256->digest),
      [buffer] "m" (sha256->buffer),
      "0" (hash)
    : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
      "v8",  "v9",  "v10", "v11", "v12", "v13", "v14",
      "v15", "v16", "v17", "v18", "v19", "v20", "v21",
      "v22", "v23", "v24", "v25"
    );

    return 0;
}
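
/* Note on the padding path above: once the 0x80 byte is appended, a buffer
 * already past WC_SHA256_PAD_SIZE (56 bytes) has no room left for the 8-byte
 * bit length, so that block is zero-filled and compressed on its own and the
 * length goes into an extra all-padding block; otherwise one block suffices. */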

#else /* not using 64 bit */

/* ARMv8 hardware acceleration AArch32 */
static WC_INLINE int Sha256Update(wc_Sha256* sha256, const byte* data, word32 len)
{
    word32 add;
    word32 numBlocks;

    /* only perform actions if a buffer is passed in */
    if (len > 0) {
        /* fill leftover buffer with data */
        add = min(len, WC_SHA256_BLOCK_SIZE - sha256->buffLen);
        XMEMCPY((byte*)(sha256->buffer) + sha256->buffLen, data, add);
        sha256->buffLen += add;
        data            += add;
        len             -= add;

        /* number of blocks in a row to complete */
        numBlocks = (len + sha256->buffLen) / WC_SHA256_BLOCK_SIZE;

        if (numBlocks > 0) {
            word32* bufPt = sha256->buffer;
            word32* digPt = sha256->digest;

            /* get leftover amount after blocks */
            add = (len + sha256->buffLen) - numBlocks * WC_SHA256_BLOCK_SIZE;
            __asm__ volatile (
            "#load leftover data\n"
            "VLDM %[buffer]!, {q0-q3} \n"

            "#load current digest\n"
            "VLDM %[digest], {q12-q13} \n"
            "MOV r8, %[blocks] \n"
            "VREV32.8 q0, q0 \n"
            "VREV32.8 q1, q1 \n"
            "VREV32.8 q2, q2 \n"
            "VREV32.8 q3, q3 \n"
            "VLDM %[k]!, {q5-q8} \n"
            "VLDM %[k]!, {q9} \n"
            "VMOV.32 q14, q12 \n" /* store digest for add at the end */
            "VMOV.32 q15, q13 \n"

            /* beginning of SHA256 block operation */
            "1:\n"
            /* Round 1 */
            "VMOV.32 q4, q0 \n"
            "VADD.i32 q0, q0, q5 \n"
            "VMOV.32 q11, q12 \n"
            "SHA256H.32 q12, q13, q0 \n"
            "SHA256H2.32 q13, q11, q0 \n"

            /* Round 2 */
            "SHA256SU0.32 q4, q1 \n"
            "VADD.i32 q0, q1, q6 \n"
            "VMOV.32 q11, q12 \n"
            "SHA256SU1.32 q4, q2, q3 \n"
            "SHA256H.32 q12, q13, q0 \n"
            "SHA256H2.32 q13, q11, q0 \n"

            /* Round 3 */
            "SHA256SU0.32 q1, q2 \n"
            "VADD.i32 q0, q2, q7 \n"
            "VMOV.32 q11, q12 \n"
            "SHA256SU1.32 q1, q3, q4 \n"
            "SHA256H.32 q12, q13, q0 \n"
            "SHA256H2.32 q13, q11, q0 \n"

            /* Round 4 */
            "SHA256SU0.32 q2, q3 \n"
            "VADD.i32 q0, q3, q8 \n"
            "VMOV.32 q11, q12 \n"
            "SHA256SU1.32 q2, q4, q1 \n"
            "SHA256H.32 q12, q13, q0 \n"
            "SHA256H2.32 q13, q11, q0 \n"

            /* Round 5 */
            "SHA256SU0.32 q3, q4 \n"
            "VADD.i32 q0, q4, q9 \n"
            "VMOV.32 q11, q12 \n"
            "SHA256SU1.32 q3, q1, q2 \n"
            "SHA256H.32 q12, q13, q0 \n"
            "SHA256H2.32 q13, q11, q0 \n"

            /* Round 6 */
            "VLD1.32 {q10}, [%[k]]! \n"
            "SHA256SU0.32 q4, q1 \n"
            "VADD.i32 q0, q1, q10 \n"
            "VMOV.32 q11, q12 \n"
            "SHA256SU1.32 q4, q2, q3 \n"
            "SHA256H.32 q12, q13, q0 \n"
            "SHA256H2.32 q13, q11, q0 \n"

            /* Round 7 */
            "VLD1.32 {q10}, [%[k]]! \n"
            "SHA256SU0.32 q1, q2 \n"
            "VADD.i32 q0, q2, q10 \n"
            "VMOV.32 q11, q12 \n"
            "SHA256SU1.32 q1, q3, q4 \n"
            "SHA256H.32 q12, q13, q0 \n"
            "SHA256H2.32 q13, q11, q0 \n"

            /* Round 8 */
            "VLD1.32 {q10}, [%[k]]! \n"
            "SHA256SU0.32 q2, q3 \n"
            "VADD.i32 q0, q3, q10 \n"
            "VMOV.32 q11, q12 \n"
            "SHA256SU1.32 q2, q4, q1 \n"
            "SHA256H.32 q12, q13, q0 \n"
            "SHA256H2.32 q13, q11, q0 \n"

            /* Round 9 */
            "VLD1.32 {q10}, [%[k]]! \n"
            "SHA256SU0.32 q3, q4 \n"
            "VADD.i32 q0, q4, q10 \n"
            "VMOV.32 q11, q12 \n"
            "SHA256SU1.32 q3, q1, q2 \n"
            "SHA256H.32 q12, q13, q0 \n"
            "SHA256H2.32 q13, q11, q0 \n"

            /* Round 10 */
            "VLD1.32 {q10}, [%[k]]! \n"
            "SHA256SU0.32 q4, q1 \n"
            "VADD.i32 q0, q1, q10 \n"
            "VMOV.32 q11, q12 \n"
            "SHA256SU1.32 q4, q2, q3 \n"
            "SHA256H.32 q12, q13, q0 \n"
            "SHA256H2.32 q13, q11, q0 \n"

            /* Round 11 */
            "VLD1.32 {q10}, [%[k]]! \n"
            "SHA256SU0.32 q1, q2 \n"
            "VADD.i32 q0, q2, q10 \n"
            "VMOV.32 q11, q12 \n"
            "SHA256SU1.32 q1, q3, q4 \n"
            "SHA256H.32 q12, q13, q0 \n"
            "SHA256H2.32 q13, q11, q0 \n"

            /* Round 12 */
            "VLD1.32 {q10}, [%[k]]! \n"
            "SHA256SU0.32 q2, q3 \n"
            "VADD.i32 q0, q3, q10 \n"
            "VMOV.32 q11, q12 \n"
            "SHA256SU1.32 q2, q4, q1 \n"
            "SHA256H.32 q12, q13, q0 \n"
            "SHA256H2.32 q13, q11, q0 \n"

            /* Round 13 */
            "VLD1.32 {q10}, [%[k]]! \n"
            "SHA256SU0.32 q3, q4 \n"
            "VADD.i32 q0, q4, q10 \n"
            "VMOV.32 q11, q12 \n"
            "SHA256SU1.32 q3, q1, q2 \n"
            "SHA256H.32 q12, q13, q0 \n"
            "SHA256H2.32 q13, q11, q0 \n"

            /* Round 14 */
            "VLD1.32 {q10}, [%[k]]! \n"
            "VADD.i32 q0, q1, q10 \n"
            "VMOV.32 q11, q12 \n"
            "SHA256H.32 q12, q13, q0 \n"
            "SHA256H2.32 q13, q11, q0 \n"

            /* Round 15 */
            "VLD1.32 {q10}, [%[k]]! \n"
            "VADD.i32 q0, q2, q10 \n"
            "VMOV.32 q11, q12 \n"
            "SHA256H.32 q12, q13, q0 \n"
            "SHA256H2.32 q13, q11, q0 \n"

            /* Round 16 */
            "VLD1.32 {q10}, [%[k]] \n"
            "SUB r8, r8, #1 \n"
            "VADD.i32 q0, q3, q10 \n"
            "VMOV.32 q11, q12 \n"
            "SHA256H.32 q12, q13, q0 \n"
            "SHA256H2.32 q13, q11, q0 \n"

            "#Add working vars back into digest state \n"
            "VADD.i32 q12, q12, q14 \n"
            "VADD.i32 q13, q13, q15 \n"

            "#check if more blocks should be done\n"
            "CMP r8, #0 \n"
            "BEQ 2f \n"

            "#load in message and schedule updates \n"
            "VLD1.32 {q0}, [%[dataIn]]! \n"
            "VLD1.32 {q1}, [%[dataIn]]! \n"
            "VLD1.32 {q2}, [%[dataIn]]! \n"
            "VLD1.32 {q3}, [%[dataIn]]! \n"

            /* reset K pointer */
            "SUB %[k], %[k], #160 \n"
            "VREV32.8 q0, q0 \n"
            "VREV32.8 q1, q1 \n"
            "VREV32.8 q2, q2 \n"
            "VREV32.8 q3, q3 \n"
            "VMOV.32 q14, q12 \n"
            "VMOV.32 q15, q13 \n"
            "B 1b \n" /* do another block */

            "2:\n"
            "VST1.32 {q12, q13}, [%[out]] \n"

            : [out] "=r" (digPt), "=r" (bufPt), "=r" (numBlocks),
              "=r" (data)
            : [k] "r" (K), [digest] "0" (digPt), [buffer] "1" (bufPt),
              [blocks] "2" (numBlocks), [dataIn] "3" (data)
            : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
              "q8", "q9", "q10", "q11", "q12", "q13", "q14",
              "q15", "r8"
            );

            AddLength(sha256, WC_SHA256_BLOCK_SIZE * numBlocks);

            /* copy over any remaining data leftover */
            XMEMCPY(sha256->buffer, data, add);
            sha256->buffLen = add;
        }
    }

    /* account for possibility of not used if len = 0 */
    (void)add;
    (void)numBlocks;

    return 0;
}
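
/* Unlike the AArch64 path, AArch32 NEON only has q0-q15, so just K[0..19]
 * stay resident in q5-q9; the remaining constants stream through q10, and
 * the "SUB %[k], %[k], #160" above rewinds the K pointer to K[20] before
 * the next block instead of reloading the whole table. */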

static WC_INLINE int Sha256Final(wc_Sha256* sha256, byte* hash)
{
    byte* local;

    if (sha256 == NULL || hash == NULL) {
        return BAD_FUNC_ARG;
    }

    local = (byte*)sha256->buffer;
    AddLength(sha256, sha256->buffLen);  /* before adding pads */
    local[sha256->buffLen++] = 0x80;     /* add 1 */

    /* pad with zeros */
    if (sha256->buffLen > WC_SHA256_PAD_SIZE) {
        word32* bufPt = sha256->buffer;
        word32* digPt = sha256->digest;

        XMEMSET(&local[sha256->buffLen], 0, WC_SHA256_BLOCK_SIZE - sha256->buffLen);
        sha256->buffLen += WC_SHA256_BLOCK_SIZE - sha256->buffLen;

        __asm__ volatile (
        "#load leftover data\n"
        "VLDM %[buffer]!, {q0-q3} \n"

        "#load current digest\n"
        "VLDM %[digest], {q12-q13} \n"
        "VREV32.8 q0, q0 \n"
        "VREV32.8 q1, q1 \n"
        "VREV32.8 q2, q2 \n"
        "VREV32.8 q3, q3 \n"

        "#load K values in \n"
        "VMOV.32 q14, q12 \n" /* store digest for add at the end */
        "VMOV.32 q15, q13 \n"

        /* beginning of SHA256 block operation */
        /* Round 1 */
        "VLD1.32 {q5}, [%[k]]! \n"
        "VMOV.32 q4, q0 \n"
        "VADD.i32 q0, q0, q5 \n"
        "VMOV.32 q11, q12 \n"
        "SHA256H.32 q12, q13, q0 \n"
        "SHA256H2.32 q13, q11, q0 \n"

        /* Round 2 */
        "VLD1.32 {q5}, [%[k]]! \n"
        "SHA256SU0.32 q4, q1 \n"
        "VADD.i32 q0, q1, q5 \n"
        "VMOV.32 q11, q12 \n"
        "SHA256SU1.32 q4, q2, q3 \n"
        "SHA256H.32 q12, q13, q0 \n"
        "SHA256H2.32 q13, q11, q0 \n"

        /* Round 3 */
        "VLD1.32 {q5}, [%[k]]! \n"
        "SHA256SU0.32 q1, q2 \n"
        "VADD.i32 q0, q2, q5 \n"
        "VMOV.32 q11, q12 \n"
        "SHA256SU1.32 q1, q3, q4 \n"
        "SHA256H.32 q12, q13, q0 \n"
        "SHA256H2.32 q13, q11, q0 \n"

        /* Round 4 */
        "VLD1.32 {q5}, [%[k]]! \n"
        "SHA256SU0.32 q2, q3 \n"
        "VADD.i32 q0, q3, q5 \n"
        "VMOV.32 q11, q12 \n"
        "SHA256SU1.32 q2, q4, q1 \n"
        "SHA256H.32 q12, q13, q0 \n"
        "SHA256H2.32 q13, q11, q0 \n"

        /* Round 5 */
        "VLD1.32 {q5}, [%[k]]! \n"
        "SHA256SU0.32 q3, q4 \n"
        "VADD.i32 q0, q4, q5 \n"
        "VMOV.32 q11, q12 \n"
        "SHA256SU1.32 q3, q1, q2 \n"
        "SHA256H.32 q12, q13, q0 \n"
        "SHA256H2.32 q13, q11, q0 \n"

        /* Round 6 */
        "VLD1.32 {q5}, [%[k]]! \n"
        "SHA256SU0.32 q4, q1 \n"
        "VADD.i32 q0, q1, q5 \n"
        "VMOV.32 q11, q12 \n"
        "SHA256SU1.32 q4, q2, q3 \n"
        "SHA256H.32 q12, q13, q0 \n"
        "SHA256H2.32 q13, q11, q0 \n"

        /* Round 7 */
        "VLD1.32 {q5}, [%[k]]! \n"
        "SHA256SU0.32 q1, q2 \n"
        "VADD.i32 q0, q2, q5 \n"
        "VMOV.32 q11, q12 \n"
        "SHA256SU1.32 q1, q3, q4 \n"
        "SHA256H.32 q12, q13, q0 \n"
        "SHA256H2.32 q13, q11, q0 \n"

        /* Round 8 */
        "VLD1.32 {q5}, [%[k]]! \n"
        "SHA256SU0.32 q2, q3 \n"
        "VADD.i32 q0, q3, q5 \n"
        "VMOV.32 q11, q12 \n"
        "SHA256SU1.32 q2, q4, q1 \n"
        "SHA256H.32 q12, q13, q0 \n"
        "SHA256H2.32 q13, q11, q0 \n"

        /* Round 9 */
        "VLD1.32 {q5}, [%[k]]! \n"
        "SHA256SU0.32 q3, q4 \n"
        "VADD.i32 q0, q4, q5 \n"
        "VMOV.32 q11, q12 \n"
        "SHA256SU1.32 q3, q1, q2 \n"
        "SHA256H.32 q12, q13, q0 \n"
        "SHA256H2.32 q13, q11, q0 \n"

        /* Round 10 */
        "VLD1.32 {q5}, [%[k]]! \n"
        "SHA256SU0.32 q4, q1 \n"
        "VADD.i32 q0, q1, q5 \n"
        "VMOV.32 q11, q12 \n"
        "SHA256SU1.32 q4, q2, q3 \n"
        "SHA256H.32 q12, q13, q0 \n"
        "SHA256H2.32 q13, q11, q0 \n"

        /* Round 11 */
        "VLD1.32 {q5}, [%[k]]! \n"
        "SHA256SU0.32 q1, q2 \n"
        "VADD.i32 q0, q2, q5 \n"
        "VMOV.32 q11, q12 \n"
        "SHA256SU1.32 q1, q3, q4 \n"
        "SHA256H.32 q12, q13, q0 \n"
        "SHA256H2.32 q13, q11, q0 \n"

        /* Round 12 */
        "VLD1.32 {q5}, [%[k]]! \n"
        "SHA256SU0.32 q2, q3 \n"
        "VADD.i32 q0, q3, q5 \n"
        "VMOV.32 q11, q12 \n"
        "SHA256SU1.32 q2, q4, q1 \n"
        "SHA256H.32 q12, q13, q0 \n"
        "SHA256H2.32 q13, q11, q0 \n"

        /* Round 13 */
        "VLD1.32 {q5}, [%[k]]! \n"
        "SHA256SU0.32 q3, q4 \n"
        "VADD.i32 q0, q4, q5 \n"
        "VMOV.32 q11, q12 \n"
        "SHA256SU1.32 q3, q1, q2 \n"
        "SHA256H.32 q12, q13, q0 \n"
        "SHA256H2.32 q13, q11, q0 \n"

        /* Round 14 */
        "VLD1.32 {q5}, [%[k]]! \n"
        "VADD.i32 q0, q1, q5 \n"
        "VMOV.32 q11, q12 \n"
        "SHA256H.32 q12, q13, q0 \n"
        "SHA256H2.32 q13, q11, q0 \n"

        /* Round 15 */
        "VLD1.32 {q5}, [%[k]]! \n"
        "VADD.i32 q0, q2, q5 \n"
        "VMOV.32 q11, q12 \n"
        "SHA256H.32 q12, q13, q0 \n"
        "SHA256H2.32 q13, q11, q0 \n"

        /* Round 16 */
        "VLD1.32 {q5}, [%[k]]! \n"
        "VADD.i32 q0, q3, q5 \n"
        "VMOV.32 q11, q12 \n"
        "SHA256H.32 q12, q13, q0 \n"
        "SHA256H2.32 q13, q11, q0 \n"

        "#Add working vars back into digest state \n"
        "VADD.i32 q12, q12, q14 \n"
        "VADD.i32 q13, q13, q15 \n"

        /* reset K pointer */
        "SUB %[k], %[k], #256 \n"
        "VST1.32 {q12, q13}, [%[out]] \n"

        : [out] "=r" (digPt), "=r" (bufPt)
        : [k] "r" (K), [digest] "0" (digPt), [buffer] "1" (bufPt)
        : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
          "q8", "q9", "q10", "q11", "q12", "q13", "q14",
          "q15"
        );

        sha256->buffLen = 0;
    }
    XMEMSET(&local[sha256->buffLen], 0, WC_SHA256_PAD_SIZE - sha256->buffLen);

    /* put lengths in bits */
    sha256->hiLen = (sha256->loLen >> (8*sizeof(sha256->loLen) - 3)) +
                    (sha256->hiLen << 3);
    sha256->loLen = sha256->loLen << 3;

    /* store lengths */
#if defined(LITTLE_ENDIAN_ORDER)
    {
        word32* bufPt = sha256->buffer;

        __asm__ volatile (
        "VLD1.32 {q0}, [%[in]] \n"
        "VREV32.8 q0, q0 \n"
        "VST1.32 {q0}, [%[out]]! \n"
        "VLD1.32 {q1}, [%[in]] \n"
        "VREV32.8 q1, q1 \n"
        "VST1.32 {q1}, [%[out]]! \n"
        "VLD1.32 {q2}, [%[in]] \n"
        "VREV32.8 q2, q2 \n"
        "VST1.32 {q2}, [%[out]]! \n"
        "VLD1.32 {q3}, [%[in]] \n"
        "VREV32.8 q3, q3 \n"
        "VST1.32 {q3}, [%[out]] \n"
        : [out] "=r" (bufPt)
        : [in] "0" (bufPt)
        : "cc", "memory", "q0", "q1", "q2", "q3"
        );
    }
#endif

    /* ! length ordering dependent on digest endian type ! */
    XMEMCPY(&local[WC_SHA256_PAD_SIZE], &sha256->hiLen, sizeof(word32));
    XMEMCPY(&local[WC_SHA256_PAD_SIZE + sizeof(word32)], &sha256->loLen,
            sizeof(word32));

    {
        word32* bufPt = sha256->buffer;
        word32* digPt = sha256->digest;

        __asm__ volatile (
        "#load leftover data\n"
        "VLDM %[buffer]!, {q0-q3} \n"

        "#load current digest\n"
        "VLDM %[digest], {q12-q13} \n"
        "VMOV.32 q14, q12 \n" /* store digest for add at the end */
        "VMOV.32 q15, q13 \n"

        /* beginning of SHA256 block operation */
        /* Round 1 */
        "VLD1.32 {q5}, [%[k]]! \n"
        "VMOV.32 q4, q0 \n"
        "VADD.i32 q0, q0, q5 \n"
        "VMOV.32 q11, q12 \n"
        "SHA256H.32 q12, q13, q0 \n"
        "SHA256H2.32 q13, q11, q0 \n"

        /* Round 2 */
        "VLD1.32 {q5}, [%[k]]! \n"
        "SHA256SU0.32 q4, q1 \n"
        "VADD.i32 q0, q1, q5 \n"
        "VMOV.32 q11, q12 \n"
        "SHA256SU1.32 q4, q2, q3 \n"
        "SHA256H.32 q12, q13, q0 \n"
        "SHA256H2.32 q13, q11, q0 \n"

        /* Round 3 */
        "VLD1.32 {q5}, [%[k]]! \n"
        "SHA256SU0.32 q1, q2 \n"
        "VADD.i32 q0, q2, q5 \n"
        "VMOV.32 q11, q12 \n"
        "SHA256SU1.32 q1, q3, q4 \n"
        "SHA256H.32 q12, q13, q0 \n"
        "SHA256H2.32 q13, q11, q0 \n"

        /* Round 4 */
        "VLD1.32 {q5}, [%[k]]! \n"
        "SHA256SU0.32 q2, q3 \n"
        "VADD.i32 q0, q3, q5 \n"
        "VMOV.32 q11, q12 \n"
        "SHA256SU1.32 q2, q4, q1 \n"
        "SHA256H.32 q12, q13, q0 \n"
        "SHA256H2.32 q13, q11, q0 \n"

        /* Round 5 */
        "VLD1.32 {q5}, [%[k]]! \n"
        "SHA256SU0.32 q3, q4 \n"
        "VADD.i32 q0, q4, q5 \n"
        "VMOV.32 q11, q12 \n"
        "SHA256SU1.32 q3, q1, q2 \n"
        "SHA256H.32 q12, q13, q0 \n"
        "SHA256H2.32 q13, q11, q0 \n"

        /* Round 6 */
        "VLD1.32 {q5}, [%[k]]! \n"
        "SHA256SU0.32 q4, q1 \n"
        "VADD.i32 q0, q1, q5 \n"
        "VMOV.32 q11, q12 \n"
        "SHA256SU1.32 q4, q2, q3 \n"
        "SHA256H.32 q12, q13, q0 \n"
        "SHA256H2.32 q13, q11, q0 \n"

        /* Round 7 */
        "VLD1.32 {q5}, [%[k]]! \n"
        "SHA256SU0.32 q1, q2 \n"
        "VADD.i32 q0, q2, q5 \n"
        "VMOV.32 q11, q12 \n"
        "SHA256SU1.32 q1, q3, q4 \n"
        "SHA256H.32 q12, q13, q0 \n"
        "SHA256H2.32 q13, q11, q0 \n"

        /* Round 8 */
        "VLD1.32 {q5}, [%[k]]! \n"
        "SHA256SU0.32 q2, q3 \n"
        "VADD.i32 q0, q3, q5 \n"
        "VMOV.32 q11, q12 \n"
        "SHA256SU1.32 q2, q4, q1 \n"
        "SHA256H.32 q12, q13, q0 \n"
        "SHA256H2.32 q13, q11, q0 \n"

        /* Round 9 */
        "VLD1.32 {q5}, [%[k]]! \n"
        "SHA256SU0.32 q3, q4 \n"
        "VADD.i32 q0, q4, q5 \n"
        "VMOV.32 q11, q12 \n"
        "SHA256SU1.32 q3, q1, q2 \n"
        "SHA256H.32 q12, q13, q0 \n"
        "SHA256H2.32 q13, q11, q0 \n"

        /* Round 10 */
        "VLD1.32 {q5}, [%[k]]! \n"
        "SHA256SU0.32 q4, q1 \n"
        "VADD.i32 q0, q1, q5 \n"
        "VMOV.32 q11, q12 \n"
        "SHA256SU1.32 q4, q2, q3 \n"
        "SHA256H.32 q12, q13, q0 \n"
        "SHA256H2.32 q13, q11, q0 \n"

        /* Round 11 */
        "VLD1.32 {q5}, [%[k]]! \n"
        "SHA256SU0.32 q1, q2 \n"
        "VADD.i32 q0, q2, q5 \n"
        "VMOV.32 q11, q12 \n"
        "SHA256SU1.32 q1, q3, q4 \n"
        "SHA256H.32 q12, q13, q0 \n"
        "SHA256H2.32 q13, q11, q0 \n"

        /* Round 12 */
        "VLD1.32 {q5}, [%[k]]! \n"
        "SHA256SU0.32 q2, q3 \n"
        "VADD.i32 q0, q3, q5 \n"
        "VMOV.32 q11, q12 \n"
        "SHA256SU1.32 q2, q4, q1 \n"
        "SHA256H.32 q12, q13, q0 \n"
        "SHA256H2.32 q13, q11, q0 \n"

        /* Round 13 */
        "VLD1.32 {q5}, [%[k]]! \n"
        "SHA256SU0.32 q3, q4 \n"
        "VADD.i32 q0, q4, q5 \n"
        "VMOV.32 q11, q12 \n"
        "SHA256SU1.32 q3, q1, q2 \n"
        "SHA256H.32 q12, q13, q0 \n"
        "SHA256H2.32 q13, q11, q0 \n"

        /* Round 14 */
        "VLD1.32 {q5}, [%[k]]! \n"
        "VADD.i32 q0, q1, q5 \n"
        "VMOV.32 q11, q12 \n"
        "SHA256H.32 q12, q13, q0 \n"
        "SHA256H2.32 q13, q11, q0 \n"

        /* Round 15 */
        "VLD1.32 {q5}, [%[k]]! \n"
        "VADD.i32 q0, q2, q5 \n"
        "VMOV.32 q11, q12 \n"
        "SHA256H.32 q12, q13, q0 \n"
        "SHA256H2.32 q13, q11, q0 \n"

        /* Round 16 */
        "VLD1.32 {q5}, [%[k]]! \n"
        "VADD.i32 q0, q3, q5 \n"
        "VMOV.32 q11, q12 \n"
        "SHA256H.32 q12, q13, q0 \n"
        "SHA256H2.32 q13, q11, q0 \n"

        "#Add working vars back into digest state \n"
        "VADD.i32 q12, q12, q14 \n"
        "VADD.i32 q13, q13, q15 \n"

        "#Store value as hash output \n"
#if defined(LITTLE_ENDIAN_ORDER)
        "VREV32.8 q12, q12 \n"
#endif
        "VST1.32 {q12}, [%[hashOut]]! \n"
#if defined(LITTLE_ENDIAN_ORDER)
        "VREV32.8 q13, q13 \n"
#endif
        "VST1.32 {q13}, [%[hashOut]] \n"
        : [out] "=r" (digPt), "=r" (bufPt),
          [hashOut] "=r" (hash)
        : [k] "r" (K), [digest] "0" (digPt), [buffer] "1" (bufPt),
          "2" (hash)
        : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
          "q8", "q9", "q10", "q11", "q12", "q13", "q14",
          "q15"
        );
    }

    return 0;
}

#endif /* __aarch64__ */

#ifndef NO_SHA256

int wc_InitSha256_ex(wc_Sha256* sha256, void* heap, int devId)
{
    if (sha256 == NULL)
        return BAD_FUNC_ARG;

    sha256->heap = heap;
    (void)devId;

    return InitSha256(sha256);
}

int wc_InitSha256(wc_Sha256* sha256)
{
    return wc_InitSha256_ex(sha256, NULL, INVALID_DEVID);
}

void wc_Sha256Free(wc_Sha256* sha256)
{
    (void)sha256;
}

int wc_Sha256Update(wc_Sha256* sha256, const byte* data, word32 len)
{
    if (sha256 == NULL || (data == NULL && len != 0)) {
        return BAD_FUNC_ARG;
    }
    return Sha256Update(sha256, data, len);
}

int wc_Sha256Final(wc_Sha256* sha256, byte* hash)
{
    int ret;

    if (sha256 == NULL || hash == NULL) {
        return BAD_FUNC_ARG;
    }

    ret = Sha256Final(sha256, hash);
    if (ret != 0)
        return ret;

    return InitSha256(sha256);  /* reset state */
}
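
/* Illustrative one-shot use of the wrappers above (a sketch only; "msg" and
 * "msgSz" stand in for the caller's input buffer and its length):
 *
 *     wc_Sha256 sha;
 *     byte digest[WC_SHA256_DIGEST_SIZE];
 *
 *     if (wc_InitSha256(&sha) == 0 &&
 *         wc_Sha256Update(&sha, msg, msgSz) == 0 &&
 *         wc_Sha256Final(&sha, digest) == 0) {
 *         ...use the 32-byte digest...
 *     }
 *     wc_Sha256Free(&sha);
 *
 * Note that wc_Sha256Final re-initializes the state, so the same context can
 * be reused for another message without calling wc_InitSha256 again. */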

int wc_Sha256GetHash(wc_Sha256* sha256, byte* hash)
{
    int ret;
    wc_Sha256 tmpSha256;

    if (sha256 == NULL || hash == NULL)
        return BAD_FUNC_ARG;

    ret = wc_Sha256Copy(sha256, &tmpSha256);
    if (ret == 0) {
        ret = wc_Sha256Final(&tmpSha256, hash);
    }
    return ret;
}

#if defined(WOLFSSL_HASH_FLAGS) || defined(WOLF_CRYPTO_CB)
int wc_Sha256SetFlags(wc_Sha256* sha256, word32 flags)
{
    if (sha256) {
        sha256->flags = flags;
    }
    return 0;
}
int wc_Sha256GetFlags(wc_Sha256* sha256, word32* flags)
{
    if (sha256 && flags) {
        *flags = sha256->flags;
    }
    return 0;
}
#endif

int wc_Sha256Copy(wc_Sha256* src, wc_Sha256* dst)
{
    int ret = 0;

    if (src == NULL || dst == NULL)
        return BAD_FUNC_ARG;

    XMEMCPY(dst, src, sizeof(wc_Sha256));

    return ret;
}

#endif /* !NO_SHA256 */

#ifdef WOLFSSL_SHA224

static int InitSha224(wc_Sha224* sha224)
{
    int ret = 0;

    if (sha224 == NULL) {
        return BAD_FUNC_ARG;
    }

    sha224->digest[0] = 0xc1059ed8;
    sha224->digest[1] = 0x367cd507;
    sha224->digest[2] = 0x3070dd17;
    sha224->digest[3] = 0xf70e5939;
    sha224->digest[4] = 0xffc00b31;
    sha224->digest[5] = 0x68581511;
    sha224->digest[6] = 0x64f98fa7;
    sha224->digest[7] = 0xbefa4fa4;

    sha224->buffLen = 0;
    sha224->loLen   = 0;
    sha224->hiLen   = 0;

    return ret;
}

int wc_InitSha224_ex(wc_Sha224* sha224, void* heap, int devId)
{
    if (sha224 == NULL)
        return BAD_FUNC_ARG;

    sha224->heap = heap;
    (void)devId;

    return InitSha224(sha224);
}

int wc_InitSha224(wc_Sha224* sha224)
{
    return wc_InitSha224_ex(sha224, NULL, INVALID_DEVID);
}

int wc_Sha224Update(wc_Sha224* sha224, const byte* data, word32 len)
{
    int ret;

    if (sha224 == NULL || (data == NULL && len > 0)) {
        return BAD_FUNC_ARG;
    }

    ret = Sha256Update((wc_Sha256 *)sha224, data, len);

    return ret;
}

int wc_Sha224Final(wc_Sha224* sha224, byte* hash)
{
    int ret;
    word32 hashTmp[WC_SHA256_DIGEST_SIZE/sizeof(word32)];

    if (sha224 == NULL || hash == NULL) {
        return BAD_FUNC_ARG;
    }

    ret = Sha256Final((wc_Sha256*)sha224, (byte*)hashTmp);
    if (ret != 0)
        return ret;

    XMEMCPY(hash, hashTmp, WC_SHA224_DIGEST_SIZE);

    return InitSha224(sha224);  /* reset state */
}
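
/* SHA-224 reuses the SHA-256 compression function with different initial
 * values, so Sha256Final above produces the full 32-byte result in hashTmp
 * and only the first WC_SHA224_DIGEST_SIZE (28) bytes are copied out. */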

void wc_Sha224Free(wc_Sha224* sha224)
{
    if (sha224 == NULL)
        return;
}

int wc_Sha224GetHash(wc_Sha224* sha224, byte* hash)
{
    int ret;
    wc_Sha224 tmpSha224;

    if (sha224 == NULL || hash == NULL)
        return BAD_FUNC_ARG;

    ret = wc_Sha224Copy(sha224, &tmpSha224);
    if (ret == 0) {
        ret = wc_Sha224Final(&tmpSha224, hash);
    }
    return ret;
}

#if defined(WOLFSSL_HASH_FLAGS) || defined(WOLF_CRYPTO_CB)
int wc_Sha224SetFlags(wc_Sha224* sha224, word32 flags)
{
    if (sha224) {
        sha224->flags = flags;
    }
    return 0;
}
int wc_Sha224GetFlags(wc_Sha224* sha224, word32* flags)
{
    if (sha224 && flags) {
        *flags = sha224->flags;
    }
    return 0;
}
#endif

int wc_Sha224Copy(wc_Sha224* src, wc_Sha224* dst)
{
    int ret = 0;

    if (src == NULL || dst == NULL)
        return BAD_FUNC_ARG;

    XMEMCPY(dst, src, sizeof(wc_Sha224));

    return ret;
}

#endif /* WOLFSSL_SHA224 */
#endif /* !NO_SHA256 || WOLFSSL_SHA224 */
#endif /* WOLFSSL_ARMASM */