scalarmult.pq 52 KB


  # Scratch area playground1, addressed through playground1_ptr.
  # The @define names below are byte offsets into it, spaced 48 apart. The
  # fe_* macros in this file read and write 40 bytes per operand
  # (128 + 128 + 64 bits, i.e. ten 32-bit limbs), so each slot has 8 spare bytes.
  # NOTE(review): stack3072 is presumably 3072 bits = 384 bytes = 8 slots of
  # 48 bytes, which matches the last offset 336 + 48 - confirm.
  # z11_copy is an alias for the x1 slot, not separate storage.
  1. stack3072 playground1
  2. int32 playground1_ptr
  3. @define const121666 playground1_ptr + 0
  4. @define x1 playground1_ptr + 48
  5. @define x2 playground1_ptr + 96
  6. @define z2 playground1_ptr + 144
  7. @define x3 playground1_ptr + 192
  8. @define z3 playground1_ptr + 240
  9. @define tmp0 playground1_ptr + 288
  10. @define tmp1 playground1_ptr + 336
  11. @define z11_copy x1
  # 32-bit integer variables.
  # Of these, ptr is used by fe_sqsq and fe_mulmul as a scratch address for
  # stack slots (constants and spills). The others are not referenced by the
  # macros visible in this part of the file; presumably they belong to the
  # main scalar-multiplication code - TODO confirm.
  12. int32 i
  13. int32 j
  14. int32 ptr
  15. int32 swap
  16. int32 pos
  17. int32 bit
  18. int32 byte
  19. int32 word
  20. int32 pos8
  21. int32 pos7
  22. int32 mulsource
  23. int32 postcopy
  24. int32 q
  25. int32 p
  26. int32 n
  # 128-bit vector registers for the limb-wise copy/add/sub/cswap macros.
  # f*/g* hold the two operands (suffix 0 = limbs 0-3, 4 = limbs 4-7,
  # 8 = limbs 8-9 in the low half), d* hold the differences in fe_addsub and
  # x* the masked XOR differences in fe_negcswap2addsub.
  # e0/e4 are not referenced by the macros visible in this part of the file.
  27. reg128 e0
  28. reg128 e4
  29. reg128 f0
  30. reg128 f4
  31. reg128 f8
  32. reg128 g0
  33. reg128 g4
  34. reg128 g8
  35. reg128 d0
  36. reg128 d4
  37. reg128 d8
  38. reg128 x0
  39. reg128 x4
  40. reg128 x8
  # Second operand pair (F*, G*) and its masked XOR differences (X*),
  # used by fe_negcswap2addsub.
  41. reg128 F0
  42. reg128 F4
  43. reg128 F8
  44. reg128 G0
  45. reg128 G4
  46. reg128 G8
  47. reg128 X0
  48. reg128 X4
  49. reg128 X8
  # Sum and difference results computed by fe_negcswap2addsub.
  50. reg128 f0plusF0
  51. reg128 f0minusF0
  52. reg128 g0plusG0
  53. reg128 g0minusG0
  54. reg128 f4plusF4
  55. reg128 f4minusF4
  56. reg128 g4plusG4
  57. reg128 g4minusG4
  58. reg128 f8plusF8
  59. reg128 f8minusF8
  60. reg128 g8plusG8
  61. reg128 g8minusG8
  # fe_sqsq operands: limbs of both inputs interleaved pairwise (fgNM),
  # their doubled copies (_2) and the copies multiplied by the
  # _19_19_38_38 constant (_19_38).
  62. reg128 fg01
  63. reg128 fg23
  64. reg128 fg45
  65. reg128 fg67
  66. reg128 fg89
  67. reg128 fg01_2
  68. reg128 fg23_2
  69. reg128 fg45_2
  70. reg128 fg67_2
  71. reg128 fg45_19_38
  72. reg128 fg67_19_38
  73. reg128 fg89_19_38
  # h0..h9: product accumulators, one per output limb, used by fe_sqsq and
  # fe_mulmul.
  74. reg128 h0
  75. reg128 h1
  76. reg128 h2
  77. reg128 h3
  78. reg128 h4
  79. reg128 h5
  80. reg128 h6
  81. reg128 h7
  82. reg128 h8
  83. reg128 h9
  # 64-bit stack slots; not referenced by the macros visible in this part of
  # the file.
  84. stack64 h0stack
  85. stack64 h1stack
  86. stack64 h2stack
  87. stack64 h3stack
  88. stack64 h4stack
  89. stack64 h5stack
  90. stack64 h6stack
  91. stack64 h7stack
  92. stack64 h8stack
  93. stack64 h9stack
  # t*: rounded / shifted-back temporaries, c*: carries of the carry chains in
  # fe_sqsq and fe_mulmul.
  94. reg128 t0
  95. reg128 t1
  96. reg128 t2
  97. reg128 t3
  98. reg128 t4
  99. reg128 t5
  100. reg128 t6
  101. reg128 t7
  102. reg128 t8
  103. reg128 t9
  104. reg128 c0
  105. reg128 c1
  106. reg128 c2
  107. reg128 c3
  108. reg128 c4
  109. reg128 c5
  110. reg128 c6
  111. reg128 c7
  112. reg128 c8
  113. reg128 c9
  # fe_mulmul operands. Per the lane comments inside fe_mulmul, each register
  # holds two limbs of both inputs interleaved (e.g. g02 = g0 G0 g2 G2,
  # g13 = g1 G1 g3 G3). Suffix _2 = doubled, _19 = multiplied by 19.
  # mix is shared: its low half holds 2*f9 / 2*F9 and its high half
  # 19*g2 / 19*G2.
  114. reg128 f02
  115. reg128 f13
  116. reg128 f46
  117. reg128 f57
  118. reg128 f89
  119. reg128 g02
  120. reg128 g13
  121. reg128 g46
  122. reg128 g57
  123. reg128 g89
  124. reg128 f13_2
  125. reg128 f57_2
  126. reg128 f89_2
  127. reg128 mix
  128. reg128 g13_19
  129. reg128 g46_19
  130. reg128 g57_19
  131. reg128 g89_19
  # 128-bit stack slots that fe_mulmul stores the like-named registers to and
  # later reloads them from.
  132. stack128 f13_2_stack
  133. stack128 f57_2_stack
  134. stack128 mix_stack
  135. stack128 g13_19_stack
  136. stack128 g46_19_stack
  137. stack128 g57_19_stack
  138. stack128 g89_19_stack
  139. stack128 h9_stack
  140. stack128 h7_stack
  141. stack128 h5_stack
  # Of the next six registers only s is referenced by the macros visible in
  # this part of the file (fe_sqsq uses it to fold the top carry back into
  # limb 0).
  142. reg128 t
  143. reg128 s
  144. reg128 s2
  145. reg128 c
  146. reg128 mask26
  147. reg128 mask25
  # Constants. The macros reload them from the *_stack copies, which are
  # initialised outside this part of the file.
  # NOTE(review): by name these should be 2^25, 2^24 and the lane vector
  # (19,19,38,38) - confirm at the initialisation site.
  148. reg128 _0x2000000
  149. reg128 _0x1000000
  150. reg128 _19_19_38_38
  151. stack128 _0x2000000_stack
  152. stack128 _0x1000000_stack
  153. stack128 _19_19_38_38_stack
  # h02 .. h59: not referenced by the macros visible in this part of the file.
  154. reg128 h02
  155. reg128 h24
  156. reg128 h46
  157. reg128 h68
  158. reg128 h80
  159. reg128 h31
  160. reg128 h53
  161. reg128 h75
  162. reg128 h97
  163. reg128 h19
  164. reg128 h04
  165. reg128 h15
  166. reg128 h26
  167. reg128 h37
  168. reg128 h48
  169. reg128 h59
  # Registers named after the lane contents they are meant to hold
  # (e.g. f0_f1_f2_f3). None of them is referenced by the macros visible in
  # this part of the file; the even/odd grouping below is the original
  # author's and presumably refers to code further down - TODO confirm.
  170. reg128 f0_f1_f2_f3
  171. reg128 f4_f5_f6_f7
  172. reg128 f8_f9_g8_g9
  173. reg128 19f8_19f9_19g8_19g9
  174. reg128 f8_2f9_g8_g9
  175. # required for even (and partially also odd)
  176. reg128 g0_g1_g2_g3
  177. reg128 g4_g5_g6_g7
  178. reg128 f0_2f1_f2_2f3
  179. reg128 f4_2f5_f6_2f7
  180. reg128 f8_2f9_f9_f6
  181. reg128 g0_19g1_g2_19g3
  182. reg128 19g0_19g1_19g2_19g3 # This one is going to be freed very early
  183. reg128 19g4_19g5_19g6_19g7
  184. reg128 g4_19g5_g6_19g7
  185. reg128 g8_19g9_19g8_19g9
  186. #required only for odd
  187. reg128 f1_f8_f3_f0
  188. reg128 f5_f2_f7_f4
  189. reg128 19g8_g9_19g2_g3
  190. reg128 19g4_g5_19g6_g7
  191. reg128 _19_19_19_19
  192. reg128 _0_1_0_1
  193. reg128 _1_1_1_1
  # playground2, playp and binput are not referenced by the macros visible in
  # this part of the file. b is the 4-lane select mask built from the swap
  # argument in fe_negcswap2addsub.
  194. stack512 playground2
  195. int32 playp
  196. int32 binput
  197. reg128 b
  # Address cursors used by the fe_* macros: pos0..pos3 by the add/sub/cswap
  # family, posh/posf/posg and posH/posF/posG by fe_sqsq and fe_mulmul,
  # posx/posy by fe_0, fe_1 and fe_copy. Every macro overwrites the cursors it
  # uses.
  198. int32 pos0
  199. int32 pos1
  200. int32 pos2
  201. int32 pos3
  202. int32 posh
  203. int32 posf
  204. int32 posg
  205. int32 posH
  206. int32 posF
  207. int32 posG
  208. int32 posx
  209. int32 posy
  # out* and carry*: not referenced by the macros visible in this part of the
  # file.
  210. int32 out0
  211. int32 out1
  212. int32 out2
  213. int32 out3
  214. int32 out4
  215. int32 out5
  216. int32 out6
  217. int32 out7
  218. int32 out8
  219. int32 out9
  220. int32 carry0
  221. int32 carry1
  222. int32 carry2
  223. int32 carry3
  224. int32 carry4
  225. int32 carry5
  226. int32 carry6
  227. int32 carry7
  228. int32 carry8
  229. int32 carry9
  230. int32 carry
  # Vector constants rebuilt on each use by fe_0 (zero) and fe_1 (zero, one).
  231. reg128 zero
  232. reg128 one
  # fe_0(x): store zero into the 40-byte field element at address x
  # (two 128-bit stores followed by one 64-bit store, i.e. ten 32-bit limbs).
  # Clobbers posx and zero.
  # The stores use aligned=, so x presumably has to be suitably aligned -
  # TODO confirm the exact alignment the generated instructions require.
  233. @define fe_0(x) ;\
  234. posx = x ;\
  235. 4x zero = 0 ;\
  236. mem128[posx] aligned= zero; posx += 16 ;\
  237. mem128[posx] aligned= zero; posx += 16 ;\
  238. mem64[posx] aligned= zero[0] ;\
  # fe_1(x): store the field element 1 into the 40 bytes at address x.
  # The first 16 bytes are written from one, the remaining 24 from zero.
  # one is built without a memory load: 0xff is placed in its low half, its
  # high half is cleared, and every 32-bit lane is shifted right by 7
  # (0xff >> 7 = 1).
  # NOTE(review): this yields limbs (1,0,0,0) only if the 0xff immediate ends
  # up in 32-bit lane 0 alone - confirm against the qhasm instruction
  # definition for the immediate move.
  # Clobbers posx, zero and one.
  239. @define fe_1(x) ;\
  240. posx = x ;\
  241. 4x zero = 0 ;\
  242. new one ;\
  243. one = 0xff,one[1] ;\
  244. one = one[0],0 ;\
  245. 4x one unsigned>>= 7 ;\
  246. mem128[posx] aligned= one; posx += 16 ;\
  247. mem128[posx] aligned= zero; posx += 16 ;\
  248. mem64[posx] aligned= zero[0] ;\
  # fe_copy(x,y): copy the 40-byte field element at address y to address x
  # (destination first, source second).
  # Both loads and the 64-bit load complete before the first store.
  # Only the low 64 bits of f8 are loaded and stored; its high half is left
  # undefined (new f8) and never written to memory.
  # Clobbers posx, posy, f0, f4 and f8.
  249. @define fe_copy(x,y) ;\
  250. posy = y ;\
  251. posx = x ;\
  252. f0 aligned= mem128[posy]; posy += 16 ;\
  253. f4 aligned= mem128[posy]; posy += 16 ;\
  254. new f8 ;\
  255. f8 aligned= mem64[posy] f8[1] ;\
  256. mem128[posx] aligned= f0; posx += 16 ;\
  257. mem128[posx] aligned= f4; posx += 16 ;\
  258. mem64[posx] aligned= f8[0] ;\
  # fe_add(sum,x,y): limb-wise addition, sum = x + y, on 40-byte field
  # elements (ten 32-bit limbs).
  # Each 32-bit lane is added independently (4x); no carry propagation or
  # modular reduction is done here.
  # All loads are issued before the first store to sum.
  # The high halves of f8/g8 are undefined (new); they take part in the 4x add
  # but only the low 64 bits (limbs 8 and 9) are stored.
  # Clobbers pos0, pos1, pos2, f0, f4, f8, g0, g4 and g8.
  259. @define fe_add(sum,x,y) ;\
  260. pos1 = x ;\
  261. pos2 = y ;\
  262. f0 aligned= mem128[pos1]; pos1 += 16 ;\
  263. g0 aligned= mem128[pos2]; pos2 += 16 ;\
  264. 4x f0 += g0 ;\
  265. ;\
  266. f4 aligned= mem128[pos1]; pos1 += 16 ;\
  267. g4 aligned= mem128[pos2]; pos2 += 16 ;\
  268. 4x f4 += g4 ;\
  269. pos0 = sum ;\
  270. ;\
  271. new f8 ;\
  272. new g8 ;\
  273. f8 aligned= mem64[pos1] f8[1] ;\
  274. g8 aligned= mem64[pos2] g8[1] ;\
  275. 4x f8 += g8 ;\
  276. ;\
  277. mem128[pos0] aligned= f0; pos0 += 16 ;\
  278. mem128[pos0] aligned= f4; pos0 += 16 ;\
  279. mem64[pos0] aligned= f8[0] ;\
  # fe_sub(diff,x,y): limb-wise subtraction, diff = x - y, on 40-byte field
  # elements (ten 32-bit limbs).
  # Each 32-bit lane is subtracted independently (4x); no borrow propagation
  # or modular reduction is done here.
  # All loads are issued before the first store to diff.
  # The high halves of f8/g8 are undefined (new); they take part in the 4x
  # subtract but only the low 64 bits (limbs 8 and 9) are stored.
  # Clobbers pos0, pos1, pos2, f0, f4, f8, g0, g4 and g8.
  280. @define fe_sub(diff,x,y) ;\
  281. pos1 = x ;\
  282. pos2 = y ;\
  283. f0 aligned= mem128[pos1]; pos1 += 16 ;\
  284. g0 aligned= mem128[pos2]; pos2 += 16 ;\
  285. 4x f0 -= g0 ;\
  286. ;\
  287. f4 aligned= mem128[pos1]; pos1 += 16 ;\
  288. g4 aligned= mem128[pos2]; pos2 += 16 ;\
  289. 4x f4 -= g4 ;\
  290. pos0 = diff ;\
  291. ;\
  292. new f8 ;\
  293. new g8 ;\
  294. f8 aligned= mem64[pos1] f8[1] ;\
  295. g8 aligned= mem64[pos2] g8[1] ;\
  296. 4x f8 -= g8 ;\
  297. ;\
  298. mem128[pos0] aligned= f0; pos0 += 16 ;\
  299. mem128[pos0] aligned= f4; pos0 += 16 ;\
  300. mem64[pos0] aligned= f8[0] ;\
  # fe_addsub(sum,x,y,diff): compute both sum = x + y and diff = x - y in one
  # pass over the operands. Note the argument order: diff is the LAST
  # parameter.
  # Work is limb-wise on 32-bit lanes (4x) with no carry/borrow propagation or
  # modular reduction. Each difference d* is formed before the matching f* is
  # overwritten with the sum.
  # All loads are issued before the first store, and the stores to diff and
  # sum are interleaved.
  # The high halves of f8/g8 are undefined (new); only the low 64 bits of
  # d8 and f8 are stored.
  # Clobbers pos0..pos3, f0, f4, f8, g0, g4, g8, d0, d4 and d8.
  301. @define fe_addsub(sum,x,y,diff) ;\
  302. pos1 = x ;\
  303. pos2 = y ;\
  304. pos3 = diff ;\
  305. pos0 = sum ;\
  306. f0 aligned= mem128[pos1]; pos1 += 16 ;\
  307. ;\
  308. g0 aligned= mem128[pos2]; pos2 += 16 ;\
  309. 4x d0 = f0 - g0 ;\
  310. ;\
  311. 4x f0 += g0 ;\
  312. f4 aligned= mem128[pos1]; pos1 += 16 ;\
  313. ;\
  314. g4 aligned= mem128[pos2]; pos2 += 16 ;\
  315. 4x d4 = f4 - g4 ;\
  316. ;\
  317. 4x f4 += g4 ;\
  318. new f8 ;\
  319. f8 aligned= mem64[pos1] f8[1] ;\
  320. ;\
  321. new g8 ;\
  322. g8 aligned= mem64[pos2] g8[1] ;\
  323. 4x d8 = f8 - g8 ;\
  324. ;\
  325. 4x f8 += g8 ;\
  326. mem128[pos3] aligned= d0; pos3 += 16 ;\
  327. ;\
  328. mem128[pos0] aligned= f0; pos0 += 16 ;\
  329. ;\
  330. mem128[pos3] aligned= d4; pos3 += 16 ;\
  331. ;\
  332. mem128[pos0] aligned= f4; pos0 += 16 ;\
  333. ;\
  334. mem64[pos3] aligned= d8[0] ;\
  335. ;\
  336. mem64[pos0] aligned= f8[0] ;\
  # fe_negcswap2addsub(x2,x3,z2,z3,swap): branch-free conditional exchange of
  # two pairs of field elements followed by an in-place sum/difference.
  #
  # Step 1 - conditional exchange: swap is replicated into all four 32-bit
  #   lanes of b. For each pair (x2,x3) and (z2,z3) the code computes
  #   d = (a ^ b2) & b and then a ^= d, b2 ^= d. With swap = 0 nothing
  #   changes; with swap = all-ones (0xffffffff) the two elements are
  #   exchanged. Any other value of swap would exchange only the selected
  #   bits, so callers must pass 0 or all-ones.
  # Step 2 - sum/difference, written back over the inputs:
  #   x2 <- x2 + z2,  z2 <- x2 - z2,  x3 <- x3 + z3,  z3 <- x3 - z3
  #   (using the values after step 1). Limb-wise on 32-bit lanes, no
  #   carry/borrow propagation or modular reduction.
  #
  # All four operands are fully loaded before the first store.
  # Clobbers pos0..pos3, b, f*/g*/F*/G*, x*/X* and the *plus*/*minus*
  # result registers.
  337. @define fe_negcswap2addsub(x2,x3,z2,z3,swap) ;\
  338. new f8 ;\
  339. new g8 ;\
  340. new F8 ;\
  341. new G8 ;\
  342. pos0 = x2 ;\
  343. pos1 = x3 ;\
  344. pos2 = z2 ;\
  345. f0 aligned= mem128[pos0]; pos0 += 16 ;\
  346. pos3 = z3 ;\
  347. g0 aligned= mem128[pos1]; pos1 += 16 ;\
  348. x0 = f0 ^ g0 ;\
  349. F0 aligned= mem128[pos2]; pos2 += 16 ;\
  350. b = swap,swap,swap,swap ;\
  351. G0 aligned= mem128[pos3]; pos3 += 16 ;\
  352. X0 = F0 ^ G0 ;\
  353. f4 aligned= mem128[pos0]; pos0 += 16 ;\
  354. x0 &= b ;\
  355. g4 aligned= mem128[pos1]; pos1 += 16 ;\
  356. X0 &= b ;\
  357. F4 aligned= mem128[pos2]; pos2 += 16 ;\
  358. f0 ^= x0 ;\
  359. G4 aligned= mem128[pos3]; pos3 += 16 ;\
  360. g0 ^= x0 ;\
  361. f8 aligned= mem64[pos0] f8[1] ;\
  362. F0 ^= X0 ;\
  363. g8 aligned= mem64[pos1] g8[1] ;\
  364. G0 ^= X0 ;\
  365. F8 aligned= mem64[pos2] F8[1] ;\
  366. x4 = f4 ^ g4 ;\
  367. G8 aligned= mem64[pos3] G8[1] ;\
  368. x8 = f8 ^ g8 ;\
  # Each cursor was advanced by 2 * 16 bytes during the loads. Rewind all four by 32 so the stores below start at the operand base again ;\
  369. pos0 -= 32 ;\
  370. x4 &= b ;\
  371. pos1 -= 32 ;\
  372. x8 &= b ;\
  373. pos2 -= 32 ;\
  374. f4 ^= x4 ;\
  375. pos3 -= 32 ;\
  376. f8 ^= x8 ;\
  377. g4 ^= x4 ;\
  378. g8 ^= x8 ;\
  379. X4 = F4 ^ G4 ;\
  380. X8 = F8 ^ G8 ;\
  381. X4 &= b ;\
  382. X8 &= b ;\
  383. F4 ^= X4 ;\
  384. F8 ^= X8 ;\
  385. G4 ^= X4 ;\
  386. G8 ^= X8 ;\
  # Sums and differences of the (possibly exchanged) values. Sums go back through pos0 and pos1, differences through pos2 and pos3 ;\
  387. 4x f0plusF0 = f0 + F0 ;\
  388. 4x f0minusF0 = f0 - F0 ;\
  389. mem128[pos0] aligned= f0plusF0; pos0 += 16 ;\
  390. 4x f4plusF4 = f4 + F4 ;\
  391. mem128[pos2] aligned= f0minusF0; pos2 += 16 ;\
  392. 4x f4minusF4 = f4 - F4 ;\
  393. mem128[pos0] aligned= f4plusF4; pos0 += 16 ;\
  394. 4x f8plusF8 = f8 + F8 ;\
  395. mem128[pos2] aligned= f4minusF4; pos2 += 16 ;\
  396. 4x f8minusF8 = f8 - F8 ;\
  397. mem64[pos0] aligned= f8plusF8[0] ;\
  398. 4x g0plusG0 = g0 + G0 ;\
  399. mem64[pos2] aligned= f8minusF8[0] ;\
  400. 4x g0minusG0 = g0 - G0 ;\
  401. mem128[pos1] aligned= g0plusG0; pos1 += 16 ;\
  402. 4x g4plusG4 = g4 + G4 ;\
  403. mem128[pos3] aligned= g0minusG0; pos3 += 16 ;\
  404. 4x g4minusG4 = g4 - G4 ;\
  405. mem128[pos1] aligned= g4plusG4; pos1 += 16 ;\
  406. 4x g8plusG8 = g8 + G8 ;\
  407. mem128[pos3] aligned= g4minusG4; pos3 += 16 ;\
  408. 4x g8minusG8 = g8 - G8 ;\
  409. mem64[pos1] aligned= g8plusG8[0] ;\
  410. mem64[pos3] aligned= g8minusG8[0] ;\
  # fe_sqsq(h,f,H,F): two independent field squarings computed side by side,
  # h = f*f and H = F*F, on 40-byte elements of ten 32-bit limbs.
  #
  # Data layout: limbs of f and F are interleaved pairwise, so that
  #   fg01 = f0 F0 f1 F1, fg23 = f2 F2 f3 F3, ..., fg89 = f8 F8 f9 F9.
  # Every accumulator hN holds two 64-bit sums: lanes [0,1] belong to f and
  # lanes [2,3] to F.
  #
  # Phases:
  #  1. Load and interleave the inputs; build doubled copies (_2) and copies
  #     multiplied lane-wise by the _19_19_38_38 constant (_19_38). Only
  #     lanes 2,3 of fg45_19_38 are computed (lanes 0,1 are a self-assignment)
  #     and only those lanes are read afterwards.
  #  2. Accumulate h0..h9 with signed 32x32->64 multiplies; the comment above
  #     each group gives the formula, a suffix _N meaning a factor of N.
  #  3. Carry chain: two interleaved chains h0->h1->...->h7 and
  #     h6->h7->h8->h9->h0->h1. Even limbs carry at bit 26, odd limbs at
  #     bit 25; the rounding constant is added before the arithmetic shift.
  #     The carry out of h9 is folded into h0 multiplied by 19.
  #  4. De-interleave pairs of limbs and store them with 64-bit stores (two
  #     32-bit limbs each) in the order 2/3, 4/5, 8/9, 6/7, 0/1.
  #
  # All loads from f and F complete before the first store to h or H.
  # Reads the stack constants _19_19_38_38_stack, _0x2000000_stack and
  # _0x1000000_stack, which must have been initialised elsewhere.
  # Clobbers ptr, posf, posF, posh, posH, the fg* registers, h0..h9, t0..t9,
  # c0..c9, s, _19_19_38_38, _0x2000000 and _0x1000000.
  411. @define fe_sqsq(h,f,H,F) ;\
  412. ptr = &_19_19_38_38_stack ;\
  413. posf = f ;\
  414. posF = F ;\
  415. _19_19_38_38 aligned= mem128[ptr] ;\
  416. ;\
  417. fg01 aligned= mem128[posf];posf+=16 ;\
  418. fg23 aligned= mem128[posF];posF+=16 ;\
  419. fg01[0,1,2,3] fg23[0,1,2,3] = fg01[0]fg23[0]fg01[1]fg23[1] fg01[2]fg23[2]fg01[3]fg23[3] ;\
  420. ;\
  421. fg45 aligned= mem128[posf];posf+=16 ;\
  422. fg67 aligned= mem128[posF];posF+=16 ;\
  423. ;\
  424. 4x fg01_2 = fg01 << 1 ;\
  425. fg45[0,1,2,3] fg67[0,1,2,3] = fg45[0]fg67[0]fg45[1]fg67[1] fg45[2]fg67[2]fg45[3]fg67[3] ;\
  426. 4x fg23_2 = fg23 << 1 ;\
  427. new fg89 ;\
  428. fg89 aligned= mem64[posf]fg89[1] ;\
  429. 4x fg45_2 = fg45 << 1 ;\
  430. fg89 aligned= fg89[0]mem64[posF] ;\
  431. 4x fg67_2 = fg67 << 1 ;\
  432. ;\
  # Copies multiplied lane-wise by the _19_19_38_38 constant. For fg45 only lanes 2 and 3 are needed, so lanes 0 and 1 are left as they are ;\
  433. fg45_19_38[0,1] = fg45_19_38[0,1];fg45_19_38[2] = fg45[2] * _19_19_38_38[2];fg45_19_38[3] = fg45[3] * _19_19_38_38[3] ;\
  434. fg89 = fg89[0,2,1,3] ;\
  435. 4x fg67_19_38 = fg67 * _19_19_38_38 ;\
  436. 4x fg89_19_38 = fg89 * _19_19_38_38 ;\
  437. ;\
  438. # f0f0 + f1f9_76 + f2f8_38 + f3f7_76 + f4f6_38 + f5f5_38; ;\
  439. h0[0,1] = fg01[0] signed* fg01[0]; h0[2,3] = fg01[1] signed* fg01[1] ;\
  440. h0[0,1] += fg01_2[2] signed* fg89_19_38[2]; h0[2,3] += fg01_2[3] signed* fg89_19_38[3] ;\
  441. h0[0,1] += fg23_2[0] signed* fg89_19_38[0]; h0[2,3] += fg23_2[1] signed* fg89_19_38[1] ;\
  442. h0[0,1] += fg23_2[2] signed* fg67_19_38[2]; h0[2,3] += fg23_2[3] signed* fg67_19_38[3] ;\
  443. h0[0,1] += fg45_2[0] signed* fg67_19_38[0]; h0[2,3] += fg45_2[1] signed* fg67_19_38[1] ;\
  444. h0[0,1] += fg45[2] signed* fg45_19_38[2]; h0[2,3] += fg45[3] signed* fg45_19_38[3] ;\
  445. ;\
  446. # f0f1_2 + f2f9_38 + f3f8_38 + f4f7_38 + f5f6_38; ;\
  447. h1[0,1] = fg01[0] signed* fg01_2[2]; h1[2,3] = fg01[1] signed* fg01_2[3] ;\
  448. h1[0,1] += fg23[0] signed* fg89_19_38[2]; h1[2,3] += fg23[1] signed* fg89_19_38[3] ;\
  449. h1[0,1] += fg23_2[2] signed* fg89_19_38[0]; h1[2,3] += fg23_2[3] signed* fg89_19_38[1] ;\
  450. h1[0,1] += fg45[0] signed* fg67_19_38[2]; h1[2,3] += fg45[1] signed* fg67_19_38[3] ;\
  451. h1[0,1] += fg45_2[2] signed* fg67_19_38[0]; h1[2,3] += fg45_2[3] signed* fg67_19_38[1] ;\
  452. ;\
  453. # f0f2_2 + f1f1_2 + f3f9_76 + f4f8_38 + f5f7_76 + f6f6_19 ;\
  454. h2[0,1] = fg01_2[0] signed* fg23[0]; h2[2,3] = fg01_2[1] signed* fg23[1] ;\
  455. h2[0,1] += fg01_2[2] signed* fg01[2]; h2[2,3] += fg01_2[3] signed* fg01[3] ;\
  456. h2[0,1] += fg23_2[2] signed* fg89_19_38[2]; h2[2,3] += fg23_2[3] signed* fg89_19_38[3] ;\
  457. h2[0,1] += fg45_2[0] signed* fg89_19_38[0]; h2[2,3] += fg45_2[1] signed* fg89_19_38[1] ;\
  458. h2[0,1] += fg45_2[2] signed* fg67_19_38[2]; h2[2,3] += fg45_2[3] signed* fg67_19_38[3] ;\
  459. h2[0,1] += fg67[0] signed* fg67_19_38[0]; h2[2,3] += fg67[1] signed* fg67_19_38[1] ;\
  460. ;\
  461. # f0f3_2+f1f2_2 +f4f9_38+f5f8_38+f6f7_38; ;\
  462. h3[0,1] = fg01_2[0] signed* fg23[2]; h3[2,3] = fg01_2[1] signed* fg23[3] ;\
  463. h3[0,1] += fg01_2[2] signed* fg23[0]; h3[2,3] += fg01_2[3] signed* fg23[1] ;\
  464. h3[0,1] += fg45[0] signed* fg89_19_38[2]; h3[2,3] += fg45[1] signed* fg89_19_38[3] ;\
  465. h3[0,1] += fg45_2[2] signed* fg89_19_38[0]; h3[2,3] += fg45_2[3] signed* fg89_19_38[1] ;\
  466. h3[0,1] += fg67[0] signed* fg67_19_38[2]; h3[2,3] += fg67[1] signed* fg67_19_38[3] ;\
  467. ;\
  468. # f0f4_2 + f1f3_4 + f2f2 + f5f9_76 + f6f8_38 + f7f7_38; ;\
  469. h4[0,1] = fg01_2[0] signed* fg45[0]; h4[2,3] = fg01_2[1] signed* fg45[1] ;\
  470. h4[0,1] += fg01_2[2] signed* fg23_2[2]; h4[2,3] += fg01_2[3] signed* fg23_2[3] ;\
  471. h4[0,1] += fg23[0] signed* fg23[0]; h4[2,3] += fg23[1] signed* fg23[1] ;\
  472. h4[0,1] += fg45_2[2] signed* fg89_19_38[2]; h4[2,3] += fg45_2[3] signed* fg89_19_38[3] ;\
  473. h4[0,1] += fg67_2[0] signed* fg89_19_38[0]; h4[2,3] += fg67_2[1] signed* fg89_19_38[1] ;\
  474. h4[0,1] += fg67[2] signed* fg67_19_38[2]; h4[2,3] += fg67[3] signed* fg67_19_38[3] ;\
  475. ;\
  476. # f0f5_2+f1f4_2 +f2f3_2 +f6f9_38+f7f8_38; ;\
  477. h5[0,1] = fg01_2[0] signed* fg45[2]; h5[2,3] = fg01_2[1] signed* fg45[3] ;\
  478. h5[0,1] += fg01_2[2] signed* fg45[0]; h5[2,3] += fg01_2[3] signed* fg45[1] ;\
  479. h5[0,1] += fg23_2[0] signed* fg23[2]; h5[2,3] += fg23_2[1] signed* fg23[3] ;\
  480. h5[0,1] += fg67[0] signed* fg89_19_38[2]; h5[2,3] += fg67[1] signed* fg89_19_38[3] ;\
  481. h5[0,1] += fg67_2[2] signed* fg89_19_38[0]; h5[2,3] += fg67_2[3] signed* fg89_19_38[1] ;\
  482. ;\
  483. # f0f6_2+f1f5_4 +f2f4_2 +f3f3_2 +f7f9_76+f8f8_19; ;\
  484. h6[0,1] = fg01_2[0] signed* fg67[0]; h6[2,3] = fg01_2[1] signed* fg67[1] ;\
  485. h6[0,1] += fg01_2[2] signed* fg45_2[2]; h6[2,3] += fg01_2[3] signed* fg45_2[3] ;\
  486. h6[0,1] += fg23_2[0] signed* fg45[0]; h6[2,3] += fg23_2[1] signed* fg45[1] ;\
  487. h6[0,1] += fg23_2[2] signed* fg23[2]; h6[2,3] += fg23_2[3] signed* fg23[3] ;\
  488. h6[0,1] += fg67_2[2] signed* fg89_19_38[2]; h6[2,3] += fg67_2[3] signed* fg89_19_38[3] ;\
  489. h6[0,1] += fg89[0] signed* fg89_19_38[0]; h6[2,3] += fg89[1] signed* fg89_19_38[1] ;\
  490. ;\
  491. # f0f7_2+f1f6_2 +f2f5_2 +f3f4_2 +f8f9_38; ;\
  492. h7[0,1] = fg01_2[0] signed* fg67[2]; h7[2,3] = fg01_2[1] signed* fg67[3] ;\
  493. h7[0,1] += fg01_2[2] signed* fg67[0]; h7[2,3] += fg01_2[3] signed* fg67[1] ;\
  494. h7[0,1] += fg23_2[0] signed* fg45[2]; h7[2,3] += fg23_2[1] signed* fg45[3] ;\
  495. h7[0,1] += fg23_2[2] signed* fg45[0]; h7[2,3] += fg23_2[3] signed* fg45[1] ;\
  496. h7[0,1] += fg89[0] signed* fg89_19_38[2]; h7[2,3] += fg89[1] signed* fg89_19_38[3] ;\
  497. ;\
  498. # f0f8_2+f1f7_4 +f2f6_2 +f3f5_4 +f4f4 +f9f9_38; ;\
  499. h8[0,1] = fg89[2] signed* fg89_19_38[2]; h8[2,3] = fg89[3] signed* fg89_19_38[3] ;\
  500. h8[0,1] += fg01_2[0] signed* fg89[0]; h8[2,3] += fg01_2[1] signed* fg89[1] ;\
  501. h8[0,1] += fg01_2[2] signed* fg67_2[2]; h8[2,3] += fg01_2[3] signed* fg67_2[3] ;\
  502. h8[0,1] += fg23_2[0] signed* fg67[0]; h8[2,3] += fg23_2[1] signed* fg67[1] ;\
  503. h8[0,1] += fg23_2[2] signed* fg45_2[2]; h8[2,3] += fg23_2[3] signed* fg45_2[3] ;\
  504. h8[0,1] += fg45[0] signed* fg45[0]; h8[2,3] += fg45[1] signed* fg45[1] ;\
  505. ;\
  506. ptr = &_0x2000000_stack ;\
  507. _0x2000000 aligned= mem128[ptr] ;\
  508. # f0f9_2+f1f8_2 +f2f7_2 +f3f6_2 +f4f5_2; ;\
  509. h9[0,1] = fg45_2[0] signed* fg45[2]; h9[2,3] = fg45_2[1] signed* fg45[3] ;\
  510. h9[0,1] += fg01_2[0] signed* fg89[2]; h9[2,3] += fg01_2[1] signed* fg89[3] ;\
  511. h9[0,1] += fg01_2[2] signed* fg89[0]; h9[2,3] += fg01_2[3] signed* fg89[1] ;\
  512. h9[0,1] += fg23_2[0] signed* fg67[2]; h9[2,3] += fg23_2[1] signed* fg67[3] ;\
  513. h9[0,1] += fg23_2[2] signed* fg67[0]; h9[2,3] += fg23_2[3] signed* fg67[1] ;\
  514. ;\
  515. ptr = &_0x1000000_stack ;\
  516. _0x1000000 aligned= mem128[ptr] ;\
  # Carry chain. For each limb: add the rounding constant, arithmetic shift right gives the carry, the carry is added to the next limb and its shifted-back value is subtracted from this limb ;\
  517. 2x t0 = h0 + _0x2000000 ;\
  518. 2x t6 = h6 + _0x2000000 ;\
  519. ;\
  520. 2x c0 = t0 signed>> 26 ;\
  521. 2x c6 = t6 signed>> 26 ;\
  522. 2x h1 += c0 ;\
  523. 2x t0 = c0 << 26 ;\
  524. 2x t1 = h1 + _0x1000000 ;\
  525. 2x h7 += c6 ;\
  526. 2x t6 = c6 << 26 ;\
  527. 2x t7 = h7 + _0x1000000 ;\
  528. 2x h0 -= t0 ;\
  529. 2x c1 = t1 signed>> 25 ;\
  530. 2x h6 -= t6 ;\
  531. 2x c7 = t7 signed>> 25 ;\
  532. 2x h2 += c1 ;\
  533. 2x t1 = c1 << 25 ;\
  534. 2x t2 = h2 + _0x2000000 ;\
  535. 2x h8 += c7 ;\
  536. 2x h1 -= t1 ;\
  537. 2x c2 = t2 signed>> 26 ;\
  538. 2x t7 = c7 << 25 ;\
  539. 2x t8 = h8 + _0x2000000 ;\
  540. 2x h3 += c2 ;\
  541. 2x t2 = c2 << 26 ;\
  542. 2x t3 = h3 + _0x1000000 ;\
  543. 2x h7 -= t7 ;\
  544. 2x c8 = t8 signed>> 26 ;\
  545. 2x h2 -= t2 ;\
  546. 2x c3 = t3 signed>> 25 ;\
  547. 2x h9 += c8 ;\
  548. 2x t8 = c8 << 26 ;\
  549. 2x t9 = h9 + _0x1000000 ;\
  550. 2x h4 += c3 ;\
  551. posh = h ;\
  552. 2x t3 = c3 << 25 ;\
  553. posH = H ;\
  554. 2x t4 = h4 + _0x2000000 ;\
  555. posh+=8;\
  556. 2x h8 -= t8 ;\
  557. posH+=8;\
  558. 2x c9 = t9 signed>> 25 ;\
  559. 2x h3 -= t3 ;\
  560. 2x c4 = t4 signed>> 26 ;\
  # The carry out of limb 9 wraps around into limb 0 multiplied by 19, added in three steps as 2*c9 + 16*c9 + c9 ;\
  561. 2x s = c9 + c9 ;\
  562. 2x h5 += c4 ;\
  563. h2 h3 = h2[0]h3[0]h2[2]h2[3] h2[1]h3[1]h3[2]h3[3];\
  564. 2x t4 = c4 << 26 ;\
  565. h2 h3 = h2[0]h2[1]h2[2]h3[2] h3[0]h3[1]h2[3]h3[3];\
  566. 2x t5 = h5 + _0x1000000 ;\
  567. 2x h0 += s ;\
  568. mem64[posh] aligned= h2[0];posh+=8 ;\
  569. 2x s = c9 << 4 ;\
  570. mem64[posH] aligned= h2[1];posH+=8 ;\
  571. 2x h4 -= t4 ;\
  572. 2x c5 = t5 signed>> 25 ;\
  573. 2x h0 += s ;\
  574. 2x h6 += c5 ;\
  575. 2x t5 = c5 << 25 ;\
  576. 2x t6 = h6 + _0x2000000 ;\
  577. 2x h0 += c9 ;\
  578. 2x t9 = c9 << 25 ;\
  579. 2x t0 = h0 + _0x2000000 ;\
  580. 2x h5 -= t5 ;\
  581. 2x c6 = t6 signed>> 26 ;\
  582. 2x h9 -= t9 ;\
  583. h4 h5 = h4[0]h5[0]h4[2]h4[3] h4[1]h5[1]h5[2]h5[3];\
  584. 2x c0 = t0 signed>> 26 ;\
  585. h4 h5 = h4[0]h4[1]h4[2]h5[2] h5[0]h5[1]h4[3]h5[3];\
  586. 2x h7 += c6 ;\
  587. mem64[posh] aligned= h4[0] ;\
  588. 2x t6 = c6 << 26 ;\
  589. mem64[posH] aligned= h4[1] ;\
  590. 2x h1 += c0 ;\
  591. h8 h9 = h8[0]h9[0]h8[2]h8[3] h8[1]h9[1]h9[2]h9[3];\
  592. 2x t0 = c0 << 26 ;\
  593. h8 h9 = h8[0]h8[1]h8[2]h9[2] h9[0]h9[1]h8[3]h9[3];\
  594. 2x h6 -= t6 ;\
  595. posh+=16;\
  596. 2x h0 -= t0 ;\
  597. mem64[posh] aligned= h8[0] ;\
  598. posH+=16;\
  599. mem64[posH] aligned= h8[1] ;\
  600. ;\
  601. h6 h7 = h6[0]h7[0]h6[2]h6[3] h6[1]h7[1]h7[2]h7[3];\
  602. h6 h7 = h6[0]h6[1]h6[2]h7[2] h7[0]h7[1]h6[3]h7[3];\
  603. posh-=8;\
  604. posH-=8;\
  605. ;\
  606. h0 h1 = h0[0]h1[0]h0[2]h0[3] h0[1]h1[1]h1[2]h1[3];\
  607. h0 h1 = h0[0]h0[1]h0[2]h1[2] h1[0]h1[1]h0[3]h1[3];\
  608. ;\
  609. mem64[posh] aligned= h6[0] ;\
  610. mem64[posH] aligned= h6[1] ;\
  611. posh-=24;\
  612. posH-=24;\
  613. mem64[posh] aligned= h0[0] ;\
  614. mem64[posH] aligned= h0[1] ;\
  615. @define fe_mulmul(h,f,g,H,F,G) ;\
  616. posf = f ;\
  617. posg = g ;\
  618. ;\
  619. g02 aligned= mem128[posg]; posg += 16 # g0 g1 g2 g3 ;\
  620. ;\
  621. g46 aligned= mem128[posg]; posg += 16 # g4 g5 g6 g7 ;\
  622. ;\
  623. new g89 ;\
  624. g89 aligned= mem64[posg] g89[1] # g8 g9 ? ? ;\
  625. ;\
  626. posG = G ;\
  627. g13 aligned= mem128[posG]; posG += 16 # G0 G1 G2 G3 ;\
  628. ;\
  629. g02 g13 = g02[0]g13[0] g02[2]g13[2] g02[1]g13[1] g02[3]g13[3] # g0 G0 g2 G2 g1 G1 g3 G3 ;\
  630. ;\
  631. g57 aligned= mem128[posG]; posG += 16 # G4 G5 G6 G7 ;\
  632. ;\
  633. 4x mix = g02 << 4 ;\
  634. g46 g57 = g46[0]g57[0] g46[2]g57[2] g46[1]g57[1] g46[3]g57[3] # g4 G4 g6 G6 g5 G5 g7 G7 ;\
  635. ;\
  636. 4x g13_19 = g13 << 4 ;\
  637. 4x mix += g02 ;\
  638. 4x g13_19 += g13 ;\
  639. 4x g46_19 = g46 << 4 ;\
  640. g89 aligned= g89[0] mem64[posG] # g8 g9 G8 G9 ;\
  641. 4x g57_19 = g57 << 4 ;\
  642. g89 = g89[0] g89[2] g89[1] g89[3] # g8 G8 g9 G9 ;\
  643. 4x g46_19 += g46 ;\
  644. 4x g57_19 += g57 ;\
  645. f02 aligned= mem128[posf]; posf += 16 # f0 f1 f2 f3 ;\
  646. 4x g89_19 = g89 << 4 ;\
  647. f46 aligned= mem128[posf]; posf += 16 # f4 f5 f6 f7 ;\
  648. 4x g89_19 += g89 ;\
  649. new f89 ;\
  650. f89 aligned= mem64[posf] f89[1] # f8 f9 ? ? ;\
  651. 4x mix += g02 ;\
  652. posF = F ;\
  653. f13 aligned= mem128[posF]; posF += 16 # F0 F1 F2 F3 ;\
  654. 4x g13_19 += g13 ;\
  655. f57 aligned= mem128[posF]; posF += 16 # F4 F5 F6 F7 ;\
  656. 4x g57_19 += g57 ;\
  657. f89 aligned= f89[0] mem64[posF] # f8 f9 F8 F9 ;\
  658. 4x g89_19 += g89 ;\
  659. f02 f13 = f02[0]f13[0] f02[2]f13[2] f02[1]f13[1] f02[3]f13[3] # f0 F0 f2 F2 f1 F1 f3 F3 ;\
  660. 4x g46_19 += g46 ;\
  661. ;\
  662. 4x mix += g02 # 19g0 19G0 19g2 19G2 ;\
  663. f46 f57 = f46[0]f57[0] f46[2]f57[2] f46[1]f57[1] f46[3]f57[3] # f4 F4 f6 F6 f5 F5 f7 F7 ;\
  664. 4x g13_19 += g13 # 19g1 19G1 19g3 19G3 ;\
  665. new g13_19_stack ;\
  666. ptr = &g13_19_stack ;\
  667. 4x g89_19 += g89 # 19g8 19G8 19g9 19G9 ;\
  668. f89 = f89[0] f89[2] f89[1] f89[3] # f8 F8 f9 F9 ;\
  669. ;\
  670. mem128[ptr] aligned= g13_19 ;\
  671. 4x f13_2 = f13 << 1 # 2f1 2F1 2f3 2F3 ;\
  672. new g89_19_stack ;\
  673. ptr = &g89_19_stack ;\
  674. mem128[ptr] aligned= g89_19 ;\
  675. 4x f57_2 = f57 << 1 # 2f5 2F5 2f7 2F7 ;\
  676. ;\
  677. new f13_2_stack ;\
  678. ptr = &f13_2_stack ;\
  679. mem128[ptr] aligned= f13_2 ;\
  680. ;\
  681. 4x f89_2 = f89 << 1 # 2f8 2F8 2f9 2F9 ;\
  682. 4x g57_19 += g57 # 19g5 19G5 19g7 19G7 ;\
  683. mix = f89_2[2,3] mix[2,3] # 2f9 2F9 19g2 19G2 ;\
  684. ;\
  685. 4x g46_19 += g46 # 19g4 19G4 19g6 19G6 ;\
  686. ;\
  687. new g57_19_stack ;\
  688. ptr = &g57_19_stack ;\
  689. mem128[ptr] aligned= g57_19 ;\
  690. ;\
  691. # h9 = f0g9+f1g8 +f2g7 +f3g6 +f4g5 +f5g4 +f6g3 +f7g2 +f8g1 +f9g0 ;\
  692. h9[0,1] = f02[0] signed* g89[2]; h9[2,3] = f02[1] signed* g89[3] ;\
  693. h9[0,1] += f13[0] signed* g89[0]; h9[2,3] += f13[1] signed* g89[1] ;\
  694. h9[0,1] += f02[2] signed* g57[2]; h9[2,3] += f02[3] signed* g57[3] ;\
  695. h9[0,1] += f13[2] signed* g46[2]; h9[2,3] += f13[3] signed* g46[3] ;\
  696. h9[0,1] += f46[0] signed* g57[0]; h9[2,3] += f46[1] signed* g57[1] ;\
  697. h9[0,1] += f57[0] signed* g46[0]; h9[2,3] += f57[1] signed* g46[1] ;\
  698. h9[0,1] += f46[2] signed* g13[2]; h9[2,3] += f46[3] signed* g13[3] ;\
  699. h9[0,1] += f57[2] signed* g02[2]; h9[2,3] += f57[3] signed* g02[3] ;\
  700. h9[0,1] += f89[0] signed* g13[0]; h9[2,3] += f89[1] signed* g13[1] ;\
  701. h9[0,1] += f89[2] signed* g02[0]; h9[2,3] += f89[3] signed* g02[1] ;\
  702. ;\
  703. new g46_19_stack ;\
  704. ptr = &g46_19_stack ;\
  705. mem128[ptr] aligned= g46_19 ;\
  706. ;\
  707. # h8 = f0g8+f1g7_2 +f2g6 +f3g5_2 +f4g4 +f5g3_2 +f6g2 +f7g1_2 +f8g0 +f9g9_38 ;\
  708. h8[0,1] = f02[0] signed* g89[0]; h8[2,3] = f02[1] signed* g89[1] ;\
  709. h8[0,1] += f13_2[0] signed* g57[2]; h8[2,3] += f13_2[1] signed* g57[3] ;\
  710. h8[0,1] += f13_2[2] signed* g57[0]; h8[2,3] += f13_2[3] signed* g57[1] ;\
  711. h8[0,1] += f02[2] signed* g46[2]; h8[2,3] += f02[3] signed* g46[3] ;\
  712. h8[0,1] += f46[0] signed* g46[0]; h8[2,3] += f46[1] signed* g46[1] ;\
  713. h8[0,1] += f46[2] signed* g02[2]; h8[2,3] += f46[3] signed* g02[3] ;\
  714. h8[0,1] += f89[0] signed* g02[0]; h8[2,3] += f89[1] signed* g02[1] ;\
  715. ;\
  716. new f57_2_stack ;\
  717. ptr = &f57_2_stack ;\
  718. mem128[ptr] aligned= f57_2 ;\
  719. ;\
  720. # h7 = f0g7+f1g6 +f2g5 +f3g4 +f4g3 +f5g2 +f6g1 +f7g0 +f8g9_19+f9g8_19 ;\
  721. h7[0,1] = f02[0] signed* g57[2]; h7[2,3] = f02[1] signed* g57[3] ;\
  722. h7[0,1] += f13[0] signed* g46[2]; h7[2,3] += f13[1] signed* g46[3] ;\
  723. h7[0,1] += f02[2] signed* g57[0]; h7[2,3] += f02[3] signed* g57[1] ;\
  724. h7[0,1] += f13[2] signed* g46[0]; h7[2,3] += f13[3] signed* g46[1] ;\
  725. h7[0,1] += f46[0] signed* g13[2]; h7[2,3] += f46[1] signed* g13[3] ;\
  726. h7[0,1] += f57[0] signed* g02[2]; h7[2,3] += f57[1] signed* g02[3] ;\
  727. h7[0,1] += f46[2] signed* g13[0]; h7[2,3] += f46[3] signed* g13[1] ;\
  728. h7[0,1] += f57[2] signed* g02[0]; h7[2,3] += f57[3] signed* g02[1] ;\
  729. ;\
  730. new mix_stack ;\
  731. ptr = &mix_stack ;\
  732. mem128[ptr] aligned= mix ;\
  733. ;\
  734. # h6 = f0g6+f1g5_2 +f2g4 +f3g3_2 +f4g2 +f5g1_2 +f6g0 +f7g9_38+f8g8_19+f9g7_38 ;\
  735. h6[0,1] = f02[0] signed* g46[2]; h6[2,3] = f02[1] signed* g46[3] ;\
  736. h6[0,1] += f02[2] signed* g46[0]; h6[2,3] += f02[3] signed* g46[1] ;\
  737. h6[0,1] += f46[0] signed* g02[2]; h6[2,3] += f46[1] signed* g02[3] ;\
  738. h6[0,1] += f46[2] signed* g02[0]; h6[2,3] += f46[3] signed* g02[1] ;\
  739. h6[0,1] += f13_2[0] signed* g57[0]; h6[2,3] += f13_2[1] signed* g57[1] ;\
  740. ;\
  741. new h9_stack ;\
  742. ptr = &h9_stack ;\
  743. mem128[ptr] aligned= h9 ;\
  744. ;\
  745. # h5 = f0g5+f1g4 +f2g3 +f3g2 +f4g1 +f5g0 +f6g9_19+f7g8_19+f8g7_19+f9g6_19 ;\
  746. h5[0,1] = f02[0] signed* g57[0]; h5[2,3] = f02[1] signed* g57[1] ;\
  747. h5[0,1] += f13[0] signed* g46[0]; h5[2,3] += f13[1] signed* g46[1] ;\
  748. h5[0,1] += f02[2] signed* g13[2]; h5[2,3] += f02[3] signed* g13[3] ;\
  749. h5[0,1] += f13[2] signed* g02[2]; h5[2,3] += f13[3] signed* g02[3] ;\
  750. h5[0,1] += f46[0] signed* g13[0]; h5[2,3] += f46[1] signed* g13[1] ;\
  751. h5[0,1] += f57[0] signed* g02[0]; h5[2,3] += f57[1] signed* g02[1] ;\
  752. ;\
  753. # h3 = f0g3+f1g2 +f2g1 +f3g0 +f4g9_19+f5g8_19+f6g7_19+f7g6_19+f8g5_19+f9g4_19 ;\
  754. h3[0,1] = f02[0] signed* g13[2]; h3[2,3] = f02[1] signed* g13[3] ;\
  755. h3[0,1] += f13[0] signed* g02[2]; h3[2,3] += f13[1] signed* g02[3] ;\
  756. h3[0,1] += f02[2] signed* g13[0]; h3[2,3] += f02[3] signed* g13[1] ;\
  757. h3[0,1] += f13[2] signed* g02[0]; h3[2,3] += f13[3] signed* g02[1] ;\
  758. ;\
  759. ptr = &g89_19_stack ;\
  760. g89_19 aligned= mem128[ptr] ;\
  761. ;\
  762. h7[0,1] += f89[0] signed* g89_19[2]; h7[2,3] += f89[1] signed* g89_19[3] ;\
  763. h7[0,1] += f89[2] signed* g89_19[0]; h7[2,3] += f89[3] signed* g89_19[1] ;\
  764. h5[0,1] += f46[2] signed* g89_19[2]; h5[2,3] += f46[3] signed* g89_19[3] ;\
  765. h5[0,1] += f57[2] signed* g89_19[0]; h5[2,3] += f57[3] signed* g89_19[1] ;\
  766. h3[0,1] += f46[0] signed* g89_19[2]; h3[2,3] += f46[1] signed* g89_19[3] ;\
  767. h3[0,1] += f57[0] signed* g89_19[0]; h3[2,3] += f57[1] signed* g89_19[1] ;\
  768. h6[0,1] += f89[0] signed* g89_19[0]; h6[2,3] += f89[1] signed* g89_19[1] ;\
  769. ;\
  770. new h7_stack ;\
  771. ptr = &h7_stack ;\
  772. mem128[ptr] aligned= h7 ;\
  773. ;\
  774. ;\
  775. # h1 = f0g1+f1g0 +f2g9_19+f3g8_19+f4g7_19+f5g6_19+f6g5_19+f7g4_19+f8g3_19+f9g2_19 ;\
  776. h1[0,1] = f02[0] signed* g13[0]; h1[2,3] = f02[1] signed* g13[1] ;\
  777. h1[0,1] += f13[0] signed* g02[0]; h1[2,3] += f13[1] signed* g02[1] ;\
  778. ;\
  779. ptr = &mix_stack ;\
  780. mix aligned= mem128[ptr] ;\
  781. ;\
  782. h8[0,1] += mix[0] signed* g89_19[2]; h8[2,3] += mix[1] signed* g89_19[3] ;\
  783. h1[0,1] += f02[2] signed* g89_19[2]; h1[2,3] += f02[3] signed* g89_19[3] ;\
  784. h1[0,1] += f13[2] signed* g89_19[0]; h1[2,3] += f13[3] signed* g89_19[1] ;\
  785. ;\
  786. ptr = &g46_19_stack ;\
  787. g46_19 aligned= mem128[ptr] ;\
  788. ;\
  789. h5[0,1] += f89[2] signed* g46_19[2]; h5[2,3] += f89[3] signed* g46_19[3] ;\
  790. h3[0,1] += f57[2] signed* g46_19[2]; h3[2,3] += f57[3] signed* g46_19[3] ;\
  791. h3[0,1] += f89[2] signed* g46_19[0]; h3[2,3] += f89[3] signed* g46_19[1] ;\
  792. h1[0,1] += f57[0] signed* g46_19[2]; h1[2,3] += f57[1] signed* g46_19[3] ;\
  793. h1[0,1] += f57[2] signed* g46_19[0]; h1[2,3] += f57[3] signed* g46_19[1] ;\
  794. ;\
  795. ptr = &g57_19_stack ;\
  796. g57_19 aligned= mem128[ptr] ;\
  797. ;\
  798. h5[0,1] += f89[0] signed* g57_19[2]; h5[2,3] += f89[1] signed* g57_19[3] ;\
  799. h3[0,1] += f46[2] signed* g57_19[2]; h3[2,3] += f46[3] signed* g57_19[3] ;\
  800. h3[0,1] += f89[0] signed* g57_19[0]; h3[2,3] += f89[1] signed* g57_19[1] ;\
  801. h1[0,1] += f46[0] signed* g57_19[2]; h1[2,3] += f46[1] signed* g57_19[3] ;\
  802. h1[0,1] += f46[2] signed* g57_19[0]; h1[2,3] += f46[3] signed* g57_19[1] ;\
  803. ;\
  804. new h5_stack ;\
  805. ptr = &h5_stack ;\
  806. mem128[ptr] aligned= h5 ;\
  807. ;\
  808. ;\
  809. ;\
  810. ptr = &g13_19_stack ;\
  811. g13_19 aligned= mem128[ptr] ;\
  812. h1[0,1] += f89[0] signed* g13_19[2]; h1[2,3] += f89[1] signed* g13_19[3] ;\
  813. h1[0,1] += f89[2] signed* mix[2]; h1[2,3] += f89[3] signed* mix[3] ;\
  814. ;\
  815. ;\
  816. # h4 = f0g4+f1g3_2 +f2g2 +f3g1_2 +f4g0 +f5g9_38+f6g8_19+f7g7_38+f8g6_19+f9g5_38 ;\
  817. h4[0,1] = f02[0] signed* g46[0]; h4[2,3] = f02[1] signed* g46[1] ;\
  818. h4[0,1] += f02[2] signed* g02[2]; h4[2,3] += f02[3] signed* g02[3] ;\
  819. h4[0,1] += f46[0] signed* g02[0]; h4[2,3] += f46[1] signed* g02[1] ;\
  820. h4[0,1] += f89[0] signed* g46_19[2]; h4[2,3] += f89[1] signed* g46_19[3] ;\
  821. h4[0,1] += f46[2] signed* g89_19[0]; h4[2,3] += f46[3] signed* g89_19[1] ;\
  822. h4[0,1] += f13_2[0] signed* g13[2]; h4[2,3] += f13_2[1] signed* g13[3] ;\
  823. h4[0,1] += f13_2[2] signed* g13[0]; h4[2,3] += f13_2[3] signed* g13[1] ;\
  824. ;\
  825. # h2 = f0g2+f1g1_2 +f2g0 +f3g9_38+f4g8_19+f5g7_38+f6g6_19+f7g5_38+f8g4_19+f9g3_38 ;\
  826. h2[0,1] = f02[0] signed* g02[2]; h2[2,3] = f02[1] signed* g02[3] ;\
  827. h2[0,1] += f02[2] signed* g02[0]; h2[2,3] += f02[3] signed* g02[1] ;\
  828. h2[0,1] += f46[2] signed* g46_19[2]; h2[2,3] += f46[3] signed* g46_19[3] ;\
  829. h2[0,1] += f46[0] signed* g89_19[0]; h2[2,3] += f46[1] signed* g89_19[1] ;\
  830. h2[0,1] += f89[0] signed* g46_19[0]; h2[2,3] += f89[1] signed* g46_19[1] ;\
  831. ;\
  832. # h0 = f0g0+f1g9_38+f2g8_19+f3g7_38+f4g6_19+f5g5_38+f6g4_19+f7g3_38+f8g2_19+f9g1_38 ;\
  833. h0[0,1] = f02[0] signed* g02[0]; h0[2,3] = f02[1] signed* g02[1] ;\
  834. h0[0,1] += f46[0] signed* g46_19[2]; h0[2,3] += f46[1] signed* g46_19[3] ;\
  835. h0[0,1] += f46[2] signed* g46_19[0]; h0[2,3] += f46[3] signed* g46_19[1] ;\
  836. h0[0,1] += f89[0] signed* mix[2]; h0[2,3] += f89[1] signed* mix[3] ;\
  837. h0[0,1] += f02[2] signed* g89_19[0]; h0[2,3] += f02[3] signed* g89_19[1] ;\
  838. ;\
  839. ptr = &f57_2_stack ;\
  840. f57_2 aligned= mem128[ptr] ;\
  841. ;\
  842. h8[0,1] += f57_2[0] signed* g13[2]; h8[2,3] += f57_2[1] signed* g13[3] ;\
  843. h8[0,1] += f57_2[2] signed* g13[0]; h8[2,3] += f57_2[3] signed* g13[1] ;\
  844. h6[0,1] += f57_2[0] signed* g13[0]; h6[2,3] += f57_2[1] signed* g13[1] ;\
  845. h6[0,1] += f57_2[2] signed* g89_19[2]; h6[2,3] += f57_2[3] signed* g89_19[3] ;\
  846. h4[0,1] += f57_2[0] signed* g89_19[2]; h4[2,3] += f57_2[1] signed* g89_19[3] ;\
  847. h4[0,1] += f57_2[2] signed* g57_19[2]; h4[2,3] += f57_2[3] signed* g57_19[3] ;\
  848. h0[0,1] += f57_2[0] signed* g57_19[0]; h0[2,3] += f57_2[1] signed* g57_19[1] ;\
  849. h0[0,1] += f57_2[2] signed* g13_19[2]; h0[2,3] += f57_2[3] signed* g13_19[3] ;\
  850. h2[0,1] += f57_2[0] signed* g57_19[2]; h2[2,3] += f57_2[1] signed* g57_19[3] ;\
  851. h2[0,1] += f57_2[2] signed* g57_19[0]; h2[2,3] += f57_2[3] signed* g57_19[1] ;\
  852. ;\
  853. ptr = &f13_2_stack ;\
  854. f13_2 aligned= mem128[ptr] ;\
  855. ;\
  856. ptr = &_0x2000000_stack ;\
  857. _0x2000000 aligned= mem128[ptr] ;\
  858. h6[0,1] += f13_2[2] signed* g13[2]; h6[2,3] += f13_2[3] signed* g13[3] ;\
  859. h0[0,1] += f13_2[0] signed* g89_19[2]; h0[2,3] += f13_2[1] signed* g89_19[3] ;\
  860. h0[0,1] += f13_2[2] signed* g57_19[2]; h0[2,3] += f13_2[3] signed* g57_19[3] ;\
  861. h2[0,1] += f13_2[0] signed* g13[0]; h2[2,3] += f13_2[1] signed* g13[1] ;\
  862. ptr = &_0x1000000_stack ;\
  863. _0x1000000 aligned= mem128[ptr] ;\
  864. h2[0,1] += f13_2[2] signed* g89_19[2]; h2[2,3] += f13_2[3] signed* g89_19[3] ;\
  865. ;\
  866. ptr = &h7_stack ;\
  867. h7 aligned= mem128[ptr] ;\
  868. ;\
  869. h0[0,1] += mix[0] signed* g13_19[0]; h0[2,3] += mix[1] signed* g13_19[1] ;\
  870. ptr = &h9_stack ;\
  871. h9 aligned= mem128[ptr] ;\
  872. ;\
  873. h6[0,1] += mix[0] signed* g57_19[2]; h6[2,3] += mix[1] signed* g57_19[3] ;\
  874. ptr = &h5_stack ;\
  875. h5 aligned= mem128[ptr] ;\
  876. ;\
  877. h4[0,1] += mix[0] signed* g57_19[0]; h4[2,3] += mix[1] signed* g57_19[1] ;\
  878. ;\
  879. 2x t0 = h0 + _0x2000000 ;\
  880. 2x t6 = h6 + _0x2000000 ;\
  881. ;\
  882. h2[0,1] += mix[0] signed* g13_19[2]; h2[2,3] += mix[1] signed* g13_19[3] ;\
  883. ;\
  884. 2x c0 = t0 signed>> 26 ;\
  885. 2x c6 = t6 signed>> 26 ;\
  886. 2x h1 += c0 ;\
  887. 2x t0 = c0 << 26 ;\
  888. 2x t1 = h1 + _0x1000000 ;\
  889. 2x h7 += c6 ;\
  890. 2x t6 = c6 << 26 ;\
  891. 2x t7 = h7 + _0x1000000 ;\
  892. 2x h0 -= t0 ;\
  893. 2x c1 = t1 signed>> 25 ;\
  894. 2x h6 -= t6 ;\
  895. 2x c7 = t7 signed>> 25 ;\
  896. 2x h2 += c1 ;\
  897. 2x t1 = c1 << 25 ;\
  898. 2x t2 = h2 + _0x2000000 ;\
  899. 2x h8 += c7 ;\
  900. 2x h1 -= t1 ;\
  901. 2x c2 = t2 signed>> 26 ;\
  902. 2x t7 = c7 << 25 ;\
  903. 2x t8 = h8 + _0x2000000 ;\
  904. 2x h3 += c2 ;\
  905. 2x t2 = c2 << 26 ;\
  906. 2x t3 = h3 + _0x1000000 ;\
  907. 2x h7 -= t7 ;\
  908. 2x c8 = t8 signed>> 26 ;\
  909. 2x h2 -= t2 ;\
  910. 2x c3 = t3 signed>> 25 ;\
  911. 2x h9 += c8 ;\
  912. 2x t8 = c8 << 26 ;\
  913. 2x t9 = h9 + _0x1000000 ;\
  914. 2x h4 += c3 ;\
  915. posh = h ;\
  916. 2x t3 = c3 << 25 ;\
  917. posH = H ;\
  918. 2x t4 = h4 + _0x2000000 ;\
  919. posh+=8 ;\
  920. 2x h8 -= t8 ;\
  921. posH+=8 ;\
  922. 2x c9 = t9 signed>> 25 ;\
  923. 2x h3 -= t3 ;\
  924. 2x c4 = t4 signed>> 26 ;\
  925. 2x s = c9 + c9 ;\
  926. 2x h5 += c4 ;\
  927. h2 h3 = h2[0]h3[0]h2[2]h2[3] h2[1]h3[1]h3[2]h3[3] ;\
  928. 2x t4 = c4 << 26 ;\
  929. h2 h3 = h2[0]h2[1]h2[2]h3[2] h3[0]h3[1]h2[3]h3[3] ;\
  930. 2x t5 = h5 + _0x1000000 ;\
  931. 2x h0 += s ;\
  932. mem64[posh] aligned= h2[0];posh+=8 ;\
  933. 2x s = c9 << 4 ;\
  934. mem64[posH] aligned= h2[1];posH+=8 ;\
  935. 2x h4 -= t4 ;\
  936. 2x c5 = t5 signed>> 25 ;\
  937. 2x h0 += s ;\
  938. 2x h6 += c5 ;\
  939. 2x t5 = c5 << 25 ;\
  940. 2x t6 = h6 + _0x2000000 ;\
  941. 2x h0 += c9 ;\
  942. 2x t9 = c9 << 25 ;\
  943. 2x t0 = h0 + _0x2000000 ;\
  944. 2x h5 -= t5 ;\
  945. 2x c6 = t6 signed>> 26 ;\
  946. 2x h9 -= t9 ;\
  947. h4 h5 = h4[0]h5[0]h4[2]h4[3] h4[1]h5[1]h5[2]h5[3] ;\
  948. 2x c0 = t0 signed>> 26 ;\
  949. h4 h5 = h4[0]h4[1]h4[2]h5[2] h5[0]h5[1]h4[3]h5[3] ;\
  950. 2x h7 += c6 ;\
  951. mem64[posh] aligned= h4[0] ;\
  952. 2x t6 = c6 << 26 ;\
  953. mem64[posH] aligned= h4[1] ;\
  954. 2x h1 += c0 ;\
  955. h8 h9 = h8[0]h9[0]h8[2]h8[3] h8[1]h9[1]h9[2]h9[3] ;\
  956. 2x t0 = c0 << 26 ;\
  957. h8 h9 = h8[0]h8[1]h8[2]h9[2] h9[0]h9[1]h8[3]h9[3] ;\
  958. 2x h6 -= t6 ;\
  959. posh+=16 ;\
  960. 2x h0 -= t0 ;\
  961. mem64[posh] aligned= h8[0] ;\
  962. posH+=16 ;\
  963. mem64[posH] aligned= h8[1] ;\
  964. ;\
  965. h6 h7 = h6[0]h7[0]h6[2]h6[3] h6[1]h7[1]h7[2]h7[3] ;\
  966. h6 h7 = h6[0]h6[1]h6[2]h7[2] h7[0]h7[1]h6[3]h7[3] ;\
  967. posh-=8 ;\
  968. posH-=8 ;\
  969. ;\
  970. h0 h1 = h0[0]h1[0]h0[2]h0[3] h0[1]h1[1]h1[2]h1[3] ;\
  971. h0 h1 = h0[0]h0[1]h0[2]h1[2] h1[0]h1[1]h0[3]h1[3] ;\
  972. ;\
  973. mem64[posh] aligned= h6[0] ;\
  974. mem64[posH] aligned= h6[1] ;\
  975. posh-=24 ;\
  976. posH-=24 ;\
  977. mem64[posh] aligned= h0[0] ;\
  978. mem64[posH] aligned= h0[1] ;\
  # fe_mul(h,f,g): multiply the ten-limb value at f by the ten-limb value at g
  # and store the carried ten-limb product at h.
  #   f, g : each read as ten 32-bit limbs (two 128-bit loads + one 64-bit load).
  #   h    : written as five 64-bit limb pairs at byte offsets 0, 8, 16, 24, 32.
  # All loads from f and g are issued before the first store through posh, and
  # the file itself calls fe_mul(tmp0,tmp0,tmp0), so h may alias f and/or g.
  # Reduction: products that wrap past limb 9 use the 19*g copies, and the carry
  # out of the top limb is multiplied by 19 and added back into limb 0.
  # Uses playground2 (through playp) as spill space and reloads the rounding
  # constants from _0x2000000_stack / _0x1000000_stack, which must already be
  # initialised by the caller.  Overwrites posf, posg, posh, playp and ptr.
  # NOTE(review): every access uses the aligned= form, so f, g, h and the
  # scratch areas are presumably required to be suitably aligned - confirm.
  979. @define fe_mul(h,f,g) ;\
  980. posf = f ;\
  981. posg = g ;\
  982. posh = h ;\
  983. ;\
  # Per-lane constants: 19 in every 32-bit lane, plus an all-0 and an all-1 vector that are interleaved below; the result is used as the per-lane shift count for "<< _0_1_0_1" (presumably 0,1,0,1 as the name says) ;\
  984. 4x _19_19_19_19 = 19 ;\
  985. ;\
  986. 4x _0_1_0_1 = 0 ;\
  987. ;\
  988. 4x _1_1_1_1 = 1 ;\
  989. ;\
  990. _0_1_0_1[0,1,2,3] _1_1_1_1[0,1,2,3] = _0_1_0_1[0]_1_1_1_1[0]_0_1_0_1[1]_1_1_1_1[1] _0_1_0_1[2]_1_1_1_1[2]_0_1_0_1[3]_1_1_1_1[3] ;\
  991. ;\
  # Load g: limbs 0-3 and 4-7 into two registers, limbs 8-9 into the upper 64 bits of f8_f9_g8_g9 ;\
  992. g0_g1_g2_g3 aligned= mem128[posg];posg+=16 ;\
  993. ;\
  994. g4_g5_g6_g7 aligned= mem128[posg];posg+=16 ;\
  995. ;\
  996. new f8_f9_g8_g9 ;\
  997. f8_f9_g8_g9 aligned= f8_f9_g8_g9[0]mem64[posg] ;\
  998. ;\
  # Load f the same way (limbs 8-9 go into the lower 64 bits of f8_f9_g8_g9); the 19*g products and the lane shuffles that follow prepare the multiplier operands ;\
  999. f0_f1_f2_f3 aligned= mem128[posf];posf+=16 ;\
  1000. playp = &playground2 ;\
  1001. ;\
  1002. f4_f5_f6_f7 aligned= mem128[posf];posf+=16 ;\
  1003. 4x 19g0_19g1_19g2_19g3 = g0_g1_g2_g3 * _19_19_19_19 ;\
  1004. f8_f9_g8_g9 aligned= mem64[posf]f8_f9_g8_g9[1] ;\
  1005. ;\
  1006. new f1_f8_f3_f0 ;\
  1007. f1_f8_f3_f0 = f1_f8_f3_f0[0,1]f0_f1_f2_f3[3]f0_f1_f2_f3[0] ;\
  1008. 4x 19g4_19g5_19g6_19g7 = g4_g5_g6_g7 * _19_19_19_19 ;\
  1009. ;\
  1010. f1_f8_f3_f0 = f0_f1_f2_f3[1]f8_f9_g8_g9[0]f1_f8_f3_f0[2,3] ;\
  1011. 4x f0_2f1_f2_2f3 = f0_f1_f2_f3 << _0_1_0_1 ;\
  1012. new g0_19g1_g2_19g3 ;\
  1013. g0_19g1_g2_19g3 = 19g0_19g1_19g2_19g3[1]g0_g1_g2_g3[0]g0_19g1_g2_19g3[2,3] # ;\
  1014. ;\
  1015. new g4_19g5_g6_19g7 ;\
  1016. g4_19g5_g6_19g7 = 19g4_19g5_19g6_19g7[1]g4_g5_g6_g7[0]g4_19g5_g6_19g7[2,3] # ;\
  1017. 4x f4_2f5_f6_2f7 = f4_f5_f6_f7 << _0_1_0_1 ;\
  1018. ;\
  1019. new f8_2f9_f9_f6 ;\
  1020. f8_2f9_f9_f6 = f8_f9_g8_g9[0] << _0_1_0_1[0],f8_f9_g8_g9[1] << _0_1_0_1[1],f8_2f9_f9_f6[2,3] ;\
  1021. ;\
  1022. g0_19g1_g2_19g3 = g0_19g1_g2_19g3[1]g0_19g1_g2_19g3[0]g0_19g1_g2_19g3[2,3] ;\
  1023. g8_19g9_19g8_19g9[0,1] = g8_19g9_19g8_19g9[0,1];g8_19g9_19g8_19g9[2] = f8_f9_g8_g9[2] * _19_19_19_19[2];g8_19g9_19g8_19g9[3] = f8_f9_g8_g9[3] * _19_19_19_19[3] # wants to move up ;\
  1024. g4_19g5_g6_19g7 = g4_19g5_g6_19g7[1]g4_19g5_g6_19g7[0]g4_19g5_g6_19g7[2,3] ;\
  1025. ;\
  1026. ;\
  1027. f8_2f9_f9_f6 = f8_2f9_f9_f6[0,1]f8_f9_g8_g9[1]f4_f5_f6_f7[2] ;\
  1028. ;\
  1029. ;\
  1030. g8_19g9_19g8_19g9 = g8_19g9_19g8_19g9[3]f8_f9_g8_g9[2]g8_19g9_19g8_19g9[2,3] # ;\
  1031. ;\
  1032. g8_19g9_19g8_19g9 = g8_19g9_19g8_19g9[1]g8_19g9_19g8_19g9[0]g8_19g9_19g8_19g9[2,3] ;\
  1033. ;\
  1034. new 19g8_g9_19g2_g3 ;\
  1035. 19g8_g9_19g2_g3 = f8_f9_g8_g9[3]g8_19g9_19g8_19g9[2]19g8_g9_19g2_g3[2,3] # ;\
  1036. g0_19g1_g2_19g3 = g0_19g1_g2_19g3[0,1]19g0_19g1_19g2_19g3[3]g0_g1_g2_g3[2] # ;\
  1037. ;\
  1038. ;\
  1039. ;\
  # Multiply-accumulate phase: each hXY register holds two 64-bit accumulators (lanes [0,1] and [2,3]); one 64-bit half of h02, h31, h24, h53 and h46 is parked in playground2 at offsets 0, 8, 16, 24, 32 and reloaded further down ;\
  1040. h02[0,1] = f0_2f1_f2_2f3[0] signed* g0_g1_g2_g3[0]; h02[2,3] = f0_2f1_f2_2f3[1] signed* g0_g1_g2_g3[1] ;\
  1041. g0_19g1_g2_19g3 = g0_19g1_g2_19g3[0,1]g0_19g1_g2_19g3[3]g0_19g1_g2_19g3[2] ;\
  1042. ;\
  1043. h02[0,1] += f0_2f1_f2_2f3[2] signed* g8_19g9_19g8_19g9[2]; h02[2,3] += f0_2f1_f2_2f3[3] signed* g8_19g9_19g8_19g9[3] ;\
  1044. ;\
  1045. 19g8_g9_19g2_g3 = 19g8_g9_19g2_g3[1]19g8_g9_19g2_g3[0]19g8_g9_19g2_g3[2,3] ;\
  1046. h02[0,1] += f4_2f5_f6_2f7[0] signed* 19g4_19g5_19g6_19g7[2]; h02[2,3] += f4_2f5_f6_2f7[1] signed* 19g4_19g5_19g6_19g7[3] ;\
  1047. ;\
  1048. 19g8_g9_19g2_g3 = 19g8_g9_19g2_g3[0,1]g0_g1_g2_g3[3]19g0_19g1_19g2_19g3[2] # ;\
  1049. h02[0,1] += f4_2f5_f6_2f7[2] signed* 19g4_19g5_19g6_19g7[0]; h02[2,3] += f4_2f5_f6_2f7[3] signed* 19g4_19g5_19g6_19g7[1] ;\
  1050. ;\
  1051. 19g8_g9_19g2_g3 = 19g8_g9_19g2_g3[0,1]19g8_g9_19g2_g3[3]19g8_g9_19g2_g3[2] ;\
  1052. h02[0,1] += f8_2f9_f9_f6[0] signed* 19g0_19g1_19g2_19g3[2]; h02[2,3] += f8_2f9_f9_f6[1] signed* 19g0_19g1_19g2_19g3[3] ;\
  1053. ;\
  1054. new f5_f2_f7_f4 ;\
  1055. f5_f2_f7_f4 = f4_f5_f6_f7[1]f0_f1_f2_f3[2]f5_f2_f7_f4[2,3] ;\
  1056. h31[0,1] = f1_f8_f3_f0[0] signed* g0_19g1_g2_19g3[2]; h31[2,3] = f1_f8_f3_f0[1] signed* g0_19g1_g2_19g3[3] ;\
  1057. ;\
  1058. f5_f2_f7_f4 = f5_f2_f7_f4[0,1]f4_f5_f6_f7[3]f4_f5_f6_f7[0] ;\
  1059. h31[0,1] += f1_f8_f3_f0[2] signed* g0_g1_g2_g3[0]; h31[2,3] += f1_f8_f3_f0[3] signed* g0_g1_g2_g3[1] ;\
  1060. ;\
  1061. mem64[playp] aligned= h02[0];playp+=8 ;\
  1062. h31[0,1] += f5_f2_f7_f4[0] signed* g8_19g9_19g8_19g9[2]; h31[2,3] += f5_f2_f7_f4[1] signed* g8_19g9_19g8_19g9[3] ;\
  1063. ;\
  1064. new h24 ;\
  1065. h24 = h02[2,3]h24[2,3] ;\
  1066. h31[0,1] += f5_f2_f7_f4[2] signed* 19g4_19g5_19g6_19g7[2]; h31[2,3] += f5_f2_f7_f4[3] signed* 19g4_19g5_19g6_19g7[3] ;\
  1067. ;\
  1068. ;\
  1069. h24 = h24[0],0 ;\
  1070. h31[0,1] += f8_2f9_f9_f6[2] signed* 19g4_19g5_19g6_19g7[0]; h31[2,3] += f8_2f9_f9_f6[3] signed* 19g4_19g5_19g6_19g7[1] ;\
  1071. ;\
  1072. g4_19g5_g6_19g7 = g4_19g5_g6_19g7[0,1]19g4_19g5_19g6_19g7[3]g4_g5_g6_g7[2] # ;\
  1073. h24[0,1] += f0_2f1_f2_2f3[0] signed* g0_g1_g2_g3[2]; h24[2,3] += f0_2f1_f2_2f3[1] signed* g0_g1_g2_g3[3] ;\
  1074. ;\
  1075. g4_19g5_g6_19g7 = g4_19g5_g6_19g7[0,1]g4_19g5_g6_19g7[3]g4_19g5_g6_19g7[2] ;\
  1076. h24[0,1] += f0_2f1_f2_2f3[2] signed* g0_g1_g2_g3[0]; h24[2,3] += f0_2f1_f2_2f3[3] signed* g0_g1_g2_g3[1] ;\
  1077. ;\
  1078. mem64[playp] aligned= h31[1];playp+=8 ;\
  1079. h24[0,1] += f4_2f5_f6_2f7[0] signed* g8_19g9_19g8_19g9[2]; h24[2,3] += f4_2f5_f6_2f7[1] signed* g8_19g9_19g8_19g9[3] ;\
  1080. ;\
  1081. new h53 ;\
  1082. h53 = h53[0,1]h31[0,1] ;\
  1083. h24[0,1] += f4_2f5_f6_2f7[2] signed* 19g4_19g5_19g6_19g7[2]; h24[2,3] += f4_2f5_f6_2f7[3] signed* 19g4_19g5_19g6_19g7[3] ;\
  1084. ;\
  1085. h53 = 0,h53[1] ;\
  1086. h24[0,1] += f8_2f9_f9_f6[0] signed* 19g4_19g5_19g6_19g7[0]; h24[2,3] += f8_2f9_f9_f6[1] signed* 19g4_19g5_19g6_19g7[1] ;\
  1087. ;\
  1088. h53[0,1] += f1_f8_f3_f0[0] signed* g4_19g5_g6_19g7[0]; h53[2,3] += f1_f8_f3_f0[1] signed* g4_19g5_g6_19g7[1] ;\
  1089. ;\
  1090. h53[0,1] += f1_f8_f3_f0[2] signed* g0_g1_g2_g3[2]; h53[2,3] += f1_f8_f3_f0[3] signed* g0_g1_g2_g3[3] ;\
  1091. ;\
  1092. h53[0,1] += f5_f2_f7_f4[0] signed* g0_g1_g2_g3[0]; h53[2,3] += f5_f2_f7_f4[1] signed* g0_g1_g2_g3[1] ;\
  1093. ;\
  1094. mem64[playp] aligned= h24[0];playp+=8 ;\
  1095. h53[0,1] += f5_f2_f7_f4[2] signed* g8_19g9_19g8_19g9[2]; h53[2,3] += f5_f2_f7_f4[3] signed* g8_19g9_19g8_19g9[3] ;\
  1096. ;\
  1097. new h46 ;\
  1098. h46 = h24[2,3]h46[2,3] ;\
  1099. h53[0,1] += f8_2f9_f9_f6[2] signed* 19g4_19g5_19g6_19g7[2]; h53[2,3] += f8_2f9_f9_f6[3] signed* 19g4_19g5_19g6_19g7[3] ;\
  1100. ;\
  1101. h46 = h46[0],0 ;\
  1102. h46[0,1] += f0_2f1_f2_2f3[0] signed* g4_g5_g6_g7[0]; h46[2,3] += f0_2f1_f2_2f3[1] signed* g4_g5_g6_g7[1] ;\
  1103. ;\
  1104. h46[0,1] += f0_2f1_f2_2f3[2] signed* g0_g1_g2_g3[2]; h46[2,3] += f0_2f1_f2_2f3[3] signed* g0_g1_g2_g3[3] ;\
  1105. ;\
  1106. h46[0,1] += f4_2f5_f6_2f7[0] signed* g0_g1_g2_g3[0]; h46[2,3] += f4_2f5_f6_2f7[1] signed* g0_g1_g2_g3[1] ;\
  1107. ;\
  1108. new h75 ;\
  1109. h75 = h75[0,1]h53[0,1] ;\
  1110. h46[0,1] += f4_2f5_f6_2f7[2] signed* g8_19g9_19g8_19g9[2]; h46[2,3] += f4_2f5_f6_2f7[3] signed* g8_19g9_19g8_19g9[3] ;\
  1111. ;\
  1112. h75 = 0,h75[1] ;\
  1113. h46[0,1] += f8_2f9_f9_f6[0] signed* 19g4_19g5_19g6_19g7[2]; h46[2,3] += f8_2f9_f9_f6[1] signed* 19g4_19g5_19g6_19g7[3] ;\
  1114. ;\
  1115. mem64[playp] aligned= h53[1];playp+=8 ;\
  1116. h75[0,1] += f1_f8_f3_f0[0] signed* g4_19g5_g6_19g7[2]; h75[2,3] += f1_f8_f3_f0[1] signed* g4_19g5_g6_19g7[3] ;\
  1117. ;\
  1118. h75[0,1] += f1_f8_f3_f0[2] signed* g4_g5_g6_g7[0]; h75[2,3] += f1_f8_f3_f0[3] signed* g4_g5_g6_g7[1] ;\
  1119. ;\
  1120. mem64[playp] aligned= h46[0] ;\
  1121. h75[0,1] += f5_f2_f7_f4[0] signed* g0_g1_g2_g3[2]; h75[2,3] += f5_f2_f7_f4[1] signed* g0_g1_g2_g3[3] ;\
  1122. ;\
  1123. new h68 ;\
  1124. h68 = h46[2,3]h68[2,3] ;\
  1125. h75[0,1] += f5_f2_f7_f4[2] signed* g0_g1_g2_g3[0]; h75[2,3] += f5_f2_f7_f4[3] signed* g0_g1_g2_g3[1] ;\
  1126. ;\
  1127. h68 = h68[0],0 ;\
  1128. h75[0,1] += f8_2f9_f9_f6[2] signed* g8_19g9_19g8_19g9[2]; h75[2,3] += f8_2f9_f9_f6[3] signed* g8_19g9_19g8_19g9[3] ;\
  1129. ;\
  1130. h68[0,1] += f0_2f1_f2_2f3[0] signed* g4_g5_g6_g7[2]; h68[2,3] += f0_2f1_f2_2f3[1] signed* g4_g5_g6_g7[3] ;\
  1131. ;\
  1132. h68[0,1] += f0_2f1_f2_2f3[2] signed* g4_g5_g6_g7[0]; h68[2,3] += f0_2f1_f2_2f3[3] signed* g4_g5_g6_g7[1] ;\
  1133. ;\
  1134. h68[0,1] += f4_2f5_f6_2f7[0] signed* g0_g1_g2_g3[2]; h68[2,3] += f4_2f5_f6_2f7[1] signed* g0_g1_g2_g3[3] ;\
  1135. ;\
  1136. new h97 ;\
  1137. h97 = h97[0,1]h75[0,1] ;\
  1138. h68[0,1] += f4_2f5_f6_2f7[2] signed* g0_g1_g2_g3[0]; h68[2,3] += f4_2f5_f6_2f7[3] signed* g0_g1_g2_g3[1] ;\
  1139. ;\
  1140. h97 = 0,h97[1] ;\
  1141. h68[0,1] += f8_2f9_f9_f6[0] signed* g8_19g9_19g8_19g9[2]; h68[2,3] += f8_2f9_f9_f6[1] signed* g8_19g9_19g8_19g9[3] ;\
  1142. ;\
  1143. h97[0,1] += f1_f8_f3_f0[0] signed* g8_19g9_19g8_19g9[0]; h97[2,3] += f1_f8_f3_f0[1] signed* g8_19g9_19g8_19g9[1] ;\
  1144. ;\
  1145. playp -= 32 ;\
  1146. h97[0,1] += f1_f8_f3_f0[2] signed* g4_g5_g6_g7[2]; h97[2,3] += f1_f8_f3_f0[3] signed* g4_g5_g6_g7[3] ;\
  1147. ;\
  1148. h97[0,1] += f5_f2_f7_f4[0] signed* g4_g5_g6_g7[0]; h97[2,3] += f5_f2_f7_f4[1] signed* g4_g5_g6_g7[1] ;\
  1149. new h80 ;\
  1150. h80 = h68[2,3]h80[2,3] ;\
  1151. ;\
  1152. h97[0,1] += f5_f2_f7_f4[2] signed* g0_g1_g2_g3[2]; h97[2,3] += f5_f2_f7_f4[3] signed* g0_g1_g2_g3[3] ;\
  1153. h80 aligned= h80[0]mem64[playp];playp+=8 ;\
  1154. ;\
  1155. h97[0,1] += f8_2f9_f9_f6[2] signed* g0_g1_g2_g3[0]; h97[2,3] += f8_2f9_f9_f6[3] signed* g0_g1_g2_g3[1] ;\
  1156. ;\
  1157. h80[0,1] += f0_2f1_f2_2f3[0] signed* g8_19g9_19g8_19g9[0]; h80[2,3] += f0_2f1_f2_2f3[1] signed* g8_19g9_19g8_19g9[1] ;\
  1158. new 19g4_g5_19g6_g7 ;\
  1159. 19g4_g5_19g6_g7 = g4_g5_g6_g7[1]19g4_19g5_19g6_19g7[0]19g4_g5_19g6_g7[2,3] ;\
  1160. ;\
  1161. h80[0,1] += f0_2f1_f2_2f3[2] signed* g4_19g5_g6_19g7[2]; h80[2,3] += f0_2f1_f2_2f3[3] signed* g4_19g5_g6_19g7[3] ;\
  1162. 19g4_g5_19g6_g7 = 19g4_g5_19g6_g7[1]19g4_g5_19g6_g7[0]19g4_g5_19g6_g7[2,3] ;\
  1163. ;\
  1164. h80[0,1] += f4_2f5_f6_2f7[0] signed* g4_19g5_g6_19g7[0]; h80[2,3] += f4_2f5_f6_2f7[1] signed* g4_19g5_g6_19g7[1] ;\
  1165. 19g4_g5_19g6_g7 = 19g4_g5_19g6_g7[0,1]g4_g5_g6_g7[3]19g4_19g5_19g6_19g7[2] ;\
  1166. ;\
  1167. new h19 ;\
  1168. h19 = h19[0,1]h97[0,1] ;\
  1169. h80[0,1] += f4_2f5_f6_2f7[2] signed* g0_19g1_g2_19g3[2]; h80[2,3] += f4_2f5_f6_2f7[3] signed* g0_19g1_g2_19g3[3] ;\
  1170. 19g4_g5_19g6_g7 = 19g4_g5_19g6_g7[0,1]19g4_g5_19g6_g7[3]19g4_g5_19g6_g7[2] ;\
  1171. ;\
  1172. ;\
  1173. h19 aligned= mem64[playp]h19[1] ;\
  1174. h80[0,1] += f8_2f9_f9_f6[0] signed* g0_19g1_g2_19g3[0]; h80[2,3] += f8_2f9_f9_f6[1] signed* g0_19g1_g2_19g3[1] ;\
  1175. ;\
  1176. h19[0,1] += f1_f8_f3_f0[0] signed* g0_g1_g2_g3[0]; h19[2,3] += f1_f8_f3_f0[1] signed* g0_g1_g2_g3[1] ;\
  1177. ;\
  1178. playp+=24 ;\
  1179. h19[0,1] += f1_f8_f3_f0[2] signed* 19g8_g9_19g2_g3[0]; h19[2,3] += f1_f8_f3_f0[3] signed* 19g8_g9_19g2_g3[1] ;\
  1180. new h04 ;\
  1181. h04 = h80[2,3]h04[2,3] ;\
  1182. ;\
  1183. new h37 ;\
  1184. h37 = h37[0]h97[1] ;\
  1185. h19[0,1] += f5_f2_f7_f4[0] signed* 19g4_g5_19g6_g7[2]; h19[2,3] += f5_f2_f7_f4[1] signed* 19g4_g5_19g6_g7[3] ;\
  1186. new h15 ;\
  1187. h15 = h15[0,1]h75[2,3] ;\
  1188. ;\
  1189. new h48 ;\
  1190. h48 = h48[0,1]h80[0,1] ;\
  1191. h19[0,1] += f5_f2_f7_f4[2] signed* 19g4_g5_19g6_g7[0]; h19[2,3] += f5_f2_f7_f4[3] signed* 19g4_g5_19g6_g7[1] ;\
  1192. new h26 ;\
  1193. h26 = h26[0,1]h68[0,1] ;\
  1194. ;\
  1195. h19[0,1] += f8_2f9_f9_f6[2] signed* 19g8_g9_19g2_g3[2]; h19[2,3] += f8_2f9_f9_f6[3] signed* 19g8_g9_19g2_g3[3] ;\
  1196. h04 aligned= h04[0]mem64[playp] ;\
  1197. ;\
  1198. playp -= 16 ;\
  1199. ;\
  1200. h15 = h19[0,1]h15[2,3] ;\
  1201. ;\
  # Carry phase: mask26/mask25 start as all-ones and are shifted left by 26/25 within each 64-bit lane, so "t &= mask" leaves exactly the part of t that was carried out (c << 26 or c << 25), which is then subtracted from the source limb ;\
  1202. 4x mask26 = 0xffffffff ;\
  1203. 2x mask25 = mask26 << 25 ;\
  1204. ;\
  1205. ptr = &_0x2000000_stack ;\
  1206. _0x2000000 aligned= mem128[ptr] ;\
  1207. 2x t0 = h04 + _0x2000000 ;\
  1208. ;\
  1209. 2x mask26 <<= 26 ;\
  1210. ;\
  1211. #waiting for t0 ;\
  1212. ;\
  1213. 2x c = t0 signed>> 26 ;\
  1214. h26 aligned= mem64[playp]h26[1];playp += 8 ;\
  1215. ;\
  1216. #waiting for c ;\
  1217. ;\
  1218. 2x h15 += c ;\
  1219. ;\
  1220. t0 &= mask26 ;\
  1221. h37 aligned= mem64[playp]h37[1];playp += 8 ;\
  1222. ;\
  1223. ptr = &_0x1000000_stack ;\
  1224. _0x1000000 aligned= mem128[ptr] ;\
  1225. 2x t1 = h15 + _0x1000000 ;\
  1226. ;\
  1227. 2x h04 -= t0 ;\
  1228. ;\
  1229. #waiting for t1 ;\
  1230. ;\
  1231. 2x c = t1 signed>> 25 ;\
  1232. h48 = h04[2,3]h48[2,3] ;\
  1233. ;\
  1234. #waiting for t1 ;\
  1235. ;\
  1236. t1 &= mask25 ;\
  1237. ;\
  1238. 2x h26 += c ;\
  1239. new h59 ;\
  1240. h59 = h59[0]h19[1] ;\
  1241. ;\
  1242. 2x t0 = h26 + _0x2000000 ;\
  1243. ;\
  1244. 2x h15 -= t1 ;\
  1245. ;\
  1246. #waiting for t0 ;\
  1247. ;\
  1248. 2x c = t0 signed>> 26 ;\
  1249. h59 = h15[2,3]h59[2,3] ;\
  1250. ;\
  1251. t0 &= mask26 ;\
  1252. ;\
  1253. 2x h37 += c ;\
  1254. ;\
  1255. 2x t1 = h37 + _0x1000000 ;\
  1256. ;\
  1257. 2x h26 -= t0 ;\
  1258. ;\
  1259. #waiting for t1 ;\
  1260. ;\
  1261. 2x c = t1 signed>> 25 ;\
  1262. ;\
  1263. t1 &= mask25 ;\
  1264. ;\
  1265. 2x h48 += c ;\
  1266. ;\
  1267. 2x t0 = h48 + _0x2000000 ;\
  1268. ;\
  1269. 2x h37 -= t1 ;\
  1270. ;\
  1271. #waiting for t0 ;\
  1272. ;\
  1273. 2x c = t0 signed>> 26 ;\
  1274. ;\
  1275. t0 &= mask26 ;\
  1276. ;\
  1277. 2x h59 += c ;\
  1278. ;\
  1279. new t ;\
  1280. t = t[0], h59[1] + _0x1000000[1] ;\
  1281. ;\
  1282. 2x h48 -= t0 ;\
  1283. ;\
  1284. #waiting for t ;\
  1285. ;\
  1286. c = c[0],t[1] signed>> 25 ;\
  1287. ;\
  1288. t &= mask25 ;\
  1289. ;\
  # Wrap-around of the top carry: s = (c << 4) + (c + c + c) = 19*c is added to the low half of h04, followed by one more 26-bit carry from that half into h15 ;\
  1290. new s2 ;\
  1291. s2 = s2[0],c[1] + c[1] ;\
  1292. ;\
  1293. new s ;\
  1294. s = s[0],c[1] << 4 ;\
  1295. ;\
  1296. s2 = s2[0],s2[1] + c[1] ;\
  1297. ;\
  1298. #waiting for s2 ;\
  1299. ;\
  1300. s = s[0],s[1] + s2[1] ;\
  1301. ;\
  1302. #waiting for s ;\
  1303. ;\
  1304. h04 = h04[0] + s[1],h04[1] ;\
  1305. ;\
  1306. h26[0,1,2,3] h37[0,1,2,3] = h26[0]h37[0]h26[1]h37[1] h26[2]h37[2]h26[3]h37[3] # h26 now contains 0,0,h2,h3, h37 contains 0,0,h6,h7 ;\
  1307. ;\
  1308. t0 = h04[0] + _0x2000000[0],t0[1] ;\
  1309. ;\
  # Output stores (64 bits each), interleaved with the last carry: h+8 <- h26[0], h+24 <- h37[0], h+16 <- h48[0], h+32 <- h59[0], and finally h+0 <- h04[0] ;\
  1310. posh += 8 ;\
  1311. mem64[posh] aligned= h26[0] ;\
  1312. h59 = h59[0],h59[1] - t[1] ;\
  1313. ;\
  1314. posh += 16 ;\
  1315. mem64[posh] aligned= h37[0] ;\
  1316. c = t0[0] signed>> 26,c[1] ;\
  1317. ;\
  1318. t0 &= mask26 ;\
  1319. ;\
  1320. h15 = h15[0] + c[0],h15[1] ;\
  1321. ;\
  1322. h48[0,1,2,3] h59[0,1,2,3] = h48[0]h59[0]h48[1]h59[1] h48[2]h59[2]h48[3]h59[3] # h48 now contains 0,0,h4,h5, h59 contains 0,0,h8,h9 ;\
  1323. ;\
  1324. h04 = h04[0] - t0[0],h04[1] ;\
  1325. ;\
  1326. #waiting for h04 ;\
  1327. posh -= 8 ;\
  1328. mem64[posh] aligned= h48[0] ;\
  1329. posh += 16 ;\
  1330. mem64[posh] aligned= h59[0] ;\
  1331. #waiting for h04 ;\
  1332. ;\
  1333. # Costs 3 cycles! ;\
  1334. h04[0,1,2,3] h15[0,1,2,3] = h04[0]h15[0]h04[1]h15[1] h04[2]h15[2]h04[3]h15[3] # h04 now contains 0,0,h0,h1, ;\
  1335. ;\
  1336. posh -= 32 ;\
  1337. mem64[posh] aligned= h04[0] ;\
  # crypto_scalarmult_curve25519_neon2(q, n, p)
  #   input_0 -> q : output; eight 32-bit words (32 bytes) are stored here.
  #   input_1 -> n : scalar; 32 bytes are read, copied to the stack buffer e
  #                  and clamped there (the caller's copy is not modified).
  #   input_2 -> p : input coordinate; bytes 0..31 are read.
  #   returns 0 (result is set to 0 just before qpopreturn).
  # Flow: save registers, build constants, clamp the scalar, unpack p into ten
  # limbs at x1, run 255 ladder steps (pos = 254 down to 0), compute
  # z2 = x2 * z2^(2^255-21) in invertloop, then fully reduce and pack the limbs.
  # The fe_* macros used below are defined elsewhere in this file.
  1338. qpushenter crypto_scalarmult_curve25519_neon2
  1339. stack64 stack_r45
  1340. stack64 stack_r67
  1341. stack64 stack_r89
  1342. stack64 stack_r1011
  1343. stack32 stack_r12
  1344. stack32 stack_r14
  # Spill caller registers r4-r12 and r14; they are restored before returning.
  1345. assign r4 r5 to caller_r4 caller_r5; stack_r45 = caller_r4 caller_r5
  1346. assign r6 r7 to caller_r6 caller_r7; stack_r67 = caller_r6 caller_r7
  1347. assign r8 r9 to caller_r8 caller_r9; stack_r89 = caller_r8 caller_r9
  1348. assign r10 r11 to caller_r10 caller_r11; stack_r1011 = caller_r10 caller_r11
  1349. stack_r12 = caller_r12
  1350. stack_r14 = caller_r14
  1351. stack32 swap_stack
  1352. stack32 pos_stack
  1353. stack256 e
  1354. int32 eptr
  1355. q = input_0
  1356. n = input_1
  1357. p = input_2
  1358. playground1_ptr = &playground1
  1359. swap = 0
  1360. pos = 254
  # Rounding constants: every 32-bit lane is set to 1, then each 64-bit lane is
  # shifted right by 7 / 8, which moves the upper 1 down to bit 25 / bit 24
  # (0x2000000 / 0x1000000).  They are also written to stack slots so the
  # macros can reload them.
  1361. 4x _0x1000000 = 1
  1362. 2x _0x2000000 = _0x1000000 unsigned>> 7
  1363. 2x _0x1000000 = _0x1000000 unsigned>> 8
  1364. new _19_19_38_38
  1365. _19_19_38_38 = 19,19,_19_19_38_38[2,3]
  1366. _19_19_38_38 = _19_19_38_38[0,1],38,38
  1367. ptr = &_0x2000000_stack
  1368. mem128[ptr] aligned= _0x2000000
  1369. ptr = &_0x1000000_stack
  1370. mem128[ptr] aligned= _0x1000000
  1371. ptr = &_19_19_38_38_stack
  1372. mem128[ptr] aligned= _19_19_38_38
  # const121666: cleared by fe_0 (presumably zero-fills, per its name), then its
  # first word is set to 121666, computed as -958 - (-958 << 7) = 958 * 127.
  1373. fe_0(const121666)
  1374. ptr = const121666
  1375. word = 960
  1376. word = word - 2
  1377. word = -word
  1378. word = word - (word << 7)
  1379. mem32[ptr] = word
  # Copy the 32-byte scalar into e and clamp it: clear the low three bits of
  # byte 0, clear bit 7 and set bit 6 of byte 31.
  1380. eptr = &e
  1381. e0 = mem128[n]; n += 16
  1382. e4 = mem128[n]
  1383. mem128[eptr] aligned= e0; eptr += 16
  1384. mem128[eptr] aligned= e4
  1385. eptr -= 16
  1386. byte = mem8[eptr]
  1387. byte &= 248
  1388. mem8[eptr] = byte
  1389. byte = mem8[eptr + 31]
  1390. byte &= 127
  1391. byte |= 64
  1392. mem8[eptr + 31] = byte
  # Limb masks: 0xffffffff shifted right by 7 / 6 leaves 25 / 26 low one-bits
  # (assuming the 2x constant load puts 0xffffffff in each 64-bit lane).
  1393. 2x mask26 = 0xffffffff
  1394. 2x mask25 = mask26 unsigned>> 7
  1395. 2x mask26 = mask26 unsigned>> 6
  1396. new h0
  1397. new h1
  1398. new h2
  1399. new h3
  1400. new h4
  1401. new h5
  1402. new h6
  1403. new h7
  1404. new h8
  1405. new h9
  # Unpack p into ten limbs.  64-bit windows are loaded at byte offsets
  # 0,0,6,6,12,16,16,22,24,24 and shifted right by 0,26,3,29,6,0,25,3,12,38, so
  # limb k starts at bit 0,26,51,77,102,128,153,179,204,230 of the input; even
  # limbs are masked to 26 bits and odd limbs to 25 bits.
  1406. h0 = mem64[p] h0[1]
  1407. h1 = mem64[p] h1[1]
  1408. p += 6
  1409. h2 = mem64[p] h2[1]
  1410. h3 = mem64[p] h3[1]
  1411. p += 6
  1412. h4 = mem64[p] h4[1]
  1413. p += 4
  1414. h5 = mem64[p] h5[1]
  1415. h6 = mem64[p] h6[1]
  1416. p += 6
  1417. h7 = mem64[p] h7[1]
  1418. p += 2
  1419. h8 = mem64[p] h8[1]
  1420. h9 = mem64[p] h9[1]
  1421. 2x h1 unsigned>>= 26
  1422. 2x h2 unsigned>>= 3
  1423. 2x h3 unsigned>>= 29
  1424. 2x h4 unsigned>>= 6
  1425. 2x h6 unsigned>>= 25
  1426. 2x h7 unsigned>>= 3
  1427. 2x h8 unsigned>>= 12
  1428. 2x h9 unsigned>>= 38
  1429. h0 &= mask26
  1430. h2 &= mask26
  1431. h4 &= mask26
  1432. h6 &= mask26
  1433. h8 &= mask26
  1434. h1 &= mask25
  1435. h3 &= mask25
  1436. h5 &= mask25
  1437. h7 &= mask25
  1438. h9 &= mask25
  # Carry the limbs (two interleaved chains starting at h0 and h6; the carry out
  # of h9 is multiplied by 19 and added to h0) and store them to x1 as 64-bit
  # limb pairs, written in the order x1+8, x1+16, x1+32, x1+24, x1+0.
  1439. posh = x1
  1440. 2x t0 = h0 + _0x2000000
  1441. 2x t6 = h6 + _0x2000000
  1442. 2x c0 = t0 signed>> 26
  1443. 2x c6 = t6 signed>> 26
  1444. 2x h1 += c0
  1445. 2x t0 = c0 << 26
  1446. 2x t1 = h1 + _0x1000000
  1447. 2x h7 += c6
  1448. 2x t6 = c6 << 26
  1449. 2x t7 = h7 + _0x1000000
  1450. 2x h0 -= t0
  1451. 2x c1 = t1 signed>> 25
  1452. 2x h6 -= t6
  1453. 2x c7 = t7 signed>> 25
  1454. 2x h2 += c1
  1455. 2x t1 = c1 << 25
  1456. 2x t2 = h2 + _0x2000000
  1457. 2x h8 += c7
  1458. 2x h1 -= t1
  1459. 2x c2 = t2 signed>> 26
  1460. 2x t7 = c7 << 25
  1461. 2x t8 = h8 + _0x2000000
  1462. 2x h3 += c2
  1463. 2x t2 = c2 << 26
  1464. 2x t3 = h3 + _0x1000000
  1465. 2x h7 -= t7
  1466. 2x c8 = t8 signed>> 26
  1467. 2x h2 -= t2
  1468. 2x c3 = t3 signed>> 25
  1469. 2x h9 += c8
  1470. 2x t8 = c8 << 26
  1471. 2x t9 = h9 + _0x1000000
  1472. 2x h4 += c3
  1473. 2x t3 = c3 << 25
  1474. 2x t4 = h4 + _0x2000000
  1475. posh+=8
  1476. 2x h8 -= t8
  1477. 2x c9 = t9 signed>> 25
  1478. 2x h3 -= t3
  1479. 2x c4 = t4 signed>> 26
  1480. 2x s = c9 + c9
  1481. 2x h5 += c4
  1482. h2 h3 = h2[0]h3[0]h2[2]h2[3] h2[1]h3[1]h3[2]h3[3]
  1483. 2x t4 = c4 << 26
  1484. h2 h3 = h2[0]h2[1]h2[2]h3[2] h3[0]h3[1]h2[3]h3[3]
  1485. 2x t5 = h5 + _0x1000000
  1486. 2x h0 += s
  1487. mem64[posh] aligned= h2[0];posh+=8
  1488. 2x s = c9 << 4
  1489. 2x h4 -= t4
  1490. 2x c5 = t5 signed>> 25
  1491. 2x h0 += s
  1492. 2x h6 += c5
  1493. 2x t5 = c5 << 25
  1494. 2x t6 = h6 + _0x2000000
  1495. 2x h0 += c9
  1496. 2x t9 = c9 << 25
  1497. 2x t0 = h0 + _0x2000000
  1498. 2x h5 -= t5
  1499. 2x c6 = t6 signed>> 26
  1500. 2x h9 -= t9
  1501. h4 h5 = h4[0]h5[0]h4[2]h4[3] h4[1]h5[1]h5[2]h5[3]
  1502. 2x c0 = t0 signed>> 26
  1503. h4 h5 = h4[0]h4[1]h4[2]h5[2] h5[0]h5[1]h4[3]h5[3]
  1504. 2x h7 += c6
  1505. mem64[posh] aligned= h4[0]
  1506. 2x t6 = c6 << 26
  1507. 2x h1 += c0
  1508. h8 h9 = h8[0]h9[0]h8[2]h8[3] h8[1]h9[1]h9[2]h9[3]
  1509. 2x t0 = c0 << 26
  1510. h8 h9 = h8[0]h8[1]h8[2]h9[2] h9[0]h9[1]h8[3]h9[3]
  1511. 2x h6 -= t6
  1512. posh+=16
  1513. 2x h0 -= t0
  1514. mem64[posh] aligned= h8[0]
  1515. h6 h7 = h6[0]h7[0]h6[2]h6[3] h6[1]h7[1]h7[2]h7[3]
  1516. h6 h7 = h6[0]h6[1]h6[2]h7[2] h7[0]h7[1]h6[3]h7[3]
  1517. posh-=8
  1518. h0 h1 = h0[0]h1[0]h0[2]h0[3] h0[1]h1[1]h1[2]h1[3]
  1519. h0 h1 = h0[0]h0[1]h0[2]h1[2] h1[0]h1[1]h0[3]h1[3]
  1520. mem64[posh] aligned= h6[0]
  1521. posh-=24
  1522. mem64[posh] aligned= h0[0]
  # Ladder state initialisation (presumably x2 = 1, z2 = 0, z3 = 1 per the macro
  # names; x3 is set from x1).
  1523. fe_1(x2)
  1524. fe_0(z2)
  1525. fe_1(z3)
  1526. fe_copy(x3,x1)
  # Main loop: one pass per scalar bit, pos = 254 down to 0 (255 passes).
  #   bit  = (e[pos >> 3] >> (pos & 7)) & 1
  #   swap = -(previous bit XOR bit), i.e. 0 or all-ones, handed to
  #          fe_negcswap2addsub; swap_stack keeps the current bit for the next
  #          pass.  pos and swap live on the stack across the fe_* macros.
  1527. mainloop:
  1528. pos8 = (pos unsigned>> 3)
  1529. pos7 = pos & 7
  1530. bit = mem8[eptr + pos8]
  1531. bit unsigned>>= pos7
  1532. bit &= 1
  1533. pos_stack = pos
  1534. swap ^= bit
  1535. swap_stack = bit
  1536. swap = -swap
  1537. fe_negcswap2addsub(x2,x3,z2,z3,swap)
  1538. fe_sqsq(tmp0,x2,tmp1,z2)
  1539. fe_mulmul(z2,z3,x2,x2,x3,z2)
  1540. fe_sub(z3,tmp0,tmp1)
  1541. fe_addsub(x3,z2,x2,z2)
  1542. fe_mulmul(tmp0,const121666,z3,x2,tmp0,tmp1)
  1543. fe_sqsq(z2,z2,x3,x3)
  1544. fe_add(tmp0,tmp1,tmp0)
  1545. fe_mulmul(z3,x1,z2,z2,z3,tmp0)
  1546. pos = pos_stack
  1547. swap = swap_stack
  # Decrement pos and loop again unless pos was already 0 before the decrement.
  1548. signed<? pos -= 1
  1549. goto mainloop if !signed<
  1550. # skip: swap is always 0 here since last exponent bit is 0
  1551. # swap = -swap
  1552. # fe_negcswap2(x2,x3,z2,z3,swap)
  # Inversion and final multiply, as a table-driven loop over i = 0..11.
  # Each pass: tmp0 = z2; square tmp0 j times; z2 = mulsource * tmp0; then copy
  # z2 to postcopy if postcopy != 0.  Defaults are j = 2, mulsource = z2,
  # postcopy = 0; the compare chain below overrides them per pass:
  #   i=1: j=1,  mulsource=tmp1,      postcopy=z11_copy (then z2 = tmp0)
  #   i=2: j=1,  mulsource=z11_copy
  #   i=3: j=5,  postcopy=tmp1        i=4: j=10          i=5: j=20
  #   i=6: j=10, mulsource=tmp1,      postcopy=tmp1
  #   i=7: j=50                       i=8: j=100
  #   i=9: j=50, mulsource=tmp1       i=10: j=5, mulsource=z11_copy
  #   i=11: j=0 (no squaring), mulsource=x2
  # tmp1 starts as a copy of z2.  Passes 0..10 leave z2 raised to 2^255-21;
  # pass 11 multiplies that by x2.  z11_copy is an alias of x1 (see the @define
  # at the top of the file), so x1 is overwritten here.
  1553. fe_copy(tmp1,z2)
  1554. i = 0
  1555. invertloop:
  1556. mulsource = z2
  1557. postcopy = 0
  1558. j = 2
  1559. =? i - 1
  1560. j = 1 if =
  1561. mulsource = tmp1 if =
  1562. postcopy = z11_copy if =
  1563. =? i - 2
  1564. j = 1 if =
  1565. mulsource = z11_copy if =
  1566. =? i - 3
  1567. j = 5 if =
  1568. postcopy = tmp1 if =
  1569. =? i - 4
  1570. j = 10 if =
  1571. =? i - 5
  1572. j = 20 if =
  1573. =? i - 6
  1574. j = 10 if =
  1575. mulsource = tmp1 if =
  1576. postcopy = tmp1 if =
  1577. =? i - 7
  1578. j = 50 if =
  1579. =? i - 8
  1580. j = 100 if =
  1581. =? i - 9
  1582. j = 50 if =
  1583. mulsource = tmp1 if =
  1584. =? i - 10
  1585. j = 5 if =
  1586. mulsource = z11_copy if =
  1587. =? i - 11
  1588. j = 0 if =
  1589. mulsource = x2 if =
  1590. fe_copy(tmp0,z2)
  1591. =? j - 0
  1592. goto skipsquaringloop if =
  # Squaring is done with fe_mul(tmp0,tmp0,tmp0); the loop runs exactly j times.
  1593. squaringloop:
  1594. fe_mul(tmp0,tmp0,tmp0)
  1595. unsigned>? j -= 1
  1596. goto squaringloop if unsigned>
  1597. skipsquaringloop:
  1598. fe_mul(z2,mulsource,tmp0)
  1599. =? postcopy - 0
  1600. goto skippostcopy if =
  1601. fe_copy(postcopy,z2)
  1602. skippostcopy:
  # Only on pass i == 1: replace z2 by tmp0 (the squared value, not the product).
  1603. =? i - 1
  1604. goto skipfinalcopy if !=
  1605. fe_copy(z2,tmp0)
  1606. skipfinalcopy:
  1607. i += 1
  1608. unsigned<? i - 12
  1609. goto invertloop if unsigned<
  # Load the ten 32-bit limbs of z2 for the final reduction.
  1610. posf = z2
  1611. out0 = mem32[posf]; posf += 4
  1612. out1 = mem32[posf]; posf += 4
  1613. out2 = mem32[posf]; posf += 4
  1614. out3 = mem32[posf]; posf += 4
  1615. out4 = mem32[posf]; posf += 4
  1616. out5 = mem32[posf]; posf += 4
  1617. out6 = mem32[posf]; posf += 4
  1618. out7 = mem32[posf]; posf += 4
  1619. out8 = mem32[posf]; posf += 4
  1620. out9 = mem32[posf]
  # Trial carry: start from (19*out9 + 2^24) >> 25 and ripple it through
  # out0..out9 with alternating >>26 / >>25 shifts; only the final carry is kept.
  1621. carry = out9 + (out9 << 4)
  1622. carry = carry + (out9 << 1)
  1623. carry += 16777216
  1624. carry signed>>= 25
  1625. carry += out0
  1626. carry signed>>= 26
  1627. carry += out1
  1628. carry signed>>= 25
  1629. carry += out2
  1630. carry signed>>= 26
  1631. carry += out3
  1632. carry signed>>= 25
  1633. carry += out4
  1634. carry signed>>= 26
  1635. carry += out5
  1636. carry signed>>= 25
  1637. carry += out6
  1638. carry signed>>= 26
  1639. carry += out7
  1640. carry signed>>= 25
  1641. carry += out8
  1642. carry signed>>= 26
  1643. carry += out9
  1644. carry signed>>= 25
  # Add 19*carry (1 + 2 + 16 times carry) to out0, then carry limb by limb; the
  # carry out of out9 (carry9) is subtracted from out9 and not propagated.
  1645. out0 += carry
  1646. out0 += (carry << 1)
  1647. out0 += (carry << 4)
  1648. carry0 = (out0 signed>> 26)
  1649. out1 += carry0
  1650. out0 -= (carry0 << 26)
  1651. carry1 = (out1 signed>> 25)
  1652. out2 += carry1
  1653. out1 -= (carry1 << 25)
  1654. carry2 = (out2 signed>> 26)
  1655. out3 += carry2
  1656. out2 -= (carry2 << 26)
  1657. carry3 = (out3 signed>> 25)
  1658. out4 += carry3
  1659. out3 -= (carry3 << 25)
  1660. carry4 = (out4 signed>> 26)
  1661. out5 += carry4
  1662. out4 -= (carry4 << 26)
  1663. carry5 = (out5 signed>> 25)
  1664. out6 += carry5
  1665. out5 -= (carry5 << 25)
  1666. carry6 = (out6 signed>> 26)
  1667. out7 += carry6
  1668. out6 -= (carry6 << 26)
  1669. carry7 = (out7 signed>> 25)
  1670. out8 += carry7
  1671. out7 -= (carry7 << 25)
  1672. carry8 = (out8 signed>> 26)
  1673. out9 += carry8
  1674. out8 -= (carry8 << 26)
  1675. carry9 = (out9 signed>> 25)
  1676. out9 -= (carry9 << 25)
  # Pack the ten 26/25-bit limbs into eight 32-bit words.  out4 is merged into
  # out3 and out9 into out8, so the words stored are out0-out3 and out5-out8.
  1677. out0 += (out1 << 26)
  1678. out1 unsigned>>= 6
  1679. out1 += (out2 << 19)
  1680. out2 unsigned>>= 13
  1681. out2 += (out3 << 13)
  1682. out3 unsigned>>= 19
  1683. out3 += (out4 << 6)
  1684. out5 += (out6 << 25)
  1685. out6 unsigned>>= 7
  1686. out6 += (out7 << 19)
  1687. out7 unsigned>>= 13
  1688. out7 += (out8 << 12)
  1689. out8 unsigned>>= 20
  1690. out8 += (out9 << 6)
  1691. mem32[q] = out0; q += 4
  1692. mem32[q] = out1; q += 4
  1693. mem32[q] = out2; q += 4
  1694. mem32[q] = out3; q += 4
  1695. mem32[q] = out5; q += 4
  1696. mem32[q] = out6; q += 4
  1697. mem32[q] = out7; q += 4
  1698. mem32[q] = out8
  # Restore the registers saved on entry and return 0.
  1699. assign r4 r5 to caller_r4 caller_r5 = stack_r45
  1700. assign r6 r7 to caller_r6 caller_r7 = stack_r67
  1701. assign r8 r9 to caller_r8 caller_r9 = stack_r89
  1702. assign r10 r11 to caller_r10 caller_r11 = stack_r1011
  1703. caller_r12 = stack_r12
  1704. caller_r14 = stack_r14
  1705. int32 result
  1706. result = 0
  1707. qpopreturn result