sp_x86_64_asm.asm 2.1 MB


  1. ; /* sp_x86_64_asm
  2. ; *
  3. ; * Copyright (C) 2006-2023 wolfSSL Inc.
  4. ; *
  5. ; * This file is part of wolfSSL.
  6. ; *
  7. ; * wolfSSL is free software; you can redistribute it and/or modify
  8. ; * it under the terms of the GNU General Public License as published by
  9. ; * the Free Software Foundation; either version 2 of the License, or
  10. ; * (at your option) any later version.
  11. ; *
  12. ; * wolfSSL is distributed in the hope that it will be useful,
  13. ; * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. ; * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15. ; * GNU General Public License for more details.
  16. ; *
  17. ; * You should have received a copy of the GNU General Public License
  18. ; * along with this program; if not, write to the Free Software
  19. ; * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
  20. ; */
  21. IF @Version LT 1200 ; taken only when the assembler's predefined @Version symbol is below 1200
  22. ; AVX2 instructions are not recognized by old versions of MASM, so mark AVX2 as unsupported
  23. IFNDEF NO_AVX2_SUPPORT
  24. NO_AVX2_SUPPORT = 1
  25. ENDIF
  26. ; The MOVBE instruction is not recognized by old versions of MASM, so mark MOVBE as unsupported
  27. IFNDEF NO_MOVBE_SUPPORT
  28. NO_MOVBE_SUPPORT = 1
  29. ENDIF
  30. ENDIF
  31. IFNDEF HAVE_INTEL_AVX1 ; define HAVE_INTEL_AVX1 unless it is already defined
  32. HAVE_INTEL_AVX1 = 1
  33. ENDIF
  34. IFNDEF NO_AVX2_SUPPORT ; HAVE_INTEL_AVX2 is defined only when NO_AVX2_SUPPORT was not set above or by the build
  35. HAVE_INTEL_AVX2 = 1
  36. ENDIF
  37. IFNDEF _WIN64 ; define _WIN64 unless it is already defined
  38. _WIN64 = 1
  39. ENDIF
  40. IFNDEF WOLFSSL_SP_NO_2048
  41. IFNDEF WOLFSSL_SP_NO_2048
  42. ; /* Read big endian unsigned byte array into r, storing the least significant 64-bit word first.
  43. ; * Uses the bswap instruction. Arguments are taken from rcx=r, rdx=size, r8=a, r9=n.
  44. ; * NOTE(review): r12/r13 are pushed without PROC FRAME unwind data - confirm that is intended.
  45. ; * r A single precision integer. Written up to r+256; words not covered by the input are zeroed.
  46. ; * size Maximum number of bytes to convert. Not read by this routine: the 256-byte limit is hard-coded.
  47. ; * a Byte array.
  48. ; * n Number of bytes in array to read. NOTE(review): used as a 64-bit signed count - confirm callers extend it.
  49. ; */
  50. _text SEGMENT READONLY PARA
  51. sp_2048_from_bin_bswap PROC
  52. push r12 ; r12 is callee-saved; used below as the end-of-output pointer
  53. push r13 ; r13 is callee-saved; used below as a constant zero
  54. mov r11, r8 ; r11 = a
  55. mov r12, rcx ; r12 = r
  56. add r11, r9 ; r11 = a + n, one past the last input byte
  57. add r12, 256 ; r12 = r + 256, one past the last output word
  58. xor r13, r13 ; r13 = 0
  59. jmp L_2048_from_bin_bswap_64_end ; enter the 64-byte loop at its condition check
  60. L_2048_from_bin_bswap_64_start: ; convert 64 input bytes (8 words) per iteration, working back from the end of a
  61. sub r11, 64 ; step the input pointer back one 64-byte chunk
  62. mov rax, QWORD PTR [r11+56] ; last 8 bytes of the chunk = least significant word of the chunk
  63. mov r10, QWORD PTR [r11+48]
  64. bswap rax ; big-endian bytes -> native word value
  65. bswap r10
  66. mov QWORD PTR [rcx], rax
  67. mov QWORD PTR [rcx+8], r10
  68. mov rax, QWORD PTR [r11+40]
  69. mov r10, QWORD PTR [r11+32]
  70. bswap rax
  71. bswap r10
  72. mov QWORD PTR [rcx+16], rax
  73. mov QWORD PTR [rcx+24], r10
  74. mov rax, QWORD PTR [r11+24]
  75. mov r10, QWORD PTR [r11+16]
  76. bswap rax
  77. bswap r10
  78. mov QWORD PTR [rcx+32], rax
  79. mov QWORD PTR [rcx+40], r10
  80. mov rax, QWORD PTR [r11+8]
  81. mov r10, QWORD PTR [r11]
  82. bswap rax
  83. bswap r10
  84. mov QWORD PTR [rcx+48], rax
  85. mov QWORD PTR [rcx+56], r10
  86. add rcx, 64 ; advance the output pointer by 8 words
  87. sub r9, 64 ; 64 fewer input bytes remain
  88. L_2048_from_bin_bswap_64_end:
  89. cmp r9, 63
  90. jg L_2048_from_bin_bswap_64_start ; loop while more than 63 bytes remain (signed compare)
  91. jmp L_2048_from_bin_bswap_8_end ; enter the 8-byte loop at its condition check
  92. L_2048_from_bin_bswap_8_start: ; convert one 8-byte word per iteration
  93. sub r11, 8 ; step the input pointer back one word
  94. mov rax, QWORD PTR [r11]
  95. bswap rax
  96. mov QWORD PTR [rcx], rax
  97. add rcx, 8
  98. sub r9, 8
  99. L_2048_from_bin_bswap_8_end:
  100. cmp r9, 7
  101. jg L_2048_from_bin_bswap_8_start ; loop while more than 7 bytes remain (signed compare)
  102. cmp r9, r13 ; any bytes left over (r13 is zero)?
  103. je L_2048_from_bin_bswap_hi_end ; none: no partial top word to build
  104. mov r10, r13 ; r10 = 0, accumulator for the partial word
  105. mov rax, r13 ; rax = 0 so the byte loaded into al is zero-extended
  106. L_2048_from_bin_bswap_hi_start: ; fold the leading bytes of a, starting at a[0], into r10
  107. mov al, BYTE PTR [r8] ; r8 still points at the start of a
  108. shl r10, 8
  109. inc r8
  110. add r10, rax ; r10 = (r10 << 8) + byte
  111. dec r9
  112. jg L_2048_from_bin_bswap_hi_start ; flags come from dec: repeat while r9 > 0
  113. mov QWORD PTR [rcx], r10 ; store the partial most significant word
  114. add rcx, 8
  115. L_2048_from_bin_bswap_hi_end:
  116. cmp rcx, r12
  117. jge L_2048_from_bin_bswap_zero_end ; output pointer already at or past r+256: nothing to pad
  118. L_2048_from_bin_bswap_zero_start: ; zero the remaining words up to r+256
  119. mov QWORD PTR [rcx], r13
  120. add rcx, 8
  121. cmp rcx, r12
  122. jl L_2048_from_bin_bswap_zero_start
  123. L_2048_from_bin_bswap_zero_end:
  124. pop r13 ; restore callee-saved registers in reverse push order
  125. pop r12
  126. ret
  127. sp_2048_from_bin_bswap ENDP
  128. _text ENDS
  129. IFNDEF NO_MOVBE_SUPPORT
  130. ; /* Read big endian unsigned byte array into r, storing the least significant 64-bit word first.
  131. ; * Uses the movbe instruction which is an optional instruction.
  132. ; * Arguments are taken from rcx=r, rdx=size, r8=a, r9=n.
  133. ; * r A single precision integer. Written up to r+256; words not covered by the input are zeroed.
  134. ; * size Maximum number of bytes to convert. Not read by this routine: the 256-byte limit is hard-coded.
  135. ; * a Byte array.
  136. ; * n Number of bytes in array to read. NOTE(review): used as a 64-bit signed count - confirm callers extend it.
  137. ; */
  138. _text SEGMENT READONLY PARA
  139. sp_2048_from_bin_movbe PROC
  140. push r12 ; r12 is callee-saved; used below as the end-of-output pointer
  141. mov r11, r8 ; r11 = a
  142. mov r12, rcx ; r12 = r
  143. add r11, r9 ; r11 = a + n, one past the last input byte
  144. add r12, 256 ; r12 = r + 256, one past the last output word
  145. jmp L_2048_from_bin_movbe_64_end ; enter the 64-byte loop at its condition check
  146. L_2048_from_bin_movbe_64_start: ; convert 64 input bytes (8 words) per iteration, working back from the end of a
  147. sub r11, 64 ; step the input pointer back one 64-byte chunk
  148. movbe rax, QWORD PTR [r11+56] ; movbe loads with the byte order reversed; last 8 bytes = least significant word of the chunk
  149. movbe r10, QWORD PTR [r11+48]
  150. mov QWORD PTR [rcx], rax
  151. mov QWORD PTR [rcx+8], r10
  152. movbe rax, QWORD PTR [r11+40]
  153. movbe r10, QWORD PTR [r11+32]
  154. mov QWORD PTR [rcx+16], rax
  155. mov QWORD PTR [rcx+24], r10
  156. movbe rax, QWORD PTR [r11+24]
  157. movbe r10, QWORD PTR [r11+16]
  158. mov QWORD PTR [rcx+32], rax
  159. mov QWORD PTR [rcx+40], r10
  160. movbe rax, QWORD PTR [r11+8]
  161. movbe r10, QWORD PTR [r11]
  162. mov QWORD PTR [rcx+48], rax
  163. mov QWORD PTR [rcx+56], r10
  164. add rcx, 64 ; advance the output pointer by 8 words
  165. sub r9, 64 ; 64 fewer input bytes remain
  166. L_2048_from_bin_movbe_64_end:
  167. cmp r9, 63
  168. jg L_2048_from_bin_movbe_64_start ; loop while more than 63 bytes remain (signed compare)
  169. jmp L_2048_from_bin_movbe_8_end ; enter the 8-byte loop at its condition check
  170. L_2048_from_bin_movbe_8_start: ; convert one 8-byte word per iteration
  171. sub r11, 8 ; step the input pointer back one word
  172. movbe rax, QWORD PTR [r11]
  173. mov QWORD PTR [rcx], rax
  174. add rcx, 8
  175. sub r9, 8
  176. L_2048_from_bin_movbe_8_end:
  177. cmp r9, 7
  178. jg L_2048_from_bin_movbe_8_start ; loop while more than 7 bytes remain (signed compare)
  179. test r9, r9 ; any bytes left over? (sets ZF exactly as a compare with zero would)
  180. je L_2048_from_bin_movbe_hi_end ; none: no partial top word to build
  181. xor r10, r10 ; r10 = 0, accumulator for the partial word
  182. xor rax, rax ; rax = 0 so the byte loaded into al is zero-extended
  183. L_2048_from_bin_movbe_hi_start: ; fold the leading bytes of a, starting at a[0], into r10
  184. mov al, BYTE PTR [r8] ; r8 still points at the start of a
  185. shl r10, 8
  186. inc r8
  187. add r10, rax ; r10 = (r10 << 8) + byte
  188. dec r9
  189. jg L_2048_from_bin_movbe_hi_start ; flags come from dec: repeat while r9 > 0
  190. mov QWORD PTR [rcx], r10 ; store the partial most significant word
  191. add rcx, 8
  192. L_2048_from_bin_movbe_hi_end:
  193. cmp rcx, r12
  194. jge L_2048_from_bin_movbe_zero_end ; output pointer already at or past r+256: nothing to pad
  195. L_2048_from_bin_movbe_zero_start: ; zero the remaining words up to r+256
  196. mov QWORD PTR [rcx], 0
  197. add rcx, 8
  198. cmp rcx, r12
  199. jl L_2048_from_bin_movbe_zero_start
  200. L_2048_from_bin_movbe_zero_end:
  201. pop r12 ; restore callee-saved r12
  202. ret
  203. sp_2048_from_bin_movbe ENDP
  204. _text ENDS
  205. ENDIF
  206. ; /* Write r as big endian to byte array.
  207. ; * Fixed length number of bytes written: 256
  208. ; * Uses the bswap instruction. Arguments are taken from rcx=r, rdx=a.
  209. ; * Words are read from offset 248 of r down to offset 0 and stored at ascending offsets of a, two per step.
  210. ; * r A single precision integer. 32 64-bit words are read.
  211. ; * a Byte array. Receives 256 bytes, most significant byte first.
  212. ; */
  213. _text SEGMENT READONLY PARA
  214. sp_2048_to_bin_bswap_32 PROC
  215. mov rax, QWORD PTR [rcx+248] ; r[31], r[30] -> a[0..15]
  216. mov r8, QWORD PTR [rcx+240]
  217. bswap rax ; reverse the bytes so the word's most significant byte is stored first
  218. bswap r8
  219. mov QWORD PTR [rdx], rax
  220. mov QWORD PTR [rdx+8], r8
  221. mov rax, QWORD PTR [rcx+232] ; r[29], r[28] -> a[16..31]
  222. mov r8, QWORD PTR [rcx+224]
  223. bswap rax
  224. bswap r8
  225. mov QWORD PTR [rdx+16], rax
  226. mov QWORD PTR [rdx+24], r8
  227. mov rax, QWORD PTR [rcx+216] ; r[27], r[26] -> a[32..47]
  228. mov r8, QWORD PTR [rcx+208]
  229. bswap rax
  230. bswap r8
  231. mov QWORD PTR [rdx+32], rax
  232. mov QWORD PTR [rdx+40], r8
  233. mov rax, QWORD PTR [rcx+200] ; r[25], r[24] -> a[48..63]
  234. mov r8, QWORD PTR [rcx+192]
  235. bswap rax
  236. bswap r8
  237. mov QWORD PTR [rdx+48], rax
  238. mov QWORD PTR [rdx+56], r8
  239. mov rax, QWORD PTR [rcx+184] ; r[23], r[22] -> a[64..79]
  240. mov r8, QWORD PTR [rcx+176]
  241. bswap rax
  242. bswap r8
  243. mov QWORD PTR [rdx+64], rax
  244. mov QWORD PTR [rdx+72], r8
  245. mov rax, QWORD PTR [rcx+168] ; r[21], r[20] -> a[80..95]
  246. mov r8, QWORD PTR [rcx+160]
  247. bswap rax
  248. bswap r8
  249. mov QWORD PTR [rdx+80], rax
  250. mov QWORD PTR [rdx+88], r8
  251. mov rax, QWORD PTR [rcx+152] ; r[19], r[18] -> a[96..111]
  252. mov r8, QWORD PTR [rcx+144]
  253. bswap rax
  254. bswap r8
  255. mov QWORD PTR [rdx+96], rax
  256. mov QWORD PTR [rdx+104], r8
  257. mov rax, QWORD PTR [rcx+136] ; r[17], r[16] -> a[112..127]
  258. mov r8, QWORD PTR [rcx+128]
  259. bswap rax
  260. bswap r8
  261. mov QWORD PTR [rdx+112], rax
  262. mov QWORD PTR [rdx+120], r8
  263. mov rax, QWORD PTR [rcx+120] ; r[15], r[14] -> a[128..143]
  264. mov r8, QWORD PTR [rcx+112]
  265. bswap rax
  266. bswap r8
  267. mov QWORD PTR [rdx+128], rax
  268. mov QWORD PTR [rdx+136], r8
  269. mov rax, QWORD PTR [rcx+104] ; r[13], r[12] -> a[144..159]
  270. mov r8, QWORD PTR [rcx+96]
  271. bswap rax
  272. bswap r8
  273. mov QWORD PTR [rdx+144], rax
  274. mov QWORD PTR [rdx+152], r8
  275. mov rax, QWORD PTR [rcx+88] ; r[11], r[10] -> a[160..175]
  276. mov r8, QWORD PTR [rcx+80]
  277. bswap rax
  278. bswap r8
  279. mov QWORD PTR [rdx+160], rax
  280. mov QWORD PTR [rdx+168], r8
  281. mov rax, QWORD PTR [rcx+72] ; r[9], r[8] -> a[176..191]
  282. mov r8, QWORD PTR [rcx+64]
  283. bswap rax
  284. bswap r8
  285. mov QWORD PTR [rdx+176], rax
  286. mov QWORD PTR [rdx+184], r8
  287. mov rax, QWORD PTR [rcx+56] ; r[7], r[6] -> a[192..207]
  288. mov r8, QWORD PTR [rcx+48]
  289. bswap rax
  290. bswap r8
  291. mov QWORD PTR [rdx+192], rax
  292. mov QWORD PTR [rdx+200], r8
  293. mov rax, QWORD PTR [rcx+40] ; r[5], r[4] -> a[208..223]
  294. mov r8, QWORD PTR [rcx+32]
  295. bswap rax
  296. bswap r8
  297. mov QWORD PTR [rdx+208], rax
  298. mov QWORD PTR [rdx+216], r8
  299. mov rax, QWORD PTR [rcx+24] ; r[3], r[2] -> a[224..239]
  300. mov r8, QWORD PTR [rcx+16]
  301. bswap rax
  302. bswap r8
  303. mov QWORD PTR [rdx+224], rax
  304. mov QWORD PTR [rdx+232], r8
  305. mov rax, QWORD PTR [rcx+8] ; r[1], r[0] -> a[240..255]
  306. mov r8, QWORD PTR [rcx]
  307. bswap rax
  308. bswap r8
  309. mov QWORD PTR [rdx+240], rax
  310. mov QWORD PTR [rdx+248], r8
  311. ret
  312. sp_2048_to_bin_bswap_32 ENDP
  313. _text ENDS
  314. IFNDEF NO_MOVBE_SUPPORT
  315. ; /* Write r as big endian to byte array.
  316. ; * Fixed length number of bytes written: 256
  317. ; * Uses the movbe instruction which is optional. Arguments are taken from rcx=r, rdx=a.
  318. ; * Words are read from offset 248 of r down to offset 0 and stored at ascending offsets of a, two per step.
  319. ; * r A single precision integer. 32 64-bit words are read.
  320. ; * a Byte array. Receives 256 bytes, most significant byte first.
  321. ; */
  322. _text SEGMENT READONLY PARA
  323. sp_2048_to_bin_movbe_32 PROC
  324. movbe rax, QWORD PTR [rcx+248] ; r[31], r[30] -> a[0..15]; movbe loads with the byte order reversed
  325. movbe r8, QWORD PTR [rcx+240]
  326. mov QWORD PTR [rdx], rax
  327. mov QWORD PTR [rdx+8], r8
  328. movbe rax, QWORD PTR [rcx+232] ; r[29], r[28] -> a[16..31]
  329. movbe r8, QWORD PTR [rcx+224]
  330. mov QWORD PTR [rdx+16], rax
  331. mov QWORD PTR [rdx+24], r8
  332. movbe rax, QWORD PTR [rcx+216] ; r[27], r[26] -> a[32..47]
  333. movbe r8, QWORD PTR [rcx+208]
  334. mov QWORD PTR [rdx+32], rax
  335. mov QWORD PTR [rdx+40], r8
  336. movbe rax, QWORD PTR [rcx+200] ; r[25], r[24] -> a[48..63]
  337. movbe r8, QWORD PTR [rcx+192]
  338. mov QWORD PTR [rdx+48], rax
  339. mov QWORD PTR [rdx+56], r8
  340. movbe rax, QWORD PTR [rcx+184] ; r[23], r[22] -> a[64..79]
  341. movbe r8, QWORD PTR [rcx+176]
  342. mov QWORD PTR [rdx+64], rax
  343. mov QWORD PTR [rdx+72], r8
  344. movbe rax, QWORD PTR [rcx+168] ; r[21], r[20] -> a[80..95]
  345. movbe r8, QWORD PTR [rcx+160]
  346. mov QWORD PTR [rdx+80], rax
  347. mov QWORD PTR [rdx+88], r8
  348. movbe rax, QWORD PTR [rcx+152] ; r[19], r[18] -> a[96..111]
  349. movbe r8, QWORD PTR [rcx+144]
  350. mov QWORD PTR [rdx+96], rax
  351. mov QWORD PTR [rdx+104], r8
  352. movbe rax, QWORD PTR [rcx+136] ; r[17], r[16] -> a[112..127]
  353. movbe r8, QWORD PTR [rcx+128]
  354. mov QWORD PTR [rdx+112], rax
  355. mov QWORD PTR [rdx+120], r8
  356. movbe rax, QWORD PTR [rcx+120] ; r[15], r[14] -> a[128..143]
  357. movbe r8, QWORD PTR [rcx+112]
  358. mov QWORD PTR [rdx+128], rax
  359. mov QWORD PTR [rdx+136], r8
  360. movbe rax, QWORD PTR [rcx+104] ; r[13], r[12] -> a[144..159]
  361. movbe r8, QWORD PTR [rcx+96]
  362. mov QWORD PTR [rdx+144], rax
  363. mov QWORD PTR [rdx+152], r8
  364. movbe rax, QWORD PTR [rcx+88] ; r[11], r[10] -> a[160..175]
  365. movbe r8, QWORD PTR [rcx+80]
  366. mov QWORD PTR [rdx+160], rax
  367. mov QWORD PTR [rdx+168], r8
  368. movbe rax, QWORD PTR [rcx+72] ; r[9], r[8] -> a[176..191]
  369. movbe r8, QWORD PTR [rcx+64]
  370. mov QWORD PTR [rdx+176], rax
  371. mov QWORD PTR [rdx+184], r8
  372. movbe rax, QWORD PTR [rcx+56] ; r[7], r[6] -> a[192..207]
  373. movbe r8, QWORD PTR [rcx+48]
  374. mov QWORD PTR [rdx+192], rax
  375. mov QWORD PTR [rdx+200], r8
  376. movbe rax, QWORD PTR [rcx+40] ; r[5], r[4] -> a[208..223]
  377. movbe r8, QWORD PTR [rcx+32]
  378. mov QWORD PTR [rdx+208], rax
  379. mov QWORD PTR [rdx+216], r8
  380. movbe rax, QWORD PTR [rcx+24] ; r[3], r[2] -> a[224..239]
  381. movbe r8, QWORD PTR [rcx+16]
  382. mov QWORD PTR [rdx+224], rax
  383. mov QWORD PTR [rdx+232], r8
  384. movbe rax, QWORD PTR [rcx+8] ; r[1], r[0] -> a[240..255]
  385. movbe r8, QWORD PTR [rcx]
  386. mov QWORD PTR [rdx+240], rax
  387. mov QWORD PTR [rdx+248], r8
  388. ret
  389. sp_2048_to_bin_movbe_32 ENDP
  390. _text ENDS
  391. ENDIF
  392. ; /* Multiply a and b into r. (r = a * b)
  393. ; *
  394. ; * r A single precision integer.
  395. ; * a A single precision integer.
  396. ; * b A single precision integer.
  397. ; */
  398. _text SEGMENT READONLY PARA
  399. sp_2048_mul_16 PROC
  400. push r12
  401. mov r9, rdx
  402. sub rsp, 128
  403. ; A[0] * B[0]
  404. mov rax, QWORD PTR [r8]
  405. mul QWORD PTR [r9]
  406. xor r12, r12
  407. mov QWORD PTR [rsp], rax
  408. mov r11, rdx
  409. ; A[0] * B[1]
  410. mov rax, QWORD PTR [r8+8]
  411. mul QWORD PTR [r9]
  412. xor r10, r10
  413. add r11, rax
  414. adc r12, rdx
  415. adc r10, 0
  416. ; A[1] * B[0]
  417. mov rax, QWORD PTR [r8]
  418. mul QWORD PTR [r9+8]
  419. add r11, rax
  420. adc r12, rdx
  421. adc r10, 0
  422. mov QWORD PTR [rsp+8], r11
  423. ; A[0] * B[2]
  424. mov rax, QWORD PTR [r8+16]
  425. mul QWORD PTR [r9]
  426. xor r11, r11
  427. add r12, rax
  428. adc r10, rdx
  429. adc r11, 0
  430. ; A[1] * B[1]
  431. mov rax, QWORD PTR [r8+8]
  432. mul QWORD PTR [r9+8]
  433. add r12, rax
  434. adc r10, rdx
  435. adc r11, 0
  436. ; A[2] * B[0]
  437. mov rax, QWORD PTR [r8]
  438. mul QWORD PTR [r9+16]
  439. add r12, rax
  440. adc r10, rdx
  441. adc r11, 0
  442. mov QWORD PTR [rsp+16], r12
  443. ; A[0] * B[3]
  444. mov rax, QWORD PTR [r8+24]
  445. mul QWORD PTR [r9]
  446. xor r12, r12
  447. add r10, rax
  448. adc r11, rdx
  449. adc r12, 0
  450. ; A[1] * B[2]
  451. mov rax, QWORD PTR [r8+16]
  452. mul QWORD PTR [r9+8]
  453. add r10, rax
  454. adc r11, rdx
  455. adc r12, 0
  456. ; A[2] * B[1]
  457. mov rax, QWORD PTR [r8+8]
  458. mul QWORD PTR [r9+16]
  459. add r10, rax
  460. adc r11, rdx
  461. adc r12, 0
  462. ; A[3] * B[0]
  463. mov rax, QWORD PTR [r8]
  464. mul QWORD PTR [r9+24]
  465. add r10, rax
  466. adc r11, rdx
  467. adc r12, 0
  468. mov QWORD PTR [rsp+24], r10
  469. ; A[0] * B[4]
  470. mov rax, QWORD PTR [r8+32]
  471. mul QWORD PTR [r9]
  472. xor r10, r10
  473. add r11, rax
  474. adc r12, rdx
  475. adc r10, 0
  476. ; A[1] * B[3]
  477. mov rax, QWORD PTR [r8+24]
  478. mul QWORD PTR [r9+8]
  479. add r11, rax
  480. adc r12, rdx
  481. adc r10, 0
  482. ; A[2] * B[2]
  483. mov rax, QWORD PTR [r8+16]
  484. mul QWORD PTR [r9+16]
  485. add r11, rax
  486. adc r12, rdx
  487. adc r10, 0
  488. ; A[3] * B[1]
  489. mov rax, QWORD PTR [r8+8]
  490. mul QWORD PTR [r9+24]
  491. add r11, rax
  492. adc r12, rdx
  493. adc r10, 0
  494. ; A[4] * B[0]
  495. mov rax, QWORD PTR [r8]
  496. mul QWORD PTR [r9+32]
  497. add r11, rax
  498. adc r12, rdx
  499. adc r10, 0
  500. mov QWORD PTR [rsp+32], r11
  501. ; A[0] * B[5]
  502. mov rax, QWORD PTR [r8+40]
  503. mul QWORD PTR [r9]
  504. xor r11, r11
  505. add r12, rax
  506. adc r10, rdx
  507. adc r11, 0
  508. ; A[1] * B[4]
  509. mov rax, QWORD PTR [r8+32]
  510. mul QWORD PTR [r9+8]
  511. add r12, rax
  512. adc r10, rdx
  513. adc r11, 0
  514. ; A[2] * B[3]
  515. mov rax, QWORD PTR [r8+24]
  516. mul QWORD PTR [r9+16]
  517. add r12, rax
  518. adc r10, rdx
  519. adc r11, 0
  520. ; A[3] * B[2]
  521. mov rax, QWORD PTR [r8+16]
  522. mul QWORD PTR [r9+24]
  523. add r12, rax
  524. adc r10, rdx
  525. adc r11, 0
  526. ; A[4] * B[1]
  527. mov rax, QWORD PTR [r8+8]
  528. mul QWORD PTR [r9+32]
  529. add r12, rax
  530. adc r10, rdx
  531. adc r11, 0
  532. ; A[5] * B[0]
  533. mov rax, QWORD PTR [r8]
  534. mul QWORD PTR [r9+40]
  535. add r12, rax
  536. adc r10, rdx
  537. adc r11, 0
  538. mov QWORD PTR [rsp+40], r12
  539. ; A[0] * B[6]
  540. mov rax, QWORD PTR [r8+48]
  541. mul QWORD PTR [r9]
  542. xor r12, r12
  543. add r10, rax
  544. adc r11, rdx
  545. adc r12, 0
  546. ; A[1] * B[5]
  547. mov rax, QWORD PTR [r8+40]
  548. mul QWORD PTR [r9+8]
  549. add r10, rax
  550. adc r11, rdx
  551. adc r12, 0
  552. ; A[2] * B[4]
  553. mov rax, QWORD PTR [r8+32]
  554. mul QWORD PTR [r9+16]
  555. add r10, rax
  556. adc r11, rdx
  557. adc r12, 0
  558. ; A[3] * B[3]
  559. mov rax, QWORD PTR [r8+24]
  560. mul QWORD PTR [r9+24]
  561. add r10, rax
  562. adc r11, rdx
  563. adc r12, 0
  564. ; A[4] * B[2]
  565. mov rax, QWORD PTR [r8+16]
  566. mul QWORD PTR [r9+32]
  567. add r10, rax
  568. adc r11, rdx
  569. adc r12, 0
  570. ; A[5] * B[1]
  571. mov rax, QWORD PTR [r8+8]
  572. mul QWORD PTR [r9+40]
  573. add r10, rax
  574. adc r11, rdx
  575. adc r12, 0
  576. ; A[6] * B[0]
  577. mov rax, QWORD PTR [r8]
  578. mul QWORD PTR [r9+48]
  579. add r10, rax
  580. adc r11, rdx
  581. adc r12, 0
  582. mov QWORD PTR [rsp+48], r10
  583. ; A[0] * B[7]
  584. mov rax, QWORD PTR [r8+56]
  585. mul QWORD PTR [r9]
  586. xor r10, r10
  587. add r11, rax
  588. adc r12, rdx
  589. adc r10, 0
  590. ; A[1] * B[6]
  591. mov rax, QWORD PTR [r8+48]
  592. mul QWORD PTR [r9+8]
  593. add r11, rax
  594. adc r12, rdx
  595. adc r10, 0
  596. ; A[2] * B[5]
  597. mov rax, QWORD PTR [r8+40]
  598. mul QWORD PTR [r9+16]
  599. add r11, rax
  600. adc r12, rdx
  601. adc r10, 0
  602. ; A[3] * B[4]
  603. mov rax, QWORD PTR [r8+32]
  604. mul QWORD PTR [r9+24]
  605. add r11, rax
  606. adc r12, rdx
  607. adc r10, 0
  608. ; A[4] * B[3]
  609. mov rax, QWORD PTR [r8+24]
  610. mul QWORD PTR [r9+32]
  611. add r11, rax
  612. adc r12, rdx
  613. adc r10, 0
  614. ; A[5] * B[2]
  615. mov rax, QWORD PTR [r8+16]
  616. mul QWORD PTR [r9+40]
  617. add r11, rax
  618. adc r12, rdx
  619. adc r10, 0
  620. ; A[6] * B[1]
  621. mov rax, QWORD PTR [r8+8]
  622. mul QWORD PTR [r9+48]
  623. add r11, rax
  624. adc r12, rdx
  625. adc r10, 0
  626. ; A[7] * B[0]
  627. mov rax, QWORD PTR [r8]
  628. mul QWORD PTR [r9+56]
  629. add r11, rax
  630. adc r12, rdx
  631. adc r10, 0
  632. mov QWORD PTR [rsp+56], r11
  633. ; A[0] * B[8]
  634. mov rax, QWORD PTR [r8+64]
  635. mul QWORD PTR [r9]
  636. xor r11, r11
  637. add r12, rax
  638. adc r10, rdx
  639. adc r11, 0
  640. ; A[1] * B[7]
  641. mov rax, QWORD PTR [r8+56]
  642. mul QWORD PTR [r9+8]
  643. add r12, rax
  644. adc r10, rdx
  645. adc r11, 0
  646. ; A[2] * B[6]
  647. mov rax, QWORD PTR [r8+48]
  648. mul QWORD PTR [r9+16]
  649. add r12, rax
  650. adc r10, rdx
  651. adc r11, 0
  652. ; A[3] * B[5]
  653. mov rax, QWORD PTR [r8+40]
  654. mul QWORD PTR [r9+24]
  655. add r12, rax
  656. adc r10, rdx
  657. adc r11, 0
  658. ; A[4] * B[4]
  659. mov rax, QWORD PTR [r8+32]
  660. mul QWORD PTR [r9+32]
  661. add r12, rax
  662. adc r10, rdx
  663. adc r11, 0
  664. ; A[5] * B[3]
  665. mov rax, QWORD PTR [r8+24]
  666. mul QWORD PTR [r9+40]
  667. add r12, rax
  668. adc r10, rdx
  669. adc r11, 0
  670. ; A[6] * B[2]
  671. mov rax, QWORD PTR [r8+16]
  672. mul QWORD PTR [r9+48]
  673. add r12, rax
  674. adc r10, rdx
  675. adc r11, 0
  676. ; A[7] * B[1]
  677. mov rax, QWORD PTR [r8+8]
  678. mul QWORD PTR [r9+56]
  679. add r12, rax
  680. adc r10, rdx
  681. adc r11, 0
  682. ; A[8] * B[0]
  683. mov rax, QWORD PTR [r8]
  684. mul QWORD PTR [r9+64]
  685. add r12, rax
  686. adc r10, rdx
  687. adc r11, 0
  688. mov QWORD PTR [rsp+64], r12
  689. ; A[0] * B[9]
  690. mov rax, QWORD PTR [r8+72]
  691. mul QWORD PTR [r9]
  692. xor r12, r12
  693. add r10, rax
  694. adc r11, rdx
  695. adc r12, 0
  696. ; A[1] * B[8]
  697. mov rax, QWORD PTR [r8+64]
  698. mul QWORD PTR [r9+8]
  699. add r10, rax
  700. adc r11, rdx
  701. adc r12, 0
  702. ; A[2] * B[7]
  703. mov rax, QWORD PTR [r8+56]
  704. mul QWORD PTR [r9+16]
  705. add r10, rax
  706. adc r11, rdx
  707. adc r12, 0
  708. ; A[3] * B[6]
  709. mov rax, QWORD PTR [r8+48]
  710. mul QWORD PTR [r9+24]
  711. add r10, rax
  712. adc r11, rdx
  713. adc r12, 0
  714. ; A[4] * B[5]
  715. mov rax, QWORD PTR [r8+40]
  716. mul QWORD PTR [r9+32]
  717. add r10, rax
  718. adc r11, rdx
  719. adc r12, 0
  720. ; A[5] * B[4]
  721. mov rax, QWORD PTR [r8+32]
  722. mul QWORD PTR [r9+40]
  723. add r10, rax
  724. adc r11, rdx
  725. adc r12, 0
  726. ; A[6] * B[3]
  727. mov rax, QWORD PTR [r8+24]
  728. mul QWORD PTR [r9+48]
  729. add r10, rax
  730. adc r11, rdx
  731. adc r12, 0
  732. ; A[7] * B[2]
  733. mov rax, QWORD PTR [r8+16]
  734. mul QWORD PTR [r9+56]
  735. add r10, rax
  736. adc r11, rdx
  737. adc r12, 0
  738. ; A[8] * B[1]
  739. mov rax, QWORD PTR [r8+8]
  740. mul QWORD PTR [r9+64]
  741. add r10, rax
  742. adc r11, rdx
  743. adc r12, 0
  744. ; A[9] * B[0]
  745. mov rax, QWORD PTR [r8]
  746. mul QWORD PTR [r9+72]
  747. add r10, rax
  748. adc r11, rdx
  749. adc r12, 0
  750. mov QWORD PTR [rsp+72], r10
  751. ; A[0] * B[10]
  752. mov rax, QWORD PTR [r8+80]
  753. mul QWORD PTR [r9]
  754. xor r10, r10
  755. add r11, rax
  756. adc r12, rdx
  757. adc r10, 0
  758. ; A[1] * B[9]
  759. mov rax, QWORD PTR [r8+72]
  760. mul QWORD PTR [r9+8]
  761. add r11, rax
  762. adc r12, rdx
  763. adc r10, 0
  764. ; A[2] * B[8]
  765. mov rax, QWORD PTR [r8+64]
  766. mul QWORD PTR [r9+16]
  767. add r11, rax
  768. adc r12, rdx
  769. adc r10, 0
  770. ; A[3] * B[7]
  771. mov rax, QWORD PTR [r8+56]
  772. mul QWORD PTR [r9+24]
  773. add r11, rax
  774. adc r12, rdx
  775. adc r10, 0
  776. ; A[4] * B[6]
  777. mov rax, QWORD PTR [r8+48]
  778. mul QWORD PTR [r9+32]
  779. add r11, rax
  780. adc r12, rdx
  781. adc r10, 0
  782. ; A[5] * B[5]
  783. mov rax, QWORD PTR [r8+40]
  784. mul QWORD PTR [r9+40]
  785. add r11, rax
  786. adc r12, rdx
  787. adc r10, 0
  788. ; A[6] * B[4]
  789. mov rax, QWORD PTR [r8+32]
  790. mul QWORD PTR [r9+48]
  791. add r11, rax
  792. adc r12, rdx
  793. adc r10, 0
  794. ; A[7] * B[3]
  795. mov rax, QWORD PTR [r8+24]
  796. mul QWORD PTR [r9+56]
  797. add r11, rax
  798. adc r12, rdx
  799. adc r10, 0
  800. ; A[8] * B[2]
  801. mov rax, QWORD PTR [r8+16]
  802. mul QWORD PTR [r9+64]
  803. add r11, rax
  804. adc r12, rdx
  805. adc r10, 0
  806. ; A[9] * B[1]
  807. mov rax, QWORD PTR [r8+8]
  808. mul QWORD PTR [r9+72]
  809. add r11, rax
  810. adc r12, rdx
  811. adc r10, 0
  812. ; A[10] * B[0]
  813. mov rax, QWORD PTR [r8]
  814. mul QWORD PTR [r9+80]
  815. add r11, rax
  816. adc r12, rdx
  817. adc r10, 0
  818. mov QWORD PTR [rsp+80], r11
  819. ; A[0] * B[11]
  820. mov rax, QWORD PTR [r8+88]
  821. mul QWORD PTR [r9]
  822. xor r11, r11
  823. add r12, rax
  824. adc r10, rdx
  825. adc r11, 0
  826. ; A[1] * B[10]
  827. mov rax, QWORD PTR [r8+80]
  828. mul QWORD PTR [r9+8]
  829. add r12, rax
  830. adc r10, rdx
  831. adc r11, 0
  832. ; A[2] * B[9]
  833. mov rax, QWORD PTR [r8+72]
  834. mul QWORD PTR [r9+16]
  835. add r12, rax
  836. adc r10, rdx
  837. adc r11, 0
  838. ; A[3] * B[8]
  839. mov rax, QWORD PTR [r8+64]
  840. mul QWORD PTR [r9+24]
  841. add r12, rax
  842. adc r10, rdx
  843. adc r11, 0
  844. ; A[4] * B[7]
  845. mov rax, QWORD PTR [r8+56]
  846. mul QWORD PTR [r9+32]
  847. add r12, rax
  848. adc r10, rdx
  849. adc r11, 0
  850. ; A[5] * B[6]
  851. mov rax, QWORD PTR [r8+48]
  852. mul QWORD PTR [r9+40]
  853. add r12, rax
  854. adc r10, rdx
  855. adc r11, 0
  856. ; A[6] * B[5]
  857. mov rax, QWORD PTR [r8+40]
  858. mul QWORD PTR [r9+48]
  859. add r12, rax
  860. adc r10, rdx
  861. adc r11, 0
  862. ; A[7] * B[4]
  863. mov rax, QWORD PTR [r8+32]
  864. mul QWORD PTR [r9+56]
  865. add r12, rax
  866. adc r10, rdx
  867. adc r11, 0
  868. ; A[8] * B[3]
  869. mov rax, QWORD PTR [r8+24]
  870. mul QWORD PTR [r9+64]
  871. add r12, rax
  872. adc r10, rdx
  873. adc r11, 0
  874. ; A[9] * B[2]
  875. mov rax, QWORD PTR [r8+16]
  876. mul QWORD PTR [r9+72]
  877. add r12, rax
  878. adc r10, rdx
  879. adc r11, 0
  880. ; A[10] * B[1]
  881. mov rax, QWORD PTR [r8+8]
  882. mul QWORD PTR [r9+80]
  883. add r12, rax
  884. adc r10, rdx
  885. adc r11, 0
  886. ; A[11] * B[0]
  887. mov rax, QWORD PTR [r8]
  888. mul QWORD PTR [r9+88]
  889. add r12, rax
  890. adc r10, rdx
  891. adc r11, 0
  892. mov QWORD PTR [rsp+88], r12
  893. ; A[0] * B[12]
  894. mov rax, QWORD PTR [r8+96]
  895. mul QWORD PTR [r9]
  896. xor r12, r12
  897. add r10, rax
  898. adc r11, rdx
  899. adc r12, 0
  900. ; A[1] * B[11]
  901. mov rax, QWORD PTR [r8+88]
  902. mul QWORD PTR [r9+8]
  903. add r10, rax
  904. adc r11, rdx
  905. adc r12, 0
  906. ; A[2] * B[10]
  907. mov rax, QWORD PTR [r8+80]
  908. mul QWORD PTR [r9+16]
  909. add r10, rax
  910. adc r11, rdx
  911. adc r12, 0
  912. ; A[3] * B[9]
  913. mov rax, QWORD PTR [r8+72]
  914. mul QWORD PTR [r9+24]
  915. add r10, rax
  916. adc r11, rdx
  917. adc r12, 0
  918. ; A[4] * B[8]
  919. mov rax, QWORD PTR [r8+64]
  920. mul QWORD PTR [r9+32]
  921. add r10, rax
  922. adc r11, rdx
  923. adc r12, 0
  924. ; A[5] * B[7]
  925. mov rax, QWORD PTR [r8+56]
  926. mul QWORD PTR [r9+40]
  927. add r10, rax
  928. adc r11, rdx
  929. adc r12, 0
  930. ; A[6] * B[6]
  931. mov rax, QWORD PTR [r8+48]
  932. mul QWORD PTR [r9+48]
  933. add r10, rax
  934. adc r11, rdx
  935. adc r12, 0
  936. ; A[7] * B[5]
  937. mov rax, QWORD PTR [r8+40]
  938. mul QWORD PTR [r9+56]
  939. add r10, rax
  940. adc r11, rdx
  941. adc r12, 0
  942. ; A[8] * B[4]
  943. mov rax, QWORD PTR [r8+32]
  944. mul QWORD PTR [r9+64]
  945. add r10, rax
  946. adc r11, rdx
  947. adc r12, 0
  948. ; A[9] * B[3]
  949. mov rax, QWORD PTR [r8+24]
  950. mul QWORD PTR [r9+72]
  951. add r10, rax
  952. adc r11, rdx
  953. adc r12, 0
  954. ; A[10] * B[2]
  955. mov rax, QWORD PTR [r8+16]
  956. mul QWORD PTR [r9+80]
  957. add r10, rax
  958. adc r11, rdx
  959. adc r12, 0
  960. ; A[11] * B[1]
  961. mov rax, QWORD PTR [r8+8]
  962. mul QWORD PTR [r9+88]
  963. add r10, rax
  964. adc r11, rdx
  965. adc r12, 0
  966. ; A[12] * B[0]
  967. mov rax, QWORD PTR [r8]
  968. mul QWORD PTR [r9+96]
  969. add r10, rax
  970. adc r11, rdx
  971. adc r12, 0
  972. mov QWORD PTR [rsp+96], r10
  973. ; A[0] * B[13]
  974. mov rax, QWORD PTR [r8+104]
  975. mul QWORD PTR [r9]
  976. xor r10, r10
  977. add r11, rax
  978. adc r12, rdx
  979. adc r10, 0
  980. ; A[1] * B[12]
  981. mov rax, QWORD PTR [r8+96]
  982. mul QWORD PTR [r9+8]
  983. add r11, rax
  984. adc r12, rdx
  985. adc r10, 0
  986. ; A[2] * B[11]
  987. mov rax, QWORD PTR [r8+88]
  988. mul QWORD PTR [r9+16]
  989. add r11, rax
  990. adc r12, rdx
  991. adc r10, 0
  992. ; A[3] * B[10]
  993. mov rax, QWORD PTR [r8+80]
  994. mul QWORD PTR [r9+24]
  995. add r11, rax
  996. adc r12, rdx
  997. adc r10, 0
  998. ; A[4] * B[9]
  999. mov rax, QWORD PTR [r8+72]
  1000. mul QWORD PTR [r9+32]
  1001. add r11, rax
  1002. adc r12, rdx
  1003. adc r10, 0
  1004. ; A[5] * B[8]
  1005. mov rax, QWORD PTR [r8+64]
  1006. mul QWORD PTR [r9+40]
  1007. add r11, rax
  1008. adc r12, rdx
  1009. adc r10, 0
  1010. ; A[6] * B[7]
  1011. mov rax, QWORD PTR [r8+56]
  1012. mul QWORD PTR [r9+48]
  1013. add r11, rax
  1014. adc r12, rdx
  1015. adc r10, 0
  1016. ; A[7] * B[6]
  1017. mov rax, QWORD PTR [r8+48]
  1018. mul QWORD PTR [r9+56]
  1019. add r11, rax
  1020. adc r12, rdx
  1021. adc r10, 0
  1022. ; A[8] * B[5]
  1023. mov rax, QWORD PTR [r8+40]
  1024. mul QWORD PTR [r9+64]
  1025. add r11, rax
  1026. adc r12, rdx
  1027. adc r10, 0
  1028. ; A[9] * B[4]
  1029. mov rax, QWORD PTR [r8+32]
  1030. mul QWORD PTR [r9+72]
  1031. add r11, rax
  1032. adc r12, rdx
  1033. adc r10, 0
  1034. ; A[10] * B[3]
  1035. mov rax, QWORD PTR [r8+24]
  1036. mul QWORD PTR [r9+80]
  1037. add r11, rax
  1038. adc r12, rdx
  1039. adc r10, 0
  1040. ; A[11] * B[2]
  1041. mov rax, QWORD PTR [r8+16]
  1042. mul QWORD PTR [r9+88]
  1043. add r11, rax
  1044. adc r12, rdx
  1045. adc r10, 0
  1046. ; A[12] * B[1]
  1047. mov rax, QWORD PTR [r8+8]
  1048. mul QWORD PTR [r9+96]
  1049. add r11, rax
  1050. adc r12, rdx
  1051. adc r10, 0
  1052. ; A[13] * B[0]
  1053. mov rax, QWORD PTR [r8]
  1054. mul QWORD PTR [r9+104]
  1055. add r11, rax
  1056. adc r12, rdx
  1057. adc r10, 0
  1058. mov QWORD PTR [rsp+104], r11
  1059. ; A[0] * B[14]
  1060. mov rax, QWORD PTR [r8+112]
  1061. mul QWORD PTR [r9]
  1062. xor r11, r11
  1063. add r12, rax
  1064. adc r10, rdx
  1065. adc r11, 0
  1066. ; A[1] * B[13]
  1067. mov rax, QWORD PTR [r8+104]
  1068. mul QWORD PTR [r9+8]
  1069. add r12, rax
  1070. adc r10, rdx
  1071. adc r11, 0
  1072. ; A[2] * B[12]
  1073. mov rax, QWORD PTR [r8+96]
  1074. mul QWORD PTR [r9+16]
  1075. add r12, rax
  1076. adc r10, rdx
  1077. adc r11, 0
  1078. ; A[3] * B[11]
  1079. mov rax, QWORD PTR [r8+88]
  1080. mul QWORD PTR [r9+24]
  1081. add r12, rax
  1082. adc r10, rdx
  1083. adc r11, 0
  1084. ; A[4] * B[10]
  1085. mov rax, QWORD PTR [r8+80]
  1086. mul QWORD PTR [r9+32]
  1087. add r12, rax
  1088. adc r10, rdx
  1089. adc r11, 0
  1090. ; A[5] * B[9]
  1091. mov rax, QWORD PTR [r8+72]
  1092. mul QWORD PTR [r9+40]
  1093. add r12, rax
  1094. adc r10, rdx
  1095. adc r11, 0
  1096. ; A[6] * B[8]
  1097. mov rax, QWORD PTR [r8+64]
  1098. mul QWORD PTR [r9+48]
  1099. add r12, rax
  1100. adc r10, rdx
  1101. adc r11, 0
  1102. ; A[7] * B[7]
  1103. mov rax, QWORD PTR [r8+56]
  1104. mul QWORD PTR [r9+56]
  1105. add r12, rax
  1106. adc r10, rdx
  1107. adc r11, 0
  1108. ; A[8] * B[6]
  1109. mov rax, QWORD PTR [r8+48]
  1110. mul QWORD PTR [r9+64]
  1111. add r12, rax
  1112. adc r10, rdx
  1113. adc r11, 0
  1114. ; A[9] * B[5]
  1115. mov rax, QWORD PTR [r8+40]
  1116. mul QWORD PTR [r9+72]
  1117. add r12, rax
  1118. adc r10, rdx
  1119. adc r11, 0
  1120. ; A[10] * B[4]
  1121. mov rax, QWORD PTR [r8+32]
  1122. mul QWORD PTR [r9+80]
  1123. add r12, rax
  1124. adc r10, rdx
  1125. adc r11, 0
  1126. ; A[11] * B[3]
  1127. mov rax, QWORD PTR [r8+24]
  1128. mul QWORD PTR [r9+88]
  1129. add r12, rax
  1130. adc r10, rdx
  1131. adc r11, 0
  1132. ; A[12] * B[2]
  1133. mov rax, QWORD PTR [r8+16]
  1134. mul QWORD PTR [r9+96]
  1135. add r12, rax
  1136. adc r10, rdx
  1137. adc r11, 0
  1138. ; A[13] * B[1]
  1139. mov rax, QWORD PTR [r8+8]
  1140. mul QWORD PTR [r9+104]
  1141. add r12, rax
  1142. adc r10, rdx
  1143. adc r11, 0
  1144. ; A[14] * B[0]
  1145. mov rax, QWORD PTR [r8]
  1146. mul QWORD PTR [r9+112]
  1147. add r12, rax
  1148. adc r10, rdx
  1149. adc r11, 0
  1150. mov QWORD PTR [rsp+112], r12
  1151. ; A[0] * B[15]
  1152. mov rax, QWORD PTR [r8+120]
  1153. mul QWORD PTR [r9]
  1154. xor r12, r12
  1155. add r10, rax
  1156. adc r11, rdx
  1157. adc r12, 0
  1158. ; A[1] * B[14]
  1159. mov rax, QWORD PTR [r8+112]
  1160. mul QWORD PTR [r9+8]
  1161. add r10, rax
  1162. adc r11, rdx
  1163. adc r12, 0
  1164. ; A[2] * B[13]
  1165. mov rax, QWORD PTR [r8+104]
  1166. mul QWORD PTR [r9+16]
  1167. add r10, rax
  1168. adc r11, rdx
  1169. adc r12, 0
  1170. ; A[3] * B[12]
  1171. mov rax, QWORD PTR [r8+96]
  1172. mul QWORD PTR [r9+24]
  1173. add r10, rax
  1174. adc r11, rdx
  1175. adc r12, 0
  1176. ; A[4] * B[11]
  1177. mov rax, QWORD PTR [r8+88]
  1178. mul QWORD PTR [r9+32]
  1179. add r10, rax
  1180. adc r11, rdx
  1181. adc r12, 0
  1182. ; A[5] * B[10]
  1183. mov rax, QWORD PTR [r8+80]
  1184. mul QWORD PTR [r9+40]
  1185. add r10, rax
  1186. adc r11, rdx
  1187. adc r12, 0
  1188. ; A[6] * B[9]
  1189. mov rax, QWORD PTR [r8+72]
  1190. mul QWORD PTR [r9+48]
  1191. add r10, rax
  1192. adc r11, rdx
  1193. adc r12, 0
  1194. ; A[7] * B[8]
  1195. mov rax, QWORD PTR [r8+64]
  1196. mul QWORD PTR [r9+56]
  1197. add r10, rax
  1198. adc r11, rdx
  1199. adc r12, 0
  1200. ; A[8] * B[7]
  1201. mov rax, QWORD PTR [r8+56]
  1202. mul QWORD PTR [r9+64]
  1203. add r10, rax
  1204. adc r11, rdx
  1205. adc r12, 0
  1206. ; A[9] * B[6]
  1207. mov rax, QWORD PTR [r8+48]
  1208. mul QWORD PTR [r9+72]
  1209. add r10, rax
  1210. adc r11, rdx
  1211. adc r12, 0
  1212. ; A[10] * B[5]
  1213. mov rax, QWORD PTR [r8+40]
  1214. mul QWORD PTR [r9+80]
  1215. add r10, rax
  1216. adc r11, rdx
  1217. adc r12, 0
  1218. ; A[11] * B[4]
  1219. mov rax, QWORD PTR [r8+32]
  1220. mul QWORD PTR [r9+88]
  1221. add r10, rax
  1222. adc r11, rdx
  1223. adc r12, 0
  1224. ; A[12] * B[3]
  1225. mov rax, QWORD PTR [r8+24]
  1226. mul QWORD PTR [r9+96]
  1227. add r10, rax
  1228. adc r11, rdx
  1229. adc r12, 0
  1230. ; A[13] * B[2]
  1231. mov rax, QWORD PTR [r8+16]
  1232. mul QWORD PTR [r9+104]
  1233. add r10, rax
  1234. adc r11, rdx
  1235. adc r12, 0
  1236. ; A[14] * B[1]
  1237. mov rax, QWORD PTR [r8+8]
  1238. mul QWORD PTR [r9+112]
  1239. add r10, rax
  1240. adc r11, rdx
  1241. adc r12, 0
  1242. ; A[15] * B[0]
  1243. mov rax, QWORD PTR [r8]
  1244. mul QWORD PTR [r9+120]
  1245. add r10, rax
  1246. adc r11, rdx
  1247. adc r12, 0
  1248. mov QWORD PTR [rsp+120], r10
  1249. ; A[1] * B[15]
  1250. mov rax, QWORD PTR [r8+120]
  1251. mul QWORD PTR [r9+8]
  1252. xor r10, r10
  1253. add r11, rax
  1254. adc r12, rdx
  1255. adc r10, 0
  1256. ; A[2] * B[14]
  1257. mov rax, QWORD PTR [r8+112]
  1258. mul QWORD PTR [r9+16]
  1259. add r11, rax
  1260. adc r12, rdx
  1261. adc r10, 0
  1262. ; A[3] * B[13]
  1263. mov rax, QWORD PTR [r8+104]
  1264. mul QWORD PTR [r9+24]
  1265. add r11, rax
  1266. adc r12, rdx
  1267. adc r10, 0
  1268. ; A[4] * B[12]
  1269. mov rax, QWORD PTR [r8+96]
  1270. mul QWORD PTR [r9+32]
  1271. add r11, rax
  1272. adc r12, rdx
  1273. adc r10, 0
  1274. ; A[5] * B[11]
  1275. mov rax, QWORD PTR [r8+88]
  1276. mul QWORD PTR [r9+40]
  1277. add r11, rax
  1278. adc r12, rdx
  1279. adc r10, 0
  1280. ; A[6] * B[10]
  1281. mov rax, QWORD PTR [r8+80]
  1282. mul QWORD PTR [r9+48]
  1283. add r11, rax
  1284. adc r12, rdx
  1285. adc r10, 0
  1286. ; A[7] * B[9]
  1287. mov rax, QWORD PTR [r8+72]
  1288. mul QWORD PTR [r9+56]
  1289. add r11, rax
  1290. adc r12, rdx
  1291. adc r10, 0
  1292. ; A[8] * B[8]
  1293. mov rax, QWORD PTR [r8+64]
  1294. mul QWORD PTR [r9+64]
  1295. add r11, rax
  1296. adc r12, rdx
  1297. adc r10, 0
  1298. ; A[9] * B[7]
  1299. mov rax, QWORD PTR [r8+56]
  1300. mul QWORD PTR [r9+72]
  1301. add r11, rax
  1302. adc r12, rdx
  1303. adc r10, 0
  1304. ; A[10] * B[6]
  1305. mov rax, QWORD PTR [r8+48]
  1306. mul QWORD PTR [r9+80]
  1307. add r11, rax
  1308. adc r12, rdx
  1309. adc r10, 0
  1310. ; A[11] * B[5]
  1311. mov rax, QWORD PTR [r8+40]
  1312. mul QWORD PTR [r9+88]
  1313. add r11, rax
  1314. adc r12, rdx
  1315. adc r10, 0
  1316. ; A[12] * B[4]
  1317. mov rax, QWORD PTR [r8+32]
  1318. mul QWORD PTR [r9+96]
  1319. add r11, rax
  1320. adc r12, rdx
  1321. adc r10, 0
  1322. ; A[13] * B[3]
  1323. mov rax, QWORD PTR [r8+24]
  1324. mul QWORD PTR [r9+104]
  1325. add r11, rax
  1326. adc r12, rdx
  1327. adc r10, 0
  1328. ; A[14] * B[2]
  1329. mov rax, QWORD PTR [r8+16]
  1330. mul QWORD PTR [r9+112]
  1331. add r11, rax
  1332. adc r12, rdx
  1333. adc r10, 0
  1334. ; A[15] * B[1]
  1335. mov rax, QWORD PTR [r8+8]
  1336. mul QWORD PTR [r9+120]
  1337. add r11, rax
  1338. adc r12, rdx
  1339. adc r10, 0
  1340. mov QWORD PTR [rcx+128], r11
  1341. ; A[2] * B[15]
  1342. mov rax, QWORD PTR [r8+120]
  1343. mul QWORD PTR [r9+16]
  1344. xor r11, r11
  1345. add r12, rax
  1346. adc r10, rdx
  1347. adc r11, 0
  1348. ; A[3] * B[14]
  1349. mov rax, QWORD PTR [r8+112]
  1350. mul QWORD PTR [r9+24]
  1351. add r12, rax
  1352. adc r10, rdx
  1353. adc r11, 0
  1354. ; A[4] * B[13]
  1355. mov rax, QWORD PTR [r8+104]
  1356. mul QWORD PTR [r9+32]
  1357. add r12, rax
  1358. adc r10, rdx
  1359. adc r11, 0
  1360. ; A[5] * B[12]
  1361. mov rax, QWORD PTR [r8+96]
  1362. mul QWORD PTR [r9+40]
  1363. add r12, rax
  1364. adc r10, rdx
  1365. adc r11, 0
  1366. ; A[6] * B[11]
  1367. mov rax, QWORD PTR [r8+88]
  1368. mul QWORD PTR [r9+48]
  1369. add r12, rax
  1370. adc r10, rdx
  1371. adc r11, 0
  1372. ; A[7] * B[10]
  1373. mov rax, QWORD PTR [r8+80]
  1374. mul QWORD PTR [r9+56]
  1375. add r12, rax
  1376. adc r10, rdx
  1377. adc r11, 0
  1378. ; A[8] * B[9]
  1379. mov rax, QWORD PTR [r8+72]
  1380. mul QWORD PTR [r9+64]
  1381. add r12, rax
  1382. adc r10, rdx
  1383. adc r11, 0
  1384. ; A[9] * B[8]
  1385. mov rax, QWORD PTR [r8+64]
  1386. mul QWORD PTR [r9+72]
  1387. add r12, rax
  1388. adc r10, rdx
  1389. adc r11, 0
  1390. ; A[10] * B[7]
  1391. mov rax, QWORD PTR [r8+56]
  1392. mul QWORD PTR [r9+80]
  1393. add r12, rax
  1394. adc r10, rdx
  1395. adc r11, 0
  1396. ; A[11] * B[6]
  1397. mov rax, QWORD PTR [r8+48]
  1398. mul QWORD PTR [r9+88]
  1399. add r12, rax
  1400. adc r10, rdx
  1401. adc r11, 0
  1402. ; A[12] * B[5]
  1403. mov rax, QWORD PTR [r8+40]
  1404. mul QWORD PTR [r9+96]
  1405. add r12, rax
  1406. adc r10, rdx
  1407. adc r11, 0
  1408. ; A[13] * B[4]
  1409. mov rax, QWORD PTR [r8+32]
  1410. mul QWORD PTR [r9+104]
  1411. add r12, rax
  1412. adc r10, rdx
  1413. adc r11, 0
  1414. ; A[14] * B[3]
  1415. mov rax, QWORD PTR [r8+24]
  1416. mul QWORD PTR [r9+112]
  1417. add r12, rax
  1418. adc r10, rdx
  1419. adc r11, 0
  1420. ; A[15] * B[2]
  1421. mov rax, QWORD PTR [r8+16]
  1422. mul QWORD PTR [r9+120]
  1423. add r12, rax
  1424. adc r10, rdx
  1425. adc r11, 0
  1426. mov QWORD PTR [rcx+136], r12
  1427. ; A[3] * B[15]
  1428. mov rax, QWORD PTR [r8+120]
  1429. mul QWORD PTR [r9+24]
  1430. xor r12, r12
  1431. add r10, rax
  1432. adc r11, rdx
  1433. adc r12, 0
  1434. ; A[4] * B[14]
  1435. mov rax, QWORD PTR [r8+112]
  1436. mul QWORD PTR [r9+32]
  1437. add r10, rax
  1438. adc r11, rdx
  1439. adc r12, 0
  1440. ; A[5] * B[13]
  1441. mov rax, QWORD PTR [r8+104]
  1442. mul QWORD PTR [r9+40]
  1443. add r10, rax
  1444. adc r11, rdx
  1445. adc r12, 0
  1446. ; A[6] * B[12]
  1447. mov rax, QWORD PTR [r8+96]
  1448. mul QWORD PTR [r9+48]
  1449. add r10, rax
  1450. adc r11, rdx
  1451. adc r12, 0
  1452. ; A[7] * B[11]
  1453. mov rax, QWORD PTR [r8+88]
  1454. mul QWORD PTR [r9+56]
  1455. add r10, rax
  1456. adc r11, rdx
  1457. adc r12, 0
  1458. ; A[8] * B[10]
  1459. mov rax, QWORD PTR [r8+80]
  1460. mul QWORD PTR [r9+64]
  1461. add r10, rax
  1462. adc r11, rdx
  1463. adc r12, 0
  1464. ; A[9] * B[9]
  1465. mov rax, QWORD PTR [r8+72]
  1466. mul QWORD PTR [r9+72]
  1467. add r10, rax
  1468. adc r11, rdx
  1469. adc r12, 0
  1470. ; A[10] * B[8]
  1471. mov rax, QWORD PTR [r8+64]
  1472. mul QWORD PTR [r9+80]
  1473. add r10, rax
  1474. adc r11, rdx
  1475. adc r12, 0
  1476. ; A[11] * B[7]
  1477. mov rax, QWORD PTR [r8+56]
  1478. mul QWORD PTR [r9+88]
  1479. add r10, rax
  1480. adc r11, rdx
  1481. adc r12, 0
  1482. ; A[12] * B[6]
  1483. mov rax, QWORD PTR [r8+48]
  1484. mul QWORD PTR [r9+96]
  1485. add r10, rax
  1486. adc r11, rdx
  1487. adc r12, 0
  1488. ; A[13] * B[5]
  1489. mov rax, QWORD PTR [r8+40]
  1490. mul QWORD PTR [r9+104]
  1491. add r10, rax
  1492. adc r11, rdx
  1493. adc r12, 0
  1494. ; A[14] * B[4]
  1495. mov rax, QWORD PTR [r8+32]
  1496. mul QWORD PTR [r9+112]
  1497. add r10, rax
  1498. adc r11, rdx
  1499. adc r12, 0
  1500. ; A[15] * B[3]
  1501. mov rax, QWORD PTR [r8+24]
  1502. mul QWORD PTR [r9+120]
  1503. add r10, rax
  1504. adc r11, rdx
  1505. adc r12, 0
  1506. mov QWORD PTR [rcx+144], r10
  1507. ; A[4] * B[15]
  1508. mov rax, QWORD PTR [r8+120]
  1509. mul QWORD PTR [r9+32]
  1510. xor r10, r10
  1511. add r11, rax
  1512. adc r12, rdx
  1513. adc r10, 0
  1514. ; A[5] * B[14]
  1515. mov rax, QWORD PTR [r8+112]
  1516. mul QWORD PTR [r9+40]
  1517. add r11, rax
  1518. adc r12, rdx
  1519. adc r10, 0
  1520. ; A[6] * B[13]
  1521. mov rax, QWORD PTR [r8+104]
  1522. mul QWORD PTR [r9+48]
  1523. add r11, rax
  1524. adc r12, rdx
  1525. adc r10, 0
  1526. ; A[7] * B[12]
  1527. mov rax, QWORD PTR [r8+96]
  1528. mul QWORD PTR [r9+56]
  1529. add r11, rax
  1530. adc r12, rdx
  1531. adc r10, 0
  1532. ; A[8] * B[11]
  1533. mov rax, QWORD PTR [r8+88]
  1534. mul QWORD PTR [r9+64]
  1535. add r11, rax
  1536. adc r12, rdx
  1537. adc r10, 0
  1538. ; A[9] * B[10]
  1539. mov rax, QWORD PTR [r8+80]
  1540. mul QWORD PTR [r9+72]
  1541. add r11, rax
  1542. adc r12, rdx
  1543. adc r10, 0
  1544. ; A[10] * B[9]
  1545. mov rax, QWORD PTR [r8+72]
  1546. mul QWORD PTR [r9+80]
  1547. add r11, rax
  1548. adc r12, rdx
  1549. adc r10, 0
  1550. ; A[11] * B[8]
  1551. mov rax, QWORD PTR [r8+64]
  1552. mul QWORD PTR [r9+88]
  1553. add r11, rax
  1554. adc r12, rdx
  1555. adc r10, 0
  1556. ; A[12] * B[7]
  1557. mov rax, QWORD PTR [r8+56]
  1558. mul QWORD PTR [r9+96]
  1559. add r11, rax
  1560. adc r12, rdx
  1561. adc r10, 0
  1562. ; A[13] * B[6]
  1563. mov rax, QWORD PTR [r8+48]
  1564. mul QWORD PTR [r9+104]
  1565. add r11, rax
  1566. adc r12, rdx
  1567. adc r10, 0
  1568. ; A[14] * B[5]
  1569. mov rax, QWORD PTR [r8+40]
  1570. mul QWORD PTR [r9+112]
  1571. add r11, rax
  1572. adc r12, rdx
  1573. adc r10, 0
  1574. ; A[15] * B[4]
  1575. mov rax, QWORD PTR [r8+32]
  1576. mul QWORD PTR [r9+120]
  1577. add r11, rax
  1578. adc r12, rdx
  1579. adc r10, 0
  1580. mov QWORD PTR [rcx+152], r11
  1581. ; A[5] * B[15]
  1582. mov rax, QWORD PTR [r8+120]
  1583. mul QWORD PTR [r9+40]
  1584. xor r11, r11
  1585. add r12, rax
  1586. adc r10, rdx
  1587. adc r11, 0
  1588. ; A[6] * B[14]
  1589. mov rax, QWORD PTR [r8+112]
  1590. mul QWORD PTR [r9+48]
  1591. add r12, rax
  1592. adc r10, rdx
  1593. adc r11, 0
  1594. ; A[7] * B[13]
  1595. mov rax, QWORD PTR [r8+104]
  1596. mul QWORD PTR [r9+56]
  1597. add r12, rax
  1598. adc r10, rdx
  1599. adc r11, 0
  1600. ; A[8] * B[12]
  1601. mov rax, QWORD PTR [r8+96]
  1602. mul QWORD PTR [r9+64]
  1603. add r12, rax
  1604. adc r10, rdx
  1605. adc r11, 0
  1606. ; A[9] * B[11]
  1607. mov rax, QWORD PTR [r8+88]
  1608. mul QWORD PTR [r9+72]
  1609. add r12, rax
  1610. adc r10, rdx
  1611. adc r11, 0
  1612. ; A[10] * B[10]
  1613. mov rax, QWORD PTR [r8+80]
  1614. mul QWORD PTR [r9+80]
  1615. add r12, rax
  1616. adc r10, rdx
  1617. adc r11, 0
  1618. ; A[11] * B[9]
  1619. mov rax, QWORD PTR [r8+72]
  1620. mul QWORD PTR [r9+88]
  1621. add r12, rax
  1622. adc r10, rdx
  1623. adc r11, 0
  1624. ; A[12] * B[8]
  1625. mov rax, QWORD PTR [r8+64]
  1626. mul QWORD PTR [r9+96]
  1627. add r12, rax
  1628. adc r10, rdx
  1629. adc r11, 0
  1630. ; A[13] * B[7]
  1631. mov rax, QWORD PTR [r8+56]
  1632. mul QWORD PTR [r9+104]
  1633. add r12, rax
  1634. adc r10, rdx
  1635. adc r11, 0
  1636. ; A[14] * B[6]
  1637. mov rax, QWORD PTR [r8+48]
  1638. mul QWORD PTR [r9+112]
  1639. add r12, rax
  1640. adc r10, rdx
  1641. adc r11, 0
  1642. ; A[15] * B[5]
  1643. mov rax, QWORD PTR [r8+40]
  1644. mul QWORD PTR [r9+120]
  1645. add r12, rax
  1646. adc r10, rdx
  1647. adc r11, 0
  1648. mov QWORD PTR [rcx+160], r12
  1649. ; A[6] * B[15]
  1650. mov rax, QWORD PTR [r8+120]
  1651. mul QWORD PTR [r9+48]
  1652. xor r12, r12
  1653. add r10, rax
  1654. adc r11, rdx
  1655. adc r12, 0
  1656. ; A[7] * B[14]
  1657. mov rax, QWORD PTR [r8+112]
  1658. mul QWORD PTR [r9+56]
  1659. add r10, rax
  1660. adc r11, rdx
  1661. adc r12, 0
  1662. ; A[8] * B[13]
  1663. mov rax, QWORD PTR [r8+104]
  1664. mul QWORD PTR [r9+64]
  1665. add r10, rax
  1666. adc r11, rdx
  1667. adc r12, 0
  1668. ; A[9] * B[12]
  1669. mov rax, QWORD PTR [r8+96]
  1670. mul QWORD PTR [r9+72]
  1671. add r10, rax
  1672. adc r11, rdx
  1673. adc r12, 0
  1674. ; A[10] * B[11]
  1675. mov rax, QWORD PTR [r8+88]
  1676. mul QWORD PTR [r9+80]
  1677. add r10, rax
  1678. adc r11, rdx
  1679. adc r12, 0
  1680. ; A[11] * B[10]
  1681. mov rax, QWORD PTR [r8+80]
  1682. mul QWORD PTR [r9+88]
  1683. add r10, rax
  1684. adc r11, rdx
  1685. adc r12, 0
  1686. ; A[12] * B[9]
  1687. mov rax, QWORD PTR [r8+72]
  1688. mul QWORD PTR [r9+96]
  1689. add r10, rax
  1690. adc r11, rdx
  1691. adc r12, 0
  1692. ; A[13] * B[8]
  1693. mov rax, QWORD PTR [r8+64]
  1694. mul QWORD PTR [r9+104]
  1695. add r10, rax
  1696. adc r11, rdx
  1697. adc r12, 0
  1698. ; A[14] * B[7]
  1699. mov rax, QWORD PTR [r8+56]
  1700. mul QWORD PTR [r9+112]
  1701. add r10, rax
  1702. adc r11, rdx
  1703. adc r12, 0
  1704. ; A[15] * B[6]
  1705. mov rax, QWORD PTR [r8+48]
  1706. mul QWORD PTR [r9+120]
  1707. add r10, rax
  1708. adc r11, rdx
  1709. adc r12, 0
  1710. mov QWORD PTR [rcx+168], r10
  1711. ; A[7] * B[15]
  1712. mov rax, QWORD PTR [r8+120]
  1713. mul QWORD PTR [r9+56]
  1714. xor r10, r10
  1715. add r11, rax
  1716. adc r12, rdx
  1717. adc r10, 0
  1718. ; A[8] * B[14]
  1719. mov rax, QWORD PTR [r8+112]
  1720. mul QWORD PTR [r9+64]
  1721. add r11, rax
  1722. adc r12, rdx
  1723. adc r10, 0
  1724. ; A[9] * B[13]
  1725. mov rax, QWORD PTR [r8+104]
  1726. mul QWORD PTR [r9+72]
  1727. add r11, rax
  1728. adc r12, rdx
  1729. adc r10, 0
  1730. ; A[10] * B[12]
  1731. mov rax, QWORD PTR [r8+96]
  1732. mul QWORD PTR [r9+80]
  1733. add r11, rax
  1734. adc r12, rdx
  1735. adc r10, 0
  1736. ; A[11] * B[11]
  1737. mov rax, QWORD PTR [r8+88]
  1738. mul QWORD PTR [r9+88]
  1739. add r11, rax
  1740. adc r12, rdx
  1741. adc r10, 0
  1742. ; A[12] * B[10]
  1743. mov rax, QWORD PTR [r8+80]
  1744. mul QWORD PTR [r9+96]
  1745. add r11, rax
  1746. adc r12, rdx
  1747. adc r10, 0
  1748. ; A[13] * B[9]
  1749. mov rax, QWORD PTR [r8+72]
  1750. mul QWORD PTR [r9+104]
  1751. add r11, rax
  1752. adc r12, rdx
  1753. adc r10, 0
  1754. ; A[14] * B[8]
  1755. mov rax, QWORD PTR [r8+64]
  1756. mul QWORD PTR [r9+112]
  1757. add r11, rax
  1758. adc r12, rdx
  1759. adc r10, 0
  1760. ; A[15] * B[7]
  1761. mov rax, QWORD PTR [r8+56]
  1762. mul QWORD PTR [r9+120]
  1763. add r11, rax
  1764. adc r12, rdx
  1765. adc r10, 0
  1766. mov QWORD PTR [rcx+176], r11
  1767. ; A[8] * B[15]
  1768. mov rax, QWORD PTR [r8+120]
  1769. mul QWORD PTR [r9+64]
  1770. xor r11, r11
  1771. add r12, rax
  1772. adc r10, rdx
  1773. adc r11, 0
  1774. ; A[9] * B[14]
  1775. mov rax, QWORD PTR [r8+112]
  1776. mul QWORD PTR [r9+72]
  1777. add r12, rax
  1778. adc r10, rdx
  1779. adc r11, 0
  1780. ; A[10] * B[13]
  1781. mov rax, QWORD PTR [r8+104]
  1782. mul QWORD PTR [r9+80]
  1783. add r12, rax
  1784. adc r10, rdx
  1785. adc r11, 0
  1786. ; A[11] * B[12]
  1787. mov rax, QWORD PTR [r8+96]
  1788. mul QWORD PTR [r9+88]
  1789. add r12, rax
  1790. adc r10, rdx
  1791. adc r11, 0
  1792. ; A[12] * B[11]
  1793. mov rax, QWORD PTR [r8+88]
  1794. mul QWORD PTR [r9+96]
  1795. add r12, rax
  1796. adc r10, rdx
  1797. adc r11, 0
  1798. ; A[13] * B[10]
  1799. mov rax, QWORD PTR [r8+80]
  1800. mul QWORD PTR [r9+104]
  1801. add r12, rax
  1802. adc r10, rdx
  1803. adc r11, 0
  1804. ; A[14] * B[9]
  1805. mov rax, QWORD PTR [r8+72]
  1806. mul QWORD PTR [r9+112]
  1807. add r12, rax
  1808. adc r10, rdx
  1809. adc r11, 0
  1810. ; A[15] * B[8]
  1811. mov rax, QWORD PTR [r8+64]
  1812. mul QWORD PTR [r9+120]
  1813. add r12, rax
  1814. adc r10, rdx
  1815. adc r11, 0
  1816. mov QWORD PTR [rcx+184], r12
  1817. ; A[9] * B[15]
  1818. mov rax, QWORD PTR [r8+120]
  1819. mul QWORD PTR [r9+72]
  1820. xor r12, r12
  1821. add r10, rax
  1822. adc r11, rdx
  1823. adc r12, 0
  1824. ; A[10] * B[14]
  1825. mov rax, QWORD PTR [r8+112]
  1826. mul QWORD PTR [r9+80]
  1827. add r10, rax
  1828. adc r11, rdx
  1829. adc r12, 0
  1830. ; A[11] * B[13]
  1831. mov rax, QWORD PTR [r8+104]
  1832. mul QWORD PTR [r9+88]
  1833. add r10, rax
  1834. adc r11, rdx
  1835. adc r12, 0
  1836. ; A[12] * B[12]
  1837. mov rax, QWORD PTR [r8+96]
  1838. mul QWORD PTR [r9+96]
  1839. add r10, rax
  1840. adc r11, rdx
  1841. adc r12, 0
  1842. ; A[13] * B[11]
  1843. mov rax, QWORD PTR [r8+88]
  1844. mul QWORD PTR [r9+104]
  1845. add r10, rax
  1846. adc r11, rdx
  1847. adc r12, 0
  1848. ; A[14] * B[10]
  1849. mov rax, QWORD PTR [r8+80]
  1850. mul QWORD PTR [r9+112]
  1851. add r10, rax
  1852. adc r11, rdx
  1853. adc r12, 0
  1854. ; A[15] * B[9]
  1855. mov rax, QWORD PTR [r8+72]
  1856. mul QWORD PTR [r9+120]
  1857. add r10, rax
  1858. adc r11, rdx
  1859. adc r12, 0
  1860. mov QWORD PTR [rcx+192], r10
  1861. ; A[10] * B[15]
  1862. mov rax, QWORD PTR [r8+120]
  1863. mul QWORD PTR [r9+80]
  1864. xor r10, r10
  1865. add r11, rax
  1866. adc r12, rdx
  1867. adc r10, 0
  1868. ; A[11] * B[14]
  1869. mov rax, QWORD PTR [r8+112]
  1870. mul QWORD PTR [r9+88]
  1871. add r11, rax
  1872. adc r12, rdx
  1873. adc r10, 0
  1874. ; A[12] * B[13]
  1875. mov rax, QWORD PTR [r8+104]
  1876. mul QWORD PTR [r9+96]
  1877. add r11, rax
  1878. adc r12, rdx
  1879. adc r10, 0
  1880. ; A[13] * B[12]
  1881. mov rax, QWORD PTR [r8+96]
  1882. mul QWORD PTR [r9+104]
  1883. add r11, rax
  1884. adc r12, rdx
  1885. adc r10, 0
  1886. ; A[14] * B[11]
  1887. mov rax, QWORD PTR [r8+88]
  1888. mul QWORD PTR [r9+112]
  1889. add r11, rax
  1890. adc r12, rdx
  1891. adc r10, 0
  1892. ; A[15] * B[10]
  1893. mov rax, QWORD PTR [r8+80]
  1894. mul QWORD PTR [r9+120]
  1895. add r11, rax
  1896. adc r12, rdx
  1897. adc r10, 0
  1898. mov QWORD PTR [rcx+200], r11
  1899. ; A[11] * B[15]
  1900. mov rax, QWORD PTR [r8+120]
  1901. mul QWORD PTR [r9+88]
  1902. xor r11, r11
  1903. add r12, rax
  1904. adc r10, rdx
  1905. adc r11, 0
  1906. ; A[12] * B[14]
  1907. mov rax, QWORD PTR [r8+112]
  1908. mul QWORD PTR [r9+96]
  1909. add r12, rax
  1910. adc r10, rdx
  1911. adc r11, 0
  1912. ; A[13] * B[13]
  1913. mov rax, QWORD PTR [r8+104]
  1914. mul QWORD PTR [r9+104]
  1915. add r12, rax
  1916. adc r10, rdx
  1917. adc r11, 0
  1918. ; A[14] * B[12]
  1919. mov rax, QWORD PTR [r8+96]
  1920. mul QWORD PTR [r9+112]
  1921. add r12, rax
  1922. adc r10, rdx
  1923. adc r11, 0
  1924. ; A[15] * B[11]
  1925. mov rax, QWORD PTR [r8+88]
  1926. mul QWORD PTR [r9+120]
  1927. add r12, rax
  1928. adc r10, rdx
  1929. adc r11, 0
  1930. mov QWORD PTR [rcx+208], r12
  1931. ; A[12] * B[15]
  1932. mov rax, QWORD PTR [r8+120]
  1933. mul QWORD PTR [r9+96]
  1934. xor r12, r12
  1935. add r10, rax
  1936. adc r11, rdx
  1937. adc r12, 0
  1938. ; A[13] * B[14]
  1939. mov rax, QWORD PTR [r8+112]
  1940. mul QWORD PTR [r9+104]
  1941. add r10, rax
  1942. adc r11, rdx
  1943. adc r12, 0
  1944. ; A[14] * B[13]
  1945. mov rax, QWORD PTR [r8+104]
  1946. mul QWORD PTR [r9+112]
  1947. add r10, rax
  1948. adc r11, rdx
  1949. adc r12, 0
  1950. ; A[15] * B[12]
  1951. mov rax, QWORD PTR [r8+96]
  1952. mul QWORD PTR [r9+120]
  1953. add r10, rax
  1954. adc r11, rdx
  1955. adc r12, 0
  1956. mov QWORD PTR [rcx+216], r10
  1957. ; A[13] * B[15]
  1958. mov rax, QWORD PTR [r8+120]
  1959. mul QWORD PTR [r9+104]
  1960. xor r10, r10
  1961. add r11, rax
  1962. adc r12, rdx
  1963. adc r10, 0
  1964. ; A[14] * B[14]
  1965. mov rax, QWORD PTR [r8+112]
  1966. mul QWORD PTR [r9+112]
  1967. add r11, rax
  1968. adc r12, rdx
  1969. adc r10, 0
  1970. ; A[15] * B[13]
  1971. mov rax, QWORD PTR [r8+104]
  1972. mul QWORD PTR [r9+120]
  1973. add r11, rax
  1974. adc r12, rdx
  1975. adc r10, 0
  1976. mov QWORD PTR [rcx+224], r11
  1977. ; A[14] * B[15]
  1978. mov rax, QWORD PTR [r8+120]
  1979. mul QWORD PTR [r9+112]
  1980. xor r11, r11
  1981. add r12, rax
  1982. adc r10, rdx
  1983. adc r11, 0
  1984. ; A[15] * B[14]
  1985. mov rax, QWORD PTR [r8+112]
  1986. mul QWORD PTR [r9+120]
  1987. add r12, rax
  1988. adc r10, rdx
  1989. adc r11, 0
  1990. mov QWORD PTR [rcx+232], r12
  1991. ; A[15] * B[15]
  1992. mov rax, QWORD PTR [r8+120]
  1993. mul QWORD PTR [r9+120]
  1994. add r10, rax
  1995. adc r11, rdx
  1996. mov QWORD PTR [rcx+240], r10
  1997. mov QWORD PTR [rcx+248], r11
  1998. mov rax, QWORD PTR [rsp]
  1999. mov rdx, QWORD PTR [rsp+8]
  2000. mov r10, QWORD PTR [rsp+16]
  2001. mov r11, QWORD PTR [rsp+24]
  2002. mov QWORD PTR [rcx], rax
  2003. mov QWORD PTR [rcx+8], rdx
  2004. mov QWORD PTR [rcx+16], r10
  2005. mov QWORD PTR [rcx+24], r11
  2006. mov rax, QWORD PTR [rsp+32]
  2007. mov rdx, QWORD PTR [rsp+40]
  2008. mov r10, QWORD PTR [rsp+48]
  2009. mov r11, QWORD PTR [rsp+56]
  2010. mov QWORD PTR [rcx+32], rax
  2011. mov QWORD PTR [rcx+40], rdx
  2012. mov QWORD PTR [rcx+48], r10
  2013. mov QWORD PTR [rcx+56], r11
  2014. mov rax, QWORD PTR [rsp+64]
  2015. mov rdx, QWORD PTR [rsp+72]
  2016. mov r10, QWORD PTR [rsp+80]
  2017. mov r11, QWORD PTR [rsp+88]
  2018. mov QWORD PTR [rcx+64], rax
  2019. mov QWORD PTR [rcx+72], rdx
  2020. mov QWORD PTR [rcx+80], r10
  2021. mov QWORD PTR [rcx+88], r11
  2022. mov rax, QWORD PTR [rsp+96]
  2023. mov rdx, QWORD PTR [rsp+104]
  2024. mov r10, QWORD PTR [rsp+112]
  2025. mov r11, QWORD PTR [rsp+120]
  2026. mov QWORD PTR [rcx+96], rax
  2027. mov QWORD PTR [rcx+104], rdx
  2028. mov QWORD PTR [rcx+112], r10
  2029. mov QWORD PTR [rcx+120], r11
  2030. add rsp, 128
  2031. pop r12
  2032. ret
  2033. sp_2048_mul_16 ENDP
  2034. _text ENDS
  2035. IFDEF HAVE_INTEL_AVX2
  2036. ; /* Multiply a and b into r using the mulx, adcx and adox instructions. (r = a * b)
  2037. ; *
  2038. ; * r Result of multiplication.
  2039. ; * a First number to multiply (16 64-bit words).
  2040. ; * b Second number to multiply (16 64-bit words).
  2041. ; */
  2042. _text SEGMENT READONLY PARA
  2043. sp_2048_mul_avx2_16 PROC
  2044. push rbx
  2045. push rbp
  2046. push r12
  2047. push r13
  2048. push r14
  2049. push r15
  2050. push rdi
  2051. mov rbp, r8
  2052. mov r8, rcx
  2053. mov r9, rdx
  2054. sub rsp, 128
  2055. cmp r9, r8
  2056. mov rbx, rsp
  2057. cmovne rbx, r8
  2058. cmp rbp, r8
  2059. cmove rbx, rsp
  2060. add r8, 128
  2061. xor rdi, rdi
  2062. mov rdx, QWORD PTR [r9]
  2063. ; A[0] * B[0]
  2064. mulx r11, r10, QWORD PTR [rbp]
  2065. ; A[0] * B[1]
  2066. mulx r12, rax, QWORD PTR [rbp+8]
  2067. mov QWORD PTR [rbx], r10
  2068. adcx r11, rax
  2069. ; A[0] * B[2]
  2070. mulx r13, rax, QWORD PTR [rbp+16]
  2071. mov QWORD PTR [rbx+8], r11
  2072. adcx r12, rax
  2073. ; A[0] * B[3]
  2074. mulx r14, rax, QWORD PTR [rbp+24]
  2075. mov QWORD PTR [rbx+16], r12
  2076. adcx r13, rax
  2077. mov QWORD PTR [rbx+24], r13
  2078. ; A[0] * B[4]
  2079. mulx r10, rax, QWORD PTR [rbp+32]
  2080. adcx r14, rax
  2081. ; A[0] * B[5]
  2082. mulx r11, rax, QWORD PTR [rbp+40]
  2083. mov QWORD PTR [rbx+32], r14
  2084. adcx r10, rax
  2085. ; A[0] * B[6]
  2086. mulx r12, rax, QWORD PTR [rbp+48]
  2087. mov QWORD PTR [rbx+40], r10
  2088. adcx r11, rax
  2089. ; A[0] * B[7]
  2090. mulx r13, rax, QWORD PTR [rbp+56]
  2091. mov QWORD PTR [rbx+48], r11
  2092. adcx r12, rax
  2093. mov QWORD PTR [rbx+56], r12
  2094. ; A[0] * B[8]
  2095. mulx r14, rax, QWORD PTR [rbp+64]
  2096. adcx r13, rax
  2097. ; A[0] * B[9]
  2098. mulx r10, rax, QWORD PTR [rbp+72]
  2099. mov QWORD PTR [rbx+64], r13
  2100. adcx r14, rax
  2101. ; A[0] * B[10]
  2102. mulx r11, rax, QWORD PTR [rbp+80]
  2103. mov QWORD PTR [rbx+72], r14
  2104. adcx r10, rax
  2105. ; A[0] * B[11]
  2106. mulx r12, rax, QWORD PTR [rbp+88]
  2107. mov QWORD PTR [rbx+80], r10
  2108. adcx r11, rax
  2109. mov QWORD PTR [rbx+88], r11
  2110. ; A[0] * B[12]
  2111. mulx r13, rax, QWORD PTR [rbp+96]
  2112. adcx r12, rax
  2113. ; A[0] * B[13]
  2114. mulx r14, rax, QWORD PTR [rbp+104]
  2115. mov QWORD PTR [rbx+96], r12
  2116. adcx r13, rax
  2117. ; A[0] * B[14]
  2118. mulx r10, rax, QWORD PTR [rbp+112]
  2119. mov QWORD PTR [rbx+104], r13
  2120. adcx r14, rax
  2121. ; A[0] * B[15]
  2122. mulx r11, rax, QWORD PTR [rbp+120]
  2123. mov QWORD PTR [rbx+112], r14
  2124. adcx r10, rax
  2125. adcx r11, rdi
  2126. mov r15, rdi
  2127. adcx r15, rdi
  2128. mov QWORD PTR [rbx+120], r10
  2129. mov QWORD PTR [r8], r11
  2130. mov rdx, QWORD PTR [r9+8]
  2131. mov r11, QWORD PTR [rbx+8]
  2132. mov r12, QWORD PTR [rbx+16]
  2133. mov r13, QWORD PTR [rbx+24]
  2134. mov r14, QWORD PTR [rbx+32]
  2135. mov r10, QWORD PTR [rbx+40]
  2136. ; A[1] * B[0]
  2137. mulx rcx, rax, QWORD PTR [rbp]
  2138. adcx r11, rax
  2139. adox r12, rcx
  2140. ; A[1] * B[1]
  2141. mulx rcx, rax, QWORD PTR [rbp+8]
  2142. mov QWORD PTR [rbx+8], r11
  2143. adcx r12, rax
  2144. adox r13, rcx
  2145. ; A[1] * B[2]
  2146. mulx rcx, rax, QWORD PTR [rbp+16]
  2147. mov QWORD PTR [rbx+16], r12
  2148. adcx r13, rax
  2149. adox r14, rcx
  2150. ; A[1] * B[3]
  2151. mulx rcx, rax, QWORD PTR [rbp+24]
  2152. mov QWORD PTR [rbx+24], r13
  2153. adcx r14, rax
  2154. adox r10, rcx
  2155. mov QWORD PTR [rbx+32], r14
  2156. mov r11, QWORD PTR [rbx+48]
  2157. mov r12, QWORD PTR [rbx+56]
  2158. mov r13, QWORD PTR [rbx+64]
  2159. mov r14, QWORD PTR [rbx+72]
  2160. ; A[1] * B[4]
  2161. mulx rcx, rax, QWORD PTR [rbp+32]
  2162. adcx r10, rax
  2163. adox r11, rcx
  2164. ; A[1] * B[5]
  2165. mulx rcx, rax, QWORD PTR [rbp+40]
  2166. mov QWORD PTR [rbx+40], r10
  2167. adcx r11, rax
  2168. adox r12, rcx
  2169. ; A[1] * B[6]
  2170. mulx rcx, rax, QWORD PTR [rbp+48]
  2171. mov QWORD PTR [rbx+48], r11
  2172. adcx r12, rax
  2173. adox r13, rcx
  2174. ; A[1] * B[7]
  2175. mulx rcx, rax, QWORD PTR [rbp+56]
  2176. mov QWORD PTR [rbx+56], r12
  2177. adcx r13, rax
  2178. adox r14, rcx
  2179. mov QWORD PTR [rbx+64], r13
  2180. mov r10, QWORD PTR [rbx+80]
  2181. mov r11, QWORD PTR [rbx+88]
  2182. mov r12, QWORD PTR [rbx+96]
  2183. mov r13, QWORD PTR [rbx+104]
  2184. ; A[1] * B[8]
  2185. mulx rcx, rax, QWORD PTR [rbp+64]
  2186. adcx r14, rax
  2187. adox r10, rcx
  2188. ; A[1] * B[9]
  2189. mulx rcx, rax, QWORD PTR [rbp+72]
  2190. mov QWORD PTR [rbx+72], r14
  2191. adcx r10, rax
  2192. adox r11, rcx
  2193. ; A[1] * B[10]
  2194. mulx rcx, rax, QWORD PTR [rbp+80]
  2195. mov QWORD PTR [rbx+80], r10
  2196. adcx r11, rax
  2197. adox r12, rcx
  2198. ; A[1] * B[11]
  2199. mulx rcx, rax, QWORD PTR [rbp+88]
  2200. mov QWORD PTR [rbx+88], r11
  2201. adcx r12, rax
  2202. adox r13, rcx
  2203. mov QWORD PTR [rbx+96], r12
  2204. mov r14, QWORD PTR [rbx+112]
  2205. mov r10, QWORD PTR [rbx+120]
  2206. mov r11, QWORD PTR [r8]
  2207. ; A[1] * B[12]
  2208. mulx rcx, rax, QWORD PTR [rbp+96]
  2209. adcx r13, rax
  2210. adox r14, rcx
  2211. ; A[1] * B[13]
  2212. mulx rcx, rax, QWORD PTR [rbp+104]
  2213. mov QWORD PTR [rbx+104], r13
  2214. adcx r14, rax
  2215. adox r10, rcx
  2216. ; A[1] * B[14]
  2217. mulx rcx, rax, QWORD PTR [rbp+112]
  2218. mov QWORD PTR [rbx+112], r14
  2219. adcx r10, rax
  2220. adox r11, rcx
  2221. ; A[1] * B[15]
  2222. mulx rcx, rax, QWORD PTR [rbp+120]
  2223. mov QWORD PTR [rbx+120], r10
  2224. mov r12, rdi
  2225. adcx r11, rax
  2226. adox r12, rcx
  2227. adcx r12, r15
  2228. mov r15, rdi
  2229. adox r15, rdi
  2230. adcx r15, rdi
  2231. mov QWORD PTR [r8], r11
  2232. mov QWORD PTR [r8+8], r12
  2233. mov rdx, QWORD PTR [r9+16]
  2234. mov r12, QWORD PTR [rbx+16]
  2235. mov r13, QWORD PTR [rbx+24]
  2236. mov r14, QWORD PTR [rbx+32]
  2237. mov r10, QWORD PTR [rbx+40]
  2238. mov r11, QWORD PTR [rbx+48]
  2239. ; A[2] * B[0]
  2240. mulx rcx, rax, QWORD PTR [rbp]
  2241. adcx r12, rax
  2242. adox r13, rcx
  2243. ; A[2] * B[1]
  2244. mulx rcx, rax, QWORD PTR [rbp+8]
  2245. mov QWORD PTR [rbx+16], r12
  2246. adcx r13, rax
  2247. adox r14, rcx
  2248. ; A[2] * B[2]
  2249. mulx rcx, rax, QWORD PTR [rbp+16]
  2250. mov QWORD PTR [rbx+24], r13
  2251. adcx r14, rax
  2252. adox r10, rcx
  2253. ; A[2] * B[3]
  2254. mulx rcx, rax, QWORD PTR [rbp+24]
  2255. mov QWORD PTR [rbx+32], r14
  2256. adcx r10, rax
  2257. adox r11, rcx
  2258. mov QWORD PTR [rbx+40], r10
  2259. mov r12, QWORD PTR [rbx+56]
  2260. mov r13, QWORD PTR [rbx+64]
  2261. mov r14, QWORD PTR [rbx+72]
  2262. mov r10, QWORD PTR [rbx+80]
  2263. ; A[2] * B[4]
  2264. mulx rcx, rax, QWORD PTR [rbp+32]
  2265. adcx r11, rax
  2266. adox r12, rcx
  2267. ; A[2] * B[5]
  2268. mulx rcx, rax, QWORD PTR [rbp+40]
  2269. mov QWORD PTR [rbx+48], r11
  2270. adcx r12, rax
  2271. adox r13, rcx
  2272. ; A[2] * B[6]
  2273. mulx rcx, rax, QWORD PTR [rbp+48]
  2274. mov QWORD PTR [rbx+56], r12
  2275. adcx r13, rax
  2276. adox r14, rcx
  2277. ; A[2] * B[7]
  2278. mulx rcx, rax, QWORD PTR [rbp+56]
  2279. mov QWORD PTR [rbx+64], r13
  2280. adcx r14, rax
  2281. adox r10, rcx
  2282. mov QWORD PTR [rbx+72], r14
  2283. mov r11, QWORD PTR [rbx+88]
  2284. mov r12, QWORD PTR [rbx+96]
  2285. mov r13, QWORD PTR [rbx+104]
  2286. mov r14, QWORD PTR [rbx+112]
  2287. ; A[2] * B[8]
  2288. mulx rcx, rax, QWORD PTR [rbp+64]
  2289. adcx r10, rax
  2290. adox r11, rcx
  2291. ; A[2] * B[9]
  2292. mulx rcx, rax, QWORD PTR [rbp+72]
  2293. mov QWORD PTR [rbx+80], r10
  2294. adcx r11, rax
  2295. adox r12, rcx
  2296. ; A[2] * B[10]
  2297. mulx rcx, rax, QWORD PTR [rbp+80]
  2298. mov QWORD PTR [rbx+88], r11
  2299. adcx r12, rax
  2300. adox r13, rcx
  2301. ; A[2] * B[11]
  2302. mulx rcx, rax, QWORD PTR [rbp+88]
  2303. mov QWORD PTR [rbx+96], r12
  2304. adcx r13, rax
  2305. adox r14, rcx
  2306. mov QWORD PTR [rbx+104], r13
  2307. mov r10, QWORD PTR [rbx+120]
  2308. mov r11, QWORD PTR [r8]
  2309. mov r12, QWORD PTR [r8+8]
  2310. ; A[2] * B[12]
  2311. mulx rcx, rax, QWORD PTR [rbp+96]
  2312. adcx r14, rax
  2313. adox r10, rcx
  2314. ; A[2] * B[13]
  2315. mulx rcx, rax, QWORD PTR [rbp+104]
  2316. mov QWORD PTR [rbx+112], r14
  2317. adcx r10, rax
  2318. adox r11, rcx
  2319. ; A[2] * B[14]
  2320. mulx rcx, rax, QWORD PTR [rbp+112]
  2321. mov QWORD PTR [rbx+120], r10
  2322. adcx r11, rax
  2323. adox r12, rcx
  2324. ; A[2] * B[15]
  2325. mulx rcx, rax, QWORD PTR [rbp+120]
  2326. mov QWORD PTR [r8], r11
  2327. mov r13, rdi
  2328. adcx r12, rax
  2329. adox r13, rcx
  2330. adcx r13, r15
  2331. mov r15, rdi
  2332. adox r15, rdi
  2333. adcx r15, rdi
  2334. mov QWORD PTR [r8+8], r12
  2335. mov QWORD PTR [r8+16], r13
  2336. mov rdx, QWORD PTR [r9+24]
  2337. mov r13, QWORD PTR [rbx+24]
  2338. mov r14, QWORD PTR [rbx+32]
  2339. mov r10, QWORD PTR [rbx+40]
  2340. mov r11, QWORD PTR [rbx+48]
  2341. mov r12, QWORD PTR [rbx+56]
  2342. ; A[3] * B[0]
  2343. mulx rcx, rax, QWORD PTR [rbp]
  2344. adcx r13, rax
  2345. adox r14, rcx
  2346. ; A[3] * B[1]
  2347. mulx rcx, rax, QWORD PTR [rbp+8]
  2348. mov QWORD PTR [rbx+24], r13
  2349. adcx r14, rax
  2350. adox r10, rcx
  2351. ; A[3] * B[2]
  2352. mulx rcx, rax, QWORD PTR [rbp+16]
  2353. mov QWORD PTR [rbx+32], r14
  2354. adcx r10, rax
  2355. adox r11, rcx
  2356. ; A[3] * B[3]
  2357. mulx rcx, rax, QWORD PTR [rbp+24]
  2358. mov QWORD PTR [rbx+40], r10
  2359. adcx r11, rax
  2360. adox r12, rcx
  2361. mov QWORD PTR [rbx+48], r11
  2362. mov r13, QWORD PTR [rbx+64]
  2363. mov r14, QWORD PTR [rbx+72]
  2364. mov r10, QWORD PTR [rbx+80]
  2365. mov r11, QWORD PTR [rbx+88]
  2366. ; A[3] * B[4]
  2367. mulx rcx, rax, QWORD PTR [rbp+32]
  2368. adcx r12, rax
  2369. adox r13, rcx
  2370. ; A[3] * B[5]
  2371. mulx rcx, rax, QWORD PTR [rbp+40]
  2372. mov QWORD PTR [rbx+56], r12
  2373. adcx r13, rax
  2374. adox r14, rcx
  2375. ; A[3] * B[6]
  2376. mulx rcx, rax, QWORD PTR [rbp+48]
  2377. mov QWORD PTR [rbx+64], r13
  2378. adcx r14, rax
  2379. adox r10, rcx
  2380. ; A[3] * B[7]
  2381. mulx rcx, rax, QWORD PTR [rbp+56]
  2382. mov QWORD PTR [rbx+72], r14
  2383. adcx r10, rax
  2384. adox r11, rcx
  2385. mov QWORD PTR [rbx+80], r10
  2386. mov r12, QWORD PTR [rbx+96]
  2387. mov r13, QWORD PTR [rbx+104]
  2388. mov r14, QWORD PTR [rbx+112]
  2389. mov r10, QWORD PTR [rbx+120]
  2390. ; A[3] * B[8]
  2391. mulx rcx, rax, QWORD PTR [rbp+64]
  2392. adcx r11, rax
  2393. adox r12, rcx
  2394. ; A[3] * B[9]
  2395. mulx rcx, rax, QWORD PTR [rbp+72]
  2396. mov QWORD PTR [rbx+88], r11
  2397. adcx r12, rax
  2398. adox r13, rcx
  2399. ; A[3] * B[10]
  2400. mulx rcx, rax, QWORD PTR [rbp+80]
  2401. mov QWORD PTR [rbx+96], r12
  2402. adcx r13, rax
  2403. adox r14, rcx
  2404. ; A[3] * B[11]
  2405. mulx rcx, rax, QWORD PTR [rbp+88]
  2406. mov QWORD PTR [rbx+104], r13
  2407. adcx r14, rax
  2408. adox r10, rcx
  2409. mov QWORD PTR [rbx+112], r14
  2410. mov r11, QWORD PTR [r8]
  2411. mov r12, QWORD PTR [r8+8]
  2412. mov r13, QWORD PTR [r8+16]
  2413. ; A[3] * B[12]
  2414. mulx rcx, rax, QWORD PTR [rbp+96]
  2415. adcx r10, rax
  2416. adox r11, rcx
  2417. ; A[3] * B[13]
  2418. mulx rcx, rax, QWORD PTR [rbp+104]
  2419. mov QWORD PTR [rbx+120], r10
  2420. adcx r11, rax
  2421. adox r12, rcx
  2422. ; A[3] * B[14]
  2423. mulx rcx, rax, QWORD PTR [rbp+112]
  2424. mov QWORD PTR [r8], r11
  2425. adcx r12, rax
  2426. adox r13, rcx
  2427. ; A[3] * B[15]
  2428. mulx rcx, rax, QWORD PTR [rbp+120]
  2429. mov QWORD PTR [r8+8], r12
  2430. mov r14, rdi
  2431. adcx r13, rax
  2432. adox r14, rcx
  2433. adcx r14, r15
  2434. mov r15, rdi
  2435. adox r15, rdi
  2436. adcx r15, rdi
  2437. mov QWORD PTR [r8+16], r13
  2438. mov QWORD PTR [r8+24], r14
  2439. mov rdx, QWORD PTR [r9+32]
  2440. mov r14, QWORD PTR [rbx+32]
  2441. mov r10, QWORD PTR [rbx+40]
  2442. mov r11, QWORD PTR [rbx+48]
  2443. mov r12, QWORD PTR [rbx+56]
  2444. mov r13, QWORD PTR [rbx+64]
  2445. ; A[4] * B[0]
  2446. mulx rcx, rax, QWORD PTR [rbp]
  2447. adcx r14, rax
  2448. adox r10, rcx
  2449. ; A[4] * B[1]
  2450. mulx rcx, rax, QWORD PTR [rbp+8]
  2451. mov QWORD PTR [rbx+32], r14
  2452. adcx r10, rax
  2453. adox r11, rcx
  2454. ; A[4] * B[2]
  2455. mulx rcx, rax, QWORD PTR [rbp+16]
  2456. mov QWORD PTR [rbx+40], r10
  2457. adcx r11, rax
  2458. adox r12, rcx
  2459. ; A[4] * B[3]
  2460. mulx rcx, rax, QWORD PTR [rbp+24]
  2461. mov QWORD PTR [rbx+48], r11
  2462. adcx r12, rax
  2463. adox r13, rcx
  2464. mov QWORD PTR [rbx+56], r12
  2465. mov r14, QWORD PTR [rbx+72]
  2466. mov r10, QWORD PTR [rbx+80]
  2467. mov r11, QWORD PTR [rbx+88]
  2468. mov r12, QWORD PTR [rbx+96]
  2469. ; A[4] * B[4]
  2470. mulx rcx, rax, QWORD PTR [rbp+32]
  2471. adcx r13, rax
  2472. adox r14, rcx
  2473. ; A[4] * B[5]
  2474. mulx rcx, rax, QWORD PTR [rbp+40]
  2475. mov QWORD PTR [rbx+64], r13
  2476. adcx r14, rax
  2477. adox r10, rcx
  2478. ; A[4] * B[6]
  2479. mulx rcx, rax, QWORD PTR [rbp+48]
  2480. mov QWORD PTR [rbx+72], r14
  2481. adcx r10, rax
  2482. adox r11, rcx
  2483. ; A[4] * B[7]
  2484. mulx rcx, rax, QWORD PTR [rbp+56]
  2485. mov QWORD PTR [rbx+80], r10
  2486. adcx r11, rax
  2487. adox r12, rcx
  2488. mov QWORD PTR [rbx+88], r11
  2489. mov r13, QWORD PTR [rbx+104]
  2490. mov r14, QWORD PTR [rbx+112]
  2491. mov r10, QWORD PTR [rbx+120]
  2492. mov r11, QWORD PTR [r8]
  2493. ; A[4] * B[8]
  2494. mulx rcx, rax, QWORD PTR [rbp+64]
  2495. adcx r12, rax
  2496. adox r13, rcx
  2497. ; A[4] * B[9]
  2498. mulx rcx, rax, QWORD PTR [rbp+72]
  2499. mov QWORD PTR [rbx+96], r12
  2500. adcx r13, rax
  2501. adox r14, rcx
  2502. ; A[4] * B[10]
  2503. mulx rcx, rax, QWORD PTR [rbp+80]
  2504. mov QWORD PTR [rbx+104], r13
  2505. adcx r14, rax
  2506. adox r10, rcx
  2507. ; A[4] * B[11]
  2508. mulx rcx, rax, QWORD PTR [rbp+88]
  2509. mov QWORD PTR [rbx+112], r14
  2510. adcx r10, rax
  2511. adox r11, rcx
  2512. mov QWORD PTR [rbx+120], r10
  2513. mov r12, QWORD PTR [r8+8]
  2514. mov r13, QWORD PTR [r8+16]
  2515. mov r14, QWORD PTR [r8+24]
  2516. ; A[4] * B[12]
  2517. mulx rcx, rax, QWORD PTR [rbp+96]
  2518. adcx r11, rax
  2519. adox r12, rcx
  2520. ; A[4] * B[13]
  2521. mulx rcx, rax, QWORD PTR [rbp+104]
  2522. mov QWORD PTR [r8], r11
  2523. adcx r12, rax
  2524. adox r13, rcx
  2525. ; A[4] * B[14]
  2526. mulx rcx, rax, QWORD PTR [rbp+112]
  2527. mov QWORD PTR [r8+8], r12
  2528. adcx r13, rax
  2529. adox r14, rcx
  2530. ; A[4] * B[15]
  2531. mulx rcx, rax, QWORD PTR [rbp+120]
  2532. mov QWORD PTR [r8+16], r13
  2533. mov r10, rdi
  2534. adcx r14, rax
  2535. adox r10, rcx
  2536. adcx r10, r15
  2537. mov r15, rdi
  2538. adox r15, rdi
  2539. adcx r15, rdi
  2540. mov QWORD PTR [r8+24], r14
  2541. mov QWORD PTR [r8+32], r10
  2542. mov rdx, QWORD PTR [r9+40]
  2543. mov r10, QWORD PTR [rbx+40]
  2544. mov r11, QWORD PTR [rbx+48]
  2545. mov r12, QWORD PTR [rbx+56]
  2546. mov r13, QWORD PTR [rbx+64]
  2547. mov r14, QWORD PTR [rbx+72]
  2548. ; A[5] * B[0]
  2549. mulx rcx, rax, QWORD PTR [rbp]
  2550. adcx r10, rax
  2551. adox r11, rcx
  2552. ; A[5] * B[1]
  2553. mulx rcx, rax, QWORD PTR [rbp+8]
  2554. mov QWORD PTR [rbx+40], r10
  2555. adcx r11, rax
  2556. adox r12, rcx
  2557. ; A[5] * B[2]
  2558. mulx rcx, rax, QWORD PTR [rbp+16]
  2559. mov QWORD PTR [rbx+48], r11
  2560. adcx r12, rax
  2561. adox r13, rcx
  2562. ; A[5] * B[3]
  2563. mulx rcx, rax, QWORD PTR [rbp+24]
  2564. mov QWORD PTR [rbx+56], r12
  2565. adcx r13, rax
  2566. adox r14, rcx
  2567. mov QWORD PTR [rbx+64], r13
  2568. mov r10, QWORD PTR [rbx+80]
  2569. mov r11, QWORD PTR [rbx+88]
  2570. mov r12, QWORD PTR [rbx+96]
  2571. mov r13, QWORD PTR [rbx+104]
  2572. ; A[5] * B[4]
  2573. mulx rcx, rax, QWORD PTR [rbp+32]
  2574. adcx r14, rax
  2575. adox r10, rcx
  2576. ; A[5] * B[5]
  2577. mulx rcx, rax, QWORD PTR [rbp+40]
  2578. mov QWORD PTR [rbx+72], r14
  2579. adcx r10, rax
  2580. adox r11, rcx
  2581. ; A[5] * B[6]
  2582. mulx rcx, rax, QWORD PTR [rbp+48]
  2583. mov QWORD PTR [rbx+80], r10
  2584. adcx r11, rax
  2585. adox r12, rcx
  2586. ; A[5] * B[7]
  2587. mulx rcx, rax, QWORD PTR [rbp+56]
  2588. mov QWORD PTR [rbx+88], r11
  2589. adcx r12, rax
  2590. adox r13, rcx
  2591. mov QWORD PTR [rbx+96], r12
  2592. mov r14, QWORD PTR [rbx+112]
  2593. mov r10, QWORD PTR [rbx+120]
  2594. mov r11, QWORD PTR [r8]
  2595. mov r12, QWORD PTR [r8+8]
  2596. ; A[5] * B[8]
  2597. mulx rcx, rax, QWORD PTR [rbp+64]
  2598. adcx r13, rax
  2599. adox r14, rcx
  2600. ; A[5] * B[9]
  2601. mulx rcx, rax, QWORD PTR [rbp+72]
  2602. mov QWORD PTR [rbx+104], r13
  2603. adcx r14, rax
  2604. adox r10, rcx
  2605. ; A[5] * B[10]
  2606. mulx rcx, rax, QWORD PTR [rbp+80]
  2607. mov QWORD PTR [rbx+112], r14
  2608. adcx r10, rax
  2609. adox r11, rcx
  2610. ; A[5] * B[11]
  2611. mulx rcx, rax, QWORD PTR [rbp+88]
  2612. mov QWORD PTR [rbx+120], r10
  2613. adcx r11, rax
  2614. adox r12, rcx
  2615. mov QWORD PTR [r8], r11
  2616. mov r13, QWORD PTR [r8+16]
  2617. mov r14, QWORD PTR [r8+24]
  2618. mov r10, QWORD PTR [r8+32]
  2619. ; A[5] * B[12]
  2620. mulx rcx, rax, QWORD PTR [rbp+96]
  2621. adcx r12, rax
  2622. adox r13, rcx
  2623. ; A[5] * B[13]
  2624. mulx rcx, rax, QWORD PTR [rbp+104]
  2625. mov QWORD PTR [r8+8], r12
  2626. adcx r13, rax
  2627. adox r14, rcx
  2628. ; A[5] * B[14]
  2629. mulx rcx, rax, QWORD PTR [rbp+112]
  2630. mov QWORD PTR [r8+16], r13
  2631. adcx r14, rax
  2632. adox r10, rcx
  2633. ; A[5] * B[15]
  2634. mulx rcx, rax, QWORD PTR [rbp+120]
  2635. mov QWORD PTR [r8+24], r14
  2636. mov r11, rdi
  2637. adcx r10, rax
  2638. adox r11, rcx
  2639. adcx r11, r15
  2640. mov r15, rdi
  2641. adox r15, rdi
  2642. adcx r15, rdi
  2643. mov QWORD PTR [r8+32], r10
  2644. mov QWORD PTR [r8+40], r11
  2645. mov rdx, QWORD PTR [r9+48]
  2646. mov r11, QWORD PTR [rbx+48]
  2647. mov r12, QWORD PTR [rbx+56]
  2648. mov r13, QWORD PTR [rbx+64]
  2649. mov r14, QWORD PTR [rbx+72]
  2650. mov r10, QWORD PTR [rbx+80]
  2651. ; A[6] * B[0]
  2652. mulx rcx, rax, QWORD PTR [rbp]
  2653. adcx r11, rax
  2654. adox r12, rcx
  2655. ; A[6] * B[1]
  2656. mulx rcx, rax, QWORD PTR [rbp+8]
  2657. mov QWORD PTR [rbx+48], r11
  2658. adcx r12, rax
  2659. adox r13, rcx
  2660. ; A[6] * B[2]
  2661. mulx rcx, rax, QWORD PTR [rbp+16]
  2662. mov QWORD PTR [rbx+56], r12
  2663. adcx r13, rax
  2664. adox r14, rcx
  2665. ; A[6] * B[3]
  2666. mulx rcx, rax, QWORD PTR [rbp+24]
  2667. mov QWORD PTR [rbx+64], r13
  2668. adcx r14, rax
  2669. adox r10, rcx
  2670. mov QWORD PTR [rbx+72], r14
  2671. mov r11, QWORD PTR [rbx+88]
  2672. mov r12, QWORD PTR [rbx+96]
  2673. mov r13, QWORD PTR [rbx+104]
  2674. mov r14, QWORD PTR [rbx+112]
  2675. ; A[6] * B[4]
  2676. mulx rcx, rax, QWORD PTR [rbp+32]
  2677. adcx r10, rax
  2678. adox r11, rcx
  2679. ; A[6] * B[5]
  2680. mulx rcx, rax, QWORD PTR [rbp+40]
  2681. mov QWORD PTR [rbx+80], r10
  2682. adcx r11, rax
  2683. adox r12, rcx
  2684. ; A[6] * B[6]
  2685. mulx rcx, rax, QWORD PTR [rbp+48]
  2686. mov QWORD PTR [rbx+88], r11
  2687. adcx r12, rax
  2688. adox r13, rcx
  2689. ; A[6] * B[7]
  2690. mulx rcx, rax, QWORD PTR [rbp+56]
  2691. mov QWORD PTR [rbx+96], r12
  2692. adcx r13, rax
  2693. adox r14, rcx
  2694. mov QWORD PTR [rbx+104], r13
  2695. mov r10, QWORD PTR [rbx+120]
  2696. mov r11, QWORD PTR [r8]
  2697. mov r12, QWORD PTR [r8+8]
  2698. mov r13, QWORD PTR [r8+16]
  2699. ; A[6] * B[8]
  2700. mulx rcx, rax, QWORD PTR [rbp+64]
  2701. adcx r14, rax
  2702. adox r10, rcx
  2703. ; A[6] * B[9]
  2704. mulx rcx, rax, QWORD PTR [rbp+72]
  2705. mov QWORD PTR [rbx+112], r14
  2706. adcx r10, rax
  2707. adox r11, rcx
  2708. ; A[6] * B[10]
  2709. mulx rcx, rax, QWORD PTR [rbp+80]
  2710. mov QWORD PTR [rbx+120], r10
  2711. adcx r11, rax
  2712. adox r12, rcx
  2713. ; A[6] * B[11]
  2714. mulx rcx, rax, QWORD PTR [rbp+88]
  2715. mov QWORD PTR [r8], r11
  2716. adcx r12, rax
  2717. adox r13, rcx
  2718. mov QWORD PTR [r8+8], r12
  2719. mov r14, QWORD PTR [r8+24]
  2720. mov r10, QWORD PTR [r8+32]
  2721. mov r11, QWORD PTR [r8+40]
  2722. ; A[6] * B[12]
  2723. mulx rcx, rax, QWORD PTR [rbp+96]
  2724. adcx r13, rax
  2725. adox r14, rcx
  2726. ; A[6] * B[13]
  2727. mulx rcx, rax, QWORD PTR [rbp+104]
  2728. mov QWORD PTR [r8+16], r13
  2729. adcx r14, rax
  2730. adox r10, rcx
  2731. ; A[6] * B[14]
  2732. mulx rcx, rax, QWORD PTR [rbp+112]
  2733. mov QWORD PTR [r8+24], r14
  2734. adcx r10, rax
  2735. adox r11, rcx
  2736. ; A[6] * B[15]
  2737. mulx rcx, rax, QWORD PTR [rbp+120]
  2738. mov QWORD PTR [r8+32], r10
  2739. mov r12, rdi
  2740. adcx r11, rax
  2741. adox r12, rcx
  2742. adcx r12, r15
  2743. mov r15, rdi
  2744. adox r15, rdi
  2745. adcx r15, rdi
  2746. mov QWORD PTR [r8+40], r11
  2747. mov QWORD PTR [r8+48], r12
  2748. mov rdx, QWORD PTR [r9+56]
  2749. mov r12, QWORD PTR [rbx+56]
  2750. mov r13, QWORD PTR [rbx+64]
  2751. mov r14, QWORD PTR [rbx+72]
  2752. mov r10, QWORD PTR [rbx+80]
  2753. mov r11, QWORD PTR [rbx+88]
  2754. ; A[7] * B[0]
  2755. mulx rcx, rax, QWORD PTR [rbp]
  2756. adcx r12, rax
  2757. adox r13, rcx
  2758. ; A[7] * B[1]
  2759. mulx rcx, rax, QWORD PTR [rbp+8]
  2760. mov QWORD PTR [rbx+56], r12
  2761. adcx r13, rax
  2762. adox r14, rcx
  2763. ; A[7] * B[2]
  2764. mulx rcx, rax, QWORD PTR [rbp+16]
  2765. mov QWORD PTR [rbx+64], r13
  2766. adcx r14, rax
  2767. adox r10, rcx
  2768. ; A[7] * B[3]
  2769. mulx rcx, rax, QWORD PTR [rbp+24]
  2770. mov QWORD PTR [rbx+72], r14
  2771. adcx r10, rax
  2772. adox r11, rcx
  2773. mov QWORD PTR [rbx+80], r10
  2774. mov r12, QWORD PTR [rbx+96]
  2775. mov r13, QWORD PTR [rbx+104]
  2776. mov r14, QWORD PTR [rbx+112]
  2777. mov r10, QWORD PTR [rbx+120]
  2778. ; A[7] * B[4]
  2779. mulx rcx, rax, QWORD PTR [rbp+32]
  2780. adcx r11, rax
  2781. adox r12, rcx
  2782. ; A[7] * B[5]
  2783. mulx rcx, rax, QWORD PTR [rbp+40]
  2784. mov QWORD PTR [rbx+88], r11
  2785. adcx r12, rax
  2786. adox r13, rcx
  2787. ; A[7] * B[6]
  2788. mulx rcx, rax, QWORD PTR [rbp+48]
  2789. mov QWORD PTR [rbx+96], r12
  2790. adcx r13, rax
  2791. adox r14, rcx
  2792. ; A[7] * B[7]
  2793. mulx rcx, rax, QWORD PTR [rbp+56]
  2794. mov QWORD PTR [rbx+104], r13
  2795. adcx r14, rax
  2796. adox r10, rcx
  2797. mov QWORD PTR [rbx+112], r14
  2798. mov r11, QWORD PTR [r8]
  2799. mov r12, QWORD PTR [r8+8]
  2800. mov r13, QWORD PTR [r8+16]
  2801. mov r14, QWORD PTR [r8+24]
  2802. ; A[7] * B[8]
  2803. mulx rcx, rax, QWORD PTR [rbp+64]
  2804. adcx r10, rax
  2805. adox r11, rcx
  2806. ; A[7] * B[9]
  2807. mulx rcx, rax, QWORD PTR [rbp+72]
  2808. mov QWORD PTR [rbx+120], r10
  2809. adcx r11, rax
  2810. adox r12, rcx
  2811. ; A[7] * B[10]
  2812. mulx rcx, rax, QWORD PTR [rbp+80]
  2813. mov QWORD PTR [r8], r11
  2814. adcx r12, rax
  2815. adox r13, rcx
  2816. ; A[7] * B[11]
  2817. mulx rcx, rax, QWORD PTR [rbp+88]
  2818. mov QWORD PTR [r8+8], r12
  2819. adcx r13, rax
  2820. adox r14, rcx
  2821. mov QWORD PTR [r8+16], r13
  2822. mov r10, QWORD PTR [r8+32]
  2823. mov r11, QWORD PTR [r8+40]
  2824. mov r12, QWORD PTR [r8+48]
  2825. ; A[7] * B[12]
  2826. mulx rcx, rax, QWORD PTR [rbp+96]
  2827. adcx r14, rax
  2828. adox r10, rcx
  2829. ; A[7] * B[13]
  2830. mulx rcx, rax, QWORD PTR [rbp+104]
  2831. mov QWORD PTR [r8+24], r14
  2832. adcx r10, rax
  2833. adox r11, rcx
  2834. ; A[7] * B[14]
  2835. mulx rcx, rax, QWORD PTR [rbp+112]
  2836. mov QWORD PTR [r8+32], r10
  2837. adcx r11, rax
  2838. adox r12, rcx
  2839. ; A[7] * B[15]
  2840. mulx rcx, rax, QWORD PTR [rbp+120]
  2841. mov QWORD PTR [r8+40], r11
  2842. mov r13, rdi
  2843. adcx r12, rax
  2844. adox r13, rcx
  2845. adcx r13, r15
  2846. mov r15, rdi
  2847. adox r15, rdi
  2848. adcx r15, rdi
  2849. mov QWORD PTR [r8+48], r12
  2850. mov QWORD PTR [r8+56], r13
  2851. mov rdx, QWORD PTR [r9+64]
  2852. mov r13, QWORD PTR [rbx+64]
  2853. mov r14, QWORD PTR [rbx+72]
  2854. mov r10, QWORD PTR [rbx+80]
  2855. mov r11, QWORD PTR [rbx+88]
  2856. mov r12, QWORD PTR [rbx+96]
  2857. ; A[8] * B[0]
  2858. mulx rcx, rax, QWORD PTR [rbp]
  2859. adcx r13, rax
  2860. adox r14, rcx
  2861. ; A[8] * B[1]
  2862. mulx rcx, rax, QWORD PTR [rbp+8]
  2863. mov QWORD PTR [rbx+64], r13
  2864. adcx r14, rax
  2865. adox r10, rcx
  2866. ; A[8] * B[2]
  2867. mulx rcx, rax, QWORD PTR [rbp+16]
  2868. mov QWORD PTR [rbx+72], r14
  2869. adcx r10, rax
  2870. adox r11, rcx
  2871. ; A[8] * B[3]
  2872. mulx rcx, rax, QWORD PTR [rbp+24]
  2873. mov QWORD PTR [rbx+80], r10
  2874. adcx r11, rax
  2875. adox r12, rcx
  2876. mov QWORD PTR [rbx+88], r11
  2877. mov r13, QWORD PTR [rbx+104]
  2878. mov r14, QWORD PTR [rbx+112]
  2879. mov r10, QWORD PTR [rbx+120]
  2880. mov r11, QWORD PTR [r8]
  2881. ; A[8] * B[4]
  2882. mulx rcx, rax, QWORD PTR [rbp+32]
  2883. adcx r12, rax
  2884. adox r13, rcx
  2885. ; A[8] * B[5]
  2886. mulx rcx, rax, QWORD PTR [rbp+40]
  2887. mov QWORD PTR [rbx+96], r12
  2888. adcx r13, rax
  2889. adox r14, rcx
  2890. ; A[8] * B[6]
  2891. mulx rcx, rax, QWORD PTR [rbp+48]
  2892. mov QWORD PTR [rbx+104], r13
  2893. adcx r14, rax
  2894. adox r10, rcx
  2895. ; A[8] * B[7]
  2896. mulx rcx, rax, QWORD PTR [rbp+56]
  2897. mov QWORD PTR [rbx+112], r14
  2898. adcx r10, rax
  2899. adox r11, rcx
  2900. mov QWORD PTR [rbx+120], r10
  2901. mov r12, QWORD PTR [r8+8]
  2902. mov r13, QWORD PTR [r8+16]
  2903. mov r14, QWORD PTR [r8+24]
  2904. mov r10, QWORD PTR [r8+32]
  2905. ; A[8] * B[8]
  2906. mulx rcx, rax, QWORD PTR [rbp+64]
  2907. adcx r11, rax
  2908. adox r12, rcx
  2909. ; A[8] * B[9]
  2910. mulx rcx, rax, QWORD PTR [rbp+72]
  2911. mov QWORD PTR [r8], r11
  2912. adcx r12, rax
  2913. adox r13, rcx
  2914. ; A[8] * B[10]
  2915. mulx rcx, rax, QWORD PTR [rbp+80]
  2916. mov QWORD PTR [r8+8], r12
  2917. adcx r13, rax
  2918. adox r14, rcx
  2919. ; A[8] * B[11]
  2920. mulx rcx, rax, QWORD PTR [rbp+88]
  2921. mov QWORD PTR [r8+16], r13
  2922. adcx r14, rax
  2923. adox r10, rcx
  2924. mov QWORD PTR [r8+24], r14
  2925. mov r11, QWORD PTR [r8+40]
  2926. mov r12, QWORD PTR [r8+48]
  2927. mov r13, QWORD PTR [r8+56]
  2928. ; A[8] * B[12]
  2929. mulx rcx, rax, QWORD PTR [rbp+96]
  2930. adcx r10, rax
  2931. adox r11, rcx
  2932. ; A[8] * B[13]
  2933. mulx rcx, rax, QWORD PTR [rbp+104]
  2934. mov QWORD PTR [r8+32], r10
  2935. adcx r11, rax
  2936. adox r12, rcx
  2937. ; A[8] * B[14]
  2938. mulx rcx, rax, QWORD PTR [rbp+112]
  2939. mov QWORD PTR [r8+40], r11
  2940. adcx r12, rax
  2941. adox r13, rcx
  2942. ; A[8] * B[15]
  2943. mulx rcx, rax, QWORD PTR [rbp+120]
  2944. mov QWORD PTR [r8+48], r12
  2945. mov r14, rdi
  2946. adcx r13, rax
  2947. adox r14, rcx
  2948. adcx r14, r15
  2949. mov r15, rdi
  2950. adox r15, rdi
  2951. adcx r15, rdi
  2952. mov QWORD PTR [r8+56], r13
  2953. mov QWORD PTR [r8+64], r14
  2954. mov rdx, QWORD PTR [r9+72]
  2955. mov r14, QWORD PTR [rbx+72]
  2956. mov r10, QWORD PTR [rbx+80]
  2957. mov r11, QWORD PTR [rbx+88]
  2958. mov r12, QWORD PTR [rbx+96]
  2959. mov r13, QWORD PTR [rbx+104]
  2960. ; A[9] * B[0]
  2961. mulx rcx, rax, QWORD PTR [rbp]
  2962. adcx r14, rax
  2963. adox r10, rcx
  2964. ; A[9] * B[1]
  2965. mulx rcx, rax, QWORD PTR [rbp+8]
  2966. mov QWORD PTR [rbx+72], r14
  2967. adcx r10, rax
  2968. adox r11, rcx
  2969. ; A[9] * B[2]
  2970. mulx rcx, rax, QWORD PTR [rbp+16]
  2971. mov QWORD PTR [rbx+80], r10
  2972. adcx r11, rax
  2973. adox r12, rcx
  2974. ; A[9] * B[3]
  2975. mulx rcx, rax, QWORD PTR [rbp+24]
  2976. mov QWORD PTR [rbx+88], r11
  2977. adcx r12, rax
  2978. adox r13, rcx
  2979. mov QWORD PTR [rbx+96], r12
  2980. mov r14, QWORD PTR [rbx+112]
  2981. mov r10, QWORD PTR [rbx+120]
  2982. mov r11, QWORD PTR [r8]
  2983. mov r12, QWORD PTR [r8+8]
  2984. ; A[9] * B[4]
  2985. mulx rcx, rax, QWORD PTR [rbp+32]
  2986. adcx r13, rax
  2987. adox r14, rcx
  2988. ; A[9] * B[5]
  2989. mulx rcx, rax, QWORD PTR [rbp+40]
  2990. mov QWORD PTR [rbx+104], r13
  2991. adcx r14, rax
  2992. adox r10, rcx
  2993. ; A[9] * B[6]
  2994. mulx rcx, rax, QWORD PTR [rbp+48]
  2995. mov QWORD PTR [rbx+112], r14
  2996. adcx r10, rax
  2997. adox r11, rcx
  2998. ; A[9] * B[7]
  2999. mulx rcx, rax, QWORD PTR [rbp+56]
  3000. mov QWORD PTR [rbx+120], r10
  3001. adcx r11, rax
  3002. adox r12, rcx
  3003. mov QWORD PTR [r8], r11
  3004. mov r13, QWORD PTR [r8+16]
  3005. mov r14, QWORD PTR [r8+24]
  3006. mov r10, QWORD PTR [r8+32]
  3007. mov r11, QWORD PTR [r8+40]
  3008. ; A[9] * B[8]
  3009. mulx rcx, rax, QWORD PTR [rbp+64]
  3010. adcx r12, rax
  3011. adox r13, rcx
  3012. ; A[9] * B[9]
  3013. mulx rcx, rax, QWORD PTR [rbp+72]
  3014. mov QWORD PTR [r8+8], r12
  3015. adcx r13, rax
  3016. adox r14, rcx
  3017. ; A[9] * B[10]
  3018. mulx rcx, rax, QWORD PTR [rbp+80]
  3019. mov QWORD PTR [r8+16], r13
  3020. adcx r14, rax
  3021. adox r10, rcx
  3022. ; A[9] * B[11]
  3023. mulx rcx, rax, QWORD PTR [rbp+88]
  3024. mov QWORD PTR [r8+24], r14
  3025. adcx r10, rax
  3026. adox r11, rcx
  3027. mov QWORD PTR [r8+32], r10
  3028. mov r12, QWORD PTR [r8+48]
  3029. mov r13, QWORD PTR [r8+56]
  3030. mov r14, QWORD PTR [r8+64]
  3031. ; A[9] * B[12]
  3032. mulx rcx, rax, QWORD PTR [rbp+96]
  3033. adcx r11, rax
  3034. adox r12, rcx
  3035. ; A[9] * B[13]
  3036. mulx rcx, rax, QWORD PTR [rbp+104]
  3037. mov QWORD PTR [r8+40], r11
  3038. adcx r12, rax
  3039. adox r13, rcx
  3040. ; A[9] * B[14]
  3041. mulx rcx, rax, QWORD PTR [rbp+112]
  3042. mov QWORD PTR [r8+48], r12
  3043. adcx r13, rax
  3044. adox r14, rcx
  3045. ; A[9] * B[15]
  3046. mulx rcx, rax, QWORD PTR [rbp+120]
  3047. mov QWORD PTR [r8+56], r13
  3048. mov r10, rdi
  3049. adcx r14, rax
  3050. adox r10, rcx
  3051. adcx r10, r15
  3052. mov r15, rdi
  3053. adox r15, rdi
  3054. adcx r15, rdi
  3055. mov QWORD PTR [r8+64], r14
  3056. mov QWORD PTR [r8+72], r10
  3057. mov rdx, QWORD PTR [r9+80]
  3058. mov r10, QWORD PTR [rbx+80]
  3059. mov r11, QWORD PTR [rbx+88]
  3060. mov r12, QWORD PTR [rbx+96]
  3061. mov r13, QWORD PTR [rbx+104]
  3062. mov r14, QWORD PTR [rbx+112]
  3063. ; A[10] * B[0]
  3064. mulx rcx, rax, QWORD PTR [rbp]
  3065. adcx r10, rax
  3066. adox r11, rcx
  3067. ; A[10] * B[1]
  3068. mulx rcx, rax, QWORD PTR [rbp+8]
  3069. mov QWORD PTR [rbx+80], r10
  3070. adcx r11, rax
  3071. adox r12, rcx
  3072. ; A[10] * B[2]
  3073. mulx rcx, rax, QWORD PTR [rbp+16]
  3074. mov QWORD PTR [rbx+88], r11
  3075. adcx r12, rax
  3076. adox r13, rcx
  3077. ; A[10] * B[3]
  3078. mulx rcx, rax, QWORD PTR [rbp+24]
  3079. mov QWORD PTR [rbx+96], r12
  3080. adcx r13, rax
  3081. adox r14, rcx
  3082. mov QWORD PTR [rbx+104], r13
  3083. mov r10, QWORD PTR [rbx+120]
  3084. mov r11, QWORD PTR [r8]
  3085. mov r12, QWORD PTR [r8+8]
  3086. mov r13, QWORD PTR [r8+16]
  3087. ; A[10] * B[4]
  3088. mulx rcx, rax, QWORD PTR [rbp+32]
  3089. adcx r14, rax
  3090. adox r10, rcx
  3091. ; A[10] * B[5]
  3092. mulx rcx, rax, QWORD PTR [rbp+40]
  3093. mov QWORD PTR [rbx+112], r14
  3094. adcx r10, rax
  3095. adox r11, rcx
  3096. ; A[10] * B[6]
  3097. mulx rcx, rax, QWORD PTR [rbp+48]
  3098. mov QWORD PTR [rbx+120], r10
  3099. adcx r11, rax
  3100. adox r12, rcx
  3101. ; A[10] * B[7]
  3102. mulx rcx, rax, QWORD PTR [rbp+56]
  3103. mov QWORD PTR [r8], r11
  3104. adcx r12, rax
  3105. adox r13, rcx
  3106. mov QWORD PTR [r8+8], r12
  3107. mov r14, QWORD PTR [r8+24]
  3108. mov r10, QWORD PTR [r8+32]
  3109. mov r11, QWORD PTR [r8+40]
  3110. mov r12, QWORD PTR [r8+48]
  3111. ; A[10] * B[8]
  3112. mulx rcx, rax, QWORD PTR [rbp+64]
  3113. adcx r13, rax
  3114. adox r14, rcx
  3115. ; A[10] * B[9]
  3116. mulx rcx, rax, QWORD PTR [rbp+72]
  3117. mov QWORD PTR [r8+16], r13
  3118. adcx r14, rax
  3119. adox r10, rcx
  3120. ; A[10] * B[10]
  3121. mulx rcx, rax, QWORD PTR [rbp+80]
  3122. mov QWORD PTR [r8+24], r14
  3123. adcx r10, rax
  3124. adox r11, rcx
  3125. ; A[10] * B[11]
  3126. mulx rcx, rax, QWORD PTR [rbp+88]
  3127. mov QWORD PTR [r8+32], r10
  3128. adcx r11, rax
  3129. adox r12, rcx
  3130. mov QWORD PTR [r8+40], r11
  3131. mov r13, QWORD PTR [r8+56]
  3132. mov r14, QWORD PTR [r8+64]
  3133. mov r10, QWORD PTR [r8+72]
  3134. ; A[10] * B[12]
  3135. mulx rcx, rax, QWORD PTR [rbp+96]
  3136. adcx r12, rax
  3137. adox r13, rcx
  3138. ; A[10] * B[13]
  3139. mulx rcx, rax, QWORD PTR [rbp+104]
  3140. mov QWORD PTR [r8+48], r12
  3141. adcx r13, rax
  3142. adox r14, rcx
  3143. ; A[10] * B[14]
  3144. mulx rcx, rax, QWORD PTR [rbp+112]
  3145. mov QWORD PTR [r8+56], r13
  3146. adcx r14, rax
  3147. adox r10, rcx
  3148. ; A[10] * B[15]
  3149. mulx rcx, rax, QWORD PTR [rbp+120]
  3150. mov QWORD PTR [r8+64], r14
  3151. mov r11, rdi
  3152. adcx r10, rax
  3153. adox r11, rcx
  3154. adcx r11, r15
  3155. mov r15, rdi
  3156. adox r15, rdi
  3157. adcx r15, rdi
  3158. mov QWORD PTR [r8+72], r10
  3159. mov QWORD PTR [r8+80], r11
  3160. mov rdx, QWORD PTR [r9+88]
  3161. mov r11, QWORD PTR [rbx+88]
  3162. mov r12, QWORD PTR [rbx+96]
  3163. mov r13, QWORD PTR [rbx+104]
  3164. mov r14, QWORD PTR [rbx+112]
  3165. mov r10, QWORD PTR [rbx+120]
  3166. ; A[11] * B[0]
  3167. mulx rcx, rax, QWORD PTR [rbp]
  3168. adcx r11, rax
  3169. adox r12, rcx
  3170. ; A[11] * B[1]
  3171. mulx rcx, rax, QWORD PTR [rbp+8]
  3172. mov QWORD PTR [rbx+88], r11
  3173. adcx r12, rax
  3174. adox r13, rcx
  3175. ; A[11] * B[2]
  3176. mulx rcx, rax, QWORD PTR [rbp+16]
  3177. mov QWORD PTR [rbx+96], r12
  3178. adcx r13, rax
  3179. adox r14, rcx
  3180. ; A[11] * B[3]
  3181. mulx rcx, rax, QWORD PTR [rbp+24]
  3182. mov QWORD PTR [rbx+104], r13
  3183. adcx r14, rax
  3184. adox r10, rcx
  3185. mov QWORD PTR [rbx+112], r14
  3186. mov r11, QWORD PTR [r8]
  3187. mov r12, QWORD PTR [r8+8]
  3188. mov r13, QWORD PTR [r8+16]
  3189. mov r14, QWORD PTR [r8+24]
  3190. ; A[11] * B[4]
  3191. mulx rcx, rax, QWORD PTR [rbp+32]
  3192. adcx r10, rax
  3193. adox r11, rcx
  3194. ; A[11] * B[5]
  3195. mulx rcx, rax, QWORD PTR [rbp+40]
  3196. mov QWORD PTR [rbx+120], r10
  3197. adcx r11, rax
  3198. adox r12, rcx
  3199. ; A[11] * B[6]
  3200. mulx rcx, rax, QWORD PTR [rbp+48]
  3201. mov QWORD PTR [r8], r11
  3202. adcx r12, rax
  3203. adox r13, rcx
  3204. ; A[11] * B[7]
  3205. mulx rcx, rax, QWORD PTR [rbp+56]
  3206. mov QWORD PTR [r8+8], r12
  3207. adcx r13, rax
  3208. adox r14, rcx
  3209. mov QWORD PTR [r8+16], r13
  3210. mov r10, QWORD PTR [r8+32]
  3211. mov r11, QWORD PTR [r8+40]
  3212. mov r12, QWORD PTR [r8+48]
  3213. mov r13, QWORD PTR [r8+56]
  3214. ; A[11] * B[8]
  3215. mulx rcx, rax, QWORD PTR [rbp+64]
  3216. adcx r14, rax
  3217. adox r10, rcx
  3218. ; A[11] * B[9]
  3219. mulx rcx, rax, QWORD PTR [rbp+72]
  3220. mov QWORD PTR [r8+24], r14
  3221. adcx r10, rax
  3222. adox r11, rcx
  3223. ; A[11] * B[10]
  3224. mulx rcx, rax, QWORD PTR [rbp+80]
  3225. mov QWORD PTR [r8+32], r10
  3226. adcx r11, rax
  3227. adox r12, rcx
  3228. ; A[11] * B[11]
  3229. mulx rcx, rax, QWORD PTR [rbp+88]
  3230. mov QWORD PTR [r8+40], r11
  3231. adcx r12, rax
  3232. adox r13, rcx
  3233. mov QWORD PTR [r8+48], r12
  3234. mov r14, QWORD PTR [r8+64]
  3235. mov r10, QWORD PTR [r8+72]
  3236. mov r11, QWORD PTR [r8+80]
  3237. ; A[11] * B[12]
  3238. mulx rcx, rax, QWORD PTR [rbp+96]
  3239. adcx r13, rax
  3240. adox r14, rcx
  3241. ; A[11] * B[13]
  3242. mulx rcx, rax, QWORD PTR [rbp+104]
  3243. mov QWORD PTR [r8+56], r13
  3244. adcx r14, rax
  3245. adox r10, rcx
  3246. ; A[11] * B[14]
  3247. mulx rcx, rax, QWORD PTR [rbp+112]
  3248. mov QWORD PTR [r8+64], r14
  3249. adcx r10, rax
  3250. adox r11, rcx
  3251. ; A[11] * B[15]
  3252. mulx rcx, rax, QWORD PTR [rbp+120]
  3253. mov QWORD PTR [r8+72], r10
  3254. mov r12, rdi
  3255. adcx r11, rax
  3256. adox r12, rcx
  3257. adcx r12, r15
  3258. mov r15, rdi
  3259. adox r15, rdi
  3260. adcx r15, rdi
  3261. mov QWORD PTR [r8+80], r11
  3262. mov QWORD PTR [r8+88], r12
  3263. mov rdx, QWORD PTR [r9+96]
  3264. mov r12, QWORD PTR [rbx+96]
  3265. mov r13, QWORD PTR [rbx+104]
  3266. mov r14, QWORD PTR [rbx+112]
  3267. mov r10, QWORD PTR [rbx+120]
  3268. mov r11, QWORD PTR [r8]
  3269. ; A[12] * B[0]
  3270. mulx rcx, rax, QWORD PTR [rbp]
  3271. adcx r12, rax
  3272. adox r13, rcx
  3273. ; A[12] * B[1]
  3274. mulx rcx, rax, QWORD PTR [rbp+8]
  3275. mov QWORD PTR [rbx+96], r12
  3276. adcx r13, rax
  3277. adox r14, rcx
  3278. ; A[12] * B[2]
  3279. mulx rcx, rax, QWORD PTR [rbp+16]
  3280. mov QWORD PTR [rbx+104], r13
  3281. adcx r14, rax
  3282. adox r10, rcx
  3283. ; A[12] * B[3]
  3284. mulx rcx, rax, QWORD PTR [rbp+24]
  3285. mov QWORD PTR [rbx+112], r14
  3286. adcx r10, rax
  3287. adox r11, rcx
  3288. mov QWORD PTR [rbx+120], r10
  3289. mov r12, QWORD PTR [r8+8]
  3290. mov r13, QWORD PTR [r8+16]
  3291. mov r14, QWORD PTR [r8+24]
  3292. mov r10, QWORD PTR [r8+32]
  3293. ; A[12] * B[4]
  3294. mulx rcx, rax, QWORD PTR [rbp+32]
  3295. adcx r11, rax
  3296. adox r12, rcx
  3297. ; A[12] * B[5]
  3298. mulx rcx, rax, QWORD PTR [rbp+40]
  3299. mov QWORD PTR [r8], r11
  3300. adcx r12, rax
  3301. adox r13, rcx
  3302. ; A[12] * B[6]
  3303. mulx rcx, rax, QWORD PTR [rbp+48]
  3304. mov QWORD PTR [r8+8], r12
  3305. adcx r13, rax
  3306. adox r14, rcx
  3307. ; A[12] * B[7]
  3308. mulx rcx, rax, QWORD PTR [rbp+56]
  3309. mov QWORD PTR [r8+16], r13
  3310. adcx r14, rax
  3311. adox r10, rcx
  3312. mov QWORD PTR [r8+24], r14
  3313. mov r11, QWORD PTR [r8+40]
  3314. mov r12, QWORD PTR [r8+48]
  3315. mov r13, QWORD PTR [r8+56]
  3316. mov r14, QWORD PTR [r8+64]
  3317. ; A[12] * B[8]
  3318. mulx rcx, rax, QWORD PTR [rbp+64]
  3319. adcx r10, rax
  3320. adox r11, rcx
  3321. ; A[12] * B[9]
  3322. mulx rcx, rax, QWORD PTR [rbp+72]
  3323. mov QWORD PTR [r8+32], r10
  3324. adcx r11, rax
  3325. adox r12, rcx
  3326. ; A[12] * B[10]
  3327. mulx rcx, rax, QWORD PTR [rbp+80]
  3328. mov QWORD PTR [r8+40], r11
  3329. adcx r12, rax
  3330. adox r13, rcx
  3331. ; A[12] * B[11]
  3332. mulx rcx, rax, QWORD PTR [rbp+88]
  3333. mov QWORD PTR [r8+48], r12
  3334. adcx r13, rax
  3335. adox r14, rcx
  3336. mov QWORD PTR [r8+56], r13
  3337. mov r10, QWORD PTR [r8+72]
  3338. mov r11, QWORD PTR [r8+80]
  3339. mov r12, QWORD PTR [r8+88]
  3340. ; A[12] * B[12]
  3341. mulx rcx, rax, QWORD PTR [rbp+96]
  3342. adcx r14, rax
  3343. adox r10, rcx
  3344. ; A[12] * B[13]
  3345. mulx rcx, rax, QWORD PTR [rbp+104]
  3346. mov QWORD PTR [r8+64], r14
  3347. adcx r10, rax
  3348. adox r11, rcx
  3349. ; A[12] * B[14]
  3350. mulx rcx, rax, QWORD PTR [rbp+112]
  3351. mov QWORD PTR [r8+72], r10
  3352. adcx r11, rax
  3353. adox r12, rcx
  3354. ; A[12] * B[15]
  3355. mulx rcx, rax, QWORD PTR [rbp+120]
  3356. mov QWORD PTR [r8+80], r11
  3357. mov r13, rdi
  3358. adcx r12, rax
  3359. adox r13, rcx
  3360. adcx r13, r15
  3361. mov r15, rdi
  3362. adox r15, rdi
  3363. adcx r15, rdi
  3364. mov QWORD PTR [r8+88], r12
  3365. mov QWORD PTR [r8+96], r13
  3366. mov rdx, QWORD PTR [r9+104]
  3367. mov r13, QWORD PTR [rbx+104]
  3368. mov r14, QWORD PTR [rbx+112]
  3369. mov r10, QWORD PTR [rbx+120]
  3370. mov r11, QWORD PTR [r8]
  3371. mov r12, QWORD PTR [r8+8]
  3372. ; A[13] * B[0]
  3373. mulx rcx, rax, QWORD PTR [rbp]
  3374. adcx r13, rax
  3375. adox r14, rcx
  3376. ; A[13] * B[1]
  3377. mulx rcx, rax, QWORD PTR [rbp+8]
  3378. mov QWORD PTR [rbx+104], r13
  3379. adcx r14, rax
  3380. adox r10, rcx
  3381. ; A[13] * B[2]
  3382. mulx rcx, rax, QWORD PTR [rbp+16]
  3383. mov QWORD PTR [rbx+112], r14
  3384. adcx r10, rax
  3385. adox r11, rcx
  3386. ; A[13] * B[3]
  3387. mulx rcx, rax, QWORD PTR [rbp+24]
  3388. mov QWORD PTR [rbx+120], r10
  3389. adcx r11, rax
  3390. adox r12, rcx
  3391. mov QWORD PTR [r8], r11
  3392. mov r13, QWORD PTR [r8+16]
  3393. mov r14, QWORD PTR [r8+24]
  3394. mov r10, QWORD PTR [r8+32]
  3395. mov r11, QWORD PTR [r8+40]
  3396. ; A[13] * B[4]
  3397. mulx rcx, rax, QWORD PTR [rbp+32]
  3398. adcx r12, rax
  3399. adox r13, rcx
  3400. ; A[13] * B[5]
  3401. mulx rcx, rax, QWORD PTR [rbp+40]
  3402. mov QWORD PTR [r8+8], r12
  3403. adcx r13, rax
  3404. adox r14, rcx
  3405. ; A[13] * B[6]
  3406. mulx rcx, rax, QWORD PTR [rbp+48]
  3407. mov QWORD PTR [r8+16], r13
  3408. adcx r14, rax
  3409. adox r10, rcx
  3410. ; A[13] * B[7]
  3411. mulx rcx, rax, QWORD PTR [rbp+56]
  3412. mov QWORD PTR [r8+24], r14
  3413. adcx r10, rax
  3414. adox r11, rcx
  3415. mov QWORD PTR [r8+32], r10
  3416. mov r12, QWORD PTR [r8+48]
  3417. mov r13, QWORD PTR [r8+56]
  3418. mov r14, QWORD PTR [r8+64]
  3419. mov r10, QWORD PTR [r8+72]
  3420. ; A[13] * B[8]
  3421. mulx rcx, rax, QWORD PTR [rbp+64]
  3422. adcx r11, rax
  3423. adox r12, rcx
  3424. ; A[13] * B[9]
  3425. mulx rcx, rax, QWORD PTR [rbp+72]
  3426. mov QWORD PTR [r8+40], r11
  3427. adcx r12, rax
  3428. adox r13, rcx
  3429. ; A[13] * B[10]
  3430. mulx rcx, rax, QWORD PTR [rbp+80]
  3431. mov QWORD PTR [r8+48], r12
  3432. adcx r13, rax
  3433. adox r14, rcx
  3434. ; A[13] * B[11]
  3435. mulx rcx, rax, QWORD PTR [rbp+88]
  3436. mov QWORD PTR [r8+56], r13
  3437. adcx r14, rax
  3438. adox r10, rcx
  3439. mov QWORD PTR [r8+64], r14
  3440. mov r11, QWORD PTR [r8+80]
  3441. mov r12, QWORD PTR [r8+88]
  3442. mov r13, QWORD PTR [r8+96]
  3443. ; A[13] * B[12]
  3444. mulx rcx, rax, QWORD PTR [rbp+96]
  3445. adcx r10, rax
  3446. adox r11, rcx
  3447. ; A[13] * B[13]
  3448. mulx rcx, rax, QWORD PTR [rbp+104]
  3449. mov QWORD PTR [r8+72], r10
  3450. adcx r11, rax
  3451. adox r12, rcx
  3452. ; A[13] * B[14]
  3453. mulx rcx, rax, QWORD PTR [rbp+112]
  3454. mov QWORD PTR [r8+80], r11
  3455. adcx r12, rax
  3456. adox r13, rcx
  3457. ; A[13] * B[15]
  3458. mulx rcx, rax, QWORD PTR [rbp+120]
  3459. mov QWORD PTR [r8+88], r12
  3460. mov r14, rdi
  3461. adcx r13, rax
  3462. adox r14, rcx
  3463. adcx r14, r15
  3464. mov r15, rdi
  3465. adox r15, rdi
  3466. adcx r15, rdi
  3467. mov QWORD PTR [r8+96], r13
  3468. mov QWORD PTR [r8+104], r14
  3469. mov rdx, QWORD PTR [r9+112]
  3470. mov r14, QWORD PTR [rbx+112]
  3471. mov r10, QWORD PTR [rbx+120]
  3472. mov r11, QWORD PTR [r8]
  3473. mov r12, QWORD PTR [r8+8]
  3474. mov r13, QWORD PTR [r8+16]
  3475. ; A[14] * B[0]
  3476. mulx rcx, rax, QWORD PTR [rbp]
  3477. adcx r14, rax
  3478. adox r10, rcx
  3479. ; A[14] * B[1]
  3480. mulx rcx, rax, QWORD PTR [rbp+8]
  3481. mov QWORD PTR [rbx+112], r14
  3482. adcx r10, rax
  3483. adox r11, rcx
  3484. ; A[14] * B[2]
  3485. mulx rcx, rax, QWORD PTR [rbp+16]
  3486. mov QWORD PTR [rbx+120], r10
  3487. adcx r11, rax
  3488. adox r12, rcx
  3489. ; A[14] * B[3]
  3490. mulx rcx, rax, QWORD PTR [rbp+24]
  3491. mov QWORD PTR [r8], r11
  3492. adcx r12, rax
  3493. adox r13, rcx
  3494. mov QWORD PTR [r8+8], r12
  3495. mov r14, QWORD PTR [r8+24]
  3496. mov r10, QWORD PTR [r8+32]
  3497. mov r11, QWORD PTR [r8+40]
  3498. mov r12, QWORD PTR [r8+48]
  3499. ; A[14] * B[4]
  3500. mulx rcx, rax, QWORD PTR [rbp+32]
  3501. adcx r13, rax
  3502. adox r14, rcx
  3503. ; A[14] * B[5]
  3504. mulx rcx, rax, QWORD PTR [rbp+40]
  3505. mov QWORD PTR [r8+16], r13
  3506. adcx r14, rax
  3507. adox r10, rcx
  3508. ; A[14] * B[6]
  3509. mulx rcx, rax, QWORD PTR [rbp+48]
  3510. mov QWORD PTR [r8+24], r14
  3511. adcx r10, rax
  3512. adox r11, rcx
  3513. ; A[14] * B[7]
  3514. mulx rcx, rax, QWORD PTR [rbp+56]
  3515. mov QWORD PTR [r8+32], r10
  3516. adcx r11, rax
  3517. adox r12, rcx
  3518. mov QWORD PTR [r8+40], r11
  3519. mov r13, QWORD PTR [r8+56]
  3520. mov r14, QWORD PTR [r8+64]
  3521. mov r10, QWORD PTR [r8+72]
  3522. mov r11, QWORD PTR [r8+80]
  3523. ; A[14] * B[8]
  3524. mulx rcx, rax, QWORD PTR [rbp+64]
  3525. adcx r12, rax
  3526. adox r13, rcx
  3527. ; A[14] * B[9]
  3528. mulx rcx, rax, QWORD PTR [rbp+72]
  3529. mov QWORD PTR [r8+48], r12
  3530. adcx r13, rax
  3531. adox r14, rcx
  3532. ; A[14] * B[10]
  3533. mulx rcx, rax, QWORD PTR [rbp+80]
  3534. mov QWORD PTR [r8+56], r13
  3535. adcx r14, rax
  3536. adox r10, rcx
  3537. ; A[14] * B[11]
  3538. mulx rcx, rax, QWORD PTR [rbp+88]
  3539. mov QWORD PTR [r8+64], r14
  3540. adcx r10, rax
  3541. adox r11, rcx
  3542. mov QWORD PTR [r8+72], r10
  3543. mov r12, QWORD PTR [r8+88]
  3544. mov r13, QWORD PTR [r8+96]
  3545. mov r14, QWORD PTR [r8+104]
  3546. ; A[14] * B[12]
  3547. mulx rcx, rax, QWORD PTR [rbp+96]
  3548. adcx r11, rax
  3549. adox r12, rcx
  3550. ; A[14] * B[13]
  3551. mulx rcx, rax, QWORD PTR [rbp+104]
  3552. mov QWORD PTR [r8+80], r11
  3553. adcx r12, rax
  3554. adox r13, rcx
  3555. ; A[14] * B[14]
  3556. mulx rcx, rax, QWORD PTR [rbp+112]
  3557. mov QWORD PTR [r8+88], r12
  3558. adcx r13, rax
  3559. adox r14, rcx
  3560. ; A[14] * B[15]
  3561. mulx rcx, rax, QWORD PTR [rbp+120]
  3562. mov QWORD PTR [r8+96], r13
  3563. mov r10, rdi
  3564. adcx r14, rax
  3565. adox r10, rcx
  3566. adcx r10, r15
  3567. mov r15, rdi
  3568. adox r15, rdi
  3569. adcx r15, rdi
  3570. mov QWORD PTR [r8+104], r14
  3571. mov QWORD PTR [r8+112], r10
  3572. mov rdx, QWORD PTR [r9+120]
  3573. mov r10, QWORD PTR [rbx+120]
  3574. mov r11, QWORD PTR [r8]
  3575. mov r12, QWORD PTR [r8+8]
  3576. mov r13, QWORD PTR [r8+16]
  3577. mov r14, QWORD PTR [r8+24]
  3578. ; A[15] * B[0]
  3579. mulx rcx, rax, QWORD PTR [rbp]
  3580. adcx r10, rax
  3581. adox r11, rcx
  3582. ; A[15] * B[1]
  3583. mulx rcx, rax, QWORD PTR [rbp+8]
  3584. mov QWORD PTR [rbx+120], r10
  3585. adcx r11, rax
  3586. adox r12, rcx
  3587. ; A[15] * B[2]
  3588. mulx rcx, rax, QWORD PTR [rbp+16]
  3589. mov QWORD PTR [r8], r11
  3590. adcx r12, rax
  3591. adox r13, rcx
  3592. ; A[15] * B[3]
  3593. mulx rcx, rax, QWORD PTR [rbp+24]
  3594. mov QWORD PTR [r8+8], r12
  3595. adcx r13, rax
  3596. adox r14, rcx
  3597. mov QWORD PTR [r8+16], r13
  3598. mov r10, QWORD PTR [r8+32]
  3599. mov r11, QWORD PTR [r8+40]
  3600. mov r12, QWORD PTR [r8+48]
  3601. mov r13, QWORD PTR [r8+56]
  3602. ; A[15] * B[4]
  3603. mulx rcx, rax, QWORD PTR [rbp+32]
  3604. adcx r14, rax
  3605. adox r10, rcx
  3606. ; A[15] * B[5]
  3607. mulx rcx, rax, QWORD PTR [rbp+40]
  3608. mov QWORD PTR [r8+24], r14
  3609. adcx r10, rax
  3610. adox r11, rcx
  3611. ; A[15] * B[6]
  3612. mulx rcx, rax, QWORD PTR [rbp+48]
  3613. mov QWORD PTR [r8+32], r10
  3614. adcx r11, rax
  3615. adox r12, rcx
  3616. ; A[15] * B[7]
  3617. mulx rcx, rax, QWORD PTR [rbp+56]
  3618. mov QWORD PTR [r8+40], r11
  3619. adcx r12, rax
  3620. adox r13, rcx
  3621. mov QWORD PTR [r8+48], r12
  3622. mov r14, QWORD PTR [r8+64]
  3623. mov r10, QWORD PTR [r8+72]
  3624. mov r11, QWORD PTR [r8+80]
  3625. mov r12, QWORD PTR [r8+88]
  3626. ; A[15] * B[8]
  3627. mulx rcx, rax, QWORD PTR [rbp+64]
  3628. adcx r13, rax
  3629. adox r14, rcx
  3630. ; A[15] * B[9]
  3631. mulx rcx, rax, QWORD PTR [rbp+72]
  3632. mov QWORD PTR [r8+56], r13
  3633. adcx r14, rax
  3634. adox r10, rcx
  3635. ; A[15] * B[10]
  3636. mulx rcx, rax, QWORD PTR [rbp+80]
  3637. mov QWORD PTR [r8+64], r14
  3638. adcx r10, rax
  3639. adox r11, rcx
  3640. ; A[15] * B[11]
  3641. mulx rcx, rax, QWORD PTR [rbp+88]
  3642. mov QWORD PTR [r8+72], r10
  3643. adcx r11, rax
  3644. adox r12, rcx
  3645. mov QWORD PTR [r8+80], r11
  3646. mov r13, QWORD PTR [r8+96]
  3647. mov r14, QWORD PTR [r8+104]
  3648. mov r10, QWORD PTR [r8+112]
  3649. ; A[15] * B[12]
  3650. mulx rcx, rax, QWORD PTR [rbp+96]
  3651. adcx r12, rax
  3652. adox r13, rcx
  3653. ; A[15] * B[13]
  3654. mulx rcx, rax, QWORD PTR [rbp+104]
  3655. mov QWORD PTR [r8+88], r12
  3656. adcx r13, rax
  3657. adox r14, rcx
  3658. ; A[15] * B[14]
  3659. mulx rcx, rax, QWORD PTR [rbp+112]
  3660. mov QWORD PTR [r8+96], r13
  3661. adcx r14, rax
  3662. adox r10, rcx
  3663. ; A[15] * B[15]
  3664. mulx rcx, rax, QWORD PTR [rbp+120]
  3665. mov QWORD PTR [r8+104], r14
  3666. mov r11, rdi
  3667. adcx r10, rax
  3668. adox r11, rcx
  3669. adcx r11, r15
  3670. mov QWORD PTR [r8+112], r10
  3671. mov QWORD PTR [r8+120], r11
  3672. sub r8, 128
  3673. cmp r9, r8
  3674. je L_start_2048_mul_avx2_16
  3675. cmp rbp, r8
  3676. jne L_end_2048_mul_avx2_16
  3677. L_start_2048_mul_avx2_16:
  3678. vmovdqu xmm0, OWORD PTR [rbx]
  3679. vmovups OWORD PTR [r8], xmm0
  3680. vmovdqu xmm0, OWORD PTR [rbx+16]
  3681. vmovups OWORD PTR [r8+16], xmm0
  3682. vmovdqu xmm0, OWORD PTR [rbx+32]
  3683. vmovups OWORD PTR [r8+32], xmm0
  3684. vmovdqu xmm0, OWORD PTR [rbx+48]
  3685. vmovups OWORD PTR [r8+48], xmm0
  3686. vmovdqu xmm0, OWORD PTR [rbx+64]
  3687. vmovups OWORD PTR [r8+64], xmm0
  3688. vmovdqu xmm0, OWORD PTR [rbx+80]
  3689. vmovups OWORD PTR [r8+80], xmm0
  3690. vmovdqu xmm0, OWORD PTR [rbx+96]
  3691. vmovups OWORD PTR [r8+96], xmm0
  3692. vmovdqu xmm0, OWORD PTR [rbx+112]
  3693. vmovups OWORD PTR [r8+112], xmm0
  3694. L_end_2048_mul_avx2_16:
  3695. add rsp, 128
  3696. pop rdi
  3697. pop r15
  3698. pop r14
  3699. pop r13
  3700. pop r12
  3701. pop rbp
  3702. pop rbx
  3703. ret
  3704. sp_2048_mul_avx2_16 ENDP
  3705. _text ENDS
  3706. ENDIF
  ; /* Add b to a into r. (r = a + b)
  ;  *
  ;  * r  Result; sixteen 64-bit words are stored through rcx.
  ;  * a  First addend; sixteen 64-bit words are loaded through rdx.
  ;  * b  Second addend; sixteen 64-bit words are loaded through r8.
  ;  *
  ;  * rax returns the carry out of the most significant word (0 or 1).
  ;  * Only rax, r9, r10 and the flags are modified.
  ;  */
  ; One interior step of the carry chain.  The next word of a is fetched
  ; into `fresh`, the word already summed (held in `ready`) is stored one
  ; slot below, and then the matching word of b is added with carry.  The
  ; two mov instructions leave the carry flag untouched for the adc.
  sp_2048_add_16_step MACRO fresh, ready, disp
          mov     fresh, QWORD PTR [rdx+disp]
          mov     QWORD PTR [rcx+disp-8], ready
          adc     fresh, QWORD PTR [r8+disp]
  ENDM
  _text SEGMENT READONLY PARA
  sp_2048_add_16 PROC
          ; rax = 0 before the chain starts; writing eax clears all of rax.
          xor     eax, eax
          ; Word 0 opens the chain with a plain add.
          mov     r9, QWORD PTR [rdx]
          add     r9, QWORD PTR [r8]
          ; Words 1..14, two per pass, ping-ponging between r10 and r9.
          sp_2048_add_16_disp = 8
          REPEAT 7
          sp_2048_add_16_step r10, r9, sp_2048_add_16_disp
          sp_2048_add_16_step r9, r10, sp_2048_add_16_disp+8
          sp_2048_add_16_disp = sp_2048_add_16_disp + 16
          ENDM
          ; Word 15, then flush the last sum.
          sp_2048_add_16_step r10, r9, 120
          mov     QWORD PTR [rcx+120], r10
          ; Fold the final carry flag into the return value.
          adc     rax, 0
          ret
  sp_2048_add_16 ENDP
  _text ENDS
  ; /* Sub b from a into a. (a -= b)
  ;  *
  ;  * a  Minuend and result; thirty-two 64-bit words are loaded and then
  ;  *    overwritten through rcx.
  ;  * b  Subtrahend; thirty-two 64-bit words are loaded through rdx.
  ;  *
  ;  * rax returns 0 when no borrow leaves the most significant word and
  ;  * all ones (-1) when one does.
  ;  * Only rax, r8, r9 and the flags are modified.
  ;  */
  ; One interior step of the borrow chain.  The next word of a is fetched
  ; into `fresh`, the difference already computed (held in `ready`) is
  ; written back one slot below, and then the matching word of b is
  ; subtracted with borrow.  The two mov instructions leave the carry flag
  ; untouched for the sbb.
  sp_2048_sub_in_place_32_step MACRO fresh, ready, disp
          mov     fresh, QWORD PTR [rcx+disp]
          mov     QWORD PTR [rcx+disp-8], ready
          sbb     fresh, QWORD PTR [rdx+disp]
  ENDM
  _text SEGMENT READONLY PARA
  sp_2048_sub_in_place_32 PROC
          ; Word 0 opens the chain with a plain sub.
          mov     r8, QWORD PTR [rcx]
          sub     r8, QWORD PTR [rdx]
          ; Words 1..30, two per pass, ping-ponging between r9 and r8.
          sp_2048_sub_in_place_32_disp = 8
          REPEAT 15
          sp_2048_sub_in_place_32_step r9, r8, sp_2048_sub_in_place_32_disp
          sp_2048_sub_in_place_32_step r8, r9, sp_2048_sub_in_place_32_disp+8
          sp_2048_sub_in_place_32_disp = sp_2048_sub_in_place_32_disp + 16
          ENDM
          ; Word 31, then flush the last difference.
          sp_2048_sub_in_place_32_step r9, r8, 248
          mov     QWORD PTR [rcx+248], r9
          ; rax - rax - CF: turns the final borrow into 0 or -1, whatever
          ; rax held on entry.
          sbb     rax, rax
          ret
  sp_2048_sub_in_place_32 ENDP
  _text ENDS
  ; /* Add b to a into r. (r = a + b)
  ;  *
  ;  * r  Result; thirty-two 64-bit words are stored through rcx.
  ;  * a  First addend; thirty-two 64-bit words are loaded through rdx.
  ;  * b  Second addend; thirty-two 64-bit words are loaded through r8.
  ;  *
  ;  * rax returns the carry out of the most significant word (0 or 1).
  ;  * Only rax, r9, r10 and the flags are modified.
  ;  */
  ; One interior step of the carry chain.  The next word of a is fetched
  ; into `fresh`, the word already summed (held in `ready`) is stored one
  ; slot below, and then the matching word of b is added with carry.  The
  ; two mov instructions leave the carry flag untouched for the adc.
  sp_2048_add_32_step MACRO fresh, ready, disp
          mov     fresh, QWORD PTR [rdx+disp]
          mov     QWORD PTR [rcx+disp-8], ready
          adc     fresh, QWORD PTR [r8+disp]
  ENDM
  _text SEGMENT READONLY PARA
  sp_2048_add_32 PROC
          ; rax = 0 before the chain starts; writing eax clears all of rax.
          xor     eax, eax
          ; Word 0 opens the chain with a plain add.
          mov     r9, QWORD PTR [rdx]
          add     r9, QWORD PTR [r8]
          ; Words 1..30, two per pass, ping-ponging between r10 and r9.
          sp_2048_add_32_disp = 8
          REPEAT 15
          sp_2048_add_32_step r10, r9, sp_2048_add_32_disp
          sp_2048_add_32_step r9, r10, sp_2048_add_32_disp+8
          sp_2048_add_32_disp = sp_2048_add_32_disp + 16
          ENDM
          ; Word 31, then flush the last sum.
          sp_2048_add_32_step r10, r9, 248
          mov     QWORD PTR [rcx+248], r10
          ; Fold the final carry flag into the return value.
          adc     rax, 0
          ret
  sp_2048_add_32 ENDP
  _text ENDS
  3986. ; /* Multiply a and b into r. (r = a * b)
  3987. ; *
  3988. ; * r A single precision integer.
  3989. ; * a A single precision integer.
  3990. ; * b A single precision integer.
  3991. ; */
  3992. _text SEGMENT READONLY PARA
  3993. sp_2048_mul_32 PROC
  3994. push r12
  3995. push r13
  3996. push r14
  3997. push r15
  3998. push rdi
  3999. push rsi
  4000. sub rsp, 808
  4001. mov QWORD PTR [rsp+768], rcx
  4002. mov QWORD PTR [rsp+776], rdx
  4003. mov QWORD PTR [rsp+784], r8
  4004. lea r12, QWORD PTR [rsp+512]
  4005. lea r14, QWORD PTR [rdx+128]
  4006. ; Add
  4007. mov rax, QWORD PTR [rdx]
  4008. xor r15, r15
  4009. add rax, QWORD PTR [r14]
  4010. mov r9, QWORD PTR [rdx+8]
  4011. mov QWORD PTR [r12], rax
  4012. adc r9, QWORD PTR [r14+8]
  4013. mov r10, QWORD PTR [rdx+16]
  4014. mov QWORD PTR [r12+8], r9
  4015. adc r10, QWORD PTR [r14+16]
  4016. mov rax, QWORD PTR [rdx+24]
  4017. mov QWORD PTR [r12+16], r10
  4018. adc rax, QWORD PTR [r14+24]
  4019. mov r9, QWORD PTR [rdx+32]
  4020. mov QWORD PTR [r12+24], rax
  4021. adc r9, QWORD PTR [r14+32]
  4022. mov r10, QWORD PTR [rdx+40]
  4023. mov QWORD PTR [r12+32], r9
  4024. adc r10, QWORD PTR [r14+40]
  4025. mov rax, QWORD PTR [rdx+48]
  4026. mov QWORD PTR [r12+40], r10
  4027. adc rax, QWORD PTR [r14+48]
  4028. mov r9, QWORD PTR [rdx+56]
  4029. mov QWORD PTR [r12+48], rax
  4030. adc r9, QWORD PTR [r14+56]
  4031. mov r10, QWORD PTR [rdx+64]
  4032. mov QWORD PTR [r12+56], r9
  4033. adc r10, QWORD PTR [r14+64]
  4034. mov rax, QWORD PTR [rdx+72]
  4035. mov QWORD PTR [r12+64], r10
  4036. adc rax, QWORD PTR [r14+72]
  4037. mov r9, QWORD PTR [rdx+80]
  4038. mov QWORD PTR [r12+72], rax
  4039. adc r9, QWORD PTR [r14+80]
  4040. mov r10, QWORD PTR [rdx+88]
  4041. mov QWORD PTR [r12+80], r9
  4042. adc r10, QWORD PTR [r14+88]
  4043. mov rax, QWORD PTR [rdx+96]
  4044. mov QWORD PTR [r12+88], r10
  4045. adc rax, QWORD PTR [r14+96]
  4046. mov r9, QWORD PTR [rdx+104]
  4047. mov QWORD PTR [r12+96], rax
  4048. adc r9, QWORD PTR [r14+104]
  4049. mov r10, QWORD PTR [rdx+112]
  4050. mov QWORD PTR [r12+104], r9
  4051. adc r10, QWORD PTR [r14+112]
  4052. mov rax, QWORD PTR [rdx+120]
  4053. mov QWORD PTR [r12+112], r10
  4054. adc rax, QWORD PTR [r14+120]
  4055. mov QWORD PTR [r12+120], rax
  4056. adc r15, 0
  4057. mov QWORD PTR [rsp+792], r15
  4058. lea r13, QWORD PTR [rsp+640]
  4059. lea r14, QWORD PTR [r8+128]
  4060. ; Add
  4061. mov rax, QWORD PTR [r8]
  4062. xor rdi, rdi
  4063. add rax, QWORD PTR [r14]
  4064. mov r9, QWORD PTR [r8+8]
  4065. mov QWORD PTR [r13], rax
  4066. adc r9, QWORD PTR [r14+8]
  4067. mov r10, QWORD PTR [r8+16]
  4068. mov QWORD PTR [r13+8], r9
  4069. adc r10, QWORD PTR [r14+16]
  4070. mov rax, QWORD PTR [r8+24]
  4071. mov QWORD PTR [r13+16], r10
  4072. adc rax, QWORD PTR [r14+24]
  4073. mov r9, QWORD PTR [r8+32]
  4074. mov QWORD PTR [r13+24], rax
  4075. adc r9, QWORD PTR [r14+32]
  4076. mov r10, QWORD PTR [r8+40]
  4077. mov QWORD PTR [r13+32], r9
  4078. adc r10, QWORD PTR [r14+40]
  4079. mov rax, QWORD PTR [r8+48]
  4080. mov QWORD PTR [r13+40], r10
  4081. adc rax, QWORD PTR [r14+48]
  4082. mov r9, QWORD PTR [r8+56]
  4083. mov QWORD PTR [r13+48], rax
  4084. adc r9, QWORD PTR [r14+56]
  4085. mov r10, QWORD PTR [r8+64]
  4086. mov QWORD PTR [r13+56], r9
  4087. adc r10, QWORD PTR [r14+64]
  4088. mov rax, QWORD PTR [r8+72]
  4089. mov QWORD PTR [r13+64], r10
  4090. adc rax, QWORD PTR [r14+72]
  4091. mov r9, QWORD PTR [r8+80]
  4092. mov QWORD PTR [r13+72], rax
  4093. adc r9, QWORD PTR [r14+80]
  4094. mov r10, QWORD PTR [r8+88]
  4095. mov QWORD PTR [r13+80], r9
  4096. adc r10, QWORD PTR [r14+88]
  4097. mov rax, QWORD PTR [r8+96]
  4098. mov QWORD PTR [r13+88], r10
  4099. adc rax, QWORD PTR [r14+96]
  4100. mov r9, QWORD PTR [r8+104]
  4101. mov QWORD PTR [r13+96], rax
  4102. adc r9, QWORD PTR [r14+104]
  4103. mov r10, QWORD PTR [r8+112]
  4104. mov QWORD PTR [r13+104], r9
  4105. adc r10, QWORD PTR [r14+112]
  4106. mov rax, QWORD PTR [r8+120]
  4107. mov QWORD PTR [r13+112], r10
  4108. adc rax, QWORD PTR [r14+120]
  4109. mov QWORD PTR [r13+120], rax
  4110. adc rdi, 0
  4111. mov QWORD PTR [rsp+800], rdi
  4112. mov r8, r13
  4113. mov rdx, r12
  4114. mov rcx, rsp
  4115. call sp_2048_mul_16
  4116. mov r8, QWORD PTR [rsp+784]
  4117. mov rdx, QWORD PTR [rsp+776]
  4118. lea rcx, QWORD PTR [rsp+256]
  4119. add r8, 128
  4120. add rdx, 128
  4121. call sp_2048_mul_16
  4122. mov r8, QWORD PTR [rsp+784]
  4123. mov rdx, QWORD PTR [rsp+776]
  4124. mov rcx, QWORD PTR [rsp+768]
  4125. call sp_2048_mul_16
  4126. IFDEF _WIN64
  4127. mov r8, QWORD PTR [rsp+784]
  4128. mov rdx, QWORD PTR [rsp+776]
  4129. mov rcx, QWORD PTR [rsp+768]
  4130. ENDIF
  4131. mov r15, QWORD PTR [rsp+792]
  4132. mov rdi, QWORD PTR [rsp+800]
  4133. mov rsi, QWORD PTR [rsp+768]
  4134. mov r11, r15
  4135. lea r12, QWORD PTR [rsp+512]
  4136. lea r13, QWORD PTR [rsp+640]
  4137. and r11, rdi
  4138. neg r15
  4139. neg rdi
  4140. add rsi, 256
  4141. mov rax, QWORD PTR [r12]
  4142. mov r9, QWORD PTR [r13]
  4143. and rax, rdi
  4144. and r9, r15
  4145. mov QWORD PTR [r12], rax
  4146. mov QWORD PTR [r13], r9
  4147. mov rax, QWORD PTR [r12+8]
  4148. mov r9, QWORD PTR [r13+8]
  4149. and rax, rdi
  4150. and r9, r15
  4151. mov QWORD PTR [r12+8], rax
  4152. mov QWORD PTR [r13+8], r9
  4153. mov rax, QWORD PTR [r12+16]
  4154. mov r9, QWORD PTR [r13+16]
  4155. and rax, rdi
  4156. and r9, r15
  4157. mov QWORD PTR [r12+16], rax
  4158. mov QWORD PTR [r13+16], r9
  4159. mov rax, QWORD PTR [r12+24]
  4160. mov r9, QWORD PTR [r13+24]
  4161. and rax, rdi
  4162. and r9, r15
  4163. mov QWORD PTR [r12+24], rax
  4164. mov QWORD PTR [r13+24], r9
  4165. mov rax, QWORD PTR [r12+32]
  4166. mov r9, QWORD PTR [r13+32]
  4167. and rax, rdi
  4168. and r9, r15
  4169. mov QWORD PTR [r12+32], rax
  4170. mov QWORD PTR [r13+32], r9
  4171. mov rax, QWORD PTR [r12+40]
  4172. mov r9, QWORD PTR [r13+40]
  4173. and rax, rdi
  4174. and r9, r15
  4175. mov QWORD PTR [r12+40], rax
  4176. mov QWORD PTR [r13+40], r9
  4177. mov rax, QWORD PTR [r12+48]
  4178. mov r9, QWORD PTR [r13+48]
  4179. and rax, rdi
  4180. and r9, r15
  4181. mov QWORD PTR [r12+48], rax
  4182. mov QWORD PTR [r13+48], r9
  4183. mov rax, QWORD PTR [r12+56]
  4184. mov r9, QWORD PTR [r13+56]
  4185. and rax, rdi
  4186. and r9, r15
  4187. mov QWORD PTR [r12+56], rax
  4188. mov QWORD PTR [r13+56], r9
  4189. mov rax, QWORD PTR [r12+64]
  4190. mov r9, QWORD PTR [r13+64]
  4191. and rax, rdi
  4192. and r9, r15
  4193. mov QWORD PTR [r12+64], rax
  4194. mov QWORD PTR [r13+64], r9
  4195. mov rax, QWORD PTR [r12+72]
  4196. mov r9, QWORD PTR [r13+72]
  4197. and rax, rdi
  4198. and r9, r15
  4199. mov QWORD PTR [r12+72], rax
  4200. mov QWORD PTR [r13+72], r9
  4201. mov rax, QWORD PTR [r12+80]
  4202. mov r9, QWORD PTR [r13+80]
  4203. and rax, rdi
  4204. and r9, r15
  4205. mov QWORD PTR [r12+80], rax
  4206. mov QWORD PTR [r13+80], r9
  4207. mov rax, QWORD PTR [r12+88]
  4208. mov r9, QWORD PTR [r13+88]
  4209. and rax, rdi
  4210. and r9, r15
  4211. mov QWORD PTR [r12+88], rax
  4212. mov QWORD PTR [r13+88], r9
  4213. mov rax, QWORD PTR [r12+96]
  4214. mov r9, QWORD PTR [r13+96]
  4215. and rax, rdi
  4216. and r9, r15
  4217. mov QWORD PTR [r12+96], rax
  4218. mov QWORD PTR [r13+96], r9
  4219. mov rax, QWORD PTR [r12+104]
  4220. mov r9, QWORD PTR [r13+104]
  4221. and rax, rdi
  4222. and r9, r15
  4223. mov QWORD PTR [r12+104], rax
  4224. mov QWORD PTR [r13+104], r9
  4225. mov rax, QWORD PTR [r12+112]
  4226. mov r9, QWORD PTR [r13+112]
  4227. and rax, rdi
  4228. and r9, r15
  4229. mov QWORD PTR [r12+112], rax
  4230. mov QWORD PTR [r13+112], r9
  4231. mov rax, QWORD PTR [r12+120]
  4232. mov r9, QWORD PTR [r13+120]
  4233. and rax, rdi
  4234. and r9, r15
  4235. mov QWORD PTR [r12+120], rax
  4236. mov QWORD PTR [r13+120], r9
  4237. mov rax, QWORD PTR [r12]
  4238. add rax, QWORD PTR [r13]
  4239. mov r9, QWORD PTR [r12+8]
  4240. mov QWORD PTR [rsi], rax
  4241. adc r9, QWORD PTR [r13+8]
  4242. mov r10, QWORD PTR [r12+16]
  4243. mov QWORD PTR [rsi+8], r9
  4244. adc r10, QWORD PTR [r13+16]
  4245. mov rax, QWORD PTR [r12+24]
  4246. mov QWORD PTR [rsi+16], r10
  4247. adc rax, QWORD PTR [r13+24]
  4248. mov r9, QWORD PTR [r12+32]
  4249. mov QWORD PTR [rsi+24], rax
  4250. adc r9, QWORD PTR [r13+32]
  4251. mov r10, QWORD PTR [r12+40]
  4252. mov QWORD PTR [rsi+32], r9
  4253. adc r10, QWORD PTR [r13+40]
  4254. mov rax, QWORD PTR [r12+48]
  4255. mov QWORD PTR [rsi+40], r10
  4256. adc rax, QWORD PTR [r13+48]
  4257. mov r9, QWORD PTR [r12+56]
  4258. mov QWORD PTR [rsi+48], rax
  4259. adc r9, QWORD PTR [r13+56]
  4260. mov r10, QWORD PTR [r12+64]
  4261. mov QWORD PTR [rsi+56], r9
  4262. adc r10, QWORD PTR [r13+64]
  4263. mov rax, QWORD PTR [r12+72]
  4264. mov QWORD PTR [rsi+64], r10
  4265. adc rax, QWORD PTR [r13+72]
  4266. mov r9, QWORD PTR [r12+80]
  4267. mov QWORD PTR [rsi+72], rax
  4268. adc r9, QWORD PTR [r13+80]
  4269. mov r10, QWORD PTR [r12+88]
  4270. mov QWORD PTR [rsi+80], r9
  4271. adc r10, QWORD PTR [r13+88]
  4272. mov rax, QWORD PTR [r12+96]
  4273. mov QWORD PTR [rsi+88], r10
  4274. adc rax, QWORD PTR [r13+96]
  4275. mov r9, QWORD PTR [r12+104]
  4276. mov QWORD PTR [rsi+96], rax
  4277. adc r9, QWORD PTR [r13+104]
  4278. mov r10, QWORD PTR [r12+112]
  4279. mov QWORD PTR [rsi+104], r9
  4280. adc r10, QWORD PTR [r13+112]
  4281. mov rax, QWORD PTR [r12+120]
  4282. mov QWORD PTR [rsi+112], r10
  4283. adc rax, QWORD PTR [r13+120]
  4284. mov QWORD PTR [rsi+120], rax
  4285. adc r11, 0
  4286. lea r13, QWORD PTR [rsp+256]
  4287. mov r12, rsp
  4288. mov rax, QWORD PTR [r12]
  4289. sub rax, QWORD PTR [r13]
  4290. mov r9, QWORD PTR [r12+8]
  4291. mov QWORD PTR [r12], rax
  4292. sbb r9, QWORD PTR [r13+8]
  4293. mov r10, QWORD PTR [r12+16]
  4294. mov QWORD PTR [r12+8], r9
  4295. sbb r10, QWORD PTR [r13+16]
  4296. mov rax, QWORD PTR [r12+24]
  4297. mov QWORD PTR [r12+16], r10
  4298. sbb rax, QWORD PTR [r13+24]
  4299. mov r9, QWORD PTR [r12+32]
  4300. mov QWORD PTR [r12+24], rax
  4301. sbb r9, QWORD PTR [r13+32]
  4302. mov r10, QWORD PTR [r12+40]
  4303. mov QWORD PTR [r12+32], r9
  4304. sbb r10, QWORD PTR [r13+40]
  4305. mov rax, QWORD PTR [r12+48]
  4306. mov QWORD PTR [r12+40], r10
  4307. sbb rax, QWORD PTR [r13+48]
  4308. mov r9, QWORD PTR [r12+56]
  4309. mov QWORD PTR [r12+48], rax
  4310. sbb r9, QWORD PTR [r13+56]
  4311. mov r10, QWORD PTR [r12+64]
  4312. mov QWORD PTR [r12+56], r9
  4313. sbb r10, QWORD PTR [r13+64]
  4314. mov rax, QWORD PTR [r12+72]
  4315. mov QWORD PTR [r12+64], r10
  4316. sbb rax, QWORD PTR [r13+72]
  4317. mov r9, QWORD PTR [r12+80]
  4318. mov QWORD PTR [r12+72], rax
  4319. sbb r9, QWORD PTR [r13+80]
  4320. mov r10, QWORD PTR [r12+88]
  4321. mov QWORD PTR [r12+80], r9
  4322. sbb r10, QWORD PTR [r13+88]
  4323. mov rax, QWORD PTR [r12+96]
  4324. mov QWORD PTR [r12+88], r10
  4325. sbb rax, QWORD PTR [r13+96]
  4326. mov r9, QWORD PTR [r12+104]
  4327. mov QWORD PTR [r12+96], rax
  4328. sbb r9, QWORD PTR [r13+104]
  4329. mov r10, QWORD PTR [r12+112]
  4330. mov QWORD PTR [r12+104], r9
  4331. sbb r10, QWORD PTR [r13+112]
  4332. mov rax, QWORD PTR [r12+120]
  4333. mov QWORD PTR [r12+112], r10
  4334. sbb rax, QWORD PTR [r13+120]
  4335. mov r9, QWORD PTR [r12+128]
  4336. mov QWORD PTR [r12+120], rax
  4337. sbb r9, QWORD PTR [r13+128]
  4338. mov r10, QWORD PTR [r12+136]
  4339. mov QWORD PTR [r12+128], r9
  4340. sbb r10, QWORD PTR [r13+136]
  4341. mov rax, QWORD PTR [r12+144]
  4342. mov QWORD PTR [r12+136], r10
  4343. sbb rax, QWORD PTR [r13+144]
  4344. mov r9, QWORD PTR [r12+152]
  4345. mov QWORD PTR [r12+144], rax
  4346. sbb r9, QWORD PTR [r13+152]
  4347. mov r10, QWORD PTR [r12+160]
  4348. mov QWORD PTR [r12+152], r9
  4349. sbb r10, QWORD PTR [r13+160]
  4350. mov rax, QWORD PTR [r12+168]
  4351. mov QWORD PTR [r12+160], r10
  4352. sbb rax, QWORD PTR [r13+168]
  4353. mov r9, QWORD PTR [r12+176]
  4354. mov QWORD PTR [r12+168], rax
  4355. sbb r9, QWORD PTR [r13+176]
  4356. mov r10, QWORD PTR [r12+184]
  4357. mov QWORD PTR [r12+176], r9
  4358. sbb r10, QWORD PTR [r13+184]
  4359. mov rax, QWORD PTR [r12+192]
  4360. mov QWORD PTR [r12+184], r10
  4361. sbb rax, QWORD PTR [r13+192]
  4362. mov r9, QWORD PTR [r12+200]
  4363. mov QWORD PTR [r12+192], rax
  4364. sbb r9, QWORD PTR [r13+200]
  4365. mov r10, QWORD PTR [r12+208]
  4366. mov QWORD PTR [r12+200], r9
  4367. sbb r10, QWORD PTR [r13+208]
  4368. mov rax, QWORD PTR [r12+216]
  4369. mov QWORD PTR [r12+208], r10
  4370. sbb rax, QWORD PTR [r13+216]
  4371. mov r9, QWORD PTR [r12+224]
  4372. mov QWORD PTR [r12+216], rax
  4373. sbb r9, QWORD PTR [r13+224]
  4374. mov r10, QWORD PTR [r12+232]
  4375. mov QWORD PTR [r12+224], r9
  4376. sbb r10, QWORD PTR [r13+232]
  4377. mov rax, QWORD PTR [r12+240]
  4378. mov QWORD PTR [r12+232], r10
  4379. sbb rax, QWORD PTR [r13+240]
  4380. mov r9, QWORD PTR [r12+248]
  4381. mov QWORD PTR [r12+240], rax
  4382. sbb r9, QWORD PTR [r13+248]
  4383. mov QWORD PTR [r12+248], r9
  4384. sbb r11, 0
  4385. mov rax, QWORD PTR [r12]
  4386. sub rax, QWORD PTR [rcx]
  4387. mov r9, QWORD PTR [r12+8]
  4388. mov QWORD PTR [r12], rax
  4389. sbb r9, QWORD PTR [rcx+8]
  4390. mov r10, QWORD PTR [r12+16]
  4391. mov QWORD PTR [r12+8], r9
  4392. sbb r10, QWORD PTR [rcx+16]
  4393. mov rax, QWORD PTR [r12+24]
  4394. mov QWORD PTR [r12+16], r10
  4395. sbb rax, QWORD PTR [rcx+24]
  4396. mov r9, QWORD PTR [r12+32]
  4397. mov QWORD PTR [r12+24], rax
  4398. sbb r9, QWORD PTR [rcx+32]
  4399. mov r10, QWORD PTR [r12+40]
  4400. mov QWORD PTR [r12+32], r9
  4401. sbb r10, QWORD PTR [rcx+40]
  4402. mov rax, QWORD PTR [r12+48]
  4403. mov QWORD PTR [r12+40], r10
  4404. sbb rax, QWORD PTR [rcx+48]
  4405. mov r9, QWORD PTR [r12+56]
  4406. mov QWORD PTR [r12+48], rax
  4407. sbb r9, QWORD PTR [rcx+56]
  4408. mov r10, QWORD PTR [r12+64]
  4409. mov QWORD PTR [r12+56], r9
  4410. sbb r10, QWORD PTR [rcx+64]
  4411. mov rax, QWORD PTR [r12+72]
  4412. mov QWORD PTR [r12+64], r10
  4413. sbb rax, QWORD PTR [rcx+72]
  4414. mov r9, QWORD PTR [r12+80]
  4415. mov QWORD PTR [r12+72], rax
  4416. sbb r9, QWORD PTR [rcx+80]
  4417. mov r10, QWORD PTR [r12+88]
  4418. mov QWORD PTR [r12+80], r9
  4419. sbb r10, QWORD PTR [rcx+88]
  4420. mov rax, QWORD PTR [r12+96]
  4421. mov QWORD PTR [r12+88], r10
  4422. sbb rax, QWORD PTR [rcx+96]
  4423. mov r9, QWORD PTR [r12+104]
  4424. mov QWORD PTR [r12+96], rax
  4425. sbb r9, QWORD PTR [rcx+104]
  4426. mov r10, QWORD PTR [r12+112]
  4427. mov QWORD PTR [r12+104], r9
  4428. sbb r10, QWORD PTR [rcx+112]
  4429. mov rax, QWORD PTR [r12+120]
  4430. mov QWORD PTR [r12+112], r10
  4431. sbb rax, QWORD PTR [rcx+120]
  4432. mov r9, QWORD PTR [r12+128]
  4433. mov QWORD PTR [r12+120], rax
  4434. sbb r9, QWORD PTR [rcx+128]
  4435. mov r10, QWORD PTR [r12+136]
  4436. mov QWORD PTR [r12+128], r9
  4437. sbb r10, QWORD PTR [rcx+136]
  4438. mov rax, QWORD PTR [r12+144]
  4439. mov QWORD PTR [r12+136], r10
  4440. sbb rax, QWORD PTR [rcx+144]
  4441. mov r9, QWORD PTR [r12+152]
  4442. mov QWORD PTR [r12+144], rax
  4443. sbb r9, QWORD PTR [rcx+152]
  4444. mov r10, QWORD PTR [r12+160]
  4445. mov QWORD PTR [r12+152], r9
  4446. sbb r10, QWORD PTR [rcx+160]
  4447. mov rax, QWORD PTR [r12+168]
  4448. mov QWORD PTR [r12+160], r10
  4449. sbb rax, QWORD PTR [rcx+168]
  4450. mov r9, QWORD PTR [r12+176]
  4451. mov QWORD PTR [r12+168], rax
  4452. sbb r9, QWORD PTR [rcx+176]
  4453. mov r10, QWORD PTR [r12+184]
  4454. mov QWORD PTR [r12+176], r9
  4455. sbb r10, QWORD PTR [rcx+184]
  4456. mov rax, QWORD PTR [r12+192]
  4457. mov QWORD PTR [r12+184], r10
  4458. sbb rax, QWORD PTR [rcx+192]
  4459. mov r9, QWORD PTR [r12+200]
  4460. mov QWORD PTR [r12+192], rax
  4461. sbb r9, QWORD PTR [rcx+200]
  4462. mov r10, QWORD PTR [r12+208]
  4463. mov QWORD PTR [r12+200], r9
  4464. sbb r10, QWORD PTR [rcx+208]
  4465. mov rax, QWORD PTR [r12+216]
  4466. mov QWORD PTR [r12+208], r10
  4467. sbb rax, QWORD PTR [rcx+216]
  4468. mov r9, QWORD PTR [r12+224]
  4469. mov QWORD PTR [r12+216], rax
  4470. sbb r9, QWORD PTR [rcx+224]
  4471. mov r10, QWORD PTR [r12+232]
  4472. mov QWORD PTR [r12+224], r9
  4473. sbb r10, QWORD PTR [rcx+232]
  4474. mov rax, QWORD PTR [r12+240]
  4475. mov QWORD PTR [r12+232], r10
  4476. sbb rax, QWORD PTR [rcx+240]
  4477. mov r9, QWORD PTR [r12+248]
  4478. mov QWORD PTR [r12+240], rax
  4479. sbb r9, QWORD PTR [rcx+248]
  4480. mov QWORD PTR [r12+248], r9
  4481. sbb r11, 0
  4482. sub rsi, 128
  4483. ; Add
  4484. mov rax, QWORD PTR [rsi]
  4485. add rax, QWORD PTR [r12]
  4486. mov r9, QWORD PTR [rsi+8]
  4487. mov QWORD PTR [rsi], rax
  4488. adc r9, QWORD PTR [r12+8]
  4489. mov r10, QWORD PTR [rsi+16]
  4490. mov QWORD PTR [rsi+8], r9
  4491. adc r10, QWORD PTR [r12+16]
  4492. mov rax, QWORD PTR [rsi+24]
  4493. mov QWORD PTR [rsi+16], r10
  4494. adc rax, QWORD PTR [r12+24]
  4495. mov r9, QWORD PTR [rsi+32]
  4496. mov QWORD PTR [rsi+24], rax
  4497. adc r9, QWORD PTR [r12+32]
  4498. mov r10, QWORD PTR [rsi+40]
  4499. mov QWORD PTR [rsi+32], r9
  4500. adc r10, QWORD PTR [r12+40]
  4501. mov rax, QWORD PTR [rsi+48]
  4502. mov QWORD PTR [rsi+40], r10
  4503. adc rax, QWORD PTR [r12+48]
  4504. mov r9, QWORD PTR [rsi+56]
  4505. mov QWORD PTR [rsi+48], rax
  4506. adc r9, QWORD PTR [r12+56]
  4507. mov r10, QWORD PTR [rsi+64]
  4508. mov QWORD PTR [rsi+56], r9
  4509. adc r10, QWORD PTR [r12+64]
  4510. mov rax, QWORD PTR [rsi+72]
  4511. mov QWORD PTR [rsi+64], r10
  4512. adc rax, QWORD PTR [r12+72]
  4513. mov r9, QWORD PTR [rsi+80]
  4514. mov QWORD PTR [rsi+72], rax
  4515. adc r9, QWORD PTR [r12+80]
  4516. mov r10, QWORD PTR [rsi+88]
  4517. mov QWORD PTR [rsi+80], r9
  4518. adc r10, QWORD PTR [r12+88]
  4519. mov rax, QWORD PTR [rsi+96]
  4520. mov QWORD PTR [rsi+88], r10
  4521. adc rax, QWORD PTR [r12+96]
  4522. mov r9, QWORD PTR [rsi+104]
  4523. mov QWORD PTR [rsi+96], rax
  4524. adc r9, QWORD PTR [r12+104]
  4525. mov r10, QWORD PTR [rsi+112]
  4526. mov QWORD PTR [rsi+104], r9
  4527. adc r10, QWORD PTR [r12+112]
  4528. mov rax, QWORD PTR [rsi+120]
  4529. mov QWORD PTR [rsi+112], r10
  4530. adc rax, QWORD PTR [r12+120]
  4531. mov r9, QWORD PTR [rsi+128]
  4532. mov QWORD PTR [rsi+120], rax
  4533. adc r9, QWORD PTR [r12+128]
  4534. mov r10, QWORD PTR [rsi+136]
  4535. mov QWORD PTR [rsi+128], r9
  4536. adc r10, QWORD PTR [r12+136]
  4537. mov rax, QWORD PTR [rsi+144]
  4538. mov QWORD PTR [rsi+136], r10
  4539. adc rax, QWORD PTR [r12+144]
  4540. mov r9, QWORD PTR [rsi+152]
  4541. mov QWORD PTR [rsi+144], rax
  4542. adc r9, QWORD PTR [r12+152]
  4543. mov r10, QWORD PTR [rsi+160]
  4544. mov QWORD PTR [rsi+152], r9
  4545. adc r10, QWORD PTR [r12+160]
  4546. mov rax, QWORD PTR [rsi+168]
  4547. mov QWORD PTR [rsi+160], r10
  4548. adc rax, QWORD PTR [r12+168]
  4549. mov r9, QWORD PTR [rsi+176]
  4550. mov QWORD PTR [rsi+168], rax
  4551. adc r9, QWORD PTR [r12+176]
  4552. mov r10, QWORD PTR [rsi+184]
  4553. mov QWORD PTR [rsi+176], r9
  4554. adc r10, QWORD PTR [r12+184]
  4555. mov rax, QWORD PTR [rsi+192]
  4556. mov QWORD PTR [rsi+184], r10
  4557. adc rax, QWORD PTR [r12+192]
  4558. mov r9, QWORD PTR [rsi+200]
  4559. mov QWORD PTR [rsi+192], rax
  4560. adc r9, QWORD PTR [r12+200]
  4561. mov r10, QWORD PTR [rsi+208]
  4562. mov QWORD PTR [rsi+200], r9
  4563. adc r10, QWORD PTR [r12+208]
  4564. mov rax, QWORD PTR [rsi+216]
  4565. mov QWORD PTR [rsi+208], r10
  4566. adc rax, QWORD PTR [r12+216]
  4567. mov r9, QWORD PTR [rsi+224]
  4568. mov QWORD PTR [rsi+216], rax
  4569. adc r9, QWORD PTR [r12+224]
  4570. mov r10, QWORD PTR [rsi+232]
  4571. mov QWORD PTR [rsi+224], r9
  4572. adc r10, QWORD PTR [r12+232]
  4573. mov rax, QWORD PTR [rsi+240]
  4574. mov QWORD PTR [rsi+232], r10
  4575. adc rax, QWORD PTR [r12+240]
  4576. mov r9, QWORD PTR [rsi+248]
  4577. mov QWORD PTR [rsi+240], rax
  4578. adc r9, QWORD PTR [r12+248]
  4579. mov QWORD PTR [rsi+248], r9
  4580. adc r11, 0
  4581. mov QWORD PTR [rcx+384], r11
  4582. add rsi, 128
  4583. ; Add
  4584. mov rax, QWORD PTR [rsi]
  4585. add rax, QWORD PTR [r13]
  4586. mov r9, QWORD PTR [rsi+8]
  4587. mov QWORD PTR [rsi], rax
  4588. adc r9, QWORD PTR [r13+8]
  4589. mov r10, QWORD PTR [rsi+16]
  4590. mov QWORD PTR [rsi+8], r9
  4591. adc r10, QWORD PTR [r13+16]
  4592. mov rax, QWORD PTR [rsi+24]
  4593. mov QWORD PTR [rsi+16], r10
  4594. adc rax, QWORD PTR [r13+24]
  4595. mov r9, QWORD PTR [rsi+32]
  4596. mov QWORD PTR [rsi+24], rax
  4597. adc r9, QWORD PTR [r13+32]
  4598. mov r10, QWORD PTR [rsi+40]
  4599. mov QWORD PTR [rsi+32], r9
  4600. adc r10, QWORD PTR [r13+40]
  4601. mov rax, QWORD PTR [rsi+48]
  4602. mov QWORD PTR [rsi+40], r10
  4603. adc rax, QWORD PTR [r13+48]
  4604. mov r9, QWORD PTR [rsi+56]
  4605. mov QWORD PTR [rsi+48], rax
  4606. adc r9, QWORD PTR [r13+56]
  4607. mov r10, QWORD PTR [rsi+64]
  4608. mov QWORD PTR [rsi+56], r9
  4609. adc r10, QWORD PTR [r13+64]
  4610. mov rax, QWORD PTR [rsi+72]
  4611. mov QWORD PTR [rsi+64], r10
  4612. adc rax, QWORD PTR [r13+72]
  4613. mov r9, QWORD PTR [rsi+80]
  4614. mov QWORD PTR [rsi+72], rax
  4615. adc r9, QWORD PTR [r13+80]
  4616. mov r10, QWORD PTR [rsi+88]
  4617. mov QWORD PTR [rsi+80], r9
  4618. adc r10, QWORD PTR [r13+88]
  4619. mov rax, QWORD PTR [rsi+96]
  4620. mov QWORD PTR [rsi+88], r10
  4621. adc rax, QWORD PTR [r13+96]
  4622. mov r9, QWORD PTR [rsi+104]
  4623. mov QWORD PTR [rsi+96], rax
  4624. adc r9, QWORD PTR [r13+104]
  4625. mov r10, QWORD PTR [rsi+112]
  4626. mov QWORD PTR [rsi+104], r9
  4627. adc r10, QWORD PTR [r13+112]
  4628. mov rax, QWORD PTR [rsi+120]
  4629. mov QWORD PTR [rsi+112], r10
  4630. adc rax, QWORD PTR [r13+120]
  4631. mov r9, QWORD PTR [rsi+128]
  4632. mov QWORD PTR [rsi+120], rax
  4633. adc r9, QWORD PTR [r13+128]
  4634. mov QWORD PTR [rsi+128], r9
  4635. ; Add to zero
  4636. mov rax, QWORD PTR [r13+136]
  4637. adc rax, 0
  4638. mov r9, QWORD PTR [r13+144]
  4639. mov QWORD PTR [rsi+136], rax
  4640. adc r9, 0
  4641. mov r10, QWORD PTR [r13+152]
  4642. mov QWORD PTR [rsi+144], r9
  4643. adc r10, 0
  4644. mov rax, QWORD PTR [r13+160]
  4645. mov QWORD PTR [rsi+152], r10
  4646. adc rax, 0
  4647. mov r9, QWORD PTR [r13+168]
  4648. mov QWORD PTR [rsi+160], rax
  4649. adc r9, 0
  4650. mov r10, QWORD PTR [r13+176]
  4651. mov QWORD PTR [rsi+168], r9
  4652. adc r10, 0
  4653. mov rax, QWORD PTR [r13+184]
  4654. mov QWORD PTR [rsi+176], r10
  4655. adc rax, 0
  4656. mov r9, QWORD PTR [r13+192]
  4657. mov QWORD PTR [rsi+184], rax
  4658. adc r9, 0
  4659. mov r10, QWORD PTR [r13+200]
  4660. mov QWORD PTR [rsi+192], r9
  4661. adc r10, 0
  4662. mov rax, QWORD PTR [r13+208]
  4663. mov QWORD PTR [rsi+200], r10
  4664. adc rax, 0
  4665. mov r9, QWORD PTR [r13+216]
  4666. mov QWORD PTR [rsi+208], rax
  4667. adc r9, 0
  4668. mov r10, QWORD PTR [r13+224]
  4669. mov QWORD PTR [rsi+216], r9
  4670. adc r10, 0
  4671. mov rax, QWORD PTR [r13+232]
  4672. mov QWORD PTR [rsi+224], r10
  4673. adc rax, 0
  4674. mov r9, QWORD PTR [r13+240]
  4675. mov QWORD PTR [rsi+232], rax
  4676. adc r9, 0
  4677. mov r10, QWORD PTR [r13+248]
  4678. mov QWORD PTR [rsi+240], r9
  4679. adc r10, 0
  4680. mov QWORD PTR [rsi+248], r10
  4681. add rsp, 808
  4682. pop rsi
  4683. pop rdi
  4684. pop r15
  4685. pop r14
  4686. pop r13
  4687. pop r12
  4688. ret
  4689. sp_2048_mul_32 ENDP
  4690. _text ENDS
  4691. IFDEF HAVE_INTEL_AVX2
  4692. ; /* Multiply a and b into r. (r = a * b)
  4693. ; *
  4694. ; * r A single precision integer.
  4695. ; * a A single precision integer.
  4696. ; * b A single precision integer.
  4697. ; */
  ;
  ; Arguments as used by the code: rcx = r, rdx = a, r8 = b.
  ; a and b are read as 32 qwords each (byte offsets 0..248); r is written as
  ; 64 qwords (byte offsets 0..504).
  ;
  ; The routine performs one level of Karatsuba using three calls to
  ; sp_2048_mul_avx2_16 (defined elsewhere; presumably a 16x16-qword multiply
  ; taking rcx = result, rdx/r8 = operands and producing 32 qwords - the code
  ; below reads 32 qwords from each result buffer):
  ;   z1 = (a_lo + a_hi) * (b_lo + b_hi)   -> [rsp+0   .. rsp+255]
  ;   z2 = a_hi * b_hi                     -> [rsp+256 .. rsp+511]
  ;   z0 = a_lo * b_lo                     -> r[0..31]
  ; where x_lo is qwords 0..15 and x_hi is qwords 16..31 of x.
  ; The result is then assembled as
  ;   r = z0 + (z1 + corr - z0 - z2) * 2^1024 + z2 * 2^2048
  ; where corr accounts for the carry bits dropped from the two 16-qword sums.
  ;
  ; Stack frame (808 bytes below the six saved registers):
  ;   [rsp+0  ..255]  z1 (32 qwords)
  ;   [rsp+256..511]  z2 (32 qwords)
  ;   [rsp+512..639]  a_lo + a_hi, low 16 qwords
  ;   [rsp+640..767]  b_lo + b_hi, low 16 qwords
  ;   [rsp+768]       saved r (rcx)
  ;   [rsp+776]       saved a (rdx)
  ;   [rsp+784]       saved b (r8)
  ;   [rsp+792]       carry out of a_lo + a_hi (0 or 1)
  ;   [rsp+800]       carry out of b_lo + b_hi (0 or 1)
  ;
  ; NOTE(review): no separate 32-byte home/shadow area is reserved before the
  ; calls (the first call passes rcx = rsp); this relies on
  ; sp_2048_mul_avx2_16 not storing into caller-provided home space - confirm
  ; against its definition.
  4698. _text SEGMENT READONLY PARA
  4699. sp_2048_mul_avx2_32 PROC
  ; Preserve the non-volatile registers used below (restored in reverse order
  ; before ret).
  4700. push r12
  4701. push r13
  4702. push r14
  4703. push r15
  4704. push rdi
  4705. push rsi
  ; Allocate the frame described in the header comment.
  4706. sub rsp, 808
  ; Stash the three arguments so they can be reloaded around the calls.
  4707. mov QWORD PTR [rsp+768], rcx
  4708. mov QWORD PTR [rsp+776], rdx
  4709. mov QWORD PTR [rsp+784], r8
  ; r12 -> buffer for the a sum, r14 -> a_hi (a + 128 bytes).
  4710. lea r12, QWORD PTR [rsp+512]
  4711. lea r14, QWORD PTR [rdx+128]
  4712. ; Add: [r12][0..15] = a_lo + a_hi, one add followed by an adc chain
  4713. mov rax, QWORD PTR [rdx]
  ; r15 is cleared before the first add so the carry chain is not disturbed;
  ; it collects the final carry below.
  4714. xor r15, r15
  4715. add rax, QWORD PTR [r14]
  4716. mov r9, QWORD PTR [rdx+8]
  4717. mov QWORD PTR [r12], rax
  4718. adc r9, QWORD PTR [r14+8]
  4719. mov r10, QWORD PTR [rdx+16]
  4720. mov QWORD PTR [r12+8], r9
  4721. adc r10, QWORD PTR [r14+16]
  4722. mov rax, QWORD PTR [rdx+24]
  4723. mov QWORD PTR [r12+16], r10
  4724. adc rax, QWORD PTR [r14+24]
  4725. mov r9, QWORD PTR [rdx+32]
  4726. mov QWORD PTR [r12+24], rax
  4727. adc r9, QWORD PTR [r14+32]
  4728. mov r10, QWORD PTR [rdx+40]
  4729. mov QWORD PTR [r12+32], r9
  4730. adc r10, QWORD PTR [r14+40]
  4731. mov rax, QWORD PTR [rdx+48]
  4732. mov QWORD PTR [r12+40], r10
  4733. adc rax, QWORD PTR [r14+48]
  4734. mov r9, QWORD PTR [rdx+56]
  4735. mov QWORD PTR [r12+48], rax
  4736. adc r9, QWORD PTR [r14+56]
  4737. mov r10, QWORD PTR [rdx+64]
  4738. mov QWORD PTR [r12+56], r9
  4739. adc r10, QWORD PTR [r14+64]
  4740. mov rax, QWORD PTR [rdx+72]
  4741. mov QWORD PTR [r12+64], r10
  4742. adc rax, QWORD PTR [r14+72]
  4743. mov r9, QWORD PTR [rdx+80]
  4744. mov QWORD PTR [r12+72], rax
  4745. adc r9, QWORD PTR [r14+80]
  4746. mov r10, QWORD PTR [rdx+88]
  4747. mov QWORD PTR [r12+80], r9
  4748. adc r10, QWORD PTR [r14+88]
  4749. mov rax, QWORD PTR [rdx+96]
  4750. mov QWORD PTR [r12+88], r10
  4751. adc rax, QWORD PTR [r14+96]
  4752. mov r9, QWORD PTR [rdx+104]
  4753. mov QWORD PTR [r12+96], rax
  4754. adc r9, QWORD PTR [r14+104]
  4755. mov r10, QWORD PTR [rdx+112]
  4756. mov QWORD PTR [r12+104], r9
  4757. adc r10, QWORD PTR [r14+112]
  4758. mov rax, QWORD PTR [rdx+120]
  4759. mov QWORD PTR [r12+112], r10
  4760. adc rax, QWORD PTR [r14+120]
  4761. mov QWORD PTR [r12+120], rax
  ; r15 = carry out of the a sum (0 or 1); kept on the stack across the calls.
  4762. adc r15, 0
  4763. mov QWORD PTR [rsp+792], r15
  ; r13 -> buffer for the b sum, r14 -> b_hi (b + 128 bytes).
  4764. lea r13, QWORD PTR [rsp+640]
  4765. lea r14, QWORD PTR [r8+128]
  4766. ; Add: [r13][0..15] = b_lo + b_hi, same pattern as the a sum above
  4767. mov rax, QWORD PTR [r8]
  ; rdi is cleared before the first add; it collects the final carry below.
  4768. xor rdi, rdi
  4769. add rax, QWORD PTR [r14]
  4770. mov r9, QWORD PTR [r8+8]
  4771. mov QWORD PTR [r13], rax
  4772. adc r9, QWORD PTR [r14+8]
  4773. mov r10, QWORD PTR [r8+16]
  4774. mov QWORD PTR [r13+8], r9
  4775. adc r10, QWORD PTR [r14+16]
  4776. mov rax, QWORD PTR [r8+24]
  4777. mov QWORD PTR [r13+16], r10
  4778. adc rax, QWORD PTR [r14+24]
  4779. mov r9, QWORD PTR [r8+32]
  4780. mov QWORD PTR [r13+24], rax
  4781. adc r9, QWORD PTR [r14+32]
  4782. mov r10, QWORD PTR [r8+40]
  4783. mov QWORD PTR [r13+32], r9
  4784. adc r10, QWORD PTR [r14+40]
  4785. mov rax, QWORD PTR [r8+48]
  4786. mov QWORD PTR [r13+40], r10
  4787. adc rax, QWORD PTR [r14+48]
  4788. mov r9, QWORD PTR [r8+56]
  4789. mov QWORD PTR [r13+48], rax
  4790. adc r9, QWORD PTR [r14+56]
  4791. mov r10, QWORD PTR [r8+64]
  4792. mov QWORD PTR [r13+56], r9
  4793. adc r10, QWORD PTR [r14+64]
  4794. mov rax, QWORD PTR [r8+72]
  4795. mov QWORD PTR [r13+64], r10
  4796. adc rax, QWORD PTR [r14+72]
  4797. mov r9, QWORD PTR [r8+80]
  4798. mov QWORD PTR [r13+72], rax
  4799. adc r9, QWORD PTR [r14+80]
  4800. mov r10, QWORD PTR [r8+88]
  4801. mov QWORD PTR [r13+80], r9
  4802. adc r10, QWORD PTR [r14+88]
  4803. mov rax, QWORD PTR [r8+96]
  4804. mov QWORD PTR [r13+88], r10
  4805. adc rax, QWORD PTR [r14+96]
  4806. mov r9, QWORD PTR [r8+104]
  4807. mov QWORD PTR [r13+96], rax
  4808. adc r9, QWORD PTR [r14+104]
  4809. mov r10, QWORD PTR [r8+112]
  4810. mov QWORD PTR [r13+104], r9
  4811. adc r10, QWORD PTR [r14+112]
  4812. mov rax, QWORD PTR [r8+120]
  4813. mov QWORD PTR [r13+112], r10
  4814. adc rax, QWORD PTR [r14+120]
  4815. mov QWORD PTR [r13+120], rax
  ; rdi = carry out of the b sum (0 or 1); kept on the stack across the calls.
  4816. adc rdi, 0
  4817. mov QWORD PTR [rsp+800], rdi
  ; Call 1: z1 = (a sum) * (b sum), result written at [rsp].
  4818. mov r8, r13
  4819. mov rdx, r12
  4820. mov rcx, rsp
  4821. call sp_2048_mul_avx2_16
  ; Call 2: z2 = a_hi * b_hi, result written at [rsp+256].
  4822. mov r8, QWORD PTR [rsp+784]
  4823. mov rdx, QWORD PTR [rsp+776]
  4824. lea rcx, QWORD PTR [rsp+256]
  4825. add r8, 128
  4826. add rdx, 128
  4827. call sp_2048_mul_avx2_16
  ; Call 3: z0 = a_lo * b_lo, result written directly into r.
  4828. mov r8, QWORD PTR [rsp+784]
  4829. mov rdx, QWORD PTR [rsp+776]
  4830. mov rcx, QWORD PTR [rsp+768]
  4831. call sp_2048_mul_avx2_16
  ; rcx/rdx/r8 are volatile across a call in the Microsoft x64 convention, so
  ; they are reloaded here; rcx is used below as the pointer to r.
  4832. IFDEF _WIN64
  4833. mov r8, QWORD PTR [rsp+784]
  4834. mov rdx, QWORD PTR [rsp+776]
  4835. mov rcx, QWORD PTR [rsp+768]
  4836. ENDIF
  ; Carry correction for z1. With ca/cb the carries of the a/b sums:
  ;   r11 = ca AND cb            (the ca*cb term, lands in qword 48 of r)
  ;   r15 = -ca, rdi = -cb       (0 or all-ones masks)
  ;   rsi = r + 256 bytes        (qword 32 of r)
  ; r12/r13 point at the a sum and b sum again.
  4837. mov r15, QWORD PTR [rsp+792]
  4838. mov rdi, QWORD PTR [rsp+800]
  4839. mov rsi, QWORD PTR [rsp+768]
  4840. mov r11, r15
  4841. lea r12, QWORD PTR [rsp+512]
  4842. lea r13, QWORD PTR [rsp+640]
  4843. and r11, rdi
  4844. neg r15
  4845. neg rdi
  4846. add rsi, 256
  ; r[32..47] = (cb ? a_sum : 0) + (ca ? b_sum : 0).
  ; pext (BMI2) with a mask of 0 yields 0 and with an all-ones mask yields the
  ; source unchanged, so it acts as a branch-free select that leaves the
  ; carry flag of the add/adc chain untouched.
  4847. mov rax, QWORD PTR [r12]
  4848. mov r9, QWORD PTR [r13]
  4849. pext rax, rax, rdi
  4850. pext r9, r9, r15
  4851. add rax, r9
  4852. mov r9, QWORD PTR [r12+8]
  4853. mov r10, QWORD PTR [r13+8]
  4854. pext r9, r9, rdi
  4855. pext r10, r10, r15
  4856. mov QWORD PTR [rsi], rax
  4857. adc r9, r10
  4858. mov r10, QWORD PTR [r12+16]
  4859. mov rax, QWORD PTR [r13+16]
  4860. pext r10, r10, rdi
  4861. pext rax, rax, r15
  4862. mov QWORD PTR [rsi+8], r9
  4863. adc r10, rax
  4864. mov rax, QWORD PTR [r12+24]
  4865. mov r9, QWORD PTR [r13+24]
  4866. pext rax, rax, rdi
  4867. pext r9, r9, r15
  4868. mov QWORD PTR [rsi+16], r10
  4869. adc rax, r9
  4870. mov r9, QWORD PTR [r12+32]
  4871. mov r10, QWORD PTR [r13+32]
  4872. pext r9, r9, rdi
  4873. pext r10, r10, r15
  4874. mov QWORD PTR [rsi+24], rax
  4875. adc r9, r10
  4876. mov r10, QWORD PTR [r12+40]
  4877. mov rax, QWORD PTR [r13+40]
  4878. pext r10, r10, rdi
  4879. pext rax, rax, r15
  4880. mov QWORD PTR [rsi+32], r9
  4881. adc r10, rax
  4882. mov rax, QWORD PTR [r12+48]
  4883. mov r9, QWORD PTR [r13+48]
  4884. pext rax, rax, rdi
  4885. pext r9, r9, r15
  4886. mov QWORD PTR [rsi+40], r10
  4887. adc rax, r9
  4888. mov r9, QWORD PTR [r12+56]
  4889. mov r10, QWORD PTR [r13+56]
  4890. pext r9, r9, rdi
  4891. pext r10, r10, r15
  4892. mov QWORD PTR [rsi+48], rax
  4893. adc r9, r10
  4894. mov r10, QWORD PTR [r12+64]
  4895. mov rax, QWORD PTR [r13+64]
  4896. pext r10, r10, rdi
  4897. pext rax, rax, r15
  4898. mov QWORD PTR [rsi+56], r9
  4899. adc r10, rax
  4900. mov rax, QWORD PTR [r12+72]
  4901. mov r9, QWORD PTR [r13+72]
  4902. pext rax, rax, rdi
  4903. pext r9, r9, r15
  4904. mov QWORD PTR [rsi+64], r10
  4905. adc rax, r9
  4906. mov r9, QWORD PTR [r12+80]
  4907. mov r10, QWORD PTR [r13+80]
  4908. pext r9, r9, rdi
  4909. pext r10, r10, r15
  4910. mov QWORD PTR [rsi+72], rax
  4911. adc r9, r10
  4912. mov r10, QWORD PTR [r12+88]
  4913. mov rax, QWORD PTR [r13+88]
  4914. pext r10, r10, rdi
  4915. pext rax, rax, r15
  4916. mov QWORD PTR [rsi+80], r9
  4917. adc r10, rax
  4918. mov rax, QWORD PTR [r12+96]
  4919. mov r9, QWORD PTR [r13+96]
  4920. pext rax, rax, rdi
  4921. pext r9, r9, r15
  4922. mov QWORD PTR [rsi+88], r10
  4923. adc rax, r9
  4924. mov r9, QWORD PTR [r12+104]
  4925. mov r10, QWORD PTR [r13+104]
  4926. pext r9, r9, rdi
  4927. pext r10, r10, r15
  4928. mov QWORD PTR [rsi+96], rax
  4929. adc r9, r10
  4930. mov r10, QWORD PTR [r12+112]
  4931. mov rax, QWORD PTR [r13+112]
  4932. pext r10, r10, rdi
  4933. pext rax, rax, r15
  4934. mov QWORD PTR [rsi+104], r9
  4935. adc r10, rax
  4936. mov rax, QWORD PTR [r12+120]
  4937. mov r9, QWORD PTR [r13+120]
  4938. pext rax, rax, rdi
  4939. pext r9, r9, r15
  4940. mov QWORD PTR [rsi+112], r10
  4941. adc rax, r9
  4942. mov QWORD PTR [rsi+120], rax
  ; Fold the carry out of the correction sum into r11. From here on r11 is the
  ; running carry/borrow total destined for qword 48 of r.
  4943. adc r11, 0
  ; z1 -= z2 over 32 qwords, in place at [rsp]. r13 -> z2, r12 -> z1.
  ; (lea and mov do not modify flags; the chain restarts with sub.)
  4944. lea r13, QWORD PTR [rsp+256]
  4945. mov r12, rsp
  4946. mov rax, QWORD PTR [r12]
  4947. sub rax, QWORD PTR [r13]
  4948. mov r9, QWORD PTR [r12+8]
  4949. mov QWORD PTR [r12], rax
  4950. sbb r9, QWORD PTR [r13+8]
  4951. mov r10, QWORD PTR [r12+16]
  4952. mov QWORD PTR [r12+8], r9
  4953. sbb r10, QWORD PTR [r13+16]
  4954. mov rax, QWORD PTR [r12+24]
  4955. mov QWORD PTR [r12+16], r10
  4956. sbb rax, QWORD PTR [r13+24]
  4957. mov r9, QWORD PTR [r12+32]
  4958. mov QWORD PTR [r12+24], rax
  4959. sbb r9, QWORD PTR [r13+32]
  4960. mov r10, QWORD PTR [r12+40]
  4961. mov QWORD PTR [r12+32], r9
  4962. sbb r10, QWORD PTR [r13+40]
  4963. mov rax, QWORD PTR [r12+48]
  4964. mov QWORD PTR [r12+40], r10
  4965. sbb rax, QWORD PTR [r13+48]
  4966. mov r9, QWORD PTR [r12+56]
  4967. mov QWORD PTR [r12+48], rax
  4968. sbb r9, QWORD PTR [r13+56]
  4969. mov r10, QWORD PTR [r12+64]
  4970. mov QWORD PTR [r12+56], r9
  4971. sbb r10, QWORD PTR [r13+64]
  4972. mov rax, QWORD PTR [r12+72]
  4973. mov QWORD PTR [r12+64], r10
  4974. sbb rax, QWORD PTR [r13+72]
  4975. mov r9, QWORD PTR [r12+80]
  4976. mov QWORD PTR [r12+72], rax
  4977. sbb r9, QWORD PTR [r13+80]
  4978. mov r10, QWORD PTR [r12+88]
  4979. mov QWORD PTR [r12+80], r9
  4980. sbb r10, QWORD PTR [r13+88]
  4981. mov rax, QWORD PTR [r12+96]
  4982. mov QWORD PTR [r12+88], r10
  4983. sbb rax, QWORD PTR [r13+96]
  4984. mov r9, QWORD PTR [r12+104]
  4985. mov QWORD PTR [r12+96], rax
  4986. sbb r9, QWORD PTR [r13+104]
  4987. mov r10, QWORD PTR [r12+112]
  4988. mov QWORD PTR [r12+104], r9
  4989. sbb r10, QWORD PTR [r13+112]
  4990. mov rax, QWORD PTR [r12+120]
  4991. mov QWORD PTR [r12+112], r10
  4992. sbb rax, QWORD PTR [r13+120]
  4993. mov r9, QWORD PTR [r12+128]
  4994. mov QWORD PTR [r12+120], rax
  4995. sbb r9, QWORD PTR [r13+128]
  4996. mov r10, QWORD PTR [r12+136]
  4997. mov QWORD PTR [r12+128], r9
  4998. sbb r10, QWORD PTR [r13+136]
  4999. mov rax, QWORD PTR [r12+144]
  5000. mov QWORD PTR [r12+136], r10
  5001. sbb rax, QWORD PTR [r13+144]
  5002. mov r9, QWORD PTR [r12+152]
  5003. mov QWORD PTR [r12+144], rax
  5004. sbb r9, QWORD PTR [r13+152]
  5005. mov r10, QWORD PTR [r12+160]
  5006. mov QWORD PTR [r12+152], r9
  5007. sbb r10, QWORD PTR [r13+160]
  5008. mov rax, QWORD PTR [r12+168]
  5009. mov QWORD PTR [r12+160], r10
  5010. sbb rax, QWORD PTR [r13+168]
  5011. mov r9, QWORD PTR [r12+176]
  5012. mov QWORD PTR [r12+168], rax
  5013. sbb r9, QWORD PTR [r13+176]
  5014. mov r10, QWORD PTR [r12+184]
  5015. mov QWORD PTR [r12+176], r9
  5016. sbb r10, QWORD PTR [r13+184]
  5017. mov rax, QWORD PTR [r12+192]
  5018. mov QWORD PTR [r12+184], r10
  5019. sbb rax, QWORD PTR [r13+192]
  5020. mov r9, QWORD PTR [r12+200]
  5021. mov QWORD PTR [r12+192], rax
  5022. sbb r9, QWORD PTR [r13+200]
  5023. mov r10, QWORD PTR [r12+208]
  5024. mov QWORD PTR [r12+200], r9
  5025. sbb r10, QWORD PTR [r13+208]
  5026. mov rax, QWORD PTR [r12+216]
  5027. mov QWORD PTR [r12+208], r10
  5028. sbb rax, QWORD PTR [r13+216]
  5029. mov r9, QWORD PTR [r12+224]
  5030. mov QWORD PTR [r12+216], rax
  5031. sbb r9, QWORD PTR [r13+224]
  5032. mov r10, QWORD PTR [r12+232]
  5033. mov QWORD PTR [r12+224], r9
  5034. sbb r10, QWORD PTR [r13+232]
  5035. mov rax, QWORD PTR [r12+240]
  5036. mov QWORD PTR [r12+232], r10
  5037. sbb rax, QWORD PTR [r13+240]
  5038. mov r9, QWORD PTR [r12+248]
  5039. mov QWORD PTR [r12+240], rax
  5040. sbb r9, QWORD PTR [r13+248]
  5041. mov QWORD PTR [r12+248], r9
  ; Borrow out of z1 - z2 is taken from r11.
  5042. sbb r11, 0
  ; z1 -= z0 over 32 qwords; z0 is read from r[0..31] through rcx.
  5043. mov rax, QWORD PTR [r12]
  5044. sub rax, QWORD PTR [rcx]
  5045. mov r9, QWORD PTR [r12+8]
  5046. mov QWORD PTR [r12], rax
  5047. sbb r9, QWORD PTR [rcx+8]
  5048. mov r10, QWORD PTR [r12+16]
  5049. mov QWORD PTR [r12+8], r9
  5050. sbb r10, QWORD PTR [rcx+16]
  5051. mov rax, QWORD PTR [r12+24]
  5052. mov QWORD PTR [r12+16], r10
  5053. sbb rax, QWORD PTR [rcx+24]
  5054. mov r9, QWORD PTR [r12+32]
  5055. mov QWORD PTR [r12+24], rax
  5056. sbb r9, QWORD PTR [rcx+32]
  5057. mov r10, QWORD PTR [r12+40]
  5058. mov QWORD PTR [r12+32], r9
  5059. sbb r10, QWORD PTR [rcx+40]
  5060. mov rax, QWORD PTR [r12+48]
  5061. mov QWORD PTR [r12+40], r10
  5062. sbb rax, QWORD PTR [rcx+48]
  5063. mov r9, QWORD PTR [r12+56]
  5064. mov QWORD PTR [r12+48], rax
  5065. sbb r9, QWORD PTR [rcx+56]
  5066. mov r10, QWORD PTR [r12+64]
  5067. mov QWORD PTR [r12+56], r9
  5068. sbb r10, QWORD PTR [rcx+64]
  5069. mov rax, QWORD PTR [r12+72]
  5070. mov QWORD PTR [r12+64], r10
  5071. sbb rax, QWORD PTR [rcx+72]
  5072. mov r9, QWORD PTR [r12+80]
  5073. mov QWORD PTR [r12+72], rax
  5074. sbb r9, QWORD PTR [rcx+80]
  5075. mov r10, QWORD PTR [r12+88]
  5076. mov QWORD PTR [r12+80], r9
  5077. sbb r10, QWORD PTR [rcx+88]
  5078. mov rax, QWORD PTR [r12+96]
  5079. mov QWORD PTR [r12+88], r10
  5080. sbb rax, QWORD PTR [rcx+96]
  5081. mov r9, QWORD PTR [r12+104]
  5082. mov QWORD PTR [r12+96], rax
  5083. sbb r9, QWORD PTR [rcx+104]
  5084. mov r10, QWORD PTR [r12+112]
  5085. mov QWORD PTR [r12+104], r9
  5086. sbb r10, QWORD PTR [rcx+112]
  5087. mov rax, QWORD PTR [r12+120]
  5088. mov QWORD PTR [r12+112], r10
  5089. sbb rax, QWORD PTR [rcx+120]
  5090. mov r9, QWORD PTR [r12+128]
  5091. mov QWORD PTR [r12+120], rax
  5092. sbb r9, QWORD PTR [rcx+128]
  5093. mov r10, QWORD PTR [r12+136]
  5094. mov QWORD PTR [r12+128], r9
  5095. sbb r10, QWORD PTR [rcx+136]
  5096. mov rax, QWORD PTR [r12+144]
  5097. mov QWORD PTR [r12+136], r10
  5098. sbb rax, QWORD PTR [rcx+144]
  5099. mov r9, QWORD PTR [r12+152]
  5100. mov QWORD PTR [r12+144], rax
  5101. sbb r9, QWORD PTR [rcx+152]
  5102. mov r10, QWORD PTR [r12+160]
  5103. mov QWORD PTR [r12+152], r9
  5104. sbb r10, QWORD PTR [rcx+160]
  5105. mov rax, QWORD PTR [r12+168]
  5106. mov QWORD PTR [r12+160], r10
  5107. sbb rax, QWORD PTR [rcx+168]
  5108. mov r9, QWORD PTR [r12+176]
  5109. mov QWORD PTR [r12+168], rax
  5110. sbb r9, QWORD PTR [rcx+176]
  5111. mov r10, QWORD PTR [r12+184]
  5112. mov QWORD PTR [r12+176], r9
  5113. sbb r10, QWORD PTR [rcx+184]
  5114. mov rax, QWORD PTR [r12+192]
  5115. mov QWORD PTR [r12+184], r10
  5116. sbb rax, QWORD PTR [rcx+192]
  5117. mov r9, QWORD PTR [r12+200]
  5118. mov QWORD PTR [r12+192], rax
  5119. sbb r9, QWORD PTR [rcx+200]
  5120. mov r10, QWORD PTR [r12+208]
  5121. mov QWORD PTR [r12+200], r9
  5122. sbb r10, QWORD PTR [rcx+208]
  5123. mov rax, QWORD PTR [r12+216]
  5124. mov QWORD PTR [r12+208], r10
  5125. sbb rax, QWORD PTR [rcx+216]
  5126. mov r9, QWORD PTR [r12+224]
  5127. mov QWORD PTR [r12+216], rax
  5128. sbb r9, QWORD PTR [rcx+224]
  5129. mov r10, QWORD PTR [r12+232]
  5130. mov QWORD PTR [r12+224], r9
  5131. sbb r10, QWORD PTR [rcx+232]
  5132. mov rax, QWORD PTR [r12+240]
  5133. mov QWORD PTR [r12+232], r10
  5134. sbb rax, QWORD PTR [rcx+240]
  5135. mov r9, QWORD PTR [r12+248]
  5136. mov QWORD PTR [r12+240], rax
  5137. sbb r9, QWORD PTR [rcx+248]
  5138. mov QWORD PTR [r12+248], r9
  ; Borrow out of z1 - z0 is taken from r11.
  5139. sbb r11, 0
  ; rsi = r + 128 bytes (qword 16 of r): where the middle term is added.
  5140. sub rsi, 128
  5141. ; Add: r[16..47] += z1 (now z1 - z2 - z0), 32 qwords; r[32..47] already holds the carry correction
  5142. mov rax, QWORD PTR [rsi]
  5143. add rax, QWORD PTR [r12]
  5144. mov r9, QWORD PTR [rsi+8]
  5145. mov QWORD PTR [rsi], rax
  5146. adc r9, QWORD PTR [r12+8]
  5147. mov r10, QWORD PTR [rsi+16]
  5148. mov QWORD PTR [rsi+8], r9
  5149. adc r10, QWORD PTR [r12+16]
  5150. mov rax, QWORD PTR [rsi+24]
  5151. mov QWORD PTR [rsi+16], r10
  5152. adc rax, QWORD PTR [r12+24]
  5153. mov r9, QWORD PTR [rsi+32]
  5154. mov QWORD PTR [rsi+24], rax
  5155. adc r9, QWORD PTR [r12+32]
  5156. mov r10, QWORD PTR [rsi+40]
  5157. mov QWORD PTR [rsi+32], r9
  5158. adc r10, QWORD PTR [r12+40]
  5159. mov rax, QWORD PTR [rsi+48]
  5160. mov QWORD PTR [rsi+40], r10
  5161. adc rax, QWORD PTR [r12+48]
  5162. mov r9, QWORD PTR [rsi+56]
  5163. mov QWORD PTR [rsi+48], rax
  5164. adc r9, QWORD PTR [r12+56]
  5165. mov r10, QWORD PTR [rsi+64]
  5166. mov QWORD PTR [rsi+56], r9
  5167. adc r10, QWORD PTR [r12+64]
  5168. mov rax, QWORD PTR [rsi+72]
  5169. mov QWORD PTR [rsi+64], r10
  5170. adc rax, QWORD PTR [r12+72]
  5171. mov r9, QWORD PTR [rsi+80]
  5172. mov QWORD PTR [rsi+72], rax
  5173. adc r9, QWORD PTR [r12+80]
  5174. mov r10, QWORD PTR [rsi+88]
  5175. mov QWORD PTR [rsi+80], r9
  5176. adc r10, QWORD PTR [r12+88]
  5177. mov rax, QWORD PTR [rsi+96]
  5178. mov QWORD PTR [rsi+88], r10
  5179. adc rax, QWORD PTR [r12+96]
  5180. mov r9, QWORD PTR [rsi+104]
  5181. mov QWORD PTR [rsi+96], rax
  5182. adc r9, QWORD PTR [r12+104]
  5183. mov r10, QWORD PTR [rsi+112]
  5184. mov QWORD PTR [rsi+104], r9
  5185. adc r10, QWORD PTR [r12+112]
  5186. mov rax, QWORD PTR [rsi+120]
  5187. mov QWORD PTR [rsi+112], r10
  5188. adc rax, QWORD PTR [r12+120]
  5189. mov r9, QWORD PTR [rsi+128]
  5190. mov QWORD PTR [rsi+120], rax
  5191. adc r9, QWORD PTR [r12+128]
  5192. mov r10, QWORD PTR [rsi+136]
  5193. mov QWORD PTR [rsi+128], r9
  5194. adc r10, QWORD PTR [r12+136]
  5195. mov rax, QWORD PTR [rsi+144]
  5196. mov QWORD PTR [rsi+136], r10
  5197. adc rax, QWORD PTR [r12+144]
  5198. mov r9, QWORD PTR [rsi+152]
  5199. mov QWORD PTR [rsi+144], rax
  5200. adc r9, QWORD PTR [r12+152]
  5201. mov r10, QWORD PTR [rsi+160]
  5202. mov QWORD PTR [rsi+152], r9
  5203. adc r10, QWORD PTR [r12+160]
  5204. mov rax, QWORD PTR [rsi+168]
  5205. mov QWORD PTR [rsi+160], r10
  5206. adc rax, QWORD PTR [r12+168]
  5207. mov r9, QWORD PTR [rsi+176]
  5208. mov QWORD PTR [rsi+168], rax
  5209. adc r9, QWORD PTR [r12+176]
  5210. mov r10, QWORD PTR [rsi+184]
  5211. mov QWORD PTR [rsi+176], r9
  5212. adc r10, QWORD PTR [r12+184]
  5213. mov rax, QWORD PTR [rsi+192]
  5214. mov QWORD PTR [rsi+184], r10
  5215. adc rax, QWORD PTR [r12+192]
  5216. mov r9, QWORD PTR [rsi+200]
  5217. mov QWORD PTR [rsi+192], rax
  5218. adc r9, QWORD PTR [r12+200]
  5219. mov r10, QWORD PTR [rsi+208]
  5220. mov QWORD PTR [rsi+200], r9
  5221. adc r10, QWORD PTR [r12+208]
  5222. mov rax, QWORD PTR [rsi+216]
  5223. mov QWORD PTR [rsi+208], r10
  5224. adc rax, QWORD PTR [r12+216]
  5225. mov r9, QWORD PTR [rsi+224]
  5226. mov QWORD PTR [rsi+216], rax
  5227. adc r9, QWORD PTR [r12+224]
  5228. mov r10, QWORD PTR [rsi+232]
  5229. mov QWORD PTR [rsi+224], r9
  5230. adc r10, QWORD PTR [r12+232]
  5231. mov rax, QWORD PTR [rsi+240]
  5232. mov QWORD PTR [rsi+232], r10
  5233. adc rax, QWORD PTR [r12+240]
  5234. mov r9, QWORD PTR [rsi+248]
  5235. mov QWORD PTR [rsi+240], rax
  5236. adc r9, QWORD PTR [r12+248]
  5237. mov QWORD PTR [rsi+248], r9
  ; Fold the final carry into r11 and store the total as qword 48 of r
  ; (byte offset 384), then move rsi back to r + 256 bytes (qword 32).
  5238. adc r11, 0
  5239. mov QWORD PTR [rcx+384], r11
  5240. add rsi, 128
  5241. ; Add: r[32..48] += z2[0..16] (17 qwords, r13 -> z2); qword 48 is the r11 value stored above
  5242. mov rax, QWORD PTR [rsi]
  5243. add rax, QWORD PTR [r13]
  5244. mov r9, QWORD PTR [rsi+8]
  5245. mov QWORD PTR [rsi], rax
  5246. adc r9, QWORD PTR [r13+8]
  5247. mov r10, QWORD PTR [rsi+16]
  5248. mov QWORD PTR [rsi+8], r9
  5249. adc r10, QWORD PTR [r13+16]
  5250. mov rax, QWORD PTR [rsi+24]
  5251. mov QWORD PTR [rsi+16], r10
  5252. adc rax, QWORD PTR [r13+24]
  5253. mov r9, QWORD PTR [rsi+32]
  5254. mov QWORD PTR [rsi+24], rax
  5255. adc r9, QWORD PTR [r13+32]
  5256. mov r10, QWORD PTR [rsi+40]
  5257. mov QWORD PTR [rsi+32], r9
  5258. adc r10, QWORD PTR [r13+40]
  5259. mov rax, QWORD PTR [rsi+48]
  5260. mov QWORD PTR [rsi+40], r10
  5261. adc rax, QWORD PTR [r13+48]
  5262. mov r9, QWORD PTR [rsi+56]
  5263. mov QWORD PTR [rsi+48], rax
  5264. adc r9, QWORD PTR [r13+56]
  5265. mov r10, QWORD PTR [rsi+64]
  5266. mov QWORD PTR [rsi+56], r9
  5267. adc r10, QWORD PTR [r13+64]
  5268. mov rax, QWORD PTR [rsi+72]
  5269. mov QWORD PTR [rsi+64], r10
  5270. adc rax, QWORD PTR [r13+72]
  5271. mov r9, QWORD PTR [rsi+80]
  5272. mov QWORD PTR [rsi+72], rax
  5273. adc r9, QWORD PTR [r13+80]
  5274. mov r10, QWORD PTR [rsi+88]
  5275. mov QWORD PTR [rsi+80], r9
  5276. adc r10, QWORD PTR [r13+88]
  5277. mov rax, QWORD PTR [rsi+96]
  5278. mov QWORD PTR [rsi+88], r10
  5279. adc rax, QWORD PTR [r13+96]
  5280. mov r9, QWORD PTR [rsi+104]
  5281. mov QWORD PTR [rsi+96], rax
  5282. adc r9, QWORD PTR [r13+104]
  5283. mov r10, QWORD PTR [rsi+112]
  5284. mov QWORD PTR [rsi+104], r9
  5285. adc r10, QWORD PTR [r13+112]
  5286. mov rax, QWORD PTR [rsi+120]
  5287. mov QWORD PTR [rsi+112], r10
  5288. adc rax, QWORD PTR [r13+120]
  5289. mov r9, QWORD PTR [rsi+128]
  5290. mov QWORD PTR [rsi+120], rax
  5291. adc r9, QWORD PTR [r13+128]
  5292. mov QWORD PTR [rsi+128], r9
  5293. ; Add to zero: r[49..63] = z2[17..31] + carry; these qwords of r are stored without being read first
  5294. mov rax, QWORD PTR [r13+136]
  5295. adc rax, 0
  5296. mov r9, QWORD PTR [r13+144]
  5297. mov QWORD PTR [rsi+136], rax
  5298. adc r9, 0
  5299. mov r10, QWORD PTR [r13+152]
  5300. mov QWORD PTR [rsi+144], r9
  5301. adc r10, 0
  5302. mov rax, QWORD PTR [r13+160]
  5303. mov QWORD PTR [rsi+152], r10
  5304. adc rax, 0
  5305. mov r9, QWORD PTR [r13+168]
  5306. mov QWORD PTR [rsi+160], rax
  5307. adc r9, 0
  5308. mov r10, QWORD PTR [r13+176]
  5309. mov QWORD PTR [rsi+168], r9
  5310. adc r10, 0
  5311. mov rax, QWORD PTR [r13+184]
  5312. mov QWORD PTR [rsi+176], r10
  5313. adc rax, 0
  5314. mov r9, QWORD PTR [r13+192]
  5315. mov QWORD PTR [rsi+184], rax
  5316. adc r9, 0
  5317. mov r10, QWORD PTR [r13+200]
  5318. mov QWORD PTR [rsi+192], r9
  5319. adc r10, 0
  5320. mov rax, QWORD PTR [r13+208]
  5321. mov QWORD PTR [rsi+200], r10
  5322. adc rax, 0
  5323. mov r9, QWORD PTR [r13+216]
  5324. mov QWORD PTR [rsi+208], rax
  5325. adc r9, 0
  5326. mov r10, QWORD PTR [r13+224]
  5327. mov QWORD PTR [rsi+216], r9
  5328. adc r10, 0
  5329. mov rax, QWORD PTR [r13+232]
  5330. mov QWORD PTR [rsi+224], r10
  5331. adc rax, 0
  5332. mov r9, QWORD PTR [r13+240]
  5333. mov QWORD PTR [rsi+232], rax
  5334. adc r9, 0
  5335. mov r10, QWORD PTR [r13+248]
  5336. mov QWORD PTR [rsi+240], r9
  5337. adc r10, 0
  5338. mov QWORD PTR [rsi+248], r10
  ; Release the frame and restore the saved registers in reverse push order.
  5339. add rsp, 808
  5340. pop rsi
  5341. pop rdi
  5342. pop r15
  5343. pop r14
  5344. pop r13
  5345. pop r12
  5346. ret
  5347. sp_2048_mul_avx2_32 ENDP
  5348. _text ENDS
  5349. ENDIF
  5350. ; /* Square a and put result in r. (r = a * a)
  5351. ; *
  5352. ; * r A single precision integer.
  5353. ; * a A single precision integer.
  5354. ; */
  5355. _text SEGMENT READONLY PARA
  5356. sp_2048_sqr_16 PROC
  5357. push r12
  5358. push r13
  5359. push r14
  5360. mov r8, rdx
  5361. sub rsp, 128
  5362. ; A[0] * A[0]
  5363. mov rax, QWORD PTR [r8]
  5364. mul rax
  5365. xor r11, r11
  5366. mov QWORD PTR [rsp], rax
  5367. mov r10, rdx
  5368. ; A[0] * A[1]
  5369. mov rax, QWORD PTR [r8+8]
  5370. mul QWORD PTR [r8]
  5371. xor r9, r9
  5372. add r10, rax
  5373. adc r11, rdx
  5374. adc r9, 0
  5375. add r10, rax
  5376. adc r11, rdx
  5377. adc r9, 0
  5378. mov QWORD PTR [rsp+8], r10
  5379. ; A[0] * A[2]
  5380. mov rax, QWORD PTR [r8+16]
  5381. mul QWORD PTR [r8]
  5382. xor r10, r10
  5383. add r11, rax
  5384. adc r9, rdx
  5385. adc r10, 0
  5386. add r11, rax
  5387. adc r9, rdx
  5388. adc r10, 0
  5389. ; A[1] * A[1]
  5390. mov rax, QWORD PTR [r8+8]
  5391. mul rax
  5392. add r11, rax
  5393. adc r9, rdx
  5394. adc r10, 0
  5395. mov QWORD PTR [rsp+16], r11
  5396. ; A[0] * A[3]
  5397. mov rax, QWORD PTR [r8+24]
  5398. mul QWORD PTR [r8]
  5399. xor r11, r11
  5400. add r9, rax
  5401. adc r10, rdx
  5402. adc r11, 0
  5403. add r9, rax
  5404. adc r10, rdx
  5405. adc r11, 0
  5406. ; A[1] * A[2]
  5407. mov rax, QWORD PTR [r8+16]
  5408. mul QWORD PTR [r8+8]
  5409. add r9, rax
  5410. adc r10, rdx
  5411. adc r11, 0
  5412. add r9, rax
  5413. adc r10, rdx
  5414. adc r11, 0
  5415. mov QWORD PTR [rsp+24], r9
  5416. ; A[0] * A[4]
  5417. mov rax, QWORD PTR [r8+32]
  5418. mul QWORD PTR [r8]
  5419. xor r9, r9
  5420. add r10, rax
  5421. adc r11, rdx
  5422. adc r9, 0
  5423. add r10, rax
  5424. adc r11, rdx
  5425. adc r9, 0
  5426. ; A[1] * A[3]
  5427. mov rax, QWORD PTR [r8+24]
  5428. mul QWORD PTR [r8+8]
  5429. add r10, rax
  5430. adc r11, rdx
  5431. adc r9, 0
  5432. add r10, rax
  5433. adc r11, rdx
  5434. adc r9, 0
  5435. ; A[2] * A[2]
  5436. mov rax, QWORD PTR [r8+16]
  5437. mul rax
  5438. add r10, rax
  5439. adc r11, rdx
  5440. adc r9, 0
  5441. mov QWORD PTR [rsp+32], r10
  5442. ; A[0] * A[5]
  5443. mov rax, QWORD PTR [r8+40]
  5444. mul QWORD PTR [r8]
  5445. xor r10, r10
  5446. xor r14, r14
  5447. mov r12, rax
  5448. mov r13, rdx
  5449. ; A[1] * A[4]
  5450. mov rax, QWORD PTR [r8+32]
  5451. mul QWORD PTR [r8+8]
  5452. add r12, rax
  5453. adc r13, rdx
  5454. adc r14, 0
  5455. ; A[2] * A[3]
  5456. mov rax, QWORD PTR [r8+24]
  5457. mul QWORD PTR [r8+16]
  5458. add r12, rax
  5459. adc r13, rdx
  5460. adc r14, 0
  5461. add r12, r12
  5462. adc r13, r13
  5463. adc r14, r14
  5464. add r11, r12
  5465. adc r9, r13
  5466. adc r10, r14
  5467. mov QWORD PTR [rsp+40], r11
  5468. ; A[0] * A[6]
  5469. mov rax, QWORD PTR [r8+48]
  5470. mul QWORD PTR [r8]
  5471. xor r11, r11
  5472. xor r14, r14
  5473. mov r12, rax
  5474. mov r13, rdx
  5475. ; A[1] * A[5]
  5476. mov rax, QWORD PTR [r8+40]
  5477. mul QWORD PTR [r8+8]
  5478. add r12, rax
  5479. adc r13, rdx
  5480. adc r14, 0
  5481. ; A[2] * A[4]
  5482. mov rax, QWORD PTR [r8+32]
  5483. mul QWORD PTR [r8+16]
  5484. add r12, rax
  5485. adc r13, rdx
  5486. adc r14, 0
  5487. ; A[3] * A[3]
  5488. mov rax, QWORD PTR [r8+24]
  5489. mul rax
  5490. add r12, r12
  5491. adc r13, r13
  5492. adc r14, r14
  5493. add r12, rax
  5494. adc r13, rdx
  5495. adc r14, 0
  5496. add r9, r12
  5497. adc r10, r13
  5498. adc r11, r14
  5499. mov QWORD PTR [rsp+48], r9
  5500. ; A[0] * A[7]
  5501. mov rax, QWORD PTR [r8+56]
  5502. mul QWORD PTR [r8]
  5503. xor r9, r9
  5504. xor r14, r14
  5505. mov r12, rax
  5506. mov r13, rdx
  5507. ; A[1] * A[6]
  5508. mov rax, QWORD PTR [r8+48]
  5509. mul QWORD PTR [r8+8]
  5510. add r12, rax
  5511. adc r13, rdx
  5512. adc r14, 0
  5513. ; A[2] * A[5]
  5514. mov rax, QWORD PTR [r8+40]
  5515. mul QWORD PTR [r8+16]
  5516. add r12, rax
  5517. adc r13, rdx
  5518. adc r14, 0
  5519. ; A[3] * A[4]
  5520. mov rax, QWORD PTR [r8+32]
  5521. mul QWORD PTR [r8+24]
  5522. add r12, rax
  5523. adc r13, rdx
  5524. adc r14, 0
  5525. add r12, r12
  5526. adc r13, r13
  5527. adc r14, r14
  5528. add r10, r12
  5529. adc r11, r13
  5530. adc r9, r14
  5531. mov QWORD PTR [rsp+56], r10
  5532. ; A[0] * A[8]
  5533. mov rax, QWORD PTR [r8+64]
  5534. mul QWORD PTR [r8]
  5535. xor r10, r10
  5536. xor r14, r14
  5537. mov r12, rax
  5538. mov r13, rdx
  5539. ; A[1] * A[7]
  5540. mov rax, QWORD PTR [r8+56]
  5541. mul QWORD PTR [r8+8]
  5542. add r12, rax
  5543. adc r13, rdx
  5544. adc r14, 0
  5545. ; A[2] * A[6]
  5546. mov rax, QWORD PTR [r8+48]
  5547. mul QWORD PTR [r8+16]
  5548. add r12, rax
  5549. adc r13, rdx
  5550. adc r14, 0
  5551. ; A[3] * A[5]
  5552. mov rax, QWORD PTR [r8+40]
  5553. mul QWORD PTR [r8+24]
  5554. add r12, rax
  5555. adc r13, rdx
  5556. adc r14, 0
  5557. ; A[4] * A[4]
  5558. mov rax, QWORD PTR [r8+32]
  5559. mul rax
  5560. add r12, r12
  5561. adc r13, r13
  5562. adc r14, r14
  5563. add r12, rax
  5564. adc r13, rdx
  5565. adc r14, 0
  5566. add r11, r12
  5567. adc r9, r13
  5568. adc r10, r14
  5569. mov QWORD PTR [rsp+64], r11
  5570. ; A[0] * A[9]
  5571. mov rax, QWORD PTR [r8+72]
  5572. mul QWORD PTR [r8]
  5573. xor r11, r11
  5574. xor r14, r14
  5575. mov r12, rax
  5576. mov r13, rdx
  5577. ; A[1] * A[8]
  5578. mov rax, QWORD PTR [r8+64]
  5579. mul QWORD PTR [r8+8]
  5580. add r12, rax
  5581. adc r13, rdx
  5582. adc r14, 0
  5583. ; A[2] * A[7]
  5584. mov rax, QWORD PTR [r8+56]
  5585. mul QWORD PTR [r8+16]
  5586. add r12, rax
  5587. adc r13, rdx
  5588. adc r14, 0
  5589. ; A[3] * A[6]
  5590. mov rax, QWORD PTR [r8+48]
  5591. mul QWORD PTR [r8+24]
  5592. add r12, rax
  5593. adc r13, rdx
  5594. adc r14, 0
  5595. ; A[4] * A[5]
  5596. mov rax, QWORD PTR [r8+40]
  5597. mul QWORD PTR [r8+32]
  5598. add r12, rax
  5599. adc r13, rdx
  5600. adc r14, 0
  5601. add r12, r12
  5602. adc r13, r13
  5603. adc r14, r14
  5604. add r9, r12
  5605. adc r10, r13
  5606. adc r11, r14
  5607. mov QWORD PTR [rsp+72], r9
  5608. ; A[0] * A[10]
  5609. mov rax, QWORD PTR [r8+80]
  5610. mul QWORD PTR [r8]
  5611. xor r9, r9
  5612. xor r14, r14
  5613. mov r12, rax
  5614. mov r13, rdx
  5615. ; A[1] * A[9]
  5616. mov rax, QWORD PTR [r8+72]
  5617. mul QWORD PTR [r8+8]
  5618. add r12, rax
  5619. adc r13, rdx
  5620. adc r14, 0
  5621. ; A[2] * A[8]
  5622. mov rax, QWORD PTR [r8+64]
  5623. mul QWORD PTR [r8+16]
  5624. add r12, rax
  5625. adc r13, rdx
  5626. adc r14, 0
  5627. ; A[3] * A[7]
  5628. mov rax, QWORD PTR [r8+56]
  5629. mul QWORD PTR [r8+24]
  5630. add r12, rax
  5631. adc r13, rdx
  5632. adc r14, 0
  5633. ; A[4] * A[6]
  5634. mov rax, QWORD PTR [r8+48]
  5635. mul QWORD PTR [r8+32]
  5636. add r12, rax
  5637. adc r13, rdx
  5638. adc r14, 0
  5639. ; A[5] * A[5]
  5640. mov rax, QWORD PTR [r8+40]
  5641. mul rax
  5642. add r12, r12
  5643. adc r13, r13
  5644. adc r14, r14
  5645. add r12, rax
  5646. adc r13, rdx
  5647. adc r14, 0
  5648. add r10, r12
  5649. adc r11, r13
  5650. adc r9, r14
  5651. mov QWORD PTR [rsp+80], r10
  5652. ; A[0] * A[11]
  5653. mov rax, QWORD PTR [r8+88]
  5654. mul QWORD PTR [r8]
  5655. xor r10, r10
  5656. xor r14, r14
  5657. mov r12, rax
  5658. mov r13, rdx
  5659. ; A[1] * A[10]
  5660. mov rax, QWORD PTR [r8+80]
  5661. mul QWORD PTR [r8+8]
  5662. add r12, rax
  5663. adc r13, rdx
  5664. adc r14, 0
  5665. ; A[2] * A[9]
  5666. mov rax, QWORD PTR [r8+72]
  5667. mul QWORD PTR [r8+16]
  5668. add r12, rax
  5669. adc r13, rdx
  5670. adc r14, 0
  5671. ; A[3] * A[8]
  5672. mov rax, QWORD PTR [r8+64]
  5673. mul QWORD PTR [r8+24]
  5674. add r12, rax
  5675. adc r13, rdx
  5676. adc r14, 0
  5677. ; A[4] * A[7]
  5678. mov rax, QWORD PTR [r8+56]
  5679. mul QWORD PTR [r8+32]
  5680. add r12, rax
  5681. adc r13, rdx
  5682. adc r14, 0
  5683. ; A[5] * A[6]
  5684. mov rax, QWORD PTR [r8+48]
  5685. mul QWORD PTR [r8+40]
  5686. add r12, rax
  5687. adc r13, rdx
  5688. adc r14, 0
  5689. add r12, r12
  5690. adc r13, r13
  5691. adc r14, r14
  5692. add r11, r12
  5693. adc r9, r13
  5694. adc r10, r14
  5695. mov QWORD PTR [rsp+88], r11
  5696. ; A[0] * A[12]
  5697. mov rax, QWORD PTR [r8+96]
  5698. mul QWORD PTR [r8]
  5699. xor r11, r11
  5700. xor r14, r14
  5701. mov r12, rax
  5702. mov r13, rdx
  5703. ; A[1] * A[11]
  5704. mov rax, QWORD PTR [r8+88]
  5705. mul QWORD PTR [r8+8]
  5706. add r12, rax
  5707. adc r13, rdx
  5708. adc r14, 0
  5709. ; A[2] * A[10]
  5710. mov rax, QWORD PTR [r8+80]
  5711. mul QWORD PTR [r8+16]
  5712. add r12, rax
  5713. adc r13, rdx
  5714. adc r14, 0
  5715. ; A[3] * A[9]
  5716. mov rax, QWORD PTR [r8+72]
  5717. mul QWORD PTR [r8+24]
  5718. add r12, rax
  5719. adc r13, rdx
  5720. adc r14, 0
  5721. ; A[4] * A[8]
  5722. mov rax, QWORD PTR [r8+64]
  5723. mul QWORD PTR [r8+32]
  5724. add r12, rax
  5725. adc r13, rdx
  5726. adc r14, 0
  5727. ; A[5] * A[7]
  5728. mov rax, QWORD PTR [r8+56]
  5729. mul QWORD PTR [r8+40]
  5730. add r12, rax
  5731. adc r13, rdx
  5732. adc r14, 0
  5733. ; A[6] * A[6]
  5734. mov rax, QWORD PTR [r8+48]
  5735. mul rax
  5736. add r12, r12
  5737. adc r13, r13
  5738. adc r14, r14
  5739. add r12, rax
  5740. adc r13, rdx
  5741. adc r14, 0
  5742. add r9, r12
  5743. adc r10, r13
  5744. adc r11, r14
  5745. mov QWORD PTR [rsp+96], r9
  5746. ; A[0] * A[13]
  5747. mov rax, QWORD PTR [r8+104]
  5748. mul QWORD PTR [r8]
  5749. xor r9, r9
  5750. xor r14, r14
  5751. mov r12, rax
  5752. mov r13, rdx
  5753. ; A[1] * A[12]
  5754. mov rax, QWORD PTR [r8+96]
  5755. mul QWORD PTR [r8+8]
  5756. add r12, rax
  5757. adc r13, rdx
  5758. adc r14, 0
  5759. ; A[2] * A[11]
  5760. mov rax, QWORD PTR [r8+88]
  5761. mul QWORD PTR [r8+16]
  5762. add r12, rax
  5763. adc r13, rdx
  5764. adc r14, 0
  5765. ; A[3] * A[10]
  5766. mov rax, QWORD PTR [r8+80]
  5767. mul QWORD PTR [r8+24]
  5768. add r12, rax
  5769. adc r13, rdx
  5770. adc r14, 0
  5771. ; A[4] * A[9]
  5772. mov rax, QWORD PTR [r8+72]
  5773. mul QWORD PTR [r8+32]
  5774. add r12, rax
  5775. adc r13, rdx
  5776. adc r14, 0
  5777. ; A[5] * A[8]
  5778. mov rax, QWORD PTR [r8+64]
  5779. mul QWORD PTR [r8+40]
  5780. add r12, rax
  5781. adc r13, rdx
  5782. adc r14, 0
  5783. ; A[6] * A[7]
  5784. mov rax, QWORD PTR [r8+56]
  5785. mul QWORD PTR [r8+48]
  5786. add r12, rax
  5787. adc r13, rdx
  5788. adc r14, 0
  5789. add r12, r12
  5790. adc r13, r13
  5791. adc r14, r14
  5792. add r10, r12
  5793. adc r11, r13
  5794. adc r9, r14
  5795. mov QWORD PTR [rsp+104], r10
  5796. ; A[0] * A[14]
  5797. mov rax, QWORD PTR [r8+112]
  5798. mul QWORD PTR [r8]
  5799. xor r10, r10
  5800. xor r14, r14
  5801. mov r12, rax
  5802. mov r13, rdx
  5803. ; A[1] * A[13]
  5804. mov rax, QWORD PTR [r8+104]
  5805. mul QWORD PTR [r8+8]
  5806. add r12, rax
  5807. adc r13, rdx
  5808. adc r14, 0
  5809. ; A[2] * A[12]
  5810. mov rax, QWORD PTR [r8+96]
  5811. mul QWORD PTR [r8+16]
  5812. add r12, rax
  5813. adc r13, rdx
  5814. adc r14, 0
  5815. ; A[3] * A[11]
  5816. mov rax, QWORD PTR [r8+88]
  5817. mul QWORD PTR [r8+24]
  5818. add r12, rax
  5819. adc r13, rdx
  5820. adc r14, 0
  5821. ; A[4] * A[10]
  5822. mov rax, QWORD PTR [r8+80]
  5823. mul QWORD PTR [r8+32]
  5824. add r12, rax
  5825. adc r13, rdx
  5826. adc r14, 0
  5827. ; A[5] * A[9]
  5828. mov rax, QWORD PTR [r8+72]
  5829. mul QWORD PTR [r8+40]
  5830. add r12, rax
  5831. adc r13, rdx
  5832. adc r14, 0
  5833. ; A[6] * A[8]
  5834. mov rax, QWORD PTR [r8+64]
  5835. mul QWORD PTR [r8+48]
  5836. add r12, rax
  5837. adc r13, rdx
  5838. adc r14, 0
  5839. ; A[7] * A[7]
  5840. mov rax, QWORD PTR [r8+56]
  5841. mul rax
  5842. add r12, r12
  5843. adc r13, r13
  5844. adc r14, r14
  5845. add r12, rax
  5846. adc r13, rdx
  5847. adc r14, 0
  5848. add r11, r12
  5849. adc r9, r13
  5850. adc r10, r14
  5851. mov QWORD PTR [rsp+112], r11
  5852. ; A[0] * A[15]
  5853. mov rax, QWORD PTR [r8+120]
  5854. mul QWORD PTR [r8]
  5855. xor r11, r11
  5856. xor r14, r14
  5857. mov r12, rax
  5858. mov r13, rdx
  5859. ; A[1] * A[14]
  5860. mov rax, QWORD PTR [r8+112]
  5861. mul QWORD PTR [r8+8]
  5862. add r12, rax
  5863. adc r13, rdx
  5864. adc r14, 0
  5865. ; A[2] * A[13]
  5866. mov rax, QWORD PTR [r8+104]
  5867. mul QWORD PTR [r8+16]
  5868. add r12, rax
  5869. adc r13, rdx
  5870. adc r14, 0
  5871. ; A[3] * A[12]
  5872. mov rax, QWORD PTR [r8+96]
  5873. mul QWORD PTR [r8+24]
  5874. add r12, rax
  5875. adc r13, rdx
  5876. adc r14, 0
  5877. ; A[4] * A[11]
  5878. mov rax, QWORD PTR [r8+88]
  5879. mul QWORD PTR [r8+32]
  5880. add r12, rax
  5881. adc r13, rdx
  5882. adc r14, 0
  5883. ; A[5] * A[10]
  5884. mov rax, QWORD PTR [r8+80]
  5885. mul QWORD PTR [r8+40]
  5886. add r12, rax
  5887. adc r13, rdx
  5888. adc r14, 0
  5889. ; A[6] * A[9]
  5890. mov rax, QWORD PTR [r8+72]
  5891. mul QWORD PTR [r8+48]
  5892. add r12, rax
  5893. adc r13, rdx
  5894. adc r14, 0
  5895. ; A[7] * A[8]
  5896. mov rax, QWORD PTR [r8+64]
  5897. mul QWORD PTR [r8+56]
  5898. add r12, rax
  5899. adc r13, rdx
  5900. adc r14, 0
  5901. add r12, r12
  5902. adc r13, r13
  5903. adc r14, r14
  5904. add r9, r12
  5905. adc r10, r13
  5906. adc r11, r14
  5907. mov QWORD PTR [rsp+120], r9
  5908. ; A[1] * A[15]
  5909. mov rax, QWORD PTR [r8+120]
  5910. mul QWORD PTR [r8+8]
  5911. xor r9, r9
  5912. xor r14, r14
  5913. mov r12, rax
  5914. mov r13, rdx
  5915. ; A[2] * A[14]
  5916. mov rax, QWORD PTR [r8+112]
  5917. mul QWORD PTR [r8+16]
  5918. add r12, rax
  5919. adc r13, rdx
  5920. adc r14, 0
  5921. ; A[3] * A[13]
  5922. mov rax, QWORD PTR [r8+104]
  5923. mul QWORD PTR [r8+24]
  5924. add r12, rax
  5925. adc r13, rdx
  5926. adc r14, 0
  5927. ; A[4] * A[12]
  5928. mov rax, QWORD PTR [r8+96]
  5929. mul QWORD PTR [r8+32]
  5930. add r12, rax
  5931. adc r13, rdx
  5932. adc r14, 0
  5933. ; A[5] * A[11]
  5934. mov rax, QWORD PTR [r8+88]
  5935. mul QWORD PTR [r8+40]
  5936. add r12, rax
  5937. adc r13, rdx
  5938. adc r14, 0
  5939. ; A[6] * A[10]
  5940. mov rax, QWORD PTR [r8+80]
  5941. mul QWORD PTR [r8+48]
  5942. add r12, rax
  5943. adc r13, rdx
  5944. adc r14, 0
  5945. ; A[7] * A[9]
  5946. mov rax, QWORD PTR [r8+72]
  5947. mul QWORD PTR [r8+56]
  5948. add r12, rax
  5949. adc r13, rdx
  5950. adc r14, 0
  5951. ; A[8] * A[8]
  5952. mov rax, QWORD PTR [r8+64]
  5953. mul rax
  5954. add r12, r12
  5955. adc r13, r13
  5956. adc r14, r14
  5957. add r12, rax
  5958. adc r13, rdx
  5959. adc r14, 0
  5960. add r10, r12
  5961. adc r11, r13
  5962. adc r9, r14
  5963. mov QWORD PTR [rcx+128], r10
  5964. ; A[2] * A[15]
  5965. mov rax, QWORD PTR [r8+120]
  5966. mul QWORD PTR [r8+16]
  5967. xor r10, r10
  5968. xor r14, r14
  5969. mov r12, rax
  5970. mov r13, rdx
  5971. ; A[3] * A[14]
  5972. mov rax, QWORD PTR [r8+112]
  5973. mul QWORD PTR [r8+24]
  5974. add r12, rax
  5975. adc r13, rdx
  5976. adc r14, 0
  5977. ; A[4] * A[13]
  5978. mov rax, QWORD PTR [r8+104]
  5979. mul QWORD PTR [r8+32]
  5980. add r12, rax
  5981. adc r13, rdx
  5982. adc r14, 0
  5983. ; A[5] * A[12]
  5984. mov rax, QWORD PTR [r8+96]
  5985. mul QWORD PTR [r8+40]
  5986. add r12, rax
  5987. adc r13, rdx
  5988. adc r14, 0
  5989. ; A[6] * A[11]
  5990. mov rax, QWORD PTR [r8+88]
  5991. mul QWORD PTR [r8+48]
  5992. add r12, rax
  5993. adc r13, rdx
  5994. adc r14, 0
  5995. ; A[7] * A[10]
  5996. mov rax, QWORD PTR [r8+80]
  5997. mul QWORD PTR [r8+56]
  5998. add r12, rax
  5999. adc r13, rdx
  6000. adc r14, 0
  6001. ; A[8] * A[9]
  6002. mov rax, QWORD PTR [r8+72]
  6003. mul QWORD PTR [r8+64]
  6004. add r12, rax
  6005. adc r13, rdx
  6006. adc r14, 0
  6007. add r12, r12
  6008. adc r13, r13
  6009. adc r14, r14
  6010. add r11, r12
  6011. adc r9, r13
  6012. adc r10, r14
  6013. mov QWORD PTR [rcx+136], r11
  6014. ; A[3] * A[15]
  6015. mov rax, QWORD PTR [r8+120]
  6016. mul QWORD PTR [r8+24]
  6017. xor r11, r11
  6018. xor r14, r14
  6019. mov r12, rax
  6020. mov r13, rdx
  6021. ; A[4] * A[14]
  6022. mov rax, QWORD PTR [r8+112]
  6023. mul QWORD PTR [r8+32]
  6024. add r12, rax
  6025. adc r13, rdx
  6026. adc r14, 0
  6027. ; A[5] * A[13]
  6028. mov rax, QWORD PTR [r8+104]
  6029. mul QWORD PTR [r8+40]
  6030. add r12, rax
  6031. adc r13, rdx
  6032. adc r14, 0
  6033. ; A[6] * A[12]
  6034. mov rax, QWORD PTR [r8+96]
  6035. mul QWORD PTR [r8+48]
  6036. add r12, rax
  6037. adc r13, rdx
  6038. adc r14, 0
  6039. ; A[7] * A[11]
  6040. mov rax, QWORD PTR [r8+88]
  6041. mul QWORD PTR [r8+56]
  6042. add r12, rax
  6043. adc r13, rdx
  6044. adc r14, 0
  6045. ; A[8] * A[10]
  6046. mov rax, QWORD PTR [r8+80]
  6047. mul QWORD PTR [r8+64]
  6048. add r12, rax
  6049. adc r13, rdx
  6050. adc r14, 0
  6051. ; A[9] * A[9]
  6052. mov rax, QWORD PTR [r8+72]
  6053. mul rax
  6054. add r12, r12
  6055. adc r13, r13
  6056. adc r14, r14
  6057. add r12, rax
  6058. adc r13, rdx
  6059. adc r14, 0
  6060. add r9, r12
  6061. adc r10, r13
  6062. adc r11, r14
  6063. mov QWORD PTR [rcx+144], r9
  6064. ; A[4] * A[15]
  6065. mov rax, QWORD PTR [r8+120]
  6066. mul QWORD PTR [r8+32]
  6067. xor r9, r9
  6068. xor r14, r14
  6069. mov r12, rax
  6070. mov r13, rdx
  6071. ; A[5] * A[14]
  6072. mov rax, QWORD PTR [r8+112]
  6073. mul QWORD PTR [r8+40]
  6074. add r12, rax
  6075. adc r13, rdx
  6076. adc r14, 0
  6077. ; A[6] * A[13]
  6078. mov rax, QWORD PTR [r8+104]
  6079. mul QWORD PTR [r8+48]
  6080. add r12, rax
  6081. adc r13, rdx
  6082. adc r14, 0
  6083. ; A[7] * A[12]
  6084. mov rax, QWORD PTR [r8+96]
  6085. mul QWORD PTR [r8+56]
  6086. add r12, rax
  6087. adc r13, rdx
  6088. adc r14, 0
  6089. ; A[8] * A[11]
  6090. mov rax, QWORD PTR [r8+88]
  6091. mul QWORD PTR [r8+64]
  6092. add r12, rax
  6093. adc r13, rdx
  6094. adc r14, 0
  6095. ; A[9] * A[10]
  6096. mov rax, QWORD PTR [r8+80]
  6097. mul QWORD PTR [r8+72]
  6098. add r12, rax
  6099. adc r13, rdx
  6100. adc r14, 0
  6101. add r12, r12
  6102. adc r13, r13
  6103. adc r14, r14
  6104. add r10, r12
  6105. adc r11, r13
  6106. adc r9, r14
  6107. mov QWORD PTR [rcx+152], r10
  6108. ; A[5] * A[15]
  6109. mov rax, QWORD PTR [r8+120]
  6110. mul QWORD PTR [r8+40]
  6111. xor r10, r10
  6112. xor r14, r14
  6113. mov r12, rax
  6114. mov r13, rdx
  6115. ; A[6] * A[14]
  6116. mov rax, QWORD PTR [r8+112]
  6117. mul QWORD PTR [r8+48]
  6118. add r12, rax
  6119. adc r13, rdx
  6120. adc r14, 0
  6121. ; A[7] * A[13]
  6122. mov rax, QWORD PTR [r8+104]
  6123. mul QWORD PTR [r8+56]
  6124. add r12, rax
  6125. adc r13, rdx
  6126. adc r14, 0
  6127. ; A[8] * A[12]
  6128. mov rax, QWORD PTR [r8+96]
  6129. mul QWORD PTR [r8+64]
  6130. add r12, rax
  6131. adc r13, rdx
  6132. adc r14, 0
  6133. ; A[9] * A[11]
  6134. mov rax, QWORD PTR [r8+88]
  6135. mul QWORD PTR [r8+72]
  6136. add r12, rax
  6137. adc r13, rdx
  6138. adc r14, 0
  6139. ; A[10] * A[10]
  6140. mov rax, QWORD PTR [r8+80]
  6141. mul rax
  6142. add r12, r12
  6143. adc r13, r13
  6144. adc r14, r14
  6145. add r12, rax
  6146. adc r13, rdx
  6147. adc r14, 0
  6148. add r11, r12
  6149. adc r9, r13
  6150. adc r10, r14
  6151. mov QWORD PTR [rcx+160], r11
  6152. ; A[6] * A[15]
  6153. mov rax, QWORD PTR [r8+120]
  6154. mul QWORD PTR [r8+48]
  6155. xor r11, r11
  6156. xor r14, r14
  6157. mov r12, rax
  6158. mov r13, rdx
  6159. ; A[7] * A[14]
  6160. mov rax, QWORD PTR [r8+112]
  6161. mul QWORD PTR [r8+56]
  6162. add r12, rax
  6163. adc r13, rdx
  6164. adc r14, 0
  6165. ; A[8] * A[13]
  6166. mov rax, QWORD PTR [r8+104]
  6167. mul QWORD PTR [r8+64]
  6168. add r12, rax
  6169. adc r13, rdx
  6170. adc r14, 0
  6171. ; A[9] * A[12]
  6172. mov rax, QWORD PTR [r8+96]
  6173. mul QWORD PTR [r8+72]
  6174. add r12, rax
  6175. adc r13, rdx
  6176. adc r14, 0
  6177. ; A[10] * A[11]
  6178. mov rax, QWORD PTR [r8+88]
  6179. mul QWORD PTR [r8+80]
  6180. add r12, rax
  6181. adc r13, rdx
  6182. adc r14, 0
  6183. add r12, r12
  6184. adc r13, r13
  6185. adc r14, r14
  6186. add r9, r12
  6187. adc r10, r13
  6188. adc r11, r14
  6189. mov QWORD PTR [rcx+168], r9
  6190. ; A[7] * A[15]
  6191. mov rax, QWORD PTR [r8+120]
  6192. mul QWORD PTR [r8+56]
  6193. xor r9, r9
  6194. xor r14, r14
  6195. mov r12, rax
  6196. mov r13, rdx
  6197. ; A[8] * A[14]
  6198. mov rax, QWORD PTR [r8+112]
  6199. mul QWORD PTR [r8+64]
  6200. add r12, rax
  6201. adc r13, rdx
  6202. adc r14, 0
  6203. ; A[9] * A[13]
  6204. mov rax, QWORD PTR [r8+104]
  6205. mul QWORD PTR [r8+72]
  6206. add r12, rax
  6207. adc r13, rdx
  6208. adc r14, 0
  6209. ; A[10] * A[12]
  6210. mov rax, QWORD PTR [r8+96]
  6211. mul QWORD PTR [r8+80]
  6212. add r12, rax
  6213. adc r13, rdx
  6214. adc r14, 0
  6215. ; A[11] * A[11]
  6216. mov rax, QWORD PTR [r8+88]
  6217. mul rax
  6218. add r12, r12
  6219. adc r13, r13
  6220. adc r14, r14
  6221. add r12, rax
  6222. adc r13, rdx
  6223. adc r14, 0
  6224. add r10, r12
  6225. adc r11, r13
  6226. adc r9, r14
  6227. mov QWORD PTR [rcx+176], r10
  6228. ; A[8] * A[15]
  6229. mov rax, QWORD PTR [r8+120]
  6230. mul QWORD PTR [r8+64]
  6231. xor r10, r10
  6232. xor r14, r14
  6233. mov r12, rax
  6234. mov r13, rdx
  6235. ; A[9] * A[14]
  6236. mov rax, QWORD PTR [r8+112]
  6237. mul QWORD PTR [r8+72]
  6238. add r12, rax
  6239. adc r13, rdx
  6240. adc r14, 0
  6241. ; A[10] * A[13]
  6242. mov rax, QWORD PTR [r8+104]
  6243. mul QWORD PTR [r8+80]
  6244. add r12, rax
  6245. adc r13, rdx
  6246. adc r14, 0
  6247. ; A[11] * A[12]
  6248. mov rax, QWORD PTR [r8+96]
  6249. mul QWORD PTR [r8+88]
  6250. add r12, rax
  6251. adc r13, rdx
  6252. adc r14, 0
  6253. add r12, r12
  6254. adc r13, r13
  6255. adc r14, r14
  6256. add r11, r12
  6257. adc r9, r13
  6258. adc r10, r14
  6259. mov QWORD PTR [rcx+184], r11
  6260. ; A[9] * A[15]
  6261. mov rax, QWORD PTR [r8+120]
  6262. mul QWORD PTR [r8+72]
  6263. xor r11, r11
  6264. xor r14, r14
  6265. mov r12, rax
  6266. mov r13, rdx
  6267. ; A[10] * A[14]
  6268. mov rax, QWORD PTR [r8+112]
  6269. mul QWORD PTR [r8+80]
  6270. add r12, rax
  6271. adc r13, rdx
  6272. adc r14, 0
  6273. ; A[11] * A[13]
  6274. mov rax, QWORD PTR [r8+104]
  6275. mul QWORD PTR [r8+88]
  6276. add r12, rax
  6277. adc r13, rdx
  6278. adc r14, 0
  6279. ; A[12] * A[12]
  6280. mov rax, QWORD PTR [r8+96]
  6281. mul rax
  6282. add r12, r12
  6283. adc r13, r13
  6284. adc r14, r14
  6285. add r12, rax
  6286. adc r13, rdx
  6287. adc r14, 0
  6288. add r9, r12
  6289. adc r10, r13
  6290. adc r11, r14
  6291. mov QWORD PTR [rcx+192], r9
  6292. ; A[10] * A[15]
  6293. mov rax, QWORD PTR [r8+120]
  6294. mul QWORD PTR [r8+80]
  6295. xor r9, r9
  6296. xor r14, r14
  6297. mov r12, rax
  6298. mov r13, rdx
  6299. ; A[11] * A[14]
  6300. mov rax, QWORD PTR [r8+112]
  6301. mul QWORD PTR [r8+88]
  6302. add r12, rax
  6303. adc r13, rdx
  6304. adc r14, 0
  6305. ; A[12] * A[13]
  6306. mov rax, QWORD PTR [r8+104]
  6307. mul QWORD PTR [r8+96]
  6308. add r12, rax
  6309. adc r13, rdx
  6310. adc r14, 0
  6311. add r12, r12
  6312. adc r13, r13
  6313. adc r14, r14
  6314. add r10, r12
  6315. adc r11, r13
  6316. adc r9, r14
  6317. mov QWORD PTR [rcx+200], r10
  6318. ; A[11] * A[15]
  6319. mov rax, QWORD PTR [r8+120]
  6320. mul QWORD PTR [r8+88]
  6321. xor r10, r10
  6322. add r11, rax
  6323. adc r9, rdx
  6324. adc r10, 0
  6325. add r11, rax
  6326. adc r9, rdx
  6327. adc r10, 0
  6328. ; A[12] * A[14]
  6329. mov rax, QWORD PTR [r8+112]
  6330. mul QWORD PTR [r8+96]
  6331. add r11, rax
  6332. adc r9, rdx
  6333. adc r10, 0
  6334. add r11, rax
  6335. adc r9, rdx
  6336. adc r10, 0
  6337. ; A[13] * A[13]
  6338. mov rax, QWORD PTR [r8+104]
  6339. mul rax
  6340. add r11, rax
  6341. adc r9, rdx
  6342. adc r10, 0
  6343. mov QWORD PTR [rcx+208], r11
  6344. ; A[12] * A[15]
  6345. mov rax, QWORD PTR [r8+120]
  6346. mul QWORD PTR [r8+96]
  6347. xor r11, r11
  6348. add r9, rax
  6349. adc r10, rdx
  6350. adc r11, 0
  6351. add r9, rax
  6352. adc r10, rdx
  6353. adc r11, 0
  6354. ; A[13] * A[14]
  6355. mov rax, QWORD PTR [r8+112]
  6356. mul QWORD PTR [r8+104]
  6357. add r9, rax
  6358. adc r10, rdx
  6359. adc r11, 0
  6360. add r9, rax
  6361. adc r10, rdx
  6362. adc r11, 0
  6363. mov QWORD PTR [rcx+216], r9
  6364. ; A[13] * A[15]
  6365. mov rax, QWORD PTR [r8+120]
  6366. mul QWORD PTR [r8+104]
  6367. xor r9, r9
  6368. add r10, rax
  6369. adc r11, rdx
  6370. adc r9, 0
  6371. add r10, rax
  6372. adc r11, rdx
  6373. adc r9, 0
  6374. ; A[14] * A[14]
  6375. mov rax, QWORD PTR [r8+112]
  6376. mul rax
  6377. add r10, rax
  6378. adc r11, rdx
  6379. adc r9, 0
  6380. mov QWORD PTR [rcx+224], r10
  6381. ; A[14] * A[15]
  6382. mov rax, QWORD PTR [r8+120]
  6383. mul QWORD PTR [r8+112]
  6384. xor r10, r10
  6385. add r11, rax
  6386. adc r9, rdx
  6387. adc r10, 0
  6388. add r11, rax
  6389. adc r9, rdx
  6390. adc r10, 0
  6391. mov QWORD PTR [rcx+232], r11
  6392. ; A[15] * A[15]
  6393. mov rax, QWORD PTR [r8+120]
  6394. mul rax
  6395. add r9, rax
  6396. adc r10, rdx
  6397. mov QWORD PTR [rcx+240], r9
  6398. mov QWORD PTR [rcx+248], r10
  6399. mov rax, QWORD PTR [rsp]
  6400. mov rdx, QWORD PTR [rsp+8]
  6401. mov r12, QWORD PTR [rsp+16]
  6402. mov r13, QWORD PTR [rsp+24]
  6403. mov QWORD PTR [rcx], rax
  6404. mov QWORD PTR [rcx+8], rdx
  6405. mov QWORD PTR [rcx+16], r12
  6406. mov QWORD PTR [rcx+24], r13
  6407. mov rax, QWORD PTR [rsp+32]
  6408. mov rdx, QWORD PTR [rsp+40]
  6409. mov r12, QWORD PTR [rsp+48]
  6410. mov r13, QWORD PTR [rsp+56]
  6411. mov QWORD PTR [rcx+32], rax
  6412. mov QWORD PTR [rcx+40], rdx
  6413. mov QWORD PTR [rcx+48], r12
  6414. mov QWORD PTR [rcx+56], r13
  6415. mov rax, QWORD PTR [rsp+64]
  6416. mov rdx, QWORD PTR [rsp+72]
  6417. mov r12, QWORD PTR [rsp+80]
  6418. mov r13, QWORD PTR [rsp+88]
  6419. mov QWORD PTR [rcx+64], rax
  6420. mov QWORD PTR [rcx+72], rdx
  6421. mov QWORD PTR [rcx+80], r12
  6422. mov QWORD PTR [rcx+88], r13
  6423. mov rax, QWORD PTR [rsp+96]
  6424. mov rdx, QWORD PTR [rsp+104]
  6425. mov r12, QWORD PTR [rsp+112]
  6426. mov r13, QWORD PTR [rsp+120]
  6427. mov QWORD PTR [rcx+96], rax
  6428. mov QWORD PTR [rcx+104], rdx
  6429. mov QWORD PTR [rcx+112], r12
  6430. mov QWORD PTR [rcx+120], r13
  6431. add rsp, 128
  6432. pop r14
  6433. pop r13
  6434. pop r12
  6435. ret
  6436. sp_2048_sqr_16 ENDP
  6437. _text ENDS
  6438. IFDEF HAVE_INTEL_AVX2
  6439. ; /* Square a and put result in r. (r = a * a)
  6440. ; *
  6441. ; * r A single precision integer that receives the result of the squaring.
  6442. ; * a A single precision integer to square, read as sixteen 64-bit words.
  6443. ; */
  6444. _text SEGMENT READONLY PARA
  6445. sp_2048_sqr_avx2_16 PROC
  6446. push rbp
  6447. push r12
  6448. push r13
  6449. push r14
  6450. push r15
  6451. push rdi
  6452. push rsi
  6453. push rbx
  6454. mov r8, rcx
  6455. mov r9, rdx
  6456. sub rsp, 128
  6457. cmp r9, r8
  6458. mov rbp, rsp
  6459. cmovne rbp, r8
  6460. add r8, 128
  6461. xor r13, r13
  6462. ; Diagonal 1
  6463. ; Zero into %r9
  6464. ; Zero into %r10
  6465. ; A[1] x A[0]
  6466. mov rdx, QWORD PTR [r9]
  6467. mulx r11, r10, QWORD PTR [r9+8]
  6468. ; A[2] x A[0]
  6469. mulx r12, rax, QWORD PTR [r9+16]
  6470. adcx r11, rax
  6471. adox r12, r13
  6472. mov QWORD PTR [rbp+8], r10
  6473. mov QWORD PTR [rbp+16], r11
  6474. ; Zero into %r8
  6475. ; Zero into %r9
  6476. ; A[3] x A[0]
  6477. mulx r10, rax, QWORD PTR [r9+24]
  6478. adcx r12, rax
  6479. adox r10, r13
  6480. ; A[4] x A[0]
  6481. mulx r11, rax, QWORD PTR [r9+32]
  6482. adcx r10, rax
  6483. adox r11, r13
  6484. mov QWORD PTR [rbp+24], r12
  6485. mov QWORD PTR [rbp+32], r10
  6486. ; Zero into %r10
  6487. ; Zero into %r8
  6488. ; A[5] x A[0]
  6489. mulx r12, rax, QWORD PTR [r9+40]
  6490. adcx r11, rax
  6491. adox r12, r13
  6492. ; A[6] x A[0]
  6493. mulx r10, rax, QWORD PTR [r9+48]
  6494. adcx r12, rax
  6495. adox r10, r13
  6496. mov QWORD PTR [rbp+40], r11
  6497. mov QWORD PTR [rbp+48], r12
  6498. ; Zero into %r9
  6499. ; Zero into %r10
  6500. ; A[7] x A[0]
  6501. mulx r11, rax, QWORD PTR [r9+56]
  6502. adcx r10, rax
  6503. adox r11, r13
  6504. ; A[8] x A[0]
  6505. mulx r12, rax, QWORD PTR [r9+64]
  6506. adcx r11, rax
  6507. adox r12, r13
  6508. mov QWORD PTR [rbp+56], r10
  6509. mov QWORD PTR [rbp+64], r11
  6510. ; Zero into %r8
  6511. ; Zero into %r9
  6512. ; A[9] x A[0]
  6513. mulx r10, rax, QWORD PTR [r9+72]
  6514. adcx r12, rax
  6515. adox r10, r13
  6516. ; A[10] x A[0]
  6517. mulx r11, rax, QWORD PTR [r9+80]
  6518. adcx r10, rax
  6519. adox r11, r13
  6520. mov QWORD PTR [rbp+72], r12
  6521. mov QWORD PTR [rbp+80], r10
  6522. ; No load %r13 - %r10
  6523. ; A[11] x A[0]
  6524. mulx r15, rax, QWORD PTR [r9+88]
  6525. adcx r11, rax
  6526. adox r15, r13
  6527. ; A[12] x A[0]
  6528. mulx rdi, rax, QWORD PTR [r9+96]
  6529. adcx r15, rax
  6530. adox rdi, r13
  6531. mov QWORD PTR [rbp+88], r11
  6532. ; No store %r13 - %r10
  6533. ; No load %r15 - %r9
  6534. ; A[13] x A[0]
  6535. mulx rsi, rax, QWORD PTR [r9+104]
  6536. adcx rdi, rax
  6537. adox rsi, r13
  6538. ; A[14] x A[0]
  6539. mulx rbx, rax, QWORD PTR [r9+112]
  6540. adcx rsi, rax
  6541. adox rbx, r13
  6542. ; No store %r14 - %r8
  6543. ; No store %r15 - %r9
  6544. ; Zero into %r8
  6545. ; Zero into %r9
  6546. ; A[15] x A[0]
  6547. mulx r10, rax, QWORD PTR [r9+120]
  6548. adcx rbx, rax
  6549. adox r10, r13
  6550. ; No store %rbx - %r10
  6551. ; Carry
  6552. adcx r10, r13
  6553. mov r14, r13
  6554. adcx r14, r13
  6555. adox r14, r13
  6556. mov QWORD PTR [r8], r10
  6557. ; Diagonal 2
  6558. mov r10, QWORD PTR [rbp+24]
  6559. mov r11, QWORD PTR [rbp+32]
  6560. mov r12, QWORD PTR [rbp+40]
  6561. ; A[2] x A[1]
  6562. mov rdx, QWORD PTR [r9+8]
  6563. mulx rcx, rax, QWORD PTR [r9+16]
  6564. adcx r10, rax
  6565. adox r11, rcx
  6566. ; A[3] x A[1]
  6567. mulx rcx, rax, QWORD PTR [r9+24]
  6568. adcx r11, rax
  6569. adox r12, rcx
  6570. mov QWORD PTR [rbp+24], r10
  6571. mov QWORD PTR [rbp+32], r11
  6572. mov r10, QWORD PTR [rbp+48]
  6573. mov r11, QWORD PTR [rbp+56]
  6574. ; A[4] x A[1]
  6575. mulx rcx, rax, QWORD PTR [r9+32]
  6576. adcx r12, rax
  6577. adox r10, rcx
  6578. ; A[5] x A[1]
  6579. mulx rcx, rax, QWORD PTR [r9+40]
  6580. adcx r10, rax
  6581. adox r11, rcx
  6582. mov QWORD PTR [rbp+40], r12
  6583. mov QWORD PTR [rbp+48], r10
  6584. mov r12, QWORD PTR [rbp+64]
  6585. mov r10, QWORD PTR [rbp+72]
  6586. ; A[6] x A[1]
  6587. mulx rcx, rax, QWORD PTR [r9+48]
  6588. adcx r11, rax
  6589. adox r12, rcx
  6590. ; A[7] x A[1]
  6591. mulx rcx, rax, QWORD PTR [r9+56]
  6592. adcx r12, rax
  6593. adox r10, rcx
  6594. mov QWORD PTR [rbp+56], r11
  6595. mov QWORD PTR [rbp+64], r12
  6596. mov r11, QWORD PTR [rbp+80]
  6597. mov r12, QWORD PTR [rbp+88]
  6598. ; A[8] x A[1]
  6599. mulx rcx, rax, QWORD PTR [r9+64]
  6600. adcx r10, rax
  6601. adox r11, rcx
  6602. ; A[9] x A[1]
  6603. mulx rcx, rax, QWORD PTR [r9+72]
  6604. adcx r11, rax
  6605. adox r12, rcx
  6606. mov QWORD PTR [rbp+72], r10
  6607. mov QWORD PTR [rbp+80], r11
  6608. ; No load %r13 - %r8
  6609. ; A[10] x A[1]
  6610. mulx rcx, rax, QWORD PTR [r9+80]
  6611. adcx r12, rax
  6612. adox r15, rcx
  6613. ; A[11] x A[1]
  6614. mulx rcx, rax, QWORD PTR [r9+88]
  6615. adcx r15, rax
  6616. adox rdi, rcx
  6617. mov QWORD PTR [rbp+88], r12
  6618. ; No store %r13 - %r8
  6619. ; No load %r15 - %r10
  6620. ; A[12] x A[1]
  6621. mulx rcx, rax, QWORD PTR [r9+96]
  6622. adcx rdi, rax
  6623. adox rsi, rcx
  6624. ; A[13] x A[1]
  6625. mulx rcx, rax, QWORD PTR [r9+104]
  6626. adcx rsi, rax
  6627. adox rbx, rcx
  6628. ; No store %r14 - %r9
  6629. ; No store %r15 - %r10
  6630. mov r11, QWORD PTR [r8]
  6631. ; Zero into %r10
  6632. ; A[14] x A[1]
  6633. mulx rcx, rax, QWORD PTR [r9+112]
  6634. adcx rbx, rax
  6635. adox r11, rcx
  6636. ; A[15] x A[1]
  6637. mulx r12, rax, QWORD PTR [r9+120]
  6638. adcx r11, rax
  6639. adox r12, r13
  6640. ; No store %rbx - %r8
  6641. mov QWORD PTR [r8], r11
  6642. ; Zero into %r8
  6643. ; Zero into %r9
  6644. ; A[15] x A[2]
  6645. mov rdx, QWORD PTR [r9+16]
  6646. mulx r10, rax, QWORD PTR [r9+120]
  6647. adcx r12, rax
  6648. adox r10, r13
  6649. mov QWORD PTR [r8+8], r12
  6650. ; Carry
  6651. adcx r10, r14
  6652. mov r14, r13
  6653. adcx r14, r13
  6654. adox r14, r13
  6655. mov QWORD PTR [r8+16], r10
  6656. ; Diagonal 3
  6657. mov r10, QWORD PTR [rbp+40]
  6658. mov r11, QWORD PTR [rbp+48]
  6659. mov r12, QWORD PTR [rbp+56]
  6660. ; A[3] x A[2]
  6661. mulx rcx, rax, QWORD PTR [r9+24]
  6662. adcx r10, rax
  6663. adox r11, rcx
  6664. ; A[4] x A[2]
  6665. mulx rcx, rax, QWORD PTR [r9+32]
  6666. adcx r11, rax
  6667. adox r12, rcx
  6668. mov QWORD PTR [rbp+40], r10
  6669. mov QWORD PTR [rbp+48], r11
  6670. mov r10, QWORD PTR [rbp+64]
  6671. mov r11, QWORD PTR [rbp+72]
  6672. ; A[5] x A[2]
  6673. mulx rcx, rax, QWORD PTR [r9+40]
  6674. adcx r12, rax
  6675. adox r10, rcx
  6676. ; A[6] x A[2]
  6677. mulx rcx, rax, QWORD PTR [r9+48]
  6678. adcx r10, rax
  6679. adox r11, rcx
  6680. mov QWORD PTR [rbp+56], r12
  6681. mov QWORD PTR [rbp+64], r10
  6682. mov r12, QWORD PTR [rbp+80]
  6683. mov r10, QWORD PTR [rbp+88]
  6684. ; A[7] x A[2]
  6685. mulx rcx, rax, QWORD PTR [r9+56]
  6686. adcx r11, rax
  6687. adox r12, rcx
  6688. ; A[8] x A[2]
  6689. mulx rcx, rax, QWORD PTR [r9+64]
  6690. adcx r12, rax
  6691. adox r10, rcx
  6692. mov QWORD PTR [rbp+72], r11
  6693. mov QWORD PTR [rbp+80], r12
  6694. ; No load %r13 - %r9
  6695. ; A[9] x A[2]
  6696. mulx rcx, rax, QWORD PTR [r9+72]
  6697. adcx r10, rax
  6698. adox r15, rcx
  6699. ; A[10] x A[2]
  6700. mulx rcx, rax, QWORD PTR [r9+80]
  6701. adcx r15, rax
  6702. adox rdi, rcx
  6703. mov QWORD PTR [rbp+88], r10
  6704. ; No store %r13 - %r9
  6705. ; No load %r15 - %r8
  6706. ; A[11] x A[2]
  6707. mulx rcx, rax, QWORD PTR [r9+88]
  6708. adcx rdi, rax
  6709. adox rsi, rcx
  6710. ; A[12] x A[2]
  6711. mulx rcx, rax, QWORD PTR [r9+96]
  6712. adcx rsi, rax
  6713. adox rbx, rcx
  6714. ; No store %r14 - %r10
  6715. ; No store %r15 - %r8
  6716. mov r12, QWORD PTR [r8]
  6717. mov r10, QWORD PTR [r8+8]
  6718. ; A[13] x A[2]
  6719. mulx rcx, rax, QWORD PTR [r9+104]
  6720. adcx rbx, rax
  6721. adox r12, rcx
  6722. ; A[14] x A[2]
  6723. mulx rcx, rax, QWORD PTR [r9+112]
  6724. adcx r12, rax
  6725. adox r10, rcx
  6726. ; No store %rbx - %r9
  6727. mov QWORD PTR [r8], r12
  6728. mov r11, QWORD PTR [r8+16]
  6729. ; Zero into %r10
  6730. ; A[14] x A[3]
  6731. mov rdx, QWORD PTR [r9+24]
  6732. mulx rcx, rax, QWORD PTR [r9+112]
  6733. adcx r10, rax
  6734. adox r11, rcx
  6735. ; A[14] x A[4]
  6736. mov rdx, QWORD PTR [r9+32]
  6737. mulx r12, rax, QWORD PTR [r9+112]
  6738. adcx r11, rax
  6739. adox r12, r13
  6740. mov QWORD PTR [r8+8], r10
  6741. mov QWORD PTR [r8+16], r11
  6742. ; Zero into %r8
  6743. ; Zero into %r9
  6744. ; A[14] x A[5]
  6745. mov rdx, QWORD PTR [r9+40]
  6746. mulx r10, rax, QWORD PTR [r9+112]
  6747. adcx r12, rax
  6748. adox r10, r13
  6749. mov QWORD PTR [r8+24], r12
  6750. ; Carry
  6751. adcx r10, r14
  6752. mov r14, r13
  6753. adcx r14, r13
  6754. adox r14, r13
  6755. mov QWORD PTR [r8+32], r10
  6756. ; Diagonal 4
  6757. mov r10, QWORD PTR [rbp+56]
  6758. mov r11, QWORD PTR [rbp+64]
  6759. mov r12, QWORD PTR [rbp+72]
  6760. ; A[4] x A[3]
  6761. mov rdx, QWORD PTR [r9+24]
  6762. mulx rcx, rax, QWORD PTR [r9+32]
  6763. adcx r10, rax
  6764. adox r11, rcx
  6765. ; A[5] x A[3]
  6766. mulx rcx, rax, QWORD PTR [r9+40]
  6767. adcx r11, rax
  6768. adox r12, rcx
  6769. mov QWORD PTR [rbp+56], r10
  6770. mov QWORD PTR [rbp+64], r11
  6771. mov r10, QWORD PTR [rbp+80]
  6772. mov r11, QWORD PTR [rbp+88]
  6773. ; A[6] x A[3]
  6774. mulx rcx, rax, QWORD PTR [r9+48]
  6775. adcx r12, rax
  6776. adox r10, rcx
  6777. ; A[7] x A[3]
  6778. mulx rcx, rax, QWORD PTR [r9+56]
  6779. adcx r10, rax
  6780. adox r11, rcx
  6781. mov QWORD PTR [rbp+72], r12
  6782. mov QWORD PTR [rbp+80], r10
  6783. ; No load %r13 - %r10
  6784. ; A[8] x A[3]
  6785. mulx rcx, rax, QWORD PTR [r9+64]
  6786. adcx r11, rax
  6787. adox r15, rcx
  6788. ; A[9] x A[3]
  6789. mulx rcx, rax, QWORD PTR [r9+72]
  6790. adcx r15, rax
  6791. adox rdi, rcx
  6792. mov QWORD PTR [rbp+88], r11
  6793. ; No store %r13 - %r10
  6794. ; No load %r15 - %r9
  6795. ; A[10] x A[3]
  6796. mulx rcx, rax, QWORD PTR [r9+80]
  6797. adcx rdi, rax
  6798. adox rsi, rcx
  6799. ; A[11] x A[3]
  6800. mulx rcx, rax, QWORD PTR [r9+88]
  6801. adcx rsi, rax
  6802. adox rbx, rcx
  6803. ; No store %r14 - %r8
  6804. ; No store %r15 - %r9
  6805. mov r10, QWORD PTR [r8]
  6806. mov r11, QWORD PTR [r8+8]
  6807. ; A[12] x A[3]
  6808. mulx rcx, rax, QWORD PTR [r9+96]
  6809. adcx rbx, rax
  6810. adox r10, rcx
  6811. ; A[13] x A[3]
  6812. mulx rcx, rax, QWORD PTR [r9+104]
  6813. adcx r10, rax
  6814. adox r11, rcx
  6815. ; No store %rbx - %r10
  6816. mov QWORD PTR [r8], r10
  6817. mov r12, QWORD PTR [r8+16]
  6818. mov r10, QWORD PTR [r8+24]
  6819. ; A[13] x A[4]
  6820. mov rdx, QWORD PTR [r9+32]
  6821. mulx rcx, rax, QWORD PTR [r9+104]
  6822. adcx r11, rax
  6823. adox r12, rcx
  6824. ; A[13] x A[5]
  6825. mov rdx, QWORD PTR [r9+40]
  6826. mulx rcx, rax, QWORD PTR [r9+104]
  6827. adcx r12, rax
  6828. adox r10, rcx
  6829. mov QWORD PTR [r8+8], r11
  6830. mov QWORD PTR [r8+16], r12
  6831. mov r11, QWORD PTR [r8+32]
  6832. ; Zero into %r10
  6833. ; A[13] x A[6]
  6834. mov rdx, QWORD PTR [r9+48]
  6835. mulx rcx, rax, QWORD PTR [r9+104]
  6836. adcx r10, rax
  6837. adox r11, rcx
  6838. ; A[13] x A[7]
  6839. mov rdx, QWORD PTR [r9+56]
  6840. mulx r12, rax, QWORD PTR [r9+104]
  6841. adcx r11, rax
  6842. adox r12, r13
  6843. mov QWORD PTR [r8+24], r10
  6844. mov QWORD PTR [r8+32], r11
  6845. ; Zero into %r8
  6846. ; Zero into %r9
  6847. ; A[13] x A[8]
  6848. mov rdx, QWORD PTR [r9+64]
  6849. mulx r10, rax, QWORD PTR [r9+104]
  6850. adcx r12, rax
  6851. adox r10, r13
  6852. mov QWORD PTR [r8+40], r12
  6853. ; Carry
  6854. adcx r10, r14
  6855. mov r14, r13
  6856. adcx r14, r13
  6857. adox r14, r13
  6858. mov QWORD PTR [r8+48], r10
  6859. ; Diagonal 5
  6860. mov r10, QWORD PTR [rbp+72]
  6861. mov r11, QWORD PTR [rbp+80]
  6862. mov r12, QWORD PTR [rbp+88]
  6863. ; A[5] x A[4]
  6864. mov rdx, QWORD PTR [r9+32]
  6865. mulx rcx, rax, QWORD PTR [r9+40]
  6866. adcx r10, rax
  6867. adox r11, rcx
  6868. ; A[6] x A[4]
  6869. mulx rcx, rax, QWORD PTR [r9+48]
  6870. adcx r11, rax
  6871. adox r12, rcx
  6872. mov QWORD PTR [rbp+72], r10
  6873. mov QWORD PTR [rbp+80], r11
  6874. ; No load %r13 - %r8
  6875. ; A[7] x A[4]
  6876. mulx rcx, rax, QWORD PTR [r9+56]
  6877. adcx r12, rax
  6878. adox r15, rcx
  6879. ; A[8] x A[4]
  6880. mulx rcx, rax, QWORD PTR [r9+64]
  6881. adcx r15, rax
  6882. adox rdi, rcx
  6883. mov QWORD PTR [rbp+88], r12
  6884. ; No store %r13 - %r8
  6885. ; No load %r15 - %r10
  6886. ; A[9] x A[4]
  6887. mulx rcx, rax, QWORD PTR [r9+72]
  6888. adcx rdi, rax
  6889. adox rsi, rcx
  6890. ; A[10] x A[4]
  6891. mulx rcx, rax, QWORD PTR [r9+80]
  6892. adcx rsi, rax
  6893. adox rbx, rcx
  6894. ; No store %r14 - %r9
  6895. ; No store %r15 - %r10
  6896. mov r11, QWORD PTR [r8]
  6897. mov r12, QWORD PTR [r8+8]
  6898. ; A[11] x A[4]
  6899. mulx rcx, rax, QWORD PTR [r9+88]
  6900. adcx rbx, rax
  6901. adox r11, rcx
  6902. ; A[12] x A[4]
  6903. mulx rcx, rax, QWORD PTR [r9+96]
  6904. adcx r11, rax
  6905. adox r12, rcx
  6906. ; No store %rbx - %r8
  6907. mov QWORD PTR [r8], r11
  6908. mov r10, QWORD PTR [r8+16]
  6909. mov r11, QWORD PTR [r8+24]
  6910. ; A[12] x A[5]
  6911. mov rdx, QWORD PTR [r9+40]
  6912. mulx rcx, rax, QWORD PTR [r9+96]
  6913. adcx r12, rax
  6914. adox r10, rcx
  6915. ; A[12] x A[6]
  6916. mov rdx, QWORD PTR [r9+48]
  6917. mulx rcx, rax, QWORD PTR [r9+96]
  6918. adcx r10, rax
  6919. adox r11, rcx
  6920. mov QWORD PTR [r8+8], r12
  6921. mov QWORD PTR [r8+16], r10
  6922. mov r12, QWORD PTR [r8+32]
  6923. mov r10, QWORD PTR [r8+40]
  6924. ; A[12] x A[7]
  6925. mov rdx, QWORD PTR [r9+56]
  6926. mulx rcx, rax, QWORD PTR [r9+96]
  6927. adcx r11, rax
  6928. adox r12, rcx
  6929. ; A[12] x A[8]
  6930. mov rdx, QWORD PTR [r9+64]
  6931. mulx rcx, rax, QWORD PTR [r9+96]
  6932. adcx r12, rax
  6933. adox r10, rcx
  6934. mov QWORD PTR [r8+24], r11
  6935. mov QWORD PTR [r8+32], r12
  6936. mov r11, QWORD PTR [r8+48]
  6937. ; Zero into %r10
  6938. ; A[12] x A[9]
  6939. mov rdx, QWORD PTR [r9+72]
  6940. mulx rcx, rax, QWORD PTR [r9+96]
  6941. adcx r10, rax
  6942. adox r11, rcx
  6943. ; A[12] x A[10]
  6944. mov rdx, QWORD PTR [r9+80]
  6945. mulx r12, rax, QWORD PTR [r9+96]
  6946. adcx r11, rax
  6947. adox r12, r13
  6948. mov QWORD PTR [r8+40], r10
  6949. mov QWORD PTR [r8+48], r11
  6950. ; Zero into %r8
  6951. ; Zero into %r9
  6952. ; A[12] x A[11]
  6953. mov rdx, QWORD PTR [r9+88]
  6954. mulx r10, rax, QWORD PTR [r9+96]
  6955. adcx r12, rax
  6956. adox r10, r13
  6957. mov QWORD PTR [r8+56], r12
  6958. ; Carry
  6959. adcx r10, r14
  6960. mov r14, r13
  6961. adcx r14, r13
  6962. adox r14, r13
  6963. mov QWORD PTR [r8+64], r10
  6964. ; Diagonal 6
  6965. mov r10, QWORD PTR [rbp+88]
  6966. ; No load %r13 - %r9
  6967. ; A[6] x A[5]
  6968. mov rdx, QWORD PTR [r9+40]
  6969. mulx rcx, rax, QWORD PTR [r9+48]
  6970. adcx r10, rax
  6971. adox r15, rcx
  6972. ; A[7] x A[5]
  6973. mulx rcx, rax, QWORD PTR [r9+56]
  6974. adcx r15, rax
  6975. adox rdi, rcx
  6976. mov QWORD PTR [rbp+88], r10
  6977. ; No store %r13 - %r9
  6978. ; No load %r15 - %r8
  6979. ; A[8] x A[5]
  6980. mulx rcx, rax, QWORD PTR [r9+64]
  6981. adcx rdi, rax
  6982. adox rsi, rcx
  6983. ; A[9] x A[5]
  6984. mulx rcx, rax, QWORD PTR [r9+72]
  6985. adcx rsi, rax
  6986. adox rbx, rcx
  6987. ; No store %r14 - %r10
  6988. ; No store %r15 - %r8
  6989. mov r12, QWORD PTR [r8]
  6990. mov r10, QWORD PTR [r8+8]
  6991. ; A[10] x A[5]
  6992. mulx rcx, rax, QWORD PTR [r9+80]
  6993. adcx rbx, rax
  6994. adox r12, rcx
  6995. ; A[11] x A[5]
  6996. mulx rcx, rax, QWORD PTR [r9+88]
  6997. adcx r12, rax
  6998. adox r10, rcx
  6999. ; No store %rbx - %r9
  7000. mov QWORD PTR [r8], r12
  7001. mov r11, QWORD PTR [r8+16]
  7002. mov r12, QWORD PTR [r8+24]
  7003. ; A[11] x A[6]
  7004. mov rdx, QWORD PTR [r9+48]
  7005. mulx rcx, rax, QWORD PTR [r9+88]
  7006. adcx r10, rax
  7007. adox r11, rcx
  7008. ; A[11] x A[7]
  7009. mov rdx, QWORD PTR [r9+56]
  7010. mulx rcx, rax, QWORD PTR [r9+88]
  7011. adcx r11, rax
  7012. adox r12, rcx
  7013. mov QWORD PTR [r8+8], r10
  7014. mov QWORD PTR [r8+16], r11
  7015. mov r10, QWORD PTR [r8+32]
  7016. mov r11, QWORD PTR [r8+40]
  7017. ; A[11] x A[8]
  7018. mov rdx, QWORD PTR [r9+64]
  7019. mulx rcx, rax, QWORD PTR [r9+88]
  7020. adcx r12, rax
  7021. adox r10, rcx
  7022. ; A[11] x A[9]
  7023. mov rdx, QWORD PTR [r9+72]
  7024. mulx rcx, rax, QWORD PTR [r9+88]
  7025. adcx r10, rax
  7026. adox r11, rcx
  7027. mov QWORD PTR [r8+24], r12
  7028. mov QWORD PTR [r8+32], r10
  7029. mov r12, QWORD PTR [r8+48]
  7030. mov r10, QWORD PTR [r8+56]
  7031. ; A[11] x A[10]
  7032. mov rdx, QWORD PTR [r9+80]
  7033. mulx rcx, rax, QWORD PTR [r9+88]
  7034. adcx r11, rax
  7035. adox r12, rcx
  7036. ; A[13] x A[9]
  7037. mov rdx, QWORD PTR [r9+72]
  7038. mulx rcx, rax, QWORD PTR [r9+104]
  7039. adcx r12, rax
  7040. adox r10, rcx
  7041. mov QWORD PTR [r8+40], r11
  7042. mov QWORD PTR [r8+48], r12
  7043. mov r11, QWORD PTR [r8+64]
  7044. ; Zero into %r10
  7045. ; A[13] x A[10]
  7046. mov rdx, QWORD PTR [r9+80]
  7047. mulx rcx, rax, QWORD PTR [r9+104]
  7048. adcx r10, rax
  7049. adox r11, rcx
  7050. ; A[13] x A[11]
  7051. mov rdx, QWORD PTR [r9+88]
  7052. mulx r12, rax, QWORD PTR [r9+104]
  7053. adcx r11, rax
  7054. adox r12, r13
  7055. mov QWORD PTR [r8+56], r10
  7056. mov QWORD PTR [r8+64], r11
  7057. ; Zero into %r8
  7058. ; Zero into %r9
  7059. ; A[13] x A[12]
  7060. mov rdx, QWORD PTR [r9+96]
  7061. mulx r10, rax, QWORD PTR [r9+104]
  7062. adcx r12, rax
  7063. adox r10, r13
  7064. mov QWORD PTR [r8+72], r12
  7065. ; Carry
  7066. adcx r10, r14
  7067. mov r14, r13
  7068. adcx r14, r13
  7069. adox r14, r13
  7070. mov QWORD PTR [r8+80], r10
  7071. ; Diagonal 7
  7072. ; No load %r15 - %r9
  7073. ; A[7] x A[6]
  7074. mov rdx, QWORD PTR [r9+48]
  7075. mulx rcx, rax, QWORD PTR [r9+56]
  7076. adcx rdi, rax
  7077. adox rsi, rcx
  7078. ; A[8] x A[6]
  7079. mulx rcx, rax, QWORD PTR [r9+64]
  7080. adcx rsi, rax
  7081. adox rbx, rcx
  7082. ; No store %r14 - %r8
  7083. ; No store %r15 - %r9
  7084. mov r10, QWORD PTR [r8]
  7085. mov r11, QWORD PTR [r8+8]
  7086. ; A[9] x A[6]
  7087. mulx rcx, rax, QWORD PTR [r9+72]
  7088. adcx rbx, rax
  7089. adox r10, rcx
  7090. ; A[10] x A[6]
  7091. mulx rcx, rax, QWORD PTR [r9+80]
  7092. adcx r10, rax
  7093. adox r11, rcx
  7094. ; No store %rbx - %r10
  7095. mov QWORD PTR [r8], r10
  7096. mov r12, QWORD PTR [r8+16]
  7097. mov r10, QWORD PTR [r8+24]
  7098. ; A[10] x A[7]
  7099. mov rdx, QWORD PTR [r9+56]
  7100. mulx rcx, rax, QWORD PTR [r9+80]
  7101. adcx r11, rax
  7102. adox r12, rcx
  7103. ; A[10] x A[8]
  7104. mov rdx, QWORD PTR [r9+64]
  7105. mulx rcx, rax, QWORD PTR [r9+80]
  7106. adcx r12, rax
  7107. adox r10, rcx
  7108. mov QWORD PTR [r8+8], r11
  7109. mov QWORD PTR [r8+16], r12
  7110. mov r11, QWORD PTR [r8+32]
  7111. mov r12, QWORD PTR [r8+40]
  7112. ; A[10] x A[9]
  7113. mov rdx, QWORD PTR [r9+72]
  7114. mulx rcx, rax, QWORD PTR [r9+80]
  7115. adcx r10, rax
  7116. adox r11, rcx
  7117. ; A[14] x A[6]
  7118. mov rdx, QWORD PTR [r9+48]
  7119. mulx rcx, rax, QWORD PTR [r9+112]
  7120. adcx r11, rax
  7121. adox r12, rcx
  7122. mov QWORD PTR [r8+24], r10
  7123. mov QWORD PTR [r8+32], r11
  7124. mov r10, QWORD PTR [r8+48]
  7125. mov r11, QWORD PTR [r8+56]
  7126. ; A[14] x A[7]
  7127. mov rdx, QWORD PTR [r9+56]
  7128. mulx rcx, rax, QWORD PTR [r9+112]
  7129. adcx r12, rax
  7130. adox r10, rcx
  7131. ; A[14] x A[8]
  7132. mov rdx, QWORD PTR [r9+64]
  7133. mulx rcx, rax, QWORD PTR [r9+112]
  7134. adcx r10, rax
  7135. adox r11, rcx
  7136. mov QWORD PTR [r8+40], r12
  7137. mov QWORD PTR [r8+48], r10
  7138. mov r12, QWORD PTR [r8+64]
  7139. mov r10, QWORD PTR [r8+72]
  7140. ; A[14] x A[9]
  7141. mov rdx, QWORD PTR [r9+72]
  7142. mulx rcx, rax, QWORD PTR [r9+112]
  7143. adcx r11, rax
  7144. adox r12, rcx
  7145. ; A[14] x A[10]
  7146. mov rdx, QWORD PTR [r9+80]
  7147. mulx rcx, rax, QWORD PTR [r9+112]
  7148. adcx r12, rax
  7149. adox r10, rcx
  7150. mov QWORD PTR [r8+56], r11
  7151. mov QWORD PTR [r8+64], r12
  7152. mov r11, QWORD PTR [r8+80]
  7153. ; Zero into %r10
  7154. ; A[14] x A[11]
  7155. mov rdx, QWORD PTR [r9+88]
  7156. mulx rcx, rax, QWORD PTR [r9+112]
  7157. adcx r10, rax
  7158. adox r11, rcx
  7159. ; A[14] x A[12]
  7160. mov rdx, QWORD PTR [r9+96]
  7161. mulx r12, rax, QWORD PTR [r9+112]
  7162. adcx r11, rax
  7163. adox r12, r13
  7164. mov QWORD PTR [r8+72], r10
  7165. mov QWORD PTR [r8+80], r11
  7166. ; Zero into %r8
  7167. ; Zero into %r9
  7168. ; A[14] x A[13]
  7169. mov rdx, QWORD PTR [r9+104]
  7170. mulx r10, rax, QWORD PTR [r9+112]
  7171. adcx r12, rax
  7172. adox r10, r13
  7173. mov QWORD PTR [r8+88], r12
  7174. ; Carry
  7175. adcx r10, r14
  7176. mov r14, r13
  7177. adcx r14, r13
  7178. adox r14, r13
  7179. mov QWORD PTR [r8+96], r10
  7180. ; Diagonal 8
  7181. mov r11, QWORD PTR [r8]
  7182. mov r12, QWORD PTR [r8+8]
  7183. ; A[8] x A[7]
  7184. mov rdx, QWORD PTR [r9+56]
  7185. mulx rcx, rax, QWORD PTR [r9+64]
  7186. adcx rbx, rax
  7187. adox r11, rcx
  7188. ; A[9] x A[7]
  7189. mulx rcx, rax, QWORD PTR [r9+72]
  7190. adcx r11, rax
  7191. adox r12, rcx
  7192. ; No store %rbx - %r8
  7193. mov QWORD PTR [r8], r11
  7194. mov r10, QWORD PTR [r8+16]
  7195. mov r11, QWORD PTR [r8+24]
  7196. ; A[9] x A[8]
  7197. mov rdx, QWORD PTR [r9+64]
  7198. mulx rcx, rax, QWORD PTR [r9+72]
  7199. adcx r12, rax
  7200. adox r10, rcx
  7201. ; A[15] x A[3]
  7202. mov rdx, QWORD PTR [r9+24]
  7203. mulx rcx, rax, QWORD PTR [r9+120]
  7204. adcx r10, rax
  7205. adox r11, rcx
  7206. mov QWORD PTR [r8+8], r12
  7207. mov QWORD PTR [r8+16], r10
  7208. mov r12, QWORD PTR [r8+32]
  7209. mov r10, QWORD PTR [r8+40]
  7210. ; A[15] x A[4]
  7211. mov rdx, QWORD PTR [r9+32]
  7212. mulx rcx, rax, QWORD PTR [r9+120]
  7213. adcx r11, rax
  7214. adox r12, rcx
  7215. ; A[15] x A[5]
  7216. mov rdx, QWORD PTR [r9+40]
  7217. mulx rcx, rax, QWORD PTR [r9+120]
  7218. adcx r12, rax
  7219. adox r10, rcx
  7220. mov QWORD PTR [r8+24], r11
  7221. mov QWORD PTR [r8+32], r12
  7222. mov r11, QWORD PTR [r8+48]
  7223. mov r12, QWORD PTR [r8+56]
  7224. ; A[15] x A[6]
  7225. mov rdx, QWORD PTR [r9+48]
  7226. mulx rcx, rax, QWORD PTR [r9+120]
  7227. adcx r10, rax
  7228. adox r11, rcx
  7229. ; A[15] x A[7]
  7230. mov rdx, QWORD PTR [r9+56]
  7231. mulx rcx, rax, QWORD PTR [r9+120]
  7232. adcx r11, rax
  7233. adox r12, rcx
  7234. mov QWORD PTR [r8+40], r10
  7235. mov QWORD PTR [r8+48], r11
  7236. mov r10, QWORD PTR [r8+64]
  7237. mov r11, QWORD PTR [r8+72]
  7238. ; A[15] x A[8]
  7239. mov rdx, QWORD PTR [r9+64]
  7240. mulx rcx, rax, QWORD PTR [r9+120]
  7241. adcx r12, rax
  7242. adox r10, rcx
  7243. ; A[15] x A[9]
  7244. mov rdx, QWORD PTR [r9+72]
  7245. mulx rcx, rax, QWORD PTR [r9+120]
  7246. adcx r10, rax
  7247. adox r11, rcx
  7248. mov QWORD PTR [r8+56], r12
  7249. mov QWORD PTR [r8+64], r10
  7250. mov r12, QWORD PTR [r8+80]
  7251. mov r10, QWORD PTR [r8+88]
  7252. ; A[15] x A[10]
  7253. mov rdx, QWORD PTR [r9+80]
  7254. mulx rcx, rax, QWORD PTR [r9+120]
  7255. adcx r11, rax
  7256. adox r12, rcx
  7257. ; A[15] x A[11]
  7258. mov rdx, QWORD PTR [r9+88]
  7259. mulx rcx, rax, QWORD PTR [r9+120]
  7260. adcx r12, rax
  7261. adox r10, rcx
  7262. mov QWORD PTR [r8+72], r11
  7263. mov QWORD PTR [r8+80], r12
  7264. mov r11, QWORD PTR [r8+96]
  7265. ; Zero into %r10
  7266. ; A[15] x A[12]
  7267. mov rdx, QWORD PTR [r9+96]
  7268. mulx rcx, rax, QWORD PTR [r9+120]
  7269. adcx r10, rax
  7270. adox r11, rcx
  7271. ; A[15] x A[13]
  7272. mov rdx, QWORD PTR [r9+104]
  7273. mulx r12, rax, QWORD PTR [r9+120]
  7274. adcx r11, rax
  7275. adox r12, r13
  7276. mov QWORD PTR [r8+88], r10
  7277. mov QWORD PTR [r8+96], r11
  7278. ; Zero into %r8
  7279. ; Zero into %r9
  7280. ; A[15] x A[14]
  7281. mov rdx, QWORD PTR [r9+112]
  7282. mulx r10, rax, QWORD PTR [r9+120]
  7283. adcx r12, rax
  7284. adox r10, r13
  7285. mov QWORD PTR [r8+104], r12
  7286. ; Carry
  7287. adcx r10, r14
  7288. mov r14, r13
  7289. adcx r14, r13
  7290. adox r14, r13
  7291. mov QWORD PTR [r8+112], r10
  7292. mov QWORD PTR [r8+120], r14
  7293. ; Double and Add in A[i] x A[i]
  7294. mov r11, QWORD PTR [rbp+8]
  7295. ; A[0] x A[0]
  7296. mov rdx, QWORD PTR [r9]
  7297. mulx rcx, rax, rdx
  7298. mov QWORD PTR [rbp], rax
  7299. adox r11, r11
  7300. adcx r11, rcx
  7301. mov QWORD PTR [rbp+8], r11
  7302. mov r10, QWORD PTR [rbp+16]
  7303. mov r11, QWORD PTR [rbp+24]
  7304. ; A[1] x A[1]
  7305. mov rdx, QWORD PTR [r9+8]
  7306. mulx rcx, rax, rdx
  7307. adox r10, r10
  7308. adox r11, r11
  7309. adcx r10, rax
  7310. adcx r11, rcx
  7311. mov QWORD PTR [rbp+16], r10
  7312. mov QWORD PTR [rbp+24], r11
  7313. mov r10, QWORD PTR [rbp+32]
  7314. mov r11, QWORD PTR [rbp+40]
  7315. ; A[2] x A[2]
  7316. mov rdx, QWORD PTR [r9+16]
  7317. mulx rcx, rax, rdx
  7318. adox r10, r10
  7319. adox r11, r11
  7320. adcx r10, rax
  7321. adcx r11, rcx
  7322. mov QWORD PTR [rbp+32], r10
  7323. mov QWORD PTR [rbp+40], r11
  7324. mov r10, QWORD PTR [rbp+48]
  7325. mov r11, QWORD PTR [rbp+56]
  7326. ; A[3] x A[3]
  7327. mov rdx, QWORD PTR [r9+24]
  7328. mulx rcx, rax, rdx
  7329. adox r10, r10
  7330. adox r11, r11
  7331. adcx r10, rax
  7332. adcx r11, rcx
  7333. mov QWORD PTR [rbp+48], r10
  7334. mov QWORD PTR [rbp+56], r11
  7335. mov r10, QWORD PTR [rbp+64]
  7336. mov r11, QWORD PTR [rbp+72]
  7337. ; A[4] x A[4]
  7338. mov rdx, QWORD PTR [r9+32]
  7339. mulx rcx, rax, rdx
  7340. adox r10, r10
  7341. adox r11, r11
  7342. adcx r10, rax
  7343. adcx r11, rcx
  7344. mov QWORD PTR [rbp+64], r10
  7345. mov QWORD PTR [rbp+72], r11
  7346. mov r10, QWORD PTR [rbp+80]
  7347. mov r11, QWORD PTR [rbp+88]
  7348. ; A[5] x A[5]
  7349. mov rdx, QWORD PTR [r9+40]
  7350. mulx rcx, rax, rdx
  7351. adox r10, r10
  7352. adox r11, r11
  7353. adcx r10, rax
  7354. adcx r11, rcx
  7355. mov QWORD PTR [rbp+80], r10
  7356. mov QWORD PTR [rbp+88], r11
  7357. ; A[6] x A[6]
  7358. mov rdx, QWORD PTR [r9+48]
  7359. mulx rcx, rax, rdx
  7360. adox r15, r15
  7361. adox rdi, rdi
  7362. adcx r15, rax
  7363. adcx rdi, rcx
  7364. ; A[7] x A[7]
  7365. mov rdx, QWORD PTR [r9+56]
  7366. mulx rcx, rax, rdx
  7367. adox rsi, rsi
  7368. adox rbx, rbx
  7369. adcx rsi, rax
  7370. adcx rbx, rcx
  7371. mov r10, QWORD PTR [r8]
  7372. mov r11, QWORD PTR [r8+8]
  7373. ; A[8] x A[8]
  7374. mov rdx, QWORD PTR [r9+64]
  7375. mulx rcx, rax, rdx
  7376. adox r10, r10
  7377. adox r11, r11
  7378. adcx r10, rax
  7379. adcx r11, rcx
  7380. mov QWORD PTR [r8], r10
  7381. mov QWORD PTR [r8+8], r11
  7382. mov r10, QWORD PTR [r8+16]
  7383. mov r11, QWORD PTR [r8+24]
  7384. ; A[9] x A[9]
  7385. mov rdx, QWORD PTR [r9+72]
  7386. mulx rcx, rax, rdx
  7387. adox r10, r10
  7388. adox r11, r11
  7389. adcx r10, rax
  7390. adcx r11, rcx
  7391. mov QWORD PTR [r8+16], r10
  7392. mov QWORD PTR [r8+24], r11
  7393. mov r10, QWORD PTR [r8+32]
  7394. mov r11, QWORD PTR [r8+40]
  7395. ; A[10] x A[10]
  7396. mov rdx, QWORD PTR [r9+80]
  7397. mulx rcx, rax, rdx
  7398. adox r10, r10
  7399. adox r11, r11
  7400. adcx r10, rax
  7401. adcx r11, rcx
  7402. mov QWORD PTR [r8+32], r10
  7403. mov QWORD PTR [r8+40], r11
  7404. mov r10, QWORD PTR [r8+48]
  7405. mov r11, QWORD PTR [r8+56]
  7406. ; A[11] x A[11]
  7407. mov rdx, QWORD PTR [r9+88]
  7408. mulx rcx, rax, rdx
  7409. adox r10, r10
  7410. adox r11, r11
  7411. adcx r10, rax
  7412. adcx r11, rcx
  7413. mov QWORD PTR [r8+48], r10
  7414. mov QWORD PTR [r8+56], r11
  7415. mov r10, QWORD PTR [r8+64]
  7416. mov r11, QWORD PTR [r8+72]
  7417. ; A[12] x A[12]
  7418. mov rdx, QWORD PTR [r9+96]
  7419. mulx rcx, rax, rdx
  7420. adox r10, r10
  7421. adox r11, r11
  7422. adcx r10, rax
  7423. adcx r11, rcx
  7424. mov QWORD PTR [r8+64], r10
  7425. mov QWORD PTR [r8+72], r11
  7426. mov r10, QWORD PTR [r8+80]
  7427. mov r11, QWORD PTR [r8+88]
  7428. ; A[13] x A[13]
  7429. mov rdx, QWORD PTR [r9+104]
  7430. mulx rcx, rax, rdx
  7431. adox r10, r10
  7432. adox r11, r11
  7433. adcx r10, rax
  7434. adcx r11, rcx
  7435. mov QWORD PTR [r8+80], r10
  7436. mov QWORD PTR [r8+88], r11
  7437. mov r10, QWORD PTR [r8+96]
  7438. mov r11, QWORD PTR [r8+104]
  7439. ; A[14] x A[14]
  7440. mov rdx, QWORD PTR [r9+112]
  7441. mulx rcx, rax, rdx
  7442. adox r10, r10
  7443. adox r11, r11
  7444. adcx r10, rax
  7445. adcx r11, rcx
  7446. mov QWORD PTR [r8+96], r10
  7447. mov QWORD PTR [r8+104], r11
  7448. mov r10, QWORD PTR [r8+112]
  7449. mov r11, QWORD PTR [r8+120]
  7450. ; A[15] x A[15]
  7451. mov rdx, QWORD PTR [r9+120]
  7452. mulx rcx, rax, rdx
  7453. adox r10, r10
  7454. adox r11, r11
  7455. adcx r10, rax
  7456. adcx r11, rcx
  7457. mov QWORD PTR [r8+112], r10
  7458. mov QWORD PTR [r8+120], r11
  7459. mov QWORD PTR [r8+-32], r15
  7460. mov QWORD PTR [r8+-24], rdi
  7461. mov QWORD PTR [r8+-16], rsi
  7462. mov QWORD PTR [r8+-8], rbx
  7463. sub r8, 128
  7464. cmp r9, r8
  7465. jne L_end_2048_sqr_avx2_16
  7466. vmovdqu xmm0, OWORD PTR [rbp]
  7467. vmovups OWORD PTR [r8], xmm0
  7468. vmovdqu xmm0, OWORD PTR [rbp+16]
  7469. vmovups OWORD PTR [r8+16], xmm0
  7470. vmovdqu xmm0, OWORD PTR [rbp+32]
  7471. vmovups OWORD PTR [r8+32], xmm0
  7472. vmovdqu xmm0, OWORD PTR [rbp+48]
  7473. vmovups OWORD PTR [r8+48], xmm0
  7474. vmovdqu xmm0, OWORD PTR [rbp+64]
  7475. vmovups OWORD PTR [r8+64], xmm0
  7476. vmovdqu xmm0, OWORD PTR [rbp+80]
  7477. vmovups OWORD PTR [r8+80], xmm0
  7478. L_end_2048_sqr_avx2_16:
  7479. add rsp, 128
  7480. pop rbx
  7481. pop rsi
  7482. pop rdi
  7483. pop r15
  7484. pop r14
  7485. pop r13
  7486. pop r12
  7487. pop rbp
  7488. ret
  7489. sp_2048_sqr_avx2_16 ENDP
  7490. _text ENDS
  7491. ENDIF
  7492. ; /* Square a and put result in r. (r = a * a)
  7493. ; *
  7494. ; * Karatsuba: ah^2, al^2, (al - ah)^2 where al and ah are the low and high 16 64-bit words of a
  7495. ; *
  7496. ; * r A single precision integer.
  7497. ; * a A single precision integer.
  7498. ; */
  7499. _text SEGMENT READONLY PARA
  7500. sp_2048_sqr_32 PROC
  7501. sub rsp, 272
  7502. mov QWORD PTR [rsp+256], rcx
  7503. mov QWORD PTR [rsp+264], rdx
  7504. mov r9, 0
  7505. mov r10, rsp
  7506. lea r11, QWORD PTR [rdx+128]
  7507. mov rax, QWORD PTR [rdx]
  7508. sub rax, QWORD PTR [r11]
  7509. mov r8, QWORD PTR [rdx+8]
  7510. mov QWORD PTR [r10], rax
  7511. sbb r8, QWORD PTR [r11+8]
  7512. mov rax, QWORD PTR [rdx+16]
  7513. mov QWORD PTR [r10+8], r8
  7514. sbb rax, QWORD PTR [r11+16]
  7515. mov r8, QWORD PTR [rdx+24]
  7516. mov QWORD PTR [r10+16], rax
  7517. sbb r8, QWORD PTR [r11+24]
  7518. mov rax, QWORD PTR [rdx+32]
  7519. mov QWORD PTR [r10+24], r8
  7520. sbb rax, QWORD PTR [r11+32]
  7521. mov r8, QWORD PTR [rdx+40]
  7522. mov QWORD PTR [r10+32], rax
  7523. sbb r8, QWORD PTR [r11+40]
  7524. mov rax, QWORD PTR [rdx+48]
  7525. mov QWORD PTR [r10+40], r8
  7526. sbb rax, QWORD PTR [r11+48]
  7527. mov r8, QWORD PTR [rdx+56]
  7528. mov QWORD PTR [r10+48], rax
  7529. sbb r8, QWORD PTR [r11+56]
  7530. mov rax, QWORD PTR [rdx+64]
  7531. mov QWORD PTR [r10+56], r8
  7532. sbb rax, QWORD PTR [r11+64]
  7533. mov r8, QWORD PTR [rdx+72]
  7534. mov QWORD PTR [r10+64], rax
  7535. sbb r8, QWORD PTR [r11+72]
  7536. mov rax, QWORD PTR [rdx+80]
  7537. mov QWORD PTR [r10+72], r8
  7538. sbb rax, QWORD PTR [r11+80]
  7539. mov r8, QWORD PTR [rdx+88]
  7540. mov QWORD PTR [r10+80], rax
  7541. sbb r8, QWORD PTR [r11+88]
  7542. mov rax, QWORD PTR [rdx+96]
  7543. mov QWORD PTR [r10+88], r8
  7544. sbb rax, QWORD PTR [r11+96]
  7545. mov r8, QWORD PTR [rdx+104]
  7546. mov QWORD PTR [r10+96], rax
  7547. sbb r8, QWORD PTR [r11+104]
  7548. mov rax, QWORD PTR [rdx+112]
  7549. mov QWORD PTR [r10+104], r8
  7550. sbb rax, QWORD PTR [r11+112]
  7551. mov r8, QWORD PTR [rdx+120]
  7552. mov QWORD PTR [r10+112], rax
  7553. sbb r8, QWORD PTR [r11+120]
  7554. mov QWORD PTR [r10+120], r8
  7555. sbb r9, 0
  7556. ; Cond Negate
  7557. mov rax, QWORD PTR [r10]
  7558. mov r11, r9
  7559. xor rax, r9
  7560. neg r11
  7561. sub rax, r9
  7562. mov r8, QWORD PTR [r10+8]
  7563. sbb r11, 0
  7564. mov QWORD PTR [r10], rax
  7565. xor r8, r9
  7566. add r8, r11
  7567. mov rax, QWORD PTR [r10+16]
  7568. setc r11b
  7569. mov QWORD PTR [r10+8], r8
  7570. xor rax, r9
  7571. add rax, r11
  7572. mov r8, QWORD PTR [r10+24]
  7573. setc r11b
  7574. mov QWORD PTR [r10+16], rax
  7575. xor r8, r9
  7576. add r8, r11
  7577. mov rax, QWORD PTR [r10+32]
  7578. setc r11b
  7579. mov QWORD PTR [r10+24], r8
  7580. xor rax, r9
  7581. add rax, r11
  7582. mov r8, QWORD PTR [r10+40]
  7583. setc r11b
  7584. mov QWORD PTR [r10+32], rax
  7585. xor r8, r9
  7586. add r8, r11
  7587. mov rax, QWORD PTR [r10+48]
  7588. setc r11b
  7589. mov QWORD PTR [r10+40], r8
  7590. xor rax, r9
  7591. add rax, r11
  7592. mov r8, QWORD PTR [r10+56]
  7593. setc r11b
  7594. mov QWORD PTR [r10+48], rax
  7595. xor r8, r9
  7596. add r8, r11
  7597. mov rax, QWORD PTR [r10+64]
  7598. setc r11b
  7599. mov QWORD PTR [r10+56], r8
  7600. xor rax, r9
  7601. add rax, r11
  7602. mov r8, QWORD PTR [r10+72]
  7603. setc r11b
  7604. mov QWORD PTR [r10+64], rax
  7605. xor r8, r9
  7606. add r8, r11
  7607. mov rax, QWORD PTR [r10+80]
  7608. setc r11b
  7609. mov QWORD PTR [r10+72], r8
  7610. xor rax, r9
  7611. add rax, r11
  7612. mov r8, QWORD PTR [r10+88]
  7613. setc r11b
  7614. mov QWORD PTR [r10+80], rax
  7615. xor r8, r9
  7616. add r8, r11
  7617. mov rax, QWORD PTR [r10+96]
  7618. setc r11b
  7619. mov QWORD PTR [r10+88], r8
  7620. xor rax, r9
  7621. add rax, r11
  7622. mov r8, QWORD PTR [r10+104]
  7623. setc r11b
  7624. mov QWORD PTR [r10+96], rax
  7625. xor r8, r9
  7626. add r8, r11
  7627. mov rax, QWORD PTR [r10+112]
  7628. setc r11b
  7629. mov QWORD PTR [r10+104], r8
  7630. xor rax, r9
  7631. add rax, r11
  7632. mov r8, QWORD PTR [r10+120]
  7633. setc r11b
  7634. mov QWORD PTR [r10+112], rax
  7635. xor r8, r9
  7636. add r8, r11
  7637. mov QWORD PTR [r10+120], r8
  7638. mov rdx, r10
  7639. mov rcx, rsp
  7640. call sp_2048_sqr_16
  7641. mov rdx, QWORD PTR [rsp+264]
  7642. mov rcx, QWORD PTR [rsp+256]
  7643. add rdx, 128
  7644. add rcx, 256
  7645. call sp_2048_sqr_16
  7646. mov rdx, QWORD PTR [rsp+264]
  7647. mov rcx, QWORD PTR [rsp+256]
  7648. call sp_2048_sqr_16
  7649. IFDEF _WIN64
  7650. mov rdx, QWORD PTR [rsp+264]
  7651. mov rcx, QWORD PTR [rsp+256]
  7652. ENDIF
  7653. mov rdx, QWORD PTR [rsp+256]
  7654. lea r10, QWORD PTR [rsp+128]
  7655. add rdx, 384
  7656. mov r9, 0
  7657. mov r8, QWORD PTR [r10+-128]
  7658. sub r8, QWORD PTR [rdx+-128]
  7659. mov rax, QWORD PTR [r10+-120]
  7660. mov QWORD PTR [r10+-128], r8
  7661. sbb rax, QWORD PTR [rdx+-120]
  7662. mov r8, QWORD PTR [r10+-112]
  7663. mov QWORD PTR [r10+-120], rax
  7664. sbb r8, QWORD PTR [rdx+-112]
  7665. mov rax, QWORD PTR [r10+-104]
  7666. mov QWORD PTR [r10+-112], r8
  7667. sbb rax, QWORD PTR [rdx+-104]
  7668. mov r8, QWORD PTR [r10+-96]
  7669. mov QWORD PTR [r10+-104], rax
  7670. sbb r8, QWORD PTR [rdx+-96]
  7671. mov rax, QWORD PTR [r10+-88]
  7672. mov QWORD PTR [r10+-96], r8
  7673. sbb rax, QWORD PTR [rdx+-88]
  7674. mov r8, QWORD PTR [r10+-80]
  7675. mov QWORD PTR [r10+-88], rax
  7676. sbb r8, QWORD PTR [rdx+-80]
  7677. mov rax, QWORD PTR [r10+-72]
  7678. mov QWORD PTR [r10+-80], r8
  7679. sbb rax, QWORD PTR [rdx+-72]
  7680. mov r8, QWORD PTR [r10+-64]
  7681. mov QWORD PTR [r10+-72], rax
  7682. sbb r8, QWORD PTR [rdx+-64]
  7683. mov rax, QWORD PTR [r10+-56]
  7684. mov QWORD PTR [r10+-64], r8
  7685. sbb rax, QWORD PTR [rdx+-56]
  7686. mov r8, QWORD PTR [r10+-48]
  7687. mov QWORD PTR [r10+-56], rax
  7688. sbb r8, QWORD PTR [rdx+-48]
  7689. mov rax, QWORD PTR [r10+-40]
  7690. mov QWORD PTR [r10+-48], r8
  7691. sbb rax, QWORD PTR [rdx+-40]
  7692. mov r8, QWORD PTR [r10+-32]
  7693. mov QWORD PTR [r10+-40], rax
  7694. sbb r8, QWORD PTR [rdx+-32]
  7695. mov rax, QWORD PTR [r10+-24]
  7696. mov QWORD PTR [r10+-32], r8
  7697. sbb rax, QWORD PTR [rdx+-24]
  7698. mov r8, QWORD PTR [r10+-16]
  7699. mov QWORD PTR [r10+-24], rax
  7700. sbb r8, QWORD PTR [rdx+-16]
  7701. mov rax, QWORD PTR [r10+-8]
  7702. mov QWORD PTR [r10+-16], r8
  7703. sbb rax, QWORD PTR [rdx+-8]
  7704. mov r8, QWORD PTR [r10]
  7705. mov QWORD PTR [r10+-8], rax
  7706. sbb r8, QWORD PTR [rdx]
  7707. mov rax, QWORD PTR [r10+8]
  7708. mov QWORD PTR [r10], r8
  7709. sbb rax, QWORD PTR [rdx+8]
  7710. mov r8, QWORD PTR [r10+16]
  7711. mov QWORD PTR [r10+8], rax
  7712. sbb r8, QWORD PTR [rdx+16]
  7713. mov rax, QWORD PTR [r10+24]
  7714. mov QWORD PTR [r10+16], r8
  7715. sbb rax, QWORD PTR [rdx+24]
  7716. mov r8, QWORD PTR [r10+32]
  7717. mov QWORD PTR [r10+24], rax
  7718. sbb r8, QWORD PTR [rdx+32]
  7719. mov rax, QWORD PTR [r10+40]
  7720. mov QWORD PTR [r10+32], r8
  7721. sbb rax, QWORD PTR [rdx+40]
  7722. mov r8, QWORD PTR [r10+48]
  7723. mov QWORD PTR [r10+40], rax
  7724. sbb r8, QWORD PTR [rdx+48]
  7725. mov rax, QWORD PTR [r10+56]
  7726. mov QWORD PTR [r10+48], r8
  7727. sbb rax, QWORD PTR [rdx+56]
  7728. mov r8, QWORD PTR [r10+64]
  7729. mov QWORD PTR [r10+56], rax
  7730. sbb r8, QWORD PTR [rdx+64]
  7731. mov rax, QWORD PTR [r10+72]
  7732. mov QWORD PTR [r10+64], r8
  7733. sbb rax, QWORD PTR [rdx+72]
  7734. mov r8, QWORD PTR [r10+80]
  7735. mov QWORD PTR [r10+72], rax
  7736. sbb r8, QWORD PTR [rdx+80]
  7737. mov rax, QWORD PTR [r10+88]
  7738. mov QWORD PTR [r10+80], r8
  7739. sbb rax, QWORD PTR [rdx+88]
  7740. mov r8, QWORD PTR [r10+96]
  7741. mov QWORD PTR [r10+88], rax
  7742. sbb r8, QWORD PTR [rdx+96]
  7743. mov rax, QWORD PTR [r10+104]
  7744. mov QWORD PTR [r10+96], r8
  7745. sbb rax, QWORD PTR [rdx+104]
  7746. mov r8, QWORD PTR [r10+112]
  7747. mov QWORD PTR [r10+104], rax
  7748. sbb r8, QWORD PTR [rdx+112]
  7749. mov rax, QWORD PTR [r10+120]
  7750. mov QWORD PTR [r10+112], r8
  7751. sbb rax, QWORD PTR [rdx+120]
  7752. mov QWORD PTR [r10+120], rax
  7753. sbb r9, 0
  7754. sub rdx, 256
  7755. mov r8, QWORD PTR [r10+-128]
  7756. sub r8, QWORD PTR [rdx+-128]
  7757. mov rax, QWORD PTR [r10+-120]
  7758. mov QWORD PTR [r10+-128], r8
  7759. sbb rax, QWORD PTR [rdx+-120]
  7760. mov r8, QWORD PTR [r10+-112]
  7761. mov QWORD PTR [r10+-120], rax
  7762. sbb r8, QWORD PTR [rdx+-112]
  7763. mov rax, QWORD PTR [r10+-104]
  7764. mov QWORD PTR [r10+-112], r8
  7765. sbb rax, QWORD PTR [rdx+-104]
  7766. mov r8, QWORD PTR [r10+-96]
  7767. mov QWORD PTR [r10+-104], rax
  7768. sbb r8, QWORD PTR [rdx+-96]
  7769. mov rax, QWORD PTR [r10+-88]
  7770. mov QWORD PTR [r10+-96], r8
  7771. sbb rax, QWORD PTR [rdx+-88]
  7772. mov r8, QWORD PTR [r10+-80]
  7773. mov QWORD PTR [r10+-88], rax
  7774. sbb r8, QWORD PTR [rdx+-80]
  7775. mov rax, QWORD PTR [r10+-72]
  7776. mov QWORD PTR [r10+-80], r8
  7777. sbb rax, QWORD PTR [rdx+-72]
  7778. mov r8, QWORD PTR [r10+-64]
  7779. mov QWORD PTR [r10+-72], rax
  7780. sbb r8, QWORD PTR [rdx+-64]
  7781. mov rax, QWORD PTR [r10+-56]
  7782. mov QWORD PTR [r10+-64], r8
  7783. sbb rax, QWORD PTR [rdx+-56]
  7784. mov r8, QWORD PTR [r10+-48]
  7785. mov QWORD PTR [r10+-56], rax
  7786. sbb r8, QWORD PTR [rdx+-48]
  7787. mov rax, QWORD PTR [r10+-40]
  7788. mov QWORD PTR [r10+-48], r8
  7789. sbb rax, QWORD PTR [rdx+-40]
  7790. mov r8, QWORD PTR [r10+-32]
  7791. mov QWORD PTR [r10+-40], rax
  7792. sbb r8, QWORD PTR [rdx+-32]
  7793. mov rax, QWORD PTR [r10+-24]
  7794. mov QWORD PTR [r10+-32], r8
  7795. sbb rax, QWORD PTR [rdx+-24]
  7796. mov r8, QWORD PTR [r10+-16]
  7797. mov QWORD PTR [r10+-24], rax
  7798. sbb r8, QWORD PTR [rdx+-16]
  7799. mov rax, QWORD PTR [r10+-8]
  7800. mov QWORD PTR [r10+-16], r8
  7801. sbb rax, QWORD PTR [rdx+-8]
  7802. mov r8, QWORD PTR [r10]
  7803. mov QWORD PTR [r10+-8], rax
  7804. sbb r8, QWORD PTR [rdx]
  7805. mov rax, QWORD PTR [r10+8]
  7806. mov QWORD PTR [r10], r8
  7807. sbb rax, QWORD PTR [rdx+8]
  7808. mov r8, QWORD PTR [r10+16]
  7809. mov QWORD PTR [r10+8], rax
  7810. sbb r8, QWORD PTR [rdx+16]
  7811. mov rax, QWORD PTR [r10+24]
  7812. mov QWORD PTR [r10+16], r8
  7813. sbb rax, QWORD PTR [rdx+24]
  7814. mov r8, QWORD PTR [r10+32]
  7815. mov QWORD PTR [r10+24], rax
  7816. sbb r8, QWORD PTR [rdx+32]
  7817. mov rax, QWORD PTR [r10+40]
  7818. mov QWORD PTR [r10+32], r8
  7819. sbb rax, QWORD PTR [rdx+40]
  7820. mov r8, QWORD PTR [r10+48]
  7821. mov QWORD PTR [r10+40], rax
  7822. sbb r8, QWORD PTR [rdx+48]
  7823. mov rax, QWORD PTR [r10+56]
  7824. mov QWORD PTR [r10+48], r8
  7825. sbb rax, QWORD PTR [rdx+56]
  7826. mov r8, QWORD PTR [r10+64]
  7827. mov QWORD PTR [r10+56], rax
  7828. sbb r8, QWORD PTR [rdx+64]
  7829. mov rax, QWORD PTR [r10+72]
  7830. mov QWORD PTR [r10+64], r8
  7831. sbb rax, QWORD PTR [rdx+72]
  7832. mov r8, QWORD PTR [r10+80]
  7833. mov QWORD PTR [r10+72], rax
  7834. sbb r8, QWORD PTR [rdx+80]
  7835. mov rax, QWORD PTR [r10+88]
  7836. mov QWORD PTR [r10+80], r8
  7837. sbb rax, QWORD PTR [rdx+88]
  7838. mov r8, QWORD PTR [r10+96]
  7839. mov QWORD PTR [r10+88], rax
  7840. sbb r8, QWORD PTR [rdx+96]
  7841. mov rax, QWORD PTR [r10+104]
  7842. mov QWORD PTR [r10+96], r8
  7843. sbb rax, QWORD PTR [rdx+104]
  7844. mov r8, QWORD PTR [r10+112]
  7845. mov QWORD PTR [r10+104], rax
  7846. sbb r8, QWORD PTR [rdx+112]
  7847. mov rax, QWORD PTR [r10+120]
  7848. mov QWORD PTR [r10+112], r8
  7849. sbb rax, QWORD PTR [rdx+120]
  7850. mov QWORD PTR [r10+120], rax
  7851. sbb r9, 0
  7852. mov rcx, QWORD PTR [rsp+256]
  7853. neg r9
  7854. add rcx, 256
  7855. mov r8, QWORD PTR [rcx+-128]
  7856. sub r8, QWORD PTR [r10+-128]
  7857. mov rax, QWORD PTR [rcx+-120]
  7858. mov QWORD PTR [rcx+-128], r8
  7859. sbb rax, QWORD PTR [r10+-120]
  7860. mov r8, QWORD PTR [rcx+-112]
  7861. mov QWORD PTR [rcx+-120], rax
  7862. sbb r8, QWORD PTR [r10+-112]
  7863. mov rax, QWORD PTR [rcx+-104]
  7864. mov QWORD PTR [rcx+-112], r8
  7865. sbb rax, QWORD PTR [r10+-104]
  7866. mov r8, QWORD PTR [rcx+-96]
  7867. mov QWORD PTR [rcx+-104], rax
  7868. sbb r8, QWORD PTR [r10+-96]
  7869. mov rax, QWORD PTR [rcx+-88]
  7870. mov QWORD PTR [rcx+-96], r8
  7871. sbb rax, QWORD PTR [r10+-88]
  7872. mov r8, QWORD PTR [rcx+-80]
  7873. mov QWORD PTR [rcx+-88], rax
  7874. sbb r8, QWORD PTR [r10+-80]
  7875. mov rax, QWORD PTR [rcx+-72]
  7876. mov QWORD PTR [rcx+-80], r8
  7877. sbb rax, QWORD PTR [r10+-72]
  7878. mov r8, QWORD PTR [rcx+-64]
  7879. mov QWORD PTR [rcx+-72], rax
  7880. sbb r8, QWORD PTR [r10+-64]
  7881. mov rax, QWORD PTR [rcx+-56]
  7882. mov QWORD PTR [rcx+-64], r8
  7883. sbb rax, QWORD PTR [r10+-56]
  7884. mov r8, QWORD PTR [rcx+-48]
  7885. mov QWORD PTR [rcx+-56], rax
  7886. sbb r8, QWORD PTR [r10+-48]
  7887. mov rax, QWORD PTR [rcx+-40]
  7888. mov QWORD PTR [rcx+-48], r8
  7889. sbb rax, QWORD PTR [r10+-40]
  7890. mov r8, QWORD PTR [rcx+-32]
  7891. mov QWORD PTR [rcx+-40], rax
  7892. sbb r8, QWORD PTR [r10+-32]
  7893. mov rax, QWORD PTR [rcx+-24]
  7894. mov QWORD PTR [rcx+-32], r8
  7895. sbb rax, QWORD PTR [r10+-24]
  7896. mov r8, QWORD PTR [rcx+-16]
  7897. mov QWORD PTR [rcx+-24], rax
  7898. sbb r8, QWORD PTR [r10+-16]
  7899. mov rax, QWORD PTR [rcx+-8]
  7900. mov QWORD PTR [rcx+-16], r8
  7901. sbb rax, QWORD PTR [r10+-8]
  7902. mov r8, QWORD PTR [rcx]
  7903. mov QWORD PTR [rcx+-8], rax
  7904. sbb r8, QWORD PTR [r10]
  7905. mov rax, QWORD PTR [rcx+8]
  7906. mov QWORD PTR [rcx], r8
  7907. sbb rax, QWORD PTR [r10+8]
  7908. mov r8, QWORD PTR [rcx+16]
  7909. mov QWORD PTR [rcx+8], rax
  7910. sbb r8, QWORD PTR [r10+16]
  7911. mov rax, QWORD PTR [rcx+24]
  7912. mov QWORD PTR [rcx+16], r8
  7913. sbb rax, QWORD PTR [r10+24]
  7914. mov r8, QWORD PTR [rcx+32]
  7915. mov QWORD PTR [rcx+24], rax
  7916. sbb r8, QWORD PTR [r10+32]
  7917. mov rax, QWORD PTR [rcx+40]
  7918. mov QWORD PTR [rcx+32], r8
  7919. sbb rax, QWORD PTR [r10+40]
  7920. mov r8, QWORD PTR [rcx+48]
  7921. mov QWORD PTR [rcx+40], rax
  7922. sbb r8, QWORD PTR [r10+48]
  7923. mov rax, QWORD PTR [rcx+56]
  7924. mov QWORD PTR [rcx+48], r8
  7925. sbb rax, QWORD PTR [r10+56]
  7926. mov r8, QWORD PTR [rcx+64]
  7927. mov QWORD PTR [rcx+56], rax
  7928. sbb r8, QWORD PTR [r10+64]
  7929. mov rax, QWORD PTR [rcx+72]
  7930. mov QWORD PTR [rcx+64], r8
  7931. sbb rax, QWORD PTR [r10+72]
  7932. mov r8, QWORD PTR [rcx+80]
  7933. mov QWORD PTR [rcx+72], rax
  7934. sbb r8, QWORD PTR [r10+80]
  7935. mov rax, QWORD PTR [rcx+88]
  7936. mov QWORD PTR [rcx+80], r8
  7937. sbb rax, QWORD PTR [r10+88]
  7938. mov r8, QWORD PTR [rcx+96]
  7939. mov QWORD PTR [rcx+88], rax
  7940. sbb r8, QWORD PTR [r10+96]
  7941. mov rax, QWORD PTR [rcx+104]
  7942. mov QWORD PTR [rcx+96], r8
  7943. sbb rax, QWORD PTR [r10+104]
  7944. mov r8, QWORD PTR [rcx+112]
  7945. mov QWORD PTR [rcx+104], rax
  7946. sbb r8, QWORD PTR [r10+112]
  7947. mov rax, QWORD PTR [rcx+120]
  7948. mov QWORD PTR [rcx+112], r8
  7949. sbb rax, QWORD PTR [r10+120]
  7950. mov QWORD PTR [rcx+120], rax
  7951. sbb r9, 0
  7952. mov rcx, QWORD PTR [rsp+256]
  7953. add rcx, 384
  7954. ; Add in word
  7955. mov r8, QWORD PTR [rcx]
  7956. add r8, r9
  7957. mov rax, QWORD PTR [rcx+8]
  7958. mov QWORD PTR [rcx], r8
  7959. adc rax, 0
  7960. mov r8, QWORD PTR [rcx+16]
  7961. mov QWORD PTR [rcx+8], rax
  7962. adc r8, 0
  7963. mov rax, QWORD PTR [rcx+24]
  7964. mov QWORD PTR [rcx+16], r8
  7965. adc rax, 0
  7966. mov r8, QWORD PTR [rcx+32]
  7967. mov QWORD PTR [rcx+24], rax
  7968. adc r8, 0
  7969. mov rax, QWORD PTR [rcx+40]
  7970. mov QWORD PTR [rcx+32], r8
  7971. adc rax, 0
  7972. mov r8, QWORD PTR [rcx+48]
  7973. mov QWORD PTR [rcx+40], rax
  7974. adc r8, 0
  7975. mov rax, QWORD PTR [rcx+56]
  7976. mov QWORD PTR [rcx+48], r8
  7977. adc rax, 0
  7978. mov r8, QWORD PTR [rcx+64]
  7979. mov QWORD PTR [rcx+56], rax
  7980. adc r8, 0
  7981. mov rax, QWORD PTR [rcx+72]
  7982. mov QWORD PTR [rcx+64], r8
  7983. adc rax, 0
  7984. mov r8, QWORD PTR [rcx+80]
  7985. mov QWORD PTR [rcx+72], rax
  7986. adc r8, 0
  7987. mov rax, QWORD PTR [rcx+88]
  7988. mov QWORD PTR [rcx+80], r8
  7989. adc rax, 0
  7990. mov r8, QWORD PTR [rcx+96]
  7991. mov QWORD PTR [rcx+88], rax
  7992. adc r8, 0
  7993. mov rax, QWORD PTR [rcx+104]
  7994. mov QWORD PTR [rcx+96], r8
  7995. adc rax, 0
  7996. mov r8, QWORD PTR [rcx+112]
  7997. mov QWORD PTR [rcx+104], rax
  7998. adc r8, 0
  7999. mov rax, QWORD PTR [rcx+120]
  8000. mov QWORD PTR [rcx+112], r8
  8001. adc rax, 0
  8002. mov QWORD PTR [rcx+120], rax
  8003. mov rdx, QWORD PTR [rsp+264]
  8004. mov rcx, QWORD PTR [rsp+256]
  8005. add rsp, 272
  8006. ret
  8007. sp_2048_sqr_32 ENDP
  8008. _text ENDS
  8009. IFDEF HAVE_INTEL_AVX2
  8010. ; /* Square a and put result in r. (r = a * a)
  8011. ; * Arguments arrive in rcx (r) and rdx (a); both registers are reloaded with those values before returning.
  8012. ; * Karatsuba: ah^2, al^2, (al - ah)^2
  8013. ; * Combination step: r[16..47] -= ((al - ah)^2 - ah^2 - al^2), then the signed carry word is added in at r[48..63].
  8014. ; * r A single precision integer. Words r[0..63] are accessed.
  8015. ; * a A single precision integer. Words a[0..31] are read (al = a[0..15], ah = a[16..31]).
  8016. ; */
  8017. _text SEGMENT READONLY PARA
  8018. sp_2048_sqr_avx2_32 PROC
  8019. sub rsp, 272 ; frame: 256-byte scratch at [rsp], save slots at [rsp+256] and [rsp+264]
  8020. mov QWORD PTR [rsp+256], rcx ; save r
  8021. mov QWORD PTR [rsp+264], rdx ; save a
  8022. mov r9, 0 ; r9 becomes the borrow mask of al - ah
  8023. mov r10, rsp ; r10 = scratch
  8024. lea r11, QWORD PTR [rdx+128] ; r11 = ah (a + 128 bytes)
  8025. mov rax, QWORD PTR [rdx] ; scratch[0..15] = al - ah, borrow chained through sbb
  8026. sub rax, QWORD PTR [r11]
  8027. mov r8, QWORD PTR [rdx+8]
  8028. mov QWORD PTR [r10], rax
  8029. sbb r8, QWORD PTR [r11+8]
  8030. mov rax, QWORD PTR [rdx+16]
  8031. mov QWORD PTR [r10+8], r8
  8032. sbb rax, QWORD PTR [r11+16]
  8033. mov r8, QWORD PTR [rdx+24]
  8034. mov QWORD PTR [r10+16], rax
  8035. sbb r8, QWORD PTR [r11+24]
  8036. mov rax, QWORD PTR [rdx+32]
  8037. mov QWORD PTR [r10+24], r8
  8038. sbb rax, QWORD PTR [r11+32]
  8039. mov r8, QWORD PTR [rdx+40]
  8040. mov QWORD PTR [r10+32], rax
  8041. sbb r8, QWORD PTR [r11+40]
  8042. mov rax, QWORD PTR [rdx+48]
  8043. mov QWORD PTR [r10+40], r8
  8044. sbb rax, QWORD PTR [r11+48]
  8045. mov r8, QWORD PTR [rdx+56]
  8046. mov QWORD PTR [r10+48], rax
  8047. sbb r8, QWORD PTR [r11+56]
  8048. mov rax, QWORD PTR [rdx+64]
  8049. mov QWORD PTR [r10+56], r8
  8050. sbb rax, QWORD PTR [r11+64]
  8051. mov r8, QWORD PTR [rdx+72]
  8052. mov QWORD PTR [r10+64], rax
  8053. sbb r8, QWORD PTR [r11+72]
  8054. mov rax, QWORD PTR [rdx+80]
  8055. mov QWORD PTR [r10+72], r8
  8056. sbb rax, QWORD PTR [r11+80]
  8057. mov r8, QWORD PTR [rdx+88]
  8058. mov QWORD PTR [r10+80], rax
  8059. sbb r8, QWORD PTR [r11+88]
  8060. mov rax, QWORD PTR [rdx+96]
  8061. mov QWORD PTR [r10+88], r8
  8062. sbb rax, QWORD PTR [r11+96]
  8063. mov r8, QWORD PTR [rdx+104]
  8064. mov QWORD PTR [r10+96], rax
  8065. sbb r8, QWORD PTR [r11+104]
  8066. mov rax, QWORD PTR [rdx+112]
  8067. mov QWORD PTR [r10+104], r8
  8068. sbb rax, QWORD PTR [r11+112]
  8069. mov r8, QWORD PTR [rdx+120]
  8070. mov QWORD PTR [r10+112], rax
  8071. sbb r8, QWORD PTR [r11+120]
  8072. mov QWORD PTR [r10+120], r8
  8073. sbb r9, 0 ; r9 = -1 if the subtraction borrowed (al < ah), else 0
  8074. ; Cond Negate: scratch = (scratch XOR r9) - r9, leaving |al - ah|; r11 carries between words
  8075. mov rax, QWORD PTR [r10]
  8076. mov r11, r9
  8077. xor rax, r9
  8078. neg r11
  8079. sub rax, r9
  8080. mov r8, QWORD PTR [r10+8]
  8081. sbb r11, 0 ; r11 = carry out of word 0 (always 0 when r9 = 0)
  8082. mov QWORD PTR [r10], rax
  8083. xor r8, r9
  8084. add r8, r11
  8085. mov rax, QWORD PTR [r10+16]
  8086. setc r11b ; r11 = carry out of this word, added into the next one
  8087. mov QWORD PTR [r10+8], r8
  8088. xor rax, r9
  8089. add rax, r11
  8090. mov r8, QWORD PTR [r10+24]
  8091. setc r11b
  8092. mov QWORD PTR [r10+16], rax
  8093. xor r8, r9
  8094. add r8, r11
  8095. mov rax, QWORD PTR [r10+32]
  8096. setc r11b
  8097. mov QWORD PTR [r10+24], r8
  8098. xor rax, r9
  8099. add rax, r11
  8100. mov r8, QWORD PTR [r10+40]
  8101. setc r11b
  8102. mov QWORD PTR [r10+32], rax
  8103. xor r8, r9
  8104. add r8, r11
  8105. mov rax, QWORD PTR [r10+48]
  8106. setc r11b
  8107. mov QWORD PTR [r10+40], r8
  8108. xor rax, r9
  8109. add rax, r11
  8110. mov r8, QWORD PTR [r10+56]
  8111. setc r11b
  8112. mov QWORD PTR [r10+48], rax
  8113. xor r8, r9
  8114. add r8, r11
  8115. mov rax, QWORD PTR [r10+64]
  8116. setc r11b
  8117. mov QWORD PTR [r10+56], r8
  8118. xor rax, r9
  8119. add rax, r11
  8120. mov r8, QWORD PTR [r10+72]
  8121. setc r11b
  8122. mov QWORD PTR [r10+64], rax
  8123. xor r8, r9
  8124. add r8, r11
  8125. mov rax, QWORD PTR [r10+80]
  8126. setc r11b
  8127. mov QWORD PTR [r10+72], r8
  8128. xor rax, r9
  8129. add rax, r11
  8130. mov r8, QWORD PTR [r10+88]
  8131. setc r11b
  8132. mov QWORD PTR [r10+80], rax
  8133. xor r8, r9
  8134. add r8, r11
  8135. mov rax, QWORD PTR [r10+96]
  8136. setc r11b
  8137. mov QWORD PTR [r10+88], r8
  8138. xor rax, r9
  8139. add rax, r11
  8140. mov r8, QWORD PTR [r10+104]
  8141. setc r11b
  8142. mov QWORD PTR [r10+96], rax
  8143. xor r8, r9
  8144. add r8, r11
  8145. mov rax, QWORD PTR [r10+112]
  8146. setc r11b
  8147. mov QWORD PTR [r10+104], r8
  8148. xor rax, r9
  8149. add rax, r11
  8150. mov r8, QWORD PTR [r10+120]
  8151. setc r11b
  8152. mov QWORD PTR [r10+112], rax
  8153. xor r8, r9
  8154. add r8, r11
  8155. mov QWORD PTR [r10+120], r8
  8156. mov rdx, r10 ; second argument = scratch
  8157. mov rcx, rsp ; first argument = scratch (same buffer)
  8158. call sp_2048_sqr_avx2_16 ; callee not visible here; per the header this yields (al - ah)^2, presumably as 32 words in scratch
  8159. mov rdx, QWORD PTR [rsp+264]
  8160. mov rcx, QWORD PTR [rsp+256]
  8161. add rdx, 128 ; rdx = ah
  8162. add rcx, 256 ; rcx = r + 256 bytes (r[32..])
  8163. call sp_2048_sqr_avx2_16 ; ah^2 term of the header, directed at r[32..]
  8164. mov rdx, QWORD PTR [rsp+264]
  8165. mov rcx, QWORD PTR [rsp+256]
  8166. call sp_2048_sqr_avx2_16 ; al^2 term of the header, directed at r[0..]
  8167. IFDEF _WIN64
  8168. mov rdx, QWORD PTR [rsp+264] ; NOTE(review): overwritten by the load of rdx right after ENDIF
  8169. mov rcx, QWORD PTR [rsp+256] ; NOTE(review): rcx is reloaded from this slot before its next use
  8170. ENDIF
  8171. mov rdx, QWORD PTR [rsp+256] ; rdx = r
  8172. lea r10, QWORD PTR [rsp+128] ; r10 = scratch + 128, so displacements -128..120 span all 32 scratch words
  8173. add rdx, 384 ; rdx = r + 384, so displacements -128..120 span r[32..63]
  8174. mov r9, 0 ; r9 accumulates the borrows of the next two subtractions
  8175. mov r8, QWORD PTR [r10+-128] ; scratch -= r[32..63]
  8176. sub r8, QWORD PTR [rdx+-128]
  8177. mov rax, QWORD PTR [r10+-120]
  8178. mov QWORD PTR [r10+-128], r8
  8179. sbb rax, QWORD PTR [rdx+-120]
  8180. mov r8, QWORD PTR [r10+-112]
  8181. mov QWORD PTR [r10+-120], rax
  8182. sbb r8, QWORD PTR [rdx+-112]
  8183. mov rax, QWORD PTR [r10+-104]
  8184. mov QWORD PTR [r10+-112], r8
  8185. sbb rax, QWORD PTR [rdx+-104]
  8186. mov r8, QWORD PTR [r10+-96]
  8187. mov QWORD PTR [r10+-104], rax
  8188. sbb r8, QWORD PTR [rdx+-96]
  8189. mov rax, QWORD PTR [r10+-88]
  8190. mov QWORD PTR [r10+-96], r8
  8191. sbb rax, QWORD PTR [rdx+-88]
  8192. mov r8, QWORD PTR [r10+-80]
  8193. mov QWORD PTR [r10+-88], rax
  8194. sbb r8, QWORD PTR [rdx+-80]
  8195. mov rax, QWORD PTR [r10+-72]
  8196. mov QWORD PTR [r10+-80], r8
  8197. sbb rax, QWORD PTR [rdx+-72]
  8198. mov r8, QWORD PTR [r10+-64]
  8199. mov QWORD PTR [r10+-72], rax
  8200. sbb r8, QWORD PTR [rdx+-64]
  8201. mov rax, QWORD PTR [r10+-56]
  8202. mov QWORD PTR [r10+-64], r8
  8203. sbb rax, QWORD PTR [rdx+-56]
  8204. mov r8, QWORD PTR [r10+-48]
  8205. mov QWORD PTR [r10+-56], rax
  8206. sbb r8, QWORD PTR [rdx+-48]
  8207. mov rax, QWORD PTR [r10+-40]
  8208. mov QWORD PTR [r10+-48], r8
  8209. sbb rax, QWORD PTR [rdx+-40]
  8210. mov r8, QWORD PTR [r10+-32]
  8211. mov QWORD PTR [r10+-40], rax
  8212. sbb r8, QWORD PTR [rdx+-32]
  8213. mov rax, QWORD PTR [r10+-24]
  8214. mov QWORD PTR [r10+-32], r8
  8215. sbb rax, QWORD PTR [rdx+-24]
  8216. mov r8, QWORD PTR [r10+-16]
  8217. mov QWORD PTR [r10+-24], rax
  8218. sbb r8, QWORD PTR [rdx+-16]
  8219. mov rax, QWORD PTR [r10+-8]
  8220. mov QWORD PTR [r10+-16], r8
  8221. sbb rax, QWORD PTR [rdx+-8]
  8222. mov r8, QWORD PTR [r10]
  8223. mov QWORD PTR [r10+-8], rax
  8224. sbb r8, QWORD PTR [rdx]
  8225. mov rax, QWORD PTR [r10+8]
  8226. mov QWORD PTR [r10], r8
  8227. sbb rax, QWORD PTR [rdx+8]
  8228. mov r8, QWORD PTR [r10+16]
  8229. mov QWORD PTR [r10+8], rax
  8230. sbb r8, QWORD PTR [rdx+16]
  8231. mov rax, QWORD PTR [r10+24]
  8232. mov QWORD PTR [r10+16], r8
  8233. sbb rax, QWORD PTR [rdx+24]
  8234. mov r8, QWORD PTR [r10+32]
  8235. mov QWORD PTR [r10+24], rax
  8236. sbb r8, QWORD PTR [rdx+32]
  8237. mov rax, QWORD PTR [r10+40]
  8238. mov QWORD PTR [r10+32], r8
  8239. sbb rax, QWORD PTR [rdx+40]
  8240. mov r8, QWORD PTR [r10+48]
  8241. mov QWORD PTR [r10+40], rax
  8242. sbb r8, QWORD PTR [rdx+48]
  8243. mov rax, QWORD PTR [r10+56]
  8244. mov QWORD PTR [r10+48], r8
  8245. sbb rax, QWORD PTR [rdx+56]
  8246. mov r8, QWORD PTR [r10+64]
  8247. mov QWORD PTR [r10+56], rax
  8248. sbb r8, QWORD PTR [rdx+64]
  8249. mov rax, QWORD PTR [r10+72]
  8250. mov QWORD PTR [r10+64], r8
  8251. sbb rax, QWORD PTR [rdx+72]
  8252. mov r8, QWORD PTR [r10+80]
  8253. mov QWORD PTR [r10+72], rax
  8254. sbb r8, QWORD PTR [rdx+80]
  8255. mov rax, QWORD PTR [r10+88]
  8256. mov QWORD PTR [r10+80], r8
  8257. sbb rax, QWORD PTR [rdx+88]
  8258. mov r8, QWORD PTR [r10+96]
  8259. mov QWORD PTR [r10+88], rax
  8260. sbb r8, QWORD PTR [rdx+96]
  8261. mov rax, QWORD PTR [r10+104]
  8262. mov QWORD PTR [r10+96], r8
  8263. sbb rax, QWORD PTR [rdx+104]
  8264. mov r8, QWORD PTR [r10+112]
  8265. mov QWORD PTR [r10+104], rax
  8266. sbb r8, QWORD PTR [rdx+112]
  8267. mov rax, QWORD PTR [r10+120]
  8268. mov QWORD PTR [r10+112], r8
  8269. sbb rax, QWORD PTR [rdx+120]
  8270. mov QWORD PTR [r10+120], rax
  8271. sbb r9, 0 ; r9 -= borrow of this subtraction
  8272. sub rdx, 256 ; rdx = r + 128, so displacements -128..120 now span r[0..31]
  8273. mov r8, QWORD PTR [r10+-128] ; scratch -= r[0..31]
  8274. sub r8, QWORD PTR [rdx+-128]
  8275. mov rax, QWORD PTR [r10+-120]
  8276. mov QWORD PTR [r10+-128], r8
  8277. sbb rax, QWORD PTR [rdx+-120]
  8278. mov r8, QWORD PTR [r10+-112]
  8279. mov QWORD PTR [r10+-120], rax
  8280. sbb r8, QWORD PTR [rdx+-112]
  8281. mov rax, QWORD PTR [r10+-104]
  8282. mov QWORD PTR [r10+-112], r8
  8283. sbb rax, QWORD PTR [rdx+-104]
  8284. mov r8, QWORD PTR [r10+-96]
  8285. mov QWORD PTR [r10+-104], rax
  8286. sbb r8, QWORD PTR [rdx+-96]
  8287. mov rax, QWORD PTR [r10+-88]
  8288. mov QWORD PTR [r10+-96], r8
  8289. sbb rax, QWORD PTR [rdx+-88]
  8290. mov r8, QWORD PTR [r10+-80]
  8291. mov QWORD PTR [r10+-88], rax
  8292. sbb r8, QWORD PTR [rdx+-80]
  8293. mov rax, QWORD PTR [r10+-72]
  8294. mov QWORD PTR [r10+-80], r8
  8295. sbb rax, QWORD PTR [rdx+-72]
  8296. mov r8, QWORD PTR [r10+-64]
  8297. mov QWORD PTR [r10+-72], rax
  8298. sbb r8, QWORD PTR [rdx+-64]
  8299. mov rax, QWORD PTR [r10+-56]
  8300. mov QWORD PTR [r10+-64], r8
  8301. sbb rax, QWORD PTR [rdx+-56]
  8302. mov r8, QWORD PTR [r10+-48]
  8303. mov QWORD PTR [r10+-56], rax
  8304. sbb r8, QWORD PTR [rdx+-48]
  8305. mov rax, QWORD PTR [r10+-40]
  8306. mov QWORD PTR [r10+-48], r8
  8307. sbb rax, QWORD PTR [rdx+-40]
  8308. mov r8, QWORD PTR [r10+-32]
  8309. mov QWORD PTR [r10+-40], rax
  8310. sbb r8, QWORD PTR [rdx+-32]
  8311. mov rax, QWORD PTR [r10+-24]
  8312. mov QWORD PTR [r10+-32], r8
  8313. sbb rax, QWORD PTR [rdx+-24]
  8314. mov r8, QWORD PTR [r10+-16]
  8315. mov QWORD PTR [r10+-24], rax
  8316. sbb r8, QWORD PTR [rdx+-16]
  8317. mov rax, QWORD PTR [r10+-8]
  8318. mov QWORD PTR [r10+-16], r8
  8319. sbb rax, QWORD PTR [rdx+-8]
  8320. mov r8, QWORD PTR [r10]
  8321. mov QWORD PTR [r10+-8], rax
  8322. sbb r8, QWORD PTR [rdx]
  8323. mov rax, QWORD PTR [r10+8]
  8324. mov QWORD PTR [r10], r8
  8325. sbb rax, QWORD PTR [rdx+8]
  8326. mov r8, QWORD PTR [r10+16]
  8327. mov QWORD PTR [r10+8], rax
  8328. sbb r8, QWORD PTR [rdx+16]
  8329. mov rax, QWORD PTR [r10+24]
  8330. mov QWORD PTR [r10+16], r8
  8331. sbb rax, QWORD PTR [rdx+24]
  8332. mov r8, QWORD PTR [r10+32]
  8333. mov QWORD PTR [r10+24], rax
  8334. sbb r8, QWORD PTR [rdx+32]
  8335. mov rax, QWORD PTR [r10+40]
  8336. mov QWORD PTR [r10+32], r8
  8337. sbb rax, QWORD PTR [rdx+40]
  8338. mov r8, QWORD PTR [r10+48]
  8339. mov QWORD PTR [r10+40], rax
  8340. sbb r8, QWORD PTR [rdx+48]
  8341. mov rax, QWORD PTR [r10+56]
  8342. mov QWORD PTR [r10+48], r8
  8343. sbb rax, QWORD PTR [rdx+56]
  8344. mov r8, QWORD PTR [r10+64]
  8345. mov QWORD PTR [r10+56], rax
  8346. sbb r8, QWORD PTR [rdx+64]
  8347. mov rax, QWORD PTR [r10+72]
  8348. mov QWORD PTR [r10+64], r8
  8349. sbb rax, QWORD PTR [rdx+72]
  8350. mov r8, QWORD PTR [r10+80]
  8351. mov QWORD PTR [r10+72], rax
  8352. sbb r8, QWORD PTR [rdx+80]
  8353. mov rax, QWORD PTR [r10+88]
  8354. mov QWORD PTR [r10+80], r8
  8355. sbb rax, QWORD PTR [rdx+88]
  8356. mov r8, QWORD PTR [r10+96]
  8357. mov QWORD PTR [r10+88], rax
  8358. sbb r8, QWORD PTR [rdx+96]
  8359. mov rax, QWORD PTR [r10+104]
  8360. mov QWORD PTR [r10+96], r8
  8361. sbb rax, QWORD PTR [rdx+104]
  8362. mov r8, QWORD PTR [r10+112]
  8363. mov QWORD PTR [r10+104], rax
  8364. sbb r8, QWORD PTR [rdx+112]
  8365. mov rax, QWORD PTR [r10+120]
  8366. mov QWORD PTR [r10+112], r8
  8367. sbb rax, QWORD PTR [rdx+120]
  8368. mov QWORD PTR [r10+120], rax
  8369. sbb r9, 0 ; r9 is now 0, -1 or -2: minus the number of borrows so far
  8370. mov rcx, QWORD PTR [rsp+256] ; rcx = r
  8371. neg r9 ; r9 = number of borrows from the two subtractions above (0..2)
  8372. add rcx, 256 ; rcx = r + 256, so displacements -128..120 span r[16..47]
  8373. mov r8, QWORD PTR [rcx+-128] ; r[16..47] -= scratch
  8374. sub r8, QWORD PTR [r10+-128]
  8375. mov rax, QWORD PTR [rcx+-120]
  8376. mov QWORD PTR [rcx+-128], r8
  8377. sbb rax, QWORD PTR [r10+-120]
  8378. mov r8, QWORD PTR [rcx+-112]
  8379. mov QWORD PTR [rcx+-120], rax
  8380. sbb r8, QWORD PTR [r10+-112]
  8381. mov rax, QWORD PTR [rcx+-104]
  8382. mov QWORD PTR [rcx+-112], r8
  8383. sbb rax, QWORD PTR [r10+-104]
  8384. mov r8, QWORD PTR [rcx+-96]
  8385. mov QWORD PTR [rcx+-104], rax
  8386. sbb r8, QWORD PTR [r10+-96]
  8387. mov rax, QWORD PTR [rcx+-88]
  8388. mov QWORD PTR [rcx+-96], r8
  8389. sbb rax, QWORD PTR [r10+-88]
  8390. mov r8, QWORD PTR [rcx+-80]
  8391. mov QWORD PTR [rcx+-88], rax
  8392. sbb r8, QWORD PTR [r10+-80]
  8393. mov rax, QWORD PTR [rcx+-72]
  8394. mov QWORD PTR [rcx+-80], r8
  8395. sbb rax, QWORD PTR [r10+-72]
  8396. mov r8, QWORD PTR [rcx+-64]
  8397. mov QWORD PTR [rcx+-72], rax
  8398. sbb r8, QWORD PTR [r10+-64]
  8399. mov rax, QWORD PTR [rcx+-56]
  8400. mov QWORD PTR [rcx+-64], r8
  8401. sbb rax, QWORD PTR [r10+-56]
  8402. mov r8, QWORD PTR [rcx+-48]
  8403. mov QWORD PTR [rcx+-56], rax
  8404. sbb r8, QWORD PTR [r10+-48]
  8405. mov rax, QWORD PTR [rcx+-40]
  8406. mov QWORD PTR [rcx+-48], r8
  8407. sbb rax, QWORD PTR [r10+-40]
  8408. mov r8, QWORD PTR [rcx+-32]
  8409. mov QWORD PTR [rcx+-40], rax
  8410. sbb r8, QWORD PTR [r10+-32]
  8411. mov rax, QWORD PTR [rcx+-24]
  8412. mov QWORD PTR [rcx+-32], r8
  8413. sbb rax, QWORD PTR [r10+-24]
  8414. mov r8, QWORD PTR [rcx+-16]
  8415. mov QWORD PTR [rcx+-24], rax
  8416. sbb r8, QWORD PTR [r10+-16]
  8417. mov rax, QWORD PTR [rcx+-8]
  8418. mov QWORD PTR [rcx+-16], r8
  8419. sbb rax, QWORD PTR [r10+-8]
  8420. mov r8, QWORD PTR [rcx]
  8421. mov QWORD PTR [rcx+-8], rax
  8422. sbb r8, QWORD PTR [r10]
  8423. mov rax, QWORD PTR [rcx+8]
  8424. mov QWORD PTR [rcx], r8
  8425. sbb rax, QWORD PTR [r10+8]
  8426. mov r8, QWORD PTR [rcx+16]
  8427. mov QWORD PTR [rcx+8], rax
  8428. sbb r8, QWORD PTR [r10+16]
  8429. mov rax, QWORD PTR [rcx+24]
  8430. mov QWORD PTR [rcx+16], r8
  8431. sbb rax, QWORD PTR [r10+24]
  8432. mov r8, QWORD PTR [rcx+32]
  8433. mov QWORD PTR [rcx+24], rax
  8434. sbb r8, QWORD PTR [r10+32]
  8435. mov rax, QWORD PTR [rcx+40]
  8436. mov QWORD PTR [rcx+32], r8
  8437. sbb rax, QWORD PTR [r10+40]
  8438. mov r8, QWORD PTR [rcx+48]
  8439. mov QWORD PTR [rcx+40], rax
  8440. sbb r8, QWORD PTR [r10+48]
  8441. mov rax, QWORD PTR [rcx+56]
  8442. mov QWORD PTR [rcx+48], r8
  8443. sbb rax, QWORD PTR [r10+56]
  8444. mov r8, QWORD PTR [rcx+64]
  8445. mov QWORD PTR [rcx+56], rax
  8446. sbb r8, QWORD PTR [r10+64]
  8447. mov rax, QWORD PTR [rcx+72]
  8448. mov QWORD PTR [rcx+64], r8
  8449. sbb rax, QWORD PTR [r10+72]
  8450. mov r8, QWORD PTR [rcx+80]
  8451. mov QWORD PTR [rcx+72], rax
  8452. sbb r8, QWORD PTR [r10+80]
  8453. mov rax, QWORD PTR [rcx+88]
  8454. mov QWORD PTR [rcx+80], r8
  8455. sbb rax, QWORD PTR [r10+88]
  8456. mov r8, QWORD PTR [rcx+96]
  8457. mov QWORD PTR [rcx+88], rax
  8458. sbb r8, QWORD PTR [r10+96]
  8459. mov rax, QWORD PTR [rcx+104]
  8460. mov QWORD PTR [rcx+96], r8
  8461. sbb rax, QWORD PTR [r10+104]
  8462. mov r8, QWORD PTR [rcx+112]
  8463. mov QWORD PTR [rcx+104], rax
  8464. sbb r8, QWORD PTR [r10+112]
  8465. mov rax, QWORD PTR [rcx+120]
  8466. mov QWORD PTR [rcx+112], r8
  8467. sbb rax, QWORD PTR [r10+120]
  8468. mov QWORD PTR [rcx+120], rax
  8469. sbb r9, 0 ; r9 = earlier borrow count minus the borrow of this subtraction
  8470. mov rcx, QWORD PTR [rsp+256]
  8471. add rcx, 384 ; rcx = r + 384 bytes = &r[48]
  8472. ; Add in word: r[48] += r9, then the carry is rippled through r[63] with adc
  8473. mov r8, QWORD PTR [rcx]
  8474. add r8, r9
  8475. mov rax, QWORD PTR [rcx+8]
  8476. mov QWORD PTR [rcx], r8
  8477. adc rax, 0
  8478. mov r8, QWORD PTR [rcx+16]
  8479. mov QWORD PTR [rcx+8], rax
  8480. adc r8, 0
  8481. mov rax, QWORD PTR [rcx+24]
  8482. mov QWORD PTR [rcx+16], r8
  8483. adc rax, 0
  8484. mov r8, QWORD PTR [rcx+32]
  8485. mov QWORD PTR [rcx+24], rax
  8486. adc r8, 0
  8487. mov rax, QWORD PTR [rcx+40]
  8488. mov QWORD PTR [rcx+32], r8
  8489. adc rax, 0
  8490. mov r8, QWORD PTR [rcx+48]
  8491. mov QWORD PTR [rcx+40], rax
  8492. adc r8, 0
  8493. mov rax, QWORD PTR [rcx+56]
  8494. mov QWORD PTR [rcx+48], r8
  8495. adc rax, 0
  8496. mov r8, QWORD PTR [rcx+64]
  8497. mov QWORD PTR [rcx+56], rax
  8498. adc r8, 0
  8499. mov rax, QWORD PTR [rcx+72]
  8500. mov QWORD PTR [rcx+64], r8
  8501. adc rax, 0
  8502. mov r8, QWORD PTR [rcx+80]
  8503. mov QWORD PTR [rcx+72], rax
  8504. adc r8, 0
  8505. mov rax, QWORD PTR [rcx+88]
  8506. mov QWORD PTR [rcx+80], r8
  8507. adc rax, 0
  8508. mov r8, QWORD PTR [rcx+96]
  8509. mov QWORD PTR [rcx+88], rax
  8510. adc r8, 0
  8511. mov rax, QWORD PTR [rcx+104]
  8512. mov QWORD PTR [rcx+96], r8
  8513. adc rax, 0
  8514. mov r8, QWORD PTR [rcx+112]
  8515. mov QWORD PTR [rcx+104], rax
  8516. adc r8, 0
  8517. mov rax, QWORD PTR [rcx+120]
  8518. mov QWORD PTR [rcx+112], r8
  8519. adc rax, 0
  8520. mov QWORD PTR [rcx+120], rax
  8521. mov rdx, QWORD PTR [rsp+264] ; rdx = a again, as on entry
  8522. mov rcx, QWORD PTR [rsp+256] ; rcx = r again, as on entry
  8523. add rsp, 272 ; release the frame
  8524. ret
  8525. sp_2048_sqr_avx2_32 ENDP
  8526. _text ENDS
  8527. ENDIF
  8528. ; /* Sub b from a into a. (a -= b)
  8529. ; * Operates on 16 64-bit words; rax returns 0 when there is no final borrow and -1 when there is.
  8530. ; * a A single precision integer and result. Pointer in rcx.
  8531. ; * b A single precision integer. Pointer in rdx.
  8532. ; */
  8533. _text SEGMENT READONLY PARA
  8534. sp_2048_sub_in_place_16 PROC
  8535. mov r8, QWORD PTR [rcx] ; r8 and r9 alternate as the working word
  8536. sub r8, QWORD PTR [rdx] ; word 0 starts the borrow chain; every later word uses sbb
  8537. mov r9, QWORD PTR [rcx+8]
  8538. mov QWORD PTR [rcx], r8
  8539. sbb r9, QWORD PTR [rdx+8]
  8540. mov r8, QWORD PTR [rcx+16]
  8541. mov QWORD PTR [rcx+8], r9
  8542. sbb r8, QWORD PTR [rdx+16]
  8543. mov r9, QWORD PTR [rcx+24]
  8544. mov QWORD PTR [rcx+16], r8
  8545. sbb r9, QWORD PTR [rdx+24]
  8546. mov r8, QWORD PTR [rcx+32]
  8547. mov QWORD PTR [rcx+24], r9
  8548. sbb r8, QWORD PTR [rdx+32]
  8549. mov r9, QWORD PTR [rcx+40]
  8550. mov QWORD PTR [rcx+32], r8
  8551. sbb r9, QWORD PTR [rdx+40]
  8552. mov r8, QWORD PTR [rcx+48]
  8553. mov QWORD PTR [rcx+40], r9
  8554. sbb r8, QWORD PTR [rdx+48]
  8555. mov r9, QWORD PTR [rcx+56]
  8556. mov QWORD PTR [rcx+48], r8
  8557. sbb r9, QWORD PTR [rdx+56]
  8558. mov r8, QWORD PTR [rcx+64]
  8559. mov QWORD PTR [rcx+56], r9
  8560. sbb r8, QWORD PTR [rdx+64]
  8561. mov r9, QWORD PTR [rcx+72]
  8562. mov QWORD PTR [rcx+64], r8
  8563. sbb r9, QWORD PTR [rdx+72]
  8564. mov r8, QWORD PTR [rcx+80]
  8565. mov QWORD PTR [rcx+72], r9
  8566. sbb r8, QWORD PTR [rdx+80]
  8567. mov r9, QWORD PTR [rcx+88]
  8568. mov QWORD PTR [rcx+80], r8
  8569. sbb r9, QWORD PTR [rdx+88]
  8570. mov r8, QWORD PTR [rcx+96]
  8571. mov QWORD PTR [rcx+88], r9
  8572. sbb r8, QWORD PTR [rdx+96]
  8573. mov r9, QWORD PTR [rcx+104]
  8574. mov QWORD PTR [rcx+96], r8
  8575. sbb r9, QWORD PTR [rdx+104]
  8576. mov r8, QWORD PTR [rcx+112]
  8577. mov QWORD PTR [rcx+104], r9
  8578. sbb r8, QWORD PTR [rdx+112]
  8579. mov r9, QWORD PTR [rcx+120]
  8580. mov QWORD PTR [rcx+112], r8
  8581. sbb r9, QWORD PTR [rdx+120]
  8582. mov QWORD PTR [rcx+120], r9
  8583. sbb rax, rax ; rax = 0 - final borrow: 0, or -1 (all ones) when the subtraction borrowed
  8584. ret
  8585. sp_2048_sub_in_place_16 ENDP
  8586. _text ENDS
  8587. ; /* Mul a by digit b into r. (r = a * b)
  8588. ; * r10, r11 and r12 rotate as the running accumulator; each step stores one finished result word.
  8589. ; * r A single precision integer. Pointer in rcx; 33 words (r[0..32]) are written.
  8590. ; * a A single precision integer. Pointer in rdx; 32 words (A[0..31]) are read.
  8591. ; * b A single precision digit. Value in r8.
  8592. ; */
  8593. _text SEGMENT READONLY PARA
  8594. sp_2048_mul_d_32 PROC
  8595. push r12 ; r12 is used as an accumulator and restored by the pop before ret
  8596. mov r9, rdx ; keep a in r9: mul writes the high half of each product to rdx
  8597. ; A[0] * B
  8598. mov rax, r8
  8599. xor r12, r12
  8600. mul QWORD PTR [r9] ; rdx:rax = A[0] * b
  8601. mov r10, rax
  8602. mov r11, rdx
  8603. mov QWORD PTR [rcx], r10
  8604. ; A[1] * B
  8605. mov rax, r8
  8606. xor r10, r10 ; clear the register that will receive the carry of this step
  8607. mul QWORD PTR [r9+8]
  8608. add r11, rax ; low half completes r[1]
  8609. mov QWORD PTR [rcx+8], r11
  8610. adc r12, rdx ; high half plus carry goes into the next word
  8611. adc r10, 0
  8612. ; A[2] * B
  8613. mov rax, r8
  8614. xor r11, r11
  8615. mul QWORD PTR [r9+16]
  8616. add r12, rax
  8617. mov QWORD PTR [rcx+16], r12
  8618. adc r10, rdx
  8619. adc r11, 0
  8620. ; A[3] * B
  8621. mov rax, r8
  8622. xor r12, r12
  8623. mul QWORD PTR [r9+24]
  8624. add r10, rax
  8625. mov QWORD PTR [rcx+24], r10
  8626. adc r11, rdx
  8627. adc r12, 0
  8628. ; A[4] * B
  8629. mov rax, r8
  8630. xor r10, r10
  8631. mul QWORD PTR [r9+32]
  8632. add r11, rax
  8633. mov QWORD PTR [rcx+32], r11
  8634. adc r12, rdx
  8635. adc r10, 0
  8636. ; A[5] * B
  8637. mov rax, r8
  8638. xor r11, r11
  8639. mul QWORD PTR [r9+40]
  8640. add r12, rax
  8641. mov QWORD PTR [rcx+40], r12
  8642. adc r10, rdx
  8643. adc r11, 0
  8644. ; A[6] * B
  8645. mov rax, r8
  8646. xor r12, r12
  8647. mul QWORD PTR [r9+48]
  8648. add r10, rax
  8649. mov QWORD PTR [rcx+48], r10
  8650. adc r11, rdx
  8651. adc r12, 0
  8652. ; A[7] * B
  8653. mov rax, r8
  8654. xor r10, r10
  8655. mul QWORD PTR [r9+56]
  8656. add r11, rax
  8657. mov QWORD PTR [rcx+56], r11
  8658. adc r12, rdx
  8659. adc r10, 0
  8660. ; A[8] * B
  8661. mov rax, r8
  8662. xor r11, r11
  8663. mul QWORD PTR [r9+64]
  8664. add r12, rax
  8665. mov QWORD PTR [rcx+64], r12
  8666. adc r10, rdx
  8667. adc r11, 0
  8668. ; A[9] * B
  8669. mov rax, r8
  8670. xor r12, r12
  8671. mul QWORD PTR [r9+72]
  8672. add r10, rax
  8673. mov QWORD PTR [rcx+72], r10
  8674. adc r11, rdx
  8675. adc r12, 0
  8676. ; A[10] * B
  8677. mov rax, r8
  8678. xor r10, r10
  8679. mul QWORD PTR [r9+80]
  8680. add r11, rax
  8681. mov QWORD PTR [rcx+80], r11
  8682. adc r12, rdx
  8683. adc r10, 0
  8684. ; A[11] * B
  8685. mov rax, r8
  8686. xor r11, r11
  8687. mul QWORD PTR [r9+88]
  8688. add r12, rax
  8689. mov QWORD PTR [rcx+88], r12
  8690. adc r10, rdx
  8691. adc r11, 0
  8692. ; A[12] * B
  8693. mov rax, r8
  8694. xor r12, r12
  8695. mul QWORD PTR [r9+96]
  8696. add r10, rax
  8697. mov QWORD PTR [rcx+96], r10
  8698. adc r11, rdx
  8699. adc r12, 0
  8700. ; A[13] * B
  8701. mov rax, r8
  8702. xor r10, r10
  8703. mul QWORD PTR [r9+104]
  8704. add r11, rax
  8705. mov QWORD PTR [rcx+104], r11
  8706. adc r12, rdx
  8707. adc r10, 0
  8708. ; A[14] * B
  8709. mov rax, r8
  8710. xor r11, r11
  8711. mul QWORD PTR [r9+112]
  8712. add r12, rax
  8713. mov QWORD PTR [rcx+112], r12
  8714. adc r10, rdx
  8715. adc r11, 0
  8716. ; A[15] * B
  8717. mov rax, r8
  8718. xor r12, r12
  8719. mul QWORD PTR [r9+120]
  8720. add r10, rax
  8721. mov QWORD PTR [rcx+120], r10
  8722. adc r11, rdx
  8723. adc r12, 0
  8724. ; A[16] * B
  8725. mov rax, r8
  8726. xor r10, r10
  8727. mul QWORD PTR [r9+128]
  8728. add r11, rax
  8729. mov QWORD PTR [rcx+128], r11
  8730. adc r12, rdx
  8731. adc r10, 0
  8732. ; A[17] * B
  8733. mov rax, r8
  8734. xor r11, r11
  8735. mul QWORD PTR [r9+136]
  8736. add r12, rax
  8737. mov QWORD PTR [rcx+136], r12
  8738. adc r10, rdx
  8739. adc r11, 0
  8740. ; A[18] * B
  8741. mov rax, r8
  8742. xor r12, r12
  8743. mul QWORD PTR [r9+144]
  8744. add r10, rax
  8745. mov QWORD PTR [rcx+144], r10
  8746. adc r11, rdx
  8747. adc r12, 0
  8748. ; A[19] * B
  8749. mov rax, r8
  8750. xor r10, r10
  8751. mul QWORD PTR [r9+152]
  8752. add r11, rax
  8753. mov QWORD PTR [rcx+152], r11
  8754. adc r12, rdx
  8755. adc r10, 0
  8756. ; A[20] * B
  8757. mov rax, r8
  8758. xor r11, r11
  8759. mul QWORD PTR [r9+160]
  8760. add r12, rax
  8761. mov QWORD PTR [rcx+160], r12
  8762. adc r10, rdx
  8763. adc r11, 0
  8764. ; A[21] * B
  8765. mov rax, r8
  8766. xor r12, r12
  8767. mul QWORD PTR [r9+168]
  8768. add r10, rax
  8769. mov QWORD PTR [rcx+168], r10
  8770. adc r11, rdx
  8771. adc r12, 0
  8772. ; A[22] * B
  8773. mov rax, r8
  8774. xor r10, r10
  8775. mul QWORD PTR [r9+176]
  8776. add r11, rax
  8777. mov QWORD PTR [rcx+176], r11
  8778. adc r12, rdx
  8779. adc r10, 0
  8780. ; A[23] * B
  8781. mov rax, r8
  8782. xor r11, r11
  8783. mul QWORD PTR [r9+184]
  8784. add r12, rax
  8785. mov QWORD PTR [rcx+184], r12
  8786. adc r10, rdx
  8787. adc r11, 0
  8788. ; A[24] * B
  8789. mov rax, r8
  8790. xor r12, r12
  8791. mul QWORD PTR [r9+192]
  8792. add r10, rax
  8793. mov QWORD PTR [rcx+192], r10
  8794. adc r11, rdx
  8795. adc r12, 0
  8796. ; A[25] * B
  8797. mov rax, r8
  8798. xor r10, r10
  8799. mul QWORD PTR [r9+200]
  8800. add r11, rax
  8801. mov QWORD PTR [rcx+200], r11
  8802. adc r12, rdx
  8803. adc r10, 0
  8804. ; A[26] * B
  8805. mov rax, r8
  8806. xor r11, r11
  8807. mul QWORD PTR [r9+208]
  8808. add r12, rax
  8809. mov QWORD PTR [rcx+208], r12
  8810. adc r10, rdx
  8811. adc r11, 0
  8812. ; A[27] * B
  8813. mov rax, r8
  8814. xor r12, r12
  8815. mul QWORD PTR [r9+216]
  8816. add r10, rax
  8817. mov QWORD PTR [rcx+216], r10
  8818. adc r11, rdx
  8819. adc r12, 0
  8820. ; A[28] * B
  8821. mov rax, r8
  8822. xor r10, r10
  8823. mul QWORD PTR [r9+224]
  8824. add r11, rax
  8825. mov QWORD PTR [rcx+224], r11
  8826. adc r12, rdx
  8827. adc r10, 0
  8828. ; A[29] * B
  8829. mov rax, r8
  8830. xor r11, r11
  8831. mul QWORD PTR [r9+232]
  8832. add r12, rax
  8833. mov QWORD PTR [rcx+232], r12
  8834. adc r10, rdx
  8835. adc r11, 0
  8836. ; A[30] * B
  8837. mov rax, r8
  8838. xor r12, r12
  8839. mul QWORD PTR [r9+240]
  8840. add r10, rax
  8841. mov QWORD PTR [rcx+240], r10
  8842. adc r11, rdx
  8843. adc r12, 0
  8844. ; A[31] * B
  8845. mov rax, r8
  8846. mul QWORD PTR [r9+248]
  8847. add r11, rax
  8848. adc r12, rdx
  8849. mov QWORD PTR [rcx+248], r11 ; r[31]
  8850. mov QWORD PTR [rcx+256], r12 ; r[32]: the extra top word of the product
  8851. pop r12
  8852. ret
  8853. sp_2048_mul_d_32 ENDP
  8854. _text ENDS
  8855. ; /* Conditionally subtract b from a using the mask m. (r = a - (b & m))
  8856. ; * m is -1 to subtract and 0 when not subtracting.
  8857. ; *
  8858. ; * r A single precision number representing condition subtract result. (rcx)
  8859. ; * a A single precision number to subtract from. (rdx)
  8860. ; * b A single precision number to subtract. (r8)
  8861. ; * m Mask value to apply. (r9) The final borrow is returned in rax as 0 or -1.
  8862. ; */
  8863. _text SEGMENT READONLY PARA
  8864. sp_2048_cond_sub_16 PROC
  8865. sub rsp, 128 ; 16-word scratch area holding the masked copy of b
  8866. mov r10, QWORD PTR [r8] ; phase 1: scratch[i] = b[i] & m, two words per group
  8867. mov r11, QWORD PTR [r8+8]
  8868. and r10, r9 ; masking is done up front: and rewrites the flags, so it cannot sit inside the sub/sbb chain
  8869. and r11, r9
  8870. mov QWORD PTR [rsp], r10
  8871. mov QWORD PTR [rsp+8], r11
  8872. mov r10, QWORD PTR [r8+16]
  8873. mov r11, QWORD PTR [r8+24]
  8874. and r10, r9
  8875. and r11, r9
  8876. mov QWORD PTR [rsp+16], r10
  8877. mov QWORD PTR [rsp+24], r11
  8878. mov r10, QWORD PTR [r8+32]
  8879. mov r11, QWORD PTR [r8+40]
  8880. and r10, r9
  8881. and r11, r9
  8882. mov QWORD PTR [rsp+32], r10
  8883. mov QWORD PTR [rsp+40], r11
  8884. mov r10, QWORD PTR [r8+48]
  8885. mov r11, QWORD PTR [r8+56]
  8886. and r10, r9
  8887. and r11, r9
  8888. mov QWORD PTR [rsp+48], r10
  8889. mov QWORD PTR [rsp+56], r11
  8890. mov r10, QWORD PTR [r8+64]
  8891. mov r11, QWORD PTR [r8+72]
  8892. and r10, r9
  8893. and r11, r9
  8894. mov QWORD PTR [rsp+64], r10
  8895. mov QWORD PTR [rsp+72], r11
  8896. mov r10, QWORD PTR [r8+80]
  8897. mov r11, QWORD PTR [r8+88]
  8898. and r10, r9
  8899. and r11, r9
  8900. mov QWORD PTR [rsp+80], r10
  8901. mov QWORD PTR [rsp+88], r11
  8902. mov r10, QWORD PTR [r8+96]
  8903. mov r11, QWORD PTR [r8+104]
  8904. and r10, r9
  8905. and r11, r9
  8906. mov QWORD PTR [rsp+96], r10
  8907. mov QWORD PTR [rsp+104], r11
  8908. mov r10, QWORD PTR [r8+112]
  8909. mov r11, QWORD PTR [r8+120]
  8910. and r10, r9
  8911. and r11, r9
  8912. mov QWORD PTR [rsp+112], r10
  8913. mov QWORD PTR [rsp+120], r11
  8914. mov r10, QWORD PTR [rdx] ; phase 2: r[i] = a[i] - scratch[i] - borrow; r8 is reused as a temporary from here on
  8915. mov r8, QWORD PTR [rsp]
  8916. sub r10, r8 ; word 0 starts the borrow chain
  8917. mov r11, QWORD PTR [rdx+8]
  8918. mov r8, QWORD PTR [rsp+8]
  8919. sbb r11, r8
  8920. mov QWORD PTR [rcx], r10 ; each store trails its subtract by one word; mov leaves CF intact
  8921. mov r10, QWORD PTR [rdx+16]
  8922. mov r8, QWORD PTR [rsp+16]
  8923. sbb r10, r8
  8924. mov QWORD PTR [rcx+8], r11
  8925. mov r11, QWORD PTR [rdx+24]
  8926. mov r8, QWORD PTR [rsp+24]
  8927. sbb r11, r8
  8928. mov QWORD PTR [rcx+16], r10
  8929. mov r10, QWORD PTR [rdx+32]
  8930. mov r8, QWORD PTR [rsp+32]
  8931. sbb r10, r8
  8932. mov QWORD PTR [rcx+24], r11
  8933. mov r11, QWORD PTR [rdx+40]
  8934. mov r8, QWORD PTR [rsp+40]
  8935. sbb r11, r8
  8936. mov QWORD PTR [rcx+32], r10
  8937. mov r10, QWORD PTR [rdx+48]
  8938. mov r8, QWORD PTR [rsp+48]
  8939. sbb r10, r8
  8940. mov QWORD PTR [rcx+40], r11
  8941. mov r11, QWORD PTR [rdx+56]
  8942. mov r8, QWORD PTR [rsp+56]
  8943. sbb r11, r8
  8944. mov QWORD PTR [rcx+48], r10
  8945. mov r10, QWORD PTR [rdx+64]
  8946. mov r8, QWORD PTR [rsp+64]
  8947. sbb r10, r8
  8948. mov QWORD PTR [rcx+56], r11
  8949. mov r11, QWORD PTR [rdx+72]
  8950. mov r8, QWORD PTR [rsp+72]
  8951. sbb r11, r8
  8952. mov QWORD PTR [rcx+64], r10
  8953. mov r10, QWORD PTR [rdx+80]
  8954. mov r8, QWORD PTR [rsp+80]
  8955. sbb r10, r8
  8956. mov QWORD PTR [rcx+72], r11
  8957. mov r11, QWORD PTR [rdx+88]
  8958. mov r8, QWORD PTR [rsp+88]
  8959. sbb r11, r8
  8960. mov QWORD PTR [rcx+80], r10
  8961. mov r10, QWORD PTR [rdx+96]
  8962. mov r8, QWORD PTR [rsp+96]
  8963. sbb r10, r8
  8964. mov QWORD PTR [rcx+88], r11
  8965. mov r11, QWORD PTR [rdx+104]
  8966. mov r8, QWORD PTR [rsp+104]
  8967. sbb r11, r8
  8968. mov QWORD PTR [rcx+96], r10
  8969. mov r10, QWORD PTR [rdx+112]
  8970. mov r8, QWORD PTR [rsp+112]
  8971. sbb r10, r8
  8972. mov QWORD PTR [rcx+104], r11
  8973. mov r11, QWORD PTR [rdx+120]
  8974. mov r8, QWORD PTR [rsp+120]
  8975. sbb r11, r8
  8976. mov QWORD PTR [rcx+112], r10
  8977. mov QWORD PTR [rcx+120], r11
  8978. sbb rax, rax ; rax = 0 - borrow: 0 when no borrow left word 15, -1 otherwise
  8979. add rsp, 128 ; release the scratch area (flags are no longer needed)
  8980. ret
  8981. sp_2048_cond_sub_16 ENDP
  8982. _text ENDS
  8983. ; /* Reduce the number back to 2048 bits using Montgomery reduction.
  8984. ; *
  8985. ; * a A single precision number to reduce in place. (rcx; words a[0]..a[32] are touched)
  8986. ; * m The single precision number representing the modulus. (rdx, kept in r9)
  8987. ; * mp The digit representing the negative inverse of m mod 2^n. (r8)
  8988. ; */
  8989. _text SEGMENT READONLY PARA
  8990. sp_2048_mont_reduce_16 PROC
  8991. push r12 ; r12-r15, rdi and rsi are used below and restored before ret
  8992. push r13
  8993. push r14
  8994. push r15
  8995. push rdi
  8996. push rsi
  8997. mov r9, rdx ; r9 = m; rdx is overwritten by every mul
  8998. xor rsi, rsi ; rsi = carry out of the top word, carried between iterations
  8999. ; i = 16
  9000. mov r10, 16
  9001. mov r15, QWORD PTR [rcx] ; r15/rdi cache a[i+0]/a[i+1] in registers across iterations
  9002. mov rdi, QWORD PTR [rcx+8]
  9003. L_2048_mont_reduce_16_loop:
  9004. ; mu = a[i] * mp
  9005. mov r13, r15
  9006. imul r13, r8
  9007. ; a[i+0] += m[0] * mu
  9008. mov rax, r13
  9009. xor r12, r12
  9010. mul QWORD PTR [r9]
  9011. add r15, rax ; the sum itself is never stored; only its carry is used
  9012. adc r12, rdx ; r12/r11 alternate as the carry word into the next column
  9013. ; a[i+1] += m[1] * mu
  9014. mov rax, r13
  9015. xor r11, r11
  9016. mul QWORD PTR [r9+8]
  9017. mov r15, rdi
  9018. add r15, rax
  9019. adc r11, rdx
  9020. add r15, r12 ; r15 now holds the word that is a[i+0] of the next iteration
  9021. adc r11, 0
  9022. ; a[i+2] += m[2] * mu
  9023. mov rax, r13
  9024. xor r12, r12
  9025. mul QWORD PTR [r9+16]
  9026. mov rdi, QWORD PTR [rcx+16]
  9027. add rdi, rax
  9028. adc r12, rdx
  9029. add rdi, r11 ; rdi now holds the word that is a[i+1] of the next iteration
  9030. adc r12, 0
  9031. ; a[i+3] += m[3] * mu
  9032. mov rax, r13
  9033. xor r11, r11
  9034. mul QWORD PTR [r9+24]
  9035. mov r14, QWORD PTR [rcx+24]
  9036. add r14, rax
  9037. adc r11, rdx
  9038. add r14, r12
  9039. mov QWORD PTR [rcx+24], r14 ; from here on words are updated in memory; mov leaves CF intact for the adc below
  9040. adc r11, 0
  9041. ; a[i+4] += m[4] * mu
  9042. mov rax, r13
  9043. xor r12, r12
  9044. mul QWORD PTR [r9+32]
  9045. mov r14, QWORD PTR [rcx+32]
  9046. add r14, rax
  9047. adc r12, rdx
  9048. add r14, r11
  9049. mov QWORD PTR [rcx+32], r14
  9050. adc r12, 0
  9051. ; a[i+5] += m[5] * mu
  9052. mov rax, r13
  9053. xor r11, r11
  9054. mul QWORD PTR [r9+40]
  9055. mov r14, QWORD PTR [rcx+40]
  9056. add r14, rax
  9057. adc r11, rdx
  9058. add r14, r12
  9059. mov QWORD PTR [rcx+40], r14
  9060. adc r11, 0
  9061. ; a[i+6] += m[6] * mu
  9062. mov rax, r13
  9063. xor r12, r12
  9064. mul QWORD PTR [r9+48]
  9065. mov r14, QWORD PTR [rcx+48]
  9066. add r14, rax
  9067. adc r12, rdx
  9068. add r14, r11
  9069. mov QWORD PTR [rcx+48], r14
  9070. adc r12, 0
  9071. ; a[i+7] += m[7] * mu
  9072. mov rax, r13
  9073. xor r11, r11
  9074. mul QWORD PTR [r9+56]
  9075. mov r14, QWORD PTR [rcx+56]
  9076. add r14, rax
  9077. adc r11, rdx
  9078. add r14, r12
  9079. mov QWORD PTR [rcx+56], r14
  9080. adc r11, 0
  9081. ; a[i+8] += m[8] * mu
  9082. mov rax, r13
  9083. xor r12, r12
  9084. mul QWORD PTR [r9+64]
  9085. mov r14, QWORD PTR [rcx+64]
  9086. add r14, rax
  9087. adc r12, rdx
  9088. add r14, r11
  9089. mov QWORD PTR [rcx+64], r14
  9090. adc r12, 0
  9091. ; a[i+9] += m[9] * mu
  9092. mov rax, r13
  9093. xor r11, r11
  9094. mul QWORD PTR [r9+72]
  9095. mov r14, QWORD PTR [rcx+72]
  9096. add r14, rax
  9097. adc r11, rdx
  9098. add r14, r12
  9099. mov QWORD PTR [rcx+72], r14
  9100. adc r11, 0
  9101. ; a[i+10] += m[10] * mu
  9102. mov rax, r13
  9103. xor r12, r12
  9104. mul QWORD PTR [r9+80]
  9105. mov r14, QWORD PTR [rcx+80]
  9106. add r14, rax
  9107. adc r12, rdx
  9108. add r14, r11
  9109. mov QWORD PTR [rcx+80], r14
  9110. adc r12, 0
  9111. ; a[i+11] += m[11] * mu
  9112. mov rax, r13
  9113. xor r11, r11
  9114. mul QWORD PTR [r9+88]
  9115. mov r14, QWORD PTR [rcx+88]
  9116. add r14, rax
  9117. adc r11, rdx
  9118. add r14, r12
  9119. mov QWORD PTR [rcx+88], r14
  9120. adc r11, 0
  9121. ; a[i+12] += m[12] * mu
  9122. mov rax, r13
  9123. xor r12, r12
  9124. mul QWORD PTR [r9+96]
  9125. mov r14, QWORD PTR [rcx+96]
  9126. add r14, rax
  9127. adc r12, rdx
  9128. add r14, r11
  9129. mov QWORD PTR [rcx+96], r14
  9130. adc r12, 0
  9131. ; a[i+13] += m[13] * mu
  9132. mov rax, r13
  9133. xor r11, r11
  9134. mul QWORD PTR [r9+104]
  9135. mov r14, QWORD PTR [rcx+104]
  9136. add r14, rax
  9137. adc r11, rdx
  9138. add r14, r12
  9139. mov QWORD PTR [rcx+104], r14
  9140. adc r11, 0
  9141. ; a[i+14] += m[14] * mu
  9142. mov rax, r13
  9143. xor r12, r12
  9144. mul QWORD PTR [r9+112]
  9145. mov r14, QWORD PTR [rcx+112]
  9146. add r14, rax
  9147. adc r12, rdx
  9148. add r14, r11
  9149. mov QWORD PTR [rcx+112], r14
  9150. adc r12, 0
  9151. ; a[i+15] += m[15] * mu
  9152. mov rax, r13
  9153. mul QWORD PTR [r9+120]
  9154. mov r14, QWORD PTR [rcx+120]
  9155. add r12, rax
  9156. adc rdx, rsi ; fold in the top-word carry left by the previous iteration
  9157. mov rsi, 0 ; mov, not xor: CF from the adc above must survive
  9158. adc rsi, 0
  9159. add r14, r12
  9160. mov QWORD PTR [rcx+120], r14
  9161. adc QWORD PTR [rcx+128], rdx
  9162. adc rsi, 0 ; rsi = carry out of a[i+16], consumed by the next iteration or the final mask
  9163. ; i -= 1
  9164. add rcx, 8 ; slide the window up one word; after 16 iterations rcx = a + 128
  9165. dec r10
  9166. jnz L_2048_mont_reduce_16_loop
  9167. mov QWORD PTR [rcx], r15 ; write back the two words that were cached in registers
  9168. mov QWORD PTR [rcx+8], rdi
  9169. neg rsi ; carry 0/1 -> mask 0/-1 for the conditional subtract
  9170. IFDEF _WIN64
  9171. mov r8, r9 ; b = m
  9172. mov r9, rsi ; mask
  9173. ELSE
  9174. mov r8, r9 ; b = m; r9 must be copied before it is replaced by the mask
  9175. mov r9, rsi ; mask
  9176. ENDIF
  9177. mov rdx, rcx ; a = upper half (original a + 128)
  9178. mov rcx, rcx ; no-op kept as found in the original
  9179. sub rcx, 128 ; r = original a
  9180. call sp_2048_cond_sub_16 ; NOTE(review): no 32-byte shadow space is reserved; the callee as written in this file does not touch it
  9181. pop rsi
  9182. pop rdi
  9183. pop r15
  9184. pop r14
  9185. pop r13
  9186. pop r12
  9187. ret
  9188. sp_2048_mont_reduce_16 ENDP
  9189. _text ENDS
  9190. IFDEF HAVE_INTEL_AVX2
  9191. ; /* Conditionally subtract b from a using the mask m. (r = a - (b & m))
  9192. ; * m is -1 to subtract and 0 when not subtracting.
  9193. ; *
  9194. ; * r A single precision number representing condition subtract result. (rcx)
  9195. ; * a A single precision number to subtract from. (rdx)
  9196. ; * b A single precision number to subtract. (r8)
  9197. ; * m Mask value to apply. (r9) The final borrow is returned in rax as 0 or -1.
  9198. ; */
  9199. _text SEGMENT READONLY PARA
  9200. sp_2048_cond_sub_avx2_16 PROC
  9201. push r12 ; r12 is used as a third rotating temporary and restored before ret
  9202. mov r12, QWORD PTR [r8]
  9203. mov r10, QWORD PTR [rdx]
  9204. pext r12, r12, r9 ; b[0] when m is -1, 0 when m is 0; pext leaves CF untouched, unlike and
  9205. sub r10, r12 ; word 0 starts the borrow chain
  9206. mov r12, QWORD PTR [r8+8]
  9207. mov r11, QWORD PTR [rdx+8]
  9208. pext r12, r12, r9
  9209. mov QWORD PTR [rcx], r10 ; each store trails its subtract by one word; mov leaves CF intact
  9210. sbb r11, r12
  9211. mov r10, QWORD PTR [r8+16] ; r10/r11/r12 rotate roles (masked b word, a word, pending result) from word to word
  9212. mov r12, QWORD PTR [rdx+16]
  9213. pext r10, r10, r9
  9214. mov QWORD PTR [rcx+8], r11
  9215. sbb r12, r10
  9216. mov r11, QWORD PTR [r8+24]
  9217. mov r10, QWORD PTR [rdx+24]
  9218. pext r11, r11, r9
  9219. mov QWORD PTR [rcx+16], r12
  9220. sbb r10, r11
  9221. mov r12, QWORD PTR [r8+32]
  9222. mov r11, QWORD PTR [rdx+32]
  9223. pext r12, r12, r9
  9224. mov QWORD PTR [rcx+24], r10
  9225. sbb r11, r12
  9226. mov r10, QWORD PTR [r8+40]
  9227. mov r12, QWORD PTR [rdx+40]
  9228. pext r10, r10, r9
  9229. mov QWORD PTR [rcx+32], r11
  9230. sbb r12, r10
  9231. mov r11, QWORD PTR [r8+48]
  9232. mov r10, QWORD PTR [rdx+48]
  9233. pext r11, r11, r9
  9234. mov QWORD PTR [rcx+40], r12
  9235. sbb r10, r11
  9236. mov r12, QWORD PTR [r8+56]
  9237. mov r11, QWORD PTR [rdx+56]
  9238. pext r12, r12, r9
  9239. mov QWORD PTR [rcx+48], r10
  9240. sbb r11, r12
  9241. mov r10, QWORD PTR [r8+64]
  9242. mov r12, QWORD PTR [rdx+64]
  9243. pext r10, r10, r9
  9244. mov QWORD PTR [rcx+56], r11
  9245. sbb r12, r10
  9246. mov r11, QWORD PTR [r8+72]
  9247. mov r10, QWORD PTR [rdx+72]
  9248. pext r11, r11, r9
  9249. mov QWORD PTR [rcx+64], r12
  9250. sbb r10, r11
  9251. mov r12, QWORD PTR [r8+80]
  9252. mov r11, QWORD PTR [rdx+80]
  9253. pext r12, r12, r9
  9254. mov QWORD PTR [rcx+72], r10
  9255. sbb r11, r12
  9256. mov r10, QWORD PTR [r8+88]
  9257. mov r12, QWORD PTR [rdx+88]
  9258. pext r10, r10, r9
  9259. mov QWORD PTR [rcx+80], r11
  9260. sbb r12, r10
  9261. mov r11, QWORD PTR [r8+96]
  9262. mov r10, QWORD PTR [rdx+96]
  9263. pext r11, r11, r9
  9264. mov QWORD PTR [rcx+88], r12
  9265. sbb r10, r11
  9266. mov r12, QWORD PTR [r8+104]
  9267. mov r11, QWORD PTR [rdx+104]
  9268. pext r12, r12, r9
  9269. mov QWORD PTR [rcx+96], r10
  9270. sbb r11, r12
  9271. mov r10, QWORD PTR [r8+112]
  9272. mov r12, QWORD PTR [rdx+112]
  9273. pext r10, r10, r9
  9274. mov QWORD PTR [rcx+104], r11
  9275. sbb r12, r10
  9276. mov r11, QWORD PTR [r8+120]
  9277. mov r10, QWORD PTR [rdx+120]
  9278. pext r11, r11, r9
  9279. mov QWORD PTR [rcx+112], r12
  9280. sbb r10, r11
  9281. mov QWORD PTR [rcx+120], r10
  9282. sbb rax, rax ; rax = 0 - borrow: 0 when no borrow left word 15, -1 otherwise
  9283. pop r12
  9284. ret
  9285. sp_2048_cond_sub_avx2_16 ENDP
  9286. _text ENDS
  9287. ENDIF
  9288. ; /* Mul a by digit b into r. (r = a * b)
  9289. ; *
  9290. ; * r A single precision integer. (rcx; 17 words are written, r[0]..r[16])
  9291. ; * a A single precision integer. (rdx, kept in r9; 16 words are read)
  9292. ; * b A single precision digit. (r8)
  9293. ; */
  9294. _text SEGMENT READONLY PARA
  9295. sp_2048_mul_d_16 PROC
  9296. push r12 ; r12 is the third word of the rotating accumulator, restored before ret
  9297. mov r9, rdx ; r9 = a; rdx is overwritten by every mul
  9298. ; A[0] * B
  9299. mov rax, r8
  9300. xor r12, r12
  9301. mul QWORD PTR [r9]
  9302. mov r10, rax
  9303. mov r11, rdx
  9304. mov QWORD PTR [rcx], r10
  9305. ; A[1] * B
  9306. mov rax, r8
  9307. xor r10, r10 ; r10/r11/r12 rotate: current column, next column, column after that
  9308. mul QWORD PTR [r9+8]
  9309. add r11, rax
  9310. mov QWORD PTR [rcx+8], r11 ; mov leaves CF intact for the adc pair below
  9311. adc r12, rdx
  9312. adc r10, 0
  9313. ; A[2] * B
  9314. mov rax, r8
  9315. xor r11, r11
  9316. mul QWORD PTR [r9+16]
  9317. add r12, rax
  9318. mov QWORD PTR [rcx+16], r12
  9319. adc r10, rdx
  9320. adc r11, 0
  9321. ; A[3] * B
  9322. mov rax, r8
  9323. xor r12, r12
  9324. mul QWORD PTR [r9+24]
  9325. add r10, rax
  9326. mov QWORD PTR [rcx+24], r10
  9327. adc r11, rdx
  9328. adc r12, 0
  9329. ; A[4] * B
  9330. mov rax, r8
  9331. xor r10, r10
  9332. mul QWORD PTR [r9+32]
  9333. add r11, rax
  9334. mov QWORD PTR [rcx+32], r11
  9335. adc r12, rdx
  9336. adc r10, 0
  9337. ; A[5] * B
  9338. mov rax, r8
  9339. xor r11, r11
  9340. mul QWORD PTR [r9+40]
  9341. add r12, rax
  9342. mov QWORD PTR [rcx+40], r12
  9343. adc r10, rdx
  9344. adc r11, 0
  9345. ; A[6] * B
  9346. mov rax, r8
  9347. xor r12, r12
  9348. mul QWORD PTR [r9+48]
  9349. add r10, rax
  9350. mov QWORD PTR [rcx+48], r10
  9351. adc r11, rdx
  9352. adc r12, 0
  9353. ; A[7] * B
  9354. mov rax, r8
  9355. xor r10, r10
  9356. mul QWORD PTR [r9+56]
  9357. add r11, rax
  9358. mov QWORD PTR [rcx+56], r11
  9359. adc r12, rdx
  9360. adc r10, 0
  9361. ; A[8] * B
  9362. mov rax, r8
  9363. xor r11, r11
  9364. mul QWORD PTR [r9+64]
  9365. add r12, rax
  9366. mov QWORD PTR [rcx+64], r12
  9367. adc r10, rdx
  9368. adc r11, 0
  9369. ; A[9] * B
  9370. mov rax, r8
  9371. xor r12, r12
  9372. mul QWORD PTR [r9+72]
  9373. add r10, rax
  9374. mov QWORD PTR [rcx+72], r10
  9375. adc r11, rdx
  9376. adc r12, 0
  9377. ; A[10] * B
  9378. mov rax, r8
  9379. xor r10, r10
  9380. mul QWORD PTR [r9+80]
  9381. add r11, rax
  9382. mov QWORD PTR [rcx+80], r11
  9383. adc r12, rdx
  9384. adc r10, 0
  9385. ; A[11] * B
  9386. mov rax, r8
  9387. xor r11, r11
  9388. mul QWORD PTR [r9+88]
  9389. add r12, rax
  9390. mov QWORD PTR [rcx+88], r12
  9391. adc r10, rdx
  9392. adc r11, 0
  9393. ; A[12] * B
  9394. mov rax, r8
  9395. xor r12, r12
  9396. mul QWORD PTR [r9+96]
  9397. add r10, rax
  9398. mov QWORD PTR [rcx+96], r10
  9399. adc r11, rdx
  9400. adc r12, 0
  9401. ; A[13] * B
  9402. mov rax, r8
  9403. xor r10, r10
  9404. mul QWORD PTR [r9+104]
  9405. add r11, rax
  9406. mov QWORD PTR [rcx+104], r11
  9407. adc r12, rdx
  9408. adc r10, 0
  9409. ; A[14] * B
  9410. mov rax, r8
  9411. xor r11, r11
  9412. mul QWORD PTR [r9+112]
  9413. add r12, rax
  9414. mov QWORD PTR [rcx+112], r12
  9415. adc r10, rdx
  9416. adc r11, 0
  9417. ; A[15] * B
  9418. mov rax, r8
  9419. mul QWORD PTR [r9+120]
  9420. add r10, rax
  9421. adc r11, rdx
  9422. mov QWORD PTR [rcx+120], r10
  9423. mov QWORD PTR [rcx+128], r11 ; 17th word: the high half of the last product plus carry
  9424. pop r12
  9425. ret
  9426. sp_2048_mul_d_16 ENDP
  9427. _text ENDS
  9428. IFDEF HAVE_INTEL_AVX2
  9429. ; /* Mul a by digit b into r. (r = a * b)
  9430. ; *
  9431. ; * r A single precision integer. (rcx; 17 words are written, r[0]..r[16])
  9432. ; * a A single precision integer. (rdx, kept in rax; 16 words are read)
  9433. ; * b A single precision digit. (r8, moved into rdx as mulx's implicit operand)
  9434. ; */
  9435. _text SEGMENT READONLY PARA
  9436. sp_2048_mul_d_avx2_16 PROC
  9437. push r12
  9438. push r13
  9439. mov rax, rdx ; rax = a; rdx is needed for the multiplier
  9440. ; A[0] * B
  9441. mov rdx, r8
  9442. xor r13, r13 ; r13 = 0 for the rest of the routine; also clears CF and OF to start both carry chains
  9443. mulx r12, r11, QWORD PTR [rax]
  9444. mov QWORD PTR [rcx], r11
  9445. ; A[1] * B
  9446. mulx r10, r9, QWORD PTR [rax+8] ; r10:r9 = a[1] * b; mulx does not write flags
  9447. mov r11, r13
  9448. adcx r12, r9 ; CF chain: low halves added into the current column
  9449. adox r11, r10 ; OF chain: high halves seed the next column
  9450. mov QWORD PTR [rcx+8], r12
  9451. ; A[2] * B
  9452. mulx r10, r9, QWORD PTR [rax+16]
  9453. mov r12, r13
  9454. adcx r11, r9
  9455. adox r12, r10
  9456. mov QWORD PTR [rcx+16], r11
  9457. ; A[3] * B
  9458. mulx r10, r9, QWORD PTR [rax+24]
  9459. mov r11, r13
  9460. adcx r12, r9
  9461. adox r11, r10
  9462. mov QWORD PTR [rcx+24], r12
  9463. ; A[4] * B
  9464. mulx r10, r9, QWORD PTR [rax+32]
  9465. mov r12, r13
  9466. adcx r11, r9
  9467. adox r12, r10
  9468. mov QWORD PTR [rcx+32], r11
  9469. ; A[5] * B
  9470. mulx r10, r9, QWORD PTR [rax+40]
  9471. mov r11, r13
  9472. adcx r12, r9
  9473. adox r11, r10
  9474. mov QWORD PTR [rcx+40], r12
  9475. ; A[6] * B
  9476. mulx r10, r9, QWORD PTR [rax+48]
  9477. mov r12, r13
  9478. adcx r11, r9
  9479. adox r12, r10
  9480. mov QWORD PTR [rcx+48], r11
  9481. ; A[7] * B
  9482. mulx r10, r9, QWORD PTR [rax+56]
  9483. mov r11, r13
  9484. adcx r12, r9
  9485. adox r11, r10
  9486. mov QWORD PTR [rcx+56], r12
  9487. ; A[8] * B
  9488. mulx r10, r9, QWORD PTR [rax+64]
  9489. mov r12, r13
  9490. adcx r11, r9
  9491. adox r12, r10
  9492. mov QWORD PTR [rcx+64], r11
  9493. ; A[9] * B
  9494. mulx r10, r9, QWORD PTR [rax+72]
  9495. mov r11, r13
  9496. adcx r12, r9
  9497. adox r11, r10
  9498. mov QWORD PTR [rcx+72], r12
  9499. ; A[10] * B
  9500. mulx r10, r9, QWORD PTR [rax+80]
  9501. mov r12, r13
  9502. adcx r11, r9
  9503. adox r12, r10
  9504. mov QWORD PTR [rcx+80], r11
  9505. ; A[11] * B
  9506. mulx r10, r9, QWORD PTR [rax+88]
  9507. mov r11, r13
  9508. adcx r12, r9
  9509. adox r11, r10
  9510. mov QWORD PTR [rcx+88], r12
  9511. ; A[12] * B
  9512. mulx r10, r9, QWORD PTR [rax+96]
  9513. mov r12, r13
  9514. adcx r11, r9
  9515. adox r12, r10
  9516. mov QWORD PTR [rcx+96], r11
  9517. ; A[13] * B
  9518. mulx r10, r9, QWORD PTR [rax+104]
  9519. mov r11, r13
  9520. adcx r12, r9
  9521. adox r11, r10
  9522. mov QWORD PTR [rcx+104], r12
  9523. ; A[14] * B
  9524. mulx r10, r9, QWORD PTR [rax+112]
  9525. mov r12, r13
  9526. adcx r11, r9
  9527. adox r12, r10
  9528. mov QWORD PTR [rcx+112], r11
  9529. ; A[15] * B
  9530. mulx r10, r9, QWORD PTR [rax+120]
  9531. mov r11, r13
  9532. adcx r12, r9
  9533. adox r11, r10
  9534. adcx r11, r13 ; fold the last CF-chain carry into the top word (r13 is 0)
  9535. mov QWORD PTR [rcx+120], r12
  9536. mov QWORD PTR [rcx+128], r11 ; 17th word of the result
  9537. pop r13
  9538. pop r12
  9539. ret
  9540. sp_2048_mul_d_avx2_16 ENDP
  9541. _text ENDS
  9542. ENDIF
  9543. IFDEF _WIN64
  9544. ; /* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div)
  9545. ; *
  9546. ; * d1 The high order half of the number to divide. (rcx)
  9547. ; * d0 The low order half of the number to divide. (rdx)
  9548. ; * div The divisor. (r8)
  9549. ; * returns the quotient in rax; the hardware div faults if div is 0 or the quotient needs more than 64 bits.
  9550. ; */
  9551. _text SEGMENT READONLY PARA
  9552. div_2048_word_asm_16 PROC
  9553. mov rax, rdx ; rax = d0, low half of the rdx:rax dividend
  9554. mov r9, rax ; r9 also ends up holding d0, as in the original sequence
  9555. mov rdx, rcx ; rdx = d1, high half of the dividend
  9556. div r8 ; rax = quotient, rdx = remainder
  9557. ret
  9558. div_2048_word_asm_16 ENDP
  9559. _text ENDS
  9560. ENDIF
  9561. ; /* Compare a with b in constant time.
  9562. ; *
  9563. ; * a A single precision integer. (rcx; 16 words, scanned from word 15 down to word 0)
  9564. ; * b A single precision integer. (rdx; 16 words)
  9565. ; * return -ve, 0 or +ve if a is less than, equal to or greater than b
  9566. ; * respectively. (rax is exactly -1, 0 or 1; no branches are taken)
  9567. ; */
  9568. _text SEGMENT READONLY PARA
  9569. sp_2048_cmp_16 PROC
  9570. push r12
  9571. xor r9, r9 ; r9 = 0, used to clear the mask
  9572. mov r8, -1 ; r8 = mask: all ones while every higher word compared equal, 0 afterwards
  9573. mov rax, -1 ; rax = running result; the final xor with r8 turns "all equal" into 0
  9574. mov r10, 1 ; r10 = the "greater than" result value
  9575. mov r11, QWORD PTR [rcx+120]
  9576. mov r12, QWORD PTR [rdx+120]
  9577. and r11, r8 ; once the mask is 0 both words become 0, so later words cannot change rax
  9578. and r12, r8
  9579. sub r11, r12
  9580. cmova rax, r10 ; a word > b word (unsigned): result = 1
  9581. cmovc rax, r8 ; a word < b word: result = mask, which is still -1 at the first difference
  9582. cmovnz r8, r9 ; words differ: clear the mask so the decision is frozen
  9583. mov r11, QWORD PTR [rcx+112]
  9584. mov r12, QWORD PTR [rdx+112]
  9585. and r11, r8
  9586. and r12, r8
  9587. sub r11, r12
  9588. cmova rax, r10
  9589. cmovc rax, r8
  9590. cmovnz r8, r9
  9591. mov r11, QWORD PTR [rcx+104]
  9592. mov r12, QWORD PTR [rdx+104]
  9593. and r11, r8
  9594. and r12, r8
  9595. sub r11, r12
  9596. cmova rax, r10
  9597. cmovc rax, r8
  9598. cmovnz r8, r9
  9599. mov r11, QWORD PTR [rcx+96]
  9600. mov r12, QWORD PTR [rdx+96]
  9601. and r11, r8
  9602. and r12, r8
  9603. sub r11, r12
  9604. cmova rax, r10
  9605. cmovc rax, r8
  9606. cmovnz r8, r9
  9607. mov r11, QWORD PTR [rcx+88]
  9608. mov r12, QWORD PTR [rdx+88]
  9609. and r11, r8
  9610. and r12, r8
  9611. sub r11, r12
  9612. cmova rax, r10
  9613. cmovc rax, r8
  9614. cmovnz r8, r9
  9615. mov r11, QWORD PTR [rcx+80]
  9616. mov r12, QWORD PTR [rdx+80]
  9617. and r11, r8
  9618. and r12, r8
  9619. sub r11, r12
  9620. cmova rax, r10
  9621. cmovc rax, r8
  9622. cmovnz r8, r9
  9623. mov r11, QWORD PTR [rcx+72]
  9624. mov r12, QWORD PTR [rdx+72]
  9625. and r11, r8
  9626. and r12, r8
  9627. sub r11, r12
  9628. cmova rax, r10
  9629. cmovc rax, r8
  9630. cmovnz r8, r9
  9631. mov r11, QWORD PTR [rcx+64]
  9632. mov r12, QWORD PTR [rdx+64]
  9633. and r11, r8
  9634. and r12, r8
  9635. sub r11, r12
  9636. cmova rax, r10
  9637. cmovc rax, r8
  9638. cmovnz r8, r9
  9639. mov r11, QWORD PTR [rcx+56]
  9640. mov r12, QWORD PTR [rdx+56]
  9641. and r11, r8
  9642. and r12, r8
  9643. sub r11, r12
  9644. cmova rax, r10
  9645. cmovc rax, r8
  9646. cmovnz r8, r9
  9647. mov r11, QWORD PTR [rcx+48]
  9648. mov r12, QWORD PTR [rdx+48]
  9649. and r11, r8
  9650. and r12, r8
  9651. sub r11, r12
  9652. cmova rax, r10
  9653. cmovc rax, r8
  9654. cmovnz r8, r9
  9655. mov r11, QWORD PTR [rcx+40]
  9656. mov r12, QWORD PTR [rdx+40]
  9657. and r11, r8
  9658. and r12, r8
  9659. sub r11, r12
  9660. cmova rax, r10
  9661. cmovc rax, r8
  9662. cmovnz r8, r9
  9663. mov r11, QWORD PTR [rcx+32]
  9664. mov r12, QWORD PTR [rdx+32]
  9665. and r11, r8
  9666. and r12, r8
  9667. sub r11, r12
  9668. cmova rax, r10
  9669. cmovc rax, r8
  9670. cmovnz r8, r9
  9671. mov r11, QWORD PTR [rcx+24]
  9672. mov r12, QWORD PTR [rdx+24]
  9673. and r11, r8
  9674. and r12, r8
  9675. sub r11, r12
  9676. cmova rax, r10
  9677. cmovc rax, r8
  9678. cmovnz r8, r9
  9679. mov r11, QWORD PTR [rcx+16]
  9680. mov r12, QWORD PTR [rdx+16]
  9681. and r11, r8
  9682. and r12, r8
  9683. sub r11, r12
  9684. cmova rax, r10
  9685. cmovc rax, r8
  9686. cmovnz r8, r9
  9687. mov r11, QWORD PTR [rcx+8]
  9688. mov r12, QWORD PTR [rdx+8]
  9689. and r11, r8
  9690. and r12, r8
  9691. sub r11, r12
  9692. cmova rax, r10
  9693. cmovc rax, r8
  9694. cmovnz r8, r9
  9695. mov r11, QWORD PTR [rcx]
  9696. mov r12, QWORD PTR [rdx]
  9697. and r11, r8
  9698. and r12, r8
  9699. sub r11, r12
  9700. cmova rax, r10
  9701. cmovc rax, r8
  9702. cmovnz r8, r9
  9703. xor rax, r8 ; all words equal: -1 xor -1 = 0; otherwise the mask is 0 and rax (1 or -1) is unchanged
  9704. pop r12
  9705. ret
  9706. sp_2048_cmp_16 ENDP
  9707. _text ENDS
  9708. IFNDEF WC_NO_CACHE_RESISTANT
  9709. _text SEGMENT READONLY PARA
  9710. sp_2048_get_from_table_16 PROC
  9711. sub rsp, 128
  9712. vmovdqu OWORD PTR [rsp], xmm6
  9713. vmovdqu OWORD PTR [rsp+16], xmm7
  9714. vmovdqu OWORD PTR [rsp+32], xmm8
  9715. vmovdqu OWORD PTR [rsp+48], xmm9
  9716. vmovdqu OWORD PTR [rsp+64], xmm10
  9717. vmovdqu OWORD PTR [rsp+80], xmm11
  9718. vmovdqu OWORD PTR [rsp+96], xmm12
  9719. vmovdqu OWORD PTR [rsp+112], xmm13
  9720. mov rax, 1
  9721. movd xmm10, r8
  9722. movd xmm11, rax
  9723. pxor xmm13, xmm13
  9724. pshufd xmm11, xmm11, 0
  9725. pshufd xmm10, xmm10, 0
  9726. ; START: 0-7
  9727. pxor xmm13, xmm13
  9728. pxor xmm4, xmm4
  9729. pxor xmm5, xmm5
  9730. pxor xmm6, xmm6
  9731. pxor xmm7, xmm7
  9732. ; ENTRY: 0
  9733. mov r9, QWORD PTR [rdx]
  9734. movdqu xmm12, xmm13
  9735. pcmpeqd xmm12, xmm10
  9736. movdqu xmm0, [r9]
  9737. movdqu xmm1, [r9+16]
  9738. movdqu xmm2, [r9+32]
  9739. movdqu xmm3, [r9+48]
  9740. pand xmm0, xmm12
  9741. pand xmm1, xmm12
  9742. pand xmm2, xmm12
  9743. pand xmm3, xmm12
  9744. por xmm4, xmm0
  9745. por xmm5, xmm1
  9746. por xmm6, xmm2
  9747. por xmm7, xmm3
  9748. paddd xmm13, xmm11
  9749. ; ENTRY: 1
  9750. mov r9, QWORD PTR [rdx+8]
  9751. movdqu xmm12, xmm13
  9752. pcmpeqd xmm12, xmm10
  9753. movdqu xmm0, [r9]
  9754. movdqu xmm1, [r9+16]
  9755. movdqu xmm2, [r9+32]
  9756. movdqu xmm3, [r9+48]
  9757. pand xmm0, xmm12
  9758. pand xmm1, xmm12
  9759. pand xmm2, xmm12
  9760. pand xmm3, xmm12
  9761. por xmm4, xmm0
  9762. por xmm5, xmm1
  9763. por xmm6, xmm2
  9764. por xmm7, xmm3
  9765. paddd xmm13, xmm11
  9766. ; ENTRY: 2
  9767. mov r9, QWORD PTR [rdx+16]
  9768. movdqu xmm12, xmm13
  9769. pcmpeqd xmm12, xmm10
  9770. movdqu xmm0, [r9]
  9771. movdqu xmm1, [r9+16]
  9772. movdqu xmm2, [r9+32]
  9773. movdqu xmm3, [r9+48]
  9774. pand xmm0, xmm12
  9775. pand xmm1, xmm12
  9776. pand xmm2, xmm12
  9777. pand xmm3, xmm12
  9778. por xmm4, xmm0
  9779. por xmm5, xmm1
  9780. por xmm6, xmm2
  9781. por xmm7, xmm3
  9782. paddd xmm13, xmm11
  9783. ; ENTRY: 3
  9784. mov r9, QWORD PTR [rdx+24]
  9785. movdqu xmm12, xmm13
  9786. pcmpeqd xmm12, xmm10
  9787. movdqu xmm0, [r9]
  9788. movdqu xmm1, [r9+16]
  9789. movdqu xmm2, [r9+32]
  9790. movdqu xmm3, [r9+48]
  9791. pand xmm0, xmm12
  9792. pand xmm1, xmm12
  9793. pand xmm2, xmm12
  9794. pand xmm3, xmm12
  9795. por xmm4, xmm0
  9796. por xmm5, xmm1
  9797. por xmm6, xmm2
  9798. por xmm7, xmm3
  9799. paddd xmm13, xmm11
  9800. ; ENTRY: 4
  9801. mov r9, QWORD PTR [rdx+32]
  9802. movdqu xmm12, xmm13
  9803. pcmpeqd xmm12, xmm10
  9804. movdqu xmm0, [r9]
  9805. movdqu xmm1, [r9+16]
  9806. movdqu xmm2, [r9+32]
  9807. movdqu xmm3, [r9+48]
  9808. pand xmm0, xmm12
  9809. pand xmm1, xmm12
  9810. pand xmm2, xmm12
  9811. pand xmm3, xmm12
  9812. por xmm4, xmm0
  9813. por xmm5, xmm1
  9814. por xmm6, xmm2
  9815. por xmm7, xmm3
  9816. paddd xmm13, xmm11
  9817. ; ENTRY: 5
  9818. mov r9, QWORD PTR [rdx+40]
  9819. movdqu xmm12, xmm13
  9820. pcmpeqd xmm12, xmm10
  9821. movdqu xmm0, [r9]
  9822. movdqu xmm1, [r9+16]
  9823. movdqu xmm2, [r9+32]
  9824. movdqu xmm3, [r9+48]
  9825. pand xmm0, xmm12
  9826. pand xmm1, xmm12
  9827. pand xmm2, xmm12
  9828. pand xmm3, xmm12
  9829. por xmm4, xmm0
  9830. por xmm5, xmm1
  9831. por xmm6, xmm2
  9832. por xmm7, xmm3
  9833. paddd xmm13, xmm11
  9834. ; ENTRY: 6
  9835. mov r9, QWORD PTR [rdx+48]
  9836. movdqu xmm12, xmm13
  9837. pcmpeqd xmm12, xmm10
  9838. movdqu xmm0, [r9]
  9839. movdqu xmm1, [r9+16]
  9840. movdqu xmm2, [r9+32]
  9841. movdqu xmm3, [r9+48]
  9842. pand xmm0, xmm12
  9843. pand xmm1, xmm12
  9844. pand xmm2, xmm12
  9845. pand xmm3, xmm12
  9846. por xmm4, xmm0
  9847. por xmm5, xmm1
  9848. por xmm6, xmm2
  9849. por xmm7, xmm3
  9850. paddd xmm13, xmm11
  9851. ; ENTRY: 7
  9852. mov r9, QWORD PTR [rdx+56]
  9853. movdqu xmm12, xmm13
  9854. pcmpeqd xmm12, xmm10
  9855. movdqu xmm0, [r9]
  9856. movdqu xmm1, [r9+16]
  9857. movdqu xmm2, [r9+32]
  9858. movdqu xmm3, [r9+48]
  9859. pand xmm0, xmm12
  9860. pand xmm1, xmm12
  9861. pand xmm2, xmm12
  9862. pand xmm3, xmm12
  9863. por xmm4, xmm0
  9864. por xmm5, xmm1
  9865. por xmm6, xmm2
  9866. por xmm7, xmm3
  9867. paddd xmm13, xmm11
  9868. ; ENTRY: 8
  9869. mov r9, QWORD PTR [rdx+64]
  9870. movdqu xmm12, xmm13
  9871. pcmpeqd xmm12, xmm10
  9872. movdqu xmm0, [r9]
  9873. movdqu xmm1, [r9+16]
  9874. movdqu xmm2, [r9+32]
  9875. movdqu xmm3, [r9+48]
  9876. pand xmm0, xmm12
  9877. pand xmm1, xmm12
  9878. pand xmm2, xmm12
  9879. pand xmm3, xmm12
  9880. por xmm4, xmm0
  9881. por xmm5, xmm1
  9882. por xmm6, xmm2
  9883. por xmm7, xmm3
  9884. paddd xmm13, xmm11
  9885. ; ENTRY: 9
  9886. mov r9, QWORD PTR [rdx+72]
  9887. movdqu xmm12, xmm13
  9888. pcmpeqd xmm12, xmm10
  9889. movdqu xmm0, [r9]
  9890. movdqu xmm1, [r9+16]
  9891. movdqu xmm2, [r9+32]
  9892. movdqu xmm3, [r9+48]
  9893. pand xmm0, xmm12
  9894. pand xmm1, xmm12
  9895. pand xmm2, xmm12
  9896. pand xmm3, xmm12
  9897. por xmm4, xmm0
  9898. por xmm5, xmm1
  9899. por xmm6, xmm2
  9900. por xmm7, xmm3
  9901. paddd xmm13, xmm11
  9902. ; ENTRY: 10
  9903. mov r9, QWORD PTR [rdx+80]
  9904. movdqu xmm12, xmm13
  9905. pcmpeqd xmm12, xmm10
  9906. movdqu xmm0, [r9]
  9907. movdqu xmm1, [r9+16]
  9908. movdqu xmm2, [r9+32]
  9909. movdqu xmm3, [r9+48]
  9910. pand xmm0, xmm12
  9911. pand xmm1, xmm12
  9912. pand xmm2, xmm12
  9913. pand xmm3, xmm12
  9914. por xmm4, xmm0
  9915. por xmm5, xmm1
  9916. por xmm6, xmm2
  9917. por xmm7, xmm3
  9918. paddd xmm13, xmm11
  9919. ; ENTRY: 11
  9920. mov r9, QWORD PTR [rdx+88]
  9921. movdqu xmm12, xmm13
  9922. pcmpeqd xmm12, xmm10
  9923. movdqu xmm0, [r9]
  9924. movdqu xmm1, [r9+16]
  9925. movdqu xmm2, [r9+32]
  9926. movdqu xmm3, [r9+48]
  9927. pand xmm0, xmm12
  9928. pand xmm1, xmm12
  9929. pand xmm2, xmm12
  9930. pand xmm3, xmm12
  9931. por xmm4, xmm0
  9932. por xmm5, xmm1
  9933. por xmm6, xmm2
  9934. por xmm7, xmm3
  9935. paddd xmm13, xmm11
  9936. ; ENTRY: 12
  9937. mov r9, QWORD PTR [rdx+96]
  9938. movdqu xmm12, xmm13
  9939. pcmpeqd xmm12, xmm10
  9940. movdqu xmm0, [r9]
  9941. movdqu xmm1, [r9+16]
  9942. movdqu xmm2, [r9+32]
  9943. movdqu xmm3, [r9+48]
  9944. pand xmm0, xmm12
  9945. pand xmm1, xmm12
  9946. pand xmm2, xmm12
  9947. pand xmm3, xmm12
  9948. por xmm4, xmm0
  9949. por xmm5, xmm1
  9950. por xmm6, xmm2
  9951. por xmm7, xmm3
  9952. paddd xmm13, xmm11
  9953. ; ENTRY: 13
  9954. mov r9, QWORD PTR [rdx+104]
  9955. movdqu xmm12, xmm13
  9956. pcmpeqd xmm12, xmm10
  9957. movdqu xmm0, [r9]
  9958. movdqu xmm1, [r9+16]
  9959. movdqu xmm2, [r9+32]
  9960. movdqu xmm3, [r9+48]
  9961. pand xmm0, xmm12
  9962. pand xmm1, xmm12
  9963. pand xmm2, xmm12
  9964. pand xmm3, xmm12
  9965. por xmm4, xmm0
  9966. por xmm5, xmm1
  9967. por xmm6, xmm2
  9968. por xmm7, xmm3
  9969. paddd xmm13, xmm11
  9970. ; ENTRY: 14
  9971. mov r9, QWORD PTR [rdx+112]
  9972. movdqu xmm12, xmm13
  9973. pcmpeqd xmm12, xmm10
  9974. movdqu xmm0, [r9]
  9975. movdqu xmm1, [r9+16]
  9976. movdqu xmm2, [r9+32]
  9977. movdqu xmm3, [r9+48]
  9978. pand xmm0, xmm12
  9979. pand xmm1, xmm12
  9980. pand xmm2, xmm12
  9981. pand xmm3, xmm12
  9982. por xmm4, xmm0
  9983. por xmm5, xmm1
  9984. por xmm6, xmm2
  9985. por xmm7, xmm3
  9986. paddd xmm13, xmm11
  9987. ; ENTRY: 15
  9988. mov r9, QWORD PTR [rdx+120]
  9989. movdqu xmm12, xmm13
  9990. pcmpeqd xmm12, xmm10
  9991. movdqu xmm0, [r9]
  9992. movdqu xmm1, [r9+16]
  9993. movdqu xmm2, [r9+32]
  9994. movdqu xmm3, [r9+48]
  9995. pand xmm0, xmm12
  9996. pand xmm1, xmm12
  9997. pand xmm2, xmm12
  9998. pand xmm3, xmm12
  9999. por xmm4, xmm0
  10000. por xmm5, xmm1
  10001. por xmm6, xmm2
  10002. por xmm7, xmm3
  10003. paddd xmm13, xmm11
  10004. ; ENTRY: 16
  10005. mov r9, QWORD PTR [rdx+128]
  10006. movdqu xmm12, xmm13
  10007. pcmpeqd xmm12, xmm10
  10008. movdqu xmm0, [r9]
  10009. movdqu xmm1, [r9+16]
  10010. movdqu xmm2, [r9+32]
  10011. movdqu xmm3, [r9+48]
  10012. pand xmm0, xmm12
  10013. pand xmm1, xmm12
  10014. pand xmm2, xmm12
  10015. pand xmm3, xmm12
  10016. por xmm4, xmm0
  10017. por xmm5, xmm1
  10018. por xmm6, xmm2
  10019. por xmm7, xmm3
  10020. paddd xmm13, xmm11
  10021. ; ENTRY: 17
  10022. mov r9, QWORD PTR [rdx+136]
  10023. movdqu xmm12, xmm13
  10024. pcmpeqd xmm12, xmm10
  10025. movdqu xmm0, [r9]
  10026. movdqu xmm1, [r9+16]
  10027. movdqu xmm2, [r9+32]
  10028. movdqu xmm3, [r9+48]
  10029. pand xmm0, xmm12
  10030. pand xmm1, xmm12
  10031. pand xmm2, xmm12
  10032. pand xmm3, xmm12
  10033. por xmm4, xmm0
  10034. por xmm5, xmm1
  10035. por xmm6, xmm2
  10036. por xmm7, xmm3
  10037. paddd xmm13, xmm11
  10038. ; ENTRY: 18
  10039. mov r9, QWORD PTR [rdx+144]
  10040. movdqu xmm12, xmm13
  10041. pcmpeqd xmm12, xmm10
  10042. movdqu xmm0, [r9]
  10043. movdqu xmm1, [r9+16]
  10044. movdqu xmm2, [r9+32]
  10045. movdqu xmm3, [r9+48]
  10046. pand xmm0, xmm12
  10047. pand xmm1, xmm12
  10048. pand xmm2, xmm12
  10049. pand xmm3, xmm12
  10050. por xmm4, xmm0
  10051. por xmm5, xmm1
  10052. por xmm6, xmm2
  10053. por xmm7, xmm3
  10054. paddd xmm13, xmm11
  10055. ; ENTRY: 19
  10056. mov r9, QWORD PTR [rdx+152]
  10057. movdqu xmm12, xmm13
  10058. pcmpeqd xmm12, xmm10
  10059. movdqu xmm0, [r9]
  10060. movdqu xmm1, [r9+16]
  10061. movdqu xmm2, [r9+32]
  10062. movdqu xmm3, [r9+48]
  10063. pand xmm0, xmm12
  10064. pand xmm1, xmm12
  10065. pand xmm2, xmm12
  10066. pand xmm3, xmm12
  10067. por xmm4, xmm0
  10068. por xmm5, xmm1
  10069. por xmm6, xmm2
  10070. por xmm7, xmm3
  10071. paddd xmm13, xmm11
  10072. ; ENTRY: 20
  10073. mov r9, QWORD PTR [rdx+160]
  10074. movdqu xmm12, xmm13
  10075. pcmpeqd xmm12, xmm10
  10076. movdqu xmm0, [r9]
  10077. movdqu xmm1, [r9+16]
  10078. movdqu xmm2, [r9+32]
  10079. movdqu xmm3, [r9+48]
  10080. pand xmm0, xmm12
  10081. pand xmm1, xmm12
  10082. pand xmm2, xmm12
  10083. pand xmm3, xmm12
  10084. por xmm4, xmm0
  10085. por xmm5, xmm1
  10086. por xmm6, xmm2
  10087. por xmm7, xmm3
  10088. paddd xmm13, xmm11
  10089. ; ENTRY: 21
  10090. mov r9, QWORD PTR [rdx+168]
  10091. movdqu xmm12, xmm13
  10092. pcmpeqd xmm12, xmm10
  10093. movdqu xmm0, [r9]
  10094. movdqu xmm1, [r9+16]
  10095. movdqu xmm2, [r9+32]
  10096. movdqu xmm3, [r9+48]
  10097. pand xmm0, xmm12
  10098. pand xmm1, xmm12
  10099. pand xmm2, xmm12
  10100. pand xmm3, xmm12
  10101. por xmm4, xmm0
  10102. por xmm5, xmm1
  10103. por xmm6, xmm2
  10104. por xmm7, xmm3
  10105. paddd xmm13, xmm11
  10106. ; ENTRY: 22
  10107. mov r9, QWORD PTR [rdx+176]
  10108. movdqu xmm12, xmm13
  10109. pcmpeqd xmm12, xmm10
  10110. movdqu xmm0, [r9]
  10111. movdqu xmm1, [r9+16]
  10112. movdqu xmm2, [r9+32]
  10113. movdqu xmm3, [r9+48]
  10114. pand xmm0, xmm12
  10115. pand xmm1, xmm12
  10116. pand xmm2, xmm12
  10117. pand xmm3, xmm12
  10118. por xmm4, xmm0
  10119. por xmm5, xmm1
  10120. por xmm6, xmm2
  10121. por xmm7, xmm3
  10122. paddd xmm13, xmm11
  10123. ; ENTRY: 23
  10124. mov r9, QWORD PTR [rdx+184]
  10125. movdqu xmm12, xmm13
  10126. pcmpeqd xmm12, xmm10
  10127. movdqu xmm0, [r9]
  10128. movdqu xmm1, [r9+16]
  10129. movdqu xmm2, [r9+32]
  10130. movdqu xmm3, [r9+48]
  10131. pand xmm0, xmm12
  10132. pand xmm1, xmm12
  10133. pand xmm2, xmm12
  10134. pand xmm3, xmm12
  10135. por xmm4, xmm0
  10136. por xmm5, xmm1
  10137. por xmm6, xmm2
  10138. por xmm7, xmm3
  10139. paddd xmm13, xmm11
  10140. ; ENTRY: 24
  10141. mov r9, QWORD PTR [rdx+192]
  10142. movdqu xmm12, xmm13
  10143. pcmpeqd xmm12, xmm10
  10144. movdqu xmm0, [r9]
  10145. movdqu xmm1, [r9+16]
  10146. movdqu xmm2, [r9+32]
  10147. movdqu xmm3, [r9+48]
  10148. pand xmm0, xmm12
  10149. pand xmm1, xmm12
  10150. pand xmm2, xmm12
  10151. pand xmm3, xmm12
  10152. por xmm4, xmm0
  10153. por xmm5, xmm1
  10154. por xmm6, xmm2
  10155. por xmm7, xmm3
  10156. paddd xmm13, xmm11
  10157. ; ENTRY: 25
  10158. mov r9, QWORD PTR [rdx+200]
  10159. movdqu xmm12, xmm13
  10160. pcmpeqd xmm12, xmm10
  10161. movdqu xmm0, [r9]
  10162. movdqu xmm1, [r9+16]
  10163. movdqu xmm2, [r9+32]
  10164. movdqu xmm3, [r9+48]
  10165. pand xmm0, xmm12
  10166. pand xmm1, xmm12
  10167. pand xmm2, xmm12
  10168. pand xmm3, xmm12
  10169. por xmm4, xmm0
  10170. por xmm5, xmm1
  10171. por xmm6, xmm2
  10172. por xmm7, xmm3
  10173. paddd xmm13, xmm11
  10174. ; ENTRY: 26
  10175. mov r9, QWORD PTR [rdx+208]
  10176. movdqu xmm12, xmm13
  10177. pcmpeqd xmm12, xmm10
  10178. movdqu xmm0, [r9]
  10179. movdqu xmm1, [r9+16]
  10180. movdqu xmm2, [r9+32]
  10181. movdqu xmm3, [r9+48]
  10182. pand xmm0, xmm12
  10183. pand xmm1, xmm12
  10184. pand xmm2, xmm12
  10185. pand xmm3, xmm12
  10186. por xmm4, xmm0
  10187. por xmm5, xmm1
  10188. por xmm6, xmm2
  10189. por xmm7, xmm3
  10190. paddd xmm13, xmm11
  10191. ; ENTRY: 27
  10192. mov r9, QWORD PTR [rdx+216]
  10193. movdqu xmm12, xmm13
  10194. pcmpeqd xmm12, xmm10
  10195. movdqu xmm0, [r9]
  10196. movdqu xmm1, [r9+16]
  10197. movdqu xmm2, [r9+32]
  10198. movdqu xmm3, [r9+48]
  10199. pand xmm0, xmm12
  10200. pand xmm1, xmm12
  10201. pand xmm2, xmm12
  10202. pand xmm3, xmm12
  10203. por xmm4, xmm0
  10204. por xmm5, xmm1
  10205. por xmm6, xmm2
  10206. por xmm7, xmm3
  10207. paddd xmm13, xmm11
  10208. ; ENTRY: 28
  10209. mov r9, QWORD PTR [rdx+224]
  10210. movdqu xmm12, xmm13
  10211. pcmpeqd xmm12, xmm10
  10212. movdqu xmm0, [r9]
  10213. movdqu xmm1, [r9+16]
  10214. movdqu xmm2, [r9+32]
  10215. movdqu xmm3, [r9+48]
  10216. pand xmm0, xmm12
  10217. pand xmm1, xmm12
  10218. pand xmm2, xmm12
  10219. pand xmm3, xmm12
  10220. por xmm4, xmm0
  10221. por xmm5, xmm1
  10222. por xmm6, xmm2
  10223. por xmm7, xmm3
  10224. paddd xmm13, xmm11
  10225. ; ENTRY: 29
  10226. mov r9, QWORD PTR [rdx+232]
  10227. movdqu xmm12, xmm13
  10228. pcmpeqd xmm12, xmm10
  10229. movdqu xmm0, [r9]
  10230. movdqu xmm1, [r9+16]
  10231. movdqu xmm2, [r9+32]
  10232. movdqu xmm3, [r9+48]
  10233. pand xmm0, xmm12
  10234. pand xmm1, xmm12
  10235. pand xmm2, xmm12
  10236. pand xmm3, xmm12
  10237. por xmm4, xmm0
  10238. por xmm5, xmm1
  10239. por xmm6, xmm2
  10240. por xmm7, xmm3
  10241. paddd xmm13, xmm11
  10242. ; ENTRY: 30
  10243. mov r9, QWORD PTR [rdx+240]
  10244. movdqu xmm12, xmm13
  10245. pcmpeqd xmm12, xmm10
  10246. movdqu xmm0, [r9]
  10247. movdqu xmm1, [r9+16]
  10248. movdqu xmm2, [r9+32]
  10249. movdqu xmm3, [r9+48]
  10250. pand xmm0, xmm12
  10251. pand xmm1, xmm12
  10252. pand xmm2, xmm12
  10253. pand xmm3, xmm12
  10254. por xmm4, xmm0
  10255. por xmm5, xmm1
  10256. por xmm6, xmm2
  10257. por xmm7, xmm3
  10258. paddd xmm13, xmm11
  10259. ; ENTRY: 31
  10260. mov r9, QWORD PTR [rdx+248]
  10261. movdqu xmm12, xmm13
  10262. pcmpeqd xmm12, xmm10
  10263. movdqu xmm0, [r9]
  10264. movdqu xmm1, [r9+16]
  10265. movdqu xmm2, [r9+32]
  10266. movdqu xmm3, [r9+48]
  10267. pand xmm0, xmm12
  10268. pand xmm1, xmm12
  10269. pand xmm2, xmm12
  10270. pand xmm3, xmm12
  10271. por xmm4, xmm0
  10272. por xmm5, xmm1
  10273. por xmm6, xmm2
  10274. por xmm7, xmm3
  10275. paddd xmm13, xmm11
  10276. movdqu [rcx], xmm4
  10277. movdqu [rcx+16], xmm5
  10278. movdqu [rcx+32], xmm6
  10279. movdqu [rcx+48], xmm7
  10280. add rcx, 64
  10281. ; END: 0-7
  10282. ; START: 8-15
  10283. pxor xmm13, xmm13
  10284. pxor xmm4, xmm4
  10285. pxor xmm5, xmm5
  10286. pxor xmm6, xmm6
  10287. pxor xmm7, xmm7
  10288. ; ENTRY: 0
  10289. mov r9, QWORD PTR [rdx]
  10290. add r9, 64
  10291. movdqu xmm12, xmm13
  10292. pcmpeqd xmm12, xmm10
  10293. movdqu xmm0, [r9]
  10294. movdqu xmm1, [r9+16]
  10295. movdqu xmm2, [r9+32]
  10296. movdqu xmm3, [r9+48]
  10297. pand xmm0, xmm12
  10298. pand xmm1, xmm12
  10299. pand xmm2, xmm12
  10300. pand xmm3, xmm12
  10301. por xmm4, xmm0
  10302. por xmm5, xmm1
  10303. por xmm6, xmm2
  10304. por xmm7, xmm3
  10305. paddd xmm13, xmm11
  10306. ; ENTRY: 1
  10307. mov r9, QWORD PTR [rdx+8]
  10308. add r9, 64
  10309. movdqu xmm12, xmm13
  10310. pcmpeqd xmm12, xmm10
  10311. movdqu xmm0, [r9]
  10312. movdqu xmm1, [r9+16]
  10313. movdqu xmm2, [r9+32]
  10314. movdqu xmm3, [r9+48]
  10315. pand xmm0, xmm12
  10316. pand xmm1, xmm12
  10317. pand xmm2, xmm12
  10318. pand xmm3, xmm12
  10319. por xmm4, xmm0
  10320. por xmm5, xmm1
  10321. por xmm6, xmm2
  10322. por xmm7, xmm3
  10323. paddd xmm13, xmm11
  10324. ; ENTRY: 2
  10325. mov r9, QWORD PTR [rdx+16]
  10326. add r9, 64
  10327. movdqu xmm12, xmm13
  10328. pcmpeqd xmm12, xmm10
  10329. movdqu xmm0, [r9]
  10330. movdqu xmm1, [r9+16]
  10331. movdqu xmm2, [r9+32]
  10332. movdqu xmm3, [r9+48]
  10333. pand xmm0, xmm12
  10334. pand xmm1, xmm12
  10335. pand xmm2, xmm12
  10336. pand xmm3, xmm12
  10337. por xmm4, xmm0
  10338. por xmm5, xmm1
  10339. por xmm6, xmm2
  10340. por xmm7, xmm3
  10341. paddd xmm13, xmm11
  10342. ; ENTRY: 3
  10343. mov r9, QWORD PTR [rdx+24]
  10344. add r9, 64
  10345. movdqu xmm12, xmm13
  10346. pcmpeqd xmm12, xmm10
  10347. movdqu xmm0, [r9]
  10348. movdqu xmm1, [r9+16]
  10349. movdqu xmm2, [r9+32]
  10350. movdqu xmm3, [r9+48]
  10351. pand xmm0, xmm12
  10352. pand xmm1, xmm12
  10353. pand xmm2, xmm12
  10354. pand xmm3, xmm12
  10355. por xmm4, xmm0
  10356. por xmm5, xmm1
  10357. por xmm6, xmm2
  10358. por xmm7, xmm3
  10359. paddd xmm13, xmm11
  10360. ; ENTRY: 4
  10361. mov r9, QWORD PTR [rdx+32]
  10362. add r9, 64
  10363. movdqu xmm12, xmm13
  10364. pcmpeqd xmm12, xmm10
  10365. movdqu xmm0, [r9]
  10366. movdqu xmm1, [r9+16]
  10367. movdqu xmm2, [r9+32]
  10368. movdqu xmm3, [r9+48]
  10369. pand xmm0, xmm12
  10370. pand xmm1, xmm12
  10371. pand xmm2, xmm12
  10372. pand xmm3, xmm12
  10373. por xmm4, xmm0
  10374. por xmm5, xmm1
  10375. por xmm6, xmm2
  10376. por xmm7, xmm3
  10377. paddd xmm13, xmm11
  10378. ; ENTRY: 5
  10379. mov r9, QWORD PTR [rdx+40]
  10380. add r9, 64
  10381. movdqu xmm12, xmm13
  10382. pcmpeqd xmm12, xmm10
  10383. movdqu xmm0, [r9]
  10384. movdqu xmm1, [r9+16]
  10385. movdqu xmm2, [r9+32]
  10386. movdqu xmm3, [r9+48]
  10387. pand xmm0, xmm12
  10388. pand xmm1, xmm12
  10389. pand xmm2, xmm12
  10390. pand xmm3, xmm12
  10391. por xmm4, xmm0
  10392. por xmm5, xmm1
  10393. por xmm6, xmm2
  10394. por xmm7, xmm3
  10395. paddd xmm13, xmm11
  10396. ; ENTRY: 6
  10397. mov r9, QWORD PTR [rdx+48]
  10398. add r9, 64
  10399. movdqu xmm12, xmm13
  10400. pcmpeqd xmm12, xmm10
  10401. movdqu xmm0, [r9]
  10402. movdqu xmm1, [r9+16]
  10403. movdqu xmm2, [r9+32]
  10404. movdqu xmm3, [r9+48]
  10405. pand xmm0, xmm12
  10406. pand xmm1, xmm12
  10407. pand xmm2, xmm12
  10408. pand xmm3, xmm12
  10409. por xmm4, xmm0
  10410. por xmm5, xmm1
  10411. por xmm6, xmm2
  10412. por xmm7, xmm3
  10413. paddd xmm13, xmm11
  10414. ; ENTRY: 7
  10415. mov r9, QWORD PTR [rdx+56]
  10416. add r9, 64
  10417. movdqu xmm12, xmm13
  10418. pcmpeqd xmm12, xmm10
  10419. movdqu xmm0, [r9]
  10420. movdqu xmm1, [r9+16]
  10421. movdqu xmm2, [r9+32]
  10422. movdqu xmm3, [r9+48]
  10423. pand xmm0, xmm12
  10424. pand xmm1, xmm12
  10425. pand xmm2, xmm12
  10426. pand xmm3, xmm12
  10427. por xmm4, xmm0
  10428. por xmm5, xmm1
  10429. por xmm6, xmm2
  10430. por xmm7, xmm3
  10431. paddd xmm13, xmm11
  10432. ; ENTRY: 8
  10433. mov r9, QWORD PTR [rdx+64]
  10434. add r9, 64
  10435. movdqu xmm12, xmm13
  10436. pcmpeqd xmm12, xmm10
  10437. movdqu xmm0, [r9]
  10438. movdqu xmm1, [r9+16]
  10439. movdqu xmm2, [r9+32]
  10440. movdqu xmm3, [r9+48]
  10441. pand xmm0, xmm12
  10442. pand xmm1, xmm12
  10443. pand xmm2, xmm12
  10444. pand xmm3, xmm12
  10445. por xmm4, xmm0
  10446. por xmm5, xmm1
  10447. por xmm6, xmm2
  10448. por xmm7, xmm3
  10449. paddd xmm13, xmm11
  10450. ; ENTRY: 9
  10451. mov r9, QWORD PTR [rdx+72]
  10452. add r9, 64
  10453. movdqu xmm12, xmm13
  10454. pcmpeqd xmm12, xmm10
  10455. movdqu xmm0, [r9]
  10456. movdqu xmm1, [r9+16]
  10457. movdqu xmm2, [r9+32]
  10458. movdqu xmm3, [r9+48]
  10459. pand xmm0, xmm12
  10460. pand xmm1, xmm12
  10461. pand xmm2, xmm12
  10462. pand xmm3, xmm12
  10463. por xmm4, xmm0
  10464. por xmm5, xmm1
  10465. por xmm6, xmm2
  10466. por xmm7, xmm3
  10467. paddd xmm13, xmm11
  10468. ; ENTRY: 10
  10469. mov r9, QWORD PTR [rdx+80]
  10470. add r9, 64
  10471. movdqu xmm12, xmm13
  10472. pcmpeqd xmm12, xmm10
  10473. movdqu xmm0, [r9]
  10474. movdqu xmm1, [r9+16]
  10475. movdqu xmm2, [r9+32]
  10476. movdqu xmm3, [r9+48]
  10477. pand xmm0, xmm12
  10478. pand xmm1, xmm12
  10479. pand xmm2, xmm12
  10480. pand xmm3, xmm12
  10481. por xmm4, xmm0
  10482. por xmm5, xmm1
  10483. por xmm6, xmm2
  10484. por xmm7, xmm3
  10485. paddd xmm13, xmm11
  10486. ; ENTRY: 11
  10487. mov r9, QWORD PTR [rdx+88]
  10488. add r9, 64
  10489. movdqu xmm12, xmm13
  10490. pcmpeqd xmm12, xmm10
  10491. movdqu xmm0, [r9]
  10492. movdqu xmm1, [r9+16]
  10493. movdqu xmm2, [r9+32]
  10494. movdqu xmm3, [r9+48]
  10495. pand xmm0, xmm12
  10496. pand xmm1, xmm12
  10497. pand xmm2, xmm12
  10498. pand xmm3, xmm12
  10499. por xmm4, xmm0
  10500. por xmm5, xmm1
  10501. por xmm6, xmm2
  10502. por xmm7, xmm3
  10503. paddd xmm13, xmm11
  10504. ; ENTRY: 12
  10505. mov r9, QWORD PTR [rdx+96]
  10506. add r9, 64
  10507. movdqu xmm12, xmm13
  10508. pcmpeqd xmm12, xmm10
  10509. movdqu xmm0, [r9]
  10510. movdqu xmm1, [r9+16]
  10511. movdqu xmm2, [r9+32]
  10512. movdqu xmm3, [r9+48]
  10513. pand xmm0, xmm12
  10514. pand xmm1, xmm12
  10515. pand xmm2, xmm12
  10516. pand xmm3, xmm12
  10517. por xmm4, xmm0
  10518. por xmm5, xmm1
  10519. por xmm6, xmm2
  10520. por xmm7, xmm3
  10521. paddd xmm13, xmm11
  10522. ; ENTRY: 13
  10523. mov r9, QWORD PTR [rdx+104]
  10524. add r9, 64
  10525. movdqu xmm12, xmm13
  10526. pcmpeqd xmm12, xmm10
  10527. movdqu xmm0, [r9]
  10528. movdqu xmm1, [r9+16]
  10529. movdqu xmm2, [r9+32]
  10530. movdqu xmm3, [r9+48]
  10531. pand xmm0, xmm12
  10532. pand xmm1, xmm12
  10533. pand xmm2, xmm12
  10534. pand xmm3, xmm12
  10535. por xmm4, xmm0
  10536. por xmm5, xmm1
  10537. por xmm6, xmm2
  10538. por xmm7, xmm3
  10539. paddd xmm13, xmm11
  10540. ; ENTRY: 14
  10541. mov r9, QWORD PTR [rdx+112]
  10542. add r9, 64
  10543. movdqu xmm12, xmm13
  10544. pcmpeqd xmm12, xmm10
  10545. movdqu xmm0, [r9]
  10546. movdqu xmm1, [r9+16]
  10547. movdqu xmm2, [r9+32]
  10548. movdqu xmm3, [r9+48]
  10549. pand xmm0, xmm12
  10550. pand xmm1, xmm12
  10551. pand xmm2, xmm12
  10552. pand xmm3, xmm12
  10553. por xmm4, xmm0
  10554. por xmm5, xmm1
  10555. por xmm6, xmm2
  10556. por xmm7, xmm3
  10557. paddd xmm13, xmm11
  10558. ; ENTRY: 15
  10559. mov r9, QWORD PTR [rdx+120]
  10560. add r9, 64
  10561. movdqu xmm12, xmm13
  10562. pcmpeqd xmm12, xmm10
  10563. movdqu xmm0, [r9]
  10564. movdqu xmm1, [r9+16]
  10565. movdqu xmm2, [r9+32]
  10566. movdqu xmm3, [r9+48]
  10567. pand xmm0, xmm12
  10568. pand xmm1, xmm12
  10569. pand xmm2, xmm12
  10570. pand xmm3, xmm12
  10571. por xmm4, xmm0
  10572. por xmm5, xmm1
  10573. por xmm6, xmm2
  10574. por xmm7, xmm3
  10575. paddd xmm13, xmm11
  10576. ; ENTRY: 16
  10577. mov r9, QWORD PTR [rdx+128]
  10578. add r9, 64
  10579. movdqu xmm12, xmm13
  10580. pcmpeqd xmm12, xmm10
  10581. movdqu xmm0, [r9]
  10582. movdqu xmm1, [r9+16]
  10583. movdqu xmm2, [r9+32]
  10584. movdqu xmm3, [r9+48]
  10585. pand xmm0, xmm12
  10586. pand xmm1, xmm12
  10587. pand xmm2, xmm12
  10588. pand xmm3, xmm12
  10589. por xmm4, xmm0
  10590. por xmm5, xmm1
  10591. por xmm6, xmm2
  10592. por xmm7, xmm3
  10593. paddd xmm13, xmm11
  10594. ; ENTRY: 17
  10595. mov r9, QWORD PTR [rdx+136]
  10596. add r9, 64
  10597. movdqu xmm12, xmm13
  10598. pcmpeqd xmm12, xmm10
  10599. movdqu xmm0, [r9]
  10600. movdqu xmm1, [r9+16]
  10601. movdqu xmm2, [r9+32]
  10602. movdqu xmm3, [r9+48]
  10603. pand xmm0, xmm12
  10604. pand xmm1, xmm12
  10605. pand xmm2, xmm12
  10606. pand xmm3, xmm12
  10607. por xmm4, xmm0
  10608. por xmm5, xmm1
  10609. por xmm6, xmm2
  10610. por xmm7, xmm3
  10611. paddd xmm13, xmm11
  10612. ; ENTRY: 18
  10613. mov r9, QWORD PTR [rdx+144]
  10614. add r9, 64
  10615. movdqu xmm12, xmm13
  10616. pcmpeqd xmm12, xmm10
  10617. movdqu xmm0, [r9]
  10618. movdqu xmm1, [r9+16]
  10619. movdqu xmm2, [r9+32]
  10620. movdqu xmm3, [r9+48]
  10621. pand xmm0, xmm12
  10622. pand xmm1, xmm12
  10623. pand xmm2, xmm12
  10624. pand xmm3, xmm12
  10625. por xmm4, xmm0
  10626. por xmm5, xmm1
  10627. por xmm6, xmm2
  10628. por xmm7, xmm3
  10629. paddd xmm13, xmm11
  10630. ; ENTRY: 19
  10631. mov r9, QWORD PTR [rdx+152]
  10632. add r9, 64
  10633. movdqu xmm12, xmm13
  10634. pcmpeqd xmm12, xmm10
  10635. movdqu xmm0, [r9]
  10636. movdqu xmm1, [r9+16]
  10637. movdqu xmm2, [r9+32]
  10638. movdqu xmm3, [r9+48]
  10639. pand xmm0, xmm12
  10640. pand xmm1, xmm12
  10641. pand xmm2, xmm12
  10642. pand xmm3, xmm12
  10643. por xmm4, xmm0
  10644. por xmm5, xmm1
  10645. por xmm6, xmm2
  10646. por xmm7, xmm3
  10647. paddd xmm13, xmm11
  10648. ; ENTRY: 20
  10649. mov r9, QWORD PTR [rdx+160]
  10650. add r9, 64
  10651. movdqu xmm12, xmm13
  10652. pcmpeqd xmm12, xmm10
  10653. movdqu xmm0, [r9]
  10654. movdqu xmm1, [r9+16]
  10655. movdqu xmm2, [r9+32]
  10656. movdqu xmm3, [r9+48]
  10657. pand xmm0, xmm12
  10658. pand xmm1, xmm12
  10659. pand xmm2, xmm12
  10660. pand xmm3, xmm12
  10661. por xmm4, xmm0
  10662. por xmm5, xmm1
  10663. por xmm6, xmm2
  10664. por xmm7, xmm3
  10665. paddd xmm13, xmm11
  10666. ; ENTRY: 21
  10667. mov r9, QWORD PTR [rdx+168]
  10668. add r9, 64
  10669. movdqu xmm12, xmm13
  10670. pcmpeqd xmm12, xmm10
  10671. movdqu xmm0, [r9]
  10672. movdqu xmm1, [r9+16]
  10673. movdqu xmm2, [r9+32]
  10674. movdqu xmm3, [r9+48]
  10675. pand xmm0, xmm12
  10676. pand xmm1, xmm12
  10677. pand xmm2, xmm12
  10678. pand xmm3, xmm12
  10679. por xmm4, xmm0
  10680. por xmm5, xmm1
  10681. por xmm6, xmm2
  10682. por xmm7, xmm3
  10683. paddd xmm13, xmm11
  10684. ; ENTRY: 22
  10685. mov r9, QWORD PTR [rdx+176]
  10686. add r9, 64
  10687. movdqu xmm12, xmm13
  10688. pcmpeqd xmm12, xmm10
  10689. movdqu xmm0, [r9]
  10690. movdqu xmm1, [r9+16]
  10691. movdqu xmm2, [r9+32]
  10692. movdqu xmm3, [r9+48]
  10693. pand xmm0, xmm12
  10694. pand xmm1, xmm12
  10695. pand xmm2, xmm12
  10696. pand xmm3, xmm12
  10697. por xmm4, xmm0
  10698. por xmm5, xmm1
  10699. por xmm6, xmm2
  10700. por xmm7, xmm3
  10701. paddd xmm13, xmm11
  10702. ; ENTRY: 23
  10703. mov r9, QWORD PTR [rdx+184]
  10704. add r9, 64
  10705. movdqu xmm12, xmm13
  10706. pcmpeqd xmm12, xmm10
  10707. movdqu xmm0, [r9]
  10708. movdqu xmm1, [r9+16]
  10709. movdqu xmm2, [r9+32]
  10710. movdqu xmm3, [r9+48]
  10711. pand xmm0, xmm12
  10712. pand xmm1, xmm12
  10713. pand xmm2, xmm12
  10714. pand xmm3, xmm12
  10715. por xmm4, xmm0
  10716. por xmm5, xmm1
  10717. por xmm6, xmm2
  10718. por xmm7, xmm3
  10719. paddd xmm13, xmm11
  10720. ; ENTRY: 24
  10721. mov r9, QWORD PTR [rdx+192]
  10722. add r9, 64
  10723. movdqu xmm12, xmm13
  10724. pcmpeqd xmm12, xmm10
  10725. movdqu xmm0, [r9]
  10726. movdqu xmm1, [r9+16]
  10727. movdqu xmm2, [r9+32]
  10728. movdqu xmm3, [r9+48]
  10729. pand xmm0, xmm12
  10730. pand xmm1, xmm12
  10731. pand xmm2, xmm12
  10732. pand xmm3, xmm12
  10733. por xmm4, xmm0
  10734. por xmm5, xmm1
  10735. por xmm6, xmm2
  10736. por xmm7, xmm3
  10737. paddd xmm13, xmm11
  10738. ; ENTRY: 25
  10739. mov r9, QWORD PTR [rdx+200]
  10740. add r9, 64
  10741. movdqu xmm12, xmm13
  10742. pcmpeqd xmm12, xmm10
  10743. movdqu xmm0, [r9]
  10744. movdqu xmm1, [r9+16]
  10745. movdqu xmm2, [r9+32]
  10746. movdqu xmm3, [r9+48]
  10747. pand xmm0, xmm12
  10748. pand xmm1, xmm12
  10749. pand xmm2, xmm12
  10750. pand xmm3, xmm12
  10751. por xmm4, xmm0
  10752. por xmm5, xmm1
  10753. por xmm6, xmm2
  10754. por xmm7, xmm3
  10755. paddd xmm13, xmm11
  10756. ; ENTRY: 26
  10757. mov r9, QWORD PTR [rdx+208]
  10758. add r9, 64
  10759. movdqu xmm12, xmm13
  10760. pcmpeqd xmm12, xmm10
  10761. movdqu xmm0, [r9]
  10762. movdqu xmm1, [r9+16]
  10763. movdqu xmm2, [r9+32]
  10764. movdqu xmm3, [r9+48]
  10765. pand xmm0, xmm12
  10766. pand xmm1, xmm12
  10767. pand xmm2, xmm12
  10768. pand xmm3, xmm12
  10769. por xmm4, xmm0
  10770. por xmm5, xmm1
  10771. por xmm6, xmm2
  10772. por xmm7, xmm3
  10773. paddd xmm13, xmm11
  10774. ; ENTRY: 27
  10775. mov r9, QWORD PTR [rdx+216]
  10776. add r9, 64
  10777. movdqu xmm12, xmm13
  10778. pcmpeqd xmm12, xmm10
  10779. movdqu xmm0, [r9]
  10780. movdqu xmm1, [r9+16]
  10781. movdqu xmm2, [r9+32]
  10782. movdqu xmm3, [r9+48]
  10783. pand xmm0, xmm12
  10784. pand xmm1, xmm12
  10785. pand xmm2, xmm12
  10786. pand xmm3, xmm12
  10787. por xmm4, xmm0
  10788. por xmm5, xmm1
  10789. por xmm6, xmm2
  10790. por xmm7, xmm3
  10791. paddd xmm13, xmm11
  10792. ; ENTRY: 28
  10793. mov r9, QWORD PTR [rdx+224]
  10794. add r9, 64
  10795. movdqu xmm12, xmm13
  10796. pcmpeqd xmm12, xmm10
  10797. movdqu xmm0, [r9]
  10798. movdqu xmm1, [r9+16]
  10799. movdqu xmm2, [r9+32]
  10800. movdqu xmm3, [r9+48]
  10801. pand xmm0, xmm12
  10802. pand xmm1, xmm12
  10803. pand xmm2, xmm12
  10804. pand xmm3, xmm12
  10805. por xmm4, xmm0
  10806. por xmm5, xmm1
  10807. por xmm6, xmm2
  10808. por xmm7, xmm3
  10809. paddd xmm13, xmm11
  10810. ; ENTRY: 29
  10811. mov r9, QWORD PTR [rdx+232]
  10812. add r9, 64
  10813. movdqu xmm12, xmm13
  10814. pcmpeqd xmm12, xmm10
  10815. movdqu xmm0, [r9]
  10816. movdqu xmm1, [r9+16]
  10817. movdqu xmm2, [r9+32]
  10818. movdqu xmm3, [r9+48]
  10819. pand xmm0, xmm12
  10820. pand xmm1, xmm12
  10821. pand xmm2, xmm12
  10822. pand xmm3, xmm12
  10823. por xmm4, xmm0
  10824. por xmm5, xmm1
  10825. por xmm6, xmm2
  10826. por xmm7, xmm3
  10827. paddd xmm13, xmm11
  10828. ; ENTRY: 30
  10829. mov r9, QWORD PTR [rdx+240]
  10830. add r9, 64
  10831. movdqu xmm12, xmm13
  10832. pcmpeqd xmm12, xmm10
  10833. movdqu xmm0, [r9]
  10834. movdqu xmm1, [r9+16]
  10835. movdqu xmm2, [r9+32]
  10836. movdqu xmm3, [r9+48]
  10837. pand xmm0, xmm12
  10838. pand xmm1, xmm12
  10839. pand xmm2, xmm12
  10840. pand xmm3, xmm12
  10841. por xmm4, xmm0
  10842. por xmm5, xmm1
  10843. por xmm6, xmm2
  10844. por xmm7, xmm3
  10845. paddd xmm13, xmm11
  10846. ; ENTRY: 31
  10847. mov r9, QWORD PTR [rdx+248]
  10848. add r9, 64
  10849. movdqu xmm12, xmm13
  10850. pcmpeqd xmm12, xmm10
  10851. movdqu xmm0, [r9]
  10852. movdqu xmm1, [r9+16]
  10853. movdqu xmm2, [r9+32]
  10854. movdqu xmm3, [r9+48]
  10855. pand xmm0, xmm12
  10856. pand xmm1, xmm12
  10857. pand xmm2, xmm12
  10858. pand xmm3, xmm12
  10859. por xmm4, xmm0
  10860. por xmm5, xmm1
  10861. por xmm6, xmm2
  10862. por xmm7, xmm3
  10863. paddd xmm13, xmm11
  10864. movdqu [rcx], xmm4
  10865. movdqu [rcx+16], xmm5
  10866. movdqu [rcx+32], xmm6
  10867. movdqu [rcx+48], xmm7
  10868. ; END: 8-15
  10869. vmovdqu xmm6, OWORD PTR [rsp]
  10870. vmovdqu xmm7, OWORD PTR [rsp+16]
  10871. vmovdqu xmm8, OWORD PTR [rsp+32]
  10872. vmovdqu xmm9, OWORD PTR [rsp+48]
  10873. vmovdqu xmm10, OWORD PTR [rsp+64]
  10874. vmovdqu xmm11, OWORD PTR [rsp+80]
  10875. vmovdqu xmm12, OWORD PTR [rsp+96]
  10876. vmovdqu xmm13, OWORD PTR [rsp+112]
  10877. add rsp, 128
  10878. ret
  10879. sp_2048_get_from_table_16 ENDP
  10880. _text ENDS
  10881. ENDIF
  10882. IFDEF HAVE_INTEL_AVX2
  10883. ; /* Reduce the number back to 2048 bits using Montgomery reduction (this variant uses the mulx, adcx/adox and pext instructions).
  10884. ; * Runs 16 rows (two per loop pass), each adding m * mu into a; then subtracts m under a mask built from the final carry and stores the result in a[0..15].
  10885. ; * a A single precision number to reduce in place. Arrives in rcx; words a[0..31] are read.
  10886. ; * m The single precision number representing the modulus. Arrives in rdx; words m[0..15] are read.
  10887. ; * mp The digit representing the negative inverse of m mod 2^n. Arrives in r8; multiplied by a[i] with a 64-bit imul to form mu.
  10888. ; */
  10889. _text SEGMENT READONLY PARA
  10890. sp_2048_mont_reduce_avx2_16 PROC
  10891. push r12  ; save every nonvolatile register used below; restored in reverse order before ret
  10892. push r13
  10893. push r14
  10894. push r15
  10895. push rdi
  10896. push rsi
  10897. push rbx
  10898. push rbp
  10899. mov r9, rcx  ; r9 = a; rcx is reused below as the mulx high-half output
  10900. mov r10, rdx  ; r10 = m; rdx is reused below as the implicit mulx multiplicand (mu)
  10901. xor rbp, rbp  ; rbp = carry handed from one row to the next; NOTE(review): cleared again just before the loop, so one of the two is redundant
  10902. ; i = 16
  10903. mov r11, 16  ; r11 = rows remaining; decremented by 2 per loop pass
  10904. mov r14, QWORD PTR [r9]  ; r14, r15, rdi, rsi cache a[0..3]: a four-word window that slides up one word per row
  10905. mov r15, QWORD PTR [r9+8]
  10906. mov rdi, QWORD PTR [r9+16]
  10907. mov rsi, QWORD PTR [r9+24]
  10908. add r9, 64  ; bias the pointer: r9 = &a[8], so the loop addresses words with displacements -32..+72
  10909. xor rbp, rbp  ; rbp = 0 (no carry into the first row)
  10910. L_2048_mont_reduce_avx2_16_loop:  ; one pass = two unrolled rows
  10911. ; mu = a[i] * mp
  10912. mov rdx, r14  ; rdx = a[i], taken from the register window
  10913. mov r12, r14  ; r12 = a[i], accumulator for word i (its sum is never stored)
  10914. imul rdx, r8  ; rdx = mu = low 64 bits of a[i] * mp
  10915. xor rbx, rbx  ; rbx = 0 and CF = OF = 0: starts both carry chains for this row
  10916. ; a[i+0] += m[0] * mu
  10917. mulx rcx, rax, QWORD PTR [r10]  ; rcx:rax = m[0] * mu (mulx leaves the flags alone)
  10918. mov r14, r15  ; slide window: r14 now holds word i+1 (mov leaves the flags alone)
  10919. adcx r12, rax  ; low halves accumulate along the CF chain
  10920. adox r14, rcx  ; high halves accumulate along the OF chain, one word higher
  10921. ; a[i+1] += m[1] * mu
  10922. mulx rcx, rax, QWORD PTR [r10+8]
  10923. mov r15, rdi  ; slide window: r15 now holds word i+2
  10924. adcx r14, rax
  10925. adox r15, rcx
  10926. ; a[i+2] += m[2] * mu
  10927. mulx rcx, rax, QWORD PTR [r10+16]
  10928. mov rdi, rsi  ; slide window: rdi now holds word i+3
  10929. adcx r15, rax
  10930. adox rdi, rcx
  10931. ; a[i+3] += m[3] * mu
  10932. mulx rcx, rax, QWORD PTR [r10+24]
  10933. mov rsi, QWORD PTR [r9+-32]  ; rsi = a[i+4] from memory: the new top of the register window
  10934. adcx rdi, rax
  10935. adox rsi, rcx
  10936. ; a[i+4] += m[4] * mu
  10937. mulx rcx, rax, QWORD PTR [r10+32]
  10938. mov r13, QWORD PTR [r9+-24]  ; from here on r13 and r12 alternate as the accumulator for the next memory word
  10939. adcx rsi, rax
  10940. adox r13, rcx
  10941. ; a[i+5] += m[5] * mu
  10942. mulx rcx, rax, QWORD PTR [r10+40]
  10943. mov r12, QWORD PTR [r9+-16]
  10944. adcx r13, rax
  10945. adox r12, rcx
  10946. mov QWORD PTR [r9+-24], r13  ; word i+5 has received both halves for this row: write it back
  10947. ; a[i+6] += m[6] * mu
  10948. mulx rcx, rax, QWORD PTR [r10+48]
  10949. mov r13, QWORD PTR [r9+-8]
  10950. adcx r12, rax
  10951. adox r13, rcx
  10952. mov QWORD PTR [r9+-16], r12
  10953. ; a[i+7] += m[7] * mu
  10954. mulx rcx, rax, QWORD PTR [r10+56]
  10955. mov r12, QWORD PTR [r9]
  10956. adcx r13, rax
  10957. adox r12, rcx
  10958. mov QWORD PTR [r9+-8], r13
  10959. ; a[i+8] += m[8] * mu
  10960. mulx rcx, rax, QWORD PTR [r10+64]
  10961. mov r13, QWORD PTR [r9+8]
  10962. adcx r12, rax
  10963. adox r13, rcx
  10964. mov QWORD PTR [r9], r12
  10965. ; a[i+9] += m[9] * mu
  10966. mulx rcx, rax, QWORD PTR [r10+72]
  10967. mov r12, QWORD PTR [r9+16]
  10968. adcx r13, rax
  10969. adox r12, rcx
  10970. mov QWORD PTR [r9+8], r13
  10971. ; a[i+10] += m[10] * mu
  10972. mulx rcx, rax, QWORD PTR [r10+80]
  10973. mov r13, QWORD PTR [r9+24]
  10974. adcx r12, rax
  10975. adox r13, rcx
  10976. mov QWORD PTR [r9+16], r12
  10977. ; a[i+11] += m[11] * mu
  10978. mulx rcx, rax, QWORD PTR [r10+88]
  10979. mov r12, QWORD PTR [r9+32]
  10980. adcx r13, rax
  10981. adox r12, rcx
  10982. mov QWORD PTR [r9+24], r13
  10983. ; a[i+12] += m[12] * mu
  10984. mulx rcx, rax, QWORD PTR [r10+96]
  10985. mov r13, QWORD PTR [r9+40]
  10986. adcx r12, rax
  10987. adox r13, rcx
  10988. mov QWORD PTR [r9+32], r12
  10989. ; a[i+13] += m[13] * mu
  10990. mulx rcx, rax, QWORD PTR [r10+104]
  10991. mov r12, QWORD PTR [r9+48]
  10992. adcx r13, rax
  10993. adox r12, rcx
  10994. mov QWORD PTR [r9+40], r13
  10995. ; a[i+14] += m[14] * mu
  10996. mulx rcx, rax, QWORD PTR [r10+112]
  10997. mov r13, QWORD PTR [r9+56]
  10998. adcx r12, rax
  10999. adox r13, rcx
  11000. mov QWORD PTR [r9+48], r12
  11001. ; a[i+15] += m[15] * mu
  11002. mulx rcx, rax, QWORD PTR [r10+120]
  11003. mov r12, QWORD PTR [r9+64]  ; r12 = a[i+16], the word that absorbs this row's final high half
  11004. adcx r13, rax
  11005. adox r12, rcx
  11006. mov QWORD PTR [r9+56], r13
  11007. adcx r12, rbp  ; add the carry saved by the previous row (plus the pending CF)
  11008. mov rbp, rbx  ; rbp = 0 without disturbing CF/OF
  11009. mov QWORD PTR [r9+64], r12
  11010. adox rbp, rbx  ; rbp += OF
  11011. adcx rbp, rbx  ; rbp += CF: rbp is now the carry out of this row
  11012. ; mu = a[i] * mp
  11013. mov rdx, r14  ; second unrolled row: same sequence, r9 not yet advanced so every displacement is 8 larger
  11014. mov r12, r14
  11015. imul rdx, r8
  11016. xor rbx, rbx  ; rbx = 0 and CF = OF = 0 for the new row
  11017. ; a[i+0] += m[0] * mu
  11018. mulx rcx, rax, QWORD PTR [r10]
  11019. mov r14, r15
  11020. adcx r12, rax
  11021. adox r14, rcx
  11022. ; a[i+1] += m[1] * mu
  11023. mulx rcx, rax, QWORD PTR [r10+8]
  11024. mov r15, rdi
  11025. adcx r14, rax
  11026. adox r15, rcx
  11027. ; a[i+2] += m[2] * mu
  11028. mulx rcx, rax, QWORD PTR [r10+16]
  11029. mov rdi, rsi
  11030. adcx r15, rax
  11031. adox rdi, rcx
  11032. ; a[i+3] += m[3] * mu
  11033. mulx rcx, rax, QWORD PTR [r10+24]
  11034. mov rsi, QWORD PTR [r9+-24]  ; new top of the register window for this row
  11035. adcx rdi, rax
  11036. adox rsi, rcx
  11037. ; a[i+4] += m[4] * mu
  11038. mulx rcx, rax, QWORD PTR [r10+32]
  11039. mov r13, QWORD PTR [r9+-16]
  11040. adcx rsi, rax
  11041. adox r13, rcx
  11042. ; a[i+5] += m[5] * mu
  11043. mulx rcx, rax, QWORD PTR [r10+40]
  11044. mov r12, QWORD PTR [r9+-8]
  11045. adcx r13, rax
  11046. adox r12, rcx
  11047. mov QWORD PTR [r9+-16], r13
  11048. ; a[i+6] += m[6] * mu
  11049. mulx rcx, rax, QWORD PTR [r10+48]
  11050. mov r13, QWORD PTR [r9]
  11051. adcx r12, rax
  11052. adox r13, rcx
  11053. mov QWORD PTR [r9+-8], r12
  11054. ; a[i+7] += m[7] * mu
  11055. mulx rcx, rax, QWORD PTR [r10+56]
  11056. mov r12, QWORD PTR [r9+8]
  11057. adcx r13, rax
  11058. adox r12, rcx
  11059. mov QWORD PTR [r9], r13
  11060. ; a[i+8] += m[8] * mu
  11061. mulx rcx, rax, QWORD PTR [r10+64]
  11062. mov r13, QWORD PTR [r9+16]
  11063. adcx r12, rax
  11064. adox r13, rcx
  11065. mov QWORD PTR [r9+8], r12
  11066. ; a[i+9] += m[9] * mu
  11067. mulx rcx, rax, QWORD PTR [r10+72]
  11068. mov r12, QWORD PTR [r9+24]
  11069. adcx r13, rax
  11070. adox r12, rcx
  11071. mov QWORD PTR [r9+16], r13
  11072. ; a[i+10] += m[10] * mu
  11073. mulx rcx, rax, QWORD PTR [r10+80]
  11074. mov r13, QWORD PTR [r9+32]
  11075. adcx r12, rax
  11076. adox r13, rcx
  11077. mov QWORD PTR [r9+24], r12
  11078. ; a[i+11] += m[11] * mu
  11079. mulx rcx, rax, QWORD PTR [r10+88]
  11080. mov r12, QWORD PTR [r9+40]
  11081. adcx r13, rax
  11082. adox r12, rcx
  11083. mov QWORD PTR [r9+32], r13
  11084. ; a[i+12] += m[12] * mu
  11085. mulx rcx, rax, QWORD PTR [r10+96]
  11086. mov r13, QWORD PTR [r9+48]
  11087. adcx r12, rax
  11088. adox r13, rcx
  11089. mov QWORD PTR [r9+40], r12
  11090. ; a[i+13] += m[13] * mu
  11091. mulx rcx, rax, QWORD PTR [r10+104]
  11092. mov r12, QWORD PTR [r9+56]
  11093. adcx r13, rax
  11094. adox r12, rcx
  11095. mov QWORD PTR [r9+48], r13
  11096. ; a[i+14] += m[14] * mu
  11097. mulx rcx, rax, QWORD PTR [r10+112]
  11098. mov r13, QWORD PTR [r9+64]
  11099. adcx r12, rax
  11100. adox r13, rcx
  11101. mov QWORD PTR [r9+56], r12
  11102. ; a[i+15] += m[15] * mu
  11103. mulx rcx, rax, QWORD PTR [r10+120]
  11104. mov r12, QWORD PTR [r9+72]  ; word that absorbs this row's final high half
  11105. adcx r13, rax
  11106. adox r12, rcx
  11107. mov QWORD PTR [r9+64], r13
  11108. adcx r12, rbp  ; add the carry saved by the first row of this pass
  11109. mov rbp, rbx  ; rbp = 0 without disturbing CF/OF
  11110. mov QWORD PTR [r9+72], r12
  11111. adox rbp, rbx  ; rbp += OF
  11112. adcx rbp, rbx  ; rbp += CF: carry handed to the next row (or to the final subtraction)
  11113. ; a += 2
  11114. add r9, 16
  11115. ; i -= 2
  11116. sub r11, 2
  11117. jnz L_2048_mont_reduce_avx2_16_loop  ; 8 passes in total
  11118. sub r9, 64  ; undo the +64 bias: after 8 passes of +16, r9 = &a[16]
  11119. neg rbp  ; turn the final carry into a mask: 0 stays 0, 1 becomes all ones
  11120. mov r8, r9  ; r8 = &a[16], source of the upper half (mp is no longer needed)
  11121. sub r9, 128  ; r9 = &a[0], destination of the result
  11122. mov rcx, QWORD PTR [r10]  ; final step: a[k] = a[16+k] - (m[k] selected by the mask), k = 0..15, as one borrow chain
  11123. mov rdx, r14  ; words 16..19 come from r14, r15, rdi, rsi: the loop never wrote them back to memory
  11124. pext rcx, rcx, rbp  ; all-ones mask keeps m[k], zero mask yields 0; pext does not modify the flags
  11125. sub rdx, rcx  ; starts the borrow chain; every later word uses sbb
  11126. mov rcx, QWORD PTR [r10+8]  ; rcx, rax, rdx rotate roles so a store can trail the subtraction that produced it
  11127. mov rax, r15
  11128. pext rcx, rcx, rbp
  11129. mov QWORD PTR [r9], rdx
  11130. sbb rax, rcx
  11131. mov rdx, QWORD PTR [r10+16]
  11132. mov rcx, rdi
  11133. pext rdx, rdx, rbp
  11134. mov QWORD PTR [r9+8], rax
  11135. sbb rcx, rdx
  11136. mov rax, QWORD PTR [r10+24]
  11137. mov rdx, rsi
  11138. pext rax, rax, rbp
  11139. mov QWORD PTR [r9+16], rcx
  11140. sbb rdx, rax
  11141. mov rcx, QWORD PTR [r10+32]
  11142. mov rax, QWORD PTR [r8+32]  ; from word 20 on, the upper half is read from memory
  11143. pext rcx, rcx, rbp
  11144. mov QWORD PTR [r9+24], rdx
  11145. sbb rax, rcx
  11146. mov rdx, QWORD PTR [r10+40]
  11147. mov rcx, QWORD PTR [r8+40]
  11148. pext rdx, rdx, rbp
  11149. mov QWORD PTR [r9+32], rax
  11150. sbb rcx, rdx
  11151. mov rax, QWORD PTR [r10+48]
  11152. mov rdx, QWORD PTR [r8+48]
  11153. pext rax, rax, rbp
  11154. mov QWORD PTR [r9+40], rcx
  11155. sbb rdx, rax
  11156. mov rcx, QWORD PTR [r10+56]
  11157. mov rax, QWORD PTR [r8+56]
  11158. pext rcx, rcx, rbp
  11159. mov QWORD PTR [r9+48], rdx
  11160. sbb rax, rcx
  11161. mov rdx, QWORD PTR [r10+64]
  11162. mov rcx, QWORD PTR [r8+64]
  11163. pext rdx, rdx, rbp
  11164. mov QWORD PTR [r9+56], rax
  11165. sbb rcx, rdx
  11166. mov rax, QWORD PTR [r10+72]
  11167. mov rdx, QWORD PTR [r8+72]
  11168. pext rax, rax, rbp
  11169. mov QWORD PTR [r9+64], rcx
  11170. sbb rdx, rax
  11171. mov rcx, QWORD PTR [r10+80]
  11172. mov rax, QWORD PTR [r8+80]
  11173. pext rcx, rcx, rbp
  11174. mov QWORD PTR [r9+72], rdx
  11175. sbb rax, rcx
  11176. mov rdx, QWORD PTR [r10+88]
  11177. mov rcx, QWORD PTR [r8+88]
  11178. pext rdx, rdx, rbp
  11179. mov QWORD PTR [r9+80], rax
  11180. sbb rcx, rdx
  11181. mov rax, QWORD PTR [r10+96]
  11182. mov rdx, QWORD PTR [r8+96]
  11183. pext rax, rax, rbp
  11184. mov QWORD PTR [r9+88], rcx
  11185. sbb rdx, rax
  11186. mov rcx, QWORD PTR [r10+104]
  11187. mov rax, QWORD PTR [r8+104]
  11188. pext rcx, rcx, rbp
  11189. mov QWORD PTR [r9+96], rdx
  11190. sbb rax, rcx
  11191. mov rdx, QWORD PTR [r10+112]
  11192. mov rcx, QWORD PTR [r8+112]
  11193. pext rdx, rdx, rbp
  11194. mov QWORD PTR [r9+104], rax
  11195. sbb rcx, rdx
  11196. mov rax, QWORD PTR [r10+120]
  11197. mov rdx, QWORD PTR [r8+120]
  11198. pext rax, rax, rbp
  11199. mov QWORD PTR [r9+112], rcx
  11200. sbb rdx, rax
  11201. mov QWORD PTR [r9+120], rdx  ; last result word; the borrow out of the chain is not used
  11202. pop rbp  ; restore nonvolatile registers in reverse push order
  11203. pop rbx
  11204. pop rsi
  11205. pop rdi
  11206. pop r15
  11207. pop r14
  11208. pop r13
  11209. pop r12
  11210. ret  ; no return value is set up; the result is the updated a[0..15]
  11211. sp_2048_mont_reduce_avx2_16 ENDP
  11212. _text ENDS
  11213. ENDIF
  IFNDEF WC_NO_CACHE_RESISTANT
  ; /* Copy one 128-byte (16 x 64-bit word) entry out of a table of 32 entries.
  ;  *
  ;  * All 32 entries are always loaded; each one is ANDed with an all-ones or
  ;  * all-zeros mask and ORed into the result, so the addresses that are read
  ;  * do not depend on the index.
  ;  *
  ;  * ABI: Microsoft x64.
  ;  * rcx  Destination buffer; 128 bytes are written.
  ;  * rdx  Array of 32 pointers, each pointing at a 128-byte entry.
  ;  * r8   Index of the entry to copy. Only the low 32 bits are compared.
  ;  *      If it matches none of 0..31 the destination is filled with zeros.
  ;  *
  ;  * Clobbers rax, r9, xmm0-xmm5 and flags. xmm6, xmm7 and xmm10-xmm13 are
  ;  * saved and restored. The upper halves of all ymm registers are zero on
  ;  * return (vzeroupper).
  ;  */
  _text SEGMENT READONLY PARA
  sp_2048_get_from_table_avx2_16 PROC FRAME
          ; The procedure moves rsp and saves non-volatile XMM registers, so
          ; it is declared FRAME and describes its prologue for the unwinder.
          ; On entry rsp is 8 mod 16; 104 = 6 * 16 bytes of saves + 8 bytes
          ; of padding, which makes every save slot 16-byte aligned.
          sub     rsp, 104
          .allocstack 104
          vmovdqu OWORD PTR [rsp], xmm6
          .savexmm128 xmm6, 0
          vmovdqu OWORD PTR [rsp+16], xmm7
          .savexmm128 xmm7, 16
          vmovdqu OWORD PTR [rsp+32], xmm10
          .savexmm128 xmm10, 32
          vmovdqu OWORD PTR [rsp+48], xmm11
          .savexmm128 xmm11, 48
          vmovdqu OWORD PTR [rsp+64], xmm12
          .savexmm128 xmm12, 64
          vmovdqu OWORD PTR [rsp+80], xmm13
          .savexmm128 xmm13, 80
          .endprolog

          ; ymm10 = index in every dword lane, ymm11 = 1 in every dword lane.
          ; vpermd with an all-zero selector (ymm13) broadcasts dword 0.
          mov     rax, 1
          movd    xmm10, r8
          movd    xmm11, rax
          vpxor   ymm13, ymm13, ymm13
          vpermd  ymm10, ymm13, ymm10
          vpermd  ymm11, ymm13, ymm11

          ; ymm13 is still zero and now serves as the entry counter.
          ; ymm4-ymm7 accumulate the selected entry (words 0-15).
          vpxor   ymm4, ymm4, ymm4
          vpxor   ymm5, ymm5, ymm5
          vpxor   ymm6, ymm6, ymm6
          vpxor   ymm7, ymm7, ymm7

          ; One expansion per table entry; the equate below is the byte
          ; offset of the entry's pointer within the pointer array.
  gft2048_16_off = 0
          REPT    32
          mov     r9, QWORD PTR [rdx+gft2048_16_off]
          ; ymm12 = all ones when counter == index, otherwise all zeros
          vpcmpeqd ymm12, ymm13, ymm10
          vmovdqu ymm0, YMMWORD PTR [r9]
          vmovdqu ymm1, YMMWORD PTR [r9+32]
          vmovdqu ymm2, YMMWORD PTR [r9+64]
          vmovdqu ymm3, YMMWORD PTR [r9+96]
          vpand   ymm0, ymm0, ymm12
          vpand   ymm1, ymm1, ymm12
          vpand   ymm2, ymm2, ymm12
          vpand   ymm3, ymm3, ymm12
          vpor    ymm4, ymm4, ymm0
          vpor    ymm5, ymm5, ymm1
          vpor    ymm6, ymm6, ymm2
          vpor    ymm7, ymm7, ymm3
          ; counter += 1 (the increment after the last entry is unused)
          vpaddd  ymm13, ymm13, ymm11
  gft2048_16_off = gft2048_16_off + 8
          ENDM

          ; Write the selected entry to the destination.
          vmovdqu YMMWORD PTR [rcx], ymm4
          vmovdqu YMMWORD PTR [rcx+32], ymm5
          vmovdqu YMMWORD PTR [rcx+64], ymm6
          vmovdqu YMMWORD PTR [rcx+96], ymm7

          ; Clear the upper ymm halves before returning to code that may use
          ; legacy SSE encodings; the low 128 bits are not affected.
          vzeroupper

          ; Restore the non-volatile XMM registers and release the frame.
          vmovdqu xmm6, OWORD PTR [rsp]
          vmovdqu xmm7, OWORD PTR [rsp+16]
          vmovdqu xmm10, OWORD PTR [rsp+32]
          vmovdqu xmm11, OWORD PTR [rsp+48]
          vmovdqu xmm12, OWORD PTR [rsp+64]
          vmovdqu xmm13, OWORD PTR [rsp+80]
          add     rsp, 104
          ret
  sp_2048_get_from_table_avx2_16 ENDP
  _text ENDS
  ENDIF
  ; /* Conditionally subtract b from a using the mask m.
  ;  * m is -1 to subtract b and 0 to subtract nothing (r becomes a copy of a).
  ;  *
  ;  * ABI: Microsoft x64. All numbers are 32 words of 64 bits.
  ;  * r (rcx)  Result: a - (b AND m).
  ;  * a (rdx)  Number to subtract from.
  ;  * b (r8)   Number to subtract.
  ;  * m (r9)   Mask ANDed with every word of b.
  ;  *
  ;  * Returns in rax 0 when the subtraction produced no borrow and -1 when
  ;  * it did.
  ;  * Clobbers r8, r10, r11 and flags. Uses 256 bytes of stack to hold the
  ;  * masked copy of b.
  ;  */
  _text SEGMENT READONLY PARA
  sp_2048_cond_sub_32 PROC FRAME
          ; The procedure moves rsp, so it is declared FRAME and reports the
          ; allocation to the unwinder.
          sub     rsp, 256
          .allocstack 256
          .endprolog

          ; Stage 1: store (b AND m) on the stack, two words per expansion.
          ; The masking is done up front because AND destroys the carry flag
          ; and therefore cannot sit inside the borrow chain of stage 2.
  csub2048_32_off = 0
          REPT    16
          mov     r10, QWORD PTR [r8+csub2048_32_off]
          mov     r11, QWORD PTR [r8+csub2048_32_off+8]
          and     r10, r9
          and     r11, r9
          mov     QWORD PTR [rsp+csub2048_32_off], r10
          mov     QWORD PTR [rsp+csub2048_32_off+8], r11
  csub2048_32_off = csub2048_32_off + 16
          ENDM

          ; Stage 2: r = a - masked b with a single borrow chain. Only MOV
          ; is used between SUB/SBB so the carry flag survives. r10 and r11
          ; alternate; each result word is stored after the next word has
          ; been computed. r8 is free here and is used as scratch.
          ; Word 0 starts the chain.
          mov     r10, QWORD PTR [rdx]
          mov     r8, QWORD PTR [rsp]
          sub     r10, r8
          ; Words 1..30, two per expansion (odd word in r11, even in r10).
  csub2048_32_off = 8
          REPT    15
          mov     r11, QWORD PTR [rdx+csub2048_32_off]
          mov     r8, QWORD PTR [rsp+csub2048_32_off]
          sbb     r11, r8
          mov     QWORD PTR [rcx+csub2048_32_off-8], r10
          mov     r10, QWORD PTR [rdx+csub2048_32_off+8]
          mov     r8, QWORD PTR [rsp+csub2048_32_off+8]
          sbb     r10, r8
          mov     QWORD PTR [rcx+csub2048_32_off], r11
  csub2048_32_off = csub2048_32_off + 16
          ENDM
          ; Word 31, then flush the last two result words.
          mov     r11, QWORD PTR [rdx+248]
          mov     r8, QWORD PTR [rsp+248]
          sbb     r11, r8
          mov     QWORD PTR [rcx+240], r10
          mov     QWORD PTR [rcx+248], r11

          ; rax = 0 - carry: 0 if no borrow, -1 if the subtraction borrowed.
          sbb     rax, rax
          add     rsp, 256
          ret
  sp_2048_cond_sub_32 ENDP
  _text ENDS
  12008. ; /* Reduce the number back to 2048 bits using Montgomery reduction.
  12009. ; *
  12010. ; * a A single precision number to reduce in place.
  12011. ; * m The single precision number representing the modulus.
  12012. ; * mp The digit representing the negative inverse of m mod 2^n.
  12013. ; */
  12014. _text SEGMENT READONLY PARA
  12015. sp_2048_mont_reduce_32 PROC
  12016. push r12
  12017. push r13
  12018. push r14
  12019. push r15
  12020. push rdi
  12021. push rsi
  12022. mov r9, rdx
  12023. xor rsi, rsi
  12024. ; i = 32
  12025. mov r10, 32
  12026. mov r15, QWORD PTR [rcx]
  12027. mov rdi, QWORD PTR [rcx+8]
  12028. L_2048_mont_reduce_32_loop:
  12029. ; mu = a[i] * mp
  12030. mov r13, r15
  12031. imul r13, r8
  12032. ; a[i+0] += m[0] * mu
  12033. mov rax, r13
  12034. xor r12, r12
  12035. mul QWORD PTR [r9]
  12036. add r15, rax
  12037. adc r12, rdx
  12038. ; a[i+1] += m[1] * mu
  12039. mov rax, r13
  12040. xor r11, r11
  12041. mul QWORD PTR [r9+8]
  12042. mov r15, rdi
  12043. add r15, rax
  12044. adc r11, rdx
  12045. add r15, r12
  12046. adc r11, 0
  12047. ; a[i+2] += m[2] * mu
  12048. mov rax, r13
  12049. xor r12, r12
  12050. mul QWORD PTR [r9+16]
  12051. mov rdi, QWORD PTR [rcx+16]
  12052. add rdi, rax
  12053. adc r12, rdx
  12054. add rdi, r11
  12055. adc r12, 0
  12056. ; a[i+3] += m[3] * mu
  12057. mov rax, r13
  12058. xor r11, r11
  12059. mul QWORD PTR [r9+24]
  12060. mov r14, QWORD PTR [rcx+24]
  12061. add r14, rax
  12062. adc r11, rdx
  12063. add r14, r12
  12064. mov QWORD PTR [rcx+24], r14
  12065. adc r11, 0
  12066. ; a[i+4] += m[4] * mu
  12067. mov rax, r13
  12068. xor r12, r12
  12069. mul QWORD PTR [r9+32]
  12070. mov r14, QWORD PTR [rcx+32]
  12071. add r14, rax
  12072. adc r12, rdx
  12073. add r14, r11
  12074. mov QWORD PTR [rcx+32], r14
  12075. adc r12, 0
  12076. ; a[i+5] += m[5] * mu
  12077. mov rax, r13
  12078. xor r11, r11
  12079. mul QWORD PTR [r9+40]
  12080. mov r14, QWORD PTR [rcx+40]
  12081. add r14, rax
  12082. adc r11, rdx
  12083. add r14, r12
  12084. mov QWORD PTR [rcx+40], r14
  12085. adc r11, 0
  12086. ; a[i+6] += m[6] * mu
  12087. mov rax, r13
  12088. xor r12, r12
  12089. mul QWORD PTR [r9+48]
  12090. mov r14, QWORD PTR [rcx+48]
  12091. add r14, rax
  12092. adc r12, rdx
  12093. add r14, r11
  12094. mov QWORD PTR [rcx+48], r14
  12095. adc r12, 0
  12096. ; a[i+7] += m[7] * mu
  12097. mov rax, r13
  12098. xor r11, r11
  12099. mul QWORD PTR [r9+56]
  12100. mov r14, QWORD PTR [rcx+56]
  12101. add r14, rax
  12102. adc r11, rdx
  12103. add r14, r12
  12104. mov QWORD PTR [rcx+56], r14
  12105. adc r11, 0
  12106. ; a[i+8] += m[8] * mu
  12107. mov rax, r13
  12108. xor r12, r12
  12109. mul QWORD PTR [r9+64]
  12110. mov r14, QWORD PTR [rcx+64]
  12111. add r14, rax
  12112. adc r12, rdx
  12113. add r14, r11
  12114. mov QWORD PTR [rcx+64], r14
  12115. adc r12, 0
  12116. ; a[i+9] += m[9] * mu
  12117. mov rax, r13
  12118. xor r11, r11
  12119. mul QWORD PTR [r9+72]
  12120. mov r14, QWORD PTR [rcx+72]
  12121. add r14, rax
  12122. adc r11, rdx
  12123. add r14, r12
  12124. mov QWORD PTR [rcx+72], r14
  12125. adc r11, 0
  12126. ; a[i+10] += m[10] * mu
  12127. mov rax, r13
  12128. xor r12, r12
  12129. mul QWORD PTR [r9+80]
  12130. mov r14, QWORD PTR [rcx+80]
  12131. add r14, rax
  12132. adc r12, rdx
  12133. add r14, r11
  12134. mov QWORD PTR [rcx+80], r14
  12135. adc r12, 0
  12136. ; a[i+11] += m[11] * mu
  12137. mov rax, r13
  12138. xor r11, r11
  12139. mul QWORD PTR [r9+88]
  12140. mov r14, QWORD PTR [rcx+88]
  12141. add r14, rax
  12142. adc r11, rdx
  12143. add r14, r12
  12144. mov QWORD PTR [rcx+88], r14
  12145. adc r11, 0
  12146. ; a[i+12] += m[12] * mu
  12147. mov rax, r13
  12148. xor r12, r12
  12149. mul QWORD PTR [r9+96]
  12150. mov r14, QWORD PTR [rcx+96]
  12151. add r14, rax
  12152. adc r12, rdx
  12153. add r14, r11
  12154. mov QWORD PTR [rcx+96], r14
  12155. adc r12, 0
  12156. ; a[i+13] += m[13] * mu
  12157. mov rax, r13
  12158. xor r11, r11
  12159. mul QWORD PTR [r9+104]
  12160. mov r14, QWORD PTR [rcx+104]
  12161. add r14, rax
  12162. adc r11, rdx
  12163. add r14, r12
  12164. mov QWORD PTR [rcx+104], r14
  12165. adc r11, 0
  12166. ; a[i+14] += m[14] * mu
  12167. mov rax, r13
  12168. xor r12, r12
  12169. mul QWORD PTR [r9+112]
  12170. mov r14, QWORD PTR [rcx+112]
  12171. add r14, rax
  12172. adc r12, rdx
  12173. add r14, r11
  12174. mov QWORD PTR [rcx+112], r14
  12175. adc r12, 0
  12176. ; a[i+15] += m[15] * mu
  12177. mov rax, r13
  12178. xor r11, r11
  12179. mul QWORD PTR [r9+120]
  12180. mov r14, QWORD PTR [rcx+120]
  12181. add r14, rax
  12182. adc r11, rdx
  12183. add r14, r12
  12184. mov QWORD PTR [rcx+120], r14
  12185. adc r11, 0
  12186. ; a[i+16] += m[16] * mu
  12187. mov rax, r13
  12188. xor r12, r12
  12189. mul QWORD PTR [r9+128]
  12190. mov r14, QWORD PTR [rcx+128]
  12191. add r14, rax
  12192. adc r12, rdx
  12193. add r14, r11
  12194. mov QWORD PTR [rcx+128], r14
  12195. adc r12, 0
  12196. ; a[i+17] += m[17] * mu
  12197. mov rax, r13
  12198. xor r11, r11
  12199. mul QWORD PTR [r9+136]
  12200. mov r14, QWORD PTR [rcx+136]
  12201. add r14, rax
  12202. adc r11, rdx
  12203. add r14, r12
  12204. mov QWORD PTR [rcx+136], r14
  12205. adc r11, 0
  12206. ; a[i+18] += m[18] * mu
  12207. mov rax, r13
  12208. xor r12, r12
  12209. mul QWORD PTR [r9+144]
  12210. mov r14, QWORD PTR [rcx+144]
  12211. add r14, rax
  12212. adc r12, rdx
  12213. add r14, r11
  12214. mov QWORD PTR [rcx+144], r14
  12215. adc r12, 0
  12216. ; a[i+19] += m[19] * mu
  12217. mov rax, r13
  12218. xor r11, r11
  12219. mul QWORD PTR [r9+152]
  12220. mov r14, QWORD PTR [rcx+152]
  12221. add r14, rax
  12222. adc r11, rdx
  12223. add r14, r12
  12224. mov QWORD PTR [rcx+152], r14
  12225. adc r11, 0
  12226. ; a[i+20] += m[20] * mu
  12227. mov rax, r13
  12228. xor r12, r12
  12229. mul QWORD PTR [r9+160]
  12230. mov r14, QWORD PTR [rcx+160]
  12231. add r14, rax
  12232. adc r12, rdx
  12233. add r14, r11
  12234. mov QWORD PTR [rcx+160], r14
  12235. adc r12, 0
  12236. ; a[i+21] += m[21] * mu
  12237. mov rax, r13
  12238. xor r11, r11
  12239. mul QWORD PTR [r9+168]
  12240. mov r14, QWORD PTR [rcx+168]
  12241. add r14, rax
  12242. adc r11, rdx
  12243. add r14, r12
  12244. mov QWORD PTR [rcx+168], r14
  12245. adc r11, 0
  12246. ; a[i+22] += m[22] * mu
  12247. mov rax, r13
  12248. xor r12, r12
  12249. mul QWORD PTR [r9+176]
  12250. mov r14, QWORD PTR [rcx+176]
  12251. add r14, rax
  12252. adc r12, rdx
  12253. add r14, r11
  12254. mov QWORD PTR [rcx+176], r14
  12255. adc r12, 0
  12256. ; a[i+23] += m[23] * mu
  12257. mov rax, r13
  12258. xor r11, r11
  12259. mul QWORD PTR [r9+184]
  12260. mov r14, QWORD PTR [rcx+184]
  12261. add r14, rax
  12262. adc r11, rdx
  12263. add r14, r12
  12264. mov QWORD PTR [rcx+184], r14
  12265. adc r11, 0
  12266. ; a[i+24] += m[24] * mu
  12267. mov rax, r13
  12268. xor r12, r12
  12269. mul QWORD PTR [r9+192]
  12270. mov r14, QWORD PTR [rcx+192]
  12271. add r14, rax
  12272. adc r12, rdx
  12273. add r14, r11
  12274. mov QWORD PTR [rcx+192], r14
  12275. adc r12, 0
  12276. ; a[i+25] += m[25] * mu
  12277. mov rax, r13
  12278. xor r11, r11
  12279. mul QWORD PTR [r9+200]
  12280. mov r14, QWORD PTR [rcx+200]
  12281. add r14, rax
  12282. adc r11, rdx
  12283. add r14, r12
  12284. mov QWORD PTR [rcx+200], r14
  12285. adc r11, 0
  12286. ; a[i+26] += m[26] * mu
  12287. mov rax, r13
  12288. xor r12, r12
  12289. mul QWORD PTR [r9+208]
  12290. mov r14, QWORD PTR [rcx+208]
  12291. add r14, rax
  12292. adc r12, rdx
  12293. add r14, r11
  12294. mov QWORD PTR [rcx+208], r14
  12295. adc r12, 0
  12296. ; a[i+27] += m[27] * mu
  12297. mov rax, r13
  12298. xor r11, r11
  12299. mul QWORD PTR [r9+216]
  12300. mov r14, QWORD PTR [rcx+216]
  12301. add r14, rax
  12302. adc r11, rdx
  12303. add r14, r12
  12304. mov QWORD PTR [rcx+216], r14
  12305. adc r11, 0
  12306. ; a[i+28] += m[28] * mu
  12307. mov rax, r13
  12308. xor r12, r12
  12309. mul QWORD PTR [r9+224]
  12310. mov r14, QWORD PTR [rcx+224]
  12311. add r14, rax
  12312. adc r12, rdx
  12313. add r14, r11
  12314. mov QWORD PTR [rcx+224], r14
  12315. adc r12, 0
  12316. ; a[i+29] += m[29] * mu
  12317. mov rax, r13
  12318. xor r11, r11
  12319. mul QWORD PTR [r9+232]
  12320. mov r14, QWORD PTR [rcx+232]
  12321. add r14, rax
  12322. adc r11, rdx
  12323. add r14, r12
  12324. mov QWORD PTR [rcx+232], r14
  12325. adc r11, 0
  12326. ; a[i+30] += m[30] * mu
  12327. mov rax, r13
  12328. xor r12, r12
  12329. mul QWORD PTR [r9+240]
  12330. mov r14, QWORD PTR [rcx+240]
  12331. add r14, rax
  12332. adc r12, rdx
  12333. add r14, r11
  12334. mov QWORD PTR [rcx+240], r14
  12335. adc r12, 0
  12336. ; a[i+31] += m[31] * mu
  12337. mov rax, r13
  12338. mul QWORD PTR [r9+248]
  12339. mov r14, QWORD PTR [rcx+248]
  12340. add r12, rax
  12341. adc rdx, rsi
  12342. mov rsi, 0
  12343. adc rsi, 0
  12344. add r14, r12
  12345. mov QWORD PTR [rcx+248], r14
  12346. adc QWORD PTR [rcx+256], rdx
  12347. adc rsi, 0
  12348. ; i -= 1
  12349. add rcx, 8
  12350. dec r10
  12351. jnz L_2048_mont_reduce_32_loop
  12352. mov QWORD PTR [rcx], r15
  12353. mov QWORD PTR [rcx+8], rdi
  12354. neg rsi
  12355. IFDEF _WIN64
  12356. mov r8, r9
  12357. mov r9, rsi
  12358. ELSE
  12359. mov r9, rsi
  12360. mov r8, r9
  12361. ENDIF
  12362. mov rdx, rcx
  12363. mov rcx, rcx
  12364. sub rcx, 256
  12365. call sp_2048_cond_sub_32
  12366. pop rsi
  12367. pop rdi
  12368. pop r15
  12369. pop r14
  12370. pop r13
  12371. pop r12
  12372. ret
  12373. sp_2048_mont_reduce_32 ENDP
  12374. _text ENDS
  12375. ; /* Sub b from a into r. (r = a - b)
  12376. ; * Works on 32 64-bit words (byte offsets 0..248) as one sub/sbb borrow chain.
  12377. ; * r A single precision integer; the result, stored through rcx.
  12378. ; * a A single precision integer; the minuend, loaded through rdx.
  12379. ; * b A single precision integer; the subtrahend, loaded through r8.
  12380. ; * returns 0 in rax when no borrow leaves the top word, -1 when one does. */
  12381. _text SEGMENT READONLY PARA
  12382. sp_2048_sub_32 PROC
  12383. mov r9, QWORD PTR [rdx] ; r9 = a[0]
  12384. sub r9, QWORD PTR [r8] ; a[0] - b[0]: sets CF, the start of the borrow chain
  12385. mov r10, QWORD PTR [rdx+8] ; mov does not change CF, so loads/stores may sit between sbb steps
  12386. mov QWORD PTR [rcx], r9 ; r[0]
  12387. sbb r10, QWORD PTR [r8+8] ; a[1] - b[1] - borrow
  12388. mov r9, QWORD PTR [rdx+16] ; r9 and r10 alternate as the working register from here on
  12389. mov QWORD PTR [rcx+8], r10
  12390. sbb r9, QWORD PTR [r8+16]
  12391. mov r10, QWORD PTR [rdx+24]
  12392. mov QWORD PTR [rcx+16], r9
  12393. sbb r10, QWORD PTR [r8+24]
  12394. mov r9, QWORD PTR [rdx+32]
  12395. mov QWORD PTR [rcx+24], r10
  12396. sbb r9, QWORD PTR [r8+32]
  12397. mov r10, QWORD PTR [rdx+40]
  12398. mov QWORD PTR [rcx+32], r9
  12399. sbb r10, QWORD PTR [r8+40]
  12400. mov r9, QWORD PTR [rdx+48]
  12401. mov QWORD PTR [rcx+40], r10
  12402. sbb r9, QWORD PTR [r8+48]
  12403. mov r10, QWORD PTR [rdx+56]
  12404. mov QWORD PTR [rcx+48], r9
  12405. sbb r10, QWORD PTR [r8+56]
  12406. mov r9, QWORD PTR [rdx+64]
  12407. mov QWORD PTR [rcx+56], r10
  12408. sbb r9, QWORD PTR [r8+64]
  12409. mov r10, QWORD PTR [rdx+72]
  12410. mov QWORD PTR [rcx+64], r9
  12411. sbb r10, QWORD PTR [r8+72]
  12412. mov r9, QWORD PTR [rdx+80]
  12413. mov QWORD PTR [rcx+72], r10
  12414. sbb r9, QWORD PTR [r8+80]
  12415. mov r10, QWORD PTR [rdx+88]
  12416. mov QWORD PTR [rcx+80], r9
  12417. sbb r10, QWORD PTR [r8+88]
  12418. mov r9, QWORD PTR [rdx+96]
  12419. mov QWORD PTR [rcx+88], r10
  12420. sbb r9, QWORD PTR [r8+96]
  12421. mov r10, QWORD PTR [rdx+104]
  12422. mov QWORD PTR [rcx+96], r9
  12423. sbb r10, QWORD PTR [r8+104]
  12424. mov r9, QWORD PTR [rdx+112]
  12425. mov QWORD PTR [rcx+104], r10
  12426. sbb r9, QWORD PTR [r8+112]
  12427. mov r10, QWORD PTR [rdx+120]
  12428. mov QWORD PTR [rcx+112], r9
  12429. sbb r10, QWORD PTR [r8+120]
  12430. mov r9, QWORD PTR [rdx+128]
  12431. mov QWORD PTR [rcx+120], r10
  12432. sbb r9, QWORD PTR [r8+128]
  12433. mov r10, QWORD PTR [rdx+136]
  12434. mov QWORD PTR [rcx+128], r9
  12435. sbb r10, QWORD PTR [r8+136]
  12436. mov r9, QWORD PTR [rdx+144]
  12437. mov QWORD PTR [rcx+136], r10
  12438. sbb r9, QWORD PTR [r8+144]
  12439. mov r10, QWORD PTR [rdx+152]
  12440. mov QWORD PTR [rcx+144], r9
  12441. sbb r10, QWORD PTR [r8+152]
  12442. mov r9, QWORD PTR [rdx+160]
  12443. mov QWORD PTR [rcx+152], r10
  12444. sbb r9, QWORD PTR [r8+160]
  12445. mov r10, QWORD PTR [rdx+168]
  12446. mov QWORD PTR [rcx+160], r9
  12447. sbb r10, QWORD PTR [r8+168]
  12448. mov r9, QWORD PTR [rdx+176]
  12449. mov QWORD PTR [rcx+168], r10
  12450. sbb r9, QWORD PTR [r8+176]
  12451. mov r10, QWORD PTR [rdx+184]
  12452. mov QWORD PTR [rcx+176], r9
  12453. sbb r10, QWORD PTR [r8+184]
  12454. mov r9, QWORD PTR [rdx+192]
  12455. mov QWORD PTR [rcx+184], r10
  12456. sbb r9, QWORD PTR [r8+192]
  12457. mov r10, QWORD PTR [rdx+200]
  12458. mov QWORD PTR [rcx+192], r9
  12459. sbb r10, QWORD PTR [r8+200]
  12460. mov r9, QWORD PTR [rdx+208]
  12461. mov QWORD PTR [rcx+200], r10
  12462. sbb r9, QWORD PTR [r8+208]
  12463. mov r10, QWORD PTR [rdx+216]
  12464. mov QWORD PTR [rcx+208], r9
  12465. sbb r10, QWORD PTR [r8+216]
  12466. mov r9, QWORD PTR [rdx+224]
  12467. mov QWORD PTR [rcx+216], r10
  12468. sbb r9, QWORD PTR [r8+224]
  12469. mov r10, QWORD PTR [rdx+232]
  12470. mov QWORD PTR [rcx+224], r9
  12471. sbb r10, QWORD PTR [r8+232]
  12472. mov r9, QWORD PTR [rdx+240]
  12473. mov QWORD PTR [rcx+232], r10
  12474. sbb r9, QWORD PTR [r8+240]
  12475. mov r10, QWORD PTR [rdx+248]
  12476. mov QWORD PTR [rcx+240], r9
  12477. sbb r10, QWORD PTR [r8+248] ; a[31] - b[31] - borrow; CF is now the borrow out of the whole subtraction
  12478. mov QWORD PTR [rcx+248], r10 ; r[31]
  12479. sbb rax, rax ; rax = -CF: 0 without a final borrow, -1 (all ones) with one
  12480. ret
  12481. sp_2048_sub_32 ENDP
  12482. _text ENDS
  12483. IFDEF HAVE_INTEL_AVX2
  12484. ; /* Mul a by digit b into r. (r = a * b)
  12485. ; * Uses mulx with two carry chains: adcx (CF) adds each low half, adox (OF) carries each high half forward.
  12486. ; * r A single precision integer; 33 words are stored through rcx (offsets 0..256).
  12487. ; * a A single precision integer; 32 words are read (pointer arrives in rdx, is moved to rax).
  12488. ; * b A single precision digit; arrives in r8 and is moved to rdx, mulx's implicit operand.
  12489. ; */
  12490. _text SEGMENT READONLY PARA
  12491. sp_2048_mul_d_avx2_32 PROC
  12492. push r12 ; r12 and r13 are overwritten below, so keep the caller's values
  12493. push r13
  12494. mov rax, rdx ; rax = a; rdx is needed as mulx's implicit source operand
  12495. ; A[0] * B
  12496. mov rdx, r8 ; rdx = b
  12497. xor r13, r13 ; r13 = 0; xor also clears CF and OF before the adcx/adox chains start
  12498. mulx r12, r11, QWORD PTR [rax] ; r12:r11 = a[0] * b (high:low); mulx leaves the flags alone
  12499. mov QWORD PTR [rcx], r11 ; r[0] = low half
  12500. ; A[1] * B
  12501. mulx r10, r9, QWORD PTR [rax+8] ; r10:r9 = a[1] * b
  12502. mov r11, r13 ; r11 = 0; it will receive this product's high half
  12503. adcx r12, r9 ; previous high half + this low half + CF
  12504. adox r11, r10 ; this high half (+ OF), kept for the next word
  12505. mov QWORD PTR [rcx+8], r12 ; r[1]
  12506. ; A[2] * B
  12507. mulx r10, r9, QWORD PTR [rax+16] ; same step repeats; r11 and r12 swap roles each word
  12508. mov r12, r13
  12509. adcx r11, r9
  12510. adox r12, r10
  12511. mov QWORD PTR [rcx+16], r11
  12512. ; A[3] * B
  12513. mulx r10, r9, QWORD PTR [rax+24]
  12514. mov r11, r13
  12515. adcx r12, r9
  12516. adox r11, r10
  12517. mov QWORD PTR [rcx+24], r12
  12518. ; A[4] * B
  12519. mulx r10, r9, QWORD PTR [rax+32]
  12520. mov r12, r13
  12521. adcx r11, r9
  12522. adox r12, r10
  12523. mov QWORD PTR [rcx+32], r11
  12524. ; A[5] * B
  12525. mulx r10, r9, QWORD PTR [rax+40]
  12526. mov r11, r13
  12527. adcx r12, r9
  12528. adox r11, r10
  12529. mov QWORD PTR [rcx+40], r12
  12530. ; A[6] * B
  12531. mulx r10, r9, QWORD PTR [rax+48]
  12532. mov r12, r13
  12533. adcx r11, r9
  12534. adox r12, r10
  12535. mov QWORD PTR [rcx+48], r11
  12536. ; A[7] * B
  12537. mulx r10, r9, QWORD PTR [rax+56]
  12538. mov r11, r13
  12539. adcx r12, r9
  12540. adox r11, r10
  12541. mov QWORD PTR [rcx+56], r12
  12542. ; A[8] * B
  12543. mulx r10, r9, QWORD PTR [rax+64]
  12544. mov r12, r13
  12545. adcx r11, r9
  12546. adox r12, r10
  12547. mov QWORD PTR [rcx+64], r11
  12548. ; A[9] * B
  12549. mulx r10, r9, QWORD PTR [rax+72]
  12550. mov r11, r13
  12551. adcx r12, r9
  12552. adox r11, r10
  12553. mov QWORD PTR [rcx+72], r12
  12554. ; A[10] * B
  12555. mulx r10, r9, QWORD PTR [rax+80]
  12556. mov r12, r13
  12557. adcx r11, r9
  12558. adox r12, r10
  12559. mov QWORD PTR [rcx+80], r11
  12560. ; A[11] * B
  12561. mulx r10, r9, QWORD PTR [rax+88]
  12562. mov r11, r13
  12563. adcx r12, r9
  12564. adox r11, r10
  12565. mov QWORD PTR [rcx+88], r12
  12566. ; A[12] * B
  12567. mulx r10, r9, QWORD PTR [rax+96]
  12568. mov r12, r13
  12569. adcx r11, r9
  12570. adox r12, r10
  12571. mov QWORD PTR [rcx+96], r11
  12572. ; A[13] * B
  12573. mulx r10, r9, QWORD PTR [rax+104]
  12574. mov r11, r13
  12575. adcx r12, r9
  12576. adox r11, r10
  12577. mov QWORD PTR [rcx+104], r12
  12578. ; A[14] * B
  12579. mulx r10, r9, QWORD PTR [rax+112]
  12580. mov r12, r13
  12581. adcx r11, r9
  12582. adox r12, r10
  12583. mov QWORD PTR [rcx+112], r11
  12584. ; A[15] * B
  12585. mulx r10, r9, QWORD PTR [rax+120]
  12586. mov r11, r13
  12587. adcx r12, r9
  12588. adox r11, r10
  12589. mov QWORD PTR [rcx+120], r12
  12590. ; A[16] * B
  12591. mulx r10, r9, QWORD PTR [rax+128]
  12592. mov r12, r13
  12593. adcx r11, r9
  12594. adox r12, r10
  12595. mov QWORD PTR [rcx+128], r11
  12596. ; A[17] * B
  12597. mulx r10, r9, QWORD PTR [rax+136]
  12598. mov r11, r13
  12599. adcx r12, r9
  12600. adox r11, r10
  12601. mov QWORD PTR [rcx+136], r12
  12602. ; A[18] * B
  12603. mulx r10, r9, QWORD PTR [rax+144]
  12604. mov r12, r13
  12605. adcx r11, r9
  12606. adox r12, r10
  12607. mov QWORD PTR [rcx+144], r11
  12608. ; A[19] * B
  12609. mulx r10, r9, QWORD PTR [rax+152]
  12610. mov r11, r13
  12611. adcx r12, r9
  12612. adox r11, r10
  12613. mov QWORD PTR [rcx+152], r12
  12614. ; A[20] * B
  12615. mulx r10, r9, QWORD PTR [rax+160]
  12616. mov r12, r13
  12617. adcx r11, r9
  12618. adox r12, r10
  12619. mov QWORD PTR [rcx+160], r11
  12620. ; A[21] * B
  12621. mulx r10, r9, QWORD PTR [rax+168]
  12622. mov r11, r13
  12623. adcx r12, r9
  12624. adox r11, r10
  12625. mov QWORD PTR [rcx+168], r12
  12626. ; A[22] * B
  12627. mulx r10, r9, QWORD PTR [rax+176]
  12628. mov r12, r13
  12629. adcx r11, r9
  12630. adox r12, r10
  12631. mov QWORD PTR [rcx+176], r11
  12632. ; A[23] * B
  12633. mulx r10, r9, QWORD PTR [rax+184]
  12634. mov r11, r13
  12635. adcx r12, r9
  12636. adox r11, r10
  12637. mov QWORD PTR [rcx+184], r12
  12638. ; A[24] * B
  12639. mulx r10, r9, QWORD PTR [rax+192]
  12640. mov r12, r13
  12641. adcx r11, r9
  12642. adox r12, r10
  12643. mov QWORD PTR [rcx+192], r11
  12644. ; A[25] * B
  12645. mulx r10, r9, QWORD PTR [rax+200]
  12646. mov r11, r13
  12647. adcx r12, r9
  12648. adox r11, r10
  12649. mov QWORD PTR [rcx+200], r12
  12650. ; A[26] * B
  12651. mulx r10, r9, QWORD PTR [rax+208]
  12652. mov r12, r13
  12653. adcx r11, r9
  12654. adox r12, r10
  12655. mov QWORD PTR [rcx+208], r11
  12656. ; A[27] * B
  12657. mulx r10, r9, QWORD PTR [rax+216]
  12658. mov r11, r13
  12659. adcx r12, r9
  12660. adox r11, r10
  12661. mov QWORD PTR [rcx+216], r12
  12662. ; A[28] * B
  12663. mulx r10, r9, QWORD PTR [rax+224]
  12664. mov r12, r13
  12665. adcx r11, r9
  12666. adox r12, r10
  12667. mov QWORD PTR [rcx+224], r11
  12668. ; A[29] * B
  12669. mulx r10, r9, QWORD PTR [rax+232]
  12670. mov r11, r13
  12671. adcx r12, r9
  12672. adox r11, r10
  12673. mov QWORD PTR [rcx+232], r12
  12674. ; A[30] * B
  12675. mulx r10, r9, QWORD PTR [rax+240]
  12676. mov r12, r13
  12677. adcx r11, r9
  12678. adox r12, r10
  12679. mov QWORD PTR [rcx+240], r11
  12680. ; A[31] * B
  12681. mulx r10, r9, QWORD PTR [rax+248]
  12682. mov r11, r13
  12683. adcx r12, r9
  12684. adox r11, r10
  12685. adcx r11, r13 ; add zero plus the last CF so the final carry lands in the top word
  12686. mov QWORD PTR [rcx+248], r12 ; r[31]
  12687. mov QWORD PTR [rcx+256], r11 ; r[32], the extra high word of the product
  12688. pop r13
  12689. pop r12
  12690. ret
  12691. sp_2048_mul_d_avx2_32 ENDP
  12692. _text ENDS
  12693. ENDIF
  12694. IFDEF _WIN64
  12695. ; /* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div)
  12696. ; * NOTE: the div instruction faults if div is 0 or the quotient does not fit in 64 bits (d1 >= div).
  12697. ; * d1 The high order half of the number to divide (arrives in rcx).
  12698. ; * d0 The low order half of the number to divide (arrives in rdx).
  12699. ; * div The divisor (arrives in r8).
  12700. ; * returns the quotient of the division in rax.
  12701. ; */
  12702. _text SEGMENT READONLY PARA
  12703. div_2048_word_asm_32 PROC
  12704. mov r9, rdx ; r9 = d0, moved aside because rdx must hold the high half for div
  12705. mov rax, r9 ; rax = d0, the low half of the dividend
  12706. mov rdx, rcx ; rdx = d1, the high half of the dividend
  12707. div r8 ; rdx:rax / r8 -> quotient in rax, remainder in rdx (remainder is not used here)
  12708. ret
  12709. div_2048_word_asm_32 ENDP
  12710. _text ENDS
  12711. ENDIF
  12712. IFDEF HAVE_INTEL_AVX2
  12713. ; /* Conditionally subtract b from a using the mask m.
  12714. ; * m is -1 to subtract b and 0 to subtract nothing (r then becomes a copy of a).
  12715. ; * Each word of b is filtered with pext against m before it enters the sub/sbb chain.
  12716. ; * r A single precision number representing condition subtract result (stored through rcx).
  12717. ; * a A single precision number to subtract from (loaded through rdx).
  12718. ; * b A single precision number to subtract (loaded through r8).
  12719. ; * m Mask value to apply (in r9).
  12720. ; * returns 0 in rax when no borrow leaves the top word, -1 when one does. */
  12721. _text SEGMENT READONLY PARA
  12722. sp_2048_cond_sub_avx2_32 PROC
  12723. push r12 ; r12 serves as a third scratch register; keep the caller's value
  12724. mov r12, QWORD PTR [r8] ; b[0]
  12725. mov r10, QWORD PTR [rdx] ; a[0]
  12726. pext r12, r12, r9 ; b[0] when m is all ones, 0 when m is 0
  12727. sub r10, r12 ; a[0] - masked b[0]: sets CF, the start of the borrow chain
  12728. mov r12, QWORD PTR [r8+8] ; mov and pext leave CF untouched, so they may sit between sbb steps
  12729. mov r11, QWORD PTR [rdx+8]
  12730. pext r12, r12, r9
  12731. mov QWORD PTR [rcx], r10 ; r[0]
  12732. sbb r11, r12 ; a[1] - masked b[1] - borrow
  12733. mov r10, QWORD PTR [r8+16] ; r10, r11 and r12 rotate roles from word to word
  12734. mov r12, QWORD PTR [rdx+16]
  12735. pext r10, r10, r9
  12736. mov QWORD PTR [rcx+8], r11
  12737. sbb r12, r10
  12738. mov r11, QWORD PTR [r8+24]
  12739. mov r10, QWORD PTR [rdx+24]
  12740. pext r11, r11, r9
  12741. mov QWORD PTR [rcx+16], r12
  12742. sbb r10, r11
  12743. mov r12, QWORD PTR [r8+32]
  12744. mov r11, QWORD PTR [rdx+32]
  12745. pext r12, r12, r9
  12746. mov QWORD PTR [rcx+24], r10
  12747. sbb r11, r12
  12748. mov r10, QWORD PTR [r8+40]
  12749. mov r12, QWORD PTR [rdx+40]
  12750. pext r10, r10, r9
  12751. mov QWORD PTR [rcx+32], r11
  12752. sbb r12, r10
  12753. mov r11, QWORD PTR [r8+48]
  12754. mov r10, QWORD PTR [rdx+48]
  12755. pext r11, r11, r9
  12756. mov QWORD PTR [rcx+40], r12
  12757. sbb r10, r11
  12758. mov r12, QWORD PTR [r8+56]
  12759. mov r11, QWORD PTR [rdx+56]
  12760. pext r12, r12, r9
  12761. mov QWORD PTR [rcx+48], r10
  12762. sbb r11, r12
  12763. mov r10, QWORD PTR [r8+64]
  12764. mov r12, QWORD PTR [rdx+64]
  12765. pext r10, r10, r9
  12766. mov QWORD PTR [rcx+56], r11
  12767. sbb r12, r10
  12768. mov r11, QWORD PTR [r8+72]
  12769. mov r10, QWORD PTR [rdx+72]
  12770. pext r11, r11, r9
  12771. mov QWORD PTR [rcx+64], r12
  12772. sbb r10, r11
  12773. mov r12, QWORD PTR [r8+80]
  12774. mov r11, QWORD PTR [rdx+80]
  12775. pext r12, r12, r9
  12776. mov QWORD PTR [rcx+72], r10
  12777. sbb r11, r12
  12778. mov r10, QWORD PTR [r8+88]
  12779. mov r12, QWORD PTR [rdx+88]
  12780. pext r10, r10, r9
  12781. mov QWORD PTR [rcx+80], r11
  12782. sbb r12, r10
  12783. mov r11, QWORD PTR [r8+96]
  12784. mov r10, QWORD PTR [rdx+96]
  12785. pext r11, r11, r9
  12786. mov QWORD PTR [rcx+88], r12
  12787. sbb r10, r11
  12788. mov r12, QWORD PTR [r8+104]
  12789. mov r11, QWORD PTR [rdx+104]
  12790. pext r12, r12, r9
  12791. mov QWORD PTR [rcx+96], r10
  12792. sbb r11, r12
  12793. mov r10, QWORD PTR [r8+112]
  12794. mov r12, QWORD PTR [rdx+112]
  12795. pext r10, r10, r9
  12796. mov QWORD PTR [rcx+104], r11
  12797. sbb r12, r10
  12798. mov r11, QWORD PTR [r8+120]
  12799. mov r10, QWORD PTR [rdx+120]
  12800. pext r11, r11, r9
  12801. mov QWORD PTR [rcx+112], r12
  12802. sbb r10, r11
  12803. mov r12, QWORD PTR [r8+128]
  12804. mov r11, QWORD PTR [rdx+128]
  12805. pext r12, r12, r9
  12806. mov QWORD PTR [rcx+120], r10
  12807. sbb r11, r12
  12808. mov r10, QWORD PTR [r8+136]
  12809. mov r12, QWORD PTR [rdx+136]
  12810. pext r10, r10, r9
  12811. mov QWORD PTR [rcx+128], r11
  12812. sbb r12, r10
  12813. mov r11, QWORD PTR [r8+144]
  12814. mov r10, QWORD PTR [rdx+144]
  12815. pext r11, r11, r9
  12816. mov QWORD PTR [rcx+136], r12
  12817. sbb r10, r11
  12818. mov r12, QWORD PTR [r8+152]
  12819. mov r11, QWORD PTR [rdx+152]
  12820. pext r12, r12, r9
  12821. mov QWORD PTR [rcx+144], r10
  12822. sbb r11, r12
  12823. mov r10, QWORD PTR [r8+160]
  12824. mov r12, QWORD PTR [rdx+160]
  12825. pext r10, r10, r9
  12826. mov QWORD PTR [rcx+152], r11
  12827. sbb r12, r10
  12828. mov r11, QWORD PTR [r8+168]
  12829. mov r10, QWORD PTR [rdx+168]
  12830. pext r11, r11, r9
  12831. mov QWORD PTR [rcx+160], r12
  12832. sbb r10, r11
  12833. mov r12, QWORD PTR [r8+176]
  12834. mov r11, QWORD PTR [rdx+176]
  12835. pext r12, r12, r9
  12836. mov QWORD PTR [rcx+168], r10
  12837. sbb r11, r12
  12838. mov r10, QWORD PTR [r8+184]
  12839. mov r12, QWORD PTR [rdx+184]
  12840. pext r10, r10, r9
  12841. mov QWORD PTR [rcx+176], r11
  12842. sbb r12, r10
  12843. mov r11, QWORD PTR [r8+192]
  12844. mov r10, QWORD PTR [rdx+192]
  12845. pext r11, r11, r9
  12846. mov QWORD PTR [rcx+184], r12
  12847. sbb r10, r11
  12848. mov r12, QWORD PTR [r8+200]
  12849. mov r11, QWORD PTR [rdx+200]
  12850. pext r12, r12, r9
  12851. mov QWORD PTR [rcx+192], r10
  12852. sbb r11, r12
  12853. mov r10, QWORD PTR [r8+208]
  12854. mov r12, QWORD PTR [rdx+208]
  12855. pext r10, r10, r9
  12856. mov QWORD PTR [rcx+200], r11
  12857. sbb r12, r10
  12858. mov r11, QWORD PTR [r8+216]
  12859. mov r10, QWORD PTR [rdx+216]
  12860. pext r11, r11, r9
  12861. mov QWORD PTR [rcx+208], r12
  12862. sbb r10, r11
  12863. mov r12, QWORD PTR [r8+224]
  12864. mov r11, QWORD PTR [rdx+224]
  12865. pext r12, r12, r9
  12866. mov QWORD PTR [rcx+216], r10
  12867. sbb r11, r12
  12868. mov r10, QWORD PTR [r8+232]
  12869. mov r12, QWORD PTR [rdx+232]
  12870. pext r10, r10, r9
  12871. mov QWORD PTR [rcx+224], r11
  12872. sbb r12, r10
  12873. mov r11, QWORD PTR [r8+240]
  12874. mov r10, QWORD PTR [rdx+240]
  12875. pext r11, r11, r9
  12876. mov QWORD PTR [rcx+232], r12
  12877. sbb r10, r11
  12878. mov r12, QWORD PTR [r8+248]
  12879. mov r11, QWORD PTR [rdx+248]
  12880. pext r12, r12, r9
  12881. mov QWORD PTR [rcx+240], r10
  12882. sbb r11, r12 ; a[31] - masked b[31] - borrow; CF is now the borrow out of the whole subtraction
  12883. mov QWORD PTR [rcx+248], r11 ; r[31]
  12884. sbb rax, rax ; rax = -CF: 0 without a final borrow, -1 (all ones) with one
  12885. pop r12
  12886. ret
  12887. sp_2048_cond_sub_avx2_32 ENDP
  12888. _text ENDS
  12889. ENDIF
  12890. ; /* Compare a with b in constant time.
  12891. ; * Words are compared from offset 248 down to 0 with no branches; every word of both inputs is always read.
  12892. ; * a A single precision integer (loaded through rcx).
  12893. ; * b A single precision integer (loaded through rdx).
  12894. ; * return -1, 0 or 1 in rax if a is less than, equal to or greater than b
  12895. ; * respectively.
  12896. ; */
  12897. _text SEGMENT READONLY PARA
  12898. sp_2048_cmp_32 PROC
  12899. push r12 ; r12 holds b's current word; keep the caller's value
  12900. xor r9, r9 ; r9 = 0, the value used to clear the mask
  12901. mov r8, -1 ; r8 = mask: all ones until the first differing word has been seen
  12902. mov rax, -1 ; running result; the final xor turns an untouched -1 into 0 for equal inputs
  12903. mov r10, 1 ; r10 = 1, the "a is greater" result
  12904. mov r11, QWORD PTR [rcx+248] ; a[31], the most significant word
  12905. mov r12, QWORD PTR [rdx+248] ; b[31]
  12906. and r11, r8 ; once the mask is 0 both words become 0, so later words compare as equal
  12907. and r12, r8
  12908. sub r11, r12 ; only the flags are used; r11 is reloaded for the next word
  12909. cmova rax, r10 ; a's word above b's word (unsigned): result = 1
  12910. cmovc rax, r8 ; a's word below b's word: result = r8, which is still -1 whenever this can fire
  12911. cmovnz r8, r9 ; words differ: clear the mask so the result is frozen
  12912. mov r11, QWORD PTR [rcx+240] ; the same eight-instruction step repeats for each lower word
  12913. mov r12, QWORD PTR [rdx+240]
  12914. and r11, r8
  12915. and r12, r8
  12916. sub r11, r12
  12917. cmova rax, r10
  12918. cmovc rax, r8
  12919. cmovnz r8, r9
  12920. mov r11, QWORD PTR [rcx+232]
  12921. mov r12, QWORD PTR [rdx+232]
  12922. and r11, r8
  12923. and r12, r8
  12924. sub r11, r12
  12925. cmova rax, r10
  12926. cmovc rax, r8
  12927. cmovnz r8, r9
  12928. mov r11, QWORD PTR [rcx+224]
  12929. mov r12, QWORD PTR [rdx+224]
  12930. and r11, r8
  12931. and r12, r8
  12932. sub r11, r12
  12933. cmova rax, r10
  12934. cmovc rax, r8
  12935. cmovnz r8, r9
  12936. mov r11, QWORD PTR [rcx+216]
  12937. mov r12, QWORD PTR [rdx+216]
  12938. and r11, r8
  12939. and r12, r8
  12940. sub r11, r12
  12941. cmova rax, r10
  12942. cmovc rax, r8
  12943. cmovnz r8, r9
  12944. mov r11, QWORD PTR [rcx+208]
  12945. mov r12, QWORD PTR [rdx+208]
  12946. and r11, r8
  12947. and r12, r8
  12948. sub r11, r12
  12949. cmova rax, r10
  12950. cmovc rax, r8
  12951. cmovnz r8, r9
  12952. mov r11, QWORD PTR [rcx+200]
  12953. mov r12, QWORD PTR [rdx+200]
  12954. and r11, r8
  12955. and r12, r8
  12956. sub r11, r12
  12957. cmova rax, r10
  12958. cmovc rax, r8
  12959. cmovnz r8, r9
  12960. mov r11, QWORD PTR [rcx+192]
  12961. mov r12, QWORD PTR [rdx+192]
  12962. and r11, r8
  12963. and r12, r8
  12964. sub r11, r12
  12965. cmova rax, r10
  12966. cmovc rax, r8
  12967. cmovnz r8, r9
  12968. mov r11, QWORD PTR [rcx+184]
  12969. mov r12, QWORD PTR [rdx+184]
  12970. and r11, r8
  12971. and r12, r8
  12972. sub r11, r12
  12973. cmova rax, r10
  12974. cmovc rax, r8
  12975. cmovnz r8, r9
  12976. mov r11, QWORD PTR [rcx+176]
  12977. mov r12, QWORD PTR [rdx+176]
  12978. and r11, r8
  12979. and r12, r8
  12980. sub r11, r12
  12981. cmova rax, r10
  12982. cmovc rax, r8
  12983. cmovnz r8, r9
  12984. mov r11, QWORD PTR [rcx+168]
  12985. mov r12, QWORD PTR [rdx+168]
  12986. and r11, r8
  12987. and r12, r8
  12988. sub r11, r12
  12989. cmova rax, r10
  12990. cmovc rax, r8
  12991. cmovnz r8, r9
  12992. mov r11, QWORD PTR [rcx+160]
  12993. mov r12, QWORD PTR [rdx+160]
  12994. and r11, r8
  12995. and r12, r8
  12996. sub r11, r12
  12997. cmova rax, r10
  12998. cmovc rax, r8
  12999. cmovnz r8, r9
  13000. mov r11, QWORD PTR [rcx+152]
  13001. mov r12, QWORD PTR [rdx+152]
  13002. and r11, r8
  13003. and r12, r8
  13004. sub r11, r12
  13005. cmova rax, r10
  13006. cmovc rax, r8
  13007. cmovnz r8, r9
  13008. mov r11, QWORD PTR [rcx+144]
  13009. mov r12, QWORD PTR [rdx+144]
  13010. and r11, r8
  13011. and r12, r8
  13012. sub r11, r12
  13013. cmova rax, r10
  13014. cmovc rax, r8
  13015. cmovnz r8, r9
  13016. mov r11, QWORD PTR [rcx+136]
  13017. mov r12, QWORD PTR [rdx+136]
  13018. and r11, r8
  13019. and r12, r8
  13020. sub r11, r12
  13021. cmova rax, r10
  13022. cmovc rax, r8
  13023. cmovnz r8, r9
  13024. mov r11, QWORD PTR [rcx+128]
  13025. mov r12, QWORD PTR [rdx+128]
  13026. and r11, r8
  13027. and r12, r8
  13028. sub r11, r12
  13029. cmova rax, r10
  13030. cmovc rax, r8
  13031. cmovnz r8, r9
  13032. mov r11, QWORD PTR [rcx+120]
  13033. mov r12, QWORD PTR [rdx+120]
  13034. and r11, r8
  13035. and r12, r8
  13036. sub r11, r12
  13037. cmova rax, r10
  13038. cmovc rax, r8
  13039. cmovnz r8, r9
  13040. mov r11, QWORD PTR [rcx+112]
  13041. mov r12, QWORD PTR [rdx+112]
  13042. and r11, r8
  13043. and r12, r8
  13044. sub r11, r12
  13045. cmova rax, r10
  13046. cmovc rax, r8
  13047. cmovnz r8, r9
  13048. mov r11, QWORD PTR [rcx+104]
  13049. mov r12, QWORD PTR [rdx+104]
  13050. and r11, r8
  13051. and r12, r8
  13052. sub r11, r12
  13053. cmova rax, r10
  13054. cmovc rax, r8
  13055. cmovnz r8, r9
  13056. mov r11, QWORD PTR [rcx+96]
  13057. mov r12, QWORD PTR [rdx+96]
  13058. and r11, r8
  13059. and r12, r8
  13060. sub r11, r12
  13061. cmova rax, r10
  13062. cmovc rax, r8
  13063. cmovnz r8, r9
  13064. mov r11, QWORD PTR [rcx+88]
  13065. mov r12, QWORD PTR [rdx+88]
  13066. and r11, r8
  13067. and r12, r8
  13068. sub r11, r12
  13069. cmova rax, r10
  13070. cmovc rax, r8
  13071. cmovnz r8, r9
  13072. mov r11, QWORD PTR [rcx+80]
  13073. mov r12, QWORD PTR [rdx+80]
  13074. and r11, r8
  13075. and r12, r8
  13076. sub r11, r12
  13077. cmova rax, r10
  13078. cmovc rax, r8
  13079. cmovnz r8, r9
  13080. mov r11, QWORD PTR [rcx+72]
  13081. mov r12, QWORD PTR [rdx+72]
  13082. and r11, r8
  13083. and r12, r8
  13084. sub r11, r12
  13085. cmova rax, r10
  13086. cmovc rax, r8
  13087. cmovnz r8, r9
  13088. mov r11, QWORD PTR [rcx+64]
  13089. mov r12, QWORD PTR [rdx+64]
  13090. and r11, r8
  13091. and r12, r8
  13092. sub r11, r12
  13093. cmova rax, r10
  13094. cmovc rax, r8
  13095. cmovnz r8, r9
  13096. mov r11, QWORD PTR [rcx+56]
  13097. mov r12, QWORD PTR [rdx+56]
  13098. and r11, r8
  13099. and r12, r8
  13100. sub r11, r12
  13101. cmova rax, r10
  13102. cmovc rax, r8
  13103. cmovnz r8, r9
  13104. mov r11, QWORD PTR [rcx+48]
  13105. mov r12, QWORD PTR [rdx+48]
  13106. and r11, r8
  13107. and r12, r8
  13108. sub r11, r12
  13109. cmova rax, r10
  13110. cmovc rax, r8
  13111. cmovnz r8, r9
  13112. mov r11, QWORD PTR [rcx+40]
  13113. mov r12, QWORD PTR [rdx+40]
  13114. and r11, r8
  13115. and r12, r8
  13116. sub r11, r12
  13117. cmova rax, r10
  13118. cmovc rax, r8
  13119. cmovnz r8, r9
  13120. mov r11, QWORD PTR [rcx+32]
  13121. mov r12, QWORD PTR [rdx+32]
  13122. and r11, r8
  13123. and r12, r8
  13124. sub r11, r12
  13125. cmova rax, r10
  13126. cmovc rax, r8
  13127. cmovnz r8, r9
  13128. mov r11, QWORD PTR [rcx+24]
  13129. mov r12, QWORD PTR [rdx+24]
  13130. and r11, r8
  13131. and r12, r8
  13132. sub r11, r12
  13133. cmova rax, r10
  13134. cmovc rax, r8
  13135. cmovnz r8, r9
  13136. mov r11, QWORD PTR [rcx+16]
  13137. mov r12, QWORD PTR [rdx+16]
  13138. and r11, r8
  13139. and r12, r8
  13140. sub r11, r12
  13141. cmova rax, r10
  13142. cmovc rax, r8
  13143. cmovnz r8, r9
  13144. mov r11, QWORD PTR [rcx+8]
  13145. mov r12, QWORD PTR [rdx+8]
  13146. and r11, r8
  13147. and r12, r8
  13148. sub r11, r12
  13149. cmova rax, r10
  13150. cmovc rax, r8
  13151. cmovnz r8, r9
  13152. mov r11, QWORD PTR [rcx] ; a[0], the least significant word
  13153. mov r12, QWORD PTR [rdx] ; b[0]
  13154. and r11, r8
  13155. and r12, r8
  13156. sub r11, r12
  13157. cmova rax, r10
  13158. cmovc rax, r8
  13159. cmovnz r8, r9
  13160. xor rax, r8 ; all words equal: r8 = -1, so -1 xor -1 = 0; otherwise r8 = 0 and rax keeps 1 or -1
  13161. pop r12
  13162. ret
  13163. sp_2048_cmp_32 ENDP
  13164. _text ENDS
  13165. IFNDEF WC_NO_CACHE_RESISTANT
  13166. _text SEGMENT READONLY PARA
  13167. sp_2048_get_from_table_32 PROC
  13168. sub rsp, 128
  13169. vmovdqu OWORD PTR [rsp], xmm6
  13170. vmovdqu OWORD PTR [rsp+16], xmm7
  13171. vmovdqu OWORD PTR [rsp+32], xmm8
  13172. vmovdqu OWORD PTR [rsp+48], xmm9
  13173. vmovdqu OWORD PTR [rsp+64], xmm10
  13174. vmovdqu OWORD PTR [rsp+80], xmm11
  13175. vmovdqu OWORD PTR [rsp+96], xmm12
  13176. vmovdqu OWORD PTR [rsp+112], xmm13
  13177. mov rax, 1
  13178. movd xmm10, r8
  13179. movd xmm11, rax
  13180. pxor xmm13, xmm13
  13181. pshufd xmm11, xmm11, 0
  13182. pshufd xmm10, xmm10, 0
  13183. ; START: 0-7
  13184. pxor xmm13, xmm13
  13185. pxor xmm4, xmm4
  13186. pxor xmm5, xmm5
  13187. pxor xmm6, xmm6
  13188. pxor xmm7, xmm7
  13189. ; ENTRY: 0
  13190. mov r9, QWORD PTR [rdx]
  13191. movdqu xmm12, xmm13
  13192. pcmpeqd xmm12, xmm10
  13193. movdqu xmm0, [r9]
  13194. movdqu xmm1, [r9+16]
  13195. movdqu xmm2, [r9+32]
  13196. movdqu xmm3, [r9+48]
  13197. pand xmm0, xmm12
  13198. pand xmm1, xmm12
  13199. pand xmm2, xmm12
  13200. pand xmm3, xmm12
  13201. por xmm4, xmm0
  13202. por xmm5, xmm1
  13203. por xmm6, xmm2
  13204. por xmm7, xmm3
  13205. paddd xmm13, xmm11
  13206. ; ENTRY: 1
  13207. mov r9, QWORD PTR [rdx+8]
  13208. movdqu xmm12, xmm13
  13209. pcmpeqd xmm12, xmm10
  13210. movdqu xmm0, [r9]
  13211. movdqu xmm1, [r9+16]
  13212. movdqu xmm2, [r9+32]
  13213. movdqu xmm3, [r9+48]
  13214. pand xmm0, xmm12
  13215. pand xmm1, xmm12
  13216. pand xmm2, xmm12
  13217. pand xmm3, xmm12
  13218. por xmm4, xmm0
  13219. por xmm5, xmm1
  13220. por xmm6, xmm2
  13221. por xmm7, xmm3
  13222. paddd xmm13, xmm11
  13223. ; ENTRY: 2
  13224. mov r9, QWORD PTR [rdx+16]
  13225. movdqu xmm12, xmm13
  13226. pcmpeqd xmm12, xmm10
  13227. movdqu xmm0, [r9]
  13228. movdqu xmm1, [r9+16]
  13229. movdqu xmm2, [r9+32]
  13230. movdqu xmm3, [r9+48]
  13231. pand xmm0, xmm12
  13232. pand xmm1, xmm12
  13233. pand xmm2, xmm12
  13234. pand xmm3, xmm12
  13235. por xmm4, xmm0
  13236. por xmm5, xmm1
  13237. por xmm6, xmm2
  13238. por xmm7, xmm3
  13239. paddd xmm13, xmm11
  13240. ; ENTRY: 3
  13241. mov r9, QWORD PTR [rdx+24]
  13242. movdqu xmm12, xmm13
  13243. pcmpeqd xmm12, xmm10
  13244. movdqu xmm0, [r9]
  13245. movdqu xmm1, [r9+16]
  13246. movdqu xmm2, [r9+32]
  13247. movdqu xmm3, [r9+48]
  13248. pand xmm0, xmm12
  13249. pand xmm1, xmm12
  13250. pand xmm2, xmm12
  13251. pand xmm3, xmm12
  13252. por xmm4, xmm0
  13253. por xmm5, xmm1
  13254. por xmm6, xmm2
  13255. por xmm7, xmm3
  13256. paddd xmm13, xmm11
  13257. ; ENTRY: 4
  13258. mov r9, QWORD PTR [rdx+32]
  13259. movdqu xmm12, xmm13
  13260. pcmpeqd xmm12, xmm10
  13261. movdqu xmm0, [r9]
  13262. movdqu xmm1, [r9+16]
  13263. movdqu xmm2, [r9+32]
  13264. movdqu xmm3, [r9+48]
  13265. pand xmm0, xmm12
  13266. pand xmm1, xmm12
  13267. pand xmm2, xmm12
  13268. pand xmm3, xmm12
  13269. por xmm4, xmm0
  13270. por xmm5, xmm1
  13271. por xmm6, xmm2
  13272. por xmm7, xmm3
  13273. paddd xmm13, xmm11
  13274. ; ENTRY: 5
  13275. mov r9, QWORD PTR [rdx+40]
  13276. movdqu xmm12, xmm13
  13277. pcmpeqd xmm12, xmm10
  13278. movdqu xmm0, [r9]
  13279. movdqu xmm1, [r9+16]
  13280. movdqu xmm2, [r9+32]
  13281. movdqu xmm3, [r9+48]
  13282. pand xmm0, xmm12
  13283. pand xmm1, xmm12
  13284. pand xmm2, xmm12
  13285. pand xmm3, xmm12
  13286. por xmm4, xmm0
  13287. por xmm5, xmm1
  13288. por xmm6, xmm2
  13289. por xmm7, xmm3
  13290. paddd xmm13, xmm11
  13291. ; ENTRY: 6
  13292. mov r9, QWORD PTR [rdx+48]
  13293. movdqu xmm12, xmm13
  13294. pcmpeqd xmm12, xmm10
  13295. movdqu xmm0, [r9]
  13296. movdqu xmm1, [r9+16]
  13297. movdqu xmm2, [r9+32]
  13298. movdqu xmm3, [r9+48]
  13299. pand xmm0, xmm12
  13300. pand xmm1, xmm12
  13301. pand xmm2, xmm12
  13302. pand xmm3, xmm12
  13303. por xmm4, xmm0
  13304. por xmm5, xmm1
  13305. por xmm6, xmm2
  13306. por xmm7, xmm3
  13307. paddd xmm13, xmm11
  13308. ; ENTRY: 7
  13309. mov r9, QWORD PTR [rdx+56]
  13310. movdqu xmm12, xmm13
  13311. pcmpeqd xmm12, xmm10
  13312. movdqu xmm0, [r9]
  13313. movdqu xmm1, [r9+16]
  13314. movdqu xmm2, [r9+32]
  13315. movdqu xmm3, [r9+48]
  13316. pand xmm0, xmm12
  13317. pand xmm1, xmm12
  13318. pand xmm2, xmm12
  13319. pand xmm3, xmm12
  13320. por xmm4, xmm0
  13321. por xmm5, xmm1
  13322. por xmm6, xmm2
  13323. por xmm7, xmm3
  13324. paddd xmm13, xmm11
  13325. ; ENTRY: 8
  13326. mov r9, QWORD PTR [rdx+64]
  13327. movdqu xmm12, xmm13
  13328. pcmpeqd xmm12, xmm10
  13329. movdqu xmm0, [r9]
  13330. movdqu xmm1, [r9+16]
  13331. movdqu xmm2, [r9+32]
  13332. movdqu xmm3, [r9+48]
  13333. pand xmm0, xmm12
  13334. pand xmm1, xmm12
  13335. pand xmm2, xmm12
  13336. pand xmm3, xmm12
  13337. por xmm4, xmm0
  13338. por xmm5, xmm1
  13339. por xmm6, xmm2
  13340. por xmm7, xmm3
  13341. paddd xmm13, xmm11
  13342. ; ENTRY: 9
  13343. mov r9, QWORD PTR [rdx+72]
  13344. movdqu xmm12, xmm13
  13345. pcmpeqd xmm12, xmm10
  13346. movdqu xmm0, [r9]
  13347. movdqu xmm1, [r9+16]
  13348. movdqu xmm2, [r9+32]
  13349. movdqu xmm3, [r9+48]
  13350. pand xmm0, xmm12
  13351. pand xmm1, xmm12
  13352. pand xmm2, xmm12
  13353. pand xmm3, xmm12
  13354. por xmm4, xmm0
  13355. por xmm5, xmm1
  13356. por xmm6, xmm2
  13357. por xmm7, xmm3
  13358. paddd xmm13, xmm11
  13359. ; ENTRY: 10
  13360. mov r9, QWORD PTR [rdx+80]
  13361. movdqu xmm12, xmm13
  13362. pcmpeqd xmm12, xmm10
  13363. movdqu xmm0, [r9]
  13364. movdqu xmm1, [r9+16]
  13365. movdqu xmm2, [r9+32]
  13366. movdqu xmm3, [r9+48]
  13367. pand xmm0, xmm12
  13368. pand xmm1, xmm12
  13369. pand xmm2, xmm12
  13370. pand xmm3, xmm12
  13371. por xmm4, xmm0
  13372. por xmm5, xmm1
  13373. por xmm6, xmm2
  13374. por xmm7, xmm3
  13375. paddd xmm13, xmm11
  13376. ; ENTRY: 11
  13377. mov r9, QWORD PTR [rdx+88]
  13378. movdqu xmm12, xmm13
  13379. pcmpeqd xmm12, xmm10
  13380. movdqu xmm0, [r9]
  13381. movdqu xmm1, [r9+16]
  13382. movdqu xmm2, [r9+32]
  13383. movdqu xmm3, [r9+48]
  13384. pand xmm0, xmm12
  13385. pand xmm1, xmm12
  13386. pand xmm2, xmm12
  13387. pand xmm3, xmm12
  13388. por xmm4, xmm0
  13389. por xmm5, xmm1
  13390. por xmm6, xmm2
  13391. por xmm7, xmm3
  13392. paddd xmm13, xmm11
  13393. ; ENTRY: 12
  13394. mov r9, QWORD PTR [rdx+96]
  13395. movdqu xmm12, xmm13
  13396. pcmpeqd xmm12, xmm10
  13397. movdqu xmm0, [r9]
  13398. movdqu xmm1, [r9+16]
  13399. movdqu xmm2, [r9+32]
  13400. movdqu xmm3, [r9+48]
  13401. pand xmm0, xmm12
  13402. pand xmm1, xmm12
  13403. pand xmm2, xmm12
  13404. pand xmm3, xmm12
  13405. por xmm4, xmm0
  13406. por xmm5, xmm1
  13407. por xmm6, xmm2
  13408. por xmm7, xmm3
  13409. paddd xmm13, xmm11
  13410. ; ENTRY: 13
  13411. mov r9, QWORD PTR [rdx+104]
  13412. movdqu xmm12, xmm13
  13413. pcmpeqd xmm12, xmm10
  13414. movdqu xmm0, [r9]
  13415. movdqu xmm1, [r9+16]
  13416. movdqu xmm2, [r9+32]
  13417. movdqu xmm3, [r9+48]
  13418. pand xmm0, xmm12
  13419. pand xmm1, xmm12
  13420. pand xmm2, xmm12
  13421. pand xmm3, xmm12
  13422. por xmm4, xmm0
  13423. por xmm5, xmm1
  13424. por xmm6, xmm2
  13425. por xmm7, xmm3
  13426. paddd xmm13, xmm11
  13427. ; ENTRY: 14
  13428. mov r9, QWORD PTR [rdx+112]
  13429. movdqu xmm12, xmm13
  13430. pcmpeqd xmm12, xmm10
  13431. movdqu xmm0, [r9]
  13432. movdqu xmm1, [r9+16]
  13433. movdqu xmm2, [r9+32]
  13434. movdqu xmm3, [r9+48]
  13435. pand xmm0, xmm12
  13436. pand xmm1, xmm12
  13437. pand xmm2, xmm12
  13438. pand xmm3, xmm12
  13439. por xmm4, xmm0
  13440. por xmm5, xmm1
  13441. por xmm6, xmm2
  13442. por xmm7, xmm3
  13443. paddd xmm13, xmm11
  13444. ; ENTRY: 15
  13445. mov r9, QWORD PTR [rdx+120]
  13446. movdqu xmm12, xmm13
  13447. pcmpeqd xmm12, xmm10
  13448. movdqu xmm0, [r9]
  13449. movdqu xmm1, [r9+16]
  13450. movdqu xmm2, [r9+32]
  13451. movdqu xmm3, [r9+48]
  13452. pand xmm0, xmm12
  13453. pand xmm1, xmm12
  13454. pand xmm2, xmm12
  13455. pand xmm3, xmm12
  13456. por xmm4, xmm0
  13457. por xmm5, xmm1
  13458. por xmm6, xmm2
  13459. por xmm7, xmm3
  13460. paddd xmm13, xmm11
  13461. ; ENTRY: 16
  13462. mov r9, QWORD PTR [rdx+128]
  13463. movdqu xmm12, xmm13
  13464. pcmpeqd xmm12, xmm10
  13465. movdqu xmm0, [r9]
  13466. movdqu xmm1, [r9+16]
  13467. movdqu xmm2, [r9+32]
  13468. movdqu xmm3, [r9+48]
  13469. pand xmm0, xmm12
  13470. pand xmm1, xmm12
  13471. pand xmm2, xmm12
  13472. pand xmm3, xmm12
  13473. por xmm4, xmm0
  13474. por xmm5, xmm1
  13475. por xmm6, xmm2
  13476. por xmm7, xmm3
  13477. paddd xmm13, xmm11
  13478. ; ENTRY: 17
  13479. mov r9, QWORD PTR [rdx+136]
  13480. movdqu xmm12, xmm13
  13481. pcmpeqd xmm12, xmm10
  13482. movdqu xmm0, [r9]
  13483. movdqu xmm1, [r9+16]
  13484. movdqu xmm2, [r9+32]
  13485. movdqu xmm3, [r9+48]
  13486. pand xmm0, xmm12
  13487. pand xmm1, xmm12
  13488. pand xmm2, xmm12
  13489. pand xmm3, xmm12
  13490. por xmm4, xmm0
  13491. por xmm5, xmm1
  13492. por xmm6, xmm2
  13493. por xmm7, xmm3
  13494. paddd xmm13, xmm11
  13495. ; ENTRY: 18
  13496. mov r9, QWORD PTR [rdx+144]
  13497. movdqu xmm12, xmm13
  13498. pcmpeqd xmm12, xmm10
  13499. movdqu xmm0, [r9]
  13500. movdqu xmm1, [r9+16]
  13501. movdqu xmm2, [r9+32]
  13502. movdqu xmm3, [r9+48]
  13503. pand xmm0, xmm12
  13504. pand xmm1, xmm12
  13505. pand xmm2, xmm12
  13506. pand xmm3, xmm12
  13507. por xmm4, xmm0
  13508. por xmm5, xmm1
  13509. por xmm6, xmm2
  13510. por xmm7, xmm3
  13511. paddd xmm13, xmm11
  13512. ; ENTRY: 19
  13513. mov r9, QWORD PTR [rdx+152]
  13514. movdqu xmm12, xmm13
  13515. pcmpeqd xmm12, xmm10
  13516. movdqu xmm0, [r9]
  13517. movdqu xmm1, [r9+16]
  13518. movdqu xmm2, [r9+32]
  13519. movdqu xmm3, [r9+48]
  13520. pand xmm0, xmm12
  13521. pand xmm1, xmm12
  13522. pand xmm2, xmm12
  13523. pand xmm3, xmm12
  13524. por xmm4, xmm0
  13525. por xmm5, xmm1
  13526. por xmm6, xmm2
  13527. por xmm7, xmm3
  13528. paddd xmm13, xmm11
  13529. ; ENTRY: 20
  13530. mov r9, QWORD PTR [rdx+160]
  13531. movdqu xmm12, xmm13
  13532. pcmpeqd xmm12, xmm10
  13533. movdqu xmm0, [r9]
  13534. movdqu xmm1, [r9+16]
  13535. movdqu xmm2, [r9+32]
  13536. movdqu xmm3, [r9+48]
  13537. pand xmm0, xmm12
  13538. pand xmm1, xmm12
  13539. pand xmm2, xmm12
  13540. pand xmm3, xmm12
  13541. por xmm4, xmm0
  13542. por xmm5, xmm1
  13543. por xmm6, xmm2
  13544. por xmm7, xmm3
  13545. paddd xmm13, xmm11
  13546. ; ENTRY: 21
  13547. mov r9, QWORD PTR [rdx+168]
  13548. movdqu xmm12, xmm13
  13549. pcmpeqd xmm12, xmm10
  13550. movdqu xmm0, [r9]
  13551. movdqu xmm1, [r9+16]
  13552. movdqu xmm2, [r9+32]
  13553. movdqu xmm3, [r9+48]
  13554. pand xmm0, xmm12
  13555. pand xmm1, xmm12
  13556. pand xmm2, xmm12
  13557. pand xmm3, xmm12
  13558. por xmm4, xmm0
  13559. por xmm5, xmm1
  13560. por xmm6, xmm2
  13561. por xmm7, xmm3
  13562. paddd xmm13, xmm11
  13563. ; ENTRY: 22
  13564. mov r9, QWORD PTR [rdx+176]
  13565. movdqu xmm12, xmm13
  13566. pcmpeqd xmm12, xmm10
  13567. movdqu xmm0, [r9]
  13568. movdqu xmm1, [r9+16]
  13569. movdqu xmm2, [r9+32]
  13570. movdqu xmm3, [r9+48]
  13571. pand xmm0, xmm12
  13572. pand xmm1, xmm12
  13573. pand xmm2, xmm12
  13574. pand xmm3, xmm12
  13575. por xmm4, xmm0
  13576. por xmm5, xmm1
  13577. por xmm6, xmm2
  13578. por xmm7, xmm3
  13579. paddd xmm13, xmm11
  13580. ; ENTRY: 23
  13581. mov r9, QWORD PTR [rdx+184]
  13582. movdqu xmm12, xmm13
  13583. pcmpeqd xmm12, xmm10
  13584. movdqu xmm0, [r9]
  13585. movdqu xmm1, [r9+16]
  13586. movdqu xmm2, [r9+32]
  13587. movdqu xmm3, [r9+48]
  13588. pand xmm0, xmm12
  13589. pand xmm1, xmm12
  13590. pand xmm2, xmm12
  13591. pand xmm3, xmm12
  13592. por xmm4, xmm0
  13593. por xmm5, xmm1
  13594. por xmm6, xmm2
  13595. por xmm7, xmm3
  13596. paddd xmm13, xmm11
  13597. ; ENTRY: 24
  13598. mov r9, QWORD PTR [rdx+192]
  13599. movdqu xmm12, xmm13
  13600. pcmpeqd xmm12, xmm10
  13601. movdqu xmm0, [r9]
  13602. movdqu xmm1, [r9+16]
  13603. movdqu xmm2, [r9+32]
  13604. movdqu xmm3, [r9+48]
  13605. pand xmm0, xmm12
  13606. pand xmm1, xmm12
  13607. pand xmm2, xmm12
  13608. pand xmm3, xmm12
  13609. por xmm4, xmm0
  13610. por xmm5, xmm1
  13611. por xmm6, xmm2
  13612. por xmm7, xmm3
  13613. paddd xmm13, xmm11
  13614. ; ENTRY: 25
  13615. mov r9, QWORD PTR [rdx+200]
  13616. movdqu xmm12, xmm13
  13617. pcmpeqd xmm12, xmm10
  13618. movdqu xmm0, [r9]
  13619. movdqu xmm1, [r9+16]
  13620. movdqu xmm2, [r9+32]
  13621. movdqu xmm3, [r9+48]
  13622. pand xmm0, xmm12
  13623. pand xmm1, xmm12
  13624. pand xmm2, xmm12
  13625. pand xmm3, xmm12
  13626. por xmm4, xmm0
  13627. por xmm5, xmm1
  13628. por xmm6, xmm2
  13629. por xmm7, xmm3
  13630. paddd xmm13, xmm11
  13631. ; ENTRY: 26
  13632. mov r9, QWORD PTR [rdx+208]
  13633. movdqu xmm12, xmm13
  13634. pcmpeqd xmm12, xmm10
  13635. movdqu xmm0, [r9]
  13636. movdqu xmm1, [r9+16]
  13637. movdqu xmm2, [r9+32]
  13638. movdqu xmm3, [r9+48]
  13639. pand xmm0, xmm12
  13640. pand xmm1, xmm12
  13641. pand xmm2, xmm12
  13642. pand xmm3, xmm12
  13643. por xmm4, xmm0
  13644. por xmm5, xmm1
  13645. por xmm6, xmm2
  13646. por xmm7, xmm3
  13647. paddd xmm13, xmm11
  13648. ; ENTRY: 27
  13649. mov r9, QWORD PTR [rdx+216]
  13650. movdqu xmm12, xmm13
  13651. pcmpeqd xmm12, xmm10
  13652. movdqu xmm0, [r9]
  13653. movdqu xmm1, [r9+16]
  13654. movdqu xmm2, [r9+32]
  13655. movdqu xmm3, [r9+48]
  13656. pand xmm0, xmm12
  13657. pand xmm1, xmm12
  13658. pand xmm2, xmm12
  13659. pand xmm3, xmm12
  13660. por xmm4, xmm0
  13661. por xmm5, xmm1
  13662. por xmm6, xmm2
  13663. por xmm7, xmm3
  13664. paddd xmm13, xmm11
  13665. ; ENTRY: 28
  13666. mov r9, QWORD PTR [rdx+224]
  13667. movdqu xmm12, xmm13
  13668. pcmpeqd xmm12, xmm10
  13669. movdqu xmm0, [r9]
  13670. movdqu xmm1, [r9+16]
  13671. movdqu xmm2, [r9+32]
  13672. movdqu xmm3, [r9+48]
  13673. pand xmm0, xmm12
  13674. pand xmm1, xmm12
  13675. pand xmm2, xmm12
  13676. pand xmm3, xmm12
  13677. por xmm4, xmm0
  13678. por xmm5, xmm1
  13679. por xmm6, xmm2
  13680. por xmm7, xmm3
  13681. paddd xmm13, xmm11
  13682. ; ENTRY: 29
  13683. mov r9, QWORD PTR [rdx+232]
  13684. movdqu xmm12, xmm13
  13685. pcmpeqd xmm12, xmm10
  13686. movdqu xmm0, [r9]
  13687. movdqu xmm1, [r9+16]
  13688. movdqu xmm2, [r9+32]
  13689. movdqu xmm3, [r9+48]
  13690. pand xmm0, xmm12
  13691. pand xmm1, xmm12
  13692. pand xmm2, xmm12
  13693. pand xmm3, xmm12
  13694. por xmm4, xmm0
  13695. por xmm5, xmm1
  13696. por xmm6, xmm2
  13697. por xmm7, xmm3
  13698. paddd xmm13, xmm11
  13699. ; ENTRY: 30
  13700. mov r9, QWORD PTR [rdx+240]
  13701. movdqu xmm12, xmm13
  13702. pcmpeqd xmm12, xmm10
  13703. movdqu xmm0, [r9]
  13704. movdqu xmm1, [r9+16]
  13705. movdqu xmm2, [r9+32]
  13706. movdqu xmm3, [r9+48]
  13707. pand xmm0, xmm12
  13708. pand xmm1, xmm12
  13709. pand xmm2, xmm12
  13710. pand xmm3, xmm12
  13711. por xmm4, xmm0
  13712. por xmm5, xmm1
  13713. por xmm6, xmm2
  13714. por xmm7, xmm3
  13715. paddd xmm13, xmm11
  13716. ; ENTRY: 31
  13717. mov r9, QWORD PTR [rdx+248]
  13718. movdqu xmm12, xmm13
  13719. pcmpeqd xmm12, xmm10
  13720. movdqu xmm0, [r9]
  13721. movdqu xmm1, [r9+16]
  13722. movdqu xmm2, [r9+32]
  13723. movdqu xmm3, [r9+48]
  13724. pand xmm0, xmm12
  13725. pand xmm1, xmm12
  13726. pand xmm2, xmm12
  13727. pand xmm3, xmm12
  13728. por xmm4, xmm0
  13729. por xmm5, xmm1
  13730. por xmm6, xmm2
  13731. por xmm7, xmm3
  13732. paddd xmm13, xmm11
  13733. ; ENTRY: 32
  13734. mov r9, QWORD PTR [rdx+256]
  13735. movdqu xmm12, xmm13
  13736. pcmpeqd xmm12, xmm10
  13737. movdqu xmm0, [r9]
  13738. movdqu xmm1, [r9+16]
  13739. movdqu xmm2, [r9+32]
  13740. movdqu xmm3, [r9+48]
  13741. pand xmm0, xmm12
  13742. pand xmm1, xmm12
  13743. pand xmm2, xmm12
  13744. pand xmm3, xmm12
  13745. por xmm4, xmm0
  13746. por xmm5, xmm1
  13747. por xmm6, xmm2
  13748. por xmm7, xmm3
  13749. paddd xmm13, xmm11
  13750. ; ENTRY: 33
  13751. mov r9, QWORD PTR [rdx+264]
  13752. movdqu xmm12, xmm13
  13753. pcmpeqd xmm12, xmm10
  13754. movdqu xmm0, [r9]
  13755. movdqu xmm1, [r9+16]
  13756. movdqu xmm2, [r9+32]
  13757. movdqu xmm3, [r9+48]
  13758. pand xmm0, xmm12
  13759. pand xmm1, xmm12
  13760. pand xmm2, xmm12
  13761. pand xmm3, xmm12
  13762. por xmm4, xmm0
  13763. por xmm5, xmm1
  13764. por xmm6, xmm2
  13765. por xmm7, xmm3
  13766. paddd xmm13, xmm11
  13767. ; ENTRY: 34
  13768. mov r9, QWORD PTR [rdx+272]
  13769. movdqu xmm12, xmm13
  13770. pcmpeqd xmm12, xmm10
  13771. movdqu xmm0, [r9]
  13772. movdqu xmm1, [r9+16]
  13773. movdqu xmm2, [r9+32]
  13774. movdqu xmm3, [r9+48]
  13775. pand xmm0, xmm12
  13776. pand xmm1, xmm12
  13777. pand xmm2, xmm12
  13778. pand xmm3, xmm12
  13779. por xmm4, xmm0
  13780. por xmm5, xmm1
  13781. por xmm6, xmm2
  13782. por xmm7, xmm3
  13783. paddd xmm13, xmm11
  13784. ; ENTRY: 35
  13785. mov r9, QWORD PTR [rdx+280]
  13786. movdqu xmm12, xmm13
  13787. pcmpeqd xmm12, xmm10
  13788. movdqu xmm0, [r9]
  13789. movdqu xmm1, [r9+16]
  13790. movdqu xmm2, [r9+32]
  13791. movdqu xmm3, [r9+48]
  13792. pand xmm0, xmm12
  13793. pand xmm1, xmm12
  13794. pand xmm2, xmm12
  13795. pand xmm3, xmm12
  13796. por xmm4, xmm0
  13797. por xmm5, xmm1
  13798. por xmm6, xmm2
  13799. por xmm7, xmm3
  13800. paddd xmm13, xmm11
  13801. ; ENTRY: 36
  13802. mov r9, QWORD PTR [rdx+288]
  13803. movdqu xmm12, xmm13
  13804. pcmpeqd xmm12, xmm10
  13805. movdqu xmm0, [r9]
  13806. movdqu xmm1, [r9+16]
  13807. movdqu xmm2, [r9+32]
  13808. movdqu xmm3, [r9+48]
  13809. pand xmm0, xmm12
  13810. pand xmm1, xmm12
  13811. pand xmm2, xmm12
  13812. pand xmm3, xmm12
  13813. por xmm4, xmm0
  13814. por xmm5, xmm1
  13815. por xmm6, xmm2
  13816. por xmm7, xmm3
  13817. paddd xmm13, xmm11
  13818. ; ENTRY: 37
  13819. mov r9, QWORD PTR [rdx+296]
  13820. movdqu xmm12, xmm13
  13821. pcmpeqd xmm12, xmm10
  13822. movdqu xmm0, [r9]
  13823. movdqu xmm1, [r9+16]
  13824. movdqu xmm2, [r9+32]
  13825. movdqu xmm3, [r9+48]
  13826. pand xmm0, xmm12
  13827. pand xmm1, xmm12
  13828. pand xmm2, xmm12
  13829. pand xmm3, xmm12
  13830. por xmm4, xmm0
  13831. por xmm5, xmm1
  13832. por xmm6, xmm2
  13833. por xmm7, xmm3
  13834. paddd xmm13, xmm11
  13835. ; ENTRY: 38
  13836. mov r9, QWORD PTR [rdx+304]
  13837. movdqu xmm12, xmm13
  13838. pcmpeqd xmm12, xmm10
  13839. movdqu xmm0, [r9]
  13840. movdqu xmm1, [r9+16]
  13841. movdqu xmm2, [r9+32]
  13842. movdqu xmm3, [r9+48]
  13843. pand xmm0, xmm12
  13844. pand xmm1, xmm12
  13845. pand xmm2, xmm12
  13846. pand xmm3, xmm12
  13847. por xmm4, xmm0
  13848. por xmm5, xmm1
  13849. por xmm6, xmm2
  13850. por xmm7, xmm3
  13851. paddd xmm13, xmm11
  13852. ; ENTRY: 39
  13853. mov r9, QWORD PTR [rdx+312]
  13854. movdqu xmm12, xmm13
  13855. pcmpeqd xmm12, xmm10
  13856. movdqu xmm0, [r9]
  13857. movdqu xmm1, [r9+16]
  13858. movdqu xmm2, [r9+32]
  13859. movdqu xmm3, [r9+48]
  13860. pand xmm0, xmm12
  13861. pand xmm1, xmm12
  13862. pand xmm2, xmm12
  13863. pand xmm3, xmm12
  13864. por xmm4, xmm0
  13865. por xmm5, xmm1
  13866. por xmm6, xmm2
  13867. por xmm7, xmm3
  13868. paddd xmm13, xmm11
  13869. ; ENTRY: 40
  13870. mov r9, QWORD PTR [rdx+320]
  13871. movdqu xmm12, xmm13
  13872. pcmpeqd xmm12, xmm10
  13873. movdqu xmm0, [r9]
  13874. movdqu xmm1, [r9+16]
  13875. movdqu xmm2, [r9+32]
  13876. movdqu xmm3, [r9+48]
  13877. pand xmm0, xmm12
  13878. pand xmm1, xmm12
  13879. pand xmm2, xmm12
  13880. pand xmm3, xmm12
  13881. por xmm4, xmm0
  13882. por xmm5, xmm1
  13883. por xmm6, xmm2
  13884. por xmm7, xmm3
  13885. paddd xmm13, xmm11
  13886. ; ENTRY: 41
  13887. mov r9, QWORD PTR [rdx+328]
  13888. movdqu xmm12, xmm13
  13889. pcmpeqd xmm12, xmm10
  13890. movdqu xmm0, [r9]
  13891. movdqu xmm1, [r9+16]
  13892. movdqu xmm2, [r9+32]
  13893. movdqu xmm3, [r9+48]
  13894. pand xmm0, xmm12
  13895. pand xmm1, xmm12
  13896. pand xmm2, xmm12
  13897. pand xmm3, xmm12
  13898. por xmm4, xmm0
  13899. por xmm5, xmm1
  13900. por xmm6, xmm2
  13901. por xmm7, xmm3
  13902. paddd xmm13, xmm11
  13903. ; ENTRY: 42
  13904. mov r9, QWORD PTR [rdx+336]
  13905. movdqu xmm12, xmm13
  13906. pcmpeqd xmm12, xmm10
  13907. movdqu xmm0, [r9]
  13908. movdqu xmm1, [r9+16]
  13909. movdqu xmm2, [r9+32]
  13910. movdqu xmm3, [r9+48]
  13911. pand xmm0, xmm12
  13912. pand xmm1, xmm12
  13913. pand xmm2, xmm12
  13914. pand xmm3, xmm12
  13915. por xmm4, xmm0
  13916. por xmm5, xmm1
  13917. por xmm6, xmm2
  13918. por xmm7, xmm3
  13919. paddd xmm13, xmm11
  13920. ; ENTRY: 43
  13921. mov r9, QWORD PTR [rdx+344]
  13922. movdqu xmm12, xmm13
  13923. pcmpeqd xmm12, xmm10
  13924. movdqu xmm0, [r9]
  13925. movdqu xmm1, [r9+16]
  13926. movdqu xmm2, [r9+32]
  13927. movdqu xmm3, [r9+48]
  13928. pand xmm0, xmm12
  13929. pand xmm1, xmm12
  13930. pand xmm2, xmm12
  13931. pand xmm3, xmm12
  13932. por xmm4, xmm0
  13933. por xmm5, xmm1
  13934. por xmm6, xmm2
  13935. por xmm7, xmm3
  13936. paddd xmm13, xmm11
  13937. ; ENTRY: 44
  13938. mov r9, QWORD PTR [rdx+352]
  13939. movdqu xmm12, xmm13
  13940. pcmpeqd xmm12, xmm10
  13941. movdqu xmm0, [r9]
  13942. movdqu xmm1, [r9+16]
  13943. movdqu xmm2, [r9+32]
  13944. movdqu xmm3, [r9+48]
  13945. pand xmm0, xmm12
  13946. pand xmm1, xmm12
  13947. pand xmm2, xmm12
  13948. pand xmm3, xmm12
  13949. por xmm4, xmm0
  13950. por xmm5, xmm1
  13951. por xmm6, xmm2
  13952. por xmm7, xmm3
  13953. paddd xmm13, xmm11
  13954. ; ENTRY: 45
  13955. mov r9, QWORD PTR [rdx+360]
  13956. movdqu xmm12, xmm13
  13957. pcmpeqd xmm12, xmm10
  13958. movdqu xmm0, [r9]
  13959. movdqu xmm1, [r9+16]
  13960. movdqu xmm2, [r9+32]
  13961. movdqu xmm3, [r9+48]
  13962. pand xmm0, xmm12
  13963. pand xmm1, xmm12
  13964. pand xmm2, xmm12
  13965. pand xmm3, xmm12
  13966. por xmm4, xmm0
  13967. por xmm5, xmm1
  13968. por xmm6, xmm2
  13969. por xmm7, xmm3
  13970. paddd xmm13, xmm11
  13971. ; ENTRY: 46
  13972. mov r9, QWORD PTR [rdx+368]
  13973. movdqu xmm12, xmm13
  13974. pcmpeqd xmm12, xmm10
  13975. movdqu xmm0, [r9]
  13976. movdqu xmm1, [r9+16]
  13977. movdqu xmm2, [r9+32]
  13978. movdqu xmm3, [r9+48]
  13979. pand xmm0, xmm12
  13980. pand xmm1, xmm12
  13981. pand xmm2, xmm12
  13982. pand xmm3, xmm12
  13983. por xmm4, xmm0
  13984. por xmm5, xmm1
  13985. por xmm6, xmm2
  13986. por xmm7, xmm3
  13987. paddd xmm13, xmm11
  13988. ; ENTRY: 47
  13989. mov r9, QWORD PTR [rdx+376]
  13990. movdqu xmm12, xmm13
  13991. pcmpeqd xmm12, xmm10
  13992. movdqu xmm0, [r9]
  13993. movdqu xmm1, [r9+16]
  13994. movdqu xmm2, [r9+32]
  13995. movdqu xmm3, [r9+48]
  13996. pand xmm0, xmm12
  13997. pand xmm1, xmm12
  13998. pand xmm2, xmm12
  13999. pand xmm3, xmm12
  14000. por xmm4, xmm0
  14001. por xmm5, xmm1
  14002. por xmm6, xmm2
  14003. por xmm7, xmm3
  14004. paddd xmm13, xmm11
  14005. ; ENTRY: 48
  14006. mov r9, QWORD PTR [rdx+384]
  14007. movdqu xmm12, xmm13
  14008. pcmpeqd xmm12, xmm10
  14009. movdqu xmm0, [r9]
  14010. movdqu xmm1, [r9+16]
  14011. movdqu xmm2, [r9+32]
  14012. movdqu xmm3, [r9+48]
  14013. pand xmm0, xmm12
  14014. pand xmm1, xmm12
  14015. pand xmm2, xmm12
  14016. pand xmm3, xmm12
  14017. por xmm4, xmm0
  14018. por xmm5, xmm1
  14019. por xmm6, xmm2
  14020. por xmm7, xmm3
  14021. paddd xmm13, xmm11
  14022. ; ENTRY: 49
  14023. mov r9, QWORD PTR [rdx+392]
  14024. movdqu xmm12, xmm13
  14025. pcmpeqd xmm12, xmm10
  14026. movdqu xmm0, [r9]
  14027. movdqu xmm1, [r9+16]
  14028. movdqu xmm2, [r9+32]
  14029. movdqu xmm3, [r9+48]
  14030. pand xmm0, xmm12
  14031. pand xmm1, xmm12
  14032. pand xmm2, xmm12
  14033. pand xmm3, xmm12
  14034. por xmm4, xmm0
  14035. por xmm5, xmm1
  14036. por xmm6, xmm2
  14037. por xmm7, xmm3
  14038. paddd xmm13, xmm11
  14039. ; ENTRY: 50
  14040. mov r9, QWORD PTR [rdx+400]
  14041. movdqu xmm12, xmm13
  14042. pcmpeqd xmm12, xmm10
  14043. movdqu xmm0, [r9]
  14044. movdqu xmm1, [r9+16]
  14045. movdqu xmm2, [r9+32]
  14046. movdqu xmm3, [r9+48]
  14047. pand xmm0, xmm12
  14048. pand xmm1, xmm12
  14049. pand xmm2, xmm12
  14050. pand xmm3, xmm12
  14051. por xmm4, xmm0
  14052. por xmm5, xmm1
  14053. por xmm6, xmm2
  14054. por xmm7, xmm3
  14055. paddd xmm13, xmm11
  14056. ; ENTRY: 51
  14057. mov r9, QWORD PTR [rdx+408]
  14058. movdqu xmm12, xmm13
  14059. pcmpeqd xmm12, xmm10
  14060. movdqu xmm0, [r9]
  14061. movdqu xmm1, [r9+16]
  14062. movdqu xmm2, [r9+32]
  14063. movdqu xmm3, [r9+48]
  14064. pand xmm0, xmm12
  14065. pand xmm1, xmm12
  14066. pand xmm2, xmm12
  14067. pand xmm3, xmm12
  14068. por xmm4, xmm0
  14069. por xmm5, xmm1
  14070. por xmm6, xmm2
  14071. por xmm7, xmm3
  14072. paddd xmm13, xmm11
  14073. ; ENTRY: 52
  14074. mov r9, QWORD PTR [rdx+416]
  14075. movdqu xmm12, xmm13
  14076. pcmpeqd xmm12, xmm10
  14077. movdqu xmm0, [r9]
  14078. movdqu xmm1, [r9+16]
  14079. movdqu xmm2, [r9+32]
  14080. movdqu xmm3, [r9+48]
  14081. pand xmm0, xmm12
  14082. pand xmm1, xmm12
  14083. pand xmm2, xmm12
  14084. pand xmm3, xmm12
  14085. por xmm4, xmm0
  14086. por xmm5, xmm1
  14087. por xmm6, xmm2
  14088. por xmm7, xmm3
  14089. paddd xmm13, xmm11
  14090. ; ENTRY: 53
  14091. mov r9, QWORD PTR [rdx+424]
  14092. movdqu xmm12, xmm13
  14093. pcmpeqd xmm12, xmm10
  14094. movdqu xmm0, [r9]
  14095. movdqu xmm1, [r9+16]
  14096. movdqu xmm2, [r9+32]
  14097. movdqu xmm3, [r9+48]
  14098. pand xmm0, xmm12
  14099. pand xmm1, xmm12
  14100. pand xmm2, xmm12
  14101. pand xmm3, xmm12
  14102. por xmm4, xmm0
  14103. por xmm5, xmm1
  14104. por xmm6, xmm2
  14105. por xmm7, xmm3
  14106. paddd xmm13, xmm11
  14107. ; ENTRY: 54
  14108. mov r9, QWORD PTR [rdx+432]
  14109. movdqu xmm12, xmm13
  14110. pcmpeqd xmm12, xmm10
  14111. movdqu xmm0, [r9]
  14112. movdqu xmm1, [r9+16]
  14113. movdqu xmm2, [r9+32]
  14114. movdqu xmm3, [r9+48]
  14115. pand xmm0, xmm12
  14116. pand xmm1, xmm12
  14117. pand xmm2, xmm12
  14118. pand xmm3, xmm12
  14119. por xmm4, xmm0
  14120. por xmm5, xmm1
  14121. por xmm6, xmm2
  14122. por xmm7, xmm3
  14123. paddd xmm13, xmm11
  14124. ; ENTRY: 55
  14125. mov r9, QWORD PTR [rdx+440]
  14126. movdqu xmm12, xmm13
  14127. pcmpeqd xmm12, xmm10
  14128. movdqu xmm0, [r9]
  14129. movdqu xmm1, [r9+16]
  14130. movdqu xmm2, [r9+32]
  14131. movdqu xmm3, [r9+48]
  14132. pand xmm0, xmm12
  14133. pand xmm1, xmm12
  14134. pand xmm2, xmm12
  14135. pand xmm3, xmm12
  14136. por xmm4, xmm0
  14137. por xmm5, xmm1
  14138. por xmm6, xmm2
  14139. por xmm7, xmm3
  14140. paddd xmm13, xmm11
  14141. ; ENTRY: 56
  14142. mov r9, QWORD PTR [rdx+448]
  14143. movdqu xmm12, xmm13
  14144. pcmpeqd xmm12, xmm10
  14145. movdqu xmm0, [r9]
  14146. movdqu xmm1, [r9+16]
  14147. movdqu xmm2, [r9+32]
  14148. movdqu xmm3, [r9+48]
  14149. pand xmm0, xmm12
  14150. pand xmm1, xmm12
  14151. pand xmm2, xmm12
  14152. pand xmm3, xmm12
  14153. por xmm4, xmm0
  14154. por xmm5, xmm1
  14155. por xmm6, xmm2
  14156. por xmm7, xmm3
  14157. paddd xmm13, xmm11
  14158. ; ENTRY: 57
  14159. mov r9, QWORD PTR [rdx+456]
  14160. movdqu xmm12, xmm13
  14161. pcmpeqd xmm12, xmm10
  14162. movdqu xmm0, [r9]
  14163. movdqu xmm1, [r9+16]
  14164. movdqu xmm2, [r9+32]
  14165. movdqu xmm3, [r9+48]
  14166. pand xmm0, xmm12
  14167. pand xmm1, xmm12
  14168. pand xmm2, xmm12
  14169. pand xmm3, xmm12
  14170. por xmm4, xmm0
  14171. por xmm5, xmm1
  14172. por xmm6, xmm2
  14173. por xmm7, xmm3
  14174. paddd xmm13, xmm11
  14175. ; ENTRY: 58
  14176. mov r9, QWORD PTR [rdx+464]
  14177. movdqu xmm12, xmm13
  14178. pcmpeqd xmm12, xmm10
  14179. movdqu xmm0, [r9]
  14180. movdqu xmm1, [r9+16]
  14181. movdqu xmm2, [r9+32]
  14182. movdqu xmm3, [r9+48]
  14183. pand xmm0, xmm12
  14184. pand xmm1, xmm12
  14185. pand xmm2, xmm12
  14186. pand xmm3, xmm12
  14187. por xmm4, xmm0
  14188. por xmm5, xmm1
  14189. por xmm6, xmm2
  14190. por xmm7, xmm3
  14191. paddd xmm13, xmm11
  14192. ; ENTRY: 59
  14193. mov r9, QWORD PTR [rdx+472]
  14194. movdqu xmm12, xmm13
  14195. pcmpeqd xmm12, xmm10
  14196. movdqu xmm0, [r9]
  14197. movdqu xmm1, [r9+16]
  14198. movdqu xmm2, [r9+32]
  14199. movdqu xmm3, [r9+48]
  14200. pand xmm0, xmm12
  14201. pand xmm1, xmm12
  14202. pand xmm2, xmm12
  14203. pand xmm3, xmm12
  14204. por xmm4, xmm0
  14205. por xmm5, xmm1
  14206. por xmm6, xmm2
  14207. por xmm7, xmm3
  14208. paddd xmm13, xmm11
  14209. ; ENTRY: 60
  14210. mov r9, QWORD PTR [rdx+480]
  14211. movdqu xmm12, xmm13
  14212. pcmpeqd xmm12, xmm10
  14213. movdqu xmm0, [r9]
  14214. movdqu xmm1, [r9+16]
  14215. movdqu xmm2, [r9+32]
  14216. movdqu xmm3, [r9+48]
  14217. pand xmm0, xmm12
  14218. pand xmm1, xmm12
  14219. pand xmm2, xmm12
  14220. pand xmm3, xmm12
  14221. por xmm4, xmm0
  14222. por xmm5, xmm1
  14223. por xmm6, xmm2
  14224. por xmm7, xmm3
  14225. paddd xmm13, xmm11
  14226. ; ENTRY: 61
  14227. mov r9, QWORD PTR [rdx+488]
  14228. movdqu xmm12, xmm13
  14229. pcmpeqd xmm12, xmm10
  14230. movdqu xmm0, [r9]
  14231. movdqu xmm1, [r9+16]
  14232. movdqu xmm2, [r9+32]
  14233. movdqu xmm3, [r9+48]
  14234. pand xmm0, xmm12
  14235. pand xmm1, xmm12
  14236. pand xmm2, xmm12
  14237. pand xmm3, xmm12
  14238. por xmm4, xmm0
  14239. por xmm5, xmm1
  14240. por xmm6, xmm2
  14241. por xmm7, xmm3
  14242. paddd xmm13, xmm11
  14243. ; ENTRY: 62
  14244. mov r9, QWORD PTR [rdx+496]
  14245. movdqu xmm12, xmm13
  14246. pcmpeqd xmm12, xmm10
  14247. movdqu xmm0, [r9]
  14248. movdqu xmm1, [r9+16]
  14249. movdqu xmm2, [r9+32]
  14250. movdqu xmm3, [r9+48]
  14251. pand xmm0, xmm12
  14252. pand xmm1, xmm12
  14253. pand xmm2, xmm12
  14254. pand xmm3, xmm12
  14255. por xmm4, xmm0
  14256. por xmm5, xmm1
  14257. por xmm6, xmm2
  14258. por xmm7, xmm3
  14259. paddd xmm13, xmm11
  14260. ; ENTRY: 63
  14261. mov r9, QWORD PTR [rdx+504]
  14262. movdqu xmm12, xmm13
  14263. pcmpeqd xmm12, xmm10
  14264. movdqu xmm0, [r9]
  14265. movdqu xmm1, [r9+16]
  14266. movdqu xmm2, [r9+32]
  14267. movdqu xmm3, [r9+48]
  14268. pand xmm0, xmm12
  14269. pand xmm1, xmm12
  14270. pand xmm2, xmm12
  14271. pand xmm3, xmm12
  14272. por xmm4, xmm0
  14273. por xmm5, xmm1
  14274. por xmm6, xmm2
  14275. por xmm7, xmm3
  14276. paddd xmm13, xmm11
  14277. movdqu [rcx], xmm4
  14278. movdqu [rcx+16], xmm5
  14279. movdqu [rcx+32], xmm6
  14280. movdqu [rcx+48], xmm7
  14281. add rcx, 64
  14282. ; END: 0-7
  14283. ; START: 8-15
  14284. pxor xmm13, xmm13
  14285. pxor xmm4, xmm4
  14286. pxor xmm5, xmm5
  14287. pxor xmm6, xmm6
  14288. pxor xmm7, xmm7
  14289. ; ENTRY: 0
  14290. mov r9, QWORD PTR [rdx]
  14291. add r9, 64
  14292. movdqu xmm12, xmm13
  14293. pcmpeqd xmm12, xmm10
  14294. movdqu xmm0, [r9]
  14295. movdqu xmm1, [r9+16]
  14296. movdqu xmm2, [r9+32]
  14297. movdqu xmm3, [r9+48]
  14298. pand xmm0, xmm12
  14299. pand xmm1, xmm12
  14300. pand xmm2, xmm12
  14301. pand xmm3, xmm12
  14302. por xmm4, xmm0
  14303. por xmm5, xmm1
  14304. por xmm6, xmm2
  14305. por xmm7, xmm3
  14306. paddd xmm13, xmm11
  14307. ; ENTRY: 1
  14308. mov r9, QWORD PTR [rdx+8]
  14309. add r9, 64
  14310. movdqu xmm12, xmm13
  14311. pcmpeqd xmm12, xmm10
  14312. movdqu xmm0, [r9]
  14313. movdqu xmm1, [r9+16]
  14314. movdqu xmm2, [r9+32]
  14315. movdqu xmm3, [r9+48]
  14316. pand xmm0, xmm12
  14317. pand xmm1, xmm12
  14318. pand xmm2, xmm12
  14319. pand xmm3, xmm12
  14320. por xmm4, xmm0
  14321. por xmm5, xmm1
  14322. por xmm6, xmm2
  14323. por xmm7, xmm3
  14324. paddd xmm13, xmm11
  14325. ; ENTRY: 2
  14326. mov r9, QWORD PTR [rdx+16]
  14327. add r9, 64
  14328. movdqu xmm12, xmm13
  14329. pcmpeqd xmm12, xmm10
  14330. movdqu xmm0, [r9]
  14331. movdqu xmm1, [r9+16]
  14332. movdqu xmm2, [r9+32]
  14333. movdqu xmm3, [r9+48]
  14334. pand xmm0, xmm12
  14335. pand xmm1, xmm12
  14336. pand xmm2, xmm12
  14337. pand xmm3, xmm12
  14338. por xmm4, xmm0
  14339. por xmm5, xmm1
  14340. por xmm6, xmm2
  14341. por xmm7, xmm3
  14342. paddd xmm13, xmm11
  14343. ; ENTRY: 3
  14344. mov r9, QWORD PTR [rdx+24]
  14345. add r9, 64
  14346. movdqu xmm12, xmm13
  14347. pcmpeqd xmm12, xmm10
  14348. movdqu xmm0, [r9]
  14349. movdqu xmm1, [r9+16]
  14350. movdqu xmm2, [r9+32]
  14351. movdqu xmm3, [r9+48]
  14352. pand xmm0, xmm12
  14353. pand xmm1, xmm12
  14354. pand xmm2, xmm12
  14355. pand xmm3, xmm12
  14356. por xmm4, xmm0
  14357. por xmm5, xmm1
  14358. por xmm6, xmm2
  14359. por xmm7, xmm3
  14360. paddd xmm13, xmm11
  14361. ; ENTRY: 4
  14362. mov r9, QWORD PTR [rdx+32]
  14363. add r9, 64
  14364. movdqu xmm12, xmm13
  14365. pcmpeqd xmm12, xmm10
  14366. movdqu xmm0, [r9]
  14367. movdqu xmm1, [r9+16]
  14368. movdqu xmm2, [r9+32]
  14369. movdqu xmm3, [r9+48]
  14370. pand xmm0, xmm12
  14371. pand xmm1, xmm12
  14372. pand xmm2, xmm12
  14373. pand xmm3, xmm12
  14374. por xmm4, xmm0
  14375. por xmm5, xmm1
  14376. por xmm6, xmm2
  14377. por xmm7, xmm3
  14378. paddd xmm13, xmm11
  14379. ; ENTRY: 5
  14380. mov r9, QWORD PTR [rdx+40]
  14381. add r9, 64
  14382. movdqu xmm12, xmm13
  14383. pcmpeqd xmm12, xmm10
  14384. movdqu xmm0, [r9]
  14385. movdqu xmm1, [r9+16]
  14386. movdqu xmm2, [r9+32]
  14387. movdqu xmm3, [r9+48]
  14388. pand xmm0, xmm12
  14389. pand xmm1, xmm12
  14390. pand xmm2, xmm12
  14391. pand xmm3, xmm12
  14392. por xmm4, xmm0
  14393. por xmm5, xmm1
  14394. por xmm6, xmm2
  14395. por xmm7, xmm3
  14396. paddd xmm13, xmm11
  14397. ; ENTRY: 6
  14398. mov r9, QWORD PTR [rdx+48]
  14399. add r9, 64
  14400. movdqu xmm12, xmm13
  14401. pcmpeqd xmm12, xmm10
  14402. movdqu xmm0, [r9]
  14403. movdqu xmm1, [r9+16]
  14404. movdqu xmm2, [r9+32]
  14405. movdqu xmm3, [r9+48]
  14406. pand xmm0, xmm12
  14407. pand xmm1, xmm12
  14408. pand xmm2, xmm12
  14409. pand xmm3, xmm12
  14410. por xmm4, xmm0
  14411. por xmm5, xmm1
  14412. por xmm6, xmm2
  14413. por xmm7, xmm3
  14414. paddd xmm13, xmm11
  14415. ; ENTRY: 7
  14416. mov r9, QWORD PTR [rdx+56]
  14417. add r9, 64
  14418. movdqu xmm12, xmm13
  14419. pcmpeqd xmm12, xmm10
  14420. movdqu xmm0, [r9]
  14421. movdqu xmm1, [r9+16]
  14422. movdqu xmm2, [r9+32]
  14423. movdqu xmm3, [r9+48]
  14424. pand xmm0, xmm12
  14425. pand xmm1, xmm12
  14426. pand xmm2, xmm12
  14427. pand xmm3, xmm12
  14428. por xmm4, xmm0
  14429. por xmm5, xmm1
  14430. por xmm6, xmm2
  14431. por xmm7, xmm3
  14432. paddd xmm13, xmm11
  14433. ; ENTRY: 8
  14434. mov r9, QWORD PTR [rdx+64]
  14435. add r9, 64
  14436. movdqu xmm12, xmm13
  14437. pcmpeqd xmm12, xmm10
  14438. movdqu xmm0, [r9]
  14439. movdqu xmm1, [r9+16]
  14440. movdqu xmm2, [r9+32]
  14441. movdqu xmm3, [r9+48]
  14442. pand xmm0, xmm12
  14443. pand xmm1, xmm12
  14444. pand xmm2, xmm12
  14445. pand xmm3, xmm12
  14446. por xmm4, xmm0
  14447. por xmm5, xmm1
  14448. por xmm6, xmm2
  14449. por xmm7, xmm3
  14450. paddd xmm13, xmm11
  14451. ; ENTRY: 9
  14452. mov r9, QWORD PTR [rdx+72]
  14453. add r9, 64
  14454. movdqu xmm12, xmm13
  14455. pcmpeqd xmm12, xmm10
  14456. movdqu xmm0, [r9]
  14457. movdqu xmm1, [r9+16]
  14458. movdqu xmm2, [r9+32]
  14459. movdqu xmm3, [r9+48]
  14460. pand xmm0, xmm12
  14461. pand xmm1, xmm12
  14462. pand xmm2, xmm12
  14463. pand xmm3, xmm12
  14464. por xmm4, xmm0
  14465. por xmm5, xmm1
  14466. por xmm6, xmm2
  14467. por xmm7, xmm3
  14468. paddd xmm13, xmm11
  14469. ; ENTRY: 10
  14470. mov r9, QWORD PTR [rdx+80]
  14471. add r9, 64
  14472. movdqu xmm12, xmm13
  14473. pcmpeqd xmm12, xmm10
  14474. movdqu xmm0, [r9]
  14475. movdqu xmm1, [r9+16]
  14476. movdqu xmm2, [r9+32]
  14477. movdqu xmm3, [r9+48]
  14478. pand xmm0, xmm12
  14479. pand xmm1, xmm12
  14480. pand xmm2, xmm12
  14481. pand xmm3, xmm12
  14482. por xmm4, xmm0
  14483. por xmm5, xmm1
  14484. por xmm6, xmm2
  14485. por xmm7, xmm3
  14486. paddd xmm13, xmm11
  14487. ; ENTRY: 11
  14488. mov r9, QWORD PTR [rdx+88]
  14489. add r9, 64
  14490. movdqu xmm12, xmm13
  14491. pcmpeqd xmm12, xmm10
  14492. movdqu xmm0, [r9]
  14493. movdqu xmm1, [r9+16]
  14494. movdqu xmm2, [r9+32]
  14495. movdqu xmm3, [r9+48]
  14496. pand xmm0, xmm12
  14497. pand xmm1, xmm12
  14498. pand xmm2, xmm12
  14499. pand xmm3, xmm12
  14500. por xmm4, xmm0
  14501. por xmm5, xmm1
  14502. por xmm6, xmm2
  14503. por xmm7, xmm3
  14504. paddd xmm13, xmm11
  14505. ; ENTRY: 12
  14506. mov r9, QWORD PTR [rdx+96]
  14507. add r9, 64
  14508. movdqu xmm12, xmm13
  14509. pcmpeqd xmm12, xmm10
  14510. movdqu xmm0, [r9]
  14511. movdqu xmm1, [r9+16]
  14512. movdqu xmm2, [r9+32]
  14513. movdqu xmm3, [r9+48]
  14514. pand xmm0, xmm12
  14515. pand xmm1, xmm12
  14516. pand xmm2, xmm12
  14517. pand xmm3, xmm12
  14518. por xmm4, xmm0
  14519. por xmm5, xmm1
  14520. por xmm6, xmm2
  14521. por xmm7, xmm3
  14522. paddd xmm13, xmm11
  14523. ; ENTRY: 13
  14524. mov r9, QWORD PTR [rdx+104]
  14525. add r9, 64
  14526. movdqu xmm12, xmm13
  14527. pcmpeqd xmm12, xmm10
  14528. movdqu xmm0, [r9]
  14529. movdqu xmm1, [r9+16]
  14530. movdqu xmm2, [r9+32]
  14531. movdqu xmm3, [r9+48]
  14532. pand xmm0, xmm12
  14533. pand xmm1, xmm12
  14534. pand xmm2, xmm12
  14535. pand xmm3, xmm12
  14536. por xmm4, xmm0
  14537. por xmm5, xmm1
  14538. por xmm6, xmm2
  14539. por xmm7, xmm3
  14540. paddd xmm13, xmm11
  14541. ; ENTRY: 14
  14542. mov r9, QWORD PTR [rdx+112]
  14543. add r9, 64
  14544. movdqu xmm12, xmm13
  14545. pcmpeqd xmm12, xmm10
  14546. movdqu xmm0, [r9]
  14547. movdqu xmm1, [r9+16]
  14548. movdqu xmm2, [r9+32]
  14549. movdqu xmm3, [r9+48]
  14550. pand xmm0, xmm12
  14551. pand xmm1, xmm12
  14552. pand xmm2, xmm12
  14553. pand xmm3, xmm12
  14554. por xmm4, xmm0
  14555. por xmm5, xmm1
  14556. por xmm6, xmm2
  14557. por xmm7, xmm3
  14558. paddd xmm13, xmm11
  14559. ; ENTRY: 15
  14560. mov r9, QWORD PTR [rdx+120]
  14561. add r9, 64
  14562. movdqu xmm12, xmm13
  14563. pcmpeqd xmm12, xmm10
  14564. movdqu xmm0, [r9]
  14565. movdqu xmm1, [r9+16]
  14566. movdqu xmm2, [r9+32]
  14567. movdqu xmm3, [r9+48]
  14568. pand xmm0, xmm12
  14569. pand xmm1, xmm12
  14570. pand xmm2, xmm12
  14571. pand xmm3, xmm12
  14572. por xmm4, xmm0
  14573. por xmm5, xmm1
  14574. por xmm6, xmm2
  14575. por xmm7, xmm3
  14576. paddd xmm13, xmm11
  14577. ; ENTRY: 16
  14578. mov r9, QWORD PTR [rdx+128]
  14579. add r9, 64
  14580. movdqu xmm12, xmm13
  14581. pcmpeqd xmm12, xmm10
  14582. movdqu xmm0, [r9]
  14583. movdqu xmm1, [r9+16]
  14584. movdqu xmm2, [r9+32]
  14585. movdqu xmm3, [r9+48]
  14586. pand xmm0, xmm12
  14587. pand xmm1, xmm12
  14588. pand xmm2, xmm12
  14589. pand xmm3, xmm12
  14590. por xmm4, xmm0
  14591. por xmm5, xmm1
  14592. por xmm6, xmm2
  14593. por xmm7, xmm3
  14594. paddd xmm13, xmm11
  14595. ; ENTRY: 17
  14596. mov r9, QWORD PTR [rdx+136]
  14597. add r9, 64
  14598. movdqu xmm12, xmm13
  14599. pcmpeqd xmm12, xmm10
  14600. movdqu xmm0, [r9]
  14601. movdqu xmm1, [r9+16]
  14602. movdqu xmm2, [r9+32]
  14603. movdqu xmm3, [r9+48]
  14604. pand xmm0, xmm12
  14605. pand xmm1, xmm12
  14606. pand xmm2, xmm12
  14607. pand xmm3, xmm12
  14608. por xmm4, xmm0
  14609. por xmm5, xmm1
  14610. por xmm6, xmm2
  14611. por xmm7, xmm3
  14612. paddd xmm13, xmm11
  14613. ; ENTRY: 18
  14614. mov r9, QWORD PTR [rdx+144]
  14615. add r9, 64
  14616. movdqu xmm12, xmm13
  14617. pcmpeqd xmm12, xmm10
  14618. movdqu xmm0, [r9]
  14619. movdqu xmm1, [r9+16]
  14620. movdqu xmm2, [r9+32]
  14621. movdqu xmm3, [r9+48]
  14622. pand xmm0, xmm12
  14623. pand xmm1, xmm12
  14624. pand xmm2, xmm12
  14625. pand xmm3, xmm12
  14626. por xmm4, xmm0
  14627. por xmm5, xmm1
  14628. por xmm6, xmm2
  14629. por xmm7, xmm3
  14630. paddd xmm13, xmm11
  14631. ; ENTRY: 19
  14632. mov r9, QWORD PTR [rdx+152]
  14633. add r9, 64
  14634. movdqu xmm12, xmm13
  14635. pcmpeqd xmm12, xmm10
  14636. movdqu xmm0, [r9]
  14637. movdqu xmm1, [r9+16]
  14638. movdqu xmm2, [r9+32]
  14639. movdqu xmm3, [r9+48]
  14640. pand xmm0, xmm12
  14641. pand xmm1, xmm12
  14642. pand xmm2, xmm12
  14643. pand xmm3, xmm12
  14644. por xmm4, xmm0
  14645. por xmm5, xmm1
  14646. por xmm6, xmm2
  14647. por xmm7, xmm3
  14648. paddd xmm13, xmm11
  14649. ; ENTRY: 20
  14650. mov r9, QWORD PTR [rdx+160]
  14651. add r9, 64
  14652. movdqu xmm12, xmm13
  14653. pcmpeqd xmm12, xmm10
  14654. movdqu xmm0, [r9]
  14655. movdqu xmm1, [r9+16]
  14656. movdqu xmm2, [r9+32]
  14657. movdqu xmm3, [r9+48]
  14658. pand xmm0, xmm12
  14659. pand xmm1, xmm12
  14660. pand xmm2, xmm12
  14661. pand xmm3, xmm12
  14662. por xmm4, xmm0
  14663. por xmm5, xmm1
  14664. por xmm6, xmm2
  14665. por xmm7, xmm3
  14666. paddd xmm13, xmm11
  14667. ; ENTRY: 21
  14668. mov r9, QWORD PTR [rdx+168]
  14669. add r9, 64
  14670. movdqu xmm12, xmm13
  14671. pcmpeqd xmm12, xmm10
  14672. movdqu xmm0, [r9]
  14673. movdqu xmm1, [r9+16]
  14674. movdqu xmm2, [r9+32]
  14675. movdqu xmm3, [r9+48]
  14676. pand xmm0, xmm12
  14677. pand xmm1, xmm12
  14678. pand xmm2, xmm12
  14679. pand xmm3, xmm12
  14680. por xmm4, xmm0
  14681. por xmm5, xmm1
  14682. por xmm6, xmm2
  14683. por xmm7, xmm3
  14684. paddd xmm13, xmm11
  14685. ; ENTRY: 22
  14686. mov r9, QWORD PTR [rdx+176]
  14687. add r9, 64
  14688. movdqu xmm12, xmm13
  14689. pcmpeqd xmm12, xmm10
  14690. movdqu xmm0, [r9]
  14691. movdqu xmm1, [r9+16]
  14692. movdqu xmm2, [r9+32]
  14693. movdqu xmm3, [r9+48]
  14694. pand xmm0, xmm12
  14695. pand xmm1, xmm12
  14696. pand xmm2, xmm12
  14697. pand xmm3, xmm12
  14698. por xmm4, xmm0
  14699. por xmm5, xmm1
  14700. por xmm6, xmm2
  14701. por xmm7, xmm3
  14702. paddd xmm13, xmm11
  14703. ; ENTRY: 23
  14704. mov r9, QWORD PTR [rdx+184]
  14705. add r9, 64
  14706. movdqu xmm12, xmm13
  14707. pcmpeqd xmm12, xmm10
  14708. movdqu xmm0, [r9]
  14709. movdqu xmm1, [r9+16]
  14710. movdqu xmm2, [r9+32]
  14711. movdqu xmm3, [r9+48]
  14712. pand xmm0, xmm12
  14713. pand xmm1, xmm12
  14714. pand xmm2, xmm12
  14715. pand xmm3, xmm12
  14716. por xmm4, xmm0
  14717. por xmm5, xmm1
  14718. por xmm6, xmm2
  14719. por xmm7, xmm3
  14720. paddd xmm13, xmm11
  14721. ; ENTRY: 24
  14722. mov r9, QWORD PTR [rdx+192]
  14723. add r9, 64
  14724. movdqu xmm12, xmm13
  14725. pcmpeqd xmm12, xmm10
  14726. movdqu xmm0, [r9]
  14727. movdqu xmm1, [r9+16]
  14728. movdqu xmm2, [r9+32]
  14729. movdqu xmm3, [r9+48]
  14730. pand xmm0, xmm12
  14731. pand xmm1, xmm12
  14732. pand xmm2, xmm12
  14733. pand xmm3, xmm12
  14734. por xmm4, xmm0
  14735. por xmm5, xmm1
  14736. por xmm6, xmm2
  14737. por xmm7, xmm3
  14738. paddd xmm13, xmm11
  14739. ; ENTRY: 25
  14740. mov r9, QWORD PTR [rdx+200]
  14741. add r9, 64
  14742. movdqu xmm12, xmm13
  14743. pcmpeqd xmm12, xmm10
  14744. movdqu xmm0, [r9]
  14745. movdqu xmm1, [r9+16]
  14746. movdqu xmm2, [r9+32]
  14747. movdqu xmm3, [r9+48]
  14748. pand xmm0, xmm12
  14749. pand xmm1, xmm12
  14750. pand xmm2, xmm12
  14751. pand xmm3, xmm12
  14752. por xmm4, xmm0
  14753. por xmm5, xmm1
  14754. por xmm6, xmm2
  14755. por xmm7, xmm3
  14756. paddd xmm13, xmm11
  14757. ; ENTRY: 26
  14758. mov r9, QWORD PTR [rdx+208]
  14759. add r9, 64
  14760. movdqu xmm12, xmm13
  14761. pcmpeqd xmm12, xmm10
  14762. movdqu xmm0, [r9]
  14763. movdqu xmm1, [r9+16]
  14764. movdqu xmm2, [r9+32]
  14765. movdqu xmm3, [r9+48]
  14766. pand xmm0, xmm12
  14767. pand xmm1, xmm12
  14768. pand xmm2, xmm12
  14769. pand xmm3, xmm12
  14770. por xmm4, xmm0
  14771. por xmm5, xmm1
  14772. por xmm6, xmm2
  14773. por xmm7, xmm3
  14774. paddd xmm13, xmm11
  14775. ; ENTRY: 27
  14776. mov r9, QWORD PTR [rdx+216]
  14777. add r9, 64
  14778. movdqu xmm12, xmm13
  14779. pcmpeqd xmm12, xmm10
  14780. movdqu xmm0, [r9]
  14781. movdqu xmm1, [r9+16]
  14782. movdqu xmm2, [r9+32]
  14783. movdqu xmm3, [r9+48]
  14784. pand xmm0, xmm12
  14785. pand xmm1, xmm12
  14786. pand xmm2, xmm12
  14787. pand xmm3, xmm12
  14788. por xmm4, xmm0
  14789. por xmm5, xmm1
  14790. por xmm6, xmm2
  14791. por xmm7, xmm3
  14792. paddd xmm13, xmm11
  14793. ; ENTRY: 28
  14794. mov r9, QWORD PTR [rdx+224]
  14795. add r9, 64
  14796. movdqu xmm12, xmm13
  14797. pcmpeqd xmm12, xmm10
  14798. movdqu xmm0, [r9]
  14799. movdqu xmm1, [r9+16]
  14800. movdqu xmm2, [r9+32]
  14801. movdqu xmm3, [r9+48]
  14802. pand xmm0, xmm12
  14803. pand xmm1, xmm12
  14804. pand xmm2, xmm12
  14805. pand xmm3, xmm12
  14806. por xmm4, xmm0
  14807. por xmm5, xmm1
  14808. por xmm6, xmm2
  14809. por xmm7, xmm3
  14810. paddd xmm13, xmm11
  14811. ; ENTRY: 29
  14812. mov r9, QWORD PTR [rdx+232]
  14813. add r9, 64
  14814. movdqu xmm12, xmm13
  14815. pcmpeqd xmm12, xmm10
  14816. movdqu xmm0, [r9]
  14817. movdqu xmm1, [r9+16]
  14818. movdqu xmm2, [r9+32]
  14819. movdqu xmm3, [r9+48]
  14820. pand xmm0, xmm12
  14821. pand xmm1, xmm12
  14822. pand xmm2, xmm12
  14823. pand xmm3, xmm12
  14824. por xmm4, xmm0
  14825. por xmm5, xmm1
  14826. por xmm6, xmm2
  14827. por xmm7, xmm3
  14828. paddd xmm13, xmm11
  14829. ; ENTRY: 30
  14830. mov r9, QWORD PTR [rdx+240]
  14831. add r9, 64
  14832. movdqu xmm12, xmm13
  14833. pcmpeqd xmm12, xmm10
  14834. movdqu xmm0, [r9]
  14835. movdqu xmm1, [r9+16]
  14836. movdqu xmm2, [r9+32]
  14837. movdqu xmm3, [r9+48]
  14838. pand xmm0, xmm12
  14839. pand xmm1, xmm12
  14840. pand xmm2, xmm12
  14841. pand xmm3, xmm12
  14842. por xmm4, xmm0
  14843. por xmm5, xmm1
  14844. por xmm6, xmm2
  14845. por xmm7, xmm3
  14846. paddd xmm13, xmm11
  14847. ; ENTRY: 31
  14848. mov r9, QWORD PTR [rdx+248]
  14849. add r9, 64
  14850. movdqu xmm12, xmm13
  14851. pcmpeqd xmm12, xmm10
  14852. movdqu xmm0, [r9]
  14853. movdqu xmm1, [r9+16]
  14854. movdqu xmm2, [r9+32]
  14855. movdqu xmm3, [r9+48]
  14856. pand xmm0, xmm12
  14857. pand xmm1, xmm12
  14858. pand xmm2, xmm12
  14859. pand xmm3, xmm12
  14860. por xmm4, xmm0
  14861. por xmm5, xmm1
  14862. por xmm6, xmm2
  14863. por xmm7, xmm3
  14864. paddd xmm13, xmm11
  14865. ; ENTRY: 32
  14866. mov r9, QWORD PTR [rdx+256]
  14867. add r9, 64
  14868. movdqu xmm12, xmm13
  14869. pcmpeqd xmm12, xmm10
  14870. movdqu xmm0, [r9]
  14871. movdqu xmm1, [r9+16]
  14872. movdqu xmm2, [r9+32]
  14873. movdqu xmm3, [r9+48]
  14874. pand xmm0, xmm12
  14875. pand xmm1, xmm12
  14876. pand xmm2, xmm12
  14877. pand xmm3, xmm12
  14878. por xmm4, xmm0
  14879. por xmm5, xmm1
  14880. por xmm6, xmm2
  14881. por xmm7, xmm3
  14882. paddd xmm13, xmm11
  14883. ; ENTRY: 33
  14884. mov r9, QWORD PTR [rdx+264]
  14885. add r9, 64
  14886. movdqu xmm12, xmm13
  14887. pcmpeqd xmm12, xmm10
  14888. movdqu xmm0, [r9]
  14889. movdqu xmm1, [r9+16]
  14890. movdqu xmm2, [r9+32]
  14891. movdqu xmm3, [r9+48]
  14892. pand xmm0, xmm12
  14893. pand xmm1, xmm12
  14894. pand xmm2, xmm12
  14895. pand xmm3, xmm12
  14896. por xmm4, xmm0
  14897. por xmm5, xmm1
  14898. por xmm6, xmm2
  14899. por xmm7, xmm3
  14900. paddd xmm13, xmm11
  14901. ; ENTRY: 34
  14902. mov r9, QWORD PTR [rdx+272]
  14903. add r9, 64
  14904. movdqu xmm12, xmm13
  14905. pcmpeqd xmm12, xmm10
  14906. movdqu xmm0, [r9]
  14907. movdqu xmm1, [r9+16]
  14908. movdqu xmm2, [r9+32]
  14909. movdqu xmm3, [r9+48]
  14910. pand xmm0, xmm12
  14911. pand xmm1, xmm12
  14912. pand xmm2, xmm12
  14913. pand xmm3, xmm12
  14914. por xmm4, xmm0
  14915. por xmm5, xmm1
  14916. por xmm6, xmm2
  14917. por xmm7, xmm3
  14918. paddd xmm13, xmm11
  14919. ; ENTRY: 35
  14920. mov r9, QWORD PTR [rdx+280]
  14921. add r9, 64
  14922. movdqu xmm12, xmm13
  14923. pcmpeqd xmm12, xmm10
  14924. movdqu xmm0, [r9]
  14925. movdqu xmm1, [r9+16]
  14926. movdqu xmm2, [r9+32]
  14927. movdqu xmm3, [r9+48]
  14928. pand xmm0, xmm12
  14929. pand xmm1, xmm12
  14930. pand xmm2, xmm12
  14931. pand xmm3, xmm12
  14932. por xmm4, xmm0
  14933. por xmm5, xmm1
  14934. por xmm6, xmm2
  14935. por xmm7, xmm3
  14936. paddd xmm13, xmm11
  14937. ; ENTRY: 36
  14938. mov r9, QWORD PTR [rdx+288]
  14939. add r9, 64
  14940. movdqu xmm12, xmm13
  14941. pcmpeqd xmm12, xmm10
  14942. movdqu xmm0, [r9]
  14943. movdqu xmm1, [r9+16]
  14944. movdqu xmm2, [r9+32]
  14945. movdqu xmm3, [r9+48]
  14946. pand xmm0, xmm12
  14947. pand xmm1, xmm12
  14948. pand xmm2, xmm12
  14949. pand xmm3, xmm12
  14950. por xmm4, xmm0
  14951. por xmm5, xmm1
  14952. por xmm6, xmm2
  14953. por xmm7, xmm3
  14954. paddd xmm13, xmm11
  14955. ; ENTRY: 37
  14956. mov r9, QWORD PTR [rdx+296]
  14957. add r9, 64
  14958. movdqu xmm12, xmm13
  14959. pcmpeqd xmm12, xmm10
  14960. movdqu xmm0, [r9]
  14961. movdqu xmm1, [r9+16]
  14962. movdqu xmm2, [r9+32]
  14963. movdqu xmm3, [r9+48]
  14964. pand xmm0, xmm12
  14965. pand xmm1, xmm12
  14966. pand xmm2, xmm12
  14967. pand xmm3, xmm12
  14968. por xmm4, xmm0
  14969. por xmm5, xmm1
  14970. por xmm6, xmm2
  14971. por xmm7, xmm3
  14972. paddd xmm13, xmm11
  14973. ; ENTRY: 38
  14974. mov r9, QWORD PTR [rdx+304]
  14975. add r9, 64
  14976. movdqu xmm12, xmm13
  14977. pcmpeqd xmm12, xmm10
  14978. movdqu xmm0, [r9]
  14979. movdqu xmm1, [r9+16]
  14980. movdqu xmm2, [r9+32]
  14981. movdqu xmm3, [r9+48]
  14982. pand xmm0, xmm12
  14983. pand xmm1, xmm12
  14984. pand xmm2, xmm12
  14985. pand xmm3, xmm12
  14986. por xmm4, xmm0
  14987. por xmm5, xmm1
  14988. por xmm6, xmm2
  14989. por xmm7, xmm3
  14990. paddd xmm13, xmm11
  14991. ; ENTRY: 39
  14992. mov r9, QWORD PTR [rdx+312]
  14993. add r9, 64
  14994. movdqu xmm12, xmm13
  14995. pcmpeqd xmm12, xmm10
  14996. movdqu xmm0, [r9]
  14997. movdqu xmm1, [r9+16]
  14998. movdqu xmm2, [r9+32]
  14999. movdqu xmm3, [r9+48]
  15000. pand xmm0, xmm12
  15001. pand xmm1, xmm12
  15002. pand xmm2, xmm12
  15003. pand xmm3, xmm12
  15004. por xmm4, xmm0
  15005. por xmm5, xmm1
  15006. por xmm6, xmm2
  15007. por xmm7, xmm3
  15008. paddd xmm13, xmm11
  15009. ; ENTRY: 40
  15010. mov r9, QWORD PTR [rdx+320]
  15011. add r9, 64
  15012. movdqu xmm12, xmm13
  15013. pcmpeqd xmm12, xmm10
  15014. movdqu xmm0, [r9]
  15015. movdqu xmm1, [r9+16]
  15016. movdqu xmm2, [r9+32]
  15017. movdqu xmm3, [r9+48]
  15018. pand xmm0, xmm12
  15019. pand xmm1, xmm12
  15020. pand xmm2, xmm12
  15021. pand xmm3, xmm12
  15022. por xmm4, xmm0
  15023. por xmm5, xmm1
  15024. por xmm6, xmm2
  15025. por xmm7, xmm3
  15026. paddd xmm13, xmm11
  15027. ; ENTRY: 41
  15028. mov r9, QWORD PTR [rdx+328]
  15029. add r9, 64
  15030. movdqu xmm12, xmm13
  15031. pcmpeqd xmm12, xmm10
  15032. movdqu xmm0, [r9]
  15033. movdqu xmm1, [r9+16]
  15034. movdqu xmm2, [r9+32]
  15035. movdqu xmm3, [r9+48]
  15036. pand xmm0, xmm12
  15037. pand xmm1, xmm12
  15038. pand xmm2, xmm12
  15039. pand xmm3, xmm12
  15040. por xmm4, xmm0
  15041. por xmm5, xmm1
  15042. por xmm6, xmm2
  15043. por xmm7, xmm3
  15044. paddd xmm13, xmm11
  15045. ; ENTRY: 42
  15046. mov r9, QWORD PTR [rdx+336]
  15047. add r9, 64
  15048. movdqu xmm12, xmm13
  15049. pcmpeqd xmm12, xmm10
  15050. movdqu xmm0, [r9]
  15051. movdqu xmm1, [r9+16]
  15052. movdqu xmm2, [r9+32]
  15053. movdqu xmm3, [r9+48]
  15054. pand xmm0, xmm12
  15055. pand xmm1, xmm12
  15056. pand xmm2, xmm12
  15057. pand xmm3, xmm12
  15058. por xmm4, xmm0
  15059. por xmm5, xmm1
  15060. por xmm6, xmm2
  15061. por xmm7, xmm3
  15062. paddd xmm13, xmm11
  15063. ; ENTRY: 43
  15064. mov r9, QWORD PTR [rdx+344]
  15065. add r9, 64
  15066. movdqu xmm12, xmm13
  15067. pcmpeqd xmm12, xmm10
  15068. movdqu xmm0, [r9]
  15069. movdqu xmm1, [r9+16]
  15070. movdqu xmm2, [r9+32]
  15071. movdqu xmm3, [r9+48]
  15072. pand xmm0, xmm12
  15073. pand xmm1, xmm12
  15074. pand xmm2, xmm12
  15075. pand xmm3, xmm12
  15076. por xmm4, xmm0
  15077. por xmm5, xmm1
  15078. por xmm6, xmm2
  15079. por xmm7, xmm3
  15080. paddd xmm13, xmm11
  15081. ; ENTRY: 44
  15082. mov r9, QWORD PTR [rdx+352]
  15083. add r9, 64
  15084. movdqu xmm12, xmm13
  15085. pcmpeqd xmm12, xmm10
  15086. movdqu xmm0, [r9]
  15087. movdqu xmm1, [r9+16]
  15088. movdqu xmm2, [r9+32]
  15089. movdqu xmm3, [r9+48]
  15090. pand xmm0, xmm12
  15091. pand xmm1, xmm12
  15092. pand xmm2, xmm12
  15093. pand xmm3, xmm12
  15094. por xmm4, xmm0
  15095. por xmm5, xmm1
  15096. por xmm6, xmm2
  15097. por xmm7, xmm3
  15098. paddd xmm13, xmm11
  15099. ; ENTRY: 45
  15100. mov r9, QWORD PTR [rdx+360]
  15101. add r9, 64
  15102. movdqu xmm12, xmm13
  15103. pcmpeqd xmm12, xmm10
  15104. movdqu xmm0, [r9]
  15105. movdqu xmm1, [r9+16]
  15106. movdqu xmm2, [r9+32]
  15107. movdqu xmm3, [r9+48]
  15108. pand xmm0, xmm12
  15109. pand xmm1, xmm12
  15110. pand xmm2, xmm12
  15111. pand xmm3, xmm12
  15112. por xmm4, xmm0
  15113. por xmm5, xmm1
  15114. por xmm6, xmm2
  15115. por xmm7, xmm3
  15116. paddd xmm13, xmm11
  15117. ; ENTRY: 46
  15118. mov r9, QWORD PTR [rdx+368]
  15119. add r9, 64
  15120. movdqu xmm12, xmm13
  15121. pcmpeqd xmm12, xmm10
  15122. movdqu xmm0, [r9]
  15123. movdqu xmm1, [r9+16]
  15124. movdqu xmm2, [r9+32]
  15125. movdqu xmm3, [r9+48]
  15126. pand xmm0, xmm12
  15127. pand xmm1, xmm12
  15128. pand xmm2, xmm12
  15129. pand xmm3, xmm12
  15130. por xmm4, xmm0
  15131. por xmm5, xmm1
  15132. por xmm6, xmm2
  15133. por xmm7, xmm3
  15134. paddd xmm13, xmm11
  15135. ; ENTRY: 47
  15136. mov r9, QWORD PTR [rdx+376]
  15137. add r9, 64
  15138. movdqu xmm12, xmm13
  15139. pcmpeqd xmm12, xmm10
  15140. movdqu xmm0, [r9]
  15141. movdqu xmm1, [r9+16]
  15142. movdqu xmm2, [r9+32]
  15143. movdqu xmm3, [r9+48]
  15144. pand xmm0, xmm12
  15145. pand xmm1, xmm12
  15146. pand xmm2, xmm12
  15147. pand xmm3, xmm12
  15148. por xmm4, xmm0
  15149. por xmm5, xmm1
  15150. por xmm6, xmm2
  15151. por xmm7, xmm3
  15152. paddd xmm13, xmm11
  15153. ; ENTRY: 48
  15154. mov r9, QWORD PTR [rdx+384]
  15155. add r9, 64
  15156. movdqu xmm12, xmm13
  15157. pcmpeqd xmm12, xmm10
  15158. movdqu xmm0, [r9]
  15159. movdqu xmm1, [r9+16]
  15160. movdqu xmm2, [r9+32]
  15161. movdqu xmm3, [r9+48]
  15162. pand xmm0, xmm12
  15163. pand xmm1, xmm12
  15164. pand xmm2, xmm12
  15165. pand xmm3, xmm12
  15166. por xmm4, xmm0
  15167. por xmm5, xmm1
  15168. por xmm6, xmm2
  15169. por xmm7, xmm3
  15170. paddd xmm13, xmm11
  15171. ; ENTRY: 49
  15172. mov r9, QWORD PTR [rdx+392]
  15173. add r9, 64
  15174. movdqu xmm12, xmm13
  15175. pcmpeqd xmm12, xmm10
  15176. movdqu xmm0, [r9]
  15177. movdqu xmm1, [r9+16]
  15178. movdqu xmm2, [r9+32]
  15179. movdqu xmm3, [r9+48]
  15180. pand xmm0, xmm12
  15181. pand xmm1, xmm12
  15182. pand xmm2, xmm12
  15183. pand xmm3, xmm12
  15184. por xmm4, xmm0
  15185. por xmm5, xmm1
  15186. por xmm6, xmm2
  15187. por xmm7, xmm3
  15188. paddd xmm13, xmm11
  15189. ; ENTRY: 50
  15190. mov r9, QWORD PTR [rdx+400]
  15191. add r9, 64
  15192. movdqu xmm12, xmm13
  15193. pcmpeqd xmm12, xmm10
  15194. movdqu xmm0, [r9]
  15195. movdqu xmm1, [r9+16]
  15196. movdqu xmm2, [r9+32]
  15197. movdqu xmm3, [r9+48]
  15198. pand xmm0, xmm12
  15199. pand xmm1, xmm12
  15200. pand xmm2, xmm12
  15201. pand xmm3, xmm12
  15202. por xmm4, xmm0
  15203. por xmm5, xmm1
  15204. por xmm6, xmm2
  15205. por xmm7, xmm3
  15206. paddd xmm13, xmm11
  15207. ; ENTRY: 51
  15208. mov r9, QWORD PTR [rdx+408]
  15209. add r9, 64
  15210. movdqu xmm12, xmm13
  15211. pcmpeqd xmm12, xmm10
  15212. movdqu xmm0, [r9]
  15213. movdqu xmm1, [r9+16]
  15214. movdqu xmm2, [r9+32]
  15215. movdqu xmm3, [r9+48]
  15216. pand xmm0, xmm12
  15217. pand xmm1, xmm12
  15218. pand xmm2, xmm12
  15219. pand xmm3, xmm12
  15220. por xmm4, xmm0
  15221. por xmm5, xmm1
  15222. por xmm6, xmm2
  15223. por xmm7, xmm3
  15224. paddd xmm13, xmm11
  15225. ; ENTRY: 52
  15226. mov r9, QWORD PTR [rdx+416]
  15227. add r9, 64
  15228. movdqu xmm12, xmm13
  15229. pcmpeqd xmm12, xmm10
  15230. movdqu xmm0, [r9]
  15231. movdqu xmm1, [r9+16]
  15232. movdqu xmm2, [r9+32]
  15233. movdqu xmm3, [r9+48]
  15234. pand xmm0, xmm12
  15235. pand xmm1, xmm12
  15236. pand xmm2, xmm12
  15237. pand xmm3, xmm12
  15238. por xmm4, xmm0
  15239. por xmm5, xmm1
  15240. por xmm6, xmm2
  15241. por xmm7, xmm3
  15242. paddd xmm13, xmm11
  15243. ; ENTRY: 53
  15244. mov r9, QWORD PTR [rdx+424]
  15245. add r9, 64
  15246. movdqu xmm12, xmm13
  15247. pcmpeqd xmm12, xmm10
  15248. movdqu xmm0, [r9]
  15249. movdqu xmm1, [r9+16]
  15250. movdqu xmm2, [r9+32]
  15251. movdqu xmm3, [r9+48]
  15252. pand xmm0, xmm12
  15253. pand xmm1, xmm12
  15254. pand xmm2, xmm12
  15255. pand xmm3, xmm12
  15256. por xmm4, xmm0
  15257. por xmm5, xmm1
  15258. por xmm6, xmm2
  15259. por xmm7, xmm3
  15260. paddd xmm13, xmm11
  15261. ; ENTRY: 54
  15262. mov r9, QWORD PTR [rdx+432]
  15263. add r9, 64
  15264. movdqu xmm12, xmm13
  15265. pcmpeqd xmm12, xmm10
  15266. movdqu xmm0, [r9]
  15267. movdqu xmm1, [r9+16]
  15268. movdqu xmm2, [r9+32]
  15269. movdqu xmm3, [r9+48]
  15270. pand xmm0, xmm12
  15271. pand xmm1, xmm12
  15272. pand xmm2, xmm12
  15273. pand xmm3, xmm12
  15274. por xmm4, xmm0
  15275. por xmm5, xmm1
  15276. por xmm6, xmm2
  15277. por xmm7, xmm3
  15278. paddd xmm13, xmm11
  15279. ; ENTRY: 55
  15280. mov r9, QWORD PTR [rdx+440]
  15281. add r9, 64
  15282. movdqu xmm12, xmm13
  15283. pcmpeqd xmm12, xmm10
  15284. movdqu xmm0, [r9]
  15285. movdqu xmm1, [r9+16]
  15286. movdqu xmm2, [r9+32]
  15287. movdqu xmm3, [r9+48]
  15288. pand xmm0, xmm12
  15289. pand xmm1, xmm12
  15290. pand xmm2, xmm12
  15291. pand xmm3, xmm12
  15292. por xmm4, xmm0
  15293. por xmm5, xmm1
  15294. por xmm6, xmm2
  15295. por xmm7, xmm3
  15296. paddd xmm13, xmm11
  15297. ; ENTRY: 56
  15298. mov r9, QWORD PTR [rdx+448]
  15299. add r9, 64
  15300. movdqu xmm12, xmm13
  15301. pcmpeqd xmm12, xmm10
  15302. movdqu xmm0, [r9]
  15303. movdqu xmm1, [r9+16]
  15304. movdqu xmm2, [r9+32]
  15305. movdqu xmm3, [r9+48]
  15306. pand xmm0, xmm12
  15307. pand xmm1, xmm12
  15308. pand xmm2, xmm12
  15309. pand xmm3, xmm12
  15310. por xmm4, xmm0
  15311. por xmm5, xmm1
  15312. por xmm6, xmm2
  15313. por xmm7, xmm3
  15314. paddd xmm13, xmm11
  15315. ; ENTRY: 57
  15316. mov r9, QWORD PTR [rdx+456]
  15317. add r9, 64
  15318. movdqu xmm12, xmm13
  15319. pcmpeqd xmm12, xmm10
  15320. movdqu xmm0, [r9]
  15321. movdqu xmm1, [r9+16]
  15322. movdqu xmm2, [r9+32]
  15323. movdqu xmm3, [r9+48]
  15324. pand xmm0, xmm12
  15325. pand xmm1, xmm12
  15326. pand xmm2, xmm12
  15327. pand xmm3, xmm12
  15328. por xmm4, xmm0
  15329. por xmm5, xmm1
  15330. por xmm6, xmm2
  15331. por xmm7, xmm3
  15332. paddd xmm13, xmm11
  15333. ; ENTRY: 58
  15334. mov r9, QWORD PTR [rdx+464]
  15335. add r9, 64
  15336. movdqu xmm12, xmm13
  15337. pcmpeqd xmm12, xmm10
  15338. movdqu xmm0, [r9]
  15339. movdqu xmm1, [r9+16]
  15340. movdqu xmm2, [r9+32]
  15341. movdqu xmm3, [r9+48]
  15342. pand xmm0, xmm12
  15343. pand xmm1, xmm12
  15344. pand xmm2, xmm12
  15345. pand xmm3, xmm12
  15346. por xmm4, xmm0
  15347. por xmm5, xmm1
  15348. por xmm6, xmm2
  15349. por xmm7, xmm3
  15350. paddd xmm13, xmm11
  15351. ; ENTRY: 59
  15352. mov r9, QWORD PTR [rdx+472]
  15353. add r9, 64
  15354. movdqu xmm12, xmm13
  15355. pcmpeqd xmm12, xmm10
  15356. movdqu xmm0, [r9]
  15357. movdqu xmm1, [r9+16]
  15358. movdqu xmm2, [r9+32]
  15359. movdqu xmm3, [r9+48]
  15360. pand xmm0, xmm12
  15361. pand xmm1, xmm12
  15362. pand xmm2, xmm12
  15363. pand xmm3, xmm12
  15364. por xmm4, xmm0
  15365. por xmm5, xmm1
  15366. por xmm6, xmm2
  15367. por xmm7, xmm3
  15368. paddd xmm13, xmm11
  15369. ; ENTRY: 60
  15370. mov r9, QWORD PTR [rdx+480]
  15371. add r9, 64
  15372. movdqu xmm12, xmm13
  15373. pcmpeqd xmm12, xmm10
  15374. movdqu xmm0, [r9]
  15375. movdqu xmm1, [r9+16]
  15376. movdqu xmm2, [r9+32]
  15377. movdqu xmm3, [r9+48]
  15378. pand xmm0, xmm12
  15379. pand xmm1, xmm12
  15380. pand xmm2, xmm12
  15381. pand xmm3, xmm12
  15382. por xmm4, xmm0
  15383. por xmm5, xmm1
  15384. por xmm6, xmm2
  15385. por xmm7, xmm3
  15386. paddd xmm13, xmm11
  15387. ; ENTRY: 61
  15388. mov r9, QWORD PTR [rdx+488]
  15389. add r9, 64
  15390. movdqu xmm12, xmm13
  15391. pcmpeqd xmm12, xmm10
  15392. movdqu xmm0, [r9]
  15393. movdqu xmm1, [r9+16]
  15394. movdqu xmm2, [r9+32]
  15395. movdqu xmm3, [r9+48]
  15396. pand xmm0, xmm12
  15397. pand xmm1, xmm12
  15398. pand xmm2, xmm12
  15399. pand xmm3, xmm12
  15400. por xmm4, xmm0
  15401. por xmm5, xmm1
  15402. por xmm6, xmm2
  15403. por xmm7, xmm3
  15404. paddd xmm13, xmm11
  15405. ; ENTRY: 62
  15406. mov r9, QWORD PTR [rdx+496]
  15407. add r9, 64
  15408. movdqu xmm12, xmm13
  15409. pcmpeqd xmm12, xmm10
  15410. movdqu xmm0, [r9]
  15411. movdqu xmm1, [r9+16]
  15412. movdqu xmm2, [r9+32]
  15413. movdqu xmm3, [r9+48]
  15414. pand xmm0, xmm12
  15415. pand xmm1, xmm12
  15416. pand xmm2, xmm12
  15417. pand xmm3, xmm12
  15418. por xmm4, xmm0
  15419. por xmm5, xmm1
  15420. por xmm6, xmm2
  15421. por xmm7, xmm3
  15422. paddd xmm13, xmm11
  15423. ; ENTRY: 63
  15424. mov r9, QWORD PTR [rdx+504]
  15425. add r9, 64
  15426. movdqu xmm12, xmm13
  15427. pcmpeqd xmm12, xmm10
  15428. movdqu xmm0, [r9]
  15429. movdqu xmm1, [r9+16]
  15430. movdqu xmm2, [r9+32]
  15431. movdqu xmm3, [r9+48]
  15432. pand xmm0, xmm12
  15433. pand xmm1, xmm12
  15434. pand xmm2, xmm12
  15435. pand xmm3, xmm12
  15436. por xmm4, xmm0
  15437. por xmm5, xmm1
  15438. por xmm6, xmm2
  15439. por xmm7, xmm3
  15440. paddd xmm13, xmm11
  15441. movdqu [rcx], xmm4
  15442. movdqu [rcx+16], xmm5
  15443. movdqu [rcx+32], xmm6
  15444. movdqu [rcx+48], xmm7
  15445. add rcx, 64
  15446. ; END: 8-15
  15447. ; START: 16-23
  15448. pxor xmm13, xmm13
  15449. pxor xmm4, xmm4
  15450. pxor xmm5, xmm5
  15451. pxor xmm6, xmm6
  15452. pxor xmm7, xmm7
  15453. ; ENTRY: 0
  15454. mov r9, QWORD PTR [rdx]
  15455. add r9, 128
  15456. movdqu xmm12, xmm13
  15457. pcmpeqd xmm12, xmm10
  15458. movdqu xmm0, [r9]
  15459. movdqu xmm1, [r9+16]
  15460. movdqu xmm2, [r9+32]
  15461. movdqu xmm3, [r9+48]
  15462. pand xmm0, xmm12
  15463. pand xmm1, xmm12
  15464. pand xmm2, xmm12
  15465. pand xmm3, xmm12
  15466. por xmm4, xmm0
  15467. por xmm5, xmm1
  15468. por xmm6, xmm2
  15469. por xmm7, xmm3
  15470. paddd xmm13, xmm11
  15471. ; ENTRY: 1
  15472. mov r9, QWORD PTR [rdx+8]
  15473. add r9, 128
  15474. movdqu xmm12, xmm13
  15475. pcmpeqd xmm12, xmm10
  15476. movdqu xmm0, [r9]
  15477. movdqu xmm1, [r9+16]
  15478. movdqu xmm2, [r9+32]
  15479. movdqu xmm3, [r9+48]
  15480. pand xmm0, xmm12
  15481. pand xmm1, xmm12
  15482. pand xmm2, xmm12
  15483. pand xmm3, xmm12
  15484. por xmm4, xmm0
  15485. por xmm5, xmm1
  15486. por xmm6, xmm2
  15487. por xmm7, xmm3
  15488. paddd xmm13, xmm11
  15489. ; ENTRY: 2
  15490. mov r9, QWORD PTR [rdx+16]
  15491. add r9, 128
  15492. movdqu xmm12, xmm13
  15493. pcmpeqd xmm12, xmm10
  15494. movdqu xmm0, [r9]
  15495. movdqu xmm1, [r9+16]
  15496. movdqu xmm2, [r9+32]
  15497. movdqu xmm3, [r9+48]
  15498. pand xmm0, xmm12
  15499. pand xmm1, xmm12
  15500. pand xmm2, xmm12
  15501. pand xmm3, xmm12
  15502. por xmm4, xmm0
  15503. por xmm5, xmm1
  15504. por xmm6, xmm2
  15505. por xmm7, xmm3
  15506. paddd xmm13, xmm11
  15507. ; ENTRY: 3
  15508. mov r9, QWORD PTR [rdx+24]
  15509. add r9, 128
  15510. movdqu xmm12, xmm13
  15511. pcmpeqd xmm12, xmm10
  15512. movdqu xmm0, [r9]
  15513. movdqu xmm1, [r9+16]
  15514. movdqu xmm2, [r9+32]
  15515. movdqu xmm3, [r9+48]
  15516. pand xmm0, xmm12
  15517. pand xmm1, xmm12
  15518. pand xmm2, xmm12
  15519. pand xmm3, xmm12
  15520. por xmm4, xmm0
  15521. por xmm5, xmm1
  15522. por xmm6, xmm2
  15523. por xmm7, xmm3
  15524. paddd xmm13, xmm11
  15525. ; ENTRY: 4
  15526. mov r9, QWORD PTR [rdx+32]
  15527. add r9, 128
  15528. movdqu xmm12, xmm13
  15529. pcmpeqd xmm12, xmm10
  15530. movdqu xmm0, [r9]
  15531. movdqu xmm1, [r9+16]
  15532. movdqu xmm2, [r9+32]
  15533. movdqu xmm3, [r9+48]
  15534. pand xmm0, xmm12
  15535. pand xmm1, xmm12
  15536. pand xmm2, xmm12
  15537. pand xmm3, xmm12
  15538. por xmm4, xmm0
  15539. por xmm5, xmm1
  15540. por xmm6, xmm2
  15541. por xmm7, xmm3
  15542. paddd xmm13, xmm11
  15543. ; ENTRY: 5
  15544. mov r9, QWORD PTR [rdx+40]
  15545. add r9, 128
  15546. movdqu xmm12, xmm13
  15547. pcmpeqd xmm12, xmm10
  15548. movdqu xmm0, [r9]
  15549. movdqu xmm1, [r9+16]
  15550. movdqu xmm2, [r9+32]
  15551. movdqu xmm3, [r9+48]
  15552. pand xmm0, xmm12
  15553. pand xmm1, xmm12
  15554. pand xmm2, xmm12
  15555. pand xmm3, xmm12
  15556. por xmm4, xmm0
  15557. por xmm5, xmm1
  15558. por xmm6, xmm2
  15559. por xmm7, xmm3
  15560. paddd xmm13, xmm11
  15561. ; ENTRY: 6
  15562. mov r9, QWORD PTR [rdx+48]
  15563. add r9, 128
  15564. movdqu xmm12, xmm13
  15565. pcmpeqd xmm12, xmm10
  15566. movdqu xmm0, [r9]
  15567. movdqu xmm1, [r9+16]
  15568. movdqu xmm2, [r9+32]
  15569. movdqu xmm3, [r9+48]
  15570. pand xmm0, xmm12
  15571. pand xmm1, xmm12
  15572. pand xmm2, xmm12
  15573. pand xmm3, xmm12
  15574. por xmm4, xmm0
  15575. por xmm5, xmm1
  15576. por xmm6, xmm2
  15577. por xmm7, xmm3
  15578. paddd xmm13, xmm11
  15579. ; ENTRY: 7
  15580. mov r9, QWORD PTR [rdx+56]
  15581. add r9, 128
  15582. movdqu xmm12, xmm13
  15583. pcmpeqd xmm12, xmm10
  15584. movdqu xmm0, [r9]
  15585. movdqu xmm1, [r9+16]
  15586. movdqu xmm2, [r9+32]
  15587. movdqu xmm3, [r9+48]
  15588. pand xmm0, xmm12
  15589. pand xmm1, xmm12
  15590. pand xmm2, xmm12
  15591. pand xmm3, xmm12
  15592. por xmm4, xmm0
  15593. por xmm5, xmm1
  15594. por xmm6, xmm2
  15595. por xmm7, xmm3
  15596. paddd xmm13, xmm11
  15597. ; ENTRY: 8
  15598. mov r9, QWORD PTR [rdx+64]
  15599. add r9, 128
  15600. movdqu xmm12, xmm13
  15601. pcmpeqd xmm12, xmm10
  15602. movdqu xmm0, [r9]
  15603. movdqu xmm1, [r9+16]
  15604. movdqu xmm2, [r9+32]
  15605. movdqu xmm3, [r9+48]
  15606. pand xmm0, xmm12
  15607. pand xmm1, xmm12
  15608. pand xmm2, xmm12
  15609. pand xmm3, xmm12
  15610. por xmm4, xmm0
  15611. por xmm5, xmm1
  15612. por xmm6, xmm2
  15613. por xmm7, xmm3
  15614. paddd xmm13, xmm11
  15615. ; ENTRY: 9
  15616. mov r9, QWORD PTR [rdx+72]
  15617. add r9, 128
  15618. movdqu xmm12, xmm13
  15619. pcmpeqd xmm12, xmm10
  15620. movdqu xmm0, [r9]
  15621. movdqu xmm1, [r9+16]
  15622. movdqu xmm2, [r9+32]
  15623. movdqu xmm3, [r9+48]
  15624. pand xmm0, xmm12
  15625. pand xmm1, xmm12
  15626. pand xmm2, xmm12
  15627. pand xmm3, xmm12
  15628. por xmm4, xmm0
  15629. por xmm5, xmm1
  15630. por xmm6, xmm2
  15631. por xmm7, xmm3
  15632. paddd xmm13, xmm11
  15633. ; ENTRY: 10
  15634. mov r9, QWORD PTR [rdx+80]
  15635. add r9, 128
  15636. movdqu xmm12, xmm13
  15637. pcmpeqd xmm12, xmm10
  15638. movdqu xmm0, [r9]
  15639. movdqu xmm1, [r9+16]
  15640. movdqu xmm2, [r9+32]
  15641. movdqu xmm3, [r9+48]
  15642. pand xmm0, xmm12
  15643. pand xmm1, xmm12
  15644. pand xmm2, xmm12
  15645. pand xmm3, xmm12
  15646. por xmm4, xmm0
  15647. por xmm5, xmm1
  15648. por xmm6, xmm2
  15649. por xmm7, xmm3
  15650. paddd xmm13, xmm11
  15651. ; ENTRY: 11
  15652. mov r9, QWORD PTR [rdx+88]
  15653. add r9, 128
  15654. movdqu xmm12, xmm13
  15655. pcmpeqd xmm12, xmm10
  15656. movdqu xmm0, [r9]
  15657. movdqu xmm1, [r9+16]
  15658. movdqu xmm2, [r9+32]
  15659. movdqu xmm3, [r9+48]
  15660. pand xmm0, xmm12
  15661. pand xmm1, xmm12
  15662. pand xmm2, xmm12
  15663. pand xmm3, xmm12
  15664. por xmm4, xmm0
  15665. por xmm5, xmm1
  15666. por xmm6, xmm2
  15667. por xmm7, xmm3
  15668. paddd xmm13, xmm11
  15669. ; ENTRY: 12
  15670. mov r9, QWORD PTR [rdx+96]
  15671. add r9, 128
  15672. movdqu xmm12, xmm13
  15673. pcmpeqd xmm12, xmm10
  15674. movdqu xmm0, [r9]
  15675. movdqu xmm1, [r9+16]
  15676. movdqu xmm2, [r9+32]
  15677. movdqu xmm3, [r9+48]
  15678. pand xmm0, xmm12
  15679. pand xmm1, xmm12
  15680. pand xmm2, xmm12
  15681. pand xmm3, xmm12
  15682. por xmm4, xmm0
  15683. por xmm5, xmm1
  15684. por xmm6, xmm2
  15685. por xmm7, xmm3
  15686. paddd xmm13, xmm11
  15687. ; ENTRY: 13
  15688. mov r9, QWORD PTR [rdx+104]
  15689. add r9, 128
  15690. movdqu xmm12, xmm13
  15691. pcmpeqd xmm12, xmm10
  15692. movdqu xmm0, [r9]
  15693. movdqu xmm1, [r9+16]
  15694. movdqu xmm2, [r9+32]
  15695. movdqu xmm3, [r9+48]
  15696. pand xmm0, xmm12
  15697. pand xmm1, xmm12
  15698. pand xmm2, xmm12
  15699. pand xmm3, xmm12
  15700. por xmm4, xmm0
  15701. por xmm5, xmm1
  15702. por xmm6, xmm2
  15703. por xmm7, xmm3
  15704. paddd xmm13, xmm11
  15705. ; ENTRY: 14
  15706. mov r9, QWORD PTR [rdx+112]
  15707. add r9, 128
  15708. movdqu xmm12, xmm13
  15709. pcmpeqd xmm12, xmm10
  15710. movdqu xmm0, [r9]
  15711. movdqu xmm1, [r9+16]
  15712. movdqu xmm2, [r9+32]
  15713. movdqu xmm3, [r9+48]
  15714. pand xmm0, xmm12
  15715. pand xmm1, xmm12
  15716. pand xmm2, xmm12
  15717. pand xmm3, xmm12
  15718. por xmm4, xmm0
  15719. por xmm5, xmm1
  15720. por xmm6, xmm2
  15721. por xmm7, xmm3
  15722. paddd xmm13, xmm11
  15723. ; ENTRY: 15
  15724. mov r9, QWORD PTR [rdx+120]
  15725. add r9, 128
  15726. movdqu xmm12, xmm13
  15727. pcmpeqd xmm12, xmm10
  15728. movdqu xmm0, [r9]
  15729. movdqu xmm1, [r9+16]
  15730. movdqu xmm2, [r9+32]
  15731. movdqu xmm3, [r9+48]
  15732. pand xmm0, xmm12
  15733. pand xmm1, xmm12
  15734. pand xmm2, xmm12
  15735. pand xmm3, xmm12
  15736. por xmm4, xmm0
  15737. por xmm5, xmm1
  15738. por xmm6, xmm2
  15739. por xmm7, xmm3
  15740. paddd xmm13, xmm11
  15741. ; ENTRY: 16
  15742. mov r9, QWORD PTR [rdx+128]
  15743. add r9, 128
  15744. movdqu xmm12, xmm13
  15745. pcmpeqd xmm12, xmm10
  15746. movdqu xmm0, [r9]
  15747. movdqu xmm1, [r9+16]
  15748. movdqu xmm2, [r9+32]
  15749. movdqu xmm3, [r9+48]
  15750. pand xmm0, xmm12
  15751. pand xmm1, xmm12
  15752. pand xmm2, xmm12
  15753. pand xmm3, xmm12
  15754. por xmm4, xmm0
  15755. por xmm5, xmm1
  15756. por xmm6, xmm2
  15757. por xmm7, xmm3
  15758. paddd xmm13, xmm11
  15759. ; ENTRY: 17
  15760. mov r9, QWORD PTR [rdx+136]
  15761. add r9, 128
  15762. movdqu xmm12, xmm13
  15763. pcmpeqd xmm12, xmm10
  15764. movdqu xmm0, [r9]
  15765. movdqu xmm1, [r9+16]
  15766. movdqu xmm2, [r9+32]
  15767. movdqu xmm3, [r9+48]
  15768. pand xmm0, xmm12
  15769. pand xmm1, xmm12
  15770. pand xmm2, xmm12
  15771. pand xmm3, xmm12
  15772. por xmm4, xmm0
  15773. por xmm5, xmm1
  15774. por xmm6, xmm2
  15775. por xmm7, xmm3
  15776. paddd xmm13, xmm11
  15777. ; ENTRY: 18
  15778. mov r9, QWORD PTR [rdx+144]
  15779. add r9, 128
  15780. movdqu xmm12, xmm13
  15781. pcmpeqd xmm12, xmm10
  15782. movdqu xmm0, [r9]
  15783. movdqu xmm1, [r9+16]
  15784. movdqu xmm2, [r9+32]
  15785. movdqu xmm3, [r9+48]
  15786. pand xmm0, xmm12
  15787. pand xmm1, xmm12
  15788. pand xmm2, xmm12
  15789. pand xmm3, xmm12
  15790. por xmm4, xmm0
  15791. por xmm5, xmm1
  15792. por xmm6, xmm2
  15793. por xmm7, xmm3
  15794. paddd xmm13, xmm11
  15795. ; ENTRY: 19
  15796. mov r9, QWORD PTR [rdx+152]
  15797. add r9, 128
  15798. movdqu xmm12, xmm13
  15799. pcmpeqd xmm12, xmm10
  15800. movdqu xmm0, [r9]
  15801. movdqu xmm1, [r9+16]
  15802. movdqu xmm2, [r9+32]
  15803. movdqu xmm3, [r9+48]
  15804. pand xmm0, xmm12
  15805. pand xmm1, xmm12
  15806. pand xmm2, xmm12
  15807. pand xmm3, xmm12
  15808. por xmm4, xmm0
  15809. por xmm5, xmm1
  15810. por xmm6, xmm2
  15811. por xmm7, xmm3
  15812. paddd xmm13, xmm11
  15813. ; ENTRY: 20
  15814. mov r9, QWORD PTR [rdx+160]
  15815. add r9, 128
  15816. movdqu xmm12, xmm13
  15817. pcmpeqd xmm12, xmm10
  15818. movdqu xmm0, [r9]
  15819. movdqu xmm1, [r9+16]
  15820. movdqu xmm2, [r9+32]
  15821. movdqu xmm3, [r9+48]
  15822. pand xmm0, xmm12
  15823. pand xmm1, xmm12
  15824. pand xmm2, xmm12
  15825. pand xmm3, xmm12
  15826. por xmm4, xmm0
  15827. por xmm5, xmm1
  15828. por xmm6, xmm2
  15829. por xmm7, xmm3
  15830. paddd xmm13, xmm11
  15831. ; ENTRY: 21
  15832. mov r9, QWORD PTR [rdx+168]
  15833. add r9, 128
  15834. movdqu xmm12, xmm13
  15835. pcmpeqd xmm12, xmm10
  15836. movdqu xmm0, [r9]
  15837. movdqu xmm1, [r9+16]
  15838. movdqu xmm2, [r9+32]
  15839. movdqu xmm3, [r9+48]
  15840. pand xmm0, xmm12
  15841. pand xmm1, xmm12
  15842. pand xmm2, xmm12
  15843. pand xmm3, xmm12
  15844. por xmm4, xmm0
  15845. por xmm5, xmm1
  15846. por xmm6, xmm2
  15847. por xmm7, xmm3
  15848. paddd xmm13, xmm11
  15849. ; ENTRY: 22
  15850. mov r9, QWORD PTR [rdx+176]
  15851. add r9, 128
  15852. movdqu xmm12, xmm13
  15853. pcmpeqd xmm12, xmm10
  15854. movdqu xmm0, [r9]
  15855. movdqu xmm1, [r9+16]
  15856. movdqu xmm2, [r9+32]
  15857. movdqu xmm3, [r9+48]
  15858. pand xmm0, xmm12
  15859. pand xmm1, xmm12
  15860. pand xmm2, xmm12
  15861. pand xmm3, xmm12
  15862. por xmm4, xmm0
  15863. por xmm5, xmm1
  15864. por xmm6, xmm2
  15865. por xmm7, xmm3
  15866. paddd xmm13, xmm11
  15867. ; ENTRY: 23
  15868. mov r9, QWORD PTR [rdx+184]
  15869. add r9, 128
  15870. movdqu xmm12, xmm13
  15871. pcmpeqd xmm12, xmm10
  15872. movdqu xmm0, [r9]
  15873. movdqu xmm1, [r9+16]
  15874. movdqu xmm2, [r9+32]
  15875. movdqu xmm3, [r9+48]
  15876. pand xmm0, xmm12
  15877. pand xmm1, xmm12
  15878. pand xmm2, xmm12
  15879. pand xmm3, xmm12
  15880. por xmm4, xmm0
  15881. por xmm5, xmm1
  15882. por xmm6, xmm2
  15883. por xmm7, xmm3
  15884. paddd xmm13, xmm11
  15885. ; ENTRY: 24
  15886. mov r9, QWORD PTR [rdx+192]
  15887. add r9, 128
  15888. movdqu xmm12, xmm13
  15889. pcmpeqd xmm12, xmm10
  15890. movdqu xmm0, [r9]
  15891. movdqu xmm1, [r9+16]
  15892. movdqu xmm2, [r9+32]
  15893. movdqu xmm3, [r9+48]
  15894. pand xmm0, xmm12
  15895. pand xmm1, xmm12
  15896. pand xmm2, xmm12
  15897. pand xmm3, xmm12
  15898. por xmm4, xmm0
  15899. por xmm5, xmm1
  15900. por xmm6, xmm2
  15901. por xmm7, xmm3
  15902. paddd xmm13, xmm11
  15903. ; ENTRY: 25
  15904. mov r9, QWORD PTR [rdx+200]
  15905. add r9, 128
  15906. movdqu xmm12, xmm13
  15907. pcmpeqd xmm12, xmm10
  15908. movdqu xmm0, [r9]
  15909. movdqu xmm1, [r9+16]
  15910. movdqu xmm2, [r9+32]
  15911. movdqu xmm3, [r9+48]
  15912. pand xmm0, xmm12
  15913. pand xmm1, xmm12
  15914. pand xmm2, xmm12
  15915. pand xmm3, xmm12
  15916. por xmm4, xmm0
  15917. por xmm5, xmm1
  15918. por xmm6, xmm2
  15919. por xmm7, xmm3
  15920. paddd xmm13, xmm11
  15921. ; ENTRY: 26
  15922. mov r9, QWORD PTR [rdx+208]
  15923. add r9, 128
  15924. movdqu xmm12, xmm13
  15925. pcmpeqd xmm12, xmm10
  15926. movdqu xmm0, [r9]
  15927. movdqu xmm1, [r9+16]
  15928. movdqu xmm2, [r9+32]
  15929. movdqu xmm3, [r9+48]
  15930. pand xmm0, xmm12
  15931. pand xmm1, xmm12
  15932. pand xmm2, xmm12
  15933. pand xmm3, xmm12
  15934. por xmm4, xmm0
  15935. por xmm5, xmm1
  15936. por xmm6, xmm2
  15937. por xmm7, xmm3
  15938. paddd xmm13, xmm11
  15939. ; ENTRY: 27
  15940. mov r9, QWORD PTR [rdx+216]
  15941. add r9, 128
  15942. movdqu xmm12, xmm13
  15943. pcmpeqd xmm12, xmm10
  15944. movdqu xmm0, [r9]
  15945. movdqu xmm1, [r9+16]
  15946. movdqu xmm2, [r9+32]
  15947. movdqu xmm3, [r9+48]
  15948. pand xmm0, xmm12
  15949. pand xmm1, xmm12
  15950. pand xmm2, xmm12
  15951. pand xmm3, xmm12
  15952. por xmm4, xmm0
  15953. por xmm5, xmm1
  15954. por xmm6, xmm2
  15955. por xmm7, xmm3
  15956. paddd xmm13, xmm11
  15957. ; ENTRY: 28
  15958. mov r9, QWORD PTR [rdx+224]
  15959. add r9, 128
  15960. movdqu xmm12, xmm13
  15961. pcmpeqd xmm12, xmm10
  15962. movdqu xmm0, [r9]
  15963. movdqu xmm1, [r9+16]
  15964. movdqu xmm2, [r9+32]
  15965. movdqu xmm3, [r9+48]
  15966. pand xmm0, xmm12
  15967. pand xmm1, xmm12
  15968. pand xmm2, xmm12
  15969. pand xmm3, xmm12
  15970. por xmm4, xmm0
  15971. por xmm5, xmm1
  15972. por xmm6, xmm2
  15973. por xmm7, xmm3
  15974. paddd xmm13, xmm11
  15975. ; ENTRY: 29
  15976. mov r9, QWORD PTR [rdx+232]
  15977. add r9, 128
  15978. movdqu xmm12, xmm13
  15979. pcmpeqd xmm12, xmm10
  15980. movdqu xmm0, [r9]
  15981. movdqu xmm1, [r9+16]
  15982. movdqu xmm2, [r9+32]
  15983. movdqu xmm3, [r9+48]
  15984. pand xmm0, xmm12
  15985. pand xmm1, xmm12
  15986. pand xmm2, xmm12
  15987. pand xmm3, xmm12
  15988. por xmm4, xmm0
  15989. por xmm5, xmm1
  15990. por xmm6, xmm2
  15991. por xmm7, xmm3
  15992. paddd xmm13, xmm11
  15993. ; ENTRY: 30
  15994. mov r9, QWORD PTR [rdx+240]
  15995. add r9, 128
  15996. movdqu xmm12, xmm13
  15997. pcmpeqd xmm12, xmm10
  15998. movdqu xmm0, [r9]
  15999. movdqu xmm1, [r9+16]
  16000. movdqu xmm2, [r9+32]
  16001. movdqu xmm3, [r9+48]
  16002. pand xmm0, xmm12
  16003. pand xmm1, xmm12
  16004. pand xmm2, xmm12
  16005. pand xmm3, xmm12
  16006. por xmm4, xmm0
  16007. por xmm5, xmm1
  16008. por xmm6, xmm2
  16009. por xmm7, xmm3
  16010. paddd xmm13, xmm11
  16011. ; ENTRY: 31
  16012. mov r9, QWORD PTR [rdx+248]
  16013. add r9, 128
  16014. movdqu xmm12, xmm13
  16015. pcmpeqd xmm12, xmm10
  16016. movdqu xmm0, [r9]
  16017. movdqu xmm1, [r9+16]
  16018. movdqu xmm2, [r9+32]
  16019. movdqu xmm3, [r9+48]
  16020. pand xmm0, xmm12
  16021. pand xmm1, xmm12
  16022. pand xmm2, xmm12
  16023. pand xmm3, xmm12
  16024. por xmm4, xmm0
  16025. por xmm5, xmm1
  16026. por xmm6, xmm2
  16027. por xmm7, xmm3
  16028. paddd xmm13, xmm11
  16029. ; ENTRY: 32
  16030. mov r9, QWORD PTR [rdx+256]
  16031. add r9, 128
  16032. movdqu xmm12, xmm13
  16033. pcmpeqd xmm12, xmm10
  16034. movdqu xmm0, [r9]
  16035. movdqu xmm1, [r9+16]
  16036. movdqu xmm2, [r9+32]
  16037. movdqu xmm3, [r9+48]
  16038. pand xmm0, xmm12
  16039. pand xmm1, xmm12
  16040. pand xmm2, xmm12
  16041. pand xmm3, xmm12
  16042. por xmm4, xmm0
  16043. por xmm5, xmm1
  16044. por xmm6, xmm2
  16045. por xmm7, xmm3
  16046. paddd xmm13, xmm11
  16047. ; ENTRY: 33
  16048. mov r9, QWORD PTR [rdx+264]
  16049. add r9, 128
  16050. movdqu xmm12, xmm13
  16051. pcmpeqd xmm12, xmm10
  16052. movdqu xmm0, [r9]
  16053. movdqu xmm1, [r9+16]
  16054. movdqu xmm2, [r9+32]
  16055. movdqu xmm3, [r9+48]
  16056. pand xmm0, xmm12
  16057. pand xmm1, xmm12
  16058. pand xmm2, xmm12
  16059. pand xmm3, xmm12
  16060. por xmm4, xmm0
  16061. por xmm5, xmm1
  16062. por xmm6, xmm2
  16063. por xmm7, xmm3
  16064. paddd xmm13, xmm11
  16065. ; ENTRY: 34
  16066. mov r9, QWORD PTR [rdx+272]
  16067. add r9, 128
  16068. movdqu xmm12, xmm13
  16069. pcmpeqd xmm12, xmm10
  16070. movdqu xmm0, [r9]
  16071. movdqu xmm1, [r9+16]
  16072. movdqu xmm2, [r9+32]
  16073. movdqu xmm3, [r9+48]
  16074. pand xmm0, xmm12
  16075. pand xmm1, xmm12
  16076. pand xmm2, xmm12
  16077. pand xmm3, xmm12
  16078. por xmm4, xmm0
  16079. por xmm5, xmm1
  16080. por xmm6, xmm2
  16081. por xmm7, xmm3
  16082. paddd xmm13, xmm11
  16083. ; ENTRY: 35
  16084. mov r9, QWORD PTR [rdx+280]
  16085. add r9, 128
  16086. movdqu xmm12, xmm13
  16087. pcmpeqd xmm12, xmm10
  16088. movdqu xmm0, [r9]
  16089. movdqu xmm1, [r9+16]
  16090. movdqu xmm2, [r9+32]
  16091. movdqu xmm3, [r9+48]
  16092. pand xmm0, xmm12
  16093. pand xmm1, xmm12
  16094. pand xmm2, xmm12
  16095. pand xmm3, xmm12
  16096. por xmm4, xmm0
  16097. por xmm5, xmm1
  16098. por xmm6, xmm2
  16099. por xmm7, xmm3
  16100. paddd xmm13, xmm11
  16101. ; ENTRY: 36
  16102. mov r9, QWORD PTR [rdx+288]
  16103. add r9, 128
  16104. movdqu xmm12, xmm13
  16105. pcmpeqd xmm12, xmm10
  16106. movdqu xmm0, [r9]
  16107. movdqu xmm1, [r9+16]
  16108. movdqu xmm2, [r9+32]
  16109. movdqu xmm3, [r9+48]
  16110. pand xmm0, xmm12
  16111. pand xmm1, xmm12
  16112. pand xmm2, xmm12
  16113. pand xmm3, xmm12
  16114. por xmm4, xmm0
  16115. por xmm5, xmm1
  16116. por xmm6, xmm2
  16117. por xmm7, xmm3
  16118. paddd xmm13, xmm11
  16119. ; ENTRY: 37
  16120. mov r9, QWORD PTR [rdx+296]
  16121. add r9, 128
  16122. movdqu xmm12, xmm13
  16123. pcmpeqd xmm12, xmm10
  16124. movdqu xmm0, [r9]
  16125. movdqu xmm1, [r9+16]
  16126. movdqu xmm2, [r9+32]
  16127. movdqu xmm3, [r9+48]
  16128. pand xmm0, xmm12
  16129. pand xmm1, xmm12
  16130. pand xmm2, xmm12
  16131. pand xmm3, xmm12
  16132. por xmm4, xmm0
  16133. por xmm5, xmm1
  16134. por xmm6, xmm2
  16135. por xmm7, xmm3
  16136. paddd xmm13, xmm11
  16137. ; ENTRY: 38
  16138. mov r9, QWORD PTR [rdx+304]
  16139. add r9, 128
  16140. movdqu xmm12, xmm13
  16141. pcmpeqd xmm12, xmm10
  16142. movdqu xmm0, [r9]
  16143. movdqu xmm1, [r9+16]
  16144. movdqu xmm2, [r9+32]
  16145. movdqu xmm3, [r9+48]
  16146. pand xmm0, xmm12
  16147. pand xmm1, xmm12
  16148. pand xmm2, xmm12
  16149. pand xmm3, xmm12
  16150. por xmm4, xmm0
  16151. por xmm5, xmm1
  16152. por xmm6, xmm2
  16153. por xmm7, xmm3
  16154. paddd xmm13, xmm11
  16155. ; ENTRY: 39
  16156. mov r9, QWORD PTR [rdx+312]
  16157. add r9, 128
  16158. movdqu xmm12, xmm13
  16159. pcmpeqd xmm12, xmm10
  16160. movdqu xmm0, [r9]
  16161. movdqu xmm1, [r9+16]
  16162. movdqu xmm2, [r9+32]
  16163. movdqu xmm3, [r9+48]
  16164. pand xmm0, xmm12
  16165. pand xmm1, xmm12
  16166. pand xmm2, xmm12
  16167. pand xmm3, xmm12
  16168. por xmm4, xmm0
  16169. por xmm5, xmm1
  16170. por xmm6, xmm2
  16171. por xmm7, xmm3
  16172. paddd xmm13, xmm11
  16173. ; ENTRY: 40
  16174. mov r9, QWORD PTR [rdx+320]
  16175. add r9, 128
  16176. movdqu xmm12, xmm13
  16177. pcmpeqd xmm12, xmm10
  16178. movdqu xmm0, [r9]
  16179. movdqu xmm1, [r9+16]
  16180. movdqu xmm2, [r9+32]
  16181. movdqu xmm3, [r9+48]
  16182. pand xmm0, xmm12
  16183. pand xmm1, xmm12
  16184. pand xmm2, xmm12
  16185. pand xmm3, xmm12
  16186. por xmm4, xmm0
  16187. por xmm5, xmm1
  16188. por xmm6, xmm2
  16189. por xmm7, xmm3
  16190. paddd xmm13, xmm11
  16191. ; ENTRY: 41
  16192. mov r9, QWORD PTR [rdx+328]
  16193. add r9, 128
  16194. movdqu xmm12, xmm13
  16195. pcmpeqd xmm12, xmm10
  16196. movdqu xmm0, [r9]
  16197. movdqu xmm1, [r9+16]
  16198. movdqu xmm2, [r9+32]
  16199. movdqu xmm3, [r9+48]
  16200. pand xmm0, xmm12
  16201. pand xmm1, xmm12
  16202. pand xmm2, xmm12
  16203. pand xmm3, xmm12
  16204. por xmm4, xmm0
  16205. por xmm5, xmm1
  16206. por xmm6, xmm2
  16207. por xmm7, xmm3
  16208. paddd xmm13, xmm11
  16209. ; ENTRY: 42
  16210. mov r9, QWORD PTR [rdx+336]
  16211. add r9, 128
  16212. movdqu xmm12, xmm13
  16213. pcmpeqd xmm12, xmm10
  16214. movdqu xmm0, [r9]
  16215. movdqu xmm1, [r9+16]
  16216. movdqu xmm2, [r9+32]
  16217. movdqu xmm3, [r9+48]
  16218. pand xmm0, xmm12
  16219. pand xmm1, xmm12
  16220. pand xmm2, xmm12
  16221. pand xmm3, xmm12
  16222. por xmm4, xmm0
  16223. por xmm5, xmm1
  16224. por xmm6, xmm2
  16225. por xmm7, xmm3
  16226. paddd xmm13, xmm11
  16227. ; ENTRY: 43
  16228. mov r9, QWORD PTR [rdx+344]
  16229. add r9, 128
  16230. movdqu xmm12, xmm13
  16231. pcmpeqd xmm12, xmm10
  16232. movdqu xmm0, [r9]
  16233. movdqu xmm1, [r9+16]
  16234. movdqu xmm2, [r9+32]
  16235. movdqu xmm3, [r9+48]
  16236. pand xmm0, xmm12
  16237. pand xmm1, xmm12
  16238. pand xmm2, xmm12
  16239. pand xmm3, xmm12
  16240. por xmm4, xmm0
  16241. por xmm5, xmm1
  16242. por xmm6, xmm2
  16243. por xmm7, xmm3
  16244. paddd xmm13, xmm11
  16245. ; ENTRY: 44
  16246. mov r9, QWORD PTR [rdx+352]
  16247. add r9, 128
  16248. movdqu xmm12, xmm13
  16249. pcmpeqd xmm12, xmm10
  16250. movdqu xmm0, [r9]
  16251. movdqu xmm1, [r9+16]
  16252. movdqu xmm2, [r9+32]
  16253. movdqu xmm3, [r9+48]
  16254. pand xmm0, xmm12
  16255. pand xmm1, xmm12
  16256. pand xmm2, xmm12
  16257. pand xmm3, xmm12
  16258. por xmm4, xmm0
  16259. por xmm5, xmm1
  16260. por xmm6, xmm2
  16261. por xmm7, xmm3
  16262. paddd xmm13, xmm11
  16263. ; ENTRY: 45
  16264. mov r9, QWORD PTR [rdx+360]
  16265. add r9, 128
  16266. movdqu xmm12, xmm13
  16267. pcmpeqd xmm12, xmm10
  16268. movdqu xmm0, [r9]
  16269. movdqu xmm1, [r9+16]
  16270. movdqu xmm2, [r9+32]
  16271. movdqu xmm3, [r9+48]
  16272. pand xmm0, xmm12
  16273. pand xmm1, xmm12
  16274. pand xmm2, xmm12
  16275. pand xmm3, xmm12
  16276. por xmm4, xmm0
  16277. por xmm5, xmm1
  16278. por xmm6, xmm2
  16279. por xmm7, xmm3
  16280. paddd xmm13, xmm11
  16281. ; ENTRY: 46
  16282. mov r9, QWORD PTR [rdx+368]
  16283. add r9, 128
  16284. movdqu xmm12, xmm13
  16285. pcmpeqd xmm12, xmm10
  16286. movdqu xmm0, [r9]
  16287. movdqu xmm1, [r9+16]
  16288. movdqu xmm2, [r9+32]
  16289. movdqu xmm3, [r9+48]
  16290. pand xmm0, xmm12
  16291. pand xmm1, xmm12
  16292. pand xmm2, xmm12
  16293. pand xmm3, xmm12
  16294. por xmm4, xmm0
  16295. por xmm5, xmm1
  16296. por xmm6, xmm2
  16297. por xmm7, xmm3
  16298. paddd xmm13, xmm11
  16299. ; ENTRY: 47
  16300. mov r9, QWORD PTR [rdx+376]
  16301. add r9, 128
  16302. movdqu xmm12, xmm13
  16303. pcmpeqd xmm12, xmm10
  16304. movdqu xmm0, [r9]
  16305. movdqu xmm1, [r9+16]
  16306. movdqu xmm2, [r9+32]
  16307. movdqu xmm3, [r9+48]
  16308. pand xmm0, xmm12
  16309. pand xmm1, xmm12
  16310. pand xmm2, xmm12
  16311. pand xmm3, xmm12
  16312. por xmm4, xmm0
  16313. por xmm5, xmm1
  16314. por xmm6, xmm2
  16315. por xmm7, xmm3
  16316. paddd xmm13, xmm11
  16317. ; ENTRY: 48
  16318. mov r9, QWORD PTR [rdx+384]
  16319. add r9, 128
  16320. movdqu xmm12, xmm13
  16321. pcmpeqd xmm12, xmm10
  16322. movdqu xmm0, [r9]
  16323. movdqu xmm1, [r9+16]
  16324. movdqu xmm2, [r9+32]
  16325. movdqu xmm3, [r9+48]
  16326. pand xmm0, xmm12
  16327. pand xmm1, xmm12
  16328. pand xmm2, xmm12
  16329. pand xmm3, xmm12
  16330. por xmm4, xmm0
  16331. por xmm5, xmm1
  16332. por xmm6, xmm2
  16333. por xmm7, xmm3
  16334. paddd xmm13, xmm11
  16335. ; ENTRY: 49
  16336. mov r9, QWORD PTR [rdx+392]
  16337. add r9, 128
  16338. movdqu xmm12, xmm13
  16339. pcmpeqd xmm12, xmm10
  16340. movdqu xmm0, [r9]
  16341. movdqu xmm1, [r9+16]
  16342. movdqu xmm2, [r9+32]
  16343. movdqu xmm3, [r9+48]
  16344. pand xmm0, xmm12
  16345. pand xmm1, xmm12
  16346. pand xmm2, xmm12
  16347. pand xmm3, xmm12
  16348. por xmm4, xmm0
  16349. por xmm5, xmm1
  16350. por xmm6, xmm2
  16351. por xmm7, xmm3
  16352. paddd xmm13, xmm11
  16353. ; ENTRY: 50
  16354. mov r9, QWORD PTR [rdx+400]
  16355. add r9, 128
  16356. movdqu xmm12, xmm13
  16357. pcmpeqd xmm12, xmm10
  16358. movdqu xmm0, [r9]
  16359. movdqu xmm1, [r9+16]
  16360. movdqu xmm2, [r9+32]
  16361. movdqu xmm3, [r9+48]
  16362. pand xmm0, xmm12
  16363. pand xmm1, xmm12
  16364. pand xmm2, xmm12
  16365. pand xmm3, xmm12
  16366. por xmm4, xmm0
  16367. por xmm5, xmm1
  16368. por xmm6, xmm2
  16369. por xmm7, xmm3
  16370. paddd xmm13, xmm11
  16371. ; ENTRY: 51
  16372. mov r9, QWORD PTR [rdx+408]
  16373. add r9, 128
  16374. movdqu xmm12, xmm13
  16375. pcmpeqd xmm12, xmm10
  16376. movdqu xmm0, [r9]
  16377. movdqu xmm1, [r9+16]
  16378. movdqu xmm2, [r9+32]
  16379. movdqu xmm3, [r9+48]
  16380. pand xmm0, xmm12
  16381. pand xmm1, xmm12
  16382. pand xmm2, xmm12
  16383. pand xmm3, xmm12
  16384. por xmm4, xmm0
  16385. por xmm5, xmm1
  16386. por xmm6, xmm2
  16387. por xmm7, xmm3
  16388. paddd xmm13, xmm11
  16389. ; ENTRY: 52
  16390. mov r9, QWORD PTR [rdx+416]
  16391. add r9, 128
  16392. movdqu xmm12, xmm13
  16393. pcmpeqd xmm12, xmm10
  16394. movdqu xmm0, [r9]
  16395. movdqu xmm1, [r9+16]
  16396. movdqu xmm2, [r9+32]
  16397. movdqu xmm3, [r9+48]
  16398. pand xmm0, xmm12
  16399. pand xmm1, xmm12
  16400. pand xmm2, xmm12
  16401. pand xmm3, xmm12
  16402. por xmm4, xmm0
  16403. por xmm5, xmm1
  16404. por xmm6, xmm2
  16405. por xmm7, xmm3
  16406. paddd xmm13, xmm11
  16407. ; ENTRY: 53
  16408. mov r9, QWORD PTR [rdx+424]
  16409. add r9, 128
  16410. movdqu xmm12, xmm13
  16411. pcmpeqd xmm12, xmm10
  16412. movdqu xmm0, [r9]
  16413. movdqu xmm1, [r9+16]
  16414. movdqu xmm2, [r9+32]
  16415. movdqu xmm3, [r9+48]
  16416. pand xmm0, xmm12
  16417. pand xmm1, xmm12
  16418. pand xmm2, xmm12
  16419. pand xmm3, xmm12
  16420. por xmm4, xmm0
  16421. por xmm5, xmm1
  16422. por xmm6, xmm2
  16423. por xmm7, xmm3
  16424. paddd xmm13, xmm11
  16425. ; ENTRY: 54
  16426. mov r9, QWORD PTR [rdx+432]
  16427. add r9, 128
  16428. movdqu xmm12, xmm13
  16429. pcmpeqd xmm12, xmm10
  16430. movdqu xmm0, [r9]
  16431. movdqu xmm1, [r9+16]
  16432. movdqu xmm2, [r9+32]
  16433. movdqu xmm3, [r9+48]
  16434. pand xmm0, xmm12
  16435. pand xmm1, xmm12
  16436. pand xmm2, xmm12
  16437. pand xmm3, xmm12
  16438. por xmm4, xmm0
  16439. por xmm5, xmm1
  16440. por xmm6, xmm2
  16441. por xmm7, xmm3
  16442. paddd xmm13, xmm11
  16443. ; ENTRY: 55
  16444. mov r9, QWORD PTR [rdx+440]
  16445. add r9, 128
  16446. movdqu xmm12, xmm13
  16447. pcmpeqd xmm12, xmm10
  16448. movdqu xmm0, [r9]
  16449. movdqu xmm1, [r9+16]
  16450. movdqu xmm2, [r9+32]
  16451. movdqu xmm3, [r9+48]
  16452. pand xmm0, xmm12
  16453. pand xmm1, xmm12
  16454. pand xmm2, xmm12
  16455. pand xmm3, xmm12
  16456. por xmm4, xmm0
  16457. por xmm5, xmm1
  16458. por xmm6, xmm2
  16459. por xmm7, xmm3
  16460. paddd xmm13, xmm11
  16461. ; ENTRY: 56
  16462. mov r9, QWORD PTR [rdx+448]
  16463. add r9, 128
  16464. movdqu xmm12, xmm13
  16465. pcmpeqd xmm12, xmm10
  16466. movdqu xmm0, [r9]
  16467. movdqu xmm1, [r9+16]
  16468. movdqu xmm2, [r9+32]
  16469. movdqu xmm3, [r9+48]
  16470. pand xmm0, xmm12
  16471. pand xmm1, xmm12
  16472. pand xmm2, xmm12
  16473. pand xmm3, xmm12
  16474. por xmm4, xmm0
  16475. por xmm5, xmm1
  16476. por xmm6, xmm2
  16477. por xmm7, xmm3
  16478. paddd xmm13, xmm11
  16479. ; ENTRY: 57
  16480. mov r9, QWORD PTR [rdx+456]
  16481. add r9, 128
  16482. movdqu xmm12, xmm13
  16483. pcmpeqd xmm12, xmm10
  16484. movdqu xmm0, [r9]
  16485. movdqu xmm1, [r9+16]
  16486. movdqu xmm2, [r9+32]
  16487. movdqu xmm3, [r9+48]
  16488. pand xmm0, xmm12
  16489. pand xmm1, xmm12
  16490. pand xmm2, xmm12
  16491. pand xmm3, xmm12
  16492. por xmm4, xmm0
  16493. por xmm5, xmm1
  16494. por xmm6, xmm2
  16495. por xmm7, xmm3
  16496. paddd xmm13, xmm11
  16497. ; ENTRY: 58
  16498. mov r9, QWORD PTR [rdx+464]
  16499. add r9, 128
  16500. movdqu xmm12, xmm13
  16501. pcmpeqd xmm12, xmm10
  16502. movdqu xmm0, [r9]
  16503. movdqu xmm1, [r9+16]
  16504. movdqu xmm2, [r9+32]
  16505. movdqu xmm3, [r9+48]
  16506. pand xmm0, xmm12
  16507. pand xmm1, xmm12
  16508. pand xmm2, xmm12
  16509. pand xmm3, xmm12
  16510. por xmm4, xmm0
  16511. por xmm5, xmm1
  16512. por xmm6, xmm2
  16513. por xmm7, xmm3
  16514. paddd xmm13, xmm11
  16515. ; ENTRY: 59
  16516. mov r9, QWORD PTR [rdx+472]
  16517. add r9, 128
  16518. movdqu xmm12, xmm13
  16519. pcmpeqd xmm12, xmm10
  16520. movdqu xmm0, [r9]
  16521. movdqu xmm1, [r9+16]
  16522. movdqu xmm2, [r9+32]
  16523. movdqu xmm3, [r9+48]
  16524. pand xmm0, xmm12
  16525. pand xmm1, xmm12
  16526. pand xmm2, xmm12
  16527. pand xmm3, xmm12
  16528. por xmm4, xmm0
  16529. por xmm5, xmm1
  16530. por xmm6, xmm2
  16531. por xmm7, xmm3
  16532. paddd xmm13, xmm11
  16533. ; ENTRY: 60
  16534. mov r9, QWORD PTR [rdx+480]
  16535. add r9, 128
  16536. movdqu xmm12, xmm13
  16537. pcmpeqd xmm12, xmm10
  16538. movdqu xmm0, [r9]
  16539. movdqu xmm1, [r9+16]
  16540. movdqu xmm2, [r9+32]
  16541. movdqu xmm3, [r9+48]
  16542. pand xmm0, xmm12
  16543. pand xmm1, xmm12
  16544. pand xmm2, xmm12
  16545. pand xmm3, xmm12
  16546. por xmm4, xmm0
  16547. por xmm5, xmm1
  16548. por xmm6, xmm2
  16549. por xmm7, xmm3
  16550. paddd xmm13, xmm11
  16551. ; ENTRY: 61
  16552. mov r9, QWORD PTR [rdx+488]
  16553. add r9, 128
  16554. movdqu xmm12, xmm13
  16555. pcmpeqd xmm12, xmm10
  16556. movdqu xmm0, [r9]
  16557. movdqu xmm1, [r9+16]
  16558. movdqu xmm2, [r9+32]
  16559. movdqu xmm3, [r9+48]
  16560. pand xmm0, xmm12
  16561. pand xmm1, xmm12
  16562. pand xmm2, xmm12
  16563. pand xmm3, xmm12
  16564. por xmm4, xmm0
  16565. por xmm5, xmm1
  16566. por xmm6, xmm2
  16567. por xmm7, xmm3
  16568. paddd xmm13, xmm11
  16569. ; ENTRY: 62
  16570. mov r9, QWORD PTR [rdx+496]
  16571. add r9, 128
  16572. movdqu xmm12, xmm13
  16573. pcmpeqd xmm12, xmm10
  16574. movdqu xmm0, [r9]
  16575. movdqu xmm1, [r9+16]
  16576. movdqu xmm2, [r9+32]
  16577. movdqu xmm3, [r9+48]
  16578. pand xmm0, xmm12
  16579. pand xmm1, xmm12
  16580. pand xmm2, xmm12
  16581. pand xmm3, xmm12
  16582. por xmm4, xmm0
  16583. por xmm5, xmm1
  16584. por xmm6, xmm2
  16585. por xmm7, xmm3
  16586. paddd xmm13, xmm11
  16587. ; ENTRY: 63
  16588. mov r9, QWORD PTR [rdx+504]
  16589. add r9, 128
  16590. movdqu xmm12, xmm13
  16591. pcmpeqd xmm12, xmm10
  16592. movdqu xmm0, [r9]
  16593. movdqu xmm1, [r9+16]
  16594. movdqu xmm2, [r9+32]
  16595. movdqu xmm3, [r9+48]
  16596. pand xmm0, xmm12
  16597. pand xmm1, xmm12
  16598. pand xmm2, xmm12
  16599. pand xmm3, xmm12
  16600. por xmm4, xmm0
  16601. por xmm5, xmm1
  16602. por xmm6, xmm2
  16603. por xmm7, xmm3
  16604. paddd xmm13, xmm11
  16605. movdqu [rcx], xmm4
  16606. movdqu [rcx+16], xmm5
  16607. movdqu [rcx+32], xmm6
  16608. movdqu [rcx+48], xmm7
  16609. add rcx, 64
  16610. ; END: 16-23
  16611. ; START: 24-31
  16612. pxor xmm13, xmm13
  16613. pxor xmm4, xmm4
  16614. pxor xmm5, xmm5
  16615. pxor xmm6, xmm6
  16616. pxor xmm7, xmm7
  16617. ; ENTRY: 0
  16618. mov r9, QWORD PTR [rdx]
  16619. add r9, 192
  16620. movdqu xmm12, xmm13
  16621. pcmpeqd xmm12, xmm10
  16622. movdqu xmm0, [r9]
  16623. movdqu xmm1, [r9+16]
  16624. movdqu xmm2, [r9+32]
  16625. movdqu xmm3, [r9+48]
  16626. pand xmm0, xmm12
  16627. pand xmm1, xmm12
  16628. pand xmm2, xmm12
  16629. pand xmm3, xmm12
  16630. por xmm4, xmm0
  16631. por xmm5, xmm1
  16632. por xmm6, xmm2
  16633. por xmm7, xmm3
  16634. paddd xmm13, xmm11
  16635. ; ENTRY: 1
  16636. mov r9, QWORD PTR [rdx+8]
  16637. add r9, 192
  16638. movdqu xmm12, xmm13
  16639. pcmpeqd xmm12, xmm10
  16640. movdqu xmm0, [r9]
  16641. movdqu xmm1, [r9+16]
  16642. movdqu xmm2, [r9+32]
  16643. movdqu xmm3, [r9+48]
  16644. pand xmm0, xmm12
  16645. pand xmm1, xmm12
  16646. pand xmm2, xmm12
  16647. pand xmm3, xmm12
  16648. por xmm4, xmm0
  16649. por xmm5, xmm1
  16650. por xmm6, xmm2
  16651. por xmm7, xmm3
  16652. paddd xmm13, xmm11
  16653. ; ENTRY: 2
  16654. mov r9, QWORD PTR [rdx+16]
  16655. add r9, 192
  16656. movdqu xmm12, xmm13
  16657. pcmpeqd xmm12, xmm10
  16658. movdqu xmm0, [r9]
  16659. movdqu xmm1, [r9+16]
  16660. movdqu xmm2, [r9+32]
  16661. movdqu xmm3, [r9+48]
  16662. pand xmm0, xmm12
  16663. pand xmm1, xmm12
  16664. pand xmm2, xmm12
  16665. pand xmm3, xmm12
  16666. por xmm4, xmm0
  16667. por xmm5, xmm1
  16668. por xmm6, xmm2
  16669. por xmm7, xmm3
  16670. paddd xmm13, xmm11
  16671. ; ENTRY: 3
  16672. mov r9, QWORD PTR [rdx+24]
  16673. add r9, 192
  16674. movdqu xmm12, xmm13
  16675. pcmpeqd xmm12, xmm10
  16676. movdqu xmm0, [r9]
  16677. movdqu xmm1, [r9+16]
  16678. movdqu xmm2, [r9+32]
  16679. movdqu xmm3, [r9+48]
  16680. pand xmm0, xmm12
  16681. pand xmm1, xmm12
  16682. pand xmm2, xmm12
  16683. pand xmm3, xmm12
  16684. por xmm4, xmm0
  16685. por xmm5, xmm1
  16686. por xmm6, xmm2
  16687. por xmm7, xmm3
  16688. paddd xmm13, xmm11
  16689. ; ENTRY: 4
  16690. mov r9, QWORD PTR [rdx+32]
  16691. add r9, 192
  16692. movdqu xmm12, xmm13
  16693. pcmpeqd xmm12, xmm10
  16694. movdqu xmm0, [r9]
  16695. movdqu xmm1, [r9+16]
  16696. movdqu xmm2, [r9+32]
  16697. movdqu xmm3, [r9+48]
  16698. pand xmm0, xmm12
  16699. pand xmm1, xmm12
  16700. pand xmm2, xmm12
  16701. pand xmm3, xmm12
  16702. por xmm4, xmm0
  16703. por xmm5, xmm1
  16704. por xmm6, xmm2
  16705. por xmm7, xmm3
  16706. paddd xmm13, xmm11
  16707. ; ENTRY: 5
  16708. mov r9, QWORD PTR [rdx+40]
  16709. add r9, 192
  16710. movdqu xmm12, xmm13
  16711. pcmpeqd xmm12, xmm10
  16712. movdqu xmm0, [r9]
  16713. movdqu xmm1, [r9+16]
  16714. movdqu xmm2, [r9+32]
  16715. movdqu xmm3, [r9+48]
  16716. pand xmm0, xmm12
  16717. pand xmm1, xmm12
  16718. pand xmm2, xmm12
  16719. pand xmm3, xmm12
  16720. por xmm4, xmm0
  16721. por xmm5, xmm1
  16722. por xmm6, xmm2
  16723. por xmm7, xmm3
  16724. paddd xmm13, xmm11
  16725. ; ENTRY: 6
  16726. mov r9, QWORD PTR [rdx+48]
  16727. add r9, 192
  16728. movdqu xmm12, xmm13
  16729. pcmpeqd xmm12, xmm10
  16730. movdqu xmm0, [r9]
  16731. movdqu xmm1, [r9+16]
  16732. movdqu xmm2, [r9+32]
  16733. movdqu xmm3, [r9+48]
  16734. pand xmm0, xmm12
  16735. pand xmm1, xmm12
  16736. pand xmm2, xmm12
  16737. pand xmm3, xmm12
  16738. por xmm4, xmm0
  16739. por xmm5, xmm1
  16740. por xmm6, xmm2
  16741. por xmm7, xmm3
  16742. paddd xmm13, xmm11
  16743. ; ENTRY: 7
  16744. mov r9, QWORD PTR [rdx+56]
  16745. add r9, 192
  16746. movdqu xmm12, xmm13
  16747. pcmpeqd xmm12, xmm10
  16748. movdqu xmm0, [r9]
  16749. movdqu xmm1, [r9+16]
  16750. movdqu xmm2, [r9+32]
  16751. movdqu xmm3, [r9+48]
  16752. pand xmm0, xmm12
  16753. pand xmm1, xmm12
  16754. pand xmm2, xmm12
  16755. pand xmm3, xmm12
  16756. por xmm4, xmm0
  16757. por xmm5, xmm1
  16758. por xmm6, xmm2
  16759. por xmm7, xmm3
  16760. paddd xmm13, xmm11
  16761. ; ENTRY: 8
  16762. mov r9, QWORD PTR [rdx+64]
  16763. add r9, 192
  16764. movdqu xmm12, xmm13
  16765. pcmpeqd xmm12, xmm10
  16766. movdqu xmm0, [r9]
  16767. movdqu xmm1, [r9+16]
  16768. movdqu xmm2, [r9+32]
  16769. movdqu xmm3, [r9+48]
  16770. pand xmm0, xmm12
  16771. pand xmm1, xmm12
  16772. pand xmm2, xmm12
  16773. pand xmm3, xmm12
  16774. por xmm4, xmm0
  16775. por xmm5, xmm1
  16776. por xmm6, xmm2
  16777. por xmm7, xmm3
  16778. paddd xmm13, xmm11
  16779. ; ENTRY: 9
  16780. mov r9, QWORD PTR [rdx+72]
  16781. add r9, 192
  16782. movdqu xmm12, xmm13
  16783. pcmpeqd xmm12, xmm10
  16784. movdqu xmm0, [r9]
  16785. movdqu xmm1, [r9+16]
  16786. movdqu xmm2, [r9+32]
  16787. movdqu xmm3, [r9+48]
  16788. pand xmm0, xmm12
  16789. pand xmm1, xmm12
  16790. pand xmm2, xmm12
  16791. pand xmm3, xmm12
  16792. por xmm4, xmm0
  16793. por xmm5, xmm1
  16794. por xmm6, xmm2
  16795. por xmm7, xmm3
  16796. paddd xmm13, xmm11
  16797. ; ENTRY: 10
  16798. mov r9, QWORD PTR [rdx+80]
  16799. add r9, 192
  16800. movdqu xmm12, xmm13
  16801. pcmpeqd xmm12, xmm10
  16802. movdqu xmm0, [r9]
  16803. movdqu xmm1, [r9+16]
  16804. movdqu xmm2, [r9+32]
  16805. movdqu xmm3, [r9+48]
  16806. pand xmm0, xmm12
  16807. pand xmm1, xmm12
  16808. pand xmm2, xmm12
  16809. pand xmm3, xmm12
  16810. por xmm4, xmm0
  16811. por xmm5, xmm1
  16812. por xmm6, xmm2
  16813. por xmm7, xmm3
  16814. paddd xmm13, xmm11
  16815. ; ENTRY: 11
  16816. mov r9, QWORD PTR [rdx+88]
  16817. add r9, 192
  16818. movdqu xmm12, xmm13
  16819. pcmpeqd xmm12, xmm10
  16820. movdqu xmm0, [r9]
  16821. movdqu xmm1, [r9+16]
  16822. movdqu xmm2, [r9+32]
  16823. movdqu xmm3, [r9+48]
  16824. pand xmm0, xmm12
  16825. pand xmm1, xmm12
  16826. pand xmm2, xmm12
  16827. pand xmm3, xmm12
  16828. por xmm4, xmm0
  16829. por xmm5, xmm1
  16830. por xmm6, xmm2
  16831. por xmm7, xmm3
  16832. paddd xmm13, xmm11
  16833. ; ENTRY: 12
  16834. mov r9, QWORD PTR [rdx+96]
  16835. add r9, 192
  16836. movdqu xmm12, xmm13
  16837. pcmpeqd xmm12, xmm10
  16838. movdqu xmm0, [r9]
  16839. movdqu xmm1, [r9+16]
  16840. movdqu xmm2, [r9+32]
  16841. movdqu xmm3, [r9+48]
  16842. pand xmm0, xmm12
  16843. pand xmm1, xmm12
  16844. pand xmm2, xmm12
  16845. pand xmm3, xmm12
  16846. por xmm4, xmm0
  16847. por xmm5, xmm1
  16848. por xmm6, xmm2
  16849. por xmm7, xmm3
  16850. paddd xmm13, xmm11
  16851. ; ENTRY: 13
  16852. mov r9, QWORD PTR [rdx+104]
  16853. add r9, 192
  16854. movdqu xmm12, xmm13
  16855. pcmpeqd xmm12, xmm10
  16856. movdqu xmm0, [r9]
  16857. movdqu xmm1, [r9+16]
  16858. movdqu xmm2, [r9+32]
  16859. movdqu xmm3, [r9+48]
  16860. pand xmm0, xmm12
  16861. pand xmm1, xmm12
  16862. pand xmm2, xmm12
  16863. pand xmm3, xmm12
  16864. por xmm4, xmm0
  16865. por xmm5, xmm1
  16866. por xmm6, xmm2
  16867. por xmm7, xmm3
  16868. paddd xmm13, xmm11
  16869. ; ENTRY: 14
  16870. mov r9, QWORD PTR [rdx+112]
  16871. add r9, 192
  16872. movdqu xmm12, xmm13
  16873. pcmpeqd xmm12, xmm10
  16874. movdqu xmm0, [r9]
  16875. movdqu xmm1, [r9+16]
  16876. movdqu xmm2, [r9+32]
  16877. movdqu xmm3, [r9+48]
  16878. pand xmm0, xmm12
  16879. pand xmm1, xmm12
  16880. pand xmm2, xmm12
  16881. pand xmm3, xmm12
  16882. por xmm4, xmm0
  16883. por xmm5, xmm1
  16884. por xmm6, xmm2
  16885. por xmm7, xmm3
  16886. paddd xmm13, xmm11
  16887. ; ENTRY: 15
  16888. mov r9, QWORD PTR [rdx+120]
  16889. add r9, 192
  16890. movdqu xmm12, xmm13
  16891. pcmpeqd xmm12, xmm10
  16892. movdqu xmm0, [r9]
  16893. movdqu xmm1, [r9+16]
  16894. movdqu xmm2, [r9+32]
  16895. movdqu xmm3, [r9+48]
  16896. pand xmm0, xmm12
  16897. pand xmm1, xmm12
  16898. pand xmm2, xmm12
  16899. pand xmm3, xmm12
  16900. por xmm4, xmm0
  16901. por xmm5, xmm1
  16902. por xmm6, xmm2
  16903. por xmm7, xmm3
  16904. paddd xmm13, xmm11
  16905. ; ENTRY: 16
  16906. mov r9, QWORD PTR [rdx+128]
  16907. add r9, 192
  16908. movdqu xmm12, xmm13
  16909. pcmpeqd xmm12, xmm10
  16910. movdqu xmm0, [r9]
  16911. movdqu xmm1, [r9+16]
  16912. movdqu xmm2, [r9+32]
  16913. movdqu xmm3, [r9+48]
  16914. pand xmm0, xmm12
  16915. pand xmm1, xmm12
  16916. pand xmm2, xmm12
  16917. pand xmm3, xmm12
  16918. por xmm4, xmm0
  16919. por xmm5, xmm1
  16920. por xmm6, xmm2
  16921. por xmm7, xmm3
  16922. paddd xmm13, xmm11
  16923. ; ENTRY: 17
  16924. mov r9, QWORD PTR [rdx+136]
  16925. add r9, 192
  16926. movdqu xmm12, xmm13
  16927. pcmpeqd xmm12, xmm10
  16928. movdqu xmm0, [r9]
  16929. movdqu xmm1, [r9+16]
  16930. movdqu xmm2, [r9+32]
  16931. movdqu xmm3, [r9+48]
  16932. pand xmm0, xmm12
  16933. pand xmm1, xmm12
  16934. pand xmm2, xmm12
  16935. pand xmm3, xmm12
  16936. por xmm4, xmm0
  16937. por xmm5, xmm1
  16938. por xmm6, xmm2
  16939. por xmm7, xmm3
  16940. paddd xmm13, xmm11
  16941. ; ENTRY: 18
  16942. mov r9, QWORD PTR [rdx+144]
  16943. add r9, 192
  16944. movdqu xmm12, xmm13
  16945. pcmpeqd xmm12, xmm10
  16946. movdqu xmm0, [r9]
  16947. movdqu xmm1, [r9+16]
  16948. movdqu xmm2, [r9+32]
  16949. movdqu xmm3, [r9+48]
  16950. pand xmm0, xmm12
  16951. pand xmm1, xmm12
  16952. pand xmm2, xmm12
  16953. pand xmm3, xmm12
  16954. por xmm4, xmm0
  16955. por xmm5, xmm1
  16956. por xmm6, xmm2
  16957. por xmm7, xmm3
  16958. paddd xmm13, xmm11
  16959. ; ENTRY: 19
  16960. mov r9, QWORD PTR [rdx+152]
  16961. add r9, 192
  16962. movdqu xmm12, xmm13
  16963. pcmpeqd xmm12, xmm10
  16964. movdqu xmm0, [r9]
  16965. movdqu xmm1, [r9+16]
  16966. movdqu xmm2, [r9+32]
  16967. movdqu xmm3, [r9+48]
  16968. pand xmm0, xmm12
  16969. pand xmm1, xmm12
  16970. pand xmm2, xmm12
  16971. pand xmm3, xmm12
  16972. por xmm4, xmm0
  16973. por xmm5, xmm1
  16974. por xmm6, xmm2
  16975. por xmm7, xmm3
  16976. paddd xmm13, xmm11
  16977. ; ENTRY: 20
  16978. mov r9, QWORD PTR [rdx+160]
  16979. add r9, 192
  16980. movdqu xmm12, xmm13
  16981. pcmpeqd xmm12, xmm10
  16982. movdqu xmm0, [r9]
  16983. movdqu xmm1, [r9+16]
  16984. movdqu xmm2, [r9+32]
  16985. movdqu xmm3, [r9+48]
  16986. pand xmm0, xmm12
  16987. pand xmm1, xmm12
  16988. pand xmm2, xmm12
  16989. pand xmm3, xmm12
  16990. por xmm4, xmm0
  16991. por xmm5, xmm1
  16992. por xmm6, xmm2
  16993. por xmm7, xmm3
  16994. paddd xmm13, xmm11
  16995. ; ENTRY: 21
  16996. mov r9, QWORD PTR [rdx+168]
  16997. add r9, 192
  16998. movdqu xmm12, xmm13
  16999. pcmpeqd xmm12, xmm10
  17000. movdqu xmm0, [r9]
  17001. movdqu xmm1, [r9+16]
  17002. movdqu xmm2, [r9+32]
  17003. movdqu xmm3, [r9+48]
  17004. pand xmm0, xmm12
  17005. pand xmm1, xmm12
  17006. pand xmm2, xmm12
  17007. pand xmm3, xmm12
  17008. por xmm4, xmm0
  17009. por xmm5, xmm1
  17010. por xmm6, xmm2
  17011. por xmm7, xmm3
  17012. paddd xmm13, xmm11
  17013. ; ENTRY: 22
  17014. mov r9, QWORD PTR [rdx+176]
  17015. add r9, 192
  17016. movdqu xmm12, xmm13
  17017. pcmpeqd xmm12, xmm10
  17018. movdqu xmm0, [r9]
  17019. movdqu xmm1, [r9+16]
  17020. movdqu xmm2, [r9+32]
  17021. movdqu xmm3, [r9+48]
  17022. pand xmm0, xmm12
  17023. pand xmm1, xmm12
  17024. pand xmm2, xmm12
  17025. pand xmm3, xmm12
  17026. por xmm4, xmm0
  17027. por xmm5, xmm1
  17028. por xmm6, xmm2
  17029. por xmm7, xmm3
  17030. paddd xmm13, xmm11
  17031. ; ENTRY: 23
  17032. mov r9, QWORD PTR [rdx+184]
  17033. add r9, 192
  17034. movdqu xmm12, xmm13
  17035. pcmpeqd xmm12, xmm10
  17036. movdqu xmm0, [r9]
  17037. movdqu xmm1, [r9+16]
  17038. movdqu xmm2, [r9+32]
  17039. movdqu xmm3, [r9+48]
  17040. pand xmm0, xmm12
  17041. pand xmm1, xmm12
  17042. pand xmm2, xmm12
  17043. pand xmm3, xmm12
  17044. por xmm4, xmm0
  17045. por xmm5, xmm1
  17046. por xmm6, xmm2
  17047. por xmm7, xmm3
  17048. paddd xmm13, xmm11
  17049. ; ENTRY: 24
  17050. mov r9, QWORD PTR [rdx+192]
  17051. add r9, 192
  17052. movdqu xmm12, xmm13
  17053. pcmpeqd xmm12, xmm10
  17054. movdqu xmm0, [r9]
  17055. movdqu xmm1, [r9+16]
  17056. movdqu xmm2, [r9+32]
  17057. movdqu xmm3, [r9+48]
  17058. pand xmm0, xmm12
  17059. pand xmm1, xmm12
  17060. pand xmm2, xmm12
  17061. pand xmm3, xmm12
  17062. por xmm4, xmm0
  17063. por xmm5, xmm1
  17064. por xmm6, xmm2
  17065. por xmm7, xmm3
  17066. paddd xmm13, xmm11
  17067. ; ENTRY: 25
  17068. mov r9, QWORD PTR [rdx+200]
  17069. add r9, 192
  17070. movdqu xmm12, xmm13
  17071. pcmpeqd xmm12, xmm10
  17072. movdqu xmm0, [r9]
  17073. movdqu xmm1, [r9+16]
  17074. movdqu xmm2, [r9+32]
  17075. movdqu xmm3, [r9+48]
  17076. pand xmm0, xmm12
  17077. pand xmm1, xmm12
  17078. pand xmm2, xmm12
  17079. pand xmm3, xmm12
  17080. por xmm4, xmm0
  17081. por xmm5, xmm1
  17082. por xmm6, xmm2
  17083. por xmm7, xmm3
  17084. paddd xmm13, xmm11
  17085. ; ENTRY: 26
  17086. mov r9, QWORD PTR [rdx+208]
  17087. add r9, 192
  17088. movdqu xmm12, xmm13
  17089. pcmpeqd xmm12, xmm10
  17090. movdqu xmm0, [r9]
  17091. movdqu xmm1, [r9+16]
  17092. movdqu xmm2, [r9+32]
  17093. movdqu xmm3, [r9+48]
  17094. pand xmm0, xmm12
  17095. pand xmm1, xmm12
  17096. pand xmm2, xmm12
  17097. pand xmm3, xmm12
  17098. por xmm4, xmm0
  17099. por xmm5, xmm1
  17100. por xmm6, xmm2
  17101. por xmm7, xmm3
  17102. paddd xmm13, xmm11
  17103. ; ENTRY: 27
  17104. mov r9, QWORD PTR [rdx+216]
  17105. add r9, 192
  17106. movdqu xmm12, xmm13
  17107. pcmpeqd xmm12, xmm10
  17108. movdqu xmm0, [r9]
  17109. movdqu xmm1, [r9+16]
  17110. movdqu xmm2, [r9+32]
  17111. movdqu xmm3, [r9+48]
  17112. pand xmm0, xmm12
  17113. pand xmm1, xmm12
  17114. pand xmm2, xmm12
  17115. pand xmm3, xmm12
  17116. por xmm4, xmm0
  17117. por xmm5, xmm1
  17118. por xmm6, xmm2
  17119. por xmm7, xmm3
  17120. paddd xmm13, xmm11
  17121. ; ENTRY: 28
  17122. mov r9, QWORD PTR [rdx+224]
  17123. add r9, 192
  17124. movdqu xmm12, xmm13
  17125. pcmpeqd xmm12, xmm10
  17126. movdqu xmm0, [r9]
  17127. movdqu xmm1, [r9+16]
  17128. movdqu xmm2, [r9+32]
  17129. movdqu xmm3, [r9+48]
  17130. pand xmm0, xmm12
  17131. pand xmm1, xmm12
  17132. pand xmm2, xmm12
  17133. pand xmm3, xmm12
  17134. por xmm4, xmm0
  17135. por xmm5, xmm1
  17136. por xmm6, xmm2
  17137. por xmm7, xmm3
  17138. paddd xmm13, xmm11
  17139. ; ENTRY: 29
  17140. mov r9, QWORD PTR [rdx+232]
  17141. add r9, 192
  17142. movdqu xmm12, xmm13
  17143. pcmpeqd xmm12, xmm10
  17144. movdqu xmm0, [r9]
  17145. movdqu xmm1, [r9+16]
  17146. movdqu xmm2, [r9+32]
  17147. movdqu xmm3, [r9+48]
  17148. pand xmm0, xmm12
  17149. pand xmm1, xmm12
  17150. pand xmm2, xmm12
  17151. pand xmm3, xmm12
  17152. por xmm4, xmm0
  17153. por xmm5, xmm1
  17154. por xmm6, xmm2
  17155. por xmm7, xmm3
  17156. paddd xmm13, xmm11
  17157. ; ENTRY: 30
  17158. mov r9, QWORD PTR [rdx+240]
  17159. add r9, 192
  17160. movdqu xmm12, xmm13
  17161. pcmpeqd xmm12, xmm10
  17162. movdqu xmm0, [r9]
  17163. movdqu xmm1, [r9+16]
  17164. movdqu xmm2, [r9+32]
  17165. movdqu xmm3, [r9+48]
  17166. pand xmm0, xmm12
  17167. pand xmm1, xmm12
  17168. pand xmm2, xmm12
  17169. pand xmm3, xmm12
  17170. por xmm4, xmm0
  17171. por xmm5, xmm1
  17172. por xmm6, xmm2
  17173. por xmm7, xmm3
  17174. paddd xmm13, xmm11
  17175. ; ENTRY: 31
  17176. mov r9, QWORD PTR [rdx+248]
  17177. add r9, 192
  17178. movdqu xmm12, xmm13
  17179. pcmpeqd xmm12, xmm10
  17180. movdqu xmm0, [r9]
  17181. movdqu xmm1, [r9+16]
  17182. movdqu xmm2, [r9+32]
  17183. movdqu xmm3, [r9+48]
  17184. pand xmm0, xmm12
  17185. pand xmm1, xmm12
  17186. pand xmm2, xmm12
  17187. pand xmm3, xmm12
  17188. por xmm4, xmm0
  17189. por xmm5, xmm1
  17190. por xmm6, xmm2
  17191. por xmm7, xmm3
  17192. paddd xmm13, xmm11
  17193. ; ENTRY: 32
  17194. mov r9, QWORD PTR [rdx+256]
  17195. add r9, 192
  17196. movdqu xmm12, xmm13
  17197. pcmpeqd xmm12, xmm10
  17198. movdqu xmm0, [r9]
  17199. movdqu xmm1, [r9+16]
  17200. movdqu xmm2, [r9+32]
  17201. movdqu xmm3, [r9+48]
  17202. pand xmm0, xmm12
  17203. pand xmm1, xmm12
  17204. pand xmm2, xmm12
  17205. pand xmm3, xmm12
  17206. por xmm4, xmm0
  17207. por xmm5, xmm1
  17208. por xmm6, xmm2
  17209. por xmm7, xmm3
  17210. paddd xmm13, xmm11
  17211. ; ENTRY: 33
  17212. mov r9, QWORD PTR [rdx+264]
  17213. add r9, 192
  17214. movdqu xmm12, xmm13
  17215. pcmpeqd xmm12, xmm10
  17216. movdqu xmm0, [r9]
  17217. movdqu xmm1, [r9+16]
  17218. movdqu xmm2, [r9+32]
  17219. movdqu xmm3, [r9+48]
  17220. pand xmm0, xmm12
  17221. pand xmm1, xmm12
  17222. pand xmm2, xmm12
  17223. pand xmm3, xmm12
  17224. por xmm4, xmm0
  17225. por xmm5, xmm1
  17226. por xmm6, xmm2
  17227. por xmm7, xmm3
  17228. paddd xmm13, xmm11
  17229. ; ENTRY: 34
  17230. mov r9, QWORD PTR [rdx+272]
  17231. add r9, 192
  17232. movdqu xmm12, xmm13
  17233. pcmpeqd xmm12, xmm10
  17234. movdqu xmm0, [r9]
  17235. movdqu xmm1, [r9+16]
  17236. movdqu xmm2, [r9+32]
  17237. movdqu xmm3, [r9+48]
  17238. pand xmm0, xmm12
  17239. pand xmm1, xmm12
  17240. pand xmm2, xmm12
  17241. pand xmm3, xmm12
  17242. por xmm4, xmm0
  17243. por xmm5, xmm1
  17244. por xmm6, xmm2
  17245. por xmm7, xmm3
  17246. paddd xmm13, xmm11
  17247. ; ENTRY: 35
  17248. mov r9, QWORD PTR [rdx+280]
  17249. add r9, 192
  17250. movdqu xmm12, xmm13
  17251. pcmpeqd xmm12, xmm10
  17252. movdqu xmm0, [r9]
  17253. movdqu xmm1, [r9+16]
  17254. movdqu xmm2, [r9+32]
  17255. movdqu xmm3, [r9+48]
  17256. pand xmm0, xmm12
  17257. pand xmm1, xmm12
  17258. pand xmm2, xmm12
  17259. pand xmm3, xmm12
  17260. por xmm4, xmm0
  17261. por xmm5, xmm1
  17262. por xmm6, xmm2
  17263. por xmm7, xmm3
  17264. paddd xmm13, xmm11
  17265. ; ENTRY: 36
  17266. mov r9, QWORD PTR [rdx+288]
  17267. add r9, 192
  17268. movdqu xmm12, xmm13
  17269. pcmpeqd xmm12, xmm10
  17270. movdqu xmm0, [r9]
  17271. movdqu xmm1, [r9+16]
  17272. movdqu xmm2, [r9+32]
  17273. movdqu xmm3, [r9+48]
  17274. pand xmm0, xmm12
  17275. pand xmm1, xmm12
  17276. pand xmm2, xmm12
  17277. pand xmm3, xmm12
  17278. por xmm4, xmm0
  17279. por xmm5, xmm1
  17280. por xmm6, xmm2
  17281. por xmm7, xmm3
  17282. paddd xmm13, xmm11
  17283. ; ENTRY: 37
  17284. mov r9, QWORD PTR [rdx+296]
  17285. add r9, 192
  17286. movdqu xmm12, xmm13
  17287. pcmpeqd xmm12, xmm10
  17288. movdqu xmm0, [r9]
  17289. movdqu xmm1, [r9+16]
  17290. movdqu xmm2, [r9+32]
  17291. movdqu xmm3, [r9+48]
  17292. pand xmm0, xmm12
  17293. pand xmm1, xmm12
  17294. pand xmm2, xmm12
  17295. pand xmm3, xmm12
  17296. por xmm4, xmm0
  17297. por xmm5, xmm1
  17298. por xmm6, xmm2
  17299. por xmm7, xmm3
  17300. paddd xmm13, xmm11
  17301. ; ENTRY: 38
  17302. mov r9, QWORD PTR [rdx+304]
  17303. add r9, 192
  17304. movdqu xmm12, xmm13
  17305. pcmpeqd xmm12, xmm10
  17306. movdqu xmm0, [r9]
  17307. movdqu xmm1, [r9+16]
  17308. movdqu xmm2, [r9+32]
  17309. movdqu xmm3, [r9+48]
  17310. pand xmm0, xmm12
  17311. pand xmm1, xmm12
  17312. pand xmm2, xmm12
  17313. pand xmm3, xmm12
  17314. por xmm4, xmm0
  17315. por xmm5, xmm1
  17316. por xmm6, xmm2
  17317. por xmm7, xmm3
  17318. paddd xmm13, xmm11
  17319. ; ENTRY: 39
  17320. mov r9, QWORD PTR [rdx+312]
  17321. add r9, 192
  17322. movdqu xmm12, xmm13
  17323. pcmpeqd xmm12, xmm10
  17324. movdqu xmm0, [r9]
  17325. movdqu xmm1, [r9+16]
  17326. movdqu xmm2, [r9+32]
  17327. movdqu xmm3, [r9+48]
  17328. pand xmm0, xmm12
  17329. pand xmm1, xmm12
  17330. pand xmm2, xmm12
  17331. pand xmm3, xmm12
  17332. por xmm4, xmm0
  17333. por xmm5, xmm1
  17334. por xmm6, xmm2
  17335. por xmm7, xmm3
  17336. paddd xmm13, xmm11
  17337. ; ENTRY: 40
  17338. mov r9, QWORD PTR [rdx+320]
  17339. add r9, 192
  17340. movdqu xmm12, xmm13
  17341. pcmpeqd xmm12, xmm10
  17342. movdqu xmm0, [r9]
  17343. movdqu xmm1, [r9+16]
  17344. movdqu xmm2, [r9+32]
  17345. movdqu xmm3, [r9+48]
  17346. pand xmm0, xmm12
  17347. pand xmm1, xmm12
  17348. pand xmm2, xmm12
  17349. pand xmm3, xmm12
  17350. por xmm4, xmm0
  17351. por xmm5, xmm1
  17352. por xmm6, xmm2
  17353. por xmm7, xmm3
  17354. paddd xmm13, xmm11
  17355. ; ENTRY: 41
  17356. mov r9, QWORD PTR [rdx+328]
  17357. add r9, 192
  17358. movdqu xmm12, xmm13
  17359. pcmpeqd xmm12, xmm10
  17360. movdqu xmm0, [r9]
  17361. movdqu xmm1, [r9+16]
  17362. movdqu xmm2, [r9+32]
  17363. movdqu xmm3, [r9+48]
  17364. pand xmm0, xmm12
  17365. pand xmm1, xmm12
  17366. pand xmm2, xmm12
  17367. pand xmm3, xmm12
  17368. por xmm4, xmm0
  17369. por xmm5, xmm1
  17370. por xmm6, xmm2
  17371. por xmm7, xmm3
  17372. paddd xmm13, xmm11
  17373. ; ENTRY: 42
  17374. mov r9, QWORD PTR [rdx+336]
  17375. add r9, 192
  17376. movdqu xmm12, xmm13
  17377. pcmpeqd xmm12, xmm10
  17378. movdqu xmm0, [r9]
  17379. movdqu xmm1, [r9+16]
  17380. movdqu xmm2, [r9+32]
  17381. movdqu xmm3, [r9+48]
  17382. pand xmm0, xmm12
  17383. pand xmm1, xmm12
  17384. pand xmm2, xmm12
  17385. pand xmm3, xmm12
  17386. por xmm4, xmm0
  17387. por xmm5, xmm1
  17388. por xmm6, xmm2
  17389. por xmm7, xmm3
  17390. paddd xmm13, xmm11
  17391. ; ENTRY: 43
  17392. mov r9, QWORD PTR [rdx+344]
  17393. add r9, 192
  17394. movdqu xmm12, xmm13
  17395. pcmpeqd xmm12, xmm10
  17396. movdqu xmm0, [r9]
  17397. movdqu xmm1, [r9+16]
  17398. movdqu xmm2, [r9+32]
  17399. movdqu xmm3, [r9+48]
  17400. pand xmm0, xmm12
  17401. pand xmm1, xmm12
  17402. pand xmm2, xmm12
  17403. pand xmm3, xmm12
  17404. por xmm4, xmm0
  17405. por xmm5, xmm1
  17406. por xmm6, xmm2
  17407. por xmm7, xmm3
  17408. paddd xmm13, xmm11
  17409. ; ENTRY: 44
  17410. mov r9, QWORD PTR [rdx+352]
  17411. add r9, 192
  17412. movdqu xmm12, xmm13
  17413. pcmpeqd xmm12, xmm10
  17414. movdqu xmm0, [r9]
  17415. movdqu xmm1, [r9+16]
  17416. movdqu xmm2, [r9+32]
  17417. movdqu xmm3, [r9+48]
  17418. pand xmm0, xmm12
  17419. pand xmm1, xmm12
  17420. pand xmm2, xmm12
  17421. pand xmm3, xmm12
  17422. por xmm4, xmm0
  17423. por xmm5, xmm1
  17424. por xmm6, xmm2
  17425. por xmm7, xmm3
  17426. paddd xmm13, xmm11
  17427. ; ENTRY: 45
  17428. mov r9, QWORD PTR [rdx+360]
  17429. add r9, 192
  17430. movdqu xmm12, xmm13
  17431. pcmpeqd xmm12, xmm10
  17432. movdqu xmm0, [r9]
  17433. movdqu xmm1, [r9+16]
  17434. movdqu xmm2, [r9+32]
  17435. movdqu xmm3, [r9+48]
  17436. pand xmm0, xmm12
  17437. pand xmm1, xmm12
  17438. pand xmm2, xmm12
  17439. pand xmm3, xmm12
  17440. por xmm4, xmm0
  17441. por xmm5, xmm1
  17442. por xmm6, xmm2
  17443. por xmm7, xmm3
  17444. paddd xmm13, xmm11
  17445. ; ENTRY: 46
  17446. mov r9, QWORD PTR [rdx+368]
  17447. add r9, 192
  17448. movdqu xmm12, xmm13
  17449. pcmpeqd xmm12, xmm10
  17450. movdqu xmm0, [r9]
  17451. movdqu xmm1, [r9+16]
  17452. movdqu xmm2, [r9+32]
  17453. movdqu xmm3, [r9+48]
  17454. pand xmm0, xmm12
  17455. pand xmm1, xmm12
  17456. pand xmm2, xmm12
  17457. pand xmm3, xmm12
  17458. por xmm4, xmm0
  17459. por xmm5, xmm1
  17460. por xmm6, xmm2
  17461. por xmm7, xmm3
  17462. paddd xmm13, xmm11
  17463. ; ENTRY: 47
  17464. mov r9, QWORD PTR [rdx+376]
  17465. add r9, 192
  17466. movdqu xmm12, xmm13
  17467. pcmpeqd xmm12, xmm10
  17468. movdqu xmm0, [r9]
  17469. movdqu xmm1, [r9+16]
  17470. movdqu xmm2, [r9+32]
  17471. movdqu xmm3, [r9+48]
  17472. pand xmm0, xmm12
  17473. pand xmm1, xmm12
  17474. pand xmm2, xmm12
  17475. pand xmm3, xmm12
  17476. por xmm4, xmm0
  17477. por xmm5, xmm1
  17478. por xmm6, xmm2
  17479. por xmm7, xmm3
  17480. paddd xmm13, xmm11
  17481. ; ENTRY: 48
  17482. mov r9, QWORD PTR [rdx+384]
  17483. add r9, 192
  17484. movdqu xmm12, xmm13
  17485. pcmpeqd xmm12, xmm10
  17486. movdqu xmm0, [r9]
  17487. movdqu xmm1, [r9+16]
  17488. movdqu xmm2, [r9+32]
  17489. movdqu xmm3, [r9+48]
  17490. pand xmm0, xmm12
  17491. pand xmm1, xmm12
  17492. pand xmm2, xmm12
  17493. pand xmm3, xmm12
  17494. por xmm4, xmm0
  17495. por xmm5, xmm1
  17496. por xmm6, xmm2
  17497. por xmm7, xmm3
  17498. paddd xmm13, xmm11
  17499. ; ENTRY: 49
  17500. mov r9, QWORD PTR [rdx+392]
  17501. add r9, 192
  17502. movdqu xmm12, xmm13
  17503. pcmpeqd xmm12, xmm10
  17504. movdqu xmm0, [r9]
  17505. movdqu xmm1, [r9+16]
  17506. movdqu xmm2, [r9+32]
  17507. movdqu xmm3, [r9+48]
  17508. pand xmm0, xmm12
  17509. pand xmm1, xmm12
  17510. pand xmm2, xmm12
  17511. pand xmm3, xmm12
  17512. por xmm4, xmm0
  17513. por xmm5, xmm1
  17514. por xmm6, xmm2
  17515. por xmm7, xmm3
  17516. paddd xmm13, xmm11
  17517. ; ENTRY: 50
  17518. mov r9, QWORD PTR [rdx+400]
  17519. add r9, 192
  17520. movdqu xmm12, xmm13
  17521. pcmpeqd xmm12, xmm10
  17522. movdqu xmm0, [r9]
  17523. movdqu xmm1, [r9+16]
  17524. movdqu xmm2, [r9+32]
  17525. movdqu xmm3, [r9+48]
  17526. pand xmm0, xmm12
  17527. pand xmm1, xmm12
  17528. pand xmm2, xmm12
  17529. pand xmm3, xmm12
  17530. por xmm4, xmm0
  17531. por xmm5, xmm1
  17532. por xmm6, xmm2
  17533. por xmm7, xmm3
  17534. paddd xmm13, xmm11
  17535. ; ENTRY: 51
  17536. mov r9, QWORD PTR [rdx+408]
  17537. add r9, 192
  17538. movdqu xmm12, xmm13
  17539. pcmpeqd xmm12, xmm10
  17540. movdqu xmm0, [r9]
  17541. movdqu xmm1, [r9+16]
  17542. movdqu xmm2, [r9+32]
  17543. movdqu xmm3, [r9+48]
  17544. pand xmm0, xmm12
  17545. pand xmm1, xmm12
  17546. pand xmm2, xmm12
  17547. pand xmm3, xmm12
  17548. por xmm4, xmm0
  17549. por xmm5, xmm1
  17550. por xmm6, xmm2
  17551. por xmm7, xmm3
  17552. paddd xmm13, xmm11
  17553. ; ENTRY: 52
  17554. mov r9, QWORD PTR [rdx+416]
  17555. add r9, 192
  17556. movdqu xmm12, xmm13
  17557. pcmpeqd xmm12, xmm10
  17558. movdqu xmm0, [r9]
  17559. movdqu xmm1, [r9+16]
  17560. movdqu xmm2, [r9+32]
  17561. movdqu xmm3, [r9+48]
  17562. pand xmm0, xmm12
  17563. pand xmm1, xmm12
  17564. pand xmm2, xmm12
  17565. pand xmm3, xmm12
  17566. por xmm4, xmm0
  17567. por xmm5, xmm1
  17568. por xmm6, xmm2
  17569. por xmm7, xmm3
  17570. paddd xmm13, xmm11
  17571. ; ENTRY: 53
  17572. mov r9, QWORD PTR [rdx+424]
  17573. add r9, 192
  17574. movdqu xmm12, xmm13
  17575. pcmpeqd xmm12, xmm10
  17576. movdqu xmm0, [r9]
  17577. movdqu xmm1, [r9+16]
  17578. movdqu xmm2, [r9+32]
  17579. movdqu xmm3, [r9+48]
  17580. pand xmm0, xmm12
  17581. pand xmm1, xmm12
  17582. pand xmm2, xmm12
  17583. pand xmm3, xmm12
  17584. por xmm4, xmm0
  17585. por xmm5, xmm1
  17586. por xmm6, xmm2
  17587. por xmm7, xmm3
  17588. paddd xmm13, xmm11
  17589. ; ENTRY: 54
  17590. mov r9, QWORD PTR [rdx+432]
  17591. add r9, 192
  17592. movdqu xmm12, xmm13
  17593. pcmpeqd xmm12, xmm10
  17594. movdqu xmm0, [r9]
  17595. movdqu xmm1, [r9+16]
  17596. movdqu xmm2, [r9+32]
  17597. movdqu xmm3, [r9+48]
  17598. pand xmm0, xmm12
  17599. pand xmm1, xmm12
  17600. pand xmm2, xmm12
  17601. pand xmm3, xmm12
  17602. por xmm4, xmm0
  17603. por xmm5, xmm1
  17604. por xmm6, xmm2
  17605. por xmm7, xmm3
  17606. paddd xmm13, xmm11
  17607. ; ENTRY: 55
  17608. mov r9, QWORD PTR [rdx+440]
  17609. add r9, 192
  17610. movdqu xmm12, xmm13
  17611. pcmpeqd xmm12, xmm10
  17612. movdqu xmm0, [r9]
  17613. movdqu xmm1, [r9+16]
  17614. movdqu xmm2, [r9+32]
  17615. movdqu xmm3, [r9+48]
  17616. pand xmm0, xmm12
  17617. pand xmm1, xmm12
  17618. pand xmm2, xmm12
  17619. pand xmm3, xmm12
  17620. por xmm4, xmm0
  17621. por xmm5, xmm1
  17622. por xmm6, xmm2
  17623. por xmm7, xmm3
  17624. paddd xmm13, xmm11
  17625. ; ENTRY: 56
  17626. mov r9, QWORD PTR [rdx+448]
  17627. add r9, 192
  17628. movdqu xmm12, xmm13
  17629. pcmpeqd xmm12, xmm10
  17630. movdqu xmm0, [r9]
  17631. movdqu xmm1, [r9+16]
  17632. movdqu xmm2, [r9+32]
  17633. movdqu xmm3, [r9+48]
  17634. pand xmm0, xmm12
  17635. pand xmm1, xmm12
  17636. pand xmm2, xmm12
  17637. pand xmm3, xmm12
  17638. por xmm4, xmm0
  17639. por xmm5, xmm1
  17640. por xmm6, xmm2
  17641. por xmm7, xmm3
  17642. paddd xmm13, xmm11
  17643. ; ENTRY: 57
  17644. mov r9, QWORD PTR [rdx+456]
  17645. add r9, 192
  17646. movdqu xmm12, xmm13
  17647. pcmpeqd xmm12, xmm10
  17648. movdqu xmm0, [r9]
  17649. movdqu xmm1, [r9+16]
  17650. movdqu xmm2, [r9+32]
  17651. movdqu xmm3, [r9+48]
  17652. pand xmm0, xmm12
  17653. pand xmm1, xmm12
  17654. pand xmm2, xmm12
  17655. pand xmm3, xmm12
  17656. por xmm4, xmm0
  17657. por xmm5, xmm1
  17658. por xmm6, xmm2
  17659. por xmm7, xmm3
  17660. paddd xmm13, xmm11
  17661. ; ENTRY: 58
  17662. mov r9, QWORD PTR [rdx+464]
  17663. add r9, 192
  17664. movdqu xmm12, xmm13
  17665. pcmpeqd xmm12, xmm10
  17666. movdqu xmm0, [r9]
  17667. movdqu xmm1, [r9+16]
  17668. movdqu xmm2, [r9+32]
  17669. movdqu xmm3, [r9+48]
  17670. pand xmm0, xmm12
  17671. pand xmm1, xmm12
  17672. pand xmm2, xmm12
  17673. pand xmm3, xmm12
  17674. por xmm4, xmm0
  17675. por xmm5, xmm1
  17676. por xmm6, xmm2
  17677. por xmm7, xmm3
  17678. paddd xmm13, xmm11
  17679. ; ENTRY: 59
  17680. mov r9, QWORD PTR [rdx+472]
  17681. add r9, 192
  17682. movdqu xmm12, xmm13
  17683. pcmpeqd xmm12, xmm10
  17684. movdqu xmm0, [r9]
  17685. movdqu xmm1, [r9+16]
  17686. movdqu xmm2, [r9+32]
  17687. movdqu xmm3, [r9+48]
  17688. pand xmm0, xmm12
  17689. pand xmm1, xmm12
  17690. pand xmm2, xmm12
  17691. pand xmm3, xmm12
  17692. por xmm4, xmm0
  17693. por xmm5, xmm1
  17694. por xmm6, xmm2
  17695. por xmm7, xmm3
  17696. paddd xmm13, xmm11
  17697. ; ENTRY: 60
  17698. mov r9, QWORD PTR [rdx+480]
  17699. add r9, 192
  17700. movdqu xmm12, xmm13
  17701. pcmpeqd xmm12, xmm10
  17702. movdqu xmm0, [r9]
  17703. movdqu xmm1, [r9+16]
  17704. movdqu xmm2, [r9+32]
  17705. movdqu xmm3, [r9+48]
  17706. pand xmm0, xmm12
  17707. pand xmm1, xmm12
  17708. pand xmm2, xmm12
  17709. pand xmm3, xmm12
  17710. por xmm4, xmm0
  17711. por xmm5, xmm1
  17712. por xmm6, xmm2
  17713. por xmm7, xmm3
  17714. paddd xmm13, xmm11
  17715. ; ENTRY: 61
  17716. mov r9, QWORD PTR [rdx+488]
  17717. add r9, 192
  17718. movdqu xmm12, xmm13
  17719. pcmpeqd xmm12, xmm10
  17720. movdqu xmm0, [r9]
  17721. movdqu xmm1, [r9+16]
  17722. movdqu xmm2, [r9+32]
  17723. movdqu xmm3, [r9+48]
  17724. pand xmm0, xmm12
  17725. pand xmm1, xmm12
  17726. pand xmm2, xmm12
  17727. pand xmm3, xmm12
  17728. por xmm4, xmm0
  17729. por xmm5, xmm1
  17730. por xmm6, xmm2
  17731. por xmm7, xmm3
  17732. paddd xmm13, xmm11
  17733. ; ENTRY: 62
  17734. mov r9, QWORD PTR [rdx+496]
  17735. add r9, 192
  17736. movdqu xmm12, xmm13
  17737. pcmpeqd xmm12, xmm10
  17738. movdqu xmm0, [r9]
  17739. movdqu xmm1, [r9+16]
  17740. movdqu xmm2, [r9+32]
  17741. movdqu xmm3, [r9+48]
  17742. pand xmm0, xmm12
  17743. pand xmm1, xmm12
  17744. pand xmm2, xmm12
  17745. pand xmm3, xmm12
  17746. por xmm4, xmm0
  17747. por xmm5, xmm1
  17748. por xmm6, xmm2
  17749. por xmm7, xmm3
  17750. paddd xmm13, xmm11
  17751. ; ENTRY: 63
  17752. mov r9, QWORD PTR [rdx+504]
  17753. add r9, 192
  17754. movdqu xmm12, xmm13
  17755. pcmpeqd xmm12, xmm10
  17756. movdqu xmm0, [r9]
  17757. movdqu xmm1, [r9+16]
  17758. movdqu xmm2, [r9+32]
  17759. movdqu xmm3, [r9+48]
  17760. pand xmm0, xmm12
  17761. pand xmm1, xmm12
  17762. pand xmm2, xmm12
  17763. pand xmm3, xmm12
  17764. por xmm4, xmm0
  17765. por xmm5, xmm1
  17766. por xmm6, xmm2
  17767. por xmm7, xmm3
  17768. paddd xmm13, xmm11
  17769. movdqu [rcx], xmm4
  17770. movdqu [rcx+16], xmm5
  17771. movdqu [rcx+32], xmm6
  17772. movdqu [rcx+48], xmm7
  17773. ; END: 24-31
  17774. vmovdqu xmm6, OWORD PTR [rsp]
  17775. vmovdqu xmm7, OWORD PTR [rsp+16]
  17776. vmovdqu xmm8, OWORD PTR [rsp+32]
  17777. vmovdqu xmm9, OWORD PTR [rsp+48]
  17778. vmovdqu xmm10, OWORD PTR [rsp+64]
  17779. vmovdqu xmm11, OWORD PTR [rsp+80]
  17780. vmovdqu xmm12, OWORD PTR [rsp+96]
  17781. vmovdqu xmm13, OWORD PTR [rsp+112]
  17782. add rsp, 128
  17783. ret
  17784. sp_2048_get_from_table_32 ENDP
  17785. _text ENDS
  17786. ENDIF
  17787. IFDEF HAVE_INTEL_AVX2
  17788. ; /* Reduce the number back to 2048 bits using Montgomery reduction.
  17789. ; *
  17790. ; * a A single precision number to reduce in place.
  17791. ; * m The single precision number representing the modulus.
  17792. ; * mp The digit representing the negative inverse of m mod 2^n, where n is the digit width in bits (64 here).
  17793. ; */
  17794. _text SEGMENT READONLY PARA
  17795. sp_2048_mont_reduce_avx2_32 PROC
  17796. push r12
  17797. push r13
  17798. push r14
  17799. push r15
  17800. push rdi
  17801. push rsi
  17802. push rbx
  17803. push rbp
  17804. mov r9, rcx
  17805. mov r10, rdx
  17806. xor rbp, rbp
  17807. ; i = 32
  17808. mov r11, 32
  17809. mov r14, QWORD PTR [r9]
  17810. mov r15, QWORD PTR [r9+8]
  17811. mov rdi, QWORD PTR [r9+16]
  17812. mov rsi, QWORD PTR [r9+24]
  17813. add r9, 128
  17814. xor rbp, rbp
  17815. L_2048_mont_reduce_avx2_32_loop:
  17816. ; mu = a[i] * mp
  17817. mov rdx, r14
  17818. mov r12, r14
  17819. imul rdx, r8
  17820. xor rbx, rbx
  17821. ; a[i+0] += m[0] * mu
  17822. mulx rcx, rax, QWORD PTR [r10]
  17823. mov r14, r15
  17824. adcx r12, rax
  17825. adox r14, rcx
  17826. ; a[i+1] += m[1] * mu
  17827. mulx rcx, rax, QWORD PTR [r10+8]
  17828. mov r15, rdi
  17829. adcx r14, rax
  17830. adox r15, rcx
  17831. ; a[i+2] += m[2] * mu
  17832. mulx rcx, rax, QWORD PTR [r10+16]
  17833. mov rdi, rsi
  17834. adcx r15, rax
  17835. adox rdi, rcx
  17836. ; a[i+3] += m[3] * mu
  17837. mulx rcx, rax, QWORD PTR [r10+24]
  17838. mov rsi, QWORD PTR [r9+-96]
  17839. adcx rdi, rax
  17840. adox rsi, rcx
  17841. ; a[i+4] += m[4] * mu
  17842. mulx rcx, rax, QWORD PTR [r10+32]
  17843. mov r13, QWORD PTR [r9+-88]
  17844. adcx rsi, rax
  17845. adox r13, rcx
  17846. ; a[i+5] += m[5] * mu
  17847. mulx rcx, rax, QWORD PTR [r10+40]
  17848. mov r12, QWORD PTR [r9+-80]
  17849. adcx r13, rax
  17850. adox r12, rcx
  17851. mov QWORD PTR [r9+-88], r13
  17852. ; a[i+6] += m[6] * mu
  17853. mulx rcx, rax, QWORD PTR [r10+48]
  17854. mov r13, QWORD PTR [r9+-72]
  17855. adcx r12, rax
  17856. adox r13, rcx
  17857. mov QWORD PTR [r9+-80], r12
  17858. ; a[i+7] += m[7] * mu
  17859. mulx rcx, rax, QWORD PTR [r10+56]
  17860. mov r12, QWORD PTR [r9+-64]
  17861. adcx r13, rax
  17862. adox r12, rcx
  17863. mov QWORD PTR [r9+-72], r13
  17864. ; a[i+8] += m[8] * mu
  17865. mulx rcx, rax, QWORD PTR [r10+64]
  17866. mov r13, QWORD PTR [r9+-56]
  17867. adcx r12, rax
  17868. adox r13, rcx
  17869. mov QWORD PTR [r9+-64], r12
  17870. ; a[i+9] += m[9] * mu
  17871. mulx rcx, rax, QWORD PTR [r10+72]
  17872. mov r12, QWORD PTR [r9+-48]
  17873. adcx r13, rax
  17874. adox r12, rcx
  17875. mov QWORD PTR [r9+-56], r13
  17876. ; a[i+10] += m[10] * mu
  17877. mulx rcx, rax, QWORD PTR [r10+80]
  17878. mov r13, QWORD PTR [r9+-40]
  17879. adcx r12, rax
  17880. adox r13, rcx
  17881. mov QWORD PTR [r9+-48], r12
  17882. ; a[i+11] += m[11] * mu
  17883. mulx rcx, rax, QWORD PTR [r10+88]
  17884. mov r12, QWORD PTR [r9+-32]
  17885. adcx r13, rax
  17886. adox r12, rcx
  17887. mov QWORD PTR [r9+-40], r13
  17888. ; a[i+12] += m[12] * mu
  17889. mulx rcx, rax, QWORD PTR [r10+96]
  17890. mov r13, QWORD PTR [r9+-24]
  17891. adcx r12, rax
  17892. adox r13, rcx
  17893. mov QWORD PTR [r9+-32], r12
  17894. ; a[i+13] += m[13] * mu
  17895. mulx rcx, rax, QWORD PTR [r10+104]
  17896. mov r12, QWORD PTR [r9+-16]
  17897. adcx r13, rax
  17898. adox r12, rcx
  17899. mov QWORD PTR [r9+-24], r13
  17900. ; a[i+14] += m[14] * mu
  17901. mulx rcx, rax, QWORD PTR [r10+112]
  17902. mov r13, QWORD PTR [r9+-8]
  17903. adcx r12, rax
  17904. adox r13, rcx
  17905. mov QWORD PTR [r9+-16], r12
  17906. ; a[i+15] += m[15] * mu
  17907. mulx rcx, rax, QWORD PTR [r10+120]
  17908. mov r12, QWORD PTR [r9]
  17909. adcx r13, rax
  17910. adox r12, rcx
  17911. mov QWORD PTR [r9+-8], r13
  17912. ; a[i+16] += m[16] * mu
  17913. mulx rcx, rax, QWORD PTR [r10+128]
  17914. mov r13, QWORD PTR [r9+8]
  17915. adcx r12, rax
  17916. adox r13, rcx
  17917. mov QWORD PTR [r9], r12
  17918. ; a[i+17] += m[17] * mu
  17919. mulx rcx, rax, QWORD PTR [r10+136]
  17920. mov r12, QWORD PTR [r9+16]
  17921. adcx r13, rax
  17922. adox r12, rcx
  17923. mov QWORD PTR [r9+8], r13
  17924. ; a[i+18] += m[18] * mu
  17925. mulx rcx, rax, QWORD PTR [r10+144]
  17926. mov r13, QWORD PTR [r9+24]
  17927. adcx r12, rax
  17928. adox r13, rcx
  17929. mov QWORD PTR [r9+16], r12
  17930. ; a[i+19] += m[19] * mu
  17931. mulx rcx, rax, QWORD PTR [r10+152]
  17932. mov r12, QWORD PTR [r9+32]
  17933. adcx r13, rax
  17934. adox r12, rcx
  17935. mov QWORD PTR [r9+24], r13
  17936. ; a[i+20] += m[20] * mu
  17937. mulx rcx, rax, QWORD PTR [r10+160]
  17938. mov r13, QWORD PTR [r9+40]
  17939. adcx r12, rax
  17940. adox r13, rcx
  17941. mov QWORD PTR [r9+32], r12
  17942. ; a[i+21] += m[21] * mu
  17943. mulx rcx, rax, QWORD PTR [r10+168]
  17944. mov r12, QWORD PTR [r9+48]
  17945. adcx r13, rax
  17946. adox r12, rcx
  17947. mov QWORD PTR [r9+40], r13
  17948. ; a[i+22] += m[22] * mu
  17949. mulx rcx, rax, QWORD PTR [r10+176]
  17950. mov r13, QWORD PTR [r9+56]
  17951. adcx r12, rax
  17952. adox r13, rcx
  17953. mov QWORD PTR [r9+48], r12
  17954. ; a[i+23] += m[23] * mu
  17955. mulx rcx, rax, QWORD PTR [r10+184]
  17956. mov r12, QWORD PTR [r9+64]
  17957. adcx r13, rax
  17958. adox r12, rcx
  17959. mov QWORD PTR [r9+56], r13
  17960. ; a[i+24] += m[24] * mu
  17961. mulx rcx, rax, QWORD PTR [r10+192]
  17962. mov r13, QWORD PTR [r9+72]
  17963. adcx r12, rax
  17964. adox r13, rcx
  17965. mov QWORD PTR [r9+64], r12
  17966. ; a[i+25] += m[25] * mu
  17967. mulx rcx, rax, QWORD PTR [r10+200]
  17968. mov r12, QWORD PTR [r9+80]
  17969. adcx r13, rax
  17970. adox r12, rcx
  17971. mov QWORD PTR [r9+72], r13
  17972. ; a[i+26] += m[26] * mu
  17973. mulx rcx, rax, QWORD PTR [r10+208]
  17974. mov r13, QWORD PTR [r9+88]
  17975. adcx r12, rax
  17976. adox r13, rcx
  17977. mov QWORD PTR [r9+80], r12
  17978. ; a[i+27] += m[27] * mu
  17979. mulx rcx, rax, QWORD PTR [r10+216]
  17980. mov r12, QWORD PTR [r9+96]
  17981. adcx r13, rax
  17982. adox r12, rcx
  17983. mov QWORD PTR [r9+88], r13
  17984. ; a[i+28] += m[28] * mu
  17985. mulx rcx, rax, QWORD PTR [r10+224]
  17986. mov r13, QWORD PTR [r9+104]
  17987. adcx r12, rax
  17988. adox r13, rcx
  17989. mov QWORD PTR [r9+96], r12
  17990. ; a[i+29] += m[29] * mu
  17991. mulx rcx, rax, QWORD PTR [r10+232]
  17992. mov r12, QWORD PTR [r9+112]
  17993. adcx r13, rax
  17994. adox r12, rcx
  17995. mov QWORD PTR [r9+104], r13
  17996. ; a[i+30] += m[30] * mu
  17997. mulx rcx, rax, QWORD PTR [r10+240]
  17998. mov r13, QWORD PTR [r9+120]
  17999. adcx r12, rax
  18000. adox r13, rcx
  18001. mov QWORD PTR [r9+112], r12
  18002. ; a[i+31] += m[31] * mu
  18003. mulx rcx, rax, QWORD PTR [r10+248]
  18004. mov r12, QWORD PTR [r9+128]
  18005. adcx r13, rax
  18006. adox r12, rcx
  18007. mov QWORD PTR [r9+120], r13
  18008. adcx r12, rbp
  18009. mov rbp, rbx
  18010. mov QWORD PTR [r9+128], r12
  18011. adox rbp, rbx
  18012. adcx rbp, rbx
  18013. ; a += 1
  18014. add r9, 8
  18015. ; i -= 1
  18016. sub r11, 1
  18017. jnz L_2048_mont_reduce_avx2_32_loop
  18018. sub r9, 128
  18019. neg rbp
  18020. mov r8, r9
  18021. sub r9, 256
  18022. mov rcx, QWORD PTR [r10]
  18023. mov rdx, r14
  18024. pext rcx, rcx, rbp
  18025. sub rdx, rcx
  18026. mov rcx, QWORD PTR [r10+8]
  18027. mov rax, r15
  18028. pext rcx, rcx, rbp
  18029. mov QWORD PTR [r9], rdx
  18030. sbb rax, rcx
  18031. mov rdx, QWORD PTR [r10+16]
  18032. mov rcx, rdi
  18033. pext rdx, rdx, rbp
  18034. mov QWORD PTR [r9+8], rax
  18035. sbb rcx, rdx
  18036. mov rax, QWORD PTR [r10+24]
  18037. mov rdx, rsi
  18038. pext rax, rax, rbp
  18039. mov QWORD PTR [r9+16], rcx
  18040. sbb rdx, rax
  18041. mov rcx, QWORD PTR [r10+32]
  18042. mov rax, QWORD PTR [r8+32]
  18043. pext rcx, rcx, rbp
  18044. mov QWORD PTR [r9+24], rdx
  18045. sbb rax, rcx
  18046. mov rdx, QWORD PTR [r10+40]
  18047. mov rcx, QWORD PTR [r8+40]
  18048. pext rdx, rdx, rbp
  18049. mov QWORD PTR [r9+32], rax
  18050. sbb rcx, rdx
  18051. mov rax, QWORD PTR [r10+48]
  18052. mov rdx, QWORD PTR [r8+48]
  18053. pext rax, rax, rbp
  18054. mov QWORD PTR [r9+40], rcx
  18055. sbb rdx, rax
  18056. mov rcx, QWORD PTR [r10+56]
  18057. mov rax, QWORD PTR [r8+56]
  18058. pext rcx, rcx, rbp
  18059. mov QWORD PTR [r9+48], rdx
  18060. sbb rax, rcx
  18061. mov rdx, QWORD PTR [r10+64]
  18062. mov rcx, QWORD PTR [r8+64]
  18063. pext rdx, rdx, rbp
  18064. mov QWORD PTR [r9+56], rax
  18065. sbb rcx, rdx
  18066. mov rax, QWORD PTR [r10+72]
  18067. mov rdx, QWORD PTR [r8+72]
  18068. pext rax, rax, rbp
  18069. mov QWORD PTR [r9+64], rcx
  18070. sbb rdx, rax
  18071. mov rcx, QWORD PTR [r10+80]
  18072. mov rax, QWORD PTR [r8+80]
  18073. pext rcx, rcx, rbp
  18074. mov QWORD PTR [r9+72], rdx
  18075. sbb rax, rcx
  18076. mov rdx, QWORD PTR [r10+88]
  18077. mov rcx, QWORD PTR [r8+88]
  18078. pext rdx, rdx, rbp
  18079. mov QWORD PTR [r9+80], rax
  18080. sbb rcx, rdx
  18081. mov rax, QWORD PTR [r10+96]
  18082. mov rdx, QWORD PTR [r8+96]
  18083. pext rax, rax, rbp
  18084. mov QWORD PTR [r9+88], rcx
  18085. sbb rdx, rax
  18086. mov rcx, QWORD PTR [r10+104]
  18087. mov rax, QWORD PTR [r8+104]
  18088. pext rcx, rcx, rbp
  18089. mov QWORD PTR [r9+96], rdx
  18090. sbb rax, rcx
  18091. mov rdx, QWORD PTR [r10+112]
  18092. mov rcx, QWORD PTR [r8+112]
  18093. pext rdx, rdx, rbp
  18094. mov QWORD PTR [r9+104], rax
  18095. sbb rcx, rdx
  18096. mov rax, QWORD PTR [r10+120]
  18097. mov rdx, QWORD PTR [r8+120]
  18098. pext rax, rax, rbp
  18099. mov QWORD PTR [r9+112], rcx
  18100. sbb rdx, rax
  18101. mov rcx, QWORD PTR [r10+128]
  18102. mov rax, QWORD PTR [r8+128]
  18103. pext rcx, rcx, rbp
  18104. mov QWORD PTR [r9+120], rdx
  18105. sbb rax, rcx
  18106. mov rdx, QWORD PTR [r10+136]
  18107. mov rcx, QWORD PTR [r8+136]
  18108. pext rdx, rdx, rbp
  18109. mov QWORD PTR [r9+128], rax
  18110. sbb rcx, rdx
  18111. mov rax, QWORD PTR [r10+144]
  18112. mov rdx, QWORD PTR [r8+144]
  18113. pext rax, rax, rbp
  18114. mov QWORD PTR [r9+136], rcx
  18115. sbb rdx, rax
  18116. mov rcx, QWORD PTR [r10+152]
  18117. mov rax, QWORD PTR [r8+152]
  18118. pext rcx, rcx, rbp
  18119. mov QWORD PTR [r9+144], rdx
  18120. sbb rax, rcx
  18121. mov rdx, QWORD PTR [r10+160]
  18122. mov rcx, QWORD PTR [r8+160]
  18123. pext rdx, rdx, rbp
  18124. mov QWORD PTR [r9+152], rax
  18125. sbb rcx, rdx
  18126. mov rax, QWORD PTR [r10+168]
  18127. mov rdx, QWORD PTR [r8+168]
  18128. pext rax, rax, rbp
  18129. mov QWORD PTR [r9+160], rcx
  18130. sbb rdx, rax
  18131. mov rcx, QWORD PTR [r10+176]
  18132. mov rax, QWORD PTR [r8+176]
  18133. pext rcx, rcx, rbp
  18134. mov QWORD PTR [r9+168], rdx
  18135. sbb rax, rcx
  18136. mov rdx, QWORD PTR [r10+184]
  18137. mov rcx, QWORD PTR [r8+184]
  18138. pext rdx, rdx, rbp
  18139. mov QWORD PTR [r9+176], rax
  18140. sbb rcx, rdx
  18141. mov rax, QWORD PTR [r10+192]
  18142. mov rdx, QWORD PTR [r8+192]
  18143. pext rax, rax, rbp
  18144. mov QWORD PTR [r9+184], rcx
  18145. sbb rdx, rax
  18146. mov rcx, QWORD PTR [r10+200]
  18147. mov rax, QWORD PTR [r8+200]
  18148. pext rcx, rcx, rbp
  18149. mov QWORD PTR [r9+192], rdx
  18150. sbb rax, rcx
  18151. mov rdx, QWORD PTR [r10+208]
  18152. mov rcx, QWORD PTR [r8+208]
  18153. pext rdx, rdx, rbp
  18154. mov QWORD PTR [r9+200], rax
  18155. sbb rcx, rdx
  18156. mov rax, QWORD PTR [r10+216]
  18157. mov rdx, QWORD PTR [r8+216]
  18158. pext rax, rax, rbp
  18159. mov QWORD PTR [r9+208], rcx
  18160. sbb rdx, rax
  18161. mov rcx, QWORD PTR [r10+224]
  18162. mov rax, QWORD PTR [r8+224]
  18163. pext rcx, rcx, rbp
  18164. mov QWORD PTR [r9+216], rdx
  18165. sbb rax, rcx
  18166. mov rdx, QWORD PTR [r10+232]
  18167. mov rcx, QWORD PTR [r8+232]
  18168. pext rdx, rdx, rbp
  18169. mov QWORD PTR [r9+224], rax
  18170. sbb rcx, rdx
  18171. mov rax, QWORD PTR [r10+240]
  18172. mov rdx, QWORD PTR [r8+240]
  18173. pext rax, rax, rbp
  18174. mov QWORD PTR [r9+232], rcx
  18175. sbb rdx, rax
  18176. mov rcx, QWORD PTR [r10+248]
  18177. mov rax, QWORD PTR [r8+248]
  18178. pext rcx, rcx, rbp
  18179. mov QWORD PTR [r9+240], rdx
  18180. sbb rax, rcx
  18181. mov QWORD PTR [r9+248], rax
  18182. pop rbp
  18183. pop rbx
  18184. pop rsi
  18185. pop rdi
  18186. pop r15
  18187. pop r14
  18188. pop r13
  18189. pop r12
  18190. ret
  18191. sp_2048_mont_reduce_avx2_32 ENDP
  18192. _text ENDS
  18193. ENDIF
  18194. IFNDEF WC_NO_CACHE_RESISTANT
  18195. _text SEGMENT READONLY PARA
  18196. sp_2048_get_from_table_avx2_32 PROC
  18197. sub rsp, 128
  18198. vmovdqu OWORD PTR [rsp], xmm6
  18199. vmovdqu OWORD PTR [rsp+16], xmm7
  18200. vmovdqu OWORD PTR [rsp+32], xmm8
  18201. vmovdqu OWORD PTR [rsp+48], xmm9
  18202. vmovdqu OWORD PTR [rsp+64], xmm10
  18203. vmovdqu OWORD PTR [rsp+80], xmm11
  18204. vmovdqu OWORD PTR [rsp+96], xmm12
  18205. vmovdqu OWORD PTR [rsp+112], xmm13
  18206. mov rax, 1
  18207. movd xmm10, r8
  18208. movd xmm11, rax
  18209. vpxor ymm13, ymm13, ymm13
  18210. vpermd ymm10, ymm13, ymm10
  18211. vpermd ymm11, ymm13, ymm11
  18212. ; START: 0-15
  18213. vpxor ymm13, ymm13, ymm13
  18214. vpxor ymm4, ymm4, ymm4
  18215. vpxor ymm5, ymm5, ymm5
  18216. vpxor ymm6, ymm6, ymm6
  18217. vpxor ymm7, ymm7, ymm7
  18218. ; ENTRY: 0
  18219. mov r9, QWORD PTR [rdx]
  18220. vpcmpeqd ymm12, ymm13, ymm10
  18221. vmovdqu ymm0, YMMWORD PTR [r9]
  18222. vmovdqu ymm1, YMMWORD PTR [r9+32]
  18223. vmovdqu ymm2, YMMWORD PTR [r9+64]
  18224. vmovdqu ymm3, YMMWORD PTR [r9+96]
  18225. vpand ymm0, ymm0, ymm12
  18226. vpand ymm1, ymm1, ymm12
  18227. vpand ymm2, ymm2, ymm12
  18228. vpand ymm3, ymm3, ymm12
  18229. vpor ymm4, ymm4, ymm0
  18230. vpor ymm5, ymm5, ymm1
  18231. vpor ymm6, ymm6, ymm2
  18232. vpor ymm7, ymm7, ymm3
  18233. vpaddd ymm13, ymm13, ymm11
  18234. ; ENTRY: 1
  18235. mov r9, QWORD PTR [rdx+8]
  18236. vpcmpeqd ymm12, ymm13, ymm10
  18237. vmovdqu ymm0, YMMWORD PTR [r9]
  18238. vmovdqu ymm1, YMMWORD PTR [r9+32]
  18239. vmovdqu ymm2, YMMWORD PTR [r9+64]
  18240. vmovdqu ymm3, YMMWORD PTR [r9+96]
  18241. vpand ymm0, ymm0, ymm12
  18242. vpand ymm1, ymm1, ymm12
  18243. vpand ymm2, ymm2, ymm12
  18244. vpand ymm3, ymm3, ymm12
  18245. vpor ymm4, ymm4, ymm0
  18246. vpor ymm5, ymm5, ymm1
  18247. vpor ymm6, ymm6, ymm2
  18248. vpor ymm7, ymm7, ymm3
  18249. vpaddd ymm13, ymm13, ymm11
  18250. ; ENTRY: 2
  18251. mov r9, QWORD PTR [rdx+16]
  18252. vpcmpeqd ymm12, ymm13, ymm10
  18253. vmovdqu ymm0, YMMWORD PTR [r9]
  18254. vmovdqu ymm1, YMMWORD PTR [r9+32]
  18255. vmovdqu ymm2, YMMWORD PTR [r9+64]
  18256. vmovdqu ymm3, YMMWORD PTR [r9+96]
  18257. vpand ymm0, ymm0, ymm12
  18258. vpand ymm1, ymm1, ymm12
  18259. vpand ymm2, ymm2, ymm12
  18260. vpand ymm3, ymm3, ymm12
  18261. vpor ymm4, ymm4, ymm0
  18262. vpor ymm5, ymm5, ymm1
  18263. vpor ymm6, ymm6, ymm2
  18264. vpor ymm7, ymm7, ymm3
  18265. vpaddd ymm13, ymm13, ymm11
  18266. ; ENTRY: 3
  18267. mov r9, QWORD PTR [rdx+24]
  18268. vpcmpeqd ymm12, ymm13, ymm10
  18269. vmovdqu ymm0, YMMWORD PTR [r9]
  18270. vmovdqu ymm1, YMMWORD PTR [r9+32]
  18271. vmovdqu ymm2, YMMWORD PTR [r9+64]
  18272. vmovdqu ymm3, YMMWORD PTR [r9+96]
  18273. vpand ymm0, ymm0, ymm12
  18274. vpand ymm1, ymm1, ymm12
  18275. vpand ymm2, ymm2, ymm12
  18276. vpand ymm3, ymm3, ymm12
  18277. vpor ymm4, ymm4, ymm0
  18278. vpor ymm5, ymm5, ymm1
  18279. vpor ymm6, ymm6, ymm2
  18280. vpor ymm7, ymm7, ymm3
  18281. vpaddd ymm13, ymm13, ymm11
  18282. ; ENTRY: 4
  18283. mov r9, QWORD PTR [rdx+32]
  18284. vpcmpeqd ymm12, ymm13, ymm10
  18285. vmovdqu ymm0, YMMWORD PTR [r9]
  18286. vmovdqu ymm1, YMMWORD PTR [r9+32]
  18287. vmovdqu ymm2, YMMWORD PTR [r9+64]
  18288. vmovdqu ymm3, YMMWORD PTR [r9+96]
  18289. vpand ymm0, ymm0, ymm12
  18290. vpand ymm1, ymm1, ymm12
  18291. vpand ymm2, ymm2, ymm12
  18292. vpand ymm3, ymm3, ymm12
  18293. vpor ymm4, ymm4, ymm0
  18294. vpor ymm5, ymm5, ymm1
  18295. vpor ymm6, ymm6, ymm2
  18296. vpor ymm7, ymm7, ymm3
  18297. vpaddd ymm13, ymm13, ymm11
  18298. ; ENTRY: 5
  18299. mov r9, QWORD PTR [rdx+40]
  18300. vpcmpeqd ymm12, ymm13, ymm10
  18301. vmovdqu ymm0, YMMWORD PTR [r9]
  18302. vmovdqu ymm1, YMMWORD PTR [r9+32]
  18303. vmovdqu ymm2, YMMWORD PTR [r9+64]
  18304. vmovdqu ymm3, YMMWORD PTR [r9+96]
  18305. vpand ymm0, ymm0, ymm12
  18306. vpand ymm1, ymm1, ymm12
  18307. vpand ymm2, ymm2, ymm12
  18308. vpand ymm3, ymm3, ymm12
  18309. vpor ymm4, ymm4, ymm0
  18310. vpor ymm5, ymm5, ymm1
  18311. vpor ymm6, ymm6, ymm2
  18312. vpor ymm7, ymm7, ymm3
  18313. vpaddd ymm13, ymm13, ymm11
  18314. ; ENTRY: 6
  18315. mov r9, QWORD PTR [rdx+48]
  18316. vpcmpeqd ymm12, ymm13, ymm10
  18317. vmovdqu ymm0, YMMWORD PTR [r9]
  18318. vmovdqu ymm1, YMMWORD PTR [r9+32]
  18319. vmovdqu ymm2, YMMWORD PTR [r9+64]
  18320. vmovdqu ymm3, YMMWORD PTR [r9+96]
  18321. vpand ymm0, ymm0, ymm12
  18322. vpand ymm1, ymm1, ymm12
  18323. vpand ymm2, ymm2, ymm12
  18324. vpand ymm3, ymm3, ymm12
  18325. vpor ymm4, ymm4, ymm0
  18326. vpor ymm5, ymm5, ymm1
  18327. vpor ymm6, ymm6, ymm2
  18328. vpor ymm7, ymm7, ymm3
  18329. vpaddd ymm13, ymm13, ymm11
  18330. ; ENTRY: 7
  18331. mov r9, QWORD PTR [rdx+56]
  18332. vpcmpeqd ymm12, ymm13, ymm10
  18333. vmovdqu ymm0, YMMWORD PTR [r9]
  18334. vmovdqu ymm1, YMMWORD PTR [r9+32]
  18335. vmovdqu ymm2, YMMWORD PTR [r9+64]
  18336. vmovdqu ymm3, YMMWORD PTR [r9+96]
  18337. vpand ymm0, ymm0, ymm12
  18338. vpand ymm1, ymm1, ymm12
  18339. vpand ymm2, ymm2, ymm12
  18340. vpand ymm3, ymm3, ymm12
  18341. vpor ymm4, ymm4, ymm0
  18342. vpor ymm5, ymm5, ymm1
  18343. vpor ymm6, ymm6, ymm2
  18344. vpor ymm7, ymm7, ymm3
  18345. vpaddd ymm13, ymm13, ymm11
  18346. ; ENTRY: 8
  18347. mov r9, QWORD PTR [rdx+64]
  18348. vpcmpeqd ymm12, ymm13, ymm10
  18349. vmovdqu ymm0, YMMWORD PTR [r9]
  18350. vmovdqu ymm1, YMMWORD PTR [r9+32]
  18351. vmovdqu ymm2, YMMWORD PTR [r9+64]
  18352. vmovdqu ymm3, YMMWORD PTR [r9+96]
  18353. vpand ymm0, ymm0, ymm12
  18354. vpand ymm1, ymm1, ymm12
  18355. vpand ymm2, ymm2, ymm12
  18356. vpand ymm3, ymm3, ymm12
  18357. vpor ymm4, ymm4, ymm0
  18358. vpor ymm5, ymm5, ymm1
  18359. vpor ymm6, ymm6, ymm2
  18360. vpor ymm7, ymm7, ymm3
  18361. vpaddd ymm13, ymm13, ymm11
  18362. ; ENTRY: 9
  18363. mov r9, QWORD PTR [rdx+72]
  18364. vpcmpeqd ymm12, ymm13, ymm10
  18365. vmovdqu ymm0, YMMWORD PTR [r9]
  18366. vmovdqu ymm1, YMMWORD PTR [r9+32]
  18367. vmovdqu ymm2, YMMWORD PTR [r9+64]
  18368. vmovdqu ymm3, YMMWORD PTR [r9+96]
  18369. vpand ymm0, ymm0, ymm12
  18370. vpand ymm1, ymm1, ymm12
  18371. vpand ymm2, ymm2, ymm12
  18372. vpand ymm3, ymm3, ymm12
  18373. vpor ymm4, ymm4, ymm0
  18374. vpor ymm5, ymm5, ymm1
  18375. vpor ymm6, ymm6, ymm2
  18376. vpor ymm7, ymm7, ymm3
  18377. vpaddd ymm13, ymm13, ymm11
  18378. ; ENTRY: 10
  18379. mov r9, QWORD PTR [rdx+80]
  18380. vpcmpeqd ymm12, ymm13, ymm10
  18381. vmovdqu ymm0, YMMWORD PTR [r9]
  18382. vmovdqu ymm1, YMMWORD PTR [r9+32]
  18383. vmovdqu ymm2, YMMWORD PTR [r9+64]
  18384. vmovdqu ymm3, YMMWORD PTR [r9+96]
  18385. vpand ymm0, ymm0, ymm12
  18386. vpand ymm1, ymm1, ymm12
  18387. vpand ymm2, ymm2, ymm12
  18388. vpand ymm3, ymm3, ymm12
  18389. vpor ymm4, ymm4, ymm0
  18390. vpor ymm5, ymm5, ymm1
  18391. vpor ymm6, ymm6, ymm2
  18392. vpor ymm7, ymm7, ymm3
  18393. vpaddd ymm13, ymm13, ymm11
  18394. ; ENTRY: 11
  18395. mov r9, QWORD PTR [rdx+88]
  18396. vpcmpeqd ymm12, ymm13, ymm10
  18397. vmovdqu ymm0, YMMWORD PTR [r9]
  18398. vmovdqu ymm1, YMMWORD PTR [r9+32]
  18399. vmovdqu ymm2, YMMWORD PTR [r9+64]
  18400. vmovdqu ymm3, YMMWORD PTR [r9+96]
  18401. vpand ymm0, ymm0, ymm12
  18402. vpand ymm1, ymm1, ymm12
  18403. vpand ymm2, ymm2, ymm12
  18404. vpand ymm3, ymm3, ymm12
  18405. vpor ymm4, ymm4, ymm0
  18406. vpor ymm5, ymm5, ymm1
  18407. vpor ymm6, ymm6, ymm2
  18408. vpor ymm7, ymm7, ymm3
  18409. vpaddd ymm13, ymm13, ymm11
  18410. ; ENTRY: 12
  18411. mov r9, QWORD PTR [rdx+96]
  18412. vpcmpeqd ymm12, ymm13, ymm10
  18413. vmovdqu ymm0, YMMWORD PTR [r9]
  18414. vmovdqu ymm1, YMMWORD PTR [r9+32]
  18415. vmovdqu ymm2, YMMWORD PTR [r9+64]
  18416. vmovdqu ymm3, YMMWORD PTR [r9+96]
  18417. vpand ymm0, ymm0, ymm12
  18418. vpand ymm1, ymm1, ymm12
  18419. vpand ymm2, ymm2, ymm12
  18420. vpand ymm3, ymm3, ymm12
  18421. vpor ymm4, ymm4, ymm0
  18422. vpor ymm5, ymm5, ymm1
  18423. vpor ymm6, ymm6, ymm2
  18424. vpor ymm7, ymm7, ymm3
  18425. vpaddd ymm13, ymm13, ymm11
  18426. ; ENTRY: 13
  18427. mov r9, QWORD PTR [rdx+104]
  18428. vpcmpeqd ymm12, ymm13, ymm10
  18429. vmovdqu ymm0, YMMWORD PTR [r9]
  18430. vmovdqu ymm1, YMMWORD PTR [r9+32]
  18431. vmovdqu ymm2, YMMWORD PTR [r9+64]
  18432. vmovdqu ymm3, YMMWORD PTR [r9+96]
  18433. vpand ymm0, ymm0, ymm12
  18434. vpand ymm1, ymm1, ymm12
  18435. vpand ymm2, ymm2, ymm12
  18436. vpand ymm3, ymm3, ymm12
  18437. vpor ymm4, ymm4, ymm0
  18438. vpor ymm5, ymm5, ymm1
  18439. vpor ymm6, ymm6, ymm2
  18440. vpor ymm7, ymm7, ymm3
  18441. vpaddd ymm13, ymm13, ymm11
  18442. ; ENTRY: 14
  18443. mov r9, QWORD PTR [rdx+112]
  18444. vpcmpeqd ymm12, ymm13, ymm10
  18445. vmovdqu ymm0, YMMWORD PTR [r9]
  18446. vmovdqu ymm1, YMMWORD PTR [r9+32]
  18447. vmovdqu ymm2, YMMWORD PTR [r9+64]
  18448. vmovdqu ymm3, YMMWORD PTR [r9+96]
  18449. vpand ymm0, ymm0, ymm12
  18450. vpand ymm1, ymm1, ymm12
  18451. vpand ymm2, ymm2, ymm12
  18452. vpand ymm3, ymm3, ymm12
  18453. vpor ymm4, ymm4, ymm0
  18454. vpor ymm5, ymm5, ymm1
  18455. vpor ymm6, ymm6, ymm2
  18456. vpor ymm7, ymm7, ymm3
  18457. vpaddd ymm13, ymm13, ymm11
  18458. ; ENTRY: 15
  18459. mov r9, QWORD PTR [rdx+120]
  18460. vpcmpeqd ymm12, ymm13, ymm10
  18461. vmovdqu ymm0, YMMWORD PTR [r9]
  18462. vmovdqu ymm1, YMMWORD PTR [r9+32]
  18463. vmovdqu ymm2, YMMWORD PTR [r9+64]
  18464. vmovdqu ymm3, YMMWORD PTR [r9+96]
  18465. vpand ymm0, ymm0, ymm12
  18466. vpand ymm1, ymm1, ymm12
  18467. vpand ymm2, ymm2, ymm12
  18468. vpand ymm3, ymm3, ymm12
  18469. vpor ymm4, ymm4, ymm0
  18470. vpor ymm5, ymm5, ymm1
  18471. vpor ymm6, ymm6, ymm2
  18472. vpor ymm7, ymm7, ymm3
  18473. vpaddd ymm13, ymm13, ymm11
  18474. ; ENTRY: 16
  18475. mov r9, QWORD PTR [rdx+128]
  18476. vpcmpeqd ymm12, ymm13, ymm10
  18477. vmovdqu ymm0, YMMWORD PTR [r9]
  18478. vmovdqu ymm1, YMMWORD PTR [r9+32]
  18479. vmovdqu ymm2, YMMWORD PTR [r9+64]
  18480. vmovdqu ymm3, YMMWORD PTR [r9+96]
  18481. vpand ymm0, ymm0, ymm12
  18482. vpand ymm1, ymm1, ymm12
  18483. vpand ymm2, ymm2, ymm12
  18484. vpand ymm3, ymm3, ymm12
  18485. vpor ymm4, ymm4, ymm0
  18486. vpor ymm5, ymm5, ymm1
  18487. vpor ymm6, ymm6, ymm2
  18488. vpor ymm7, ymm7, ymm3
  18489. vpaddd ymm13, ymm13, ymm11
  18490. ; ENTRY: 17
  18491. mov r9, QWORD PTR [rdx+136]
  18492. vpcmpeqd ymm12, ymm13, ymm10
  18493. vmovdqu ymm0, YMMWORD PTR [r9]
  18494. vmovdqu ymm1, YMMWORD PTR [r9+32]
  18495. vmovdqu ymm2, YMMWORD PTR [r9+64]
  18496. vmovdqu ymm3, YMMWORD PTR [r9+96]
  18497. vpand ymm0, ymm0, ymm12
  18498. vpand ymm1, ymm1, ymm12
  18499. vpand ymm2, ymm2, ymm12
  18500. vpand ymm3, ymm3, ymm12
  18501. vpor ymm4, ymm4, ymm0
  18502. vpor ymm5, ymm5, ymm1
  18503. vpor ymm6, ymm6, ymm2
  18504. vpor ymm7, ymm7, ymm3
  18505. vpaddd ymm13, ymm13, ymm11
  18506. ; ENTRY: 18
  18507. mov r9, QWORD PTR [rdx+144]
  18508. vpcmpeqd ymm12, ymm13, ymm10
  18509. vmovdqu ymm0, YMMWORD PTR [r9]
  18510. vmovdqu ymm1, YMMWORD PTR [r9+32]
  18511. vmovdqu ymm2, YMMWORD PTR [r9+64]
  18512. vmovdqu ymm3, YMMWORD PTR [r9+96]
  18513. vpand ymm0, ymm0, ymm12
  18514. vpand ymm1, ymm1, ymm12
  18515. vpand ymm2, ymm2, ymm12
  18516. vpand ymm3, ymm3, ymm12
  18517. vpor ymm4, ymm4, ymm0
  18518. vpor ymm5, ymm5, ymm1
  18519. vpor ymm6, ymm6, ymm2
  18520. vpor ymm7, ymm7, ymm3
  18521. vpaddd ymm13, ymm13, ymm11
  18522. ; ENTRY: 19
  18523. mov r9, QWORD PTR [rdx+152]
  18524. vpcmpeqd ymm12, ymm13, ymm10
  18525. vmovdqu ymm0, YMMWORD PTR [r9]
  18526. vmovdqu ymm1, YMMWORD PTR [r9+32]
  18527. vmovdqu ymm2, YMMWORD PTR [r9+64]
  18528. vmovdqu ymm3, YMMWORD PTR [r9+96]
  18529. vpand ymm0, ymm0, ymm12
  18530. vpand ymm1, ymm1, ymm12
  18531. vpand ymm2, ymm2, ymm12
  18532. vpand ymm3, ymm3, ymm12
  18533. vpor ymm4, ymm4, ymm0
  18534. vpor ymm5, ymm5, ymm1
  18535. vpor ymm6, ymm6, ymm2
  18536. vpor ymm7, ymm7, ymm3
  18537. vpaddd ymm13, ymm13, ymm11
  18538. ; ENTRY: 20
  18539. mov r9, QWORD PTR [rdx+160]
  18540. vpcmpeqd ymm12, ymm13, ymm10
  18541. vmovdqu ymm0, YMMWORD PTR [r9]
  18542. vmovdqu ymm1, YMMWORD PTR [r9+32]
  18543. vmovdqu ymm2, YMMWORD PTR [r9+64]
  18544. vmovdqu ymm3, YMMWORD PTR [r9+96]
  18545. vpand ymm0, ymm0, ymm12
  18546. vpand ymm1, ymm1, ymm12
  18547. vpand ymm2, ymm2, ymm12
  18548. vpand ymm3, ymm3, ymm12
  18549. vpor ymm4, ymm4, ymm0
  18550. vpor ymm5, ymm5, ymm1
  18551. vpor ymm6, ymm6, ymm2
  18552. vpor ymm7, ymm7, ymm3
  18553. vpaddd ymm13, ymm13, ymm11
  18554. ; ENTRY: 21
  18555. mov r9, QWORD PTR [rdx+168]
  18556. vpcmpeqd ymm12, ymm13, ymm10
  18557. vmovdqu ymm0, YMMWORD PTR [r9]
  18558. vmovdqu ymm1, YMMWORD PTR [r9+32]
  18559. vmovdqu ymm2, YMMWORD PTR [r9+64]
  18560. vmovdqu ymm3, YMMWORD PTR [r9+96]
  18561. vpand ymm0, ymm0, ymm12
  18562. vpand ymm1, ymm1, ymm12
  18563. vpand ymm2, ymm2, ymm12
  18564. vpand ymm3, ymm3, ymm12
  18565. vpor ymm4, ymm4, ymm0
  18566. vpor ymm5, ymm5, ymm1
  18567. vpor ymm6, ymm6, ymm2
  18568. vpor ymm7, ymm7, ymm3
  18569. vpaddd ymm13, ymm13, ymm11
  18570. ; ENTRY: 22
  18571. mov r9, QWORD PTR [rdx+176]
  18572. vpcmpeqd ymm12, ymm13, ymm10
  18573. vmovdqu ymm0, YMMWORD PTR [r9]
  18574. vmovdqu ymm1, YMMWORD PTR [r9+32]
  18575. vmovdqu ymm2, YMMWORD PTR [r9+64]
  18576. vmovdqu ymm3, YMMWORD PTR [r9+96]
  18577. vpand ymm0, ymm0, ymm12
  18578. vpand ymm1, ymm1, ymm12
  18579. vpand ymm2, ymm2, ymm12
  18580. vpand ymm3, ymm3, ymm12
  18581. vpor ymm4, ymm4, ymm0
  18582. vpor ymm5, ymm5, ymm1
  18583. vpor ymm6, ymm6, ymm2
  18584. vpor ymm7, ymm7, ymm3
  18585. vpaddd ymm13, ymm13, ymm11
  18586. ; ENTRY: 23
  18587. mov r9, QWORD PTR [rdx+184]
  18588. vpcmpeqd ymm12, ymm13, ymm10
  18589. vmovdqu ymm0, YMMWORD PTR [r9]
  18590. vmovdqu ymm1, YMMWORD PTR [r9+32]
  18591. vmovdqu ymm2, YMMWORD PTR [r9+64]
  18592. vmovdqu ymm3, YMMWORD PTR [r9+96]
  18593. vpand ymm0, ymm0, ymm12
  18594. vpand ymm1, ymm1, ymm12
  18595. vpand ymm2, ymm2, ymm12
  18596. vpand ymm3, ymm3, ymm12
  18597. vpor ymm4, ymm4, ymm0
  18598. vpor ymm5, ymm5, ymm1
  18599. vpor ymm6, ymm6, ymm2
  18600. vpor ymm7, ymm7, ymm3
  18601. vpaddd ymm13, ymm13, ymm11
  18602. ; ENTRY: 24
  18603. mov r9, QWORD PTR [rdx+192]
  18604. vpcmpeqd ymm12, ymm13, ymm10
  18605. vmovdqu ymm0, YMMWORD PTR [r9]
  18606. vmovdqu ymm1, YMMWORD PTR [r9+32]
  18607. vmovdqu ymm2, YMMWORD PTR [r9+64]
  18608. vmovdqu ymm3, YMMWORD PTR [r9+96]
  18609. vpand ymm0, ymm0, ymm12
  18610. vpand ymm1, ymm1, ymm12
  18611. vpand ymm2, ymm2, ymm12
  18612. vpand ymm3, ymm3, ymm12
  18613. vpor ymm4, ymm4, ymm0
  18614. vpor ymm5, ymm5, ymm1
  18615. vpor ymm6, ymm6, ymm2
  18616. vpor ymm7, ymm7, ymm3
  18617. vpaddd ymm13, ymm13, ymm11
  18618. ; ENTRY: 25
  18619. mov r9, QWORD PTR [rdx+200]
  18620. vpcmpeqd ymm12, ymm13, ymm10
  18621. vmovdqu ymm0, YMMWORD PTR [r9]
  18622. vmovdqu ymm1, YMMWORD PTR [r9+32]
  18623. vmovdqu ymm2, YMMWORD PTR [r9+64]
  18624. vmovdqu ymm3, YMMWORD PTR [r9+96]
  18625. vpand ymm0, ymm0, ymm12
  18626. vpand ymm1, ymm1, ymm12
  18627. vpand ymm2, ymm2, ymm12
  18628. vpand ymm3, ymm3, ymm12
  18629. vpor ymm4, ymm4, ymm0
  18630. vpor ymm5, ymm5, ymm1
  18631. vpor ymm6, ymm6, ymm2
  18632. vpor ymm7, ymm7, ymm3
  18633. vpaddd ymm13, ymm13, ymm11
  18634. ; ENTRY: 26
  18635. mov r9, QWORD PTR [rdx+208]
  18636. vpcmpeqd ymm12, ymm13, ymm10
  18637. vmovdqu ymm0, YMMWORD PTR [r9]
  18638. vmovdqu ymm1, YMMWORD PTR [r9+32]
  18639. vmovdqu ymm2, YMMWORD PTR [r9+64]
  18640. vmovdqu ymm3, YMMWORD PTR [r9+96]
  18641. vpand ymm0, ymm0, ymm12
  18642. vpand ymm1, ymm1, ymm12
  18643. vpand ymm2, ymm2, ymm12
  18644. vpand ymm3, ymm3, ymm12
  18645. vpor ymm4, ymm4, ymm0
  18646. vpor ymm5, ymm5, ymm1
  18647. vpor ymm6, ymm6, ymm2
  18648. vpor ymm7, ymm7, ymm3
  18649. vpaddd ymm13, ymm13, ymm11
  18650. ; ENTRY: 27
  18651. mov r9, QWORD PTR [rdx+216]
  18652. vpcmpeqd ymm12, ymm13, ymm10
  18653. vmovdqu ymm0, YMMWORD PTR [r9]
  18654. vmovdqu ymm1, YMMWORD PTR [r9+32]
  18655. vmovdqu ymm2, YMMWORD PTR [r9+64]
  18656. vmovdqu ymm3, YMMWORD PTR [r9+96]
  18657. vpand ymm0, ymm0, ymm12
  18658. vpand ymm1, ymm1, ymm12
  18659. vpand ymm2, ymm2, ymm12
  18660. vpand ymm3, ymm3, ymm12
  18661. vpor ymm4, ymm4, ymm0
  18662. vpor ymm5, ymm5, ymm1
  18663. vpor ymm6, ymm6, ymm2
  18664. vpor ymm7, ymm7, ymm3
  18665. vpaddd ymm13, ymm13, ymm11
  18666. ; ENTRY: 28
  18667. mov r9, QWORD PTR [rdx+224]
  18668. vpcmpeqd ymm12, ymm13, ymm10
  18669. vmovdqu ymm0, YMMWORD PTR [r9]
  18670. vmovdqu ymm1, YMMWORD PTR [r9+32]
  18671. vmovdqu ymm2, YMMWORD PTR [r9+64]
  18672. vmovdqu ymm3, YMMWORD PTR [r9+96]
  18673. vpand ymm0, ymm0, ymm12
  18674. vpand ymm1, ymm1, ymm12
  18675. vpand ymm2, ymm2, ymm12
  18676. vpand ymm3, ymm3, ymm12
  18677. vpor ymm4, ymm4, ymm0
  18678. vpor ymm5, ymm5, ymm1
  18679. vpor ymm6, ymm6, ymm2
  18680. vpor ymm7, ymm7, ymm3
  18681. vpaddd ymm13, ymm13, ymm11
  18682. ; ENTRY: 29
  18683. mov r9, QWORD PTR [rdx+232]
  18684. vpcmpeqd ymm12, ymm13, ymm10
  18685. vmovdqu ymm0, YMMWORD PTR [r9]
  18686. vmovdqu ymm1, YMMWORD PTR [r9+32]
  18687. vmovdqu ymm2, YMMWORD PTR [r9+64]
  18688. vmovdqu ymm3, YMMWORD PTR [r9+96]
  18689. vpand ymm0, ymm0, ymm12
  18690. vpand ymm1, ymm1, ymm12
  18691. vpand ymm2, ymm2, ymm12
  18692. vpand ymm3, ymm3, ymm12
  18693. vpor ymm4, ymm4, ymm0
  18694. vpor ymm5, ymm5, ymm1
  18695. vpor ymm6, ymm6, ymm2
  18696. vpor ymm7, ymm7, ymm3
  18697. vpaddd ymm13, ymm13, ymm11
  18698. ; ENTRY: 30
  18699. mov r9, QWORD PTR [rdx+240]
  18700. vpcmpeqd ymm12, ymm13, ymm10
  18701. vmovdqu ymm0, YMMWORD PTR [r9]
  18702. vmovdqu ymm1, YMMWORD PTR [r9+32]
  18703. vmovdqu ymm2, YMMWORD PTR [r9+64]
  18704. vmovdqu ymm3, YMMWORD PTR [r9+96]
  18705. vpand ymm0, ymm0, ymm12
  18706. vpand ymm1, ymm1, ymm12
  18707. vpand ymm2, ymm2, ymm12
  18708. vpand ymm3, ymm3, ymm12
  18709. vpor ymm4, ymm4, ymm0
  18710. vpor ymm5, ymm5, ymm1
  18711. vpor ymm6, ymm6, ymm2
  18712. vpor ymm7, ymm7, ymm3
  18713. vpaddd ymm13, ymm13, ymm11
  18714. ; ENTRY: 31
  18715. mov r9, QWORD PTR [rdx+248]
  18716. vpcmpeqd ymm12, ymm13, ymm10
  18717. vmovdqu ymm0, YMMWORD PTR [r9]
  18718. vmovdqu ymm1, YMMWORD PTR [r9+32]
  18719. vmovdqu ymm2, YMMWORD PTR [r9+64]
  18720. vmovdqu ymm3, YMMWORD PTR [r9+96]
  18721. vpand ymm0, ymm0, ymm12
  18722. vpand ymm1, ymm1, ymm12
  18723. vpand ymm2, ymm2, ymm12
  18724. vpand ymm3, ymm3, ymm12
  18725. vpor ymm4, ymm4, ymm0
  18726. vpor ymm5, ymm5, ymm1
  18727. vpor ymm6, ymm6, ymm2
  18728. vpor ymm7, ymm7, ymm3
  18729. vpaddd ymm13, ymm13, ymm11
  18730. ; ENTRY: 32
  18731. mov r9, QWORD PTR [rdx+256]
  18732. vpcmpeqd ymm12, ymm13, ymm10
  18733. vmovdqu ymm0, YMMWORD PTR [r9]
  18734. vmovdqu ymm1, YMMWORD PTR [r9+32]
  18735. vmovdqu ymm2, YMMWORD PTR [r9+64]
  18736. vmovdqu ymm3, YMMWORD PTR [r9+96]
  18737. vpand ymm0, ymm0, ymm12
  18738. vpand ymm1, ymm1, ymm12
  18739. vpand ymm2, ymm2, ymm12
  18740. vpand ymm3, ymm3, ymm12
  18741. vpor ymm4, ymm4, ymm0
  18742. vpor ymm5, ymm5, ymm1
  18743. vpor ymm6, ymm6, ymm2
  18744. vpor ymm7, ymm7, ymm3
  18745. vpaddd ymm13, ymm13, ymm11
  18746. ; ENTRY: 33
  18747. mov r9, QWORD PTR [rdx+264]
  18748. vpcmpeqd ymm12, ymm13, ymm10
  18749. vmovdqu ymm0, YMMWORD PTR [r9]
  18750. vmovdqu ymm1, YMMWORD PTR [r9+32]
  18751. vmovdqu ymm2, YMMWORD PTR [r9+64]
  18752. vmovdqu ymm3, YMMWORD PTR [r9+96]
  18753. vpand ymm0, ymm0, ymm12
  18754. vpand ymm1, ymm1, ymm12
  18755. vpand ymm2, ymm2, ymm12
  18756. vpand ymm3, ymm3, ymm12
  18757. vpor ymm4, ymm4, ymm0
  18758. vpor ymm5, ymm5, ymm1
  18759. vpor ymm6, ymm6, ymm2
  18760. vpor ymm7, ymm7, ymm3
  18761. vpaddd ymm13, ymm13, ymm11
  18762. ; ENTRY: 34
  18763. mov r9, QWORD PTR [rdx+272]
  18764. vpcmpeqd ymm12, ymm13, ymm10
  18765. vmovdqu ymm0, YMMWORD PTR [r9]
  18766. vmovdqu ymm1, YMMWORD PTR [r9+32]
  18767. vmovdqu ymm2, YMMWORD PTR [r9+64]
  18768. vmovdqu ymm3, YMMWORD PTR [r9+96]
  18769. vpand ymm0, ymm0, ymm12
  18770. vpand ymm1, ymm1, ymm12
  18771. vpand ymm2, ymm2, ymm12
  18772. vpand ymm3, ymm3, ymm12
  18773. vpor ymm4, ymm4, ymm0
  18774. vpor ymm5, ymm5, ymm1
  18775. vpor ymm6, ymm6, ymm2
  18776. vpor ymm7, ymm7, ymm3
  18777. vpaddd ymm13, ymm13, ymm11
  18778. ; ENTRY: 35
  18779. mov r9, QWORD PTR [rdx+280]
  18780. vpcmpeqd ymm12, ymm13, ymm10
  18781. vmovdqu ymm0, YMMWORD PTR [r9]
  18782. vmovdqu ymm1, YMMWORD PTR [r9+32]
  18783. vmovdqu ymm2, YMMWORD PTR [r9+64]
  18784. vmovdqu ymm3, YMMWORD PTR [r9+96]
  18785. vpand ymm0, ymm0, ymm12
  18786. vpand ymm1, ymm1, ymm12
  18787. vpand ymm2, ymm2, ymm12
  18788. vpand ymm3, ymm3, ymm12
  18789. vpor ymm4, ymm4, ymm0
  18790. vpor ymm5, ymm5, ymm1
  18791. vpor ymm6, ymm6, ymm2
  18792. vpor ymm7, ymm7, ymm3
  18793. vpaddd ymm13, ymm13, ymm11
  18794. ; ENTRY: 36
  18795. mov r9, QWORD PTR [rdx+288]
  18796. vpcmpeqd ymm12, ymm13, ymm10
  18797. vmovdqu ymm0, YMMWORD PTR [r9]
  18798. vmovdqu ymm1, YMMWORD PTR [r9+32]
  18799. vmovdqu ymm2, YMMWORD PTR [r9+64]
  18800. vmovdqu ymm3, YMMWORD PTR [r9+96]
  18801. vpand ymm0, ymm0, ymm12
  18802. vpand ymm1, ymm1, ymm12
  18803. vpand ymm2, ymm2, ymm12
  18804. vpand ymm3, ymm3, ymm12
  18805. vpor ymm4, ymm4, ymm0
  18806. vpor ymm5, ymm5, ymm1
  18807. vpor ymm6, ymm6, ymm2
  18808. vpor ymm7, ymm7, ymm3
  18809. vpaddd ymm13, ymm13, ymm11
  18810. ; ENTRY: 37
  18811. mov r9, QWORD PTR [rdx+296]
  18812. vpcmpeqd ymm12, ymm13, ymm10
  18813. vmovdqu ymm0, YMMWORD PTR [r9]
  18814. vmovdqu ymm1, YMMWORD PTR [r9+32]
  18815. vmovdqu ymm2, YMMWORD PTR [r9+64]
  18816. vmovdqu ymm3, YMMWORD PTR [r9+96]
  18817. vpand ymm0, ymm0, ymm12
  18818. vpand ymm1, ymm1, ymm12
  18819. vpand ymm2, ymm2, ymm12
  18820. vpand ymm3, ymm3, ymm12
  18821. vpor ymm4, ymm4, ymm0
  18822. vpor ymm5, ymm5, ymm1
  18823. vpor ymm6, ymm6, ymm2
  18824. vpor ymm7, ymm7, ymm3
  18825. vpaddd ymm13, ymm13, ymm11
  18826. ; ENTRY: 38
  18827. mov r9, QWORD PTR [rdx+304]
  18828. vpcmpeqd ymm12, ymm13, ymm10
  18829. vmovdqu ymm0, YMMWORD PTR [r9]
  18830. vmovdqu ymm1, YMMWORD PTR [r9+32]
  18831. vmovdqu ymm2, YMMWORD PTR [r9+64]
  18832. vmovdqu ymm3, YMMWORD PTR [r9+96]
  18833. vpand ymm0, ymm0, ymm12
  18834. vpand ymm1, ymm1, ymm12
  18835. vpand ymm2, ymm2, ymm12
  18836. vpand ymm3, ymm3, ymm12
  18837. vpor ymm4, ymm4, ymm0
  18838. vpor ymm5, ymm5, ymm1
  18839. vpor ymm6, ymm6, ymm2
  18840. vpor ymm7, ymm7, ymm3
  18841. vpaddd ymm13, ymm13, ymm11
  18842. ; ENTRY: 39
  18843. mov r9, QWORD PTR [rdx+312]
  18844. vpcmpeqd ymm12, ymm13, ymm10
  18845. vmovdqu ymm0, YMMWORD PTR [r9]
  18846. vmovdqu ymm1, YMMWORD PTR [r9+32]
  18847. vmovdqu ymm2, YMMWORD PTR [r9+64]
  18848. vmovdqu ymm3, YMMWORD PTR [r9+96]
  18849. vpand ymm0, ymm0, ymm12
  18850. vpand ymm1, ymm1, ymm12
  18851. vpand ymm2, ymm2, ymm12
  18852. vpand ymm3, ymm3, ymm12
  18853. vpor ymm4, ymm4, ymm0
  18854. vpor ymm5, ymm5, ymm1
  18855. vpor ymm6, ymm6, ymm2
  18856. vpor ymm7, ymm7, ymm3
  18857. vpaddd ymm13, ymm13, ymm11
  18858. ; ENTRY: 40
  18859. mov r9, QWORD PTR [rdx+320]
  18860. vpcmpeqd ymm12, ymm13, ymm10
  18861. vmovdqu ymm0, YMMWORD PTR [r9]
  18862. vmovdqu ymm1, YMMWORD PTR [r9+32]
  18863. vmovdqu ymm2, YMMWORD PTR [r9+64]
  18864. vmovdqu ymm3, YMMWORD PTR [r9+96]
  18865. vpand ymm0, ymm0, ymm12
  18866. vpand ymm1, ymm1, ymm12
  18867. vpand ymm2, ymm2, ymm12
  18868. vpand ymm3, ymm3, ymm12
  18869. vpor ymm4, ymm4, ymm0
  18870. vpor ymm5, ymm5, ymm1
  18871. vpor ymm6, ymm6, ymm2
  18872. vpor ymm7, ymm7, ymm3
  18873. vpaddd ymm13, ymm13, ymm11
  18874. ; ENTRY: 41
  18875. mov r9, QWORD PTR [rdx+328]
  18876. vpcmpeqd ymm12, ymm13, ymm10
  18877. vmovdqu ymm0, YMMWORD PTR [r9]
  18878. vmovdqu ymm1, YMMWORD PTR [r9+32]
  18879. vmovdqu ymm2, YMMWORD PTR [r9+64]
  18880. vmovdqu ymm3, YMMWORD PTR [r9+96]
  18881. vpand ymm0, ymm0, ymm12
  18882. vpand ymm1, ymm1, ymm12
  18883. vpand ymm2, ymm2, ymm12
  18884. vpand ymm3, ymm3, ymm12
  18885. vpor ymm4, ymm4, ymm0
  18886. vpor ymm5, ymm5, ymm1
  18887. vpor ymm6, ymm6, ymm2
  18888. vpor ymm7, ymm7, ymm3
  18889. vpaddd ymm13, ymm13, ymm11
  18890. ; ENTRY: 42
  18891. mov r9, QWORD PTR [rdx+336]
  18892. vpcmpeqd ymm12, ymm13, ymm10
  18893. vmovdqu ymm0, YMMWORD PTR [r9]
  18894. vmovdqu ymm1, YMMWORD PTR [r9+32]
  18895. vmovdqu ymm2, YMMWORD PTR [r9+64]
  18896. vmovdqu ymm3, YMMWORD PTR [r9+96]
  18897. vpand ymm0, ymm0, ymm12
  18898. vpand ymm1, ymm1, ymm12
  18899. vpand ymm2, ymm2, ymm12
  18900. vpand ymm3, ymm3, ymm12
  18901. vpor ymm4, ymm4, ymm0
  18902. vpor ymm5, ymm5, ymm1
  18903. vpor ymm6, ymm6, ymm2
  18904. vpor ymm7, ymm7, ymm3
  18905. vpaddd ymm13, ymm13, ymm11
  18906. ; ENTRY: 43
  18907. mov r9, QWORD PTR [rdx+344]
  18908. vpcmpeqd ymm12, ymm13, ymm10
  18909. vmovdqu ymm0, YMMWORD PTR [r9]
  18910. vmovdqu ymm1, YMMWORD PTR [r9+32]
  18911. vmovdqu ymm2, YMMWORD PTR [r9+64]
  18912. vmovdqu ymm3, YMMWORD PTR [r9+96]
  18913. vpand ymm0, ymm0, ymm12
  18914. vpand ymm1, ymm1, ymm12
  18915. vpand ymm2, ymm2, ymm12
  18916. vpand ymm3, ymm3, ymm12
  18917. vpor ymm4, ymm4, ymm0
  18918. vpor ymm5, ymm5, ymm1
  18919. vpor ymm6, ymm6, ymm2
  18920. vpor ymm7, ymm7, ymm3
  18921. vpaddd ymm13, ymm13, ymm11
  18922. ; ENTRY: 44
  18923. mov r9, QWORD PTR [rdx+352]
  18924. vpcmpeqd ymm12, ymm13, ymm10
  18925. vmovdqu ymm0, YMMWORD PTR [r9]
  18926. vmovdqu ymm1, YMMWORD PTR [r9+32]
  18927. vmovdqu ymm2, YMMWORD PTR [r9+64]
  18928. vmovdqu ymm3, YMMWORD PTR [r9+96]
  18929. vpand ymm0, ymm0, ymm12
  18930. vpand ymm1, ymm1, ymm12
  18931. vpand ymm2, ymm2, ymm12
  18932. vpand ymm3, ymm3, ymm12
  18933. vpor ymm4, ymm4, ymm0
  18934. vpor ymm5, ymm5, ymm1
  18935. vpor ymm6, ymm6, ymm2
  18936. vpor ymm7, ymm7, ymm3
  18937. vpaddd ymm13, ymm13, ymm11
  18938. ; ENTRY: 45
  18939. mov r9, QWORD PTR [rdx+360]
  18940. vpcmpeqd ymm12, ymm13, ymm10
  18941. vmovdqu ymm0, YMMWORD PTR [r9]
  18942. vmovdqu ymm1, YMMWORD PTR [r9+32]
  18943. vmovdqu ymm2, YMMWORD PTR [r9+64]
  18944. vmovdqu ymm3, YMMWORD PTR [r9+96]
  18945. vpand ymm0, ymm0, ymm12
  18946. vpand ymm1, ymm1, ymm12
  18947. vpand ymm2, ymm2, ymm12
  18948. vpand ymm3, ymm3, ymm12
  18949. vpor ymm4, ymm4, ymm0
  18950. vpor ymm5, ymm5, ymm1
  18951. vpor ymm6, ymm6, ymm2
  18952. vpor ymm7, ymm7, ymm3
  18953. vpaddd ymm13, ymm13, ymm11
  18954. ; ENTRY: 46
  18955. mov r9, QWORD PTR [rdx+368]
  18956. vpcmpeqd ymm12, ymm13, ymm10
  18957. vmovdqu ymm0, YMMWORD PTR [r9]
  18958. vmovdqu ymm1, YMMWORD PTR [r9+32]
  18959. vmovdqu ymm2, YMMWORD PTR [r9+64]
  18960. vmovdqu ymm3, YMMWORD PTR [r9+96]
  18961. vpand ymm0, ymm0, ymm12
  18962. vpand ymm1, ymm1, ymm12
  18963. vpand ymm2, ymm2, ymm12
  18964. vpand ymm3, ymm3, ymm12
  18965. vpor ymm4, ymm4, ymm0
  18966. vpor ymm5, ymm5, ymm1
  18967. vpor ymm6, ymm6, ymm2
  18968. vpor ymm7, ymm7, ymm3
  18969. vpaddd ymm13, ymm13, ymm11
  18970. ; ENTRY: 47
  18971. mov r9, QWORD PTR [rdx+376]
  18972. vpcmpeqd ymm12, ymm13, ymm10
  18973. vmovdqu ymm0, YMMWORD PTR [r9]
  18974. vmovdqu ymm1, YMMWORD PTR [r9+32]
  18975. vmovdqu ymm2, YMMWORD PTR [r9+64]
  18976. vmovdqu ymm3, YMMWORD PTR [r9+96]
  18977. vpand ymm0, ymm0, ymm12
  18978. vpand ymm1, ymm1, ymm12
  18979. vpand ymm2, ymm2, ymm12
  18980. vpand ymm3, ymm3, ymm12
  18981. vpor ymm4, ymm4, ymm0
  18982. vpor ymm5, ymm5, ymm1
  18983. vpor ymm6, ymm6, ymm2
  18984. vpor ymm7, ymm7, ymm3
  18985. vpaddd ymm13, ymm13, ymm11
  18986. ; ENTRY: 48
  18987. mov r9, QWORD PTR [rdx+384]
  18988. vpcmpeqd ymm12, ymm13, ymm10
  18989. vmovdqu ymm0, YMMWORD PTR [r9]
  18990. vmovdqu ymm1, YMMWORD PTR [r9+32]
  18991. vmovdqu ymm2, YMMWORD PTR [r9+64]
  18992. vmovdqu ymm3, YMMWORD PTR [r9+96]
  18993. vpand ymm0, ymm0, ymm12
  18994. vpand ymm1, ymm1, ymm12
  18995. vpand ymm2, ymm2, ymm12
  18996. vpand ymm3, ymm3, ymm12
  18997. vpor ymm4, ymm4, ymm0
  18998. vpor ymm5, ymm5, ymm1
  18999. vpor ymm6, ymm6, ymm2
  19000. vpor ymm7, ymm7, ymm3
  19001. vpaddd ymm13, ymm13, ymm11
  19002. ; ENTRY: 49
  19003. mov r9, QWORD PTR [rdx+392]
  19004. vpcmpeqd ymm12, ymm13, ymm10
  19005. vmovdqu ymm0, YMMWORD PTR [r9]
  19006. vmovdqu ymm1, YMMWORD PTR [r9+32]
  19007. vmovdqu ymm2, YMMWORD PTR [r9+64]
  19008. vmovdqu ymm3, YMMWORD PTR [r9+96]
  19009. vpand ymm0, ymm0, ymm12
  19010. vpand ymm1, ymm1, ymm12
  19011. vpand ymm2, ymm2, ymm12
  19012. vpand ymm3, ymm3, ymm12
  19013. vpor ymm4, ymm4, ymm0
  19014. vpor ymm5, ymm5, ymm1
  19015. vpor ymm6, ymm6, ymm2
  19016. vpor ymm7, ymm7, ymm3
  19017. vpaddd ymm13, ymm13, ymm11
  19018. ; ENTRY: 50
  19019. mov r9, QWORD PTR [rdx+400]
  19020. vpcmpeqd ymm12, ymm13, ymm10
  19021. vmovdqu ymm0, YMMWORD PTR [r9]
  19022. vmovdqu ymm1, YMMWORD PTR [r9+32]
  19023. vmovdqu ymm2, YMMWORD PTR [r9+64]
  19024. vmovdqu ymm3, YMMWORD PTR [r9+96]
  19025. vpand ymm0, ymm0, ymm12
  19026. vpand ymm1, ymm1, ymm12
  19027. vpand ymm2, ymm2, ymm12
  19028. vpand ymm3, ymm3, ymm12
  19029. vpor ymm4, ymm4, ymm0
  19030. vpor ymm5, ymm5, ymm1
  19031. vpor ymm6, ymm6, ymm2
  19032. vpor ymm7, ymm7, ymm3
  19033. vpaddd ymm13, ymm13, ymm11
  19034. ; ENTRY: 51
  19035. mov r9, QWORD PTR [rdx+408]
  19036. vpcmpeqd ymm12, ymm13, ymm10
  19037. vmovdqu ymm0, YMMWORD PTR [r9]
  19038. vmovdqu ymm1, YMMWORD PTR [r9+32]
  19039. vmovdqu ymm2, YMMWORD PTR [r9+64]
  19040. vmovdqu ymm3, YMMWORD PTR [r9+96]
  19041. vpand ymm0, ymm0, ymm12
  19042. vpand ymm1, ymm1, ymm12
  19043. vpand ymm2, ymm2, ymm12
  19044. vpand ymm3, ymm3, ymm12
  19045. vpor ymm4, ymm4, ymm0
  19046. vpor ymm5, ymm5, ymm1
  19047. vpor ymm6, ymm6, ymm2
  19048. vpor ymm7, ymm7, ymm3
  19049. vpaddd ymm13, ymm13, ymm11
  19050. ; ENTRY: 52
  19051. mov r9, QWORD PTR [rdx+416]
  19052. vpcmpeqd ymm12, ymm13, ymm10
  19053. vmovdqu ymm0, YMMWORD PTR [r9]
  19054. vmovdqu ymm1, YMMWORD PTR [r9+32]
  19055. vmovdqu ymm2, YMMWORD PTR [r9+64]
  19056. vmovdqu ymm3, YMMWORD PTR [r9+96]
  19057. vpand ymm0, ymm0, ymm12
  19058. vpand ymm1, ymm1, ymm12
  19059. vpand ymm2, ymm2, ymm12
  19060. vpand ymm3, ymm3, ymm12
  19061. vpor ymm4, ymm4, ymm0
  19062. vpor ymm5, ymm5, ymm1
  19063. vpor ymm6, ymm6, ymm2
  19064. vpor ymm7, ymm7, ymm3
  19065. vpaddd ymm13, ymm13, ymm11
  19066. ; ENTRY: 53
  19067. mov r9, QWORD PTR [rdx+424]
  19068. vpcmpeqd ymm12, ymm13, ymm10
  19069. vmovdqu ymm0, YMMWORD PTR [r9]
  19070. vmovdqu ymm1, YMMWORD PTR [r9+32]
  19071. vmovdqu ymm2, YMMWORD PTR [r9+64]
  19072. vmovdqu ymm3, YMMWORD PTR [r9+96]
  19073. vpand ymm0, ymm0, ymm12
  19074. vpand ymm1, ymm1, ymm12
  19075. vpand ymm2, ymm2, ymm12
  19076. vpand ymm3, ymm3, ymm12
  19077. vpor ymm4, ymm4, ymm0
  19078. vpor ymm5, ymm5, ymm1
  19079. vpor ymm6, ymm6, ymm2
  19080. vpor ymm7, ymm7, ymm3
  19081. vpaddd ymm13, ymm13, ymm11
  19082. ; ENTRY: 54
  19083. mov r9, QWORD PTR [rdx+432]
  19084. vpcmpeqd ymm12, ymm13, ymm10
  19085. vmovdqu ymm0, YMMWORD PTR [r9]
  19086. vmovdqu ymm1, YMMWORD PTR [r9+32]
  19087. vmovdqu ymm2, YMMWORD PTR [r9+64]
  19088. vmovdqu ymm3, YMMWORD PTR [r9+96]
  19089. vpand ymm0, ymm0, ymm12
  19090. vpand ymm1, ymm1, ymm12
  19091. vpand ymm2, ymm2, ymm12
  19092. vpand ymm3, ymm3, ymm12
  19093. vpor ymm4, ymm4, ymm0
  19094. vpor ymm5, ymm5, ymm1
  19095. vpor ymm6, ymm6, ymm2
  19096. vpor ymm7, ymm7, ymm3
  19097. vpaddd ymm13, ymm13, ymm11
  19098. ; ENTRY: 55
  19099. mov r9, QWORD PTR [rdx+440]
  19100. vpcmpeqd ymm12, ymm13, ymm10
  19101. vmovdqu ymm0, YMMWORD PTR [r9]
  19102. vmovdqu ymm1, YMMWORD PTR [r9+32]
  19103. vmovdqu ymm2, YMMWORD PTR [r9+64]
  19104. vmovdqu ymm3, YMMWORD PTR [r9+96]
  19105. vpand ymm0, ymm0, ymm12
  19106. vpand ymm1, ymm1, ymm12
  19107. vpand ymm2, ymm2, ymm12
  19108. vpand ymm3, ymm3, ymm12
  19109. vpor ymm4, ymm4, ymm0
  19110. vpor ymm5, ymm5, ymm1
  19111. vpor ymm6, ymm6, ymm2
  19112. vpor ymm7, ymm7, ymm3
  19113. vpaddd ymm13, ymm13, ymm11
  19114. ; ENTRY: 56
  19115. mov r9, QWORD PTR [rdx+448]
  19116. vpcmpeqd ymm12, ymm13, ymm10
  19117. vmovdqu ymm0, YMMWORD PTR [r9]
  19118. vmovdqu ymm1, YMMWORD PTR [r9+32]
  19119. vmovdqu ymm2, YMMWORD PTR [r9+64]
  19120. vmovdqu ymm3, YMMWORD PTR [r9+96]
  19121. vpand ymm0, ymm0, ymm12
  19122. vpand ymm1, ymm1, ymm12
  19123. vpand ymm2, ymm2, ymm12
  19124. vpand ymm3, ymm3, ymm12
  19125. vpor ymm4, ymm4, ymm0
  19126. vpor ymm5, ymm5, ymm1
  19127. vpor ymm6, ymm6, ymm2
  19128. vpor ymm7, ymm7, ymm3
  19129. vpaddd ymm13, ymm13, ymm11
  19130. ; ENTRY: 57
  19131. mov r9, QWORD PTR [rdx+456]
  19132. vpcmpeqd ymm12, ymm13, ymm10
  19133. vmovdqu ymm0, YMMWORD PTR [r9]
  19134. vmovdqu ymm1, YMMWORD PTR [r9+32]
  19135. vmovdqu ymm2, YMMWORD PTR [r9+64]
  19136. vmovdqu ymm3, YMMWORD PTR [r9+96]
  19137. vpand ymm0, ymm0, ymm12
  19138. vpand ymm1, ymm1, ymm12
  19139. vpand ymm2, ymm2, ymm12
  19140. vpand ymm3, ymm3, ymm12
  19141. vpor ymm4, ymm4, ymm0
  19142. vpor ymm5, ymm5, ymm1
  19143. vpor ymm6, ymm6, ymm2
  19144. vpor ymm7, ymm7, ymm3
  19145. vpaddd ymm13, ymm13, ymm11
  19146. ; ENTRY: 58
  19147. mov r9, QWORD PTR [rdx+464]
  19148. vpcmpeqd ymm12, ymm13, ymm10
  19149. vmovdqu ymm0, YMMWORD PTR [r9]
  19150. vmovdqu ymm1, YMMWORD PTR [r9+32]
  19151. vmovdqu ymm2, YMMWORD PTR [r9+64]
  19152. vmovdqu ymm3, YMMWORD PTR [r9+96]
  19153. vpand ymm0, ymm0, ymm12
  19154. vpand ymm1, ymm1, ymm12
  19155. vpand ymm2, ymm2, ymm12
  19156. vpand ymm3, ymm3, ymm12
  19157. vpor ymm4, ymm4, ymm0
  19158. vpor ymm5, ymm5, ymm1
  19159. vpor ymm6, ymm6, ymm2
  19160. vpor ymm7, ymm7, ymm3
  19161. vpaddd ymm13, ymm13, ymm11
  19162. ; ENTRY: 59
  19163. mov r9, QWORD PTR [rdx+472]
  19164. vpcmpeqd ymm12, ymm13, ymm10
  19165. vmovdqu ymm0, YMMWORD PTR [r9]
  19166. vmovdqu ymm1, YMMWORD PTR [r9+32]
  19167. vmovdqu ymm2, YMMWORD PTR [r9+64]
  19168. vmovdqu ymm3, YMMWORD PTR [r9+96]
  19169. vpand ymm0, ymm0, ymm12
  19170. vpand ymm1, ymm1, ymm12
  19171. vpand ymm2, ymm2, ymm12
  19172. vpand ymm3, ymm3, ymm12
  19173. vpor ymm4, ymm4, ymm0
  19174. vpor ymm5, ymm5, ymm1
  19175. vpor ymm6, ymm6, ymm2
  19176. vpor ymm7, ymm7, ymm3
  19177. vpaddd ymm13, ymm13, ymm11
  19178. ; ENTRY: 60
  19179. mov r9, QWORD PTR [rdx+480]
  19180. vpcmpeqd ymm12, ymm13, ymm10
  19181. vmovdqu ymm0, YMMWORD PTR [r9]
  19182. vmovdqu ymm1, YMMWORD PTR [r9+32]
  19183. vmovdqu ymm2, YMMWORD PTR [r9+64]
  19184. vmovdqu ymm3, YMMWORD PTR [r9+96]
  19185. vpand ymm0, ymm0, ymm12
  19186. vpand ymm1, ymm1, ymm12
  19187. vpand ymm2, ymm2, ymm12
  19188. vpand ymm3, ymm3, ymm12
  19189. vpor ymm4, ymm4, ymm0
  19190. vpor ymm5, ymm5, ymm1
  19191. vpor ymm6, ymm6, ymm2
  19192. vpor ymm7, ymm7, ymm3
  19193. vpaddd ymm13, ymm13, ymm11
  19194. ; ENTRY: 61
  19195. mov r9, QWORD PTR [rdx+488]
  19196. vpcmpeqd ymm12, ymm13, ymm10
  19197. vmovdqu ymm0, YMMWORD PTR [r9]
  19198. vmovdqu ymm1, YMMWORD PTR [r9+32]
  19199. vmovdqu ymm2, YMMWORD PTR [r9+64]
  19200. vmovdqu ymm3, YMMWORD PTR [r9+96]
  19201. vpand ymm0, ymm0, ymm12
  19202. vpand ymm1, ymm1, ymm12
  19203. vpand ymm2, ymm2, ymm12
  19204. vpand ymm3, ymm3, ymm12
  19205. vpor ymm4, ymm4, ymm0
  19206. vpor ymm5, ymm5, ymm1
  19207. vpor ymm6, ymm6, ymm2
  19208. vpor ymm7, ymm7, ymm3
  19209. vpaddd ymm13, ymm13, ymm11
  19210. ; ENTRY: 62
  19211. mov r9, QWORD PTR [rdx+496]
  19212. vpcmpeqd ymm12, ymm13, ymm10
  19213. vmovdqu ymm0, YMMWORD PTR [r9]
  19214. vmovdqu ymm1, YMMWORD PTR [r9+32]
  19215. vmovdqu ymm2, YMMWORD PTR [r9+64]
  19216. vmovdqu ymm3, YMMWORD PTR [r9+96]
  19217. vpand ymm0, ymm0, ymm12
  19218. vpand ymm1, ymm1, ymm12
  19219. vpand ymm2, ymm2, ymm12
  19220. vpand ymm3, ymm3, ymm12
  19221. vpor ymm4, ymm4, ymm0
  19222. vpor ymm5, ymm5, ymm1
  19223. vpor ymm6, ymm6, ymm2
  19224. vpor ymm7, ymm7, ymm3
  19225. vpaddd ymm13, ymm13, ymm11
  19226. ; ENTRY: 63
  19227. mov r9, QWORD PTR [rdx+504]
  19228. vpcmpeqd ymm12, ymm13, ymm10
  19229. vmovdqu ymm0, YMMWORD PTR [r9]
  19230. vmovdqu ymm1, YMMWORD PTR [r9+32]
  19231. vmovdqu ymm2, YMMWORD PTR [r9+64]
  19232. vmovdqu ymm3, YMMWORD PTR [r9+96]
  19233. vpand ymm0, ymm0, ymm12
  19234. vpand ymm1, ymm1, ymm12
  19235. vpand ymm2, ymm2, ymm12
  19236. vpand ymm3, ymm3, ymm12
  19237. vpor ymm4, ymm4, ymm0
  19238. vpor ymm5, ymm5, ymm1
  19239. vpor ymm6, ymm6, ymm2
  19240. vpor ymm7, ymm7, ymm3
  19241. vpaddd ymm13, ymm13, ymm11
  19242. vmovdqu YMMWORD PTR [rcx], ymm4
  19243. vmovdqu YMMWORD PTR [rcx+32], ymm5
  19244. vmovdqu YMMWORD PTR [rcx+64], ymm6
  19245. vmovdqu YMMWORD PTR [rcx+96], ymm7
  19246. add rcx, 128
  19247. ; END: 0-15
  19248. ; START: 16-31
  19249. vpxor ymm13, ymm13, ymm13
  19250. vpxor ymm4, ymm4, ymm4
  19251. vpxor ymm5, ymm5, ymm5
  19252. vpxor ymm6, ymm6, ymm6
  19253. vpxor ymm7, ymm7, ymm7
  19254. ; ENTRY: 0
  19255. mov r9, QWORD PTR [rdx]
  19256. add r9, 128
  19257. vpcmpeqd ymm12, ymm13, ymm10
  19258. vmovdqu ymm0, YMMWORD PTR [r9]
  19259. vmovdqu ymm1, YMMWORD PTR [r9+32]
  19260. vmovdqu ymm2, YMMWORD PTR [r9+64]
  19261. vmovdqu ymm3, YMMWORD PTR [r9+96]
  19262. vpand ymm0, ymm0, ymm12
  19263. vpand ymm1, ymm1, ymm12
  19264. vpand ymm2, ymm2, ymm12
  19265. vpand ymm3, ymm3, ymm12
  19266. vpor ymm4, ymm4, ymm0
  19267. vpor ymm5, ymm5, ymm1
  19268. vpor ymm6, ymm6, ymm2
  19269. vpor ymm7, ymm7, ymm3
  19270. vpaddd ymm13, ymm13, ymm11
  19271. ; ENTRY: 1
  19272. mov r9, QWORD PTR [rdx+8]
  19273. add r9, 128
  19274. vpcmpeqd ymm12, ymm13, ymm10
  19275. vmovdqu ymm0, YMMWORD PTR [r9]
  19276. vmovdqu ymm1, YMMWORD PTR [r9+32]
  19277. vmovdqu ymm2, YMMWORD PTR [r9+64]
  19278. vmovdqu ymm3, YMMWORD PTR [r9+96]
  19279. vpand ymm0, ymm0, ymm12
  19280. vpand ymm1, ymm1, ymm12
  19281. vpand ymm2, ymm2, ymm12
  19282. vpand ymm3, ymm3, ymm12
  19283. vpor ymm4, ymm4, ymm0
  19284. vpor ymm5, ymm5, ymm1
  19285. vpor ymm6, ymm6, ymm2
  19286. vpor ymm7, ymm7, ymm3
  19287. vpaddd ymm13, ymm13, ymm11
  19288. ; ENTRY: 2
  19289. mov r9, QWORD PTR [rdx+16]
  19290. add r9, 128
  19291. vpcmpeqd ymm12, ymm13, ymm10
  19292. vmovdqu ymm0, YMMWORD PTR [r9]
  19293. vmovdqu ymm1, YMMWORD PTR [r9+32]
  19294. vmovdqu ymm2, YMMWORD PTR [r9+64]
  19295. vmovdqu ymm3, YMMWORD PTR [r9+96]
  19296. vpand ymm0, ymm0, ymm12
  19297. vpand ymm1, ymm1, ymm12
  19298. vpand ymm2, ymm2, ymm12
  19299. vpand ymm3, ymm3, ymm12
  19300. vpor ymm4, ymm4, ymm0
  19301. vpor ymm5, ymm5, ymm1
  19302. vpor ymm6, ymm6, ymm2
  19303. vpor ymm7, ymm7, ymm3
  19304. vpaddd ymm13, ymm13, ymm11
  19305. ; ENTRY: 3
  19306. mov r9, QWORD PTR [rdx+24]
  19307. add r9, 128
  19308. vpcmpeqd ymm12, ymm13, ymm10
  19309. vmovdqu ymm0, YMMWORD PTR [r9]
  19310. vmovdqu ymm1, YMMWORD PTR [r9+32]
  19311. vmovdqu ymm2, YMMWORD PTR [r9+64]
  19312. vmovdqu ymm3, YMMWORD PTR [r9+96]
  19313. vpand ymm0, ymm0, ymm12
  19314. vpand ymm1, ymm1, ymm12
  19315. vpand ymm2, ymm2, ymm12
  19316. vpand ymm3, ymm3, ymm12
  19317. vpor ymm4, ymm4, ymm0
  19318. vpor ymm5, ymm5, ymm1
  19319. vpor ymm6, ymm6, ymm2
  19320. vpor ymm7, ymm7, ymm3
  19321. vpaddd ymm13, ymm13, ymm11
  19322. ; ENTRY: 4
  19323. mov r9, QWORD PTR [rdx+32]
  19324. add r9, 128
  19325. vpcmpeqd ymm12, ymm13, ymm10
  19326. vmovdqu ymm0, YMMWORD PTR [r9]
  19327. vmovdqu ymm1, YMMWORD PTR [r9+32]
  19328. vmovdqu ymm2, YMMWORD PTR [r9+64]
  19329. vmovdqu ymm3, YMMWORD PTR [r9+96]
  19330. vpand ymm0, ymm0, ymm12
  19331. vpand ymm1, ymm1, ymm12
  19332. vpand ymm2, ymm2, ymm12
  19333. vpand ymm3, ymm3, ymm12
  19334. vpor ymm4, ymm4, ymm0
  19335. vpor ymm5, ymm5, ymm1
  19336. vpor ymm6, ymm6, ymm2
  19337. vpor ymm7, ymm7, ymm3
  19338. vpaddd ymm13, ymm13, ymm11
  19339. ; ENTRY: 5
  19340. mov r9, QWORD PTR [rdx+40]
  19341. add r9, 128
  19342. vpcmpeqd ymm12, ymm13, ymm10
  19343. vmovdqu ymm0, YMMWORD PTR [r9]
  19344. vmovdqu ymm1, YMMWORD PTR [r9+32]
  19345. vmovdqu ymm2, YMMWORD PTR [r9+64]
  19346. vmovdqu ymm3, YMMWORD PTR [r9+96]
  19347. vpand ymm0, ymm0, ymm12
  19348. vpand ymm1, ymm1, ymm12
  19349. vpand ymm2, ymm2, ymm12
  19350. vpand ymm3, ymm3, ymm12
  19351. vpor ymm4, ymm4, ymm0
  19352. vpor ymm5, ymm5, ymm1
  19353. vpor ymm6, ymm6, ymm2
  19354. vpor ymm7, ymm7, ymm3
  19355. vpaddd ymm13, ymm13, ymm11
  19356. ; ENTRY: 6
  19357. mov r9, QWORD PTR [rdx+48]
  19358. add r9, 128
  19359. vpcmpeqd ymm12, ymm13, ymm10
  19360. vmovdqu ymm0, YMMWORD PTR [r9]
  19361. vmovdqu ymm1, YMMWORD PTR [r9+32]
  19362. vmovdqu ymm2, YMMWORD PTR [r9+64]
  19363. vmovdqu ymm3, YMMWORD PTR [r9+96]
  19364. vpand ymm0, ymm0, ymm12
  19365. vpand ymm1, ymm1, ymm12
  19366. vpand ymm2, ymm2, ymm12
  19367. vpand ymm3, ymm3, ymm12
  19368. vpor ymm4, ymm4, ymm0
  19369. vpor ymm5, ymm5, ymm1
  19370. vpor ymm6, ymm6, ymm2
  19371. vpor ymm7, ymm7, ymm3
  19372. vpaddd ymm13, ymm13, ymm11
  19373. ; ENTRY: 7
  19374. mov r9, QWORD PTR [rdx+56]
  19375. add r9, 128
  19376. vpcmpeqd ymm12, ymm13, ymm10
  19377. vmovdqu ymm0, YMMWORD PTR [r9]
  19378. vmovdqu ymm1, YMMWORD PTR [r9+32]
  19379. vmovdqu ymm2, YMMWORD PTR [r9+64]
  19380. vmovdqu ymm3, YMMWORD PTR [r9+96]
  19381. vpand ymm0, ymm0, ymm12
  19382. vpand ymm1, ymm1, ymm12
  19383. vpand ymm2, ymm2, ymm12
  19384. vpand ymm3, ymm3, ymm12
  19385. vpor ymm4, ymm4, ymm0
  19386. vpor ymm5, ymm5, ymm1
  19387. vpor ymm6, ymm6, ymm2
  19388. vpor ymm7, ymm7, ymm3
  19389. vpaddd ymm13, ymm13, ymm11
  19390. ; ENTRY: 8
  19391. mov r9, QWORD PTR [rdx+64]
  19392. add r9, 128
  19393. vpcmpeqd ymm12, ymm13, ymm10
  19394. vmovdqu ymm0, YMMWORD PTR [r9]
  19395. vmovdqu ymm1, YMMWORD PTR [r9+32]
  19396. vmovdqu ymm2, YMMWORD PTR [r9+64]
  19397. vmovdqu ymm3, YMMWORD PTR [r9+96]
  19398. vpand ymm0, ymm0, ymm12
  19399. vpand ymm1, ymm1, ymm12
  19400. vpand ymm2, ymm2, ymm12
  19401. vpand ymm3, ymm3, ymm12
  19402. vpor ymm4, ymm4, ymm0
  19403. vpor ymm5, ymm5, ymm1
  19404. vpor ymm6, ymm6, ymm2
  19405. vpor ymm7, ymm7, ymm3
  19406. vpaddd ymm13, ymm13, ymm11
  19407. ; ENTRY: 9
  19408. mov r9, QWORD PTR [rdx+72]
  19409. add r9, 128
  19410. vpcmpeqd ymm12, ymm13, ymm10
  19411. vmovdqu ymm0, YMMWORD PTR [r9]
  19412. vmovdqu ymm1, YMMWORD PTR [r9+32]
  19413. vmovdqu ymm2, YMMWORD PTR [r9+64]
  19414. vmovdqu ymm3, YMMWORD PTR [r9+96]
  19415. vpand ymm0, ymm0, ymm12
  19416. vpand ymm1, ymm1, ymm12
  19417. vpand ymm2, ymm2, ymm12
  19418. vpand ymm3, ymm3, ymm12
  19419. vpor ymm4, ymm4, ymm0
  19420. vpor ymm5, ymm5, ymm1
  19421. vpor ymm6, ymm6, ymm2
  19422. vpor ymm7, ymm7, ymm3
  19423. vpaddd ymm13, ymm13, ymm11
  19424. ; ENTRY: 10
  19425. mov r9, QWORD PTR [rdx+80]
  19426. add r9, 128
  19427. vpcmpeqd ymm12, ymm13, ymm10
  19428. vmovdqu ymm0, YMMWORD PTR [r9]
  19429. vmovdqu ymm1, YMMWORD PTR [r9+32]
  19430. vmovdqu ymm2, YMMWORD PTR [r9+64]
  19431. vmovdqu ymm3, YMMWORD PTR [r9+96]
  19432. vpand ymm0, ymm0, ymm12
  19433. vpand ymm1, ymm1, ymm12
  19434. vpand ymm2, ymm2, ymm12
  19435. vpand ymm3, ymm3, ymm12
  19436. vpor ymm4, ymm4, ymm0
  19437. vpor ymm5, ymm5, ymm1
  19438. vpor ymm6, ymm6, ymm2
  19439. vpor ymm7, ymm7, ymm3
  19440. vpaddd ymm13, ymm13, ymm11
  19441. ; ENTRY: 11
  19442. mov r9, QWORD PTR [rdx+88]
  19443. add r9, 128
  19444. vpcmpeqd ymm12, ymm13, ymm10
  19445. vmovdqu ymm0, YMMWORD PTR [r9]
  19446. vmovdqu ymm1, YMMWORD PTR [r9+32]
  19447. vmovdqu ymm2, YMMWORD PTR [r9+64]
  19448. vmovdqu ymm3, YMMWORD PTR [r9+96]
  19449. vpand ymm0, ymm0, ymm12
  19450. vpand ymm1, ymm1, ymm12
  19451. vpand ymm2, ymm2, ymm12
  19452. vpand ymm3, ymm3, ymm12
  19453. vpor ymm4, ymm4, ymm0
  19454. vpor ymm5, ymm5, ymm1
  19455. vpor ymm6, ymm6, ymm2
  19456. vpor ymm7, ymm7, ymm3
  19457. vpaddd ymm13, ymm13, ymm11
  19458. ; ENTRY: 12
  19459. mov r9, QWORD PTR [rdx+96]
  19460. add r9, 128
  19461. vpcmpeqd ymm12, ymm13, ymm10
  19462. vmovdqu ymm0, YMMWORD PTR [r9]
  19463. vmovdqu ymm1, YMMWORD PTR [r9+32]
  19464. vmovdqu ymm2, YMMWORD PTR [r9+64]
  19465. vmovdqu ymm3, YMMWORD PTR [r9+96]
  19466. vpand ymm0, ymm0, ymm12
  19467. vpand ymm1, ymm1, ymm12
  19468. vpand ymm2, ymm2, ymm12
  19469. vpand ymm3, ymm3, ymm12
  19470. vpor ymm4, ymm4, ymm0
  19471. vpor ymm5, ymm5, ymm1
  19472. vpor ymm6, ymm6, ymm2
  19473. vpor ymm7, ymm7, ymm3
  19474. vpaddd ymm13, ymm13, ymm11
  19475. ; ENTRY: 13
  19476. mov r9, QWORD PTR [rdx+104]
  19477. add r9, 128
  19478. vpcmpeqd ymm12, ymm13, ymm10
  19479. vmovdqu ymm0, YMMWORD PTR [r9]
  19480. vmovdqu ymm1, YMMWORD PTR [r9+32]
  19481. vmovdqu ymm2, YMMWORD PTR [r9+64]
  19482. vmovdqu ymm3, YMMWORD PTR [r9+96]
  19483. vpand ymm0, ymm0, ymm12
  19484. vpand ymm1, ymm1, ymm12
  19485. vpand ymm2, ymm2, ymm12
  19486. vpand ymm3, ymm3, ymm12
  19487. vpor ymm4, ymm4, ymm0
  19488. vpor ymm5, ymm5, ymm1
  19489. vpor ymm6, ymm6, ymm2
  19490. vpor ymm7, ymm7, ymm3
  19491. vpaddd ymm13, ymm13, ymm11
  19492. ; ENTRY: 14
  19493. mov r9, QWORD PTR [rdx+112]
  19494. add r9, 128
  19495. vpcmpeqd ymm12, ymm13, ymm10
  19496. vmovdqu ymm0, YMMWORD PTR [r9]
  19497. vmovdqu ymm1, YMMWORD PTR [r9+32]
  19498. vmovdqu ymm2, YMMWORD PTR [r9+64]
  19499. vmovdqu ymm3, YMMWORD PTR [r9+96]
  19500. vpand ymm0, ymm0, ymm12
  19501. vpand ymm1, ymm1, ymm12
  19502. vpand ymm2, ymm2, ymm12
  19503. vpand ymm3, ymm3, ymm12
  19504. vpor ymm4, ymm4, ymm0
  19505. vpor ymm5, ymm5, ymm1
  19506. vpor ymm6, ymm6, ymm2
  19507. vpor ymm7, ymm7, ymm3
  19508. vpaddd ymm13, ymm13, ymm11
  19509. ; ENTRY: 15
  19510. mov r9, QWORD PTR [rdx+120]
  19511. add r9, 128
  19512. vpcmpeqd ymm12, ymm13, ymm10
  19513. vmovdqu ymm0, YMMWORD PTR [r9]
  19514. vmovdqu ymm1, YMMWORD PTR [r9+32]
  19515. vmovdqu ymm2, YMMWORD PTR [r9+64]
  19516. vmovdqu ymm3, YMMWORD PTR [r9+96]
  19517. vpand ymm0, ymm0, ymm12
  19518. vpand ymm1, ymm1, ymm12
  19519. vpand ymm2, ymm2, ymm12
  19520. vpand ymm3, ymm3, ymm12
  19521. vpor ymm4, ymm4, ymm0
  19522. vpor ymm5, ymm5, ymm1
  19523. vpor ymm6, ymm6, ymm2
  19524. vpor ymm7, ymm7, ymm3
  19525. vpaddd ymm13, ymm13, ymm11
  19526. ; ENTRY: 16
  19527. mov r9, QWORD PTR [rdx+128]
  19528. add r9, 128
  19529. vpcmpeqd ymm12, ymm13, ymm10
  19530. vmovdqu ymm0, YMMWORD PTR [r9]
  19531. vmovdqu ymm1, YMMWORD PTR [r9+32]
  19532. vmovdqu ymm2, YMMWORD PTR [r9+64]
  19533. vmovdqu ymm3, YMMWORD PTR [r9+96]
  19534. vpand ymm0, ymm0, ymm12
  19535. vpand ymm1, ymm1, ymm12
  19536. vpand ymm2, ymm2, ymm12
  19537. vpand ymm3, ymm3, ymm12
  19538. vpor ymm4, ymm4, ymm0
  19539. vpor ymm5, ymm5, ymm1
  19540. vpor ymm6, ymm6, ymm2
  19541. vpor ymm7, ymm7, ymm3
  19542. vpaddd ymm13, ymm13, ymm11
  19543. ; ENTRY: 17
  19544. mov r9, QWORD PTR [rdx+136]
  19545. add r9, 128
  19546. vpcmpeqd ymm12, ymm13, ymm10
  19547. vmovdqu ymm0, YMMWORD PTR [r9]
  19548. vmovdqu ymm1, YMMWORD PTR [r9+32]
  19549. vmovdqu ymm2, YMMWORD PTR [r9+64]
  19550. vmovdqu ymm3, YMMWORD PTR [r9+96]
  19551. vpand ymm0, ymm0, ymm12
  19552. vpand ymm1, ymm1, ymm12
  19553. vpand ymm2, ymm2, ymm12
  19554. vpand ymm3, ymm3, ymm12
  19555. vpor ymm4, ymm4, ymm0
  19556. vpor ymm5, ymm5, ymm1
  19557. vpor ymm6, ymm6, ymm2
  19558. vpor ymm7, ymm7, ymm3
  19559. vpaddd ymm13, ymm13, ymm11
  19560. ; ENTRY: 18
  19561. mov r9, QWORD PTR [rdx+144]
  19562. add r9, 128
  19563. vpcmpeqd ymm12, ymm13, ymm10
  19564. vmovdqu ymm0, YMMWORD PTR [r9]
  19565. vmovdqu ymm1, YMMWORD PTR [r9+32]
  19566. vmovdqu ymm2, YMMWORD PTR [r9+64]
  19567. vmovdqu ymm3, YMMWORD PTR [r9+96]
  19568. vpand ymm0, ymm0, ymm12
  19569. vpand ymm1, ymm1, ymm12
  19570. vpand ymm2, ymm2, ymm12
  19571. vpand ymm3, ymm3, ymm12
  19572. vpor ymm4, ymm4, ymm0
  19573. vpor ymm5, ymm5, ymm1
  19574. vpor ymm6, ymm6, ymm2
  19575. vpor ymm7, ymm7, ymm3
  19576. vpaddd ymm13, ymm13, ymm11
  19577. ; ENTRY: 19
  19578. mov r9, QWORD PTR [rdx+152]
  19579. add r9, 128
  19580. vpcmpeqd ymm12, ymm13, ymm10
  19581. vmovdqu ymm0, YMMWORD PTR [r9]
  19582. vmovdqu ymm1, YMMWORD PTR [r9+32]
  19583. vmovdqu ymm2, YMMWORD PTR [r9+64]
  19584. vmovdqu ymm3, YMMWORD PTR [r9+96]
  19585. vpand ymm0, ymm0, ymm12
  19586. vpand ymm1, ymm1, ymm12
  19587. vpand ymm2, ymm2, ymm12
  19588. vpand ymm3, ymm3, ymm12
  19589. vpor ymm4, ymm4, ymm0
  19590. vpor ymm5, ymm5, ymm1
  19591. vpor ymm6, ymm6, ymm2
  19592. vpor ymm7, ymm7, ymm3
  19593. vpaddd ymm13, ymm13, ymm11
  19594. ; ENTRY: 20
  19595. mov r9, QWORD PTR [rdx+160]
  19596. add r9, 128
  19597. vpcmpeqd ymm12, ymm13, ymm10
  19598. vmovdqu ymm0, YMMWORD PTR [r9]
  19599. vmovdqu ymm1, YMMWORD PTR [r9+32]
  19600. vmovdqu ymm2, YMMWORD PTR [r9+64]
  19601. vmovdqu ymm3, YMMWORD PTR [r9+96]
  19602. vpand ymm0, ymm0, ymm12
  19603. vpand ymm1, ymm1, ymm12
  19604. vpand ymm2, ymm2, ymm12
  19605. vpand ymm3, ymm3, ymm12
  19606. vpor ymm4, ymm4, ymm0
  19607. vpor ymm5, ymm5, ymm1
  19608. vpor ymm6, ymm6, ymm2
  19609. vpor ymm7, ymm7, ymm3
  19610. vpaddd ymm13, ymm13, ymm11
  19611. ; ENTRY: 21
  19612. mov r9, QWORD PTR [rdx+168]
  19613. add r9, 128
  19614. vpcmpeqd ymm12, ymm13, ymm10
  19615. vmovdqu ymm0, YMMWORD PTR [r9]
  19616. vmovdqu ymm1, YMMWORD PTR [r9+32]
  19617. vmovdqu ymm2, YMMWORD PTR [r9+64]
  19618. vmovdqu ymm3, YMMWORD PTR [r9+96]
  19619. vpand ymm0, ymm0, ymm12
  19620. vpand ymm1, ymm1, ymm12
  19621. vpand ymm2, ymm2, ymm12
  19622. vpand ymm3, ymm3, ymm12
  19623. vpor ymm4, ymm4, ymm0
  19624. vpor ymm5, ymm5, ymm1
  19625. vpor ymm6, ymm6, ymm2
  19626. vpor ymm7, ymm7, ymm3
  19627. vpaddd ymm13, ymm13, ymm11
  19628. ; ENTRY: 22
  19629. mov r9, QWORD PTR [rdx+176]
  19630. add r9, 128
  19631. vpcmpeqd ymm12, ymm13, ymm10
  19632. vmovdqu ymm0, YMMWORD PTR [r9]
  19633. vmovdqu ymm1, YMMWORD PTR [r9+32]
  19634. vmovdqu ymm2, YMMWORD PTR [r9+64]
  19635. vmovdqu ymm3, YMMWORD PTR [r9+96]
  19636. vpand ymm0, ymm0, ymm12
  19637. vpand ymm1, ymm1, ymm12
  19638. vpand ymm2, ymm2, ymm12
  19639. vpand ymm3, ymm3, ymm12
  19640. vpor ymm4, ymm4, ymm0
  19641. vpor ymm5, ymm5, ymm1
  19642. vpor ymm6, ymm6, ymm2
  19643. vpor ymm7, ymm7, ymm3
  19644. vpaddd ymm13, ymm13, ymm11
  19645. ; ENTRY: 23
  19646. mov r9, QWORD PTR [rdx+184]
  19647. add r9, 128
  19648. vpcmpeqd ymm12, ymm13, ymm10
  19649. vmovdqu ymm0, YMMWORD PTR [r9]
  19650. vmovdqu ymm1, YMMWORD PTR [r9+32]
  19651. vmovdqu ymm2, YMMWORD PTR [r9+64]
  19652. vmovdqu ymm3, YMMWORD PTR [r9+96]
  19653. vpand ymm0, ymm0, ymm12
  19654. vpand ymm1, ymm1, ymm12
  19655. vpand ymm2, ymm2, ymm12
  19656. vpand ymm3, ymm3, ymm12
  19657. vpor ymm4, ymm4, ymm0
  19658. vpor ymm5, ymm5, ymm1
  19659. vpor ymm6, ymm6, ymm2
  19660. vpor ymm7, ymm7, ymm3
  19661. vpaddd ymm13, ymm13, ymm11
  19662. ; ENTRY: 24
  19663. mov r9, QWORD PTR [rdx+192]
  19664. add r9, 128
  19665. vpcmpeqd ymm12, ymm13, ymm10
  19666. vmovdqu ymm0, YMMWORD PTR [r9]
  19667. vmovdqu ymm1, YMMWORD PTR [r9+32]
  19668. vmovdqu ymm2, YMMWORD PTR [r9+64]
  19669. vmovdqu ymm3, YMMWORD PTR [r9+96]
  19670. vpand ymm0, ymm0, ymm12
  19671. vpand ymm1, ymm1, ymm12
  19672. vpand ymm2, ymm2, ymm12
  19673. vpand ymm3, ymm3, ymm12
  19674. vpor ymm4, ymm4, ymm0
  19675. vpor ymm5, ymm5, ymm1
  19676. vpor ymm6, ymm6, ymm2
  19677. vpor ymm7, ymm7, ymm3
  19678. vpaddd ymm13, ymm13, ymm11
  19679. ; ENTRY: 25
  19680. mov r9, QWORD PTR [rdx+200]
  19681. add r9, 128
  19682. vpcmpeqd ymm12, ymm13, ymm10
  19683. vmovdqu ymm0, YMMWORD PTR [r9]
  19684. vmovdqu ymm1, YMMWORD PTR [r9+32]
  19685. vmovdqu ymm2, YMMWORD PTR [r9+64]
  19686. vmovdqu ymm3, YMMWORD PTR [r9+96]
  19687. vpand ymm0, ymm0, ymm12
  19688. vpand ymm1, ymm1, ymm12
  19689. vpand ymm2, ymm2, ymm12
  19690. vpand ymm3, ymm3, ymm12
  19691. vpor ymm4, ymm4, ymm0
  19692. vpor ymm5, ymm5, ymm1
  19693. vpor ymm6, ymm6, ymm2
  19694. vpor ymm7, ymm7, ymm3
  19695. vpaddd ymm13, ymm13, ymm11
  19696. ; ENTRY: 26
  19697. mov r9, QWORD PTR [rdx+208]
  19698. add r9, 128
  19699. vpcmpeqd ymm12, ymm13, ymm10
  19700. vmovdqu ymm0, YMMWORD PTR [r9]
  19701. vmovdqu ymm1, YMMWORD PTR [r9+32]
  19702. vmovdqu ymm2, YMMWORD PTR [r9+64]
  19703. vmovdqu ymm3, YMMWORD PTR [r9+96]
  19704. vpand ymm0, ymm0, ymm12
  19705. vpand ymm1, ymm1, ymm12
  19706. vpand ymm2, ymm2, ymm12
  19707. vpand ymm3, ymm3, ymm12
  19708. vpor ymm4, ymm4, ymm0
  19709. vpor ymm5, ymm5, ymm1
  19710. vpor ymm6, ymm6, ymm2
  19711. vpor ymm7, ymm7, ymm3
  19712. vpaddd ymm13, ymm13, ymm11
  19713. ; ENTRY: 27
  19714. mov r9, QWORD PTR [rdx+216]
  19715. add r9, 128
  19716. vpcmpeqd ymm12, ymm13, ymm10
  19717. vmovdqu ymm0, YMMWORD PTR [r9]
  19718. vmovdqu ymm1, YMMWORD PTR [r9+32]
  19719. vmovdqu ymm2, YMMWORD PTR [r9+64]
  19720. vmovdqu ymm3, YMMWORD PTR [r9+96]
  19721. vpand ymm0, ymm0, ymm12
  19722. vpand ymm1, ymm1, ymm12
  19723. vpand ymm2, ymm2, ymm12
  19724. vpand ymm3, ymm3, ymm12
  19725. vpor ymm4, ymm4, ymm0
  19726. vpor ymm5, ymm5, ymm1
  19727. vpor ymm6, ymm6, ymm2
  19728. vpor ymm7, ymm7, ymm3
  19729. vpaddd ymm13, ymm13, ymm11
  19730. ; ENTRY: 28
  19731. mov r9, QWORD PTR [rdx+224]
  19732. add r9, 128
  19733. vpcmpeqd ymm12, ymm13, ymm10
  19734. vmovdqu ymm0, YMMWORD PTR [r9]
  19735. vmovdqu ymm1, YMMWORD PTR [r9+32]
  19736. vmovdqu ymm2, YMMWORD PTR [r9+64]
  19737. vmovdqu ymm3, YMMWORD PTR [r9+96]
  19738. vpand ymm0, ymm0, ymm12
  19739. vpand ymm1, ymm1, ymm12
  19740. vpand ymm2, ymm2, ymm12
  19741. vpand ymm3, ymm3, ymm12
  19742. vpor ymm4, ymm4, ymm0
  19743. vpor ymm5, ymm5, ymm1
  19744. vpor ymm6, ymm6, ymm2
  19745. vpor ymm7, ymm7, ymm3
  19746. vpaddd ymm13, ymm13, ymm11
  19747. ; ENTRY: 29
  19748. mov r9, QWORD PTR [rdx+232]
  19749. add r9, 128
  19750. vpcmpeqd ymm12, ymm13, ymm10
  19751. vmovdqu ymm0, YMMWORD PTR [r9]
  19752. vmovdqu ymm1, YMMWORD PTR [r9+32]
  19753. vmovdqu ymm2, YMMWORD PTR [r9+64]
  19754. vmovdqu ymm3, YMMWORD PTR [r9+96]
  19755. vpand ymm0, ymm0, ymm12
  19756. vpand ymm1, ymm1, ymm12
  19757. vpand ymm2, ymm2, ymm12
  19758. vpand ymm3, ymm3, ymm12
  19759. vpor ymm4, ymm4, ymm0
  19760. vpor ymm5, ymm5, ymm1
  19761. vpor ymm6, ymm6, ymm2
  19762. vpor ymm7, ymm7, ymm3
  19763. vpaddd ymm13, ymm13, ymm11
  19764. ; ENTRY: 30
  19765. mov r9, QWORD PTR [rdx+240]
  19766. add r9, 128
  19767. vpcmpeqd ymm12, ymm13, ymm10
  19768. vmovdqu ymm0, YMMWORD PTR [r9]
  19769. vmovdqu ymm1, YMMWORD PTR [r9+32]
  19770. vmovdqu ymm2, YMMWORD PTR [r9+64]
  19771. vmovdqu ymm3, YMMWORD PTR [r9+96]
  19772. vpand ymm0, ymm0, ymm12
  19773. vpand ymm1, ymm1, ymm12
  19774. vpand ymm2, ymm2, ymm12
  19775. vpand ymm3, ymm3, ymm12
  19776. vpor ymm4, ymm4, ymm0
  19777. vpor ymm5, ymm5, ymm1
  19778. vpor ymm6, ymm6, ymm2
  19779. vpor ymm7, ymm7, ymm3
  19780. vpaddd ymm13, ymm13, ymm11
  19781. ; ENTRY: 31
  19782. mov r9, QWORD PTR [rdx+248]
  19783. add r9, 128
  19784. vpcmpeqd ymm12, ymm13, ymm10
  19785. vmovdqu ymm0, YMMWORD PTR [r9]
  19786. vmovdqu ymm1, YMMWORD PTR [r9+32]
  19787. vmovdqu ymm2, YMMWORD PTR [r9+64]
  19788. vmovdqu ymm3, YMMWORD PTR [r9+96]
  19789. vpand ymm0, ymm0, ymm12
  19790. vpand ymm1, ymm1, ymm12
  19791. vpand ymm2, ymm2, ymm12
  19792. vpand ymm3, ymm3, ymm12
  19793. vpor ymm4, ymm4, ymm0
  19794. vpor ymm5, ymm5, ymm1
  19795. vpor ymm6, ymm6, ymm2
  19796. vpor ymm7, ymm7, ymm3
  19797. vpaddd ymm13, ymm13, ymm11
  19798. ; ENTRY: 32
  19799. mov r9, QWORD PTR [rdx+256]
  19800. add r9, 128
  19801. vpcmpeqd ymm12, ymm13, ymm10
  19802. vmovdqu ymm0, YMMWORD PTR [r9]
  19803. vmovdqu ymm1, YMMWORD PTR [r9+32]
  19804. vmovdqu ymm2, YMMWORD PTR [r9+64]
  19805. vmovdqu ymm3, YMMWORD PTR [r9+96]
  19806. vpand ymm0, ymm0, ymm12
  19807. vpand ymm1, ymm1, ymm12
  19808. vpand ymm2, ymm2, ymm12
  19809. vpand ymm3, ymm3, ymm12
  19810. vpor ymm4, ymm4, ymm0
  19811. vpor ymm5, ymm5, ymm1
  19812. vpor ymm6, ymm6, ymm2
  19813. vpor ymm7, ymm7, ymm3
  19814. vpaddd ymm13, ymm13, ymm11
  19815. ; ENTRY: 33
  19816. mov r9, QWORD PTR [rdx+264]
  19817. add r9, 128
  19818. vpcmpeqd ymm12, ymm13, ymm10
  19819. vmovdqu ymm0, YMMWORD PTR [r9]
  19820. vmovdqu ymm1, YMMWORD PTR [r9+32]
  19821. vmovdqu ymm2, YMMWORD PTR [r9+64]
  19822. vmovdqu ymm3, YMMWORD PTR [r9+96]
  19823. vpand ymm0, ymm0, ymm12
  19824. vpand ymm1, ymm1, ymm12
  19825. vpand ymm2, ymm2, ymm12
  19826. vpand ymm3, ymm3, ymm12
  19827. vpor ymm4, ymm4, ymm0
  19828. vpor ymm5, ymm5, ymm1
  19829. vpor ymm6, ymm6, ymm2
  19830. vpor ymm7, ymm7, ymm3
  19831. vpaddd ymm13, ymm13, ymm11
  19832. ; ENTRY: 34
  19833. mov r9, QWORD PTR [rdx+272]
  19834. add r9, 128
  19835. vpcmpeqd ymm12, ymm13, ymm10
  19836. vmovdqu ymm0, YMMWORD PTR [r9]
  19837. vmovdqu ymm1, YMMWORD PTR [r9+32]
  19838. vmovdqu ymm2, YMMWORD PTR [r9+64]
  19839. vmovdqu ymm3, YMMWORD PTR [r9+96]
  19840. vpand ymm0, ymm0, ymm12
  19841. vpand ymm1, ymm1, ymm12
  19842. vpand ymm2, ymm2, ymm12
  19843. vpand ymm3, ymm3, ymm12
  19844. vpor ymm4, ymm4, ymm0
  19845. vpor ymm5, ymm5, ymm1
  19846. vpor ymm6, ymm6, ymm2
  19847. vpor ymm7, ymm7, ymm3
  19848. vpaddd ymm13, ymm13, ymm11
  19849. ; ENTRY: 35
  19850. mov r9, QWORD PTR [rdx+280]
  19851. add r9, 128
  19852. vpcmpeqd ymm12, ymm13, ymm10
  19853. vmovdqu ymm0, YMMWORD PTR [r9]
  19854. vmovdqu ymm1, YMMWORD PTR [r9+32]
  19855. vmovdqu ymm2, YMMWORD PTR [r9+64]
  19856. vmovdqu ymm3, YMMWORD PTR [r9+96]
  19857. vpand ymm0, ymm0, ymm12
  19858. vpand ymm1, ymm1, ymm12
  19859. vpand ymm2, ymm2, ymm12
  19860. vpand ymm3, ymm3, ymm12
  19861. vpor ymm4, ymm4, ymm0
  19862. vpor ymm5, ymm5, ymm1
  19863. vpor ymm6, ymm6, ymm2
  19864. vpor ymm7, ymm7, ymm3
  19865. vpaddd ymm13, ymm13, ymm11
  19866. ; ENTRY: 36
  19867. mov r9, QWORD PTR [rdx+288]
  19868. add r9, 128
  19869. vpcmpeqd ymm12, ymm13, ymm10
  19870. vmovdqu ymm0, YMMWORD PTR [r9]
  19871. vmovdqu ymm1, YMMWORD PTR [r9+32]
  19872. vmovdqu ymm2, YMMWORD PTR [r9+64]
  19873. vmovdqu ymm3, YMMWORD PTR [r9+96]
  19874. vpand ymm0, ymm0, ymm12
  19875. vpand ymm1, ymm1, ymm12
  19876. vpand ymm2, ymm2, ymm12
  19877. vpand ymm3, ymm3, ymm12
  19878. vpor ymm4, ymm4, ymm0
  19879. vpor ymm5, ymm5, ymm1
  19880. vpor ymm6, ymm6, ymm2
  19881. vpor ymm7, ymm7, ymm3
  19882. vpaddd ymm13, ymm13, ymm11
  19883. ; ENTRY: 37
  19884. mov r9, QWORD PTR [rdx+296]
  19885. add r9, 128
  19886. vpcmpeqd ymm12, ymm13, ymm10
  19887. vmovdqu ymm0, YMMWORD PTR [r9]
  19888. vmovdqu ymm1, YMMWORD PTR [r9+32]
  19889. vmovdqu ymm2, YMMWORD PTR [r9+64]
  19890. vmovdqu ymm3, YMMWORD PTR [r9+96]
  19891. vpand ymm0, ymm0, ymm12
  19892. vpand ymm1, ymm1, ymm12
  19893. vpand ymm2, ymm2, ymm12
  19894. vpand ymm3, ymm3, ymm12
  19895. vpor ymm4, ymm4, ymm0
  19896. vpor ymm5, ymm5, ymm1
  19897. vpor ymm6, ymm6, ymm2
  19898. vpor ymm7, ymm7, ymm3
  19899. vpaddd ymm13, ymm13, ymm11
  19900. ; ENTRY: 38
  19901. mov r9, QWORD PTR [rdx+304]
  19902. add r9, 128
  19903. vpcmpeqd ymm12, ymm13, ymm10
  19904. vmovdqu ymm0, YMMWORD PTR [r9]
  19905. vmovdqu ymm1, YMMWORD PTR [r9+32]
  19906. vmovdqu ymm2, YMMWORD PTR [r9+64]
  19907. vmovdqu ymm3, YMMWORD PTR [r9+96]
  19908. vpand ymm0, ymm0, ymm12
  19909. vpand ymm1, ymm1, ymm12
  19910. vpand ymm2, ymm2, ymm12
  19911. vpand ymm3, ymm3, ymm12
  19912. vpor ymm4, ymm4, ymm0
  19913. vpor ymm5, ymm5, ymm1
  19914. vpor ymm6, ymm6, ymm2
  19915. vpor ymm7, ymm7, ymm3
  19916. vpaddd ymm13, ymm13, ymm11
  19917. ; ENTRY: 39
  19918. mov r9, QWORD PTR [rdx+312]
  19919. add r9, 128
  19920. vpcmpeqd ymm12, ymm13, ymm10
  19921. vmovdqu ymm0, YMMWORD PTR [r9]
  19922. vmovdqu ymm1, YMMWORD PTR [r9+32]
  19923. vmovdqu ymm2, YMMWORD PTR [r9+64]
  19924. vmovdqu ymm3, YMMWORD PTR [r9+96]
  19925. vpand ymm0, ymm0, ymm12
  19926. vpand ymm1, ymm1, ymm12
  19927. vpand ymm2, ymm2, ymm12
  19928. vpand ymm3, ymm3, ymm12
  19929. vpor ymm4, ymm4, ymm0
  19930. vpor ymm5, ymm5, ymm1
  19931. vpor ymm6, ymm6, ymm2
  19932. vpor ymm7, ymm7, ymm3
  19933. vpaddd ymm13, ymm13, ymm11
  19934. ; ENTRY: 40
  19935. mov r9, QWORD PTR [rdx+320]
  19936. add r9, 128
  19937. vpcmpeqd ymm12, ymm13, ymm10
  19938. vmovdqu ymm0, YMMWORD PTR [r9]
  19939. vmovdqu ymm1, YMMWORD PTR [r9+32]
  19940. vmovdqu ymm2, YMMWORD PTR [r9+64]
  19941. vmovdqu ymm3, YMMWORD PTR [r9+96]
  19942. vpand ymm0, ymm0, ymm12
  19943. vpand ymm1, ymm1, ymm12
  19944. vpand ymm2, ymm2, ymm12
  19945. vpand ymm3, ymm3, ymm12
  19946. vpor ymm4, ymm4, ymm0
  19947. vpor ymm5, ymm5, ymm1
  19948. vpor ymm6, ymm6, ymm2
  19949. vpor ymm7, ymm7, ymm3
  19950. vpaddd ymm13, ymm13, ymm11
  19951. ; ENTRY: 41
  19952. mov r9, QWORD PTR [rdx+328]
  19953. add r9, 128
  19954. vpcmpeqd ymm12, ymm13, ymm10
  19955. vmovdqu ymm0, YMMWORD PTR [r9]
  19956. vmovdqu ymm1, YMMWORD PTR [r9+32]
  19957. vmovdqu ymm2, YMMWORD PTR [r9+64]
  19958. vmovdqu ymm3, YMMWORD PTR [r9+96]
  19959. vpand ymm0, ymm0, ymm12
  19960. vpand ymm1, ymm1, ymm12
  19961. vpand ymm2, ymm2, ymm12
  19962. vpand ymm3, ymm3, ymm12
  19963. vpor ymm4, ymm4, ymm0
  19964. vpor ymm5, ymm5, ymm1
  19965. vpor ymm6, ymm6, ymm2
  19966. vpor ymm7, ymm7, ymm3
  19967. vpaddd ymm13, ymm13, ymm11
  19968. ; ENTRY: 42
  19969. mov r9, QWORD PTR [rdx+336]
  19970. add r9, 128
  19971. vpcmpeqd ymm12, ymm13, ymm10
  19972. vmovdqu ymm0, YMMWORD PTR [r9]
  19973. vmovdqu ymm1, YMMWORD PTR [r9+32]
  19974. vmovdqu ymm2, YMMWORD PTR [r9+64]
  19975. vmovdqu ymm3, YMMWORD PTR [r9+96]
  19976. vpand ymm0, ymm0, ymm12
  19977. vpand ymm1, ymm1, ymm12
  19978. vpand ymm2, ymm2, ymm12
  19979. vpand ymm3, ymm3, ymm12
  19980. vpor ymm4, ymm4, ymm0
  19981. vpor ymm5, ymm5, ymm1
  19982. vpor ymm6, ymm6, ymm2
  19983. vpor ymm7, ymm7, ymm3
  19984. vpaddd ymm13, ymm13, ymm11
  19985. ; ENTRY: 43
  19986. mov r9, QWORD PTR [rdx+344]
  19987. add r9, 128
  19988. vpcmpeqd ymm12, ymm13, ymm10
  19989. vmovdqu ymm0, YMMWORD PTR [r9]
  19990. vmovdqu ymm1, YMMWORD PTR [r9+32]
  19991. vmovdqu ymm2, YMMWORD PTR [r9+64]
  19992. vmovdqu ymm3, YMMWORD PTR [r9+96]
  19993. vpand ymm0, ymm0, ymm12
  19994. vpand ymm1, ymm1, ymm12
  19995. vpand ymm2, ymm2, ymm12
  19996. vpand ymm3, ymm3, ymm12
  19997. vpor ymm4, ymm4, ymm0
  19998. vpor ymm5, ymm5, ymm1
  19999. vpor ymm6, ymm6, ymm2
  20000. vpor ymm7, ymm7, ymm3
  20001. vpaddd ymm13, ymm13, ymm11
  20002. ; ENTRY: 44
  20003. mov r9, QWORD PTR [rdx+352]
  20004. add r9, 128
  20005. vpcmpeqd ymm12, ymm13, ymm10
  20006. vmovdqu ymm0, YMMWORD PTR [r9]
  20007. vmovdqu ymm1, YMMWORD PTR [r9+32]
  20008. vmovdqu ymm2, YMMWORD PTR [r9+64]
  20009. vmovdqu ymm3, YMMWORD PTR [r9+96]
  20010. vpand ymm0, ymm0, ymm12
  20011. vpand ymm1, ymm1, ymm12
  20012. vpand ymm2, ymm2, ymm12
  20013. vpand ymm3, ymm3, ymm12
  20014. vpor ymm4, ymm4, ymm0
  20015. vpor ymm5, ymm5, ymm1
  20016. vpor ymm6, ymm6, ymm2
  20017. vpor ymm7, ymm7, ymm3
  20018. vpaddd ymm13, ymm13, ymm11
  20019. ; ENTRY: 45
  20020. mov r9, QWORD PTR [rdx+360]
  20021. add r9, 128
  20022. vpcmpeqd ymm12, ymm13, ymm10
  20023. vmovdqu ymm0, YMMWORD PTR [r9]
  20024. vmovdqu ymm1, YMMWORD PTR [r9+32]
  20025. vmovdqu ymm2, YMMWORD PTR [r9+64]
  20026. vmovdqu ymm3, YMMWORD PTR [r9+96]
  20027. vpand ymm0, ymm0, ymm12
  20028. vpand ymm1, ymm1, ymm12
  20029. vpand ymm2, ymm2, ymm12
  20030. vpand ymm3, ymm3, ymm12
  20031. vpor ymm4, ymm4, ymm0
  20032. vpor ymm5, ymm5, ymm1
  20033. vpor ymm6, ymm6, ymm2
  20034. vpor ymm7, ymm7, ymm3
  20035. vpaddd ymm13, ymm13, ymm11
  20036. ; ENTRY: 46
  20037. mov r9, QWORD PTR [rdx+368]
  20038. add r9, 128
  20039. vpcmpeqd ymm12, ymm13, ymm10
  20040. vmovdqu ymm0, YMMWORD PTR [r9]
  20041. vmovdqu ymm1, YMMWORD PTR [r9+32]
  20042. vmovdqu ymm2, YMMWORD PTR [r9+64]
  20043. vmovdqu ymm3, YMMWORD PTR [r9+96]
  20044. vpand ymm0, ymm0, ymm12
  20045. vpand ymm1, ymm1, ymm12
  20046. vpand ymm2, ymm2, ymm12
  20047. vpand ymm3, ymm3, ymm12
  20048. vpor ymm4, ymm4, ymm0
  20049. vpor ymm5, ymm5, ymm1
  20050. vpor ymm6, ymm6, ymm2
  20051. vpor ymm7, ymm7, ymm3
  20052. vpaddd ymm13, ymm13, ymm11
  20053. ; ENTRY: 47
  20054. mov r9, QWORD PTR [rdx+376]
  20055. add r9, 128
  20056. vpcmpeqd ymm12, ymm13, ymm10
  20057. vmovdqu ymm0, YMMWORD PTR [r9]
  20058. vmovdqu ymm1, YMMWORD PTR [r9+32]
  20059. vmovdqu ymm2, YMMWORD PTR [r9+64]
  20060. vmovdqu ymm3, YMMWORD PTR [r9+96]
  20061. vpand ymm0, ymm0, ymm12
  20062. vpand ymm1, ymm1, ymm12
  20063. vpand ymm2, ymm2, ymm12
  20064. vpand ymm3, ymm3, ymm12
  20065. vpor ymm4, ymm4, ymm0
  20066. vpor ymm5, ymm5, ymm1
  20067. vpor ymm6, ymm6, ymm2
  20068. vpor ymm7, ymm7, ymm3
  20069. vpaddd ymm13, ymm13, ymm11
  20070. ; ENTRY: 48
  20071. mov r9, QWORD PTR [rdx+384]
  20072. add r9, 128
  20073. vpcmpeqd ymm12, ymm13, ymm10
  20074. vmovdqu ymm0, YMMWORD PTR [r9]
  20075. vmovdqu ymm1, YMMWORD PTR [r9+32]
  20076. vmovdqu ymm2, YMMWORD PTR [r9+64]
  20077. vmovdqu ymm3, YMMWORD PTR [r9+96]
  20078. vpand ymm0, ymm0, ymm12
  20079. vpand ymm1, ymm1, ymm12
  20080. vpand ymm2, ymm2, ymm12
  20081. vpand ymm3, ymm3, ymm12
  20082. vpor ymm4, ymm4, ymm0
  20083. vpor ymm5, ymm5, ymm1
  20084. vpor ymm6, ymm6, ymm2
  20085. vpor ymm7, ymm7, ymm3
  20086. vpaddd ymm13, ymm13, ymm11
  20087. ; ENTRY: 49
  20088. mov r9, QWORD PTR [rdx+392]
  20089. add r9, 128
  20090. vpcmpeqd ymm12, ymm13, ymm10
  20091. vmovdqu ymm0, YMMWORD PTR [r9]
  20092. vmovdqu ymm1, YMMWORD PTR [r9+32]
  20093. vmovdqu ymm2, YMMWORD PTR [r9+64]
  20094. vmovdqu ymm3, YMMWORD PTR [r9+96]
  20095. vpand ymm0, ymm0, ymm12
  20096. vpand ymm1, ymm1, ymm12
  20097. vpand ymm2, ymm2, ymm12
  20098. vpand ymm3, ymm3, ymm12
  20099. vpor ymm4, ymm4, ymm0
  20100. vpor ymm5, ymm5, ymm1
  20101. vpor ymm6, ymm6, ymm2
  20102. vpor ymm7, ymm7, ymm3
  20103. vpaddd ymm13, ymm13, ymm11
  20104. ; ENTRY: 50
  20105. mov r9, QWORD PTR [rdx+400]
  20106. add r9, 128
  20107. vpcmpeqd ymm12, ymm13, ymm10
  20108. vmovdqu ymm0, YMMWORD PTR [r9]
  20109. vmovdqu ymm1, YMMWORD PTR [r9+32]
  20110. vmovdqu ymm2, YMMWORD PTR [r9+64]
  20111. vmovdqu ymm3, YMMWORD PTR [r9+96]
  20112. vpand ymm0, ymm0, ymm12
  20113. vpand ymm1, ymm1, ymm12
  20114. vpand ymm2, ymm2, ymm12
  20115. vpand ymm3, ymm3, ymm12
  20116. vpor ymm4, ymm4, ymm0
  20117. vpor ymm5, ymm5, ymm1
  20118. vpor ymm6, ymm6, ymm2
  20119. vpor ymm7, ymm7, ymm3
  20120. vpaddd ymm13, ymm13, ymm11
  20121. ; ENTRY: 51
  20122. mov r9, QWORD PTR [rdx+408]
  20123. add r9, 128
  20124. vpcmpeqd ymm12, ymm13, ymm10
  20125. vmovdqu ymm0, YMMWORD PTR [r9]
  20126. vmovdqu ymm1, YMMWORD PTR [r9+32]
  20127. vmovdqu ymm2, YMMWORD PTR [r9+64]
  20128. vmovdqu ymm3, YMMWORD PTR [r9+96]
  20129. vpand ymm0, ymm0, ymm12
  20130. vpand ymm1, ymm1, ymm12
  20131. vpand ymm2, ymm2, ymm12
  20132. vpand ymm3, ymm3, ymm12
  20133. vpor ymm4, ymm4, ymm0
  20134. vpor ymm5, ymm5, ymm1
  20135. vpor ymm6, ymm6, ymm2
  20136. vpor ymm7, ymm7, ymm3
  20137. vpaddd ymm13, ymm13, ymm11
  20138. ; ENTRY: 52
  20139. mov r9, QWORD PTR [rdx+416]
  20140. add r9, 128
  20141. vpcmpeqd ymm12, ymm13, ymm10
  20142. vmovdqu ymm0, YMMWORD PTR [r9]
  20143. vmovdqu ymm1, YMMWORD PTR [r9+32]
  20144. vmovdqu ymm2, YMMWORD PTR [r9+64]
  20145. vmovdqu ymm3, YMMWORD PTR [r9+96]
  20146. vpand ymm0, ymm0, ymm12
  20147. vpand ymm1, ymm1, ymm12
  20148. vpand ymm2, ymm2, ymm12
  20149. vpand ymm3, ymm3, ymm12
  20150. vpor ymm4, ymm4, ymm0
  20151. vpor ymm5, ymm5, ymm1
  20152. vpor ymm6, ymm6, ymm2
  20153. vpor ymm7, ymm7, ymm3
  20154. vpaddd ymm13, ymm13, ymm11
  20155. ; ENTRY: 53
  20156. mov r9, QWORD PTR [rdx+424]
  20157. add r9, 128
  20158. vpcmpeqd ymm12, ymm13, ymm10
  20159. vmovdqu ymm0, YMMWORD PTR [r9]
  20160. vmovdqu ymm1, YMMWORD PTR [r9+32]
  20161. vmovdqu ymm2, YMMWORD PTR [r9+64]
  20162. vmovdqu ymm3, YMMWORD PTR [r9+96]
  20163. vpand ymm0, ymm0, ymm12
  20164. vpand ymm1, ymm1, ymm12
  20165. vpand ymm2, ymm2, ymm12
  20166. vpand ymm3, ymm3, ymm12
  20167. vpor ymm4, ymm4, ymm0
  20168. vpor ymm5, ymm5, ymm1
  20169. vpor ymm6, ymm6, ymm2
  20170. vpor ymm7, ymm7, ymm3
  20171. vpaddd ymm13, ymm13, ymm11
  20172. ; ENTRY: 54
  20173. mov r9, QWORD PTR [rdx+432]
  20174. add r9, 128
  20175. vpcmpeqd ymm12, ymm13, ymm10
  20176. vmovdqu ymm0, YMMWORD PTR [r9]
  20177. vmovdqu ymm1, YMMWORD PTR [r9+32]
  20178. vmovdqu ymm2, YMMWORD PTR [r9+64]
  20179. vmovdqu ymm3, YMMWORD PTR [r9+96]
  20180. vpand ymm0, ymm0, ymm12
  20181. vpand ymm1, ymm1, ymm12
  20182. vpand ymm2, ymm2, ymm12
  20183. vpand ymm3, ymm3, ymm12
  20184. vpor ymm4, ymm4, ymm0
  20185. vpor ymm5, ymm5, ymm1
  20186. vpor ymm6, ymm6, ymm2
  20187. vpor ymm7, ymm7, ymm3
  20188. vpaddd ymm13, ymm13, ymm11
  20189. ; ENTRY: 55
  20190. mov r9, QWORD PTR [rdx+440]
  20191. add r9, 128
  20192. vpcmpeqd ymm12, ymm13, ymm10
  20193. vmovdqu ymm0, YMMWORD PTR [r9]
  20194. vmovdqu ymm1, YMMWORD PTR [r9+32]
  20195. vmovdqu ymm2, YMMWORD PTR [r9+64]
  20196. vmovdqu ymm3, YMMWORD PTR [r9+96]
  20197. vpand ymm0, ymm0, ymm12
  20198. vpand ymm1, ymm1, ymm12
  20199. vpand ymm2, ymm2, ymm12
  20200. vpand ymm3, ymm3, ymm12
  20201. vpor ymm4, ymm4, ymm0
  20202. vpor ymm5, ymm5, ymm1
  20203. vpor ymm6, ymm6, ymm2
  20204. vpor ymm7, ymm7, ymm3
  20205. vpaddd ymm13, ymm13, ymm11
  20206. ; ENTRY: 56
  20207. mov r9, QWORD PTR [rdx+448]
  20208. add r9, 128
  20209. vpcmpeqd ymm12, ymm13, ymm10
  20210. vmovdqu ymm0, YMMWORD PTR [r9]
  20211. vmovdqu ymm1, YMMWORD PTR [r9+32]
  20212. vmovdqu ymm2, YMMWORD PTR [r9+64]
  20213. vmovdqu ymm3, YMMWORD PTR [r9+96]
  20214. vpand ymm0, ymm0, ymm12
  20215. vpand ymm1, ymm1, ymm12
  20216. vpand ymm2, ymm2, ymm12
  20217. vpand ymm3, ymm3, ymm12
  20218. vpor ymm4, ymm4, ymm0
  20219. vpor ymm5, ymm5, ymm1
  20220. vpor ymm6, ymm6, ymm2
  20221. vpor ymm7, ymm7, ymm3
  20222. vpaddd ymm13, ymm13, ymm11
  20223. ; ENTRY: 57
  20224. mov r9, QWORD PTR [rdx+456]
  20225. add r9, 128
  20226. vpcmpeqd ymm12, ymm13, ymm10
  20227. vmovdqu ymm0, YMMWORD PTR [r9]
  20228. vmovdqu ymm1, YMMWORD PTR [r9+32]
  20229. vmovdqu ymm2, YMMWORD PTR [r9+64]
  20230. vmovdqu ymm3, YMMWORD PTR [r9+96]
  20231. vpand ymm0, ymm0, ymm12
  20232. vpand ymm1, ymm1, ymm12
  20233. vpand ymm2, ymm2, ymm12
  20234. vpand ymm3, ymm3, ymm12
  20235. vpor ymm4, ymm4, ymm0
  20236. vpor ymm5, ymm5, ymm1
  20237. vpor ymm6, ymm6, ymm2
  20238. vpor ymm7, ymm7, ymm3
  20239. vpaddd ymm13, ymm13, ymm11
  20240. ; ENTRY: 58
  20241. mov r9, QWORD PTR [rdx+464]
  20242. add r9, 128
  20243. vpcmpeqd ymm12, ymm13, ymm10
  20244. vmovdqu ymm0, YMMWORD PTR [r9]
  20245. vmovdqu ymm1, YMMWORD PTR [r9+32]
  20246. vmovdqu ymm2, YMMWORD PTR [r9+64]
  20247. vmovdqu ymm3, YMMWORD PTR [r9+96]
  20248. vpand ymm0, ymm0, ymm12
  20249. vpand ymm1, ymm1, ymm12
  20250. vpand ymm2, ymm2, ymm12
  20251. vpand ymm3, ymm3, ymm12
  20252. vpor ymm4, ymm4, ymm0
  20253. vpor ymm5, ymm5, ymm1
  20254. vpor ymm6, ymm6, ymm2
  20255. vpor ymm7, ymm7, ymm3
  20256. vpaddd ymm13, ymm13, ymm11
  20257. ; ENTRY: 59
  20258. mov r9, QWORD PTR [rdx+472]
  20259. add r9, 128
  20260. vpcmpeqd ymm12, ymm13, ymm10
  20261. vmovdqu ymm0, YMMWORD PTR [r9]
  20262. vmovdqu ymm1, YMMWORD PTR [r9+32]
  20263. vmovdqu ymm2, YMMWORD PTR [r9+64]
  20264. vmovdqu ymm3, YMMWORD PTR [r9+96]
  20265. vpand ymm0, ymm0, ymm12
  20266. vpand ymm1, ymm1, ymm12
  20267. vpand ymm2, ymm2, ymm12
  20268. vpand ymm3, ymm3, ymm12
  20269. vpor ymm4, ymm4, ymm0
  20270. vpor ymm5, ymm5, ymm1
  20271. vpor ymm6, ymm6, ymm2
  20272. vpor ymm7, ymm7, ymm3
  20273. vpaddd ymm13, ymm13, ymm11
  20274. ; ENTRY: 60
  20275. mov r9, QWORD PTR [rdx+480]
  20276. add r9, 128
  20277. vpcmpeqd ymm12, ymm13, ymm10
  20278. vmovdqu ymm0, YMMWORD PTR [r9]
  20279. vmovdqu ymm1, YMMWORD PTR [r9+32]
  20280. vmovdqu ymm2, YMMWORD PTR [r9+64]
  20281. vmovdqu ymm3, YMMWORD PTR [r9+96]
  20282. vpand ymm0, ymm0, ymm12
  20283. vpand ymm1, ymm1, ymm12
  20284. vpand ymm2, ymm2, ymm12
  20285. vpand ymm3, ymm3, ymm12
  20286. vpor ymm4, ymm4, ymm0
  20287. vpor ymm5, ymm5, ymm1
  20288. vpor ymm6, ymm6, ymm2
  20289. vpor ymm7, ymm7, ymm3
  20290. vpaddd ymm13, ymm13, ymm11
  20291. ; ENTRY: 61
  20292. mov r9, QWORD PTR [rdx+488]
  20293. add r9, 128
  20294. vpcmpeqd ymm12, ymm13, ymm10
  20295. vmovdqu ymm0, YMMWORD PTR [r9]
  20296. vmovdqu ymm1, YMMWORD PTR [r9+32]
  20297. vmovdqu ymm2, YMMWORD PTR [r9+64]
  20298. vmovdqu ymm3, YMMWORD PTR [r9+96]
  20299. vpand ymm0, ymm0, ymm12
  20300. vpand ymm1, ymm1, ymm12
  20301. vpand ymm2, ymm2, ymm12
  20302. vpand ymm3, ymm3, ymm12
  20303. vpor ymm4, ymm4, ymm0
  20304. vpor ymm5, ymm5, ymm1
  20305. vpor ymm6, ymm6, ymm2
  20306. vpor ymm7, ymm7, ymm3
  20307. vpaddd ymm13, ymm13, ymm11
  20308. ; ENTRY: 62
  20309. mov r9, QWORD PTR [rdx+496]
  20310. add r9, 128
  20311. vpcmpeqd ymm12, ymm13, ymm10
  20312. vmovdqu ymm0, YMMWORD PTR [r9]
  20313. vmovdqu ymm1, YMMWORD PTR [r9+32]
  20314. vmovdqu ymm2, YMMWORD PTR [r9+64]
  20315. vmovdqu ymm3, YMMWORD PTR [r9+96]
  20316. vpand ymm0, ymm0, ymm12
  20317. vpand ymm1, ymm1, ymm12
  20318. vpand ymm2, ymm2, ymm12
  20319. vpand ymm3, ymm3, ymm12
  20320. vpor ymm4, ymm4, ymm0
  20321. vpor ymm5, ymm5, ymm1
  20322. vpor ymm6, ymm6, ymm2
  20323. vpor ymm7, ymm7, ymm3
  20324. vpaddd ymm13, ymm13, ymm11
  20325. ; ENTRY: 63
  20326. mov r9, QWORD PTR [rdx+504]
  20327. add r9, 128
  20328. vpcmpeqd ymm12, ymm13, ymm10
  20329. vmovdqu ymm0, YMMWORD PTR [r9]
  20330. vmovdqu ymm1, YMMWORD PTR [r9+32]
  20331. vmovdqu ymm2, YMMWORD PTR [r9+64]
  20332. vmovdqu ymm3, YMMWORD PTR [r9+96]
  20333. vpand ymm0, ymm0, ymm12
  20334. vpand ymm1, ymm1, ymm12
  20335. vpand ymm2, ymm2, ymm12
  20336. vpand ymm3, ymm3, ymm12
  20337. vpor ymm4, ymm4, ymm0
  20338. vpor ymm5, ymm5, ymm1
  20339. vpor ymm6, ymm6, ymm2
  20340. vpor ymm7, ymm7, ymm3
  20341. vpaddd ymm13, ymm13, ymm11
  20342. vmovdqu YMMWORD PTR [rcx], ymm4
  20343. vmovdqu YMMWORD PTR [rcx+32], ymm5
  20344. vmovdqu YMMWORD PTR [rcx+64], ymm6
  20345. vmovdqu YMMWORD PTR [rcx+96], ymm7
  20346. ; END: 16-31
  20347. vmovdqu xmm6, OWORD PTR [rsp]
  20348. vmovdqu xmm7, OWORD PTR [rsp+16]
  20349. vmovdqu xmm8, OWORD PTR [rsp+32]
  20350. vmovdqu xmm9, OWORD PTR [rsp+48]
  20351. vmovdqu xmm10, OWORD PTR [rsp+64]
  20352. vmovdqu xmm11, OWORD PTR [rsp+80]
  20353. vmovdqu xmm12, OWORD PTR [rsp+96]
  20354. vmovdqu xmm13, OWORD PTR [rsp+112]
  20355. add rsp, 128
  20356. ret
  20357. sp_2048_get_from_table_avx2_32 ENDP
  20358. _text ENDS
  20359. ENDIF
  20360. ; /* Conditionally add a and b using the mask m: r = a + (b & m) over 16 64-bit words.
  20361. ; * m is -1 to add and 0 when not.
  20362. ; * Registers as used below: rcx = r, rdx = a, r8 = b, r9 = m; the carry out (0 or 1) is returned in rax.
  20363. ; * r A single precision number representing conditional add result.
  20364. ; * a A single precision number to add with.
  20365. ; * b A single precision number to add.
  20366. ; * m Mask value to apply.
  20367. ; */
  20368. _text SEGMENT READONLY PARA
  20369. sp_2048_cond_add_16 PROC
  20370. sub rsp, 128 ; 16 x 8 bytes of stack scratch for the masked copy of b
  20371. mov rax, 0 ; rax receives the final carry via the adc at the end
  20372. mov r10, QWORD PTR [r8] ; pass 1: store (b[i] & m) at [rsp+8*i], two words per step
  20373. mov r11, QWORD PTR [r8+8]
  20374. and r10, r9
  20375. and r11, r9
  20376. mov QWORD PTR [rsp], r10
  20377. mov QWORD PTR [rsp+8], r11
  20378. mov r10, QWORD PTR [r8+16]
  20379. mov r11, QWORD PTR [r8+24]
  20380. and r10, r9
  20381. and r11, r9
  20382. mov QWORD PTR [rsp+16], r10
  20383. mov QWORD PTR [rsp+24], r11
  20384. mov r10, QWORD PTR [r8+32]
  20385. mov r11, QWORD PTR [r8+40]
  20386. and r10, r9
  20387. and r11, r9
  20388. mov QWORD PTR [rsp+32], r10
  20389. mov QWORD PTR [rsp+40], r11
  20390. mov r10, QWORD PTR [r8+48]
  20391. mov r11, QWORD PTR [r8+56]
  20392. and r10, r9
  20393. and r11, r9
  20394. mov QWORD PTR [rsp+48], r10
  20395. mov QWORD PTR [rsp+56], r11
  20396. mov r10, QWORD PTR [r8+64]
  20397. mov r11, QWORD PTR [r8+72]
  20398. and r10, r9
  20399. and r11, r9
  20400. mov QWORD PTR [rsp+64], r10
  20401. mov QWORD PTR [rsp+72], r11
  20402. mov r10, QWORD PTR [r8+80]
  20403. mov r11, QWORD PTR [r8+88]
  20404. and r10, r9
  20405. and r11, r9
  20406. mov QWORD PTR [rsp+80], r10
  20407. mov QWORD PTR [rsp+88], r11
  20408. mov r10, QWORD PTR [r8+96]
  20409. mov r11, QWORD PTR [r8+104]
  20410. and r10, r9
  20411. and r11, r9
  20412. mov QWORD PTR [rsp+96], r10
  20413. mov QWORD PTR [rsp+104], r11
  20414. mov r10, QWORD PTR [r8+112]
  20415. mov r11, QWORD PTR [r8+120]
  20416. and r10, r9
  20417. and r11, r9
  20418. mov QWORD PTR [rsp+112], r10
  20419. mov QWORD PTR [rsp+120], r11
  20420. mov r10, QWORD PTR [rdx] ; pass 2: r[i] = a[i] + masked b[i] + carry
  20421. mov r8, QWORD PTR [rsp] ; b has been fully read, so r8 is reused as scratch from here on
  20422. add r10, r8 ; word 0 uses add: it starts the carry chain
  20423. mov r11, QWORD PTR [rdx+8]
  20424. mov r8, QWORD PTR [rsp+8]
  20425. adc r11, r8 ; every later word uses adc to consume the previous carry
  20426. mov QWORD PTR [rcx], r10 ; only mov sits between the adc steps; mov leaves CF untouched
  20427. mov r10, QWORD PTR [rdx+16]
  20428. mov r8, QWORD PTR [rsp+16]
  20429. adc r10, r8
  20430. mov QWORD PTR [rcx+8], r11
  20431. mov r11, QWORD PTR [rdx+24]
  20432. mov r8, QWORD PTR [rsp+24]
  20433. adc r11, r8
  20434. mov QWORD PTR [rcx+16], r10
  20435. mov r10, QWORD PTR [rdx+32]
  20436. mov r8, QWORD PTR [rsp+32]
  20437. adc r10, r8
  20438. mov QWORD PTR [rcx+24], r11
  20439. mov r11, QWORD PTR [rdx+40]
  20440. mov r8, QWORD PTR [rsp+40]
  20441. adc r11, r8
  20442. mov QWORD PTR [rcx+32], r10
  20443. mov r10, QWORD PTR [rdx+48]
  20444. mov r8, QWORD PTR [rsp+48]
  20445. adc r10, r8
  20446. mov QWORD PTR [rcx+40], r11
  20447. mov r11, QWORD PTR [rdx+56]
  20448. mov r8, QWORD PTR [rsp+56]
  20449. adc r11, r8
  20450. mov QWORD PTR [rcx+48], r10
  20451. mov r10, QWORD PTR [rdx+64]
  20452. mov r8, QWORD PTR [rsp+64]
  20453. adc r10, r8
  20454. mov QWORD PTR [rcx+56], r11
  20455. mov r11, QWORD PTR [rdx+72]
  20456. mov r8, QWORD PTR [rsp+72]
  20457. adc r11, r8
  20458. mov QWORD PTR [rcx+64], r10
  20459. mov r10, QWORD PTR [rdx+80]
  20460. mov r8, QWORD PTR [rsp+80]
  20461. adc r10, r8
  20462. mov QWORD PTR [rcx+72], r11
  20463. mov r11, QWORD PTR [rdx+88]
  20464. mov r8, QWORD PTR [rsp+88]
  20465. adc r11, r8
  20466. mov QWORD PTR [rcx+80], r10
  20467. mov r10, QWORD PTR [rdx+96]
  20468. mov r8, QWORD PTR [rsp+96]
  20469. adc r10, r8
  20470. mov QWORD PTR [rcx+88], r11
  20471. mov r11, QWORD PTR [rdx+104]
  20472. mov r8, QWORD PTR [rsp+104]
  20473. adc r11, r8
  20474. mov QWORD PTR [rcx+96], r10
  20475. mov r10, QWORD PTR [rdx+112]
  20476. mov r8, QWORD PTR [rsp+112]
  20477. adc r10, r8
  20478. mov QWORD PTR [rcx+104], r11
  20479. mov r11, QWORD PTR [rdx+120]
  20480. mov r8, QWORD PTR [rsp+120]
  20481. adc r11, r8
  20482. mov QWORD PTR [rcx+112], r10
  20483. mov QWORD PTR [rcx+120], r11
  20484. adc rax, 0 ; rax = carry out of the top word (rax was zeroed on entry)
  20485. add rsp, 128 ; release the scratch area; flags are no longer needed here
  20486. ret
  20487. sp_2048_cond_add_16 ENDP
  20488. _text ENDS
  20489. IFDEF HAVE_INTEL_AVX2
  20490. ; /* Conditionally add a and b using the mask m: r = a + (b masked by m) over 16 64-bit words.
  20491. ; * m is -1 to add and 0 when not.
  20492. ; * Registers as used below: rcx = r, rdx = a, r8 = b, r9 = m; the carry out (0 or 1) is returned in rax.
  20493. ; * r A single precision number representing conditional add result.
  20494. ; * a A single precision number to add with.
  20495. ; * b A single precision number to add.
  20496. ; * m Mask value to apply.
  20497. ; */
  20498. _text SEGMENT READONLY PARA
  20499. sp_2048_cond_add_avx2_16 PROC
  20500. push r12 ; r12 is used as a third scratch register and restored before ret
  20501. mov rax, 0 ; rax receives the final carry via the adc at the end
  20502. mov r12, QWORD PTR [r8]
  20503. mov r10, QWORD PTR [rdx]
  20504. pext r12, r12, r9 ; masking via pext: with m = -1 the word is unchanged, with m = 0 it becomes 0
  20505. add r10, r12 ; word 0 uses add: it starts the carry chain
  20506. mov r12, QWORD PTR [r8+8]
  20507. mov r11, QWORD PTR [rdx+8]
  20508. pext r12, r12, r9 ; pext does not write the flags, so CF from the previous add/adc survives
  20509. mov QWORD PTR [rcx], r10 ; the store of word i is issued between masking and adding word i+1
  20510. adc r11, r12
  20511. mov r10, QWORD PTR [r8+16] ; r10, r11, r12 rotate roles (masked b / sum) from step to step
  20512. mov r12, QWORD PTR [rdx+16]
  20513. pext r10, r10, r9
  20514. mov QWORD PTR [rcx+8], r11
  20515. adc r12, r10
  20516. mov r11, QWORD PTR [r8+24]
  20517. mov r10, QWORD PTR [rdx+24]
  20518. pext r11, r11, r9
  20519. mov QWORD PTR [rcx+16], r12
  20520. adc r10, r11
  20521. mov r12, QWORD PTR [r8+32]
  20522. mov r11, QWORD PTR [rdx+32]
  20523. pext r12, r12, r9
  20524. mov QWORD PTR [rcx+24], r10
  20525. adc r11, r12
  20526. mov r10, QWORD PTR [r8+40]
  20527. mov r12, QWORD PTR [rdx+40]
  20528. pext r10, r10, r9
  20529. mov QWORD PTR [rcx+32], r11
  20530. adc r12, r10
  20531. mov r11, QWORD PTR [r8+48]
  20532. mov r10, QWORD PTR [rdx+48]
  20533. pext r11, r11, r9
  20534. mov QWORD PTR [rcx+40], r12
  20535. adc r10, r11
  20536. mov r12, QWORD PTR [r8+56]
  20537. mov r11, QWORD PTR [rdx+56]
  20538. pext r12, r12, r9
  20539. mov QWORD PTR [rcx+48], r10
  20540. adc r11, r12
  20541. mov r10, QWORD PTR [r8+64]
  20542. mov r12, QWORD PTR [rdx+64]
  20543. pext r10, r10, r9
  20544. mov QWORD PTR [rcx+56], r11
  20545. adc r12, r10
  20546. mov r11, QWORD PTR [r8+72]
  20547. mov r10, QWORD PTR [rdx+72]
  20548. pext r11, r11, r9
  20549. mov QWORD PTR [rcx+64], r12
  20550. adc r10, r11
  20551. mov r12, QWORD PTR [r8+80]
  20552. mov r11, QWORD PTR [rdx+80]
  20553. pext r12, r12, r9
  20554. mov QWORD PTR [rcx+72], r10
  20555. adc r11, r12
  20556. mov r10, QWORD PTR [r8+88]
  20557. mov r12, QWORD PTR [rdx+88]
  20558. pext r10, r10, r9
  20559. mov QWORD PTR [rcx+80], r11
  20560. adc r12, r10
  20561. mov r11, QWORD PTR [r8+96]
  20562. mov r10, QWORD PTR [rdx+96]
  20563. pext r11, r11, r9
  20564. mov QWORD PTR [rcx+88], r12
  20565. adc r10, r11
  20566. mov r12, QWORD PTR [r8+104]
  20567. mov r11, QWORD PTR [rdx+104]
  20568. pext r12, r12, r9
  20569. mov QWORD PTR [rcx+96], r10
  20570. adc r11, r12
  20571. mov r10, QWORD PTR [r8+112]
  20572. mov r12, QWORD PTR [rdx+112]
  20573. pext r10, r10, r9
  20574. mov QWORD PTR [rcx+104], r11
  20575. adc r12, r10
  20576. mov r11, QWORD PTR [r8+120]
  20577. mov r10, QWORD PTR [rdx+120]
  20578. pext r11, r11, r9
  20579. mov QWORD PTR [rcx+112], r12
  20580. adc r10, r11
  20581. mov QWORD PTR [rcx+120], r10
  20582. adc rax, 0 ; rax = carry out of the top word (rax was zeroed on entry)
  20583. pop r12
  20584. ret
  20585. sp_2048_cond_add_avx2_16 ENDP
  20586. _text ENDS
  20587. ENDIF ; NOTE(review): pext is a BMI2 instruction; presumably callers gate this routine on BMI2 as well as AVX2 - confirm
  20588. ; /* Shift number left by n bit. (r = a << n)
  20589. ; * Reads 32 words of a and writes 33 words of r; the bits shifted out of a[31] land in r[32].
  20590. ; * r Result of left shift by n. Passed in rcx.
  20591. ; * a Number to shift. Passed in rdx.
  20592. ; * n Amount to shift. Passed in r8; only its low byte is used as the shld/shl count (presumably 0..63 - confirm against callers).
  20593. ; */
  20594. _text SEGMENT READONLY PARA
  20595. sp_2048_lshift_32 PROC
  20596. push r12
  20597. push r13
  20598. mov rax, rcx ; save the destination pointer r BEFORE cl is overwritten with the shift count
  20599. mov cl, r8b ; shld/shl take their variable count from cl
  20600. mov r12, 0 ; r12 will receive the bits shifted out of the top word
  20601. mov r13, QWORD PTR [rdx+216] ; a[27]: only supplies the low bits of r[28] here, shifted itself in the next group
  20602. mov r8, QWORD PTR [rdx+224]
  20603. mov r9, QWORD PTR [rdx+232]
  20604. mov r10, QWORD PTR [rdx+240]
  20605. mov r11, QWORD PTR [rdx+248]
  20606. shld r12, r11, cl ; work from the most significant word downwards: each shld pulls its low bits from the next lower word
  20607. shld r11, r10, cl
  20608. shld r10, r9, cl
  20609. shld r9, r8, cl
  20610. shld r8, r13, cl
  20611. mov QWORD PTR [rax+224], r8
  20612. mov QWORD PTR [rax+232], r9
  20613. mov QWORD PTR [rax+240], r10
  20614. mov QWORD PTR [rax+248], r11
  20615. mov QWORD PTR [rax+256], r12 ; r[32] = overflow word
  20616. mov r11, QWORD PTR [rdx+184] ; r11 and r13 alternate as the carried-over boundary word between groups of four
  20617. mov r8, QWORD PTR [rdx+192]
  20618. mov r9, QWORD PTR [rdx+200]
  20619. mov r10, QWORD PTR [rdx+208]
  20620. shld r13, r10, cl
  20621. shld r10, r9, cl
  20622. shld r9, r8, cl
  20623. shld r8, r11, cl
  20624. mov QWORD PTR [rax+192], r8
  20625. mov QWORD PTR [rax+200], r9
  20626. mov QWORD PTR [rax+208], r10
  20627. mov QWORD PTR [rax+216], r13
  20628. mov r13, QWORD PTR [rdx+152]
  20629. mov r8, QWORD PTR [rdx+160]
  20630. mov r9, QWORD PTR [rdx+168]
  20631. mov r10, QWORD PTR [rdx+176]
  20632. shld r11, r10, cl
  20633. shld r10, r9, cl
  20634. shld r9, r8, cl
  20635. shld r8, r13, cl
  20636. mov QWORD PTR [rax+160], r8
  20637. mov QWORD PTR [rax+168], r9
  20638. mov QWORD PTR [rax+176], r10
  20639. mov QWORD PTR [rax+184], r11
  20640. mov r11, QWORD PTR [rdx+120]
  20641. mov r8, QWORD PTR [rdx+128]
  20642. mov r9, QWORD PTR [rdx+136]
  20643. mov r10, QWORD PTR [rdx+144]
  20644. shld r13, r10, cl
  20645. shld r10, r9, cl
  20646. shld r9, r8, cl
  20647. shld r8, r11, cl
  20648. mov QWORD PTR [rax+128], r8
  20649. mov QWORD PTR [rax+136], r9
  20650. mov QWORD PTR [rax+144], r10
  20651. mov QWORD PTR [rax+152], r13
  20652. mov r13, QWORD PTR [rdx+88]
  20653. mov r8, QWORD PTR [rdx+96]
  20654. mov r9, QWORD PTR [rdx+104]
  20655. mov r10, QWORD PTR [rdx+112]
  20656. shld r11, r10, cl
  20657. shld r10, r9, cl
  20658. shld r9, r8, cl
  20659. shld r8, r13, cl
  20660. mov QWORD PTR [rax+96], r8
  20661. mov QWORD PTR [rax+104], r9
  20662. mov QWORD PTR [rax+112], r10
  20663. mov QWORD PTR [rax+120], r11
  20664. mov r11, QWORD PTR [rdx+56]
  20665. mov r8, QWORD PTR [rdx+64]
  20666. mov r9, QWORD PTR [rdx+72]
  20667. mov r10, QWORD PTR [rdx+80]
  20668. shld r13, r10, cl
  20669. shld r10, r9, cl
  20670. shld r9, r8, cl
  20671. shld r8, r11, cl
  20672. mov QWORD PTR [rax+64], r8
  20673. mov QWORD PTR [rax+72], r9
  20674. mov QWORD PTR [rax+80], r10
  20675. mov QWORD PTR [rax+88], r13
  20676. mov r13, QWORD PTR [rdx+24]
  20677. mov r8, QWORD PTR [rdx+32]
  20678. mov r9, QWORD PTR [rdx+40]
  20679. mov r10, QWORD PTR [rdx+48]
  20680. shld r11, r10, cl
  20681. shld r10, r9, cl
  20682. shld r9, r8, cl
  20683. shld r8, r13, cl
  20684. mov QWORD PTR [rax+32], r8
  20685. mov QWORD PTR [rax+40], r9
  20686. mov QWORD PTR [rax+48], r10
  20687. mov QWORD PTR [rax+56], r11
  20688. mov r8, QWORD PTR [rdx]
  20689. mov r9, QWORD PTR [rdx+8]
  20690. mov r10, QWORD PTR [rdx+16]
  20691. shld r13, r10, cl
  20692. shld r10, r9, cl
  20693. shld r9, r8, cl
  20694. shl r8, cl ; lowest word: nothing below it, so zeros are shifted in
  20695. mov QWORD PTR [rax], r8
  20696. mov QWORD PTR [rax+8], r9
  20697. mov QWORD PTR [rax+16], r10
  20698. mov QWORD PTR [rax+24], r13
  20699. pop r13
  20700. pop r12
  20701. ret
  20702. sp_2048_lshift_32 ENDP
  20703. _text ENDS
  20704. ENDIF
  20705. ENDIF
  20706. IFNDEF WOLFSSL_SP_NO_3072
  20707. IFNDEF WOLFSSL_SP_NO_3072
  20708. ; /* Read big endian unsigned byte array into r.
  20709. ; * Uses the bswap instruction.
  20710. ; * Registers as used below: rcx = r, r8 = a, r9 = n. rdx (size) is never read; the output end is fixed at r + 384 bytes.
  20711. ; * r A single precision integer.
  20712. ; * size Maximum number of bytes to convert
  20713. ; * a Byte array.
  20714. ; * n Number of bytes in array to read.
  20715. ; */
  20716. _text SEGMENT READONLY PARA
  20717. sp_3072_from_bin_bswap PROC
  20718. push r12
  20719. push r13
  20720. mov r11, r8
  20721. mov r12, rcx
  20722. add r11, r9 ; r11 = a + n, one past the last input byte; input is consumed from the tail
  20723. add r12, 384 ; r12 = r + 384, end of the output buffer used by the zero-fill loop
  20724. xor r13, r13 ; r13 = 0, used as the zero constant below
  20725. jmp L_3072_from_bin_bswap_64_end
  20726. L_3072_from_bin_bswap_64_start: ; 64 bytes per iteration while n > 63
  20727. sub r11, 64
  20728. mov rax, QWORD PTR [r11+56] ; last 8 input bytes of the chunk become the lowest output word
  20729. mov r10, QWORD PTR [r11+48]
  20730. bswap rax ; reverse byte order of each 8-byte group
  20731. bswap r10
  20732. mov QWORD PTR [rcx], rax
  20733. mov QWORD PTR [rcx+8], r10
  20734. mov rax, QWORD PTR [r11+40]
  20735. mov r10, QWORD PTR [r11+32]
  20736. bswap rax
  20737. bswap r10
  20738. mov QWORD PTR [rcx+16], rax
  20739. mov QWORD PTR [rcx+24], r10
  20740. mov rax, QWORD PTR [r11+24]
  20741. mov r10, QWORD PTR [r11+16]
  20742. bswap rax
  20743. bswap r10
  20744. mov QWORD PTR [rcx+32], rax
  20745. mov QWORD PTR [rcx+40], r10
  20746. mov rax, QWORD PTR [r11+8]
  20747. mov r10, QWORD PTR [r11]
  20748. bswap rax
  20749. bswap r10
  20750. mov QWORD PTR [rcx+48], rax
  20751. mov QWORD PTR [rcx+56], r10
  20752. add rcx, 64 ; advance the output cursor
  20753. sub r9, 64 ; r9 = input bytes still to convert
  20754. L_3072_from_bin_bswap_64_end:
  20755. cmp r9, 63
  20756. jg L_3072_from_bin_bswap_64_start ; signed compare on the remaining byte count
  20757. jmp L_3072_from_bin_bswap_8_end
  20758. L_3072_from_bin_bswap_8_start: ; 8 bytes per iteration while n > 7
  20759. sub r11, 8
  20760. mov rax, QWORD PTR [r11]
  20761. bswap rax
  20762. mov QWORD PTR [rcx], rax
  20763. add rcx, 8
  20764. sub r9, 8
  20765. L_3072_from_bin_bswap_8_end:
  20766. cmp r9, 7
  20767. jg L_3072_from_bin_bswap_8_start
  20768. cmp r9, r13 ; no bytes left over: skip the partial word
  20769. je L_3072_from_bin_bswap_hi_end
  20770. mov r10, r13 ; r10 = 0, accumulator for the partial top word
  20771. mov rax, r13 ; clear rax so the byte loaded into al is zero-extended
  20772. L_3072_from_bin_bswap_hi_start: ; fold the remaining leading bytes a[0..] into one word, most significant first
  20773. mov al, BYTE PTR [r8]
  20774. shl r10, 8
  20775. inc r8
  20776. add r10, rax
  20777. dec r9
  20778. jg L_3072_from_bin_bswap_hi_start ; branches on the flags set by dec r9
  20779. mov QWORD PTR [rcx], r10
  20780. add rcx, 8
  20781. L_3072_from_bin_bswap_hi_end:
  20782. cmp rcx, r12
  20783. jge L_3072_from_bin_bswap_zero_end ; NOTE(review): pointer comparison uses signed jge/jl
  20784. L_3072_from_bin_bswap_zero_start: ; zero the remaining output words up to r + 384
  20785. mov QWORD PTR [rcx], r13
  20786. add rcx, 8
  20787. cmp rcx, r12
  20788. jl L_3072_from_bin_bswap_zero_start
  20789. L_3072_from_bin_bswap_zero_end:
  20790. pop r13
  20791. pop r12
  20792. ret
  20793. sp_3072_from_bin_bswap ENDP
  20794. _text ENDS
  20795. IFNDEF NO_MOVBE_SUPPORT
  20796. ; /* Read big endian unsigned byte array into r.
  20797. ; * Uses the movbe instruction which is an optional instruction.
  20798. ; * Registers as used below: rcx = r, r8 = a, r9 = n. rdx (size) is never read; the output end is fixed at r + 384 bytes.
  20799. ; * r A single precision integer.
  20800. ; * size Maximum number of bytes to convert
  20801. ; * a Byte array.
  20802. ; * n Number of bytes in array to read.
  20803. ; */
  20804. _text SEGMENT READONLY PARA
  20805. sp_3072_from_bin_movbe PROC
  20806. push r12
  20807. mov r11, r8
  20808. mov r12, rcx
  20809. add r11, r9 ; r11 = a + n, one past the last input byte; input is consumed from the tail
  20810. add r12, 384 ; r12 = r + 384, end of the output buffer used by the zero-fill loop
  20811. jmp L_3072_from_bin_movbe_64_end
  20812. L_3072_from_bin_movbe_64_start: ; 64 bytes per iteration while n > 63
  20813. sub r11, 64
  20814. movbe rax, QWORD PTR [r11+56] ; movbe loads with the byte order reversed; last 8 input bytes become the lowest output word
  20815. movbe r10, QWORD PTR [r11+48]
  20816. mov QWORD PTR [rcx], rax
  20817. mov QWORD PTR [rcx+8], r10
  20818. movbe rax, QWORD PTR [r11+40]
  20819. movbe r10, QWORD PTR [r11+32]
  20820. mov QWORD PTR [rcx+16], rax
  20821. mov QWORD PTR [rcx+24], r10
  20822. movbe rax, QWORD PTR [r11+24]
  20823. movbe r10, QWORD PTR [r11+16]
  20824. mov QWORD PTR [rcx+32], rax
  20825. mov QWORD PTR [rcx+40], r10
  20826. movbe rax, QWORD PTR [r11+8]
  20827. movbe r10, QWORD PTR [r11]
  20828. mov QWORD PTR [rcx+48], rax
  20829. mov QWORD PTR [rcx+56], r10
  20830. add rcx, 64 ; advance the output cursor
  20831. sub r9, 64 ; r9 = input bytes still to convert
  20832. L_3072_from_bin_movbe_64_end:
  20833. cmp r9, 63
  20834. jg L_3072_from_bin_movbe_64_start ; signed compare on the remaining byte count
  20835. jmp L_3072_from_bin_movbe_8_end
  20836. L_3072_from_bin_movbe_8_start: ; 8 bytes per iteration while n > 7
  20837. sub r11, 8
  20838. movbe rax, QWORD PTR [r11]
  20839. mov QWORD PTR [rcx], rax
  20840. add rcx, 8
  20841. sub r9, 8
  20842. L_3072_from_bin_movbe_8_end:
  20843. cmp r9, 7
  20844. jg L_3072_from_bin_movbe_8_start
  20845. cmp r9, 0 ; no bytes left over: skip the partial word
  20846. je L_3072_from_bin_movbe_hi_end
  20847. mov r10, 0 ; accumulator for the partial top word
  20848. mov rax, 0 ; clear rax so the byte loaded into al is zero-extended
  20849. L_3072_from_bin_movbe_hi_start: ; fold the remaining leading bytes a[0..] into one word, most significant first
  20850. mov al, BYTE PTR [r8]
  20851. shl r10, 8
  20852. inc r8
  20853. add r10, rax
  20854. dec r9
  20855. jg L_3072_from_bin_movbe_hi_start ; branches on the flags set by dec r9
  20856. mov QWORD PTR [rcx], r10
  20857. add rcx, 8
  20858. L_3072_from_bin_movbe_hi_end:
  20859. cmp rcx, r12
  20860. jge L_3072_from_bin_movbe_zero_end ; NOTE(review): pointer comparison uses signed jge/jl
  20861. L_3072_from_bin_movbe_zero_start: ; zero the remaining output words up to r + 384
  20862. mov QWORD PTR [rcx], 0
  20863. add rcx, 8
  20864. cmp rcx, r12
  20865. jl L_3072_from_bin_movbe_zero_start
  20866. L_3072_from_bin_movbe_zero_end:
  20867. pop r12
  20868. ret
  20869. sp_3072_from_bin_movbe ENDP
  20870. _text ENDS
  20871. ENDIF
  ; /* Serialise r into a as a 384 byte big endian number.
  ; * Byte order is reversed with the bswap instruction.
  ; *
  ; * r (rcx) A single precision integer: 48 64-bit words are read,
  ; *         highest address (offset 376) first.
  ; * a (rdx) Byte array; exactly 384 bytes are written.
  ; *
  ; * rax and r8 are used as scratch registers.
  ; */
  _text SEGMENT READONLY PARA
  sp_3072_to_bin_bswap_48 PROC
  ; Assembly-time cursors: source offset walks down from the top word while
  ; the destination offset walks up from the first output byte.
  sp_3072_tbb48_src = 376
  sp_3072_tbb48_dst = 0
  ; 24 groups, each converting two words (16 bytes).
  REPT 24
          mov     rax, QWORD PTR [rcx+sp_3072_tbb48_src]
          mov     r8, QWORD PTR [rcx+sp_3072_tbb48_src-8]
          bswap   rax
          bswap   r8
          mov     QWORD PTR [rdx+sp_3072_tbb48_dst], rax
          mov     QWORD PTR [rdx+sp_3072_tbb48_dst+8], r8
  sp_3072_tbb48_src = sp_3072_tbb48_src - 16
  sp_3072_tbb48_dst = sp_3072_tbb48_dst + 16
  ENDM
          ret
  sp_3072_to_bin_bswap_48 ENDP
  _text ENDS
  IFNDEF NO_MOVBE_SUPPORT
  ; /* Serialise r into a as a 384 byte big endian number.
  ; * Byte order is reversed on load with the movbe instruction, which is
  ; * an optional instruction.
  ; *
  ; * r (rcx) A single precision integer: 48 64-bit words are read,
  ; *         highest address (offset 376) first.
  ; * a (rdx) Byte array; exactly 384 bytes are written.
  ; *
  ; * rax and r8 are used as scratch registers.
  ; */
  _text SEGMENT READONLY PARA
  sp_3072_to_bin_movbe_48 PROC
  ; Assembly-time cursor: byte offset into the output. The matching source
  ; words sit at the mirrored offsets counted down from the top of r.
  sp_3072_tbm48_off = 0
  WHILE sp_3072_tbm48_off LT 384
          movbe   rax, QWORD PTR [rcx+376-sp_3072_tbm48_off]
          movbe   r8, QWORD PTR [rcx+368-sp_3072_tbm48_off]
          mov     QWORD PTR [rdx+sp_3072_tbm48_off], rax
          mov     QWORD PTR [rdx+sp_3072_tbm48_off+8], r8
  sp_3072_tbm48_off = sp_3072_tbm48_off + 16
  ENDM
          ret
  sp_3072_to_bin_movbe_48 ENDP
  _text ENDS
  ENDIF
  21138. ; /* Multiply a and b into r. (r = a * b)
  21139. ; *
  21140. ; * r A single precision integer.
  21141. ; * a A single precision integer.
  21142. ; * b A single precision integer.
  21143. ; */
  21144. _text SEGMENT READONLY PARA
  21145. sp_3072_mul_12 PROC
  21146. push r12
  21147. mov r9, rdx
  21148. sub rsp, 96
  21149. ; A[0] * B[0]
  21150. mov rax, QWORD PTR [r8]
  21151. mul QWORD PTR [r9]
  21152. xor r12, r12
  21153. mov QWORD PTR [rsp], rax
  21154. mov r11, rdx
  21155. ; A[0] * B[1]
  21156. mov rax, QWORD PTR [r8+8]
  21157. mul QWORD PTR [r9]
  21158. xor r10, r10
  21159. add r11, rax
  21160. adc r12, rdx
  21161. adc r10, 0
  21162. ; A[1] * B[0]
  21163. mov rax, QWORD PTR [r8]
  21164. mul QWORD PTR [r9+8]
  21165. add r11, rax
  21166. adc r12, rdx
  21167. adc r10, 0
  21168. mov QWORD PTR [rsp+8], r11
  21169. ; A[0] * B[2]
  21170. mov rax, QWORD PTR [r8+16]
  21171. mul QWORD PTR [r9]
  21172. xor r11, r11
  21173. add r12, rax
  21174. adc r10, rdx
  21175. adc r11, 0
  21176. ; A[1] * B[1]
  21177. mov rax, QWORD PTR [r8+8]
  21178. mul QWORD PTR [r9+8]
  21179. add r12, rax
  21180. adc r10, rdx
  21181. adc r11, 0
  21182. ; A[2] * B[0]
  21183. mov rax, QWORD PTR [r8]
  21184. mul QWORD PTR [r9+16]
  21185. add r12, rax
  21186. adc r10, rdx
  21187. adc r11, 0
  21188. mov QWORD PTR [rsp+16], r12
  21189. ; A[0] * B[3]
  21190. mov rax, QWORD PTR [r8+24]
  21191. mul QWORD PTR [r9]
  21192. xor r12, r12
  21193. add r10, rax
  21194. adc r11, rdx
  21195. adc r12, 0
  21196. ; A[1] * B[2]
  21197. mov rax, QWORD PTR [r8+16]
  21198. mul QWORD PTR [r9+8]
  21199. add r10, rax
  21200. adc r11, rdx
  21201. adc r12, 0
  21202. ; A[2] * B[1]
  21203. mov rax, QWORD PTR [r8+8]
  21204. mul QWORD PTR [r9+16]
  21205. add r10, rax
  21206. adc r11, rdx
  21207. adc r12, 0
  21208. ; A[3] * B[0]
  21209. mov rax, QWORD PTR [r8]
  21210. mul QWORD PTR [r9+24]
  21211. add r10, rax
  21212. adc r11, rdx
  21213. adc r12, 0
  21214. mov QWORD PTR [rsp+24], r10
  21215. ; A[0] * B[4]
  21216. mov rax, QWORD PTR [r8+32]
  21217. mul QWORD PTR [r9]
  21218. xor r10, r10
  21219. add r11, rax
  21220. adc r12, rdx
  21221. adc r10, 0
  21222. ; A[1] * B[3]
  21223. mov rax, QWORD PTR [r8+24]
  21224. mul QWORD PTR [r9+8]
  21225. add r11, rax
  21226. adc r12, rdx
  21227. adc r10, 0
  21228. ; A[2] * B[2]
  21229. mov rax, QWORD PTR [r8+16]
  21230. mul QWORD PTR [r9+16]
  21231. add r11, rax
  21232. adc r12, rdx
  21233. adc r10, 0
  21234. ; A[3] * B[1]
  21235. mov rax, QWORD PTR [r8+8]
  21236. mul QWORD PTR [r9+24]
  21237. add r11, rax
  21238. adc r12, rdx
  21239. adc r10, 0
  21240. ; A[4] * B[0]
  21241. mov rax, QWORD PTR [r8]
  21242. mul QWORD PTR [r9+32]
  21243. add r11, rax
  21244. adc r12, rdx
  21245. adc r10, 0
  21246. mov QWORD PTR [rsp+32], r11
  21247. ; A[0] * B[5]
  21248. mov rax, QWORD PTR [r8+40]
  21249. mul QWORD PTR [r9]
  21250. xor r11, r11
  21251. add r12, rax
  21252. adc r10, rdx
  21253. adc r11, 0
  21254. ; A[1] * B[4]
  21255. mov rax, QWORD PTR [r8+32]
  21256. mul QWORD PTR [r9+8]
  21257. add r12, rax
  21258. adc r10, rdx
  21259. adc r11, 0
  21260. ; A[2] * B[3]
  21261. mov rax, QWORD PTR [r8+24]
  21262. mul QWORD PTR [r9+16]
  21263. add r12, rax
  21264. adc r10, rdx
  21265. adc r11, 0
  21266. ; A[3] * B[2]
  21267. mov rax, QWORD PTR [r8+16]
  21268. mul QWORD PTR [r9+24]
  21269. add r12, rax
  21270. adc r10, rdx
  21271. adc r11, 0
  21272. ; A[4] * B[1]
  21273. mov rax, QWORD PTR [r8+8]
  21274. mul QWORD PTR [r9+32]
  21275. add r12, rax
  21276. adc r10, rdx
  21277. adc r11, 0
  21278. ; A[5] * B[0]
  21279. mov rax, QWORD PTR [r8]
  21280. mul QWORD PTR [r9+40]
  21281. add r12, rax
  21282. adc r10, rdx
  21283. adc r11, 0
  21284. mov QWORD PTR [rsp+40], r12
  21285. ; A[0] * B[6]
  21286. mov rax, QWORD PTR [r8+48]
  21287. mul QWORD PTR [r9]
  21288. xor r12, r12
  21289. add r10, rax
  21290. adc r11, rdx
  21291. adc r12, 0
  21292. ; A[1] * B[5]
  21293. mov rax, QWORD PTR [r8+40]
  21294. mul QWORD PTR [r9+8]
  21295. add r10, rax
  21296. adc r11, rdx
  21297. adc r12, 0
  21298. ; A[2] * B[4]
  21299. mov rax, QWORD PTR [r8+32]
  21300. mul QWORD PTR [r9+16]
  21301. add r10, rax
  21302. adc r11, rdx
  21303. adc r12, 0
  21304. ; A[3] * B[3]
  21305. mov rax, QWORD PTR [r8+24]
  21306. mul QWORD PTR [r9+24]
  21307. add r10, rax
  21308. adc r11, rdx
  21309. adc r12, 0
  21310. ; A[4] * B[2]
  21311. mov rax, QWORD PTR [r8+16]
  21312. mul QWORD PTR [r9+32]
  21313. add r10, rax
  21314. adc r11, rdx
  21315. adc r12, 0
  21316. ; A[5] * B[1]
  21317. mov rax, QWORD PTR [r8+8]
  21318. mul QWORD PTR [r9+40]
  21319. add r10, rax
  21320. adc r11, rdx
  21321. adc r12, 0
  21322. ; A[6] * B[0]
  21323. mov rax, QWORD PTR [r8]
  21324. mul QWORD PTR [r9+48]
  21325. add r10, rax
  21326. adc r11, rdx
  21327. adc r12, 0
  21328. mov QWORD PTR [rsp+48], r10
  21329. ; A[0] * B[7]
  21330. mov rax, QWORD PTR [r8+56]
  21331. mul QWORD PTR [r9]
  21332. xor r10, r10
  21333. add r11, rax
  21334. adc r12, rdx
  21335. adc r10, 0
  21336. ; A[1] * B[6]
  21337. mov rax, QWORD PTR [r8+48]
  21338. mul QWORD PTR [r9+8]
  21339. add r11, rax
  21340. adc r12, rdx
  21341. adc r10, 0
  21342. ; A[2] * B[5]
  21343. mov rax, QWORD PTR [r8+40]
  21344. mul QWORD PTR [r9+16]
  21345. add r11, rax
  21346. adc r12, rdx
  21347. adc r10, 0
  21348. ; A[3] * B[4]
  21349. mov rax, QWORD PTR [r8+32]
  21350. mul QWORD PTR [r9+24]
  21351. add r11, rax
  21352. adc r12, rdx
  21353. adc r10, 0
  21354. ; A[4] * B[3]
  21355. mov rax, QWORD PTR [r8+24]
  21356. mul QWORD PTR [r9+32]
  21357. add r11, rax
  21358. adc r12, rdx
  21359. adc r10, 0
  21360. ; A[5] * B[2]
  21361. mov rax, QWORD PTR [r8+16]
  21362. mul QWORD PTR [r9+40]
  21363. add r11, rax
  21364. adc r12, rdx
  21365. adc r10, 0
  21366. ; A[6] * B[1]
  21367. mov rax, QWORD PTR [r8+8]
  21368. mul QWORD PTR [r9+48]
  21369. add r11, rax
  21370. adc r12, rdx
  21371. adc r10, 0
  21372. ; A[7] * B[0]
  21373. mov rax, QWORD PTR [r8]
  21374. mul QWORD PTR [r9+56]
  21375. add r11, rax
  21376. adc r12, rdx
  21377. adc r10, 0
  21378. mov QWORD PTR [rsp+56], r11
  21379. ; A[0] * B[8]
  21380. mov rax, QWORD PTR [r8+64]
  21381. mul QWORD PTR [r9]
  21382. xor r11, r11
  21383. add r12, rax
  21384. adc r10, rdx
  21385. adc r11, 0
  21386. ; A[1] * B[7]
  21387. mov rax, QWORD PTR [r8+56]
  21388. mul QWORD PTR [r9+8]
  21389. add r12, rax
  21390. adc r10, rdx
  21391. adc r11, 0
  21392. ; A[2] * B[6]
  21393. mov rax, QWORD PTR [r8+48]
  21394. mul QWORD PTR [r9+16]
  21395. add r12, rax
  21396. adc r10, rdx
  21397. adc r11, 0
  21398. ; A[3] * B[5]
  21399. mov rax, QWORD PTR [r8+40]
  21400. mul QWORD PTR [r9+24]
  21401. add r12, rax
  21402. adc r10, rdx
  21403. adc r11, 0
  21404. ; A[4] * B[4]
  21405. mov rax, QWORD PTR [r8+32]
  21406. mul QWORD PTR [r9+32]
  21407. add r12, rax
  21408. adc r10, rdx
  21409. adc r11, 0
  21410. ; A[5] * B[3]
  21411. mov rax, QWORD PTR [r8+24]
  21412. mul QWORD PTR [r9+40]
  21413. add r12, rax
  21414. adc r10, rdx
  21415. adc r11, 0
  21416. ; A[6] * B[2]
  21417. mov rax, QWORD PTR [r8+16]
  21418. mul QWORD PTR [r9+48]
  21419. add r12, rax
  21420. adc r10, rdx
  21421. adc r11, 0
  21422. ; A[7] * B[1]
  21423. mov rax, QWORD PTR [r8+8]
  21424. mul QWORD PTR [r9+56]
  21425. add r12, rax
  21426. adc r10, rdx
  21427. adc r11, 0
  21428. ; A[8] * B[0]
  21429. mov rax, QWORD PTR [r8]
  21430. mul QWORD PTR [r9+64]
  21431. add r12, rax
  21432. adc r10, rdx
  21433. adc r11, 0
  21434. mov QWORD PTR [rsp+64], r12
  21435. ; A[0] * B[9]
  21436. mov rax, QWORD PTR [r8+72]
  21437. mul QWORD PTR [r9]
  21438. xor r12, r12
  21439. add r10, rax
  21440. adc r11, rdx
  21441. adc r12, 0
  21442. ; A[1] * B[8]
  21443. mov rax, QWORD PTR [r8+64]
  21444. mul QWORD PTR [r9+8]
  21445. add r10, rax
  21446. adc r11, rdx
  21447. adc r12, 0
  21448. ; A[2] * B[7]
  21449. mov rax, QWORD PTR [r8+56]
  21450. mul QWORD PTR [r9+16]
  21451. add r10, rax
  21452. adc r11, rdx
  21453. adc r12, 0
  21454. ; A[3] * B[6]
  21455. mov rax, QWORD PTR [r8+48]
  21456. mul QWORD PTR [r9+24]
  21457. add r10, rax
  21458. adc r11, rdx
  21459. adc r12, 0
  21460. ; A[4] * B[5]
  21461. mov rax, QWORD PTR [r8+40]
  21462. mul QWORD PTR [r9+32]
  21463. add r10, rax
  21464. adc r11, rdx
  21465. adc r12, 0
  21466. ; A[5] * B[4]
  21467. mov rax, QWORD PTR [r8+32]
  21468. mul QWORD PTR [r9+40]
  21469. add r10, rax
  21470. adc r11, rdx
  21471. adc r12, 0
  21472. ; A[6] * B[3]
  21473. mov rax, QWORD PTR [r8+24]
  21474. mul QWORD PTR [r9+48]
  21475. add r10, rax
  21476. adc r11, rdx
  21477. adc r12, 0
  21478. ; A[7] * B[2]
  21479. mov rax, QWORD PTR [r8+16]
  21480. mul QWORD PTR [r9+56]
  21481. add r10, rax
  21482. adc r11, rdx
  21483. adc r12, 0
  21484. ; A[8] * B[1]
  21485. mov rax, QWORD PTR [r8+8]
  21486. mul QWORD PTR [r9+64]
  21487. add r10, rax
  21488. adc r11, rdx
  21489. adc r12, 0
  21490. ; A[9] * B[0]
  21491. mov rax, QWORD PTR [r8]
  21492. mul QWORD PTR [r9+72]
  21493. add r10, rax
  21494. adc r11, rdx
  21495. adc r12, 0
  21496. mov QWORD PTR [rsp+72], r10
  21497. ; A[0] * B[10]
  21498. mov rax, QWORD PTR [r8+80]
  21499. mul QWORD PTR [r9]
  21500. xor r10, r10
  21501. add r11, rax
  21502. adc r12, rdx
  21503. adc r10, 0
  21504. ; A[1] * B[9]
  21505. mov rax, QWORD PTR [r8+72]
  21506. mul QWORD PTR [r9+8]
  21507. add r11, rax
  21508. adc r12, rdx
  21509. adc r10, 0
  21510. ; A[2] * B[8]
  21511. mov rax, QWORD PTR [r8+64]
  21512. mul QWORD PTR [r9+16]
  21513. add r11, rax
  21514. adc r12, rdx
  21515. adc r10, 0
  21516. ; A[3] * B[7]
  21517. mov rax, QWORD PTR [r8+56]
  21518. mul QWORD PTR [r9+24]
  21519. add r11, rax
  21520. adc r12, rdx
  21521. adc r10, 0
  21522. ; A[4] * B[6]
  21523. mov rax, QWORD PTR [r8+48]
  21524. mul QWORD PTR [r9+32]
  21525. add r11, rax
  21526. adc r12, rdx
  21527. adc r10, 0
  21528. ; A[5] * B[5]
  21529. mov rax, QWORD PTR [r8+40]
  21530. mul QWORD PTR [r9+40]
  21531. add r11, rax
  21532. adc r12, rdx
  21533. adc r10, 0
  21534. ; A[6] * B[4]
  21535. mov rax, QWORD PTR [r8+32]
  21536. mul QWORD PTR [r9+48]
  21537. add r11, rax
  21538. adc r12, rdx
  21539. adc r10, 0
  21540. ; A[7] * B[3]
  21541. mov rax, QWORD PTR [r8+24]
  21542. mul QWORD PTR [r9+56]
  21543. add r11, rax
  21544. adc r12, rdx
  21545. adc r10, 0
  21546. ; A[8] * B[2]
  21547. mov rax, QWORD PTR [r8+16]
  21548. mul QWORD PTR [r9+64]
  21549. add r11, rax
  21550. adc r12, rdx
  21551. adc r10, 0
  21552. ; A[9] * B[1]
  21553. mov rax, QWORD PTR [r8+8]
  21554. mul QWORD PTR [r9+72]
  21555. add r11, rax
  21556. adc r12, rdx
  21557. adc r10, 0
  21558. ; A[10] * B[0]
  21559. mov rax, QWORD PTR [r8]
  21560. mul QWORD PTR [r9+80]
  21561. add r11, rax
  21562. adc r12, rdx
  21563. adc r10, 0
  21564. mov QWORD PTR [rsp+80], r11
  21565. ; A[0] * B[11]
  21566. mov rax, QWORD PTR [r8+88]
  21567. mul QWORD PTR [r9]
  21568. xor r11, r11
  21569. add r12, rax
  21570. adc r10, rdx
  21571. adc r11, 0
  21572. ; A[1] * B[10]
  21573. mov rax, QWORD PTR [r8+80]
  21574. mul QWORD PTR [r9+8]
  21575. add r12, rax
  21576. adc r10, rdx
  21577. adc r11, 0
  21578. ; A[2] * B[9]
  21579. mov rax, QWORD PTR [r8+72]
  21580. mul QWORD PTR [r9+16]
  21581. add r12, rax
  21582. adc r10, rdx
  21583. adc r11, 0
  21584. ; A[3] * B[8]
  21585. mov rax, QWORD PTR [r8+64]
  21586. mul QWORD PTR [r9+24]
  21587. add r12, rax
  21588. adc r10, rdx
  21589. adc r11, 0
  21590. ; A[4] * B[7]
  21591. mov rax, QWORD PTR [r8+56]
  21592. mul QWORD PTR [r9+32]
  21593. add r12, rax
  21594. adc r10, rdx
  21595. adc r11, 0
  21596. ; A[5] * B[6]
  21597. mov rax, QWORD PTR [r8+48]
  21598. mul QWORD PTR [r9+40]
  21599. add r12, rax
  21600. adc r10, rdx
  21601. adc r11, 0
  21602. ; A[6] * B[5]
  21603. mov rax, QWORD PTR [r8+40]
  21604. mul QWORD PTR [r9+48]
  21605. add r12, rax
  21606. adc r10, rdx
  21607. adc r11, 0
  21608. ; A[7] * B[4]
  21609. mov rax, QWORD PTR [r8+32]
  21610. mul QWORD PTR [r9+56]
  21611. add r12, rax
  21612. adc r10, rdx
  21613. adc r11, 0
  21614. ; A[8] * B[3]
  21615. mov rax, QWORD PTR [r8+24]
  21616. mul QWORD PTR [r9+64]
  21617. add r12, rax
  21618. adc r10, rdx
  21619. adc r11, 0
  21620. ; A[9] * B[2]
  21621. mov rax, QWORD PTR [r8+16]
  21622. mul QWORD PTR [r9+72]
  21623. add r12, rax
  21624. adc r10, rdx
  21625. adc r11, 0
  21626. ; A[10] * B[1]
  21627. mov rax, QWORD PTR [r8+8]
  21628. mul QWORD PTR [r9+80]
  21629. add r12, rax
  21630. adc r10, rdx
  21631. adc r11, 0
  21632. ; A[11] * B[0]
  21633. mov rax, QWORD PTR [r8]
  21634. mul QWORD PTR [r9+88]
  21635. add r12, rax
  21636. adc r10, rdx
  21637. adc r11, 0
  21638. mov QWORD PTR [rsp+88], r12
  21639. ; A[1] * B[11]
  21640. mov rax, QWORD PTR [r8+88]
  21641. mul QWORD PTR [r9+8]
  21642. xor r12, r12
  21643. add r10, rax
  21644. adc r11, rdx
  21645. adc r12, 0
  21646. ; A[2] * B[10]
  21647. mov rax, QWORD PTR [r8+80]
  21648. mul QWORD PTR [r9+16]
  21649. add r10, rax
  21650. adc r11, rdx
  21651. adc r12, 0
  21652. ; A[3] * B[9]
  21653. mov rax, QWORD PTR [r8+72]
  21654. mul QWORD PTR [r9+24]
  21655. add r10, rax
  21656. adc r11, rdx
  21657. adc r12, 0
  21658. ; A[4] * B[8]
  21659. mov rax, QWORD PTR [r8+64]
  21660. mul QWORD PTR [r9+32]
  21661. add r10, rax
  21662. adc r11, rdx
  21663. adc r12, 0
  21664. ; A[5] * B[7]
  21665. mov rax, QWORD PTR [r8+56]
  21666. mul QWORD PTR [r9+40]
  21667. add r10, rax
  21668. adc r11, rdx
  21669. adc r12, 0
  21670. ; A[6] * B[6]
  21671. mov rax, QWORD PTR [r8+48]
  21672. mul QWORD PTR [r9+48]
  21673. add r10, rax
  21674. adc r11, rdx
  21675. adc r12, 0
  21676. ; A[7] * B[5]
  21677. mov rax, QWORD PTR [r8+40]
  21678. mul QWORD PTR [r9+56]
  21679. add r10, rax
  21680. adc r11, rdx
  21681. adc r12, 0
  21682. ; A[8] * B[4]
  21683. mov rax, QWORD PTR [r8+32]
  21684. mul QWORD PTR [r9+64]
  21685. add r10, rax
  21686. adc r11, rdx
  21687. adc r12, 0
  21688. ; A[9] * B[3]
  21689. mov rax, QWORD PTR [r8+24]
  21690. mul QWORD PTR [r9+72]
  21691. add r10, rax
  21692. adc r11, rdx
  21693. adc r12, 0
  21694. ; A[10] * B[2]
  21695. mov rax, QWORD PTR [r8+16]
  21696. mul QWORD PTR [r9+80]
  21697. add r10, rax
  21698. adc r11, rdx
  21699. adc r12, 0
  21700. ; A[11] * B[1]
  21701. mov rax, QWORD PTR [r8+8]
  21702. mul QWORD PTR [r9+88]
  21703. add r10, rax
  21704. adc r11, rdx
  21705. adc r12, 0
  21706. mov QWORD PTR [rcx+96], r10
  21707. ; A[2] * B[11]
  21708. mov rax, QWORD PTR [r8+88]
  21709. mul QWORD PTR [r9+16]
  21710. xor r10, r10
  21711. add r11, rax
  21712. adc r12, rdx
  21713. adc r10, 0
  21714. ; A[3] * B[10]
  21715. mov rax, QWORD PTR [r8+80]
  21716. mul QWORD PTR [r9+24]
  21717. add r11, rax
  21718. adc r12, rdx
  21719. adc r10, 0
  21720. ; A[4] * B[9]
  21721. mov rax, QWORD PTR [r8+72]
  21722. mul QWORD PTR [r9+32]
  21723. add r11, rax
  21724. adc r12, rdx
  21725. adc r10, 0
  21726. ; A[5] * B[8]
  21727. mov rax, QWORD PTR [r8+64]
  21728. mul QWORD PTR [r9+40]
  21729. add r11, rax
  21730. adc r12, rdx
  21731. adc r10, 0
  21732. ; A[6] * B[7]
  21733. mov rax, QWORD PTR [r8+56]
  21734. mul QWORD PTR [r9+48]
  21735. add r11, rax
  21736. adc r12, rdx
  21737. adc r10, 0
  21738. ; A[7] * B[6]
  21739. mov rax, QWORD PTR [r8+48]
  21740. mul QWORD PTR [r9+56]
  21741. add r11, rax
  21742. adc r12, rdx
  21743. adc r10, 0
  21744. ; A[8] * B[5]
  21745. mov rax, QWORD PTR [r8+40]
  21746. mul QWORD PTR [r9+64]
  21747. add r11, rax
  21748. adc r12, rdx
  21749. adc r10, 0
  21750. ; A[9] * B[4]
  21751. mov rax, QWORD PTR [r8+32]
  21752. mul QWORD PTR [r9+72]
  21753. add r11, rax
  21754. adc r12, rdx
  21755. adc r10, 0
  21756. ; A[10] * B[3]
  21757. mov rax, QWORD PTR [r8+24]
  21758. mul QWORD PTR [r9+80]
  21759. add r11, rax
  21760. adc r12, rdx
  21761. adc r10, 0
  21762. ; A[11] * B[2]
  21763. mov rax, QWORD PTR [r8+16]
  21764. mul QWORD PTR [r9+88]
  21765. add r11, rax
  21766. adc r12, rdx
  21767. adc r10, 0
  21768. mov QWORD PTR [rcx+104], r11
  21769. ; A[3] * B[11]
  21770. mov rax, QWORD PTR [r8+88]
  21771. mul QWORD PTR [r9+24]
  21772. xor r11, r11
  21773. add r12, rax
  21774. adc r10, rdx
  21775. adc r11, 0
  21776. ; A[4] * B[10]
  21777. mov rax, QWORD PTR [r8+80]
  21778. mul QWORD PTR [r9+32]
  21779. add r12, rax
  21780. adc r10, rdx
  21781. adc r11, 0
  21782. ; A[5] * B[9]
  21783. mov rax, QWORD PTR [r8+72]
  21784. mul QWORD PTR [r9+40]
  21785. add r12, rax
  21786. adc r10, rdx
  21787. adc r11, 0
  21788. ; A[6] * B[8]
  21789. mov rax, QWORD PTR [r8+64]
  21790. mul QWORD PTR [r9+48]
  21791. add r12, rax
  21792. adc r10, rdx
  21793. adc r11, 0
  21794. ; A[7] * B[7]
  21795. mov rax, QWORD PTR [r8+56]
  21796. mul QWORD PTR [r9+56]
  21797. add r12, rax
  21798. adc r10, rdx
  21799. adc r11, 0
  21800. ; A[8] * B[6]
  21801. mov rax, QWORD PTR [r8+48]
  21802. mul QWORD PTR [r9+64]
  21803. add r12, rax
  21804. adc r10, rdx
  21805. adc r11, 0
  21806. ; A[9] * B[5]
  21807. mov rax, QWORD PTR [r8+40]
  21808. mul QWORD PTR [r9+72]
  21809. add r12, rax
  21810. adc r10, rdx
  21811. adc r11, 0
  21812. ; A[10] * B[4]
  21813. mov rax, QWORD PTR [r8+32]
  21814. mul QWORD PTR [r9+80]
  21815. add r12, rax
  21816. adc r10, rdx
  21817. adc r11, 0
  21818. ; A[11] * B[3]
  21819. mov rax, QWORD PTR [r8+24]
  21820. mul QWORD PTR [r9+88]
  21821. add r12, rax
  21822. adc r10, rdx
  21823. adc r11, 0
  21824. mov QWORD PTR [rcx+112], r12
  21825. ; A[4] * B[11]
  21826. mov rax, QWORD PTR [r8+88]
  21827. mul QWORD PTR [r9+32]
  21828. xor r12, r12
  21829. add r10, rax
  21830. adc r11, rdx
  21831. adc r12, 0
  21832. ; A[5] * B[10]
  21833. mov rax, QWORD PTR [r8+80]
  21834. mul QWORD PTR [r9+40]
  21835. add r10, rax
  21836. adc r11, rdx
  21837. adc r12, 0
  21838. ; A[6] * B[9]
  21839. mov rax, QWORD PTR [r8+72]
  21840. mul QWORD PTR [r9+48]
  21841. add r10, rax
  21842. adc r11, rdx
  21843. adc r12, 0
  21844. ; A[7] * B[8]
  21845. mov rax, QWORD PTR [r8+64]
  21846. mul QWORD PTR [r9+56]
  21847. add r10, rax
  21848. adc r11, rdx
  21849. adc r12, 0
  21850. ; A[8] * B[7]
  21851. mov rax, QWORD PTR [r8+56]
  21852. mul QWORD PTR [r9+64]
  21853. add r10, rax
  21854. adc r11, rdx
  21855. adc r12, 0
  21856. ; A[9] * B[6]
  21857. mov rax, QWORD PTR [r8+48]
  21858. mul QWORD PTR [r9+72]
  21859. add r10, rax
  21860. adc r11, rdx
  21861. adc r12, 0
  21862. ; A[10] * B[5]
  21863. mov rax, QWORD PTR [r8+40]
  21864. mul QWORD PTR [r9+80]
  21865. add r10, rax
  21866. adc r11, rdx
  21867. adc r12, 0
  21868. ; A[11] * B[4]
  21869. mov rax, QWORD PTR [r8+32]
  21870. mul QWORD PTR [r9+88]
  21871. add r10, rax
  21872. adc r11, rdx
  21873. adc r12, 0
  21874. mov QWORD PTR [rcx+120], r10
  21875. ; A[5] * B[11]
  21876. mov rax, QWORD PTR [r8+88]
  21877. mul QWORD PTR [r9+40]
  21878. xor r10, r10
  21879. add r11, rax
  21880. adc r12, rdx
  21881. adc r10, 0
  21882. ; A[6] * B[10]
  21883. mov rax, QWORD PTR [r8+80]
  21884. mul QWORD PTR [r9+48]
  21885. add r11, rax
  21886. adc r12, rdx
  21887. adc r10, 0
  21888. ; A[7] * B[9]
  21889. mov rax, QWORD PTR [r8+72]
  21890. mul QWORD PTR [r9+56]
  21891. add r11, rax
  21892. adc r12, rdx
  21893. adc r10, 0
  21894. ; A[8] * B[8]
  21895. mov rax, QWORD PTR [r8+64]
  21896. mul QWORD PTR [r9+64]
  21897. add r11, rax
  21898. adc r12, rdx
  21899. adc r10, 0
  21900. ; A[9] * B[7]
  21901. mov rax, QWORD PTR [r8+56]
  21902. mul QWORD PTR [r9+72]
  21903. add r11, rax
  21904. adc r12, rdx
  21905. adc r10, 0
  21906. ; A[10] * B[6]
  21907. mov rax, QWORD PTR [r8+48]
  21908. mul QWORD PTR [r9+80]
  21909. add r11, rax
  21910. adc r12, rdx
  21911. adc r10, 0
  21912. ; A[11] * B[5]
  21913. mov rax, QWORD PTR [r8+40]
  21914. mul QWORD PTR [r9+88]
  21915. add r11, rax
  21916. adc r12, rdx
  21917. adc r10, 0
  21918. mov QWORD PTR [rcx+128], r11
  21919. ; A[6] * B[11]
  21920. mov rax, QWORD PTR [r8+88]
  21921. mul QWORD PTR [r9+48]
  21922. xor r11, r11
  21923. add r12, rax
  21924. adc r10, rdx
  21925. adc r11, 0
  21926. ; A[7] * B[10]
  21927. mov rax, QWORD PTR [r8+80]
  21928. mul QWORD PTR [r9+56]
  21929. add r12, rax
  21930. adc r10, rdx
  21931. adc r11, 0
  21932. ; A[8] * B[9]
  21933. mov rax, QWORD PTR [r8+72]
  21934. mul QWORD PTR [r9+64]
  21935. add r12, rax
  21936. adc r10, rdx
  21937. adc r11, 0
  21938. ; A[9] * B[8]
  21939. mov rax, QWORD PTR [r8+64]
  21940. mul QWORD PTR [r9+72]
  21941. add r12, rax
  21942. adc r10, rdx
  21943. adc r11, 0
  21944. ; A[10] * B[7]
  21945. mov rax, QWORD PTR [r8+56]
  21946. mul QWORD PTR [r9+80]
  21947. add r12, rax
  21948. adc r10, rdx
  21949. adc r11, 0
  21950. ; A[11] * B[6]
  21951. mov rax, QWORD PTR [r8+48]
  21952. mul QWORD PTR [r9+88]
  21953. add r12, rax
  21954. adc r10, rdx
  21955. adc r11, 0
  21956. mov QWORD PTR [rcx+136], r12
  21957. ; A[7] * B[11]
  21958. mov rax, QWORD PTR [r8+88]
  21959. mul QWORD PTR [r9+56]
  21960. xor r12, r12
  21961. add r10, rax
  21962. adc r11, rdx
  21963. adc r12, 0
  21964. ; A[8] * B[10]
  21965. mov rax, QWORD PTR [r8+80]
  21966. mul QWORD PTR [r9+64]
  21967. add r10, rax
  21968. adc r11, rdx
  21969. adc r12, 0
  21970. ; A[9] * B[9]
  21971. mov rax, QWORD PTR [r8+72]
  21972. mul QWORD PTR [r9+72]
  21973. add r10, rax
  21974. adc r11, rdx
  21975. adc r12, 0
  21976. ; A[10] * B[8]
  21977. mov rax, QWORD PTR [r8+64]
  21978. mul QWORD PTR [r9+80]
  21979. add r10, rax
  21980. adc r11, rdx
  21981. adc r12, 0
  21982. ; A[11] * B[7]
  21983. mov rax, QWORD PTR [r8+56]
  21984. mul QWORD PTR [r9+88]
  21985. add r10, rax
  21986. adc r11, rdx
  21987. adc r12, 0
  21988. mov QWORD PTR [rcx+144], r10
  21989. ; A[8] * B[11]
  21990. mov rax, QWORD PTR [r8+88]
  21991. mul QWORD PTR [r9+64]
  21992. xor r10, r10
  21993. add r11, rax
  21994. adc r12, rdx
  21995. adc r10, 0
  21996. ; A[9] * B[10]
  21997. mov rax, QWORD PTR [r8+80]
  21998. mul QWORD PTR [r9+72]
  21999. add r11, rax
  22000. adc r12, rdx
  22001. adc r10, 0
  22002. ; A[10] * B[9]
  22003. mov rax, QWORD PTR [r8+72]
  22004. mul QWORD PTR [r9+80]
  22005. add r11, rax
  22006. adc r12, rdx
  22007. adc r10, 0
  22008. ; A[11] * B[8]
  22009. mov rax, QWORD PTR [r8+64]
  22010. mul QWORD PTR [r9+88]
  22011. add r11, rax
  22012. adc r12, rdx
  22013. adc r10, 0
  22014. mov QWORD PTR [rcx+152], r11
  22015. ; A[9] * B[11]
  22016. mov rax, QWORD PTR [r8+88]
  22017. mul QWORD PTR [r9+72]
  22018. xor r11, r11
  22019. add r12, rax
  22020. adc r10, rdx
  22021. adc r11, 0
  22022. ; A[10] * B[10]
  22023. mov rax, QWORD PTR [r8+80]
  22024. mul QWORD PTR [r9+80]
  22025. add r12, rax
  22026. adc r10, rdx
  22027. adc r11, 0
  22028. ; A[11] * B[9]
  22029. mov rax, QWORD PTR [r8+72]
  22030. mul QWORD PTR [r9+88]
  22031. add r12, rax
  22032. adc r10, rdx
  22033. adc r11, 0
  22034. mov QWORD PTR [rcx+160], r12
  22035. ; A[10] * B[11]
  22036. mov rax, QWORD PTR [r8+88]
  22037. mul QWORD PTR [r9+80]
  22038. xor r12, r12
  22039. add r10, rax
  22040. adc r11, rdx
  22041. adc r12, 0
  22042. ; A[11] * B[10]
  22043. mov rax, QWORD PTR [r8+80]
  22044. mul QWORD PTR [r9+88]
  22045. add r10, rax
  22046. adc r11, rdx
  22047. adc r12, 0
  22048. mov QWORD PTR [rcx+168], r10
  22049. ; A[11] * B[11]
  22050. mov rax, QWORD PTR [r8+88]
  22051. mul QWORD PTR [r9+88]
  22052. add r11, rax
  22053. adc r12, rdx
  22054. mov QWORD PTR [rcx+176], r11
  22055. mov QWORD PTR [rcx+184], r12
  22056. mov rax, QWORD PTR [rsp]
  22057. mov rdx, QWORD PTR [rsp+8]
  22058. mov r10, QWORD PTR [rsp+16]
  22059. mov r11, QWORD PTR [rsp+24]
  22060. mov QWORD PTR [rcx], rax
  22061. mov QWORD PTR [rcx+8], rdx
  22062. mov QWORD PTR [rcx+16], r10
  22063. mov QWORD PTR [rcx+24], r11
  22064. mov rax, QWORD PTR [rsp+32]
  22065. mov rdx, QWORD PTR [rsp+40]
  22066. mov r10, QWORD PTR [rsp+48]
  22067. mov r11, QWORD PTR [rsp+56]
  22068. mov QWORD PTR [rcx+32], rax
  22069. mov QWORD PTR [rcx+40], rdx
  22070. mov QWORD PTR [rcx+48], r10
  22071. mov QWORD PTR [rcx+56], r11
  22072. mov rax, QWORD PTR [rsp+64]
  22073. mov rdx, QWORD PTR [rsp+72]
  22074. mov r10, QWORD PTR [rsp+80]
  22075. mov r11, QWORD PTR [rsp+88]
  22076. mov QWORD PTR [rcx+64], rax
  22077. mov QWORD PTR [rcx+72], rdx
  22078. mov QWORD PTR [rcx+80], r10
  22079. mov QWORD PTR [rcx+88], r11
  22080. add rsp, 96
  22081. pop r12
  22082. ret
  22083. sp_3072_mul_12 ENDP
  22084. _text ENDS
  22085. IFDEF HAVE_INTEL_AVX2
  22086. ; /* Multiply a and b into r. (r = a * b)
  22087. ; *
         ; * Row-by-row (schoolbook) multiply using MULX with two independent
         ; * carry chains: ADCX carries through CF, ADOX carries through OF.
         ; * Reads 12 64-bit words from each of a and b and writes 24 64-bit
         ; * words to r. r may be the same pointer as a or b: in that case the
         ; * low 12 result words are built in a 96-byte stack buffer and copied
         ; * to r at the end. Only exact pointer equality is checked.
         ; *
  22088. ; * r Result of multiplication. (rcx on entry)
  22089. ; * a First number to multiply. (rdx on entry)
  22090. ; * b Second number to multiply. (r8 on entry)
  22091. ; */
  22092. _text SEGMENT READONLY PARA
  22093. sp_3072_mul_avx2_12 PROC
         ; Save the registers this routine uses as working registers and
         ; restores before returning.
  22094. push rbx
  22095. push rbp
  22096. push r12
  22097. push r13
  22098. push r14
         ; Move the arguments out of rcx/rdx: rdx is loaded with A[i] as the
         ; implicit MULX operand and rcx receives high product halves below.
  22099. mov rbp, r8 ; rbp = b
  22100. mov r8, rcx ; r8 = r
  22101. mov r9, rdx ; r9 = a
  22102. sub rsp, 96 ; 96-byte scratch for result words 0..11
         ; rbx = where result words 0..11 are accumulated: r itself, or the
         ; stack scratch when r is the same pointer as a or as b.
  22103. cmp r9, r8
  22104. mov rbx, rsp
  22105. cmovne rbx, r8
  22106. cmp rbp, r8
  22107. cmove rbx, rsp
  22108. add r8, 96 ; r8 -> result words 12..23
  22109. xor r14, r14 ; r14 = 0 from here on; also clears CF and OF
         ; Row 0: rdx = A[0]. Products A[0] * B[j] are written straight out to
         ; words 0..12; only the CF (ADCX) chain is needed.
  22110. mov rdx, QWORD PTR [r9]
  22111. ; A[0] * B[0]
  22112. mulx r11, r10, QWORD PTR [rbp]
  22113. ; A[0] * B[1]
  22114. mulx r12, rax, QWORD PTR [rbp+8]
  22115. mov QWORD PTR [rbx], r10
  22116. adcx r11, rax
  22117. mov QWORD PTR [rbx+8], r11
  22118. ; A[0] * B[2]
  22119. mulx r10, rax, QWORD PTR [rbp+16]
  22120. adcx r12, rax
  22121. ; A[0] * B[3]
  22122. mulx r11, rax, QWORD PTR [rbp+24]
  22123. mov QWORD PTR [rbx+16], r12
  22124. adcx r10, rax
  22125. mov QWORD PTR [rbx+24], r10
  22126. ; A[0] * B[4]
  22127. mulx r12, rax, QWORD PTR [rbp+32]
  22128. adcx r11, rax
  22129. ; A[0] * B[5]
  22130. mulx r10, rax, QWORD PTR [rbp+40]
  22131. mov QWORD PTR [rbx+32], r11
  22132. adcx r12, rax
  22133. mov QWORD PTR [rbx+40], r12
  22134. ; A[0] * B[6]
  22135. mulx r11, rax, QWORD PTR [rbp+48]
  22136. adcx r10, rax
  22137. ; A[0] * B[7]
  22138. mulx r12, rax, QWORD PTR [rbp+56]
  22139. mov QWORD PTR [rbx+48], r10
  22140. adcx r11, rax
  22141. mov QWORD PTR [rbx+56], r11
  22142. ; A[0] * B[8]
  22143. mulx r10, rax, QWORD PTR [rbp+64]
  22144. adcx r12, rax
  22145. ; A[0] * B[9]
  22146. mulx r11, rax, QWORD PTR [rbp+72]
  22147. mov QWORD PTR [rbx+64], r12
  22148. adcx r10, rax
  22149. mov QWORD PTR [rbx+72], r10
  22150. ; A[0] * B[10]
  22151. mulx r12, rax, QWORD PTR [rbp+80]
  22152. adcx r11, rax
  22153. ; A[0] * B[11]
  22154. mulx r10, rax, QWORD PTR [rbp+88]
  22155. mov QWORD PTR [rbx+80], r11
  22156. adcx r12, rax
  22157. adcx r10, r14 ; word 12 = high half of A[0] * B[11] + CF
  22158. mov r13, r14
  22159. adcx r13, r14 ; r13 = carry out of word 12; CF is clear afterwards
  22160. mov QWORD PTR [rbx+88], r12
  22161. mov QWORD PTR [r8], r10
         ; Row 1: rdx = A[1]; add A[1] * B[0..11] into result words 1..13.
         ; For each j the low half of A[1] * B[j] (rax) is added to word 1+j on
         ; the CF chain (ADCX) and the high half (rcx) to word 2+j on the OF
         ; chain (ADOX). The MULX and MOV instructions in between do not
         ; modify flags, so both chains stay live across the whole row.
         ; Rows 2..11 below repeat this pattern shifted by one word each.
  22162. mov rdx, QWORD PTR [r9+8]
  22163. mov r11, QWORD PTR [rbx+8]
  22164. mov r12, QWORD PTR [rbx+16]
  22165. mov r10, QWORD PTR [rbx+24]
  22166. ; A[1] * B[0]
  22167. mulx rcx, rax, QWORD PTR [rbp]
  22168. adcx r11, rax
  22169. adox r12, rcx
  22170. ; A[1] * B[1]
  22171. mulx rcx, rax, QWORD PTR [rbp+8]
  22172. mov QWORD PTR [rbx+8], r11
  22173. adcx r12, rax
  22174. adox r10, rcx
  22175. mov QWORD PTR [rbx+16], r12
  22176. mov r11, QWORD PTR [rbx+32]
  22177. mov r12, QWORD PTR [rbx+40]
  22178. ; A[1] * B[2]
  22179. mulx rcx, rax, QWORD PTR [rbp+16]
  22180. adcx r10, rax
  22181. adox r11, rcx
  22182. ; A[1] * B[3]
  22183. mulx rcx, rax, QWORD PTR [rbp+24]
  22184. mov QWORD PTR [rbx+24], r10
  22185. adcx r11, rax
  22186. adox r12, rcx
  22187. mov QWORD PTR [rbx+32], r11
  22188. mov r10, QWORD PTR [rbx+48]
  22189. mov r11, QWORD PTR [rbx+56]
  22190. ; A[1] * B[4]
  22191. mulx rcx, rax, QWORD PTR [rbp+32]
  22192. adcx r12, rax
  22193. adox r10, rcx
  22194. ; A[1] * B[5]
  22195. mulx rcx, rax, QWORD PTR [rbp+40]
  22196. mov QWORD PTR [rbx+40], r12
  22197. adcx r10, rax
  22198. adox r11, rcx
  22199. mov QWORD PTR [rbx+48], r10
  22200. mov r12, QWORD PTR [rbx+64]
  22201. mov r10, QWORD PTR [rbx+72]
  22202. ; A[1] * B[6]
  22203. mulx rcx, rax, QWORD PTR [rbp+48]
  22204. adcx r11, rax
  22205. adox r12, rcx
  22206. ; A[1] * B[7]
  22207. mulx rcx, rax, QWORD PTR [rbp+56]
  22208. mov QWORD PTR [rbx+56], r11
  22209. adcx r12, rax
  22210. adox r10, rcx
  22211. mov QWORD PTR [rbx+64], r12
  22212. mov r11, QWORD PTR [rbx+80]
  22213. mov r12, QWORD PTR [rbx+88]
  22214. ; A[1] * B[8]
  22215. mulx rcx, rax, QWORD PTR [rbp+64]
  22216. adcx r10, rax
  22217. adox r11, rcx
  22218. ; A[1] * B[9]
  22219. mulx rcx, rax, QWORD PTR [rbp+72]
  22220. mov QWORD PTR [rbx+72], r10
  22221. adcx r11, rax
  22222. adox r12, rcx
  22223. mov QWORD PTR [rbx+80], r11
  22224. mov r10, QWORD PTR [r8]
  22225. ; A[1] * B[10]
  22226. mulx rcx, rax, QWORD PTR [rbp+80]
  22227. adcx r12, rax
  22228. adox r10, rcx
  22229. ; A[1] * B[11]
  22230. mulx rcx, rax, QWORD PTR [rbp+88]
  22231. mov QWORD PTR [rbx+88], r12
  22232. mov r11, r14 ; new top word (word 13) starts at zero
  22233. adcx r10, rax
  22234. adox r11, rcx
  22235. adcx r11, r13 ; fold in the carry saved by the previous row
  22236. mov r13, r14
  22237. adox r13, r14 ; r13 = OF carry out of this row (OF clear afterwards)
  22238. adcx r13, r14 ; r13 += CF carry out of this row (CF clear afterwards)
  22239. mov QWORD PTR [r8], r10
  22240. mov QWORD PTR [r8+8], r11
         ; Row 2: rdx = A[2]; add A[2] * B[0..11] into result words 2..14.
  22241. mov rdx, QWORD PTR [r9+16]
  22242. mov r12, QWORD PTR [rbx+16]
  22243. mov r10, QWORD PTR [rbx+24]
  22244. mov r11, QWORD PTR [rbx+32]
  22245. ; A[2] * B[0]
  22246. mulx rcx, rax, QWORD PTR [rbp]
  22247. adcx r12, rax
  22248. adox r10, rcx
  22249. ; A[2] * B[1]
  22250. mulx rcx, rax, QWORD PTR [rbp+8]
  22251. mov QWORD PTR [rbx+16], r12
  22252. adcx r10, rax
  22253. adox r11, rcx
  22254. mov QWORD PTR [rbx+24], r10
  22255. mov r12, QWORD PTR [rbx+40]
  22256. mov r10, QWORD PTR [rbx+48]
  22257. ; A[2] * B[2]
  22258. mulx rcx, rax, QWORD PTR [rbp+16]
  22259. adcx r11, rax
  22260. adox r12, rcx
  22261. ; A[2] * B[3]
  22262. mulx rcx, rax, QWORD PTR [rbp+24]
  22263. mov QWORD PTR [rbx+32], r11
  22264. adcx r12, rax
  22265. adox r10, rcx
  22266. mov QWORD PTR [rbx+40], r12
  22267. mov r11, QWORD PTR [rbx+56]
  22268. mov r12, QWORD PTR [rbx+64]
  22269. ; A[2] * B[4]
  22270. mulx rcx, rax, QWORD PTR [rbp+32]
  22271. adcx r10, rax
  22272. adox r11, rcx
  22273. ; A[2] * B[5]
  22274. mulx rcx, rax, QWORD PTR [rbp+40]
  22275. mov QWORD PTR [rbx+48], r10
  22276. adcx r11, rax
  22277. adox r12, rcx
  22278. mov QWORD PTR [rbx+56], r11
  22279. mov r10, QWORD PTR [rbx+72]
  22280. mov r11, QWORD PTR [rbx+80]
  22281. ; A[2] * B[6]
  22282. mulx rcx, rax, QWORD PTR [rbp+48]
  22283. adcx r12, rax
  22284. adox r10, rcx
  22285. ; A[2] * B[7]
  22286. mulx rcx, rax, QWORD PTR [rbp+56]
  22287. mov QWORD PTR [rbx+64], r12
  22288. adcx r10, rax
  22289. adox r11, rcx
  22290. mov QWORD PTR [rbx+72], r10
  22291. mov r12, QWORD PTR [rbx+88]
  22292. mov r10, QWORD PTR [r8]
  22293. ; A[2] * B[8]
  22294. mulx rcx, rax, QWORD PTR [rbp+64]
  22295. adcx r11, rax
  22296. adox r12, rcx
  22297. ; A[2] * B[9]
  22298. mulx rcx, rax, QWORD PTR [rbp+72]
  22299. mov QWORD PTR [rbx+80], r11
  22300. adcx r12, rax
  22301. adox r10, rcx
  22302. mov QWORD PTR [rbx+88], r12
  22303. mov r11, QWORD PTR [r8+8]
  22304. ; A[2] * B[10]
  22305. mulx rcx, rax, QWORD PTR [rbp+80]
  22306. adcx r10, rax
  22307. adox r11, rcx
  22308. ; A[2] * B[11]
  22309. mulx rcx, rax, QWORD PTR [rbp+88]
  22310. mov QWORD PTR [r8], r10
  22311. mov r12, r14
  22312. adcx r11, rax
  22313. adox r12, rcx
  22314. adcx r12, r13
  22315. mov r13, r14
  22316. adox r13, r14
  22317. adcx r13, r14
  22318. mov QWORD PTR [r8+8], r11
  22319. mov QWORD PTR [r8+16], r12
         ; Row 3: rdx = A[3]; add A[3] * B[0..11] into result words 3..15.
  22320. mov rdx, QWORD PTR [r9+24]
  22321. mov r10, QWORD PTR [rbx+24]
  22322. mov r11, QWORD PTR [rbx+32]
  22323. mov r12, QWORD PTR [rbx+40]
  22324. ; A[3] * B[0]
  22325. mulx rcx, rax, QWORD PTR [rbp]
  22326. adcx r10, rax
  22327. adox r11, rcx
  22328. ; A[3] * B[1]
  22329. mulx rcx, rax, QWORD PTR [rbp+8]
  22330. mov QWORD PTR [rbx+24], r10
  22331. adcx r11, rax
  22332. adox r12, rcx
  22333. mov QWORD PTR [rbx+32], r11
  22334. mov r10, QWORD PTR [rbx+48]
  22335. mov r11, QWORD PTR [rbx+56]
  22336. ; A[3] * B[2]
  22337. mulx rcx, rax, QWORD PTR [rbp+16]
  22338. adcx r12, rax
  22339. adox r10, rcx
  22340. ; A[3] * B[3]
  22341. mulx rcx, rax, QWORD PTR [rbp+24]
  22342. mov QWORD PTR [rbx+40], r12
  22343. adcx r10, rax
  22344. adox r11, rcx
  22345. mov QWORD PTR [rbx+48], r10
  22346. mov r12, QWORD PTR [rbx+64]
  22347. mov r10, QWORD PTR [rbx+72]
  22348. ; A[3] * B[4]
  22349. mulx rcx, rax, QWORD PTR [rbp+32]
  22350. adcx r11, rax
  22351. adox r12, rcx
  22352. ; A[3] * B[5]
  22353. mulx rcx, rax, QWORD PTR [rbp+40]
  22354. mov QWORD PTR [rbx+56], r11
  22355. adcx r12, rax
  22356. adox r10, rcx
  22357. mov QWORD PTR [rbx+64], r12
  22358. mov r11, QWORD PTR [rbx+80]
  22359. mov r12, QWORD PTR [rbx+88]
  22360. ; A[3] * B[6]
  22361. mulx rcx, rax, QWORD PTR [rbp+48]
  22362. adcx r10, rax
  22363. adox r11, rcx
  22364. ; A[3] * B[7]
  22365. mulx rcx, rax, QWORD PTR [rbp+56]
  22366. mov QWORD PTR [rbx+72], r10
  22367. adcx r11, rax
  22368. adox r12, rcx
  22369. mov QWORD PTR [rbx+80], r11
  22370. mov r10, QWORD PTR [r8]
  22371. mov r11, QWORD PTR [r8+8]
  22372. ; A[3] * B[8]
  22373. mulx rcx, rax, QWORD PTR [rbp+64]
  22374. adcx r12, rax
  22375. adox r10, rcx
  22376. ; A[3] * B[9]
  22377. mulx rcx, rax, QWORD PTR [rbp+72]
  22378. mov QWORD PTR [rbx+88], r12
  22379. adcx r10, rax
  22380. adox r11, rcx
  22381. mov QWORD PTR [r8], r10
  22382. mov r12, QWORD PTR [r8+16]
  22383. ; A[3] * B[10]
  22384. mulx rcx, rax, QWORD PTR [rbp+80]
  22385. adcx r11, rax
  22386. adox r12, rcx
  22387. ; A[3] * B[11]
  22388. mulx rcx, rax, QWORD PTR [rbp+88]
  22389. mov QWORD PTR [r8+8], r11
  22390. mov r10, r14
  22391. adcx r12, rax
  22392. adox r10, rcx
  22393. adcx r10, r13
  22394. mov r13, r14
  22395. adox r13, r14
  22396. adcx r13, r14
  22397. mov QWORD PTR [r8+16], r12
  22398. mov QWORD PTR [r8+24], r10
         ; Row 4: rdx = A[4]; add A[4] * B[0..11] into result words 4..16.
  22399. mov rdx, QWORD PTR [r9+32]
  22400. mov r11, QWORD PTR [rbx+32]
  22401. mov r12, QWORD PTR [rbx+40]
  22402. mov r10, QWORD PTR [rbx+48]
  22403. ; A[4] * B[0]
  22404. mulx rcx, rax, QWORD PTR [rbp]
  22405. adcx r11, rax
  22406. adox r12, rcx
  22407. ; A[4] * B[1]
  22408. mulx rcx, rax, QWORD PTR [rbp+8]
  22409. mov QWORD PTR [rbx+32], r11
  22410. adcx r12, rax
  22411. adox r10, rcx
  22412. mov QWORD PTR [rbx+40], r12
  22413. mov r11, QWORD PTR [rbx+56]
  22414. mov r12, QWORD PTR [rbx+64]
  22415. ; A[4] * B[2]
  22416. mulx rcx, rax, QWORD PTR [rbp+16]
  22417. adcx r10, rax
  22418. adox r11, rcx
  22419. ; A[4] * B[3]
  22420. mulx rcx, rax, QWORD PTR [rbp+24]
  22421. mov QWORD PTR [rbx+48], r10
  22422. adcx r11, rax
  22423. adox r12, rcx
  22424. mov QWORD PTR [rbx+56], r11
  22425. mov r10, QWORD PTR [rbx+72]
  22426. mov r11, QWORD PTR [rbx+80]
  22427. ; A[4] * B[4]
  22428. mulx rcx, rax, QWORD PTR [rbp+32]
  22429. adcx r12, rax
  22430. adox r10, rcx
  22431. ; A[4] * B[5]
  22432. mulx rcx, rax, QWORD PTR [rbp+40]
  22433. mov QWORD PTR [rbx+64], r12
  22434. adcx r10, rax
  22435. adox r11, rcx
  22436. mov QWORD PTR [rbx+72], r10
  22437. mov r12, QWORD PTR [rbx+88]
  22438. mov r10, QWORD PTR [r8]
  22439. ; A[4] * B[6]
  22440. mulx rcx, rax, QWORD PTR [rbp+48]
  22441. adcx r11, rax
  22442. adox r12, rcx
  22443. ; A[4] * B[7]
  22444. mulx rcx, rax, QWORD PTR [rbp+56]
  22445. mov QWORD PTR [rbx+80], r11
  22446. adcx r12, rax
  22447. adox r10, rcx
  22448. mov QWORD PTR [rbx+88], r12
  22449. mov r11, QWORD PTR [r8+8]
  22450. mov r12, QWORD PTR [r8+16]
  22451. ; A[4] * B[8]
  22452. mulx rcx, rax, QWORD PTR [rbp+64]
  22453. adcx r10, rax
  22454. adox r11, rcx
  22455. ; A[4] * B[9]
  22456. mulx rcx, rax, QWORD PTR [rbp+72]
  22457. mov QWORD PTR [r8], r10
  22458. adcx r11, rax
  22459. adox r12, rcx
  22460. mov QWORD PTR [r8+8], r11
  22461. mov r10, QWORD PTR [r8+24]
  22462. ; A[4] * B[10]
  22463. mulx rcx, rax, QWORD PTR [rbp+80]
  22464. adcx r12, rax
  22465. adox r10, rcx
  22466. ; A[4] * B[11]
  22467. mulx rcx, rax, QWORD PTR [rbp+88]
  22468. mov QWORD PTR [r8+16], r12
  22469. mov r11, r14
  22470. adcx r10, rax
  22471. adox r11, rcx
  22472. adcx r11, r13
  22473. mov r13, r14
  22474. adox r13, r14
  22475. adcx r13, r14
  22476. mov QWORD PTR [r8+24], r10
  22477. mov QWORD PTR [r8+32], r11
         ; Row 5: rdx = A[5]; add A[5] * B[0..11] into result words 5..17.
  22478. mov rdx, QWORD PTR [r9+40]
  22479. mov r12, QWORD PTR [rbx+40]
  22480. mov r10, QWORD PTR [rbx+48]
  22481. mov r11, QWORD PTR [rbx+56]
  22482. ; A[5] * B[0]
  22483. mulx rcx, rax, QWORD PTR [rbp]
  22484. adcx r12, rax
  22485. adox r10, rcx
  22486. ; A[5] * B[1]
  22487. mulx rcx, rax, QWORD PTR [rbp+8]
  22488. mov QWORD PTR [rbx+40], r12
  22489. adcx r10, rax
  22490. adox r11, rcx
  22491. mov QWORD PTR [rbx+48], r10
  22492. mov r12, QWORD PTR [rbx+64]
  22493. mov r10, QWORD PTR [rbx+72]
  22494. ; A[5] * B[2]
  22495. mulx rcx, rax, QWORD PTR [rbp+16]
  22496. adcx r11, rax
  22497. adox r12, rcx
  22498. ; A[5] * B[3]
  22499. mulx rcx, rax, QWORD PTR [rbp+24]
  22500. mov QWORD PTR [rbx+56], r11
  22501. adcx r12, rax
  22502. adox r10, rcx
  22503. mov QWORD PTR [rbx+64], r12
  22504. mov r11, QWORD PTR [rbx+80]
  22505. mov r12, QWORD PTR [rbx+88]
  22506. ; A[5] * B[4]
  22507. mulx rcx, rax, QWORD PTR [rbp+32]
  22508. adcx r10, rax
  22509. adox r11, rcx
  22510. ; A[5] * B[5]
  22511. mulx rcx, rax, QWORD PTR [rbp+40]
  22512. mov QWORD PTR [rbx+72], r10
  22513. adcx r11, rax
  22514. adox r12, rcx
  22515. mov QWORD PTR [rbx+80], r11
  22516. mov r10, QWORD PTR [r8]
  22517. mov r11, QWORD PTR [r8+8]
  22518. ; A[5] * B[6]
  22519. mulx rcx, rax, QWORD PTR [rbp+48]
  22520. adcx r12, rax
  22521. adox r10, rcx
  22522. ; A[5] * B[7]
  22523. mulx rcx, rax, QWORD PTR [rbp+56]
  22524. mov QWORD PTR [rbx+88], r12
  22525. adcx r10, rax
  22526. adox r11, rcx
  22527. mov QWORD PTR [r8], r10
  22528. mov r12, QWORD PTR [r8+16]
  22529. mov r10, QWORD PTR [r8+24]
  22530. ; A[5] * B[8]
  22531. mulx rcx, rax, QWORD PTR [rbp+64]
  22532. adcx r11, rax
  22533. adox r12, rcx
  22534. ; A[5] * B[9]
  22535. mulx rcx, rax, QWORD PTR [rbp+72]
  22536. mov QWORD PTR [r8+8], r11
  22537. adcx r12, rax
  22538. adox r10, rcx
  22539. mov QWORD PTR [r8+16], r12
  22540. mov r11, QWORD PTR [r8+32]
  22541. ; A[5] * B[10]
  22542. mulx rcx, rax, QWORD PTR [rbp+80]
  22543. adcx r10, rax
  22544. adox r11, rcx
  22545. ; A[5] * B[11]
  22546. mulx rcx, rax, QWORD PTR [rbp+88]
  22547. mov QWORD PTR [r8+24], r10
  22548. mov r12, r14
  22549. adcx r11, rax
  22550. adox r12, rcx
  22551. adcx r12, r13
  22552. mov r13, r14
  22553. adox r13, r14
  22554. adcx r13, r14
  22555. mov QWORD PTR [r8+32], r11
  22556. mov QWORD PTR [r8+40], r12
         ; Row 6: rdx = A[6]; add A[6] * B[0..11] into result words 6..18.
  22557. mov rdx, QWORD PTR [r9+48]
  22558. mov r10, QWORD PTR [rbx+48]
  22559. mov r11, QWORD PTR [rbx+56]
  22560. mov r12, QWORD PTR [rbx+64]
  22561. ; A[6] * B[0]
  22562. mulx rcx, rax, QWORD PTR [rbp]
  22563. adcx r10, rax
  22564. adox r11, rcx
  22565. ; A[6] * B[1]
  22566. mulx rcx, rax, QWORD PTR [rbp+8]
  22567. mov QWORD PTR [rbx+48], r10
  22568. adcx r11, rax
  22569. adox r12, rcx
  22570. mov QWORD PTR [rbx+56], r11
  22571. mov r10, QWORD PTR [rbx+72]
  22572. mov r11, QWORD PTR [rbx+80]
  22573. ; A[6] * B[2]
  22574. mulx rcx, rax, QWORD PTR [rbp+16]
  22575. adcx r12, rax
  22576. adox r10, rcx
  22577. ; A[6] * B[3]
  22578. mulx rcx, rax, QWORD PTR [rbp+24]
  22579. mov QWORD PTR [rbx+64], r12
  22580. adcx r10, rax
  22581. adox r11, rcx
  22582. mov QWORD PTR [rbx+72], r10
  22583. mov r12, QWORD PTR [rbx+88]
  22584. mov r10, QWORD PTR [r8]
  22585. ; A[6] * B[4]
  22586. mulx rcx, rax, QWORD PTR [rbp+32]
  22587. adcx r11, rax
  22588. adox r12, rcx
  22589. ; A[6] * B[5]
  22590. mulx rcx, rax, QWORD PTR [rbp+40]
  22591. mov QWORD PTR [rbx+80], r11
  22592. adcx r12, rax
  22593. adox r10, rcx
  22594. mov QWORD PTR [rbx+88], r12
  22595. mov r11, QWORD PTR [r8+8]
  22596. mov r12, QWORD PTR [r8+16]
  22597. ; A[6] * B[6]
  22598. mulx rcx, rax, QWORD PTR [rbp+48]
  22599. adcx r10, rax
  22600. adox r11, rcx
  22601. ; A[6] * B[7]
  22602. mulx rcx, rax, QWORD PTR [rbp+56]
  22603. mov QWORD PTR [r8], r10
  22604. adcx r11, rax
  22605. adox r12, rcx
  22606. mov QWORD PTR [r8+8], r11
  22607. mov r10, QWORD PTR [r8+24]
  22608. mov r11, QWORD PTR [r8+32]
  22609. ; A[6] * B[8]
  22610. mulx rcx, rax, QWORD PTR [rbp+64]
  22611. adcx r12, rax
  22612. adox r10, rcx
  22613. ; A[6] * B[9]
  22614. mulx rcx, rax, QWORD PTR [rbp+72]
  22615. mov QWORD PTR [r8+16], r12
  22616. adcx r10, rax
  22617. adox r11, rcx
  22618. mov QWORD PTR [r8+24], r10
  22619. mov r12, QWORD PTR [r8+40]
  22620. ; A[6] * B[10]
  22621. mulx rcx, rax, QWORD PTR [rbp+80]
  22622. adcx r11, rax
  22623. adox r12, rcx
  22624. ; A[6] * B[11]
  22625. mulx rcx, rax, QWORD PTR [rbp+88]
  22626. mov QWORD PTR [r8+32], r11
  22627. mov r10, r14
  22628. adcx r12, rax
  22629. adox r10, rcx
  22630. adcx r10, r13
  22631. mov r13, r14
  22632. adox r13, r14
  22633. adcx r13, r14
  22634. mov QWORD PTR [r8+40], r12
  22635. mov QWORD PTR [r8+48], r10
         ; Row 7: rdx = A[7]; add A[7] * B[0..11] into result words 7..19.
  22636. mov rdx, QWORD PTR [r9+56]
  22637. mov r11, QWORD PTR [rbx+56]
  22638. mov r12, QWORD PTR [rbx+64]
  22639. mov r10, QWORD PTR [rbx+72]
  22640. ; A[7] * B[0]
  22641. mulx rcx, rax, QWORD PTR [rbp]
  22642. adcx r11, rax
  22643. adox r12, rcx
  22644. ; A[7] * B[1]
  22645. mulx rcx, rax, QWORD PTR [rbp+8]
  22646. mov QWORD PTR [rbx+56], r11
  22647. adcx r12, rax
  22648. adox r10, rcx
  22649. mov QWORD PTR [rbx+64], r12
  22650. mov r11, QWORD PTR [rbx+80]
  22651. mov r12, QWORD PTR [rbx+88]
  22652. ; A[7] * B[2]
  22653. mulx rcx, rax, QWORD PTR [rbp+16]
  22654. adcx r10, rax
  22655. adox r11, rcx
  22656. ; A[7] * B[3]
  22657. mulx rcx, rax, QWORD PTR [rbp+24]
  22658. mov QWORD PTR [rbx+72], r10
  22659. adcx r11, rax
  22660. adox r12, rcx
  22661. mov QWORD PTR [rbx+80], r11
  22662. mov r10, QWORD PTR [r8]
  22663. mov r11, QWORD PTR [r8+8]
  22664. ; A[7] * B[4]
  22665. mulx rcx, rax, QWORD PTR [rbp+32]
  22666. adcx r12, rax
  22667. adox r10, rcx
  22668. ; A[7] * B[5]
  22669. mulx rcx, rax, QWORD PTR [rbp+40]
  22670. mov QWORD PTR [rbx+88], r12
  22671. adcx r10, rax
  22672. adox r11, rcx
  22673. mov QWORD PTR [r8], r10
  22674. mov r12, QWORD PTR [r8+16]
  22675. mov r10, QWORD PTR [r8+24]
  22676. ; A[7] * B[6]
  22677. mulx rcx, rax, QWORD PTR [rbp+48]
  22678. adcx r11, rax
  22679. adox r12, rcx
  22680. ; A[7] * B[7]
  22681. mulx rcx, rax, QWORD PTR [rbp+56]
  22682. mov QWORD PTR [r8+8], r11
  22683. adcx r12, rax
  22684. adox r10, rcx
  22685. mov QWORD PTR [r8+16], r12
  22686. mov r11, QWORD PTR [r8+32]
  22687. mov r12, QWORD PTR [r8+40]
  22688. ; A[7] * B[8]
  22689. mulx rcx, rax, QWORD PTR [rbp+64]
  22690. adcx r10, rax
  22691. adox r11, rcx
  22692. ; A[7] * B[9]
  22693. mulx rcx, rax, QWORD PTR [rbp+72]
  22694. mov QWORD PTR [r8+24], r10
  22695. adcx r11, rax
  22696. adox r12, rcx
  22697. mov QWORD PTR [r8+32], r11
  22698. mov r10, QWORD PTR [r8+48]
  22699. ; A[7] * B[10]
  22700. mulx rcx, rax, QWORD PTR [rbp+80]
  22701. adcx r12, rax
  22702. adox r10, rcx
  22703. ; A[7] * B[11]
  22704. mulx rcx, rax, QWORD PTR [rbp+88]
  22705. mov QWORD PTR [r8+40], r12
  22706. mov r11, r14
  22707. adcx r10, rax
  22708. adox r11, rcx
  22709. adcx r11, r13
  22710. mov r13, r14
  22711. adox r13, r14
  22712. adcx r13, r14
  22713. mov QWORD PTR [r8+48], r10
  22714. mov QWORD PTR [r8+56], r11
         ; Row 8: rdx = A[8]; add A[8] * B[0..11] into result words 8..20.
  22715. mov rdx, QWORD PTR [r9+64]
  22716. mov r12, QWORD PTR [rbx+64]
  22717. mov r10, QWORD PTR [rbx+72]
  22718. mov r11, QWORD PTR [rbx+80]
  22719. ; A[8] * B[0]
  22720. mulx rcx, rax, QWORD PTR [rbp]
  22721. adcx r12, rax
  22722. adox r10, rcx
  22723. ; A[8] * B[1]
  22724. mulx rcx, rax, QWORD PTR [rbp+8]
  22725. mov QWORD PTR [rbx+64], r12
  22726. adcx r10, rax
  22727. adox r11, rcx
  22728. mov QWORD PTR [rbx+72], r10
  22729. mov r12, QWORD PTR [rbx+88]
  22730. mov r10, QWORD PTR [r8]
  22731. ; A[8] * B[2]
  22732. mulx rcx, rax, QWORD PTR [rbp+16]
  22733. adcx r11, rax
  22734. adox r12, rcx
  22735. ; A[8] * B[3]
  22736. mulx rcx, rax, QWORD PTR [rbp+24]
  22737. mov QWORD PTR [rbx+80], r11
  22738. adcx r12, rax
  22739. adox r10, rcx
  22740. mov QWORD PTR [rbx+88], r12
  22741. mov r11, QWORD PTR [r8+8]
  22742. mov r12, QWORD PTR [r8+16]
  22743. ; A[8] * B[4]
  22744. mulx rcx, rax, QWORD PTR [rbp+32]
  22745. adcx r10, rax
  22746. adox r11, rcx
  22747. ; A[8] * B[5]
  22748. mulx rcx, rax, QWORD PTR [rbp+40]
  22749. mov QWORD PTR [r8], r10
  22750. adcx r11, rax
  22751. adox r12, rcx
  22752. mov QWORD PTR [r8+8], r11
  22753. mov r10, QWORD PTR [r8+24]
  22754. mov r11, QWORD PTR [r8+32]
  22755. ; A[8] * B[6]
  22756. mulx rcx, rax, QWORD PTR [rbp+48]
  22757. adcx r12, rax
  22758. adox r10, rcx
  22759. ; A[8] * B[7]
  22760. mulx rcx, rax, QWORD PTR [rbp+56]
  22761. mov QWORD PTR [r8+16], r12
  22762. adcx r10, rax
  22763. adox r11, rcx
  22764. mov QWORD PTR [r8+24], r10
  22765. mov r12, QWORD PTR [r8+40]
  22766. mov r10, QWORD PTR [r8+48]
  22767. ; A[8] * B[8]
  22768. mulx rcx, rax, QWORD PTR [rbp+64]
  22769. adcx r11, rax
  22770. adox r12, rcx
  22771. ; A[8] * B[9]
  22772. mulx rcx, rax, QWORD PTR [rbp+72]
  22773. mov QWORD PTR [r8+32], r11
  22774. adcx r12, rax
  22775. adox r10, rcx
  22776. mov QWORD PTR [r8+40], r12
  22777. mov r11, QWORD PTR [r8+56]
  22778. ; A[8] * B[10]
  22779. mulx rcx, rax, QWORD PTR [rbp+80]
  22780. adcx r10, rax
  22781. adox r11, rcx
  22782. ; A[8] * B[11]
  22783. mulx rcx, rax, QWORD PTR [rbp+88]
  22784. mov QWORD PTR [r8+48], r10
  22785. mov r12, r14
  22786. adcx r11, rax
  22787. adox r12, rcx
  22788. adcx r12, r13
  22789. mov r13, r14
  22790. adox r13, r14
  22791. adcx r13, r14
  22792. mov QWORD PTR [r8+56], r11
  22793. mov QWORD PTR [r8+64], r12
         ; Row 9: rdx = A[9]; add A[9] * B[0..11] into result words 9..21.
  22794. mov rdx, QWORD PTR [r9+72]
  22795. mov r10, QWORD PTR [rbx+72]
  22796. mov r11, QWORD PTR [rbx+80]
  22797. mov r12, QWORD PTR [rbx+88]
  22798. ; A[9] * B[0]
  22799. mulx rcx, rax, QWORD PTR [rbp]
  22800. adcx r10, rax
  22801. adox r11, rcx
  22802. ; A[9] * B[1]
  22803. mulx rcx, rax, QWORD PTR [rbp+8]
  22804. mov QWORD PTR [rbx+72], r10
  22805. adcx r11, rax
  22806. adox r12, rcx
  22807. mov QWORD PTR [rbx+80], r11
  22808. mov r10, QWORD PTR [r8]
  22809. mov r11, QWORD PTR [r8+8]
  22810. ; A[9] * B[2]
  22811. mulx rcx, rax, QWORD PTR [rbp+16]
  22812. adcx r12, rax
  22813. adox r10, rcx
  22814. ; A[9] * B[3]
  22815. mulx rcx, rax, QWORD PTR [rbp+24]
  22816. mov QWORD PTR [rbx+88], r12
  22817. adcx r10, rax
  22818. adox r11, rcx
  22819. mov QWORD PTR [r8], r10
  22820. mov r12, QWORD PTR [r8+16]
  22821. mov r10, QWORD PTR [r8+24]
  22822. ; A[9] * B[4]
  22823. mulx rcx, rax, QWORD PTR [rbp+32]
  22824. adcx r11, rax
  22825. adox r12, rcx
  22826. ; A[9] * B[5]
  22827. mulx rcx, rax, QWORD PTR [rbp+40]
  22828. mov QWORD PTR [r8+8], r11
  22829. adcx r12, rax
  22830. adox r10, rcx
  22831. mov QWORD PTR [r8+16], r12
  22832. mov r11, QWORD PTR [r8+32]
  22833. mov r12, QWORD PTR [r8+40]
  22834. ; A[9] * B[6]
  22835. mulx rcx, rax, QWORD PTR [rbp+48]
  22836. adcx r10, rax
  22837. adox r11, rcx
  22838. ; A[9] * B[7]
  22839. mulx rcx, rax, QWORD PTR [rbp+56]
  22840. mov QWORD PTR [r8+24], r10
  22841. adcx r11, rax
  22842. adox r12, rcx
  22843. mov QWORD PTR [r8+32], r11
  22844. mov r10, QWORD PTR [r8+48]
  22845. mov r11, QWORD PTR [r8+56]
  22846. ; A[9] * B[8]
  22847. mulx rcx, rax, QWORD PTR [rbp+64]
  22848. adcx r12, rax
  22849. adox r10, rcx
  22850. ; A[9] * B[9]
  22851. mulx rcx, rax, QWORD PTR [rbp+72]
  22852. mov QWORD PTR [r8+40], r12
  22853. adcx r10, rax
  22854. adox r11, rcx
  22855. mov QWORD PTR [r8+48], r10
  22856. mov r12, QWORD PTR [r8+64]
  22857. ; A[9] * B[10]
  22858. mulx rcx, rax, QWORD PTR [rbp+80]
  22859. adcx r11, rax
  22860. adox r12, rcx
  22861. ; A[9] * B[11]
  22862. mulx rcx, rax, QWORD PTR [rbp+88]
  22863. mov QWORD PTR [r8+56], r11
  22864. mov r10, r14
  22865. adcx r12, rax
  22866. adox r10, rcx
  22867. adcx r10, r13
  22868. mov r13, r14
  22869. adox r13, r14
  22870. adcx r13, r14
  22871. mov QWORD PTR [r8+64], r12
  22872. mov QWORD PTR [r8+72], r10
         ; Row 10: rdx = A[10]; add A[10] * B[0..11] into result words 10..22.
  22873. mov rdx, QWORD PTR [r9+80]
  22874. mov r11, QWORD PTR [rbx+80]
  22875. mov r12, QWORD PTR [rbx+88]
  22876. mov r10, QWORD PTR [r8]
  22877. ; A[10] * B[0]
  22878. mulx rcx, rax, QWORD PTR [rbp]
  22879. adcx r11, rax
  22880. adox r12, rcx
  22881. ; A[10] * B[1]
  22882. mulx rcx, rax, QWORD PTR [rbp+8]
  22883. mov QWORD PTR [rbx+80], r11
  22884. adcx r12, rax
  22885. adox r10, rcx
  22886. mov QWORD PTR [rbx+88], r12
  22887. mov r11, QWORD PTR [r8+8]
  22888. mov r12, QWORD PTR [r8+16]
  22889. ; A[10] * B[2]
  22890. mulx rcx, rax, QWORD PTR [rbp+16]
  22891. adcx r10, rax
  22892. adox r11, rcx
  22893. ; A[10] * B[3]
  22894. mulx rcx, rax, QWORD PTR [rbp+24]
  22895. mov QWORD PTR [r8], r10
  22896. adcx r11, rax
  22897. adox r12, rcx
  22898. mov QWORD PTR [r8+8], r11
  22899. mov r10, QWORD PTR [r8+24]
  22900. mov r11, QWORD PTR [r8+32]
  22901. ; A[10] * B[4]
  22902. mulx rcx, rax, QWORD PTR [rbp+32]
  22903. adcx r12, rax
  22904. adox r10, rcx
  22905. ; A[10] * B[5]
  22906. mulx rcx, rax, QWORD PTR [rbp+40]
  22907. mov QWORD PTR [r8+16], r12
  22908. adcx r10, rax
  22909. adox r11, rcx
  22910. mov QWORD PTR [r8+24], r10
  22911. mov r12, QWORD PTR [r8+40]
  22912. mov r10, QWORD PTR [r8+48]
  22913. ; A[10] * B[6]
  22914. mulx rcx, rax, QWORD PTR [rbp+48]
  22915. adcx r11, rax
  22916. adox r12, rcx
  22917. ; A[10] * B[7]
  22918. mulx rcx, rax, QWORD PTR [rbp+56]
  22919. mov QWORD PTR [r8+32], r11
  22920. adcx r12, rax
  22921. adox r10, rcx
  22922. mov QWORD PTR [r8+40], r12
  22923. mov r11, QWORD PTR [r8+56]
  22924. mov r12, QWORD PTR [r8+64]
  22925. ; A[10] * B[8]
  22926. mulx rcx, rax, QWORD PTR [rbp+64]
  22927. adcx r10, rax
  22928. adox r11, rcx
  22929. ; A[10] * B[9]
  22930. mulx rcx, rax, QWORD PTR [rbp+72]
  22931. mov QWORD PTR [r8+48], r10
  22932. adcx r11, rax
  22933. adox r12, rcx
  22934. mov QWORD PTR [r8+56], r11
  22935. mov r10, QWORD PTR [r8+72]
  22936. ; A[10] * B[10]
  22937. mulx rcx, rax, QWORD PTR [rbp+80]
  22938. adcx r12, rax
  22939. adox r10, rcx
  22940. ; A[10] * B[11]
  22941. mulx rcx, rax, QWORD PTR [rbp+88]
  22942. mov QWORD PTR [r8+64], r12
  22943. mov r11, r14
  22944. adcx r10, rax
  22945. adox r11, rcx
  22946. adcx r11, r13
  22947. mov r13, r14
  22948. adox r13, r14
  22949. adcx r13, r14
  22950. mov QWORD PTR [r8+72], r10
  22951. mov QWORD PTR [r8+80], r11
         ; Row 11 (last): rdx = A[11]; add A[11] * B[0..11] into result words
         ; 11..23. No carry is saved in r13 after this row.
  22952. mov rdx, QWORD PTR [r9+88]
  22953. mov r12, QWORD PTR [rbx+88]
  22954. mov r10, QWORD PTR [r8]
  22955. mov r11, QWORD PTR [r8+8]
  22956. ; A[11] * B[0]
  22957. mulx rcx, rax, QWORD PTR [rbp]
  22958. adcx r12, rax
  22959. adox r10, rcx
  22960. ; A[11] * B[1]
  22961. mulx rcx, rax, QWORD PTR [rbp+8]
  22962. mov QWORD PTR [rbx+88], r12
  22963. adcx r10, rax
  22964. adox r11, rcx
  22965. mov QWORD PTR [r8], r10
  22966. mov r12, QWORD PTR [r8+16]
  22967. mov r10, QWORD PTR [r8+24]
  22968. ; A[11] * B[2]
  22969. mulx rcx, rax, QWORD PTR [rbp+16]
  22970. adcx r11, rax
  22971. adox r12, rcx
  22972. ; A[11] * B[3]
  22973. mulx rcx, rax, QWORD PTR [rbp+24]
  22974. mov QWORD PTR [r8+8], r11
  22975. adcx r12, rax
  22976. adox r10, rcx
  22977. mov QWORD PTR [r8+16], r12
  22978. mov r11, QWORD PTR [r8+32]
  22979. mov r12, QWORD PTR [r8+40]
  22980. ; A[11] * B[4]
  22981. mulx rcx, rax, QWORD PTR [rbp+32]
  22982. adcx r10, rax
  22983. adox r11, rcx
  22984. ; A[11] * B[5]
  22985. mulx rcx, rax, QWORD PTR [rbp+40]
  22986. mov QWORD PTR [r8+24], r10
  22987. adcx r11, rax
  22988. adox r12, rcx
  22989. mov QWORD PTR [r8+32], r11
  22990. mov r10, QWORD PTR [r8+48]
  22991. mov r11, QWORD PTR [r8+56]
  22992. ; A[11] * B[6]
  22993. mulx rcx, rax, QWORD PTR [rbp+48]
  22994. adcx r12, rax
  22995. adox r10, rcx
  22996. ; A[11] * B[7]
  22997. mulx rcx, rax, QWORD PTR [rbp+56]
  22998. mov QWORD PTR [r8+40], r12
  22999. adcx r10, rax
  23000. adox r11, rcx
  23001. mov QWORD PTR [r8+48], r10
  23002. mov r12, QWORD PTR [r8+64]
  23003. mov r10, QWORD PTR [r8+72]
  23004. ; A[11] * B[8]
  23005. mulx rcx, rax, QWORD PTR [rbp+64]
  23006. adcx r11, rax
  23007. adox r12, rcx
  23008. ; A[11] * B[9]
  23009. mulx rcx, rax, QWORD PTR [rbp+72]
  23010. mov QWORD PTR [r8+56], r11
  23011. adcx r12, rax
  23012. adox r10, rcx
  23013. mov QWORD PTR [r8+64], r12
  23014. mov r11, QWORD PTR [r8+80]
  23015. ; A[11] * B[10]
  23016. mulx rcx, rax, QWORD PTR [rbp+80]
  23017. adcx r10, rax
  23018. adox r11, rcx
  23019. ; A[11] * B[11]
  23020. mulx rcx, rax, QWORD PTR [rbp+88]
  23021. mov QWORD PTR [r8+72], r10
  23022. mov r12, r14 ; word 23 starts at zero
  23023. adcx r11, rax
  23024. adox r12, rcx
  23025. adcx r12, r13 ; fold in the carry saved by row 10
  23026. mov QWORD PTR [r8+80], r11
  23027. mov QWORD PTR [r8+88], r12
  23028. sub r8, 96 ; r8 = r again
         ; If r is the same pointer as a or b, result words 0..11 were built
         ; in the stack scratch (rbx = rsp); copy them into r now. Otherwise
         ; they were written to r directly and the copy is skipped.
  23029. cmp r9, r8
  23030. je L_start_3072_mul_avx2_12
  23031. cmp rbp, r8
  23032. jne L_end_3072_mul_avx2_12
  23033. L_start_3072_mul_avx2_12:
         ; Copy 96 bytes from [rbx] to [r8], 16 bytes at a time through xmm0.
  23034. vmovdqu xmm0, OWORD PTR [rbx]
  23035. vmovups OWORD PTR [r8], xmm0
  23036. vmovdqu xmm0, OWORD PTR [rbx+16]
  23037. vmovups OWORD PTR [r8+16], xmm0
  23038. vmovdqu xmm0, OWORD PTR [rbx+32]
  23039. vmovups OWORD PTR [r8+32], xmm0
  23040. vmovdqu xmm0, OWORD PTR [rbx+48]
  23041. vmovups OWORD PTR [r8+48], xmm0
  23042. vmovdqu xmm0, OWORD PTR [rbx+64]
  23043. vmovups OWORD PTR [r8+64], xmm0
  23044. vmovdqu xmm0, OWORD PTR [rbx+80]
  23045. vmovups OWORD PTR [r8+80], xmm0
  23046. L_end_3072_mul_avx2_12:
  23047. add rsp, 96 ; release the scratch buffer
         ; Restore saved registers in reverse push order.
  23048. pop r14
  23049. pop r13
  23050. pop r12
  23051. pop rbp
  23052. pop rbx
  23053. ret
  23054. sp_3072_mul_avx2_12 ENDP
  23055. _text ENDS
  23056. ENDIF
  23057. ; /* Twelve-word addition: r = a + b, with the carry out handed back in rax.
  23058. ; *
  23059. ; * r Destination, 12 64-bit words, written through rcx.
  23060. ; * a First addend, 12 64-bit words, read through rdx.
  23061. ; * b Second addend, 12 64-bit words, read through r8.
  23062. ; */
  23063. _text SEGMENT READONLY PARA
  23064. sp_3072_add_12 PROC
  23065. ; Clear rax before the chain starts so it can receive the final carry
  23066. xor eax, eax
  23067. mov r10, QWORD PTR [rdx]
  23068. add r10, QWORD PTR [r8] ; word 0: plain add opens the carry chain
  23069. mov r9, QWORD PTR [rdx+8] ; next word of a is fetched before the previous result is stored
  23070. mov QWORD PTR [rcx], r10
  23071. adc r9, QWORD PTR [r8+8]
  23072. mov r10, QWORD PTR [rdx+16]
  23073. mov QWORD PTR [rcx+8], r9
  23074. adc r10, QWORD PTR [r8+16]
  23075. mov r9, QWORD PTR [rdx+24]
  23076. mov QWORD PTR [rcx+16], r10
  23077. adc r9, QWORD PTR [r8+24]
  23078. mov r10, QWORD PTR [rdx+32]
  23079. mov QWORD PTR [rcx+24], r9
  23080. adc r10, QWORD PTR [r8+32]
  23081. mov r9, QWORD PTR [rdx+40]
  23082. mov QWORD PTR [rcx+32], r10
  23083. adc r9, QWORD PTR [r8+40]
  23084. mov r10, QWORD PTR [rdx+48]
  23085. mov QWORD PTR [rcx+40], r9
  23086. adc r10, QWORD PTR [r8+48]
  23087. mov r9, QWORD PTR [rdx+56]
  23088. mov QWORD PTR [rcx+48], r10
  23089. adc r9, QWORD PTR [r8+56]
  23090. mov r10, QWORD PTR [rdx+64]
  23091. mov QWORD PTR [rcx+56], r9
  23092. adc r10, QWORD PTR [r8+64]
  23093. mov r9, QWORD PTR [rdx+72]
  23094. mov QWORD PTR [rcx+64], r10
  23095. adc r9, QWORD PTR [r8+72]
  23096. mov r10, QWORD PTR [rdx+80]
  23097. mov QWORD PTR [rcx+72], r9
  23098. adc r10, QWORD PTR [r8+80]
  23099. mov r9, QWORD PTR [rdx+88]
  23100. mov QWORD PTR [rcx+80], r10
  23101. adc r9, QWORD PTR [r8+88] ; word 11: last link of the chain
  23102. mov QWORD PTR [rcx+88], r9
  23103. setc al ; rax was zeroed above, so rax = carry out (0 or 1)
  23104. ret
  23105. sp_3072_add_12 ENDP
  23106. _text ENDS
  23107. ; /* In-place 24-word subtraction: a -= b. rax comes back as 0 (no borrow) or all ones (borrow).
  23108. ; *
  23109. ; * a Minuend and destination, 24 64-bit words, accessed through rcx.
  23110. ; * b Subtrahend, 24 64-bit words, read through rdx.
  23111. ; */
  23112. _text SEGMENT READONLY PARA
  23113. sp_3072_sub_in_place_24 PROC
  23114. mov r9, QWORD PTR [rcx]
  23115. sub r9, QWORD PTR [rdx] ; word 0: plain sub opens the borrow chain
  23116. mov r8, QWORD PTR [rcx+8] ; next word of a is fetched before the previous result is stored
  23117. mov QWORD PTR [rcx], r9
  23118. sbb r8, QWORD PTR [rdx+8]
  23119. mov r9, QWORD PTR [rcx+16]
  23120. mov QWORD PTR [rcx+8], r8
  23121. sbb r9, QWORD PTR [rdx+16]
  23122. mov r8, QWORD PTR [rcx+24]
  23123. mov QWORD PTR [rcx+16], r9
  23124. sbb r8, QWORD PTR [rdx+24]
  23125. mov r9, QWORD PTR [rcx+32]
  23126. mov QWORD PTR [rcx+24], r8
  23127. sbb r9, QWORD PTR [rdx+32]
  23128. mov r8, QWORD PTR [rcx+40]
  23129. mov QWORD PTR [rcx+32], r9
  23130. sbb r8, QWORD PTR [rdx+40]
  23131. mov r9, QWORD PTR [rcx+48]
  23132. mov QWORD PTR [rcx+40], r8
  23133. sbb r9, QWORD PTR [rdx+48]
  23134. mov r8, QWORD PTR [rcx+56]
  23135. mov QWORD PTR [rcx+48], r9
  23136. sbb r8, QWORD PTR [rdx+56]
  23137. mov r9, QWORD PTR [rcx+64]
  23138. mov QWORD PTR [rcx+56], r8
  23139. sbb r9, QWORD PTR [rdx+64]
  23140. mov r8, QWORD PTR [rcx+72]
  23141. mov QWORD PTR [rcx+64], r9
  23142. sbb r8, QWORD PTR [rdx+72]
  23143. mov r9, QWORD PTR [rcx+80]
  23144. mov QWORD PTR [rcx+72], r8
  23145. sbb r9, QWORD PTR [rdx+80]
  23146. mov r8, QWORD PTR [rcx+88]
  23147. mov QWORD PTR [rcx+80], r9
  23148. sbb r8, QWORD PTR [rdx+88]
  23149. mov r9, QWORD PTR [rcx+96]
  23150. mov QWORD PTR [rcx+88], r8
  23151. sbb r9, QWORD PTR [rdx+96]
  23152. mov r8, QWORD PTR [rcx+104]
  23153. mov QWORD PTR [rcx+96], r9
  23154. sbb r8, QWORD PTR [rdx+104]
  23155. mov r9, QWORD PTR [rcx+112]
  23156. mov QWORD PTR [rcx+104], r8
  23157. sbb r9, QWORD PTR [rdx+112]
  23158. mov r8, QWORD PTR [rcx+120]
  23159. mov QWORD PTR [rcx+112], r9
  23160. sbb r8, QWORD PTR [rdx+120]
  23161. mov r9, QWORD PTR [rcx+128]
  23162. mov QWORD PTR [rcx+120], r8
  23163. sbb r9, QWORD PTR [rdx+128]
  23164. mov r8, QWORD PTR [rcx+136]
  23165. mov QWORD PTR [rcx+128], r9
  23166. sbb r8, QWORD PTR [rdx+136]
  23167. mov r9, QWORD PTR [rcx+144]
  23168. mov QWORD PTR [rcx+136], r8
  23169. sbb r9, QWORD PTR [rdx+144]
  23170. mov r8, QWORD PTR [rcx+152]
  23171. mov QWORD PTR [rcx+144], r9
  23172. sbb r8, QWORD PTR [rdx+152]
  23173. mov r9, QWORD PTR [rcx+160]
  23174. mov QWORD PTR [rcx+152], r8
  23175. sbb r9, QWORD PTR [rdx+160]
  23176. mov r8, QWORD PTR [rcx+168]
  23177. mov QWORD PTR [rcx+160], r9
  23178. sbb r8, QWORD PTR [rdx+168]
  23179. mov r9, QWORD PTR [rcx+176]
  23180. mov QWORD PTR [rcx+168], r8
  23181. sbb r9, QWORD PTR [rdx+176]
  23182. mov r8, QWORD PTR [rcx+184]
  23183. mov QWORD PTR [rcx+176], r9
  23184. sbb r8, QWORD PTR [rdx+184] ; word 23: last link of the chain
  23185. mov QWORD PTR [rcx+184], r8
  23186. sbb rax, rax ; rax = 0 - CF: zero without a borrow, all ones with one
  23187. ret
  23188. sp_3072_sub_in_place_24 ENDP
  23189. _text ENDS
  23190. ; /* Twenty-four-word addition: r = a + b, with the carry out handed back in rax.
  23191. ; *
  23192. ; * r Destination, 24 64-bit words, written through rcx.
  23193. ; * a First addend, 24 64-bit words, read through rdx.
  23194. ; * b Second addend, 24 64-bit words, read through r8.
  23195. ; */
  23196. _text SEGMENT READONLY PARA
  23197. sp_3072_add_24 PROC
  23198. ; Clear rax before the chain starts so it can receive the final carry
  23199. xor eax, eax
  23200. mov r10, QWORD PTR [rdx]
  23201. add r10, QWORD PTR [r8] ; word 0: plain add opens the carry chain
  23202. mov r9, QWORD PTR [rdx+8] ; next word of a is fetched before the previous result is stored
  23203. mov QWORD PTR [rcx], r10
  23204. adc r9, QWORD PTR [r8+8]
  23205. mov r10, QWORD PTR [rdx+16]
  23206. mov QWORD PTR [rcx+8], r9
  23207. adc r10, QWORD PTR [r8+16]
  23208. mov r9, QWORD PTR [rdx+24]
  23209. mov QWORD PTR [rcx+16], r10
  23210. adc r9, QWORD PTR [r8+24]
  23211. mov r10, QWORD PTR [rdx+32]
  23212. mov QWORD PTR [rcx+24], r9
  23213. adc r10, QWORD PTR [r8+32]
  23214. mov r9, QWORD PTR [rdx+40]
  23215. mov QWORD PTR [rcx+32], r10
  23216. adc r9, QWORD PTR [r8+40]
  23217. mov r10, QWORD PTR [rdx+48]
  23218. mov QWORD PTR [rcx+40], r9
  23219. adc r10, QWORD PTR [r8+48]
  23220. mov r9, QWORD PTR [rdx+56]
  23221. mov QWORD PTR [rcx+48], r10
  23222. adc r9, QWORD PTR [r8+56]
  23223. mov r10, QWORD PTR [rdx+64]
  23224. mov QWORD PTR [rcx+56], r9
  23225. adc r10, QWORD PTR [r8+64]
  23226. mov r9, QWORD PTR [rdx+72]
  23227. mov QWORD PTR [rcx+64], r10
  23228. adc r9, QWORD PTR [r8+72]
  23229. mov r10, QWORD PTR [rdx+80]
  23230. mov QWORD PTR [rcx+72], r9
  23231. adc r10, QWORD PTR [r8+80]
  23232. mov r9, QWORD PTR [rdx+88]
  23233. mov QWORD PTR [rcx+80], r10
  23234. adc r9, QWORD PTR [r8+88]
  23235. mov r10, QWORD PTR [rdx+96]
  23236. mov QWORD PTR [rcx+88], r9
  23237. adc r10, QWORD PTR [r8+96]
  23238. mov r9, QWORD PTR [rdx+104]
  23239. mov QWORD PTR [rcx+96], r10
  23240. adc r9, QWORD PTR [r8+104]
  23241. mov r10, QWORD PTR [rdx+112]
  23242. mov QWORD PTR [rcx+104], r9
  23243. adc r10, QWORD PTR [r8+112]
  23244. mov r9, QWORD PTR [rdx+120]
  23245. mov QWORD PTR [rcx+112], r10
  23246. adc r9, QWORD PTR [r8+120]
  23247. mov r10, QWORD PTR [rdx+128]
  23248. mov QWORD PTR [rcx+120], r9
  23249. adc r10, QWORD PTR [r8+128]
  23250. mov r9, QWORD PTR [rdx+136]
  23251. mov QWORD PTR [rcx+128], r10
  23252. adc r9, QWORD PTR [r8+136]
  23253. mov r10, QWORD PTR [rdx+144]
  23254. mov QWORD PTR [rcx+136], r9
  23255. adc r10, QWORD PTR [r8+144]
  23256. mov r9, QWORD PTR [rdx+152]
  23257. mov QWORD PTR [rcx+144], r10
  23258. adc r9, QWORD PTR [r8+152]
  23259. mov r10, QWORD PTR [rdx+160]
  23260. mov QWORD PTR [rcx+152], r9
  23261. adc r10, QWORD PTR [r8+160]
  23262. mov r9, QWORD PTR [rdx+168]
  23263. mov QWORD PTR [rcx+160], r10
  23264. adc r9, QWORD PTR [r8+168]
  23265. mov r10, QWORD PTR [rdx+176]
  23266. mov QWORD PTR [rcx+168], r9
  23267. adc r10, QWORD PTR [r8+176]
  23268. mov r9, QWORD PTR [rdx+184]
  23269. mov QWORD PTR [rcx+176], r10
  23270. adc r9, QWORD PTR [r8+184] ; word 23: last link of the chain
  23271. mov QWORD PTR [rcx+184], r9
  23272. setc al ; rax was zeroed above, so rax = carry out (0 or 1)
  23273. ret
  23274. sp_3072_add_24 ENDP
  23275. _text ENDS
  23276. ; /* Multiply a and b into r. (r = a * b)
  23277. ; * Splits each operand at byte offset 96 (12 words) and combines three sp_3072_mul_12 calls, Karatsuba-style.
  23278. ; * r A single precision integer. Result pointer (rcx); stores in this routine reach byte offset 376, i.e. 48 words.
  23279. ; * a A single precision integer. Read through rdx as two 12-word halves (offsets 0..88 and 96..184).
  23280. ; * b A single precision integer. Read through r8 as two 12-word halves (offsets 0..88 and 96..184).
  23281. ; */
  23282. _text SEGMENT READONLY PARA
  23283. sp_3072_mul_24 PROC
  23284. push r12
  23285. push r13
  23286. push r14
  23287. push r15
  23288. push rdi
  23289. push rsi
  23290. sub rsp, 616 ; frame: [0,192) sum product, [192,384) high product, [384,480) a sum, [480,576) b sum, 576/584/592 saved r/a/b, 600/608 sum carries
  23291. mov QWORD PTR [rsp+576], rcx ; keep r for after the calls
  23292. mov QWORD PTR [rsp+584], rdx ; keep a for after the calls
  23293. mov QWORD PTR [rsp+592], r8 ; keep b for after the calls
  23294. lea r12, QWORD PTR [rsp+384] ; r12 -> 12-word buffer that receives a_lo + a_hi
  23295. lea r14, QWORD PTR [rdx+96] ; r14 -> upper 12 words of a
  23296. ; Add: [r12] = a[0..11] + a[12..23]; the carry out is collected in r15
  23297. mov rax, QWORD PTR [rdx]
  23298. xor r15, r15
  23299. add rax, QWORD PTR [r14]
  23300. mov r9, QWORD PTR [rdx+8]
  23301. mov QWORD PTR [r12], rax
  23302. adc r9, QWORD PTR [r14+8]
  23303. mov r10, QWORD PTR [rdx+16]
  23304. mov QWORD PTR [r12+8], r9
  23305. adc r10, QWORD PTR [r14+16]
  23306. mov rax, QWORD PTR [rdx+24]
  23307. mov QWORD PTR [r12+16], r10
  23308. adc rax, QWORD PTR [r14+24]
  23309. mov r9, QWORD PTR [rdx+32]
  23310. mov QWORD PTR [r12+24], rax
  23311. adc r9, QWORD PTR [r14+32]
  23312. mov r10, QWORD PTR [rdx+40]
  23313. mov QWORD PTR [r12+32], r9
  23314. adc r10, QWORD PTR [r14+40]
  23315. mov rax, QWORD PTR [rdx+48]
  23316. mov QWORD PTR [r12+40], r10
  23317. adc rax, QWORD PTR [r14+48]
  23318. mov r9, QWORD PTR [rdx+56]
  23319. mov QWORD PTR [r12+48], rax
  23320. adc r9, QWORD PTR [r14+56]
  23321. mov r10, QWORD PTR [rdx+64]
  23322. mov QWORD PTR [r12+56], r9
  23323. adc r10, QWORD PTR [r14+64]
  23324. mov rax, QWORD PTR [rdx+72]
  23325. mov QWORD PTR [r12+64], r10
  23326. adc rax, QWORD PTR [r14+72]
  23327. mov r9, QWORD PTR [rdx+80]
  23328. mov QWORD PTR [r12+72], rax
  23329. adc r9, QWORD PTR [r14+80]
  23330. mov r10, QWORD PTR [rdx+88]
  23331. mov QWORD PTR [r12+80], r9
  23332. adc r10, QWORD PTR [r14+88]
  23333. mov QWORD PTR [r12+88], r10
  23334. adc r15, 0 ; r15 = carry out of a_lo + a_hi (0 or 1)
  23335. mov QWORD PTR [rsp+600], r15 ; spill the a-sum carry; it is reloaded after the calls
  23336. lea r13, QWORD PTR [rsp+480] ; r13 -> 12-word buffer that receives b_lo + b_hi
  23337. lea r14, QWORD PTR [r8+96] ; r14 -> upper 12 words of b
  23338. ; Add: [r13] = b[0..11] + b[12..23]; the carry out is collected in rdi
  23339. mov rax, QWORD PTR [r8]
  23340. xor rdi, rdi
  23341. add rax, QWORD PTR [r14]
  23342. mov r9, QWORD PTR [r8+8]
  23343. mov QWORD PTR [r13], rax
  23344. adc r9, QWORD PTR [r14+8]
  23345. mov r10, QWORD PTR [r8+16]
  23346. mov QWORD PTR [r13+8], r9
  23347. adc r10, QWORD PTR [r14+16]
  23348. mov rax, QWORD PTR [r8+24]
  23349. mov QWORD PTR [r13+16], r10
  23350. adc rax, QWORD PTR [r14+24]
  23351. mov r9, QWORD PTR [r8+32]
  23352. mov QWORD PTR [r13+24], rax
  23353. adc r9, QWORD PTR [r14+32]
  23354. mov r10, QWORD PTR [r8+40]
  23355. mov QWORD PTR [r13+32], r9
  23356. adc r10, QWORD PTR [r14+40]
  23357. mov rax, QWORD PTR [r8+48]
  23358. mov QWORD PTR [r13+40], r10
  23359. adc rax, QWORD PTR [r14+48]
  23360. mov r9, QWORD PTR [r8+56]
  23361. mov QWORD PTR [r13+48], rax
  23362. adc r9, QWORD PTR [r14+56]
  23363. mov r10, QWORD PTR [r8+64]
  23364. mov QWORD PTR [r13+56], r9
  23365. adc r10, QWORD PTR [r14+64]
  23366. mov rax, QWORD PTR [r8+72]
  23367. mov QWORD PTR [r13+64], r10
  23368. adc rax, QWORD PTR [r14+72]
  23369. mov r9, QWORD PTR [r8+80]
  23370. mov QWORD PTR [r13+72], rax
  23371. adc r9, QWORD PTR [r14+80]
  23372. mov r10, QWORD PTR [r8+88]
  23373. mov QWORD PTR [r13+80], r9
  23374. adc r10, QWORD PTR [r14+88]
  23375. mov QWORD PTR [r13+88], r10
  23376. adc rdi, 0 ; rdi = carry out of b_lo + b_hi (0 or 1)
  23377. mov QWORD PTR [rsp+608], rdi ; spill the b-sum carry; it is reloaded after the calls
  23378. mov r8, r13 ; third argument: b sum
  23379. mov rdx, r12 ; second argument: a sum
  23380. mov rcx, rsp ; first argument: output at the bottom of the frame
  23381. call sp_3072_mul_12 ; presumably [rsp, rsp+192) = (a sum) * (b sum); callee is defined elsewhere in the file
  23382. mov r8, QWORD PTR [rsp+592]
  23383. mov rdx, QWORD PTR [rsp+584]
  23384. lea rcx, QWORD PTR [rsp+192]
  23385. add r8, 96 ; r8 -> upper half of b
  23386. add rdx, 96 ; rdx -> upper half of a
  23387. call sp_3072_mul_12 ; presumably [rsp+192, rsp+384) = a_hi * b_hi
  23388. mov r8, QWORD PTR [rsp+592]
  23389. mov rdx, QWORD PTR [rsp+584]
  23390. mov rcx, QWORD PTR [rsp+576]
  23391. call sp_3072_mul_12 ; presumably r[0..23] = a_lo * b_lo, written straight into the result
  23392. IFDEF _WIN64
  23393. mov r8, QWORD PTR [rsp+592] ; reload b from the frame after the calls
  23394. mov rdx, QWORD PTR [rsp+584] ; reload a from the frame after the calls
  23395. mov rcx, QWORD PTR [rsp+576] ; reload r; NOTE(review): the [rcx] reads and the [rcx+288] store below depend on this reload
  23396. ENDIF
  23397. mov r15, QWORD PTR [rsp+600] ; r15 = a-sum carry
  23398. mov rdi, QWORD PTR [rsp+608] ; rdi = b-sum carry
  23399. mov rsi, QWORD PTR [rsp+576]
  23400. mov r11, r15
  23401. lea r12, QWORD PTR [rsp+384]
  23402. lea r13, QWORD PTR [rsp+480]
  23403. and r11, rdi ; r11 = 1 only when both sums carried; from here on it is the running top-word accumulator
  23404. neg r15 ; r15 = 0 or all ones: mask derived from the a-sum carry
  23405. neg rdi ; rdi = 0 or all ones: mask derived from the b-sum carry
  23406. add rsi, 192 ; rsi -> r + 192 bytes (word 24)
  23407. mov rax, QWORD PTR [r12] ; masking pass: a sum is kept only if the b sum carried, b sum only if the a sum carried
  23408. mov r9, QWORD PTR [r13]
  23409. and rax, rdi
  23410. and r9, r15
  23411. mov QWORD PTR [r12], rax
  23412. mov QWORD PTR [r13], r9
  23413. mov rax, QWORD PTR [r12+8]
  23414. mov r9, QWORD PTR [r13+8]
  23415. and rax, rdi
  23416. and r9, r15
  23417. mov QWORD PTR [r12+8], rax
  23418. mov QWORD PTR [r13+8], r9
  23419. mov rax, QWORD PTR [r12+16]
  23420. mov r9, QWORD PTR [r13+16]
  23421. and rax, rdi
  23422. and r9, r15
  23423. mov QWORD PTR [r12+16], rax
  23424. mov QWORD PTR [r13+16], r9
  23425. mov rax, QWORD PTR [r12+24]
  23426. mov r9, QWORD PTR [r13+24]
  23427. and rax, rdi
  23428. and r9, r15
  23429. mov QWORD PTR [r12+24], rax
  23430. mov QWORD PTR [r13+24], r9
  23431. mov rax, QWORD PTR [r12+32]
  23432. mov r9, QWORD PTR [r13+32]
  23433. and rax, rdi
  23434. and r9, r15
  23435. mov QWORD PTR [r12+32], rax
  23436. mov QWORD PTR [r13+32], r9
  23437. mov rax, QWORD PTR [r12+40]
  23438. mov r9, QWORD PTR [r13+40]
  23439. and rax, rdi
  23440. and r9, r15
  23441. mov QWORD PTR [r12+40], rax
  23442. mov QWORD PTR [r13+40], r9
  23443. mov rax, QWORD PTR [r12+48]
  23444. mov r9, QWORD PTR [r13+48]
  23445. and rax, rdi
  23446. and r9, r15
  23447. mov QWORD PTR [r12+48], rax
  23448. mov QWORD PTR [r13+48], r9
  23449. mov rax, QWORD PTR [r12+56]
  23450. mov r9, QWORD PTR [r13+56]
  23451. and rax, rdi
  23452. and r9, r15
  23453. mov QWORD PTR [r12+56], rax
  23454. mov QWORD PTR [r13+56], r9
  23455. mov rax, QWORD PTR [r12+64]
  23456. mov r9, QWORD PTR [r13+64]
  23457. and rax, rdi
  23458. and r9, r15
  23459. mov QWORD PTR [r12+64], rax
  23460. mov QWORD PTR [r13+64], r9
  23461. mov rax, QWORD PTR [r12+72]
  23462. mov r9, QWORD PTR [r13+72]
  23463. and rax, rdi
  23464. and r9, r15
  23465. mov QWORD PTR [r12+72], rax
  23466. mov QWORD PTR [r13+72], r9
  23467. mov rax, QWORD PTR [r12+80]
  23468. mov r9, QWORD PTR [r13+80]
  23469. and rax, rdi
  23470. and r9, r15
  23471. mov QWORD PTR [r12+80], rax
  23472. mov QWORD PTR [r13+80], r9
  23473. mov rax, QWORD PTR [r12+88]
  23474. mov r9, QWORD PTR [r13+88]
  23475. and rax, rdi
  23476. and r9, r15
  23477. mov QWORD PTR [r12+88], rax
  23478. mov QWORD PTR [r13+88], r9
  23479. mov rax, QWORD PTR [r12] ; r[24..35] = masked a sum + masked b sum (correction for the carry bits dropped from the 12-word sums)
  23480. add rax, QWORD PTR [r13]
  23481. mov r9, QWORD PTR [r12+8]
  23482. mov QWORD PTR [rsi], rax
  23483. adc r9, QWORD PTR [r13+8]
  23484. mov r10, QWORD PTR [r12+16]
  23485. mov QWORD PTR [rsi+8], r9
  23486. adc r10, QWORD PTR [r13+16]
  23487. mov rax, QWORD PTR [r12+24]
  23488. mov QWORD PTR [rsi+16], r10
  23489. adc rax, QWORD PTR [r13+24]
  23490. mov r9, QWORD PTR [r12+32]
  23491. mov QWORD PTR [rsi+24], rax
  23492. adc r9, QWORD PTR [r13+32]
  23493. mov r10, QWORD PTR [r12+40]
  23494. mov QWORD PTR [rsi+32], r9
  23495. adc r10, QWORD PTR [r13+40]
  23496. mov rax, QWORD PTR [r12+48]
  23497. mov QWORD PTR [rsi+40], r10
  23498. adc rax, QWORD PTR [r13+48]
  23499. mov r9, QWORD PTR [r12+56]
  23500. mov QWORD PTR [rsi+48], rax
  23501. adc r9, QWORD PTR [r13+56]
  23502. mov r10, QWORD PTR [r12+64]
  23503. mov QWORD PTR [rsi+56], r9
  23504. adc r10, QWORD PTR [r13+64]
  23505. mov rax, QWORD PTR [r12+72]
  23506. mov QWORD PTR [rsi+64], r10
  23507. adc rax, QWORD PTR [r13+72]
  23508. mov r9, QWORD PTR [r12+80]
  23509. mov QWORD PTR [rsi+72], rax
  23510. adc r9, QWORD PTR [r13+80]
  23511. mov r10, QWORD PTR [r12+88]
  23512. mov QWORD PTR [rsi+80], r9
  23513. adc r10, QWORD PTR [r13+88]
  23514. mov QWORD PTR [rsi+88], r10
  23515. adc r11, 0 ; fold the carry of the correction sum into the top-word accumulator
  23516. lea r13, QWORD PTR [rsp+192] ; r13 -> high product (second call's output)
  23517. mov r12, rsp ; r12 -> sum product (first call's output)
  23518. mov rax, QWORD PTR [r12] ; 24-word in-place subtract: sum product -= high product
  23519. sub rax, QWORD PTR [r13]
  23520. mov r9, QWORD PTR [r12+8]
  23521. mov QWORD PTR [r12], rax
  23522. sbb r9, QWORD PTR [r13+8]
  23523. mov r10, QWORD PTR [r12+16]
  23524. mov QWORD PTR [r12+8], r9
  23525. sbb r10, QWORD PTR [r13+16]
  23526. mov rax, QWORD PTR [r12+24]
  23527. mov QWORD PTR [r12+16], r10
  23528. sbb rax, QWORD PTR [r13+24]
  23529. mov r9, QWORD PTR [r12+32]
  23530. mov QWORD PTR [r12+24], rax
  23531. sbb r9, QWORD PTR [r13+32]
  23532. mov r10, QWORD PTR [r12+40]
  23533. mov QWORD PTR [r12+32], r9
  23534. sbb r10, QWORD PTR [r13+40]
  23535. mov rax, QWORD PTR [r12+48]
  23536. mov QWORD PTR [r12+40], r10
  23537. sbb rax, QWORD PTR [r13+48]
  23538. mov r9, QWORD PTR [r12+56]
  23539. mov QWORD PTR [r12+48], rax
  23540. sbb r9, QWORD PTR [r13+56]
  23541. mov r10, QWORD PTR [r12+64]
  23542. mov QWORD PTR [r12+56], r9
  23543. sbb r10, QWORD PTR [r13+64]
  23544. mov rax, QWORD PTR [r12+72]
  23545. mov QWORD PTR [r12+64], r10
  23546. sbb rax, QWORD PTR [r13+72]
  23547. mov r9, QWORD PTR [r12+80]
  23548. mov QWORD PTR [r12+72], rax
  23549. sbb r9, QWORD PTR [r13+80]
  23550. mov r10, QWORD PTR [r12+88]
  23551. mov QWORD PTR [r12+80], r9
  23552. sbb r10, QWORD PTR [r13+88]
  23553. mov rax, QWORD PTR [r12+96]
  23554. mov QWORD PTR [r12+88], r10
  23555. sbb rax, QWORD PTR [r13+96]
  23556. mov r9, QWORD PTR [r12+104]
  23557. mov QWORD PTR [r12+96], rax
  23558. sbb r9, QWORD PTR [r13+104]
  23559. mov r10, QWORD PTR [r12+112]
  23560. mov QWORD PTR [r12+104], r9
  23561. sbb r10, QWORD PTR [r13+112]
  23562. mov rax, QWORD PTR [r12+120]
  23563. mov QWORD PTR [r12+112], r10
  23564. sbb rax, QWORD PTR [r13+120]
  23565. mov r9, QWORD PTR [r12+128]
  23566. mov QWORD PTR [r12+120], rax
  23567. sbb r9, QWORD PTR [r13+128]
  23568. mov r10, QWORD PTR [r12+136]
  23569. mov QWORD PTR [r12+128], r9
  23570. sbb r10, QWORD PTR [r13+136]
  23571. mov rax, QWORD PTR [r12+144]
  23572. mov QWORD PTR [r12+136], r10
  23573. sbb rax, QWORD PTR [r13+144]
  23574. mov r9, QWORD PTR [r12+152]
  23575. mov QWORD PTR [r12+144], rax
  23576. sbb r9, QWORD PTR [r13+152]
  23577. mov r10, QWORD PTR [r12+160]
  23578. mov QWORD PTR [r12+152], r9
  23579. sbb r10, QWORD PTR [r13+160]
  23580. mov rax, QWORD PTR [r12+168]
  23581. mov QWORD PTR [r12+160], r10
  23582. sbb rax, QWORD PTR [r13+168]
  23583. mov r9, QWORD PTR [r12+176]
  23584. mov QWORD PTR [r12+168], rax
  23585. sbb r9, QWORD PTR [r13+176]
  23586. mov r10, QWORD PTR [r12+184]
  23587. mov QWORD PTR [r12+176], r9
  23588. sbb r10, QWORD PTR [r13+184]
  23589. mov QWORD PTR [r12+184], r10
  23590. sbb r11, 0 ; propagate the borrow into the top-word accumulator
  23591. mov rax, QWORD PTR [r12] ; 24-word in-place subtract: sum product -= r[0..23] (the low product)
  23592. sub rax, QWORD PTR [rcx]
  23593. mov r9, QWORD PTR [r12+8]
  23594. mov QWORD PTR [r12], rax
  23595. sbb r9, QWORD PTR [rcx+8]
  23596. mov r10, QWORD PTR [r12+16]
  23597. mov QWORD PTR [r12+8], r9
  23598. sbb r10, QWORD PTR [rcx+16]
  23599. mov rax, QWORD PTR [r12+24]
  23600. mov QWORD PTR [r12+16], r10
  23601. sbb rax, QWORD PTR [rcx+24]
  23602. mov r9, QWORD PTR [r12+32]
  23603. mov QWORD PTR [r12+24], rax
  23604. sbb r9, QWORD PTR [rcx+32]
  23605. mov r10, QWORD PTR [r12+40]
  23606. mov QWORD PTR [r12+32], r9
  23607. sbb r10, QWORD PTR [rcx+40]
  23608. mov rax, QWORD PTR [r12+48]
  23609. mov QWORD PTR [r12+40], r10
  23610. sbb rax, QWORD PTR [rcx+48]
  23611. mov r9, QWORD PTR [r12+56]
  23612. mov QWORD PTR [r12+48], rax
  23613. sbb r9, QWORD PTR [rcx+56]
  23614. mov r10, QWORD PTR [r12+64]
  23615. mov QWORD PTR [r12+56], r9
  23616. sbb r10, QWORD PTR [rcx+64]
  23617. mov rax, QWORD PTR [r12+72]
  23618. mov QWORD PTR [r12+64], r10
  23619. sbb rax, QWORD PTR [rcx+72]
  23620. mov r9, QWORD PTR [r12+80]
  23621. mov QWORD PTR [r12+72], rax
  23622. sbb r9, QWORD PTR [rcx+80]
  23623. mov r10, QWORD PTR [r12+88]
  23624. mov QWORD PTR [r12+80], r9
  23625. sbb r10, QWORD PTR [rcx+88]
  23626. mov rax, QWORD PTR [r12+96]
  23627. mov QWORD PTR [r12+88], r10
  23628. sbb rax, QWORD PTR [rcx+96]
  23629. mov r9, QWORD PTR [r12+104]
  23630. mov QWORD PTR [r12+96], rax
  23631. sbb r9, QWORD PTR [rcx+104]
  23632. mov r10, QWORD PTR [r12+112]
  23633. mov QWORD PTR [r12+104], r9
  23634. sbb r10, QWORD PTR [rcx+112]
  23635. mov rax, QWORD PTR [r12+120]
  23636. mov QWORD PTR [r12+112], r10
  23637. sbb rax, QWORD PTR [rcx+120]
  23638. mov r9, QWORD PTR [r12+128]
  23639. mov QWORD PTR [r12+120], rax
  23640. sbb r9, QWORD PTR [rcx+128]
  23641. mov r10, QWORD PTR [r12+136]
  23642. mov QWORD PTR [r12+128], r9
  23643. sbb r10, QWORD PTR [rcx+136]
  23644. mov rax, QWORD PTR [r12+144]
  23645. mov QWORD PTR [r12+136], r10
  23646. sbb rax, QWORD PTR [rcx+144]
  23647. mov r9, QWORD PTR [r12+152]
  23648. mov QWORD PTR [r12+144], rax
  23649. sbb r9, QWORD PTR [rcx+152]
  23650. mov r10, QWORD PTR [r12+160]
  23651. mov QWORD PTR [r12+152], r9
  23652. sbb r10, QWORD PTR [rcx+160]
  23653. mov rax, QWORD PTR [r12+168]
  23654. mov QWORD PTR [r12+160], r10
  23655. sbb rax, QWORD PTR [rcx+168]
  23656. mov r9, QWORD PTR [r12+176]
  23657. mov QWORD PTR [r12+168], rax
  23658. sbb r9, QWORD PTR [rcx+176]
  23659. mov r10, QWORD PTR [r12+184]
  23660. mov QWORD PTR [r12+176], r9
  23661. sbb r10, QWORD PTR [rcx+184]
  23662. mov QWORD PTR [r12+184], r10
  23663. sbb r11, 0 ; propagate the borrow into the top-word accumulator
  23664. sub rsi, 96 ; rsi -> r + 96 bytes (word 12), where the middle term is added in
  23665. ; Add: r[12..35] += the 24-word middle term held at [r12]
  23666. mov rax, QWORD PTR [rsi]
  23667. add rax, QWORD PTR [r12]
  23668. mov r9, QWORD PTR [rsi+8]
  23669. mov QWORD PTR [rsi], rax
  23670. adc r9, QWORD PTR [r12+8]
  23671. mov r10, QWORD PTR [rsi+16]
  23672. mov QWORD PTR [rsi+8], r9
  23673. adc r10, QWORD PTR [r12+16]
  23674. mov rax, QWORD PTR [rsi+24]
  23675. mov QWORD PTR [rsi+16], r10
  23676. adc rax, QWORD PTR [r12+24]
  23677. mov r9, QWORD PTR [rsi+32]
  23678. mov QWORD PTR [rsi+24], rax
  23679. adc r9, QWORD PTR [r12+32]
  23680. mov r10, QWORD PTR [rsi+40]
  23681. mov QWORD PTR [rsi+32], r9
  23682. adc r10, QWORD PTR [r12+40]
  23683. mov rax, QWORD PTR [rsi+48]
  23684. mov QWORD PTR [rsi+40], r10
  23685. adc rax, QWORD PTR [r12+48]
  23686. mov r9, QWORD PTR [rsi+56]
  23687. mov QWORD PTR [rsi+48], rax
  23688. adc r9, QWORD PTR [r12+56]
  23689. mov r10, QWORD PTR [rsi+64]
  23690. mov QWORD PTR [rsi+56], r9
  23691. adc r10, QWORD PTR [r12+64]
  23692. mov rax, QWORD PTR [rsi+72]
  23693. mov QWORD PTR [rsi+64], r10
  23694. adc rax, QWORD PTR [r12+72]
  23695. mov r9, QWORD PTR [rsi+80]
  23696. mov QWORD PTR [rsi+72], rax
  23697. adc r9, QWORD PTR [r12+80]
  23698. mov r10, QWORD PTR [rsi+88]
  23699. mov QWORD PTR [rsi+80], r9
  23700. adc r10, QWORD PTR [r12+88]
  23701. mov rax, QWORD PTR [rsi+96]
  23702. mov QWORD PTR [rsi+88], r10
  23703. adc rax, QWORD PTR [r12+96]
  23704. mov r9, QWORD PTR [rsi+104]
  23705. mov QWORD PTR [rsi+96], rax
  23706. adc r9, QWORD PTR [r12+104]
  23707. mov r10, QWORD PTR [rsi+112]
  23708. mov QWORD PTR [rsi+104], r9
  23709. adc r10, QWORD PTR [r12+112]
  23710. mov rax, QWORD PTR [rsi+120]
  23711. mov QWORD PTR [rsi+112], r10
  23712. adc rax, QWORD PTR [r12+120]
  23713. mov r9, QWORD PTR [rsi+128]
  23714. mov QWORD PTR [rsi+120], rax
  23715. adc r9, QWORD PTR [r12+128]
  23716. mov r10, QWORD PTR [rsi+136]
  23717. mov QWORD PTR [rsi+128], r9
  23718. adc r10, QWORD PTR [r12+136]
  23719. mov rax, QWORD PTR [rsi+144]
  23720. mov QWORD PTR [rsi+136], r10
  23721. adc rax, QWORD PTR [r12+144]
  23722. mov r9, QWORD PTR [rsi+152]
  23723. mov QWORD PTR [rsi+144], rax
  23724. adc r9, QWORD PTR [r12+152]
  23725. mov r10, QWORD PTR [rsi+160]
  23726. mov QWORD PTR [rsi+152], r9
  23727. adc r10, QWORD PTR [r12+160]
  23728. mov rax, QWORD PTR [rsi+168]
  23729. mov QWORD PTR [rsi+160], r10
  23730. adc rax, QWORD PTR [r12+168]
  23731. mov r9, QWORD PTR [rsi+176]
  23732. mov QWORD PTR [rsi+168], rax
  23733. adc r9, QWORD PTR [r12+176]
  23734. mov r10, QWORD PTR [rsi+184]
  23735. mov QWORD PTR [rsi+176], r9
  23736. adc r10, QWORD PTR [r12+184]
  23737. mov QWORD PTR [rsi+184], r10
  23738. adc r11, 0 ; final carry of the middle-term add
  23739. mov QWORD PTR [rcx+288], r11 ; r[36] = accumulated top word (byte offset 288 = word 36)
  23740. add rsi, 96 ; rsi -> r + 192 bytes again (word 24)
  23741. ; Add: r[24..36] += low 13 words of the high product at [r13]
  23742. mov rax, QWORD PTR [rsi]
  23743. add rax, QWORD PTR [r13]
  23744. mov r9, QWORD PTR [rsi+8]
  23745. mov QWORD PTR [rsi], rax
  23746. adc r9, QWORD PTR [r13+8]
  23747. mov r10, QWORD PTR [rsi+16]
  23748. mov QWORD PTR [rsi+8], r9
  23749. adc r10, QWORD PTR [r13+16]
  23750. mov rax, QWORD PTR [rsi+24]
  23751. mov QWORD PTR [rsi+16], r10
  23752. adc rax, QWORD PTR [r13+24]
  23753. mov r9, QWORD PTR [rsi+32]
  23754. mov QWORD PTR [rsi+24], rax
  23755. adc r9, QWORD PTR [r13+32]
  23756. mov r10, QWORD PTR [rsi+40]
  23757. mov QWORD PTR [rsi+32], r9
  23758. adc r10, QWORD PTR [r13+40]
  23759. mov rax, QWORD PTR [rsi+48]
  23760. mov QWORD PTR [rsi+40], r10
  23761. adc rax, QWORD PTR [r13+48]
  23762. mov r9, QWORD PTR [rsi+56]
  23763. mov QWORD PTR [rsi+48], rax
  23764. adc r9, QWORD PTR [r13+56]
  23765. mov r10, QWORD PTR [rsi+64]
  23766. mov QWORD PTR [rsi+56], r9
  23767. adc r10, QWORD PTR [r13+64]
  23768. mov rax, QWORD PTR [rsi+72]
  23769. mov QWORD PTR [rsi+64], r10
  23770. adc rax, QWORD PTR [r13+72]
  23771. mov r9, QWORD PTR [rsi+80]
  23772. mov QWORD PTR [rsi+72], rax
  23773. adc r9, QWORD PTR [r13+80]
  23774. mov r10, QWORD PTR [rsi+88]
  23775. mov QWORD PTR [rsi+80], r9
  23776. adc r10, QWORD PTR [r13+88]
  23777. mov rax, QWORD PTR [rsi+96]
  23778. mov QWORD PTR [rsi+88], r10
  23779. adc rax, QWORD PTR [r13+96]
  23780. mov QWORD PTR [rsi+96], rax
  23781. ; Add to zero: r[37..47] = remaining high-product words plus the running carry (old contents of r there are not read)
  23782. mov rax, QWORD PTR [r13+104]
  23783. adc rax, 0
  23784. mov r9, QWORD PTR [r13+112]
  23785. mov QWORD PTR [rsi+104], rax
  23786. adc r9, 0
  23787. mov r10, QWORD PTR [r13+120]
  23788. mov QWORD PTR [rsi+112], r9
  23789. adc r10, 0
  23790. mov rax, QWORD PTR [r13+128]
  23791. mov QWORD PTR [rsi+120], r10
  23792. adc rax, 0
  23793. mov r9, QWORD PTR [r13+136]
  23794. mov QWORD PTR [rsi+128], rax
  23795. adc r9, 0
  23796. mov r10, QWORD PTR [r13+144]
  23797. mov QWORD PTR [rsi+136], r9
  23798. adc r10, 0
  23799. mov rax, QWORD PTR [r13+152]
  23800. mov QWORD PTR [rsi+144], r10
  23801. adc rax, 0
  23802. mov r9, QWORD PTR [r13+160]
  23803. mov QWORD PTR [rsi+152], rax
  23804. adc r9, 0
  23805. mov r10, QWORD PTR [r13+168]
  23806. mov QWORD PTR [rsi+160], r9
  23807. adc r10, 0
  23808. mov rax, QWORD PTR [r13+176]
  23809. mov QWORD PTR [rsi+168], r10
  23810. adc rax, 0
  23811. mov r9, QWORD PTR [r13+184]
  23812. mov QWORD PTR [rsi+176], rax
  23813. adc r9, 0
  23814. mov QWORD PTR [rsi+184], r9
  23815. add rsp, 616 ; release the scratch frame
  23816. pop rsi ; restore saved registers in reverse push order
  23817. pop rdi
  23818. pop r15
  23819. pop r14
  23820. pop r13
  23821. pop r12
  23822. ret
  23823. sp_3072_mul_24 ENDP
  23824. _text ENDS
  23825. IFDEF HAVE_INTEL_AVX2
  23826. ; /* Multiply a and b into r. (r = a * b)
  23827. ; *
  23828. ; * r A single precision integer.
  23829. ; * a A single precision integer.
  23830. ; * b A single precision integer.
  23831. ; */
  23832. _text SEGMENT READONLY PARA
  23833. sp_3072_mul_avx2_24 PROC
  23834. push r12
  23835. push r13
  23836. push r14
  23837. push r15
  23838. push rdi
  23839. push rsi
  23840. sub rsp, 616
  23841. mov QWORD PTR [rsp+576], rcx
  23842. mov QWORD PTR [rsp+584], rdx
  23843. mov QWORD PTR [rsp+592], r8
  23844. lea r12, QWORD PTR [rsp+384]
  23845. lea r14, QWORD PTR [rdx+96]
  23846. ; Add
  23847. mov rax, QWORD PTR [rdx]
  23848. xor r15, r15
  23849. add rax, QWORD PTR [r14]
  23850. mov r9, QWORD PTR [rdx+8]
  23851. mov QWORD PTR [r12], rax
  23852. adc r9, QWORD PTR [r14+8]
  23853. mov r10, QWORD PTR [rdx+16]
  23854. mov QWORD PTR [r12+8], r9
  23855. adc r10, QWORD PTR [r14+16]
  23856. mov rax, QWORD PTR [rdx+24]
  23857. mov QWORD PTR [r12+16], r10
  23858. adc rax, QWORD PTR [r14+24]
  23859. mov r9, QWORD PTR [rdx+32]
  23860. mov QWORD PTR [r12+24], rax
  23861. adc r9, QWORD PTR [r14+32]
  23862. mov r10, QWORD PTR [rdx+40]
  23863. mov QWORD PTR [r12+32], r9
  23864. adc r10, QWORD PTR [r14+40]
  23865. mov rax, QWORD PTR [rdx+48]
  23866. mov QWORD PTR [r12+40], r10
  23867. adc rax, QWORD PTR [r14+48]
  23868. mov r9, QWORD PTR [rdx+56]
  23869. mov QWORD PTR [r12+48], rax
  23870. adc r9, QWORD PTR [r14+56]
  23871. mov r10, QWORD PTR [rdx+64]
  23872. mov QWORD PTR [r12+56], r9
  23873. adc r10, QWORD PTR [r14+64]
  23874. mov rax, QWORD PTR [rdx+72]
  23875. mov QWORD PTR [r12+64], r10
  23876. adc rax, QWORD PTR [r14+72]
  23877. mov r9, QWORD PTR [rdx+80]
  23878. mov QWORD PTR [r12+72], rax
  23879. adc r9, QWORD PTR [r14+80]
  23880. mov r10, QWORD PTR [rdx+88]
  23881. mov QWORD PTR [r12+80], r9
  23882. adc r10, QWORD PTR [r14+88]
  23883. mov QWORD PTR [r12+88], r10
  23884. adc r15, 0
  23885. mov QWORD PTR [rsp+600], r15
  23886. lea r13, QWORD PTR [rsp+480]
  23887. lea r14, QWORD PTR [r8+96]
  23888. ; Add
  23889. mov rax, QWORD PTR [r8]
  23890. xor rdi, rdi
  23891. add rax, QWORD PTR [r14]
  23892. mov r9, QWORD PTR [r8+8]
  23893. mov QWORD PTR [r13], rax
  23894. adc r9, QWORD PTR [r14+8]
  23895. mov r10, QWORD PTR [r8+16]
  23896. mov QWORD PTR [r13+8], r9
  23897. adc r10, QWORD PTR [r14+16]
  23898. mov rax, QWORD PTR [r8+24]
  23899. mov QWORD PTR [r13+16], r10
  23900. adc rax, QWORD PTR [r14+24]
  23901. mov r9, QWORD PTR [r8+32]
  23902. mov QWORD PTR [r13+24], rax
  23903. adc r9, QWORD PTR [r14+32]
  23904. mov r10, QWORD PTR [r8+40]
  23905. mov QWORD PTR [r13+32], r9
  23906. adc r10, QWORD PTR [r14+40]
  23907. mov rax, QWORD PTR [r8+48]
  23908. mov QWORD PTR [r13+40], r10
  23909. adc rax, QWORD PTR [r14+48]
  23910. mov r9, QWORD PTR [r8+56]
  23911. mov QWORD PTR [r13+48], rax
  23912. adc r9, QWORD PTR [r14+56]
  23913. mov r10, QWORD PTR [r8+64]
  23914. mov QWORD PTR [r13+56], r9
  23915. adc r10, QWORD PTR [r14+64]
  23916. mov rax, QWORD PTR [r8+72]
  23917. mov QWORD PTR [r13+64], r10
  23918. adc rax, QWORD PTR [r14+72]
  23919. mov r9, QWORD PTR [r8+80]
  23920. mov QWORD PTR [r13+72], rax
  23921. adc r9, QWORD PTR [r14+80]
  23922. mov r10, QWORD PTR [r8+88]
  23923. mov QWORD PTR [r13+80], r9
  23924. adc r10, QWORD PTR [r14+88]
  23925. mov QWORD PTR [r13+88], r10
  23926. adc rdi, 0
  23927. mov QWORD PTR [rsp+608], rdi
  23928. mov r8, r13
  23929. mov rdx, r12
  23930. mov rcx, rsp
  23931. call sp_3072_mul_avx2_12
  23932. mov r8, QWORD PTR [rsp+592]
  23933. mov rdx, QWORD PTR [rsp+584]
  23934. lea rcx, QWORD PTR [rsp+192]
  23935. add r8, 96
  23936. add rdx, 96
  23937. call sp_3072_mul_avx2_12
  23938. mov r8, QWORD PTR [rsp+592]
  23939. mov rdx, QWORD PTR [rsp+584]
  23940. mov rcx, QWORD PTR [rsp+576]
  23941. call sp_3072_mul_avx2_12
  23942. IFDEF _WIN64
  23943. mov r8, QWORD PTR [rsp+592]
  23944. mov rdx, QWORD PTR [rsp+584]
  23945. mov rcx, QWORD PTR [rsp+576]
  23946. ENDIF
  23947. mov r15, QWORD PTR [rsp+600]
  23948. mov rdi, QWORD PTR [rsp+608]
  23949. mov rsi, QWORD PTR [rsp+576]
  23950. mov r11, r15
  23951. lea r12, QWORD PTR [rsp+384]
  23952. lea r13, QWORD PTR [rsp+480]
  23953. and r11, rdi
  23954. neg r15
  23955. neg rdi
  23956. add rsi, 192
  23957. mov rax, QWORD PTR [r12]
  23958. mov r9, QWORD PTR [r13]
  23959. pext rax, rax, rdi
  23960. pext r9, r9, r15
  23961. add rax, r9
  23962. mov r9, QWORD PTR [r12+8]
  23963. mov r10, QWORD PTR [r13+8]
  23964. pext r9, r9, rdi
  23965. pext r10, r10, r15
  23966. mov QWORD PTR [rsi], rax
  23967. adc r9, r10
  23968. mov r10, QWORD PTR [r12+16]
  23969. mov rax, QWORD PTR [r13+16]
  23970. pext r10, r10, rdi
  23971. pext rax, rax, r15
  23972. mov QWORD PTR [rsi+8], r9
  23973. adc r10, rax
  23974. mov rax, QWORD PTR [r12+24]
  23975. mov r9, QWORD PTR [r13+24]
  23976. pext rax, rax, rdi
  23977. pext r9, r9, r15
  23978. mov QWORD PTR [rsi+16], r10
  23979. adc rax, r9
  23980. mov r9, QWORD PTR [r12+32]
  23981. mov r10, QWORD PTR [r13+32]
  23982. pext r9, r9, rdi
  23983. pext r10, r10, r15
  23984. mov QWORD PTR [rsi+24], rax
  23985. adc r9, r10
  23986. mov r10, QWORD PTR [r12+40]
  23987. mov rax, QWORD PTR [r13+40]
  23988. pext r10, r10, rdi
  23989. pext rax, rax, r15
  23990. mov QWORD PTR [rsi+32], r9
  23991. adc r10, rax
  23992. mov rax, QWORD PTR [r12+48]
  23993. mov r9, QWORD PTR [r13+48]
  23994. pext rax, rax, rdi
  23995. pext r9, r9, r15
  23996. mov QWORD PTR [rsi+40], r10
  23997. adc rax, r9
  23998. mov r9, QWORD PTR [r12+56]
  23999. mov r10, QWORD PTR [r13+56]
  24000. pext r9, r9, rdi
  24001. pext r10, r10, r15
  24002. mov QWORD PTR [rsi+48], rax
  24003. adc r9, r10
  24004. mov r10, QWORD PTR [r12+64]
  24005. mov rax, QWORD PTR [r13+64]
  24006. pext r10, r10, rdi
  24007. pext rax, rax, r15
  24008. mov QWORD PTR [rsi+56], r9
  24009. adc r10, rax
  24010. mov rax, QWORD PTR [r12+72]
  24011. mov r9, QWORD PTR [r13+72]
  24012. pext rax, rax, rdi
  24013. pext r9, r9, r15
  24014. mov QWORD PTR [rsi+64], r10
  24015. adc rax, r9
  24016. mov r9, QWORD PTR [r12+80]
  24017. mov r10, QWORD PTR [r13+80]
  24018. pext r9, r9, rdi
  24019. pext r10, r10, r15
  24020. mov QWORD PTR [rsi+72], rax
  24021. adc r9, r10
  24022. mov r10, QWORD PTR [r12+88]
  24023. mov rax, QWORD PTR [r13+88]
  24024. pext r10, r10, rdi
  24025. pext rax, rax, r15
  24026. mov QWORD PTR [rsi+80], r9
  24027. adc r10, rax
  24028. mov QWORD PTR [rsi+88], r10
  24029. adc r11, 0
  24030. lea r13, QWORD PTR [rsp+192]
  24031. mov r12, rsp
  24032. mov rax, QWORD PTR [r12]
  24033. sub rax, QWORD PTR [r13]
  24034. mov r9, QWORD PTR [r12+8]
  24035. mov QWORD PTR [r12], rax
  24036. sbb r9, QWORD PTR [r13+8]
  24037. mov r10, QWORD PTR [r12+16]
  24038. mov QWORD PTR [r12+8], r9
  24039. sbb r10, QWORD PTR [r13+16]
  24040. mov rax, QWORD PTR [r12+24]
  24041. mov QWORD PTR [r12+16], r10
  24042. sbb rax, QWORD PTR [r13+24]
  24043. mov r9, QWORD PTR [r12+32]
  24044. mov QWORD PTR [r12+24], rax
  24045. sbb r9, QWORD PTR [r13+32]
  24046. mov r10, QWORD PTR [r12+40]
  24047. mov QWORD PTR [r12+32], r9
  24048. sbb r10, QWORD PTR [r13+40]
  24049. mov rax, QWORD PTR [r12+48]
  24050. mov QWORD PTR [r12+40], r10
  24051. sbb rax, QWORD PTR [r13+48]
  24052. mov r9, QWORD PTR [r12+56]
  24053. mov QWORD PTR [r12+48], rax
  24054. sbb r9, QWORD PTR [r13+56]
  24055. mov r10, QWORD PTR [r12+64]
  24056. mov QWORD PTR [r12+56], r9
  24057. sbb r10, QWORD PTR [r13+64]
  24058. mov rax, QWORD PTR [r12+72]
  24059. mov QWORD PTR [r12+64], r10
  24060. sbb rax, QWORD PTR [r13+72]
  24061. mov r9, QWORD PTR [r12+80]
  24062. mov QWORD PTR [r12+72], rax
  24063. sbb r9, QWORD PTR [r13+80]
  24064. mov r10, QWORD PTR [r12+88]
  24065. mov QWORD PTR [r12+80], r9
  24066. sbb r10, QWORD PTR [r13+88]
  24067. mov rax, QWORD PTR [r12+96]
  24068. mov QWORD PTR [r12+88], r10
  24069. sbb rax, QWORD PTR [r13+96]
  24070. mov r9, QWORD PTR [r12+104]
  24071. mov QWORD PTR [r12+96], rax
  24072. sbb r9, QWORD PTR [r13+104]
  24073. mov r10, QWORD PTR [r12+112]
  24074. mov QWORD PTR [r12+104], r9
  24075. sbb r10, QWORD PTR [r13+112]
  24076. mov rax, QWORD PTR [r12+120]
  24077. mov QWORD PTR [r12+112], r10
  24078. sbb rax, QWORD PTR [r13+120]
  24079. mov r9, QWORD PTR [r12+128]
  24080. mov QWORD PTR [r12+120], rax
  24081. sbb r9, QWORD PTR [r13+128]
  24082. mov r10, QWORD PTR [r12+136]
  24083. mov QWORD PTR [r12+128], r9
  24084. sbb r10, QWORD PTR [r13+136]
  24085. mov rax, QWORD PTR [r12+144]
  24086. mov QWORD PTR [r12+136], r10
  24087. sbb rax, QWORD PTR [r13+144]
  24088. mov r9, QWORD PTR [r12+152]
  24089. mov QWORD PTR [r12+144], rax
  24090. sbb r9, QWORD PTR [r13+152]
  24091. mov r10, QWORD PTR [r12+160]
  24092. mov QWORD PTR [r12+152], r9
  24093. sbb r10, QWORD PTR [r13+160]
  24094. mov rax, QWORD PTR [r12+168]
  24095. mov QWORD PTR [r12+160], r10
  24096. sbb rax, QWORD PTR [r13+168]
  24097. mov r9, QWORD PTR [r12+176]
  24098. mov QWORD PTR [r12+168], rax
  24099. sbb r9, QWORD PTR [r13+176]
  24100. mov r10, QWORD PTR [r12+184]
  24101. mov QWORD PTR [r12+176], r9
  24102. sbb r10, QWORD PTR [r13+184]
  24103. mov QWORD PTR [r12+184], r10
  24104. sbb r11, 0
  24105. mov rax, QWORD PTR [r12]
  24106. sub rax, QWORD PTR [rcx]
  24107. mov r9, QWORD PTR [r12+8]
  24108. mov QWORD PTR [r12], rax
  24109. sbb r9, QWORD PTR [rcx+8]
  24110. mov r10, QWORD PTR [r12+16]
  24111. mov QWORD PTR [r12+8], r9
  24112. sbb r10, QWORD PTR [rcx+16]
  24113. mov rax, QWORD PTR [r12+24]
  24114. mov QWORD PTR [r12+16], r10
  24115. sbb rax, QWORD PTR [rcx+24]
  24116. mov r9, QWORD PTR [r12+32]
  24117. mov QWORD PTR [r12+24], rax
  24118. sbb r9, QWORD PTR [rcx+32]
  24119. mov r10, QWORD PTR [r12+40]
  24120. mov QWORD PTR [r12+32], r9
  24121. sbb r10, QWORD PTR [rcx+40]
  24122. mov rax, QWORD PTR [r12+48]
  24123. mov QWORD PTR [r12+40], r10
  24124. sbb rax, QWORD PTR [rcx+48]
  24125. mov r9, QWORD PTR [r12+56]
  24126. mov QWORD PTR [r12+48], rax
  24127. sbb r9, QWORD PTR [rcx+56]
  24128. mov r10, QWORD PTR [r12+64]
  24129. mov QWORD PTR [r12+56], r9
  24130. sbb r10, QWORD PTR [rcx+64]
  24131. mov rax, QWORD PTR [r12+72]
  24132. mov QWORD PTR [r12+64], r10
  24133. sbb rax, QWORD PTR [rcx+72]
  24134. mov r9, QWORD PTR [r12+80]
  24135. mov QWORD PTR [r12+72], rax
  24136. sbb r9, QWORD PTR [rcx+80]
  24137. mov r10, QWORD PTR [r12+88]
  24138. mov QWORD PTR [r12+80], r9
  24139. sbb r10, QWORD PTR [rcx+88]
  24140. mov rax, QWORD PTR [r12+96]
  24141. mov QWORD PTR [r12+88], r10
  24142. sbb rax, QWORD PTR [rcx+96]
  24143. mov r9, QWORD PTR [r12+104]
  24144. mov QWORD PTR [r12+96], rax
  24145. sbb r9, QWORD PTR [rcx+104]
  24146. mov r10, QWORD PTR [r12+112]
  24147. mov QWORD PTR [r12+104], r9
  24148. sbb r10, QWORD PTR [rcx+112]
  24149. mov rax, QWORD PTR [r12+120]
  24150. mov QWORD PTR [r12+112], r10
  24151. sbb rax, QWORD PTR [rcx+120]
  24152. mov r9, QWORD PTR [r12+128]
  24153. mov QWORD PTR [r12+120], rax
  24154. sbb r9, QWORD PTR [rcx+128]
  24155. mov r10, QWORD PTR [r12+136]
  24156. mov QWORD PTR [r12+128], r9
  24157. sbb r10, QWORD PTR [rcx+136]
  24158. mov rax, QWORD PTR [r12+144]
  24159. mov QWORD PTR [r12+136], r10
  24160. sbb rax, QWORD PTR [rcx+144]
  24161. mov r9, QWORD PTR [r12+152]
  24162. mov QWORD PTR [r12+144], rax
  24163. sbb r9, QWORD PTR [rcx+152]
  24164. mov r10, QWORD PTR [r12+160]
  24165. mov QWORD PTR [r12+152], r9
  24166. sbb r10, QWORD PTR [rcx+160]
  24167. mov rax, QWORD PTR [r12+168]
  24168. mov QWORD PTR [r12+160], r10
  24169. sbb rax, QWORD PTR [rcx+168]
  24170. mov r9, QWORD PTR [r12+176]
  24171. mov QWORD PTR [r12+168], rax
  24172. sbb r9, QWORD PTR [rcx+176]
  24173. mov r10, QWORD PTR [r12+184]
  24174. mov QWORD PTR [r12+176], r9
  24175. sbb r10, QWORD PTR [rcx+184]
  24176. mov QWORD PTR [r12+184], r10
  24177. sbb r11, 0
  24178. sub rsi, 96
  24179. ; Add
  24180. mov rax, QWORD PTR [rsi]
  24181. add rax, QWORD PTR [r12]
  24182. mov r9, QWORD PTR [rsi+8]
  24183. mov QWORD PTR [rsi], rax
  24184. adc r9, QWORD PTR [r12+8]
  24185. mov r10, QWORD PTR [rsi+16]
  24186. mov QWORD PTR [rsi+8], r9
  24187. adc r10, QWORD PTR [r12+16]
  24188. mov rax, QWORD PTR [rsi+24]
  24189. mov QWORD PTR [rsi+16], r10
  24190. adc rax, QWORD PTR [r12+24]
  24191. mov r9, QWORD PTR [rsi+32]
  24192. mov QWORD PTR [rsi+24], rax
  24193. adc r9, QWORD PTR [r12+32]
  24194. mov r10, QWORD PTR [rsi+40]
  24195. mov QWORD PTR [rsi+32], r9
  24196. adc r10, QWORD PTR [r12+40]
  24197. mov rax, QWORD PTR [rsi+48]
  24198. mov QWORD PTR [rsi+40], r10
  24199. adc rax, QWORD PTR [r12+48]
  24200. mov r9, QWORD PTR [rsi+56]
  24201. mov QWORD PTR [rsi+48], rax
  24202. adc r9, QWORD PTR [r12+56]
  24203. mov r10, QWORD PTR [rsi+64]
  24204. mov QWORD PTR [rsi+56], r9
  24205. adc r10, QWORD PTR [r12+64]
  24206. mov rax, QWORD PTR [rsi+72]
  24207. mov QWORD PTR [rsi+64], r10
  24208. adc rax, QWORD PTR [r12+72]
  24209. mov r9, QWORD PTR [rsi+80]
  24210. mov QWORD PTR [rsi+72], rax
  24211. adc r9, QWORD PTR [r12+80]
  24212. mov r10, QWORD PTR [rsi+88]
  24213. mov QWORD PTR [rsi+80], r9
  24214. adc r10, QWORD PTR [r12+88]
  24215. mov rax, QWORD PTR [rsi+96]
  24216. mov QWORD PTR [rsi+88], r10
  24217. adc rax, QWORD PTR [r12+96]
  24218. mov r9, QWORD PTR [rsi+104]
  24219. mov QWORD PTR [rsi+96], rax
  24220. adc r9, QWORD PTR [r12+104]
  24221. mov r10, QWORD PTR [rsi+112]
  24222. mov QWORD PTR [rsi+104], r9
  24223. adc r10, QWORD PTR [r12+112]
  24224. mov rax, QWORD PTR [rsi+120]
  24225. mov QWORD PTR [rsi+112], r10
  24226. adc rax, QWORD PTR [r12+120]
  24227. mov r9, QWORD PTR [rsi+128]
  24228. mov QWORD PTR [rsi+120], rax
  24229. adc r9, QWORD PTR [r12+128]
  24230. mov r10, QWORD PTR [rsi+136]
  24231. mov QWORD PTR [rsi+128], r9
  24232. adc r10, QWORD PTR [r12+136]
  24233. mov rax, QWORD PTR [rsi+144]
  24234. mov QWORD PTR [rsi+136], r10
  24235. adc rax, QWORD PTR [r12+144]
  24236. mov r9, QWORD PTR [rsi+152]
  24237. mov QWORD PTR [rsi+144], rax
  24238. adc r9, QWORD PTR [r12+152]
  24239. mov r10, QWORD PTR [rsi+160]
  24240. mov QWORD PTR [rsi+152], r9
  24241. adc r10, QWORD PTR [r12+160]
  24242. mov rax, QWORD PTR [rsi+168]
  24243. mov QWORD PTR [rsi+160], r10
  24244. adc rax, QWORD PTR [r12+168]
  24245. mov r9, QWORD PTR [rsi+176]
  24246. mov QWORD PTR [rsi+168], rax
  24247. adc r9, QWORD PTR [r12+176]
  24248. mov r10, QWORD PTR [rsi+184]
  24249. mov QWORD PTR [rsi+176], r9
  24250. adc r10, QWORD PTR [r12+184]
  24251. mov QWORD PTR [rsi+184], r10
  24252. adc r11, 0
  24253. mov QWORD PTR [rcx+288], r11
  24254. add rsi, 96
  24255. ; Add
  24256. mov rax, QWORD PTR [rsi]
  24257. add rax, QWORD PTR [r13]
  24258. mov r9, QWORD PTR [rsi+8]
  24259. mov QWORD PTR [rsi], rax
  24260. adc r9, QWORD PTR [r13+8]
  24261. mov r10, QWORD PTR [rsi+16]
  24262. mov QWORD PTR [rsi+8], r9
  24263. adc r10, QWORD PTR [r13+16]
  24264. mov rax, QWORD PTR [rsi+24]
  24265. mov QWORD PTR [rsi+16], r10
  24266. adc rax, QWORD PTR [r13+24]
  24267. mov r9, QWORD PTR [rsi+32]
  24268. mov QWORD PTR [rsi+24], rax
  24269. adc r9, QWORD PTR [r13+32]
  24270. mov r10, QWORD PTR [rsi+40]
  24271. mov QWORD PTR [rsi+32], r9
  24272. adc r10, QWORD PTR [r13+40]
  24273. mov rax, QWORD PTR [rsi+48]
  24274. mov QWORD PTR [rsi+40], r10
  24275. adc rax, QWORD PTR [r13+48]
  24276. mov r9, QWORD PTR [rsi+56]
  24277. mov QWORD PTR [rsi+48], rax
  24278. adc r9, QWORD PTR [r13+56]
  24279. mov r10, QWORD PTR [rsi+64]
  24280. mov QWORD PTR [rsi+56], r9
  24281. adc r10, QWORD PTR [r13+64]
  24282. mov rax, QWORD PTR [rsi+72]
  24283. mov QWORD PTR [rsi+64], r10
  24284. adc rax, QWORD PTR [r13+72]
  24285. mov r9, QWORD PTR [rsi+80]
  24286. mov QWORD PTR [rsi+72], rax
  24287. adc r9, QWORD PTR [r13+80]
  24288. mov r10, QWORD PTR [rsi+88]
  24289. mov QWORD PTR [rsi+80], r9
  24290. adc r10, QWORD PTR [r13+88]
  24291. mov rax, QWORD PTR [rsi+96]
  24292. mov QWORD PTR [rsi+88], r10
  24293. adc rax, QWORD PTR [r13+96]
  24294. mov QWORD PTR [rsi+96], rax
  24295. ; Add to zero
  24296. mov rax, QWORD PTR [r13+104]
  24297. adc rax, 0
  24298. mov r9, QWORD PTR [r13+112]
  24299. mov QWORD PTR [rsi+104], rax
  24300. adc r9, 0
  24301. mov r10, QWORD PTR [r13+120]
  24302. mov QWORD PTR [rsi+112], r9
  24303. adc r10, 0
  24304. mov rax, QWORD PTR [r13+128]
  24305. mov QWORD PTR [rsi+120], r10
  24306. adc rax, 0
  24307. mov r9, QWORD PTR [r13+136]
  24308. mov QWORD PTR [rsi+128], rax
  24309. adc r9, 0
  24310. mov r10, QWORD PTR [r13+144]
  24311. mov QWORD PTR [rsi+136], r9
  24312. adc r10, 0
  24313. mov rax, QWORD PTR [r13+152]
  24314. mov QWORD PTR [rsi+144], r10
  24315. adc rax, 0
  24316. mov r9, QWORD PTR [r13+160]
  24317. mov QWORD PTR [rsi+152], rax
  24318. adc r9, 0
  24319. mov r10, QWORD PTR [r13+168]
  24320. mov QWORD PTR [rsi+160], r9
  24321. adc r10, 0
  24322. mov rax, QWORD PTR [r13+176]
  24323. mov QWORD PTR [rsi+168], r10
  24324. adc rax, 0
  24325. mov r9, QWORD PTR [r13+184]
  24326. mov QWORD PTR [rsi+176], rax
  24327. adc r9, 0
  24328. mov QWORD PTR [rsi+184], r9
  24329. add rsp, 616
  24330. pop rsi
  24331. pop rdi
  24332. pop r15
  24333. pop r14
  24334. pop r13
  24335. pop r12
  24336. ret
  24337. sp_3072_mul_avx2_24 ENDP
  24338. _text ENDS
  24339. ENDIF
  24340. ; /* Sub b from a into a. (a -= b)
  24341. ; *
  24342. ; * a A single precision integer and result.
  24343. ; * b A single precision integer.
         ; *
         ; * Register usage, as read from the code below:
         ; *   rcx     pointer to a; the 48 64-bit words at offsets 0..376 are
         ; *           read and then overwritten with the difference.
         ; *   rdx     pointer to b; the 48 64-bit words at offsets 0..376 are read.
         ; *   r8, r9  alternating scratch registers (overwritten).
         ; *   rax     result: 0 when the final word produced no borrow,
         ; *           all ones (-1) when it did.
         ; * (rcx and rdx are the first two integer argument registers of the
         ; *  Microsoft x64 convention; presumably the routine is called that way.)
  24344. ; */
  24345. _text SEGMENT READONLY PARA
  24346. sp_3072_sub_in_place_48 PROC
         ; Word 0 uses SUB, which starts the borrow chain in the carry flag.
  24347. mov r8, QWORD PTR [rcx]
  24348. sub r8, QWORD PTR [rdx]
         ; Words 1..47: each step loads the next word of a, stores the previous
         ; result, then SBBs the matching word of b. The loads and stores are MOVs,
         ; which leave the flags untouched, so the borrow survives from one SBB to
         ; the next. No other flag-writing instruction may be placed in this chain.
  24349. mov r9, QWORD PTR [rcx+8]
  24350. mov QWORD PTR [rcx], r8
  24351. sbb r9, QWORD PTR [rdx+8]
  24352. mov r8, QWORD PTR [rcx+16]
  24353. mov QWORD PTR [rcx+8], r9
  24354. sbb r8, QWORD PTR [rdx+16]
  24355. mov r9, QWORD PTR [rcx+24]
  24356. mov QWORD PTR [rcx+16], r8
  24357. sbb r9, QWORD PTR [rdx+24]
  24358. mov r8, QWORD PTR [rcx+32]
  24359. mov QWORD PTR [rcx+24], r9
  24360. sbb r8, QWORD PTR [rdx+32]
  24361. mov r9, QWORD PTR [rcx+40]
  24362. mov QWORD PTR [rcx+32], r8
  24363. sbb r9, QWORD PTR [rdx+40]
  24364. mov r8, QWORD PTR [rcx+48]
  24365. mov QWORD PTR [rcx+40], r9
  24366. sbb r8, QWORD PTR [rdx+48]
  24367. mov r9, QWORD PTR [rcx+56]
  24368. mov QWORD PTR [rcx+48], r8
  24369. sbb r9, QWORD PTR [rdx+56]
  24370. mov r8, QWORD PTR [rcx+64]
  24371. mov QWORD PTR [rcx+56], r9
  24372. sbb r8, QWORD PTR [rdx+64]
  24373. mov r9, QWORD PTR [rcx+72]
  24374. mov QWORD PTR [rcx+64], r8
  24375. sbb r9, QWORD PTR [rdx+72]
  24376. mov r8, QWORD PTR [rcx+80]
  24377. mov QWORD PTR [rcx+72], r9
  24378. sbb r8, QWORD PTR [rdx+80]
  24379. mov r9, QWORD PTR [rcx+88]
  24380. mov QWORD PTR [rcx+80], r8
  24381. sbb r9, QWORD PTR [rdx+88]
  24382. mov r8, QWORD PTR [rcx+96]
  24383. mov QWORD PTR [rcx+88], r9
  24384. sbb r8, QWORD PTR [rdx+96]
  24385. mov r9, QWORD PTR [rcx+104]
  24386. mov QWORD PTR [rcx+96], r8
  24387. sbb r9, QWORD PTR [rdx+104]
  24388. mov r8, QWORD PTR [rcx+112]
  24389. mov QWORD PTR [rcx+104], r9
  24390. sbb r8, QWORD PTR [rdx+112]
  24391. mov r9, QWORD PTR [rcx+120]
  24392. mov QWORD PTR [rcx+112], r8
  24393. sbb r9, QWORD PTR [rdx+120]
  24394. mov r8, QWORD PTR [rcx+128]
  24395. mov QWORD PTR [rcx+120], r9
  24396. sbb r8, QWORD PTR [rdx+128]
  24397. mov r9, QWORD PTR [rcx+136]
  24398. mov QWORD PTR [rcx+128], r8
  24399. sbb r9, QWORD PTR [rdx+136]
  24400. mov r8, QWORD PTR [rcx+144]
  24401. mov QWORD PTR [rcx+136], r9
  24402. sbb r8, QWORD PTR [rdx+144]
  24403. mov r9, QWORD PTR [rcx+152]
  24404. mov QWORD PTR [rcx+144], r8
  24405. sbb r9, QWORD PTR [rdx+152]
  24406. mov r8, QWORD PTR [rcx+160]
  24407. mov QWORD PTR [rcx+152], r9
  24408. sbb r8, QWORD PTR [rdx+160]
  24409. mov r9, QWORD PTR [rcx+168]
  24410. mov QWORD PTR [rcx+160], r8
  24411. sbb r9, QWORD PTR [rdx+168]
  24412. mov r8, QWORD PTR [rcx+176]
  24413. mov QWORD PTR [rcx+168], r9
  24414. sbb r8, QWORD PTR [rdx+176]
  24415. mov r9, QWORD PTR [rcx+184]
  24416. mov QWORD PTR [rcx+176], r8
  24417. sbb r9, QWORD PTR [rdx+184]
  24418. mov r8, QWORD PTR [rcx+192]
  24419. mov QWORD PTR [rcx+184], r9
  24420. sbb r8, QWORD PTR [rdx+192]
  24421. mov r9, QWORD PTR [rcx+200]
  24422. mov QWORD PTR [rcx+192], r8
  24423. sbb r9, QWORD PTR [rdx+200]
  24424. mov r8, QWORD PTR [rcx+208]
  24425. mov QWORD PTR [rcx+200], r9
  24426. sbb r8, QWORD PTR [rdx+208]
  24427. mov r9, QWORD PTR [rcx+216]
  24428. mov QWORD PTR [rcx+208], r8
  24429. sbb r9, QWORD PTR [rdx+216]
  24430. mov r8, QWORD PTR [rcx+224]
  24431. mov QWORD PTR [rcx+216], r9
  24432. sbb r8, QWORD PTR [rdx+224]
  24433. mov r9, QWORD PTR [rcx+232]
  24434. mov QWORD PTR [rcx+224], r8
  24435. sbb r9, QWORD PTR [rdx+232]
  24436. mov r8, QWORD PTR [rcx+240]
  24437. mov QWORD PTR [rcx+232], r9
  24438. sbb r8, QWORD PTR [rdx+240]
  24439. mov r9, QWORD PTR [rcx+248]
  24440. mov QWORD PTR [rcx+240], r8
  24441. sbb r9, QWORD PTR [rdx+248]
  24442. mov r8, QWORD PTR [rcx+256]
  24443. mov QWORD PTR [rcx+248], r9
  24444. sbb r8, QWORD PTR [rdx+256]
  24445. mov r9, QWORD PTR [rcx+264]
  24446. mov QWORD PTR [rcx+256], r8
  24447. sbb r9, QWORD PTR [rdx+264]
  24448. mov r8, QWORD PTR [rcx+272]
  24449. mov QWORD PTR [rcx+264], r9
  24450. sbb r8, QWORD PTR [rdx+272]
  24451. mov r9, QWORD PTR [rcx+280]
  24452. mov QWORD PTR [rcx+272], r8
  24453. sbb r9, QWORD PTR [rdx+280]
  24454. mov r8, QWORD PTR [rcx+288]
  24455. mov QWORD PTR [rcx+280], r9
  24456. sbb r8, QWORD PTR [rdx+288]
  24457. mov r9, QWORD PTR [rcx+296]
  24458. mov QWORD PTR [rcx+288], r8
  24459. sbb r9, QWORD PTR [rdx+296]
  24460. mov r8, QWORD PTR [rcx+304]
  24461. mov QWORD PTR [rcx+296], r9
  24462. sbb r8, QWORD PTR [rdx+304]
  24463. mov r9, QWORD PTR [rcx+312]
  24464. mov QWORD PTR [rcx+304], r8
  24465. sbb r9, QWORD PTR [rdx+312]
  24466. mov r8, QWORD PTR [rcx+320]
  24467. mov QWORD PTR [rcx+312], r9
  24468. sbb r8, QWORD PTR [rdx+320]
  24469. mov r9, QWORD PTR [rcx+328]
  24470. mov QWORD PTR [rcx+320], r8
  24471. sbb r9, QWORD PTR [rdx+328]
  24472. mov r8, QWORD PTR [rcx+336]
  24473. mov QWORD PTR [rcx+328], r9
  24474. sbb r8, QWORD PTR [rdx+336]
  24475. mov r9, QWORD PTR [rcx+344]
  24476. mov QWORD PTR [rcx+336], r8
  24477. sbb r9, QWORD PTR [rdx+344]
  24478. mov r8, QWORD PTR [rcx+352]
  24479. mov QWORD PTR [rcx+344], r9
  24480. sbb r8, QWORD PTR [rdx+352]
  24481. mov r9, QWORD PTR [rcx+360]
  24482. mov QWORD PTR [rcx+352], r8
  24483. sbb r9, QWORD PTR [rdx+360]
  24484. mov r8, QWORD PTR [rcx+368]
  24485. mov QWORD PTR [rcx+360], r9
  24486. sbb r8, QWORD PTR [rdx+368]
  24487. mov r9, QWORD PTR [rcx+376]
  24488. mov QWORD PTR [rcx+368], r8
  24489. sbb r9, QWORD PTR [rdx+376]
         ; Store the last (48th) word; the final borrow is still in the carry flag.
  24490. mov QWORD PTR [rcx+376], r9
         ; rax - rax - CF: yields 0 when there was no borrow and -1 when there was,
         ; regardless of the previous contents of rax.
  24491. sbb rax, rax
  24492. ret
  24493. sp_3072_sub_in_place_48 ENDP
  24494. _text ENDS
  24495. ; /* Add b to a into r. (r = a + b)
  24496. ; *
  24497. ; * r A single precision integer.
  24498. ; * a A single precision integer.
  24499. ; * b A single precision integer.
         ; *
         ; * Register usage, as read from the code below:
         ; *   rcx      pointer to r; the 48 64-bit words at offsets 0..376 are written.
         ; *   rdx      pointer to a; the 48 64-bit words at offsets 0..376 are read.
         ; *   r8       pointer to b; the 48 64-bit words at offsets 0..376 are read.
         ; *   r9, r10  alternating scratch registers (overwritten).
         ; *   rax      result: the carry out of the final word (0 or 1).
         ; * (rcx, rdx and r8 are the first three integer argument registers of the
         ; *  Microsoft x64 convention; presumably the routine is called that way.)
  24500. ; */
  24501. _text SEGMENT READONLY PARA
  24502. sp_3072_add_48 PROC
  24503. ; Add
  24504. mov r9, QWORD PTR [rdx]
         ; Zero rax now so the closing ADC can turn the final carry into 0 or 1.
         ; XOR changes the flags, but the ADD that follows sets the carry afresh.
  24505. xor rax, rax
         ; Word 0 uses ADD, which starts the carry chain.
  24506. add r9, QWORD PTR [r8]
         ; Words 1..47: each step loads the next word of a, stores the previous
         ; sum, then ADCs the matching word of b. The loads and stores are MOVs,
         ; which leave the flags untouched, so the carry survives from one ADC to
         ; the next. No other flag-writing instruction may be placed in this chain.
  24507. mov r10, QWORD PTR [rdx+8]
  24508. mov QWORD PTR [rcx], r9
  24509. adc r10, QWORD PTR [r8+8]
  24510. mov r9, QWORD PTR [rdx+16]
  24511. mov QWORD PTR [rcx+8], r10
  24512. adc r9, QWORD PTR [r8+16]
  24513. mov r10, QWORD PTR [rdx+24]
  24514. mov QWORD PTR [rcx+16], r9
  24515. adc r10, QWORD PTR [r8+24]
  24516. mov r9, QWORD PTR [rdx+32]
  24517. mov QWORD PTR [rcx+24], r10
  24518. adc r9, QWORD PTR [r8+32]
  24519. mov r10, QWORD PTR [rdx+40]
  24520. mov QWORD PTR [rcx+32], r9
  24521. adc r10, QWORD PTR [r8+40]
  24522. mov r9, QWORD PTR [rdx+48]
  24523. mov QWORD PTR [rcx+40], r10
  24524. adc r9, QWORD PTR [r8+48]
  24525. mov r10, QWORD PTR [rdx+56]
  24526. mov QWORD PTR [rcx+48], r9
  24527. adc r10, QWORD PTR [r8+56]
  24528. mov r9, QWORD PTR [rdx+64]
  24529. mov QWORD PTR [rcx+56], r10
  24530. adc r9, QWORD PTR [r8+64]
  24531. mov r10, QWORD PTR [rdx+72]
  24532. mov QWORD PTR [rcx+64], r9
  24533. adc r10, QWORD PTR [r8+72]
  24534. mov r9, QWORD PTR [rdx+80]
  24535. mov QWORD PTR [rcx+72], r10
  24536. adc r9, QWORD PTR [r8+80]
  24537. mov r10, QWORD PTR [rdx+88]
  24538. mov QWORD PTR [rcx+80], r9
  24539. adc r10, QWORD PTR [r8+88]
  24540. mov r9, QWORD PTR [rdx+96]
  24541. mov QWORD PTR [rcx+88], r10
  24542. adc r9, QWORD PTR [r8+96]
  24543. mov r10, QWORD PTR [rdx+104]
  24544. mov QWORD PTR [rcx+96], r9
  24545. adc r10, QWORD PTR [r8+104]
  24546. mov r9, QWORD PTR [rdx+112]
  24547. mov QWORD PTR [rcx+104], r10
  24548. adc r9, QWORD PTR [r8+112]
  24549. mov r10, QWORD PTR [rdx+120]
  24550. mov QWORD PTR [rcx+112], r9
  24551. adc r10, QWORD PTR [r8+120]
  24552. mov r9, QWORD PTR [rdx+128]
  24553. mov QWORD PTR [rcx+120], r10
  24554. adc r9, QWORD PTR [r8+128]
  24555. mov r10, QWORD PTR [rdx+136]
  24556. mov QWORD PTR [rcx+128], r9
  24557. adc r10, QWORD PTR [r8+136]
  24558. mov r9, QWORD PTR [rdx+144]
  24559. mov QWORD PTR [rcx+136], r10
  24560. adc r9, QWORD PTR [r8+144]
  24561. mov r10, QWORD PTR [rdx+152]
  24562. mov QWORD PTR [rcx+144], r9
  24563. adc r10, QWORD PTR [r8+152]
  24564. mov r9, QWORD PTR [rdx+160]
  24565. mov QWORD PTR [rcx+152], r10
  24566. adc r9, QWORD PTR [r8+160]
  24567. mov r10, QWORD PTR [rdx+168]
  24568. mov QWORD PTR [rcx+160], r9
  24569. adc r10, QWORD PTR [r8+168]
  24570. mov r9, QWORD PTR [rdx+176]
  24571. mov QWORD PTR [rcx+168], r10
  24572. adc r9, QWORD PTR [r8+176]
  24573. mov r10, QWORD PTR [rdx+184]
  24574. mov QWORD PTR [rcx+176], r9
  24575. adc r10, QWORD PTR [r8+184]
  24576. mov r9, QWORD PTR [rdx+192]
  24577. mov QWORD PTR [rcx+184], r10
  24578. adc r9, QWORD PTR [r8+192]
  24579. mov r10, QWORD PTR [rdx+200]
  24580. mov QWORD PTR [rcx+192], r9
  24581. adc r10, QWORD PTR [r8+200]
  24582. mov r9, QWORD PTR [rdx+208]
  24583. mov QWORD PTR [rcx+200], r10
  24584. adc r9, QWORD PTR [r8+208]
  24585. mov r10, QWORD PTR [rdx+216]
  24586. mov QWORD PTR [rcx+208], r9
  24587. adc r10, QWORD PTR [r8+216]
  24588. mov r9, QWORD PTR [rdx+224]
  24589. mov QWORD PTR [rcx+216], r10
  24590. adc r9, QWORD PTR [r8+224]
  24591. mov r10, QWORD PTR [rdx+232]
  24592. mov QWORD PTR [rcx+224], r9
  24593. adc r10, QWORD PTR [r8+232]
  24594. mov r9, QWORD PTR [rdx+240]
  24595. mov QWORD PTR [rcx+232], r10
  24596. adc r9, QWORD PTR [r8+240]
  24597. mov r10, QWORD PTR [rdx+248]
  24598. mov QWORD PTR [rcx+240], r9
  24599. adc r10, QWORD PTR [r8+248]
  24600. mov r9, QWORD PTR [rdx+256]
  24601. mov QWORD PTR [rcx+248], r10
  24602. adc r9, QWORD PTR [r8+256]
  24603. mov r10, QWORD PTR [rdx+264]
  24604. mov QWORD PTR [rcx+256], r9
  24605. adc r10, QWORD PTR [r8+264]
  24606. mov r9, QWORD PTR [rdx+272]
  24607. mov QWORD PTR [rcx+264], r10
  24608. adc r9, QWORD PTR [r8+272]
  24609. mov r10, QWORD PTR [rdx+280]
  24610. mov QWORD PTR [rcx+272], r9
  24611. adc r10, QWORD PTR [r8+280]
  24612. mov r9, QWORD PTR [rdx+288]
  24613. mov QWORD PTR [rcx+280], r10
  24614. adc r9, QWORD PTR [r8+288]
  24615. mov r10, QWORD PTR [rdx+296]
  24616. mov QWORD PTR [rcx+288], r9
  24617. adc r10, QWORD PTR [r8+296]
  24618. mov r9, QWORD PTR [rdx+304]
  24619. mov QWORD PTR [rcx+296], r10
  24620. adc r9, QWORD PTR [r8+304]
  24621. mov r10, QWORD PTR [rdx+312]
  24622. mov QWORD PTR [rcx+304], r9
  24623. adc r10, QWORD PTR [r8+312]
  24624. mov r9, QWORD PTR [rdx+320]
  24625. mov QWORD PTR [rcx+312], r10
  24626. adc r9, QWORD PTR [r8+320]
  24627. mov r10, QWORD PTR [rdx+328]
  24628. mov QWORD PTR [rcx+320], r9
  24629. adc r10, QWORD PTR [r8+328]
  24630. mov r9, QWORD PTR [rdx+336]
  24631. mov QWORD PTR [rcx+328], r10
  24632. adc r9, QWORD PTR [r8+336]
  24633. mov r10, QWORD PTR [rdx+344]
  24634. mov QWORD PTR [rcx+336], r9
  24635. adc r10, QWORD PTR [r8+344]
  24636. mov r9, QWORD PTR [rdx+352]
  24637. mov QWORD PTR [rcx+344], r10
  24638. adc r9, QWORD PTR [r8+352]
  24639. mov r10, QWORD PTR [rdx+360]
  24640. mov QWORD PTR [rcx+352], r9
  24641. adc r10, QWORD PTR [r8+360]
  24642. mov r9, QWORD PTR [rdx+368]
  24643. mov QWORD PTR [rcx+360], r10
  24644. adc r9, QWORD PTR [r8+368]
  24645. mov r10, QWORD PTR [rdx+376]
  24646. mov QWORD PTR [rcx+368], r9
  24647. adc r10, QWORD PTR [r8+376]
         ; Store the last (48th) word; the final carry is still in the carry flag.
  24648. mov QWORD PTR [rcx+376], r10
         ; rax was zeroed at the top, so this leaves rax = carry out (0 or 1).
  24649. adc rax, 0
  24650. ret
  24651. sp_3072_add_48 ENDP
  24652. _text ENDS
  24653. ; /* Multiply a and b into r. (r = a * b)
  24654. ; *
  24655. ; * r A single precision integer.
  24656. ; * a A single precision integer.
  24657. ; * b A single precision integer.
  24658. ; */
  24659. _text SEGMENT READONLY PARA
  24660. sp_3072_mul_48 PROC
  24661. push r12
  24662. push r13
  24663. push r14
  24664. push r15
  24665. push rdi
  24666. push rsi
  24667. sub rsp, 1192
  24668. mov QWORD PTR [rsp+1152], rcx
  24669. mov QWORD PTR [rsp+1160], rdx
  24670. mov QWORD PTR [rsp+1168], r8
  24671. lea r12, QWORD PTR [rsp+768]
  24672. lea r14, QWORD PTR [rdx+192]
  24673. ; Add
  24674. mov rax, QWORD PTR [rdx]
  24675. xor r15, r15
  24676. add rax, QWORD PTR [r14]
  24677. mov r9, QWORD PTR [rdx+8]
  24678. mov QWORD PTR [r12], rax
  24679. adc r9, QWORD PTR [r14+8]
  24680. mov r10, QWORD PTR [rdx+16]
  24681. mov QWORD PTR [r12+8], r9
  24682. adc r10, QWORD PTR [r14+16]
  24683. mov rax, QWORD PTR [rdx+24]
  24684. mov QWORD PTR [r12+16], r10
  24685. adc rax, QWORD PTR [r14+24]
  24686. mov r9, QWORD PTR [rdx+32]
  24687. mov QWORD PTR [r12+24], rax
  24688. adc r9, QWORD PTR [r14+32]
  24689. mov r10, QWORD PTR [rdx+40]
  24690. mov QWORD PTR [r12+32], r9
  24691. adc r10, QWORD PTR [r14+40]
  24692. mov rax, QWORD PTR [rdx+48]
  24693. mov QWORD PTR [r12+40], r10
  24694. adc rax, QWORD PTR [r14+48]
  24695. mov r9, QWORD PTR [rdx+56]
  24696. mov QWORD PTR [r12+48], rax
  24697. adc r9, QWORD PTR [r14+56]
  24698. mov r10, QWORD PTR [rdx+64]
  24699. mov QWORD PTR [r12+56], r9
  24700. adc r10, QWORD PTR [r14+64]
  24701. mov rax, QWORD PTR [rdx+72]
  24702. mov QWORD PTR [r12+64], r10
  24703. adc rax, QWORD PTR [r14+72]
  24704. mov r9, QWORD PTR [rdx+80]
  24705. mov QWORD PTR [r12+72], rax
  24706. adc r9, QWORD PTR [r14+80]
  24707. mov r10, QWORD PTR [rdx+88]
  24708. mov QWORD PTR [r12+80], r9
  24709. adc r10, QWORD PTR [r14+88]
  24710. mov rax, QWORD PTR [rdx+96]
  24711. mov QWORD PTR [r12+88], r10
  24712. adc rax, QWORD PTR [r14+96]
  24713. mov r9, QWORD PTR [rdx+104]
  24714. mov QWORD PTR [r12+96], rax
  24715. adc r9, QWORD PTR [r14+104]
  24716. mov r10, QWORD PTR [rdx+112]
  24717. mov QWORD PTR [r12+104], r9
  24718. adc r10, QWORD PTR [r14+112]
  24719. mov rax, QWORD PTR [rdx+120]
  24720. mov QWORD PTR [r12+112], r10
  24721. adc rax, QWORD PTR [r14+120]
  24722. mov r9, QWORD PTR [rdx+128]
  24723. mov QWORD PTR [r12+120], rax
  24724. adc r9, QWORD PTR [r14+128]
  24725. mov r10, QWORD PTR [rdx+136]
  24726. mov QWORD PTR [r12+128], r9
  24727. adc r10, QWORD PTR [r14+136]
  24728. mov rax, QWORD PTR [rdx+144]
  24729. mov QWORD PTR [r12+136], r10
  24730. adc rax, QWORD PTR [r14+144]
  24731. mov r9, QWORD PTR [rdx+152]
  24732. mov QWORD PTR [r12+144], rax
  24733. adc r9, QWORD PTR [r14+152]
  24734. mov r10, QWORD PTR [rdx+160]
  24735. mov QWORD PTR [r12+152], r9
  24736. adc r10, QWORD PTR [r14+160]
  24737. mov rax, QWORD PTR [rdx+168]
  24738. mov QWORD PTR [r12+160], r10
  24739. adc rax, QWORD PTR [r14+168]
  24740. mov r9, QWORD PTR [rdx+176]
  24741. mov QWORD PTR [r12+168], rax
  24742. adc r9, QWORD PTR [r14+176]
  24743. mov r10, QWORD PTR [rdx+184]
  24744. mov QWORD PTR [r12+176], r9
  24745. adc r10, QWORD PTR [r14+184]
  24746. mov QWORD PTR [r12+184], r10
  24747. adc r15, 0
  24748. mov QWORD PTR [rsp+1176], r15
  24749. lea r13, QWORD PTR [rsp+960]
  24750. lea r14, QWORD PTR [r8+192]
  24751. ; Add
  24752. mov rax, QWORD PTR [r8]
  24753. xor rdi, rdi
  24754. add rax, QWORD PTR [r14]
  24755. mov r9, QWORD PTR [r8+8]
  24756. mov QWORD PTR [r13], rax
  24757. adc r9, QWORD PTR [r14+8]
  24758. mov r10, QWORD PTR [r8+16]
  24759. mov QWORD PTR [r13+8], r9
  24760. adc r10, QWORD PTR [r14+16]
  24761. mov rax, QWORD PTR [r8+24]
  24762. mov QWORD PTR [r13+16], r10
  24763. adc rax, QWORD PTR [r14+24]
  24764. mov r9, QWORD PTR [r8+32]
  24765. mov QWORD PTR [r13+24], rax
  24766. adc r9, QWORD PTR [r14+32]
  24767. mov r10, QWORD PTR [r8+40]
  24768. mov QWORD PTR [r13+32], r9
  24769. adc r10, QWORD PTR [r14+40]
  24770. mov rax, QWORD PTR [r8+48]
  24771. mov QWORD PTR [r13+40], r10
  24772. adc rax, QWORD PTR [r14+48]
  24773. mov r9, QWORD PTR [r8+56]
  24774. mov QWORD PTR [r13+48], rax
  24775. adc r9, QWORD PTR [r14+56]
  24776. mov r10, QWORD PTR [r8+64]
  24777. mov QWORD PTR [r13+56], r9
  24778. adc r10, QWORD PTR [r14+64]
  24779. mov rax, QWORD PTR [r8+72]
  24780. mov QWORD PTR [r13+64], r10
  24781. adc rax, QWORD PTR [r14+72]
  24782. mov r9, QWORD PTR [r8+80]
  24783. mov QWORD PTR [r13+72], rax
  24784. adc r9, QWORD PTR [r14+80]
  24785. mov r10, QWORD PTR [r8+88]
  24786. mov QWORD PTR [r13+80], r9
  24787. adc r10, QWORD PTR [r14+88]
  24788. mov rax, QWORD PTR [r8+96]
  24789. mov QWORD PTR [r13+88], r10
  24790. adc rax, QWORD PTR [r14+96]
  24791. mov r9, QWORD PTR [r8+104]
  24792. mov QWORD PTR [r13+96], rax
  24793. adc r9, QWORD PTR [r14+104]
  24794. mov r10, QWORD PTR [r8+112]
  24795. mov QWORD PTR [r13+104], r9
  24796. adc r10, QWORD PTR [r14+112]
  24797. mov rax, QWORD PTR [r8+120]
  24798. mov QWORD PTR [r13+112], r10
  24799. adc rax, QWORD PTR [r14+120]
  24800. mov r9, QWORD PTR [r8+128]
  24801. mov QWORD PTR [r13+120], rax
  24802. adc r9, QWORD PTR [r14+128]
  24803. mov r10, QWORD PTR [r8+136]
  24804. mov QWORD PTR [r13+128], r9
  24805. adc r10, QWORD PTR [r14+136]
  24806. mov rax, QWORD PTR [r8+144]
  24807. mov QWORD PTR [r13+136], r10
  24808. adc rax, QWORD PTR [r14+144]
  24809. mov r9, QWORD PTR [r8+152]
  24810. mov QWORD PTR [r13+144], rax
  24811. adc r9, QWORD PTR [r14+152]
  24812. mov r10, QWORD PTR [r8+160]
  24813. mov QWORD PTR [r13+152], r9
  24814. adc r10, QWORD PTR [r14+160]
  24815. mov rax, QWORD PTR [r8+168]
  24816. mov QWORD PTR [r13+160], r10
  24817. adc rax, QWORD PTR [r14+168]
  24818. mov r9, QWORD PTR [r8+176]
  24819. mov QWORD PTR [r13+168], rax
  24820. adc r9, QWORD PTR [r14+176]
  24821. mov r10, QWORD PTR [r8+184]
  24822. mov QWORD PTR [r13+176], r9
  24823. adc r10, QWORD PTR [r14+184]
  24824. mov QWORD PTR [r13+184], r10
  24825. adc rdi, 0
  24826. mov QWORD PTR [rsp+1184], rdi
  24827. mov r8, r13
  24828. mov rdx, r12
  24829. mov rcx, rsp
  24830. call sp_3072_mul_24
  24831. mov r8, QWORD PTR [rsp+1168]
  24832. mov rdx, QWORD PTR [rsp+1160]
  24833. lea rcx, QWORD PTR [rsp+384]
  24834. add r8, 192
  24835. add rdx, 192
  24836. call sp_3072_mul_24
  24837. mov r8, QWORD PTR [rsp+1168]
  24838. mov rdx, QWORD PTR [rsp+1160]
  24839. mov rcx, QWORD PTR [rsp+1152]
  24840. call sp_3072_mul_24
  24841. IFDEF _WIN64
  24842. mov r8, QWORD PTR [rsp+1168]
  24843. mov rdx, QWORD PTR [rsp+1160]
  24844. mov rcx, QWORD PTR [rsp+1152]
  24845. ENDIF
  24846. mov r15, QWORD PTR [rsp+1176]
  24847. mov rdi, QWORD PTR [rsp+1184]
  24848. mov rsi, QWORD PTR [rsp+1152]
  24849. mov r11, r15
  24850. lea r12, QWORD PTR [rsp+768]
  24851. lea r13, QWORD PTR [rsp+960]
  24852. and r11, rdi
  24853. neg r15
  24854. neg rdi
  24855. add rsi, 384
  24856. mov rax, QWORD PTR [r12]
  24857. mov r9, QWORD PTR [r13]
  24858. and rax, rdi
  24859. and r9, r15
  24860. mov QWORD PTR [r12], rax
  24861. mov QWORD PTR [r13], r9
  24862. mov rax, QWORD PTR [r12+8]
  24863. mov r9, QWORD PTR [r13+8]
  24864. and rax, rdi
  24865. and r9, r15
  24866. mov QWORD PTR [r12+8], rax
  24867. mov QWORD PTR [r13+8], r9
  24868. mov rax, QWORD PTR [r12+16]
  24869. mov r9, QWORD PTR [r13+16]
  24870. and rax, rdi
  24871. and r9, r15
  24872. mov QWORD PTR [r12+16], rax
  24873. mov QWORD PTR [r13+16], r9
  24874. mov rax, QWORD PTR [r12+24]
  24875. mov r9, QWORD PTR [r13+24]
  24876. and rax, rdi
  24877. and r9, r15
  24878. mov QWORD PTR [r12+24], rax
  24879. mov QWORD PTR [r13+24], r9
  24880. mov rax, QWORD PTR [r12+32]
  24881. mov r9, QWORD PTR [r13+32]
  24882. and rax, rdi
  24883. and r9, r15
  24884. mov QWORD PTR [r12+32], rax
  24885. mov QWORD PTR [r13+32], r9
  24886. mov rax, QWORD PTR [r12+40]
  24887. mov r9, QWORD PTR [r13+40]
  24888. and rax, rdi
  24889. and r9, r15
  24890. mov QWORD PTR [r12+40], rax
  24891. mov QWORD PTR [r13+40], r9
  24892. mov rax, QWORD PTR [r12+48]
  24893. mov r9, QWORD PTR [r13+48]
  24894. and rax, rdi
  24895. and r9, r15
  24896. mov QWORD PTR [r12+48], rax
  24897. mov QWORD PTR [r13+48], r9
  24898. mov rax, QWORD PTR [r12+56]
  24899. mov r9, QWORD PTR [r13+56]
  24900. and rax, rdi
  24901. and r9, r15
  24902. mov QWORD PTR [r12+56], rax
  24903. mov QWORD PTR [r13+56], r9
  24904. mov rax, QWORD PTR [r12+64]
  24905. mov r9, QWORD PTR [r13+64]
  24906. and rax, rdi
  24907. and r9, r15
  24908. mov QWORD PTR [r12+64], rax
  24909. mov QWORD PTR [r13+64], r9
  24910. mov rax, QWORD PTR [r12+72]
  24911. mov r9, QWORD PTR [r13+72]
  24912. and rax, rdi
  24913. and r9, r15
  24914. mov QWORD PTR [r12+72], rax
  24915. mov QWORD PTR [r13+72], r9
  24916. mov rax, QWORD PTR [r12+80]
  24917. mov r9, QWORD PTR [r13+80]
  24918. and rax, rdi
  24919. and r9, r15
  24920. mov QWORD PTR [r12+80], rax
  24921. mov QWORD PTR [r13+80], r9
  24922. mov rax, QWORD PTR [r12+88]
  24923. mov r9, QWORD PTR [r13+88]
  24924. and rax, rdi
  24925. and r9, r15
  24926. mov QWORD PTR [r12+88], rax
  24927. mov QWORD PTR [r13+88], r9
  24928. mov rax, QWORD PTR [r12+96]
  24929. mov r9, QWORD PTR [r13+96]
  24930. and rax, rdi
  24931. and r9, r15
  24932. mov QWORD PTR [r12+96], rax
  24933. mov QWORD PTR [r13+96], r9
  24934. mov rax, QWORD PTR [r12+104]
  24935. mov r9, QWORD PTR [r13+104]
  24936. and rax, rdi
  24937. and r9, r15
  24938. mov QWORD PTR [r12+104], rax
  24939. mov QWORD PTR [r13+104], r9
  24940. mov rax, QWORD PTR [r12+112]
  24941. mov r9, QWORD PTR [r13+112]
  24942. and rax, rdi
  24943. and r9, r15
  24944. mov QWORD PTR [r12+112], rax
  24945. mov QWORD PTR [r13+112], r9
  24946. mov rax, QWORD PTR [r12+120]
  24947. mov r9, QWORD PTR [r13+120]
  24948. and rax, rdi
  24949. and r9, r15
  24950. mov QWORD PTR [r12+120], rax
  24951. mov QWORD PTR [r13+120], r9
  24952. mov rax, QWORD PTR [r12+128]
  24953. mov r9, QWORD PTR [r13+128]
  24954. and rax, rdi
  24955. and r9, r15
  24956. mov QWORD PTR [r12+128], rax
  24957. mov QWORD PTR [r13+128], r9
  24958. mov rax, QWORD PTR [r12+136]
  24959. mov r9, QWORD PTR [r13+136]
  24960. and rax, rdi
  24961. and r9, r15
  24962. mov QWORD PTR [r12+136], rax
  24963. mov QWORD PTR [r13+136], r9
  24964. mov rax, QWORD PTR [r12+144]
  24965. mov r9, QWORD PTR [r13+144]
  24966. and rax, rdi
  24967. and r9, r15
  24968. mov QWORD PTR [r12+144], rax
  24969. mov QWORD PTR [r13+144], r9
  24970. mov rax, QWORD PTR [r12+152]
  24971. mov r9, QWORD PTR [r13+152]
  24972. and rax, rdi
  24973. and r9, r15
  24974. mov QWORD PTR [r12+152], rax
  24975. mov QWORD PTR [r13+152], r9
  24976. mov rax, QWORD PTR [r12+160]
  24977. mov r9, QWORD PTR [r13+160]
  24978. and rax, rdi
  24979. and r9, r15
  24980. mov QWORD PTR [r12+160], rax
  24981. mov QWORD PTR [r13+160], r9
  24982. mov rax, QWORD PTR [r12+168]
  24983. mov r9, QWORD PTR [r13+168]
  24984. and rax, rdi
  24985. and r9, r15
  24986. mov QWORD PTR [r12+168], rax
  24987. mov QWORD PTR [r13+168], r9
  24988. mov rax, QWORD PTR [r12+176]
  24989. mov r9, QWORD PTR [r13+176]
  24990. and rax, rdi
  24991. and r9, r15
  24992. mov QWORD PTR [r12+176], rax
  24993. mov QWORD PTR [r13+176], r9
  24994. mov rax, QWORD PTR [r12+184]
  24995. mov r9, QWORD PTR [r13+184]
  24996. and rax, rdi
  24997. and r9, r15
  24998. mov QWORD PTR [r12+184], rax
  24999. mov QWORD PTR [r13+184], r9
  25000. mov rax, QWORD PTR [r12]
  25001. add rax, QWORD PTR [r13]
  25002. mov r9, QWORD PTR [r12+8]
  25003. mov QWORD PTR [rsi], rax
  25004. adc r9, QWORD PTR [r13+8]
  25005. mov r10, QWORD PTR [r12+16]
  25006. mov QWORD PTR [rsi+8], r9
  25007. adc r10, QWORD PTR [r13+16]
  25008. mov rax, QWORD PTR [r12+24]
  25009. mov QWORD PTR [rsi+16], r10
  25010. adc rax, QWORD PTR [r13+24]
  25011. mov r9, QWORD PTR [r12+32]
  25012. mov QWORD PTR [rsi+24], rax
  25013. adc r9, QWORD PTR [r13+32]
  25014. mov r10, QWORD PTR [r12+40]
  25015. mov QWORD PTR [rsi+32], r9
  25016. adc r10, QWORD PTR [r13+40]
  25017. mov rax, QWORD PTR [r12+48]
  25018. mov QWORD PTR [rsi+40], r10
  25019. adc rax, QWORD PTR [r13+48]
  25020. mov r9, QWORD PTR [r12+56]
  25021. mov QWORD PTR [rsi+48], rax
  25022. adc r9, QWORD PTR [r13+56]
  25023. mov r10, QWORD PTR [r12+64]
  25024. mov QWORD PTR [rsi+56], r9
  25025. adc r10, QWORD PTR [r13+64]
  25026. mov rax, QWORD PTR [r12+72]
  25027. mov QWORD PTR [rsi+64], r10
  25028. adc rax, QWORD PTR [r13+72]
  25029. mov r9, QWORD PTR [r12+80]
  25030. mov QWORD PTR [rsi+72], rax
  25031. adc r9, QWORD PTR [r13+80]
  25032. mov r10, QWORD PTR [r12+88]
  25033. mov QWORD PTR [rsi+80], r9
  25034. adc r10, QWORD PTR [r13+88]
  25035. mov rax, QWORD PTR [r12+96]
  25036. mov QWORD PTR [rsi+88], r10
  25037. adc rax, QWORD PTR [r13+96]
  25038. mov r9, QWORD PTR [r12+104]
  25039. mov QWORD PTR [rsi+96], rax
  25040. adc r9, QWORD PTR [r13+104]
  25041. mov r10, QWORD PTR [r12+112]
  25042. mov QWORD PTR [rsi+104], r9
  25043. adc r10, QWORD PTR [r13+112]
  25044. mov rax, QWORD PTR [r12+120]
  25045. mov QWORD PTR [rsi+112], r10
  25046. adc rax, QWORD PTR [r13+120]
  25047. mov r9, QWORD PTR [r12+128]
  25048. mov QWORD PTR [rsi+120], rax
  25049. adc r9, QWORD PTR [r13+128]
  25050. mov r10, QWORD PTR [r12+136]
  25051. mov QWORD PTR [rsi+128], r9
  25052. adc r10, QWORD PTR [r13+136]
  25053. mov rax, QWORD PTR [r12+144]
  25054. mov QWORD PTR [rsi+136], r10
  25055. adc rax, QWORD PTR [r13+144]
  25056. mov r9, QWORD PTR [r12+152]
  25057. mov QWORD PTR [rsi+144], rax
  25058. adc r9, QWORD PTR [r13+152]
  25059. mov r10, QWORD PTR [r12+160]
  25060. mov QWORD PTR [rsi+152], r9
  25061. adc r10, QWORD PTR [r13+160]
  25062. mov rax, QWORD PTR [r12+168]
  25063. mov QWORD PTR [rsi+160], r10
  25064. adc rax, QWORD PTR [r13+168]
  25065. mov r9, QWORD PTR [r12+176]
  25066. mov QWORD PTR [rsi+168], rax
  25067. adc r9, QWORD PTR [r13+176]
  25068. mov r10, QWORD PTR [r12+184]
  25069. mov QWORD PTR [rsi+176], r9
  25070. adc r10, QWORD PTR [r13+184]
  25071. mov QWORD PTR [rsi+184], r10
  25072. adc r11, 0
  25073. lea r13, QWORD PTR [rsp+384]
  25074. mov r12, rsp
  25075. mov rax, QWORD PTR [r12]
  25076. sub rax, QWORD PTR [r13]
  25077. mov r9, QWORD PTR [r12+8]
  25078. mov QWORD PTR [r12], rax
  25079. sbb r9, QWORD PTR [r13+8]
  25080. mov r10, QWORD PTR [r12+16]
  25081. mov QWORD PTR [r12+8], r9
  25082. sbb r10, QWORD PTR [r13+16]
  25083. mov rax, QWORD PTR [r12+24]
  25084. mov QWORD PTR [r12+16], r10
  25085. sbb rax, QWORD PTR [r13+24]
  25086. mov r9, QWORD PTR [r12+32]
  25087. mov QWORD PTR [r12+24], rax
  25088. sbb r9, QWORD PTR [r13+32]
  25089. mov r10, QWORD PTR [r12+40]
  25090. mov QWORD PTR [r12+32], r9
  25091. sbb r10, QWORD PTR [r13+40]
  25092. mov rax, QWORD PTR [r12+48]
  25093. mov QWORD PTR [r12+40], r10
  25094. sbb rax, QWORD PTR [r13+48]
  25095. mov r9, QWORD PTR [r12+56]
  25096. mov QWORD PTR [r12+48], rax
  25097. sbb r9, QWORD PTR [r13+56]
  25098. mov r10, QWORD PTR [r12+64]
  25099. mov QWORD PTR [r12+56], r9
  25100. sbb r10, QWORD PTR [r13+64]
  25101. mov rax, QWORD PTR [r12+72]
  25102. mov QWORD PTR [r12+64], r10
  25103. sbb rax, QWORD PTR [r13+72]
  25104. mov r9, QWORD PTR [r12+80]
  25105. mov QWORD PTR [r12+72], rax
  25106. sbb r9, QWORD PTR [r13+80]
  25107. mov r10, QWORD PTR [r12+88]
  25108. mov QWORD PTR [r12+80], r9
  25109. sbb r10, QWORD PTR [r13+88]
  25110. mov rax, QWORD PTR [r12+96]
  25111. mov QWORD PTR [r12+88], r10
  25112. sbb rax, QWORD PTR [r13+96]
  25113. mov r9, QWORD PTR [r12+104]
  25114. mov QWORD PTR [r12+96], rax
  25115. sbb r9, QWORD PTR [r13+104]
  25116. mov r10, QWORD PTR [r12+112]
  25117. mov QWORD PTR [r12+104], r9
  25118. sbb r10, QWORD PTR [r13+112]
  25119. mov rax, QWORD PTR [r12+120]
  25120. mov QWORD PTR [r12+112], r10
  25121. sbb rax, QWORD PTR [r13+120]
  25122. mov r9, QWORD PTR [r12+128]
  25123. mov QWORD PTR [r12+120], rax
  25124. sbb r9, QWORD PTR [r13+128]
  25125. mov r10, QWORD PTR [r12+136]
  25126. mov QWORD PTR [r12+128], r9
  25127. sbb r10, QWORD PTR [r13+136]
  25128. mov rax, QWORD PTR [r12+144]
  25129. mov QWORD PTR [r12+136], r10
  25130. sbb rax, QWORD PTR [r13+144]
  25131. mov r9, QWORD PTR [r12+152]
  25132. mov QWORD PTR [r12+144], rax
  25133. sbb r9, QWORD PTR [r13+152]
  25134. mov r10, QWORD PTR [r12+160]
  25135. mov QWORD PTR [r12+152], r9
  25136. sbb r10, QWORD PTR [r13+160]
  25137. mov rax, QWORD PTR [r12+168]
  25138. mov QWORD PTR [r12+160], r10
  25139. sbb rax, QWORD PTR [r13+168]
  25140. mov r9, QWORD PTR [r12+176]
  25141. mov QWORD PTR [r12+168], rax
  25142. sbb r9, QWORD PTR [r13+176]
  25143. mov r10, QWORD PTR [r12+184]
  25144. mov QWORD PTR [r12+176], r9
  25145. sbb r10, QWORD PTR [r13+184]
  25146. mov rax, QWORD PTR [r12+192]
  25147. mov QWORD PTR [r12+184], r10
  25148. sbb rax, QWORD PTR [r13+192]
  25149. mov r9, QWORD PTR [r12+200]
  25150. mov QWORD PTR [r12+192], rax
  25151. sbb r9, QWORD PTR [r13+200]
  25152. mov r10, QWORD PTR [r12+208]
  25153. mov QWORD PTR [r12+200], r9
  25154. sbb r10, QWORD PTR [r13+208]
  25155. mov rax, QWORD PTR [r12+216]
  25156. mov QWORD PTR [r12+208], r10
  25157. sbb rax, QWORD PTR [r13+216]
  25158. mov r9, QWORD PTR [r12+224]
  25159. mov QWORD PTR [r12+216], rax
  25160. sbb r9, QWORD PTR [r13+224]
  25161. mov r10, QWORD PTR [r12+232]
  25162. mov QWORD PTR [r12+224], r9
  25163. sbb r10, QWORD PTR [r13+232]
  25164. mov rax, QWORD PTR [r12+240]
  25165. mov QWORD PTR [r12+232], r10
  25166. sbb rax, QWORD PTR [r13+240]
  25167. mov r9, QWORD PTR [r12+248]
  25168. mov QWORD PTR [r12+240], rax
  25169. sbb r9, QWORD PTR [r13+248]
  25170. mov r10, QWORD PTR [r12+256]
  25171. mov QWORD PTR [r12+248], r9
  25172. sbb r10, QWORD PTR [r13+256]
  25173. mov rax, QWORD PTR [r12+264]
  25174. mov QWORD PTR [r12+256], r10
  25175. sbb rax, QWORD PTR [r13+264]
  25176. mov r9, QWORD PTR [r12+272]
  25177. mov QWORD PTR [r12+264], rax
  25178. sbb r9, QWORD PTR [r13+272]
  25179. mov r10, QWORD PTR [r12+280]
  25180. mov QWORD PTR [r12+272], r9
  25181. sbb r10, QWORD PTR [r13+280]
  25182. mov rax, QWORD PTR [r12+288]
  25183. mov QWORD PTR [r12+280], r10
  25184. sbb rax, QWORD PTR [r13+288]
  25185. mov r9, QWORD PTR [r12+296]
  25186. mov QWORD PTR [r12+288], rax
  25187. sbb r9, QWORD PTR [r13+296]
  25188. mov r10, QWORD PTR [r12+304]
  25189. mov QWORD PTR [r12+296], r9
  25190. sbb r10, QWORD PTR [r13+304]
  25191. mov rax, QWORD PTR [r12+312]
  25192. mov QWORD PTR [r12+304], r10
  25193. sbb rax, QWORD PTR [r13+312]
  25194. mov r9, QWORD PTR [r12+320]
  25195. mov QWORD PTR [r12+312], rax
  25196. sbb r9, QWORD PTR [r13+320]
  25197. mov r10, QWORD PTR [r12+328]
  25198. mov QWORD PTR [r12+320], r9
  25199. sbb r10, QWORD PTR [r13+328]
  25200. mov rax, QWORD PTR [r12+336]
  25201. mov QWORD PTR [r12+328], r10
  25202. sbb rax, QWORD PTR [r13+336]
  25203. mov r9, QWORD PTR [r12+344]
  25204. mov QWORD PTR [r12+336], rax
  25205. sbb r9, QWORD PTR [r13+344]
  25206. mov r10, QWORD PTR [r12+352]
  25207. mov QWORD PTR [r12+344], r9
  25208. sbb r10, QWORD PTR [r13+352]
  25209. mov rax, QWORD PTR [r12+360]
  25210. mov QWORD PTR [r12+352], r10
  25211. sbb rax, QWORD PTR [r13+360]
  25212. mov r9, QWORD PTR [r12+368]
  25213. mov QWORD PTR [r12+360], rax
  25214. sbb r9, QWORD PTR [r13+368]
  25215. mov r10, QWORD PTR [r12+376]
  25216. mov QWORD PTR [r12+368], r9
  25217. sbb r10, QWORD PTR [r13+376]
  25218. mov QWORD PTR [r12+376], r10
  25219. sbb r11, 0
  25220. mov rax, QWORD PTR [r12]
  25221. sub rax, QWORD PTR [rcx]
  25222. mov r9, QWORD PTR [r12+8]
  25223. mov QWORD PTR [r12], rax
  25224. sbb r9, QWORD PTR [rcx+8]
  25225. mov r10, QWORD PTR [r12+16]
  25226. mov QWORD PTR [r12+8], r9
  25227. sbb r10, QWORD PTR [rcx+16]
  25228. mov rax, QWORD PTR [r12+24]
  25229. mov QWORD PTR [r12+16], r10
  25230. sbb rax, QWORD PTR [rcx+24]
  25231. mov r9, QWORD PTR [r12+32]
  25232. mov QWORD PTR [r12+24], rax
  25233. sbb r9, QWORD PTR [rcx+32]
  25234. mov r10, QWORD PTR [r12+40]
  25235. mov QWORD PTR [r12+32], r9
  25236. sbb r10, QWORD PTR [rcx+40]
  25237. mov rax, QWORD PTR [r12+48]
  25238. mov QWORD PTR [r12+40], r10
  25239. sbb rax, QWORD PTR [rcx+48]
  25240. mov r9, QWORD PTR [r12+56]
  25241. mov QWORD PTR [r12+48], rax
  25242. sbb r9, QWORD PTR [rcx+56]
  25243. mov r10, QWORD PTR [r12+64]
  25244. mov QWORD PTR [r12+56], r9
  25245. sbb r10, QWORD PTR [rcx+64]
  25246. mov rax, QWORD PTR [r12+72]
  25247. mov QWORD PTR [r12+64], r10
  25248. sbb rax, QWORD PTR [rcx+72]
  25249. mov r9, QWORD PTR [r12+80]
  25250. mov QWORD PTR [r12+72], rax
  25251. sbb r9, QWORD PTR [rcx+80]
  25252. mov r10, QWORD PTR [r12+88]
  25253. mov QWORD PTR [r12+80], r9
  25254. sbb r10, QWORD PTR [rcx+88]
  25255. mov rax, QWORD PTR [r12+96]
  25256. mov QWORD PTR [r12+88], r10
  25257. sbb rax, QWORD PTR [rcx+96]
  25258. mov r9, QWORD PTR [r12+104]
  25259. mov QWORD PTR [r12+96], rax
  25260. sbb r9, QWORD PTR [rcx+104]
  25261. mov r10, QWORD PTR [r12+112]
  25262. mov QWORD PTR [r12+104], r9
  25263. sbb r10, QWORD PTR [rcx+112]
  25264. mov rax, QWORD PTR [r12+120]
  25265. mov QWORD PTR [r12+112], r10
  25266. sbb rax, QWORD PTR [rcx+120]
  25267. mov r9, QWORD PTR [r12+128]
  25268. mov QWORD PTR [r12+120], rax
  25269. sbb r9, QWORD PTR [rcx+128]
  25270. mov r10, QWORD PTR [r12+136]
  25271. mov QWORD PTR [r12+128], r9
  25272. sbb r10, QWORD PTR [rcx+136]
  25273. mov rax, QWORD PTR [r12+144]
  25274. mov QWORD PTR [r12+136], r10
  25275. sbb rax, QWORD PTR [rcx+144]
  25276. mov r9, QWORD PTR [r12+152]
  25277. mov QWORD PTR [r12+144], rax
  25278. sbb r9, QWORD PTR [rcx+152]
  25279. mov r10, QWORD PTR [r12+160]
  25280. mov QWORD PTR [r12+152], r9
  25281. sbb r10, QWORD PTR [rcx+160]
  25282. mov rax, QWORD PTR [r12+168]
  25283. mov QWORD PTR [r12+160], r10
  25284. sbb rax, QWORD PTR [rcx+168]
  25285. mov r9, QWORD PTR [r12+176]
  25286. mov QWORD PTR [r12+168], rax
  25287. sbb r9, QWORD PTR [rcx+176]
  25288. mov r10, QWORD PTR [r12+184]
  25289. mov QWORD PTR [r12+176], r9
  25290. sbb r10, QWORD PTR [rcx+184]
  25291. mov rax, QWORD PTR [r12+192]
  25292. mov QWORD PTR [r12+184], r10
  25293. sbb rax, QWORD PTR [rcx+192]
  25294. mov r9, QWORD PTR [r12+200]
  25295. mov QWORD PTR [r12+192], rax
  25296. sbb r9, QWORD PTR [rcx+200]
  25297. mov r10, QWORD PTR [r12+208]
  25298. mov QWORD PTR [r12+200], r9
  25299. sbb r10, QWORD PTR [rcx+208]
  25300. mov rax, QWORD PTR [r12+216]
  25301. mov QWORD PTR [r12+208], r10
  25302. sbb rax, QWORD PTR [rcx+216]
  25303. mov r9, QWORD PTR [r12+224]
  25304. mov QWORD PTR [r12+216], rax
  25305. sbb r9, QWORD PTR [rcx+224]
  25306. mov r10, QWORD PTR [r12+232]
  25307. mov QWORD PTR [r12+224], r9
  25308. sbb r10, QWORD PTR [rcx+232]
  25309. mov rax, QWORD PTR [r12+240]
  25310. mov QWORD PTR [r12+232], r10
  25311. sbb rax, QWORD PTR [rcx+240]
  25312. mov r9, QWORD PTR [r12+248]
  25313. mov QWORD PTR [r12+240], rax
  25314. sbb r9, QWORD PTR [rcx+248]
  25315. mov r10, QWORD PTR [r12+256]
  25316. mov QWORD PTR [r12+248], r9
  25317. sbb r10, QWORD PTR [rcx+256]
  25318. mov rax, QWORD PTR [r12+264]
  25319. mov QWORD PTR [r12+256], r10
  25320. sbb rax, QWORD PTR [rcx+264]
  25321. mov r9, QWORD PTR [r12+272]
  25322. mov QWORD PTR [r12+264], rax
  25323. sbb r9, QWORD PTR [rcx+272]
  25324. mov r10, QWORD PTR [r12+280]
  25325. mov QWORD PTR [r12+272], r9
  25326. sbb r10, QWORD PTR [rcx+280]
  25327. mov rax, QWORD PTR [r12+288]
  25328. mov QWORD PTR [r12+280], r10
  25329. sbb rax, QWORD PTR [rcx+288]
  25330. mov r9, QWORD PTR [r12+296]
  25331. mov QWORD PTR [r12+288], rax
  25332. sbb r9, QWORD PTR [rcx+296]
  25333. mov r10, QWORD PTR [r12+304]
  25334. mov QWORD PTR [r12+296], r9
  25335. sbb r10, QWORD PTR [rcx+304]
  25336. mov rax, QWORD PTR [r12+312]
  25337. mov QWORD PTR [r12+304], r10
  25338. sbb rax, QWORD PTR [rcx+312]
  25339. mov r9, QWORD PTR [r12+320]
  25340. mov QWORD PTR [r12+312], rax
  25341. sbb r9, QWORD PTR [rcx+320]
  25342. mov r10, QWORD PTR [r12+328]
  25343. mov QWORD PTR [r12+320], r9
  25344. sbb r10, QWORD PTR [rcx+328]
  25345. mov rax, QWORD PTR [r12+336]
  25346. mov QWORD PTR [r12+328], r10
  25347. sbb rax, QWORD PTR [rcx+336]
  25348. mov r9, QWORD PTR [r12+344]
  25349. mov QWORD PTR [r12+336], rax
  25350. sbb r9, QWORD PTR [rcx+344]
  25351. mov r10, QWORD PTR [r12+352]
  25352. mov QWORD PTR [r12+344], r9
  25353. sbb r10, QWORD PTR [rcx+352]
  25354. mov rax, QWORD PTR [r12+360]
  25355. mov QWORD PTR [r12+352], r10
  25356. sbb rax, QWORD PTR [rcx+360]
  25357. mov r9, QWORD PTR [r12+368]
  25358. mov QWORD PTR [r12+360], rax
  25359. sbb r9, QWORD PTR [rcx+368]
  25360. mov r10, QWORD PTR [r12+376]
  25361. mov QWORD PTR [r12+368], r9
  25362. sbb r10, QWORD PTR [rcx+376]
  25363. mov QWORD PTR [r12+376], r10
  25364. sbb r11, 0
  25365. sub rsi, 192
  25366. ; Add
  25367. mov rax, QWORD PTR [rsi]
  25368. add rax, QWORD PTR [r12]
  25369. mov r9, QWORD PTR [rsi+8]
  25370. mov QWORD PTR [rsi], rax
  25371. adc r9, QWORD PTR [r12+8]
  25372. mov r10, QWORD PTR [rsi+16]
  25373. mov QWORD PTR [rsi+8], r9
  25374. adc r10, QWORD PTR [r12+16]
  25375. mov rax, QWORD PTR [rsi+24]
  25376. mov QWORD PTR [rsi+16], r10
  25377. adc rax, QWORD PTR [r12+24]
  25378. mov r9, QWORD PTR [rsi+32]
  25379. mov QWORD PTR [rsi+24], rax
  25380. adc r9, QWORD PTR [r12+32]
  25381. mov r10, QWORD PTR [rsi+40]
  25382. mov QWORD PTR [rsi+32], r9
  25383. adc r10, QWORD PTR [r12+40]
  25384. mov rax, QWORD PTR [rsi+48]
  25385. mov QWORD PTR [rsi+40], r10
  25386. adc rax, QWORD PTR [r12+48]
  25387. mov r9, QWORD PTR [rsi+56]
  25388. mov QWORD PTR [rsi+48], rax
  25389. adc r9, QWORD PTR [r12+56]
  25390. mov r10, QWORD PTR [rsi+64]
  25391. mov QWORD PTR [rsi+56], r9
  25392. adc r10, QWORD PTR [r12+64]
  25393. mov rax, QWORD PTR [rsi+72]
  25394. mov QWORD PTR [rsi+64], r10
  25395. adc rax, QWORD PTR [r12+72]
  25396. mov r9, QWORD PTR [rsi+80]
  25397. mov QWORD PTR [rsi+72], rax
  25398. adc r9, QWORD PTR [r12+80]
  25399. mov r10, QWORD PTR [rsi+88]
  25400. mov QWORD PTR [rsi+80], r9
  25401. adc r10, QWORD PTR [r12+88]
  25402. mov rax, QWORD PTR [rsi+96]
  25403. mov QWORD PTR [rsi+88], r10
  25404. adc rax, QWORD PTR [r12+96]
  25405. mov r9, QWORD PTR [rsi+104]
  25406. mov QWORD PTR [rsi+96], rax
  25407. adc r9, QWORD PTR [r12+104]
  25408. mov r10, QWORD PTR [rsi+112]
  25409. mov QWORD PTR [rsi+104], r9
  25410. adc r10, QWORD PTR [r12+112]
  25411. mov rax, QWORD PTR [rsi+120]
  25412. mov QWORD PTR [rsi+112], r10
  25413. adc rax, QWORD PTR [r12+120]
  25414. mov r9, QWORD PTR [rsi+128]
  25415. mov QWORD PTR [rsi+120], rax
  25416. adc r9, QWORD PTR [r12+128]
  25417. mov r10, QWORD PTR [rsi+136]
  25418. mov QWORD PTR [rsi+128], r9
  25419. adc r10, QWORD PTR [r12+136]
  25420. mov rax, QWORD PTR [rsi+144]
  25421. mov QWORD PTR [rsi+136], r10
  25422. adc rax, QWORD PTR [r12+144]
  25423. mov r9, QWORD PTR [rsi+152]
  25424. mov QWORD PTR [rsi+144], rax
  25425. adc r9, QWORD PTR [r12+152]
  25426. mov r10, QWORD PTR [rsi+160]
  25427. mov QWORD PTR [rsi+152], r9
  25428. adc r10, QWORD PTR [r12+160]
  25429. mov rax, QWORD PTR [rsi+168]
  25430. mov QWORD PTR [rsi+160], r10
  25431. adc rax, QWORD PTR [r12+168]
  25432. mov r9, QWORD PTR [rsi+176]
  25433. mov QWORD PTR [rsi+168], rax
  25434. adc r9, QWORD PTR [r12+176]
  25435. mov r10, QWORD PTR [rsi+184]
  25436. mov QWORD PTR [rsi+176], r9
  25437. adc r10, QWORD PTR [r12+184]
  25438. mov rax, QWORD PTR [rsi+192]
  25439. mov QWORD PTR [rsi+184], r10
  25440. adc rax, QWORD PTR [r12+192]
  25441. mov r9, QWORD PTR [rsi+200]
  25442. mov QWORD PTR [rsi+192], rax
  25443. adc r9, QWORD PTR [r12+200]
  25444. mov r10, QWORD PTR [rsi+208]
  25445. mov QWORD PTR [rsi+200], r9
  25446. adc r10, QWORD PTR [r12+208]
  25447. mov rax, QWORD PTR [rsi+216]
  25448. mov QWORD PTR [rsi+208], r10
  25449. adc rax, QWORD PTR [r12+216]
  25450. mov r9, QWORD PTR [rsi+224]
  25451. mov QWORD PTR [rsi+216], rax
  25452. adc r9, QWORD PTR [r12+224]
  25453. mov r10, QWORD PTR [rsi+232]
  25454. mov QWORD PTR [rsi+224], r9
  25455. adc r10, QWORD PTR [r12+232]
  25456. mov rax, QWORD PTR [rsi+240]
  25457. mov QWORD PTR [rsi+232], r10
  25458. adc rax, QWORD PTR [r12+240]
  25459. mov r9, QWORD PTR [rsi+248]
  25460. mov QWORD PTR [rsi+240], rax
  25461. adc r9, QWORD PTR [r12+248]
  25462. mov r10, QWORD PTR [rsi+256]
  25463. mov QWORD PTR [rsi+248], r9
  25464. adc r10, QWORD PTR [r12+256]
  25465. mov rax, QWORD PTR [rsi+264]
  25466. mov QWORD PTR [rsi+256], r10
  25467. adc rax, QWORD PTR [r12+264]
  25468. mov r9, QWORD PTR [rsi+272]
  25469. mov QWORD PTR [rsi+264], rax
  25470. adc r9, QWORD PTR [r12+272]
  25471. mov r10, QWORD PTR [rsi+280]
  25472. mov QWORD PTR [rsi+272], r9
  25473. adc r10, QWORD PTR [r12+280]
  25474. mov rax, QWORD PTR [rsi+288]
  25475. mov QWORD PTR [rsi+280], r10
  25476. adc rax, QWORD PTR [r12+288]
  25477. mov r9, QWORD PTR [rsi+296]
  25478. mov QWORD PTR [rsi+288], rax
  25479. adc r9, QWORD PTR [r12+296]
  25480. mov r10, QWORD PTR [rsi+304]
  25481. mov QWORD PTR [rsi+296], r9
  25482. adc r10, QWORD PTR [r12+304]
  25483. mov rax, QWORD PTR [rsi+312]
  25484. mov QWORD PTR [rsi+304], r10
  25485. adc rax, QWORD PTR [r12+312]
  25486. mov r9, QWORD PTR [rsi+320]
  25487. mov QWORD PTR [rsi+312], rax
  25488. adc r9, QWORD PTR [r12+320]
  25489. mov r10, QWORD PTR [rsi+328]
  25490. mov QWORD PTR [rsi+320], r9
  25491. adc r10, QWORD PTR [r12+328]
  25492. mov rax, QWORD PTR [rsi+336]
  25493. mov QWORD PTR [rsi+328], r10
  25494. adc rax, QWORD PTR [r12+336]
  25495. mov r9, QWORD PTR [rsi+344]
  25496. mov QWORD PTR [rsi+336], rax
  25497. adc r9, QWORD PTR [r12+344]
  25498. mov r10, QWORD PTR [rsi+352]
  25499. mov QWORD PTR [rsi+344], r9
  25500. adc r10, QWORD PTR [r12+352]
  25501. mov rax, QWORD PTR [rsi+360]
  25502. mov QWORD PTR [rsi+352], r10
  25503. adc rax, QWORD PTR [r12+360]
  25504. mov r9, QWORD PTR [rsi+368]
  25505. mov QWORD PTR [rsi+360], rax
  25506. adc r9, QWORD PTR [r12+368]
  25507. mov r10, QWORD PTR [rsi+376]
  25508. mov QWORD PTR [rsi+368], r9
  25509. adc r10, QWORD PTR [r12+376]
  25510. mov QWORD PTR [rsi+376], r10
  25511. adc r11, 0
  25512. mov QWORD PTR [rcx+576], r11
  25513. add rsi, 192
  25514. ; Add
  25515. mov rax, QWORD PTR [rsi]
  25516. add rax, QWORD PTR [r13]
  25517. mov r9, QWORD PTR [rsi+8]
  25518. mov QWORD PTR [rsi], rax
  25519. adc r9, QWORD PTR [r13+8]
  25520. mov r10, QWORD PTR [rsi+16]
  25521. mov QWORD PTR [rsi+8], r9
  25522. adc r10, QWORD PTR [r13+16]
  25523. mov rax, QWORD PTR [rsi+24]
  25524. mov QWORD PTR [rsi+16], r10
  25525. adc rax, QWORD PTR [r13+24]
  25526. mov r9, QWORD PTR [rsi+32]
  25527. mov QWORD PTR [rsi+24], rax
  25528. adc r9, QWORD PTR [r13+32]
  25529. mov r10, QWORD PTR [rsi+40]
  25530. mov QWORD PTR [rsi+32], r9
  25531. adc r10, QWORD PTR [r13+40]
  25532. mov rax, QWORD PTR [rsi+48]
  25533. mov QWORD PTR [rsi+40], r10
  25534. adc rax, QWORD PTR [r13+48]
  25535. mov r9, QWORD PTR [rsi+56]
  25536. mov QWORD PTR [rsi+48], rax
  25537. adc r9, QWORD PTR [r13+56]
  25538. mov r10, QWORD PTR [rsi+64]
  25539. mov QWORD PTR [rsi+56], r9
  25540. adc r10, QWORD PTR [r13+64]
  25541. mov rax, QWORD PTR [rsi+72]
  25542. mov QWORD PTR [rsi+64], r10
  25543. adc rax, QWORD PTR [r13+72]
  25544. mov r9, QWORD PTR [rsi+80]
  25545. mov QWORD PTR [rsi+72], rax
  25546. adc r9, QWORD PTR [r13+80]
  25547. mov r10, QWORD PTR [rsi+88]
  25548. mov QWORD PTR [rsi+80], r9
  25549. adc r10, QWORD PTR [r13+88]
  25550. mov rax, QWORD PTR [rsi+96]
  25551. mov QWORD PTR [rsi+88], r10
  25552. adc rax, QWORD PTR [r13+96]
  25553. mov r9, QWORD PTR [rsi+104]
  25554. mov QWORD PTR [rsi+96], rax
  25555. adc r9, QWORD PTR [r13+104]
  25556. mov r10, QWORD PTR [rsi+112]
  25557. mov QWORD PTR [rsi+104], r9
  25558. adc r10, QWORD PTR [r13+112]
  25559. mov rax, QWORD PTR [rsi+120]
  25560. mov QWORD PTR [rsi+112], r10
  25561. adc rax, QWORD PTR [r13+120]
  25562. mov r9, QWORD PTR [rsi+128]
  25563. mov QWORD PTR [rsi+120], rax
  25564. adc r9, QWORD PTR [r13+128]
  25565. mov r10, QWORD PTR [rsi+136]
  25566. mov QWORD PTR [rsi+128], r9
  25567. adc r10, QWORD PTR [r13+136]
  25568. mov rax, QWORD PTR [rsi+144]
  25569. mov QWORD PTR [rsi+136], r10
  25570. adc rax, QWORD PTR [r13+144]
  25571. mov r9, QWORD PTR [rsi+152]
  25572. mov QWORD PTR [rsi+144], rax
  25573. adc r9, QWORD PTR [r13+152]
  25574. mov r10, QWORD PTR [rsi+160]
  25575. mov QWORD PTR [rsi+152], r9
  25576. adc r10, QWORD PTR [r13+160]
  25577. mov rax, QWORD PTR [rsi+168]
  25578. mov QWORD PTR [rsi+160], r10
  25579. adc rax, QWORD PTR [r13+168]
  25580. mov r9, QWORD PTR [rsi+176]
  25581. mov QWORD PTR [rsi+168], rax
  25582. adc r9, QWORD PTR [r13+176]
  25583. mov r10, QWORD PTR [rsi+184]
  25584. mov QWORD PTR [rsi+176], r9
  25585. adc r10, QWORD PTR [r13+184]
  25586. mov rax, QWORD PTR [rsi+192]
  25587. mov QWORD PTR [rsi+184], r10
  25588. adc rax, QWORD PTR [r13+192]
  25589. mov QWORD PTR [rsi+192], rax
  25590. ; Add to zero
  25591. mov rax, QWORD PTR [r13+200]
  25592. adc rax, 0
  25593. mov r9, QWORD PTR [r13+208]
  25594. mov QWORD PTR [rsi+200], rax
  25595. adc r9, 0
  25596. mov r10, QWORD PTR [r13+216]
  25597. mov QWORD PTR [rsi+208], r9
  25598. adc r10, 0
  25599. mov rax, QWORD PTR [r13+224]
  25600. mov QWORD PTR [rsi+216], r10
  25601. adc rax, 0
  25602. mov r9, QWORD PTR [r13+232]
  25603. mov QWORD PTR [rsi+224], rax
  25604. adc r9, 0
  25605. mov r10, QWORD PTR [r13+240]
  25606. mov QWORD PTR [rsi+232], r9
  25607. adc r10, 0
  25608. mov rax, QWORD PTR [r13+248]
  25609. mov QWORD PTR [rsi+240], r10
  25610. adc rax, 0
  25611. mov r9, QWORD PTR [r13+256]
  25612. mov QWORD PTR [rsi+248], rax
  25613. adc r9, 0
  25614. mov r10, QWORD PTR [r13+264]
  25615. mov QWORD PTR [rsi+256], r9
  25616. adc r10, 0
  25617. mov rax, QWORD PTR [r13+272]
  25618. mov QWORD PTR [rsi+264], r10
  25619. adc rax, 0
  25620. mov r9, QWORD PTR [r13+280]
  25621. mov QWORD PTR [rsi+272], rax
  25622. adc r9, 0
  25623. mov r10, QWORD PTR [r13+288]
  25624. mov QWORD PTR [rsi+280], r9
  25625. adc r10, 0
  25626. mov rax, QWORD PTR [r13+296]
  25627. mov QWORD PTR [rsi+288], r10
  25628. adc rax, 0
  25629. mov r9, QWORD PTR [r13+304]
  25630. mov QWORD PTR [rsi+296], rax
  25631. adc r9, 0
  25632. mov r10, QWORD PTR [r13+312]
  25633. mov QWORD PTR [rsi+304], r9
  25634. adc r10, 0
  25635. mov rax, QWORD PTR [r13+320]
  25636. mov QWORD PTR [rsi+312], r10
  25637. adc rax, 0
  25638. mov r9, QWORD PTR [r13+328]
  25639. mov QWORD PTR [rsi+320], rax
  25640. adc r9, 0
  25641. mov r10, QWORD PTR [r13+336]
  25642. mov QWORD PTR [rsi+328], r9
  25643. adc r10, 0
  25644. mov rax, QWORD PTR [r13+344]
  25645. mov QWORD PTR [rsi+336], r10
  25646. adc rax, 0
  25647. mov r9, QWORD PTR [r13+352]
  25648. mov QWORD PTR [rsi+344], rax
  25649. adc r9, 0
  25650. mov r10, QWORD PTR [r13+360]
  25651. mov QWORD PTR [rsi+352], r9
  25652. adc r10, 0
  25653. mov rax, QWORD PTR [r13+368]
  25654. mov QWORD PTR [rsi+360], r10
  25655. adc rax, 0
  25656. mov r9, QWORD PTR [r13+376]
  25657. mov QWORD PTR [rsi+368], rax
  25658. adc r9, 0
  25659. mov QWORD PTR [rsi+376], r9
  25660. add rsp, 1192
  25661. pop rsi
  25662. pop rdi
  25663. pop r15
  25664. pop r14
  25665. pop r13
  25666. pop r12
  25667. ret
  25668. sp_3072_mul_48 ENDP
  25669. _text ENDS
  25670. IFDEF HAVE_INTEL_AVX2
  25671. ; /* Multiply a and b into r. (r = a * b)
  25672. ; *
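  25673. ; * r A single precision integer; receives the product (passed in rcx).
  25674. ; * a A single precision integer; first operand (passed in rdx).
  25675. ; * b A single precision integer; second operand (passed in r8).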
  25676. ; */
  25677. _text SEGMENT READONLY PARA
  25678. sp_3072_mul_avx2_48 PROC
  25679. push r12
  25680. push r13
  25681. push r14
  25682. push r15
  25683. push rdi
  25684. push rsi
  25685. sub rsp, 1192
  25686. mov QWORD PTR [rsp+1152], rcx
  25687. mov QWORD PTR [rsp+1160], rdx
  25688. mov QWORD PTR [rsp+1168], r8
  25689. lea r12, QWORD PTR [rsp+768]
  25690. lea r14, QWORD PTR [rdx+192]
  25691. ; Add
  25692. mov rax, QWORD PTR [rdx]
  25693. xor r15, r15
  25694. add rax, QWORD PTR [r14]
  25695. mov r9, QWORD PTR [rdx+8]
  25696. mov QWORD PTR [r12], rax
  25697. adc r9, QWORD PTR [r14+8]
  25698. mov r10, QWORD PTR [rdx+16]
  25699. mov QWORD PTR [r12+8], r9
  25700. adc r10, QWORD PTR [r14+16]
  25701. mov rax, QWORD PTR [rdx+24]
  25702. mov QWORD PTR [r12+16], r10
  25703. adc rax, QWORD PTR [r14+24]
  25704. mov r9, QWORD PTR [rdx+32]
  25705. mov QWORD PTR [r12+24], rax
  25706. adc r9, QWORD PTR [r14+32]
  25707. mov r10, QWORD PTR [rdx+40]
  25708. mov QWORD PTR [r12+32], r9
  25709. adc r10, QWORD PTR [r14+40]
  25710. mov rax, QWORD PTR [rdx+48]
  25711. mov QWORD PTR [r12+40], r10
  25712. adc rax, QWORD PTR [r14+48]
  25713. mov r9, QWORD PTR [rdx+56]
  25714. mov QWORD PTR [r12+48], rax
  25715. adc r9, QWORD PTR [r14+56]
  25716. mov r10, QWORD PTR [rdx+64]
  25717. mov QWORD PTR [r12+56], r9
  25718. adc r10, QWORD PTR [r14+64]
  25719. mov rax, QWORD PTR [rdx+72]
  25720. mov QWORD PTR [r12+64], r10
  25721. adc rax, QWORD PTR [r14+72]
  25722. mov r9, QWORD PTR [rdx+80]
  25723. mov QWORD PTR [r12+72], rax
  25724. adc r9, QWORD PTR [r14+80]
  25725. mov r10, QWORD PTR [rdx+88]
  25726. mov QWORD PTR [r12+80], r9
  25727. adc r10, QWORD PTR [r14+88]
  25728. mov rax, QWORD PTR [rdx+96]
  25729. mov QWORD PTR [r12+88], r10
  25730. adc rax, QWORD PTR [r14+96]
  25731. mov r9, QWORD PTR [rdx+104]
  25732. mov QWORD PTR [r12+96], rax
  25733. adc r9, QWORD PTR [r14+104]
  25734. mov r10, QWORD PTR [rdx+112]
  25735. mov QWORD PTR [r12+104], r9
  25736. adc r10, QWORD PTR [r14+112]
  25737. mov rax, QWORD PTR [rdx+120]
  25738. mov QWORD PTR [r12+112], r10
  25739. adc rax, QWORD PTR [r14+120]
  25740. mov r9, QWORD PTR [rdx+128]
  25741. mov QWORD PTR [r12+120], rax
  25742. adc r9, QWORD PTR [r14+128]
  25743. mov r10, QWORD PTR [rdx+136]
  25744. mov QWORD PTR [r12+128], r9
  25745. adc r10, QWORD PTR [r14+136]
  25746. mov rax, QWORD PTR [rdx+144]
  25747. mov QWORD PTR [r12+136], r10
  25748. adc rax, QWORD PTR [r14+144]
  25749. mov r9, QWORD PTR [rdx+152]
  25750. mov QWORD PTR [r12+144], rax
  25751. adc r9, QWORD PTR [r14+152]
  25752. mov r10, QWORD PTR [rdx+160]
  25753. mov QWORD PTR [r12+152], r9
  25754. adc r10, QWORD PTR [r14+160]
  25755. mov rax, QWORD PTR [rdx+168]
  25756. mov QWORD PTR [r12+160], r10
  25757. adc rax, QWORD PTR [r14+168]
  25758. mov r9, QWORD PTR [rdx+176]
  25759. mov QWORD PTR [r12+168], rax
  25760. adc r9, QWORD PTR [r14+176]
  25761. mov r10, QWORD PTR [rdx+184]
  25762. mov QWORD PTR [r12+176], r9
  25763. adc r10, QWORD PTR [r14+184]
  25764. mov QWORD PTR [r12+184], r10
  25765. adc r15, 0
  25766. mov QWORD PTR [rsp+1176], r15
  25767. lea r13, QWORD PTR [rsp+960]
  25768. lea r14, QWORD PTR [r8+192]
  25769. ; Add
  25770. mov rax, QWORD PTR [r8]
  25771. xor rdi, rdi
  25772. add rax, QWORD PTR [r14]
  25773. mov r9, QWORD PTR [r8+8]
  25774. mov QWORD PTR [r13], rax
  25775. adc r9, QWORD PTR [r14+8]
  25776. mov r10, QWORD PTR [r8+16]
  25777. mov QWORD PTR [r13+8], r9
  25778. adc r10, QWORD PTR [r14+16]
  25779. mov rax, QWORD PTR [r8+24]
  25780. mov QWORD PTR [r13+16], r10
  25781. adc rax, QWORD PTR [r14+24]
  25782. mov r9, QWORD PTR [r8+32]
  25783. mov QWORD PTR [r13+24], rax
  25784. adc r9, QWORD PTR [r14+32]
  25785. mov r10, QWORD PTR [r8+40]
  25786. mov QWORD PTR [r13+32], r9
  25787. adc r10, QWORD PTR [r14+40]
  25788. mov rax, QWORD PTR [r8+48]
  25789. mov QWORD PTR [r13+40], r10
  25790. adc rax, QWORD PTR [r14+48]
  25791. mov r9, QWORD PTR [r8+56]
  25792. mov QWORD PTR [r13+48], rax
  25793. adc r9, QWORD PTR [r14+56]
  25794. mov r10, QWORD PTR [r8+64]
  25795. mov QWORD PTR [r13+56], r9
  25796. adc r10, QWORD PTR [r14+64]
  25797. mov rax, QWORD PTR [r8+72]
  25798. mov QWORD PTR [r13+64], r10
  25799. adc rax, QWORD PTR [r14+72]
  25800. mov r9, QWORD PTR [r8+80]
  25801. mov QWORD PTR [r13+72], rax
  25802. adc r9, QWORD PTR [r14+80]
  25803. mov r10, QWORD PTR [r8+88]
  25804. mov QWORD PTR [r13+80], r9
  25805. adc r10, QWORD PTR [r14+88]
  25806. mov rax, QWORD PTR [r8+96]
  25807. mov QWORD PTR [r13+88], r10
  25808. adc rax, QWORD PTR [r14+96]
  25809. mov r9, QWORD PTR [r8+104]
  25810. mov QWORD PTR [r13+96], rax
  25811. adc r9, QWORD PTR [r14+104]
  25812. mov r10, QWORD PTR [r8+112]
  25813. mov QWORD PTR [r13+104], r9
  25814. adc r10, QWORD PTR [r14+112]
  25815. mov rax, QWORD PTR [r8+120]
  25816. mov QWORD PTR [r13+112], r10
  25817. adc rax, QWORD PTR [r14+120]
  25818. mov r9, QWORD PTR [r8+128]
  25819. mov QWORD PTR [r13+120], rax
  25820. adc r9, QWORD PTR [r14+128]
  25821. mov r10, QWORD PTR [r8+136]
  25822. mov QWORD PTR [r13+128], r9
  25823. adc r10, QWORD PTR [r14+136]
  25824. mov rax, QWORD PTR [r8+144]
  25825. mov QWORD PTR [r13+136], r10
  25826. adc rax, QWORD PTR [r14+144]
  25827. mov r9, QWORD PTR [r8+152]
  25828. mov QWORD PTR [r13+144], rax
  25829. adc r9, QWORD PTR [r14+152]
  25830. mov r10, QWORD PTR [r8+160]
  25831. mov QWORD PTR [r13+152], r9
  25832. adc r10, QWORD PTR [r14+160]
  25833. mov rax, QWORD PTR [r8+168]
  25834. mov QWORD PTR [r13+160], r10
  25835. adc rax, QWORD PTR [r14+168]
  25836. mov r9, QWORD PTR [r8+176]
  25837. mov QWORD PTR [r13+168], rax
  25838. adc r9, QWORD PTR [r14+176]
  25839. mov r10, QWORD PTR [r8+184]
  25840. mov QWORD PTR [r13+176], r9
  25841. adc r10, QWORD PTR [r14+184]
  25842. mov QWORD PTR [r13+184], r10
  25843. adc rdi, 0
  25844. mov QWORD PTR [rsp+1184], rdi
  25845. mov r8, r13
  25846. mov rdx, r12
  25847. mov rcx, rsp
  25848. call sp_3072_mul_avx2_24
  25849. mov r8, QWORD PTR [rsp+1168]
  25850. mov rdx, QWORD PTR [rsp+1160]
  25851. lea rcx, QWORD PTR [rsp+384]
  25852. add r8, 192
  25853. add rdx, 192
  25854. call sp_3072_mul_avx2_24
  25855. mov r8, QWORD PTR [rsp+1168]
  25856. mov rdx, QWORD PTR [rsp+1160]
  25857. mov rcx, QWORD PTR [rsp+1152]
  25858. call sp_3072_mul_avx2_24
  25859. IFDEF _WIN64
  25860. mov r8, QWORD PTR [rsp+1168]
  25861. mov rdx, QWORD PTR [rsp+1160]
  25862. mov rcx, QWORD PTR [rsp+1152]
  25863. ENDIF
  25864. mov r15, QWORD PTR [rsp+1176]
  25865. mov rdi, QWORD PTR [rsp+1184]
  25866. mov rsi, QWORD PTR [rsp+1152]
  25867. mov r11, r15
  25868. lea r12, QWORD PTR [rsp+768]
  25869. lea r13, QWORD PTR [rsp+960]
  25870. and r11, rdi
  25871. neg r15
  25872. neg rdi
  25873. add rsi, 384
  25874. mov rax, QWORD PTR [r12]
  25875. mov r9, QWORD PTR [r13]
  25876. pext rax, rax, rdi
  25877. pext r9, r9, r15
  25878. add rax, r9
  25879. mov r9, QWORD PTR [r12+8]
  25880. mov r10, QWORD PTR [r13+8]
  25881. pext r9, r9, rdi
  25882. pext r10, r10, r15
  25883. mov QWORD PTR [rsi], rax
  25884. adc r9, r10
  25885. mov r10, QWORD PTR [r12+16]
  25886. mov rax, QWORD PTR [r13+16]
  25887. pext r10, r10, rdi
  25888. pext rax, rax, r15
  25889. mov QWORD PTR [rsi+8], r9
  25890. adc r10, rax
  25891. mov rax, QWORD PTR [r12+24]
  25892. mov r9, QWORD PTR [r13+24]
  25893. pext rax, rax, rdi
  25894. pext r9, r9, r15
  25895. mov QWORD PTR [rsi+16], r10
  25896. adc rax, r9
  25897. mov r9, QWORD PTR [r12+32]
  25898. mov r10, QWORD PTR [r13+32]
  25899. pext r9, r9, rdi
  25900. pext r10, r10, r15
  25901. mov QWORD PTR [rsi+24], rax
  25902. adc r9, r10
  25903. mov r10, QWORD PTR [r12+40]
  25904. mov rax, QWORD PTR [r13+40]
  25905. pext r10, r10, rdi
  25906. pext rax, rax, r15
  25907. mov QWORD PTR [rsi+32], r9
  25908. adc r10, rax
  25909. mov rax, QWORD PTR [r12+48]
  25910. mov r9, QWORD PTR [r13+48]
  25911. pext rax, rax, rdi
  25912. pext r9, r9, r15
  25913. mov QWORD PTR [rsi+40], r10
  25914. adc rax, r9
  25915. mov r9, QWORD PTR [r12+56]
  25916. mov r10, QWORD PTR [r13+56]
  25917. pext r9, r9, rdi
  25918. pext r10, r10, r15
  25919. mov QWORD PTR [rsi+48], rax
  25920. adc r9, r10
  25921. mov r10, QWORD PTR [r12+64]
  25922. mov rax, QWORD PTR [r13+64]
  25923. pext r10, r10, rdi
  25924. pext rax, rax, r15
  25925. mov QWORD PTR [rsi+56], r9
  25926. adc r10, rax
  25927. mov rax, QWORD PTR [r12+72]
  25928. mov r9, QWORD PTR [r13+72]
  25929. pext rax, rax, rdi
  25930. pext r9, r9, r15
  25931. mov QWORD PTR [rsi+64], r10
  25932. adc rax, r9
  25933. mov r9, QWORD PTR [r12+80]
  25934. mov r10, QWORD PTR [r13+80]
  25935. pext r9, r9, rdi
  25936. pext r10, r10, r15
  25937. mov QWORD PTR [rsi+72], rax
  25938. adc r9, r10
  25939. mov r10, QWORD PTR [r12+88]
  25940. mov rax, QWORD PTR [r13+88]
  25941. pext r10, r10, rdi
  25942. pext rax, rax, r15
  25943. mov QWORD PTR [rsi+80], r9
  25944. adc r10, rax
  25945. mov rax, QWORD PTR [r12+96]
  25946. mov r9, QWORD PTR [r13+96]
  25947. pext rax, rax, rdi
  25948. pext r9, r9, r15
  25949. mov QWORD PTR [rsi+88], r10
  25950. adc rax, r9
  25951. mov r9, QWORD PTR [r12+104]
  25952. mov r10, QWORD PTR [r13+104]
  25953. pext r9, r9, rdi
  25954. pext r10, r10, r15
  25955. mov QWORD PTR [rsi+96], rax
  25956. adc r9, r10
  25957. mov r10, QWORD PTR [r12+112]
  25958. mov rax, QWORD PTR [r13+112]
  25959. pext r10, r10, rdi
  25960. pext rax, rax, r15
  25961. mov QWORD PTR [rsi+104], r9
  25962. adc r10, rax
  25963. mov rax, QWORD PTR [r12+120]
  25964. mov r9, QWORD PTR [r13+120]
  25965. pext rax, rax, rdi
  25966. pext r9, r9, r15
  25967. mov QWORD PTR [rsi+112], r10
  25968. adc rax, r9
  25969. mov r9, QWORD PTR [r12+128]
  25970. mov r10, QWORD PTR [r13+128]
  25971. pext r9, r9, rdi
  25972. pext r10, r10, r15
  25973. mov QWORD PTR [rsi+120], rax
  25974. adc r9, r10
  25975. mov r10, QWORD PTR [r12+136]
  25976. mov rax, QWORD PTR [r13+136]
  25977. pext r10, r10, rdi
  25978. pext rax, rax, r15
  25979. mov QWORD PTR [rsi+128], r9
  25980. adc r10, rax
  25981. mov rax, QWORD PTR [r12+144]
  25982. mov r9, QWORD PTR [r13+144]
  25983. pext rax, rax, rdi
  25984. pext r9, r9, r15
  25985. mov QWORD PTR [rsi+136], r10
  25986. adc rax, r9
  25987. mov r9, QWORD PTR [r12+152]
  25988. mov r10, QWORD PTR [r13+152]
  25989. pext r9, r9, rdi
  25990. pext r10, r10, r15
  25991. mov QWORD PTR [rsi+144], rax
  25992. adc r9, r10
  25993. mov r10, QWORD PTR [r12+160]
  25994. mov rax, QWORD PTR [r13+160]
  25995. pext r10, r10, rdi
  25996. pext rax, rax, r15
  25997. mov QWORD PTR [rsi+152], r9
  25998. adc r10, rax
  25999. mov rax, QWORD PTR [r12+168]
  26000. mov r9, QWORD PTR [r13+168]
  26001. pext rax, rax, rdi
  26002. pext r9, r9, r15
  26003. mov QWORD PTR [rsi+160], r10
  26004. adc rax, r9
  26005. mov r9, QWORD PTR [r12+176]
  26006. mov r10, QWORD PTR [r13+176]
  26007. pext r9, r9, rdi
  26008. pext r10, r10, r15
  26009. mov QWORD PTR [rsi+168], rax
  26010. adc r9, r10
  26011. mov r10, QWORD PTR [r12+184]
  26012. mov rax, QWORD PTR [r13+184]
  26013. pext r10, r10, rdi
  26014. pext rax, rax, r15
  26015. mov QWORD PTR [rsi+176], r9
  26016. adc r10, rax
  26017. mov QWORD PTR [rsi+184], r10
  26018. adc r11, 0
  26019. lea r13, QWORD PTR [rsp+384]
  26020. mov r12, rsp
  26021. mov rax, QWORD PTR [r12]
  26022. sub rax, QWORD PTR [r13]
  26023. mov r9, QWORD PTR [r12+8]
  26024. mov QWORD PTR [r12], rax
  26025. sbb r9, QWORD PTR [r13+8]
  26026. mov r10, QWORD PTR [r12+16]
  26027. mov QWORD PTR [r12+8], r9
  26028. sbb r10, QWORD PTR [r13+16]
  26029. mov rax, QWORD PTR [r12+24]
  26030. mov QWORD PTR [r12+16], r10
  26031. sbb rax, QWORD PTR [r13+24]
  26032. mov r9, QWORD PTR [r12+32]
  26033. mov QWORD PTR [r12+24], rax
  26034. sbb r9, QWORD PTR [r13+32]
  26035. mov r10, QWORD PTR [r12+40]
  26036. mov QWORD PTR [r12+32], r9
  26037. sbb r10, QWORD PTR [r13+40]
  26038. mov rax, QWORD PTR [r12+48]
  26039. mov QWORD PTR [r12+40], r10
  26040. sbb rax, QWORD PTR [r13+48]
  26041. mov r9, QWORD PTR [r12+56]
  26042. mov QWORD PTR [r12+48], rax
  26043. sbb r9, QWORD PTR [r13+56]
  26044. mov r10, QWORD PTR [r12+64]
  26045. mov QWORD PTR [r12+56], r9
  26046. sbb r10, QWORD PTR [r13+64]
  26047. mov rax, QWORD PTR [r12+72]
  26048. mov QWORD PTR [r12+64], r10
  26049. sbb rax, QWORD PTR [r13+72]
  26050. mov r9, QWORD PTR [r12+80]
  26051. mov QWORD PTR [r12+72], rax
  26052. sbb r9, QWORD PTR [r13+80]
  26053. mov r10, QWORD PTR [r12+88]
  26054. mov QWORD PTR [r12+80], r9
  26055. sbb r10, QWORD PTR [r13+88]
  26056. mov rax, QWORD PTR [r12+96]
  26057. mov QWORD PTR [r12+88], r10
  26058. sbb rax, QWORD PTR [r13+96]
  26059. mov r9, QWORD PTR [r12+104]
  26060. mov QWORD PTR [r12+96], rax
  26061. sbb r9, QWORD PTR [r13+104]
  26062. mov r10, QWORD PTR [r12+112]
  26063. mov QWORD PTR [r12+104], r9
  26064. sbb r10, QWORD PTR [r13+112]
  26065. mov rax, QWORD PTR [r12+120]
  26066. mov QWORD PTR [r12+112], r10
  26067. sbb rax, QWORD PTR [r13+120]
  26068. mov r9, QWORD PTR [r12+128]
  26069. mov QWORD PTR [r12+120], rax
  26070. sbb r9, QWORD PTR [r13+128]
  26071. mov r10, QWORD PTR [r12+136]
  26072. mov QWORD PTR [r12+128], r9
  26073. sbb r10, QWORD PTR [r13+136]
  26074. mov rax, QWORD PTR [r12+144]
  26075. mov QWORD PTR [r12+136], r10
  26076. sbb rax, QWORD PTR [r13+144]
  26077. mov r9, QWORD PTR [r12+152]
  26078. mov QWORD PTR [r12+144], rax
  26079. sbb r9, QWORD PTR [r13+152]
  26080. mov r10, QWORD PTR [r12+160]
  26081. mov QWORD PTR [r12+152], r9
  26082. sbb r10, QWORD PTR [r13+160]
  26083. mov rax, QWORD PTR [r12+168]
  26084. mov QWORD PTR [r12+160], r10
  26085. sbb rax, QWORD PTR [r13+168]
  26086. mov r9, QWORD PTR [r12+176]
  26087. mov QWORD PTR [r12+168], rax
  26088. sbb r9, QWORD PTR [r13+176]
  26089. mov r10, QWORD PTR [r12+184]
  26090. mov QWORD PTR [r12+176], r9
  26091. sbb r10, QWORD PTR [r13+184]
  26092. mov rax, QWORD PTR [r12+192]
  26093. mov QWORD PTR [r12+184], r10
  26094. sbb rax, QWORD PTR [r13+192]
  26095. mov r9, QWORD PTR [r12+200]
  26096. mov QWORD PTR [r12+192], rax
  26097. sbb r9, QWORD PTR [r13+200]
  26098. mov r10, QWORD PTR [r12+208]
  26099. mov QWORD PTR [r12+200], r9
  26100. sbb r10, QWORD PTR [r13+208]
  26101. mov rax, QWORD PTR [r12+216]
  26102. mov QWORD PTR [r12+208], r10
  26103. sbb rax, QWORD PTR [r13+216]
  26104. mov r9, QWORD PTR [r12+224]
  26105. mov QWORD PTR [r12+216], rax
  26106. sbb r9, QWORD PTR [r13+224]
  26107. mov r10, QWORD PTR [r12+232]
  26108. mov QWORD PTR [r12+224], r9
  26109. sbb r10, QWORD PTR [r13+232]
  26110. mov rax, QWORD PTR [r12+240]
  26111. mov QWORD PTR [r12+232], r10
  26112. sbb rax, QWORD PTR [r13+240]
  26113. mov r9, QWORD PTR [r12+248]
  26114. mov QWORD PTR [r12+240], rax
  26115. sbb r9, QWORD PTR [r13+248]
  26116. mov r10, QWORD PTR [r12+256]
  26117. mov QWORD PTR [r12+248], r9
  26118. sbb r10, QWORD PTR [r13+256]
  26119. mov rax, QWORD PTR [r12+264]
  26120. mov QWORD PTR [r12+256], r10
  26121. sbb rax, QWORD PTR [r13+264]
  26122. mov r9, QWORD PTR [r12+272]
  26123. mov QWORD PTR [r12+264], rax
  26124. sbb r9, QWORD PTR [r13+272]
  26125. mov r10, QWORD PTR [r12+280]
  26126. mov QWORD PTR [r12+272], r9
  26127. sbb r10, QWORD PTR [r13+280]
  26128. mov rax, QWORD PTR [r12+288]
  26129. mov QWORD PTR [r12+280], r10
  26130. sbb rax, QWORD PTR [r13+288]
  26131. mov r9, QWORD PTR [r12+296]
  26132. mov QWORD PTR [r12+288], rax
  26133. sbb r9, QWORD PTR [r13+296]
  26134. mov r10, QWORD PTR [r12+304]
  26135. mov QWORD PTR [r12+296], r9
  26136. sbb r10, QWORD PTR [r13+304]
  26137. mov rax, QWORD PTR [r12+312]
  26138. mov QWORD PTR [r12+304], r10
  26139. sbb rax, QWORD PTR [r13+312]
  26140. mov r9, QWORD PTR [r12+320]
  26141. mov QWORD PTR [r12+312], rax
  26142. sbb r9, QWORD PTR [r13+320]
  26143. mov r10, QWORD PTR [r12+328]
  26144. mov QWORD PTR [r12+320], r9
  26145. sbb r10, QWORD PTR [r13+328]
  26146. mov rax, QWORD PTR [r12+336]
  26147. mov QWORD PTR [r12+328], r10
  26148. sbb rax, QWORD PTR [r13+336]
  26149. mov r9, QWORD PTR [r12+344]
  26150. mov QWORD PTR [r12+336], rax
  26151. sbb r9, QWORD PTR [r13+344]
  26152. mov r10, QWORD PTR [r12+352]
  26153. mov QWORD PTR [r12+344], r9
  26154. sbb r10, QWORD PTR [r13+352]
  26155. mov rax, QWORD PTR [r12+360]
  26156. mov QWORD PTR [r12+352], r10
  26157. sbb rax, QWORD PTR [r13+360]
  26158. mov r9, QWORD PTR [r12+368]
  26159. mov QWORD PTR [r12+360], rax
  26160. sbb r9, QWORD PTR [r13+368]
  26161. mov r10, QWORD PTR [r12+376]
  26162. mov QWORD PTR [r12+368], r9
  26163. sbb r10, QWORD PTR [r13+376]
  26164. mov QWORD PTR [r12+376], r10
  26165. sbb r11, 0
  26166. mov rax, QWORD PTR [r12]
  26167. sub rax, QWORD PTR [rcx]
  26168. mov r9, QWORD PTR [r12+8]
  26169. mov QWORD PTR [r12], rax
  26170. sbb r9, QWORD PTR [rcx+8]
  26171. mov r10, QWORD PTR [r12+16]
  26172. mov QWORD PTR [r12+8], r9
  26173. sbb r10, QWORD PTR [rcx+16]
  26174. mov rax, QWORD PTR [r12+24]
  26175. mov QWORD PTR [r12+16], r10
  26176. sbb rax, QWORD PTR [rcx+24]
  26177. mov r9, QWORD PTR [r12+32]
  26178. mov QWORD PTR [r12+24], rax
  26179. sbb r9, QWORD PTR [rcx+32]
  26180. mov r10, QWORD PTR [r12+40]
  26181. mov QWORD PTR [r12+32], r9
  26182. sbb r10, QWORD PTR [rcx+40]
  26183. mov rax, QWORD PTR [r12+48]
  26184. mov QWORD PTR [r12+40], r10
  26185. sbb rax, QWORD PTR [rcx+48]
  26186. mov r9, QWORD PTR [r12+56]
  26187. mov QWORD PTR [r12+48], rax
  26188. sbb r9, QWORD PTR [rcx+56]
  26189. mov r10, QWORD PTR [r12+64]
  26190. mov QWORD PTR [r12+56], r9
  26191. sbb r10, QWORD PTR [rcx+64]
  26192. mov rax, QWORD PTR [r12+72]
  26193. mov QWORD PTR [r12+64], r10
  26194. sbb rax, QWORD PTR [rcx+72]
  26195. mov r9, QWORD PTR [r12+80]
  26196. mov QWORD PTR [r12+72], rax
  26197. sbb r9, QWORD PTR [rcx+80]
  26198. mov r10, QWORD PTR [r12+88]
  26199. mov QWORD PTR [r12+80], r9
  26200. sbb r10, QWORD PTR [rcx+88]
  26201. mov rax, QWORD PTR [r12+96]
  26202. mov QWORD PTR [r12+88], r10
  26203. sbb rax, QWORD PTR [rcx+96]
  26204. mov r9, QWORD PTR [r12+104]
  26205. mov QWORD PTR [r12+96], rax
  26206. sbb r9, QWORD PTR [rcx+104]
  26207. mov r10, QWORD PTR [r12+112]
  26208. mov QWORD PTR [r12+104], r9
  26209. sbb r10, QWORD PTR [rcx+112]
  26210. mov rax, QWORD PTR [r12+120]
  26211. mov QWORD PTR [r12+112], r10
  26212. sbb rax, QWORD PTR [rcx+120]
  26213. mov r9, QWORD PTR [r12+128]
  26214. mov QWORD PTR [r12+120], rax
  26215. sbb r9, QWORD PTR [rcx+128]
  26216. mov r10, QWORD PTR [r12+136]
  26217. mov QWORD PTR [r12+128], r9
  26218. sbb r10, QWORD PTR [rcx+136]
  26219. mov rax, QWORD PTR [r12+144]
  26220. mov QWORD PTR [r12+136], r10
  26221. sbb rax, QWORD PTR [rcx+144]
  26222. mov r9, QWORD PTR [r12+152]
  26223. mov QWORD PTR [r12+144], rax
  26224. sbb r9, QWORD PTR [rcx+152]
  26225. mov r10, QWORD PTR [r12+160]
  26226. mov QWORD PTR [r12+152], r9
  26227. sbb r10, QWORD PTR [rcx+160]
  26228. mov rax, QWORD PTR [r12+168]
  26229. mov QWORD PTR [r12+160], r10
  26230. sbb rax, QWORD PTR [rcx+168]
  26231. mov r9, QWORD PTR [r12+176]
  26232. mov QWORD PTR [r12+168], rax
  26233. sbb r9, QWORD PTR [rcx+176]
  26234. mov r10, QWORD PTR [r12+184]
  26235. mov QWORD PTR [r12+176], r9
  26236. sbb r10, QWORD PTR [rcx+184]
  26237. mov rax, QWORD PTR [r12+192]
  26238. mov QWORD PTR [r12+184], r10
  26239. sbb rax, QWORD PTR [rcx+192]
  26240. mov r9, QWORD PTR [r12+200]
  26241. mov QWORD PTR [r12+192], rax
  26242. sbb r9, QWORD PTR [rcx+200]
  26243. mov r10, QWORD PTR [r12+208]
  26244. mov QWORD PTR [r12+200], r9
  26245. sbb r10, QWORD PTR [rcx+208]
  26246. mov rax, QWORD PTR [r12+216]
  26247. mov QWORD PTR [r12+208], r10
  26248. sbb rax, QWORD PTR [rcx+216]
  26249. mov r9, QWORD PTR [r12+224]
  26250. mov QWORD PTR [r12+216], rax
  26251. sbb r9, QWORD PTR [rcx+224]
  26252. mov r10, QWORD PTR [r12+232]
  26253. mov QWORD PTR [r12+224], r9
  26254. sbb r10, QWORD PTR [rcx+232]
  26255. mov rax, QWORD PTR [r12+240]
  26256. mov QWORD PTR [r12+232], r10
  26257. sbb rax, QWORD PTR [rcx+240]
  26258. mov r9, QWORD PTR [r12+248]
  26259. mov QWORD PTR [r12+240], rax
  26260. sbb r9, QWORD PTR [rcx+248]
  26261. mov r10, QWORD PTR [r12+256]
  26262. mov QWORD PTR [r12+248], r9
  26263. sbb r10, QWORD PTR [rcx+256]
  26264. mov rax, QWORD PTR [r12+264]
  26265. mov QWORD PTR [r12+256], r10
  26266. sbb rax, QWORD PTR [rcx+264]
  26267. mov r9, QWORD PTR [r12+272]
  26268. mov QWORD PTR [r12+264], rax
  26269. sbb r9, QWORD PTR [rcx+272]
  26270. mov r10, QWORD PTR [r12+280]
  26271. mov QWORD PTR [r12+272], r9
  26272. sbb r10, QWORD PTR [rcx+280]
  26273. mov rax, QWORD PTR [r12+288]
  26274. mov QWORD PTR [r12+280], r10
  26275. sbb rax, QWORD PTR [rcx+288]
  26276. mov r9, QWORD PTR [r12+296]
  26277. mov QWORD PTR [r12+288], rax
  26278. sbb r9, QWORD PTR [rcx+296]
  26279. mov r10, QWORD PTR [r12+304]
  26280. mov QWORD PTR [r12+296], r9
  26281. sbb r10, QWORD PTR [rcx+304]
  26282. mov rax, QWORD PTR [r12+312]
  26283. mov QWORD PTR [r12+304], r10
  26284. sbb rax, QWORD PTR [rcx+312]
  26285. mov r9, QWORD PTR [r12+320]
  26286. mov QWORD PTR [r12+312], rax
  26287. sbb r9, QWORD PTR [rcx+320]
  26288. mov r10, QWORD PTR [r12+328]
  26289. mov QWORD PTR [r12+320], r9
  26290. sbb r10, QWORD PTR [rcx+328]
  26291. mov rax, QWORD PTR [r12+336]
  26292. mov QWORD PTR [r12+328], r10
  26293. sbb rax, QWORD PTR [rcx+336]
  26294. mov r9, QWORD PTR [r12+344]
  26295. mov QWORD PTR [r12+336], rax
  26296. sbb r9, QWORD PTR [rcx+344]
  26297. mov r10, QWORD PTR [r12+352]
  26298. mov QWORD PTR [r12+344], r9
  26299. sbb r10, QWORD PTR [rcx+352]
  26300. mov rax, QWORD PTR [r12+360]
  26301. mov QWORD PTR [r12+352], r10
  26302. sbb rax, QWORD PTR [rcx+360]
  26303. mov r9, QWORD PTR [r12+368]
  26304. mov QWORD PTR [r12+360], rax
  26305. sbb r9, QWORD PTR [rcx+368]
  26306. mov r10, QWORD PTR [r12+376]
  26307. mov QWORD PTR [r12+368], r9
  26308. sbb r10, QWORD PTR [rcx+376]
  26309. mov QWORD PTR [r12+376], r10
  26310. sbb r11, 0
  26311. sub rsi, 192
  26312. ; Add
  26313. mov rax, QWORD PTR [rsi]
  26314. add rax, QWORD PTR [r12]
  26315. mov r9, QWORD PTR [rsi+8]
  26316. mov QWORD PTR [rsi], rax
  26317. adc r9, QWORD PTR [r12+8]
  26318. mov r10, QWORD PTR [rsi+16]
  26319. mov QWORD PTR [rsi+8], r9
  26320. adc r10, QWORD PTR [r12+16]
  26321. mov rax, QWORD PTR [rsi+24]
  26322. mov QWORD PTR [rsi+16], r10
  26323. adc rax, QWORD PTR [r12+24]
  26324. mov r9, QWORD PTR [rsi+32]
  26325. mov QWORD PTR [rsi+24], rax
  26326. adc r9, QWORD PTR [r12+32]
  26327. mov r10, QWORD PTR [rsi+40]
  26328. mov QWORD PTR [rsi+32], r9
  26329. adc r10, QWORD PTR [r12+40]
  26330. mov rax, QWORD PTR [rsi+48]
  26331. mov QWORD PTR [rsi+40], r10
  26332. adc rax, QWORD PTR [r12+48]
  26333. mov r9, QWORD PTR [rsi+56]
  26334. mov QWORD PTR [rsi+48], rax
  26335. adc r9, QWORD PTR [r12+56]
  26336. mov r10, QWORD PTR [rsi+64]
  26337. mov QWORD PTR [rsi+56], r9
  26338. adc r10, QWORD PTR [r12+64]
  26339. mov rax, QWORD PTR [rsi+72]
  26340. mov QWORD PTR [rsi+64], r10
  26341. adc rax, QWORD PTR [r12+72]
  26342. mov r9, QWORD PTR [rsi+80]
  26343. mov QWORD PTR [rsi+72], rax
  26344. adc r9, QWORD PTR [r12+80]
  26345. mov r10, QWORD PTR [rsi+88]
  26346. mov QWORD PTR [rsi+80], r9
  26347. adc r10, QWORD PTR [r12+88]
  26348. mov rax, QWORD PTR [rsi+96]
  26349. mov QWORD PTR [rsi+88], r10
  26350. adc rax, QWORD PTR [r12+96]
  26351. mov r9, QWORD PTR [rsi+104]
  26352. mov QWORD PTR [rsi+96], rax
  26353. adc r9, QWORD PTR [r12+104]
  26354. mov r10, QWORD PTR [rsi+112]
  26355. mov QWORD PTR [rsi+104], r9
  26356. adc r10, QWORD PTR [r12+112]
  26357. mov rax, QWORD PTR [rsi+120]
  26358. mov QWORD PTR [rsi+112], r10
  26359. adc rax, QWORD PTR [r12+120]
  26360. mov r9, QWORD PTR [rsi+128]
  26361. mov QWORD PTR [rsi+120], rax
  26362. adc r9, QWORD PTR [r12+128]
  26363. mov r10, QWORD PTR [rsi+136]
  26364. mov QWORD PTR [rsi+128], r9
  26365. adc r10, QWORD PTR [r12+136]
  26366. mov rax, QWORD PTR [rsi+144]
  26367. mov QWORD PTR [rsi+136], r10
  26368. adc rax, QWORD PTR [r12+144]
  26369. mov r9, QWORD PTR [rsi+152]
  26370. mov QWORD PTR [rsi+144], rax
  26371. adc r9, QWORD PTR [r12+152]
  26372. mov r10, QWORD PTR [rsi+160]
  26373. mov QWORD PTR [rsi+152], r9
  26374. adc r10, QWORD PTR [r12+160]
  26375. mov rax, QWORD PTR [rsi+168]
  26376. mov QWORD PTR [rsi+160], r10
  26377. adc rax, QWORD PTR [r12+168]
  26378. mov r9, QWORD PTR [rsi+176]
  26379. mov QWORD PTR [rsi+168], rax
  26380. adc r9, QWORD PTR [r12+176]
  26381. mov r10, QWORD PTR [rsi+184]
  26382. mov QWORD PTR [rsi+176], r9
  26383. adc r10, QWORD PTR [r12+184]
  26384. mov rax, QWORD PTR [rsi+192]
  26385. mov QWORD PTR [rsi+184], r10
  26386. adc rax, QWORD PTR [r12+192]
  26387. mov r9, QWORD PTR [rsi+200]
  26388. mov QWORD PTR [rsi+192], rax
  26389. adc r9, QWORD PTR [r12+200]
  26390. mov r10, QWORD PTR [rsi+208]
  26391. mov QWORD PTR [rsi+200], r9
  26392. adc r10, QWORD PTR [r12+208]
  26393. mov rax, QWORD PTR [rsi+216]
  26394. mov QWORD PTR [rsi+208], r10
  26395. adc rax, QWORD PTR [r12+216]
  26396. mov r9, QWORD PTR [rsi+224]
  26397. mov QWORD PTR [rsi+216], rax
  26398. adc r9, QWORD PTR [r12+224]
  26399. mov r10, QWORD PTR [rsi+232]
  26400. mov QWORD PTR [rsi+224], r9
  26401. adc r10, QWORD PTR [r12+232]
  26402. mov rax, QWORD PTR [rsi+240]
  26403. mov QWORD PTR [rsi+232], r10
  26404. adc rax, QWORD PTR [r12+240]
  26405. mov r9, QWORD PTR [rsi+248]
  26406. mov QWORD PTR [rsi+240], rax
  26407. adc r9, QWORD PTR [r12+248]
  26408. mov r10, QWORD PTR [rsi+256]
  26409. mov QWORD PTR [rsi+248], r9
  26410. adc r10, QWORD PTR [r12+256]
  26411. mov rax, QWORD PTR [rsi+264]
  26412. mov QWORD PTR [rsi+256], r10
  26413. adc rax, QWORD PTR [r12+264]
  26414. mov r9, QWORD PTR [rsi+272]
  26415. mov QWORD PTR [rsi+264], rax
  26416. adc r9, QWORD PTR [r12+272]
  26417. mov r10, QWORD PTR [rsi+280]
  26418. mov QWORD PTR [rsi+272], r9
  26419. adc r10, QWORD PTR [r12+280]
  26420. mov rax, QWORD PTR [rsi+288]
  26421. mov QWORD PTR [rsi+280], r10
  26422. adc rax, QWORD PTR [r12+288]
  26423. mov r9, QWORD PTR [rsi+296]
  26424. mov QWORD PTR [rsi+288], rax
  26425. adc r9, QWORD PTR [r12+296]
  26426. mov r10, QWORD PTR [rsi+304]
  26427. mov QWORD PTR [rsi+296], r9
  26428. adc r10, QWORD PTR [r12+304]
  26429. mov rax, QWORD PTR [rsi+312]
  26430. mov QWORD PTR [rsi+304], r10
  26431. adc rax, QWORD PTR [r12+312]
  26432. mov r9, QWORD PTR [rsi+320]
  26433. mov QWORD PTR [rsi+312], rax
  26434. adc r9, QWORD PTR [r12+320]
  26435. mov r10, QWORD PTR [rsi+328]
  26436. mov QWORD PTR [rsi+320], r9
  26437. adc r10, QWORD PTR [r12+328]
  26438. mov rax, QWORD PTR [rsi+336]
  26439. mov QWORD PTR [rsi+328], r10
  26440. adc rax, QWORD PTR [r12+336]
  26441. mov r9, QWORD PTR [rsi+344]
  26442. mov QWORD PTR [rsi+336], rax
  26443. adc r9, QWORD PTR [r12+344]
  26444. mov r10, QWORD PTR [rsi+352]
  26445. mov QWORD PTR [rsi+344], r9
  26446. adc r10, QWORD PTR [r12+352]
  26447. mov rax, QWORD PTR [rsi+360]
  26448. mov QWORD PTR [rsi+352], r10
  26449. adc rax, QWORD PTR [r12+360]
  26450. mov r9, QWORD PTR [rsi+368]
  26451. mov QWORD PTR [rsi+360], rax
  26452. adc r9, QWORD PTR [r12+368]
  26453. mov r10, QWORD PTR [rsi+376]
  26454. mov QWORD PTR [rsi+368], r9
  26455. adc r10, QWORD PTR [r12+376]
  26456. mov QWORD PTR [rsi+376], r10
  26457. adc r11, 0
  26458. mov QWORD PTR [rcx+576], r11
  26459. add rsi, 192
  26460. ; Add
  26461. mov rax, QWORD PTR [rsi]
  26462. add rax, QWORD PTR [r13]
  26463. mov r9, QWORD PTR [rsi+8]
  26464. mov QWORD PTR [rsi], rax
  26465. adc r9, QWORD PTR [r13+8]
  26466. mov r10, QWORD PTR [rsi+16]
  26467. mov QWORD PTR [rsi+8], r9
  26468. adc r10, QWORD PTR [r13+16]
  26469. mov rax, QWORD PTR [rsi+24]
  26470. mov QWORD PTR [rsi+16], r10
  26471. adc rax, QWORD PTR [r13+24]
  26472. mov r9, QWORD PTR [rsi+32]
  26473. mov QWORD PTR [rsi+24], rax
  26474. adc r9, QWORD PTR [r13+32]
  26475. mov r10, QWORD PTR [rsi+40]
  26476. mov QWORD PTR [rsi+32], r9
  26477. adc r10, QWORD PTR [r13+40]
  26478. mov rax, QWORD PTR [rsi+48]
  26479. mov QWORD PTR [rsi+40], r10
  26480. adc rax, QWORD PTR [r13+48]
  26481. mov r9, QWORD PTR [rsi+56]
  26482. mov QWORD PTR [rsi+48], rax
  26483. adc r9, QWORD PTR [r13+56]
  26484. mov r10, QWORD PTR [rsi+64]
  26485. mov QWORD PTR [rsi+56], r9
  26486. adc r10, QWORD PTR [r13+64]
  26487. mov rax, QWORD PTR [rsi+72]
  26488. mov QWORD PTR [rsi+64], r10
  26489. adc rax, QWORD PTR [r13+72]
  26490. mov r9, QWORD PTR [rsi+80]
  26491. mov QWORD PTR [rsi+72], rax
  26492. adc r9, QWORD PTR [r13+80]
  26493. mov r10, QWORD PTR [rsi+88]
  26494. mov QWORD PTR [rsi+80], r9
  26495. adc r10, QWORD PTR [r13+88]
  26496. mov rax, QWORD PTR [rsi+96]
  26497. mov QWORD PTR [rsi+88], r10
  26498. adc rax, QWORD PTR [r13+96]
  26499. mov r9, QWORD PTR [rsi+104]
  26500. mov QWORD PTR [rsi+96], rax
  26501. adc r9, QWORD PTR [r13+104]
  26502. mov r10, QWORD PTR [rsi+112]
  26503. mov QWORD PTR [rsi+104], r9
  26504. adc r10, QWORD PTR [r13+112]
  26505. mov rax, QWORD PTR [rsi+120]
  26506. mov QWORD PTR [rsi+112], r10
  26507. adc rax, QWORD PTR [r13+120]
  26508. mov r9, QWORD PTR [rsi+128]
  26509. mov QWORD PTR [rsi+120], rax
  26510. adc r9, QWORD PTR [r13+128]
  26511. mov r10, QWORD PTR [rsi+136]
  26512. mov QWORD PTR [rsi+128], r9
  26513. adc r10, QWORD PTR [r13+136]
  26514. mov rax, QWORD PTR [rsi+144]
  26515. mov QWORD PTR [rsi+136], r10
  26516. adc rax, QWORD PTR [r13+144]
  26517. mov r9, QWORD PTR [rsi+152]
  26518. mov QWORD PTR [rsi+144], rax
  26519. adc r9, QWORD PTR [r13+152]
  26520. mov r10, QWORD PTR [rsi+160]
  26521. mov QWORD PTR [rsi+152], r9
  26522. adc r10, QWORD PTR [r13+160]
  26523. mov rax, QWORD PTR [rsi+168]
  26524. mov QWORD PTR [rsi+160], r10
  26525. adc rax, QWORD PTR [r13+168]
  26526. mov r9, QWORD PTR [rsi+176]
  26527. mov QWORD PTR [rsi+168], rax
  26528. adc r9, QWORD PTR [r13+176]
  26529. mov r10, QWORD PTR [rsi+184]
  26530. mov QWORD PTR [rsi+176], r9
  26531. adc r10, QWORD PTR [r13+184]
  26532. mov rax, QWORD PTR [rsi+192]
  26533. mov QWORD PTR [rsi+184], r10
  26534. adc rax, QWORD PTR [r13+192]
  26535. mov QWORD PTR [rsi+192], rax
  26536. ; Add to zero
  26537. mov rax, QWORD PTR [r13+200]
  26538. adc rax, 0
  26539. mov r9, QWORD PTR [r13+208]
  26540. mov QWORD PTR [rsi+200], rax
  26541. adc r9, 0
  26542. mov r10, QWORD PTR [r13+216]
  26543. mov QWORD PTR [rsi+208], r9
  26544. adc r10, 0
  26545. mov rax, QWORD PTR [r13+224]
  26546. mov QWORD PTR [rsi+216], r10
  26547. adc rax, 0
  26548. mov r9, QWORD PTR [r13+232]
  26549. mov QWORD PTR [rsi+224], rax
  26550. adc r9, 0
  26551. mov r10, QWORD PTR [r13+240]
  26552. mov QWORD PTR [rsi+232], r9
  26553. adc r10, 0
  26554. mov rax, QWORD PTR [r13+248]
  26555. mov QWORD PTR [rsi+240], r10
  26556. adc rax, 0
  26557. mov r9, QWORD PTR [r13+256]
  26558. mov QWORD PTR [rsi+248], rax
  26559. adc r9, 0
  26560. mov r10, QWORD PTR [r13+264]
  26561. mov QWORD PTR [rsi+256], r9
  26562. adc r10, 0
  26563. mov rax, QWORD PTR [r13+272]
  26564. mov QWORD PTR [rsi+264], r10
  26565. adc rax, 0
  26566. mov r9, QWORD PTR [r13+280]
  26567. mov QWORD PTR [rsi+272], rax
  26568. adc r9, 0
  26569. mov r10, QWORD PTR [r13+288]
  26570. mov QWORD PTR [rsi+280], r9
  26571. adc r10, 0
  26572. mov rax, QWORD PTR [r13+296]
  26573. mov QWORD PTR [rsi+288], r10
  26574. adc rax, 0
  26575. mov r9, QWORD PTR [r13+304]
  26576. mov QWORD PTR [rsi+296], rax
  26577. adc r9, 0
  26578. mov r10, QWORD PTR [r13+312]
  26579. mov QWORD PTR [rsi+304], r9
  26580. adc r10, 0
  26581. mov rax, QWORD PTR [r13+320]
  26582. mov QWORD PTR [rsi+312], r10
  26583. adc rax, 0
  26584. mov r9, QWORD PTR [r13+328]
  26585. mov QWORD PTR [rsi+320], rax
  26586. adc r9, 0
  26587. mov r10, QWORD PTR [r13+336]
  26588. mov QWORD PTR [rsi+328], r9
  26589. adc r10, 0
  26590. mov rax, QWORD PTR [r13+344]
  26591. mov QWORD PTR [rsi+336], r10
  26592. adc rax, 0
  26593. mov r9, QWORD PTR [r13+352]
  26594. mov QWORD PTR [rsi+344], rax
  26595. adc r9, 0
  26596. mov r10, QWORD PTR [r13+360]
  26597. mov QWORD PTR [rsi+352], r9
  26598. adc r10, 0
  26599. mov rax, QWORD PTR [r13+368]
  26600. mov QWORD PTR [rsi+360], r10
  26601. adc rax, 0
  26602. mov r9, QWORD PTR [r13+376]
  26603. mov QWORD PTR [rsi+368], rax
  26604. adc r9, 0
  26605. mov QWORD PTR [rsi+376], r9
  26606. add rsp, 1192
  26607. pop rsi
  26608. pop rdi
  26609. pop r15
  26610. pop r14
  26611. pop r13
  26612. pop r12
  26613. ret
  26614. sp_3072_mul_avx2_48 ENDP
  26615. _text ENDS
  26616. ENDIF
  26617. ; /* Square a and put the result in r. (r = a * a)
  26618. ; *
  26619. ; * r Output: single precision integer that receives the square of a.
  26620. ; * a Input: single precision integer to be squared.
  26621. ; */
  26622. _text SEGMENT READONLY PARA
  26623. sp_3072_sqr_12 PROC
  26624. push r12
  26625. push r13
  26626. push r14
  26627. mov r8, rdx
  26628. sub rsp, 96
  26629. ; A[0] * A[0]
  26630. mov rax, QWORD PTR [r8]
  26631. mul rax
  26632. xor r11, r11
  26633. mov QWORD PTR [rsp], rax
  26634. mov r10, rdx
  26635. ; A[0] * A[1]
  26636. mov rax, QWORD PTR [r8+8]
  26637. mul QWORD PTR [r8]
  26638. xor r9, r9
  26639. add r10, rax
  26640. adc r11, rdx
  26641. adc r9, 0
  26642. add r10, rax
  26643. adc r11, rdx
  26644. adc r9, 0
  26645. mov QWORD PTR [rsp+8], r10
  26646. ; A[0] * A[2]
  26647. mov rax, QWORD PTR [r8+16]
  26648. mul QWORD PTR [r8]
  26649. xor r10, r10
  26650. add r11, rax
  26651. adc r9, rdx
  26652. adc r10, 0
  26653. add r11, rax
  26654. adc r9, rdx
  26655. adc r10, 0
  26656. ; A[1] * A[1]
  26657. mov rax, QWORD PTR [r8+8]
  26658. mul rax
  26659. add r11, rax
  26660. adc r9, rdx
  26661. adc r10, 0
  26662. mov QWORD PTR [rsp+16], r11
  26663. ; A[0] * A[3]
  26664. mov rax, QWORD PTR [r8+24]
  26665. mul QWORD PTR [r8]
  26666. xor r11, r11
  26667. add r9, rax
  26668. adc r10, rdx
  26669. adc r11, 0
  26670. add r9, rax
  26671. adc r10, rdx
  26672. adc r11, 0
  26673. ; A[1] * A[2]
  26674. mov rax, QWORD PTR [r8+16]
  26675. mul QWORD PTR [r8+8]
  26676. add r9, rax
  26677. adc r10, rdx
  26678. adc r11, 0
  26679. add r9, rax
  26680. adc r10, rdx
  26681. adc r11, 0
  26682. mov QWORD PTR [rsp+24], r9
  26683. ; A[0] * A[4]
  26684. mov rax, QWORD PTR [r8+32]
  26685. mul QWORD PTR [r8]
  26686. xor r9, r9
  26687. add r10, rax
  26688. adc r11, rdx
  26689. adc r9, 0
  26690. add r10, rax
  26691. adc r11, rdx
  26692. adc r9, 0
  26693. ; A[1] * A[3]
  26694. mov rax, QWORD PTR [r8+24]
  26695. mul QWORD PTR [r8+8]
  26696. add r10, rax
  26697. adc r11, rdx
  26698. adc r9, 0
  26699. add r10, rax
  26700. adc r11, rdx
  26701. adc r9, 0
  26702. ; A[2] * A[2]
  26703. mov rax, QWORD PTR [r8+16]
  26704. mul rax
  26705. add r10, rax
  26706. adc r11, rdx
  26707. adc r9, 0
  26708. mov QWORD PTR [rsp+32], r10
  26709. ; A[0] * A[5]
  26710. mov rax, QWORD PTR [r8+40]
  26711. mul QWORD PTR [r8]
  26712. xor r10, r10
  26713. xor r14, r14
  26714. mov r12, rax
  26715. mov r13, rdx
  26716. ; A[1] * A[4]
  26717. mov rax, QWORD PTR [r8+32]
  26718. mul QWORD PTR [r8+8]
  26719. add r12, rax
  26720. adc r13, rdx
  26721. adc r14, 0
  26722. ; A[2] * A[3]
  26723. mov rax, QWORD PTR [r8+24]
  26724. mul QWORD PTR [r8+16]
  26725. add r12, rax
  26726. adc r13, rdx
  26727. adc r14, 0
  26728. add r12, r12
  26729. adc r13, r13
  26730. adc r14, r14
  26731. add r11, r12
  26732. adc r9, r13
  26733. adc r10, r14
  26734. mov QWORD PTR [rsp+40], r11
  26735. ; A[0] * A[6]
  26736. mov rax, QWORD PTR [r8+48]
  26737. mul QWORD PTR [r8]
  26738. xor r11, r11
  26739. xor r14, r14
  26740. mov r12, rax
  26741. mov r13, rdx
  26742. ; A[1] * A[5]
  26743. mov rax, QWORD PTR [r8+40]
  26744. mul QWORD PTR [r8+8]
  26745. add r12, rax
  26746. adc r13, rdx
  26747. adc r14, 0
  26748. ; A[2] * A[4]
  26749. mov rax, QWORD PTR [r8+32]
  26750. mul QWORD PTR [r8+16]
  26751. add r12, rax
  26752. adc r13, rdx
  26753. adc r14, 0
  26754. ; A[3] * A[3]
  26755. mov rax, QWORD PTR [r8+24]
  26756. mul rax
  26757. add r12, r12
  26758. adc r13, r13
  26759. adc r14, r14
  26760. add r12, rax
  26761. adc r13, rdx
  26762. adc r14, 0
  26763. add r9, r12
  26764. adc r10, r13
  26765. adc r11, r14
  26766. mov QWORD PTR [rsp+48], r9
  26767. ; A[0] * A[7]
  26768. mov rax, QWORD PTR [r8+56]
  26769. mul QWORD PTR [r8]
  26770. xor r9, r9
  26771. xor r14, r14
  26772. mov r12, rax
  26773. mov r13, rdx
  26774. ; A[1] * A[6]
  26775. mov rax, QWORD PTR [r8+48]
  26776. mul QWORD PTR [r8+8]
  26777. add r12, rax
  26778. adc r13, rdx
  26779. adc r14, 0
  26780. ; A[2] * A[5]
  26781. mov rax, QWORD PTR [r8+40]
  26782. mul QWORD PTR [r8+16]
  26783. add r12, rax
  26784. adc r13, rdx
  26785. adc r14, 0
  26786. ; A[3] * A[4]
  26787. mov rax, QWORD PTR [r8+32]
  26788. mul QWORD PTR [r8+24]
  26789. add r12, rax
  26790. adc r13, rdx
  26791. adc r14, 0
  26792. add r12, r12
  26793. adc r13, r13
  26794. adc r14, r14
  26795. add r10, r12
  26796. adc r11, r13
  26797. adc r9, r14
  26798. mov QWORD PTR [rsp+56], r10
  26799. ; A[0] * A[8]
  26800. mov rax, QWORD PTR [r8+64]
  26801. mul QWORD PTR [r8]
  26802. xor r10, r10
  26803. xor r14, r14
  26804. mov r12, rax
  26805. mov r13, rdx
  26806. ; A[1] * A[7]
  26807. mov rax, QWORD PTR [r8+56]
  26808. mul QWORD PTR [r8+8]
  26809. add r12, rax
  26810. adc r13, rdx
  26811. adc r14, 0
  26812. ; A[2] * A[6]
  26813. mov rax, QWORD PTR [r8+48]
  26814. mul QWORD PTR [r8+16]
  26815. add r12, rax
  26816. adc r13, rdx
  26817. adc r14, 0
  26818. ; A[3] * A[5]
  26819. mov rax, QWORD PTR [r8+40]
  26820. mul QWORD PTR [r8+24]
  26821. add r12, rax
  26822. adc r13, rdx
  26823. adc r14, 0
  26824. ; A[4] * A[4]
  26825. mov rax, QWORD PTR [r8+32]
  26826. mul rax
  26827. add r12, r12
  26828. adc r13, r13
  26829. adc r14, r14
  26830. add r12, rax
  26831. adc r13, rdx
  26832. adc r14, 0
  26833. add r11, r12
  26834. adc r9, r13
  26835. adc r10, r14
  26836. mov QWORD PTR [rsp+64], r11
  26837. ; A[0] * A[9]
  26838. mov rax, QWORD PTR [r8+72]
  26839. mul QWORD PTR [r8]
  26840. xor r11, r11
  26841. xor r14, r14
  26842. mov r12, rax
  26843. mov r13, rdx
  26844. ; A[1] * A[8]
  26845. mov rax, QWORD PTR [r8+64]
  26846. mul QWORD PTR [r8+8]
  26847. add r12, rax
  26848. adc r13, rdx
  26849. adc r14, 0
  26850. ; A[2] * A[7]
  26851. mov rax, QWORD PTR [r8+56]
  26852. mul QWORD PTR [r8+16]
  26853. add r12, rax
  26854. adc r13, rdx
  26855. adc r14, 0
  26856. ; A[3] * A[6]
  26857. mov rax, QWORD PTR [r8+48]
  26858. mul QWORD PTR [r8+24]
  26859. add r12, rax
  26860. adc r13, rdx
  26861. adc r14, 0
  26862. ; A[4] * A[5]
  26863. mov rax, QWORD PTR [r8+40]
  26864. mul QWORD PTR [r8+32]
  26865. add r12, rax
  26866. adc r13, rdx
  26867. adc r14, 0
  26868. add r12, r12
  26869. adc r13, r13
  26870. adc r14, r14
  26871. add r9, r12
  26872. adc r10, r13
  26873. adc r11, r14
  26874. mov QWORD PTR [rsp+72], r9
  26875. ; A[0] * A[10]
  26876. mov rax, QWORD PTR [r8+80]
  26877. mul QWORD PTR [r8]
  26878. xor r9, r9
  26879. xor r14, r14
  26880. mov r12, rax
  26881. mov r13, rdx
  26882. ; A[1] * A[9]
  26883. mov rax, QWORD PTR [r8+72]
  26884. mul QWORD PTR [r8+8]
  26885. add r12, rax
  26886. adc r13, rdx
  26887. adc r14, 0
  26888. ; A[2] * A[8]
  26889. mov rax, QWORD PTR [r8+64]
  26890. mul QWORD PTR [r8+16]
  26891. add r12, rax
  26892. adc r13, rdx
  26893. adc r14, 0
  26894. ; A[3] * A[7]
  26895. mov rax, QWORD PTR [r8+56]
  26896. mul QWORD PTR [r8+24]
  26897. add r12, rax
  26898. adc r13, rdx
  26899. adc r14, 0
  26900. ; A[4] * A[6]
  26901. mov rax, QWORD PTR [r8+48]
  26902. mul QWORD PTR [r8+32]
  26903. add r12, rax
  26904. adc r13, rdx
  26905. adc r14, 0
  26906. ; A[5] * A[5]
  26907. mov rax, QWORD PTR [r8+40]
  26908. mul rax
  26909. add r12, r12
  26910. adc r13, r13
  26911. adc r14, r14
  26912. add r12, rax
  26913. adc r13, rdx
  26914. adc r14, 0
  26915. add r10, r12
  26916. adc r11, r13
  26917. adc r9, r14
  26918. mov QWORD PTR [rsp+80], r10
  26919. ; A[0] * A[11]
  26920. mov rax, QWORD PTR [r8+88]
  26921. mul QWORD PTR [r8]
  26922. xor r10, r10
  26923. xor r14, r14
  26924. mov r12, rax
  26925. mov r13, rdx
  26926. ; A[1] * A[10]
  26927. mov rax, QWORD PTR [r8+80]
  26928. mul QWORD PTR [r8+8]
  26929. add r12, rax
  26930. adc r13, rdx
  26931. adc r14, 0
  26932. ; A[2] * A[9]
  26933. mov rax, QWORD PTR [r8+72]
  26934. mul QWORD PTR [r8+16]
  26935. add r12, rax
  26936. adc r13, rdx
  26937. adc r14, 0
  26938. ; A[3] * A[8]
  26939. mov rax, QWORD PTR [r8+64]
  26940. mul QWORD PTR [r8+24]
  26941. add r12, rax
  26942. adc r13, rdx
  26943. adc r14, 0
  26944. ; A[4] * A[7]
  26945. mov rax, QWORD PTR [r8+56]
  26946. mul QWORD PTR [r8+32]
  26947. add r12, rax
  26948. adc r13, rdx
  26949. adc r14, 0
  26950. ; A[5] * A[6]
  26951. mov rax, QWORD PTR [r8+48]
  26952. mul QWORD PTR [r8+40]
  26953. add r12, rax
  26954. adc r13, rdx
  26955. adc r14, 0
  26956. add r12, r12
  26957. adc r13, r13
  26958. adc r14, r14
  26959. add r11, r12
  26960. adc r9, r13
  26961. adc r10, r14
  26962. mov QWORD PTR [rsp+88], r11
  26963. ; A[1] * A[11]
  26964. mov rax, QWORD PTR [r8+88]
  26965. mul QWORD PTR [r8+8]
  26966. xor r11, r11
  26967. xor r14, r14
  26968. mov r12, rax
  26969. mov r13, rdx
  26970. ; A[2] * A[10]
  26971. mov rax, QWORD PTR [r8+80]
  26972. mul QWORD PTR [r8+16]
  26973. add r12, rax
  26974. adc r13, rdx
  26975. adc r14, 0
  26976. ; A[3] * A[9]
  26977. mov rax, QWORD PTR [r8+72]
  26978. mul QWORD PTR [r8+24]
  26979. add r12, rax
  26980. adc r13, rdx
  26981. adc r14, 0
  26982. ; A[4] * A[8]
  26983. mov rax, QWORD PTR [r8+64]
  26984. mul QWORD PTR [r8+32]
  26985. add r12, rax
  26986. adc r13, rdx
  26987. adc r14, 0
  26988. ; A[5] * A[7]
  26989. mov rax, QWORD PTR [r8+56]
  26990. mul QWORD PTR [r8+40]
  26991. add r12, rax
  26992. adc r13, rdx
  26993. adc r14, 0
  26994. ; A[6] * A[6]
  26995. mov rax, QWORD PTR [r8+48]
  26996. mul rax
  26997. add r12, r12
  26998. adc r13, r13
  26999. adc r14, r14
  27000. add r12, rax
  27001. adc r13, rdx
  27002. adc r14, 0
  27003. add r9, r12
  27004. adc r10, r13
  27005. adc r11, r14
  27006. mov QWORD PTR [rcx+96], r9
  27007. ; A[2] * A[11]
  27008. mov rax, QWORD PTR [r8+88]
  27009. mul QWORD PTR [r8+16]
  27010. xor r9, r9
  27011. xor r14, r14
  27012. mov r12, rax
  27013. mov r13, rdx
  27014. ; A[3] * A[10]
  27015. mov rax, QWORD PTR [r8+80]
  27016. mul QWORD PTR [r8+24]
  27017. add r12, rax
  27018. adc r13, rdx
  27019. adc r14, 0
  27020. ; A[4] * A[9]
  27021. mov rax, QWORD PTR [r8+72]
  27022. mul QWORD PTR [r8+32]
  27023. add r12, rax
  27024. adc r13, rdx
  27025. adc r14, 0
  27026. ; A[5] * A[8]
  27027. mov rax, QWORD PTR [r8+64]
  27028. mul QWORD PTR [r8+40]
  27029. add r12, rax
  27030. adc r13, rdx
  27031. adc r14, 0
  27032. ; A[6] * A[7]
  27033. mov rax, QWORD PTR [r8+56]
  27034. mul QWORD PTR [r8+48]
  27035. add r12, rax
  27036. adc r13, rdx
  27037. adc r14, 0
  27038. add r12, r12
  27039. adc r13, r13
  27040. adc r14, r14
  27041. add r10, r12
  27042. adc r11, r13
  27043. adc r9, r14
  27044. mov QWORD PTR [rcx+104], r10
  27045. ; A[3] * A[11]
  27046. mov rax, QWORD PTR [r8+88]
  27047. mul QWORD PTR [r8+24]
  27048. xor r10, r10
  27049. xor r14, r14
  27050. mov r12, rax
  27051. mov r13, rdx
  27052. ; A[4] * A[10]
  27053. mov rax, QWORD PTR [r8+80]
  27054. mul QWORD PTR [r8+32]
  27055. add r12, rax
  27056. adc r13, rdx
  27057. adc r14, 0
  27058. ; A[5] * A[9]
  27059. mov rax, QWORD PTR [r8+72]
  27060. mul QWORD PTR [r8+40]
  27061. add r12, rax
  27062. adc r13, rdx
  27063. adc r14, 0
  27064. ; A[6] * A[8]
  27065. mov rax, QWORD PTR [r8+64]
  27066. mul QWORD PTR [r8+48]
  27067. add r12, rax
  27068. adc r13, rdx
  27069. adc r14, 0
  27070. ; A[7] * A[7]
  27071. mov rax, QWORD PTR [r8+56]
  27072. mul rax
  27073. add r12, r12
  27074. adc r13, r13
  27075. adc r14, r14
  27076. add r12, rax
  27077. adc r13, rdx
  27078. adc r14, 0
  27079. add r11, r12
  27080. adc r9, r13
  27081. adc r10, r14
  27082. mov QWORD PTR [rcx+112], r11
  27083. ; A[4] * A[11]
  27084. mov rax, QWORD PTR [r8+88]
  27085. mul QWORD PTR [r8+32]
  27086. xor r11, r11
  27087. xor r14, r14
  27088. mov r12, rax
  27089. mov r13, rdx
  27090. ; A[5] * A[10]
  27091. mov rax, QWORD PTR [r8+80]
  27092. mul QWORD PTR [r8+40]
  27093. add r12, rax
  27094. adc r13, rdx
  27095. adc r14, 0
  27096. ; A[6] * A[9]
  27097. mov rax, QWORD PTR [r8+72]
  27098. mul QWORD PTR [r8+48]
  27099. add r12, rax
  27100. adc r13, rdx
  27101. adc r14, 0
  27102. ; A[7] * A[8]
  27103. mov rax, QWORD PTR [r8+64]
  27104. mul QWORD PTR [r8+56]
  27105. add r12, rax
  27106. adc r13, rdx
  27107. adc r14, 0
  27108. add r12, r12
  27109. adc r13, r13
  27110. adc r14, r14
  27111. add r9, r12
  27112. adc r10, r13
  27113. adc r11, r14
  27114. mov QWORD PTR [rcx+120], r9
  27115. ; A[5] * A[11]
  27116. mov rax, QWORD PTR [r8+88]
  27117. mul QWORD PTR [r8+40]
  27118. xor r9, r9
  27119. xor r14, r14
  27120. mov r12, rax
  27121. mov r13, rdx
  27122. ; A[6] * A[10]
  27123. mov rax, QWORD PTR [r8+80]
  27124. mul QWORD PTR [r8+48]
  27125. add r12, rax
  27126. adc r13, rdx
  27127. adc r14, 0
  27128. ; A[7] * A[9]
  27129. mov rax, QWORD PTR [r8+72]
  27130. mul QWORD PTR [r8+56]
  27131. add r12, rax
  27132. adc r13, rdx
  27133. adc r14, 0
  27134. ; A[8] * A[8]
  27135. mov rax, QWORD PTR [r8+64]
  27136. mul rax
  27137. add r12, r12
  27138. adc r13, r13
  27139. adc r14, r14
  27140. add r12, rax
  27141. adc r13, rdx
  27142. adc r14, 0
  27143. add r10, r12
  27144. adc r11, r13
  27145. adc r9, r14
  27146. mov QWORD PTR [rcx+128], r10
  27147. ; A[6] * A[11]
  27148. mov rax, QWORD PTR [r8+88]
  27149. mul QWORD PTR [r8+48]
  27150. xor r10, r10
  27151. xor r14, r14
  27152. mov r12, rax
  27153. mov r13, rdx
  27154. ; A[7] * A[10]
  27155. mov rax, QWORD PTR [r8+80]
  27156. mul QWORD PTR [r8+56]
  27157. add r12, rax
  27158. adc r13, rdx
  27159. adc r14, 0
  27160. ; A[8] * A[9]
  27161. mov rax, QWORD PTR [r8+72]
  27162. mul QWORD PTR [r8+64]
  27163. add r12, rax
  27164. adc r13, rdx
  27165. adc r14, 0
  27166. add r12, r12
  27167. adc r13, r13
  27168. adc r14, r14
  27169. add r11, r12
  27170. adc r9, r13
  27171. adc r10, r14
  27172. mov QWORD PTR [rcx+136], r11
  27173. ; A[7] * A[11]
  27174. mov rax, QWORD PTR [r8+88]
  27175. mul QWORD PTR [r8+56]
  27176. xor r11, r11
  27177. add r9, rax
  27178. adc r10, rdx
  27179. adc r11, 0
  27180. add r9, rax
  27181. adc r10, rdx
  27182. adc r11, 0
  27183. ; A[8] * A[10]
  27184. mov rax, QWORD PTR [r8+80]
  27185. mul QWORD PTR [r8+64]
  27186. add r9, rax
  27187. adc r10, rdx
  27188. adc r11, 0
  27189. add r9, rax
  27190. adc r10, rdx
  27191. adc r11, 0
  27192. ; A[9] * A[9]
  27193. mov rax, QWORD PTR [r8+72]
  27194. mul rax
  27195. add r9, rax
  27196. adc r10, rdx
  27197. adc r11, 0
  27198. mov QWORD PTR [rcx+144], r9
  27199. ; A[8] * A[11]
  27200. mov rax, QWORD PTR [r8+88]
  27201. mul QWORD PTR [r8+64]
  27202. xor r9, r9
  27203. add r10, rax
  27204. adc r11, rdx
  27205. adc r9, 0
  27206. add r10, rax
  27207. adc r11, rdx
  27208. adc r9, 0
  27209. ; A[9] * A[10]
  27210. mov rax, QWORD PTR [r8+80]
  27211. mul QWORD PTR [r8+72]
  27212. add r10, rax
  27213. adc r11, rdx
  27214. adc r9, 0
  27215. add r10, rax
  27216. adc r11, rdx
  27217. adc r9, 0
  27218. mov QWORD PTR [rcx+152], r10
  27219. ; A[9] * A[11]
  27220. mov rax, QWORD PTR [r8+88]
  27221. mul QWORD PTR [r8+72]
  27222. xor r10, r10
  27223. add r11, rax
  27224. adc r9, rdx
  27225. adc r10, 0
  27226. add r11, rax
  27227. adc r9, rdx
  27228. adc r10, 0
  27229. ; A[10] * A[10]
  27230. mov rax, QWORD PTR [r8+80]
  27231. mul rax
  27232. add r11, rax
  27233. adc r9, rdx
  27234. adc r10, 0
  27235. mov QWORD PTR [rcx+160], r11
  27236. ; A[10] * A[11]
  27237. mov rax, QWORD PTR [r8+88]
  27238. mul QWORD PTR [r8+80]
  27239. xor r11, r11
  27240. add r9, rax
  27241. adc r10, rdx
  27242. adc r11, 0
  27243. add r9, rax
  27244. adc r10, rdx
  27245. adc r11, 0
  27246. mov QWORD PTR [rcx+168], r9
  27247. ; A[11] * A[11]
  27248. mov rax, QWORD PTR [r8+88]
  27249. mul rax
  27250. add r10, rax
  27251. adc r11, rdx
  27252. mov QWORD PTR [rcx+176], r10
  27253. mov QWORD PTR [rcx+184], r11
  27254. mov rax, QWORD PTR [rsp]
  27255. mov rdx, QWORD PTR [rsp+8]
  27256. mov r12, QWORD PTR [rsp+16]
  27257. mov r13, QWORD PTR [rsp+24]
  27258. mov QWORD PTR [rcx], rax
  27259. mov QWORD PTR [rcx+8], rdx
  27260. mov QWORD PTR [rcx+16], r12
  27261. mov QWORD PTR [rcx+24], r13
  27262. mov rax, QWORD PTR [rsp+32]
  27263. mov rdx, QWORD PTR [rsp+40]
  27264. mov r12, QWORD PTR [rsp+48]
  27265. mov r13, QWORD PTR [rsp+56]
  27266. mov QWORD PTR [rcx+32], rax
  27267. mov QWORD PTR [rcx+40], rdx
  27268. mov QWORD PTR [rcx+48], r12
  27269. mov QWORD PTR [rcx+56], r13
  27270. mov rax, QWORD PTR [rsp+64]
  27271. mov rdx, QWORD PTR [rsp+72]
  27272. mov r12, QWORD PTR [rsp+80]
  27273. mov r13, QWORD PTR [rsp+88]
  27274. mov QWORD PTR [rcx+64], rax
  27275. mov QWORD PTR [rcx+72], rdx
  27276. mov QWORD PTR [rcx+80], r12
  27277. mov QWORD PTR [rcx+88], r13
  27278. add rsp, 96
  27279. pop r14
  27280. pop r13
  27281. pop r12
  27282. ret
  27283. sp_3072_sqr_12 ENDP
  27284. _text ENDS
  27285. IFDEF HAVE_INTEL_AVX2
  27286. ; /* Square a and put result in r. (r = a * a)
  27287. ; *
  27288. ; * r A single precision integer.
  27289. ; * a A single precision integer.
         ; *
         ; * Register interface, as used by the code below:
         ; *   rcx = r and rdx = a on entry (consistent with the Microsoft x64
         ; *   convention; rdi and rsi are saved and restored like rbx, rbp and
         ; *   r12-r15). The pointers are copied to r8 (r) and r9 (a) because mulx
         ; *   takes rdx as its implicit multiplicand and rcx is reused as scratch
         ; *   for the high half of products.
         ; *   a is read as 12 64-bit words (offsets 0..88); r receives 24 64-bit
         ; *   words (offsets 0..184).
         ; *   Modified and not restored: rax, rcx, rdx, r8-r11, flags, and xmm0
         ; *   (xmm0 only when a == r).
         ; *
         ; * Method: six passes ("Diagonal 1" .. "Diagonal 6") accumulate every
         ; * cross product A[i] x A[j] with i > j exactly once (66 products). The
         ; * last section doubles the accumulated words and adds the squares
         ; * A[i] x A[i]. Two carry chains run side by side: adcx uses only CF and
         ; * adox uses only OF. r12 is zeroed once and never written again, so
         ; * "adcx/adox reg, r12" adds just the pending carry.
         ; *
         ; * Where the result words live while they are being built:
         ; *   words 0..6   [rbp+0] .. [rbp+48]
         ; *   words 7..11  r14, r15, rdi, rsi, rbx (stored only at the end)
         ; *   words 12..23 [r8+0] .. [r8+88], with r8 = r + 96
         ; *
         ; * Aliasing: only exact equality a == r is checked. In that case rbp
         ; * points at a 96-byte stack buffer instead of r, and words 0..6 are
         ; * copied into r at the end - presumably so that the input words are
         ; * not overwritten while they are still being read. Partial overlap of
         ; * a and r is not detected.
         ; *
         ; * The generated "Zero into", "No load" and "No store" notes below name
         ; * registers from a different register allocation; they are NOT the
         ; * r8/r9 pointer registers used by this code. Judging by the
         ; * instructions that follow them, the names correspond to:
         ; *   %r8 -> r10, %r9 -> r11, %r12 -> r14, %r13 -> r15,
         ; *   %r14 -> rdi, %r15 -> rsi, %rbx -> rbx.
         ; * "Zero into X": mulx writes the high half into X, which holds no
         ; * earlier partial sum. "No load X" / "No store X": the word is kept in
         ; * register X, so it is not loaded from / stored to memory.
  27290. ; */
  27291. _text SEGMENT READONLY PARA
  27292. sp_3072_sqr_avx2_12 PROC
         ; Save the registers that are restored before returning.
  27293. push rbp
  27294. push r12
  27295. push r13
  27296. push r14
  27297. push r15
  27298. push rdi
  27299. push rsi
  27300. push rbx
         ; r8 = r, r9 = a: frees rdx for mulx and rcx for scratch.
  27301. mov r8, rcx
  27302. mov r9, rdx
         ; 96-byte stack buffer; it holds result words 0..6 only when a == r.
  27303. sub rsp, 96
         ; rbp = r when a != r, otherwise the stack buffer (the mov between cmp
         ; and cmovne does not alter the flags).
  27304. cmp r9, r8
  27305. mov rbp, rsp
  27306. cmovne rbp, r8
         ; r8 = r + 96 = address of result word 12.
  27307. add r8, 96
         ; r12 stays zero for the rest of the routine; xor also clears CF and OF,
         ; which is the starting state of both carry chains.
  27308. xor r12, r12
  27309. ; Diagonal 1
         ; Pass 1: A[i] x A[0] for i = 1..11, producing words 1..12. adcx adds
         ; each low half into the previous high half (CF chain); adox reg, r12
         ; adds only the OF carry into the new high half.
  27310. ; Zero into %r9
  27311. ; A[1] x A[0]
  27312. mov rdx, QWORD PTR [r9]
  27313. mulx r11, r10, QWORD PTR [r9+8]
  27314. mov QWORD PTR [rbp+8], r10
  27315. ; Zero into %r8
  27316. ; A[2] x A[0]
  27317. mulx r10, rax, QWORD PTR [r9+16]
  27318. adcx r11, rax
  27319. adox r10, r12
  27320. mov QWORD PTR [rbp+16], r11
  27321. ; Zero into %r9
  27322. ; A[3] x A[0]
  27323. mulx r11, rax, QWORD PTR [r9+24]
  27324. adcx r10, rax
  27325. adox r11, r12
  27326. mov QWORD PTR [rbp+24], r10
  27327. ; Zero into %r8
  27328. ; A[4] x A[0]
  27329. mulx r10, rax, QWORD PTR [r9+32]
  27330. adcx r11, rax
  27331. adox r10, r12
  27332. mov QWORD PTR [rbp+32], r11
  27333. ; Zero into %r9
  27334. ; A[5] x A[0]
  27335. mulx r11, rax, QWORD PTR [r9+40]
  27336. adcx r10, rax
  27337. adox r11, r12
  27338. mov QWORD PTR [rbp+40], r10
  27339. ; No load %r12 - %r8
  27340. ; A[6] x A[0]
  27341. mulx r14, rax, QWORD PTR [r9+48]
  27342. adcx r11, rax
  27343. adox r14, r12
  27344. mov QWORD PTR [rbp+48], r11
  27345. ; No load %r13 - %r9
  27346. ; A[7] x A[0]
  27347. mulx r15, rax, QWORD PTR [r9+56]
  27348. adcx r14, rax
  27349. adox r15, r12
  27350. ; No store %r12 - %r8
  27351. ; No load %r14 - %r8
  27352. ; A[8] x A[0]
  27353. mulx rdi, rax, QWORD PTR [r9+64]
  27354. adcx r15, rax
  27355. adox rdi, r12
  27356. ; No store %r13 - %r9
  27357. ; No load %r15 - %r9
  27358. ; A[9] x A[0]
  27359. mulx rsi, rax, QWORD PTR [r9+72]
  27360. adcx rdi, rax
  27361. adox rsi, r12
  27362. ; No store %r14 - %r8
  27363. ; No load %rbx - %r8
  27364. ; A[10] x A[0]
  27365. mulx rbx, rax, QWORD PTR [r9+80]
  27366. adcx rsi, rax
  27367. adox rbx, r12
  27368. ; No store %r15 - %r9
  27369. ; Zero into %r9
  27370. ; A[11] x A[0]
  27371. mulx r11, rax, QWORD PTR [r9+88]
  27372. adcx rbx, rax
  27373. adox r11, r12
  27374. ; No store %rbx - %r8
  27375. ; Carry
         ; Fold CF into word 12, then gather the leftover CF and OF into r13
         ; (r13 = 0 + CF, then + OF). This also leaves both flags clear for the
         ; next pass.
  27376. adcx r11, r12
  27377. mov r13, r12
  27378. adcx r13, r12
  27379. adox r13, r12
  27380. mov QWORD PTR [r8], r11
  27381. ; Diagonal 2
         ; Pass 2: A[i] x A[1] for i = 2..11, then A[11] x A[2]; updates words
         ; 3..14. Unless a fresh word is started, mulx now puts the high half in
         ; rcx and adox adds it into the next word's accumulator.
  27382. mov r11, QWORD PTR [rbp+24]
  27383. mov r10, QWORD PTR [rbp+32]
  27384. ; A[2] x A[1]
  27385. mov rdx, QWORD PTR [r9+8]
  27386. mulx rcx, rax, QWORD PTR [r9+16]
  27387. adcx r11, rax
  27388. adox r10, rcx
  27389. mov QWORD PTR [rbp+24], r11
  27390. mov r11, QWORD PTR [rbp+40]
  27391. ; A[3] x A[1]
  27392. mulx rcx, rax, QWORD PTR [r9+24]
  27393. adcx r10, rax
  27394. adox r11, rcx
  27395. mov QWORD PTR [rbp+32], r10
  27396. mov r10, QWORD PTR [rbp+48]
  27397. ; A[4] x A[1]
  27398. mulx rcx, rax, QWORD PTR [r9+32]
  27399. adcx r11, rax
  27400. adox r10, rcx
  27401. mov QWORD PTR [rbp+40], r11
  27402. ; No load %r12 - %r9
  27403. ; A[5] x A[1]
  27404. mulx rcx, rax, QWORD PTR [r9+40]
  27405. adcx r10, rax
  27406. adox r14, rcx
  27407. mov QWORD PTR [rbp+48], r10
  27408. ; No load %r13 - %r8
  27409. ; A[6] x A[1]
  27410. mulx rcx, rax, QWORD PTR [r9+48]
  27411. adcx r14, rax
  27412. adox r15, rcx
  27413. ; No store %r12 - %r9
  27414. ; No load %r14 - %r9
  27415. ; A[7] x A[1]
  27416. mulx rcx, rax, QWORD PTR [r9+56]
  27417. adcx r15, rax
  27418. adox rdi, rcx
  27419. ; No store %r13 - %r8
  27420. ; No load %r15 - %r8
  27421. ; A[8] x A[1]
  27422. mulx rcx, rax, QWORD PTR [r9+64]
  27423. adcx rdi, rax
  27424. adox rsi, rcx
  27425. ; No store %r14 - %r9
  27426. ; No load %rbx - %r9
  27427. ; A[9] x A[1]
  27428. mulx rcx, rax, QWORD PTR [r9+72]
  27429. adcx rsi, rax
  27430. adox rbx, rcx
  27431. ; No store %r15 - %r8
  27432. mov r10, QWORD PTR [r8]
  27433. ; A[10] x A[1]
  27434. mulx rcx, rax, QWORD PTR [r9+80]
  27435. adcx rbx, rax
  27436. adox r10, rcx
  27437. ; No store %rbx - %r9
  27438. ; Zero into %r9
  27439. ; A[11] x A[1]
  27440. mulx r11, rax, QWORD PTR [r9+88]
  27441. adcx r10, rax
  27442. adox r11, r12
  27443. mov QWORD PTR [r8], r10
  27444. ; Zero into %r8
  27445. ; A[11] x A[2]
  27446. mov rdx, QWORD PTR [r9+16]
  27447. mulx r10, rax, QWORD PTR [r9+88]
  27448. adcx r11, rax
  27449. adox r10, r12
  27450. mov QWORD PTR [r8+8], r11
  27451. ; Carry
         ; Add the carry saved in r13 by the previous pass (plus CF) into the top
         ; word, then gather the new leftover CF and OF into r13. The same
         ; sequence closes passes 3..6.
  27452. adcx r10, r13
  27453. mov r13, r12
  27454. adcx r13, r12
  27455. adox r13, r12
  27456. mov QWORD PTR [r8+16], r10
  27457. ; Diagonal 3
         ; Pass 3: A[i] x A[2] for i = 3..10, then A[10] x A[3..5]; updates words
         ; 5..16. rdx still holds A[2] from the last product of pass 2, so it is
         ; not reloaded here.
  27458. mov r10, QWORD PTR [rbp+40]
  27459. mov r11, QWORD PTR [rbp+48]
  27460. ; A[3] x A[2]
  27461. mulx rcx, rax, QWORD PTR [r9+24]
  27462. adcx r10, rax
  27463. adox r11, rcx
  27464. mov QWORD PTR [rbp+40], r10
  27465. ; No load %r12 - %r8
  27466. ; A[4] x A[2]
  27467. mulx rcx, rax, QWORD PTR [r9+32]
  27468. adcx r11, rax
  27469. adox r14, rcx
  27470. mov QWORD PTR [rbp+48], r11
  27471. ; No load %r13 - %r9
  27472. ; A[5] x A[2]
  27473. mulx rcx, rax, QWORD PTR [r9+40]
  27474. adcx r14, rax
  27475. adox r15, rcx
  27476. ; No store %r12 - %r8
  27477. ; No load %r14 - %r8
  27478. ; A[6] x A[2]
  27479. mulx rcx, rax, QWORD PTR [r9+48]
  27480. adcx r15, rax
  27481. adox rdi, rcx
  27482. ; No store %r13 - %r9
  27483. ; No load %r15 - %r9
  27484. ; A[7] x A[2]
  27485. mulx rcx, rax, QWORD PTR [r9+56]
  27486. adcx rdi, rax
  27487. adox rsi, rcx
  27488. ; No store %r14 - %r8
  27489. ; No load %rbx - %r8
  27490. ; A[8] x A[2]
  27491. mulx rcx, rax, QWORD PTR [r9+64]
  27492. adcx rsi, rax
  27493. adox rbx, rcx
  27494. ; No store %r15 - %r9
  27495. mov r11, QWORD PTR [r8]
  27496. ; A[9] x A[2]
  27497. mulx rcx, rax, QWORD PTR [r9+72]
  27498. adcx rbx, rax
  27499. adox r11, rcx
  27500. ; No store %rbx - %r8
  27501. mov r10, QWORD PTR [r8+8]
  27502. ; A[10] x A[2]
  27503. mulx rcx, rax, QWORD PTR [r9+80]
  27504. adcx r11, rax
  27505. adox r10, rcx
  27506. mov QWORD PTR [r8], r11
  27507. mov r11, QWORD PTR [r8+16]
  27508. ; A[10] x A[3]
  27509. mov rdx, QWORD PTR [r9+24]
  27510. mulx rcx, rax, QWORD PTR [r9+80]
  27511. adcx r10, rax
  27512. adox r11, rcx
  27513. mov QWORD PTR [r8+8], r10
  27514. ; Zero into %r8
  27515. ; A[10] x A[4]
  27516. mov rdx, QWORD PTR [r9+32]
  27517. mulx r10, rax, QWORD PTR [r9+80]
  27518. adcx r11, rax
  27519. adox r10, r12
  27520. mov QWORD PTR [r8+16], r11
  27521. ; Zero into %r9
  27522. ; A[10] x A[5]
  27523. mov rdx, QWORD PTR [r9+40]
  27524. mulx r11, rax, QWORD PTR [r9+80]
  27525. adcx r10, rax
  27526. adox r11, r12
  27527. mov QWORD PTR [r8+24], r10
  27528. ; Carry
  27529. adcx r11, r13
  27530. mov r13, r12
  27531. adcx r13, r12
  27532. adox r13, r12
  27533. mov QWORD PTR [r8+32], r11
  27534. ; Diagonal 4
         ; Pass 4: A[i] x A[3] for i = 4..9, then A[9] x A[4..8]; updates words
         ; 7..18.
  27535. ; No load %r13 - %r8
  27536. ; A[4] x A[3]
  27537. mov rdx, QWORD PTR [r9+24]
  27538. mulx rcx, rax, QWORD PTR [r9+32]
  27539. adcx r14, rax
  27540. adox r15, rcx
  27541. ; No store %r12 - %r9
  27542. ; No load %r14 - %r9
  27543. ; A[5] x A[3]
  27544. mulx rcx, rax, QWORD PTR [r9+40]
  27545. adcx r15, rax
  27546. adox rdi, rcx
  27547. ; No store %r13 - %r8
  27548. ; No load %r15 - %r8
  27549. ; A[6] x A[3]
  27550. mulx rcx, rax, QWORD PTR [r9+48]
  27551. adcx rdi, rax
  27552. adox rsi, rcx
  27553. ; No store %r14 - %r9
  27554. ; No load %rbx - %r9
  27555. ; A[7] x A[3]
  27556. mulx rcx, rax, QWORD PTR [r9+56]
  27557. adcx rsi, rax
  27558. adox rbx, rcx
  27559. ; No store %r15 - %r8
  27560. mov r10, QWORD PTR [r8]
  27561. ; A[8] x A[3]
  27562. mulx rcx, rax, QWORD PTR [r9+64]
  27563. adcx rbx, rax
  27564. adox r10, rcx
  27565. ; No store %rbx - %r9
  27566. mov r11, QWORD PTR [r8+8]
  27567. ; A[9] x A[3]
  27568. mulx rcx, rax, QWORD PTR [r9+72]
  27569. adcx r10, rax
  27570. adox r11, rcx
  27571. mov QWORD PTR [r8], r10
  27572. mov r10, QWORD PTR [r8+16]
  27573. ; A[9] x A[4]
  27574. mov rdx, QWORD PTR [r9+32]
  27575. mulx rcx, rax, QWORD PTR [r9+72]
  27576. adcx r11, rax
  27577. adox r10, rcx
  27578. mov QWORD PTR [r8+8], r11
  27579. mov r11, QWORD PTR [r8+24]
  27580. ; A[9] x A[5]
  27581. mov rdx, QWORD PTR [r9+40]
  27582. mulx rcx, rax, QWORD PTR [r9+72]
  27583. adcx r10, rax
  27584. adox r11, rcx
  27585. mov QWORD PTR [r8+16], r10
  27586. mov r10, QWORD PTR [r8+32]
  27587. ; A[9] x A[6]
  27588. mov rdx, QWORD PTR [r9+48]
  27589. mulx rcx, rax, QWORD PTR [r9+72]
  27590. adcx r11, rax
  27591. adox r10, rcx
  27592. mov QWORD PTR [r8+24], r11
  27593. ; Zero into %r9
  27594. ; A[9] x A[7]
  27595. mov rdx, QWORD PTR [r9+56]
  27596. mulx r11, rax, QWORD PTR [r9+72]
  27597. adcx r10, rax
  27598. adox r11, r12
  27599. mov QWORD PTR [r8+32], r10
  27600. ; Zero into %r8
  27601. ; A[9] x A[8]
  27602. mov rdx, QWORD PTR [r9+64]
  27603. mulx r10, rax, QWORD PTR [r9+72]
  27604. adcx r11, rax
  27605. adox r10, r12
  27606. mov QWORD PTR [r8+40], r11
  27607. ; Carry
  27608. adcx r10, r13
  27609. mov r13, r12
  27610. adcx r13, r12
  27611. adox r13, r12
  27612. mov QWORD PTR [r8+48], r10
  27613. ; Diagonal 5
         ; Pass 5: A[i] x A[4] for i = 5..8, A[8] x A[5..7], then A[10] x A[6..9];
         ; updates words 9..20.
  27614. ; No load %r15 - %r9
  27615. ; A[5] x A[4]
  27616. mov rdx, QWORD PTR [r9+32]
  27617. mulx rcx, rax, QWORD PTR [r9+40]
  27618. adcx rdi, rax
  27619. adox rsi, rcx
  27620. ; No store %r14 - %r8
  27621. ; No load %rbx - %r8
  27622. ; A[6] x A[4]
  27623. mulx rcx, rax, QWORD PTR [r9+48]
  27624. adcx rsi, rax
  27625. adox rbx, rcx
  27626. ; No store %r15 - %r9
  27627. mov r11, QWORD PTR [r8]
  27628. ; A[7] x A[4]
  27629. mulx rcx, rax, QWORD PTR [r9+56]
  27630. adcx rbx, rax
  27631. adox r11, rcx
  27632. ; No store %rbx - %r8
  27633. mov r10, QWORD PTR [r8+8]
  27634. ; A[8] x A[4]
  27635. mulx rcx, rax, QWORD PTR [r9+64]
  27636. adcx r11, rax
  27637. adox r10, rcx
  27638. mov QWORD PTR [r8], r11
  27639. mov r11, QWORD PTR [r8+16]
  27640. ; A[8] x A[5]
  27641. mov rdx, QWORD PTR [r9+40]
  27642. mulx rcx, rax, QWORD PTR [r9+64]
  27643. adcx r10, rax
  27644. adox r11, rcx
  27645. mov QWORD PTR [r8+8], r10
  27646. mov r10, QWORD PTR [r8+24]
  27647. ; A[8] x A[6]
  27648. mov rdx, QWORD PTR [r9+48]
  27649. mulx rcx, rax, QWORD PTR [r9+64]
  27650. adcx r11, rax
  27651. adox r10, rcx
  27652. mov QWORD PTR [r8+16], r11
  27653. mov r11, QWORD PTR [r8+32]
  27654. ; A[8] x A[7]
  27655. mov rdx, QWORD PTR [r9+56]
  27656. mulx rcx, rax, QWORD PTR [r9+64]
  27657. adcx r10, rax
  27658. adox r11, rcx
  27659. mov QWORD PTR [r8+24], r10
  27660. mov r10, QWORD PTR [r8+40]
  27661. ; A[10] x A[6]
  27662. mov rdx, QWORD PTR [r9+48]
  27663. mulx rcx, rax, QWORD PTR [r9+80]
  27664. adcx r11, rax
  27665. adox r10, rcx
  27666. mov QWORD PTR [r8+32], r11
  27667. mov r11, QWORD PTR [r8+48]
  27668. ; A[10] x A[7]
  27669. mov rdx, QWORD PTR [r9+56]
  27670. mulx rcx, rax, QWORD PTR [r9+80]
  27671. adcx r10, rax
  27672. adox r11, rcx
  27673. mov QWORD PTR [r8+40], r10
  27674. ; Zero into %r8
  27675. ; A[10] x A[8]
  27676. mov rdx, QWORD PTR [r9+64]
  27677. mulx r10, rax, QWORD PTR [r9+80]
  27678. adcx r11, rax
  27679. adox r10, r12
  27680. mov QWORD PTR [r8+48], r11
  27681. ; Zero into %r9
  27682. ; A[10] x A[9]
  27683. mov rdx, QWORD PTR [r9+72]
  27684. mulx r11, rax, QWORD PTR [r9+80]
  27685. adcx r10, rax
  27686. adox r11, r12
  27687. mov QWORD PTR [r8+56], r10
  27688. ; Carry
  27689. adcx r11, r13
  27690. mov r13, r12
  27691. adcx r13, r12
  27692. adox r13, r12
  27693. mov QWORD PTR [r8+64], r11
  27694. ; Diagonal 6
         ; Pass 6: A[6] x A[5], A[7] x A[5], A[7] x A[6], then A[11] x A[3..10];
         ; updates words 11..22. The final leftover carry in r13 becomes word 23.
  27695. mov r10, QWORD PTR [r8]
  27696. ; A[6] x A[5]
  27697. mov rdx, QWORD PTR [r9+40]
  27698. mulx rcx, rax, QWORD PTR [r9+48]
  27699. adcx rbx, rax
  27700. adox r10, rcx
  27701. ; No store %rbx - %r9
  27702. mov r11, QWORD PTR [r8+8]
  27703. ; A[7] x A[5]
  27704. mulx rcx, rax, QWORD PTR [r9+56]
  27705. adcx r10, rax
  27706. adox r11, rcx
  27707. mov QWORD PTR [r8], r10
  27708. mov r10, QWORD PTR [r8+16]
  27709. ; A[7] x A[6]
  27710. mov rdx, QWORD PTR [r9+48]
  27711. mulx rcx, rax, QWORD PTR [r9+56]
  27712. adcx r11, rax
  27713. adox r10, rcx
  27714. mov QWORD PTR [r8+8], r11
  27715. mov r11, QWORD PTR [r8+24]
  27716. ; A[11] x A[3]
  27717. mov rdx, QWORD PTR [r9+24]
  27718. mulx rcx, rax, QWORD PTR [r9+88]
  27719. adcx r10, rax
  27720. adox r11, rcx
  27721. mov QWORD PTR [r8+16], r10
  27722. mov r10, QWORD PTR [r8+32]
  27723. ; A[11] x A[4]
  27724. mov rdx, QWORD PTR [r9+32]
  27725. mulx rcx, rax, QWORD PTR [r9+88]
  27726. adcx r11, rax
  27727. adox r10, rcx
  27728. mov QWORD PTR [r8+24], r11
  27729. mov r11, QWORD PTR [r8+40]
  27730. ; A[11] x A[5]
  27731. mov rdx, QWORD PTR [r9+40]
  27732. mulx rcx, rax, QWORD PTR [r9+88]
  27733. adcx r10, rax
  27734. adox r11, rcx
  27735. mov QWORD PTR [r8+32], r10
  27736. mov r10, QWORD PTR [r8+48]
  27737. ; A[11] x A[6]
  27738. mov rdx, QWORD PTR [r9+48]
  27739. mulx rcx, rax, QWORD PTR [r9+88]
  27740. adcx r11, rax
  27741. adox r10, rcx
  27742. mov QWORD PTR [r8+40], r11
  27743. mov r11, QWORD PTR [r8+56]
  27744. ; A[11] x A[7]
  27745. mov rdx, QWORD PTR [r9+56]
  27746. mulx rcx, rax, QWORD PTR [r9+88]
  27747. adcx r10, rax
  27748. adox r11, rcx
  27749. mov QWORD PTR [r8+48], r10
  27750. mov r10, QWORD PTR [r8+64]
  27751. ; A[11] x A[8]
  27752. mov rdx, QWORD PTR [r9+64]
  27753. mulx rcx, rax, QWORD PTR [r9+88]
  27754. adcx r11, rax
  27755. adox r10, rcx
  27756. mov QWORD PTR [r8+56], r11
  27757. ; Zero into %r9
  27758. ; A[11] x A[9]
  27759. mov rdx, QWORD PTR [r9+72]
  27760. mulx r11, rax, QWORD PTR [r9+88]
  27761. adcx r10, rax
  27762. adox r11, r12
  27763. mov QWORD PTR [r8+64], r10
  27764. ; Zero into %r8
  27765. ; A[11] x A[10]
  27766. mov rdx, QWORD PTR [r9+80]
  27767. mulx r10, rax, QWORD PTR [r9+88]
  27768. adcx r11, rax
  27769. adox r10, r12
  27770. mov QWORD PTR [r8+72], r11
  27771. ; Carry
  27772. adcx r10, r13
  27773. mov r13, r12
  27774. adcx r13, r12
  27775. adox r13, r12
  27776. mov QWORD PTR [r8+80], r10
  27777. mov QWORD PTR [r8+88], r13
  27778. ; Double and Add in A[i] x A[i]
         ; Each accumulated word is doubled with "adox x, x" (the bit shifted out
         ; travels along the OF chain) and the square A[i] x A[i] is added with
         ; adcx (CF chain): low half into word 2i, high half into word 2i+1.
         ; Word 0 is simply the low half of A[0] x A[0]. Both flags are clear on
         ; entry because the preceding carry collection cannot overflow.
  27779. mov r11, QWORD PTR [rbp+8]
  27780. ; A[0] x A[0]
  27781. mov rdx, QWORD PTR [r9]
  27782. mulx rcx, rax, rdx
  27783. mov QWORD PTR [rbp], rax
  27784. adox r11, r11
  27785. adcx r11, rcx
  27786. mov QWORD PTR [rbp+8], r11
  27787. mov r10, QWORD PTR [rbp+16]
  27788. mov r11, QWORD PTR [rbp+24]
  27789. ; A[1] x A[1]
  27790. mov rdx, QWORD PTR [r9+8]
  27791. mulx rcx, rax, rdx
  27792. adox r10, r10
  27793. adox r11, r11
  27794. adcx r10, rax
  27795. adcx r11, rcx
  27796. mov QWORD PTR [rbp+16], r10
  27797. mov QWORD PTR [rbp+24], r11
  27798. mov r10, QWORD PTR [rbp+32]
  27799. mov r11, QWORD PTR [rbp+40]
  27800. ; A[2] x A[2]
  27801. mov rdx, QWORD PTR [r9+16]
  27802. mulx rcx, rax, rdx
  27803. adox r10, r10
  27804. adox r11, r11
  27805. adcx r10, rax
  27806. adcx r11, rcx
  27807. mov QWORD PTR [rbp+32], r10
  27808. mov QWORD PTR [rbp+40], r11
  27809. mov r10, QWORD PTR [rbp+48]
  27810. ; A[3] x A[3]
  27811. mov rdx, QWORD PTR [r9+24]
  27812. mulx rcx, rax, rdx
  27813. adox r10, r10
  27814. adox r14, r14
  27815. adcx r10, rax
  27816. adcx r14, rcx
  27817. mov QWORD PTR [rbp+48], r10
  27818. ; A[4] x A[4]
  27819. mov rdx, QWORD PTR [r9+32]
  27820. mulx rcx, rax, rdx
  27821. adox r15, r15
  27822. adox rdi, rdi
  27823. adcx r15, rax
  27824. adcx rdi, rcx
  27825. ; A[5] x A[5]
  27826. mov rdx, QWORD PTR [r9+40]
  27827. mulx rcx, rax, rdx
  27828. adox rsi, rsi
  27829. adox rbx, rbx
  27830. adcx rsi, rax
  27831. adcx rbx, rcx
  27832. mov r10, QWORD PTR [r8]
  27833. mov r11, QWORD PTR [r8+8]
  27834. ; A[6] x A[6]
  27835. mov rdx, QWORD PTR [r9+48]
  27836. mulx rcx, rax, rdx
  27837. adox r10, r10
  27838. adox r11, r11
  27839. adcx r10, rax
  27840. adcx r11, rcx
  27841. mov QWORD PTR [r8], r10
  27842. mov QWORD PTR [r8+8], r11
  27843. mov r10, QWORD PTR [r8+16]
  27844. mov r11, QWORD PTR [r8+24]
  27845. ; A[7] x A[7]
  27846. mov rdx, QWORD PTR [r9+56]
  27847. mulx rcx, rax, rdx
  27848. adox r10, r10
  27849. adox r11, r11
  27850. adcx r10, rax
  27851. adcx r11, rcx
  27852. mov QWORD PTR [r8+16], r10
  27853. mov QWORD PTR [r8+24], r11
  27854. mov r10, QWORD PTR [r8+32]
  27855. mov r11, QWORD PTR [r8+40]
  27856. ; A[8] x A[8]
  27857. mov rdx, QWORD PTR [r9+64]
  27858. mulx rcx, rax, rdx
  27859. adox r10, r10
  27860. adox r11, r11
  27861. adcx r10, rax
  27862. adcx r11, rcx
  27863. mov QWORD PTR [r8+32], r10
  27864. mov QWORD PTR [r8+40], r11
  27865. mov r10, QWORD PTR [r8+48]
  27866. mov r11, QWORD PTR [r8+56]
  27867. ; A[9] x A[9]
  27868. mov rdx, QWORD PTR [r9+72]
  27869. mulx rcx, rax, rdx
  27870. adox r10, r10
  27871. adox r11, r11
  27872. adcx r10, rax
  27873. adcx r11, rcx
  27874. mov QWORD PTR [r8+48], r10
  27875. mov QWORD PTR [r8+56], r11
  27876. mov r10, QWORD PTR [r8+64]
  27877. mov r11, QWORD PTR [r8+72]
  27878. ; A[10] x A[10]
  27879. mov rdx, QWORD PTR [r9+80]
  27880. mulx rcx, rax, rdx
  27881. adox r10, r10
  27882. adox r11, r11
  27883. adcx r10, rax
  27884. adcx r11, rcx
  27885. mov QWORD PTR [r8+64], r10
  27886. mov QWORD PTR [r8+72], r11
  27887. mov r10, QWORD PTR [r8+80]
  27888. mov r11, QWORD PTR [r8+88]
  27889. ; A[11] x A[11]
  27890. mov rdx, QWORD PTR [r9+88]
  27891. mulx rcx, rax, rdx
  27892. adox r10, r10
  27893. adox r11, r11
  27894. adcx r10, rax
  27895. adcx r11, rcx
  27896. mov QWORD PTR [r8+80], r10
  27897. mov QWORD PTR [r8+88], r11
         ; Words 7..11 were kept in r14, r15, rdi, rsi, rbx; store them at
         ; r + 56 .. r + 88 (r8 is still r + 96 here).
  27898. mov QWORD PTR [r8+-40], r14
  27899. mov QWORD PTR [r8+-32], r15
  27900. mov QWORD PTR [r8+-24], rdi
  27901. mov QWORD PTR [r8+-16], rsi
  27902. mov QWORD PTR [r8+-8], rbx
         ; r8 = r again. If a != r, words 0..6 were written in place via rbp and
         ; nothing is left to copy.
  27903. sub r8, 96
  27904. cmp r9, r8
  27905. jne L_end_3072_sqr_avx2_12
         ; a == r: copy words 0..6 (56 bytes = 3 x 16 + 8) from the stack buffer
         ; into r.
  27906. vmovdqu xmm0, OWORD PTR [rbp]
  27907. vmovups OWORD PTR [r8], xmm0
  27908. vmovdqu xmm0, OWORD PTR [rbp+16]
  27909. vmovups OWORD PTR [r8+16], xmm0
  27910. vmovdqu xmm0, OWORD PTR [rbp+32]
  27911. vmovups OWORD PTR [r8+32], xmm0
  27912. mov rax, QWORD PTR [rbp+48]
  27913. mov QWORD PTR [r8+48], rax
  27914. L_end_3072_sqr_avx2_12:
         ; Release the stack buffer and restore the saved registers in reverse
         ; push order.
  27915. add rsp, 96
  27916. pop rbx
  27917. pop rsi
  27918. pop rdi
  27919. pop r15
  27920. pop r14
  27921. pop r13
  27922. pop r12
  27923. pop rbp
  27924. ret
  27925. sp_3072_sqr_avx2_12 ENDP
  27926. _text ENDS
  27927. ENDIF
  ; /* Square a and put result in r. (r = a * a)
  ; *
  ; * Karatsuba: ah^2, al^2, (al - ah)^2
  ; *
  ; * al = a[0..11], ah = a[12..23] (64-bit words). Steps:
  ; *   1. tmp = al - ah, then conditionally negated when the subtraction
  ; *      borrowed, giving |al - ah|.
  ; *   2. sp_3072_sqr_12(tmp, tmp); 24 words of tmp are used afterwards.
  ; *   3. sp_3072_sqr_12(r + 24 words, ah) and sp_3072_sqr_12(r, al).
  ; *   4. tmp -= r[24..47], tmp -= r[0..23]; r9 counts the borrows.
  ; *   5. r[12..35] -= tmp; the remaining carry word is added into r[36..47].
  ; *
  ; * r A single precision integer.
  ; * a A single precision integer.
  ; *
  ; * In:      rcx = r, rdx = a.
  ; * Out:     rcx and rdx are reloaded with r and a before returning.
  ; * Scratch: rax, r8, r9, r10, r11, flags.
  ; * Stack:   208 bytes: [rsp, rsp+192) = tmp, [rsp+192] = r, [rsp+200] = a.
  ; *
  ; * NOTE(review): 208 is a multiple of 16, so rsp at each call has the same
  ; * alignment as at entry, and no 32-byte home space is reserved below tmp.
  ; * That is only safe if sp_3072_sqr_12 neither requires an aligned stack
  ; * nor writes to its home space - confirm against the callee.
  ; */
  _text SEGMENT READONLY PARA
  sp_3072_sqr_24 PROC
        sub     rsp, 208
        mov     QWORD PTR [rsp+192], rcx
        mov     QWORD PTR [rsp+200], rdx

        ; ---- tmp = al - ah; r9 = 0, or all ones if the subtraction borrowed
        xor     r9, r9
        mov     r10, rsp
        lea     r11, QWORD PTR [rdx+96]
        mov     rax, QWORD PTR [rdx]
        sub     rax, QWORD PTR [r11]
        mov     QWORD PTR [r10], rax
        mov     r8, QWORD PTR [rdx+8]
        sbb     r8, QWORD PTR [r11+8]
        mov     QWORD PTR [r10+8], r8
        mov     rax, QWORD PTR [rdx+16]
        sbb     rax, QWORD PTR [r11+16]
        mov     QWORD PTR [r10+16], rax
        mov     r8, QWORD PTR [rdx+24]
        sbb     r8, QWORD PTR [r11+24]
        mov     QWORD PTR [r10+24], r8
        mov     rax, QWORD PTR [rdx+32]
        sbb     rax, QWORD PTR [r11+32]
        mov     QWORD PTR [r10+32], rax
        mov     r8, QWORD PTR [rdx+40]
        sbb     r8, QWORD PTR [r11+40]
        mov     QWORD PTR [r10+40], r8
        mov     rax, QWORD PTR [rdx+48]
        sbb     rax, QWORD PTR [r11+48]
        mov     QWORD PTR [r10+48], rax
        mov     r8, QWORD PTR [rdx+56]
        sbb     r8, QWORD PTR [r11+56]
        mov     QWORD PTR [r10+56], r8
        mov     rax, QWORD PTR [rdx+64]
        sbb     rax, QWORD PTR [r11+64]
        mov     QWORD PTR [r10+64], rax
        mov     r8, QWORD PTR [rdx+72]
        sbb     r8, QWORD PTR [r11+72]
        mov     QWORD PTR [r10+72], r8
        mov     rax, QWORD PTR [rdx+80]
        sbb     rax, QWORD PTR [r11+80]
        mov     QWORD PTR [r10+80], rax
        mov     r8, QWORD PTR [rdx+88]
        sbb     r8, QWORD PTR [r11+88]
        mov     QWORD PTR [r10+88], r8
        sbb     r9, 0

        ; ---- Conditional negate of tmp: (tmp XOR r9) - r9, branch free.
        ; r11 carries the "+1" between words: it starts as -r9 (0 or 1) and
        ; is corrected by the borrow of the first word's subtraction.
        mov     r11, r9
        neg     r11
        mov     rax, QWORD PTR [r10]
        xor     rax, r9
        sub     rax, r9
        sbb     r11, 0
        mov     QWORD PTR [r10], rax
        ; Remaining words: flip with the mask, add the carry, and capture the
        ; new carry with setc (r11 is 0 or 1, so only its low byte changes).
        mov     r8, QWORD PTR [r10+8]
        xor     r8, r9
        add     r8, r11
        setc    r11b
        mov     QWORD PTR [r10+8], r8
        mov     rax, QWORD PTR [r10+16]
        xor     rax, r9
        add     rax, r11
        setc    r11b
        mov     QWORD PTR [r10+16], rax
        mov     r8, QWORD PTR [r10+24]
        xor     r8, r9
        add     r8, r11
        setc    r11b
        mov     QWORD PTR [r10+24], r8
        mov     rax, QWORD PTR [r10+32]
        xor     rax, r9
        add     rax, r11
        setc    r11b
        mov     QWORD PTR [r10+32], rax
        mov     r8, QWORD PTR [r10+40]
        xor     r8, r9
        add     r8, r11
        setc    r11b
        mov     QWORD PTR [r10+40], r8
        mov     rax, QWORD PTR [r10+48]
        xor     rax, r9
        add     rax, r11
        setc    r11b
        mov     QWORD PTR [r10+48], rax
        mov     r8, QWORD PTR [r10+56]
        xor     r8, r9
        add     r8, r11
        setc    r11b
        mov     QWORD PTR [r10+56], r8
        mov     rax, QWORD PTR [r10+64]
        xor     rax, r9
        add     rax, r11
        setc    r11b
        mov     QWORD PTR [r10+64], rax
        mov     r8, QWORD PTR [r10+72]
        xor     r8, r9
        add     r8, r11
        setc    r11b
        mov     QWORD PTR [r10+72], r8
        mov     rax, QWORD PTR [r10+80]
        xor     rax, r9
        add     rax, r11
        setc    r11b
        mov     QWORD PTR [r10+80], rax
        ; Top word: the carry out is not needed.
        mov     r8, QWORD PTR [r10+88]
        xor     r8, r9
        add     r8, r11
        mov     QWORD PTR [r10+88], r8

        ; ---- tmp = tmp^2 (in place on the stack; r10 still equals rsp)
        mov     rdx, r10
        mov     rcx, rsp
        call    sp_3072_sqr_12
        ; ---- r + 192 bytes = ah^2
        mov     rcx, QWORD PTR [rsp+192]
        mov     rdx, QWORD PTR [rsp+200]
        add     rcx, 192
        add     rdx, 96
        call    sp_3072_sqr_12
        ; ---- r = al^2
        mov     rcx, QWORD PTR [rsp+192]
        mov     rdx, QWORD PTR [rsp+200]
        call    sp_3072_sqr_12

        ; ---- tmp -= r[24..47]. r10 = middle of tmp, rdx = middle of the
        ; high half of r, so one signed 8-bit displacement range covers all
        ; 24 words. r9 drops by one for each chain that borrows.
        mov     rdx, QWORD PTR [rsp+192]
        add     rdx, 288
        lea     r10, QWORD PTR [rsp+96]
        xor     r9, r9
        mov     r8, QWORD PTR [r10-96]
        sub     r8, QWORD PTR [rdx-96]
        mov     QWORD PTR [r10-96], r8
        mov     rax, QWORD PTR [r10-88]
        sbb     rax, QWORD PTR [rdx-88]
        mov     QWORD PTR [r10-88], rax
        mov     r8, QWORD PTR [r10-80]
        sbb     r8, QWORD PTR [rdx-80]
        mov     QWORD PTR [r10-80], r8
        mov     rax, QWORD PTR [r10-72]
        sbb     rax, QWORD PTR [rdx-72]
        mov     QWORD PTR [r10-72], rax
        mov     r8, QWORD PTR [r10-64]
        sbb     r8, QWORD PTR [rdx-64]
        mov     QWORD PTR [r10-64], r8
        mov     rax, QWORD PTR [r10-56]
        sbb     rax, QWORD PTR [rdx-56]
        mov     QWORD PTR [r10-56], rax
        mov     r8, QWORD PTR [r10-48]
        sbb     r8, QWORD PTR [rdx-48]
        mov     QWORD PTR [r10-48], r8
        mov     rax, QWORD PTR [r10-40]
        sbb     rax, QWORD PTR [rdx-40]
        mov     QWORD PTR [r10-40], rax
        mov     r8, QWORD PTR [r10-32]
        sbb     r8, QWORD PTR [rdx-32]
        mov     QWORD PTR [r10-32], r8
        mov     rax, QWORD PTR [r10-24]
        sbb     rax, QWORD PTR [rdx-24]
        mov     QWORD PTR [r10-24], rax
        mov     r8, QWORD PTR [r10-16]
        sbb     r8, QWORD PTR [rdx-16]
        mov     QWORD PTR [r10-16], r8
        mov     rax, QWORD PTR [r10-8]
        sbb     rax, QWORD PTR [rdx-8]
        mov     QWORD PTR [r10-8], rax
        mov     r8, QWORD PTR [r10]
        sbb     r8, QWORD PTR [rdx]
        mov     QWORD PTR [r10], r8
        mov     rax, QWORD PTR [r10+8]
        sbb     rax, QWORD PTR [rdx+8]
        mov     QWORD PTR [r10+8], rax
        mov     r8, QWORD PTR [r10+16]
        sbb     r8, QWORD PTR [rdx+16]
        mov     QWORD PTR [r10+16], r8
        mov     rax, QWORD PTR [r10+24]
        sbb     rax, QWORD PTR [rdx+24]
        mov     QWORD PTR [r10+24], rax
        mov     r8, QWORD PTR [r10+32]
        sbb     r8, QWORD PTR [rdx+32]
        mov     QWORD PTR [r10+32], r8
        mov     rax, QWORD PTR [r10+40]
        sbb     rax, QWORD PTR [rdx+40]
        mov     QWORD PTR [r10+40], rax
        mov     r8, QWORD PTR [r10+48]
        sbb     r8, QWORD PTR [rdx+48]
        mov     QWORD PTR [r10+48], r8
        mov     rax, QWORD PTR [r10+56]
        sbb     rax, QWORD PTR [rdx+56]
        mov     QWORD PTR [r10+56], rax
        mov     r8, QWORD PTR [r10+64]
        sbb     r8, QWORD PTR [rdx+64]
        mov     QWORD PTR [r10+64], r8
        mov     rax, QWORD PTR [r10+72]
        sbb     rax, QWORD PTR [rdx+72]
        mov     QWORD PTR [r10+72], rax
        mov     r8, QWORD PTR [r10+80]
        sbb     r8, QWORD PTR [rdx+80]
        mov     QWORD PTR [r10+80], r8
        mov     rax, QWORD PTR [r10+88]
        sbb     rax, QWORD PTR [rdx+88]
        mov     QWORD PTR [r10+88], rax
        sbb     r9, 0

        ; ---- tmp -= r[0..23] (rdx moved back to the middle of the low half)
        sub     rdx, 192
        mov     r8, QWORD PTR [r10-96]
        sub     r8, QWORD PTR [rdx-96]
        mov     QWORD PTR [r10-96], r8
        mov     rax, QWORD PTR [r10-88]
        sbb     rax, QWORD PTR [rdx-88]
        mov     QWORD PTR [r10-88], rax
        mov     r8, QWORD PTR [r10-80]
        sbb     r8, QWORD PTR [rdx-80]
        mov     QWORD PTR [r10-80], r8
        mov     rax, QWORD PTR [r10-72]
        sbb     rax, QWORD PTR [rdx-72]
        mov     QWORD PTR [r10-72], rax
        mov     r8, QWORD PTR [r10-64]
        sbb     r8, QWORD PTR [rdx-64]
        mov     QWORD PTR [r10-64], r8
        mov     rax, QWORD PTR [r10-56]
        sbb     rax, QWORD PTR [rdx-56]
        mov     QWORD PTR [r10-56], rax
        mov     r8, QWORD PTR [r10-48]
        sbb     r8, QWORD PTR [rdx-48]
        mov     QWORD PTR [r10-48], r8
        mov     rax, QWORD PTR [r10-40]
        sbb     rax, QWORD PTR [rdx-40]
        mov     QWORD PTR [r10-40], rax
        mov     r8, QWORD PTR [r10-32]
        sbb     r8, QWORD PTR [rdx-32]
        mov     QWORD PTR [r10-32], r8
        mov     rax, QWORD PTR [r10-24]
        sbb     rax, QWORD PTR [rdx-24]
        mov     QWORD PTR [r10-24], rax
        mov     r8, QWORD PTR [r10-16]
        sbb     r8, QWORD PTR [rdx-16]
        mov     QWORD PTR [r10-16], r8
        mov     rax, QWORD PTR [r10-8]
        sbb     rax, QWORD PTR [rdx-8]
        mov     QWORD PTR [r10-8], rax
        mov     r8, QWORD PTR [r10]
        sbb     r8, QWORD PTR [rdx]
        mov     QWORD PTR [r10], r8
        mov     rax, QWORD PTR [r10+8]
        sbb     rax, QWORD PTR [rdx+8]
        mov     QWORD PTR [r10+8], rax
        mov     r8, QWORD PTR [r10+16]
        sbb     r8, QWORD PTR [rdx+16]
        mov     QWORD PTR [r10+16], r8
        mov     rax, QWORD PTR [r10+24]
        sbb     rax, QWORD PTR [rdx+24]
        mov     QWORD PTR [r10+24], rax
        mov     r8, QWORD PTR [r10+32]
        sbb     r8, QWORD PTR [rdx+32]
        mov     QWORD PTR [r10+32], r8
        mov     rax, QWORD PTR [r10+40]
        sbb     rax, QWORD PTR [rdx+40]
        mov     QWORD PTR [r10+40], rax
        mov     r8, QWORD PTR [r10+48]
        sbb     r8, QWORD PTR [rdx+48]
        mov     QWORD PTR [r10+48], r8
        mov     rax, QWORD PTR [r10+56]
        sbb     rax, QWORD PTR [rdx+56]
        mov     QWORD PTR [r10+56], rax
        mov     r8, QWORD PTR [r10+64]
        sbb     r8, QWORD PTR [rdx+64]
        mov     QWORD PTR [r10+64], r8
        mov     rax, QWORD PTR [r10+72]
        sbb     rax, QWORD PTR [rdx+72]
        mov     QWORD PTR [r10+72], rax
        mov     r8, QWORD PTR [r10+80]
        sbb     r8, QWORD PTR [rdx+80]
        mov     QWORD PTR [r10+80], r8
        mov     rax, QWORD PTR [r10+88]
        sbb     rax, QWORD PTR [rdx+88]
        mov     QWORD PTR [r10+88], rax
        sbb     r9, 0

        ; ---- r[12..35] -= tmp. r9 is negated first so it holds the number
        ; of borrows taken above; the borrow of this chain is then removed.
        mov     rcx, QWORD PTR [rsp+192]
        neg     r9
        add     rcx, 192
        mov     r8, QWORD PTR [rcx-96]
        sub     r8, QWORD PTR [r10-96]
        mov     QWORD PTR [rcx-96], r8
        mov     rax, QWORD PTR [rcx-88]
        sbb     rax, QWORD PTR [r10-88]
        mov     QWORD PTR [rcx-88], rax
        mov     r8, QWORD PTR [rcx-80]
        sbb     r8, QWORD PTR [r10-80]
        mov     QWORD PTR [rcx-80], r8
        mov     rax, QWORD PTR [rcx-72]
        sbb     rax, QWORD PTR [r10-72]
        mov     QWORD PTR [rcx-72], rax
        mov     r8, QWORD PTR [rcx-64]
        sbb     r8, QWORD PTR [r10-64]
        mov     QWORD PTR [rcx-64], r8
        mov     rax, QWORD PTR [rcx-56]
        sbb     rax, QWORD PTR [r10-56]
        mov     QWORD PTR [rcx-56], rax
        mov     r8, QWORD PTR [rcx-48]
        sbb     r8, QWORD PTR [r10-48]
        mov     QWORD PTR [rcx-48], r8
        mov     rax, QWORD PTR [rcx-40]
        sbb     rax, QWORD PTR [r10-40]
        mov     QWORD PTR [rcx-40], rax
        mov     r8, QWORD PTR [rcx-32]
        sbb     r8, QWORD PTR [r10-32]
        mov     QWORD PTR [rcx-32], r8
        mov     rax, QWORD PTR [rcx-24]
        sbb     rax, QWORD PTR [r10-24]
        mov     QWORD PTR [rcx-24], rax
        mov     r8, QWORD PTR [rcx-16]
        sbb     r8, QWORD PTR [r10-16]
        mov     QWORD PTR [rcx-16], r8
        mov     rax, QWORD PTR [rcx-8]
        sbb     rax, QWORD PTR [r10-8]
        mov     QWORD PTR [rcx-8], rax
        mov     r8, QWORD PTR [rcx]
        sbb     r8, QWORD PTR [r10]
        mov     QWORD PTR [rcx], r8
        mov     rax, QWORD PTR [rcx+8]
        sbb     rax, QWORD PTR [r10+8]
        mov     QWORD PTR [rcx+8], rax
        mov     r8, QWORD PTR [rcx+16]
        sbb     r8, QWORD PTR [r10+16]
        mov     QWORD PTR [rcx+16], r8
        mov     rax, QWORD PTR [rcx+24]
        sbb     rax, QWORD PTR [r10+24]
        mov     QWORD PTR [rcx+24], rax
        mov     r8, QWORD PTR [rcx+32]
        sbb     r8, QWORD PTR [r10+32]
        mov     QWORD PTR [rcx+32], r8
        mov     rax, QWORD PTR [rcx+40]
        sbb     rax, QWORD PTR [r10+40]
        mov     QWORD PTR [rcx+40], rax
        mov     r8, QWORD PTR [rcx+48]
        sbb     r8, QWORD PTR [r10+48]
        mov     QWORD PTR [rcx+48], r8
        mov     rax, QWORD PTR [rcx+56]
        sbb     rax, QWORD PTR [r10+56]
        mov     QWORD PTR [rcx+56], rax
        mov     r8, QWORD PTR [rcx+64]
        sbb     r8, QWORD PTR [r10+64]
        mov     QWORD PTR [rcx+64], r8
        mov     rax, QWORD PTR [rcx+72]
        sbb     rax, QWORD PTR [r10+72]
        mov     QWORD PTR [rcx+72], rax
        mov     r8, QWORD PTR [rcx+80]
        sbb     r8, QWORD PTR [r10+80]
        mov     QWORD PTR [rcx+80], r8
        mov     rax, QWORD PTR [rcx+88]
        sbb     rax, QWORD PTR [r10+88]
        mov     QWORD PTR [rcx+88], rax
        sbb     r9, 0

        ; ---- Add the carry word r9 into r[36] and ripple it up to r[47]
        mov     rcx, QWORD PTR [rsp+192]
        add     rcx, 288
        mov     r8, QWORD PTR [rcx]
        add     r8, r9
        mov     QWORD PTR [rcx], r8
        mov     rax, QWORD PTR [rcx+8]
        adc     rax, 0
        mov     QWORD PTR [rcx+8], rax
        mov     r8, QWORD PTR [rcx+16]
        adc     r8, 0
        mov     QWORD PTR [rcx+16], r8
        mov     rax, QWORD PTR [rcx+24]
        adc     rax, 0
        mov     QWORD PTR [rcx+24], rax
        mov     r8, QWORD PTR [rcx+32]
        adc     r8, 0
        mov     QWORD PTR [rcx+32], r8
        mov     rax, QWORD PTR [rcx+40]
        adc     rax, 0
        mov     QWORD PTR [rcx+40], rax
        mov     r8, QWORD PTR [rcx+48]
        adc     r8, 0
        mov     QWORD PTR [rcx+48], r8
        mov     rax, QWORD PTR [rcx+56]
        adc     rax, 0
        mov     QWORD PTR [rcx+56], rax
        mov     r8, QWORD PTR [rcx+64]
        adc     r8, 0
        mov     QWORD PTR [rcx+64], r8
        mov     rax, QWORD PTR [rcx+72]
        adc     rax, 0
        mov     QWORD PTR [rcx+72], rax
        mov     r8, QWORD PTR [rcx+80]
        adc     r8, 0
        mov     QWORD PTR [rcx+80], r8
        mov     rax, QWORD PTR [rcx+88]
        adc     rax, 0
        mov     QWORD PTR [rcx+88], rax

        ; ---- Reload the argument registers and release the frame.
        ; NOTE(review): kept in case an assembly caller relies on rcx/rdx
        ; surviving the call - confirm before removing.
        mov     rdx, QWORD PTR [rsp+200]
        mov     rcx, QWORD PTR [rsp+192]
        add     rsp, 208
        ret
  sp_3072_sqr_24 ENDP
  _text ENDS
  IFDEF HAVE_INTEL_AVX2
  ; /* Square a and put result in r. (r = a * a)
  ; *
  ; * Karatsuba: ah^2, al^2, (al - ah)^2
  ; *
  ; * Same driver as the non-AVX2 24-word square, but the three 12-word
  ; * squarings are done by sp_3072_sqr_avx2_12.
  ; *
  ; * al = a[0..11], ah = a[12..23] (64-bit words). Steps:
  ; *   1. tmp = al - ah, then conditionally negated when the subtraction
  ; *      borrowed, giving |al - ah|.
  ; *   2. sp_3072_sqr_avx2_12(tmp, tmp); 24 words of tmp are used afterwards.
  ; *   3. sp_3072_sqr_avx2_12(r + 24 words, ah) and
  ; *      sp_3072_sqr_avx2_12(r, al).
  ; *   4. tmp -= r[24..47], tmp -= r[0..23]; r9 counts the borrows.
  ; *   5. r[12..35] -= tmp; the remaining carry word is added into r[36..47].
  ; *
  ; * r A single precision integer.
  ; * a A single precision integer.
  ; *
  ; * In:      rcx = r, rdx = a.
  ; * Out:     rcx and rdx are reloaded with r and a before returning.
  ; * Scratch: rax, r8, r9, r10, r11, flags.
  ; * Stack:   208 bytes: [rsp, rsp+192) = tmp, [rsp+192] = r, [rsp+200] = a.
  ; *
  ; * NOTE(review): 208 is a multiple of 16, so rsp at each call has the same
  ; * alignment as at entry, and no 32-byte home space is reserved below tmp.
  ; * That is only safe if sp_3072_sqr_avx2_12 neither requires an aligned
  ; * stack nor writes to its home space - confirm against the callee.
  ; */
  _text SEGMENT READONLY PARA
  sp_3072_sqr_avx2_24 PROC
        sub     rsp, 208
        mov     QWORD PTR [rsp+192], rcx
        mov     QWORD PTR [rsp+200], rdx

        ; ---- tmp = al - ah; r9 = 0, or all ones if the subtraction borrowed
        xor     r9, r9
        mov     r10, rsp
        lea     r11, QWORD PTR [rdx+96]
        mov     rax, QWORD PTR [rdx]
        sub     rax, QWORD PTR [r11]
        mov     QWORD PTR [r10], rax
        mov     r8, QWORD PTR [rdx+8]
        sbb     r8, QWORD PTR [r11+8]
        mov     QWORD PTR [r10+8], r8
        mov     rax, QWORD PTR [rdx+16]
        sbb     rax, QWORD PTR [r11+16]
        mov     QWORD PTR [r10+16], rax
        mov     r8, QWORD PTR [rdx+24]
        sbb     r8, QWORD PTR [r11+24]
        mov     QWORD PTR [r10+24], r8
        mov     rax, QWORD PTR [rdx+32]
        sbb     rax, QWORD PTR [r11+32]
        mov     QWORD PTR [r10+32], rax
        mov     r8, QWORD PTR [rdx+40]
        sbb     r8, QWORD PTR [r11+40]
        mov     QWORD PTR [r10+40], r8
        mov     rax, QWORD PTR [rdx+48]
        sbb     rax, QWORD PTR [r11+48]
        mov     QWORD PTR [r10+48], rax
        mov     r8, QWORD PTR [rdx+56]
        sbb     r8, QWORD PTR [r11+56]
        mov     QWORD PTR [r10+56], r8
        mov     rax, QWORD PTR [rdx+64]
        sbb     rax, QWORD PTR [r11+64]
        mov     QWORD PTR [r10+64], rax
        mov     r8, QWORD PTR [rdx+72]
        sbb     r8, QWORD PTR [r11+72]
        mov     QWORD PTR [r10+72], r8
        mov     rax, QWORD PTR [rdx+80]
        sbb     rax, QWORD PTR [r11+80]
        mov     QWORD PTR [r10+80], rax
        mov     r8, QWORD PTR [rdx+88]
        sbb     r8, QWORD PTR [r11+88]
        mov     QWORD PTR [r10+88], r8
        sbb     r9, 0

        ; ---- Conditional negate of tmp: (tmp XOR r9) - r9, branch free.
        ; r11 carries the "+1" between words: it starts as -r9 (0 or 1) and
        ; is corrected by the borrow of the first word's subtraction.
        mov     r11, r9
        neg     r11
        mov     rax, QWORD PTR [r10]
        xor     rax, r9
        sub     rax, r9
        sbb     r11, 0
        mov     QWORD PTR [r10], rax
        ; Remaining words: flip with the mask, add the carry, and capture the
        ; new carry with setc (r11 is 0 or 1, so only its low byte changes).
        mov     r8, QWORD PTR [r10+8]
        xor     r8, r9
        add     r8, r11
        setc    r11b
        mov     QWORD PTR [r10+8], r8
        mov     rax, QWORD PTR [r10+16]
        xor     rax, r9
        add     rax, r11
        setc    r11b
        mov     QWORD PTR [r10+16], rax
        mov     r8, QWORD PTR [r10+24]
        xor     r8, r9
        add     r8, r11
        setc    r11b
        mov     QWORD PTR [r10+24], r8
        mov     rax, QWORD PTR [r10+32]
        xor     rax, r9
        add     rax, r11
        setc    r11b
        mov     QWORD PTR [r10+32], rax
        mov     r8, QWORD PTR [r10+40]
        xor     r8, r9
        add     r8, r11
        setc    r11b
        mov     QWORD PTR [r10+40], r8
        mov     rax, QWORD PTR [r10+48]
        xor     rax, r9
        add     rax, r11
        setc    r11b
        mov     QWORD PTR [r10+48], rax
        mov     r8, QWORD PTR [r10+56]
        xor     r8, r9
        add     r8, r11
        setc    r11b
        mov     QWORD PTR [r10+56], r8
        mov     rax, QWORD PTR [r10+64]
        xor     rax, r9
        add     rax, r11
        setc    r11b
        mov     QWORD PTR [r10+64], rax
        mov     r8, QWORD PTR [r10+72]
        xor     r8, r9
        add     r8, r11
        setc    r11b
        mov     QWORD PTR [r10+72], r8
        mov     rax, QWORD PTR [r10+80]
        xor     rax, r9
        add     rax, r11
        setc    r11b
        mov     QWORD PTR [r10+80], rax
        ; Top word: the carry out is not needed.
        mov     r8, QWORD PTR [r10+88]
        xor     r8, r9
        add     r8, r11
        mov     QWORD PTR [r10+88], r8

        ; ---- tmp = tmp^2 (in place on the stack; r10 still equals rsp)
        mov     rdx, r10
        mov     rcx, rsp
        call    sp_3072_sqr_avx2_12
        ; ---- r + 192 bytes = ah^2
        mov     rcx, QWORD PTR [rsp+192]
        mov     rdx, QWORD PTR [rsp+200]
        add     rcx, 192
        add     rdx, 96
        call    sp_3072_sqr_avx2_12
        ; ---- r = al^2
        mov     rcx, QWORD PTR [rsp+192]
        mov     rdx, QWORD PTR [rsp+200]
        call    sp_3072_sqr_avx2_12

        ; ---- tmp -= r[24..47]. r10 = middle of tmp, rdx = middle of the
        ; high half of r, so one signed 8-bit displacement range covers all
        ; 24 words. r9 drops by one for each chain that borrows.
        mov     rdx, QWORD PTR [rsp+192]
        add     rdx, 288
        lea     r10, QWORD PTR [rsp+96]
        xor     r9, r9
        mov     r8, QWORD PTR [r10-96]
        sub     r8, QWORD PTR [rdx-96]
        mov     QWORD PTR [r10-96], r8
        mov     rax, QWORD PTR [r10-88]
        sbb     rax, QWORD PTR [rdx-88]
        mov     QWORD PTR [r10-88], rax
        mov     r8, QWORD PTR [r10-80]
        sbb     r8, QWORD PTR [rdx-80]
        mov     QWORD PTR [r10-80], r8
        mov     rax, QWORD PTR [r10-72]
        sbb     rax, QWORD PTR [rdx-72]
        mov     QWORD PTR [r10-72], rax
        mov     r8, QWORD PTR [r10-64]
        sbb     r8, QWORD PTR [rdx-64]
        mov     QWORD PTR [r10-64], r8
        mov     rax, QWORD PTR [r10-56]
        sbb     rax, QWORD PTR [rdx-56]
        mov     QWORD PTR [r10-56], rax
        mov     r8, QWORD PTR [r10-48]
        sbb     r8, QWORD PTR [rdx-48]
        mov     QWORD PTR [r10-48], r8
        mov     rax, QWORD PTR [r10-40]
        sbb     rax, QWORD PTR [rdx-40]
        mov     QWORD PTR [r10-40], rax
        mov     r8, QWORD PTR [r10-32]
        sbb     r8, QWORD PTR [rdx-32]
        mov     QWORD PTR [r10-32], r8
        mov     rax, QWORD PTR [r10-24]
        sbb     rax, QWORD PTR [rdx-24]
        mov     QWORD PTR [r10-24], rax
        mov     r8, QWORD PTR [r10-16]
        sbb     r8, QWORD PTR [rdx-16]
        mov     QWORD PTR [r10-16], r8
        mov     rax, QWORD PTR [r10-8]
        sbb     rax, QWORD PTR [rdx-8]
        mov     QWORD PTR [r10-8], rax
        mov     r8, QWORD PTR [r10]
        sbb     r8, QWORD PTR [rdx]
        mov     QWORD PTR [r10], r8
        mov     rax, QWORD PTR [r10+8]
        sbb     rax, QWORD PTR [rdx+8]
        mov     QWORD PTR [r10+8], rax
        mov     r8, QWORD PTR [r10+16]
        sbb     r8, QWORD PTR [rdx+16]
        mov     QWORD PTR [r10+16], r8
        mov     rax, QWORD PTR [r10+24]
        sbb     rax, QWORD PTR [rdx+24]
        mov     QWORD PTR [r10+24], rax
        mov     r8, QWORD PTR [r10+32]
        sbb     r8, QWORD PTR [rdx+32]
        mov     QWORD PTR [r10+32], r8
        mov     rax, QWORD PTR [r10+40]
        sbb     rax, QWORD PTR [rdx+40]
        mov     QWORD PTR [r10+40], rax
        mov     r8, QWORD PTR [r10+48]
        sbb     r8, QWORD PTR [rdx+48]
        mov     QWORD PTR [r10+48], r8
        mov     rax, QWORD PTR [r10+56]
        sbb     rax, QWORD PTR [rdx+56]
        mov     QWORD PTR [r10+56], rax
        mov     r8, QWORD PTR [r10+64]
        sbb     r8, QWORD PTR [rdx+64]
        mov     QWORD PTR [r10+64], r8
        mov     rax, QWORD PTR [r10+72]
        sbb     rax, QWORD PTR [rdx+72]
        mov     QWORD PTR [r10+72], rax
        mov     r8, QWORD PTR [r10+80]
        sbb     r8, QWORD PTR [rdx+80]
        mov     QWORD PTR [r10+80], r8
        mov     rax, QWORD PTR [r10+88]
        sbb     rax, QWORD PTR [rdx+88]
        mov     QWORD PTR [r10+88], rax
        sbb     r9, 0

        ; ---- tmp -= r[0..23] (rdx moved back to the middle of the low half)
        sub     rdx, 192
        mov     r8, QWORD PTR [r10-96]
        sub     r8, QWORD PTR [rdx-96]
        mov     QWORD PTR [r10-96], r8
        mov     rax, QWORD PTR [r10-88]
        sbb     rax, QWORD PTR [rdx-88]
        mov     QWORD PTR [r10-88], rax
        mov     r8, QWORD PTR [r10-80]
        sbb     r8, QWORD PTR [rdx-80]
        mov     QWORD PTR [r10-80], r8
        mov     rax, QWORD PTR [r10-72]
        sbb     rax, QWORD PTR [rdx-72]
        mov     QWORD PTR [r10-72], rax
        mov     r8, QWORD PTR [r10-64]
        sbb     r8, QWORD PTR [rdx-64]
        mov     QWORD PTR [r10-64], r8
        mov     rax, QWORD PTR [r10-56]
        sbb     rax, QWORD PTR [rdx-56]
        mov     QWORD PTR [r10-56], rax
        mov     r8, QWORD PTR [r10-48]
        sbb     r8, QWORD PTR [rdx-48]
        mov     QWORD PTR [r10-48], r8
        mov     rax, QWORD PTR [r10-40]
        sbb     rax, QWORD PTR [rdx-40]
        mov     QWORD PTR [r10-40], rax
        mov     r8, QWORD PTR [r10-32]
        sbb     r8, QWORD PTR [rdx-32]
        mov     QWORD PTR [r10-32], r8
        mov     rax, QWORD PTR [r10-24]
        sbb     rax, QWORD PTR [rdx-24]
        mov     QWORD PTR [r10-24], rax
        mov     r8, QWORD PTR [r10-16]
        sbb     r8, QWORD PTR [rdx-16]
        mov     QWORD PTR [r10-16], r8
        mov     rax, QWORD PTR [r10-8]
        sbb     rax, QWORD PTR [rdx-8]
        mov     QWORD PTR [r10-8], rax
        mov     r8, QWORD PTR [r10]
        sbb     r8, QWORD PTR [rdx]
        mov     QWORD PTR [r10], r8
        mov     rax, QWORD PTR [r10+8]
        sbb     rax, QWORD PTR [rdx+8]
        mov     QWORD PTR [r10+8], rax
        mov     r8, QWORD PTR [r10+16]
        sbb     r8, QWORD PTR [rdx+16]
        mov     QWORD PTR [r10+16], r8
        mov     rax, QWORD PTR [r10+24]
        sbb     rax, QWORD PTR [rdx+24]
        mov     QWORD PTR [r10+24], rax
        mov     r8, QWORD PTR [r10+32]
        sbb     r8, QWORD PTR [rdx+32]
        mov     QWORD PTR [r10+32], r8
        mov     rax, QWORD PTR [r10+40]
        sbb     rax, QWORD PTR [rdx+40]
        mov     QWORD PTR [r10+40], rax
        mov     r8, QWORD PTR [r10+48]
        sbb     r8, QWORD PTR [rdx+48]
        mov     QWORD PTR [r10+48], r8
        mov     rax, QWORD PTR [r10+56]
        sbb     rax, QWORD PTR [rdx+56]
        mov     QWORD PTR [r10+56], rax
        mov     r8, QWORD PTR [r10+64]
        sbb     r8, QWORD PTR [rdx+64]
        mov     QWORD PTR [r10+64], r8
        mov     rax, QWORD PTR [r10+72]
        sbb     rax, QWORD PTR [rdx+72]
        mov     QWORD PTR [r10+72], rax
        mov     r8, QWORD PTR [r10+80]
        sbb     r8, QWORD PTR [rdx+80]
        mov     QWORD PTR [r10+80], r8
        mov     rax, QWORD PTR [r10+88]
        sbb     rax, QWORD PTR [rdx+88]
        mov     QWORD PTR [r10+88], rax
        sbb     r9, 0

        ; ---- r[12..35] -= tmp. r9 is negated first so it holds the number
        ; of borrows taken above; the borrow of this chain is then removed.
        mov     rcx, QWORD PTR [rsp+192]
        neg     r9
        add     rcx, 192
        mov     r8, QWORD PTR [rcx-96]
        sub     r8, QWORD PTR [r10-96]
        mov     QWORD PTR [rcx-96], r8
        mov     rax, QWORD PTR [rcx-88]
        sbb     rax, QWORD PTR [r10-88]
        mov     QWORD PTR [rcx-88], rax
        mov     r8, QWORD PTR [rcx-80]
        sbb     r8, QWORD PTR [r10-80]
        mov     QWORD PTR [rcx-80], r8
        mov     rax, QWORD PTR [rcx-72]
        sbb     rax, QWORD PTR [r10-72]
        mov     QWORD PTR [rcx-72], rax
        mov     r8, QWORD PTR [rcx-64]
        sbb     r8, QWORD PTR [r10-64]
        mov     QWORD PTR [rcx-64], r8
        mov     rax, QWORD PTR [rcx-56]
        sbb     rax, QWORD PTR [r10-56]
        mov     QWORD PTR [rcx-56], rax
        mov     r8, QWORD PTR [rcx-48]
        sbb     r8, QWORD PTR [r10-48]
        mov     QWORD PTR [rcx-48], r8
        mov     rax, QWORD PTR [rcx-40]
        sbb     rax, QWORD PTR [r10-40]
        mov     QWORD PTR [rcx-40], rax
        mov     r8, QWORD PTR [rcx-32]
        sbb     r8, QWORD PTR [r10-32]
        mov     QWORD PTR [rcx-32], r8
        mov     rax, QWORD PTR [rcx-24]
        sbb     rax, QWORD PTR [r10-24]
        mov     QWORD PTR [rcx-24], rax
        mov     r8, QWORD PTR [rcx-16]
        sbb     r8, QWORD PTR [r10-16]
        mov     QWORD PTR [rcx-16], r8
        mov     rax, QWORD PTR [rcx-8]
        sbb     rax, QWORD PTR [r10-8]
        mov     QWORD PTR [rcx-8], rax
        mov     r8, QWORD PTR [rcx]
        sbb     r8, QWORD PTR [r10]
        mov     QWORD PTR [rcx], r8
        mov     rax, QWORD PTR [rcx+8]
        sbb     rax, QWORD PTR [r10+8]
        mov     QWORD PTR [rcx+8], rax
        mov     r8, QWORD PTR [rcx+16]
        sbb     r8, QWORD PTR [r10+16]
        mov     QWORD PTR [rcx+16], r8
        mov     rax, QWORD PTR [rcx+24]
        sbb     rax, QWORD PTR [r10+24]
        mov     QWORD PTR [rcx+24], rax
        mov     r8, QWORD PTR [rcx+32]
        sbb     r8, QWORD PTR [r10+32]
        mov     QWORD PTR [rcx+32], r8
        mov     rax, QWORD PTR [rcx+40]
        sbb     rax, QWORD PTR [r10+40]
        mov     QWORD PTR [rcx+40], rax
        mov     r8, QWORD PTR [rcx+48]
        sbb     r8, QWORD PTR [r10+48]
        mov     QWORD PTR [rcx+48], r8
        mov     rax, QWORD PTR [rcx+56]
        sbb     rax, QWORD PTR [r10+56]
        mov     QWORD PTR [rcx+56], rax
        mov     r8, QWORD PTR [rcx+64]
        sbb     r8, QWORD PTR [r10+64]
        mov     QWORD PTR [rcx+64], r8
        mov     rax, QWORD PTR [rcx+72]
        sbb     rax, QWORD PTR [r10+72]
        mov     QWORD PTR [rcx+72], rax
        mov     r8, QWORD PTR [rcx+80]
        sbb     r8, QWORD PTR [r10+80]
        mov     QWORD PTR [rcx+80], r8
        mov     rax, QWORD PTR [rcx+88]
        sbb     rax, QWORD PTR [r10+88]
        mov     QWORD PTR [rcx+88], rax
        sbb     r9, 0

        ; ---- Add the carry word r9 into r[36] and ripple it up to r[47]
        mov     rcx, QWORD PTR [rsp+192]
        add     rcx, 288
        mov     r8, QWORD PTR [rcx]
        add     r8, r9
        mov     QWORD PTR [rcx], r8
        mov     rax, QWORD PTR [rcx+8]
        adc     rax, 0
        mov     QWORD PTR [rcx+8], rax
        mov     r8, QWORD PTR [rcx+16]
        adc     r8, 0
        mov     QWORD PTR [rcx+16], r8
        mov     rax, QWORD PTR [rcx+24]
        adc     rax, 0
        mov     QWORD PTR [rcx+24], rax
        mov     r8, QWORD PTR [rcx+32]
        adc     r8, 0
        mov     QWORD PTR [rcx+32], r8
        mov     rax, QWORD PTR [rcx+40]
        adc     rax, 0
        mov     QWORD PTR [rcx+40], rax
        mov     r8, QWORD PTR [rcx+48]
        adc     r8, 0
        mov     QWORD PTR [rcx+48], r8
        mov     rax, QWORD PTR [rcx+56]
        adc     rax, 0
        mov     QWORD PTR [rcx+56], rax
        mov     r8, QWORD PTR [rcx+64]
        adc     r8, 0
        mov     QWORD PTR [rcx+64], r8
        mov     rax, QWORD PTR [rcx+72]
        adc     rax, 0
        mov     QWORD PTR [rcx+72], rax
        mov     r8, QWORD PTR [rcx+80]
        adc     r8, 0
        mov     QWORD PTR [rcx+80], r8
        mov     rax, QWORD PTR [rcx+88]
        adc     rax, 0
        mov     QWORD PTR [rcx+88], rax

        ; ---- Reload the argument registers and release the frame.
        ; NOTE(review): kept in case an assembly caller relies on rcx/rdx
        ; surviving the call - confirm before removing.
        mov     rdx, QWORD PTR [rsp+200]
        mov     rcx, QWORD PTR [rsp+192]
        add     rsp, 208
        ret
  sp_3072_sqr_avx2_24 ENDP
  _text ENDS
  ENDIF
  28732. ; /* Square a and put result in r. (r = a * a)
  28733. ; *
  28734. ; * Karatsuba: ah^2, al^2, (al - ah)^2
  28735. ; *
  28736. ; * r A single precision integer.
  28737. ; * a A single precision integer.
  28738. ; */
  28739. _text SEGMENT READONLY PARA
  28740. sp_3072_sqr_48 PROC
  28741. sub rsp, 400
  28742. mov QWORD PTR [rsp+384], rcx
  28743. mov QWORD PTR [rsp+392], rdx
  28744. mov r9, 0
  28745. mov r10, rsp
  28746. lea r11, QWORD PTR [rdx+192]
  28747. mov rax, QWORD PTR [rdx]
  28748. sub rax, QWORD PTR [r11]
  28749. mov r8, QWORD PTR [rdx+8]
  28750. mov QWORD PTR [r10], rax
  28751. sbb r8, QWORD PTR [r11+8]
  28752. mov rax, QWORD PTR [rdx+16]
  28753. mov QWORD PTR [r10+8], r8
  28754. sbb rax, QWORD PTR [r11+16]
  28755. mov r8, QWORD PTR [rdx+24]
  28756. mov QWORD PTR [r10+16], rax
  28757. sbb r8, QWORD PTR [r11+24]
  28758. mov rax, QWORD PTR [rdx+32]
  28759. mov QWORD PTR [r10+24], r8
  28760. sbb rax, QWORD PTR [r11+32]
  28761. mov r8, QWORD PTR [rdx+40]
  28762. mov QWORD PTR [r10+32], rax
  28763. sbb r8, QWORD PTR [r11+40]
  28764. mov rax, QWORD PTR [rdx+48]
  28765. mov QWORD PTR [r10+40], r8
  28766. sbb rax, QWORD PTR [r11+48]
  28767. mov r8, QWORD PTR [rdx+56]
  28768. mov QWORD PTR [r10+48], rax
  28769. sbb r8, QWORD PTR [r11+56]
  28770. mov rax, QWORD PTR [rdx+64]
  28771. mov QWORD PTR [r10+56], r8
  28772. sbb rax, QWORD PTR [r11+64]
  28773. mov r8, QWORD PTR [rdx+72]
  28774. mov QWORD PTR [r10+64], rax
  28775. sbb r8, QWORD PTR [r11+72]
  28776. mov rax, QWORD PTR [rdx+80]
  28777. mov QWORD PTR [r10+72], r8
  28778. sbb rax, QWORD PTR [r11+80]
  28779. mov r8, QWORD PTR [rdx+88]
  28780. mov QWORD PTR [r10+80], rax
  28781. sbb r8, QWORD PTR [r11+88]
  28782. mov rax, QWORD PTR [rdx+96]
  28783. mov QWORD PTR [r10+88], r8
  28784. sbb rax, QWORD PTR [r11+96]
  28785. mov r8, QWORD PTR [rdx+104]
  28786. mov QWORD PTR [r10+96], rax
  28787. sbb r8, QWORD PTR [r11+104]
  28788. mov rax, QWORD PTR [rdx+112]
  28789. mov QWORD PTR [r10+104], r8
  28790. sbb rax, QWORD PTR [r11+112]
  28791. mov r8, QWORD PTR [rdx+120]
  28792. mov QWORD PTR [r10+112], rax
  28793. sbb r8, QWORD PTR [r11+120]
  28794. mov rax, QWORD PTR [rdx+128]
  28795. mov QWORD PTR [r10+120], r8
  28796. sbb rax, QWORD PTR [r11+128]
  28797. mov r8, QWORD PTR [rdx+136]
  28798. mov QWORD PTR [r10+128], rax
  28799. sbb r8, QWORD PTR [r11+136]
  28800. mov rax, QWORD PTR [rdx+144]
  28801. mov QWORD PTR [r10+136], r8
  28802. sbb rax, QWORD PTR [r11+144]
  28803. mov r8, QWORD PTR [rdx+152]
  28804. mov QWORD PTR [r10+144], rax
  28805. sbb r8, QWORD PTR [r11+152]
  28806. mov rax, QWORD PTR [rdx+160]
  28807. mov QWORD PTR [r10+152], r8
  28808. sbb rax, QWORD PTR [r11+160]
  28809. mov r8, QWORD PTR [rdx+168]
  28810. mov QWORD PTR [r10+160], rax
  28811. sbb r8, QWORD PTR [r11+168]
  28812. mov rax, QWORD PTR [rdx+176]
  28813. mov QWORD PTR [r10+168], r8
  28814. sbb rax, QWORD PTR [r11+176]
  28815. mov r8, QWORD PTR [rdx+184]
  28816. mov QWORD PTR [r10+176], rax
  28817. sbb r8, QWORD PTR [r11+184]
  28818. mov QWORD PTR [r10+184], r8
  28819. sbb r9, 0
  28820. ; Cond Negate
  28821. mov rax, QWORD PTR [r10]
  28822. mov r11, r9
  28823. xor rax, r9
  28824. neg r11
  28825. sub rax, r9
  28826. mov r8, QWORD PTR [r10+8]
  28827. sbb r11, 0
  28828. mov QWORD PTR [r10], rax
  28829. xor r8, r9
  28830. add r8, r11
  28831. mov rax, QWORD PTR [r10+16]
  28832. setc r11b
  28833. mov QWORD PTR [r10+8], r8
  28834. xor rax, r9
  28835. add rax, r11
  28836. mov r8, QWORD PTR [r10+24]
  28837. setc r11b
  28838. mov QWORD PTR [r10+16], rax
  28839. xor r8, r9
  28840. add r8, r11
  28841. mov rax, QWORD PTR [r10+32]
  28842. setc r11b
  28843. mov QWORD PTR [r10+24], r8
  28844. xor rax, r9
  28845. add rax, r11
  28846. mov r8, QWORD PTR [r10+40]
  28847. setc r11b
  28848. mov QWORD PTR [r10+32], rax
  28849. xor r8, r9
  28850. add r8, r11
  28851. mov rax, QWORD PTR [r10+48]
  28852. setc r11b
  28853. mov QWORD PTR [r10+40], r8
  28854. xor rax, r9
  28855. add rax, r11
  28856. mov r8, QWORD PTR [r10+56]
  28857. setc r11b
  28858. mov QWORD PTR [r10+48], rax
  28859. xor r8, r9
  28860. add r8, r11
  28861. mov rax, QWORD PTR [r10+64]
  28862. setc r11b
  28863. mov QWORD PTR [r10+56], r8
  28864. xor rax, r9
  28865. add rax, r11
  28866. mov r8, QWORD PTR [r10+72]
  28867. setc r11b
  28868. mov QWORD PTR [r10+64], rax
  28869. xor r8, r9
  28870. add r8, r11
  28871. mov rax, QWORD PTR [r10+80]
  28872. setc r11b
  28873. mov QWORD PTR [r10+72], r8
  28874. xor rax, r9
  28875. add rax, r11
  28876. mov r8, QWORD PTR [r10+88]
  28877. setc r11b
  28878. mov QWORD PTR [r10+80], rax
  28879. xor r8, r9
  28880. add r8, r11
  28881. mov rax, QWORD PTR [r10+96]
  28882. setc r11b
  28883. mov QWORD PTR [r10+88], r8
  28884. xor rax, r9
  28885. add rax, r11
  28886. mov r8, QWORD PTR [r10+104]
  28887. setc r11b
  28888. mov QWORD PTR [r10+96], rax
  28889. xor r8, r9
  28890. add r8, r11
  28891. mov rax, QWORD PTR [r10+112]
  28892. setc r11b
  28893. mov QWORD PTR [r10+104], r8
  28894. xor rax, r9
  28895. add rax, r11
  28896. mov r8, QWORD PTR [r10+120]
  28897. setc r11b
  28898. mov QWORD PTR [r10+112], rax
  28899. xor r8, r9
  28900. add r8, r11
  28901. mov rax, QWORD PTR [r10+128]
  28902. setc r11b
  28903. mov QWORD PTR [r10+120], r8
  28904. xor rax, r9
  28905. add rax, r11
  28906. mov r8, QWORD PTR [r10+136]
  28907. setc r11b
  28908. mov QWORD PTR [r10+128], rax
  28909. xor r8, r9
  28910. add r8, r11
  28911. mov rax, QWORD PTR [r10+144]
  28912. setc r11b
  28913. mov QWORD PTR [r10+136], r8
  28914. xor rax, r9
  28915. add rax, r11
  28916. mov r8, QWORD PTR [r10+152]
  28917. setc r11b
  28918. mov QWORD PTR [r10+144], rax
  28919. xor r8, r9
  28920. add r8, r11
  28921. mov rax, QWORD PTR [r10+160]
  28922. setc r11b
  28923. mov QWORD PTR [r10+152], r8
  28924. xor rax, r9
  28925. add rax, r11
  28926. mov r8, QWORD PTR [r10+168]
  28927. setc r11b
  28928. mov QWORD PTR [r10+160], rax
  28929. xor r8, r9
  28930. add r8, r11
  28931. mov rax, QWORD PTR [r10+176]
  28932. setc r11b
  28933. mov QWORD PTR [r10+168], r8
  28934. xor rax, r9
  28935. add rax, r11
  28936. mov r8, QWORD PTR [r10+184]
  28937. setc r11b
  28938. mov QWORD PTR [r10+176], rax
  28939. xor r8, r9
  28940. add r8, r11
  28941. mov QWORD PTR [r10+184], r8
  28942. mov rdx, r10
  28943. mov rcx, rsp
  28944. call sp_3072_sqr_24
  28945. mov rdx, QWORD PTR [rsp+392]
  28946. mov rcx, QWORD PTR [rsp+384]
  28947. add rdx, 192
  28948. add rcx, 384
  28949. call sp_3072_sqr_24
  28950. mov rdx, QWORD PTR [rsp+392]
  28951. mov rcx, QWORD PTR [rsp+384]
  28952. call sp_3072_sqr_24
  28953. IFDEF _WIN64
  28954. mov rdx, QWORD PTR [rsp+392]
  28955. mov rcx, QWORD PTR [rsp+384]
  28956. ENDIF
  28957. mov rdx, QWORD PTR [rsp+384]
  28958. lea r10, QWORD PTR [rsp+192]
  28959. add rdx, 576
  28960. mov r9, 0
  28961. mov r8, QWORD PTR [r10+-192]
  28962. sub r8, QWORD PTR [rdx+-192]
  28963. mov rax, QWORD PTR [r10+-184]
  28964. mov QWORD PTR [r10+-192], r8
  28965. sbb rax, QWORD PTR [rdx+-184]
  28966. mov r8, QWORD PTR [r10+-176]
  28967. mov QWORD PTR [r10+-184], rax
  28968. sbb r8, QWORD PTR [rdx+-176]
  28969. mov rax, QWORD PTR [r10+-168]
  28970. mov QWORD PTR [r10+-176], r8
  28971. sbb rax, QWORD PTR [rdx+-168]
  28972. mov r8, QWORD PTR [r10+-160]
  28973. mov QWORD PTR [r10+-168], rax
  28974. sbb r8, QWORD PTR [rdx+-160]
  28975. mov rax, QWORD PTR [r10+-152]
  28976. mov QWORD PTR [r10+-160], r8
  28977. sbb rax, QWORD PTR [rdx+-152]
  28978. mov r8, QWORD PTR [r10+-144]
  28979. mov QWORD PTR [r10+-152], rax
  28980. sbb r8, QWORD PTR [rdx+-144]
  28981. mov rax, QWORD PTR [r10+-136]
  28982. mov QWORD PTR [r10+-144], r8
  28983. sbb rax, QWORD PTR [rdx+-136]
  28984. mov r8, QWORD PTR [r10+-128]
  28985. mov QWORD PTR [r10+-136], rax
  28986. sbb r8, QWORD PTR [rdx+-128]
  28987. mov rax, QWORD PTR [r10+-120]
  28988. mov QWORD PTR [r10+-128], r8
  28989. sbb rax, QWORD PTR [rdx+-120]
  28990. mov r8, QWORD PTR [r10+-112]
  28991. mov QWORD PTR [r10+-120], rax
  28992. sbb r8, QWORD PTR [rdx+-112]
  28993. mov rax, QWORD PTR [r10+-104]
  28994. mov QWORD PTR [r10+-112], r8
  28995. sbb rax, QWORD PTR [rdx+-104]
  28996. mov r8, QWORD PTR [r10+-96]
  28997. mov QWORD PTR [r10+-104], rax
  28998. sbb r8, QWORD PTR [rdx+-96]
  28999. mov rax, QWORD PTR [r10+-88]
  29000. mov QWORD PTR [r10+-96], r8
  29001. sbb rax, QWORD PTR [rdx+-88]
  29002. mov r8, QWORD PTR [r10+-80]
  29003. mov QWORD PTR [r10+-88], rax
  29004. sbb r8, QWORD PTR [rdx+-80]
  29005. mov rax, QWORD PTR [r10+-72]
  29006. mov QWORD PTR [r10+-80], r8
  29007. sbb rax, QWORD PTR [rdx+-72]
  29008. mov r8, QWORD PTR [r10+-64]
  29009. mov QWORD PTR [r10+-72], rax
  29010. sbb r8, QWORD PTR [rdx+-64]
  29011. mov rax, QWORD PTR [r10+-56]
  29012. mov QWORD PTR [r10+-64], r8
  29013. sbb rax, QWORD PTR [rdx+-56]
  29014. mov r8, QWORD PTR [r10+-48]
  29015. mov QWORD PTR [r10+-56], rax
  29016. sbb r8, QWORD PTR [rdx+-48]
  29017. mov rax, QWORD PTR [r10+-40]
  29018. mov QWORD PTR [r10+-48], r8
  29019. sbb rax, QWORD PTR [rdx+-40]
  29020. mov r8, QWORD PTR [r10+-32]
  29021. mov QWORD PTR [r10+-40], rax
  29022. sbb r8, QWORD PTR [rdx+-32]
  29023. mov rax, QWORD PTR [r10+-24]
  29024. mov QWORD PTR [r10+-32], r8
  29025. sbb rax, QWORD PTR [rdx+-24]
  29026. mov r8, QWORD PTR [r10+-16]
  29027. mov QWORD PTR [r10+-24], rax
  29028. sbb r8, QWORD PTR [rdx+-16]
  29029. mov rax, QWORD PTR [r10+-8]
  29030. mov QWORD PTR [r10+-16], r8
  29031. sbb rax, QWORD PTR [rdx+-8]
  29032. mov r8, QWORD PTR [r10]
  29033. mov QWORD PTR [r10+-8], rax
  29034. sbb r8, QWORD PTR [rdx]
  29035. mov rax, QWORD PTR [r10+8]
  29036. mov QWORD PTR [r10], r8
  29037. sbb rax, QWORD PTR [rdx+8]
  29038. mov r8, QWORD PTR [r10+16]
  29039. mov QWORD PTR [r10+8], rax
  29040. sbb r8, QWORD PTR [rdx+16]
  29041. mov rax, QWORD PTR [r10+24]
  29042. mov QWORD PTR [r10+16], r8
  29043. sbb rax, QWORD PTR [rdx+24]
  29044. mov r8, QWORD PTR [r10+32]
  29045. mov QWORD PTR [r10+24], rax
  29046. sbb r8, QWORD PTR [rdx+32]
  29047. mov rax, QWORD PTR [r10+40]
  29048. mov QWORD PTR [r10+32], r8
  29049. sbb rax, QWORD PTR [rdx+40]
  29050. mov r8, QWORD PTR [r10+48]
  29051. mov QWORD PTR [r10+40], rax
  29052. sbb r8, QWORD PTR [rdx+48]
  29053. mov rax, QWORD PTR [r10+56]
  29054. mov QWORD PTR [r10+48], r8
  29055. sbb rax, QWORD PTR [rdx+56]
  29056. mov r8, QWORD PTR [r10+64]
  29057. mov QWORD PTR [r10+56], rax
  29058. sbb r8, QWORD PTR [rdx+64]
  29059. mov rax, QWORD PTR [r10+72]
  29060. mov QWORD PTR [r10+64], r8
  29061. sbb rax, QWORD PTR [rdx+72]
  29062. mov r8, QWORD PTR [r10+80]
  29063. mov QWORD PTR [r10+72], rax
  29064. sbb r8, QWORD PTR [rdx+80]
  29065. mov rax, QWORD PTR [r10+88]
  29066. mov QWORD PTR [r10+80], r8
  29067. sbb rax, QWORD PTR [rdx+88]
  29068. mov r8, QWORD PTR [r10+96]
  29069. mov QWORD PTR [r10+88], rax
  29070. sbb r8, QWORD PTR [rdx+96]
  29071. mov rax, QWORD PTR [r10+104]
  29072. mov QWORD PTR [r10+96], r8
  29073. sbb rax, QWORD PTR [rdx+104]
  29074. mov r8, QWORD PTR [r10+112]
  29075. mov QWORD PTR [r10+104], rax
  29076. sbb r8, QWORD PTR [rdx+112]
  29077. mov rax, QWORD PTR [r10+120]
  29078. mov QWORD PTR [r10+112], r8
  29079. sbb rax, QWORD PTR [rdx+120]
  29080. mov r8, QWORD PTR [r10+128]
  29081. mov QWORD PTR [r10+120], rax
  29082. sbb r8, QWORD PTR [rdx+128]
  29083. mov rax, QWORD PTR [r10+136]
  29084. mov QWORD PTR [r10+128], r8
  29085. sbb rax, QWORD PTR [rdx+136]
  29086. mov r8, QWORD PTR [r10+144]
  29087. mov QWORD PTR [r10+136], rax
  29088. sbb r8, QWORD PTR [rdx+144]
  29089. mov rax, QWORD PTR [r10+152]
  29090. mov QWORD PTR [r10+144], r8
  29091. sbb rax, QWORD PTR [rdx+152]
  29092. mov r8, QWORD PTR [r10+160]
  29093. mov QWORD PTR [r10+152], rax
  29094. sbb r8, QWORD PTR [rdx+160]
  29095. mov rax, QWORD PTR [r10+168]
  29096. mov QWORD PTR [r10+160], r8
  29097. sbb rax, QWORD PTR [rdx+168]
  29098. mov r8, QWORD PTR [r10+176]
  29099. mov QWORD PTR [r10+168], rax
  29100. sbb r8, QWORD PTR [rdx+176]
  29101. mov rax, QWORD PTR [r10+184]
  29102. mov QWORD PTR [r10+176], r8
  29103. sbb rax, QWORD PTR [rdx+184]
  29104. mov QWORD PTR [r10+184], rax
  29105. sbb r9, 0
  29106. sub rdx, 384
  29107. mov r8, QWORD PTR [r10+-192]
  29108. sub r8, QWORD PTR [rdx+-192]
  29109. mov rax, QWORD PTR [r10+-184]
  29110. mov QWORD PTR [r10+-192], r8
  29111. sbb rax, QWORD PTR [rdx+-184]
  29112. mov r8, QWORD PTR [r10+-176]
  29113. mov QWORD PTR [r10+-184], rax
  29114. sbb r8, QWORD PTR [rdx+-176]
  29115. mov rax, QWORD PTR [r10+-168]
  29116. mov QWORD PTR [r10+-176], r8
  29117. sbb rax, QWORD PTR [rdx+-168]
  29118. mov r8, QWORD PTR [r10+-160]
  29119. mov QWORD PTR [r10+-168], rax
  29120. sbb r8, QWORD PTR [rdx+-160]
  29121. mov rax, QWORD PTR [r10+-152]
  29122. mov QWORD PTR [r10+-160], r8
  29123. sbb rax, QWORD PTR [rdx+-152]
  29124. mov r8, QWORD PTR [r10+-144]
  29125. mov QWORD PTR [r10+-152], rax
  29126. sbb r8, QWORD PTR [rdx+-144]
  29127. mov rax, QWORD PTR [r10+-136]
  29128. mov QWORD PTR [r10+-144], r8
  29129. sbb rax, QWORD PTR [rdx+-136]
  29130. mov r8, QWORD PTR [r10+-128]
  29131. mov QWORD PTR [r10+-136], rax
  29132. sbb r8, QWORD PTR [rdx+-128]
  29133. mov rax, QWORD PTR [r10+-120]
  29134. mov QWORD PTR [r10+-128], r8
  29135. sbb rax, QWORD PTR [rdx+-120]
  29136. mov r8, QWORD PTR [r10+-112]
  29137. mov QWORD PTR [r10+-120], rax
  29138. sbb r8, QWORD PTR [rdx+-112]
  29139. mov rax, QWORD PTR [r10+-104]
  29140. mov QWORD PTR [r10+-112], r8
  29141. sbb rax, QWORD PTR [rdx+-104]
  29142. mov r8, QWORD PTR [r10+-96]
  29143. mov QWORD PTR [r10+-104], rax
  29144. sbb r8, QWORD PTR [rdx+-96]
  29145. mov rax, QWORD PTR [r10+-88]
  29146. mov QWORD PTR [r10+-96], r8
  29147. sbb rax, QWORD PTR [rdx+-88]
  29148. mov r8, QWORD PTR [r10+-80]
  29149. mov QWORD PTR [r10+-88], rax
  29150. sbb r8, QWORD PTR [rdx+-80]
  29151. mov rax, QWORD PTR [r10+-72]
  29152. mov QWORD PTR [r10+-80], r8
  29153. sbb rax, QWORD PTR [rdx+-72]
  29154. mov r8, QWORD PTR [r10+-64]
  29155. mov QWORD PTR [r10+-72], rax
  29156. sbb r8, QWORD PTR [rdx+-64]
  29157. mov rax, QWORD PTR [r10+-56]
  29158. mov QWORD PTR [r10+-64], r8
  29159. sbb rax, QWORD PTR [rdx+-56]
  29160. mov r8, QWORD PTR [r10+-48]
  29161. mov QWORD PTR [r10+-56], rax
  29162. sbb r8, QWORD PTR [rdx+-48]
  29163. mov rax, QWORD PTR [r10+-40]
  29164. mov QWORD PTR [r10+-48], r8
  29165. sbb rax, QWORD PTR [rdx+-40]
  29166. mov r8, QWORD PTR [r10+-32]
  29167. mov QWORD PTR [r10+-40], rax
  29168. sbb r8, QWORD PTR [rdx+-32]
  29169. mov rax, QWORD PTR [r10+-24]
  29170. mov QWORD PTR [r10+-32], r8
  29171. sbb rax, QWORD PTR [rdx+-24]
  29172. mov r8, QWORD PTR [r10+-16]
  29173. mov QWORD PTR [r10+-24], rax
  29174. sbb r8, QWORD PTR [rdx+-16]
  29175. mov rax, QWORD PTR [r10+-8]
  29176. mov QWORD PTR [r10+-16], r8
  29177. sbb rax, QWORD PTR [rdx+-8]
  29178. mov r8, QWORD PTR [r10]
  29179. mov QWORD PTR [r10+-8], rax
  29180. sbb r8, QWORD PTR [rdx]
  29181. mov rax, QWORD PTR [r10+8]
  29182. mov QWORD PTR [r10], r8
  29183. sbb rax, QWORD PTR [rdx+8]
  29184. mov r8, QWORD PTR [r10+16]
  29185. mov QWORD PTR [r10+8], rax
  29186. sbb r8, QWORD PTR [rdx+16]
  29187. mov rax, QWORD PTR [r10+24]
  29188. mov QWORD PTR [r10+16], r8
  29189. sbb rax, QWORD PTR [rdx+24]
  29190. mov r8, QWORD PTR [r10+32]
  29191. mov QWORD PTR [r10+24], rax
  29192. sbb r8, QWORD PTR [rdx+32]
  29193. mov rax, QWORD PTR [r10+40]
  29194. mov QWORD PTR [r10+32], r8
  29195. sbb rax, QWORD PTR [rdx+40]
  29196. mov r8, QWORD PTR [r10+48]
  29197. mov QWORD PTR [r10+40], rax
  29198. sbb r8, QWORD PTR [rdx+48]
  29199. mov rax, QWORD PTR [r10+56]
  29200. mov QWORD PTR [r10+48], r8
  29201. sbb rax, QWORD PTR [rdx+56]
  29202. mov r8, QWORD PTR [r10+64]
  29203. mov QWORD PTR [r10+56], rax
  29204. sbb r8, QWORD PTR [rdx+64]
  29205. mov rax, QWORD PTR [r10+72]
  29206. mov QWORD PTR [r10+64], r8
  29207. sbb rax, QWORD PTR [rdx+72]
  29208. mov r8, QWORD PTR [r10+80]
  29209. mov QWORD PTR [r10+72], rax
  29210. sbb r8, QWORD PTR [rdx+80]
  29211. mov rax, QWORD PTR [r10+88]
  29212. mov QWORD PTR [r10+80], r8
  29213. sbb rax, QWORD PTR [rdx+88]
  29214. mov r8, QWORD PTR [r10+96]
  29215. mov QWORD PTR [r10+88], rax
  29216. sbb r8, QWORD PTR [rdx+96]
  29217. mov rax, QWORD PTR [r10+104]
  29218. mov QWORD PTR [r10+96], r8
  29219. sbb rax, QWORD PTR [rdx+104]
  29220. mov r8, QWORD PTR [r10+112]
  29221. mov QWORD PTR [r10+104], rax
  29222. sbb r8, QWORD PTR [rdx+112]
  29223. mov rax, QWORD PTR [r10+120]
  29224. mov QWORD PTR [r10+112], r8
  29225. sbb rax, QWORD PTR [rdx+120]
  29226. mov r8, QWORD PTR [r10+128]
  29227. mov QWORD PTR [r10+120], rax
  29228. sbb r8, QWORD PTR [rdx+128]
  29229. mov rax, QWORD PTR [r10+136]
  29230. mov QWORD PTR [r10+128], r8
  29231. sbb rax, QWORD PTR [rdx+136]
  29232. mov r8, QWORD PTR [r10+144]
  29233. mov QWORD PTR [r10+136], rax
  29234. sbb r8, QWORD PTR [rdx+144]
  29235. mov rax, QWORD PTR [r10+152]
  29236. mov QWORD PTR [r10+144], r8
  29237. sbb rax, QWORD PTR [rdx+152]
  29238. mov r8, QWORD PTR [r10+160]
  29239. mov QWORD PTR [r10+152], rax
  29240. sbb r8, QWORD PTR [rdx+160]
  29241. mov rax, QWORD PTR [r10+168]
  29242. mov QWORD PTR [r10+160], r8
  29243. sbb rax, QWORD PTR [rdx+168]
  29244. mov r8, QWORD PTR [r10+176]
  29245. mov QWORD PTR [r10+168], rax
  29246. sbb r8, QWORD PTR [rdx+176]
  29247. mov rax, QWORD PTR [r10+184]
  29248. mov QWORD PTR [r10+176], r8
  29249. sbb rax, QWORD PTR [rdx+184]
  29250. mov QWORD PTR [r10+184], rax
  29251. sbb r9, 0
  29252. mov rcx, QWORD PTR [rsp+384]
  29253. neg r9
  29254. add rcx, 384
  29255. mov r8, QWORD PTR [rcx+-192]
  29256. sub r8, QWORD PTR [r10+-192]
  29257. mov rax, QWORD PTR [rcx+-184]
  29258. mov QWORD PTR [rcx+-192], r8
  29259. sbb rax, QWORD PTR [r10+-184]
  29260. mov r8, QWORD PTR [rcx+-176]
  29261. mov QWORD PTR [rcx+-184], rax
  29262. sbb r8, QWORD PTR [r10+-176]
  29263. mov rax, QWORD PTR [rcx+-168]
  29264. mov QWORD PTR [rcx+-176], r8
  29265. sbb rax, QWORD PTR [r10+-168]
  29266. mov r8, QWORD PTR [rcx+-160]
  29267. mov QWORD PTR [rcx+-168], rax
  29268. sbb r8, QWORD PTR [r10+-160]
  29269. mov rax, QWORD PTR [rcx+-152]
  29270. mov QWORD PTR [rcx+-160], r8
  29271. sbb rax, QWORD PTR [r10+-152]
  29272. mov r8, QWORD PTR [rcx+-144]
  29273. mov QWORD PTR [rcx+-152], rax
  29274. sbb r8, QWORD PTR [r10+-144]
  29275. mov rax, QWORD PTR [rcx+-136]
  29276. mov QWORD PTR [rcx+-144], r8
  29277. sbb rax, QWORD PTR [r10+-136]
  29278. mov r8, QWORD PTR [rcx+-128]
  29279. mov QWORD PTR [rcx+-136], rax
  29280. sbb r8, QWORD PTR [r10+-128]
  29281. mov rax, QWORD PTR [rcx+-120]
  29282. mov QWORD PTR [rcx+-128], r8
  29283. sbb rax, QWORD PTR [r10+-120]
  29284. mov r8, QWORD PTR [rcx+-112]
  29285. mov QWORD PTR [rcx+-120], rax
  29286. sbb r8, QWORD PTR [r10+-112]
  29287. mov rax, QWORD PTR [rcx+-104]
  29288. mov QWORD PTR [rcx+-112], r8
  29289. sbb rax, QWORD PTR [r10+-104]
  29290. mov r8, QWORD PTR [rcx+-96]
  29291. mov QWORD PTR [rcx+-104], rax
  29292. sbb r8, QWORD PTR [r10+-96]
  29293. mov rax, QWORD PTR [rcx+-88]
  29294. mov QWORD PTR [rcx+-96], r8
  29295. sbb rax, QWORD PTR [r10+-88]
  29296. mov r8, QWORD PTR [rcx+-80]
  29297. mov QWORD PTR [rcx+-88], rax
  29298. sbb r8, QWORD PTR [r10+-80]
  29299. mov rax, QWORD PTR [rcx+-72]
  29300. mov QWORD PTR [rcx+-80], r8
  29301. sbb rax, QWORD PTR [r10+-72]
  29302. mov r8, QWORD PTR [rcx+-64]
  29303. mov QWORD PTR [rcx+-72], rax
  29304. sbb r8, QWORD PTR [r10+-64]
  29305. mov rax, QWORD PTR [rcx+-56]
  29306. mov QWORD PTR [rcx+-64], r8
  29307. sbb rax, QWORD PTR [r10+-56]
  29308. mov r8, QWORD PTR [rcx+-48]
  29309. mov QWORD PTR [rcx+-56], rax
  29310. sbb r8, QWORD PTR [r10+-48]
  29311. mov rax, QWORD PTR [rcx+-40]
  29312. mov QWORD PTR [rcx+-48], r8
  29313. sbb rax, QWORD PTR [r10+-40]
  29314. mov r8, QWORD PTR [rcx+-32]
  29315. mov QWORD PTR [rcx+-40], rax
  29316. sbb r8, QWORD PTR [r10+-32]
  29317. mov rax, QWORD PTR [rcx+-24]
  29318. mov QWORD PTR [rcx+-32], r8
  29319. sbb rax, QWORD PTR [r10+-24]
  29320. mov r8, QWORD PTR [rcx+-16]
  29321. mov QWORD PTR [rcx+-24], rax
  29322. sbb r8, QWORD PTR [r10+-16]
  29323. mov rax, QWORD PTR [rcx+-8]
  29324. mov QWORD PTR [rcx+-16], r8
  29325. sbb rax, QWORD PTR [r10+-8]
  29326. mov r8, QWORD PTR [rcx]
  29327. mov QWORD PTR [rcx+-8], rax
  29328. sbb r8, QWORD PTR [r10]
  29329. mov rax, QWORD PTR [rcx+8]
  29330. mov QWORD PTR [rcx], r8
  29331. sbb rax, QWORD PTR [r10+8]
  29332. mov r8, QWORD PTR [rcx+16]
  29333. mov QWORD PTR [rcx+8], rax
  29334. sbb r8, QWORD PTR [r10+16]
  29335. mov rax, QWORD PTR [rcx+24]
  29336. mov QWORD PTR [rcx+16], r8
  29337. sbb rax, QWORD PTR [r10+24]
  29338. mov r8, QWORD PTR [rcx+32]
  29339. mov QWORD PTR [rcx+24], rax
  29340. sbb r8, QWORD PTR [r10+32]
  29341. mov rax, QWORD PTR [rcx+40]
  29342. mov QWORD PTR [rcx+32], r8
  29343. sbb rax, QWORD PTR [r10+40]
  29344. mov r8, QWORD PTR [rcx+48]
  29345. mov QWORD PTR [rcx+40], rax
  29346. sbb r8, QWORD PTR [r10+48]
  29347. mov rax, QWORD PTR [rcx+56]
  29348. mov QWORD PTR [rcx+48], r8
  29349. sbb rax, QWORD PTR [r10+56]
  29350. mov r8, QWORD PTR [rcx+64]
  29351. mov QWORD PTR [rcx+56], rax
  29352. sbb r8, QWORD PTR [r10+64]
  29353. mov rax, QWORD PTR [rcx+72]
  29354. mov QWORD PTR [rcx+64], r8
  29355. sbb rax, QWORD PTR [r10+72]
  29356. mov r8, QWORD PTR [rcx+80]
  29357. mov QWORD PTR [rcx+72], rax
  29358. sbb r8, QWORD PTR [r10+80]
  29359. mov rax, QWORD PTR [rcx+88]
  29360. mov QWORD PTR [rcx+80], r8
  29361. sbb rax, QWORD PTR [r10+88]
  29362. mov r8, QWORD PTR [rcx+96]
  29363. mov QWORD PTR [rcx+88], rax
  29364. sbb r8, QWORD PTR [r10+96]
  29365. mov rax, QWORD PTR [rcx+104]
  29366. mov QWORD PTR [rcx+96], r8
  29367. sbb rax, QWORD PTR [r10+104]
  29368. mov r8, QWORD PTR [rcx+112]
  29369. mov QWORD PTR [rcx+104], rax
  29370. sbb r8, QWORD PTR [r10+112]
  29371. mov rax, QWORD PTR [rcx+120]
  29372. mov QWORD PTR [rcx+112], r8
  29373. sbb rax, QWORD PTR [r10+120]
  29374. mov r8, QWORD PTR [rcx+128]
  29375. mov QWORD PTR [rcx+120], rax
  29376. sbb r8, QWORD PTR [r10+128]
  29377. mov rax, QWORD PTR [rcx+136]
  29378. mov QWORD PTR [rcx+128], r8
  29379. sbb rax, QWORD PTR [r10+136]
  29380. mov r8, QWORD PTR [rcx+144]
  29381. mov QWORD PTR [rcx+136], rax
  29382. sbb r8, QWORD PTR [r10+144]
  29383. mov rax, QWORD PTR [rcx+152]
  29384. mov QWORD PTR [rcx+144], r8
  29385. sbb rax, QWORD PTR [r10+152]
  29386. mov r8, QWORD PTR [rcx+160]
  29387. mov QWORD PTR [rcx+152], rax
  29388. sbb r8, QWORD PTR [r10+160]
  29389. mov rax, QWORD PTR [rcx+168]
  29390. mov QWORD PTR [rcx+160], r8
  29391. sbb rax, QWORD PTR [r10+168]
  29392. mov r8, QWORD PTR [rcx+176]
  29393. mov QWORD PTR [rcx+168], rax
  29394. sbb r8, QWORD PTR [r10+176]
  29395. mov rax, QWORD PTR [rcx+184]
  29396. mov QWORD PTR [rcx+176], r8
  29397. sbb rax, QWORD PTR [r10+184]
  29398. mov QWORD PTR [rcx+184], rax
  29399. sbb r9, 0
  29400. mov rcx, QWORD PTR [rsp+384]
  29401. add rcx, 576
  29402. ; Add in word
  29403. mov r8, QWORD PTR [rcx]
  29404. add r8, r9
  29405. mov rax, QWORD PTR [rcx+8]
  29406. mov QWORD PTR [rcx], r8
  29407. adc rax, 0
  29408. mov r8, QWORD PTR [rcx+16]
  29409. mov QWORD PTR [rcx+8], rax
  29410. adc r8, 0
  29411. mov rax, QWORD PTR [rcx+24]
  29412. mov QWORD PTR [rcx+16], r8
  29413. adc rax, 0
  29414. mov r8, QWORD PTR [rcx+32]
  29415. mov QWORD PTR [rcx+24], rax
  29416. adc r8, 0
  29417. mov rax, QWORD PTR [rcx+40]
  29418. mov QWORD PTR [rcx+32], r8
  29419. adc rax, 0
  29420. mov r8, QWORD PTR [rcx+48]
  29421. mov QWORD PTR [rcx+40], rax
  29422. adc r8, 0
  29423. mov rax, QWORD PTR [rcx+56]
  29424. mov QWORD PTR [rcx+48], r8
  29425. adc rax, 0
  29426. mov r8, QWORD PTR [rcx+64]
  29427. mov QWORD PTR [rcx+56], rax
  29428. adc r8, 0
  29429. mov rax, QWORD PTR [rcx+72]
  29430. mov QWORD PTR [rcx+64], r8
  29431. adc rax, 0
  29432. mov r8, QWORD PTR [rcx+80]
  29433. mov QWORD PTR [rcx+72], rax
  29434. adc r8, 0
  29435. mov rax, QWORD PTR [rcx+88]
  29436. mov QWORD PTR [rcx+80], r8
  29437. adc rax, 0
  29438. mov r8, QWORD PTR [rcx+96]
  29439. mov QWORD PTR [rcx+88], rax
  29440. adc r8, 0
  29441. mov rax, QWORD PTR [rcx+104]
  29442. mov QWORD PTR [rcx+96], r8
  29443. adc rax, 0
  29444. mov r8, QWORD PTR [rcx+112]
  29445. mov QWORD PTR [rcx+104], rax
  29446. adc r8, 0
  29447. mov rax, QWORD PTR [rcx+120]
  29448. mov QWORD PTR [rcx+112], r8
  29449. adc rax, 0
  29450. mov r8, QWORD PTR [rcx+128]
  29451. mov QWORD PTR [rcx+120], rax
  29452. adc r8, 0
  29453. mov rax, QWORD PTR [rcx+136]
  29454. mov QWORD PTR [rcx+128], r8
  29455. adc rax, 0
  29456. mov r8, QWORD PTR [rcx+144]
  29457. mov QWORD PTR [rcx+136], rax
  29458. adc r8, 0
  29459. mov rax, QWORD PTR [rcx+152]
  29460. mov QWORD PTR [rcx+144], r8
  29461. adc rax, 0
  29462. mov r8, QWORD PTR [rcx+160]
  29463. mov QWORD PTR [rcx+152], rax
  29464. adc r8, 0
  29465. mov rax, QWORD PTR [rcx+168]
  29466. mov QWORD PTR [rcx+160], r8
  29467. adc rax, 0
  29468. mov r8, QWORD PTR [rcx+176]
  29469. mov QWORD PTR [rcx+168], rax
  29470. adc r8, 0
  29471. mov rax, QWORD PTR [rcx+184]
  29472. mov QWORD PTR [rcx+176], r8
  29473. adc rax, 0
  29474. mov QWORD PTR [rcx+184], rax
  29475. mov rdx, QWORD PTR [rsp+392]
  29476. mov rcx, QWORD PTR [rsp+384]
  29477. add rsp, 400
  29478. ret
  29479. sp_3072_sqr_48 ENDP
  29480. _text ENDS
  29481. IFDEF HAVE_INTEL_AVX2
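  29482. ; /* Square a and put result in r. (r = a * a)
  29483. ; * Assembled only when HAVE_INTEL_AVX2 is defined.
  29484. ; * Karatsuba: ah^2, al^2, (al - ah)^2, each computed by sp_3072_sqr_avx2_24,
  29485. ; * where al and ah are the low and high 24-word halves of a.
  29486. ; * r Result; a single precision integer (pointer passed in rcx).
  29487. ; * a Number to square; a single precision integer of 48 64-bit words (pointer passed in rdx).
  29488. ; */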
  29489. _text SEGMENT READONLY PARA
  29490. sp_3072_sqr_avx2_48 PROC
  29491. sub rsp, 400
  29492. mov QWORD PTR [rsp+384], rcx
  29493. mov QWORD PTR [rsp+392], rdx
  29494. mov r9, 0
  29495. mov r10, rsp
  29496. lea r11, QWORD PTR [rdx+192]
  29497. mov rax, QWORD PTR [rdx]
  29498. sub rax, QWORD PTR [r11]
  29499. mov r8, QWORD PTR [rdx+8]
  29500. mov QWORD PTR [r10], rax
  29501. sbb r8, QWORD PTR [r11+8]
  29502. mov rax, QWORD PTR [rdx+16]
  29503. mov QWORD PTR [r10+8], r8
  29504. sbb rax, QWORD PTR [r11+16]
  29505. mov r8, QWORD PTR [rdx+24]
  29506. mov QWORD PTR [r10+16], rax
  29507. sbb r8, QWORD PTR [r11+24]
  29508. mov rax, QWORD PTR [rdx+32]
  29509. mov QWORD PTR [r10+24], r8
  29510. sbb rax, QWORD PTR [r11+32]
  29511. mov r8, QWORD PTR [rdx+40]
  29512. mov QWORD PTR [r10+32], rax
  29513. sbb r8, QWORD PTR [r11+40]
  29514. mov rax, QWORD PTR [rdx+48]
  29515. mov QWORD PTR [r10+40], r8
  29516. sbb rax, QWORD PTR [r11+48]
  29517. mov r8, QWORD PTR [rdx+56]
  29518. mov QWORD PTR [r10+48], rax
  29519. sbb r8, QWORD PTR [r11+56]
  29520. mov rax, QWORD PTR [rdx+64]
  29521. mov QWORD PTR [r10+56], r8
  29522. sbb rax, QWORD PTR [r11+64]
  29523. mov r8, QWORD PTR [rdx+72]
  29524. mov QWORD PTR [r10+64], rax
  29525. sbb r8, QWORD PTR [r11+72]
  29526. mov rax, QWORD PTR [rdx+80]
  29527. mov QWORD PTR [r10+72], r8
  29528. sbb rax, QWORD PTR [r11+80]
  29529. mov r8, QWORD PTR [rdx+88]
  29530. mov QWORD PTR [r10+80], rax
  29531. sbb r8, QWORD PTR [r11+88]
  29532. mov rax, QWORD PTR [rdx+96]
  29533. mov QWORD PTR [r10+88], r8
  29534. sbb rax, QWORD PTR [r11+96]
  29535. mov r8, QWORD PTR [rdx+104]
  29536. mov QWORD PTR [r10+96], rax
  29537. sbb r8, QWORD PTR [r11+104]
  29538. mov rax, QWORD PTR [rdx+112]
  29539. mov QWORD PTR [r10+104], r8
  29540. sbb rax, QWORD PTR [r11+112]
  29541. mov r8, QWORD PTR [rdx+120]
  29542. mov QWORD PTR [r10+112], rax
  29543. sbb r8, QWORD PTR [r11+120]
  29544. mov rax, QWORD PTR [rdx+128]
  29545. mov QWORD PTR [r10+120], r8
  29546. sbb rax, QWORD PTR [r11+128]
  29547. mov r8, QWORD PTR [rdx+136]
  29548. mov QWORD PTR [r10+128], rax
  29549. sbb r8, QWORD PTR [r11+136]
  29550. mov rax, QWORD PTR [rdx+144]
  29551. mov QWORD PTR [r10+136], r8
  29552. sbb rax, QWORD PTR [r11+144]
  29553. mov r8, QWORD PTR [rdx+152]
  29554. mov QWORD PTR [r10+144], rax
  29555. sbb r8, QWORD PTR [r11+152]
  29556. mov rax, QWORD PTR [rdx+160]
  29557. mov QWORD PTR [r10+152], r8
  29558. sbb rax, QWORD PTR [r11+160]
  29559. mov r8, QWORD PTR [rdx+168]
  29560. mov QWORD PTR [r10+160], rax
  29561. sbb r8, QWORD PTR [r11+168]
  29562. mov rax, QWORD PTR [rdx+176]
  29563. mov QWORD PTR [r10+168], r8
  29564. sbb rax, QWORD PTR [r11+176]
  29565. mov r8, QWORD PTR [rdx+184]
  29566. mov QWORD PTR [r10+176], rax
  29567. sbb r8, QWORD PTR [r11+184]
  29568. mov QWORD PTR [r10+184], r8
  29569. sbb r9, 0
  29570. ; Cond Negate
  29571. mov rax, QWORD PTR [r10]
  29572. mov r11, r9
  29573. xor rax, r9
  29574. neg r11
  29575. sub rax, r9
  29576. mov r8, QWORD PTR [r10+8]
  29577. sbb r11, 0
  29578. mov QWORD PTR [r10], rax
  29579. xor r8, r9
  29580. add r8, r11
  29581. mov rax, QWORD PTR [r10+16]
  29582. setc r11b
  29583. mov QWORD PTR [r10+8], r8
  29584. xor rax, r9
  29585. add rax, r11
  29586. mov r8, QWORD PTR [r10+24]
  29587. setc r11b
  29588. mov QWORD PTR [r10+16], rax
  29589. xor r8, r9
  29590. add r8, r11
  29591. mov rax, QWORD PTR [r10+32]
  29592. setc r11b
  29593. mov QWORD PTR [r10+24], r8
  29594. xor rax, r9
  29595. add rax, r11
  29596. mov r8, QWORD PTR [r10+40]
  29597. setc r11b
  29598. mov QWORD PTR [r10+32], rax
  29599. xor r8, r9
  29600. add r8, r11
  29601. mov rax, QWORD PTR [r10+48]
  29602. setc r11b
  29603. mov QWORD PTR [r10+40], r8
  29604. xor rax, r9
  29605. add rax, r11
  29606. mov r8, QWORD PTR [r10+56]
  29607. setc r11b
  29608. mov QWORD PTR [r10+48], rax
  29609. xor r8, r9
  29610. add r8, r11
  29611. mov rax, QWORD PTR [r10+64]
  29612. setc r11b
  29613. mov QWORD PTR [r10+56], r8
  29614. xor rax, r9
  29615. add rax, r11
  29616. mov r8, QWORD PTR [r10+72]
  29617. setc r11b
  29618. mov QWORD PTR [r10+64], rax
  29619. xor r8, r9
  29620. add r8, r11
  29621. mov rax, QWORD PTR [r10+80]
  29622. setc r11b
  29623. mov QWORD PTR [r10+72], r8
  29624. xor rax, r9
  29625. add rax, r11
  29626. mov r8, QWORD PTR [r10+88]
  29627. setc r11b
  29628. mov QWORD PTR [r10+80], rax
  29629. xor r8, r9
  29630. add r8, r11
  29631. mov rax, QWORD PTR [r10+96]
  29632. setc r11b
  29633. mov QWORD PTR [r10+88], r8
  29634. xor rax, r9
  29635. add rax, r11
  29636. mov r8, QWORD PTR [r10+104]
  29637. setc r11b
  29638. mov QWORD PTR [r10+96], rax
  29639. xor r8, r9
  29640. add r8, r11
  29641. mov rax, QWORD PTR [r10+112]
  29642. setc r11b
  29643. mov QWORD PTR [r10+104], r8
  29644. xor rax, r9
  29645. add rax, r11
  29646. mov r8, QWORD PTR [r10+120]
  29647. setc r11b
  29648. mov QWORD PTR [r10+112], rax
  29649. xor r8, r9
  29650. add r8, r11
  29651. mov rax, QWORD PTR [r10+128]
  29652. setc r11b
  29653. mov QWORD PTR [r10+120], r8
  29654. xor rax, r9
  29655. add rax, r11
  29656. mov r8, QWORD PTR [r10+136]
  29657. setc r11b
  29658. mov QWORD PTR [r10+128], rax
  29659. xor r8, r9
  29660. add r8, r11
  29661. mov rax, QWORD PTR [r10+144]
  29662. setc r11b
  29663. mov QWORD PTR [r10+136], r8
  29664. xor rax, r9
  29665. add rax, r11
  29666. mov r8, QWORD PTR [r10+152]
  29667. setc r11b
  29668. mov QWORD PTR [r10+144], rax
  29669. xor r8, r9
  29670. add r8, r11
  29671. mov rax, QWORD PTR [r10+160]
  29672. setc r11b
  29673. mov QWORD PTR [r10+152], r8
  29674. xor rax, r9
  29675. add rax, r11
  29676. mov r8, QWORD PTR [r10+168]
  29677. setc r11b
  29678. mov QWORD PTR [r10+160], rax
  29679. xor r8, r9
  29680. add r8, r11
  29681. mov rax, QWORD PTR [r10+176]
  29682. setc r11b
  29683. mov QWORD PTR [r10+168], r8
  29684. xor rax, r9
  29685. add rax, r11
  29686. mov r8, QWORD PTR [r10+184]
  29687. setc r11b
  29688. mov QWORD PTR [r10+176], rax
  29689. xor r8, r9
  29690. add r8, r11
  29691. mov QWORD PTR [r10+184], r8
  29692. mov rdx, r10
  29693. mov rcx, rsp
  29694. call sp_3072_sqr_avx2_24
  29695. mov rdx, QWORD PTR [rsp+392]
  29696. mov rcx, QWORD PTR [rsp+384]
  29697. add rdx, 192
  29698. add rcx, 384
  29699. call sp_3072_sqr_avx2_24
  29700. mov rdx, QWORD PTR [rsp+392]
  29701. mov rcx, QWORD PTR [rsp+384]
  29702. call sp_3072_sqr_avx2_24
  29703. IFDEF _WIN64
  29704. mov rdx, QWORD PTR [rsp+392]
  29705. mov rcx, QWORD PTR [rsp+384]
  29706. ENDIF
  29707. mov rdx, QWORD PTR [rsp+384]
  29708. lea r10, QWORD PTR [rsp+192]
  29709. add rdx, 576
  29710. mov r9, 0
  29711. mov r8, QWORD PTR [r10+-192]
  29712. sub r8, QWORD PTR [rdx+-192]
  29713. mov rax, QWORD PTR [r10+-184]
  29714. mov QWORD PTR [r10+-192], r8
  29715. sbb rax, QWORD PTR [rdx+-184]
  29716. mov r8, QWORD PTR [r10+-176]
  29717. mov QWORD PTR [r10+-184], rax
  29718. sbb r8, QWORD PTR [rdx+-176]
  29719. mov rax, QWORD PTR [r10+-168]
  29720. mov QWORD PTR [r10+-176], r8
  29721. sbb rax, QWORD PTR [rdx+-168]
  29722. mov r8, QWORD PTR [r10+-160]
  29723. mov QWORD PTR [r10+-168], rax
  29724. sbb r8, QWORD PTR [rdx+-160]
  29725. mov rax, QWORD PTR [r10+-152]
  29726. mov QWORD PTR [r10+-160], r8
  29727. sbb rax, QWORD PTR [rdx+-152]
  29728. mov r8, QWORD PTR [r10+-144]
  29729. mov QWORD PTR [r10+-152], rax
  29730. sbb r8, QWORD PTR [rdx+-144]
  29731. mov rax, QWORD PTR [r10+-136]
  29732. mov QWORD PTR [r10+-144], r8
  29733. sbb rax, QWORD PTR [rdx+-136]
  29734. mov r8, QWORD PTR [r10+-128]
  29735. mov QWORD PTR [r10+-136], rax
  29736. sbb r8, QWORD PTR [rdx+-128]
  29737. mov rax, QWORD PTR [r10+-120]
  29738. mov QWORD PTR [r10+-128], r8
  29739. sbb rax, QWORD PTR [rdx+-120]
  29740. mov r8, QWORD PTR [r10+-112]
  29741. mov QWORD PTR [r10+-120], rax
  29742. sbb r8, QWORD PTR [rdx+-112]
  29743. mov rax, QWORD PTR [r10+-104]
  29744. mov QWORD PTR [r10+-112], r8
  29745. sbb rax, QWORD PTR [rdx+-104]
  29746. mov r8, QWORD PTR [r10+-96]
  29747. mov QWORD PTR [r10+-104], rax
  29748. sbb r8, QWORD PTR [rdx+-96]
  29749. mov rax, QWORD PTR [r10+-88]
  29750. mov QWORD PTR [r10+-96], r8
  29751. sbb rax, QWORD PTR [rdx+-88]
  29752. mov r8, QWORD PTR [r10+-80]
  29753. mov QWORD PTR [r10+-88], rax
  29754. sbb r8, QWORD PTR [rdx+-80]
  29755. mov rax, QWORD PTR [r10+-72]
  29756. mov QWORD PTR [r10+-80], r8
  29757. sbb rax, QWORD PTR [rdx+-72]
  29758. mov r8, QWORD PTR [r10+-64]
  29759. mov QWORD PTR [r10+-72], rax
  29760. sbb r8, QWORD PTR [rdx+-64]
  29761. mov rax, QWORD PTR [r10+-56]
  29762. mov QWORD PTR [r10+-64], r8
  29763. sbb rax, QWORD PTR [rdx+-56]
  29764. mov r8, QWORD PTR [r10+-48]
  29765. mov QWORD PTR [r10+-56], rax
  29766. sbb r8, QWORD PTR [rdx+-48]
  29767. mov rax, QWORD PTR [r10+-40]
  29768. mov QWORD PTR [r10+-48], r8
  29769. sbb rax, QWORD PTR [rdx+-40]
  29770. mov r8, QWORD PTR [r10+-32]
  29771. mov QWORD PTR [r10+-40], rax
  29772. sbb r8, QWORD PTR [rdx+-32]
  29773. mov rax, QWORD PTR [r10+-24]
  29774. mov QWORD PTR [r10+-32], r8
  29775. sbb rax, QWORD PTR [rdx+-24]
  29776. mov r8, QWORD PTR [r10+-16]
  29777. mov QWORD PTR [r10+-24], rax
  29778. sbb r8, QWORD PTR [rdx+-16]
  29779. mov rax, QWORD PTR [r10+-8]
  29780. mov QWORD PTR [r10+-16], r8
  29781. sbb rax, QWORD PTR [rdx+-8]
  29782. mov r8, QWORD PTR [r10]
  29783. mov QWORD PTR [r10+-8], rax
  29784. sbb r8, QWORD PTR [rdx]
  29785. mov rax, QWORD PTR [r10+8]
  29786. mov QWORD PTR [r10], r8
  29787. sbb rax, QWORD PTR [rdx+8]
  29788. mov r8, QWORD PTR [r10+16]
  29789. mov QWORD PTR [r10+8], rax
  29790. sbb r8, QWORD PTR [rdx+16]
  29791. mov rax, QWORD PTR [r10+24]
  29792. mov QWORD PTR [r10+16], r8
  29793. sbb rax, QWORD PTR [rdx+24]
  29794. mov r8, QWORD PTR [r10+32]
  29795. mov QWORD PTR [r10+24], rax
  29796. sbb r8, QWORD PTR [rdx+32]
  29797. mov rax, QWORD PTR [r10+40]
  29798. mov QWORD PTR [r10+32], r8
  29799. sbb rax, QWORD PTR [rdx+40]
  29800. mov r8, QWORD PTR [r10+48]
  29801. mov QWORD PTR [r10+40], rax
  29802. sbb r8, QWORD PTR [rdx+48]
  29803. mov rax, QWORD PTR [r10+56]
  29804. mov QWORD PTR [r10+48], r8
  29805. sbb rax, QWORD PTR [rdx+56]
  29806. mov r8, QWORD PTR [r10+64]
  29807. mov QWORD PTR [r10+56], rax
  29808. sbb r8, QWORD PTR [rdx+64]
  29809. mov rax, QWORD PTR [r10+72]
  29810. mov QWORD PTR [r10+64], r8
  29811. sbb rax, QWORD PTR [rdx+72]
  29812. mov r8, QWORD PTR [r10+80]
  29813. mov QWORD PTR [r10+72], rax
  29814. sbb r8, QWORD PTR [rdx+80]
  29815. mov rax, QWORD PTR [r10+88]
  29816. mov QWORD PTR [r10+80], r8
  29817. sbb rax, QWORD PTR [rdx+88]
  29818. mov r8, QWORD PTR [r10+96]
  29819. mov QWORD PTR [r10+88], rax
  29820. sbb r8, QWORD PTR [rdx+96]
  29821. mov rax, QWORD PTR [r10+104]
  29822. mov QWORD PTR [r10+96], r8
  29823. sbb rax, QWORD PTR [rdx+104]
  29824. mov r8, QWORD PTR [r10+112]
  29825. mov QWORD PTR [r10+104], rax
  29826. sbb r8, QWORD PTR [rdx+112]
  29827. mov rax, QWORD PTR [r10+120]
  29828. mov QWORD PTR [r10+112], r8
  29829. sbb rax, QWORD PTR [rdx+120]
  29830. mov r8, QWORD PTR [r10+128]
  29831. mov QWORD PTR [r10+120], rax
  29832. sbb r8, QWORD PTR [rdx+128]
  29833. mov rax, QWORD PTR [r10+136]
  29834. mov QWORD PTR [r10+128], r8
  29835. sbb rax, QWORD PTR [rdx+136]
  29836. mov r8, QWORD PTR [r10+144]
  29837. mov QWORD PTR [r10+136], rax
  29838. sbb r8, QWORD PTR [rdx+144]
  29839. mov rax, QWORD PTR [r10+152]
  29840. mov QWORD PTR [r10+144], r8
  29841. sbb rax, QWORD PTR [rdx+152]
  29842. mov r8, QWORD PTR [r10+160]
  29843. mov QWORD PTR [r10+152], rax
  29844. sbb r8, QWORD PTR [rdx+160]
  29845. mov rax, QWORD PTR [r10+168]
  29846. mov QWORD PTR [r10+160], r8
  29847. sbb rax, QWORD PTR [rdx+168]
  29848. mov r8, QWORD PTR [r10+176]
  29849. mov QWORD PTR [r10+168], rax
  29850. sbb r8, QWORD PTR [rdx+176]
  29851. mov rax, QWORD PTR [r10+184]
  29852. mov QWORD PTR [r10+176], r8
  29853. sbb rax, QWORD PTR [rdx+184]
  29854. mov QWORD PTR [r10+184], rax
  29855. sbb r9, 0
  29856. sub rdx, 384
  29857. mov r8, QWORD PTR [r10+-192]
  29858. sub r8, QWORD PTR [rdx+-192]
  29859. mov rax, QWORD PTR [r10+-184]
  29860. mov QWORD PTR [r10+-192], r8
  29861. sbb rax, QWORD PTR [rdx+-184]
  29862. mov r8, QWORD PTR [r10+-176]
  29863. mov QWORD PTR [r10+-184], rax
  29864. sbb r8, QWORD PTR [rdx+-176]
  29865. mov rax, QWORD PTR [r10+-168]
  29866. mov QWORD PTR [r10+-176], r8
  29867. sbb rax, QWORD PTR [rdx+-168]
  29868. mov r8, QWORD PTR [r10+-160]
  29869. mov QWORD PTR [r10+-168], rax
  29870. sbb r8, QWORD PTR [rdx+-160]
  29871. mov rax, QWORD PTR [r10+-152]
  29872. mov QWORD PTR [r10+-160], r8
  29873. sbb rax, QWORD PTR [rdx+-152]
  29874. mov r8, QWORD PTR [r10+-144]
  29875. mov QWORD PTR [r10+-152], rax
  29876. sbb r8, QWORD PTR [rdx+-144]
  29877. mov rax, QWORD PTR [r10+-136]
  29878. mov QWORD PTR [r10+-144], r8
  29879. sbb rax, QWORD PTR [rdx+-136]
  29880. mov r8, QWORD PTR [r10+-128]
  29881. mov QWORD PTR [r10+-136], rax
  29882. sbb r8, QWORD PTR [rdx+-128]
  29883. mov rax, QWORD PTR [r10+-120]
  29884. mov QWORD PTR [r10+-128], r8
  29885. sbb rax, QWORD PTR [rdx+-120]
  29886. mov r8, QWORD PTR [r10+-112]
  29887. mov QWORD PTR [r10+-120], rax
  29888. sbb r8, QWORD PTR [rdx+-112]
  29889. mov rax, QWORD PTR [r10+-104]
  29890. mov QWORD PTR [r10+-112], r8
  29891. sbb rax, QWORD PTR [rdx+-104]
  29892. mov r8, QWORD PTR [r10+-96]
  29893. mov QWORD PTR [r10+-104], rax
  29894. sbb r8, QWORD PTR [rdx+-96]
  29895. mov rax, QWORD PTR [r10+-88]
  29896. mov QWORD PTR [r10+-96], r8
  29897. sbb rax, QWORD PTR [rdx+-88]
  29898. mov r8, QWORD PTR [r10+-80]
  29899. mov QWORD PTR [r10+-88], rax
  29900. sbb r8, QWORD PTR [rdx+-80]
  29901. mov rax, QWORD PTR [r10+-72]
  29902. mov QWORD PTR [r10+-80], r8
  29903. sbb rax, QWORD PTR [rdx+-72]
  29904. mov r8, QWORD PTR [r10+-64]
  29905. mov QWORD PTR [r10+-72], rax
  29906. sbb r8, QWORD PTR [rdx+-64]
  29907. mov rax, QWORD PTR [r10+-56]
  29908. mov QWORD PTR [r10+-64], r8
  29909. sbb rax, QWORD PTR [rdx+-56]
  29910. mov r8, QWORD PTR [r10+-48]
  29911. mov QWORD PTR [r10+-56], rax
  29912. sbb r8, QWORD PTR [rdx+-48]
  29913. mov rax, QWORD PTR [r10+-40]
  29914. mov QWORD PTR [r10+-48], r8
  29915. sbb rax, QWORD PTR [rdx+-40]
  29916. mov r8, QWORD PTR [r10+-32]
  29917. mov QWORD PTR [r10+-40], rax
  29918. sbb r8, QWORD PTR [rdx+-32]
  29919. mov rax, QWORD PTR [r10+-24]
  29920. mov QWORD PTR [r10+-32], r8
  29921. sbb rax, QWORD PTR [rdx+-24]
  29922. mov r8, QWORD PTR [r10+-16]
  29923. mov QWORD PTR [r10+-24], rax
  29924. sbb r8, QWORD PTR [rdx+-16]
  29925. mov rax, QWORD PTR [r10+-8]
  29926. mov QWORD PTR [r10+-16], r8
  29927. sbb rax, QWORD PTR [rdx+-8]
  29928. mov r8, QWORD PTR [r10]
  29929. mov QWORD PTR [r10+-8], rax
  29930. sbb r8, QWORD PTR [rdx]
  29931. mov rax, QWORD PTR [r10+8]
  29932. mov QWORD PTR [r10], r8
  29933. sbb rax, QWORD PTR [rdx+8]
  29934. mov r8, QWORD PTR [r10+16]
  29935. mov QWORD PTR [r10+8], rax
  29936. sbb r8, QWORD PTR [rdx+16]
  29937. mov rax, QWORD PTR [r10+24]
  29938. mov QWORD PTR [r10+16], r8
  29939. sbb rax, QWORD PTR [rdx+24]
  29940. mov r8, QWORD PTR [r10+32]
  29941. mov QWORD PTR [r10+24], rax
  29942. sbb r8, QWORD PTR [rdx+32]
  29943. mov rax, QWORD PTR [r10+40]
  29944. mov QWORD PTR [r10+32], r8
  29945. sbb rax, QWORD PTR [rdx+40]
  29946. mov r8, QWORD PTR [r10+48]
  29947. mov QWORD PTR [r10+40], rax
  29948. sbb r8, QWORD PTR [rdx+48]
  29949. mov rax, QWORD PTR [r10+56]
  29950. mov QWORD PTR [r10+48], r8
  29951. sbb rax, QWORD PTR [rdx+56]
  29952. mov r8, QWORD PTR [r10+64]
  29953. mov QWORD PTR [r10+56], rax
  29954. sbb r8, QWORD PTR [rdx+64]
  29955. mov rax, QWORD PTR [r10+72]
  29956. mov QWORD PTR [r10+64], r8
  29957. sbb rax, QWORD PTR [rdx+72]
  29958. mov r8, QWORD PTR [r10+80]
  29959. mov QWORD PTR [r10+72], rax
  29960. sbb r8, QWORD PTR [rdx+80]
  29961. mov rax, QWORD PTR [r10+88]
  29962. mov QWORD PTR [r10+80], r8
  29963. sbb rax, QWORD PTR [rdx+88]
  29964. mov r8, QWORD PTR [r10+96]
  29965. mov QWORD PTR [r10+88], rax
  29966. sbb r8, QWORD PTR [rdx+96]
  29967. mov rax, QWORD PTR [r10+104]
  29968. mov QWORD PTR [r10+96], r8
  29969. sbb rax, QWORD PTR [rdx+104]
  29970. mov r8, QWORD PTR [r10+112]
  29971. mov QWORD PTR [r10+104], rax
  29972. sbb r8, QWORD PTR [rdx+112]
  29973. mov rax, QWORD PTR [r10+120]
  29974. mov QWORD PTR [r10+112], r8
  29975. sbb rax, QWORD PTR [rdx+120]
  29976. mov r8, QWORD PTR [r10+128]
  29977. mov QWORD PTR [r10+120], rax
  29978. sbb r8, QWORD PTR [rdx+128]
  29979. mov rax, QWORD PTR [r10+136]
  29980. mov QWORD PTR [r10+128], r8
  29981. sbb rax, QWORD PTR [rdx+136]
  29982. mov r8, QWORD PTR [r10+144]
  29983. mov QWORD PTR [r10+136], rax
  29984. sbb r8, QWORD PTR [rdx+144]
  29985. mov rax, QWORD PTR [r10+152]
  29986. mov QWORD PTR [r10+144], r8
  29987. sbb rax, QWORD PTR [rdx+152]
  29988. mov r8, QWORD PTR [r10+160]
  29989. mov QWORD PTR [r10+152], rax
  29990. sbb r8, QWORD PTR [rdx+160]
  29991. mov rax, QWORD PTR [r10+168]
  29992. mov QWORD PTR [r10+160], r8
  29993. sbb rax, QWORD PTR [rdx+168]
  29994. mov r8, QWORD PTR [r10+176]
  29995. mov QWORD PTR [r10+168], rax
  29996. sbb r8, QWORD PTR [rdx+176]
  29997. mov rax, QWORD PTR [r10+184]
  29998. mov QWORD PTR [r10+176], r8
  29999. sbb rax, QWORD PTR [rdx+184]
  30000. mov QWORD PTR [r10+184], rax
  30001. sbb r9, 0
  30002. mov rcx, QWORD PTR [rsp+384]
  30003. neg r9
  30004. add rcx, 384
  30005. mov r8, QWORD PTR [rcx+-192]
  30006. sub r8, QWORD PTR [r10+-192]
  30007. mov rax, QWORD PTR [rcx+-184]
  30008. mov QWORD PTR [rcx+-192], r8
  30009. sbb rax, QWORD PTR [r10+-184]
  30010. mov r8, QWORD PTR [rcx+-176]
  30011. mov QWORD PTR [rcx+-184], rax
  30012. sbb r8, QWORD PTR [r10+-176]
  30013. mov rax, QWORD PTR [rcx+-168]
  30014. mov QWORD PTR [rcx+-176], r8
  30015. sbb rax, QWORD PTR [r10+-168]
  30016. mov r8, QWORD PTR [rcx+-160]
  30017. mov QWORD PTR [rcx+-168], rax
  30018. sbb r8, QWORD PTR [r10+-160]
  30019. mov rax, QWORD PTR [rcx+-152]
  30020. mov QWORD PTR [rcx+-160], r8
  30021. sbb rax, QWORD PTR [r10+-152]
  30022. mov r8, QWORD PTR [rcx+-144]
  30023. mov QWORD PTR [rcx+-152], rax
  30024. sbb r8, QWORD PTR [r10+-144]
  30025. mov rax, QWORD PTR [rcx+-136]
  30026. mov QWORD PTR [rcx+-144], r8
  30027. sbb rax, QWORD PTR [r10+-136]
  30028. mov r8, QWORD PTR [rcx+-128]
  30029. mov QWORD PTR [rcx+-136], rax
  30030. sbb r8, QWORD PTR [r10+-128]
  30031. mov rax, QWORD PTR [rcx+-120]
  30032. mov QWORD PTR [rcx+-128], r8
  30033. sbb rax, QWORD PTR [r10+-120]
  30034. mov r8, QWORD PTR [rcx+-112]
  30035. mov QWORD PTR [rcx+-120], rax
  30036. sbb r8, QWORD PTR [r10+-112]
  30037. mov rax, QWORD PTR [rcx+-104]
  30038. mov QWORD PTR [rcx+-112], r8
  30039. sbb rax, QWORD PTR [r10+-104]
  30040. mov r8, QWORD PTR [rcx+-96]
  30041. mov QWORD PTR [rcx+-104], rax
  30042. sbb r8, QWORD PTR [r10+-96]
  30043. mov rax, QWORD PTR [rcx+-88]
  30044. mov QWORD PTR [rcx+-96], r8
  30045. sbb rax, QWORD PTR [r10+-88]
  30046. mov r8, QWORD PTR [rcx+-80]
  30047. mov QWORD PTR [rcx+-88], rax
  30048. sbb r8, QWORD PTR [r10+-80]
  30049. mov rax, QWORD PTR [rcx+-72]
  30050. mov QWORD PTR [rcx+-80], r8
  30051. sbb rax, QWORD PTR [r10+-72]
  30052. mov r8, QWORD PTR [rcx+-64]
  30053. mov QWORD PTR [rcx+-72], rax
  30054. sbb r8, QWORD PTR [r10+-64]
  30055. mov rax, QWORD PTR [rcx+-56]
  30056. mov QWORD PTR [rcx+-64], r8
  30057. sbb rax, QWORD PTR [r10+-56]
  30058. mov r8, QWORD PTR [rcx+-48]
  30059. mov QWORD PTR [rcx+-56], rax
  30060. sbb r8, QWORD PTR [r10+-48]
  30061. mov rax, QWORD PTR [rcx+-40]
  30062. mov QWORD PTR [rcx+-48], r8
  30063. sbb rax, QWORD PTR [r10+-40]
  30064. mov r8, QWORD PTR [rcx+-32]
  30065. mov QWORD PTR [rcx+-40], rax
  30066. sbb r8, QWORD PTR [r10+-32]
  30067. mov rax, QWORD PTR [rcx+-24]
  30068. mov QWORD PTR [rcx+-32], r8
  30069. sbb rax, QWORD PTR [r10+-24]
  30070. mov r8, QWORD PTR [rcx+-16]
  30071. mov QWORD PTR [rcx+-24], rax
  30072. sbb r8, QWORD PTR [r10+-16]
  30073. mov rax, QWORD PTR [rcx+-8]
  30074. mov QWORD PTR [rcx+-16], r8
  30075. sbb rax, QWORD PTR [r10+-8]
  30076. mov r8, QWORD PTR [rcx]
  30077. mov QWORD PTR [rcx+-8], rax
  30078. sbb r8, QWORD PTR [r10]
  30079. mov rax, QWORD PTR [rcx+8]
  30080. mov QWORD PTR [rcx], r8
  30081. sbb rax, QWORD PTR [r10+8]
  30082. mov r8, QWORD PTR [rcx+16]
  30083. mov QWORD PTR [rcx+8], rax
  30084. sbb r8, QWORD PTR [r10+16]
  30085. mov rax, QWORD PTR [rcx+24]
  30086. mov QWORD PTR [rcx+16], r8
  30087. sbb rax, QWORD PTR [r10+24]
  30088. mov r8, QWORD PTR [rcx+32]
  30089. mov QWORD PTR [rcx+24], rax
  30090. sbb r8, QWORD PTR [r10+32]
  30091. mov rax, QWORD PTR [rcx+40]
  30092. mov QWORD PTR [rcx+32], r8
  30093. sbb rax, QWORD PTR [r10+40]
  30094. mov r8, QWORD PTR [rcx+48]
  30095. mov QWORD PTR [rcx+40], rax
  30096. sbb r8, QWORD PTR [r10+48]
  30097. mov rax, QWORD PTR [rcx+56]
  30098. mov QWORD PTR [rcx+48], r8
  30099. sbb rax, QWORD PTR [r10+56]
  30100. mov r8, QWORD PTR [rcx+64]
  30101. mov QWORD PTR [rcx+56], rax
  30102. sbb r8, QWORD PTR [r10+64]
  30103. mov rax, QWORD PTR [rcx+72]
  30104. mov QWORD PTR [rcx+64], r8
  30105. sbb rax, QWORD PTR [r10+72]
  30106. mov r8, QWORD PTR [rcx+80]
  30107. mov QWORD PTR [rcx+72], rax
  30108. sbb r8, QWORD PTR [r10+80]
  30109. mov rax, QWORD PTR [rcx+88]
  30110. mov QWORD PTR [rcx+80], r8
  30111. sbb rax, QWORD PTR [r10+88]
  30112. mov r8, QWORD PTR [rcx+96]
  30113. mov QWORD PTR [rcx+88], rax
  30114. sbb r8, QWORD PTR [r10+96]
  30115. mov rax, QWORD PTR [rcx+104]
  30116. mov QWORD PTR [rcx+96], r8
  30117. sbb rax, QWORD PTR [r10+104]
  30118. mov r8, QWORD PTR [rcx+112]
  30119. mov QWORD PTR [rcx+104], rax
  30120. sbb r8, QWORD PTR [r10+112]
  30121. mov rax, QWORD PTR [rcx+120]
  30122. mov QWORD PTR [rcx+112], r8
  30123. sbb rax, QWORD PTR [r10+120]
  30124. mov r8, QWORD PTR [rcx+128]
  30125. mov QWORD PTR [rcx+120], rax
  30126. sbb r8, QWORD PTR [r10+128]
  30127. mov rax, QWORD PTR [rcx+136]
  30128. mov QWORD PTR [rcx+128], r8
  30129. sbb rax, QWORD PTR [r10+136]
  30130. mov r8, QWORD PTR [rcx+144]
  30131. mov QWORD PTR [rcx+136], rax
  30132. sbb r8, QWORD PTR [r10+144]
  30133. mov rax, QWORD PTR [rcx+152]
  30134. mov QWORD PTR [rcx+144], r8
  30135. sbb rax, QWORD PTR [r10+152]
  30136. mov r8, QWORD PTR [rcx+160]
  30137. mov QWORD PTR [rcx+152], rax
  30138. sbb r8, QWORD PTR [r10+160]
  30139. mov rax, QWORD PTR [rcx+168]
  30140. mov QWORD PTR [rcx+160], r8
  30141. sbb rax, QWORD PTR [r10+168]
  30142. mov r8, QWORD PTR [rcx+176]
  30143. mov QWORD PTR [rcx+168], rax
  30144. sbb r8, QWORD PTR [r10+176]
  30145. mov rax, QWORD PTR [rcx+184]
  30146. mov QWORD PTR [rcx+176], r8
  30147. sbb rax, QWORD PTR [r10+184]
  30148. mov QWORD PTR [rcx+184], rax
  30149. sbb r9, 0
  30150. mov rcx, QWORD PTR [rsp+384]
  30151. add rcx, 576
  30152. ; Add in word
  30153. mov r8, QWORD PTR [rcx]
  30154. add r8, r9
  30155. mov rax, QWORD PTR [rcx+8]
  30156. mov QWORD PTR [rcx], r8
  30157. adc rax, 0
  30158. mov r8, QWORD PTR [rcx+16]
  30159. mov QWORD PTR [rcx+8], rax
  30160. adc r8, 0
  30161. mov rax, QWORD PTR [rcx+24]
  30162. mov QWORD PTR [rcx+16], r8
  30163. adc rax, 0
  30164. mov r8, QWORD PTR [rcx+32]
  30165. mov QWORD PTR [rcx+24], rax
  30166. adc r8, 0
  30167. mov rax, QWORD PTR [rcx+40]
  30168. mov QWORD PTR [rcx+32], r8
  30169. adc rax, 0
  30170. mov r8, QWORD PTR [rcx+48]
  30171. mov QWORD PTR [rcx+40], rax
  30172. adc r8, 0
  30173. mov rax, QWORD PTR [rcx+56]
  30174. mov QWORD PTR [rcx+48], r8
  30175. adc rax, 0
  30176. mov r8, QWORD PTR [rcx+64]
  30177. mov QWORD PTR [rcx+56], rax
  30178. adc r8, 0
  30179. mov rax, QWORD PTR [rcx+72]
  30180. mov QWORD PTR [rcx+64], r8
  30181. adc rax, 0
  30182. mov r8, QWORD PTR [rcx+80]
  30183. mov QWORD PTR [rcx+72], rax
  30184. adc r8, 0
  30185. mov rax, QWORD PTR [rcx+88]
  30186. mov QWORD PTR [rcx+80], r8
  30187. adc rax, 0
  30188. mov r8, QWORD PTR [rcx+96]
  30189. mov QWORD PTR [rcx+88], rax
  30190. adc r8, 0
  30191. mov rax, QWORD PTR [rcx+104]
  30192. mov QWORD PTR [rcx+96], r8
  30193. adc rax, 0
  30194. mov r8, QWORD PTR [rcx+112]
  30195. mov QWORD PTR [rcx+104], rax
  30196. adc r8, 0
  30197. mov rax, QWORD PTR [rcx+120]
  30198. mov QWORD PTR [rcx+112], r8
  30199. adc rax, 0
  30200. mov r8, QWORD PTR [rcx+128]
  30201. mov QWORD PTR [rcx+120], rax
  30202. adc r8, 0
  30203. mov rax, QWORD PTR [rcx+136]
  30204. mov QWORD PTR [rcx+128], r8
  30205. adc rax, 0
  30206. mov r8, QWORD PTR [rcx+144]
  30207. mov QWORD PTR [rcx+136], rax
  30208. adc r8, 0
  30209. mov rax, QWORD PTR [rcx+152]
  30210. mov QWORD PTR [rcx+144], r8
  30211. adc rax, 0
  30212. mov r8, QWORD PTR [rcx+160]
  30213. mov QWORD PTR [rcx+152], rax
  30214. adc r8, 0
  30215. mov rax, QWORD PTR [rcx+168]
  30216. mov QWORD PTR [rcx+160], r8
  30217. adc rax, 0
  30218. mov r8, QWORD PTR [rcx+176]
  30219. mov QWORD PTR [rcx+168], rax
  30220. adc r8, 0
  30221. mov rax, QWORD PTR [rcx+184]
  30222. mov QWORD PTR [rcx+176], r8
  30223. adc rax, 0
  30224. mov QWORD PTR [rcx+184], rax
  30225. mov rdx, QWORD PTR [rsp+392]
  30226. mov rcx, QWORD PTR [rsp+384]
  30227. add rsp, 400
  30228. ret
  30229. sp_3072_sqr_avx2_48 ENDP
  30230. _text ENDS
  30231. ENDIF
  30232. ; /* Multiply the 48-word number a by the single 64-bit digit b and store the 49-word product in r. (r = a * b)
  30233. ; * Registers as used below: rcx = r, rdx = a, r8 = b (this matches the Microsoft x64 argument order); rax, rdx, r9, r10 and r11 are overwritten.
  30234. ; * r A single precision integer; 49 64-bit words are written (byte offsets 0 through 384).
  30235. ; * a A single precision integer; 48 64-bit words are read (byte offsets 0 through 376).
  30236. ; * b A single precision digit (one 64-bit value).
  30237. ; */
  30238. _text SEGMENT READONLY PARA
  30239. sp_3072_mul_d_48 PROC ; NOTE(review): declared without FRAME/unwind directives although r12 is pushed - confirm that is acceptable for callers that need stack unwinding
  30240. push r12 ; r12 serves as an accumulator below, so the incoming value is saved first
  30241. mov r9, rdx ; r9 = a; rdx must be freed because every mul writes the high half of its product there
  30242. ; A[0] * B (r10 = low half, stored as result word 0; r11 = high half, carried into word 1)
  30243. mov rax, r8 ; rax = b, the implicit operand of mul
  30244. xor r12, r12 ; r12 starts at zero; it accumulates the word after next
  30245. mul QWORD PTR [r9]
  30246. mov r10, rax
  30247. mov r11, rdx
  30248. mov QWORD PTR [rcx], r10
  30249. ; A[1] * B (r11 = current result word, r12 = next word, r10 = top accumulator)
  30250. mov rax, r8
  30251. xor r10, r10 ; r10 was already stored, so it is recycled as the new top accumulator
  30252. mul QWORD PTR [r9+8]
  30253. add r11, rax ; result word 1 = high half of the previous product + low half of this one
  30254. mov QWORD PTR [rcx+8], r11 ; mov leaves the flags alone, so the carry from the add is still pending
  30255. adc r12, rdx ; next word starts as this product's high half plus that carry
  30256. adc r10, 0 ; carry out of the adc above, if any, lands in the top accumulator
  30257. ; A[2] * B (same step with the roles rotated: r12 current, r10 next, r11 top)
  30258. mov rax, r8
  30259. xor r11, r11
  30260. mul QWORD PTR [r9+16]
  30261. add r12, rax
  30262. mov QWORD PTR [rcx+16], r12
  30263. adc r10, rdx
  30264. adc r11, 0
  30265. ; A[3] * B (roles rotated again: r10 current, r11 next, r12 top)
  30266. mov rax, r8
  30267. xor r12, r12
  30268. mul QWORD PTR [r9+24]
  30269. add r10, rax
  30270. mov QWORD PTR [rcx+24], r10
  30271. adc r11, rdx
  30272. adc r12, 0
  30273. ; A[4] * B (the three-register rotation repeats from here through A[46])
  30274. mov rax, r8
  30275. xor r10, r10
  30276. mul QWORD PTR [r9+32]
  30277. add r11, rax
  30278. mov QWORD PTR [rcx+32], r11
  30279. adc r12, rdx
  30280. adc r10, 0
  30281. ; A[5] * B
  30282. mov rax, r8
  30283. xor r11, r11
  30284. mul QWORD PTR [r9+40]
  30285. add r12, rax
  30286. mov QWORD PTR [rcx+40], r12
  30287. adc r10, rdx
  30288. adc r11, 0
  30289. ; A[6] * B
  30290. mov rax, r8
  30291. xor r12, r12
  30292. mul QWORD PTR [r9+48]
  30293. add r10, rax
  30294. mov QWORD PTR [rcx+48], r10
  30295. adc r11, rdx
  30296. adc r12, 0
  30297. ; A[7] * B
  30298. mov rax, r8
  30299. xor r10, r10
  30300. mul QWORD PTR [r9+56]
  30301. add r11, rax
  30302. mov QWORD PTR [rcx+56], r11
  30303. adc r12, rdx
  30304. adc r10, 0
  30305. ; A[8] * B
  30306. mov rax, r8
  30307. xor r11, r11
  30308. mul QWORD PTR [r9+64]
  30309. add r12, rax
  30310. mov QWORD PTR [rcx+64], r12
  30311. adc r10, rdx
  30312. adc r11, 0
  30313. ; A[9] * B
  30314. mov rax, r8
  30315. xor r12, r12
  30316. mul QWORD PTR [r9+72]
  30317. add r10, rax
  30318. mov QWORD PTR [rcx+72], r10
  30319. adc r11, rdx
  30320. adc r12, 0
  30321. ; A[10] * B
  30322. mov rax, r8
  30323. xor r10, r10
  30324. mul QWORD PTR [r9+80]
  30325. add r11, rax
  30326. mov QWORD PTR [rcx+80], r11
  30327. adc r12, rdx
  30328. adc r10, 0
  30329. ; A[11] * B
  30330. mov rax, r8
  30331. xor r11, r11
  30332. mul QWORD PTR [r9+88]
  30333. add r12, rax
  30334. mov QWORD PTR [rcx+88], r12
  30335. adc r10, rdx
  30336. adc r11, 0
  30337. ; A[12] * B
  30338. mov rax, r8
  30339. xor r12, r12
  30340. mul QWORD PTR [r9+96]
  30341. add r10, rax
  30342. mov QWORD PTR [rcx+96], r10
  30343. adc r11, rdx
  30344. adc r12, 0
  30345. ; A[13] * B
  30346. mov rax, r8
  30347. xor r10, r10
  30348. mul QWORD PTR [r9+104]
  30349. add r11, rax
  30350. mov QWORD PTR [rcx+104], r11
  30351. adc r12, rdx
  30352. adc r10, 0
  30353. ; A[14] * B
  30354. mov rax, r8
  30355. xor r11, r11
  30356. mul QWORD PTR [r9+112]
  30357. add r12, rax
  30358. mov QWORD PTR [rcx+112], r12
  30359. adc r10, rdx
  30360. adc r11, 0
  30361. ; A[15] * B
  30362. mov rax, r8
  30363. xor r12, r12
  30364. mul QWORD PTR [r9+120]
  30365. add r10, rax
  30366. mov QWORD PTR [rcx+120], r10
  30367. adc r11, rdx
  30368. adc r12, 0
  30369. ; A[16] * B
  30370. mov rax, r8
  30371. xor r10, r10
  30372. mul QWORD PTR [r9+128]
  30373. add r11, rax
  30374. mov QWORD PTR [rcx+128], r11
  30375. adc r12, rdx
  30376. adc r10, 0
  30377. ; A[17] * B
  30378. mov rax, r8
  30379. xor r11, r11
  30380. mul QWORD PTR [r9+136]
  30381. add r12, rax
  30382. mov QWORD PTR [rcx+136], r12
  30383. adc r10, rdx
  30384. adc r11, 0
  30385. ; A[18] * B
  30386. mov rax, r8
  30387. xor r12, r12
  30388. mul QWORD PTR [r9+144]
  30389. add r10, rax
  30390. mov QWORD PTR [rcx+144], r10
  30391. adc r11, rdx
  30392. adc r12, 0
  30393. ; A[19] * B
  30394. mov rax, r8
  30395. xor r10, r10
  30396. mul QWORD PTR [r9+152]
  30397. add r11, rax
  30398. mov QWORD PTR [rcx+152], r11
  30399. adc r12, rdx
  30400. adc r10, 0
  30401. ; A[20] * B
  30402. mov rax, r8
  30403. xor r11, r11
  30404. mul QWORD PTR [r9+160]
  30405. add r12, rax
  30406. mov QWORD PTR [rcx+160], r12
  30407. adc r10, rdx
  30408. adc r11, 0
  30409. ; A[21] * B
  30410. mov rax, r8
  30411. xor r12, r12
  30412. mul QWORD PTR [r9+168]
  30413. add r10, rax
  30414. mov QWORD PTR [rcx+168], r10
  30415. adc r11, rdx
  30416. adc r12, 0
  30417. ; A[22] * B
  30418. mov rax, r8
  30419. xor r10, r10
  30420. mul QWORD PTR [r9+176]
  30421. add r11, rax
  30422. mov QWORD PTR [rcx+176], r11
  30423. adc r12, rdx
  30424. adc r10, 0
  30425. ; A[23] * B
  30426. mov rax, r8
  30427. xor r11, r11
  30428. mul QWORD PTR [r9+184]
  30429. add r12, rax
  30430. mov QWORD PTR [rcx+184], r12
  30431. adc r10, rdx
  30432. adc r11, 0
  30433. ; A[24] * B
  30434. mov rax, r8
  30435. xor r12, r12
  30436. mul QWORD PTR [r9+192]
  30437. add r10, rax
  30438. mov QWORD PTR [rcx+192], r10
  30439. adc r11, rdx
  30440. adc r12, 0
  30441. ; A[25] * B
  30442. mov rax, r8
  30443. xor r10, r10
  30444. mul QWORD PTR [r9+200]
  30445. add r11, rax
  30446. mov QWORD PTR [rcx+200], r11
  30447. adc r12, rdx
  30448. adc r10, 0
  30449. ; A[26] * B
  30450. mov rax, r8
  30451. xor r11, r11
  30452. mul QWORD PTR [r9+208]
  30453. add r12, rax
  30454. mov QWORD PTR [rcx+208], r12
  30455. adc r10, rdx
  30456. adc r11, 0
  30457. ; A[27] * B
  30458. mov rax, r8
  30459. xor r12, r12
  30460. mul QWORD PTR [r9+216]
  30461. add r10, rax
  30462. mov QWORD PTR [rcx+216], r10
  30463. adc r11, rdx
  30464. adc r12, 0
  30465. ; A[28] * B
  30466. mov rax, r8
  30467. xor r10, r10
  30468. mul QWORD PTR [r9+224]
  30469. add r11, rax
  30470. mov QWORD PTR [rcx+224], r11
  30471. adc r12, rdx
  30472. adc r10, 0
  30473. ; A[29] * B
  30474. mov rax, r8
  30475. xor r11, r11
  30476. mul QWORD PTR [r9+232]
  30477. add r12, rax
  30478. mov QWORD PTR [rcx+232], r12
  30479. adc r10, rdx
  30480. adc r11, 0
  30481. ; A[30] * B
  30482. mov rax, r8
  30483. xor r12, r12
  30484. mul QWORD PTR [r9+240]
  30485. add r10, rax
  30486. mov QWORD PTR [rcx+240], r10
  30487. adc r11, rdx
  30488. adc r12, 0
  30489. ; A[31] * B
  30490. mov rax, r8
  30491. xor r10, r10
  30492. mul QWORD PTR [r9+248]
  30493. add r11, rax
  30494. mov QWORD PTR [rcx+248], r11
  30495. adc r12, rdx
  30496. adc r10, 0
  30497. ; A[32] * B
  30498. mov rax, r8
  30499. xor r11, r11
  30500. mul QWORD PTR [r9+256]
  30501. add r12, rax
  30502. mov QWORD PTR [rcx+256], r12
  30503. adc r10, rdx
  30504. adc r11, 0
  30505. ; A[33] * B
  30506. mov rax, r8
  30507. xor r12, r12
  30508. mul QWORD PTR [r9+264]
  30509. add r10, rax
  30510. mov QWORD PTR [rcx+264], r10
  30511. adc r11, rdx
  30512. adc r12, 0
  30513. ; A[34] * B
  30514. mov rax, r8
  30515. xor r10, r10
  30516. mul QWORD PTR [r9+272]
  30517. add r11, rax
  30518. mov QWORD PTR [rcx+272], r11
  30519. adc r12, rdx
  30520. adc r10, 0
  30521. ; A[35] * B
  30522. mov rax, r8
  30523. xor r11, r11
  30524. mul QWORD PTR [r9+280]
  30525. add r12, rax
  30526. mov QWORD PTR [rcx+280], r12
  30527. adc r10, rdx
  30528. adc r11, 0
  30529. ; A[36] * B
  30530. mov rax, r8
  30531. xor r12, r12
  30532. mul QWORD PTR [r9+288]
  30533. add r10, rax
  30534. mov QWORD PTR [rcx+288], r10
  30535. adc r11, rdx
  30536. adc r12, 0
  30537. ; A[37] * B
  30538. mov rax, r8
  30539. xor r10, r10
  30540. mul QWORD PTR [r9+296]
  30541. add r11, rax
  30542. mov QWORD PTR [rcx+296], r11
  30543. adc r12, rdx
  30544. adc r10, 0
  30545. ; A[38] * B
  30546. mov rax, r8
  30547. xor r11, r11
  30548. mul QWORD PTR [r9+304]
  30549. add r12, rax
  30550. mov QWORD PTR [rcx+304], r12
  30551. adc r10, rdx
  30552. adc r11, 0
  30553. ; A[39] * B
  30554. mov rax, r8
  30555. xor r12, r12
  30556. mul QWORD PTR [r9+312]
  30557. add r10, rax
  30558. mov QWORD PTR [rcx+312], r10
  30559. adc r11, rdx
  30560. adc r12, 0
  30561. ; A[40] * B
  30562. mov rax, r8
  30563. xor r10, r10
  30564. mul QWORD PTR [r9+320]
  30565. add r11, rax
  30566. mov QWORD PTR [rcx+320], r11
  30567. adc r12, rdx
  30568. adc r10, 0
  30569. ; A[41] * B
  30570. mov rax, r8
  30571. xor r11, r11
  30572. mul QWORD PTR [r9+328]
  30573. add r12, rax
  30574. mov QWORD PTR [rcx+328], r12
  30575. adc r10, rdx
  30576. adc r11, 0
  30577. ; A[42] * B
  30578. mov rax, r8
  30579. xor r12, r12
  30580. mul QWORD PTR [r9+336]
  30581. add r10, rax
  30582. mov QWORD PTR [rcx+336], r10
  30583. adc r11, rdx
  30584. adc r12, 0
  30585. ; A[43] * B
  30586. mov rax, r8
  30587. xor r10, r10
  30588. mul QWORD PTR [r9+344]
  30589. add r11, rax
  30590. mov QWORD PTR [rcx+344], r11
  30591. adc r12, rdx
  30592. adc r10, 0
  30593. ; A[44] * B
  30594. mov rax, r8
  30595. xor r11, r11
  30596. mul QWORD PTR [r9+352]
  30597. add r12, rax
  30598. mov QWORD PTR [rcx+352], r12
  30599. adc r10, rdx
  30600. adc r11, 0
  30601. ; A[45] * B
  30602. mov rax, r8
  30603. xor r12, r12
  30604. mul QWORD PTR [r9+360]
  30605. add r10, rax
  30606. mov QWORD PTR [rcx+360], r10
  30607. adc r11, rdx
  30608. adc r12, 0
  30609. ; A[46] * B
  30610. mov rax, r8
  30611. xor r10, r10
  30612. mul QWORD PTR [r9+368]
  30613. add r11, rax
  30614. mov QWORD PTR [rcx+368], r11
  30615. adc r12, rdx
  30616. adc r10, 0
  30617. ; A[47] * B (final word: no new accumulator is cleared; r12 = word 47, r10 = word 48)
  30618. mov rax, r8
  30619. mul QWORD PTR [r9+376]
  30620. add r12, rax
  30621. adc r10, rdx ; r10 = high half of the last product plus the carry from word 47
  30622. mov QWORD PTR [rcx+376], r12
  30623. mov QWORD PTR [rcx+384], r10 ; 49th and most significant result word
  30624. pop r12 ; restore the value saved on entry
  30625. ret
  30626. sp_3072_mul_d_48 ENDP
  30627. _text ENDS
  30628. ; /* Conditionally subtract b from a using the mask m, over 24 64-bit words: r = a - (b AND m).
  30629. ; * m is -1 (all bits set) to subtract b and 0 to subtract nothing, in which case r receives a unchanged.
  30630. ; * Registers as used below: rcx = r, rdx = a, r8 = b, r9 = m; rax returns 0 when no borrow leaves the top word and -1 when one does. r8, r10 and r11 are overwritten.
  30631. ; * r A single precision number that receives the conditional subtraction result.
  30632. ; * a A single precision number to subtract from.
  30633. ; * b A single precision number to subtract.
  30634. ; * m Mask value ANDed with every word of b before the subtraction.
  30635. ; */
  30636. _text SEGMENT READONLY PARA
  30637. sp_3072_cond_sub_24 PROC ; NOTE(review): declared without FRAME/unwind directives although rsp is adjusted - confirm that is acceptable for callers that need stack unwinding
  30638. sub rsp, 192 ; reserve 24 x 8 bytes of stack scratch for the masked copy of b
  30639. mov r10, QWORD PTR [r8] ; pass 1: copy b to the scratch area two words at a time, masking each word
  30640. mov r11, QWORD PTR [r8+8]
  30641. and r10, r9 ; keeps the word when m is all ones, zeroes it when m is 0
  30642. and r11, r9
  30643. mov QWORD PTR [rsp], r10
  30644. mov QWORD PTR [rsp+8], r11
  30645. mov r10, QWORD PTR [r8+16]
  30646. mov r11, QWORD PTR [r8+24]
  30647. and r10, r9
  30648. and r11, r9
  30649. mov QWORD PTR [rsp+16], r10
  30650. mov QWORD PTR [rsp+24], r11
  30651. mov r10, QWORD PTR [r8+32]
  30652. mov r11, QWORD PTR [r8+40]
  30653. and r10, r9
  30654. and r11, r9
  30655. mov QWORD PTR [rsp+32], r10
  30656. mov QWORD PTR [rsp+40], r11
  30657. mov r10, QWORD PTR [r8+48]
  30658. mov r11, QWORD PTR [r8+56]
  30659. and r10, r9
  30660. and r11, r9
  30661. mov QWORD PTR [rsp+48], r10
  30662. mov QWORD PTR [rsp+56], r11
  30663. mov r10, QWORD PTR [r8+64]
  30664. mov r11, QWORD PTR [r8+72]
  30665. and r10, r9
  30666. and r11, r9
  30667. mov QWORD PTR [rsp+64], r10
  30668. mov QWORD PTR [rsp+72], r11
  30669. mov r10, QWORD PTR [r8+80]
  30670. mov r11, QWORD PTR [r8+88]
  30671. and r10, r9
  30672. and r11, r9
  30673. mov QWORD PTR [rsp+80], r10
  30674. mov QWORD PTR [rsp+88], r11
  30675. mov r10, QWORD PTR [r8+96]
  30676. mov r11, QWORD PTR [r8+104]
  30677. and r10, r9
  30678. and r11, r9
  30679. mov QWORD PTR [rsp+96], r10
  30680. mov QWORD PTR [rsp+104], r11
  30681. mov r10, QWORD PTR [r8+112]
  30682. mov r11, QWORD PTR [r8+120]
  30683. and r10, r9
  30684. and r11, r9
  30685. mov QWORD PTR [rsp+112], r10
  30686. mov QWORD PTR [rsp+120], r11
  30687. mov r10, QWORD PTR [r8+128]
  30688. mov r11, QWORD PTR [r8+136]
  30689. and r10, r9
  30690. and r11, r9
  30691. mov QWORD PTR [rsp+128], r10
  30692. mov QWORD PTR [rsp+136], r11
  30693. mov r10, QWORD PTR [r8+144]
  30694. mov r11, QWORD PTR [r8+152]
  30695. and r10, r9
  30696. and r11, r9
  30697. mov QWORD PTR [rsp+144], r10
  30698. mov QWORD PTR [rsp+152], r11
  30699. mov r10, QWORD PTR [r8+160]
  30700. mov r11, QWORD PTR [r8+168]
  30701. and r10, r9
  30702. and r11, r9
  30703. mov QWORD PTR [rsp+160], r10
  30704. mov QWORD PTR [rsp+168], r11
  30705. mov r10, QWORD PTR [r8+176]
  30706. mov r11, QWORD PTR [r8+184]
  30707. and r10, r9
  30708. and r11, r9
  30709. mov QWORD PTR [rsp+176], r10
  30710. mov QWORD PTR [rsp+184], r11
  30711. mov r10, QWORD PTR [rdx] ; pass 2: r = a - masked b as one borrow chain (sub for word 0, sbb for words 1..23)
  30712. mov r8, QWORD PTR [rsp] ; b has been fully copied, so r8 is reused to hold each masked word
  30713. sub r10, r8
  30714. mov r11, QWORD PTR [rdx+8]
  30715. mov r8, QWORD PTR [rsp+8]
  30716. sbb r11, r8
  30717. mov QWORD PTR [rcx], r10 ; each store trails its subtraction by one word; mov leaves the borrow flag untouched
  30718. mov r10, QWORD PTR [rdx+16]
  30719. mov r8, QWORD PTR [rsp+16]
  30720. sbb r10, r8
  30721. mov QWORD PTR [rcx+8], r11
  30722. mov r11, QWORD PTR [rdx+24]
  30723. mov r8, QWORD PTR [rsp+24]
  30724. sbb r11, r8
  30725. mov QWORD PTR [rcx+16], r10
  30726. mov r10, QWORD PTR [rdx+32]
  30727. mov r8, QWORD PTR [rsp+32]
  30728. sbb r10, r8
  30729. mov QWORD PTR [rcx+24], r11
  30730. mov r11, QWORD PTR [rdx+40]
  30731. mov r8, QWORD PTR [rsp+40]
  30732. sbb r11, r8
  30733. mov QWORD PTR [rcx+32], r10
  30734. mov r10, QWORD PTR [rdx+48]
  30735. mov r8, QWORD PTR [rsp+48]
  30736. sbb r10, r8
  30737. mov QWORD PTR [rcx+40], r11
  30738. mov r11, QWORD PTR [rdx+56]
  30739. mov r8, QWORD PTR [rsp+56]
  30740. sbb r11, r8
  30741. mov QWORD PTR [rcx+48], r10
  30742. mov r10, QWORD PTR [rdx+64]
  30743. mov r8, QWORD PTR [rsp+64]
  30744. sbb r10, r8
  30745. mov QWORD PTR [rcx+56], r11
  30746. mov r11, QWORD PTR [rdx+72]
  30747. mov r8, QWORD PTR [rsp+72]
  30748. sbb r11, r8
  30749. mov QWORD PTR [rcx+64], r10
  30750. mov r10, QWORD PTR [rdx+80]
  30751. mov r8, QWORD PTR [rsp+80]
  30752. sbb r10, r8
  30753. mov QWORD PTR [rcx+72], r11
  30754. mov r11, QWORD PTR [rdx+88]
  30755. mov r8, QWORD PTR [rsp+88]
  30756. sbb r11, r8
  30757. mov QWORD PTR [rcx+80], r10
  30758. mov r10, QWORD PTR [rdx+96]
  30759. mov r8, QWORD PTR [rsp+96]
  30760. sbb r10, r8
  30761. mov QWORD PTR [rcx+88], r11
  30762. mov r11, QWORD PTR [rdx+104]
  30763. mov r8, QWORD PTR [rsp+104]
  30764. sbb r11, r8
  30765. mov QWORD PTR [rcx+96], r10
  30766. mov r10, QWORD PTR [rdx+112]
  30767. mov r8, QWORD PTR [rsp+112]
  30768. sbb r10, r8
  30769. mov QWORD PTR [rcx+104], r11
  30770. mov r11, QWORD PTR [rdx+120]
  30771. mov r8, QWORD PTR [rsp+120]
  30772. sbb r11, r8
  30773. mov QWORD PTR [rcx+112], r10
  30774. mov r10, QWORD PTR [rdx+128]
  30775. mov r8, QWORD PTR [rsp+128]
  30776. sbb r10, r8
  30777. mov QWORD PTR [rcx+120], r11
  30778. mov r11, QWORD PTR [rdx+136]
  30779. mov r8, QWORD PTR [rsp+136]
  30780. sbb r11, r8
  30781. mov QWORD PTR [rcx+128], r10
  30782. mov r10, QWORD PTR [rdx+144]
  30783. mov r8, QWORD PTR [rsp+144]
  30784. sbb r10, r8
  30785. mov QWORD PTR [rcx+136], r11
  30786. mov r11, QWORD PTR [rdx+152]
  30787. mov r8, QWORD PTR [rsp+152]
  30788. sbb r11, r8
  30789. mov QWORD PTR [rcx+144], r10
  30790. mov r10, QWORD PTR [rdx+160]
  30791. mov r8, QWORD PTR [rsp+160]
  30792. sbb r10, r8
  30793. mov QWORD PTR [rcx+152], r11
  30794. mov r11, QWORD PTR [rdx+168]
  30795. mov r8, QWORD PTR [rsp+168]
  30796. sbb r11, r8
  30797. mov QWORD PTR [rcx+160], r10
  30798. mov r10, QWORD PTR [rdx+176]
  30799. mov r8, QWORD PTR [rsp+176]
  30800. sbb r10, r8
  30801. mov QWORD PTR [rcx+168], r11
  30802. mov r11, QWORD PTR [rdx+184]
  30803. mov r8, QWORD PTR [rsp+184]
  30804. sbb r11, r8
  30805. mov QWORD PTR [rcx+176], r10
  30806. mov QWORD PTR [rcx+184], r11
  30807. sbb rax, rax ; rax = 0 - borrow: 0 if the chain ended without a borrow, -1 if it borrowed
  30808. add rsp, 192 ; release the scratch area (done after the sbb above, which needed the borrow flag)
  30809. ret
  30810. sp_3072_cond_sub_24 ENDP
  30811. _text ENDS
  ; /* Reduce the number back to 3072 bits using Montgomery reduction.
  ;  *
  ;  * a   (rcx) A single precision number to reduce in place. Accessed as 48
  ;  *           64-bit words; the reduced value is written to words 0..23.
  ;  * m   (rdx) The single precision number representing the modulus (24 words).
  ;  * mp  (r8)  The digit representing the negative inverse of m mod 2^n.
  ;  *
  ;  * Microsoft x64 calling convention. r12-r15, rdi and rsi are saved and
  ;  * restored; rax, rcx, rdx, r8-r11 and the flags are clobbered. rax is left
  ;  * as returned by sp_3072_cond_sub_24.
  ;  *
  ;  * Register roles inside the loop:
  ;  *   rcx = &a[i]        r9  = m            r8  = mp
  ;  *   r13 = mu           r10 = rows left    r14 = scratch word
  ;  *   r11/r12 = carry words, alternating column by column
  ;  *   r15 = cached a[i], rdi = cached a[i+1] (kept in registers across rows)
  ;  *   rsi = carry out of the top word of the previous row (0 or 1)
  ;  */
  ; One interior column of a row: a[i+k] += m[k] * mu + carry-in.
  ;   disp  byte offset of word k (k * 8)
  ;   cin   register holding the carry word produced by column k-1
  ;   cout  register that receives the carry word for column k+1
  ; Expects r13 = mu, r9 = m, rcx = &a[i]; uses rax, rdx and r14 as scratch.
  sp_3072_mont_reduce_24_col MACRO disp, cin, cout
          mov     rax, r13
          xor     cout, cout
          mul     QWORD PTR [r9+disp]
          mov     r14, QWORD PTR [rcx+disp]
          add     r14, rax
          adc     cout, rdx
          add     r14, cin
          mov     QWORD PTR [rcx+disp], r14
          adc     cout, 0
  ENDM
  _text SEGMENT READONLY PARA
  sp_3072_mont_reduce_24 PROC
          push    r12
          push    r13
          push    r14
          push    r15
          push    rdi
          push    rsi
          mov     r9, rdx                         ; r9 = m; rdx is overwritten by every mul
          xor     rsi, rsi                        ; no carry into the first row
          ; i = 24
          mov     r10, 24
          mov     r15, QWORD PTR [rcx]            ; r15 = a[0]
          mov     rdi, QWORD PTR [rcx+8]          ; rdi = a[1]
  L_3072_mont_reduce_24_loop:
          ; mu = a[i] * mp
          mov     r13, r15
          imul    r13, r8
          ; a[i+0] += m[0] * mu
          ; The low word of this sum is discarded (r15 is reloaded below);
          ; only the carry in r12 is kept.
          mov     rax, r13
          xor     r12, r12
          mul     QWORD PTR [r9]
          add     r15, rax
          adc     r12, rdx
          ; a[i+1] += m[1] * mu
          ; The result stays in r15: it is a[i] of the next row.
          mov     rax, r13
          xor     r11, r11
          mul     QWORD PTR [r9+8]
          mov     r15, rdi
          add     r15, rax
          adc     r11, rdx
          add     r15, r12
          adc     r11, 0
          ; a[i+2] += m[2] * mu
          ; The result stays in rdi: it is a[i+1] of the next row.
          mov     rax, r13
          xor     r12, r12
          mul     QWORD PTR [r9+16]
          mov     rdi, QWORD PTR [rcx+16]
          add     rdi, rax
          adc     r12, rdx
          add     rdi, r11
          adc     r12, 0
          ; a[i+3] .. a[i+22] += m[3..22] * mu
          sp_3072_mont_reduce_24_col 24, r12, r11
          sp_3072_mont_reduce_24_col 32, r11, r12
          sp_3072_mont_reduce_24_col 40, r12, r11
          sp_3072_mont_reduce_24_col 48, r11, r12
          sp_3072_mont_reduce_24_col 56, r12, r11
          sp_3072_mont_reduce_24_col 64, r11, r12
          sp_3072_mont_reduce_24_col 72, r12, r11
          sp_3072_mont_reduce_24_col 80, r11, r12
          sp_3072_mont_reduce_24_col 88, r12, r11
          sp_3072_mont_reduce_24_col 96, r11, r12
          sp_3072_mont_reduce_24_col 104, r12, r11
          sp_3072_mont_reduce_24_col 112, r11, r12
          sp_3072_mont_reduce_24_col 120, r12, r11
          sp_3072_mont_reduce_24_col 128, r11, r12
          sp_3072_mont_reduce_24_col 136, r12, r11
          sp_3072_mont_reduce_24_col 144, r11, r12
          sp_3072_mont_reduce_24_col 152, r12, r11
          sp_3072_mont_reduce_24_col 160, r11, r12
          sp_3072_mont_reduce_24_col 168, r12, r11
          sp_3072_mont_reduce_24_col 176, r11, r12
          ; a[i+23] += m[23] * mu
          ; The high half, plus the carry saved from the previous row (rsi),
          ; is added into a[i+24]; rsi then collects the carry out of that.
          mov     rax, r13
          mul     QWORD PTR [r9+184]
          mov     r14, QWORD PTR [rcx+184]
          add     r12, rax
          adc     rdx, rsi
          mov     rsi, 0                          ; mov, not xor: CF must survive
          adc     rsi, 0
          add     r14, r12
          mov     QWORD PTR [rcx+184], r14
          adc     QWORD PTR [rcx+192], rdx
          adc     rsi, 0
          ; i -= 1
          add     rcx, 8
          dec     r10
          jnz     L_3072_mont_reduce_24_loop
          ; rcx now points at a[24]; write back the two register-cached words.
          mov     QWORD PTR [rcx], r15
          mov     QWORD PTR [rcx+8], rdi
          neg     rsi                             ; carry 0/1 -> mask 0/-1
          ; sp_3072_cond_sub_24(r = a, a = a + 24 words, b = m, m = mask)
          mov     r8, r9
          mov     r9, rsi
          mov     rdx, rcx
          sub     rcx, 192
          ; 32 bytes of shadow space plus 8 bytes of padding: rsp is
          ; 8 (mod 16) after the six pushes and must be 16-byte aligned at
          ; the call.
          sub     rsp, 40
          call    sp_3072_cond_sub_24
          add     rsp, 40
          pop     rsi
          pop     rdi
          pop     r15
          pop     r14
          pop     r13
          pop     r12
          ret
  sp_3072_mont_reduce_24 ENDP
  _text ENDS
  IFDEF HAVE_INTEL_AVX2
  ; /* Conditionally subtract b from a using the mask m.
  ;  * m is -1 to subtract and 0 to leave a unchanged.
  ;  *
  ;  * r  (rcx) A single precision number representing condition subtract result.
  ;  * a  (rdx) A single precision number to subtract from.
  ;  * b  (r8)  A single precision number to subtract.
  ;  * m  (r9)  Mask value to apply.
  ;  *
  ;  * All three numbers are 24 64-bit words. Each word of b is filtered through
  ;  * pext with m as the bit mask (m = -1 keeps the word, m = 0 gives 0) before
  ;  * it enters the sub/sbb borrow chain. Returns rax = 0 when there was no
  ;  * borrow out of the top word and -1 when there was.
  ;  * r12 is saved and restored; r10, r11 and the flags are clobbered.
  ;  */
  ; Words 1..23: load word k of b and a, mask b, store the previous result
  ; word, then subtract with borrow. Nothing between the sbb of one word and
  ; the sbb of the next changes CF (mov and pext leave the flags alone).
  ;   disp  byte offset of word k (k * 8)
  ;   breg  register for the masked word of b
  ;   areg  register for the word of a / the result
  ;   prev  register holding result word k-1, stored at disp-8
  sp_3072_cond_sub_avx2_24_word MACRO disp, breg, areg, prev
          mov     breg, QWORD PTR [r8+disp]
          mov     areg, QWORD PTR [rdx+disp]
          pext    breg, breg, r9
          mov     QWORD PTR [rcx+disp-8], prev
          sbb     areg, breg
  ENDM
  _text SEGMENT READONLY PARA
  sp_3072_cond_sub_avx2_24 PROC
          push    r12
          ; Word 0 starts the borrow chain with a plain sub.
          mov     r12, QWORD PTR [r8]
          mov     r10, QWORD PTR [rdx]
          pext    r12, r12, r9
          sub     r10, r12
          ; Words 1..23; the three scratch registers rotate every word.
          sp_3072_cond_sub_avx2_24_word 8, r12, r11, r10
          sp_3072_cond_sub_avx2_24_word 16, r10, r12, r11
          sp_3072_cond_sub_avx2_24_word 24, r11, r10, r12
          sp_3072_cond_sub_avx2_24_word 32, r12, r11, r10
          sp_3072_cond_sub_avx2_24_word 40, r10, r12, r11
          sp_3072_cond_sub_avx2_24_word 48, r11, r10, r12
          sp_3072_cond_sub_avx2_24_word 56, r12, r11, r10
          sp_3072_cond_sub_avx2_24_word 64, r10, r12, r11
          sp_3072_cond_sub_avx2_24_word 72, r11, r10, r12
          sp_3072_cond_sub_avx2_24_word 80, r12, r11, r10
          sp_3072_cond_sub_avx2_24_word 88, r10, r12, r11
          sp_3072_cond_sub_avx2_24_word 96, r11, r10, r12
          sp_3072_cond_sub_avx2_24_word 104, r12, r11, r10
          sp_3072_cond_sub_avx2_24_word 112, r10, r12, r11
          sp_3072_cond_sub_avx2_24_word 120, r11, r10, r12
          sp_3072_cond_sub_avx2_24_word 128, r12, r11, r10
          sp_3072_cond_sub_avx2_24_word 136, r10, r12, r11
          sp_3072_cond_sub_avx2_24_word 144, r11, r10, r12
          sp_3072_cond_sub_avx2_24_word 152, r12, r11, r10
          sp_3072_cond_sub_avx2_24_word 160, r10, r12, r11
          sp_3072_cond_sub_avx2_24_word 168, r11, r10, r12
          sp_3072_cond_sub_avx2_24_word 176, r12, r11, r10
          sp_3072_cond_sub_avx2_24_word 184, r10, r12, r11
          ; Store the top word and turn the final borrow into 0 / -1.
          mov     QWORD PTR [rcx+184], r12
          sbb     rax, rax
          pop     r12
          ret
  sp_3072_cond_sub_avx2_24 ENDP
  _text ENDS
  ENDIF
  ; /* Mul a by digit b into r. (r = a * b)
  ;  *
  ;  * r  (rcx) A single precision integer; receives 25 64-bit words.
  ;  * a  (rdx) A single precision integer of 24 64-bit words.
  ;  * b  (r8)  A single precision digit.
  ;  *
  ;  * r12 is saved and restored; rax, rdx, r9, r10, r11 and the flags are
  ;  * clobbered.
  ;  */
  ; Words 1..22: multiply a[k] by b, add the low half to the running word,
  ; store it, and push the high half plus carry into the next two registers.
  ;   disp   byte offset of word k (k * 8)
  ;   cur    register holding the pending result word k
  ;   nxt    register accumulating result word k+1
  ;   spare  register zeroed here to collect the carry out of nxt
  sp_3072_mul_d_24_word MACRO disp, cur, nxt, spare
          mov     rax, r8
          xor     spare, spare
          mul     QWORD PTR [r9+disp]
          add     cur, rax
          mov     QWORD PTR [rcx+disp], cur
          adc     nxt, rdx
          adc     spare, 0
  ENDM
  _text SEGMENT READONLY PARA
  sp_3072_mul_d_24 PROC
          push    r12
          mov     r9, rdx                         ; r9 = a; rdx is overwritten by mul
          ; A[0] * B
          mov     rax, r8
          xor     r12, r12
          mul     QWORD PTR [r9]
          mov     r10, rax
          mov     r11, rdx
          mov     QWORD PTR [rcx], r10
          ; A[1] * B .. A[22] * B; the three accumulators rotate every word.
          sp_3072_mul_d_24_word 8, r11, r12, r10
          sp_3072_mul_d_24_word 16, r12, r10, r11
          sp_3072_mul_d_24_word 24, r10, r11, r12
          sp_3072_mul_d_24_word 32, r11, r12, r10
          sp_3072_mul_d_24_word 40, r12, r10, r11
          sp_3072_mul_d_24_word 48, r10, r11, r12
          sp_3072_mul_d_24_word 56, r11, r12, r10
          sp_3072_mul_d_24_word 64, r12, r10, r11
          sp_3072_mul_d_24_word 72, r10, r11, r12
          sp_3072_mul_d_24_word 80, r11, r12, r10
          sp_3072_mul_d_24_word 88, r12, r10, r11
          sp_3072_mul_d_24_word 96, r10, r11, r12
          sp_3072_mul_d_24_word 104, r11, r12, r10
          sp_3072_mul_d_24_word 112, r12, r10, r11
          sp_3072_mul_d_24_word 120, r10, r11, r12
          sp_3072_mul_d_24_word 128, r11, r12, r10
          sp_3072_mul_d_24_word 136, r12, r10, r11
          sp_3072_mul_d_24_word 144, r10, r11, r12
          sp_3072_mul_d_24_word 152, r11, r12, r10
          sp_3072_mul_d_24_word 160, r12, r10, r11
          sp_3072_mul_d_24_word 168, r10, r11, r12
          sp_3072_mul_d_24_word 176, r11, r12, r10
          ; A[23] * B: the last product fills result words 23 and 24.
          mov     rax, r8
          mul     QWORD PTR [r9+184]
          add     r12, rax
          adc     r10, rdx
          mov     QWORD PTR [rcx+184], r12
          mov     QWORD PTR [rcx+192], r10
          pop     r12
          ret
  sp_3072_mul_d_24 ENDP
  _text ENDS
  IFDEF HAVE_INTEL_AVX2
  ; /* Mul a by digit b into r. (r = a * b)
  ;  *
  ;  * r  (rcx) A single precision integer; receives 25 64-bit words.
  ;  * a  (rdx) A single precision integer of 24 64-bit words.
  ;  * b  (r8)  A single precision digit.
  ;  *
  ;  * Uses mulx with b held in rdx, and the adcx / adox carry chains.
  ;  * r12 and r13 are saved and restored; rax, rdx, r9, r10, r11 and the
  ;  * flags are clobbered.
  ;  */
  ; Words 1..22: r9:r10 = low:high of a[k] * b. The low half is added to the
  ; pending word through the CF chain; the high half seeds the next word
  ; through the OF chain. r13 is a constant zero.
  ;   disp  byte offset of word k (k * 8)
  ;   acc   register holding the pending result word k
  ;   nxt   register that becomes the pending result word k+1
  sp_3072_mul_d_avx2_24_word MACRO disp, acc, nxt
          mulx    r10, r9, QWORD PTR [rax+disp]
          mov     nxt, r13
          adcx    acc, r9
          adox    nxt, r10
          mov     QWORD PTR [rcx+disp], acc
  ENDM
  _text SEGMENT READONLY PARA
  sp_3072_mul_d_avx2_24 PROC
          push    r12
          push    r13
          mov     rax, rdx                        ; rax = a; rdx is mulx's implicit operand
          ; A[0] * B
          mov     rdx, r8
          xor     r13, r13                        ; r13 = 0, and CF = OF = 0 for both chains
          mulx    r12, r11, QWORD PTR [rax]
          mov     QWORD PTR [rcx], r11
          ; A[1] * B .. A[22] * B; r12 and r11 swap roles every word.
          sp_3072_mul_d_avx2_24_word 8, r12, r11
          sp_3072_mul_d_avx2_24_word 16, r11, r12
          sp_3072_mul_d_avx2_24_word 24, r12, r11
          sp_3072_mul_d_avx2_24_word 32, r11, r12
          sp_3072_mul_d_avx2_24_word 40, r12, r11
          sp_3072_mul_d_avx2_24_word 48, r11, r12
          sp_3072_mul_d_avx2_24_word 56, r12, r11
          sp_3072_mul_d_avx2_24_word 64, r11, r12
          sp_3072_mul_d_avx2_24_word 72, r12, r11
          sp_3072_mul_d_avx2_24_word 80, r11, r12
          sp_3072_mul_d_avx2_24_word 88, r12, r11
          sp_3072_mul_d_avx2_24_word 96, r11, r12
          sp_3072_mul_d_avx2_24_word 104, r12, r11
          sp_3072_mul_d_avx2_24_word 112, r11, r12
          sp_3072_mul_d_avx2_24_word 120, r12, r11
          sp_3072_mul_d_avx2_24_word 128, r11, r12
          sp_3072_mul_d_avx2_24_word 136, r12, r11
          sp_3072_mul_d_avx2_24_word 144, r11, r12
          sp_3072_mul_d_avx2_24_word 152, r12, r11
          sp_3072_mul_d_avx2_24_word 160, r11, r12
          sp_3072_mul_d_avx2_24_word 168, r12, r11
          sp_3072_mul_d_avx2_24_word 176, r11, r12
          ; A[23] * B: fold the last CF into the top word before storing.
          mulx    r10, r9, QWORD PTR [rax+184]
          mov     r11, r13
          adcx    r12, r9
          adox    r11, r10
          adcx    r11, r13
          mov     QWORD PTR [rcx+184], r12
          mov     QWORD PTR [rcx+192], r11
          pop     r13
          pop     r12
          ret
  sp_3072_mul_d_avx2_24 ENDP
  _text ENDS
  ENDIF
  IFDEF _WIN64
  ; /* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div)
  ;  *
  ;  * d1   (rcx) The high order half of the number to divide.
  ;  * d0   (rdx) The low order half of the number to divide.
  ;  * div  (r8)  The divisor.
  ;  * returns the quotient in rax; the remainder is left in rdx.
  ;  *
  ;  * The div instruction faults when div is zero or when the quotient does
  ;  * not fit in 64 bits. r9 is used as scratch.
  ;  */
  _text SEGMENT READONLY PARA
  div_3072_word_asm_24 PROC
          mov     r9, rdx                         ; park d0: div wants the high half in rdx
          mov     rdx, rcx                        ; rdx = d1
          mov     rax, r9                         ; rax = d0
          div     r8                              ; rax = (rdx:rax) / div
          ret
  div_3072_word_asm_24 ENDP
  _text ENDS
  ENDIF
  ; /* Compare a with b in constant time.
  ;  *
  ;  * a  (rcx) A single precision integer of 24 64-bit words.
  ;  * b  (rdx) A single precision integer of 24 64-bit words.
  ;  * return -ve, 0 or +ve if a is less than, equal to or greater than b
  ;  * respectively. The values produced are -1, 0 and 1.
  ;  *
  ;  * Every word is read, from the most significant down, with no branches.
  ;  * r12 is saved and restored; r8-r11 and the flags are clobbered.
  ;  *
  ;  * Register roles:
  ;  *   rax = result so far (starts at -1)
  ;  *   r8  = mask: -1 until the first differing word, 0 afterwards
  ;  *   r9  = 0, r10 = 1 (constants for the cmov instructions)
  ;  */
  ; Compare one word pair. Both words are ANDed with the mask first, so once
  ; a difference has been found every later pair compares equal and leaves
  ; rax and r8 unchanged.
  ;   disp  byte offset of the word (k * 8)
  sp_3072_cmp_24_word MACRO disp
          mov     r11, QWORD PTR [rcx+disp]
          mov     r12, QWORD PTR [rdx+disp]
          and     r11, r8
          and     r12, r8
          sub     r11, r12
          cmova   rax, r10                        ; a word above b word: result = 1
          cmovc   rax, r8                         ; a word below b word: result = -1
          cmovnz  r8, r9                          ; words differ: mask off the rest
  ENDM
  _text SEGMENT READONLY PARA
  sp_3072_cmp_24 PROC
          push    r12
          xor     r9, r9
          mov     r8, -1
          mov     rax, -1
          mov     r10, 1
          ; Most significant word first.
          sp_3072_cmp_24_word 184
          sp_3072_cmp_24_word 176
          sp_3072_cmp_24_word 168
          sp_3072_cmp_24_word 160
          sp_3072_cmp_24_word 152
          sp_3072_cmp_24_word 144
          sp_3072_cmp_24_word 136
          sp_3072_cmp_24_word 128
          sp_3072_cmp_24_word 120
          sp_3072_cmp_24_word 112
          sp_3072_cmp_24_word 104
          sp_3072_cmp_24_word 96
          sp_3072_cmp_24_word 88
          sp_3072_cmp_24_word 80
          sp_3072_cmp_24_word 72
          sp_3072_cmp_24_word 64
          sp_3072_cmp_24_word 56
          sp_3072_cmp_24_word 48
          sp_3072_cmp_24_word 40
          sp_3072_cmp_24_word 32
          sp_3072_cmp_24_word 24
          sp_3072_cmp_24_word 16
          sp_3072_cmp_24_word 8
          sp_3072_cmp_24_word 0
          ; All words equal: rax and r8 are both still -1, so the xor gives 0.
          ; Otherwise r8 is 0 and rax keeps its 1 / -1.
          xor     rax, r8
          pop     r12
          ret
  sp_3072_cmp_24 ENDP
  _text ENDS
  31833. IFNDEF WC_NO_CACHE_RESISTANT
  31834. _text SEGMENT READONLY PARA
  31835. sp_3072_get_from_table_24 PROC
  31836. sub rsp, 128
  31837. vmovdqu OWORD PTR [rsp], xmm6
  31838. vmovdqu OWORD PTR [rsp+16], xmm7
  31839. vmovdqu OWORD PTR [rsp+32], xmm8
  31840. vmovdqu OWORD PTR [rsp+48], xmm9
  31841. vmovdqu OWORD PTR [rsp+64], xmm10
  31842. vmovdqu OWORD PTR [rsp+80], xmm11
  31843. vmovdqu OWORD PTR [rsp+96], xmm12
  31844. vmovdqu OWORD PTR [rsp+112], xmm13
  31845. mov rax, 1
  31846. movd xmm10, r8
  31847. movd xmm11, rax
  31848. pxor xmm13, xmm13
  31849. pshufd xmm11, xmm11, 0
  31850. pshufd xmm10, xmm10, 0
  31851. ; START: 0-7
  31852. pxor xmm13, xmm13
  31853. pxor xmm4, xmm4
  31854. pxor xmm5, xmm5
  31855. pxor xmm6, xmm6
  31856. pxor xmm7, xmm7
  31857. ; ENTRY: 0
  31858. mov r9, QWORD PTR [rdx]
  31859. movdqu xmm12, xmm13
  31860. pcmpeqd xmm12, xmm10
  31861. movdqu xmm0, [r9]
  31862. movdqu xmm1, [r9+16]
  31863. movdqu xmm2, [r9+32]
  31864. movdqu xmm3, [r9+48]
  31865. pand xmm0, xmm12
  31866. pand xmm1, xmm12
  31867. pand xmm2, xmm12
  31868. pand xmm3, xmm12
  31869. por xmm4, xmm0
  31870. por xmm5, xmm1
  31871. por xmm6, xmm2
  31872. por xmm7, xmm3
  31873. paddd xmm13, xmm11
  31874. ; ENTRY: 1
  31875. mov r9, QWORD PTR [rdx+8]
  31876. movdqu xmm12, xmm13
  31877. pcmpeqd xmm12, xmm10
  31878. movdqu xmm0, [r9]
  31879. movdqu xmm1, [r9+16]
  31880. movdqu xmm2, [r9+32]
  31881. movdqu xmm3, [r9+48]
  31882. pand xmm0, xmm12
  31883. pand xmm1, xmm12
  31884. pand xmm2, xmm12
  31885. pand xmm3, xmm12
  31886. por xmm4, xmm0
  31887. por xmm5, xmm1
  31888. por xmm6, xmm2
  31889. por xmm7, xmm3
  31890. paddd xmm13, xmm11
  31891. ; ENTRY: 2
  31892. mov r9, QWORD PTR [rdx+16]
  31893. movdqu xmm12, xmm13
  31894. pcmpeqd xmm12, xmm10
  31895. movdqu xmm0, [r9]
  31896. movdqu xmm1, [r9+16]
  31897. movdqu xmm2, [r9+32]
  31898. movdqu xmm3, [r9+48]
  31899. pand xmm0, xmm12
  31900. pand xmm1, xmm12
  31901. pand xmm2, xmm12
  31902. pand xmm3, xmm12
  31903. por xmm4, xmm0
  31904. por xmm5, xmm1
  31905. por xmm6, xmm2
  31906. por xmm7, xmm3
  31907. paddd xmm13, xmm11
  31908. ; ENTRY: 3
  31909. mov r9, QWORD PTR [rdx+24]
  31910. movdqu xmm12, xmm13
  31911. pcmpeqd xmm12, xmm10
  31912. movdqu xmm0, [r9]
  31913. movdqu xmm1, [r9+16]
  31914. movdqu xmm2, [r9+32]
  31915. movdqu xmm3, [r9+48]
  31916. pand xmm0, xmm12
  31917. pand xmm1, xmm12
  31918. pand xmm2, xmm12
  31919. pand xmm3, xmm12
  31920. por xmm4, xmm0
  31921. por xmm5, xmm1
  31922. por xmm6, xmm2
  31923. por xmm7, xmm3
  31924. paddd xmm13, xmm11
  31925. ; ENTRY: 4
  31926. mov r9, QWORD PTR [rdx+32]
  31927. movdqu xmm12, xmm13
  31928. pcmpeqd xmm12, xmm10
  31929. movdqu xmm0, [r9]
  31930. movdqu xmm1, [r9+16]
  31931. movdqu xmm2, [r9+32]
  31932. movdqu xmm3, [r9+48]
  31933. pand xmm0, xmm12
  31934. pand xmm1, xmm12
  31935. pand xmm2, xmm12
  31936. pand xmm3, xmm12
  31937. por xmm4, xmm0
  31938. por xmm5, xmm1
  31939. por xmm6, xmm2
  31940. por xmm7, xmm3
  31941. paddd xmm13, xmm11
  31942. ; ENTRY: 5
  31943. mov r9, QWORD PTR [rdx+40]
  31944. movdqu xmm12, xmm13
  31945. pcmpeqd xmm12, xmm10
  31946. movdqu xmm0, [r9]
  31947. movdqu xmm1, [r9+16]
  31948. movdqu xmm2, [r9+32]
  31949. movdqu xmm3, [r9+48]
  31950. pand xmm0, xmm12
  31951. pand xmm1, xmm12
  31952. pand xmm2, xmm12
  31953. pand xmm3, xmm12
  31954. por xmm4, xmm0
  31955. por xmm5, xmm1
  31956. por xmm6, xmm2
  31957. por xmm7, xmm3
  31958. paddd xmm13, xmm11
  31959. ; ENTRY: 6
  31960. mov r9, QWORD PTR [rdx+48]
  31961. movdqu xmm12, xmm13
  31962. pcmpeqd xmm12, xmm10
  31963. movdqu xmm0, [r9]
  31964. movdqu xmm1, [r9+16]
  31965. movdqu xmm2, [r9+32]
  31966. movdqu xmm3, [r9+48]
  31967. pand xmm0, xmm12
  31968. pand xmm1, xmm12
  31969. pand xmm2, xmm12
  31970. pand xmm3, xmm12
  31971. por xmm4, xmm0
  31972. por xmm5, xmm1
  31973. por xmm6, xmm2
  31974. por xmm7, xmm3
  31975. paddd xmm13, xmm11
  31976. ; ENTRY: 7
  31977. mov r9, QWORD PTR [rdx+56]
  31978. movdqu xmm12, xmm13
  31979. pcmpeqd xmm12, xmm10
  31980. movdqu xmm0, [r9]
  31981. movdqu xmm1, [r9+16]
  31982. movdqu xmm2, [r9+32]
  31983. movdqu xmm3, [r9+48]
  31984. pand xmm0, xmm12
  31985. pand xmm1, xmm12
  31986. pand xmm2, xmm12
  31987. pand xmm3, xmm12
  31988. por xmm4, xmm0
  31989. por xmm5, xmm1
  31990. por xmm6, xmm2
  31991. por xmm7, xmm3
  31992. paddd xmm13, xmm11
  31993. ; ENTRY: 8
  31994. mov r9, QWORD PTR [rdx+64]
  31995. movdqu xmm12, xmm13
  31996. pcmpeqd xmm12, xmm10
  31997. movdqu xmm0, [r9]
  31998. movdqu xmm1, [r9+16]
  31999. movdqu xmm2, [r9+32]
  32000. movdqu xmm3, [r9+48]
  32001. pand xmm0, xmm12
  32002. pand xmm1, xmm12
  32003. pand xmm2, xmm12
  32004. pand xmm3, xmm12
  32005. por xmm4, xmm0
  32006. por xmm5, xmm1
  32007. por xmm6, xmm2
  32008. por xmm7, xmm3
  32009. paddd xmm13, xmm11
  32010. ; ENTRY: 9
  32011. mov r9, QWORD PTR [rdx+72]
  32012. movdqu xmm12, xmm13
  32013. pcmpeqd xmm12, xmm10
  32014. movdqu xmm0, [r9]
  32015. movdqu xmm1, [r9+16]
  32016. movdqu xmm2, [r9+32]
  32017. movdqu xmm3, [r9+48]
  32018. pand xmm0, xmm12
  32019. pand xmm1, xmm12
  32020. pand xmm2, xmm12
  32021. pand xmm3, xmm12
  32022. por xmm4, xmm0
  32023. por xmm5, xmm1
  32024. por xmm6, xmm2
  32025. por xmm7, xmm3
  32026. paddd xmm13, xmm11
  32027. ; ENTRY: 10
  32028. mov r9, QWORD PTR [rdx+80]
  32029. movdqu xmm12, xmm13
  32030. pcmpeqd xmm12, xmm10
  32031. movdqu xmm0, [r9]
  32032. movdqu xmm1, [r9+16]
  32033. movdqu xmm2, [r9+32]
  32034. movdqu xmm3, [r9+48]
  32035. pand xmm0, xmm12
  32036. pand xmm1, xmm12
  32037. pand xmm2, xmm12
  32038. pand xmm3, xmm12
  32039. por xmm4, xmm0
  32040. por xmm5, xmm1
  32041. por xmm6, xmm2
  32042. por xmm7, xmm3
  32043. paddd xmm13, xmm11
  32044. ; ENTRY: 11
  32045. mov r9, QWORD PTR [rdx+88]
  32046. movdqu xmm12, xmm13
  32047. pcmpeqd xmm12, xmm10
  32048. movdqu xmm0, [r9]
  32049. movdqu xmm1, [r9+16]
  32050. movdqu xmm2, [r9+32]
  32051. movdqu xmm3, [r9+48]
  32052. pand xmm0, xmm12
  32053. pand xmm1, xmm12
  32054. pand xmm2, xmm12
  32055. pand xmm3, xmm12
  32056. por xmm4, xmm0
  32057. por xmm5, xmm1
  32058. por xmm6, xmm2
  32059. por xmm7, xmm3
  32060. paddd xmm13, xmm11
  32061. ; ENTRY: 12
  32062. mov r9, QWORD PTR [rdx+96]
  32063. movdqu xmm12, xmm13
  32064. pcmpeqd xmm12, xmm10
  32065. movdqu xmm0, [r9]
  32066. movdqu xmm1, [r9+16]
  32067. movdqu xmm2, [r9+32]
  32068. movdqu xmm3, [r9+48]
  32069. pand xmm0, xmm12
  32070. pand xmm1, xmm12
  32071. pand xmm2, xmm12
  32072. pand xmm3, xmm12
  32073. por xmm4, xmm0
  32074. por xmm5, xmm1
  32075. por xmm6, xmm2
  32076. por xmm7, xmm3
  32077. paddd xmm13, xmm11
  32078. ; ENTRY: 13
  32079. mov r9, QWORD PTR [rdx+104]
  32080. movdqu xmm12, xmm13
  32081. pcmpeqd xmm12, xmm10
  32082. movdqu xmm0, [r9]
  32083. movdqu xmm1, [r9+16]
  32084. movdqu xmm2, [r9+32]
  32085. movdqu xmm3, [r9+48]
  32086. pand xmm0, xmm12
  32087. pand xmm1, xmm12
  32088. pand xmm2, xmm12
  32089. pand xmm3, xmm12
  32090. por xmm4, xmm0
  32091. por xmm5, xmm1
  32092. por xmm6, xmm2
  32093. por xmm7, xmm3
  32094. paddd xmm13, xmm11
  32095. ; ENTRY: 14
  32096. mov r9, QWORD PTR [rdx+112]
  32097. movdqu xmm12, xmm13
  32098. pcmpeqd xmm12, xmm10
  32099. movdqu xmm0, [r9]
  32100. movdqu xmm1, [r9+16]
  32101. movdqu xmm2, [r9+32]
  32102. movdqu xmm3, [r9+48]
  32103. pand xmm0, xmm12
  32104. pand xmm1, xmm12
  32105. pand xmm2, xmm12
  32106. pand xmm3, xmm12
  32107. por xmm4, xmm0
  32108. por xmm5, xmm1
  32109. por xmm6, xmm2
  32110. por xmm7, xmm3
  32111. paddd xmm13, xmm11
  32112. ; ENTRY: 15
  32113. mov r9, QWORD PTR [rdx+120]
  32114. movdqu xmm12, xmm13
  32115. pcmpeqd xmm12, xmm10
  32116. movdqu xmm0, [r9]
  32117. movdqu xmm1, [r9+16]
  32118. movdqu xmm2, [r9+32]
  32119. movdqu xmm3, [r9+48]
  32120. pand xmm0, xmm12
  32121. pand xmm1, xmm12
  32122. pand xmm2, xmm12
  32123. pand xmm3, xmm12
  32124. por xmm4, xmm0
  32125. por xmm5, xmm1
  32126. por xmm6, xmm2
  32127. por xmm7, xmm3
  32128. paddd xmm13, xmm11
  32129. ; ENTRY: 16
  32130. mov r9, QWORD PTR [rdx+128]
  32131. movdqu xmm12, xmm13
  32132. pcmpeqd xmm12, xmm10
  32133. movdqu xmm0, [r9]
  32134. movdqu xmm1, [r9+16]
  32135. movdqu xmm2, [r9+32]
  32136. movdqu xmm3, [r9+48]
  32137. pand xmm0, xmm12
  32138. pand xmm1, xmm12
  32139. pand xmm2, xmm12
  32140. pand xmm3, xmm12
  32141. por xmm4, xmm0
  32142. por xmm5, xmm1
  32143. por xmm6, xmm2
  32144. por xmm7, xmm3
  32145. paddd xmm13, xmm11
  32146. ; ENTRY: 17
  32147. mov r9, QWORD PTR [rdx+136]
  32148. movdqu xmm12, xmm13
  32149. pcmpeqd xmm12, xmm10
  32150. movdqu xmm0, [r9]
  32151. movdqu xmm1, [r9+16]
  32152. movdqu xmm2, [r9+32]
  32153. movdqu xmm3, [r9+48]
  32154. pand xmm0, xmm12
  32155. pand xmm1, xmm12
  32156. pand xmm2, xmm12
  32157. pand xmm3, xmm12
  32158. por xmm4, xmm0
  32159. por xmm5, xmm1
  32160. por xmm6, xmm2
  32161. por xmm7, xmm3
  32162. paddd xmm13, xmm11
  32163. ; ENTRY: 18
  32164. mov r9, QWORD PTR [rdx+144]
  32165. movdqu xmm12, xmm13
  32166. pcmpeqd xmm12, xmm10
  32167. movdqu xmm0, [r9]
  32168. movdqu xmm1, [r9+16]
  32169. movdqu xmm2, [r9+32]
  32170. movdqu xmm3, [r9+48]
  32171. pand xmm0, xmm12
  32172. pand xmm1, xmm12
  32173. pand xmm2, xmm12
  32174. pand xmm3, xmm12
  32175. por xmm4, xmm0
  32176. por xmm5, xmm1
  32177. por xmm6, xmm2
  32178. por xmm7, xmm3
  32179. paddd xmm13, xmm11
  32180. ; ENTRY: 19
  32181. mov r9, QWORD PTR [rdx+152]
  32182. movdqu xmm12, xmm13
  32183. pcmpeqd xmm12, xmm10
  32184. movdqu xmm0, [r9]
  32185. movdqu xmm1, [r9+16]
  32186. movdqu xmm2, [r9+32]
  32187. movdqu xmm3, [r9+48]
  32188. pand xmm0, xmm12
  32189. pand xmm1, xmm12
  32190. pand xmm2, xmm12
  32191. pand xmm3, xmm12
  32192. por xmm4, xmm0
  32193. por xmm5, xmm1
  32194. por xmm6, xmm2
  32195. por xmm7, xmm3
  32196. paddd xmm13, xmm11
  32197. ; ENTRY: 20
  32198. mov r9, QWORD PTR [rdx+160]
  32199. movdqu xmm12, xmm13
  32200. pcmpeqd xmm12, xmm10
  32201. movdqu xmm0, [r9]
  32202. movdqu xmm1, [r9+16]
  32203. movdqu xmm2, [r9+32]
  32204. movdqu xmm3, [r9+48]
  32205. pand xmm0, xmm12
  32206. pand xmm1, xmm12
  32207. pand xmm2, xmm12
  32208. pand xmm3, xmm12
  32209. por xmm4, xmm0
  32210. por xmm5, xmm1
  32211. por xmm6, xmm2
  32212. por xmm7, xmm3
  32213. paddd xmm13, xmm11
  32214. ; ENTRY: 21
  32215. mov r9, QWORD PTR [rdx+168]
  32216. movdqu xmm12, xmm13
  32217. pcmpeqd xmm12, xmm10
  32218. movdqu xmm0, [r9]
  32219. movdqu xmm1, [r9+16]
  32220. movdqu xmm2, [r9+32]
  32221. movdqu xmm3, [r9+48]
  32222. pand xmm0, xmm12
  32223. pand xmm1, xmm12
  32224. pand xmm2, xmm12
  32225. pand xmm3, xmm12
  32226. por xmm4, xmm0
  32227. por xmm5, xmm1
  32228. por xmm6, xmm2
  32229. por xmm7, xmm3
  32230. paddd xmm13, xmm11
  32231. ; ENTRY: 22
  32232. mov r9, QWORD PTR [rdx+176]
  32233. movdqu xmm12, xmm13
  32234. pcmpeqd xmm12, xmm10
  32235. movdqu xmm0, [r9]
  32236. movdqu xmm1, [r9+16]
  32237. movdqu xmm2, [r9+32]
  32238. movdqu xmm3, [r9+48]
  32239. pand xmm0, xmm12
  32240. pand xmm1, xmm12
  32241. pand xmm2, xmm12
  32242. pand xmm3, xmm12
  32243. por xmm4, xmm0
  32244. por xmm5, xmm1
  32245. por xmm6, xmm2
  32246. por xmm7, xmm3
  32247. paddd xmm13, xmm11
  32248. ; ENTRY: 23
  32249. mov r9, QWORD PTR [rdx+184]
  32250. movdqu xmm12, xmm13
  32251. pcmpeqd xmm12, xmm10
  32252. movdqu xmm0, [r9]
  32253. movdqu xmm1, [r9+16]
  32254. movdqu xmm2, [r9+32]
  32255. movdqu xmm3, [r9+48]
  32256. pand xmm0, xmm12
  32257. pand xmm1, xmm12
  32258. pand xmm2, xmm12
  32259. pand xmm3, xmm12
  32260. por xmm4, xmm0
  32261. por xmm5, xmm1
  32262. por xmm6, xmm2
  32263. por xmm7, xmm3
  32264. paddd xmm13, xmm11
  32265. ; ENTRY: 24
  32266. mov r9, QWORD PTR [rdx+192]
  32267. movdqu xmm12, xmm13
  32268. pcmpeqd xmm12, xmm10
  32269. movdqu xmm0, [r9]
  32270. movdqu xmm1, [r9+16]
  32271. movdqu xmm2, [r9+32]
  32272. movdqu xmm3, [r9+48]
  32273. pand xmm0, xmm12
  32274. pand xmm1, xmm12
  32275. pand xmm2, xmm12
  32276. pand xmm3, xmm12
  32277. por xmm4, xmm0
  32278. por xmm5, xmm1
  32279. por xmm6, xmm2
  32280. por xmm7, xmm3
  32281. paddd xmm13, xmm11
  32282. ; ENTRY: 25
  32283. mov r9, QWORD PTR [rdx+200]
  32284. movdqu xmm12, xmm13
  32285. pcmpeqd xmm12, xmm10
  32286. movdqu xmm0, [r9]
  32287. movdqu xmm1, [r9+16]
  32288. movdqu xmm2, [r9+32]
  32289. movdqu xmm3, [r9+48]
  32290. pand xmm0, xmm12
  32291. pand xmm1, xmm12
  32292. pand xmm2, xmm12
  32293. pand xmm3, xmm12
  32294. por xmm4, xmm0
  32295. por xmm5, xmm1
  32296. por xmm6, xmm2
  32297. por xmm7, xmm3
  32298. paddd xmm13, xmm11
  32299. ; ENTRY: 26
  32300. mov r9, QWORD PTR [rdx+208]
  32301. movdqu xmm12, xmm13
  32302. pcmpeqd xmm12, xmm10
  32303. movdqu xmm0, [r9]
  32304. movdqu xmm1, [r9+16]
  32305. movdqu xmm2, [r9+32]
  32306. movdqu xmm3, [r9+48]
  32307. pand xmm0, xmm12
  32308. pand xmm1, xmm12
  32309. pand xmm2, xmm12
  32310. pand xmm3, xmm12
  32311. por xmm4, xmm0
  32312. por xmm5, xmm1
  32313. por xmm6, xmm2
  32314. por xmm7, xmm3
  32315. paddd xmm13, xmm11
  32316. ; ENTRY: 27
  32317. mov r9, QWORD PTR [rdx+216]
  32318. movdqu xmm12, xmm13
  32319. pcmpeqd xmm12, xmm10
  32320. movdqu xmm0, [r9]
  32321. movdqu xmm1, [r9+16]
  32322. movdqu xmm2, [r9+32]
  32323. movdqu xmm3, [r9+48]
  32324. pand xmm0, xmm12
  32325. pand xmm1, xmm12
  32326. pand xmm2, xmm12
  32327. pand xmm3, xmm12
  32328. por xmm4, xmm0
  32329. por xmm5, xmm1
  32330. por xmm6, xmm2
  32331. por xmm7, xmm3
  32332. paddd xmm13, xmm11
  32333. ; ENTRY: 28
  32334. mov r9, QWORD PTR [rdx+224]
  32335. movdqu xmm12, xmm13
  32336. pcmpeqd xmm12, xmm10
  32337. movdqu xmm0, [r9]
  32338. movdqu xmm1, [r9+16]
  32339. movdqu xmm2, [r9+32]
  32340. movdqu xmm3, [r9+48]
  32341. pand xmm0, xmm12
  32342. pand xmm1, xmm12
  32343. pand xmm2, xmm12
  32344. pand xmm3, xmm12
  32345. por xmm4, xmm0
  32346. por xmm5, xmm1
  32347. por xmm6, xmm2
  32348. por xmm7, xmm3
  32349. paddd xmm13, xmm11
  32350. ; ENTRY: 29
  32351. mov r9, QWORD PTR [rdx+232]
  32352. movdqu xmm12, xmm13
  32353. pcmpeqd xmm12, xmm10
  32354. movdqu xmm0, [r9]
  32355. movdqu xmm1, [r9+16]
  32356. movdqu xmm2, [r9+32]
  32357. movdqu xmm3, [r9+48]
  32358. pand xmm0, xmm12
  32359. pand xmm1, xmm12
  32360. pand xmm2, xmm12
  32361. pand xmm3, xmm12
  32362. por xmm4, xmm0
  32363. por xmm5, xmm1
  32364. por xmm6, xmm2
  32365. por xmm7, xmm3
  32366. paddd xmm13, xmm11
  32367. ; ENTRY: 30
  32368. mov r9, QWORD PTR [rdx+240]
  32369. movdqu xmm12, xmm13
  32370. pcmpeqd xmm12, xmm10
  32371. movdqu xmm0, [r9]
  32372. movdqu xmm1, [r9+16]
  32373. movdqu xmm2, [r9+32]
  32374. movdqu xmm3, [r9+48]
  32375. pand xmm0, xmm12
  32376. pand xmm1, xmm12
  32377. pand xmm2, xmm12
  32378. pand xmm3, xmm12
  32379. por xmm4, xmm0
  32380. por xmm5, xmm1
  32381. por xmm6, xmm2
  32382. por xmm7, xmm3
  32383. paddd xmm13, xmm11
  32384. ; ENTRY: 31
  32385. mov r9, QWORD PTR [rdx+248]
  32386. movdqu xmm12, xmm13
  32387. pcmpeqd xmm12, xmm10
  32388. movdqu xmm0, [r9]
  32389. movdqu xmm1, [r9+16]
  32390. movdqu xmm2, [r9+32]
  32391. movdqu xmm3, [r9+48]
  32392. pand xmm0, xmm12
  32393. pand xmm1, xmm12
  32394. pand xmm2, xmm12
  32395. pand xmm3, xmm12
  32396. por xmm4, xmm0
  32397. por xmm5, xmm1
  32398. por xmm6, xmm2
  32399. por xmm7, xmm3
  32400. paddd xmm13, xmm11
  32401. movdqu [rcx], xmm4
  32402. movdqu [rcx+16], xmm5
  32403. movdqu [rcx+32], xmm6
  32404. movdqu [rcx+48], xmm7
  32405. add rcx, 64
  32406. ; END: 0-7
  32407. ; START: 8-15
  32408. pxor xmm13, xmm13
  32409. pxor xmm4, xmm4
  32410. pxor xmm5, xmm5
  32411. pxor xmm6, xmm6
  32412. pxor xmm7, xmm7
  32413. ; ENTRY: 0
  32414. mov r9, QWORD PTR [rdx]
  32415. add r9, 64
  32416. movdqu xmm12, xmm13
  32417. pcmpeqd xmm12, xmm10
  32418. movdqu xmm0, [r9]
  32419. movdqu xmm1, [r9+16]
  32420. movdqu xmm2, [r9+32]
  32421. movdqu xmm3, [r9+48]
  32422. pand xmm0, xmm12
  32423. pand xmm1, xmm12
  32424. pand xmm2, xmm12
  32425. pand xmm3, xmm12
  32426. por xmm4, xmm0
  32427. por xmm5, xmm1
  32428. por xmm6, xmm2
  32429. por xmm7, xmm3
  32430. paddd xmm13, xmm11
  32431. ; ENTRY: 1
  32432. mov r9, QWORD PTR [rdx+8]
  32433. add r9, 64
  32434. movdqu xmm12, xmm13
  32435. pcmpeqd xmm12, xmm10
  32436. movdqu xmm0, [r9]
  32437. movdqu xmm1, [r9+16]
  32438. movdqu xmm2, [r9+32]
  32439. movdqu xmm3, [r9+48]
  32440. pand xmm0, xmm12
  32441. pand xmm1, xmm12
  32442. pand xmm2, xmm12
  32443. pand xmm3, xmm12
  32444. por xmm4, xmm0
  32445. por xmm5, xmm1
  32446. por xmm6, xmm2
  32447. por xmm7, xmm3
  32448. paddd xmm13, xmm11
  32449. ; ENTRY: 2
  32450. mov r9, QWORD PTR [rdx+16]
  32451. add r9, 64
  32452. movdqu xmm12, xmm13
  32453. pcmpeqd xmm12, xmm10
  32454. movdqu xmm0, [r9]
  32455. movdqu xmm1, [r9+16]
  32456. movdqu xmm2, [r9+32]
  32457. movdqu xmm3, [r9+48]
  32458. pand xmm0, xmm12
  32459. pand xmm1, xmm12
  32460. pand xmm2, xmm12
  32461. pand xmm3, xmm12
  32462. por xmm4, xmm0
  32463. por xmm5, xmm1
  32464. por xmm6, xmm2
  32465. por xmm7, xmm3
  32466. paddd xmm13, xmm11
  32467. ; ENTRY: 3
  32468. mov r9, QWORD PTR [rdx+24]
  32469. add r9, 64
  32470. movdqu xmm12, xmm13
  32471. pcmpeqd xmm12, xmm10
  32472. movdqu xmm0, [r9]
  32473. movdqu xmm1, [r9+16]
  32474. movdqu xmm2, [r9+32]
  32475. movdqu xmm3, [r9+48]
  32476. pand xmm0, xmm12
  32477. pand xmm1, xmm12
  32478. pand xmm2, xmm12
  32479. pand xmm3, xmm12
  32480. por xmm4, xmm0
  32481. por xmm5, xmm1
  32482. por xmm6, xmm2
  32483. por xmm7, xmm3
  32484. paddd xmm13, xmm11
  32485. ; ENTRY: 4
  32486. mov r9, QWORD PTR [rdx+32]
  32487. add r9, 64
  32488. movdqu xmm12, xmm13
  32489. pcmpeqd xmm12, xmm10
  32490. movdqu xmm0, [r9]
  32491. movdqu xmm1, [r9+16]
  32492. movdqu xmm2, [r9+32]
  32493. movdqu xmm3, [r9+48]
  32494. pand xmm0, xmm12
  32495. pand xmm1, xmm12
  32496. pand xmm2, xmm12
  32497. pand xmm3, xmm12
  32498. por xmm4, xmm0
  32499. por xmm5, xmm1
  32500. por xmm6, xmm2
  32501. por xmm7, xmm3
  32502. paddd xmm13, xmm11
  32503. ; ENTRY: 5
  32504. mov r9, QWORD PTR [rdx+40]
  32505. add r9, 64
  32506. movdqu xmm12, xmm13
  32507. pcmpeqd xmm12, xmm10
  32508. movdqu xmm0, [r9]
  32509. movdqu xmm1, [r9+16]
  32510. movdqu xmm2, [r9+32]
  32511. movdqu xmm3, [r9+48]
  32512. pand xmm0, xmm12
  32513. pand xmm1, xmm12
  32514. pand xmm2, xmm12
  32515. pand xmm3, xmm12
  32516. por xmm4, xmm0
  32517. por xmm5, xmm1
  32518. por xmm6, xmm2
  32519. por xmm7, xmm3
  32520. paddd xmm13, xmm11
  32521. ; ENTRY: 6
  32522. mov r9, QWORD PTR [rdx+48]
  32523. add r9, 64
  32524. movdqu xmm12, xmm13
  32525. pcmpeqd xmm12, xmm10
  32526. movdqu xmm0, [r9]
  32527. movdqu xmm1, [r9+16]
  32528. movdqu xmm2, [r9+32]
  32529. movdqu xmm3, [r9+48]
  32530. pand xmm0, xmm12
  32531. pand xmm1, xmm12
  32532. pand xmm2, xmm12
  32533. pand xmm3, xmm12
  32534. por xmm4, xmm0
  32535. por xmm5, xmm1
  32536. por xmm6, xmm2
  32537. por xmm7, xmm3
  32538. paddd xmm13, xmm11
  32539. ; ENTRY: 7
  32540. mov r9, QWORD PTR [rdx+56]
  32541. add r9, 64
  32542. movdqu xmm12, xmm13
  32543. pcmpeqd xmm12, xmm10
  32544. movdqu xmm0, [r9]
  32545. movdqu xmm1, [r9+16]
  32546. movdqu xmm2, [r9+32]
  32547. movdqu xmm3, [r9+48]
  32548. pand xmm0, xmm12
  32549. pand xmm1, xmm12
  32550. pand xmm2, xmm12
  32551. pand xmm3, xmm12
  32552. por xmm4, xmm0
  32553. por xmm5, xmm1
  32554. por xmm6, xmm2
  32555. por xmm7, xmm3
  32556. paddd xmm13, xmm11
  32557. ; ENTRY: 8
  32558. mov r9, QWORD PTR [rdx+64]
  32559. add r9, 64
  32560. movdqu xmm12, xmm13
  32561. pcmpeqd xmm12, xmm10
  32562. movdqu xmm0, [r9]
  32563. movdqu xmm1, [r9+16]
  32564. movdqu xmm2, [r9+32]
  32565. movdqu xmm3, [r9+48]
  32566. pand xmm0, xmm12
  32567. pand xmm1, xmm12
  32568. pand xmm2, xmm12
  32569. pand xmm3, xmm12
  32570. por xmm4, xmm0
  32571. por xmm5, xmm1
  32572. por xmm6, xmm2
  32573. por xmm7, xmm3
  32574. paddd xmm13, xmm11
  32575. ; ENTRY: 9
  32576. mov r9, QWORD PTR [rdx+72]
  32577. add r9, 64
  32578. movdqu xmm12, xmm13
  32579. pcmpeqd xmm12, xmm10
  32580. movdqu xmm0, [r9]
  32581. movdqu xmm1, [r9+16]
  32582. movdqu xmm2, [r9+32]
  32583. movdqu xmm3, [r9+48]
  32584. pand xmm0, xmm12
  32585. pand xmm1, xmm12
  32586. pand xmm2, xmm12
  32587. pand xmm3, xmm12
  32588. por xmm4, xmm0
  32589. por xmm5, xmm1
  32590. por xmm6, xmm2
  32591. por xmm7, xmm3
  32592. paddd xmm13, xmm11
  32593. ; ENTRY: 10
  32594. mov r9, QWORD PTR [rdx+80]
  32595. add r9, 64
  32596. movdqu xmm12, xmm13
  32597. pcmpeqd xmm12, xmm10
  32598. movdqu xmm0, [r9]
  32599. movdqu xmm1, [r9+16]
  32600. movdqu xmm2, [r9+32]
  32601. movdqu xmm3, [r9+48]
  32602. pand xmm0, xmm12
  32603. pand xmm1, xmm12
  32604. pand xmm2, xmm12
  32605. pand xmm3, xmm12
  32606. por xmm4, xmm0
  32607. por xmm5, xmm1
  32608. por xmm6, xmm2
  32609. por xmm7, xmm3
  32610. paddd xmm13, xmm11
  32611. ; ENTRY: 11
  32612. mov r9, QWORD PTR [rdx+88]
  32613. add r9, 64
  32614. movdqu xmm12, xmm13
  32615. pcmpeqd xmm12, xmm10
  32616. movdqu xmm0, [r9]
  32617. movdqu xmm1, [r9+16]
  32618. movdqu xmm2, [r9+32]
  32619. movdqu xmm3, [r9+48]
  32620. pand xmm0, xmm12
  32621. pand xmm1, xmm12
  32622. pand xmm2, xmm12
  32623. pand xmm3, xmm12
  32624. por xmm4, xmm0
  32625. por xmm5, xmm1
  32626. por xmm6, xmm2
  32627. por xmm7, xmm3
  32628. paddd xmm13, xmm11
  32629. ; ENTRY: 12
  32630. mov r9, QWORD PTR [rdx+96]
  32631. add r9, 64
  32632. movdqu xmm12, xmm13
  32633. pcmpeqd xmm12, xmm10
  32634. movdqu xmm0, [r9]
  32635. movdqu xmm1, [r9+16]
  32636. movdqu xmm2, [r9+32]
  32637. movdqu xmm3, [r9+48]
  32638. pand xmm0, xmm12
  32639. pand xmm1, xmm12
  32640. pand xmm2, xmm12
  32641. pand xmm3, xmm12
  32642. por xmm4, xmm0
  32643. por xmm5, xmm1
  32644. por xmm6, xmm2
  32645. por xmm7, xmm3
  32646. paddd xmm13, xmm11
  32647. ; ENTRY: 13
  32648. mov r9, QWORD PTR [rdx+104]
  32649. add r9, 64
  32650. movdqu xmm12, xmm13
  32651. pcmpeqd xmm12, xmm10
  32652. movdqu xmm0, [r9]
  32653. movdqu xmm1, [r9+16]
  32654. movdqu xmm2, [r9+32]
  32655. movdqu xmm3, [r9+48]
  32656. pand xmm0, xmm12
  32657. pand xmm1, xmm12
  32658. pand xmm2, xmm12
  32659. pand xmm3, xmm12
  32660. por xmm4, xmm0
  32661. por xmm5, xmm1
  32662. por xmm6, xmm2
  32663. por xmm7, xmm3
  32664. paddd xmm13, xmm11
  32665. ; ENTRY: 14
  32666. mov r9, QWORD PTR [rdx+112]
  32667. add r9, 64
  32668. movdqu xmm12, xmm13
  32669. pcmpeqd xmm12, xmm10
  32670. movdqu xmm0, [r9]
  32671. movdqu xmm1, [r9+16]
  32672. movdqu xmm2, [r9+32]
  32673. movdqu xmm3, [r9+48]
  32674. pand xmm0, xmm12
  32675. pand xmm1, xmm12
  32676. pand xmm2, xmm12
  32677. pand xmm3, xmm12
  32678. por xmm4, xmm0
  32679. por xmm5, xmm1
  32680. por xmm6, xmm2
  32681. por xmm7, xmm3
  32682. paddd xmm13, xmm11
  32683. ; ENTRY: 15
  32684. mov r9, QWORD PTR [rdx+120]
  32685. add r9, 64
  32686. movdqu xmm12, xmm13
  32687. pcmpeqd xmm12, xmm10
  32688. movdqu xmm0, [r9]
  32689. movdqu xmm1, [r9+16]
  32690. movdqu xmm2, [r9+32]
  32691. movdqu xmm3, [r9+48]
  32692. pand xmm0, xmm12
  32693. pand xmm1, xmm12
  32694. pand xmm2, xmm12
  32695. pand xmm3, xmm12
  32696. por xmm4, xmm0
  32697. por xmm5, xmm1
  32698. por xmm6, xmm2
  32699. por xmm7, xmm3
  32700. paddd xmm13, xmm11
  32701. ; ENTRY: 16
  32702. mov r9, QWORD PTR [rdx+128]
  32703. add r9, 64
  32704. movdqu xmm12, xmm13
  32705. pcmpeqd xmm12, xmm10
  32706. movdqu xmm0, [r9]
  32707. movdqu xmm1, [r9+16]
  32708. movdqu xmm2, [r9+32]
  32709. movdqu xmm3, [r9+48]
  32710. pand xmm0, xmm12
  32711. pand xmm1, xmm12
  32712. pand xmm2, xmm12
  32713. pand xmm3, xmm12
  32714. por xmm4, xmm0
  32715. por xmm5, xmm1
  32716. por xmm6, xmm2
  32717. por xmm7, xmm3
  32718. paddd xmm13, xmm11
  32719. ; ENTRY: 17
  32720. mov r9, QWORD PTR [rdx+136]
  32721. add r9, 64
  32722. movdqu xmm12, xmm13
  32723. pcmpeqd xmm12, xmm10
  32724. movdqu xmm0, [r9]
  32725. movdqu xmm1, [r9+16]
  32726. movdqu xmm2, [r9+32]
  32727. movdqu xmm3, [r9+48]
  32728. pand xmm0, xmm12
  32729. pand xmm1, xmm12
  32730. pand xmm2, xmm12
  32731. pand xmm3, xmm12
  32732. por xmm4, xmm0
  32733. por xmm5, xmm1
  32734. por xmm6, xmm2
  32735. por xmm7, xmm3
  32736. paddd xmm13, xmm11
  32737. ; ENTRY: 18
  32738. mov r9, QWORD PTR [rdx+144]
  32739. add r9, 64
  32740. movdqu xmm12, xmm13
  32741. pcmpeqd xmm12, xmm10
  32742. movdqu xmm0, [r9]
  32743. movdqu xmm1, [r9+16]
  32744. movdqu xmm2, [r9+32]
  32745. movdqu xmm3, [r9+48]
  32746. pand xmm0, xmm12
  32747. pand xmm1, xmm12
  32748. pand xmm2, xmm12
  32749. pand xmm3, xmm12
  32750. por xmm4, xmm0
  32751. por xmm5, xmm1
  32752. por xmm6, xmm2
  32753. por xmm7, xmm3
  32754. paddd xmm13, xmm11
  32755. ; ENTRY: 19
  32756. mov r9, QWORD PTR [rdx+152]
  32757. add r9, 64
  32758. movdqu xmm12, xmm13
  32759. pcmpeqd xmm12, xmm10
  32760. movdqu xmm0, [r9]
  32761. movdqu xmm1, [r9+16]
  32762. movdqu xmm2, [r9+32]
  32763. movdqu xmm3, [r9+48]
  32764. pand xmm0, xmm12
  32765. pand xmm1, xmm12
  32766. pand xmm2, xmm12
  32767. pand xmm3, xmm12
  32768. por xmm4, xmm0
  32769. por xmm5, xmm1
  32770. por xmm6, xmm2
  32771. por xmm7, xmm3
  32772. paddd xmm13, xmm11
  32773. ; ENTRY: 20
  32774. mov r9, QWORD PTR [rdx+160]
  32775. add r9, 64
  32776. movdqu xmm12, xmm13
  32777. pcmpeqd xmm12, xmm10
  32778. movdqu xmm0, [r9]
  32779. movdqu xmm1, [r9+16]
  32780. movdqu xmm2, [r9+32]
  32781. movdqu xmm3, [r9+48]
  32782. pand xmm0, xmm12
  32783. pand xmm1, xmm12
  32784. pand xmm2, xmm12
  32785. pand xmm3, xmm12
  32786. por xmm4, xmm0
  32787. por xmm5, xmm1
  32788. por xmm6, xmm2
  32789. por xmm7, xmm3
  32790. paddd xmm13, xmm11
  32791. ; ENTRY: 21
  32792. mov r9, QWORD PTR [rdx+168]
  32793. add r9, 64
  32794. movdqu xmm12, xmm13
  32795. pcmpeqd xmm12, xmm10
  32796. movdqu xmm0, [r9]
  32797. movdqu xmm1, [r9+16]
  32798. movdqu xmm2, [r9+32]
  32799. movdqu xmm3, [r9+48]
  32800. pand xmm0, xmm12
  32801. pand xmm1, xmm12
  32802. pand xmm2, xmm12
  32803. pand xmm3, xmm12
  32804. por xmm4, xmm0
  32805. por xmm5, xmm1
  32806. por xmm6, xmm2
  32807. por xmm7, xmm3
  32808. paddd xmm13, xmm11
  32809. ; ENTRY: 22
  32810. mov r9, QWORD PTR [rdx+176]
  32811. add r9, 64
  32812. movdqu xmm12, xmm13
  32813. pcmpeqd xmm12, xmm10
  32814. movdqu xmm0, [r9]
  32815. movdqu xmm1, [r9+16]
  32816. movdqu xmm2, [r9+32]
  32817. movdqu xmm3, [r9+48]
  32818. pand xmm0, xmm12
  32819. pand xmm1, xmm12
  32820. pand xmm2, xmm12
  32821. pand xmm3, xmm12
  32822. por xmm4, xmm0
  32823. por xmm5, xmm1
  32824. por xmm6, xmm2
  32825. por xmm7, xmm3
  32826. paddd xmm13, xmm11
  32827. ; ENTRY: 23
  32828. mov r9, QWORD PTR [rdx+184]
  32829. add r9, 64
  32830. movdqu xmm12, xmm13
  32831. pcmpeqd xmm12, xmm10
  32832. movdqu xmm0, [r9]
  32833. movdqu xmm1, [r9+16]
  32834. movdqu xmm2, [r9+32]
  32835. movdqu xmm3, [r9+48]
  32836. pand xmm0, xmm12
  32837. pand xmm1, xmm12
  32838. pand xmm2, xmm12
  32839. pand xmm3, xmm12
  32840. por xmm4, xmm0
  32841. por xmm5, xmm1
  32842. por xmm6, xmm2
  32843. por xmm7, xmm3
  32844. paddd xmm13, xmm11
  32845. ; ENTRY: 24
  32846. mov r9, QWORD PTR [rdx+192]
  32847. add r9, 64
  32848. movdqu xmm12, xmm13
  32849. pcmpeqd xmm12, xmm10
  32850. movdqu xmm0, [r9]
  32851. movdqu xmm1, [r9+16]
  32852. movdqu xmm2, [r9+32]
  32853. movdqu xmm3, [r9+48]
  32854. pand xmm0, xmm12
  32855. pand xmm1, xmm12
  32856. pand xmm2, xmm12
  32857. pand xmm3, xmm12
  32858. por xmm4, xmm0
  32859. por xmm5, xmm1
  32860. por xmm6, xmm2
  32861. por xmm7, xmm3
  32862. paddd xmm13, xmm11
  32863. ; ENTRY: 25
  32864. mov r9, QWORD PTR [rdx+200]
  32865. add r9, 64
  32866. movdqu xmm12, xmm13
  32867. pcmpeqd xmm12, xmm10
  32868. movdqu xmm0, [r9]
  32869. movdqu xmm1, [r9+16]
  32870. movdqu xmm2, [r9+32]
  32871. movdqu xmm3, [r9+48]
  32872. pand xmm0, xmm12
  32873. pand xmm1, xmm12
  32874. pand xmm2, xmm12
  32875. pand xmm3, xmm12
  32876. por xmm4, xmm0
  32877. por xmm5, xmm1
  32878. por xmm6, xmm2
  32879. por xmm7, xmm3
  32880. paddd xmm13, xmm11
  32881. ; ENTRY: 26
  32882. mov r9, QWORD PTR [rdx+208]
  32883. add r9, 64
  32884. movdqu xmm12, xmm13
  32885. pcmpeqd xmm12, xmm10
  32886. movdqu xmm0, [r9]
  32887. movdqu xmm1, [r9+16]
  32888. movdqu xmm2, [r9+32]
  32889. movdqu xmm3, [r9+48]
  32890. pand xmm0, xmm12
  32891. pand xmm1, xmm12
  32892. pand xmm2, xmm12
  32893. pand xmm3, xmm12
  32894. por xmm4, xmm0
  32895. por xmm5, xmm1
  32896. por xmm6, xmm2
  32897. por xmm7, xmm3
  32898. paddd xmm13, xmm11
  32899. ; ENTRY: 27
  32900. mov r9, QWORD PTR [rdx+216]
  32901. add r9, 64
  32902. movdqu xmm12, xmm13
  32903. pcmpeqd xmm12, xmm10
  32904. movdqu xmm0, [r9]
  32905. movdqu xmm1, [r9+16]
  32906. movdqu xmm2, [r9+32]
  32907. movdqu xmm3, [r9+48]
  32908. pand xmm0, xmm12
  32909. pand xmm1, xmm12
  32910. pand xmm2, xmm12
  32911. pand xmm3, xmm12
  32912. por xmm4, xmm0
  32913. por xmm5, xmm1
  32914. por xmm6, xmm2
  32915. por xmm7, xmm3
  32916. paddd xmm13, xmm11
  32917. ; ENTRY: 28
  32918. mov r9, QWORD PTR [rdx+224]
  32919. add r9, 64
  32920. movdqu xmm12, xmm13
  32921. pcmpeqd xmm12, xmm10
  32922. movdqu xmm0, [r9]
  32923. movdqu xmm1, [r9+16]
  32924. movdqu xmm2, [r9+32]
  32925. movdqu xmm3, [r9+48]
  32926. pand xmm0, xmm12
  32927. pand xmm1, xmm12
  32928. pand xmm2, xmm12
  32929. pand xmm3, xmm12
  32930. por xmm4, xmm0
  32931. por xmm5, xmm1
  32932. por xmm6, xmm2
  32933. por xmm7, xmm3
  32934. paddd xmm13, xmm11
  32935. ; ENTRY: 29
  32936. mov r9, QWORD PTR [rdx+232]
  32937. add r9, 64
  32938. movdqu xmm12, xmm13
  32939. pcmpeqd xmm12, xmm10
  32940. movdqu xmm0, [r9]
  32941. movdqu xmm1, [r9+16]
  32942. movdqu xmm2, [r9+32]
  32943. movdqu xmm3, [r9+48]
  32944. pand xmm0, xmm12
  32945. pand xmm1, xmm12
  32946. pand xmm2, xmm12
  32947. pand xmm3, xmm12
  32948. por xmm4, xmm0
  32949. por xmm5, xmm1
  32950. por xmm6, xmm2
  32951. por xmm7, xmm3
  32952. paddd xmm13, xmm11
  32953. ; ENTRY: 30
  32954. mov r9, QWORD PTR [rdx+240]
  32955. add r9, 64
  32956. movdqu xmm12, xmm13
  32957. pcmpeqd xmm12, xmm10
  32958. movdqu xmm0, [r9]
  32959. movdqu xmm1, [r9+16]
  32960. movdqu xmm2, [r9+32]
  32961. movdqu xmm3, [r9+48]
  32962. pand xmm0, xmm12
  32963. pand xmm1, xmm12
  32964. pand xmm2, xmm12
  32965. pand xmm3, xmm12
  32966. por xmm4, xmm0
  32967. por xmm5, xmm1
  32968. por xmm6, xmm2
  32969. por xmm7, xmm3
  32970. paddd xmm13, xmm11
  32971. ; ENTRY: 31
  32972. mov r9, QWORD PTR [rdx+248]
  32973. add r9, 64
  32974. movdqu xmm12, xmm13
  32975. pcmpeqd xmm12, xmm10
  32976. movdqu xmm0, [r9]
  32977. movdqu xmm1, [r9+16]
  32978. movdqu xmm2, [r9+32]
  32979. movdqu xmm3, [r9+48]
  32980. pand xmm0, xmm12
  32981. pand xmm1, xmm12
  32982. pand xmm2, xmm12
  32983. pand xmm3, xmm12
  32984. por xmm4, xmm0
  32985. por xmm5, xmm1
  32986. por xmm6, xmm2
  32987. por xmm7, xmm3
  32988. paddd xmm13, xmm11
  32989. movdqu [rcx], xmm4
  32990. movdqu [rcx+16], xmm5
  32991. movdqu [rcx+32], xmm6
  32992. movdqu [rcx+48], xmm7
  32993. add rcx, 64
  32994. ; END: 8-15
  32995. ; START: 16-23
  32996. pxor xmm13, xmm13
  32997. pxor xmm4, xmm4
  32998. pxor xmm5, xmm5
  32999. pxor xmm6, xmm6
  33000. pxor xmm7, xmm7
  33001. ; ENTRY: 0
  33002. mov r9, QWORD PTR [rdx]
  33003. add r9, 128
  33004. movdqu xmm12, xmm13
  33005. pcmpeqd xmm12, xmm10
  33006. movdqu xmm0, [r9]
  33007. movdqu xmm1, [r9+16]
  33008. movdqu xmm2, [r9+32]
  33009. movdqu xmm3, [r9+48]
  33010. pand xmm0, xmm12
  33011. pand xmm1, xmm12
  33012. pand xmm2, xmm12
  33013. pand xmm3, xmm12
  33014. por xmm4, xmm0
  33015. por xmm5, xmm1
  33016. por xmm6, xmm2
  33017. por xmm7, xmm3
  33018. paddd xmm13, xmm11
  33019. ; ENTRY: 1
  33020. mov r9, QWORD PTR [rdx+8]
  33021. add r9, 128
  33022. movdqu xmm12, xmm13
  33023. pcmpeqd xmm12, xmm10
  33024. movdqu xmm0, [r9]
  33025. movdqu xmm1, [r9+16]
  33026. movdqu xmm2, [r9+32]
  33027. movdqu xmm3, [r9+48]
  33028. pand xmm0, xmm12
  33029. pand xmm1, xmm12
  33030. pand xmm2, xmm12
  33031. pand xmm3, xmm12
  33032. por xmm4, xmm0
  33033. por xmm5, xmm1
  33034. por xmm6, xmm2
  33035. por xmm7, xmm3
  33036. paddd xmm13, xmm11
  33037. ; ENTRY: 2
  33038. mov r9, QWORD PTR [rdx+16]
  33039. add r9, 128
  33040. movdqu xmm12, xmm13
  33041. pcmpeqd xmm12, xmm10
  33042. movdqu xmm0, [r9]
  33043. movdqu xmm1, [r9+16]
  33044. movdqu xmm2, [r9+32]
  33045. movdqu xmm3, [r9+48]
  33046. pand xmm0, xmm12
  33047. pand xmm1, xmm12
  33048. pand xmm2, xmm12
  33049. pand xmm3, xmm12
  33050. por xmm4, xmm0
  33051. por xmm5, xmm1
  33052. por xmm6, xmm2
  33053. por xmm7, xmm3
  33054. paddd xmm13, xmm11
  33055. ; ENTRY: 3
  33056. mov r9, QWORD PTR [rdx+24]
  33057. add r9, 128
  33058. movdqu xmm12, xmm13
  33059. pcmpeqd xmm12, xmm10
  33060. movdqu xmm0, [r9]
  33061. movdqu xmm1, [r9+16]
  33062. movdqu xmm2, [r9+32]
  33063. movdqu xmm3, [r9+48]
  33064. pand xmm0, xmm12
  33065. pand xmm1, xmm12
  33066. pand xmm2, xmm12
  33067. pand xmm3, xmm12
  33068. por xmm4, xmm0
  33069. por xmm5, xmm1
  33070. por xmm6, xmm2
  33071. por xmm7, xmm3
  33072. paddd xmm13, xmm11
  33073. ; ENTRY: 4
  33074. mov r9, QWORD PTR [rdx+32]
  33075. add r9, 128
  33076. movdqu xmm12, xmm13
  33077. pcmpeqd xmm12, xmm10
  33078. movdqu xmm0, [r9]
  33079. movdqu xmm1, [r9+16]
  33080. movdqu xmm2, [r9+32]
  33081. movdqu xmm3, [r9+48]
  33082. pand xmm0, xmm12
  33083. pand xmm1, xmm12
  33084. pand xmm2, xmm12
  33085. pand xmm3, xmm12
  33086. por xmm4, xmm0
  33087. por xmm5, xmm1
  33088. por xmm6, xmm2
  33089. por xmm7, xmm3
  33090. paddd xmm13, xmm11
  33091. ; ENTRY: 5
  33092. mov r9, QWORD PTR [rdx+40]
  33093. add r9, 128
  33094. movdqu xmm12, xmm13
  33095. pcmpeqd xmm12, xmm10
  33096. movdqu xmm0, [r9]
  33097. movdqu xmm1, [r9+16]
  33098. movdqu xmm2, [r9+32]
  33099. movdqu xmm3, [r9+48]
  33100. pand xmm0, xmm12
  33101. pand xmm1, xmm12
  33102. pand xmm2, xmm12
  33103. pand xmm3, xmm12
  33104. por xmm4, xmm0
  33105. por xmm5, xmm1
  33106. por xmm6, xmm2
  33107. por xmm7, xmm3
  33108. paddd xmm13, xmm11
  33109. ; ENTRY: 6
  33110. mov r9, QWORD PTR [rdx+48]
  33111. add r9, 128
  33112. movdqu xmm12, xmm13
  33113. pcmpeqd xmm12, xmm10
  33114. movdqu xmm0, [r9]
  33115. movdqu xmm1, [r9+16]
  33116. movdqu xmm2, [r9+32]
  33117. movdqu xmm3, [r9+48]
  33118. pand xmm0, xmm12
  33119. pand xmm1, xmm12
  33120. pand xmm2, xmm12
  33121. pand xmm3, xmm12
  33122. por xmm4, xmm0
  33123. por xmm5, xmm1
  33124. por xmm6, xmm2
  33125. por xmm7, xmm3
  33126. paddd xmm13, xmm11
  33127. ; ENTRY: 7
  33128. mov r9, QWORD PTR [rdx+56]
  33129. add r9, 128
  33130. movdqu xmm12, xmm13
  33131. pcmpeqd xmm12, xmm10
  33132. movdqu xmm0, [r9]
  33133. movdqu xmm1, [r9+16]
  33134. movdqu xmm2, [r9+32]
  33135. movdqu xmm3, [r9+48]
  33136. pand xmm0, xmm12
  33137. pand xmm1, xmm12
  33138. pand xmm2, xmm12
  33139. pand xmm3, xmm12
  33140. por xmm4, xmm0
  33141. por xmm5, xmm1
  33142. por xmm6, xmm2
  33143. por xmm7, xmm3
  33144. paddd xmm13, xmm11
  33145. ; ENTRY: 8
  33146. mov r9, QWORD PTR [rdx+64]
  33147. add r9, 128
  33148. movdqu xmm12, xmm13
  33149. pcmpeqd xmm12, xmm10
  33150. movdqu xmm0, [r9]
  33151. movdqu xmm1, [r9+16]
  33152. movdqu xmm2, [r9+32]
  33153. movdqu xmm3, [r9+48]
  33154. pand xmm0, xmm12
  33155. pand xmm1, xmm12
  33156. pand xmm2, xmm12
  33157. pand xmm3, xmm12
  33158. por xmm4, xmm0
  33159. por xmm5, xmm1
  33160. por xmm6, xmm2
  33161. por xmm7, xmm3
  33162. paddd xmm13, xmm11
  33163. ; ENTRY: 9
  33164. mov r9, QWORD PTR [rdx+72]
  33165. add r9, 128
  33166. movdqu xmm12, xmm13
  33167. pcmpeqd xmm12, xmm10
  33168. movdqu xmm0, [r9]
  33169. movdqu xmm1, [r9+16]
  33170. movdqu xmm2, [r9+32]
  33171. movdqu xmm3, [r9+48]
  33172. pand xmm0, xmm12
  33173. pand xmm1, xmm12
  33174. pand xmm2, xmm12
  33175. pand xmm3, xmm12
  33176. por xmm4, xmm0
  33177. por xmm5, xmm1
  33178. por xmm6, xmm2
  33179. por xmm7, xmm3
  33180. paddd xmm13, xmm11
  33181. ; ENTRY: 10
  33182. mov r9, QWORD PTR [rdx+80]
  33183. add r9, 128
  33184. movdqu xmm12, xmm13
  33185. pcmpeqd xmm12, xmm10
  33186. movdqu xmm0, [r9]
  33187. movdqu xmm1, [r9+16]
  33188. movdqu xmm2, [r9+32]
  33189. movdqu xmm3, [r9+48]
  33190. pand xmm0, xmm12
  33191. pand xmm1, xmm12
  33192. pand xmm2, xmm12
  33193. pand xmm3, xmm12
  33194. por xmm4, xmm0
  33195. por xmm5, xmm1
  33196. por xmm6, xmm2
  33197. por xmm7, xmm3
  33198. paddd xmm13, xmm11
  33199. ; ENTRY: 11
  33200. mov r9, QWORD PTR [rdx+88]
  33201. add r9, 128
  33202. movdqu xmm12, xmm13
  33203. pcmpeqd xmm12, xmm10
  33204. movdqu xmm0, [r9]
  33205. movdqu xmm1, [r9+16]
  33206. movdqu xmm2, [r9+32]
  33207. movdqu xmm3, [r9+48]
  33208. pand xmm0, xmm12
  33209. pand xmm1, xmm12
  33210. pand xmm2, xmm12
  33211. pand xmm3, xmm12
  33212. por xmm4, xmm0
  33213. por xmm5, xmm1
  33214. por xmm6, xmm2
  33215. por xmm7, xmm3
  33216. paddd xmm13, xmm11
  33217. ; ENTRY: 12
  33218. mov r9, QWORD PTR [rdx+96]
  33219. add r9, 128
  33220. movdqu xmm12, xmm13
  33221. pcmpeqd xmm12, xmm10
  33222. movdqu xmm0, [r9]
  33223. movdqu xmm1, [r9+16]
  33224. movdqu xmm2, [r9+32]
  33225. movdqu xmm3, [r9+48]
  33226. pand xmm0, xmm12
  33227. pand xmm1, xmm12
  33228. pand xmm2, xmm12
  33229. pand xmm3, xmm12
  33230. por xmm4, xmm0
  33231. por xmm5, xmm1
  33232. por xmm6, xmm2
  33233. por xmm7, xmm3
  33234. paddd xmm13, xmm11
  33235. ; ENTRY: 13
  33236. mov r9, QWORD PTR [rdx+104]
  33237. add r9, 128
  33238. movdqu xmm12, xmm13
  33239. pcmpeqd xmm12, xmm10
  33240. movdqu xmm0, [r9]
  33241. movdqu xmm1, [r9+16]
  33242. movdqu xmm2, [r9+32]
  33243. movdqu xmm3, [r9+48]
  33244. pand xmm0, xmm12
  33245. pand xmm1, xmm12
  33246. pand xmm2, xmm12
  33247. pand xmm3, xmm12
  33248. por xmm4, xmm0
  33249. por xmm5, xmm1
  33250. por xmm6, xmm2
  33251. por xmm7, xmm3
  33252. paddd xmm13, xmm11
  33253. ; ENTRY: 14
  33254. mov r9, QWORD PTR [rdx+112]
  33255. add r9, 128
  33256. movdqu xmm12, xmm13
  33257. pcmpeqd xmm12, xmm10
  33258. movdqu xmm0, [r9]
  33259. movdqu xmm1, [r9+16]
  33260. movdqu xmm2, [r9+32]
  33261. movdqu xmm3, [r9+48]
  33262. pand xmm0, xmm12
  33263. pand xmm1, xmm12
  33264. pand xmm2, xmm12
  33265. pand xmm3, xmm12
  33266. por xmm4, xmm0
  33267. por xmm5, xmm1
  33268. por xmm6, xmm2
  33269. por xmm7, xmm3
  33270. paddd xmm13, xmm11
  33271. ; ENTRY: 15
  33272. mov r9, QWORD PTR [rdx+120]
  33273. add r9, 128
  33274. movdqu xmm12, xmm13
  33275. pcmpeqd xmm12, xmm10
  33276. movdqu xmm0, [r9]
  33277. movdqu xmm1, [r9+16]
  33278. movdqu xmm2, [r9+32]
  33279. movdqu xmm3, [r9+48]
  33280. pand xmm0, xmm12
  33281. pand xmm1, xmm12
  33282. pand xmm2, xmm12
  33283. pand xmm3, xmm12
  33284. por xmm4, xmm0
  33285. por xmm5, xmm1
  33286. por xmm6, xmm2
  33287. por xmm7, xmm3
  33288. paddd xmm13, xmm11
  33289. ; ENTRY: 16
  33290. mov r9, QWORD PTR [rdx+128]
  33291. add r9, 128
  33292. movdqu xmm12, xmm13
  33293. pcmpeqd xmm12, xmm10
  33294. movdqu xmm0, [r9]
  33295. movdqu xmm1, [r9+16]
  33296. movdqu xmm2, [r9+32]
  33297. movdqu xmm3, [r9+48]
  33298. pand xmm0, xmm12
  33299. pand xmm1, xmm12
  33300. pand xmm2, xmm12
  33301. pand xmm3, xmm12
  33302. por xmm4, xmm0
  33303. por xmm5, xmm1
  33304. por xmm6, xmm2
  33305. por xmm7, xmm3
  33306. paddd xmm13, xmm11
  33307. ; ENTRY: 17
  33308. mov r9, QWORD PTR [rdx+136]
  33309. add r9, 128
  33310. movdqu xmm12, xmm13
  33311. pcmpeqd xmm12, xmm10
  33312. movdqu xmm0, [r9]
  33313. movdqu xmm1, [r9+16]
  33314. movdqu xmm2, [r9+32]
  33315. movdqu xmm3, [r9+48]
  33316. pand xmm0, xmm12
  33317. pand xmm1, xmm12
  33318. pand xmm2, xmm12
  33319. pand xmm3, xmm12
  33320. por xmm4, xmm0
  33321. por xmm5, xmm1
  33322. por xmm6, xmm2
  33323. por xmm7, xmm3
  33324. paddd xmm13, xmm11
  33325. ; ENTRY: 18
  33326. mov r9, QWORD PTR [rdx+144]
  33327. add r9, 128
  33328. movdqu xmm12, xmm13
  33329. pcmpeqd xmm12, xmm10
  33330. movdqu xmm0, [r9]
  33331. movdqu xmm1, [r9+16]
  33332. movdqu xmm2, [r9+32]
  33333. movdqu xmm3, [r9+48]
  33334. pand xmm0, xmm12
  33335. pand xmm1, xmm12
  33336. pand xmm2, xmm12
  33337. pand xmm3, xmm12
  33338. por xmm4, xmm0
  33339. por xmm5, xmm1
  33340. por xmm6, xmm2
  33341. por xmm7, xmm3
  33342. paddd xmm13, xmm11
  33343. ; ENTRY: 19
  33344. mov r9, QWORD PTR [rdx+152]
  33345. add r9, 128
  33346. movdqu xmm12, xmm13
  33347. pcmpeqd xmm12, xmm10
  33348. movdqu xmm0, [r9]
  33349. movdqu xmm1, [r9+16]
  33350. movdqu xmm2, [r9+32]
  33351. movdqu xmm3, [r9+48]
  33352. pand xmm0, xmm12
  33353. pand xmm1, xmm12
  33354. pand xmm2, xmm12
  33355. pand xmm3, xmm12
  33356. por xmm4, xmm0
  33357. por xmm5, xmm1
  33358. por xmm6, xmm2
  33359. por xmm7, xmm3
  33360. paddd xmm13, xmm11
  33361. ; ENTRY: 20
  33362. mov r9, QWORD PTR [rdx+160]
  33363. add r9, 128
  33364. movdqu xmm12, xmm13
  33365. pcmpeqd xmm12, xmm10
  33366. movdqu xmm0, [r9]
  33367. movdqu xmm1, [r9+16]
  33368. movdqu xmm2, [r9+32]
  33369. movdqu xmm3, [r9+48]
  33370. pand xmm0, xmm12
  33371. pand xmm1, xmm12
  33372. pand xmm2, xmm12
  33373. pand xmm3, xmm12
  33374. por xmm4, xmm0
  33375. por xmm5, xmm1
  33376. por xmm6, xmm2
  33377. por xmm7, xmm3
  33378. paddd xmm13, xmm11
  33379. ; ENTRY: 21
  33380. mov r9, QWORD PTR [rdx+168]
  33381. add r9, 128
  33382. movdqu xmm12, xmm13
  33383. pcmpeqd xmm12, xmm10
  33384. movdqu xmm0, [r9]
  33385. movdqu xmm1, [r9+16]
  33386. movdqu xmm2, [r9+32]
  33387. movdqu xmm3, [r9+48]
  33388. pand xmm0, xmm12
  33389. pand xmm1, xmm12
  33390. pand xmm2, xmm12
  33391. pand xmm3, xmm12
  33392. por xmm4, xmm0
  33393. por xmm5, xmm1
  33394. por xmm6, xmm2
  33395. por xmm7, xmm3
  33396. paddd xmm13, xmm11
  33397. ; ENTRY: 22
  33398. mov r9, QWORD PTR [rdx+176]
  33399. add r9, 128
  33400. movdqu xmm12, xmm13
  33401. pcmpeqd xmm12, xmm10
  33402. movdqu xmm0, [r9]
  33403. movdqu xmm1, [r9+16]
  33404. movdqu xmm2, [r9+32]
  33405. movdqu xmm3, [r9+48]
  33406. pand xmm0, xmm12
  33407. pand xmm1, xmm12
  33408. pand xmm2, xmm12
  33409. pand xmm3, xmm12
  33410. por xmm4, xmm0
  33411. por xmm5, xmm1
  33412. por xmm6, xmm2
  33413. por xmm7, xmm3
  33414. paddd xmm13, xmm11
  33415. ; ENTRY: 23
  33416. mov r9, QWORD PTR [rdx+184]
  33417. add r9, 128
  33418. movdqu xmm12, xmm13
  33419. pcmpeqd xmm12, xmm10
  33420. movdqu xmm0, [r9]
  33421. movdqu xmm1, [r9+16]
  33422. movdqu xmm2, [r9+32]
  33423. movdqu xmm3, [r9+48]
  33424. pand xmm0, xmm12
  33425. pand xmm1, xmm12
  33426. pand xmm2, xmm12
  33427. pand xmm3, xmm12
  33428. por xmm4, xmm0
  33429. por xmm5, xmm1
  33430. por xmm6, xmm2
  33431. por xmm7, xmm3
  33432. paddd xmm13, xmm11
  33433. ; ENTRY: 24
  33434. mov r9, QWORD PTR [rdx+192]
  33435. add r9, 128
  33436. movdqu xmm12, xmm13
  33437. pcmpeqd xmm12, xmm10
  33438. movdqu xmm0, [r9]
  33439. movdqu xmm1, [r9+16]
  33440. movdqu xmm2, [r9+32]
  33441. movdqu xmm3, [r9+48]
  33442. pand xmm0, xmm12
  33443. pand xmm1, xmm12
  33444. pand xmm2, xmm12
  33445. pand xmm3, xmm12
  33446. por xmm4, xmm0
  33447. por xmm5, xmm1
  33448. por xmm6, xmm2
  33449. por xmm7, xmm3
  33450. paddd xmm13, xmm11
  33451. ; ENTRY: 25
  33452. mov r9, QWORD PTR [rdx+200]
  33453. add r9, 128
  33454. movdqu xmm12, xmm13
  33455. pcmpeqd xmm12, xmm10
  33456. movdqu xmm0, [r9]
  33457. movdqu xmm1, [r9+16]
  33458. movdqu xmm2, [r9+32]
  33459. movdqu xmm3, [r9+48]
  33460. pand xmm0, xmm12
  33461. pand xmm1, xmm12
  33462. pand xmm2, xmm12
  33463. pand xmm3, xmm12
  33464. por xmm4, xmm0
  33465. por xmm5, xmm1
  33466. por xmm6, xmm2
  33467. por xmm7, xmm3
  33468. paddd xmm13, xmm11
  33469. ; ENTRY: 26
  33470. mov r9, QWORD PTR [rdx+208]
  33471. add r9, 128
  33472. movdqu xmm12, xmm13
  33473. pcmpeqd xmm12, xmm10
  33474. movdqu xmm0, [r9]
  33475. movdqu xmm1, [r9+16]
  33476. movdqu xmm2, [r9+32]
  33477. movdqu xmm3, [r9+48]
  33478. pand xmm0, xmm12
  33479. pand xmm1, xmm12
  33480. pand xmm2, xmm12
  33481. pand xmm3, xmm12
  33482. por xmm4, xmm0
  33483. por xmm5, xmm1
  33484. por xmm6, xmm2
  33485. por xmm7, xmm3
  33486. paddd xmm13, xmm11
  33487. ; ENTRY: 27
  33488. mov r9, QWORD PTR [rdx+216]
  33489. add r9, 128
  33490. movdqu xmm12, xmm13
  33491. pcmpeqd xmm12, xmm10
  33492. movdqu xmm0, [r9]
  33493. movdqu xmm1, [r9+16]
  33494. movdqu xmm2, [r9+32]
  33495. movdqu xmm3, [r9+48]
  33496. pand xmm0, xmm12
  33497. pand xmm1, xmm12
  33498. pand xmm2, xmm12
  33499. pand xmm3, xmm12
  33500. por xmm4, xmm0
  33501. por xmm5, xmm1
  33502. por xmm6, xmm2
  33503. por xmm7, xmm3
  33504. paddd xmm13, xmm11
  33505. ; ENTRY: 28
  33506. mov r9, QWORD PTR [rdx+224]
  33507. add r9, 128
  33508. movdqu xmm12, xmm13
  33509. pcmpeqd xmm12, xmm10
  33510. movdqu xmm0, [r9]
  33511. movdqu xmm1, [r9+16]
  33512. movdqu xmm2, [r9+32]
  33513. movdqu xmm3, [r9+48]
  33514. pand xmm0, xmm12
  33515. pand xmm1, xmm12
  33516. pand xmm2, xmm12
  33517. pand xmm3, xmm12
  33518. por xmm4, xmm0
  33519. por xmm5, xmm1
  33520. por xmm6, xmm2
  33521. por xmm7, xmm3
  33522. paddd xmm13, xmm11
  33523. ; ENTRY: 29
  33524. mov r9, QWORD PTR [rdx+232]
  33525. add r9, 128
  33526. movdqu xmm12, xmm13
  33527. pcmpeqd xmm12, xmm10
  33528. movdqu xmm0, [r9]
  33529. movdqu xmm1, [r9+16]
  33530. movdqu xmm2, [r9+32]
  33531. movdqu xmm3, [r9+48]
  33532. pand xmm0, xmm12
  33533. pand xmm1, xmm12
  33534. pand xmm2, xmm12
  33535. pand xmm3, xmm12
  33536. por xmm4, xmm0
  33537. por xmm5, xmm1
  33538. por xmm6, xmm2
  33539. por xmm7, xmm3
  33540. paddd xmm13, xmm11
  33541. ; ENTRY: 30
  33542. mov r9, QWORD PTR [rdx+240]
  33543. add r9, 128
  33544. movdqu xmm12, xmm13
  33545. pcmpeqd xmm12, xmm10
  33546. movdqu xmm0, [r9]
  33547. movdqu xmm1, [r9+16]
  33548. movdqu xmm2, [r9+32]
  33549. movdqu xmm3, [r9+48]
  33550. pand xmm0, xmm12
  33551. pand xmm1, xmm12
  33552. pand xmm2, xmm12
  33553. pand xmm3, xmm12
  33554. por xmm4, xmm0
  33555. por xmm5, xmm1
  33556. por xmm6, xmm2
  33557. por xmm7, xmm3
  33558. paddd xmm13, xmm11
  33559. ; ENTRY: 31
  33560. mov r9, QWORD PTR [rdx+248]
  33561. add r9, 128
  33562. movdqu xmm12, xmm13
  33563. pcmpeqd xmm12, xmm10
  33564. movdqu xmm0, [r9]
  33565. movdqu xmm1, [r9+16]
  33566. movdqu xmm2, [r9+32]
  33567. movdqu xmm3, [r9+48]
  33568. pand xmm0, xmm12
  33569. pand xmm1, xmm12
  33570. pand xmm2, xmm12
  33571. pand xmm3, xmm12
  33572. por xmm4, xmm0
  33573. por xmm5, xmm1
  33574. por xmm6, xmm2
  33575. por xmm7, xmm3
  33576. paddd xmm13, xmm11
  33577. movdqu [rcx], xmm4
  33578. movdqu [rcx+16], xmm5
  33579. movdqu [rcx+32], xmm6
  33580. movdqu [rcx+48], xmm7
  33581. ; END: 16-23
  33582. vmovdqu xmm6, OWORD PTR [rsp]
  33583. vmovdqu xmm7, OWORD PTR [rsp+16]
  33584. vmovdqu xmm8, OWORD PTR [rsp+32]
  33585. vmovdqu xmm9, OWORD PTR [rsp+48]
  33586. vmovdqu xmm10, OWORD PTR [rsp+64]
  33587. vmovdqu xmm11, OWORD PTR [rsp+80]
  33588. vmovdqu xmm12, OWORD PTR [rsp+96]
  33589. vmovdqu xmm13, OWORD PTR [rsp+112]
  33590. add rsp, 128
  33591. ret
  33592. sp_3072_get_from_table_24 ENDP
  33593. _text ENDS
  33594. ENDIF
  33595. IFDEF HAVE_INTEL_AVX2
  33596. ; /* Reduce the number back to 3072 bits using Montgomery reduction.
  ; * This _24 variant runs 24 passes; each pass multiplies the 24 64-bit words
  ; * m[0..23] by one multiplier mu and adds the products into a[i..i+24].
  ; * The multiply-accumulate uses MULX with two independent carry chains
  ; * (ADCX on CF, ADOX on OF); the final step is a masked subtraction of m
  ; * built from PEXT and SBB, with no data-dependent branch.
  33597. ; *
  33598. ; * a A single precision number to reduce in place. Arrives in rcx.
  33599. ; * m The single precision number representing the modulus. Arrives in rdx.
  33600. ; * mp The digit representing the negative inverse of m mod 2^n. Arrives in r8.
  ; *
  ; * Result: a[0..23] receives (upper words) - (m AND mask), where the upper
  ; * words are a[24..27] as held in r14, r15, rdi, rsi plus a[28..47] from
  ; * memory, and mask is all ones if the last pass left a carry, else zero.
  ; * Registers: r12-r15, rdi, rsi, rbx and rbp are pushed on entry and popped
  ; * before ret; rax, rcx, rdx, r8, r9, r10, r11 and the flags are overwritten.
  33601. ; */
  33602. _text SEGMENT READONLY PARA
  33603. sp_3072_mont_reduce_avx2_24 PROC
  33604. push r12 ; r12-r15, rdi, rsi, rbx, rbp are callee-saved in the Microsoft x64 convention
  33605. push r13
  33606. push r14
  33607. push r15
  33608. push rdi
  33609. push rsi
  33610. push rbx
  33611. push rbp
  33612. mov r9, rcx ; r9 = a
  33613. mov r10, rdx ; r10 = m; rdx itself is needed as the implicit MULX multiplicand
  33614. xor rbp, rbp ; rbp = carry handed from one pass to the next, initially 0
  33615. ; i = 24
  33616. mov r11, 24 ; r11 = number of passes still to run
  33617. mov r14, QWORD PTR [r9] ; r14, r15, rdi, rsi hold a[0..3]; the lowest four live words stay in registers
  33618. mov r15, QWORD PTR [r9+8]
  33619. mov rdi, QWORD PTR [r9+16]
  33620. mov rsi, QWORD PTR [r9+24]
  33621. add r9, 96 ; bias the pointer by 12 words; the loop addresses a[i+4..i+24] as [r9-64]..[r9+96]
  33622. xor rbp, rbp ; NOTE(review): rbp is already zero from the xor above; this repeats it
  33623. L_3072_mont_reduce_avx2_24_loop:
  33624. ; mu = a[i] * mp
  33625. mov rdx, r14
  33626. mov r12, r14 ; r12 = a[i], accumulator for word 0 of this pass
  33627. imul rdx, r8 ; rdx = low 64 bits of a[i] * mp
  33628. xor rbx, rbx ; rbx = 0, and CF and OF are both cleared to start the two carry chains
  33629. ; a[i+0] += m[0] * mu
  33630. mulx rcx, rax, QWORD PTR [r10] ; rcx = high half, rax = low half of m[0] * mu
  33631. mov r14, r15 ; rotate the register window: a[i+1] becomes the next pass's a[i]
  33632. adcx r12, rax ; low half joins the CF chain; this sum is never stored (r12 is reloaded below), only its carry is used
  33633. adox r14, rcx ; high half joins the OF chain, one word up
  33634. ; a[i+1] += m[1] * mu
  33635. mulx rcx, rax, QWORD PTR [r10+8]
  33636. mov r15, rdi
  33637. adcx r14, rax
  33638. adox r15, rcx
  33639. ; a[i+2] += m[2] * mu
  33640. mulx rcx, rax, QWORD PTR [r10+16]
  33641. mov rdi, rsi
  33642. adcx r15, rax
  33643. adox rdi, rcx
  33644. ; a[i+3] += m[3] * mu
  33645. mulx rcx, rax, QWORD PTR [r10+24]
  33646. mov rsi, QWORD PTR [r9+-64] ; a[i+4] enters the register window and is not written back in this pass
  33647. adcx rdi, rax
  33648. adox rsi, rcx
  33649. ; a[i+4] += m[4] * mu
  33650. mulx rcx, rax, QWORD PTR [r10+32]
  33651. mov r13, QWORD PTR [r9+-56] ; from here on the words alternate between r13 and r12
  33652. adcx rsi, rax
  33653. adox r13, rcx
  33654. ; a[i+5] += m[5] * mu
  33655. mulx rcx, rax, QWORD PTR [r10+40]
  33656. mov r12, QWORD PTR [r9+-48]
  33657. adcx r13, rax
  33658. adox r12, rcx
  33659. mov QWORD PTR [r9+-56], r13 ; a[i+5] has now received both its high-half and low-half additions; store it
  33660. ; a[i+6] += m[6] * mu
  33661. mulx rcx, rax, QWORD PTR [r10+48]
  33662. mov r13, QWORD PTR [r9+-40]
  33663. adcx r12, rax
  33664. adox r13, rcx
  33665. mov QWORD PTR [r9+-48], r12
  33666. ; a[i+7] += m[7] * mu
  33667. mulx rcx, rax, QWORD PTR [r10+56]
  33668. mov r12, QWORD PTR [r9+-32]
  33669. adcx r13, rax
  33670. adox r12, rcx
  33671. mov QWORD PTR [r9+-40], r13
  33672. ; a[i+8] += m[8] * mu
  33673. mulx rcx, rax, QWORD PTR [r10+64]
  33674. mov r13, QWORD PTR [r9+-24]
  33675. adcx r12, rax
  33676. adox r13, rcx
  33677. mov QWORD PTR [r9+-32], r12
  33678. ; a[i+9] += m[9] * mu
  33679. mulx rcx, rax, QWORD PTR [r10+72]
  33680. mov r12, QWORD PTR [r9+-16]
  33681. adcx r13, rax
  33682. adox r12, rcx
  33683. mov QWORD PTR [r9+-24], r13
  33684. ; a[i+10] += m[10] * mu
  33685. mulx rcx, rax, QWORD PTR [r10+80]
  33686. mov r13, QWORD PTR [r9+-8]
  33687. adcx r12, rax
  33688. adox r13, rcx
  33689. mov QWORD PTR [r9+-16], r12
  33690. ; a[i+11] += m[11] * mu
  33691. mulx rcx, rax, QWORD PTR [r10+88]
  33692. mov r12, QWORD PTR [r9]
  33693. adcx r13, rax
  33694. adox r12, rcx
  33695. mov QWORD PTR [r9+-8], r13
  33696. ; a[i+12] += m[12] * mu
  33697. mulx rcx, rax, QWORD PTR [r10+96]
  33698. mov r13, QWORD PTR [r9+8]
  33699. adcx r12, rax
  33700. adox r13, rcx
  33701. mov QWORD PTR [r9], r12
  33702. ; a[i+13] += m[13] * mu
  33703. mulx rcx, rax, QWORD PTR [r10+104]
  33704. mov r12, QWORD PTR [r9+16]
  33705. adcx r13, rax
  33706. adox r12, rcx
  33707. mov QWORD PTR [r9+8], r13
  33708. ; a[i+14] += m[14] * mu
  33709. mulx rcx, rax, QWORD PTR [r10+112]
  33710. mov r13, QWORD PTR [r9+24]
  33711. adcx r12, rax
  33712. adox r13, rcx
  33713. mov QWORD PTR [r9+16], r12
  33714. ; a[i+15] += m[15] * mu
  33715. mulx rcx, rax, QWORD PTR [r10+120]
  33716. mov r12, QWORD PTR [r9+32]
  33717. adcx r13, rax
  33718. adox r12, rcx
  33719. mov QWORD PTR [r9+24], r13
  33720. ; a[i+16] += m[16] * mu
  33721. mulx rcx, rax, QWORD PTR [r10+128]
  33722. mov r13, QWORD PTR [r9+40]
  33723. adcx r12, rax
  33724. adox r13, rcx
  33725. mov QWORD PTR [r9+32], r12
  33726. ; a[i+17] += m[17] * mu
  33727. mulx rcx, rax, QWORD PTR [r10+136]
  33728. mov r12, QWORD PTR [r9+48]
  33729. adcx r13, rax
  33730. adox r12, rcx
  33731. mov QWORD PTR [r9+40], r13
  33732. ; a[i+18] += m[18] * mu
  33733. mulx rcx, rax, QWORD PTR [r10+144]
  33734. mov r13, QWORD PTR [r9+56]
  33735. adcx r12, rax
  33736. adox r13, rcx
  33737. mov QWORD PTR [r9+48], r12
  33738. ; a[i+19] += m[19] * mu
  33739. mulx rcx, rax, QWORD PTR [r10+152]
  33740. mov r12, QWORD PTR [r9+64]
  33741. adcx r13, rax
  33742. adox r12, rcx
  33743. mov QWORD PTR [r9+56], r13
  33744. ; a[i+20] += m[20] * mu
  33745. mulx rcx, rax, QWORD PTR [r10+160]
  33746. mov r13, QWORD PTR [r9+72]
  33747. adcx r12, rax
  33748. adox r13, rcx
  33749. mov QWORD PTR [r9+64], r12
  33750. ; a[i+21] += m[21] * mu
  33751. mulx rcx, rax, QWORD PTR [r10+168]
  33752. mov r12, QWORD PTR [r9+80]
  33753. adcx r13, rax
  33754. adox r12, rcx
  33755. mov QWORD PTR [r9+72], r13
  33756. ; a[i+22] += m[22] * mu
  33757. mulx rcx, rax, QWORD PTR [r10+176]
  33758. mov r13, QWORD PTR [r9+88]
  33759. adcx r12, rax
  33760. adox r13, rcx
  33761. mov QWORD PTR [r9+80], r12
  33762. ; a[i+23] += m[23] * mu
  33763. mulx rcx, rax, QWORD PTR [r10+184]
  33764. mov r12, QWORD PTR [r9+96] ; a[i+24], the word just above this pass's products
  33765. adcx r13, rax
  33766. adox r12, rcx
  33767. mov QWORD PTR [r9+88], r13
  33768. adcx r12, rbp ; add the carry left by the previous pass, together with CF
  33769. mov rbp, rbx ; rbp = 0; mov does not disturb CF or OF
  33770. mov QWORD PTR [r9+96], r12
  33771. adox rbp, rbx ; rbp += OF (rbx is still 0)
  33772. adcx rbp, rbx ; rbp += CF; this is the carry handed to the next pass
  33773. ; a += 1
  33774. add r9, 8
  33775. ; i -= 1
  33776. sub r11, 1
  33777. jnz L_3072_mont_reduce_avx2_24_loop
  33778. sub r9, 96 ; remove the 12-word bias; after 24 passes r9 = a + 24 words
  33779. neg rbp ; carry 0 stays 0, carry 1 becomes all ones: rbp is now the subtraction mask
  33780. mov r8, r9 ; r8 = a + 24 words, where the upper words are read from (mp is no longer needed)
  33781. sub r9, 192 ; r9 = a, where the result is written
  ; Masked subtract. Each step loads m[k], keeps it or zeroes it with PEXT
  ; against the mask, and subtracts it from upper word k. rcx, rdx and rax
  ; rotate as scratch. mov and pext do not modify CF, so the borrow produced
  ; by sub/sbb survives the interleaved loads and stores.
  33782. mov rcx, QWORD PTR [r10]
  33783. mov rdx, r14 ; words 0..3 of the upper half come from the register window
  33784. pext rcx, rcx, rbp ; m[0] when the mask is all ones, 0 when the mask is zero
  33785. sub rdx, rcx ; first word uses sub; every later word uses sbb to consume the borrow
  33786. mov rcx, QWORD PTR [r10+8]
  33787. mov rax, r15
  33788. pext rcx, rcx, rbp
  33789. mov QWORD PTR [r9], rdx
  33790. sbb rax, rcx
  33791. mov rdx, QWORD PTR [r10+16]
  33792. mov rcx, rdi
  33793. pext rdx, rdx, rbp
  33794. mov QWORD PTR [r9+8], rax
  33795. sbb rcx, rdx
  33796. mov rax, QWORD PTR [r10+24]
  33797. mov rdx, rsi
  33798. pext rax, rax, rbp
  33799. mov QWORD PTR [r9+16], rcx
  33800. sbb rdx, rax
  33801. mov rcx, QWORD PTR [r10+32]
  33802. mov rax, QWORD PTR [r8+32] ; from word 4 upward the upper half is read from memory
  33803. pext rcx, rcx, rbp
  33804. mov QWORD PTR [r9+24], rdx
  33805. sbb rax, rcx
  33806. mov rdx, QWORD PTR [r10+40]
  33807. mov rcx, QWORD PTR [r8+40]
  33808. pext rdx, rdx, rbp
  33809. mov QWORD PTR [r9+32], rax
  33810. sbb rcx, rdx
  33811. mov rax, QWORD PTR [r10+48]
  33812. mov rdx, QWORD PTR [r8+48]
  33813. pext rax, rax, rbp
  33814. mov QWORD PTR [r9+40], rcx
  33815. sbb rdx, rax
  33816. mov rcx, QWORD PTR [r10+56]
  33817. mov rax, QWORD PTR [r8+56]
  33818. pext rcx, rcx, rbp
  33819. mov QWORD PTR [r9+48], rdx
  33820. sbb rax, rcx
  33821. mov rdx, QWORD PTR [r10+64]
  33822. mov rcx, QWORD PTR [r8+64]
  33823. pext rdx, rdx, rbp
  33824. mov QWORD PTR [r9+56], rax
  33825. sbb rcx, rdx
  33826. mov rax, QWORD PTR [r10+72]
  33827. mov rdx, QWORD PTR [r8+72]
  33828. pext rax, rax, rbp
  33829. mov QWORD PTR [r9+64], rcx
  33830. sbb rdx, rax
  33831. mov rcx, QWORD PTR [r10+80]
  33832. mov rax, QWORD PTR [r8+80]
  33833. pext rcx, rcx, rbp
  33834. mov QWORD PTR [r9+72], rdx
  33835. sbb rax, rcx
  33836. mov rdx, QWORD PTR [r10+88]
  33837. mov rcx, QWORD PTR [r8+88]
  33838. pext rdx, rdx, rbp
  33839. mov QWORD PTR [r9+80], rax
  33840. sbb rcx, rdx
  33841. mov rax, QWORD PTR [r10+96]
  33842. mov rdx, QWORD PTR [r8+96]
  33843. pext rax, rax, rbp
  33844. mov QWORD PTR [r9+88], rcx
  33845. sbb rdx, rax
  33846. mov rcx, QWORD PTR [r10+104]
  33847. mov rax, QWORD PTR [r8+104]
  33848. pext rcx, rcx, rbp
  33849. mov QWORD PTR [r9+96], rdx
  33850. sbb rax, rcx
  33851. mov rdx, QWORD PTR [r10+112]
  33852. mov rcx, QWORD PTR [r8+112]
  33853. pext rdx, rdx, rbp
  33854. mov QWORD PTR [r9+104], rax
  33855. sbb rcx, rdx
  33856. mov rax, QWORD PTR [r10+120]
  33857. mov rdx, QWORD PTR [r8+120]
  33858. pext rax, rax, rbp
  33859. mov QWORD PTR [r9+112], rcx
  33860. sbb rdx, rax
  33861. mov rcx, QWORD PTR [r10+128]
  33862. mov rax, QWORD PTR [r8+128]
  33863. pext rcx, rcx, rbp
  33864. mov QWORD PTR [r9+120], rdx
  33865. sbb rax, rcx
  33866. mov rdx, QWORD PTR [r10+136]
  33867. mov rcx, QWORD PTR [r8+136]
  33868. pext rdx, rdx, rbp
  33869. mov QWORD PTR [r9+128], rax
  33870. sbb rcx, rdx
  33871. mov rax, QWORD PTR [r10+144]
  33872. mov rdx, QWORD PTR [r8+144]
  33873. pext rax, rax, rbp
  33874. mov QWORD PTR [r9+136], rcx
  33875. sbb rdx, rax
  33876. mov rcx, QWORD PTR [r10+152]
  33877. mov rax, QWORD PTR [r8+152]
  33878. pext rcx, rcx, rbp
  33879. mov QWORD PTR [r9+144], rdx
  33880. sbb rax, rcx
  33881. mov rdx, QWORD PTR [r10+160]
  33882. mov rcx, QWORD PTR [r8+160]
  33883. pext rdx, rdx, rbp
  33884. mov QWORD PTR [r9+152], rax
  33885. sbb rcx, rdx
  33886. mov rax, QWORD PTR [r10+168]
  33887. mov rdx, QWORD PTR [r8+168]
  33888. pext rax, rax, rbp
  33889. mov QWORD PTR [r9+160], rcx
  33890. sbb rdx, rax
  33891. mov rcx, QWORD PTR [r10+176]
  33892. mov rax, QWORD PTR [r8+176]
  33893. pext rcx, rcx, rbp
  33894. mov QWORD PTR [r9+168], rdx
  33895. sbb rax, rcx
  33896. mov rdx, QWORD PTR [r10+184]
  33897. mov rcx, QWORD PTR [r8+184]
  33898. pext rdx, rdx, rbp
  33899. mov QWORD PTR [r9+176], rax
  33900. sbb rcx, rdx
  33901. mov QWORD PTR [r9+184], rcx ; last result word; the final borrow is not used
  33902. pop rbp ; restore the saved registers in reverse push order
  33903. pop rbx
  33904. pop rsi
  33905. pop rdi
  33906. pop r15
  33907. pop r14
  33908. pop r13
  33909. pop r12
  33910. ret
  33911. sp_3072_mont_reduce_avx2_24 ENDP
  33912. _text ENDS
  33913. ENDIF
  33914. IFNDEF WC_NO_CACHE_RESISTANT
  33915. _text SEGMENT READONLY PARA
  33916. sp_3072_get_from_table_avx2_24 PROC
  33917. sub rsp, 128
  33918. vmovdqu OWORD PTR [rsp], xmm6
  33919. vmovdqu OWORD PTR [rsp+16], xmm7
  33920. vmovdqu OWORD PTR [rsp+32], xmm8
  33921. vmovdqu OWORD PTR [rsp+48], xmm9
  33922. vmovdqu OWORD PTR [rsp+64], xmm10
  33923. vmovdqu OWORD PTR [rsp+80], xmm11
  33924. vmovdqu OWORD PTR [rsp+96], xmm12
  33925. vmovdqu OWORD PTR [rsp+112], xmm13
  33926. mov rax, 1
  33927. movd xmm10, r8
  33928. movd xmm11, rax
  33929. vpxor ymm13, ymm13, ymm13
  33930. vpermd ymm10, ymm13, ymm10
  33931. vpermd ymm11, ymm13, ymm11
  33932. ; START: 0-15
  33933. vpxor ymm13, ymm13, ymm13
  33934. vpxor ymm4, ymm4, ymm4
  33935. vpxor ymm5, ymm5, ymm5
  33936. vpxor ymm6, ymm6, ymm6
  33937. vpxor ymm7, ymm7, ymm7
  33938. ; ENTRY: 0
  33939. mov r9, QWORD PTR [rdx]
  33940. vpcmpeqd ymm12, ymm13, ymm10
  33941. vmovdqu ymm0, YMMWORD PTR [r9]
  33942. vmovdqu ymm1, YMMWORD PTR [r9+32]
  33943. vmovdqu ymm2, YMMWORD PTR [r9+64]
  33944. vmovdqu ymm3, YMMWORD PTR [r9+96]
  33945. vpand ymm0, ymm0, ymm12
  33946. vpand ymm1, ymm1, ymm12
  33947. vpand ymm2, ymm2, ymm12
  33948. vpand ymm3, ymm3, ymm12
  33949. vpor ymm4, ymm4, ymm0
  33950. vpor ymm5, ymm5, ymm1
  33951. vpor ymm6, ymm6, ymm2
  33952. vpor ymm7, ymm7, ymm3
  33953. vpaddd ymm13, ymm13, ymm11
  33954. ; ENTRY: 1
  33955. mov r9, QWORD PTR [rdx+8]
  33956. vpcmpeqd ymm12, ymm13, ymm10
  33957. vmovdqu ymm0, YMMWORD PTR [r9]
  33958. vmovdqu ymm1, YMMWORD PTR [r9+32]
  33959. vmovdqu ymm2, YMMWORD PTR [r9+64]
  33960. vmovdqu ymm3, YMMWORD PTR [r9+96]
  33961. vpand ymm0, ymm0, ymm12
  33962. vpand ymm1, ymm1, ymm12
  33963. vpand ymm2, ymm2, ymm12
  33964. vpand ymm3, ymm3, ymm12
  33965. vpor ymm4, ymm4, ymm0
  33966. vpor ymm5, ymm5, ymm1
  33967. vpor ymm6, ymm6, ymm2
  33968. vpor ymm7, ymm7, ymm3
  33969. vpaddd ymm13, ymm13, ymm11
  33970. ; ENTRY: 2
  33971. mov r9, QWORD PTR [rdx+16]
  33972. vpcmpeqd ymm12, ymm13, ymm10
  33973. vmovdqu ymm0, YMMWORD PTR [r9]
  33974. vmovdqu ymm1, YMMWORD PTR [r9+32]
  33975. vmovdqu ymm2, YMMWORD PTR [r9+64]
  33976. vmovdqu ymm3, YMMWORD PTR [r9+96]
  33977. vpand ymm0, ymm0, ymm12
  33978. vpand ymm1, ymm1, ymm12
  33979. vpand ymm2, ymm2, ymm12
  33980. vpand ymm3, ymm3, ymm12
  33981. vpor ymm4, ymm4, ymm0
  33982. vpor ymm5, ymm5, ymm1
  33983. vpor ymm6, ymm6, ymm2
  33984. vpor ymm7, ymm7, ymm3
  33985. vpaddd ymm13, ymm13, ymm11
  33986. ; ENTRY: 3
  33987. mov r9, QWORD PTR [rdx+24]
  33988. vpcmpeqd ymm12, ymm13, ymm10
  33989. vmovdqu ymm0, YMMWORD PTR [r9]
  33990. vmovdqu ymm1, YMMWORD PTR [r9+32]
  33991. vmovdqu ymm2, YMMWORD PTR [r9+64]
  33992. vmovdqu ymm3, YMMWORD PTR [r9+96]
  33993. vpand ymm0, ymm0, ymm12
  33994. vpand ymm1, ymm1, ymm12
  33995. vpand ymm2, ymm2, ymm12
  33996. vpand ymm3, ymm3, ymm12
  33997. vpor ymm4, ymm4, ymm0
  33998. vpor ymm5, ymm5, ymm1
  33999. vpor ymm6, ymm6, ymm2
  34000. vpor ymm7, ymm7, ymm3
  34001. vpaddd ymm13, ymm13, ymm11
  34002. ; ENTRY: 4
  34003. mov r9, QWORD PTR [rdx+32]
  34004. vpcmpeqd ymm12, ymm13, ymm10
  34005. vmovdqu ymm0, YMMWORD PTR [r9]
  34006. vmovdqu ymm1, YMMWORD PTR [r9+32]
  34007. vmovdqu ymm2, YMMWORD PTR [r9+64]
  34008. vmovdqu ymm3, YMMWORD PTR [r9+96]
  34009. vpand ymm0, ymm0, ymm12
  34010. vpand ymm1, ymm1, ymm12
  34011. vpand ymm2, ymm2, ymm12
  34012. vpand ymm3, ymm3, ymm12
  34013. vpor ymm4, ymm4, ymm0
  34014. vpor ymm5, ymm5, ymm1
  34015. vpor ymm6, ymm6, ymm2
  34016. vpor ymm7, ymm7, ymm3
  34017. vpaddd ymm13, ymm13, ymm11
  34018. ; ENTRY: 5
  34019. mov r9, QWORD PTR [rdx+40]
  34020. vpcmpeqd ymm12, ymm13, ymm10
  34021. vmovdqu ymm0, YMMWORD PTR [r9]
  34022. vmovdqu ymm1, YMMWORD PTR [r9+32]
  34023. vmovdqu ymm2, YMMWORD PTR [r9+64]
  34024. vmovdqu ymm3, YMMWORD PTR [r9+96]
  34025. vpand ymm0, ymm0, ymm12
  34026. vpand ymm1, ymm1, ymm12
  34027. vpand ymm2, ymm2, ymm12
  34028. vpand ymm3, ymm3, ymm12
  34029. vpor ymm4, ymm4, ymm0
  34030. vpor ymm5, ymm5, ymm1
  34031. vpor ymm6, ymm6, ymm2
  34032. vpor ymm7, ymm7, ymm3
  34033. vpaddd ymm13, ymm13, ymm11
  34034. ; ENTRY: 6
  34035. mov r9, QWORD PTR [rdx+48]
  34036. vpcmpeqd ymm12, ymm13, ymm10
  34037. vmovdqu ymm0, YMMWORD PTR [r9]
  34038. vmovdqu ymm1, YMMWORD PTR [r9+32]
  34039. vmovdqu ymm2, YMMWORD PTR [r9+64]
  34040. vmovdqu ymm3, YMMWORD PTR [r9+96]
  34041. vpand ymm0, ymm0, ymm12
  34042. vpand ymm1, ymm1, ymm12
  34043. vpand ymm2, ymm2, ymm12
  34044. vpand ymm3, ymm3, ymm12
  34045. vpor ymm4, ymm4, ymm0
  34046. vpor ymm5, ymm5, ymm1
  34047. vpor ymm6, ymm6, ymm2
  34048. vpor ymm7, ymm7, ymm3
  34049. vpaddd ymm13, ymm13, ymm11
  34050. ; ENTRY: 7
  34051. mov r9, QWORD PTR [rdx+56]
  34052. vpcmpeqd ymm12, ymm13, ymm10
  34053. vmovdqu ymm0, YMMWORD PTR [r9]
  34054. vmovdqu ymm1, YMMWORD PTR [r9+32]
  34055. vmovdqu ymm2, YMMWORD PTR [r9+64]
  34056. vmovdqu ymm3, YMMWORD PTR [r9+96]
  34057. vpand ymm0, ymm0, ymm12
  34058. vpand ymm1, ymm1, ymm12
  34059. vpand ymm2, ymm2, ymm12
  34060. vpand ymm3, ymm3, ymm12
  34061. vpor ymm4, ymm4, ymm0
  34062. vpor ymm5, ymm5, ymm1
  34063. vpor ymm6, ymm6, ymm2
  34064. vpor ymm7, ymm7, ymm3
  34065. vpaddd ymm13, ymm13, ymm11
  34066. ; ENTRY: 8
  34067. mov r9, QWORD PTR [rdx+64]
  34068. vpcmpeqd ymm12, ymm13, ymm10
  34069. vmovdqu ymm0, YMMWORD PTR [r9]
  34070. vmovdqu ymm1, YMMWORD PTR [r9+32]
  34071. vmovdqu ymm2, YMMWORD PTR [r9+64]
  34072. vmovdqu ymm3, YMMWORD PTR [r9+96]
  34073. vpand ymm0, ymm0, ymm12
  34074. vpand ymm1, ymm1, ymm12
  34075. vpand ymm2, ymm2, ymm12
  34076. vpand ymm3, ymm3, ymm12
  34077. vpor ymm4, ymm4, ymm0
  34078. vpor ymm5, ymm5, ymm1
  34079. vpor ymm6, ymm6, ymm2
  34080. vpor ymm7, ymm7, ymm3
  34081. vpaddd ymm13, ymm13, ymm11
  34082. ; ENTRY: 9
  34083. mov r9, QWORD PTR [rdx+72]
  34084. vpcmpeqd ymm12, ymm13, ymm10
  34085. vmovdqu ymm0, YMMWORD PTR [r9]
  34086. vmovdqu ymm1, YMMWORD PTR [r9+32]
  34087. vmovdqu ymm2, YMMWORD PTR [r9+64]
  34088. vmovdqu ymm3, YMMWORD PTR [r9+96]
  34089. vpand ymm0, ymm0, ymm12
  34090. vpand ymm1, ymm1, ymm12
  34091. vpand ymm2, ymm2, ymm12
  34092. vpand ymm3, ymm3, ymm12
  34093. vpor ymm4, ymm4, ymm0
  34094. vpor ymm5, ymm5, ymm1
  34095. vpor ymm6, ymm6, ymm2
  34096. vpor ymm7, ymm7, ymm3
  34097. vpaddd ymm13, ymm13, ymm11
  34098. ; ENTRY: 10
  34099. mov r9, QWORD PTR [rdx+80]
  34100. vpcmpeqd ymm12, ymm13, ymm10
  34101. vmovdqu ymm0, YMMWORD PTR [r9]
  34102. vmovdqu ymm1, YMMWORD PTR [r9+32]
  34103. vmovdqu ymm2, YMMWORD PTR [r9+64]
  34104. vmovdqu ymm3, YMMWORD PTR [r9+96]
  34105. vpand ymm0, ymm0, ymm12
  34106. vpand ymm1, ymm1, ymm12
  34107. vpand ymm2, ymm2, ymm12
  34108. vpand ymm3, ymm3, ymm12
  34109. vpor ymm4, ymm4, ymm0
  34110. vpor ymm5, ymm5, ymm1
  34111. vpor ymm6, ymm6, ymm2
  34112. vpor ymm7, ymm7, ymm3
  34113. vpaddd ymm13, ymm13, ymm11
  34114. ; ENTRY: 11
  34115. mov r9, QWORD PTR [rdx+88]
  34116. vpcmpeqd ymm12, ymm13, ymm10
  34117. vmovdqu ymm0, YMMWORD PTR [r9]
  34118. vmovdqu ymm1, YMMWORD PTR [r9+32]
  34119. vmovdqu ymm2, YMMWORD PTR [r9+64]
  34120. vmovdqu ymm3, YMMWORD PTR [r9+96]
  34121. vpand ymm0, ymm0, ymm12
  34122. vpand ymm1, ymm1, ymm12
  34123. vpand ymm2, ymm2, ymm12
  34124. vpand ymm3, ymm3, ymm12
  34125. vpor ymm4, ymm4, ymm0
  34126. vpor ymm5, ymm5, ymm1
  34127. vpor ymm6, ymm6, ymm2
  34128. vpor ymm7, ymm7, ymm3
  34129. vpaddd ymm13, ymm13, ymm11
  34130. ; ENTRY: 12
  34131. mov r9, QWORD PTR [rdx+96]
  34132. vpcmpeqd ymm12, ymm13, ymm10
  34133. vmovdqu ymm0, YMMWORD PTR [r9]
  34134. vmovdqu ymm1, YMMWORD PTR [r9+32]
  34135. vmovdqu ymm2, YMMWORD PTR [r9+64]
  34136. vmovdqu ymm3, YMMWORD PTR [r9+96]
  34137. vpand ymm0, ymm0, ymm12
  34138. vpand ymm1, ymm1, ymm12
  34139. vpand ymm2, ymm2, ymm12
  34140. vpand ymm3, ymm3, ymm12
  34141. vpor ymm4, ymm4, ymm0
  34142. vpor ymm5, ymm5, ymm1
  34143. vpor ymm6, ymm6, ymm2
  34144. vpor ymm7, ymm7, ymm3
  34145. vpaddd ymm13, ymm13, ymm11
  34146. ; ENTRY: 13
  34147. mov r9, QWORD PTR [rdx+104]
  34148. vpcmpeqd ymm12, ymm13, ymm10
  34149. vmovdqu ymm0, YMMWORD PTR [r9]
  34150. vmovdqu ymm1, YMMWORD PTR [r9+32]
  34151. vmovdqu ymm2, YMMWORD PTR [r9+64]
  34152. vmovdqu ymm3, YMMWORD PTR [r9+96]
  34153. vpand ymm0, ymm0, ymm12
  34154. vpand ymm1, ymm1, ymm12
  34155. vpand ymm2, ymm2, ymm12
  34156. vpand ymm3, ymm3, ymm12
  34157. vpor ymm4, ymm4, ymm0
  34158. vpor ymm5, ymm5, ymm1
  34159. vpor ymm6, ymm6, ymm2
  34160. vpor ymm7, ymm7, ymm3
  34161. vpaddd ymm13, ymm13, ymm11
  34162. ; ENTRY: 14
  34163. mov r9, QWORD PTR [rdx+112]
  34164. vpcmpeqd ymm12, ymm13, ymm10
  34165. vmovdqu ymm0, YMMWORD PTR [r9]
  34166. vmovdqu ymm1, YMMWORD PTR [r9+32]
  34167. vmovdqu ymm2, YMMWORD PTR [r9+64]
  34168. vmovdqu ymm3, YMMWORD PTR [r9+96]
  34169. vpand ymm0, ymm0, ymm12
  34170. vpand ymm1, ymm1, ymm12
  34171. vpand ymm2, ymm2, ymm12
  34172. vpand ymm3, ymm3, ymm12
  34173. vpor ymm4, ymm4, ymm0
  34174. vpor ymm5, ymm5, ymm1
  34175. vpor ymm6, ymm6, ymm2
  34176. vpor ymm7, ymm7, ymm3
  34177. vpaddd ymm13, ymm13, ymm11
  34178. ; ENTRY: 15
  34179. mov r9, QWORD PTR [rdx+120]
  34180. vpcmpeqd ymm12, ymm13, ymm10
  34181. vmovdqu ymm0, YMMWORD PTR [r9]
  34182. vmovdqu ymm1, YMMWORD PTR [r9+32]
  34183. vmovdqu ymm2, YMMWORD PTR [r9+64]
  34184. vmovdqu ymm3, YMMWORD PTR [r9+96]
  34185. vpand ymm0, ymm0, ymm12
  34186. vpand ymm1, ymm1, ymm12
  34187. vpand ymm2, ymm2, ymm12
  34188. vpand ymm3, ymm3, ymm12
  34189. vpor ymm4, ymm4, ymm0
  34190. vpor ymm5, ymm5, ymm1
  34191. vpor ymm6, ymm6, ymm2
  34192. vpor ymm7, ymm7, ymm3
  34193. vpaddd ymm13, ymm13, ymm11
  34194. ; ENTRY: 16
  34195. mov r9, QWORD PTR [rdx+128]
  34196. vpcmpeqd ymm12, ymm13, ymm10
  34197. vmovdqu ymm0, YMMWORD PTR [r9]
  34198. vmovdqu ymm1, YMMWORD PTR [r9+32]
  34199. vmovdqu ymm2, YMMWORD PTR [r9+64]
  34200. vmovdqu ymm3, YMMWORD PTR [r9+96]
  34201. vpand ymm0, ymm0, ymm12
  34202. vpand ymm1, ymm1, ymm12
  34203. vpand ymm2, ymm2, ymm12
  34204. vpand ymm3, ymm3, ymm12
  34205. vpor ymm4, ymm4, ymm0
  34206. vpor ymm5, ymm5, ymm1
  34207. vpor ymm6, ymm6, ymm2
  34208. vpor ymm7, ymm7, ymm3
  34209. vpaddd ymm13, ymm13, ymm11
  34210. ; ENTRY: 17
  34211. mov r9, QWORD PTR [rdx+136]
  34212. vpcmpeqd ymm12, ymm13, ymm10
  34213. vmovdqu ymm0, YMMWORD PTR [r9]
  34214. vmovdqu ymm1, YMMWORD PTR [r9+32]
  34215. vmovdqu ymm2, YMMWORD PTR [r9+64]
  34216. vmovdqu ymm3, YMMWORD PTR [r9+96]
  34217. vpand ymm0, ymm0, ymm12
  34218. vpand ymm1, ymm1, ymm12
  34219. vpand ymm2, ymm2, ymm12
  34220. vpand ymm3, ymm3, ymm12
  34221. vpor ymm4, ymm4, ymm0
  34222. vpor ymm5, ymm5, ymm1
  34223. vpor ymm6, ymm6, ymm2
  34224. vpor ymm7, ymm7, ymm3
  34225. vpaddd ymm13, ymm13, ymm11
  34226. ; ENTRY: 18
  34227. mov r9, QWORD PTR [rdx+144]
  34228. vpcmpeqd ymm12, ymm13, ymm10
  34229. vmovdqu ymm0, YMMWORD PTR [r9]
  34230. vmovdqu ymm1, YMMWORD PTR [r9+32]
  34231. vmovdqu ymm2, YMMWORD PTR [r9+64]
  34232. vmovdqu ymm3, YMMWORD PTR [r9+96]
  34233. vpand ymm0, ymm0, ymm12
  34234. vpand ymm1, ymm1, ymm12
  34235. vpand ymm2, ymm2, ymm12
  34236. vpand ymm3, ymm3, ymm12
  34237. vpor ymm4, ymm4, ymm0
  34238. vpor ymm5, ymm5, ymm1
  34239. vpor ymm6, ymm6, ymm2
  34240. vpor ymm7, ymm7, ymm3
  34241. vpaddd ymm13, ymm13, ymm11
  34242. ; ENTRY: 19
  34243. mov r9, QWORD PTR [rdx+152]
  34244. vpcmpeqd ymm12, ymm13, ymm10
  34245. vmovdqu ymm0, YMMWORD PTR [r9]
  34246. vmovdqu ymm1, YMMWORD PTR [r9+32]
  34247. vmovdqu ymm2, YMMWORD PTR [r9+64]
  34248. vmovdqu ymm3, YMMWORD PTR [r9+96]
  34249. vpand ymm0, ymm0, ymm12
  34250. vpand ymm1, ymm1, ymm12
  34251. vpand ymm2, ymm2, ymm12
  34252. vpand ymm3, ymm3, ymm12
  34253. vpor ymm4, ymm4, ymm0
  34254. vpor ymm5, ymm5, ymm1
  34255. vpor ymm6, ymm6, ymm2
  34256. vpor ymm7, ymm7, ymm3
  34257. vpaddd ymm13, ymm13, ymm11
  34258. ; ENTRY: 20
  34259. mov r9, QWORD PTR [rdx+160]
  34260. vpcmpeqd ymm12, ymm13, ymm10
  34261. vmovdqu ymm0, YMMWORD PTR [r9]
  34262. vmovdqu ymm1, YMMWORD PTR [r9+32]
  34263. vmovdqu ymm2, YMMWORD PTR [r9+64]
  34264. vmovdqu ymm3, YMMWORD PTR [r9+96]
  34265. vpand ymm0, ymm0, ymm12
  34266. vpand ymm1, ymm1, ymm12
  34267. vpand ymm2, ymm2, ymm12
  34268. vpand ymm3, ymm3, ymm12
  34269. vpor ymm4, ymm4, ymm0
  34270. vpor ymm5, ymm5, ymm1
  34271. vpor ymm6, ymm6, ymm2
  34272. vpor ymm7, ymm7, ymm3
  34273. vpaddd ymm13, ymm13, ymm11
  34274. ; ENTRY: 21
  34275. mov r9, QWORD PTR [rdx+168]
  34276. vpcmpeqd ymm12, ymm13, ymm10
  34277. vmovdqu ymm0, YMMWORD PTR [r9]
  34278. vmovdqu ymm1, YMMWORD PTR [r9+32]
  34279. vmovdqu ymm2, YMMWORD PTR [r9+64]
  34280. vmovdqu ymm3, YMMWORD PTR [r9+96]
  34281. vpand ymm0, ymm0, ymm12
  34282. vpand ymm1, ymm1, ymm12
  34283. vpand ymm2, ymm2, ymm12
  34284. vpand ymm3, ymm3, ymm12
  34285. vpor ymm4, ymm4, ymm0
  34286. vpor ymm5, ymm5, ymm1
  34287. vpor ymm6, ymm6, ymm2
  34288. vpor ymm7, ymm7, ymm3
  34289. vpaddd ymm13, ymm13, ymm11
  34290. ; ENTRY: 22
  34291. mov r9, QWORD PTR [rdx+176]
  34292. vpcmpeqd ymm12, ymm13, ymm10
  34293. vmovdqu ymm0, YMMWORD PTR [r9]
  34294. vmovdqu ymm1, YMMWORD PTR [r9+32]
  34295. vmovdqu ymm2, YMMWORD PTR [r9+64]
  34296. vmovdqu ymm3, YMMWORD PTR [r9+96]
  34297. vpand ymm0, ymm0, ymm12
  34298. vpand ymm1, ymm1, ymm12
  34299. vpand ymm2, ymm2, ymm12
  34300. vpand ymm3, ymm3, ymm12
  34301. vpor ymm4, ymm4, ymm0
  34302. vpor ymm5, ymm5, ymm1
  34303. vpor ymm6, ymm6, ymm2
  34304. vpor ymm7, ymm7, ymm3
  34305. vpaddd ymm13, ymm13, ymm11
  34306. ; ENTRY: 23
  34307. mov r9, QWORD PTR [rdx+184]
  34308. vpcmpeqd ymm12, ymm13, ymm10
  34309. vmovdqu ymm0, YMMWORD PTR [r9]
  34310. vmovdqu ymm1, YMMWORD PTR [r9+32]
  34311. vmovdqu ymm2, YMMWORD PTR [r9+64]
  34312. vmovdqu ymm3, YMMWORD PTR [r9+96]
  34313. vpand ymm0, ymm0, ymm12
  34314. vpand ymm1, ymm1, ymm12
  34315. vpand ymm2, ymm2, ymm12
  34316. vpand ymm3, ymm3, ymm12
  34317. vpor ymm4, ymm4, ymm0
  34318. vpor ymm5, ymm5, ymm1
  34319. vpor ymm6, ymm6, ymm2
  34320. vpor ymm7, ymm7, ymm3
  34321. vpaddd ymm13, ymm13, ymm11
  34322. ; ENTRY: 24
  34323. mov r9, QWORD PTR [rdx+192]
  34324. vpcmpeqd ymm12, ymm13, ymm10
  34325. vmovdqu ymm0, YMMWORD PTR [r9]
  34326. vmovdqu ymm1, YMMWORD PTR [r9+32]
  34327. vmovdqu ymm2, YMMWORD PTR [r9+64]
  34328. vmovdqu ymm3, YMMWORD PTR [r9+96]
  34329. vpand ymm0, ymm0, ymm12
  34330. vpand ymm1, ymm1, ymm12
  34331. vpand ymm2, ymm2, ymm12
  34332. vpand ymm3, ymm3, ymm12
  34333. vpor ymm4, ymm4, ymm0
  34334. vpor ymm5, ymm5, ymm1
  34335. vpor ymm6, ymm6, ymm2
  34336. vpor ymm7, ymm7, ymm3
  34337. vpaddd ymm13, ymm13, ymm11
  34338. ; ENTRY: 25
  34339. mov r9, QWORD PTR [rdx+200]
  34340. vpcmpeqd ymm12, ymm13, ymm10
  34341. vmovdqu ymm0, YMMWORD PTR [r9]
  34342. vmovdqu ymm1, YMMWORD PTR [r9+32]
  34343. vmovdqu ymm2, YMMWORD PTR [r9+64]
  34344. vmovdqu ymm3, YMMWORD PTR [r9+96]
  34345. vpand ymm0, ymm0, ymm12
  34346. vpand ymm1, ymm1, ymm12
  34347. vpand ymm2, ymm2, ymm12
  34348. vpand ymm3, ymm3, ymm12
  34349. vpor ymm4, ymm4, ymm0
  34350. vpor ymm5, ymm5, ymm1
  34351. vpor ymm6, ymm6, ymm2
  34352. vpor ymm7, ymm7, ymm3
  34353. vpaddd ymm13, ymm13, ymm11
  34354. ; ENTRY: 26
  34355. mov r9, QWORD PTR [rdx+208]
  34356. vpcmpeqd ymm12, ymm13, ymm10
  34357. vmovdqu ymm0, YMMWORD PTR [r9]
  34358. vmovdqu ymm1, YMMWORD PTR [r9+32]
  34359. vmovdqu ymm2, YMMWORD PTR [r9+64]
  34360. vmovdqu ymm3, YMMWORD PTR [r9+96]
  34361. vpand ymm0, ymm0, ymm12
  34362. vpand ymm1, ymm1, ymm12
  34363. vpand ymm2, ymm2, ymm12
  34364. vpand ymm3, ymm3, ymm12
  34365. vpor ymm4, ymm4, ymm0
  34366. vpor ymm5, ymm5, ymm1
  34367. vpor ymm6, ymm6, ymm2
  34368. vpor ymm7, ymm7, ymm3
  34369. vpaddd ymm13, ymm13, ymm11
  34370. ; ENTRY: 27
  34371. mov r9, QWORD PTR [rdx+216]
  34372. vpcmpeqd ymm12, ymm13, ymm10
  34373. vmovdqu ymm0, YMMWORD PTR [r9]
  34374. vmovdqu ymm1, YMMWORD PTR [r9+32]
  34375. vmovdqu ymm2, YMMWORD PTR [r9+64]
  34376. vmovdqu ymm3, YMMWORD PTR [r9+96]
  34377. vpand ymm0, ymm0, ymm12
  34378. vpand ymm1, ymm1, ymm12
  34379. vpand ymm2, ymm2, ymm12
  34380. vpand ymm3, ymm3, ymm12
  34381. vpor ymm4, ymm4, ymm0
  34382. vpor ymm5, ymm5, ymm1
  34383. vpor ymm6, ymm6, ymm2
  34384. vpor ymm7, ymm7, ymm3
  34385. vpaddd ymm13, ymm13, ymm11
  34386. ; ENTRY: 28
  34387. mov r9, QWORD PTR [rdx+224]
  34388. vpcmpeqd ymm12, ymm13, ymm10
  34389. vmovdqu ymm0, YMMWORD PTR [r9]
  34390. vmovdqu ymm1, YMMWORD PTR [r9+32]
  34391. vmovdqu ymm2, YMMWORD PTR [r9+64]
  34392. vmovdqu ymm3, YMMWORD PTR [r9+96]
  34393. vpand ymm0, ymm0, ymm12
  34394. vpand ymm1, ymm1, ymm12
  34395. vpand ymm2, ymm2, ymm12
  34396. vpand ymm3, ymm3, ymm12
  34397. vpor ymm4, ymm4, ymm0
  34398. vpor ymm5, ymm5, ymm1
  34399. vpor ymm6, ymm6, ymm2
  34400. vpor ymm7, ymm7, ymm3
  34401. vpaddd ymm13, ymm13, ymm11
  34402. ; ENTRY: 29
  34403. mov r9, QWORD PTR [rdx+232]
  34404. vpcmpeqd ymm12, ymm13, ymm10
  34405. vmovdqu ymm0, YMMWORD PTR [r9]
  34406. vmovdqu ymm1, YMMWORD PTR [r9+32]
  34407. vmovdqu ymm2, YMMWORD PTR [r9+64]
  34408. vmovdqu ymm3, YMMWORD PTR [r9+96]
  34409. vpand ymm0, ymm0, ymm12
  34410. vpand ymm1, ymm1, ymm12
  34411. vpand ymm2, ymm2, ymm12
  34412. vpand ymm3, ymm3, ymm12
  34413. vpor ymm4, ymm4, ymm0
  34414. vpor ymm5, ymm5, ymm1
  34415. vpor ymm6, ymm6, ymm2
  34416. vpor ymm7, ymm7, ymm3
  34417. vpaddd ymm13, ymm13, ymm11
  34418. ; ENTRY: 30
  34419. mov r9, QWORD PTR [rdx+240]
  34420. vpcmpeqd ymm12, ymm13, ymm10
  34421. vmovdqu ymm0, YMMWORD PTR [r9]
  34422. vmovdqu ymm1, YMMWORD PTR [r9+32]
  34423. vmovdqu ymm2, YMMWORD PTR [r9+64]
  34424. vmovdqu ymm3, YMMWORD PTR [r9+96]
  34425. vpand ymm0, ymm0, ymm12
  34426. vpand ymm1, ymm1, ymm12
  34427. vpand ymm2, ymm2, ymm12
  34428. vpand ymm3, ymm3, ymm12
  34429. vpor ymm4, ymm4, ymm0
  34430. vpor ymm5, ymm5, ymm1
  34431. vpor ymm6, ymm6, ymm2
  34432. vpor ymm7, ymm7, ymm3
  34433. vpaddd ymm13, ymm13, ymm11
  34434. ; ENTRY: 31
  34435. mov r9, QWORD PTR [rdx+248]
  34436. vpcmpeqd ymm12, ymm13, ymm10
  34437. vmovdqu ymm0, YMMWORD PTR [r9]
  34438. vmovdqu ymm1, YMMWORD PTR [r9+32]
  34439. vmovdqu ymm2, YMMWORD PTR [r9+64]
  34440. vmovdqu ymm3, YMMWORD PTR [r9+96]
  34441. vpand ymm0, ymm0, ymm12
  34442. vpand ymm1, ymm1, ymm12
  34443. vpand ymm2, ymm2, ymm12
  34444. vpand ymm3, ymm3, ymm12
  34445. vpor ymm4, ymm4, ymm0
  34446. vpor ymm5, ymm5, ymm1
  34447. vpor ymm6, ymm6, ymm2
  34448. vpor ymm7, ymm7, ymm3
  34449. vpaddd ymm13, ymm13, ymm11
  34450. vmovdqu YMMWORD PTR [rcx], ymm4
  34451. vmovdqu YMMWORD PTR [rcx+32], ymm5
  34452. vmovdqu YMMWORD PTR [rcx+64], ymm6
  34453. vmovdqu YMMWORD PTR [rcx+96], ymm7
  34454. add rcx, 128
  34455. ; END: 0-15
  34456. ; START: 16-23
  34457. vpxor ymm13, ymm13, ymm13
  34458. vpxor ymm4, ymm4, ymm4
  34459. vpxor ymm5, ymm5, ymm5
  34460. ; ENTRY: 0
  34461. mov r9, QWORD PTR [rdx]
  34462. add r9, 128
  34463. vpcmpeqd ymm12, ymm13, ymm10
  34464. vmovdqu ymm0, YMMWORD PTR [r9]
  34465. vmovdqu ymm1, YMMWORD PTR [r9+32]
  34466. vpand ymm0, ymm0, ymm12
  34467. vpand ymm1, ymm1, ymm12
  34468. vpor ymm4, ymm4, ymm0
  34469. vpor ymm5, ymm5, ymm1
  34470. vpaddd ymm13, ymm13, ymm11
  34471. ; ENTRY: 1
  34472. mov r9, QWORD PTR [rdx+8]
  34473. add r9, 128
  34474. vpcmpeqd ymm12, ymm13, ymm10
  34475. vmovdqu ymm0, YMMWORD PTR [r9]
  34476. vmovdqu ymm1, YMMWORD PTR [r9+32]
  34477. vpand ymm0, ymm0, ymm12
  34478. vpand ymm1, ymm1, ymm12
  34479. vpor ymm4, ymm4, ymm0
  34480. vpor ymm5, ymm5, ymm1
  34481. vpaddd ymm13, ymm13, ymm11
  34482. ; ENTRY: 2
  34483. mov r9, QWORD PTR [rdx+16]
  34484. add r9, 128
  34485. vpcmpeqd ymm12, ymm13, ymm10
  34486. vmovdqu ymm0, YMMWORD PTR [r9]
  34487. vmovdqu ymm1, YMMWORD PTR [r9+32]
  34488. vpand ymm0, ymm0, ymm12
  34489. vpand ymm1, ymm1, ymm12
  34490. vpor ymm4, ymm4, ymm0
  34491. vpor ymm5, ymm5, ymm1
  34492. vpaddd ymm13, ymm13, ymm11
  34493. ; ENTRY: 3
  34494. mov r9, QWORD PTR [rdx+24]
  34495. add r9, 128
  34496. vpcmpeqd ymm12, ymm13, ymm10
  34497. vmovdqu ymm0, YMMWORD PTR [r9]
  34498. vmovdqu ymm1, YMMWORD PTR [r9+32]
  34499. vpand ymm0, ymm0, ymm12
  34500. vpand ymm1, ymm1, ymm12
  34501. vpor ymm4, ymm4, ymm0
  34502. vpor ymm5, ymm5, ymm1
  34503. vpaddd ymm13, ymm13, ymm11
  34504. ; ENTRY: 4
  34505. mov r9, QWORD PTR [rdx+32]
  34506. add r9, 128
  34507. vpcmpeqd ymm12, ymm13, ymm10
  34508. vmovdqu ymm0, YMMWORD PTR [r9]
  34509. vmovdqu ymm1, YMMWORD PTR [r9+32]
  34510. vpand ymm0, ymm0, ymm12
  34511. vpand ymm1, ymm1, ymm12
  34512. vpor ymm4, ymm4, ymm0
  34513. vpor ymm5, ymm5, ymm1
  34514. vpaddd ymm13, ymm13, ymm11
  34515. ; ENTRY: 5
  34516. mov r9, QWORD PTR [rdx+40]
  34517. add r9, 128
  34518. vpcmpeqd ymm12, ymm13, ymm10
  34519. vmovdqu ymm0, YMMWORD PTR [r9]
  34520. vmovdqu ymm1, YMMWORD PTR [r9+32]
  34521. vpand ymm0, ymm0, ymm12
  34522. vpand ymm1, ymm1, ymm12
  34523. vpor ymm4, ymm4, ymm0
  34524. vpor ymm5, ymm5, ymm1
  34525. vpaddd ymm13, ymm13, ymm11
  34526. ; ENTRY: 6
  34527. mov r9, QWORD PTR [rdx+48]
  34528. add r9, 128
  34529. vpcmpeqd ymm12, ymm13, ymm10
  34530. vmovdqu ymm0, YMMWORD PTR [r9]
  34531. vmovdqu ymm1, YMMWORD PTR [r9+32]
  34532. vpand ymm0, ymm0, ymm12
  34533. vpand ymm1, ymm1, ymm12
  34534. vpor ymm4, ymm4, ymm0
  34535. vpor ymm5, ymm5, ymm1
  34536. vpaddd ymm13, ymm13, ymm11
  34537. ; ENTRY: 7
  34538. mov r9, QWORD PTR [rdx+56]
  34539. add r9, 128
  34540. vpcmpeqd ymm12, ymm13, ymm10
  34541. vmovdqu ymm0, YMMWORD PTR [r9]
  34542. vmovdqu ymm1, YMMWORD PTR [r9+32]
  34543. vpand ymm0, ymm0, ymm12
  34544. vpand ymm1, ymm1, ymm12
  34545. vpor ymm4, ymm4, ymm0
  34546. vpor ymm5, ymm5, ymm1
  34547. vpaddd ymm13, ymm13, ymm11
  34548. ; ENTRY: 8
  34549. mov r9, QWORD PTR [rdx+64]
  34550. add r9, 128
  34551. vpcmpeqd ymm12, ymm13, ymm10
  34552. vmovdqu ymm0, YMMWORD PTR [r9]
  34553. vmovdqu ymm1, YMMWORD PTR [r9+32]
  34554. vpand ymm0, ymm0, ymm12
  34555. vpand ymm1, ymm1, ymm12
  34556. vpor ymm4, ymm4, ymm0
  34557. vpor ymm5, ymm5, ymm1
  34558. vpaddd ymm13, ymm13, ymm11
  34559. ; ENTRY: 9
  34560. mov r9, QWORD PTR [rdx+72]
  34561. add r9, 128
  34562. vpcmpeqd ymm12, ymm13, ymm10
  34563. vmovdqu ymm0, YMMWORD PTR [r9]
  34564. vmovdqu ymm1, YMMWORD PTR [r9+32]
  34565. vpand ymm0, ymm0, ymm12
  34566. vpand ymm1, ymm1, ymm12
  34567. vpor ymm4, ymm4, ymm0
  34568. vpor ymm5, ymm5, ymm1
  34569. vpaddd ymm13, ymm13, ymm11
  34570. ; ENTRY: 10
  34571. mov r9, QWORD PTR [rdx+80]
  34572. add r9, 128
  34573. vpcmpeqd ymm12, ymm13, ymm10
  34574. vmovdqu ymm0, YMMWORD PTR [r9]
  34575. vmovdqu ymm1, YMMWORD PTR [r9+32]
  34576. vpand ymm0, ymm0, ymm12
  34577. vpand ymm1, ymm1, ymm12
  34578. vpor ymm4, ymm4, ymm0
  34579. vpor ymm5, ymm5, ymm1
  34580. vpaddd ymm13, ymm13, ymm11
  34581. ; ENTRY: 11
  34582. mov r9, QWORD PTR [rdx+88]
  34583. add r9, 128
  34584. vpcmpeqd ymm12, ymm13, ymm10
  34585. vmovdqu ymm0, YMMWORD PTR [r9]
  34586. vmovdqu ymm1, YMMWORD PTR [r9+32]
  34587. vpand ymm0, ymm0, ymm12
  34588. vpand ymm1, ymm1, ymm12
  34589. vpor ymm4, ymm4, ymm0
  34590. vpor ymm5, ymm5, ymm1
  34591. vpaddd ymm13, ymm13, ymm11
  34592. ; ENTRY: 12
  34593. mov r9, QWORD PTR [rdx+96]
  34594. add r9, 128
  34595. vpcmpeqd ymm12, ymm13, ymm10
  34596. vmovdqu ymm0, YMMWORD PTR [r9]
  34597. vmovdqu ymm1, YMMWORD PTR [r9+32]
  34598. vpand ymm0, ymm0, ymm12
  34599. vpand ymm1, ymm1, ymm12
  34600. vpor ymm4, ymm4, ymm0
  34601. vpor ymm5, ymm5, ymm1
  34602. vpaddd ymm13, ymm13, ymm11
  34603. ; ENTRY: 13
  34604. mov r9, QWORD PTR [rdx+104]
  34605. add r9, 128
  34606. vpcmpeqd ymm12, ymm13, ymm10
  34607. vmovdqu ymm0, YMMWORD PTR [r9]
  34608. vmovdqu ymm1, YMMWORD PTR [r9+32]
  34609. vpand ymm0, ymm0, ymm12
  34610. vpand ymm1, ymm1, ymm12
  34611. vpor ymm4, ymm4, ymm0
  34612. vpor ymm5, ymm5, ymm1
  34613. vpaddd ymm13, ymm13, ymm11
  34614. ; ENTRY: 14
  34615. mov r9, QWORD PTR [rdx+112]
  34616. add r9, 128
  34617. vpcmpeqd ymm12, ymm13, ymm10
  34618. vmovdqu ymm0, YMMWORD PTR [r9]
  34619. vmovdqu ymm1, YMMWORD PTR [r9+32]
  34620. vpand ymm0, ymm0, ymm12
  34621. vpand ymm1, ymm1, ymm12
  34622. vpor ymm4, ymm4, ymm0
  34623. vpor ymm5, ymm5, ymm1
  34624. vpaddd ymm13, ymm13, ymm11
  34625. ; ENTRY: 15
  34626. mov r9, QWORD PTR [rdx+120]
  34627. add r9, 128
  34628. vpcmpeqd ymm12, ymm13, ymm10
  34629. vmovdqu ymm0, YMMWORD PTR [r9]
  34630. vmovdqu ymm1, YMMWORD PTR [r9+32]
  34631. vpand ymm0, ymm0, ymm12
  34632. vpand ymm1, ymm1, ymm12
  34633. vpor ymm4, ymm4, ymm0
  34634. vpor ymm5, ymm5, ymm1
  34635. vpaddd ymm13, ymm13, ymm11
  34636. ; ENTRY: 16
  34637. mov r9, QWORD PTR [rdx+128]
  34638. add r9, 128
  34639. vpcmpeqd ymm12, ymm13, ymm10
  34640. vmovdqu ymm0, YMMWORD PTR [r9]
  34641. vmovdqu ymm1, YMMWORD PTR [r9+32]
  34642. vpand ymm0, ymm0, ymm12
  34643. vpand ymm1, ymm1, ymm12
  34644. vpor ymm4, ymm4, ymm0
  34645. vpor ymm5, ymm5, ymm1
  34646. vpaddd ymm13, ymm13, ymm11
  34647. ; ENTRY: 17
  34648. mov r9, QWORD PTR [rdx+136]
  34649. add r9, 128
  34650. vpcmpeqd ymm12, ymm13, ymm10
  34651. vmovdqu ymm0, YMMWORD PTR [r9]
  34652. vmovdqu ymm1, YMMWORD PTR [r9+32]
  34653. vpand ymm0, ymm0, ymm12
  34654. vpand ymm1, ymm1, ymm12
  34655. vpor ymm4, ymm4, ymm0
  34656. vpor ymm5, ymm5, ymm1
  34657. vpaddd ymm13, ymm13, ymm11
  34658. ; ENTRY: 18
  34659. mov r9, QWORD PTR [rdx+144]
  34660. add r9, 128
  34661. vpcmpeqd ymm12, ymm13, ymm10
  34662. vmovdqu ymm0, YMMWORD PTR [r9]
  34663. vmovdqu ymm1, YMMWORD PTR [r9+32]
  34664. vpand ymm0, ymm0, ymm12
  34665. vpand ymm1, ymm1, ymm12
  34666. vpor ymm4, ymm4, ymm0
  34667. vpor ymm5, ymm5, ymm1
  34668. vpaddd ymm13, ymm13, ymm11
  34669. ; ENTRY: 19
  34670. mov r9, QWORD PTR [rdx+152]
  34671. add r9, 128
  34672. vpcmpeqd ymm12, ymm13, ymm10
  34673. vmovdqu ymm0, YMMWORD PTR [r9]
  34674. vmovdqu ymm1, YMMWORD PTR [r9+32]
  34675. vpand ymm0, ymm0, ymm12
  34676. vpand ymm1, ymm1, ymm12
  34677. vpor ymm4, ymm4, ymm0
  34678. vpor ymm5, ymm5, ymm1
  34679. vpaddd ymm13, ymm13, ymm11
  34680. ; ENTRY: 20
  34681. mov r9, QWORD PTR [rdx+160]
  34682. add r9, 128
  34683. vpcmpeqd ymm12, ymm13, ymm10
  34684. vmovdqu ymm0, YMMWORD PTR [r9]
  34685. vmovdqu ymm1, YMMWORD PTR [r9+32]
  34686. vpand ymm0, ymm0, ymm12
  34687. vpand ymm1, ymm1, ymm12
  34688. vpor ymm4, ymm4, ymm0
  34689. vpor ymm5, ymm5, ymm1
  34690. vpaddd ymm13, ymm13, ymm11
  34691. ; ENTRY: 21
  34692. mov r9, QWORD PTR [rdx+168]
  34693. add r9, 128
  34694. vpcmpeqd ymm12, ymm13, ymm10
  34695. vmovdqu ymm0, YMMWORD PTR [r9]
  34696. vmovdqu ymm1, YMMWORD PTR [r9+32]
  34697. vpand ymm0, ymm0, ymm12
  34698. vpand ymm1, ymm1, ymm12
  34699. vpor ymm4, ymm4, ymm0
  34700. vpor ymm5, ymm5, ymm1
  34701. vpaddd ymm13, ymm13, ymm11
  34702. ; ENTRY: 22
  34703. mov r9, QWORD PTR [rdx+176]
  34704. add r9, 128
  34705. vpcmpeqd ymm12, ymm13, ymm10
  34706. vmovdqu ymm0, YMMWORD PTR [r9]
  34707. vmovdqu ymm1, YMMWORD PTR [r9+32]
  34708. vpand ymm0, ymm0, ymm12
  34709. vpand ymm1, ymm1, ymm12
  34710. vpor ymm4, ymm4, ymm0
  34711. vpor ymm5, ymm5, ymm1
  34712. vpaddd ymm13, ymm13, ymm11
  34713. ; ENTRY: 23
  34714. mov r9, QWORD PTR [rdx+184]
  34715. add r9, 128
  34716. vpcmpeqd ymm12, ymm13, ymm10
  34717. vmovdqu ymm0, YMMWORD PTR [r9]
  34718. vmovdqu ymm1, YMMWORD PTR [r9+32]
  34719. vpand ymm0, ymm0, ymm12
  34720. vpand ymm1, ymm1, ymm12
  34721. vpor ymm4, ymm4, ymm0
  34722. vpor ymm5, ymm5, ymm1
  34723. vpaddd ymm13, ymm13, ymm11
  34724. ; ENTRY: 24
  34725. mov r9, QWORD PTR [rdx+192]
  34726. add r9, 128
  34727. vpcmpeqd ymm12, ymm13, ymm10
  34728. vmovdqu ymm0, YMMWORD PTR [r9]
  34729. vmovdqu ymm1, YMMWORD PTR [r9+32]
  34730. vpand ymm0, ymm0, ymm12
  34731. vpand ymm1, ymm1, ymm12
  34732. vpor ymm4, ymm4, ymm0
  34733. vpor ymm5, ymm5, ymm1
  34734. vpaddd ymm13, ymm13, ymm11
  34735. ; ENTRY: 25
  34736. mov r9, QWORD PTR [rdx+200]
  34737. add r9, 128
  34738. vpcmpeqd ymm12, ymm13, ymm10
  34739. vmovdqu ymm0, YMMWORD PTR [r9]
  34740. vmovdqu ymm1, YMMWORD PTR [r9+32]
  34741. vpand ymm0, ymm0, ymm12
  34742. vpand ymm1, ymm1, ymm12
  34743. vpor ymm4, ymm4, ymm0
  34744. vpor ymm5, ymm5, ymm1
  34745. vpaddd ymm13, ymm13, ymm11
  34746. ; ENTRY: 26
  34747. mov r9, QWORD PTR [rdx+208]
  34748. add r9, 128
  34749. vpcmpeqd ymm12, ymm13, ymm10
  34750. vmovdqu ymm0, YMMWORD PTR [r9]
  34751. vmovdqu ymm1, YMMWORD PTR [r9+32]
  34752. vpand ymm0, ymm0, ymm12
  34753. vpand ymm1, ymm1, ymm12
  34754. vpor ymm4, ymm4, ymm0
  34755. vpor ymm5, ymm5, ymm1
  34756. vpaddd ymm13, ymm13, ymm11
  34757. ; ENTRY: 27
  34758. mov r9, QWORD PTR [rdx+216]
  34759. add r9, 128
  34760. vpcmpeqd ymm12, ymm13, ymm10
  34761. vmovdqu ymm0, YMMWORD PTR [r9]
  34762. vmovdqu ymm1, YMMWORD PTR [r9+32]
  34763. vpand ymm0, ymm0, ymm12
  34764. vpand ymm1, ymm1, ymm12
  34765. vpor ymm4, ymm4, ymm0
  34766. vpor ymm5, ymm5, ymm1
  34767. vpaddd ymm13, ymm13, ymm11
  34768. ; ENTRY: 28
  34769. mov r9, QWORD PTR [rdx+224]
  34770. add r9, 128
  34771. vpcmpeqd ymm12, ymm13, ymm10
  34772. vmovdqu ymm0, YMMWORD PTR [r9]
  34773. vmovdqu ymm1, YMMWORD PTR [r9+32]
  34774. vpand ymm0, ymm0, ymm12
  34775. vpand ymm1, ymm1, ymm12
  34776. vpor ymm4, ymm4, ymm0
  34777. vpor ymm5, ymm5, ymm1
  34778. vpaddd ymm13, ymm13, ymm11
  34779. ; ENTRY: 29
  34780. mov r9, QWORD PTR [rdx+232]
  34781. add r9, 128
  34782. vpcmpeqd ymm12, ymm13, ymm10
  34783. vmovdqu ymm0, YMMWORD PTR [r9]
  34784. vmovdqu ymm1, YMMWORD PTR [r9+32]
  34785. vpand ymm0, ymm0, ymm12
  34786. vpand ymm1, ymm1, ymm12
  34787. vpor ymm4, ymm4, ymm0
  34788. vpor ymm5, ymm5, ymm1
  34789. vpaddd ymm13, ymm13, ymm11
  34790. ; ENTRY: 30
  34791. mov r9, QWORD PTR [rdx+240]
  34792. add r9, 128
  34793. vpcmpeqd ymm12, ymm13, ymm10
  34794. vmovdqu ymm0, YMMWORD PTR [r9]
  34795. vmovdqu ymm1, YMMWORD PTR [r9+32]
  34796. vpand ymm0, ymm0, ymm12
  34797. vpand ymm1, ymm1, ymm12
  34798. vpor ymm4, ymm4, ymm0
  34799. vpor ymm5, ymm5, ymm1
  34800. vpaddd ymm13, ymm13, ymm11
  34801. ; ENTRY: 31
  34802. mov r9, QWORD PTR [rdx+248]
  34803. add r9, 128
  34804. vpcmpeqd ymm12, ymm13, ymm10
  34805. vmovdqu ymm0, YMMWORD PTR [r9]
  34806. vmovdqu ymm1, YMMWORD PTR [r9+32]
  34807. vpand ymm0, ymm0, ymm12
  34808. vpand ymm1, ymm1, ymm12
  34809. vpor ymm4, ymm4, ymm0
  34810. vpor ymm5, ymm5, ymm1
  34811. vpaddd ymm13, ymm13, ymm11
  34812. vmovdqu YMMWORD PTR [rcx], ymm4
  34813. vmovdqu YMMWORD PTR [rcx+32], ymm5
  34814. ; END: 16-23
  34815. vmovdqu xmm6, OWORD PTR [rsp]
  34816. vmovdqu xmm7, OWORD PTR [rsp+16]
  34817. vmovdqu xmm8, OWORD PTR [rsp+32]
  34818. vmovdqu xmm9, OWORD PTR [rsp+48]
  34819. vmovdqu xmm10, OWORD PTR [rsp+64]
  34820. vmovdqu xmm11, OWORD PTR [rsp+80]
  34821. vmovdqu xmm12, OWORD PTR [rsp+96]
  34822. vmovdqu xmm13, OWORD PTR [rsp+112]
  34823. add rsp, 128
  34824. ret
  34825. sp_3072_get_from_table_avx2_24 ENDP
  34826. _text ENDS
  34827. ENDIF
  34828. ; /* Conditionally subtract b from a using the mask m: r = a - (b AND m).
  34829. ; * m is -1 (all ones) to subtract b, and 0 to subtract nothing so that r = a.
  34830. ; *
  34831. ; * r A single precision number representing condition subtract result (48 64-bit words written).
  34832. ; * a A single precision number to subtract from (48 64-bit words read).
  34833. ; * b A single precision number to subtract (48 64-bit words read).
  34834. ; * m Mask value to apply to every word of b.
  34835. ; */
  34836. _text SEGMENT READONLY PARA
  34837. sp_3072_cond_sub_48 PROC ; In: rcx = r, rdx = a, r8 = b, r9 = m. Out: rax = 0 (no borrow) or -1 (borrow). Scratch: r8, r10, r11.
  34838. sub rsp, 384 ; scratch area for the masked copy of b: 48 words * 8 bytes
  34839. mov r10, QWORD PTR [r8] ; pass 1: [rsp+8*i] = b[i] AND m, two words per group
  34840. mov r11, QWORD PTR [r8+8]
  34841. and r10, r9 ; AND rewrites the flags, so all masking is finished before the borrow chain begins
  34842. and r11, r9
  34843. mov QWORD PTR [rsp], r10
  34844. mov QWORD PTR [rsp+8], r11
  34845. mov r10, QWORD PTR [r8+16]
  34846. mov r11, QWORD PTR [r8+24]
  34847. and r10, r9
  34848. and r11, r9
  34849. mov QWORD PTR [rsp+16], r10
  34850. mov QWORD PTR [rsp+24], r11
  34851. mov r10, QWORD PTR [r8+32]
  34852. mov r11, QWORD PTR [r8+40]
  34853. and r10, r9
  34854. and r11, r9
  34855. mov QWORD PTR [rsp+32], r10
  34856. mov QWORD PTR [rsp+40], r11
  34857. mov r10, QWORD PTR [r8+48]
  34858. mov r11, QWORD PTR [r8+56]
  34859. and r10, r9
  34860. and r11, r9
  34861. mov QWORD PTR [rsp+48], r10
  34862. mov QWORD PTR [rsp+56], r11
  34863. mov r10, QWORD PTR [r8+64]
  34864. mov r11, QWORD PTR [r8+72]
  34865. and r10, r9
  34866. and r11, r9
  34867. mov QWORD PTR [rsp+64], r10
  34868. mov QWORD PTR [rsp+72], r11
  34869. mov r10, QWORD PTR [r8+80]
  34870. mov r11, QWORD PTR [r8+88]
  34871. and r10, r9
  34872. and r11, r9
  34873. mov QWORD PTR [rsp+80], r10
  34874. mov QWORD PTR [rsp+88], r11
  34875. mov r10, QWORD PTR [r8+96]
  34876. mov r11, QWORD PTR [r8+104]
  34877. and r10, r9
  34878. and r11, r9
  34879. mov QWORD PTR [rsp+96], r10
  34880. mov QWORD PTR [rsp+104], r11
  34881. mov r10, QWORD PTR [r8+112]
  34882. mov r11, QWORD PTR [r8+120]
  34883. and r10, r9
  34884. and r11, r9
  34885. mov QWORD PTR [rsp+112], r10
  34886. mov QWORD PTR [rsp+120], r11
  34887. mov r10, QWORD PTR [r8+128]
  34888. mov r11, QWORD PTR [r8+136]
  34889. and r10, r9
  34890. and r11, r9
  34891. mov QWORD PTR [rsp+128], r10
  34892. mov QWORD PTR [rsp+136], r11
  34893. mov r10, QWORD PTR [r8+144]
  34894. mov r11, QWORD PTR [r8+152]
  34895. and r10, r9
  34896. and r11, r9
  34897. mov QWORD PTR [rsp+144], r10
  34898. mov QWORD PTR [rsp+152], r11
  34899. mov r10, QWORD PTR [r8+160]
  34900. mov r11, QWORD PTR [r8+168]
  34901. and r10, r9
  34902. and r11, r9
  34903. mov QWORD PTR [rsp+160], r10
  34904. mov QWORD PTR [rsp+168], r11
  34905. mov r10, QWORD PTR [r8+176]
  34906. mov r11, QWORD PTR [r8+184]
  34907. and r10, r9
  34908. and r11, r9
  34909. mov QWORD PTR [rsp+176], r10
  34910. mov QWORD PTR [rsp+184], r11
  34911. mov r10, QWORD PTR [r8+192]
  34912. mov r11, QWORD PTR [r8+200]
  34913. and r10, r9
  34914. and r11, r9
  34915. mov QWORD PTR [rsp+192], r10
  34916. mov QWORD PTR [rsp+200], r11
  34917. mov r10, QWORD PTR [r8+208]
  34918. mov r11, QWORD PTR [r8+216]
  34919. and r10, r9
  34920. and r11, r9
  34921. mov QWORD PTR [rsp+208], r10
  34922. mov QWORD PTR [rsp+216], r11
  34923. mov r10, QWORD PTR [r8+224]
  34924. mov r11, QWORD PTR [r8+232]
  34925. and r10, r9
  34926. and r11, r9
  34927. mov QWORD PTR [rsp+224], r10
  34928. mov QWORD PTR [rsp+232], r11
  34929. mov r10, QWORD PTR [r8+240]
  34930. mov r11, QWORD PTR [r8+248]
  34931. and r10, r9
  34932. and r11, r9
  34933. mov QWORD PTR [rsp+240], r10
  34934. mov QWORD PTR [rsp+248], r11
  34935. mov r10, QWORD PTR [r8+256]
  34936. mov r11, QWORD PTR [r8+264]
  34937. and r10, r9
  34938. and r11, r9
  34939. mov QWORD PTR [rsp+256], r10
  34940. mov QWORD PTR [rsp+264], r11
  34941. mov r10, QWORD PTR [r8+272]
  34942. mov r11, QWORD PTR [r8+280]
  34943. and r10, r9
  34944. and r11, r9
  34945. mov QWORD PTR [rsp+272], r10
  34946. mov QWORD PTR [rsp+280], r11
  34947. mov r10, QWORD PTR [r8+288]
  34948. mov r11, QWORD PTR [r8+296]
  34949. and r10, r9
  34950. and r11, r9
  34951. mov QWORD PTR [rsp+288], r10
  34952. mov QWORD PTR [rsp+296], r11
  34953. mov r10, QWORD PTR [r8+304]
  34954. mov r11, QWORD PTR [r8+312]
  34955. and r10, r9
  34956. and r11, r9
  34957. mov QWORD PTR [rsp+304], r10
  34958. mov QWORD PTR [rsp+312], r11
  34959. mov r10, QWORD PTR [r8+320]
  34960. mov r11, QWORD PTR [r8+328]
  34961. and r10, r9
  34962. and r11, r9
  34963. mov QWORD PTR [rsp+320], r10
  34964. mov QWORD PTR [rsp+328], r11
  34965. mov r10, QWORD PTR [r8+336]
  34966. mov r11, QWORD PTR [r8+344]
  34967. and r10, r9
  34968. and r11, r9
  34969. mov QWORD PTR [rsp+336], r10
  34970. mov QWORD PTR [rsp+344], r11
  34971. mov r10, QWORD PTR [r8+352]
  34972. mov r11, QWORD PTR [r8+360]
  34973. and r10, r9
  34974. and r11, r9
  34975. mov QWORD PTR [rsp+352], r10
  34976. mov QWORD PTR [rsp+360], r11
  34977. mov r10, QWORD PTR [r8+368]
  34978. mov r11, QWORD PTR [r8+376]
  34979. and r10, r9
  34980. and r11, r9
  34981. mov QWORD PTR [rsp+368], r10
  34982. mov QWORD PTR [rsp+376], r11
  34983. mov r10, QWORD PTR [rdx] ; pass 2: r = a - masked b; r10 and r11 alternate as the result word
  34984. mov r8, QWORD PTR [rsp] ; all of b has been read, so r8 is reused as scratch for the masked word
  34985. sub r10, r8 ; word 0 starts the borrow chain; only mov and sbb follow, so CF carries through
  34986. mov r11, QWORD PTR [rdx+8]
  34987. mov r8, QWORD PTR [rsp+8]
  34988. sbb r11, r8 ; word 1 and up subtract with the borrow from the word below
  34989. mov QWORD PTR [rcx], r10 ; each result is stored one step late, after the next word's sbb
  34990. mov r10, QWORD PTR [rdx+16]
  34991. mov r8, QWORD PTR [rsp+16]
  34992. sbb r10, r8
  34993. mov QWORD PTR [rcx+8], r11
  34994. mov r11, QWORD PTR [rdx+24]
  34995. mov r8, QWORD PTR [rsp+24]
  34996. sbb r11, r8
  34997. mov QWORD PTR [rcx+16], r10
  34998. mov r10, QWORD PTR [rdx+32]
  34999. mov r8, QWORD PTR [rsp+32]
  35000. sbb r10, r8
  35001. mov QWORD PTR [rcx+24], r11
  35002. mov r11, QWORD PTR [rdx+40]
  35003. mov r8, QWORD PTR [rsp+40]
  35004. sbb r11, r8
  35005. mov QWORD PTR [rcx+32], r10
  35006. mov r10, QWORD PTR [rdx+48]
  35007. mov r8, QWORD PTR [rsp+48]
  35008. sbb r10, r8
  35009. mov QWORD PTR [rcx+40], r11
  35010. mov r11, QWORD PTR [rdx+56]
  35011. mov r8, QWORD PTR [rsp+56]
  35012. sbb r11, r8
  35013. mov QWORD PTR [rcx+48], r10
  35014. mov r10, QWORD PTR [rdx+64]
  35015. mov r8, QWORD PTR [rsp+64]
  35016. sbb r10, r8
  35017. mov QWORD PTR [rcx+56], r11
  35018. mov r11, QWORD PTR [rdx+72]
  35019. mov r8, QWORD PTR [rsp+72]
  35020. sbb r11, r8
  35021. mov QWORD PTR [rcx+64], r10
  35022. mov r10, QWORD PTR [rdx+80]
  35023. mov r8, QWORD PTR [rsp+80]
  35024. sbb r10, r8
  35025. mov QWORD PTR [rcx+72], r11
  35026. mov r11, QWORD PTR [rdx+88]
  35027. mov r8, QWORD PTR [rsp+88]
  35028. sbb r11, r8
  35029. mov QWORD PTR [rcx+80], r10
  35030. mov r10, QWORD PTR [rdx+96]
  35031. mov r8, QWORD PTR [rsp+96]
  35032. sbb r10, r8
  35033. mov QWORD PTR [rcx+88], r11
  35034. mov r11, QWORD PTR [rdx+104]
  35035. mov r8, QWORD PTR [rsp+104]
  35036. sbb r11, r8
  35037. mov QWORD PTR [rcx+96], r10
  35038. mov r10, QWORD PTR [rdx+112]
  35039. mov r8, QWORD PTR [rsp+112]
  35040. sbb r10, r8
  35041. mov QWORD PTR [rcx+104], r11
  35042. mov r11, QWORD PTR [rdx+120]
  35043. mov r8, QWORD PTR [rsp+120]
  35044. sbb r11, r8
  35045. mov QWORD PTR [rcx+112], r10
  35046. mov r10, QWORD PTR [rdx+128]
  35047. mov r8, QWORD PTR [rsp+128]
  35048. sbb r10, r8
  35049. mov QWORD PTR [rcx+120], r11
  35050. mov r11, QWORD PTR [rdx+136]
  35051. mov r8, QWORD PTR [rsp+136]
  35052. sbb r11, r8
  35053. mov QWORD PTR [rcx+128], r10
  35054. mov r10, QWORD PTR [rdx+144]
  35055. mov r8, QWORD PTR [rsp+144]
  35056. sbb r10, r8
  35057. mov QWORD PTR [rcx+136], r11
  35058. mov r11, QWORD PTR [rdx+152]
  35059. mov r8, QWORD PTR [rsp+152]
  35060. sbb r11, r8
  35061. mov QWORD PTR [rcx+144], r10
  35062. mov r10, QWORD PTR [rdx+160]
  35063. mov r8, QWORD PTR [rsp+160]
  35064. sbb r10, r8
  35065. mov QWORD PTR [rcx+152], r11
  35066. mov r11, QWORD PTR [rdx+168]
  35067. mov r8, QWORD PTR [rsp+168]
  35068. sbb r11, r8
  35069. mov QWORD PTR [rcx+160], r10
  35070. mov r10, QWORD PTR [rdx+176]
  35071. mov r8, QWORD PTR [rsp+176]
  35072. sbb r10, r8
  35073. mov QWORD PTR [rcx+168], r11
  35074. mov r11, QWORD PTR [rdx+184]
  35075. mov r8, QWORD PTR [rsp+184]
  35076. sbb r11, r8
  35077. mov QWORD PTR [rcx+176], r10
  35078. mov r10, QWORD PTR [rdx+192]
  35079. mov r8, QWORD PTR [rsp+192]
  35080. sbb r10, r8
  35081. mov QWORD PTR [rcx+184], r11
  35082. mov r11, QWORD PTR [rdx+200]
  35083. mov r8, QWORD PTR [rsp+200]
  35084. sbb r11, r8
  35085. mov QWORD PTR [rcx+192], r10
  35086. mov r10, QWORD PTR [rdx+208]
  35087. mov r8, QWORD PTR [rsp+208]
  35088. sbb r10, r8
  35089. mov QWORD PTR [rcx+200], r11
  35090. mov r11, QWORD PTR [rdx+216]
  35091. mov r8, QWORD PTR [rsp+216]
  35092. sbb r11, r8
  35093. mov QWORD PTR [rcx+208], r10
  35094. mov r10, QWORD PTR [rdx+224]
  35095. mov r8, QWORD PTR [rsp+224]
  35096. sbb r10, r8
  35097. mov QWORD PTR [rcx+216], r11
  35098. mov r11, QWORD PTR [rdx+232]
  35099. mov r8, QWORD PTR [rsp+232]
  35100. sbb r11, r8
  35101. mov QWORD PTR [rcx+224], r10
  35102. mov r10, QWORD PTR [rdx+240]
  35103. mov r8, QWORD PTR [rsp+240]
  35104. sbb r10, r8
  35105. mov QWORD PTR [rcx+232], r11
  35106. mov r11, QWORD PTR [rdx+248]
  35107. mov r8, QWORD PTR [rsp+248]
  35108. sbb r11, r8
  35109. mov QWORD PTR [rcx+240], r10
  35110. mov r10, QWORD PTR [rdx+256]
  35111. mov r8, QWORD PTR [rsp+256]
  35112. sbb r10, r8
  35113. mov QWORD PTR [rcx+248], r11
  35114. mov r11, QWORD PTR [rdx+264]
  35115. mov r8, QWORD PTR [rsp+264]
  35116. sbb r11, r8
  35117. mov QWORD PTR [rcx+256], r10
  35118. mov r10, QWORD PTR [rdx+272]
  35119. mov r8, QWORD PTR [rsp+272]
  35120. sbb r10, r8
  35121. mov QWORD PTR [rcx+264], r11
  35122. mov r11, QWORD PTR [rdx+280]
  35123. mov r8, QWORD PTR [rsp+280]
  35124. sbb r11, r8
  35125. mov QWORD PTR [rcx+272], r10
  35126. mov r10, QWORD PTR [rdx+288]
  35127. mov r8, QWORD PTR [rsp+288]
  35128. sbb r10, r8
  35129. mov QWORD PTR [rcx+280], r11
  35130. mov r11, QWORD PTR [rdx+296]
  35131. mov r8, QWORD PTR [rsp+296]
  35132. sbb r11, r8
  35133. mov QWORD PTR [rcx+288], r10
  35134. mov r10, QWORD PTR [rdx+304]
  35135. mov r8, QWORD PTR [rsp+304]
  35136. sbb r10, r8
  35137. mov QWORD PTR [rcx+296], r11
  35138. mov r11, QWORD PTR [rdx+312]
  35139. mov r8, QWORD PTR [rsp+312]
  35140. sbb r11, r8
  35141. mov QWORD PTR [rcx+304], r10
  35142. mov r10, QWORD PTR [rdx+320]
  35143. mov r8, QWORD PTR [rsp+320]
  35144. sbb r10, r8
  35145. mov QWORD PTR [rcx+312], r11
  35146. mov r11, QWORD PTR [rdx+328]
  35147. mov r8, QWORD PTR [rsp+328]
  35148. sbb r11, r8
  35149. mov QWORD PTR [rcx+320], r10
  35150. mov r10, QWORD PTR [rdx+336]
  35151. mov r8, QWORD PTR [rsp+336]
  35152. sbb r10, r8
  35153. mov QWORD PTR [rcx+328], r11
  35154. mov r11, QWORD PTR [rdx+344]
  35155. mov r8, QWORD PTR [rsp+344]
  35156. sbb r11, r8
  35157. mov QWORD PTR [rcx+336], r10
  35158. mov r10, QWORD PTR [rdx+352]
  35159. mov r8, QWORD PTR [rsp+352]
  35160. sbb r10, r8
  35161. mov QWORD PTR [rcx+344], r11
  35162. mov r11, QWORD PTR [rdx+360]
  35163. mov r8, QWORD PTR [rsp+360]
  35164. sbb r11, r8
  35165. mov QWORD PTR [rcx+352], r10
  35166. mov r10, QWORD PTR [rdx+368]
  35167. mov r8, QWORD PTR [rsp+368]
  35168. sbb r10, r8
  35169. mov QWORD PTR [rcx+360], r11
  35170. mov r11, QWORD PTR [rdx+376]
  35171. mov r8, QWORD PTR [rsp+376]
  35172. sbb r11, r8
  35173. mov QWORD PTR [rcx+368], r10
  35174. mov QWORD PTR [rcx+376], r11 ; last result word, r[47]
  35175. sbb rax, rax ; rax = 0 - CF: 0 when the subtraction did not borrow, -1 when it did
  35176. add rsp, 384 ; release the scratch area
  35177. ret
  35178. sp_3072_cond_sub_48 ENDP
  35179. _text ENDS
  ; /* Reduce the number back to 3072 bits using Montgomery reduction.
  ;  *
  ;  * a   A single precision number to reduce in place. Words a[0..95] are
  ;  *     read and/or written.
  ;  * m   The single precision number representing the modulus (48 words).
  ;  * mp  The digit representing the negative inverse of m mod 2^n.
  ;  *
  ;  * Win64 register arguments: rcx = a, rdx = m, r8 = mp.
  ;  *
  ;  * Register roles inside the loop:
  ;  *   rcx      &a[i] (advanced by one word per iteration)
  ;  *   r9       m
  ;  *   r8       mp
  ;  *   r10      iterations remaining (48 down to 1)
  ;  *   r13      mu = a[i] * mp
  ;  *   r15      a[i], kept in a register instead of memory
  ;  *   rdi      a[i+1], kept in a register instead of memory
  ;  *   r11/r12  alternating carry words between column steps
  ;  *   r14      scratch for a[i+k]
  ;  *   rsi      carry out of the top word, fed into the next row
  ;  *
  ;  * After the loop the accumulated carry is negated to form the mask and
  ;  * sp_3072_cond_sub_48 is called with rcx = a, rdx = a + 48 words,
  ;  * r8 = m and r9 = mask.
  ;  */

  ; One column step of a row: a[i+k] += m[k] * mu + carry, with k = boff / 8.
  ;   In:  r13 = mu, r9 = m, rcx = &a[i], cin_reg = carry from column k-1.
  ;   Out: cout_reg = carry into column k+1.
  ;   Clobbers rax, rdx, r14 and the flags.
  SP_3072_MONT_REDUCE_48_STEP MACRO boff, cin_reg, cout_reg
          mov     rax, r13
          xor     cout_reg, cout_reg
          mul     QWORD PTR [r9+boff]
          mov     r14, QWORD PTR [rcx+boff]
          add     r14, rax
          adc     cout_reg, rdx
          add     r14, cin_reg
          mov     QWORD PTR [rcx+boff], r14
          adc     cout_reg, 0
  ENDM

  _text SEGMENT READONLY PARA
  sp_3072_mont_reduce_48 PROC
          ; rdi and rsi are callee-saved in the Microsoft x64 convention.
          push    r12
          push    r13
          push    r14
          push    r15
          push    rdi
          push    rsi
          mov     r9, rdx                         ; r9 = m (rdx is needed by mul)
          xor     rsi, rsi                        ; no carry out of the top word yet
          ; i = 48
          mov     r10, 48
          mov     r15, QWORD PTR [rcx]            ; r15 = a[0]
          mov     rdi, QWORD PTR [rcx+8]          ; rdi = a[1]
  L_3072_mont_reduce_48_loop:
          ; mu = a[i] * mp
          mov     r13, r15
          imul    r13, r8
          ; a[i+0] += m[0] * mu
          ; Only the carry (r12) is kept; the low word is not stored.
          mov     rax, r13
          xor     r12, r12
          mul     QWORD PTR [r9]
          add     r15, rax
          adc     r12, rdx
          ; a[i+1] += m[1] * mu
          ; The result stays in r15 and becomes a[i] of the next iteration.
          mov     rax, r13
          xor     r11, r11
          mul     QWORD PTR [r9+8]
          mov     r15, rdi
          add     r15, rax
          adc     r11, rdx
          add     r15, r12
          adc     r11, 0
          ; a[i+2] += m[2] * mu
          ; The result stays in rdi and becomes a[i+1] of the next iteration.
          mov     rax, r13
          xor     r12, r12
          mul     QWORD PTR [r9+16]
          mov     rdi, QWORD PTR [rcx+16]
          add     rdi, rax
          adc     r12, rdx
          add     rdi, r11
          adc     r12, 0
          ; a[i+k] += m[k] * mu for k = 3..46, two columns per repetition.
          ; Odd k:  carry in r12, carry out r11.
          ; Even k: carry in r11, carry out r12.
  sp_3072_mont_reduce_48_disp = 24
          REPT    22
          SP_3072_MONT_REDUCE_48_STEP sp_3072_mont_reduce_48_disp, r12, r11
          SP_3072_MONT_REDUCE_48_STEP sp_3072_mont_reduce_48_disp+8, r11, r12
  sp_3072_mont_reduce_48_disp = sp_3072_mont_reduce_48_disp + 16
          ENDM
          ; a[i+47] += m[47] * mu
          ; The high product word plus the previous row's top carry (rsi) is
          ; added into a[i+48]; the new top carry is collected in rsi.
          mov     rax, r13
          mul     QWORD PTR [r9+376]
          mov     r14, QWORD PTR [rcx+376]
          add     r12, rax
          adc     rdx, rsi
          mov     rsi, 0                          ; mov, not xor: CF must survive
          adc     rsi, 0
          add     r14, r12
          mov     QWORD PTR [rcx+376], r14
          adc     QWORD PTR [rcx+384], rdx
          adc     rsi, 0
          ; i -= 1
          add     rcx, 8
          dec     r10
          jnz     L_3072_mont_reduce_48_loop
          ; rcx now points at a[48]; write back the two register-cached words.
          mov     QWORD PTR [rcx], r15
          mov     QWORD PTR [rcx+8], rdi
          ; mask = -carry
          neg     rsi
          ; Set up the call arguments. m must be copied out of r9 before r9 is
          ; overwritten with the mask.
          mov     r8, r9                          ; r8  = m
          mov     r9, rsi                         ; r9  = mask
          mov     rdx, rcx                        ; rdx = a + 48 words
          sub     rcx, 384                        ; rcx = a
          ; Six pushes plus the return address leave rsp at 8 mod 16; reserve
          ; the 32-byte home space plus 8 bytes of padding so the call is made
          ; on a 16-byte aligned stack.
          sub     rsp, 40
          call    sp_3072_cond_sub_48
          add     rsp, 40
          pop     rsi
          pop     rdi
          pop     r15
          pop     r14
          pop     r13
          pop     r12
          ret
  sp_3072_mont_reduce_48 ENDP
  _text ENDS
  ; /* Sub b from a into r. (r = a - b)
  ;  *
  ;  * r  A single precision integer (48 words written).
  ;  * a  A single precision integer (48 words read).
  ;  * b  A single precision integer (48 words read).
  ;  *
  ;  * Win64 register arguments: rcx = r, rdx = a, r8 = b.
  ;  * Returns rax = 0 when there is no borrow out of the top word and
  ;  * rax = -1 when there is. Uses r9 and r10 as scratch.
  ;  */

  ; One word of the borrow chain, k = boff / 8:
  ;   load a[k], store the previous result word r[k-1], then subtract b[k]
  ;   together with the incoming borrow. The two moves leave CF untouched.
  SP_3072_SUB_48_STEP MACRO boff, cur_reg, prev_reg
          mov     cur_reg, QWORD PTR [rdx+boff]
          mov     QWORD PTR [rcx+boff-8], prev_reg
          sbb     cur_reg, QWORD PTR [r8+boff]
  ENDM

  _text SEGMENT READONLY PARA
  sp_3072_sub_48 PROC
          ; Word 0 starts the chain with a plain sub.
          mov     r9, QWORD PTR [rdx]
          sub     r9, QWORD PTR [r8]
          ; Words 1..46, two per repetition: odd words are computed in r10,
          ; even words in r9.
  sp_3072_sub_48_disp = 8
          REPT    23
          SP_3072_SUB_48_STEP sp_3072_sub_48_disp, r10, r9
          SP_3072_SUB_48_STEP sp_3072_sub_48_disp+8, r9, r10
  sp_3072_sub_48_disp = sp_3072_sub_48_disp + 16
          ENDM
          ; Word 47 and the final store.
          SP_3072_SUB_48_STEP 376, r10, r9
          mov     QWORD PTR [rcx+376], r10
          ; rax = 0 - CF: all ones when the subtraction borrowed.
          sbb     rax, rax
          ret
  sp_3072_sub_48 ENDP
  _text ENDS
  IFDEF HAVE_INTEL_AVX2
  ; /* Mul a by digit b into r. (r = a * b)
  ;  *
  ;  * r  A single precision integer (49 words written: r[0..48]).
  ;  * a  A single precision integer (48 words read).
  ;  * b  A single precision digit.
  ;  *
  ;  * Win64 register arguments: rcx = r, rdx = a, r8 = b.
  ;  *
  ;  * Register roles:
  ;  *   rax      a (moved out of rdx, which mulx uses as its implicit operand)
  ;  *   rdx      b
  ;  *   r13      constant zero
  ;  *   r9/r10   low/high halves of the current product
  ;  *   r11/r12  alternating accumulators for the current/next result word
  ;  */

  ; One product word, k = boff / 8:
  ;   r10:r9 = a[k] * b; acc_reg += r9 (carry chain on CF via adcx);
  ;   nxt_reg = r10 (+ OF via adox); r[k] = acc_reg.
  SP_3072_MUL_D_AVX2_48_STEP MACRO boff, acc_reg, nxt_reg
          mulx    r10, r9, QWORD PTR [rax+boff]
          mov     nxt_reg, r13
          adcx    acc_reg, r9
          adox    nxt_reg, r10
          mov     QWORD PTR [rcx+boff], acc_reg
  ENDM

  _text SEGMENT READONLY PARA
  sp_3072_mul_d_avx2_48 PROC
          push    r12
          push    r13
          mov     rax, rdx
          ; A[0] * B
          mov     rdx, r8
          xor     r13, r13                        ; r13 = 0; also clears CF and OF
          mulx    r12, r11, QWORD PTR [rax]
          mov     QWORD PTR [rcx], r11
          ; A[1] * B .. A[46] * B, two words per repetition: odd words
          ; accumulate in r12, even words in r11.
  sp_3072_mul_d_avx2_48_disp = 8
          REPT    23
          SP_3072_MUL_D_AVX2_48_STEP sp_3072_mul_d_avx2_48_disp, r12, r11
          SP_3072_MUL_D_AVX2_48_STEP sp_3072_mul_d_avx2_48_disp+8, r11, r12
  sp_3072_mul_d_avx2_48_disp = sp_3072_mul_d_avx2_48_disp + 16
          ENDM
          ; A[47] * B
          ; The last carry on CF is folded into the top word before both
          ; remaining words are stored.
          mulx    r10, r9, QWORD PTR [rax+376]
          mov     r11, r13
          adcx    r12, r9
          adox    r11, r10
          adcx    r11, r13
          mov     QWORD PTR [rcx+376], r12
          mov     QWORD PTR [rcx+384], r11
          pop     r13
          pop     r12
          ret
  sp_3072_mul_d_avx2_48 ENDP
  _text ENDS
  ENDIF
  IFDEF _WIN64
  ; /* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div)
  ;  *
  ;  * d1   The high order half of the number to divide.
  ;  * d0   The low order half of the number to divide.
  ;  * div  The divisor.
  ;  * returns the quotient of the division in rax; the div instruction
  ;  *         leaves the remainder in rdx.
  ;  *
  ;  * Win64 register arguments: rcx = d1, rdx = d0, r8 = div.
  ;  *
  ;  * The div instruction raises a divide error when div is zero or when the
  ;  * quotient does not fit in 64 bits (d1 >= div); no check is made here.
  ;  */
  _text SEGMENT READONLY PARA
  div_3072_word_asm_48 PROC
          ; div takes its dividend in rdx:rax. d0 must leave rdx before d1 is
          ; moved in.
          mov     rax, rdx                        ; rax = d0
          mov     rdx, rcx                        ; rdx = d1
          div     r8
          ret
  div_3072_word_asm_48 ENDP
  _text ENDS
  ENDIF
  36188. IFDEF HAVE_INTEL_AVX2
  36189. ; /* Conditionally subtract b from a using the mask m.
  36190. ; * m is -1 to subtract and 0 when not copying.
  36191. ; *
  36192. ; * r A single precision number representing condition subtract result.
  36193. ; * a A single precision number to subtract from.
  36194. ; * b A single precision number to subtract.
  36195. ; * m Mask value to apply.
  36196. ; */
  36197. _text SEGMENT READONLY PARA
  36198. sp_3072_cond_sub_avx2_48 PROC
  36199. push r12
  36200. mov r12, QWORD PTR [r8]
  36201. mov r10, QWORD PTR [rdx]
  36202. pext r12, r12, r9
  36203. sub r10, r12
  36204. mov r12, QWORD PTR [r8+8]
  36205. mov r11, QWORD PTR [rdx+8]
  36206. pext r12, r12, r9
  36207. mov QWORD PTR [rcx], r10
  36208. sbb r11, r12
  36209. mov r10, QWORD PTR [r8+16]
  36210. mov r12, QWORD PTR [rdx+16]
  36211. pext r10, r10, r9
  36212. mov QWORD PTR [rcx+8], r11
  36213. sbb r12, r10
  36214. mov r11, QWORD PTR [r8+24]
  36215. mov r10, QWORD PTR [rdx+24]
  36216. pext r11, r11, r9
  36217. mov QWORD PTR [rcx+16], r12
  36218. sbb r10, r11
  36219. mov r12, QWORD PTR [r8+32]
  36220. mov r11, QWORD PTR [rdx+32]
  36221. pext r12, r12, r9
  36222. mov QWORD PTR [rcx+24], r10
  36223. sbb r11, r12
  36224. mov r10, QWORD PTR [r8+40]
  36225. mov r12, QWORD PTR [rdx+40]
  36226. pext r10, r10, r9
  36227. mov QWORD PTR [rcx+32], r11
  36228. sbb r12, r10
  36229. mov r11, QWORD PTR [r8+48]
  36230. mov r10, QWORD PTR [rdx+48]
  36231. pext r11, r11, r9
  36232. mov QWORD PTR [rcx+40], r12
  36233. sbb r10, r11
  36234. mov r12, QWORD PTR [r8+56]
  36235. mov r11, QWORD PTR [rdx+56]
  36236. pext r12, r12, r9
  36237. mov QWORD PTR [rcx+48], r10
  36238. sbb r11, r12
  36239. mov r10, QWORD PTR [r8+64]
  36240. mov r12, QWORD PTR [rdx+64]
  36241. pext r10, r10, r9
  36242. mov QWORD PTR [rcx+56], r11
  36243. sbb r12, r10
  36244. mov r11, QWORD PTR [r8+72]
  36245. mov r10, QWORD PTR [rdx+72]
  36246. pext r11, r11, r9
  36247. mov QWORD PTR [rcx+64], r12
  36248. sbb r10, r11
  36249. mov r12, QWORD PTR [r8+80]
  36250. mov r11, QWORD PTR [rdx+80]
  36251. pext r12, r12, r9
  36252. mov QWORD PTR [rcx+72], r10
  36253. sbb r11, r12
  36254. mov r10, QWORD PTR [r8+88]
  36255. mov r12, QWORD PTR [rdx+88]
  36256. pext r10, r10, r9
  36257. mov QWORD PTR [rcx+80], r11
  36258. sbb r12, r10
  36259. mov r11, QWORD PTR [r8+96]
  36260. mov r10, QWORD PTR [rdx+96]
  36261. pext r11, r11, r9
  36262. mov QWORD PTR [rcx+88], r12
  36263. sbb r10, r11
  36264. mov r12, QWORD PTR [r8+104]
  36265. mov r11, QWORD PTR [rdx+104]
  36266. pext r12, r12, r9
  36267. mov QWORD PTR [rcx+96], r10
  36268. sbb r11, r12
  36269. mov r10, QWORD PTR [r8+112]
  36270. mov r12, QWORD PTR [rdx+112]
  36271. pext r10, r10, r9
  36272. mov QWORD PTR [rcx+104], r11
  36273. sbb r12, r10
  36274. mov r11, QWORD PTR [r8+120]
  36275. mov r10, QWORD PTR [rdx+120]
  36276. pext r11, r11, r9
  36277. mov QWORD PTR [rcx+112], r12
  36278. sbb r10, r11
  36279. mov r12, QWORD PTR [r8+128]
  36280. mov r11, QWORD PTR [rdx+128]
  36281. pext r12, r12, r9
  36282. mov QWORD PTR [rcx+120], r10
  36283. sbb r11, r12
  36284. mov r10, QWORD PTR [r8+136]
  36285. mov r12, QWORD PTR [rdx+136]
  36286. pext r10, r10, r9
  36287. mov QWORD PTR [rcx+128], r11
  36288. sbb r12, r10
  36289. mov r11, QWORD PTR [r8+144]
  36290. mov r10, QWORD PTR [rdx+144]
  36291. pext r11, r11, r9
  36292. mov QWORD PTR [rcx+136], r12
  36293. sbb r10, r11
  36294. mov r12, QWORD PTR [r8+152]
  36295. mov r11, QWORD PTR [rdx+152]
  36296. pext r12, r12, r9
  36297. mov QWORD PTR [rcx+144], r10
  36298. sbb r11, r12
  36299. mov r10, QWORD PTR [r8+160]
  36300. mov r12, QWORD PTR [rdx+160]
  36301. pext r10, r10, r9
  36302. mov QWORD PTR [rcx+152], r11
  36303. sbb r12, r10
  36304. mov r11, QWORD PTR [r8+168]
  36305. mov r10, QWORD PTR [rdx+168]
  36306. pext r11, r11, r9
  36307. mov QWORD PTR [rcx+160], r12
  36308. sbb r10, r11
  36309. mov r12, QWORD PTR [r8+176]
  36310. mov r11, QWORD PTR [rdx+176]
  36311. pext r12, r12, r9
  36312. mov QWORD PTR [rcx+168], r10
  36313. sbb r11, r12
  36314. mov r10, QWORD PTR [r8+184]
  36315. mov r12, QWORD PTR [rdx+184]
  36316. pext r10, r10, r9
  36317. mov QWORD PTR [rcx+176], r11
  36318. sbb r12, r10
  36319. mov r11, QWORD PTR [r8+192]
  36320. mov r10, QWORD PTR [rdx+192]
  36321. pext r11, r11, r9
  36322. mov QWORD PTR [rcx+184], r12
  36323. sbb r10, r11
  36324. mov r12, QWORD PTR [r8+200]
  36325. mov r11, QWORD PTR [rdx+200]
  36326. pext r12, r12, r9
  36327. mov QWORD PTR [rcx+192], r10
  36328. sbb r11, r12
  36329. mov r10, QWORD PTR [r8+208]
  36330. mov r12, QWORD PTR [rdx+208]
  36331. pext r10, r10, r9
  36332. mov QWORD PTR [rcx+200], r11
  36333. sbb r12, r10
  36334. mov r11, QWORD PTR [r8+216]
  36335. mov r10, QWORD PTR [rdx+216]
  36336. pext r11, r11, r9
  36337. mov QWORD PTR [rcx+208], r12
  36338. sbb r10, r11
  36339. mov r12, QWORD PTR [r8+224]
  36340. mov r11, QWORD PTR [rdx+224]
  36341. pext r12, r12, r9
  36342. mov QWORD PTR [rcx+216], r10
  36343. sbb r11, r12
  36344. mov r10, QWORD PTR [r8+232]
  36345. mov r12, QWORD PTR [rdx+232]
  36346. pext r10, r10, r9
  36347. mov QWORD PTR [rcx+224], r11
  36348. sbb r12, r10
  36349. mov r11, QWORD PTR [r8+240]
  36350. mov r10, QWORD PTR [rdx+240]
  36351. pext r11, r11, r9
  36352. mov QWORD PTR [rcx+232], r12
  36353. sbb r10, r11
  36354. mov r12, QWORD PTR [r8+248]
  36355. mov r11, QWORD PTR [rdx+248]
  36356. pext r12, r12, r9
  36357. mov QWORD PTR [rcx+240], r10
  36358. sbb r11, r12
  36359. mov r10, QWORD PTR [r8+256]
  36360. mov r12, QWORD PTR [rdx+256]
  36361. pext r10, r10, r9
  36362. mov QWORD PTR [rcx+248], r11
  36363. sbb r12, r10
  36364. mov r11, QWORD PTR [r8+264]
  36365. mov r10, QWORD PTR [rdx+264]
  36366. pext r11, r11, r9
  36367. mov QWORD PTR [rcx+256], r12
  36368. sbb r10, r11
  36369. mov r12, QWORD PTR [r8+272]
  36370. mov r11, QWORD PTR [rdx+272]
  36371. pext r12, r12, r9
  36372. mov QWORD PTR [rcx+264], r10
  36373. sbb r11, r12
  36374. mov r10, QWORD PTR [r8+280]
  36375. mov r12, QWORD PTR [rdx+280]
  36376. pext r10, r10, r9
  36377. mov QWORD PTR [rcx+272], r11
  36378. sbb r12, r10
  36379. mov r11, QWORD PTR [r8+288]
  36380. mov r10, QWORD PTR [rdx+288]
  36381. pext r11, r11, r9
  36382. mov QWORD PTR [rcx+280], r12
  36383. sbb r10, r11
  36384. mov r12, QWORD PTR [r8+296]
  36385. mov r11, QWORD PTR [rdx+296]
  36386. pext r12, r12, r9
  36387. mov QWORD PTR [rcx+288], r10
  36388. sbb r11, r12
  36389. mov r10, QWORD PTR [r8+304]
  36390. mov r12, QWORD PTR [rdx+304]
  36391. pext r10, r10, r9
  36392. mov QWORD PTR [rcx+296], r11
  36393. sbb r12, r10
  36394. mov r11, QWORD PTR [r8+312]
  36395. mov r10, QWORD PTR [rdx+312]
  36396. pext r11, r11, r9
  36397. mov QWORD PTR [rcx+304], r12
  36398. sbb r10, r11
  36399. mov r12, QWORD PTR [r8+320]
  36400. mov r11, QWORD PTR [rdx+320]
  36401. pext r12, r12, r9
  36402. mov QWORD PTR [rcx+312], r10
  36403. sbb r11, r12
  36404. mov r10, QWORD PTR [r8+328]
  36405. mov r12, QWORD PTR [rdx+328]
  36406. pext r10, r10, r9
  36407. mov QWORD PTR [rcx+320], r11
  36408. sbb r12, r10
  36409. mov r11, QWORD PTR [r8+336]
  36410. mov r10, QWORD PTR [rdx+336]
  36411. pext r11, r11, r9
  36412. mov QWORD PTR [rcx+328], r12
  36413. sbb r10, r11
  36414. mov r12, QWORD PTR [r8+344]
  36415. mov r11, QWORD PTR [rdx+344]
  36416. pext r12, r12, r9
  36417. mov QWORD PTR [rcx+336], r10
  36418. sbb r11, r12
  36419. mov r10, QWORD PTR [r8+352]
  36420. mov r12, QWORD PTR [rdx+352]
  36421. pext r10, r10, r9
  36422. mov QWORD PTR [rcx+344], r11
  36423. sbb r12, r10
  36424. mov r11, QWORD PTR [r8+360]
  36425. mov r10, QWORD PTR [rdx+360]
  36426. pext r11, r11, r9
  36427. mov QWORD PTR [rcx+352], r12
  36428. sbb r10, r11
  36429. mov r12, QWORD PTR [r8+368]
  36430. mov r11, QWORD PTR [rdx+368]
  36431. pext r12, r12, r9
  36432. mov QWORD PTR [rcx+360], r10
  36433. sbb r11, r12
  36434. mov r10, QWORD PTR [r8+376]
  36435. mov r12, QWORD PTR [rdx+376]
  36436. pext r10, r10, r9
  36437. mov QWORD PTR [rcx+368], r11
  36438. sbb r12, r10
  36439. mov QWORD PTR [rcx+376], r12
  36440. sbb rax, rax
  36441. pop r12
  36442. ret
  36443. sp_3072_cond_sub_avx2_48 ENDP
  36444. _text ENDS
  36445. ENDIF
  36446. ; /* Compare a with b in constant time (48 64-bit words each, offsets 0..376).
  36447. ; *
  36448. ; * a A single precision integer (pointer passed in rcx).
  36449. ; * b A single precision integer (pointer passed in rdx).
  36450. ; * return -1, 0 or +1 in rax if a is less than, equal to or greater than b
  36451. ; * respectively. Words are compared from offset 376 down to 0, with no branches.
  36452. ; */
  36453. _text SEGMENT READONLY PARA
  36454. sp_3072_cmp_48 PROC
  36455. push r12 ; r12 is used as a scratch register below and restored before ret
  36456. xor r9, r9 ; r9 = 0: the value the mask is switched to once a difference is found
  36457. mov r8, -1 ; r8 = mask: all ones while every word compared so far was equal
  36458. mov rax, -1 ; rax = result accumulator; the final xor turns it into 0 when a == b
  36459. mov r10, 1 ; r10 = 1: the "a greater than b" result
  36460. mov r11, QWORD PTR [rcx+376] ; r11 = a[47], the word at the highest offset (compared first)
  36461. mov r12, QWORD PTR [rdx+376] ; r12 = b[47]
  36462. and r11, r8 ; mask both words: once r8 is 0 they both become 0 and compare equal
  36463. and r12, r8
  36464. sub r11, r12 ; sets CF/ZF for the unsigned comparison of the masked words
  36465. cmova rax, r10 ; a word > b word (CF=0 and ZF=0): result = 1
  36466. cmovc rax, r8 ; a word < b word (CF=1): result = r8, which is still -1 at this point
  36467. cmovnz r8, r9 ; words differ: clear the mask so all lower words are ignored
  36468. mov r11, QWORD PTR [rcx+368] ; word 46 (a[46] vs b[46]); same masked compare as above
  36469. mov r12, QWORD PTR [rdx+368]
  36470. and r11, r8
  36471. and r12, r8
  36472. sub r11, r12
  36473. cmova rax, r10
  36474. cmovc rax, r8
  36475. cmovnz r8, r9
  36476. mov r11, QWORD PTR [rcx+360] ; word 45
  36477. mov r12, QWORD PTR [rdx+360]
  36478. and r11, r8
  36479. and r12, r8
  36480. sub r11, r12
  36481. cmova rax, r10
  36482. cmovc rax, r8
  36483. cmovnz r8, r9
  36484. mov r11, QWORD PTR [rcx+352] ; word 44
  36485. mov r12, QWORD PTR [rdx+352]
  36486. and r11, r8
  36487. and r12, r8
  36488. sub r11, r12
  36489. cmova rax, r10
  36490. cmovc rax, r8
  36491. cmovnz r8, r9
  36492. mov r11, QWORD PTR [rcx+344] ; word 43
  36493. mov r12, QWORD PTR [rdx+344]
  36494. and r11, r8
  36495. and r12, r8
  36496. sub r11, r12
  36497. cmova rax, r10
  36498. cmovc rax, r8
  36499. cmovnz r8, r9
  36500. mov r11, QWORD PTR [rcx+336] ; word 42
  36501. mov r12, QWORD PTR [rdx+336]
  36502. and r11, r8
  36503. and r12, r8
  36504. sub r11, r12
  36505. cmova rax, r10
  36506. cmovc rax, r8
  36507. cmovnz r8, r9
  36508. mov r11, QWORD PTR [rcx+328] ; word 41
  36509. mov r12, QWORD PTR [rdx+328]
  36510. and r11, r8
  36511. and r12, r8
  36512. sub r11, r12
  36513. cmova rax, r10
  36514. cmovc rax, r8
  36515. cmovnz r8, r9
  36516. mov r11, QWORD PTR [rcx+320] ; word 40
  36517. mov r12, QWORD PTR [rdx+320]
  36518. and r11, r8
  36519. and r12, r8
  36520. sub r11, r12
  36521. cmova rax, r10
  36522. cmovc rax, r8
  36523. cmovnz r8, r9
  36524. mov r11, QWORD PTR [rcx+312] ; word 39
  36525. mov r12, QWORD PTR [rdx+312]
  36526. and r11, r8
  36527. and r12, r8
  36528. sub r11, r12
  36529. cmova rax, r10
  36530. cmovc rax, r8
  36531. cmovnz r8, r9
  36532. mov r11, QWORD PTR [rcx+304] ; word 38
  36533. mov r12, QWORD PTR [rdx+304]
  36534. and r11, r8
  36535. and r12, r8
  36536. sub r11, r12
  36537. cmova rax, r10
  36538. cmovc rax, r8
  36539. cmovnz r8, r9
  36540. mov r11, QWORD PTR [rcx+296] ; word 37
  36541. mov r12, QWORD PTR [rdx+296]
  36542. and r11, r8
  36543. and r12, r8
  36544. sub r11, r12
  36545. cmova rax, r10
  36546. cmovc rax, r8
  36547. cmovnz r8, r9
  36548. mov r11, QWORD PTR [rcx+288] ; word 36
  36549. mov r12, QWORD PTR [rdx+288]
  36550. and r11, r8
  36551. and r12, r8
  36552. sub r11, r12
  36553. cmova rax, r10
  36554. cmovc rax, r8
  36555. cmovnz r8, r9
  36556. mov r11, QWORD PTR [rcx+280] ; word 35
  36557. mov r12, QWORD PTR [rdx+280]
  36558. and r11, r8
  36559. and r12, r8
  36560. sub r11, r12
  36561. cmova rax, r10
  36562. cmovc rax, r8
  36563. cmovnz r8, r9
  36564. mov r11, QWORD PTR [rcx+272] ; word 34
  36565. mov r12, QWORD PTR [rdx+272]
  36566. and r11, r8
  36567. and r12, r8
  36568. sub r11, r12
  36569. cmova rax, r10
  36570. cmovc rax, r8
  36571. cmovnz r8, r9
  36572. mov r11, QWORD PTR [rcx+264] ; word 33
  36573. mov r12, QWORD PTR [rdx+264]
  36574. and r11, r8
  36575. and r12, r8
  36576. sub r11, r12
  36577. cmova rax, r10
  36578. cmovc rax, r8
  36579. cmovnz r8, r9
  36580. mov r11, QWORD PTR [rcx+256] ; word 32
  36581. mov r12, QWORD PTR [rdx+256]
  36582. and r11, r8
  36583. and r12, r8
  36584. sub r11, r12
  36585. cmova rax, r10
  36586. cmovc rax, r8
  36587. cmovnz r8, r9
  36588. mov r11, QWORD PTR [rcx+248] ; word 31
  36589. mov r12, QWORD PTR [rdx+248]
  36590. and r11, r8
  36591. and r12, r8
  36592. sub r11, r12
  36593. cmova rax, r10
  36594. cmovc rax, r8
  36595. cmovnz r8, r9
  36596. mov r11, QWORD PTR [rcx+240] ; word 30
  36597. mov r12, QWORD PTR [rdx+240]
  36598. and r11, r8
  36599. and r12, r8
  36600. sub r11, r12
  36601. cmova rax, r10
  36602. cmovc rax, r8
  36603. cmovnz r8, r9
  36604. mov r11, QWORD PTR [rcx+232] ; word 29
  36605. mov r12, QWORD PTR [rdx+232]
  36606. and r11, r8
  36607. and r12, r8
  36608. sub r11, r12
  36609. cmova rax, r10
  36610. cmovc rax, r8
  36611. cmovnz r8, r9
  36612. mov r11, QWORD PTR [rcx+224] ; word 28
  36613. mov r12, QWORD PTR [rdx+224]
  36614. and r11, r8
  36615. and r12, r8
  36616. sub r11, r12
  36617. cmova rax, r10
  36618. cmovc rax, r8
  36619. cmovnz r8, r9
  36620. mov r11, QWORD PTR [rcx+216] ; word 27
  36621. mov r12, QWORD PTR [rdx+216]
  36622. and r11, r8
  36623. and r12, r8
  36624. sub r11, r12
  36625. cmova rax, r10
  36626. cmovc rax, r8
  36627. cmovnz r8, r9
  36628. mov r11, QWORD PTR [rcx+208] ; word 26
  36629. mov r12, QWORD PTR [rdx+208]
  36630. and r11, r8
  36631. and r12, r8
  36632. sub r11, r12
  36633. cmova rax, r10
  36634. cmovc rax, r8
  36635. cmovnz r8, r9
  36636. mov r11, QWORD PTR [rcx+200] ; word 25
  36637. mov r12, QWORD PTR [rdx+200]
  36638. and r11, r8
  36639. and r12, r8
  36640. sub r11, r12
  36641. cmova rax, r10
  36642. cmovc rax, r8
  36643. cmovnz r8, r9
  36644. mov r11, QWORD PTR [rcx+192] ; word 24
  36645. mov r12, QWORD PTR [rdx+192]
  36646. and r11, r8
  36647. and r12, r8
  36648. sub r11, r12
  36649. cmova rax, r10
  36650. cmovc rax, r8
  36651. cmovnz r8, r9
  36652. mov r11, QWORD PTR [rcx+184] ; word 23
  36653. mov r12, QWORD PTR [rdx+184]
  36654. and r11, r8
  36655. and r12, r8
  36656. sub r11, r12
  36657. cmova rax, r10
  36658. cmovc rax, r8
  36659. cmovnz r8, r9
  36660. mov r11, QWORD PTR [rcx+176] ; word 22
  36661. mov r12, QWORD PTR [rdx+176]
  36662. and r11, r8
  36663. and r12, r8
  36664. sub r11, r12
  36665. cmova rax, r10
  36666. cmovc rax, r8
  36667. cmovnz r8, r9
  36668. mov r11, QWORD PTR [rcx+168] ; word 21
  36669. mov r12, QWORD PTR [rdx+168]
  36670. and r11, r8
  36671. and r12, r8
  36672. sub r11, r12
  36673. cmova rax, r10
  36674. cmovc rax, r8
  36675. cmovnz r8, r9
  36676. mov r11, QWORD PTR [rcx+160] ; word 20
  36677. mov r12, QWORD PTR [rdx+160]
  36678. and r11, r8
  36679. and r12, r8
  36680. sub r11, r12
  36681. cmova rax, r10
  36682. cmovc rax, r8
  36683. cmovnz r8, r9
  36684. mov r11, QWORD PTR [rcx+152] ; word 19
  36685. mov r12, QWORD PTR [rdx+152]
  36686. and r11, r8
  36687. and r12, r8
  36688. sub r11, r12
  36689. cmova rax, r10
  36690. cmovc rax, r8
  36691. cmovnz r8, r9
  36692. mov r11, QWORD PTR [rcx+144] ; word 18
  36693. mov r12, QWORD PTR [rdx+144]
  36694. and r11, r8
  36695. and r12, r8
  36696. sub r11, r12
  36697. cmova rax, r10
  36698. cmovc rax, r8
  36699. cmovnz r8, r9
  36700. mov r11, QWORD PTR [rcx+136] ; word 17
  36701. mov r12, QWORD PTR [rdx+136]
  36702. and r11, r8
  36703. and r12, r8
  36704. sub r11, r12
  36705. cmova rax, r10
  36706. cmovc rax, r8
  36707. cmovnz r8, r9
  36708. mov r11, QWORD PTR [rcx+128] ; word 16
  36709. mov r12, QWORD PTR [rdx+128]
  36710. and r11, r8
  36711. and r12, r8
  36712. sub r11, r12
  36713. cmova rax, r10
  36714. cmovc rax, r8
  36715. cmovnz r8, r9
  36716. mov r11, QWORD PTR [rcx+120] ; word 15
  36717. mov r12, QWORD PTR [rdx+120]
  36718. and r11, r8
  36719. and r12, r8
  36720. sub r11, r12
  36721. cmova rax, r10
  36722. cmovc rax, r8
  36723. cmovnz r8, r9
  36724. mov r11, QWORD PTR [rcx+112] ; word 14
  36725. mov r12, QWORD PTR [rdx+112]
  36726. and r11, r8
  36727. and r12, r8
  36728. sub r11, r12
  36729. cmova rax, r10
  36730. cmovc rax, r8
  36731. cmovnz r8, r9
  36732. mov r11, QWORD PTR [rcx+104] ; word 13
  36733. mov r12, QWORD PTR [rdx+104]
  36734. and r11, r8
  36735. and r12, r8
  36736. sub r11, r12
  36737. cmova rax, r10
  36738. cmovc rax, r8
  36739. cmovnz r8, r9
  36740. mov r11, QWORD PTR [rcx+96] ; word 12
  36741. mov r12, QWORD PTR [rdx+96]
  36742. and r11, r8
  36743. and r12, r8
  36744. sub r11, r12
  36745. cmova rax, r10
  36746. cmovc rax, r8
  36747. cmovnz r8, r9
  36748. mov r11, QWORD PTR [rcx+88] ; word 11
  36749. mov r12, QWORD PTR [rdx+88]
  36750. and r11, r8
  36751. and r12, r8
  36752. sub r11, r12
  36753. cmova rax, r10
  36754. cmovc rax, r8
  36755. cmovnz r8, r9
  36756. mov r11, QWORD PTR [rcx+80] ; word 10
  36757. mov r12, QWORD PTR [rdx+80]
  36758. and r11, r8
  36759. and r12, r8
  36760. sub r11, r12
  36761. cmova rax, r10
  36762. cmovc rax, r8
  36763. cmovnz r8, r9
  36764. mov r11, QWORD PTR [rcx+72] ; word 9
  36765. mov r12, QWORD PTR [rdx+72]
  36766. and r11, r8
  36767. and r12, r8
  36768. sub r11, r12
  36769. cmova rax, r10
  36770. cmovc rax, r8
  36771. cmovnz r8, r9
  36772. mov r11, QWORD PTR [rcx+64] ; word 8
  36773. mov r12, QWORD PTR [rdx+64]
  36774. and r11, r8
  36775. and r12, r8
  36776. sub r11, r12
  36777. cmova rax, r10
  36778. cmovc rax, r8
  36779. cmovnz r8, r9
  36780. mov r11, QWORD PTR [rcx+56] ; word 7
  36781. mov r12, QWORD PTR [rdx+56]
  36782. and r11, r8
  36783. and r12, r8
  36784. sub r11, r12
  36785. cmova rax, r10
  36786. cmovc rax, r8
  36787. cmovnz r8, r9
  36788. mov r11, QWORD PTR [rcx+48] ; word 6
  36789. mov r12, QWORD PTR [rdx+48]
  36790. and r11, r8
  36791. and r12, r8
  36792. sub r11, r12
  36793. cmova rax, r10
  36794. cmovc rax, r8
  36795. cmovnz r8, r9
  36796. mov r11, QWORD PTR [rcx+40] ; word 5
  36797. mov r12, QWORD PTR [rdx+40]
  36798. and r11, r8
  36799. and r12, r8
  36800. sub r11, r12
  36801. cmova rax, r10
  36802. cmovc rax, r8
  36803. cmovnz r8, r9
  36804. mov r11, QWORD PTR [rcx+32] ; word 4
  36805. mov r12, QWORD PTR [rdx+32]
  36806. and r11, r8
  36807. and r12, r8
  36808. sub r11, r12
  36809. cmova rax, r10
  36810. cmovc rax, r8
  36811. cmovnz r8, r9
  36812. mov r11, QWORD PTR [rcx+24] ; word 3
  36813. mov r12, QWORD PTR [rdx+24]
  36814. and r11, r8
  36815. and r12, r8
  36816. sub r11, r12
  36817. cmova rax, r10
  36818. cmovc rax, r8
  36819. cmovnz r8, r9
  36820. mov r11, QWORD PTR [rcx+16] ; word 2
  36821. mov r12, QWORD PTR [rdx+16]
  36822. and r11, r8
  36823. and r12, r8
  36824. sub r11, r12
  36825. cmova rax, r10
  36826. cmovc rax, r8
  36827. cmovnz r8, r9
  36828. mov r11, QWORD PTR [rcx+8] ; word 1
  36829. mov r12, QWORD PTR [rdx+8]
  36830. and r11, r8
  36831. and r12, r8
  36832. sub r11, r12
  36833. cmova rax, r10
  36834. cmovc rax, r8
  36835. cmovnz r8, r9
  36836. mov r11, QWORD PTR [rcx] ; word 0, the last word compared
  36837. mov r12, QWORD PTR [rdx]
  36838. and r11, r8
  36839. and r12, r8
  36840. sub r11, r12
  36841. cmova rax, r10
  36842. cmovc rax, r8
  36843. cmovnz r8, r9
  36844. xor rax, r8 ; all words equal: r8 = -1, so rax = -1 xor -1 = 0; otherwise r8 = 0 and rax (+1 or -1) is kept
  36845. pop r12 ; restore the caller's r12 saved at entry
  36846. ret
  36847. sp_3072_cmp_48 ENDP
  36848. _text ENDS
  36849. IFNDEF WC_NO_CACHE_RESISTANT
  36850. _text SEGMENT READONLY PARA
  36851. sp_3072_get_from_table_48 PROC
  36852. sub rsp, 128
  36853. vmovdqu OWORD PTR [rsp], xmm6
  36854. vmovdqu OWORD PTR [rsp+16], xmm7
  36855. vmovdqu OWORD PTR [rsp+32], xmm8
  36856. vmovdqu OWORD PTR [rsp+48], xmm9
  36857. vmovdqu OWORD PTR [rsp+64], xmm10
  36858. vmovdqu OWORD PTR [rsp+80], xmm11
  36859. vmovdqu OWORD PTR [rsp+96], xmm12
  36860. vmovdqu OWORD PTR [rsp+112], xmm13
  36861. mov rax, 1
  36862. movd xmm10, r8
  36863. movd xmm11, rax
  36864. pxor xmm13, xmm13
  36865. pshufd xmm11, xmm11, 0
  36866. pshufd xmm10, xmm10, 0
  36867. ; START: 0-7
  36868. pxor xmm13, xmm13
  36869. pxor xmm4, xmm4
  36870. pxor xmm5, xmm5
  36871. pxor xmm6, xmm6
  36872. pxor xmm7, xmm7
  36873. ; ENTRY: 0
  36874. mov r9, QWORD PTR [rdx]
  36875. movdqu xmm12, xmm13
  36876. pcmpeqd xmm12, xmm10
  36877. movdqu xmm0, [r9]
  36878. movdqu xmm1, [r9+16]
  36879. movdqu xmm2, [r9+32]
  36880. movdqu xmm3, [r9+48]
  36881. pand xmm0, xmm12
  36882. pand xmm1, xmm12
  36883. pand xmm2, xmm12
  36884. pand xmm3, xmm12
  36885. por xmm4, xmm0
  36886. por xmm5, xmm1
  36887. por xmm6, xmm2
  36888. por xmm7, xmm3
  36889. paddd xmm13, xmm11
  36890. ; ENTRY: 1
  36891. mov r9, QWORD PTR [rdx+8]
  36892. movdqu xmm12, xmm13
  36893. pcmpeqd xmm12, xmm10
  36894. movdqu xmm0, [r9]
  36895. movdqu xmm1, [r9+16]
  36896. movdqu xmm2, [r9+32]
  36897. movdqu xmm3, [r9+48]
  36898. pand xmm0, xmm12
  36899. pand xmm1, xmm12
  36900. pand xmm2, xmm12
  36901. pand xmm3, xmm12
  36902. por xmm4, xmm0
  36903. por xmm5, xmm1
  36904. por xmm6, xmm2
  36905. por xmm7, xmm3
  36906. paddd xmm13, xmm11
  36907. ; ENTRY: 2
  36908. mov r9, QWORD PTR [rdx+16]
  36909. movdqu xmm12, xmm13
  36910. pcmpeqd xmm12, xmm10
  36911. movdqu xmm0, [r9]
  36912. movdqu xmm1, [r9+16]
  36913. movdqu xmm2, [r9+32]
  36914. movdqu xmm3, [r9+48]
  36915. pand xmm0, xmm12
  36916. pand xmm1, xmm12
  36917. pand xmm2, xmm12
  36918. pand xmm3, xmm12
  36919. por xmm4, xmm0
  36920. por xmm5, xmm1
  36921. por xmm6, xmm2
  36922. por xmm7, xmm3
  36923. paddd xmm13, xmm11
  36924. ; ENTRY: 3
  36925. mov r9, QWORD PTR [rdx+24]
  36926. movdqu xmm12, xmm13
  36927. pcmpeqd xmm12, xmm10
  36928. movdqu xmm0, [r9]
  36929. movdqu xmm1, [r9+16]
  36930. movdqu xmm2, [r9+32]
  36931. movdqu xmm3, [r9+48]
  36932. pand xmm0, xmm12
  36933. pand xmm1, xmm12
  36934. pand xmm2, xmm12
  36935. pand xmm3, xmm12
  36936. por xmm4, xmm0
  36937. por xmm5, xmm1
  36938. por xmm6, xmm2
  36939. por xmm7, xmm3
  36940. paddd xmm13, xmm11
  36941. ; ENTRY: 4
  36942. mov r9, QWORD PTR [rdx+32]
  36943. movdqu xmm12, xmm13
  36944. pcmpeqd xmm12, xmm10
  36945. movdqu xmm0, [r9]
  36946. movdqu xmm1, [r9+16]
  36947. movdqu xmm2, [r9+32]
  36948. movdqu xmm3, [r9+48]
  36949. pand xmm0, xmm12
  36950. pand xmm1, xmm12
  36951. pand xmm2, xmm12
  36952. pand xmm3, xmm12
  36953. por xmm4, xmm0
  36954. por xmm5, xmm1
  36955. por xmm6, xmm2
  36956. por xmm7, xmm3
  36957. paddd xmm13, xmm11
  36958. ; ENTRY: 5
  36959. mov r9, QWORD PTR [rdx+40]
  36960. movdqu xmm12, xmm13
  36961. pcmpeqd xmm12, xmm10
  36962. movdqu xmm0, [r9]
  36963. movdqu xmm1, [r9+16]
  36964. movdqu xmm2, [r9+32]
  36965. movdqu xmm3, [r9+48]
  36966. pand xmm0, xmm12
  36967. pand xmm1, xmm12
  36968. pand xmm2, xmm12
  36969. pand xmm3, xmm12
  36970. por xmm4, xmm0
  36971. por xmm5, xmm1
  36972. por xmm6, xmm2
  36973. por xmm7, xmm3
  36974. paddd xmm13, xmm11
  36975. ; ENTRY: 6
  36976. mov r9, QWORD PTR [rdx+48]
  36977. movdqu xmm12, xmm13
  36978. pcmpeqd xmm12, xmm10
  36979. movdqu xmm0, [r9]
  36980. movdqu xmm1, [r9+16]
  36981. movdqu xmm2, [r9+32]
  36982. movdqu xmm3, [r9+48]
  36983. pand xmm0, xmm12
  36984. pand xmm1, xmm12
  36985. pand xmm2, xmm12
  36986. pand xmm3, xmm12
  36987. por xmm4, xmm0
  36988. por xmm5, xmm1
  36989. por xmm6, xmm2
  36990. por xmm7, xmm3
  36991. paddd xmm13, xmm11
  36992. ; ENTRY: 7
  36993. mov r9, QWORD PTR [rdx+56]
  36994. movdqu xmm12, xmm13
  36995. pcmpeqd xmm12, xmm10
  36996. movdqu xmm0, [r9]
  36997. movdqu xmm1, [r9+16]
  36998. movdqu xmm2, [r9+32]
  36999. movdqu xmm3, [r9+48]
  37000. pand xmm0, xmm12
  37001. pand xmm1, xmm12
  37002. pand xmm2, xmm12
  37003. pand xmm3, xmm12
  37004. por xmm4, xmm0
  37005. por xmm5, xmm1
  37006. por xmm6, xmm2
  37007. por xmm7, xmm3
  37008. paddd xmm13, xmm11
  37009. ; ENTRY: 8
  37010. mov r9, QWORD PTR [rdx+64]
  37011. movdqu xmm12, xmm13
  37012. pcmpeqd xmm12, xmm10
  37013. movdqu xmm0, [r9]
  37014. movdqu xmm1, [r9+16]
  37015. movdqu xmm2, [r9+32]
  37016. movdqu xmm3, [r9+48]
  37017. pand xmm0, xmm12
  37018. pand xmm1, xmm12
  37019. pand xmm2, xmm12
  37020. pand xmm3, xmm12
  37021. por xmm4, xmm0
  37022. por xmm5, xmm1
  37023. por xmm6, xmm2
  37024. por xmm7, xmm3
  37025. paddd xmm13, xmm11
  37026. ; ENTRY: 9
  37027. mov r9, QWORD PTR [rdx+72]
  37028. movdqu xmm12, xmm13
  37029. pcmpeqd xmm12, xmm10
  37030. movdqu xmm0, [r9]
  37031. movdqu xmm1, [r9+16]
  37032. movdqu xmm2, [r9+32]
  37033. movdqu xmm3, [r9+48]
  37034. pand xmm0, xmm12
  37035. pand xmm1, xmm12
  37036. pand xmm2, xmm12
  37037. pand xmm3, xmm12
  37038. por xmm4, xmm0
  37039. por xmm5, xmm1
  37040. por xmm6, xmm2
  37041. por xmm7, xmm3
  37042. paddd xmm13, xmm11
  37043. ; ENTRY: 10
  37044. mov r9, QWORD PTR [rdx+80]
  37045. movdqu xmm12, xmm13
  37046. pcmpeqd xmm12, xmm10
  37047. movdqu xmm0, [r9]
  37048. movdqu xmm1, [r9+16]
  37049. movdqu xmm2, [r9+32]
  37050. movdqu xmm3, [r9+48]
  37051. pand xmm0, xmm12
  37052. pand xmm1, xmm12
  37053. pand xmm2, xmm12
  37054. pand xmm3, xmm12
  37055. por xmm4, xmm0
  37056. por xmm5, xmm1
  37057. por xmm6, xmm2
  37058. por xmm7, xmm3
  37059. paddd xmm13, xmm11
  37060. ; ENTRY: 11
  37061. mov r9, QWORD PTR [rdx+88]
  37062. movdqu xmm12, xmm13
  37063. pcmpeqd xmm12, xmm10
  37064. movdqu xmm0, [r9]
  37065. movdqu xmm1, [r9+16]
  37066. movdqu xmm2, [r9+32]
  37067. movdqu xmm3, [r9+48]
  37068. pand xmm0, xmm12
  37069. pand xmm1, xmm12
  37070. pand xmm2, xmm12
  37071. pand xmm3, xmm12
  37072. por xmm4, xmm0
  37073. por xmm5, xmm1
  37074. por xmm6, xmm2
  37075. por xmm7, xmm3
  37076. paddd xmm13, xmm11
  37077. ; ENTRY: 12
  37078. mov r9, QWORD PTR [rdx+96]
  37079. movdqu xmm12, xmm13
  37080. pcmpeqd xmm12, xmm10
  37081. movdqu xmm0, [r9]
  37082. movdqu xmm1, [r9+16]
  37083. movdqu xmm2, [r9+32]
  37084. movdqu xmm3, [r9+48]
  37085. pand xmm0, xmm12
  37086. pand xmm1, xmm12
  37087. pand xmm2, xmm12
  37088. pand xmm3, xmm12
  37089. por xmm4, xmm0
  37090. por xmm5, xmm1
  37091. por xmm6, xmm2
  37092. por xmm7, xmm3
  37093. paddd xmm13, xmm11
  37094. ; ENTRY: 13
  37095. mov r9, QWORD PTR [rdx+104]
  37096. movdqu xmm12, xmm13
  37097. pcmpeqd xmm12, xmm10
  37098. movdqu xmm0, [r9]
  37099. movdqu xmm1, [r9+16]
  37100. movdqu xmm2, [r9+32]
  37101. movdqu xmm3, [r9+48]
  37102. pand xmm0, xmm12
  37103. pand xmm1, xmm12
  37104. pand xmm2, xmm12
  37105. pand xmm3, xmm12
  37106. por xmm4, xmm0
  37107. por xmm5, xmm1
  37108. por xmm6, xmm2
  37109. por xmm7, xmm3
  37110. paddd xmm13, xmm11
  37111. ; ENTRY: 14
  37112. mov r9, QWORD PTR [rdx+112]
  37113. movdqu xmm12, xmm13
  37114. pcmpeqd xmm12, xmm10
  37115. movdqu xmm0, [r9]
  37116. movdqu xmm1, [r9+16]
  37117. movdqu xmm2, [r9+32]
  37118. movdqu xmm3, [r9+48]
  37119. pand xmm0, xmm12
  37120. pand xmm1, xmm12
  37121. pand xmm2, xmm12
  37122. pand xmm3, xmm12
  37123. por xmm4, xmm0
  37124. por xmm5, xmm1
  37125. por xmm6, xmm2
  37126. por xmm7, xmm3
  37127. paddd xmm13, xmm11
  37128. ; ENTRY: 15
  37129. mov r9, QWORD PTR [rdx+120]
  37130. movdqu xmm12, xmm13
  37131. pcmpeqd xmm12, xmm10
  37132. movdqu xmm0, [r9]
  37133. movdqu xmm1, [r9+16]
  37134. movdqu xmm2, [r9+32]
  37135. movdqu xmm3, [r9+48]
  37136. pand xmm0, xmm12
  37137. pand xmm1, xmm12
  37138. pand xmm2, xmm12
  37139. pand xmm3, xmm12
  37140. por xmm4, xmm0
  37141. por xmm5, xmm1
  37142. por xmm6, xmm2
  37143. por xmm7, xmm3
  37144. paddd xmm13, xmm11
  37145. movdqu [rcx], xmm4
  37146. movdqu [rcx+16], xmm5
  37147. movdqu [rcx+32], xmm6
  37148. movdqu [rcx+48], xmm7
  37149. add rcx, 64
  37150. ; END: 0-7
  37151. ; START: 8-15
  37152. pxor xmm13, xmm13
  37153. pxor xmm4, xmm4
  37154. pxor xmm5, xmm5
  37155. pxor xmm6, xmm6
  37156. pxor xmm7, xmm7
  37157. ; ENTRY: 0
  37158. mov r9, QWORD PTR [rdx]
  37159. add r9, 64
  37160. movdqu xmm12, xmm13
  37161. pcmpeqd xmm12, xmm10
  37162. movdqu xmm0, [r9]
  37163. movdqu xmm1, [r9+16]
  37164. movdqu xmm2, [r9+32]
  37165. movdqu xmm3, [r9+48]
  37166. pand xmm0, xmm12
  37167. pand xmm1, xmm12
  37168. pand xmm2, xmm12
  37169. pand xmm3, xmm12
  37170. por xmm4, xmm0
  37171. por xmm5, xmm1
  37172. por xmm6, xmm2
  37173. por xmm7, xmm3
  37174. paddd xmm13, xmm11
  37175. ; ENTRY: 1
  37176. mov r9, QWORD PTR [rdx+8]
  37177. add r9, 64
  37178. movdqu xmm12, xmm13
  37179. pcmpeqd xmm12, xmm10
  37180. movdqu xmm0, [r9]
  37181. movdqu xmm1, [r9+16]
  37182. movdqu xmm2, [r9+32]
  37183. movdqu xmm3, [r9+48]
  37184. pand xmm0, xmm12
  37185. pand xmm1, xmm12
  37186. pand xmm2, xmm12
  37187. pand xmm3, xmm12
  37188. por xmm4, xmm0
  37189. por xmm5, xmm1
  37190. por xmm6, xmm2
  37191. por xmm7, xmm3
  37192. paddd xmm13, xmm11
  37193. ; ENTRY: 2
  37194. mov r9, QWORD PTR [rdx+16]
  37195. add r9, 64
  37196. movdqu xmm12, xmm13
  37197. pcmpeqd xmm12, xmm10
  37198. movdqu xmm0, [r9]
  37199. movdqu xmm1, [r9+16]
  37200. movdqu xmm2, [r9+32]
  37201. movdqu xmm3, [r9+48]
  37202. pand xmm0, xmm12
  37203. pand xmm1, xmm12
  37204. pand xmm2, xmm12
  37205. pand xmm3, xmm12
  37206. por xmm4, xmm0
  37207. por xmm5, xmm1
  37208. por xmm6, xmm2
  37209. por xmm7, xmm3
  37210. paddd xmm13, xmm11
  37211. ; ENTRY: 3
  37212. mov r9, QWORD PTR [rdx+24]
  37213. add r9, 64
  37214. movdqu xmm12, xmm13
  37215. pcmpeqd xmm12, xmm10
  37216. movdqu xmm0, [r9]
  37217. movdqu xmm1, [r9+16]
  37218. movdqu xmm2, [r9+32]
  37219. movdqu xmm3, [r9+48]
  37220. pand xmm0, xmm12
  37221. pand xmm1, xmm12
  37222. pand xmm2, xmm12
  37223. pand xmm3, xmm12
  37224. por xmm4, xmm0
  37225. por xmm5, xmm1
  37226. por xmm6, xmm2
  37227. por xmm7, xmm3
  37228. paddd xmm13, xmm11
  37229. ; ENTRY: 4
  37230. mov r9, QWORD PTR [rdx+32]
  37231. add r9, 64
  37232. movdqu xmm12, xmm13
  37233. pcmpeqd xmm12, xmm10
  37234. movdqu xmm0, [r9]
  37235. movdqu xmm1, [r9+16]
  37236. movdqu xmm2, [r9+32]
  37237. movdqu xmm3, [r9+48]
  37238. pand xmm0, xmm12
  37239. pand xmm1, xmm12
  37240. pand xmm2, xmm12
  37241. pand xmm3, xmm12
  37242. por xmm4, xmm0
  37243. por xmm5, xmm1
  37244. por xmm6, xmm2
  37245. por xmm7, xmm3
  37246. paddd xmm13, xmm11
  37247. ; ENTRY: 5
  37248. mov r9, QWORD PTR [rdx+40]
  37249. add r9, 64
  37250. movdqu xmm12, xmm13
  37251. pcmpeqd xmm12, xmm10
  37252. movdqu xmm0, [r9]
  37253. movdqu xmm1, [r9+16]
  37254. movdqu xmm2, [r9+32]
  37255. movdqu xmm3, [r9+48]
  37256. pand xmm0, xmm12
  37257. pand xmm1, xmm12
  37258. pand xmm2, xmm12
  37259. pand xmm3, xmm12
  37260. por xmm4, xmm0
  37261. por xmm5, xmm1
  37262. por xmm6, xmm2
  37263. por xmm7, xmm3
  37264. paddd xmm13, xmm11
  37265. ; ENTRY: 6
  37266. mov r9, QWORD PTR [rdx+48]
  37267. add r9, 64
  37268. movdqu xmm12, xmm13
  37269. pcmpeqd xmm12, xmm10
  37270. movdqu xmm0, [r9]
  37271. movdqu xmm1, [r9+16]
  37272. movdqu xmm2, [r9+32]
  37273. movdqu xmm3, [r9+48]
  37274. pand xmm0, xmm12
  37275. pand xmm1, xmm12
  37276. pand xmm2, xmm12
  37277. pand xmm3, xmm12
  37278. por xmm4, xmm0
  37279. por xmm5, xmm1
  37280. por xmm6, xmm2
  37281. por xmm7, xmm3
  37282. paddd xmm13, xmm11
  37283. ; ENTRY: 7
  37284. mov r9, QWORD PTR [rdx+56]
  37285. add r9, 64
  37286. movdqu xmm12, xmm13
  37287. pcmpeqd xmm12, xmm10
  37288. movdqu xmm0, [r9]
  37289. movdqu xmm1, [r9+16]
  37290. movdqu xmm2, [r9+32]
  37291. movdqu xmm3, [r9+48]
  37292. pand xmm0, xmm12
  37293. pand xmm1, xmm12
  37294. pand xmm2, xmm12
  37295. pand xmm3, xmm12
  37296. por xmm4, xmm0
  37297. por xmm5, xmm1
  37298. por xmm6, xmm2
  37299. por xmm7, xmm3
  37300. paddd xmm13, xmm11
  37301. ; ENTRY: 8
  37302. mov r9, QWORD PTR [rdx+64]
  37303. add r9, 64
  37304. movdqu xmm12, xmm13
  37305. pcmpeqd xmm12, xmm10
  37306. movdqu xmm0, [r9]
  37307. movdqu xmm1, [r9+16]
  37308. movdqu xmm2, [r9+32]
  37309. movdqu xmm3, [r9+48]
  37310. pand xmm0, xmm12
  37311. pand xmm1, xmm12
  37312. pand xmm2, xmm12
  37313. pand xmm3, xmm12
  37314. por xmm4, xmm0
  37315. por xmm5, xmm1
  37316. por xmm6, xmm2
  37317. por xmm7, xmm3
  37318. paddd xmm13, xmm11
  37319. ; ENTRY: 9
  37320. mov r9, QWORD PTR [rdx+72]
  37321. add r9, 64
  37322. movdqu xmm12, xmm13
  37323. pcmpeqd xmm12, xmm10
  37324. movdqu xmm0, [r9]
  37325. movdqu xmm1, [r9+16]
  37326. movdqu xmm2, [r9+32]
  37327. movdqu xmm3, [r9+48]
  37328. pand xmm0, xmm12
  37329. pand xmm1, xmm12
  37330. pand xmm2, xmm12
  37331. pand xmm3, xmm12
  37332. por xmm4, xmm0
  37333. por xmm5, xmm1
  37334. por xmm6, xmm2
  37335. por xmm7, xmm3
  37336. paddd xmm13, xmm11
  37337. ; ENTRY: 10
  37338. mov r9, QWORD PTR [rdx+80]
  37339. add r9, 64
  37340. movdqu xmm12, xmm13
  37341. pcmpeqd xmm12, xmm10
  37342. movdqu xmm0, [r9]
  37343. movdqu xmm1, [r9+16]
  37344. movdqu xmm2, [r9+32]
  37345. movdqu xmm3, [r9+48]
  37346. pand xmm0, xmm12
  37347. pand xmm1, xmm12
  37348. pand xmm2, xmm12
  37349. pand xmm3, xmm12
  37350. por xmm4, xmm0
  37351. por xmm5, xmm1
  37352. por xmm6, xmm2
  37353. por xmm7, xmm3
  37354. paddd xmm13, xmm11
  37355. ; ENTRY: 11
  37356. mov r9, QWORD PTR [rdx+88]
  37357. add r9, 64
  37358. movdqu xmm12, xmm13
  37359. pcmpeqd xmm12, xmm10
  37360. movdqu xmm0, [r9]
  37361. movdqu xmm1, [r9+16]
  37362. movdqu xmm2, [r9+32]
  37363. movdqu xmm3, [r9+48]
  37364. pand xmm0, xmm12
  37365. pand xmm1, xmm12
  37366. pand xmm2, xmm12
  37367. pand xmm3, xmm12
  37368. por xmm4, xmm0
  37369. por xmm5, xmm1
  37370. por xmm6, xmm2
  37371. por xmm7, xmm3
  37372. paddd xmm13, xmm11
  37373. ; ENTRY: 12
  37374. mov r9, QWORD PTR [rdx+96]
  37375. add r9, 64
  37376. movdqu xmm12, xmm13
  37377. pcmpeqd xmm12, xmm10
  37378. movdqu xmm0, [r9]
  37379. movdqu xmm1, [r9+16]
  37380. movdqu xmm2, [r9+32]
  37381. movdqu xmm3, [r9+48]
  37382. pand xmm0, xmm12
  37383. pand xmm1, xmm12
  37384. pand xmm2, xmm12
  37385. pand xmm3, xmm12
  37386. por xmm4, xmm0
  37387. por xmm5, xmm1
  37388. por xmm6, xmm2
  37389. por xmm7, xmm3
  37390. paddd xmm13, xmm11
  37391. ; ENTRY: 13
  37392. mov r9, QWORD PTR [rdx+104]
  37393. add r9, 64
  37394. movdqu xmm12, xmm13
  37395. pcmpeqd xmm12, xmm10
  37396. movdqu xmm0, [r9]
  37397. movdqu xmm1, [r9+16]
  37398. movdqu xmm2, [r9+32]
  37399. movdqu xmm3, [r9+48]
  37400. pand xmm0, xmm12
  37401. pand xmm1, xmm12
  37402. pand xmm2, xmm12
  37403. pand xmm3, xmm12
  37404. por xmm4, xmm0
  37405. por xmm5, xmm1
  37406. por xmm6, xmm2
  37407. por xmm7, xmm3
  37408. paddd xmm13, xmm11
  37409. ; ENTRY: 14
  37410. mov r9, QWORD PTR [rdx+112]
  37411. add r9, 64
  37412. movdqu xmm12, xmm13
  37413. pcmpeqd xmm12, xmm10
  37414. movdqu xmm0, [r9]
  37415. movdqu xmm1, [r9+16]
  37416. movdqu xmm2, [r9+32]
  37417. movdqu xmm3, [r9+48]
  37418. pand xmm0, xmm12
  37419. pand xmm1, xmm12
  37420. pand xmm2, xmm12
  37421. pand xmm3, xmm12
  37422. por xmm4, xmm0
  37423. por xmm5, xmm1
  37424. por xmm6, xmm2
  37425. por xmm7, xmm3
  37426. paddd xmm13, xmm11
  37427. ; ENTRY: 15
  37428. mov r9, QWORD PTR [rdx+120]
  37429. add r9, 64
  37430. movdqu xmm12, xmm13
  37431. pcmpeqd xmm12, xmm10
  37432. movdqu xmm0, [r9]
  37433. movdqu xmm1, [r9+16]
  37434. movdqu xmm2, [r9+32]
  37435. movdqu xmm3, [r9+48]
  37436. pand xmm0, xmm12
  37437. pand xmm1, xmm12
  37438. pand xmm2, xmm12
  37439. pand xmm3, xmm12
  37440. por xmm4, xmm0
  37441. por xmm5, xmm1
  37442. por xmm6, xmm2
  37443. por xmm7, xmm3
  37444. paddd xmm13, xmm11
  37445. movdqu [rcx], xmm4
  37446. movdqu [rcx+16], xmm5
  37447. movdqu [rcx+32], xmm6
  37448. movdqu [rcx+48], xmm7
  37449. add rcx, 64
  37450. ; END: 8-15
  37451. ; START: 16-23
  37452. pxor xmm13, xmm13
  37453. pxor xmm4, xmm4
  37454. pxor xmm5, xmm5
  37455. pxor xmm6, xmm6
  37456. pxor xmm7, xmm7
  37457. ; ENTRY: 0
  37458. mov r9, QWORD PTR [rdx]
  37459. add r9, 128
  37460. movdqu xmm12, xmm13
  37461. pcmpeqd xmm12, xmm10
  37462. movdqu xmm0, [r9]
  37463. movdqu xmm1, [r9+16]
  37464. movdqu xmm2, [r9+32]
  37465. movdqu xmm3, [r9+48]
  37466. pand xmm0, xmm12
  37467. pand xmm1, xmm12
  37468. pand xmm2, xmm12
  37469. pand xmm3, xmm12
  37470. por xmm4, xmm0
  37471. por xmm5, xmm1
  37472. por xmm6, xmm2
  37473. por xmm7, xmm3
  37474. paddd xmm13, xmm11
  37475. ; ENTRY: 1
  37476. mov r9, QWORD PTR [rdx+8]
  37477. add r9, 128
  37478. movdqu xmm12, xmm13
  37479. pcmpeqd xmm12, xmm10
  37480. movdqu xmm0, [r9]
  37481. movdqu xmm1, [r9+16]
  37482. movdqu xmm2, [r9+32]
  37483. movdqu xmm3, [r9+48]
  37484. pand xmm0, xmm12
  37485. pand xmm1, xmm12
  37486. pand xmm2, xmm12
  37487. pand xmm3, xmm12
  37488. por xmm4, xmm0
  37489. por xmm5, xmm1
  37490. por xmm6, xmm2
  37491. por xmm7, xmm3
  37492. paddd xmm13, xmm11
  37493. ; ENTRY: 2
  37494. mov r9, QWORD PTR [rdx+16]
  37495. add r9, 128
  37496. movdqu xmm12, xmm13
  37497. pcmpeqd xmm12, xmm10
  37498. movdqu xmm0, [r9]
  37499. movdqu xmm1, [r9+16]
  37500. movdqu xmm2, [r9+32]
  37501. movdqu xmm3, [r9+48]
  37502. pand xmm0, xmm12
  37503. pand xmm1, xmm12
  37504. pand xmm2, xmm12
  37505. pand xmm3, xmm12
  37506. por xmm4, xmm0
  37507. por xmm5, xmm1
  37508. por xmm6, xmm2
  37509. por xmm7, xmm3
  37510. paddd xmm13, xmm11
  37511. ; ENTRY: 3
  37512. mov r9, QWORD PTR [rdx+24]
  37513. add r9, 128
  37514. movdqu xmm12, xmm13
  37515. pcmpeqd xmm12, xmm10
  37516. movdqu xmm0, [r9]
  37517. movdqu xmm1, [r9+16]
  37518. movdqu xmm2, [r9+32]
  37519. movdqu xmm3, [r9+48]
  37520. pand xmm0, xmm12
  37521. pand xmm1, xmm12
  37522. pand xmm2, xmm12
  37523. pand xmm3, xmm12
  37524. por xmm4, xmm0
  37525. por xmm5, xmm1
  37526. por xmm6, xmm2
  37527. por xmm7, xmm3
  37528. paddd xmm13, xmm11
  37529. ; ENTRY: 4
  37530. mov r9, QWORD PTR [rdx+32]
  37531. add r9, 128
  37532. movdqu xmm12, xmm13
  37533. pcmpeqd xmm12, xmm10
  37534. movdqu xmm0, [r9]
  37535. movdqu xmm1, [r9+16]
  37536. movdqu xmm2, [r9+32]
  37537. movdqu xmm3, [r9+48]
  37538. pand xmm0, xmm12
  37539. pand xmm1, xmm12
  37540. pand xmm2, xmm12
  37541. pand xmm3, xmm12
  37542. por xmm4, xmm0
  37543. por xmm5, xmm1
  37544. por xmm6, xmm2
  37545. por xmm7, xmm3
  37546. paddd xmm13, xmm11
  37547. ; ENTRY: 5
  37548. mov r9, QWORD PTR [rdx+40]
  37549. add r9, 128
  37550. movdqu xmm12, xmm13
  37551. pcmpeqd xmm12, xmm10
  37552. movdqu xmm0, [r9]
  37553. movdqu xmm1, [r9+16]
  37554. movdqu xmm2, [r9+32]
  37555. movdqu xmm3, [r9+48]
  37556. pand xmm0, xmm12
  37557. pand xmm1, xmm12
  37558. pand xmm2, xmm12
  37559. pand xmm3, xmm12
  37560. por xmm4, xmm0
  37561. por xmm5, xmm1
  37562. por xmm6, xmm2
  37563. por xmm7, xmm3
  37564. paddd xmm13, xmm11
  37565. ; ENTRY: 6
  37566. mov r9, QWORD PTR [rdx+48]
  37567. add r9, 128
  37568. movdqu xmm12, xmm13
  37569. pcmpeqd xmm12, xmm10
  37570. movdqu xmm0, [r9]
  37571. movdqu xmm1, [r9+16]
  37572. movdqu xmm2, [r9+32]
  37573. movdqu xmm3, [r9+48]
  37574. pand xmm0, xmm12
  37575. pand xmm1, xmm12
  37576. pand xmm2, xmm12
  37577. pand xmm3, xmm12
  37578. por xmm4, xmm0
  37579. por xmm5, xmm1
  37580. por xmm6, xmm2
  37581. por xmm7, xmm3
  37582. paddd xmm13, xmm11
  37583. ; ENTRY: 7
  37584. mov r9, QWORD PTR [rdx+56]
  37585. add r9, 128
  37586. movdqu xmm12, xmm13
  37587. pcmpeqd xmm12, xmm10
  37588. movdqu xmm0, [r9]
  37589. movdqu xmm1, [r9+16]
  37590. movdqu xmm2, [r9+32]
  37591. movdqu xmm3, [r9+48]
  37592. pand xmm0, xmm12
  37593. pand xmm1, xmm12
  37594. pand xmm2, xmm12
  37595. pand xmm3, xmm12
  37596. por xmm4, xmm0
  37597. por xmm5, xmm1
  37598. por xmm6, xmm2
  37599. por xmm7, xmm3
  37600. paddd xmm13, xmm11
  37601. ; ENTRY: 8
  37602. mov r9, QWORD PTR [rdx+64]
  37603. add r9, 128
  37604. movdqu xmm12, xmm13
  37605. pcmpeqd xmm12, xmm10
  37606. movdqu xmm0, [r9]
  37607. movdqu xmm1, [r9+16]
  37608. movdqu xmm2, [r9+32]
  37609. movdqu xmm3, [r9+48]
  37610. pand xmm0, xmm12
  37611. pand xmm1, xmm12
  37612. pand xmm2, xmm12
  37613. pand xmm3, xmm12
  37614. por xmm4, xmm0
  37615. por xmm5, xmm1
  37616. por xmm6, xmm2
  37617. por xmm7, xmm3
  37618. paddd xmm13, xmm11
  37619. ; ENTRY: 9
  37620. mov r9, QWORD PTR [rdx+72]
  37621. add r9, 128
  37622. movdqu xmm12, xmm13
  37623. pcmpeqd xmm12, xmm10
  37624. movdqu xmm0, [r9]
  37625. movdqu xmm1, [r9+16]
  37626. movdqu xmm2, [r9+32]
  37627. movdqu xmm3, [r9+48]
  37628. pand xmm0, xmm12
  37629. pand xmm1, xmm12
  37630. pand xmm2, xmm12
  37631. pand xmm3, xmm12
  37632. por xmm4, xmm0
  37633. por xmm5, xmm1
  37634. por xmm6, xmm2
  37635. por xmm7, xmm3
  37636. paddd xmm13, xmm11
  37637. ; ENTRY: 10
  37638. mov r9, QWORD PTR [rdx+80]
  37639. add r9, 128
  37640. movdqu xmm12, xmm13
  37641. pcmpeqd xmm12, xmm10
  37642. movdqu xmm0, [r9]
  37643. movdqu xmm1, [r9+16]
  37644. movdqu xmm2, [r9+32]
  37645. movdqu xmm3, [r9+48]
  37646. pand xmm0, xmm12
  37647. pand xmm1, xmm12
  37648. pand xmm2, xmm12
  37649. pand xmm3, xmm12
  37650. por xmm4, xmm0
  37651. por xmm5, xmm1
  37652. por xmm6, xmm2
  37653. por xmm7, xmm3
  37654. paddd xmm13, xmm11
  37655. ; ENTRY: 11
  37656. mov r9, QWORD PTR [rdx+88]
  37657. add r9, 128
  37658. movdqu xmm12, xmm13
  37659. pcmpeqd xmm12, xmm10
  37660. movdqu xmm0, [r9]
  37661. movdqu xmm1, [r9+16]
  37662. movdqu xmm2, [r9+32]
  37663. movdqu xmm3, [r9+48]
  37664. pand xmm0, xmm12
  37665. pand xmm1, xmm12
  37666. pand xmm2, xmm12
  37667. pand xmm3, xmm12
  37668. por xmm4, xmm0
  37669. por xmm5, xmm1
  37670. por xmm6, xmm2
  37671. por xmm7, xmm3
  37672. paddd xmm13, xmm11
  37673. ; ENTRY: 12
  37674. mov r9, QWORD PTR [rdx+96]
  37675. add r9, 128
  37676. movdqu xmm12, xmm13
  37677. pcmpeqd xmm12, xmm10
  37678. movdqu xmm0, [r9]
  37679. movdqu xmm1, [r9+16]
  37680. movdqu xmm2, [r9+32]
  37681. movdqu xmm3, [r9+48]
  37682. pand xmm0, xmm12
  37683. pand xmm1, xmm12
  37684. pand xmm2, xmm12
  37685. pand xmm3, xmm12
  37686. por xmm4, xmm0
  37687. por xmm5, xmm1
  37688. por xmm6, xmm2
  37689. por xmm7, xmm3
  37690. paddd xmm13, xmm11
  37691. ; ENTRY: 13
  37692. mov r9, QWORD PTR [rdx+104]
  37693. add r9, 128
  37694. movdqu xmm12, xmm13
  37695. pcmpeqd xmm12, xmm10
  37696. movdqu xmm0, [r9]
  37697. movdqu xmm1, [r9+16]
  37698. movdqu xmm2, [r9+32]
  37699. movdqu xmm3, [r9+48]
  37700. pand xmm0, xmm12
  37701. pand xmm1, xmm12
  37702. pand xmm2, xmm12
  37703. pand xmm3, xmm12
  37704. por xmm4, xmm0
  37705. por xmm5, xmm1
  37706. por xmm6, xmm2
  37707. por xmm7, xmm3
  37708. paddd xmm13, xmm11
  37709. ; ENTRY: 14
  37710. mov r9, QWORD PTR [rdx+112]
  37711. add r9, 128
  37712. movdqu xmm12, xmm13
  37713. pcmpeqd xmm12, xmm10
  37714. movdqu xmm0, [r9]
  37715. movdqu xmm1, [r9+16]
  37716. movdqu xmm2, [r9+32]
  37717. movdqu xmm3, [r9+48]
  37718. pand xmm0, xmm12
  37719. pand xmm1, xmm12
  37720. pand xmm2, xmm12
  37721. pand xmm3, xmm12
  37722. por xmm4, xmm0
  37723. por xmm5, xmm1
  37724. por xmm6, xmm2
  37725. por xmm7, xmm3
  37726. paddd xmm13, xmm11
  37727. ; ENTRY: 15
  37728. mov r9, QWORD PTR [rdx+120]
  37729. add r9, 128
  37730. movdqu xmm12, xmm13
  37731. pcmpeqd xmm12, xmm10
  37732. movdqu xmm0, [r9]
  37733. movdqu xmm1, [r9+16]
  37734. movdqu xmm2, [r9+32]
  37735. movdqu xmm3, [r9+48]
  37736. pand xmm0, xmm12
  37737. pand xmm1, xmm12
  37738. pand xmm2, xmm12
  37739. pand xmm3, xmm12
  37740. por xmm4, xmm0
  37741. por xmm5, xmm1
  37742. por xmm6, xmm2
  37743. por xmm7, xmm3
  37744. paddd xmm13, xmm11
  37745. movdqu [rcx], xmm4
  37746. movdqu [rcx+16], xmm5
  37747. movdqu [rcx+32], xmm6
  37748. movdqu [rcx+48], xmm7
  37749. add rcx, 64
  37750. ; END: 16-23
  37751. ; START: 24-31
  37752. pxor xmm13, xmm13
  37753. pxor xmm4, xmm4
  37754. pxor xmm5, xmm5
  37755. pxor xmm6, xmm6
  37756. pxor xmm7, xmm7
  37757. ; ENTRY: 0
  37758. mov r9, QWORD PTR [rdx]
  37759. add r9, 192
  37760. movdqu xmm12, xmm13
  37761. pcmpeqd xmm12, xmm10
  37762. movdqu xmm0, [r9]
  37763. movdqu xmm1, [r9+16]
  37764. movdqu xmm2, [r9+32]
  37765. movdqu xmm3, [r9+48]
  37766. pand xmm0, xmm12
  37767. pand xmm1, xmm12
  37768. pand xmm2, xmm12
  37769. pand xmm3, xmm12
  37770. por xmm4, xmm0
  37771. por xmm5, xmm1
  37772. por xmm6, xmm2
  37773. por xmm7, xmm3
  37774. paddd xmm13, xmm11
  37775. ; ENTRY: 1
  37776. mov r9, QWORD PTR [rdx+8]
  37777. add r9, 192
  37778. movdqu xmm12, xmm13
  37779. pcmpeqd xmm12, xmm10
  37780. movdqu xmm0, [r9]
  37781. movdqu xmm1, [r9+16]
  37782. movdqu xmm2, [r9+32]
  37783. movdqu xmm3, [r9+48]
  37784. pand xmm0, xmm12
  37785. pand xmm1, xmm12
  37786. pand xmm2, xmm12
  37787. pand xmm3, xmm12
  37788. por xmm4, xmm0
  37789. por xmm5, xmm1
  37790. por xmm6, xmm2
  37791. por xmm7, xmm3
  37792. paddd xmm13, xmm11
  37793. ; ENTRY: 2
  37794. mov r9, QWORD PTR [rdx+16]
  37795. add r9, 192
  37796. movdqu xmm12, xmm13
  37797. pcmpeqd xmm12, xmm10
  37798. movdqu xmm0, [r9]
  37799. movdqu xmm1, [r9+16]
  37800. movdqu xmm2, [r9+32]
  37801. movdqu xmm3, [r9+48]
  37802. pand xmm0, xmm12
  37803. pand xmm1, xmm12
  37804. pand xmm2, xmm12
  37805. pand xmm3, xmm12
  37806. por xmm4, xmm0
  37807. por xmm5, xmm1
  37808. por xmm6, xmm2
  37809. por xmm7, xmm3
  37810. paddd xmm13, xmm11
  37811. ; ENTRY: 3
  37812. mov r9, QWORD PTR [rdx+24]
  37813. add r9, 192
  37814. movdqu xmm12, xmm13
  37815. pcmpeqd xmm12, xmm10
  37816. movdqu xmm0, [r9]
  37817. movdqu xmm1, [r9+16]
  37818. movdqu xmm2, [r9+32]
  37819. movdqu xmm3, [r9+48]
  37820. pand xmm0, xmm12
  37821. pand xmm1, xmm12
  37822. pand xmm2, xmm12
  37823. pand xmm3, xmm12
  37824. por xmm4, xmm0
  37825. por xmm5, xmm1
  37826. por xmm6, xmm2
  37827. por xmm7, xmm3
  37828. paddd xmm13, xmm11
  37829. ; ENTRY: 4
  37830. mov r9, QWORD PTR [rdx+32]
  37831. add r9, 192
  37832. movdqu xmm12, xmm13
  37833. pcmpeqd xmm12, xmm10
  37834. movdqu xmm0, [r9]
  37835. movdqu xmm1, [r9+16]
  37836. movdqu xmm2, [r9+32]
  37837. movdqu xmm3, [r9+48]
  37838. pand xmm0, xmm12
  37839. pand xmm1, xmm12
  37840. pand xmm2, xmm12
  37841. pand xmm3, xmm12
  37842. por xmm4, xmm0
  37843. por xmm5, xmm1
  37844. por xmm6, xmm2
  37845. por xmm7, xmm3
  37846. paddd xmm13, xmm11
  37847. ; ENTRY: 5
  37848. mov r9, QWORD PTR [rdx+40]
  37849. add r9, 192
  37850. movdqu xmm12, xmm13
  37851. pcmpeqd xmm12, xmm10
  37852. movdqu xmm0, [r9]
  37853. movdqu xmm1, [r9+16]
  37854. movdqu xmm2, [r9+32]
  37855. movdqu xmm3, [r9+48]
  37856. pand xmm0, xmm12
  37857. pand xmm1, xmm12
  37858. pand xmm2, xmm12
  37859. pand xmm3, xmm12
  37860. por xmm4, xmm0
  37861. por xmm5, xmm1
  37862. por xmm6, xmm2
  37863. por xmm7, xmm3
  37864. paddd xmm13, xmm11
  37865. ; ENTRY: 6
  37866. mov r9, QWORD PTR [rdx+48]
  37867. add r9, 192
  37868. movdqu xmm12, xmm13
  37869. pcmpeqd xmm12, xmm10
  37870. movdqu xmm0, [r9]
  37871. movdqu xmm1, [r9+16]
  37872. movdqu xmm2, [r9+32]
  37873. movdqu xmm3, [r9+48]
  37874. pand xmm0, xmm12
  37875. pand xmm1, xmm12
  37876. pand xmm2, xmm12
  37877. pand xmm3, xmm12
  37878. por xmm4, xmm0
  37879. por xmm5, xmm1
  37880. por xmm6, xmm2
  37881. por xmm7, xmm3
  37882. paddd xmm13, xmm11
  37883. ; ENTRY: 7
  37884. mov r9, QWORD PTR [rdx+56]
  37885. add r9, 192
  37886. movdqu xmm12, xmm13
  37887. pcmpeqd xmm12, xmm10
  37888. movdqu xmm0, [r9]
  37889. movdqu xmm1, [r9+16]
  37890. movdqu xmm2, [r9+32]
  37891. movdqu xmm3, [r9+48]
  37892. pand xmm0, xmm12
  37893. pand xmm1, xmm12
  37894. pand xmm2, xmm12
  37895. pand xmm3, xmm12
  37896. por xmm4, xmm0
  37897. por xmm5, xmm1
  37898. por xmm6, xmm2
  37899. por xmm7, xmm3
  37900. paddd xmm13, xmm11
  37901. ; ENTRY: 8
  37902. mov r9, QWORD PTR [rdx+64]
  37903. add r9, 192
  37904. movdqu xmm12, xmm13
  37905. pcmpeqd xmm12, xmm10
  37906. movdqu xmm0, [r9]
  37907. movdqu xmm1, [r9+16]
  37908. movdqu xmm2, [r9+32]
  37909. movdqu xmm3, [r9+48]
  37910. pand xmm0, xmm12
  37911. pand xmm1, xmm12
  37912. pand xmm2, xmm12
  37913. pand xmm3, xmm12
  37914. por xmm4, xmm0
  37915. por xmm5, xmm1
  37916. por xmm6, xmm2
  37917. por xmm7, xmm3
  37918. paddd xmm13, xmm11
  37919. ; ENTRY: 9
  37920. mov r9, QWORD PTR [rdx+72]
  37921. add r9, 192
  37922. movdqu xmm12, xmm13
  37923. pcmpeqd xmm12, xmm10
  37924. movdqu xmm0, [r9]
  37925. movdqu xmm1, [r9+16]
  37926. movdqu xmm2, [r9+32]
  37927. movdqu xmm3, [r9+48]
  37928. pand xmm0, xmm12
  37929. pand xmm1, xmm12
  37930. pand xmm2, xmm12
  37931. pand xmm3, xmm12
  37932. por xmm4, xmm0
  37933. por xmm5, xmm1
  37934. por xmm6, xmm2
  37935. por xmm7, xmm3
  37936. paddd xmm13, xmm11
  37937. ; ENTRY: 10
  37938. mov r9, QWORD PTR [rdx+80]
  37939. add r9, 192
  37940. movdqu xmm12, xmm13
  37941. pcmpeqd xmm12, xmm10
  37942. movdqu xmm0, [r9]
  37943. movdqu xmm1, [r9+16]
  37944. movdqu xmm2, [r9+32]
  37945. movdqu xmm3, [r9+48]
  37946. pand xmm0, xmm12
  37947. pand xmm1, xmm12
  37948. pand xmm2, xmm12
  37949. pand xmm3, xmm12
  37950. por xmm4, xmm0
  37951. por xmm5, xmm1
  37952. por xmm6, xmm2
  37953. por xmm7, xmm3
  37954. paddd xmm13, xmm11
  37955. ; ENTRY: 11
  37956. mov r9, QWORD PTR [rdx+88]
  37957. add r9, 192
  37958. movdqu xmm12, xmm13
  37959. pcmpeqd xmm12, xmm10
  37960. movdqu xmm0, [r9]
  37961. movdqu xmm1, [r9+16]
  37962. movdqu xmm2, [r9+32]
  37963. movdqu xmm3, [r9+48]
  37964. pand xmm0, xmm12
  37965. pand xmm1, xmm12
  37966. pand xmm2, xmm12
  37967. pand xmm3, xmm12
  37968. por xmm4, xmm0
  37969. por xmm5, xmm1
  37970. por xmm6, xmm2
  37971. por xmm7, xmm3
  37972. paddd xmm13, xmm11
  37973. ; ENTRY: 12
  37974. mov r9, QWORD PTR [rdx+96]
  37975. add r9, 192
  37976. movdqu xmm12, xmm13
  37977. pcmpeqd xmm12, xmm10
  37978. movdqu xmm0, [r9]
  37979. movdqu xmm1, [r9+16]
  37980. movdqu xmm2, [r9+32]
  37981. movdqu xmm3, [r9+48]
  37982. pand xmm0, xmm12
  37983. pand xmm1, xmm12
  37984. pand xmm2, xmm12
  37985. pand xmm3, xmm12
  37986. por xmm4, xmm0
  37987. por xmm5, xmm1
  37988. por xmm6, xmm2
  37989. por xmm7, xmm3
  37990. paddd xmm13, xmm11
  37991. ; ENTRY: 13
  37992. mov r9, QWORD PTR [rdx+104]
  37993. add r9, 192
  37994. movdqu xmm12, xmm13
  37995. pcmpeqd xmm12, xmm10
  37996. movdqu xmm0, [r9]
  37997. movdqu xmm1, [r9+16]
  37998. movdqu xmm2, [r9+32]
  37999. movdqu xmm3, [r9+48]
  38000. pand xmm0, xmm12
  38001. pand xmm1, xmm12
  38002. pand xmm2, xmm12
  38003. pand xmm3, xmm12
  38004. por xmm4, xmm0
  38005. por xmm5, xmm1
  38006. por xmm6, xmm2
  38007. por xmm7, xmm3
  38008. paddd xmm13, xmm11
  38009. ; ENTRY: 14
  38010. mov r9, QWORD PTR [rdx+112]
  38011. add r9, 192
  38012. movdqu xmm12, xmm13
  38013. pcmpeqd xmm12, xmm10
  38014. movdqu xmm0, [r9]
  38015. movdqu xmm1, [r9+16]
  38016. movdqu xmm2, [r9+32]
  38017. movdqu xmm3, [r9+48]
  38018. pand xmm0, xmm12
  38019. pand xmm1, xmm12
  38020. pand xmm2, xmm12
  38021. pand xmm3, xmm12
  38022. por xmm4, xmm0
  38023. por xmm5, xmm1
  38024. por xmm6, xmm2
  38025. por xmm7, xmm3
  38026. paddd xmm13, xmm11
  38027. ; ENTRY: 15
  38028. mov r9, QWORD PTR [rdx+120]
  38029. add r9, 192
  38030. movdqu xmm12, xmm13
  38031. pcmpeqd xmm12, xmm10
  38032. movdqu xmm0, [r9]
  38033. movdqu xmm1, [r9+16]
  38034. movdqu xmm2, [r9+32]
  38035. movdqu xmm3, [r9+48]
  38036. pand xmm0, xmm12
  38037. pand xmm1, xmm12
  38038. pand xmm2, xmm12
  38039. pand xmm3, xmm12
  38040. por xmm4, xmm0
  38041. por xmm5, xmm1
  38042. por xmm6, xmm2
  38043. por xmm7, xmm3
  38044. paddd xmm13, xmm11
  38045. movdqu [rcx], xmm4
  38046. movdqu [rcx+16], xmm5
  38047. movdqu [rcx+32], xmm6
  38048. movdqu [rcx+48], xmm7
  38049. add rcx, 64
  38050. ; END: 24-31
  38051. ; START: 32-39
  38052. pxor xmm13, xmm13
  38053. pxor xmm4, xmm4
  38054. pxor xmm5, xmm5
  38055. pxor xmm6, xmm6
  38056. pxor xmm7, xmm7
  38057. ; ENTRY: 0
  38058. mov r9, QWORD PTR [rdx]
  38059. add r9, 256
  38060. movdqu xmm12, xmm13
  38061. pcmpeqd xmm12, xmm10
  38062. movdqu xmm0, [r9]
  38063. movdqu xmm1, [r9+16]
  38064. movdqu xmm2, [r9+32]
  38065. movdqu xmm3, [r9+48]
  38066. pand xmm0, xmm12
  38067. pand xmm1, xmm12
  38068. pand xmm2, xmm12
  38069. pand xmm3, xmm12
  38070. por xmm4, xmm0
  38071. por xmm5, xmm1
  38072. por xmm6, xmm2
  38073. por xmm7, xmm3
  38074. paddd xmm13, xmm11
  38075. ; ENTRY: 1
  38076. mov r9, QWORD PTR [rdx+8]
  38077. add r9, 256
  38078. movdqu xmm12, xmm13
  38079. pcmpeqd xmm12, xmm10
  38080. movdqu xmm0, [r9]
  38081. movdqu xmm1, [r9+16]
  38082. movdqu xmm2, [r9+32]
  38083. movdqu xmm3, [r9+48]
  38084. pand xmm0, xmm12
  38085. pand xmm1, xmm12
  38086. pand xmm2, xmm12
  38087. pand xmm3, xmm12
  38088. por xmm4, xmm0
  38089. por xmm5, xmm1
  38090. por xmm6, xmm2
  38091. por xmm7, xmm3
  38092. paddd xmm13, xmm11
  38093. ; ENTRY: 2
  38094. mov r9, QWORD PTR [rdx+16]
  38095. add r9, 256
  38096. movdqu xmm12, xmm13
  38097. pcmpeqd xmm12, xmm10
  38098. movdqu xmm0, [r9]
  38099. movdqu xmm1, [r9+16]
  38100. movdqu xmm2, [r9+32]
  38101. movdqu xmm3, [r9+48]
  38102. pand xmm0, xmm12
  38103. pand xmm1, xmm12
  38104. pand xmm2, xmm12
  38105. pand xmm3, xmm12
  38106. por xmm4, xmm0
  38107. por xmm5, xmm1
  38108. por xmm6, xmm2
  38109. por xmm7, xmm3
  38110. paddd xmm13, xmm11
  38111. ; ENTRY: 3
  38112. mov r9, QWORD PTR [rdx+24]
  38113. add r9, 256
  38114. movdqu xmm12, xmm13
  38115. pcmpeqd xmm12, xmm10
  38116. movdqu xmm0, [r9]
  38117. movdqu xmm1, [r9+16]
  38118. movdqu xmm2, [r9+32]
  38119. movdqu xmm3, [r9+48]
  38120. pand xmm0, xmm12
  38121. pand xmm1, xmm12
  38122. pand xmm2, xmm12
  38123. pand xmm3, xmm12
  38124. por xmm4, xmm0
  38125. por xmm5, xmm1
  38126. por xmm6, xmm2
  38127. por xmm7, xmm3
  38128. paddd xmm13, xmm11
  38129. ; ENTRY: 4
  38130. mov r9, QWORD PTR [rdx+32]
  38131. add r9, 256
  38132. movdqu xmm12, xmm13
  38133. pcmpeqd xmm12, xmm10
  38134. movdqu xmm0, [r9]
  38135. movdqu xmm1, [r9+16]
  38136. movdqu xmm2, [r9+32]
  38137. movdqu xmm3, [r9+48]
  38138. pand xmm0, xmm12
  38139. pand xmm1, xmm12
  38140. pand xmm2, xmm12
  38141. pand xmm3, xmm12
  38142. por xmm4, xmm0
  38143. por xmm5, xmm1
  38144. por xmm6, xmm2
  38145. por xmm7, xmm3
  38146. paddd xmm13, xmm11
  38147. ; ENTRY: 5
  38148. mov r9, QWORD PTR [rdx+40]
  38149. add r9, 256
  38150. movdqu xmm12, xmm13
  38151. pcmpeqd xmm12, xmm10
  38152. movdqu xmm0, [r9]
  38153. movdqu xmm1, [r9+16]
  38154. movdqu xmm2, [r9+32]
  38155. movdqu xmm3, [r9+48]
  38156. pand xmm0, xmm12
  38157. pand xmm1, xmm12
  38158. pand xmm2, xmm12
  38159. pand xmm3, xmm12
  38160. por xmm4, xmm0
  38161. por xmm5, xmm1
  38162. por xmm6, xmm2
  38163. por xmm7, xmm3
  38164. paddd xmm13, xmm11
  38165. ; ENTRY: 6
  38166. mov r9, QWORD PTR [rdx+48]
  38167. add r9, 256
  38168. movdqu xmm12, xmm13
  38169. pcmpeqd xmm12, xmm10
  38170. movdqu xmm0, [r9]
  38171. movdqu xmm1, [r9+16]
  38172. movdqu xmm2, [r9+32]
  38173. movdqu xmm3, [r9+48]
  38174. pand xmm0, xmm12
  38175. pand xmm1, xmm12
  38176. pand xmm2, xmm12
  38177. pand xmm3, xmm12
  38178. por xmm4, xmm0
  38179. por xmm5, xmm1
  38180. por xmm6, xmm2
  38181. por xmm7, xmm3
  38182. paddd xmm13, xmm11
  38183. ; ENTRY: 7
  38184. mov r9, QWORD PTR [rdx+56]
  38185. add r9, 256
  38186. movdqu xmm12, xmm13
  38187. pcmpeqd xmm12, xmm10
  38188. movdqu xmm0, [r9]
  38189. movdqu xmm1, [r9+16]
  38190. movdqu xmm2, [r9+32]
  38191. movdqu xmm3, [r9+48]
  38192. pand xmm0, xmm12
  38193. pand xmm1, xmm12
  38194. pand xmm2, xmm12
  38195. pand xmm3, xmm12
  38196. por xmm4, xmm0
  38197. por xmm5, xmm1
  38198. por xmm6, xmm2
  38199. por xmm7, xmm3
  38200. paddd xmm13, xmm11
  38201. ; ENTRY: 8
  38202. mov r9, QWORD PTR [rdx+64]
  38203. add r9, 256
  38204. movdqu xmm12, xmm13
  38205. pcmpeqd xmm12, xmm10
  38206. movdqu xmm0, [r9]
  38207. movdqu xmm1, [r9+16]
  38208. movdqu xmm2, [r9+32]
  38209. movdqu xmm3, [r9+48]
  38210. pand xmm0, xmm12
  38211. pand xmm1, xmm12
  38212. pand xmm2, xmm12
  38213. pand xmm3, xmm12
  38214. por xmm4, xmm0
  38215. por xmm5, xmm1
  38216. por xmm6, xmm2
  38217. por xmm7, xmm3
  38218. paddd xmm13, xmm11
  38219. ; ENTRY: 9
  38220. mov r9, QWORD PTR [rdx+72]
  38221. add r9, 256
  38222. movdqu xmm12, xmm13
  38223. pcmpeqd xmm12, xmm10
  38224. movdqu xmm0, [r9]
  38225. movdqu xmm1, [r9+16]
  38226. movdqu xmm2, [r9+32]
  38227. movdqu xmm3, [r9+48]
  38228. pand xmm0, xmm12
  38229. pand xmm1, xmm12
  38230. pand xmm2, xmm12
  38231. pand xmm3, xmm12
  38232. por xmm4, xmm0
  38233. por xmm5, xmm1
  38234. por xmm6, xmm2
  38235. por xmm7, xmm3
  38236. paddd xmm13, xmm11
  38237. ; ENTRY: 10
  38238. mov r9, QWORD PTR [rdx+80]
  38239. add r9, 256
  38240. movdqu xmm12, xmm13
  38241. pcmpeqd xmm12, xmm10
  38242. movdqu xmm0, [r9]
  38243. movdqu xmm1, [r9+16]
  38244. movdqu xmm2, [r9+32]
  38245. movdqu xmm3, [r9+48]
  38246. pand xmm0, xmm12
  38247. pand xmm1, xmm12
  38248. pand xmm2, xmm12
  38249. pand xmm3, xmm12
  38250. por xmm4, xmm0
  38251. por xmm5, xmm1
  38252. por xmm6, xmm2
  38253. por xmm7, xmm3
  38254. paddd xmm13, xmm11
  38255. ; ENTRY: 11
  38256. mov r9, QWORD PTR [rdx+88]
  38257. add r9, 256
  38258. movdqu xmm12, xmm13
  38259. pcmpeqd xmm12, xmm10
  38260. movdqu xmm0, [r9]
  38261. movdqu xmm1, [r9+16]
  38262. movdqu xmm2, [r9+32]
  38263. movdqu xmm3, [r9+48]
  38264. pand xmm0, xmm12
  38265. pand xmm1, xmm12
  38266. pand xmm2, xmm12
  38267. pand xmm3, xmm12
  38268. por xmm4, xmm0
  38269. por xmm5, xmm1
  38270. por xmm6, xmm2
  38271. por xmm7, xmm3
  38272. paddd xmm13, xmm11
  38273. ; ENTRY: 12
  38274. mov r9, QWORD PTR [rdx+96]
  38275. add r9, 256
  38276. movdqu xmm12, xmm13
  38277. pcmpeqd xmm12, xmm10
  38278. movdqu xmm0, [r9]
  38279. movdqu xmm1, [r9+16]
  38280. movdqu xmm2, [r9+32]
  38281. movdqu xmm3, [r9+48]
  38282. pand xmm0, xmm12
  38283. pand xmm1, xmm12
  38284. pand xmm2, xmm12
  38285. pand xmm3, xmm12
  38286. por xmm4, xmm0
  38287. por xmm5, xmm1
  38288. por xmm6, xmm2
  38289. por xmm7, xmm3
  38290. paddd xmm13, xmm11
  38291. ; ENTRY: 13
  38292. mov r9, QWORD PTR [rdx+104]
  38293. add r9, 256
  38294. movdqu xmm12, xmm13
  38295. pcmpeqd xmm12, xmm10
  38296. movdqu xmm0, [r9]
  38297. movdqu xmm1, [r9+16]
  38298. movdqu xmm2, [r9+32]
  38299. movdqu xmm3, [r9+48]
  38300. pand xmm0, xmm12
  38301. pand xmm1, xmm12
  38302. pand xmm2, xmm12
  38303. pand xmm3, xmm12
  38304. por xmm4, xmm0
  38305. por xmm5, xmm1
  38306. por xmm6, xmm2
  38307. por xmm7, xmm3
  38308. paddd xmm13, xmm11
  38309. ; ENTRY: 14
  38310. mov r9, QWORD PTR [rdx+112]
  38311. add r9, 256
  38312. movdqu xmm12, xmm13
  38313. pcmpeqd xmm12, xmm10
  38314. movdqu xmm0, [r9]
  38315. movdqu xmm1, [r9+16]
  38316. movdqu xmm2, [r9+32]
  38317. movdqu xmm3, [r9+48]
  38318. pand xmm0, xmm12
  38319. pand xmm1, xmm12
  38320. pand xmm2, xmm12
  38321. pand xmm3, xmm12
  38322. por xmm4, xmm0
  38323. por xmm5, xmm1
  38324. por xmm6, xmm2
  38325. por xmm7, xmm3
  38326. paddd xmm13, xmm11
  38327. ; ENTRY: 15
  38328. mov r9, QWORD PTR [rdx+120]
  38329. add r9, 256
  38330. movdqu xmm12, xmm13
  38331. pcmpeqd xmm12, xmm10
  38332. movdqu xmm0, [r9]
  38333. movdqu xmm1, [r9+16]
  38334. movdqu xmm2, [r9+32]
  38335. movdqu xmm3, [r9+48]
  38336. pand xmm0, xmm12
  38337. pand xmm1, xmm12
  38338. pand xmm2, xmm12
  38339. pand xmm3, xmm12
  38340. por xmm4, xmm0
  38341. por xmm5, xmm1
  38342. por xmm6, xmm2
  38343. por xmm7, xmm3
  38344. paddd xmm13, xmm11
  38345. movdqu [rcx], xmm4
  38346. movdqu [rcx+16], xmm5
  38347. movdqu [rcx+32], xmm6
  38348. movdqu [rcx+48], xmm7
  38349. add rcx, 64
  38350. ; END: 32-39
  38351. ; START: 40-47
  38352. pxor xmm13, xmm13
  38353. pxor xmm4, xmm4
  38354. pxor xmm5, xmm5
  38355. pxor xmm6, xmm6
  38356. pxor xmm7, xmm7
  38357. ; ENTRY: 0
  38358. mov r9, QWORD PTR [rdx]
  38359. add r9, 320
  38360. movdqu xmm12, xmm13
  38361. pcmpeqd xmm12, xmm10
  38362. movdqu xmm0, [r9]
  38363. movdqu xmm1, [r9+16]
  38364. movdqu xmm2, [r9+32]
  38365. movdqu xmm3, [r9+48]
  38366. pand xmm0, xmm12
  38367. pand xmm1, xmm12
  38368. pand xmm2, xmm12
  38369. pand xmm3, xmm12
  38370. por xmm4, xmm0
  38371. por xmm5, xmm1
  38372. por xmm6, xmm2
  38373. por xmm7, xmm3
  38374. paddd xmm13, xmm11
  38375. ; ENTRY: 1
  38376. mov r9, QWORD PTR [rdx+8]
  38377. add r9, 320
  38378. movdqu xmm12, xmm13
  38379. pcmpeqd xmm12, xmm10
  38380. movdqu xmm0, [r9]
  38381. movdqu xmm1, [r9+16]
  38382. movdqu xmm2, [r9+32]
  38383. movdqu xmm3, [r9+48]
  38384. pand xmm0, xmm12
  38385. pand xmm1, xmm12
  38386. pand xmm2, xmm12
  38387. pand xmm3, xmm12
  38388. por xmm4, xmm0
  38389. por xmm5, xmm1
  38390. por xmm6, xmm2
  38391. por xmm7, xmm3
  38392. paddd xmm13, xmm11
  38393. ; ENTRY: 2
  38394. mov r9, QWORD PTR [rdx+16]
  38395. add r9, 320
  38396. movdqu xmm12, xmm13
  38397. pcmpeqd xmm12, xmm10
  38398. movdqu xmm0, [r9]
  38399. movdqu xmm1, [r9+16]
  38400. movdqu xmm2, [r9+32]
  38401. movdqu xmm3, [r9+48]
  38402. pand xmm0, xmm12
  38403. pand xmm1, xmm12
  38404. pand xmm2, xmm12
  38405. pand xmm3, xmm12
  38406. por xmm4, xmm0
  38407. por xmm5, xmm1
  38408. por xmm6, xmm2
  38409. por xmm7, xmm3
  38410. paddd xmm13, xmm11
  38411. ; ENTRY: 3
  38412. mov r9, QWORD PTR [rdx+24]
  38413. add r9, 320
  38414. movdqu xmm12, xmm13
  38415. pcmpeqd xmm12, xmm10
  38416. movdqu xmm0, [r9]
  38417. movdqu xmm1, [r9+16]
  38418. movdqu xmm2, [r9+32]
  38419. movdqu xmm3, [r9+48]
  38420. pand xmm0, xmm12
  38421. pand xmm1, xmm12
  38422. pand xmm2, xmm12
  38423. pand xmm3, xmm12
  38424. por xmm4, xmm0
  38425. por xmm5, xmm1
  38426. por xmm6, xmm2
  38427. por xmm7, xmm3
  38428. paddd xmm13, xmm11
  38429. ; ENTRY: 4
  38430. mov r9, QWORD PTR [rdx+32]
  38431. add r9, 320
  38432. movdqu xmm12, xmm13
  38433. pcmpeqd xmm12, xmm10
  38434. movdqu xmm0, [r9]
  38435. movdqu xmm1, [r9+16]
  38436. movdqu xmm2, [r9+32]
  38437. movdqu xmm3, [r9+48]
  38438. pand xmm0, xmm12
  38439. pand xmm1, xmm12
  38440. pand xmm2, xmm12
  38441. pand xmm3, xmm12
  38442. por xmm4, xmm0
  38443. por xmm5, xmm1
  38444. por xmm6, xmm2
  38445. por xmm7, xmm3
  38446. paddd xmm13, xmm11
  38447. ; ENTRY: 5
  38448. mov r9, QWORD PTR [rdx+40]
  38449. add r9, 320
  38450. movdqu xmm12, xmm13
  38451. pcmpeqd xmm12, xmm10
  38452. movdqu xmm0, [r9]
  38453. movdqu xmm1, [r9+16]
  38454. movdqu xmm2, [r9+32]
  38455. movdqu xmm3, [r9+48]
  38456. pand xmm0, xmm12
  38457. pand xmm1, xmm12
  38458. pand xmm2, xmm12
  38459. pand xmm3, xmm12
  38460. por xmm4, xmm0
  38461. por xmm5, xmm1
  38462. por xmm6, xmm2
  38463. por xmm7, xmm3
  38464. paddd xmm13, xmm11
  38465. ; ENTRY: 6
  38466. mov r9, QWORD PTR [rdx+48]
  38467. add r9, 320
  38468. movdqu xmm12, xmm13
  38469. pcmpeqd xmm12, xmm10
  38470. movdqu xmm0, [r9]
  38471. movdqu xmm1, [r9+16]
  38472. movdqu xmm2, [r9+32]
  38473. movdqu xmm3, [r9+48]
  38474. pand xmm0, xmm12
  38475. pand xmm1, xmm12
  38476. pand xmm2, xmm12
  38477. pand xmm3, xmm12
  38478. por xmm4, xmm0
  38479. por xmm5, xmm1
  38480. por xmm6, xmm2
  38481. por xmm7, xmm3
  38482. paddd xmm13, xmm11
  38483. ; ENTRY: 7
  38484. mov r9, QWORD PTR [rdx+56]
  38485. add r9, 320
  38486. movdqu xmm12, xmm13
  38487. pcmpeqd xmm12, xmm10
  38488. movdqu xmm0, [r9]
  38489. movdqu xmm1, [r9+16]
  38490. movdqu xmm2, [r9+32]
  38491. movdqu xmm3, [r9+48]
  38492. pand xmm0, xmm12
  38493. pand xmm1, xmm12
  38494. pand xmm2, xmm12
  38495. pand xmm3, xmm12
  38496. por xmm4, xmm0
  38497. por xmm5, xmm1
  38498. por xmm6, xmm2
  38499. por xmm7, xmm3
  38500. paddd xmm13, xmm11
  38501. ; ENTRY: 8
  38502. mov r9, QWORD PTR [rdx+64]
  38503. add r9, 320
  38504. movdqu xmm12, xmm13
  38505. pcmpeqd xmm12, xmm10
  38506. movdqu xmm0, [r9]
  38507. movdqu xmm1, [r9+16]
  38508. movdqu xmm2, [r9+32]
  38509. movdqu xmm3, [r9+48]
  38510. pand xmm0, xmm12
  38511. pand xmm1, xmm12
  38512. pand xmm2, xmm12
  38513. pand xmm3, xmm12
  38514. por xmm4, xmm0
  38515. por xmm5, xmm1
  38516. por xmm6, xmm2
  38517. por xmm7, xmm3
  38518. paddd xmm13, xmm11
  38519. ; ENTRY: 9
  38520. mov r9, QWORD PTR [rdx+72]
  38521. add r9, 320
  38522. movdqu xmm12, xmm13
  38523. pcmpeqd xmm12, xmm10
  38524. movdqu xmm0, [r9]
  38525. movdqu xmm1, [r9+16]
  38526. movdqu xmm2, [r9+32]
  38527. movdqu xmm3, [r9+48]
  38528. pand xmm0, xmm12
  38529. pand xmm1, xmm12
  38530. pand xmm2, xmm12
  38531. pand xmm3, xmm12
  38532. por xmm4, xmm0
  38533. por xmm5, xmm1
  38534. por xmm6, xmm2
  38535. por xmm7, xmm3
  38536. paddd xmm13, xmm11
  38537. ; ENTRY: 10
  38538. mov r9, QWORD PTR [rdx+80]
  38539. add r9, 320
  38540. movdqu xmm12, xmm13
  38541. pcmpeqd xmm12, xmm10
  38542. movdqu xmm0, [r9]
  38543. movdqu xmm1, [r9+16]
  38544. movdqu xmm2, [r9+32]
  38545. movdqu xmm3, [r9+48]
  38546. pand xmm0, xmm12
  38547. pand xmm1, xmm12
  38548. pand xmm2, xmm12
  38549. pand xmm3, xmm12
  38550. por xmm4, xmm0
  38551. por xmm5, xmm1
  38552. por xmm6, xmm2
  38553. por xmm7, xmm3
  38554. paddd xmm13, xmm11
  38555. ; ENTRY: 11
  38556. mov r9, QWORD PTR [rdx+88]
  38557. add r9, 320
  38558. movdqu xmm12, xmm13
  38559. pcmpeqd xmm12, xmm10
  38560. movdqu xmm0, [r9]
  38561. movdqu xmm1, [r9+16]
  38562. movdqu xmm2, [r9+32]
  38563. movdqu xmm3, [r9+48]
  38564. pand xmm0, xmm12
  38565. pand xmm1, xmm12
  38566. pand xmm2, xmm12
  38567. pand xmm3, xmm12
  38568. por xmm4, xmm0
  38569. por xmm5, xmm1
  38570. por xmm6, xmm2
  38571. por xmm7, xmm3
  38572. paddd xmm13, xmm11
  38573. ; ENTRY: 12
  38574. mov r9, QWORD PTR [rdx+96]
  38575. add r9, 320
  38576. movdqu xmm12, xmm13
  38577. pcmpeqd xmm12, xmm10
  38578. movdqu xmm0, [r9]
  38579. movdqu xmm1, [r9+16]
  38580. movdqu xmm2, [r9+32]
  38581. movdqu xmm3, [r9+48]
  38582. pand xmm0, xmm12
  38583. pand xmm1, xmm12
  38584. pand xmm2, xmm12
  38585. pand xmm3, xmm12
  38586. por xmm4, xmm0
  38587. por xmm5, xmm1
  38588. por xmm6, xmm2
  38589. por xmm7, xmm3
  38590. paddd xmm13, xmm11
  38591. ; ENTRY: 13
  38592. mov r9, QWORD PTR [rdx+104]
  38593. add r9, 320
  38594. movdqu xmm12, xmm13
  38595. pcmpeqd xmm12, xmm10
  38596. movdqu xmm0, [r9]
  38597. movdqu xmm1, [r9+16]
  38598. movdqu xmm2, [r9+32]
  38599. movdqu xmm3, [r9+48]
  38600. pand xmm0, xmm12
  38601. pand xmm1, xmm12
  38602. pand xmm2, xmm12
  38603. pand xmm3, xmm12
  38604. por xmm4, xmm0
  38605. por xmm5, xmm1
  38606. por xmm6, xmm2
  38607. por xmm7, xmm3
  38608. paddd xmm13, xmm11
  38609. ; ENTRY: 14
  38610. mov r9, QWORD PTR [rdx+112]
  38611. add r9, 320
  38612. movdqu xmm12, xmm13
  38613. pcmpeqd xmm12, xmm10
  38614. movdqu xmm0, [r9]
  38615. movdqu xmm1, [r9+16]
  38616. movdqu xmm2, [r9+32]
  38617. movdqu xmm3, [r9+48]
  38618. pand xmm0, xmm12
  38619. pand xmm1, xmm12
  38620. pand xmm2, xmm12
  38621. pand xmm3, xmm12
  38622. por xmm4, xmm0
  38623. por xmm5, xmm1
  38624. por xmm6, xmm2
  38625. por xmm7, xmm3
  38626. paddd xmm13, xmm11
  38627. ; ENTRY: 15
  38628. mov r9, QWORD PTR [rdx+120]
  38629. add r9, 320
  38630. movdqu xmm12, xmm13
  38631. pcmpeqd xmm12, xmm10
  38632. movdqu xmm0, [r9]
  38633. movdqu xmm1, [r9+16]
  38634. movdqu xmm2, [r9+32]
  38635. movdqu xmm3, [r9+48]
  38636. pand xmm0, xmm12
  38637. pand xmm1, xmm12
  38638. pand xmm2, xmm12
  38639. pand xmm3, xmm12
  38640. por xmm4, xmm0
  38641. por xmm5, xmm1
  38642. por xmm6, xmm2
  38643. por xmm7, xmm3
  38644. paddd xmm13, xmm11
  38645. movdqu [rcx], xmm4
  38646. movdqu [rcx+16], xmm5
  38647. movdqu [rcx+32], xmm6
  38648. movdqu [rcx+48], xmm7
  38649. ; END: 40-47
  38650. vmovdqu xmm6, OWORD PTR [rsp]
  38651. vmovdqu xmm7, OWORD PTR [rsp+16]
  38652. vmovdqu xmm8, OWORD PTR [rsp+32]
  38653. vmovdqu xmm9, OWORD PTR [rsp+48]
  38654. vmovdqu xmm10, OWORD PTR [rsp+64]
  38655. vmovdqu xmm11, OWORD PTR [rsp+80]
  38656. vmovdqu xmm12, OWORD PTR [rsp+96]
  38657. vmovdqu xmm13, OWORD PTR [rsp+112]
  38658. add rsp, 128
  38659. ret
  38660. sp_3072_get_from_table_48 ENDP
  38661. _text ENDS
  38662. ENDIF
  38663. IFDEF HAVE_INTEL_AVX2
  38664. ; /* Reduce the number back to 3072 bits using Montgomery reduction.
  38665. ; *
  38666. ; * a A single precision number to reduce in place.
  38667. ; * m The single precision number representing the modulus.
  38668. ; * mp The digit representing the negative inverse of m mod 2^64 (one 64-bit digit).
  38669. ; */
  38670. _text SEGMENT READONLY PARA
  38671. sp_3072_mont_reduce_avx2_48 PROC
  38672. push r12
  38673. push r13
  38674. push r14
  38675. push r15
  38676. push rdi
  38677. push rsi
  38678. push rbx
  38679. push rbp
  38680. mov r9, rcx
  38681. mov r10, rdx
  38682. xor rbp, rbp
  38683. ; i = 48
  38684. mov r11, 48
  38685. mov r14, QWORD PTR [r9]
  38686. mov r15, QWORD PTR [r9+8]
  38687. mov rdi, QWORD PTR [r9+16]
  38688. mov rsi, QWORD PTR [r9+24]
  38689. add r9, 192
  38690. xor rbp, rbp
  38691. L_3072_mont_reduce_avx2_48_loop:
  38692. ; mu = a[i] * mp
  38693. mov rdx, r14
  38694. mov r12, r14
  38695. imul rdx, r8
  38696. xor rbx, rbx
  38697. ; a[i+0] += m[0] * mu
  38698. mulx rcx, rax, QWORD PTR [r10]
  38699. mov r14, r15
  38700. adcx r12, rax
  38701. adox r14, rcx
  38702. ; a[i+1] += m[1] * mu
  38703. mulx rcx, rax, QWORD PTR [r10+8]
  38704. mov r15, rdi
  38705. adcx r14, rax
  38706. adox r15, rcx
  38707. ; a[i+2] += m[2] * mu
  38708. mulx rcx, rax, QWORD PTR [r10+16]
  38709. mov rdi, rsi
  38710. adcx r15, rax
  38711. adox rdi, rcx
  38712. ; a[i+3] += m[3] * mu
  38713. mulx rcx, rax, QWORD PTR [r10+24]
  38714. mov rsi, QWORD PTR [r9+-160]
  38715. adcx rdi, rax
  38716. adox rsi, rcx
  38717. ; a[i+4] += m[4] * mu
  38718. mulx rcx, rax, QWORD PTR [r10+32]
  38719. mov r13, QWORD PTR [r9+-152]
  38720. adcx rsi, rax
  38721. adox r13, rcx
  38722. ; a[i+5] += m[5] * mu
  38723. mulx rcx, rax, QWORD PTR [r10+40]
  38724. mov r12, QWORD PTR [r9+-144]
  38725. adcx r13, rax
  38726. adox r12, rcx
  38727. mov QWORD PTR [r9+-152], r13
  38728. ; a[i+6] += m[6] * mu
  38729. mulx rcx, rax, QWORD PTR [r10+48]
  38730. mov r13, QWORD PTR [r9+-136]
  38731. adcx r12, rax
  38732. adox r13, rcx
  38733. mov QWORD PTR [r9+-144], r12
  38734. ; a[i+7] += m[7] * mu
  38735. mulx rcx, rax, QWORD PTR [r10+56]
  38736. mov r12, QWORD PTR [r9+-128]
  38737. adcx r13, rax
  38738. adox r12, rcx
  38739. mov QWORD PTR [r9+-136], r13
  38740. ; a[i+8] += m[8] * mu
  38741. mulx rcx, rax, QWORD PTR [r10+64]
  38742. mov r13, QWORD PTR [r9+-120]
  38743. adcx r12, rax
  38744. adox r13, rcx
  38745. mov QWORD PTR [r9+-128], r12
  38746. ; a[i+9] += m[9] * mu
  38747. mulx rcx, rax, QWORD PTR [r10+72]
  38748. mov r12, QWORD PTR [r9+-112]
  38749. adcx r13, rax
  38750. adox r12, rcx
  38751. mov QWORD PTR [r9+-120], r13
  38752. ; a[i+10] += m[10] * mu
  38753. mulx rcx, rax, QWORD PTR [r10+80]
  38754. mov r13, QWORD PTR [r9+-104]
  38755. adcx r12, rax
  38756. adox r13, rcx
  38757. mov QWORD PTR [r9+-112], r12
  38758. ; a[i+11] += m[11] * mu
  38759. mulx rcx, rax, QWORD PTR [r10+88]
  38760. mov r12, QWORD PTR [r9+-96]
  38761. adcx r13, rax
  38762. adox r12, rcx
  38763. mov QWORD PTR [r9+-104], r13
  38764. ; a[i+12] += m[12] * mu
  38765. mulx rcx, rax, QWORD PTR [r10+96]
  38766. mov r13, QWORD PTR [r9+-88]
  38767. adcx r12, rax
  38768. adox r13, rcx
  38769. mov QWORD PTR [r9+-96], r12
  38770. ; a[i+13] += m[13] * mu
  38771. mulx rcx, rax, QWORD PTR [r10+104]
  38772. mov r12, QWORD PTR [r9+-80]
  38773. adcx r13, rax
  38774. adox r12, rcx
  38775. mov QWORD PTR [r9+-88], r13
  38776. ; a[i+14] += m[14] * mu
  38777. mulx rcx, rax, QWORD PTR [r10+112]
  38778. mov r13, QWORD PTR [r9+-72]
  38779. adcx r12, rax
  38780. adox r13, rcx
  38781. mov QWORD PTR [r9+-80], r12
  38782. ; a[i+15] += m[15] * mu
  38783. mulx rcx, rax, QWORD PTR [r10+120]
  38784. mov r12, QWORD PTR [r9+-64]
  38785. adcx r13, rax
  38786. adox r12, rcx
  38787. mov QWORD PTR [r9+-72], r13
  38788. ; a[i+16] += m[16] * mu
  38789. mulx rcx, rax, QWORD PTR [r10+128]
  38790. mov r13, QWORD PTR [r9+-56]
  38791. adcx r12, rax
  38792. adox r13, rcx
  38793. mov QWORD PTR [r9+-64], r12
  38794. ; a[i+17] += m[17] * mu
  38795. mulx rcx, rax, QWORD PTR [r10+136]
  38796. mov r12, QWORD PTR [r9+-48]
  38797. adcx r13, rax
  38798. adox r12, rcx
  38799. mov QWORD PTR [r9+-56], r13
  38800. ; a[i+18] += m[18] * mu
  38801. mulx rcx, rax, QWORD PTR [r10+144]
  38802. mov r13, QWORD PTR [r9+-40]
  38803. adcx r12, rax
  38804. adox r13, rcx
  38805. mov QWORD PTR [r9+-48], r12
  38806. ; a[i+19] += m[19] * mu
  38807. mulx rcx, rax, QWORD PTR [r10+152]
  38808. mov r12, QWORD PTR [r9+-32]
  38809. adcx r13, rax
  38810. adox r12, rcx
  38811. mov QWORD PTR [r9+-40], r13
  38812. ; a[i+20] += m[20] * mu
  38813. mulx rcx, rax, QWORD PTR [r10+160]
  38814. mov r13, QWORD PTR [r9+-24]
  38815. adcx r12, rax
  38816. adox r13, rcx
  38817. mov QWORD PTR [r9+-32], r12
  38818. ; a[i+21] += m[21] * mu
  38819. mulx rcx, rax, QWORD PTR [r10+168]
  38820. mov r12, QWORD PTR [r9+-16]
  38821. adcx r13, rax
  38822. adox r12, rcx
  38823. mov QWORD PTR [r9+-24], r13
  38824. ; a[i+22] += m[22] * mu
  38825. mulx rcx, rax, QWORD PTR [r10+176]
  38826. mov r13, QWORD PTR [r9+-8]
  38827. adcx r12, rax
  38828. adox r13, rcx
  38829. mov QWORD PTR [r9+-16], r12
  38830. ; a[i+23] += m[23] * mu
  38831. mulx rcx, rax, QWORD PTR [r10+184]
  38832. mov r12, QWORD PTR [r9]
  38833. adcx r13, rax
  38834. adox r12, rcx
  38835. mov QWORD PTR [r9+-8], r13
  38836. ; a[i+24] += m[24] * mu
  38837. mulx rcx, rax, QWORD PTR [r10+192]
  38838. mov r13, QWORD PTR [r9+8]
  38839. adcx r12, rax
  38840. adox r13, rcx
  38841. mov QWORD PTR [r9], r12
  38842. ; a[i+25] += m[25] * mu
  38843. mulx rcx, rax, QWORD PTR [r10+200]
  38844. mov r12, QWORD PTR [r9+16]
  38845. adcx r13, rax
  38846. adox r12, rcx
  38847. mov QWORD PTR [r9+8], r13
  38848. ; a[i+26] += m[26] * mu
  38849. mulx rcx, rax, QWORD PTR [r10+208]
  38850. mov r13, QWORD PTR [r9+24]
  38851. adcx r12, rax
  38852. adox r13, rcx
  38853. mov QWORD PTR [r9+16], r12
  38854. ; a[i+27] += m[27] * mu
  38855. mulx rcx, rax, QWORD PTR [r10+216]
  38856. mov r12, QWORD PTR [r9+32]
  38857. adcx r13, rax
  38858. adox r12, rcx
  38859. mov QWORD PTR [r9+24], r13
  38860. ; a[i+28] += m[28] * mu
  38861. mulx rcx, rax, QWORD PTR [r10+224]
  38862. mov r13, QWORD PTR [r9+40]
  38863. adcx r12, rax
  38864. adox r13, rcx
  38865. mov QWORD PTR [r9+32], r12
  38866. ; a[i+29] += m[29] * mu
  38867. mulx rcx, rax, QWORD PTR [r10+232]
  38868. mov r12, QWORD PTR [r9+48]
  38869. adcx r13, rax
  38870. adox r12, rcx
  38871. mov QWORD PTR [r9+40], r13
  38872. ; a[i+30] += m[30] * mu
  38873. mulx rcx, rax, QWORD PTR [r10+240]
  38874. mov r13, QWORD PTR [r9+56]
  38875. adcx r12, rax
  38876. adox r13, rcx
  38877. mov QWORD PTR [r9+48], r12
  38878. ; a[i+31] += m[31] * mu
  38879. mulx rcx, rax, QWORD PTR [r10+248]
  38880. mov r12, QWORD PTR [r9+64]
  38881. adcx r13, rax
  38882. adox r12, rcx
  38883. mov QWORD PTR [r9+56], r13
  38884. ; a[i+32] += m[32] * mu
  38885. mulx rcx, rax, QWORD PTR [r10+256]
  38886. mov r13, QWORD PTR [r9+72]
  38887. adcx r12, rax
  38888. adox r13, rcx
  38889. mov QWORD PTR [r9+64], r12
  38890. ; a[i+33] += m[33] * mu
  38891. mulx rcx, rax, QWORD PTR [r10+264]
  38892. mov r12, QWORD PTR [r9+80]
  38893. adcx r13, rax
  38894. adox r12, rcx
  38895. mov QWORD PTR [r9+72], r13
  38896. ; a[i+34] += m[34] * mu
  38897. mulx rcx, rax, QWORD PTR [r10+272]
  38898. mov r13, QWORD PTR [r9+88]
  38899. adcx r12, rax
  38900. adox r13, rcx
  38901. mov QWORD PTR [r9+80], r12
  38902. ; a[i+35] += m[35] * mu
  38903. mulx rcx, rax, QWORD PTR [r10+280]
  38904. mov r12, QWORD PTR [r9+96]
  38905. adcx r13, rax
  38906. adox r12, rcx
  38907. mov QWORD PTR [r9+88], r13
  38908. ; a[i+36] += m[36] * mu
  38909. mulx rcx, rax, QWORD PTR [r10+288]
  38910. mov r13, QWORD PTR [r9+104]
  38911. adcx r12, rax
  38912. adox r13, rcx
  38913. mov QWORD PTR [r9+96], r12
  38914. ; a[i+37] += m[37] * mu
  38915. mulx rcx, rax, QWORD PTR [r10+296]
  38916. mov r12, QWORD PTR [r9+112]
  38917. adcx r13, rax
  38918. adox r12, rcx
  38919. mov QWORD PTR [r9+104], r13
  38920. ; a[i+38] += m[38] * mu
  38921. mulx rcx, rax, QWORD PTR [r10+304]
  38922. mov r13, QWORD PTR [r9+120]
  38923. adcx r12, rax
  38924. adox r13, rcx
  38925. mov QWORD PTR [r9+112], r12
  38926. ; a[i+39] += m[39] * mu
  38927. mulx rcx, rax, QWORD PTR [r10+312]
  38928. mov r12, QWORD PTR [r9+128]
  38929. adcx r13, rax
  38930. adox r12, rcx
  38931. mov QWORD PTR [r9+120], r13
  38932. ; a[i+40] += m[40] * mu
  38933. mulx rcx, rax, QWORD PTR [r10+320]
  38934. mov r13, QWORD PTR [r9+136]
  38935. adcx r12, rax
  38936. adox r13, rcx
  38937. mov QWORD PTR [r9+128], r12
  38938. ; a[i+41] += m[41] * mu
  38939. mulx rcx, rax, QWORD PTR [r10+328]
  38940. mov r12, QWORD PTR [r9+144]
  38941. adcx r13, rax
  38942. adox r12, rcx
  38943. mov QWORD PTR [r9+136], r13
  38944. ; a[i+42] += m[42] * mu
  38945. mulx rcx, rax, QWORD PTR [r10+336]
  38946. mov r13, QWORD PTR [r9+152]
  38947. adcx r12, rax
  38948. adox r13, rcx
  38949. mov QWORD PTR [r9+144], r12
  38950. ; a[i+43] += m[43] * mu
  38951. mulx rcx, rax, QWORD PTR [r10+344]
  38952. mov r12, QWORD PTR [r9+160]
  38953. adcx r13, rax
  38954. adox r12, rcx
  38955. mov QWORD PTR [r9+152], r13
  38956. ; a[i+44] += m[44] * mu
  38957. mulx rcx, rax, QWORD PTR [r10+352]
  38958. mov r13, QWORD PTR [r9+168]
  38959. adcx r12, rax
  38960. adox r13, rcx
  38961. mov QWORD PTR [r9+160], r12
  38962. ; a[i+45] += m[45] * mu
  38963. mulx rcx, rax, QWORD PTR [r10+360]
  38964. mov r12, QWORD PTR [r9+176]
  38965. adcx r13, rax
  38966. adox r12, rcx
  38967. mov QWORD PTR [r9+168], r13
  38968. ; a[i+46] += m[46] * mu
  38969. mulx rcx, rax, QWORD PTR [r10+368]
  38970. mov r13, QWORD PTR [r9+184]
  38971. adcx r12, rax
  38972. adox r13, rcx
  38973. mov QWORD PTR [r9+176], r12
  38974. ; a[i+47] += m[47] * mu
  38975. mulx rcx, rax, QWORD PTR [r10+376]
  38976. mov r12, QWORD PTR [r9+192]
  38977. adcx r13, rax
  38978. adox r12, rcx
  38979. mov QWORD PTR [r9+184], r13
  38980. adcx r12, rbp
  38981. mov rbp, rbx
  38982. mov QWORD PTR [r9+192], r12
  38983. adox rbp, rbx
  38984. adcx rbp, rbx
  38985. ; a += 1
  38986. add r9, 8
  38987. ; i -= 1
  38988. sub r11, 1
  38989. jnz L_3072_mont_reduce_avx2_48_loop
  38990. sub r9, 192
  38991. neg rbp
  38992. mov r8, r9
  38993. sub r9, 384
  38994. mov rcx, QWORD PTR [r10]
  38995. mov rdx, r14
  38996. pext rcx, rcx, rbp
  38997. sub rdx, rcx
  38998. mov rcx, QWORD PTR [r10+8]
  38999. mov rax, r15
  39000. pext rcx, rcx, rbp
  39001. mov QWORD PTR [r9], rdx
  39002. sbb rax, rcx
  39003. mov rdx, QWORD PTR [r10+16]
  39004. mov rcx, rdi
  39005. pext rdx, rdx, rbp
  39006. mov QWORD PTR [r9+8], rax
  39007. sbb rcx, rdx
  39008. mov rax, QWORD PTR [r10+24]
  39009. mov rdx, rsi
  39010. pext rax, rax, rbp
  39011. mov QWORD PTR [r9+16], rcx
  39012. sbb rdx, rax
  39013. mov rcx, QWORD PTR [r10+32]
  39014. mov rax, QWORD PTR [r8+32]
  39015. pext rcx, rcx, rbp
  39016. mov QWORD PTR [r9+24], rdx
  39017. sbb rax, rcx
  39018. mov rdx, QWORD PTR [r10+40]
  39019. mov rcx, QWORD PTR [r8+40]
  39020. pext rdx, rdx, rbp
  39021. mov QWORD PTR [r9+32], rax
  39022. sbb rcx, rdx
  39023. mov rax, QWORD PTR [r10+48]
  39024. mov rdx, QWORD PTR [r8+48]
  39025. pext rax, rax, rbp
  39026. mov QWORD PTR [r9+40], rcx
  39027. sbb rdx, rax
  39028. mov rcx, QWORD PTR [r10+56]
  39029. mov rax, QWORD PTR [r8+56]
  39030. pext rcx, rcx, rbp
  39031. mov QWORD PTR [r9+48], rdx
  39032. sbb rax, rcx
  39033. mov rdx, QWORD PTR [r10+64]
  39034. mov rcx, QWORD PTR [r8+64]
  39035. pext rdx, rdx, rbp
  39036. mov QWORD PTR [r9+56], rax
  39037. sbb rcx, rdx
  39038. mov rax, QWORD PTR [r10+72]
  39039. mov rdx, QWORD PTR [r8+72]
  39040. pext rax, rax, rbp
  39041. mov QWORD PTR [r9+64], rcx
  39042. sbb rdx, rax
  39043. mov rcx, QWORD PTR [r10+80]
  39044. mov rax, QWORD PTR [r8+80]
  39045. pext rcx, rcx, rbp
  39046. mov QWORD PTR [r9+72], rdx
  39047. sbb rax, rcx
  39048. mov rdx, QWORD PTR [r10+88]
  39049. mov rcx, QWORD PTR [r8+88]
  39050. pext rdx, rdx, rbp
  39051. mov QWORD PTR [r9+80], rax
  39052. sbb rcx, rdx
  39053. mov rax, QWORD PTR [r10+96]
  39054. mov rdx, QWORD PTR [r8+96]
  39055. pext rax, rax, rbp
  39056. mov QWORD PTR [r9+88], rcx
  39057. sbb rdx, rax
  39058. mov rcx, QWORD PTR [r10+104]
  39059. mov rax, QWORD PTR [r8+104]
  39060. pext rcx, rcx, rbp
  39061. mov QWORD PTR [r9+96], rdx
  39062. sbb rax, rcx
  39063. mov rdx, QWORD PTR [r10+112]
  39064. mov rcx, QWORD PTR [r8+112]
  39065. pext rdx, rdx, rbp
  39066. mov QWORD PTR [r9+104], rax
  39067. sbb rcx, rdx
  39068. mov rax, QWORD PTR [r10+120]
  39069. mov rdx, QWORD PTR [r8+120]
  39070. pext rax, rax, rbp
  39071. mov QWORD PTR [r9+112], rcx
  39072. sbb rdx, rax
  39073. mov rcx, QWORD PTR [r10+128]
  39074. mov rax, QWORD PTR [r8+128]
  39075. pext rcx, rcx, rbp
  39076. mov QWORD PTR [r9+120], rdx
  39077. sbb rax, rcx
  39078. mov rdx, QWORD PTR [r10+136]
  39079. mov rcx, QWORD PTR [r8+136]
  39080. pext rdx, rdx, rbp
  39081. mov QWORD PTR [r9+128], rax
  39082. sbb rcx, rdx
  39083. mov rax, QWORD PTR [r10+144]
  39084. mov rdx, QWORD PTR [r8+144]
  39085. pext rax, rax, rbp
  39086. mov QWORD PTR [r9+136], rcx
  39087. sbb rdx, rax
  39088. mov rcx, QWORD PTR [r10+152]
  39089. mov rax, QWORD PTR [r8+152]
  39090. pext rcx, rcx, rbp
  39091. mov QWORD PTR [r9+144], rdx
  39092. sbb rax, rcx
  39093. mov rdx, QWORD PTR [r10+160]
  39094. mov rcx, QWORD PTR [r8+160]
  39095. pext rdx, rdx, rbp
  39096. mov QWORD PTR [r9+152], rax
  39097. sbb rcx, rdx
  39098. mov rax, QWORD PTR [r10+168]
  39099. mov rdx, QWORD PTR [r8+168]
  39100. pext rax, rax, rbp
  39101. mov QWORD PTR [r9+160], rcx
  39102. sbb rdx, rax
  39103. mov rcx, QWORD PTR [r10+176]
  39104. mov rax, QWORD PTR [r8+176]
  39105. pext rcx, rcx, rbp
  39106. mov QWORD PTR [r9+168], rdx
  39107. sbb rax, rcx
  39108. mov rdx, QWORD PTR [r10+184]
  39109. mov rcx, QWORD PTR [r8+184]
  39110. pext rdx, rdx, rbp
  39111. mov QWORD PTR [r9+176], rax
  39112. sbb rcx, rdx
  39113. mov rax, QWORD PTR [r10+192]
  39114. mov rdx, QWORD PTR [r8+192]
  39115. pext rax, rax, rbp
  39116. mov QWORD PTR [r9+184], rcx
  39117. sbb rdx, rax
  39118. mov rcx, QWORD PTR [r10+200]
  39119. mov rax, QWORD PTR [r8+200]
  39120. pext rcx, rcx, rbp
  39121. mov QWORD PTR [r9+192], rdx
  39122. sbb rax, rcx
  39123. mov rdx, QWORD PTR [r10+208]
  39124. mov rcx, QWORD PTR [r8+208]
  39125. pext rdx, rdx, rbp
  39126. mov QWORD PTR [r9+200], rax
  39127. sbb rcx, rdx
  39128. mov rax, QWORD PTR [r10+216]
  39129. mov rdx, QWORD PTR [r8+216]
  39130. pext rax, rax, rbp
  39131. mov QWORD PTR [r9+208], rcx
  39132. sbb rdx, rax
  39133. mov rcx, QWORD PTR [r10+224]
  39134. mov rax, QWORD PTR [r8+224]
  39135. pext rcx, rcx, rbp
  39136. mov QWORD PTR [r9+216], rdx
  39137. sbb rax, rcx
  39138. mov rdx, QWORD PTR [r10+232]
  39139. mov rcx, QWORD PTR [r8+232]
  39140. pext rdx, rdx, rbp
  39141. mov QWORD PTR [r9+224], rax
  39142. sbb rcx, rdx
  39143. mov rax, QWORD PTR [r10+240]
  39144. mov rdx, QWORD PTR [r8+240]
  39145. pext rax, rax, rbp
  39146. mov QWORD PTR [r9+232], rcx
  39147. sbb rdx, rax
  39148. mov rcx, QWORD PTR [r10+248]
  39149. mov rax, QWORD PTR [r8+248]
  39150. pext rcx, rcx, rbp
  39151. mov QWORD PTR [r9+240], rdx
  39152. sbb rax, rcx
  39153. mov rdx, QWORD PTR [r10+256]
  39154. mov rcx, QWORD PTR [r8+256]
  39155. pext rdx, rdx, rbp
  39156. mov QWORD PTR [r9+248], rax
  39157. sbb rcx, rdx
  39158. mov rax, QWORD PTR [r10+264]
  39159. mov rdx, QWORD PTR [r8+264]
  39160. pext rax, rax, rbp
  39161. mov QWORD PTR [r9+256], rcx
  39162. sbb rdx, rax
  39163. mov rcx, QWORD PTR [r10+272]
  39164. mov rax, QWORD PTR [r8+272]
  39165. pext rcx, rcx, rbp
  39166. mov QWORD PTR [r9+264], rdx
  39167. sbb rax, rcx
  39168. mov rdx, QWORD PTR [r10+280]
  39169. mov rcx, QWORD PTR [r8+280]
  39170. pext rdx, rdx, rbp
  39171. mov QWORD PTR [r9+272], rax
  39172. sbb rcx, rdx
  39173. mov rax, QWORD PTR [r10+288]
  39174. mov rdx, QWORD PTR [r8+288]
  39175. pext rax, rax, rbp
  39176. mov QWORD PTR [r9+280], rcx
  39177. sbb rdx, rax
  39178. mov rcx, QWORD PTR [r10+296]
  39179. mov rax, QWORD PTR [r8+296]
  39180. pext rcx, rcx, rbp
  39181. mov QWORD PTR [r9+288], rdx
  39182. sbb rax, rcx
  39183. mov rdx, QWORD PTR [r10+304]
  39184. mov rcx, QWORD PTR [r8+304]
  39185. pext rdx, rdx, rbp
  39186. mov QWORD PTR [r9+296], rax
  39187. sbb rcx, rdx
  39188. mov rax, QWORD PTR [r10+312]
  39189. mov rdx, QWORD PTR [r8+312]
  39190. pext rax, rax, rbp
  39191. mov QWORD PTR [r9+304], rcx
  39192. sbb rdx, rax
  39193. mov rcx, QWORD PTR [r10+320]
  39194. mov rax, QWORD PTR [r8+320]
  39195. pext rcx, rcx, rbp
  39196. mov QWORD PTR [r9+312], rdx
  39197. sbb rax, rcx
  39198. mov rdx, QWORD PTR [r10+328]
  39199. mov rcx, QWORD PTR [r8+328]
  39200. pext rdx, rdx, rbp
  39201. mov QWORD PTR [r9+320], rax
  39202. sbb rcx, rdx
  39203. mov rax, QWORD PTR [r10+336]
  39204. mov rdx, QWORD PTR [r8+336]
  39205. pext rax, rax, rbp
  39206. mov QWORD PTR [r9+328], rcx
  39207. sbb rdx, rax
  39208. mov rcx, QWORD PTR [r10+344]
  39209. mov rax, QWORD PTR [r8+344]
  39210. pext rcx, rcx, rbp
  39211. mov QWORD PTR [r9+336], rdx
  39212. sbb rax, rcx
  39213. mov rdx, QWORD PTR [r10+352]
  39214. mov rcx, QWORD PTR [r8+352]
  39215. pext rdx, rdx, rbp
  39216. mov QWORD PTR [r9+344], rax
  39217. sbb rcx, rdx
  39218. mov rax, QWORD PTR [r10+360]
  39219. mov rdx, QWORD PTR [r8+360]
  39220. pext rax, rax, rbp
  39221. mov QWORD PTR [r9+352], rcx
  39222. sbb rdx, rax
  39223. mov rcx, QWORD PTR [r10+368]
  39224. mov rax, QWORD PTR [r8+368]
  39225. pext rcx, rcx, rbp
  39226. mov QWORD PTR [r9+360], rdx
  39227. sbb rax, rcx
  39228. mov rdx, QWORD PTR [r10+376]
  39229. mov rcx, QWORD PTR [r8+376]
  39230. pext rdx, rdx, rbp
  39231. mov QWORD PTR [r9+368], rax
  39232. sbb rcx, rdx
  39233. mov QWORD PTR [r9+376], rcx
  39234. pop rbp
  39235. pop rbx
  39236. pop rsi
  39237. pop rdi
  39238. pop r15
  39239. pop r14
  39240. pop r13
  39241. pop r12
  39242. ret
  39243. sp_3072_mont_reduce_avx2_48 ENDP
  39244. _text ENDS
  39245. ENDIF
  39246. IFNDEF WC_NO_CACHE_RESISTANT
  39247. _text SEGMENT READONLY PARA
  39248. sp_3072_get_from_table_avx2_48 PROC
  39249. sub rsp, 128
  39250. vmovdqu OWORD PTR [rsp], xmm6
  39251. vmovdqu OWORD PTR [rsp+16], xmm7
  39252. vmovdqu OWORD PTR [rsp+32], xmm8
  39253. vmovdqu OWORD PTR [rsp+48], xmm9
  39254. vmovdqu OWORD PTR [rsp+64], xmm10
  39255. vmovdqu OWORD PTR [rsp+80], xmm11
  39256. vmovdqu OWORD PTR [rsp+96], xmm12
  39257. vmovdqu OWORD PTR [rsp+112], xmm13
  39258. mov rax, 1
  39259. movd xmm10, r8
  39260. movd xmm11, rax
  39261. vpxor ymm13, ymm13, ymm13
  39262. vpermd ymm10, ymm13, ymm10
  39263. vpermd ymm11, ymm13, ymm11
  39264. ; START: 0-15
  39265. vpxor ymm13, ymm13, ymm13
  39266. vpxor ymm4, ymm4, ymm4
  39267. vpxor ymm5, ymm5, ymm5
  39268. vpxor ymm6, ymm6, ymm6
  39269. vpxor ymm7, ymm7, ymm7
  39270. ; ENTRY: 0
  39271. mov r9, QWORD PTR [rdx]
  39272. vpcmpeqd ymm12, ymm13, ymm10
  39273. vmovdqu ymm0, YMMWORD PTR [r9]
  39274. vmovdqu ymm1, YMMWORD PTR [r9+32]
  39275. vmovdqu ymm2, YMMWORD PTR [r9+64]
  39276. vmovdqu ymm3, YMMWORD PTR [r9+96]
  39277. vpand ymm0, ymm0, ymm12
  39278. vpand ymm1, ymm1, ymm12
  39279. vpand ymm2, ymm2, ymm12
  39280. vpand ymm3, ymm3, ymm12
  39281. vpor ymm4, ymm4, ymm0
  39282. vpor ymm5, ymm5, ymm1
  39283. vpor ymm6, ymm6, ymm2
  39284. vpor ymm7, ymm7, ymm3
  39285. vpaddd ymm13, ymm13, ymm11
  39286. ; ENTRY: 1
  39287. mov r9, QWORD PTR [rdx+8]
  39288. vpcmpeqd ymm12, ymm13, ymm10
  39289. vmovdqu ymm0, YMMWORD PTR [r9]
  39290. vmovdqu ymm1, YMMWORD PTR [r9+32]
  39291. vmovdqu ymm2, YMMWORD PTR [r9+64]
  39292. vmovdqu ymm3, YMMWORD PTR [r9+96]
  39293. vpand ymm0, ymm0, ymm12
  39294. vpand ymm1, ymm1, ymm12
  39295. vpand ymm2, ymm2, ymm12
  39296. vpand ymm3, ymm3, ymm12
  39297. vpor ymm4, ymm4, ymm0
  39298. vpor ymm5, ymm5, ymm1
  39299. vpor ymm6, ymm6, ymm2
  39300. vpor ymm7, ymm7, ymm3
  39301. vpaddd ymm13, ymm13, ymm11
  39302. ; ENTRY: 2
  39303. mov r9, QWORD PTR [rdx+16]
  39304. vpcmpeqd ymm12, ymm13, ymm10
  39305. vmovdqu ymm0, YMMWORD PTR [r9]
  39306. vmovdqu ymm1, YMMWORD PTR [r9+32]
  39307. vmovdqu ymm2, YMMWORD PTR [r9+64]
  39308. vmovdqu ymm3, YMMWORD PTR [r9+96]
  39309. vpand ymm0, ymm0, ymm12
  39310. vpand ymm1, ymm1, ymm12
  39311. vpand ymm2, ymm2, ymm12
  39312. vpand ymm3, ymm3, ymm12
  39313. vpor ymm4, ymm4, ymm0
  39314. vpor ymm5, ymm5, ymm1
  39315. vpor ymm6, ymm6, ymm2
  39316. vpor ymm7, ymm7, ymm3
  39317. vpaddd ymm13, ymm13, ymm11
  39318. ; ENTRY: 3
  39319. mov r9, QWORD PTR [rdx+24]
  39320. vpcmpeqd ymm12, ymm13, ymm10
  39321. vmovdqu ymm0, YMMWORD PTR [r9]
  39322. vmovdqu ymm1, YMMWORD PTR [r9+32]
  39323. vmovdqu ymm2, YMMWORD PTR [r9+64]
  39324. vmovdqu ymm3, YMMWORD PTR [r9+96]
  39325. vpand ymm0, ymm0, ymm12
  39326. vpand ymm1, ymm1, ymm12
  39327. vpand ymm2, ymm2, ymm12
  39328. vpand ymm3, ymm3, ymm12
  39329. vpor ymm4, ymm4, ymm0
  39330. vpor ymm5, ymm5, ymm1
  39331. vpor ymm6, ymm6, ymm2
  39332. vpor ymm7, ymm7, ymm3
  39333. vpaddd ymm13, ymm13, ymm11
  39334. ; ENTRY: 4
  39335. mov r9, QWORD PTR [rdx+32]
  39336. vpcmpeqd ymm12, ymm13, ymm10
  39337. vmovdqu ymm0, YMMWORD PTR [r9]
  39338. vmovdqu ymm1, YMMWORD PTR [r9+32]
  39339. vmovdqu ymm2, YMMWORD PTR [r9+64]
  39340. vmovdqu ymm3, YMMWORD PTR [r9+96]
  39341. vpand ymm0, ymm0, ymm12
  39342. vpand ymm1, ymm1, ymm12
  39343. vpand ymm2, ymm2, ymm12
  39344. vpand ymm3, ymm3, ymm12
  39345. vpor ymm4, ymm4, ymm0
  39346. vpor ymm5, ymm5, ymm1
  39347. vpor ymm6, ymm6, ymm2
  39348. vpor ymm7, ymm7, ymm3
  39349. vpaddd ymm13, ymm13, ymm11
  39350. ; ENTRY: 5
  39351. mov r9, QWORD PTR [rdx+40]
  39352. vpcmpeqd ymm12, ymm13, ymm10
  39353. vmovdqu ymm0, YMMWORD PTR [r9]
  39354. vmovdqu ymm1, YMMWORD PTR [r9+32]
  39355. vmovdqu ymm2, YMMWORD PTR [r9+64]
  39356. vmovdqu ymm3, YMMWORD PTR [r9+96]
  39357. vpand ymm0, ymm0, ymm12
  39358. vpand ymm1, ymm1, ymm12
  39359. vpand ymm2, ymm2, ymm12
  39360. vpand ymm3, ymm3, ymm12
  39361. vpor ymm4, ymm4, ymm0
  39362. vpor ymm5, ymm5, ymm1
  39363. vpor ymm6, ymm6, ymm2
  39364. vpor ymm7, ymm7, ymm3
  39365. vpaddd ymm13, ymm13, ymm11
  39366. ; ENTRY: 6
  39367. mov r9, QWORD PTR [rdx+48]
  39368. vpcmpeqd ymm12, ymm13, ymm10
  39369. vmovdqu ymm0, YMMWORD PTR [r9]
  39370. vmovdqu ymm1, YMMWORD PTR [r9+32]
  39371. vmovdqu ymm2, YMMWORD PTR [r9+64]
  39372. vmovdqu ymm3, YMMWORD PTR [r9+96]
  39373. vpand ymm0, ymm0, ymm12
  39374. vpand ymm1, ymm1, ymm12
  39375. vpand ymm2, ymm2, ymm12
  39376. vpand ymm3, ymm3, ymm12
  39377. vpor ymm4, ymm4, ymm0
  39378. vpor ymm5, ymm5, ymm1
  39379. vpor ymm6, ymm6, ymm2
  39380. vpor ymm7, ymm7, ymm3
  39381. vpaddd ymm13, ymm13, ymm11
  39382. ; ENTRY: 7
  39383. mov r9, QWORD PTR [rdx+56]
  39384. vpcmpeqd ymm12, ymm13, ymm10
  39385. vmovdqu ymm0, YMMWORD PTR [r9]
  39386. vmovdqu ymm1, YMMWORD PTR [r9+32]
  39387. vmovdqu ymm2, YMMWORD PTR [r9+64]
  39388. vmovdqu ymm3, YMMWORD PTR [r9+96]
  39389. vpand ymm0, ymm0, ymm12
  39390. vpand ymm1, ymm1, ymm12
  39391. vpand ymm2, ymm2, ymm12
  39392. vpand ymm3, ymm3, ymm12
  39393. vpor ymm4, ymm4, ymm0
  39394. vpor ymm5, ymm5, ymm1
  39395. vpor ymm6, ymm6, ymm2
  39396. vpor ymm7, ymm7, ymm3
  39397. vpaddd ymm13, ymm13, ymm11
  39398. ; ENTRY: 8
  39399. mov r9, QWORD PTR [rdx+64]
  39400. vpcmpeqd ymm12, ymm13, ymm10
  39401. vmovdqu ymm0, YMMWORD PTR [r9]
  39402. vmovdqu ymm1, YMMWORD PTR [r9+32]
  39403. vmovdqu ymm2, YMMWORD PTR [r9+64]
  39404. vmovdqu ymm3, YMMWORD PTR [r9+96]
  39405. vpand ymm0, ymm0, ymm12
  39406. vpand ymm1, ymm1, ymm12
  39407. vpand ymm2, ymm2, ymm12
  39408. vpand ymm3, ymm3, ymm12
  39409. vpor ymm4, ymm4, ymm0
  39410. vpor ymm5, ymm5, ymm1
  39411. vpor ymm6, ymm6, ymm2
  39412. vpor ymm7, ymm7, ymm3
  39413. vpaddd ymm13, ymm13, ymm11
  39414. ; ENTRY: 9
  39415. mov r9, QWORD PTR [rdx+72]
  39416. vpcmpeqd ymm12, ymm13, ymm10
  39417. vmovdqu ymm0, YMMWORD PTR [r9]
  39418. vmovdqu ymm1, YMMWORD PTR [r9+32]
  39419. vmovdqu ymm2, YMMWORD PTR [r9+64]
  39420. vmovdqu ymm3, YMMWORD PTR [r9+96]
  39421. vpand ymm0, ymm0, ymm12
  39422. vpand ymm1, ymm1, ymm12
  39423. vpand ymm2, ymm2, ymm12
  39424. vpand ymm3, ymm3, ymm12
  39425. vpor ymm4, ymm4, ymm0
  39426. vpor ymm5, ymm5, ymm1
  39427. vpor ymm6, ymm6, ymm2
  39428. vpor ymm7, ymm7, ymm3
  39429. vpaddd ymm13, ymm13, ymm11
  39430. ; ENTRY: 10
  39431. mov r9, QWORD PTR [rdx+80]
  39432. vpcmpeqd ymm12, ymm13, ymm10
  39433. vmovdqu ymm0, YMMWORD PTR [r9]
  39434. vmovdqu ymm1, YMMWORD PTR [r9+32]
  39435. vmovdqu ymm2, YMMWORD PTR [r9+64]
  39436. vmovdqu ymm3, YMMWORD PTR [r9+96]
  39437. vpand ymm0, ymm0, ymm12
  39438. vpand ymm1, ymm1, ymm12
  39439. vpand ymm2, ymm2, ymm12
  39440. vpand ymm3, ymm3, ymm12
  39441. vpor ymm4, ymm4, ymm0
  39442. vpor ymm5, ymm5, ymm1
  39443. vpor ymm6, ymm6, ymm2
  39444. vpor ymm7, ymm7, ymm3
  39445. vpaddd ymm13, ymm13, ymm11
  39446. ; ENTRY: 11
  39447. mov r9, QWORD PTR [rdx+88]
  39448. vpcmpeqd ymm12, ymm13, ymm10
  39449. vmovdqu ymm0, YMMWORD PTR [r9]
  39450. vmovdqu ymm1, YMMWORD PTR [r9+32]
  39451. vmovdqu ymm2, YMMWORD PTR [r9+64]
  39452. vmovdqu ymm3, YMMWORD PTR [r9+96]
  39453. vpand ymm0, ymm0, ymm12
  39454. vpand ymm1, ymm1, ymm12
  39455. vpand ymm2, ymm2, ymm12
  39456. vpand ymm3, ymm3, ymm12
  39457. vpor ymm4, ymm4, ymm0
  39458. vpor ymm5, ymm5, ymm1
  39459. vpor ymm6, ymm6, ymm2
  39460. vpor ymm7, ymm7, ymm3
  39461. vpaddd ymm13, ymm13, ymm11
  39462. ; ENTRY: 12
  39463. mov r9, QWORD PTR [rdx+96]
  39464. vpcmpeqd ymm12, ymm13, ymm10
  39465. vmovdqu ymm0, YMMWORD PTR [r9]
  39466. vmovdqu ymm1, YMMWORD PTR [r9+32]
  39467. vmovdqu ymm2, YMMWORD PTR [r9+64]
  39468. vmovdqu ymm3, YMMWORD PTR [r9+96]
  39469. vpand ymm0, ymm0, ymm12
  39470. vpand ymm1, ymm1, ymm12
  39471. vpand ymm2, ymm2, ymm12
  39472. vpand ymm3, ymm3, ymm12
  39473. vpor ymm4, ymm4, ymm0
  39474. vpor ymm5, ymm5, ymm1
  39475. vpor ymm6, ymm6, ymm2
  39476. vpor ymm7, ymm7, ymm3
  39477. vpaddd ymm13, ymm13, ymm11
  39478. ; ENTRY: 13
  39479. mov r9, QWORD PTR [rdx+104]
  39480. vpcmpeqd ymm12, ymm13, ymm10
  39481. vmovdqu ymm0, YMMWORD PTR [r9]
  39482. vmovdqu ymm1, YMMWORD PTR [r9+32]
  39483. vmovdqu ymm2, YMMWORD PTR [r9+64]
  39484. vmovdqu ymm3, YMMWORD PTR [r9+96]
  39485. vpand ymm0, ymm0, ymm12
  39486. vpand ymm1, ymm1, ymm12
  39487. vpand ymm2, ymm2, ymm12
  39488. vpand ymm3, ymm3, ymm12
  39489. vpor ymm4, ymm4, ymm0
  39490. vpor ymm5, ymm5, ymm1
  39491. vpor ymm6, ymm6, ymm2
  39492. vpor ymm7, ymm7, ymm3
  39493. vpaddd ymm13, ymm13, ymm11
  39494. ; ENTRY: 14
  39495. mov r9, QWORD PTR [rdx+112]
  39496. vpcmpeqd ymm12, ymm13, ymm10
  39497. vmovdqu ymm0, YMMWORD PTR [r9]
  39498. vmovdqu ymm1, YMMWORD PTR [r9+32]
  39499. vmovdqu ymm2, YMMWORD PTR [r9+64]
  39500. vmovdqu ymm3, YMMWORD PTR [r9+96]
  39501. vpand ymm0, ymm0, ymm12
  39502. vpand ymm1, ymm1, ymm12
  39503. vpand ymm2, ymm2, ymm12
  39504. vpand ymm3, ymm3, ymm12
  39505. vpor ymm4, ymm4, ymm0
  39506. vpor ymm5, ymm5, ymm1
  39507. vpor ymm6, ymm6, ymm2
  39508. vpor ymm7, ymm7, ymm3
  39509. vpaddd ymm13, ymm13, ymm11
  39510. ; ENTRY: 15
  39511. mov r9, QWORD PTR [rdx+120]
  39512. vpcmpeqd ymm12, ymm13, ymm10
  39513. vmovdqu ymm0, YMMWORD PTR [r9]
  39514. vmovdqu ymm1, YMMWORD PTR [r9+32]
  39515. vmovdqu ymm2, YMMWORD PTR [r9+64]
  39516. vmovdqu ymm3, YMMWORD PTR [r9+96]
  39517. vpand ymm0, ymm0, ymm12
  39518. vpand ymm1, ymm1, ymm12
  39519. vpand ymm2, ymm2, ymm12
  39520. vpand ymm3, ymm3, ymm12
  39521. vpor ymm4, ymm4, ymm0
  39522. vpor ymm5, ymm5, ymm1
  39523. vpor ymm6, ymm6, ymm2
  39524. vpor ymm7, ymm7, ymm3
  39525. vpaddd ymm13, ymm13, ymm11
  39526. vmovdqu YMMWORD PTR [rcx], ymm4
  39527. vmovdqu YMMWORD PTR [rcx+32], ymm5
  39528. vmovdqu YMMWORD PTR [rcx+64], ymm6
  39529. vmovdqu YMMWORD PTR [rcx+96], ymm7
  39530. add rcx, 128
  39531. ; END: 0-15
  39532. ; START: 16-31
  39533. vpxor ymm13, ymm13, ymm13
  39534. vpxor ymm4, ymm4, ymm4
  39535. vpxor ymm5, ymm5, ymm5
  39536. vpxor ymm6, ymm6, ymm6
  39537. vpxor ymm7, ymm7, ymm7
  39538. ; ENTRY: 0
  39539. mov r9, QWORD PTR [rdx]
  39540. add r9, 128
  39541. vpcmpeqd ymm12, ymm13, ymm10
  39542. vmovdqu ymm0, YMMWORD PTR [r9]
  39543. vmovdqu ymm1, YMMWORD PTR [r9+32]
  39544. vmovdqu ymm2, YMMWORD PTR [r9+64]
  39545. vmovdqu ymm3, YMMWORD PTR [r9+96]
  39546. vpand ymm0, ymm0, ymm12
  39547. vpand ymm1, ymm1, ymm12
  39548. vpand ymm2, ymm2, ymm12
  39549. vpand ymm3, ymm3, ymm12
  39550. vpor ymm4, ymm4, ymm0
  39551. vpor ymm5, ymm5, ymm1
  39552. vpor ymm6, ymm6, ymm2
  39553. vpor ymm7, ymm7, ymm3
  39554. vpaddd ymm13, ymm13, ymm11
  39555. ; ENTRY: 1
  39556. mov r9, QWORD PTR [rdx+8]
  39557. add r9, 128
  39558. vpcmpeqd ymm12, ymm13, ymm10
  39559. vmovdqu ymm0, YMMWORD PTR [r9]
  39560. vmovdqu ymm1, YMMWORD PTR [r9+32]
  39561. vmovdqu ymm2, YMMWORD PTR [r9+64]
  39562. vmovdqu ymm3, YMMWORD PTR [r9+96]
  39563. vpand ymm0, ymm0, ymm12
  39564. vpand ymm1, ymm1, ymm12
  39565. vpand ymm2, ymm2, ymm12
  39566. vpand ymm3, ymm3, ymm12
  39567. vpor ymm4, ymm4, ymm0
  39568. vpor ymm5, ymm5, ymm1
  39569. vpor ymm6, ymm6, ymm2
  39570. vpor ymm7, ymm7, ymm3
  39571. vpaddd ymm13, ymm13, ymm11
  39572. ; ENTRY: 2
  39573. mov r9, QWORD PTR [rdx+16]
  39574. add r9, 128
  39575. vpcmpeqd ymm12, ymm13, ymm10
  39576. vmovdqu ymm0, YMMWORD PTR [r9]
  39577. vmovdqu ymm1, YMMWORD PTR [r9+32]
  39578. vmovdqu ymm2, YMMWORD PTR [r9+64]
  39579. vmovdqu ymm3, YMMWORD PTR [r9+96]
  39580. vpand ymm0, ymm0, ymm12
  39581. vpand ymm1, ymm1, ymm12
  39582. vpand ymm2, ymm2, ymm12
  39583. vpand ymm3, ymm3, ymm12
  39584. vpor ymm4, ymm4, ymm0
  39585. vpor ymm5, ymm5, ymm1
  39586. vpor ymm6, ymm6, ymm2
  39587. vpor ymm7, ymm7, ymm3
  39588. vpaddd ymm13, ymm13, ymm11
  39589. ; ENTRY: 3
  39590. mov r9, QWORD PTR [rdx+24]
  39591. add r9, 128
  39592. vpcmpeqd ymm12, ymm13, ymm10
  39593. vmovdqu ymm0, YMMWORD PTR [r9]
  39594. vmovdqu ymm1, YMMWORD PTR [r9+32]
  39595. vmovdqu ymm2, YMMWORD PTR [r9+64]
  39596. vmovdqu ymm3, YMMWORD PTR [r9+96]
  39597. vpand ymm0, ymm0, ymm12
  39598. vpand ymm1, ymm1, ymm12
  39599. vpand ymm2, ymm2, ymm12
  39600. vpand ymm3, ymm3, ymm12
  39601. vpor ymm4, ymm4, ymm0
  39602. vpor ymm5, ymm5, ymm1
  39603. vpor ymm6, ymm6, ymm2
  39604. vpor ymm7, ymm7, ymm3
  39605. vpaddd ymm13, ymm13, ymm11
  39606. ; ENTRY: 4
  39607. mov r9, QWORD PTR [rdx+32]
  39608. add r9, 128
  39609. vpcmpeqd ymm12, ymm13, ymm10
  39610. vmovdqu ymm0, YMMWORD PTR [r9]
  39611. vmovdqu ymm1, YMMWORD PTR [r9+32]
  39612. vmovdqu ymm2, YMMWORD PTR [r9+64]
  39613. vmovdqu ymm3, YMMWORD PTR [r9+96]
  39614. vpand ymm0, ymm0, ymm12
  39615. vpand ymm1, ymm1, ymm12
  39616. vpand ymm2, ymm2, ymm12
  39617. vpand ymm3, ymm3, ymm12
  39618. vpor ymm4, ymm4, ymm0
  39619. vpor ymm5, ymm5, ymm1
  39620. vpor ymm6, ymm6, ymm2
  39621. vpor ymm7, ymm7, ymm3
  39622. vpaddd ymm13, ymm13, ymm11
  39623. ; ENTRY: 5
  39624. mov r9, QWORD PTR [rdx+40]
  39625. add r9, 128
  39626. vpcmpeqd ymm12, ymm13, ymm10
  39627. vmovdqu ymm0, YMMWORD PTR [r9]
  39628. vmovdqu ymm1, YMMWORD PTR [r9+32]
  39629. vmovdqu ymm2, YMMWORD PTR [r9+64]
  39630. vmovdqu ymm3, YMMWORD PTR [r9+96]
  39631. vpand ymm0, ymm0, ymm12
  39632. vpand ymm1, ymm1, ymm12
  39633. vpand ymm2, ymm2, ymm12
  39634. vpand ymm3, ymm3, ymm12
  39635. vpor ymm4, ymm4, ymm0
  39636. vpor ymm5, ymm5, ymm1
  39637. vpor ymm6, ymm6, ymm2
  39638. vpor ymm7, ymm7, ymm3
  39639. vpaddd ymm13, ymm13, ymm11
  39640. ; ENTRY: 6
  39641. mov r9, QWORD PTR [rdx+48]
  39642. add r9, 128
  39643. vpcmpeqd ymm12, ymm13, ymm10
  39644. vmovdqu ymm0, YMMWORD PTR [r9]
  39645. vmovdqu ymm1, YMMWORD PTR [r9+32]
  39646. vmovdqu ymm2, YMMWORD PTR [r9+64]
  39647. vmovdqu ymm3, YMMWORD PTR [r9+96]
  39648. vpand ymm0, ymm0, ymm12
  39649. vpand ymm1, ymm1, ymm12
  39650. vpand ymm2, ymm2, ymm12
  39651. vpand ymm3, ymm3, ymm12
  39652. vpor ymm4, ymm4, ymm0
  39653. vpor ymm5, ymm5, ymm1
  39654. vpor ymm6, ymm6, ymm2
  39655. vpor ymm7, ymm7, ymm3
  39656. vpaddd ymm13, ymm13, ymm11
  39657. ; ENTRY: 7
  39658. mov r9, QWORD PTR [rdx+56]
  39659. add r9, 128
  39660. vpcmpeqd ymm12, ymm13, ymm10
  39661. vmovdqu ymm0, YMMWORD PTR [r9]
  39662. vmovdqu ymm1, YMMWORD PTR [r9+32]
  39663. vmovdqu ymm2, YMMWORD PTR [r9+64]
  39664. vmovdqu ymm3, YMMWORD PTR [r9+96]
  39665. vpand ymm0, ymm0, ymm12
  39666. vpand ymm1, ymm1, ymm12
  39667. vpand ymm2, ymm2, ymm12
  39668. vpand ymm3, ymm3, ymm12
  39669. vpor ymm4, ymm4, ymm0
  39670. vpor ymm5, ymm5, ymm1
  39671. vpor ymm6, ymm6, ymm2
  39672. vpor ymm7, ymm7, ymm3
  39673. vpaddd ymm13, ymm13, ymm11
  39674. ; ENTRY: 8
  39675. mov r9, QWORD PTR [rdx+64]
  39676. add r9, 128
  39677. vpcmpeqd ymm12, ymm13, ymm10
  39678. vmovdqu ymm0, YMMWORD PTR [r9]
  39679. vmovdqu ymm1, YMMWORD PTR [r9+32]
  39680. vmovdqu ymm2, YMMWORD PTR [r9+64]
  39681. vmovdqu ymm3, YMMWORD PTR [r9+96]
  39682. vpand ymm0, ymm0, ymm12
  39683. vpand ymm1, ymm1, ymm12
  39684. vpand ymm2, ymm2, ymm12
  39685. vpand ymm3, ymm3, ymm12
  39686. vpor ymm4, ymm4, ymm0
  39687. vpor ymm5, ymm5, ymm1
  39688. vpor ymm6, ymm6, ymm2
  39689. vpor ymm7, ymm7, ymm3
  39690. vpaddd ymm13, ymm13, ymm11
  39691. ; ENTRY: 9
  39692. mov r9, QWORD PTR [rdx+72]
  39693. add r9, 128
  39694. vpcmpeqd ymm12, ymm13, ymm10
  39695. vmovdqu ymm0, YMMWORD PTR [r9]
  39696. vmovdqu ymm1, YMMWORD PTR [r9+32]
  39697. vmovdqu ymm2, YMMWORD PTR [r9+64]
  39698. vmovdqu ymm3, YMMWORD PTR [r9+96]
  39699. vpand ymm0, ymm0, ymm12
  39700. vpand ymm1, ymm1, ymm12
  39701. vpand ymm2, ymm2, ymm12
  39702. vpand ymm3, ymm3, ymm12
  39703. vpor ymm4, ymm4, ymm0
  39704. vpor ymm5, ymm5, ymm1
  39705. vpor ymm6, ymm6, ymm2
  39706. vpor ymm7, ymm7, ymm3
  39707. vpaddd ymm13, ymm13, ymm11
  39708. ; ENTRY: 10
  39709. mov r9, QWORD PTR [rdx+80]
  39710. add r9, 128
  39711. vpcmpeqd ymm12, ymm13, ymm10
  39712. vmovdqu ymm0, YMMWORD PTR [r9]
  39713. vmovdqu ymm1, YMMWORD PTR [r9+32]
  39714. vmovdqu ymm2, YMMWORD PTR [r9+64]
  39715. vmovdqu ymm3, YMMWORD PTR [r9+96]
  39716. vpand ymm0, ymm0, ymm12
  39717. vpand ymm1, ymm1, ymm12
  39718. vpand ymm2, ymm2, ymm12
  39719. vpand ymm3, ymm3, ymm12
  39720. vpor ymm4, ymm4, ymm0
  39721. vpor ymm5, ymm5, ymm1
  39722. vpor ymm6, ymm6, ymm2
  39723. vpor ymm7, ymm7, ymm3
  39724. vpaddd ymm13, ymm13, ymm11
  39725. ; ENTRY: 11
  39726. mov r9, QWORD PTR [rdx+88]
  39727. add r9, 128
  39728. vpcmpeqd ymm12, ymm13, ymm10
  39729. vmovdqu ymm0, YMMWORD PTR [r9]
  39730. vmovdqu ymm1, YMMWORD PTR [r9+32]
  39731. vmovdqu ymm2, YMMWORD PTR [r9+64]
  39732. vmovdqu ymm3, YMMWORD PTR [r9+96]
  39733. vpand ymm0, ymm0, ymm12
  39734. vpand ymm1, ymm1, ymm12
  39735. vpand ymm2, ymm2, ymm12
  39736. vpand ymm3, ymm3, ymm12
  39737. vpor ymm4, ymm4, ymm0
  39738. vpor ymm5, ymm5, ymm1
  39739. vpor ymm6, ymm6, ymm2
  39740. vpor ymm7, ymm7, ymm3
  39741. vpaddd ymm13, ymm13, ymm11
  39742. ; ENTRY: 12
  39743. mov r9, QWORD PTR [rdx+96]
  39744. add r9, 128
  39745. vpcmpeqd ymm12, ymm13, ymm10
  39746. vmovdqu ymm0, YMMWORD PTR [r9]
  39747. vmovdqu ymm1, YMMWORD PTR [r9+32]
  39748. vmovdqu ymm2, YMMWORD PTR [r9+64]
  39749. vmovdqu ymm3, YMMWORD PTR [r9+96]
  39750. vpand ymm0, ymm0, ymm12
  39751. vpand ymm1, ymm1, ymm12
  39752. vpand ymm2, ymm2, ymm12
  39753. vpand ymm3, ymm3, ymm12
  39754. vpor ymm4, ymm4, ymm0
  39755. vpor ymm5, ymm5, ymm1
  39756. vpor ymm6, ymm6, ymm2
  39757. vpor ymm7, ymm7, ymm3
  39758. vpaddd ymm13, ymm13, ymm11
  39759. ; ENTRY: 13
  39760. mov r9, QWORD PTR [rdx+104]
  39761. add r9, 128
  39762. vpcmpeqd ymm12, ymm13, ymm10
  39763. vmovdqu ymm0, YMMWORD PTR [r9]
  39764. vmovdqu ymm1, YMMWORD PTR [r9+32]
  39765. vmovdqu ymm2, YMMWORD PTR [r9+64]
  39766. vmovdqu ymm3, YMMWORD PTR [r9+96]
  39767. vpand ymm0, ymm0, ymm12
  39768. vpand ymm1, ymm1, ymm12
  39769. vpand ymm2, ymm2, ymm12
  39770. vpand ymm3, ymm3, ymm12
  39771. vpor ymm4, ymm4, ymm0
  39772. vpor ymm5, ymm5, ymm1
  39773. vpor ymm6, ymm6, ymm2
  39774. vpor ymm7, ymm7, ymm3
  39775. vpaddd ymm13, ymm13, ymm11
  39776. ; ENTRY: 14
  39777. mov r9, QWORD PTR [rdx+112]
  39778. add r9, 128
  39779. vpcmpeqd ymm12, ymm13, ymm10
  39780. vmovdqu ymm0, YMMWORD PTR [r9]
  39781. vmovdqu ymm1, YMMWORD PTR [r9+32]
  39782. vmovdqu ymm2, YMMWORD PTR [r9+64]
  39783. vmovdqu ymm3, YMMWORD PTR [r9+96]
  39784. vpand ymm0, ymm0, ymm12
  39785. vpand ymm1, ymm1, ymm12
  39786. vpand ymm2, ymm2, ymm12
  39787. vpand ymm3, ymm3, ymm12
  39788. vpor ymm4, ymm4, ymm0
  39789. vpor ymm5, ymm5, ymm1
  39790. vpor ymm6, ymm6, ymm2
  39791. vpor ymm7, ymm7, ymm3
  39792. vpaddd ymm13, ymm13, ymm11
  39793. ; ENTRY: 15
  39794. mov r9, QWORD PTR [rdx+120]
  39795. add r9, 128
  39796. vpcmpeqd ymm12, ymm13, ymm10
  39797. vmovdqu ymm0, YMMWORD PTR [r9]
  39798. vmovdqu ymm1, YMMWORD PTR [r9+32]
  39799. vmovdqu ymm2, YMMWORD PTR [r9+64]
  39800. vmovdqu ymm3, YMMWORD PTR [r9+96]
  39801. vpand ymm0, ymm0, ymm12
  39802. vpand ymm1, ymm1, ymm12
  39803. vpand ymm2, ymm2, ymm12
  39804. vpand ymm3, ymm3, ymm12
  39805. vpor ymm4, ymm4, ymm0
  39806. vpor ymm5, ymm5, ymm1
  39807. vpor ymm6, ymm6, ymm2
  39808. vpor ymm7, ymm7, ymm3
  39809. vpaddd ymm13, ymm13, ymm11
  39810. vmovdqu YMMWORD PTR [rcx], ymm4
  39811. vmovdqu YMMWORD PTR [rcx+32], ymm5
  39812. vmovdqu YMMWORD PTR [rcx+64], ymm6
  39813. vmovdqu YMMWORD PTR [rcx+96], ymm7
  39814. add rcx, 128
  39815. ; END: 16-31
  39816. ; START: 32-47
  39817. vpxor ymm13, ymm13, ymm13
  39818. vpxor ymm4, ymm4, ymm4
  39819. vpxor ymm5, ymm5, ymm5
  39820. vpxor ymm6, ymm6, ymm6
  39821. vpxor ymm7, ymm7, ymm7
  39822. ; ENTRY: 0
  39823. mov r9, QWORD PTR [rdx]
  39824. add r9, 256
  39825. vpcmpeqd ymm12, ymm13, ymm10
  39826. vmovdqu ymm0, YMMWORD PTR [r9]
  39827. vmovdqu ymm1, YMMWORD PTR [r9+32]
  39828. vmovdqu ymm2, YMMWORD PTR [r9+64]
  39829. vmovdqu ymm3, YMMWORD PTR [r9+96]
  39830. vpand ymm0, ymm0, ymm12
  39831. vpand ymm1, ymm1, ymm12
  39832. vpand ymm2, ymm2, ymm12
  39833. vpand ymm3, ymm3, ymm12
  39834. vpor ymm4, ymm4, ymm0
  39835. vpor ymm5, ymm5, ymm1
  39836. vpor ymm6, ymm6, ymm2
  39837. vpor ymm7, ymm7, ymm3
  39838. vpaddd ymm13, ymm13, ymm11
  39839. ; ENTRY: 1
  39840. mov r9, QWORD PTR [rdx+8]
  39841. add r9, 256
  39842. vpcmpeqd ymm12, ymm13, ymm10
  39843. vmovdqu ymm0, YMMWORD PTR [r9]
  39844. vmovdqu ymm1, YMMWORD PTR [r9+32]
  39845. vmovdqu ymm2, YMMWORD PTR [r9+64]
  39846. vmovdqu ymm3, YMMWORD PTR [r9+96]
  39847. vpand ymm0, ymm0, ymm12
  39848. vpand ymm1, ymm1, ymm12
  39849. vpand ymm2, ymm2, ymm12
  39850. vpand ymm3, ymm3, ymm12
  39851. vpor ymm4, ymm4, ymm0
  39852. vpor ymm5, ymm5, ymm1
  39853. vpor ymm6, ymm6, ymm2
  39854. vpor ymm7, ymm7, ymm3
  39855. vpaddd ymm13, ymm13, ymm11
  39856. ; ENTRY: 2
  39857. mov r9, QWORD PTR [rdx+16]
  39858. add r9, 256
  39859. vpcmpeqd ymm12, ymm13, ymm10
  39860. vmovdqu ymm0, YMMWORD PTR [r9]
  39861. vmovdqu ymm1, YMMWORD PTR [r9+32]
  39862. vmovdqu ymm2, YMMWORD PTR [r9+64]
  39863. vmovdqu ymm3, YMMWORD PTR [r9+96]
  39864. vpand ymm0, ymm0, ymm12
  39865. vpand ymm1, ymm1, ymm12
  39866. vpand ymm2, ymm2, ymm12
  39867. vpand ymm3, ymm3, ymm12
  39868. vpor ymm4, ymm4, ymm0
  39869. vpor ymm5, ymm5, ymm1
  39870. vpor ymm6, ymm6, ymm2
  39871. vpor ymm7, ymm7, ymm3
  39872. vpaddd ymm13, ymm13, ymm11
  39873. ; ENTRY: 3
  39874. mov r9, QWORD PTR [rdx+24]
  39875. add r9, 256
  39876. vpcmpeqd ymm12, ymm13, ymm10
  39877. vmovdqu ymm0, YMMWORD PTR [r9]
  39878. vmovdqu ymm1, YMMWORD PTR [r9+32]
  39879. vmovdqu ymm2, YMMWORD PTR [r9+64]
  39880. vmovdqu ymm3, YMMWORD PTR [r9+96]
  39881. vpand ymm0, ymm0, ymm12
  39882. vpand ymm1, ymm1, ymm12
  39883. vpand ymm2, ymm2, ymm12
  39884. vpand ymm3, ymm3, ymm12
  39885. vpor ymm4, ymm4, ymm0
  39886. vpor ymm5, ymm5, ymm1
  39887. vpor ymm6, ymm6, ymm2
  39888. vpor ymm7, ymm7, ymm3
  39889. vpaddd ymm13, ymm13, ymm11
  39890. ; ENTRY: 4
  39891. mov r9, QWORD PTR [rdx+32]
  39892. add r9, 256
  39893. vpcmpeqd ymm12, ymm13, ymm10
  39894. vmovdqu ymm0, YMMWORD PTR [r9]
  39895. vmovdqu ymm1, YMMWORD PTR [r9+32]
  39896. vmovdqu ymm2, YMMWORD PTR [r9+64]
  39897. vmovdqu ymm3, YMMWORD PTR [r9+96]
  39898. vpand ymm0, ymm0, ymm12
  39899. vpand ymm1, ymm1, ymm12
  39900. vpand ymm2, ymm2, ymm12
  39901. vpand ymm3, ymm3, ymm12
  39902. vpor ymm4, ymm4, ymm0
  39903. vpor ymm5, ymm5, ymm1
  39904. vpor ymm6, ymm6, ymm2
  39905. vpor ymm7, ymm7, ymm3
  39906. vpaddd ymm13, ymm13, ymm11
  39907. ; ENTRY: 5
  39908. mov r9, QWORD PTR [rdx+40]
  39909. add r9, 256
  39910. vpcmpeqd ymm12, ymm13, ymm10
  39911. vmovdqu ymm0, YMMWORD PTR [r9]
  39912. vmovdqu ymm1, YMMWORD PTR [r9+32]
  39913. vmovdqu ymm2, YMMWORD PTR [r9+64]
  39914. vmovdqu ymm3, YMMWORD PTR [r9+96]
  39915. vpand ymm0, ymm0, ymm12
  39916. vpand ymm1, ymm1, ymm12
  39917. vpand ymm2, ymm2, ymm12
  39918. vpand ymm3, ymm3, ymm12
  39919. vpor ymm4, ymm4, ymm0
  39920. vpor ymm5, ymm5, ymm1
  39921. vpor ymm6, ymm6, ymm2
  39922. vpor ymm7, ymm7, ymm3
  39923. vpaddd ymm13, ymm13, ymm11
  39924. ; ENTRY: 6
  39925. mov r9, QWORD PTR [rdx+48]
  39926. add r9, 256
  39927. vpcmpeqd ymm12, ymm13, ymm10
  39928. vmovdqu ymm0, YMMWORD PTR [r9]
  39929. vmovdqu ymm1, YMMWORD PTR [r9+32]
  39930. vmovdqu ymm2, YMMWORD PTR [r9+64]
  39931. vmovdqu ymm3, YMMWORD PTR [r9+96]
  39932. vpand ymm0, ymm0, ymm12
  39933. vpand ymm1, ymm1, ymm12
  39934. vpand ymm2, ymm2, ymm12
  39935. vpand ymm3, ymm3, ymm12
  39936. vpor ymm4, ymm4, ymm0
  39937. vpor ymm5, ymm5, ymm1
  39938. vpor ymm6, ymm6, ymm2
  39939. vpor ymm7, ymm7, ymm3
  39940. vpaddd ymm13, ymm13, ymm11
  39941. ; ENTRY: 7
  39942. mov r9, QWORD PTR [rdx+56]
  39943. add r9, 256
  39944. vpcmpeqd ymm12, ymm13, ymm10
  39945. vmovdqu ymm0, YMMWORD PTR [r9]
  39946. vmovdqu ymm1, YMMWORD PTR [r9+32]
  39947. vmovdqu ymm2, YMMWORD PTR [r9+64]
  39948. vmovdqu ymm3, YMMWORD PTR [r9+96]
  39949. vpand ymm0, ymm0, ymm12
  39950. vpand ymm1, ymm1, ymm12
  39951. vpand ymm2, ymm2, ymm12
  39952. vpand ymm3, ymm3, ymm12
  39953. vpor ymm4, ymm4, ymm0
  39954. vpor ymm5, ymm5, ymm1
  39955. vpor ymm6, ymm6, ymm2
  39956. vpor ymm7, ymm7, ymm3
  39957. vpaddd ymm13, ymm13, ymm11
  39958. ; ENTRY: 8
  39959. mov r9, QWORD PTR [rdx+64]
  39960. add r9, 256
  39961. vpcmpeqd ymm12, ymm13, ymm10
  39962. vmovdqu ymm0, YMMWORD PTR [r9]
  39963. vmovdqu ymm1, YMMWORD PTR [r9+32]
  39964. vmovdqu ymm2, YMMWORD PTR [r9+64]
  39965. vmovdqu ymm3, YMMWORD PTR [r9+96]
  39966. vpand ymm0, ymm0, ymm12
  39967. vpand ymm1, ymm1, ymm12
  39968. vpand ymm2, ymm2, ymm12
  39969. vpand ymm3, ymm3, ymm12
  39970. vpor ymm4, ymm4, ymm0
  39971. vpor ymm5, ymm5, ymm1
  39972. vpor ymm6, ymm6, ymm2
  39973. vpor ymm7, ymm7, ymm3
  39974. vpaddd ymm13, ymm13, ymm11
  39975. ; ENTRY: 9
  39976. mov r9, QWORD PTR [rdx+72]
  39977. add r9, 256
  39978. vpcmpeqd ymm12, ymm13, ymm10
  39979. vmovdqu ymm0, YMMWORD PTR [r9]
  39980. vmovdqu ymm1, YMMWORD PTR [r9+32]
  39981. vmovdqu ymm2, YMMWORD PTR [r9+64]
  39982. vmovdqu ymm3, YMMWORD PTR [r9+96]
  39983. vpand ymm0, ymm0, ymm12
  39984. vpand ymm1, ymm1, ymm12
  39985. vpand ymm2, ymm2, ymm12
  39986. vpand ymm3, ymm3, ymm12
  39987. vpor ymm4, ymm4, ymm0
  39988. vpor ymm5, ymm5, ymm1
  39989. vpor ymm6, ymm6, ymm2
  39990. vpor ymm7, ymm7, ymm3
  39991. vpaddd ymm13, ymm13, ymm11
  39992. ; ENTRY: 10
  39993. mov r9, QWORD PTR [rdx+80]
  39994. add r9, 256
  39995. vpcmpeqd ymm12, ymm13, ymm10
  39996. vmovdqu ymm0, YMMWORD PTR [r9]
  39997. vmovdqu ymm1, YMMWORD PTR [r9+32]
  39998. vmovdqu ymm2, YMMWORD PTR [r9+64]
  39999. vmovdqu ymm3, YMMWORD PTR [r9+96]
  40000. vpand ymm0, ymm0, ymm12
  40001. vpand ymm1, ymm1, ymm12
  40002. vpand ymm2, ymm2, ymm12
  40003. vpand ymm3, ymm3, ymm12
  40004. vpor ymm4, ymm4, ymm0
  40005. vpor ymm5, ymm5, ymm1
  40006. vpor ymm6, ymm6, ymm2
  40007. vpor ymm7, ymm7, ymm3
  40008. vpaddd ymm13, ymm13, ymm11
  40009. ; ENTRY: 11
  40010. mov r9, QWORD PTR [rdx+88]
  40011. add r9, 256
  40012. vpcmpeqd ymm12, ymm13, ymm10
  40013. vmovdqu ymm0, YMMWORD PTR [r9]
  40014. vmovdqu ymm1, YMMWORD PTR [r9+32]
  40015. vmovdqu ymm2, YMMWORD PTR [r9+64]
  40016. vmovdqu ymm3, YMMWORD PTR [r9+96]
  40017. vpand ymm0, ymm0, ymm12
  40018. vpand ymm1, ymm1, ymm12
  40019. vpand ymm2, ymm2, ymm12
  40020. vpand ymm3, ymm3, ymm12
  40021. vpor ymm4, ymm4, ymm0
  40022. vpor ymm5, ymm5, ymm1
  40023. vpor ymm6, ymm6, ymm2
  40024. vpor ymm7, ymm7, ymm3
  40025. vpaddd ymm13, ymm13, ymm11
  40026. ; ENTRY: 12
  40027. mov r9, QWORD PTR [rdx+96]
  40028. add r9, 256
  40029. vpcmpeqd ymm12, ymm13, ymm10
  40030. vmovdqu ymm0, YMMWORD PTR [r9]
  40031. vmovdqu ymm1, YMMWORD PTR [r9+32]
  40032. vmovdqu ymm2, YMMWORD PTR [r9+64]
  40033. vmovdqu ymm3, YMMWORD PTR [r9+96]
  40034. vpand ymm0, ymm0, ymm12
  40035. vpand ymm1, ymm1, ymm12
  40036. vpand ymm2, ymm2, ymm12
  40037. vpand ymm3, ymm3, ymm12
  40038. vpor ymm4, ymm4, ymm0
  40039. vpor ymm5, ymm5, ymm1
  40040. vpor ymm6, ymm6, ymm2
  40041. vpor ymm7, ymm7, ymm3
  40042. vpaddd ymm13, ymm13, ymm11
  40043. ; ENTRY: 13
  40044. mov r9, QWORD PTR [rdx+104]
  40045. add r9, 256
  40046. vpcmpeqd ymm12, ymm13, ymm10
  40047. vmovdqu ymm0, YMMWORD PTR [r9]
  40048. vmovdqu ymm1, YMMWORD PTR [r9+32]
  40049. vmovdqu ymm2, YMMWORD PTR [r9+64]
  40050. vmovdqu ymm3, YMMWORD PTR [r9+96]
  40051. vpand ymm0, ymm0, ymm12
  40052. vpand ymm1, ymm1, ymm12
  40053. vpand ymm2, ymm2, ymm12
  40054. vpand ymm3, ymm3, ymm12
  40055. vpor ymm4, ymm4, ymm0
  40056. vpor ymm5, ymm5, ymm1
  40057. vpor ymm6, ymm6, ymm2
  40058. vpor ymm7, ymm7, ymm3
  40059. vpaddd ymm13, ymm13, ymm11
  40060. ; ENTRY: 14
  40061. mov r9, QWORD PTR [rdx+112]
  40062. add r9, 256
  40063. vpcmpeqd ymm12, ymm13, ymm10
  40064. vmovdqu ymm0, YMMWORD PTR [r9]
  40065. vmovdqu ymm1, YMMWORD PTR [r9+32]
  40066. vmovdqu ymm2, YMMWORD PTR [r9+64]
  40067. vmovdqu ymm3, YMMWORD PTR [r9+96]
  40068. vpand ymm0, ymm0, ymm12
  40069. vpand ymm1, ymm1, ymm12
  40070. vpand ymm2, ymm2, ymm12
  40071. vpand ymm3, ymm3, ymm12
  40072. vpor ymm4, ymm4, ymm0
  40073. vpor ymm5, ymm5, ymm1
  40074. vpor ymm6, ymm6, ymm2
  40075. vpor ymm7, ymm7, ymm3
  40076. vpaddd ymm13, ymm13, ymm11
  40077. ; ENTRY: 15
  40078. mov r9, QWORD PTR [rdx+120]
  40079. add r9, 256
  40080. vpcmpeqd ymm12, ymm13, ymm10
  40081. vmovdqu ymm0, YMMWORD PTR [r9]
  40082. vmovdqu ymm1, YMMWORD PTR [r9+32]
  40083. vmovdqu ymm2, YMMWORD PTR [r9+64]
  40084. vmovdqu ymm3, YMMWORD PTR [r9+96]
  40085. vpand ymm0, ymm0, ymm12
  40086. vpand ymm1, ymm1, ymm12
  40087. vpand ymm2, ymm2, ymm12
  40088. vpand ymm3, ymm3, ymm12
  40089. vpor ymm4, ymm4, ymm0
  40090. vpor ymm5, ymm5, ymm1
  40091. vpor ymm6, ymm6, ymm2
  40092. vpor ymm7, ymm7, ymm3
  40093. vpaddd ymm13, ymm13, ymm11
  40094. vmovdqu YMMWORD PTR [rcx], ymm4
  40095. vmovdqu YMMWORD PTR [rcx+32], ymm5
  40096. vmovdqu YMMWORD PTR [rcx+64], ymm6
  40097. vmovdqu YMMWORD PTR [rcx+96], ymm7
  40098. ; END: 32-47
  40099. vmovdqu xmm6, OWORD PTR [rsp]
  40100. vmovdqu xmm7, OWORD PTR [rsp+16]
  40101. vmovdqu xmm8, OWORD PTR [rsp+32]
  40102. vmovdqu xmm9, OWORD PTR [rsp+48]
  40103. vmovdqu xmm10, OWORD PTR [rsp+64]
  40104. vmovdqu xmm11, OWORD PTR [rsp+80]
  40105. vmovdqu xmm12, OWORD PTR [rsp+96]
  40106. vmovdqu xmm13, OWORD PTR [rsp+112]
  40107. add rsp, 128
  40108. ret
  40109. sp_3072_get_from_table_avx2_48 ENDP
  40110. _text ENDS
  40111. ENDIF
  ; /* Conditionally add b to a, with b selected by the mask m.
  ;  * m is -1 (all ones) to add b and 0 to add nothing.
  ;  *
  ;  * r  (rcx) Result, 24 words: a + (b AND m).
  ;  * a  (rdx) First operand, 24 words.
  ;  * b  (r8)  Second operand, 24 words; every word is ANDed with m first.
  ;  * m  (r9)  Mask value applied to each word of b.
  ;  *
  ;  * rax holds the carry out of the top word (0 or 1) on return.
  ;  * 192 bytes of stack hold the masked copy of b for the duration.
  ;  * Registers written besides rax: r10, r11.
  ;  */
  _text SEGMENT READONLY PARA
  sp_3072_cond_add_24 PROC
          sub     rsp, 192
          xor     eax, eax                        ; rax = 0, collects the final carry
          ; Pass 1: stack[i] = b[i] AND m, two words per step.
  sp_3072_ca24_off = 0
          REPEAT  12
          mov     r10, QWORD PTR [r8+sp_3072_ca24_off]
          mov     r11, QWORD PTR [r8+sp_3072_ca24_off+8]
          and     r10, r9
          and     r11, r9
          mov     QWORD PTR [rsp+sp_3072_ca24_off], r10
          mov     QWORD PTR [rsp+sp_3072_ca24_off+8], r11
  sp_3072_ca24_off = sp_3072_ca24_off + 16
          ENDM
          ; Pass 2: r = a + stack as one carry chain. Only add/adc touch the
          ; flags here (mov leaves them alone). Each result word is stored
          ; after the next word of a has been loaded.
          mov     r10, QWORD PTR [rdx]
          add     r10, QWORD PTR [rsp]
  sp_3072_ca24_off = 8
          REPEAT  11
          mov     r11, QWORD PTR [rdx+sp_3072_ca24_off]
          adc     r11, QWORD PTR [rsp+sp_3072_ca24_off]
          mov     QWORD PTR [rcx+sp_3072_ca24_off-8], r10
          mov     r10, QWORD PTR [rdx+sp_3072_ca24_off+8]
          adc     r10, QWORD PTR [rsp+sp_3072_ca24_off+8]
          mov     QWORD PTR [rcx+sp_3072_ca24_off], r11
  sp_3072_ca24_off = sp_3072_ca24_off + 16
          ENDM
          ; Top word (offset 184); r10 still holds the sum for offset 176.
          mov     r11, QWORD PTR [rdx+184]
          adc     r11, QWORD PTR [rsp+184]
          mov     QWORD PTR [rcx+176], r10
          mov     QWORD PTR [rcx+184], r11
          adc     rax, 0                          ; fold the carry flag into rax
          add     rsp, 192
          ret
  sp_3072_cond_add_24 ENDP
  _text ENDS
  IFDEF HAVE_INTEL_AVX2
  ; /* Conditionally add b to a, with b selected by the mask m.
  ;  * m is -1 (all ones) to add b and 0 to add nothing.
  ;  *
  ;  * r  (rcx) Result, 24 words.
  ;  * a  (rdx) First operand, 24 words.
  ;  * b  (r8)  Second operand, 24 words.
  ;  * m  (r9)  Mask value; used as the PEXT selector for every word of b.
  ;  *
  ;  * PEXT with an all-ones selector returns the word unchanged and with a
  ;  * zero selector returns 0, and it leaves the flags alone, so the
  ;  * add/adc carry chain runs unbroken through the whole routine.
  ;  * rax holds the carry out of the top word (0 or 1) on return.
  ;  * r12 is saved and restored; r10 and r11 are overwritten.
  ;  */
  _text SEGMENT READONLY PARA
  sp_3072_cond_add_avx2_24 PROC
          push    r12
          xor     eax, eax                        ; rax = 0, collects the final carry
          ; Word 0 starts the chain with a plain add.
          mov     r12, QWORD PTR [r8]
          mov     r10, QWORD PTR [rdx]
          pext    r12, r12, r9
          add     r10, r12
          ; Words 1..21, three per step. The sum of the previous word is
          ; stored only after the next words of b and a have been loaded.
  sp_3072_cax24_off = 8
          REPEAT  7
          mov     r12, QWORD PTR [r8+sp_3072_cax24_off]
          mov     r11, QWORD PTR [rdx+sp_3072_cax24_off]
          pext    r12, r12, r9
          mov     QWORD PTR [rcx+sp_3072_cax24_off-8], r10
          adc     r11, r12
          mov     r10, QWORD PTR [r8+sp_3072_cax24_off+8]
          mov     r12, QWORD PTR [rdx+sp_3072_cax24_off+8]
          pext    r10, r10, r9
          mov     QWORD PTR [rcx+sp_3072_cax24_off], r11
          adc     r12, r10
          mov     r11, QWORD PTR [r8+sp_3072_cax24_off+16]
          mov     r10, QWORD PTR [rdx+sp_3072_cax24_off+16]
          pext    r11, r11, r9
          mov     QWORD PTR [rcx+sp_3072_cax24_off+8], r12
          adc     r10, r11
  sp_3072_cax24_off = sp_3072_cax24_off + 24
          ENDM
          ; Words 22 and 23 (offsets 176 and 184) finish the chain.
          mov     r12, QWORD PTR [r8+176]
          mov     r11, QWORD PTR [rdx+176]
          pext    r12, r12, r9
          mov     QWORD PTR [rcx+168], r10
          adc     r11, r12
          mov     r10, QWORD PTR [r8+184]
          mov     r12, QWORD PTR [rdx+184]
          pext    r10, r10, r9
          mov     QWORD PTR [rcx+176], r11
          adc     r12, r10
          mov     QWORD PTR [rcx+184], r12
          adc     rax, 0                          ; fold the carry flag into rax
          pop     r12
          ret
  sp_3072_cond_add_avx2_24 ENDP
  _text ENDS
  ENDIF
  ; /* Shift number left by n bits. (r = a << n)
  ;  *
  ;  * r  (rcx) Result of the left shift. 49 words are written: offsets 0
  ;  *          through 384, the top word receiving the bits shifted out of
  ;  *          a[47].
  ;  * a  (rdx) Number to shift, 48 words (offsets 0 through 376 are read).
  ;  * n  (r8b) Amount to shift. It is used as the CL count of 64-bit
  ;  *          SHLD/SHL, which the CPU masks to 6 bits, so only 0..63 is
  ;  *          meaningful.
  ;  *
  ;  * The words are processed from the top down in groups of four; each
  ;  * group loads its source words before storing its results.
  ;  * r12 and r13 are saved and restored; rax, rcx, r8-r11 are overwritten.
  ;  */
  _text SEGMENT READONLY PARA
  sp_3072_lshift_48 PROC
          push    r12
          push    r13
          ; The result pointer arrives in rcx, but cl is needed for the shift
          ; count. Copy the pointer out BEFORE writing cl; doing it the other
          ; way round replaces the low byte of the pointer with n.
          mov     rax, rcx
          mov     cl, r8b
          xor     r12d, r12d                      ; r[48] starts as 0
          ; Words 43..47 -> r[44..48]
          mov     r13, QWORD PTR [rdx+344]
          mov     r8, QWORD PTR [rdx+352]
          mov     r9, QWORD PTR [rdx+360]
          mov     r10, QWORD PTR [rdx+368]
          mov     r11, QWORD PTR [rdx+376]
          shld    r12, r11, cl
          shld    r11, r10, cl
          shld    r10, r9, cl
          shld    r9, r8, cl
          shld    r8, r13, cl
          mov     QWORD PTR [rax+352], r8
          mov     QWORD PTR [rax+360], r9
          mov     QWORD PTR [rax+368], r10
          mov     QWORD PTR [rax+376], r11
          mov     QWORD PTR [rax+384], r12
          ; Words 39..42 -> r[40..43] (r13 = a[43], still unshifted)
          mov     r11, QWORD PTR [rdx+312]
          mov     r8, QWORD PTR [rdx+320]
          mov     r9, QWORD PTR [rdx+328]
          mov     r10, QWORD PTR [rdx+336]
          shld    r13, r10, cl
          shld    r10, r9, cl
          shld    r9, r8, cl
          shld    r8, r11, cl
          mov     QWORD PTR [rax+320], r8
          mov     QWORD PTR [rax+328], r9
          mov     QWORD PTR [rax+336], r10
          mov     QWORD PTR [rax+344], r13
          ; Words 35..38 -> r[36..39] (r11 = a[39])
          mov     r13, QWORD PTR [rdx+280]
          mov     r8, QWORD PTR [rdx+288]
          mov     r9, QWORD PTR [rdx+296]
          mov     r10, QWORD PTR [rdx+304]
          shld    r11, r10, cl
          shld    r10, r9, cl
          shld    r9, r8, cl
          shld    r8, r13, cl
          mov     QWORD PTR [rax+288], r8
          mov     QWORD PTR [rax+296], r9
          mov     QWORD PTR [rax+304], r10
          mov     QWORD PTR [rax+312], r11
          ; Words 31..34 -> r[32..35] (r13 = a[35])
          mov     r11, QWORD PTR [rdx+248]
          mov     r8, QWORD PTR [rdx+256]
          mov     r9, QWORD PTR [rdx+264]
          mov     r10, QWORD PTR [rdx+272]
          shld    r13, r10, cl
          shld    r10, r9, cl
          shld    r9, r8, cl
          shld    r8, r11, cl
          mov     QWORD PTR [rax+256], r8
          mov     QWORD PTR [rax+264], r9
          mov     QWORD PTR [rax+272], r10
          mov     QWORD PTR [rax+280], r13
          ; Words 27..30 -> r[28..31] (r11 = a[31])
          mov     r13, QWORD PTR [rdx+216]
          mov     r8, QWORD PTR [rdx+224]
          mov     r9, QWORD PTR [rdx+232]
          mov     r10, QWORD PTR [rdx+240]
          shld    r11, r10, cl
          shld    r10, r9, cl
          shld    r9, r8, cl
          shld    r8, r13, cl
          mov     QWORD PTR [rax+224], r8
          mov     QWORD PTR [rax+232], r9
          mov     QWORD PTR [rax+240], r10
          mov     QWORD PTR [rax+248], r11
          ; Words 23..26 -> r[24..27] (r13 = a[27])
          mov     r11, QWORD PTR [rdx+184]
          mov     r8, QWORD PTR [rdx+192]
          mov     r9, QWORD PTR [rdx+200]
          mov     r10, QWORD PTR [rdx+208]
          shld    r13, r10, cl
          shld    r10, r9, cl
          shld    r9, r8, cl
          shld    r8, r11, cl
          mov     QWORD PTR [rax+192], r8
          mov     QWORD PTR [rax+200], r9
          mov     QWORD PTR [rax+208], r10
          mov     QWORD PTR [rax+216], r13
          ; Words 19..22 -> r[20..23] (r11 = a[23])
          mov     r13, QWORD PTR [rdx+152]
          mov     r8, QWORD PTR [rdx+160]
          mov     r9, QWORD PTR [rdx+168]
          mov     r10, QWORD PTR [rdx+176]
          shld    r11, r10, cl
          shld    r10, r9, cl
          shld    r9, r8, cl
          shld    r8, r13, cl
          mov     QWORD PTR [rax+160], r8
          mov     QWORD PTR [rax+168], r9
          mov     QWORD PTR [rax+176], r10
          mov     QWORD PTR [rax+184], r11
          ; Words 15..18 -> r[16..19] (r13 = a[19])
          mov     r11, QWORD PTR [rdx+120]
          mov     r8, QWORD PTR [rdx+128]
          mov     r9, QWORD PTR [rdx+136]
          mov     r10, QWORD PTR [rdx+144]
          shld    r13, r10, cl
          shld    r10, r9, cl
          shld    r9, r8, cl
          shld    r8, r11, cl
          mov     QWORD PTR [rax+128], r8
          mov     QWORD PTR [rax+136], r9
          mov     QWORD PTR [rax+144], r10
          mov     QWORD PTR [rax+152], r13
          ; Words 11..14 -> r[12..15] (r11 = a[15])
          mov     r13, QWORD PTR [rdx+88]
          mov     r8, QWORD PTR [rdx+96]
          mov     r9, QWORD PTR [rdx+104]
          mov     r10, QWORD PTR [rdx+112]
          shld    r11, r10, cl
          shld    r10, r9, cl
          shld    r9, r8, cl
          shld    r8, r13, cl
          mov     QWORD PTR [rax+96], r8
          mov     QWORD PTR [rax+104], r9
          mov     QWORD PTR [rax+112], r10
          mov     QWORD PTR [rax+120], r11
          ; Words 7..10 -> r[8..11] (r13 = a[11])
          mov     r11, QWORD PTR [rdx+56]
          mov     r8, QWORD PTR [rdx+64]
          mov     r9, QWORD PTR [rdx+72]
          mov     r10, QWORD PTR [rdx+80]
          shld    r13, r10, cl
          shld    r10, r9, cl
          shld    r9, r8, cl
          shld    r8, r11, cl
          mov     QWORD PTR [rax+64], r8
          mov     QWORD PTR [rax+72], r9
          mov     QWORD PTR [rax+80], r10
          mov     QWORD PTR [rax+88], r13
          ; Words 3..6 -> r[4..7] (r11 = a[7])
          mov     r13, QWORD PTR [rdx+24]
          mov     r8, QWORD PTR [rdx+32]
          mov     r9, QWORD PTR [rdx+40]
          mov     r10, QWORD PTR [rdx+48]
          shld    r11, r10, cl
          shld    r10, r9, cl
          shld    r9, r8, cl
          shld    r8, r13, cl
          mov     QWORD PTR [rax+32], r8
          mov     QWORD PTR [rax+40], r9
          mov     QWORD PTR [rax+48], r10
          mov     QWORD PTR [rax+56], r11
          ; Words 0..2 -> r[0..3] (r13 = a[3]); the bottom word takes zeros.
          mov     r8, QWORD PTR [rdx]
          mov     r9, QWORD PTR [rdx+8]
          mov     r10, QWORD PTR [rdx+16]
          shld    r13, r10, cl
          shld    r10, r9, cl
          shld    r9, r8, cl
          shl     r8, cl
          mov     QWORD PTR [rax], r8
          mov     QWORD PTR [rax+8], r9
          mov     QWORD PTR [rax+16], r10
          mov     QWORD PTR [rax+24], r13
          pop     r13
          pop     r12
          ret
  sp_3072_lshift_48 ENDP
  _text ENDS
  40600. ENDIF
  40601. ENDIF
  40602. IFDEF WOLFSSL_SP_4096
  40603. IFDEF WOLFSSL_SP_4096
  ; /* Read big endian unsigned byte array into r.
  ;  * Uses the bswap instruction.
  ;  *
  ;  * r    (rcx) A single precision integer; 512 bytes (64 words) are written.
  ;  * size (rdx) Maximum number of bytes to convert (not read by this routine).
  ;  * a    (r8)  Byte array.
  ;  * n    (r9)  Number of bytes in array to read (compared as signed).
  ;  *
  ;  * Input is consumed from the end of a backwards: 64 bytes at a time,
  ;  * then 8 bytes at a time, then the leading 1..7 bytes one by one.
  ;  * Result words past the converted data are set to zero.
  ;  * r12 and r13 are saved and restored; rax, rcx, r8-r11 are overwritten.
  ;  */
  _text SEGMENT READONLY PARA
  sp_4096_from_bin_bswap PROC
          push    r12
          push    r13
          mov     r12, rcx
          mov     r11, r8
          add     r12, 512                        ; r12 = end of the result buffer
          add     r11, r9                         ; r11 = one past the last input byte
          xor     r13, r13                        ; r13 = 0, used for the zero fill
          ; 64 input bytes -> 8 result words per pass.
  L_4096_from_bin_bswap_blk64_test:
          cmp     r9, 63
          jle     L_4096_from_bin_bswap_word_test
          sub     r11, 64
  sp_4096_fbb_off = 0
          REPEAT  4
          mov     rax, QWORD PTR [r11+56-sp_4096_fbb_off]
          mov     r10, QWORD PTR [r11+48-sp_4096_fbb_off]
          bswap   rax
          bswap   r10
          mov     QWORD PTR [rcx+sp_4096_fbb_off], rax
          mov     QWORD PTR [rcx+sp_4096_fbb_off+8], r10
  sp_4096_fbb_off = sp_4096_fbb_off + 16
          ENDM
          add     rcx, 64
          sub     r9, 64
          jmp     L_4096_from_bin_bswap_blk64_test
          ; 8 input bytes -> 1 result word per pass.
  L_4096_from_bin_bswap_word_test:
          cmp     r9, 7
          jle     L_4096_from_bin_bswap_tail
          sub     r11, 8
          mov     rax, QWORD PTR [r11]
          bswap   rax
          mov     QWORD PTR [rcx], rax
          add     rcx, 8
          sub     r9, 8
          jmp     L_4096_from_bin_bswap_word_test
          ; Leading bytes: accumulate a[0], a[1], ... into one partial word.
  L_4096_from_bin_bswap_tail:
          test    r9, r9
          jz      L_4096_from_bin_bswap_fill_test
          xor     r10d, r10d
          xor     eax, eax
  L_4096_from_bin_bswap_tail_byte:
          mov     al, BYTE PTR [r8]
          shl     r10, 8
          inc     r8
          add     r10, rax
          dec     r9
          jg      L_4096_from_bin_bswap_tail_byte
          mov     QWORD PTR [rcx], r10
          add     rcx, 8
          ; Zero every remaining word up to r12.
  L_4096_from_bin_bswap_fill_test:
          cmp     rcx, r12
          jge     L_4096_from_bin_bswap_done
          mov     QWORD PTR [rcx], r13
          add     rcx, 8
          jmp     L_4096_from_bin_bswap_fill_test
  L_4096_from_bin_bswap_done:
          pop     r13
          pop     r12
          ret
  sp_4096_from_bin_bswap ENDP
  _text ENDS
  40691. IFNDEF NO_MOVBE_SUPPORT
  ; /* Convert a big endian byte array into little endian 64-bit words.
  ; * Byte order is reversed by loading with movbe, which is an optional
  ; * instruction (this routine is assembled only when NO_MOVBE_SUPPORT
  ; * is not defined).
  ; *
  ; * r     (rcx) Destination words; every word from the end of the
  ; *             converted data up to r + 512 bytes is set to zero.
  ; * size  (rdx) Maximum number of bytes to convert (not read here; the
  ; *             limit is the constant 512).
  ; * a     (r8)  Source byte array, most significant byte first.
  ; * n     (r9)  Number of bytes in a to read.
  ; *
  ; * r12 is saved and restored; rax, r10 and r11 are scratch.
  ; */
  _text SEGMENT READONLY PARA
  sp_4096_from_bin_movbe PROC
          push    r12
          ; r11 = one past the last source byte, r12 = r + 512.
          mov     r11, r8
          mov     r12, rcx
          add     r11, r9
          add     r12, 512
          jmp     L_4096_from_bin_movbe_chunk64_check
  L_4096_from_bin_movbe_chunk64_body:
          ; Take the last 64 unread source bytes and emit eight words,
          ; walking the source downwards and the destination upwards.
          sub     r11, 64
  fbm4096_src = 56
  fbm4096_dst = 0
          REPEAT 4
          movbe   rax, QWORD PTR [r11+fbm4096_src]
          movbe   r10, QWORD PTR [r11+fbm4096_src-8]
          mov     QWORD PTR [rcx+fbm4096_dst], rax
          mov     QWORD PTR [rcx+fbm4096_dst+8], r10
  fbm4096_src = fbm4096_src - 16
  fbm4096_dst = fbm4096_dst + 16
          ENDM
          add     rcx, 64
          sub     r9, 64
  L_4096_from_bin_movbe_chunk64_check:
          ; Keep going while at least 64 bytes remain.
          cmp     r9, 63
          jg      L_4096_from_bin_movbe_chunk64_body
          jmp     L_4096_from_bin_movbe_word_check
  L_4096_from_bin_movbe_word_body:
          ; One word at a time for the remaining whole 8-byte groups.
          sub     r11, 8
          movbe   rax, QWORD PTR [r11]
          mov     QWORD PTR [rcx], rax
          add     rcx, 8
          sub     r9, 8
  L_4096_from_bin_movbe_word_check:
          cmp     r9, 7
          jg      L_4096_from_bin_movbe_word_body
          ; Nothing left over means there is no partial top word.
          cmp     r9, 0
          je      L_4096_from_bin_movbe_top_done
          mov     r10, 0
          mov     rax, 0
  L_4096_from_bin_movbe_top_byte:
          ; Accumulate the leading bytes of a (read forwards from r8)
          ; into r10, most significant byte first.
          mov     al, BYTE PTR [r8]
          shl     r10, 8
          inc     r8
          add     r10, rax
          dec     r9
          jg      L_4096_from_bin_movbe_top_byte
          mov     QWORD PTR [rcx], r10
          add     rcx, 8
  L_4096_from_bin_movbe_top_done:
          ; Zero every remaining word below r + 512.
          cmp     rcx, r12
          jge     L_4096_from_bin_movbe_fill_done
  L_4096_from_bin_movbe_fill_word:
          mov     QWORD PTR [rcx], 0
          add     rcx, 8
          cmp     rcx, r12
          jl      L_4096_from_bin_movbe_fill_word
  L_4096_from_bin_movbe_fill_done:
          pop     r12
          ret
  sp_4096_from_bin_movbe ENDP
  _text ENDS
  40767. ENDIF
  ; /* Serialise r as a big endian byte array.
  ; * Exactly 512 bytes are written.
  ; * Byte order is reversed with the bswap instruction.
  ; *
  ; * r  (rcx) Source: 64 words of 64 bits, least significant word first.
  ; * a  (rdx) Destination byte array, most significant byte first.
  ; *
  ; * rax and r8 are scratch.
  ; */
  _text SEGMENT READONLY PARA
  sp_4096_to_bin_bswap_64 PROC
          ; Two words per step: the source offset runs from 504 down to 0
          ; while the destination offset runs from 0 up to 504.
  tbb4096_src = 504
  tbb4096_dst = 0
          REPEAT 32
          mov     rax, QWORD PTR [rcx+tbb4096_src]
          mov     r8, QWORD PTR [rcx+tbb4096_src-8]
          bswap   rax
          bswap   r8
          mov     QWORD PTR [rdx+tbb4096_dst], rax
          mov     QWORD PTR [rdx+tbb4096_dst+8], r8
  tbb4096_src = tbb4096_src - 16
  tbb4096_dst = tbb4096_dst + 16
          ENDM
          ret
  sp_4096_to_bin_bswap_64 ENDP
  _text ENDS
  40972. IFNDEF NO_MOVBE_SUPPORT
  ; /* Serialise r as a big endian byte array.
  ; * Exactly 512 bytes are written.
  ; * Byte order is reversed by loading with movbe, which is an optional
  ; * instruction (this routine is assembled only when NO_MOVBE_SUPPORT
  ; * is not defined).
  ; *
  ; * r  (rcx) Source: 64 words of 64 bits, least significant word first.
  ; * a  (rdx) Destination byte array, most significant byte first.
  ; *
  ; * rax and r8 are scratch.
  ; */
  _text SEGMENT READONLY PARA
  sp_4096_to_bin_movbe_64 PROC
          ; Two words per step: the source offset runs from 504 down to 0
          ; while the destination offset runs from 0 up to 504.
  tbm4096_src = 504
  tbm4096_dst = 0
          REPEAT 32
          movbe   rax, QWORD PTR [rcx+tbm4096_src]
          movbe   r8, QWORD PTR [rcx+tbm4096_src-8]
          mov     QWORD PTR [rdx+tbm4096_dst], rax
          mov     QWORD PTR [rdx+tbm4096_dst+8], r8
  tbm4096_src = tbm4096_src - 16
  tbm4096_dst = tbm4096_dst + 16
          ENDM
          ret
  sp_4096_to_bin_movbe_64 ENDP
  _text ENDS
  41113. ENDIF
  ; /* Subtract b from a in place over 64 words. (a -= b)
  ; *
  ; * a  (rcx) Minuend, overwritten with the difference.
  ; * b  (rdx) Subtrahend.
  ; *
  ; * Returns in rax: 0 when there is no borrow out of the top word,
  ; * all ones when there is.
  ; * r8 and r9 are scratch.
  ; */
  _text SEGMENT READONLY PARA
  sp_4096_sub_in_place_64 PROC
          ; Word 0 starts the borrow chain with a plain sub.
          mov     r8, QWORD PTR [rcx]
          sub     r8, QWORD PTR [rdx]
          ; Each step loads the next word of a before the previous
          ; result is stored, alternating r9 and r8. The mov
          ; instructions leave the carry flag untouched between sbb's.
  sip4096_off = 0
          REPEAT 31
          mov     r9, QWORD PTR [rcx+sip4096_off+8]
          mov     QWORD PTR [rcx+sip4096_off], r8
          sbb     r9, QWORD PTR [rdx+sip4096_off+8]
          mov     r8, QWORD PTR [rcx+sip4096_off+16]
          mov     QWORD PTR [rcx+sip4096_off+8], r9
          sbb     r8, QWORD PTR [rdx+sip4096_off+16]
  sip4096_off = sip4096_off + 16
          ENDM
          ; Final word (offset 504), then flush both pending results.
          mov     r9, QWORD PTR [rcx+504]
          mov     QWORD PTR [rcx+496], r8
          sbb     r9, QWORD PTR [rdx+504]
          mov     QWORD PTR [rcx+504], r9
          ; Spread the final borrow across the whole of rax.
          sbb     rax, rax
          ret
  sp_4096_sub_in_place_64 ENDP
  _text ENDS
  ; /* Add two 64-word integers. (r = a + b)
  ; *
  ; * r  (rcx) Destination for the sum.
  ; * a  (rdx) First operand.
  ; * b  (r8)  Second operand.
  ; *
  ; * Returns in rax the carry out of the top word (0 or 1).
  ; * r9 and r10 are scratch.
  ; */
  _text SEGMENT READONLY PARA
  sp_4096_add_64 PROC
          ; Word 0 starts the carry chain with a plain add; rax is
          ; cleared first so that it can collect the final carry.
          mov     r9, QWORD PTR [rdx]
          xor     rax, rax
          add     r9, QWORD PTR [r8]
          ; Each step loads the next word of a before the previous sum
          ; is stored, alternating r10 and r9. The mov instructions
          ; leave the carry flag untouched between adc's.
  add4096_off = 0
          REPEAT 31
          mov     r10, QWORD PTR [rdx+add4096_off+8]
          mov     QWORD PTR [rcx+add4096_off], r9
          adc     r10, QWORD PTR [r8+add4096_off+8]
          mov     r9, QWORD PTR [rdx+add4096_off+16]
          mov     QWORD PTR [rcx+add4096_off+8], r10
          adc     r9, QWORD PTR [r8+add4096_off+16]
  add4096_off = add4096_off + 16
          ENDM
          ; Final word (offset 504), then flush both pending sums.
          mov     r10, QWORD PTR [rdx+504]
          mov     QWORD PTR [rcx+496], r9
          adc     r10, QWORD PTR [r8+504]
          mov     QWORD PTR [rcx+504], r10
          ; Fold the final carry into the return value.
          adc     rax, 0
          ret
  sp_4096_add_64 ENDP
  _text ENDS
  41523. ; /* Multiply a and b into r. (r = a * b)
  41524. ; *
  41525. ; * r A single precision integer.
  41526. ; * a A single precision integer.
  41527. ; * b A single precision integer.
  41528. ; */
  41529. _text SEGMENT READONLY PARA
  41530. sp_4096_mul_64 PROC
  41531. push r12
  41532. push r13
  41533. push r14
  41534. push r15
  41535. push rdi
  41536. push rsi
  41537. sub rsp, 1576
  41538. mov QWORD PTR [rsp+1536], rcx
  41539. mov QWORD PTR [rsp+1544], rdx
  41540. mov QWORD PTR [rsp+1552], r8
  41541. lea r12, QWORD PTR [rsp+1024]
  41542. lea r14, QWORD PTR [rdx+256]
  41543. ; Add
  41544. mov rax, QWORD PTR [rdx]
  41545. xor r15, r15
  41546. add rax, QWORD PTR [r14]
  41547. mov r9, QWORD PTR [rdx+8]
  41548. mov QWORD PTR [r12], rax
  41549. adc r9, QWORD PTR [r14+8]
  41550. mov r10, QWORD PTR [rdx+16]
  41551. mov QWORD PTR [r12+8], r9
  41552. adc r10, QWORD PTR [r14+16]
  41553. mov rax, QWORD PTR [rdx+24]
  41554. mov QWORD PTR [r12+16], r10
  41555. adc rax, QWORD PTR [r14+24]
  41556. mov r9, QWORD PTR [rdx+32]
  41557. mov QWORD PTR [r12+24], rax
  41558. adc r9, QWORD PTR [r14+32]
  41559. mov r10, QWORD PTR [rdx+40]
  41560. mov QWORD PTR [r12+32], r9
  41561. adc r10, QWORD PTR [r14+40]
  41562. mov rax, QWORD PTR [rdx+48]
  41563. mov QWORD PTR [r12+40], r10
  41564. adc rax, QWORD PTR [r14+48]
  41565. mov r9, QWORD PTR [rdx+56]
  41566. mov QWORD PTR [r12+48], rax
  41567. adc r9, QWORD PTR [r14+56]
  41568. mov r10, QWORD PTR [rdx+64]
  41569. mov QWORD PTR [r12+56], r9
  41570. adc r10, QWORD PTR [r14+64]
  41571. mov rax, QWORD PTR [rdx+72]
  41572. mov QWORD PTR [r12+64], r10
  41573. adc rax, QWORD PTR [r14+72]
  41574. mov r9, QWORD PTR [rdx+80]
  41575. mov QWORD PTR [r12+72], rax
  41576. adc r9, QWORD PTR [r14+80]
  41577. mov r10, QWORD PTR [rdx+88]
  41578. mov QWORD PTR [r12+80], r9
  41579. adc r10, QWORD PTR [r14+88]
  41580. mov rax, QWORD PTR [rdx+96]
  41581. mov QWORD PTR [r12+88], r10
  41582. adc rax, QWORD PTR [r14+96]
  41583. mov r9, QWORD PTR [rdx+104]
  41584. mov QWORD PTR [r12+96], rax
  41585. adc r9, QWORD PTR [r14+104]
  41586. mov r10, QWORD PTR [rdx+112]
  41587. mov QWORD PTR [r12+104], r9
  41588. adc r10, QWORD PTR [r14+112]
  41589. mov rax, QWORD PTR [rdx+120]
  41590. mov QWORD PTR [r12+112], r10
  41591. adc rax, QWORD PTR [r14+120]
  41592. mov r9, QWORD PTR [rdx+128]
  41593. mov QWORD PTR [r12+120], rax
  41594. adc r9, QWORD PTR [r14+128]
  41595. mov r10, QWORD PTR [rdx+136]
  41596. mov QWORD PTR [r12+128], r9
  41597. adc r10, QWORD PTR [r14+136]
  41598. mov rax, QWORD PTR [rdx+144]
  41599. mov QWORD PTR [r12+136], r10
  41600. adc rax, QWORD PTR [r14+144]
  41601. mov r9, QWORD PTR [rdx+152]
  41602. mov QWORD PTR [r12+144], rax
  41603. adc r9, QWORD PTR [r14+152]
  41604. mov r10, QWORD PTR [rdx+160]
  41605. mov QWORD PTR [r12+152], r9
  41606. adc r10, QWORD PTR [r14+160]
  41607. mov rax, QWORD PTR [rdx+168]
  41608. mov QWORD PTR [r12+160], r10
  41609. adc rax, QWORD PTR [r14+168]
  41610. mov r9, QWORD PTR [rdx+176]
  41611. mov QWORD PTR [r12+168], rax
  41612. adc r9, QWORD PTR [r14+176]
  41613. mov r10, QWORD PTR [rdx+184]
  41614. mov QWORD PTR [r12+176], r9
  41615. adc r10, QWORD PTR [r14+184]
  41616. mov rax, QWORD PTR [rdx+192]
  41617. mov QWORD PTR [r12+184], r10
  41618. adc rax, QWORD PTR [r14+192]
  41619. mov r9, QWORD PTR [rdx+200]
  41620. mov QWORD PTR [r12+192], rax
  41621. adc r9, QWORD PTR [r14+200]
  41622. mov r10, QWORD PTR [rdx+208]
  41623. mov QWORD PTR [r12+200], r9
  41624. adc r10, QWORD PTR [r14+208]
  41625. mov rax, QWORD PTR [rdx+216]
  41626. mov QWORD PTR [r12+208], r10
  41627. adc rax, QWORD PTR [r14+216]
  41628. mov r9, QWORD PTR [rdx+224]
  41629. mov QWORD PTR [r12+216], rax
  41630. adc r9, QWORD PTR [r14+224]
  41631. mov r10, QWORD PTR [rdx+232]
  41632. mov QWORD PTR [r12+224], r9
  41633. adc r10, QWORD PTR [r14+232]
  41634. mov rax, QWORD PTR [rdx+240]
  41635. mov QWORD PTR [r12+232], r10
  41636. adc rax, QWORD PTR [r14+240]
  41637. mov r9, QWORD PTR [rdx+248]
  41638. mov QWORD PTR [r12+240], rax
  41639. adc r9, QWORD PTR [r14+248]
  41640. mov QWORD PTR [r12+248], r9
  41641. adc r15, 0
  41642. mov QWORD PTR [rsp+1560], r15
  41643. lea r13, QWORD PTR [rsp+1280]
  41644. lea r14, QWORD PTR [r8+256]
  41645. ; Add
  41646. mov rax, QWORD PTR [r8]
  41647. xor rdi, rdi
  41648. add rax, QWORD PTR [r14]
  41649. mov r9, QWORD PTR [r8+8]
  41650. mov QWORD PTR [r13], rax
  41651. adc r9, QWORD PTR [r14+8]
  41652. mov r10, QWORD PTR [r8+16]
  41653. mov QWORD PTR [r13+8], r9
  41654. adc r10, QWORD PTR [r14+16]
  41655. mov rax, QWORD PTR [r8+24]
  41656. mov QWORD PTR [r13+16], r10
  41657. adc rax, QWORD PTR [r14+24]
  41658. mov r9, QWORD PTR [r8+32]
  41659. mov QWORD PTR [r13+24], rax
  41660. adc r9, QWORD PTR [r14+32]
  41661. mov r10, QWORD PTR [r8+40]
  41662. mov QWORD PTR [r13+32], r9
  41663. adc r10, QWORD PTR [r14+40]
  41664. mov rax, QWORD PTR [r8+48]
  41665. mov QWORD PTR [r13+40], r10
  41666. adc rax, QWORD PTR [r14+48]
  41667. mov r9, QWORD PTR [r8+56]
  41668. mov QWORD PTR [r13+48], rax
  41669. adc r9, QWORD PTR [r14+56]
  41670. mov r10, QWORD PTR [r8+64]
  41671. mov QWORD PTR [r13+56], r9
  41672. adc r10, QWORD PTR [r14+64]
  41673. mov rax, QWORD PTR [r8+72]
  41674. mov QWORD PTR [r13+64], r10
  41675. adc rax, QWORD PTR [r14+72]
  41676. mov r9, QWORD PTR [r8+80]
  41677. mov QWORD PTR [r13+72], rax
  41678. adc r9, QWORD PTR [r14+80]
  41679. mov r10, QWORD PTR [r8+88]
  41680. mov QWORD PTR [r13+80], r9
  41681. adc r10, QWORD PTR [r14+88]
  41682. mov rax, QWORD PTR [r8+96]
  41683. mov QWORD PTR [r13+88], r10
  41684. adc rax, QWORD PTR [r14+96]
  41685. mov r9, QWORD PTR [r8+104]
  41686. mov QWORD PTR [r13+96], rax
  41687. adc r9, QWORD PTR [r14+104]
  41688. mov r10, QWORD PTR [r8+112]
  41689. mov QWORD PTR [r13+104], r9
  41690. adc r10, QWORD PTR [r14+112]
  41691. mov rax, QWORD PTR [r8+120]
  41692. mov QWORD PTR [r13+112], r10
  41693. adc rax, QWORD PTR [r14+120]
  41694. mov r9, QWORD PTR [r8+128]
  41695. mov QWORD PTR [r13+120], rax
  41696. adc r9, QWORD PTR [r14+128]
  41697. mov r10, QWORD PTR [r8+136]
  41698. mov QWORD PTR [r13+128], r9
  41699. adc r10, QWORD PTR [r14+136]
  41700. mov rax, QWORD PTR [r8+144]
  41701. mov QWORD PTR [r13+136], r10
  41702. adc rax, QWORD PTR [r14+144]
  41703. mov r9, QWORD PTR [r8+152]
  41704. mov QWORD PTR [r13+144], rax
  41705. adc r9, QWORD PTR [r14+152]
  41706. mov r10, QWORD PTR [r8+160]
  41707. mov QWORD PTR [r13+152], r9
  41708. adc r10, QWORD PTR [r14+160]
  41709. mov rax, QWORD PTR [r8+168]
  41710. mov QWORD PTR [r13+160], r10
  41711. adc rax, QWORD PTR [r14+168]
  41712. mov r9, QWORD PTR [r8+176]
  41713. mov QWORD PTR [r13+168], rax
  41714. adc r9, QWORD PTR [r14+176]
  41715. mov r10, QWORD PTR [r8+184]
  41716. mov QWORD PTR [r13+176], r9
  41717. adc r10, QWORD PTR [r14+184]
  41718. mov rax, QWORD PTR [r8+192]
  41719. mov QWORD PTR [r13+184], r10
  41720. adc rax, QWORD PTR [r14+192]
  41721. mov r9, QWORD PTR [r8+200]
  41722. mov QWORD PTR [r13+192], rax
  41723. adc r9, QWORD PTR [r14+200]
  41724. mov r10, QWORD PTR [r8+208]
  41725. mov QWORD PTR [r13+200], r9
  41726. adc r10, QWORD PTR [r14+208]
  41727. mov rax, QWORD PTR [r8+216]
  41728. mov QWORD PTR [r13+208], r10
  41729. adc rax, QWORD PTR [r14+216]
  41730. mov r9, QWORD PTR [r8+224]
  41731. mov QWORD PTR [r13+216], rax
  41732. adc r9, QWORD PTR [r14+224]
  41733. mov r10, QWORD PTR [r8+232]
  41734. mov QWORD PTR [r13+224], r9
  41735. adc r10, QWORD PTR [r14+232]
  41736. mov rax, QWORD PTR [r8+240]
  41737. mov QWORD PTR [r13+232], r10
  41738. adc rax, QWORD PTR [r14+240]
  41739. mov r9, QWORD PTR [r8+248]
  41740. mov QWORD PTR [r13+240], rax
  41741. adc r9, QWORD PTR [r14+248]
  41742. mov QWORD PTR [r13+248], r9
  41743. adc rdi, 0
  41744. mov QWORD PTR [rsp+1568], rdi
  41745. mov r8, r13
  41746. mov rdx, r12
  41747. mov rcx, rsp
  41748. call sp_2048_mul_32
  41749. mov r8, QWORD PTR [rsp+1552]
  41750. mov rdx, QWORD PTR [rsp+1544]
  41751. lea rcx, QWORD PTR [rsp+512]
  41752. add r8, 256
  41753. add rdx, 256
  41754. call sp_2048_mul_32
  41755. mov r8, QWORD PTR [rsp+1552]
  41756. mov rdx, QWORD PTR [rsp+1544]
  41757. mov rcx, QWORD PTR [rsp+1536]
  41758. call sp_2048_mul_32
  41759. IFDEF _WIN64
  41760. mov r8, QWORD PTR [rsp+1552]
  41761. mov rdx, QWORD PTR [rsp+1544]
  41762. mov rcx, QWORD PTR [rsp+1536]
  41763. ENDIF
  41764. mov r15, QWORD PTR [rsp+1560]
  41765. mov rdi, QWORD PTR [rsp+1568]
  41766. mov rsi, QWORD PTR [rsp+1536]
  41767. mov r11, r15
  41768. lea r12, QWORD PTR [rsp+1024]
  41769. lea r13, QWORD PTR [rsp+1280]
  41770. and r11, rdi
  41771. neg r15
  41772. neg rdi
  41773. add rsi, 512
  41774. mov rax, QWORD PTR [r12]
  41775. mov r9, QWORD PTR [r13]
  41776. and rax, rdi
  41777. and r9, r15
  41778. mov QWORD PTR [r12], rax
  41779. mov QWORD PTR [r13], r9
  41780. mov rax, QWORD PTR [r12+8]
  41781. mov r9, QWORD PTR [r13+8]
  41782. and rax, rdi
  41783. and r9, r15
  41784. mov QWORD PTR [r12+8], rax
  41785. mov QWORD PTR [r13+8], r9
  41786. mov rax, QWORD PTR [r12+16]
  41787. mov r9, QWORD PTR [r13+16]
  41788. and rax, rdi
  41789. and r9, r15
  41790. mov QWORD PTR [r12+16], rax
  41791. mov QWORD PTR [r13+16], r9
  41792. mov rax, QWORD PTR [r12+24]
  41793. mov r9, QWORD PTR [r13+24]
  41794. and rax, rdi
  41795. and r9, r15
  41796. mov QWORD PTR [r12+24], rax
  41797. mov QWORD PTR [r13+24], r9
  41798. mov rax, QWORD PTR [r12+32]
  41799. mov r9, QWORD PTR [r13+32]
  41800. and rax, rdi
  41801. and r9, r15
  41802. mov QWORD PTR [r12+32], rax
  41803. mov QWORD PTR [r13+32], r9
  41804. mov rax, QWORD PTR [r12+40]
  41805. mov r9, QWORD PTR [r13+40]
  41806. and rax, rdi
  41807. and r9, r15
  41808. mov QWORD PTR [r12+40], rax
  41809. mov QWORD PTR [r13+40], r9
  41810. mov rax, QWORD PTR [r12+48]
  41811. mov r9, QWORD PTR [r13+48]
  41812. and rax, rdi
  41813. and r9, r15
  41814. mov QWORD PTR [r12+48], rax
  41815. mov QWORD PTR [r13+48], r9
  41816. mov rax, QWORD PTR [r12+56]
  41817. mov r9, QWORD PTR [r13+56]
  41818. and rax, rdi
  41819. and r9, r15
  41820. mov QWORD PTR [r12+56], rax
  41821. mov QWORD PTR [r13+56], r9
  41822. mov rax, QWORD PTR [r12+64]
  41823. mov r9, QWORD PTR [r13+64]
  41824. and rax, rdi
  41825. and r9, r15
  41826. mov QWORD PTR [r12+64], rax
  41827. mov QWORD PTR [r13+64], r9
  41828. mov rax, QWORD PTR [r12+72]
  41829. mov r9, QWORD PTR [r13+72]
  41830. and rax, rdi
  41831. and r9, r15
  41832. mov QWORD PTR [r12+72], rax
  41833. mov QWORD PTR [r13+72], r9
  41834. mov rax, QWORD PTR [r12+80]
  41835. mov r9, QWORD PTR [r13+80]
  41836. and rax, rdi
  41837. and r9, r15
  41838. mov QWORD PTR [r12+80], rax
  41839. mov QWORD PTR [r13+80], r9
  41840. mov rax, QWORD PTR [r12+88]
  41841. mov r9, QWORD PTR [r13+88]
  41842. and rax, rdi
  41843. and r9, r15
  41844. mov QWORD PTR [r12+88], rax
  41845. mov QWORD PTR [r13+88], r9
  41846. mov rax, QWORD PTR [r12+96]
  41847. mov r9, QWORD PTR [r13+96]
  41848. and rax, rdi
  41849. and r9, r15
  41850. mov QWORD PTR [r12+96], rax
  41851. mov QWORD PTR [r13+96], r9
  41852. mov rax, QWORD PTR [r12+104]
  41853. mov r9, QWORD PTR [r13+104]
  41854. and rax, rdi
  41855. and r9, r15
  41856. mov QWORD PTR [r12+104], rax
  41857. mov QWORD PTR [r13+104], r9
  41858. mov rax, QWORD PTR [r12+112]
  41859. mov r9, QWORD PTR [r13+112]
  41860. and rax, rdi
  41861. and r9, r15
  41862. mov QWORD PTR [r12+112], rax
  41863. mov QWORD PTR [r13+112], r9
  41864. mov rax, QWORD PTR [r12+120]
  41865. mov r9, QWORD PTR [r13+120]
  41866. and rax, rdi
  41867. and r9, r15
  41868. mov QWORD PTR [r12+120], rax
  41869. mov QWORD PTR [r13+120], r9
  41870. mov rax, QWORD PTR [r12+128]
  41871. mov r9, QWORD PTR [r13+128]
  41872. and rax, rdi
  41873. and r9, r15
  41874. mov QWORD PTR [r12+128], rax
  41875. mov QWORD PTR [r13+128], r9
  41876. mov rax, QWORD PTR [r12+136]
  41877. mov r9, QWORD PTR [r13+136]
  41878. and rax, rdi
  41879. and r9, r15
  41880. mov QWORD PTR [r12+136], rax
  41881. mov QWORD PTR [r13+136], r9
  41882. mov rax, QWORD PTR [r12+144]
  41883. mov r9, QWORD PTR [r13+144]
  41884. and rax, rdi
  41885. and r9, r15
  41886. mov QWORD PTR [r12+144], rax
  41887. mov QWORD PTR [r13+144], r9
  41888. mov rax, QWORD PTR [r12+152]
  41889. mov r9, QWORD PTR [r13+152]
  41890. and rax, rdi
  41891. and r9, r15
  41892. mov QWORD PTR [r12+152], rax
  41893. mov QWORD PTR [r13+152], r9
  41894. mov rax, QWORD PTR [r12+160]
  41895. mov r9, QWORD PTR [r13+160]
  41896. and rax, rdi
  41897. and r9, r15
  41898. mov QWORD PTR [r12+160], rax
  41899. mov QWORD PTR [r13+160], r9
  41900. mov rax, QWORD PTR [r12+168]
  41901. mov r9, QWORD PTR [r13+168]
  41902. and rax, rdi
  41903. and r9, r15
  41904. mov QWORD PTR [r12+168], rax
  41905. mov QWORD PTR [r13+168], r9
  41906. mov rax, QWORD PTR [r12+176]
  41907. mov r9, QWORD PTR [r13+176]
  41908. and rax, rdi
  41909. and r9, r15
  41910. mov QWORD PTR [r12+176], rax
  41911. mov QWORD PTR [r13+176], r9
  41912. mov rax, QWORD PTR [r12+184]
  41913. mov r9, QWORD PTR [r13+184]
  41914. and rax, rdi
  41915. and r9, r15
  41916. mov QWORD PTR [r12+184], rax
  41917. mov QWORD PTR [r13+184], r9
  41918. mov rax, QWORD PTR [r12+192]
  41919. mov r9, QWORD PTR [r13+192]
  41920. and rax, rdi
  41921. and r9, r15
  41922. mov QWORD PTR [r12+192], rax
  41923. mov QWORD PTR [r13+192], r9
  41924. mov rax, QWORD PTR [r12+200]
  41925. mov r9, QWORD PTR [r13+200]
  41926. and rax, rdi
  41927. and r9, r15
  41928. mov QWORD PTR [r12+200], rax
  41929. mov QWORD PTR [r13+200], r9
  41930. mov rax, QWORD PTR [r12+208]
  41931. mov r9, QWORD PTR [r13+208]
  41932. and rax, rdi
  41933. and r9, r15
  41934. mov QWORD PTR [r12+208], rax
  41935. mov QWORD PTR [r13+208], r9
  41936. mov rax, QWORD PTR [r12+216]
  41937. mov r9, QWORD PTR [r13+216]
  41938. and rax, rdi
  41939. and r9, r15
  41940. mov QWORD PTR [r12+216], rax
  41941. mov QWORD PTR [r13+216], r9
  41942. mov rax, QWORD PTR [r12+224]
  41943. mov r9, QWORD PTR [r13+224]
  41944. and rax, rdi
  41945. and r9, r15
  41946. mov QWORD PTR [r12+224], rax
  41947. mov QWORD PTR [r13+224], r9
  41948. mov rax, QWORD PTR [r12+232]
  41949. mov r9, QWORD PTR [r13+232]
  41950. and rax, rdi
  41951. and r9, r15
  41952. mov QWORD PTR [r12+232], rax
  41953. mov QWORD PTR [r13+232], r9
  41954. mov rax, QWORD PTR [r12+240]
  41955. mov r9, QWORD PTR [r13+240]
  41956. and rax, rdi
  41957. and r9, r15
  41958. mov QWORD PTR [r12+240], rax
  41959. mov QWORD PTR [r13+240], r9
  41960. mov rax, QWORD PTR [r12+248]
  41961. mov r9, QWORD PTR [r13+248]
  41962. and rax, rdi
  41963. and r9, r15
  41964. mov QWORD PTR [r12+248], rax
  41965. mov QWORD PTR [r13+248], r9
  41966. mov rax, QWORD PTR [r12]
  41967. add rax, QWORD PTR [r13]
  41968. mov r9, QWORD PTR [r12+8]
  41969. mov QWORD PTR [rsi], rax
  41970. adc r9, QWORD PTR [r13+8]
  41971. mov r10, QWORD PTR [r12+16]
  41972. mov QWORD PTR [rsi+8], r9
  41973. adc r10, QWORD PTR [r13+16]
  41974. mov rax, QWORD PTR [r12+24]
  41975. mov QWORD PTR [rsi+16], r10
  41976. adc rax, QWORD PTR [r13+24]
  41977. mov r9, QWORD PTR [r12+32]
  41978. mov QWORD PTR [rsi+24], rax
  41979. adc r9, QWORD PTR [r13+32]
  41980. mov r10, QWORD PTR [r12+40]
  41981. mov QWORD PTR [rsi+32], r9
  41982. adc r10, QWORD PTR [r13+40]
  41983. mov rax, QWORD PTR [r12+48]
  41984. mov QWORD PTR [rsi+40], r10
  41985. adc rax, QWORD PTR [r13+48]
  41986. mov r9, QWORD PTR [r12+56]
  41987. mov QWORD PTR [rsi+48], rax
  41988. adc r9, QWORD PTR [r13+56]
  41989. mov r10, QWORD PTR [r12+64]
  41990. mov QWORD PTR [rsi+56], r9
  41991. adc r10, QWORD PTR [r13+64]
  41992. mov rax, QWORD PTR [r12+72]
  41993. mov QWORD PTR [rsi+64], r10
  41994. adc rax, QWORD PTR [r13+72]
  41995. mov r9, QWORD PTR [r12+80]
  41996. mov QWORD PTR [rsi+72], rax
  41997. adc r9, QWORD PTR [r13+80]
  41998. mov r10, QWORD PTR [r12+88]
  41999. mov QWORD PTR [rsi+80], r9
  42000. adc r10, QWORD PTR [r13+88]
  42001. mov rax, QWORD PTR [r12+96]
  42002. mov QWORD PTR [rsi+88], r10
  42003. adc rax, QWORD PTR [r13+96]
  42004. mov r9, QWORD PTR [r12+104]
  42005. mov QWORD PTR [rsi+96], rax
  42006. adc r9, QWORD PTR [r13+104]
  42007. mov r10, QWORD PTR [r12+112]
  42008. mov QWORD PTR [rsi+104], r9
  42009. adc r10, QWORD PTR [r13+112]
  42010. mov rax, QWORD PTR [r12+120]
  42011. mov QWORD PTR [rsi+112], r10
  42012. adc rax, QWORD PTR [r13+120]
  42013. mov r9, QWORD PTR [r12+128]
  42014. mov QWORD PTR [rsi+120], rax
  42015. adc r9, QWORD PTR [r13+128]
  42016. mov r10, QWORD PTR [r12+136]
  42017. mov QWORD PTR [rsi+128], r9
  42018. adc r10, QWORD PTR [r13+136]
  42019. mov rax, QWORD PTR [r12+144]
  42020. mov QWORD PTR [rsi+136], r10
  42021. adc rax, QWORD PTR [r13+144]
  42022. mov r9, QWORD PTR [r12+152]
  42023. mov QWORD PTR [rsi+144], rax
  42024. adc r9, QWORD PTR [r13+152]
  42025. mov r10, QWORD PTR [r12+160]
  42026. mov QWORD PTR [rsi+152], r9
  42027. adc r10, QWORD PTR [r13+160]
  42028. mov rax, QWORD PTR [r12+168]
  42029. mov QWORD PTR [rsi+160], r10
  42030. adc rax, QWORD PTR [r13+168]
  42031. mov r9, QWORD PTR [r12+176]
  42032. mov QWORD PTR [rsi+168], rax
  42033. adc r9, QWORD PTR [r13+176]
  42034. mov r10, QWORD PTR [r12+184]
  42035. mov QWORD PTR [rsi+176], r9
  42036. adc r10, QWORD PTR [r13+184]
  42037. mov rax, QWORD PTR [r12+192]
  42038. mov QWORD PTR [rsi+184], r10
  42039. adc rax, QWORD PTR [r13+192]
  42040. mov r9, QWORD PTR [r12+200]
  42041. mov QWORD PTR [rsi+192], rax
  42042. adc r9, QWORD PTR [r13+200]
  42043. mov r10, QWORD PTR [r12+208]
  42044. mov QWORD PTR [rsi+200], r9
  42045. adc r10, QWORD PTR [r13+208]
  42046. mov rax, QWORD PTR [r12+216]
  42047. mov QWORD PTR [rsi+208], r10
  42048. adc rax, QWORD PTR [r13+216]
  42049. mov r9, QWORD PTR [r12+224]
  42050. mov QWORD PTR [rsi+216], rax
  42051. adc r9, QWORD PTR [r13+224]
  42052. mov r10, QWORD PTR [r12+232]
  42053. mov QWORD PTR [rsi+224], r9
  42054. adc r10, QWORD PTR [r13+232]
  42055. mov rax, QWORD PTR [r12+240]
  42056. mov QWORD PTR [rsi+232], r10
  42057. adc rax, QWORD PTR [r13+240]
  42058. mov r9, QWORD PTR [r12+248]
  42059. mov QWORD PTR [rsi+240], rax
  42060. adc r9, QWORD PTR [r13+248]
  42061. mov QWORD PTR [rsi+248], r9
  42062. adc r11, 0
  42063. lea r13, QWORD PTR [rsp+512]
  42064. mov r12, rsp
  42065. mov rax, QWORD PTR [r12]
  42066. sub rax, QWORD PTR [r13]
  42067. mov r9, QWORD PTR [r12+8]
  42068. mov QWORD PTR [r12], rax
  42069. sbb r9, QWORD PTR [r13+8]
  42070. mov r10, QWORD PTR [r12+16]
  42071. mov QWORD PTR [r12+8], r9
  42072. sbb r10, QWORD PTR [r13+16]
  42073. mov rax, QWORD PTR [r12+24]
  42074. mov QWORD PTR [r12+16], r10
  42075. sbb rax, QWORD PTR [r13+24]
  42076. mov r9, QWORD PTR [r12+32]
  42077. mov QWORD PTR [r12+24], rax
  42078. sbb r9, QWORD PTR [r13+32]
  42079. mov r10, QWORD PTR [r12+40]
  42080. mov QWORD PTR [r12+32], r9
  42081. sbb r10, QWORD PTR [r13+40]
  42082. mov rax, QWORD PTR [r12+48]
  42083. mov QWORD PTR [r12+40], r10
  42084. sbb rax, QWORD PTR [r13+48]
  42085. mov r9, QWORD PTR [r12+56]
  42086. mov QWORD PTR [r12+48], rax
  42087. sbb r9, QWORD PTR [r13+56]
  42088. mov r10, QWORD PTR [r12+64]
  42089. mov QWORD PTR [r12+56], r9
  42090. sbb r10, QWORD PTR [r13+64]
  42091. mov rax, QWORD PTR [r12+72]
  42092. mov QWORD PTR [r12+64], r10
  42093. sbb rax, QWORD PTR [r13+72]
  42094. mov r9, QWORD PTR [r12+80]
  42095. mov QWORD PTR [r12+72], rax
  42096. sbb r9, QWORD PTR [r13+80]
  42097. mov r10, QWORD PTR [r12+88]
  42098. mov QWORD PTR [r12+80], r9
  42099. sbb r10, QWORD PTR [r13+88]
  42100. mov rax, QWORD PTR [r12+96]
  42101. mov QWORD PTR [r12+88], r10
  42102. sbb rax, QWORD PTR [r13+96]
  42103. mov r9, QWORD PTR [r12+104]
  42104. mov QWORD PTR [r12+96], rax
  42105. sbb r9, QWORD PTR [r13+104]
  42106. mov r10, QWORD PTR [r12+112]
  42107. mov QWORD PTR [r12+104], r9
  42108. sbb r10, QWORD PTR [r13+112]
  42109. mov rax, QWORD PTR [r12+120]
  42110. mov QWORD PTR [r12+112], r10
  42111. sbb rax, QWORD PTR [r13+120]
  42112. mov r9, QWORD PTR [r12+128]
  42113. mov QWORD PTR [r12+120], rax
  42114. sbb r9, QWORD PTR [r13+128]
  42115. mov r10, QWORD PTR [r12+136]
  42116. mov QWORD PTR [r12+128], r9
  42117. sbb r10, QWORD PTR [r13+136]
  42118. mov rax, QWORD PTR [r12+144]
  42119. mov QWORD PTR [r12+136], r10
  42120. sbb rax, QWORD PTR [r13+144]
  42121. mov r9, QWORD PTR [r12+152]
  42122. mov QWORD PTR [r12+144], rax
  42123. sbb r9, QWORD PTR [r13+152]
  42124. mov r10, QWORD PTR [r12+160]
  42125. mov QWORD PTR [r12+152], r9
  42126. sbb r10, QWORD PTR [r13+160]
  42127. mov rax, QWORD PTR [r12+168]
  42128. mov QWORD PTR [r12+160], r10
  42129. sbb rax, QWORD PTR [r13+168]
  42130. mov r9, QWORD PTR [r12+176]
  42131. mov QWORD PTR [r12+168], rax
  42132. sbb r9, QWORD PTR [r13+176]
  42133. mov r10, QWORD PTR [r12+184]
  42134. mov QWORD PTR [r12+176], r9
  42135. sbb r10, QWORD PTR [r13+184]
  42136. mov rax, QWORD PTR [r12+192]
  42137. mov QWORD PTR [r12+184], r10
  42138. sbb rax, QWORD PTR [r13+192]
  42139. mov r9, QWORD PTR [r12+200]
  42140. mov QWORD PTR [r12+192], rax
  42141. sbb r9, QWORD PTR [r13+200]
  42142. mov r10, QWORD PTR [r12+208]
  42143. mov QWORD PTR [r12+200], r9
  42144. sbb r10, QWORD PTR [r13+208]
  42145. mov rax, QWORD PTR [r12+216]
  42146. mov QWORD PTR [r12+208], r10
  42147. sbb rax, QWORD PTR [r13+216]
  42148. mov r9, QWORD PTR [r12+224]
  42149. mov QWORD PTR [r12+216], rax
  42150. sbb r9, QWORD PTR [r13+224]
  42151. mov r10, QWORD PTR [r12+232]
  42152. mov QWORD PTR [r12+224], r9
  42153. sbb r10, QWORD PTR [r13+232]
  42154. mov rax, QWORD PTR [r12+240]
  42155. mov QWORD PTR [r12+232], r10
  42156. sbb rax, QWORD PTR [r13+240]
  42157. mov r9, QWORD PTR [r12+248]
  42158. mov QWORD PTR [r12+240], rax
  42159. sbb r9, QWORD PTR [r13+248]
  42160. mov r10, QWORD PTR [r12+256]
  42161. mov QWORD PTR [r12+248], r9
  42162. sbb r10, QWORD PTR [r13+256]
  42163. mov rax, QWORD PTR [r12+264]
  42164. mov QWORD PTR [r12+256], r10
  42165. sbb rax, QWORD PTR [r13+264]
  42166. mov r9, QWORD PTR [r12+272]
  42167. mov QWORD PTR [r12+264], rax
  42168. sbb r9, QWORD PTR [r13+272]
  42169. mov r10, QWORD PTR [r12+280]
  42170. mov QWORD PTR [r12+272], r9
  42171. sbb r10, QWORD PTR [r13+280]
  42172. mov rax, QWORD PTR [r12+288]
  42173. mov QWORD PTR [r12+280], r10
  42174. sbb rax, QWORD PTR [r13+288]
  42175. mov r9, QWORD PTR [r12+296]
  42176. mov QWORD PTR [r12+288], rax
  42177. sbb r9, QWORD PTR [r13+296]
  42178. mov r10, QWORD PTR [r12+304]
  42179. mov QWORD PTR [r12+296], r9
  42180. sbb r10, QWORD PTR [r13+304]
  42181. mov rax, QWORD PTR [r12+312]
  42182. mov QWORD PTR [r12+304], r10
  42183. sbb rax, QWORD PTR [r13+312]
  42184. mov r9, QWORD PTR [r12+320]
  42185. mov QWORD PTR [r12+312], rax
  42186. sbb r9, QWORD PTR [r13+320]
  42187. mov r10, QWORD PTR [r12+328]
  42188. mov QWORD PTR [r12+320], r9
  42189. sbb r10, QWORD PTR [r13+328]
  42190. mov rax, QWORD PTR [r12+336]
  42191. mov QWORD PTR [r12+328], r10
  42192. sbb rax, QWORD PTR [r13+336]
  42193. mov r9, QWORD PTR [r12+344]
  42194. mov QWORD PTR [r12+336], rax
  42195. sbb r9, QWORD PTR [r13+344]
  42196. mov r10, QWORD PTR [r12+352]
  42197. mov QWORD PTR [r12+344], r9
  42198. sbb r10, QWORD PTR [r13+352]
  42199. mov rax, QWORD PTR [r12+360]
  42200. mov QWORD PTR [r12+352], r10
  42201. sbb rax, QWORD PTR [r13+360]
  42202. mov r9, QWORD PTR [r12+368]
  42203. mov QWORD PTR [r12+360], rax
  42204. sbb r9, QWORD PTR [r13+368]
  42205. mov r10, QWORD PTR [r12+376]
  42206. mov QWORD PTR [r12+368], r9
  42207. sbb r10, QWORD PTR [r13+376]
  42208. mov rax, QWORD PTR [r12+384]
  42209. mov QWORD PTR [r12+376], r10
  42210. sbb rax, QWORD PTR [r13+384]
  42211. mov r9, QWORD PTR [r12+392]
  42212. mov QWORD PTR [r12+384], rax
  42213. sbb r9, QWORD PTR [r13+392]
  42214. mov r10, QWORD PTR [r12+400]
  42215. mov QWORD PTR [r12+392], r9
  42216. sbb r10, QWORD PTR [r13+400]
  42217. mov rax, QWORD PTR [r12+408]
  42218. mov QWORD PTR [r12+400], r10
  42219. sbb rax, QWORD PTR [r13+408]
  42220. mov r9, QWORD PTR [r12+416]
  42221. mov QWORD PTR [r12+408], rax
  42222. sbb r9, QWORD PTR [r13+416]
  42223. mov r10, QWORD PTR [r12+424]
  42224. mov QWORD PTR [r12+416], r9
  42225. sbb r10, QWORD PTR [r13+424]
  42226. mov rax, QWORD PTR [r12+432]
  42227. mov QWORD PTR [r12+424], r10
  42228. sbb rax, QWORD PTR [r13+432]
  42229. mov r9, QWORD PTR [r12+440]
  42230. mov QWORD PTR [r12+432], rax
  42231. sbb r9, QWORD PTR [r13+440]
  42232. mov r10, QWORD PTR [r12+448]
  42233. mov QWORD PTR [r12+440], r9
  42234. sbb r10, QWORD PTR [r13+448]
  42235. mov rax, QWORD PTR [r12+456]
  42236. mov QWORD PTR [r12+448], r10
  42237. sbb rax, QWORD PTR [r13+456]
  42238. mov r9, QWORD PTR [r12+464]
  42239. mov QWORD PTR [r12+456], rax
  42240. sbb r9, QWORD PTR [r13+464]
  42241. mov r10, QWORD PTR [r12+472]
  42242. mov QWORD PTR [r12+464], r9
  42243. sbb r10, QWORD PTR [r13+472]
  42244. mov rax, QWORD PTR [r12+480]
  42245. mov QWORD PTR [r12+472], r10
  42246. sbb rax, QWORD PTR [r13+480]
  42247. mov r9, QWORD PTR [r12+488]
  42248. mov QWORD PTR [r12+480], rax
  42249. sbb r9, QWORD PTR [r13+488]
  42250. mov r10, QWORD PTR [r12+496]
  42251. mov QWORD PTR [r12+488], r9
  42252. sbb r10, QWORD PTR [r13+496]
  42253. mov rax, QWORD PTR [r12+504]
  42254. mov QWORD PTR [r12+496], r10
  42255. sbb rax, QWORD PTR [r13+504]
  42256. mov QWORD PTR [r12+504], rax
  42257. sbb r11, 0
  42258. mov rax, QWORD PTR [r12]
  42259. sub rax, QWORD PTR [rcx]
  42260. mov r9, QWORD PTR [r12+8]
  42261. mov QWORD PTR [r12], rax
  42262. sbb r9, QWORD PTR [rcx+8]
  42263. mov r10, QWORD PTR [r12+16]
  42264. mov QWORD PTR [r12+8], r9
  42265. sbb r10, QWORD PTR [rcx+16]
  42266. mov rax, QWORD PTR [r12+24]
  42267. mov QWORD PTR [r12+16], r10
  42268. sbb rax, QWORD PTR [rcx+24]
  42269. mov r9, QWORD PTR [r12+32]
  42270. mov QWORD PTR [r12+24], rax
  42271. sbb r9, QWORD PTR [rcx+32]
  42272. mov r10, QWORD PTR [r12+40]
  42273. mov QWORD PTR [r12+32], r9
  42274. sbb r10, QWORD PTR [rcx+40]
  42275. mov rax, QWORD PTR [r12+48]
  42276. mov QWORD PTR [r12+40], r10
  42277. sbb rax, QWORD PTR [rcx+48]
  42278. mov r9, QWORD PTR [r12+56]
  42279. mov QWORD PTR [r12+48], rax
  42280. sbb r9, QWORD PTR [rcx+56]
  42281. mov r10, QWORD PTR [r12+64]
  42282. mov QWORD PTR [r12+56], r9
  42283. sbb r10, QWORD PTR [rcx+64]
  42284. mov rax, QWORD PTR [r12+72]
  42285. mov QWORD PTR [r12+64], r10
  42286. sbb rax, QWORD PTR [rcx+72]
  42287. mov r9, QWORD PTR [r12+80]
  42288. mov QWORD PTR [r12+72], rax
  42289. sbb r9, QWORD PTR [rcx+80]
  42290. mov r10, QWORD PTR [r12+88]
  42291. mov QWORD PTR [r12+80], r9
  42292. sbb r10, QWORD PTR [rcx+88]
  42293. mov rax, QWORD PTR [r12+96]
  42294. mov QWORD PTR [r12+88], r10
  42295. sbb rax, QWORD PTR [rcx+96]
  42296. mov r9, QWORD PTR [r12+104]
  42297. mov QWORD PTR [r12+96], rax
  42298. sbb r9, QWORD PTR [rcx+104]
  42299. mov r10, QWORD PTR [r12+112]
  42300. mov QWORD PTR [r12+104], r9
  42301. sbb r10, QWORD PTR [rcx+112]
  42302. mov rax, QWORD PTR [r12+120]
  42303. mov QWORD PTR [r12+112], r10
  42304. sbb rax, QWORD PTR [rcx+120]
  42305. mov r9, QWORD PTR [r12+128]
  42306. mov QWORD PTR [r12+120], rax
  42307. sbb r9, QWORD PTR [rcx+128]
  42308. mov r10, QWORD PTR [r12+136]
  42309. mov QWORD PTR [r12+128], r9
  42310. sbb r10, QWORD PTR [rcx+136]
  42311. mov rax, QWORD PTR [r12+144]
  42312. mov QWORD PTR [r12+136], r10
  42313. sbb rax, QWORD PTR [rcx+144]
  42314. mov r9, QWORD PTR [r12+152]
  42315. mov QWORD PTR [r12+144], rax
  42316. sbb r9, QWORD PTR [rcx+152]
  42317. mov r10, QWORD PTR [r12+160]
  42318. mov QWORD PTR [r12+152], r9
  42319. sbb r10, QWORD PTR [rcx+160]
  42320. mov rax, QWORD PTR [r12+168]
  42321. mov QWORD PTR [r12+160], r10
  42322. sbb rax, QWORD PTR [rcx+168]
  42323. mov r9, QWORD PTR [r12+176]
  42324. mov QWORD PTR [r12+168], rax
  42325. sbb r9, QWORD PTR [rcx+176]
  42326. mov r10, QWORD PTR [r12+184]
  42327. mov QWORD PTR [r12+176], r9
  42328. sbb r10, QWORD PTR [rcx+184]
  42329. mov rax, QWORD PTR [r12+192]
  42330. mov QWORD PTR [r12+184], r10
  42331. sbb rax, QWORD PTR [rcx+192]
  42332. mov r9, QWORD PTR [r12+200]
  42333. mov QWORD PTR [r12+192], rax
  42334. sbb r9, QWORD PTR [rcx+200]
  42335. mov r10, QWORD PTR [r12+208]
  42336. mov QWORD PTR [r12+200], r9
  42337. sbb r10, QWORD PTR [rcx+208]
  42338. mov rax, QWORD PTR [r12+216]
  42339. mov QWORD PTR [r12+208], r10
  42340. sbb rax, QWORD PTR [rcx+216]
  42341. mov r9, QWORD PTR [r12+224]
  42342. mov QWORD PTR [r12+216], rax
  42343. sbb r9, QWORD PTR [rcx+224]
  42344. mov r10, QWORD PTR [r12+232]
  42345. mov QWORD PTR [r12+224], r9
  42346. sbb r10, QWORD PTR [rcx+232]
  42347. mov rax, QWORD PTR [r12+240]
  42348. mov QWORD PTR [r12+232], r10
  42349. sbb rax, QWORD PTR [rcx+240]
  42350. mov r9, QWORD PTR [r12+248]
  42351. mov QWORD PTR [r12+240], rax
  42352. sbb r9, QWORD PTR [rcx+248]
  42353. mov r10, QWORD PTR [r12+256]
  42354. mov QWORD PTR [r12+248], r9
  42355. sbb r10, QWORD PTR [rcx+256]
  42356. mov rax, QWORD PTR [r12+264]
  42357. mov QWORD PTR [r12+256], r10
  42358. sbb rax, QWORD PTR [rcx+264]
  42359. mov r9, QWORD PTR [r12+272]
  42360. mov QWORD PTR [r12+264], rax
  42361. sbb r9, QWORD PTR [rcx+272]
  42362. mov r10, QWORD PTR [r12+280]
  42363. mov QWORD PTR [r12+272], r9
  42364. sbb r10, QWORD PTR [rcx+280]
  42365. mov rax, QWORD PTR [r12+288]
  42366. mov QWORD PTR [r12+280], r10
  42367. sbb rax, QWORD PTR [rcx+288]
  42368. mov r9, QWORD PTR [r12+296]
  42369. mov QWORD PTR [r12+288], rax
  42370. sbb r9, QWORD PTR [rcx+296]
  42371. mov r10, QWORD PTR [r12+304]
  42372. mov QWORD PTR [r12+296], r9
  42373. sbb r10, QWORD PTR [rcx+304]
  42374. mov rax, QWORD PTR [r12+312]
  42375. mov QWORD PTR [r12+304], r10
  42376. sbb rax, QWORD PTR [rcx+312]
  42377. mov r9, QWORD PTR [r12+320]
  42378. mov QWORD PTR [r12+312], rax
  42379. sbb r9, QWORD PTR [rcx+320]
  42380. mov r10, QWORD PTR [r12+328]
  42381. mov QWORD PTR [r12+320], r9
  42382. sbb r10, QWORD PTR [rcx+328]
  42383. mov rax, QWORD PTR [r12+336]
  42384. mov QWORD PTR [r12+328], r10
  42385. sbb rax, QWORD PTR [rcx+336]
  42386. mov r9, QWORD PTR [r12+344]
  42387. mov QWORD PTR [r12+336], rax
  42388. sbb r9, QWORD PTR [rcx+344]
  42389. mov r10, QWORD PTR [r12+352]
  42390. mov QWORD PTR [r12+344], r9
  42391. sbb r10, QWORD PTR [rcx+352]
  42392. mov rax, QWORD PTR [r12+360]
  42393. mov QWORD PTR [r12+352], r10
  42394. sbb rax, QWORD PTR [rcx+360]
  42395. mov r9, QWORD PTR [r12+368]
  42396. mov QWORD PTR [r12+360], rax
  42397. sbb r9, QWORD PTR [rcx+368]
  42398. mov r10, QWORD PTR [r12+376]
  42399. mov QWORD PTR [r12+368], r9
  42400. sbb r10, QWORD PTR [rcx+376]
  42401. mov rax, QWORD PTR [r12+384]
  42402. mov QWORD PTR [r12+376], r10
  42403. sbb rax, QWORD PTR [rcx+384]
  42404. mov r9, QWORD PTR [r12+392]
  42405. mov QWORD PTR [r12+384], rax
  42406. sbb r9, QWORD PTR [rcx+392]
  42407. mov r10, QWORD PTR [r12+400]
  42408. mov QWORD PTR [r12+392], r9
  42409. sbb r10, QWORD PTR [rcx+400]
  42410. mov rax, QWORD PTR [r12+408]
  42411. mov QWORD PTR [r12+400], r10
  42412. sbb rax, QWORD PTR [rcx+408]
  42413. mov r9, QWORD PTR [r12+416]
  42414. mov QWORD PTR [r12+408], rax
  42415. sbb r9, QWORD PTR [rcx+416]
  42416. mov r10, QWORD PTR [r12+424]
  42417. mov QWORD PTR [r12+416], r9
  42418. sbb r10, QWORD PTR [rcx+424]
  42419. mov rax, QWORD PTR [r12+432]
  42420. mov QWORD PTR [r12+424], r10
  42421. sbb rax, QWORD PTR [rcx+432]
  42422. mov r9, QWORD PTR [r12+440]
  42423. mov QWORD PTR [r12+432], rax
  42424. sbb r9, QWORD PTR [rcx+440]
  42425. mov r10, QWORD PTR [r12+448]
  42426. mov QWORD PTR [r12+440], r9
  42427. sbb r10, QWORD PTR [rcx+448]
  42428. mov rax, QWORD PTR [r12+456]
  42429. mov QWORD PTR [r12+448], r10
  42430. sbb rax, QWORD PTR [rcx+456]
  42431. mov r9, QWORD PTR [r12+464]
  42432. mov QWORD PTR [r12+456], rax
  42433. sbb r9, QWORD PTR [rcx+464]
  42434. mov r10, QWORD PTR [r12+472]
  42435. mov QWORD PTR [r12+464], r9
  42436. sbb r10, QWORD PTR [rcx+472]
  42437. mov rax, QWORD PTR [r12+480]
  42438. mov QWORD PTR [r12+472], r10
  42439. sbb rax, QWORD PTR [rcx+480]
  42440. mov r9, QWORD PTR [r12+488]
  42441. mov QWORD PTR [r12+480], rax
  42442. sbb r9, QWORD PTR [rcx+488]
  42443. mov r10, QWORD PTR [r12+496]
  42444. mov QWORD PTR [r12+488], r9
  42445. sbb r10, QWORD PTR [rcx+496]
  42446. mov rax, QWORD PTR [r12+504]
  42447. mov QWORD PTR [r12+496], r10
  42448. sbb rax, QWORD PTR [rcx+504]
  42449. mov QWORD PTR [r12+504], rax
  42450. sbb r11, 0
  42451. sub rsi, 256
  42452. ; Add
  42453. mov rax, QWORD PTR [rsi]
  42454. add rax, QWORD PTR [r12]
  42455. mov r9, QWORD PTR [rsi+8]
  42456. mov QWORD PTR [rsi], rax
  42457. adc r9, QWORD PTR [r12+8]
  42458. mov r10, QWORD PTR [rsi+16]
  42459. mov QWORD PTR [rsi+8], r9
  42460. adc r10, QWORD PTR [r12+16]
  42461. mov rax, QWORD PTR [rsi+24]
  42462. mov QWORD PTR [rsi+16], r10
  42463. adc rax, QWORD PTR [r12+24]
  42464. mov r9, QWORD PTR [rsi+32]
  42465. mov QWORD PTR [rsi+24], rax
  42466. adc r9, QWORD PTR [r12+32]
  42467. mov r10, QWORD PTR [rsi+40]
  42468. mov QWORD PTR [rsi+32], r9
  42469. adc r10, QWORD PTR [r12+40]
  42470. mov rax, QWORD PTR [rsi+48]
  42471. mov QWORD PTR [rsi+40], r10
  42472. adc rax, QWORD PTR [r12+48]
  42473. mov r9, QWORD PTR [rsi+56]
  42474. mov QWORD PTR [rsi+48], rax
  42475. adc r9, QWORD PTR [r12+56]
  42476. mov r10, QWORD PTR [rsi+64]
  42477. mov QWORD PTR [rsi+56], r9
  42478. adc r10, QWORD PTR [r12+64]
  42479. mov rax, QWORD PTR [rsi+72]
  42480. mov QWORD PTR [rsi+64], r10
  42481. adc rax, QWORD PTR [r12+72]
  42482. mov r9, QWORD PTR [rsi+80]
  42483. mov QWORD PTR [rsi+72], rax
  42484. adc r9, QWORD PTR [r12+80]
  42485. mov r10, QWORD PTR [rsi+88]
  42486. mov QWORD PTR [rsi+80], r9
  42487. adc r10, QWORD PTR [r12+88]
  42488. mov rax, QWORD PTR [rsi+96]
  42489. mov QWORD PTR [rsi+88], r10
  42490. adc rax, QWORD PTR [r12+96]
  42491. mov r9, QWORD PTR [rsi+104]
  42492. mov QWORD PTR [rsi+96], rax
  42493. adc r9, QWORD PTR [r12+104]
  42494. mov r10, QWORD PTR [rsi+112]
  42495. mov QWORD PTR [rsi+104], r9
  42496. adc r10, QWORD PTR [r12+112]
  42497. mov rax, QWORD PTR [rsi+120]
  42498. mov QWORD PTR [rsi+112], r10
  42499. adc rax, QWORD PTR [r12+120]
  42500. mov r9, QWORD PTR [rsi+128]
  42501. mov QWORD PTR [rsi+120], rax
  42502. adc r9, QWORD PTR [r12+128]
  42503. mov r10, QWORD PTR [rsi+136]
  42504. mov QWORD PTR [rsi+128], r9
  42505. adc r10, QWORD PTR [r12+136]
  42506. mov rax, QWORD PTR [rsi+144]
  42507. mov QWORD PTR [rsi+136], r10
  42508. adc rax, QWORD PTR [r12+144]
  42509. mov r9, QWORD PTR [rsi+152]
  42510. mov QWORD PTR [rsi+144], rax
  42511. adc r9, QWORD PTR [r12+152]
  42512. mov r10, QWORD PTR [rsi+160]
  42513. mov QWORD PTR [rsi+152], r9
  42514. adc r10, QWORD PTR [r12+160]
  42515. mov rax, QWORD PTR [rsi+168]
  42516. mov QWORD PTR [rsi+160], r10
  42517. adc rax, QWORD PTR [r12+168]
  42518. mov r9, QWORD PTR [rsi+176]
  42519. mov QWORD PTR [rsi+168], rax
  42520. adc r9, QWORD PTR [r12+176]
  42521. mov r10, QWORD PTR [rsi+184]
  42522. mov QWORD PTR [rsi+176], r9
  42523. adc r10, QWORD PTR [r12+184]
  42524. mov rax, QWORD PTR [rsi+192]
  42525. mov QWORD PTR [rsi+184], r10
  42526. adc rax, QWORD PTR [r12+192]
  42527. mov r9, QWORD PTR [rsi+200]
  42528. mov QWORD PTR [rsi+192], rax
  42529. adc r9, QWORD PTR [r12+200]
  42530. mov r10, QWORD PTR [rsi+208]
  42531. mov QWORD PTR [rsi+200], r9
  42532. adc r10, QWORD PTR [r12+208]
  42533. mov rax, QWORD PTR [rsi+216]
  42534. mov QWORD PTR [rsi+208], r10
  42535. adc rax, QWORD PTR [r12+216]
  42536. mov r9, QWORD PTR [rsi+224]
  42537. mov QWORD PTR [rsi+216], rax
  42538. adc r9, QWORD PTR [r12+224]
  42539. mov r10, QWORD PTR [rsi+232]
  42540. mov QWORD PTR [rsi+224], r9
  42541. adc r10, QWORD PTR [r12+232]
  42542. mov rax, QWORD PTR [rsi+240]
  42543. mov QWORD PTR [rsi+232], r10
  42544. adc rax, QWORD PTR [r12+240]
  42545. mov r9, QWORD PTR [rsi+248]
  42546. mov QWORD PTR [rsi+240], rax
  42547. adc r9, QWORD PTR [r12+248]
  42548. mov r10, QWORD PTR [rsi+256]
  42549. mov QWORD PTR [rsi+248], r9
  42550. adc r10, QWORD PTR [r12+256]
  42551. mov rax, QWORD PTR [rsi+264]
  42552. mov QWORD PTR [rsi+256], r10
  42553. adc rax, QWORD PTR [r12+264]
  42554. mov r9, QWORD PTR [rsi+272]
  42555. mov QWORD PTR [rsi+264], rax
  42556. adc r9, QWORD PTR [r12+272]
  42557. mov r10, QWORD PTR [rsi+280]
  42558. mov QWORD PTR [rsi+272], r9
  42559. adc r10, QWORD PTR [r12+280]
  42560. mov rax, QWORD PTR [rsi+288]
  42561. mov QWORD PTR [rsi+280], r10
  42562. adc rax, QWORD PTR [r12+288]
  42563. mov r9, QWORD PTR [rsi+296]
  42564. mov QWORD PTR [rsi+288], rax
  42565. adc r9, QWORD PTR [r12+296]
  42566. mov r10, QWORD PTR [rsi+304]
  42567. mov QWORD PTR [rsi+296], r9
  42568. adc r10, QWORD PTR [r12+304]
  42569. mov rax, QWORD PTR [rsi+312]
  42570. mov QWORD PTR [rsi+304], r10
  42571. adc rax, QWORD PTR [r12+312]
  42572. mov r9, QWORD PTR [rsi+320]
  42573. mov QWORD PTR [rsi+312], rax
  42574. adc r9, QWORD PTR [r12+320]
  42575. mov r10, QWORD PTR [rsi+328]
  42576. mov QWORD PTR [rsi+320], r9
  42577. adc r10, QWORD PTR [r12+328]
  42578. mov rax, QWORD PTR [rsi+336]
  42579. mov QWORD PTR [rsi+328], r10
  42580. adc rax, QWORD PTR [r12+336]
  42581. mov r9, QWORD PTR [rsi+344]
  42582. mov QWORD PTR [rsi+336], rax
  42583. adc r9, QWORD PTR [r12+344]
  42584. mov r10, QWORD PTR [rsi+352]
  42585. mov QWORD PTR [rsi+344], r9
  42586. adc r10, QWORD PTR [r12+352]
  42587. mov rax, QWORD PTR [rsi+360]
  42588. mov QWORD PTR [rsi+352], r10
  42589. adc rax, QWORD PTR [r12+360]
  42590. mov r9, QWORD PTR [rsi+368]
  42591. mov QWORD PTR [rsi+360], rax
  42592. adc r9, QWORD PTR [r12+368]
  42593. mov r10, QWORD PTR [rsi+376]
  42594. mov QWORD PTR [rsi+368], r9
  42595. adc r10, QWORD PTR [r12+376]
  42596. mov rax, QWORD PTR [rsi+384]
  42597. mov QWORD PTR [rsi+376], r10
  42598. adc rax, QWORD PTR [r12+384]
  42599. mov r9, QWORD PTR [rsi+392]
  42600. mov QWORD PTR [rsi+384], rax
  42601. adc r9, QWORD PTR [r12+392]
  42602. mov r10, QWORD PTR [rsi+400]
  42603. mov QWORD PTR [rsi+392], r9
  42604. adc r10, QWORD PTR [r12+400]
  42605. mov rax, QWORD PTR [rsi+408]
  42606. mov QWORD PTR [rsi+400], r10
  42607. adc rax, QWORD PTR [r12+408]
  42608. mov r9, QWORD PTR [rsi+416]
  42609. mov QWORD PTR [rsi+408], rax
  42610. adc r9, QWORD PTR [r12+416]
  42611. mov r10, QWORD PTR [rsi+424]
  42612. mov QWORD PTR [rsi+416], r9
  42613. adc r10, QWORD PTR [r12+424]
  42614. mov rax, QWORD PTR [rsi+432]
  42615. mov QWORD PTR [rsi+424], r10
  42616. adc rax, QWORD PTR [r12+432]
  42617. mov r9, QWORD PTR [rsi+440]
  42618. mov QWORD PTR [rsi+432], rax
  42619. adc r9, QWORD PTR [r12+440]
  42620. mov r10, QWORD PTR [rsi+448]
  42621. mov QWORD PTR [rsi+440], r9
  42622. adc r10, QWORD PTR [r12+448]
  42623. mov rax, QWORD PTR [rsi+456]
  42624. mov QWORD PTR [rsi+448], r10
  42625. adc rax, QWORD PTR [r12+456]
  42626. mov r9, QWORD PTR [rsi+464]
  42627. mov QWORD PTR [rsi+456], rax
  42628. adc r9, QWORD PTR [r12+464]
  42629. mov r10, QWORD PTR [rsi+472]
  42630. mov QWORD PTR [rsi+464], r9
  42631. adc r10, QWORD PTR [r12+472]
  42632. mov rax, QWORD PTR [rsi+480]
  42633. mov QWORD PTR [rsi+472], r10
  42634. adc rax, QWORD PTR [r12+480]
  42635. mov r9, QWORD PTR [rsi+488]
  42636. mov QWORD PTR [rsi+480], rax
  42637. adc r9, QWORD PTR [r12+488]
  42638. mov r10, QWORD PTR [rsi+496]
  42639. mov QWORD PTR [rsi+488], r9
  42640. adc r10, QWORD PTR [r12+496]
  42641. mov rax, QWORD PTR [rsi+504]
  42642. mov QWORD PTR [rsi+496], r10
  42643. adc rax, QWORD PTR [r12+504]
  42644. mov QWORD PTR [rsi+504], rax
  42645. adc r11, 0
  42646. mov QWORD PTR [rcx+768], r11
  42647. add rsi, 256
  42648. ; Add
  42649. mov rax, QWORD PTR [rsi]
  42650. add rax, QWORD PTR [r13]
  42651. mov r9, QWORD PTR [rsi+8]
  42652. mov QWORD PTR [rsi], rax
  42653. adc r9, QWORD PTR [r13+8]
  42654. mov r10, QWORD PTR [rsi+16]
  42655. mov QWORD PTR [rsi+8], r9
  42656. adc r10, QWORD PTR [r13+16]
  42657. mov rax, QWORD PTR [rsi+24]
  42658. mov QWORD PTR [rsi+16], r10
  42659. adc rax, QWORD PTR [r13+24]
  42660. mov r9, QWORD PTR [rsi+32]
  42661. mov QWORD PTR [rsi+24], rax
  42662. adc r9, QWORD PTR [r13+32]
  42663. mov r10, QWORD PTR [rsi+40]
  42664. mov QWORD PTR [rsi+32], r9
  42665. adc r10, QWORD PTR [r13+40]
  42666. mov rax, QWORD PTR [rsi+48]
  42667. mov QWORD PTR [rsi+40], r10
  42668. adc rax, QWORD PTR [r13+48]
  42669. mov r9, QWORD PTR [rsi+56]
  42670. mov QWORD PTR [rsi+48], rax
  42671. adc r9, QWORD PTR [r13+56]
  42672. mov r10, QWORD PTR [rsi+64]
  42673. mov QWORD PTR [rsi+56], r9
  42674. adc r10, QWORD PTR [r13+64]
  42675. mov rax, QWORD PTR [rsi+72]
  42676. mov QWORD PTR [rsi+64], r10
  42677. adc rax, QWORD PTR [r13+72]
  42678. mov r9, QWORD PTR [rsi+80]
  42679. mov QWORD PTR [rsi+72], rax
  42680. adc r9, QWORD PTR [r13+80]
  42681. mov r10, QWORD PTR [rsi+88]
  42682. mov QWORD PTR [rsi+80], r9
  42683. adc r10, QWORD PTR [r13+88]
  42684. mov rax, QWORD PTR [rsi+96]
  42685. mov QWORD PTR [rsi+88], r10
  42686. adc rax, QWORD PTR [r13+96]
  42687. mov r9, QWORD PTR [rsi+104]
  42688. mov QWORD PTR [rsi+96], rax
  42689. adc r9, QWORD PTR [r13+104]
  42690. mov r10, QWORD PTR [rsi+112]
  42691. mov QWORD PTR [rsi+104], r9
  42692. adc r10, QWORD PTR [r13+112]
  42693. mov rax, QWORD PTR [rsi+120]
  42694. mov QWORD PTR [rsi+112], r10
  42695. adc rax, QWORD PTR [r13+120]
  42696. mov r9, QWORD PTR [rsi+128]
  42697. mov QWORD PTR [rsi+120], rax
  42698. adc r9, QWORD PTR [r13+128]
  42699. mov r10, QWORD PTR [rsi+136]
  42700. mov QWORD PTR [rsi+128], r9
  42701. adc r10, QWORD PTR [r13+136]
  42702. mov rax, QWORD PTR [rsi+144]
  42703. mov QWORD PTR [rsi+136], r10
  42704. adc rax, QWORD PTR [r13+144]
  42705. mov r9, QWORD PTR [rsi+152]
  42706. mov QWORD PTR [rsi+144], rax
  42707. adc r9, QWORD PTR [r13+152]
  42708. mov r10, QWORD PTR [rsi+160]
  42709. mov QWORD PTR [rsi+152], r9
  42710. adc r10, QWORD PTR [r13+160]
  42711. mov rax, QWORD PTR [rsi+168]
  42712. mov QWORD PTR [rsi+160], r10
  42713. adc rax, QWORD PTR [r13+168]
  42714. mov r9, QWORD PTR [rsi+176]
  42715. mov QWORD PTR [rsi+168], rax
  42716. adc r9, QWORD PTR [r13+176]
  42717. mov r10, QWORD PTR [rsi+184]
  42718. mov QWORD PTR [rsi+176], r9
  42719. adc r10, QWORD PTR [r13+184]
  42720. mov rax, QWORD PTR [rsi+192]
  42721. mov QWORD PTR [rsi+184], r10
  42722. adc rax, QWORD PTR [r13+192]
  42723. mov r9, QWORD PTR [rsi+200]
  42724. mov QWORD PTR [rsi+192], rax
  42725. adc r9, QWORD PTR [r13+200]
  42726. mov r10, QWORD PTR [rsi+208]
  42727. mov QWORD PTR [rsi+200], r9
  42728. adc r10, QWORD PTR [r13+208]
  42729. mov rax, QWORD PTR [rsi+216]
  42730. mov QWORD PTR [rsi+208], r10
  42731. adc rax, QWORD PTR [r13+216]
  42732. mov r9, QWORD PTR [rsi+224]
  42733. mov QWORD PTR [rsi+216], rax
  42734. adc r9, QWORD PTR [r13+224]
  42735. mov r10, QWORD PTR [rsi+232]
  42736. mov QWORD PTR [rsi+224], r9
  42737. adc r10, QWORD PTR [r13+232]
  42738. mov rax, QWORD PTR [rsi+240]
  42739. mov QWORD PTR [rsi+232], r10
  42740. adc rax, QWORD PTR [r13+240]
  42741. mov r9, QWORD PTR [rsi+248]
  42742. mov QWORD PTR [rsi+240], rax
  42743. adc r9, QWORD PTR [r13+248]
  42744. mov r10, QWORD PTR [rsi+256]
  42745. mov QWORD PTR [rsi+248], r9
  42746. adc r10, QWORD PTR [r13+256]
  42747. mov QWORD PTR [rsi+256], r10
  42748. ; Add to zero
  42749. mov rax, QWORD PTR [r13+264]
  42750. adc rax, 0
  42751. mov r9, QWORD PTR [r13+272]
  42752. mov QWORD PTR [rsi+264], rax
  42753. adc r9, 0
  42754. mov r10, QWORD PTR [r13+280]
  42755. mov QWORD PTR [rsi+272], r9
  42756. adc r10, 0
  42757. mov rax, QWORD PTR [r13+288]
  42758. mov QWORD PTR [rsi+280], r10
  42759. adc rax, 0
  42760. mov r9, QWORD PTR [r13+296]
  42761. mov QWORD PTR [rsi+288], rax
  42762. adc r9, 0
  42763. mov r10, QWORD PTR [r13+304]
  42764. mov QWORD PTR [rsi+296], r9
  42765. adc r10, 0
  42766. mov rax, QWORD PTR [r13+312]
  42767. mov QWORD PTR [rsi+304], r10
  42768. adc rax, 0
  42769. mov r9, QWORD PTR [r13+320]
  42770. mov QWORD PTR [rsi+312], rax
  42771. adc r9, 0
  42772. mov r10, QWORD PTR [r13+328]
  42773. mov QWORD PTR [rsi+320], r9
  42774. adc r10, 0
  42775. mov rax, QWORD PTR [r13+336]
  42776. mov QWORD PTR [rsi+328], r10
  42777. adc rax, 0
  42778. mov r9, QWORD PTR [r13+344]
  42779. mov QWORD PTR [rsi+336], rax
  42780. adc r9, 0
  42781. mov r10, QWORD PTR [r13+352]
  42782. mov QWORD PTR [rsi+344], r9
  42783. adc r10, 0
  42784. mov rax, QWORD PTR [r13+360]
  42785. mov QWORD PTR [rsi+352], r10
  42786. adc rax, 0
  42787. mov r9, QWORD PTR [r13+368]
  42788. mov QWORD PTR [rsi+360], rax
  42789. adc r9, 0
  42790. mov r10, QWORD PTR [r13+376]
  42791. mov QWORD PTR [rsi+368], r9
  42792. adc r10, 0
  42793. mov rax, QWORD PTR [r13+384]
  42794. mov QWORD PTR [rsi+376], r10
  42795. adc rax, 0
  42796. mov r9, QWORD PTR [r13+392]
  42797. mov QWORD PTR [rsi+384], rax
  42798. adc r9, 0
  42799. mov r10, QWORD PTR [r13+400]
  42800. mov QWORD PTR [rsi+392], r9
  42801. adc r10, 0
  42802. mov rax, QWORD PTR [r13+408]
  42803. mov QWORD PTR [rsi+400], r10
  42804. adc rax, 0
  42805. mov r9, QWORD PTR [r13+416]
  42806. mov QWORD PTR [rsi+408], rax
  42807. adc r9, 0
  42808. mov r10, QWORD PTR [r13+424]
  42809. mov QWORD PTR [rsi+416], r9
  42810. adc r10, 0
  42811. mov rax, QWORD PTR [r13+432]
  42812. mov QWORD PTR [rsi+424], r10
  42813. adc rax, 0
  42814. mov r9, QWORD PTR [r13+440]
  42815. mov QWORD PTR [rsi+432], rax
  42816. adc r9, 0
  42817. mov r10, QWORD PTR [r13+448]
  42818. mov QWORD PTR [rsi+440], r9
  42819. adc r10, 0
  42820. mov rax, QWORD PTR [r13+456]
  42821. mov QWORD PTR [rsi+448], r10
  42822. adc rax, 0
  42823. mov r9, QWORD PTR [r13+464]
  42824. mov QWORD PTR [rsi+456], rax
  42825. adc r9, 0
  42826. mov r10, QWORD PTR [r13+472]
  42827. mov QWORD PTR [rsi+464], r9
  42828. adc r10, 0
  42829. mov rax, QWORD PTR [r13+480]
  42830. mov QWORD PTR [rsi+472], r10
  42831. adc rax, 0
  42832. mov r9, QWORD PTR [r13+488]
  42833. mov QWORD PTR [rsi+480], rax
  42834. adc r9, 0
  42835. mov r10, QWORD PTR [r13+496]
  42836. mov QWORD PTR [rsi+488], r9
  42837. adc r10, 0
  42838. mov rax, QWORD PTR [r13+504]
  42839. mov QWORD PTR [rsi+496], r10
  42840. adc rax, 0
  42841. mov QWORD PTR [rsi+504], rax
  42842. add rsp, 1576
  42843. pop rsi
  42844. pop rdi
  42845. pop r15
  42846. pop r14
  42847. pop r13
  42848. pop r12
  42849. ret
  42850. sp_4096_mul_64 ENDP
  42851. _text ENDS
  42852. IFDEF HAVE_INTEL_AVX2
  42853. ; /* Multiply a and b into r. (r = a * b)
  42854. ; *
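  42855. ; * r A single precision integer. Receives the product.
  42856. ; * a A single precision integer. First operand (64 64-bit words are read).
  42857. ; * b A single precision integer. Second operand (64 64-bit words are read).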
  42858. ; */
  42859. _text SEGMENT READONLY PARA
  42860. sp_4096_mul_avx2_64 PROC
  42861. push r12
  42862. push r13
  42863. push r14
  42864. push r15
  42865. push rdi
  42866. push rsi
  42867. sub rsp, 1576
  42868. mov QWORD PTR [rsp+1536], rcx
  42869. mov QWORD PTR [rsp+1544], rdx
  42870. mov QWORD PTR [rsp+1552], r8
  42871. lea r12, QWORD PTR [rsp+1024]
  42872. lea r14, QWORD PTR [rdx+256]
  42873. ; Add
  42874. mov rax, QWORD PTR [rdx]
  42875. xor r15, r15
  42876. add rax, QWORD PTR [r14]
  42877. mov r9, QWORD PTR [rdx+8]
  42878. mov QWORD PTR [r12], rax
  42879. adc r9, QWORD PTR [r14+8]
  42880. mov r10, QWORD PTR [rdx+16]
  42881. mov QWORD PTR [r12+8], r9
  42882. adc r10, QWORD PTR [r14+16]
  42883. mov rax, QWORD PTR [rdx+24]
  42884. mov QWORD PTR [r12+16], r10
  42885. adc rax, QWORD PTR [r14+24]
  42886. mov r9, QWORD PTR [rdx+32]
  42887. mov QWORD PTR [r12+24], rax
  42888. adc r9, QWORD PTR [r14+32]
  42889. mov r10, QWORD PTR [rdx+40]
  42890. mov QWORD PTR [r12+32], r9
  42891. adc r10, QWORD PTR [r14+40]
  42892. mov rax, QWORD PTR [rdx+48]
  42893. mov QWORD PTR [r12+40], r10
  42894. adc rax, QWORD PTR [r14+48]
  42895. mov r9, QWORD PTR [rdx+56]
  42896. mov QWORD PTR [r12+48], rax
  42897. adc r9, QWORD PTR [r14+56]
  42898. mov r10, QWORD PTR [rdx+64]
  42899. mov QWORD PTR [r12+56], r9
  42900. adc r10, QWORD PTR [r14+64]
  42901. mov rax, QWORD PTR [rdx+72]
  42902. mov QWORD PTR [r12+64], r10
  42903. adc rax, QWORD PTR [r14+72]
  42904. mov r9, QWORD PTR [rdx+80]
  42905. mov QWORD PTR [r12+72], rax
  42906. adc r9, QWORD PTR [r14+80]
  42907. mov r10, QWORD PTR [rdx+88]
  42908. mov QWORD PTR [r12+80], r9
  42909. adc r10, QWORD PTR [r14+88]
  42910. mov rax, QWORD PTR [rdx+96]
  42911. mov QWORD PTR [r12+88], r10
  42912. adc rax, QWORD PTR [r14+96]
  42913. mov r9, QWORD PTR [rdx+104]
  42914. mov QWORD PTR [r12+96], rax
  42915. adc r9, QWORD PTR [r14+104]
  42916. mov r10, QWORD PTR [rdx+112]
  42917. mov QWORD PTR [r12+104], r9
  42918. adc r10, QWORD PTR [r14+112]
  42919. mov rax, QWORD PTR [rdx+120]
  42920. mov QWORD PTR [r12+112], r10
  42921. adc rax, QWORD PTR [r14+120]
  42922. mov r9, QWORD PTR [rdx+128]
  42923. mov QWORD PTR [r12+120], rax
  42924. adc r9, QWORD PTR [r14+128]
  42925. mov r10, QWORD PTR [rdx+136]
  42926. mov QWORD PTR [r12+128], r9
  42927. adc r10, QWORD PTR [r14+136]
  42928. mov rax, QWORD PTR [rdx+144]
  42929. mov QWORD PTR [r12+136], r10
  42930. adc rax, QWORD PTR [r14+144]
  42931. mov r9, QWORD PTR [rdx+152]
  42932. mov QWORD PTR [r12+144], rax
  42933. adc r9, QWORD PTR [r14+152]
  42934. mov r10, QWORD PTR [rdx+160]
  42935. mov QWORD PTR [r12+152], r9
  42936. adc r10, QWORD PTR [r14+160]
  42937. mov rax, QWORD PTR [rdx+168]
  42938. mov QWORD PTR [r12+160], r10
  42939. adc rax, QWORD PTR [r14+168]
  42940. mov r9, QWORD PTR [rdx+176]
  42941. mov QWORD PTR [r12+168], rax
  42942. adc r9, QWORD PTR [r14+176]
  42943. mov r10, QWORD PTR [rdx+184]
  42944. mov QWORD PTR [r12+176], r9
  42945. adc r10, QWORD PTR [r14+184]
  42946. mov rax, QWORD PTR [rdx+192]
  42947. mov QWORD PTR [r12+184], r10
  42948. adc rax, QWORD PTR [r14+192]
  42949. mov r9, QWORD PTR [rdx+200]
  42950. mov QWORD PTR [r12+192], rax
  42951. adc r9, QWORD PTR [r14+200]
  42952. mov r10, QWORD PTR [rdx+208]
  42953. mov QWORD PTR [r12+200], r9
  42954. adc r10, QWORD PTR [r14+208]
  42955. mov rax, QWORD PTR [rdx+216]
  42956. mov QWORD PTR [r12+208], r10
  42957. adc rax, QWORD PTR [r14+216]
  42958. mov r9, QWORD PTR [rdx+224]
  42959. mov QWORD PTR [r12+216], rax
  42960. adc r9, QWORD PTR [r14+224]
  42961. mov r10, QWORD PTR [rdx+232]
  42962. mov QWORD PTR [r12+224], r9
  42963. adc r10, QWORD PTR [r14+232]
  42964. mov rax, QWORD PTR [rdx+240]
  42965. mov QWORD PTR [r12+232], r10
  42966. adc rax, QWORD PTR [r14+240]
  42967. mov r9, QWORD PTR [rdx+248]
  42968. mov QWORD PTR [r12+240], rax
  42969. adc r9, QWORD PTR [r14+248]
  42970. mov QWORD PTR [r12+248], r9
  42971. adc r15, 0
  42972. mov QWORD PTR [rsp+1560], r15
  42973. lea r13, QWORD PTR [rsp+1280]
  42974. lea r14, QWORD PTR [r8+256]
  42975. ; Add
  42976. mov rax, QWORD PTR [r8]
  42977. xor rdi, rdi
  42978. add rax, QWORD PTR [r14]
  42979. mov r9, QWORD PTR [r8+8]
  42980. mov QWORD PTR [r13], rax
  42981. adc r9, QWORD PTR [r14+8]
  42982. mov r10, QWORD PTR [r8+16]
  42983. mov QWORD PTR [r13+8], r9
  42984. adc r10, QWORD PTR [r14+16]
  42985. mov rax, QWORD PTR [r8+24]
  42986. mov QWORD PTR [r13+16], r10
  42987. adc rax, QWORD PTR [r14+24]
  42988. mov r9, QWORD PTR [r8+32]
  42989. mov QWORD PTR [r13+24], rax
  42990. adc r9, QWORD PTR [r14+32]
  42991. mov r10, QWORD PTR [r8+40]
  42992. mov QWORD PTR [r13+32], r9
  42993. adc r10, QWORD PTR [r14+40]
  42994. mov rax, QWORD PTR [r8+48]
  42995. mov QWORD PTR [r13+40], r10
  42996. adc rax, QWORD PTR [r14+48]
  42997. mov r9, QWORD PTR [r8+56]
  42998. mov QWORD PTR [r13+48], rax
  42999. adc r9, QWORD PTR [r14+56]
  43000. mov r10, QWORD PTR [r8+64]
  43001. mov QWORD PTR [r13+56], r9
  43002. adc r10, QWORD PTR [r14+64]
  43003. mov rax, QWORD PTR [r8+72]
  43004. mov QWORD PTR [r13+64], r10
  43005. adc rax, QWORD PTR [r14+72]
  43006. mov r9, QWORD PTR [r8+80]
  43007. mov QWORD PTR [r13+72], rax
  43008. adc r9, QWORD PTR [r14+80]
  43009. mov r10, QWORD PTR [r8+88]
  43010. mov QWORD PTR [r13+80], r9
  43011. adc r10, QWORD PTR [r14+88]
  43012. mov rax, QWORD PTR [r8+96]
  43013. mov QWORD PTR [r13+88], r10
  43014. adc rax, QWORD PTR [r14+96]
  43015. mov r9, QWORD PTR [r8+104]
  43016. mov QWORD PTR [r13+96], rax
  43017. adc r9, QWORD PTR [r14+104]
  43018. mov r10, QWORD PTR [r8+112]
  43019. mov QWORD PTR [r13+104], r9
  43020. adc r10, QWORD PTR [r14+112]
  43021. mov rax, QWORD PTR [r8+120]
  43022. mov QWORD PTR [r13+112], r10
  43023. adc rax, QWORD PTR [r14+120]
  43024. mov r9, QWORD PTR [r8+128]
  43025. mov QWORD PTR [r13+120], rax
  43026. adc r9, QWORD PTR [r14+128]
  43027. mov r10, QWORD PTR [r8+136]
  43028. mov QWORD PTR [r13+128], r9
  43029. adc r10, QWORD PTR [r14+136]
  43030. mov rax, QWORD PTR [r8+144]
  43031. mov QWORD PTR [r13+136], r10
  43032. adc rax, QWORD PTR [r14+144]
  43033. mov r9, QWORD PTR [r8+152]
  43034. mov QWORD PTR [r13+144], rax
  43035. adc r9, QWORD PTR [r14+152]
  43036. mov r10, QWORD PTR [r8+160]
  43037. mov QWORD PTR [r13+152], r9
  43038. adc r10, QWORD PTR [r14+160]
  43039. mov rax, QWORD PTR [r8+168]
  43040. mov QWORD PTR [r13+160], r10
  43041. adc rax, QWORD PTR [r14+168]
  43042. mov r9, QWORD PTR [r8+176]
  43043. mov QWORD PTR [r13+168], rax
  43044. adc r9, QWORD PTR [r14+176]
  43045. mov r10, QWORD PTR [r8+184]
  43046. mov QWORD PTR [r13+176], r9
  43047. adc r10, QWORD PTR [r14+184]
  43048. mov rax, QWORD PTR [r8+192]
  43049. mov QWORD PTR [r13+184], r10
  43050. adc rax, QWORD PTR [r14+192]
  43051. mov r9, QWORD PTR [r8+200]
  43052. mov QWORD PTR [r13+192], rax
  43053. adc r9, QWORD PTR [r14+200]
  43054. mov r10, QWORD PTR [r8+208]
  43055. mov QWORD PTR [r13+200], r9
  43056. adc r10, QWORD PTR [r14+208]
  43057. mov rax, QWORD PTR [r8+216]
  43058. mov QWORD PTR [r13+208], r10
  43059. adc rax, QWORD PTR [r14+216]
  43060. mov r9, QWORD PTR [r8+224]
  43061. mov QWORD PTR [r13+216], rax
  43062. adc r9, QWORD PTR [r14+224]
  43063. mov r10, QWORD PTR [r8+232]
  43064. mov QWORD PTR [r13+224], r9
  43065. adc r10, QWORD PTR [r14+232]
  43066. mov rax, QWORD PTR [r8+240]
  43067. mov QWORD PTR [r13+232], r10
  43068. adc rax, QWORD PTR [r14+240]
  43069. mov r9, QWORD PTR [r8+248]
  43070. mov QWORD PTR [r13+240], rax
  43071. adc r9, QWORD PTR [r14+248]
  43072. mov QWORD PTR [r13+248], r9
  43073. adc rdi, 0
  43074. mov QWORD PTR [rsp+1568], rdi
  43075. mov r8, r13
  43076. mov rdx, r12
  43077. mov rcx, rsp
  43078. call sp_2048_mul_avx2_32
  43079. mov r8, QWORD PTR [rsp+1552]
  43080. mov rdx, QWORD PTR [rsp+1544]
  43081. lea rcx, QWORD PTR [rsp+512]
  43082. add r8, 256
  43083. add rdx, 256
  43084. call sp_2048_mul_avx2_32
  43085. mov r8, QWORD PTR [rsp+1552]
  43086. mov rdx, QWORD PTR [rsp+1544]
  43087. mov rcx, QWORD PTR [rsp+1536]
  43088. call sp_2048_mul_avx2_32
  43089. IFDEF _WIN64
  43090. mov r8, QWORD PTR [rsp+1552]
  43091. mov rdx, QWORD PTR [rsp+1544]
  43092. mov rcx, QWORD PTR [rsp+1536]
  43093. ENDIF
  43094. mov r15, QWORD PTR [rsp+1560]
  43095. mov rdi, QWORD PTR [rsp+1568]
  43096. mov rsi, QWORD PTR [rsp+1536]
  43097. mov r11, r15
  43098. lea r12, QWORD PTR [rsp+1024]
  43099. lea r13, QWORD PTR [rsp+1280]
  43100. and r11, rdi
  43101. neg r15
  43102. neg rdi
  43103. add rsi, 512
  43104. mov rax, QWORD PTR [r12]
  43105. mov r9, QWORD PTR [r13]
  43106. pext rax, rax, rdi
  43107. pext r9, r9, r15
  43108. add rax, r9
  43109. mov r9, QWORD PTR [r12+8]
  43110. mov r10, QWORD PTR [r13+8]
  43111. pext r9, r9, rdi
  43112. pext r10, r10, r15
  43113. mov QWORD PTR [rsi], rax
  43114. adc r9, r10
  43115. mov r10, QWORD PTR [r12+16]
  43116. mov rax, QWORD PTR [r13+16]
  43117. pext r10, r10, rdi
  43118. pext rax, rax, r15
  43119. mov QWORD PTR [rsi+8], r9
  43120. adc r10, rax
  43121. mov rax, QWORD PTR [r12+24]
  43122. mov r9, QWORD PTR [r13+24]
  43123. pext rax, rax, rdi
  43124. pext r9, r9, r15
  43125. mov QWORD PTR [rsi+16], r10
  43126. adc rax, r9
  43127. mov r9, QWORD PTR [r12+32]
  43128. mov r10, QWORD PTR [r13+32]
  43129. pext r9, r9, rdi
  43130. pext r10, r10, r15
  43131. mov QWORD PTR [rsi+24], rax
  43132. adc r9, r10
  43133. mov r10, QWORD PTR [r12+40]
  43134. mov rax, QWORD PTR [r13+40]
  43135. pext r10, r10, rdi
  43136. pext rax, rax, r15
  43137. mov QWORD PTR [rsi+32], r9
  43138. adc r10, rax
  43139. mov rax, QWORD PTR [r12+48]
  43140. mov r9, QWORD PTR [r13+48]
  43141. pext rax, rax, rdi
  43142. pext r9, r9, r15
  43143. mov QWORD PTR [rsi+40], r10
  43144. adc rax, r9
  43145. mov r9, QWORD PTR [r12+56]
  43146. mov r10, QWORD PTR [r13+56]
  43147. pext r9, r9, rdi
  43148. pext r10, r10, r15
  43149. mov QWORD PTR [rsi+48], rax
  43150. adc r9, r10
  43151. mov r10, QWORD PTR [r12+64]
  43152. mov rax, QWORD PTR [r13+64]
  43153. pext r10, r10, rdi
  43154. pext rax, rax, r15
  43155. mov QWORD PTR [rsi+56], r9
  43156. adc r10, rax
  43157. mov rax, QWORD PTR [r12+72]
  43158. mov r9, QWORD PTR [r13+72]
  43159. pext rax, rax, rdi
  43160. pext r9, r9, r15
  43161. mov QWORD PTR [rsi+64], r10
  43162. adc rax, r9
  43163. mov r9, QWORD PTR [r12+80]
  43164. mov r10, QWORD PTR [r13+80]
  43165. pext r9, r9, rdi
  43166. pext r10, r10, r15
  43167. mov QWORD PTR [rsi+72], rax
  43168. adc r9, r10
  43169. mov r10, QWORD PTR [r12+88]
  43170. mov rax, QWORD PTR [r13+88]
  43171. pext r10, r10, rdi
  43172. pext rax, rax, r15
  43173. mov QWORD PTR [rsi+80], r9
  43174. adc r10, rax
  43175. mov rax, QWORD PTR [r12+96]
  43176. mov r9, QWORD PTR [r13+96]
  43177. pext rax, rax, rdi
  43178. pext r9, r9, r15
  43179. mov QWORD PTR [rsi+88], r10
  43180. adc rax, r9
  43181. mov r9, QWORD PTR [r12+104]
  43182. mov r10, QWORD PTR [r13+104]
  43183. pext r9, r9, rdi
  43184. pext r10, r10, r15
  43185. mov QWORD PTR [rsi+96], rax
  43186. adc r9, r10
  43187. mov r10, QWORD PTR [r12+112]
  43188. mov rax, QWORD PTR [r13+112]
  43189. pext r10, r10, rdi
  43190. pext rax, rax, r15
  43191. mov QWORD PTR [rsi+104], r9
  43192. adc r10, rax
  43193. mov rax, QWORD PTR [r12+120]
  43194. mov r9, QWORD PTR [r13+120]
  43195. pext rax, rax, rdi
  43196. pext r9, r9, r15
  43197. mov QWORD PTR [rsi+112], r10
  43198. adc rax, r9
  43199. mov r9, QWORD PTR [r12+128]
  43200. mov r10, QWORD PTR [r13+128]
  43201. pext r9, r9, rdi
  43202. pext r10, r10, r15
  43203. mov QWORD PTR [rsi+120], rax
  43204. adc r9, r10
  43205. mov r10, QWORD PTR [r12+136]
  43206. mov rax, QWORD PTR [r13+136]
  43207. pext r10, r10, rdi
  43208. pext rax, rax, r15
  43209. mov QWORD PTR [rsi+128], r9
  43210. adc r10, rax
  43211. mov rax, QWORD PTR [r12+144]
  43212. mov r9, QWORD PTR [r13+144]
  43213. pext rax, rax, rdi
  43214. pext r9, r9, r15
  43215. mov QWORD PTR [rsi+136], r10
  43216. adc rax, r9
  43217. mov r9, QWORD PTR [r12+152]
  43218. mov r10, QWORD PTR [r13+152]
  43219. pext r9, r9, rdi
  43220. pext r10, r10, r15
  43221. mov QWORD PTR [rsi+144], rax
  43222. adc r9, r10
  43223. mov r10, QWORD PTR [r12+160]
  43224. mov rax, QWORD PTR [r13+160]
  43225. pext r10, r10, rdi
  43226. pext rax, rax, r15
  43227. mov QWORD PTR [rsi+152], r9
  43228. adc r10, rax
  43229. mov rax, QWORD PTR [r12+168]
  43230. mov r9, QWORD PTR [r13+168]
  43231. pext rax, rax, rdi
  43232. pext r9, r9, r15
  43233. mov QWORD PTR [rsi+160], r10
  43234. adc rax, r9
  43235. mov r9, QWORD PTR [r12+176]
  43236. mov r10, QWORD PTR [r13+176]
  43237. pext r9, r9, rdi
  43238. pext r10, r10, r15
  43239. mov QWORD PTR [rsi+168], rax
  43240. adc r9, r10
  43241. mov r10, QWORD PTR [r12+184]
  43242. mov rax, QWORD PTR [r13+184]
  43243. pext r10, r10, rdi
  43244. pext rax, rax, r15
  43245. mov QWORD PTR [rsi+176], r9
  43246. adc r10, rax
  43247. mov rax, QWORD PTR [r12+192]
  43248. mov r9, QWORD PTR [r13+192]
  43249. pext rax, rax, rdi
  43250. pext r9, r9, r15
  43251. mov QWORD PTR [rsi+184], r10
  43252. adc rax, r9
  43253. mov r9, QWORD PTR [r12+200]
  43254. mov r10, QWORD PTR [r13+200]
  43255. pext r9, r9, rdi
  43256. pext r10, r10, r15
  43257. mov QWORD PTR [rsi+192], rax
  43258. adc r9, r10
  43259. mov r10, QWORD PTR [r12+208]
  43260. mov rax, QWORD PTR [r13+208]
  43261. pext r10, r10, rdi
  43262. pext rax, rax, r15
  43263. mov QWORD PTR [rsi+200], r9
  43264. adc r10, rax
  43265. mov rax, QWORD PTR [r12+216]
  43266. mov r9, QWORD PTR [r13+216]
  43267. pext rax, rax, rdi
  43268. pext r9, r9, r15
  43269. mov QWORD PTR [rsi+208], r10
  43270. adc rax, r9
  43271. mov r9, QWORD PTR [r12+224]
  43272. mov r10, QWORD PTR [r13+224]
  43273. pext r9, r9, rdi
  43274. pext r10, r10, r15
  43275. mov QWORD PTR [rsi+216], rax
  43276. adc r9, r10
  43277. mov r10, QWORD PTR [r12+232]
  43278. mov rax, QWORD PTR [r13+232]
  43279. pext r10, r10, rdi
  43280. pext rax, rax, r15
  43281. mov QWORD PTR [rsi+224], r9
  43282. adc r10, rax
  43283. mov rax, QWORD PTR [r12+240]
  43284. mov r9, QWORD PTR [r13+240]
  43285. pext rax, rax, rdi
  43286. pext r9, r9, r15
  43287. mov QWORD PTR [rsi+232], r10
  43288. adc rax, r9
  43289. mov r9, QWORD PTR [r12+248]
  43290. mov r10, QWORD PTR [r13+248]
  43291. pext r9, r9, rdi
  43292. pext r10, r10, r15
  43293. mov QWORD PTR [rsi+240], rax
  43294. adc r9, r10
  43295. mov QWORD PTR [rsi+248], r9
  43296. adc r11, 0
  43297. lea r13, QWORD PTR [rsp+512]
  43298. mov r12, rsp
  43299. mov rax, QWORD PTR [r12]
  43300. sub rax, QWORD PTR [r13]
  43301. mov r9, QWORD PTR [r12+8]
  43302. mov QWORD PTR [r12], rax
  43303. sbb r9, QWORD PTR [r13+8]
  43304. mov r10, QWORD PTR [r12+16]
  43305. mov QWORD PTR [r12+8], r9
  43306. sbb r10, QWORD PTR [r13+16]
  43307. mov rax, QWORD PTR [r12+24]
  43308. mov QWORD PTR [r12+16], r10
  43309. sbb rax, QWORD PTR [r13+24]
  43310. mov r9, QWORD PTR [r12+32]
  43311. mov QWORD PTR [r12+24], rax
  43312. sbb r9, QWORD PTR [r13+32]
  43313. mov r10, QWORD PTR [r12+40]
  43314. mov QWORD PTR [r12+32], r9
  43315. sbb r10, QWORD PTR [r13+40]
  43316. mov rax, QWORD PTR [r12+48]
  43317. mov QWORD PTR [r12+40], r10
  43318. sbb rax, QWORD PTR [r13+48]
  43319. mov r9, QWORD PTR [r12+56]
  43320. mov QWORD PTR [r12+48], rax
  43321. sbb r9, QWORD PTR [r13+56]
  43322. mov r10, QWORD PTR [r12+64]
  43323. mov QWORD PTR [r12+56], r9
  43324. sbb r10, QWORD PTR [r13+64]
  43325. mov rax, QWORD PTR [r12+72]
  43326. mov QWORD PTR [r12+64], r10
  43327. sbb rax, QWORD PTR [r13+72]
  43328. mov r9, QWORD PTR [r12+80]
  43329. mov QWORD PTR [r12+72], rax
  43330. sbb r9, QWORD PTR [r13+80]
  43331. mov r10, QWORD PTR [r12+88]
  43332. mov QWORD PTR [r12+80], r9
  43333. sbb r10, QWORD PTR [r13+88]
  43334. mov rax, QWORD PTR [r12+96]
  43335. mov QWORD PTR [r12+88], r10
  43336. sbb rax, QWORD PTR [r13+96]
  43337. mov r9, QWORD PTR [r12+104]
  43338. mov QWORD PTR [r12+96], rax
  43339. sbb r9, QWORD PTR [r13+104]
  43340. mov r10, QWORD PTR [r12+112]
  43341. mov QWORD PTR [r12+104], r9
  43342. sbb r10, QWORD PTR [r13+112]
  43343. mov rax, QWORD PTR [r12+120]
  43344. mov QWORD PTR [r12+112], r10
  43345. sbb rax, QWORD PTR [r13+120]
  43346. mov r9, QWORD PTR [r12+128]
  43347. mov QWORD PTR [r12+120], rax
  43348. sbb r9, QWORD PTR [r13+128]
  43349. mov r10, QWORD PTR [r12+136]
  43350. mov QWORD PTR [r12+128], r9
  43351. sbb r10, QWORD PTR [r13+136]
  43352. mov rax, QWORD PTR [r12+144]
  43353. mov QWORD PTR [r12+136], r10
  43354. sbb rax, QWORD PTR [r13+144]
  43355. mov r9, QWORD PTR [r12+152]
  43356. mov QWORD PTR [r12+144], rax
  43357. sbb r9, QWORD PTR [r13+152]
  43358. mov r10, QWORD PTR [r12+160]
  43359. mov QWORD PTR [r12+152], r9
  43360. sbb r10, QWORD PTR [r13+160]
  43361. mov rax, QWORD PTR [r12+168]
  43362. mov QWORD PTR [r12+160], r10
  43363. sbb rax, QWORD PTR [r13+168]
  43364. mov r9, QWORD PTR [r12+176]
  43365. mov QWORD PTR [r12+168], rax
  43366. sbb r9, QWORD PTR [r13+176]
  43367. mov r10, QWORD PTR [r12+184]
  43368. mov QWORD PTR [r12+176], r9
  43369. sbb r10, QWORD PTR [r13+184]
  43370. mov rax, QWORD PTR [r12+192]
  43371. mov QWORD PTR [r12+184], r10
  43372. sbb rax, QWORD PTR [r13+192]
  43373. mov r9, QWORD PTR [r12+200]
  43374. mov QWORD PTR [r12+192], rax
  43375. sbb r9, QWORD PTR [r13+200]
  43376. mov r10, QWORD PTR [r12+208]
  43377. mov QWORD PTR [r12+200], r9
  43378. sbb r10, QWORD PTR [r13+208]
  43379. mov rax, QWORD PTR [r12+216]
  43380. mov QWORD PTR [r12+208], r10
  43381. sbb rax, QWORD PTR [r13+216]
  43382. mov r9, QWORD PTR [r12+224]
  43383. mov QWORD PTR [r12+216], rax
  43384. sbb r9, QWORD PTR [r13+224]
  43385. mov r10, QWORD PTR [r12+232]
  43386. mov QWORD PTR [r12+224], r9
  43387. sbb r10, QWORD PTR [r13+232]
  43388. mov rax, QWORD PTR [r12+240]
  43389. mov QWORD PTR [r12+232], r10
  43390. sbb rax, QWORD PTR [r13+240]
  43391. mov r9, QWORD PTR [r12+248]
  43392. mov QWORD PTR [r12+240], rax
  43393. sbb r9, QWORD PTR [r13+248]
  43394. mov r10, QWORD PTR [r12+256]
  43395. mov QWORD PTR [r12+248], r9
  43396. sbb r10, QWORD PTR [r13+256]
  43397. mov rax, QWORD PTR [r12+264]
  43398. mov QWORD PTR [r12+256], r10
  43399. sbb rax, QWORD PTR [r13+264]
  43400. mov r9, QWORD PTR [r12+272]
  43401. mov QWORD PTR [r12+264], rax
  43402. sbb r9, QWORD PTR [r13+272]
  43403. mov r10, QWORD PTR [r12+280]
  43404. mov QWORD PTR [r12+272], r9
  43405. sbb r10, QWORD PTR [r13+280]
  43406. mov rax, QWORD PTR [r12+288]
  43407. mov QWORD PTR [r12+280], r10
  43408. sbb rax, QWORD PTR [r13+288]
  43409. mov r9, QWORD PTR [r12+296]
  43410. mov QWORD PTR [r12+288], rax
  43411. sbb r9, QWORD PTR [r13+296]
  43412. mov r10, QWORD PTR [r12+304]
  43413. mov QWORD PTR [r12+296], r9
  43414. sbb r10, QWORD PTR [r13+304]
  43415. mov rax, QWORD PTR [r12+312]
  43416. mov QWORD PTR [r12+304], r10
  43417. sbb rax, QWORD PTR [r13+312]
  43418. mov r9, QWORD PTR [r12+320]
  43419. mov QWORD PTR [r12+312], rax
  43420. sbb r9, QWORD PTR [r13+320]
  43421. mov r10, QWORD PTR [r12+328]
  43422. mov QWORD PTR [r12+320], r9
  43423. sbb r10, QWORD PTR [r13+328]
  43424. mov rax, QWORD PTR [r12+336]
  43425. mov QWORD PTR [r12+328], r10
  43426. sbb rax, QWORD PTR [r13+336]
  43427. mov r9, QWORD PTR [r12+344]
  43428. mov QWORD PTR [r12+336], rax
  43429. sbb r9, QWORD PTR [r13+344]
  43430. mov r10, QWORD PTR [r12+352]
  43431. mov QWORD PTR [r12+344], r9
  43432. sbb r10, QWORD PTR [r13+352]
  43433. mov rax, QWORD PTR [r12+360]
  43434. mov QWORD PTR [r12+352], r10
  43435. sbb rax, QWORD PTR [r13+360]
  43436. mov r9, QWORD PTR [r12+368]
  43437. mov QWORD PTR [r12+360], rax
  43438. sbb r9, QWORD PTR [r13+368]
  43439. mov r10, QWORD PTR [r12+376]
  43440. mov QWORD PTR [r12+368], r9
  43441. sbb r10, QWORD PTR [r13+376]
  43442. mov rax, QWORD PTR [r12+384]
  43443. mov QWORD PTR [r12+376], r10
  43444. sbb rax, QWORD PTR [r13+384]
  43445. mov r9, QWORD PTR [r12+392]
  43446. mov QWORD PTR [r12+384], rax
  43447. sbb r9, QWORD PTR [r13+392]
  43448. mov r10, QWORD PTR [r12+400]
  43449. mov QWORD PTR [r12+392], r9
  43450. sbb r10, QWORD PTR [r13+400]
  43451. mov rax, QWORD PTR [r12+408]
  43452. mov QWORD PTR [r12+400], r10
  43453. sbb rax, QWORD PTR [r13+408]
  43454. mov r9, QWORD PTR [r12+416]
  43455. mov QWORD PTR [r12+408], rax
  43456. sbb r9, QWORD PTR [r13+416]
  43457. mov r10, QWORD PTR [r12+424]
  43458. mov QWORD PTR [r12+416], r9
  43459. sbb r10, QWORD PTR [r13+424]
  43460. mov rax, QWORD PTR [r12+432]
  43461. mov QWORD PTR [r12+424], r10
  43462. sbb rax, QWORD PTR [r13+432]
  43463. mov r9, QWORD PTR [r12+440]
  43464. mov QWORD PTR [r12+432], rax
  43465. sbb r9, QWORD PTR [r13+440]
  43466. mov r10, QWORD PTR [r12+448]
  43467. mov QWORD PTR [r12+440], r9
  43468. sbb r10, QWORD PTR [r13+448]
  43469. mov rax, QWORD PTR [r12+456]
  43470. mov QWORD PTR [r12+448], r10
  43471. sbb rax, QWORD PTR [r13+456]
  43472. mov r9, QWORD PTR [r12+464]
  43473. mov QWORD PTR [r12+456], rax
  43474. sbb r9, QWORD PTR [r13+464]
  43475. mov r10, QWORD PTR [r12+472]
  43476. mov QWORD PTR [r12+464], r9
  43477. sbb r10, QWORD PTR [r13+472]
  43478. mov rax, QWORD PTR [r12+480]
  43479. mov QWORD PTR [r12+472], r10
  43480. sbb rax, QWORD PTR [r13+480]
  43481. mov r9, QWORD PTR [r12+488]
  43482. mov QWORD PTR [r12+480], rax
  43483. sbb r9, QWORD PTR [r13+488]
  43484. mov r10, QWORD PTR [r12+496]
  43485. mov QWORD PTR [r12+488], r9
  43486. sbb r10, QWORD PTR [r13+496]
  43487. mov rax, QWORD PTR [r12+504]
  43488. mov QWORD PTR [r12+496], r10
  43489. sbb rax, QWORD PTR [r13+504]
  43490. mov QWORD PTR [r12+504], rax
  43491. sbb r11, 0
  43492. mov rax, QWORD PTR [r12]
  43493. sub rax, QWORD PTR [rcx]
  43494. mov r9, QWORD PTR [r12+8]
  43495. mov QWORD PTR [r12], rax
  43496. sbb r9, QWORD PTR [rcx+8]
  43497. mov r10, QWORD PTR [r12+16]
  43498. mov QWORD PTR [r12+8], r9
  43499. sbb r10, QWORD PTR [rcx+16]
  43500. mov rax, QWORD PTR [r12+24]
  43501. mov QWORD PTR [r12+16], r10
  43502. sbb rax, QWORD PTR [rcx+24]
  43503. mov r9, QWORD PTR [r12+32]
  43504. mov QWORD PTR [r12+24], rax
  43505. sbb r9, QWORD PTR [rcx+32]
  43506. mov r10, QWORD PTR [r12+40]
  43507. mov QWORD PTR [r12+32], r9
  43508. sbb r10, QWORD PTR [rcx+40]
  43509. mov rax, QWORD PTR [r12+48]
  43510. mov QWORD PTR [r12+40], r10
  43511. sbb rax, QWORD PTR [rcx+48]
  43512. mov r9, QWORD PTR [r12+56]
  43513. mov QWORD PTR [r12+48], rax
  43514. sbb r9, QWORD PTR [rcx+56]
  43515. mov r10, QWORD PTR [r12+64]
  43516. mov QWORD PTR [r12+56], r9
  43517. sbb r10, QWORD PTR [rcx+64]
  43518. mov rax, QWORD PTR [r12+72]
  43519. mov QWORD PTR [r12+64], r10
  43520. sbb rax, QWORD PTR [rcx+72]
  43521. mov r9, QWORD PTR [r12+80]
  43522. mov QWORD PTR [r12+72], rax
  43523. sbb r9, QWORD PTR [rcx+80]
  43524. mov r10, QWORD PTR [r12+88]
  43525. mov QWORD PTR [r12+80], r9
  43526. sbb r10, QWORD PTR [rcx+88]
  43527. mov rax, QWORD PTR [r12+96]
  43528. mov QWORD PTR [r12+88], r10
  43529. sbb rax, QWORD PTR [rcx+96]
  43530. mov r9, QWORD PTR [r12+104]
  43531. mov QWORD PTR [r12+96], rax
  43532. sbb r9, QWORD PTR [rcx+104]
  43533. mov r10, QWORD PTR [r12+112]
  43534. mov QWORD PTR [r12+104], r9
  43535. sbb r10, QWORD PTR [rcx+112]
  43536. mov rax, QWORD PTR [r12+120]
  43537. mov QWORD PTR [r12+112], r10
  43538. sbb rax, QWORD PTR [rcx+120]
  43539. mov r9, QWORD PTR [r12+128]
  43540. mov QWORD PTR [r12+120], rax
  43541. sbb r9, QWORD PTR [rcx+128]
  43542. mov r10, QWORD PTR [r12+136]
  43543. mov QWORD PTR [r12+128], r9
  43544. sbb r10, QWORD PTR [rcx+136]
  43545. mov rax, QWORD PTR [r12+144]
  43546. mov QWORD PTR [r12+136], r10
  43547. sbb rax, QWORD PTR [rcx+144]
  43548. mov r9, QWORD PTR [r12+152]
  43549. mov QWORD PTR [r12+144], rax
  43550. sbb r9, QWORD PTR [rcx+152]
  43551. mov r10, QWORD PTR [r12+160]
  43552. mov QWORD PTR [r12+152], r9
  43553. sbb r10, QWORD PTR [rcx+160]
  43554. mov rax, QWORD PTR [r12+168]
  43555. mov QWORD PTR [r12+160], r10
  43556. sbb rax, QWORD PTR [rcx+168]
  43557. mov r9, QWORD PTR [r12+176]
  43558. mov QWORD PTR [r12+168], rax
  43559. sbb r9, QWORD PTR [rcx+176]
  43560. mov r10, QWORD PTR [r12+184]
  43561. mov QWORD PTR [r12+176], r9
  43562. sbb r10, QWORD PTR [rcx+184]
  43563. mov rax, QWORD PTR [r12+192]
  43564. mov QWORD PTR [r12+184], r10
  43565. sbb rax, QWORD PTR [rcx+192]
  43566. mov r9, QWORD PTR [r12+200]
  43567. mov QWORD PTR [r12+192], rax
  43568. sbb r9, QWORD PTR [rcx+200]
  43569. mov r10, QWORD PTR [r12+208]
  43570. mov QWORD PTR [r12+200], r9
  43571. sbb r10, QWORD PTR [rcx+208]
  43572. mov rax, QWORD PTR [r12+216]
  43573. mov QWORD PTR [r12+208], r10
  43574. sbb rax, QWORD PTR [rcx+216]
  43575. mov r9, QWORD PTR [r12+224]
  43576. mov QWORD PTR [r12+216], rax
  43577. sbb r9, QWORD PTR [rcx+224]
  43578. mov r10, QWORD PTR [r12+232]
  43579. mov QWORD PTR [r12+224], r9
  43580. sbb r10, QWORD PTR [rcx+232]
  43581. mov rax, QWORD PTR [r12+240]
  43582. mov QWORD PTR [r12+232], r10
  43583. sbb rax, QWORD PTR [rcx+240]
  43584. mov r9, QWORD PTR [r12+248]
  43585. mov QWORD PTR [r12+240], rax
  43586. sbb r9, QWORD PTR [rcx+248]
  43587. mov r10, QWORD PTR [r12+256]
  43588. mov QWORD PTR [r12+248], r9
  43589. sbb r10, QWORD PTR [rcx+256]
  43590. mov rax, QWORD PTR [r12+264]
  43591. mov QWORD PTR [r12+256], r10
  43592. sbb rax, QWORD PTR [rcx+264]
  43593. mov r9, QWORD PTR [r12+272]
  43594. mov QWORD PTR [r12+264], rax
  43595. sbb r9, QWORD PTR [rcx+272]
  43596. mov r10, QWORD PTR [r12+280]
  43597. mov QWORD PTR [r12+272], r9
  43598. sbb r10, QWORD PTR [rcx+280]
  43599. mov rax, QWORD PTR [r12+288]
  43600. mov QWORD PTR [r12+280], r10
  43601. sbb rax, QWORD PTR [rcx+288]
  43602. mov r9, QWORD PTR [r12+296]
  43603. mov QWORD PTR [r12+288], rax
  43604. sbb r9, QWORD PTR [rcx+296]
  43605. mov r10, QWORD PTR [r12+304]
  43606. mov QWORD PTR [r12+296], r9
  43607. sbb r10, QWORD PTR [rcx+304]
  43608. mov rax, QWORD PTR [r12+312]
  43609. mov QWORD PTR [r12+304], r10
  43610. sbb rax, QWORD PTR [rcx+312]
  43611. mov r9, QWORD PTR [r12+320]
  43612. mov QWORD PTR [r12+312], rax
  43613. sbb r9, QWORD PTR [rcx+320]
  43614. mov r10, QWORD PTR [r12+328]
  43615. mov QWORD PTR [r12+320], r9
  43616. sbb r10, QWORD PTR [rcx+328]
  43617. mov rax, QWORD PTR [r12+336]
  43618. mov QWORD PTR [r12+328], r10
  43619. sbb rax, QWORD PTR [rcx+336]
  43620. mov r9, QWORD PTR [r12+344]
  43621. mov QWORD PTR [r12+336], rax
  43622. sbb r9, QWORD PTR [rcx+344]
  43623. mov r10, QWORD PTR [r12+352]
  43624. mov QWORD PTR [r12+344], r9
  43625. sbb r10, QWORD PTR [rcx+352]
  43626. mov rax, QWORD PTR [r12+360]
  43627. mov QWORD PTR [r12+352], r10
  43628. sbb rax, QWORD PTR [rcx+360]
  43629. mov r9, QWORD PTR [r12+368]
  43630. mov QWORD PTR [r12+360], rax
  43631. sbb r9, QWORD PTR [rcx+368]
  43632. mov r10, QWORD PTR [r12+376]
  43633. mov QWORD PTR [r12+368], r9
  43634. sbb r10, QWORD PTR [rcx+376]
  43635. mov rax, QWORD PTR [r12+384]
  43636. mov QWORD PTR [r12+376], r10
  43637. sbb rax, QWORD PTR [rcx+384]
  43638. mov r9, QWORD PTR [r12+392]
  43639. mov QWORD PTR [r12+384], rax
  43640. sbb r9, QWORD PTR [rcx+392]
  43641. mov r10, QWORD PTR [r12+400]
  43642. mov QWORD PTR [r12+392], r9
  43643. sbb r10, QWORD PTR [rcx+400]
  43644. mov rax, QWORD PTR [r12+408]
  43645. mov QWORD PTR [r12+400], r10
  43646. sbb rax, QWORD PTR [rcx+408]
  43647. mov r9, QWORD PTR [r12+416]
  43648. mov QWORD PTR [r12+408], rax
  43649. sbb r9, QWORD PTR [rcx+416]
  43650. mov r10, QWORD PTR [r12+424]
  43651. mov QWORD PTR [r12+416], r9
  43652. sbb r10, QWORD PTR [rcx+424]
  43653. mov rax, QWORD PTR [r12+432]
  43654. mov QWORD PTR [r12+424], r10
  43655. sbb rax, QWORD PTR [rcx+432]
  43656. mov r9, QWORD PTR [r12+440]
  43657. mov QWORD PTR [r12+432], rax
  43658. sbb r9, QWORD PTR [rcx+440]
  43659. mov r10, QWORD PTR [r12+448]
  43660. mov QWORD PTR [r12+440], r9
  43661. sbb r10, QWORD PTR [rcx+448]
  43662. mov rax, QWORD PTR [r12+456]
  43663. mov QWORD PTR [r12+448], r10
  43664. sbb rax, QWORD PTR [rcx+456]
  43665. mov r9, QWORD PTR [r12+464]
  43666. mov QWORD PTR [r12+456], rax
  43667. sbb r9, QWORD PTR [rcx+464]
  43668. mov r10, QWORD PTR [r12+472]
  43669. mov QWORD PTR [r12+464], r9
  43670. sbb r10, QWORD PTR [rcx+472]
  43671. mov rax, QWORD PTR [r12+480]
  43672. mov QWORD PTR [r12+472], r10
  43673. sbb rax, QWORD PTR [rcx+480]
  43674. mov r9, QWORD PTR [r12+488]
  43675. mov QWORD PTR [r12+480], rax
  43676. sbb r9, QWORD PTR [rcx+488]
  43677. mov r10, QWORD PTR [r12+496]
  43678. mov QWORD PTR [r12+488], r9
  43679. sbb r10, QWORD PTR [rcx+496]
  43680. mov rax, QWORD PTR [r12+504]
  43681. mov QWORD PTR [r12+496], r10
  43682. sbb rax, QWORD PTR [rcx+504]
  43683. mov QWORD PTR [r12+504], rax
  43684. sbb r11, 0
  43685. sub rsi, 256
  43686. ; Add
  43687. mov rax, QWORD PTR [rsi]
  43688. add rax, QWORD PTR [r12]
  43689. mov r9, QWORD PTR [rsi+8]
  43690. mov QWORD PTR [rsi], rax
  43691. adc r9, QWORD PTR [r12+8]
  43692. mov r10, QWORD PTR [rsi+16]
  43693. mov QWORD PTR [rsi+8], r9
  43694. adc r10, QWORD PTR [r12+16]
  43695. mov rax, QWORD PTR [rsi+24]
  43696. mov QWORD PTR [rsi+16], r10
  43697. adc rax, QWORD PTR [r12+24]
  43698. mov r9, QWORD PTR [rsi+32]
  43699. mov QWORD PTR [rsi+24], rax
  43700. adc r9, QWORD PTR [r12+32]
  43701. mov r10, QWORD PTR [rsi+40]
  43702. mov QWORD PTR [rsi+32], r9
  43703. adc r10, QWORD PTR [r12+40]
  43704. mov rax, QWORD PTR [rsi+48]
  43705. mov QWORD PTR [rsi+40], r10
  43706. adc rax, QWORD PTR [r12+48]
  43707. mov r9, QWORD PTR [rsi+56]
  43708. mov QWORD PTR [rsi+48], rax
  43709. adc r9, QWORD PTR [r12+56]
  43710. mov r10, QWORD PTR [rsi+64]
  43711. mov QWORD PTR [rsi+56], r9
  43712. adc r10, QWORD PTR [r12+64]
  43713. mov rax, QWORD PTR [rsi+72]
  43714. mov QWORD PTR [rsi+64], r10
  43715. adc rax, QWORD PTR [r12+72]
  43716. mov r9, QWORD PTR [rsi+80]
  43717. mov QWORD PTR [rsi+72], rax
  43718. adc r9, QWORD PTR [r12+80]
  43719. mov r10, QWORD PTR [rsi+88]
  43720. mov QWORD PTR [rsi+80], r9
  43721. adc r10, QWORD PTR [r12+88]
  43722. mov rax, QWORD PTR [rsi+96]
  43723. mov QWORD PTR [rsi+88], r10
  43724. adc rax, QWORD PTR [r12+96]
  43725. mov r9, QWORD PTR [rsi+104]
  43726. mov QWORD PTR [rsi+96], rax
  43727. adc r9, QWORD PTR [r12+104]
  43728. mov r10, QWORD PTR [rsi+112]
  43729. mov QWORD PTR [rsi+104], r9
  43730. adc r10, QWORD PTR [r12+112]
  43731. mov rax, QWORD PTR [rsi+120]
  43732. mov QWORD PTR [rsi+112], r10
  43733. adc rax, QWORD PTR [r12+120]
  43734. mov r9, QWORD PTR [rsi+128]
  43735. mov QWORD PTR [rsi+120], rax
  43736. adc r9, QWORD PTR [r12+128]
  43737. mov r10, QWORD PTR [rsi+136]
  43738. mov QWORD PTR [rsi+128], r9
  43739. adc r10, QWORD PTR [r12+136]
  43740. mov rax, QWORD PTR [rsi+144]
  43741. mov QWORD PTR [rsi+136], r10
  43742. adc rax, QWORD PTR [r12+144]
  43743. mov r9, QWORD PTR [rsi+152]
  43744. mov QWORD PTR [rsi+144], rax
  43745. adc r9, QWORD PTR [r12+152]
  43746. mov r10, QWORD PTR [rsi+160]
  43747. mov QWORD PTR [rsi+152], r9
  43748. adc r10, QWORD PTR [r12+160]
  43749. mov rax, QWORD PTR [rsi+168]
  43750. mov QWORD PTR [rsi+160], r10
  43751. adc rax, QWORD PTR [r12+168]
  43752. mov r9, QWORD PTR [rsi+176]
  43753. mov QWORD PTR [rsi+168], rax
  43754. adc r9, QWORD PTR [r12+176]
  43755. mov r10, QWORD PTR [rsi+184]
  43756. mov QWORD PTR [rsi+176], r9
  43757. adc r10, QWORD PTR [r12+184]
  43758. mov rax, QWORD PTR [rsi+192]
  43759. mov QWORD PTR [rsi+184], r10
  43760. adc rax, QWORD PTR [r12+192]
  43761. mov r9, QWORD PTR [rsi+200]
  43762. mov QWORD PTR [rsi+192], rax
  43763. adc r9, QWORD PTR [r12+200]
  43764. mov r10, QWORD PTR [rsi+208]
  43765. mov QWORD PTR [rsi+200], r9
  43766. adc r10, QWORD PTR [r12+208]
  43767. mov rax, QWORD PTR [rsi+216]
  43768. mov QWORD PTR [rsi+208], r10
  43769. adc rax, QWORD PTR [r12+216]
  43770. mov r9, QWORD PTR [rsi+224]
  43771. mov QWORD PTR [rsi+216], rax
  43772. adc r9, QWORD PTR [r12+224]
  43773. mov r10, QWORD PTR [rsi+232]
  43774. mov QWORD PTR [rsi+224], r9
  43775. adc r10, QWORD PTR [r12+232]
  43776. mov rax, QWORD PTR [rsi+240]
  43777. mov QWORD PTR [rsi+232], r10
  43778. adc rax, QWORD PTR [r12+240]
  43779. mov r9, QWORD PTR [rsi+248]
  43780. mov QWORD PTR [rsi+240], rax
  43781. adc r9, QWORD PTR [r12+248]
  43782. mov r10, QWORD PTR [rsi+256]
  43783. mov QWORD PTR [rsi+248], r9
  43784. adc r10, QWORD PTR [r12+256]
  43785. mov rax, QWORD PTR [rsi+264]
  43786. mov QWORD PTR [rsi+256], r10
  43787. adc rax, QWORD PTR [r12+264]
  43788. mov r9, QWORD PTR [rsi+272]
  43789. mov QWORD PTR [rsi+264], rax
  43790. adc r9, QWORD PTR [r12+272]
  43791. mov r10, QWORD PTR [rsi+280]
  43792. mov QWORD PTR [rsi+272], r9
  43793. adc r10, QWORD PTR [r12+280]
  43794. mov rax, QWORD PTR [rsi+288]
  43795. mov QWORD PTR [rsi+280], r10
  43796. adc rax, QWORD PTR [r12+288]
  43797. mov r9, QWORD PTR [rsi+296]
  43798. mov QWORD PTR [rsi+288], rax
  43799. adc r9, QWORD PTR [r12+296]
  43800. mov r10, QWORD PTR [rsi+304]
  43801. mov QWORD PTR [rsi+296], r9
  43802. adc r10, QWORD PTR [r12+304]
  43803. mov rax, QWORD PTR [rsi+312]
  43804. mov QWORD PTR [rsi+304], r10
  43805. adc rax, QWORD PTR [r12+312]
  43806. mov r9, QWORD PTR [rsi+320]
  43807. mov QWORD PTR [rsi+312], rax
  43808. adc r9, QWORD PTR [r12+320]
  43809. mov r10, QWORD PTR [rsi+328]
  43810. mov QWORD PTR [rsi+320], r9
  43811. adc r10, QWORD PTR [r12+328]
  43812. mov rax, QWORD PTR [rsi+336]
  43813. mov QWORD PTR [rsi+328], r10
  43814. adc rax, QWORD PTR [r12+336]
  43815. mov r9, QWORD PTR [rsi+344]
  43816. mov QWORD PTR [rsi+336], rax
  43817. adc r9, QWORD PTR [r12+344]
  43818. mov r10, QWORD PTR [rsi+352]
  43819. mov QWORD PTR [rsi+344], r9
  43820. adc r10, QWORD PTR [r12+352]
  43821. mov rax, QWORD PTR [rsi+360]
  43822. mov QWORD PTR [rsi+352], r10
  43823. adc rax, QWORD PTR [r12+360]
  43824. mov r9, QWORD PTR [rsi+368]
  43825. mov QWORD PTR [rsi+360], rax
  43826. adc r9, QWORD PTR [r12+368]
  43827. mov r10, QWORD PTR [rsi+376]
  43828. mov QWORD PTR [rsi+368], r9
  43829. adc r10, QWORD PTR [r12+376]
  43830. mov rax, QWORD PTR [rsi+384]
  43831. mov QWORD PTR [rsi+376], r10
  43832. adc rax, QWORD PTR [r12+384]
  43833. mov r9, QWORD PTR [rsi+392]
  43834. mov QWORD PTR [rsi+384], rax
  43835. adc r9, QWORD PTR [r12+392]
  43836. mov r10, QWORD PTR [rsi+400]
  43837. mov QWORD PTR [rsi+392], r9
  43838. adc r10, QWORD PTR [r12+400]
  43839. mov rax, QWORD PTR [rsi+408]
  43840. mov QWORD PTR [rsi+400], r10
  43841. adc rax, QWORD PTR [r12+408]
  43842. mov r9, QWORD PTR [rsi+416]
  43843. mov QWORD PTR [rsi+408], rax
  43844. adc r9, QWORD PTR [r12+416]
  43845. mov r10, QWORD PTR [rsi+424]
  43846. mov QWORD PTR [rsi+416], r9
  43847. adc r10, QWORD PTR [r12+424]
  43848. mov rax, QWORD PTR [rsi+432]
  43849. mov QWORD PTR [rsi+424], r10
  43850. adc rax, QWORD PTR [r12+432]
  43851. mov r9, QWORD PTR [rsi+440]
  43852. mov QWORD PTR [rsi+432], rax
  43853. adc r9, QWORD PTR [r12+440]
  43854. mov r10, QWORD PTR [rsi+448]
  43855. mov QWORD PTR [rsi+440], r9
  43856. adc r10, QWORD PTR [r12+448]
  43857. mov rax, QWORD PTR [rsi+456]
  43858. mov QWORD PTR [rsi+448], r10
  43859. adc rax, QWORD PTR [r12+456]
  43860. mov r9, QWORD PTR [rsi+464]
  43861. mov QWORD PTR [rsi+456], rax
  43862. adc r9, QWORD PTR [r12+464]
  43863. mov r10, QWORD PTR [rsi+472]
  43864. mov QWORD PTR [rsi+464], r9
  43865. adc r10, QWORD PTR [r12+472]
  43866. mov rax, QWORD PTR [rsi+480]
  43867. mov QWORD PTR [rsi+472], r10
  43868. adc rax, QWORD PTR [r12+480]
  43869. mov r9, QWORD PTR [rsi+488]
  43870. mov QWORD PTR [rsi+480], rax
  43871. adc r9, QWORD PTR [r12+488]
  43872. mov r10, QWORD PTR [rsi+496]
  43873. mov QWORD PTR [rsi+488], r9
  43874. adc r10, QWORD PTR [r12+496]
  43875. mov rax, QWORD PTR [rsi+504]
  43876. mov QWORD PTR [rsi+496], r10
  43877. adc rax, QWORD PTR [r12+504]
  43878. mov QWORD PTR [rsi+504], rax
  43879. adc r11, 0
  43880. mov QWORD PTR [rcx+768], r11
  43881. add rsi, 256
  43882. ; Add
  43883. mov rax, QWORD PTR [rsi]
  43884. add rax, QWORD PTR [r13]
  43885. mov r9, QWORD PTR [rsi+8]
  43886. mov QWORD PTR [rsi], rax
  43887. adc r9, QWORD PTR [r13+8]
  43888. mov r10, QWORD PTR [rsi+16]
  43889. mov QWORD PTR [rsi+8], r9
  43890. adc r10, QWORD PTR [r13+16]
  43891. mov rax, QWORD PTR [rsi+24]
  43892. mov QWORD PTR [rsi+16], r10
  43893. adc rax, QWORD PTR [r13+24]
  43894. mov r9, QWORD PTR [rsi+32]
  43895. mov QWORD PTR [rsi+24], rax
  43896. adc r9, QWORD PTR [r13+32]
  43897. mov r10, QWORD PTR [rsi+40]
  43898. mov QWORD PTR [rsi+32], r9
  43899. adc r10, QWORD PTR [r13+40]
  43900. mov rax, QWORD PTR [rsi+48]
  43901. mov QWORD PTR [rsi+40], r10
  43902. adc rax, QWORD PTR [r13+48]
  43903. mov r9, QWORD PTR [rsi+56]
  43904. mov QWORD PTR [rsi+48], rax
  43905. adc r9, QWORD PTR [r13+56]
  43906. mov r10, QWORD PTR [rsi+64]
  43907. mov QWORD PTR [rsi+56], r9
  43908. adc r10, QWORD PTR [r13+64]
  43909. mov rax, QWORD PTR [rsi+72]
  43910. mov QWORD PTR [rsi+64], r10
  43911. adc rax, QWORD PTR [r13+72]
  43912. mov r9, QWORD PTR [rsi+80]
  43913. mov QWORD PTR [rsi+72], rax
  43914. adc r9, QWORD PTR [r13+80]
  43915. mov r10, QWORD PTR [rsi+88]
  43916. mov QWORD PTR [rsi+80], r9
  43917. adc r10, QWORD PTR [r13+88]
  43918. mov rax, QWORD PTR [rsi+96]
  43919. mov QWORD PTR [rsi+88], r10
  43920. adc rax, QWORD PTR [r13+96]
  43921. mov r9, QWORD PTR [rsi+104]
  43922. mov QWORD PTR [rsi+96], rax
  43923. adc r9, QWORD PTR [r13+104]
  43924. mov r10, QWORD PTR [rsi+112]
  43925. mov QWORD PTR [rsi+104], r9
  43926. adc r10, QWORD PTR [r13+112]
  43927. mov rax, QWORD PTR [rsi+120]
  43928. mov QWORD PTR [rsi+112], r10
  43929. adc rax, QWORD PTR [r13+120]
  43930. mov r9, QWORD PTR [rsi+128]
  43931. mov QWORD PTR [rsi+120], rax
  43932. adc r9, QWORD PTR [r13+128]
  43933. mov r10, QWORD PTR [rsi+136]
  43934. mov QWORD PTR [rsi+128], r9
  43935. adc r10, QWORD PTR [r13+136]
  43936. mov rax, QWORD PTR [rsi+144]
  43937. mov QWORD PTR [rsi+136], r10
  43938. adc rax, QWORD PTR [r13+144]
  43939. mov r9, QWORD PTR [rsi+152]
  43940. mov QWORD PTR [rsi+144], rax
  43941. adc r9, QWORD PTR [r13+152]
  43942. mov r10, QWORD PTR [rsi+160]
  43943. mov QWORD PTR [rsi+152], r9
  43944. adc r10, QWORD PTR [r13+160]
  43945. mov rax, QWORD PTR [rsi+168]
  43946. mov QWORD PTR [rsi+160], r10
  43947. adc rax, QWORD PTR [r13+168]
  43948. mov r9, QWORD PTR [rsi+176]
  43949. mov QWORD PTR [rsi+168], rax
  43950. adc r9, QWORD PTR [r13+176]
  43951. mov r10, QWORD PTR [rsi+184]
  43952. mov QWORD PTR [rsi+176], r9
  43953. adc r10, QWORD PTR [r13+184]
  43954. mov rax, QWORD PTR [rsi+192]
  43955. mov QWORD PTR [rsi+184], r10
  43956. adc rax, QWORD PTR [r13+192]
  43957. mov r9, QWORD PTR [rsi+200]
  43958. mov QWORD PTR [rsi+192], rax
  43959. adc r9, QWORD PTR [r13+200]
  43960. mov r10, QWORD PTR [rsi+208]
  43961. mov QWORD PTR [rsi+200], r9
  43962. adc r10, QWORD PTR [r13+208]
  43963. mov rax, QWORD PTR [rsi+216]
  43964. mov QWORD PTR [rsi+208], r10
  43965. adc rax, QWORD PTR [r13+216]
  43966. mov r9, QWORD PTR [rsi+224]
  43967. mov QWORD PTR [rsi+216], rax
  43968. adc r9, QWORD PTR [r13+224]
  43969. mov r10, QWORD PTR [rsi+232]
  43970. mov QWORD PTR [rsi+224], r9
  43971. adc r10, QWORD PTR [r13+232]
  43972. mov rax, QWORD PTR [rsi+240]
  43973. mov QWORD PTR [rsi+232], r10
  43974. adc rax, QWORD PTR [r13+240]
  43975. mov r9, QWORD PTR [rsi+248]
  43976. mov QWORD PTR [rsi+240], rax
  43977. adc r9, QWORD PTR [r13+248]
  43978. mov r10, QWORD PTR [rsi+256]
  43979. mov QWORD PTR [rsi+248], r9
  43980. adc r10, QWORD PTR [r13+256]
  43981. mov QWORD PTR [rsi+256], r10
  43982. ; Add to zero
  43983. mov rax, QWORD PTR [r13+264]
  43984. adc rax, 0
  43985. mov r9, QWORD PTR [r13+272]
  43986. mov QWORD PTR [rsi+264], rax
  43987. adc r9, 0
  43988. mov r10, QWORD PTR [r13+280]
  43989. mov QWORD PTR [rsi+272], r9
  43990. adc r10, 0
  43991. mov rax, QWORD PTR [r13+288]
  43992. mov QWORD PTR [rsi+280], r10
  43993. adc rax, 0
  43994. mov r9, QWORD PTR [r13+296]
  43995. mov QWORD PTR [rsi+288], rax
  43996. adc r9, 0
  43997. mov r10, QWORD PTR [r13+304]
  43998. mov QWORD PTR [rsi+296], r9
  43999. adc r10, 0
  44000. mov rax, QWORD PTR [r13+312]
  44001. mov QWORD PTR [rsi+304], r10
  44002. adc rax, 0
  44003. mov r9, QWORD PTR [r13+320]
  44004. mov QWORD PTR [rsi+312], rax
  44005. adc r9, 0
  44006. mov r10, QWORD PTR [r13+328]
  44007. mov QWORD PTR [rsi+320], r9
  44008. adc r10, 0
  44009. mov rax, QWORD PTR [r13+336]
  44010. mov QWORD PTR [rsi+328], r10
  44011. adc rax, 0
  44012. mov r9, QWORD PTR [r13+344]
  44013. mov QWORD PTR [rsi+336], rax
  44014. adc r9, 0
  44015. mov r10, QWORD PTR [r13+352]
  44016. mov QWORD PTR [rsi+344], r9
  44017. adc r10, 0
  44018. mov rax, QWORD PTR [r13+360]
  44019. mov QWORD PTR [rsi+352], r10
  44020. adc rax, 0
  44021. mov r9, QWORD PTR [r13+368]
  44022. mov QWORD PTR [rsi+360], rax
  44023. adc r9, 0
  44024. mov r10, QWORD PTR [r13+376]
  44025. mov QWORD PTR [rsi+368], r9
  44026. adc r10, 0
  44027. mov rax, QWORD PTR [r13+384]
  44028. mov QWORD PTR [rsi+376], r10
  44029. adc rax, 0
  44030. mov r9, QWORD PTR [r13+392]
  44031. mov QWORD PTR [rsi+384], rax
  44032. adc r9, 0
  44033. mov r10, QWORD PTR [r13+400]
  44034. mov QWORD PTR [rsi+392], r9
  44035. adc r10, 0
  44036. mov rax, QWORD PTR [r13+408]
  44037. mov QWORD PTR [rsi+400], r10
  44038. adc rax, 0
  44039. mov r9, QWORD PTR [r13+416]
  44040. mov QWORD PTR [rsi+408], rax
  44041. adc r9, 0
  44042. mov r10, QWORD PTR [r13+424]
  44043. mov QWORD PTR [rsi+416], r9
  44044. adc r10, 0
  44045. mov rax, QWORD PTR [r13+432]
  44046. mov QWORD PTR [rsi+424], r10
  44047. adc rax, 0
  44048. mov r9, QWORD PTR [r13+440]
  44049. mov QWORD PTR [rsi+432], rax
  44050. adc r9, 0
  44051. mov r10, QWORD PTR [r13+448]
  44052. mov QWORD PTR [rsi+440], r9
  44053. adc r10, 0
  44054. mov rax, QWORD PTR [r13+456]
  44055. mov QWORD PTR [rsi+448], r10
  44056. adc rax, 0
  44057. mov r9, QWORD PTR [r13+464]
  44058. mov QWORD PTR [rsi+456], rax
  44059. adc r9, 0
  44060. mov r10, QWORD PTR [r13+472]
  44061. mov QWORD PTR [rsi+464], r9
  44062. adc r10, 0
  44063. mov rax, QWORD PTR [r13+480]
  44064. mov QWORD PTR [rsi+472], r10
  44065. adc rax, 0
  44066. mov r9, QWORD PTR [r13+488]
  44067. mov QWORD PTR [rsi+480], rax
  44068. adc r9, 0
  44069. mov r10, QWORD PTR [r13+496]
  44070. mov QWORD PTR [rsi+488], r9
  44071. adc r10, 0
  44072. mov rax, QWORD PTR [r13+504]
  44073. mov QWORD PTR [rsi+496], r10
  44074. adc rax, 0
  44075. mov QWORD PTR [rsi+504], rax
  44076. add rsp, 1576
  44077. pop rsi
  44078. pop rdi
  44079. pop r15
  44080. pop r14
  44081. pop r13
  44082. pop r12
  44083. ret
  44084. sp_4096_mul_avx2_64 ENDP
  44085. _text ENDS
  44086. ENDIF
  44087. ; /* Square a and put result in r. (r = a * a)
  44088. ; *
  44089. ; * Karatsuba on the low (al) and high (ah) halves of a: ah^2, al^2, (al - ah)^2
  44090. ; *
  44091. ; * r A single precision integer that receives the result.
  44092. ; * a A single precision integer to square.
  44093. ; */
  44094. _text SEGMENT READONLY PARA
  44095. sp_4096_sqr_64 PROC
  44096. sub rsp, 528
  44097. mov QWORD PTR [rsp+512], rcx
  44098. mov QWORD PTR [rsp+520], rdx
  44099. mov r9, 0
  44100. mov r10, rsp
  44101. lea r11, QWORD PTR [rdx+256]
  44102. mov rax, QWORD PTR [rdx]
  44103. sub rax, QWORD PTR [r11]
  44104. mov r8, QWORD PTR [rdx+8]
  44105. mov QWORD PTR [r10], rax
  44106. sbb r8, QWORD PTR [r11+8]
  44107. mov rax, QWORD PTR [rdx+16]
  44108. mov QWORD PTR [r10+8], r8
  44109. sbb rax, QWORD PTR [r11+16]
  44110. mov r8, QWORD PTR [rdx+24]
  44111. mov QWORD PTR [r10+16], rax
  44112. sbb r8, QWORD PTR [r11+24]
  44113. mov rax, QWORD PTR [rdx+32]
  44114. mov QWORD PTR [r10+24], r8
  44115. sbb rax, QWORD PTR [r11+32]
  44116. mov r8, QWORD PTR [rdx+40]
  44117. mov QWORD PTR [r10+32], rax
  44118. sbb r8, QWORD PTR [r11+40]
  44119. mov rax, QWORD PTR [rdx+48]
  44120. mov QWORD PTR [r10+40], r8
  44121. sbb rax, QWORD PTR [r11+48]
  44122. mov r8, QWORD PTR [rdx+56]
  44123. mov QWORD PTR [r10+48], rax
  44124. sbb r8, QWORD PTR [r11+56]
  44125. mov rax, QWORD PTR [rdx+64]
  44126. mov QWORD PTR [r10+56], r8
  44127. sbb rax, QWORD PTR [r11+64]
  44128. mov r8, QWORD PTR [rdx+72]
  44129. mov QWORD PTR [r10+64], rax
  44130. sbb r8, QWORD PTR [r11+72]
  44131. mov rax, QWORD PTR [rdx+80]
  44132. mov QWORD PTR [r10+72], r8
  44133. sbb rax, QWORD PTR [r11+80]
  44134. mov r8, QWORD PTR [rdx+88]
  44135. mov QWORD PTR [r10+80], rax
  44136. sbb r8, QWORD PTR [r11+88]
  44137. mov rax, QWORD PTR [rdx+96]
  44138. mov QWORD PTR [r10+88], r8
  44139. sbb rax, QWORD PTR [r11+96]
  44140. mov r8, QWORD PTR [rdx+104]
  44141. mov QWORD PTR [r10+96], rax
  44142. sbb r8, QWORD PTR [r11+104]
  44143. mov rax, QWORD PTR [rdx+112]
  44144. mov QWORD PTR [r10+104], r8
  44145. sbb rax, QWORD PTR [r11+112]
  44146. mov r8, QWORD PTR [rdx+120]
  44147. mov QWORD PTR [r10+112], rax
  44148. sbb r8, QWORD PTR [r11+120]
  44149. mov rax, QWORD PTR [rdx+128]
  44150. mov QWORD PTR [r10+120], r8
  44151. sbb rax, QWORD PTR [r11+128]
  44152. mov r8, QWORD PTR [rdx+136]
  44153. mov QWORD PTR [r10+128], rax
  44154. sbb r8, QWORD PTR [r11+136]
  44155. mov rax, QWORD PTR [rdx+144]
  44156. mov QWORD PTR [r10+136], r8
  44157. sbb rax, QWORD PTR [r11+144]
  44158. mov r8, QWORD PTR [rdx+152]
  44159. mov QWORD PTR [r10+144], rax
  44160. sbb r8, QWORD PTR [r11+152]
  44161. mov rax, QWORD PTR [rdx+160]
  44162. mov QWORD PTR [r10+152], r8
  44163. sbb rax, QWORD PTR [r11+160]
  44164. mov r8, QWORD PTR [rdx+168]
  44165. mov QWORD PTR [r10+160], rax
  44166. sbb r8, QWORD PTR [r11+168]
  44167. mov rax, QWORD PTR [rdx+176]
  44168. mov QWORD PTR [r10+168], r8
  44169. sbb rax, QWORD PTR [r11+176]
  44170. mov r8, QWORD PTR [rdx+184]
  44171. mov QWORD PTR [r10+176], rax
  44172. sbb r8, QWORD PTR [r11+184]
  44173. mov rax, QWORD PTR [rdx+192]
  44174. mov QWORD PTR [r10+184], r8
  44175. sbb rax, QWORD PTR [r11+192]
  44176. mov r8, QWORD PTR [rdx+200]
  44177. mov QWORD PTR [r10+192], rax
  44178. sbb r8, QWORD PTR [r11+200]
  44179. mov rax, QWORD PTR [rdx+208]
  44180. mov QWORD PTR [r10+200], r8
  44181. sbb rax, QWORD PTR [r11+208]
  44182. mov r8, QWORD PTR [rdx+216]
  44183. mov QWORD PTR [r10+208], rax
  44184. sbb r8, QWORD PTR [r11+216]
  44185. mov rax, QWORD PTR [rdx+224]
  44186. mov QWORD PTR [r10+216], r8
  44187. sbb rax, QWORD PTR [r11+224]
  44188. mov r8, QWORD PTR [rdx+232]
  44189. mov QWORD PTR [r10+224], rax
  44190. sbb r8, QWORD PTR [r11+232]
  44191. mov rax, QWORD PTR [rdx+240]
  44192. mov QWORD PTR [r10+232], r8
  44193. sbb rax, QWORD PTR [r11+240]
  44194. mov r8, QWORD PTR [rdx+248]
  44195. mov QWORD PTR [r10+240], rax
  44196. sbb r8, QWORD PTR [r11+248]
  44197. mov QWORD PTR [r10+248], r8
  44198. sbb r9, 0
  44199. ; Cond Negate
  44200. mov rax, QWORD PTR [r10]
  44201. mov r11, r9
  44202. xor rax, r9
  44203. neg r11
  44204. sub rax, r9
  44205. mov r8, QWORD PTR [r10+8]
  44206. sbb r11, 0
  44207. mov QWORD PTR [r10], rax
  44208. xor r8, r9
  44209. add r8, r11
  44210. mov rax, QWORD PTR [r10+16]
  44211. setc r11b
  44212. mov QWORD PTR [r10+8], r8
  44213. xor rax, r9
  44214. add rax, r11
  44215. mov r8, QWORD PTR [r10+24]
  44216. setc r11b
  44217. mov QWORD PTR [r10+16], rax
  44218. xor r8, r9
  44219. add r8, r11
  44220. mov rax, QWORD PTR [r10+32]
  44221. setc r11b
  44222. mov QWORD PTR [r10+24], r8
  44223. xor rax, r9
  44224. add rax, r11
  44225. mov r8, QWORD PTR [r10+40]
  44226. setc r11b
  44227. mov QWORD PTR [r10+32], rax
  44228. xor r8, r9
  44229. add r8, r11
  44230. mov rax, QWORD PTR [r10+48]
  44231. setc r11b
  44232. mov QWORD PTR [r10+40], r8
  44233. xor rax, r9
  44234. add rax, r11
  44235. mov r8, QWORD PTR [r10+56]
  44236. setc r11b
  44237. mov QWORD PTR [r10+48], rax
  44238. xor r8, r9
  44239. add r8, r11
  44240. mov rax, QWORD PTR [r10+64]
  44241. setc r11b
  44242. mov QWORD PTR [r10+56], r8
  44243. xor rax, r9
  44244. add rax, r11
  44245. mov r8, QWORD PTR [r10+72]
  44246. setc r11b
  44247. mov QWORD PTR [r10+64], rax
  44248. xor r8, r9
  44249. add r8, r11
  44250. mov rax, QWORD PTR [r10+80]
  44251. setc r11b
  44252. mov QWORD PTR [r10+72], r8
  44253. xor rax, r9
  44254. add rax, r11
  44255. mov r8, QWORD PTR [r10+88]
  44256. setc r11b
  44257. mov QWORD PTR [r10+80], rax
  44258. xor r8, r9
  44259. add r8, r11
  44260. mov rax, QWORD PTR [r10+96]
  44261. setc r11b
  44262. mov QWORD PTR [r10+88], r8
  44263. xor rax, r9
  44264. add rax, r11
  44265. mov r8, QWORD PTR [r10+104]
  44266. setc r11b
  44267. mov QWORD PTR [r10+96], rax
  44268. xor r8, r9
  44269. add r8, r11
  44270. mov rax, QWORD PTR [r10+112]
  44271. setc r11b
  44272. mov QWORD PTR [r10+104], r8
  44273. xor rax, r9
  44274. add rax, r11
  44275. mov r8, QWORD PTR [r10+120]
  44276. setc r11b
  44277. mov QWORD PTR [r10+112], rax
  44278. xor r8, r9
  44279. add r8, r11
  44280. mov rax, QWORD PTR [r10+128]
  44281. setc r11b
  44282. mov QWORD PTR [r10+120], r8
  44283. xor rax, r9
  44284. add rax, r11
  44285. mov r8, QWORD PTR [r10+136]
  44286. setc r11b
  44287. mov QWORD PTR [r10+128], rax
  44288. xor r8, r9
  44289. add r8, r11
  44290. mov rax, QWORD PTR [r10+144]
  44291. setc r11b
  44292. mov QWORD PTR [r10+136], r8
  44293. xor rax, r9
  44294. add rax, r11
  44295. mov r8, QWORD PTR [r10+152]
  44296. setc r11b
  44297. mov QWORD PTR [r10+144], rax
  44298. xor r8, r9
  44299. add r8, r11
  44300. mov rax, QWORD PTR [r10+160]
  44301. setc r11b
  44302. mov QWORD PTR [r10+152], r8
  44303. xor rax, r9
  44304. add rax, r11
  44305. mov r8, QWORD PTR [r10+168]
  44306. setc r11b
  44307. mov QWORD PTR [r10+160], rax
  44308. xor r8, r9
  44309. add r8, r11
  44310. mov rax, QWORD PTR [r10+176]
  44311. setc r11b
  44312. mov QWORD PTR [r10+168], r8
  44313. xor rax, r9
  44314. add rax, r11
  44315. mov r8, QWORD PTR [r10+184]
  44316. setc r11b
  44317. mov QWORD PTR [r10+176], rax
  44318. xor r8, r9
  44319. add r8, r11
  44320. mov rax, QWORD PTR [r10+192]
  44321. setc r11b
  44322. mov QWORD PTR [r10+184], r8
  44323. xor rax, r9
  44324. add rax, r11
  44325. mov r8, QWORD PTR [r10+200]
  44326. setc r11b
  44327. mov QWORD PTR [r10+192], rax
  44328. xor r8, r9
  44329. add r8, r11
  44330. mov rax, QWORD PTR [r10+208]
  44331. setc r11b
  44332. mov QWORD PTR [r10+200], r8
  44333. xor rax, r9
  44334. add rax, r11
  44335. mov r8, QWORD PTR [r10+216]
  44336. setc r11b
  44337. mov QWORD PTR [r10+208], rax
  44338. xor r8, r9
  44339. add r8, r11
  44340. mov rax, QWORD PTR [r10+224]
  44341. setc r11b
  44342. mov QWORD PTR [r10+216], r8
  44343. xor rax, r9
  44344. add rax, r11
  44345. mov r8, QWORD PTR [r10+232]
  44346. setc r11b
  44347. mov QWORD PTR [r10+224], rax
  44348. xor r8, r9
  44349. add r8, r11
  44350. mov rax, QWORD PTR [r10+240]
  44351. setc r11b
  44352. mov QWORD PTR [r10+232], r8
  44353. xor rax, r9
  44354. add rax, r11
  44355. mov r8, QWORD PTR [r10+248]
  44356. setc r11b
  44357. mov QWORD PTR [r10+240], rax
  44358. xor r8, r9
  44359. add r8, r11
  44360. mov QWORD PTR [r10+248], r8
  44361. mov rdx, r10
  44362. mov rcx, rsp
  44363. call sp_2048_sqr_32
  44364. mov rdx, QWORD PTR [rsp+520]
  44365. mov rcx, QWORD PTR [rsp+512]
  44366. add rdx, 256
  44367. add rcx, 512
  44368. call sp_2048_sqr_32
  44369. mov rdx, QWORD PTR [rsp+520]
  44370. mov rcx, QWORD PTR [rsp+512]
  44371. call sp_2048_sqr_32
  44372. IFDEF _WIN64
  44373. mov rdx, QWORD PTR [rsp+520]
  44374. mov rcx, QWORD PTR [rsp+512]
  44375. ENDIF
  44376. mov rdx, QWORD PTR [rsp+512]
  44377. lea r10, QWORD PTR [rsp+256]
  44378. add rdx, 768
  44379. mov r9, 0
  44380. mov r8, QWORD PTR [r10+-256]
  44381. sub r8, QWORD PTR [rdx+-256]
  44382. mov rax, QWORD PTR [r10+-248]
  44383. mov QWORD PTR [r10+-256], r8
  44384. sbb rax, QWORD PTR [rdx+-248]
  44385. mov r8, QWORD PTR [r10+-240]
  44386. mov QWORD PTR [r10+-248], rax
  44387. sbb r8, QWORD PTR [rdx+-240]
  44388. mov rax, QWORD PTR [r10+-232]
  44389. mov QWORD PTR [r10+-240], r8
  44390. sbb rax, QWORD PTR [rdx+-232]
  44391. mov r8, QWORD PTR [r10+-224]
  44392. mov QWORD PTR [r10+-232], rax
  44393. sbb r8, QWORD PTR [rdx+-224]
  44394. mov rax, QWORD PTR [r10+-216]
  44395. mov QWORD PTR [r10+-224], r8
  44396. sbb rax, QWORD PTR [rdx+-216]
  44397. mov r8, QWORD PTR [r10+-208]
  44398. mov QWORD PTR [r10+-216], rax
  44399. sbb r8, QWORD PTR [rdx+-208]
  44400. mov rax, QWORD PTR [r10+-200]
  44401. mov QWORD PTR [r10+-208], r8
  44402. sbb rax, QWORD PTR [rdx+-200]
  44403. mov r8, QWORD PTR [r10+-192]
  44404. mov QWORD PTR [r10+-200], rax
  44405. sbb r8, QWORD PTR [rdx+-192]
  44406. mov rax, QWORD PTR [r10+-184]
  44407. mov QWORD PTR [r10+-192], r8
  44408. sbb rax, QWORD PTR [rdx+-184]
  44409. mov r8, QWORD PTR [r10+-176]
  44410. mov QWORD PTR [r10+-184], rax
  44411. sbb r8, QWORD PTR [rdx+-176]
  44412. mov rax, QWORD PTR [r10+-168]
  44413. mov QWORD PTR [r10+-176], r8
  44414. sbb rax, QWORD PTR [rdx+-168]
  44415. mov r8, QWORD PTR [r10+-160]
  44416. mov QWORD PTR [r10+-168], rax
  44417. sbb r8, QWORD PTR [rdx+-160]
  44418. mov rax, QWORD PTR [r10+-152]
  44419. mov QWORD PTR [r10+-160], r8
  44420. sbb rax, QWORD PTR [rdx+-152]
  44421. mov r8, QWORD PTR [r10+-144]
  44422. mov QWORD PTR [r10+-152], rax
  44423. sbb r8, QWORD PTR [rdx+-144]
  44424. mov rax, QWORD PTR [r10+-136]
  44425. mov QWORD PTR [r10+-144], r8
  44426. sbb rax, QWORD PTR [rdx+-136]
  44427. mov r8, QWORD PTR [r10+-128]
  44428. mov QWORD PTR [r10+-136], rax
  44429. sbb r8, QWORD PTR [rdx+-128]
  44430. mov rax, QWORD PTR [r10+-120]
  44431. mov QWORD PTR [r10+-128], r8
  44432. sbb rax, QWORD PTR [rdx+-120]
  44433. mov r8, QWORD PTR [r10+-112]
  44434. mov QWORD PTR [r10+-120], rax
  44435. sbb r8, QWORD PTR [rdx+-112]
  44436. mov rax, QWORD PTR [r10+-104]
  44437. mov QWORD PTR [r10+-112], r8
  44438. sbb rax, QWORD PTR [rdx+-104]
  44439. mov r8, QWORD PTR [r10+-96]
  44440. mov QWORD PTR [r10+-104], rax
  44441. sbb r8, QWORD PTR [rdx+-96]
  44442. mov rax, QWORD PTR [r10+-88]
  44443. mov QWORD PTR [r10+-96], r8
  44444. sbb rax, QWORD PTR [rdx+-88]
  44445. mov r8, QWORD PTR [r10+-80]
  44446. mov QWORD PTR [r10+-88], rax
  44447. sbb r8, QWORD PTR [rdx+-80]
  44448. mov rax, QWORD PTR [r10+-72]
  44449. mov QWORD PTR [r10+-80], r8
  44450. sbb rax, QWORD PTR [rdx+-72]
  44451. mov r8, QWORD PTR [r10+-64]
  44452. mov QWORD PTR [r10+-72], rax
  44453. sbb r8, QWORD PTR [rdx+-64]
  44454. mov rax, QWORD PTR [r10+-56]
  44455. mov QWORD PTR [r10+-64], r8
  44456. sbb rax, QWORD PTR [rdx+-56]
  44457. mov r8, QWORD PTR [r10+-48]
  44458. mov QWORD PTR [r10+-56], rax
  44459. sbb r8, QWORD PTR [rdx+-48]
  44460. mov rax, QWORD PTR [r10+-40]
  44461. mov QWORD PTR [r10+-48], r8
  44462. sbb rax, QWORD PTR [rdx+-40]
  44463. mov r8, QWORD PTR [r10+-32]
  44464. mov QWORD PTR [r10+-40], rax
  44465. sbb r8, QWORD PTR [rdx+-32]
  44466. mov rax, QWORD PTR [r10+-24]
  44467. mov QWORD PTR [r10+-32], r8
  44468. sbb rax, QWORD PTR [rdx+-24]
  44469. mov r8, QWORD PTR [r10+-16]
  44470. mov QWORD PTR [r10+-24], rax
  44471. sbb r8, QWORD PTR [rdx+-16]
  44472. mov rax, QWORD PTR [r10+-8]
  44473. mov QWORD PTR [r10+-16], r8
  44474. sbb rax, QWORD PTR [rdx+-8]
  44475. mov r8, QWORD PTR [r10]
  44476. mov QWORD PTR [r10+-8], rax
  44477. sbb r8, QWORD PTR [rdx]
  44478. mov rax, QWORD PTR [r10+8]
  44479. mov QWORD PTR [r10], r8
  44480. sbb rax, QWORD PTR [rdx+8]
  44481. mov r8, QWORD PTR [r10+16]
  44482. mov QWORD PTR [r10+8], rax
  44483. sbb r8, QWORD PTR [rdx+16]
  44484. mov rax, QWORD PTR [r10+24]
  44485. mov QWORD PTR [r10+16], r8
  44486. sbb rax, QWORD PTR [rdx+24]
  44487. mov r8, QWORD PTR [r10+32]
  44488. mov QWORD PTR [r10+24], rax
  44489. sbb r8, QWORD PTR [rdx+32]
  44490. mov rax, QWORD PTR [r10+40]
  44491. mov QWORD PTR [r10+32], r8
  44492. sbb rax, QWORD PTR [rdx+40]
  44493. mov r8, QWORD PTR [r10+48]
  44494. mov QWORD PTR [r10+40], rax
  44495. sbb r8, QWORD PTR [rdx+48]
  44496. mov rax, QWORD PTR [r10+56]
  44497. mov QWORD PTR [r10+48], r8
  44498. sbb rax, QWORD PTR [rdx+56]
  44499. mov r8, QWORD PTR [r10+64]
  44500. mov QWORD PTR [r10+56], rax
  44501. sbb r8, QWORD PTR [rdx+64]
  44502. mov rax, QWORD PTR [r10+72]
  44503. mov QWORD PTR [r10+64], r8
  44504. sbb rax, QWORD PTR [rdx+72]
  44505. mov r8, QWORD PTR [r10+80]
  44506. mov QWORD PTR [r10+72], rax
  44507. sbb r8, QWORD PTR [rdx+80]
  44508. mov rax, QWORD PTR [r10+88]
  44509. mov QWORD PTR [r10+80], r8
  44510. sbb rax, QWORD PTR [rdx+88]
  44511. mov r8, QWORD PTR [r10+96]
  44512. mov QWORD PTR [r10+88], rax
  44513. sbb r8, QWORD PTR [rdx+96]
  44514. mov rax, QWORD PTR [r10+104]
  44515. mov QWORD PTR [r10+96], r8
  44516. sbb rax, QWORD PTR [rdx+104]
  44517. mov r8, QWORD PTR [r10+112]
  44518. mov QWORD PTR [r10+104], rax
  44519. sbb r8, QWORD PTR [rdx+112]
  44520. mov rax, QWORD PTR [r10+120]
  44521. mov QWORD PTR [r10+112], r8
  44522. sbb rax, QWORD PTR [rdx+120]
  44523. mov r8, QWORD PTR [r10+128]
  44524. mov QWORD PTR [r10+120], rax
  44525. sbb r8, QWORD PTR [rdx+128]
  44526. mov rax, QWORD PTR [r10+136]
  44527. mov QWORD PTR [r10+128], r8
  44528. sbb rax, QWORD PTR [rdx+136]
  44529. mov r8, QWORD PTR [r10+144]
  44530. mov QWORD PTR [r10+136], rax
  44531. sbb r8, QWORD PTR [rdx+144]
  44532. mov rax, QWORD PTR [r10+152]
  44533. mov QWORD PTR [r10+144], r8
  44534. sbb rax, QWORD PTR [rdx+152]
  44535. mov r8, QWORD PTR [r10+160]
  44536. mov QWORD PTR [r10+152], rax
  44537. sbb r8, QWORD PTR [rdx+160]
  44538. mov rax, QWORD PTR [r10+168]
  44539. mov QWORD PTR [r10+160], r8
  44540. sbb rax, QWORD PTR [rdx+168]
  44541. mov r8, QWORD PTR [r10+176]
  44542. mov QWORD PTR [r10+168], rax
  44543. sbb r8, QWORD PTR [rdx+176]
  44544. mov rax, QWORD PTR [r10+184]
  44545. mov QWORD PTR [r10+176], r8
  44546. sbb rax, QWORD PTR [rdx+184]
  44547. mov r8, QWORD PTR [r10+192]
  44548. mov QWORD PTR [r10+184], rax
  44549. sbb r8, QWORD PTR [rdx+192]
  44550. mov rax, QWORD PTR [r10+200]
  44551. mov QWORD PTR [r10+192], r8
  44552. sbb rax, QWORD PTR [rdx+200]
  44553. mov r8, QWORD PTR [r10+208]
  44554. mov QWORD PTR [r10+200], rax
  44555. sbb r8, QWORD PTR [rdx+208]
  44556. mov rax, QWORD PTR [r10+216]
  44557. mov QWORD PTR [r10+208], r8
  44558. sbb rax, QWORD PTR [rdx+216]
  44559. mov r8, QWORD PTR [r10+224]
  44560. mov QWORD PTR [r10+216], rax
  44561. sbb r8, QWORD PTR [rdx+224]
  44562. mov rax, QWORD PTR [r10+232]
  44563. mov QWORD PTR [r10+224], r8
  44564. sbb rax, QWORD PTR [rdx+232]
  44565. mov r8, QWORD PTR [r10+240]
  44566. mov QWORD PTR [r10+232], rax
  44567. sbb r8, QWORD PTR [rdx+240]
  44568. mov rax, QWORD PTR [r10+248]
  44569. mov QWORD PTR [r10+240], r8
  44570. sbb rax, QWORD PTR [rdx+248]
  44571. mov QWORD PTR [r10+248], rax
  44572. sbb r9, 0
  44573. sub rdx, 512
  44574. mov r8, QWORD PTR [r10+-256]
  44575. sub r8, QWORD PTR [rdx+-256]
  44576. mov rax, QWORD PTR [r10+-248]
  44577. mov QWORD PTR [r10+-256], r8
  44578. sbb rax, QWORD PTR [rdx+-248]
  44579. mov r8, QWORD PTR [r10+-240]
  44580. mov QWORD PTR [r10+-248], rax
  44581. sbb r8, QWORD PTR [rdx+-240]
  44582. mov rax, QWORD PTR [r10+-232]
  44583. mov QWORD PTR [r10+-240], r8
  44584. sbb rax, QWORD PTR [rdx+-232]
  44585. mov r8, QWORD PTR [r10+-224]
  44586. mov QWORD PTR [r10+-232], rax
  44587. sbb r8, QWORD PTR [rdx+-224]
  44588. mov rax, QWORD PTR [r10+-216]
  44589. mov QWORD PTR [r10+-224], r8
  44590. sbb rax, QWORD PTR [rdx+-216]
  44591. mov r8, QWORD PTR [r10+-208]
  44592. mov QWORD PTR [r10+-216], rax
  44593. sbb r8, QWORD PTR [rdx+-208]
  44594. mov rax, QWORD PTR [r10+-200]
  44595. mov QWORD PTR [r10+-208], r8
  44596. sbb rax, QWORD PTR [rdx+-200]
  44597. mov r8, QWORD PTR [r10+-192]
  44598. mov QWORD PTR [r10+-200], rax
  44599. sbb r8, QWORD PTR [rdx+-192]
  44600. mov rax, QWORD PTR [r10+-184]
  44601. mov QWORD PTR [r10+-192], r8
  44602. sbb rax, QWORD PTR [rdx+-184]
  44603. mov r8, QWORD PTR [r10+-176]
  44604. mov QWORD PTR [r10+-184], rax
  44605. sbb r8, QWORD PTR [rdx+-176]
  44606. mov rax, QWORD PTR [r10+-168]
  44607. mov QWORD PTR [r10+-176], r8
  44608. sbb rax, QWORD PTR [rdx+-168]
  44609. mov r8, QWORD PTR [r10+-160]
  44610. mov QWORD PTR [r10+-168], rax
  44611. sbb r8, QWORD PTR [rdx+-160]
  44612. mov rax, QWORD PTR [r10+-152]
  44613. mov QWORD PTR [r10+-160], r8
  44614. sbb rax, QWORD PTR [rdx+-152]
  44615. mov r8, QWORD PTR [r10+-144]
  44616. mov QWORD PTR [r10+-152], rax
  44617. sbb r8, QWORD PTR [rdx+-144]
  44618. mov rax, QWORD PTR [r10+-136]
  44619. mov QWORD PTR [r10+-144], r8
  44620. sbb rax, QWORD PTR [rdx+-136]
  44621. mov r8, QWORD PTR [r10+-128]
  44622. mov QWORD PTR [r10+-136], rax
  44623. sbb r8, QWORD PTR [rdx+-128]
  44624. mov rax, QWORD PTR [r10+-120]
  44625. mov QWORD PTR [r10+-128], r8
  44626. sbb rax, QWORD PTR [rdx+-120]
  44627. mov r8, QWORD PTR [r10+-112]
  44628. mov QWORD PTR [r10+-120], rax
  44629. sbb r8, QWORD PTR [rdx+-112]
  44630. mov rax, QWORD PTR [r10+-104]
  44631. mov QWORD PTR [r10+-112], r8
  44632. sbb rax, QWORD PTR [rdx+-104]
  44633. mov r8, QWORD PTR [r10+-96]
  44634. mov QWORD PTR [r10+-104], rax
  44635. sbb r8, QWORD PTR [rdx+-96]
  44636. mov rax, QWORD PTR [r10+-88]
  44637. mov QWORD PTR [r10+-96], r8
  44638. sbb rax, QWORD PTR [rdx+-88]
  44639. mov r8, QWORD PTR [r10+-80]
  44640. mov QWORD PTR [r10+-88], rax
  44641. sbb r8, QWORD PTR [rdx+-80]
  44642. mov rax, QWORD PTR [r10+-72]
  44643. mov QWORD PTR [r10+-80], r8
  44644. sbb rax, QWORD PTR [rdx+-72]
  44645. mov r8, QWORD PTR [r10+-64]
  44646. mov QWORD PTR [r10+-72], rax
  44647. sbb r8, QWORD PTR [rdx+-64]
  44648. mov rax, QWORD PTR [r10+-56]
  44649. mov QWORD PTR [r10+-64], r8
  44650. sbb rax, QWORD PTR [rdx+-56]
  44651. mov r8, QWORD PTR [r10+-48]
  44652. mov QWORD PTR [r10+-56], rax
  44653. sbb r8, QWORD PTR [rdx+-48]
  44654. mov rax, QWORD PTR [r10+-40]
  44655. mov QWORD PTR [r10+-48], r8
  44656. sbb rax, QWORD PTR [rdx+-40]
  44657. mov r8, QWORD PTR [r10+-32]
  44658. mov QWORD PTR [r10+-40], rax
  44659. sbb r8, QWORD PTR [rdx+-32]
  44660. mov rax, QWORD PTR [r10+-24]
  44661. mov QWORD PTR [r10+-32], r8
  44662. sbb rax, QWORD PTR [rdx+-24]
  44663. mov r8, QWORD PTR [r10+-16]
  44664. mov QWORD PTR [r10+-24], rax
  44665. sbb r8, QWORD PTR [rdx+-16]
  44666. mov rax, QWORD PTR [r10+-8]
  44667. mov QWORD PTR [r10+-16], r8
  44668. sbb rax, QWORD PTR [rdx+-8]
  44669. mov r8, QWORD PTR [r10]
  44670. mov QWORD PTR [r10+-8], rax
  44671. sbb r8, QWORD PTR [rdx]
  44672. mov rax, QWORD PTR [r10+8]
  44673. mov QWORD PTR [r10], r8
  44674. sbb rax, QWORD PTR [rdx+8]
  44675. mov r8, QWORD PTR [r10+16]
  44676. mov QWORD PTR [r10+8], rax
  44677. sbb r8, QWORD PTR [rdx+16]
  44678. mov rax, QWORD PTR [r10+24]
  44679. mov QWORD PTR [r10+16], r8
  44680. sbb rax, QWORD PTR [rdx+24]
  44681. mov r8, QWORD PTR [r10+32]
  44682. mov QWORD PTR [r10+24], rax
  44683. sbb r8, QWORD PTR [rdx+32]
  44684. mov rax, QWORD PTR [r10+40]
  44685. mov QWORD PTR [r10+32], r8
  44686. sbb rax, QWORD PTR [rdx+40]
  44687. mov r8, QWORD PTR [r10+48]
  44688. mov QWORD PTR [r10+40], rax
  44689. sbb r8, QWORD PTR [rdx+48]
  44690. mov rax, QWORD PTR [r10+56]
  44691. mov QWORD PTR [r10+48], r8
  44692. sbb rax, QWORD PTR [rdx+56]
  44693. mov r8, QWORD PTR [r10+64]
  44694. mov QWORD PTR [r10+56], rax
  44695. sbb r8, QWORD PTR [rdx+64]
  44696. mov rax, QWORD PTR [r10+72]
  44697. mov QWORD PTR [r10+64], r8
  44698. sbb rax, QWORD PTR [rdx+72]
  44699. mov r8, QWORD PTR [r10+80]
  44700. mov QWORD PTR [r10+72], rax
  44701. sbb r8, QWORD PTR [rdx+80]
  44702. mov rax, QWORD PTR [r10+88]
  44703. mov QWORD PTR [r10+80], r8
  44704. sbb rax, QWORD PTR [rdx+88]
  44705. mov r8, QWORD PTR [r10+96]
  44706. mov QWORD PTR [r10+88], rax
  44707. sbb r8, QWORD PTR [rdx+96]
  44708. mov rax, QWORD PTR [r10+104]
  44709. mov QWORD PTR [r10+96], r8
  44710. sbb rax, QWORD PTR [rdx+104]
  44711. mov r8, QWORD PTR [r10+112]
  44712. mov QWORD PTR [r10+104], rax
  44713. sbb r8, QWORD PTR [rdx+112]
  44714. mov rax, QWORD PTR [r10+120]
  44715. mov QWORD PTR [r10+112], r8
  44716. sbb rax, QWORD PTR [rdx+120]
  44717. mov r8, QWORD PTR [r10+128]
  44718. mov QWORD PTR [r10+120], rax
  44719. sbb r8, QWORD PTR [rdx+128]
  44720. mov rax, QWORD PTR [r10+136]
  44721. mov QWORD PTR [r10+128], r8
  44722. sbb rax, QWORD PTR [rdx+136]
  44723. mov r8, QWORD PTR [r10+144]
  44724. mov QWORD PTR [r10+136], rax
  44725. sbb r8, QWORD PTR [rdx+144]
  44726. mov rax, QWORD PTR [r10+152]
  44727. mov QWORD PTR [r10+144], r8
  44728. sbb rax, QWORD PTR [rdx+152]
  44729. mov r8, QWORD PTR [r10+160]
  44730. mov QWORD PTR [r10+152], rax
  44731. sbb r8, QWORD PTR [rdx+160]
  44732. mov rax, QWORD PTR [r10+168]
  44733. mov QWORD PTR [r10+160], r8
  44734. sbb rax, QWORD PTR [rdx+168]
  44735. mov r8, QWORD PTR [r10+176]
  44736. mov QWORD PTR [r10+168], rax
  44737. sbb r8, QWORD PTR [rdx+176]
  44738. mov rax, QWORD PTR [r10+184]
  44739. mov QWORD PTR [r10+176], r8
  44740. sbb rax, QWORD PTR [rdx+184]
  44741. mov r8, QWORD PTR [r10+192]
  44742. mov QWORD PTR [r10+184], rax
  44743. sbb r8, QWORD PTR [rdx+192]
  44744. mov rax, QWORD PTR [r10+200]
  44745. mov QWORD PTR [r10+192], r8
  44746. sbb rax, QWORD PTR [rdx+200]
  44747. mov r8, QWORD PTR [r10+208]
  44748. mov QWORD PTR [r10+200], rax
  44749. sbb r8, QWORD PTR [rdx+208]
  44750. mov rax, QWORD PTR [r10+216]
  44751. mov QWORD PTR [r10+208], r8
  44752. sbb rax, QWORD PTR [rdx+216]
  44753. mov r8, QWORD PTR [r10+224]
  44754. mov QWORD PTR [r10+216], rax
  44755. sbb r8, QWORD PTR [rdx+224]
  44756. mov rax, QWORD PTR [r10+232]
  44757. mov QWORD PTR [r10+224], r8
  44758. sbb rax, QWORD PTR [rdx+232]
  44759. mov r8, QWORD PTR [r10+240]
  44760. mov QWORD PTR [r10+232], rax
  44761. sbb r8, QWORD PTR [rdx+240]
  44762. mov rax, QWORD PTR [r10+248]
  44763. mov QWORD PTR [r10+240], r8
  44764. sbb rax, QWORD PTR [rdx+248]
  44765. mov QWORD PTR [r10+248], rax
  44766. sbb r9, 0
  44767. mov rcx, QWORD PTR [rsp+512]
  44768. neg r9
  44769. add rcx, 512
  44770. mov r8, QWORD PTR [rcx+-256]
  44771. sub r8, QWORD PTR [r10+-256]
  44772. mov rax, QWORD PTR [rcx+-248]
  44773. mov QWORD PTR [rcx+-256], r8
  44774. sbb rax, QWORD PTR [r10+-248]
  44775. mov r8, QWORD PTR [rcx+-240]
  44776. mov QWORD PTR [rcx+-248], rax
  44777. sbb r8, QWORD PTR [r10+-240]
  44778. mov rax, QWORD PTR [rcx+-232]
  44779. mov QWORD PTR [rcx+-240], r8
  44780. sbb rax, QWORD PTR [r10+-232]
  44781. mov r8, QWORD PTR [rcx+-224]
  44782. mov QWORD PTR [rcx+-232], rax
  44783. sbb r8, QWORD PTR [r10+-224]
  44784. mov rax, QWORD PTR [rcx+-216]
  44785. mov QWORD PTR [rcx+-224], r8
  44786. sbb rax, QWORD PTR [r10+-216]
  44787. mov r8, QWORD PTR [rcx+-208]
  44788. mov QWORD PTR [rcx+-216], rax
  44789. sbb r8, QWORD PTR [r10+-208]
  44790. mov rax, QWORD PTR [rcx+-200]
  44791. mov QWORD PTR [rcx+-208], r8
  44792. sbb rax, QWORD PTR [r10+-200]
  44793. mov r8, QWORD PTR [rcx+-192]
  44794. mov QWORD PTR [rcx+-200], rax
  44795. sbb r8, QWORD PTR [r10+-192]
  44796. mov rax, QWORD PTR [rcx+-184]
  44797. mov QWORD PTR [rcx+-192], r8
  44798. sbb rax, QWORD PTR [r10+-184]
  44799. mov r8, QWORD PTR [rcx+-176]
  44800. mov QWORD PTR [rcx+-184], rax
  44801. sbb r8, QWORD PTR [r10+-176]
  44802. mov rax, QWORD PTR [rcx+-168]
  44803. mov QWORD PTR [rcx+-176], r8
  44804. sbb rax, QWORD PTR [r10+-168]
  44805. mov r8, QWORD PTR [rcx+-160]
  44806. mov QWORD PTR [rcx+-168], rax
  44807. sbb r8, QWORD PTR [r10+-160]
  44808. mov rax, QWORD PTR [rcx+-152]
  44809. mov QWORD PTR [rcx+-160], r8
  44810. sbb rax, QWORD PTR [r10+-152]
  44811. mov r8, QWORD PTR [rcx+-144]
  44812. mov QWORD PTR [rcx+-152], rax
  44813. sbb r8, QWORD PTR [r10+-144]
  44814. mov rax, QWORD PTR [rcx+-136]
  44815. mov QWORD PTR [rcx+-144], r8
  44816. sbb rax, QWORD PTR [r10+-136]
  44817. mov r8, QWORD PTR [rcx+-128]
  44818. mov QWORD PTR [rcx+-136], rax
  44819. sbb r8, QWORD PTR [r10+-128]
  44820. mov rax, QWORD PTR [rcx+-120]
  44821. mov QWORD PTR [rcx+-128], r8
  44822. sbb rax, QWORD PTR [r10+-120]
  44823. mov r8, QWORD PTR [rcx+-112]
  44824. mov QWORD PTR [rcx+-120], rax
  44825. sbb r8, QWORD PTR [r10+-112]
  44826. mov rax, QWORD PTR [rcx+-104]
  44827. mov QWORD PTR [rcx+-112], r8
  44828. sbb rax, QWORD PTR [r10+-104]
  44829. mov r8, QWORD PTR [rcx+-96]
  44830. mov QWORD PTR [rcx+-104], rax
  44831. sbb r8, QWORD PTR [r10+-96]
  44832. mov rax, QWORD PTR [rcx+-88]
  44833. mov QWORD PTR [rcx+-96], r8
  44834. sbb rax, QWORD PTR [r10+-88]
  44835. mov r8, QWORD PTR [rcx+-80]
  44836. mov QWORD PTR [rcx+-88], rax
  44837. sbb r8, QWORD PTR [r10+-80]
  44838. mov rax, QWORD PTR [rcx+-72]
  44839. mov QWORD PTR [rcx+-80], r8
  44840. sbb rax, QWORD PTR [r10+-72]
  44841. mov r8, QWORD PTR [rcx+-64]
  44842. mov QWORD PTR [rcx+-72], rax
  44843. sbb r8, QWORD PTR [r10+-64]
  44844. mov rax, QWORD PTR [rcx+-56]
  44845. mov QWORD PTR [rcx+-64], r8
  44846. sbb rax, QWORD PTR [r10+-56]
  44847. mov r8, QWORD PTR [rcx+-48]
  44848. mov QWORD PTR [rcx+-56], rax
  44849. sbb r8, QWORD PTR [r10+-48]
  44850. mov rax, QWORD PTR [rcx+-40]
  44851. mov QWORD PTR [rcx+-48], r8
  44852. sbb rax, QWORD PTR [r10+-40]
  44853. mov r8, QWORD PTR [rcx+-32]
  44854. mov QWORD PTR [rcx+-40], rax
  44855. sbb r8, QWORD PTR [r10+-32]
  44856. mov rax, QWORD PTR [rcx+-24]
  44857. mov QWORD PTR [rcx+-32], r8
  44858. sbb rax, QWORD PTR [r10+-24]
  44859. mov r8, QWORD PTR [rcx+-16]
  44860. mov QWORD PTR [rcx+-24], rax
  44861. sbb r8, QWORD PTR [r10+-16]
  44862. mov rax, QWORD PTR [rcx+-8]
  44863. mov QWORD PTR [rcx+-16], r8
  44864. sbb rax, QWORD PTR [r10+-8]
  44865. mov r8, QWORD PTR [rcx]
  44866. mov QWORD PTR [rcx+-8], rax
  44867. sbb r8, QWORD PTR [r10]
  44868. mov rax, QWORD PTR [rcx+8]
  44869. mov QWORD PTR [rcx], r8
  44870. sbb rax, QWORD PTR [r10+8]
  44871. mov r8, QWORD PTR [rcx+16]
  44872. mov QWORD PTR [rcx+8], rax
  44873. sbb r8, QWORD PTR [r10+16]
  44874. mov rax, QWORD PTR [rcx+24]
  44875. mov QWORD PTR [rcx+16], r8
  44876. sbb rax, QWORD PTR [r10+24]
  44877. mov r8, QWORD PTR [rcx+32]
  44878. mov QWORD PTR [rcx+24], rax
  44879. sbb r8, QWORD PTR [r10+32]
  44880. mov rax, QWORD PTR [rcx+40]
  44881. mov QWORD PTR [rcx+32], r8
  44882. sbb rax, QWORD PTR [r10+40]
  44883. mov r8, QWORD PTR [rcx+48]
  44884. mov QWORD PTR [rcx+40], rax
  44885. sbb r8, QWORD PTR [r10+48]
  44886. mov rax, QWORD PTR [rcx+56]
  44887. mov QWORD PTR [rcx+48], r8
  44888. sbb rax, QWORD PTR [r10+56]
  44889. mov r8, QWORD PTR [rcx+64]
  44890. mov QWORD PTR [rcx+56], rax
  44891. sbb r8, QWORD PTR [r10+64]
  44892. mov rax, QWORD PTR [rcx+72]
  44893. mov QWORD PTR [rcx+64], r8
  44894. sbb rax, QWORD PTR [r10+72]
  44895. mov r8, QWORD PTR [rcx+80]
  44896. mov QWORD PTR [rcx+72], rax
  44897. sbb r8, QWORD PTR [r10+80]
  44898. mov rax, QWORD PTR [rcx+88]
  44899. mov QWORD PTR [rcx+80], r8
  44900. sbb rax, QWORD PTR [r10+88]
  44901. mov r8, QWORD PTR [rcx+96]
  44902. mov QWORD PTR [rcx+88], rax
  44903. sbb r8, QWORD PTR [r10+96]
  44904. mov rax, QWORD PTR [rcx+104]
  44905. mov QWORD PTR [rcx+96], r8
  44906. sbb rax, QWORD PTR [r10+104]
  44907. mov r8, QWORD PTR [rcx+112]
  44908. mov QWORD PTR [rcx+104], rax
  44909. sbb r8, QWORD PTR [r10+112]
  44910. mov rax, QWORD PTR [rcx+120]
  44911. mov QWORD PTR [rcx+112], r8
  44912. sbb rax, QWORD PTR [r10+120]
  44913. mov r8, QWORD PTR [rcx+128]
  44914. mov QWORD PTR [rcx+120], rax
  44915. sbb r8, QWORD PTR [r10+128]
  44916. mov rax, QWORD PTR [rcx+136]
  44917. mov QWORD PTR [rcx+128], r8
  44918. sbb rax, QWORD PTR [r10+136]
  44919. mov r8, QWORD PTR [rcx+144]
  44920. mov QWORD PTR [rcx+136], rax
  44921. sbb r8, QWORD PTR [r10+144]
  44922. mov rax, QWORD PTR [rcx+152]
  44923. mov QWORD PTR [rcx+144], r8
  44924. sbb rax, QWORD PTR [r10+152]
  44925. mov r8, QWORD PTR [rcx+160]
  44926. mov QWORD PTR [rcx+152], rax
  44927. sbb r8, QWORD PTR [r10+160]
  44928. mov rax, QWORD PTR [rcx+168]
  44929. mov QWORD PTR [rcx+160], r8
  44930. sbb rax, QWORD PTR [r10+168]
  44931. mov r8, QWORD PTR [rcx+176]
  44932. mov QWORD PTR [rcx+168], rax
  44933. sbb r8, QWORD PTR [r10+176]
  44934. mov rax, QWORD PTR [rcx+184]
  44935. mov QWORD PTR [rcx+176], r8
  44936. sbb rax, QWORD PTR [r10+184]
  44937. mov r8, QWORD PTR [rcx+192]
  44938. mov QWORD PTR [rcx+184], rax
  44939. sbb r8, QWORD PTR [r10+192]
  44940. mov rax, QWORD PTR [rcx+200]
  44941. mov QWORD PTR [rcx+192], r8
  44942. sbb rax, QWORD PTR [r10+200]
  44943. mov r8, QWORD PTR [rcx+208]
  44944. mov QWORD PTR [rcx+200], rax
  44945. sbb r8, QWORD PTR [r10+208]
  44946. mov rax, QWORD PTR [rcx+216]
  44947. mov QWORD PTR [rcx+208], r8
  44948. sbb rax, QWORD PTR [r10+216]
  44949. mov r8, QWORD PTR [rcx+224]
  44950. mov QWORD PTR [rcx+216], rax
  44951. sbb r8, QWORD PTR [r10+224]
  44952. mov rax, QWORD PTR [rcx+232]
  44953. mov QWORD PTR [rcx+224], r8
  44954. sbb rax, QWORD PTR [r10+232]
  44955. mov r8, QWORD PTR [rcx+240]
  44956. mov QWORD PTR [rcx+232], rax
  44957. sbb r8, QWORD PTR [r10+240]
  44958. mov rax, QWORD PTR [rcx+248]
  44959. mov QWORD PTR [rcx+240], r8
  44960. sbb rax, QWORD PTR [r10+248]
  44961. mov QWORD PTR [rcx+248], rax
  44962. sbb r9, 0
  44963. mov rcx, QWORD PTR [rsp+512]
  44964. add rcx, 768
  44965. ; Add in word
  44966. mov r8, QWORD PTR [rcx]
  44967. add r8, r9
  44968. mov rax, QWORD PTR [rcx+8]
  44969. mov QWORD PTR [rcx], r8
  44970. adc rax, 0
  44971. mov r8, QWORD PTR [rcx+16]
  44972. mov QWORD PTR [rcx+8], rax
  44973. adc r8, 0
  44974. mov rax, QWORD PTR [rcx+24]
  44975. mov QWORD PTR [rcx+16], r8
  44976. adc rax, 0
  44977. mov r8, QWORD PTR [rcx+32]
  44978. mov QWORD PTR [rcx+24], rax
  44979. adc r8, 0
  44980. mov rax, QWORD PTR [rcx+40]
  44981. mov QWORD PTR [rcx+32], r8
  44982. adc rax, 0
  44983. mov r8, QWORD PTR [rcx+48]
  44984. mov QWORD PTR [rcx+40], rax
  44985. adc r8, 0
  44986. mov rax, QWORD PTR [rcx+56]
  44987. mov QWORD PTR [rcx+48], r8
  44988. adc rax, 0
  44989. mov r8, QWORD PTR [rcx+64]
  44990. mov QWORD PTR [rcx+56], rax
  44991. adc r8, 0
  44992. mov rax, QWORD PTR [rcx+72]
  44993. mov QWORD PTR [rcx+64], r8
  44994. adc rax, 0
  44995. mov r8, QWORD PTR [rcx+80]
  44996. mov QWORD PTR [rcx+72], rax
  44997. adc r8, 0
  44998. mov rax, QWORD PTR [rcx+88]
  44999. mov QWORD PTR [rcx+80], r8
  45000. adc rax, 0
  45001. mov r8, QWORD PTR [rcx+96]
  45002. mov QWORD PTR [rcx+88], rax
  45003. adc r8, 0
  45004. mov rax, QWORD PTR [rcx+104]
  45005. mov QWORD PTR [rcx+96], r8
  45006. adc rax, 0
  45007. mov r8, QWORD PTR [rcx+112]
  45008. mov QWORD PTR [rcx+104], rax
  45009. adc r8, 0
  45010. mov rax, QWORD PTR [rcx+120]
  45011. mov QWORD PTR [rcx+112], r8
  45012. adc rax, 0
  45013. mov r8, QWORD PTR [rcx+128]
  45014. mov QWORD PTR [rcx+120], rax
  45015. adc r8, 0
  45016. mov rax, QWORD PTR [rcx+136]
  45017. mov QWORD PTR [rcx+128], r8
  45018. adc rax, 0
  45019. mov r8, QWORD PTR [rcx+144]
  45020. mov QWORD PTR [rcx+136], rax
  45021. adc r8, 0
  45022. mov rax, QWORD PTR [rcx+152]
  45023. mov QWORD PTR [rcx+144], r8
  45024. adc rax, 0
  45025. mov r8, QWORD PTR [rcx+160]
  45026. mov QWORD PTR [rcx+152], rax
  45027. adc r8, 0
  45028. mov rax, QWORD PTR [rcx+168]
  45029. mov QWORD PTR [rcx+160], r8
  45030. adc rax, 0
  45031. mov r8, QWORD PTR [rcx+176]
  45032. mov QWORD PTR [rcx+168], rax
  45033. adc r8, 0
  45034. mov rax, QWORD PTR [rcx+184]
  45035. mov QWORD PTR [rcx+176], r8
  45036. adc rax, 0
  45037. mov r8, QWORD PTR [rcx+192]
  45038. mov QWORD PTR [rcx+184], rax
  45039. adc r8, 0
  45040. mov rax, QWORD PTR [rcx+200]
  45041. mov QWORD PTR [rcx+192], r8
  45042. adc rax, 0
  45043. mov r8, QWORD PTR [rcx+208]
  45044. mov QWORD PTR [rcx+200], rax
  45045. adc r8, 0
  45046. mov rax, QWORD PTR [rcx+216]
  45047. mov QWORD PTR [rcx+208], r8
  45048. adc rax, 0
  45049. mov r8, QWORD PTR [rcx+224]
  45050. mov QWORD PTR [rcx+216], rax
  45051. adc r8, 0
  45052. mov rax, QWORD PTR [rcx+232]
  45053. mov QWORD PTR [rcx+224], r8
  45054. adc rax, 0
  45055. mov r8, QWORD PTR [rcx+240]
  45056. mov QWORD PTR [rcx+232], rax
  45057. adc r8, 0
  45058. mov rax, QWORD PTR [rcx+248]
  45059. mov QWORD PTR [rcx+240], r8
  45060. adc rax, 0
  45061. mov QWORD PTR [rcx+248], rax
  45062. mov rdx, QWORD PTR [rsp+520]
  45063. mov rcx, QWORD PTR [rsp+512]
  45064. add rsp, 528
  45065. ret
  45066. sp_4096_sqr_64 ENDP
  45067. _text ENDS
  45068. IFDEF HAVE_INTEL_AVX2
  45069. ; /* Square a and put result in r. (r = a * a)
  45070. ; *
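  45071. ; * Karatsuba: ah^2, al^2, (al - ah)^2 where al is the low 256 bytes of a and ah is the high 256 bytes of a.
  45072. ; *
  45073. ; * r A single precision integer that receives the result of the squaring.
  45074. ; * a A single precision integer to square.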
  45075. ; */
  45076. _text SEGMENT READONLY PARA
  45077. sp_4096_sqr_avx2_64 PROC
  45078. sub rsp, 528
  45079. mov QWORD PTR [rsp+512], rcx
  45080. mov QWORD PTR [rsp+520], rdx
  45081. mov r9, 0
  45082. mov r10, rsp
  45083. lea r11, QWORD PTR [rdx+256]
  45084. mov rax, QWORD PTR [rdx]
  45085. sub rax, QWORD PTR [r11]
  45086. mov r8, QWORD PTR [rdx+8]
  45087. mov QWORD PTR [r10], rax
  45088. sbb r8, QWORD PTR [r11+8]
  45089. mov rax, QWORD PTR [rdx+16]
  45090. mov QWORD PTR [r10+8], r8
  45091. sbb rax, QWORD PTR [r11+16]
  45092. mov r8, QWORD PTR [rdx+24]
  45093. mov QWORD PTR [r10+16], rax
  45094. sbb r8, QWORD PTR [r11+24]
  45095. mov rax, QWORD PTR [rdx+32]
  45096. mov QWORD PTR [r10+24], r8
  45097. sbb rax, QWORD PTR [r11+32]
  45098. mov r8, QWORD PTR [rdx+40]
  45099. mov QWORD PTR [r10+32], rax
  45100. sbb r8, QWORD PTR [r11+40]
  45101. mov rax, QWORD PTR [rdx+48]
  45102. mov QWORD PTR [r10+40], r8
  45103. sbb rax, QWORD PTR [r11+48]
  45104. mov r8, QWORD PTR [rdx+56]
  45105. mov QWORD PTR [r10+48], rax
  45106. sbb r8, QWORD PTR [r11+56]
  45107. mov rax, QWORD PTR [rdx+64]
  45108. mov QWORD PTR [r10+56], r8
  45109. sbb rax, QWORD PTR [r11+64]
  45110. mov r8, QWORD PTR [rdx+72]
  45111. mov QWORD PTR [r10+64], rax
  45112. sbb r8, QWORD PTR [r11+72]
  45113. mov rax, QWORD PTR [rdx+80]
  45114. mov QWORD PTR [r10+72], r8
  45115. sbb rax, QWORD PTR [r11+80]
  45116. mov r8, QWORD PTR [rdx+88]
  45117. mov QWORD PTR [r10+80], rax
  45118. sbb r8, QWORD PTR [r11+88]
  45119. mov rax, QWORD PTR [rdx+96]
  45120. mov QWORD PTR [r10+88], r8
  45121. sbb rax, QWORD PTR [r11+96]
  45122. mov r8, QWORD PTR [rdx+104]
  45123. mov QWORD PTR [r10+96], rax
  45124. sbb r8, QWORD PTR [r11+104]
  45125. mov rax, QWORD PTR [rdx+112]
  45126. mov QWORD PTR [r10+104], r8
  45127. sbb rax, QWORD PTR [r11+112]
  45128. mov r8, QWORD PTR [rdx+120]
  45129. mov QWORD PTR [r10+112], rax
  45130. sbb r8, QWORD PTR [r11+120]
  45131. mov rax, QWORD PTR [rdx+128]
  45132. mov QWORD PTR [r10+120], r8
  45133. sbb rax, QWORD PTR [r11+128]
  45134. mov r8, QWORD PTR [rdx+136]
  45135. mov QWORD PTR [r10+128], rax
  45136. sbb r8, QWORD PTR [r11+136]
  45137. mov rax, QWORD PTR [rdx+144]
  45138. mov QWORD PTR [r10+136], r8
  45139. sbb rax, QWORD PTR [r11+144]
  45140. mov r8, QWORD PTR [rdx+152]
  45141. mov QWORD PTR [r10+144], rax
  45142. sbb r8, QWORD PTR [r11+152]
  45143. mov rax, QWORD PTR [rdx+160]
  45144. mov QWORD PTR [r10+152], r8
  45145. sbb rax, QWORD PTR [r11+160]
  45146. mov r8, QWORD PTR [rdx+168]
  45147. mov QWORD PTR [r10+160], rax
  45148. sbb r8, QWORD PTR [r11+168]
  45149. mov rax, QWORD PTR [rdx+176]
  45150. mov QWORD PTR [r10+168], r8
  45151. sbb rax, QWORD PTR [r11+176]
  45152. mov r8, QWORD PTR [rdx+184]
  45153. mov QWORD PTR [r10+176], rax
  45154. sbb r8, QWORD PTR [r11+184]
  45155. mov rax, QWORD PTR [rdx+192]
  45156. mov QWORD PTR [r10+184], r8
  45157. sbb rax, QWORD PTR [r11+192]
  45158. mov r8, QWORD PTR [rdx+200]
  45159. mov QWORD PTR [r10+192], rax
  45160. sbb r8, QWORD PTR [r11+200]
  45161. mov rax, QWORD PTR [rdx+208]
  45162. mov QWORD PTR [r10+200], r8
  45163. sbb rax, QWORD PTR [r11+208]
  45164. mov r8, QWORD PTR [rdx+216]
  45165. mov QWORD PTR [r10+208], rax
  45166. sbb r8, QWORD PTR [r11+216]
  45167. mov rax, QWORD PTR [rdx+224]
  45168. mov QWORD PTR [r10+216], r8
  45169. sbb rax, QWORD PTR [r11+224]
  45170. mov r8, QWORD PTR [rdx+232]
  45171. mov QWORD PTR [r10+224], rax
  45172. sbb r8, QWORD PTR [r11+232]
  45173. mov rax, QWORD PTR [rdx+240]
  45174. mov QWORD PTR [r10+232], r8
  45175. sbb rax, QWORD PTR [r11+240]
  45176. mov r8, QWORD PTR [rdx+248]
  45177. mov QWORD PTR [r10+240], rax
  45178. sbb r8, QWORD PTR [r11+248]
  45179. mov QWORD PTR [r10+248], r8
  45180. sbb r9, 0
  45181. ; Cond Negate
  45182. mov rax, QWORD PTR [r10]
  45183. mov r11, r9
  45184. xor rax, r9
  45185. neg r11
  45186. sub rax, r9
  45187. mov r8, QWORD PTR [r10+8]
  45188. sbb r11, 0
  45189. mov QWORD PTR [r10], rax
  45190. xor r8, r9
  45191. add r8, r11
  45192. mov rax, QWORD PTR [r10+16]
  45193. setc r11b
  45194. mov QWORD PTR [r10+8], r8
  45195. xor rax, r9
  45196. add rax, r11
  45197. mov r8, QWORD PTR [r10+24]
  45198. setc r11b
  45199. mov QWORD PTR [r10+16], rax
  45200. xor r8, r9
  45201. add r8, r11
  45202. mov rax, QWORD PTR [r10+32]
  45203. setc r11b
  45204. mov QWORD PTR [r10+24], r8
  45205. xor rax, r9
  45206. add rax, r11
  45207. mov r8, QWORD PTR [r10+40]
  45208. setc r11b
  45209. mov QWORD PTR [r10+32], rax
  45210. xor r8, r9
  45211. add r8, r11
  45212. mov rax, QWORD PTR [r10+48]
  45213. setc r11b
  45214. mov QWORD PTR [r10+40], r8
  45215. xor rax, r9
  45216. add rax, r11
  45217. mov r8, QWORD PTR [r10+56]
  45218. setc r11b
  45219. mov QWORD PTR [r10+48], rax
  45220. xor r8, r9
  45221. add r8, r11
  45222. mov rax, QWORD PTR [r10+64]
  45223. setc r11b
  45224. mov QWORD PTR [r10+56], r8
  45225. xor rax, r9
  45226. add rax, r11
  45227. mov r8, QWORD PTR [r10+72]
  45228. setc r11b
  45229. mov QWORD PTR [r10+64], rax
  45230. xor r8, r9
  45231. add r8, r11
  45232. mov rax, QWORD PTR [r10+80]
  45233. setc r11b
  45234. mov QWORD PTR [r10+72], r8
  45235. xor rax, r9
  45236. add rax, r11
  45237. mov r8, QWORD PTR [r10+88]
  45238. setc r11b
  45239. mov QWORD PTR [r10+80], rax
  45240. xor r8, r9
  45241. add r8, r11
  45242. mov rax, QWORD PTR [r10+96]
  45243. setc r11b
  45244. mov QWORD PTR [r10+88], r8
  45245. xor rax, r9
  45246. add rax, r11
  45247. mov r8, QWORD PTR [r10+104]
  45248. setc r11b
  45249. mov QWORD PTR [r10+96], rax
  45250. xor r8, r9
  45251. add r8, r11
  45252. mov rax, QWORD PTR [r10+112]
  45253. setc r11b
  45254. mov QWORD PTR [r10+104], r8
  45255. xor rax, r9
  45256. add rax, r11
  45257. mov r8, QWORD PTR [r10+120]
  45258. setc r11b
  45259. mov QWORD PTR [r10+112], rax
  45260. xor r8, r9
  45261. add r8, r11
  45262. mov rax, QWORD PTR [r10+128]
  45263. setc r11b
  45264. mov QWORD PTR [r10+120], r8
  45265. xor rax, r9
  45266. add rax, r11
  45267. mov r8, QWORD PTR [r10+136]
  45268. setc r11b
  45269. mov QWORD PTR [r10+128], rax
  45270. xor r8, r9
  45271. add r8, r11
  45272. mov rax, QWORD PTR [r10+144]
  45273. setc r11b
  45274. mov QWORD PTR [r10+136], r8
  45275. xor rax, r9
  45276. add rax, r11
  45277. mov r8, QWORD PTR [r10+152]
  45278. setc r11b
  45279. mov QWORD PTR [r10+144], rax
  45280. xor r8, r9
  45281. add r8, r11
  45282. mov rax, QWORD PTR [r10+160]
  45283. setc r11b
  45284. mov QWORD PTR [r10+152], r8
  45285. xor rax, r9
  45286. add rax, r11
  45287. mov r8, QWORD PTR [r10+168]
  45288. setc r11b
  45289. mov QWORD PTR [r10+160], rax
  45290. xor r8, r9
  45291. add r8, r11
  45292. mov rax, QWORD PTR [r10+176]
  45293. setc r11b
  45294. mov QWORD PTR [r10+168], r8
  45295. xor rax, r9
  45296. add rax, r11
  45297. mov r8, QWORD PTR [r10+184]
  45298. setc r11b
  45299. mov QWORD PTR [r10+176], rax
  45300. xor r8, r9
  45301. add r8, r11
  45302. mov rax, QWORD PTR [r10+192]
  45303. setc r11b
  45304. mov QWORD PTR [r10+184], r8
  45305. xor rax, r9
  45306. add rax, r11
  45307. mov r8, QWORD PTR [r10+200]
  45308. setc r11b
  45309. mov QWORD PTR [r10+192], rax
  45310. xor r8, r9
  45311. add r8, r11
  45312. mov rax, QWORD PTR [r10+208]
  45313. setc r11b
  45314. mov QWORD PTR [r10+200], r8
  45315. xor rax, r9
  45316. add rax, r11
  45317. mov r8, QWORD PTR [r10+216]
  45318. setc r11b
  45319. mov QWORD PTR [r10+208], rax
  45320. xor r8, r9
  45321. add r8, r11
  45322. mov rax, QWORD PTR [r10+224]
  45323. setc r11b
  45324. mov QWORD PTR [r10+216], r8
  45325. xor rax, r9
  45326. add rax, r11
  45327. mov r8, QWORD PTR [r10+232]
  45328. setc r11b
  45329. mov QWORD PTR [r10+224], rax
  45330. xor r8, r9
  45331. add r8, r11
  45332. mov rax, QWORD PTR [r10+240]
  45333. setc r11b
  45334. mov QWORD PTR [r10+232], r8
  45335. xor rax, r9
  45336. add rax, r11
  45337. mov r8, QWORD PTR [r10+248]
  45338. setc r11b
  45339. mov QWORD PTR [r10+240], rax
  45340. xor r8, r9
  45341. add r8, r11
  45342. mov QWORD PTR [r10+248], r8
  45343. mov rdx, r10
  45344. mov rcx, rsp
  45345. call sp_2048_sqr_avx2_32
  45346. mov rdx, QWORD PTR [rsp+520]
  45347. mov rcx, QWORD PTR [rsp+512]
  45348. add rdx, 256
  45349. add rcx, 512
  45350. call sp_2048_sqr_avx2_32
  45351. mov rdx, QWORD PTR [rsp+520]
  45352. mov rcx, QWORD PTR [rsp+512]
  45353. call sp_2048_sqr_avx2_32
  45354. IFDEF _WIN64
  45355. mov rdx, QWORD PTR [rsp+520]
  45356. mov rcx, QWORD PTR [rsp+512]
  45357. ENDIF
  45358. mov rdx, QWORD PTR [rsp+512]
  45359. lea r10, QWORD PTR [rsp+256]
  45360. add rdx, 768
  45361. mov r9, 0
  45362. mov r8, QWORD PTR [r10+-256]
  45363. sub r8, QWORD PTR [rdx+-256]
  45364. mov rax, QWORD PTR [r10+-248]
  45365. mov QWORD PTR [r10+-256], r8
  45366. sbb rax, QWORD PTR [rdx+-248]
  45367. mov r8, QWORD PTR [r10+-240]
  45368. mov QWORD PTR [r10+-248], rax
  45369. sbb r8, QWORD PTR [rdx+-240]
  45370. mov rax, QWORD PTR [r10+-232]
  45371. mov QWORD PTR [r10+-240], r8
  45372. sbb rax, QWORD PTR [rdx+-232]
  45373. mov r8, QWORD PTR [r10+-224]
  45374. mov QWORD PTR [r10+-232], rax
  45375. sbb r8, QWORD PTR [rdx+-224]
  45376. mov rax, QWORD PTR [r10+-216]
  45377. mov QWORD PTR [r10+-224], r8
  45378. sbb rax, QWORD PTR [rdx+-216]
  45379. mov r8, QWORD PTR [r10+-208]
  45380. mov QWORD PTR [r10+-216], rax
  45381. sbb r8, QWORD PTR [rdx+-208]
  45382. mov rax, QWORD PTR [r10+-200]
  45383. mov QWORD PTR [r10+-208], r8
  45384. sbb rax, QWORD PTR [rdx+-200]
  45385. mov r8, QWORD PTR [r10+-192]
  45386. mov QWORD PTR [r10+-200], rax
  45387. sbb r8, QWORD PTR [rdx+-192]
  45388. mov rax, QWORD PTR [r10+-184]
  45389. mov QWORD PTR [r10+-192], r8
  45390. sbb rax, QWORD PTR [rdx+-184]
  45391. mov r8, QWORD PTR [r10+-176]
  45392. mov QWORD PTR [r10+-184], rax
  45393. sbb r8, QWORD PTR [rdx+-176]
  45394. mov rax, QWORD PTR [r10+-168]
  45395. mov QWORD PTR [r10+-176], r8
  45396. sbb rax, QWORD PTR [rdx+-168]
  45397. mov r8, QWORD PTR [r10+-160]
  45398. mov QWORD PTR [r10+-168], rax
  45399. sbb r8, QWORD PTR [rdx+-160]
  45400. mov rax, QWORD PTR [r10+-152]
  45401. mov QWORD PTR [r10+-160], r8
  45402. sbb rax, QWORD PTR [rdx+-152]
  45403. mov r8, QWORD PTR [r10+-144]
  45404. mov QWORD PTR [r10+-152], rax
  45405. sbb r8, QWORD PTR [rdx+-144]
  45406. mov rax, QWORD PTR [r10+-136]
  45407. mov QWORD PTR [r10+-144], r8
  45408. sbb rax, QWORD PTR [rdx+-136]
  45409. mov r8, QWORD PTR [r10+-128]
  45410. mov QWORD PTR [r10+-136], rax
  45411. sbb r8, QWORD PTR [rdx+-128]
  45412. mov rax, QWORD PTR [r10+-120]
  45413. mov QWORD PTR [r10+-128], r8
  45414. sbb rax, QWORD PTR [rdx+-120]
  45415. mov r8, QWORD PTR [r10+-112]
  45416. mov QWORD PTR [r10+-120], rax
  45417. sbb r8, QWORD PTR [rdx+-112]
  45418. mov rax, QWORD PTR [r10+-104]
  45419. mov QWORD PTR [r10+-112], r8
  45420. sbb rax, QWORD PTR [rdx+-104]
  45421. mov r8, QWORD PTR [r10+-96]
  45422. mov QWORD PTR [r10+-104], rax
  45423. sbb r8, QWORD PTR [rdx+-96]
  45424. mov rax, QWORD PTR [r10+-88]
  45425. mov QWORD PTR [r10+-96], r8
  45426. sbb rax, QWORD PTR [rdx+-88]
  45427. mov r8, QWORD PTR [r10+-80]
  45428. mov QWORD PTR [r10+-88], rax
  45429. sbb r8, QWORD PTR [rdx+-80]
  45430. mov rax, QWORD PTR [r10+-72]
  45431. mov QWORD PTR [r10+-80], r8
  45432. sbb rax, QWORD PTR [rdx+-72]
  45433. mov r8, QWORD PTR [r10+-64]
  45434. mov QWORD PTR [r10+-72], rax
  45435. sbb r8, QWORD PTR [rdx+-64]
  45436. mov rax, QWORD PTR [r10+-56]
  45437. mov QWORD PTR [r10+-64], r8
  45438. sbb rax, QWORD PTR [rdx+-56]
  45439. mov r8, QWORD PTR [r10+-48]
  45440. mov QWORD PTR [r10+-56], rax
  45441. sbb r8, QWORD PTR [rdx+-48]
  45442. mov rax, QWORD PTR [r10+-40]
  45443. mov QWORD PTR [r10+-48], r8
  45444. sbb rax, QWORD PTR [rdx+-40]
  45445. mov r8, QWORD PTR [r10+-32]
  45446. mov QWORD PTR [r10+-40], rax
  45447. sbb r8, QWORD PTR [rdx+-32]
  45448. mov rax, QWORD PTR [r10+-24]
  45449. mov QWORD PTR [r10+-32], r8
  45450. sbb rax, QWORD PTR [rdx+-24]
  45451. mov r8, QWORD PTR [r10+-16]
  45452. mov QWORD PTR [r10+-24], rax
  45453. sbb r8, QWORD PTR [rdx+-16]
  45454. mov rax, QWORD PTR [r10+-8]
  45455. mov QWORD PTR [r10+-16], r8
  45456. sbb rax, QWORD PTR [rdx+-8]
  45457. mov r8, QWORD PTR [r10]
  45458. mov QWORD PTR [r10+-8], rax
  45459. sbb r8, QWORD PTR [rdx]
  45460. mov rax, QWORD PTR [r10+8]
  45461. mov QWORD PTR [r10], r8
  45462. sbb rax, QWORD PTR [rdx+8]
  45463. mov r8, QWORD PTR [r10+16]
  45464. mov QWORD PTR [r10+8], rax
  45465. sbb r8, QWORD PTR [rdx+16]
  45466. mov rax, QWORD PTR [r10+24]
  45467. mov QWORD PTR [r10+16], r8
  45468. sbb rax, QWORD PTR [rdx+24]
  45469. mov r8, QWORD PTR [r10+32]
  45470. mov QWORD PTR [r10+24], rax
  45471. sbb r8, QWORD PTR [rdx+32]
  45472. mov rax, QWORD PTR [r10+40]
  45473. mov QWORD PTR [r10+32], r8
  45474. sbb rax, QWORD PTR [rdx+40]
  45475. mov r8, QWORD PTR [r10+48]
  45476. mov QWORD PTR [r10+40], rax
  45477. sbb r8, QWORD PTR [rdx+48]
  45478. mov rax, QWORD PTR [r10+56]
  45479. mov QWORD PTR [r10+48], r8
  45480. sbb rax, QWORD PTR [rdx+56]
  45481. mov r8, QWORD PTR [r10+64]
  45482. mov QWORD PTR [r10+56], rax
  45483. sbb r8, QWORD PTR [rdx+64]
  45484. mov rax, QWORD PTR [r10+72]
  45485. mov QWORD PTR [r10+64], r8
  45486. sbb rax, QWORD PTR [rdx+72]
  45487. mov r8, QWORD PTR [r10+80]
  45488. mov QWORD PTR [r10+72], rax
  45489. sbb r8, QWORD PTR [rdx+80]
  45490. mov rax, QWORD PTR [r10+88]
  45491. mov QWORD PTR [r10+80], r8
  45492. sbb rax, QWORD PTR [rdx+88]
  45493. mov r8, QWORD PTR [r10+96]
  45494. mov QWORD PTR [r10+88], rax
  45495. sbb r8, QWORD PTR [rdx+96]
  45496. mov rax, QWORD PTR [r10+104]
  45497. mov QWORD PTR [r10+96], r8
  45498. sbb rax, QWORD PTR [rdx+104]
  45499. mov r8, QWORD PTR [r10+112]
  45500. mov QWORD PTR [r10+104], rax
  45501. sbb r8, QWORD PTR [rdx+112]
  45502. mov rax, QWORD PTR [r10+120]
  45503. mov QWORD PTR [r10+112], r8
  45504. sbb rax, QWORD PTR [rdx+120]
  45505. mov r8, QWORD PTR [r10+128]
  45506. mov QWORD PTR [r10+120], rax
  45507. sbb r8, QWORD PTR [rdx+128]
  45508. mov rax, QWORD PTR [r10+136]
  45509. mov QWORD PTR [r10+128], r8
  45510. sbb rax, QWORD PTR [rdx+136]
  45511. mov r8, QWORD PTR [r10+144]
  45512. mov QWORD PTR [r10+136], rax
  45513. sbb r8, QWORD PTR [rdx+144]
  45514. mov rax, QWORD PTR [r10+152]
  45515. mov QWORD PTR [r10+144], r8
  45516. sbb rax, QWORD PTR [rdx+152]
  45517. mov r8, QWORD PTR [r10+160]
  45518. mov QWORD PTR [r10+152], rax
  45519. sbb r8, QWORD PTR [rdx+160]
  45520. mov rax, QWORD PTR [r10+168]
  45521. mov QWORD PTR [r10+160], r8
  45522. sbb rax, QWORD PTR [rdx+168]
  45523. mov r8, QWORD PTR [r10+176]
  45524. mov QWORD PTR [r10+168], rax
  45525. sbb r8, QWORD PTR [rdx+176]
  45526. mov rax, QWORD PTR [r10+184]
  45527. mov QWORD PTR [r10+176], r8
  45528. sbb rax, QWORD PTR [rdx+184]
  45529. mov r8, QWORD PTR [r10+192]
  45530. mov QWORD PTR [r10+184], rax
  45531. sbb r8, QWORD PTR [rdx+192]
  45532. mov rax, QWORD PTR [r10+200]
  45533. mov QWORD PTR [r10+192], r8
  45534. sbb rax, QWORD PTR [rdx+200]
  45535. mov r8, QWORD PTR [r10+208]
  45536. mov QWORD PTR [r10+200], rax
  45537. sbb r8, QWORD PTR [rdx+208]
  45538. mov rax, QWORD PTR [r10+216]
  45539. mov QWORD PTR [r10+208], r8
  45540. sbb rax, QWORD PTR [rdx+216]
  45541. mov r8, QWORD PTR [r10+224]
  45542. mov QWORD PTR [r10+216], rax
  45543. sbb r8, QWORD PTR [rdx+224]
  45544. mov rax, QWORD PTR [r10+232]
  45545. mov QWORD PTR [r10+224], r8
  45546. sbb rax, QWORD PTR [rdx+232]
  45547. mov r8, QWORD PTR [r10+240]
  45548. mov QWORD PTR [r10+232], rax
  45549. sbb r8, QWORD PTR [rdx+240]
  45550. mov rax, QWORD PTR [r10+248]
  45551. mov QWORD PTR [r10+240], r8
  45552. sbb rax, QWORD PTR [rdx+248]
  45553. mov QWORD PTR [r10+248], rax
  45554. sbb r9, 0
  45555. sub rdx, 512
  45556. mov r8, QWORD PTR [r10+-256]
  45557. sub r8, QWORD PTR [rdx+-256]
  45558. mov rax, QWORD PTR [r10+-248]
  45559. mov QWORD PTR [r10+-256], r8
  45560. sbb rax, QWORD PTR [rdx+-248]
  45561. mov r8, QWORD PTR [r10+-240]
  45562. mov QWORD PTR [r10+-248], rax
  45563. sbb r8, QWORD PTR [rdx+-240]
  45564. mov rax, QWORD PTR [r10+-232]
  45565. mov QWORD PTR [r10+-240], r8
  45566. sbb rax, QWORD PTR [rdx+-232]
  45567. mov r8, QWORD PTR [r10+-224]
  45568. mov QWORD PTR [r10+-232], rax
  45569. sbb r8, QWORD PTR [rdx+-224]
  45570. mov rax, QWORD PTR [r10+-216]
  45571. mov QWORD PTR [r10+-224], r8
  45572. sbb rax, QWORD PTR [rdx+-216]
  45573. mov r8, QWORD PTR [r10+-208]
  45574. mov QWORD PTR [r10+-216], rax
  45575. sbb r8, QWORD PTR [rdx+-208]
  45576. mov rax, QWORD PTR [r10+-200]
  45577. mov QWORD PTR [r10+-208], r8
  45578. sbb rax, QWORD PTR [rdx+-200]
  45579. mov r8, QWORD PTR [r10+-192]
  45580. mov QWORD PTR [r10+-200], rax
  45581. sbb r8, QWORD PTR [rdx+-192]
  45582. mov rax, QWORD PTR [r10+-184]
  45583. mov QWORD PTR [r10+-192], r8
  45584. sbb rax, QWORD PTR [rdx+-184]
  45585. mov r8, QWORD PTR [r10+-176]
  45586. mov QWORD PTR [r10+-184], rax
  45587. sbb r8, QWORD PTR [rdx+-176]
  45588. mov rax, QWORD PTR [r10+-168]
  45589. mov QWORD PTR [r10+-176], r8
  45590. sbb rax, QWORD PTR [rdx+-168]
  45591. mov r8, QWORD PTR [r10+-160]
  45592. mov QWORD PTR [r10+-168], rax
  45593. sbb r8, QWORD PTR [rdx+-160]
  45594. mov rax, QWORD PTR [r10+-152]
  45595. mov QWORD PTR [r10+-160], r8
  45596. sbb rax, QWORD PTR [rdx+-152]
  45597. mov r8, QWORD PTR [r10+-144]
  45598. mov QWORD PTR [r10+-152], rax
  45599. sbb r8, QWORD PTR [rdx+-144]
  45600. mov rax, QWORD PTR [r10+-136]
  45601. mov QWORD PTR [r10+-144], r8
  45602. sbb rax, QWORD PTR [rdx+-136]
  45603. mov r8, QWORD PTR [r10+-128]
  45604. mov QWORD PTR [r10+-136], rax
  45605. sbb r8, QWORD PTR [rdx+-128]
  45606. mov rax, QWORD PTR [r10+-120]
  45607. mov QWORD PTR [r10+-128], r8
  45608. sbb rax, QWORD PTR [rdx+-120]
  45609. mov r8, QWORD PTR [r10+-112]
  45610. mov QWORD PTR [r10+-120], rax
  45611. sbb r8, QWORD PTR [rdx+-112]
  45612. mov rax, QWORD PTR [r10+-104]
  45613. mov QWORD PTR [r10+-112], r8
  45614. sbb rax, QWORD PTR [rdx+-104]
  45615. mov r8, QWORD PTR [r10+-96]
  45616. mov QWORD PTR [r10+-104], rax
  45617. sbb r8, QWORD PTR [rdx+-96]
  45618. mov rax, QWORD PTR [r10+-88]
  45619. mov QWORD PTR [r10+-96], r8
  45620. sbb rax, QWORD PTR [rdx+-88]
  45621. mov r8, QWORD PTR [r10+-80]
  45622. mov QWORD PTR [r10+-88], rax
  45623. sbb r8, QWORD PTR [rdx+-80]
  45624. mov rax, QWORD PTR [r10+-72]
  45625. mov QWORD PTR [r10+-80], r8
  45626. sbb rax, QWORD PTR [rdx+-72]
  45627. mov r8, QWORD PTR [r10+-64]
  45628. mov QWORD PTR [r10+-72], rax
  45629. sbb r8, QWORD PTR [rdx+-64]
  45630. mov rax, QWORD PTR [r10+-56]
  45631. mov QWORD PTR [r10+-64], r8
  45632. sbb rax, QWORD PTR [rdx+-56]
  45633. mov r8, QWORD PTR [r10+-48]
  45634. mov QWORD PTR [r10+-56], rax
  45635. sbb r8, QWORD PTR [rdx+-48]
  45636. mov rax, QWORD PTR [r10+-40]
  45637. mov QWORD PTR [r10+-48], r8
  45638. sbb rax, QWORD PTR [rdx+-40]
  45639. mov r8, QWORD PTR [r10+-32]
  45640. mov QWORD PTR [r10+-40], rax
  45641. sbb r8, QWORD PTR [rdx+-32]
  45642. mov rax, QWORD PTR [r10+-24]
  45643. mov QWORD PTR [r10+-32], r8
  45644. sbb rax, QWORD PTR [rdx+-24]
  45645. mov r8, QWORD PTR [r10+-16]
  45646. mov QWORD PTR [r10+-24], rax
  45647. sbb r8, QWORD PTR [rdx+-16]
  45648. mov rax, QWORD PTR [r10+-8]
  45649. mov QWORD PTR [r10+-16], r8
  45650. sbb rax, QWORD PTR [rdx+-8]
  45651. mov r8, QWORD PTR [r10]
  45652. mov QWORD PTR [r10+-8], rax
  45653. sbb r8, QWORD PTR [rdx]
  45654. mov rax, QWORD PTR [r10+8]
  45655. mov QWORD PTR [r10], r8
  45656. sbb rax, QWORD PTR [rdx+8]
  45657. mov r8, QWORD PTR [r10+16]
  45658. mov QWORD PTR [r10+8], rax
  45659. sbb r8, QWORD PTR [rdx+16]
  45660. mov rax, QWORD PTR [r10+24]
  45661. mov QWORD PTR [r10+16], r8
  45662. sbb rax, QWORD PTR [rdx+24]
  45663. mov r8, QWORD PTR [r10+32]
  45664. mov QWORD PTR [r10+24], rax
  45665. sbb r8, QWORD PTR [rdx+32]
  45666. mov rax, QWORD PTR [r10+40]
  45667. mov QWORD PTR [r10+32], r8
  45668. sbb rax, QWORD PTR [rdx+40]
  45669. mov r8, QWORD PTR [r10+48]
  45670. mov QWORD PTR [r10+40], rax
  45671. sbb r8, QWORD PTR [rdx+48]
  45672. mov rax, QWORD PTR [r10+56]
  45673. mov QWORD PTR [r10+48], r8
  45674. sbb rax, QWORD PTR [rdx+56]
  45675. mov r8, QWORD PTR [r10+64]
  45676. mov QWORD PTR [r10+56], rax
  45677. sbb r8, QWORD PTR [rdx+64]
  45678. mov rax, QWORD PTR [r10+72]
  45679. mov QWORD PTR [r10+64], r8
  45680. sbb rax, QWORD PTR [rdx+72]
  45681. mov r8, QWORD PTR [r10+80]
  45682. mov QWORD PTR [r10+72], rax
  45683. sbb r8, QWORD PTR [rdx+80]
  45684. mov rax, QWORD PTR [r10+88]
  45685. mov QWORD PTR [r10+80], r8
  45686. sbb rax, QWORD PTR [rdx+88]
  45687. mov r8, QWORD PTR [r10+96]
  45688. mov QWORD PTR [r10+88], rax
  45689. sbb r8, QWORD PTR [rdx+96]
  45690. mov rax, QWORD PTR [r10+104]
  45691. mov QWORD PTR [r10+96], r8
  45692. sbb rax, QWORD PTR [rdx+104]
  45693. mov r8, QWORD PTR [r10+112]
  45694. mov QWORD PTR [r10+104], rax
  45695. sbb r8, QWORD PTR [rdx+112]
  45696. mov rax, QWORD PTR [r10+120]
  45697. mov QWORD PTR [r10+112], r8
  45698. sbb rax, QWORD PTR [rdx+120]
  45699. mov r8, QWORD PTR [r10+128]
  45700. mov QWORD PTR [r10+120], rax
  45701. sbb r8, QWORD PTR [rdx+128]
  45702. mov rax, QWORD PTR [r10+136]
  45703. mov QWORD PTR [r10+128], r8
  45704. sbb rax, QWORD PTR [rdx+136]
  45705. mov r8, QWORD PTR [r10+144]
  45706. mov QWORD PTR [r10+136], rax
  45707. sbb r8, QWORD PTR [rdx+144]
  45708. mov rax, QWORD PTR [r10+152]
  45709. mov QWORD PTR [r10+144], r8
  45710. sbb rax, QWORD PTR [rdx+152]
  45711. mov r8, QWORD PTR [r10+160]
  45712. mov QWORD PTR [r10+152], rax
  45713. sbb r8, QWORD PTR [rdx+160]
  45714. mov rax, QWORD PTR [r10+168]
  45715. mov QWORD PTR [r10+160], r8
  45716. sbb rax, QWORD PTR [rdx+168]
  45717. mov r8, QWORD PTR [r10+176]
  45718. mov QWORD PTR [r10+168], rax
  45719. sbb r8, QWORD PTR [rdx+176]
  45720. mov rax, QWORD PTR [r10+184]
  45721. mov QWORD PTR [r10+176], r8
  45722. sbb rax, QWORD PTR [rdx+184]
  45723. mov r8, QWORD PTR [r10+192]
  45724. mov QWORD PTR [r10+184], rax
  45725. sbb r8, QWORD PTR [rdx+192]
  45726. mov rax, QWORD PTR [r10+200]
  45727. mov QWORD PTR [r10+192], r8
  45728. sbb rax, QWORD PTR [rdx+200]
  45729. mov r8, QWORD PTR [r10+208]
  45730. mov QWORD PTR [r10+200], rax
  45731. sbb r8, QWORD PTR [rdx+208]
  45732. mov rax, QWORD PTR [r10+216]
  45733. mov QWORD PTR [r10+208], r8
  45734. sbb rax, QWORD PTR [rdx+216]
  45735. mov r8, QWORD PTR [r10+224]
  45736. mov QWORD PTR [r10+216], rax
  45737. sbb r8, QWORD PTR [rdx+224]
  45738. mov rax, QWORD PTR [r10+232]
  45739. mov QWORD PTR [r10+224], r8
  45740. sbb rax, QWORD PTR [rdx+232]
  45741. mov r8, QWORD PTR [r10+240]
  45742. mov QWORD PTR [r10+232], rax
  45743. sbb r8, QWORD PTR [rdx+240]
  45744. mov rax, QWORD PTR [r10+248]
  45745. mov QWORD PTR [r10+240], r8
  45746. sbb rax, QWORD PTR [rdx+248]
  45747. mov QWORD PTR [r10+248], rax
  45748. sbb r9, 0
  45749. mov rcx, QWORD PTR [rsp+512]
  45750. neg r9
  45751. add rcx, 512
  45752. mov r8, QWORD PTR [rcx+-256]
  45753. sub r8, QWORD PTR [r10+-256]
  45754. mov rax, QWORD PTR [rcx+-248]
  45755. mov QWORD PTR [rcx+-256], r8
  45756. sbb rax, QWORD PTR [r10+-248]
  45757. mov r8, QWORD PTR [rcx+-240]
  45758. mov QWORD PTR [rcx+-248], rax
  45759. sbb r8, QWORD PTR [r10+-240]
  45760. mov rax, QWORD PTR [rcx+-232]
  45761. mov QWORD PTR [rcx+-240], r8
  45762. sbb rax, QWORD PTR [r10+-232]
  45763. mov r8, QWORD PTR [rcx+-224]
  45764. mov QWORD PTR [rcx+-232], rax
  45765. sbb r8, QWORD PTR [r10+-224]
  45766. mov rax, QWORD PTR [rcx+-216]
  45767. mov QWORD PTR [rcx+-224], r8
  45768. sbb rax, QWORD PTR [r10+-216]
  45769. mov r8, QWORD PTR [rcx+-208]
  45770. mov QWORD PTR [rcx+-216], rax
  45771. sbb r8, QWORD PTR [r10+-208]
  45772. mov rax, QWORD PTR [rcx+-200]
  45773. mov QWORD PTR [rcx+-208], r8
  45774. sbb rax, QWORD PTR [r10+-200]
  45775. mov r8, QWORD PTR [rcx+-192]
  45776. mov QWORD PTR [rcx+-200], rax
  45777. sbb r8, QWORD PTR [r10+-192]
  45778. mov rax, QWORD PTR [rcx+-184]
  45779. mov QWORD PTR [rcx+-192], r8
  45780. sbb rax, QWORD PTR [r10+-184]
  45781. mov r8, QWORD PTR [rcx+-176]
  45782. mov QWORD PTR [rcx+-184], rax
  45783. sbb r8, QWORD PTR [r10+-176]
  45784. mov rax, QWORD PTR [rcx+-168]
  45785. mov QWORD PTR [rcx+-176], r8
  45786. sbb rax, QWORD PTR [r10+-168]
  45787. mov r8, QWORD PTR [rcx+-160]
  45788. mov QWORD PTR [rcx+-168], rax
  45789. sbb r8, QWORD PTR [r10+-160]
  45790. mov rax, QWORD PTR [rcx+-152]
  45791. mov QWORD PTR [rcx+-160], r8
  45792. sbb rax, QWORD PTR [r10+-152]
  45793. mov r8, QWORD PTR [rcx+-144]
  45794. mov QWORD PTR [rcx+-152], rax
  45795. sbb r8, QWORD PTR [r10+-144]
  45796. mov rax, QWORD PTR [rcx+-136]
  45797. mov QWORD PTR [rcx+-144], r8
  45798. sbb rax, QWORD PTR [r10+-136]
  45799. mov r8, QWORD PTR [rcx+-128]
  45800. mov QWORD PTR [rcx+-136], rax
  45801. sbb r8, QWORD PTR [r10+-128]
  45802. mov rax, QWORD PTR [rcx+-120]
  45803. mov QWORD PTR [rcx+-128], r8
  45804. sbb rax, QWORD PTR [r10+-120]
  45805. mov r8, QWORD PTR [rcx+-112]
  45806. mov QWORD PTR [rcx+-120], rax
  45807. sbb r8, QWORD PTR [r10+-112]
  45808. mov rax, QWORD PTR [rcx+-104]
  45809. mov QWORD PTR [rcx+-112], r8
  45810. sbb rax, QWORD PTR [r10+-104]
  45811. mov r8, QWORD PTR [rcx+-96]
  45812. mov QWORD PTR [rcx+-104], rax
  45813. sbb r8, QWORD PTR [r10+-96]
  45814. mov rax, QWORD PTR [rcx+-88]
  45815. mov QWORD PTR [rcx+-96], r8
  45816. sbb rax, QWORD PTR [r10+-88]
  45817. mov r8, QWORD PTR [rcx+-80]
  45818. mov QWORD PTR [rcx+-88], rax
  45819. sbb r8, QWORD PTR [r10+-80]
  45820. mov rax, QWORD PTR [rcx+-72]
  45821. mov QWORD PTR [rcx+-80], r8
  45822. sbb rax, QWORD PTR [r10+-72]
  45823. mov r8, QWORD PTR [rcx+-64]
  45824. mov QWORD PTR [rcx+-72], rax
  45825. sbb r8, QWORD PTR [r10+-64]
  45826. mov rax, QWORD PTR [rcx+-56]
  45827. mov QWORD PTR [rcx+-64], r8
  45828. sbb rax, QWORD PTR [r10+-56]
  45829. mov r8, QWORD PTR [rcx+-48]
  45830. mov QWORD PTR [rcx+-56], rax
  45831. sbb r8, QWORD PTR [r10+-48]
  45832. mov rax, QWORD PTR [rcx+-40]
  45833. mov QWORD PTR [rcx+-48], r8
  45834. sbb rax, QWORD PTR [r10+-40]
  45835. mov r8, QWORD PTR [rcx+-32]
  45836. mov QWORD PTR [rcx+-40], rax
  45837. sbb r8, QWORD PTR [r10+-32]
  45838. mov rax, QWORD PTR [rcx+-24]
  45839. mov QWORD PTR [rcx+-32], r8
  45840. sbb rax, QWORD PTR [r10+-24]
  45841. mov r8, QWORD PTR [rcx+-16]
  45842. mov QWORD PTR [rcx+-24], rax
  45843. sbb r8, QWORD PTR [r10+-16]
  45844. mov rax, QWORD PTR [rcx+-8]
  45845. mov QWORD PTR [rcx+-16], r8
  45846. sbb rax, QWORD PTR [r10+-8]
  45847. mov r8, QWORD PTR [rcx]
  45848. mov QWORD PTR [rcx+-8], rax
  45849. sbb r8, QWORD PTR [r10]
  45850. mov rax, QWORD PTR [rcx+8]
  45851. mov QWORD PTR [rcx], r8
  45852. sbb rax, QWORD PTR [r10+8]
  45853. mov r8, QWORD PTR [rcx+16]
  45854. mov QWORD PTR [rcx+8], rax
  45855. sbb r8, QWORD PTR [r10+16]
  45856. mov rax, QWORD PTR [rcx+24]
  45857. mov QWORD PTR [rcx+16], r8
  45858. sbb rax, QWORD PTR [r10+24]
  45859. mov r8, QWORD PTR [rcx+32]
  45860. mov QWORD PTR [rcx+24], rax
  45861. sbb r8, QWORD PTR [r10+32]
  45862. mov rax, QWORD PTR [rcx+40]
  45863. mov QWORD PTR [rcx+32], r8
  45864. sbb rax, QWORD PTR [r10+40]
  45865. mov r8, QWORD PTR [rcx+48]
  45866. mov QWORD PTR [rcx+40], rax
  45867. sbb r8, QWORD PTR [r10+48]
  45868. mov rax, QWORD PTR [rcx+56]
  45869. mov QWORD PTR [rcx+48], r8
  45870. sbb rax, QWORD PTR [r10+56]
  45871. mov r8, QWORD PTR [rcx+64]
  45872. mov QWORD PTR [rcx+56], rax
  45873. sbb r8, QWORD PTR [r10+64]
  45874. mov rax, QWORD PTR [rcx+72]
  45875. mov QWORD PTR [rcx+64], r8
  45876. sbb rax, QWORD PTR [r10+72]
  45877. mov r8, QWORD PTR [rcx+80]
  45878. mov QWORD PTR [rcx+72], rax
  45879. sbb r8, QWORD PTR [r10+80]
  45880. mov rax, QWORD PTR [rcx+88]
  45881. mov QWORD PTR [rcx+80], r8
  45882. sbb rax, QWORD PTR [r10+88]
  45883. mov r8, QWORD PTR [rcx+96]
  45884. mov QWORD PTR [rcx+88], rax
  45885. sbb r8, QWORD PTR [r10+96]
  45886. mov rax, QWORD PTR [rcx+104]
  45887. mov QWORD PTR [rcx+96], r8
  45888. sbb rax, QWORD PTR [r10+104]
  45889. mov r8, QWORD PTR [rcx+112]
  45890. mov QWORD PTR [rcx+104], rax
  45891. sbb r8, QWORD PTR [r10+112]
  45892. mov rax, QWORD PTR [rcx+120]
  45893. mov QWORD PTR [rcx+112], r8
  45894. sbb rax, QWORD PTR [r10+120]
  45895. mov r8, QWORD PTR [rcx+128]
  45896. mov QWORD PTR [rcx+120], rax
  45897. sbb r8, QWORD PTR [r10+128]
  45898. mov rax, QWORD PTR [rcx+136]
  45899. mov QWORD PTR [rcx+128], r8
  45900. sbb rax, QWORD PTR [r10+136]
  45901. mov r8, QWORD PTR [rcx+144]
  45902. mov QWORD PTR [rcx+136], rax
  45903. sbb r8, QWORD PTR [r10+144]
  45904. mov rax, QWORD PTR [rcx+152]
  45905. mov QWORD PTR [rcx+144], r8
  45906. sbb rax, QWORD PTR [r10+152]
  45907. mov r8, QWORD PTR [rcx+160]
  45908. mov QWORD PTR [rcx+152], rax
  45909. sbb r8, QWORD PTR [r10+160]
  45910. mov rax, QWORD PTR [rcx+168]
  45911. mov QWORD PTR [rcx+160], r8
  45912. sbb rax, QWORD PTR [r10+168]
  45913. mov r8, QWORD PTR [rcx+176]
  45914. mov QWORD PTR [rcx+168], rax
  45915. sbb r8, QWORD PTR [r10+176]
  45916. mov rax, QWORD PTR [rcx+184]
  45917. mov QWORD PTR [rcx+176], r8
  45918. sbb rax, QWORD PTR [r10+184]
  45919. mov r8, QWORD PTR [rcx+192]
  45920. mov QWORD PTR [rcx+184], rax
  45921. sbb r8, QWORD PTR [r10+192]
  45922. mov rax, QWORD PTR [rcx+200]
  45923. mov QWORD PTR [rcx+192], r8
  45924. sbb rax, QWORD PTR [r10+200]
  45925. mov r8, QWORD PTR [rcx+208]
  45926. mov QWORD PTR [rcx+200], rax
  45927. sbb r8, QWORD PTR [r10+208]
  45928. mov rax, QWORD PTR [rcx+216]
  45929. mov QWORD PTR [rcx+208], r8
  45930. sbb rax, QWORD PTR [r10+216]
  45931. mov r8, QWORD PTR [rcx+224]
  45932. mov QWORD PTR [rcx+216], rax
  45933. sbb r8, QWORD PTR [r10+224]
  45934. mov rax, QWORD PTR [rcx+232]
  45935. mov QWORD PTR [rcx+224], r8
  45936. sbb rax, QWORD PTR [r10+232]
  45937. mov r8, QWORD PTR [rcx+240]
  45938. mov QWORD PTR [rcx+232], rax
  45939. sbb r8, QWORD PTR [r10+240]
  45940. mov rax, QWORD PTR [rcx+248]
  45941. mov QWORD PTR [rcx+240], r8
  45942. sbb rax, QWORD PTR [r10+248]
  45943. mov QWORD PTR [rcx+248], rax
  45944. sbb r9, 0
  45945. mov rcx, QWORD PTR [rsp+512]
  45946. add rcx, 768
  45947. ; Add in word
  45948. mov r8, QWORD PTR [rcx]
  45949. add r8, r9
  45950. mov rax, QWORD PTR [rcx+8]
  45951. mov QWORD PTR [rcx], r8
  45952. adc rax, 0
  45953. mov r8, QWORD PTR [rcx+16]
  45954. mov QWORD PTR [rcx+8], rax
  45955. adc r8, 0
  45956. mov rax, QWORD PTR [rcx+24]
  45957. mov QWORD PTR [rcx+16], r8
  45958. adc rax, 0
  45959. mov r8, QWORD PTR [rcx+32]
  45960. mov QWORD PTR [rcx+24], rax
  45961. adc r8, 0
  45962. mov rax, QWORD PTR [rcx+40]
  45963. mov QWORD PTR [rcx+32], r8
  45964. adc rax, 0
  45965. mov r8, QWORD PTR [rcx+48]
  45966. mov QWORD PTR [rcx+40], rax
  45967. adc r8, 0
  45968. mov rax, QWORD PTR [rcx+56]
  45969. mov QWORD PTR [rcx+48], r8
  45970. adc rax, 0
  45971. mov r8, QWORD PTR [rcx+64]
  45972. mov QWORD PTR [rcx+56], rax
  45973. adc r8, 0
  45974. mov rax, QWORD PTR [rcx+72]
  45975. mov QWORD PTR [rcx+64], r8
  45976. adc rax, 0
  45977. mov r8, QWORD PTR [rcx+80]
  45978. mov QWORD PTR [rcx+72], rax
  45979. adc r8, 0
  45980. mov rax, QWORD PTR [rcx+88]
  45981. mov QWORD PTR [rcx+80], r8
  45982. adc rax, 0
  45983. mov r8, QWORD PTR [rcx+96]
  45984. mov QWORD PTR [rcx+88], rax
  45985. adc r8, 0
  45986. mov rax, QWORD PTR [rcx+104]
  45987. mov QWORD PTR [rcx+96], r8
  45988. adc rax, 0
  45989. mov r8, QWORD PTR [rcx+112]
  45990. mov QWORD PTR [rcx+104], rax
  45991. adc r8, 0
  45992. mov rax, QWORD PTR [rcx+120]
  45993. mov QWORD PTR [rcx+112], r8
  45994. adc rax, 0
  45995. mov r8, QWORD PTR [rcx+128]
  45996. mov QWORD PTR [rcx+120], rax
  45997. adc r8, 0
  45998. mov rax, QWORD PTR [rcx+136]
  45999. mov QWORD PTR [rcx+128], r8
  46000. adc rax, 0
  46001. mov r8, QWORD PTR [rcx+144]
  46002. mov QWORD PTR [rcx+136], rax
  46003. adc r8, 0
  46004. mov rax, QWORD PTR [rcx+152]
  46005. mov QWORD PTR [rcx+144], r8
  46006. adc rax, 0
  46007. mov r8, QWORD PTR [rcx+160]
  46008. mov QWORD PTR [rcx+152], rax
  46009. adc r8, 0
  46010. mov rax, QWORD PTR [rcx+168]
  46011. mov QWORD PTR [rcx+160], r8
  46012. adc rax, 0
  46013. mov r8, QWORD PTR [rcx+176]
  46014. mov QWORD PTR [rcx+168], rax
  46015. adc r8, 0
  46016. mov rax, QWORD PTR [rcx+184]
  46017. mov QWORD PTR [rcx+176], r8
  46018. adc rax, 0
  46019. mov r8, QWORD PTR [rcx+192]
  46020. mov QWORD PTR [rcx+184], rax
  46021. adc r8, 0
  46022. mov rax, QWORD PTR [rcx+200]
  46023. mov QWORD PTR [rcx+192], r8
  46024. adc rax, 0
  46025. mov r8, QWORD PTR [rcx+208]
  46026. mov QWORD PTR [rcx+200], rax
  46027. adc r8, 0
  46028. mov rax, QWORD PTR [rcx+216]
  46029. mov QWORD PTR [rcx+208], r8
  46030. adc rax, 0
  46031. mov r8, QWORD PTR [rcx+224]
  46032. mov QWORD PTR [rcx+216], rax
  46033. adc r8, 0
  46034. mov rax, QWORD PTR [rcx+232]
  46035. mov QWORD PTR [rcx+224], r8
  46036. adc rax, 0
  46037. mov r8, QWORD PTR [rcx+240]
  46038. mov QWORD PTR [rcx+232], rax
  46039. adc r8, 0
  46040. mov rax, QWORD PTR [rcx+248]
  46041. mov QWORD PTR [rcx+240], r8
  46042. adc rax, 0
  46043. mov QWORD PTR [rcx+248], rax
  46044. mov rdx, QWORD PTR [rsp+520]
  46045. mov rcx, QWORD PTR [rsp+512]
  46046. add rsp, 528
  46047. ret
  46048. sp_4096_sqr_avx2_64 ENDP
  46049. _text ENDS
  46050. ENDIF
  46051. ; /* Multiply a by digit b and store the result in r. (r = a * b)
  46052. ; *
  46053. ; * r A single precision integer that receives the product.
  46054. ; * a A single precision integer.
  46055. ; * b A single precision digit.
  46056. ; */
  46057. _text SEGMENT READONLY PARA
  46058. sp_4096_mul_d_64 PROC
  46059. push r12
  46060. mov r9, rdx
  46061. ; A[0] * B
  46062. mov rax, r8
  46063. xor r12, r12
  46064. mul QWORD PTR [r9]
  46065. mov r10, rax
  46066. mov r11, rdx
  46067. mov QWORD PTR [rcx], r10
  46068. ; A[1] * B
  46069. mov rax, r8
  46070. xor r10, r10
  46071. mul QWORD PTR [r9+8]
  46072. add r11, rax
  46073. mov QWORD PTR [rcx+8], r11
  46074. adc r12, rdx
  46075. adc r10, 0
  46076. ; A[2] * B
  46077. mov rax, r8
  46078. xor r11, r11
  46079. mul QWORD PTR [r9+16]
  46080. add r12, rax
  46081. mov QWORD PTR [rcx+16], r12
  46082. adc r10, rdx
  46083. adc r11, 0
  46084. ; A[3] * B
  46085. mov rax, r8
  46086. xor r12, r12
  46087. mul QWORD PTR [r9+24]
  46088. add r10, rax
  46089. mov QWORD PTR [rcx+24], r10
  46090. adc r11, rdx
  46091. adc r12, 0
  46092. ; A[4] * B
  46093. mov rax, r8
  46094. xor r10, r10
  46095. mul QWORD PTR [r9+32]
  46096. add r11, rax
  46097. mov QWORD PTR [rcx+32], r11
  46098. adc r12, rdx
  46099. adc r10, 0
  46100. ; A[5] * B
  46101. mov rax, r8
  46102. xor r11, r11
  46103. mul QWORD PTR [r9+40]
  46104. add r12, rax
  46105. mov QWORD PTR [rcx+40], r12
  46106. adc r10, rdx
  46107. adc r11, 0
  46108. ; A[6] * B
  46109. mov rax, r8
  46110. xor r12, r12
  46111. mul QWORD PTR [r9+48]
  46112. add r10, rax
  46113. mov QWORD PTR [rcx+48], r10
  46114. adc r11, rdx
  46115. adc r12, 0
  46116. ; A[7] * B
  46117. mov rax, r8
  46118. xor r10, r10
  46119. mul QWORD PTR [r9+56]
  46120. add r11, rax
  46121. mov QWORD PTR [rcx+56], r11
  46122. adc r12, rdx
  46123. adc r10, 0
  46124. ; A[8] * B
  46125. mov rax, r8
  46126. xor r11, r11
  46127. mul QWORD PTR [r9+64]
  46128. add r12, rax
  46129. mov QWORD PTR [rcx+64], r12
  46130. adc r10, rdx
  46131. adc r11, 0
  46132. ; A[9] * B
  46133. mov rax, r8
  46134. xor r12, r12
  46135. mul QWORD PTR [r9+72]
  46136. add r10, rax
  46137. mov QWORD PTR [rcx+72], r10
  46138. adc r11, rdx
  46139. adc r12, 0
  46140. ; A[10] * B
  46141. mov rax, r8
  46142. xor r10, r10
  46143. mul QWORD PTR [r9+80]
  46144. add r11, rax
  46145. mov QWORD PTR [rcx+80], r11
  46146. adc r12, rdx
  46147. adc r10, 0
  46148. ; A[11] * B
  46149. mov rax, r8
  46150. xor r11, r11
  46151. mul QWORD PTR [r9+88]
  46152. add r12, rax
  46153. mov QWORD PTR [rcx+88], r12
  46154. adc r10, rdx
  46155. adc r11, 0
  46156. ; A[12] * B
  46157. mov rax, r8
  46158. xor r12, r12
  46159. mul QWORD PTR [r9+96]
  46160. add r10, rax
  46161. mov QWORD PTR [rcx+96], r10
  46162. adc r11, rdx
  46163. adc r12, 0
  46164. ; A[13] * B
  46165. mov rax, r8
  46166. xor r10, r10
  46167. mul QWORD PTR [r9+104]
  46168. add r11, rax
  46169. mov QWORD PTR [rcx+104], r11
  46170. adc r12, rdx
  46171. adc r10, 0
  46172. ; A[14] * B
  46173. mov rax, r8
  46174. xor r11, r11
  46175. mul QWORD PTR [r9+112]
  46176. add r12, rax
  46177. mov QWORD PTR [rcx+112], r12
  46178. adc r10, rdx
  46179. adc r11, 0
  46180. ; A[15] * B
  46181. mov rax, r8
  46182. xor r12, r12
  46183. mul QWORD PTR [r9+120]
  46184. add r10, rax
  46185. mov QWORD PTR [rcx+120], r10
  46186. adc r11, rdx
  46187. adc r12, 0
  46188. ; A[16] * B
  46189. mov rax, r8
  46190. xor r10, r10
  46191. mul QWORD PTR [r9+128]
  46192. add r11, rax
  46193. mov QWORD PTR [rcx+128], r11
  46194. adc r12, rdx
  46195. adc r10, 0
  46196. ; A[17] * B
  46197. mov rax, r8
  46198. xor r11, r11
  46199. mul QWORD PTR [r9+136]
  46200. add r12, rax
  46201. mov QWORD PTR [rcx+136], r12
  46202. adc r10, rdx
  46203. adc r11, 0
  46204. ; A[18] * B
  46205. mov rax, r8
  46206. xor r12, r12
  46207. mul QWORD PTR [r9+144]
  46208. add r10, rax
  46209. mov QWORD PTR [rcx+144], r10
  46210. adc r11, rdx
  46211. adc r12, 0
  46212. ; A[19] * B
  46213. mov rax, r8
  46214. xor r10, r10
  46215. mul QWORD PTR [r9+152]
  46216. add r11, rax
  46217. mov QWORD PTR [rcx+152], r11
  46218. adc r12, rdx
  46219. adc r10, 0
  46220. ; A[20] * B
  46221. mov rax, r8
  46222. xor r11, r11
  46223. mul QWORD PTR [r9+160]
  46224. add r12, rax
  46225. mov QWORD PTR [rcx+160], r12
  46226. adc r10, rdx
  46227. adc r11, 0
  46228. ; A[21] * B
  46229. mov rax, r8
  46230. xor r12, r12
  46231. mul QWORD PTR [r9+168]
  46232. add r10, rax
  46233. mov QWORD PTR [rcx+168], r10
  46234. adc r11, rdx
  46235. adc r12, 0
  46236. ; A[22] * B
  46237. mov rax, r8
  46238. xor r10, r10
  46239. mul QWORD PTR [r9+176]
  46240. add r11, rax
  46241. mov QWORD PTR [rcx+176], r11
  46242. adc r12, rdx
  46243. adc r10, 0
  46244. ; A[23] * B
  46245. mov rax, r8
  46246. xor r11, r11
  46247. mul QWORD PTR [r9+184]
  46248. add r12, rax
  46249. mov QWORD PTR [rcx+184], r12
  46250. adc r10, rdx
  46251. adc r11, 0
  46252. ; A[24] * B
  46253. mov rax, r8
  46254. xor r12, r12
  46255. mul QWORD PTR [r9+192]
  46256. add r10, rax
  46257. mov QWORD PTR [rcx+192], r10
  46258. adc r11, rdx
  46259. adc r12, 0
  46260. ; A[25] * B
  46261. mov rax, r8
  46262. xor r10, r10
  46263. mul QWORD PTR [r9+200]
  46264. add r11, rax
  46265. mov QWORD PTR [rcx+200], r11
  46266. adc r12, rdx
  46267. adc r10, 0
  46268. ; A[26] * B
  46269. mov rax, r8
  46270. xor r11, r11
  46271. mul QWORD PTR [r9+208]
  46272. add r12, rax
  46273. mov QWORD PTR [rcx+208], r12
  46274. adc r10, rdx
  46275. adc r11, 0
  46276. ; A[27] * B
  46277. mov rax, r8
  46278. xor r12, r12
  46279. mul QWORD PTR [r9+216]
  46280. add r10, rax
  46281. mov QWORD PTR [rcx+216], r10
  46282. adc r11, rdx
  46283. adc r12, 0
  46284. ; A[28] * B
  46285. mov rax, r8
  46286. xor r10, r10
  46287. mul QWORD PTR [r9+224]
  46288. add r11, rax
  46289. mov QWORD PTR [rcx+224], r11
  46290. adc r12, rdx
  46291. adc r10, 0
  46292. ; A[29] * B
  46293. mov rax, r8
  46294. xor r11, r11
  46295. mul QWORD PTR [r9+232]
  46296. add r12, rax
  46297. mov QWORD PTR [rcx+232], r12
  46298. adc r10, rdx
  46299. adc r11, 0
  46300. ; A[30] * B
  46301. mov rax, r8
  46302. xor r12, r12
  46303. mul QWORD PTR [r9+240]
  46304. add r10, rax
  46305. mov QWORD PTR [rcx+240], r10
  46306. adc r11, rdx
  46307. adc r12, 0
  46308. ; A[31] * B
  46309. mov rax, r8
  46310. xor r10, r10
  46311. mul QWORD PTR [r9+248]
  46312. add r11, rax
  46313. mov QWORD PTR [rcx+248], r11
  46314. adc r12, rdx
  46315. adc r10, 0
  46316. ; A[32] * B
  46317. mov rax, r8
  46318. xor r11, r11
  46319. mul QWORD PTR [r9+256]
  46320. add r12, rax
  46321. mov QWORD PTR [rcx+256], r12
  46322. adc r10, rdx
  46323. adc r11, 0
  46324. ; A[33] * B
  46325. mov rax, r8
  46326. xor r12, r12
  46327. mul QWORD PTR [r9+264]
  46328. add r10, rax
  46329. mov QWORD PTR [rcx+264], r10
  46330. adc r11, rdx
  46331. adc r12, 0
  46332. ; A[34] * B
  46333. mov rax, r8
  46334. xor r10, r10
  46335. mul QWORD PTR [r9+272]
  46336. add r11, rax
  46337. mov QWORD PTR [rcx+272], r11
  46338. adc r12, rdx
  46339. adc r10, 0
  46340. ; A[35] * B
  46341. mov rax, r8
  46342. xor r11, r11
  46343. mul QWORD PTR [r9+280]
  46344. add r12, rax
  46345. mov QWORD PTR [rcx+280], r12
  46346. adc r10, rdx
  46347. adc r11, 0
  46348. ; A[36] * B
  46349. mov rax, r8
  46350. xor r12, r12
  46351. mul QWORD PTR [r9+288]
  46352. add r10, rax
  46353. mov QWORD PTR [rcx+288], r10
  46354. adc r11, rdx
  46355. adc r12, 0
  46356. ; A[37] * B
  46357. mov rax, r8
  46358. xor r10, r10
  46359. mul QWORD PTR [r9+296]
  46360. add r11, rax
  46361. mov QWORD PTR [rcx+296], r11
  46362. adc r12, rdx
  46363. adc r10, 0
  46364. ; A[38] * B
  46365. mov rax, r8
  46366. xor r11, r11
  46367. mul QWORD PTR [r9+304]
  46368. add r12, rax
  46369. mov QWORD PTR [rcx+304], r12
  46370. adc r10, rdx
  46371. adc r11, 0
  46372. ; A[39] * B
  46373. mov rax, r8
  46374. xor r12, r12
  46375. mul QWORD PTR [r9+312]
  46376. add r10, rax
  46377. mov QWORD PTR [rcx+312], r10
  46378. adc r11, rdx
  46379. adc r12, 0
  46380. ; A[40] * B
  46381. mov rax, r8
  46382. xor r10, r10
  46383. mul QWORD PTR [r9+320]
  46384. add r11, rax
  46385. mov QWORD PTR [rcx+320], r11
  46386. adc r12, rdx
  46387. adc r10, 0
  46388. ; A[41] * B
  46389. mov rax, r8
  46390. xor r11, r11
  46391. mul QWORD PTR [r9+328]
  46392. add r12, rax
  46393. mov QWORD PTR [rcx+328], r12
  46394. adc r10, rdx
  46395. adc r11, 0
  46396. ; A[42] * B
  46397. mov rax, r8
  46398. xor r12, r12
  46399. mul QWORD PTR [r9+336]
  46400. add r10, rax
  46401. mov QWORD PTR [rcx+336], r10
  46402. adc r11, rdx
  46403. adc r12, 0
  46404. ; A[43] * B
  46405. mov rax, r8
  46406. xor r10, r10
  46407. mul QWORD PTR [r9+344]
  46408. add r11, rax
  46409. mov QWORD PTR [rcx+344], r11
  46410. adc r12, rdx
  46411. adc r10, 0
  46412. ; A[44] * B
  46413. mov rax, r8
  46414. xor r11, r11
  46415. mul QWORD PTR [r9+352]
  46416. add r12, rax
  46417. mov QWORD PTR [rcx+352], r12
  46418. adc r10, rdx
  46419. adc r11, 0
  46420. ; A[45] * B
  46421. mov rax, r8
  46422. xor r12, r12
  46423. mul QWORD PTR [r9+360]
  46424. add r10, rax
  46425. mov QWORD PTR [rcx+360], r10
  46426. adc r11, rdx
  46427. adc r12, 0
  46428. ; A[46] * B
  46429. mov rax, r8
  46430. xor r10, r10
  46431. mul QWORD PTR [r9+368]
  46432. add r11, rax
  46433. mov QWORD PTR [rcx+368], r11
  46434. adc r12, rdx
  46435. adc r10, 0
  46436. ; A[47] * B
  46437. mov rax, r8
  46438. xor r11, r11
  46439. mul QWORD PTR [r9+376]
  46440. add r12, rax
  46441. mov QWORD PTR [rcx+376], r12
  46442. adc r10, rdx
  46443. adc r11, 0
  46444. ; A[48] * B
  46445. mov rax, r8
  46446. xor r12, r12
  46447. mul QWORD PTR [r9+384]
  46448. add r10, rax
  46449. mov QWORD PTR [rcx+384], r10
  46450. adc r11, rdx
  46451. adc r12, 0
  46452. ; A[49] * B
  46453. mov rax, r8
  46454. xor r10, r10
  46455. mul QWORD PTR [r9+392]
  46456. add r11, rax
  46457. mov QWORD PTR [rcx+392], r11
  46458. adc r12, rdx
  46459. adc r10, 0
  46460. ; A[50] * B
  46461. mov rax, r8
  46462. xor r11, r11
  46463. mul QWORD PTR [r9+400]
  46464. add r12, rax
  46465. mov QWORD PTR [rcx+400], r12
  46466. adc r10, rdx
  46467. adc r11, 0
  46468. ; A[51] * B
  46469. mov rax, r8
  46470. xor r12, r12
  46471. mul QWORD PTR [r9+408]
  46472. add r10, rax
  46473. mov QWORD PTR [rcx+408], r10
  46474. adc r11, rdx
  46475. adc r12, 0
  46476. ; A[52] * B
  46477. mov rax, r8
  46478. xor r10, r10
  46479. mul QWORD PTR [r9+416]
  46480. add r11, rax
  46481. mov QWORD PTR [rcx+416], r11
  46482. adc r12, rdx
  46483. adc r10, 0
  46484. ; A[53] * B
  46485. mov rax, r8
  46486. xor r11, r11
  46487. mul QWORD PTR [r9+424]
  46488. add r12, rax
  46489. mov QWORD PTR [rcx+424], r12
  46490. adc r10, rdx
  46491. adc r11, 0
  46492. ; A[54] * B
  46493. mov rax, r8
  46494. xor r12, r12
  46495. mul QWORD PTR [r9+432]
  46496. add r10, rax
  46497. mov QWORD PTR [rcx+432], r10
  46498. adc r11, rdx
  46499. adc r12, 0
  46500. ; A[55] * B
  46501. mov rax, r8
  46502. xor r10, r10
  46503. mul QWORD PTR [r9+440]
  46504. add r11, rax
  46505. mov QWORD PTR [rcx+440], r11
  46506. adc r12, rdx
  46507. adc r10, 0
  46508. ; A[56] * B
  46509. mov rax, r8
  46510. xor r11, r11
  46511. mul QWORD PTR [r9+448]
  46512. add r12, rax
  46513. mov QWORD PTR [rcx+448], r12
  46514. adc r10, rdx
  46515. adc r11, 0
  46516. ; A[57] * B
  46517. mov rax, r8
  46518. xor r12, r12
  46519. mul QWORD PTR [r9+456]
  46520. add r10, rax
  46521. mov QWORD PTR [rcx+456], r10
  46522. adc r11, rdx
  46523. adc r12, 0
  46524. ; A[58] * B
  46525. mov rax, r8
  46526. xor r10, r10
  46527. mul QWORD PTR [r9+464]
  46528. add r11, rax
  46529. mov QWORD PTR [rcx+464], r11
  46530. adc r12, rdx
  46531. adc r10, 0
  46532. ; A[59] * B
  46533. mov rax, r8
  46534. xor r11, r11
  46535. mul QWORD PTR [r9+472]
  46536. add r12, rax
  46537. mov QWORD PTR [rcx+472], r12
  46538. adc r10, rdx
  46539. adc r11, 0
  46540. ; A[60] * B
  46541. mov rax, r8
  46542. xor r12, r12
  46543. mul QWORD PTR [r9+480]
  46544. add r10, rax
  46545. mov QWORD PTR [rcx+480], r10
  46546. adc r11, rdx
  46547. adc r12, 0
  46548. ; A[61] * B
  46549. mov rax, r8
  46550. xor r10, r10
  46551. mul QWORD PTR [r9+488]
  46552. add r11, rax
  46553. mov QWORD PTR [rcx+488], r11
  46554. adc r12, rdx
  46555. adc r10, 0
  46556. ; A[62] * B
  46557. mov rax, r8
  46558. xor r11, r11
  46559. mul QWORD PTR [r9+496]
  46560. add r12, rax
  46561. mov QWORD PTR [rcx+496], r12
  46562. adc r10, rdx
  46563. adc r11, 0
  46564. ; A[63] * B
  46565. mov rax, r8
  46566. mul QWORD PTR [r9+504]
  46567. add r10, rax
  46568. adc r11, rdx
  46569. mov QWORD PTR [rcx+504], r10
  46570. mov QWORD PTR [rcx+512], r11
  46571. pop r12
  46572. ret
  46573. sp_4096_mul_d_64 ENDP
  46574. _text ENDS
  46575. ; /* Conditionally subtract b from a using the mask m.
  46576. ; * m is -1 to subtract and 0 when not subtracting.
  46577. ; *
  46578. ; * r A single precision number representing condition subtract result.
  46579. ; * a A single precision number to subtract from.
  46580. ; * b A single precision number to subtract.
  46581. ; * m Mask value to apply.
  46582. ; */
  _text SEGMENT READONLY PARA
  sp_4096_cond_sub_64 PROC
        ; Branch-free conditional subtract over 64 qwords (4096 bits):
        ;     r = a - (b AND m)
        ;
        ; Register arguments (Microsoft x64 order r, a, b, m):
        ;     rcx = r   destination, 64 qwords written
        ;     rdx = a   minuend, 64 qwords read
        ;     r8  = b   subtrahend, 64 qwords read
        ;     r9  = m   mask ANDed into every word of b
        ; Result:
        ;     rax = 0 when the subtraction did not borrow out of the top
        ;           word, all ones (-1) when it did.
        ; Scratch: rax, r10, r11, flags, and 512 bytes of stack that hold
        ;          the masked copy of b.

        sub     rsp, 512                        ; 64 qwords of temporary

        ; Pass 1: tmp[i] = b[i] AND m for i = 0..63.
        ; AND rewrites the carry flag, so all masking is completed before
        ; the borrow chain in pass 2 starts.
        cond_sub_4096_off = 0                   ; assembly-time byte offset
        REPT 64
        mov     rax, QWORD PTR [r8+cond_sub_4096_off]
        and     rax, r9
        mov     QWORD PTR [rsp+cond_sub_4096_off], rax
        cond_sub_4096_off = cond_sub_4096_off + 8
        ENDM

        ; Pass 2: r = a - tmp with the borrow carried in CF.
        ; Only MOV sits between the SBBs, so CF survives from word to word.
        ; Each result word is stored after the next word of a has been
        ; loaded; r10 and r11 alternate as the in-flight result.

        ; Word 0 opens the chain with a plain SUB (no incoming borrow).
        mov     r10, QWORD PTR [rdx]
        sub     r10, QWORD PTR [rsp]

        ; Words 1..62, two per expansion.
        cond_sub_4096_off = 8
        REPT 31
        mov     r11, QWORD PTR [rdx+cond_sub_4096_off]
        sbb     r11, QWORD PTR [rsp+cond_sub_4096_off]
        mov     QWORD PTR [rcx+cond_sub_4096_off-8], r10
        mov     r10, QWORD PTR [rdx+cond_sub_4096_off+8]
        sbb     r10, QWORD PTR [rsp+cond_sub_4096_off+8]
        mov     QWORD PTR [rcx+cond_sub_4096_off], r11
        cond_sub_4096_off = cond_sub_4096_off + 16
        ENDM

        ; Word 63, then flush the two results still held in registers.
        mov     r11, QWORD PTR [rdx+504]
        sbb     r11, QWORD PTR [rsp+504]
        mov     QWORD PTR [rcx+496], r10
        mov     QWORD PTR [rcx+504], r11

        ; rax = rax - rax - CF = -CF: 0 without borrow, -1 with borrow.
        sbb     rax, rax
        add     rsp, 512                        ; release the temporary
        ret
  sp_4096_cond_sub_64 ENDP
  _text ENDS
  47039. ; /* Reduce the number back to 4096 bits using Montgomery reduction.
  47040. ; *
  47041. ; * a A single precision number to reduce in place.
  47042. ; * m The single precision number representing the modulus.
  47043. ; * mp The digit representing the negative inverse of m mod 2^n.
  47044. ; */
  47045. _text SEGMENT READONLY PARA
  47046. sp_4096_mont_reduce_64 PROC
  47047. push r12
  47048. push r13
  47049. push r14
  47050. push r15
  47051. push rdi
  47052. push rsi
  47053. mov r9, rdx
  47054. xor rsi, rsi
  47055. ; i = 64
  47056. mov r10, 64
  47057. mov r15, QWORD PTR [rcx]
  47058. mov rdi, QWORD PTR [rcx+8]
  47059. L_4096_mont_reduce_64_loop:
  47060. ; mu = a[i] * mp
  47061. mov r13, r15
  47062. imul r13, r8
  47063. ; a[i+0] += m[0] * mu
  47064. mov rax, r13
  47065. xor r12, r12
  47066. mul QWORD PTR [r9]
  47067. add r15, rax
  47068. adc r12, rdx
  47069. ; a[i+1] += m[1] * mu
  47070. mov rax, r13
  47071. xor r11, r11
  47072. mul QWORD PTR [r9+8]
  47073. mov r15, rdi
  47074. add r15, rax
  47075. adc r11, rdx
  47076. add r15, r12
  47077. adc r11, 0
  47078. ; a[i+2] += m[2] * mu
  47079. mov rax, r13
  47080. xor r12, r12
  47081. mul QWORD PTR [r9+16]
  47082. mov rdi, QWORD PTR [rcx+16]
  47083. add rdi, rax
  47084. adc r12, rdx
  47085. add rdi, r11
  47086. adc r12, 0
  47087. ; a[i+3] += m[3] * mu
  47088. mov rax, r13
  47089. xor r11, r11
  47090. mul QWORD PTR [r9+24]
  47091. mov r14, QWORD PTR [rcx+24]
  47092. add r14, rax
  47093. adc r11, rdx
  47094. add r14, r12
  47095. mov QWORD PTR [rcx+24], r14
  47096. adc r11, 0
  47097. ; a[i+4] += m[4] * mu
  47098. mov rax, r13
  47099. xor r12, r12
  47100. mul QWORD PTR [r9+32]
  47101. mov r14, QWORD PTR [rcx+32]
  47102. add r14, rax
  47103. adc r12, rdx
  47104. add r14, r11
  47105. mov QWORD PTR [rcx+32], r14
  47106. adc r12, 0
  47107. ; a[i+5] += m[5] * mu
  47108. mov rax, r13
  47109. xor r11, r11
  47110. mul QWORD PTR [r9+40]
  47111. mov r14, QWORD PTR [rcx+40]
  47112. add r14, rax
  47113. adc r11, rdx
  47114. add r14, r12
  47115. mov QWORD PTR [rcx+40], r14
  47116. adc r11, 0
  47117. ; a[i+6] += m[6] * mu
  47118. mov rax, r13
  47119. xor r12, r12
  47120. mul QWORD PTR [r9+48]
  47121. mov r14, QWORD PTR [rcx+48]
  47122. add r14, rax
  47123. adc r12, rdx
  47124. add r14, r11
  47125. mov QWORD PTR [rcx+48], r14
  47126. adc r12, 0
  47127. ; a[i+7] += m[7] * mu
  47128. mov rax, r13
  47129. xor r11, r11
  47130. mul QWORD PTR [r9+56]
  47131. mov r14, QWORD PTR [rcx+56]
  47132. add r14, rax
  47133. adc r11, rdx
  47134. add r14, r12
  47135. mov QWORD PTR [rcx+56], r14
  47136. adc r11, 0
  47137. ; a[i+8] += m[8] * mu
  47138. mov rax, r13
  47139. xor r12, r12
  47140. mul QWORD PTR [r9+64]
  47141. mov r14, QWORD PTR [rcx+64]
  47142. add r14, rax
  47143. adc r12, rdx
  47144. add r14, r11
  47145. mov QWORD PTR [rcx+64], r14
  47146. adc r12, 0
  47147. ; a[i+9] += m[9] * mu
  47148. mov rax, r13
  47149. xor r11, r11
  47150. mul QWORD PTR [r9+72]
  47151. mov r14, QWORD PTR [rcx+72]
  47152. add r14, rax
  47153. adc r11, rdx
  47154. add r14, r12
  47155. mov QWORD PTR [rcx+72], r14
  47156. adc r11, 0
  47157. ; a[i+10] += m[10] * mu
  47158. mov rax, r13
  47159. xor r12, r12
  47160. mul QWORD PTR [r9+80]
  47161. mov r14, QWORD PTR [rcx+80]
  47162. add r14, rax
  47163. adc r12, rdx
  47164. add r14, r11
  47165. mov QWORD PTR [rcx+80], r14
  47166. adc r12, 0
  47167. ; a[i+11] += m[11] * mu
  47168. mov rax, r13
  47169. xor r11, r11
  47170. mul QWORD PTR [r9+88]
  47171. mov r14, QWORD PTR [rcx+88]
  47172. add r14, rax
  47173. adc r11, rdx
  47174. add r14, r12
  47175. mov QWORD PTR [rcx+88], r14
  47176. adc r11, 0
  47177. ; a[i+12] += m[12] * mu
  47178. mov rax, r13
  47179. xor r12, r12
  47180. mul QWORD PTR [r9+96]
  47181. mov r14, QWORD PTR [rcx+96]
  47182. add r14, rax
  47183. adc r12, rdx
  47184. add r14, r11
  47185. mov QWORD PTR [rcx+96], r14
  47186. adc r12, 0
  47187. ; a[i+13] += m[13] * mu
  47188. mov rax, r13
  47189. xor r11, r11
  47190. mul QWORD PTR [r9+104]
  47191. mov r14, QWORD PTR [rcx+104]
  47192. add r14, rax
  47193. adc r11, rdx
  47194. add r14, r12
  47195. mov QWORD PTR [rcx+104], r14
  47196. adc r11, 0
  47197. ; a[i+14] += m[14] * mu
  47198. mov rax, r13
  47199. xor r12, r12
  47200. mul QWORD PTR [r9+112]
  47201. mov r14, QWORD PTR [rcx+112]
  47202. add r14, rax
  47203. adc r12, rdx
  47204. add r14, r11
  47205. mov QWORD PTR [rcx+112], r14
  47206. adc r12, 0
  47207. ; a[i+15] += m[15] * mu
  47208. mov rax, r13
  47209. xor r11, r11
  47210. mul QWORD PTR [r9+120]
  47211. mov r14, QWORD PTR [rcx+120]
  47212. add r14, rax
  47213. adc r11, rdx
  47214. add r14, r12
  47215. mov QWORD PTR [rcx+120], r14
  47216. adc r11, 0
  47217. ; a[i+16] += m[16] * mu
  47218. mov rax, r13
  47219. xor r12, r12
  47220. mul QWORD PTR [r9+128]
  47221. mov r14, QWORD PTR [rcx+128]
  47222. add r14, rax
  47223. adc r12, rdx
  47224. add r14, r11
  47225. mov QWORD PTR [rcx+128], r14
  47226. adc r12, 0
  47227. ; a[i+17] += m[17] * mu
  47228. mov rax, r13
  47229. xor r11, r11
  47230. mul QWORD PTR [r9+136]
  47231. mov r14, QWORD PTR [rcx+136]
  47232. add r14, rax
  47233. adc r11, rdx
  47234. add r14, r12
  47235. mov QWORD PTR [rcx+136], r14
  47236. adc r11, 0
  47237. ; a[i+18] += m[18] * mu
  47238. mov rax, r13
  47239. xor r12, r12
  47240. mul QWORD PTR [r9+144]
  47241. mov r14, QWORD PTR [rcx+144]
  47242. add r14, rax
  47243. adc r12, rdx
  47244. add r14, r11
  47245. mov QWORD PTR [rcx+144], r14
  47246. adc r12, 0
  47247. ; a[i+19] += m[19] * mu
  47248. mov rax, r13
  47249. xor r11, r11
  47250. mul QWORD PTR [r9+152]
  47251. mov r14, QWORD PTR [rcx+152]
  47252. add r14, rax
  47253. adc r11, rdx
  47254. add r14, r12
  47255. mov QWORD PTR [rcx+152], r14
  47256. adc r11, 0
  47257. ; a[i+20] += m[20] * mu
  47258. mov rax, r13
  47259. xor r12, r12
  47260. mul QWORD PTR [r9+160]
  47261. mov r14, QWORD PTR [rcx+160]
  47262. add r14, rax
  47263. adc r12, rdx
  47264. add r14, r11
  47265. mov QWORD PTR [rcx+160], r14
  47266. adc r12, 0
  47267. ; a[i+21] += m[21] * mu
  47268. mov rax, r13
  47269. xor r11, r11
  47270. mul QWORD PTR [r9+168]
  47271. mov r14, QWORD PTR [rcx+168]
  47272. add r14, rax
  47273. adc r11, rdx
  47274. add r14, r12
  47275. mov QWORD PTR [rcx+168], r14
  47276. adc r11, 0
  47277. ; a[i+22] += m[22] * mu
  47278. mov rax, r13
  47279. xor r12, r12
  47280. mul QWORD PTR [r9+176]
  47281. mov r14, QWORD PTR [rcx+176]
  47282. add r14, rax
  47283. adc r12, rdx
  47284. add r14, r11
  47285. mov QWORD PTR [rcx+176], r14
  47286. adc r12, 0
  47287. ; a[i+23] += m[23] * mu
  47288. mov rax, r13
  47289. xor r11, r11
  47290. mul QWORD PTR [r9+184]
  47291. mov r14, QWORD PTR [rcx+184]
  47292. add r14, rax
  47293. adc r11, rdx
  47294. add r14, r12
  47295. mov QWORD PTR [rcx+184], r14
  47296. adc r11, 0
  47297. ; a[i+24] += m[24] * mu
  47298. mov rax, r13
  47299. xor r12, r12
  47300. mul QWORD PTR [r9+192]
  47301. mov r14, QWORD PTR [rcx+192]
  47302. add r14, rax
  47303. adc r12, rdx
  47304. add r14, r11
  47305. mov QWORD PTR [rcx+192], r14
  47306. adc r12, 0
  47307. ; a[i+25] += m[25] * mu
  47308. mov rax, r13
  47309. xor r11, r11
  47310. mul QWORD PTR [r9+200]
  47311. mov r14, QWORD PTR [rcx+200]
  47312. add r14, rax
  47313. adc r11, rdx
  47314. add r14, r12
  47315. mov QWORD PTR [rcx+200], r14
  47316. adc r11, 0
  47317. ; a[i+26] += m[26] * mu
  47318. mov rax, r13
  47319. xor r12, r12
  47320. mul QWORD PTR [r9+208]
  47321. mov r14, QWORD PTR [rcx+208]
  47322. add r14, rax
  47323. adc r12, rdx
  47324. add r14, r11
  47325. mov QWORD PTR [rcx+208], r14
  47326. adc r12, 0
  47327. ; a[i+27] += m[27] * mu
  47328. mov rax, r13
  47329. xor r11, r11
  47330. mul QWORD PTR [r9+216]
  47331. mov r14, QWORD PTR [rcx+216]
  47332. add r14, rax
  47333. adc r11, rdx
  47334. add r14, r12
  47335. mov QWORD PTR [rcx+216], r14
  47336. adc r11, 0
  47337. ; a[i+28] += m[28] * mu
  47338. mov rax, r13
  47339. xor r12, r12
  47340. mul QWORD PTR [r9+224]
  47341. mov r14, QWORD PTR [rcx+224]
  47342. add r14, rax
  47343. adc r12, rdx
  47344. add r14, r11
  47345. mov QWORD PTR [rcx+224], r14
  47346. adc r12, 0
  47347. ; a[i+29] += m[29] * mu
  47348. mov rax, r13
  47349. xor r11, r11
  47350. mul QWORD PTR [r9+232]
  47351. mov r14, QWORD PTR [rcx+232]
  47352. add r14, rax
  47353. adc r11, rdx
  47354. add r14, r12
  47355. mov QWORD PTR [rcx+232], r14
  47356. adc r11, 0
  47357. ; a[i+30] += m[30] * mu
  47358. mov rax, r13
  47359. xor r12, r12
  47360. mul QWORD PTR [r9+240]
  47361. mov r14, QWORD PTR [rcx+240]
  47362. add r14, rax
  47363. adc r12, rdx
  47364. add r14, r11
  47365. mov QWORD PTR [rcx+240], r14
  47366. adc r12, 0
  47367. ; a[i+31] += m[31] * mu
  47368. mov rax, r13
  47369. xor r11, r11
  47370. mul QWORD PTR [r9+248]
  47371. mov r14, QWORD PTR [rcx+248]
  47372. add r14, rax
  47373. adc r11, rdx
  47374. add r14, r12
  47375. mov QWORD PTR [rcx+248], r14
  47376. adc r11, 0
  47377. ; a[i+32] += m[32] * mu
  47378. mov rax, r13
  47379. xor r12, r12
  47380. mul QWORD PTR [r9+256]
  47381. mov r14, QWORD PTR [rcx+256]
  47382. add r14, rax
  47383. adc r12, rdx
  47384. add r14, r11
  47385. mov QWORD PTR [rcx+256], r14
  47386. adc r12, 0
  47387. ; a[i+33] += m[33] * mu
  47388. mov rax, r13
  47389. xor r11, r11
  47390. mul QWORD PTR [r9+264]
  47391. mov r14, QWORD PTR [rcx+264]
  47392. add r14, rax
  47393. adc r11, rdx
  47394. add r14, r12
  47395. mov QWORD PTR [rcx+264], r14
  47396. adc r11, 0
  47397. ; a[i+34] += m[34] * mu
  47398. mov rax, r13
  47399. xor r12, r12
  47400. mul QWORD PTR [r9+272]
  47401. mov r14, QWORD PTR [rcx+272]
  47402. add r14, rax
  47403. adc r12, rdx
  47404. add r14, r11
  47405. mov QWORD PTR [rcx+272], r14
  47406. adc r12, 0
  47407. ; a[i+35] += m[35] * mu
  47408. mov rax, r13
  47409. xor r11, r11
  47410. mul QWORD PTR [r9+280]
  47411. mov r14, QWORD PTR [rcx+280]
  47412. add r14, rax
  47413. adc r11, rdx
  47414. add r14, r12
  47415. mov QWORD PTR [rcx+280], r14
  47416. adc r11, 0
  47417. ; a[i+36] += m[36] * mu
  47418. mov rax, r13
  47419. xor r12, r12
  47420. mul QWORD PTR [r9+288]
  47421. mov r14, QWORD PTR [rcx+288]
  47422. add r14, rax
  47423. adc r12, rdx
  47424. add r14, r11
  47425. mov QWORD PTR [rcx+288], r14
  47426. adc r12, 0
  47427. ; a[i+37] += m[37] * mu
  47428. mov rax, r13
  47429. xor r11, r11
  47430. mul QWORD PTR [r9+296]
  47431. mov r14, QWORD PTR [rcx+296]
  47432. add r14, rax
  47433. adc r11, rdx
  47434. add r14, r12
  47435. mov QWORD PTR [rcx+296], r14
  47436. adc r11, 0
  47437. ; a[i+38] += m[38] * mu
  47438. mov rax, r13
  47439. xor r12, r12
  47440. mul QWORD PTR [r9+304]
  47441. mov r14, QWORD PTR [rcx+304]
  47442. add r14, rax
  47443. adc r12, rdx
  47444. add r14, r11
  47445. mov QWORD PTR [rcx+304], r14
  47446. adc r12, 0
  47447. ; a[i+39] += m[39] * mu
  47448. mov rax, r13
  47449. xor r11, r11
  47450. mul QWORD PTR [r9+312]
  47451. mov r14, QWORD PTR [rcx+312]
  47452. add r14, rax
  47453. adc r11, rdx
  47454. add r14, r12
  47455. mov QWORD PTR [rcx+312], r14
  47456. adc r11, 0
  47457. ; a[i+40] += m[40] * mu
  47458. mov rax, r13
  47459. xor r12, r12
  47460. mul QWORD PTR [r9+320]
  47461. mov r14, QWORD PTR [rcx+320]
  47462. add r14, rax
  47463. adc r12, rdx
  47464. add r14, r11
  47465. mov QWORD PTR [rcx+320], r14
  47466. adc r12, 0
  47467. ; a[i+41] += m[41] * mu
  47468. mov rax, r13
  47469. xor r11, r11
  47470. mul QWORD PTR [r9+328]
  47471. mov r14, QWORD PTR [rcx+328]
  47472. add r14, rax
  47473. adc r11, rdx
  47474. add r14, r12
  47475. mov QWORD PTR [rcx+328], r14
  47476. adc r11, 0
  47477. ; a[i+42] += m[42] * mu
  47478. mov rax, r13
  47479. xor r12, r12
  47480. mul QWORD PTR [r9+336]
  47481. mov r14, QWORD PTR [rcx+336]
  47482. add r14, rax
  47483. adc r12, rdx
  47484. add r14, r11
  47485. mov QWORD PTR [rcx+336], r14
  47486. adc r12, 0
  47487. ; a[i+43] += m[43] * mu
  47488. mov rax, r13
  47489. xor r11, r11
  47490. mul QWORD PTR [r9+344]
  47491. mov r14, QWORD PTR [rcx+344]
  47492. add r14, rax
  47493. adc r11, rdx
  47494. add r14, r12
  47495. mov QWORD PTR [rcx+344], r14
  47496. adc r11, 0
  47497. ; a[i+44] += m[44] * mu
  47498. mov rax, r13
  47499. xor r12, r12
  47500. mul QWORD PTR [r9+352]
  47501. mov r14, QWORD PTR [rcx+352]
  47502. add r14, rax
  47503. adc r12, rdx
  47504. add r14, r11
  47505. mov QWORD PTR [rcx+352], r14
  47506. adc r12, 0
  47507. ; a[i+45] += m[45] * mu
  47508. mov rax, r13
  47509. xor r11, r11
  47510. mul QWORD PTR [r9+360]
  47511. mov r14, QWORD PTR [rcx+360]
  47512. add r14, rax
  47513. adc r11, rdx
  47514. add r14, r12
  47515. mov QWORD PTR [rcx+360], r14
  47516. adc r11, 0
  47517. ; a[i+46] += m[46] * mu
  47518. mov rax, r13
  47519. xor r12, r12
  47520. mul QWORD PTR [r9+368]
  47521. mov r14, QWORD PTR [rcx+368]
  47522. add r14, rax
  47523. adc r12, rdx
  47524. add r14, r11
  47525. mov QWORD PTR [rcx+368], r14
  47526. adc r12, 0
  47527. ; a[i+47] += m[47] * mu
  47528. mov rax, r13
  47529. xor r11, r11
  47530. mul QWORD PTR [r9+376]
  47531. mov r14, QWORD PTR [rcx+376]
  47532. add r14, rax
  47533. adc r11, rdx
  47534. add r14, r12
  47535. mov QWORD PTR [rcx+376], r14
  47536. adc r11, 0
  47537. ; a[i+48] += m[48] * mu
  47538. mov rax, r13
  47539. xor r12, r12
  47540. mul QWORD PTR [r9+384]
  47541. mov r14, QWORD PTR [rcx+384]
  47542. add r14, rax
  47543. adc r12, rdx
  47544. add r14, r11
  47545. mov QWORD PTR [rcx+384], r14
  47546. adc r12, 0
  47547. ; a[i+49] += m[49] * mu
  47548. mov rax, r13
  47549. xor r11, r11
  47550. mul QWORD PTR [r9+392]
  47551. mov r14, QWORD PTR [rcx+392]
  47552. add r14, rax
  47553. adc r11, rdx
  47554. add r14, r12
  47555. mov QWORD PTR [rcx+392], r14
  47556. adc r11, 0
  47557. ; a[i+50] += m[50] * mu
  47558. mov rax, r13
  47559. xor r12, r12
  47560. mul QWORD PTR [r9+400]
  47561. mov r14, QWORD PTR [rcx+400]
  47562. add r14, rax
  47563. adc r12, rdx
  47564. add r14, r11
  47565. mov QWORD PTR [rcx+400], r14
  47566. adc r12, 0
  47567. ; a[i+51] += m[51] * mu
  47568. mov rax, r13
  47569. xor r11, r11
  47570. mul QWORD PTR [r9+408]
  47571. mov r14, QWORD PTR [rcx+408]
  47572. add r14, rax
  47573. adc r11, rdx
  47574. add r14, r12
  47575. mov QWORD PTR [rcx+408], r14
  47576. adc r11, 0
  47577. ; a[i+52] += m[52] * mu
  47578. mov rax, r13
  47579. xor r12, r12
  47580. mul QWORD PTR [r9+416]
  47581. mov r14, QWORD PTR [rcx+416]
  47582. add r14, rax
  47583. adc r12, rdx
  47584. add r14, r11
  47585. mov QWORD PTR [rcx+416], r14
  47586. adc r12, 0
  47587. ; a[i+53] += m[53] * mu
  47588. mov rax, r13
  47589. xor r11, r11
  47590. mul QWORD PTR [r9+424]
  47591. mov r14, QWORD PTR [rcx+424]
  47592. add r14, rax
  47593. adc r11, rdx
  47594. add r14, r12
  47595. mov QWORD PTR [rcx+424], r14
  47596. adc r11, 0
  47597. ; a[i+54] += m[54] * mu
  47598. mov rax, r13
  47599. xor r12, r12
  47600. mul QWORD PTR [r9+432]
  47601. mov r14, QWORD PTR [rcx+432]
  47602. add r14, rax
  47603. adc r12, rdx
  47604. add r14, r11
  47605. mov QWORD PTR [rcx+432], r14
  47606. adc r12, 0
  47607. ; a[i+55] += m[55] * mu
  47608. mov rax, r13
  47609. xor r11, r11
  47610. mul QWORD PTR [r9+440]
  47611. mov r14, QWORD PTR [rcx+440]
  47612. add r14, rax
  47613. adc r11, rdx
  47614. add r14, r12
  47615. mov QWORD PTR [rcx+440], r14
  47616. adc r11, 0
  47617. ; a[i+56] += m[56] * mu
  47618. mov rax, r13
  47619. xor r12, r12
  47620. mul QWORD PTR [r9+448]
  47621. mov r14, QWORD PTR [rcx+448]
  47622. add r14, rax
  47623. adc r12, rdx
  47624. add r14, r11
  47625. mov QWORD PTR [rcx+448], r14
  47626. adc r12, 0
  47627. ; a[i+57] += m[57] * mu
  47628. mov rax, r13
  47629. xor r11, r11
  47630. mul QWORD PTR [r9+456]
  47631. mov r14, QWORD PTR [rcx+456]
  47632. add r14, rax
  47633. adc r11, rdx
  47634. add r14, r12
  47635. mov QWORD PTR [rcx+456], r14
  47636. adc r11, 0
  47637. ; a[i+58] += m[58] * mu
  47638. mov rax, r13
  47639. xor r12, r12
  47640. mul QWORD PTR [r9+464]
  47641. mov r14, QWORD PTR [rcx+464]
  47642. add r14, rax
  47643. adc r12, rdx
  47644. add r14, r11
  47645. mov QWORD PTR [rcx+464], r14
  47646. adc r12, 0
  47647. ; a[i+59] += m[59] * mu
  47648. mov rax, r13
  47649. xor r11, r11
  47650. mul QWORD PTR [r9+472]
  47651. mov r14, QWORD PTR [rcx+472]
  47652. add r14, rax
  47653. adc r11, rdx
  47654. add r14, r12
  47655. mov QWORD PTR [rcx+472], r14
  47656. adc r11, 0
  47657. ; a[i+60] += m[60] * mu
  47658. mov rax, r13
  47659. xor r12, r12
  47660. mul QWORD PTR [r9+480]
  47661. mov r14, QWORD PTR [rcx+480]
  47662. add r14, rax
  47663. adc r12, rdx
  47664. add r14, r11
  47665. mov QWORD PTR [rcx+480], r14
  47666. adc r12, 0
  47667. ; a[i+61] += m[61] * mu
  47668. mov rax, r13
  47669. xor r11, r11
  47670. mul QWORD PTR [r9+488]
  47671. mov r14, QWORD PTR [rcx+488]
  47672. add r14, rax
  47673. adc r11, rdx
  47674. add r14, r12
  47675. mov QWORD PTR [rcx+488], r14
  47676. adc r11, 0
  47677. ; a[i+62] += m[62] * mu
  47678. mov rax, r13
  47679. xor r12, r12
  47680. mul QWORD PTR [r9+496]
  47681. mov r14, QWORD PTR [rcx+496]
  47682. add r14, rax
  47683. adc r12, rdx
  47684. add r14, r11
  47685. mov QWORD PTR [rcx+496], r14
  47686. adc r12, 0
  47687. ; a[i+63] += m[63] * mu
  47688. mov rax, r13
  47689. mul QWORD PTR [r9+504]
  47690. mov r14, QWORD PTR [rcx+504]
  47691. add r12, rax
  47692. adc rdx, rsi
  47693. mov rsi, 0
  47694. adc rsi, 0
  47695. add r14, r12
  47696. mov QWORD PTR [rcx+504], r14
  47697. adc QWORD PTR [rcx+512], rdx
  47698. adc rsi, 0
  47699. ; i -= 1
  47700. add rcx, 8
  47701. dec r10
  47702. jnz L_4096_mont_reduce_64_loop
  47703. mov QWORD PTR [rcx], r15
  47704. mov QWORD PTR [rcx+8], rdi
  47705. neg rsi
  47706. IFDEF _WIN64
  47707. mov r8, r9
  47708. mov r9, rsi
  47709. ELSE
  47710. mov r9, rsi
  47711. mov r8, r9
  47712. ENDIF
  47713. mov rdx, rcx
  47714. mov rcx, rcx
  47715. sub rcx, 512
  47716. call sp_4096_cond_sub_64
  47717. pop rsi
  47718. pop rdi
  47719. pop r15
  47720. pop r14
  47721. pop r13
  47722. pop r12
  47723. ret
  47724. sp_4096_mont_reduce_64 ENDP
  47725. _text ENDS
  47726. ; /* Sub b from a into r. (r = a - b) Operates on 64 limbs of 64 bits (byte offsets 0..504).
  47727. ; * Fully unrolled sub/sbb borrow chain; clobbers rax, r9, r10 and the flags.
  47728. ; * r A single precision integer. (result pointer, taken from rcx)
  47729. ; * a A single precision integer. (minuend pointer, taken from rdx)
  47730. ; * b A single precision integer. (subtrahend pointer, taken from r8)
  47731. ; * Leaves the final borrow in rax: 0 when there was no borrow, -1 (all ones) when there was. */
  47732. _text SEGMENT READONLY PARA
  47733. sp_4096_sub_64 PROC
  47734. mov r9, QWORD PTR [rdx] ; r9 = a[0]
  47735. sub r9, QWORD PTR [r8] ; a[0] - b[0]; starts the borrow chain in CF
  47736. mov r10, QWORD PTR [rdx+8] ; load a[1]; mov does not modify flags, so the borrow in CF survives
  47737. mov QWORD PTR [rcx], r9 ; r[0] = previous difference
  47738. sbb r10, QWORD PTR [r8+8] ; a[1] - b[1] - borrow; this load/store/sbb pattern repeats up to offset 504, alternating r9 and r10
  47739. mov r9, QWORD PTR [rdx+16]
  47740. mov QWORD PTR [rcx+8], r10
  47741. sbb r9, QWORD PTR [r8+16]
  47742. mov r10, QWORD PTR [rdx+24]
  47743. mov QWORD PTR [rcx+16], r9
  47744. sbb r10, QWORD PTR [r8+24]
  47745. mov r9, QWORD PTR [rdx+32]
  47746. mov QWORD PTR [rcx+24], r10
  47747. sbb r9, QWORD PTR [r8+32]
  47748. mov r10, QWORD PTR [rdx+40]
  47749. mov QWORD PTR [rcx+32], r9
  47750. sbb r10, QWORD PTR [r8+40]
  47751. mov r9, QWORD PTR [rdx+48]
  47752. mov QWORD PTR [rcx+40], r10
  47753. sbb r9, QWORD PTR [r8+48]
  47754. mov r10, QWORD PTR [rdx+56]
  47755. mov QWORD PTR [rcx+48], r9
  47756. sbb r10, QWORD PTR [r8+56]
  47757. mov r9, QWORD PTR [rdx+64]
  47758. mov QWORD PTR [rcx+56], r10
  47759. sbb r9, QWORD PTR [r8+64]
  47760. mov r10, QWORD PTR [rdx+72]
  47761. mov QWORD PTR [rcx+64], r9
  47762. sbb r10, QWORD PTR [r8+72]
  47763. mov r9, QWORD PTR [rdx+80]
  47764. mov QWORD PTR [rcx+72], r10
  47765. sbb r9, QWORD PTR [r8+80]
  47766. mov r10, QWORD PTR [rdx+88]
  47767. mov QWORD PTR [rcx+80], r9
  47768. sbb r10, QWORD PTR [r8+88]
  47769. mov r9, QWORD PTR [rdx+96]
  47770. mov QWORD PTR [rcx+88], r10
  47771. sbb r9, QWORD PTR [r8+96]
  47772. mov r10, QWORD PTR [rdx+104]
  47773. mov QWORD PTR [rcx+96], r9
  47774. sbb r10, QWORD PTR [r8+104]
  47775. mov r9, QWORD PTR [rdx+112]
  47776. mov QWORD PTR [rcx+104], r10
  47777. sbb r9, QWORD PTR [r8+112]
  47778. mov r10, QWORD PTR [rdx+120]
  47779. mov QWORD PTR [rcx+112], r9
  47780. sbb r10, QWORD PTR [r8+120]
  47781. mov r9, QWORD PTR [rdx+128]
  47782. mov QWORD PTR [rcx+120], r10
  47783. sbb r9, QWORD PTR [r8+128]
  47784. mov r10, QWORD PTR [rdx+136]
  47785. mov QWORD PTR [rcx+128], r9
  47786. sbb r10, QWORD PTR [r8+136]
  47787. mov r9, QWORD PTR [rdx+144]
  47788. mov QWORD PTR [rcx+136], r10
  47789. sbb r9, QWORD PTR [r8+144]
  47790. mov r10, QWORD PTR [rdx+152]
  47791. mov QWORD PTR [rcx+144], r9
  47792. sbb r10, QWORD PTR [r8+152]
  47793. mov r9, QWORD PTR [rdx+160]
  47794. mov QWORD PTR [rcx+152], r10
  47795. sbb r9, QWORD PTR [r8+160]
  47796. mov r10, QWORD PTR [rdx+168]
  47797. mov QWORD PTR [rcx+160], r9
  47798. sbb r10, QWORD PTR [r8+168]
  47799. mov r9, QWORD PTR [rdx+176]
  47800. mov QWORD PTR [rcx+168], r10
  47801. sbb r9, QWORD PTR [r8+176]
  47802. mov r10, QWORD PTR [rdx+184]
  47803. mov QWORD PTR [rcx+176], r9
  47804. sbb r10, QWORD PTR [r8+184]
  47805. mov r9, QWORD PTR [rdx+192]
  47806. mov QWORD PTR [rcx+184], r10
  47807. sbb r9, QWORD PTR [r8+192]
  47808. mov r10, QWORD PTR [rdx+200]
  47809. mov QWORD PTR [rcx+192], r9
  47810. sbb r10, QWORD PTR [r8+200]
  47811. mov r9, QWORD PTR [rdx+208]
  47812. mov QWORD PTR [rcx+200], r10
  47813. sbb r9, QWORD PTR [r8+208]
  47814. mov r10, QWORD PTR [rdx+216]
  47815. mov QWORD PTR [rcx+208], r9
  47816. sbb r10, QWORD PTR [r8+216]
  47817. mov r9, QWORD PTR [rdx+224]
  47818. mov QWORD PTR [rcx+216], r10
  47819. sbb r9, QWORD PTR [r8+224]
  47820. mov r10, QWORD PTR [rdx+232]
  47821. mov QWORD PTR [rcx+224], r9
  47822. sbb r10, QWORD PTR [r8+232]
  47823. mov r9, QWORD PTR [rdx+240]
  47824. mov QWORD PTR [rcx+232], r10
  47825. sbb r9, QWORD PTR [r8+240]
  47826. mov r10, QWORD PTR [rdx+248]
  47827. mov QWORD PTR [rcx+240], r9
  47828. sbb r10, QWORD PTR [r8+248]
  47829. mov r9, QWORD PTR [rdx+256]
  47830. mov QWORD PTR [rcx+248], r10
  47831. sbb r9, QWORD PTR [r8+256]
  47832. mov r10, QWORD PTR [rdx+264]
  47833. mov QWORD PTR [rcx+256], r9
  47834. sbb r10, QWORD PTR [r8+264]
  47835. mov r9, QWORD PTR [rdx+272]
  47836. mov QWORD PTR [rcx+264], r10
  47837. sbb r9, QWORD PTR [r8+272]
  47838. mov r10, QWORD PTR [rdx+280]
  47839. mov QWORD PTR [rcx+272], r9
  47840. sbb r10, QWORD PTR [r8+280]
  47841. mov r9, QWORD PTR [rdx+288]
  47842. mov QWORD PTR [rcx+280], r10
  47843. sbb r9, QWORD PTR [r8+288]
  47844. mov r10, QWORD PTR [rdx+296]
  47845. mov QWORD PTR [rcx+288], r9
  47846. sbb r10, QWORD PTR [r8+296]
  47847. mov r9, QWORD PTR [rdx+304]
  47848. mov QWORD PTR [rcx+296], r10
  47849. sbb r9, QWORD PTR [r8+304]
  47850. mov r10, QWORD PTR [rdx+312]
  47851. mov QWORD PTR [rcx+304], r9
  47852. sbb r10, QWORD PTR [r8+312]
  47853. mov r9, QWORD PTR [rdx+320]
  47854. mov QWORD PTR [rcx+312], r10
  47855. sbb r9, QWORD PTR [r8+320]
  47856. mov r10, QWORD PTR [rdx+328]
  47857. mov QWORD PTR [rcx+320], r9
  47858. sbb r10, QWORD PTR [r8+328]
  47859. mov r9, QWORD PTR [rdx+336]
  47860. mov QWORD PTR [rcx+328], r10
  47861. sbb r9, QWORD PTR [r8+336]
  47862. mov r10, QWORD PTR [rdx+344]
  47863. mov QWORD PTR [rcx+336], r9
  47864. sbb r10, QWORD PTR [r8+344]
  47865. mov r9, QWORD PTR [rdx+352]
  47866. mov QWORD PTR [rcx+344], r10
  47867. sbb r9, QWORD PTR [r8+352]
  47868. mov r10, QWORD PTR [rdx+360]
  47869. mov QWORD PTR [rcx+352], r9
  47870. sbb r10, QWORD PTR [r8+360]
  47871. mov r9, QWORD PTR [rdx+368]
  47872. mov QWORD PTR [rcx+360], r10
  47873. sbb r9, QWORD PTR [r8+368]
  47874. mov r10, QWORD PTR [rdx+376]
  47875. mov QWORD PTR [rcx+368], r9
  47876. sbb r10, QWORD PTR [r8+376]
  47877. mov r9, QWORD PTR [rdx+384]
  47878. mov QWORD PTR [rcx+376], r10
  47879. sbb r9, QWORD PTR [r8+384]
  47880. mov r10, QWORD PTR [rdx+392]
  47881. mov QWORD PTR [rcx+384], r9
  47882. sbb r10, QWORD PTR [r8+392]
  47883. mov r9, QWORD PTR [rdx+400]
  47884. mov QWORD PTR [rcx+392], r10
  47885. sbb r9, QWORD PTR [r8+400]
  47886. mov r10, QWORD PTR [rdx+408]
  47887. mov QWORD PTR [rcx+400], r9
  47888. sbb r10, QWORD PTR [r8+408]
  47889. mov r9, QWORD PTR [rdx+416]
  47890. mov QWORD PTR [rcx+408], r10
  47891. sbb r9, QWORD PTR [r8+416]
  47892. mov r10, QWORD PTR [rdx+424]
  47893. mov QWORD PTR [rcx+416], r9
  47894. sbb r10, QWORD PTR [r8+424]
  47895. mov r9, QWORD PTR [rdx+432]
  47896. mov QWORD PTR [rcx+424], r10
  47897. sbb r9, QWORD PTR [r8+432]
  47898. mov r10, QWORD PTR [rdx+440]
  47899. mov QWORD PTR [rcx+432], r9
  47900. sbb r10, QWORD PTR [r8+440]
  47901. mov r9, QWORD PTR [rdx+448]
  47902. mov QWORD PTR [rcx+440], r10
  47903. sbb r9, QWORD PTR [r8+448]
  47904. mov r10, QWORD PTR [rdx+456]
  47905. mov QWORD PTR [rcx+448], r9
  47906. sbb r10, QWORD PTR [r8+456]
  47907. mov r9, QWORD PTR [rdx+464]
  47908. mov QWORD PTR [rcx+456], r10
  47909. sbb r9, QWORD PTR [r8+464]
  47910. mov r10, QWORD PTR [rdx+472]
  47911. mov QWORD PTR [rcx+464], r9
  47912. sbb r10, QWORD PTR [r8+472]
  47913. mov r9, QWORD PTR [rdx+480]
  47914. mov QWORD PTR [rcx+472], r10
  47915. sbb r9, QWORD PTR [r8+480]
  47916. mov r10, QWORD PTR [rdx+488]
  47917. mov QWORD PTR [rcx+480], r9
  47918. sbb r10, QWORD PTR [r8+488]
  47919. mov r9, QWORD PTR [rdx+496]
  47920. mov QWORD PTR [rcx+488], r10
  47921. sbb r9, QWORD PTR [r8+496]
  47922. mov r10, QWORD PTR [rdx+504]
  47923. mov QWORD PTR [rcx+496], r9
  47924. sbb r10, QWORD PTR [r8+504]
  47925. mov QWORD PTR [rcx+504], r10 ; r[63], the last limb
  47926. sbb rax, rax ; rax = 0 - CF: 0 if the chain ended without a borrow, -1 if it borrowed
  47927. ret
  47928. sp_4096_sub_64 ENDP
  47929. _text ENDS
  47930. IFDEF HAVE_INTEL_AVX2
  47931. ; /* Mul a by digit b into r. (r = a * b)
  47932. ; * Fully unrolled mulx/adcx/adox chain: reads 64 limbs of a (offsets 0..504) and writes 65 limbs of r (offsets 0..512).
  47933. ; * r A single precision integer. (result pointer, taken from rcx)
  47934. ; * a A single precision integer. (pointer taken from rdx, copied to rax)
  47935. ; * b A single precision digit. (value taken from r8, copied to rdx for mulx)
  47936. ; * Saves and restores r12/r13; clobbers rax, rdx, r9, r10, r11 and the flags. */
  47937. _text SEGMENT READONLY PARA
  47938. sp_4096_mul_d_avx2_64 PROC
  47939. push r12
  47940. push r13
  47941. mov rax, rdx ; rax = a, freeing rdx for mulx's implicit multiplicand
  47942. ; A[0] * B
  47943. mov rdx, r8 ; rdx = b, the implicit source operand of every mulx below
  47944. xor r13, r13 ; r13 = 0 for the rest of the routine; also clears CF and OF before the adcx/adox chain
  47945. mulx r12, r11, QWORD PTR [rax] ; r12:r11 = a[0] * b (high:low); mulx does not modify flags
  47946. mov QWORD PTR [rcx], r11 ; r[0] = low half
  47947. ; A[1] * B
  47948. mulx r10, r9, QWORD PTR [rax+8] ; r10:r9 = a[1] * b (high:low)
  47949. mov r11, r13 ; next accumulator starts at 0 (mov leaves CF/OF untouched)
  47950. adcx r12, r9 ; previous high half + this low half + CF; only CF is updated
  47951. adox r11, r10 ; accumulator = this high half (+ OF); only OF is updated, so the adcx carry in CF is preserved
  47952. mov QWORD PTR [rcx+8], r12 ; r[1]; the same five-instruction step repeats below with r11/r12 swapping roles
  47953. ; A[2] * B
  47954. mulx r10, r9, QWORD PTR [rax+16]
  47955. mov r12, r13
  47956. adcx r11, r9
  47957. adox r12, r10
  47958. mov QWORD PTR [rcx+16], r11
  47959. ; A[3] * B
  47960. mulx r10, r9, QWORD PTR [rax+24]
  47961. mov r11, r13
  47962. adcx r12, r9
  47963. adox r11, r10
  47964. mov QWORD PTR [rcx+24], r12
  47965. ; A[4] * B
  47966. mulx r10, r9, QWORD PTR [rax+32]
  47967. mov r12, r13
  47968. adcx r11, r9
  47969. adox r12, r10
  47970. mov QWORD PTR [rcx+32], r11
  47971. ; A[5] * B
  47972. mulx r10, r9, QWORD PTR [rax+40]
  47973. mov r11, r13
  47974. adcx r12, r9
  47975. adox r11, r10
  47976. mov QWORD PTR [rcx+40], r12
  47977. ; A[6] * B
  47978. mulx r10, r9, QWORD PTR [rax+48]
  47979. mov r12, r13
  47980. adcx r11, r9
  47981. adox r12, r10
  47982. mov QWORD PTR [rcx+48], r11
  47983. ; A[7] * B
  47984. mulx r10, r9, QWORD PTR [rax+56]
  47985. mov r11, r13
  47986. adcx r12, r9
  47987. adox r11, r10
  47988. mov QWORD PTR [rcx+56], r12
  47989. ; A[8] * B
  47990. mulx r10, r9, QWORD PTR [rax+64]
  47991. mov r12, r13
  47992. adcx r11, r9
  47993. adox r12, r10
  47994. mov QWORD PTR [rcx+64], r11
  47995. ; A[9] * B
  47996. mulx r10, r9, QWORD PTR [rax+72]
  47997. mov r11, r13
  47998. adcx r12, r9
  47999. adox r11, r10
  48000. mov QWORD PTR [rcx+72], r12
  48001. ; A[10] * B
  48002. mulx r10, r9, QWORD PTR [rax+80]
  48003. mov r12, r13
  48004. adcx r11, r9
  48005. adox r12, r10
  48006. mov QWORD PTR [rcx+80], r11
  48007. ; A[11] * B
  48008. mulx r10, r9, QWORD PTR [rax+88]
  48009. mov r11, r13
  48010. adcx r12, r9
  48011. adox r11, r10
  48012. mov QWORD PTR [rcx+88], r12
  48013. ; A[12] * B
  48014. mulx r10, r9, QWORD PTR [rax+96]
  48015. mov r12, r13
  48016. adcx r11, r9
  48017. adox r12, r10
  48018. mov QWORD PTR [rcx+96], r11
  48019. ; A[13] * B
  48020. mulx r10, r9, QWORD PTR [rax+104]
  48021. mov r11, r13
  48022. adcx r12, r9
  48023. adox r11, r10
  48024. mov QWORD PTR [rcx+104], r12
  48025. ; A[14] * B
  48026. mulx r10, r9, QWORD PTR [rax+112]
  48027. mov r12, r13
  48028. adcx r11, r9
  48029. adox r12, r10
  48030. mov QWORD PTR [rcx+112], r11
  48031. ; A[15] * B
  48032. mulx r10, r9, QWORD PTR [rax+120]
  48033. mov r11, r13
  48034. adcx r12, r9
  48035. adox r11, r10
  48036. mov QWORD PTR [rcx+120], r12
  48037. ; A[16] * B
  48038. mulx r10, r9, QWORD PTR [rax+128]
  48039. mov r12, r13
  48040. adcx r11, r9
  48041. adox r12, r10
  48042. mov QWORD PTR [rcx+128], r11
  48043. ; A[17] * B
  48044. mulx r10, r9, QWORD PTR [rax+136]
  48045. mov r11, r13
  48046. adcx r12, r9
  48047. adox r11, r10
  48048. mov QWORD PTR [rcx+136], r12
  48049. ; A[18] * B
  48050. mulx r10, r9, QWORD PTR [rax+144]
  48051. mov r12, r13
  48052. adcx r11, r9
  48053. adox r12, r10
  48054. mov QWORD PTR [rcx+144], r11
  48055. ; A[19] * B
  48056. mulx r10, r9, QWORD PTR [rax+152]
  48057. mov r11, r13
  48058. adcx r12, r9
  48059. adox r11, r10
  48060. mov QWORD PTR [rcx+152], r12
  48061. ; A[20] * B
  48062. mulx r10, r9, QWORD PTR [rax+160]
  48063. mov r12, r13
  48064. adcx r11, r9
  48065. adox r12, r10
  48066. mov QWORD PTR [rcx+160], r11
  48067. ; A[21] * B
  48068. mulx r10, r9, QWORD PTR [rax+168]
  48069. mov r11, r13
  48070. adcx r12, r9
  48071. adox r11, r10
  48072. mov QWORD PTR [rcx+168], r12
  48073. ; A[22] * B
  48074. mulx r10, r9, QWORD PTR [rax+176]
  48075. mov r12, r13
  48076. adcx r11, r9
  48077. adox r12, r10
  48078. mov QWORD PTR [rcx+176], r11
  48079. ; A[23] * B
  48080. mulx r10, r9, QWORD PTR [rax+184]
  48081. mov r11, r13
  48082. adcx r12, r9
  48083. adox r11, r10
  48084. mov QWORD PTR [rcx+184], r12
  48085. ; A[24] * B
  48086. mulx r10, r9, QWORD PTR [rax+192]
  48087. mov r12, r13
  48088. adcx r11, r9
  48089. adox r12, r10
  48090. mov QWORD PTR [rcx+192], r11
  48091. ; A[25] * B
  48092. mulx r10, r9, QWORD PTR [rax+200]
  48093. mov r11, r13
  48094. adcx r12, r9
  48095. adox r11, r10
  48096. mov QWORD PTR [rcx+200], r12
  48097. ; A[26] * B
  48098. mulx r10, r9, QWORD PTR [rax+208]
  48099. mov r12, r13
  48100. adcx r11, r9
  48101. adox r12, r10
  48102. mov QWORD PTR [rcx+208], r11
  48103. ; A[27] * B
  48104. mulx r10, r9, QWORD PTR [rax+216]
  48105. mov r11, r13
  48106. adcx r12, r9
  48107. adox r11, r10
  48108. mov QWORD PTR [rcx+216], r12
  48109. ; A[28] * B
  48110. mulx r10, r9, QWORD PTR [rax+224]
  48111. mov r12, r13
  48112. adcx r11, r9
  48113. adox r12, r10
  48114. mov QWORD PTR [rcx+224], r11
  48115. ; A[29] * B
  48116. mulx r10, r9, QWORD PTR [rax+232]
  48117. mov r11, r13
  48118. adcx r12, r9
  48119. adox r11, r10
  48120. mov QWORD PTR [rcx+232], r12
  48121. ; A[30] * B
  48122. mulx r10, r9, QWORD PTR [rax+240]
  48123. mov r12, r13
  48124. adcx r11, r9
  48125. adox r12, r10
  48126. mov QWORD PTR [rcx+240], r11
  48127. ; A[31] * B
  48128. mulx r10, r9, QWORD PTR [rax+248]
  48129. mov r11, r13
  48130. adcx r12, r9
  48131. adox r11, r10
  48132. mov QWORD PTR [rcx+248], r12
  48133. ; A[32] * B
  48134. mulx r10, r9, QWORD PTR [rax+256]
  48135. mov r12, r13
  48136. adcx r11, r9
  48137. adox r12, r10
  48138. mov QWORD PTR [rcx+256], r11
  48139. ; A[33] * B
  48140. mulx r10, r9, QWORD PTR [rax+264]
  48141. mov r11, r13
  48142. adcx r12, r9
  48143. adox r11, r10
  48144. mov QWORD PTR [rcx+264], r12
  48145. ; A[34] * B
  48146. mulx r10, r9, QWORD PTR [rax+272]
  48147. mov r12, r13
  48148. adcx r11, r9
  48149. adox r12, r10
  48150. mov QWORD PTR [rcx+272], r11
  48151. ; A[35] * B
  48152. mulx r10, r9, QWORD PTR [rax+280]
  48153. mov r11, r13
  48154. adcx r12, r9
  48155. adox r11, r10
  48156. mov QWORD PTR [rcx+280], r12
  48157. ; A[36] * B
  48158. mulx r10, r9, QWORD PTR [rax+288]
  48159. mov r12, r13
  48160. adcx r11, r9
  48161. adox r12, r10
  48162. mov QWORD PTR [rcx+288], r11
  48163. ; A[37] * B
  48164. mulx r10, r9, QWORD PTR [rax+296]
  48165. mov r11, r13
  48166. adcx r12, r9
  48167. adox r11, r10
  48168. mov QWORD PTR [rcx+296], r12
  48169. ; A[38] * B
  48170. mulx r10, r9, QWORD PTR [rax+304]
  48171. mov r12, r13
  48172. adcx r11, r9
  48173. adox r12, r10
  48174. mov QWORD PTR [rcx+304], r11
  48175. ; A[39] * B
  48176. mulx r10, r9, QWORD PTR [rax+312]
  48177. mov r11, r13
  48178. adcx r12, r9
  48179. adox r11, r10
  48180. mov QWORD PTR [rcx+312], r12
  48181. ; A[40] * B
  48182. mulx r10, r9, QWORD PTR [rax+320]
  48183. mov r12, r13
  48184. adcx r11, r9
  48185. adox r12, r10
  48186. mov QWORD PTR [rcx+320], r11
  48187. ; A[41] * B
  48188. mulx r10, r9, QWORD PTR [rax+328]
  48189. mov r11, r13
  48190. adcx r12, r9
  48191. adox r11, r10
  48192. mov QWORD PTR [rcx+328], r12
  48193. ; A[42] * B
  48194. mulx r10, r9, QWORD PTR [rax+336]
  48195. mov r12, r13
  48196. adcx r11, r9
  48197. adox r12, r10
  48198. mov QWORD PTR [rcx+336], r11
  48199. ; A[43] * B
  48200. mulx r10, r9, QWORD PTR [rax+344]
  48201. mov r11, r13
  48202. adcx r12, r9
  48203. adox r11, r10
  48204. mov QWORD PTR [rcx+344], r12
  48205. ; A[44] * B
  48206. mulx r10, r9, QWORD PTR [rax+352]
  48207. mov r12, r13
  48208. adcx r11, r9
  48209. adox r12, r10
  48210. mov QWORD PTR [rcx+352], r11
  48211. ; A[45] * B
  48212. mulx r10, r9, QWORD PTR [rax+360]
  48213. mov r11, r13
  48214. adcx r12, r9
  48215. adox r11, r10
  48216. mov QWORD PTR [rcx+360], r12
  48217. ; A[46] * B
  48218. mulx r10, r9, QWORD PTR [rax+368]
  48219. mov r12, r13
  48220. adcx r11, r9
  48221. adox r12, r10
  48222. mov QWORD PTR [rcx+368], r11
  48223. ; A[47] * B
  48224. mulx r10, r9, QWORD PTR [rax+376]
  48225. mov r11, r13
  48226. adcx r12, r9
  48227. adox r11, r10
  48228. mov QWORD PTR [rcx+376], r12
  48229. ; A[48] * B
  48230. mulx r10, r9, QWORD PTR [rax+384]
  48231. mov r12, r13
  48232. adcx r11, r9
  48233. adox r12, r10
  48234. mov QWORD PTR [rcx+384], r11
  48235. ; A[49] * B
  48236. mulx r10, r9, QWORD PTR [rax+392]
  48237. mov r11, r13
  48238. adcx r12, r9
  48239. adox r11, r10
  48240. mov QWORD PTR [rcx+392], r12
  48241. ; A[50] * B
  48242. mulx r10, r9, QWORD PTR [rax+400]
  48243. mov r12, r13
  48244. adcx r11, r9
  48245. adox r12, r10
  48246. mov QWORD PTR [rcx+400], r11
  48247. ; A[51] * B
  48248. mulx r10, r9, QWORD PTR [rax+408]
  48249. mov r11, r13
  48250. adcx r12, r9
  48251. adox r11, r10
  48252. mov QWORD PTR [rcx+408], r12
  48253. ; A[52] * B
  48254. mulx r10, r9, QWORD PTR [rax+416]
  48255. mov r12, r13
  48256. adcx r11, r9
  48257. adox r12, r10
  48258. mov QWORD PTR [rcx+416], r11
  48259. ; A[53] * B
  48260. mulx r10, r9, QWORD PTR [rax+424]
  48261. mov r11, r13
  48262. adcx r12, r9
  48263. adox r11, r10
  48264. mov QWORD PTR [rcx+424], r12
  48265. ; A[54] * B
  48266. mulx r10, r9, QWORD PTR [rax+432]
  48267. mov r12, r13
  48268. adcx r11, r9
  48269. adox r12, r10
  48270. mov QWORD PTR [rcx+432], r11
  48271. ; A[55] * B
  48272. mulx r10, r9, QWORD PTR [rax+440]
  48273. mov r11, r13
  48274. adcx r12, r9
  48275. adox r11, r10
  48276. mov QWORD PTR [rcx+440], r12
  48277. ; A[56] * B
  48278. mulx r10, r9, QWORD PTR [rax+448]
  48279. mov r12, r13
  48280. adcx r11, r9
  48281. adox r12, r10
  48282. mov QWORD PTR [rcx+448], r11
  48283. ; A[57] * B
  48284. mulx r10, r9, QWORD PTR [rax+456]
  48285. mov r11, r13
  48286. adcx r12, r9
  48287. adox r11, r10
  48288. mov QWORD PTR [rcx+456], r12
  48289. ; A[58] * B
  48290. mulx r10, r9, QWORD PTR [rax+464]
  48291. mov r12, r13
  48292. adcx r11, r9
  48293. adox r12, r10
  48294. mov QWORD PTR [rcx+464], r11
  48295. ; A[59] * B
  48296. mulx r10, r9, QWORD PTR [rax+472]
  48297. mov r11, r13
  48298. adcx r12, r9
  48299. adox r11, r10
  48300. mov QWORD PTR [rcx+472], r12
  48301. ; A[60] * B
  48302. mulx r10, r9, QWORD PTR [rax+480]
  48303. mov r12, r13
  48304. adcx r11, r9
  48305. adox r12, r10
  48306. mov QWORD PTR [rcx+480], r11
  48307. ; A[61] * B
  48308. mulx r10, r9, QWORD PTR [rax+488]
  48309. mov r11, r13
  48310. adcx r12, r9
  48311. adox r11, r10
  48312. mov QWORD PTR [rcx+488], r12
  48313. ; A[62] * B
  48314. mulx r10, r9, QWORD PTR [rax+496]
  48315. mov r12, r13
  48316. adcx r11, r9
  48317. adox r12, r10
  48318. mov QWORD PTR [rcx+496], r11
  48319. ; A[63] * B
  48320. mulx r10, r9, QWORD PTR [rax+504]
  48321. mov r11, r13
  48322. adcx r12, r9
  48323. adox r11, r10
  48324. adcx r11, r13 ; fold the last adcx carry (CF) into the top limb; r13 is still 0
  48325. mov QWORD PTR [rcx+504], r12 ; r[63]
  48326. mov QWORD PTR [rcx+512], r11 ; r[64]: the extra top limb, so the destination needs room for 65 limbs
  48327. pop r13
  48328. pop r12
  48329. ret
  48330. sp_4096_mul_d_avx2_64 ENDP
  48331. _text ENDS
  48332. ENDIF
  48333. IFDEF _WIN64
  48334. ; /* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div)
  48335. ; * Thin wrapper around the 128-by-64-bit div instruction; clobbers rax, rdx, r9 and the flags.
  48336. ; * d1 The high order half of the number to divide. (taken from rcx)
  48337. ; * d0 The low order half of the number to divide. (taken from rdx)
  48338. ; * div The divisor. (taken from r8)
  48339. ; * returns the result of the division. (the quotient, left in rax; the remainder in rdx is not used here)
  48340. ; * NOTE: no checks are made here; div faults if div is 0 or the quotient does not fit in 64 bits (d1 >= div). */
  48341. _text SEGMENT READONLY PARA
  48342. div_4096_word_asm_64 PROC
  48343. mov r9, rdx ; stash d0 before rdx is overwritten
  48344. mov rax, r9 ; rax = d0, the low half of the dividend
  48345. mov rdx, rcx ; rdx = d1, the high half of the dividend
  48346. div r8 ; unsigned rdx:rax / r8 -> quotient in rax, remainder in rdx
  48347. ret
  48348. div_4096_word_asm_64 ENDP
  48349. _text ENDS
  48350. ENDIF
  48351. IFDEF HAVE_INTEL_AVX2
  48352. ; /* Conditionally subtract b from a using the mask m.
  48353. ; * m is -1 to subtract and 0 when not subtracting.
  48354. ; *
  48355. ; * r A single precision number representing the conditional subtract result.
  48356. ; * a A single precision number to subtract from.
  48357. ; * b A single precision number to subtract.
  48358. ; * m Mask value to apply.
  48359. ; */
  48360. _text SEGMENT READONLY PARA
  48361. sp_4096_cond_sub_avx2_64 PROC
  48362. push r12
  48363. mov r12, QWORD PTR [r8]
  48364. mov r10, QWORD PTR [rdx]
  48365. pext r12, r12, r9
  48366. sub r10, r12
  48367. mov r12, QWORD PTR [r8+8]
  48368. mov r11, QWORD PTR [rdx+8]
  48369. pext r12, r12, r9
  48370. mov QWORD PTR [rcx], r10
  48371. sbb r11, r12
  48372. mov r10, QWORD PTR [r8+16]
  48373. mov r12, QWORD PTR [rdx+16]
  48374. pext r10, r10, r9
  48375. mov QWORD PTR [rcx+8], r11
  48376. sbb r12, r10
  48377. mov r11, QWORD PTR [r8+24]
  48378. mov r10, QWORD PTR [rdx+24]
  48379. pext r11, r11, r9
  48380. mov QWORD PTR [rcx+16], r12
  48381. sbb r10, r11
  48382. mov r12, QWORD PTR [r8+32]
  48383. mov r11, QWORD PTR [rdx+32]
  48384. pext r12, r12, r9
  48385. mov QWORD PTR [rcx+24], r10
  48386. sbb r11, r12
  48387. mov r10, QWORD PTR [r8+40]
  48388. mov r12, QWORD PTR [rdx+40]
  48389. pext r10, r10, r9
  48390. mov QWORD PTR [rcx+32], r11
  48391. sbb r12, r10
  48392. mov r11, QWORD PTR [r8+48]
  48393. mov r10, QWORD PTR [rdx+48]
  48394. pext r11, r11, r9
  48395. mov QWORD PTR [rcx+40], r12
  48396. sbb r10, r11
  48397. mov r12, QWORD PTR [r8+56]
  48398. mov r11, QWORD PTR [rdx+56]
  48399. pext r12, r12, r9
  48400. mov QWORD PTR [rcx+48], r10
  48401. sbb r11, r12
  48402. mov r10, QWORD PTR [r8+64]
  48403. mov r12, QWORD PTR [rdx+64]
  48404. pext r10, r10, r9
  48405. mov QWORD PTR [rcx+56], r11
  48406. sbb r12, r10
  48407. mov r11, QWORD PTR [r8+72]
  48408. mov r10, QWORD PTR [rdx+72]
  48409. pext r11, r11, r9
  48410. mov QWORD PTR [rcx+64], r12
  48411. sbb r10, r11
  48412. mov r12, QWORD PTR [r8+80]
  48413. mov r11, QWORD PTR [rdx+80]
  48414. pext r12, r12, r9
  48415. mov QWORD PTR [rcx+72], r10
  48416. sbb r11, r12
  48417. mov r10, QWORD PTR [r8+88]
  48418. mov r12, QWORD PTR [rdx+88]
  48419. pext r10, r10, r9
  48420. mov QWORD PTR [rcx+80], r11
  48421. sbb r12, r10
  48422. mov r11, QWORD PTR [r8+96]
  48423. mov r10, QWORD PTR [rdx+96]
  48424. pext r11, r11, r9
  48425. mov QWORD PTR [rcx+88], r12
  48426. sbb r10, r11
  48427. mov r12, QWORD PTR [r8+104]
  48428. mov r11, QWORD PTR [rdx+104]
  48429. pext r12, r12, r9
  48430. mov QWORD PTR [rcx+96], r10
  48431. sbb r11, r12
  48432. mov r10, QWORD PTR [r8+112]
  48433. mov r12, QWORD PTR [rdx+112]
  48434. pext r10, r10, r9
  48435. mov QWORD PTR [rcx+104], r11
  48436. sbb r12, r10
  48437. mov r11, QWORD PTR [r8+120]
  48438. mov r10, QWORD PTR [rdx+120]
  48439. pext r11, r11, r9
  48440. mov QWORD PTR [rcx+112], r12
  48441. sbb r10, r11
  48442. mov r12, QWORD PTR [r8+128]
  48443. mov r11, QWORD PTR [rdx+128]
  48444. pext r12, r12, r9
  48445. mov QWORD PTR [rcx+120], r10
  48446. sbb r11, r12
  48447. mov r10, QWORD PTR [r8+136]
  48448. mov r12, QWORD PTR [rdx+136]
  48449. pext r10, r10, r9
  48450. mov QWORD PTR [rcx+128], r11
  48451. sbb r12, r10
  48452. mov r11, QWORD PTR [r8+144]
  48453. mov r10, QWORD PTR [rdx+144]
  48454. pext r11, r11, r9
  48455. mov QWORD PTR [rcx+136], r12
  48456. sbb r10, r11
  48457. mov r12, QWORD PTR [r8+152]
  48458. mov r11, QWORD PTR [rdx+152]
  48459. pext r12, r12, r9
  48460. mov QWORD PTR [rcx+144], r10
  48461. sbb r11, r12
  48462. mov r10, QWORD PTR [r8+160]
  48463. mov r12, QWORD PTR [rdx+160]
  48464. pext r10, r10, r9
  48465. mov QWORD PTR [rcx+152], r11
  48466. sbb r12, r10
  48467. mov r11, QWORD PTR [r8+168]
  48468. mov r10, QWORD PTR [rdx+168]
  48469. pext r11, r11, r9
  48470. mov QWORD PTR [rcx+160], r12
  48471. sbb r10, r11
  48472. mov r12, QWORD PTR [r8+176]
  48473. mov r11, QWORD PTR [rdx+176]
  48474. pext r12, r12, r9
  48475. mov QWORD PTR [rcx+168], r10
  48476. sbb r11, r12
  48477. mov r10, QWORD PTR [r8+184]
  48478. mov r12, QWORD PTR [rdx+184]
  48479. pext r10, r10, r9
  48480. mov QWORD PTR [rcx+176], r11
  48481. sbb r12, r10
  48482. mov r11, QWORD PTR [r8+192]
  48483. mov r10, QWORD PTR [rdx+192]
  48484. pext r11, r11, r9
  48485. mov QWORD PTR [rcx+184], r12
  48486. sbb r10, r11
  48487. mov r12, QWORD PTR [r8+200]
  48488. mov r11, QWORD PTR [rdx+200]
  48489. pext r12, r12, r9
  48490. mov QWORD PTR [rcx+192], r10
  48491. sbb r11, r12
  48492. mov r10, QWORD PTR [r8+208]
  48493. mov r12, QWORD PTR [rdx+208]
  48494. pext r10, r10, r9
  48495. mov QWORD PTR [rcx+200], r11
  48496. sbb r12, r10
  48497. mov r11, QWORD PTR [r8+216]
  48498. mov r10, QWORD PTR [rdx+216]
  48499. pext r11, r11, r9
  48500. mov QWORD PTR [rcx+208], r12
  48501. sbb r10, r11
  48502. mov r12, QWORD PTR [r8+224]
  48503. mov r11, QWORD PTR [rdx+224]
  48504. pext r12, r12, r9
  48505. mov QWORD PTR [rcx+216], r10
  48506. sbb r11, r12
  48507. mov r10, QWORD PTR [r8+232]
  48508. mov r12, QWORD PTR [rdx+232]
  48509. pext r10, r10, r9
  48510. mov QWORD PTR [rcx+224], r11
  48511. sbb r12, r10
  48512. mov r11, QWORD PTR [r8+240]
  48513. mov r10, QWORD PTR [rdx+240]
  48514. pext r11, r11, r9
  48515. mov QWORD PTR [rcx+232], r12
  48516. sbb r10, r11
  48517. mov r12, QWORD PTR [r8+248]
  48518. mov r11, QWORD PTR [rdx+248]
  48519. pext r12, r12, r9
  48520. mov QWORD PTR [rcx+240], r10
  48521. sbb r11, r12
  48522. mov r10, QWORD PTR [r8+256]
  48523. mov r12, QWORD PTR [rdx+256]
  48524. pext r10, r10, r9
  48525. mov QWORD PTR [rcx+248], r11
  48526. sbb r12, r10
  48527. mov r11, QWORD PTR [r8+264]
  48528. mov r10, QWORD PTR [rdx+264]
  48529. pext r11, r11, r9
  48530. mov QWORD PTR [rcx+256], r12
  48531. sbb r10, r11
  48532. mov r12, QWORD PTR [r8+272]
  48533. mov r11, QWORD PTR [rdx+272]
  48534. pext r12, r12, r9
  48535. mov QWORD PTR [rcx+264], r10
  48536. sbb r11, r12
  48537. mov r10, QWORD PTR [r8+280]
  48538. mov r12, QWORD PTR [rdx+280]
  48539. pext r10, r10, r9
  48540. mov QWORD PTR [rcx+272], r11
  48541. sbb r12, r10
  48542. mov r11, QWORD PTR [r8+288]
  48543. mov r10, QWORD PTR [rdx+288]
  48544. pext r11, r11, r9
  48545. mov QWORD PTR [rcx+280], r12
  48546. sbb r10, r11
  48547. mov r12, QWORD PTR [r8+296]
  48548. mov r11, QWORD PTR [rdx+296]
  48549. pext r12, r12, r9
  48550. mov QWORD PTR [rcx+288], r10
  48551. sbb r11, r12
  48552. mov r10, QWORD PTR [r8+304]
  48553. mov r12, QWORD PTR [rdx+304]
  48554. pext r10, r10, r9
  48555. mov QWORD PTR [rcx+296], r11
  48556. sbb r12, r10
  48557. mov r11, QWORD PTR [r8+312]
  48558. mov r10, QWORD PTR [rdx+312]
  48559. pext r11, r11, r9
  48560. mov QWORD PTR [rcx+304], r12
  48561. sbb r10, r11
  48562. mov r12, QWORD PTR [r8+320]
  48563. mov r11, QWORD PTR [rdx+320]
  48564. pext r12, r12, r9
  48565. mov QWORD PTR [rcx+312], r10
  48566. sbb r11, r12
  48567. mov r10, QWORD PTR [r8+328]
  48568. mov r12, QWORD PTR [rdx+328]
  48569. pext r10, r10, r9
  48570. mov QWORD PTR [rcx+320], r11
  48571. sbb r12, r10
  48572. mov r11, QWORD PTR [r8+336]
  48573. mov r10, QWORD PTR [rdx+336]
  48574. pext r11, r11, r9
  48575. mov QWORD PTR [rcx+328], r12
  48576. sbb r10, r11
  48577. mov r12, QWORD PTR [r8+344]
  48578. mov r11, QWORD PTR [rdx+344]
  48579. pext r12, r12, r9
  48580. mov QWORD PTR [rcx+336], r10
  48581. sbb r11, r12
  48582. mov r10, QWORD PTR [r8+352]
  48583. mov r12, QWORD PTR [rdx+352]
  48584. pext r10, r10, r9
  48585. mov QWORD PTR [rcx+344], r11
  48586. sbb r12, r10
  48587. mov r11, QWORD PTR [r8+360]
  48588. mov r10, QWORD PTR [rdx+360]
  48589. pext r11, r11, r9
  48590. mov QWORD PTR [rcx+352], r12
  48591. sbb r10, r11
  48592. mov r12, QWORD PTR [r8+368]
  48593. mov r11, QWORD PTR [rdx+368]
  48594. pext r12, r12, r9
  48595. mov QWORD PTR [rcx+360], r10
  48596. sbb r11, r12
  48597. mov r10, QWORD PTR [r8+376]
  48598. mov r12, QWORD PTR [rdx+376]
  48599. pext r10, r10, r9
  48600. mov QWORD PTR [rcx+368], r11
  48601. sbb r12, r10
  48602. mov r11, QWORD PTR [r8+384]
  48603. mov r10, QWORD PTR [rdx+384]
  48604. pext r11, r11, r9
  48605. mov QWORD PTR [rcx+376], r12
  48606. sbb r10, r11
  48607. mov r12, QWORD PTR [r8+392]
  48608. mov r11, QWORD PTR [rdx+392]
  48609. pext r12, r12, r9
  48610. mov QWORD PTR [rcx+384], r10
  48611. sbb r11, r12
  48612. mov r10, QWORD PTR [r8+400]
  48613. mov r12, QWORD PTR [rdx+400]
  48614. pext r10, r10, r9
  48615. mov QWORD PTR [rcx+392], r11
  48616. sbb r12, r10
  48617. mov r11, QWORD PTR [r8+408]
  48618. mov r10, QWORD PTR [rdx+408]
  48619. pext r11, r11, r9
  48620. mov QWORD PTR [rcx+400], r12
  48621. sbb r10, r11
  48622. mov r12, QWORD PTR [r8+416]
  48623. mov r11, QWORD PTR [rdx+416]
  48624. pext r12, r12, r9
  48625. mov QWORD PTR [rcx+408], r10
  48626. sbb r11, r12
  48627. mov r10, QWORD PTR [r8+424]
  48628. mov r12, QWORD PTR [rdx+424]
  48629. pext r10, r10, r9
  48630. mov QWORD PTR [rcx+416], r11
  48631. sbb r12, r10
  48632. mov r11, QWORD PTR [r8+432]
  48633. mov r10, QWORD PTR [rdx+432]
  48634. pext r11, r11, r9
  48635. mov QWORD PTR [rcx+424], r12
  48636. sbb r10, r11
  48637. mov r12, QWORD PTR [r8+440]
  48638. mov r11, QWORD PTR [rdx+440]
  48639. pext r12, r12, r9
  48640. mov QWORD PTR [rcx+432], r10
  48641. sbb r11, r12
  48642. mov r10, QWORD PTR [r8+448]
  48643. mov r12, QWORD PTR [rdx+448]
  48644. pext r10, r10, r9
  48645. mov QWORD PTR [rcx+440], r11
  48646. sbb r12, r10
  48647. mov r11, QWORD PTR [r8+456]
  48648. mov r10, QWORD PTR [rdx+456]
  48649. pext r11, r11, r9
  48650. mov QWORD PTR [rcx+448], r12
  48651. sbb r10, r11
  48652. mov r12, QWORD PTR [r8+464]
  48653. mov r11, QWORD PTR [rdx+464]
  48654. pext r12, r12, r9
  48655. mov QWORD PTR [rcx+456], r10
  48656. sbb r11, r12
  48657. mov r10, QWORD PTR [r8+472]
  48658. mov r12, QWORD PTR [rdx+472]
  48659. pext r10, r10, r9
  48660. mov QWORD PTR [rcx+464], r11
  48661. sbb r12, r10
  48662. mov r11, QWORD PTR [r8+480]
  48663. mov r10, QWORD PTR [rdx+480]
  48664. pext r11, r11, r9
  48665. mov QWORD PTR [rcx+472], r12
  48666. sbb r10, r11
  48667. mov r12, QWORD PTR [r8+488]
  48668. mov r11, QWORD PTR [rdx+488]
  48669. pext r12, r12, r9
  48670. mov QWORD PTR [rcx+480], r10
  48671. sbb r11, r12
  48672. mov r10, QWORD PTR [r8+496]
  48673. mov r12, QWORD PTR [rdx+496]
  48674. pext r10, r10, r9
  48675. mov QWORD PTR [rcx+488], r11
  48676. sbb r12, r10
  48677. mov r11, QWORD PTR [r8+504]
  48678. mov r10, QWORD PTR [rdx+504]
  48679. pext r11, r11, r9
  48680. mov QWORD PTR [rcx+496], r12
  48681. sbb r10, r11
  48682. mov QWORD PTR [rcx+504], r10
  48683. sbb rax, rax
  48684. pop r12
  48685. ret
  48686. sp_4096_cond_sub_avx2_64 ENDP
  48687. _text ENDS
  48688. ENDIF
  48689. ; /* Compare a with b without branching, from word 63 (offset 504) down to word 0.
  48690. ; * Every word is always loaded; words below the first difference are masked to 0.
  48691. ; * a A single precision integer (rcx).
  48692. ; * b A single precision integer (rdx).
  48693. ; * return -1, 0 or 1 in rax if a is less than, equal to or greater than b
  48694. ; * respectively (words are compared as unsigned values).
  48695. ; */
  48696. _text SEGMENT READONLY PARA
  48697. sp_4096_cmp_64 PROC
  48698. push r12 ; r12 is callee-saved in the Microsoft x64 ABI; restored before ret
  48699. xor r9, r9 ; r9 = 0, copied into the mask once a difference is seen
  48700. mov r8, -1 ; r8 = mask: all ones while every higher word was equal
  48701. mov rax, -1 ; rax = running result; turned into 0 by the final xor if all equal
  48702. mov r10, 1 ; r10 = constant result for "a greater than b"
  48703. mov r11, QWORD PTR [rcx+504] ; most significant word of a
  48704. mov r12, QWORD PTR [rdx+504] ; most significant word of b
  48705. and r11, r8 ; once r8 is 0 both words become 0 and compare equal
  48706. and r12, r8
  48707. sub r11, r12 ; sets CF/ZF for the three cmov tests below
  48708. cmova rax, r10 ; a word above b word: result = 1
  48709. cmovc rax, r8 ; a word below b word: result = r8, which is still -1 here
  48710. cmovnz r8, r9 ; words differ: clear the mask so lower words cannot change rax
  48711. mov r11, QWORD PTR [rcx+496] ; the same 8-instruction step repeats for each lower word
  48712. mov r12, QWORD PTR [rdx+496]
  48713. and r11, r8
  48714. and r12, r8
  48715. sub r11, r12
  48716. cmova rax, r10
  48717. cmovc rax, r8
  48718. cmovnz r8, r9
  48719. mov r11, QWORD PTR [rcx+488]
  48720. mov r12, QWORD PTR [rdx+488]
  48721. and r11, r8
  48722. and r12, r8
  48723. sub r11, r12
  48724. cmova rax, r10
  48725. cmovc rax, r8
  48726. cmovnz r8, r9
  48727. mov r11, QWORD PTR [rcx+480]
  48728. mov r12, QWORD PTR [rdx+480]
  48729. and r11, r8
  48730. and r12, r8
  48731. sub r11, r12
  48732. cmova rax, r10
  48733. cmovc rax, r8
  48734. cmovnz r8, r9
  48735. mov r11, QWORD PTR [rcx+472]
  48736. mov r12, QWORD PTR [rdx+472]
  48737. and r11, r8
  48738. and r12, r8
  48739. sub r11, r12
  48740. cmova rax, r10
  48741. cmovc rax, r8
  48742. cmovnz r8, r9
  48743. mov r11, QWORD PTR [rcx+464]
  48744. mov r12, QWORD PTR [rdx+464]
  48745. and r11, r8
  48746. and r12, r8
  48747. sub r11, r12
  48748. cmova rax, r10
  48749. cmovc rax, r8
  48750. cmovnz r8, r9
  48751. mov r11, QWORD PTR [rcx+456]
  48752. mov r12, QWORD PTR [rdx+456]
  48753. and r11, r8
  48754. and r12, r8
  48755. sub r11, r12
  48756. cmova rax, r10
  48757. cmovc rax, r8
  48758. cmovnz r8, r9
  48759. mov r11, QWORD PTR [rcx+448]
  48760. mov r12, QWORD PTR [rdx+448]
  48761. and r11, r8
  48762. and r12, r8
  48763. sub r11, r12
  48764. cmova rax, r10
  48765. cmovc rax, r8
  48766. cmovnz r8, r9
  48767. mov r11, QWORD PTR [rcx+440]
  48768. mov r12, QWORD PTR [rdx+440]
  48769. and r11, r8
  48770. and r12, r8
  48771. sub r11, r12
  48772. cmova rax, r10
  48773. cmovc rax, r8
  48774. cmovnz r8, r9
  48775. mov r11, QWORD PTR [rcx+432]
  48776. mov r12, QWORD PTR [rdx+432]
  48777. and r11, r8
  48778. and r12, r8
  48779. sub r11, r12
  48780. cmova rax, r10
  48781. cmovc rax, r8
  48782. cmovnz r8, r9
  48783. mov r11, QWORD PTR [rcx+424]
  48784. mov r12, QWORD PTR [rdx+424]
  48785. and r11, r8
  48786. and r12, r8
  48787. sub r11, r12
  48788. cmova rax, r10
  48789. cmovc rax, r8
  48790. cmovnz r8, r9
  48791. mov r11, QWORD PTR [rcx+416]
  48792. mov r12, QWORD PTR [rdx+416]
  48793. and r11, r8
  48794. and r12, r8
  48795. sub r11, r12
  48796. cmova rax, r10
  48797. cmovc rax, r8
  48798. cmovnz r8, r9
  48799. mov r11, QWORD PTR [rcx+408]
  48800. mov r12, QWORD PTR [rdx+408]
  48801. and r11, r8
  48802. and r12, r8
  48803. sub r11, r12
  48804. cmova rax, r10
  48805. cmovc rax, r8
  48806. cmovnz r8, r9
  48807. mov r11, QWORD PTR [rcx+400]
  48808. mov r12, QWORD PTR [rdx+400]
  48809. and r11, r8
  48810. and r12, r8
  48811. sub r11, r12
  48812. cmova rax, r10
  48813. cmovc rax, r8
  48814. cmovnz r8, r9
  48815. mov r11, QWORD PTR [rcx+392]
  48816. mov r12, QWORD PTR [rdx+392]
  48817. and r11, r8
  48818. and r12, r8
  48819. sub r11, r12
  48820. cmova rax, r10
  48821. cmovc rax, r8
  48822. cmovnz r8, r9
  48823. mov r11, QWORD PTR [rcx+384]
  48824. mov r12, QWORD PTR [rdx+384]
  48825. and r11, r8
  48826. and r12, r8
  48827. sub r11, r12
  48828. cmova rax, r10
  48829. cmovc rax, r8
  48830. cmovnz r8, r9
  48831. mov r11, QWORD PTR [rcx+376]
  48832. mov r12, QWORD PTR [rdx+376]
  48833. and r11, r8
  48834. and r12, r8
  48835. sub r11, r12
  48836. cmova rax, r10
  48837. cmovc rax, r8
  48838. cmovnz r8, r9
  48839. mov r11, QWORD PTR [rcx+368]
  48840. mov r12, QWORD PTR [rdx+368]
  48841. and r11, r8
  48842. and r12, r8
  48843. sub r11, r12
  48844. cmova rax, r10
  48845. cmovc rax, r8
  48846. cmovnz r8, r9
  48847. mov r11, QWORD PTR [rcx+360]
  48848. mov r12, QWORD PTR [rdx+360]
  48849. and r11, r8
  48850. and r12, r8
  48851. sub r11, r12
  48852. cmova rax, r10
  48853. cmovc rax, r8
  48854. cmovnz r8, r9
  48855. mov r11, QWORD PTR [rcx+352]
  48856. mov r12, QWORD PTR [rdx+352]
  48857. and r11, r8
  48858. and r12, r8
  48859. sub r11, r12
  48860. cmova rax, r10
  48861. cmovc rax, r8
  48862. cmovnz r8, r9
  48863. mov r11, QWORD PTR [rcx+344]
  48864. mov r12, QWORD PTR [rdx+344]
  48865. and r11, r8
  48866. and r12, r8
  48867. sub r11, r12
  48868. cmova rax, r10
  48869. cmovc rax, r8
  48870. cmovnz r8, r9
  48871. mov r11, QWORD PTR [rcx+336]
  48872. mov r12, QWORD PTR [rdx+336]
  48873. and r11, r8
  48874. and r12, r8
  48875. sub r11, r12
  48876. cmova rax, r10
  48877. cmovc rax, r8
  48878. cmovnz r8, r9
  48879. mov r11, QWORD PTR [rcx+328]
  48880. mov r12, QWORD PTR [rdx+328]
  48881. and r11, r8
  48882. and r12, r8
  48883. sub r11, r12
  48884. cmova rax, r10
  48885. cmovc rax, r8
  48886. cmovnz r8, r9
  48887. mov r11, QWORD PTR [rcx+320]
  48888. mov r12, QWORD PTR [rdx+320]
  48889. and r11, r8
  48890. and r12, r8
  48891. sub r11, r12
  48892. cmova rax, r10
  48893. cmovc rax, r8
  48894. cmovnz r8, r9
  48895. mov r11, QWORD PTR [rcx+312]
  48896. mov r12, QWORD PTR [rdx+312]
  48897. and r11, r8
  48898. and r12, r8
  48899. sub r11, r12
  48900. cmova rax, r10
  48901. cmovc rax, r8
  48902. cmovnz r8, r9
  48903. mov r11, QWORD PTR [rcx+304]
  48904. mov r12, QWORD PTR [rdx+304]
  48905. and r11, r8
  48906. and r12, r8
  48907. sub r11, r12
  48908. cmova rax, r10
  48909. cmovc rax, r8
  48910. cmovnz r8, r9
  48911. mov r11, QWORD PTR [rcx+296]
  48912. mov r12, QWORD PTR [rdx+296]
  48913. and r11, r8
  48914. and r12, r8
  48915. sub r11, r12
  48916. cmova rax, r10
  48917. cmovc rax, r8
  48918. cmovnz r8, r9
  48919. mov r11, QWORD PTR [rcx+288]
  48920. mov r12, QWORD PTR [rdx+288]
  48921. and r11, r8
  48922. and r12, r8
  48923. sub r11, r12
  48924. cmova rax, r10
  48925. cmovc rax, r8
  48926. cmovnz r8, r9
  48927. mov r11, QWORD PTR [rcx+280]
  48928. mov r12, QWORD PTR [rdx+280]
  48929. and r11, r8
  48930. and r12, r8
  48931. sub r11, r12
  48932. cmova rax, r10
  48933. cmovc rax, r8
  48934. cmovnz r8, r9
  48935. mov r11, QWORD PTR [rcx+272]
  48936. mov r12, QWORD PTR [rdx+272]
  48937. and r11, r8
  48938. and r12, r8
  48939. sub r11, r12
  48940. cmova rax, r10
  48941. cmovc rax, r8
  48942. cmovnz r8, r9
  48943. mov r11, QWORD PTR [rcx+264]
  48944. mov r12, QWORD PTR [rdx+264]
  48945. and r11, r8
  48946. and r12, r8
  48947. sub r11, r12
  48948. cmova rax, r10
  48949. cmovc rax, r8
  48950. cmovnz r8, r9
  48951. mov r11, QWORD PTR [rcx+256]
  48952. mov r12, QWORD PTR [rdx+256]
  48953. and r11, r8
  48954. and r12, r8
  48955. sub r11, r12
  48956. cmova rax, r10
  48957. cmovc rax, r8
  48958. cmovnz r8, r9
  48959. mov r11, QWORD PTR [rcx+248]
  48960. mov r12, QWORD PTR [rdx+248]
  48961. and r11, r8
  48962. and r12, r8
  48963. sub r11, r12
  48964. cmova rax, r10
  48965. cmovc rax, r8
  48966. cmovnz r8, r9
  48967. mov r11, QWORD PTR [rcx+240]
  48968. mov r12, QWORD PTR [rdx+240]
  48969. and r11, r8
  48970. and r12, r8
  48971. sub r11, r12
  48972. cmova rax, r10
  48973. cmovc rax, r8
  48974. cmovnz r8, r9
  48975. mov r11, QWORD PTR [rcx+232]
  48976. mov r12, QWORD PTR [rdx+232]
  48977. and r11, r8
  48978. and r12, r8
  48979. sub r11, r12
  48980. cmova rax, r10
  48981. cmovc rax, r8
  48982. cmovnz r8, r9
  48983. mov r11, QWORD PTR [rcx+224]
  48984. mov r12, QWORD PTR [rdx+224]
  48985. and r11, r8
  48986. and r12, r8
  48987. sub r11, r12
  48988. cmova rax, r10
  48989. cmovc rax, r8
  48990. cmovnz r8, r9
  48991. mov r11, QWORD PTR [rcx+216]
  48992. mov r12, QWORD PTR [rdx+216]
  48993. and r11, r8
  48994. and r12, r8
  48995. sub r11, r12
  48996. cmova rax, r10
  48997. cmovc rax, r8
  48998. cmovnz r8, r9
  48999. mov r11, QWORD PTR [rcx+208]
  49000. mov r12, QWORD PTR [rdx+208]
  49001. and r11, r8
  49002. and r12, r8
  49003. sub r11, r12
  49004. cmova rax, r10
  49005. cmovc rax, r8
  49006. cmovnz r8, r9
  49007. mov r11, QWORD PTR [rcx+200]
  49008. mov r12, QWORD PTR [rdx+200]
  49009. and r11, r8
  49010. and r12, r8
  49011. sub r11, r12
  49012. cmova rax, r10
  49013. cmovc rax, r8
  49014. cmovnz r8, r9
  49015. mov r11, QWORD PTR [rcx+192]
  49016. mov r12, QWORD PTR [rdx+192]
  49017. and r11, r8
  49018. and r12, r8
  49019. sub r11, r12
  49020. cmova rax, r10
  49021. cmovc rax, r8
  49022. cmovnz r8, r9
  49023. mov r11, QWORD PTR [rcx+184]
  49024. mov r12, QWORD PTR [rdx+184]
  49025. and r11, r8
  49026. and r12, r8
  49027. sub r11, r12
  49028. cmova rax, r10
  49029. cmovc rax, r8
  49030. cmovnz r8, r9
  49031. mov r11, QWORD PTR [rcx+176]
  49032. mov r12, QWORD PTR [rdx+176]
  49033. and r11, r8
  49034. and r12, r8
  49035. sub r11, r12
  49036. cmova rax, r10
  49037. cmovc rax, r8
  49038. cmovnz r8, r9
  49039. mov r11, QWORD PTR [rcx+168]
  49040. mov r12, QWORD PTR [rdx+168]
  49041. and r11, r8
  49042. and r12, r8
  49043. sub r11, r12
  49044. cmova rax, r10
  49045. cmovc rax, r8
  49046. cmovnz r8, r9
  49047. mov r11, QWORD PTR [rcx+160]
  49048. mov r12, QWORD PTR [rdx+160]
  49049. and r11, r8
  49050. and r12, r8
  49051. sub r11, r12
  49052. cmova rax, r10
  49053. cmovc rax, r8
  49054. cmovnz r8, r9
  49055. mov r11, QWORD PTR [rcx+152]
  49056. mov r12, QWORD PTR [rdx+152]
  49057. and r11, r8
  49058. and r12, r8
  49059. sub r11, r12
  49060. cmova rax, r10
  49061. cmovc rax, r8
  49062. cmovnz r8, r9
  49063. mov r11, QWORD PTR [rcx+144]
  49064. mov r12, QWORD PTR [rdx+144]
  49065. and r11, r8
  49066. and r12, r8
  49067. sub r11, r12
  49068. cmova rax, r10
  49069. cmovc rax, r8
  49070. cmovnz r8, r9
  49071. mov r11, QWORD PTR [rcx+136]
  49072. mov r12, QWORD PTR [rdx+136]
  49073. and r11, r8
  49074. and r12, r8
  49075. sub r11, r12
  49076. cmova rax, r10
  49077. cmovc rax, r8
  49078. cmovnz r8, r9
  49079. mov r11, QWORD PTR [rcx+128]
  49080. mov r12, QWORD PTR [rdx+128]
  49081. and r11, r8
  49082. and r12, r8
  49083. sub r11, r12
  49084. cmova rax, r10
  49085. cmovc rax, r8
  49086. cmovnz r8, r9
  49087. mov r11, QWORD PTR [rcx+120]
  49088. mov r12, QWORD PTR [rdx+120]
  49089. and r11, r8
  49090. and r12, r8
  49091. sub r11, r12
  49092. cmova rax, r10
  49093. cmovc rax, r8
  49094. cmovnz r8, r9
  49095. mov r11, QWORD PTR [rcx+112]
  49096. mov r12, QWORD PTR [rdx+112]
  49097. and r11, r8
  49098. and r12, r8
  49099. sub r11, r12
  49100. cmova rax, r10
  49101. cmovc rax, r8
  49102. cmovnz r8, r9
  49103. mov r11, QWORD PTR [rcx+104]
  49104. mov r12, QWORD PTR [rdx+104]
  49105. and r11, r8
  49106. and r12, r8
  49107. sub r11, r12
  49108. cmova rax, r10
  49109. cmovc rax, r8
  49110. cmovnz r8, r9
  49111. mov r11, QWORD PTR [rcx+96]
  49112. mov r12, QWORD PTR [rdx+96]
  49113. and r11, r8
  49114. and r12, r8
  49115. sub r11, r12
  49116. cmova rax, r10
  49117. cmovc rax, r8
  49118. cmovnz r8, r9
  49119. mov r11, QWORD PTR [rcx+88]
  49120. mov r12, QWORD PTR [rdx+88]
  49121. and r11, r8
  49122. and r12, r8
  49123. sub r11, r12
  49124. cmova rax, r10
  49125. cmovc rax, r8
  49126. cmovnz r8, r9
  49127. mov r11, QWORD PTR [rcx+80]
  49128. mov r12, QWORD PTR [rdx+80]
  49129. and r11, r8
  49130. and r12, r8
  49131. sub r11, r12
  49132. cmova rax, r10
  49133. cmovc rax, r8
  49134. cmovnz r8, r9
  49135. mov r11, QWORD PTR [rcx+72]
  49136. mov r12, QWORD PTR [rdx+72]
  49137. and r11, r8
  49138. and r12, r8
  49139. sub r11, r12
  49140. cmova rax, r10
  49141. cmovc rax, r8
  49142. cmovnz r8, r9
  49143. mov r11, QWORD PTR [rcx+64]
  49144. mov r12, QWORD PTR [rdx+64]
  49145. and r11, r8
  49146. and r12, r8
  49147. sub r11, r12
  49148. cmova rax, r10
  49149. cmovc rax, r8
  49150. cmovnz r8, r9
  49151. mov r11, QWORD PTR [rcx+56]
  49152. mov r12, QWORD PTR [rdx+56]
  49153. and r11, r8
  49154. and r12, r8
  49155. sub r11, r12
  49156. cmova rax, r10
  49157. cmovc rax, r8
  49158. cmovnz r8, r9
  49159. mov r11, QWORD PTR [rcx+48]
  49160. mov r12, QWORD PTR [rdx+48]
  49161. and r11, r8
  49162. and r12, r8
  49163. sub r11, r12
  49164. cmova rax, r10
  49165. cmovc rax, r8
  49166. cmovnz r8, r9
  49167. mov r11, QWORD PTR [rcx+40]
  49168. mov r12, QWORD PTR [rdx+40]
  49169. and r11, r8
  49170. and r12, r8
  49171. sub r11, r12
  49172. cmova rax, r10
  49173. cmovc rax, r8
  49174. cmovnz r8, r9
  49175. mov r11, QWORD PTR [rcx+32]
  49176. mov r12, QWORD PTR [rdx+32]
  49177. and r11, r8
  49178. and r12, r8
  49179. sub r11, r12
  49180. cmova rax, r10
  49181. cmovc rax, r8
  49182. cmovnz r8, r9
  49183. mov r11, QWORD PTR [rcx+24]
  49184. mov r12, QWORD PTR [rdx+24]
  49185. and r11, r8
  49186. and r12, r8
  49187. sub r11, r12
  49188. cmova rax, r10
  49189. cmovc rax, r8
  49190. cmovnz r8, r9
  49191. mov r11, QWORD PTR [rcx+16]
  49192. mov r12, QWORD PTR [rdx+16]
  49193. and r11, r8
  49194. and r12, r8
  49195. sub r11, r12
  49196. cmova rax, r10
  49197. cmovc rax, r8
  49198. cmovnz r8, r9
  49199. mov r11, QWORD PTR [rcx+8]
  49200. mov r12, QWORD PTR [rdx+8]
  49201. and r11, r8
  49202. and r12, r8
  49203. sub r11, r12
  49204. cmova rax, r10
  49205. cmovc rax, r8
  49206. cmovnz r8, r9
  49207. mov r11, QWORD PTR [rcx] ; least significant word of a
  49208. mov r12, QWORD PTR [rdx] ; least significant word of b
  49209. and r11, r8
  49210. and r12, r8
  49211. sub r11, r12
  49212. cmova rax, r10
  49213. cmovc rax, r8
  49214. cmovnz r8, r9
  49215. xor rax, r8 ; all equal: r8 is still -1, so -1 xor -1 = 0; otherwise r8 = 0 and rax is kept
  49216. pop r12
  49217. ret
  49218. sp_4096_cmp_64 ENDP
  49219. _text ENDS
  49220. IFNDEF WC_NO_CACHE_RESISTANT
  49221. _text SEGMENT READONLY PARA
  49222. sp_4096_get_from_table_64 PROC
  49223. sub rsp, 128
  49224. vmovdqu OWORD PTR [rsp], xmm6
  49225. vmovdqu OWORD PTR [rsp+16], xmm7
  49226. vmovdqu OWORD PTR [rsp+32], xmm8
  49227. vmovdqu OWORD PTR [rsp+48], xmm9
  49228. vmovdqu OWORD PTR [rsp+64], xmm10
  49229. vmovdqu OWORD PTR [rsp+80], xmm11
  49230. vmovdqu OWORD PTR [rsp+96], xmm12
  49231. vmovdqu OWORD PTR [rsp+112], xmm13
  49232. mov rax, 1
  49233. movd xmm10, r8
  49234. movd xmm11, rax
  49235. pxor xmm13, xmm13
  49236. pshufd xmm11, xmm11, 0
  49237. pshufd xmm10, xmm10, 0
  49238. ; START: 0-7
  49239. pxor xmm13, xmm13
  49240. pxor xmm4, xmm4
  49241. pxor xmm5, xmm5
  49242. pxor xmm6, xmm6
  49243. pxor xmm7, xmm7
  49244. ; ENTRY: 0
  49245. mov r9, QWORD PTR [rdx]
  49246. movdqu xmm12, xmm13
  49247. pcmpeqd xmm12, xmm10
  49248. movdqu xmm0, [r9]
  49249. movdqu xmm1, [r9+16]
  49250. movdqu xmm2, [r9+32]
  49251. movdqu xmm3, [r9+48]
  49252. pand xmm0, xmm12
  49253. pand xmm1, xmm12
  49254. pand xmm2, xmm12
  49255. pand xmm3, xmm12
  49256. por xmm4, xmm0
  49257. por xmm5, xmm1
  49258. por xmm6, xmm2
  49259. por xmm7, xmm3
  49260. paddd xmm13, xmm11
  49261. ; ENTRY: 1
  49262. mov r9, QWORD PTR [rdx+8]
  49263. movdqu xmm12, xmm13
  49264. pcmpeqd xmm12, xmm10
  49265. movdqu xmm0, [r9]
  49266. movdqu xmm1, [r9+16]
  49267. movdqu xmm2, [r9+32]
  49268. movdqu xmm3, [r9+48]
  49269. pand xmm0, xmm12
  49270. pand xmm1, xmm12
  49271. pand xmm2, xmm12
  49272. pand xmm3, xmm12
  49273. por xmm4, xmm0
  49274. por xmm5, xmm1
  49275. por xmm6, xmm2
  49276. por xmm7, xmm3
  49277. paddd xmm13, xmm11
  49278. ; ENTRY: 2
  49279. mov r9, QWORD PTR [rdx+16]
  49280. movdqu xmm12, xmm13
  49281. pcmpeqd xmm12, xmm10
  49282. movdqu xmm0, [r9]
  49283. movdqu xmm1, [r9+16]
  49284. movdqu xmm2, [r9+32]
  49285. movdqu xmm3, [r9+48]
  49286. pand xmm0, xmm12
  49287. pand xmm1, xmm12
  49288. pand xmm2, xmm12
  49289. pand xmm3, xmm12
  49290. por xmm4, xmm0
  49291. por xmm5, xmm1
  49292. por xmm6, xmm2
  49293. por xmm7, xmm3
  49294. paddd xmm13, xmm11
  49295. ; ENTRY: 3
  49296. mov r9, QWORD PTR [rdx+24]
  49297. movdqu xmm12, xmm13
  49298. pcmpeqd xmm12, xmm10
  49299. movdqu xmm0, [r9]
  49300. movdqu xmm1, [r9+16]
  49301. movdqu xmm2, [r9+32]
  49302. movdqu xmm3, [r9+48]
  49303. pand xmm0, xmm12
  49304. pand xmm1, xmm12
  49305. pand xmm2, xmm12
  49306. pand xmm3, xmm12
  49307. por xmm4, xmm0
  49308. por xmm5, xmm1
  49309. por xmm6, xmm2
  49310. por xmm7, xmm3
  49311. paddd xmm13, xmm11
  49312. ; ENTRY: 4
  49313. mov r9, QWORD PTR [rdx+32]
  49314. movdqu xmm12, xmm13
  49315. pcmpeqd xmm12, xmm10
  49316. movdqu xmm0, [r9]
  49317. movdqu xmm1, [r9+16]
  49318. movdqu xmm2, [r9+32]
  49319. movdqu xmm3, [r9+48]
  49320. pand xmm0, xmm12
  49321. pand xmm1, xmm12
  49322. pand xmm2, xmm12
  49323. pand xmm3, xmm12
  49324. por xmm4, xmm0
  49325. por xmm5, xmm1
  49326. por xmm6, xmm2
  49327. por xmm7, xmm3
  49328. paddd xmm13, xmm11
  49329. ; ENTRY: 5
  49330. mov r9, QWORD PTR [rdx+40]
  49331. movdqu xmm12, xmm13
  49332. pcmpeqd xmm12, xmm10
  49333. movdqu xmm0, [r9]
  49334. movdqu xmm1, [r9+16]
  49335. movdqu xmm2, [r9+32]
  49336. movdqu xmm3, [r9+48]
  49337. pand xmm0, xmm12
  49338. pand xmm1, xmm12
  49339. pand xmm2, xmm12
  49340. pand xmm3, xmm12
  49341. por xmm4, xmm0
  49342. por xmm5, xmm1
  49343. por xmm6, xmm2
  49344. por xmm7, xmm3
  49345. paddd xmm13, xmm11
  49346. ; ENTRY: 6
  49347. mov r9, QWORD PTR [rdx+48]
  49348. movdqu xmm12, xmm13
  49349. pcmpeqd xmm12, xmm10
  49350. movdqu xmm0, [r9]
  49351. movdqu xmm1, [r9+16]
  49352. movdqu xmm2, [r9+32]
  49353. movdqu xmm3, [r9+48]
  49354. pand xmm0, xmm12
  49355. pand xmm1, xmm12
  49356. pand xmm2, xmm12
  49357. pand xmm3, xmm12
  49358. por xmm4, xmm0
  49359. por xmm5, xmm1
  49360. por xmm6, xmm2
  49361. por xmm7, xmm3
  49362. paddd xmm13, xmm11
  49363. ; ENTRY: 7
  49364. mov r9, QWORD PTR [rdx+56]
  49365. movdqu xmm12, xmm13
  49366. pcmpeqd xmm12, xmm10
  49367. movdqu xmm0, [r9]
  49368. movdqu xmm1, [r9+16]
  49369. movdqu xmm2, [r9+32]
  49370. movdqu xmm3, [r9+48]
  49371. pand xmm0, xmm12
  49372. pand xmm1, xmm12
  49373. pand xmm2, xmm12
  49374. pand xmm3, xmm12
  49375. por xmm4, xmm0
  49376. por xmm5, xmm1
  49377. por xmm6, xmm2
  49378. por xmm7, xmm3
  49379. paddd xmm13, xmm11
  49380. ; ENTRY: 8
  49381. mov r9, QWORD PTR [rdx+64]
  49382. movdqu xmm12, xmm13
  49383. pcmpeqd xmm12, xmm10
  49384. movdqu xmm0, [r9]
  49385. movdqu xmm1, [r9+16]
  49386. movdqu xmm2, [r9+32]
  49387. movdqu xmm3, [r9+48]
  49388. pand xmm0, xmm12
  49389. pand xmm1, xmm12
  49390. pand xmm2, xmm12
  49391. pand xmm3, xmm12
  49392. por xmm4, xmm0
  49393. por xmm5, xmm1
  49394. por xmm6, xmm2
  49395. por xmm7, xmm3
  49396. paddd xmm13, xmm11
  49397. ; ENTRY: 9
  49398. mov r9, QWORD PTR [rdx+72]
  49399. movdqu xmm12, xmm13
  49400. pcmpeqd xmm12, xmm10
  49401. movdqu xmm0, [r9]
  49402. movdqu xmm1, [r9+16]
  49403. movdqu xmm2, [r9+32]
  49404. movdqu xmm3, [r9+48]
  49405. pand xmm0, xmm12
  49406. pand xmm1, xmm12
  49407. pand xmm2, xmm12
  49408. pand xmm3, xmm12
  49409. por xmm4, xmm0
  49410. por xmm5, xmm1
  49411. por xmm6, xmm2
  49412. por xmm7, xmm3
  49413. paddd xmm13, xmm11
  49414. ; ENTRY: 10
  49415. mov r9, QWORD PTR [rdx+80]
  49416. movdqu xmm12, xmm13
  49417. pcmpeqd xmm12, xmm10
  49418. movdqu xmm0, [r9]
  49419. movdqu xmm1, [r9+16]
  49420. movdqu xmm2, [r9+32]
  49421. movdqu xmm3, [r9+48]
  49422. pand xmm0, xmm12
  49423. pand xmm1, xmm12
  49424. pand xmm2, xmm12
  49425. pand xmm3, xmm12
  49426. por xmm4, xmm0
  49427. por xmm5, xmm1
  49428. por xmm6, xmm2
  49429. por xmm7, xmm3
  49430. paddd xmm13, xmm11
  49431. ; ENTRY: 11
  49432. mov r9, QWORD PTR [rdx+88]
  49433. movdqu xmm12, xmm13
  49434. pcmpeqd xmm12, xmm10
  49435. movdqu xmm0, [r9]
  49436. movdqu xmm1, [r9+16]
  49437. movdqu xmm2, [r9+32]
  49438. movdqu xmm3, [r9+48]
  49439. pand xmm0, xmm12
  49440. pand xmm1, xmm12
  49441. pand xmm2, xmm12
  49442. pand xmm3, xmm12
  49443. por xmm4, xmm0
  49444. por xmm5, xmm1
  49445. por xmm6, xmm2
  49446. por xmm7, xmm3
  49447. paddd xmm13, xmm11
  49448. ; ENTRY: 12
  49449. mov r9, QWORD PTR [rdx+96]
  49450. movdqu xmm12, xmm13
  49451. pcmpeqd xmm12, xmm10
  49452. movdqu xmm0, [r9]
  49453. movdqu xmm1, [r9+16]
  49454. movdqu xmm2, [r9+32]
  49455. movdqu xmm3, [r9+48]
  49456. pand xmm0, xmm12
  49457. pand xmm1, xmm12
  49458. pand xmm2, xmm12
  49459. pand xmm3, xmm12
  49460. por xmm4, xmm0
  49461. por xmm5, xmm1
  49462. por xmm6, xmm2
  49463. por xmm7, xmm3
  49464. paddd xmm13, xmm11
  49465. ; ENTRY: 13
  49466. mov r9, QWORD PTR [rdx+104]
  49467. movdqu xmm12, xmm13
  49468. pcmpeqd xmm12, xmm10
  49469. movdqu xmm0, [r9]
  49470. movdqu xmm1, [r9+16]
  49471. movdqu xmm2, [r9+32]
  49472. movdqu xmm3, [r9+48]
  49473. pand xmm0, xmm12
  49474. pand xmm1, xmm12
  49475. pand xmm2, xmm12
  49476. pand xmm3, xmm12
  49477. por xmm4, xmm0
  49478. por xmm5, xmm1
  49479. por xmm6, xmm2
  49480. por xmm7, xmm3
  49481. paddd xmm13, xmm11
  49482. ; ENTRY: 14
  49483. mov r9, QWORD PTR [rdx+112]
  49484. movdqu xmm12, xmm13
  49485. pcmpeqd xmm12, xmm10
  49486. movdqu xmm0, [r9]
  49487. movdqu xmm1, [r9+16]
  49488. movdqu xmm2, [r9+32]
  49489. movdqu xmm3, [r9+48]
  49490. pand xmm0, xmm12
  49491. pand xmm1, xmm12
  49492. pand xmm2, xmm12
  49493. pand xmm3, xmm12
  49494. por xmm4, xmm0
  49495. por xmm5, xmm1
  49496. por xmm6, xmm2
  49497. por xmm7, xmm3
  49498. paddd xmm13, xmm11
  49499. ; ENTRY: 15
  49500. mov r9, QWORD PTR [rdx+120]
  49501. movdqu xmm12, xmm13
  49502. pcmpeqd xmm12, xmm10
  49503. movdqu xmm0, [r9]
  49504. movdqu xmm1, [r9+16]
  49505. movdqu xmm2, [r9+32]
  49506. movdqu xmm3, [r9+48]
  49507. pand xmm0, xmm12
  49508. pand xmm1, xmm12
  49509. pand xmm2, xmm12
  49510. pand xmm3, xmm12
  49511. por xmm4, xmm0
  49512. por xmm5, xmm1
  49513. por xmm6, xmm2
  49514. por xmm7, xmm3
  49515. paddd xmm13, xmm11
  49516. movdqu [rcx], xmm4
  49517. movdqu [rcx+16], xmm5
  49518. movdqu [rcx+32], xmm6
  49519. movdqu [rcx+48], xmm7
  49520. add rcx, 64
  49521. ; END: 0-7
  49522. ; START: 8-15
  49523. pxor xmm13, xmm13
  49524. pxor xmm4, xmm4
  49525. pxor xmm5, xmm5
  49526. pxor xmm6, xmm6
  49527. pxor xmm7, xmm7
  49528. ; ENTRY: 0
  49529. mov r9, QWORD PTR [rdx]
  49530. add r9, 64
  49531. movdqu xmm12, xmm13
  49532. pcmpeqd xmm12, xmm10
  49533. movdqu xmm0, [r9]
  49534. movdqu xmm1, [r9+16]
  49535. movdqu xmm2, [r9+32]
  49536. movdqu xmm3, [r9+48]
  49537. pand xmm0, xmm12
  49538. pand xmm1, xmm12
  49539. pand xmm2, xmm12
  49540. pand xmm3, xmm12
  49541. por xmm4, xmm0
  49542. por xmm5, xmm1
  49543. por xmm6, xmm2
  49544. por xmm7, xmm3
  49545. paddd xmm13, xmm11
  49546. ; ENTRY: 1
  49547. mov r9, QWORD PTR [rdx+8]
  49548. add r9, 64
  49549. movdqu xmm12, xmm13
  49550. pcmpeqd xmm12, xmm10
  49551. movdqu xmm0, [r9]
  49552. movdqu xmm1, [r9+16]
  49553. movdqu xmm2, [r9+32]
  49554. movdqu xmm3, [r9+48]
  49555. pand xmm0, xmm12
  49556. pand xmm1, xmm12
  49557. pand xmm2, xmm12
  49558. pand xmm3, xmm12
  49559. por xmm4, xmm0
  49560. por xmm5, xmm1
  49561. por xmm6, xmm2
  49562. por xmm7, xmm3
  49563. paddd xmm13, xmm11
  49564. ; ENTRY: 2
  49565. mov r9, QWORD PTR [rdx+16]
  49566. add r9, 64
  49567. movdqu xmm12, xmm13
  49568. pcmpeqd xmm12, xmm10
  49569. movdqu xmm0, [r9]
  49570. movdqu xmm1, [r9+16]
  49571. movdqu xmm2, [r9+32]
  49572. movdqu xmm3, [r9+48]
  49573. pand xmm0, xmm12
  49574. pand xmm1, xmm12
  49575. pand xmm2, xmm12
  49576. pand xmm3, xmm12
  49577. por xmm4, xmm0
  49578. por xmm5, xmm1
  49579. por xmm6, xmm2
  49580. por xmm7, xmm3
  49581. paddd xmm13, xmm11
  49582. ; ENTRY: 3
  49583. mov r9, QWORD PTR [rdx+24]
  49584. add r9, 64
  49585. movdqu xmm12, xmm13
  49586. pcmpeqd xmm12, xmm10
  49587. movdqu xmm0, [r9]
  49588. movdqu xmm1, [r9+16]
  49589. movdqu xmm2, [r9+32]
  49590. movdqu xmm3, [r9+48]
  49591. pand xmm0, xmm12
  49592. pand xmm1, xmm12
  49593. pand xmm2, xmm12
  49594. pand xmm3, xmm12
  49595. por xmm4, xmm0
  49596. por xmm5, xmm1
  49597. por xmm6, xmm2
  49598. por xmm7, xmm3
  49599. paddd xmm13, xmm11
  49600. ; ENTRY: 4
  49601. mov r9, QWORD PTR [rdx+32]
  49602. add r9, 64
  49603. movdqu xmm12, xmm13
  49604. pcmpeqd xmm12, xmm10
  49605. movdqu xmm0, [r9]
  49606. movdqu xmm1, [r9+16]
  49607. movdqu xmm2, [r9+32]
  49608. movdqu xmm3, [r9+48]
  49609. pand xmm0, xmm12
  49610. pand xmm1, xmm12
  49611. pand xmm2, xmm12
  49612. pand xmm3, xmm12
  49613. por xmm4, xmm0
  49614. por xmm5, xmm1
  49615. por xmm6, xmm2
  49616. por xmm7, xmm3
  49617. paddd xmm13, xmm11
  49618. ; ENTRY: 5
  49619. mov r9, QWORD PTR [rdx+40]
  49620. add r9, 64
  49621. movdqu xmm12, xmm13
  49622. pcmpeqd xmm12, xmm10
  49623. movdqu xmm0, [r9]
  49624. movdqu xmm1, [r9+16]
  49625. movdqu xmm2, [r9+32]
  49626. movdqu xmm3, [r9+48]
  49627. pand xmm0, xmm12
  49628. pand xmm1, xmm12
  49629. pand xmm2, xmm12
  49630. pand xmm3, xmm12
  49631. por xmm4, xmm0
  49632. por xmm5, xmm1
  49633. por xmm6, xmm2
  49634. por xmm7, xmm3
  49635. paddd xmm13, xmm11
  49636. ; ENTRY: 6
  49637. mov r9, QWORD PTR [rdx+48]
  49638. add r9, 64
  49639. movdqu xmm12, xmm13
  49640. pcmpeqd xmm12, xmm10
  49641. movdqu xmm0, [r9]
  49642. movdqu xmm1, [r9+16]
  49643. movdqu xmm2, [r9+32]
  49644. movdqu xmm3, [r9+48]
  49645. pand xmm0, xmm12
  49646. pand xmm1, xmm12
  49647. pand xmm2, xmm12
  49648. pand xmm3, xmm12
  49649. por xmm4, xmm0
  49650. por xmm5, xmm1
  49651. por xmm6, xmm2
  49652. por xmm7, xmm3
  49653. paddd xmm13, xmm11
  49654. ; ENTRY: 7
  49655. mov r9, QWORD PTR [rdx+56]
  49656. add r9, 64
  49657. movdqu xmm12, xmm13
  49658. pcmpeqd xmm12, xmm10
  49659. movdqu xmm0, [r9]
  49660. movdqu xmm1, [r9+16]
  49661. movdqu xmm2, [r9+32]
  49662. movdqu xmm3, [r9+48]
  49663. pand xmm0, xmm12
  49664. pand xmm1, xmm12
  49665. pand xmm2, xmm12
  49666. pand xmm3, xmm12
  49667. por xmm4, xmm0
  49668. por xmm5, xmm1
  49669. por xmm6, xmm2
  49670. por xmm7, xmm3
  49671. paddd xmm13, xmm11
  49672. ; ENTRY: 8
  49673. mov r9, QWORD PTR [rdx+64]
  49674. add r9, 64
  49675. movdqu xmm12, xmm13
  49676. pcmpeqd xmm12, xmm10
  49677. movdqu xmm0, [r9]
  49678. movdqu xmm1, [r9+16]
  49679. movdqu xmm2, [r9+32]
  49680. movdqu xmm3, [r9+48]
  49681. pand xmm0, xmm12
  49682. pand xmm1, xmm12
  49683. pand xmm2, xmm12
  49684. pand xmm3, xmm12
  49685. por xmm4, xmm0
  49686. por xmm5, xmm1
  49687. por xmm6, xmm2
  49688. por xmm7, xmm3
  49689. paddd xmm13, xmm11
  49690. ; ENTRY: 9
  49691. mov r9, QWORD PTR [rdx+72]
  49692. add r9, 64
  49693. movdqu xmm12, xmm13
  49694. pcmpeqd xmm12, xmm10
  49695. movdqu xmm0, [r9]
  49696. movdqu xmm1, [r9+16]
  49697. movdqu xmm2, [r9+32]
  49698. movdqu xmm3, [r9+48]
  49699. pand xmm0, xmm12
  49700. pand xmm1, xmm12
  49701. pand xmm2, xmm12
  49702. pand xmm3, xmm12
  49703. por xmm4, xmm0
  49704. por xmm5, xmm1
  49705. por xmm6, xmm2
  49706. por xmm7, xmm3
  49707. paddd xmm13, xmm11
  49708. ; ENTRY: 10
  49709. mov r9, QWORD PTR [rdx+80]
  49710. add r9, 64
  49711. movdqu xmm12, xmm13
  49712. pcmpeqd xmm12, xmm10
  49713. movdqu xmm0, [r9]
  49714. movdqu xmm1, [r9+16]
  49715. movdqu xmm2, [r9+32]
  49716. movdqu xmm3, [r9+48]
  49717. pand xmm0, xmm12
  49718. pand xmm1, xmm12
  49719. pand xmm2, xmm12
  49720. pand xmm3, xmm12
  49721. por xmm4, xmm0
  49722. por xmm5, xmm1
  49723. por xmm6, xmm2
  49724. por xmm7, xmm3
  49725. paddd xmm13, xmm11
  49726. ; ENTRY: 11
  49727. mov r9, QWORD PTR [rdx+88]
  49728. add r9, 64
  49729. movdqu xmm12, xmm13
  49730. pcmpeqd xmm12, xmm10
  49731. movdqu xmm0, [r9]
  49732. movdqu xmm1, [r9+16]
  49733. movdqu xmm2, [r9+32]
  49734. movdqu xmm3, [r9+48]
  49735. pand xmm0, xmm12
  49736. pand xmm1, xmm12
  49737. pand xmm2, xmm12
  49738. pand xmm3, xmm12
  49739. por xmm4, xmm0
  49740. por xmm5, xmm1
  49741. por xmm6, xmm2
  49742. por xmm7, xmm3
  49743. paddd xmm13, xmm11
  49744. ; ENTRY: 12
  49745. mov r9, QWORD PTR [rdx+96]
  49746. add r9, 64
  49747. movdqu xmm12, xmm13
  49748. pcmpeqd xmm12, xmm10
  49749. movdqu xmm0, [r9]
  49750. movdqu xmm1, [r9+16]
  49751. movdqu xmm2, [r9+32]
  49752. movdqu xmm3, [r9+48]
  49753. pand xmm0, xmm12
  49754. pand xmm1, xmm12
  49755. pand xmm2, xmm12
  49756. pand xmm3, xmm12
  49757. por xmm4, xmm0
  49758. por xmm5, xmm1
  49759. por xmm6, xmm2
  49760. por xmm7, xmm3
  49761. paddd xmm13, xmm11
  49762. ; ENTRY: 13
  49763. mov r9, QWORD PTR [rdx+104]
  49764. add r9, 64
  49765. movdqu xmm12, xmm13
  49766. pcmpeqd xmm12, xmm10
  49767. movdqu xmm0, [r9]
  49768. movdqu xmm1, [r9+16]
  49769. movdqu xmm2, [r9+32]
  49770. movdqu xmm3, [r9+48]
  49771. pand xmm0, xmm12
  49772. pand xmm1, xmm12
  49773. pand xmm2, xmm12
  49774. pand xmm3, xmm12
  49775. por xmm4, xmm0
  49776. por xmm5, xmm1
  49777. por xmm6, xmm2
  49778. por xmm7, xmm3
  49779. paddd xmm13, xmm11
  49780. ; ENTRY: 14
  49781. mov r9, QWORD PTR [rdx+112]
  49782. add r9, 64
  49783. movdqu xmm12, xmm13
  49784. pcmpeqd xmm12, xmm10
  49785. movdqu xmm0, [r9]
  49786. movdqu xmm1, [r9+16]
  49787. movdqu xmm2, [r9+32]
  49788. movdqu xmm3, [r9+48]
  49789. pand xmm0, xmm12
  49790. pand xmm1, xmm12
  49791. pand xmm2, xmm12
  49792. pand xmm3, xmm12
  49793. por xmm4, xmm0
  49794. por xmm5, xmm1
  49795. por xmm6, xmm2
  49796. por xmm7, xmm3
  49797. paddd xmm13, xmm11
  49798. ; ENTRY: 15
  49799. mov r9, QWORD PTR [rdx+120]
  49800. add r9, 64
  49801. movdqu xmm12, xmm13
  49802. pcmpeqd xmm12, xmm10
  49803. movdqu xmm0, [r9]
  49804. movdqu xmm1, [r9+16]
  49805. movdqu xmm2, [r9+32]
  49806. movdqu xmm3, [r9+48]
  49807. pand xmm0, xmm12
  49808. pand xmm1, xmm12
  49809. pand xmm2, xmm12
  49810. pand xmm3, xmm12
  49811. por xmm4, xmm0
  49812. por xmm5, xmm1
  49813. por xmm6, xmm2
  49814. por xmm7, xmm3
  49815. paddd xmm13, xmm11
  49816. movdqu [rcx], xmm4
  49817. movdqu [rcx+16], xmm5
  49818. movdqu [rcx+32], xmm6
  49819. movdqu [rcx+48], xmm7
  49820. add rcx, 64
  49821. ; END: 8-15
  49822. ; START: 16-23
  49823. pxor xmm13, xmm13
  49824. pxor xmm4, xmm4
  49825. pxor xmm5, xmm5
  49826. pxor xmm6, xmm6
  49827. pxor xmm7, xmm7
  49828. ; ENTRY: 0
  49829. mov r9, QWORD PTR [rdx]
  49830. add r9, 128
  49831. movdqu xmm12, xmm13
  49832. pcmpeqd xmm12, xmm10
  49833. movdqu xmm0, [r9]
  49834. movdqu xmm1, [r9+16]
  49835. movdqu xmm2, [r9+32]
  49836. movdqu xmm3, [r9+48]
  49837. pand xmm0, xmm12
  49838. pand xmm1, xmm12
  49839. pand xmm2, xmm12
  49840. pand xmm3, xmm12
  49841. por xmm4, xmm0
  49842. por xmm5, xmm1
  49843. por xmm6, xmm2
  49844. por xmm7, xmm3
  49845. paddd xmm13, xmm11
  49846. ; ENTRY: 1
  49847. mov r9, QWORD PTR [rdx+8]
  49848. add r9, 128
  49849. movdqu xmm12, xmm13
  49850. pcmpeqd xmm12, xmm10
  49851. movdqu xmm0, [r9]
  49852. movdqu xmm1, [r9+16]
  49853. movdqu xmm2, [r9+32]
  49854. movdqu xmm3, [r9+48]
  49855. pand xmm0, xmm12
  49856. pand xmm1, xmm12
  49857. pand xmm2, xmm12
  49858. pand xmm3, xmm12
  49859. por xmm4, xmm0
  49860. por xmm5, xmm1
  49861. por xmm6, xmm2
  49862. por xmm7, xmm3
  49863. paddd xmm13, xmm11
  49864. ; ENTRY: 2
  49865. mov r9, QWORD PTR [rdx+16]
  49866. add r9, 128
  49867. movdqu xmm12, xmm13
  49868. pcmpeqd xmm12, xmm10
  49869. movdqu xmm0, [r9]
  49870. movdqu xmm1, [r9+16]
  49871. movdqu xmm2, [r9+32]
  49872. movdqu xmm3, [r9+48]
  49873. pand xmm0, xmm12
  49874. pand xmm1, xmm12
  49875. pand xmm2, xmm12
  49876. pand xmm3, xmm12
  49877. por xmm4, xmm0
  49878. por xmm5, xmm1
  49879. por xmm6, xmm2
  49880. por xmm7, xmm3
  49881. paddd xmm13, xmm11
  49882. ; ENTRY: 3
  49883. mov r9, QWORD PTR [rdx+24]
  49884. add r9, 128
  49885. movdqu xmm12, xmm13
  49886. pcmpeqd xmm12, xmm10
  49887. movdqu xmm0, [r9]
  49888. movdqu xmm1, [r9+16]
  49889. movdqu xmm2, [r9+32]
  49890. movdqu xmm3, [r9+48]
  49891. pand xmm0, xmm12
  49892. pand xmm1, xmm12
  49893. pand xmm2, xmm12
  49894. pand xmm3, xmm12
  49895. por xmm4, xmm0
  49896. por xmm5, xmm1
  49897. por xmm6, xmm2
  49898. por xmm7, xmm3
  49899. paddd xmm13, xmm11
  49900. ; ENTRY: 4
  49901. mov r9, QWORD PTR [rdx+32]
  49902. add r9, 128
  49903. movdqu xmm12, xmm13
  49904. pcmpeqd xmm12, xmm10
  49905. movdqu xmm0, [r9]
  49906. movdqu xmm1, [r9+16]
  49907. movdqu xmm2, [r9+32]
  49908. movdqu xmm3, [r9+48]
  49909. pand xmm0, xmm12
  49910. pand xmm1, xmm12
  49911. pand xmm2, xmm12
  49912. pand xmm3, xmm12
  49913. por xmm4, xmm0
  49914. por xmm5, xmm1
  49915. por xmm6, xmm2
  49916. por xmm7, xmm3
  49917. paddd xmm13, xmm11
  49918. ; ENTRY: 5
  49919. mov r9, QWORD PTR [rdx+40]
  49920. add r9, 128
  49921. movdqu xmm12, xmm13
  49922. pcmpeqd xmm12, xmm10
  49923. movdqu xmm0, [r9]
  49924. movdqu xmm1, [r9+16]
  49925. movdqu xmm2, [r9+32]
  49926. movdqu xmm3, [r9+48]
  49927. pand xmm0, xmm12
  49928. pand xmm1, xmm12
  49929. pand xmm2, xmm12
  49930. pand xmm3, xmm12
  49931. por xmm4, xmm0
  49932. por xmm5, xmm1
  49933. por xmm6, xmm2
  49934. por xmm7, xmm3
  49935. paddd xmm13, xmm11
  49936. ; ENTRY: 6
  49937. mov r9, QWORD PTR [rdx+48]
  49938. add r9, 128
  49939. movdqu xmm12, xmm13
  49940. pcmpeqd xmm12, xmm10
  49941. movdqu xmm0, [r9]
  49942. movdqu xmm1, [r9+16]
  49943. movdqu xmm2, [r9+32]
  49944. movdqu xmm3, [r9+48]
  49945. pand xmm0, xmm12
  49946. pand xmm1, xmm12
  49947. pand xmm2, xmm12
  49948. pand xmm3, xmm12
  49949. por xmm4, xmm0
  49950. por xmm5, xmm1
  49951. por xmm6, xmm2
  49952. por xmm7, xmm3
  49953. paddd xmm13, xmm11
  49954. ; ENTRY: 7
  49955. mov r9, QWORD PTR [rdx+56]
  49956. add r9, 128
  49957. movdqu xmm12, xmm13
  49958. pcmpeqd xmm12, xmm10
  49959. movdqu xmm0, [r9]
  49960. movdqu xmm1, [r9+16]
  49961. movdqu xmm2, [r9+32]
  49962. movdqu xmm3, [r9+48]
  49963. pand xmm0, xmm12
  49964. pand xmm1, xmm12
  49965. pand xmm2, xmm12
  49966. pand xmm3, xmm12
  49967. por xmm4, xmm0
  49968. por xmm5, xmm1
  49969. por xmm6, xmm2
  49970. por xmm7, xmm3
  49971. paddd xmm13, xmm11
  49972. ; ENTRY: 8
  49973. mov r9, QWORD PTR [rdx+64]
  49974. add r9, 128
  49975. movdqu xmm12, xmm13
  49976. pcmpeqd xmm12, xmm10
  49977. movdqu xmm0, [r9]
  49978. movdqu xmm1, [r9+16]
  49979. movdqu xmm2, [r9+32]
  49980. movdqu xmm3, [r9+48]
  49981. pand xmm0, xmm12
  49982. pand xmm1, xmm12
  49983. pand xmm2, xmm12
  49984. pand xmm3, xmm12
  49985. por xmm4, xmm0
  49986. por xmm5, xmm1
  49987. por xmm6, xmm2
  49988. por xmm7, xmm3
  49989. paddd xmm13, xmm11
  49990. ; ENTRY: 9
  49991. mov r9, QWORD PTR [rdx+72]
  49992. add r9, 128
  49993. movdqu xmm12, xmm13
  49994. pcmpeqd xmm12, xmm10
  49995. movdqu xmm0, [r9]
  49996. movdqu xmm1, [r9+16]
  49997. movdqu xmm2, [r9+32]
  49998. movdqu xmm3, [r9+48]
  49999. pand xmm0, xmm12
  50000. pand xmm1, xmm12
  50001. pand xmm2, xmm12
  50002. pand xmm3, xmm12
  50003. por xmm4, xmm0
  50004. por xmm5, xmm1
  50005. por xmm6, xmm2
  50006. por xmm7, xmm3
  50007. paddd xmm13, xmm11
  50008. ; ENTRY: 10
  50009. mov r9, QWORD PTR [rdx+80]
  50010. add r9, 128
  50011. movdqu xmm12, xmm13
  50012. pcmpeqd xmm12, xmm10
  50013. movdqu xmm0, [r9]
  50014. movdqu xmm1, [r9+16]
  50015. movdqu xmm2, [r9+32]
  50016. movdqu xmm3, [r9+48]
  50017. pand xmm0, xmm12
  50018. pand xmm1, xmm12
  50019. pand xmm2, xmm12
  50020. pand xmm3, xmm12
  50021. por xmm4, xmm0
  50022. por xmm5, xmm1
  50023. por xmm6, xmm2
  50024. por xmm7, xmm3
  50025. paddd xmm13, xmm11
  50026. ; ENTRY: 11
  50027. mov r9, QWORD PTR [rdx+88]
  50028. add r9, 128
  50029. movdqu xmm12, xmm13
  50030. pcmpeqd xmm12, xmm10
  50031. movdqu xmm0, [r9]
  50032. movdqu xmm1, [r9+16]
  50033. movdqu xmm2, [r9+32]
  50034. movdqu xmm3, [r9+48]
  50035. pand xmm0, xmm12
  50036. pand xmm1, xmm12
  50037. pand xmm2, xmm12
  50038. pand xmm3, xmm12
  50039. por xmm4, xmm0
  50040. por xmm5, xmm1
  50041. por xmm6, xmm2
  50042. por xmm7, xmm3
  50043. paddd xmm13, xmm11
  50044. ; ENTRY: 12
  50045. mov r9, QWORD PTR [rdx+96]
  50046. add r9, 128
  50047. movdqu xmm12, xmm13
  50048. pcmpeqd xmm12, xmm10
  50049. movdqu xmm0, [r9]
  50050. movdqu xmm1, [r9+16]
  50051. movdqu xmm2, [r9+32]
  50052. movdqu xmm3, [r9+48]
  50053. pand xmm0, xmm12
  50054. pand xmm1, xmm12
  50055. pand xmm2, xmm12
  50056. pand xmm3, xmm12
  50057. por xmm4, xmm0
  50058. por xmm5, xmm1
  50059. por xmm6, xmm2
  50060. por xmm7, xmm3
  50061. paddd xmm13, xmm11
  50062. ; ENTRY: 13
  50063. mov r9, QWORD PTR [rdx+104]
  50064. add r9, 128
  50065. movdqu xmm12, xmm13
  50066. pcmpeqd xmm12, xmm10
  50067. movdqu xmm0, [r9]
  50068. movdqu xmm1, [r9+16]
  50069. movdqu xmm2, [r9+32]
  50070. movdqu xmm3, [r9+48]
  50071. pand xmm0, xmm12
  50072. pand xmm1, xmm12
  50073. pand xmm2, xmm12
  50074. pand xmm3, xmm12
  50075. por xmm4, xmm0
  50076. por xmm5, xmm1
  50077. por xmm6, xmm2
  50078. por xmm7, xmm3
  50079. paddd xmm13, xmm11
  50080. ; ENTRY: 14
  50081. mov r9, QWORD PTR [rdx+112]
  50082. add r9, 128
  50083. movdqu xmm12, xmm13
  50084. pcmpeqd xmm12, xmm10
  50085. movdqu xmm0, [r9]
  50086. movdqu xmm1, [r9+16]
  50087. movdqu xmm2, [r9+32]
  50088. movdqu xmm3, [r9+48]
  50089. pand xmm0, xmm12
  50090. pand xmm1, xmm12
  50091. pand xmm2, xmm12
  50092. pand xmm3, xmm12
  50093. por xmm4, xmm0
  50094. por xmm5, xmm1
  50095. por xmm6, xmm2
  50096. por xmm7, xmm3
  50097. paddd xmm13, xmm11
  50098. ; ENTRY: 15
  50099. mov r9, QWORD PTR [rdx+120]
  50100. add r9, 128
  50101. movdqu xmm12, xmm13
  50102. pcmpeqd xmm12, xmm10
  50103. movdqu xmm0, [r9]
  50104. movdqu xmm1, [r9+16]
  50105. movdqu xmm2, [r9+32]
  50106. movdqu xmm3, [r9+48]
  50107. pand xmm0, xmm12
  50108. pand xmm1, xmm12
  50109. pand xmm2, xmm12
  50110. pand xmm3, xmm12
  50111. por xmm4, xmm0
  50112. por xmm5, xmm1
  50113. por xmm6, xmm2
  50114. por xmm7, xmm3
  50115. paddd xmm13, xmm11
  50116. movdqu [rcx], xmm4
  50117. movdqu [rcx+16], xmm5
  50118. movdqu [rcx+32], xmm6
  50119. movdqu [rcx+48], xmm7
  50120. add rcx, 64
  50121. ; END: 16-23
  50122. ; START: 24-31
  50123. pxor xmm13, xmm13
  50124. pxor xmm4, xmm4
  50125. pxor xmm5, xmm5
  50126. pxor xmm6, xmm6
  50127. pxor xmm7, xmm7
  50128. ; ENTRY: 0
  50129. mov r9, QWORD PTR [rdx]
  50130. add r9, 192
  50131. movdqu xmm12, xmm13
  50132. pcmpeqd xmm12, xmm10
  50133. movdqu xmm0, [r9]
  50134. movdqu xmm1, [r9+16]
  50135. movdqu xmm2, [r9+32]
  50136. movdqu xmm3, [r9+48]
  50137. pand xmm0, xmm12
  50138. pand xmm1, xmm12
  50139. pand xmm2, xmm12
  50140. pand xmm3, xmm12
  50141. por xmm4, xmm0
  50142. por xmm5, xmm1
  50143. por xmm6, xmm2
  50144. por xmm7, xmm3
  50145. paddd xmm13, xmm11
  50146. ; ENTRY: 1
  50147. mov r9, QWORD PTR [rdx+8]
  50148. add r9, 192
  50149. movdqu xmm12, xmm13
  50150. pcmpeqd xmm12, xmm10
  50151. movdqu xmm0, [r9]
  50152. movdqu xmm1, [r9+16]
  50153. movdqu xmm2, [r9+32]
  50154. movdqu xmm3, [r9+48]
  50155. pand xmm0, xmm12
  50156. pand xmm1, xmm12
  50157. pand xmm2, xmm12
  50158. pand xmm3, xmm12
  50159. por xmm4, xmm0
  50160. por xmm5, xmm1
  50161. por xmm6, xmm2
  50162. por xmm7, xmm3
  50163. paddd xmm13, xmm11
  50164. ; ENTRY: 2
  50165. mov r9, QWORD PTR [rdx+16]
  50166. add r9, 192
  50167. movdqu xmm12, xmm13
  50168. pcmpeqd xmm12, xmm10
  50169. movdqu xmm0, [r9]
  50170. movdqu xmm1, [r9+16]
  50171. movdqu xmm2, [r9+32]
  50172. movdqu xmm3, [r9+48]
  50173. pand xmm0, xmm12
  50174. pand xmm1, xmm12
  50175. pand xmm2, xmm12
  50176. pand xmm3, xmm12
  50177. por xmm4, xmm0
  50178. por xmm5, xmm1
  50179. por xmm6, xmm2
  50180. por xmm7, xmm3
  50181. paddd xmm13, xmm11
  50182. ; ENTRY: 3
  50183. mov r9, QWORD PTR [rdx+24]
  50184. add r9, 192
  50185. movdqu xmm12, xmm13
  50186. pcmpeqd xmm12, xmm10
  50187. movdqu xmm0, [r9]
  50188. movdqu xmm1, [r9+16]
  50189. movdqu xmm2, [r9+32]
  50190. movdqu xmm3, [r9+48]
  50191. pand xmm0, xmm12
  50192. pand xmm1, xmm12
  50193. pand xmm2, xmm12
  50194. pand xmm3, xmm12
  50195. por xmm4, xmm0
  50196. por xmm5, xmm1
  50197. por xmm6, xmm2
  50198. por xmm7, xmm3
  50199. paddd xmm13, xmm11
  50200. ; ENTRY: 4
  50201. mov r9, QWORD PTR [rdx+32]
  50202. add r9, 192
  50203. movdqu xmm12, xmm13
  50204. pcmpeqd xmm12, xmm10
  50205. movdqu xmm0, [r9]
  50206. movdqu xmm1, [r9+16]
  50207. movdqu xmm2, [r9+32]
  50208. movdqu xmm3, [r9+48]
  50209. pand xmm0, xmm12
  50210. pand xmm1, xmm12
  50211. pand xmm2, xmm12
  50212. pand xmm3, xmm12
  50213. por xmm4, xmm0
  50214. por xmm5, xmm1
  50215. por xmm6, xmm2
  50216. por xmm7, xmm3
  50217. paddd xmm13, xmm11
  50218. ; ENTRY: 5
  50219. mov r9, QWORD PTR [rdx+40]
  50220. add r9, 192
  50221. movdqu xmm12, xmm13
  50222. pcmpeqd xmm12, xmm10
  50223. movdqu xmm0, [r9]
  50224. movdqu xmm1, [r9+16]
  50225. movdqu xmm2, [r9+32]
  50226. movdqu xmm3, [r9+48]
  50227. pand xmm0, xmm12
  50228. pand xmm1, xmm12
  50229. pand xmm2, xmm12
  50230. pand xmm3, xmm12
  50231. por xmm4, xmm0
  50232. por xmm5, xmm1
  50233. por xmm6, xmm2
  50234. por xmm7, xmm3
  50235. paddd xmm13, xmm11
  50236. ; ENTRY: 6
  50237. mov r9, QWORD PTR [rdx+48]
  50238. add r9, 192
  50239. movdqu xmm12, xmm13
  50240. pcmpeqd xmm12, xmm10
  50241. movdqu xmm0, [r9]
  50242. movdqu xmm1, [r9+16]
  50243. movdqu xmm2, [r9+32]
  50244. movdqu xmm3, [r9+48]
  50245. pand xmm0, xmm12
  50246. pand xmm1, xmm12
  50247. pand xmm2, xmm12
  50248. pand xmm3, xmm12
  50249. por xmm4, xmm0
  50250. por xmm5, xmm1
  50251. por xmm6, xmm2
  50252. por xmm7, xmm3
  50253. paddd xmm13, xmm11
  50254. ; ENTRY: 7
  50255. mov r9, QWORD PTR [rdx+56]
  50256. add r9, 192
  50257. movdqu xmm12, xmm13
  50258. pcmpeqd xmm12, xmm10
  50259. movdqu xmm0, [r9]
  50260. movdqu xmm1, [r9+16]
  50261. movdqu xmm2, [r9+32]
  50262. movdqu xmm3, [r9+48]
  50263. pand xmm0, xmm12
  50264. pand xmm1, xmm12
  50265. pand xmm2, xmm12
  50266. pand xmm3, xmm12
  50267. por xmm4, xmm0
  50268. por xmm5, xmm1
  50269. por xmm6, xmm2
  50270. por xmm7, xmm3
  50271. paddd xmm13, xmm11
  50272. ; ENTRY: 8
  50273. mov r9, QWORD PTR [rdx+64]
  50274. add r9, 192
  50275. movdqu xmm12, xmm13
  50276. pcmpeqd xmm12, xmm10
  50277. movdqu xmm0, [r9]
  50278. movdqu xmm1, [r9+16]
  50279. movdqu xmm2, [r9+32]
  50280. movdqu xmm3, [r9+48]
  50281. pand xmm0, xmm12
  50282. pand xmm1, xmm12
  50283. pand xmm2, xmm12
  50284. pand xmm3, xmm12
  50285. por xmm4, xmm0
  50286. por xmm5, xmm1
  50287. por xmm6, xmm2
  50288. por xmm7, xmm3
  50289. paddd xmm13, xmm11
  50290. ; ENTRY: 9
  50291. mov r9, QWORD PTR [rdx+72]
  50292. add r9, 192
  50293. movdqu xmm12, xmm13
  50294. pcmpeqd xmm12, xmm10
  50295. movdqu xmm0, [r9]
  50296. movdqu xmm1, [r9+16]
  50297. movdqu xmm2, [r9+32]
  50298. movdqu xmm3, [r9+48]
  50299. pand xmm0, xmm12
  50300. pand xmm1, xmm12
  50301. pand xmm2, xmm12
  50302. pand xmm3, xmm12
  50303. por xmm4, xmm0
  50304. por xmm5, xmm1
  50305. por xmm6, xmm2
  50306. por xmm7, xmm3
  50307. paddd xmm13, xmm11
  50308. ; ENTRY: 10
  50309. mov r9, QWORD PTR [rdx+80]
  50310. add r9, 192
  50311. movdqu xmm12, xmm13
  50312. pcmpeqd xmm12, xmm10
  50313. movdqu xmm0, [r9]
  50314. movdqu xmm1, [r9+16]
  50315. movdqu xmm2, [r9+32]
  50316. movdqu xmm3, [r9+48]
  50317. pand xmm0, xmm12
  50318. pand xmm1, xmm12
  50319. pand xmm2, xmm12
  50320. pand xmm3, xmm12
  50321. por xmm4, xmm0
  50322. por xmm5, xmm1
  50323. por xmm6, xmm2
  50324. por xmm7, xmm3
  50325. paddd xmm13, xmm11
  50326. ; ENTRY: 11
  50327. mov r9, QWORD PTR [rdx+88]
  50328. add r9, 192
  50329. movdqu xmm12, xmm13
  50330. pcmpeqd xmm12, xmm10
  50331. movdqu xmm0, [r9]
  50332. movdqu xmm1, [r9+16]
  50333. movdqu xmm2, [r9+32]
  50334. movdqu xmm3, [r9+48]
  50335. pand xmm0, xmm12
  50336. pand xmm1, xmm12
  50337. pand xmm2, xmm12
  50338. pand xmm3, xmm12
  50339. por xmm4, xmm0
  50340. por xmm5, xmm1
  50341. por xmm6, xmm2
  50342. por xmm7, xmm3
  50343. paddd xmm13, xmm11
  50344. ; ENTRY: 12
  50345. mov r9, QWORD PTR [rdx+96]
  50346. add r9, 192
  50347. movdqu xmm12, xmm13
  50348. pcmpeqd xmm12, xmm10
  50349. movdqu xmm0, [r9]
  50350. movdqu xmm1, [r9+16]
  50351. movdqu xmm2, [r9+32]
  50352. movdqu xmm3, [r9+48]
  50353. pand xmm0, xmm12
  50354. pand xmm1, xmm12
  50355. pand xmm2, xmm12
  50356. pand xmm3, xmm12
  50357. por xmm4, xmm0
  50358. por xmm5, xmm1
  50359. por xmm6, xmm2
  50360. por xmm7, xmm3
  50361. paddd xmm13, xmm11
  50362. ; ENTRY: 13
  50363. mov r9, QWORD PTR [rdx+104]
  50364. add r9, 192
  50365. movdqu xmm12, xmm13
  50366. pcmpeqd xmm12, xmm10
  50367. movdqu xmm0, [r9]
  50368. movdqu xmm1, [r9+16]
  50369. movdqu xmm2, [r9+32]
  50370. movdqu xmm3, [r9+48]
  50371. pand xmm0, xmm12
  50372. pand xmm1, xmm12
  50373. pand xmm2, xmm12
  50374. pand xmm3, xmm12
  50375. por xmm4, xmm0
  50376. por xmm5, xmm1
  50377. por xmm6, xmm2
  50378. por xmm7, xmm3
  50379. paddd xmm13, xmm11
  50380. ; ENTRY: 14
  50381. mov r9, QWORD PTR [rdx+112]
  50382. add r9, 192
  50383. movdqu xmm12, xmm13
  50384. pcmpeqd xmm12, xmm10
  50385. movdqu xmm0, [r9]
  50386. movdqu xmm1, [r9+16]
  50387. movdqu xmm2, [r9+32]
  50388. movdqu xmm3, [r9+48]
  50389. pand xmm0, xmm12
  50390. pand xmm1, xmm12
  50391. pand xmm2, xmm12
  50392. pand xmm3, xmm12
  50393. por xmm4, xmm0
  50394. por xmm5, xmm1
  50395. por xmm6, xmm2
  50396. por xmm7, xmm3
  50397. paddd xmm13, xmm11
  50398. ; ENTRY: 15
  50399. mov r9, QWORD PTR [rdx+120]
  50400. add r9, 192
  50401. movdqu xmm12, xmm13
  50402. pcmpeqd xmm12, xmm10
  50403. movdqu xmm0, [r9]
  50404. movdqu xmm1, [r9+16]
  50405. movdqu xmm2, [r9+32]
  50406. movdqu xmm3, [r9+48]
  50407. pand xmm0, xmm12
  50408. pand xmm1, xmm12
  50409. pand xmm2, xmm12
  50410. pand xmm3, xmm12
  50411. por xmm4, xmm0
  50412. por xmm5, xmm1
  50413. por xmm6, xmm2
  50414. por xmm7, xmm3
  50415. paddd xmm13, xmm11
  50416. movdqu [rcx], xmm4
  50417. movdqu [rcx+16], xmm5
  50418. movdqu [rcx+32], xmm6
  50419. movdqu [rcx+48], xmm7
  50420. add rcx, 64
  50421. ; END: 24-31
  50422. ; START: 32-39
  50423. pxor xmm13, xmm13
  50424. pxor xmm4, xmm4
  50425. pxor xmm5, xmm5
  50426. pxor xmm6, xmm6
  50427. pxor xmm7, xmm7
  50428. ; ENTRY: 0
  50429. mov r9, QWORD PTR [rdx]
  50430. add r9, 256
  50431. movdqu xmm12, xmm13
  50432. pcmpeqd xmm12, xmm10
  50433. movdqu xmm0, [r9]
  50434. movdqu xmm1, [r9+16]
  50435. movdqu xmm2, [r9+32]
  50436. movdqu xmm3, [r9+48]
  50437. pand xmm0, xmm12
  50438. pand xmm1, xmm12
  50439. pand xmm2, xmm12
  50440. pand xmm3, xmm12
  50441. por xmm4, xmm0
  50442. por xmm5, xmm1
  50443. por xmm6, xmm2
  50444. por xmm7, xmm3
  50445. paddd xmm13, xmm11
  50446. ; ENTRY: 1
  50447. mov r9, QWORD PTR [rdx+8]
  50448. add r9, 256
  50449. movdqu xmm12, xmm13
  50450. pcmpeqd xmm12, xmm10
  50451. movdqu xmm0, [r9]
  50452. movdqu xmm1, [r9+16]
  50453. movdqu xmm2, [r9+32]
  50454. movdqu xmm3, [r9+48]
  50455. pand xmm0, xmm12
  50456. pand xmm1, xmm12
  50457. pand xmm2, xmm12
  50458. pand xmm3, xmm12
  50459. por xmm4, xmm0
  50460. por xmm5, xmm1
  50461. por xmm6, xmm2
  50462. por xmm7, xmm3
  50463. paddd xmm13, xmm11
  50464. ; ENTRY: 2
  50465. mov r9, QWORD PTR [rdx+16]
  50466. add r9, 256
  50467. movdqu xmm12, xmm13
  50468. pcmpeqd xmm12, xmm10
  50469. movdqu xmm0, [r9]
  50470. movdqu xmm1, [r9+16]
  50471. movdqu xmm2, [r9+32]
  50472. movdqu xmm3, [r9+48]
  50473. pand xmm0, xmm12
  50474. pand xmm1, xmm12
  50475. pand xmm2, xmm12
  50476. pand xmm3, xmm12
  50477. por xmm4, xmm0
  50478. por xmm5, xmm1
  50479. por xmm6, xmm2
  50480. por xmm7, xmm3
  50481. paddd xmm13, xmm11
  50482. ; ENTRY: 3
  50483. mov r9, QWORD PTR [rdx+24]
  50484. add r9, 256
  50485. movdqu xmm12, xmm13
  50486. pcmpeqd xmm12, xmm10
  50487. movdqu xmm0, [r9]
  50488. movdqu xmm1, [r9+16]
  50489. movdqu xmm2, [r9+32]
  50490. movdqu xmm3, [r9+48]
  50491. pand xmm0, xmm12
  50492. pand xmm1, xmm12
  50493. pand xmm2, xmm12
  50494. pand xmm3, xmm12
  50495. por xmm4, xmm0
  50496. por xmm5, xmm1
  50497. por xmm6, xmm2
  50498. por xmm7, xmm3
  50499. paddd xmm13, xmm11
  50500. ; ENTRY: 4
  50501. mov r9, QWORD PTR [rdx+32]
  50502. add r9, 256
  50503. movdqu xmm12, xmm13
  50504. pcmpeqd xmm12, xmm10
  50505. movdqu xmm0, [r9]
  50506. movdqu xmm1, [r9+16]
  50507. movdqu xmm2, [r9+32]
  50508. movdqu xmm3, [r9+48]
  50509. pand xmm0, xmm12
  50510. pand xmm1, xmm12
  50511. pand xmm2, xmm12
  50512. pand xmm3, xmm12
  50513. por xmm4, xmm0
  50514. por xmm5, xmm1
  50515. por xmm6, xmm2
  50516. por xmm7, xmm3
  50517. paddd xmm13, xmm11
  50518. ; ENTRY: 5
  50519. mov r9, QWORD PTR [rdx+40]
  50520. add r9, 256
  50521. movdqu xmm12, xmm13
  50522. pcmpeqd xmm12, xmm10
  50523. movdqu xmm0, [r9]
  50524. movdqu xmm1, [r9+16]
  50525. movdqu xmm2, [r9+32]
  50526. movdqu xmm3, [r9+48]
  50527. pand xmm0, xmm12
  50528. pand xmm1, xmm12
  50529. pand xmm2, xmm12
  50530. pand xmm3, xmm12
  50531. por xmm4, xmm0
  50532. por xmm5, xmm1
  50533. por xmm6, xmm2
  50534. por xmm7, xmm3
  50535. paddd xmm13, xmm11
  50536. ; ENTRY: 6
  50537. mov r9, QWORD PTR [rdx+48]
  50538. add r9, 256
  50539. movdqu xmm12, xmm13
  50540. pcmpeqd xmm12, xmm10
  50541. movdqu xmm0, [r9]
  50542. movdqu xmm1, [r9+16]
  50543. movdqu xmm2, [r9+32]
  50544. movdqu xmm3, [r9+48]
  50545. pand xmm0, xmm12
  50546. pand xmm1, xmm12
  50547. pand xmm2, xmm12
  50548. pand xmm3, xmm12
  50549. por xmm4, xmm0
  50550. por xmm5, xmm1
  50551. por xmm6, xmm2
  50552. por xmm7, xmm3
  50553. paddd xmm13, xmm11
  50554. ; ENTRY: 7
  50555. mov r9, QWORD PTR [rdx+56]
  50556. add r9, 256
  50557. movdqu xmm12, xmm13
  50558. pcmpeqd xmm12, xmm10
  50559. movdqu xmm0, [r9]
  50560. movdqu xmm1, [r9+16]
  50561. movdqu xmm2, [r9+32]
  50562. movdqu xmm3, [r9+48]
  50563. pand xmm0, xmm12
  50564. pand xmm1, xmm12
  50565. pand xmm2, xmm12
  50566. pand xmm3, xmm12
  50567. por xmm4, xmm0
  50568. por xmm5, xmm1
  50569. por xmm6, xmm2
  50570. por xmm7, xmm3
  50571. paddd xmm13, xmm11
  50572. ; ENTRY: 8
  50573. mov r9, QWORD PTR [rdx+64]
  50574. add r9, 256
  50575. movdqu xmm12, xmm13
  50576. pcmpeqd xmm12, xmm10
  50577. movdqu xmm0, [r9]
  50578. movdqu xmm1, [r9+16]
  50579. movdqu xmm2, [r9+32]
  50580. movdqu xmm3, [r9+48]
  50581. pand xmm0, xmm12
  50582. pand xmm1, xmm12
  50583. pand xmm2, xmm12
  50584. pand xmm3, xmm12
  50585. por xmm4, xmm0
  50586. por xmm5, xmm1
  50587. por xmm6, xmm2
  50588. por xmm7, xmm3
  50589. paddd xmm13, xmm11
  50590. ; ENTRY: 9
  50591. mov r9, QWORD PTR [rdx+72]
  50592. add r9, 256
  50593. movdqu xmm12, xmm13
  50594. pcmpeqd xmm12, xmm10
  50595. movdqu xmm0, [r9]
  50596. movdqu xmm1, [r9+16]
  50597. movdqu xmm2, [r9+32]
  50598. movdqu xmm3, [r9+48]
  50599. pand xmm0, xmm12
  50600. pand xmm1, xmm12
  50601. pand xmm2, xmm12
  50602. pand xmm3, xmm12
  50603. por xmm4, xmm0
  50604. por xmm5, xmm1
  50605. por xmm6, xmm2
  50606. por xmm7, xmm3
  50607. paddd xmm13, xmm11
  50608. ; ENTRY: 10
  50609. mov r9, QWORD PTR [rdx+80]
  50610. add r9, 256
  50611. movdqu xmm12, xmm13
  50612. pcmpeqd xmm12, xmm10
  50613. movdqu xmm0, [r9]
  50614. movdqu xmm1, [r9+16]
  50615. movdqu xmm2, [r9+32]
  50616. movdqu xmm3, [r9+48]
  50617. pand xmm0, xmm12
  50618. pand xmm1, xmm12
  50619. pand xmm2, xmm12
  50620. pand xmm3, xmm12
  50621. por xmm4, xmm0
  50622. por xmm5, xmm1
  50623. por xmm6, xmm2
  50624. por xmm7, xmm3
  50625. paddd xmm13, xmm11
  50626. ; ENTRY: 11
  50627. mov r9, QWORD PTR [rdx+88]
  50628. add r9, 256
  50629. movdqu xmm12, xmm13
  50630. pcmpeqd xmm12, xmm10
  50631. movdqu xmm0, [r9]
  50632. movdqu xmm1, [r9+16]
  50633. movdqu xmm2, [r9+32]
  50634. movdqu xmm3, [r9+48]
  50635. pand xmm0, xmm12
  50636. pand xmm1, xmm12
  50637. pand xmm2, xmm12
  50638. pand xmm3, xmm12
  50639. por xmm4, xmm0
  50640. por xmm5, xmm1
  50641. por xmm6, xmm2
  50642. por xmm7, xmm3
  50643. paddd xmm13, xmm11
  50644. ; ENTRY: 12
  50645. mov r9, QWORD PTR [rdx+96]
  50646. add r9, 256
  50647. movdqu xmm12, xmm13
  50648. pcmpeqd xmm12, xmm10
  50649. movdqu xmm0, [r9]
  50650. movdqu xmm1, [r9+16]
  50651. movdqu xmm2, [r9+32]
  50652. movdqu xmm3, [r9+48]
  50653. pand xmm0, xmm12
  50654. pand xmm1, xmm12
  50655. pand xmm2, xmm12
  50656. pand xmm3, xmm12
  50657. por xmm4, xmm0
  50658. por xmm5, xmm1
  50659. por xmm6, xmm2
  50660. por xmm7, xmm3
  50661. paddd xmm13, xmm11
  50662. ; ENTRY: 13
  50663. mov r9, QWORD PTR [rdx+104]
  50664. add r9, 256
  50665. movdqu xmm12, xmm13
  50666. pcmpeqd xmm12, xmm10
  50667. movdqu xmm0, [r9]
  50668. movdqu xmm1, [r9+16]
  50669. movdqu xmm2, [r9+32]
  50670. movdqu xmm3, [r9+48]
  50671. pand xmm0, xmm12
  50672. pand xmm1, xmm12
  50673. pand xmm2, xmm12
  50674. pand xmm3, xmm12
  50675. por xmm4, xmm0
  50676. por xmm5, xmm1
  50677. por xmm6, xmm2
  50678. por xmm7, xmm3
  50679. paddd xmm13, xmm11
  50680. ; ENTRY: 14
  50681. mov r9, QWORD PTR [rdx+112]
  50682. add r9, 256
  50683. movdqu xmm12, xmm13
  50684. pcmpeqd xmm12, xmm10
  50685. movdqu xmm0, [r9]
  50686. movdqu xmm1, [r9+16]
  50687. movdqu xmm2, [r9+32]
  50688. movdqu xmm3, [r9+48]
  50689. pand xmm0, xmm12
  50690. pand xmm1, xmm12
  50691. pand xmm2, xmm12
  50692. pand xmm3, xmm12
  50693. por xmm4, xmm0
  50694. por xmm5, xmm1
  50695. por xmm6, xmm2
  50696. por xmm7, xmm3
  50697. paddd xmm13, xmm11
  50698. ; ENTRY: 15
  50699. mov r9, QWORD PTR [rdx+120]
  50700. add r9, 256
  50701. movdqu xmm12, xmm13
  50702. pcmpeqd xmm12, xmm10
  50703. movdqu xmm0, [r9]
  50704. movdqu xmm1, [r9+16]
  50705. movdqu xmm2, [r9+32]
  50706. movdqu xmm3, [r9+48]
  50707. pand xmm0, xmm12
  50708. pand xmm1, xmm12
  50709. pand xmm2, xmm12
  50710. pand xmm3, xmm12
  50711. por xmm4, xmm0
  50712. por xmm5, xmm1
  50713. por xmm6, xmm2
  50714. por xmm7, xmm3
  50715. paddd xmm13, xmm11
  50716. movdqu [rcx], xmm4
  50717. movdqu [rcx+16], xmm5
  50718. movdqu [rcx+32], xmm6
  50719. movdqu [rcx+48], xmm7
  50720. add rcx, 64
  50721. ; END: 32-39
  50722. ; START: 40-47
  50723. pxor xmm13, xmm13
  50724. pxor xmm4, xmm4
  50725. pxor xmm5, xmm5
  50726. pxor xmm6, xmm6
  50727. pxor xmm7, xmm7
  50728. ; ENTRY: 0
  50729. mov r9, QWORD PTR [rdx]
  50730. add r9, 320
  50731. movdqu xmm12, xmm13
  50732. pcmpeqd xmm12, xmm10
  50733. movdqu xmm0, [r9]
  50734. movdqu xmm1, [r9+16]
  50735. movdqu xmm2, [r9+32]
  50736. movdqu xmm3, [r9+48]
  50737. pand xmm0, xmm12
  50738. pand xmm1, xmm12
  50739. pand xmm2, xmm12
  50740. pand xmm3, xmm12
  50741. por xmm4, xmm0
  50742. por xmm5, xmm1
  50743. por xmm6, xmm2
  50744. por xmm7, xmm3
  50745. paddd xmm13, xmm11
  50746. ; ENTRY: 1
  50747. mov r9, QWORD PTR [rdx+8]
  50748. add r9, 320
  50749. movdqu xmm12, xmm13
  50750. pcmpeqd xmm12, xmm10
  50751. movdqu xmm0, [r9]
  50752. movdqu xmm1, [r9+16]
  50753. movdqu xmm2, [r9+32]
  50754. movdqu xmm3, [r9+48]
  50755. pand xmm0, xmm12
  50756. pand xmm1, xmm12
  50757. pand xmm2, xmm12
  50758. pand xmm3, xmm12
  50759. por xmm4, xmm0
  50760. por xmm5, xmm1
  50761. por xmm6, xmm2
  50762. por xmm7, xmm3
  50763. paddd xmm13, xmm11
  50764. ; ENTRY: 2
  50765. mov r9, QWORD PTR [rdx+16]
  50766. add r9, 320
  50767. movdqu xmm12, xmm13
  50768. pcmpeqd xmm12, xmm10
  50769. movdqu xmm0, [r9]
  50770. movdqu xmm1, [r9+16]
  50771. movdqu xmm2, [r9+32]
  50772. movdqu xmm3, [r9+48]
  50773. pand xmm0, xmm12
  50774. pand xmm1, xmm12
  50775. pand xmm2, xmm12
  50776. pand xmm3, xmm12
  50777. por xmm4, xmm0
  50778. por xmm5, xmm1
  50779. por xmm6, xmm2
  50780. por xmm7, xmm3
  50781. paddd xmm13, xmm11
  50782. ; ENTRY: 3
  50783. mov r9, QWORD PTR [rdx+24]
  50784. add r9, 320
  50785. movdqu xmm12, xmm13
  50786. pcmpeqd xmm12, xmm10
  50787. movdqu xmm0, [r9]
  50788. movdqu xmm1, [r9+16]
  50789. movdqu xmm2, [r9+32]
  50790. movdqu xmm3, [r9+48]
  50791. pand xmm0, xmm12
  50792. pand xmm1, xmm12
  50793. pand xmm2, xmm12
  50794. pand xmm3, xmm12
  50795. por xmm4, xmm0
  50796. por xmm5, xmm1
  50797. por xmm6, xmm2
  50798. por xmm7, xmm3
  50799. paddd xmm13, xmm11
  50800. ; ENTRY: 4
  50801. mov r9, QWORD PTR [rdx+32]
  50802. add r9, 320
  50803. movdqu xmm12, xmm13
  50804. pcmpeqd xmm12, xmm10
  50805. movdqu xmm0, [r9]
  50806. movdqu xmm1, [r9+16]
  50807. movdqu xmm2, [r9+32]
  50808. movdqu xmm3, [r9+48]
  50809. pand xmm0, xmm12
  50810. pand xmm1, xmm12
  50811. pand xmm2, xmm12
  50812. pand xmm3, xmm12
  50813. por xmm4, xmm0
  50814. por xmm5, xmm1
  50815. por xmm6, xmm2
  50816. por xmm7, xmm3
  50817. paddd xmm13, xmm11
  50818. ; ENTRY: 5
  50819. mov r9, QWORD PTR [rdx+40]
  50820. add r9, 320
  50821. movdqu xmm12, xmm13
  50822. pcmpeqd xmm12, xmm10
  50823. movdqu xmm0, [r9]
  50824. movdqu xmm1, [r9+16]
  50825. movdqu xmm2, [r9+32]
  50826. movdqu xmm3, [r9+48]
  50827. pand xmm0, xmm12
  50828. pand xmm1, xmm12
  50829. pand xmm2, xmm12
  50830. pand xmm3, xmm12
  50831. por xmm4, xmm0
  50832. por xmm5, xmm1
  50833. por xmm6, xmm2
  50834. por xmm7, xmm3
  50835. paddd xmm13, xmm11
  50836. ; ENTRY: 6
  50837. mov r9, QWORD PTR [rdx+48]
  50838. add r9, 320
  50839. movdqu xmm12, xmm13
  50840. pcmpeqd xmm12, xmm10
  50841. movdqu xmm0, [r9]
  50842. movdqu xmm1, [r9+16]
  50843. movdqu xmm2, [r9+32]
  50844. movdqu xmm3, [r9+48]
  50845. pand xmm0, xmm12
  50846. pand xmm1, xmm12
  50847. pand xmm2, xmm12
  50848. pand xmm3, xmm12
  50849. por xmm4, xmm0
  50850. por xmm5, xmm1
  50851. por xmm6, xmm2
  50852. por xmm7, xmm3
  50853. paddd xmm13, xmm11
  50854. ; ENTRY: 7
  50855. mov r9, QWORD PTR [rdx+56]
  50856. add r9, 320
  50857. movdqu xmm12, xmm13
  50858. pcmpeqd xmm12, xmm10
  50859. movdqu xmm0, [r9]
  50860. movdqu xmm1, [r9+16]
  50861. movdqu xmm2, [r9+32]
  50862. movdqu xmm3, [r9+48]
  50863. pand xmm0, xmm12
  50864. pand xmm1, xmm12
  50865. pand xmm2, xmm12
  50866. pand xmm3, xmm12
  50867. por xmm4, xmm0
  50868. por xmm5, xmm1
  50869. por xmm6, xmm2
  50870. por xmm7, xmm3
  50871. paddd xmm13, xmm11
  50872. ; ENTRY: 8
  50873. mov r9, QWORD PTR [rdx+64]
  50874. add r9, 320
  50875. movdqu xmm12, xmm13
  50876. pcmpeqd xmm12, xmm10
  50877. movdqu xmm0, [r9]
  50878. movdqu xmm1, [r9+16]
  50879. movdqu xmm2, [r9+32]
  50880. movdqu xmm3, [r9+48]
  50881. pand xmm0, xmm12
  50882. pand xmm1, xmm12
  50883. pand xmm2, xmm12
  50884. pand xmm3, xmm12
  50885. por xmm4, xmm0
  50886. por xmm5, xmm1
  50887. por xmm6, xmm2
  50888. por xmm7, xmm3
  50889. paddd xmm13, xmm11
  50890. ; ENTRY: 9
  50891. mov r9, QWORD PTR [rdx+72]
  50892. add r9, 320
  50893. movdqu xmm12, xmm13
  50894. pcmpeqd xmm12, xmm10
  50895. movdqu xmm0, [r9]
  50896. movdqu xmm1, [r9+16]
  50897. movdqu xmm2, [r9+32]
  50898. movdqu xmm3, [r9+48]
  50899. pand xmm0, xmm12
  50900. pand xmm1, xmm12
  50901. pand xmm2, xmm12
  50902. pand xmm3, xmm12
  50903. por xmm4, xmm0
  50904. por xmm5, xmm1
  50905. por xmm6, xmm2
  50906. por xmm7, xmm3
  50907. paddd xmm13, xmm11
  50908. ; ENTRY: 10
  50909. mov r9, QWORD PTR [rdx+80]
  50910. add r9, 320
  50911. movdqu xmm12, xmm13
  50912. pcmpeqd xmm12, xmm10
  50913. movdqu xmm0, [r9]
  50914. movdqu xmm1, [r9+16]
  50915. movdqu xmm2, [r9+32]
  50916. movdqu xmm3, [r9+48]
  50917. pand xmm0, xmm12
  50918. pand xmm1, xmm12
  50919. pand xmm2, xmm12
  50920. pand xmm3, xmm12
  50921. por xmm4, xmm0
  50922. por xmm5, xmm1
  50923. por xmm6, xmm2
  50924. por xmm7, xmm3
  50925. paddd xmm13, xmm11
  50926. ; ENTRY: 11
  50927. mov r9, QWORD PTR [rdx+88]
  50928. add r9, 320
  50929. movdqu xmm12, xmm13
  50930. pcmpeqd xmm12, xmm10
  50931. movdqu xmm0, [r9]
  50932. movdqu xmm1, [r9+16]
  50933. movdqu xmm2, [r9+32]
  50934. movdqu xmm3, [r9+48]
  50935. pand xmm0, xmm12
  50936. pand xmm1, xmm12
  50937. pand xmm2, xmm12
  50938. pand xmm3, xmm12
  50939. por xmm4, xmm0
  50940. por xmm5, xmm1
  50941. por xmm6, xmm2
  50942. por xmm7, xmm3
  50943. paddd xmm13, xmm11
  50944. ; ENTRY: 12
  50945. mov r9, QWORD PTR [rdx+96]
  50946. add r9, 320
  50947. movdqu xmm12, xmm13
  50948. pcmpeqd xmm12, xmm10
  50949. movdqu xmm0, [r9]
  50950. movdqu xmm1, [r9+16]
  50951. movdqu xmm2, [r9+32]
  50952. movdqu xmm3, [r9+48]
  50953. pand xmm0, xmm12
  50954. pand xmm1, xmm12
  50955. pand xmm2, xmm12
  50956. pand xmm3, xmm12
  50957. por xmm4, xmm0
  50958. por xmm5, xmm1
  50959. por xmm6, xmm2
  50960. por xmm7, xmm3
  50961. paddd xmm13, xmm11
  50962. ; ENTRY: 13
  50963. mov r9, QWORD PTR [rdx+104]
  50964. add r9, 320
  50965. movdqu xmm12, xmm13
  50966. pcmpeqd xmm12, xmm10
  50967. movdqu xmm0, [r9]
  50968. movdqu xmm1, [r9+16]
  50969. movdqu xmm2, [r9+32]
  50970. movdqu xmm3, [r9+48]
  50971. pand xmm0, xmm12
  50972. pand xmm1, xmm12
  50973. pand xmm2, xmm12
  50974. pand xmm3, xmm12
  50975. por xmm4, xmm0
  50976. por xmm5, xmm1
  50977. por xmm6, xmm2
  50978. por xmm7, xmm3
  50979. paddd xmm13, xmm11
  50980. ; ENTRY: 14
  50981. mov r9, QWORD PTR [rdx+112]
  50982. add r9, 320
  50983. movdqu xmm12, xmm13
  50984. pcmpeqd xmm12, xmm10
  50985. movdqu xmm0, [r9]
  50986. movdqu xmm1, [r9+16]
  50987. movdqu xmm2, [r9+32]
  50988. movdqu xmm3, [r9+48]
  50989. pand xmm0, xmm12
  50990. pand xmm1, xmm12
  50991. pand xmm2, xmm12
  50992. pand xmm3, xmm12
  50993. por xmm4, xmm0
  50994. por xmm5, xmm1
  50995. por xmm6, xmm2
  50996. por xmm7, xmm3
  50997. paddd xmm13, xmm11
  50998. ; ENTRY: 15
  50999. mov r9, QWORD PTR [rdx+120]
  51000. add r9, 320
  51001. movdqu xmm12, xmm13
  51002. pcmpeqd xmm12, xmm10
  51003. movdqu xmm0, [r9]
  51004. movdqu xmm1, [r9+16]
  51005. movdqu xmm2, [r9+32]
  51006. movdqu xmm3, [r9+48]
  51007. pand xmm0, xmm12
  51008. pand xmm1, xmm12
  51009. pand xmm2, xmm12
  51010. pand xmm3, xmm12
  51011. por xmm4, xmm0
  51012. por xmm5, xmm1
  51013. por xmm6, xmm2
  51014. por xmm7, xmm3
  51015. paddd xmm13, xmm11
  51016. movdqu [rcx], xmm4
  51017. movdqu [rcx+16], xmm5
  51018. movdqu [rcx+32], xmm6
  51019. movdqu [rcx+48], xmm7
  51020. add rcx, 64
  51021. ; END: 40-47
  51022. ; START: 48-55
  51023. pxor xmm13, xmm13
  51024. pxor xmm4, xmm4
  51025. pxor xmm5, xmm5
  51026. pxor xmm6, xmm6
  51027. pxor xmm7, xmm7
  51028. ; ENTRY: 0
  51029. mov r9, QWORD PTR [rdx]
  51030. add r9, 384
  51031. movdqu xmm12, xmm13
  51032. pcmpeqd xmm12, xmm10
  51033. movdqu xmm0, [r9]
  51034. movdqu xmm1, [r9+16]
  51035. movdqu xmm2, [r9+32]
  51036. movdqu xmm3, [r9+48]
  51037. pand xmm0, xmm12
  51038. pand xmm1, xmm12
  51039. pand xmm2, xmm12
  51040. pand xmm3, xmm12
  51041. por xmm4, xmm0
  51042. por xmm5, xmm1
  51043. por xmm6, xmm2
  51044. por xmm7, xmm3
  51045. paddd xmm13, xmm11
  51046. ; ENTRY: 1
  51047. mov r9, QWORD PTR [rdx+8]
  51048. add r9, 384
  51049. movdqu xmm12, xmm13
  51050. pcmpeqd xmm12, xmm10
  51051. movdqu xmm0, [r9]
  51052. movdqu xmm1, [r9+16]
  51053. movdqu xmm2, [r9+32]
  51054. movdqu xmm3, [r9+48]
  51055. pand xmm0, xmm12
  51056. pand xmm1, xmm12
  51057. pand xmm2, xmm12
  51058. pand xmm3, xmm12
  51059. por xmm4, xmm0
  51060. por xmm5, xmm1
  51061. por xmm6, xmm2
  51062. por xmm7, xmm3
  51063. paddd xmm13, xmm11
  51064. ; ENTRY: 2
  51065. mov r9, QWORD PTR [rdx+16]
  51066. add r9, 384
  51067. movdqu xmm12, xmm13
  51068. pcmpeqd xmm12, xmm10
  51069. movdqu xmm0, [r9]
  51070. movdqu xmm1, [r9+16]
  51071. movdqu xmm2, [r9+32]
  51072. movdqu xmm3, [r9+48]
  51073. pand xmm0, xmm12
  51074. pand xmm1, xmm12
  51075. pand xmm2, xmm12
  51076. pand xmm3, xmm12
  51077. por xmm4, xmm0
  51078. por xmm5, xmm1
  51079. por xmm6, xmm2
  51080. por xmm7, xmm3
  51081. paddd xmm13, xmm11
  51082. ; ENTRY: 3
  51083. mov r9, QWORD PTR [rdx+24]
  51084. add r9, 384
  51085. movdqu xmm12, xmm13
  51086. pcmpeqd xmm12, xmm10
  51087. movdqu xmm0, [r9]
  51088. movdqu xmm1, [r9+16]
  51089. movdqu xmm2, [r9+32]
  51090. movdqu xmm3, [r9+48]
  51091. pand xmm0, xmm12
  51092. pand xmm1, xmm12
  51093. pand xmm2, xmm12
  51094. pand xmm3, xmm12
  51095. por xmm4, xmm0
  51096. por xmm5, xmm1
  51097. por xmm6, xmm2
  51098. por xmm7, xmm3
  51099. paddd xmm13, xmm11
  51100. ; ENTRY: 4
  51101. mov r9, QWORD PTR [rdx+32]
  51102. add r9, 384
  51103. movdqu xmm12, xmm13
  51104. pcmpeqd xmm12, xmm10
  51105. movdqu xmm0, [r9]
  51106. movdqu xmm1, [r9+16]
  51107. movdqu xmm2, [r9+32]
  51108. movdqu xmm3, [r9+48]
  51109. pand xmm0, xmm12
  51110. pand xmm1, xmm12
  51111. pand xmm2, xmm12
  51112. pand xmm3, xmm12
  51113. por xmm4, xmm0
  51114. por xmm5, xmm1
  51115. por xmm6, xmm2
  51116. por xmm7, xmm3
  51117. paddd xmm13, xmm11
  51118. ; ENTRY: 5
  51119. mov r9, QWORD PTR [rdx+40]
  51120. add r9, 384
  51121. movdqu xmm12, xmm13
  51122. pcmpeqd xmm12, xmm10
  51123. movdqu xmm0, [r9]
  51124. movdqu xmm1, [r9+16]
  51125. movdqu xmm2, [r9+32]
  51126. movdqu xmm3, [r9+48]
  51127. pand xmm0, xmm12
  51128. pand xmm1, xmm12
  51129. pand xmm2, xmm12
  51130. pand xmm3, xmm12
  51131. por xmm4, xmm0
  51132. por xmm5, xmm1
  51133. por xmm6, xmm2
  51134. por xmm7, xmm3
  51135. paddd xmm13, xmm11
  51136. ; ENTRY: 6
  51137. mov r9, QWORD PTR [rdx+48]
  51138. add r9, 384
  51139. movdqu xmm12, xmm13
  51140. pcmpeqd xmm12, xmm10
  51141. movdqu xmm0, [r9]
  51142. movdqu xmm1, [r9+16]
  51143. movdqu xmm2, [r9+32]
  51144. movdqu xmm3, [r9+48]
  51145. pand xmm0, xmm12
  51146. pand xmm1, xmm12
  51147. pand xmm2, xmm12
  51148. pand xmm3, xmm12
  51149. por xmm4, xmm0
  51150. por xmm5, xmm1
  51151. por xmm6, xmm2
  51152. por xmm7, xmm3
  51153. paddd xmm13, xmm11
  51154. ; ENTRY: 7
  51155. mov r9, QWORD PTR [rdx+56]
  51156. add r9, 384
  51157. movdqu xmm12, xmm13
  51158. pcmpeqd xmm12, xmm10
  51159. movdqu xmm0, [r9]
  51160. movdqu xmm1, [r9+16]
  51161. movdqu xmm2, [r9+32]
  51162. movdqu xmm3, [r9+48]
  51163. pand xmm0, xmm12
  51164. pand xmm1, xmm12
  51165. pand xmm2, xmm12
  51166. pand xmm3, xmm12
  51167. por xmm4, xmm0
  51168. por xmm5, xmm1
  51169. por xmm6, xmm2
  51170. por xmm7, xmm3
  51171. paddd xmm13, xmm11
  51172. ; ENTRY: 8
  51173. mov r9, QWORD PTR [rdx+64]
  51174. add r9, 384
  51175. movdqu xmm12, xmm13
  51176. pcmpeqd xmm12, xmm10
  51177. movdqu xmm0, [r9]
  51178. movdqu xmm1, [r9+16]
  51179. movdqu xmm2, [r9+32]
  51180. movdqu xmm3, [r9+48]
  51181. pand xmm0, xmm12
  51182. pand xmm1, xmm12
  51183. pand xmm2, xmm12
  51184. pand xmm3, xmm12
  51185. por xmm4, xmm0
  51186. por xmm5, xmm1
  51187. por xmm6, xmm2
  51188. por xmm7, xmm3
  51189. paddd xmm13, xmm11
  51190. ; ENTRY: 9
  51191. mov r9, QWORD PTR [rdx+72]
  51192. add r9, 384
  51193. movdqu xmm12, xmm13
  51194. pcmpeqd xmm12, xmm10
  51195. movdqu xmm0, [r9]
  51196. movdqu xmm1, [r9+16]
  51197. movdqu xmm2, [r9+32]
  51198. movdqu xmm3, [r9+48]
  51199. pand xmm0, xmm12
  51200. pand xmm1, xmm12
  51201. pand xmm2, xmm12
  51202. pand xmm3, xmm12
  51203. por xmm4, xmm0
  51204. por xmm5, xmm1
  51205. por xmm6, xmm2
  51206. por xmm7, xmm3
  51207. paddd xmm13, xmm11
  51208. ; ENTRY: 10
  51209. mov r9, QWORD PTR [rdx+80]
  51210. add r9, 384
  51211. movdqu xmm12, xmm13
  51212. pcmpeqd xmm12, xmm10
  51213. movdqu xmm0, [r9]
  51214. movdqu xmm1, [r9+16]
  51215. movdqu xmm2, [r9+32]
  51216. movdqu xmm3, [r9+48]
  51217. pand xmm0, xmm12
  51218. pand xmm1, xmm12
  51219. pand xmm2, xmm12
  51220. pand xmm3, xmm12
  51221. por xmm4, xmm0
  51222. por xmm5, xmm1
  51223. por xmm6, xmm2
  51224. por xmm7, xmm3
  51225. paddd xmm13, xmm11
  51226. ; ENTRY: 11
  51227. mov r9, QWORD PTR [rdx+88]
  51228. add r9, 384
  51229. movdqu xmm12, xmm13
  51230. pcmpeqd xmm12, xmm10
  51231. movdqu xmm0, [r9]
  51232. movdqu xmm1, [r9+16]
  51233. movdqu xmm2, [r9+32]
  51234. movdqu xmm3, [r9+48]
  51235. pand xmm0, xmm12
  51236. pand xmm1, xmm12
  51237. pand xmm2, xmm12
  51238. pand xmm3, xmm12
  51239. por xmm4, xmm0
  51240. por xmm5, xmm1
  51241. por xmm6, xmm2
  51242. por xmm7, xmm3
  51243. paddd xmm13, xmm11
  51244. ; ENTRY: 12
  51245. mov r9, QWORD PTR [rdx+96]
  51246. add r9, 384
  51247. movdqu xmm12, xmm13
  51248. pcmpeqd xmm12, xmm10
  51249. movdqu xmm0, [r9]
  51250. movdqu xmm1, [r9+16]
  51251. movdqu xmm2, [r9+32]
  51252. movdqu xmm3, [r9+48]
  51253. pand xmm0, xmm12
  51254. pand xmm1, xmm12
  51255. pand xmm2, xmm12
  51256. pand xmm3, xmm12
  51257. por xmm4, xmm0
  51258. por xmm5, xmm1
  51259. por xmm6, xmm2
  51260. por xmm7, xmm3
  51261. paddd xmm13, xmm11
  51262. ; ENTRY: 13
  51263. mov r9, QWORD PTR [rdx+104]
  51264. add r9, 384
  51265. movdqu xmm12, xmm13
  51266. pcmpeqd xmm12, xmm10
  51267. movdqu xmm0, [r9]
  51268. movdqu xmm1, [r9+16]
  51269. movdqu xmm2, [r9+32]
  51270. movdqu xmm3, [r9+48]
  51271. pand xmm0, xmm12
  51272. pand xmm1, xmm12
  51273. pand xmm2, xmm12
  51274. pand xmm3, xmm12
  51275. por xmm4, xmm0
  51276. por xmm5, xmm1
  51277. por xmm6, xmm2
  51278. por xmm7, xmm3
  51279. paddd xmm13, xmm11
  51280. ; ENTRY: 14
  51281. mov r9, QWORD PTR [rdx+112]
  51282. add r9, 384
  51283. movdqu xmm12, xmm13
  51284. pcmpeqd xmm12, xmm10
  51285. movdqu xmm0, [r9]
  51286. movdqu xmm1, [r9+16]
  51287. movdqu xmm2, [r9+32]
  51288. movdqu xmm3, [r9+48]
  51289. pand xmm0, xmm12
  51290. pand xmm1, xmm12
  51291. pand xmm2, xmm12
  51292. pand xmm3, xmm12
  51293. por xmm4, xmm0
  51294. por xmm5, xmm1
  51295. por xmm6, xmm2
  51296. por xmm7, xmm3
  51297. paddd xmm13, xmm11
  51298. ; ENTRY: 15
  51299. mov r9, QWORD PTR [rdx+120]
  51300. add r9, 384
  51301. movdqu xmm12, xmm13
  51302. pcmpeqd xmm12, xmm10
  51303. movdqu xmm0, [r9]
  51304. movdqu xmm1, [r9+16]
  51305. movdqu xmm2, [r9+32]
  51306. movdqu xmm3, [r9+48]
  51307. pand xmm0, xmm12
  51308. pand xmm1, xmm12
  51309. pand xmm2, xmm12
  51310. pand xmm3, xmm12
  51311. por xmm4, xmm0
  51312. por xmm5, xmm1
  51313. por xmm6, xmm2
  51314. por xmm7, xmm3
  51315. paddd xmm13, xmm11
  51316. movdqu [rcx], xmm4
  51317. movdqu [rcx+16], xmm5
  51318. movdqu [rcx+32], xmm6
  51319. movdqu [rcx+48], xmm7
  51320. add rcx, 64
  51321. ; END: 48-55
  51322. ; START: 56-63
  51323. pxor xmm13, xmm13
  51324. pxor xmm4, xmm4
  51325. pxor xmm5, xmm5
  51326. pxor xmm6, xmm6
  51327. pxor xmm7, xmm7
  51328. ; ENTRY: 0
  51329. mov r9, QWORD PTR [rdx]
  51330. add r9, 448
  51331. movdqu xmm12, xmm13
  51332. pcmpeqd xmm12, xmm10
  51333. movdqu xmm0, [r9]
  51334. movdqu xmm1, [r9+16]
  51335. movdqu xmm2, [r9+32]
  51336. movdqu xmm3, [r9+48]
  51337. pand xmm0, xmm12
  51338. pand xmm1, xmm12
  51339. pand xmm2, xmm12
  51340. pand xmm3, xmm12
  51341. por xmm4, xmm0
  51342. por xmm5, xmm1
  51343. por xmm6, xmm2
  51344. por xmm7, xmm3
  51345. paddd xmm13, xmm11
  51346. ; ENTRY: 1
  51347. mov r9, QWORD PTR [rdx+8]
  51348. add r9, 448
  51349. movdqu xmm12, xmm13
  51350. pcmpeqd xmm12, xmm10
  51351. movdqu xmm0, [r9]
  51352. movdqu xmm1, [r9+16]
  51353. movdqu xmm2, [r9+32]
  51354. movdqu xmm3, [r9+48]
  51355. pand xmm0, xmm12
  51356. pand xmm1, xmm12
  51357. pand xmm2, xmm12
  51358. pand xmm3, xmm12
  51359. por xmm4, xmm0
  51360. por xmm5, xmm1
  51361. por xmm6, xmm2
  51362. por xmm7, xmm3
  51363. paddd xmm13, xmm11
  51364. ; ENTRY: 2
  51365. mov r9, QWORD PTR [rdx+16]
  51366. add r9, 448
  51367. movdqu xmm12, xmm13
  51368. pcmpeqd xmm12, xmm10
  51369. movdqu xmm0, [r9]
  51370. movdqu xmm1, [r9+16]
  51371. movdqu xmm2, [r9+32]
  51372. movdqu xmm3, [r9+48]
  51373. pand xmm0, xmm12
  51374. pand xmm1, xmm12
  51375. pand xmm2, xmm12
  51376. pand xmm3, xmm12
  51377. por xmm4, xmm0
  51378. por xmm5, xmm1
  51379. por xmm6, xmm2
  51380. por xmm7, xmm3
  51381. paddd xmm13, xmm11
  51382. ; ENTRY: 3
  51383. mov r9, QWORD PTR [rdx+24]
  51384. add r9, 448
  51385. movdqu xmm12, xmm13
  51386. pcmpeqd xmm12, xmm10
  51387. movdqu xmm0, [r9]
  51388. movdqu xmm1, [r9+16]
  51389. movdqu xmm2, [r9+32]
  51390. movdqu xmm3, [r9+48]
  51391. pand xmm0, xmm12
  51392. pand xmm1, xmm12
  51393. pand xmm2, xmm12
  51394. pand xmm3, xmm12
  51395. por xmm4, xmm0
  51396. por xmm5, xmm1
  51397. por xmm6, xmm2
  51398. por xmm7, xmm3
  51399. paddd xmm13, xmm11
  51400. ; ENTRY: 4
  51401. mov r9, QWORD PTR [rdx+32]
  51402. add r9, 448
  51403. movdqu xmm12, xmm13
  51404. pcmpeqd xmm12, xmm10
  51405. movdqu xmm0, [r9]
  51406. movdqu xmm1, [r9+16]
  51407. movdqu xmm2, [r9+32]
  51408. movdqu xmm3, [r9+48]
  51409. pand xmm0, xmm12
  51410. pand xmm1, xmm12
  51411. pand xmm2, xmm12
  51412. pand xmm3, xmm12
  51413. por xmm4, xmm0
  51414. por xmm5, xmm1
  51415. por xmm6, xmm2
  51416. por xmm7, xmm3
  51417. paddd xmm13, xmm11
  51418. ; ENTRY: 5
  51419. mov r9, QWORD PTR [rdx+40]
  51420. add r9, 448
  51421. movdqu xmm12, xmm13
  51422. pcmpeqd xmm12, xmm10
  51423. movdqu xmm0, [r9]
  51424. movdqu xmm1, [r9+16]
  51425. movdqu xmm2, [r9+32]
  51426. movdqu xmm3, [r9+48]
  51427. pand xmm0, xmm12
  51428. pand xmm1, xmm12
  51429. pand xmm2, xmm12
  51430. pand xmm3, xmm12
  51431. por xmm4, xmm0
  51432. por xmm5, xmm1
  51433. por xmm6, xmm2
  51434. por xmm7, xmm3
  51435. paddd xmm13, xmm11
  51436. ; ENTRY: 6
  51437. mov r9, QWORD PTR [rdx+48]
  51438. add r9, 448
  51439. movdqu xmm12, xmm13
  51440. pcmpeqd xmm12, xmm10
  51441. movdqu xmm0, [r9]
  51442. movdqu xmm1, [r9+16]
  51443. movdqu xmm2, [r9+32]
  51444. movdqu xmm3, [r9+48]
  51445. pand xmm0, xmm12
  51446. pand xmm1, xmm12
  51447. pand xmm2, xmm12
  51448. pand xmm3, xmm12
  51449. por xmm4, xmm0
  51450. por xmm5, xmm1
  51451. por xmm6, xmm2
  51452. por xmm7, xmm3
  51453. paddd xmm13, xmm11
  51454. ; ENTRY: 7
  51455. mov r9, QWORD PTR [rdx+56]
  51456. add r9, 448
  51457. movdqu xmm12, xmm13
  51458. pcmpeqd xmm12, xmm10
  51459. movdqu xmm0, [r9]
  51460. movdqu xmm1, [r9+16]
  51461. movdqu xmm2, [r9+32]
  51462. movdqu xmm3, [r9+48]
  51463. pand xmm0, xmm12
  51464. pand xmm1, xmm12
  51465. pand xmm2, xmm12
  51466. pand xmm3, xmm12
  51467. por xmm4, xmm0
  51468. por xmm5, xmm1
  51469. por xmm6, xmm2
  51470. por xmm7, xmm3
  51471. paddd xmm13, xmm11
  51472. ; ENTRY: 8
  51473. mov r9, QWORD PTR [rdx+64]
  51474. add r9, 448
  51475. movdqu xmm12, xmm13
  51476. pcmpeqd xmm12, xmm10
  51477. movdqu xmm0, [r9]
  51478. movdqu xmm1, [r9+16]
  51479. movdqu xmm2, [r9+32]
  51480. movdqu xmm3, [r9+48]
  51481. pand xmm0, xmm12
  51482. pand xmm1, xmm12
  51483. pand xmm2, xmm12
  51484. pand xmm3, xmm12
  51485. por xmm4, xmm0
  51486. por xmm5, xmm1
  51487. por xmm6, xmm2
  51488. por xmm7, xmm3
  51489. paddd xmm13, xmm11
  51490. ; ENTRY: 9
  51491. mov r9, QWORD PTR [rdx+72]
  51492. add r9, 448
  51493. movdqu xmm12, xmm13
  51494. pcmpeqd xmm12, xmm10
  51495. movdqu xmm0, [r9]
  51496. movdqu xmm1, [r9+16]
  51497. movdqu xmm2, [r9+32]
  51498. movdqu xmm3, [r9+48]
  51499. pand xmm0, xmm12
  51500. pand xmm1, xmm12
  51501. pand xmm2, xmm12
  51502. pand xmm3, xmm12
  51503. por xmm4, xmm0
  51504. por xmm5, xmm1
  51505. por xmm6, xmm2
  51506. por xmm7, xmm3
  51507. paddd xmm13, xmm11
  51508. ; ENTRY: 10
  51509. mov r9, QWORD PTR [rdx+80]
  51510. add r9, 448
  51511. movdqu xmm12, xmm13
  51512. pcmpeqd xmm12, xmm10
  51513. movdqu xmm0, [r9]
  51514. movdqu xmm1, [r9+16]
  51515. movdqu xmm2, [r9+32]
  51516. movdqu xmm3, [r9+48]
  51517. pand xmm0, xmm12
  51518. pand xmm1, xmm12
  51519. pand xmm2, xmm12
  51520. pand xmm3, xmm12
  51521. por xmm4, xmm0
  51522. por xmm5, xmm1
  51523. por xmm6, xmm2
  51524. por xmm7, xmm3
  51525. paddd xmm13, xmm11
  51526. ; ENTRY: 11
  51527. mov r9, QWORD PTR [rdx+88]
  51528. add r9, 448
  51529. movdqu xmm12, xmm13
  51530. pcmpeqd xmm12, xmm10
  51531. movdqu xmm0, [r9]
  51532. movdqu xmm1, [r9+16]
  51533. movdqu xmm2, [r9+32]
  51534. movdqu xmm3, [r9+48]
  51535. pand xmm0, xmm12
  51536. pand xmm1, xmm12
  51537. pand xmm2, xmm12
  51538. pand xmm3, xmm12
  51539. por xmm4, xmm0
  51540. por xmm5, xmm1
  51541. por xmm6, xmm2
  51542. por xmm7, xmm3
  51543. paddd xmm13, xmm11
  51544. ; ENTRY: 12
  51545. mov r9, QWORD PTR [rdx+96]
  51546. add r9, 448
  51547. movdqu xmm12, xmm13
  51548. pcmpeqd xmm12, xmm10
  51549. movdqu xmm0, [r9]
  51550. movdqu xmm1, [r9+16]
  51551. movdqu xmm2, [r9+32]
  51552. movdqu xmm3, [r9+48]
  51553. pand xmm0, xmm12
  51554. pand xmm1, xmm12
  51555. pand xmm2, xmm12
  51556. pand xmm3, xmm12
  51557. por xmm4, xmm0
  51558. por xmm5, xmm1
  51559. por xmm6, xmm2
  51560. por xmm7, xmm3
  51561. paddd xmm13, xmm11
  51562. ; ENTRY: 13
  51563. mov r9, QWORD PTR [rdx+104]
  51564. add r9, 448
  51565. movdqu xmm12, xmm13
  51566. pcmpeqd xmm12, xmm10
  51567. movdqu xmm0, [r9]
  51568. movdqu xmm1, [r9+16]
  51569. movdqu xmm2, [r9+32]
  51570. movdqu xmm3, [r9+48]
  51571. pand xmm0, xmm12
  51572. pand xmm1, xmm12
  51573. pand xmm2, xmm12
  51574. pand xmm3, xmm12
  51575. por xmm4, xmm0
  51576. por xmm5, xmm1
  51577. por xmm6, xmm2
  51578. por xmm7, xmm3
  51579. paddd xmm13, xmm11
  51580. ; ENTRY: 14
  51581. mov r9, QWORD PTR [rdx+112]
  51582. add r9, 448
  51583. movdqu xmm12, xmm13
  51584. pcmpeqd xmm12, xmm10
  51585. movdqu xmm0, [r9]
  51586. movdqu xmm1, [r9+16]
  51587. movdqu xmm2, [r9+32]
  51588. movdqu xmm3, [r9+48]
  51589. pand xmm0, xmm12
  51590. pand xmm1, xmm12
  51591. pand xmm2, xmm12
  51592. pand xmm3, xmm12
  51593. por xmm4, xmm0
  51594. por xmm5, xmm1
  51595. por xmm6, xmm2
  51596. por xmm7, xmm3
  51597. paddd xmm13, xmm11
  51598. ; ENTRY: 15
  51599. mov r9, QWORD PTR [rdx+120]
  51600. add r9, 448
  51601. movdqu xmm12, xmm13
  51602. pcmpeqd xmm12, xmm10
  51603. movdqu xmm0, [r9]
  51604. movdqu xmm1, [r9+16]
  51605. movdqu xmm2, [r9+32]
  51606. movdqu xmm3, [r9+48]
  51607. pand xmm0, xmm12
  51608. pand xmm1, xmm12
  51609. pand xmm2, xmm12
  51610. pand xmm3, xmm12
  51611. por xmm4, xmm0
  51612. por xmm5, xmm1
  51613. por xmm6, xmm2
  51614. por xmm7, xmm3
  51615. paddd xmm13, xmm11
  51616. movdqu [rcx], xmm4
  51617. movdqu [rcx+16], xmm5
  51618. movdqu [rcx+32], xmm6
  51619. movdqu [rcx+48], xmm7
  51620. ; END: 56-63
  51621. vmovdqu xmm6, OWORD PTR [rsp]
  51622. vmovdqu xmm7, OWORD PTR [rsp+16]
  51623. vmovdqu xmm8, OWORD PTR [rsp+32]
  51624. vmovdqu xmm9, OWORD PTR [rsp+48]
  51625. vmovdqu xmm10, OWORD PTR [rsp+64]
  51626. vmovdqu xmm11, OWORD PTR [rsp+80]
  51627. vmovdqu xmm12, OWORD PTR [rsp+96]
  51628. vmovdqu xmm13, OWORD PTR [rsp+112]
  51629. add rsp, 128
  51630. ret
  51631. sp_4096_get_from_table_64 ENDP
  51632. _text ENDS
  51633. ENDIF
  51634. IFDEF HAVE_INTEL_AVX2
  51635. ; /* Reduce the number back to 4096 bits using Montgomery reduction.
  51636. ; *
  51637. ; * a A single precision number to reduce in place.
  51638. ; * m The single precision number representing the modulus.
  51639. ; * mp The digit representing the negative inverse of m mod 2^n, where n = 64 (the digit width in bits).
  51640. ; */
  51641. _text SEGMENT READONLY PARA
  51642. sp_4096_mont_reduce_avx2_64 PROC
  51643. push r12
  51644. push r13
  51645. push r14
  51646. push r15
  51647. push rdi
  51648. push rsi
  51649. push rbx
  51650. push rbp
  51651. mov r9, rcx
  51652. mov r10, rdx
  51653. xor rbp, rbp
  51654. ; i = 64
  51655. mov r11, 64
  51656. mov r14, QWORD PTR [r9]
  51657. mov r15, QWORD PTR [r9+8]
  51658. mov rdi, QWORD PTR [r9+16]
  51659. mov rsi, QWORD PTR [r9+24]
  51660. add r9, 256
  51661. xor rbp, rbp
  51662. L_4096_mont_reduce_avx2_64_loop:
  51663. ; mu = a[i] * mp
  51664. mov rdx, r14
  51665. mov r12, r14
  51666. imul rdx, r8
  51667. xor rbx, rbx
  51668. ; a[i+0] += m[0] * mu
  51669. mulx rcx, rax, QWORD PTR [r10]
  51670. mov r14, r15
  51671. adcx r12, rax
  51672. adox r14, rcx
  51673. ; a[i+1] += m[1] * mu
  51674. mulx rcx, rax, QWORD PTR [r10+8]
  51675. mov r15, rdi
  51676. adcx r14, rax
  51677. adox r15, rcx
  51678. ; a[i+2] += m[2] * mu
  51679. mulx rcx, rax, QWORD PTR [r10+16]
  51680. mov rdi, rsi
  51681. adcx r15, rax
  51682. adox rdi, rcx
  51683. ; a[i+3] += m[3] * mu
  51684. mulx rcx, rax, QWORD PTR [r10+24]
  51685. mov rsi, QWORD PTR [r9+-224]
  51686. adcx rdi, rax
  51687. adox rsi, rcx
  51688. ; a[i+4] += m[4] * mu
  51689. mulx rcx, rax, QWORD PTR [r10+32]
  51690. mov r13, QWORD PTR [r9+-216]
  51691. adcx rsi, rax
  51692. adox r13, rcx
  51693. ; a[i+5] += m[5] * mu
  51694. mulx rcx, rax, QWORD PTR [r10+40]
  51695. mov r12, QWORD PTR [r9+-208]
  51696. adcx r13, rax
  51697. adox r12, rcx
  51698. mov QWORD PTR [r9+-216], r13
  51699. ; a[i+6] += m[6] * mu
  51700. mulx rcx, rax, QWORD PTR [r10+48]
  51701. mov r13, QWORD PTR [r9+-200]
  51702. adcx r12, rax
  51703. adox r13, rcx
  51704. mov QWORD PTR [r9+-208], r12
  51705. ; a[i+7] += m[7] * mu
  51706. mulx rcx, rax, QWORD PTR [r10+56]
  51707. mov r12, QWORD PTR [r9+-192]
  51708. adcx r13, rax
  51709. adox r12, rcx
  51710. mov QWORD PTR [r9+-200], r13
  51711. ; a[i+8] += m[8] * mu
  51712. mulx rcx, rax, QWORD PTR [r10+64]
  51713. mov r13, QWORD PTR [r9+-184]
  51714. adcx r12, rax
  51715. adox r13, rcx
  51716. mov QWORD PTR [r9+-192], r12
  51717. ; a[i+9] += m[9] * mu
  51718. mulx rcx, rax, QWORD PTR [r10+72]
  51719. mov r12, QWORD PTR [r9+-176]
  51720. adcx r13, rax
  51721. adox r12, rcx
  51722. mov QWORD PTR [r9+-184], r13
  51723. ; a[i+10] += m[10] * mu
  51724. mulx rcx, rax, QWORD PTR [r10+80]
  51725. mov r13, QWORD PTR [r9+-168]
  51726. adcx r12, rax
  51727. adox r13, rcx
  51728. mov QWORD PTR [r9+-176], r12
  51729. ; a[i+11] += m[11] * mu
  51730. mulx rcx, rax, QWORD PTR [r10+88]
  51731. mov r12, QWORD PTR [r9+-160]
  51732. adcx r13, rax
  51733. adox r12, rcx
  51734. mov QWORD PTR [r9+-168], r13
  51735. ; a[i+12] += m[12] * mu
  51736. mulx rcx, rax, QWORD PTR [r10+96]
  51737. mov r13, QWORD PTR [r9+-152]
  51738. adcx r12, rax
  51739. adox r13, rcx
  51740. mov QWORD PTR [r9+-160], r12
  51741. ; a[i+13] += m[13] * mu
  51742. mulx rcx, rax, QWORD PTR [r10+104]
  51743. mov r12, QWORD PTR [r9+-144]
  51744. adcx r13, rax
  51745. adox r12, rcx
  51746. mov QWORD PTR [r9+-152], r13
  51747. ; a[i+14] += m[14] * mu
  51748. mulx rcx, rax, QWORD PTR [r10+112]
  51749. mov r13, QWORD PTR [r9+-136]
  51750. adcx r12, rax
  51751. adox r13, rcx
  51752. mov QWORD PTR [r9+-144], r12
  51753. ; a[i+15] += m[15] * mu
  51754. mulx rcx, rax, QWORD PTR [r10+120]
  51755. mov r12, QWORD PTR [r9+-128]
  51756. adcx r13, rax
  51757. adox r12, rcx
  51758. mov QWORD PTR [r9+-136], r13
  51759. ; a[i+16] += m[16] * mu
  51760. mulx rcx, rax, QWORD PTR [r10+128]
  51761. mov r13, QWORD PTR [r9+-120]
  51762. adcx r12, rax
  51763. adox r13, rcx
  51764. mov QWORD PTR [r9+-128], r12
  51765. ; a[i+17] += m[17] * mu
  51766. mulx rcx, rax, QWORD PTR [r10+136]
  51767. mov r12, QWORD PTR [r9+-112]
  51768. adcx r13, rax
  51769. adox r12, rcx
  51770. mov QWORD PTR [r9+-120], r13
  51771. ; a[i+18] += m[18] * mu
  51772. mulx rcx, rax, QWORD PTR [r10+144]
  51773. mov r13, QWORD PTR [r9+-104]
  51774. adcx r12, rax
  51775. adox r13, rcx
  51776. mov QWORD PTR [r9+-112], r12
  51777. ; a[i+19] += m[19] * mu
  51778. mulx rcx, rax, QWORD PTR [r10+152]
  51779. mov r12, QWORD PTR [r9+-96]
  51780. adcx r13, rax
  51781. adox r12, rcx
  51782. mov QWORD PTR [r9+-104], r13
  51783. ; a[i+20] += m[20] * mu
  51784. mulx rcx, rax, QWORD PTR [r10+160]
  51785. mov r13, QWORD PTR [r9+-88]
  51786. adcx r12, rax
  51787. adox r13, rcx
  51788. mov QWORD PTR [r9+-96], r12
  51789. ; a[i+21] += m[21] * mu
  51790. mulx rcx, rax, QWORD PTR [r10+168]
  51791. mov r12, QWORD PTR [r9+-80]
  51792. adcx r13, rax
  51793. adox r12, rcx
  51794. mov QWORD PTR [r9+-88], r13
  51795. ; a[i+22] += m[22] * mu
  51796. mulx rcx, rax, QWORD PTR [r10+176]
  51797. mov r13, QWORD PTR [r9+-72]
  51798. adcx r12, rax
  51799. adox r13, rcx
  51800. mov QWORD PTR [r9+-80], r12
  51801. ; a[i+23] += m[23] * mu
  51802. mulx rcx, rax, QWORD PTR [r10+184]
  51803. mov r12, QWORD PTR [r9+-64]
  51804. adcx r13, rax
  51805. adox r12, rcx
  51806. mov QWORD PTR [r9+-72], r13
  51807. ; a[i+24] += m[24] * mu
  51808. mulx rcx, rax, QWORD PTR [r10+192]
  51809. mov r13, QWORD PTR [r9+-56]
  51810. adcx r12, rax
  51811. adox r13, rcx
  51812. mov QWORD PTR [r9+-64], r12
  51813. ; a[i+25] += m[25] * mu
  51814. mulx rcx, rax, QWORD PTR [r10+200]
  51815. mov r12, QWORD PTR [r9+-48]
  51816. adcx r13, rax
  51817. adox r12, rcx
  51818. mov QWORD PTR [r9+-56], r13
  51819. ; a[i+26] += m[26] * mu
  51820. mulx rcx, rax, QWORD PTR [r10+208]
  51821. mov r13, QWORD PTR [r9+-40]
  51822. adcx r12, rax
  51823. adox r13, rcx
  51824. mov QWORD PTR [r9+-48], r12
  51825. ; a[i+27] += m[27] * mu
  51826. mulx rcx, rax, QWORD PTR [r10+216]
  51827. mov r12, QWORD PTR [r9+-32]
  51828. adcx r13, rax
  51829. adox r12, rcx
  51830. mov QWORD PTR [r9+-40], r13
  51831. ; a[i+28] += m[28] * mu
  51832. mulx rcx, rax, QWORD PTR [r10+224]
  51833. mov r13, QWORD PTR [r9+-24]
  51834. adcx r12, rax
  51835. adox r13, rcx
  51836. mov QWORD PTR [r9+-32], r12
  51837. ; a[i+29] += m[29] * mu
  51838. mulx rcx, rax, QWORD PTR [r10+232]
  51839. mov r12, QWORD PTR [r9+-16]
  51840. adcx r13, rax
  51841. adox r12, rcx
  51842. mov QWORD PTR [r9+-24], r13
  51843. ; a[i+30] += m[30] * mu
  51844. mulx rcx, rax, QWORD PTR [r10+240]
  51845. mov r13, QWORD PTR [r9+-8]
  51846. adcx r12, rax
  51847. adox r13, rcx
  51848. mov QWORD PTR [r9+-16], r12
  51849. ; a[i+31] += m[31] * mu
  51850. mulx rcx, rax, QWORD PTR [r10+248]
  51851. mov r12, QWORD PTR [r9]
  51852. adcx r13, rax
  51853. adox r12, rcx
  51854. mov QWORD PTR [r9+-8], r13
  51855. ; a[i+32] += m[32] * mu
  51856. mulx rcx, rax, QWORD PTR [r10+256]
  51857. mov r13, QWORD PTR [r9+8]
  51858. adcx r12, rax
  51859. adox r13, rcx
  51860. mov QWORD PTR [r9], r12
  51861. ; a[i+33] += m[33] * mu
  51862. mulx rcx, rax, QWORD PTR [r10+264]
  51863. mov r12, QWORD PTR [r9+16]
  51864. adcx r13, rax
  51865. adox r12, rcx
  51866. mov QWORD PTR [r9+8], r13
  51867. ; a[i+34] += m[34] * mu
  51868. mulx rcx, rax, QWORD PTR [r10+272]
  51869. mov r13, QWORD PTR [r9+24]
  51870. adcx r12, rax
  51871. adox r13, rcx
  51872. mov QWORD PTR [r9+16], r12
  51873. ; a[i+35] += m[35] * mu
  51874. mulx rcx, rax, QWORD PTR [r10+280]
  51875. mov r12, QWORD PTR [r9+32]
  51876. adcx r13, rax
  51877. adox r12, rcx
  51878. mov QWORD PTR [r9+24], r13
  51879. ; a[i+36] += m[36] * mu
  51880. mulx rcx, rax, QWORD PTR [r10+288]
  51881. mov r13, QWORD PTR [r9+40]
  51882. adcx r12, rax
  51883. adox r13, rcx
  51884. mov QWORD PTR [r9+32], r12
  51885. ; a[i+37] += m[37] * mu
  51886. mulx rcx, rax, QWORD PTR [r10+296]
  51887. mov r12, QWORD PTR [r9+48]
  51888. adcx r13, rax
  51889. adox r12, rcx
  51890. mov QWORD PTR [r9+40], r13
  51891. ; a[i+38] += m[38] * mu
  51892. mulx rcx, rax, QWORD PTR [r10+304]
  51893. mov r13, QWORD PTR [r9+56]
  51894. adcx r12, rax
  51895. adox r13, rcx
  51896. mov QWORD PTR [r9+48], r12
  51897. ; a[i+39] += m[39] * mu
  51898. mulx rcx, rax, QWORD PTR [r10+312]
  51899. mov r12, QWORD PTR [r9+64]
  51900. adcx r13, rax
  51901. adox r12, rcx
  51902. mov QWORD PTR [r9+56], r13
  51903. ; a[i+40] += m[40] * mu
  51904. mulx rcx, rax, QWORD PTR [r10+320]
  51905. mov r13, QWORD PTR [r9+72]
  51906. adcx r12, rax
  51907. adox r13, rcx
  51908. mov QWORD PTR [r9+64], r12
  51909. ; a[i+41] += m[41] * mu
  51910. mulx rcx, rax, QWORD PTR [r10+328]
  51911. mov r12, QWORD PTR [r9+80]
  51912. adcx r13, rax
  51913. adox r12, rcx
  51914. mov QWORD PTR [r9+72], r13
  51915. ; a[i+42] += m[42] * mu
  51916. mulx rcx, rax, QWORD PTR [r10+336]
  51917. mov r13, QWORD PTR [r9+88]
  51918. adcx r12, rax
  51919. adox r13, rcx
  51920. mov QWORD PTR [r9+80], r12
  51921. ; a[i+43] += m[43] * mu
  51922. mulx rcx, rax, QWORD PTR [r10+344]
  51923. mov r12, QWORD PTR [r9+96]
  51924. adcx r13, rax
  51925. adox r12, rcx
  51926. mov QWORD PTR [r9+88], r13
  51927. ; a[i+44] += m[44] * mu
  51928. mulx rcx, rax, QWORD PTR [r10+352]
  51929. mov r13, QWORD PTR [r9+104]
  51930. adcx r12, rax
  51931. adox r13, rcx
  51932. mov QWORD PTR [r9+96], r12
  51933. ; a[i+45] += m[45] * mu
  51934. mulx rcx, rax, QWORD PTR [r10+360]
  51935. mov r12, QWORD PTR [r9+112]
  51936. adcx r13, rax
  51937. adox r12, rcx
  51938. mov QWORD PTR [r9+104], r13
  51939. ; a[i+46] += m[46] * mu
  51940. mulx rcx, rax, QWORD PTR [r10+368]
  51941. mov r13, QWORD PTR [r9+120]
  51942. adcx r12, rax
  51943. adox r13, rcx
  51944. mov QWORD PTR [r9+112], r12
  51945. ; a[i+47] += m[47] * mu
  51946. mulx rcx, rax, QWORD PTR [r10+376]
  51947. mov r12, QWORD PTR [r9+128]
  51948. adcx r13, rax
  51949. adox r12, rcx
  51950. mov QWORD PTR [r9+120], r13
  51951. ; a[i+48] += m[48] * mu
  51952. mulx rcx, rax, QWORD PTR [r10+384]
  51953. mov r13, QWORD PTR [r9+136]
  51954. adcx r12, rax
  51955. adox r13, rcx
  51956. mov QWORD PTR [r9+128], r12
  51957. ; a[i+49] += m[49] * mu
  51958. mulx rcx, rax, QWORD PTR [r10+392]
  51959. mov r12, QWORD PTR [r9+144]
  51960. adcx r13, rax
  51961. adox r12, rcx
  51962. mov QWORD PTR [r9+136], r13
  51963. ; a[i+50] += m[50] * mu
  51964. mulx rcx, rax, QWORD PTR [r10+400]
  51965. mov r13, QWORD PTR [r9+152]
  51966. adcx r12, rax
  51967. adox r13, rcx
  51968. mov QWORD PTR [r9+144], r12
  51969. ; a[i+51] += m[51] * mu
  51970. mulx rcx, rax, QWORD PTR [r10+408]
  51971. mov r12, QWORD PTR [r9+160]
  51972. adcx r13, rax
  51973. adox r12, rcx
  51974. mov QWORD PTR [r9+152], r13
  51975. ; a[i+52] += m[52] * mu
  51976. mulx rcx, rax, QWORD PTR [r10+416]
  51977. mov r13, QWORD PTR [r9+168]
  51978. adcx r12, rax
  51979. adox r13, rcx
  51980. mov QWORD PTR [r9+160], r12
  51981. ; a[i+53] += m[53] * mu
  51982. mulx rcx, rax, QWORD PTR [r10+424]
  51983. mov r12, QWORD PTR [r9+176]
  51984. adcx r13, rax
  51985. adox r12, rcx
  51986. mov QWORD PTR [r9+168], r13
  51987. ; a[i+54] += m[54] * mu
  51988. mulx rcx, rax, QWORD PTR [r10+432]
  51989. mov r13, QWORD PTR [r9+184]
  51990. adcx r12, rax
  51991. adox r13, rcx
  51992. mov QWORD PTR [r9+176], r12
  51993. ; a[i+55] += m[55] * mu
  51994. mulx rcx, rax, QWORD PTR [r10+440]
  51995. mov r12, QWORD PTR [r9+192]
  51996. adcx r13, rax
  51997. adox r12, rcx
  51998. mov QWORD PTR [r9+184], r13
  51999. ; a[i+56] += m[56] * mu
  52000. mulx rcx, rax, QWORD PTR [r10+448]
  52001. mov r13, QWORD PTR [r9+200]
  52002. adcx r12, rax
  52003. adox r13, rcx
  52004. mov QWORD PTR [r9+192], r12
  52005. ; a[i+57] += m[57] * mu
  52006. mulx rcx, rax, QWORD PTR [r10+456]
  52007. mov r12, QWORD PTR [r9+208]
  52008. adcx r13, rax
  52009. adox r12, rcx
  52010. mov QWORD PTR [r9+200], r13
  52011. ; a[i+58] += m[58] * mu
  52012. mulx rcx, rax, QWORD PTR [r10+464]
  52013. mov r13, QWORD PTR [r9+216]
  52014. adcx r12, rax
  52015. adox r13, rcx
  52016. mov QWORD PTR [r9+208], r12
  52017. ; a[i+59] += m[59] * mu
  52018. mulx rcx, rax, QWORD PTR [r10+472]
  52019. mov r12, QWORD PTR [r9+224]
  52020. adcx r13, rax
  52021. adox r12, rcx
  52022. mov QWORD PTR [r9+216], r13
  52023. ; a[i+60] += m[60] * mu
  52024. mulx rcx, rax, QWORD PTR [r10+480]
  52025. mov r13, QWORD PTR [r9+232]
  52026. adcx r12, rax
  52027. adox r13, rcx
  52028. mov QWORD PTR [r9+224], r12
  52029. ; a[i+61] += m[61] * mu
  52030. mulx rcx, rax, QWORD PTR [r10+488]
  52031. mov r12, QWORD PTR [r9+240]
  52032. adcx r13, rax
  52033. adox r12, rcx
  52034. mov QWORD PTR [r9+232], r13
  52035. ; a[i+62] += m[62] * mu
  52036. mulx rcx, rax, QWORD PTR [r10+496]
  52037. mov r13, QWORD PTR [r9+248]
  52038. adcx r12, rax
  52039. adox r13, rcx
  52040. mov QWORD PTR [r9+240], r12
  52041. ; a[i+63] += m[63] * mu
  52042. mulx rcx, rax, QWORD PTR [r10+504]
  52043. mov r12, QWORD PTR [r9+256]
  52044. adcx r13, rax
  52045. adox r12, rcx
  52046. mov QWORD PTR [r9+248], r13
  52047. adcx r12, rbp
  52048. mov rbp, rbx
  52049. mov QWORD PTR [r9+256], r12
  52050. adox rbp, rbx
  52051. adcx rbp, rbx
  52052. ; a += 1
  52053. add r9, 8
  52054. ; i -= 1
  52055. sub r11, 1
  52056. jnz L_4096_mont_reduce_avx2_64_loop
  52057. sub r9, 256
  52058. neg rbp
  52059. mov r8, r9
  52060. sub r9, 512
  52061. mov rcx, QWORD PTR [r10]
  52062. mov rdx, r14
  52063. pext rcx, rcx, rbp
  52064. sub rdx, rcx
  52065. mov rcx, QWORD PTR [r10+8]
  52066. mov rax, r15
  52067. pext rcx, rcx, rbp
  52068. mov QWORD PTR [r9], rdx
  52069. sbb rax, rcx
  52070. mov rdx, QWORD PTR [r10+16]
  52071. mov rcx, rdi
  52072. pext rdx, rdx, rbp
  52073. mov QWORD PTR [r9+8], rax
  52074. sbb rcx, rdx
  52075. mov rax, QWORD PTR [r10+24]
  52076. mov rdx, rsi
  52077. pext rax, rax, rbp
  52078. mov QWORD PTR [r9+16], rcx
  52079. sbb rdx, rax
  52080. mov rcx, QWORD PTR [r10+32]
  52081. mov rax, QWORD PTR [r8+32]
  52082. pext rcx, rcx, rbp
  52083. mov QWORD PTR [r9+24], rdx
  52084. sbb rax, rcx
  52085. mov rdx, QWORD PTR [r10+40]
  52086. mov rcx, QWORD PTR [r8+40]
  52087. pext rdx, rdx, rbp
  52088. mov QWORD PTR [r9+32], rax
  52089. sbb rcx, rdx
  52090. mov rax, QWORD PTR [r10+48]
  52091. mov rdx, QWORD PTR [r8+48]
  52092. pext rax, rax, rbp
  52093. mov QWORD PTR [r9+40], rcx
  52094. sbb rdx, rax
  52095. mov rcx, QWORD PTR [r10+56]
  52096. mov rax, QWORD PTR [r8+56]
  52097. pext rcx, rcx, rbp
  52098. mov QWORD PTR [r9+48], rdx
  52099. sbb rax, rcx
  52100. mov rdx, QWORD PTR [r10+64]
  52101. mov rcx, QWORD PTR [r8+64]
  52102. pext rdx, rdx, rbp
  52103. mov QWORD PTR [r9+56], rax
  52104. sbb rcx, rdx
  52105. mov rax, QWORD PTR [r10+72]
  52106. mov rdx, QWORD PTR [r8+72]
  52107. pext rax, rax, rbp
  52108. mov QWORD PTR [r9+64], rcx
  52109. sbb rdx, rax
  52110. mov rcx, QWORD PTR [r10+80]
  52111. mov rax, QWORD PTR [r8+80]
  52112. pext rcx, rcx, rbp
  52113. mov QWORD PTR [r9+72], rdx
  52114. sbb rax, rcx
  52115. mov rdx, QWORD PTR [r10+88]
  52116. mov rcx, QWORD PTR [r8+88]
  52117. pext rdx, rdx, rbp
  52118. mov QWORD PTR [r9+80], rax
  52119. sbb rcx, rdx
  52120. mov rax, QWORD PTR [r10+96]
  52121. mov rdx, QWORD PTR [r8+96]
  52122. pext rax, rax, rbp
  52123. mov QWORD PTR [r9+88], rcx
  52124. sbb rdx, rax
  52125. mov rcx, QWORD PTR [r10+104]
  52126. mov rax, QWORD PTR [r8+104]
  52127. pext rcx, rcx, rbp
  52128. mov QWORD PTR [r9+96], rdx
  52129. sbb rax, rcx
  52130. mov rdx, QWORD PTR [r10+112]
  52131. mov rcx, QWORD PTR [r8+112]
  52132. pext rdx, rdx, rbp
  52133. mov QWORD PTR [r9+104], rax
  52134. sbb rcx, rdx
  52135. mov rax, QWORD PTR [r10+120]
  52136. mov rdx, QWORD PTR [r8+120]
  52137. pext rax, rax, rbp
  52138. mov QWORD PTR [r9+112], rcx
  52139. sbb rdx, rax
  52140. mov rcx, QWORD PTR [r10+128]
  52141. mov rax, QWORD PTR [r8+128]
  52142. pext rcx, rcx, rbp
  52143. mov QWORD PTR [r9+120], rdx
  52144. sbb rax, rcx
  52145. mov rdx, QWORD PTR [r10+136]
  52146. mov rcx, QWORD PTR [r8+136]
  52147. pext rdx, rdx, rbp
  52148. mov QWORD PTR [r9+128], rax
  52149. sbb rcx, rdx
  52150. mov rax, QWORD PTR [r10+144]
  52151. mov rdx, QWORD PTR [r8+144]
  52152. pext rax, rax, rbp
  52153. mov QWORD PTR [r9+136], rcx
  52154. sbb rdx, rax
  52155. mov rcx, QWORD PTR [r10+152]
  52156. mov rax, QWORD PTR [r8+152]
  52157. pext rcx, rcx, rbp
  52158. mov QWORD PTR [r9+144], rdx
  52159. sbb rax, rcx
  52160. mov rdx, QWORD PTR [r10+160]
  52161. mov rcx, QWORD PTR [r8+160]
  52162. pext rdx, rdx, rbp
  52163. mov QWORD PTR [r9+152], rax
  52164. sbb rcx, rdx
  52165. mov rax, QWORD PTR [r10+168]
  52166. mov rdx, QWORD PTR [r8+168]
  52167. pext rax, rax, rbp
  52168. mov QWORD PTR [r9+160], rcx
  52169. sbb rdx, rax
  52170. mov rcx, QWORD PTR [r10+176]
  52171. mov rax, QWORD PTR [r8+176]
  52172. pext rcx, rcx, rbp
  52173. mov QWORD PTR [r9+168], rdx
  52174. sbb rax, rcx
  52175. mov rdx, QWORD PTR [r10+184]
  52176. mov rcx, QWORD PTR [r8+184]
  52177. pext rdx, rdx, rbp
  52178. mov QWORD PTR [r9+176], rax
  52179. sbb rcx, rdx
  52180. mov rax, QWORD PTR [r10+192]
  52181. mov rdx, QWORD PTR [r8+192]
  52182. pext rax, rax, rbp
  52183. mov QWORD PTR [r9+184], rcx
  52184. sbb rdx, rax
  52185. mov rcx, QWORD PTR [r10+200]
  52186. mov rax, QWORD PTR [r8+200]
  52187. pext rcx, rcx, rbp
  52188. mov QWORD PTR [r9+192], rdx
  52189. sbb rax, rcx
  52190. mov rdx, QWORD PTR [r10+208]
  52191. mov rcx, QWORD PTR [r8+208]
  52192. pext rdx, rdx, rbp
  52193. mov QWORD PTR [r9+200], rax
  52194. sbb rcx, rdx
  52195. mov rax, QWORD PTR [r10+216]
  52196. mov rdx, QWORD PTR [r8+216]
  52197. pext rax, rax, rbp
  52198. mov QWORD PTR [r9+208], rcx
  52199. sbb rdx, rax
  52200. mov rcx, QWORD PTR [r10+224]
  52201. mov rax, QWORD PTR [r8+224]
  52202. pext rcx, rcx, rbp
  52203. mov QWORD PTR [r9+216], rdx
  52204. sbb rax, rcx
  52205. mov rdx, QWORD PTR [r10+232]
  52206. mov rcx, QWORD PTR [r8+232]
  52207. pext rdx, rdx, rbp
  52208. mov QWORD PTR [r9+224], rax
  52209. sbb rcx, rdx
  52210. mov rax, QWORD PTR [r10+240]
  52211. mov rdx, QWORD PTR [r8+240]
  52212. pext rax, rax, rbp
  52213. mov QWORD PTR [r9+232], rcx
  52214. sbb rdx, rax
  52215. mov rcx, QWORD PTR [r10+248]
  52216. mov rax, QWORD PTR [r8+248]
  52217. pext rcx, rcx, rbp
  52218. mov QWORD PTR [r9+240], rdx
  52219. sbb rax, rcx
  52220. mov rdx, QWORD PTR [r10+256]
  52221. mov rcx, QWORD PTR [r8+256]
  52222. pext rdx, rdx, rbp
  52223. mov QWORD PTR [r9+248], rax
  52224. sbb rcx, rdx
  52225. mov rax, QWORD PTR [r10+264]
  52226. mov rdx, QWORD PTR [r8+264]
  52227. pext rax, rax, rbp
  52228. mov QWORD PTR [r9+256], rcx
  52229. sbb rdx, rax
  52230. mov rcx, QWORD PTR [r10+272]
  52231. mov rax, QWORD PTR [r8+272]
  52232. pext rcx, rcx, rbp
  52233. mov QWORD PTR [r9+264], rdx
  52234. sbb rax, rcx
  52235. mov rdx, QWORD PTR [r10+280]
  52236. mov rcx, QWORD PTR [r8+280]
  52237. pext rdx, rdx, rbp
  52238. mov QWORD PTR [r9+272], rax
  52239. sbb rcx, rdx
  52240. mov rax, QWORD PTR [r10+288]
  52241. mov rdx, QWORD PTR [r8+288]
  52242. pext rax, rax, rbp
  52243. mov QWORD PTR [r9+280], rcx
  52244. sbb rdx, rax
  52245. mov rcx, QWORD PTR [r10+296]
  52246. mov rax, QWORD PTR [r8+296]
  52247. pext rcx, rcx, rbp
  52248. mov QWORD PTR [r9+288], rdx
  52249. sbb rax, rcx
  52250. mov rdx, QWORD PTR [r10+304]
  52251. mov rcx, QWORD PTR [r8+304]
  52252. pext rdx, rdx, rbp
  52253. mov QWORD PTR [r9+296], rax
  52254. sbb rcx, rdx
  52255. mov rax, QWORD PTR [r10+312]
  52256. mov rdx, QWORD PTR [r8+312]
  52257. pext rax, rax, rbp
  52258. mov QWORD PTR [r9+304], rcx
  52259. sbb rdx, rax
  52260. mov rcx, QWORD PTR [r10+320]
  52261. mov rax, QWORD PTR [r8+320]
  52262. pext rcx, rcx, rbp
  52263. mov QWORD PTR [r9+312], rdx
  52264. sbb rax, rcx
  52265. mov rdx, QWORD PTR [r10+328]
  52266. mov rcx, QWORD PTR [r8+328]
  52267. pext rdx, rdx, rbp
  52268. mov QWORD PTR [r9+320], rax
  52269. sbb rcx, rdx
  52270. mov rax, QWORD PTR [r10+336]
  52271. mov rdx, QWORD PTR [r8+336]
  52272. pext rax, rax, rbp
  52273. mov QWORD PTR [r9+328], rcx
  52274. sbb rdx, rax
  52275. mov rcx, QWORD PTR [r10+344]
  52276. mov rax, QWORD PTR [r8+344]
  52277. pext rcx, rcx, rbp
  52278. mov QWORD PTR [r9+336], rdx
  52279. sbb rax, rcx
  52280. mov rdx, QWORD PTR [r10+352]
  52281. mov rcx, QWORD PTR [r8+352]
  52282. pext rdx, rdx, rbp
  52283. mov QWORD PTR [r9+344], rax
  52284. sbb rcx, rdx
  52285. mov rax, QWORD PTR [r10+360]
  52286. mov rdx, QWORD PTR [r8+360]
  52287. pext rax, rax, rbp
  52288. mov QWORD PTR [r9+352], rcx
  52289. sbb rdx, rax
  52290. mov rcx, QWORD PTR [r10+368]
  52291. mov rax, QWORD PTR [r8+368]
  52292. pext rcx, rcx, rbp
  52293. mov QWORD PTR [r9+360], rdx
  52294. sbb rax, rcx
  52295. mov rdx, QWORD PTR [r10+376]
  52296. mov rcx, QWORD PTR [r8+376]
  52297. pext rdx, rdx, rbp
  52298. mov QWORD PTR [r9+368], rax
  52299. sbb rcx, rdx
  52300. mov rax, QWORD PTR [r10+384]
  52301. mov rdx, QWORD PTR [r8+384]
  52302. pext rax, rax, rbp
  52303. mov QWORD PTR [r9+376], rcx
  52304. sbb rdx, rax
  52305. mov rcx, QWORD PTR [r10+392]
  52306. mov rax, QWORD PTR [r8+392]
  52307. pext rcx, rcx, rbp
  52308. mov QWORD PTR [r9+384], rdx
  52309. sbb rax, rcx
  52310. mov rdx, QWORD PTR [r10+400]
  52311. mov rcx, QWORD PTR [r8+400]
  52312. pext rdx, rdx, rbp
  52313. mov QWORD PTR [r9+392], rax
  52314. sbb rcx, rdx
  52315. mov rax, QWORD PTR [r10+408]
  52316. mov rdx, QWORD PTR [r8+408]
  52317. pext rax, rax, rbp
  52318. mov QWORD PTR [r9+400], rcx
  52319. sbb rdx, rax
  52320. mov rcx, QWORD PTR [r10+416]
  52321. mov rax, QWORD PTR [r8+416]
  52322. pext rcx, rcx, rbp
  52323. mov QWORD PTR [r9+408], rdx
  52324. sbb rax, rcx
  52325. mov rdx, QWORD PTR [r10+424]
  52326. mov rcx, QWORD PTR [r8+424]
  52327. pext rdx, rdx, rbp
  52328. mov QWORD PTR [r9+416], rax
  52329. sbb rcx, rdx
  52330. mov rax, QWORD PTR [r10+432]
  52331. mov rdx, QWORD PTR [r8+432]
  52332. pext rax, rax, rbp
  52333. mov QWORD PTR [r9+424], rcx
  52334. sbb rdx, rax
  52335. mov rcx, QWORD PTR [r10+440]
  52336. mov rax, QWORD PTR [r8+440]
  52337. pext rcx, rcx, rbp
  52338. mov QWORD PTR [r9+432], rdx
  52339. sbb rax, rcx
  52340. mov rdx, QWORD PTR [r10+448]
  52341. mov rcx, QWORD PTR [r8+448]
  52342. pext rdx, rdx, rbp
  52343. mov QWORD PTR [r9+440], rax
  52344. sbb rcx, rdx
  52345. mov rax, QWORD PTR [r10+456]
  52346. mov rdx, QWORD PTR [r8+456]
  52347. pext rax, rax, rbp
  52348. mov QWORD PTR [r9+448], rcx
  52349. sbb rdx, rax
  52350. mov rcx, QWORD PTR [r10+464]
  52351. mov rax, QWORD PTR [r8+464]
  52352. pext rcx, rcx, rbp
  52353. mov QWORD PTR [r9+456], rdx
  52354. sbb rax, rcx
  52355. mov rdx, QWORD PTR [r10+472]
  52356. mov rcx, QWORD PTR [r8+472]
  52357. pext rdx, rdx, rbp
  52358. mov QWORD PTR [r9+464], rax
  52359. sbb rcx, rdx
  52360. mov rax, QWORD PTR [r10+480]
  52361. mov rdx, QWORD PTR [r8+480]
  52362. pext rax, rax, rbp
  52363. mov QWORD PTR [r9+472], rcx
  52364. sbb rdx, rax
  52365. mov rcx, QWORD PTR [r10+488]
  52366. mov rax, QWORD PTR [r8+488]
  52367. pext rcx, rcx, rbp
  52368. mov QWORD PTR [r9+480], rdx
  52369. sbb rax, rcx
  52370. mov rdx, QWORD PTR [r10+496]
  52371. mov rcx, QWORD PTR [r8+496]
  52372. pext rdx, rdx, rbp
  52373. mov QWORD PTR [r9+488], rax
  52374. sbb rcx, rdx
  52375. mov rax, QWORD PTR [r10+504]
  52376. mov rdx, QWORD PTR [r8+504]
  52377. pext rax, rax, rbp
  52378. mov QWORD PTR [r9+496], rcx
  52379. sbb rdx, rax
  52380. mov QWORD PTR [r9+504], rdx
  52381. pop rbp
  52382. pop rbx
  52383. pop rsi
  52384. pop rdi
  52385. pop r15
  52386. pop r14
  52387. pop r13
  52388. pop r12
  52389. ret
  52390. sp_4096_mont_reduce_avx2_64 ENDP
  52391. _text ENDS
  52392. ENDIF
  52393. IFNDEF WC_NO_CACHE_RESISTANT
  52394. _text SEGMENT READONLY PARA
  52395. sp_4096_get_from_table_avx2_64 PROC
  52396. sub rsp, 128
  52397. vmovdqu OWORD PTR [rsp], xmm6
  52398. vmovdqu OWORD PTR [rsp+16], xmm7
  52399. vmovdqu OWORD PTR [rsp+32], xmm8
  52400. vmovdqu OWORD PTR [rsp+48], xmm9
  52401. vmovdqu OWORD PTR [rsp+64], xmm10
  52402. vmovdqu OWORD PTR [rsp+80], xmm11
  52403. vmovdqu OWORD PTR [rsp+96], xmm12
  52404. vmovdqu OWORD PTR [rsp+112], xmm13
  52405. mov rax, 1
  52406. movd xmm10, r8
  52407. movd xmm11, rax
  52408. vpxor ymm13, ymm13, ymm13
  52409. vpermd ymm10, ymm13, ymm10
  52410. vpermd ymm11, ymm13, ymm11
  52411. ; START: 0-15
  52412. vpxor ymm13, ymm13, ymm13
  52413. vpxor ymm4, ymm4, ymm4
  52414. vpxor ymm5, ymm5, ymm5
  52415. vpxor ymm6, ymm6, ymm6
  52416. vpxor ymm7, ymm7, ymm7
  52417. ; ENTRY: 0
  52418. mov r9, QWORD PTR [rdx]
  52419. vpcmpeqd ymm12, ymm13, ymm10
  52420. vmovdqu ymm0, YMMWORD PTR [r9]
  52421. vmovdqu ymm1, YMMWORD PTR [r9+32]
  52422. vmovdqu ymm2, YMMWORD PTR [r9+64]
  52423. vmovdqu ymm3, YMMWORD PTR [r9+96]
  52424. vpand ymm0, ymm0, ymm12
  52425. vpand ymm1, ymm1, ymm12
  52426. vpand ymm2, ymm2, ymm12
  52427. vpand ymm3, ymm3, ymm12
  52428. vpor ymm4, ymm4, ymm0
  52429. vpor ymm5, ymm5, ymm1
  52430. vpor ymm6, ymm6, ymm2
  52431. vpor ymm7, ymm7, ymm3
  52432. vpaddd ymm13, ymm13, ymm11
  52433. ; ENTRY: 1
  52434. mov r9, QWORD PTR [rdx+8]
  52435. vpcmpeqd ymm12, ymm13, ymm10
  52436. vmovdqu ymm0, YMMWORD PTR [r9]
  52437. vmovdqu ymm1, YMMWORD PTR [r9+32]
  52438. vmovdqu ymm2, YMMWORD PTR [r9+64]
  52439. vmovdqu ymm3, YMMWORD PTR [r9+96]
  52440. vpand ymm0, ymm0, ymm12
  52441. vpand ymm1, ymm1, ymm12
  52442. vpand ymm2, ymm2, ymm12
  52443. vpand ymm3, ymm3, ymm12
  52444. vpor ymm4, ymm4, ymm0
  52445. vpor ymm5, ymm5, ymm1
  52446. vpor ymm6, ymm6, ymm2
  52447. vpor ymm7, ymm7, ymm3
  52448. vpaddd ymm13, ymm13, ymm11
  52449. ; ENTRY: 2
  52450. mov r9, QWORD PTR [rdx+16]
  52451. vpcmpeqd ymm12, ymm13, ymm10
  52452. vmovdqu ymm0, YMMWORD PTR [r9]
  52453. vmovdqu ymm1, YMMWORD PTR [r9+32]
  52454. vmovdqu ymm2, YMMWORD PTR [r9+64]
  52455. vmovdqu ymm3, YMMWORD PTR [r9+96]
  52456. vpand ymm0, ymm0, ymm12
  52457. vpand ymm1, ymm1, ymm12
  52458. vpand ymm2, ymm2, ymm12
  52459. vpand ymm3, ymm3, ymm12
  52460. vpor ymm4, ymm4, ymm0
  52461. vpor ymm5, ymm5, ymm1
  52462. vpor ymm6, ymm6, ymm2
  52463. vpor ymm7, ymm7, ymm3
  52464. vpaddd ymm13, ymm13, ymm11
  52465. ; ENTRY: 3
  52466. mov r9, QWORD PTR [rdx+24]
  52467. vpcmpeqd ymm12, ymm13, ymm10
  52468. vmovdqu ymm0, YMMWORD PTR [r9]
  52469. vmovdqu ymm1, YMMWORD PTR [r9+32]
  52470. vmovdqu ymm2, YMMWORD PTR [r9+64]
  52471. vmovdqu ymm3, YMMWORD PTR [r9+96]
  52472. vpand ymm0, ymm0, ymm12
  52473. vpand ymm1, ymm1, ymm12
  52474. vpand ymm2, ymm2, ymm12
  52475. vpand ymm3, ymm3, ymm12
  52476. vpor ymm4, ymm4, ymm0
  52477. vpor ymm5, ymm5, ymm1
  52478. vpor ymm6, ymm6, ymm2
  52479. vpor ymm7, ymm7, ymm3
  52480. vpaddd ymm13, ymm13, ymm11
  52481. ; ENTRY: 4
  52482. mov r9, QWORD PTR [rdx+32]
  52483. vpcmpeqd ymm12, ymm13, ymm10
  52484. vmovdqu ymm0, YMMWORD PTR [r9]
  52485. vmovdqu ymm1, YMMWORD PTR [r9+32]
  52486. vmovdqu ymm2, YMMWORD PTR [r9+64]
  52487. vmovdqu ymm3, YMMWORD PTR [r9+96]
  52488. vpand ymm0, ymm0, ymm12
  52489. vpand ymm1, ymm1, ymm12
  52490. vpand ymm2, ymm2, ymm12
  52491. vpand ymm3, ymm3, ymm12
  52492. vpor ymm4, ymm4, ymm0
  52493. vpor ymm5, ymm5, ymm1
  52494. vpor ymm6, ymm6, ymm2
  52495. vpor ymm7, ymm7, ymm3
  52496. vpaddd ymm13, ymm13, ymm11
  52497. ; ENTRY: 5
  52498. mov r9, QWORD PTR [rdx+40]
  52499. vpcmpeqd ymm12, ymm13, ymm10
  52500. vmovdqu ymm0, YMMWORD PTR [r9]
  52501. vmovdqu ymm1, YMMWORD PTR [r9+32]
  52502. vmovdqu ymm2, YMMWORD PTR [r9+64]
  52503. vmovdqu ymm3, YMMWORD PTR [r9+96]
  52504. vpand ymm0, ymm0, ymm12
  52505. vpand ymm1, ymm1, ymm12
  52506. vpand ymm2, ymm2, ymm12
  52507. vpand ymm3, ymm3, ymm12
  52508. vpor ymm4, ymm4, ymm0
  52509. vpor ymm5, ymm5, ymm1
  52510. vpor ymm6, ymm6, ymm2
  52511. vpor ymm7, ymm7, ymm3
  52512. vpaddd ymm13, ymm13, ymm11
  52513. ; ENTRY: 6
  52514. mov r9, QWORD PTR [rdx+48]
  52515. vpcmpeqd ymm12, ymm13, ymm10
  52516. vmovdqu ymm0, YMMWORD PTR [r9]
  52517. vmovdqu ymm1, YMMWORD PTR [r9+32]
  52518. vmovdqu ymm2, YMMWORD PTR [r9+64]
  52519. vmovdqu ymm3, YMMWORD PTR [r9+96]
  52520. vpand ymm0, ymm0, ymm12
  52521. vpand ymm1, ymm1, ymm12
  52522. vpand ymm2, ymm2, ymm12
  52523. vpand ymm3, ymm3, ymm12
  52524. vpor ymm4, ymm4, ymm0
  52525. vpor ymm5, ymm5, ymm1
  52526. vpor ymm6, ymm6, ymm2
  52527. vpor ymm7, ymm7, ymm3
  52528. vpaddd ymm13, ymm13, ymm11
  52529. ; ENTRY: 7
  52530. mov r9, QWORD PTR [rdx+56]
  52531. vpcmpeqd ymm12, ymm13, ymm10
  52532. vmovdqu ymm0, YMMWORD PTR [r9]
  52533. vmovdqu ymm1, YMMWORD PTR [r9+32]
  52534. vmovdqu ymm2, YMMWORD PTR [r9+64]
  52535. vmovdqu ymm3, YMMWORD PTR [r9+96]
  52536. vpand ymm0, ymm0, ymm12
  52537. vpand ymm1, ymm1, ymm12
  52538. vpand ymm2, ymm2, ymm12
  52539. vpand ymm3, ymm3, ymm12
  52540. vpor ymm4, ymm4, ymm0
  52541. vpor ymm5, ymm5, ymm1
  52542. vpor ymm6, ymm6, ymm2
  52543. vpor ymm7, ymm7, ymm3
  52544. vpaddd ymm13, ymm13, ymm11
  52545. ; ENTRY: 8
  52546. mov r9, QWORD PTR [rdx+64]
  52547. vpcmpeqd ymm12, ymm13, ymm10
  52548. vmovdqu ymm0, YMMWORD PTR [r9]
  52549. vmovdqu ymm1, YMMWORD PTR [r9+32]
  52550. vmovdqu ymm2, YMMWORD PTR [r9+64]
  52551. vmovdqu ymm3, YMMWORD PTR [r9+96]
  52552. vpand ymm0, ymm0, ymm12
  52553. vpand ymm1, ymm1, ymm12
  52554. vpand ymm2, ymm2, ymm12
  52555. vpand ymm3, ymm3, ymm12
  52556. vpor ymm4, ymm4, ymm0
  52557. vpor ymm5, ymm5, ymm1
  52558. vpor ymm6, ymm6, ymm2
  52559. vpor ymm7, ymm7, ymm3
  52560. vpaddd ymm13, ymm13, ymm11
  52561. ; ENTRY: 9
  52562. mov r9, QWORD PTR [rdx+72]
  52563. vpcmpeqd ymm12, ymm13, ymm10
  52564. vmovdqu ymm0, YMMWORD PTR [r9]
  52565. vmovdqu ymm1, YMMWORD PTR [r9+32]
  52566. vmovdqu ymm2, YMMWORD PTR [r9+64]
  52567. vmovdqu ymm3, YMMWORD PTR [r9+96]
  52568. vpand ymm0, ymm0, ymm12
  52569. vpand ymm1, ymm1, ymm12
  52570. vpand ymm2, ymm2, ymm12
  52571. vpand ymm3, ymm3, ymm12
  52572. vpor ymm4, ymm4, ymm0
  52573. vpor ymm5, ymm5, ymm1
  52574. vpor ymm6, ymm6, ymm2
  52575. vpor ymm7, ymm7, ymm3
  52576. vpaddd ymm13, ymm13, ymm11
  52577. ; ENTRY: 10
  52578. mov r9, QWORD PTR [rdx+80]
  52579. vpcmpeqd ymm12, ymm13, ymm10
  52580. vmovdqu ymm0, YMMWORD PTR [r9]
  52581. vmovdqu ymm1, YMMWORD PTR [r9+32]
  52582. vmovdqu ymm2, YMMWORD PTR [r9+64]
  52583. vmovdqu ymm3, YMMWORD PTR [r9+96]
  52584. vpand ymm0, ymm0, ymm12
  52585. vpand ymm1, ymm1, ymm12
  52586. vpand ymm2, ymm2, ymm12
  52587. vpand ymm3, ymm3, ymm12
  52588. vpor ymm4, ymm4, ymm0
  52589. vpor ymm5, ymm5, ymm1
  52590. vpor ymm6, ymm6, ymm2
  52591. vpor ymm7, ymm7, ymm3
  52592. vpaddd ymm13, ymm13, ymm11
  52593. ; ENTRY: 11
  52594. mov r9, QWORD PTR [rdx+88]
  52595. vpcmpeqd ymm12, ymm13, ymm10
  52596. vmovdqu ymm0, YMMWORD PTR [r9]
  52597. vmovdqu ymm1, YMMWORD PTR [r9+32]
  52598. vmovdqu ymm2, YMMWORD PTR [r9+64]
  52599. vmovdqu ymm3, YMMWORD PTR [r9+96]
  52600. vpand ymm0, ymm0, ymm12
  52601. vpand ymm1, ymm1, ymm12
  52602. vpand ymm2, ymm2, ymm12
  52603. vpand ymm3, ymm3, ymm12
  52604. vpor ymm4, ymm4, ymm0
  52605. vpor ymm5, ymm5, ymm1
  52606. vpor ymm6, ymm6, ymm2
  52607. vpor ymm7, ymm7, ymm3
  52608. vpaddd ymm13, ymm13, ymm11
  52609. ; ENTRY: 12
  52610. mov r9, QWORD PTR [rdx+96]
  52611. vpcmpeqd ymm12, ymm13, ymm10
  52612. vmovdqu ymm0, YMMWORD PTR [r9]
  52613. vmovdqu ymm1, YMMWORD PTR [r9+32]
  52614. vmovdqu ymm2, YMMWORD PTR [r9+64]
  52615. vmovdqu ymm3, YMMWORD PTR [r9+96]
  52616. vpand ymm0, ymm0, ymm12
  52617. vpand ymm1, ymm1, ymm12
  52618. vpand ymm2, ymm2, ymm12
  52619. vpand ymm3, ymm3, ymm12
  52620. vpor ymm4, ymm4, ymm0
  52621. vpor ymm5, ymm5, ymm1
  52622. vpor ymm6, ymm6, ymm2
  52623. vpor ymm7, ymm7, ymm3
  52624. vpaddd ymm13, ymm13, ymm11
  52625. ; ENTRY: 13
  52626. mov r9, QWORD PTR [rdx+104]
  52627. vpcmpeqd ymm12, ymm13, ymm10
  52628. vmovdqu ymm0, YMMWORD PTR [r9]
  52629. vmovdqu ymm1, YMMWORD PTR [r9+32]
  52630. vmovdqu ymm2, YMMWORD PTR [r9+64]
  52631. vmovdqu ymm3, YMMWORD PTR [r9+96]
  52632. vpand ymm0, ymm0, ymm12
  52633. vpand ymm1, ymm1, ymm12
  52634. vpand ymm2, ymm2, ymm12
  52635. vpand ymm3, ymm3, ymm12
  52636. vpor ymm4, ymm4, ymm0
  52637. vpor ymm5, ymm5, ymm1
  52638. vpor ymm6, ymm6, ymm2
  52639. vpor ymm7, ymm7, ymm3
  52640. vpaddd ymm13, ymm13, ymm11
  52641. ; ENTRY: 14
  52642. mov r9, QWORD PTR [rdx+112]
  52643. vpcmpeqd ymm12, ymm13, ymm10
  52644. vmovdqu ymm0, YMMWORD PTR [r9]
  52645. vmovdqu ymm1, YMMWORD PTR [r9+32]
  52646. vmovdqu ymm2, YMMWORD PTR [r9+64]
  52647. vmovdqu ymm3, YMMWORD PTR [r9+96]
  52648. vpand ymm0, ymm0, ymm12
  52649. vpand ymm1, ymm1, ymm12
  52650. vpand ymm2, ymm2, ymm12
  52651. vpand ymm3, ymm3, ymm12
  52652. vpor ymm4, ymm4, ymm0
  52653. vpor ymm5, ymm5, ymm1
  52654. vpor ymm6, ymm6, ymm2
  52655. vpor ymm7, ymm7, ymm3
  52656. vpaddd ymm13, ymm13, ymm11
  52657. ; ENTRY: 15
  52658. mov r9, QWORD PTR [rdx+120]
  52659. vpcmpeqd ymm12, ymm13, ymm10
  52660. vmovdqu ymm0, YMMWORD PTR [r9]
  52661. vmovdqu ymm1, YMMWORD PTR [r9+32]
  52662. vmovdqu ymm2, YMMWORD PTR [r9+64]
  52663. vmovdqu ymm3, YMMWORD PTR [r9+96]
  52664. vpand ymm0, ymm0, ymm12
  52665. vpand ymm1, ymm1, ymm12
  52666. vpand ymm2, ymm2, ymm12
  52667. vpand ymm3, ymm3, ymm12
  52668. vpor ymm4, ymm4, ymm0
  52669. vpor ymm5, ymm5, ymm1
  52670. vpor ymm6, ymm6, ymm2
  52671. vpor ymm7, ymm7, ymm3
  52672. vpaddd ymm13, ymm13, ymm11
  52673. vmovdqu YMMWORD PTR [rcx], ymm4
  52674. vmovdqu YMMWORD PTR [rcx+32], ymm5
  52675. vmovdqu YMMWORD PTR [rcx+64], ymm6
  52676. vmovdqu YMMWORD PTR [rcx+96], ymm7
  52677. add rcx, 128
  52678. ; END: 0-15
  52679. ; START: 16-31
  52680. vpxor ymm13, ymm13, ymm13
  52681. vpxor ymm4, ymm4, ymm4
  52682. vpxor ymm5, ymm5, ymm5
  52683. vpxor ymm6, ymm6, ymm6
  52684. vpxor ymm7, ymm7, ymm7
  52685. ; ENTRY: 0
  52686. mov r9, QWORD PTR [rdx]
  52687. add r9, 128
  52688. vpcmpeqd ymm12, ymm13, ymm10
  52689. vmovdqu ymm0, YMMWORD PTR [r9]
  52690. vmovdqu ymm1, YMMWORD PTR [r9+32]
  52691. vmovdqu ymm2, YMMWORD PTR [r9+64]
  52692. vmovdqu ymm3, YMMWORD PTR [r9+96]
  52693. vpand ymm0, ymm0, ymm12
  52694. vpand ymm1, ymm1, ymm12
  52695. vpand ymm2, ymm2, ymm12
  52696. vpand ymm3, ymm3, ymm12
  52697. vpor ymm4, ymm4, ymm0
  52698. vpor ymm5, ymm5, ymm1
  52699. vpor ymm6, ymm6, ymm2
  52700. vpor ymm7, ymm7, ymm3
  52701. vpaddd ymm13, ymm13, ymm11
  52702. ; ENTRY: 1
  52703. mov r9, QWORD PTR [rdx+8]
  52704. add r9, 128
  52705. vpcmpeqd ymm12, ymm13, ymm10
  52706. vmovdqu ymm0, YMMWORD PTR [r9]
  52707. vmovdqu ymm1, YMMWORD PTR [r9+32]
  52708. vmovdqu ymm2, YMMWORD PTR [r9+64]
  52709. vmovdqu ymm3, YMMWORD PTR [r9+96]
  52710. vpand ymm0, ymm0, ymm12
  52711. vpand ymm1, ymm1, ymm12
  52712. vpand ymm2, ymm2, ymm12
  52713. vpand ymm3, ymm3, ymm12
  52714. vpor ymm4, ymm4, ymm0
  52715. vpor ymm5, ymm5, ymm1
  52716. vpor ymm6, ymm6, ymm2
  52717. vpor ymm7, ymm7, ymm3
  52718. vpaddd ymm13, ymm13, ymm11
  52719. ; ENTRY: 2
  52720. mov r9, QWORD PTR [rdx+16]
  52721. add r9, 128
  52722. vpcmpeqd ymm12, ymm13, ymm10
  52723. vmovdqu ymm0, YMMWORD PTR [r9]
  52724. vmovdqu ymm1, YMMWORD PTR [r9+32]
  52725. vmovdqu ymm2, YMMWORD PTR [r9+64]
  52726. vmovdqu ymm3, YMMWORD PTR [r9+96]
  52727. vpand ymm0, ymm0, ymm12
  52728. vpand ymm1, ymm1, ymm12
  52729. vpand ymm2, ymm2, ymm12
  52730. vpand ymm3, ymm3, ymm12
  52731. vpor ymm4, ymm4, ymm0
  52732. vpor ymm5, ymm5, ymm1
  52733. vpor ymm6, ymm6, ymm2
  52734. vpor ymm7, ymm7, ymm3
  52735. vpaddd ymm13, ymm13, ymm11
  52736. ; ENTRY: 3
  52737. mov r9, QWORD PTR [rdx+24]
  52738. add r9, 128
  52739. vpcmpeqd ymm12, ymm13, ymm10
  52740. vmovdqu ymm0, YMMWORD PTR [r9]
  52741. vmovdqu ymm1, YMMWORD PTR [r9+32]
  52742. vmovdqu ymm2, YMMWORD PTR [r9+64]
  52743. vmovdqu ymm3, YMMWORD PTR [r9+96]
  52744. vpand ymm0, ymm0, ymm12
  52745. vpand ymm1, ymm1, ymm12
  52746. vpand ymm2, ymm2, ymm12
  52747. vpand ymm3, ymm3, ymm12
  52748. vpor ymm4, ymm4, ymm0
  52749. vpor ymm5, ymm5, ymm1
  52750. vpor ymm6, ymm6, ymm2
  52751. vpor ymm7, ymm7, ymm3
  52752. vpaddd ymm13, ymm13, ymm11
  52753. ; ENTRY: 4
  52754. mov r9, QWORD PTR [rdx+32]
  52755. add r9, 128
  52756. vpcmpeqd ymm12, ymm13, ymm10
  52757. vmovdqu ymm0, YMMWORD PTR [r9]
  52758. vmovdqu ymm1, YMMWORD PTR [r9+32]
  52759. vmovdqu ymm2, YMMWORD PTR [r9+64]
  52760. vmovdqu ymm3, YMMWORD PTR [r9+96]
  52761. vpand ymm0, ymm0, ymm12
  52762. vpand ymm1, ymm1, ymm12
  52763. vpand ymm2, ymm2, ymm12
  52764. vpand ymm3, ymm3, ymm12
  52765. vpor ymm4, ymm4, ymm0
  52766. vpor ymm5, ymm5, ymm1
  52767. vpor ymm6, ymm6, ymm2
  52768. vpor ymm7, ymm7, ymm3
  52769. vpaddd ymm13, ymm13, ymm11
  52770. ; ENTRY: 5
  52771. mov r9, QWORD PTR [rdx+40]
  52772. add r9, 128
  52773. vpcmpeqd ymm12, ymm13, ymm10
  52774. vmovdqu ymm0, YMMWORD PTR [r9]
  52775. vmovdqu ymm1, YMMWORD PTR [r9+32]
  52776. vmovdqu ymm2, YMMWORD PTR [r9+64]
  52777. vmovdqu ymm3, YMMWORD PTR [r9+96]
  52778. vpand ymm0, ymm0, ymm12
  52779. vpand ymm1, ymm1, ymm12
  52780. vpand ymm2, ymm2, ymm12
  52781. vpand ymm3, ymm3, ymm12
  52782. vpor ymm4, ymm4, ymm0
  52783. vpor ymm5, ymm5, ymm1
  52784. vpor ymm6, ymm6, ymm2
  52785. vpor ymm7, ymm7, ymm3
  52786. vpaddd ymm13, ymm13, ymm11
  52787. ; ENTRY: 6
  52788. mov r9, QWORD PTR [rdx+48]
  52789. add r9, 128
  52790. vpcmpeqd ymm12, ymm13, ymm10
  52791. vmovdqu ymm0, YMMWORD PTR [r9]
  52792. vmovdqu ymm1, YMMWORD PTR [r9+32]
  52793. vmovdqu ymm2, YMMWORD PTR [r9+64]
  52794. vmovdqu ymm3, YMMWORD PTR [r9+96]
  52795. vpand ymm0, ymm0, ymm12
  52796. vpand ymm1, ymm1, ymm12
  52797. vpand ymm2, ymm2, ymm12
  52798. vpand ymm3, ymm3, ymm12
  52799. vpor ymm4, ymm4, ymm0
  52800. vpor ymm5, ymm5, ymm1
  52801. vpor ymm6, ymm6, ymm2
  52802. vpor ymm7, ymm7, ymm3
  52803. vpaddd ymm13, ymm13, ymm11
  52804. ; ENTRY: 7
  52805. mov r9, QWORD PTR [rdx+56]
  52806. add r9, 128
  52807. vpcmpeqd ymm12, ymm13, ymm10
  52808. vmovdqu ymm0, YMMWORD PTR [r9]
  52809. vmovdqu ymm1, YMMWORD PTR [r9+32]
  52810. vmovdqu ymm2, YMMWORD PTR [r9+64]
  52811. vmovdqu ymm3, YMMWORD PTR [r9+96]
  52812. vpand ymm0, ymm0, ymm12
  52813. vpand ymm1, ymm1, ymm12
  52814. vpand ymm2, ymm2, ymm12
  52815. vpand ymm3, ymm3, ymm12
  52816. vpor ymm4, ymm4, ymm0
  52817. vpor ymm5, ymm5, ymm1
  52818. vpor ymm6, ymm6, ymm2
  52819. vpor ymm7, ymm7, ymm3
  52820. vpaddd ymm13, ymm13, ymm11
  52821. ; ENTRY: 8
  52822. mov r9, QWORD PTR [rdx+64]
  52823. add r9, 128
  52824. vpcmpeqd ymm12, ymm13, ymm10
  52825. vmovdqu ymm0, YMMWORD PTR [r9]
  52826. vmovdqu ymm1, YMMWORD PTR [r9+32]
  52827. vmovdqu ymm2, YMMWORD PTR [r9+64]
  52828. vmovdqu ymm3, YMMWORD PTR [r9+96]
  52829. vpand ymm0, ymm0, ymm12
  52830. vpand ymm1, ymm1, ymm12
  52831. vpand ymm2, ymm2, ymm12
  52832. vpand ymm3, ymm3, ymm12
  52833. vpor ymm4, ymm4, ymm0
  52834. vpor ymm5, ymm5, ymm1
  52835. vpor ymm6, ymm6, ymm2
  52836. vpor ymm7, ymm7, ymm3
  52837. vpaddd ymm13, ymm13, ymm11
  52838. ; ENTRY: 9
  52839. mov r9, QWORD PTR [rdx+72]
  52840. add r9, 128
  52841. vpcmpeqd ymm12, ymm13, ymm10
  52842. vmovdqu ymm0, YMMWORD PTR [r9]
  52843. vmovdqu ymm1, YMMWORD PTR [r9+32]
  52844. vmovdqu ymm2, YMMWORD PTR [r9+64]
  52845. vmovdqu ymm3, YMMWORD PTR [r9+96]
  52846. vpand ymm0, ymm0, ymm12
  52847. vpand ymm1, ymm1, ymm12
  52848. vpand ymm2, ymm2, ymm12
  52849. vpand ymm3, ymm3, ymm12
  52850. vpor ymm4, ymm4, ymm0
  52851. vpor ymm5, ymm5, ymm1
  52852. vpor ymm6, ymm6, ymm2
  52853. vpor ymm7, ymm7, ymm3
  52854. vpaddd ymm13, ymm13, ymm11
  52855. ; ENTRY: 10
  52856. mov r9, QWORD PTR [rdx+80]
  52857. add r9, 128
  52858. vpcmpeqd ymm12, ymm13, ymm10
  52859. vmovdqu ymm0, YMMWORD PTR [r9]
  52860. vmovdqu ymm1, YMMWORD PTR [r9+32]
  52861. vmovdqu ymm2, YMMWORD PTR [r9+64]
  52862. vmovdqu ymm3, YMMWORD PTR [r9+96]
  52863. vpand ymm0, ymm0, ymm12
  52864. vpand ymm1, ymm1, ymm12
  52865. vpand ymm2, ymm2, ymm12
  52866. vpand ymm3, ymm3, ymm12
  52867. vpor ymm4, ymm4, ymm0
  52868. vpor ymm5, ymm5, ymm1
  52869. vpor ymm6, ymm6, ymm2
  52870. vpor ymm7, ymm7, ymm3
  52871. vpaddd ymm13, ymm13, ymm11
  52872. ; ENTRY: 11
  52873. mov r9, QWORD PTR [rdx+88]
  52874. add r9, 128
  52875. vpcmpeqd ymm12, ymm13, ymm10
  52876. vmovdqu ymm0, YMMWORD PTR [r9]
  52877. vmovdqu ymm1, YMMWORD PTR [r9+32]
  52878. vmovdqu ymm2, YMMWORD PTR [r9+64]
  52879. vmovdqu ymm3, YMMWORD PTR [r9+96]
  52880. vpand ymm0, ymm0, ymm12
  52881. vpand ymm1, ymm1, ymm12
  52882. vpand ymm2, ymm2, ymm12
  52883. vpand ymm3, ymm3, ymm12
  52884. vpor ymm4, ymm4, ymm0
  52885. vpor ymm5, ymm5, ymm1
  52886. vpor ymm6, ymm6, ymm2
  52887. vpor ymm7, ymm7, ymm3
  52888. vpaddd ymm13, ymm13, ymm11
  52889. ; ENTRY: 12
  52890. mov r9, QWORD PTR [rdx+96]
  52891. add r9, 128
  52892. vpcmpeqd ymm12, ymm13, ymm10
  52893. vmovdqu ymm0, YMMWORD PTR [r9]
  52894. vmovdqu ymm1, YMMWORD PTR [r9+32]
  52895. vmovdqu ymm2, YMMWORD PTR [r9+64]
  52896. vmovdqu ymm3, YMMWORD PTR [r9+96]
  52897. vpand ymm0, ymm0, ymm12
  52898. vpand ymm1, ymm1, ymm12
  52899. vpand ymm2, ymm2, ymm12
  52900. vpand ymm3, ymm3, ymm12
  52901. vpor ymm4, ymm4, ymm0
  52902. vpor ymm5, ymm5, ymm1
  52903. vpor ymm6, ymm6, ymm2
  52904. vpor ymm7, ymm7, ymm3
  52905. vpaddd ymm13, ymm13, ymm11
  52906. ; ENTRY: 13
  52907. mov r9, QWORD PTR [rdx+104]
  52908. add r9, 128
  52909. vpcmpeqd ymm12, ymm13, ymm10
  52910. vmovdqu ymm0, YMMWORD PTR [r9]
  52911. vmovdqu ymm1, YMMWORD PTR [r9+32]
  52912. vmovdqu ymm2, YMMWORD PTR [r9+64]
  52913. vmovdqu ymm3, YMMWORD PTR [r9+96]
  52914. vpand ymm0, ymm0, ymm12
  52915. vpand ymm1, ymm1, ymm12
  52916. vpand ymm2, ymm2, ymm12
  52917. vpand ymm3, ymm3, ymm12
  52918. vpor ymm4, ymm4, ymm0
  52919. vpor ymm5, ymm5, ymm1
  52920. vpor ymm6, ymm6, ymm2
  52921. vpor ymm7, ymm7, ymm3
  52922. vpaddd ymm13, ymm13, ymm11
  52923. ; ENTRY: 14
  52924. mov r9, QWORD PTR [rdx+112]
  52925. add r9, 128
  52926. vpcmpeqd ymm12, ymm13, ymm10
  52927. vmovdqu ymm0, YMMWORD PTR [r9]
  52928. vmovdqu ymm1, YMMWORD PTR [r9+32]
  52929. vmovdqu ymm2, YMMWORD PTR [r9+64]
  52930. vmovdqu ymm3, YMMWORD PTR [r9+96]
  52931. vpand ymm0, ymm0, ymm12
  52932. vpand ymm1, ymm1, ymm12
  52933. vpand ymm2, ymm2, ymm12
  52934. vpand ymm3, ymm3, ymm12
  52935. vpor ymm4, ymm4, ymm0
  52936. vpor ymm5, ymm5, ymm1
  52937. vpor ymm6, ymm6, ymm2
  52938. vpor ymm7, ymm7, ymm3
  52939. vpaddd ymm13, ymm13, ymm11
  52940. ; ENTRY: 15
  52941. mov r9, QWORD PTR [rdx+120]
  52942. add r9, 128
  52943. vpcmpeqd ymm12, ymm13, ymm10
  52944. vmovdqu ymm0, YMMWORD PTR [r9]
  52945. vmovdqu ymm1, YMMWORD PTR [r9+32]
  52946. vmovdqu ymm2, YMMWORD PTR [r9+64]
  52947. vmovdqu ymm3, YMMWORD PTR [r9+96]
  52948. vpand ymm0, ymm0, ymm12
  52949. vpand ymm1, ymm1, ymm12
  52950. vpand ymm2, ymm2, ymm12
  52951. vpand ymm3, ymm3, ymm12
  52952. vpor ymm4, ymm4, ymm0
  52953. vpor ymm5, ymm5, ymm1
  52954. vpor ymm6, ymm6, ymm2
  52955. vpor ymm7, ymm7, ymm3
  52956. vpaddd ymm13, ymm13, ymm11
  52957. vmovdqu YMMWORD PTR [rcx], ymm4
  52958. vmovdqu YMMWORD PTR [rcx+32], ymm5
  52959. vmovdqu YMMWORD PTR [rcx+64], ymm6
  52960. vmovdqu YMMWORD PTR [rcx+96], ymm7
  52961. add rcx, 128
  52962. ; END: 16-31
  52963. ; START: 32-47
  52964. vpxor ymm13, ymm13, ymm13
  52965. vpxor ymm4, ymm4, ymm4
  52966. vpxor ymm5, ymm5, ymm5
  52967. vpxor ymm6, ymm6, ymm6
  52968. vpxor ymm7, ymm7, ymm7
  52969. ; ENTRY: 0
  52970. mov r9, QWORD PTR [rdx]
  52971. add r9, 256
  52972. vpcmpeqd ymm12, ymm13, ymm10
  52973. vmovdqu ymm0, YMMWORD PTR [r9]
  52974. vmovdqu ymm1, YMMWORD PTR [r9+32]
  52975. vmovdqu ymm2, YMMWORD PTR [r9+64]
  52976. vmovdqu ymm3, YMMWORD PTR [r9+96]
  52977. vpand ymm0, ymm0, ymm12
  52978. vpand ymm1, ymm1, ymm12
  52979. vpand ymm2, ymm2, ymm12
  52980. vpand ymm3, ymm3, ymm12
  52981. vpor ymm4, ymm4, ymm0
  52982. vpor ymm5, ymm5, ymm1
  52983. vpor ymm6, ymm6, ymm2
  52984. vpor ymm7, ymm7, ymm3
  52985. vpaddd ymm13, ymm13, ymm11
  52986. ; ENTRY: 1
  52987. mov r9, QWORD PTR [rdx+8]
  52988. add r9, 256
  52989. vpcmpeqd ymm12, ymm13, ymm10
  52990. vmovdqu ymm0, YMMWORD PTR [r9]
  52991. vmovdqu ymm1, YMMWORD PTR [r9+32]
  52992. vmovdqu ymm2, YMMWORD PTR [r9+64]
  52993. vmovdqu ymm3, YMMWORD PTR [r9+96]
  52994. vpand ymm0, ymm0, ymm12
  52995. vpand ymm1, ymm1, ymm12
  52996. vpand ymm2, ymm2, ymm12
  52997. vpand ymm3, ymm3, ymm12
  52998. vpor ymm4, ymm4, ymm0
  52999. vpor ymm5, ymm5, ymm1
  53000. vpor ymm6, ymm6, ymm2
  53001. vpor ymm7, ymm7, ymm3
  53002. vpaddd ymm13, ymm13, ymm11
  53003. ; ENTRY: 2
  53004. mov r9, QWORD PTR [rdx+16]
  53005. add r9, 256
  53006. vpcmpeqd ymm12, ymm13, ymm10
  53007. vmovdqu ymm0, YMMWORD PTR [r9]
  53008. vmovdqu ymm1, YMMWORD PTR [r9+32]
  53009. vmovdqu ymm2, YMMWORD PTR [r9+64]
  53010. vmovdqu ymm3, YMMWORD PTR [r9+96]
  53011. vpand ymm0, ymm0, ymm12
  53012. vpand ymm1, ymm1, ymm12
  53013. vpand ymm2, ymm2, ymm12
  53014. vpand ymm3, ymm3, ymm12
  53015. vpor ymm4, ymm4, ymm0
  53016. vpor ymm5, ymm5, ymm1
  53017. vpor ymm6, ymm6, ymm2
  53018. vpor ymm7, ymm7, ymm3
  53019. vpaddd ymm13, ymm13, ymm11
  53020. ; ENTRY: 3
  53021. mov r9, QWORD PTR [rdx+24]
  53022. add r9, 256
  53023. vpcmpeqd ymm12, ymm13, ymm10
  53024. vmovdqu ymm0, YMMWORD PTR [r9]
  53025. vmovdqu ymm1, YMMWORD PTR [r9+32]
  53026. vmovdqu ymm2, YMMWORD PTR [r9+64]
  53027. vmovdqu ymm3, YMMWORD PTR [r9+96]
  53028. vpand ymm0, ymm0, ymm12
  53029. vpand ymm1, ymm1, ymm12
  53030. vpand ymm2, ymm2, ymm12
  53031. vpand ymm3, ymm3, ymm12
  53032. vpor ymm4, ymm4, ymm0
  53033. vpor ymm5, ymm5, ymm1
  53034. vpor ymm6, ymm6, ymm2
  53035. vpor ymm7, ymm7, ymm3
  53036. vpaddd ymm13, ymm13, ymm11
  53037. ; ENTRY: 4
  53038. mov r9, QWORD PTR [rdx+32]
  53039. add r9, 256
  53040. vpcmpeqd ymm12, ymm13, ymm10
  53041. vmovdqu ymm0, YMMWORD PTR [r9]
  53042. vmovdqu ymm1, YMMWORD PTR [r9+32]
  53043. vmovdqu ymm2, YMMWORD PTR [r9+64]
  53044. vmovdqu ymm3, YMMWORD PTR [r9+96]
  53045. vpand ymm0, ymm0, ymm12
  53046. vpand ymm1, ymm1, ymm12
  53047. vpand ymm2, ymm2, ymm12
  53048. vpand ymm3, ymm3, ymm12
  53049. vpor ymm4, ymm4, ymm0
  53050. vpor ymm5, ymm5, ymm1
  53051. vpor ymm6, ymm6, ymm2
  53052. vpor ymm7, ymm7, ymm3
  53053. vpaddd ymm13, ymm13, ymm11
  53054. ; ENTRY: 5
  53055. mov r9, QWORD PTR [rdx+40]
  53056. add r9, 256
  53057. vpcmpeqd ymm12, ymm13, ymm10
  53058. vmovdqu ymm0, YMMWORD PTR [r9]
  53059. vmovdqu ymm1, YMMWORD PTR [r9+32]
  53060. vmovdqu ymm2, YMMWORD PTR [r9+64]
  53061. vmovdqu ymm3, YMMWORD PTR [r9+96]
  53062. vpand ymm0, ymm0, ymm12
  53063. vpand ymm1, ymm1, ymm12
  53064. vpand ymm2, ymm2, ymm12
  53065. vpand ymm3, ymm3, ymm12
  53066. vpor ymm4, ymm4, ymm0
  53067. vpor ymm5, ymm5, ymm1
  53068. vpor ymm6, ymm6, ymm2
  53069. vpor ymm7, ymm7, ymm3
  53070. vpaddd ymm13, ymm13, ymm11
  53071. ; ENTRY: 6
  53072. mov r9, QWORD PTR [rdx+48]
  53073. add r9, 256
  53074. vpcmpeqd ymm12, ymm13, ymm10
  53075. vmovdqu ymm0, YMMWORD PTR [r9]
  53076. vmovdqu ymm1, YMMWORD PTR [r9+32]
  53077. vmovdqu ymm2, YMMWORD PTR [r9+64]
  53078. vmovdqu ymm3, YMMWORD PTR [r9+96]
  53079. vpand ymm0, ymm0, ymm12
  53080. vpand ymm1, ymm1, ymm12
  53081. vpand ymm2, ymm2, ymm12
  53082. vpand ymm3, ymm3, ymm12
  53083. vpor ymm4, ymm4, ymm0
  53084. vpor ymm5, ymm5, ymm1
  53085. vpor ymm6, ymm6, ymm2
  53086. vpor ymm7, ymm7, ymm3
  53087. vpaddd ymm13, ymm13, ymm11
  53088. ; ENTRY: 7
  53089. mov r9, QWORD PTR [rdx+56]
  53090. add r9, 256
  53091. vpcmpeqd ymm12, ymm13, ymm10
  53092. vmovdqu ymm0, YMMWORD PTR [r9]
  53093. vmovdqu ymm1, YMMWORD PTR [r9+32]
  53094. vmovdqu ymm2, YMMWORD PTR [r9+64]
  53095. vmovdqu ymm3, YMMWORD PTR [r9+96]
  53096. vpand ymm0, ymm0, ymm12
  53097. vpand ymm1, ymm1, ymm12
  53098. vpand ymm2, ymm2, ymm12
  53099. vpand ymm3, ymm3, ymm12
  53100. vpor ymm4, ymm4, ymm0
  53101. vpor ymm5, ymm5, ymm1
  53102. vpor ymm6, ymm6, ymm2
  53103. vpor ymm7, ymm7, ymm3
  53104. vpaddd ymm13, ymm13, ymm11
  53105. ; ENTRY: 8
  53106. mov r9, QWORD PTR [rdx+64]
  53107. add r9, 256
  53108. vpcmpeqd ymm12, ymm13, ymm10
  53109. vmovdqu ymm0, YMMWORD PTR [r9]
  53110. vmovdqu ymm1, YMMWORD PTR [r9+32]
  53111. vmovdqu ymm2, YMMWORD PTR [r9+64]
  53112. vmovdqu ymm3, YMMWORD PTR [r9+96]
  53113. vpand ymm0, ymm0, ymm12
  53114. vpand ymm1, ymm1, ymm12
  53115. vpand ymm2, ymm2, ymm12
  53116. vpand ymm3, ymm3, ymm12
  53117. vpor ymm4, ymm4, ymm0
  53118. vpor ymm5, ymm5, ymm1
  53119. vpor ymm6, ymm6, ymm2
  53120. vpor ymm7, ymm7, ymm3
  53121. vpaddd ymm13, ymm13, ymm11
  53122. ; ENTRY: 9
  53123. mov r9, QWORD PTR [rdx+72]
  53124. add r9, 256
  53125. vpcmpeqd ymm12, ymm13, ymm10
  53126. vmovdqu ymm0, YMMWORD PTR [r9]
  53127. vmovdqu ymm1, YMMWORD PTR [r9+32]
  53128. vmovdqu ymm2, YMMWORD PTR [r9+64]
  53129. vmovdqu ymm3, YMMWORD PTR [r9+96]
  53130. vpand ymm0, ymm0, ymm12
  53131. vpand ymm1, ymm1, ymm12
  53132. vpand ymm2, ymm2, ymm12
  53133. vpand ymm3, ymm3, ymm12
  53134. vpor ymm4, ymm4, ymm0
  53135. vpor ymm5, ymm5, ymm1
  53136. vpor ymm6, ymm6, ymm2
  53137. vpor ymm7, ymm7, ymm3
  53138. vpaddd ymm13, ymm13, ymm11
  53139. ; ENTRY: 10
  53140. mov r9, QWORD PTR [rdx+80]
  53141. add r9, 256
  53142. vpcmpeqd ymm12, ymm13, ymm10
  53143. vmovdqu ymm0, YMMWORD PTR [r9]
  53144. vmovdqu ymm1, YMMWORD PTR [r9+32]
  53145. vmovdqu ymm2, YMMWORD PTR [r9+64]
  53146. vmovdqu ymm3, YMMWORD PTR [r9+96]
  53147. vpand ymm0, ymm0, ymm12
  53148. vpand ymm1, ymm1, ymm12
  53149. vpand ymm2, ymm2, ymm12
  53150. vpand ymm3, ymm3, ymm12
  53151. vpor ymm4, ymm4, ymm0
  53152. vpor ymm5, ymm5, ymm1
  53153. vpor ymm6, ymm6, ymm2
  53154. vpor ymm7, ymm7, ymm3
  53155. vpaddd ymm13, ymm13, ymm11
  53156. ; ENTRY: 11
  53157. mov r9, QWORD PTR [rdx+88]
  53158. add r9, 256
  53159. vpcmpeqd ymm12, ymm13, ymm10
  53160. vmovdqu ymm0, YMMWORD PTR [r9]
  53161. vmovdqu ymm1, YMMWORD PTR [r9+32]
  53162. vmovdqu ymm2, YMMWORD PTR [r9+64]
  53163. vmovdqu ymm3, YMMWORD PTR [r9+96]
  53164. vpand ymm0, ymm0, ymm12
  53165. vpand ymm1, ymm1, ymm12
  53166. vpand ymm2, ymm2, ymm12
  53167. vpand ymm3, ymm3, ymm12
  53168. vpor ymm4, ymm4, ymm0
  53169. vpor ymm5, ymm5, ymm1
  53170. vpor ymm6, ymm6, ymm2
  53171. vpor ymm7, ymm7, ymm3
  53172. vpaddd ymm13, ymm13, ymm11
  53173. ; ENTRY: 12
  53174. mov r9, QWORD PTR [rdx+96]
  53175. add r9, 256
  53176. vpcmpeqd ymm12, ymm13, ymm10
  53177. vmovdqu ymm0, YMMWORD PTR [r9]
  53178. vmovdqu ymm1, YMMWORD PTR [r9+32]
  53179. vmovdqu ymm2, YMMWORD PTR [r9+64]
  53180. vmovdqu ymm3, YMMWORD PTR [r9+96]
  53181. vpand ymm0, ymm0, ymm12
  53182. vpand ymm1, ymm1, ymm12
  53183. vpand ymm2, ymm2, ymm12
  53184. vpand ymm3, ymm3, ymm12
  53185. vpor ymm4, ymm4, ymm0
  53186. vpor ymm5, ymm5, ymm1
  53187. vpor ymm6, ymm6, ymm2
  53188. vpor ymm7, ymm7, ymm3
  53189. vpaddd ymm13, ymm13, ymm11
  53190. ; ENTRY: 13
  53191. mov r9, QWORD PTR [rdx+104]
  53192. add r9, 256
  53193. vpcmpeqd ymm12, ymm13, ymm10
  53194. vmovdqu ymm0, YMMWORD PTR [r9]
  53195. vmovdqu ymm1, YMMWORD PTR [r9+32]
  53196. vmovdqu ymm2, YMMWORD PTR [r9+64]
  53197. vmovdqu ymm3, YMMWORD PTR [r9+96]
  53198. vpand ymm0, ymm0, ymm12
  53199. vpand ymm1, ymm1, ymm12
  53200. vpand ymm2, ymm2, ymm12
  53201. vpand ymm3, ymm3, ymm12
  53202. vpor ymm4, ymm4, ymm0
  53203. vpor ymm5, ymm5, ymm1
  53204. vpor ymm6, ymm6, ymm2
  53205. vpor ymm7, ymm7, ymm3
  53206. vpaddd ymm13, ymm13, ymm11
  53207. ; ENTRY: 14
  53208. mov r9, QWORD PTR [rdx+112]
  53209. add r9, 256
  53210. vpcmpeqd ymm12, ymm13, ymm10
  53211. vmovdqu ymm0, YMMWORD PTR [r9]
  53212. vmovdqu ymm1, YMMWORD PTR [r9+32]
  53213. vmovdqu ymm2, YMMWORD PTR [r9+64]
  53214. vmovdqu ymm3, YMMWORD PTR [r9+96]
  53215. vpand ymm0, ymm0, ymm12
  53216. vpand ymm1, ymm1, ymm12
  53217. vpand ymm2, ymm2, ymm12
  53218. vpand ymm3, ymm3, ymm12
  53219. vpor ymm4, ymm4, ymm0
  53220. vpor ymm5, ymm5, ymm1
  53221. vpor ymm6, ymm6, ymm2
  53222. vpor ymm7, ymm7, ymm3
  53223. vpaddd ymm13, ymm13, ymm11
  53224. ; ENTRY: 15
  53225. mov r9, QWORD PTR [rdx+120]
  53226. add r9, 256
  53227. vpcmpeqd ymm12, ymm13, ymm10
  53228. vmovdqu ymm0, YMMWORD PTR [r9]
  53229. vmovdqu ymm1, YMMWORD PTR [r9+32]
  53230. vmovdqu ymm2, YMMWORD PTR [r9+64]
  53231. vmovdqu ymm3, YMMWORD PTR [r9+96]
  53232. vpand ymm0, ymm0, ymm12
  53233. vpand ymm1, ymm1, ymm12
  53234. vpand ymm2, ymm2, ymm12
  53235. vpand ymm3, ymm3, ymm12
  53236. vpor ymm4, ymm4, ymm0
  53237. vpor ymm5, ymm5, ymm1
  53238. vpor ymm6, ymm6, ymm2
  53239. vpor ymm7, ymm7, ymm3
  53240. vpaddd ymm13, ymm13, ymm11
  53241. vmovdqu YMMWORD PTR [rcx], ymm4
  53242. vmovdqu YMMWORD PTR [rcx+32], ymm5
  53243. vmovdqu YMMWORD PTR [rcx+64], ymm6
  53244. vmovdqu YMMWORD PTR [rcx+96], ymm7
  53245. add rcx, 128
  53246. ; END: 32-47
  53247. ; START: 48-63
  53248. vpxor ymm13, ymm13, ymm13
  53249. vpxor ymm4, ymm4, ymm4
  53250. vpxor ymm5, ymm5, ymm5
  53251. vpxor ymm6, ymm6, ymm6
  53252. vpxor ymm7, ymm7, ymm7
  53253. ; ENTRY: 0
  53254. mov r9, QWORD PTR [rdx]
  53255. add r9, 384
  53256. vpcmpeqd ymm12, ymm13, ymm10
  53257. vmovdqu ymm0, YMMWORD PTR [r9]
  53258. vmovdqu ymm1, YMMWORD PTR [r9+32]
  53259. vmovdqu ymm2, YMMWORD PTR [r9+64]
  53260. vmovdqu ymm3, YMMWORD PTR [r9+96]
  53261. vpand ymm0, ymm0, ymm12
  53262. vpand ymm1, ymm1, ymm12
  53263. vpand ymm2, ymm2, ymm12
  53264. vpand ymm3, ymm3, ymm12
  53265. vpor ymm4, ymm4, ymm0
  53266. vpor ymm5, ymm5, ymm1
  53267. vpor ymm6, ymm6, ymm2
  53268. vpor ymm7, ymm7, ymm3
  53269. vpaddd ymm13, ymm13, ymm11
  53270. ; ENTRY: 1
  53271. mov r9, QWORD PTR [rdx+8]
  53272. add r9, 384
  53273. vpcmpeqd ymm12, ymm13, ymm10
  53274. vmovdqu ymm0, YMMWORD PTR [r9]
  53275. vmovdqu ymm1, YMMWORD PTR [r9+32]
  53276. vmovdqu ymm2, YMMWORD PTR [r9+64]
  53277. vmovdqu ymm3, YMMWORD PTR [r9+96]
  53278. vpand ymm0, ymm0, ymm12
  53279. vpand ymm1, ymm1, ymm12
  53280. vpand ymm2, ymm2, ymm12
  53281. vpand ymm3, ymm3, ymm12
  53282. vpor ymm4, ymm4, ymm0
  53283. vpor ymm5, ymm5, ymm1
  53284. vpor ymm6, ymm6, ymm2
  53285. vpor ymm7, ymm7, ymm3
  53286. vpaddd ymm13, ymm13, ymm11
  53287. ; ENTRY: 2
  53288. mov r9, QWORD PTR [rdx+16]
  53289. add r9, 384
  53290. vpcmpeqd ymm12, ymm13, ymm10
  53291. vmovdqu ymm0, YMMWORD PTR [r9]
  53292. vmovdqu ymm1, YMMWORD PTR [r9+32]
  53293. vmovdqu ymm2, YMMWORD PTR [r9+64]
  53294. vmovdqu ymm3, YMMWORD PTR [r9+96]
  53295. vpand ymm0, ymm0, ymm12
  53296. vpand ymm1, ymm1, ymm12
  53297. vpand ymm2, ymm2, ymm12
  53298. vpand ymm3, ymm3, ymm12
  53299. vpor ymm4, ymm4, ymm0
  53300. vpor ymm5, ymm5, ymm1
  53301. vpor ymm6, ymm6, ymm2
  53302. vpor ymm7, ymm7, ymm3
  53303. vpaddd ymm13, ymm13, ymm11
  53304. ; ENTRY: 3
  53305. mov r9, QWORD PTR [rdx+24]
  53306. add r9, 384
  53307. vpcmpeqd ymm12, ymm13, ymm10
  53308. vmovdqu ymm0, YMMWORD PTR [r9]
  53309. vmovdqu ymm1, YMMWORD PTR [r9+32]
  53310. vmovdqu ymm2, YMMWORD PTR [r9+64]
  53311. vmovdqu ymm3, YMMWORD PTR [r9+96]
  53312. vpand ymm0, ymm0, ymm12
  53313. vpand ymm1, ymm1, ymm12
  53314. vpand ymm2, ymm2, ymm12
  53315. vpand ymm3, ymm3, ymm12
  53316. vpor ymm4, ymm4, ymm0
  53317. vpor ymm5, ymm5, ymm1
  53318. vpor ymm6, ymm6, ymm2
  53319. vpor ymm7, ymm7, ymm3
  53320. vpaddd ymm13, ymm13, ymm11
  53321. ; ENTRY: 4
  53322. mov r9, QWORD PTR [rdx+32]
  53323. add r9, 384
  53324. vpcmpeqd ymm12, ymm13, ymm10
  53325. vmovdqu ymm0, YMMWORD PTR [r9]
  53326. vmovdqu ymm1, YMMWORD PTR [r9+32]
  53327. vmovdqu ymm2, YMMWORD PTR [r9+64]
  53328. vmovdqu ymm3, YMMWORD PTR [r9+96]
  53329. vpand ymm0, ymm0, ymm12
  53330. vpand ymm1, ymm1, ymm12
  53331. vpand ymm2, ymm2, ymm12
  53332. vpand ymm3, ymm3, ymm12
  53333. vpor ymm4, ymm4, ymm0
  53334. vpor ymm5, ymm5, ymm1
  53335. vpor ymm6, ymm6, ymm2
  53336. vpor ymm7, ymm7, ymm3
  53337. vpaddd ymm13, ymm13, ymm11
  53338. ; ENTRY: 5
  53339. mov r9, QWORD PTR [rdx+40]
  53340. add r9, 384
  53341. vpcmpeqd ymm12, ymm13, ymm10
  53342. vmovdqu ymm0, YMMWORD PTR [r9]
  53343. vmovdqu ymm1, YMMWORD PTR [r9+32]
  53344. vmovdqu ymm2, YMMWORD PTR [r9+64]
  53345. vmovdqu ymm3, YMMWORD PTR [r9+96]
  53346. vpand ymm0, ymm0, ymm12
  53347. vpand ymm1, ymm1, ymm12
  53348. vpand ymm2, ymm2, ymm12
  53349. vpand ymm3, ymm3, ymm12
  53350. vpor ymm4, ymm4, ymm0
  53351. vpor ymm5, ymm5, ymm1
  53352. vpor ymm6, ymm6, ymm2
  53353. vpor ymm7, ymm7, ymm3
  53354. vpaddd ymm13, ymm13, ymm11
  53355. ; ENTRY: 6
  53356. mov r9, QWORD PTR [rdx+48]
  53357. add r9, 384
  53358. vpcmpeqd ymm12, ymm13, ymm10
  53359. vmovdqu ymm0, YMMWORD PTR [r9]
  53360. vmovdqu ymm1, YMMWORD PTR [r9+32]
  53361. vmovdqu ymm2, YMMWORD PTR [r9+64]
  53362. vmovdqu ymm3, YMMWORD PTR [r9+96]
  53363. vpand ymm0, ymm0, ymm12
  53364. vpand ymm1, ymm1, ymm12
  53365. vpand ymm2, ymm2, ymm12
  53366. vpand ymm3, ymm3, ymm12
  53367. vpor ymm4, ymm4, ymm0
  53368. vpor ymm5, ymm5, ymm1
  53369. vpor ymm6, ymm6, ymm2
  53370. vpor ymm7, ymm7, ymm3
  53371. vpaddd ymm13, ymm13, ymm11
  53372. ; ENTRY: 7
  53373. mov r9, QWORD PTR [rdx+56]
  53374. add r9, 384
  53375. vpcmpeqd ymm12, ymm13, ymm10
  53376. vmovdqu ymm0, YMMWORD PTR [r9]
  53377. vmovdqu ymm1, YMMWORD PTR [r9+32]
  53378. vmovdqu ymm2, YMMWORD PTR [r9+64]
  53379. vmovdqu ymm3, YMMWORD PTR [r9+96]
  53380. vpand ymm0, ymm0, ymm12
  53381. vpand ymm1, ymm1, ymm12
  53382. vpand ymm2, ymm2, ymm12
  53383. vpand ymm3, ymm3, ymm12
  53384. vpor ymm4, ymm4, ymm0
  53385. vpor ymm5, ymm5, ymm1
  53386. vpor ymm6, ymm6, ymm2
  53387. vpor ymm7, ymm7, ymm3
  53388. vpaddd ymm13, ymm13, ymm11
  53389. ; ENTRY: 8
  53390. mov r9, QWORD PTR [rdx+64]
  53391. add r9, 384
  53392. vpcmpeqd ymm12, ymm13, ymm10
  53393. vmovdqu ymm0, YMMWORD PTR [r9]
  53394. vmovdqu ymm1, YMMWORD PTR [r9+32]
  53395. vmovdqu ymm2, YMMWORD PTR [r9+64]
  53396. vmovdqu ymm3, YMMWORD PTR [r9+96]
  53397. vpand ymm0, ymm0, ymm12
  53398. vpand ymm1, ymm1, ymm12
  53399. vpand ymm2, ymm2, ymm12
  53400. vpand ymm3, ymm3, ymm12
  53401. vpor ymm4, ymm4, ymm0
  53402. vpor ymm5, ymm5, ymm1
  53403. vpor ymm6, ymm6, ymm2
  53404. vpor ymm7, ymm7, ymm3
  53405. vpaddd ymm13, ymm13, ymm11
  53406. ; ENTRY: 9
  53407. mov r9, QWORD PTR [rdx+72]
  53408. add r9, 384
  53409. vpcmpeqd ymm12, ymm13, ymm10
  53410. vmovdqu ymm0, YMMWORD PTR [r9]
  53411. vmovdqu ymm1, YMMWORD PTR [r9+32]
  53412. vmovdqu ymm2, YMMWORD PTR [r9+64]
  53413. vmovdqu ymm3, YMMWORD PTR [r9+96]
  53414. vpand ymm0, ymm0, ymm12
  53415. vpand ymm1, ymm1, ymm12
  53416. vpand ymm2, ymm2, ymm12
  53417. vpand ymm3, ymm3, ymm12
  53418. vpor ymm4, ymm4, ymm0
  53419. vpor ymm5, ymm5, ymm1
  53420. vpor ymm6, ymm6, ymm2
  53421. vpor ymm7, ymm7, ymm3
  53422. vpaddd ymm13, ymm13, ymm11
  53423. ; ENTRY: 10
  53424. mov r9, QWORD PTR [rdx+80]
  53425. add r9, 384
  53426. vpcmpeqd ymm12, ymm13, ymm10
  53427. vmovdqu ymm0, YMMWORD PTR [r9]
  53428. vmovdqu ymm1, YMMWORD PTR [r9+32]
  53429. vmovdqu ymm2, YMMWORD PTR [r9+64]
  53430. vmovdqu ymm3, YMMWORD PTR [r9+96]
  53431. vpand ymm0, ymm0, ymm12
  53432. vpand ymm1, ymm1, ymm12
  53433. vpand ymm2, ymm2, ymm12
  53434. vpand ymm3, ymm3, ymm12
  53435. vpor ymm4, ymm4, ymm0
  53436. vpor ymm5, ymm5, ymm1
  53437. vpor ymm6, ymm6, ymm2
  53438. vpor ymm7, ymm7, ymm3
  53439. vpaddd ymm13, ymm13, ymm11
  53440. ; ENTRY: 11
  53441. mov r9, QWORD PTR [rdx+88]
  53442. add r9, 384
  53443. vpcmpeqd ymm12, ymm13, ymm10
  53444. vmovdqu ymm0, YMMWORD PTR [r9]
  53445. vmovdqu ymm1, YMMWORD PTR [r9+32]
  53446. vmovdqu ymm2, YMMWORD PTR [r9+64]
  53447. vmovdqu ymm3, YMMWORD PTR [r9+96]
  53448. vpand ymm0, ymm0, ymm12
  53449. vpand ymm1, ymm1, ymm12
  53450. vpand ymm2, ymm2, ymm12
  53451. vpand ymm3, ymm3, ymm12
  53452. vpor ymm4, ymm4, ymm0
  53453. vpor ymm5, ymm5, ymm1
  53454. vpor ymm6, ymm6, ymm2
  53455. vpor ymm7, ymm7, ymm3
  53456. vpaddd ymm13, ymm13, ymm11
  53457. ; ENTRY: 12
  53458. mov r9, QWORD PTR [rdx+96]
  53459. add r9, 384
  53460. vpcmpeqd ymm12, ymm13, ymm10
  53461. vmovdqu ymm0, YMMWORD PTR [r9]
  53462. vmovdqu ymm1, YMMWORD PTR [r9+32]
  53463. vmovdqu ymm2, YMMWORD PTR [r9+64]
  53464. vmovdqu ymm3, YMMWORD PTR [r9+96]
  53465. vpand ymm0, ymm0, ymm12
  53466. vpand ymm1, ymm1, ymm12
  53467. vpand ymm2, ymm2, ymm12
  53468. vpand ymm3, ymm3, ymm12
  53469. vpor ymm4, ymm4, ymm0
  53470. vpor ymm5, ymm5, ymm1
  53471. vpor ymm6, ymm6, ymm2
  53472. vpor ymm7, ymm7, ymm3
  53473. vpaddd ymm13, ymm13, ymm11
  53474. ; ENTRY: 13
  53475. mov r9, QWORD PTR [rdx+104]
  53476. add r9, 384
  53477. vpcmpeqd ymm12, ymm13, ymm10
  53478. vmovdqu ymm0, YMMWORD PTR [r9]
  53479. vmovdqu ymm1, YMMWORD PTR [r9+32]
  53480. vmovdqu ymm2, YMMWORD PTR [r9+64]
  53481. vmovdqu ymm3, YMMWORD PTR [r9+96]
  53482. vpand ymm0, ymm0, ymm12
  53483. vpand ymm1, ymm1, ymm12
  53484. vpand ymm2, ymm2, ymm12
  53485. vpand ymm3, ymm3, ymm12
  53486. vpor ymm4, ymm4, ymm0
  53487. vpor ymm5, ymm5, ymm1
  53488. vpor ymm6, ymm6, ymm2
  53489. vpor ymm7, ymm7, ymm3
  53490. vpaddd ymm13, ymm13, ymm11
  53491. ; ENTRY: 14
  53492. mov r9, QWORD PTR [rdx+112]
  53493. add r9, 384
  53494. vpcmpeqd ymm12, ymm13, ymm10
  53495. vmovdqu ymm0, YMMWORD PTR [r9]
  53496. vmovdqu ymm1, YMMWORD PTR [r9+32]
  53497. vmovdqu ymm2, YMMWORD PTR [r9+64]
  53498. vmovdqu ymm3, YMMWORD PTR [r9+96]
  53499. vpand ymm0, ymm0, ymm12
  53500. vpand ymm1, ymm1, ymm12
  53501. vpand ymm2, ymm2, ymm12
  53502. vpand ymm3, ymm3, ymm12
  53503. vpor ymm4, ymm4, ymm0
  53504. vpor ymm5, ymm5, ymm1
  53505. vpor ymm6, ymm6, ymm2
  53506. vpor ymm7, ymm7, ymm3
  53507. vpaddd ymm13, ymm13, ymm11
  53508. ; ENTRY: 15
  53509. mov r9, QWORD PTR [rdx+120]
  53510. add r9, 384
  53511. vpcmpeqd ymm12, ymm13, ymm10
  53512. vmovdqu ymm0, YMMWORD PTR [r9]
  53513. vmovdqu ymm1, YMMWORD PTR [r9+32]
  53514. vmovdqu ymm2, YMMWORD PTR [r9+64]
  53515. vmovdqu ymm3, YMMWORD PTR [r9+96]
  53516. vpand ymm0, ymm0, ymm12
  53517. vpand ymm1, ymm1, ymm12
  53518. vpand ymm2, ymm2, ymm12
  53519. vpand ymm3, ymm3, ymm12
  53520. vpor ymm4, ymm4, ymm0
  53521. vpor ymm5, ymm5, ymm1
  53522. vpor ymm6, ymm6, ymm2
  53523. vpor ymm7, ymm7, ymm3
  53524. vpaddd ymm13, ymm13, ymm11
  53525. vmovdqu YMMWORD PTR [rcx], ymm4
  53526. vmovdqu YMMWORD PTR [rcx+32], ymm5
  53527. vmovdqu YMMWORD PTR [rcx+64], ymm6
  53528. vmovdqu YMMWORD PTR [rcx+96], ymm7
  53529. ; END: 48-63
  53530. vmovdqu xmm6, OWORD PTR [rsp]
  53531. vmovdqu xmm7, OWORD PTR [rsp+16]
  53532. vmovdqu xmm8, OWORD PTR [rsp+32]
  53533. vmovdqu xmm9, OWORD PTR [rsp+48]
  53534. vmovdqu xmm10, OWORD PTR [rsp+64]
  53535. vmovdqu xmm11, OWORD PTR [rsp+80]
  53536. vmovdqu xmm12, OWORD PTR [rsp+96]
  53537. vmovdqu xmm13, OWORD PTR [rsp+112]
  53538. add rsp, 128
  53539. ret
  53540. sp_4096_get_from_table_avx2_64 ENDP
  53541. _text ENDS
  53542. ENDIF
  ; /* Conditionally add b to a using the mask m and store the sum in r.
  ;  * m is -1 (all bits set) to add b and 0 to add nothing (r = a).
  ;  *
  ;  * Windows x64 register arguments:
  ;  *   rcx = r  Result of the conditional add, 32 64-bit words.
  ;  *   rdx = a  Number to add to, 32 64-bit words.
  ;  *   r8  = b  Number that is added when the mask is set, 32 64-bit words.
  ;  *   r9  = m  Mask value ANDed with every word of b.
  ;  * Returns in rax the carry out of the most significant word (0 or 1).
  ;  * Scratch: r10, r11, flags. Stack: 256 bytes holding the masked copy of b.
  ;  *
  ;  * b is masked into a stack buffer before the addition starts because AND
  ;  * clears the carry flag and therefore cannot be interleaved with the
  ;  * ADC chain.
  ;  *
  ;  * The procedure is declared FRAME and its stack allocation is described
  ;  * with .allocstack: the Windows x64 ABI requires unwind data for every
  ;  * function that moves rsp, otherwise a stack walk from inside this
  ;  * routine would treat masked data at [rsp] as the return address.
  ;  */
  _text SEGMENT READONLY PARA
  sp_4096_cond_add_32 PROC FRAME
          sub     rsp, 256
          .allocstack 256
          .endprolog
          ; Carry accumulator; zeroed here, before any flag-dependent work.
          xor     eax, eax
          ; Stage 1: t[i] = b[i] & m for i = 0..31, t lives at [rsp].
          mov     r10, QWORD PTR [r8]
          mov     r11, QWORD PTR [r8+8]
          and     r10, r9
          and     r11, r9
          mov     QWORD PTR [rsp], r10
          mov     QWORD PTR [rsp+8], r11
          mov     r10, QWORD PTR [r8+16]
          mov     r11, QWORD PTR [r8+24]
          and     r10, r9
          and     r11, r9
          mov     QWORD PTR [rsp+16], r10
          mov     QWORD PTR [rsp+24], r11
          mov     r10, QWORD PTR [r8+32]
          mov     r11, QWORD PTR [r8+40]
          and     r10, r9
          and     r11, r9
          mov     QWORD PTR [rsp+32], r10
          mov     QWORD PTR [rsp+40], r11
          mov     r10, QWORD PTR [r8+48]
          mov     r11, QWORD PTR [r8+56]
          and     r10, r9
          and     r11, r9
          mov     QWORD PTR [rsp+48], r10
          mov     QWORD PTR [rsp+56], r11
          mov     r10, QWORD PTR [r8+64]
          mov     r11, QWORD PTR [r8+72]
          and     r10, r9
          and     r11, r9
          mov     QWORD PTR [rsp+64], r10
          mov     QWORD PTR [rsp+72], r11
          mov     r10, QWORD PTR [r8+80]
          mov     r11, QWORD PTR [r8+88]
          and     r10, r9
          and     r11, r9
          mov     QWORD PTR [rsp+80], r10
          mov     QWORD PTR [rsp+88], r11
          mov     r10, QWORD PTR [r8+96]
          mov     r11, QWORD PTR [r8+104]
          and     r10, r9
          and     r11, r9
          mov     QWORD PTR [rsp+96], r10
          mov     QWORD PTR [rsp+104], r11
          mov     r10, QWORD PTR [r8+112]
          mov     r11, QWORD PTR [r8+120]
          and     r10, r9
          and     r11, r9
          mov     QWORD PTR [rsp+112], r10
          mov     QWORD PTR [rsp+120], r11
          mov     r10, QWORD PTR [r8+128]
          mov     r11, QWORD PTR [r8+136]
          and     r10, r9
          and     r11, r9
          mov     QWORD PTR [rsp+128], r10
          mov     QWORD PTR [rsp+136], r11
          mov     r10, QWORD PTR [r8+144]
          mov     r11, QWORD PTR [r8+152]
          and     r10, r9
          and     r11, r9
          mov     QWORD PTR [rsp+144], r10
          mov     QWORD PTR [rsp+152], r11
          mov     r10, QWORD PTR [r8+160]
          mov     r11, QWORD PTR [r8+168]
          and     r10, r9
          and     r11, r9
          mov     QWORD PTR [rsp+160], r10
          mov     QWORD PTR [rsp+168], r11
          mov     r10, QWORD PTR [r8+176]
          mov     r11, QWORD PTR [r8+184]
          and     r10, r9
          and     r11, r9
          mov     QWORD PTR [rsp+176], r10
          mov     QWORD PTR [rsp+184], r11
          mov     r10, QWORD PTR [r8+192]
          mov     r11, QWORD PTR [r8+200]
          and     r10, r9
          and     r11, r9
          mov     QWORD PTR [rsp+192], r10
          mov     QWORD PTR [rsp+200], r11
          mov     r10, QWORD PTR [r8+208]
          mov     r11, QWORD PTR [r8+216]
          and     r10, r9
          and     r11, r9
          mov     QWORD PTR [rsp+208], r10
          mov     QWORD PTR [rsp+216], r11
          mov     r10, QWORD PTR [r8+224]
          mov     r11, QWORD PTR [r8+232]
          and     r10, r9
          and     r11, r9
          mov     QWORD PTR [rsp+224], r10
          mov     QWORD PTR [rsp+232], r11
          mov     r10, QWORD PTR [r8+240]
          mov     r11, QWORD PTR [r8+248]
          and     r10, r9
          and     r11, r9
          mov     QWORD PTR [rsp+240], r10
          mov     QWORD PTR [rsp+248], r11
          ; Stage 2: r[i] = a[i] + t[i] + carry. Only MOV sits between the
          ; ADCs, so the carry flag survives from word to word. Each result
          ; word is stored after the next word of a has been loaded.
          mov     r10, QWORD PTR [rdx]
          add     r10, QWORD PTR [rsp]
          mov     r11, QWORD PTR [rdx+8]
          adc     r11, QWORD PTR [rsp+8]
          mov     QWORD PTR [rcx], r10
          mov     r10, QWORD PTR [rdx+16]
          adc     r10, QWORD PTR [rsp+16]
          mov     QWORD PTR [rcx+8], r11
          mov     r11, QWORD PTR [rdx+24]
          adc     r11, QWORD PTR [rsp+24]
          mov     QWORD PTR [rcx+16], r10
          mov     r10, QWORD PTR [rdx+32]
          adc     r10, QWORD PTR [rsp+32]
          mov     QWORD PTR [rcx+24], r11
          mov     r11, QWORD PTR [rdx+40]
          adc     r11, QWORD PTR [rsp+40]
          mov     QWORD PTR [rcx+32], r10
          mov     r10, QWORD PTR [rdx+48]
          adc     r10, QWORD PTR [rsp+48]
          mov     QWORD PTR [rcx+40], r11
          mov     r11, QWORD PTR [rdx+56]
          adc     r11, QWORD PTR [rsp+56]
          mov     QWORD PTR [rcx+48], r10
          mov     r10, QWORD PTR [rdx+64]
          adc     r10, QWORD PTR [rsp+64]
          mov     QWORD PTR [rcx+56], r11
          mov     r11, QWORD PTR [rdx+72]
          adc     r11, QWORD PTR [rsp+72]
          mov     QWORD PTR [rcx+64], r10
          mov     r10, QWORD PTR [rdx+80]
          adc     r10, QWORD PTR [rsp+80]
          mov     QWORD PTR [rcx+72], r11
          mov     r11, QWORD PTR [rdx+88]
          adc     r11, QWORD PTR [rsp+88]
          mov     QWORD PTR [rcx+80], r10
          mov     r10, QWORD PTR [rdx+96]
          adc     r10, QWORD PTR [rsp+96]
          mov     QWORD PTR [rcx+88], r11
          mov     r11, QWORD PTR [rdx+104]
          adc     r11, QWORD PTR [rsp+104]
          mov     QWORD PTR [rcx+96], r10
          mov     r10, QWORD PTR [rdx+112]
          adc     r10, QWORD PTR [rsp+112]
          mov     QWORD PTR [rcx+104], r11
          mov     r11, QWORD PTR [rdx+120]
          adc     r11, QWORD PTR [rsp+120]
          mov     QWORD PTR [rcx+112], r10
          mov     r10, QWORD PTR [rdx+128]
          adc     r10, QWORD PTR [rsp+128]
          mov     QWORD PTR [rcx+120], r11
          mov     r11, QWORD PTR [rdx+136]
          adc     r11, QWORD PTR [rsp+136]
          mov     QWORD PTR [rcx+128], r10
          mov     r10, QWORD PTR [rdx+144]
          adc     r10, QWORD PTR [rsp+144]
          mov     QWORD PTR [rcx+136], r11
          mov     r11, QWORD PTR [rdx+152]
          adc     r11, QWORD PTR [rsp+152]
          mov     QWORD PTR [rcx+144], r10
          mov     r10, QWORD PTR [rdx+160]
          adc     r10, QWORD PTR [rsp+160]
          mov     QWORD PTR [rcx+152], r11
          mov     r11, QWORD PTR [rdx+168]
          adc     r11, QWORD PTR [rsp+168]
          mov     QWORD PTR [rcx+160], r10
          mov     r10, QWORD PTR [rdx+176]
          adc     r10, QWORD PTR [rsp+176]
          mov     QWORD PTR [rcx+168], r11
          mov     r11, QWORD PTR [rdx+184]
          adc     r11, QWORD PTR [rsp+184]
          mov     QWORD PTR [rcx+176], r10
          mov     r10, QWORD PTR [rdx+192]
          adc     r10, QWORD PTR [rsp+192]
          mov     QWORD PTR [rcx+184], r11
          mov     r11, QWORD PTR [rdx+200]
          adc     r11, QWORD PTR [rsp+200]
          mov     QWORD PTR [rcx+192], r10
          mov     r10, QWORD PTR [rdx+208]
          adc     r10, QWORD PTR [rsp+208]
          mov     QWORD PTR [rcx+200], r11
          mov     r11, QWORD PTR [rdx+216]
          adc     r11, QWORD PTR [rsp+216]
          mov     QWORD PTR [rcx+208], r10
          mov     r10, QWORD PTR [rdx+224]
          adc     r10, QWORD PTR [rsp+224]
          mov     QWORD PTR [rcx+216], r11
          mov     r11, QWORD PTR [rdx+232]
          adc     r11, QWORD PTR [rsp+232]
          mov     QWORD PTR [rcx+224], r10
          mov     r10, QWORD PTR [rdx+240]
          adc     r10, QWORD PTR [rsp+240]
          mov     QWORD PTR [rcx+232], r11
          mov     r11, QWORD PTR [rdx+248]
          adc     r11, QWORD PTR [rsp+248]
          mov     QWORD PTR [rcx+240], r10
          mov     QWORD PTR [rcx+248], r11
          ; rax = carry out of the top word.
          adc     rax, 0
          add     rsp, 256
          ret
  sp_4096_cond_add_32 ENDP
  _text ENDS
  53784. IFDEF HAVE_INTEL_AVX2
  53785. ; /* Conditionally add a and b using the mask m. (r = a + (b masked by m); 32 64-bit words, fully unrolled)
  53786. ; * m is -1 to add and 0 when not. The mask is applied with PEXT, so any other m would extract/compact bits of b instead of masking them.
  53787. ; * Returns the carry out of the top word in rax (0 or 1). Register use below: rcx = r, rdx = a, r8 = b, r9 = m.
  53788. ; * r A single precision number representing conditional add result.
  53789. ; * a A single precision number to add with.
  53790. ; * b A single precision number to add.
  53791. ; * m Mask value to apply.
  53792. ; */
  53793. _text SEGMENT READONLY PARA
  53794. sp_4096_cond_add_avx2_32 PROC
  53795. push r12 ; r12 is used as a third rotating scratch register and restored before ret
  53796. mov rax, 0 ; rax accumulates the final carry (return value)
  53797. mov r12, QWORD PTR [r8]
  53798. mov r10, QWORD PTR [rdx]
  53799. pext r12, r12, r9 ; r12 = b[0] when m is all ones, 0 when m is 0
  53800. add r10, r12 ; starts the carry chain; every later word uses adc, so CF must survive the mov/pext in between (NOTE(review): PEXT is documented as leaving flags untouched - confirm against the Intel SDM)
  53801. mov r12, QWORD PTR [r8+8]
  53802. mov r11, QWORD PTR [rdx+8]
  53803. pext r12, r12, r9
  53804. mov QWORD PTR [rcx], r10 ; store of word i is interleaved with the load/mask of word i+1
  53805. adc r11, r12
  53806. mov r10, QWORD PTR [r8+16]
  53807. mov r12, QWORD PTR [rdx+16]
  53808. pext r10, r10, r9
  53809. mov QWORD PTR [rcx+8], r11
  53810. adc r12, r10
  53811. mov r11, QWORD PTR [r8+24]
  53812. mov r10, QWORD PTR [rdx+24]
  53813. pext r11, r11, r9
  53814. mov QWORD PTR [rcx+16], r12
  53815. adc r10, r11
  53816. mov r12, QWORD PTR [r8+32]
  53817. mov r11, QWORD PTR [rdx+32]
  53818. pext r12, r12, r9
  53819. mov QWORD PTR [rcx+24], r10
  53820. adc r11, r12
  53821. mov r10, QWORD PTR [r8+40]
  53822. mov r12, QWORD PTR [rdx+40]
  53823. pext r10, r10, r9
  53824. mov QWORD PTR [rcx+32], r11
  53825. adc r12, r10
  53826. mov r11, QWORD PTR [r8+48]
  53827. mov r10, QWORD PTR [rdx+48]
  53828. pext r11, r11, r9
  53829. mov QWORD PTR [rcx+40], r12
  53830. adc r10, r11
  53831. mov r12, QWORD PTR [r8+56]
  53832. mov r11, QWORD PTR [rdx+56]
  53833. pext r12, r12, r9
  53834. mov QWORD PTR [rcx+48], r10
  53835. adc r11, r12
  53836. mov r10, QWORD PTR [r8+64]
  53837. mov r12, QWORD PTR [rdx+64]
  53838. pext r10, r10, r9
  53839. mov QWORD PTR [rcx+56], r11
  53840. adc r12, r10
  53841. mov r11, QWORD PTR [r8+72]
  53842. mov r10, QWORD PTR [rdx+72]
  53843. pext r11, r11, r9
  53844. mov QWORD PTR [rcx+64], r12
  53845. adc r10, r11
  53846. mov r12, QWORD PTR [r8+80]
  53847. mov r11, QWORD PTR [rdx+80]
  53848. pext r12, r12, r9
  53849. mov QWORD PTR [rcx+72], r10
  53850. adc r11, r12
  53851. mov r10, QWORD PTR [r8+88]
  53852. mov r12, QWORD PTR [rdx+88]
  53853. pext r10, r10, r9
  53854. mov QWORD PTR [rcx+80], r11
  53855. adc r12, r10
  53856. mov r11, QWORD PTR [r8+96]
  53857. mov r10, QWORD PTR [rdx+96]
  53858. pext r11, r11, r9
  53859. mov QWORD PTR [rcx+88], r12
  53860. adc r10, r11
  53861. mov r12, QWORD PTR [r8+104]
  53862. mov r11, QWORD PTR [rdx+104]
  53863. pext r12, r12, r9
  53864. mov QWORD PTR [rcx+96], r10
  53865. adc r11, r12
  53866. mov r10, QWORD PTR [r8+112]
  53867. mov r12, QWORD PTR [rdx+112]
  53868. pext r10, r10, r9
  53869. mov QWORD PTR [rcx+104], r11
  53870. adc r12, r10
  53871. mov r11, QWORD PTR [r8+120]
  53872. mov r10, QWORD PTR [rdx+120]
  53873. pext r11, r11, r9
  53874. mov QWORD PTR [rcx+112], r12
  53875. adc r10, r11
  53876. mov r12, QWORD PTR [r8+128]
  53877. mov r11, QWORD PTR [rdx+128]
  53878. pext r12, r12, r9
  53879. mov QWORD PTR [rcx+120], r10
  53880. adc r11, r12
  53881. mov r10, QWORD PTR [r8+136]
  53882. mov r12, QWORD PTR [rdx+136]
  53883. pext r10, r10, r9
  53884. mov QWORD PTR [rcx+128], r11
  53885. adc r12, r10
  53886. mov r11, QWORD PTR [r8+144]
  53887. mov r10, QWORD PTR [rdx+144]
  53888. pext r11, r11, r9
  53889. mov QWORD PTR [rcx+136], r12
  53890. adc r10, r11
  53891. mov r12, QWORD PTR [r8+152]
  53892. mov r11, QWORD PTR [rdx+152]
  53893. pext r12, r12, r9
  53894. mov QWORD PTR [rcx+144], r10
  53895. adc r11, r12
  53896. mov r10, QWORD PTR [r8+160]
  53897. mov r12, QWORD PTR [rdx+160]
  53898. pext r10, r10, r9
  53899. mov QWORD PTR [rcx+152], r11
  53900. adc r12, r10
  53901. mov r11, QWORD PTR [r8+168]
  53902. mov r10, QWORD PTR [rdx+168]
  53903. pext r11, r11, r9
  53904. mov QWORD PTR [rcx+160], r12
  53905. adc r10, r11
  53906. mov r12, QWORD PTR [r8+176]
  53907. mov r11, QWORD PTR [rdx+176]
  53908. pext r12, r12, r9
  53909. mov QWORD PTR [rcx+168], r10
  53910. adc r11, r12
  53911. mov r10, QWORD PTR [r8+184]
  53912. mov r12, QWORD PTR [rdx+184]
  53913. pext r10, r10, r9
  53914. mov QWORD PTR [rcx+176], r11
  53915. adc r12, r10
  53916. mov r11, QWORD PTR [r8+192]
  53917. mov r10, QWORD PTR [rdx+192]
  53918. pext r11, r11, r9
  53919. mov QWORD PTR [rcx+184], r12
  53920. adc r10, r11
  53921. mov r12, QWORD PTR [r8+200]
  53922. mov r11, QWORD PTR [rdx+200]
  53923. pext r12, r12, r9
  53924. mov QWORD PTR [rcx+192], r10
  53925. adc r11, r12
  53926. mov r10, QWORD PTR [r8+208]
  53927. mov r12, QWORD PTR [rdx+208]
  53928. pext r10, r10, r9
  53929. mov QWORD PTR [rcx+200], r11
  53930. adc r12, r10
  53931. mov r11, QWORD PTR [r8+216]
  53932. mov r10, QWORD PTR [rdx+216]
  53933. pext r11, r11, r9
  53934. mov QWORD PTR [rcx+208], r12
  53935. adc r10, r11
  53936. mov r12, QWORD PTR [r8+224]
  53937. mov r11, QWORD PTR [rdx+224]
  53938. pext r12, r12, r9
  53939. mov QWORD PTR [rcx+216], r10
  53940. adc r11, r12
  53941. mov r10, QWORD PTR [r8+232]
  53942. mov r12, QWORD PTR [rdx+232]
  53943. pext r10, r10, r9
  53944. mov QWORD PTR [rcx+224], r11
  53945. adc r12, r10
  53946. mov r11, QWORD PTR [r8+240]
  53947. mov r10, QWORD PTR [rdx+240]
  53948. pext r11, r11, r9
  53949. mov QWORD PTR [rcx+232], r12
  53950. adc r10, r11
  53951. mov r12, QWORD PTR [r8+248]
  53952. mov r11, QWORD PTR [rdx+248]
  53953. pext r12, r12, r9
  53954. mov QWORD PTR [rcx+240], r10
  53955. adc r11, r12
  53956. mov QWORD PTR [rcx+248], r11
  53957. adc rax, 0 ; rax = carry out of word 31
  53958. pop r12
  53959. ret
  53960. sp_4096_cond_add_avx2_32 ENDP
  53961. _text ENDS
  53962. ENDIF
  53963. ; /* Shift number left by n bit. (r = a << n)
  53964. ; * Reads the 64 words a[0..63] and writes the 65 words r[0..64]; r[64] receives the bits shifted out of a[63]. Words are processed from the top down.
  53965. ; * r Result of left shift by n. (rcx on entry, copied to rax; needs room for 65 words)
  53966. ; * a Number to shift. (rdx)
  53967. ; * n Amount to shift. (low byte of r8, moved to cl; NOTE(review): shld/shl mask a 64-bit count to 6 bits, so callers presumably pass 0..63 - confirm)
  53968. ; */
  53969. _text SEGMENT READONLY PARA
  53970. sp_4096_lshift_64 PROC
  53971. push r12
  53972. push r13
  53973. mov rax, rcx ; rax = r; must be copied BEFORE cl is written, because cl is the low byte of rcx and would corrupt the pointer
  53974. mov cl, r8b ; cl = shift count n used by every shld/shl below
  53975. mov r12, 0 ; r12 collects the bits shifted out of a[63]
  53976. mov r13, QWORD PTR [rdx+472]
  53977. mov r8, QWORD PTR [rdx+480]
  53978. mov r9, QWORD PTR [rdx+488]
  53979. mov r10, QWORD PTR [rdx+496]
  53980. mov r11, QWORD PTR [rdx+504]
  53981. shld r12, r11, cl
  53982. shld r11, r10, cl
  53983. shld r10, r9, cl
  53984. shld r9, r8, cl
  53985. shld r8, r13, cl ; r13 (a[59]) stays unshifted; it is finished in the next group, which loads the word below it
  53986. mov QWORD PTR [rax+480], r8
  53987. mov QWORD PTR [rax+488], r9
  53988. mov QWORD PTR [rax+496], r10
  53989. mov QWORD PTR [rax+504], r11
  53990. mov QWORD PTR [rax+512], r12 ; r[64] = overflow word
  53991. mov r11, QWORD PTR [rdx+440]
  53992. mov r8, QWORD PTR [rdx+448]
  53993. mov r9, QWORD PTR [rdx+456]
  53994. mov r10, QWORD PTR [rdx+464]
  53995. shld r13, r10, cl
  53996. shld r10, r9, cl
  53997. shld r9, r8, cl
  53998. shld r8, r11, cl ; r11 and r13 alternate as the word carried over between groups of four
  53999. mov QWORD PTR [rax+448], r8
  54000. mov QWORD PTR [rax+456], r9
  54001. mov QWORD PTR [rax+464], r10
  54002. mov QWORD PTR [rax+472], r13
  54003. mov r13, QWORD PTR [rdx+408]
  54004. mov r8, QWORD PTR [rdx+416]
  54005. mov r9, QWORD PTR [rdx+424]
  54006. mov r10, QWORD PTR [rdx+432]
  54007. shld r11, r10, cl
  54008. shld r10, r9, cl
  54009. shld r9, r8, cl
  54010. shld r8, r13, cl
  54011. mov QWORD PTR [rax+416], r8
  54012. mov QWORD PTR [rax+424], r9
  54013. mov QWORD PTR [rax+432], r10
  54014. mov QWORD PTR [rax+440], r11
  54015. mov r11, QWORD PTR [rdx+376]
  54016. mov r8, QWORD PTR [rdx+384]
  54017. mov r9, QWORD PTR [rdx+392]
  54018. mov r10, QWORD PTR [rdx+400]
  54019. shld r13, r10, cl
  54020. shld r10, r9, cl
  54021. shld r9, r8, cl
  54022. shld r8, r11, cl
  54023. mov QWORD PTR [rax+384], r8
  54024. mov QWORD PTR [rax+392], r9
  54025. mov QWORD PTR [rax+400], r10
  54026. mov QWORD PTR [rax+408], r13
  54027. mov r13, QWORD PTR [rdx+344]
  54028. mov r8, QWORD PTR [rdx+352]
  54029. mov r9, QWORD PTR [rdx+360]
  54030. mov r10, QWORD PTR [rdx+368]
  54031. shld r11, r10, cl
  54032. shld r10, r9, cl
  54033. shld r9, r8, cl
  54034. shld r8, r13, cl
  54035. mov QWORD PTR [rax+352], r8
  54036. mov QWORD PTR [rax+360], r9
  54037. mov QWORD PTR [rax+368], r10
  54038. mov QWORD PTR [rax+376], r11
  54039. mov r11, QWORD PTR [rdx+312]
  54040. mov r8, QWORD PTR [rdx+320]
  54041. mov r9, QWORD PTR [rdx+328]
  54042. mov r10, QWORD PTR [rdx+336]
  54043. shld r13, r10, cl
  54044. shld r10, r9, cl
  54045. shld r9, r8, cl
  54046. shld r8, r11, cl
  54047. mov QWORD PTR [rax+320], r8
  54048. mov QWORD PTR [rax+328], r9
  54049. mov QWORD PTR [rax+336], r10
  54050. mov QWORD PTR [rax+344], r13
  54051. mov r13, QWORD PTR [rdx+280]
  54052. mov r8, QWORD PTR [rdx+288]
  54053. mov r9, QWORD PTR [rdx+296]
  54054. mov r10, QWORD PTR [rdx+304]
  54055. shld r11, r10, cl
  54056. shld r10, r9, cl
  54057. shld r9, r8, cl
  54058. shld r8, r13, cl
  54059. mov QWORD PTR [rax+288], r8
  54060. mov QWORD PTR [rax+296], r9
  54061. mov QWORD PTR [rax+304], r10
  54062. mov QWORD PTR [rax+312], r11
  54063. mov r11, QWORD PTR [rdx+248]
  54064. mov r8, QWORD PTR [rdx+256]
  54065. mov r9, QWORD PTR [rdx+264]
  54066. mov r10, QWORD PTR [rdx+272]
  54067. shld r13, r10, cl
  54068. shld r10, r9, cl
  54069. shld r9, r8, cl
  54070. shld r8, r11, cl
  54071. mov QWORD PTR [rax+256], r8
  54072. mov QWORD PTR [rax+264], r9
  54073. mov QWORD PTR [rax+272], r10
  54074. mov QWORD PTR [rax+280], r13
  54075. mov r13, QWORD PTR [rdx+216]
  54076. mov r8, QWORD PTR [rdx+224]
  54077. mov r9, QWORD PTR [rdx+232]
  54078. mov r10, QWORD PTR [rdx+240]
  54079. shld r11, r10, cl
  54080. shld r10, r9, cl
  54081. shld r9, r8, cl
  54082. shld r8, r13, cl
  54083. mov QWORD PTR [rax+224], r8
  54084. mov QWORD PTR [rax+232], r9
  54085. mov QWORD PTR [rax+240], r10
  54086. mov QWORD PTR [rax+248], r11
  54087. mov r11, QWORD PTR [rdx+184]
  54088. mov r8, QWORD PTR [rdx+192]
  54089. mov r9, QWORD PTR [rdx+200]
  54090. mov r10, QWORD PTR [rdx+208]
  54091. shld r13, r10, cl
  54092. shld r10, r9, cl
  54093. shld r9, r8, cl
  54094. shld r8, r11, cl
  54095. mov QWORD PTR [rax+192], r8
  54096. mov QWORD PTR [rax+200], r9
  54097. mov QWORD PTR [rax+208], r10
  54098. mov QWORD PTR [rax+216], r13
  54099. mov r13, QWORD PTR [rdx+152]
  54100. mov r8, QWORD PTR [rdx+160]
  54101. mov r9, QWORD PTR [rdx+168]
  54102. mov r10, QWORD PTR [rdx+176]
  54103. shld r11, r10, cl
  54104. shld r10, r9, cl
  54105. shld r9, r8, cl
  54106. shld r8, r13, cl
  54107. mov QWORD PTR [rax+160], r8
  54108. mov QWORD PTR [rax+168], r9
  54109. mov QWORD PTR [rax+176], r10
  54110. mov QWORD PTR [rax+184], r11
  54111. mov r11, QWORD PTR [rdx+120]
  54112. mov r8, QWORD PTR [rdx+128]
  54113. mov r9, QWORD PTR [rdx+136]
  54114. mov r10, QWORD PTR [rdx+144]
  54115. shld r13, r10, cl
  54116. shld r10, r9, cl
  54117. shld r9, r8, cl
  54118. shld r8, r11, cl
  54119. mov QWORD PTR [rax+128], r8
  54120. mov QWORD PTR [rax+136], r9
  54121. mov QWORD PTR [rax+144], r10
  54122. mov QWORD PTR [rax+152], r13
  54123. mov r13, QWORD PTR [rdx+88]
  54124. mov r8, QWORD PTR [rdx+96]
  54125. mov r9, QWORD PTR [rdx+104]
  54126. mov r10, QWORD PTR [rdx+112]
  54127. shld r11, r10, cl
  54128. shld r10, r9, cl
  54129. shld r9, r8, cl
  54130. shld r8, r13, cl
  54131. mov QWORD PTR [rax+96], r8
  54132. mov QWORD PTR [rax+104], r9
  54133. mov QWORD PTR [rax+112], r10
  54134. mov QWORD PTR [rax+120], r11
  54135. mov r11, QWORD PTR [rdx+56]
  54136. mov r8, QWORD PTR [rdx+64]
  54137. mov r9, QWORD PTR [rdx+72]
  54138. mov r10, QWORD PTR [rdx+80]
  54139. shld r13, r10, cl
  54140. shld r10, r9, cl
  54141. shld r9, r8, cl
  54142. shld r8, r11, cl
  54143. mov QWORD PTR [rax+64], r8
  54144. mov QWORD PTR [rax+72], r9
  54145. mov QWORD PTR [rax+80], r10
  54146. mov QWORD PTR [rax+88], r13
  54147. mov r13, QWORD PTR [rdx+24]
  54148. mov r8, QWORD PTR [rdx+32]
  54149. mov r9, QWORD PTR [rdx+40]
  54150. mov r10, QWORD PTR [rdx+48]
  54151. shld r11, r10, cl
  54152. shld r10, r9, cl
  54153. shld r9, r8, cl
  54154. shld r8, r13, cl
  54155. mov QWORD PTR [rax+32], r8
  54156. mov QWORD PTR [rax+40], r9
  54157. mov QWORD PTR [rax+48], r10
  54158. mov QWORD PTR [rax+56], r11
  54159. mov r8, QWORD PTR [rdx]
  54160. mov r9, QWORD PTR [rdx+8]
  54161. mov r10, QWORD PTR [rdx+16]
  54162. shld r13, r10, cl
  54163. shld r10, r9, cl
  54164. shld r9, r8, cl
  54165. shl r8, cl ; lowest word has no lower neighbour, so it is zero-filled
  54166. mov QWORD PTR [rax], r8
  54167. mov QWORD PTR [rax+8], r9
  54168. mov QWORD PTR [rax+16], r10
  54169. mov QWORD PTR [rax+24], r13
  54170. pop r13
  54171. pop r12
  54172. ret
  54173. sp_4096_lshift_64 ENDP
  54174. _text ENDS
  54175. ENDIF
  54176. ENDIF
  54177. IFNDEF WOLFSSL_SP_NO_256
  54178. ; /* Multiply a and b into r. (r = a * b; 4-word operands, 8-word result, column-by-column with a 3-register accumulator)
  54179. ; * The low four result words are staged at [rsp..rsp+24] and copied to r only at the end (presumably so r may overlap a or b - TODO confirm with callers).
  54180. ; * r A single precision integer. (rcx)
  54181. ; * a A single precision integer. (rdx on entry, moved to r9)
  54182. ; * b A single precision integer. (r8)
  54183. ; */
  54184. _text SEGMENT READONLY PARA
  54185. sp_256_mul_4 PROC
  54186. push r12
  54187. mov r9, rdx ; r9 = a; rdx is overwritten by every mul (high half of the product)
  54188. sub rsp, 32 ; scratch for result words 0..3
  54189. ; A[0] * B[0]
  54190. mov rax, QWORD PTR [r8]
  54191. mul QWORD PTR [r9]
  54192. xor r12, r12
  54193. mov QWORD PTR [rsp], rax
  54194. mov r11, rdx
  54195. ; A[0] * B[1]
  54196. mov rax, QWORD PTR [r8+8]
  54197. mul QWORD PTR [r9]
  54198. xor r10, r10 ; clear the next top accumulator word; r10/r11/r12 rotate roles each column
  54199. add r11, rax
  54200. adc r12, rdx
  54201. adc r10, 0
  54202. ; A[1] * B[0]
  54203. mov rax, QWORD PTR [r8]
  54204. mul QWORD PTR [r9+8]
  54205. add r11, rax
  54206. adc r12, rdx
  54207. adc r10, 0
  54208. mov QWORD PTR [rsp+8], r11
  54209. ; A[0] * B[2]
  54210. mov rax, QWORD PTR [r8+16]
  54211. mul QWORD PTR [r9]
  54212. xor r11, r11
  54213. add r12, rax
  54214. adc r10, rdx
  54215. adc r11, 0
  54216. ; A[1] * B[1]
  54217. mov rax, QWORD PTR [r8+8]
  54218. mul QWORD PTR [r9+8]
  54219. add r12, rax
  54220. adc r10, rdx
  54221. adc r11, 0
  54222. ; A[2] * B[0]
  54223. mov rax, QWORD PTR [r8]
  54224. mul QWORD PTR [r9+16]
  54225. add r12, rax
  54226. adc r10, rdx
  54227. adc r11, 0
  54228. mov QWORD PTR [rsp+16], r12
  54229. ; A[0] * B[3]
  54230. mov rax, QWORD PTR [r8+24]
  54231. mul QWORD PTR [r9]
  54232. xor r12, r12
  54233. add r10, rax
  54234. adc r11, rdx
  54235. adc r12, 0
  54236. ; A[1] * B[2]
  54237. mov rax, QWORD PTR [r8+16]
  54238. mul QWORD PTR [r9+8]
  54239. add r10, rax
  54240. adc r11, rdx
  54241. adc r12, 0
  54242. ; A[2] * B[1]
  54243. mov rax, QWORD PTR [r8+8]
  54244. mul QWORD PTR [r9+16]
  54245. add r10, rax
  54246. adc r11, rdx
  54247. adc r12, 0
  54248. ; A[3] * B[0]
  54249. mov rax, QWORD PTR [r8]
  54250. mul QWORD PTR [r9+24]
  54251. add r10, rax
  54252. adc r11, rdx
  54253. adc r12, 0
  54254. mov QWORD PTR [rsp+24], r10
  54255. ; A[1] * B[3]
  54256. mov rax, QWORD PTR [r8+24]
  54257. mul QWORD PTR [r9+8]
  54258. xor r10, r10
  54259. add r11, rax
  54260. adc r12, rdx
  54261. adc r10, 0
  54262. ; A[2] * B[2]
  54263. mov rax, QWORD PTR [r8+16]
  54264. mul QWORD PTR [r9+16]
  54265. add r11, rax
  54266. adc r12, rdx
  54267. adc r10, 0
  54268. ; A[3] * B[1]
  54269. mov rax, QWORD PTR [r8+8]
  54270. mul QWORD PTR [r9+24]
  54271. add r11, rax
  54272. adc r12, rdx
  54273. adc r10, 0
  54274. mov QWORD PTR [rcx+32], r11 ; result words 4..7 are written straight to r
  54275. ; A[2] * B[3]
  54276. mov rax, QWORD PTR [r8+24]
  54277. mul QWORD PTR [r9+16]
  54278. xor r11, r11
  54279. add r12, rax
  54280. adc r10, rdx
  54281. adc r11, 0
  54282. ; A[3] * B[2]
  54283. mov rax, QWORD PTR [r8+16]
  54284. mul QWORD PTR [r9+24]
  54285. add r12, rax
  54286. adc r10, rdx
  54287. adc r11, 0
  54288. mov QWORD PTR [rcx+40], r12
  54289. ; A[3] * B[3]
  54290. mov rax, QWORD PTR [r8+24]
  54291. mul QWORD PTR [r9+24]
  54292. add r10, rax
  54293. adc r11, rdx
  54294. mov QWORD PTR [rcx+48], r10
  54295. mov QWORD PTR [rcx+56], r11
  54296. mov rax, QWORD PTR [rsp] ; copy the staged low half into r[0..3]
  54297. mov rdx, QWORD PTR [rsp+8]
  54298. mov r10, QWORD PTR [rsp+16]
  54299. mov r11, QWORD PTR [rsp+24]
  54300. mov QWORD PTR [rcx], rax
  54301. mov QWORD PTR [rcx+8], rdx
  54302. mov QWORD PTR [rcx+16], r10
  54303. mov QWORD PTR [rcx+24], r11
  54304. add rsp, 32
  54305. pop r12
  54306. ret
  54307. sp_256_mul_4 ENDP
  54308. _text ENDS
  54309. IFDEF HAVE_INTEL_AVX2
  54310. ; /* Multiply a and b into r. (r = a * b; 4-word operands, 8-word result, using MULX with ADCX/ADOX carry chains)
  54311. ; * The whole product is held in r8..r15 and stored to r only after the last read of a and b.
  54312. ; * r Result of multiplication. (rcx)
  54313. ; * a First number to multiply. (rdx on entry, moved to rax)
  54314. ; * b Second number to multiply. (r8 on entry, moved to rbp)
  54315. ; */
  54316. _text SEGMENT READONLY PARA
  54317. sp_256_mul_avx2_4 PROC
  54318. push rbp
  54319. push r12
  54320. push r13
  54321. push r14
  54322. push r15
  54323. push rdi
  54324. push rsi
  54325. push rbx
  54326. mov rbp, r8 ; rbp = b, freeing r8 to hold result word 0
  54327. mov rax, rdx ; rax = a; rdx becomes the implicit MULX multiplicand
  54328. mov rdx, QWORD PTR [rax]
  54329. ; A[0] * B[0]
  54330. mulx r9, r8, QWORD PTR [rbp]
  54331. xor rbx, rbx ; rbx = 0 and CF/OF cleared before the carry chain starts
  54332. ; A[0] * B[1]
  54333. mulx r10, rdi, QWORD PTR [rbp+8]
  54334. adcx r9, rdi
  54335. ; A[0] * B[2]
  54336. mulx r11, rdi, QWORD PTR [rbp+16]
  54337. adcx r10, rdi
  54338. ; A[0] * B[3]
  54339. mulx r12, rdi, QWORD PTR [rbp+24]
  54340. adcx r11, rdi
  54341. mov rdx, QWORD PTR [rax+8]
  54342. adcx r12, rbx ; fold the final CF of this row into the top word (rbx is 0)
  54343. ; A[1] * B[0]
  54344. mulx rsi, rdi, QWORD PTR [rbp]
  54345. xor rbx, rbx ; reset both carry flags for the next row
  54346. adcx r9, rdi
  54347. ; A[1] * B[1]
  54348. mulx r15, rdi, QWORD PTR [rbp+8]
  54349. adox r10, rsi ; high halves ride the OF chain (adox), low halves the CF chain (adcx)
  54350. adcx r10, rdi
  54351. ; A[1] * B[2]
  54352. mulx rsi, rdi, QWORD PTR [rbp+16]
  54353. adox r11, r15
  54354. adcx r11, rdi
  54355. ; A[1] * B[3]
  54356. mulx r13, rdi, QWORD PTR [rbp+24]
  54357. adox r12, rsi
  54358. adcx r12, rdi
  54359. adox r13, rbx
  54360. mov rdx, QWORD PTR [rax+16]
  54361. adcx r13, rbx
  54362. ; A[2] * B[0]
  54363. mulx rsi, rdi, QWORD PTR [rbp]
  54364. xor rbx, rbx
  54365. adcx r10, rdi
  54366. ; A[2] * B[1]
  54367. mulx r15, rdi, QWORD PTR [rbp+8]
  54368. adox r11, rsi
  54369. adcx r11, rdi
  54370. ; A[2] * B[2]
  54371. mulx rsi, rdi, QWORD PTR [rbp+16]
  54372. adox r12, r15
  54373. adcx r12, rdi
  54374. ; A[2] * B[3]
  54375. mulx r14, rdi, QWORD PTR [rbp+24]
  54376. adox r13, rsi
  54377. adcx r13, rdi
  54378. adox r14, rbx
  54379. mov rdx, QWORD PTR [rax+24]
  54380. adcx r14, rbx
  54381. ; A[3] * B[0]
  54382. mulx rsi, rdi, QWORD PTR [rbp]
  54383. xor rbx, rbx
  54384. adcx r11, rdi
  54385. ; A[3] * B[1]
  54386. mulx r15, rdi, QWORD PTR [rbp+8]
  54387. adox r12, rsi
  54388. adcx r12, rdi
  54389. ; A[3] * B[2]
  54390. mulx rsi, rdi, QWORD PTR [rbp+16]
  54391. adox r13, r15
  54392. adcx r13, rdi
  54393. ; A[3] * B[3]
  54394. mulx r15, rdi, QWORD PTR [rbp+24]
  54395. adox r14, rsi
  54396. adcx r14, rdi
  54397. adox r15, rbx
  54398. adcx r15, rbx
  54399. mov QWORD PTR [rcx], r8 ; r[0..7] = r8..r15
  54400. mov QWORD PTR [rcx+8], r9
  54401. mov QWORD PTR [rcx+16], r10
  54402. mov QWORD PTR [rcx+24], r11
  54403. mov QWORD PTR [rcx+32], r12
  54404. mov QWORD PTR [rcx+40], r13
  54405. mov QWORD PTR [rcx+48], r14
  54406. mov QWORD PTR [rcx+56], r15
  54407. pop rbx
  54408. pop rsi
  54409. pop rdi
  54410. pop r15
  54411. pop r14
  54412. pop r13
  54413. pop r12
  54414. pop rbp
  54415. ret
  54416. sp_256_mul_avx2_4 ENDP
  54417. _text ENDS
  54418. ENDIF
  54419. ; /* Square a and put result in r. (r = a * a; 4-word input, 8-word result)
  54420. ; * Each cross product A[i]*A[j] (i != j) is computed once and added twice. Result words 0..3 are staged at [rsp..rsp+24] and copied to r at the end.
  54421. ; * r A single precision integer. (rcx)
  54422. ; * a A single precision integer. (rdx on entry, moved to r8)
  54423. ; */
  54424. _text SEGMENT READONLY PARA
  54425. sp_256_sqr_4 PROC
  54426. push r12
  54427. push r13
  54428. push r14 ; NOTE(review): r14 is saved/restored but not otherwise referenced in this routine
  54429. mov r8, rdx ; r8 = a; rdx is overwritten by every mul
  54430. sub rsp, 32 ; scratch for result words 0..3
  54431. ; A[0] * A[0]
  54432. mov rax, QWORD PTR [r8]
  54433. mul rax
  54434. xor r11, r11
  54435. mov QWORD PTR [rsp], rax
  54436. mov r10, rdx
  54437. ; A[0] * A[1]
  54438. mov rax, QWORD PTR [r8+8]
  54439. mul QWORD PTR [r8]
  54440. xor r9, r9
  54441. add r10, rax
  54442. adc r11, rdx
  54443. adc r9, 0
  54444. add r10, rax ; second addition of the same product = doubling the cross term
  54445. adc r11, rdx
  54446. adc r9, 0
  54447. mov QWORD PTR [rsp+8], r10
  54448. ; A[0] * A[2]
  54449. mov rax, QWORD PTR [r8+16]
  54450. mul QWORD PTR [r8]
  54451. xor r10, r10
  54452. add r11, rax
  54453. adc r9, rdx
  54454. adc r10, 0
  54455. add r11, rax
  54456. adc r9, rdx
  54457. adc r10, 0
  54458. ; A[1] * A[1]
  54459. mov rax, QWORD PTR [r8+8]
  54460. mul rax
  54461. add r11, rax ; diagonal terms are added only once
  54462. adc r9, rdx
  54463. adc r10, 0
  54464. mov QWORD PTR [rsp+16], r11
  54465. ; A[0] * A[3]
  54466. mov rax, QWORD PTR [r8+24]
  54467. mul QWORD PTR [r8]
  54468. xor r11, r11
  54469. add r9, rax
  54470. adc r10, rdx
  54471. adc r11, 0
  54472. add r9, rax
  54473. adc r10, rdx
  54474. adc r11, 0
  54475. ; A[1] * A[2]
  54476. mov rax, QWORD PTR [r8+16]
  54477. mul QWORD PTR [r8+8]
  54478. add r9, rax
  54479. adc r10, rdx
  54480. adc r11, 0
  54481. add r9, rax
  54482. adc r10, rdx
  54483. adc r11, 0
  54484. mov QWORD PTR [rsp+24], r9
  54485. ; A[1] * A[3]
  54486. mov rax, QWORD PTR [r8+24]
  54487. mul QWORD PTR [r8+8]
  54488. xor r9, r9
  54489. add r10, rax
  54490. adc r11, rdx
  54491. adc r9, 0
  54492. add r10, rax
  54493. adc r11, rdx
  54494. adc r9, 0
  54495. ; A[2] * A[2]
  54496. mov rax, QWORD PTR [r8+16]
  54497. mul rax
  54498. add r10, rax
  54499. adc r11, rdx
  54500. adc r9, 0
  54501. mov QWORD PTR [rcx+32], r10 ; result words 4..7 are written straight to r
  54502. ; A[2] * A[3]
  54503. mov rax, QWORD PTR [r8+24]
  54504. mul QWORD PTR [r8+16]
  54505. xor r10, r10
  54506. add r11, rax
  54507. adc r9, rdx
  54508. adc r10, 0
  54509. add r11, rax
  54510. adc r9, rdx
  54511. adc r10, 0
  54512. mov QWORD PTR [rcx+40], r11
  54513. ; A[3] * A[3]
  54514. mov rax, QWORD PTR [r8+24]
  54515. mul rax
  54516. add r9, rax
  54517. adc r10, rdx
  54518. mov QWORD PTR [rcx+48], r9
  54519. mov QWORD PTR [rcx+56], r10
  54520. mov rax, QWORD PTR [rsp] ; copy the staged low half into r[0..3]
  54521. mov rdx, QWORD PTR [rsp+8]
  54522. mov r12, QWORD PTR [rsp+16]
  54523. mov r13, QWORD PTR [rsp+24]
  54524. mov QWORD PTR [rcx], rax
  54525. mov QWORD PTR [rcx+8], rdx
  54526. mov QWORD PTR [rcx+16], r12
  54527. mov QWORD PTR [rcx+24], r13
  54528. add rsp, 32
  54529. pop r14
  54530. pop r13
  54531. pop r12
  54532. ret
  54533. sp_256_sqr_4 ENDP
  54534. _text ENDS
  54535. IFDEF HAVE_INTEL_AVX2
  54536. ; /* Square a and put result in r. (r = a * a; 4-word input, 8-word result, using MULX with ADCX/ADOX)
  54537. ; * Phase 1 sums the six cross products A[i]*A[j] (i < j) into r9..r14; phase 2 doubles that sum on the CF chain while adding the squares A[i]*A[i] on the OF chain.
  54538. ; * r Result of squaring. (rcx)
  54539. ; * a Number to square in Montgomery form. (rdx on entry, moved to rax; this routine performs a plain square with no reduction)
  54540. ; */
  54541. _text SEGMENT READONLY PARA
  54542. sp_256_sqr_avx2_4 PROC
  54543. push r12
  54544. push r13
  54545. push r14
  54546. push r15
  54547. push rdi
  54548. push rsi
  54549. push rbx
  54550. mov rax, rdx ; rax = a; rdx becomes the implicit MULX multiplicand
  54551. xor r8, r8 ; clears CF and OF for the adox/adcx chains below (r8 itself is overwritten by later mulx results)
  54552. mov rdx, QWORD PTR [rax]
  54553. mov rsi, QWORD PTR [rax+8]
  54554. mov rbx, QWORD PTR [rax+16]
  54555. mov r15, QWORD PTR [rax+24]
  54556. ; A[0] * A[1]
  54557. mulx r10, r9, rsi
  54558. ; A[0] * A[2]
  54559. mulx r11, r8, rbx
  54560. adox r10, r8
  54561. ; A[0] * A[3]
  54562. mulx r12, r8, r15
  54563. mov rdx, rsi
  54564. adox r11, r8
  54565. ; A[1] * A[2]
  54566. mulx rdi, r8, rbx
  54567. mov rdx, r15
  54568. adcx r11, r8
  54569. ; A[1] * A[3]
  54570. mulx r13, r8, rsi
  54571. mov r15, 0 ; zero source for the carry folds below; a mov (not xor) is needed here so the live CF/OF chains are not disturbed
  54572. adox r12, rdi
  54573. adcx r12, r8
  54574. ; A[2] * A[3]
  54575. mulx r14, r8, rbx
  54576. adox r13, r15
  54577. adcx r13, r8
  54578. adox r14, r15
  54579. adcx r14, r15
  54580. ; Double with Carry Flag
  54581. xor r15, r15 ; r15 = 0 and both carry flags reset before the doubling pass
  54582. ; A[0] * A[0]
  54583. mov rdx, QWORD PTR [rax]
  54584. mulx rdi, r8, rdx ; r8 = low word of A[0]^2 = result word 0
  54585. adcx r9, r9 ; x + x + CF doubles the cross-product sum one word at a time
  54586. adcx r10, r10
  54587. adox r9, rdi
  54588. ; A[1] * A[1]
  54589. mov rdx, QWORD PTR [rax+8]
  54590. mulx rbx, rsi, rdx
  54591. adcx r11, r11
  54592. adox r10, rsi
  54593. ; A[2] * A[2]
  54594. mov rdx, QWORD PTR [rax+16]
  54595. mulx rsi, rdi, rdx
  54596. adcx r12, r12
  54597. adox r11, rbx
  54598. adcx r13, r13
  54599. adox r12, rdi
  54600. adcx r14, r14
  54601. ; A[3] * A[3]
  54602. mov rdx, QWORD PTR [rax+24]
  54603. mulx rbx, rdi, rdx
  54604. adox r13, rsi
  54605. adcx r15, r15 ; r15 = carry out of the doubling chain (r15 was 0)
  54606. adox r14, rdi
  54607. adox r15, rbx
  54608. mov QWORD PTR [rcx], r8 ; r[0..7] = r8..r15
  54609. mov QWORD PTR [rcx+8], r9
  54610. mov QWORD PTR [rcx+16], r10
  54611. mov QWORD PTR [rcx+24], r11
  54612. mov QWORD PTR [rcx+32], r12
  54613. mov QWORD PTR [rcx+40], r13
  54614. mov QWORD PTR [rcx+48], r14
  54615. mov QWORD PTR [rcx+56], r15
  54616. pop rbx
  54617. pop rsi
  54618. pop rdi
  54619. pop r15
  54620. pop r14
  54621. pop r13
  54622. pop r12
  54623. ret
  54624. sp_256_sqr_avx2_4 ENDP
  54625. _text ENDS
  54626. ENDIF
  54627. ; /* Add b to a into r. (r = a + b; 4 64-bit words)
  54628. ; * Returns the carry out of the top word in rax (0 or 1).
  54629. ; * r A single precision integer. (rcx)
  54630. ; * a A single precision integer. (rdx)
  54631. ; * b A single precision integer. (r8)
  54632. ; */
  54633. _text SEGMENT READONLY PARA
  54634. sp_256_add_4 PROC
  54635. push r12 ; r12 holds the fourth word and is restored before ret
  54636. xor rax, rax ; rax = 0; done before the add chain because xor clears CF
  54637. mov r9, QWORD PTR [rdx]
  54638. mov r10, QWORD PTR [rdx+8]
  54639. mov r11, QWORD PTR [rdx+16]
  54640. mov r12, QWORD PTR [rdx+24]
  54641. add r9, QWORD PTR [r8]
  54642. adc r10, QWORD PTR [r8+8]
  54643. adc r11, QWORD PTR [r8+16]
  54644. adc r12, QWORD PTR [r8+24]
  54645. mov QWORD PTR [rcx], r9 ; all of a and b are read before r is written
  54646. mov QWORD PTR [rcx+8], r10
  54647. mov QWORD PTR [rcx+16], r11
  54648. mov QWORD PTR [rcx+24], r12
  54649. adc rax, 0 ; rax = final carry (mov does not alter CF)
  54650. pop r12
  54651. ret
  54652. sp_256_add_4 ENDP
  54653. _text ENDS
  54654. ; /* Sub b from a into r. (r = a - b; 4 64-bit words)
  54655. ; * Returns 0 in rax when there is no borrow out of the top word and -1 (all ones) when there is.
  54656. ; * r A single precision integer. (rcx)
  54657. ; * a A single precision integer. (rdx)
  54658. ; * b A single precision integer. (r8)
  54659. ; */
  54660. _text SEGMENT READONLY PARA
  54661. sp_256_sub_4 PROC
  54662. push r12 ; r12 holds the fourth word and is restored before ret
  54663. xor rax, rax
  54664. mov r9, QWORD PTR [rdx]
  54665. mov r10, QWORD PTR [rdx+8]
  54666. mov r11, QWORD PTR [rdx+16]
  54667. mov r12, QWORD PTR [rdx+24]
  54668. sub r9, QWORD PTR [r8]
  54669. sbb r10, QWORD PTR [r8+8]
  54670. sbb r11, QWORD PTR [r8+16]
  54671. sbb r12, QWORD PTR [r8+24]
  54672. mov QWORD PTR [rcx], r9 ; all of a and b are read before r is written
  54673. mov QWORD PTR [rcx+8], r10
  54674. mov QWORD PTR [rcx+16], r11
  54675. mov QWORD PTR [rcx+24], r12
  54676. sbb rax, rax ; rax = 0 - CF: borrow becomes an all-ones mask
  54677. pop r12
  54678. ret
  54679. sp_256_sub_4 ENDP
  54680. _text ENDS
  54681. ; /* Conditionally copy a into r using the mask m. (branch-free: r ^= (r ^ a) & m, 4 64-bit words)
  54682. ; * m is -1 to copy and 0 when not.
  54683. ; * No value is returned deliberately; rax is used as scratch.
  54684. ; * r A single precision number to copy over. (rcx)
  54685. ; * a A single precision number to copy. (rdx)
  54686. ; * m Mask value to apply. (r8)
  54687. ; */
  54688. _text SEGMENT READONLY PARA
  54689. sp_256_cond_copy_4 PROC
  54690. mov rax, QWORD PTR [rcx]
  54691. mov r9, QWORD PTR [rcx+8]
  54692. mov r10, QWORD PTR [rcx+16]
  54693. mov r11, QWORD PTR [rcx+24]
  54694. xor rax, QWORD PTR [rdx] ; difference r ^ a
  54695. xor r9, QWORD PTR [rdx+8]
  54696. xor r10, QWORD PTR [rdx+16]
  54697. xor r11, QWORD PTR [rdx+24]
  54698. and rax, r8 ; keep the difference only where mask bits are set
  54699. and r9, r8
  54700. and r10, r8
  54701. and r11, r8
  54702. xor QWORD PTR [rcx], rax ; r ^ (r ^ a) = a when m = -1; r is rewritten unchanged when m = 0
  54703. xor QWORD PTR [rcx+8], r9
  54704. xor QWORD PTR [rcx+16], r10
  54705. xor QWORD PTR [rcx+24], r11
  54706. ret
  54707. sp_256_cond_copy_4 ENDP
  54708. _text ENDS
  54709. ; /* Multiply two Montgomery form numbers mod the modulus (prime).
  54710. ; * (r = a * b mod m)
  54711. ; * The 512-bit product is built in r11,r12,r13,r14,r15,rdi,rsi,rbx (low to high), then reduced with shift/add/sub steps specific to one hard-coded modulus.
  54712. ; * r Result of multiplication. (rcx)
  54713. ; * a First number to multiply in Montgomery form. (rdx on entry, moved to r10)
  54714. ; * b Second number to multiply in Montgomery form. (r8)
  54715. ; * m Modulus (prime). (not read: r9 is reused as scratch; the mask constants at the end match 2^256 - 2^224 + 2^192 + 2^96 - 1)
  54716. ; * mp Montgomery multiplier. (not read by this routine)
  54717. ; */
  54718. _text SEGMENT READONLY PARA
  54719. sp_256_mont_mul_4 PROC
  54720. push r12
  54721. push r13
  54722. push r14
  54723. push r15
  54724. push rdi
  54725. push rsi
  54726. push rbx
  54727. mov r10, rdx ; r10 = a; rdx is overwritten by every mul
  54728. ; A[0] * B[0]
  54729. mov rax, QWORD PTR [r8]
  54730. mul QWORD PTR [r10]
  54731. mov r11, rax
  54732. mov r12, rdx
  54733. ; A[0] * B[1]
  54734. mov rax, QWORD PTR [r8+8]
  54735. mul QWORD PTR [r10]
  54736. xor r13, r13
  54737. add r12, rax
  54738. adc r13, rdx
  54739. ; A[1] * B[0]
  54740. mov rax, QWORD PTR [r8]
  54741. mul QWORD PTR [r10+8]
  54742. xor r14, r14
  54743. add r12, rax
  54744. adc r13, rdx
  54745. adc r14, 0
  54746. ; A[0] * B[2]
  54747. mov rax, QWORD PTR [r8+16]
  54748. mul QWORD PTR [r10]
  54749. add r13, rax
  54750. adc r14, rdx
  54751. ; A[1] * B[1]
  54752. mov rax, QWORD PTR [r8+8]
  54753. mul QWORD PTR [r10+8]
  54754. xor r15, r15
  54755. add r13, rax
  54756. adc r14, rdx
  54757. adc r15, 0
  54758. ; A[2] * B[0]
  54759. mov rax, QWORD PTR [r8]
  54760. mul QWORD PTR [r10+16]
  54761. add r13, rax
  54762. adc r14, rdx
  54763. adc r15, 0
  54764. ; A[0] * B[3]
  54765. mov rax, QWORD PTR [r8+24]
  54766. mul QWORD PTR [r10]
  54767. xor rdi, rdi
  54768. add r14, rax
  54769. adc r15, rdx
  54770. adc rdi, 0
  54771. ; A[1] * B[2]
  54772. mov rax, QWORD PTR [r8+16]
  54773. mul QWORD PTR [r10+8]
  54774. add r14, rax
  54775. adc r15, rdx
  54776. adc rdi, 0
  54777. ; A[2] * B[1]
  54778. mov rax, QWORD PTR [r8+8]
  54779. mul QWORD PTR [r10+16]
  54780. add r14, rax
  54781. adc r15, rdx
  54782. adc rdi, 0
  54783. ; A[3] * B[0]
  54784. mov rax, QWORD PTR [r8]
  54785. mul QWORD PTR [r10+24]
  54786. add r14, rax
  54787. adc r15, rdx
  54788. adc rdi, 0
  54789. ; A[1] * B[3]
  54790. mov rax, QWORD PTR [r8+24]
  54791. mul QWORD PTR [r10+8]
  54792. xor rsi, rsi
  54793. add r15, rax
  54794. adc rdi, rdx
  54795. adc rsi, 0
  54796. ; A[2] * B[2]
  54797. mov rax, QWORD PTR [r8+16]
  54798. mul QWORD PTR [r10+16]
  54799. add r15, rax
  54800. adc rdi, rdx
  54801. adc rsi, 0
  54802. ; A[3] * B[1]
  54803. mov rax, QWORD PTR [r8+8]
  54804. mul QWORD PTR [r10+24]
  54805. add r15, rax
  54806. adc rdi, rdx
  54807. adc rsi, 0
  54808. ; A[2] * B[3]
  54809. mov rax, QWORD PTR [r8+24]
  54810. mul QWORD PTR [r10+16]
  54811. xor rbx, rbx
  54812. add rdi, rax
  54813. adc rsi, rdx
  54814. adc rbx, 0
  54815. ; A[3] * B[2]
  54816. mov rax, QWORD PTR [r8+16]
  54817. mul QWORD PTR [r10+24]
  54818. add rdi, rax
  54819. adc rsi, rdx
  54820. adc rbx, 0
  54821. ; A[3] * B[3]
  54822. mov rax, QWORD PTR [r8+24]
  54823. mul QWORD PTR [r10+24]
  54824. add rsi, rax
  54825. adc rbx, rdx
  54826. ; Start Reduction
  54827. ; mu = a[0]-a[3] + a[0]-a[2] << 32 << 64 + (a[0] * 2) << 192
  54828. ; - a[0] << 32 << 192
  54829. ; a[0]-a[3] + (a[0] * 2) << 192
  54830. mov rax, r11 ; mu is assembled in rax (word 0), r10 (word 1), r8 (word 2), rdx (word 3); a, b pointers are no longer needed
  54831. lea rdx, QWORD PTR [r14+2*r11]
  54832. mov r10, r12
  54833. mov r8, r13
  54834. mov r9, r13
  54835. ; a[0]-a[2] << 32
  54836. shl r11, 32
  54837. shld r9, r10, 32
  54838. shld r12, rax, 32
  54839. ; - a[0] << 32 << 192
  54840. sub rdx, r11
  54841. ; + a[0]-a[2] << 32 << 64
  54842. add r10, r11
  54843. adc r8, r12
  54844. adc rdx, r9
  54845. ; a += (mu << 256) - (mu << 224) + (mu << 192) + (mu << 96) - mu
  54846. xor r9, r9
  54847. ; a += mu << 256
  54848. add r15, rax
  54849. adc rdi, r10
  54850. adc rsi, r8
  54851. adc rbx, rdx
  54852. sbb r11, r11 ; r11 = 0 - CF; from here r11 tracks the net carry/borrow out of the top word
  54853. ; a += mu << 192
  54854. add r14, rax
  54855. adc r15, r10
  54856. mov r12, r10 ; keep the unshifted mu word 1 for the carry computation further down (mov leaves CF intact)
  54857. adc rdi, r8
  54858. adc rsi, rdx
  54859. adc rbx, 0
  54860. sbb r11, 0
  54861. ; mu <<= 32
  54862. shld r9, rdx, 32
  54863. shld rdx, r8, 32
  54864. shld r8, r10, 32
  54865. shld r10, rax, 32
  54866. shl rax, 32
  54867. ; a -= (mu << 32) << 192
  54868. sub r14, rax
  54869. sbb r15, r10
  54870. sbb rdi, r8
  54871. sbb rsi, rdx
  54872. sbb rbx, r9
  54873. adc r11, 0
  54874. ; a += (mu << 32) << 64
  54875. sub r12, rax ; only CF is consumed (r12 is not read again). NOTE(review): the borrow of mu[1] - (mu[0] << 32) appears to stand in for the carry out of the discarded low word - confirm before changing to add
  54876. adc r13, r10
  54877. adc r14, r8
  54878. adc r15, rdx
  54879. adc rdi, r9
  54880. adc rsi, 0
  54881. adc rbx, 0
  54882. sbb r11, 0
  54883. mov r10, 18446744069414584321 ; 0xFFFFFFFF00000001 = top word m[3] of the modulus
  54884. mov rax, r11
  54885. ; mask m and sub from result if overflow
  54886. ; m[0] = -1 & mask = mask; m[1] = 0x00000000FFFFFFFF & mask = mask >> 32
  54887. shr rax, 32
  54888. ; m[2] = 0 & mask = 0 (the sbb rsi, 0 below); m[3] = 0xFFFFFFFF00000001 & mask
  54889. and r10, r11
  54890. sub r15, r11 ; the reduced value lives in r15, rdi, rsi, rbx (low to high)
  54891. sbb rdi, rax
  54892. mov QWORD PTR [rcx], r15
  54893. sbb rsi, 0
  54894. mov QWORD PTR [rcx+8], rdi
  54895. sbb rbx, r10
  54896. mov QWORD PTR [rcx+16], rsi
  54897. mov QWORD PTR [rcx+24], rbx
  54898. pop rbx
  54899. pop rsi
  54900. pop rdi
  54901. pop r15
  54902. pop r14
  54903. pop r13
  54904. pop r12
  54905. ret
  54906. sp_256_mont_mul_4 ENDP
  54907. _text ENDS
  54908. ; /* Square the Montgomery form number mod the modulus (prime). (r = a * a mod m)
  54909. ; *
  54910. ; * r Result of squaring.
  54911. ; * a Number to square in Montgomery form.
  54912. ; * m Modulus (prime).
  54913. ; * mp Montgomery multiplier.
         ; *
         ; * Registers on entry, as used below: rcx = r, rdx = a.
         ; * m and mp are never read: r8 is overwritten with the pointer to a and r9
         ; * is only used as scratch. The reduction is hard-wired to the modulus
         ; * 2^256 - 2^224 + 2^192 + 2^96 - 1 (see the formula in the reduction).
         ; * The 512-bit square is built in r10,r11,r12,r13,r14,r15,rdi,rsi (low to
         ; * high) and the reduced 256-bit result is taken from r14,r15,rdi,rsi.
  54914. ; */
  54915. _text SEGMENT READONLY PARA
  54916. sp_256_mont_sqr_4 PROC
  54917. push r12
  54918. push r13
  54919. push r14
  54920. push r15
  54921. push rdi
  54922. push rsi
  54923. push rbx
  54924. mov r8, rdx ; r8 = a, because mul clobbers rdx
         ; Off-diagonal products A[i] * A[j], i < j, accumulated in r11..rdi.
  54925. ; A[0] * A[1]
  54926. mov rax, QWORD PTR [r8]
  54927. mul QWORD PTR [r8+8]
  54928. mov r11, rax
  54929. mov r12, rdx
  54930. ; A[0] * A[2]
  54931. mov rax, QWORD PTR [r8]
  54932. mul QWORD PTR [r8+16]
  54933. xor r13, r13
  54934. add r12, rax
  54935. adc r13, rdx
  54936. ; A[0] * A[3]
  54937. mov rax, QWORD PTR [r8]
  54938. mul QWORD PTR [r8+24]
  54939. xor r14, r14
  54940. add r13, rax
  54941. adc r14, rdx
  54942. ; A[1] * A[2]
  54943. mov rax, QWORD PTR [r8+8]
  54944. mul QWORD PTR [r8+16]
  54945. xor r15, r15
  54946. add r13, rax
  54947. adc r14, rdx
  54948. adc r15, 0
  54949. ; A[1] * A[3]
  54950. mov rax, QWORD PTR [r8+8]
  54951. mul QWORD PTR [r8+24]
  54952. add r14, rax
  54953. adc r15, rdx
  54954. ; A[2] * A[3]
  54955. mov rax, QWORD PTR [r8+16]
  54956. mul QWORD PTR [r8+24]
  54957. xor rdi, rdi
  54958. add r15, rax
  54959. adc rdi, rdx
  54960. ; Double
         ; Each off-diagonal product occurs twice in the square; rsi takes the
         ; bit shifted out of the top word.
  54961. xor rsi, rsi
  54962. add r11, r11
  54963. adc r12, r12
  54964. adc r13, r13
  54965. adc r14, r14
  54966. adc r15, r15
  54967. adc rdi, rdi
  54968. adc rsi, 0
         ; Diagonal products A[i] * A[i]. mul leaves the product in rdx:rax, which
         ; is consumed directly. rbx carries the high half of each square (plus
         ; carry) into the next pair of words.
  54969. ; A[0] * A[0]
  54970. mov rax, QWORD PTR [r8]
  54971. mul rax
  54974. mov r10, rax
  54975. mov rbx, rdx
  54976. ; A[1] * A[1]
  54977. mov rax, QWORD PTR [r8+8]
  54978. mul rax
  54981. add r11, rbx
  54982. adc r12, rax
  54983. adc rdx, 0
  54984. mov rbx, rdx
  54985. ; A[2] * A[2]
  54986. mov rax, QWORD PTR [r8+16]
  54987. mul rax
  54990. add r13, rbx
  54991. adc r14, rax
  54992. adc rdx, 0
  54993. mov rbx, rdx
  54994. ; A[3] * A[3]
  54995. mov rax, QWORD PTR [r8+24]
  54996. mul rax
  54999. add r15, rbx
  55000. adc rdi, rax
  55001. adc rsi, rdx
  55002. ; Start Reduction
  55003. ; mu = a[0]-a[3] + a[0]-a[2] << 32 << 64 + (a[0] * 2) << 192
  55004. ; - a[0] << 32 << 192
  55005. ; a[0]-a[3] + (a[0] * 2) << 192
         ; mu is held in rax, r8, rbx, rdx (low to high).
  55006. mov rax, r10
  55007. lea rdx, QWORD PTR [r13+2*r10]
  55008. mov r8, r11
  55009. mov rbx, r12
  55010. mov r9, r12
  55011. ; a[0]-a[2] << 32
  55012. shl r10, 32
  55013. shld r9, r8, 32
  55014. shld r11, rax, 32
  55015. ; - a[0] << 32 << 192
  55016. sub rdx, r10
  55017. ; + a[0]-a[2] << 32 << 64
  55018. add r8, r10
  55019. adc rbx, r11
  55020. adc rdx, r9
  55021. ; a += (mu << 256) - (mu << 224) + (mu << 192) + (mu << 96) - mu
  55022. xor r9, r9
  55023. ; a += mu << 256
  55024. add r14, rax
  55025. adc r15, r8
  55026. adc rdi, rbx
  55027. adc rsi, rdx
         ; r10 = 0 - carry. From here r10 tracks overflow out of the top word:
         ; decremented on carry out, incremented on borrow.
  55028. sbb r10, r10
  55029. ; a += mu << 192
  55030. add r13, rax
  55031. adc r14, r8
  55032. mov r11, r8 ; mov leaves the carry flag intact for the chain
  55033. adc r15, rbx
  55034. adc rdi, rdx
  55035. adc rsi, 0
  55036. sbb r10, 0
  55037. ; mu <<= 32
         ; r9 receives the 32 bits shifted out of the top word of mu.
  55038. shld r9, rdx, 32
  55039. shld rdx, rbx, 32
  55040. shld rbx, r8, 32
  55041. shld r8, rax, 32
  55042. shl rax, 32
  55043. ; a -= (mu << 32) << 192
  55044. sub r13, rax
  55045. sbb r14, r8
  55046. sbb r15, rbx
  55047. sbb rdi, rdx
  55048. sbb rsi, r9
  55049. adc r10, 0
  55050. ; a += (mu << 32) << 64
         ; The leading sub only produces the flag that seeds the adc chain; the
         ; value left in r11 is not used afterwards.
  55051. sub r11, rax
  55052. adc r12, r8
  55053. adc r13, rbx
  55054. adc r14, rdx
  55055. adc r15, r9
  55056. adc rdi, 0
  55057. adc rsi, 0
  55058. sbb r10, 0
  55059. mov r8, 18446744069414584321 ; 0xFFFFFFFF00000001 = m[3]
  55060. mov rax, r10
  55061. ; mask m and sub from result if overflow
  55062. ; m[0] = -1 & mask = mask
  55063. shr rax, 32 ; rax = m[1] & mask (0x00000000FFFFFFFF when mask is all ones)
  55064. ; m[2] = 0 & mask = 0
  55065. and r8, r10
         ; Subtract the masked modulus and store. The interleaved mov stores do
         ; not modify flags, so the borrow chain stays intact.
  55066. sub r14, r10
  55067. sbb r15, rax
  55068. mov QWORD PTR [rcx], r14
  55069. sbb rdi, 0
  55070. mov QWORD PTR [rcx+8], r15
  55071. sbb rsi, r8
  55072. mov QWORD PTR [rcx+16], rdi
  55073. mov QWORD PTR [rcx+24], rsi
  55074. pop rbx
  55075. pop rsi
  55076. pop rdi
  55077. pop r15
  55078. pop r14
  55079. pop r13
  55080. pop r12
  55081. ret
  55082. sp_256_mont_sqr_4 ENDP
  55083. _text ENDS
  55084. ; /* Compare a with b in constant time.
  55085. ; *
  55086. ; * a A single precision integer.
  55087. ; * b A single precision integer.
  55088. ; * return -ve, 0 or +ve if a is less than, equal to or greater than b
  55089. ; * respectively.
         ; *
         ; * Registers on entry, as used below: rcx = a, rdx = b. Result in rax:
         ; * 1, 0 or -1.
         ; * The four 64-bit words are compared from most significant (offset 24)
         ; * to least significant (offset 0) with no branches. r8 is an all-ones
         ; * mask that is cleared at the first differing word, so later words are
         ; * masked to zero and cannot change the result.
  55090. ; */
  55091. _text SEGMENT READONLY PARA
  55092. sp_256_cmp_4 PROC
  55093. push r12
  55094. xor r9, r9 ; r9 = 0, used to clear the mask
  55095. mov r8, -1 ; r8 = mask: all ones while the words compared so far are equal
  55096. mov rax, -1 ; rax = result; turned into 0 by the final xor if all words are equal
  55097. mov r10, 1 ; r10 = value returned when a > b
         ; Word 3 (most significant)
  55098. mov r11, QWORD PTR [rcx+24]
  55099. mov r12, QWORD PTR [rdx+24]
  55100. and r11, r8
  55101. and r12, r8
  55102. sub r11, r12
  55103. cmova rax, r10 ; a word > b word (unsigned): result = 1
  55104. cmovc rax, r8 ; a word < b word: result = mask (still -1 when this can fire)
  55105. cmovnz r8, r9 ; words differ: clear the mask
         ; Word 2
  55106. mov r11, QWORD PTR [rcx+16]
  55107. mov r12, QWORD PTR [rdx+16]
  55108. and r11, r8
  55109. and r12, r8
  55110. sub r11, r12
  55111. cmova rax, r10
  55112. cmovc rax, r8
  55113. cmovnz r8, r9
         ; Word 1
  55114. mov r11, QWORD PTR [rcx+8]
  55115. mov r12, QWORD PTR [rdx+8]
  55116. and r11, r8
  55117. and r12, r8
  55118. sub r11, r12
  55119. cmova rax, r10
  55120. cmovc rax, r8
  55121. cmovnz r8, r9
         ; Word 0 (least significant)
  55122. mov r11, QWORD PTR [rcx]
  55123. mov r12, QWORD PTR [rdx]
  55124. and r11, r8
  55125. and r12, r8
  55126. sub r11, r12
  55127. cmova rax, r10
  55128. cmovc rax, r8
  55129. cmovnz r8, r9
         ; All words equal: rax = -1 and r8 = -1, so the xor gives 0.
         ; Otherwise r8 = 0 and rax keeps its 1 / -1 value.
  55130. xor rax, r8
  55131. pop r12
  55132. ret
  55133. sp_256_cmp_4 ENDP
  55134. _text ENDS
  55135. ; /* Conditionally subtract b from a using the mask m.
  55136. ; * m is -1 to subtract and 0 when not subtracting.
  55137. ; *
  55138. ; * r A single precision number representing condition subtract result.
  55139. ; * a A single precision number to subtract from.
  55140. ; * b A single precision number to subtract.
  55141. ; * m Mask value to apply.
         ; *
         ; * Registers on entry, as used below: rcx = r, rdx = a, r8 = b, r9 = m.
         ; * Returns in rax: 0 if the subtraction did not borrow, -1 if it did.
  55142. ; */
  55143. _text SEGMENT READONLY PARA
  55144. sp_256_cond_sub_4 PROC
  55145. push r12
  55146. push r13
  55147. push r14
  55148. push r15
  55149. push rdi
  55150. push rsi
         ; Load b and AND every word with the mask, so b & m is either b or 0.
  55151. mov r14, QWORD PTR [r8]
  55152. mov r15, QWORD PTR [r8+8]
  55153. mov rdi, QWORD PTR [r8+16]
  55154. mov rsi, QWORD PTR [r8+24]
  55155. and r14, r9
  55156. and r15, r9
  55157. and rdi, r9
  55158. and rsi, r9
         ; Load a and subtract the masked b with a borrow chain.
  55159. mov r10, QWORD PTR [rdx]
  55160. mov r11, QWORD PTR [rdx+8]
  55161. mov r12, QWORD PTR [rdx+16]
  55162. mov r13, QWORD PTR [rdx+24]
  55163. sub r10, r14
  55164. sbb r11, r15
  55165. sbb r12, rdi
  55166. sbb r13, rsi
         ; The mov stores do not modify flags, so the final borrow survives them.
  55167. mov QWORD PTR [rcx], r10
  55168. mov QWORD PTR [rcx+8], r11
  55169. mov QWORD PTR [rcx+16], r12
  55170. mov QWORD PTR [rcx+24], r13
  55171. sbb rax, rax ; rax = 0 - borrow, independent of the previous rax
  55172. pop rsi
  55173. pop rdi
  55174. pop r15
  55175. pop r14
  55176. pop r13
  55177. pop r12
  55178. ret
  55179. sp_256_cond_sub_4 ENDP
  55180. _text ENDS
  55181. ; /* Reduce the number back to 256 bits using Montgomery reduction.
  55182. ; *
  55183. ; * a A single precision number to reduce in place.
  55184. ; * m The single precision number representing the modulus.
  55185. ; * mp The digit representing the negative inverse of m mod 2^n.
         ; *
         ; * Registers on entry, as used below: rcx = a.
         ; * m (rdx) and mp (r8) are overwritten without being read. The reduction
         ; * is hard-wired to the modulus 2^256 - 2^224 + 2^192 + 2^96 - 1 (see the
         ; * formula in the reduction).
         ; * Reads eight 64-bit words a[0..7] and writes the result to a[0..3].
  55186. ; */
  55187. _text SEGMENT READONLY PARA
  55188. sp_256_mont_reduce_4 PROC
  55189. push rbx
  55190. push rsi
  55191. push r12
  55192. push r13
  55193. push r14
  55194. push r15
  55195. push rdi
  55196. mov r8, rcx ; r8 = a; rcx is reused as scratch below
         ; Load a[0..7] into r9, r10, r11, r12, r13, r14, r15, rdi (low to high).
  55197. mov r9, QWORD PTR [r8]
  55198. mov r10, QWORD PTR [r8+8]
  55199. mov r11, QWORD PTR [r8+16]
  55200. mov r12, QWORD PTR [r8+24]
  55201. mov r13, QWORD PTR [r8+32]
  55202. mov r14, QWORD PTR [r8+40]
  55203. mov r15, QWORD PTR [r8+48]
  55204. mov rdi, QWORD PTR [r8+56]
  55205. ; Start Reduction
  55206. ; mu = a[0]-a[3] + a[0]-a[2] << 32 << 64 + (a[0] * 2) << 192
  55207. ; - a[0] << 32 << 192
  55208. ; a[0]-a[3] + (a[0] * 2) << 192
         ; mu is held in rax, rbx, rcx, rdx (low to high).
  55209. mov rax, r9
  55210. lea rdx, QWORD PTR [r12+2*r9]
  55211. mov rbx, r10
  55212. mov rcx, r11
  55213. mov rsi, r11
  55214. ; a[0]-a[2] << 32
  55215. shl r9, 32
  55216. shld rsi, rbx, 32
  55217. shld r10, rax, 32
  55218. ; - a[0] << 32 << 192
  55219. sub rdx, r9
  55220. ; + a[0]-a[2] << 32 << 64
  55221. add rbx, r9
  55222. adc rcx, r10
  55223. adc rdx, rsi
  55224. ; a += (mu << 256) - (mu << 224) + (mu << 192) + (mu << 96) - mu
  55225. xor rsi, rsi
  55226. ; a += mu << 256
  55227. add r13, rax
  55228. adc r14, rbx
  55229. adc r15, rcx
  55230. adc rdi, rdx
         ; r9 = 0 - carry. From here r9 tracks overflow out of the top word:
         ; decremented on carry out, incremented on borrow.
  55231. sbb r9, r9
  55232. ; a += mu << 192
  55233. add r12, rax
  55234. adc r13, rbx
  55235. mov r10, rbx ; mov leaves the carry flag intact for the chain
  55236. adc r14, rcx
  55237. adc r15, rdx
  55238. adc rdi, 0
  55239. sbb r9, 0
  55240. ; mu <<= 32
         ; rsi receives the 32 bits shifted out of the top word of mu.
  55241. shld rsi, rdx, 32
  55242. shld rdx, rcx, 32
  55243. shld rcx, rbx, 32
  55244. shld rbx, rax, 32
  55245. shl rax, 32
  55246. ; a -= (mu << 32) << 192
  55247. sub r12, rax
  55248. sbb r13, rbx
  55249. sbb r14, rcx
  55250. sbb r15, rdx
  55251. sbb rdi, rsi
  55252. adc r9, 0
  55253. ; a += (mu << 32) << 64
         ; The leading sub only produces the flag that seeds the adc chain; the
         ; value left in r10 is not used afterwards.
  55254. sub r10, rax
  55255. adc r11, rbx
  55256. adc r12, rcx
  55257. adc r13, rdx
  55258. adc r14, rsi
  55259. adc r15, 0
  55260. adc rdi, 0
  55261. sbb r9, 0
  55262. mov rbx, 18446744069414584321 ; 0xFFFFFFFF00000001 = m[3]
  55263. mov rax, r9
  55264. ; mask m and sub from result if overflow
  55265. ; m[0] = -1 & mask = mask
  55266. shr rax, 32 ; rax = m[1] & mask (0x00000000FFFFFFFF when mask is all ones)
  55267. ; m[2] = 0 & mask = 0
  55268. and rbx, r9
         ; Subtract the masked modulus and store to a[0..3]. The interleaved mov
         ; stores do not modify flags, so the borrow chain stays intact.
  55269. sub r13, r9
  55270. sbb r14, rax
  55271. mov QWORD PTR [r8], r13
  55272. sbb r15, 0
  55273. mov QWORD PTR [r8+8], r14
  55274. sbb rdi, rbx
  55275. mov QWORD PTR [r8+16], r15
  55276. mov QWORD PTR [r8+24], rdi
  55277. pop rdi
  55278. pop r15
  55279. pop r14
  55280. pop r13
  55281. pop r12
  55282. pop rsi
  55283. pop rbx
  55284. ret
  55285. sp_256_mont_reduce_4 ENDP
  55286. _text ENDS
  55287. ; /* Reduce the number back to 256 bits using Montgomery reduction.
  55288. ; *
  55289. ; * a A single precision number to reduce in place.
  55290. ; * m The single precision number representing the modulus.
  55291. ; * mp The digit representing the negative inverse of m mod 2^n.
         ; *
         ; * Registers on entry, as used below: rcx = a, rdx = m, r8 = mp.
         ; * Unlike sp_256_mont_reduce_4, this routine reads m and mp and performs a
         ; * generic word-by-word reduction over four iterations.
         ; * Reads and updates a[0..7] in place, then writes the result to a[0..3].
  55292. ; */
  55293. _text SEGMENT READONLY PARA
  55294. sp_256_mont_reduce_order_4 PROC
  55295. push r12
  55296. push r13
  55297. push r14
  55298. push r15
  55299. push rdi
  55300. push rsi
  55301. mov r9, rdx ; r9 = m, because mul clobbers rdx
  55302. ; i = 0
  55303. xor rdi, rdi ; rdi = carry into the top word from the previous iteration
  55304. mov r10, 4 ; r10 = remaining iterations
  55305. mov r15, rcx ; r15 = &a[i]; rcx keeps the base of a
  55306. L_mont_loop_4:
  55307. ; mu = a[i] * mp
  55308. mov r14, QWORD PTR [r15]
  55309. imul r14, r8
  55310. ; a[i+0] += m[0] * mu
  55311. mov rax, QWORD PTR [r9]
  55312. mov r12, QWORD PTR [r9+8]
  55313. mul r14
  55314. mov rsi, QWORD PTR [r15]
  55315. add rsi, rax
  55316. mov r11, rdx
  55317. mov QWORD PTR [r15], rsi
  55318. adc r11, 0
  55319. ; a[i+1] += m[1] * mu
  55320. mov rax, r12
  55321. mul r14
  55322. mov r12, QWORD PTR [r9+16]
  55323. mov rsi, QWORD PTR [r15+8]
  55324. add rax, r11
  55325. mov r13, rdx
  55326. adc r13, 0
  55327. add rsi, rax
  55328. mov QWORD PTR [r15+8], rsi
  55329. adc r13, 0
  55330. ; a[i+2] += m[2] * mu
  55331. mov rax, r12
  55332. mul r14
  55333. mov r12, QWORD PTR [r9+24]
  55334. mov rsi, QWORD PTR [r15+16]
  55335. add rax, r13
  55336. mov r11, rdx
  55337. adc r11, 0
  55338. add rsi, rax
  55339. mov QWORD PTR [r15+16], rsi
  55340. adc r11, 0
  55341. ; a[i+3] += m[3] * mu
  55342. mov rax, r12
  55343. mul r14
  55344. mov rsi, QWORD PTR [r15+24]
  55345. add rax, r11
  55346. adc rdx, rdi ; fold in the carry saved by the previous iteration
         ; mov is used instead of xor so that the carry flag from the adc above
         ; survives for the adc below.
  55347. mov rdi, 0
  55348. adc rdi, 0
  55349. add rsi, rax
  55350. mov QWORD PTR [r15+24], rsi
  55351. adc QWORD PTR [r15+32], rdx
  55352. adc rdi, 0
  55353. ; i += 1
  55354. add r15, 8
  55355. dec r10
  55356. jnz L_mont_loop_4
         ; Conditional final subtraction: the candidate result is a[4..7], and m
         ; is subtracted from it only when the loop left a carry in rdi.
  55357. xor rax, rax
  55358. mov rdx, QWORD PTR [rcx+32]
  55359. mov r10, QWORD PTR [rcx+40]
  55360. mov rsi, QWORD PTR [rcx+48]
  55361. mov r11, QWORD PTR [rcx+56]
  55362. sub rax, rdi ; rax = 0 - carry, used as the mask
  55363. mov r12, QWORD PTR [r9]
  55364. mov r13, QWORD PTR [r9+8]
  55365. mov r14, QWORD PTR [r9+16]
  55366. mov r15, QWORD PTR [r9+24]
  55367. and r12, rax
  55368. and r13, rax
  55369. and r14, rax
  55370. and r15, rax
  55371. sub rdx, r12
  55372. sbb r10, r13
  55373. sbb rsi, r14
  55374. sbb r11, r15
         ; Store the reduced value in the low four words of a.
  55375. mov QWORD PTR [rcx], rdx
  55376. mov QWORD PTR [rcx+8], r10
  55377. mov QWORD PTR [rcx+16], rsi
  55378. mov QWORD PTR [rcx+24], r11
  55379. pop rsi
  55380. pop rdi
  55381. pop r15
  55382. pop r14
  55383. pop r13
  55384. pop r12
  55385. ret
  55386. sp_256_mont_reduce_order_4 ENDP
  55387. _text ENDS
  55388. ; /* Add two Montgomery form numbers (r = a + b % m).
  55389. ; *
  55390. ; * r Result of addition.
  55391. ; * a First number to add in Montgomery form.
  55392. ; * b Second number to add in Montgomery form.
  55393. ; * m Modulus (prime).
         ; *
         ; * Registers on entry, as used below: rcx = r, rdx = a, r8 = b.
         ; * m is never read (r9 is overwritten by the second load). The modulus
         ; * words are hard-coded as -1, 0x00000000FFFFFFFF, 0, 0xFFFFFFFF00000001
         ; * (low to high).
  55394. ; */
  55395. _text SEGMENT READONLY PARA
  55396. sp_256_mont_add_4 PROC
  55397. push r12
  55398. push r13
  55399. mov rax, QWORD PTR [rdx]
  55400. mov r9, QWORD PTR [rdx+8]
  55401. mov r10, QWORD PTR [rdx+16]
  55402. mov r11, QWORD PTR [rdx+24]
         ; a + b. The constant loads are mov instructions, which do not modify
         ; flags, so they can sit inside the carry chain.
  55403. add rax, QWORD PTR [r8]
  55404. mov r12, 4294967295 ; m[1]
  55405. adc r9, QWORD PTR [r8+8]
  55406. mov r13, 18446744069414584321 ; m[3]
  55407. adc r10, QWORD PTR [r8+16]
  55408. adc r11, QWORD PTR [r8+24]
  55409. sbb rdx, rdx ; rdx = mask: -1 if the addition carried out, else 0
  55410. and r12, rdx
  55411. and r13, rdx
         ; First conditional subtraction of m (m[0] & mask equals the mask).
  55412. sub rax, rdx
  55413. sbb r9, r12
  55414. sbb r10, 0
  55415. sbb r11, r13
         ; mask + borrow: becomes 0 when that subtraction borrowed, and stays -1
         ; when a second subtraction is still needed.
  55416. adc rdx, 0
  55417. and r12, rdx
  55418. and r13, rdx
         ; Second conditional subtraction, interleaved with the stores.
  55419. sub rax, rdx
  55420. sbb r9, r12
  55421. mov QWORD PTR [rcx], rax
  55422. sbb r10, 0
  55423. mov QWORD PTR [rcx+8], r9
  55424. sbb r11, r13
  55425. mov QWORD PTR [rcx+16], r10
  55426. mov QWORD PTR [rcx+24], r11
  55427. pop r13
  55428. pop r12
  55429. ret
  55430. sp_256_mont_add_4 ENDP
  55431. _text ENDS
  55432. ; /* Double a Montgomery form number (r = a + a % m).
  55433. ; *
  55434. ; * r Result of doubling.
  55435. ; * a Number to double in Montgomery form.
  55436. ; * m Modulus (prime).
         ; *
         ; * Registers on entry, as used below: rcx = r, rdx = a.
         ; * m is never read (r8 is overwritten by the second load). The modulus
         ; * words are hard-coded as -1, 0x00000000FFFFFFFF, 0, 0xFFFFFFFF00000001
         ; * (low to high).
  55437. ; */
  55438. _text SEGMENT READONLY PARA
  55439. sp_256_mont_dbl_4 PROC
  55440. push r12
  55441. push r13
  55442. mov rax, QWORD PTR [rdx]
  55443. mov r8, QWORD PTR [rdx+8]
  55444. mov r9, QWORD PTR [rdx+16]
  55445. mov r10, QWORD PTR [rdx+24]
         ; a + a. The constant loads are mov instructions, which do not modify
         ; flags, so they can sit inside the carry chain.
  55446. add rax, rax
  55447. mov r11, 4294967295 ; m[1]
  55448. adc r8, r8
  55449. mov r12, 18446744069414584321 ; m[3]
  55450. adc r9, r9
  55451. adc r10, r10
  55452. sbb r13, r13 ; r13 = mask: -1 if the doubling carried out, else 0
  55453. and r11, r13
  55454. and r12, r13
         ; First conditional subtraction of m (m[0] & mask equals the mask).
  55455. sub rax, r13
  55456. sbb r8, r11
  55457. sbb r9, 0
  55458. sbb r10, r12
         ; mask + borrow: becomes 0 when that subtraction borrowed, and stays -1
         ; when a second subtraction is still needed.
  55459. adc r13, 0
  55460. and r11, r13
  55461. and r12, r13
         ; Second conditional subtraction, interleaved with the stores.
  55462. sub rax, r13
  55463. sbb r8, r11
  55464. mov QWORD PTR [rcx], rax
  55465. sbb r9, 0
  55466. mov QWORD PTR [rcx+8], r8
  55467. sbb r10, r12
  55468. mov QWORD PTR [rcx+16], r9
  55469. mov QWORD PTR [rcx+24], r10
  55470. pop r13
  55471. pop r12
  55472. ret
  55473. sp_256_mont_dbl_4 ENDP
  55474. _text ENDS
  55475. ; /* Triple a Montgomery form number (r = a + a + a % m).
  55476. ; *
  55477. ; * r Result of Tripling.
  55478. ; * a Number to triple in Montgomery form.
  55479. ; * m Modulus (prime).
         ; *
         ; * Registers on entry, as used below: rcx = r, rdx = a.
         ; * m is never read (r8 is overwritten by the second load). The modulus
         ; * words are hard-coded as -1, 0x00000000FFFFFFFF, 0, 0xFFFFFFFF00000001
         ; * (low to high).
         ; * Computed as a double followed by an add of a, each followed by two
         ; * conditional subtractions of the modulus.
  55480. ; */
  55481. _text SEGMENT READONLY PARA
  55482. sp_256_mont_tpl_4 PROC
  55483. push r12
  55484. push r13
  55485. mov rax, QWORD PTR [rdx]
  55486. mov r8, QWORD PTR [rdx+8]
  55487. mov r9, QWORD PTR [rdx+16]
  55488. mov r10, QWORD PTR [rdx+24]
         ; Step 1: t = a + a. The mov constant loads do not modify flags.
  55489. add rax, rax
  55490. mov r11, 4294967295 ; m[1]
  55491. adc r8, r8
  55492. mov r12, 18446744069414584321 ; m[3]
  55493. adc r9, r9
  55494. adc r10, r10
  55495. sbb r13, r13 ; r13 = mask: -1 if the doubling carried out, else 0
  55496. and r11, r13
  55497. and r12, r13
         ; First conditional subtraction of m.
  55498. sub rax, r13
  55499. sbb r8, r11
  55500. sbb r9, 0
  55501. sbb r10, r12
         ; mask + borrow: becomes 0 when that subtraction borrowed.
  55502. adc r13, 0
  55503. and r11, r13
  55504. and r12, r13
         ; Second conditional subtraction of m.
  55505. sub rax, r13
  55506. sbb r8, r11
  55507. sbb r9, 0
  55508. sbb r10, r12
         ; Step 2: t = t + a. The modulus constants are reloaded because the
         ; masking above modified them.
  55509. add rax, QWORD PTR [rdx]
  55510. mov r11, 4294967295
  55511. adc r8, QWORD PTR [rdx+8]
  55512. mov r12, 18446744069414584321
  55513. adc r9, QWORD PTR [rdx+16]
  55514. adc r10, QWORD PTR [rdx+24]
  55515. sbb r13, r13 ; r13 = mask: -1 if the addition carried out, else 0
  55516. and r11, r13
  55517. and r12, r13
         ; First conditional subtraction of m.
  55518. sub rax, r13
  55519. sbb r8, r11
  55520. sbb r9, 0
  55521. sbb r10, r12
  55522. adc r13, 0
  55523. and r11, r13
  55524. and r12, r13
         ; Second conditional subtraction, interleaved with the stores.
  55525. sub rax, r13
  55526. sbb r8, r11
  55527. mov QWORD PTR [rcx], rax
  55528. sbb r9, 0
  55529. mov QWORD PTR [rcx+8], r8
  55530. sbb r10, r12
  55531. mov QWORD PTR [rcx+16], r9
  55532. mov QWORD PTR [rcx+24], r10
  55533. pop r13
  55534. pop r12
  55535. ret
  55536. sp_256_mont_tpl_4 ENDP
  55537. _text ENDS
  55538. ; /* Subtract two Montgomery form numbers (r = a - b % m).
  55539. ; *
  55540. ; * r Result of subtraction.
  55541. ; * a Number to subtract from in Montgomery form.
  55542. ; * b Number to subtract with in Montgomery form.
  55543. ; * m Modulus (prime).
         ; *
         ; * Registers on entry, as used below: rcx = r, rdx = a, r8 = b.
         ; * m is never read (r9 is overwritten by the second load). The modulus
         ; * words are hard-coded as -1, 0x00000000FFFFFFFF, 0, 0xFFFFFFFF00000001
         ; * (low to high).
  55544. ; */
  55545. _text SEGMENT READONLY PARA
  55546. sp_256_mont_sub_4 PROC
  55547. push r12
  55548. push r13
  55549. mov rax, QWORD PTR [rdx]
  55550. mov r9, QWORD PTR [rdx+8]
  55551. mov r10, QWORD PTR [rdx+16]
  55552. mov r11, QWORD PTR [rdx+24]
         ; a - b. The constant loads are mov instructions, which do not modify
         ; flags, so they can sit inside the borrow chain.
  55553. sub rax, QWORD PTR [r8]
  55554. mov r12, 4294967295 ; m[1]
  55555. sbb r9, QWORD PTR [r8+8]
  55556. mov r13, 18446744069414584321 ; m[3]
  55557. sbb r10, QWORD PTR [r8+16]
  55558. sbb r11, QWORD PTR [r8+24]
  55559. sbb rdx, rdx ; rdx = mask: -1 if the subtraction borrowed, else 0
  55560. and r12, rdx
  55561. and r13, rdx
         ; First conditional addition of m (m[0] & mask equals the mask).
  55562. add rax, rdx
  55563. adc r9, r12
  55564. adc r10, 0
  55565. adc r11, r13
         ; mask + carry: becomes 0 when that addition carried out, and stays -1
         ; when a second addition is still needed.
  55566. adc rdx, 0
  55567. and r12, rdx
  55568. and r13, rdx
         ; Second conditional addition, interleaved with the stores.
  55569. add rax, rdx
  55570. adc r9, r12
  55571. mov QWORD PTR [rcx], rax
  55572. adc r10, 0
  55573. mov QWORD PTR [rcx+8], r9
  55574. adc r11, r13
  55575. mov QWORD PTR [rcx+16], r10
  55576. mov QWORD PTR [rcx+24], r11
  55577. pop r13
  55578. pop r12
  55579. ret
  55580. sp_256_mont_sub_4 ENDP
  55581. _text ENDS
  55582. ; /* Subtract two Montgomery form numbers (r = a - b % m).
  55583. ; *
  55584. ; * b is less than the modulus.
  55585. ; *
  55586. ; * r Result of subtraction.
  55587. ; * a Number to subtract from in Montgomery form.
  55588. ; * b Number to subtract with in Montgomery form.
  55589. ; * m Modulus (prime).
         ; *
         ; * Registers on entry, as used below: rcx = r, rdx = a, r8 = b.
         ; * m is never read (r9 is overwritten by the second load). The modulus
         ; * words are hard-coded as -1, 0x00000000FFFFFFFF, 0, 0xFFFFFFFF00000001
         ; * (low to high).
         ; * Unlike sp_256_mont_sub_4, only one conditional addition of the modulus
         ; * is performed.
  55590. ; */
  55591. _text SEGMENT READONLY PARA
  55592. sp_256_mont_sub_lower_4 PROC
  55593. push r12
  55594. push r13
  55595. mov rax, QWORD PTR [rdx]
  55596. mov r9, QWORD PTR [rdx+8]
  55597. mov r10, QWORD PTR [rdx+16]
  55598. mov r11, QWORD PTR [rdx+24]
         ; a - b. The constant loads are mov instructions, which do not modify
         ; flags, so they can sit inside the borrow chain.
  55599. sub rax, QWORD PTR [r8]
  55600. mov r12, 4294967295 ; m[1]
  55601. sbb r9, QWORD PTR [r8+8]
  55602. mov r13, 18446744069414584321 ; m[3]
  55603. sbb r10, QWORD PTR [r8+16]
  55604. sbb r11, QWORD PTR [r8+24]
  55605. sbb rdx, rdx ; rdx = mask: -1 if the subtraction borrowed, else 0
  55606. and r12, rdx
  55607. and r13, rdx
         ; Single conditional addition of m, interleaved with the stores.
  55608. add rax, rdx
  55609. adc r9, r12
  55610. mov QWORD PTR [rcx], rax
  55611. adc r10, 0
  55612. mov QWORD PTR [rcx+8], r9
  55613. adc r11, r13
  55614. mov QWORD PTR [rcx+16], r10
  55615. mov QWORD PTR [rcx+24], r11
  55616. pop r13
  55617. pop r12
  55618. ret
  55619. sp_256_mont_sub_lower_4 ENDP
  55620. _text ENDS
  55621. ; /* Divide the number by 2 mod the modulus (prime). (r = a / 2 % m)
  55622. ; *
  55623. ; * r Result of division by 2.
  55624. ; * a Number to divide.
  55625. ; * m Modulus (prime).
         ; *
         ; * Registers on entry, as used below: rcx = r, rdx = a.
         ; * m is never read (r8 is overwritten by the second load). The modulus
         ; * words are hard-coded as -1, 0x00000000FFFFFFFF, 0, 0xFFFFFFFF00000001
         ; * (low to high).
         ; * If a is odd the modulus is added first to make the value even, then
         ; * the 257-bit sum is shifted right by one bit.
  55626. ; */
  55627. _text SEGMENT READONLY PARA
  55628. sp_256_div2_4 PROC
  55629. push r12
  55630. push r13
  55631. mov rax, QWORD PTR [rdx]
  55632. mov r8, QWORD PTR [rdx+8]
  55633. mov r9, QWORD PTR [rdx+16]
  55634. mov r10, QWORD PTR [rdx+24]
  55635. mov r11, 4294967295 ; m[1]
  55636. mov r12, 18446744069414584321 ; m[3]
         ; r13 = mask: -1 if a is odd, 0 if a is even.
  55637. mov r13, rax
  55638. and r13, 1
  55639. neg r13
  55640. and r11, r13
  55641. and r12, r13
         ; a += m & mask (m[0] & mask equals the mask).
  55642. add rax, r13
  55643. adc r8, r11
  55644. adc r9, 0
  55645. adc r10, r12
         ; mov is used instead of xor so that the carry flag survives;
         ; r13 becomes bit 256 of the sum.
  55646. mov r13, 0
  55647. adc r13, 0
         ; Shift the 257-bit value right by one; each word takes its new top bit
         ; from the next higher word.
  55648. shrd rax, r8, 1
  55649. shrd r8, r9, 1
  55650. shrd r9, r10, 1
  55651. shrd r10, r13, 1
  55652. mov QWORD PTR [rcx], rax
  55653. mov QWORD PTR [rcx+8], r8
  55654. mov QWORD PTR [rcx+16], r9
  55655. mov QWORD PTR [rcx+24], r10
  55656. pop r13
  55657. pop r12
  55658. ret
  55659. sp_256_div2_4 ENDP
  55660. _text ENDS
  55661. ; /* Triple a Montgomery form number (r = a + a + a % m).
  55662. ; *
  55663. ; * a is less than m.
  55664. ; *
  55665. ; * r Result of Tripling.
  55666. ; * a Number to triple in Montgomery form.
  55667. ; * m Modulus (prime).
         ; *
         ; * Registers on entry, as used below: rcx = r, rdx = a.
         ; * m is never read (r8 is overwritten by the second load). The modulus
         ; * words are hard-coded as -1, 0x00000000FFFFFFFF, 0, 0xFFFFFFFF00000001
         ; * (low to high).
         ; * Unlike sp_256_mont_tpl_4, the doubling step is followed by only one
         ; * conditional subtraction; the final add is still followed by two.
  55668. ; */
  55669. _text SEGMENT READONLY PARA
  55670. sp_256_mont_tpl_lower_4 PROC
  55671. push r12
  55672. push r13
  55673. mov rax, QWORD PTR [rdx]
  55674. mov r8, QWORD PTR [rdx+8]
  55675. mov r9, QWORD PTR [rdx+16]
  55676. mov r10, QWORD PTR [rdx+24]
         ; Step 1: t = a + a. The mov constant loads do not modify flags.
  55677. add rax, rax
  55678. mov r11, 4294967295 ; m[1]
  55679. adc r8, r8
  55680. mov r12, 18446744069414584321 ; m[3]
  55681. adc r9, r9
  55682. adc r10, r10
  55683. sbb r13, r13 ; r13 = mask: -1 if the doubling carried out, else 0
  55684. and r11, r13
  55685. and r12, r13
         ; Single conditional subtraction of m.
  55686. sub rax, r13
  55687. sbb r8, r11
  55688. sbb r9, 0
  55689. sbb r10, r12
         ; Step 2: t = t + a. The modulus constants are reloaded because the
         ; masking above modified them.
  55690. add rax, QWORD PTR [rdx]
  55691. mov r11, 4294967295
  55692. adc r8, QWORD PTR [rdx+8]
  55693. mov r12, 18446744069414584321
  55694. adc r9, QWORD PTR [rdx+16]
  55695. adc r10, QWORD PTR [rdx+24]
  55696. sbb r13, r13 ; r13 = mask: -1 if the addition carried out, else 0
  55697. and r11, r13
  55698. and r12, r13
         ; First conditional subtraction of m.
  55699. sub rax, r13
  55700. sbb r8, r11
  55701. sbb r9, 0
  55702. sbb r10, r12
         ; mask + borrow: becomes 0 when that subtraction borrowed.
  55703. adc r13, 0
  55704. and r11, r13
  55705. and r12, r13
         ; Second conditional subtraction, interleaved with the stores.
  55706. sub rax, r13
  55707. sbb r8, r11
  55708. mov QWORD PTR [rcx], rax
  55709. sbb r9, 0
  55710. mov QWORD PTR [rcx+8], r8
  55711. sbb r10, r12
  55712. mov QWORD PTR [rcx+16], r9
  55713. mov QWORD PTR [rcx+24], r10
  55714. pop r13
  55715. pop r12
  55716. ret
  55717. sp_256_mont_tpl_lower_4 ENDP
  55718. _text ENDS
  55719. ; /* Two Montgomery numbers, subtract double second from first (r = a - 2.b % m).
  55720. ; *
  55721. ; * r Result of subtraction.
  55722. ; * a Number to subtract from in Montgomery form.
  55723. ; * b Number to double and subtract with in Montgomery form.
  55724. ; * m Modulus (prime).
         ; *
         ; * Registers on entry, as used below: rcx = r, rdx = a, r8 = b.
         ; * m is never read (r9 is overwritten by the second load). The modulus
         ; * words are hard-coded as -1, 0x00000000FFFFFFFF, 0, 0xFFFFFFFF00000001
         ; * (low to high).
         ; * a is held in rax, r9, r10, r11 and b in r12, r13, r14, r15.
  55725. ; */
  55726. _text SEGMENT READONLY PARA
  55727. sp_256_mont_sub_dbl_4 PROC
  55728. push r12
  55729. push r13
  55730. push r14
  55731. push r15
  55732. push rdi
  55733. push rsi
  55734. mov rax, QWORD PTR [rdx]
  55735. mov r9, QWORD PTR [rdx+8]
  55736. mov r10, QWORD PTR [rdx+16]
  55737. mov r11, QWORD PTR [rdx+24]
  55738. mov r12, QWORD PTR [r8]
  55739. mov r13, QWORD PTR [r8+8]
  55740. mov r14, QWORD PTR [r8+16]
  55741. mov r15, QWORD PTR [r8+24]
         ; Step 1: t = b + b. The mov constant loads do not modify flags.
  55742. add r12, r12
  55743. mov rdi, 4294967295 ; m[1]
  55744. adc r13, r13
  55745. mov rsi, 18446744069414584321 ; m[3]
  55746. adc r14, r14
  55747. adc r15, r15
         ; b has been fully loaded, so r8 is reused as the mask: -1 if the
         ; doubling carried out, else 0.
  55748. sbb r8, r8
  55749. and rdi, r8
  55750. and rsi, r8
         ; First conditional subtraction of m from t.
  55751. sub r12, r8
  55752. sbb r13, rdi
  55753. sbb r14, 0
  55754. sbb r15, rsi
         ; mask + borrow: becomes 0 when that subtraction borrowed.
  55755. adc r8, 0
  55756. and rdi, r8
  55757. and rsi, r8
         ; Second conditional subtraction of m from t.
  55758. sub r12, r8
  55759. sbb r13, rdi
  55760. sbb r14, 0
  55761. sbb r15, rsi
         ; Step 2: a - t. The modulus constants are reloaded because the masking
         ; above modified them.
  55762. sub rax, r12
  55763. mov rdi, 4294967295
  55764. sbb r9, r13
  55765. mov rsi, 18446744069414584321
  55766. sbb r10, r14
  55767. sbb r11, r15
  55768. sbb r8, r8 ; r8 = mask: -1 if the subtraction borrowed, else 0
  55769. and rdi, r8
  55770. and rsi, r8
         ; First conditional addition of m.
  55771. add rax, r8
  55772. adc r9, rdi
  55773. adc r10, 0
  55774. adc r11, rsi
         ; mask + carry: becomes 0 when that addition carried out.
  55775. adc r8, 0
  55776. and rdi, r8
  55777. and rsi, r8
         ; Second conditional addition, interleaved with the stores.
  55778. add rax, r8
  55779. adc r9, rdi
  55780. mov QWORD PTR [rcx], rax
  55781. adc r10, 0
  55782. mov QWORD PTR [rcx+8], r9
  55783. adc r11, rsi
  55784. mov QWORD PTR [rcx+16], r10
  55785. mov QWORD PTR [rcx+24], r11
  55786. pop rsi
  55787. pop rdi
  55788. pop r15
  55789. pop r14
  55790. pop r13
  55791. pop r12
  55792. ret
  55793. sp_256_mont_sub_dbl_4 ENDP
  55794. _text ENDS
  55795. ; /* Two Montgomery numbers, subtract second from first and double.
  55796. ; * (r = 2.(a - b) % m).
  55797. ; *
  55798. ; * b must have come from a mont_sub operation.
  55799. ; *
  55800. ; * r Result of subtraction.
  55801. ; * a Number to subtract from in Montgomery form.
  55802. ; * b Number to subtract with in Montgomery form.
  55803. ; * m Modulus (prime).
         ; *
         ; * Registers on entry, as used below: rcx = r, rdx = a, r8 = b.
         ; * m is never read (r9 is overwritten by the second load). The modulus
         ; * words are hard-coded as -1, 0x00000000FFFFFFFF, 0, 0xFFFFFFFF00000001
         ; * (low to high).
         ; * Each of the two steps is followed by a single conditional correction.
  55804. ; */
  55805. _text SEGMENT READONLY PARA
  55806. sp_256_mont_dbl_sub_4 PROC
  55807. push r12
  55808. push r13
  55809. mov rax, QWORD PTR [rdx]
  55810. mov r9, QWORD PTR [rdx+8]
  55811. mov r10, QWORD PTR [rdx+16]
  55812. mov r11, QWORD PTR [rdx+24]
         ; Step 1: t = a - b. The mov constant loads do not modify flags.
  55813. sub rax, QWORD PTR [r8]
  55814. mov r12, 4294967295 ; m[1]
  55815. sbb r9, QWORD PTR [r8+8]
  55816. mov r13, 18446744069414584321 ; m[3]
  55817. sbb r10, QWORD PTR [r8+16]
  55818. sbb r11, QWORD PTR [r8+24]
         ; b has been fully read, so r8 is reused as the mask: -1 if the
         ; subtraction borrowed, else 0.
  55819. sbb r8, r8
  55820. and r12, r8
  55821. and r13, r8
         ; Single conditional addition of m.
  55822. add rax, r8
  55823. adc r9, r12
  55824. adc r10, 0
  55825. adc r11, r13
         ; Step 2: t = t + t. The modulus constants are reloaded because the
         ; masking above modified them.
  55826. add rax, rax
  55827. mov r12, 4294967295
  55828. adc r9, r9
  55829. mov r13, 18446744069414584321
  55830. adc r10, r10
  55831. adc r11, r11
  55832. sbb r8, r8 ; r8 = mask: -1 if the doubling carried out, else 0
  55833. and r12, r8
  55834. and r13, r8
         ; Single conditional subtraction of m, interleaved with the stores.
  55835. sub rax, r8
  55836. sbb r9, r12
  55837. mov QWORD PTR [rcx], rax
  55838. sbb r10, 0
  55839. mov QWORD PTR [rcx+8], r9
  55840. sbb r11, r13
  55841. mov QWORD PTR [rcx+16], r10
  55842. mov QWORD PTR [rcx+24], r11
  55843. pop r13
  55844. pop r12
  55845. ret
  55846. sp_256_mont_dbl_sub_4 ENDP
  55847. _text ENDS
  55848. IFNDEF WC_NO_CACHE_RESISTANT
  55849. ; /* Touch each possible point that could be being copied.
  55850. ; *
  55851. ; * r Point to copy into.
  55852. ; * table Table - start of the entries to access
  55853. ; * idx Index of point to retrieve.
         ; *
         ; * Registers on entry, as used below: rcx = r, rdx = table, r8d = idx.
         ; * Table entries are 200 bytes apart. Entry 0 is skipped and entries 1 to
         ; * 32 are all read, whatever idx is. For each entry, 32 bytes at offsets
         ; * 0, 64 and 128 are ANDed with an all-ones mask when the entry number
         ; * equals idx (all zeros otherwise) and ORed into the result.
         ; * If idx matches no entry (for example 0), r receives zeros in those
         ; * fields.
         ; * Only bytes 0-31, 64-95 and 128-159 of r are written
         ; * (presumably the x, y and z ordinates - confirm against sp_point_256).
         ; *
         ; * This routine uses legacy SSE2 encodings only, including the
         ; * save/restore of the non-volatile registers xmm6-xmm15, so that it does
         ; * not require AVX support and does not mix VEX and legacy SSE code.
  55854. ; */
  55855. _text SEGMENT READONLY PARA
  55856. sp_256_get_point_33_4 PROC
  55857. sub rsp, 160
         ; xmm6-xmm15 are non-volatile in the Microsoft x64 calling convention.
  55858. movdqu OWORD PTR [rsp], xmm6
  55859. movdqu OWORD PTR [rsp+16], xmm7
  55860. movdqu OWORD PTR [rsp+32], xmm8
  55861. movdqu OWORD PTR [rsp+48], xmm9
  55862. movdqu OWORD PTR [rsp+64], xmm10
  55863. movdqu OWORD PTR [rsp+80], xmm11
  55864. movdqu OWORD PTR [rsp+96], xmm12
  55865. movdqu OWORD PTR [rsp+112], xmm13
  55866. movdqu OWORD PTR [rsp+128], xmm14
  55867. movdqu OWORD PTR [rsp+144], xmm15
  55868. mov rax, 1
  55869. movd xmm13, r8d ; xmm13 = idx, broadcast to all four dwords below
  55870. add rdx, 200 ; skip table entry 0
  55871. movd xmm15, eax ; xmm15 = 1, broadcast to all four dwords below
  55872. mov rax, 32 ; rax = number of entries to scan
  55873. pshufd xmm15, xmm15, 0
  55874. pshufd xmm13, xmm13, 0
  55875. pxor xmm14, xmm14
         ; xmm0-xmm5 accumulate the selected entry.
  55876. pxor xmm0, xmm0
  55877. pxor xmm1, xmm1
  55878. pxor xmm2, xmm2
  55879. pxor xmm3, xmm3
  55880. pxor xmm4, xmm4
  55881. pxor xmm5, xmm5
  55882. movdqa xmm14, xmm15 ; xmm14 = current entry number, starting at 1
  55883. L_256_get_point_33_4_start_1:
         ; xmm12 = all ones if the current entry number equals idx, else zero.
  55884. movdqa xmm12, xmm14
  55885. paddd xmm14, xmm15
  55886. pcmpeqd xmm12, xmm13
  55887. movdqu xmm6, [rdx]
  55888. movdqu xmm7, [rdx+16]
  55889. movdqu xmm8, [rdx+64]
  55890. movdqu xmm9, [rdx+80]
  55891. movdqu xmm10, [rdx+128]
  55892. movdqu xmm11, [rdx+144]
  55893. add rdx, 200 ; advance to the next table entry
  55894. pand xmm6, xmm12
  55895. pand xmm7, xmm12
  55896. pand xmm8, xmm12
  55897. pand xmm9, xmm12
  55898. pand xmm10, xmm12
  55899. pand xmm11, xmm12
  55900. por xmm0, xmm6
  55901. por xmm1, xmm7
  55902. por xmm2, xmm8
  55903. por xmm3, xmm9
  55904. por xmm4, xmm10
  55905. por xmm5, xmm11
  55906. dec rax
  55907. jnz L_256_get_point_33_4_start_1
  55908. movdqu [rcx], xmm0
  55909. movdqu [rcx+16], xmm1
  55910. movdqu [rcx+64], xmm2
  55911. movdqu [rcx+80], xmm3
  55912. movdqu [rcx+128], xmm4
  55913. movdqu [rcx+144], xmm5
  55914. movdqu xmm6, OWORD PTR [rsp]
  55915. movdqu xmm7, OWORD PTR [rsp+16]
  55916. movdqu xmm8, OWORD PTR [rsp+32]
  55917. movdqu xmm9, OWORD PTR [rsp+48]
  55918. movdqu xmm10, OWORD PTR [rsp+64]
  55919. movdqu xmm11, OWORD PTR [rsp+80]
  55920. movdqu xmm12, OWORD PTR [rsp+96]
  55921. movdqu xmm13, OWORD PTR [rsp+112]
  55922. movdqu xmm14, OWORD PTR [rsp+128]
  55923. movdqu xmm15, OWORD PTR [rsp+144]
  55924. add rsp, 160
  55925. ret
  55926. sp_256_get_point_33_4 ENDP
  55927. _text ENDS
  55928. IFDEF HAVE_INTEL_AVX2
  55929. ; /* Touch each possible point that could be being copied.
  55930. ; *
  55931. ; * r Point to copy into.
  55932. ; * table Table - start of the entries to access
  55933. ; * idx Index of point to retrieve.
         ; *
         ; * Registers on entry, as used below: rcx = r, rdx = table, r8d = idx.
         ; * AVX2 variant: table entries are 200 bytes apart. Entry 0 is skipped
         ; * and entries 1 to 32 are all read, whatever idx is. For each entry,
         ; * 32 bytes at offsets 0, 64 and 128 are ANDed with an all-ones mask when
         ; * the entry number equals idx (all zeros otherwise) and ORed into the
         ; * result.
         ; * If idx matches no entry (for example 0), r receives zeros in those
         ; * fields.
         ; * Only bytes 0-31, 64-95 and 128-159 of r are written
         ; * (presumably the x, y and z ordinates - confirm against sp_point_256).
  55934. ; */
  55935. _text SEGMENT READONLY PARA
  55936. sp_256_get_point_33_avx2_4 PROC
  55937. sub rsp, 64
         ; Only xmm6-xmm9 are saved: they are the only non-volatile vector
         ; registers written below.
  55938. vmovdqu OWORD PTR [rsp], xmm6
  55939. vmovdqu OWORD PTR [rsp+16], xmm7
  55940. vmovdqu OWORD PTR [rsp+32], xmm8
  55941. vmovdqu OWORD PTR [rsp+48], xmm9
  55942. mov rax, 1
  55943. movd xmm7, r8d ; low dword of xmm7 = idx
  55944. add rdx, 200 ; skip table entry 0
  55945. movd xmm9, eax ; low dword of xmm9 = 1
  55946. mov rax, 32 ; rax = number of entries to scan
         ; With an all-zero index vector (ymm8), vpermd copies dword 0 of the
         ; source into every lane: ymm7 = idx in all lanes, ymm9 = 1 in all lanes.
  55947. vpxor ymm8, ymm8, ymm8
  55948. vpermd ymm7, ymm8, ymm7
  55949. vpermd ymm9, ymm8, ymm9
         ; ymm0-ymm2 accumulate the selected entry.
  55950. vpxor ymm0, ymm0, ymm0
  55951. vpxor ymm1, ymm1, ymm1
  55952. vpxor ymm2, ymm2, ymm2
  55953. vmovdqa ymm8, ymm9 ; ymm8 = current entry number, starting at 1
  55954. L_256_get_point_33_avx2_4_start:
         ; ymm6 = all ones if the current entry number equals idx, else zero.
  55955. vpcmpeqd ymm6, ymm8, ymm7
  55956. vpaddd ymm8, ymm8, ymm9
  55957. vmovupd ymm3, YMMWORD PTR [rdx]
  55958. vmovupd ymm4, YMMWORD PTR [rdx+64]
  55959. vmovupd ymm5, YMMWORD PTR [rdx+128]
  55960. add rdx, 200 ; advance to the next table entry
  55961. vpand ymm3, ymm3, ymm6
  55962. vpand ymm4, ymm4, ymm6
  55963. vpand ymm5, ymm5, ymm6
  55964. vpor ymm0, ymm0, ymm3
  55965. vpor ymm1, ymm1, ymm4
  55966. vpor ymm2, ymm2, ymm5
  55967. dec rax
  55968. jnz L_256_get_point_33_avx2_4_start
  55969. vmovupd YMMWORD PTR [rcx], ymm0
  55970. vmovupd YMMWORD PTR [rcx+64], ymm1
  55971. vmovupd YMMWORD PTR [rcx+128], ymm2
  55972. vmovdqu xmm6, OWORD PTR [rsp]
  55973. vmovdqu xmm7, OWORD PTR [rsp+16]
  55974. vmovdqu xmm8, OWORD PTR [rsp+32]
  55975. vmovdqu xmm9, OWORD PTR [rsp+48]
  55976. add rsp, 64
  55977. ret
  55978. sp_256_get_point_33_avx2_4 ENDP
  55979. _text ENDS
  55980. ENDIF
  55981. ENDIF
  55982. IFDEF HAVE_INTEL_AVX2
  55983. ; /* Multiply two Montgomery form numbers mod the modulus (prime).
  55984. ; * (r = a * b mod m)
  55985. ; *
  55986. ; * r Result of multiplication.
  55987. ; * a First number to multiply in Montgomery form.
  55988. ; * b Second number to multiply in Montgomery form.
  55989. ; * m Modulus (prime).
  55990. ; * mp Montgomery multiplier.
  55991. ; */
  55992. _text SEGMENT READONLY PARA
         ; Register roles, as used by the code below:
         ;   rcx = r (4-word result), rdx = a, r8 = b (4 words each).
         ; r9 is overwritten before it is read and no stack argument is loaded,
         ; so the m and mp arguments are not consulted; the reduction is
         ; hard-wired to a single modulus.
  55993. sp_256_mont_mul_avx2_4 PROC
         ; Preserve the non-volatile registers that are overwritten below.
  55994. push rbp
  55995. push r12
  55996. push r13
  55997. push r14
  55998. push r15
  55999. push rdi
  56000. push rsi
  56001. push rbx
         ; rbp = b and rax = a, which frees r8 for product word 0 and rdx for
         ; the implicit mulx multiplicand.
  56002. mov rbp, r8
  56003. mov rax, rdx
  56004. mov rdx, QWORD PTR [rax]
  56005. ; A[0] * B[0]
  56006. mulx r9, r8, QWORD PTR [rbp]
         ; rbx = 0 for folding carries; xor also clears CF and OF so the
         ; adcx (CF) and adox (OF) chains start clean. mulx does not touch flags.
  56007. xor rbx, rbx
  56008. ; A[0] * B[1]
  56009. mulx r10, rdi, QWORD PTR [rbp+8]
  56010. adcx r9, rdi
  56011. ; A[0] * B[2]
  56012. mulx r11, rdi, QWORD PTR [rbp+16]
  56013. adcx r10, rdi
  56014. ; A[0] * B[3]
  56015. mulx r12, rdi, QWORD PTR [rbp+24]
  56016. adcx r11, rdi
  56017. mov rdx, QWORD PTR [rax+8]
  56018. adcx r12, rbx
  56019. ; A[1] * B[0]
  56020. mulx rsi, rdi, QWORD PTR [rbp]
         ; Reset both carry flags before the next row of partial products.
  56021. xor rbx, rbx
  56022. adcx r9, rdi
  56023. ; A[1] * B[1]
  56024. mulx r15, rdi, QWORD PTR [rbp+8]
  56025. adox r10, rsi
  56026. adcx r10, rdi
  56027. ; A[1] * B[2]
  56028. mulx rsi, rdi, QWORD PTR [rbp+16]
  56029. adox r11, r15
  56030. adcx r11, rdi
  56031. ; A[1] * B[3]
  56032. mulx r13, rdi, QWORD PTR [rbp+24]
  56033. adox r12, rsi
  56034. adcx r12, rdi
  56035. adox r13, rbx
  56036. mov rdx, QWORD PTR [rax+16]
  56037. adcx r13, rbx
  56038. ; A[2] * B[0]
  56039. mulx rsi, rdi, QWORD PTR [rbp]
  56040. xor rbx, rbx
  56041. adcx r10, rdi
  56042. ; A[2] * B[1]
  56043. mulx r15, rdi, QWORD PTR [rbp+8]
  56044. adox r11, rsi
  56045. adcx r11, rdi
  56046. ; A[2] * B[2]
  56047. mulx rsi, rdi, QWORD PTR [rbp+16]
  56048. adox r12, r15
  56049. adcx r12, rdi
  56050. ; A[2] * B[3]
  56051. mulx r14, rdi, QWORD PTR [rbp+24]
  56052. adox r13, rsi
  56053. adcx r13, rdi
  56054. adox r14, rbx
  56055. mov rdx, QWORD PTR [rax+24]
  56056. adcx r14, rbx
  56057. ; A[3] * B[0]
  56058. mulx rsi, rdi, QWORD PTR [rbp]
  56059. xor rbx, rbx
  56060. adcx r11, rdi
  56061. ; A[3] * B[1]
  56062. mulx r15, rdi, QWORD PTR [rbp+8]
  56063. adox r12, rsi
  56064. adcx r12, rdi
  56065. ; A[3] * B[2]
  56066. mulx rsi, rdi, QWORD PTR [rbp+16]
  56067. adox r13, r15
  56068. adcx r13, rdi
  56069. ; A[3] * B[3]
  56070. mulx r15, rdi, QWORD PTR [rbp+24]
  56071. adox r14, rsi
  56072. adcx r14, rdi
  56073. adox r15, rbx
  56074. adcx r15, rbx
         ; The 512-bit product now sits in r8 (lowest word) through r15 (highest).
  56075. ; Start Reduction
  56076. ; mu = a[0]-a[3] + a[0]-a[2] << 32 << 64 + (a[0] * 2) << 192
  56077. ; - a[0] << 32 << 192
  56078. ; a[0]-a[3] + (a[0] * 2) << 192
         ; mu is assembled in rdi (lowest word), rax, rbp, rdx (highest word);
         ; rsi and r9 hold the 32-bit-shifted pieces. a[i] here refers to the
         ; low product words r8..r11.
  56079. mov rdi, r8
  56080. lea rdx, QWORD PTR [r11+2*r8]
  56081. mov rax, r9
  56082. mov rbp, r10
  56083. mov rsi, r10
  56084. ; a[0]-a[2] << 32
  56085. shl r8, 32
  56086. shld rsi, rax, 32
  56087. shld r9, rdi, 32
  56088. ; - a[0] << 32 << 192
  56089. sub rdx, r8
  56090. ; + a[0]-a[2] << 32 << 64
  56091. add rax, r8
  56092. adc rbp, r9
  56093. adc rdx, rsi
  56094. ; a += (mu << 256) - (mu << 224) + (mu << 192) + (mu << 96) - mu
         ; rsi = 0: it receives the bits shifted out of the top of mu below.
  56095. xor rsi, rsi
  56096. ; a += mu << 256
  56097. add r12, rdi
  56098. adc r13, rax
  56099. adc r14, rbp
  56100. adc r15, rdx
         ; r8 = 0 - CF. From here on r8 tracks the negated net carry out of r15.
  56101. sbb r8, r8
  56102. ; a += mu << 192
  56103. add r11, rdi
  56104. adc r12, rax
         ; Keep a copy of mu word 1 in r9; mov does not disturb the carry chain.
  56105. mov r9, rax
  56106. adc r13, rbp
  56107. adc r14, rdx
  56108. adc r15, 0
  56109. sbb r8, 0
  56110. ; mu <<= 32
         ; Five-word result: rdi (lowest), rax, rbp, rdx, rsi (highest).
  56111. shld rsi, rdx, 32
  56112. shld rdx, rbp, 32
  56113. shld rbp, rax, 32
  56114. shld rax, rdi, 32
  56115. shl rdi, 32
  56116. ; a -= (mu << 32) << 192
  56117. sub r11, rdi
  56118. sbb r12, rax
  56119. sbb r13, rbp
  56120. sbb r14, rdx
  56121. sbb r15, rsi
         ; A borrow out of r15 is added back into the overflow tracker.
  56122. adc r8, 0
  56123. ; a += (mu << 32) << 64
         ; The first instruction is a sub on the saved copy in r9; only the
         ; carry flag it produces feeds the adc chain that follows.
  56124. sub r9, rdi
  56125. adc r10, rax
  56126. adc r11, rbp
  56127. adc r12, rdx
  56128. adc r13, rsi
  56129. adc r14, 0
  56130. adc r15, 0
  56131. sbb r8, 0
         ; 18446744069414584321 = 0xFFFFFFFF00000001, the word subtracted from
         ; the top result word when the mask in r8 is set.
  56132. mov rax, 18446744069414584321
  56133. mov rdi, r8
  56134. ; mask m and sub from result if overflow
  56135. ; m[0] = -1 & mask = mask
  56136. shr rdi, 32
  56137. ; m[2] = 0 & mask = 0
  56138. and rax, r8
         ; Masked subtraction over the upper half r12..r15: r8 from word 0,
         ; r8 >> 32 from word 1, 0 from word 2, rax from word 3. The stores are
         ; interleaved with the sbb chain; mov leaves the borrow flag intact.
  56139. sub r12, r8
  56140. sbb r13, rdi
  56141. mov QWORD PTR [rcx], r12
  56142. sbb r14, 0
  56143. mov QWORD PTR [rcx+8], r13
  56144. sbb r15, rax
  56145. mov QWORD PTR [rcx+16], r14
  56146. mov QWORD PTR [rcx+24], r15
         ; Restore saved registers in reverse push order.
  56147. pop rbx
  56148. pop rsi
  56149. pop rdi
  56150. pop r15
  56151. pop r14
  56152. pop r13
  56153. pop r12
  56154. pop rbp
  56155. ret
  56156. sp_256_mont_mul_avx2_4 ENDP
  56157. _text ENDS
  56158. ENDIF
  56159. IFDEF HAVE_INTEL_AVX2
  56160. ; /* Square the Montgomery form number mod the modulus (prime). (r = a * a mod m)
  56161. ; *
  56162. ; * r Result of squaring.
  56163. ; * a Number to square in Montgomery form.
  56164. ; * m Modulus (prime).
  56165. ; * mp Montgomery multiplier.
  56166. ; */
         ; Register roles, as used by the code below: rcx = r (4-word result),
         ; rdx = a (4 words). r8 and r9 are overwritten before they are read and
         ; no stack argument is loaded, so m and mp are not consulted; the
         ; reduction is hard-wired to a single modulus.
  56167. _text SEGMENT READONLY PARA
  56168. sp_256_mont_sqr_avx2_4 PROC
         ; Preserve the non-volatile registers that are overwritten below.
  56169. push r12
  56170. push r13
  56171. push r14
  56172. push r15
  56173. push rdi
  56174. push rsi
  56175. push rbx
         ; rax = a, because rdx becomes the implicit mulx multiplicand.
  56176. mov rax, rdx
         ; Clears CF and OF for the adox/adcx chains; r8 itself is overwritten
         ; by the second mulx.
  56177. xor r8, r8
         ; rdx = A[0], rsi = A[1], rbx = A[2], r15 = A[3].
  56178. mov rdx, QWORD PTR [rax]
  56179. mov rsi, QWORD PTR [rax+8]
  56180. mov rbx, QWORD PTR [rax+16]
  56181. mov r15, QWORD PTR [rax+24]
         ; Cross products A[i]*A[j] for i < j are accumulated once in r9..r14.
  56182. ; A[0] * A[1]
  56183. mulx r10, r9, rsi
  56184. ; A[0] * A[2]
  56185. mulx r11, r8, rbx
  56186. adox r10, r8
  56187. ; A[0] * A[3]
  56188. mulx r12, r8, r15
  56189. mov rdx, rsi
  56190. adox r11, r8
  56191. ; A[1] * A[2]
  56192. mulx rdi, r8, rbx
  56193. mov rdx, r15
  56194. adcx r11, r8
  56195. ; A[1] * A[3]
  56196. mulx r13, r8, rsi
         ; r15 = 0 via mov (not xor) so the live CF/OF chains are not disturbed.
  56197. mov r15, 0
  56198. adox r12, rdi
  56199. adcx r12, r8
  56200. ; A[2] * A[3]
  56201. mulx r14, r8, rbx
  56202. adox r13, r15
  56203. adcx r13, r8
  56204. adox r14, r15
  56205. adcx r14, r15
  56206. ; Double with Carry Flag
         ; xor zeroes r15 and clears CF/OF. Each "adcx x, x" doubles one word of
         ; the cross-product sum on the CF chain, while adox adds the square
         ; terms A[i]*A[i] on the OF chain.
  56207. xor r15, r15
  56208. ; A[0] * A[0]
  56209. mov rdx, QWORD PTR [rax]
  56210. mulx rdi, r8, rdx
  56211. adcx r9, r9
  56212. adcx r10, r10
  56213. adox r9, rdi
  56214. ; A[1] * A[1]
  56215. mov rdx, QWORD PTR [rax+8]
  56216. mulx rbx, rsi, rdx
  56217. adcx r11, r11
  56218. adox r10, rsi
  56219. ; A[2] * A[2]
  56220. mov rdx, QWORD PTR [rax+16]
  56221. mulx rsi, rdi, rdx
  56222. adcx r12, r12
  56223. adox r11, rbx
  56224. adcx r13, r13
  56225. adox r12, rdi
  56226. adcx r14, r14
  56227. ; A[3] * A[3]
  56228. mov rdx, QWORD PTR [rax+24]
  56229. mulx rbx, rdi, rdx
  56230. adox r13, rsi
  56231. adcx r15, r15
  56232. adox r14, rdi
  56233. adox r15, rbx
         ; The 512-bit square now sits in r8 (lowest word) through r15 (highest).
  56234. ; Start Reduction
  56235. ; mu = a[0]-a[3] + a[0]-a[2] << 32 << 64 + (a[0] * 2) << 192
  56236. ; - a[0] << 32 << 192
  56237. ; a[0]-a[3] + (a[0] * 2) << 192
         ; mu is assembled in rdi (lowest word), rax, rsi, rdx (highest word);
         ; rbx and r9 hold the 32-bit-shifted pieces. a[i] here refers to the
         ; low product words r8..r11.
  56238. mov rdi, r8
  56239. lea rdx, QWORD PTR [r11+2*r8]
  56240. mov rax, r9
  56241. mov rsi, r10
  56242. mov rbx, r10
  56243. ; a[0]-a[2] << 32
  56244. shl r8, 32
  56245. shld rbx, rax, 32
  56246. shld r9, rdi, 32
  56247. ; - a[0] << 32 << 192
  56248. sub rdx, r8
  56249. ; + a[0]-a[2] << 32 << 64
  56250. add rax, r8
  56251. adc rsi, r9
  56252. adc rdx, rbx
  56253. ; a += (mu << 256) - (mu << 224) + (mu << 192) + (mu << 96) - mu
         ; rbx = 0: it receives the bits shifted out of the top of mu below.
  56254. xor rbx, rbx
  56255. ; a += mu << 256
  56256. add r12, rdi
  56257. adc r13, rax
  56258. adc r14, rsi
  56259. adc r15, rdx
         ; r8 = 0 - CF. From here on r8 tracks the negated net carry out of r15.
  56260. sbb r8, r8
  56261. ; a += mu << 192
  56262. add r11, rdi
  56263. adc r12, rax
         ; Keep a copy of mu word 1 in r9; mov does not disturb the carry chain.
  56264. mov r9, rax
  56265. adc r13, rsi
  56266. adc r14, rdx
  56267. adc r15, 0
  56268. sbb r8, 0
  56269. ; mu <<= 32
         ; Five-word result: rdi (lowest), rax, rsi, rdx, rbx (highest).
  56270. shld rbx, rdx, 32
  56271. shld rdx, rsi, 32
  56272. shld rsi, rax, 32
  56273. shld rax, rdi, 32
  56274. shl rdi, 32
  56275. ; a -= (mu << 32) << 192
  56276. sub r11, rdi
  56277. sbb r12, rax
  56278. sbb r13, rsi
  56279. sbb r14, rdx
  56280. sbb r15, rbx
         ; A borrow out of r15 is added back into the overflow tracker.
  56281. adc r8, 0
  56282. ; a += (mu << 32) << 64
         ; The first instruction is a sub on the saved copy in r9; only the
         ; carry flag it produces feeds the adc chain that follows.
  56283. sub r9, rdi
  56284. adc r10, rax
  56285. adc r11, rsi
  56286. adc r12, rdx
  56287. adc r13, rbx
  56288. adc r14, 0
  56289. adc r15, 0
  56290. sbb r8, 0
         ; 18446744069414584321 = 0xFFFFFFFF00000001, the word subtracted from
         ; the top result word when the mask in r8 is set.
  56291. mov rax, 18446744069414584321
  56292. mov rdi, r8
  56293. ; mask m and sub from result if overflow
  56294. ; m[0] = -1 & mask = mask
  56295. shr rdi, 32
  56296. ; m[2] = 0 & mask = 0
  56297. and rax, r8
         ; Masked subtraction over the upper half r12..r15: r8 from word 0,
         ; r8 >> 32 from word 1, 0 from word 2, rax from word 3. The stores are
         ; interleaved with the sbb chain; mov leaves the borrow flag intact.
  56298. sub r12, r8
  56299. sbb r13, rdi
  56300. mov QWORD PTR [rcx], r12
  56301. sbb r14, 0
  56302. mov QWORD PTR [rcx+8], r13
  56303. sbb r15, rax
  56304. mov QWORD PTR [rcx+16], r14
  56305. mov QWORD PTR [rcx+24], r15
         ; Restore saved registers in reverse push order.
  56306. pop rbx
  56307. pop rsi
  56308. pop rdi
  56309. pop r15
  56310. pop r14
  56311. pop r13
  56312. pop r12
  56313. ret
  56314. sp_256_mont_sqr_avx2_4 ENDP
  56315. _text ENDS
  56316. ENDIF
  56317. IFDEF HAVE_INTEL_AVX2
  56318. ; /* Masked subtraction of two 4-word integers: r = a - (b AND m).
  56319. ; * Pass m = -1 to subtract b, or m = 0 to copy a to r unchanged.
  56320. ; *
  56321. ; * r Destination for the 4-word result (rcx).
  56322. ; * a Minuend, 4 words (rdx).
  56323. ; * b Subtrahend, 4 words; every word is ANDed with m first (r8).
  56324. ; * m Mask value applied to b (r9).
         ; * Returns in rax: 0 when the subtraction did not borrow, -1 when it did.
  56325. ; */
  56326. _text SEGMENT READONLY PARA
  56327. sp_256_cond_sub_avx2_4 PROC
         ; r12-r15, rdi and rsi are overwritten below, so save them first.
  56328. push rsi
  56329. push rdi
  56330. push r15
  56331. push r14
  56332. push r13
  56333. push r12
         ; r10..r13 = a[0..3]
  56334. mov r10, QWORD PTR [rdx]
  56335. mov r11, QWORD PTR [rdx+8]
  56336. mov r12, QWORD PTR [rdx+16]
  56337. mov r13, QWORD PTR [rdx+24]
         ; r14, r15, rdi, rsi = b[0..3] AND m. All masking happens before the
         ; subtraction because "and" rewrites the carry flag.
  56338. mov r14, QWORD PTR [r8]
  56339. and r14, r9
  56340. mov r15, QWORD PTR [r8+8]
  56341. and r15, r9
  56342. mov rdi, QWORD PTR [r8+16]
  56343. and rdi, r9
  56344. mov rsi, QWORD PTR [r8+24]
  56345. and rsi, r9
         ; Borrow-propagating subtract; the stores in between are plain moves
         ; and leave the borrow flag untouched.
  56346. sub r10, r14
  56347. mov QWORD PTR [rcx], r10
  56348. sbb r11, r15
  56349. mov QWORD PTR [rcx+8], r11
  56350. sbb r12, rdi
  56351. mov QWORD PTR [rcx+16], r12
  56352. sbb r13, rsi
  56353. mov QWORD PTR [rcx+24], r13
         ; rax = 0 - borrow
  56354. sbb rax, rax
  56355. pop r12
  56356. pop r13
  56357. pop r14
  56358. pop r15
  56359. pop rdi
  56360. pop rsi
  56361. ret
  56362. sp_256_cond_sub_avx2_4 ENDP
  56363. _text ENDS
  56364. ENDIF
  56365. IFDEF HAVE_INTEL_AVX2
  56366. ; /* Reduce the number back to 256 bits using Montgomery reduction.
  56367. ; *
  56368. ; * a A single precision number to reduce in place.
  56369. ; * m The single precision number representing the modulus.
  56370. ; * mp The digit representing the negative inverse of m mod 2^n.
  56371. ; */
         ; Register roles, as used by the code below: rcx = a (eight words are
         ; read from [a]..[a+56]; the four-word result is written back to
         ; [a]..[a+24]), rdx = m (four words), r8 = mp.
         ; The four multiply rounds read the modulus through the m pointer, but
         ; the final conditional subtraction uses hard-coded constants instead.
  56372. _text SEGMENT READONLY PARA
  56373. sp_256_mont_reduce_avx2_order_4 PROC
         ; Preserve the non-volatile registers that are overwritten below.
  56374. push r12
  56375. push r13
  56376. push r14
  56377. push r15
  56378. push rdi
  56379. push rsi
  56380. push rbx
         ; rax = a, r10 = m, r11 = mp: rcx, rdx and r8 are needed as mulx
         ; outputs and as the implicit mulx multiplicand.
  56381. mov rax, rcx
  56382. mov r10, rdx
  56383. mov r11, r8
         ; Working window: r14, r15, rdi, rsi = a[0..3].
  56384. mov r14, QWORD PTR [rax]
  56385. mov r15, QWORD PTR [rax+8]
  56386. mov rdi, QWORD PTR [rax+16]
  56387. mov rsi, QWORD PTR [rax+24]
         ; r13 = carry word between rounds, r12 = constant zero. The xors also
         ; clear CF and OF for the first adcx/adox round.
  56388. xor r13, r13
  56389. xor r12, r12
  56390. ; a[0-4] += m[0-3] * mu = m[0-3] * (a[0] * mp)
  56391. mov rbx, QWORD PTR [rax+32]
  56392. ; mu = a[0] * mp
         ; Only the low 64 bits (written to rdx) are kept; the high half lands
         ; in rcx and is overwritten by a later mulx.
  56393. mov rdx, r14
  56394. mulx rcx, rdx, r11
  56395. ; a[0] += m[0] * mu
  56396. mulx r9, r8, QWORD PTR [r10]
  56397. adcx r14, r8
  56398. ; a[1] += m[1] * mu
  56399. mulx rcx, r8, QWORD PTR [r10+8]
  56400. adox r15, r9
  56401. adcx r15, r8
  56402. ; a[2] += m[2] * mu
  56403. mulx r9, r8, QWORD PTR [r10+16]
  56404. adox rdi, rcx
  56405. adcx rdi, r8
  56406. ; a[3] += m[3] * mu
  56407. mulx rcx, r8, QWORD PTR [r10+24]
  56408. adox rsi, r9
  56409. adcx rsi, r8
  56410. ; a[4] += carry
  56411. adox rbx, rcx
  56412. adcx rbx, r12
  56413. ; carry
         ; Both flag chains are folded into r13; this also leaves CF and OF
         ; clear for the next round.
  56414. adox r13, r12
  56415. adcx r13, r12
  56416. ; a[1-5] += m[0-3] * mu = m[0-3] * (a[1] * mp)
         ; r14 is free after round 0 and is reused for a[5].
  56417. mov r14, QWORD PTR [rax+40]
  56418. ; mu = a[1] * mp
  56419. mov rdx, r15
  56420. mulx rcx, rdx, r11
  56421. ; a[1] += m[0] * mu
  56422. mulx r9, r8, QWORD PTR [r10]
  56423. adcx r15, r8
  56424. ; a[2] += m[1] * mu
  56425. mulx rcx, r8, QWORD PTR [r10+8]
  56426. adox rdi, r9
  56427. adcx rdi, r8
  56428. ; a[3] += m[2] * mu
  56429. mulx r9, r8, QWORD PTR [r10+16]
  56430. adox rsi, rcx
  56431. adcx rsi, r8
  56432. ; a[4] += m[3] * mu
  56433. mulx rcx, r8, QWORD PTR [r10+24]
  56434. adox rbx, r9
  56435. adcx rbx, r8
  56436. ; a[5] += carry
  56437. adox r14, rcx
  56438. adcx r14, r13
         ; Reset r13 with mov so the pending CF/OF values survive.
  56439. mov r13, r12
  56440. ; carry
  56441. adox r13, r12
  56442. adcx r13, r12
  56443. ; a[2-6] += m[0-3] * mu = m[0-3] * (a[2] * mp)
         ; r15 is free after round 1 and is reused for a[6].
  56444. mov r15, QWORD PTR [rax+48]
  56445. ; mu = a[2] * mp
  56446. mov rdx, rdi
  56447. mulx rcx, rdx, r11
  56448. ; a[2] += m[0] * mu
  56449. mulx r9, r8, QWORD PTR [r10]
  56450. adcx rdi, r8
  56451. ; a[3] += m[1] * mu
  56452. mulx rcx, r8, QWORD PTR [r10+8]
  56453. adox rsi, r9
  56454. adcx rsi, r8
  56455. ; a[4] += m[2] * mu
  56456. mulx r9, r8, QWORD PTR [r10+16]
  56457. adox rbx, rcx
  56458. adcx rbx, r8
  56459. ; a[5] += m[3] * mu
  56460. mulx rcx, r8, QWORD PTR [r10+24]
  56461. adox r14, r9
  56462. adcx r14, r8
  56463. ; a[6] += carry
  56464. adox r15, rcx
  56465. adcx r15, r13
  56466. mov r13, r12
  56467. ; carry
  56468. adox r13, r12
  56469. adcx r13, r12
  56470. ; a[3-7] += m[0-3] * mu = m[0-3] * (a[3] * mp)
         ; rdi is free after round 2 and is reused for a[7].
  56471. mov rdi, QWORD PTR [rax+56]
  56472. ; mu = a[3] * mp
  56473. mov rdx, rsi
  56474. mulx rcx, rdx, r11
  56475. ; a[3] += m[0] * mu
  56476. mulx r9, r8, QWORD PTR [r10]
  56477. adcx rsi, r8
  56478. ; a[4] += m[1] * mu
  56479. mulx rcx, r8, QWORD PTR [r10+8]
  56480. adox rbx, r9
  56481. adcx rbx, r8
  56482. ; a[5] += m[2] * mu
  56483. mulx r9, r8, QWORD PTR [r10+16]
  56484. adox r14, rcx
  56485. adcx r14, r8
  56486. ; a[6] += m[3] * mu
  56487. mulx rcx, r8, QWORD PTR [r10+24]
  56488. adox r15, r9
  56489. adcx r15, r8
  56490. ; a[7] += carry
  56491. adox rdi, rcx
  56492. adcx rdi, r13
  56493. mov r13, r12
  56494. ; carry
  56495. adox r13, r12
  56496. adcx r13, r12
         ; The reduced value is now in rbx, r14, r15, rdi (a[4..7]) with the
         ; final carry in r13.
  56497. ; Subtract mod if carry
         ; neg turns a carry of 1 into an all-ones mask (0 stays 0). The masked
         ; constants are 0xF3B9CAC2FC632551 (word 0), 0xBCE6FAADA7179E84
         ; (word 1) and 0xFFFFFFFF00000000 (word 3); word 2 is the mask itself,
         ; i.e. all ones when subtracting. Presumably this is the modulus the
         ; "_order" suffix refers to; it is not read from m here.
  56498. neg r13
  56499. mov r8, 17562291160714782033
  56500. mov r9, 13611842547513532036
  56501. mov rdx, 18446744069414584320
  56502. and r8, r13
  56503. and r9, r13
  56504. and rdx, r13
  56505. sub rbx, r8
  56506. sbb r14, r9
  56507. sbb r15, r13
  56508. sbb rdi, rdx
         ; Write the four-word result over the low half of a.
  56509. mov QWORD PTR [rax], rbx
  56510. mov QWORD PTR [rax+8], r14
  56511. mov QWORD PTR [rax+16], r15
  56512. mov QWORD PTR [rax+24], rdi
  56513. pop rbx
  56514. pop rsi
  56515. pop rdi
  56516. pop r15
  56517. pop r14
  56518. pop r13
  56519. pop r12
  56520. ret
  56521. sp_256_mont_reduce_avx2_order_4 ENDP
  56522. _text ENDS
  56523. ENDIF
  56524. IFDEF HAVE_INTEL_AVX2
  56525. ; /* Halve a 4-word number modulo the prime. (r = a / 2 % m)
  56526. ; * When a is odd the modulus is added first so the sum is even.
  56527. ; *
  56528. ; * r Result of the halving, 4 words (rcx).
  56529. ; * a Number to halve, 4 words (rdx).
  56530. ; * m Modulus (prime). Not read: the modulus words are hard-coded below.
         ; */
  56531. _text SEGMENT READONLY PARA
  56532. sp_256_div2_avx2_4 PROC
  56533. push r13
  56534. push r12
         ; r13 = all ones when a is odd, zero when it is even.
  56535. mov rax, QWORD PTR [rdx]
  56536. mov r13, rax
  56537. and r13, 1
  56538. neg r13
         ; Masked modulus words: word 0 is the mask itself, word 1 is
         ; 0x00000000FFFFFFFF, word 2 is zero, word 3 is 0xFFFFFFFF00000001.
  56539. mov r11, 4294967295
  56540. and r11, r13
  56541. mov r12, 18446744069414584321
  56542. and r12, r13
  56543. mov r8, QWORD PTR [rdx+8]
  56544. mov r9, QWORD PTR [rdx+16]
  56545. mov r10, QWORD PTR [rdx+24]
         ; a += masked modulus
  56546. add rax, r13
  56547. adc r8, r11
  56548. adc r9, 0
  56549. adc r10, r12
         ; Capture the carry out as a fifth word. mov (not xor) is required
         ; here because it leaves the carry flag untouched.
  56550. mov r13, 0
  56551. adc r13, 0
         ; Shift the 5-word sum right by one bit, storing each word as soon as
         ; it is complete.
  56552. shrd rax, r8, 1
  56553. mov QWORD PTR [rcx], rax
  56554. shrd r8, r9, 1
  56555. mov QWORD PTR [rcx+8], r8
  56556. shrd r9, r10, 1
  56557. mov QWORD PTR [rcx+16], r9
  56558. shrd r10, r13, 1
  56559. mov QWORD PTR [rcx+24], r10
  56560. pop r12
  56561. pop r13
  56562. ret
  56563. sp_256_div2_avx2_4 ENDP
  56564. _text ENDS
  56565. ENDIF
  56566. IFNDEF WC_NO_CACHE_RESISTANT
  56567. ; /* Touch each possible entry that could be being copied.
         ; * Every candidate entry is loaded and ANDed with an equality mask, so
         ; * the memory access pattern is the same for every idx.
  56568. ; *
  56569. ; * r Point to copy into (rcx). Bytes 0-31 and 64-95 are written.
  56570. ; * table Table - start of the entries to access (rdx). Entries are 64 bytes.
  56571. ; * idx Index of entry to retrieve (r8d).
         ; *
         ; * Entry 0 is never read: the scan starts at entry 1 with zeroed
         ; * accumulators, so idx = 0 produces all-zero output.
         ; * xmm6-xmm11 are saved and restored with SSE2 movdqu rather than the
         ; * VEX-encoded vmovdqu, so this non-AVX routine only needs SSE2.
  56572. ; */
  56573. _text SEGMENT READONLY PARA
  56574. sp_256_get_entry_64_4 PROC
         ; Save the non-volatile xmm registers used as scratch below.
  56575. sub rsp, 96
  56576. movdqu OWORD PTR [rsp], xmm6
  56577. movdqu OWORD PTR [rsp+16], xmm7
  56578. movdqu OWORD PTR [rsp+32], xmm8
  56579. movdqu OWORD PTR [rsp+48], xmm9
  56580. movdqu OWORD PTR [rsp+64], xmm10
  56581. movdqu OWORD PTR [rsp+80], xmm11
  56582. ; From entry 1
  56583. mov rax, 1
  56584. movd xmm9, r8d
  56585. add rdx, 64
  56586. movd xmm11, eax
         ; 63 iterations: entries 1 through 63.
  56587. mov rax, 63
         ; xmm11 = 1 in every dword lane, xmm9 = idx in every dword lane.
  56588. pshufd xmm11, xmm11, 0
  56589. pshufd xmm9, xmm9, 0
  56590. pxor xmm10, xmm10
         ; xmm0-xmm3 accumulate the selected 64-byte entry.
  56591. pxor xmm0, xmm0
  56592. pxor xmm1, xmm1
  56593. pxor xmm2, xmm2
  56594. pxor xmm3, xmm3
         ; xmm10 = index of the entry about to be read (starts at 1).
  56595. movdqa xmm10, xmm11
  56596. L_256_get_entry_64_4_start_0:
         ; xmm8 = all ones when the current index equals idx, else zero.
  56597. movdqa xmm8, xmm10
  56598. paddd xmm10, xmm11
  56599. pcmpeqd xmm8, xmm9
  56600. movdqu xmm4, [rdx]
  56601. movdqu xmm5, [rdx+16]
  56602. movdqu xmm6, [rdx+32]
  56603. movdqu xmm7, [rdx+48]
  56604. add rdx, 64
  56605. pand xmm4, xmm8
  56606. pand xmm5, xmm8
  56607. pand xmm6, xmm8
  56608. pand xmm7, xmm8
  56609. por xmm0, xmm4
  56610. por xmm1, xmm5
  56611. por xmm2, xmm6
  56612. por xmm3, xmm7
  56613. dec rax
  56614. jnz L_256_get_entry_64_4_start_0
         ; First 32 bytes of the entry go to r+0, the last 32 bytes to r+64.
  56615. movdqu [rcx], xmm0
  56616. movdqu [rcx+16], xmm1
  56617. movdqu [rcx+64], xmm2
  56618. movdqu [rcx+80], xmm3
  56619. movdqu xmm6, OWORD PTR [rsp]
  56620. movdqu xmm7, OWORD PTR [rsp+16]
  56621. movdqu xmm8, OWORD PTR [rsp+32]
  56622. movdqu xmm9, OWORD PTR [rsp+48]
  56623. movdqu xmm10, OWORD PTR [rsp+64]
  56624. movdqu xmm11, OWORD PTR [rsp+80]
  56625. add rsp, 96
  56626. ret
  56627. sp_256_get_entry_64_4 ENDP
  56628. _text ENDS
  56629. IFDEF HAVE_INTEL_AVX2
  56630. ; /* Touch each possible entry that could be being copied.
         ; * Every candidate entry is loaded and ANDed with an equality mask, so
         ; * the memory access pattern is the same for every idx.
  56631. ; *
  56632. ; * r Point to copy into (rcx). Bytes 0-31 and 64-95 are written.
  56633. ; * table Table - start of the entries to access (rdx). Entries are 64 bytes.
  56634. ; * idx Index of entry to retrieve (r8d).
         ; *
         ; * Entry 0 is never read: the scan starts at entry 1 with zeroed
         ; * accumulators, so idx = 0 produces all-zero output.
  56635. ; */
  56636. _text SEGMENT READONLY PARA
  56637. sp_256_get_entry_64_avx2_4 PROC
         ; Save the non-volatile xmm registers used as scratch below.
  56638. sub rsp, 32
  56639. vmovdqu OWORD PTR [rsp], xmm6
  56640. vmovdqu OWORD PTR [rsp+16], xmm7
  56641. mov rax, 1
  56642. movd xmm5, r8d
  56643. add rdx, 64
  56644. movd xmm7, eax
         ; 63 iterations: entries 1 through 63, the same range the SSE2 variant
         ; sp_256_get_entry_64_4 scans. A count of 64 would also load the 64
         ; bytes following a 64-entry table (assumes the table holds exactly 64
         ; entries, as the routine name indicates).
  56645. mov rax, 63
         ; vpermd with an all-zero index vector broadcasts dword 0:
         ; ymm5 = idx in every lane, ymm7 = 1 in every lane.
  56646. vpxor ymm6, ymm6, ymm6
  56647. vpermd ymm5, ymm6, ymm5
  56648. vpermd ymm7, ymm6, ymm7
         ; ymm0/ymm1 accumulate the selected 64-byte entry.
  56649. vpxor ymm0, ymm0, ymm0
  56650. vpxor ymm1, ymm1, ymm1
         ; ymm6 = index of the entry about to be read (starts at 1).
  56651. vmovdqa ymm6, ymm7
  56652. L_256_get_entry_64_avx2_4_start:
         ; ymm4 = all ones when the current index equals idx, else zero.
  56653. vpcmpeqd ymm4, ymm6, ymm5
  56654. vpaddd ymm6, ymm6, ymm7
  56655. vmovupd ymm2, YMMWORD PTR [rdx]
  56656. vmovupd ymm3, YMMWORD PTR [rdx+32]
  56657. add rdx, 64
  56658. vpand ymm2, ymm2, ymm4
  56659. vpand ymm3, ymm3, ymm4
  56660. vpor ymm0, ymm0, ymm2
  56661. vpor ymm1, ymm1, ymm3
  56662. dec rax
  56663. jnz L_256_get_entry_64_avx2_4_start
         ; First 32 bytes of the entry go to r+0, the last 32 bytes to r+64.
  56664. vmovupd YMMWORD PTR [rcx], ymm0
  56665. vmovupd YMMWORD PTR [rcx+64], ymm1
  56666. vmovdqu xmm6, OWORD PTR [rsp]
  56667. vmovdqu xmm7, OWORD PTR [rsp+16]
  56668. add rsp, 32
  56669. ret
  56670. sp_256_get_entry_64_avx2_4 ENDP
  56671. _text ENDS
  56672. ENDIF
  56673. ENDIF
  56674. IFNDEF WC_NO_CACHE_RESISTANT
  56675. ; /* Touch each possible entry that could be being copied.
         ; * Every candidate entry is loaded and ANDed with an equality mask, so
         ; * the memory access pattern is the same for every idx.
  56676. ; *
  56677. ; * r Point to copy into (rcx). Bytes 0-31 and 64-95 are written.
  56678. ; * table Table - start of the entries to access (rdx). Entries are 64 bytes.
  56679. ; * idx Index of entry to retrieve (r8d).
         ; *
         ; * Entry 0 is never read: the scan starts at entry 1 with zeroed
         ; * accumulators, so idx = 0 produces all-zero output.
         ; * xmm6-xmm11 are saved and restored with SSE2 movdqu rather than the
         ; * VEX-encoded vmovdqu, so this non-AVX routine only needs SSE2.
  56680. ; */
  56681. _text SEGMENT READONLY PARA
  56682. sp_256_get_entry_65_4 PROC
         ; Save the non-volatile xmm registers used as scratch below.
  56683. sub rsp, 96
  56684. movdqu OWORD PTR [rsp], xmm6
  56685. movdqu OWORD PTR [rsp+16], xmm7
  56686. movdqu OWORD PTR [rsp+32], xmm8
  56687. movdqu OWORD PTR [rsp+48], xmm9
  56688. movdqu OWORD PTR [rsp+64], xmm10
  56689. movdqu OWORD PTR [rsp+80], xmm11
  56690. ; From entry 1
  56691. mov rax, 1
  56692. movd xmm9, r8d
  56693. add rdx, 64
  56694. movd xmm11, eax
         ; 64 iterations: entries 1 through 64.
  56695. mov rax, 64
         ; xmm11 = 1 in every dword lane, xmm9 = idx in every dword lane.
  56696. pshufd xmm11, xmm11, 0
  56697. pshufd xmm9, xmm9, 0
  56698. pxor xmm10, xmm10
         ; xmm0-xmm3 accumulate the selected 64-byte entry.
  56699. pxor xmm0, xmm0
  56700. pxor xmm1, xmm1
  56701. pxor xmm2, xmm2
  56702. pxor xmm3, xmm3
         ; xmm10 = index of the entry about to be read (starts at 1).
  56703. movdqa xmm10, xmm11
  56704. L_256_get_entry_65_4_start_0:
         ; xmm8 = all ones when the current index equals idx, else zero.
  56705. movdqa xmm8, xmm10
  56706. paddd xmm10, xmm11
  56707. pcmpeqd xmm8, xmm9
  56708. movdqu xmm4, [rdx]
  56709. movdqu xmm5, [rdx+16]
  56710. movdqu xmm6, [rdx+32]
  56711. movdqu xmm7, [rdx+48]
  56712. add rdx, 64
  56713. pand xmm4, xmm8
  56714. pand xmm5, xmm8
  56715. pand xmm6, xmm8
  56716. pand xmm7, xmm8
  56717. por xmm0, xmm4
  56718. por xmm1, xmm5
  56719. por xmm2, xmm6
  56720. por xmm3, xmm7
  56721. dec rax
  56722. jnz L_256_get_entry_65_4_start_0
         ; First 32 bytes of the entry go to r+0, the last 32 bytes to r+64.
  56723. movdqu [rcx], xmm0
  56724. movdqu [rcx+16], xmm1
  56725. movdqu [rcx+64], xmm2
  56726. movdqu [rcx+80], xmm3
  56727. movdqu xmm6, OWORD PTR [rsp]
  56728. movdqu xmm7, OWORD PTR [rsp+16]
  56729. movdqu xmm8, OWORD PTR [rsp+32]
  56730. movdqu xmm9, OWORD PTR [rsp+48]
  56731. movdqu xmm10, OWORD PTR [rsp+64]
  56732. movdqu xmm11, OWORD PTR [rsp+80]
  56733. add rsp, 96
  56734. ret
  56735. sp_256_get_entry_65_4 ENDP
  56736. _text ENDS
  56737. IFDEF HAVE_INTEL_AVX2
  56738. ; /* Touch each possible entry that could be being copied.
         ; * Every candidate entry is loaded and ANDed with an equality mask, so
         ; * the memory access pattern is the same for every idx.
  56739. ; *
  56740. ; * r Point to copy into (rcx). Bytes 0-31 and 64-95 are written.
  56741. ; * table Table - start of the entries to access (rdx). Entries are 64 bytes.
  56742. ; * idx Index of entry to retrieve (r8d).
         ; *
         ; * Entry 0 is never read: the scan starts at entry 1 with zeroed
         ; * accumulators, so idx = 0 produces all-zero output.
  56743. ; */
  56744. _text SEGMENT READONLY PARA
  56745. sp_256_get_entry_65_avx2_4 PROC
         ; Save the non-volatile xmm registers used as scratch below.
  56746. sub rsp, 32
  56747. vmovdqu OWORD PTR [rsp], xmm6
  56748. vmovdqu OWORD PTR [rsp+16], xmm7
  56749. mov rax, 1
  56750. movd xmm5, r8d
  56751. add rdx, 64
  56752. movd xmm7, eax
         ; 64 iterations: entries 1 through 64, the same range the SSE2 variant
         ; sp_256_get_entry_65_4 scans. A count of 65 would also load the 64
         ; bytes following a 65-entry table (assumes the table holds exactly 65
         ; entries, as the routine name indicates).
  56753. mov rax, 64
         ; vpermd with an all-zero index vector broadcasts dword 0:
         ; ymm5 = idx in every lane, ymm7 = 1 in every lane.
  56754. vpxor ymm6, ymm6, ymm6
  56755. vpermd ymm5, ymm6, ymm5
  56756. vpermd ymm7, ymm6, ymm7
         ; ymm0/ymm1 accumulate the selected 64-byte entry.
  56757. vpxor ymm0, ymm0, ymm0
  56758. vpxor ymm1, ymm1, ymm1
         ; ymm6 = index of the entry about to be read (starts at 1).
  56759. vmovdqa ymm6, ymm7
  56760. L_256_get_entry_65_avx2_4_start:
         ; ymm4 = all ones when the current index equals idx, else zero.
  56761. vpcmpeqd ymm4, ymm6, ymm5
  56762. vpaddd ymm6, ymm6, ymm7
  56763. vmovupd ymm2, YMMWORD PTR [rdx]
  56764. vmovupd ymm3, YMMWORD PTR [rdx+32]
  56765. add rdx, 64
  56766. vpand ymm2, ymm2, ymm4
  56767. vpand ymm3, ymm3, ymm4
  56768. vpor ymm0, ymm0, ymm2
  56769. vpor ymm1, ymm1, ymm3
  56770. dec rax
  56771. jnz L_256_get_entry_65_avx2_4_start
         ; First 32 bytes of the entry go to r+0, the last 32 bytes to r+64.
  56772. vmovupd YMMWORD PTR [rcx], ymm0
  56773. vmovupd YMMWORD PTR [rcx+64], ymm1
  56774. vmovdqu xmm6, OWORD PTR [rsp]
  56775. vmovdqu xmm7, OWORD PTR [rsp+16]
  56776. add rsp, 32
  56777. ret
  56778. sp_256_get_entry_65_avx2_4 ENDP
  56779. _text ENDS
  56780. ENDIF
  56781. ENDIF
  56782. ; /* Increment the 4-word integer a in place. (a = a + 1)
  56783. ; *
  56784. ; * a Integer to increment (rcx). A carry out of the top word is dropped.
  56785. ; */
  56786. _text SEGMENT READONLY PARA
  56787. sp_256_add_one_4 PROC
         ; Seed the carry flag with the 1 to add, then ripple it through all
         ; four words with a uniform adc chain.
         stc
  56788. adc QWORD PTR [rcx], 0
  56789. adc QWORD PTR [rcx+8], 0
  56790. adc QWORD PTR [rcx+16], 0
  56791. adc QWORD PTR [rcx+24], 0
  56792. ret
  56793. sp_256_add_one_4 ENDP
  56794. _text ENDS
  56795. ; /* Convert a big endian byte array into the little endian word array r.
  56796. ; * Byte order is reversed with the bswap instruction.
  56797. ; *
  56798. ; * r Destination integer (rcx); words up to r+32 are written or zeroed.
  56799. ; * size Maximum number of bytes to convert (rdx). Not read by this routine.
  56800. ; * a Byte array, most significant byte first (r8).
  56801. ; * n Number of bytes in array to read (r9).
  56802. ; */
  56803. _text SEGMENT READONLY PARA
  56804. sp_256_from_bin_bswap PROC
  56805. push r12
         ; r11 = one past the last input byte (the cursor moves backwards),
         ; r12 = end of the 32-byte output.
  56807. mov r11, r8
  56808. mov r12, rcx
  56809. add r11, r9
  56810. add r12, 32
  56812. jmp L_256_from_bin_bswap_64_end
         ; Bulk loop: 64 input bytes become eight output words per pass.
  56813. L_256_from_bin_bswap_64_start:
  56814. sub r11, 64
  56815. mov r10, QWORD PTR [r11+56]
  56816. mov rax, QWORD PTR [r11+48]
  56817. bswap r10
  56818. bswap rax
  56819. mov QWORD PTR [rcx], r10
  56820. mov QWORD PTR [rcx+8], rax
  56821. mov r10, QWORD PTR [r11+40]
  56822. mov rax, QWORD PTR [r11+32]
  56823. bswap r10
  56824. bswap rax
  56825. mov QWORD PTR [rcx+16], r10
  56826. mov QWORD PTR [rcx+24], rax
  56827. mov r10, QWORD PTR [r11+24]
  56828. mov rax, QWORD PTR [r11+16]
  56829. bswap r10
  56830. bswap rax
  56831. mov QWORD PTR [rcx+32], r10
  56832. mov QWORD PTR [rcx+40], rax
  56833. mov r10, QWORD PTR [r11+8]
  56834. mov rax, QWORD PTR [r11]
  56835. bswap r10
  56836. bswap rax
  56837. mov QWORD PTR [rcx+48], r10
  56838. mov QWORD PTR [rcx+56], rax
  56839. add rcx, 64
  56840. sub r9, 64
  56841. L_256_from_bin_bswap_64_end:
  56842. cmp r9, 63
  56843. jg L_256_from_bin_bswap_64_start
  56844. jmp L_256_from_bin_bswap_8_end
         ; Word loop: one 8-byte group per pass, taken from the tail of a.
  56845. L_256_from_bin_bswap_8_start:
  56846. sub r11, 8
  56847. mov r10, QWORD PTR [r11]
  56848. bswap r10
  56849. mov QWORD PTR [rcx], r10
  56850. add rcx, 8
  56851. sub r9, 8
  56852. L_256_from_bin_bswap_8_end:
  56853. cmp r9, 7
  56854. jg L_256_from_bin_bswap_8_start
         ; 1-7 leading bytes left over? Assemble them into one partial word.
  56855. test r9, r9
  56856. jz L_256_from_bin_bswap_hi_end
         ; rax accumulates the word, r10 carries the current byte zero-extended.
  56857. xor eax, eax
  56858. xor r10d, r10d
  56859. L_256_from_bin_bswap_hi_start:
  56860. mov r10b, BYTE PTR [r8]
  56861. shl rax, 8
  56862. inc r8
  56863. add rax, r10
  56864. dec r9
  56865. jg L_256_from_bin_bswap_hi_start
  56866. mov QWORD PTR [rcx], rax
  56867. add rcx, 8
  56868. L_256_from_bin_bswap_hi_end:
         ; Zero any output words that were not filled.
  56869. cmp rcx, r12
  56870. jge L_256_from_bin_bswap_zero_end
  56871. L_256_from_bin_bswap_zero_start:
  56872. mov QWORD PTR [rcx], 0
  56873. add rcx, 8
  56874. cmp rcx, r12
  56875. jl L_256_from_bin_bswap_zero_start
  56876. L_256_from_bin_bswap_zero_end:
  56878. pop r12
  56879. ret
  56880. sp_256_from_bin_bswap ENDP
  56881. _text ENDS
  56882. IFNDEF NO_MOVBE_SUPPORT
  56883. ; /* Convert a big endian byte array into the little endian word array r.
  56884. ; * Byte order is reversed with the optional movbe instruction.
  56885. ; *
  56886. ; * r Destination integer (rcx); words up to r+32 are written or zeroed.
  56887. ; * size Maximum number of bytes to convert (rdx). Not read by this routine.
  56888. ; * a Byte array, most significant byte first (r8).
  56889. ; * n Number of bytes in array to read (r9).
  56890. ; */
  56891. _text SEGMENT READONLY PARA
  56892. sp_256_from_bin_movbe PROC
  56893. push r12
         ; r11 = one past the last input byte (the cursor moves backwards),
         ; r12 = end of the 32-byte output.
  56894. mov r11, r8
  56895. mov r12, rcx
  56896. add r11, r9
  56897. add r12, 32
  56898. jmp L_256_from_bin_movbe_64_end
         ; Bulk loop: 64 input bytes become eight output words per pass. The
         ; byte swap is done by the store form of movbe.
  56899. L_256_from_bin_movbe_64_start:
  56900. sub r11, 64
  56901. mov rax, QWORD PTR [r11+56]
  56902. mov r10, QWORD PTR [r11+48]
  56903. movbe QWORD PTR [rcx], rax
  56904. movbe QWORD PTR [rcx+8], r10
  56905. mov rax, QWORD PTR [r11+40]
  56906. mov r10, QWORD PTR [r11+32]
  56907. movbe QWORD PTR [rcx+16], rax
  56908. movbe QWORD PTR [rcx+24], r10
  56909. mov rax, QWORD PTR [r11+24]
  56910. mov r10, QWORD PTR [r11+16]
  56911. movbe QWORD PTR [rcx+32], rax
  56912. movbe QWORD PTR [rcx+40], r10
  56913. mov rax, QWORD PTR [r11+8]
  56914. mov r10, QWORD PTR [r11]
  56915. movbe QWORD PTR [rcx+48], rax
  56916. movbe QWORD PTR [rcx+56], r10
  56917. add rcx, 64
  56918. sub r9, 64
  56919. L_256_from_bin_movbe_64_end:
  56920. cmp r9, 63
  56921. jg L_256_from_bin_movbe_64_start
  56922. jmp L_256_from_bin_movbe_8_end
         ; Word loop: one 8-byte group per pass, taken from the tail of a.
  56923. L_256_from_bin_movbe_8_start:
  56924. sub r11, 8
  56925. mov rax, QWORD PTR [r11]
  56926. movbe QWORD PTR [rcx], rax
  56927. add rcx, 8
  56928. sub r9, 8
  56929. L_256_from_bin_movbe_8_end:
  56930. cmp r9, 7
  56931. jg L_256_from_bin_movbe_8_start
         ; 1-7 leading bytes left over? Assemble them into one partial word.
  56932. test r9, r9
  56933. jz L_256_from_bin_movbe_hi_end
         ; rax accumulates the word, r10 carries the current byte zero-extended.
  56934. xor eax, eax
  56935. xor r10d, r10d
  56936. L_256_from_bin_movbe_hi_start:
  56937. mov r10b, BYTE PTR [r8]
  56938. shl rax, 8
  56939. inc r8
  56940. add rax, r10
  56941. dec r9
  56942. jg L_256_from_bin_movbe_hi_start
  56943. mov QWORD PTR [rcx], rax
  56944. add rcx, 8
  56945. L_256_from_bin_movbe_hi_end:
         ; Zero any output words that were not filled.
  56946. cmp rcx, r12
  56947. jge L_256_from_bin_movbe_zero_end
  56948. L_256_from_bin_movbe_zero_start:
  56949. mov QWORD PTR [rcx], 0
  56950. add rcx, 8
  56951. cmp rcx, r12
  56952. jl L_256_from_bin_movbe_zero_start
  56953. L_256_from_bin_movbe_zero_end:
  56954. pop r12
  56955. ret
  56956. sp_256_from_bin_movbe ENDP
  56957. _text ENDS
  56958. ENDIF
  56959. ; /* Serialise the 4-word integer r as big endian bytes.
  56960. ; * Exactly 32 bytes are written.
  56961. ; * Byte order is reversed with the bswap instruction.
  56962. ; *
  56963. ; * r Integer to write out, least significant word first (rcx).
  56964. ; * a Destination byte array, most significant byte first (rdx).
  56965. ; */
  56966. _text SEGMENT READONLY PARA
  56967. sp_256_to_bin_bswap_4 PROC
         ; Upper two words become the first 16 output bytes.
  56968. mov r9, QWORD PTR [rcx+24]
  56969. bswap r9
  56970. mov r10, QWORD PTR [rcx+16]
  56971. bswap r10
  56972. mov QWORD PTR [rdx], r9
  56973. mov QWORD PTR [rdx+8], r10
         ; Lower two words become the last 16 output bytes.
  56974. mov r9, QWORD PTR [rcx+8]
  56975. bswap r9
  56976. mov r10, QWORD PTR [rcx]
  56977. bswap r10
  56978. mov QWORD PTR [rdx+16], r9
  56979. mov QWORD PTR [rdx+24], r10
  56980. ret
  56981. sp_256_to_bin_bswap_4 ENDP
  56982. _text ENDS
  56983. IFNDEF NO_MOVBE_SUPPORT
  56984. ; /* Serialise the 4-word integer r as big endian bytes.
  56985. ; * Exactly 32 bytes are written.
  56986. ; * Byte order is reversed with the optional movbe instruction.
  56987. ; *
  56988. ; * r Integer to write out, least significant word first (rcx).
  56989. ; * a Destination byte array, most significant byte first (rdx).
  56990. ; */
  56991. _text SEGMENT READONLY PARA
  56992. sp_256_to_bin_movbe_4 PROC
         ; Plain loads, byte-swapping stores. The upper two words become the
         ; first 16 output bytes.
  56993. mov rax, QWORD PTR [rcx+24]
  56994. mov r8, QWORD PTR [rcx+16]
  56995. movbe QWORD PTR [rdx], rax
  56996. movbe QWORD PTR [rdx+8], r8
         ; Lower two words become the last 16 output bytes.
  56997. mov rax, QWORD PTR [rcx+8]
  56998. mov r8, QWORD PTR [rcx]
  56999. movbe QWORD PTR [rdx+16], rax
  57000. movbe QWORD PTR [rdx+24], r8
  57001. ret
  57002. sp_256_to_bin_movbe_4 ENDP
  57003. _text ENDS
  57004. ENDIF
  57005. ; /* Subtract the 4-word integer b from a, in place. (a -= b)
  57006. ; *
  57007. ; * a Minuend, overwritten with the difference (rcx).
  57008. ; * b Subtrahend (rdx).
         ; * Returns in rax: 0 when the subtraction did not borrow, -1 when it did.
  57009. ; */
  57010. _text SEGMENT READONLY PARA
  57011. sp_256_sub_in_place_4 PROC
         ; All of b is loaded before a is modified.
  57012. mov rax, QWORD PTR [rdx]
  57013. mov r8, QWORD PTR [rdx+8]
  57014. mov r9, QWORD PTR [rdx+16]
  57015. mov r10, QWORD PTR [rdx+24]
  57016. sub QWORD PTR [rcx], rax
  57017. sbb QWORD PTR [rcx+8], r8
  57018. sbb QWORD PTR [rcx+16], r9
  57019. sbb QWORD PTR [rcx+24], r10
         ; rax = 0 - borrow
  57020. sbb rax, rax
  57021. ret
  57022. sp_256_sub_in_place_4 ENDP
  57023. _text ENDS
  57024. ; /* Multiply the 4-word integer a by the single word b. (r = a * b)
  57025. ; *
  57026. ; * r Product, 5 words (rcx).
  57027. ; * a Multiplicand, 4 words (rdx).
  57028. ; * b Multiplier digit (r8).
  57029. ; */
  57030. _text SEGMENT READONLY PARA
  57031. sp_256_mul_d_4 PROC
         ; r9 = a, because mul writes the high half of each product to rdx.
         ; r10 carries the high word of one partial product into the next.
  57032. mov r9, rdx
  57033. ; A[0] * B
  57034. mov rax, r8
  57035. mul QWORD PTR [r9]
  57036. mov QWORD PTR [rcx], rax
  57037. mov r10, rdx
  57038. ; A[1] * B
  57039. mov rax, r8
  57040. mul QWORD PTR [r9+8]
  57041. add rax, r10
  57042. adc rdx, 0
  57043. mov QWORD PTR [rcx+8], rax
  57044. mov r10, rdx
  57045. ; A[2] * B
  57046. mov rax, r8
  57047. mul QWORD PTR [r9+16]
  57048. add rax, r10
  57049. adc rdx, 0
  57050. mov QWORD PTR [rcx+16], rax
  57051. mov r10, rdx
  57052. ; A[3] * B
  57053. mov rax, r8
  57054. mul QWORD PTR [r9+24]
  57055. add rax, r10
  57056. adc rdx, 0
  57057. mov QWORD PTR [rcx+24], rax
  57058. mov QWORD PTR [rcx+32], rdx
  57065. ret
  57066. sp_256_mul_d_4 ENDP
  57067. _text ENDS
  57068. IFDEF HAVE_INTEL_AVX2
  57069. ; /* Multiply the 4-word integer a by the single word b. (r = a * b)
  57070. ; * Uses mulx, which leaves the flags alone between partial products.
  57071. ; *
  57072. ; * r Product, 5 words (rcx).
  57073. ; * a Multiplicand, 4 words (rdx).
  57074. ; * b Multiplier digit (r8).
         ; */
  57075. _text SEGMENT READONLY PARA
  57076. sp_256_mul_d_avx2_4 PROC
         ; rax = a and rdx = b: rdx is the implicit mulx multiplicand.
  57077. mov rax, rdx
  57078. mov rdx, r8
  57079. ; A[0] * B
  57080. mulx r10, r9, QWORD PTR [rax]
  57081. mov QWORD PTR [rcx], r9
  57082. ; A[1] * B
         ; Each low half is added to the previous high half. Neither mulx nor
         ; mov alters the carry flag, so one add/adc chain spans the products.
  57083. mulx r11, r9, QWORD PTR [rax+8]
  57084. add r9, r10
  57085. mov QWORD PTR [rcx+8], r9
  57086. ; A[2] * B
  57087. mulx r10, r9, QWORD PTR [rax+16]
  57088. adc r9, r11
  57089. mov QWORD PTR [rcx+16], r9
  57090. ; A[3] * B
  57091. mulx r11, r9, QWORD PTR [rax+24]
  57092. adc r9, r10
  57093. adc r11, 0
  57094. mov QWORD PTR [rcx+24], r9
  57095. mov QWORD PTR [rcx+32], r11
  57107. ret
  57108. sp_256_mul_d_avx2_4 ENDP
  57109. _text ENDS
  57110. ENDIF
  57111. IFDEF _WIN64
  57112. ; /* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div)
  57113. ; *
  57114. ; * d1 The high order half of the number to divide (rcx).
  57115. ; * d0 The low order half of the number to divide (rdx).
  57116. ; * div The divisor (r8).
  57117. ; * returns the quotient in rax; the remainder left in rdx is not used.
  57118. ; */
  57119. _text SEGMENT READONLY PARA
  57120. div_256_word_asm_4 PROC
         ; div takes its dividend in rdx:rax, so move d0 out of rdx first.
  57121. mov rax, rdx
  57122. mov rdx, rcx
  57123. div r8
  57125. ret
  57126. div_256_word_asm_4 ENDP
  57127. _text ENDS
  57128. ENDIF
  57129. IFDEF HAVE_INTEL_AVX2
  57130. ; /* Multiply two Montgomery form numbers mod the modulus whose four words are
         ; * hard-coded in the reduction below, using MULX/ADCX/ADOX.
  57131. ; * (r = a * b mod m)
  57132. ; *
  57133. ; * r Result of multiplication (rcx; four 64-bit words are stored).
  57134. ; * a First number to multiply in Montgomery form (rdx; four words are read).
  57135. ; * b Second number to multiply in Montgomery form (r8; four words are read).
         ; *
         ; * The 512-bit product is built in r8..r15 (low to high). Four reduction
         ; * rounds then each add a multiple of the modulus so that the lowest live
         ; * word can be dropped, leaving the result in r12..r15. The modulus is
         ; * subtracted once, and only when the last round carried out of r15; no
         ; * comparison against the modulus is made here.
         ; * rbp, r12-r15, rdi, rsi and rbx are saved on entry and restored on exit.
  57136. ; */
  57137. _text SEGMENT READONLY PARA
  57138. sp_256_mont_mul_order_avx2_4 PROC
  57139. push rbp
  57140. push r12
  57141. push r13
  57142. push r14
  57143. push r15
  57144. push rdi
  57145. push rsi
  57146. push rbx
  57147. mov rbp, r8 ; rbp = b
  57148. mov rax, rdx ; rax = a; rdx itself is the implicit MULX source operand
  57149. mov rdx, QWORD PTR [rax] ; rdx = A[0]
  57150. ; A[0] * B[0]
  57151. mulx r9, r8, QWORD PTR [rbp]
  57152. xor rbx, rbx ; rbx = 0; also clears CF and OF for the ADCX/ADOX chains
  57153. ; A[0] * B[1]
  57154. mulx r10, rdi, QWORD PTR [rbp+8]
  57155. adcx r9, rdi
  57156. ; A[0] * B[2]
  57157. mulx r11, rdi, QWORD PTR [rbp+16]
  57158. adcx r10, rdi
  57159. ; A[0] * B[3]
  57160. mulx r12, rdi, QWORD PTR [rbp+24]
  57161. adcx r11, rdi
  57162. mov rdx, QWORD PTR [rax+8] ; rdx = A[1]
  57163. adcx r12, rbx ; fold the last carry of row 0 into its top word
  57164. ; A[1] * B[0]
  57165. mulx rsi, rdi, QWORD PTR [rbp]
  57166. xor rbx, rbx ; restart both carry chains for row 1
  57167. adcx r9, rdi
  57168. ; A[1] * B[1]
  57169. mulx r15, rdi, QWORD PTR [rbp+8]
  57170. adox r10, rsi ; high halves ride the OF chain, low halves the CF chain
  57171. adcx r10, rdi
  57172. ; A[1] * B[2]
  57173. mulx rsi, rdi, QWORD PTR [rbp+16]
  57174. adox r11, r15
  57175. adcx r11, rdi
  57176. ; A[1] * B[3]
  57177. mulx r13, rdi, QWORD PTR [rbp+24]
  57178. adox r12, rsi
  57179. adcx r12, rdi
  57180. adox r13, rbx
  57181. mov rdx, QWORD PTR [rax+16] ; rdx = A[2] (MOV leaves the pending CF intact)
  57182. adcx r13, rbx
  57183. ; A[2] * B[0]
  57184. mulx rsi, rdi, QWORD PTR [rbp]
  57185. xor rbx, rbx ; restart both carry chains for row 2
  57186. adcx r10, rdi
  57187. ; A[2] * B[1]
  57188. mulx r15, rdi, QWORD PTR [rbp+8]
  57189. adox r11, rsi
  57190. adcx r11, rdi
  57191. ; A[2] * B[2]
  57192. mulx rsi, rdi, QWORD PTR [rbp+16]
  57193. adox r12, r15
  57194. adcx r12, rdi
  57195. ; A[2] * B[3]
  57196. mulx r14, rdi, QWORD PTR [rbp+24]
  57197. adox r13, rsi
  57198. adcx r13, rdi
  57199. adox r14, rbx
  57200. mov rdx, QWORD PTR [rax+24] ; rdx = A[3]
  57201. adcx r14, rbx
  57202. ; A[3] * B[0]
  57203. mulx rsi, rdi, QWORD PTR [rbp]
  57204. xor rbx, rbx ; restart both carry chains for row 3
  57205. adcx r11, rdi
  57206. ; A[3] * B[1]
  57207. mulx r15, rdi, QWORD PTR [rbp+8]
  57208. adox r12, rsi
  57209. adcx r12, rdi
  57210. ; A[3] * B[2]
  57211. mulx rsi, rdi, QWORD PTR [rbp+16]
  57212. adox r13, r15
  57213. adcx r13, rdi
  57214. ; A[3] * B[3]
  57215. mulx r15, rdi, QWORD PTR [rbp+24]
  57216. adox r14, rsi
  57217. adcx r14, rdi
  57218. adox r15, rbx
  57219. adcx r15, rbx ; r8..r15 now hold the full 512-bit product
  57220. ; Start Reduction
  57221. mov rbx, 14758798090332847183 ; per-word Montgomery multiplier (presumably -1/m mod 2^64 - TODO confirm)
  57222. ; A[0]: fold product word 0 (r8) away
  57223. mov rdx, rbx
  57224. imul rdx, r8 ; rdx = mu = r8 * multiplier (low 64 bits)
  57225. mov rdi, 17562291160714782033 ; modulus word 0
  57226. xor rbp, rbp ; rbp = 0, CF = OF = 0
  57227. mulx rax, rsi, rdi
  57228. mov rdi, 13611842547513532036 ; modulus word 1
  57229. adcx r8, rsi ; only the carry out is kept; r8 is reused below
  57230. adox r9, rax
  57231. mulx rax, rsi, rdi
  57232. mov rdi, 18446744073709551615 ; modulus word 2 (2^64 - 1)
  57233. adcx r9, rsi
  57234. adox r10, rax
  57235. mulx rax, rsi, rdi
  57236. mov rdi, 18446744069414584320 ; modulus word 3 (2^64 - 2^32)
  57237. adcx r10, rsi
  57238. adox r11, rax
  57239. mulx rax, rsi, rdi
  57240. adcx r11, rsi
  57241. adox r12, rax
  57242. adcx r12, rbp
  57243. mov r8, rbp ; r8 = 0 without touching the flags
  57244. ; carry: r8 = OF + CF, consumed by the next round
  57245. adox r8, rbp
  57246. adcx r8, rbp
  57247. ; A[1]: fold product word 1 (r9) away
  57248. mov rdx, rbx
  57249. imul rdx, r9
  57250. mov rdi, 17562291160714782033
  57251. xor rbp, rbp
  57252. mulx rax, rsi, rdi
  57253. mov rdi, 13611842547513532036
  57254. adcx r9, rsi
  57255. adox r10, rax
  57256. mulx rax, rsi, rdi
  57257. mov rdi, 18446744073709551615
  57258. adcx r10, rsi
  57259. adox r11, rax
  57260. mulx rax, rsi, rdi
  57261. mov rdi, 18446744069414584320
  57262. adcx r11, rsi
  57263. adox r12, rax
  57264. mulx rax, rsi, rdi
  57265. adcx r12, rsi
  57266. adox r13, rax
  57267. adcx r13, r8 ; add the carry saved by the previous round
  57268. mov r8, rbp
  57269. ; carry
  57270. adox r8, rbp
  57271. adcx r8, rbp
  57272. ; A[2]: fold product word 2 (r10) away
  57273. mov rdx, rbx
  57274. imul rdx, r10
  57275. mov rdi, 17562291160714782033
  57276. xor rbp, rbp
  57277. mulx rax, rsi, rdi
  57278. mov rdi, 13611842547513532036
  57279. adcx r10, rsi
  57280. adox r11, rax
  57281. mulx rax, rsi, rdi
  57282. mov rdi, 18446744073709551615
  57283. adcx r11, rsi
  57284. adox r12, rax
  57285. mulx rax, rsi, rdi
  57286. mov rdi, 18446744069414584320
  57287. adcx r12, rsi
  57288. adox r13, rax
  57289. mulx rax, rsi, rdi
  57290. adcx r13, rsi
  57291. adox r14, rax
  57292. adcx r14, r8
  57293. mov r8, rbp
  57294. ; carry
  57295. adox r8, rbp
  57296. adcx r8, rbp
  57297. ; A[3]: fold product word 3 (r11) away
  57298. mov rdx, rbx
  57299. imul rdx, r11
  57300. mov rdi, 17562291160714782033
  57301. xor rbp, rbp
  57302. mulx rax, rsi, rdi
  57303. mov rdi, 13611842547513532036
  57304. adcx r11, rsi
  57305. adox r12, rax
  57306. mulx rax, rsi, rdi
  57307. mov rdi, 18446744073709551615
  57308. adcx r12, rsi
  57309. adox r13, rax
  57310. mulx rax, rsi, rdi
  57311. mov rdi, 18446744069414584320
  57312. adcx r13, rsi
  57313. adox r14, rax
  57314. mulx rax, rsi, rdi
  57315. adcx r14, rsi
  57316. adox r15, rax
  57317. adcx r15, r8
  57318. mov r8, rbp
  57319. ; carry out of the top word after the last round
  57320. adox r8, rbp
  57321. adcx r8, rbp
  57322. neg r8 ; r8 = 0 (no carry) or all ones (carry): mask for the modulus
  57323. mov rdi, 17562291160714782033
  57324. mov rbx, 13611842547513532036
  57325. and rdi, r8
  57326. mov rbp, 18446744069414584320
  57327. and rbx, r8
  57328. and rbp, r8
  57329. sub r12, rdi ; result -= (modulus AND mask)
  57330. sbb r13, rbx
  57331. mov QWORD PTR [rcx], r12
  57332. sbb r14, r8 ; modulus word 2 is all ones, so the mask is the masked word
  57333. mov QWORD PTR [rcx+8], r13
  57334. sbb r15, rbp
  57335. mov QWORD PTR [rcx+16], r14
  57336. mov QWORD PTR [rcx+24], r15
  57337. pop rbx
  57338. pop rsi
  57339. pop rdi
  57340. pop r15
  57341. pop r14
  57342. pop r13
  57343. pop r12
  57344. pop rbp
  57345. ret
  57346. sp_256_mont_mul_order_avx2_4 ENDP
  57347. _text ENDS
  57348. ENDIF
  57349. IFDEF HAVE_INTEL_AVX2
  57350. ; /* Square the Montgomery form number mod the modulus whose four words are
         ; * hard-coded in the reduction below, using MULX/ADCX/ADOX. (r = a * a mod m)
  57351. ; *
  57352. ; * r Result of squaring (rcx; four 64-bit words are stored).
  57353. ; * a Number to square in Montgomery form (rdx; four 64-bit words are read).
         ; *
         ; * The six off-diagonal products A[i]*A[j] (i < j) are summed into r9..r14,
         ; * doubled, and the four squares A[i]*A[i] are added, giving the 512-bit
         ; * square in r8..r15. Four reduction rounds then leave the result in
         ; * r12..r15. The modulus is subtracted once, and only when the last round
         ; * carried out of r15; no comparison against the modulus is made here.
         ; * rbp, r12-r15, rdi, rsi and rbx are saved on entry and restored on exit.
  57354. ; */
  57355. _text SEGMENT READONLY PARA
  57356. sp_256_mont_sqr_order_avx2_4 PROC
  57357. push rbp
  57358. push r12
  57359. push r13
  57360. push r14
  57361. push r15
  57362. push rdi
  57363. push rsi
  57364. push rbx
  57365. mov rax, rdx ; rax = a; rdx itself is the implicit MULX source operand
  57366. xor r8, r8 ; clears CF and OF for the ADCX/ADOX chains
  57367. mov rdx, QWORD PTR [rax] ; rdx = A[0]
  57368. mov rsi, QWORD PTR [rax+8] ; rsi = A[1]
  57369. mov rbx, QWORD PTR [rax+16] ; rbx = A[2]
  57370. mov r15, QWORD PTR [rax+24] ; r15 = A[3]
  57371. ; A[0] * A[1]
  57372. mulx r10, r9, rsi
  57373. ; A[0] * A[2]
  57374. mulx r11, r8, rbx
  57375. adox r10, r8
  57376. ; A[0] * A[3]
  57377. mulx r12, r8, r15
  57378. mov rdx, rsi ; rdx = A[1]
  57379. adox r11, r8
  57380. ; A[1] * A[2]
  57381. mulx rdi, r8, rbx
  57382. mov rdx, r15 ; rdx = A[3]
  57383. adcx r11, r8
  57384. ; A[1] * A[3]
  57385. mulx r13, r8, rsi
  57386. mov r15, 0 ; zero via MOV so the CF/OF chains in flight are not disturbed
  57387. adox r12, rdi
  57388. adcx r12, r8
  57389. ; A[2] * A[3]
  57390. mulx r14, r8, rbx
  57391. adox r13, r15
  57392. adcx r13, r8
  57393. adox r14, r15
  57394. adcx r14, r15 ; r9..r14 = sum of the off-diagonal products
  57395. ; Double with Carry Flag (ADCX doubles r9..r14; ADOX adds the squares)
  57396. xor r15, r15 ; r15 = 0, CF = OF = 0
  57397. ; A[0] * A[0]
  57398. mov rdx, QWORD PTR [rax]
  57399. mulx rdi, r8, rdx ; r8 = word 0 of the square
  57400. adcx r9, r9
  57401. adcx r10, r10
  57402. adox r9, rdi
  57403. ; A[1] * A[1]
  57404. mov rdx, QWORD PTR [rax+8]
  57405. mulx rbx, rsi, rdx
  57406. adcx r11, r11
  57407. adox r10, rsi
  57408. ; A[2] * A[2]
  57409. mov rdx, QWORD PTR [rax+16]
  57410. mulx rsi, rdi, rdx
  57411. adcx r12, r12
  57412. adox r11, rbx
  57413. adcx r13, r13
  57414. adox r12, rdi
  57415. adcx r14, r14
  57416. ; A[3] * A[3]
  57417. mov rdx, QWORD PTR [rax+24]
  57418. mulx rbx, rdi, rdx
  57419. adox r13, rsi
  57420. adcx r15, r15 ; r15 = carry out of the doubling
  57421. adox r14, rdi
  57422. adox r15, rbx ; r8..r15 now hold the full 512-bit square
  57423. ; Start Reduction
  57424. mov rbx, 14758798090332847183 ; per-word Montgomery multiplier (presumably -1/m mod 2^64 - TODO confirm)
  57425. ; A[0]: fold word 0 (r8) away
  57426. mov rdx, rbx
  57427. imul rdx, r8 ; rdx = mu = r8 * multiplier (low 64 bits)
  57428. mov rdi, 17562291160714782033 ; modulus word 0
  57429. xor rbp, rbp ; rbp = 0, CF = OF = 0
  57430. mulx rax, rsi, rdi
  57431. mov rdi, 13611842547513532036 ; modulus word 1
  57432. adcx r8, rsi ; only the carry out is kept; r8 is reused below
  57433. adox r9, rax
  57434. mulx rax, rsi, rdi
  57435. mov rdi, 18446744073709551615 ; modulus word 2 (2^64 - 1)
  57436. adcx r9, rsi
  57437. adox r10, rax
  57438. mulx rax, rsi, rdi
  57439. mov rdi, 18446744069414584320 ; modulus word 3 (2^64 - 2^32)
  57440. adcx r10, rsi
  57441. adox r11, rax
  57442. mulx rax, rsi, rdi
  57443. adcx r11, rsi
  57444. adox r12, rax
  57445. adcx r12, rbp
  57446. mov r8, rbp ; r8 = 0 without touching the flags
  57447. ; carry: r8 = OF + CF, consumed by the next round
  57448. adox r8, rbp
  57449. adcx r8, rbp
  57450. ; A[1]: fold word 1 (r9) away
  57451. mov rdx, rbx
  57452. imul rdx, r9
  57453. mov rdi, 17562291160714782033
  57454. xor rbp, rbp
  57455. mulx rax, rsi, rdi
  57456. mov rdi, 13611842547513532036
  57457. adcx r9, rsi
  57458. adox r10, rax
  57459. mulx rax, rsi, rdi
  57460. mov rdi, 18446744073709551615
  57461. adcx r10, rsi
  57462. adox r11, rax
  57463. mulx rax, rsi, rdi
  57464. mov rdi, 18446744069414584320
  57465. adcx r11, rsi
  57466. adox r12, rax
  57467. mulx rax, rsi, rdi
  57468. adcx r12, rsi
  57469. adox r13, rax
  57470. adcx r13, r8 ; add the carry saved by the previous round
  57471. mov r8, rbp
  57472. ; carry
  57473. adox r8, rbp
  57474. adcx r8, rbp
  57475. ; A[2]: fold word 2 (r10) away
  57476. mov rdx, rbx
  57477. imul rdx, r10
  57478. mov rdi, 17562291160714782033
  57479. xor rbp, rbp
  57480. mulx rax, rsi, rdi
  57481. mov rdi, 13611842547513532036
  57482. adcx r10, rsi
  57483. adox r11, rax
  57484. mulx rax, rsi, rdi
  57485. mov rdi, 18446744073709551615
  57486. adcx r11, rsi
  57487. adox r12, rax
  57488. mulx rax, rsi, rdi
  57489. mov rdi, 18446744069414584320
  57490. adcx r12, rsi
  57491. adox r13, rax
  57492. mulx rax, rsi, rdi
  57493. adcx r13, rsi
  57494. adox r14, rax
  57495. adcx r14, r8
  57496. mov r8, rbp
  57497. ; carry
  57498. adox r8, rbp
  57499. adcx r8, rbp
  57500. ; A[3]: fold word 3 (r11) away
  57501. mov rdx, rbx
  57502. imul rdx, r11
  57503. mov rdi, 17562291160714782033
  57504. xor rbp, rbp
  57505. mulx rax, rsi, rdi
  57506. mov rdi, 13611842547513532036
  57507. adcx r11, rsi
  57508. adox r12, rax
  57509. mulx rax, rsi, rdi
  57510. mov rdi, 18446744073709551615
  57511. adcx r12, rsi
  57512. adox r13, rax
  57513. mulx rax, rsi, rdi
  57514. mov rdi, 18446744069414584320
  57515. adcx r13, rsi
  57516. adox r14, rax
  57517. mulx rax, rsi, rdi
  57518. adcx r14, rsi
  57519. adox r15, rax
  57520. adcx r15, r8
  57521. mov r8, rbp
  57522. ; carry out of the top word after the last round
  57523. adox r8, rbp
  57524. adcx r8, rbp
  57525. neg r8 ; r8 = 0 (no carry) or all ones (carry): mask for the modulus
  57526. mov rdi, 17562291160714782033
  57527. mov rbx, 13611842547513532036
  57528. and rdi, r8
  57529. mov rbp, 18446744069414584320
  57530. and rbx, r8
  57531. and rbp, r8
  57532. sub r12, rdi ; result -= (modulus AND mask)
  57533. sbb r13, rbx
  57534. mov QWORD PTR [rcx], r12
  57535. sbb r14, r8 ; modulus word 2 is all ones, so the mask is the masked word
  57536. mov QWORD PTR [rcx+8], r13
  57537. sbb r15, rbp
  57538. mov QWORD PTR [rcx+16], r14
  57539. mov QWORD PTR [rcx+24], r15
  57540. pop rbx
  57541. pop rsi
  57542. pop rdi
  57543. pop r15
  57544. pop r14
  57545. pop r13
  57546. pop r12
  57547. pop rbp
  57548. ret
  57549. sp_256_mont_sqr_order_avx2_4 ENDP
  57550. _text ENDS
  57551. ENDIF
  ; /* Non-constant time modular inversion (binary GCD): r = 1 / a mod m.
  ; *
  ; * @param [out] r Resulting number (rcx; four 64-bit words are written).
  ; * @param [in]  a Number to invert (rdx; four 64-bit words are read).
  ; * @param [in]  m Modulus (r8; four 64-bit words are read).
  ; * @return MP_OKAY on success.
  ; *         NOTE(review): rax is never loaded with a status in this routine;
  ; *         only al is written, as the internal "which accumulator" flag.
  ; *         Confirm that callers ignore the return value.
  ; *
  ; * Pass 1 reduces u = m (r9..r12) and v = a (r13,r14,r15,rdi) and records one
  ; * byte per step in a buffer on the stack:
  ; *   0  u was halved            1  v was halved
  ; *   2  u -= v, then halved     3  v -= u, then halved
  ; *   7  end marker
  ; * It stops when u == 1 (al = 1) or v == 1 (al = 0).
  ; * Pass 2 replays the bytes on b = m and d = 1 (held in the same registers),
  ; * doing every step modulo m, and stores b (al == 1) or d (al == 0) to r.
  ; *
  ; * Nothing here validates the inputs: the halving loops only exit on an odd
  ; * value and pass 1 only exits on u == 1 or v == 1, so a must be non-zero and
  ; * coprime to m, and the modular halving in pass 2 assumes m is odd.
  ; */
  _text SEGMENT READONLY PARA
  sp_256_mod_inv_4 PROC
          push    r12
          push    r13
          push    r14
          push    r15
          push    rdi
          push    rsi
          ; Step buffer. With rsp 16-byte aligned at the caller's CALL, the
          ; return address plus six pushes leave rsp = 8 (mod 16). Reserving
          ; 520 bytes instead of an odd 513 keeps rsp 16-byte aligned for the
          ; whole body; the seven extra bytes are padding.
          sub     rsp, 520
          ; u = m
          mov     r9, QWORD PTR [r8]
          mov     r10, QWORD PTR [r8+8]
          mov     r11, QWORD PTR [r8+16]
          mov     r12, QWORD PTR [r8+24]
          ; v = a
          mov     r13, QWORD PTR [rdx]
          mov     r14, QWORD PTR [rdx+8]
          mov     r15, QWORD PTR [rdx+16]
          mov     rdi, QWORD PTR [rdx+24]
          mov     rsi, 0                  ; rsi = index of the next step byte
          ; ---- Pass 1: strip trailing zero bits from v ----
          test    r13b, 1
          jnz     L_256_mod_inv_4_v_even_end
  L_256_mod_inv_4_v_even_start:
          shrd    r13, r14, 1
          shrd    r14, r15, 1
          shrd    r15, rdi, 1
          shr     rdi, 1
          mov     BYTE PTR [rsp+rsi], 1   ; record: v halved
          inc     rsi
          test    r13b, 1
          jz      L_256_mod_inv_4_v_even_start
  L_256_mod_inv_4_v_even_end:
  L_256_mod_inv_4_uv_start:
          ; Unsigned compare of u and v, most significant word first.
          cmp     r12, rdi
          jb      L_256_mod_inv_4_uv_v
          ja      L_256_mod_inv_4_uv_u
          cmp     r11, r15
          jb      L_256_mod_inv_4_uv_v
          ja      L_256_mod_inv_4_uv_u
          cmp     r10, r14
          jb      L_256_mod_inv_4_uv_v
          ja      L_256_mod_inv_4_uv_u
          cmp     r9, r13
          jb      L_256_mod_inv_4_uv_v
  L_256_mod_inv_4_uv_u:
          ; u >= v: u = (u - v) / 2
          mov     BYTE PTR [rsp+rsi], 2   ; record: u -= v, halved
          inc     rsi
          sub     r9, r13
          sbb     r10, r14
          sbb     r11, r15
          sbb     r12, rdi
          shrd    r9, r10, 1
          shrd    r10, r11, 1
          shrd    r11, r12, 1
          shr     r12, 1
          test    r9b, 1
          jnz     L_256_mod_inv_4_usubv_even_end
  L_256_mod_inv_4_usubv_even_start:
          shrd    r9, r10, 1
          shrd    r10, r11, 1
          shrd    r11, r12, 1
          shr     r12, 1
          mov     BYTE PTR [rsp+rsi], 0   ; record: u halved
          inc     rsi
          test    r9b, 1
          jz      L_256_mod_inv_4_usubv_even_start
  L_256_mod_inv_4_usubv_even_end:
          ; Finished when u == 1, i.e. low word is 1 and the rest are zero.
          cmp     r9, 1
          jne     L_256_mod_inv_4_uv_start
          mov     rdx, r10
          or      rdx, r11
          jne     L_256_mod_inv_4_uv_start
          or      rdx, r12
          jne     L_256_mod_inv_4_uv_start
          mov     al, 1                   ; the inverse will be in b
          jmp     L_256_mod_inv_4_uv_end
  L_256_mod_inv_4_uv_v:
          ; u < v: v = (v - u) / 2
          mov     BYTE PTR [rsp+rsi], 3   ; record: v -= u, halved
          inc     rsi
          sub     r13, r9
          sbb     r14, r10
          sbb     r15, r11
          sbb     rdi, r12
          shrd    r13, r14, 1
          shrd    r14, r15, 1
          shrd    r15, rdi, 1
          shr     rdi, 1
          test    r13b, 1
          jnz     L_256_mod_inv_4_vsubu_even_end
  L_256_mod_inv_4_vsubu_even_start:
          shrd    r13, r14, 1
          shrd    r14, r15, 1
          shrd    r15, rdi, 1
          shr     rdi, 1
          mov     BYTE PTR [rsp+rsi], 1   ; record: v halved
          inc     rsi
          test    r13b, 1
          jz      L_256_mod_inv_4_vsubu_even_start
  L_256_mod_inv_4_vsubu_even_end:
          ; Finished when v == 1.
          cmp     r13, 1
          jne     L_256_mod_inv_4_uv_start
          mov     rdx, r14
          or      rdx, r15
          jne     L_256_mod_inv_4_uv_start
          or      rdx, rdi
          jne     L_256_mod_inv_4_uv_start
          mov     al, 0                   ; the inverse will be in d
  L_256_mod_inv_4_uv_end:
          ; ---- Pass 2: replay the recorded steps on b and d, modulo m ----
          ; b = m
          mov     r9, QWORD PTR [r8]
          mov     r10, QWORD PTR [r8+8]
          mov     r11, QWORD PTR [r8+16]
          mov     r12, QWORD PTR [r8+24]
          ; d = 1
          mov     r13, 1
          xor     r14, r14
          xor     r15, r15
          xor     rdi, rdi
          mov     BYTE PTR [rsp+rsi], 7   ; end marker
          mov     dl, BYTE PTR [rsp]      ; first step
          mov     rsi, 1                  ; rsi = index of the following step
          ; Dispatch on the step byte: 0 / 1 / 2 / 3, anything else ends.
          cmp     dl, 1
          je      L_256_mod_inv_4_op_div2_d
          jl      L_256_mod_inv_4_op_div2_b
          cmp     dl, 3
          je      L_256_mod_inv_4_op_d_sub_b
          jl      L_256_mod_inv_4_op_b_sub_d
          jmp     L_256_mod_inv_4_op_end
  L_256_mod_inv_4_op_b_sub_d:
          ; b = (b - d) mod m, then fall through to halve b.
          sub     r9, r13
          sbb     r10, r14
          sbb     r11, r15
          sbb     r12, rdi
          jnc     L_256_mod_inv_4_op_div2_b
          add     r9, QWORD PTR [r8]      ; borrowed: add m back
          adc     r10, QWORD PTR [r8+8]
          adc     r11, QWORD PTR [r8+16]
          adc     r12, QWORD PTR [r8+24]
  L_256_mod_inv_4_op_div2_b:
          ; b = b / 2 mod m: make b even by adding m first when it is odd.
          test    r9b, 1
          mov     rdx, 0                  ; MOV keeps ZF from the TEST; rdx = bit 256
          jz      L_256_mod_inv_4_op_div2_b_mod
          add     r9, QWORD PTR [r8]
          adc     r10, QWORD PTR [r8+8]
          adc     r11, QWORD PTR [r8+16]
          adc     r12, QWORD PTR [r8+24]
          adc     rdx, 0
  L_256_mod_inv_4_op_div2_b_mod:
          shrd    r9, r10, 1
          shrd    r10, r11, 1
          shrd    r11, r12, 1
          shrd    r12, rdx, 1
          mov     dl, BYTE PTR [rsp+rsi]  ; next step
          inc     rsi
          cmp     dl, 1
          je      L_256_mod_inv_4_op_div2_d
          jl      L_256_mod_inv_4_op_div2_b
          cmp     dl, 3
          je      L_256_mod_inv_4_op_d_sub_b
          jl      L_256_mod_inv_4_op_b_sub_d
          jmp     L_256_mod_inv_4_op_end
  L_256_mod_inv_4_op_d_sub_b:
          ; d = (d - b) mod m, then fall through to halve d.
          sub     r13, r9
          sbb     r14, r10
          sbb     r15, r11
          sbb     rdi, r12
          jnc     L_256_mod_inv_4_op_div2_d
          add     r13, QWORD PTR [r8]     ; borrowed: add m back
          adc     r14, QWORD PTR [r8+8]
          adc     r15, QWORD PTR [r8+16]
          adc     rdi, QWORD PTR [r8+24]
  L_256_mod_inv_4_op_div2_d:
          ; d = d / 2 mod m: make d even by adding m first when it is odd.
          test    r13b, 1
          mov     rdx, 0                  ; MOV keeps ZF from the TEST; rdx = bit 256
          jz      L_256_mod_inv_4_op_div2_d_mod
          add     r13, QWORD PTR [r8]
          adc     r14, QWORD PTR [r8+8]
          adc     r15, QWORD PTR [r8+16]
          adc     rdi, QWORD PTR [r8+24]
          adc     rdx, 0
  L_256_mod_inv_4_op_div2_d_mod:
          shrd    r13, r14, 1
          shrd    r14, r15, 1
          shrd    r15, rdi, 1
          shrd    rdi, rdx, 1
          mov     dl, BYTE PTR [rsp+rsi]  ; next step
          inc     rsi
          cmp     dl, 1
          je      L_256_mod_inv_4_op_div2_d
          jl      L_256_mod_inv_4_op_div2_b
          cmp     dl, 3
          je      L_256_mod_inv_4_op_d_sub_b
          jl      L_256_mod_inv_4_op_b_sub_d
  L_256_mod_inv_4_op_end:
          ; al says which accumulator tracks the value that reached 1.
          cmp     al, 1
          jne     L_256_mod_inv_4_store_d
          mov     QWORD PTR [rcx], r9
          mov     QWORD PTR [rcx+8], r10
          mov     QWORD PTR [rcx+16], r11
          mov     QWORD PTR [rcx+24], r12
          jmp     L_256_mod_inv_4_store_end
  L_256_mod_inv_4_store_d:
          mov     QWORD PTR [rcx], r13
          mov     QWORD PTR [rcx+8], r14
          mov     QWORD PTR [rcx+16], r15
          mov     QWORD PTR [rcx+24], rdi
  L_256_mod_inv_4_store_end:
          add     rsp, 520
          pop     rsi
          pop     rdi
          pop     r15
          pop     r14
          pop     r13
          pop     r12
          ret
  sp_256_mod_inv_4 ENDP
  _text ENDS
  IFDEF HAVE_INTEL_AVX2
  ; The modulus as ten 26-bit limbs, one per dword. The first eight dwords hold
  ; limbs 0,2,4,6,8 (then padding) and the next eight hold limbs 1,3,5,7,9
  ; (then padding), matching the even/odd register split used by the procedure.
  _DATA SEGMENT
  ALIGN 16
  L_sp256_mod_inv_avx2_4_order DWORD 6497617,32001851,62711546,67108863,67043328,0,0,0,41070783,45522014,67108863,1023,4194303,0,0,0
  ptr_L_sp256_mod_inv_avx2_4_order QWORD L_sp256_mod_inv_avx2_4_order
  _DATA ENDS
  ; The value 1 in the even-limb layout; also used as the "limb 0 is odd" mask.
  _DATA SEGMENT
  ALIGN 16
  L_sp256_mod_inv_avx2_4_one QWORD 1, 0, 0, 0
  ptr_L_sp256_mod_inv_avx2_4_one QWORD L_sp256_mod_inv_avx2_4_one
  _DATA ENDS
  ; Bit 0 of every dword: selects the low bit of each odd limb.
  _DATA SEGMENT
  ALIGN 16
  L_sp256_mod_inv_avx2_4_all_one DWORD 1,1,1,1,1,1,1,1
  ptr_L_sp256_mod_inv_avx2_4_all_one QWORD L_sp256_mod_inv_avx2_4_all_one
  _DATA ENDS
  ; Bit 0 of dwords 1..4: selects the low bit of even limbs 2,4,6,8.
  _DATA SEGMENT
  ALIGN 16
  L_sp256_mod_inv_avx2_4_mask01111 DWORD 0,1,1,1,1,0,0,0
  ptr_L_sp256_mod_inv_avx2_4_mask01111 QWORD L_sp256_mod_inv_avx2_4_mask01111
  _DATA ENDS
  ; VPERMD index vector that moves dword i+1 into dword i.
  _DATA SEGMENT
  ALIGN 16
  L_sp256_mod_inv_avx2_4_down_one_dword DWORD 1,2,3,4,5,6,7,7
  ptr_L_sp256_mod_inv_avx2_4_down_one_dword QWORD L_sp256_mod_inv_avx2_4_down_one_dword
  _DATA ENDS
  ; Sign bit of dword 4: tested against the odd-limb register, i.e. limb 9.
  _DATA SEGMENT
  ALIGN 16
  L_sp256_mod_inv_avx2_4_neg DWORD 0,0,0,0,2147483648,0,0,0
  ptr_L_sp256_mod_inv_avx2_4_neg QWORD L_sp256_mod_inv_avx2_4_neg
  _DATA ENDS
  ; Not read by sp_256_mod_inv_avx2_4; the definition is kept unchanged.
  _DATA SEGMENT
  ALIGN 16
  L_sp256_mod_inv_avx2_4_up_one_dword DWORD 7,0,1,2,3,7,7,7
  ptr_L_sp256_mod_inv_avx2_4_up_one_dword QWORD L_sp256_mod_inv_avx2_4_up_one_dword
  _DATA ENDS
  ; Not read by sp_256_mod_inv_avx2_4; the definition is kept unchanged.
  _DATA SEGMENT
  ALIGN 16
  L_sp256_mod_inv_avx2_4_mask26 DWORD 67108863,67108863,67108863,67108863,67108863,0,0,0
  ptr_L_sp256_mod_inv_avx2_4_mask26 QWORD L_sp256_mod_inv_avx2_4_mask26
  _DATA ENDS
  ; /* Non-constant time modular inversion (binary GCD), AVX2 version.
  ; *
  ; * @param [out] r Resulting number (rcx; four 64-bit words are written).
  ; * @param [in]  a Number to invert (rdx; four 64-bit words are read).
  ; * @param [in]  m Modulus (r8; four 64-bit words are read).
  ; * @return MP_OKAY on success.
  ; *         NOTE(review): rax is never loaded with a status; on exit it still
  ; *         holds the low word of the result. Confirm callers ignore it.
  ; *
  ; * u = m lives in rax,r9,r10,r11 and v = a in r12..r15. The coefficient that
  ; * follows u is kept in ymm0 (limbs 0,2,4,6,8) and ymm1 (limbs 1,3,5,7,9) as
  ; * signed, lazily carried 26-bit limbs; the coefficient of v is in ymm2/ymm3
  ; * and starts at 1. Every subtract/halve on u or v is mirrored on its
  ; * coefficient, using the limbs in L_sp256_mod_inv_avx2_4_order for the
  ; * modular fix-ups. The m argument only seeds u, so m must be the value that
  ; * table encodes. The loop ends when u == 1 or v == 1; the matching
  ; * coefficient is then carried, made non-negative and packed into r.
  ; *
  ; * Nothing here validates the inputs: the halving loops only exit on an odd
  ; * value, so a must be non-zero and coprime to the modulus.
  ; */
  _text SEGMENT READONLY PARA
  sp_256_mod_inv_avx2_4 PROC
          push    r12
          push    r13
          push    r14
          push    r15
          push    rdi
          push    rsi
          push    rbx
          ; Save only the non-volatile vector registers that are actually
          ; written (xmm6-xmm12): 7 * 16 = 112 bytes. With rsp 16-byte aligned
          ; at the caller's CALL, the return address, seven pushes and this
          ; allocation leave rsp 16-byte aligned.
          sub     rsp, 112
          vmovdqu OWORD PTR [rsp], xmm6
          vmovdqu OWORD PTR [rsp+16], xmm7
          vmovdqu OWORD PTR [rsp+32], xmm8
          vmovdqu OWORD PTR [rsp+48], xmm9
          vmovdqu OWORD PTR [rsp+64], xmm10
          vmovdqu OWORD PTR [rsp+80], xmm11
          vmovdqu OWORD PTR [rsp+96], xmm12
          ; u = m
          mov     rax, QWORD PTR [r8]
          mov     r9, QWORD PTR [r8+8]
          mov     r10, QWORD PTR [r8+16]
          mov     r11, QWORD PTR [r8+24]
          ; v = a
          mov     r12, QWORD PTR [rdx]
          mov     r13, QWORD PTR [rdx+8]
          mov     r14, QWORD PTR [rdx+16]
          mov     r15, QWORD PTR [rdx+24]
          ; Constants: ymm6/ymm7 = modulus (even/odd limbs), ymm8 = one,
          ; ymm9 = mask01111, ymm10 = all_one, ymm11 = down_one_dword,
          ; ymm12 = neg.
          mov     rbx, ptr_L_sp256_mod_inv_avx2_4_order
          vmovupd ymm6, YMMWORD PTR [rbx]
          vmovupd ymm7, YMMWORD PTR [rbx+32]
          mov     rbx, ptr_L_sp256_mod_inv_avx2_4_one
          vmovupd ymm8, YMMWORD PTR [rbx]
          mov     rbx, ptr_L_sp256_mod_inv_avx2_4_mask01111
          vmovupd ymm9, YMMWORD PTR [rbx]
          mov     rbx, ptr_L_sp256_mod_inv_avx2_4_all_one
          vmovupd ymm10, YMMWORD PTR [rbx]
          mov     rbx, ptr_L_sp256_mod_inv_avx2_4_down_one_dword
          vmovupd ymm11, YMMWORD PTR [rbx]
          mov     rbx, ptr_L_sp256_mod_inv_avx2_4_neg
          vmovupd ymm12, YMMWORD PTR [rbx]
          ; Coefficient of u = 0 (ymm0/ymm1), coefficient of v = 1 (ymm2/ymm3).
          vpxor   xmm0, xmm0, xmm0
          vpxor   xmm1, xmm1, xmm1
          vmovdqu ymm2, ymm8
          vpxor   xmm3, xmm3, xmm3
          ; ---- Strip trailing zero bits from v ----
          test    r12b, 1
          jnz     L_256_mod_inv_avx2_4_v_even_end
  L_256_mod_inv_avx2_4_v_even_start:
          shrd    r12, r13, 1
          shrd    r13, r14, 1
          shrd    r14, r15, 1
          shr     r15, 1
          ; Halve the coefficient of v modulo the modulus: add the modulus
          ; first when limb 0 is odd.
          vptest  ymm2, ymm8
          jz      L_256_mod_inv_avx2_4_v_even_shr1
          vpaddd  ymm2, ymm2, ymm6
          vpaddd  ymm3, ymm3, ymm7
  L_256_mod_inv_avx2_4_v_even_shr1:
          ; Shift right one bit across limbs: the low bit of each limb moves
          ; to bit 25 of the limb below it.
          vpand   ymm4, ymm2, ymm9
          vpand   ymm5, ymm3, ymm10
          vpermd  ymm4, ymm11, ymm4
          vpsrad  ymm2, ymm2, 1
          vpsrad  ymm3, ymm3, 1
          vpslld  ymm5, ymm5, 25
          vpslld  xmm4, xmm4, 25
          vpaddd  ymm2, ymm2, ymm5
          vpaddd  ymm3, ymm3, ymm4
          test    r12b, 1
          jz      L_256_mod_inv_avx2_4_v_even_start
  L_256_mod_inv_avx2_4_v_even_end:
  L_256_mod_inv_avx2_4_uv_start:
          ; Unsigned compare of u and v, most significant word first.
          cmp     r11, r15
          jb      L_256_mod_inv_avx2_4_uv_v
          ja      L_256_mod_inv_avx2_4_uv_u
          cmp     r10, r14
          jb      L_256_mod_inv_avx2_4_uv_v
          ja      L_256_mod_inv_avx2_4_uv_u
          cmp     r9, r13
          jb      L_256_mod_inv_avx2_4_uv_v
          ja      L_256_mod_inv_avx2_4_uv_u
          cmp     rax, r12
          jb      L_256_mod_inv_avx2_4_uv_v
  L_256_mod_inv_avx2_4_uv_u:
          ; u >= v: u -= v and coefficient(u) -= coefficient(v). The vector
          ; instructions do not touch the integer flags, so the SUB/SBB chain
          ; stays intact.
          sub     rax, r12
          sbb     r9, r13
          vpsubd  ymm0, ymm0, ymm2
          sbb     r10, r14
          vpsubd  ymm1, ymm1, ymm3
          sbb     r11, r15
          ; Top limb negative: add the modulus back.
          vptest  ymm1, ymm12
          jz      L_256_mod_inv_avx2_4_usubv_done_neg
          vpaddd  ymm0, ymm0, ymm6
          vpaddd  ymm1, ymm1, ymm7
  L_256_mod_inv_avx2_4_usubv_done_neg:
  L_256_mod_inv_avx2_4_usubv_shr1:
          ; Halve u and its coefficient until u is odd.
          shrd    rax, r9, 1
          shrd    r9, r10, 1
          shrd    r10, r11, 1
          shr     r11, 1
          vptest  ymm0, ymm8
          jz      L_256_mod_inv_avx2_4_usubv_sub_shr1
          vpaddd  ymm0, ymm0, ymm6
          vpaddd  ymm1, ymm1, ymm7
  L_256_mod_inv_avx2_4_usubv_sub_shr1:
          vpand   ymm4, ymm0, ymm9
          vpand   ymm5, ymm1, ymm10
          vpermd  ymm4, ymm11, ymm4
          vpsrad  ymm0, ymm0, 1
          vpsrad  ymm1, ymm1, 1
          vpslld  ymm5, ymm5, 25
          vpslld  xmm4, xmm4, 25
          vpaddd  ymm0, ymm0, ymm5
          vpaddd  ymm1, ymm1, ymm4
          test    al, 1
          jz      L_256_mod_inv_avx2_4_usubv_shr1
          ; Finished when u == 1.
          cmp     rax, 1
          jne     L_256_mod_inv_avx2_4_uv_start
          mov     rdx, r9
          or      rdx, r10
          jne     L_256_mod_inv_avx2_4_uv_start
          or      rdx, r11
          jne     L_256_mod_inv_avx2_4_uv_start
          ; Pull the ten limbs of coefficient(u) into integer registers, in
          ; limb order: eax, r9d, r10d, r11d, r12d, r13d, r14d, r15d, edi, esi.
          vpextrd eax, xmm0, 0
          vpextrd r10d, xmm0, 1
          vpextrd r12d, xmm0, 2
          vpextrd r14d, xmm0, 3
          vpextrd r9d, xmm1, 0
          vpextrd r11d, xmm1, 1
          vpextrd r13d, xmm1, 2
          vpextrd r15d, xmm1, 3
          vextracti128 xmm0, ymm0, 1
          vextracti128 xmm1, ymm1, 1
          vpextrd edi, xmm0, 0
          vpextrd esi, xmm1, 0
          jmp     L_256_mod_inv_avx2_4_store_done
  L_256_mod_inv_avx2_4_uv_v:
          ; u < v: v -= u and coefficient(v) -= coefficient(u).
          sub     r12, rax
          sbb     r13, r9
          vpsubd  ymm2, ymm2, ymm0
          sbb     r14, r10
          vpsubd  ymm3, ymm3, ymm1
          sbb     r15, r11
          ; Top limb negative: add the modulus back.
          vptest  ymm3, ymm12
          jz      L_256_mod_inv_avx2_4_vsubu_done_neg
          vpaddd  ymm2, ymm2, ymm6
          vpaddd  ymm3, ymm3, ymm7
  L_256_mod_inv_avx2_4_vsubu_done_neg:
  L_256_mod_inv_avx2_4_vsubu_shr1:
          ; Halve v and its coefficient until v is odd.
          shrd    r12, r13, 1
          shrd    r13, r14, 1
          shrd    r14, r15, 1
          shr     r15, 1
          vptest  ymm2, ymm8
          jz      L_256_mod_inv_avx2_4_vsubu_sub_shr1
          vpaddd  ymm2, ymm2, ymm6
          vpaddd  ymm3, ymm3, ymm7
  L_256_mod_inv_avx2_4_vsubu_sub_shr1:
          vpand   ymm4, ymm2, ymm9
          vpand   ymm5, ymm3, ymm10
          vpermd  ymm4, ymm11, ymm4
          vpsrad  ymm2, ymm2, 1
          vpsrad  ymm3, ymm3, 1
          vpslld  ymm5, ymm5, 25
          vpslld  xmm4, xmm4, 25
          vpaddd  ymm2, ymm2, ymm5
          vpaddd  ymm3, ymm3, ymm4
          test    r12b, 1
          jz      L_256_mod_inv_avx2_4_vsubu_shr1
          ; Finished when v == 1.
          cmp     r12, 1
          jne     L_256_mod_inv_avx2_4_uv_start
          mov     rdx, r13
          or      rdx, r14
          jne     L_256_mod_inv_avx2_4_uv_start
          or      rdx, r15
          jne     L_256_mod_inv_avx2_4_uv_start
          ; Pull the ten limbs of coefficient(v) into integer registers.
          vpextrd eax, xmm2, 0
          vpextrd r10d, xmm2, 1
          vpextrd r12d, xmm2, 2
          vpextrd r14d, xmm2, 3
          vpextrd r9d, xmm3, 0
          vpextrd r11d, xmm3, 1
          vpextrd r13d, xmm3, 2
          vpextrd r15d, xmm3, 3
          vextracti128 xmm2, ymm2, 1
          vextracti128 xmm3, ymm3, 1
          vpextrd edi, xmm2, 0
          vpextrd esi, xmm3, 0
  L_256_mod_inv_avx2_4_store_done:
          ; Signed carry propagation through the 26-bit limbs
          ; (67108863 = 2^26 - 1); the top limb (esi) keeps its sign.
          mov     edx, eax
          and     eax, 67108863
          sar     edx, 26
          add     r9d, edx
          mov     edx, r9d
          and     r9d, 67108863
          sar     edx, 26
          add     r10d, edx
          mov     edx, r10d
          and     r10d, 67108863
          sar     edx, 26
          add     r11d, edx
          mov     edx, r11d
          and     r11d, 67108863
          sar     edx, 26
          add     r12d, edx
          mov     edx, r12d
          and     r12d, 67108863
          sar     edx, 26
          add     r13d, edx
          mov     edx, r13d
          and     r13d, 67108863
          sar     edx, 26
          add     r14d, edx
          mov     edx, r14d
          and     r14d, 67108863
          sar     edx, 26
          add     r15d, edx
          mov     edx, r15d
          and     r15d, 67108863
          sar     edx, 26
          add     edi, edx
          mov     edx, edi
          and     edi, 67108863
          sar     edx, 26
          add     esi, edx
          ; Merge limb pairs into five 52-bit words:
          ; rax, r10, r12, r14, rdi = even limb + (odd limb << 26).
          movsxd  r9, r9d
          movsxd  r11, r11d
          movsxd  r13, r13d
          movsxd  r15, r15d
          movsxd  rsi, esi
          shl     r9, 26
          shl     r11, 26
          shl     r13, 26
          shl     r15, 26
          shl     rsi, 26
          movsxd  rax, eax
          add     rax, r9
          movsxd  r10, r10d
          adc     r10, r11
          movsxd  r12, r12d
          adc     r12, r13
          movsxd  r14, r14d
          adc     r14, r15
          movsxd  rdi, edi
          adc     rdi, rsi
          ; Signed test on the top word: a negative value gets the modulus
          ; added once.
          jge     L_256_mod_inv_avx2_4_3_no_add_order
          ; The modulus as five 52-bit words (pairs of the 26-bit limbs).
          mov     r9, 2756213597218129
          mov     r11, 3054930678533947
          mov     r13, 4503599622973178
          mov     r15, 68719476735
          mov     rsi, 281474976645120
          add     rax, r9
          add     r10, r11
          add     r12, r13
          add     r14, r15
          add     rdi, rsi
          ; Carry-propagate the 52-bit words (4503599627370495 = 2^52 - 1).
          mov     rdx, 4503599627370495
          mov     r9, rax
          and     rax, rdx
          sar     r9, 52
          add     r10, r9
          mov     r11, r10
          and     r10, rdx
          sar     r11, 52
          add     r12, r11
          mov     r13, r12
          and     r12, rdx
          sar     r13, 52
          add     r14, r13
          mov     r15, r14
          and     r14, rdx
          sar     r15, 52
          add     rdi, r15
  L_256_mod_inv_avx2_4_3_no_add_order:
          ; Pack five 52-bit words into four 64-bit words.
          mov     r9, r10
          mov     r11, r12
          mov     r13, r14
          shl     r9, 52
          sar     r10, 12
          shl     r11, 40
          sar     r12, 24
          shl     r13, 28
          sar     r14, 36
          shl     rdi, 16
          add     rax, r9
          adc     r10, r11
          adc     r12, r13
          adc     r14, rdi
          mov     QWORD PTR [rcx], rax
          mov     QWORD PTR [rcx+8], r10
          mov     QWORD PTR [rcx+16], r12
          mov     QWORD PTR [rcx+24], r14
          ; Clear the upper halves of the ymm registers so that legacy SSE code
          ; in the caller does not pay an AVX/SSE transition penalty, then
          ; restore the non-volatile xmm registers.
          vzeroupper
          vmovdqu xmm6, OWORD PTR [rsp]
          vmovdqu xmm7, OWORD PTR [rsp+16]
          vmovdqu xmm8, OWORD PTR [rsp+32]
          vmovdqu xmm9, OWORD PTR [rsp+48]
          vmovdqu xmm10, OWORD PTR [rsp+64]
          vmovdqu xmm11, OWORD PTR [rsp+80]
          vmovdqu xmm12, OWORD PTR [rsp+96]
          add     rsp, 112
          pop     rbx
          pop     rsi
          pop     rdi
          pop     r15
          pop     r14
          pop     r13
          pop     r12
          ret
  sp_256_mod_inv_avx2_4 ENDP
  _text ENDS
  ENDIF
  58135. ENDIF
  58136. IFDEF WOLFSSL_SP_384
  58137. ; /* Multiply a and b into r. (r = a * b)
  58138. ; *
  58139. ; * r A single precision integer.
  58140. ; * a A single precision integer.
  58141. ; * b A single precision integer.
  58142. ; */
  58143. _text SEGMENT READONLY PARA
  58144. sp_384_mul_6 PROC
  58145. push r12
  58146. mov r9, rdx
  58147. sub rsp, 48
  58148. ; A[0] * B[0]
  58149. mov rax, QWORD PTR [r8]
  58150. mul QWORD PTR [r9]
  58151. xor r12, r12
  58152. mov QWORD PTR [rsp], rax
  58153. mov r11, rdx
  58154. ; A[0] * B[1]
  58155. mov rax, QWORD PTR [r8+8]
  58156. mul QWORD PTR [r9]
  58157. xor r10, r10
  58158. add r11, rax
  58159. adc r12, rdx
  58160. adc r10, 0
  58161. ; A[1] * B[0]
  58162. mov rax, QWORD PTR [r8]
  58163. mul QWORD PTR [r9+8]
  58164. add r11, rax
  58165. adc r12, rdx
  58166. adc r10, 0
  58167. mov QWORD PTR [rsp+8], r11
  58168. ; A[0] * B[2]
  58169. mov rax, QWORD PTR [r8+16]
  58170. mul QWORD PTR [r9]
  58171. xor r11, r11
  58172. add r12, rax
  58173. adc r10, rdx
  58174. adc r11, 0
  58175. ; A[1] * B[1]
  58176. mov rax, QWORD PTR [r8+8]
  58177. mul QWORD PTR [r9+8]
  58178. add r12, rax
  58179. adc r10, rdx
  58180. adc r11, 0
  58181. ; A[2] * B[0]
  58182. mov rax, QWORD PTR [r8]
  58183. mul QWORD PTR [r9+16]
  58184. add r12, rax
  58185. adc r10, rdx
  58186. adc r11, 0
  58187. mov QWORD PTR [rsp+16], r12
  58188. ; A[0] * B[3]
  58189. mov rax, QWORD PTR [r8+24]
  58190. mul QWORD PTR [r9]
  58191. xor r12, r12
  58192. add r10, rax
  58193. adc r11, rdx
  58194. adc r12, 0
  58195. ; A[1] * B[2]
  58196. mov rax, QWORD PTR [r8+16]
  58197. mul QWORD PTR [r9+8]
  58198. add r10, rax
  58199. adc r11, rdx
  58200. adc r12, 0
  58201. ; A[2] * B[1]
  58202. mov rax, QWORD PTR [r8+8]
  58203. mul QWORD PTR [r9+16]
  58204. add r10, rax
  58205. adc r11, rdx
  58206. adc r12, 0
  58207. ; A[3] * B[0]
  58208. mov rax, QWORD PTR [r8]
  58209. mul QWORD PTR [r9+24]
  58210. add r10, rax
  58211. adc r11, rdx
  58212. adc r12, 0
  58213. mov QWORD PTR [rsp+24], r10
  58214. ; A[0] * B[4]
  58215. mov rax, QWORD PTR [r8+32]
  58216. mul QWORD PTR [r9]
  58217. xor r10, r10
  58218. add r11, rax
  58219. adc r12, rdx
  58220. adc r10, 0
  58221. ; A[1] * B[3]
  58222. mov rax, QWORD PTR [r8+24]
  58223. mul QWORD PTR [r9+8]
  58224. add r11, rax
  58225. adc r12, rdx
  58226. adc r10, 0
  58227. ; A[2] * B[2]
  58228. mov rax, QWORD PTR [r8+16]
  58229. mul QWORD PTR [r9+16]
  58230. add r11, rax
  58231. adc r12, rdx
  58232. adc r10, 0
  58233. ; A[3] * B[1]
  58234. mov rax, QWORD PTR [r8+8]
  58235. mul QWORD PTR [r9+24]
  58236. add r11, rax
  58237. adc r12, rdx
  58238. adc r10, 0
  58239. ; A[4] * B[0]
  58240. mov rax, QWORD PTR [r8]
  58241. mul QWORD PTR [r9+32]
  58242. add r11, rax
  58243. adc r12, rdx
  58244. adc r10, 0
  58245. mov QWORD PTR [rsp+32], r11
  58246. ; A[0] * B[5]
  58247. mov rax, QWORD PTR [r8+40]
  58248. mul QWORD PTR [r9]
  58249. xor r11, r11
  58250. add r12, rax
  58251. adc r10, rdx
  58252. adc r11, 0
  58253. ; A[1] * B[4]
  58254. mov rax, QWORD PTR [r8+32]
  58255. mul QWORD PTR [r9+8]
  58256. add r12, rax
  58257. adc r10, rdx
  58258. adc r11, 0
  58259. ; A[2] * B[3]
  58260. mov rax, QWORD PTR [r8+24]
  58261. mul QWORD PTR [r9+16]
  58262. add r12, rax
  58263. adc r10, rdx
  58264. adc r11, 0
  58265. ; A[3] * B[2]
  58266. mov rax, QWORD PTR [r8+16]
  58267. mul QWORD PTR [r9+24]
  58268. add r12, rax
  58269. adc r10, rdx
  58270. adc r11, 0
  58271. ; A[4] * B[1]
  58272. mov rax, QWORD PTR [r8+8]
  58273. mul QWORD PTR [r9+32]
  58274. add r12, rax
  58275. adc r10, rdx
  58276. adc r11, 0
  58277. ; A[5] * B[0]
  58278. mov rax, QWORD PTR [r8]
  58279. mul QWORD PTR [r9+40]
  58280. add r12, rax
  58281. adc r10, rdx
  58282. adc r11, 0
  58283. mov QWORD PTR [rsp+40], r12
  58284. ; A[1] * B[5]
  58285. mov rax, QWORD PTR [r8+40]
  58286. mul QWORD PTR [r9+8]
  58287. xor r12, r12
  58288. add r10, rax
  58289. adc r11, rdx
  58290. adc r12, 0
  58291. ; A[2] * B[4]
  58292. mov rax, QWORD PTR [r8+32]
  58293. mul QWORD PTR [r9+16]
  58294. add r10, rax
  58295. adc r11, rdx
  58296. adc r12, 0
  58297. ; A[3] * B[3]
  58298. mov rax, QWORD PTR [r8+24]
  58299. mul QWORD PTR [r9+24]
  58300. add r10, rax
  58301. adc r11, rdx
  58302. adc r12, 0
  58303. ; A[4] * B[2]
  58304. mov rax, QWORD PTR [r8+16]
  58305. mul QWORD PTR [r9+32]
  58306. add r10, rax
  58307. adc r11, rdx
  58308. adc r12, 0
  58309. ; A[5] * B[1]
  58310. mov rax, QWORD PTR [r8+8]
  58311. mul QWORD PTR [r9+40]
  58312. add r10, rax
  58313. adc r11, rdx
  58314. adc r12, 0
  58315. mov QWORD PTR [rcx+48], r10
  58316. ; A[2] * B[5]
  58317. mov rax, QWORD PTR [r8+40]
  58318. mul QWORD PTR [r9+16]
  58319. xor r10, r10
  58320. add r11, rax
  58321. adc r12, rdx
  58322. adc r10, 0
  58323. ; A[3] * B[4]
  58324. mov rax, QWORD PTR [r8+32]
  58325. mul QWORD PTR [r9+24]
  58326. add r11, rax
  58327. adc r12, rdx
  58328. adc r10, 0
  58329. ; A[4] * B[3]
  58330. mov rax, QWORD PTR [r8+24]
  58331. mul QWORD PTR [r9+32]
  58332. add r11, rax
  58333. adc r12, rdx
  58334. adc r10, 0
  58335. ; A[5] * B[2]
  58336. mov rax, QWORD PTR [r8+16]
  58337. mul QWORD PTR [r9+40]
  58338. add r11, rax
  58339. adc r12, rdx
  58340. adc r10, 0
  58341. mov QWORD PTR [rcx+56], r11
  58342. ; A[3] * B[5]
  58343. mov rax, QWORD PTR [r8+40]
  58344. mul QWORD PTR [r9+24]
  58345. xor r11, r11
  58346. add r12, rax
  58347. adc r10, rdx
  58348. adc r11, 0
  58349. ; A[4] * B[4]
  58350. mov rax, QWORD PTR [r8+32]
  58351. mul QWORD PTR [r9+32]
  58352. add r12, rax
  58353. adc r10, rdx
  58354. adc r11, 0
  58355. ; A[5] * B[3]
  58356. mov rax, QWORD PTR [r8+24]
  58357. mul QWORD PTR [r9+40]
  58358. add r12, rax
  58359. adc r10, rdx
  58360. adc r11, 0
  58361. mov QWORD PTR [rcx+64], r12
  58362. ; A[4] * B[5]
  58363. mov rax, QWORD PTR [r8+40]
  58364. mul QWORD PTR [r9+32]
  58365. xor r12, r12
  58366. add r10, rax
  58367. adc r11, rdx
  58368. adc r12, 0
  58369. ; A[5] * B[4]
  58370. mov rax, QWORD PTR [r8+32]
  58371. mul QWORD PTR [r9+40]
  58372. add r10, rax
  58373. adc r11, rdx
  58374. adc r12, 0
  58375. mov QWORD PTR [rcx+72], r10
  58376. ; A[5] * B[5]
  58377. mov rax, QWORD PTR [r8+40]
  58378. mul QWORD PTR [r9+40]
  58379. add r11, rax
  58380. adc r12, rdx
  58381. mov QWORD PTR [rcx+80], r11
  58382. mov QWORD PTR [rcx+88], r12
  58383. mov rax, QWORD PTR [rsp]
  58384. mov rdx, QWORD PTR [rsp+8]
  58385. mov r10, QWORD PTR [rsp+16]
  58386. mov r11, QWORD PTR [rsp+24]
  58387. mov QWORD PTR [rcx], rax
  58388. mov QWORD PTR [rcx+8], rdx
  58389. mov QWORD PTR [rcx+16], r10
  58390. mov QWORD PTR [rcx+24], r11
  58391. mov rax, QWORD PTR [rsp+32]
  58392. mov rdx, QWORD PTR [rsp+40]
  58393. mov QWORD PTR [rcx+32], rax
  58394. mov QWORD PTR [rcx+40], rdx
  58395. add rsp, 48
  58396. pop r12
  58397. ret
  58398. sp_384_mul_6 ENDP
  58399. _text ENDS
  58400. IFDEF HAVE_INTEL_AVX2
  58401. ; /* Multiply a and b into r. (r = a * b)
  58402. ; * Schoolbook 6x6 word multiply built from MULX with ADCX (CF) and ADOX (OF) carry chains.
  58403. ; * r Result of multiplication; 12 words are stored (rcx on entry).
  58404. ; * a First number to multiply; 6 words are read (rdx on entry, copied to rax).
  58405. ; * b Second number to multiply; 6 words are read (r8 on entry).
  58406. ; */
  58407. _text SEGMENT READONLY PARA
  58408. sp_384_mul_avx2_6 PROC
  58409. push r12 ; r12-r15, rdi, rsi and rbx are saved here and restored before ret
  58410. push r13
  58411. push r14
  58412. push r15
  58413. push rdi
  58414. push rsi
  58415. push rbx
  58416. mov rax, rdx ; rax = a; rdx is needed as the implicit MULX operand
  58417. sub rsp, 40 ; five stack slots that hold result words 0..4 until the end
  58418. xor rbx, rbx ; rbx = 0, and CF = OF = 0
  58419. mov rdx, QWORD PTR [rax] ; rdx = A[0]
  58420. ; A[0] * B[0]
  58421. mulx r12, r11, QWORD PTR [r8] ; r11 = low word, r12 = high word
  58422. ; A[0] * B[1]
  58423. mulx r13, r9, QWORD PTR [r8+8]
  58424. adcx r12, r9 ; the first row only needs the CF chain
  58425. ; A[0] * B[2]
  58426. mulx r14, r9, QWORD PTR [r8+16]
  58427. adcx r13, r9
  58428. ; A[0] * B[3]
  58429. mulx r15, r9, QWORD PTR [r8+24]
  58430. adcx r14, r9
  58431. ; A[0] * B[4]
  58432. mulx rdi, r9, QWORD PTR [r8+32]
  58433. adcx r15, r9
  58434. ; A[0] * B[5]
  58435. mulx rsi, r9, QWORD PTR [r8+40]
  58436. adcx rdi, r9
  58437. adcx rsi, rbx ; fold the last carry into the top word of this row
  58438. mov QWORD PTR [rsp], r11 ; result word 0 is final: park it on the stack
  58439. mov r11, 0 ; r11 is recycled as the next higher accumulator word
  58440. adcx r11, rbx
  58441. xor rbx, rbx ; reset CF and OF for the next row
  58442. mov rdx, QWORD PTR [rax+8] ; rdx = A[1]
  58443. ; A[1] * B[0]
  58444. mulx r10, r9, QWORD PTR [r8]
  58445. adcx r12, r9 ; low product words are accumulated on the CF chain
  58446. adox r13, r10 ; high product words are accumulated on the OF chain
  58447. ; A[1] * B[1]
  58448. mulx r10, r9, QWORD PTR [r8+8]
  58449. adcx r13, r9
  58450. adox r14, r10
  58451. ; A[1] * B[2]
  58452. mulx r10, r9, QWORD PTR [r8+16]
  58453. adcx r14, r9
  58454. adox r15, r10
  58455. ; A[1] * B[3]
  58456. mulx r10, r9, QWORD PTR [r8+24]
  58457. adcx r15, r9
  58458. adox rdi, r10
  58459. ; A[1] * B[4]
  58460. mulx r10, r9, QWORD PTR [r8+32]
  58461. adcx rdi, r9
  58462. adox rsi, r10
  58463. ; A[1] * B[5]
  58464. mulx r10, r9, QWORD PTR [r8+40]
  58465. adcx rsi, r9
  58466. adox r11, r10
  58467. adcx r11, rbx ; rbx is zero: this only adds the pending CF
  58468. mov QWORD PTR [rsp+8], r12 ; result word 1 is final
  58469. mov r12, 0 ; r12 becomes the new top accumulator word
  58470. adcx r12, rbx ; collect the CF carry ...
  58471. adox r12, rbx ; ... and the OF carry
  58472. xor rbx, rbx ; reset CF and OF for the next row
  58473. mov rdx, QWORD PTR [rax+16] ; rdx = A[2]
  58474. ; A[2] * B[0]
  58475. mulx r10, r9, QWORD PTR [r8]
  58476. adcx r13, r9
  58477. adox r14, r10
  58478. ; A[2] * B[1]
  58479. mulx r10, r9, QWORD PTR [r8+8]
  58480. adcx r14, r9
  58481. adox r15, r10
  58482. ; A[2] * B[2]
  58483. mulx r10, r9, QWORD PTR [r8+16]
  58484. adcx r15, r9
  58485. adox rdi, r10
  58486. ; A[2] * B[3]
  58487. mulx r10, r9, QWORD PTR [r8+24]
  58488. adcx rdi, r9
  58489. adox rsi, r10
  58490. ; A[2] * B[4]
  58491. mulx r10, r9, QWORD PTR [r8+32]
  58492. adcx rsi, r9
  58493. adox r11, r10
  58494. ; A[2] * B[5]
  58495. mulx r10, r9, QWORD PTR [r8+40]
  58496. adcx r11, r9
  58497. adox r12, r10
  58498. adcx r12, rbx
  58499. mov QWORD PTR [rsp+16], r13 ; result word 2 is final
  58500. mov r13, 0 ; r13 becomes the new top accumulator word
  58501. adcx r13, rbx
  58502. adox r13, rbx
  58503. xor rbx, rbx ; reset CF and OF for the next row
  58504. mov rdx, QWORD PTR [rax+24] ; rdx = A[3]
  58505. ; A[3] * B[0]
  58506. mulx r10, r9, QWORD PTR [r8]
  58507. adcx r14, r9
  58508. adox r15, r10
  58509. ; A[3] * B[1]
  58510. mulx r10, r9, QWORD PTR [r8+8]
  58511. adcx r15, r9
  58512. adox rdi, r10
  58513. ; A[3] * B[2]
  58514. mulx r10, r9, QWORD PTR [r8+16]
  58515. adcx rdi, r9
  58516. adox rsi, r10
  58517. ; A[3] * B[3]
  58518. mulx r10, r9, QWORD PTR [r8+24]
  58519. adcx rsi, r9
  58520. adox r11, r10
  58521. ; A[3] * B[4]
  58522. mulx r10, r9, QWORD PTR [r8+32]
  58523. adcx r11, r9
  58524. adox r12, r10
  58525. ; A[3] * B[5]
  58526. mulx r10, r9, QWORD PTR [r8+40]
  58527. adcx r12, r9
  58528. adox r13, r10
  58529. adcx r13, rbx
  58530. mov QWORD PTR [rsp+24], r14 ; result word 3 is final
  58531. mov r14, 0 ; r14 becomes the new top accumulator word
  58532. adcx r14, rbx
  58533. adox r14, rbx
  58534. xor rbx, rbx ; reset CF and OF for the next row
  58535. mov rdx, QWORD PTR [rax+32] ; rdx = A[4]
  58536. ; A[4] * B[0]
  58537. mulx r10, r9, QWORD PTR [r8]
  58538. adcx r15, r9
  58539. adox rdi, r10
  58540. ; A[4] * B[1]
  58541. mulx r10, r9, QWORD PTR [r8+8]
  58542. adcx rdi, r9
  58543. adox rsi, r10
  58544. ; A[4] * B[2]
  58545. mulx r10, r9, QWORD PTR [r8+16]
  58546. adcx rsi, r9
  58547. adox r11, r10
  58548. ; A[4] * B[3]
  58549. mulx r10, r9, QWORD PTR [r8+24]
  58550. adcx r11, r9
  58551. adox r12, r10
  58552. ; A[4] * B[4]
  58553. mulx r10, r9, QWORD PTR [r8+32]
  58554. adcx r12, r9
  58555. adox r13, r10
  58556. ; A[4] * B[5]
  58557. mulx r10, r9, QWORD PTR [r8+40]
  58558. adcx r13, r9
  58559. adox r14, r10
  58560. adcx r14, rbx
  58561. mov QWORD PTR [rsp+32], r15 ; result word 4 is final; r15 is refilled by the last MULX below
  58562. mov rdx, QWORD PTR [rax+40] ; rdx = A[5]; NOTE(review): no flag reset before this row, presumably both carries are zero here - confirm
  58563. ; A[5] * B[0]
  58564. mulx r10, r9, QWORD PTR [r8]
  58565. adcx rdi, r9
  58566. adox rsi, r10
  58567. ; A[5] * B[1]
  58568. mulx r10, r9, QWORD PTR [r8+8]
  58569. adcx rsi, r9
  58570. adox r11, r10
  58571. ; A[5] * B[2]
  58572. mulx r10, r9, QWORD PTR [r8+16]
  58573. adcx r11, r9
  58574. adox r12, r10
  58575. ; A[5] * B[3]
  58576. mulx r10, r9, QWORD PTR [r8+24]
  58577. adcx r12, r9
  58578. adox r13, r10
  58579. ; A[5] * B[4]
  58580. mulx r10, r9, QWORD PTR [r8+32]
  58581. adcx r13, r9
  58582. adox r14, r10
  58583. ; A[5] * B[5]
  58584. mulx r15, r9, QWORD PTR [r8+40] ; the high word goes straight into r15 (result word 11)
  58585. adcx r14, r9
  58586. adox r15, rbx ; absorb the last OF carry
  58587. adcx r15, rbx ; absorb the last CF carry
  58588. mov QWORD PTR [rcx+40], rdi ; first store to r: every product has been computed by now
  58589. mov QWORD PTR [rcx+48], rsi
  58590. mov QWORD PTR [rcx+56], r11
  58591. mov QWORD PTR [rcx+64], r12
  58592. mov QWORD PTR [rcx+72], r13
  58593. mov QWORD PTR [rcx+80], r14
  58594. mov QWORD PTR [rcx+88], r15
  58595. mov r11, QWORD PTR [rsp] ; reload the parked result words 0..4 ...
  58596. mov r12, QWORD PTR [rsp+8]
  58597. mov r13, QWORD PTR [rsp+16]
  58598. mov r14, QWORD PTR [rsp+24]
  58599. mov r15, QWORD PTR [rsp+32]
  58600. mov QWORD PTR [rcx], r11 ; ... and copy them to r
  58601. mov QWORD PTR [rcx+8], r12
  58602. mov QWORD PTR [rcx+16], r13
  58603. mov QWORD PTR [rcx+24], r14
  58604. mov QWORD PTR [rcx+32], r15
  58605. add rsp, 40 ; release the scratch slots
  58606. pop rbx
  58607. pop rsi
  58608. pop rdi
  58609. pop r15
  58610. pop r14
  58611. pop r13
  58612. pop r12
  58613. ret
  58614. sp_384_mul_avx2_6 ENDP
  58615. _text ENDS
  58616. ENDIF
  58617. ; /* Square a and put result in r. (r = a * a)
  58618. ; * Works column by column with MUL: each cross product A[i]*A[j] (i != j) is added twice, each square A[i]*A[i] once.
  58619. ; * r A single precision integer; 12 words are stored (rcx on entry).
  58620. ; * a A single precision integer; 6 words are read (rdx on entry, copied to r8).
  58621. ; */
  58622. _text SEGMENT READONLY PARA
  58623. sp_384_sqr_6 PROC
  58624. push r12 ; r12-r14 are saved here and restored before ret
  58625. push r13
  58626. push r14
  58627. mov r8, rdx ; r8 = a; MUL overwrites rdx with the high product word
  58628. sub rsp, 48 ; six stack slots that hold result words 0..5 until the end
  58629. ; A[0] * A[0]
  58630. mov rax, QWORD PTR [r8]
  58631. mul rax ; rdx:rax = A[0]^2
  58632. xor r11, r11
  58633. mov QWORD PTR [rsp], rax ; result word 0
  58634. mov r10, rdx ; the running three-word accumulator rotates through r10, r11, r9
  58635. ; A[0] * A[1]
  58636. mov rax, QWORD PTR [r8+8]
  58637. mul QWORD PTR [r8]
  58638. xor r9, r9
  58639. add r10, rax ; first addition of the cross product
  58640. adc r11, rdx
  58641. adc r9, 0
  58642. add r10, rax ; second addition: a cross product counts twice in a square
  58643. adc r11, rdx
  58644. adc r9, 0
  58645. mov QWORD PTR [rsp+8], r10 ; result word 1
  58646. ; A[0] * A[2]
  58647. mov rax, QWORD PTR [r8+16]
  58648. mul QWORD PTR [r8]
  58649. xor r10, r10
  58650. add r11, rax
  58651. adc r9, rdx
  58652. adc r10, 0
  58653. add r11, rax
  58654. adc r9, rdx
  58655. adc r10, 0
  58656. ; A[1] * A[1]
  58657. mov rax, QWORD PTR [r8+8]
  58658. mul rax
  58659. add r11, rax ; a square term is added only once
  58660. adc r9, rdx
  58661. adc r10, 0
  58662. mov QWORD PTR [rsp+16], r11 ; result word 2
  58663. ; A[0] * A[3]
  58664. mov rax, QWORD PTR [r8+24]
  58665. mul QWORD PTR [r8]
  58666. xor r11, r11
  58667. add r9, rax
  58668. adc r10, rdx
  58669. adc r11, 0
  58670. add r9, rax
  58671. adc r10, rdx
  58672. adc r11, 0
  58673. ; A[1] * A[2]
  58674. mov rax, QWORD PTR [r8+16]
  58675. mul QWORD PTR [r8+8]
  58676. add r9, rax
  58677. adc r10, rdx
  58678. adc r11, 0
  58679. add r9, rax
  58680. adc r10, rdx
  58681. adc r11, 0
  58682. mov QWORD PTR [rsp+24], r9 ; result word 3
  58683. ; A[0] * A[4]
  58684. mov rax, QWORD PTR [r8+32]
  58685. mul QWORD PTR [r8]
  58686. xor r9, r9
  58687. add r10, rax
  58688. adc r11, rdx
  58689. adc r9, 0
  58690. add r10, rax
  58691. adc r11, rdx
  58692. adc r9, 0
  58693. ; A[1] * A[3]
  58694. mov rax, QWORD PTR [r8+24]
  58695. mul QWORD PTR [r8+8]
  58696. add r10, rax
  58697. adc r11, rdx
  58698. adc r9, 0
  58699. add r10, rax
  58700. adc r11, rdx
  58701. adc r9, 0
  58702. ; A[2] * A[2]
  58703. mov rax, QWORD PTR [r8+16]
  58704. mul rax
  58705. add r10, rax
  58706. adc r11, rdx
  58707. adc r9, 0
  58708. mov QWORD PTR [rsp+32], r10 ; result word 4
  58709. ; A[0] * A[5]
  58710. mov rax, QWORD PTR [r8+40]
  58711. mul QWORD PTR [r8]
  58712. xor r10, r10
  58713. xor r14, r14
  58714. mov r12, rax ; this column sums its three cross products in r14:r13:r12 first
  58715. mov r13, rdx
  58716. ; A[1] * A[4]
  58717. mov rax, QWORD PTR [r8+32]
  58718. mul QWORD PTR [r8+8]
  58719. add r12, rax
  58720. adc r13, rdx
  58721. adc r14, 0
  58722. ; A[2] * A[3]
  58723. mov rax, QWORD PTR [r8+24]
  58724. mul QWORD PTR [r8+16]
  58725. add r12, rax
  58726. adc r13, rdx
  58727. adc r14, 0
  58728. add r12, r12 ; double the column sum once instead of adding each product twice
  58729. adc r13, r13
  58730. adc r14, r14
  58731. add r11, r12 ; fold the doubled sum into the running accumulator
  58732. adc r9, r13
  58733. adc r10, r14
  58734. mov QWORD PTR [rsp+40], r11 ; result word 5
  58735. ; A[1] * A[5]
  58736. mov rax, QWORD PTR [r8+40]
  58737. mul QWORD PTR [r8+8]
  58738. xor r11, r11
  58739. add r9, rax
  58740. adc r10, rdx
  58741. adc r11, 0
  58742. add r9, rax
  58743. adc r10, rdx
  58744. adc r11, 0
  58745. ; A[2] * A[4]
  58746. mov rax, QWORD PTR [r8+32]
  58747. mul QWORD PTR [r8+16]
  58748. add r9, rax
  58749. adc r10, rdx
  58750. adc r11, 0
  58751. add r9, rax
  58752. adc r10, rdx
  58753. adc r11, 0
  58754. ; A[3] * A[3]
  58755. mov rax, QWORD PTR [r8+24]
  58756. mul rax
  58757. add r9, rax
  58758. adc r10, rdx
  58759. adc r11, 0
  58760. mov QWORD PTR [rcx+48], r9 ; result words 6..11 are stored straight into r
  58761. ; A[2] * A[5]
  58762. mov rax, QWORD PTR [r8+40]
  58763. mul QWORD PTR [r8+16]
  58764. xor r9, r9
  58765. add r10, rax
  58766. adc r11, rdx
  58767. adc r9, 0
  58768. add r10, rax
  58769. adc r11, rdx
  58770. adc r9, 0
  58771. ; A[3] * A[4]
  58772. mov rax, QWORD PTR [r8+32]
  58773. mul QWORD PTR [r8+24]
  58774. add r10, rax
  58775. adc r11, rdx
  58776. adc r9, 0
  58777. add r10, rax
  58778. adc r11, rdx
  58779. adc r9, 0
  58780. mov QWORD PTR [rcx+56], r10 ; result word 7
  58781. ; A[3] * A[5]
  58782. mov rax, QWORD PTR [r8+40]
  58783. mul QWORD PTR [r8+24]
  58784. xor r10, r10
  58785. add r11, rax
  58786. adc r9, rdx
  58787. adc r10, 0
  58788. add r11, rax
  58789. adc r9, rdx
  58790. adc r10, 0
  58791. ; A[4] * A[4]
  58792. mov rax, QWORD PTR [r8+32]
  58793. mul rax
  58794. add r11, rax
  58795. adc r9, rdx
  58796. adc r10, 0
  58797. mov QWORD PTR [rcx+64], r11 ; result word 8
  58798. ; A[4] * A[5]
  58799. mov rax, QWORD PTR [r8+40]
  58800. mul QWORD PTR [r8+32]
  58801. xor r11, r11
  58802. add r9, rax
  58803. adc r10, rdx
  58804. adc r11, 0
  58805. add r9, rax
  58806. adc r10, rdx
  58807. adc r11, 0
  58808. mov QWORD PTR [rcx+72], r9 ; result word 9
  58809. ; A[5] * A[5]
  58810. mov rax, QWORD PTR [r8+40]
  58811. mul rax
  58812. add r10, rax
  58813. adc r11, rdx
  58814. mov QWORD PTR [rcx+80], r10 ; result word 10
  58815. mov QWORD PTR [rcx+88], r11 ; result word 11
  58816. mov rax, QWORD PTR [rsp] ; all reads of a are done: copy the parked words 0..5 to r
  58817. mov rdx, QWORD PTR [rsp+8]
  58818. mov r12, QWORD PTR [rsp+16]
  58819. mov r13, QWORD PTR [rsp+24]
  58820. mov QWORD PTR [rcx], rax
  58821. mov QWORD PTR [rcx+8], rdx
  58822. mov QWORD PTR [rcx+16], r12
  58823. mov QWORD PTR [rcx+24], r13
  58824. mov rax, QWORD PTR [rsp+32]
  58825. mov rdx, QWORD PTR [rsp+40]
  58826. mov QWORD PTR [rcx+32], rax
  58827. mov QWORD PTR [rcx+40], rdx
  58828. add rsp, 48 ; release the scratch slots
  58829. pop r14
  58830. pop r13
  58831. pop r12
  58832. ret
  58833. sp_384_sqr_6 ENDP
  58834. _text ENDS
  58835. IFDEF HAVE_INTEL_AVX2
  58836. ; /* Square a and put result in r. (r = a * a)
  58837. ; * The cross products are summed once with MULX/ADCX/ADOX, then doubled while the squares A[i]*A[i] are added in.
  58838. ; * r Result of squaring; 12 words are stored (rcx on entry, kept on the stack while rcx serves as zero).
  58839. ; * a Number to square in Montgomery form; 6 words are read (rdx on entry, copied to rax).
  58840. ; */
  58841. _text SEGMENT READONLY PARA
  58842. sp_384_sqr_avx2_6 PROC
  58843. push r12 ; r12-r15, rdi, rsi, rbx and rbp are saved here and restored before ret
  58844. push r13
  58845. push r14
  58846. push r15
  58847. push rdi
  58848. push rsi
  58849. push rbx
  58850. push rbp
  58851. mov rax, rdx ; rax = a; rdx is needed as the implicit MULX operand
  58852. push rcx ; park the result pointer on the stack
  58853. xor rcx, rcx ; rcx = 0, and CF = OF = 0
  58854. mov rdx, QWORD PTR [rax] ; rdx = A[0]
  58855. mov rsi, QWORD PTR [rax+8] ; rsi = A[1]
  58856. mov rbx, QWORD PTR [rax+16] ; rbx = A[2]
  58857. mov rbp, QWORD PTR [rax+24] ; rbp = A[3]
  58858. ; Diagonal 0
  58859. ; A[1] * A[0]
  58860. mulx r11, r10, QWORD PTR [rax+8] ; r10 = low word (word 1 of the cross sum), r11 = high word
  58861. ; A[2] * A[0]
  58862. mulx r12, r8, QWORD PTR [rax+16]
  58863. adcx r11, r8
  58864. ; A[3] * A[0]
  58865. mulx r13, r8, QWORD PTR [rax+24]
  58866. adcx r12, r8
  58867. ; A[4] * A[0]
  58868. mulx r14, r8, QWORD PTR [rax+32]
  58869. adcx r13, r8
  58870. ; A[5] * A[0]
  58871. mulx r15, r8, QWORD PTR [rax+40]
  58872. adcx r14, r8
  58873. adcx r15, rcx ; rcx is zero: this only adds the pending CF
  58874. ; Diagonal 1
  58875. mov rdx, rsi ; rdx = A[1]
  58876. ; A[2] * A[1]
  58877. mulx r9, r8, QWORD PTR [rax+16]
  58878. adcx r12, r8 ; low product words are accumulated on the CF chain
  58879. adox r13, r9 ; high product words are accumulated on the OF chain
  58880. ; A[3] * A[1]
  58881. mulx r9, r8, QWORD PTR [rax+24]
  58882. adcx r13, r8
  58883. adox r14, r9
  58884. ; A[4] * A[1]
  58885. mulx r9, r8, QWORD PTR [rax+32]
  58886. adcx r14, r8
  58887. adox r15, r9
  58888. ; A[5] * A[1]
  58889. mulx rdi, r8, QWORD PTR [rax+40] ; rdi starts a new accumulator word
  58890. adcx r15, r8
  58891. adox rdi, rcx
  58892. mov rdx, rbx ; rdx = A[2]
  58893. ; A[5] * A[2]
  58894. mulx rsi, r8, QWORD PTR [rax+40] ; rsi (no longer needed as A[1]) starts a new accumulator word
  58895. adcx rdi, r8
  58896. adox rsi, rcx
  58897. adcx rsi, rcx
  58898. adcx rbx, rcx ; NOTE(review): rbx is overwritten by a later MULX before it is read again, so this appears to serve only to consume CF - confirm
  58899. ; Diagonal 2
  58900. ; A[3] * A[2]
  58901. mulx r9, r8, QWORD PTR [rax+24] ; rdx still holds A[2]
  58902. adcx r14, r8
  58903. adox r15, r9
  58904. ; A[4] * A[2]
  58905. mulx r9, r8, QWORD PTR [rax+32]
  58906. adcx r15, r8
  58907. adox rdi, r9
  58908. mov rdx, rbp ; rdx = A[3]
  58909. ; A[4] * A[3]
  58910. mulx r9, r8, QWORD PTR [rax+32]
  58911. adcx rdi, r8
  58912. adox rsi, r9
  58913. ; A[5] * A[3]
  58914. mulx rbx, r8, QWORD PTR [rax+40] ; rbx starts a new accumulator word
  58915. adcx rsi, r8
  58916. adox rbx, rcx
  58917. mov rdx, QWORD PTR [rax+32] ; rdx = A[4]
  58918. ; A[5] * A[4]
  58919. mulx rbp, r8, QWORD PTR [rax+40] ; rbp (no longer needed as A[3]) becomes the top accumulator word
  58920. adcx rbx, r8
  58921. adox rbp, rcx
  58922. adcx rbp, rcx
  58923. adcx rcx, rcx ; rcx (zero so far) picks up any CF left over; it is added to the top word below
  58924. ; Doubling previous result as we add in square words results
  58925. ; A[0] * A[0]
  58926. mov rdx, QWORD PTR [rax]
  58927. mulx r9, r8, rdx ; r9:r8 = A[0]^2
  58928. pop rdx ; rdx = result pointer parked earlier
  58929. mov QWORD PTR [rdx], r8 ; result word 0 is the low word of A[0]^2
  58930. adox r10, r10 ; doubling runs on the OF chain
  58931. push rdx ; park the result pointer again; rdx is needed for the next MULX
  58932. adcx r10, r9 ; adding the square words runs on the CF chain
  58933. ; A[1] * A[1]
  58934. mov rdx, QWORD PTR [rax+8]
  58935. mulx r9, r8, rdx
  58936. adox r11, r11
  58937. adcx r11, r8
  58938. adox r12, r12
  58939. adcx r12, r9
  58940. ; A[2] * A[2]
  58941. mov rdx, QWORD PTR [rax+16]
  58942. mulx r9, r8, rdx
  58943. adox r13, r13
  58944. adcx r13, r8
  58945. adox r14, r14
  58946. adcx r14, r9
  58947. ; A[3] * A[3]
  58948. mov rdx, QWORD PTR [rax+24]
  58949. mulx r9, r8, rdx
  58950. adox r15, r15
  58951. adcx r15, r8
  58952. adox rdi, rdi
  58953. adcx rdi, r9
  58954. ; A[4] * A[4]
  58955. mov rdx, QWORD PTR [rax+32]
  58956. mulx r9, r8, rdx
  58957. adox rsi, rsi
  58958. adcx rsi, r8
  58959. adox rbx, rbx
  58960. adcx rbx, r9
  58961. ; A[5] * A[5]
  58962. mov rdx, QWORD PTR [rax+40]
  58963. mulx r9, r8, rdx
  58964. adox rbp, rbp
  58965. adcx rbp, r8
  58966. adcx r9, rcx ; top word = high word of A[5]^2 + the value captured in rcx + CF
  58967. mov r8, 0 ; mov leaves the flags untouched
  58968. adox r9, r8 ; add the final OF carry from the doubling chain
  58969. pop rcx ; rcx = result pointer
  58970. mov QWORD PTR [rcx+8], r10 ; store result words 1..11
  58971. mov QWORD PTR [rcx+16], r11
  58972. mov QWORD PTR [rcx+24], r12
  58973. mov QWORD PTR [rcx+32], r13
  58974. mov QWORD PTR [rcx+40], r14
  58975. mov QWORD PTR [rcx+48], r15
  58976. mov QWORD PTR [rcx+56], rdi
  58977. mov QWORD PTR [rcx+64], rsi
  58978. mov QWORD PTR [rcx+72], rbx
  58979. mov QWORD PTR [rcx+80], rbp
  58980. mov QWORD PTR [rcx+88], r9
  58981. pop rbp
  58982. pop rbx
  58983. pop rsi
  58984. pop rdi
  58985. pop r15
  58986. pop r14
  58987. pop r13
  58988. pop r12
  58989. ret
  58990. sp_384_sqr_avx2_6 ENDP
  58991. _text ENDS
  58992. ENDIF
  58993. ; /* Add the six-word numbers a and b and store the sum in r. (r = a + b)
  58994. ; * The carry out of the most significant word is returned in rax (0 or 1).
  58995. ; * r Receives the six-word sum (rcx).
  58996. ; * a First addend, six words (rdx).
  58997. ; * b Second addend, six words (r8).
  58998. ; */
  58999. _text SEGMENT READONLY PARA
  59000. sp_384_add_6 PROC
  59001. push r14
  59002. push r13
  59003. push r12
  59004. mov r12, QWORD PTR [rdx+40] ; fetch all of a before anything is written
  59005. mov r13, QWORD PTR [rdx+32]
  59006. mov r14, QWORD PTR [rdx+24]
  59007. mov r9, QWORD PTR [rdx+16]
  59008. mov r10, QWORD PTR [rdx+8]
  59009. mov r11, QWORD PTR [rdx]
  59010. xor eax, eax ; rax = 0 for the carry result
  59011. add r11, QWORD PTR [r8] ; word 0 starts the carry chain
  59012. adc r10, QWORD PTR [r8+8]
  59013. adc r9, QWORD PTR [r8+16]
  59014. adc r14, QWORD PTR [r8+24]
  59015. adc r13, QWORD PTR [r8+32]
  59016. adc r12, QWORD PTR [r8+40]
  59017. adc rax, 0 ; rax = carry out of word 5
  59018. mov QWORD PTR [rcx], r11
  59019. mov QWORD PTR [rcx+8], r10
  59020. mov QWORD PTR [rcx+16], r9
  59021. mov QWORD PTR [rcx+24], r14
  59022. mov QWORD PTR [rcx+32], r13
  59023. mov QWORD PTR [rcx+40], r12
  59024. pop r12
  59025. pop r13
  59026. pop r14
  59027. ret
  59028. sp_384_add_6 ENDP
  59029. _text ENDS
  59030. ; /* Subtract the six-word number b from a and store the difference in r. (r = a - b)
  59031. ; * The borrow out of the most significant word is returned in rax (0, or all ones on borrow).
  59032. ; * r Receives the six-word difference (rcx).
  59033. ; * a Minuend, six words (rdx).
  59034. ; * b Subtrahend, six words (r8).
  59035. ; */
  59036. _text SEGMENT READONLY PARA
  59037. sp_384_sub_6 PROC
  59038. push r14
  59039. push r13
  59040. push r12
  59041. mov r12, QWORD PTR [rdx+40] ; fetch all of a before anything is written
  59042. mov r13, QWORD PTR [rdx+32]
  59043. mov r14, QWORD PTR [rdx+24]
  59044. mov r9, QWORD PTR [rdx+16]
  59045. mov r10, QWORD PTR [rdx+8]
  59046. mov r11, QWORD PTR [rdx]
  59047. xor eax, eax
  59048. sub r11, QWORD PTR [r8] ; word 0 starts the borrow chain
  59049. sbb r10, QWORD PTR [r8+8]
  59050. sbb r9, QWORD PTR [r8+16]
  59051. sbb r14, QWORD PTR [r8+24]
  59052. sbb r13, QWORD PTR [r8+32]
  59053. sbb r12, QWORD PTR [r8+40]
  59054. sbb rax, rax ; rax = 0 - borrow
  59055. mov QWORD PTR [rcx], r11
  59056. mov QWORD PTR [rcx+8], r10
  59057. mov QWORD PTR [rcx+16], r9
  59058. mov QWORD PTR [rcx+24], r14
  59059. mov QWORD PTR [rcx+32], r13
  59060. mov QWORD PTR [rcx+40], r12
  59061. pop r12
  59062. pop r13
  59063. pop r14
  59064. ret
  59065. sp_384_sub_6 ENDP
  59066. _text ENDS
  59067. ; /* Copy a over r when the mask m is all ones; leave r as it is when m is zero.
  59068. ; * Each word is updated as r ^= (a ^ r) & m, so no branch depends on m.
  59069. ; *
  59070. ; * r Six-word number that may be overwritten (rcx).
  59071. ; * a Six-word number to copy from (rdx).
  59072. ; * m Mask value: -1 to copy, 0 to keep r (r8).
  59073. ; */
  59074. _text SEGMENT READONLY PARA
  59075. sp_384_cond_copy_6 PROC
  59076. push r13
  59077. push r12
  59078. mov r11, QWORD PTR [rdx] ; start from the words of a ...
  59079. mov r10, QWORD PTR [rdx+8]
  59080. mov r9, QWORD PTR [rdx+16]
  59081. mov rax, QWORD PTR [rdx+24]
  59082. mov r13, QWORD PTR [rdx+32]
  59083. mov r12, QWORD PTR [rdx+40]
  59084. xor r11, QWORD PTR [rcx] ; ... and form the bit difference a ^ r
  59085. xor r10, QWORD PTR [rcx+8]
  59086. xor r9, QWORD PTR [rcx+16]
  59087. xor rax, QWORD PTR [rcx+24]
  59088. xor r13, QWORD PTR [rcx+32]
  59089. xor r12, QWORD PTR [rcx+40]
  59090. and r11, r8 ; keep the difference only when the mask is set
  59091. and r10, r8
  59092. and r9, r8
  59093. and rax, r8
  59094. and r13, r8
  59095. and r12, r8
  59096. xor QWORD PTR [rcx], r11 ; applying the difference turns r into a, or changes nothing
  59097. xor QWORD PTR [rcx+8], r10
  59098. xor QWORD PTR [rcx+16], r9
  59099. xor QWORD PTR [rcx+24], rax
  59100. xor QWORD PTR [rcx+32], r13
  59101. xor QWORD PTR [rcx+40], r12
  59102. pop r12
  59103. pop r13
  59104. ret
  59105. sp_384_cond_copy_6 ENDP
  59106. _text ENDS
  59107. ; /* Conditionally subtract b from a using the mask m. (r = a - (b & m))
  59108. ; * m is -1 to subtract and 0 to store a unchanged.
  59109. ; * The borrow is returned in rax: 0, or all ones when the subtraction borrowed.
  59110. ; * r Six-word result of the conditional subtraction (rcx).
  59111. ; * a Six-word number to subtract from (rdx).
  59112. ; * b Six-word number to subtract (r8).
  59113. ; * m Mask value to apply to every word of b (r9).
  59114. ; */
  59115. _text SEGMENT READONLY PARA
  59116. sp_384_cond_sub_6 PROC
  59117. sub rsp, 48 ; six stack slots for the masked copy of b
  59118. mov rax, QWORD PTR [r8] ; masking happens up front because AND would destroy the borrow flag
  59119. and rax, r9
  59120. mov QWORD PTR [rsp], rax
  59121. mov rax, QWORD PTR [r8+8]
  59122. and rax, r9
  59123. mov QWORD PTR [rsp+8], rax
  59124. mov rax, QWORD PTR [r8+16]
  59125. and rax, r9
  59126. mov QWORD PTR [rsp+16], rax
  59127. mov rax, QWORD PTR [r8+24]
  59128. and rax, r9
  59129. mov QWORD PTR [rsp+24], rax
  59130. mov rax, QWORD PTR [r8+32]
  59131. and rax, r9
  59132. mov QWORD PTR [rsp+32], rax
  59133. mov rax, QWORD PTR [r8+40]
  59134. and rax, r9
  59135. mov QWORD PTR [rsp+40], rax
  59136. mov r11, QWORD PTR [rdx] ; from here on only MOV, SUB and SBB are used, so the borrow survives
  59137. mov rax, QWORD PTR [rsp]
  59138. sub r11, rax
  59139. mov r10, QWORD PTR [rdx+8]
  59140. mov rax, QWORD PTR [rsp+8]
  59141. sbb r10, rax
  59142. mov QWORD PTR [rcx], r11 ; each result word is stored after the next word of a has been loaded
  59143. mov r11, QWORD PTR [rdx+16]
  59144. mov rax, QWORD PTR [rsp+16]
  59145. sbb r11, rax
  59146. mov QWORD PTR [rcx+8], r10
  59147. mov r10, QWORD PTR [rdx+24]
  59148. mov rax, QWORD PTR [rsp+24]
  59149. sbb r10, rax
  59150. mov QWORD PTR [rcx+16], r11
  59151. mov r11, QWORD PTR [rdx+32]
  59152. mov rax, QWORD PTR [rsp+32]
  59153. sbb r11, rax
  59154. mov QWORD PTR [rcx+24], r10
  59155. mov r10, QWORD PTR [rdx+40]
  59156. mov rax, QWORD PTR [rsp+40]
  59157. sbb r10, rax
  59158. mov QWORD PTR [rcx+32], r11
  59159. mov QWORD PTR [rcx+40], r10
  59160. sbb rax, rax ; rax = 0 - borrow
  59161. add rsp, 48
  59162. ret
  59163. sp_384_cond_sub_6 ENDP
  59164. _text ENDS
  59165. ; /* Reduce the number back to 384 bits using Montgomery reduction.
  59166. ; * The modulus is hard-wired into the shift/add sequences below: rdx (m) and r8 (mp) are overwritten without being read.
  59167. ; * a A single precision number to reduce in place; 12 words are read and a[0..5] are written (rcx).
  59168. ; * m The single precision number representing the modulus (this register is never read here).
  59169. ; * mp The digit representing the negative inverse of m mod 2^n (this register is never read here).
  59170. ; */
  59171. _text SEGMENT READONLY PARA
  59172. sp_384_mont_reduce_6 PROC
  59173. push r12 ; r12-r15, rdi, rsi, rbx and rbp are saved here and restored before ret
  59174. push r13
  59175. push r14
  59176. push r15
  59177. push rdi
  59178. push rsi
  59179. push rbx
  59180. push rbp
  59181. mov r12, QWORD PTR [rcx] ; a[0..5] live in r12, r13, r14, r15, rdi, rsi
  59182. mov r13, QWORD PTR [rcx+8]
  59183. mov r14, QWORD PTR [rcx+16]
  59184. mov r15, QWORD PTR [rcx+24]
  59185. mov rdi, QWORD PTR [rcx+32]
  59186. mov rsi, QWORD PTR [rcx+40]
  59187. xor r11, r11 ; r11 collects the carry/borrow above the current top word
  59188. ; a[0-7] += m[0-5] * mu[0..1] = m[0-5] * (a[0..1] * mp)
  59189. mov rbx, QWORD PTR [rcx+48] ; a[6]
  59190. mov rbp, QWORD PTR [rcx+56] ; a[7]
  59191. mov rdx, r12
  59192. mov rax, r13
  59193. shld rax, rdx, 32 ; rax:rdx = low 128 bits of (a[1]:a[0]) << 32
  59194. shl rdx, 32
  59195. add rdx, r12 ; + a[1]:a[0]
  59196. adc rax, r13
  59197. add rax, r12 ; + a[0] << 64, so rax:rdx = mu = (a[1]:a[0]) * (2^64 + 2^32 + 1) mod 2^128
  59198. mov r8, rdx
  59199. mov r9, rax
  59200. mov r10, rax
  59201. shld r9, r8, 32 ; r10:r9:r8 = mu << 32 as a three-word value
  59202. shl r8, 32
  59203. shr r10, 32
  59204. add r12, r8 ; a[0..2] += mu << 32, with the carry rippling upwards
  59205. adc r13, r9
  59206. adc r14, r10
  59207. adc r15, 0
  59208. adc rdi, 0
  59209. adc rsi, 0
  59210. adc rbx, rdx ; a[6..7] += mu
  59211. adc rbp, rax
  59212. adc r11, 0 ; carry out of a[7]
  59213. add r8, rax ; rax:r10:r9:r8 = (mu << 32) + (mu[1], mu[0], mu[1])
  59214. adc r9, rdx
  59215. adc r10, rax
  59216. mov rax, 0
  59217. adc rax, 0
  59218. sub r14, r9 ; subtract the upper three words of that value starting at a[2]
  59219. sbb r15, r10
  59220. sbb rdi, rax
  59221. sbb rsi, 0
  59222. sbb rbx, 0
  59223. sbb rbp, 0
  59224. sbb r11, 0
  59225. ; a[2-9] += m[0-5] * mu[0..1] = m[0-5] * (a[2..3] * mp)
  59226. mov r12, QWORD PTR [rcx+64] ; r12, r13 are reused for a[8], a[9]
  59227. mov r13, QWORD PTR [rcx+72]
  59228. mov rdx, r14 ; same mu computation, now from a[2..3]
  59229. mov rax, r15
  59230. shld rax, rdx, 32
  59231. shl rdx, 32
  59232. add rdx, r14
  59233. adc rax, r15
  59234. add rax, r14
  59235. mov r8, rdx
  59236. mov r9, rax
  59237. mov r10, rax
  59238. shld r9, r8, 32
  59239. shl r8, 32
  59240. shr r10, 32
  59241. add r12, r11 ; bring the carry of the previous round into a[8..9]
  59242. adc r13, 0
  59243. mov r11, 0
  59244. adc r11, 0
  59245. add r14, r8 ; a[2..4] += mu << 32
  59246. adc r15, r9
  59247. adc rdi, r10
  59248. adc rsi, 0
  59249. adc rbx, 0
  59250. adc rbp, 0
  59251. adc r12, rdx ; a[8..9] += mu
  59252. adc r13, rax
  59253. adc r11, 0
  59254. add r8, rax
  59255. adc r9, rdx
  59256. adc r10, rax
  59257. mov rax, 0
  59258. adc rax, 0
  59259. sub rdi, r9 ; subtract starting at a[4]
  59260. sbb rsi, r10
  59261. sbb rbx, rax
  59262. sbb rbp, 0
  59263. sbb r12, 0
  59264. sbb r13, 0
  59265. sbb r11, 0
  59266. ; a[4-11] += m[0-5] * mu[0..1] = m[0-5] * (a[4..5] * mp)
  59267. mov r14, QWORD PTR [rcx+80] ; r14, r15 are reused for a[10], a[11]
  59268. mov r15, QWORD PTR [rcx+88]
  59269. mov rdx, rdi ; same mu computation, now from a[4..5]
  59270. mov rax, rsi
  59271. shld rax, rdx, 32
  59272. shl rdx, 32
  59273. add rdx, rdi
  59274. adc rax, rsi
  59275. add rax, rdi
  59276. mov r8, rdx
  59277. mov r9, rax
  59278. mov r10, rax
  59279. shld r9, r8, 32
  59280. shl r8, 32
  59281. shr r10, 32
  59282. add r14, r11 ; bring the carry of the previous round into a[10..11]
  59283. adc r15, 0
  59284. mov r11, 0
  59285. adc r11, 0
  59286. add rdi, r8 ; a[4..6] += mu << 32
  59287. adc rsi, r9
  59288. adc rbx, r10
  59289. adc rbp, 0
  59290. adc r12, 0
  59291. adc r13, 0
  59292. adc r14, rdx ; a[10..11] += mu
  59293. adc r15, rax
  59294. adc r11, 0
  59295. add r8, rax
  59296. adc r9, rdx
  59297. adc r10, rax
  59298. mov rax, 0
  59299. adc rax, 0
  59300. sub rbx, r9 ; subtract starting at a[6]
  59301. sbb rbp, r10
  59302. sbb r12, rax
  59303. sbb r13, 0
  59304. sbb r14, 0
  59305. sbb r15, 0
  59306. sbb r11, 0
  59307. ; Subtract mod if carry
  59308. neg r11 ; r11 = 0 - r11: all ones when r11 was 1, zero when it was 0
  59309. mov r10, 18446744073709551614 ; 0xFFFFFFFFFFFFFFFE
  59310. mov r8, r11
  59311. mov r9, r11
  59312. shr r8, 32 ; 0x00000000FFFFFFFF under an all-ones mask
  59313. shl r9, 32 ; 0xFFFFFFFF00000000 under an all-ones mask
  59314. and r10, r11
  59315. sub rbx, r8 ; under an all-ones mask, r8, r9, r10, r11, r11, r11 are the words of 2^384 - 2^128 - 2^96 + 2^32 - 1
  59316. sbb rbp, r9
  59317. sbb r12, r10
  59318. sbb r13, r11
  59319. sbb r14, r11
  59320. sbb r15, r11
  59321. mov QWORD PTR [rcx], rbx ; the values held for a[6..11] become the reduced result in a[0..5]
  59322. mov QWORD PTR [rcx+8], rbp
  59323. mov QWORD PTR [rcx+16], r12
  59324. mov QWORD PTR [rcx+24], r13
  59325. mov QWORD PTR [rcx+32], r14
  59326. mov QWORD PTR [rcx+40], r15
  59327. pop rbp
  59328. pop rbx
  59329. pop rsi
  59330. pop rdi
  59331. pop r15
  59332. pop r14
  59333. pop r13
  59334. pop r12
  59335. ret
  59336. sp_384_mont_reduce_6 ENDP
  59337. _text ENDS
  59338. ; /* Reduce the number back to 384 bits using Montgomery reduction.
  59339. ; * Generic word-by-word form: six rounds of a += m * mu, then one masked subtraction of m.
  59340. ; * a A single precision number to reduce in place; 12 words are used, the result is in a[0..5] (rcx).
  59341. ; * m The single precision number representing the modulus; 6 words are read (rdx, copied to r9).
  59342. ; * mp The digit representing the negative inverse of m mod 2^n (r8).
  59343. ; */
  59344. _text SEGMENT READONLY PARA
  59345. sp_384_mont_reduce_order_6 PROC
  59346. push r12 ; r12-r15, rdi and rsi are saved here and restored before ret
  59347. push r13
  59348. push r14
  59349. push r15
  59350. push rdi
  59351. push rsi
  59352. mov r9, rdx ; r9 = m; MUL overwrites rdx with the high product word
  59353. xor rsi, rsi ; rsi = carry handed from one round to the next
  59354. ; i = 6
  59355. mov r10, 6
  59356. mov r15, QWORD PTR [rcx] ; r15 and rdi cache a[i] and a[i+1] across rounds
  59357. mov rdi, QWORD PTR [rcx+8]
  59358. L_384_mont_reduce_order_6_loop:
  59359. ; mu = a[i] * mp
  59360. mov r13, r15
  59361. imul r13, r8
  59362. ; a[i+0] += m[0] * mu
  59363. mov rax, r13
  59364. xor r12, r12
  59365. mul QWORD PTR [r9]
  59366. add r15, rax ; only the carry is needed: r15 is reloaded from rdi right below
  59367. adc r12, rdx
  59368. ; a[i+1] += m[1] * mu
  59369. mov rax, r13
  59370. xor r11, r11
  59371. mul QWORD PTR [r9+8]
  59372. mov r15, rdi ; the updated a[i+1] becomes a[i] of the next round
  59373. add r15, rax
  59374. adc r11, rdx
  59375. add r15, r12
  59376. adc r11, 0
  59377. ; a[i+2] += m[2] * mu
  59378. mov rax, r13
  59379. xor r12, r12
  59380. mul QWORD PTR [r9+16]
  59381. mov rdi, QWORD PTR [rcx+16] ; the updated a[i+2] becomes a[i+1] of the next round
  59382. add rdi, rax
  59383. adc r12, rdx
  59384. add rdi, r11
  59385. adc r12, 0
  59386. ; a[i+3] += m[3] * mu
  59387. mov rax, r13
  59388. xor r11, r11
  59389. mul QWORD PTR [r9+24]
  59390. mov r14, QWORD PTR [rcx+24]
  59391. add r14, rax
  59392. adc r11, rdx
  59393. add r14, r12
  59394. mov QWORD PTR [rcx+24], r14 ; mov does not disturb the carry consumed by the next adc
  59395. adc r11, 0
  59396. ; a[i+4] += m[4] * mu
  59397. mov rax, r13
  59398. xor r12, r12
  59399. mul QWORD PTR [r9+32]
  59400. mov r14, QWORD PTR [rcx+32]
  59401. add r14, rax
  59402. adc r12, rdx
  59403. add r14, r11
  59404. mov QWORD PTR [rcx+32], r14
  59405. adc r12, 0
  59406. ; a[i+5] += m[5] * mu
  59407. mov rax, r13
  59408. mul QWORD PTR [r9+40]
  59409. mov r14, QWORD PTR [rcx+40]
  59410. add r12, rax
  59411. adc rdx, rsi ; add the carry left over from the previous round
  59412. mov rsi, 0
  59413. adc rsi, 0
  59414. add r14, r12
  59415. mov QWORD PTR [rcx+40], r14
  59416. adc QWORD PTR [rcx+48], rdx
  59417. adc rsi, 0 ; rsi = carry out of a[i+6] for the next round
  59418. ; i -= 1
  59419. add rcx, 8
  59420. dec r10
  59421. jnz L_384_mont_reduce_order_6_loop
  59422. mov QWORD PTR [rcx], r15 ; write back the two cached words; rcx has advanced by six words
  59423. mov QWORD PTR [rcx+8], rdi
  59424. neg rsi ; mask = 0 - final carry (zero or all ones)
  59425. ; Call sp_384_cond_sub_6(r, a, b, m) with the arguments in rcx, rdx, r8, r9.
  59426. mov r8, r9 ; b = modulus pointer; must be copied before r9 is reused
  59427. mov r9, rsi ; m = mask
  59428. ; The reverse order would overwrite the modulus pointer and pass the mask as b.
  59429. ; _WIN64 is always defined by the top of this file, so the set-up is unconditional.
  59430. ; NOTE(review): no shadow space is reserved and rsp is not realigned before the call;
  59431. ; the callee visible in this file is a leaf that only uses its own 48-byte frame.
  59432. mov rdx, rcx ; a = the upper six words of the buffer
  59433. ; r = the start of the buffer, six words below
  59434. sub rcx, 48
  59435. call sp_384_cond_sub_6
  59436. pop rsi
  59437. pop rdi
  59438. pop r15
  59439. pop r14
  59440. pop r13
  59441. pop r12
  59442. ret
  59443. sp_384_mont_reduce_order_6 ENDP
  59444. _text ENDS
  59445. ; /* Compare the six-word numbers a and b without branching.
  59446. ; * Words are examined from most to least significant; once a difference has
  59447. ; * been seen, the remaining words are masked to zero so they compare equal.
  59448. ; * a A single precision integer (rcx).
  59449. ; * b A single precision integer (rdx).
  59450. ; * return -1, 0 or 1 in rax if a is less than, equal to or greater than b.
  59451. ; */
  59452. _text SEGMENT READONLY PARA
  59453. sp_384_cmp_6 PROC
  59454. push r12
  59455. mov r8, -1 ; r8 = all ones while no difference has been found
  59456. mov rax, r8 ; result starts as -1; the final xor turns it into 0 if nothing differs
  59457. mov r9, 1 ; constant selected when a is greater
  59458. xor r10, r10 ; constant zero used to clear the mask
  59459. mov r12, QWORD PTR [rcx+40]
  59460. mov r11, QWORD PTR [rdx+40]
  59461. and r12, r8
  59462. and r11, r8
  59463. cmp r12, r11 ; only the flags of a - b are needed
  59464. cmova rax, r9 ; a word above b word: result 1
  59465. cmovc rax, r8 ; a word below b word: result -1 (r8 is still all ones here)
  59466. cmovnz r8, r10 ; words differ: ignore all lower words from now on
  59467. mov r12, QWORD PTR [rcx+32]
  59468. mov r11, QWORD PTR [rdx+32]
  59469. and r12, r8
  59470. and r11, r8
  59471. cmp r12, r11
  59472. cmova rax, r9
  59473. cmovc rax, r8
  59474. cmovnz r8, r10
  59475. mov r12, QWORD PTR [rcx+24]
  59476. mov r11, QWORD PTR [rdx+24]
  59477. and r12, r8
  59478. and r11, r8
  59479. cmp r12, r11
  59480. cmova rax, r9
  59481. cmovc rax, r8
  59482. cmovnz r8, r10
  59483. mov r12, QWORD PTR [rcx+16]
  59484. mov r11, QWORD PTR [rdx+16]
  59485. and r12, r8
  59486. and r11, r8
  59487. cmp r12, r11
  59488. cmova rax, r9
  59489. cmovc rax, r8
  59490. cmovnz r8, r10
  59491. mov r12, QWORD PTR [rcx+8]
  59492. mov r11, QWORD PTR [rdx+8]
  59493. and r12, r8
  59494. and r11, r8
  59495. cmp r12, r11
  59496. cmova rax, r9
  59497. cmovc rax, r8
  59498. cmovnz r8, r10
  59499. mov r12, QWORD PTR [rcx]
  59500. mov r11, QWORD PTR [rdx]
  59501. and r12, r8
  59502. and r11, r8
  59503. cmp r12, r11
  59504. cmova rax, r9
  59505. cmovc rax, r8
  59506. cmovnz r8, r10
  59507. xor rax, r8 ; r8 is still all ones only if every word matched: -1 becomes 0
  59508. pop r12
  59509. ret
  59510. sp_384_cmp_6 ENDP
  59511. _text ENDS
  ; /* Add two Montgomery form numbers (r = a + b % m).
  ;  *
  ;  * The modulus is not read through the m argument: its three low limbs
  ;  * are the immediates loaded below and its three high limbs (all ones)
  ;  * are supplied by the mask register itself.
  ;  *
  ;  * r  (rcx) Result of addition.
  ;  * a  (rdx) First number to add in Montgomery form.
  ;  * b  (r8)  Second number to add in Montgomery form.
  ;  * m  (r9)  Modulus (prime); the register is overwritten without being read.
  ;  */
  _text SEGMENT READONLY PARA
  sp_384_mont_add_6 PROC
          push    r12
          push    r13
          push    r14
          push    r15
          push    rdi
          ; Low three limbs of the modulus (masked before every use).
          mov     r14, 0FFFFFFFFh
          mov     r15, 0FFFFFFFF00000000h
          mov     rdi, 0FFFFFFFFFFFFFFFEh
          ; rax, r9, r10, r11, r12, r13 = a[0..5]
          mov     rax, QWORD PTR [rdx]
          mov     r9,  QWORD PTR [rdx+8]
          mov     r10, QWORD PTR [rdx+16]
          mov     r11, QWORD PTR [rdx+24]
          mov     r12, QWORD PTR [rdx+32]
          mov     r13, QWORD PTR [rdx+40]
          ; sum = a + b; the carry out of limb 5 is left in CF
          add     rax, QWORD PTR [r8]
          adc     r9,  QWORD PTR [r8+8]
          adc     r10, QWORD PTR [r8+16]
          adc     r11, QWORD PTR [r8+24]
          adc     r12, QWORD PTR [r8+32]
          adc     r13, QWORD PTR [r8+40]
          sbb     rdx, rdx                    ; rdx = 0 - CF: all ones if the sum overflowed
          ; First conditional subtraction of the modulus.
          and     r14, rdx
          and     r15, rdx
          and     rdi, rdx
          sub     rax, r14
          sbb     r9,  r15
          sbb     r10, rdi
          sbb     r11, rdx
          sbb     r12, rdx
          sbb     r13, rdx
          adc     rdx, 0                      ; mask + borrow: 0 once a borrow cancelled the overflow
          ; Second conditional subtraction, taken only if the mask is still set.
          and     r14, rdx
          and     r15, rdx
          and     rdi, rdx
          sub     rax, r14
          sbb     r9,  r15
          sbb     r10, rdi
          sbb     r11, rdx
          sbb     r12, rdx
          sbb     r13, rdx
          ; r = result
          mov     QWORD PTR [rcx], rax
          mov     QWORD PTR [rcx+8], r9
          mov     QWORD PTR [rcx+16], r10
          mov     QWORD PTR [rcx+24], r11
          mov     QWORD PTR [rcx+32], r12
          mov     QWORD PTR [rcx+40], r13
          pop     rdi
          pop     r15
          pop     r14
          pop     r13
          pop     r12
          ret
  sp_384_mont_add_6 ENDP
  _text ENDS
  ; /* Double a Montgomery form number (r = a + a % m).
  ;  *
  ;  * The modulus is not read through the m argument: its three low limbs
  ;  * are the immediates loaded below and its three high limbs (all ones)
  ;  * are supplied by the mask register itself.
  ;  *
  ;  * r  (rcx) Result of doubling.
  ;  * a  (rdx) Number to double in Montgomery form.
  ;  * m  (r8)  Modulus (prime); the register is overwritten without being read.
  ;  */
  _text SEGMENT READONLY PARA
  sp_384_mont_dbl_6 PROC
          push    r12
          push    r13
          push    r14
          push    r15
          push    rdi
          ; Low three limbs of the modulus (masked before every use).
          mov     r13, 0FFFFFFFFh
          mov     r14, 0FFFFFFFF00000000h
          mov     r15, 0FFFFFFFFFFFFFFFEh
          ; rax, r8, r9, r10, r11, r12 = a[0..5]
          mov     rax, QWORD PTR [rdx]
          mov     r8,  QWORD PTR [rdx+8]
          mov     r9,  QWORD PTR [rdx+16]
          mov     r10, QWORD PTR [rdx+24]
          mov     r11, QWORD PTR [rdx+32]
          mov     r12, QWORD PTR [rdx+40]
          ; value = a + a; the carry out of limb 5 is left in CF
          add     rax, rax
          adc     r8,  r8
          adc     r9,  r9
          adc     r10, r10
          adc     r11, r11
          adc     r12, r12
          sbb     rdi, rdi                    ; rdi = 0 - CF: all ones if the doubling overflowed
          ; First conditional subtraction of the modulus.
          and     r13, rdi
          and     r14, rdi
          and     r15, rdi
          sub     rax, r13
          sbb     r8,  r14
          sbb     r9,  r15
          sbb     r10, rdi
          sbb     r11, rdi
          sbb     r12, rdi
          adc     rdi, 0                      ; mask + borrow: 0 once a borrow cancelled the overflow
          ; Second conditional subtraction, taken only if the mask is still set.
          and     r13, rdi
          and     r14, rdi
          and     r15, rdi
          sub     rax, r13
          sbb     r8,  r14
          sbb     r9,  r15
          sbb     r10, rdi
          sbb     r11, rdi
          sbb     r12, rdi
          ; r = result
          mov     QWORD PTR [rcx], rax
          mov     QWORD PTR [rcx+8], r8
          mov     QWORD PTR [rcx+16], r9
          mov     QWORD PTR [rcx+24], r10
          mov     QWORD PTR [rcx+32], r11
          mov     QWORD PTR [rcx+40], r12
          pop     rdi
          pop     r15
          pop     r14
          pop     r13
          pop     r12
          ret
  sp_384_mont_dbl_6 ENDP
  _text ENDS
  ; /* Triple a Montgomery form number (r = a + a + a % m).
  ;  *
  ;  * Computed as (2a mod m) + a, with two conditional subtractions of the
  ;  * modulus after each of the two additions.  The modulus is not read
  ;  * through the m argument: its three low limbs are the immediates loaded
  ;  * below and its three high limbs (all ones) come from the mask register.
  ;  *
  ;  * Nothing is written to r until a has been read for the last time, so
  ;  * r may be the same buffer as a.
  ;  *
  ;  * r  (rcx) Result of tripling.
  ;  * a  (rdx) Number to triple in Montgomery form.
  ;  * m  (r8)  Modulus (prime); the register is overwritten without being read.
  ;  */
  _text SEGMENT READONLY PARA
  sp_384_mont_tpl_6 PROC
          push    r12
          push    r13
          push    r14
          push    r15
          push    rdi
          ; Low three limbs of the modulus (masked before every use).
          mov     r13, 0FFFFFFFFh
          mov     r14, 0FFFFFFFF00000000h
          mov     r15, 0FFFFFFFFFFFFFFFEh
          ; rax, r8, r9, r10, r11, r12 = a[0..5]
          mov     rax, QWORD PTR [rdx]
          mov     r8,  QWORD PTR [rdx+8]
          mov     r9,  QWORD PTR [rdx+16]
          mov     r10, QWORD PTR [rdx+24]
          mov     r11, QWORD PTR [rdx+32]
          mov     r12, QWORD PTR [rdx+40]
          ; ---- Stage 1: value = 2a mod m ----
          add     rax, rax
          adc     r8,  r8
          adc     r9,  r9
          adc     r10, r10
          adc     r11, r11
          adc     r12, r12
          sbb     rdi, rdi                    ; rdi = 0 - CF: all ones if the doubling overflowed
          and     r13, rdi
          and     r14, rdi
          and     r15, rdi
          sub     rax, r13
          sbb     r8,  r14
          sbb     r9,  r15
          sbb     r10, rdi
          sbb     r11, rdi
          sbb     r12, rdi
          adc     rdi, 0                      ; mask + borrow: 0 once a borrow cancelled the overflow
          and     r13, rdi
          and     r14, rdi
          and     r15, rdi
          sub     rax, r13
          sbb     r8,  r14
          sbb     r9,  r15
          sbb     r10, rdi
          sbb     r11, rdi
          sbb     r12, rdi
          ; No store to r here: a[0] is read again just below, and an early
          ; write to r[0] would replace it when r and a are the same buffer.
          ; ---- Stage 2: value = value + a mod m ----
          ; The masked copies may now be zero, so reload the modulus limbs.
          mov     r13, 0FFFFFFFFh
          mov     r14, 0FFFFFFFF00000000h
          mov     r15, 0FFFFFFFFFFFFFFFEh
          add     rax, QWORD PTR [rdx]
          adc     r8,  QWORD PTR [rdx+8]
          adc     r9,  QWORD PTR [rdx+16]
          adc     r10, QWORD PTR [rdx+24]
          adc     r11, QWORD PTR [rdx+32]
          adc     r12, QWORD PTR [rdx+40]
          sbb     rdi, rdi                    ; all ones if the addition overflowed
          and     r13, rdi
          and     r14, rdi
          and     r15, rdi
          sub     rax, r13
          sbb     r8,  r14
          sbb     r9,  r15
          sbb     r10, rdi
          sbb     r11, rdi
          sbb     r12, rdi
          adc     rdi, 0                      ; mask + borrow, as in stage 1
          and     r13, rdi
          and     r14, rdi
          and     r15, rdi
          sub     rax, r13
          sbb     r8,  r14
          sbb     r9,  r15
          sbb     r10, rdi
          sbb     r11, rdi
          sbb     r12, rdi
          ; r = result
          mov     QWORD PTR [rcx], rax
          mov     QWORD PTR [rcx+8], r8
          mov     QWORD PTR [rcx+16], r9
          mov     QWORD PTR [rcx+24], r10
          mov     QWORD PTR [rcx+32], r11
          mov     QWORD PTR [rcx+40], r12
          pop     rdi
          pop     r15
          pop     r14
          pop     r13
          pop     r12
          ret
  sp_384_mont_tpl_6 ENDP
  _text ENDS
  ; /* Subtract two Montgomery form numbers (r = a - b % m).
  ;  *
  ;  * The modulus is not read through the m argument: its three low limbs
  ;  * are the immediates loaded below and its three high limbs (all ones)
  ;  * are supplied by the mask register itself.
  ;  *
  ;  * r  (rcx) Result of subtraction.
  ;  * a  (rdx) Number to subtract from in Montgomery form.
  ;  * b  (r8)  Number to subtract with in Montgomery form.
  ;  * m  (r9)  Modulus (prime); the register is overwritten without being read.
  ;  */
  _text SEGMENT READONLY PARA
  sp_384_mont_sub_6 PROC
          push    r12
          push    r13
          push    r14
          push    r15
          push    rdi
          ; Low three limbs of the modulus (masked before every use).
          mov     r14, 0FFFFFFFFh
          mov     r15, 0FFFFFFFF00000000h
          mov     rdi, 0FFFFFFFFFFFFFFFEh
          ; rax, r9, r10, r11, r12, r13 = a[0..5]
          mov     rax, QWORD PTR [rdx]
          mov     r9,  QWORD PTR [rdx+8]
          mov     r10, QWORD PTR [rdx+16]
          mov     r11, QWORD PTR [rdx+24]
          mov     r12, QWORD PTR [rdx+32]
          mov     r13, QWORD PTR [rdx+40]
          ; difference = a - b; the borrow out of limb 5 is left in CF
          sub     rax, QWORD PTR [r8]
          sbb     r9,  QWORD PTR [r8+8]
          sbb     r10, QWORD PTR [r8+16]
          sbb     r11, QWORD PTR [r8+24]
          sbb     r12, QWORD PTR [r8+32]
          sbb     r13, QWORD PTR [r8+40]
          sbb     rdx, rdx                    ; rdx = 0 - CF: all ones if the subtraction borrowed
          ; First conditional addition of the modulus.
          and     r14, rdx
          and     r15, rdx
          and     rdi, rdx
          add     rax, r14
          adc     r9,  r15
          adc     r10, rdi
          adc     r11, rdx
          adc     r12, rdx
          adc     r13, rdx
          adc     rdx, 0                      ; mask + carry: 0 once a carry cancelled the borrow
          ; Second conditional addition, taken only if the mask is still set.
          and     r14, rdx
          and     r15, rdx
          and     rdi, rdx
          add     rax, r14
          adc     r9,  r15
          adc     r10, rdi
          adc     r11, rdx
          adc     r12, rdx
          adc     r13, rdx
          ; r = result
          mov     QWORD PTR [rcx], rax
          mov     QWORD PTR [rcx+8], r9
          mov     QWORD PTR [rcx+16], r10
          mov     QWORD PTR [rcx+24], r11
          mov     QWORD PTR [rcx+32], r12
          mov     QWORD PTR [rcx+40], r13
          pop     rdi
          pop     r15
          pop     r14
          pop     r13
          pop     r12
          ret
  sp_384_mont_sub_6 ENDP
  _text ENDS
  ; /* Subtract two Montgomery form numbers (r = a - b % m).
  ;  *
  ;  * b is less than the modulus, so a single conditional addition of the
  ;  * modulus is performed after the subtraction.
  ;  *
  ;  * The modulus is not read through the m argument: its three low limbs
  ;  * are the immediates loaded below and its three high limbs (all ones)
  ;  * are supplied by the mask register itself.
  ;  *
  ;  * r  (rcx) Result of subtraction.
  ;  * a  (rdx) Number to subtract from in Montgomery form.
  ;  * b  (r8)  Number to subtract with in Montgomery form.
  ;  * m  (r9)  Modulus (prime); the register is overwritten without being read.
  ;  */
  _text SEGMENT READONLY PARA
  sp_384_mont_sub_lower_6 PROC
          push    r12
          push    r13
          push    r14
          push    r15
          push    rdi
          ; Low three limbs of the modulus (masked before use).
          mov     r14, 0FFFFFFFFh
          mov     r15, 0FFFFFFFF00000000h
          mov     rdi, 0FFFFFFFFFFFFFFFEh
          ; rax, r9, r10, r11, r12, r13 = a[0..5]
          mov     rax, QWORD PTR [rdx]
          mov     r9,  QWORD PTR [rdx+8]
          mov     r10, QWORD PTR [rdx+16]
          mov     r11, QWORD PTR [rdx+24]
          mov     r12, QWORD PTR [rdx+32]
          mov     r13, QWORD PTR [rdx+40]
          ; difference = a - b; the borrow out of limb 5 is left in CF
          sub     rax, QWORD PTR [r8]
          sbb     r9,  QWORD PTR [r8+8]
          sbb     r10, QWORD PTR [r8+16]
          sbb     r11, QWORD PTR [r8+24]
          sbb     r12, QWORD PTR [r8+32]
          sbb     r13, QWORD PTR [r8+40]
          sbb     rdx, rdx                    ; rdx = 0 - CF: all ones if the subtraction borrowed
          ; Conditional addition of the modulus.
          and     r14, rdx
          and     r15, rdx
          and     rdi, rdx
          add     rax, r14
          adc     r9,  r15
          adc     r10, rdi
          adc     r11, rdx
          adc     r12, rdx
          adc     r13, rdx
          ; r = result
          mov     QWORD PTR [rcx], rax
          mov     QWORD PTR [rcx+8], r9
          mov     QWORD PTR [rcx+16], r10
          mov     QWORD PTR [rcx+24], r11
          mov     QWORD PTR [rcx+32], r12
          mov     QWORD PTR [rcx+40], r13
          pop     rdi
          pop     r15
          pop     r14
          pop     r13
          pop     r12
          ret
  sp_384_mont_sub_lower_6 ENDP
  _text ENDS
  ; /* Divide the number by 2 mod the modulus (prime). (r = a / 2 % m)
  ;  *
  ;  * When a is odd the modulus is added first (selected with a mask, no
  ;  * branch); the 7-limb sum, including its carry, is then shifted right by
  ;  * one bit.  A 48-byte stack buffer holds the intermediate sum.
  ;  *
  ;  * r  (rcx) Result of division by 2.
  ;  * a  (rdx) Number to divide.
  ;  * m  (r8)  Modulus (prime).
  ;  */
  _text SEGMENT READONLY PARA
  sp_384_div2_6 PROC
          push    r12
          push    r13
          sub     rsp, 48                     ; 6-limb scratch buffer
          xor     r12, r12                    ; receives the carry out of a + (m & mask)
          mov     rax, QWORD PTR [rdx]        ; a[0], reused by the addition below
          mov     r13, rax
          and     r13, 1
          neg     r13                         ; mask: all ones if a is odd, else 0
          ; scratch = m & mask
          mov     r10, QWORD PTR [r8]
          and     r10, r13
          mov     QWORD PTR [rsp], r10
          mov     r10, QWORD PTR [r8+8]
          and     r10, r13
          mov     QWORD PTR [rsp+8], r10
          mov     r10, QWORD PTR [r8+16]
          and     r10, r13
          mov     QWORD PTR [rsp+16], r10
          mov     r10, QWORD PTR [r8+24]
          and     r10, r13
          mov     QWORD PTR [rsp+24], r10
          mov     r10, QWORD PTR [r8+32]
          and     r10, r13
          mov     QWORD PTR [rsp+32], r10
          mov     r10, QWORD PTR [r8+40]
          and     r10, r13
          mov     QWORD PTR [rsp+40], r10
          ; scratch += a, carry out collected in r12
          add     QWORD PTR [rsp], rax
          mov     rax, QWORD PTR [rdx+8]
          adc     QWORD PTR [rsp+8], rax
          mov     rax, QWORD PTR [rdx+16]
          adc     QWORD PTR [rsp+16], rax
          mov     rax, QWORD PTR [rdx+24]
          adc     QWORD PTR [rsp+24], rax
          mov     rax, QWORD PTR [rdx+32]
          adc     QWORD PTR [rsp+32], rax
          mov     rax, QWORD PTR [rdx+40]
          adc     QWORD PTR [rsp+40], rax
          adc     r12, 0
          ; r = (r12:scratch) >> 1; rax and r9 alternate as low/high limb
          mov     rax, QWORD PTR [rsp]
          mov     r9,  QWORD PTR [rsp+8]
          shrd    rax, r9, 1
          mov     QWORD PTR [rcx], rax
          mov     rax, QWORD PTR [rsp+16]
          shrd    r9, rax, 1
          mov     QWORD PTR [rcx+8], r9
          mov     r9,  QWORD PTR [rsp+24]
          shrd    rax, r9, 1
          mov     QWORD PTR [rcx+16], rax
          mov     rax, QWORD PTR [rsp+32]
          shrd    r9, rax, 1
          mov     QWORD PTR [rcx+24], r9
          mov     r9,  QWORD PTR [rsp+40]
          shrd    rax, r9, 1
          mov     QWORD PTR [rcx+32], rax
          shrd    r9, r12, 1                  ; top limb takes the carry bit as its new bit 63
          mov     QWORD PTR [rcx+40], r9
          add     rsp, 48
          pop     r13
          pop     r12
          ret
  sp_384_div2_6 ENDP
  _text ENDS
  ; /* Double a Montgomery form number (r = a + a % m).
  ;  *
  ;  * a is less than m, so a single conditional subtraction of the modulus
  ;  * is performed after the doubling.
  ;  *
  ;  * The modulus is not read through the m argument: its three low limbs
  ;  * are the immediates loaded below and its three high limbs (all ones)
  ;  * are supplied by the mask register itself.
  ;  *
  ;  * r  (rcx) Result of doubling.
  ;  * a  (rdx) Number to double in Montgomery form.
  ;  * m  (r8)  Modulus (prime); the register is overwritten without being read.
  ;  */
  _text SEGMENT READONLY PARA
  sp_384_mont_dbl_lower_6 PROC
          push    r12
          push    r13
          push    r14
          push    r15
          push    rdi
          ; Low three limbs of the modulus (masked before use).
          mov     r13, 0FFFFFFFFh
          mov     r14, 0FFFFFFFF00000000h
          mov     r15, 0FFFFFFFFFFFFFFFEh
          ; rax, r8, r9, r10, r11, r12 = a[0..5]
          mov     rax, QWORD PTR [rdx]
          mov     r8,  QWORD PTR [rdx+8]
          mov     r9,  QWORD PTR [rdx+16]
          mov     r10, QWORD PTR [rdx+24]
          mov     r11, QWORD PTR [rdx+32]
          mov     r12, QWORD PTR [rdx+40]
          ; value = a + a; the carry out of limb 5 is left in CF
          add     rax, rax
          adc     r8,  r8
          adc     r9,  r9
          adc     r10, r10
          adc     r11, r11
          adc     r12, r12
          sbb     rdi, rdi                    ; rdi = 0 - CF: all ones if the doubling overflowed
          ; Conditional subtraction of the modulus.
          and     r13, rdi
          and     r14, rdi
          and     r15, rdi
          sub     rax, r13
          sbb     r8,  r14
          sbb     r9,  r15
          sbb     r10, rdi
          sbb     r11, rdi
          sbb     r12, rdi
          ; r = result
          mov     QWORD PTR [rcx], rax
          mov     QWORD PTR [rcx+8], r8
          mov     QWORD PTR [rcx+16], r9
          mov     QWORD PTR [rcx+24], r10
          mov     QWORD PTR [rcx+32], r11
          mov     QWORD PTR [rcx+40], r12
          pop     rdi
          pop     r15
          pop     r14
          pop     r13
          pop     r12
          ret
  sp_384_mont_dbl_lower_6 ENDP
  _text ENDS
  ; /* Triple a Montgomery form number (r = a + a + a % m).
  ;  *
  ;  * a is less than m, so the doubling stage needs only one conditional
  ;  * subtraction of the modulus; the following addition of a is corrected
  ;  * with two.  The modulus is not read through the m argument: its three
  ;  * low limbs are the immediates loaded below and its three high limbs
  ;  * (all ones) come from the mask register.
  ;  *
  ;  * Nothing is written to r until a has been read for the last time, so
  ;  * r may be the same buffer as a.
  ;  *
  ;  * r  (rcx) Result of tripling.
  ;  * a  (rdx) Number to triple in Montgomery form.
  ;  * m  (r8)  Modulus (prime); the register is overwritten without being read.
  ;  */
  _text SEGMENT READONLY PARA
  sp_384_mont_tpl_lower_6 PROC
          push    r12
          push    r13
          push    r14
          push    r15
          push    rdi
          ; Low three limbs of the modulus (masked before every use).
          mov     r13, 0FFFFFFFFh
          mov     r14, 0FFFFFFFF00000000h
          mov     r15, 0FFFFFFFFFFFFFFFEh
          ; rax, r8, r9, r10, r11, r12 = a[0..5]
          mov     rax, QWORD PTR [rdx]
          mov     r8,  QWORD PTR [rdx+8]
          mov     r9,  QWORD PTR [rdx+16]
          mov     r10, QWORD PTR [rdx+24]
          mov     r11, QWORD PTR [rdx+32]
          mov     r12, QWORD PTR [rdx+40]
          ; ---- Stage 1: value = 2a mod m (single correction) ----
          add     rax, rax
          adc     r8,  r8
          adc     r9,  r9
          adc     r10, r10
          adc     r11, r11
          adc     r12, r12
          sbb     rdi, rdi                    ; rdi = 0 - CF: all ones if the doubling overflowed
          and     r13, rdi
          and     r14, rdi
          and     r15, rdi
          sub     rax, r13
          sbb     r8,  r14
          sbb     r9,  r15
          sbb     r10, rdi
          sbb     r11, rdi
          sbb     r12, rdi
          ; No store to r here: a[0] is read again just below, and an early
          ; write to r[0] would replace it when r and a are the same buffer.
          ; ---- Stage 2: value = value + a mod m (double correction) ----
          ; The masked copies may now be zero, so reload the modulus limbs.
          mov     r13, 0FFFFFFFFh
          mov     r14, 0FFFFFFFF00000000h
          mov     r15, 0FFFFFFFFFFFFFFFEh
          add     rax, QWORD PTR [rdx]
          adc     r8,  QWORD PTR [rdx+8]
          adc     r9,  QWORD PTR [rdx+16]
          adc     r10, QWORD PTR [rdx+24]
          adc     r11, QWORD PTR [rdx+32]
          adc     r12, QWORD PTR [rdx+40]
          sbb     rdi, rdi                    ; all ones if the addition overflowed
          and     r13, rdi
          and     r14, rdi
          and     r15, rdi
          sub     rax, r13
          sbb     r8,  r14
          sbb     r9,  r15
          sbb     r10, rdi
          sbb     r11, rdi
          sbb     r12, rdi
          adc     rdi, 0                      ; mask + borrow: 0 once a borrow cancelled the overflow
          and     r13, rdi
          and     r14, rdi
          and     r15, rdi
          sub     rax, r13
          sbb     r8,  r14
          sbb     r9,  r15
          sbb     r10, rdi
          sbb     r11, rdi
          sbb     r12, rdi
          ; r = result
          mov     QWORD PTR [rcx], rax
          mov     QWORD PTR [rcx+8], r8
          mov     QWORD PTR [rcx+16], r9
          mov     QWORD PTR [rcx+24], r10
          mov     QWORD PTR [rcx+32], r11
          mov     QWORD PTR [rcx+40], r12
          pop     rdi
          pop     r15
          pop     r14
          pop     r13
          pop     r12
          ret
  sp_384_mont_tpl_lower_6 ENDP
  _text ENDS
  60055. IFNDEF WC_NO_CACHE_RESISTANT
  60056. ; /* Touch each possible point that could be being copied.
  ; *
  ; * 32 consecutive 296-byte table entries are scanned, starting one entry
  ; * past the table pointer.  Every scanned entry is loaded; a compare mask
  ; * keeps only the entry whose 1-based position equals idx and the kept
  ; * bytes are OR-ed into accumulators.  If idx matches none of 1..32 the
  ; * bytes written to r are zero.
  ; * Bytes 0-47, 96-143 and 192-239 of the entry are copied to the same
  ; * offsets in r (presumably the x, y and z ordinates - confirm against the
  ; * point structure).
  60057. ; *
  60058. ; * r Point to copy into (rcx).
  60059. ; * table Table - start of the entries to access (rdx).
  60060. ; * idx Index of point to retrieve (r8d).
  60061. ; */
  60062. _text SEGMENT READONLY PARA
  60063. sp_384_get_point_33_6 PROC
  ; Save xmm6-xmm15 (callee-saved in the Microsoft x64 convention) in a
  ; 160-byte stack area; they are restored before returning.
  60064. sub rsp, 160
  60065. vmovdqu OWORD PTR [rsp], xmm6
  60066. vmovdqu OWORD PTR [rsp+16], xmm7
  60067. vmovdqu OWORD PTR [rsp+32], xmm8
  60068. vmovdqu OWORD PTR [rsp+48], xmm9
  60069. vmovdqu OWORD PTR [rsp+64], xmm10
  60070. vmovdqu OWORD PTR [rsp+80], xmm11
  60071. vmovdqu OWORD PTR [rsp+96], xmm12
  60072. vmovdqu OWORD PTR [rsp+112], xmm13
  60073. vmovdqu OWORD PTR [rsp+128], xmm14
  60074. vmovdqu OWORD PTR [rsp+144], xmm15
  ; ---- Pass 1: bytes 0-47 and 96-143 of the selected entry ----
  60075. mov rax, 1
  60076. movd xmm13, r8d ; idx, broadcast below
  60077. add rdx, 296 ; step over the first 296-byte entry
  60078. movd xmm15, eax ; 1, broadcast below
  60079. mov rax, 32 ; loop counter: 32 entries are scanned
  60080. pshufd xmm15, xmm15, 0 ; 1 in every dword lane (increment)
  60081. pshufd xmm13, xmm13, 0 ; idx in every dword lane
  60082. pxor xmm14, xmm14 ; overwritten by the movdqa below
  60083. pxor xmm0, xmm0 ; xmm0-xmm5 accumulate the selected bytes
  60084. pxor xmm1, xmm1
  60085. pxor xmm2, xmm2
  60086. pxor xmm3, xmm3
  60087. pxor xmm4, xmm4
  60088. pxor xmm5, xmm5
  60089. movdqa xmm14, xmm15 ; running entry number, starts at 1
  60090. L_384_get_point_33_6_start_1:
  60091. movdqa xmm12, xmm14
  60092. paddd xmm14, xmm15 ; entry number for the next iteration
  60093. pcmpeqd xmm12, xmm13 ; mask: all ones if entry number == idx, else 0
  60094. movdqu xmm6, [rdx]
  60095. movdqu xmm7, [rdx+16]
  60096. movdqu xmm8, [rdx+32]
  60097. movdqu xmm9, [rdx+96]
  60098. movdqu xmm10, [rdx+112]
  60099. movdqu xmm11, [rdx+128]
  60100. add rdx, 296 ; next entry
  60101. pand xmm6, xmm12 ; keep the loaded bytes only for the matching entry
  60102. pand xmm7, xmm12
  60103. pand xmm8, xmm12
  60104. pand xmm9, xmm12
  60105. pand xmm10, xmm12
  60106. pand xmm11, xmm12
  60107. por xmm0, xmm6
  60108. por xmm1, xmm7
  60109. por xmm2, xmm8
  60110. por xmm3, xmm9
  60111. por xmm4, xmm10
  60112. por xmm5, xmm11
  60113. dec rax
  60114. jnz L_384_get_point_33_6_start_1
  60115. movdqu [rcx], xmm0
  60116. movdqu [rcx+16], xmm1
  60117. movdqu [rcx+32], xmm2
  60118. movdqu [rcx+96], xmm3
  60119. movdqu [rcx+112], xmm4
  60120. movdqu [rcx+128], xmm5
  ; ---- Pass 2: bytes 192-239 of the selected entry ----
  60121. mov rax, 1
  60122. movd xmm13, r8d
  60123. sub rdx, 9472 ; rewind 32 * 296 bytes to the first scanned entry
  60124. movd xmm15, eax
  60125. mov rax, 32 ; loop counter: the same 32 entries again
  60126. pshufd xmm15, xmm15, 0
  60127. pshufd xmm13, xmm13, 0
  60128. pxor xmm14, xmm14 ; overwritten by the movdqa below
  60129. pxor xmm0, xmm0 ; xmm0-xmm2 accumulate the selected bytes
  60130. pxor xmm1, xmm1
  60131. pxor xmm2, xmm2
  60132. movdqa xmm14, xmm15 ; running entry number, starts at 1
  60133. L_384_get_point_33_6_start_2:
  60134. movdqa xmm12, xmm14
  60135. paddd xmm14, xmm15
  60136. pcmpeqd xmm12, xmm13 ; mask: all ones if entry number == idx, else 0
  60137. movdqu xmm6, [rdx+192]
  60138. movdqu xmm7, [rdx+208]
  60139. movdqu xmm8, [rdx+224]
  60140. add rdx, 296 ; next entry
  60141. pand xmm6, xmm12
  60142. pand xmm7, xmm12
  60143. pand xmm8, xmm12
  60144. por xmm0, xmm6
  60145. por xmm1, xmm7
  60146. por xmm2, xmm8
  60147. dec rax
  60148. jnz L_384_get_point_33_6_start_2
  60149. movdqu [rcx+192], xmm0
  60150. movdqu [rcx+208], xmm1
  60151. movdqu [rcx+224], xmm2
  ; Restore the saved xmm6-xmm15 and release the stack area.
  60152. vmovdqu xmm6, OWORD PTR [rsp]
  60153. vmovdqu xmm7, OWORD PTR [rsp+16]
  60154. vmovdqu xmm8, OWORD PTR [rsp+32]
  60155. vmovdqu xmm9, OWORD PTR [rsp+48]
  60156. vmovdqu xmm10, OWORD PTR [rsp+64]
  60157. vmovdqu xmm11, OWORD PTR [rsp+80]
  60158. vmovdqu xmm12, OWORD PTR [rsp+96]
  60159. vmovdqu xmm13, OWORD PTR [rsp+112]
  60160. vmovdqu xmm14, OWORD PTR [rsp+128]
  60161. vmovdqu xmm15, OWORD PTR [rsp+144]
  60162. add rsp, 160
  60163. ret
  60164. sp_384_get_point_33_6 ENDP
  60165. _text ENDS
  60166. IFDEF HAVE_INTEL_AVX2
  60167. ; /* Touch each possible point that could be being copied.
  ; *
  ; * AVX2 variant: a single pass over 32 consecutive 296-byte table entries,
  ; * starting one entry past the table pointer.  Every scanned entry is
  ; * loaded; a compare mask keeps only the entry whose 1-based position
  ; * equals idx and the kept bytes are OR-ed into accumulators.  If idx
  ; * matches none of 1..32 the bytes written to r are zero.
  ; * Bytes 0-47, 96-143 and 192-239 of the entry are copied to the same
  ; * offsets in r (presumably the x, y and z ordinates - confirm against the
  ; * point structure).
  60168. ; *
  60169. ; * r Point to copy into (rcx).
  60170. ; * table Table - start of the entries to access (rdx).
  60171. ; * idx Index of point to retrieve (r8d).
  60172. ; */
  60173. _text SEGMENT READONLY PARA
  60174. sp_384_get_point_33_avx2_6 PROC
  ; Save xmm6-xmm15 (callee-saved in the Microsoft x64 convention) in a
  ; 160-byte stack area; they are restored before returning.
  60175. sub rsp, 160
  60176. vmovdqu OWORD PTR [rsp], xmm6
  60177. vmovdqu OWORD PTR [rsp+16], xmm7
  60178. vmovdqu OWORD PTR [rsp+32], xmm8
  60179. vmovdqu OWORD PTR [rsp+48], xmm9
  60180. vmovdqu OWORD PTR [rsp+64], xmm10
  60181. vmovdqu OWORD PTR [rsp+80], xmm11
  60182. vmovdqu OWORD PTR [rsp+96], xmm12
  60183. vmovdqu OWORD PTR [rsp+112], xmm13
  60184. vmovdqu OWORD PTR [rsp+128], xmm14
  60185. vmovdqu OWORD PTR [rsp+144], xmm15
  60186. mov rax, 1
  60187. movd xmm13, r8d ; idx, broadcast below
  60188. add rdx, 296 ; step over the first 296-byte entry
  60189. movd xmm15, eax ; 1, broadcast below
  60190. mov rax, 32 ; loop counter: 32 entries are scanned
  60191. vpxor ymm14, ymm14, ymm14 ; all-zero permutation indices
  60192. vpermd ymm13, ymm14, ymm13 ; dword 0 (idx) copied to all eight lanes
  60193. vpermd ymm15, ymm14, ymm15 ; dword 0 (1) copied to all eight lanes
  60194. vpxor ymm0, ymm0, ymm0 ; ymm0/xmm1, ymm2/xmm3, ymm4/xmm5 accumulate 32+16 bytes each
  60195. vpxor xmm1, xmm1, xmm1
  60196. vpxor ymm2, ymm2, ymm2
  60197. vpxor xmm3, xmm3, xmm3
  60198. vpxor ymm4, ymm4, ymm4
  60199. vpxor xmm5, xmm5, xmm5
  60200. vmovdqa ymm14, ymm15 ; running entry number, starts at 1
  60201. L_384_get_point_33_avx2_6_start:
  60202. vpcmpeqd ymm12, ymm14, ymm13 ; mask: all ones if entry number == idx, else 0
  60203. vpaddd ymm14, ymm14, ymm15 ; entry number for the next iteration
  60204. vmovupd ymm6, YMMWORD PTR [rdx]
  60205. vmovdqu xmm7, OWORD PTR [rdx+32]
  60206. vmovupd ymm8, YMMWORD PTR [rdx+96]
  60207. vmovdqu xmm9, OWORD PTR [rdx+128]
  60208. vmovupd ymm10, YMMWORD PTR [rdx+192]
  60209. vmovdqu xmm11, OWORD PTR [rdx+224]
  60210. add rdx, 296 ; next entry
  60211. vpand ymm6, ymm6, ymm12 ; keep the loaded bytes only for the matching entry
  60212. vpand xmm7, xmm7, xmm12
  60213. vpand ymm8, ymm8, ymm12
  60214. vpand xmm9, xmm9, xmm12
  60215. vpand ymm10, ymm10, ymm12
  60216. vpand xmm11, xmm11, xmm12
  60217. vpor ymm0, ymm0, ymm6
  60218. vpor xmm1, xmm1, xmm7
  60219. vpor ymm2, ymm2, ymm8
  60220. vpor xmm3, xmm3, xmm9
  60221. vpor ymm4, ymm4, ymm10
  60222. vpor xmm5, xmm5, xmm11
  60223. dec rax
  60224. jnz L_384_get_point_33_avx2_6_start
  60225. vmovupd YMMWORD PTR [rcx], ymm0
  60226. vmovdqu OWORD PTR [rcx+32], xmm1
  60227. vmovupd YMMWORD PTR [rcx+96], ymm2
  60228. vmovdqu OWORD PTR [rcx+128], xmm3
  60229. vmovupd YMMWORD PTR [rcx+192], ymm4
  60230. vmovdqu OWORD PTR [rcx+224], xmm5
  ; Restore the saved xmm6-xmm15 and release the stack area.
  ; NOTE(review): 256-bit registers were written above and no vzeroupper is
  ; issued before returning - confirm whether callers rely on a clean upper
  ; state to avoid AVX/SSE transition penalties.
  60231. vmovdqu xmm6, OWORD PTR [rsp]
  60232. vmovdqu xmm7, OWORD PTR [rsp+16]
  60233. vmovdqu xmm8, OWORD PTR [rsp+32]
  60234. vmovdqu xmm9, OWORD PTR [rsp+48]
  60235. vmovdqu xmm10, OWORD PTR [rsp+64]
  60236. vmovdqu xmm11, OWORD PTR [rsp+80]
  60237. vmovdqu xmm12, OWORD PTR [rsp+96]
  60238. vmovdqu xmm13, OWORD PTR [rsp+112]
  60239. vmovdqu xmm14, OWORD PTR [rsp+128]
  60240. vmovdqu xmm15, OWORD PTR [rsp+144]
  60241. add rsp, 160
  60242. ret
  60243. sp_384_get_point_33_avx2_6 ENDP
  60244. _text ENDS
  60245. ENDIF
  60246. ENDIF
  60247. IFDEF HAVE_INTEL_AVX2
  60248. ; /* Reduce the number back to 384 bits using Montgomery reduction.
  ; *
  ; * Six fully unrolled rounds.  Each round computes mu = a[i] * mp and adds
  ; * mu * m into a[i..i+6] using MULX with two independent carry chains:
  ; * ADCX (CF) for the low product halves and ADOX (OF) for the high halves.
  ; * Afterwards m, selected by the final overflow, is subtracted from
  ; * a[6..11] and the result is written to a[0..5].
  60249. ; *
  60250. ; * a A single precision number to reduce in place (rcx).
  60251. ; * m The single precision number representing the modulus (rdx).
  60252. ; * mp The digit representing the negative inverse of m mod 2^n (r8).
  60253. ; */
  60254. _text SEGMENT READONLY PARA
  60255. sp_384_mont_reduce_order_avx2_6 PROC
  60256. push r12
  60257. push r13
  60258. push r14
  60259. push r15
  60260. mov rax, rdx ; rax = m; rdx is needed as the implicit MULX operand
  60261. xor r15, r15 ; r15 = overflow carried from the previous round
  60262. mov r14, QWORD PTR [rcx] ; r14 = a[i], kept in a register between rounds
  60263. xor r13, r13
  ; Nothing in this procedure jumps to the label below; the rounds are unrolled.
  60264. L_mont_loop_order_avx2_6:
  ; ---- round 0 ----
  60265. ; mu = a[i] * mp
  60266. mov rdx, r14
  60267. mov r11, r14
  60268. imul rdx, r8
  60269. xor r13, r13 ; r13 = 0; also clears CF and OF to start both carry chains
  60270. ; a[i+0] += m[0] * mu
  60271. mulx r10, r9, QWORD PTR [rax]
  60272. mov r14, QWORD PTR [rcx+8]
  60273. adcx r11, r9 ; sum is not stored (r11 is reloaded below); its carry stays in CF
  60274. adox r14, r10
  60275. ; a[i+1] += m[1] * mu
  60276. mulx r10, r9, QWORD PTR [rax+8]
  60277. mov r11, QWORD PTR [rcx+16]
  60278. adcx r14, r9 ; r14 = new a[i+1], the a[i] of the next round
  60279. adox r11, r10
  60280. ; a[i+2] += m[2] * mu
  60281. mulx r10, r9, QWORD PTR [rax+16]
  60282. mov r12, QWORD PTR [rcx+24]
  60283. adcx r11, r9
  60284. adox r12, r10
  60285. mov QWORD PTR [rcx+16], r11
  60286. ; a[i+3] += m[3] * mu
  60287. mulx r10, r9, QWORD PTR [rax+24]
  60288. mov r11, QWORD PTR [rcx+32]
  60289. adcx r12, r9
  60290. adox r11, r10
  60291. mov QWORD PTR [rcx+24], r12
  60292. ; a[i+4] += m[4] * mu
  60293. mulx r10, r9, QWORD PTR [rax+32]
  60294. mov r12, QWORD PTR [rcx+40]
  60295. adcx r11, r9
  60296. adox r12, r10
  60297. mov QWORD PTR [rcx+32], r11
  60298. ; a[i+5] += m[5] * mu
  60299. mulx r10, r9, QWORD PTR [rax+40]
  60300. mov r11, QWORD PTR [rcx+48]
  60301. adcx r12, r9
  60302. adox r11, r10
  60303. mov QWORD PTR [rcx+40], r12
  60304. adcx r11, r15 ; add the overflow carried in from the previous round
  60305. mov QWORD PTR [rcx+48], r11
  60306. mov r15, r13 ; r15 = 0 + OF + CF: overflow handed to the next round
  60307. adox r15, r13
  60308. adcx r15, r13
  ; ---- round 1 ----
  60309. ; mu = a[i] * mp
  60310. mov rdx, r14
  60311. mov r11, r14
  60312. imul rdx, r8
  60313. xor r13, r13
  60314. ; a[i+0] += m[0] * mu
  60315. mulx r10, r9, QWORD PTR [rax]
  60316. mov r14, QWORD PTR [rcx+16]
  60317. adcx r11, r9
  60318. adox r14, r10
  60319. ; a[i+1] += m[1] * mu
  60320. mulx r10, r9, QWORD PTR [rax+8]
  60321. mov r11, QWORD PTR [rcx+24]
  60322. adcx r14, r9
  60323. adox r11, r10
  60324. ; a[i+2] += m[2] * mu
  60325. mulx r10, r9, QWORD PTR [rax+16]
  60326. mov r12, QWORD PTR [rcx+32]
  60327. adcx r11, r9
  60328. adox r12, r10
  60329. mov QWORD PTR [rcx+24], r11
  60330. ; a[i+3] += m[3] * mu
  60331. mulx r10, r9, QWORD PTR [rax+24]
  60332. mov r11, QWORD PTR [rcx+40]
  60333. adcx r12, r9
  60334. adox r11, r10
  60335. mov QWORD PTR [rcx+32], r12
  60336. ; a[i+4] += m[4] * mu
  60337. mulx r10, r9, QWORD PTR [rax+32]
  60338. mov r12, QWORD PTR [rcx+48]
  60339. adcx r11, r9
  60340. adox r12, r10
  60341. mov QWORD PTR [rcx+40], r11
  60342. ; a[i+5] += m[5] * mu
  60343. mulx r10, r9, QWORD PTR [rax+40]
  60344. mov r11, QWORD PTR [rcx+56]
  60345. adcx r12, r9
  60346. adox r11, r10
  60347. mov QWORD PTR [rcx+48], r12
  60348. adcx r11, r15
  60349. mov QWORD PTR [rcx+56], r11
  60350. mov r15, r13
  60351. adox r15, r13
  60352. adcx r15, r13
  ; ---- round 2 ----
  60353. ; mu = a[i] * mp
  60354. mov rdx, r14
  60355. mov r11, r14
  60356. imul rdx, r8
  60357. xor r13, r13
  60358. ; a[i+0] += m[0] * mu
  60359. mulx r10, r9, QWORD PTR [rax]
  60360. mov r14, QWORD PTR [rcx+24]
  60361. adcx r11, r9
  60362. adox r14, r10
  60363. ; a[i+1] += m[1] * mu
  60364. mulx r10, r9, QWORD PTR [rax+8]
  60365. mov r11, QWORD PTR [rcx+32]
  60366. adcx r14, r9
  60367. adox r11, r10
  60368. ; a[i+2] += m[2] * mu
  60369. mulx r10, r9, QWORD PTR [rax+16]
  60370. mov r12, QWORD PTR [rcx+40]
  60371. adcx r11, r9
  60372. adox r12, r10
  60373. mov QWORD PTR [rcx+32], r11
  60374. ; a[i+3] += m[3] * mu
  60375. mulx r10, r9, QWORD PTR [rax+24]
  60376. mov r11, QWORD PTR [rcx+48]
  60377. adcx r12, r9
  60378. adox r11, r10
  60379. mov QWORD PTR [rcx+40], r12
  60380. ; a[i+4] += m[4] * mu
  60381. mulx r10, r9, QWORD PTR [rax+32]
  60382. mov r12, QWORD PTR [rcx+56]
  60383. adcx r11, r9
  60384. adox r12, r10
  60385. mov QWORD PTR [rcx+48], r11
  60386. ; a[i+5] += m[5] * mu
  60387. mulx r10, r9, QWORD PTR [rax+40]
  60388. mov r11, QWORD PTR [rcx+64]
  60389. adcx r12, r9
  60390. adox r11, r10
  60391. mov QWORD PTR [rcx+56], r12
  60392. adcx r11, r15
  60393. mov QWORD PTR [rcx+64], r11
  60394. mov r15, r13
  60395. adox r15, r13
  60396. adcx r15, r13
  ; ---- round 3 ----
  60397. ; mu = a[i] * mp
  60398. mov rdx, r14
  60399. mov r11, r14
  60400. imul rdx, r8
  60401. xor r13, r13
  60402. ; a[i+0] += m[0] * mu
  60403. mulx r10, r9, QWORD PTR [rax]
  60404. mov r14, QWORD PTR [rcx+32]
  60405. adcx r11, r9
  60406. adox r14, r10
  60407. ; a[i+1] += m[1] * mu
  60408. mulx r10, r9, QWORD PTR [rax+8]
  60409. mov r11, QWORD PTR [rcx+40]
  60410. adcx r14, r9
  60411. adox r11, r10
  60412. ; a[i+2] += m[2] * mu
  60413. mulx r10, r9, QWORD PTR [rax+16]
  60414. mov r12, QWORD PTR [rcx+48]
  60415. adcx r11, r9
  60416. adox r12, r10
  60417. mov QWORD PTR [rcx+40], r11
  60418. ; a[i+3] += m[3] * mu
  60419. mulx r10, r9, QWORD PTR [rax+24]
  60420. mov r11, QWORD PTR [rcx+56]
  60421. adcx r12, r9
  60422. adox r11, r10
  60423. mov QWORD PTR [rcx+48], r12
  60424. ; a[i+4] += m[4] * mu
  60425. mulx r10, r9, QWORD PTR [rax+32]
  60426. mov r12, QWORD PTR [rcx+64]
  60427. adcx r11, r9
  60428. adox r12, r10
  60429. mov QWORD PTR [rcx+56], r11
  60430. ; a[i+5] += m[5] * mu
  60431. mulx r10, r9, QWORD PTR [rax+40]
  60432. mov r11, QWORD PTR [rcx+72]
  60433. adcx r12, r9
  60434. adox r11, r10
  60435. mov QWORD PTR [rcx+64], r12
  60436. adcx r11, r15
  60437. mov QWORD PTR [rcx+72], r11
  60438. mov r15, r13
  60439. adox r15, r13
  60440. adcx r15, r13
  ; ---- round 4 ----
  60441. ; mu = a[i] * mp
  60442. mov rdx, r14
  60443. mov r11, r14
  60444. imul rdx, r8
  60445. xor r13, r13
  60446. ; a[i+0] += m[0] * mu
  60447. mulx r10, r9, QWORD PTR [rax]
  60448. mov r14, QWORD PTR [rcx+40]
  60449. adcx r11, r9
  60450. adox r14, r10
  60451. ; a[i+1] += m[1] * mu
  60452. mulx r10, r9, QWORD PTR [rax+8]
  60453. mov r11, QWORD PTR [rcx+48]
  60454. adcx r14, r9
  60455. adox r11, r10
  60456. ; a[i+2] += m[2] * mu
  60457. mulx r10, r9, QWORD PTR [rax+16]
  60458. mov r12, QWORD PTR [rcx+56]
  60459. adcx r11, r9
  60460. adox r12, r10
  60461. mov QWORD PTR [rcx+48], r11
  60462. ; a[i+3] += m[3] * mu
  60463. mulx r10, r9, QWORD PTR [rax+24]
  60464. mov r11, QWORD PTR [rcx+64]
  60465. adcx r12, r9
  60466. adox r11, r10
  60467. mov QWORD PTR [rcx+56], r12
  60468. ; a[i+4] += m[4] * mu
  60469. mulx r10, r9, QWORD PTR [rax+32]
  60470. mov r12, QWORD PTR [rcx+72]
  60471. adcx r11, r9
  60472. adox r12, r10
  60473. mov QWORD PTR [rcx+64], r11
  60474. ; a[i+5] += m[5] * mu
  60475. mulx r10, r9, QWORD PTR [rax+40]
  60476. mov r11, QWORD PTR [rcx+80]
  60477. adcx r12, r9
  60478. adox r11, r10
  60479. mov QWORD PTR [rcx+72], r12
  60480. adcx r11, r15
  60481. mov QWORD PTR [rcx+80], r11
  60482. mov r15, r13
  60483. adox r15, r13
  60484. adcx r15, r13
  ; ---- round 5 ----
  60485. ; mu = a[i] * mp
  60486. mov rdx, r14
  60487. mov r11, r14
  60488. imul rdx, r8
  60489. xor r13, r13
  60490. ; a[i+0] += m[0] * mu
  60491. mulx r10, r9, QWORD PTR [rax]
  60492. mov r14, QWORD PTR [rcx+48]
  60493. adcx r11, r9
  60494. adox r14, r10
  60495. ; a[i+1] += m[1] * mu
  60496. mulx r10, r9, QWORD PTR [rax+8]
  60497. mov r11, QWORD PTR [rcx+56]
  60498. adcx r14, r9 ; r14 = final a[6]; it is not written back to memory
  60499. adox r11, r10
  60500. ; a[i+2] += m[2] * mu
  60501. mulx r10, r9, QWORD PTR [rax+16]
  60502. mov r12, QWORD PTR [rcx+64]
  60503. adcx r11, r9
  60504. adox r12, r10
  60505. mov QWORD PTR [rcx+56], r11
  60506. ; a[i+3] += m[3] * mu
  60507. mulx r10, r9, QWORD PTR [rax+24]
  60508. mov r11, QWORD PTR [rcx+72]
  60509. adcx r12, r9
  60510. adox r11, r10
  60511. mov QWORD PTR [rcx+64], r12
  60512. ; a[i+4] += m[4] * mu
  60513. mulx r10, r9, QWORD PTR [rax+32]
  60514. mov r12, QWORD PTR [rcx+80]
  60515. adcx r11, r9
  60516. adox r12, r10
  60517. mov QWORD PTR [rcx+72], r11
  60518. ; a[i+5] += m[5] * mu
  60519. mulx r10, r9, QWORD PTR [rax+40]
  60520. mov r11, QWORD PTR [rcx+88]
  60521. adcx r12, r9
  60522. adox r11, r10
  60523. mov QWORD PTR [rcx+80], r12
  60524. adcx r11, r15
  60525. mov QWORD PTR [rcx+88], r11
  60526. mov r15, r13
  60527. adox r15, r13
  60528. adcx r15, r13
  ; ---- final conditional subtraction: a[0..5] = a[6..11] - (m selected by r15) ----
  ; pext with an all-ones mask returns its source unchanged and with a zero
  ; mask returns 0, so r15 is expected to be 0 or 1 before the neg.
  60529. neg r15 ; overflow flag word -> selection mask
  60530. mov r8, rcx ; r8 = destination (a); mp is no longer needed
  60531. add rcx, 48 ; rcx = source (&a[6])
  60532. mov r10, QWORD PTR [rax]
  60533. mov rdx, r14 ; a[6] is taken from r14 rather than from memory
  60534. pext r10, r10, r15
  60535. sub rdx, r10
  60536. mov r10, QWORD PTR [rax+8]
  60537. mov r9, QWORD PTR [rcx+8]
  60538. pext r10, r10, r15
  60539. mov QWORD PTR [r8], rdx
  60540. sbb r9, r10
  60541. mov rdx, QWORD PTR [rax+16]
  60542. mov r10, QWORD PTR [rcx+16]
  60543. pext rdx, rdx, r15
  60544. mov QWORD PTR [r8+8], r9
  60545. sbb r10, rdx
  60546. mov r9, QWORD PTR [rax+24]
  60547. mov rdx, QWORD PTR [rcx+24]
  60548. pext r9, r9, r15
  60549. mov QWORD PTR [r8+16], r10
  60550. sbb rdx, r9
  60551. mov r10, QWORD PTR [rax+32]
  60552. mov r9, QWORD PTR [rcx+32]
  60553. pext r10, r10, r15
  60554. mov QWORD PTR [r8+24], rdx
  60555. sbb r9, r10
  60556. mov rdx, QWORD PTR [rax+40]
  60557. mov r10, QWORD PTR [rcx+40]
  60558. pext rdx, rdx, r15
  60559. mov QWORD PTR [r8+32], r9
  60560. sbb r10, rdx
  60561. mov QWORD PTR [r8+40], r10
  60562. pop r15
  60563. pop r14
  60564. pop r13
  60565. pop r12
  60566. ret
  60567. sp_384_mont_reduce_order_avx2_6 ENDP
  60568. _text ENDS
  60569. ENDIF
  IFDEF HAVE_INTEL_AVX2
  ; /* Conditionally subtract b from a using the mask m.
  ;  * m is -1 to subtract and 0 when not subtracting.
  ;  *
  ;  * Each limb of b is passed through PEXT with m as the selector: an
  ;  * all-ones selector yields the limb unchanged, a zero selector yields 0.
  ;  * PEXT leaves the flags alone, so the borrow chain runs straight through
  ;  * the interleaved loads, selections and stores.
  ;  *
  ;  * r       (rcx) A single precision number representing condition
  ;  *               subtract result.
  ;  * a       (rdx) A single precision number to subtract from.
  ;  * b       (r8)  A single precision number to subtract.
  ;  * m       (r9)  Mask value to apply.
  ;  * return  (rax) 0 if the subtraction did not borrow, -1 if it did.
  ;  */
  _text SEGMENT READONLY PARA
  sp_384_cond_sub_avx2_6 PROC
          push    r12
          ; r10, r11 and r12 rotate through the roles "limb of a" and
          ; "selected limb of b"; each result is stored one step late.
          ; limb 0
          mov     r10, QWORD PTR [rdx]
          mov     r12, QWORD PTR [r8]
          pext    r12, r12, r9
          sub     r10, r12
          ; limb 1
          mov     r11, QWORD PTR [rdx+8]
          mov     r12, QWORD PTR [r8+8]
          pext    r12, r12, r9
          mov     QWORD PTR [rcx], r10
          sbb     r11, r12
          ; limb 2
          mov     r12, QWORD PTR [rdx+16]
          mov     r10, QWORD PTR [r8+16]
          pext    r10, r10, r9
          mov     QWORD PTR [rcx+8], r11
          sbb     r12, r10
          ; limb 3
          mov     r10, QWORD PTR [rdx+24]
          mov     r11, QWORD PTR [r8+24]
          pext    r11, r11, r9
          mov     QWORD PTR [rcx+16], r12
          sbb     r10, r11
          ; limb 4
          mov     r11, QWORD PTR [rdx+32]
          mov     r12, QWORD PTR [r8+32]
          pext    r12, r12, r9
          mov     QWORD PTR [rcx+24], r10
          sbb     r11, r12
          ; limb 5
          mov     r12, QWORD PTR [rdx+40]
          mov     r10, QWORD PTR [r8+40]
          pext    r10, r10, r9
          mov     QWORD PTR [rcx+32], r11
          sbb     r12, r10
          mov     QWORD PTR [rcx+40], r12
          sbb     rax, rax                    ; rax = 0 - final borrow
          pop     r12
          ret
  sp_384_cond_sub_avx2_6 ENDP
  _text ENDS
  ENDIF
  IFDEF HAVE_INTEL_AVX2
  ; /* Divide the number by 2 mod the modulus (prime). (r = a / 2 % m)
  ; *
  ; * r Result of division by 2 (6 words).
  ; * a Number to divide (6 words).
  ; * m Modulus (prime) (6 words).
  ; *
  ; * Registers on entry: rcx = r, rdx = a, r8 = m.
  ; *
  ; * When a is odd the modulus is added first so that the sum is even;
  ; * the 385-bit sum (carry included) is then shifted right by one bit.
  ; * The selection of m is done with a mask, not with a branch.
  ; */
  _text SEGMENT READONLY PARA
  sp_384_div2_avx2_6 PROC
        push    r12
        push    r13
        ; r13 = all ones when the low bit of a is set, otherwise 0
        mov     r13, QWORD PTR [rdx]
        and     r13, 1
        neg     r13
        ; words 0 and 1: r = a + (m filtered by the mask)
        mov     r10, QWORD PTR [rdx]
        mov     r11, QWORD PTR [rdx+8]
        mov     rax, QWORD PTR [r8]
        mov     r9, QWORD PTR [r8+8]
        pext    rax, rax, r13
        pext    r9, r9, r13
        add     r10, rax
        adc     r11, r9
        mov     QWORD PTR [rcx], r10
        mov     QWORD PTR [rcx+8], r11
        ; words 2 and 3 (MOV/PEXT keep the carry flag intact)
        mov     r10, QWORD PTR [rdx+16]
        mov     r11, QWORD PTR [rdx+24]
        mov     rax, QWORD PTR [r8+16]
        mov     r9, QWORD PTR [r8+24]
        pext    rax, rax, r13
        pext    r9, r9, r13
        adc     r10, rax
        adc     r11, r9
        mov     QWORD PTR [rcx+16], r10
        mov     QWORD PTR [rcx+24], r11
        ; words 4 and 5
        mov     r10, QWORD PTR [rdx+32]
        mov     r11, QWORD PTR [rdx+40]
        mov     rax, QWORD PTR [r8+32]
        mov     r9, QWORD PTR [r8+40]
        pext    rax, rax, r13
        pext    r9, r9, r13
        adc     r10, rax
        adc     r11, r9
        mov     QWORD PTR [rcx+32], r10
        mov     QWORD PTR [rcx+40], r11
        ; r12 = carry out of the addition (MOV does not disturb CF)
        mov     r12, 0
        adc     r12, 0
        ; Shift the sum held in r right by one bit, low word first.
        ; Each SHRD pulls the low bit of the next higher word into bit 63.
        mov     rax, QWORD PTR [rcx]
        mov     r9, QWORD PTR [rcx+8]
        shrd    rax, r9, 1
        mov     QWORD PTR [rcx], rax
        mov     rax, QWORD PTR [rcx+16]
        shrd    r9, rax, 1
        mov     QWORD PTR [rcx+8], r9
        mov     r9, QWORD PTR [rcx+24]
        shrd    rax, r9, 1
        mov     QWORD PTR [rcx+16], rax
        mov     rax, QWORD PTR [rcx+32]
        shrd    r9, rax, 1
        mov     QWORD PTR [rcx+24], r9
        mov     r9, QWORD PTR [rcx+40]
        shrd    rax, r9, 1
        mov     QWORD PTR [rcx+32], rax
        ; the carry becomes the top bit of the result
        shrd    r9, r12, 1
        mov     QWORD PTR [rcx+40], r9
        pop     r13
        pop     r12
        ret
  sp_384_div2_avx2_6 ENDP
  _text ENDS
  ENDIF
  60689. IFNDEF WC_NO_CACHE_RESISTANT
  ; /* Touch each possible entry that could be being copied.
  ; *
  ; * r Point to copy into.
  ; * table Table - start of the entries to access
  ; * idx Index of entry to retrieve.
  ; *
  ; * Registers on entry: rcx = r, rdx = table, r8d = idx.
  ; *
  ; * Entries are 96 bytes each. Entries 1..63 are all read; each one is
  ; * ANDed with an all-ones/all-zeros compare mask and ORed into the
  ; * accumulators, so the memory access pattern does not depend on idx.
  ; * Entry 0 is never read: idx = 0 leaves the accumulators at zero.
  ; * The first 48 bytes of the selected entry are stored at r+0 and the
  ; * last 48 bytes at r+96.
  ; */
  _text SEGMENT READONLY PARA
  sp_384_get_entry_64_6 PROC
        ; xmm6-xmm15 are callee-saved in this ABI: spill them first
        sub     rsp, 160
        vmovdqu OWORD PTR [rsp], xmm6
        vmovdqu OWORD PTR [rsp+16], xmm7
        vmovdqu OWORD PTR [rsp+32], xmm8
        vmovdqu OWORD PTR [rsp+48], xmm9
        vmovdqu OWORD PTR [rsp+64], xmm10
        vmovdqu OWORD PTR [rsp+80], xmm11
        vmovdqu OWORD PTR [rsp+96], xmm12
        vmovdqu OWORD PTR [rsp+112], xmm13
        vmovdqu OWORD PTR [rsp+128], xmm14
        vmovdqu OWORD PTR [rsp+144], xmm15
        ; xmm13 = idx replicated into every 32-bit lane
        movd    xmm13, r8d
        pshufd  xmm13, xmm13, 0
        ; xmm15 = 1 replicated into every 32-bit lane
        mov     eax, 1
        movd    xmm15, eax
        pshufd  xmm15, xmm15, 0
        ; xmm14 = number of the entry about to be read, starting at 1
        movdqa  xmm14, xmm15
        ; xmm0-xmm5 accumulate the selected entry
        pxor    xmm0, xmm0
        pxor    xmm1, xmm1
        pxor    xmm2, xmm2
        pxor    xmm3, xmm3
        pxor    xmm4, xmm4
        pxor    xmm5, xmm5
        ; From entry 1
        add     rdx, 96
        mov     eax, 63
  L_384_get_entry_64_6_start_0:
        ; xmm12 = all ones when this entry's number equals idx, else 0
        movdqa  xmm12, xmm14
        pcmpeqd xmm12, xmm13
        paddd   xmm14, xmm15
        movdqu  xmm6, [rdx]
        pand    xmm6, xmm12
        por     xmm0, xmm6
        movdqu  xmm7, [rdx+16]
        pand    xmm7, xmm12
        por     xmm1, xmm7
        movdqu  xmm8, [rdx+32]
        pand    xmm8, xmm12
        por     xmm2, xmm8
        movdqu  xmm9, [rdx+48]
        pand    xmm9, xmm12
        por     xmm3, xmm9
        movdqu  xmm10, [rdx+64]
        pand    xmm10, xmm12
        por     xmm4, xmm10
        movdqu  xmm11, [rdx+80]
        pand    xmm11, xmm12
        por     xmm5, xmm11
        add     rdx, 96
        dec     rax
        jnz     L_384_get_entry_64_6_start_0
        ; first half of the entry goes to r+0, second half to r+96
        movdqu  [rcx], xmm0
        movdqu  [rcx+16], xmm1
        movdqu  [rcx+32], xmm2
        movdqu  [rcx+96], xmm3
        movdqu  [rcx+112], xmm4
        movdqu  [rcx+128], xmm5
        vmovdqu xmm6, OWORD PTR [rsp]
        vmovdqu xmm7, OWORD PTR [rsp+16]
        vmovdqu xmm8, OWORD PTR [rsp+32]
        vmovdqu xmm9, OWORD PTR [rsp+48]
        vmovdqu xmm10, OWORD PTR [rsp+64]
        vmovdqu xmm11, OWORD PTR [rsp+80]
        vmovdqu xmm12, OWORD PTR [rsp+96]
        vmovdqu xmm13, OWORD PTR [rsp+112]
        vmovdqu xmm14, OWORD PTR [rsp+128]
        vmovdqu xmm15, OWORD PTR [rsp+144]
        add     rsp, 160
        ret
  sp_384_get_entry_64_6 ENDP
  _text ENDS
  IFDEF HAVE_INTEL_AVX2
  ; /* Touch each possible entry that could be being copied.
  ; *
  ; * r Point to copy into.
  ; * table Table - start of the entries to access
  ; * idx Index of entry to retrieve.
  ; *
  ; * Registers on entry: rcx = r, rdx = table, r8d = idx.
  ; *
  ; * Entries are 96 bytes each. Scanning starts at entry 1 and every
  ; * scanned entry is ANDed with a compare mask and ORed into the
  ; * accumulators, so the memory access pattern does not depend on idx.
  ; * Entry 0 is never read: idx = 0 leaves the accumulators at zero.
  ; * The first 48 bytes of the selected entry are stored at r+0 and the
  ; * last 48 bytes at r+96.
  ; *
  ; * Entries 1..63 are scanned, the same range as sp_384_get_entry_64_6.
  ; * The loop previously ran 64 times, which also loaded entry 64, i.e.
  ; * 96 bytes beyond the end of a 64-entry table.
  ; */
  _text SEGMENT READONLY PARA
  sp_384_get_entry_64_avx2_6 PROC
        ; xmm6-xmm11 are callee-saved in this ABI: spill them first
        sub     rsp, 96
        vmovdqu OWORD PTR [rsp], xmm6
        vmovdqu OWORD PTR [rsp+16], xmm7
        vmovdqu OWORD PTR [rsp+32], xmm8
        vmovdqu OWORD PTR [rsp+48], xmm9
        vmovdqu OWORD PTR [rsp+64], xmm10
        vmovdqu OWORD PTR [rsp+80], xmm11
        mov     rax, 1
        movd    xmm9, r8d
        ; From entry 1
        add     rdx, 96
        movd    xmm11, eax
        ; 63 entries remain after entry 0
        mov     rax, 63
        ; With an all-zero index vector VPERMD copies lane 0 to every lane:
        ; ymm9 = idx in all lanes, ymm11 = 1 in all lanes
        vpxor   ymm10, ymm10, ymm10
        vpermd  ymm9, ymm10, ymm9
        vpermd  ymm11, ymm10, ymm11
        ; ymm0/xmm1 accumulate bytes 0..47, ymm2/xmm3 bytes 48..95
        vpxor   ymm0, ymm0, ymm0
        vpxor   xmm1, xmm1, xmm1
        vpxor   ymm2, ymm2, ymm2
        vpxor   xmm3, xmm3, xmm3
        ; ymm10 = number of the entry about to be read, starting at 1
        vmovdqa ymm10, ymm11
  L_384_get_entry_64_avx2_6_start:
        ; ymm8 = all ones when this entry's number equals idx, else 0
        vpcmpeqd ymm8, ymm10, ymm9
        vpaddd  ymm10, ymm10, ymm11
        vmovupd ymm4, YMMWORD PTR [rdx]
        vmovdqu xmm5, OWORD PTR [rdx+32]
        vmovupd ymm6, YMMWORD PTR [rdx+48]
        vmovdqu xmm7, OWORD PTR [rdx+80]
        add     rdx, 96
        vpand   ymm4, ymm4, ymm8
        vpand   xmm5, xmm5, xmm8
        vpand   ymm6, ymm6, ymm8
        vpand   xmm7, xmm7, xmm8
        vpor    ymm0, ymm0, ymm4
        vpor    xmm1, xmm1, xmm5
        vpor    ymm2, ymm2, ymm6
        vpor    xmm3, xmm3, xmm7
        dec     rax
        jnz     L_384_get_entry_64_avx2_6_start
        ; first half of the entry goes to r+0, second half to r+96
        vmovupd YMMWORD PTR [rcx], ymm0
        vmovdqu OWORD PTR [rcx+32], xmm1
        vmovupd YMMWORD PTR [rcx+96], ymm2
        vmovdqu OWORD PTR [rcx+128], xmm3
        vmovdqu xmm6, OWORD PTR [rsp]
        vmovdqu xmm7, OWORD PTR [rsp+16]
        vmovdqu xmm8, OWORD PTR [rsp+32]
        vmovdqu xmm9, OWORD PTR [rsp+48]
        vmovdqu xmm10, OWORD PTR [rsp+64]
        vmovdqu xmm11, OWORD PTR [rsp+80]
        add     rsp, 96
        ret
  sp_384_get_entry_64_avx2_6 ENDP
  _text ENDS
  ENDIF
  60832. ENDIF
  60833. IFNDEF WC_NO_CACHE_RESISTANT
  ; /* Touch each possible entry that could be being copied.
  ; *
  ; * r Point to copy into.
  ; * table Table - start of the entries to access
  ; * idx Index of entry to retrieve.
  ; *
  ; * Registers on entry: rcx = r, rdx = table, r8d = idx.
  ; *
  ; * Entries are 96 bytes each. Entries 1..64 are all read; each one is
  ; * ANDed with an all-ones/all-zeros compare mask and ORed into the
  ; * accumulators, so the memory access pattern does not depend on idx.
  ; * Entry 0 is never read: idx = 0 leaves the accumulators at zero.
  ; * The first 48 bytes of the selected entry are stored at r+0 and the
  ; * last 48 bytes at r+96.
  ; */
  _text SEGMENT READONLY PARA
  sp_384_get_entry_65_6 PROC
        ; xmm6-xmm15 are callee-saved in this ABI: spill them first
        sub     rsp, 160
        vmovdqu OWORD PTR [rsp], xmm6
        vmovdqu OWORD PTR [rsp+16], xmm7
        vmovdqu OWORD PTR [rsp+32], xmm8
        vmovdqu OWORD PTR [rsp+48], xmm9
        vmovdqu OWORD PTR [rsp+64], xmm10
        vmovdqu OWORD PTR [rsp+80], xmm11
        vmovdqu OWORD PTR [rsp+96], xmm12
        vmovdqu OWORD PTR [rsp+112], xmm13
        vmovdqu OWORD PTR [rsp+128], xmm14
        vmovdqu OWORD PTR [rsp+144], xmm15
        ; xmm13 = idx replicated into every 32-bit lane
        movd    xmm13, r8d
        pshufd  xmm13, xmm13, 0
        ; xmm15 = 1 replicated into every 32-bit lane
        mov     eax, 1
        movd    xmm15, eax
        pshufd  xmm15, xmm15, 0
        ; xmm14 = number of the entry about to be read, starting at 1
        movdqa  xmm14, xmm15
        ; xmm0-xmm5 accumulate the selected entry
        pxor    xmm0, xmm0
        pxor    xmm1, xmm1
        pxor    xmm2, xmm2
        pxor    xmm3, xmm3
        pxor    xmm4, xmm4
        pxor    xmm5, xmm5
        ; From entry 1
        add     rdx, 96
        mov     eax, 64
  L_384_get_entry_65_6_start_0:
        ; xmm12 = all ones when this entry's number equals idx, else 0
        movdqa  xmm12, xmm14
        pcmpeqd xmm12, xmm13
        paddd   xmm14, xmm15
        movdqu  xmm6, [rdx]
        pand    xmm6, xmm12
        por     xmm0, xmm6
        movdqu  xmm7, [rdx+16]
        pand    xmm7, xmm12
        por     xmm1, xmm7
        movdqu  xmm8, [rdx+32]
        pand    xmm8, xmm12
        por     xmm2, xmm8
        movdqu  xmm9, [rdx+48]
        pand    xmm9, xmm12
        por     xmm3, xmm9
        movdqu  xmm10, [rdx+64]
        pand    xmm10, xmm12
        por     xmm4, xmm10
        movdqu  xmm11, [rdx+80]
        pand    xmm11, xmm12
        por     xmm5, xmm11
        add     rdx, 96
        dec     rax
        jnz     L_384_get_entry_65_6_start_0
        ; first half of the entry goes to r+0, second half to r+96
        movdqu  [rcx], xmm0
        movdqu  [rcx+16], xmm1
        movdqu  [rcx+32], xmm2
        movdqu  [rcx+96], xmm3
        movdqu  [rcx+112], xmm4
        movdqu  [rcx+128], xmm5
        vmovdqu xmm6, OWORD PTR [rsp]
        vmovdqu xmm7, OWORD PTR [rsp+16]
        vmovdqu xmm8, OWORD PTR [rsp+32]
        vmovdqu xmm9, OWORD PTR [rsp+48]
        vmovdqu xmm10, OWORD PTR [rsp+64]
        vmovdqu xmm11, OWORD PTR [rsp+80]
        vmovdqu xmm12, OWORD PTR [rsp+96]
        vmovdqu xmm13, OWORD PTR [rsp+112]
        vmovdqu xmm14, OWORD PTR [rsp+128]
        vmovdqu xmm15, OWORD PTR [rsp+144]
        add     rsp, 160
        ret
  sp_384_get_entry_65_6 ENDP
  _text ENDS
  IFDEF HAVE_INTEL_AVX2
  ; /* Touch each possible entry that could be being copied.
  ; *
  ; * r Point to copy into.
  ; * table Table - start of the entries to access
  ; * idx Index of entry to retrieve.
  ; *
  ; * Registers on entry: rcx = r, rdx = table, r8d = idx.
  ; *
  ; * Entries are 96 bytes each. Scanning starts at entry 1 and every
  ; * scanned entry is ANDed with a compare mask and ORed into the
  ; * accumulators, so the memory access pattern does not depend on idx.
  ; * Entry 0 is never read: idx = 0 leaves the accumulators at zero.
  ; * The first 48 bytes of the selected entry are stored at r+0 and the
  ; * last 48 bytes at r+96.
  ; *
  ; * Entries 1..64 are scanned, the same range as sp_384_get_entry_65_6.
  ; * The loop previously ran 65 times, which also loaded entry 65, i.e.
  ; * 96 bytes beyond the end of a 65-entry table.
  ; */
  _text SEGMENT READONLY PARA
  sp_384_get_entry_65_avx2_6 PROC
        ; xmm6-xmm11 are callee-saved in this ABI: spill them first
        sub     rsp, 96
        vmovdqu OWORD PTR [rsp], xmm6
        vmovdqu OWORD PTR [rsp+16], xmm7
        vmovdqu OWORD PTR [rsp+32], xmm8
        vmovdqu OWORD PTR [rsp+48], xmm9
        vmovdqu OWORD PTR [rsp+64], xmm10
        vmovdqu OWORD PTR [rsp+80], xmm11
        mov     rax, 1
        movd    xmm9, r8d
        ; From entry 1
        add     rdx, 96
        movd    xmm11, eax
        ; 64 entries remain after entry 0
        mov     rax, 64
        ; With an all-zero index vector VPERMD copies lane 0 to every lane:
        ; ymm9 = idx in all lanes, ymm11 = 1 in all lanes
        vpxor   ymm10, ymm10, ymm10
        vpermd  ymm9, ymm10, ymm9
        vpermd  ymm11, ymm10, ymm11
        ; ymm0/xmm1 accumulate bytes 0..47, ymm2/xmm3 bytes 48..95
        vpxor   ymm0, ymm0, ymm0
        vpxor   xmm1, xmm1, xmm1
        vpxor   ymm2, ymm2, ymm2
        vpxor   xmm3, xmm3, xmm3
        ; ymm10 = number of the entry about to be read, starting at 1
        vmovdqa ymm10, ymm11
  L_384_get_entry_65_avx2_6_start:
        ; ymm8 = all ones when this entry's number equals idx, else 0
        vpcmpeqd ymm8, ymm10, ymm9
        vpaddd  ymm10, ymm10, ymm11
        vmovupd ymm4, YMMWORD PTR [rdx]
        vmovdqu xmm5, OWORD PTR [rdx+32]
        vmovupd ymm6, YMMWORD PTR [rdx+48]
        vmovdqu xmm7, OWORD PTR [rdx+80]
        add     rdx, 96
        vpand   ymm4, ymm4, ymm8
        vpand   xmm5, xmm5, xmm8
        vpand   ymm6, ymm6, ymm8
        vpand   xmm7, xmm7, xmm8
        vpor    ymm0, ymm0, ymm4
        vpor    xmm1, xmm1, xmm5
        vpor    ymm2, ymm2, ymm6
        vpor    xmm3, xmm3, xmm7
        dec     rax
        jnz     L_384_get_entry_65_avx2_6_start
        ; first half of the entry goes to r+0, second half to r+96
        vmovupd YMMWORD PTR [rcx], ymm0
        vmovdqu OWORD PTR [rcx+32], xmm1
        vmovupd YMMWORD PTR [rcx+96], ymm2
        vmovdqu OWORD PTR [rcx+128], xmm3
        vmovdqu xmm6, OWORD PTR [rsp]
        vmovdqu xmm7, OWORD PTR [rsp+16]
        vmovdqu xmm8, OWORD PTR [rsp+32]
        vmovdqu xmm9, OWORD PTR [rsp+48]
        vmovdqu xmm10, OWORD PTR [rsp+64]
        vmovdqu xmm11, OWORD PTR [rsp+80]
        add     rsp, 96
        ret
  sp_384_get_entry_65_avx2_6 ENDP
  _text ENDS
  ENDIF
  60976. ENDIF
  ; /* Add 1 to a. (a = a + 1)
  ; *
  ; * a A single precision integer (6 words), updated in place.
  ; *
  ; * Register on entry: rcx = a.
  ; * A carry out of the top word is discarded.
  ; */
  _text SEGMENT READONLY PARA
  sp_384_add_one_6 PROC
        ; Seed the carry flag with the 1 to add, then let it ripple
        ; upwards through all six words.
        stc
        adc     QWORD PTR [rcx], 0
        adc     QWORD PTR [rcx+8], 0
        adc     QWORD PTR [rcx+16], 0
        adc     QWORD PTR [rcx+24], 0
        adc     QWORD PTR [rcx+32], 0
        adc     QWORD PTR [rcx+40], 0
        ret
  sp_384_add_one_6 ENDP
  _text ENDS
  ; /* Read big endian unsigned byte array into r.
  ; * Uses the bswap instruction.
  ; *
  ; * r A single precision integer.
  ; * size Maximum number of bytes to convert
  ; * a Byte array.
  ; * n Number of bytes in array to read.
  ; *
  ; * Registers on entry: rcx = r, rdx = size, r8 = a, r9 = n.
  ; * size is not consulted by this routine; rdx is used as scratch.
  ; *
  ; * The array is consumed from its last byte towards its first, 64 bytes
  ; * at a time, then 8 bytes at a time; the 1..7 leading bytes left over
  ; * form one final word. Words of r up to r+48 that received no data
  ; * are set to zero.
  ; */
  _text SEGMENT READONLY PARA
  sp_384_from_bin_bswap PROC
        push    r12
        ; r11 = one past the last input byte, r12 = end of r (6 words)
        lea     r11, [r8+r9]
        lea     r12, [rcx+48]
        jmp     L_384_from_bin_bswap_64_end
  L_384_from_bin_bswap_64_start:
        ; eight words: the last input bytes become the lowest word
        sub     r11, 64
        mov     rax, QWORD PTR [r11+56]
        mov     rdx, QWORD PTR [r11+48]
        bswap   rax
        bswap   rdx
        mov     QWORD PTR [rcx], rax
        mov     QWORD PTR [rcx+8], rdx
        mov     rax, QWORD PTR [r11+40]
        mov     rdx, QWORD PTR [r11+32]
        bswap   rax
        bswap   rdx
        mov     QWORD PTR [rcx+16], rax
        mov     QWORD PTR [rcx+24], rdx
        mov     rax, QWORD PTR [r11+24]
        mov     rdx, QWORD PTR [r11+16]
        bswap   rax
        bswap   rdx
        mov     QWORD PTR [rcx+32], rax
        mov     QWORD PTR [rcx+40], rdx
        mov     rax, QWORD PTR [r11+8]
        mov     rdx, QWORD PTR [r11]
        bswap   rax
        bswap   rdx
        mov     QWORD PTR [rcx+48], rax
        mov     QWORD PTR [rcx+56], rdx
        add     rcx, 64
        sub     r9, 64
  L_384_from_bin_bswap_64_end:
        ; repeat while at least 64 bytes remain
        cmp     r9, 63
        jg      L_384_from_bin_bswap_64_start
        jmp     L_384_from_bin_bswap_8_end
  L_384_from_bin_bswap_8_start:
        sub     r11, 8
        mov     rax, QWORD PTR [r11]
        bswap   rax
        mov     QWORD PTR [rcx], rax
        add     rcx, 8
        sub     r9, 8
  L_384_from_bin_bswap_8_end:
        ; repeat while at least 8 bytes remain
        cmp     r9, 7
        jg      L_384_from_bin_bswap_8_start
        ; nothing left over: go straight to the zero fill
        test    r9, r9
        je      L_384_from_bin_bswap_hi_end
        ; r10 accumulates the leading bytes a[0..], most significant first
        xor     r10d, r10d
  L_384_from_bin_bswap_hi_start:
        movzx   eax, BYTE PTR [r8]
        shl     r10, 8
        inc     r8
        ; the low byte of r10 is zero after the shift, so OR equals ADD
        or      r10, rax
        dec     r9
        jg      L_384_from_bin_bswap_hi_start
        mov     QWORD PTR [rcx], r10
        add     rcx, 8
  L_384_from_bin_bswap_hi_end:
        cmp     rcx, r12
        jge     L_384_from_bin_bswap_zero_end
  L_384_from_bin_bswap_zero_start:
        ; clear the words of r that were not written
        mov     QWORD PTR [rcx], 0
        add     rcx, 8
        cmp     rcx, r12
        jl      L_384_from_bin_bswap_zero_start
  L_384_from_bin_bswap_zero_end:
        pop     r12
        ret
  sp_384_from_bin_bswap ENDP
  _text ENDS
  IFNDEF NO_MOVBE_SUPPORT
  ; /* Read big endian unsigned byte array into r.
  ; * Uses the movbe instruction which is an optional instruction.
  ; *
  ; * r A single precision integer.
  ; * size Maximum number of bytes to convert
  ; * a Byte array.
  ; * n Number of bytes in array to read.
  ; *
  ; * Registers on entry: rcx = r, rdx = size, r8 = a, r9 = n.
  ; * size is not consulted by this routine; rdx is used as scratch.
  ; *
  ; * The array is consumed from its last byte towards its first, 64 bytes
  ; * at a time, then 8 bytes at a time; the 1..7 leading bytes left over
  ; * form one final word. Words of r up to r+48 that received no data
  ; * are set to zero.
  ; */
  _text SEGMENT READONLY PARA
  sp_384_from_bin_movbe PROC
        push    r12
        ; r11 = one past the last input byte, r12 = end of r (6 words)
        lea     r11, [r8+r9]
        lea     r12, [rcx+48]
        jmp     L_384_from_bin_movbe_64_end
  L_384_from_bin_movbe_64_start:
        ; eight words: the last input bytes become the lowest word
        sub     r11, 64
        movbe   rax, QWORD PTR [r11+56]
        movbe   rdx, QWORD PTR [r11+48]
        mov     QWORD PTR [rcx], rax
        mov     QWORD PTR [rcx+8], rdx
        movbe   rax, QWORD PTR [r11+40]
        movbe   rdx, QWORD PTR [r11+32]
        mov     QWORD PTR [rcx+16], rax
        mov     QWORD PTR [rcx+24], rdx
        movbe   rax, QWORD PTR [r11+24]
        movbe   rdx, QWORD PTR [r11+16]
        mov     QWORD PTR [rcx+32], rax
        mov     QWORD PTR [rcx+40], rdx
        movbe   rax, QWORD PTR [r11+8]
        movbe   rdx, QWORD PTR [r11]
        mov     QWORD PTR [rcx+48], rax
        mov     QWORD PTR [rcx+56], rdx
        add     rcx, 64
        sub     r9, 64
  L_384_from_bin_movbe_64_end:
        ; repeat while at least 64 bytes remain
        cmp     r9, 63
        jg      L_384_from_bin_movbe_64_start
        jmp     L_384_from_bin_movbe_8_end
  L_384_from_bin_movbe_8_start:
        sub     r11, 8
        movbe   rax, QWORD PTR [r11]
        mov     QWORD PTR [rcx], rax
        add     rcx, 8
        sub     r9, 8
  L_384_from_bin_movbe_8_end:
        ; repeat while at least 8 bytes remain
        cmp     r9, 7
        jg      L_384_from_bin_movbe_8_start
        ; nothing left over: go straight to the zero fill
        test    r9, r9
        je      L_384_from_bin_movbe_hi_end
        ; r10 accumulates the leading bytes a[0..], most significant first
        xor     r10d, r10d
  L_384_from_bin_movbe_hi_start:
        movzx   eax, BYTE PTR [r8]
        shl     r10, 8
        inc     r8
        ; the low byte of r10 is zero after the shift, so OR equals ADD
        or      r10, rax
        dec     r9
        jg      L_384_from_bin_movbe_hi_start
        mov     QWORD PTR [rcx], r10
        add     rcx, 8
  L_384_from_bin_movbe_hi_end:
        ; rax = 0 is the fill value for the words that were not written
        xor     eax, eax
        cmp     rcx, r12
        jge     L_384_from_bin_movbe_zero_end
  L_384_from_bin_movbe_zero_start:
        mov     QWORD PTR [rcx], rax
        add     rcx, 8
        cmp     rcx, r12
        jl      L_384_from_bin_movbe_zero_start
  L_384_from_bin_movbe_zero_end:
        pop     r12
        ret
  sp_384_from_bin_movbe ENDP
  _text ENDS
  ENDIF
  ; /* Write r as big endian to byte array.
  ; * Fixed length number of bytes written: 48
  ; * Uses the bswap instruction.
  ; *
  ; * r A single precision integer (6 words).
  ; * a Byte array (48 bytes are written).
  ; *
  ; * Registers on entry: rcx = r, rdx = a.
  ; * The highest word of r is written first, byte-reversed.
  ; */
  _text SEGMENT READONLY PARA
  sp_384_to_bin_bswap_6 PROC
        ; words 5 and 4 -> bytes 0..15
        mov     r9, QWORD PTR [rcx+40]
        mov     r10, QWORD PTR [rcx+32]
        bswap   r9
        bswap   r10
        mov     QWORD PTR [rdx], r9
        mov     QWORD PTR [rdx+8], r10
        ; words 3 and 2 -> bytes 16..31
        mov     r9, QWORD PTR [rcx+24]
        mov     r10, QWORD PTR [rcx+16]
        bswap   r9
        bswap   r10
        mov     QWORD PTR [rdx+16], r9
        mov     QWORD PTR [rdx+24], r10
        ; words 1 and 0 -> bytes 32..47
        mov     r9, QWORD PTR [rcx+8]
        mov     r10, QWORD PTR [rcx]
        bswap   r9
        bswap   r10
        mov     QWORD PTR [rdx+32], r9
        mov     QWORD PTR [rdx+40], r10
        ret
  sp_384_to_bin_bswap_6 ENDP
  _text ENDS
  IFNDEF NO_MOVBE_SUPPORT
  ; /* Write r as big endian to byte array.
  ; * Fixed length number of bytes written: 48
  ; * Uses the movbe instruction which is optional.
  ; *
  ; * r A single precision integer (6 words).
  ; * a Byte array (48 bytes are written).
  ; *
  ; * Registers on entry: rcx = r, rdx = a.
  ; * The highest word of r is written first; MOVBE reverses the bytes
  ; * while loading.
  ; */
  _text SEGMENT READONLY PARA
  sp_384_to_bin_movbe_6 PROC
        ; words 5 and 4 -> bytes 0..15
        movbe   r9, QWORD PTR [rcx+40]
        movbe   r10, QWORD PTR [rcx+32]
        mov     QWORD PTR [rdx], r9
        mov     QWORD PTR [rdx+8], r10
        ; words 3 and 2 -> bytes 16..31
        movbe   r9, QWORD PTR [rcx+24]
        movbe   r10, QWORD PTR [rcx+16]
        mov     QWORD PTR [rdx+16], r9
        mov     QWORD PTR [rdx+24], r10
        ; words 1 and 0 -> bytes 32..47
        movbe   r9, QWORD PTR [rcx+8]
        movbe   r10, QWORD PTR [rcx]
        mov     QWORD PTR [rdx+32], r9
        mov     QWORD PTR [rdx+40], r10
        ret
  sp_384_to_bin_movbe_6 ENDP
  _text ENDS
  ENDIF
  ; /* Sub b from a into a. (a -= b)
  ; *
  ; * a A single precision integer and result (6 words).
  ; * b A single precision integer (6 words).
  ; *
  ; * Registers on entry: rcx = a, rdx = b.
  ; * Returns in rax: 0 when no borrow left the top word, -1 when one did.
  ; */
  _text SEGMENT READONLY PARA
  sp_384_sub_in_place_6 PROC
        ; One word at a time; MOV leaves the borrow flag untouched
        ; between the SUB/SBB steps.
        mov     rax, QWORD PTR [rdx]
        sub     QWORD PTR [rcx], rax
        mov     rax, QWORD PTR [rdx+8]
        sbb     QWORD PTR [rcx+8], rax
        mov     rax, QWORD PTR [rdx+16]
        sbb     QWORD PTR [rcx+16], rax
        mov     rax, QWORD PTR [rdx+24]
        sbb     QWORD PTR [rcx+24], rax
        mov     rax, QWORD PTR [rdx+32]
        sbb     QWORD PTR [rcx+32], rax
        mov     rax, QWORD PTR [rdx+40]
        sbb     QWORD PTR [rcx+40], rax
        ; rax = 0 - borrow
        sbb     rax, rax
        ret
  sp_384_sub_in_place_6 ENDP
  _text ENDS
  ; /* Mul a by digit b into r. (r = a * b)
  ; *
  ; * r A single precision integer (7 words are written).
  ; * a A single precision integer (6 words).
  ; * b A single precision digit.
  ; *
  ; * Registers on entry: rcx = r, rdx = a, r8 = b.
  ; *
  ; * Each step forms the 128-bit product a[i] * b, adds the high half
  ; * carried over from the previous step, stores the low half as r[i]
  ; * and keeps the new high half as the next carry. The high half of a
  ; * 64x64 product plus a carry of 1 cannot overflow 64 bits.
  ; */
  _text SEGMENT READONLY PARA
  sp_384_mul_d_6 PROC
        ; MUL overwrites rdx, so keep the pointer to a in r9
        mov     r9, rdx
        ; A[0] * B
        mov     rax, r8
        mul     QWORD PTR [r9]
        mov     QWORD PTR [rcx], rax
        mov     r10, rdx
        ; A[1] * B
        mov     rax, r8
        mul     QWORD PTR [r9+8]
        add     rax, r10
        adc     rdx, 0
        mov     QWORD PTR [rcx+8], rax
        mov     r10, rdx
        ; A[2] * B
        mov     rax, r8
        mul     QWORD PTR [r9+16]
        add     rax, r10
        adc     rdx, 0
        mov     QWORD PTR [rcx+16], rax
        mov     r10, rdx
        ; A[3] * B
        mov     rax, r8
        mul     QWORD PTR [r9+24]
        add     rax, r10
        adc     rdx, 0
        mov     QWORD PTR [rcx+24], rax
        mov     r10, rdx
        ; A[4] * B
        mov     rax, r8
        mul     QWORD PTR [r9+32]
        add     rax, r10
        adc     rdx, 0
        mov     QWORD PTR [rcx+32], rax
        mov     r10, rdx
        ; A[5] * B
        mov     rax, r8
        mul     QWORD PTR [r9+40]
        add     rax, r10
        adc     rdx, 0
        mov     QWORD PTR [rcx+40], rax
        ; the last carry is the seventh result word
        mov     QWORD PTR [rcx+48], rdx
        ret
  sp_384_mul_d_6 ENDP
  _text ENDS
  IFDEF HAVE_INTEL_AVX2
  ; /* Mul a by digit b into r. (r = a * b)
  ; *
  ; * r A single precision integer (7 words are written).
  ; * a A single precision integer (6 words).
  ; * b A single precision digit.
  ; *
  ; * Registers on entry: rcx = r, rdx = a, r8 = b.
  ; *
  ; * MULX takes one factor from rdx and does not touch the flags, so a
  ; * single ADD/ADC chain carries from word to word:
  ; *   r[i] = lo(a[i]*b) + hi(a[i-1]*b) + carry.
  ; */
  _text SEGMENT READONLY PARA
  sp_384_mul_d_avx2_6 PROC
        ; rax = a, rdx = b (implicit MULX operand)
        mov     rax, rdx
        mov     rdx, r8
        ; A[0] * B
        mulx    r10, r9, QWORD PTR [rax]
        mov     QWORD PTR [rcx], r9
        ; A[1] * B
        mulx    r11, r9, QWORD PTR [rax+8]
        add     r9, r10
        mov     QWORD PTR [rcx+8], r9
        ; A[2] * B
        mulx    r10, r9, QWORD PTR [rax+16]
        adc     r9, r11
        mov     QWORD PTR [rcx+16], r9
        ; A[3] * B
        mulx    r11, r9, QWORD PTR [rax+24]
        adc     r9, r10
        mov     QWORD PTR [rcx+24], r9
        ; A[4] * B
        mulx    r10, r9, QWORD PTR [rax+32]
        adc     r9, r11
        mov     QWORD PTR [rcx+32], r9
        ; A[5] * B
        mulx    r11, r9, QWORD PTR [rax+40]
        adc     r9, r10
        mov     QWORD PTR [rcx+40], r9
        ; top word = last high half plus the final carry
        adc     r11, 0
        mov     QWORD PTR [rcx+48], r11
        ret
  sp_384_mul_d_avx2_6 ENDP
  _text ENDS
  ENDIF
  IFDEF _WIN64
  ; /* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div)
  ; *
  ; * d1 The high order half of the number to divide.
  ; * d0 The low order half of the number to divide.
  ; * div The divisor.
  ; * returns the result of the division.
  ; *
  ; * Registers on entry: rcx = d1, rdx = d0, r8 = div.
  ; * The quotient is returned in rax; DIV leaves the remainder in rdx.
  ; * DIV raises a divide error when div is 0 or when the quotient does
  ; * not fit in 64 bits.
  ; */
  _text SEGMENT READONLY PARA
  div_384_word_asm_6 PROC
        ; DIV expects the dividend as rdx:rax
        mov     rax, rdx
        mov     rdx, rcx
        div     r8
        ret
  div_384_word_asm_6 ENDP
  _text ENDS
  ENDIF
  ; /* Shift number right by 1 bit. (r = a >> 1)
  ; *
  ; * r Result of right shift by 1 (6 words).
  ; * a Number to shift (6 words).
  ; *
  ; * Registers on entry: rcx = r, rdx = a.
  ; * A zero is shifted into the top bit of the result.
  ; */
  _text SEGMENT READONLY PARA
  sp_384_rshift1_6 PROC
        ; Load all six words; the top word reuses rdx once the pointer
        ; is no longer needed.
        mov     rax, QWORD PTR [rdx]
        mov     r8, QWORD PTR [rdx+8]
        mov     r9, QWORD PTR [rdx+16]
        mov     r10, QWORD PTR [rdx+24]
        mov     r11, QWORD PTR [rdx+32]
        mov     rdx, QWORD PTR [rdx+40]
        ; Low word first: each SHRD takes bit 0 of the next higher,
        ; still unshifted, word as its new bit 63.
        shrd    rax, r8, 1
        shrd    r8, r9, 1
        shrd    r9, r10, 1
        shrd    r10, r11, 1
        shrd    r11, rdx, 1
        shr     rdx, 1
        mov     QWORD PTR [rcx], rax
        mov     QWORD PTR [rcx+8], r8
        mov     QWORD PTR [rcx+16], r9
        mov     QWORD PTR [rcx+24], r10
        mov     QWORD PTR [rcx+32], r11
        mov     QWORD PTR [rcx+40], rdx
        ret
  sp_384_rshift1_6 ENDP
  _text ENDS
  ; /* Divide the number by 2 mod the prime. (r = a / 2 % m)
  ; *
  ; * r Result of division by 2 (6 words).
  ; * a Number to divide (6 words).
  ; * m Modulus (6 words).
  ; *
  ; * Registers on entry: rcx = r, rdx = a, r8 = m.
  ; *
  ; * When a is odd, m is added first so that the sum is even; the sum,
  ; * including its carry, is then shifted right by one bit.
  ; * The odd/even decision is made with a conditional branch.
  ; */
  _text SEGMENT READONLY PARA
  sp_384_div2_mod_6 PROC
        push    r12
        push    r13
        ; a -> rax, r9, r10, r11, r12, rdx (low word first)
        mov     rax, QWORD PTR [rdx]
        mov     r9, QWORD PTR [rdx+8]
        mov     r10, QWORD PTR [rdx+16]
        mov     r11, QWORD PTR [rdx+24]
        mov     r12, QWORD PTR [rdx+32]
        mov     rdx, QWORD PTR [rdx+40]
        ; r13 = carry out of the addition; stays 0 when nothing is added
        xor     r13d, r13d
        test    al, 1
        je      L_384_mod_inv_6_div2_mod_no_add
        ; a is odd: a += m
        add     rax, QWORD PTR [r8]
        adc     r9, QWORD PTR [r8+8]
        adc     r10, QWORD PTR [r8+16]
        adc     r11, QWORD PTR [r8+24]
        adc     r12, QWORD PTR [r8+32]
        adc     rdx, QWORD PTR [r8+40]
        adc     r13, 0
  L_384_mod_inv_6_div2_mod_no_add:
        ; shift right by one; the carry becomes the top bit
        shrd    rax, r9, 1
        shrd    r9, r10, 1
        shrd    r10, r11, 1
        shrd    r11, r12, 1
        shrd    r12, rdx, 1
        shrd    rdx, r13, 1
        mov     QWORD PTR [rcx], rax
        mov     QWORD PTR [rcx+8], r9
        mov     QWORD PTR [rcx+16], r10
        mov     QWORD PTR [rcx+24], r11
        mov     QWORD PTR [rcx+32], r12
        mov     QWORD PTR [rcx+40], rdx
        pop     r13
        pop     r12
        ret
  sp_384_div2_mod_6 ENDP
  _text ENDS
  ; /* Count the number of significant bits in a 6 word number.
  ; *
  ; * Register on entry: rcx = pointer to the number.
  ; * Returns in rax: 1 + position of the highest set bit, or 0 when all
  ; * six words are zero.
  ; */
  _text SEGMENT READONLY PARA
  sp_384_num_bits_6 PROC
        ; result when every word turns out to be zero
        xor     eax, eax
        ; Look for the highest non-zero word, top word first.
        ; BSR gives the bit index inside that word; the constant added is
        ; 64 * word number + 1.
        mov     rdx, QWORD PTR [rcx+40]
        test    rdx, rdx
        jz      L_384_num_bits_6_end_320
        bsr     rax, rdx
        add     rax, 321
        jmp     L_384_num_bits_6_done
  L_384_num_bits_6_end_320:
        mov     rdx, QWORD PTR [rcx+32]
        test    rdx, rdx
        jz      L_384_num_bits_6_end_256
        bsr     rax, rdx
        add     rax, 257
        jmp     L_384_num_bits_6_done
  L_384_num_bits_6_end_256:
        mov     rdx, QWORD PTR [rcx+24]
        test    rdx, rdx
        jz      L_384_num_bits_6_end_192
        bsr     rax, rdx
        add     rax, 193
        jmp     L_384_num_bits_6_done
  L_384_num_bits_6_end_192:
        mov     rdx, QWORD PTR [rcx+16]
        test    rdx, rdx
        jz      L_384_num_bits_6_end_128
        bsr     rax, rdx
        add     rax, 129
        jmp     L_384_num_bits_6_done
  L_384_num_bits_6_end_128:
        mov     rdx, QWORD PTR [rcx+8]
        test    rdx, rdx
        jz      L_384_num_bits_6_end_64
        bsr     rax, rdx
        add     rax, 65
        jmp     L_384_num_bits_6_done
  L_384_num_bits_6_end_64:
        mov     rdx, QWORD PTR [rcx]
        test    rdx, rdx
        jz      L_384_num_bits_6_end_0
        bsr     rax, rdx
        add     rax, 1
  L_384_num_bits_6_end_0:
  L_384_num_bits_6_done:
        ret
  sp_384_num_bits_6 ENDP
  _text ENDS
  61520. ENDIF
  61521. IFDEF WOLFSSL_SP_521
  61522. ; /* Multiply a and b into r. (r = a * b)
  61523. ; *
  61524. ; * r A single precision integer.
  61525. ; * a A single precision integer.
  61526. ; * b A single precision integer.
  61527. ; */
  61528. _text SEGMENT READONLY PARA
  61529. sp_521_mul_9 PROC
  61530. push r12
  61531. mov r9, rdx
  61532. sub rsp, 72
  61533. ; A[0] * B[0]
  61534. mov rax, QWORD PTR [r8]
  61535. mul QWORD PTR [r9]
  61536. xor r12, r12
  61537. mov QWORD PTR [rsp], rax
  61538. mov r11, rdx
  61539. ; A[0] * B[1]
  61540. mov rax, QWORD PTR [r8+8]
  61541. mul QWORD PTR [r9]
  61542. xor r10, r10
  61543. add r11, rax
  61544. adc r12, rdx
  61545. adc r10, 0
  61546. ; A[1] * B[0]
  61547. mov rax, QWORD PTR [r8]
  61548. mul QWORD PTR [r9+8]
  61549. add r11, rax
  61550. adc r12, rdx
  61551. adc r10, 0
  61552. mov QWORD PTR [rsp+8], r11
  61553. ; A[0] * B[2]
  61554. mov rax, QWORD PTR [r8+16]
  61555. mul QWORD PTR [r9]
  61556. xor r11, r11
  61557. add r12, rax
  61558. adc r10, rdx
  61559. adc r11, 0
  61560. ; A[1] * B[1]
  61561. mov rax, QWORD PTR [r8+8]
  61562. mul QWORD PTR [r9+8]
  61563. add r12, rax
  61564. adc r10, rdx
  61565. adc r11, 0
  61566. ; A[2] * B[0]
  61567. mov rax, QWORD PTR [r8]
  61568. mul QWORD PTR [r9+16]
  61569. add r12, rax
  61570. adc r10, rdx
  61571. adc r11, 0
  61572. mov QWORD PTR [rsp+16], r12
  61573. ; A[0] * B[3]
  61574. mov rax, QWORD PTR [r8+24]
  61575. mul QWORD PTR [r9]
  61576. xor r12, r12
  61577. add r10, rax
  61578. adc r11, rdx
  61579. adc r12, 0
  61580. ; A[1] * B[2]
  61581. mov rax, QWORD PTR [r8+16]
  61582. mul QWORD PTR [r9+8]
  61583. add r10, rax
  61584. adc r11, rdx
  61585. adc r12, 0
  61586. ; A[2] * B[1]
  61587. mov rax, QWORD PTR [r8+8]
  61588. mul QWORD PTR [r9+16]
  61589. add r10, rax
  61590. adc r11, rdx
  61591. adc r12, 0
  61592. ; A[3] * B[0]
  61593. mov rax, QWORD PTR [r8]
  61594. mul QWORD PTR [r9+24]
  61595. add r10, rax
  61596. adc r11, rdx
  61597. adc r12, 0
  61598. mov QWORD PTR [rsp+24], r10
  61599. ; A[0] * B[4]
  61600. mov rax, QWORD PTR [r8+32]
  61601. mul QWORD PTR [r9]
  61602. xor r10, r10
  61603. add r11, rax
  61604. adc r12, rdx
  61605. adc r10, 0
  61606. ; A[1] * B[3]
  61607. mov rax, QWORD PTR [r8+24]
  61608. mul QWORD PTR [r9+8]
  61609. add r11, rax
  61610. adc r12, rdx
  61611. adc r10, 0
  61612. ; A[2] * B[2]
  61613. mov rax, QWORD PTR [r8+16]
  61614. mul QWORD PTR [r9+16]
  61615. add r11, rax
  61616. adc r12, rdx
  61617. adc r10, 0
  61618. ; A[3] * B[1]
  61619. mov rax, QWORD PTR [r8+8]
  61620. mul QWORD PTR [r9+24]
  61621. add r11, rax
  61622. adc r12, rdx
  61623. adc r10, 0
  61624. ; A[4] * B[0]
  61625. mov rax, QWORD PTR [r8]
  61626. mul QWORD PTR [r9+32]
  61627. add r11, rax
  61628. adc r12, rdx
  61629. adc r10, 0
  61630. mov QWORD PTR [rsp+32], r11
  61631. ; A[0] * B[5]
  61632. mov rax, QWORD PTR [r8+40]
  61633. mul QWORD PTR [r9]
  61634. xor r11, r11
  61635. add r12, rax
  61636. adc r10, rdx
  61637. adc r11, 0
  61638. ; A[1] * B[4]
  61639. mov rax, QWORD PTR [r8+32]
  61640. mul QWORD PTR [r9+8]
  61641. add r12, rax
  61642. adc r10, rdx
  61643. adc r11, 0
  61644. ; A[2] * B[3]
  61645. mov rax, QWORD PTR [r8+24]
  61646. mul QWORD PTR [r9+16]
  61647. add r12, rax
  61648. adc r10, rdx
  61649. adc r11, 0
  61650. ; A[3] * B[2]
  61651. mov rax, QWORD PTR [r8+16]
  61652. mul QWORD PTR [r9+24]
  61653. add r12, rax
  61654. adc r10, rdx
  61655. adc r11, 0
  61656. ; A[4] * B[1]
  61657. mov rax, QWORD PTR [r8+8]
  61658. mul QWORD PTR [r9+32]
  61659. add r12, rax
  61660. adc r10, rdx
  61661. adc r11, 0
  61662. ; A[5] * B[0]
  61663. mov rax, QWORD PTR [r8]
  61664. mul QWORD PTR [r9+40]
  61665. add r12, rax
  61666. adc r10, rdx
  61667. adc r11, 0
  61668. mov QWORD PTR [rsp+40], r12
  61669. ; A[0] * B[6]
  61670. mov rax, QWORD PTR [r8+48]
  61671. mul QWORD PTR [r9]
  61672. xor r12, r12
  61673. add r10, rax
  61674. adc r11, rdx
  61675. adc r12, 0
  61676. ; A[1] * B[5]
  61677. mov rax, QWORD PTR [r8+40]
  61678. mul QWORD PTR [r9+8]
  61679. add r10, rax
  61680. adc r11, rdx
  61681. adc r12, 0
  61682. ; A[2] * B[4]
  61683. mov rax, QWORD PTR [r8+32]
  61684. mul QWORD PTR [r9+16]
  61685. add r10, rax
  61686. adc r11, rdx
  61687. adc r12, 0
  61688. ; A[3] * B[3]
  61689. mov rax, QWORD PTR [r8+24]
  61690. mul QWORD PTR [r9+24]
  61691. add r10, rax
  61692. adc r11, rdx
  61693. adc r12, 0
  61694. ; A[4] * B[2]
  61695. mov rax, QWORD PTR [r8+16]
  61696. mul QWORD PTR [r9+32]
  61697. add r10, rax
  61698. adc r11, rdx
  61699. adc r12, 0
  61700. ; A[5] * B[1]
  61701. mov rax, QWORD PTR [r8+8]
  61702. mul QWORD PTR [r9+40]
  61703. add r10, rax
  61704. adc r11, rdx
  61705. adc r12, 0
  61706. ; A[6] * B[0]
  61707. mov rax, QWORD PTR [r8]
  61708. mul QWORD PTR [r9+48]
  61709. add r10, rax
  61710. adc r11, rdx
  61711. adc r12, 0
  61712. mov QWORD PTR [rsp+48], r10
  61713. ; A[0] * B[7]
  61714. mov rax, QWORD PTR [r8+56]
  61715. mul QWORD PTR [r9]
  61716. xor r10, r10
  61717. add r11, rax
  61718. adc r12, rdx
  61719. adc r10, 0
  61720. ; A[1] * B[6]
  61721. mov rax, QWORD PTR [r8+48]
  61722. mul QWORD PTR [r9+8]
  61723. add r11, rax
  61724. adc r12, rdx
  61725. adc r10, 0
  61726. ; A[2] * B[5]
  61727. mov rax, QWORD PTR [r8+40]
  61728. mul QWORD PTR [r9+16]
  61729. add r11, rax
  61730. adc r12, rdx
  61731. adc r10, 0
  61732. ; A[3] * B[4]
  61733. mov rax, QWORD PTR [r8+32]
  61734. mul QWORD PTR [r9+24]
  61735. add r11, rax
  61736. adc r12, rdx
  61737. adc r10, 0
  61738. ; A[4] * B[3]
  61739. mov rax, QWORD PTR [r8+24]
  61740. mul QWORD PTR [r9+32]
  61741. add r11, rax
  61742. adc r12, rdx
  61743. adc r10, 0
  61744. ; A[5] * B[2]
  61745. mov rax, QWORD PTR [r8+16]
  61746. mul QWORD PTR [r9+40]
  61747. add r11, rax
  61748. adc r12, rdx
  61749. adc r10, 0
  61750. ; A[6] * B[1]
  61751. mov rax, QWORD PTR [r8+8]
  61752. mul QWORD PTR [r9+48]
  61753. add r11, rax
  61754. adc r12, rdx
  61755. adc r10, 0
  61756. ; A[7] * B[0]
  61757. mov rax, QWORD PTR [r8]
  61758. mul QWORD PTR [r9+56]
  61759. add r11, rax
  61760. adc r12, rdx
  61761. adc r10, 0
  61762. mov QWORD PTR [rsp+56], r11
  61763. ; A[0] * B[8]
  61764. mov rax, QWORD PTR [r8+64]
  61765. mul QWORD PTR [r9]
  61766. xor r11, r11
  61767. add r12, rax
  61768. adc r10, rdx
  61769. adc r11, 0
  61770. ; A[1] * B[7]
  61771. mov rax, QWORD PTR [r8+56]
  61772. mul QWORD PTR [r9+8]
  61773. add r12, rax
  61774. adc r10, rdx
  61775. adc r11, 0
  61776. ; A[2] * B[6]
  61777. mov rax, QWORD PTR [r8+48]
  61778. mul QWORD PTR [r9+16]
  61779. add r12, rax
  61780. adc r10, rdx
  61781. adc r11, 0
  61782. ; A[3] * B[5]
  61783. mov rax, QWORD PTR [r8+40]
  61784. mul QWORD PTR [r9+24]
  61785. add r12, rax
  61786. adc r10, rdx
  61787. adc r11, 0
  61788. ; A[4] * B[4]
  61789. mov rax, QWORD PTR [r8+32]
  61790. mul QWORD PTR [r9+32]
  61791. add r12, rax
  61792. adc r10, rdx
  61793. adc r11, 0
  61794. ; A[5] * B[3]
  61795. mov rax, QWORD PTR [r8+24]
  61796. mul QWORD PTR [r9+40]
  61797. add r12, rax
  61798. adc r10, rdx
  61799. adc r11, 0
  61800. ; A[6] * B[2]
  61801. mov rax, QWORD PTR [r8+16]
  61802. mul QWORD PTR [r9+48]
  61803. add r12, rax
  61804. adc r10, rdx
  61805. adc r11, 0
  61806. ; A[7] * B[1]
  61807. mov rax, QWORD PTR [r8+8]
  61808. mul QWORD PTR [r9+56]
  61809. add r12, rax
  61810. adc r10, rdx
  61811. adc r11, 0
  61812. ; A[8] * B[0]
  61813. mov rax, QWORD PTR [r8]
  61814. mul QWORD PTR [r9+64]
  61815. add r12, rax
  61816. adc r10, rdx
  61817. adc r11, 0
  61818. mov QWORD PTR [rsp+64], r12
  61819. ; A[1] * B[8]
  61820. mov rax, QWORD PTR [r8+64]
  61821. mul QWORD PTR [r9+8]
  61822. xor r12, r12
  61823. add r10, rax
  61824. adc r11, rdx
  61825. adc r12, 0
  61826. ; A[2] * B[7]
  61827. mov rax, QWORD PTR [r8+56]
  61828. mul QWORD PTR [r9+16]
  61829. add r10, rax
  61830. adc r11, rdx
  61831. adc r12, 0
  61832. ; A[3] * B[6]
  61833. mov rax, QWORD PTR [r8+48]
  61834. mul QWORD PTR [r9+24]
  61835. add r10, rax
  61836. adc r11, rdx
  61837. adc r12, 0
  61838. ; A[4] * B[5]
  61839. mov rax, QWORD PTR [r8+40]
  61840. mul QWORD PTR [r9+32]
  61841. add r10, rax
  61842. adc r11, rdx
  61843. adc r12, 0
  61844. ; A[5] * B[4]
  61845. mov rax, QWORD PTR [r8+32]
  61846. mul QWORD PTR [r9+40]
  61847. add r10, rax
  61848. adc r11, rdx
  61849. adc r12, 0
  61850. ; A[6] * B[3]
  61851. mov rax, QWORD PTR [r8+24]
  61852. mul QWORD PTR [r9+48]
  61853. add r10, rax
  61854. adc r11, rdx
  61855. adc r12, 0
  61856. ; A[7] * B[2]
  61857. mov rax, QWORD PTR [r8+16]
  61858. mul QWORD PTR [r9+56]
  61859. add r10, rax
  61860. adc r11, rdx
  61861. adc r12, 0
  61862. ; A[8] * B[1]
  61863. mov rax, QWORD PTR [r8+8]
  61864. mul QWORD PTR [r9+64]
  61865. add r10, rax
  61866. adc r11, rdx
  61867. adc r12, 0
  61868. mov QWORD PTR [rcx+72], r10
  61869. ; A[2] * B[8]
  61870. mov rax, QWORD PTR [r8+64]
  61871. mul QWORD PTR [r9+16]
  61872. xor r10, r10
  61873. add r11, rax
  61874. adc r12, rdx
  61875. adc r10, 0
  61876. ; A[3] * B[7]
  61877. mov rax, QWORD PTR [r8+56]
  61878. mul QWORD PTR [r9+24]
  61879. add r11, rax
  61880. adc r12, rdx
  61881. adc r10, 0
  61882. ; A[4] * B[6]
  61883. mov rax, QWORD PTR [r8+48]
  61884. mul QWORD PTR [r9+32]
  61885. add r11, rax
  61886. adc r12, rdx
  61887. adc r10, 0
  61888. ; A[5] * B[5]
  61889. mov rax, QWORD PTR [r8+40]
  61890. mul QWORD PTR [r9+40]
  61891. add r11, rax
  61892. adc r12, rdx
  61893. adc r10, 0
  61894. ; A[6] * B[4]
  61895. mov rax, QWORD PTR [r8+32]
  61896. mul QWORD PTR [r9+48]
  61897. add r11, rax
  61898. adc r12, rdx
  61899. adc r10, 0
  61900. ; A[7] * B[3]
  61901. mov rax, QWORD PTR [r8+24]
  61902. mul QWORD PTR [r9+56]
  61903. add r11, rax
  61904. adc r12, rdx
  61905. adc r10, 0
  61906. ; A[8] * B[2]
  61907. mov rax, QWORD PTR [r8+16]
  61908. mul QWORD PTR [r9+64]
  61909. add r11, rax
  61910. adc r12, rdx
  61911. adc r10, 0
  61912. mov QWORD PTR [rcx+80], r11
  61913. ; A[3] * B[8]
  61914. mov rax, QWORD PTR [r8+64]
  61915. mul QWORD PTR [r9+24]
  61916. xor r11, r11
  61917. add r12, rax
  61918. adc r10, rdx
  61919. adc r11, 0
  61920. ; A[4] * B[7]
  61921. mov rax, QWORD PTR [r8+56]
  61922. mul QWORD PTR [r9+32]
  61923. add r12, rax
  61924. adc r10, rdx
  61925. adc r11, 0
  61926. ; A[5] * B[6]
  61927. mov rax, QWORD PTR [r8+48]
  61928. mul QWORD PTR [r9+40]
  61929. add r12, rax
  61930. adc r10, rdx
  61931. adc r11, 0
  61932. ; A[6] * B[5]
  61933. mov rax, QWORD PTR [r8+40]
  61934. mul QWORD PTR [r9+48]
  61935. add r12, rax
  61936. adc r10, rdx
  61937. adc r11, 0
  61938. ; A[7] * B[4]
  61939. mov rax, QWORD PTR [r8+32]
  61940. mul QWORD PTR [r9+56]
  61941. add r12, rax
  61942. adc r10, rdx
  61943. adc r11, 0
  61944. ; A[8] * B[3]
  61945. mov rax, QWORD PTR [r8+24]
  61946. mul QWORD PTR [r9+64]
  61947. add r12, rax
  61948. adc r10, rdx
  61949. adc r11, 0
  61950. mov QWORD PTR [rcx+88], r12
  61951. ; A[4] * B[8]
  61952. mov rax, QWORD PTR [r8+64]
  61953. mul QWORD PTR [r9+32]
  61954. xor r12, r12
  61955. add r10, rax
  61956. adc r11, rdx
  61957. adc r12, 0
  61958. ; A[5] * B[7]
  61959. mov rax, QWORD PTR [r8+56]
  61960. mul QWORD PTR [r9+40]
  61961. add r10, rax
  61962. adc r11, rdx
  61963. adc r12, 0
  61964. ; A[6] * B[6]
  61965. mov rax, QWORD PTR [r8+48]
  61966. mul QWORD PTR [r9+48]
  61967. add r10, rax
  61968. adc r11, rdx
  61969. adc r12, 0
  61970. ; A[7] * B[5]
  61971. mov rax, QWORD PTR [r8+40]
  61972. mul QWORD PTR [r9+56]
  61973. add r10, rax
  61974. adc r11, rdx
  61975. adc r12, 0
  61976. ; A[8] * B[4]
  61977. mov rax, QWORD PTR [r8+32]
  61978. mul QWORD PTR [r9+64]
  61979. add r10, rax
  61980. adc r11, rdx
  61981. adc r12, 0
  61982. mov QWORD PTR [rcx+96], r10
  61983. ; A[5] * B[8]
  61984. mov rax, QWORD PTR [r8+64]
  61985. mul QWORD PTR [r9+40]
  61986. xor r10, r10
  61987. add r11, rax
  61988. adc r12, rdx
  61989. adc r10, 0
  61990. ; A[6] * B[7]
  61991. mov rax, QWORD PTR [r8+56]
  61992. mul QWORD PTR [r9+48]
  61993. add r11, rax
  61994. adc r12, rdx
  61995. adc r10, 0
  61996. ; A[7] * B[6]
  61997. mov rax, QWORD PTR [r8+48]
  61998. mul QWORD PTR [r9+56]
  61999. add r11, rax
  62000. adc r12, rdx
  62001. adc r10, 0
  62002. ; A[8] * B[5]
  62003. mov rax, QWORD PTR [r8+40]
  62004. mul QWORD PTR [r9+64]
  62005. add r11, rax
  62006. adc r12, rdx
  62007. adc r10, 0
  62008. mov QWORD PTR [rcx+104], r11
  62009. ; A[6] * B[8]
  62010. mov rax, QWORD PTR [r8+64]
  62011. mul QWORD PTR [r9+48]
  62012. xor r11, r11
  62013. add r12, rax
  62014. adc r10, rdx
  62015. adc r11, 0
  62016. ; A[7] * B[7]
  62017. mov rax, QWORD PTR [r8+56]
  62018. mul QWORD PTR [r9+56]
  62019. add r12, rax
  62020. adc r10, rdx
  62021. adc r11, 0
  62022. ; A[8] * B[6]
  62023. mov rax, QWORD PTR [r8+48]
  62024. mul QWORD PTR [r9+64]
  62025. add r12, rax
  62026. adc r10, rdx
  62027. adc r11, 0
  62028. mov QWORD PTR [rcx+112], r12
  62029. ; A[7] * B[8]
  62030. mov rax, QWORD PTR [r8+64]
  62031. mul QWORD PTR [r9+56]
  62032. xor r12, r12
  62033. add r10, rax
  62034. adc r11, rdx
  62035. adc r12, 0
  62036. ; A[8] * B[7]
  62037. mov rax, QWORD PTR [r8+56]
  62038. mul QWORD PTR [r9+64]
  62039. add r10, rax
  62040. adc r11, rdx
  62041. adc r12, 0
  62042. mov QWORD PTR [rcx+120], r10
  62043. ; A[8] * B[8]
  62044. mov rax, QWORD PTR [r8+64]
  62045. mul QWORD PTR [r9+64]
  62046. add r11, rax
  62047. adc r12, rdx
  62048. mov QWORD PTR [rcx+128], r11
  62049. mov QWORD PTR [rcx+136], r12
  62050. mov rax, QWORD PTR [rsp]
  62051. mov rdx, QWORD PTR [rsp+8]
  62052. mov r10, QWORD PTR [rsp+16]
  62053. mov r11, QWORD PTR [rsp+24]
  62054. mov QWORD PTR [rcx], rax
  62055. mov QWORD PTR [rcx+8], rdx
  62056. mov QWORD PTR [rcx+16], r10
  62057. mov QWORD PTR [rcx+24], r11
  62058. mov rax, QWORD PTR [rsp+32]
  62059. mov rdx, QWORD PTR [rsp+40]
  62060. mov r10, QWORD PTR [rsp+48]
  62061. mov r11, QWORD PTR [rsp+56]
  62062. mov QWORD PTR [rcx+32], rax
  62063. mov QWORD PTR [rcx+40], rdx
  62064. mov QWORD PTR [rcx+48], r10
  62065. mov QWORD PTR [rcx+56], r11
  62066. mov rax, QWORD PTR [rsp+64]
  62067. mov QWORD PTR [rcx+64], rax
  62068. add rsp, 72
  62069. pop r12
  62070. ret
  62071. sp_521_mul_9 ENDP
  62072. _text ENDS
  62073. IFDEF HAVE_INTEL_AVX2
  62074. ; /* Multiply a and b into r. (r = a * b)
  62075. ; *
  62076. ; * r Result of multiplication.
  62077. ; * a First number to multiply.
  62078. ; * b Second number to multiply.
  ; *
  ; * a and b are each read as 9 64-bit words (byte offsets 0..64); the product
  ; * is written to r as 18 64-bit words (byte offsets 0..136).
  ; *
  ; * Arguments arrive in rcx (r), rdx (a) and r8 (b) and are moved so that
  ; * rdx is free to act as the implicit MULX operand:
  ; *   r8  = r (r + 72 while the rows are computed)
  ; *   r9  = a
  ; *   rbp = b
  ; *   rbx = where result words 0..8 are accumulated: r itself, or a 72-byte
  ; *         stack buffer when r is the same pointer as a or b
  ; *   r15 = constant zero
  ; *   r14 = carry out of one row's top word, added into the next row
  ; *
  ; * Each row multiplies one word of a by all of b. Low halves of the products
  ; * are added with ADCX (carry in CF) and high halves with ADOX (carry in OF),
  ; * so two independent carry chains run through the same row. The MOV and
  ; * MULX instructions between them do not modify flags.
  62079. ; */
  62080. _text SEGMENT READONLY PARA
  62081. sp_521_mul_avx2_9 PROC
  62082. push rbx
  62083. push rbp
  62084. push r12
  62085. push r13
  62086. push r14
  62087. push r15
  62088. mov rbp, r8 ; rbp = b
  62089. mov r8, rcx ; r8 = r
  62090. mov r9, rdx ; r9 = a
  62091. sub rsp, 72 ; 9-word scratch buffer for result words 0..8
  ; Choose rbx: scratch buffer if r == a or r == b, otherwise r itself.
  62092. cmp r9, r8
  62093. mov rbx, rsp
  62094. cmovne rbx, r8 ; a != r: accumulate the low words directly in r
  62095. cmp rbp, r8
  62096. cmove rbx, rsp ; b == r: low words must go to scratch while b is still being read
  62097. add r8, 72 ; r8 now addresses result word 9
  62098. xor r15, r15 ; r15 = 0; also clears CF and OF before the first carry chain
  ; Row 0: rdx = A[0]; write A[0] * B[0..8] to result words 0..9 (CF chain only).
  62099. mov rdx, QWORD PTR [r9]
  62100. ; A[0] * B[0]
  62101. mulx r11, r10, QWORD PTR [rbp]
  62102. ; A[0] * B[1]
  62103. mulx r12, rax, QWORD PTR [rbp+8]
  62104. mov QWORD PTR [rbx], r10
  62105. adcx r11, rax
  62106. ; A[0] * B[2]
  62107. mulx r13, rax, QWORD PTR [rbp+16]
  62108. mov QWORD PTR [rbx+8], r11
  62109. adcx r12, rax
  62110. mov QWORD PTR [rbx+16], r12
  62111. ; A[0] * B[3]
  62112. mulx r10, rax, QWORD PTR [rbp+24]
  62113. adcx r13, rax
  62114. ; A[0] * B[4]
  62115. mulx r11, rax, QWORD PTR [rbp+32]
  62116. mov QWORD PTR [rbx+24], r13
  62117. adcx r10, rax
  62118. ; A[0] * B[5]
  62119. mulx r12, rax, QWORD PTR [rbp+40]
  62120. mov QWORD PTR [rbx+32], r10
  62121. adcx r11, rax
  62122. mov QWORD PTR [rbx+40], r11
  62123. ; A[0] * B[6]
  62124. mulx r13, rax, QWORD PTR [rbp+48]
  62125. adcx r12, rax
  62126. ; A[0] * B[7]
  62127. mulx r10, rax, QWORD PTR [rbp+56]
  62128. mov QWORD PTR [rbx+48], r12
  62129. adcx r13, rax
  62130. ; A[0] * B[8]
  62131. mulx r11, rax, QWORD PTR [rbp+64]
  62132. mov QWORD PTR [rbx+56], r13
  62133. adcx r10, rax
  62134. adcx r11, r15 ; fold the last CF into the top word (word 9)
  62135. mov r14, r15
  62136. adcx r14, r15 ; r14 = carry out of the top word; leaves CF clear
  62137. mov QWORD PTR [rbx+64], r10
  62138. mov QWORD PTR [r8], r11
  ; Row 1: rdx = A[1]; add A[1] * B[0..8] into result words 1..9, produce word 10.
  62139. mov rdx, QWORD PTR [r9+8]
  62140. mov r11, QWORD PTR [rbx+8]
  62141. mov r12, QWORD PTR [rbx+16]
  62142. mov r13, QWORD PTR [rbx+24]
  62143. mov r10, QWORD PTR [rbx+32]
  62144. ; A[1] * B[0]
  62145. mulx rcx, rax, QWORD PTR [rbp]
  62146. adcx r11, rax ; low half -> word k (CF chain)
  62147. adox r12, rcx ; high half -> word k+1 (OF chain)
  62148. ; A[1] * B[1]
  62149. mulx rcx, rax, QWORD PTR [rbp+8]
  62150. mov QWORD PTR [rbx+8], r11
  62151. adcx r12, rax
  62152. adox r13, rcx
  62153. ; A[1] * B[2]
  62154. mulx rcx, rax, QWORD PTR [rbp+16]
  62155. mov QWORD PTR [rbx+16], r12
  62156. adcx r13, rax
  62157. adox r10, rcx
  62158. mov QWORD PTR [rbx+24], r13
  62159. mov r11, QWORD PTR [rbx+40]
  62160. mov r12, QWORD PTR [rbx+48]
  62161. mov r13, QWORD PTR [rbx+56]
  62162. ; A[1] * B[3]
  62163. mulx rcx, rax, QWORD PTR [rbp+24]
  62164. adcx r10, rax
  62165. adox r11, rcx
  62166. ; A[1] * B[4]
  62167. mulx rcx, rax, QWORD PTR [rbp+32]
  62168. mov QWORD PTR [rbx+32], r10
  62169. adcx r11, rax
  62170. adox r12, rcx
  62171. ; A[1] * B[5]
  62172. mulx rcx, rax, QWORD PTR [rbp+40]
  62173. mov QWORD PTR [rbx+40], r11
  62174. adcx r12, rax
  62175. adox r13, rcx
  62176. mov QWORD PTR [rbx+48], r12
  62177. mov r10, QWORD PTR [rbx+64]
  62178. mov r11, QWORD PTR [r8]
  62179. ; A[1] * B[6]
  62180. mulx rcx, rax, QWORD PTR [rbp+48]
  62181. adcx r13, rax
  62182. adox r10, rcx
  62183. ; A[1] * B[7]
  62184. mulx rcx, rax, QWORD PTR [rbp+56]
  62185. mov QWORD PTR [rbx+56], r13
  62186. adcx r10, rax
  62187. adox r11, rcx
  62188. ; A[1] * B[8]
  62189. mulx rcx, rax, QWORD PTR [rbp+64]
  62190. mov QWORD PTR [rbx+64], r10
  62191. mov r12, r15 ; new top word starts at zero
  62192. adcx r11, rax
  62193. adox r12, rcx
  62194. adcx r12, r14 ; add the carry saved from the previous row
  62195. mov r14, r15
  62196. adox r14, r15 ; r14 = OF carry out; leaves OF clear
  62197. adcx r14, r15 ; r14 += CF carry out; leaves CF clear
  62198. mov QWORD PTR [r8], r11
  62199. mov QWORD PTR [r8+8], r12
  ; Row 2: rdx = A[2]; add A[2] * B[0..8] into result words 2..10, produce word 11.
  62200. mov rdx, QWORD PTR [r9+16]
  62201. mov r12, QWORD PTR [rbx+16]
  62202. mov r13, QWORD PTR [rbx+24]
  62203. mov r10, QWORD PTR [rbx+32]
  62204. mov r11, QWORD PTR [rbx+40]
  62205. ; A[2] * B[0]
  62206. mulx rcx, rax, QWORD PTR [rbp]
  62207. adcx r12, rax
  62208. adox r13, rcx
  62209. ; A[2] * B[1]
  62210. mulx rcx, rax, QWORD PTR [rbp+8]
  62211. mov QWORD PTR [rbx+16], r12
  62212. adcx r13, rax
  62213. adox r10, rcx
  62214. ; A[2] * B[2]
  62215. mulx rcx, rax, QWORD PTR [rbp+16]
  62216. mov QWORD PTR [rbx+24], r13
  62217. adcx r10, rax
  62218. adox r11, rcx
  62219. mov QWORD PTR [rbx+32], r10
  62220. mov r12, QWORD PTR [rbx+48]
  62221. mov r13, QWORD PTR [rbx+56]
  62222. mov r10, QWORD PTR [rbx+64]
  62223. ; A[2] * B[3]
  62224. mulx rcx, rax, QWORD PTR [rbp+24]
  62225. adcx r11, rax
  62226. adox r12, rcx
  62227. ; A[2] * B[4]
  62228. mulx rcx, rax, QWORD PTR [rbp+32]
  62229. mov QWORD PTR [rbx+40], r11
  62230. adcx r12, rax
  62231. adox r13, rcx
  62232. ; A[2] * B[5]
  62233. mulx rcx, rax, QWORD PTR [rbp+40]
  62234. mov QWORD PTR [rbx+48], r12
  62235. adcx r13, rax
  62236. adox r10, rcx
  62237. mov QWORD PTR [rbx+56], r13
  62238. mov r11, QWORD PTR [r8]
  62239. mov r12, QWORD PTR [r8+8]
  62240. ; A[2] * B[6]
  62241. mulx rcx, rax, QWORD PTR [rbp+48]
  62242. adcx r10, rax
  62243. adox r11, rcx
  62244. ; A[2] * B[7]
  62245. mulx rcx, rax, QWORD PTR [rbp+56]
  62246. mov QWORD PTR [rbx+64], r10
  62247. adcx r11, rax
  62248. adox r12, rcx
  62249. ; A[2] * B[8]
  62250. mulx rcx, rax, QWORD PTR [rbp+64]
  62251. mov QWORD PTR [r8], r11
  62252. mov r13, r15
  62253. adcx r12, rax
  62254. adox r13, rcx
  62255. adcx r13, r14 ; add the carry saved from the previous row
  62256. mov r14, r15
  62257. adox r14, r15
  62258. adcx r14, r15 ; r14 = carry out of this row; CF and OF are clear again
  62259. mov QWORD PTR [r8+8], r12
  62260. mov QWORD PTR [r8+16], r13
  ; Row 3: rdx = A[3]; add A[3] * B[0..8] into result words 3..11, produce word 12.
  62261. mov rdx, QWORD PTR [r9+24]
  62262. mov r13, QWORD PTR [rbx+24]
  62263. mov r10, QWORD PTR [rbx+32]
  62264. mov r11, QWORD PTR [rbx+40]
  62265. mov r12, QWORD PTR [rbx+48]
  62266. ; A[3] * B[0]
  62267. mulx rcx, rax, QWORD PTR [rbp]
  62268. adcx r13, rax
  62269. adox r10, rcx
  62270. ; A[3] * B[1]
  62271. mulx rcx, rax, QWORD PTR [rbp+8]
  62272. mov QWORD PTR [rbx+24], r13
  62273. adcx r10, rax
  62274. adox r11, rcx
  62275. ; A[3] * B[2]
  62276. mulx rcx, rax, QWORD PTR [rbp+16]
  62277. mov QWORD PTR [rbx+32], r10
  62278. adcx r11, rax
  62279. adox r12, rcx
  62280. mov QWORD PTR [rbx+40], r11
  62281. mov r13, QWORD PTR [rbx+56]
  62282. mov r10, QWORD PTR [rbx+64]
  62283. mov r11, QWORD PTR [r8]
  62284. ; A[3] * B[3]
  62285. mulx rcx, rax, QWORD PTR [rbp+24]
  62286. adcx r12, rax
  62287. adox r13, rcx
  62288. ; A[3] * B[4]
  62289. mulx rcx, rax, QWORD PTR [rbp+32]
  62290. mov QWORD PTR [rbx+48], r12
  62291. adcx r13, rax
  62292. adox r10, rcx
  62293. ; A[3] * B[5]
  62294. mulx rcx, rax, QWORD PTR [rbp+40]
  62295. mov QWORD PTR [rbx+56], r13
  62296. adcx r10, rax
  62297. adox r11, rcx
  62298. mov QWORD PTR [rbx+64], r10
  62299. mov r12, QWORD PTR [r8+8]
  62300. mov r13, QWORD PTR [r8+16]
  62301. ; A[3] * B[6]
  62302. mulx rcx, rax, QWORD PTR [rbp+48]
  62303. adcx r11, rax
  62304. adox r12, rcx
  62305. ; A[3] * B[7]
  62306. mulx rcx, rax, QWORD PTR [rbp+56]
  62307. mov QWORD PTR [r8], r11
  62308. adcx r12, rax
  62309. adox r13, rcx
  62310. ; A[3] * B[8]
  62311. mulx rcx, rax, QWORD PTR [rbp+64]
  62312. mov QWORD PTR [r8+8], r12
  62313. mov r10, r15
  62314. adcx r13, rax
  62315. adox r10, rcx
  62316. adcx r10, r14 ; add the carry saved from the previous row
  62317. mov r14, r15
  62318. adox r14, r15
  62319. adcx r14, r15 ; r14 = carry out of this row; CF and OF are clear again
  62320. mov QWORD PTR [r8+16], r13
  62321. mov QWORD PTR [r8+24], r10
  ; Row 4: rdx = A[4]; add A[4] * B[0..8] into result words 4..12, produce word 13.
  62322. mov rdx, QWORD PTR [r9+32]
  62323. mov r10, QWORD PTR [rbx+32]
  62324. mov r11, QWORD PTR [rbx+40]
  62325. mov r12, QWORD PTR [rbx+48]
  62326. mov r13, QWORD PTR [rbx+56]
  62327. ; A[4] * B[0]
  62328. mulx rcx, rax, QWORD PTR [rbp]
  62329. adcx r10, rax
  62330. adox r11, rcx
  62331. ; A[4] * B[1]
  62332. mulx rcx, rax, QWORD PTR [rbp+8]
  62333. mov QWORD PTR [rbx+32], r10
  62334. adcx r11, rax
  62335. adox r12, rcx
  62336. ; A[4] * B[2]
  62337. mulx rcx, rax, QWORD PTR [rbp+16]
  62338. mov QWORD PTR [rbx+40], r11
  62339. adcx r12, rax
  62340. adox r13, rcx
  62341. mov QWORD PTR [rbx+48], r12
  62342. mov r10, QWORD PTR [rbx+64]
  62343. mov r11, QWORD PTR [r8]
  62344. mov r12, QWORD PTR [r8+8]
  62345. ; A[4] * B[3]
  62346. mulx rcx, rax, QWORD PTR [rbp+24]
  62347. adcx r13, rax
  62348. adox r10, rcx
  62349. ; A[4] * B[4]
  62350. mulx rcx, rax, QWORD PTR [rbp+32]
  62351. mov QWORD PTR [rbx+56], r13
  62352. adcx r10, rax
  62353. adox r11, rcx
  62354. ; A[4] * B[5]
  62355. mulx rcx, rax, QWORD PTR [rbp+40]
  62356. mov QWORD PTR [rbx+64], r10
  62357. adcx r11, rax
  62358. adox r12, rcx
  62359. mov QWORD PTR [r8], r11
  62360. mov r13, QWORD PTR [r8+16]
  62361. mov r10, QWORD PTR [r8+24]
  62362. ; A[4] * B[6]
  62363. mulx rcx, rax, QWORD PTR [rbp+48]
  62364. adcx r12, rax
  62365. adox r13, rcx
  62366. ; A[4] * B[7]
  62367. mulx rcx, rax, QWORD PTR [rbp+56]
  62368. mov QWORD PTR [r8+8], r12
  62369. adcx r13, rax
  62370. adox r10, rcx
  62371. ; A[4] * B[8]
  62372. mulx rcx, rax, QWORD PTR [rbp+64]
  62373. mov QWORD PTR [r8+16], r13
  62374. mov r11, r15
  62375. adcx r10, rax
  62376. adox r11, rcx
  62377. adcx r11, r14 ; add the carry saved from the previous row
  62378. mov r14, r15
  62379. adox r14, r15
  62380. adcx r14, r15 ; r14 = carry out of this row; CF and OF are clear again
  62381. mov QWORD PTR [r8+24], r10
  62382. mov QWORD PTR [r8+32], r11
  ; Row 5: rdx = A[5]; add A[5] * B[0..8] into result words 5..13, produce word 14.
  62383. mov rdx, QWORD PTR [r9+40]
  62384. mov r11, QWORD PTR [rbx+40]
  62385. mov r12, QWORD PTR [rbx+48]
  62386. mov r13, QWORD PTR [rbx+56]
  62387. mov r10, QWORD PTR [rbx+64]
  62388. ; A[5] * B[0]
  62389. mulx rcx, rax, QWORD PTR [rbp]
  62390. adcx r11, rax
  62391. adox r12, rcx
  62392. ; A[5] * B[1]
  62393. mulx rcx, rax, QWORD PTR [rbp+8]
  62394. mov QWORD PTR [rbx+40], r11
  62395. adcx r12, rax
  62396. adox r13, rcx
  62397. ; A[5] * B[2]
  62398. mulx rcx, rax, QWORD PTR [rbp+16]
  62399. mov QWORD PTR [rbx+48], r12
  62400. adcx r13, rax
  62401. adox r10, rcx
  62402. mov QWORD PTR [rbx+56], r13
  62403. mov r11, QWORD PTR [r8]
  62404. mov r12, QWORD PTR [r8+8]
  62405. mov r13, QWORD PTR [r8+16]
  62406. ; A[5] * B[3]
  62407. mulx rcx, rax, QWORD PTR [rbp+24]
  62408. adcx r10, rax
  62409. adox r11, rcx
  62410. ; A[5] * B[4]
  62411. mulx rcx, rax, QWORD PTR [rbp+32]
  62412. mov QWORD PTR [rbx+64], r10
  62413. adcx r11, rax
  62414. adox r12, rcx
  62415. ; A[5] * B[5]
  62416. mulx rcx, rax, QWORD PTR [rbp+40]
  62417. mov QWORD PTR [r8], r11
  62418. adcx r12, rax
  62419. adox r13, rcx
  62420. mov QWORD PTR [r8+8], r12
  62421. mov r10, QWORD PTR [r8+24]
  62422. mov r11, QWORD PTR [r8+32]
  62423. ; A[5] * B[6]
  62424. mulx rcx, rax, QWORD PTR [rbp+48]
  62425. adcx r13, rax
  62426. adox r10, rcx
  62427. ; A[5] * B[7]
  62428. mulx rcx, rax, QWORD PTR [rbp+56]
  62429. mov QWORD PTR [r8+16], r13
  62430. adcx r10, rax
  62431. adox r11, rcx
  62432. ; A[5] * B[8]
  62433. mulx rcx, rax, QWORD PTR [rbp+64]
  62434. mov QWORD PTR [r8+24], r10
  62435. mov r12, r15
  62436. adcx r11, rax
  62437. adox r12, rcx
  62438. adcx r12, r14 ; add the carry saved from the previous row
  62439. mov r14, r15
  62440. adox r14, r15
  62441. adcx r14, r15 ; r14 = carry out of this row; CF and OF are clear again
  62442. mov QWORD PTR [r8+32], r11
  62443. mov QWORD PTR [r8+40], r12
  ; Row 6: rdx = A[6]; add A[6] * B[0..8] into result words 6..14, produce word 15.
  62444. mov rdx, QWORD PTR [r9+48]
  62445. mov r12, QWORD PTR [rbx+48]
  62446. mov r13, QWORD PTR [rbx+56]
  62447. mov r10, QWORD PTR [rbx+64]
  62448. mov r11, QWORD PTR [r8]
  62449. ; A[6] * B[0]
  62450. mulx rcx, rax, QWORD PTR [rbp]
  62451. adcx r12, rax
  62452. adox r13, rcx
  62453. ; A[6] * B[1]
  62454. mulx rcx, rax, QWORD PTR [rbp+8]
  62455. mov QWORD PTR [rbx+48], r12
  62456. adcx r13, rax
  62457. adox r10, rcx
  62458. ; A[6] * B[2]
  62459. mulx rcx, rax, QWORD PTR [rbp+16]
  62460. mov QWORD PTR [rbx+56], r13
  62461. adcx r10, rax
  62462. adox r11, rcx
  62463. mov QWORD PTR [rbx+64], r10
  62464. mov r12, QWORD PTR [r8+8]
  62465. mov r13, QWORD PTR [r8+16]
  62466. mov r10, QWORD PTR [r8+24]
  62467. ; A[6] * B[3]
  62468. mulx rcx, rax, QWORD PTR [rbp+24]
  62469. adcx r11, rax
  62470. adox r12, rcx
  62471. ; A[6] * B[4]
  62472. mulx rcx, rax, QWORD PTR [rbp+32]
  62473. mov QWORD PTR [r8], r11
  62474. adcx r12, rax
  62475. adox r13, rcx
  62476. ; A[6] * B[5]
  62477. mulx rcx, rax, QWORD PTR [rbp+40]
  62478. mov QWORD PTR [r8+8], r12
  62479. adcx r13, rax
  62480. adox r10, rcx
  62481. mov QWORD PTR [r8+16], r13
  62482. mov r11, QWORD PTR [r8+32]
  62483. mov r12, QWORD PTR [r8+40]
  62484. ; A[6] * B[6]
  62485. mulx rcx, rax, QWORD PTR [rbp+48]
  62486. adcx r10, rax
  62487. adox r11, rcx
  62488. ; A[6] * B[7]
  62489. mulx rcx, rax, QWORD PTR [rbp+56]
  62490. mov QWORD PTR [r8+24], r10
  62491. adcx r11, rax
  62492. adox r12, rcx
  62493. ; A[6] * B[8]
  62494. mulx rcx, rax, QWORD PTR [rbp+64]
  62495. mov QWORD PTR [r8+32], r11
  62496. mov r13, r15
  62497. adcx r12, rax
  62498. adox r13, rcx
  62499. adcx r13, r14 ; add the carry saved from the previous row
  62500. mov r14, r15
  62501. adox r14, r15
  62502. adcx r14, r15 ; r14 = carry out of this row; CF and OF are clear again
  62503. mov QWORD PTR [r8+40], r12
  62504. mov QWORD PTR [r8+48], r13
  ; Row 7: rdx = A[7]; add A[7] * B[0..8] into result words 7..15, produce word 16.
  62505. mov rdx, QWORD PTR [r9+56]
  62506. mov r13, QWORD PTR [rbx+56]
  62507. mov r10, QWORD PTR [rbx+64]
  62508. mov r11, QWORD PTR [r8]
  62509. mov r12, QWORD PTR [r8+8]
  62510. ; A[7] * B[0]
  62511. mulx rcx, rax, QWORD PTR [rbp]
  62512. adcx r13, rax
  62513. adox r10, rcx
  62514. ; A[7] * B[1]
  62515. mulx rcx, rax, QWORD PTR [rbp+8]
  62516. mov QWORD PTR [rbx+56], r13
  62517. adcx r10, rax
  62518. adox r11, rcx
  62519. ; A[7] * B[2]
  62520. mulx rcx, rax, QWORD PTR [rbp+16]
  62521. mov QWORD PTR [rbx+64], r10
  62522. adcx r11, rax
  62523. adox r12, rcx
  62524. mov QWORD PTR [r8], r11
  62525. mov r13, QWORD PTR [r8+16]
  62526. mov r10, QWORD PTR [r8+24]
  62527. mov r11, QWORD PTR [r8+32]
  62528. ; A[7] * B[3]
  62529. mulx rcx, rax, QWORD PTR [rbp+24]
  62530. adcx r12, rax
  62531. adox r13, rcx
  62532. ; A[7] * B[4]
  62533. mulx rcx, rax, QWORD PTR [rbp+32]
  62534. mov QWORD PTR [r8+8], r12
  62535. adcx r13, rax
  62536. adox r10, rcx
  62537. ; A[7] * B[5]
  62538. mulx rcx, rax, QWORD PTR [rbp+40]
  62539. mov QWORD PTR [r8+16], r13
  62540. adcx r10, rax
  62541. adox r11, rcx
  62542. mov QWORD PTR [r8+24], r10
  62543. mov r12, QWORD PTR [r8+40]
  62544. mov r13, QWORD PTR [r8+48]
  62545. ; A[7] * B[6]
  62546. mulx rcx, rax, QWORD PTR [rbp+48]
  62547. adcx r11, rax
  62548. adox r12, rcx
  62549. ; A[7] * B[7]
  62550. mulx rcx, rax, QWORD PTR [rbp+56]
  62551. mov QWORD PTR [r8+32], r11
  62552. adcx r12, rax
  62553. adox r13, rcx
  62554. ; A[7] * B[8]
  62555. mulx rcx, rax, QWORD PTR [rbp+64]
  62556. mov QWORD PTR [r8+40], r12
  62557. mov r10, r15
  62558. adcx r13, rax
  62559. adox r10, rcx
  62560. adcx r10, r14 ; add the carry saved from the previous row
  62561. mov r14, r15
  62562. adox r14, r15
  62563. adcx r14, r15 ; r14 = carry out of this row; CF and OF are clear again
  62564. mov QWORD PTR [r8+48], r13
  62565. mov QWORD PTR [r8+56], r10
  ; Row 8: rdx = A[8]; add A[8] * B[0..8] into result words 8..16, produce word 17.
  62566. mov rdx, QWORD PTR [r9+64]
  62567. mov r10, QWORD PTR [rbx+64]
  62568. mov r11, QWORD PTR [r8]
  62569. mov r12, QWORD PTR [r8+8]
  62570. mov r13, QWORD PTR [r8+16]
  62571. ; A[8] * B[0]
  62572. mulx rcx, rax, QWORD PTR [rbp]
  62573. adcx r10, rax
  62574. adox r11, rcx
  62575. ; A[8] * B[1]
  62576. mulx rcx, rax, QWORD PTR [rbp+8]
  62577. mov QWORD PTR [rbx+64], r10
  62578. adcx r11, rax
  62579. adox r12, rcx
  62580. ; A[8] * B[2]
  62581. mulx rcx, rax, QWORD PTR [rbp+16]
  62582. mov QWORD PTR [r8], r11
  62583. adcx r12, rax
  62584. adox r13, rcx
  62585. mov QWORD PTR [r8+8], r12
  62586. mov r10, QWORD PTR [r8+24]
  62587. mov r11, QWORD PTR [r8+32]
  62588. mov r12, QWORD PTR [r8+40]
  62589. ; A[8] * B[3]
  62590. mulx rcx, rax, QWORD PTR [rbp+24]
  62591. adcx r13, rax
  62592. adox r10, rcx
  62593. ; A[8] * B[4]
  62594. mulx rcx, rax, QWORD PTR [rbp+32]
  62595. mov QWORD PTR [r8+16], r13
  62596. adcx r10, rax
  62597. adox r11, rcx
  62598. ; A[8] * B[5]
  62599. mulx rcx, rax, QWORD PTR [rbp+40]
  62600. mov QWORD PTR [r8+24], r10
  62601. adcx r11, rax
  62602. adox r12, rcx
  62603. mov QWORD PTR [r8+32], r11
  62604. mov r13, QWORD PTR [r8+48]
  62605. mov r10, QWORD PTR [r8+56]
  62606. ; A[8] * B[6]
  62607. mulx rcx, rax, QWORD PTR [rbp+48]
  62608. adcx r12, rax
  62609. adox r13, rcx
  62610. ; A[8] * B[7]
  62611. mulx rcx, rax, QWORD PTR [rbp+56]
  62612. mov QWORD PTR [r8+40], r12
  62613. adcx r13, rax
  62614. adox r10, rcx
  62615. ; A[8] * B[8]
  62616. mulx rcx, rax, QWORD PTR [rbp+64]
  62617. mov QWORD PTR [r8+48], r13
  62618. mov r11, r15
  62619. adcx r10, rax
  62620. adox r11, rcx
  62621. adcx r11, r14 ; last row: word 17 takes the saved carry; no carry is kept after this
  62622. mov QWORD PTR [r8+56], r10
  62623. mov QWORD PTR [r8+64], r11
  62624. sub r8, 72 ; r8 = r again
  ; If r is the same pointer as a or b, words 0..8 were built in the stack
  ; buffer (rbx == rsp) and must now be copied into r; otherwise they are
  ; already in place and the copy is skipped.
  62625. cmp r9, r8
  62626. je L_start_521_mul_avx2_9
  62627. cmp rbp, r8
  62628. jne L_end_521_mul_avx2_9
  62629. L_start_521_mul_avx2_9:
  ; Copy 72 bytes: four unaligned 16-byte moves plus one 8-byte move.
  62630. vmovdqu xmm0, OWORD PTR [rbx]
  62631. vmovups OWORD PTR [r8], xmm0
  62632. vmovdqu xmm0, OWORD PTR [rbx+16]
  62633. vmovups OWORD PTR [r8+16], xmm0
  62634. vmovdqu xmm0, OWORD PTR [rbx+32]
  62635. vmovups OWORD PTR [r8+32], xmm0
  62636. vmovdqu xmm0, OWORD PTR [rbx+48]
  62637. vmovups OWORD PTR [r8+48], xmm0
  62638. mov rax, QWORD PTR [rbx+64]
  62639. mov QWORD PTR [r8+64], rax
  62640. L_end_521_mul_avx2_9:
  62641. add rsp, 72 ; release the scratch buffer
  62642. pop r15
  62643. pop r14
  62644. pop r13
  62645. pop r12
  62646. pop rbp
  62647. pop rbx
  62648. ret
  62649. sp_521_mul_avx2_9 ENDP
  62650. _text ENDS
  62651. ENDIF
  62652. ; /* Square a and put result in r. (r = a * a)
  62653. ; *
  62654. ; * r A single precision integer.
  62655. ; * a A single precision integer.
  62656. ; */
  62657. _text SEGMENT READONLY PARA
  62658. sp_521_sqr_9 PROC
  62659. push r12
  62660. push r13
  62661. push r14
  62662. mov r8, rdx
  62663. sub rsp, 72
  62664. ; A[0] * A[0]
  62665. mov rax, QWORD PTR [r8]
  62666. mul rax
  62667. xor r11, r11
  62668. mov QWORD PTR [rsp], rax
  62669. mov r10, rdx
  62670. ; A[0] * A[1]
  62671. mov rax, QWORD PTR [r8+8]
  62672. mul QWORD PTR [r8]
  62673. xor r9, r9
  62674. add r10, rax
  62675. adc r11, rdx
  62676. adc r9, 0
  62677. add r10, rax
  62678. adc r11, rdx
  62679. adc r9, 0
  62680. mov QWORD PTR [rsp+8], r10
  62681. ; A[0] * A[2]
  62682. mov rax, QWORD PTR [r8+16]
  62683. mul QWORD PTR [r8]
  62684. xor r10, r10
  62685. add r11, rax
  62686. adc r9, rdx
  62687. adc r10, 0
  62688. add r11, rax
  62689. adc r9, rdx
  62690. adc r10, 0
  62691. ; A[1] * A[1]
  62692. mov rax, QWORD PTR [r8+8]
  62693. mul rax
  62694. add r11, rax
  62695. adc r9, rdx
  62696. adc r10, 0
  62697. mov QWORD PTR [rsp+16], r11
  62698. ; A[0] * A[3]
  62699. mov rax, QWORD PTR [r8+24]
  62700. mul QWORD PTR [r8]
  62701. xor r11, r11
  62702. add r9, rax
  62703. adc r10, rdx
  62704. adc r11, 0
  62705. add r9, rax
  62706. adc r10, rdx
  62707. adc r11, 0
  62708. ; A[1] * A[2]
  62709. mov rax, QWORD PTR [r8+16]
  62710. mul QWORD PTR [r8+8]
  62711. add r9, rax
  62712. adc r10, rdx
  62713. adc r11, 0
  62714. add r9, rax
  62715. adc r10, rdx
  62716. adc r11, 0
  62717. mov QWORD PTR [rsp+24], r9
  62718. ; A[0] * A[4]
  62719. mov rax, QWORD PTR [r8+32]
  62720. mul QWORD PTR [r8]
  62721. xor r9, r9
  62722. add r10, rax
  62723. adc r11, rdx
  62724. adc r9, 0
  62725. add r10, rax
  62726. adc r11, rdx
  62727. adc r9, 0
  62728. ; A[1] * A[3]
  62729. mov rax, QWORD PTR [r8+24]
  62730. mul QWORD PTR [r8+8]
  62731. add r10, rax
  62732. adc r11, rdx
  62733. adc r9, 0
  62734. add r10, rax
  62735. adc r11, rdx
  62736. adc r9, 0
  62737. ; A[2] * A[2]
  62738. mov rax, QWORD PTR [r8+16]
  62739. mul rax
  62740. add r10, rax
  62741. adc r11, rdx
  62742. adc r9, 0
  62743. mov QWORD PTR [rsp+32], r10
  62744. ; A[0] * A[5]
  62745. mov rax, QWORD PTR [r8+40]
  62746. mul QWORD PTR [r8]
  62747. xor r10, r10
  62748. xor r14, r14
  62749. mov r12, rax
  62750. mov r13, rdx
  62751. ; A[1] * A[4]
  62752. mov rax, QWORD PTR [r8+32]
  62753. mul QWORD PTR [r8+8]
  62754. add r12, rax
  62755. adc r13, rdx
  62756. adc r14, 0
  62757. ; A[2] * A[3]
  62758. mov rax, QWORD PTR [r8+24]
  62759. mul QWORD PTR [r8+16]
  62760. add r12, rax
  62761. adc r13, rdx
  62762. adc r14, 0
  62763. add r12, r12
  62764. adc r13, r13
  62765. adc r14, r14
  62766. add r11, r12
  62767. adc r9, r13
  62768. adc r10, r14
  62769. mov QWORD PTR [rsp+40], r11
  62770. ; A[0] * A[6]
  62771. mov rax, QWORD PTR [r8+48]
  62772. mul QWORD PTR [r8]
  62773. xor r11, r11
  62774. xor r14, r14
  62775. mov r12, rax
  62776. mov r13, rdx
  62777. ; A[1] * A[5]
  62778. mov rax, QWORD PTR [r8+40]
  62779. mul QWORD PTR [r8+8]
  62780. add r12, rax
  62781. adc r13, rdx
  62782. adc r14, 0
  62783. ; A[2] * A[4]
  62784. mov rax, QWORD PTR [r8+32]
  62785. mul QWORD PTR [r8+16]
  62786. add r12, rax
  62787. adc r13, rdx
  62788. adc r14, 0
  62789. ; A[3] * A[3]
  62790. mov rax, QWORD PTR [r8+24]
  62791. mul rax
  62792. add r12, r12
  62793. adc r13, r13
  62794. adc r14, r14
  62795. add r12, rax
  62796. adc r13, rdx
  62797. adc r14, 0
  62798. add r9, r12
  62799. adc r10, r13
  62800. adc r11, r14
  62801. mov QWORD PTR [rsp+48], r9
  62802. ; A[0] * A[7]
  62803. mov rax, QWORD PTR [r8+56]
  62804. mul QWORD PTR [r8]
  62805. xor r9, r9
  62806. xor r14, r14
  62807. mov r12, rax
  62808. mov r13, rdx
  62809. ; A[1] * A[6]
  62810. mov rax, QWORD PTR [r8+48]
  62811. mul QWORD PTR [r8+8]
  62812. add r12, rax
  62813. adc r13, rdx
  62814. adc r14, 0
  62815. ; A[2] * A[5]
  62816. mov rax, QWORD PTR [r8+40]
  62817. mul QWORD PTR [r8+16]
  62818. add r12, rax
  62819. adc r13, rdx
  62820. adc r14, 0
  62821. ; A[3] * A[4]
  62822. mov rax, QWORD PTR [r8+32]
  62823. mul QWORD PTR [r8+24]
  62824. add r12, rax
  62825. adc r13, rdx
  62826. adc r14, 0
  62827. add r12, r12
  62828. adc r13, r13
  62829. adc r14, r14
  62830. add r10, r12
  62831. adc r11, r13
  62832. adc r9, r14
  62833. mov QWORD PTR [rsp+56], r10
  62834. ; A[0] * A[8]
  62835. mov rax, QWORD PTR [r8+64]
  62836. mul QWORD PTR [r8]
  62837. xor r10, r10
  62838. xor r14, r14
  62839. mov r12, rax
  62840. mov r13, rdx
  62841. ; A[1] * A[7]
  62842. mov rax, QWORD PTR [r8+56]
  62843. mul QWORD PTR [r8+8]
  62844. add r12, rax
  62845. adc r13, rdx
  62846. adc r14, 0
  62847. ; A[2] * A[6]
  62848. mov rax, QWORD PTR [r8+48]
  62849. mul QWORD PTR [r8+16]
  62850. add r12, rax
  62851. adc r13, rdx
  62852. adc r14, 0
  62853. ; A[3] * A[5]
  62854. mov rax, QWORD PTR [r8+40]
  62855. mul QWORD PTR [r8+24]
  62856. add r12, rax
  62857. adc r13, rdx
  62858. adc r14, 0
  62859. ; A[4] * A[4]
  62860. mov rax, QWORD PTR [r8+32]
  62861. mul rax
  62862. add r12, r12
  62863. adc r13, r13
  62864. adc r14, r14
  62865. add r12, rax
  62866. adc r13, rdx
  62867. adc r14, 0
  62868. add r11, r12
  62869. adc r9, r13
  62870. adc r10, r14
  62871. mov QWORD PTR [rsp+64], r11
  62872. ; A[1] * A[8]
  62873. mov rax, QWORD PTR [r8+64]
  62874. mul QWORD PTR [r8+8]
  62875. xor r11, r11
  62876. xor r14, r14
  62877. mov r12, rax
  62878. mov r13, rdx
  62879. ; A[2] * A[7]
  62880. mov rax, QWORD PTR [r8+56]
  62881. mul QWORD PTR [r8+16]
  62882. add r12, rax
  62883. adc r13, rdx
  62884. adc r14, 0
  62885. ; A[3] * A[6]
  62886. mov rax, QWORD PTR [r8+48]
  62887. mul QWORD PTR [r8+24]
  62888. add r12, rax
  62889. adc r13, rdx
  62890. adc r14, 0
  62891. ; A[4] * A[5]
  62892. mov rax, QWORD PTR [r8+40]
  62893. mul QWORD PTR [r8+32]
  62894. add r12, rax
  62895. adc r13, rdx
  62896. adc r14, 0
  62897. add r12, r12
  62898. adc r13, r13
  62899. adc r14, r14
  62900. add r9, r12
  62901. adc r10, r13
  62902. adc r11, r14
  62903. mov QWORD PTR [rcx+72], r9
  62904. ; A[2] * A[8]
  62905. mov rax, QWORD PTR [r8+64]
  62906. mul QWORD PTR [r8+16]
  62907. xor r9, r9
  62908. xor r14, r14
  62909. mov r12, rax
  62910. mov r13, rdx
  62911. ; A[3] * A[7]
  62912. mov rax, QWORD PTR [r8+56]
  62913. mul QWORD PTR [r8+24]
  62914. add r12, rax
  62915. adc r13, rdx
  62916. adc r14, 0
  62917. ; A[4] * A[6]
  62918. mov rax, QWORD PTR [r8+48]
  62919. mul QWORD PTR [r8+32]
  62920. add r12, rax
  62921. adc r13, rdx
  62922. adc r14, 0
  62923. ; A[5] * A[5]
  62924. mov rax, QWORD PTR [r8+40]
  62925. mul rax
  62926. add r12, r12
  62927. adc r13, r13
  62928. adc r14, r14
  62929. add r12, rax
  62930. adc r13, rdx
  62931. adc r14, 0
  62932. add r10, r12
  62933. adc r11, r13
  62934. adc r9, r14
  62935. mov QWORD PTR [rcx+80], r10
  62936. ; A[3] * A[8]
  62937. mov rax, QWORD PTR [r8+64]
  62938. mul QWORD PTR [r8+24]
  62939. xor r10, r10
  62940. xor r14, r14
  62941. mov r12, rax
  62942. mov r13, rdx
  62943. ; A[4] * A[7]
  62944. mov rax, QWORD PTR [r8+56]
  62945. mul QWORD PTR [r8+32]
  62946. add r12, rax
  62947. adc r13, rdx
  62948. adc r14, 0
  62949. ; A[5] * A[6]
  62950. mov rax, QWORD PTR [r8+48]
  62951. mul QWORD PTR [r8+40]
  62952. add r12, rax
  62953. adc r13, rdx
  62954. adc r14, 0
  62955. add r12, r12
  62956. adc r13, r13
  62957. adc r14, r14
  62958. add r11, r12
  62959. adc r9, r13
  62960. adc r10, r14
  62961. mov QWORD PTR [rcx+88], r11
  62962. ; A[4] * A[8]
  62963. mov rax, QWORD PTR [r8+64]
  62964. mul QWORD PTR [r8+32]
  62965. xor r11, r11
  62966. add r9, rax
  62967. adc r10, rdx
  62968. adc r11, 0
  62969. add r9, rax
  62970. adc r10, rdx
  62971. adc r11, 0
  62972. ; A[5] * A[7]
  62973. mov rax, QWORD PTR [r8+56]
  62974. mul QWORD PTR [r8+40]
  62975. add r9, rax
  62976. adc r10, rdx
  62977. adc r11, 0
  62978. add r9, rax
  62979. adc r10, rdx
  62980. adc r11, 0
  62981. ; A[6] * A[6]
  62982. mov rax, QWORD PTR [r8+48]
  62983. mul rax
  62984. add r9, rax
  62985. adc r10, rdx
  62986. adc r11, 0
  62987. mov QWORD PTR [rcx+96], r9
  62988. ; A[5] * A[8]
  62989. mov rax, QWORD PTR [r8+64]
  62990. mul QWORD PTR [r8+40]
  62991. xor r9, r9
  62992. add r10, rax
  62993. adc r11, rdx
  62994. adc r9, 0
  62995. add r10, rax
  62996. adc r11, rdx
  62997. adc r9, 0
  62998. ; A[6] * A[7]
  62999. mov rax, QWORD PTR [r8+56]
  63000. mul QWORD PTR [r8+48]
  63001. add r10, rax
  63002. adc r11, rdx
  63003. adc r9, 0
  63004. add r10, rax
  63005. adc r11, rdx
  63006. adc r9, 0
  63007. mov QWORD PTR [rcx+104], r10
  63008. ; A[6] * A[8]
  63009. mov rax, QWORD PTR [r8+64]
  63010. mul QWORD PTR [r8+48]
  63011. xor r10, r10
  63012. add r11, rax
  63013. adc r9, rdx
  63014. adc r10, 0
  63015. add r11, rax
  63016. adc r9, rdx
  63017. adc r10, 0
  63018. ; A[7] * A[7]
  63019. mov rax, QWORD PTR [r8+56]
  63020. mul rax
  63021. add r11, rax
  63022. adc r9, rdx
  63023. adc r10, 0
  63024. mov QWORD PTR [rcx+112], r11
  63025. ; A[7] * A[8]
  63026. mov rax, QWORD PTR [r8+64]
  63027. mul QWORD PTR [r8+56]
  63028. xor r11, r11
  63029. add r9, rax
  63030. adc r10, rdx
  63031. adc r11, 0
  63032. add r9, rax
  63033. adc r10, rdx
  63034. adc r11, 0
  63035. mov QWORD PTR [rcx+120], r9
  63036. ; A[8] * A[8]
  63037. mov rax, QWORD PTR [r8+64]
  63038. mul rax
  63039. add r10, rax
  63040. adc r11, rdx
  63041. mov QWORD PTR [rcx+128], r10
  63042. mov QWORD PTR [rcx+136], r11
  63043. mov rax, QWORD PTR [rsp]
  63044. mov rdx, QWORD PTR [rsp+8]
  63045. mov r12, QWORD PTR [rsp+16]
  63046. mov r13, QWORD PTR [rsp+24]
  63047. mov QWORD PTR [rcx], rax
  63048. mov QWORD PTR [rcx+8], rdx
  63049. mov QWORD PTR [rcx+16], r12
  63050. mov QWORD PTR [rcx+24], r13
  63051. mov rax, QWORD PTR [rsp+32]
  63052. mov rdx, QWORD PTR [rsp+40]
  63053. mov r12, QWORD PTR [rsp+48]
  63054. mov r13, QWORD PTR [rsp+56]
  63055. mov QWORD PTR [rcx+32], rax
  63056. mov QWORD PTR [rcx+40], rdx
  63057. mov QWORD PTR [rcx+48], r12
  63058. mov QWORD PTR [rcx+56], r13
  63059. mov rax, QWORD PTR [rsp+64]
  63060. mov QWORD PTR [rcx+64], rax
  63061. add rsp, 72
  63062. pop r14
  63063. pop r13
  63064. pop r12
  63065. ret
  63066. sp_521_sqr_9 ENDP
  63067. _text ENDS
  63068. IFDEF HAVE_INTEL_AVX2
  63069. ; /* Square a and put result in r. (r = a * a)
  63070. ; * Assembled only when HAVE_INTEL_AVX2 is defined; built on mulx, adcx and adox.
  63071. ; * r A single precision integer. 18 64-bit words are written (pointer arrives in rcx).
  63072. ; * a A single precision integer. 9 64-bit words are read (pointer arrives in rdx).
  63073. ; */
  63074. _text SEGMENT READONLY PARA
  63075. sp_521_sqr_avx2_9 PROC
  63076. push rbp ; rbp, r12-r15, rdi, rsi and rbx are all overwritten below and restored before ret
  63077. push r12
  63078. push r13
  63079. push r14
  63080. push r15
  63081. push rdi
  63082. push rsi
  63083. push rbx
  63084. mov r8, rcx ; r8 = r
  63085. mov r9, rdx ; r9 = a (rdx itself is needed as the implicit mulx source)
  63086. sub rsp, 72 ; stack scratch; only its first 32 bytes are accessed below
  63087. cmp r9, r8 ; squaring in place (a == r)?
  63088. mov rbp, rsp
  63089. cmovne rbp, r8 ; rbp = r when a != r, else the stack scratch: words 1..3 are stored while a[1..3] are still to be read
  63090. add r8, 72 ; r8 = &r[9]
  63091. xor r12, r12 ; r12 = 0 for the rest of the routine; also clears CF and OF
  63092. ; Diagonal 1: A[0] x A[1..8] and A[1] x A[8]. "Column k" below = 64-bit word of weight 2^(64k) in the running sum of cross products
  63093. ; Chain start: low half -> r10, high half -> r11
  63094. ; A[1] x A[0]
  63095. mov rdx, QWORD PTR [r9] ; rdx = A[0]
  63096. mulx r11, r10, QWORD PTR [r9+8]
  63097. mov QWORD PTR [rbp+8], r10 ; column 1
  63098. ; High half -> r10 (adox only adds r12 = 0 plus OF)
  63099. ; A[2] x A[0]
  63100. mulx r10, rax, QWORD PTR [r9+16]
  63101. adcx r11, rax
  63102. adox r10, r12
  63103. mov QWORD PTR [rbp+16], r11 ; column 2
  63104. ; High half -> r14, which becomes column 4
  63105. ; A[3] x A[0]
  63106. mulx r14, rax, QWORD PTR [r9+24]
  63107. adcx r10, rax
  63108. adox r14, r12
  63109. mov QWORD PTR [rbp+24], r10 ; column 3
  63110. ; High half -> r15, which becomes column 5
  63111. ; A[4] x A[0]
  63112. mulx r15, rax, QWORD PTR [r9+32]
  63113. adcx r14, rax
  63114. adox r15, r12
  63115. ; Column 4 stays in r14
  63116. ; High half -> rdi, which becomes column 6
  63117. ; A[5] x A[0]
  63118. mulx rdi, rax, QWORD PTR [r9+40]
  63119. adcx r15, rax
  63120. adox rdi, r12
  63121. ; Column 5 stays in r15
  63122. ; High half -> rsi, which becomes column 7
  63123. ; A[6] x A[0]
  63124. mulx rsi, rax, QWORD PTR [r9+48]
  63125. adcx rdi, rax
  63126. adox rsi, r12
  63127. ; Column 6 stays in rdi
  63128. ; High half -> rbx, which becomes column 8
  63129. ; A[7] x A[0]
  63130. mulx rbx, rax, QWORD PTR [r9+56]
  63131. adcx rsi, rax
  63132. adox rbx, r12
  63133. ; Column 7 stays in rsi
  63134. ; High half -> r10 (start of column 9)
  63135. ; A[8] x A[0]
  63136. mulx r10, rax, QWORD PTR [r9+64]
  63137. adcx rbx, rax
  63138. adox r10, r12
  63139. ; Column 8 stays in rbx
  63140. ; High half -> r11 (start of column 10)
  63141. ; A[8] x A[1]
  63142. mov rdx, QWORD PTR [r9+8] ; rdx = A[1]; still live at the start of Diagonal 2
  63143. mulx r11, rax, QWORD PTR [r9+64]
  63144. adcx r10, rax
  63145. adox r11, r12
  63146. mov QWORD PTR [r8], r10 ; column 9
  63147. ; Carry: finish the CF chain, then gather the leftover CF and OF into r13
  63148. adcx r11, r12
  63149. mov r13, r12
  63150. adcx r13, r12
  63151. adox r13, r12
  63152. mov QWORD PTR [r8+8], r11 ; column 10
  63153. ; Diagonal 2: A[1] x A[2..7], then A[7] x A[2..4]. Low halves ride the CF chain (adcx), high halves the OF chain (adox)
  63154. mov r11, QWORD PTR [rbp+24] ; reload column 3
  63155. ; Column 4 is already in r14
  63156. ; A[2] x A[1]
  63157. mulx rcx, rax, QWORD PTR [r9+16]
  63158. adcx r11, rax
  63159. adox r14, rcx
  63160. mov QWORD PTR [rbp+24], r11 ; column 3
  63161. ; Column 5 is already in r15
  63162. ; A[3] x A[1]
  63163. mulx rcx, rax, QWORD PTR [r9+24]
  63164. adcx r14, rax
  63165. adox r15, rcx
  63166. ; Column 4 stays in r14
  63167. ; Column 6 is already in rdi
  63168. ; A[4] x A[1]
  63169. mulx rcx, rax, QWORD PTR [r9+32]
  63170. adcx r15, rax
  63171. adox rdi, rcx
  63172. ; Column 5 stays in r15
  63173. ; Column 7 is already in rsi
  63174. ; A[5] x A[1]
  63175. mulx rcx, rax, QWORD PTR [r9+40]
  63176. adcx rdi, rax
  63177. adox rsi, rcx
  63178. ; Column 6 stays in rdi
  63179. ; Column 8 is already in rbx
  63180. ; A[6] x A[1]
  63181. mulx rcx, rax, QWORD PTR [r9+48]
  63182. adcx rsi, rax
  63183. adox rbx, rcx
  63184. ; Column 7 stays in rsi
  63185. mov r11, QWORD PTR [r8] ; load column 9
  63186. ; A[7] x A[1]
  63187. mulx rcx, rax, QWORD PTR [r9+56]
  63188. adcx rbx, rax
  63189. adox r11, rcx
  63190. ; Column 8 stays in rbx
  63191. mov r10, QWORD PTR [r8+8] ; load column 10
  63192. ; A[7] x A[2]
  63193. mov rdx, QWORD PTR [r9+16]
  63194. mulx rcx, rax, QWORD PTR [r9+56]
  63195. adcx r11, rax
  63196. adox r10, rcx
  63197. mov QWORD PTR [r8], r11 ; column 9
  63198. ; High half -> r11 (start of column 11)
  63199. ; A[7] x A[3]
  63200. mov rdx, QWORD PTR [r9+24]
  63201. mulx r11, rax, QWORD PTR [r9+56]
  63202. adcx r10, rax
  63203. adox r11, r12
  63204. mov QWORD PTR [r8+8], r10 ; column 10
  63205. ; High half -> r10 (start of column 12)
  63206. ; A[7] x A[4]
  63207. mov rdx, QWORD PTR [r9+32]
  63208. mulx r10, rax, QWORD PTR [r9+56]
  63209. adcx r11, rax
  63210. adox r10, r12
  63211. mov QWORD PTR [r8+16], r11 ; column 11
  63212. ; Carry: add the word saved in r13 by the previous diagonal, then gather CF and OF into r13 again
  63213. adcx r10, r13
  63214. mov r13, r12
  63215. adcx r13, r12
  63216. adox r13, r12
  63217. mov QWORD PTR [r8+24], r10 ; column 12
  63218. ; Diagonal 3: A[2] x A[3..6], A[6] x A[3..5], A[8] x A[4..5]
  63219. ; Columns 5 and 6 are already in r15 and rdi
  63220. ; A[3] x A[2]
  63221. mov rdx, QWORD PTR [r9+16]
  63222. mulx rcx, rax, QWORD PTR [r9+24]
  63223. adcx r15, rax
  63224. adox rdi, rcx
  63225. ; Column 5 stays in r15
  63226. ; Column 7 is already in rsi
  63227. ; A[4] x A[2]
  63228. mulx rcx, rax, QWORD PTR [r9+32]
  63229. adcx rdi, rax
  63230. adox rsi, rcx
  63231. ; Column 6 stays in rdi
  63232. ; Column 8 is already in rbx
  63233. ; A[5] x A[2]
  63234. mulx rcx, rax, QWORD PTR [r9+40]
  63235. adcx rsi, rax
  63236. adox rbx, rcx
  63237. ; Column 7 stays in rsi
  63238. mov r10, QWORD PTR [r8] ; load column 9
  63239. ; A[6] x A[2]
  63240. mulx rcx, rax, QWORD PTR [r9+48]
  63241. adcx rbx, rax
  63242. adox r10, rcx
  63243. ; Column 8 stays in rbx
  63244. mov r11, QWORD PTR [r8+8] ; load column 10
  63245. ; A[6] x A[3]
  63246. mov rdx, QWORD PTR [r9+24]
  63247. mulx rcx, rax, QWORD PTR [r9+48]
  63248. adcx r10, rax
  63249. adox r11, rcx
  63250. mov QWORD PTR [r8], r10 ; column 9
  63251. mov r10, QWORD PTR [r8+16] ; load column 11
  63252. ; A[6] x A[4]
  63253. mov rdx, QWORD PTR [r9+32]
  63254. mulx rcx, rax, QWORD PTR [r9+48]
  63255. adcx r11, rax
  63256. adox r10, rcx
  63257. mov QWORD PTR [r8+8], r11 ; column 10
  63258. mov r11, QWORD PTR [r8+24] ; load column 12
  63259. ; A[6] x A[5]
  63260. mov rdx, QWORD PTR [r9+40]
  63261. mulx rcx, rax, QWORD PTR [r9+48]
  63262. adcx r10, rax
  63263. adox r11, rcx
  63264. mov QWORD PTR [r8+16], r10 ; column 11
  63265. ; High half -> r10 (start of column 13)
  63266. ; A[8] x A[4]
  63267. mov rdx, QWORD PTR [r9+32]
  63268. mulx r10, rax, QWORD PTR [r9+64]
  63269. adcx r11, rax
  63270. adox r10, r12
  63271. mov QWORD PTR [r8+24], r11 ; column 12
  63272. ; High half -> r11 (start of column 14)
  63273. ; A[8] x A[5]
  63274. mov rdx, QWORD PTR [r9+40]
  63275. mulx r11, rax, QWORD PTR [r9+64]
  63276. adcx r10, rax
  63277. adox r11, r12
  63278. mov QWORD PTR [r8+32], r10 ; column 13
  63279. ; Carry: same pattern as at the end of Diagonal 2
  63280. adcx r11, r13
  63281. mov r13, r12
  63282. adcx r13, r12
  63283. adox r13, r12
  63284. mov QWORD PTR [r8+40], r11 ; column 14
  63285. ; Diagonal 4: the remaining nine cross products
  63286. ; Columns 7 and 8 are already in rsi and rbx
  63287. ; A[4] x A[3]
  63288. mov rdx, QWORD PTR [r9+24]
  63289. mulx rcx, rax, QWORD PTR [r9+32]
  63290. adcx rsi, rax
  63291. adox rbx, rcx
  63292. ; Column 7 stays in rsi
  63293. mov r11, QWORD PTR [r8] ; load column 9
  63294. ; A[5] x A[3]
  63295. mulx rcx, rax, QWORD PTR [r9+40]
  63296. adcx rbx, rax
  63297. adox r11, rcx
  63298. ; Column 8 stays in rbx
  63299. mov r10, QWORD PTR [r8+8] ; load column 10
  63300. ; A[5] x A[4]
  63301. mov rdx, QWORD PTR [r9+32]
  63302. mulx rcx, rax, QWORD PTR [r9+40]
  63303. adcx r11, rax
  63304. adox r10, rcx
  63305. mov QWORD PTR [r8], r11 ; column 9
  63306. mov r11, QWORD PTR [r8+16] ; load column 11
  63307. ; A[8] x A[2]
  63308. mov rdx, QWORD PTR [r9+16]
  63309. mulx rcx, rax, QWORD PTR [r9+64]
  63310. adcx r10, rax
  63311. adox r11, rcx
  63312. mov QWORD PTR [r8+8], r10 ; column 10
  63313. mov r10, QWORD PTR [r8+24] ; load column 12
  63314. ; A[8] x A[3]
  63315. mov rdx, QWORD PTR [r9+24]
  63316. mulx rcx, rax, QWORD PTR [r9+64]
  63317. adcx r11, rax
  63318. adox r10, rcx
  63319. mov QWORD PTR [r8+16], r11 ; column 11
  63320. mov r11, QWORD PTR [r8+32] ; load column 13
  63321. ; A[7] x A[5]
  63322. mov rdx, QWORD PTR [r9+40]
  63323. mulx rcx, rax, QWORD PTR [r9+56]
  63324. adcx r10, rax
  63325. adox r11, rcx
  63326. mov QWORD PTR [r8+24], r10 ; column 12
  63327. mov r10, QWORD PTR [r8+40] ; load column 14
  63328. ; A[7] x A[6]
  63329. mov rdx, QWORD PTR [r9+48]
  63330. mulx rcx, rax, QWORD PTR [r9+56]
  63331. adcx r11, rax
  63332. adox r10, rcx
  63333. mov QWORD PTR [r8+32], r11 ; column 13
  63334. ; High half -> r11 (start of column 15); rdx still holds A[6]
  63335. ; A[8] x A[6]
  63336. mulx r11, rax, QWORD PTR [r9+64]
  63337. adcx r10, rax
  63338. adox r11, r12
  63339. mov QWORD PTR [r8+40], r10 ; column 14
  63340. ; High half -> r10 (start of column 16)
  63341. ; A[8] x A[7]
  63342. mov rdx, QWORD PTR [r9+56]
  63343. mulx r10, rax, QWORD PTR [r9+64]
  63344. adcx r11, rax
  63345. adox r10, r12
  63346. mov QWORD PTR [r8+48], r11 ; column 15
  63347. ; Carry: same pattern again; the gathered r13 becomes the top word
  63348. adcx r10, r13
  63349. mov r13, r12
  63350. adcx r13, r12
  63351. adox r13, r12
  63352. mov QWORD PTR [r8+56], r10 ; column 16
  63353. mov QWORD PTR [r8+64], r13 ; column 17
  63354. ; Double and Add in A[i] x A[i]: adox x, x doubles a column (carry in OF), adcx adds the square's halves (carry in CF)
  63355. mov r11, QWORD PTR [rbp+8] ; column 1
  63356. ; A[0] x A[0]
  63357. mov rdx, QWORD PTR [r9]
  63358. mulx rcx, rax, rdx
  63359. mov QWORD PTR [rbp], rax ; word 0 = low half of A[0]^2
  63360. adox r11, r11
  63361. adcx r11, rcx
  63362. mov QWORD PTR [rbp+8], r11 ; word 1
  63363. mov r10, QWORD PTR [rbp+16] ; column 2
  63364. mov r11, QWORD PTR [rbp+24] ; column 3
  63365. ; A[1] x A[1]
  63366. mov rdx, QWORD PTR [r9+8]
  63367. mulx rcx, rax, rdx
  63368. adox r10, r10
  63369. adox r11, r11
  63370. adcx r10, rax
  63371. adcx r11, rcx
  63372. mov QWORD PTR [rbp+16], r10 ; word 2
  63373. mov QWORD PTR [rbp+24], r11 ; word 3
  63374. ; A[2] x A[2]
  63375. mov rdx, QWORD PTR [r9+16]
  63376. mulx rcx, rax, rdx
  63377. adox r14, r14 ; words 4 and 5 stay in r14 / r15
  63378. adox r15, r15
  63379. adcx r14, rax
  63380. adcx r15, rcx
  63381. ; A[3] x A[3]
  63382. mov rdx, QWORD PTR [r9+24]
  63383. mulx rcx, rax, rdx
  63384. adox rdi, rdi ; words 6 and 7 stay in rdi / rsi
  63385. adox rsi, rsi
  63386. adcx rdi, rax
  63387. adcx rsi, rcx
  63388. mov r11, QWORD PTR [r8] ; column 9
  63389. ; A[4] x A[4]
  63390. mov rdx, QWORD PTR [r9+32]
  63391. mulx rcx, rax, rdx
  63392. adox rbx, rbx ; word 8 stays in rbx
  63393. adox r11, r11
  63394. adcx rbx, rax
  63395. adcx r11, rcx
  63396. mov QWORD PTR [r8], r11 ; word 9
  63397. mov r10, QWORD PTR [r8+8] ; column 10
  63398. mov r11, QWORD PTR [r8+16] ; column 11
  63399. ; A[5] x A[5]
  63400. mov rdx, QWORD PTR [r9+40]
  63401. mulx rcx, rax, rdx
  63402. adox r10, r10
  63403. adox r11, r11
  63404. adcx r10, rax
  63405. adcx r11, rcx
  63406. mov QWORD PTR [r8+8], r10 ; word 10
  63407. mov QWORD PTR [r8+16], r11 ; word 11
  63408. mov r10, QWORD PTR [r8+24] ; column 12
  63409. mov r11, QWORD PTR [r8+32] ; column 13
  63410. ; A[6] x A[6]
  63411. mov rdx, QWORD PTR [r9+48]
  63412. mulx rcx, rax, rdx
  63413. adox r10, r10
  63414. adox r11, r11
  63415. adcx r10, rax
  63416. adcx r11, rcx
  63417. mov QWORD PTR [r8+24], r10 ; word 12
  63418. mov QWORD PTR [r8+32], r11 ; word 13
  63419. mov r10, QWORD PTR [r8+40] ; column 14
  63420. mov r11, QWORD PTR [r8+48] ; column 15
  63421. ; A[7] x A[7]
  63422. mov rdx, QWORD PTR [r9+56]
  63423. mulx rcx, rax, rdx
  63424. adox r10, r10
  63425. adox r11, r11
  63426. adcx r10, rax
  63427. adcx r11, rcx
  63428. mov QWORD PTR [r8+40], r10 ; word 14
  63429. mov QWORD PTR [r8+48], r11 ; word 15
  63430. mov r10, QWORD PTR [r8+56] ; column 16
  63431. mov r11, QWORD PTR [r8+64] ; column 17
  63432. ; A[8] x A[8]
  63433. mov rdx, QWORD PTR [r9+64]
  63434. mulx rcx, rax, rdx
  63435. adox r10, r10
  63436. adox r11, r11
  63437. adcx r10, rax
  63438. adcx r11, rcx
  63439. mov QWORD PTR [r8+56], r10 ; word 16
  63440. mov QWORD PTR [r8+64], r11 ; word 17
  63441. mov QWORD PTR [r8+-40], r14 ; word 4 (r8 is still r + 72, so -40 addresses r[4])
  63442. mov QWORD PTR [r8+-32], r15 ; word 5
  63443. mov QWORD PTR [r8+-24], rdi ; word 6
  63444. mov QWORD PTR [r8+-16], rsi ; word 7
  63445. mov QWORD PTR [r8+-8], rbx ; word 8
  63446. sub r8, 72 ; r8 = r again
  63447. cmp r9, r8
  63448. jne L_end_521_sqr_avx2_9 ; a != r: words 0..3 were written straight into r
  63449. vmovdqu xmm0, OWORD PTR [rbp] ; in-place case: copy words 0..3 from the stack scratch to r, 16 bytes at a time
  63450. vmovups OWORD PTR [r8], xmm0
  63451. vmovdqu xmm0, OWORD PTR [rbp+16]
  63452. vmovups OWORD PTR [r8+16], xmm0
  63453. L_end_521_sqr_avx2_9:
  63454. add rsp, 72 ; release the scratch area
  63455. pop rbx
  63456. pop rsi
  63457. pop rdi
  63458. pop r15
  63459. pop r14
  63460. pop r13
  63461. pop r12
  63462. pop rbp
  63463. ret
  63464. sp_521_sqr_avx2_9 ENDP
  63465. _text ENDS
  63466. ENDIF
  63467. ; /* Nine-word addition with carry out. (r = a + b)
  63468. ; *
  63469. ; * r Destination, nine 64-bit words (pointer in rcx).
  63470. ; * a First addend, nine 64-bit words (pointer in rdx).
  63471. ; * b Second addend, nine 64-bit words (pointer in r8).
  63472. ; */
  63473. _text SEGMENT READONLY PARA
  63474. sp_521_add_9 PROC
  63475. ; rax is cleared first so the closing adc leaves only the carry out in it
  63476. xor eax, eax
  63477. mov r10, QWORD PTR [rdx]
  63478. add r10, QWORD PTR [r8] ; word 0 starts the carry chain
  63479. mov r9, QWORD PTR [rdx+8] ; next a word is fetched before the previous sum is stored (mov leaves CF alone)
  63480. mov QWORD PTR [rcx], r10
  63481. adc r9, QWORD PTR [r8+8] ; word 1
  63482. mov r10, QWORD PTR [rdx+16]
  63483. mov QWORD PTR [rcx+8], r9
  63484. adc r10, QWORD PTR [r8+16] ; word 2
  63485. mov r9, QWORD PTR [rdx+24]
  63486. mov QWORD PTR [rcx+16], r10
  63487. adc r9, QWORD PTR [r8+24] ; word 3
  63488. mov r10, QWORD PTR [rdx+32]
  63489. mov QWORD PTR [rcx+24], r9
  63490. adc r10, QWORD PTR [r8+32] ; word 4
  63491. mov r9, QWORD PTR [rdx+40]
  63492. mov QWORD PTR [rcx+32], r10
  63493. adc r9, QWORD PTR [r8+40] ; word 5
  63494. mov r10, QWORD PTR [rdx+48]
  63495. mov QWORD PTR [rcx+40], r9
  63496. adc r10, QWORD PTR [r8+48] ; word 6
  63497. mov r9, QWORD PTR [rdx+56]
  63498. mov QWORD PTR [rcx+48], r10
  63499. adc r9, QWORD PTR [r8+56] ; word 7
  63500. mov r10, QWORD PTR [rdx+64]
  63501. mov QWORD PTR [rcx+56], r9
  63502. adc r10, QWORD PTR [r8+64] ; word 8
  63503. mov QWORD PTR [rcx+64], r10
  63504. adc rax, 0 ; rax = carry out of word 8 (0 or 1)
  63505. ret
  63506. sp_521_add_9 ENDP
  63507. _text ENDS
  63508. ; /* Nine-word subtraction with borrow out. (r = a - b)
  63509. ; *
  63510. ; * r Destination, nine 64-bit words (pointer in rcx).
  63511. ; * a Minuend, nine 64-bit words (pointer in rdx).
  63512. ; * b Subtrahend, nine 64-bit words (pointer in r8).
  63513. ; */
  63514. _text SEGMENT READONLY PARA
  63515. sp_521_sub_9 PROC
  63516. mov r10, QWORD PTR [rdx]
  63517. sub r10, QWORD PTR [r8] ; word 0 starts the borrow chain
  63518. mov r9, QWORD PTR [rdx+8] ; next a word is fetched before the previous difference is stored (mov leaves CF alone)
  63519. mov QWORD PTR [rcx], r10
  63520. sbb r9, QWORD PTR [r8+8] ; word 1
  63521. mov r10, QWORD PTR [rdx+16]
  63522. mov QWORD PTR [rcx+8], r9
  63523. sbb r10, QWORD PTR [r8+16] ; word 2
  63524. mov r9, QWORD PTR [rdx+24]
  63525. mov QWORD PTR [rcx+16], r10
  63526. sbb r9, QWORD PTR [r8+24] ; word 3
  63527. mov r10, QWORD PTR [rdx+32]
  63528. mov QWORD PTR [rcx+24], r9
  63529. sbb r10, QWORD PTR [r8+32] ; word 4
  63530. mov r9, QWORD PTR [rdx+40]
  63531. mov QWORD PTR [rcx+32], r10
  63532. sbb r9, QWORD PTR [r8+40] ; word 5
  63533. mov r10, QWORD PTR [rdx+48]
  63534. mov QWORD PTR [rcx+40], r9
  63535. sbb r10, QWORD PTR [r8+48] ; word 6
  63536. mov r9, QWORD PTR [rdx+56]
  63537. mov QWORD PTR [rcx+48], r10
  63538. sbb r9, QWORD PTR [r8+56] ; word 7
  63539. mov r10, QWORD PTR [rdx+64]
  63540. mov QWORD PTR [rcx+56], r9
  63541. sbb r10, QWORD PTR [r8+64] ; word 8
  63542. mov QWORD PTR [rcx+64], r10
  63543. sbb rax, rax ; rax = 0 when no borrow left word 8, all ones otherwise
  63544. ret
  63545. sp_521_sub_9 ENDP
  63546. _text ENDS
  63547. ; /* Branch-free conditional copy of nine 64-bit words from a into r.
  63548. ; * Each word becomes r ^ ((r ^ a) & m): a when m is -1, unchanged when m is 0.
  63549. ; *
  63550. ; * r Number that may be overwritten (pointer in rcx).
  63551. ; * a Number to copy from (pointer in rdx).
  63552. ; * m Mask value to apply (r8).
  63553. ; */
  63554. _text SEGMENT READONLY PARA
  63555. sp_521_cond_copy_9 PROC
  63556. push r12 ; fifth scratch register for the first group; restored before ret
  63557. mov r9, QWORD PTR [rcx] ; words 0..4: build (r ^ a) & m for each word first
  63558. xor r9, QWORD PTR [rdx]
  63559. and r9, r8
  63560. mov r10, QWORD PTR [rcx+8]
  63561. xor r10, QWORD PTR [rdx+8]
  63562. and r10, r8
  63563. mov r11, QWORD PTR [rcx+16]
  63564. xor r11, QWORD PTR [rdx+16]
  63565. and r11, r8
  63566. mov r12, QWORD PTR [rcx+24]
  63567. xor r12, QWORD PTR [rdx+24]
  63568. and r12, r8
  63569. mov rax, QWORD PTR [rcx+32]
  63570. xor rax, QWORD PTR [rdx+32]
  63571. and rax, r8
  63572. xor QWORD PTR [rcx], r9 ; then fold the masked differences into r[0..4]
  63573. xor QWORD PTR [rcx+8], r10
  63574. xor QWORD PTR [rcx+16], r11
  63575. xor QWORD PTR [rcx+24], r12
  63576. xor QWORD PTR [rcx+32], rax
  63577. mov rax, QWORD PTR [rcx+40] ; words 5..8: same two steps
  63578. xor rax, QWORD PTR [rdx+40]
  63579. and rax, r8
  63580. mov r9, QWORD PTR [rcx+48]
  63581. xor r9, QWORD PTR [rdx+48]
  63582. and r9, r8
  63583. mov r10, QWORD PTR [rcx+56]
  63584. xor r10, QWORD PTR [rdx+56]
  63585. and r10, r8
  63586. mov r11, QWORD PTR [rcx+64]
  63587. xor r11, QWORD PTR [rdx+64]
  63588. and r11, r8
  63589. xor QWORD PTR [rcx+40], rax
  63590. xor QWORD PTR [rcx+48], r9
  63591. xor QWORD PTR [rcx+56], r10
  63592. xor QWORD PTR [rcx+64], r11
  63593. pop r12
  63594. ret
  63595. sp_521_cond_copy_9 ENDP
  63596. _text ENDS
  63597. ; /* Multiply two Montgomery form numbers mod the modulus (prime).
  63598. ; * (r = a * b mod m)
  63599. ; *
  63600. ; * r Result of multiplication.
  63601. ; * a First number to multiply in Montgomery form.
  63602. ; * b Second number to multiply in Montgomery form.
  63603. ; * m Modulus (prime).
  63604. ; * mp Montgomery multiplier.
  63605. ; */
  63606. _text SEGMENT READONLY PARA
  63607. sp_521_mont_mul_9 PROC
  63608. push r12
  63609. push r13
  63610. push r14
  63611. push r15
  63612. mov r9, rdx
  63613. sub rsp, 144
  63614. ; A[0] * B[0]
  63615. mov rax, QWORD PTR [r8]
  63616. mul QWORD PTR [r9]
  63617. xor r15, r15
  63618. mov QWORD PTR [rsp], rax
  63619. mov r14, rdx
  63620. ; A[0] * B[1]
  63621. mov rax, QWORD PTR [r8+8]
  63622. mul QWORD PTR [r9]
  63623. xor r13, r13
  63624. add r14, rax
  63625. adc r15, rdx
  63626. adc r13, 0
  63627. ; A[1] * B[0]
  63628. mov rax, QWORD PTR [r8]
  63629. mul QWORD PTR [r9+8]
  63630. add r14, rax
  63631. adc r15, rdx
  63632. adc r13, 0
  63633. mov QWORD PTR [rsp+8], r14
  63634. ; A[0] * B[2]
  63635. mov rax, QWORD PTR [r8+16]
  63636. mul QWORD PTR [r9]
  63637. xor r14, r14
  63638. add r15, rax
  63639. adc r13, rdx
  63640. adc r14, 0
  63641. ; A[1] * B[1]
  63642. mov rax, QWORD PTR [r8+8]
  63643. mul QWORD PTR [r9+8]
  63644. add r15, rax
  63645. adc r13, rdx
  63646. adc r14, 0
  63647. ; A[2] * B[0]
  63648. mov rax, QWORD PTR [r8]
  63649. mul QWORD PTR [r9+16]
  63650. add r15, rax
  63651. adc r13, rdx
  63652. adc r14, 0
  63653. mov QWORD PTR [rsp+16], r15
  63654. ; A[0] * B[3]
  63655. mov rax, QWORD PTR [r8+24]
  63656. mul QWORD PTR [r9]
  63657. xor r15, r15
  63658. add r13, rax
  63659. adc r14, rdx
  63660. adc r15, 0
  63661. ; A[1] * B[2]
  63662. mov rax, QWORD PTR [r8+16]
  63663. mul QWORD PTR [r9+8]
  63664. add r13, rax
  63665. adc r14, rdx
  63666. adc r15, 0
  63667. ; A[2] * B[1]
  63668. mov rax, QWORD PTR [r8+8]
  63669. mul QWORD PTR [r9+16]
  63670. add r13, rax
  63671. adc r14, rdx
  63672. adc r15, 0
  63673. ; A[3] * B[0]
  63674. mov rax, QWORD PTR [r8]
  63675. mul QWORD PTR [r9+24]
  63676. add r13, rax
  63677. adc r14, rdx
  63678. adc r15, 0
  63679. mov QWORD PTR [rsp+24], r13
  63680. ; A[0] * B[4]
  63681. mov rax, QWORD PTR [r8+32]
  63682. mul QWORD PTR [r9]
  63683. xor r13, r13
  63684. add r14, rax
  63685. adc r15, rdx
  63686. adc r13, 0
  63687. ; A[1] * B[3]
  63688. mov rax, QWORD PTR [r8+24]
  63689. mul QWORD PTR [r9+8]
  63690. add r14, rax
  63691. adc r15, rdx
  63692. adc r13, 0
  63693. ; A[2] * B[2]
  63694. mov rax, QWORD PTR [r8+16]
  63695. mul QWORD PTR [r9+16]
  63696. add r14, rax
  63697. adc r15, rdx
  63698. adc r13, 0
  63699. ; A[3] * B[1]
  63700. mov rax, QWORD PTR [r8+8]
  63701. mul QWORD PTR [r9+24]
  63702. add r14, rax
  63703. adc r15, rdx
  63704. adc r13, 0
  63705. ; A[4] * B[0]
  63706. mov rax, QWORD PTR [r8]
  63707. mul QWORD PTR [r9+32]
  63708. add r14, rax
  63709. adc r15, rdx
  63710. adc r13, 0
  63711. mov QWORD PTR [rsp+32], r14
  63712. ; A[0] * B[5]
  63713. mov rax, QWORD PTR [r8+40]
  63714. mul QWORD PTR [r9]
  63715. xor r14, r14
  63716. add r15, rax
  63717. adc r13, rdx
  63718. adc r14, 0
  63719. ; A[1] * B[4]
  63720. mov rax, QWORD PTR [r8+32]
  63721. mul QWORD PTR [r9+8]
  63722. add r15, rax
  63723. adc r13, rdx
  63724. adc r14, 0
  63725. ; A[2] * B[3]
  63726. mov rax, QWORD PTR [r8+24]
  63727. mul QWORD PTR [r9+16]
  63728. add r15, rax
  63729. adc r13, rdx
  63730. adc r14, 0
  63731. ; A[3] * B[2]
  63732. mov rax, QWORD PTR [r8+16]
  63733. mul QWORD PTR [r9+24]
  63734. add r15, rax
  63735. adc r13, rdx
  63736. adc r14, 0
  63737. ; A[4] * B[1]
  63738. mov rax, QWORD PTR [r8+8]
  63739. mul QWORD PTR [r9+32]
  63740. add r15, rax
  63741. adc r13, rdx
  63742. adc r14, 0
  63743. ; A[5] * B[0]
  63744. mov rax, QWORD PTR [r8]
  63745. mul QWORD PTR [r9+40]
  63746. add r15, rax
  63747. adc r13, rdx
  63748. adc r14, 0
  63749. mov QWORD PTR [rsp+40], r15
  63750. ; A[0] * B[6]
  63751. mov rax, QWORD PTR [r8+48]
  63752. mul QWORD PTR [r9]
  63753. xor r15, r15
  63754. add r13, rax
  63755. adc r14, rdx
  63756. adc r15, 0
  63757. ; A[1] * B[5]
  63758. mov rax, QWORD PTR [r8+40]
  63759. mul QWORD PTR [r9+8]
  63760. add r13, rax
  63761. adc r14, rdx
  63762. adc r15, 0
  63763. ; A[2] * B[4]
  63764. mov rax, QWORD PTR [r8+32]
  63765. mul QWORD PTR [r9+16]
  63766. add r13, rax
  63767. adc r14, rdx
  63768. adc r15, 0
  63769. ; A[3] * B[3]
  63770. mov rax, QWORD PTR [r8+24]
  63771. mul QWORD PTR [r9+24]
  63772. add r13, rax
  63773. adc r14, rdx
  63774. adc r15, 0
  63775. ; A[4] * B[2]
  63776. mov rax, QWORD PTR [r8+16]
  63777. mul QWORD PTR [r9+32]
  63778. add r13, rax
  63779. adc r14, rdx
  63780. adc r15, 0
  63781. ; A[5] * B[1]
  63782. mov rax, QWORD PTR [r8+8]
  63783. mul QWORD PTR [r9+40]
  63784. add r13, rax
  63785. adc r14, rdx
  63786. adc r15, 0
  63787. ; A[6] * B[0]
  63788. mov rax, QWORD PTR [r8]
  63789. mul QWORD PTR [r9+48]
  63790. add r13, rax
  63791. adc r14, rdx
  63792. adc r15, 0
  63793. mov QWORD PTR [rsp+48], r13
  63794. ; A[0] * B[7]
  63795. mov rax, QWORD PTR [r8+56]
  63796. mul QWORD PTR [r9]
  63797. xor r13, r13
  63798. add r14, rax
  63799. adc r15, rdx
  63800. adc r13, 0
  63801. ; A[1] * B[6]
  63802. mov rax, QWORD PTR [r8+48]
  63803. mul QWORD PTR [r9+8]
  63804. add r14, rax
  63805. adc r15, rdx
  63806. adc r13, 0
  63807. ; A[2] * B[5]
  63808. mov rax, QWORD PTR [r8+40]
  63809. mul QWORD PTR [r9+16]
  63810. add r14, rax
  63811. adc r15, rdx
  63812. adc r13, 0
  63813. ; A[3] * B[4]
  63814. mov rax, QWORD PTR [r8+32]
  63815. mul QWORD PTR [r9+24]
  63816. add r14, rax
  63817. adc r15, rdx
  63818. adc r13, 0
  63819. ; A[4] * B[3]
  63820. mov rax, QWORD PTR [r8+24]
  63821. mul QWORD PTR [r9+32]
  63822. add r14, rax
  63823. adc r15, rdx
  63824. adc r13, 0
  63825. ; A[5] * B[2]
  63826. mov rax, QWORD PTR [r8+16]
  63827. mul QWORD PTR [r9+40]
  63828. add r14, rax
  63829. adc r15, rdx
  63830. adc r13, 0
  63831. ; A[6] * B[1]
  63832. mov rax, QWORD PTR [r8+8]
  63833. mul QWORD PTR [r9+48]
  63834. add r14, rax
  63835. adc r15, rdx
  63836. adc r13, 0
  63837. ; A[7] * B[0]
  63838. mov rax, QWORD PTR [r8]
  63839. mul QWORD PTR [r9+56]
  63840. add r14, rax
  63841. adc r15, rdx
  63842. adc r13, 0
  63843. mov QWORD PTR [rsp+56], r14
  63844. ; A[0] * B[8]
  63845. mov rax, QWORD PTR [r8+64]
  63846. mul QWORD PTR [r9]
  63847. xor r14, r14
  63848. add r15, rax
  63849. adc r13, rdx
  63850. adc r14, 0
  63851. ; A[1] * B[7]
  63852. mov rax, QWORD PTR [r8+56]
  63853. mul QWORD PTR [r9+8]
  63854. add r15, rax
  63855. adc r13, rdx
  63856. adc r14, 0
  63857. ; A[2] * B[6]
  63858. mov rax, QWORD PTR [r8+48]
  63859. mul QWORD PTR [r9+16]
  63860. add r15, rax
  63861. adc r13, rdx
  63862. adc r14, 0
  63863. ; A[3] * B[5]
  63864. mov rax, QWORD PTR [r8+40]
  63865. mul QWORD PTR [r9+24]
  63866. add r15, rax
  63867. adc r13, rdx
  63868. adc r14, 0
  63869. ; A[4] * B[4]
  63870. mov rax, QWORD PTR [r8+32]
  63871. mul QWORD PTR [r9+32]
  63872. add r15, rax
  63873. adc r13, rdx
  63874. adc r14, 0
  63875. ; A[5] * B[3]
  63876. mov rax, QWORD PTR [r8+24]
  63877. mul QWORD PTR [r9+40]
  63878. add r15, rax
  63879. adc r13, rdx
  63880. adc r14, 0
  63881. ; A[6] * B[2]
  63882. mov rax, QWORD PTR [r8+16]
  63883. mul QWORD PTR [r9+48]
  63884. add r15, rax
  63885. adc r13, rdx
  63886. adc r14, 0
  63887. ; A[7] * B[1]
  63888. mov rax, QWORD PTR [r8+8]
  63889. mul QWORD PTR [r9+56]
  63890. add r15, rax
  63891. adc r13, rdx
  63892. adc r14, 0
  63893. ; A[8] * B[0]
  63894. mov rax, QWORD PTR [r8]
  63895. mul QWORD PTR [r9+64]
  63896. add r15, rax
  63897. adc r13, rdx
  63898. adc r14, 0
  63899. mov QWORD PTR [rsp+64], r15
  63900. ; A[1] * B[8]
  63901. mov rax, QWORD PTR [r8+64]
  63902. mul QWORD PTR [r9+8]
  63903. xor r15, r15
  63904. add r13, rax
  63905. adc r14, rdx
  63906. adc r15, 0
  63907. ; A[2] * B[7]
  63908. mov rax, QWORD PTR [r8+56]
  63909. mul QWORD PTR [r9+16]
  63910. add r13, rax
  63911. adc r14, rdx
  63912. adc r15, 0
  63913. ; A[3] * B[6]
  63914. mov rax, QWORD PTR [r8+48]
  63915. mul QWORD PTR [r9+24]
  63916. add r13, rax
  63917. adc r14, rdx
  63918. adc r15, 0
  63919. ; A[4] * B[5]
  63920. mov rax, QWORD PTR [r8+40]
  63921. mul QWORD PTR [r9+32]
  63922. add r13, rax
  63923. adc r14, rdx
  63924. adc r15, 0
  63925. ; A[5] * B[4]
  63926. mov rax, QWORD PTR [r8+32]
  63927. mul QWORD PTR [r9+40]
  63928. add r13, rax
  63929. adc r14, rdx
  63930. adc r15, 0
  63931. ; A[6] * B[3]
  63932. mov rax, QWORD PTR [r8+24]
  63933. mul QWORD PTR [r9+48]
  63934. add r13, rax
  63935. adc r14, rdx
  63936. adc r15, 0
  63937. ; A[7] * B[2]
  63938. mov rax, QWORD PTR [r8+16]
  63939. mul QWORD PTR [r9+56]
  63940. add r13, rax
  63941. adc r14, rdx
  63942. adc r15, 0
  63943. ; A[8] * B[1]
  63944. mov rax, QWORD PTR [r8+8]
  63945. mul QWORD PTR [r9+64]
  63946. add r13, rax
  63947. adc r14, rdx
  63948. adc r15, 0
  63949. mov QWORD PTR [rsp+72], r13
  63950. ; A[2] * B[8]
  63951. mov rax, QWORD PTR [r8+64]
  63952. mul QWORD PTR [r9+16]
  63953. xor r13, r13
  63954. add r14, rax
  63955. adc r15, rdx
  63956. adc r13, 0
  63957. ; A[3] * B[7]
  63958. mov rax, QWORD PTR [r8+56]
  63959. mul QWORD PTR [r9+24]
  63960. add r14, rax
  63961. adc r15, rdx
  63962. adc r13, 0
  63963. ; A[4] * B[6]
  63964. mov rax, QWORD PTR [r8+48]
  63965. mul QWORD PTR [r9+32]
  63966. add r14, rax
  63967. adc r15, rdx
  63968. adc r13, 0
  63969. ; A[5] * B[5]
  63970. mov rax, QWORD PTR [r8+40]
  63971. mul QWORD PTR [r9+40]
  63972. add r14, rax
  63973. adc r15, rdx
  63974. adc r13, 0
  63975. ; A[6] * B[4]
  63976. mov rax, QWORD PTR [r8+32]
  63977. mul QWORD PTR [r9+48]
  63978. add r14, rax
  63979. adc r15, rdx
  63980. adc r13, 0
  63981. ; A[7] * B[3]
  63982. mov rax, QWORD PTR [r8+24]
  63983. mul QWORD PTR [r9+56]
  63984. add r14, rax
  63985. adc r15, rdx
  63986. adc r13, 0
  63987. ; A[8] * B[2]
  63988. mov rax, QWORD PTR [r8+16]
  63989. mul QWORD PTR [r9+64]
  63990. add r14, rax
  63991. adc r15, rdx
  63992. adc r13, 0
  63993. mov QWORD PTR [rsp+80], r14
  63994. ; A[3] * B[8]
  63995. mov rax, QWORD PTR [r8+64]
  63996. mul QWORD PTR [r9+24]
  63997. xor r14, r14
  63998. add r15, rax
  63999. adc r13, rdx
  64000. adc r14, 0
  64001. ; A[4] * B[7]
  64002. mov rax, QWORD PTR [r8+56]
  64003. mul QWORD PTR [r9+32]
  64004. add r15, rax
  64005. adc r13, rdx
  64006. adc r14, 0
  64007. ; A[5] * B[6]
  64008. mov rax, QWORD PTR [r8+48]
  64009. mul QWORD PTR [r9+40]
  64010. add r15, rax
  64011. adc r13, rdx
  64012. adc r14, 0
  64013. ; A[6] * B[5]
  64014. mov rax, QWORD PTR [r8+40]
  64015. mul QWORD PTR [r9+48]
  64016. add r15, rax
  64017. adc r13, rdx
  64018. adc r14, 0
  64019. ; A[7] * B[4]
  64020. mov rax, QWORD PTR [r8+32]
  64021. mul QWORD PTR [r9+56]
  64022. add r15, rax
  64023. adc r13, rdx
  64024. adc r14, 0
  64025. ; A[8] * B[3]
  64026. mov rax, QWORD PTR [r8+24]
  64027. mul QWORD PTR [r9+64]
  64028. add r15, rax
  64029. adc r13, rdx
  64030. adc r14, 0
  64031. mov QWORD PTR [rsp+88], r15
  64032. ; A[4] * B[8]
  64033. mov rax, QWORD PTR [r8+64]
  64034. mul QWORD PTR [r9+32]
  64035. xor r15, r15
  64036. add r13, rax
  64037. adc r14, rdx
  64038. adc r15, 0
  64039. ; A[5] * B[7]
  64040. mov rax, QWORD PTR [r8+56]
  64041. mul QWORD PTR [r9+40]
  64042. add r13, rax
  64043. adc r14, rdx
  64044. adc r15, 0
  64045. ; A[6] * B[6]
  64046. mov rax, QWORD PTR [r8+48]
  64047. mul QWORD PTR [r9+48]
  64048. add r13, rax
  64049. adc r14, rdx
  64050. adc r15, 0
  64051. ; A[7] * B[5]
  64052. mov rax, QWORD PTR [r8+40]
  64053. mul QWORD PTR [r9+56]
  64054. add r13, rax
  64055. adc r14, rdx
  64056. adc r15, 0
  64057. ; A[8] * B[4]
  64058. mov rax, QWORD PTR [r8+32]
  64059. mul QWORD PTR [r9+64]
  64060. add r13, rax
  64061. adc r14, rdx
  64062. adc r15, 0
  64063. mov QWORD PTR [rsp+96], r13
  64064. ; A[5] * B[8]
  64065. mov rax, QWORD PTR [r8+64]
  64066. mul QWORD PTR [r9+40]
  64067. xor r13, r13
  64068. add r14, rax
  64069. adc r15, rdx
  64070. adc r13, 0
  64071. ; A[6] * B[7]
  64072. mov rax, QWORD PTR [r8+56]
  64073. mul QWORD PTR [r9+48]
  64074. add r14, rax
  64075. adc r15, rdx
  64076. adc r13, 0
  64077. ; A[7] * B[6]
  64078. mov rax, QWORD PTR [r8+48]
  64079. mul QWORD PTR [r9+56]
  64080. add r14, rax
  64081. adc r15, rdx
  64082. adc r13, 0
  64083. ; A[8] * B[5]
  64084. mov rax, QWORD PTR [r8+40]
  64085. mul QWORD PTR [r9+64]
  64086. add r14, rax
  64087. adc r15, rdx
  64088. adc r13, 0
  64089. mov QWORD PTR [rsp+104], r14
  64090. ; A[6] * B[8]
  64091. mov rax, QWORD PTR [r8+64]
  64092. mul QWORD PTR [r9+48]
  64093. xor r14, r14
  64094. add r15, rax
  64095. adc r13, rdx
  64096. adc r14, 0
  64097. ; A[7] * B[7]
  64098. mov rax, QWORD PTR [r8+56]
  64099. mul QWORD PTR [r9+56]
  64100. add r15, rax
  64101. adc r13, rdx
  64102. adc r14, 0
  64103. ; A[8] * B[6]
  64104. mov rax, QWORD PTR [r8+48]
  64105. mul QWORD PTR [r9+64]
  64106. add r15, rax
  64107. adc r13, rdx
  64108. adc r14, 0
  64109. mov QWORD PTR [rsp+112], r15
  64110. ; A[7] * B[8]
  64111. mov rax, QWORD PTR [r8+64]
  64112. mul QWORD PTR [r9+56]
  64113. xor r15, r15
  64114. add r13, rax
  64115. adc r14, rdx
  64116. adc r15, 0
  64117. ; A[8] * B[7]
  64118. mov rax, QWORD PTR [r8+56]
  64119. mul QWORD PTR [r9+64]
  64120. add r13, rax
  64121. adc r14, rdx
  64122. adc r15, 0
  64123. mov QWORD PTR [rsp+120], r13
  64124. ; A[8] * B[8]
  64125. mov rax, QWORD PTR [r8+64]
  64126. mul QWORD PTR [r9+64]
  64127. add r14, rax
  64128. adc r15, rdx
  64129. mov QWORD PTR [rsp+128], r14
  64130. mov QWORD PTR [rsp+136], r15
  64131. mov rax, QWORD PTR [rsp+64]
  64132. mov rdx, QWORD PTR [rsp+72]
  64133. mov r13, QWORD PTR [rsp+80]
  64134. mov r12, rax
  64135. and r12, 511
  64136. mov r14, QWORD PTR [rsp+88]
  64137. mov r15, QWORD PTR [rsp+96]
  64138. mov r8, QWORD PTR [rsp+104]
  64139. mov r9, QWORD PTR [rsp+112]
  64140. mov r10, QWORD PTR [rsp+120]
  64141. mov r11, QWORD PTR [rsp+128]
  64142. shrd rax, rdx, 9
  64143. shrd rdx, r13, 9
  64144. shrd r13, r14, 9
  64145. shrd r14, r15, 9
  64146. shrd r15, r8, 9
  64147. shrd r8, r9, 9
  64148. shrd r9, r10, 9
  64149. shrd r10, r11, 9
  64150. shr r11, 9
  64151. add rax, QWORD PTR [rsp]
  64152. adc rdx, QWORD PTR [rsp+8]
  64153. adc r13, QWORD PTR [rsp+16]
  64154. adc r14, QWORD PTR [rsp+24]
  64155. adc r15, QWORD PTR [rsp+32]
  64156. adc r8, QWORD PTR [rsp+40]
  64157. adc r9, QWORD PTR [rsp+48]
  64158. adc r10, QWORD PTR [rsp+56]
  64159. adc r12, r11
  64160. mov r11, r12
  64161. shr r12, 9
  64162. and r11, 511
  64163. add rax, r12
  64164. adc rdx, 0
  64165. adc r13, 0
  64166. adc r14, 0
  64167. adc r15, 0
  64168. adc r8, 0
  64169. adc r9, 0
  64170. adc r10, 0
  64171. adc r11, 0
  64172. mov QWORD PTR [rcx], rax
  64173. mov QWORD PTR [rcx+8], rdx
  64174. mov QWORD PTR [rcx+16], r13
  64175. mov QWORD PTR [rcx+24], r14
  64176. mov QWORD PTR [rcx+32], r15
  64177. mov QWORD PTR [rcx+40], r8
  64178. mov QWORD PTR [rcx+48], r9
  64179. mov QWORD PTR [rcx+56], r10
  64180. mov QWORD PTR [rcx+64], r11
  64181. add rsp, 144
  64182. pop r15
  64183. pop r14
  64184. pop r13
  64185. pop r12
  64186. ret
  64187. sp_521_mont_mul_9 ENDP
  64188. _text ENDS
  64189. ; /* Square the Montgomery form number mod the modulus (prime). (r = a * a mod m)
  64190. ; *
  64191. ; * r Result of squaring.
  64192. ; * a Number to square in Montgomery form.
  64193. ; * m Modulus (prime).
  64194. ; * mp Montgomery multiplier.
         ; *
         ; * Register use: r is written through rcx and a is read through rdx
         ; * (copied to r8 because mul clobbers rdx). The registers that would carry
         ; * m and mp (r8, r9) are overwritten without being read, so this routine
         ; * does not consult either value.
         ; * The 18-word product of the 9-word input is built column by column in a
         ; * 144-byte stack buffer. It is then reduced by adding the bits at position
         ; * 521 and above onto the low 521 bits, which is congruent modulo 2^521 - 1.
         ; * r12-r15 are saved on entry and restored before returning.
  64195. ; */
  64196. _text SEGMENT READONLY PARA
  64197. sp_521_mont_sqr_9 PROC
  64198. push r12
  64199. push r13
  64200. push r14
  64201. push r15
  64202. mov r8, rdx ; r8 = a; rdx is needed as the high half of every mul result
  64203. sub rsp, 144 ; room for the 18-word (18 * 8 bytes) product
  64204. ; A[0] * A[0]
  64205. mov rax, QWORD PTR [r8]
  64206. mul rax
  64207. xor r12, r12
  64208. mov QWORD PTR [rsp], rax
  64209. mov r11, rdx
  64210. ; A[0] * A[1]
  64211. mov rax, QWORD PTR [r8+8]
  64212. mul QWORD PTR [r8]
  64213. xor r10, r10
  64214. add r11, rax
  64215. adc r12, rdx
  64216. adc r10, 0
  64217. add r11, rax ; cross product added a second time: A[i]*A[j] occurs twice in a square
  64218. adc r12, rdx
  64219. adc r10, 0
  64220. mov QWORD PTR [rsp+8], r11
  64221. ; A[0] * A[2]
  64222. mov rax, QWORD PTR [r8+16]
  64223. mul QWORD PTR [r8]
  64224. xor r11, r11
  64225. add r12, rax
  64226. adc r10, rdx
  64227. adc r11, 0
  64228. add r12, rax
  64229. adc r10, rdx
  64230. adc r11, 0
  64231. ; A[1] * A[1]
  64232. mov rax, QWORD PTR [r8+8]
  64233. mul rax
  64234. add r12, rax ; square term is added only once
  64235. adc r10, rdx
  64236. adc r11, 0
  64237. mov QWORD PTR [rsp+16], r12
  64238. ; A[0] * A[3]
  64239. mov rax, QWORD PTR [r8+24]
  64240. mul QWORD PTR [r8]
  64241. xor r12, r12
  64242. add r10, rax
  64243. adc r11, rdx
  64244. adc r12, 0
  64245. add r10, rax
  64246. adc r11, rdx
  64247. adc r12, 0
  64248. ; A[1] * A[2]
  64249. mov rax, QWORD PTR [r8+16]
  64250. mul QWORD PTR [r8+8]
  64251. add r10, rax
  64252. adc r11, rdx
  64253. adc r12, 0
  64254. add r10, rax
  64255. adc r11, rdx
  64256. adc r12, 0
  64257. mov QWORD PTR [rsp+24], r10
  64258. ; A[0] * A[4]
  64259. mov rax, QWORD PTR [r8+32]
  64260. mul QWORD PTR [r8]
  64261. xor r10, r10
  64262. add r11, rax
  64263. adc r12, rdx
  64264. adc r10, 0
  64265. add r11, rax
  64266. adc r12, rdx
  64267. adc r10, 0
  64268. ; A[1] * A[3]
  64269. mov rax, QWORD PTR [r8+24]
  64270. mul QWORD PTR [r8+8]
  64271. add r11, rax
  64272. adc r12, rdx
  64273. adc r10, 0
  64274. add r11, rax
  64275. adc r12, rdx
  64276. adc r10, 0
  64277. ; A[2] * A[2]
  64278. mov rax, QWORD PTR [r8+16]
  64279. mul rax
  64280. add r11, rax
  64281. adc r12, rdx
  64282. adc r10, 0
  64283. mov QWORD PTR [rsp+32], r11
  64284. ; A[0] * A[5]
  64285. mov rax, QWORD PTR [r8+40]
  64286. mul QWORD PTR [r8]
  64287. xor r11, r11
  64288. xor r15, r15
  64289. mov r13, rax ; columns with three or more cross products: sum them once in r15:r14:r13
  64290. mov r14, rdx
  64291. ; A[1] * A[4]
  64292. mov rax, QWORD PTR [r8+32]
  64293. mul QWORD PTR [r8+8]
  64294. add r13, rax
  64295. adc r14, rdx
  64296. adc r15, 0
  64297. ; A[2] * A[3]
  64298. mov rax, QWORD PTR [r8+24]
  64299. mul QWORD PTR [r8+16]
  64300. add r13, rax
  64301. adc r14, rdx
  64302. adc r15, 0
  64303. add r13, r13 ; double the summed cross products
  64304. adc r14, r14
  64305. adc r15, r15
  64306. add r12, r13 ; merge the column sum into the running accumulator
  64307. adc r10, r14
  64308. adc r11, r15
  64309. mov QWORD PTR [rsp+40], r12
  64310. ; A[0] * A[6]
  64311. mov rax, QWORD PTR [r8+48]
  64312. mul QWORD PTR [r8]
  64313. xor r12, r12
  64314. xor r15, r15
  64315. mov r13, rax
  64316. mov r14, rdx
  64317. ; A[1] * A[5]
  64318. mov rax, QWORD PTR [r8+40]
  64319. mul QWORD PTR [r8+8]
  64320. add r13, rax
  64321. adc r14, rdx
  64322. adc r15, 0
  64323. ; A[2] * A[4]
  64324. mov rax, QWORD PTR [r8+32]
  64325. mul QWORD PTR [r8+16]
  64326. add r13, rax
  64327. adc r14, rdx
  64328. adc r15, 0
  64329. ; A[3] * A[3]
  64330. mov rax, QWORD PTR [r8+24]
  64331. mul rax
  64332. add r13, r13 ; doubling happens before the square in rdx:rax is added, so the square counts once
  64333. adc r14, r14
  64334. adc r15, r15
  64335. add r13, rax
  64336. adc r14, rdx
  64337. adc r15, 0
  64338. add r10, r13
  64339. adc r11, r14
  64340. adc r12, r15
  64341. mov QWORD PTR [rsp+48], r10
  64342. ; A[0] * A[7]
  64343. mov rax, QWORD PTR [r8+56]
  64344. mul QWORD PTR [r8]
  64345. xor r10, r10
  64346. xor r15, r15
  64347. mov r13, rax
  64348. mov r14, rdx
  64349. ; A[1] * A[6]
  64350. mov rax, QWORD PTR [r8+48]
  64351. mul QWORD PTR [r8+8]
  64352. add r13, rax
  64353. adc r14, rdx
  64354. adc r15, 0
  64355. ; A[2] * A[5]
  64356. mov rax, QWORD PTR [r8+40]
  64357. mul QWORD PTR [r8+16]
  64358. add r13, rax
  64359. adc r14, rdx
  64360. adc r15, 0
  64361. ; A[3] * A[4]
  64362. mov rax, QWORD PTR [r8+32]
  64363. mul QWORD PTR [r8+24]
  64364. add r13, rax
  64365. adc r14, rdx
  64366. adc r15, 0
  64367. add r13, r13
  64368. adc r14, r14
  64369. adc r15, r15
  64370. add r11, r13
  64371. adc r12, r14
  64372. adc r10, r15
  64373. mov QWORD PTR [rsp+56], r11
  64374. ; A[0] * A[8]
  64375. mov rax, QWORD PTR [r8+64]
  64376. mul QWORD PTR [r8]
  64377. xor r11, r11
  64378. xor r15, r15
  64379. mov r13, rax
  64380. mov r14, rdx
  64381. ; A[1] * A[7]
  64382. mov rax, QWORD PTR [r8+56]
  64383. mul QWORD PTR [r8+8]
  64384. add r13, rax
  64385. adc r14, rdx
  64386. adc r15, 0
  64387. ; A[2] * A[6]
  64388. mov rax, QWORD PTR [r8+48]
  64389. mul QWORD PTR [r8+16]
  64390. add r13, rax
  64391. adc r14, rdx
  64392. adc r15, 0
  64393. ; A[3] * A[5]
  64394. mov rax, QWORD PTR [r8+40]
  64395. mul QWORD PTR [r8+24]
  64396. add r13, rax
  64397. adc r14, rdx
  64398. adc r15, 0
  64399. ; A[4] * A[4]
  64400. mov rax, QWORD PTR [r8+32]
  64401. mul rax
  64402. add r13, r13
  64403. adc r14, r14
  64404. adc r15, r15
  64405. add r13, rax
  64406. adc r14, rdx
  64407. adc r15, 0
  64408. add r12, r13
  64409. adc r10, r14
  64410. adc r11, r15
  64411. mov QWORD PTR [rsp+64], r12
  64412. ; A[1] * A[8]
  64413. mov rax, QWORD PTR [r8+64]
  64414. mul QWORD PTR [r8+8]
  64415. xor r12, r12
  64416. xor r15, r15
  64417. mov r13, rax
  64418. mov r14, rdx
  64419. ; A[2] * A[7]
  64420. mov rax, QWORD PTR [r8+56]
  64421. mul QWORD PTR [r8+16]
  64422. add r13, rax
  64423. adc r14, rdx
  64424. adc r15, 0
  64425. ; A[3] * A[6]
  64426. mov rax, QWORD PTR [r8+48]
  64427. mul QWORD PTR [r8+24]
  64428. add r13, rax
  64429. adc r14, rdx
  64430. adc r15, 0
  64431. ; A[4] * A[5]
  64432. mov rax, QWORD PTR [r8+40]
  64433. mul QWORD PTR [r8+32]
  64434. add r13, rax
  64435. adc r14, rdx
  64436. adc r15, 0
  64437. add r13, r13
  64438. adc r14, r14
  64439. adc r15, r15
  64440. add r10, r13
  64441. adc r11, r14
  64442. adc r12, r15
  64443. mov QWORD PTR [rsp+72], r10
  64444. ; A[2] * A[8]
  64445. mov rax, QWORD PTR [r8+64]
  64446. mul QWORD PTR [r8+16]
  64447. xor r10, r10
  64448. xor r15, r15
  64449. mov r13, rax
  64450. mov r14, rdx
  64451. ; A[3] * A[7]
  64452. mov rax, QWORD PTR [r8+56]
  64453. mul QWORD PTR [r8+24]
  64454. add r13, rax
  64455. adc r14, rdx
  64456. adc r15, 0
  64457. ; A[4] * A[6]
  64458. mov rax, QWORD PTR [r8+48]
  64459. mul QWORD PTR [r8+32]
  64460. add r13, rax
  64461. adc r14, rdx
  64462. adc r15, 0
  64463. ; A[5] * A[5]
  64464. mov rax, QWORD PTR [r8+40]
  64465. mul rax
  64466. add r13, r13
  64467. adc r14, r14
  64468. adc r15, r15
  64469. add r13, rax
  64470. adc r14, rdx
  64471. adc r15, 0
  64472. add r11, r13
  64473. adc r12, r14
  64474. adc r10, r15
  64475. mov QWORD PTR [rsp+80], r11
  64476. ; A[3] * A[8]
  64477. mov rax, QWORD PTR [r8+64]
  64478. mul QWORD PTR [r8+24]
  64479. xor r11, r11
  64480. xor r15, r15
  64481. mov r13, rax
  64482. mov r14, rdx
  64483. ; A[4] * A[7]
  64484. mov rax, QWORD PTR [r8+56]
  64485. mul QWORD PTR [r8+32]
  64486. add r13, rax
  64487. adc r14, rdx
  64488. adc r15, 0
  64489. ; A[5] * A[6]
  64490. mov rax, QWORD PTR [r8+48]
  64491. mul QWORD PTR [r8+40]
  64492. add r13, rax
  64493. adc r14, rdx
  64494. adc r15, 0
  64495. add r13, r13
  64496. adc r14, r14
  64497. adc r15, r15
  64498. add r12, r13
  64499. adc r10, r14
  64500. adc r11, r15
  64501. mov QWORD PTR [rsp+88], r12
  64502. ; A[4] * A[8]
  64503. mov rax, QWORD PTR [r8+64]
  64504. mul QWORD PTR [r8+32]
  64505. xor r12, r12
  64506. add r10, rax ; short columns again: each cross product is simply added twice
  64507. adc r11, rdx
  64508. adc r12, 0
  64509. add r10, rax
  64510. adc r11, rdx
  64511. adc r12, 0
  64512. ; A[5] * A[7]
  64513. mov rax, QWORD PTR [r8+56]
  64514. mul QWORD PTR [r8+40]
  64515. add r10, rax
  64516. adc r11, rdx
  64517. adc r12, 0
  64518. add r10, rax
  64519. adc r11, rdx
  64520. adc r12, 0
  64521. ; A[6] * A[6]
  64522. mov rax, QWORD PTR [r8+48]
  64523. mul rax
  64524. add r10, rax
  64525. adc r11, rdx
  64526. adc r12, 0
  64527. mov QWORD PTR [rsp+96], r10
  64528. ; A[5] * A[8]
  64529. mov rax, QWORD PTR [r8+64]
  64530. mul QWORD PTR [r8+40]
  64531. xor r10, r10
  64532. add r11, rax
  64533. adc r12, rdx
  64534. adc r10, 0
  64535. add r11, rax
  64536. adc r12, rdx
  64537. adc r10, 0
  64538. ; A[6] * A[7]
  64539. mov rax, QWORD PTR [r8+56]
  64540. mul QWORD PTR [r8+48]
  64541. add r11, rax
  64542. adc r12, rdx
  64543. adc r10, 0
  64544. add r11, rax
  64545. adc r12, rdx
  64546. adc r10, 0
  64547. mov QWORD PTR [rsp+104], r11
  64548. ; A[6] * A[8]
  64549. mov rax, QWORD PTR [r8+64]
  64550. mul QWORD PTR [r8+48]
  64551. xor r11, r11
  64552. add r12, rax
  64553. adc r10, rdx
  64554. adc r11, 0
  64555. add r12, rax
  64556. adc r10, rdx
  64557. adc r11, 0
  64558. ; A[7] * A[7]
  64559. mov rax, QWORD PTR [r8+56]
  64560. mul rax
  64561. add r12, rax
  64562. adc r10, rdx
  64563. adc r11, 0
  64564. mov QWORD PTR [rsp+112], r12
  64565. ; A[7] * A[8]
  64566. mov rax, QWORD PTR [r8+64]
  64567. mul QWORD PTR [r8+56]
  64568. xor r12, r12
  64569. add r10, rax
  64570. adc r11, rdx
  64571. adc r12, 0
  64572. add r10, rax
  64573. adc r11, rdx
  64574. adc r12, 0
  64575. mov QWORD PTR [rsp+120], r10
  64576. ; A[8] * A[8]
  64577. mov rax, QWORD PTR [r8+64]
  64578. mul rax
  64579. add r11, rax
  64580. adc r12, rdx
  64581. mov QWORD PTR [rsp+128], r11
  64582. mov QWORD PTR [rsp+136], r12 ; word 17 is stored but not read back by the reduction below
         ; Reduction: load product words 8..16, shift them right by 9 bits to obtain
         ; the part of the product at bit 521 and above, and add it to the low part.
  64583. mov r10, QWORD PTR [rsp+64]
  64584. mov r11, QWORD PTR [rsp+72]
  64585. mov r12, QWORD PTR [rsp+80]
  64586. mov r9, r10
  64587. and r9, 511 ; r9 = low 9 bits of word 8 (product bits 512..520)
  64588. mov rax, QWORD PTR [rsp+88]
  64589. mov rdx, QWORD PTR [rsp+96]
  64590. mov r13, QWORD PTR [rsp+104]
  64591. mov r14, QWORD PTR [rsp+112]
  64592. mov r15, QWORD PTR [rsp+120]
  64593. mov r8, QWORD PTR [rsp+128] ; the pointer to a in r8 is no longer needed
  64594. shrd r10, r11, 9 ; each shrd pulls the next word's low 9 bits into the top of this word
  64595. shrd r11, r12, 9
  64596. shrd r12, rax, 9
  64597. shrd rax, rdx, 9
  64598. shrd rdx, r13, 9
  64599. shrd r13, r14, 9
  64600. shrd r14, r15, 9
  64601. shrd r15, r8, 9
  64602. shr r8, 9
  64603. add r10, QWORD PTR [rsp] ; high part + low words 0..7
  64604. adc r11, QWORD PTR [rsp+8]
  64605. adc r12, QWORD PTR [rsp+16]
  64606. adc rax, QWORD PTR [rsp+24]
  64607. adc rdx, QWORD PTR [rsp+32]
  64608. adc r13, QWORD PTR [rsp+40]
  64609. adc r14, QWORD PTR [rsp+48]
  64610. adc r15, QWORD PTR [rsp+56]
  64611. adc r9, r8 ; top word: 9 low-part bits + top of the high part + carry
  64612. mov r8, r9
  64613. shr r9, 9 ; r9 = whatever overflowed past bit 520
  64614. and r8, 511 ; r8 = top word trimmed to 9 bits
  64615. add r10, r9 ; fold the overflow back into the bottom and ripple the carry up
  64616. adc r11, 0
  64617. adc r12, 0
  64618. adc rax, 0
  64619. adc rdx, 0
  64620. adc r13, 0
  64621. adc r14, 0
  64622. adc r15, 0
  64623. adc r8, 0
  64624. mov QWORD PTR [rcx], r10 ; write the 9 result words to r
  64625. mov QWORD PTR [rcx+8], r11
  64626. mov QWORD PTR [rcx+16], r12
  64627. mov QWORD PTR [rcx+24], rax
  64628. mov QWORD PTR [rcx+32], rdx
  64629. mov QWORD PTR [rcx+40], r13
  64630. mov QWORD PTR [rcx+48], r14
  64631. mov QWORD PTR [rcx+56], r15
  64632. mov QWORD PTR [rcx+64], r8
  64633. add rsp, 144
  64634. pop r15
  64635. pop r14
  64636. pop r13
  64637. pop r12
  64638. ret
  64639. sp_521_mont_sqr_9 ENDP
  64640. _text ENDS
  64641. ; /* Compare a with b in constant time.
  64642. ; *
  64643. ; * a A single precision integer.
  64644. ; * b A single precision integer.
  64645. ; * return -ve, 0 or +ve if a is less than, equal to or greater than b
  64646. ; * respectively.
         ; *
         ; * a is read through rcx and b through rdx, nine 64-bit words each, from the
         ; * word at offset 64 down to the word at offset 0. There are no branches:
         ; * every word is always loaded and the outcome is selected with cmov.
         ; *   rax = result candidate (starts at -1, becomes 1 or -1 at the first difference)
         ; *   r8  = mask, all ones until a difference has been seen, then zero
         ; *   r9  = constant 0, r10 = constant 1
         ; * Once r8 is zero both operands are masked to zero, so later words compare
         ; * equal and cannot change rax. The value returned in rax is -1, 0 or 1.
         ; * r12 is saved and restored.
  64647. ; */
  64648. _text SEGMENT READONLY PARA
  64649. sp_521_cmp_9 PROC
  64650. push r12
  64651. xor r9, r9
  64652. mov r8, -1
  64653. mov rax, -1
  64654. mov r10, 1
  64655. mov r11, QWORD PTR [rcx+64]
  64656. mov r12, QWORD PTR [rdx+64]
  64657. and r11, r8
  64658. and r12, r8
  64659. sub r11, r12
  64660. cmova rax, r10 ; unsigned a word > b word: result 1
  64661. cmovc rax, r8 ; borrow (a word < b word): result takes the mask value, which is -1 here
  64662. cmovnz r8, r9 ; words differ: clear the mask so lower words are ignored
  64663. mov r11, QWORD PTR [rcx+56]
  64664. mov r12, QWORD PTR [rdx+56]
  64665. and r11, r8
  64666. and r12, r8
  64667. sub r11, r12
  64668. cmova rax, r10
  64669. cmovc rax, r8
  64670. cmovnz r8, r9
  64671. mov r11, QWORD PTR [rcx+48]
  64672. mov r12, QWORD PTR [rdx+48]
  64673. and r11, r8
  64674. and r12, r8
  64675. sub r11, r12
  64676. cmova rax, r10
  64677. cmovc rax, r8
  64678. cmovnz r8, r9
  64679. mov r11, QWORD PTR [rcx+40]
  64680. mov r12, QWORD PTR [rdx+40]
  64681. and r11, r8
  64682. and r12, r8
  64683. sub r11, r12
  64684. cmova rax, r10
  64685. cmovc rax, r8
  64686. cmovnz r8, r9
  64687. mov r11, QWORD PTR [rcx+32]
  64688. mov r12, QWORD PTR [rdx+32]
  64689. and r11, r8
  64690. and r12, r8
  64691. sub r11, r12
  64692. cmova rax, r10
  64693. cmovc rax, r8
  64694. cmovnz r8, r9
  64695. mov r11, QWORD PTR [rcx+24]
  64696. mov r12, QWORD PTR [rdx+24]
  64697. and r11, r8
  64698. and r12, r8
  64699. sub r11, r12
  64700. cmova rax, r10
  64701. cmovc rax, r8
  64702. cmovnz r8, r9
  64703. mov r11, QWORD PTR [rcx+16]
  64704. mov r12, QWORD PTR [rdx+16]
  64705. and r11, r8
  64706. and r12, r8
  64707. sub r11, r12
  64708. cmova rax, r10
  64709. cmovc rax, r8
  64710. cmovnz r8, r9
  64711. mov r11, QWORD PTR [rcx+8]
  64712. mov r12, QWORD PTR [rdx+8]
  64713. and r11, r8
  64714. and r12, r8
  64715. sub r11, r12
  64716. cmova rax, r10
  64717. cmovc rax, r8
  64718. cmovnz r8, r9
  64719. mov r11, QWORD PTR [rcx]
  64720. mov r12, QWORD PTR [rdx]
  64721. and r11, r8
  64722. and r12, r8
  64723. sub r11, r12
  64724. cmova rax, r10
  64725. cmovc rax, r8
  64726. cmovnz r8, r9
  64727. xor rax, r8 ; all words equal: r8 and rax are both still -1, giving 0; otherwise r8 is 0 and rax is kept
  64728. pop r12
  64729. ret
  64730. sp_521_cmp_9 ENDP
  64731. _text ENDS
  64732. ; /* Conditionally subtract b from a using the mask m.
  64733. ; * m is -1 to subtract and 0 when not subtracting.
  64734. ; *
  64735. ; * r A single precision number representing condition subtract result.
  64736. ; * a A single precision number to subtract from.
  64737. ; * b A single precision number to subtract.
  64738. ; * m Mask value to apply.
         ; *
         ; * Registers: rcx = r, rdx = a, r8 = b, r9 = m.
         ; * First pass: the nine words of b are ANDed with m and parked in a 72-byte
         ; * stack buffer. Second pass: r = a - (b AND m) as one sub/sbb borrow chain;
         ; * the interleaved mov instructions do not modify the flags.
         ; * Returns the final borrow in rax: 0 when there was none, -1 otherwise.
         ; * Only rax, r8, r10 and r11 are modified besides the stack buffer.
  64739. ; */
  64740. _text SEGMENT READONLY PARA
  64741. sp_521_cond_sub_9 PROC
  64742. sub rsp, 72 ; scratch for the nine masked words of b
  64743. mov r10, QWORD PTR [r8]
  64744. mov r11, QWORD PTR [r8+8]
  64745. and r10, r9
  64746. and r11, r9
  64747. mov QWORD PTR [rsp], r10
  64748. mov QWORD PTR [rsp+8], r11
  64749. mov r10, QWORD PTR [r8+16]
  64750. mov r11, QWORD PTR [r8+24]
  64751. and r10, r9
  64752. and r11, r9
  64753. mov QWORD PTR [rsp+16], r10
  64754. mov QWORD PTR [rsp+24], r11
  64755. mov r10, QWORD PTR [r8+32]
  64756. mov r11, QWORD PTR [r8+40]
  64757. and r10, r9
  64758. and r11, r9
  64759. mov QWORD PTR [rsp+32], r10
  64760. mov QWORD PTR [rsp+40], r11
  64761. mov r10, QWORD PTR [r8+48]
  64762. mov r11, QWORD PTR [r8+56]
  64763. and r10, r9
  64764. and r11, r9
  64765. mov QWORD PTR [rsp+48], r10
  64766. mov QWORD PTR [rsp+56], r11
  64767. mov r10, QWORD PTR [r8+64]
  64768. and r10, r9
  64769. mov QWORD PTR [rsp+64], r10
  64770. mov r10, QWORD PTR [rdx]
  64771. mov r8, QWORD PTR [rsp] ; r8 is reused from here on for the masked word of b
  64772. sub r10, r8 ; starts the borrow chain; only mov and sbb follow until the last word
  64773. mov r11, QWORD PTR [rdx+8]
  64774. mov r8, QWORD PTR [rsp+8]
  64775. sbb r11, r8
  64776. mov QWORD PTR [rcx], r10 ; each result word is stored one step after it is computed
  64777. mov r10, QWORD PTR [rdx+16]
  64778. mov r8, QWORD PTR [rsp+16]
  64779. sbb r10, r8
  64780. mov QWORD PTR [rcx+8], r11
  64781. mov r11, QWORD PTR [rdx+24]
  64782. mov r8, QWORD PTR [rsp+24]
  64783. sbb r11, r8
  64784. mov QWORD PTR [rcx+16], r10
  64785. mov r10, QWORD PTR [rdx+32]
  64786. mov r8, QWORD PTR [rsp+32]
  64787. sbb r10, r8
  64788. mov QWORD PTR [rcx+24], r11
  64789. mov r11, QWORD PTR [rdx+40]
  64790. mov r8, QWORD PTR [rsp+40]
  64791. sbb r11, r8
  64792. mov QWORD PTR [rcx+32], r10
  64793. mov r10, QWORD PTR [rdx+48]
  64794. mov r8, QWORD PTR [rsp+48]
  64795. sbb r10, r8
  64796. mov QWORD PTR [rcx+40], r11
  64797. mov r11, QWORD PTR [rdx+56]
  64798. mov r8, QWORD PTR [rsp+56]
  64799. sbb r11, r8
  64800. mov QWORD PTR [rcx+48], r10
  64801. mov r10, QWORD PTR [rdx+64]
  64802. mov r8, QWORD PTR [rsp+64]
  64803. sbb r10, r8
  64804. mov QWORD PTR [rcx+56], r11
  64805. mov QWORD PTR [rcx+64], r10
  64806. sbb rax, rax ; rax = 0 - borrow: 0 if no borrow out of the top word, -1 if there was one
  64807. add rsp, 72
  64808. ret
  64809. sp_521_cond_sub_9 ENDP
  64810. _text ENDS
  64811. ; /* Reduce the number back to 521 bits using Montgomery reduction.
  64812. ; *
  64813. ; * a A single precision number to reduce in place.
  64814. ; * m The single precision number representing the modulus.
  64815. ; * mp The digit representing the negative inverse of m mod 2^n.
         ; *
         ; * Only a (rcx) is read: the registers that would carry m and mp (rdx, r8)
         ; * are overwritten with words of a before being read, and no multiplication
         ; * is performed. The routine loads a[8]..a[16], shifts that span right by
         ; * 9 bits to obtain the bits at position 521 and above, adds it to the low
         ; * 521 bits (a[0]..a[7] plus the low 9 bits of a[8]) and folds any overflow
         ; * past bit 520 back into the lowest word. Adding the high part onto the low
         ; * part is congruent modulo 2^521 - 1.
         ; * Reads a[0]..a[16], writes a[0]..a[8]. r12-r15 are saved and restored.
  64816. ; */
  64817. _text SEGMENT READONLY PARA
  64818. sp_521_mont_reduce_9 PROC
  64819. push r12
  64820. push r13
  64821. push r14
  64822. push r15
  64823. mov rdx, QWORD PTR [rcx+64]
  64824. mov rax, QWORD PTR [rcx+72]
  64825. mov r8, QWORD PTR [rcx+80]
  64826. mov r15, rdx
  64827. and r15, 511 ; r15 = low 9 bits of a[8] (bits 512..520)
  64828. mov r9, QWORD PTR [rcx+88]
  64829. mov r10, QWORD PTR [rcx+96]
  64830. mov r11, QWORD PTR [rcx+104]
  64831. mov r12, QWORD PTR [rcx+112]
  64832. mov r13, QWORD PTR [rcx+120]
  64833. mov r14, QWORD PTR [rcx+128]
  64834. shrd rdx, rax, 9 ; shift a[8]..a[16] right by 9 bits as one long value
  64835. shrd rax, r8, 9
  64836. shrd r8, r9, 9
  64837. shrd r9, r10, 9
  64838. shrd r10, r11, 9
  64839. shrd r11, r12, 9
  64840. shrd r12, r13, 9
  64841. shrd r13, r14, 9
  64842. shr r14, 9
  64843. add rdx, QWORD PTR [rcx] ; add the low words a[0]..a[7]
  64844. adc rax, QWORD PTR [rcx+8]
  64845. adc r8, QWORD PTR [rcx+16]
  64846. adc r9, QWORD PTR [rcx+24]
  64847. adc r10, QWORD PTR [rcx+32]
  64848. adc r11, QWORD PTR [rcx+40]
  64849. adc r12, QWORD PTR [rcx+48]
  64850. adc r13, QWORD PTR [rcx+56]
  64851. adc r15, r14 ; top word: 9 low-part bits + top of the shifted part + carry
  64852. mov r14, r15
  64853. shr r15, 9 ; r15 = overflow past bit 520
  64854. and r14, 511 ; r14 = top word trimmed to 9 bits
  64855. add rdx, r15 ; fold the overflow into the lowest word and ripple the carry up
  64856. adc rax, 0
  64857. adc r8, 0
  64858. adc r9, 0
  64859. adc r10, 0
  64860. adc r11, 0
  64861. adc r12, 0
  64862. adc r13, 0
  64863. adc r14, 0
  64864. mov QWORD PTR [rcx], rdx
  64865. mov QWORD PTR [rcx+8], rax
  64866. mov QWORD PTR [rcx+16], r8
  64867. mov QWORD PTR [rcx+24], r9
  64868. mov QWORD PTR [rcx+32], r10
  64869. mov QWORD PTR [rcx+40], r11
  64870. mov QWORD PTR [rcx+48], r12
  64871. mov QWORD PTR [rcx+56], r13
  64872. mov QWORD PTR [rcx+64], r14
  64873. pop r15
  64874. pop r14
  64875. pop r13
  64876. pop r12
  64877. ret
  64878. sp_521_mont_reduce_9 ENDP
  64879. _text ENDS
  64880. ; /* Reduce the number back to 521 bits using word-by-word Montgomery reduction.
  64881. ; *
  64882. ; * a A single precision number to reduce in place.
  64883. ; * m The single precision number representing the modulus.
  64884. ; * mp The digit representing the negative inverse of m mod 2^n.
         ; *
         ; * Registers on entry: rcx = a, rdx = m, r8 = mp.
         ; * Unlike the shift-and-add reduction above, this routine multiplies by m:
         ; * nine passes each compute mu = a[i] * mp and add m * mu into a at word i.
         ; * On the last pass mu is cut to 9 bits (presumably so that exactly
         ; * 8 * 64 + 9 = 521 low bits are cancelled). a[8]..a[17] are then shifted
         ; * right by 9 bits into a[0]..a[9], and sp_521_cond_sub_9 subtracts m under
         ; * a mask derived from the bits of a[8] above bit 8.
         ; * Inside the loop r15 and rdi cache the two lowest live words of a, r9
         ; * holds m, r10 counts passes and rsi carries the overflow between passes.
         ; * r12-r15, rdi and rsi are saved and restored.
  64885. ; */
  64886. _text SEGMENT READONLY PARA
  64887. sp_521_mont_reduce_order_9 PROC
  64888. push r12
  64889. push r13
  64890. push r14
  64891. push r15
  64892. push rdi
  64893. push rsi
  64894. mov r9, rdx ; r9 = m; rdx is clobbered by mul
  64895. xor rsi, rsi ; no carry into the first pass
  64896. ; i = 9
  64897. mov r10, 9
  64898. mov r15, QWORD PTR [rcx]
  64899. mov rdi, QWORD PTR [rcx+8]
  64900. L_521_mont_reduce_order_9_loop:
  64901. ; mu = a[i] * mp
  64902. mov r13, r15
  64903. imul r13, r8
  64904. cmp r10, 1
  64905. jne L_521_mont_reduce_order_9_nomask
  64906. and r13, 511 ; last pass only: keep 9 bits of mu
  64907. L_521_mont_reduce_order_9_nomask:
  64908. ; a[i+0] += m[0] * mu
  64909. mov rax, r13
  64910. xor r12, r12
  64911. mul QWORD PTR [r9]
  64912. add r15, rax
  64913. mov QWORD PTR [rcx], r15
  64914. adc r12, rdx
  64915. ; a[i+1] += m[1] * mu
  64916. mov rax, r13
  64917. xor r11, r11
  64918. mul QWORD PTR [r9+8]
  64919. mov r15, rdi ; new a[i+1] stays in r15: it is a[i] of the next pass
  64920. add r15, rax
  64921. adc r11, rdx
  64922. add r15, r12
  64923. adc r11, 0
  64924. ; a[i+2] += m[2] * mu
  64925. mov rax, r13
  64926. xor r12, r12
  64927. mul QWORD PTR [r9+16]
  64928. mov rdi, QWORD PTR [rcx+16] ; new a[i+2] stays in rdi: it is a[i+1] of the next pass
  64929. add rdi, rax
  64930. adc r12, rdx
  64931. add rdi, r11
  64932. adc r12, 0
  64933. ; a[i+3] += m[3] * mu
  64934. mov rax, r13
  64935. xor r11, r11
  64936. mul QWORD PTR [r9+24]
  64937. mov r14, QWORD PTR [rcx+24]
  64938. add r14, rax
  64939. adc r11, rdx
  64940. add r14, r12
  64941. mov QWORD PTR [rcx+24], r14
  64942. adc r11, 0
  64943. ; a[i+4] += m[4] * mu
  64944. mov rax, r13
  64945. xor r12, r12
  64946. mul QWORD PTR [r9+32]
  64947. mov r14, QWORD PTR [rcx+32]
  64948. add r14, rax
  64949. adc r12, rdx
  64950. add r14, r11
  64951. mov QWORD PTR [rcx+32], r14
  64952. adc r12, 0
  64953. ; a[i+5] += m[5] * mu
  64954. mov rax, r13
  64955. xor r11, r11
  64956. mul QWORD PTR [r9+40]
  64957. mov r14, QWORD PTR [rcx+40]
  64958. add r14, rax
  64959. adc r11, rdx
  64960. add r14, r12
  64961. mov QWORD PTR [rcx+40], r14
  64962. adc r11, 0
  64963. ; a[i+6] += m[6] * mu
  64964. mov rax, r13
  64965. xor r12, r12
  64966. mul QWORD PTR [r9+48]
  64967. mov r14, QWORD PTR [rcx+48]
  64968. add r14, rax
  64969. adc r12, rdx
  64970. add r14, r11
  64971. mov QWORD PTR [rcx+48], r14
  64972. adc r12, 0
  64973. ; a[i+7] += m[7] * mu
  64974. mov rax, r13
  64975. xor r11, r11
  64976. mul QWORD PTR [r9+56]
  64977. mov r14, QWORD PTR [rcx+56]
  64978. add r14, rax
  64979. adc r11, rdx
  64980. add r14, r12
  64981. mov QWORD PTR [rcx+56], r14
  64982. adc r11, 0
  64983. ; a[i+8] += m[8] * mu
  64984. mov rax, r13
  64985. mul QWORD PTR [r9+64]
  64986. mov r14, QWORD PTR [rcx+64]
  64987. add r11, rax
  64988. adc rdx, rsi ; high word also absorbs the carry left over from the previous pass
  64989. mov rsi, 0 ; mov, not xor: the carry flag must survive for the adc below
  64990. adc rsi, 0
  64991. add r14, r11
  64992. mov QWORD PTR [rcx+64], r14
  64993. adc QWORD PTR [rcx+72], rdx
  64994. adc rsi, 0 ; rsi = carry to hand to the next pass
  64995. ; i -= 1
  64996. add rcx, 8 ; slide the window up one word
  64997. dec r10
  64998. jnz L_521_mont_reduce_order_9_loop
  64999. mov QWORD PTR [rcx], r15 ; flush the two cached words back to memory
  65000. mov QWORD PTR [rcx+8], rdi
  65001. mov r8, rcx
  65002. sub rcx, 72 ; rcx = original a (advanced by 9 * 8 in the loop)
  65003. sub r8, 8 ; r8 = &a[8], source of the shifted copy
  65004. mov rax, QWORD PTR [r8]
  65005. mov rdx, QWORD PTR [r8+8]
  65006. mov r10, QWORD PTR [r8+16]
  65007. mov r11, QWORD PTR [r8+24]
  65008. mov r13, QWORD PTR [r8+32]
  65009. shrd rax, rdx, 9 ; a[0..9] = a[8..17] >> 9, done four words at a time
  65010. shrd rdx, r10, 9
  65011. shrd r10, r11, 9
  65012. shrd r11, r13, 9
  65013. mov QWORD PTR [rcx], rax
  65014. mov QWORD PTR [rcx+8], rdx
  65015. mov QWORD PTR [rcx+16], r10
  65016. mov QWORD PTR [rcx+24], r11
  65017. mov rdx, QWORD PTR [r8+40]
  65018. mov r10, QWORD PTR [r8+48]
  65019. mov r11, QWORD PTR [r8+56]
  65020. mov rax, QWORD PTR [r8+64]
  65021. shrd r13, rdx, 9
  65022. shrd rdx, r10, 9
  65023. shrd r10, r11, 9
  65024. shrd r11, rax, 9
  65025. mov QWORD PTR [rcx+32], r13
  65026. mov QWORD PTR [rcx+40], rdx
  65027. mov QWORD PTR [rcx+48], r10
  65028. mov QWORD PTR [rcx+56], r11
  65029. mov rdx, QWORD PTR [r8+72]
  65030. shrd rax, rdx, 9
  65031. shr rdx, 9
  65032. mov QWORD PTR [rcx+64], rax
  65033. mov QWORD PTR [rcx+72], rdx
  65034. mov rsi, QWORD PTR [rcx+64]
  65035. shr rsi, 9 ; bits of a[8] above bit 8, i.e. result bits at position 521 and up
  65036. neg rsi ; 0 stays 0, 1 becomes all ones (presumably the only two values that occur)
         ; Arguments for sp_521_cond_sub_9(r = a, a = a, b = m, m = mask).
         ; r9 still holds the modulus pointer, so it must be copied to r8 before r9
         ; is overwritten with the mask; both arms now use that order.
  65037. IFDEF _WIN64
  65038. mov r8, r9
  65039. mov r9, rsi
  65040. ELSE
  65041. mov r8, r9
  65042. mov r9, rsi
  65043. ENDIF
  65044. mov rdx, rcx
         ; Six pushes leave rsp at 8 mod 16 (given the conventional 8 mod 16 on
         ; entry); 40 bytes provide the 32-byte shadow space and realign rsp to 16.
         sub rsp, 40
  65045. call sp_521_cond_sub_9
         add rsp, 40
  65046. pop rsi
  65047. pop rdi
  65048. pop r15
  65049. pop r14
  65050. pop r13
  65051. pop r12
  65052. ret
  65053. sp_521_mont_reduce_order_9 ENDP
  65054. _text ENDS
  65055. ; /* Add two Montgomery form numbers (r = a + b % m).
  65056. ; *
  65057. ; * r Result of addition.
  65058. ; * a First number to add in Montgomery form.
  65059. ; * b Second number to add in Montgomery form.
  65060. ; * m Modulus (prime).
         ; *
         ; * Registers: rcx = r, rdx = a, r8 = b. The register that would carry m (r9)
         ; * is overwritten with a[1] before being read, so m is not consulted.
         ; * The nine words are added as one carry chain. The part of the top word
         ; * above its low 9 bits (sum bits 521 and up) is then stripped off and added
         ; * back into the lowest word, which is congruent modulo 2^521 - 1.
         ; * r12-r15, rdi and rsi are saved and restored.
  65061. ; */
  65062. _text SEGMENT READONLY PARA
  65063. sp_521_mont_add_9 PROC
  65064. push r12
  65065. push r13
  65066. push r14
  65067. push r15
  65068. push rdi
  65069. push rsi
  65070. mov rax, QWORD PTR [rdx]
  65071. mov r9, QWORD PTR [rdx+8]
  65072. mov r10, QWORD PTR [rdx+16]
  65073. mov r11, QWORD PTR [rdx+24]
  65074. mov r12, QWORD PTR [rdx+32]
  65075. mov r13, QWORD PTR [rdx+40]
  65076. mov r14, QWORD PTR [rdx+48]
  65077. mov r15, QWORD PTR [rdx+56]
  65078. mov rdi, QWORD PTR [rdx+64]
  65079. add rax, QWORD PTR [r8] ; nine-word a + b
  65080. adc r9, QWORD PTR [r8+8]
  65081. adc r10, QWORD PTR [r8+16]
  65082. adc r11, QWORD PTR [r8+24]
  65083. adc r12, QWORD PTR [r8+32]
  65084. adc r13, QWORD PTR [r8+40]
  65085. adc r14, QWORD PTR [r8+48]
  65086. adc r15, QWORD PTR [r8+56]
  65087. adc rdi, QWORD PTR [r8+64]
  65088. mov rsi, rdi
  65089. and rdi, 511 ; keep 9 bits in the top word
  65090. shr rsi, 9 ; rsi = overflow past bit 520
  65091. add rax, rsi ; fold the overflow into the lowest word and ripple the carry up
  65092. adc r9, 0
  65093. adc r10, 0
  65094. adc r11, 0
  65095. adc r12, 0
  65096. adc r13, 0
  65097. adc r14, 0
  65098. adc r15, 0
  65099. adc rdi, 0
  65100. mov QWORD PTR [rcx], rax
  65101. mov QWORD PTR [rcx+8], r9
  65102. mov QWORD PTR [rcx+16], r10
  65103. mov QWORD PTR [rcx+24], r11
  65104. mov QWORD PTR [rcx+32], r12
  65105. mov QWORD PTR [rcx+40], r13
  65106. mov QWORD PTR [rcx+48], r14
  65107. mov QWORD PTR [rcx+56], r15
  65108. mov QWORD PTR [rcx+64], rdi
  65109. pop rsi
  65110. pop rdi
  65111. pop r15
  65112. pop r14
  65113. pop r13
  65114. pop r12
  65115. ret
  65116. sp_521_mont_add_9 ENDP
  65117. _text ENDS
  65118. ; /* Double a Montgomery form number (r = a + a % m).
  65119. ; *
  65120. ; * r Result of addition.
  65121. ; * a Number to double in Montgomery form.
  65122. ; * m Modulus (prime).
         ; *
         ; * Registers: rcx = r, rdx = a. The register that would carry m (r8) is
         ; * overwritten with a[1] before being read, so m is not consulted.
         ; * Each word is added to itself along one carry chain. The part of the top
         ; * word above its low 9 bits (bits 521 and up) is then stripped off and
         ; * added back into the lowest word, which is congruent modulo 2^521 - 1.
         ; * r12-r15 and rdi are saved and restored.
  65123. ; */
  65124. _text SEGMENT READONLY PARA
  65125. sp_521_mont_dbl_9 PROC
  65126. push r12
  65127. push r13
  65128. push r14
  65129. push r15
  65130. push rdi
  65131. mov rax, QWORD PTR [rdx]
  65132. mov r8, QWORD PTR [rdx+8]
  65133. mov r9, QWORD PTR [rdx+16]
  65134. mov r10, QWORD PTR [rdx+24]
  65135. mov r11, QWORD PTR [rdx+32]
  65136. mov r12, QWORD PTR [rdx+40]
  65137. mov r13, QWORD PTR [rdx+48]
  65138. mov r14, QWORD PTR [rdx+56]
  65139. mov r15, QWORD PTR [rdx+64]
  65140. add rax, rax ; nine-word a + a
  65141. adc r8, r8
  65142. adc r9, r9
  65143. adc r10, r10
  65144. adc r11, r11
  65145. adc r12, r12
  65146. adc r13, r13
  65147. adc r14, r14
  65148. adc r15, r15
  65149. mov rdi, r15
  65150. and r15, 511 ; keep 9 bits in the top word
  65151. shr rdi, 9 ; rdi = overflow past bit 520
  65152. add rax, rdi ; fold the overflow into the lowest word and ripple the carry up
  65153. adc r8, 0
  65154. adc r9, 0
  65155. adc r10, 0
  65156. adc r11, 0
  65157. adc r12, 0
  65158. adc r13, 0
  65159. adc r14, 0
  65160. adc r15, 0
  65161. mov QWORD PTR [rcx], rax
  65162. mov QWORD PTR [rcx+8], r8
  65163. mov QWORD PTR [rcx+16], r9
  65164. mov QWORD PTR [rcx+24], r10
  65165. mov QWORD PTR [rcx+32], r11
  65166. mov QWORD PTR [rcx+40], r12
  65167. mov QWORD PTR [rcx+48], r13
  65168. mov QWORD PTR [rcx+56], r14
  65169. mov QWORD PTR [rcx+64], r15
  65170. pop rdi
  65171. pop r15
  65172. pop r14
  65173. pop r13
  65174. pop r12
  65175. ret
  65176. sp_521_mont_dbl_9 ENDP
  65177. _text ENDS
  65178. ; /* Triple a Montgomery form number (r = a + a + a % m).
  65179. ; *
  65180. ; * r Result of Tripling.
  65181. ; * a Number to triple in Montgomery form.
  65182. ; * m Modulus (prime).
  65183. ; */
  ; Register use as written: rcx = r (destination), rdx = a (source).
  ; r8 is overwritten with a[1] without ever being read, so the m argument is
  ; not consulted. The sum a + a + a is formed first and reduced once at the
  ; end by folding everything from bit 521 upwards back into bit 0
  ; (2^521 is congruent to 1 modulo 2^521 - 1).
  ; rax and r8-r11 are used as scratch; r12-r15 and rdi are saved and restored.
  65184. _text SEGMENT READONLY PARA
  65185. sp_521_mont_tpl_9 PROC
  65186. push r12
  65187. push r13
  65188. push r14
  65189. push r15
  65190. push rdi
  ; Load the nine 64-bit words of a (least significant word first).
  65191. mov rax, QWORD PTR [rdx]
  65192. mov r8, QWORD PTR [rdx+8]
  65193. mov r9, QWORD PTR [rdx+16]
  65194. mov r10, QWORD PTR [rdx+24]
  65195. mov r11, QWORD PTR [rdx+32]
  65196. mov r12, QWORD PTR [rdx+40]
  65197. mov r13, QWORD PTR [rdx+48]
  65198. mov r14, QWORD PTR [rdx+56]
  65199. mov r15, QWORD PTR [rdx+64]
  ; a + a
  65200. add rax, rax
  65201. adc r8, r8
  65202. adc r9, r9
  65203. adc r10, r10
  65204. adc r11, r11
  65205. adc r12, r12
  65206. adc r13, r13
  65207. adc r14, r14
  65208. adc r15, r15
  ; (a + a) + a, re-reading a from memory.
  65209. add rax, QWORD PTR [rdx]
  65210. adc r8, QWORD PTR [rdx+8]
  65211. adc r9, QWORD PTR [rdx+16]
  65212. adc r10, QWORD PTR [rdx+24]
  65213. adc r11, QWORD PTR [rdx+32]
  65214. adc r12, QWORD PTR [rdx+40]
  65215. adc r13, QWORD PTR [rdx+48]
  65216. adc r14, QWORD PTR [rdx+56]
  65217. adc r15, QWORD PTR [rdx+64]
  ; Split the top word: low 9 bits stay in r15, the rest goes to rdi.
  65218. mov rdi, r15
  65219. and r15, 511
  65220. shr rdi, 9
  ; Add the overflow back in at bit 0 and ripple the carry upwards.
  65221. add rax, rdi
  65222. adc r8, 0
  65223. adc r9, 0
  65224. adc r10, 0
  65225. adc r11, 0
  65226. adc r12, 0
  65227. adc r13, 0
  65228. adc r14, 0
  65229. adc r15, 0
  ; Store the nine result words to r.
  65230. mov QWORD PTR [rcx], rax
  65231. mov QWORD PTR [rcx+8], r8
  65232. mov QWORD PTR [rcx+16], r9
  65233. mov QWORD PTR [rcx+24], r10
  65234. mov QWORD PTR [rcx+32], r11
  65235. mov QWORD PTR [rcx+40], r12
  65236. mov QWORD PTR [rcx+48], r13
  65237. mov QWORD PTR [rcx+56], r14
  65238. mov QWORD PTR [rcx+64], r15
  65239. pop rdi
  65240. pop r15
  65241. pop r14
  65242. pop r13
  65243. pop r12
  65244. ret
  65245. sp_521_mont_tpl_9 ENDP
  65246. _text ENDS
  65247. ; /* Subtract two Montgomery form numbers (r = a - b % m).
  65248. ; *
  65249. ; * r Result of subtraction.
  65250. ; * a Number to subtract from in Montgomery form.
  65251. ; * b Number to subtract in Montgomery form.
  65252. ; * m Modulus (prime).
  65253. ; */
  ; Register use as written: rcx = r, rdx = a, r8 = b. r9 is overwritten with
  ; a[1] without ever being read, so the m argument is not consulted.
  ; After the nine-word subtraction the top word is split at bit 9: its low
  ; 9 bits are kept, and the arithmetically shifted upper part (negated) is
  ; subtracted from word 0 with the borrow rippled upwards.
  ; NOTE(review): for inputs below 2^521 the shifted value is presumably 0 or
  ; -1, making the correction 0 or 1 - confirm the input range with callers.
  ; rax, r9-r11 are scratch; r12-r15, rdi and rsi are saved and restored.
  65254. _text SEGMENT READONLY PARA
  65255. sp_521_mont_sub_9 PROC
  65256. push r12
  65257. push r13
  65258. push r14
  65259. push r15
  65260. push rdi
  65261. push rsi
  ; Load the nine 64-bit words of a (least significant word first).
  65262. mov rax, QWORD PTR [rdx]
  65263. mov r9, QWORD PTR [rdx+8]
  65264. mov r10, QWORD PTR [rdx+16]
  65265. mov r11, QWORD PTR [rdx+24]
  65266. mov r12, QWORD PTR [rdx+32]
  65267. mov r13, QWORD PTR [rdx+40]
  65268. mov r14, QWORD PTR [rdx+48]
  65269. mov r15, QWORD PTR [rdx+56]
  65270. mov rdi, QWORD PTR [rdx+64]
  ; a - b, borrow propagated through all nine words.
  65271. sub rax, QWORD PTR [r8]
  65272. sbb r9, QWORD PTR [r8+8]
  65273. sbb r10, QWORD PTR [r8+16]
  65274. sbb r11, QWORD PTR [r8+24]
  65275. sbb r12, QWORD PTR [r8+32]
  65276. sbb r13, QWORD PTR [r8+40]
  65277. sbb r14, QWORD PTR [r8+48]
  65278. sbb r15, QWORD PTR [r8+56]
  65279. sbb rdi, QWORD PTR [r8+64]
  ; rdi keeps the low 9 bits of the top word; rsi = -(top word >> 9, signed).
  65280. mov rsi, rdi
  65281. and rdi, 511
  65282. sar rsi, 9
  65283. neg rsi
  ; Subtract the correction from word 0 and ripple the borrow upwards.
  65284. sub rax, rsi
  65285. sbb r9, 0
  65286. sbb r10, 0
  65287. sbb r11, 0
  65288. sbb r12, 0
  65289. sbb r13, 0
  65290. sbb r14, 0
  65291. sbb r15, 0
  65292. sbb rdi, 0
  ; Store the nine result words to r.
  65293. mov QWORD PTR [rcx], rax
  65294. mov QWORD PTR [rcx+8], r9
  65295. mov QWORD PTR [rcx+16], r10
  65296. mov QWORD PTR [rcx+24], r11
  65297. mov QWORD PTR [rcx+32], r12
  65298. mov QWORD PTR [rcx+40], r13
  65299. mov QWORD PTR [rcx+48], r14
  65300. mov QWORD PTR [rcx+56], r15
  65301. mov QWORD PTR [rcx+64], rdi
  65302. pop rsi
  65303. pop rdi
  65304. pop r15
  65305. pop r14
  65306. pop r13
  65307. pop r12
  65308. ret
  65309. sp_521_mont_sub_9 ENDP
  65310. _text ENDS
  65311. ; /* Divide the number by 2 mod the modulus (prime). (r = a / 2 % m)
  65312. ; *
  65313. ; * r Result of division by 2.
  65314. ; * a Number to divide.
  65315. ; * m Modulus (prime).
  65316. ; */
  ; Register use as written: rcx = r (destination), rdx = a (source).
  ; r8 is overwritten with a[1] without ever being read, so the m argument is
  ; not consulted. When a is odd the code subtracts 1 and adds 2^521 (bit 9
  ; of the top word), i.e. it adds 2^521 - 1, before shifting right by one.
  ; The odd/even case is handled with a mask value, not a branch.
  ; rax and r8-r11 are used as scratch; r12-r15 and rdi are saved and restored.
  65317. _text SEGMENT READONLY PARA
  65318. sp_521_div2_9 PROC
  65319. push r12
  65320. push r13
  65321. push r14
  65322. push r15
  65323. push rdi
  ; Load the nine 64-bit words of a (least significant word first).
  65324. mov rax, QWORD PTR [rdx]
  65325. mov r8, QWORD PTR [rdx+8]
  65326. mov r9, QWORD PTR [rdx+16]
  65327. mov r10, QWORD PTR [rdx+24]
  65328. mov r11, QWORD PTR [rdx+32]
  65329. mov r12, QWORD PTR [rdx+40]
  65330. mov r13, QWORD PTR [rdx+48]
  65331. mov r14, QWORD PTR [rdx+56]
  65332. mov r15, QWORD PTR [rdx+64]
  ; rdi = lowest bit of a (1 when a is odd, 0 otherwise).
  65333. mov rdi, rax
  65334. and rdi, 1
  ; Subtract that bit from the whole number ...
  65335. sub rax, rdi
  65336. sbb r8, 0
  65337. sbb r9, 0
  65338. sbb r10, 0
  65339. sbb r11, 0
  65340. sbb r12, 0
  65341. sbb r13, 0
  65342. sbb r14, 0
  65343. sbb r15, 0
  ; ... and add it back at bit 9 of the top word (bit 521 overall).
  65344. shl rdi, 9
  65345. add r15, rdi
  ; Shift the nine-word value right by one bit.
  65346. shrd rax, r8, 1
  65347. shrd r8, r9, 1
  65348. shrd r9, r10, 1
  65349. shrd r10, r11, 1
  65350. shrd r11, r12, 1
  65351. shrd r12, r13, 1
  65352. shrd r13, r14, 1
  65353. shrd r14, r15, 1
  65354. shr r15, 1
  ; Store the nine result words to r.
  65355. mov QWORD PTR [rcx], rax
  65356. mov QWORD PTR [rcx+8], r8
  65357. mov QWORD PTR [rcx+16], r9
  65358. mov QWORD PTR [rcx+24], r10
  65359. mov QWORD PTR [rcx+32], r11
  65360. mov QWORD PTR [rcx+40], r12
  65361. mov QWORD PTR [rcx+48], r13
  65362. mov QWORD PTR [rcx+56], r14
  65363. mov QWORD PTR [rcx+64], r15
  65364. pop rdi
  65365. pop r15
  65366. pop r14
  65367. pop r13
  65368. pop r12
  65369. ret
  65370. sp_521_div2_9 ENDP
  65371. _text ENDS
  65372. IFNDEF WC_NO_CACHE_RESISTANT
  65373. ; /* Touch each possible point that could be being copied.
  65374. ; *
  65375. ; * r Point to copy into.
  65376. ; * table Table - start of the entries to access
  65377. ; * idx Index of point to retrieve.
  65378. ; */
  ; Register use as written: rcx = r, rdx = table, r8 = idx.
  ; All 32 entries, 440 bytes apart and starting 440 bytes past table, are
  ; read on every call. Each is ANDed with an all-ones or all-zeros mask and
  ; ORed into an accumulator, so neither the loads nor the loop count depend
  ; on idx. The entry counter runs 1..32; an idx outside that range leaves
  ; the copied fields all zero.
  ; Bytes copied per entry: 0-71, 144-215 and 288-359.
  ; NOTE(review): presumably the x, y and z ordinates of the point structure
  ; (9 words each) - confirm against the structure definition.
  ; Pass 1 gathers bytes 0-71 and 144-175; pass 2 gathers 176-215 and 288-359.
  ; The vector mask and the scalar mask are both derived from the low 32 bits
  ; of idx (r8d), so the 16-byte lanes and the separately handled 9th words
  ; always select the same entry even if the upper half of r8 is not zero.
  ; xmm6-xmm15 are saved and restored (callee-saved under the Microsoft x64
  ; ABI); rax, r9-r11 and xmm0-xmm5 are scratch.
  65379. _text SEGMENT READONLY PARA
  65380. sp_521_get_point_33_9 PROC
  65381. push r12
  65382. push r13
  65383. push r14
  65384. sub rsp, 160
  65385. vmovdqu OWORD PTR [rsp], xmm6
  65386. vmovdqu OWORD PTR [rsp+16], xmm7
  65387. vmovdqu OWORD PTR [rsp+32], xmm8
  65388. vmovdqu OWORD PTR [rsp+48], xmm9
  65389. vmovdqu OWORD PTR [rsp+64], xmm10
  65390. vmovdqu OWORD PTR [rsp+80], xmm11
  65391. vmovdqu OWORD PTR [rsp+96], xmm12
  65392. vmovdqu OWORD PTR [rsp+112], xmm13
  65393. vmovdqu OWORD PTR [rsp+128], xmm14
  65394. vmovdqu OWORD PTR [rsp+144], xmm15
  ; Pass 1 set-up: r14 = scalar entry counter, xmm15 = 1 in every dword lane,
  ; xmm13 = idx in every dword lane, xmm14 = vector entry counter, rax = 32
  ; iterations, accumulators cleared.
  65395. mov r14, 1
  65396. mov rax, 1
  65397. movd xmm13, r8d
  65398. add rdx, 440
  65399. movd xmm15, eax
  65400. mov rax, 32
  65401. pshufd xmm15, xmm15, 0
  65402. pshufd xmm13, xmm13, 0
  65403. pxor xmm14, xmm14
  65404. pxor xmm0, xmm0
  65405. pxor xmm1, xmm1
  65406. pxor xmm2, xmm2
  65407. pxor xmm3, xmm3
  65408. pxor xmm4, xmm4
  65409. pxor xmm5, xmm5
  65410. xor r12, r12
  65411. xor r13, r13
  65412. movdqa xmm14, xmm15
  65413. L_521_get_point_33_9_start_1:
  ; xmm12 = all ones when counter == idx, else zero.
  65414. movdqa xmm12, xmm14
  65415. paddd xmm14, xmm15
  65416. pcmpeqd xmm12, xmm13
  ; r9 = all ones when counter == idx (32-bit compare, matching xmm12).
  65417. xor r9, r9
  65418. cmp r8d, r14d
  65419. sete r9b
  65420. neg r9
  65421. inc r14
  65422. movdqu xmm6, [rdx]
  65423. movdqu xmm7, [rdx+16]
  65424. movdqu xmm8, [rdx+32]
  65425. movdqu xmm9, [rdx+48]
  65426. mov r10, QWORD PTR [rdx+64]
  65427. movdqu xmm10, [rdx+144]
  65428. movdqu xmm11, [rdx+160]
  65429. add rdx, 440
  65430. pand xmm6, xmm12
  65431. pand xmm7, xmm12
  65432. pand xmm8, xmm12
  65433. pand xmm9, xmm12
  65434. pand xmm10, xmm12
  65435. pand xmm11, xmm12
  65436. and r10, r9
  65437. por xmm0, xmm6
  65438. por xmm1, xmm7
  65439. por xmm2, xmm8
  65440. por xmm3, xmm9
  65441. por xmm4, xmm10
  65442. por xmm5, xmm11
  65443. or r12, r10
  65444. dec rax
  65445. jnz L_521_get_point_33_9_start_1
  65446. movdqu [rcx], xmm0
  65447. movdqu [rcx+16], xmm1
  65448. movdqu [rcx+32], xmm2
  65449. movdqu [rcx+48], xmm3
  65450. mov QWORD PTR [rcx+64], r12
  65451. movdqu [rcx+144], xmm4
  65452. movdqu [rcx+160], xmm5
  ; Pass 2 set-up: rewind rdx by 32 * 440 = 14080 bytes to the first entry
  ; scanned and reset the counters and accumulators.
  65453. mov r14, 1
  65454. mov rax, 1
  65455. movd xmm13, r8d
  65456. sub rdx, 14080
  65457. movd xmm15, eax
  65458. mov rax, 32
  65459. pshufd xmm15, xmm15, 0
  65460. pshufd xmm13, xmm13, 0
  65461. pxor xmm14, xmm14
  65462. pxor xmm0, xmm0
  65463. pxor xmm1, xmm1
  65464. pxor xmm2, xmm2
  65465. pxor xmm3, xmm3
  65466. pxor xmm4, xmm4
  65467. pxor xmm5, xmm5
  65468. xor r12, r12
  65469. xor r13, r13
  65470. movdqa xmm14, xmm15
  65471. L_521_get_point_33_9_start_2:
  65472. movdqa xmm12, xmm14
  65473. paddd xmm14, xmm15
  65474. pcmpeqd xmm12, xmm13
  ; r9 = all ones when counter == idx (32-bit compare, matching xmm12).
  65475. xor r9, r9
  65476. cmp r8d, r14d
  65477. sete r9b
  65478. neg r9
  65479. inc r14
  65480. movdqu xmm6, [rdx+176]
  65481. movdqu xmm7, [rdx+192]
  65482. mov r10, QWORD PTR [rdx+208]
  65483. movdqu xmm8, [rdx+288]
  65484. movdqu xmm9, [rdx+304]
  65485. movdqu xmm10, [rdx+320]
  65486. movdqu xmm11, [rdx+336]
  65487. mov r11, QWORD PTR [rdx+352]
  65488. add rdx, 440
  65489. pand xmm6, xmm12
  65490. pand xmm7, xmm12
  65491. pand xmm8, xmm12
  65492. pand xmm9, xmm12
  65493. pand xmm10, xmm12
  65494. pand xmm11, xmm12
  65495. and r10, r9
  65496. and r11, r9
  65497. por xmm0, xmm6
  65498. por xmm1, xmm7
  65499. por xmm2, xmm8
  65500. por xmm3, xmm9
  65501. por xmm4, xmm10
  65502. por xmm5, xmm11
  65503. or r12, r10
  65504. or r13, r11
  65505. dec rax
  65506. jnz L_521_get_point_33_9_start_2
  65507. movdqu [rcx+176], xmm0
  65508. movdqu [rcx+192], xmm1
  65509. mov QWORD PTR [rcx+208], r12
  65510. movdqu [rcx+288], xmm2
  65511. movdqu [rcx+304], xmm3
  65512. movdqu [rcx+320], xmm4
  65513. movdqu [rcx+336], xmm5
  65514. mov QWORD PTR [rcx+352], r13
  65515. vmovdqu xmm6, OWORD PTR [rsp]
  65516. vmovdqu xmm7, OWORD PTR [rsp+16]
  65517. vmovdqu xmm8, OWORD PTR [rsp+32]
  65518. vmovdqu xmm9, OWORD PTR [rsp+48]
  65519. vmovdqu xmm10, OWORD PTR [rsp+64]
  65520. vmovdqu xmm11, OWORD PTR [rsp+80]
  65521. vmovdqu xmm12, OWORD PTR [rsp+96]
  65522. vmovdqu xmm13, OWORD PTR [rsp+112]
  65523. vmovdqu xmm14, OWORD PTR [rsp+128]
  65524. vmovdqu xmm15, OWORD PTR [rsp+144]
  65525. add rsp, 160
  65526. pop r14
  65527. pop r13
  65528. pop r12
  65529. ret
  65530. sp_521_get_point_33_9 ENDP
  65531. _text ENDS
  65532. IFDEF HAVE_INTEL_AVX2
  65533. ; /* Touch each possible point that could be being copied.
  65534. ; *
  65535. ; * r Point to copy into.
  65536. ; * table Table - start of the entries to access
  65537. ; * idx Index of point to retrieve.
  65538. ; */
  ; AVX2 variant. Register use as written: rcx = r, rdx = table, r8 = idx.
  ; All 32 entries, 440 bytes apart and starting 440 bytes past table, are
  ; read on every call. Each is ANDed with an all-ones or all-zeros mask and
  ; ORed into an accumulator, so neither the loads nor the loop count depend
  ; on idx. The entry counter runs 1..32; an idx outside that range leaves
  ; the copied fields all zero.
  ; Bytes copied per entry: 0-71, 144-215 and 288-359.
  ; NOTE(review): presumably the x, y and z ordinates of the point structure
  ; (9 words each) - confirm against the structure definition.
  ; The vector mask and the scalar mask are both derived from the low 32 bits
  ; of idx (r8d), so the 32-byte lanes and the separately handled 9th words
  ; always select the same entry even if the upper half of r8 is not zero.
  ; xmm6-xmm15 are saved and restored (callee-saved under the Microsoft x64
  ; ABI); the upper YMM halves are cleared with vzeroupper before returning.
  65539. _text SEGMENT READONLY PARA
  65540. sp_521_get_point_33_avx2_9 PROC
  65541. push r12
  65542. push r13
  65543. push r14
  65544. push r15
  65545. push rdi
  65546. sub rsp, 160
  65547. vmovdqu OWORD PTR [rsp], xmm6
  65548. vmovdqu OWORD PTR [rsp+16], xmm7
  65549. vmovdqu OWORD PTR [rsp+32], xmm8
  65550. vmovdqu OWORD PTR [rsp+48], xmm9
  65551. vmovdqu OWORD PTR [rsp+64], xmm10
  65552. vmovdqu OWORD PTR [rsp+80], xmm11
  65553. vmovdqu OWORD PTR [rsp+96], xmm12
  65554. vmovdqu OWORD PTR [rsp+112], xmm13
  65555. vmovdqu OWORD PTR [rsp+128], xmm14
  65556. vmovdqu OWORD PTR [rsp+144], xmm15
  ; Set-up: rdi = scalar entry counter, ymm15 = 1 in every dword lane,
  ; ymm13 = idx in every dword lane (vpermd with an all-zero index vector
  ; broadcasts lane 0), ymm14 = vector entry counter, rax = 32 iterations.
  65557. mov rdi, 1
  65558. mov rax, 1
  65559. movd xmm13, r8d
  65560. add rdx, 440
  65561. movd xmm15, eax
  65562. mov rax, 32
  65563. vpxor ymm14, ymm14, ymm14
  65564. vpermd ymm13, ymm14, ymm13
  65565. vpermd ymm15, ymm14, ymm15
  65566. vpxor ymm0, ymm0, ymm0
  65567. vpxor ymm1, ymm1, ymm1
  65568. vpxor ymm2, ymm2, ymm2
  65569. vpxor ymm3, ymm3, ymm3
  65570. vpxor ymm4, ymm4, ymm4
  65571. vpxor ymm5, ymm5, ymm5
  65572. xor r10, r10
  65573. xor r11, r11
  65574. xor r12, r12
  65575. vmovdqa ymm14, ymm15
  65576. L_521_get_point_33_avx2_9_start:
  ; ymm12 = all ones when counter == idx, else zero.
  65577. vpcmpeqd ymm12, ymm14, ymm13
  65578. vpaddd ymm14, ymm14, ymm15
  ; r9 = all ones when counter == idx (32-bit compare, matching ymm12).
  65579. xor r9, r9
  65580. cmp r8d, edi
  65581. sete r9b
  65582. neg r9
  65583. inc rdi
  65584. vmovupd ymm6, YMMWORD PTR [rdx]
  65585. vmovupd ymm7, YMMWORD PTR [rdx+32]
  65586. vmovupd ymm8, YMMWORD PTR [rdx+144]
  65587. vmovupd ymm9, YMMWORD PTR [rdx+176]
  65588. vmovupd ymm10, YMMWORD PTR [rdx+288]
  65589. vmovupd ymm11, YMMWORD PTR [rdx+320]
  65590. mov r13, QWORD PTR [rdx+64]
  65591. mov r14, QWORD PTR [rdx+208]
  65592. mov r15, QWORD PTR [rdx+352]
  65593. add rdx, 440
  65594. vpand ymm6, ymm6, ymm12
  65595. vpand ymm7, ymm7, ymm12
  65596. vpand ymm8, ymm8, ymm12
  65597. vpand ymm9, ymm9, ymm12
  65598. vpand ymm10, ymm10, ymm12
  65599. vpand ymm11, ymm11, ymm12
  65600. and r13, r9
  65601. and r14, r9
  65602. and r15, r9
  65603. vpor ymm0, ymm0, ymm6
  65604. vpor ymm1, ymm1, ymm7
  65605. vpor ymm2, ymm2, ymm8
  65606. vpor ymm3, ymm3, ymm9
  65607. vpor ymm4, ymm4, ymm10
  65608. vpor ymm5, ymm5, ymm11
  65609. or r10, r13
  65610. or r11, r14
  65611. or r12, r15
  65612. dec rax
  65613. jnz L_521_get_point_33_avx2_9_start
  65614. vmovupd YMMWORD PTR [rcx], ymm0
  65615. vmovupd YMMWORD PTR [rcx+32], ymm1
  65616. vmovupd YMMWORD PTR [rcx+144], ymm2
  65617. vmovupd YMMWORD PTR [rcx+176], ymm3
  65618. vmovupd YMMWORD PTR [rcx+288], ymm4
  65619. vmovupd YMMWORD PTR [rcx+320], ymm5
  65620. mov QWORD PTR [rcx+64], r10
  65621. mov QWORD PTR [rcx+208], r11
  65622. mov QWORD PTR [rcx+352], r12
  ; Last YMM use is above: clear the upper halves so callers running legacy
  ; SSE code do not pay an AVX/SSE transition penalty.
  vzeroupper
  65623. vmovdqu xmm6, OWORD PTR [rsp]
  65624. vmovdqu xmm7, OWORD PTR [rsp+16]
  65625. vmovdqu xmm8, OWORD PTR [rsp+32]
  65626. vmovdqu xmm9, OWORD PTR [rsp+48]
  65627. vmovdqu xmm10, OWORD PTR [rsp+64]
  65628. vmovdqu xmm11, OWORD PTR [rsp+80]
  65629. vmovdqu xmm12, OWORD PTR [rsp+96]
  65630. vmovdqu xmm13, OWORD PTR [rsp+112]
  65631. vmovdqu xmm14, OWORD PTR [rsp+128]
  65632. vmovdqu xmm15, OWORD PTR [rsp+144]
  65633. add rsp, 160
  65634. pop rdi
  65635. pop r15
  65636. pop r14
  65637. pop r13
  65638. pop r12
  65639. ret
  65640. sp_521_get_point_33_avx2_9 ENDP
  65641. _text ENDS
  65642. ENDIF
  65643. ENDIF
  65644. IFDEF HAVE_INTEL_AVX2
  65645. ; /* Multiply two Montgomery form numbers mod the modulus (prime).
  65646. ; * (r = a * b mod m)
  65647. ; *
  65648. ; * r Result of multiplication.
  65649. ; * a First number to multiply in Montgomery form.
  65650. ; * b Second number to multiply in Montgomery form.
  65651. ; * m Modulus (prime).
  65652. ; * mp Montgomery multiplier.
  65653. ; */
  ; Register use as written: on entry rcx = r, rdx = a, r8 = b; they are
  ; moved to r8, r9 and rbp respectively. The incoming r9 (m) is overwritten
  ; without being read and no stack argument is loaded, so m and mp are not
  ; consulted: the reduction is hard-wired to fold at bit 521.
  ; Uses mulx, adcx and adox.
  ;
  ; Scratch: 144 bytes at [rsp] hold the 18-word product t[0..17], with t[i]
  ; at [rsp + 8*i]. rsp is left at the base of that area for the whole
  ; routine so every temporary lives at or above rsp; Windows x64 has no red
  ; zone and memory below rsp may be overwritten asynchronously.
  ;
  ; Each row i adds a[i] * b[0..8] into t[i..i+9] using two independent carry
  ; chains (adcx on CF for the low halves, adox on OF for the high halves).
  ; r15 is kept at zero; r14 carries the overflow from one row to the next.
  65654. _text SEGMENT READONLY PARA
  65655. sp_521_mont_mul_avx2_9 PROC
  65656. push rbx
  65657. push rbp
  65658. push r12
  65659. push r13
  65660. push r14
  65661. push r15
  65662. mov rbp, r8
  65663. mov r8, rcx
  65664. mov r9, rdx
  65665. sub rsp, 144
  ; r15 = 0; also clears CF and OF before the first carry chain.
  65668. xor r15, r15
  65669. mov rdx, QWORD PTR [r9]
  65670. ; A[0] * B[0]
  65671. mulx r11, r10, QWORD PTR [rbp]
  65672. ; A[0] * B[1]
  65673. mulx r12, rax, QWORD PTR [rbp+8]
  65674. mov QWORD PTR [rsp], r10
  65675. adcx r11, rax
  65676. ; A[0] * B[2]
  65677. mulx r13, rax, QWORD PTR [rbp+16]
  65678. mov QWORD PTR [rsp+8], r11
  65679. adcx r12, rax
  65680. mov QWORD PTR [rsp+16], r12
  65681. ; A[0] * B[3]
  65682. mulx r10, rax, QWORD PTR [rbp+24]
  65683. adcx r13, rax
  65684. ; A[0] * B[4]
  65685. mulx r11, rax, QWORD PTR [rbp+32]
  65686. mov QWORD PTR [rsp+24], r13
  65687. adcx r10, rax
  65688. ; A[0] * B[5]
  65689. mulx r12, rax, QWORD PTR [rbp+40]
  65690. mov QWORD PTR [rsp+32], r10
  65691. adcx r11, rax
  65692. mov QWORD PTR [rsp+40], r11
  65693. ; A[0] * B[6]
  65694. mulx r13, rax, QWORD PTR [rbp+48]
  65695. adcx r12, rax
  65696. ; A[0] * B[7]
  65697. mulx r10, rax, QWORD PTR [rbp+56]
  65698. mov QWORD PTR [rsp+48], r12
  65699. adcx r13, rax
  65700. ; A[0] * B[8]
  65701. mulx r11, rax, QWORD PTR [rbp+64]
  65702. mov QWORD PTR [rsp+56], r13
  65703. adcx r10, rax
  65704. adcx r11, r15
  65705. mov r14, r15
  65706. adcx r14, r15
  65707. mov QWORD PTR [rsp+64], r10
  65708. mov QWORD PTR [rsp+72], r11
  65709. mov rdx, QWORD PTR [r9+8]
  65710. mov r11, QWORD PTR [rsp+8]
  65711. mov r12, QWORD PTR [rsp+16]
  65712. mov r13, QWORD PTR [rsp+24]
  65713. mov r10, QWORD PTR [rsp+32]
  65714. ; A[1] * B[0]
  65715. mulx rcx, rax, QWORD PTR [rbp]
  65716. adcx r11, rax
  65717. adox r12, rcx
  65718. ; A[1] * B[1]
  65719. mulx rcx, rax, QWORD PTR [rbp+8]
  65720. mov QWORD PTR [rsp+8], r11
  65721. adcx r12, rax
  65722. adox r13, rcx
  65723. ; A[1] * B[2]
  65724. mulx rcx, rax, QWORD PTR [rbp+16]
  65725. mov QWORD PTR [rsp+16], r12
  65726. adcx r13, rax
  65727. adox r10, rcx
  65728. mov QWORD PTR [rsp+24], r13
  65729. mov r11, QWORD PTR [rsp+40]
  65730. mov r12, QWORD PTR [rsp+48]
  65731. mov r13, QWORD PTR [rsp+56]
  65732. ; A[1] * B[3]
  65733. mulx rcx, rax, QWORD PTR [rbp+24]
  65734. adcx r10, rax
  65735. adox r11, rcx
  65736. ; A[1] * B[4]
  65737. mulx rcx, rax, QWORD PTR [rbp+32]
  65738. mov QWORD PTR [rsp+32], r10
  65739. adcx r11, rax
  65740. adox r12, rcx
  65741. ; A[1] * B[5]
  65742. mulx rcx, rax, QWORD PTR [rbp+40]
  65743. mov QWORD PTR [rsp+40], r11
  65744. adcx r12, rax
  65745. adox r13, rcx
  65746. mov QWORD PTR [rsp+48], r12
  65747. mov r10, QWORD PTR [rsp+64]
  65748. mov r11, QWORD PTR [rsp+72]
  65749. ; A[1] * B[6]
  65750. mulx rcx, rax, QWORD PTR [rbp+48]
  65751. adcx r13, rax
  65752. adox r10, rcx
  65753. ; A[1] * B[7]
  65754. mulx rcx, rax, QWORD PTR [rbp+56]
  65755. mov QWORD PTR [rsp+56], r13
  65756. adcx r10, rax
  65757. adox r11, rcx
  65758. ; A[1] * B[8]
  65759. mulx rcx, rax, QWORD PTR [rbp+64]
  65760. mov QWORD PTR [rsp+64], r10
  65761. mov r12, r15
  65762. adcx r11, rax
  65763. adox r12, rcx
  65764. adcx r12, r14
  65765. mov r14, r15
  65766. adox r14, r15
  65767. adcx r14, r15
  65768. mov QWORD PTR [rsp+72], r11
  65769. mov QWORD PTR [rsp+80], r12
  65770. mov rdx, QWORD PTR [r9+16]
  65771. mov r12, QWORD PTR [rsp+16]
  65772. mov r13, QWORD PTR [rsp+24]
  65773. mov r10, QWORD PTR [rsp+32]
  65774. mov r11, QWORD PTR [rsp+40]
  65775. ; A[2] * B[0]
  65776. mulx rcx, rax, QWORD PTR [rbp]
  65777. adcx r12, rax
  65778. adox r13, rcx
  65779. ; A[2] * B[1]
  65780. mulx rcx, rax, QWORD PTR [rbp+8]
  65781. mov QWORD PTR [rsp+16], r12
  65782. adcx r13, rax
  65783. adox r10, rcx
  65784. ; A[2] * B[2]
  65785. mulx rcx, rax, QWORD PTR [rbp+16]
  65786. mov QWORD PTR [rsp+24], r13
  65787. adcx r10, rax
  65788. adox r11, rcx
  65789. mov QWORD PTR [rsp+32], r10
  65790. mov r12, QWORD PTR [rsp+48]
  65791. mov r13, QWORD PTR [rsp+56]
  65792. mov r10, QWORD PTR [rsp+64]
  65793. ; A[2] * B[3]
  65794. mulx rcx, rax, QWORD PTR [rbp+24]
  65795. adcx r11, rax
  65796. adox r12, rcx
  65797. ; A[2] * B[4]
  65798. mulx rcx, rax, QWORD PTR [rbp+32]
  65799. mov QWORD PTR [rsp+40], r11
  65800. adcx r12, rax
  65801. adox r13, rcx
  65802. ; A[2] * B[5]
  65803. mulx rcx, rax, QWORD PTR [rbp+40]
  65804. mov QWORD PTR [rsp+48], r12
  65805. adcx r13, rax
  65806. adox r10, rcx
  65807. mov QWORD PTR [rsp+56], r13
  65808. mov r11, QWORD PTR [rsp+72]
  65809. mov r12, QWORD PTR [rsp+80]
  65810. ; A[2] * B[6]
  65811. mulx rcx, rax, QWORD PTR [rbp+48]
  65812. adcx r10, rax
  65813. adox r11, rcx
  65814. ; A[2] * B[7]
  65815. mulx rcx, rax, QWORD PTR [rbp+56]
  65816. mov QWORD PTR [rsp+64], r10
  65817. adcx r11, rax
  65818. adox r12, rcx
  65819. ; A[2] * B[8]
  65820. mulx rcx, rax, QWORD PTR [rbp+64]
  65821. mov QWORD PTR [rsp+72], r11
  65822. mov r13, r15
  65823. adcx r12, rax
  65824. adox r13, rcx
  65825. adcx r13, r14
  65826. mov r14, r15
  65827. adox r14, r15
  65828. adcx r14, r15
  65829. mov QWORD PTR [rsp+80], r12
  65830. mov QWORD PTR [rsp+88], r13
  65831. mov rdx, QWORD PTR [r9+24]
  65832. mov r13, QWORD PTR [rsp+24]
  65833. mov r10, QWORD PTR [rsp+32]
  65834. mov r11, QWORD PTR [rsp+40]
  65835. mov r12, QWORD PTR [rsp+48]
  65836. ; A[3] * B[0]
  65837. mulx rcx, rax, QWORD PTR [rbp]
  65838. adcx r13, rax
  65839. adox r10, rcx
  65840. ; A[3] * B[1]
  65841. mulx rcx, rax, QWORD PTR [rbp+8]
  65842. mov QWORD PTR [rsp+24], r13
  65843. adcx r10, rax
  65844. adox r11, rcx
  65845. ; A[3] * B[2]
  65846. mulx rcx, rax, QWORD PTR [rbp+16]
  65847. mov QWORD PTR [rsp+32], r10
  65848. adcx r11, rax
  65849. adox r12, rcx
  65850. mov QWORD PTR [rsp+40], r11
  65851. mov r13, QWORD PTR [rsp+56]
  65852. mov r10, QWORD PTR [rsp+64]
  65853. mov r11, QWORD PTR [rsp+72]
  65854. ; A[3] * B[3]
  65855. mulx rcx, rax, QWORD PTR [rbp+24]
  65856. adcx r12, rax
  65857. adox r13, rcx
  65858. ; A[3] * B[4]
  65859. mulx rcx, rax, QWORD PTR [rbp+32]
  65860. mov QWORD PTR [rsp+48], r12
  65861. adcx r13, rax
  65862. adox r10, rcx
  65863. ; A[3] * B[5]
  65864. mulx rcx, rax, QWORD PTR [rbp+40]
  65865. mov QWORD PTR [rsp+56], r13
  65866. adcx r10, rax
  65867. adox r11, rcx
  65868. mov QWORD PTR [rsp+64], r10
  65869. mov r12, QWORD PTR [rsp+80]
  65870. mov r13, QWORD PTR [rsp+88]
  65871. ; A[3] * B[6]
  65872. mulx rcx, rax, QWORD PTR [rbp+48]
  65873. adcx r11, rax
  65874. adox r12, rcx
  65875. ; A[3] * B[7]
  65876. mulx rcx, rax, QWORD PTR [rbp+56]
  65877. mov QWORD PTR [rsp+72], r11
  65878. adcx r12, rax
  65879. adox r13, rcx
  65880. ; A[3] * B[8]
  65881. mulx rcx, rax, QWORD PTR [rbp+64]
  65882. mov QWORD PTR [rsp+80], r12
  65883. mov r10, r15
  65884. adcx r13, rax
  65885. adox r10, rcx
  65886. adcx r10, r14
  65887. mov r14, r15
  65888. adox r14, r15
  65889. adcx r14, r15
  65890. mov QWORD PTR [rsp+88], r13
  65891. mov QWORD PTR [rsp+96], r10
  65892. mov rdx, QWORD PTR [r9+32]
  65893. mov r10, QWORD PTR [rsp+32]
  65894. mov r11, QWORD PTR [rsp+40]
  65895. mov r12, QWORD PTR [rsp+48]
  65896. mov r13, QWORD PTR [rsp+56]
  65897. ; A[4] * B[0]
  65898. mulx rcx, rax, QWORD PTR [rbp]
  65899. adcx r10, rax
  65900. adox r11, rcx
  65901. ; A[4] * B[1]
  65902. mulx rcx, rax, QWORD PTR [rbp+8]
  65903. mov QWORD PTR [rsp+32], r10
  65904. adcx r11, rax
  65905. adox r12, rcx
  65906. ; A[4] * B[2]
  65907. mulx rcx, rax, QWORD PTR [rbp+16]
  65908. mov QWORD PTR [rsp+40], r11
  65909. adcx r12, rax
  65910. adox r13, rcx
  65911. mov QWORD PTR [rsp+48], r12
  65912. mov r10, QWORD PTR [rsp+64]
  65913. mov r11, QWORD PTR [rsp+72]
  65914. mov r12, QWORD PTR [rsp+80]
  65915. ; A[4] * B[3]
  65916. mulx rcx, rax, QWORD PTR [rbp+24]
  65917. adcx r13, rax
  65918. adox r10, rcx
  65919. ; A[4] * B[4]
  65920. mulx rcx, rax, QWORD PTR [rbp+32]
  65921. mov QWORD PTR [rsp+56], r13
  65922. adcx r10, rax
  65923. adox r11, rcx
  65924. ; A[4] * B[5]
  65925. mulx rcx, rax, QWORD PTR [rbp+40]
  65926. mov QWORD PTR [rsp+64], r10
  65927. adcx r11, rax
  65928. adox r12, rcx
  65929. mov QWORD PTR [rsp+72], r11
  65930. mov r13, QWORD PTR [rsp+88]
  65931. mov r10, QWORD PTR [rsp+96]
  65932. ; A[4] * B[6]
  65933. mulx rcx, rax, QWORD PTR [rbp+48]
  65934. adcx r12, rax
  65935. adox r13, rcx
  65936. ; A[4] * B[7]
  65937. mulx rcx, rax, QWORD PTR [rbp+56]
  65938. mov QWORD PTR [rsp+80], r12
  65939. adcx r13, rax
  65940. adox r10, rcx
  65941. ; A[4] * B[8]
  65942. mulx rcx, rax, QWORD PTR [rbp+64]
  65943. mov QWORD PTR [rsp+88], r13
  65944. mov r11, r15
  65945. adcx r10, rax
  65946. adox r11, rcx
  65947. adcx r11, r14
  65948. mov r14, r15
  65949. adox r14, r15
  65950. adcx r14, r15
  65951. mov QWORD PTR [rsp+96], r10
  65952. mov QWORD PTR [rsp+104], r11
  65953. mov rdx, QWORD PTR [r9+40]
  65954. mov r11, QWORD PTR [rsp+40]
  65955. mov r12, QWORD PTR [rsp+48]
  65956. mov r13, QWORD PTR [rsp+56]
  65957. mov r10, QWORD PTR [rsp+64]
  65958. ; A[5] * B[0]
  65959. mulx rcx, rax, QWORD PTR [rbp]
  65960. adcx r11, rax
  65961. adox r12, rcx
  65962. ; A[5] * B[1]
  65963. mulx rcx, rax, QWORD PTR [rbp+8]
  65964. mov QWORD PTR [rsp+40], r11
  65965. adcx r12, rax
  65966. adox r13, rcx
  65967. ; A[5] * B[2]
  65968. mulx rcx, rax, QWORD PTR [rbp+16]
  65969. mov QWORD PTR [rsp+48], r12
  65970. adcx r13, rax
  65971. adox r10, rcx
  65972. mov QWORD PTR [rsp+56], r13
  65973. mov r11, QWORD PTR [rsp+72]
  65974. mov r12, QWORD PTR [rsp+80]
  65975. mov r13, QWORD PTR [rsp+88]
  65976. ; A[5] * B[3]
  65977. mulx rcx, rax, QWORD PTR [rbp+24]
  65978. adcx r10, rax
  65979. adox r11, rcx
  65980. ; A[5] * B[4]
  65981. mulx rcx, rax, QWORD PTR [rbp+32]
  65982. mov QWORD PTR [rsp+64], r10
  65983. adcx r11, rax
  65984. adox r12, rcx
  65985. ; A[5] * B[5]
  65986. mulx rcx, rax, QWORD PTR [rbp+40]
  65987. mov QWORD PTR [rsp+72], r11
  65988. adcx r12, rax
  65989. adox r13, rcx
  65990. mov QWORD PTR [rsp+80], r12
  65991. mov r10, QWORD PTR [rsp+96]
  65992. mov r11, QWORD PTR [rsp+104]
  65993. ; A[5] * B[6]
  65994. mulx rcx, rax, QWORD PTR [rbp+48]
  65995. adcx r13, rax
  65996. adox r10, rcx
  65997. ; A[5] * B[7]
  65998. mulx rcx, rax, QWORD PTR [rbp+56]
  65999. mov QWORD PTR [rsp+88], r13
  66000. adcx r10, rax
  66001. adox r11, rcx
  66002. ; A[5] * B[8]
  66003. mulx rcx, rax, QWORD PTR [rbp+64]
  66004. mov QWORD PTR [rsp+96], r10
  66005. mov r12, r15
  66006. adcx r11, rax
  66007. adox r12, rcx
  66008. adcx r12, r14
  66009. mov r14, r15
  66010. adox r14, r15
  66011. adcx r14, r15
  66012. mov QWORD PTR [rsp+104], r11
  66013. mov QWORD PTR [rsp+112], r12
  66014. mov rdx, QWORD PTR [r9+48]
  66015. mov r12, QWORD PTR [rsp+48]
  66016. mov r13, QWORD PTR [rsp+56]
  66017. mov r10, QWORD PTR [rsp+64]
  66018. mov r11, QWORD PTR [rsp+72]
  66019. ; A[6] * B[0]
  66020. mulx rcx, rax, QWORD PTR [rbp]
  66021. adcx r12, rax
  66022. adox r13, rcx
  66023. ; A[6] * B[1]
  66024. mulx rcx, rax, QWORD PTR [rbp+8]
  66025. mov QWORD PTR [rsp+48], r12
  66026. adcx r13, rax
  66027. adox r10, rcx
  66028. ; A[6] * B[2]
  66029. mulx rcx, rax, QWORD PTR [rbp+16]
  66030. mov QWORD PTR [rsp+56], r13
  66031. adcx r10, rax
  66032. adox r11, rcx
  66033. mov QWORD PTR [rsp+64], r10
  66034. mov r12, QWORD PTR [rsp+80]
  66035. mov r13, QWORD PTR [rsp+88]
  66036. mov r10, QWORD PTR [rsp+96]
  66037. ; A[6] * B[3]
  66038. mulx rcx, rax, QWORD PTR [rbp+24]
  66039. adcx r11, rax
  66040. adox r12, rcx
  66041. ; A[6] * B[4]
  66042. mulx rcx, rax, QWORD PTR [rbp+32]
  66043. mov QWORD PTR [rsp+72], r11
  66044. adcx r12, rax
  66045. adox r13, rcx
  66046. ; A[6] * B[5]
  66047. mulx rcx, rax, QWORD PTR [rbp+40]
  66048. mov QWORD PTR [rsp+80], r12
  66049. adcx r13, rax
  66050. adox r10, rcx
  66051. mov QWORD PTR [rsp+88], r13
  66052. mov r11, QWORD PTR [rsp+104]
  66053. mov r12, QWORD PTR [rsp+112]
  66054. ; A[6] * B[6]
  66055. mulx rcx, rax, QWORD PTR [rbp+48]
  66056. adcx r10, rax
  66057. adox r11, rcx
  66058. ; A[6] * B[7]
  66059. mulx rcx, rax, QWORD PTR [rbp+56]
  66060. mov QWORD PTR [rsp+96], r10
  66061. adcx r11, rax
  66062. adox r12, rcx
  66063. ; A[6] * B[8]
  66064. mulx rcx, rax, QWORD PTR [rbp+64]
  66065. mov QWORD PTR [rsp+104], r11
  66066. mov r13, r15
  66067. adcx r12, rax
  66068. adox r13, rcx
  66069. adcx r13, r14
  66070. mov r14, r15
  66071. adox r14, r15
  66072. adcx r14, r15
  66073. mov QWORD PTR [rsp+112], r12
  66074. mov QWORD PTR [rsp+120], r13
  66075. mov rdx, QWORD PTR [r9+56]
  66076. mov r13, QWORD PTR [rsp+56]
  66077. mov r10, QWORD PTR [rsp+64]
  66078. mov r11, QWORD PTR [rsp+72]
  66079. mov r12, QWORD PTR [rsp+80]
  66080. ; A[7] * B[0]
  66081. mulx rcx, rax, QWORD PTR [rbp]
  66082. adcx r13, rax
  66083. adox r10, rcx
  66084. ; A[7] * B[1]
  66085. mulx rcx, rax, QWORD PTR [rbp+8]
  66086. mov QWORD PTR [rsp+56], r13
  66087. adcx r10, rax
  66088. adox r11, rcx
  66089. ; A[7] * B[2]
  66090. mulx rcx, rax, QWORD PTR [rbp+16]
  66091. mov QWORD PTR [rsp+64], r10
  66092. adcx r11, rax
  66093. adox r12, rcx
  66094. mov QWORD PTR [rsp+72], r11
  66095. mov r13, QWORD PTR [rsp+88]
  66096. mov r10, QWORD PTR [rsp+96]
  66097. mov r11, QWORD PTR [rsp+104]
  66098. ; A[7] * B[3]
  66099. mulx rcx, rax, QWORD PTR [rbp+24]
  66100. adcx r12, rax
  66101. adox r13, rcx
  66102. ; A[7] * B[4]
  66103. mulx rcx, rax, QWORD PTR [rbp+32]
  66104. mov QWORD PTR [rsp+80], r12
  66105. adcx r13, rax
  66106. adox r10, rcx
  66107. ; A[7] * B[5]
  66108. mulx rcx, rax, QWORD PTR [rbp+40]
  66109. mov QWORD PTR [rsp+88], r13
  66110. adcx r10, rax
  66111. adox r11, rcx
  66112. mov QWORD PTR [rsp+96], r10
  66113. mov r12, QWORD PTR [rsp+112]
  66114. mov r13, QWORD PTR [rsp+120]
  66115. ; A[7] * B[6]
  66116. mulx rcx, rax, QWORD PTR [rbp+48]
  66117. adcx r11, rax
  66118. adox r12, rcx
  66119. ; A[7] * B[7]
  66120. mulx rcx, rax, QWORD PTR [rbp+56]
  66121. mov QWORD PTR [rsp+104], r11
  66122. adcx r12, rax
  66123. adox r13, rcx
  66124. ; A[7] * B[8]
  66125. mulx rcx, rax, QWORD PTR [rbp+64]
  66126. mov QWORD PTR [rsp+112], r12
  66127. mov r10, r15
  66128. adcx r13, rax
  66129. adox r10, rcx
  66130. adcx r10, r14
  66131. mov r14, r15
  66132. adox r14, r15
  66133. adcx r14, r15
  66134. mov QWORD PTR [rsp+120], r13
  66135. mov QWORD PTR [rsp+128], r10
  66136. mov rdx, QWORD PTR [r9+64]
  66137. mov r10, QWORD PTR [rsp+64]
  66138. mov r11, QWORD PTR [rsp+72]
  66139. mov r12, QWORD PTR [rsp+80]
  66140. mov r13, QWORD PTR [rsp+88]
  66141. ; A[8] * B[0]
  66142. mulx rcx, rax, QWORD PTR [rbp]
  66143. adcx r10, rax
  66144. adox r11, rcx
  66145. ; A[8] * B[1]
  66146. mulx rcx, rax, QWORD PTR [rbp+8]
  66147. mov QWORD PTR [rsp+64], r10
  66148. adcx r11, rax
  66149. adox r12, rcx
  66150. ; A[8] * B[2]
  66151. mulx rcx, rax, QWORD PTR [rbp+16]
  66152. mov QWORD PTR [rsp+72], r11
  66153. adcx r12, rax
  66154. adox r13, rcx
  66155. mov QWORD PTR [rsp+80], r12
  66156. mov r10, QWORD PTR [rsp+96]
  66157. mov r11, QWORD PTR [rsp+104]
  66158. mov r12, QWORD PTR [rsp+112]
  66159. ; A[8] * B[3]
  66160. mulx rcx, rax, QWORD PTR [rbp+24]
  66161. adcx r13, rax
  66162. adox r10, rcx
  66163. ; A[8] * B[4]
  66164. mulx rcx, rax, QWORD PTR [rbp+32]
  66165. mov QWORD PTR [rsp+88], r13
  66166. adcx r10, rax
  66167. adox r11, rcx
  66168. ; A[8] * B[5]
  66169. mulx rcx, rax, QWORD PTR [rbp+40]
  66170. mov QWORD PTR [rsp+96], r10
  66171. adcx r11, rax
  66172. adox r12, rcx
  66173. mov QWORD PTR [rsp+104], r11
  66174. mov r13, QWORD PTR [rsp+120]
  66175. mov r10, QWORD PTR [rsp+128]
  66176. ; A[8] * B[6]
  66177. mulx rcx, rax, QWORD PTR [rbp+48]
  66178. adcx r12, rax
  66179. adox r13, rcx
  66180. ; A[8] * B[7]
  66181. mulx rcx, rax, QWORD PTR [rbp+56]
  66182. mov QWORD PTR [rsp+112], r12
  66183. adcx r13, rax
  66184. adox r10, rcx
  66185. ; A[8] * B[8]
  66186. mulx rcx, rax, QWORD PTR [rbp+64]
  66187. mov QWORD PTR [rsp+120], r13
  66188. mov r11, r15
  66189. adcx r10, rax
  66190. adox r11, rcx
  66191. adcx r11, r14
  66192. mov QWORD PTR [rsp+128], r10
  66193. mov QWORD PTR [rsp+136], r11
  ; Reduction. Load t[8..16]; r15 keeps the low 9 bits of t[8], which are
  ; bits 512-520 of the low part of the product.
  66194. mov rax, QWORD PTR [rsp+64]
  66195. mov rcx, QWORD PTR [rsp+72]
  66196. mov r10, QWORD PTR [rsp+80]
  66197. mov r15, rax
  66198. and r15, 511
  66199. mov r11, QWORD PTR [rsp+88]
  66200. mov r12, QWORD PTR [rsp+96]
  66201. mov r13, QWORD PTR [rsp+104]
  66202. mov r14, QWORD PTR [rsp+112]
  66203. mov rbx, QWORD PTR [rsp+120]
  66204. mov rdx, QWORD PTR [rsp+128]
  ; Shift t[8..16] right by 9 bits: the registers now hold product bits
  ; 521 and above, aligned to bit 0.
  66206. shrd rax, rcx, 9
  66207. shrd rcx, r10, 9
  66208. shrd r10, r11, 9
  66209. shrd r11, r12, 9
  66210. shrd r12, r13, 9
  66211. shrd r13, r14, 9
  66212. shrd r14, rbx, 9
  66213. shrd rbx, rdx, 9
  66214. shr rdx, 9
  ; Add the low 521 bits of the product (t[0..7] plus r15).
  66215. add rax, QWORD PTR [rsp]
  66216. adc rcx, QWORD PTR [rsp+8]
  66217. adc r10, QWORD PTR [rsp+16]
  66218. adc r11, QWORD PTR [rsp+24]
  66219. adc r12, QWORD PTR [rsp+32]
  66220. adc r13, QWORD PTR [rsp+40]
  66221. adc r14, QWORD PTR [rsp+48]
  66222. adc rbx, QWORD PTR [rsp+56]
  66223. adc r15, rdx
  ; Fold once more: anything above bit 9 of the top word goes back to bit 0.
  66224. mov rdx, r15
  66225. shr r15, 9
  66226. and rdx, 511
  66227. add rax, r15
  66228. adc rcx, 0
  66229. adc r10, 0
  66230. adc r11, 0
  66231. adc r12, 0
  66232. adc r13, 0
  66233. adc r14, 0
  66234. adc rbx, 0
  66235. adc rdx, 0
  ; Store the nine result words to r (pointer kept in r8).
  66236. mov QWORD PTR [r8], rax
  66237. mov QWORD PTR [r8+8], rcx
  66238. mov QWORD PTR [r8+16], r10
  66239. mov QWORD PTR [r8+24], r11
  66240. mov QWORD PTR [r8+32], r12
  66241. mov QWORD PTR [r8+40], r13
  66242. mov QWORD PTR [r8+48], r14
  66243. mov QWORD PTR [r8+56], rbx
  66244. mov QWORD PTR [r8+64], rdx
  66245. add rsp, 144
  66246. pop r15
  66247. pop r14
  66248. pop r13
  66249. pop r12
  66250. pop rbp
  66251. pop rbx
  66252. ret
  66253. sp_521_mont_mul_avx2_9 ENDP
  66254. _text ENDS
  66255. ENDIF
  66256. IFDEF HAVE_INTEL_AVX2
  66257. ; /* Square the Montgomery form number mod the modulus (prime). (r = a * a mod m)
  66258. ; *
  66259. ; * r Result of squaring.
  66260. ; * a Number to square in Montgomery form.
  66261. ; * m Modulus (prime).
  66262. ; * mp Montgomery multiplier.
         ; *
         ; * Notes taken from the code below:
         ; *  - On entry rcx = r and rdx = a. They are copied to r8 and r9, so the
         ; *    incoming r8/r9 values (presumably m and mp) are overwritten without
         ; *    ever being read.
         ; *  - The 18-word square is assembled with mulx/adcx/adox. A 144-byte stack
         ; *    buffer is used: rbp addresses words 0..8 and rsp (= rbp + 72)
         ; *    addresses words 9..17. Words 4..8 live in r14, r15, rdi, rsi, rbx
         ; *    until the end of the multiplication.
         ; *  - The result is then folded at bit 521: (product >> 521) is added to
         ; *    the low 521 bits, and the small overflow of that sum is folded once
         ; *    more.
         ; *  - r12 is kept at zero throughout; r13 carries the spill-over between
         ; *    diagonals.
         ; *  - The %r8/%r9/%r12... names in the step comments do not match the
         ; *    registers used here; presumably they come from another register
         ; *    allocation of the same generated code.
         ; *  - NOTE(review): while rsp is raised by 72, words 0..8 of the buffer sit
         ; *    below rsp; the Win64 ABI has no red zone - confirm this is acceptable.
  66263. ; */
  66264. _text SEGMENT READONLY PARA
  66265. sp_521_mont_sqr_avx2_9 PROC
         ; Save every non-volatile register the body writes.
  66266. push rbp
  66267. push r12
  66268. push r13
  66269. push r14
  66270. push r15
  66271. push rdi
  66272. push rsi
  66273. push rbx
  66274. mov r8, rcx                        ; r8 = r (used only for the final stores)
  66275. mov r9, rdx                        ; r9 = a (rdx is needed as the mulx multiplicand)
  66276. sub rsp, 144                       ; 18-word scratch buffer
  66277. mov rbp, rsp                       ; rbp -> words 0..8
  66278. add rsp, 72                        ; rsp -> words 9..17
  66279. xor r12, r12                       ; r12 = 0; also clears CF and OF for the chains
  66280. ; Diagonal 1
  66281. ; Zero into %r9
  66282. ; A[1] x A[0]
  66283. mov rdx, QWORD PTR [r9]
  66284. mulx r11, r10, QWORD PTR [r9+8]
  66285. mov QWORD PTR [rbp+8], r10         ; word 1
  66286. ; Zero into %r8
  66287. ; A[2] x A[0]
  66288. mulx r10, rax, QWORD PTR [r9+16]
  66289. adcx r11, rax
  66290. adox r10, r12
  66291. mov QWORD PTR [rbp+16], r11        ; word 2
  66292. ; No load %r12 - %r9
  66293. ; A[3] x A[0]
  66294. mulx r14, rax, QWORD PTR [r9+24]
  66295. adcx r10, rax
  66296. adox r14, r12
  66297. mov QWORD PTR [rbp+24], r10        ; word 3
  66298. ; No load %r13 - %r8
  66299. ; A[4] x A[0]
  66300. mulx r15, rax, QWORD PTR [r9+32]
  66301. adcx r14, rax                      ; r14 = word 4 (stays in a register)
  66302. adox r15, r12
  66303. ; No store %r12 - %r9
  66304. ; No load %r14 - %r9
  66305. ; A[5] x A[0]
  66306. mulx rdi, rax, QWORD PTR [r9+40]
  66307. adcx r15, rax                      ; r15 = word 5
  66308. adox rdi, r12
  66309. ; No store %r13 - %r8
  66310. ; No load %r15 - %r8
  66311. ; A[6] x A[0]
  66312. mulx rsi, rax, QWORD PTR [r9+48]
  66313. adcx rdi, rax                      ; rdi = word 6
  66314. adox rsi, r12
  66315. ; No store %r14 - %r9
  66316. ; No load %rbx - %r9
  66317. ; A[7] x A[0]
  66318. mulx rbx, rax, QWORD PTR [r9+56]
  66319. adcx rsi, rax                      ; rsi = word 7
  66320. adox rbx, r12
  66321. ; No store %r15 - %r8
  66322. ; Zero into %r8
  66323. ; A[8] x A[0]
  66324. mulx r10, rax, QWORD PTR [r9+64]
  66325. adcx rbx, rax                      ; rbx = word 8
  66326. adox r10, r12
  66327. ; No store %rbx - %r9
  66328. ; Zero into %r9
  66329. ; A[8] x A[1]
  66330. mov rdx, QWORD PTR [r9+8]
  66331. mulx r11, rax, QWORD PTR [r9+64]
  66332. adcx r10, rax
  66333. adox r11, r12
  66334. mov QWORD PTR [rsp], r10           ; word 9
  66335. ; Carry
  66336. adcx r11, r12                      ; fold the pending CF into the top word
  66337. mov r13, r12
  66338. adcx r13, r12
  66339. adox r13, r12                      ; r13 = CF + OF left over from this diagonal
  66340. mov QWORD PTR [rsp+8], r11         ; word 10
  66341. ; Diagonal 2
  66342. mov r11, QWORD PTR [rbp+24]
  66343. ; No load %r12 - %r8
  66344. ; A[2] x A[1]
  66345. mulx rcx, rax, QWORD PTR [r9+16]
  66346. adcx r11, rax
  66347. adox r14, rcx
  66348. mov QWORD PTR [rbp+24], r11
  66349. ; No load %r13 - %r9
  66350. ; A[3] x A[1]
  66351. mulx rcx, rax, QWORD PTR [r9+24]
  66352. adcx r14, rax
  66353. adox r15, rcx
  66354. ; No store %r12 - %r8
  66355. ; No load %r14 - %r8
  66356. ; A[4] x A[1]
  66357. mulx rcx, rax, QWORD PTR [r9+32]
  66358. adcx r15, rax
  66359. adox rdi, rcx
  66360. ; No store %r13 - %r9
  66361. ; No load %r15 - %r9
  66362. ; A[5] x A[1]
  66363. mulx rcx, rax, QWORD PTR [r9+40]
  66364. adcx rdi, rax
  66365. adox rsi, rcx
  66366. ; No store %r14 - %r8
  66367. ; No load %rbx - %r8
  66368. ; A[6] x A[1]
  66369. mulx rcx, rax, QWORD PTR [r9+48]
  66370. adcx rsi, rax
  66371. adox rbx, rcx
  66372. ; No store %r15 - %r9
  66373. mov r11, QWORD PTR [rsp]
  66374. ; A[7] x A[1]
  66375. mulx rcx, rax, QWORD PTR [r9+56]
  66376. adcx rbx, rax
  66377. adox r11, rcx
  66378. ; No store %rbx - %r8
  66379. mov r10, QWORD PTR [rsp+8]
  66380. ; A[7] x A[2]
  66381. mov rdx, QWORD PTR [r9+16]
  66382. mulx rcx, rax, QWORD PTR [r9+56]
  66383. adcx r11, rax
  66384. adox r10, rcx
  66385. mov QWORD PTR [rsp], r11
  66386. ; Zero into %r9
  66387. ; A[7] x A[3]
  66388. mov rdx, QWORD PTR [r9+24]
  66389. mulx r11, rax, QWORD PTR [r9+56]
  66390. adcx r10, rax
  66391. adox r11, r12
  66392. mov QWORD PTR [rsp+8], r10
  66393. ; Zero into %r8
  66394. ; A[7] x A[4]
  66395. mov rdx, QWORD PTR [r9+32]
  66396. mulx r10, rax, QWORD PTR [r9+56]
  66397. adcx r11, rax
  66398. adox r10, r12
  66399. mov QWORD PTR [rsp+16], r11        ; word 11
  66400. ; Carry
  66401. adcx r10, r13                      ; add CF and the carry saved by the previous diagonal
  66402. mov r13, r12
  66403. adcx r13, r12
  66404. adox r13, r12                      ; r13 = CF + OF left over from this diagonal
  66405. mov QWORD PTR [rsp+24], r10        ; word 12
  66406. ; Diagonal 3
  66407. ; No load %r14 - %r9
  66408. ; A[3] x A[2]
  66409. mov rdx, QWORD PTR [r9+16]
  66410. mulx rcx, rax, QWORD PTR [r9+24]
  66411. adcx r15, rax
  66412. adox rdi, rcx
  66413. ; No store %r13 - %r8
  66414. ; No load %r15 - %r8
  66415. ; A[4] x A[2]
  66416. mulx rcx, rax, QWORD PTR [r9+32]
  66417. adcx rdi, rax
  66418. adox rsi, rcx
  66419. ; No store %r14 - %r9
  66420. ; No load %rbx - %r9
  66421. ; A[5] x A[2]
  66422. mulx rcx, rax, QWORD PTR [r9+40]
  66423. adcx rsi, rax
  66424. adox rbx, rcx
  66425. ; No store %r15 - %r8
  66426. mov r10, QWORD PTR [rsp]
  66427. ; A[6] x A[2]
  66428. mulx rcx, rax, QWORD PTR [r9+48]
  66429. adcx rbx, rax
  66430. adox r10, rcx
  66431. ; No store %rbx - %r9
  66432. mov r11, QWORD PTR [rsp+8]
  66433. ; A[6] x A[3]
  66434. mov rdx, QWORD PTR [r9+24]
  66435. mulx rcx, rax, QWORD PTR [r9+48]
  66436. adcx r10, rax
  66437. adox r11, rcx
  66438. mov QWORD PTR [rsp], r10
  66439. mov r10, QWORD PTR [rsp+16]
  66440. ; A[6] x A[4]
  66441. mov rdx, QWORD PTR [r9+32]
  66442. mulx rcx, rax, QWORD PTR [r9+48]
  66443. adcx r11, rax
  66444. adox r10, rcx
  66445. mov QWORD PTR [rsp+8], r11
  66446. mov r11, QWORD PTR [rsp+24]
  66447. ; A[6] x A[5]
  66448. mov rdx, QWORD PTR [r9+40]
  66449. mulx rcx, rax, QWORD PTR [r9+48]
  66450. adcx r10, rax
  66451. adox r11, rcx
  66452. mov QWORD PTR [rsp+16], r10
  66453. ; Zero into %r8
  66454. ; A[8] x A[4]
  66455. mov rdx, QWORD PTR [r9+32]
  66456. mulx r10, rax, QWORD PTR [r9+64]
  66457. adcx r11, rax
  66458. adox r10, r12
  66459. mov QWORD PTR [rsp+24], r11
  66460. ; Zero into %r9
  66461. ; A[8] x A[5]
  66462. mov rdx, QWORD PTR [r9+40]
  66463. mulx r11, rax, QWORD PTR [r9+64]
  66464. adcx r10, rax
  66465. adox r11, r12
  66466. mov QWORD PTR [rsp+32], r10        ; word 13
  66467. ; Carry
  66468. adcx r11, r13                      ; add CF and the carry saved by the previous diagonal
  66469. mov r13, r12
  66470. adcx r13, r12
  66471. adox r13, r12                      ; r13 = CF + OF left over from this diagonal
  66472. mov QWORD PTR [rsp+40], r11        ; word 14
  66473. ; Diagonal 4
  66474. ; No load %rbx - %r8
  66475. ; A[4] x A[3]
  66476. mov rdx, QWORD PTR [r9+24]
  66477. mulx rcx, rax, QWORD PTR [r9+32]
  66478. adcx rsi, rax
  66479. adox rbx, rcx
  66480. ; No store %r15 - %r9
  66481. mov r11, QWORD PTR [rsp]
  66482. ; A[5] x A[3]
  66483. mulx rcx, rax, QWORD PTR [r9+40]
  66484. adcx rbx, rax
  66485. adox r11, rcx
  66486. ; No store %rbx - %r8
  66487. mov r10, QWORD PTR [rsp+8]
  66488. ; A[5] x A[4]
  66489. mov rdx, QWORD PTR [r9+32]
  66490. mulx rcx, rax, QWORD PTR [r9+40]
  66491. adcx r11, rax
  66492. adox r10, rcx
  66493. mov QWORD PTR [rsp], r11
  66494. mov r11, QWORD PTR [rsp+16]
  66495. ; A[8] x A[2]
  66496. mov rdx, QWORD PTR [r9+16]
  66497. mulx rcx, rax, QWORD PTR [r9+64]
  66498. adcx r10, rax
  66499. adox r11, rcx
  66500. mov QWORD PTR [rsp+8], r10
  66501. mov r10, QWORD PTR [rsp+24]
  66502. ; A[8] x A[3]
  66503. mov rdx, QWORD PTR [r9+24]
  66504. mulx rcx, rax, QWORD PTR [r9+64]
  66505. adcx r11, rax
  66506. adox r10, rcx
  66507. mov QWORD PTR [rsp+16], r11
  66508. mov r11, QWORD PTR [rsp+32]
  66509. ; A[7] x A[5]
  66510. mov rdx, QWORD PTR [r9+40]
  66511. mulx rcx, rax, QWORD PTR [r9+56]
  66512. adcx r10, rax
  66513. adox r11, rcx
  66514. mov QWORD PTR [rsp+24], r10
  66515. mov r10, QWORD PTR [rsp+40]
  66516. ; A[7] x A[6]
  66517. mov rdx, QWORD PTR [r9+48]
  66518. mulx rcx, rax, QWORD PTR [r9+56]
  66519. adcx r11, rax
  66520. adox r10, rcx
  66521. mov QWORD PTR [rsp+32], r11
  66522. ; Zero into %r9
  66523. ; A[8] x A[6]
  66524. mulx r11, rax, QWORD PTR [r9+64]
  66525. adcx r10, rax
  66526. adox r11, r12
  66527. mov QWORD PTR [rsp+40], r10
  66528. ; Zero into %r8
  66529. ; A[8] x A[7]
  66530. mov rdx, QWORD PTR [r9+56]
  66531. mulx r10, rax, QWORD PTR [r9+64]
  66532. adcx r11, rax
  66533. adox r10, r12
  66534. mov QWORD PTR [rsp+48], r11        ; word 15
  66535. ; Carry
  66536. adcx r10, r13                      ; add CF and the carry saved by the previous diagonal
  66537. mov r13, r12
  66538. adcx r13, r12
  66539. adox r13, r12
  66540. mov QWORD PTR [rsp+56], r10        ; word 16
  66541. mov QWORD PTR [rsp+64], r13        ; word 17
  66542. ; Double and Add in A[i] x A[i]
         ; Each off-diagonal word is doubled on the OF chain (adox x, x) and the
         ; matching word of A[i]^2 is added on the CF chain (adcx).
  66543. mov r11, QWORD PTR [rbp+8]
  66544. ; A[0] x A[0]
  66545. mov rdx, QWORD PTR [r9]
  66546. mulx rcx, rax, rdx
  66547. mov QWORD PTR [rbp], rax           ; word 0 is the low half of A[0]^2 alone
  66548. adox r11, r11
  66549. adcx r11, rcx
  66550. mov QWORD PTR [rbp+8], r11
  66551. mov r10, QWORD PTR [rbp+16]
  66552. mov r11, QWORD PTR [rbp+24]
  66553. ; A[1] x A[1]
  66554. mov rdx, QWORD PTR [r9+8]
  66555. mulx rcx, rax, rdx
  66556. adox r10, r10
  66557. adox r11, r11
  66558. adcx r10, rax
  66559. adcx r11, rcx
  66560. mov QWORD PTR [rbp+16], r10
  66561. mov QWORD PTR [rbp+24], r11
  66562. ; A[2] x A[2]
  66563. mov rdx, QWORD PTR [r9+16]
  66564. mulx rcx, rax, rdx
  66565. adox r14, r14
  66566. adox r15, r15
  66567. adcx r14, rax
  66568. adcx r15, rcx
  66569. ; A[3] x A[3]
  66570. mov rdx, QWORD PTR [r9+24]
  66571. mulx rcx, rax, rdx
  66572. adox rdi, rdi
  66573. adox rsi, rsi
  66574. adcx rdi, rax
  66575. adcx rsi, rcx
  66576. mov r11, QWORD PTR [rsp]
  66577. ; A[4] x A[4]
  66578. mov rdx, QWORD PTR [r9+32]
  66579. mulx rcx, rax, rdx
  66580. adox rbx, rbx
  66581. adox r11, r11
  66582. adcx rbx, rax
  66583. adcx r11, rcx
  66584. mov QWORD PTR [rsp], r11
  66585. mov r10, QWORD PTR [rsp+8]
  66586. mov r11, QWORD PTR [rsp+16]
  66587. ; A[5] x A[5]
  66588. mov rdx, QWORD PTR [r9+40]
  66589. mulx rcx, rax, rdx
  66590. adox r10, r10
  66591. adox r11, r11
  66592. adcx r10, rax
  66593. adcx r11, rcx
  66594. mov QWORD PTR [rsp+8], r10
  66595. mov QWORD PTR [rsp+16], r11
  66596. mov r10, QWORD PTR [rsp+24]
  66597. mov r11, QWORD PTR [rsp+32]
  66598. ; A[6] x A[6]
  66599. mov rdx, QWORD PTR [r9+48]
  66600. mulx rcx, rax, rdx
  66601. adox r10, r10
  66602. adox r11, r11
  66603. adcx r10, rax
  66604. adcx r11, rcx
  66605. mov QWORD PTR [rsp+24], r10
  66606. mov QWORD PTR [rsp+32], r11
  66607. mov r10, QWORD PTR [rsp+40]
  66608. mov r11, QWORD PTR [rsp+48]
  66609. ; A[7] x A[7]
  66610. mov rdx, QWORD PTR [r9+56]
  66611. mulx rcx, rax, rdx
  66612. adox r10, r10
  66613. adox r11, r11
  66614. adcx r10, rax
  66615. adcx r11, rcx
  66616. mov QWORD PTR [rsp+40], r10
  66617. mov QWORD PTR [rsp+48], r11
  66618. mov r10, QWORD PTR [rsp+56]
  66619. mov r11, QWORD PTR [rsp+64]
  66620. ; A[8] x A[8]
  66621. mov rdx, QWORD PTR [r9+64]
  66622. mulx rcx, rax, rdx
  66623. adox r10, r10
  66624. adox r11, r11
  66625. adcx r10, rax
  66626. adcx r11, rcx
  66627. mov QWORD PTR [rsp+56], r10
  66628. mov QWORD PTR [rsp+64], r11
         ; Spill the register-held words 4..8 into the buffer
         ; ([rsp-40] is [rbp+32], ..., [rsp-8] is [rbp+64]).
  66629. mov QWORD PTR [rsp+-40], r14
  66630. mov QWORD PTR [rsp+-32], r15
  66631. mov QWORD PTR [rsp+-24], rdi
  66632. mov QWORD PTR [rsp+-16], rsi
  66633. mov QWORD PTR [rsp+-8], rbx
         ; Fold at bit 521: load words 8..16, keep the low 9 bits of word 8 in rcx,
         ; shift the nine loaded words right by 9 and add them to words 0..8.
         ; Word 17 ([rsp+64]) is not read from here on.
  66634. mov r10, QWORD PTR [rsp+-8]
  66635. mov r11, QWORD PTR [rsp]
  66636. mov r14, QWORD PTR [rsp+8]
  66637. mov rcx, r10
  66638. and rcx, 511                       ; low 9 bits of word 8 = bits 512..520
  66639. mov r15, QWORD PTR [rsp+16]
  66640. mov rdi, QWORD PTR [rsp+24]
  66641. mov rsi, QWORD PTR [rsp+32]
  66642. mov rbx, QWORD PTR [rsp+40]
  66643. mov rdx, QWORD PTR [rsp+48]
  66644. mov rax, QWORD PTR [rsp+56]
  66645. sub rsp, 72                        ; rsp -> word 0 again (same address as rbp)
  66646. shrd r10, r11, 9
  66647. shrd r11, r14, 9
  66648. shrd r14, r15, 9
  66649. shrd r15, rdi, 9
  66650. shrd rdi, rsi, 9
  66651. shrd rsi, rbx, 9
  66652. shrd rbx, rdx, 9
  66653. shrd rdx, rax, 9
  66654. shr rax, 9
  66655. add r10, QWORD PTR [rsp]
  66656. adc r11, QWORD PTR [rsp+8]
  66657. adc r14, QWORD PTR [rsp+16]
  66658. adc r15, QWORD PTR [rsp+24]
  66659. adc rdi, QWORD PTR [rsp+32]
  66660. adc rsi, QWORD PTR [rsp+40]
  66661. adc rbx, QWORD PTR [rsp+48]
  66662. adc rdx, QWORD PTR [rsp+56]
  66663. adc rcx, rax
         ; Second fold: whatever the sum left above bit 8 of the top word is
         ; added back in at bit 0.
  66664. mov rax, rcx
  66665. shr rcx, 9
  66666. and rax, 511
  66667. add r10, rcx
  66668. adc r11, 0
  66669. adc r14, 0
  66670. adc r15, 0
  66671. adc rdi, 0
  66672. adc rsi, 0
  66673. adc rbx, 0
  66674. adc rdx, 0
  66675. adc rax, 0
         ; Write the nine result words to r.
  66676. mov QWORD PTR [r8], r10
  66677. mov QWORD PTR [r8+8], r11
  66678. mov QWORD PTR [r8+16], r14
  66679. mov QWORD PTR [r8+24], r15
  66680. mov QWORD PTR [r8+32], rdi
  66681. mov QWORD PTR [r8+40], rsi
  66682. mov QWORD PTR [r8+48], rbx
  66683. mov QWORD PTR [r8+56], rdx
  66684. mov QWORD PTR [r8+64], rax
  66685. add rsp, 144                       ; release the scratch buffer
  66686. pop rbx
  66687. pop rsi
  66688. pop rdi
  66689. pop r15
  66690. pop r14
  66691. pop r13
  66692. pop r12
  66693. pop rbp
  66694. ret
  66695. sp_521_mont_sqr_avx2_9 ENDP
  66696. _text ENDS
  66697. ENDIF
  66698. IFDEF HAVE_INTEL_AVX2
  66699. ; /* Conditionally subtract b from a using the mask m.
  66700. ; * m is -1 to subtract and 0 when not subtracting.
  66701. ; *
  66702. ; * r A single precision number representing condition subtract result.
  66703. ; * a A single precision number to subtract from.
  66704. ; * b A single precision number to subtract.
  66705. ; * m Mask value to apply.
         ; *
         ; * In:    rcx = r, rdx = a, r8 = b, r9 = m (nine 64-bit words each)
         ; * Out:   rax = 0 when no borrow left the top word, -1 otherwise
         ; * Uses:  rax, r10, r11 and flags only - a true leaf function, no stack use.
         ; *
         ; * Each word of b is passed through pext with m as the bit mask: an all-ones
         ; * mask returns the word unchanged and a zero mask returns 0. pext and mov
         ; * leave the flags alone, so the borrow survives from one sbb to the next.
         ; * The store of word i is issued after the loads of word i+1.
  66706. ; */
  66707. _text SEGMENT READONLY PARA
  66708. sp_521_cond_sub_avx2_9 PROC
         ; word 0
  66710. mov rax, QWORD PTR [r8]
  66711. mov r10, QWORD PTR [rdx]
  66712. pext rax, rax, r9
  66713. sub r10, rax
         ; word 1
  66714. mov rax, QWORD PTR [r8+8]
  66715. mov r11, QWORD PTR [rdx+8]
  66716. pext rax, rax, r9
  66717. mov QWORD PTR [rcx], r10
  66718. sbb r11, rax
         ; word 2
  66719. mov rax, QWORD PTR [r8+16]
  66720. mov r10, QWORD PTR [rdx+16]
  66721. pext rax, rax, r9
  66722. mov QWORD PTR [rcx+8], r11
  66723. sbb r10, rax
         ; word 3
  66724. mov rax, QWORD PTR [r8+24]
  66725. mov r11, QWORD PTR [rdx+24]
  66726. pext rax, rax, r9
  66727. mov QWORD PTR [rcx+16], r10
  66728. sbb r11, rax
         ; word 4
  66729. mov rax, QWORD PTR [r8+32]
  66730. mov r10, QWORD PTR [rdx+32]
  66731. pext rax, rax, r9
  66732. mov QWORD PTR [rcx+24], r11
  66733. sbb r10, rax
         ; word 5
  66734. mov rax, QWORD PTR [r8+40]
  66735. mov r11, QWORD PTR [rdx+40]
  66736. pext rax, rax, r9
  66737. mov QWORD PTR [rcx+32], r10
  66738. sbb r11, rax
         ; word 6
  66739. mov rax, QWORD PTR [r8+48]
  66740. mov r10, QWORD PTR [rdx+48]
  66741. pext rax, rax, r9
  66742. mov QWORD PTR [rcx+40], r11
  66743. sbb r10, rax
         ; word 7
  66744. mov rax, QWORD PTR [r8+56]
  66745. mov r11, QWORD PTR [rdx+56]
  66746. pext rax, rax, r9
  66747. mov QWORD PTR [rcx+48], r10
  66748. sbb r11, rax
         ; word 8
  66749. mov rax, QWORD PTR [r8+64]
  66750. mov r10, QWORD PTR [rdx+64]
  66751. pext rax, rax, r9
  66752. mov QWORD PTR [rcx+56], r11
  66753. sbb r10, rax
  66754. mov QWORD PTR [rcx+64], r10
  66755. sbb rax, rax                       ; rax = 0 - borrow
  66757. ret
  66758. sp_521_cond_sub_avx2_9 ENDP
  66759. _text ENDS
  66760. ENDIF
  66761. IFDEF HAVE_INTEL_AVX2
  66762. ; /* Reduce the number back to 521 bits using Montgomery reduction.
  66763. ; *
  66764. ; * a A single precision number to reduce in place.
  66765. ; * m The single precision number representing the modulus.
  66766. ; * mp The digit representing the negative inverse of m mod 2^n.
         ; *
         ; * Register roles in the code below:
         ; *   rcx = a, rdx = m, r8 = mp on entry.
         ; *   r9  = moving cursor into a, r10 = m.
         ; *   r14, r15, rdi, rsi = a[i..i+3], a sliding window kept in registers.
         ; *   r12, r13 = working words, rbx = 0.
         ; *   rbp = carry handed from one row to the next.
         ; *   r11 = number of rows left for the loop.
         ; * The loop handles rows 0..7 two at a time; row 8 follows the loop with mu
         ; * masked to 9 bits. The words from a[8] upward are then shifted right by
         ; * 9 bits into a[0..9] and m is subtracted under a mask derived from
         ; * a[8] >> 9. rax is never set, so the routine returns no value.
  66767. ; */
  66768. _text SEGMENT READONLY PARA
  66769. sp_521_mont_reduce_order_avx2_9 PROC
  66770. push r12
  66771. push r13
  66772. push r14
  66773. push r15
  66774. push rdi
  66775. push rsi
  66776. push rbx
  66777. push rbp
  66778. mov r9, rcx                        ; r9 = a
  66779. mov r10, rdx                       ; r10 = m (rdx is needed as the mulx multiplicand)
  66780. xor rbp, rbp                       ; no carry into the first row
  66781. ; i = 9
  66782. mov r11, 8                         ; 8 rows in the loop, the 9th is unrolled after it
  66783. mov r14, QWORD PTR [r9]
  66784. mov r15, QWORD PTR [r9+8]
  66785. mov rdi, QWORD PTR [r9+16]
  66786. mov rsi, QWORD PTR [r9+24]
  66787. add r9, 32                         ; r9 -> a[4]
  66788. xor rbp, rbp                       ; repeats the zeroing done above
  66789. L_521_mont_reduce_order_avx2_9_loop:
  66790. ; mu = a[i] * mp
  66791. mov rdx, r14
  66792. mov r12, r14
  66793. imul rdx, r8                       ; rdx = low 64 bits of a[i] * mp
  66794. xor rbx, rbx                       ; rbx = 0; CF = OF = 0 for the two carry chains
  66795. ; a[i+0] += m[0] * mu
  66796. mulx rcx, rax, QWORD PTR [r10]
  66797. mov r14, r15                       ; slide the register window down one word
  66798. adcx r12, rax
  66799. adox r14, rcx
  66800. mov QWORD PTR [r9+-32], r12        ; write a[i] back
  66801. ; a[i+1] += m[1] * mu
  66802. mulx rcx, rax, QWORD PTR [r10+8]
  66803. mov r15, rdi
  66804. adcx r14, rax
  66805. adox r15, rcx
  66806. ; a[i+2] += m[2] * mu
  66807. mulx rcx, rax, QWORD PTR [r10+16]
  66808. mov rdi, rsi
  66809. adcx r15, rax
  66810. adox rdi, rcx
  66811. ; a[i+3] += m[3] * mu
  66812. mulx rcx, rax, QWORD PTR [r10+24]
  66813. mov rsi, QWORD PTR [r9]            ; new top of the window comes from memory
  66814. adcx rdi, rax
  66815. adox rsi, rcx
  66816. ; a[i+4] += m[4] * mu
  66817. mulx rcx, rax, QWORD PTR [r10+32]
  66818. mov r13, QWORD PTR [r9+8]
  66819. adcx rsi, rax
  66820. adox r13, rcx
  66821. ; a[i+5] += m[5] * mu
  66822. mulx rcx, rax, QWORD PTR [r10+40]
  66823. mov r12, QWORD PTR [r9+16]
  66824. adcx r13, rax
  66825. adox r12, rcx
  66826. mov QWORD PTR [r9+8], r13
  66827. ; a[i+6] += m[6] * mu
  66828. mulx rcx, rax, QWORD PTR [r10+48]
  66829. mov r13, QWORD PTR [r9+24]
  66830. adcx r12, rax
  66831. adox r13, rcx
  66832. mov QWORD PTR [r9+16], r12
  66833. ; a[i+7] += m[7] * mu
  66834. mulx rcx, rax, QWORD PTR [r10+56]
  66835. mov r12, QWORD PTR [r9+32]
  66836. adcx r13, rax
  66837. adox r12, rcx
  66838. mov QWORD PTR [r9+24], r13
  66839. ; a[i+8] += m[8] * mu
  66840. mulx rcx, rax, QWORD PTR [r10+64]
  66841. mov r13, QWORD PTR [r9+40]
  66842. adcx r12, rax
  66843. adox r13, rcx
  66844. mov QWORD PTR [r9+32], r12
  66845. adcx r13, rbp                      ; a[i+9] += CF + carry from the previous row
  66846. mov rbp, rbx
  66847. mov QWORD PTR [r9+40], r13
  66848. adox rbp, rbx
  66849. adcx rbp, rbx                      ; rbp = OF + CF, the carry for the next row
  66850. ; mu = a[i] * mp
         ; Second row of the pass: same pattern, one word further on, with the
         ; roles of r12 and r13 swapped.
  66851. mov rdx, r14
  66852. mov r13, r14
  66853. imul rdx, r8
  66854. xor rbx, rbx
  66855. ; a[i+0] += m[0] * mu
  66856. mulx rcx, rax, QWORD PTR [r10]
  66857. mov r14, r15
  66858. adcx r13, rax
  66859. adox r14, rcx
  66860. mov QWORD PTR [r9+-24], r13
  66861. ; a[i+1] += m[1] * mu
  66862. mulx rcx, rax, QWORD PTR [r10+8]
  66863. mov r15, rdi
  66864. adcx r14, rax
  66865. adox r15, rcx
  66866. ; a[i+2] += m[2] * mu
  66867. mulx rcx, rax, QWORD PTR [r10+16]
  66868. mov rdi, rsi
  66869. adcx r15, rax
  66870. adox rdi, rcx
  66871. ; a[i+3] += m[3] * mu
  66872. mulx rcx, rax, QWORD PTR [r10+24]
  66873. mov rsi, QWORD PTR [r9+8]
  66874. adcx rdi, rax
  66875. adox rsi, rcx
  66876. ; a[i+4] += m[4] * mu
  66877. mulx rcx, rax, QWORD PTR [r10+32]
  66878. mov r12, QWORD PTR [r9+16]
  66879. adcx rsi, rax
  66880. adox r12, rcx
  66881. ; a[i+5] += m[5] * mu
  66882. mulx rcx, rax, QWORD PTR [r10+40]
  66883. mov r13, QWORD PTR [r9+24]
  66884. adcx r12, rax
  66885. adox r13, rcx
  66886. mov QWORD PTR [r9+16], r12
  66887. ; a[i+6] += m[6] * mu
  66888. mulx rcx, rax, QWORD PTR [r10+48]
  66889. mov r12, QWORD PTR [r9+32]
  66890. adcx r13, rax
  66891. adox r12, rcx
  66892. mov QWORD PTR [r9+24], r13
  66893. ; a[i+7] += m[7] * mu
  66894. mulx rcx, rax, QWORD PTR [r10+56]
  66895. mov r13, QWORD PTR [r9+40]
  66896. adcx r12, rax
  66897. adox r13, rcx
  66898. mov QWORD PTR [r9+32], r12
  66899. ; a[i+8] += m[8] * mu
  66900. mulx rcx, rax, QWORD PTR [r10+64]
  66901. mov r12, QWORD PTR [r9+48]
  66902. adcx r13, rax
  66903. adox r12, rcx
  66904. mov QWORD PTR [r9+40], r13
  66905. adcx r12, rbp
  66906. mov rbp, rbx
  66907. mov QWORD PTR [r9+48], r12
  66908. adox rbp, rbx
  66909. adcx rbp, rbx
  66910. ; a += 2
  66911. add r9, 16
  66912. ; i -= 2
  66913. sub r11, 2
  66914. jnz L_521_mont_reduce_order_avx2_9_loop
  66915. ; mu = a[i] * mp
         ; Row 8, the last one: mu is cut down to its low 9 bits.
  66916. mov rdx, r14
  66917. mov r12, r14
  66918. imul rdx, r8
  66919. and rdx, 511
  66920. xor rbx, rbx
  66921. ; a[i+0] += m[0] * mu
  66922. mulx rcx, rax, QWORD PTR [r10]
  66923. mov r14, r15
  66924. adcx r12, rax
  66925. adox r14, rcx
  66926. mov QWORD PTR [r9+-32], r12
  66927. ; a[i+1] += m[1] * mu
  66928. mulx rcx, rax, QWORD PTR [r10+8]
  66929. mov r15, rdi
  66930. adcx r14, rax
  66931. adox r15, rcx
  66932. ; a[i+2] += m[2] * mu
  66933. mulx rcx, rax, QWORD PTR [r10+16]
  66934. mov rdi, rsi
  66935. adcx r15, rax
  66936. adox rdi, rcx
  66937. ; a[i+3] += m[3] * mu
  66938. mulx rcx, rax, QWORD PTR [r10+24]
  66939. mov rsi, QWORD PTR [r9]
  66940. adcx rdi, rax
  66941. adox rsi, rcx
  66942. ; a[i+4] += m[4] * mu
  66943. mulx rcx, rax, QWORD PTR [r10+32]
  66944. mov r13, QWORD PTR [r9+8]
  66945. adcx rsi, rax
  66946. adox r13, rcx
  66947. ; a[i+5] += m[5] * mu
  66948. mulx rcx, rax, QWORD PTR [r10+40]
  66949. mov r12, QWORD PTR [r9+16]
  66950. adcx r13, rax
  66951. adox r12, rcx
  66952. mov QWORD PTR [r9+8], r13
  66953. ; a[i+6] += m[6] * mu
  66954. mulx rcx, rax, QWORD PTR [r10+48]
  66955. mov r13, QWORD PTR [r9+24]
  66956. adcx r12, rax
  66957. adox r13, rcx
  66958. mov QWORD PTR [r9+16], r12
  66959. ; a[i+7] += m[7] * mu
  66960. mulx rcx, rax, QWORD PTR [r10+56]
  66961. mov r12, QWORD PTR [r9+32]
  66962. adcx r13, rax
  66963. adox r12, rcx
  66964. mov QWORD PTR [r9+24], r13
  66965. ; a[i+8] += m[8] * mu
  66966. mulx rcx, rax, QWORD PTR [r10+64]
  66967. mov r13, QWORD PTR [r9+40]
  66968. adcx r12, rax
  66969. adox r13, rcx
  66970. mov QWORD PTR [r9+32], r12
  66971. adcx r13, rbp
  66972. mov rbp, rbx
  66973. mov QWORD PTR [r9+40], r13
  66974. adox rbp, rbx                      ; rbp is reloaded below before it is used again
  66975. ; a += 1
  66976. add r9, 8
         ; Flush the four words still held in the register window.
  66977. mov QWORD PTR [r9+-32], r14
  66978. mov QWORD PTR [r9+-24], r15
  66979. mov QWORD PTR [r9+-16], rdi
  66980. mov QWORD PTR [r9+-8], rsi
  66981. sub r9, 32
  66982. lea r8, QWORD PTR [r9+-8]          ; r8 -> a[8], the source of the shift (mp no longer needed)
  66983. sub r9, 72                         ; r9 -> a[0], the destination
         ; Shift the ten words at r8 right by 9 bits and store them at a[0..9].
  66984. mov r12, QWORD PTR [r8]
  66985. mov r14, QWORD PTR [r8+8]
  66986. mov r15, QWORD PTR [r8+16]
  66987. mov rdi, QWORD PTR [r8+24]
  66988. mov r13, QWORD PTR [r8+32]
  66989. shrd r12, r14, 9
  66990. shrd r14, r15, 9
  66991. shrd r15, rdi, 9
  66992. shrd rdi, r13, 9
  66993. mov QWORD PTR [r9], r12
  66994. mov QWORD PTR [r9+8], r14
  66995. mov QWORD PTR [r9+16], r15
  66996. mov QWORD PTR [r9+24], rdi
  66997. mov r14, QWORD PTR [r8+40]
  66998. mov r15, QWORD PTR [r8+48]
  66999. mov rdi, QWORD PTR [r8+56]
  67000. mov r12, QWORD PTR [r8+64]
  67001. shrd r13, r14, 9
  67002. shrd r14, r15, 9
  67003. shrd r15, rdi, 9
  67004. shrd rdi, r12, 9
  67005. mov QWORD PTR [r9+32], r13
  67006. mov QWORD PTR [r9+40], r14
  67007. mov QWORD PTR [r9+48], r15
  67008. mov QWORD PTR [r9+56], rdi
  67009. mov r14, QWORD PTR [r8+72]
  67010. shrd r12, r14, 9
  67011. shr r14, 9
  67012. mov QWORD PTR [r9+64], r12
  67013. mov QWORD PTR [r9+72], r14
         ; Mask = -(a[8] >> 9). The pext-based select below behaves as a plain
         ; "subtract m or subtract 0" only for a mask of 0 or all-ones, i.e. when
         ; a[8] >> 9 is 0 or 1 - presumably guaranteed by the caller; TODO confirm.
  67014. mov rbp, QWORD PTR [r9+64]
  67015. shr rbp, 9
  67016. neg rbp
         ; a[0..8] -= (m[0..8] selected by the mask). pext and mov leave the flags
         ; untouched, so the borrow chain runs across them.
  67017. mov rcx, QWORD PTR [r10]
  67018. mov rdx, QWORD PTR [r9]
  67019. pext rcx, rcx, rbp
  67020. sub rdx, rcx
  67021. mov rcx, QWORD PTR [r10+8]
  67022. mov rax, QWORD PTR [r9+8]
  67023. pext rcx, rcx, rbp
  67024. mov QWORD PTR [r9], rdx
  67025. sbb rax, rcx
  67026. mov rdx, QWORD PTR [r10+16]
  67027. mov rcx, QWORD PTR [r9+16]
  67028. pext rdx, rdx, rbp
  67029. mov QWORD PTR [r9+8], rax
  67030. sbb rcx, rdx
  67031. mov rax, QWORD PTR [r10+24]
  67032. mov rdx, QWORD PTR [r9+24]
  67033. pext rax, rax, rbp
  67034. mov QWORD PTR [r9+16], rcx
  67035. sbb rdx, rax
  67036. mov rcx, QWORD PTR [r10+32]
  67037. mov rax, QWORD PTR [r9+32]
  67038. pext rcx, rcx, rbp
  67039. mov QWORD PTR [r9+24], rdx
  67040. sbb rax, rcx
  67041. mov rdx, QWORD PTR [r10+40]
  67042. mov rcx, QWORD PTR [r9+40]
  67043. pext rdx, rdx, rbp
  67044. mov QWORD PTR [r9+32], rax
  67045. sbb rcx, rdx
  67046. mov rax, QWORD PTR [r10+48]
  67047. mov rdx, QWORD PTR [r9+48]
  67048. pext rax, rax, rbp
  67049. mov QWORD PTR [r9+40], rcx
  67050. sbb rdx, rax
  67051. mov rcx, QWORD PTR [r10+56]
  67052. mov rax, QWORD PTR [r9+56]
  67053. pext rcx, rcx, rbp
  67054. mov QWORD PTR [r9+48], rdx
  67055. sbb rax, rcx
  67056. mov rdx, QWORD PTR [r10+64]
  67057. mov rcx, QWORD PTR [r9+64]
  67058. pext rdx, rdx, rbp
  67059. mov QWORD PTR [r9+56], rax
  67060. sbb rcx, rdx
  67061. mov QWORD PTR [r9+64], rcx
  67062. pop rbp
  67063. pop rbx
  67064. pop rsi
  67065. pop rdi
  67066. pop r15
  67067. pop r14
  67068. pop r13
  67069. pop r12
  67070. ret
  67071. sp_521_mont_reduce_order_avx2_9 ENDP
  67072. _text ENDS
  67073. ENDIF
  67074. IFDEF HAVE_INTEL_AVX2
  67075. ; /* Divide the number by 2 mod the modulus (prime). (r = a / 2 % m)
  67076. ; *
  67077. ; * r Result of division by 2.
  67078. ; * a Number to divide.
  67079. ; * m Modulus (prime).
         ; *
         ; * In:    rcx = r, rdx = a (nine 64-bit words each). The m argument (r8) is
         ; *        not read: the routine always adds 2^521 - 1 when a is odd.
         ; * Uses:  rax, rdx, r8-r11 (volatile) and r12-r14 (saved and restored).
         ; *
         ; * With odd = a[0] AND 1, the value computed is
         ; *     (a - odd + odd * 2^521) >> 1.
         ; * Subtracting odd only clears bit 0 of a[0] and can never borrow, and that
         ; * bit is shifted out anyway, so no subtract/borrow chain is needed: adding
         ; * odd << 9 to a[8] and shifting the nine words right by one bit gives the
         ; * same words. All nine words of a are loaded before anything is stored
         ; * to r.
  67080. ; */
  67081. _text SEGMENT READONLY PARA
  67082. sp_521_div2_avx2_9 PROC
  67083. push r12
  67084. push r13
  67085. push r14
  67088. mov rax, QWORD PTR [rdx]           ; a[0]
         mov r14, QWORD PTR [rdx+64]        ; a[8]
         mov r8, rax
         and r8, 1                          ; r8 = odd
         shl r8, 9
         add r14, r8                        ; a[8] += odd * 2^9, i.e. a += odd * 2^521
  67089. mov r8, QWORD PTR [rdx+8]          ; a[1] (scratch r8 is free again)
  67090. mov r9, QWORD PTR [rdx+16]
  67091. mov r10, QWORD PTR [rdx+24]
  67092. mov r11, QWORD PTR [rdx+32]
  67093. mov r12, QWORD PTR [rdx+40]
  67094. mov r13, QWORD PTR [rdx+48]
  67095. mov rdx, QWORD PTR [rdx+56]        ; a[7]; the pointer is not needed afterwards
         ; Shift the nine-word value right by one bit, lowest word first.
  67110. shrd rax, r8, 1
  67111. shrd r8, r9, 1
  67112. shrd r9, r10, 1
  67113. shrd r10, r11, 1
  67114. shrd r11, r12, 1
  67115. shrd r12, r13, 1
  67116. shrd r13, rdx, 1
  67117. shrd rdx, r14, 1
  67118. shr r14, 1
  67119. mov QWORD PTR [rcx], rax
  67120. mov QWORD PTR [rcx+8], r8
  67121. mov QWORD PTR [rcx+16], r9
  67122. mov QWORD PTR [rcx+24], r10
  67123. mov QWORD PTR [rcx+32], r11
  67124. mov QWORD PTR [rcx+40], r12
  67125. mov QWORD PTR [rcx+48], r13
  67126. mov QWORD PTR [rcx+56], rdx
  67127. mov QWORD PTR [rcx+64], r14
  67130. pop r14
  67131. pop r13
  67132. pop r12
  67133. ret
  67134. sp_521_div2_avx2_9 ENDP
  67135. _text ENDS
  67136. ENDIF
  67137. IFNDEF WC_NO_CACHE_RESISTANT
  67138. ; /* Touch each possible entry that could be being copied.
  67139. ; *
  67140. ; * r Point to copy into.
  67141. ; * table Table - start of the entries to access
  67142. ; * idx Index of entry to retrieve.
         ; *
         ; * In:    rcx = r, rdx = table, r8d = idx
         ; * Uses:  rax, rdx, r9-r11, r12 (saved), xmm0-xmm7, xmm12-xmm15
         ; *        (xmm6, xmm7 and xmm12-xmm15 are saved and restored).
         ; *
         ; * Table entries are 144 bytes: nine words at offset 0 and nine at offset
         ; * 72. The first group is written to r+0 and the second to r+144. Entries
         ; * 1..63 are all read, and only the one whose number equals idx is kept
         ; * (AND with an all-ones/zero mask, then OR), so the memory access pattern
         ; * does not depend on idx. Entry 0 is never read; idx = 0 yields all-zero
         ; * words.
         ; *
         ; * This is the non-AVX variant, so the XMM save/restore uses the SSE2
         ; * movdqu encoding rather than a VEX one. Both the vector mask and the
         ; * scalar mask compare the low 32 bits of idx.
  67143. ; */
  67144. _text SEGMENT READONLY PARA
  67145. sp_521_get_entry_64_9 PROC
  67146. push r12
  67147. sub rsp, 96
  67148. movdqu OWORD PTR [rsp], xmm6
  67149. movdqu OWORD PTR [rsp+16], xmm7
  67154. movdqu OWORD PTR [rsp+32], xmm12
  67155. movdqu OWORD PTR [rsp+48], xmm13
  67156. movdqu OWORD PTR [rsp+64], xmm14
  67157. movdqu OWORD PTR [rsp+80], xmm15
  67158. ; From entry 1
         ; Pass 1: first nine words of every entry -> r+0
  67159. mov r12, 1                         ; scalar entry counter
  67160. mov rax, 1
  67161. movd xmm13, r8d
  67162. add rdx, 144                       ; skip entry 0
  67163. movd xmm15, eax
  67164. mov rax, 63                        ; entries 1..63
  67165. pshufd xmm15, xmm15, 0             ; xmm15 = 1 in every lane
  67166. pshufd xmm13, xmm13, 0             ; xmm13 = idx in every lane
  67168. pxor xmm0, xmm0
  67169. pxor xmm1, xmm1
  67170. pxor xmm2, xmm2
  67171. pxor xmm3, xmm3
  67172. xor r11, r11
  67173. movdqa xmm14, xmm15                ; vector entry counter, starts at 1
  67174. L_521_get_entry_64_9_start_0:
  67175. movdqa xmm12, xmm14
  67176. paddd xmm14, xmm15
  67177. pcmpeqd xmm12, xmm13               ; xmm12 = all-ones when this entry is idx
  67178. xor r9, r9
  67179. cmp r8d, r12d                      ; same 32-bit compare as the vector mask
  67180. sete r9b
  67181. neg r9                             ; r9 = all-ones when this entry is idx
  67182. inc r12
  67183. movdqu xmm4, [rdx]
  67184. movdqu xmm5, [rdx+16]
  67185. movdqu xmm6, [rdx+32]
  67186. movdqu xmm7, [rdx+48]
  67187. mov r10, QWORD PTR [rdx+64]
  67188. add rdx, 144
  67189. pand xmm4, xmm12
  67190. pand xmm5, xmm12
  67191. pand xmm6, xmm12
  67192. pand xmm7, xmm12
  67193. and r10, r9
  67194. por xmm0, xmm4
  67195. por xmm1, xmm5
  67196. por xmm2, xmm6
  67197. por xmm3, xmm7
  67198. or r11, r10
  67199. dec rax
  67200. jnz L_521_get_entry_64_9_start_0
  67201. movdqu [rcx], xmm0
  67202. movdqu [rcx+16], xmm1
  67203. movdqu [rcx+32], xmm2
  67204. movdqu [rcx+48], xmm3
  67205. mov QWORD PTR [rcx+64], r11
  67206. ; From entry 1
         ; Pass 2: second nine words of every entry -> r+144
  67207. mov r12, 1
  67208. mov rax, 1
  67209. movd xmm13, r8d
  67210. sub rdx, 9000                      ; 64*144 - 9000 = 216: offset 72 of entry 1
  67211. movd xmm15, eax
  67212. mov rax, 63
  67213. pshufd xmm15, xmm15, 0
  67214. pshufd xmm13, xmm13, 0
  67216. pxor xmm0, xmm0
  67217. pxor xmm1, xmm1
  67218. pxor xmm2, xmm2
  67219. pxor xmm3, xmm3
  67220. xor r11, r11
  67221. movdqa xmm14, xmm15
  67222. L_521_get_entry_64_9_start_1:
  67223. movdqa xmm12, xmm14
  67224. paddd xmm14, xmm15
  67225. pcmpeqd xmm12, xmm13
  67226. xor r9, r9
  67227. cmp r8d, r12d
  67228. sete r9b
  67229. neg r9
  67230. inc r12
  67231. movdqu xmm4, [rdx]
  67232. movdqu xmm5, [rdx+16]
  67233. movdqu xmm6, [rdx+32]
  67234. movdqu xmm7, [rdx+48]
  67235. mov r10, QWORD PTR [rdx+64]
  67236. add rdx, 144
  67237. pand xmm4, xmm12
  67238. pand xmm5, xmm12
  67239. pand xmm6, xmm12
  67240. pand xmm7, xmm12
  67241. and r10, r9
  67242. por xmm0, xmm4
  67243. por xmm1, xmm5
  67244. por xmm2, xmm6
  67245. por xmm3, xmm7
  67246. or r11, r10
  67247. dec rax
  67248. jnz L_521_get_entry_64_9_start_1
  67249. movdqu [rcx+144], xmm0
  67250. movdqu [rcx+160], xmm1
  67251. movdqu [rcx+176], xmm2
  67252. movdqu [rcx+192], xmm3
  67253. mov QWORD PTR [rcx+208], r11
  67254. movdqu xmm6, OWORD PTR [rsp]
  67255. movdqu xmm7, OWORD PTR [rsp+16]
  67260. movdqu xmm12, OWORD PTR [rsp+32]
  67261. movdqu xmm13, OWORD PTR [rsp+48]
  67262. movdqu xmm14, OWORD PTR [rsp+64]
  67263. movdqu xmm15, OWORD PTR [rsp+80]
  67264. add rsp, 96
  67265. pop r12
  67266. ret
  67267. sp_521_get_entry_64_9 ENDP
  67268. _text ENDS
  67269. IFDEF HAVE_INTEL_AVX2
  67270. ; /* Touch each possible entry that could be being copied.
  67271. ; *
  67272. ; * r Point to copy into.
  67273. ; * table Table - start of the entries to access
  67274. ; * idx Index of entry to retrieve.
         ; *
         ; * In:    rcx = r, rdx = table, r8d = idx
         ; * Uses:  rax, rdx, r9-r11, r12-r14 (saved), ymm0-ymm11
         ; *        (xmm6-xmm11 are saved and restored).
         ; *
         ; * Table entries are 144 bytes: nine words at offset 0 and nine at offset
         ; * 72. The first group is written to r+0 and the second to r+144. Every
         ; * scanned entry is read and masked, so the memory access pattern does not
         ; * depend on idx. Entry 0 is never read; idx = 0 yields all-zero words.
         ; *
         ; * The scan covers entries 1..63, the same range as sp_521_get_entry_64_9.
         ; * NOTE(review): this assumes the table holds 64 entries, so that a 64th
         ; * iteration would read one entry past its end - confirm against the
         ; * caller.
  67275. ; */
  67276. _text SEGMENT READONLY PARA
  67277. sp_521_get_entry_64_avx2_9 PROC
  67278. push r12
  67279. push r13
  67280. push r14
  67281. sub rsp, 96
  67282. vmovdqu OWORD PTR [rsp], xmm6
  67283. vmovdqu OWORD PTR [rsp+16], xmm7
  67284. vmovdqu OWORD PTR [rsp+32], xmm8
  67285. vmovdqu OWORD PTR [rsp+48], xmm9
  67286. vmovdqu OWORD PTR [rsp+64], xmm10
  67287. vmovdqu OWORD PTR [rsp+80], xmm11
  67288. mov r14, 1                         ; scalar entry counter
  67289. mov rax, 1
  67290. vmovd xmm9, r8d
  67291. add rdx, 144                       ; skip entry 0
  67292. vmovd xmm11, eax
  67293. mov rax, 63                        ; entries 1..63
  67294. vpxor ymm10, ymm10, ymm10
  67295. vpermd ymm9, ymm10, ymm9           ; all-zero indices: broadcast idx to every lane
  67296. vpermd ymm11, ymm10, ymm11         ; broadcast 1 to every lane
  67297. vpxor ymm0, ymm0, ymm0
  67298. vpxor ymm1, ymm1, ymm1
  67299. vpxor ymm2, ymm2, ymm2
  67300. vpxor ymm3, ymm3, ymm3
  67301. xor r10, r10
  67302. xor r11, r11
  67303. vmovdqa ymm10, ymm11               ; vector entry counter, starts at 1
  67304. L_521_get_entry_64_avx2_9_start:
  67305. vpcmpeqd ymm8, ymm10, ymm9         ; ymm8 = all-ones when this entry is idx
  67306. vpaddd ymm10, ymm10, ymm11
  67307. xor r9, r9
  67308. cmp r8d, r14d                      ; same 32-bit compare as the vector mask
  67309. sete r9b
  67310. neg r9                             ; r9 = all-ones when this entry is idx
  67311. inc r14
  67312. vmovupd ymm4, YMMWORD PTR [rdx]
  67313. vmovupd ymm5, YMMWORD PTR [rdx+32]
  67314. vmovupd ymm6, YMMWORD PTR [rdx+72]
  67315. vmovupd ymm7, YMMWORD PTR [rdx+104]
  67316. mov r12, QWORD PTR [rdx+64]        ; ninth word of the first group
  67317. mov r13, QWORD PTR [rdx+136]       ; ninth word of the second group
  67318. add rdx, 144
  67319. vpand ymm4, ymm4, ymm8
  67320. vpand ymm5, ymm5, ymm8
  67321. vpand ymm6, ymm6, ymm8
  67322. vpand ymm7, ymm7, ymm8
  67323. and r12, r9
  67324. and r13, r9
  67325. vpor ymm0, ymm0, ymm4
  67326. vpor ymm1, ymm1, ymm5
  67327. vpor ymm2, ymm2, ymm6
  67328. vpor ymm3, ymm3, ymm7
  67329. or r10, r12
  67330. or r11, r13
  67331. dec rax
  67332. jnz L_521_get_entry_64_avx2_9_start
  67333. vmovupd YMMWORD PTR [rcx], ymm0
  67334. vmovupd YMMWORD PTR [rcx+32], ymm1
  67335. vmovupd YMMWORD PTR [rcx+144], ymm2
  67336. vmovupd YMMWORD PTR [rcx+176], ymm3
  67337. mov QWORD PTR [rcx+64], r10
  67338. mov QWORD PTR [rcx+208], r11
         vzeroupper                         ; do not return with dirty upper YMM halves
  67339. vmovdqu xmm6, OWORD PTR [rsp]
  67340. vmovdqu xmm7, OWORD PTR [rsp+16]
  67341. vmovdqu xmm8, OWORD PTR [rsp+32]
  67342. vmovdqu xmm9, OWORD PTR [rsp+48]
  67343. vmovdqu xmm10, OWORD PTR [rsp+64]
  67344. vmovdqu xmm11, OWORD PTR [rsp+80]
  67345. add rsp, 96
  67346. pop r14
  67347. pop r13
  67348. pop r12
  67349. ret
  67350. sp_521_get_entry_64_avx2_9 ENDP
  67351. _text ENDS
  67352. ENDIF
  67353. ENDIF
  67354. IFNDEF WC_NO_CACHE_RESISTANT
  ; /* Copy entry idx of the table into r, touching every candidate (SSE2).
  ;  *
  ;  * Two passes over entries 1..64. Pass one gathers bytes 0..71 of the
  ;  * selected entry into r+0..71; pass two gathers bytes 72..143 into
  ;  * r+144..215. In each pass every entry is loaded, AND-ed with a mask that
  ;  * is all ones only when the running counter equals idx, and OR-ed into
  ;  * zero-initialised accumulators, so the memory access sequence does not
  ;  * depend on idx. An idx matching no scanned entry (e.g. 0) gives zeros.
  ;  *
  ;  * r      (rcx) Point to copy into.
  ;  * table  (rdx) Table - start of the entries to access.
  ;  * idx    (r8d) Index of entry to retrieve.
  ;  */
  _text SEGMENT READONLY PARA
  sp_521_get_entry_65_9 PROC
          push    r12
          sub     rsp, 160
          ; xmm6-xmm15 are used below and restored before returning.
          vmovdqu OWORD PTR [rsp], xmm6
          vmovdqu OWORD PTR [rsp+16], xmm7
          vmovdqu OWORD PTR [rsp+32], xmm8
          vmovdqu OWORD PTR [rsp+48], xmm9
          vmovdqu OWORD PTR [rsp+64], xmm10
          vmovdqu OWORD PTR [rsp+80], xmm11
          vmovdqu OWORD PTR [rsp+96], xmm12
          vmovdqu OWORD PTR [rsp+112], xmm13
          vmovdqu OWORD PTR [rsp+128], xmm14
          vmovdqu OWORD PTR [rsp+144], xmm15

          ; ---- Pass 1: first 72 bytes of each entry, from entry 1 ----
          add     rdx, 144                        ; step over entry 0
          mov     rax, 1
          movd    xmm15, eax
          pshufd  xmm15, xmm15, 0                 ; xmm15 = 1 in every lane
          movd    xmm13, r8d
          pshufd  xmm13, xmm13, 0                 ; xmm13 = idx in every lane
          movdqa  xmm14, xmm15                    ; vector counter starts at 1
          pxor    xmm0, xmm0                      ; accumulators start at zero
          pxor    xmm1, xmm1
          pxor    xmm2, xmm2
          pxor    xmm3, xmm3
          xor     r11, r11
          mov     r12, 1                          ; scalar counter starts at 1
          mov     rax, 64                         ; 64 entries to scan
  L_521_get_entry_65_9_scan_lo:
          movdqa  xmm12, xmm14
          pcmpeqd xmm12, xmm13                    ; vector mask: counter == idx
          paddd   xmm14, xmm15
          xor     r9, r9
          cmp     r8, r12
          sete    r9b
          neg     r9                              ; scalar mask: -1 if match, else 0
          movdqu  xmm4, [rdx]
          pand    xmm4, xmm12
          por     xmm0, xmm4
          movdqu  xmm5, [rdx+16]
          pand    xmm5, xmm12
          por     xmm1, xmm5
          movdqu  xmm6, [rdx+32]
          pand    xmm6, xmm12
          por     xmm2, xmm6
          movdqu  xmm7, [rdx+48]
          pand    xmm7, xmm12
          por     xmm3, xmm7
          mov     r10, QWORD PTR [rdx+64]
          and     r10, r9
          or      r11, r10
          add     rdx, 144
          inc     r12
          dec     rax
          jnz     L_521_get_entry_65_9_scan_lo
          movdqu  [rcx], xmm0
          movdqu  [rcx+16], xmm1
          movdqu  [rcx+32], xmm2
          movdqu  [rcx+48], xmm3
          mov     QWORD PTR [rcx+64], r11

          ; ---- Pass 2: second 72 bytes of each entry, from entry 1 ----
          ; rdx has advanced 144 + 64*144 = 9360 bytes; 9360 - 9144 = 216,
          ; which is byte 72 of entry 1.
          sub     rdx, 9144
          mov     rax, 1
          movd    xmm15, eax
          pshufd  xmm15, xmm15, 0
          movd    xmm13, r8d
          pshufd  xmm13, xmm13, 0
          movdqa  xmm14, xmm15
          pxor    xmm0, xmm0
          pxor    xmm1, xmm1
          pxor    xmm2, xmm2
          pxor    xmm3, xmm3
          xor     r11, r11
          mov     r12, 1
          mov     rax, 64
  L_521_get_entry_65_9_scan_hi:
          movdqa  xmm12, xmm14
          pcmpeqd xmm12, xmm13
          paddd   xmm14, xmm15
          xor     r9, r9
          cmp     r8, r12
          sete    r9b
          neg     r9
          movdqu  xmm4, [rdx]
          pand    xmm4, xmm12
          por     xmm0, xmm4
          movdqu  xmm5, [rdx+16]
          pand    xmm5, xmm12
          por     xmm1, xmm5
          movdqu  xmm6, [rdx+32]
          pand    xmm6, xmm12
          por     xmm2, xmm6
          movdqu  xmm7, [rdx+48]
          pand    xmm7, xmm12
          por     xmm3, xmm7
          mov     r10, QWORD PTR [rdx+64]
          and     r10, r9
          or      r11, r10
          add     rdx, 144
          inc     r12
          dec     rax
          jnz     L_521_get_entry_65_9_scan_hi
          movdqu  [rcx+144], xmm0
          movdqu  [rcx+160], xmm1
          movdqu  [rcx+176], xmm2
          movdqu  [rcx+192], xmm3
          mov     QWORD PTR [rcx+208], r11

          vmovdqu xmm6, OWORD PTR [rsp]
          vmovdqu xmm7, OWORD PTR [rsp+16]
          vmovdqu xmm8, OWORD PTR [rsp+32]
          vmovdqu xmm9, OWORD PTR [rsp+48]
          vmovdqu xmm10, OWORD PTR [rsp+64]
          vmovdqu xmm11, OWORD PTR [rsp+80]
          vmovdqu xmm12, OWORD PTR [rsp+96]
          vmovdqu xmm13, OWORD PTR [rsp+112]
          vmovdqu xmm14, OWORD PTR [rsp+128]
          vmovdqu xmm15, OWORD PTR [rsp+144]
          add     rsp, 160
          pop     r12
          ret
  sp_521_get_entry_65_9 ENDP
  _text ENDS
  67486. IFDEF HAVE_INTEL_AVX2
  ; /* Copy entry idx of a 65-entry table into r, touching every candidate.
  ;  *
  ;  * Starting at entry 1, each candidate entry is loaded, AND-ed with a mask
  ;  * that is all ones only when the running counter equals idx, and OR-ed
  ;  * into accumulators that start at zero. The sequence of memory accesses
  ;  * is therefore the same for every idx, and an idx that matches none of
  ;  * the scanned entries (for example 0) produces all-zero output.
  ;  *
  ;  * Per 144-byte entry: bytes 0..71 are copied to r+0..71 and bytes
  ;  * 72..143 are copied to r+144..215.
  ;  *
  ;  * The scan covers entries 1..64, the same range as sp_521_get_entry_65_9.
  ;  * A count of 65 would additionally read the 144 bytes that follow
  ;  * entry 64.
  ;  * NOTE(review): the scalar mask compares all 64 bits of r8 while the
  ;  * vector mask uses r8d only; assumes idx arrives zero-extended - confirm.
  ;  *
  ;  * r      (rcx) Point to copy into.
  ;  * table  (rdx) Table - start of the entries to access.
  ;  * idx    (r8d) Index of entry to retrieve.
  ;  */
  _text SEGMENT READONLY PARA
  sp_521_get_entry_65_avx2_9 PROC
          push    r12
          push    r13
          push    r14
          sub     rsp, 96
          ; xmm6-xmm11 are used below and restored before returning.
          vmovdqu OWORD PTR [rsp], xmm6
          vmovdqu OWORD PTR [rsp+16], xmm7
          vmovdqu OWORD PTR [rsp+32], xmm8
          vmovdqu OWORD PTR [rsp+48], xmm9
          vmovdqu OWORD PTR [rsp+64], xmm10
          vmovdqu OWORD PTR [rsp+80], xmm11
          mov     r14, 1                          ; scalar candidate counter
          mov     rax, 1
          movd    xmm9, r8d                       ; idx in lane 0
          add     rdx, 144                        ; scanning starts at entry 1
          movd    xmm11, eax                      ; 1 in lane 0
          mov     rax, 64                         ; entries 1..64 remain to be scanned
          vpxor   ymm10, ymm10, ymm10             ; all-zero permute indices
          vpermd  ymm9, ymm10, ymm9               ; broadcast idx to all lanes
          vpermd  ymm11, ymm10, ymm11             ; broadcast 1 to all lanes
          vpxor   ymm0, ymm0, ymm0                ; accumulators start at zero
          vpxor   ymm1, ymm1, ymm1
          vpxor   ymm2, ymm2, ymm2
          vpxor   ymm3, ymm3, ymm3
          xor     r10, r10
          xor     r11, r11
          vmovdqa ymm10, ymm11                    ; vector candidate counter = 1
  L_521_get_entry_65_avx2_9_start:
          vpcmpeqd ymm8, ymm10, ymm9              ; vector mask: counter == idx
          vpaddd  ymm10, ymm10, ymm11             ; counter += 1
          xor     r9, r9
          cmp     r8, r14
          sete    r9b
          neg     r9                              ; scalar mask: -1 if match, else 0
          inc     r14
          vmovupd ymm4, YMMWORD PTR [rdx]
          vmovupd ymm5, YMMWORD PTR [rdx+32]
          vmovupd ymm6, YMMWORD PTR [rdx+72]
          vmovupd ymm7, YMMWORD PTR [rdx+104]
          mov     r12, QWORD PTR [rdx+64]         ; ninth word of the first half
          mov     r13, QWORD PTR [rdx+136]        ; ninth word of the second half
          add     rdx, 144
          vpand   ymm4, ymm4, ymm8
          vpand   ymm5, ymm5, ymm8
          vpand   ymm6, ymm6, ymm8
          vpand   ymm7, ymm7, ymm8
          and     r12, r9
          and     r13, r9
          vpor    ymm0, ymm0, ymm4
          vpor    ymm1, ymm1, ymm5
          vpor    ymm2, ymm2, ymm6
          vpor    ymm3, ymm3, ymm7
          or      r10, r12
          or      r11, r13
          dec     rax
          jnz     L_521_get_entry_65_avx2_9_start
          vmovupd YMMWORD PTR [rcx], ymm0
          vmovupd YMMWORD PTR [rcx+32], ymm1
          vmovupd YMMWORD PTR [rcx+144], ymm2
          vmovupd YMMWORD PTR [rcx+176], ymm3
          mov     QWORD PTR [rcx+64], r10
          mov     QWORD PTR [rcx+208], r11
          vmovdqu xmm6, OWORD PTR [rsp]
          vmovdqu xmm7, OWORD PTR [rsp+16]
          vmovdqu xmm8, OWORD PTR [rsp+32]
          vmovdqu xmm9, OWORD PTR [rsp+48]
          vmovdqu xmm10, OWORD PTR [rsp+64]
          vmovdqu xmm11, OWORD PTR [rsp+80]
          add     rsp, 96
          pop     r14
          pop     r13
          pop     r12
          ret
  sp_521_get_entry_65_avx2_9 ENDP
  _text ENDS
  67569. ENDIF
  67570. ENDIF
  ; /* Increment a nine-word number in place. (a = a + 1)
  ;  *
  ;  * The carry from each 64-bit word is propagated into the next; a carry
  ;  * out of the ninth word is dropped.
  ;  *
  ;  * a  (rcx) A single precision integer, nine 64-bit words.
  ;  */
  _text SEGMENT READONLY PARA
  sp_521_add_one_9 PROC
          add     QWORD PTR [rcx +  0], 1         ; word 0 += 1, sets the carry
          adc     QWORD PTR [rcx +  8], 0         ; ripple the carry upwards
          adc     QWORD PTR [rcx + 16], 0
          adc     QWORD PTR [rcx + 24], 0
          adc     QWORD PTR [rcx + 32], 0
          adc     QWORD PTR [rcx + 40], 0
          adc     QWORD PTR [rcx + 48], 0
          adc     QWORD PTR [rcx + 56], 0
          adc     QWORD PTR [rcx + 64], 0
          ret
  sp_521_add_one_9 ENDP
  _text ENDS
  ; /* Convert a big-endian byte array into little-endian 64-bit words.
  ;  * Uses the bswap instruction.
  ;  *
  ;  * Bytes are consumed from the end of the array: 64 at a time, then 8 at
  ;  * a time, then the 1..7 leading bytes are packed into one last word.
  ;  * Words from there up to (but excluding) r+65 bytes are then zeroed;
  ;  * because the write cursor moves in steps of 8 this clears through the
  ;  * word at offset 64.
  ;  *
  ;  * r     (rcx) A single precision integer.
  ;  * size  (rdx) Maximum number of bytes to convert (not read by this code).
  ;  * a     (r8)  Byte array.
  ;  * n     (r9)  Number of bytes in array to read.
  ;  */
  _text SEGMENT READONLY PARA
  sp_521_from_bin_bswap PROC
          push    r12
          push    r13
          lea     r11, [r8+r9]                    ; r11 = one past the last input byte
          lea     r12, [rcx+65]                   ; r12 = limit for zero filling
          xor     r13, r13                        ; r13 = constant zero
          jmp     L_521_from_bin_bswap_chk64
  L_521_from_bin_bswap_blk64:
          sub     r11, 64
          mov     r10, QWORD PTR [r11+48]
          mov     rax, QWORD PTR [r11+56]
          bswap   r10
          bswap   rax
          mov     QWORD PTR [rcx], rax
          mov     QWORD PTR [rcx+8], r10
          mov     r10, QWORD PTR [r11+32]
          mov     rax, QWORD PTR [r11+40]
          bswap   r10
          bswap   rax
          mov     QWORD PTR [rcx+16], rax
          mov     QWORD PTR [rcx+24], r10
          mov     r10, QWORD PTR [r11+16]
          mov     rax, QWORD PTR [r11+24]
          bswap   r10
          bswap   rax
          mov     QWORD PTR [rcx+32], rax
          mov     QWORD PTR [rcx+40], r10
          mov     r10, QWORD PTR [r11]
          mov     rax, QWORD PTR [r11+8]
          bswap   r10
          bswap   rax
          mov     QWORD PTR [rcx+48], rax
          mov     QWORD PTR [rcx+56], r10
          add     rcx, 64
          sub     r9, 64
  L_521_from_bin_bswap_chk64:
          cmp     r9, 63
          jg      L_521_from_bin_bswap_blk64
          jmp     L_521_from_bin_bswap_chk8
  L_521_from_bin_bswap_blk8:
          sub     r11, 8
          mov     rax, QWORD PTR [r11]
          bswap   rax
          mov     QWORD PTR [rcx], rax
          add     rcx, 8
          sub     r9, 8
  L_521_from_bin_bswap_chk8:
          cmp     r9, 7
          jg      L_521_from_bin_bswap_blk8
          test    r9, r9
          jz      L_521_from_bin_bswap_fill_chk
          ; 1..7 bytes remain at the front of the array (r8 still points there).
          mov     r10, r13
  L_521_from_bin_bswap_top:
          movzx   eax, BYTE PTR [r8]
          shl     r10, 8
          inc     r8
          add     r10, rax                        ; r10 = (r10 << 8) + next byte
          dec     r9
          jg      L_521_from_bin_bswap_top
          mov     QWORD PTR [rcx], r10
          add     rcx, 8
  L_521_from_bin_bswap_fill_chk:
          cmp     rcx, r12
          jge     L_521_from_bin_bswap_done
  L_521_from_bin_bswap_fill:
          mov     QWORD PTR [rcx], r13
          add     rcx, 8
          cmp     rcx, r12
          jl      L_521_from_bin_bswap_fill
  L_521_from_bin_bswap_done:
          pop     r13
          pop     r12
          ret
  sp_521_from_bin_bswap ENDP
  _text ENDS
  67676. IFNDEF NO_MOVBE_SUPPORT
  ; /* Convert a big-endian byte array into little-endian 64-bit words.
  ;  * Uses the movbe instruction which is an optional instruction.
  ;  *
  ;  * Bytes are consumed from the end of the array: 64 at a time, then 8 at
  ;  * a time, then the 1..7 leading bytes are packed into one last word.
  ;  * Words from there up to (but excluding) r+66 bytes are then zeroed;
  ;  * because the write cursor moves in steps of 8 this clears through the
  ;  * word at offset 64.
  ;  *
  ;  * r     (rcx) A single precision integer.
  ;  * size  (rdx) Maximum number of bytes to convert (not read by this code).
  ;  * a     (r8)  Byte array.
  ;  * n     (r9)  Number of bytes in array to read.
  ;  */
  _text SEGMENT READONLY PARA
  sp_521_from_bin_movbe PROC
          push    r12
          lea     r11, [r8+r9]                    ; r11 = one past the last input byte
          lea     r12, [rcx+66]                   ; r12 = limit for zero filling
          jmp     L_521_from_bin_movbe_chk64
  L_521_from_bin_movbe_blk64:
          sub     r11, 64
          movbe   r10, QWORD PTR [r11+48]
          movbe   rax, QWORD PTR [r11+56]
          mov     QWORD PTR [rcx], rax
          mov     QWORD PTR [rcx+8], r10
          movbe   r10, QWORD PTR [r11+32]
          movbe   rax, QWORD PTR [r11+40]
          mov     QWORD PTR [rcx+16], rax
          mov     QWORD PTR [rcx+24], r10
          movbe   r10, QWORD PTR [r11+16]
          movbe   rax, QWORD PTR [r11+24]
          mov     QWORD PTR [rcx+32], rax
          mov     QWORD PTR [rcx+40], r10
          movbe   r10, QWORD PTR [r11]
          movbe   rax, QWORD PTR [r11+8]
          mov     QWORD PTR [rcx+48], rax
          mov     QWORD PTR [rcx+56], r10
          add     rcx, 64
          sub     r9, 64
  L_521_from_bin_movbe_chk64:
          cmp     r9, 63
          jg      L_521_from_bin_movbe_blk64
          jmp     L_521_from_bin_movbe_chk8
  L_521_from_bin_movbe_blk8:
          sub     r11, 8
          movbe   rax, QWORD PTR [r11]
          mov     QWORD PTR [rcx], rax
          add     rcx, 8
          sub     r9, 8
  L_521_from_bin_movbe_chk8:
          cmp     r9, 7
          jg      L_521_from_bin_movbe_blk8
          test    r9, r9
          jz      L_521_from_bin_movbe_fill_chk
          ; 1..7 bytes remain at the front of the array (r8 still points there).
          xor     r10d, r10d
  L_521_from_bin_movbe_top:
          movzx   eax, BYTE PTR [r8]
          shl     r10, 8
          inc     r8
          add     r10, rax                        ; r10 = (r10 << 8) + next byte
          dec     r9
          jg      L_521_from_bin_movbe_top
          mov     QWORD PTR [rcx], r10
          add     rcx, 8
  L_521_from_bin_movbe_fill_chk:
          cmp     rcx, r12
          jge     L_521_from_bin_movbe_done
  L_521_from_bin_movbe_fill:
          mov     QWORD PTR [rcx], 0
          add     rcx, 8
          cmp     rcx, r12
          jl      L_521_from_bin_movbe_fill
  L_521_from_bin_movbe_done:
          pop     r12
          ret
  sp_521_from_bin_movbe ENDP
  _text ENDS
  67752. ENDIF
  ; /* Write r as a big-endian byte array.
  ;  * Fixed length number of bytes written: 66 (a[0]..a[65]).
  ;  * Uses the bswap instruction.
  ;  *
  ;  * The two low bytes of the ninth word (r bytes 65 and 64) become a[0]
  ;  * and a[1]; words 7 down to 0 follow, each byte-reversed.
  ;  *
  ;  * r  (rcx) A single precision integer.
  ;  * a  (rdx) Byte array.
  ;  */
  _text SEGMENT READONLY PARA
  sp_521_to_bin_bswap_9 PROC
          ; Top two bytes come from the ninth word.
          mov     al, BYTE PTR [rcx+65]
          mov     r8b, BYTE PTR [rcx+64]
          mov     BYTE PTR [rdx], al
          mov     BYTE PTR [rdx+1], r8b
          ; Words 7 and 6
          mov     r8, QWORD PTR [rcx+48]
          mov     rax, QWORD PTR [rcx+56]
          bswap   r8
          bswap   rax
          mov     QWORD PTR [rdx+2], rax
          mov     QWORD PTR [rdx+10], r8
          ; Words 5 and 4
          mov     r8, QWORD PTR [rcx+32]
          mov     rax, QWORD PTR [rcx+40]
          bswap   r8
          bswap   rax
          mov     QWORD PTR [rdx+18], rax
          mov     QWORD PTR [rdx+26], r8
          ; Words 3 and 2
          mov     r8, QWORD PTR [rcx+16]
          mov     rax, QWORD PTR [rcx+24]
          bswap   r8
          bswap   rax
          mov     QWORD PTR [rdx+34], rax
          mov     QWORD PTR [rdx+42], r8
          ; Words 1 and 0
          mov     r8, QWORD PTR [rcx]
          mov     rax, QWORD PTR [rcx+8]
          bswap   r8
          bswap   rax
          mov     QWORD PTR [rdx+50], rax
          mov     QWORD PTR [rdx+58], r8
          ret
  sp_521_to_bin_bswap_9 ENDP
  _text ENDS
  67793. IFNDEF NO_MOVBE_SUPPORT
  ; /* Write r as a big-endian byte array.
  ;  * Fixed length number of bytes written: 66 (a[0]..a[65]).
  ;  * Uses the movbe instruction which is optional.
  ;  *
  ;  * The two low bytes of the ninth word (r bytes 65 and 64) become a[0]
  ;  * and a[1]; words 7 down to 0 follow, each loaded byte-reversed.
  ;  *
  ;  * r  (rcx) A single precision integer.
  ;  * a  (rdx) Byte array.
  ;  */
  _text SEGMENT READONLY PARA
  sp_521_to_bin_movbe_9 PROC
          ; Top two bytes come from the ninth word.
          mov     al, BYTE PTR [rcx+65]
          mov     r8b, BYTE PTR [rcx+64]
          mov     BYTE PTR [rdx], al
          mov     BYTE PTR [rdx+1], r8b
          ; Words 7 and 6
          movbe   r8, QWORD PTR [rcx+48]
          movbe   rax, QWORD PTR [rcx+56]
          mov     QWORD PTR [rdx+2], rax
          mov     QWORD PTR [rdx+10], r8
          ; Words 5 and 4
          movbe   r8, QWORD PTR [rcx+32]
          movbe   rax, QWORD PTR [rcx+40]
          mov     QWORD PTR [rdx+18], rax
          mov     QWORD PTR [rdx+26], r8
          ; Words 3 and 2
          movbe   r8, QWORD PTR [rcx+16]
          movbe   rax, QWORD PTR [rcx+24]
          mov     QWORD PTR [rdx+34], rax
          mov     QWORD PTR [rdx+42], r8
          ; Words 1 and 0
          movbe   r8, QWORD PTR [rcx]
          movbe   rax, QWORD PTR [rcx+8]
          mov     QWORD PTR [rdx+50], rax
          mov     QWORD PTR [rdx+58], r8
          ret
  sp_521_to_bin_movbe_9 ENDP
  _text ENDS
  67826. ENDIF
  ; /* Shift a nine-word number right by n bits. (r = a >> n)
  ;  *
  ;  * The count is taken from cl by shrd/shr, which use it modulo 64 for
  ;  * 64-bit operands.
  ;  *
  ;  * The destination pointer is copied to rax BEFORE rcx is overwritten
  ;  * with the shift count; doing it in the other order would leave n in
  ;  * rax and every store below would go to the wrong address.
  ;  *
  ;  * r  (rcx) Result of right shift by n.
  ;  * a  (rdx) Number to shift.
  ;  * n  (r8)  Amount to shift.
  ;  */
  _text SEGMENT READONLY PARA
  sp_521_rshift_9 PROC
          push    r12
          mov     rax, rcx                        ; rax = r (save before rcx is reused)
          mov     rcx, r8                         ; cl = shift count
          mov     r8, QWORD PTR [rdx]
          mov     r9, QWORD PTR [rdx+8]
          mov     r10, QWORD PTR [rdx+16]
          mov     r11, QWORD PTR [rdx+24]
          mov     r12, QWORD PTR [rdx+32]
          ; Each word takes its new high bits from the word above it.
          shrd    r8, r9, cl
          shrd    r9, r10, cl
          shrd    r10, r11, cl
          shrd    r11, r12, cl
          mov     QWORD PTR [rax], r8
          mov     QWORD PTR [rax+8], r9
          mov     QWORD PTR [rax+16], r10
          mov     QWORD PTR [rax+24], r11
          mov     r9, QWORD PTR [rdx+40]
          mov     r10, QWORD PTR [rdx+48]
          mov     r11, QWORD PTR [rdx+56]
          mov     r8, QWORD PTR [rdx+64]
          shrd    r12, r9, cl
          shrd    r9, r10, cl
          shrd    r10, r11, cl
          shrd    r11, r8, cl
          mov     QWORD PTR [rax+32], r12
          mov     QWORD PTR [rax+40], r9
          mov     QWORD PTR [rax+48], r10
          mov     QWORD PTR [rax+56], r11
          shr     r8, cl                          ; top word: zeros shift in
          mov     QWORD PTR [rax+64], r8
          pop     r12
          ret
  sp_521_rshift_9 ENDP
  _text ENDS
  ; /* Shift a nine-word number left by n bits. (r = a << n)
  ;  *
  ;  * Ten words are written: r[0..8] hold the shifted value and r[9]
  ;  * receives the bits shifted out of a[8]. The count is taken from cl by
  ;  * shld/shl, which use it modulo 64 for 64-bit operands.
  ;  *
  ;  * The destination pointer is copied to rax BEFORE cl is loaded with the
  ;  * shift count; cl is the low byte of rcx, so loading it first would
  ;  * corrupt the low byte of the pointer that all stores below use.
  ;  *
  ;  * r  (rcx) Result of left shift by n.
  ;  * a  (rdx) Number to shift.
  ;  * n  (r8b) Amount to shift.
  ;  */
  _text SEGMENT READONLY PARA
  sp_521_lshift_9 PROC
          push    r12
          push    r13
          mov     rax, rcx                        ; rax = r (save before cl is written)
          mov     cl, r8b                         ; cl = shift count
          mov     r12, 0
          ; Words 4..8, working from the top down.
          mov     r13, QWORD PTR [rdx+32]
          mov     r8, QWORD PTR [rdx+40]
          mov     r9, QWORD PTR [rdx+48]
          mov     r10, QWORD PTR [rdx+56]
          mov     r11, QWORD PTR [rdx+64]
          shld    r12, r11, cl                    ; r12 = bits shifted out of a[8]
          shld    r11, r10, cl
          shld    r10, r9, cl
          shld    r9, r8, cl
          shld    r8, r13, cl
          mov     QWORD PTR [rax+40], r8
          mov     QWORD PTR [rax+48], r9
          mov     QWORD PTR [rax+56], r10
          mov     QWORD PTR [rax+64], r11
          mov     QWORD PTR [rax+72], r12
          ; Words 0..3; r13 still holds the unshifted a[4].
          mov     r11, QWORD PTR [rdx]
          mov     r8, QWORD PTR [rdx+8]
          mov     r9, QWORD PTR [rdx+16]
          mov     r10, QWORD PTR [rdx+24]
          shld    r13, r10, cl
          shld    r10, r9, cl
          shld    r9, r8, cl
          shld    r8, r11, cl
          mov     QWORD PTR [rax+8], r8
          mov     QWORD PTR [rax+16], r9
          mov     QWORD PTR [rax+24], r10
          mov     QWORD PTR [rax+32], r13
          shl     r11, cl                         ; bottom word: zeros shift in
          mov     QWORD PTR [rax], r11
          pop     r13
          pop     r12
          ret
  sp_521_lshift_9 ENDP
  _text ENDS
  ; /* Shift an eighteen-word number left by n bits. (r = a << n)
  ;  *
  ;  * Nineteen words are written: r[0..17] hold the shifted value and r[18]
  ;  * receives the bits shifted out of a[17]. The count is taken from cl by
  ;  * shld/shl, which use it modulo 64 for 64-bit operands.
  ;  *
  ;  * The destination pointer is copied to rax BEFORE cl is loaded with the
  ;  * shift count; cl is the low byte of rcx, so loading it first would
  ;  * corrupt the low byte of the pointer that all stores below use.
  ;  *
  ;  * r  (rcx) Result of left shift by n.
  ;  * a  (rdx) Number to shift.
  ;  * n  (r8b) Amount to shift.
  ;  */
  _text SEGMENT READONLY PARA
  sp_521_lshift_18 PROC
          push    r12
          push    r13
          mov     rax, rcx                        ; rax = r (save before cl is written)
          mov     cl, r8b                         ; cl = shift count
          mov     r12, 0
          ; Words 13..17, working from the top down.
          mov     r13, QWORD PTR [rdx+104]
          mov     r8, QWORD PTR [rdx+112]
          mov     r9, QWORD PTR [rdx+120]
          mov     r10, QWORD PTR [rdx+128]
          mov     r11, QWORD PTR [rdx+136]
          shld    r12, r11, cl                    ; r12 = bits shifted out of a[17]
          shld    r11, r10, cl
          shld    r10, r9, cl
          shld    r9, r8, cl
          shld    r8, r13, cl
          mov     QWORD PTR [rax+112], r8
          mov     QWORD PTR [rax+120], r9
          mov     QWORD PTR [rax+128], r10
          mov     QWORD PTR [rax+136], r11
          mov     QWORD PTR [rax+144], r12
          ; Words 9..12; r13 still holds the unshifted a[13].
          mov     r11, QWORD PTR [rdx+72]
          mov     r8, QWORD PTR [rdx+80]
          mov     r9, QWORD PTR [rdx+88]
          mov     r10, QWORD PTR [rdx+96]
          shld    r13, r10, cl
          shld    r10, r9, cl
          shld    r9, r8, cl
          shld    r8, r11, cl
          mov     QWORD PTR [rax+80], r8
          mov     QWORD PTR [rax+88], r9
          mov     QWORD PTR [rax+96], r10
          mov     QWORD PTR [rax+104], r13
          ; Words 5..8; r11 still holds the unshifted a[9].
          mov     r13, QWORD PTR [rdx+40]
          mov     r8, QWORD PTR [rdx+48]
          mov     r9, QWORD PTR [rdx+56]
          mov     r10, QWORD PTR [rdx+64]
          shld    r11, r10, cl
          shld    r10, r9, cl
          shld    r9, r8, cl
          shld    r8, r13, cl
          mov     QWORD PTR [rax+48], r8
          mov     QWORD PTR [rax+56], r9
          mov     QWORD PTR [rax+64], r10
          mov     QWORD PTR [rax+72], r11
          ; Words 1..4; r13 still holds the unshifted a[5].
          mov     r11, QWORD PTR [rdx+8]
          mov     r8, QWORD PTR [rdx+16]
          mov     r9, QWORD PTR [rdx+24]
          mov     r10, QWORD PTR [rdx+32]
          shld    r13, r10, cl
          shld    r10, r9, cl
          shld    r9, r8, cl
          shld    r8, r11, cl
          mov     QWORD PTR [rax+16], r8
          mov     QWORD PTR [rax+24], r9
          mov     QWORD PTR [rax+32], r10
          mov     QWORD PTR [rax+40], r13
          ; Word 0; r11 still holds the unshifted a[1].
          mov     r10, QWORD PTR [rdx]
          shld    r11, r10, cl
          shl     r10, cl                         ; bottom word: zeros shift in
          mov     QWORD PTR [rax], r10
          mov     QWORD PTR [rax+8], r11
          pop     r13
          pop     r12
          ret
  sp_521_lshift_18 ENDP
  _text ENDS
  ; /* Subtract b from a, storing the result in a. (a -= b)
  ;  *
  ;  * Nine 64-bit words are processed from least to most significant with
  ;  * the borrow carried between them (mov does not disturb the flags).
  ;  * Returns rax = 0 when there is no final borrow and rax = -1 when there
  ;  * is one.
  ;  *
  ;  * a  (rcx) A single precision integer and result.
  ;  * b  (rdx) A single precision integer.
  ;  */
  _text SEGMENT READONLY PARA
  sp_521_sub_in_place_9 PROC
          ; word 0
          mov     r8, QWORD PTR [rcx]
          sub     r8, QWORD PTR [rdx]
          mov     QWORD PTR [rcx], r8
          ; word 1
          mov     r8, QWORD PTR [rcx+8]
          sbb     r8, QWORD PTR [rdx+8]
          mov     QWORD PTR [rcx+8], r8
          ; word 2
          mov     r8, QWORD PTR [rcx+16]
          sbb     r8, QWORD PTR [rdx+16]
          mov     QWORD PTR [rcx+16], r8
          ; word 3
          mov     r8, QWORD PTR [rcx+24]
          sbb     r8, QWORD PTR [rdx+24]
          mov     QWORD PTR [rcx+24], r8
          ; word 4
          mov     r8, QWORD PTR [rcx+32]
          sbb     r8, QWORD PTR [rdx+32]
          mov     QWORD PTR [rcx+32], r8
          ; word 5
          mov     r8, QWORD PTR [rcx+40]
          sbb     r8, QWORD PTR [rdx+40]
          mov     QWORD PTR [rcx+40], r8
          ; word 6
          mov     r8, QWORD PTR [rcx+48]
          sbb     r8, QWORD PTR [rdx+48]
          mov     QWORD PTR [rcx+48], r8
          ; word 7
          mov     r8, QWORD PTR [rcx+56]
          sbb     r8, QWORD PTR [rdx+56]
          mov     QWORD PTR [rcx+56], r8
          ; word 8
          mov     r8, QWORD PTR [rcx+64]
          sbb     r8, QWORD PTR [rdx+64]
          mov     QWORD PTR [rcx+64], r8
          sbb     rax, rax                        ; rax = 0 - borrow
          ret
  sp_521_sub_in_place_9 ENDP
  _text ENDS
  ; /* Multiply a nine-word number by a single 64-bit digit. (r = a * b)
  ;  *
  ;  * Ten words are written to r. Each step forms the 128-bit product
  ;  * a[i] * b in rdx:rax, adds the high word carried from the previous
  ;  * step, stores the low word and carries the high word forward. The
  ;  * high word cannot overflow when the carry is added, because
  ;  * (2^64-1)^2 + (2^64-1) is less than 2^128.
  ;  *
  ;  * r  (rcx) A single precision integer.
  ;  * a  (rdx) A single precision integer.
  ;  * b  (r8)  A single precision digit.
  ;  */
  _text SEGMENT READONLY PARA
  sp_521_mul_d_9 PROC
          mov     r9, rdx                         ; r9 = a; mul overwrites rdx
          ; A[0] * B
          mov     rax, r8
          mul     QWORD PTR [r9]
          mov     QWORD PTR [rcx], rax
          mov     r10, rdx                        ; r10 = carry word
          ; A[1] * B
          mov     rax, r8
          mul     QWORD PTR [r9+8]
          add     rax, r10
          adc     rdx, 0
          mov     QWORD PTR [rcx+8], rax
          mov     r10, rdx
          ; A[2] * B
          mov     rax, r8
          mul     QWORD PTR [r9+16]
          add     rax, r10
          adc     rdx, 0
          mov     QWORD PTR [rcx+16], rax
          mov     r10, rdx
          ; A[3] * B
          mov     rax, r8
          mul     QWORD PTR [r9+24]
          add     rax, r10
          adc     rdx, 0
          mov     QWORD PTR [rcx+24], rax
          mov     r10, rdx
          ; A[4] * B
          mov     rax, r8
          mul     QWORD PTR [r9+32]
          add     rax, r10
          adc     rdx, 0
          mov     QWORD PTR [rcx+32], rax
          mov     r10, rdx
          ; A[5] * B
          mov     rax, r8
          mul     QWORD PTR [r9+40]
          add     rax, r10
          adc     rdx, 0
          mov     QWORD PTR [rcx+40], rax
          mov     r10, rdx
          ; A[6] * B
          mov     rax, r8
          mul     QWORD PTR [r9+48]
          add     rax, r10
          adc     rdx, 0
          mov     QWORD PTR [rcx+48], rax
          mov     r10, rdx
          ; A[7] * B
          mov     rax, r8
          mul     QWORD PTR [r9+56]
          add     rax, r10
          adc     rdx, 0
          mov     QWORD PTR [rcx+56], rax
          mov     r10, rdx
          ; A[8] * B
          mov     rax, r8
          mul     QWORD PTR [r9+64]
          add     rax, r10
          adc     rdx, 0
          mov     QWORD PTR [rcx+64], rax
          mov     QWORD PTR [rcx+72], rdx         ; final carry is the tenth word
          ret
  sp_521_mul_d_9 ENDP
  _text ENDS
  68111. IFDEF HAVE_INTEL_AVX2
  ; /* Multiply a nine-word number by a single 64-bit digit. (r = a * b)
  ;  *
  ;  * Ten words are written to r. mulx forms each 128-bit product without
  ;  * touching the flags, so one add/adc chain links the steps: the low
  ;  * half of each product is added to the high half of the previous one
  ;  * (plus carry) and stored. The last high half plus carry is r[9].
  ;  *
  ;  * r  (rcx) A single precision integer.
  ;  * a  (rdx) A single precision integer.
  ;  * b  (r8)  A single precision digit.
  ;  */
  _text SEGMENT READONLY PARA
  sp_521_mul_d_avx2_9 PROC
          mov     rax, rdx                        ; rax = a
          mov     rdx, r8                         ; rdx = b, implicit mulx operand
          ; A[0] * B
          mulx    r9, r8, QWORD PTR [rax]         ; r9:r8 = product
          mov     QWORD PTR [rcx], r8
          ; A[1] * B
          mulx    r10, r8, QWORD PTR [rax+8]
          add     r8, r9                          ; starts the carry chain
          mov     QWORD PTR [rcx+8], r8
          ; A[2] * B
          mulx    r9, r8, QWORD PTR [rax+16]
          adc     r8, r10
          mov     QWORD PTR [rcx+16], r8
          ; A[3] * B
          mulx    r10, r8, QWORD PTR [rax+24]
          adc     r8, r9
          mov     QWORD PTR [rcx+24], r8
          ; A[4] * B
          mulx    r9, r8, QWORD PTR [rax+32]
          adc     r8, r10
          mov     QWORD PTR [rcx+32], r8
          ; A[5] * B
          mulx    r10, r8, QWORD PTR [rax+40]
          adc     r8, r9
          mov     QWORD PTR [rcx+40], r8
          ; A[6] * B
          mulx    r9, r8, QWORD PTR [rax+48]
          adc     r8, r10
          mov     QWORD PTR [rcx+48], r8
          ; A[7] * B
          mulx    r10, r8, QWORD PTR [rax+56]
          adc     r8, r9
          mov     QWORD PTR [rcx+56], r8
          ; A[8] * B
          mulx    r9, r8, QWORD PTR [rax+64]
          adc     r8, r10
          mov     QWORD PTR [rcx+64], r8
          adc     r9, 0                           ; fold the last carry into the top word
          mov     QWORD PTR [rcx+72], r9
          ret
  sp_521_mul_d_avx2_9 ENDP
  _text ENDS
  68183. ENDIF
  68184. IFDEF _WIN64
  ; /* Divide the double width number (d1|d0) by a single word. (d1|d0 / div)
  ;  *
  ;  * Uses the hardware div instruction, which divides rdx:rax by its
  ;  * operand and faults if the operand is zero or the quotient does not fit
  ;  * in 64 bits. The remainder left in rdx is not returned.
  ;  *
  ;  * d1   (rcx) The high order half of the number to divide.
  ;  * d0   (rdx) The low order half of the number to divide.
  ;  * div  (r8)  The divisor.
  ;  * returns the quotient in rax.
  ;  */
  _text SEGMENT READONLY PARA
  div_521_word_asm_9 PROC
          mov     rax, rdx                        ; low half  -> rax
          mov     rdx, rcx                        ; high half -> rdx
          div     r8
          ret
  div_521_word_asm_9 ENDP
  _text ENDS
  68201. ENDIF
  ; /* Shift a nine-word number right by 1 bit. (r = a >> 1)
  ;  *
  ;  * Walks from the lowest word upwards holding two adjacent words in
  ;  * rax/r8: each word receives its new top bit from the word above it,
  ;  * and a zero is shifted into the ninth word.
  ;  *
  ;  * r  (rcx) Result of right shift by 1.
  ;  * a  (rdx) Number to shift.
  ;  */
  _text SEGMENT READONLY PARA
  sp_521_rshift1_9 PROC
          mov     rax, QWORD PTR [rdx]
          mov     r8, QWORD PTR [rdx+8]
          shrd    rax, r8, 1
          mov     QWORD PTR [rcx], rax
          mov     rax, QWORD PTR [rdx+16]
          shrd    r8, rax, 1
          mov     QWORD PTR [rcx+8], r8
          mov     r8, QWORD PTR [rdx+24]
          shrd    rax, r8, 1
          mov     QWORD PTR [rcx+16], rax
          mov     rax, QWORD PTR [rdx+32]
          shrd    r8, rax, 1
          mov     QWORD PTR [rcx+24], r8
          mov     r8, QWORD PTR [rdx+40]
          shrd    rax, r8, 1
          mov     QWORD PTR [rcx+32], rax
          mov     rax, QWORD PTR [rdx+48]
          shrd    r8, rax, 1
          mov     QWORD PTR [rcx+40], r8
          mov     r8, QWORD PTR [rdx+56]
          shrd    rax, r8, 1
          mov     QWORD PTR [rcx+48], rax
          mov     rax, QWORD PTR [rdx+64]
          shrd    r8, rax, 1
          mov     QWORD PTR [rcx+56], r8
          shr     rax, 1                          ; top word: zero shifts in
          mov     QWORD PTR [rcx+64], rax
          ret
  sp_521_rshift1_9 ENDP
  _text ENDS
  ; /* Divide the number by 2 mod the prime. (r = a / 2 % m)
  ;  *
  ;  * If a is even the result is a >> 1. If a is odd, m is added first
  ;  * (nine-word add with carry propagation, result stored in r) and that
  ;  * sum is shifted right by one bit.
  ;  *
  ;  * After the addition the source pointer is switched to r, so the shift
  ;  * operates on a + m even when r and a are different buffers. Reading
  ;  * from a at that point would only be correct for in-place calls
  ;  * (r == a), where the sum has overwritten a.
  ;  *
  ;  * The branch taken depends on the low bit of a.
  ;  *
  ;  * r  (rcx) Result of division by 2.
  ;  * a  (rdx) Number to divide.
  ;  * m  (r8)  Modulus.
  ;  */
  _text SEGMENT READONLY PARA
  sp_521_div2_mod_9 PROC
          push    r12
          mov     rax, QWORD PTR [rdx]
          and     rax, 1
          je      L_521_mod_inv_9_div2_mod_no_add
          ; a is odd: r = a + m
          mov     rax, QWORD PTR [rdx]
          mov     r9, QWORD PTR [rdx+8]
          mov     r10, QWORD PTR [r8]
          mov     r11, QWORD PTR [r8+8]
          add     rax, r10
          adc     r9, r11
          mov     QWORD PTR [rcx], rax
          mov     QWORD PTR [rcx+8], r9
          mov     rax, QWORD PTR [rdx+16]
          mov     r9, QWORD PTR [rdx+24]
          mov     r10, QWORD PTR [r8+16]
          mov     r11, QWORD PTR [r8+24]
          adc     rax, r10
          adc     r9, r11
          mov     QWORD PTR [rcx+16], rax
          mov     QWORD PTR [rcx+24], r9
          mov     rax, QWORD PTR [rdx+32]
          mov     r9, QWORD PTR [rdx+40]
          mov     r10, QWORD PTR [r8+32]
          mov     r11, QWORD PTR [r8+40]
          adc     rax, r10
          adc     r9, r11
          mov     QWORD PTR [rcx+32], rax
          mov     QWORD PTR [rcx+40], r9
          mov     rax, QWORD PTR [rdx+48]
          mov     r9, QWORD PTR [rdx+56]
          mov     r10, QWORD PTR [r8+48]
          mov     r11, QWORD PTR [r8+56]
          adc     rax, r10
          adc     r9, r11
          mov     QWORD PTR [rcx+48], rax
          mov     QWORD PTR [rcx+56], r9
          mov     rax, QWORD PTR [rdx+64]
          mov     r10, QWORD PTR [r8+64]
          adc     rax, r10
          mov     QWORD PTR [rcx+64], rax
          mov     rdx, rcx                        ; shift the sum just stored in r
  L_521_mod_inv_9_div2_mod_no_add:
          ; r = [rdx] >> 1, where rdx is a (even case) or r (odd case)
          mov     rax, QWORD PTR [rdx]
          mov     r9, QWORD PTR [rdx+8]
          mov     r10, QWORD PTR [rdx+16]
          mov     r11, QWORD PTR [rdx+24]
          mov     r12, QWORD PTR [rdx+32]
          shrd    rax, r9, 1
          shrd    r9, r10, 1
          shrd    r10, r11, 1
          shrd    r11, r12, 1
          mov     QWORD PTR [rcx], rax
          mov     QWORD PTR [rcx+8], r9
          mov     QWORD PTR [rcx+16], r10
          mov     QWORD PTR [rcx+24], r11
          mov     r9, QWORD PTR [rdx+40]
          mov     r10, QWORD PTR [rdx+48]
          mov     r11, QWORD PTR [rdx+56]
          mov     rax, QWORD PTR [rdx+64]
          shrd    r12, r9, 1
          shrd    r9, r10, 1
          shrd    r10, r11, 1
          shrd    r11, rax, 1
          mov     QWORD PTR [rcx+32], r12
          mov     QWORD PTR [rcx+40], r9
          mov     QWORD PTR [rcx+48], r10
          mov     QWORD PTR [rcx+56], r11
          shr     rax, 1
          mov     QWORD PTR [rcx+64], rax
          pop     r12
          ret
  sp_521_div2_mod_9 ENDP
  _text ENDS
  ; /* Count the significant bits of a nine-word number.
  ;  *
  ;  * Scans from the most significant word (offset 64) down to the least
  ;  * significant. For the first non-zero word found at word index k the
  ;  * result is 64*k + (position of its highest set bit) + 1. Returns 0
  ;  * when every word is zero.
  ;  *
  ;  * a  (rcx) A single precision integer.
  ;  * returns the number of bits in rax.
  ;  */
  _text SEGMENT READONLY PARA
  sp_521_num_bits_9 PROC
          mov     rdx, QWORD PTR [rcx+64]
          test    rdx, rdx
          jz      L_521_num_bits_9_word_7
          bsr     rax, rdx
          add     rax, 513
          ret
  L_521_num_bits_9_word_7:
          mov     rdx, QWORD PTR [rcx+56]
          test    rdx, rdx
          jz      L_521_num_bits_9_word_6
          bsr     rax, rdx
          add     rax, 449
          ret
  L_521_num_bits_9_word_6:
          mov     rdx, QWORD PTR [rcx+48]
          test    rdx, rdx
          jz      L_521_num_bits_9_word_5
          bsr     rax, rdx
          add     rax, 385
          ret
  L_521_num_bits_9_word_5:
          mov     rdx, QWORD PTR [rcx+40]
          test    rdx, rdx
          jz      L_521_num_bits_9_word_4
          bsr     rax, rdx
          add     rax, 321
          ret
  L_521_num_bits_9_word_4:
          mov     rdx, QWORD PTR [rcx+32]
          test    rdx, rdx
          jz      L_521_num_bits_9_word_3
          bsr     rax, rdx
          add     rax, 257
          ret
  L_521_num_bits_9_word_3:
          mov     rdx, QWORD PTR [rcx+24]
          test    rdx, rdx
          jz      L_521_num_bits_9_word_2
          bsr     rax, rdx
          add     rax, 193
          ret
  L_521_num_bits_9_word_2:
          mov     rdx, QWORD PTR [rcx+16]
          test    rdx, rdx
          jz      L_521_num_bits_9_word_1
          bsr     rax, rdx
          add     rax, 129
          ret
  L_521_num_bits_9_word_1:
          mov     rdx, QWORD PTR [rcx+8]
          test    rdx, rdx
          jz      L_521_num_bits_9_word_0
          bsr     rax, rdx
          add     rax, 65
          ret
  L_521_num_bits_9_word_0:
          mov     rdx, QWORD PTR [rcx]
          test    rdx, rdx
          jz      L_521_num_bits_9_all_zero
          bsr     rax, rdx
          add     rax, 1
          ret
  L_521_num_bits_9_all_zero:
          xor     rax, rax                        ; every word was zero
          ret
  sp_521_num_bits_9 ENDP
  _text ENDS
  68400. ENDIF
  68401. IFDEF WOLFSSL_SP_1024
  68402. ; /* Multiply a and b into r. (r = a * b)
  68403. ; *
  68404. ; * r A single precision integer.
  68405. ; * a A single precision integer.
  68406. ; * b A single precision integer.
  68407. ; */
  68408. _text SEGMENT READONLY PARA
  68409. sp_1024_mul_16 PROC
  68410. push r12
  68411. mov r9, rdx
  68412. sub rsp, 128
  68413. ; A[0] * B[0]
  68414. mov rax, QWORD PTR [r8]
  68415. mul QWORD PTR [r9]
  68416. xor r12, r12
  68417. mov QWORD PTR [rsp], rax
  68418. mov r11, rdx
  68419. ; A[0] * B[1]
  68420. mov rax, QWORD PTR [r8+8]
  68421. mul QWORD PTR [r9]
  68422. xor r10, r10
  68423. add r11, rax
  68424. adc r12, rdx
  68425. adc r10, 0
  68426. ; A[1] * B[0]
  68427. mov rax, QWORD PTR [r8]
  68428. mul QWORD PTR [r9+8]
  68429. add r11, rax
  68430. adc r12, rdx
  68431. adc r10, 0
  68432. mov QWORD PTR [rsp+8], r11
  68433. ; A[0] * B[2]
  68434. mov rax, QWORD PTR [r8+16]
  68435. mul QWORD PTR [r9]
  68436. xor r11, r11
  68437. add r12, rax
  68438. adc r10, rdx
  68439. adc r11, 0
  68440. ; A[1] * B[1]
  68441. mov rax, QWORD PTR [r8+8]
  68442. mul QWORD PTR [r9+8]
  68443. add r12, rax
  68444. adc r10, rdx
  68445. adc r11, 0
  68446. ; A[2] * B[0]
  68447. mov rax, QWORD PTR [r8]
  68448. mul QWORD PTR [r9+16]
  68449. add r12, rax
  68450. adc r10, rdx
  68451. adc r11, 0
  68452. mov QWORD PTR [rsp+16], r12
  68453. ; A[0] * B[3]
  68454. mov rax, QWORD PTR [r8+24]
  68455. mul QWORD PTR [r9]
  68456. xor r12, r12
  68457. add r10, rax
  68458. adc r11, rdx
  68459. adc r12, 0
  68460. ; A[1] * B[2]
  68461. mov rax, QWORD PTR [r8+16]
  68462. mul QWORD PTR [r9+8]
  68463. add r10, rax
  68464. adc r11, rdx
  68465. adc r12, 0
  68466. ; A[2] * B[1]
  68467. mov rax, QWORD PTR [r8+8]
  68468. mul QWORD PTR [r9+16]
  68469. add r10, rax
  68470. adc r11, rdx
  68471. adc r12, 0
  68472. ; A[3] * B[0]
  68473. mov rax, QWORD PTR [r8]
  68474. mul QWORD PTR [r9+24]
  68475. add r10, rax
  68476. adc r11, rdx
  68477. adc r12, 0
  68478. mov QWORD PTR [rsp+24], r10
  68479. ; A[0] * B[4]
  68480. mov rax, QWORD PTR [r8+32]
  68481. mul QWORD PTR [r9]
  68482. xor r10, r10
  68483. add r11, rax
  68484. adc r12, rdx
  68485. adc r10, 0
  68486. ; A[1] * B[3]
  68487. mov rax, QWORD PTR [r8+24]
  68488. mul QWORD PTR [r9+8]
  68489. add r11, rax
  68490. adc r12, rdx
  68491. adc r10, 0
  68492. ; A[2] * B[2]
  68493. mov rax, QWORD PTR [r8+16]
  68494. mul QWORD PTR [r9+16]
  68495. add r11, rax
  68496. adc r12, rdx
  68497. adc r10, 0
  68498. ; A[3] * B[1]
  68499. mov rax, QWORD PTR [r8+8]
  68500. mul QWORD PTR [r9+24]
  68501. add r11, rax
  68502. adc r12, rdx
  68503. adc r10, 0
  68504. ; A[4] * B[0]
  68505. mov rax, QWORD PTR [r8]
  68506. mul QWORD PTR [r9+32]
  68507. add r11, rax
  68508. adc r12, rdx
  68509. adc r10, 0
  68510. mov QWORD PTR [rsp+32], r11
  68511. ; A[0] * B[5]
  68512. mov rax, QWORD PTR [r8+40]
  68513. mul QWORD PTR [r9]
  68514. xor r11, r11
  68515. add r12, rax
  68516. adc r10, rdx
  68517. adc r11, 0
  68518. ; A[1] * B[4]
  68519. mov rax, QWORD PTR [r8+32]
  68520. mul QWORD PTR [r9+8]
  68521. add r12, rax
  68522. adc r10, rdx
  68523. adc r11, 0
  68524. ; A[2] * B[3]
  68525. mov rax, QWORD PTR [r8+24]
  68526. mul QWORD PTR [r9+16]
  68527. add r12, rax
  68528. adc r10, rdx
  68529. adc r11, 0
  68530. ; A[3] * B[2]
  68531. mov rax, QWORD PTR [r8+16]
  68532. mul QWORD PTR [r9+24]
  68533. add r12, rax
  68534. adc r10, rdx
  68535. adc r11, 0
  68536. ; A[4] * B[1]
  68537. mov rax, QWORD PTR [r8+8]
  68538. mul QWORD PTR [r9+32]
  68539. add r12, rax
  68540. adc r10, rdx
  68541. adc r11, 0
  68542. ; A[5] * B[0]
  68543. mov rax, QWORD PTR [r8]
  68544. mul QWORD PTR [r9+40]
  68545. add r12, rax
  68546. adc r10, rdx
  68547. adc r11, 0
  68548. mov QWORD PTR [rsp+40], r12
  68549. ; A[0] * B[6]
  68550. mov rax, QWORD PTR [r8+48]
  68551. mul QWORD PTR [r9]
  68552. xor r12, r12
  68553. add r10, rax
  68554. adc r11, rdx
  68555. adc r12, 0
  68556. ; A[1] * B[5]
  68557. mov rax, QWORD PTR [r8+40]
  68558. mul QWORD PTR [r9+8]
  68559. add r10, rax
  68560. adc r11, rdx
  68561. adc r12, 0
  68562. ; A[2] * B[4]
  68563. mov rax, QWORD PTR [r8+32]
  68564. mul QWORD PTR [r9+16]
  68565. add r10, rax
  68566. adc r11, rdx
  68567. adc r12, 0
  68568. ; A[3] * B[3]
  68569. mov rax, QWORD PTR [r8+24]
  68570. mul QWORD PTR [r9+24]
  68571. add r10, rax
  68572. adc r11, rdx
  68573. adc r12, 0
  68574. ; A[4] * B[2]
  68575. mov rax, QWORD PTR [r8+16]
  68576. mul QWORD PTR [r9+32]
  68577. add r10, rax
  68578. adc r11, rdx
  68579. adc r12, 0
  68580. ; A[5] * B[1]
  68581. mov rax, QWORD PTR [r8+8]
  68582. mul QWORD PTR [r9+40]
  68583. add r10, rax
  68584. adc r11, rdx
  68585. adc r12, 0
  68586. ; A[6] * B[0]
  68587. mov rax, QWORD PTR [r8]
  68588. mul QWORD PTR [r9+48]
  68589. add r10, rax
  68590. adc r11, rdx
  68591. adc r12, 0
  68592. mov QWORD PTR [rsp+48], r10
  68593. ; A[0] * B[7]
  68594. mov rax, QWORD PTR [r8+56]
  68595. mul QWORD PTR [r9]
  68596. xor r10, r10
  68597. add r11, rax
  68598. adc r12, rdx
  68599. adc r10, 0
  68600. ; A[1] * B[6]
  68601. mov rax, QWORD PTR [r8+48]
  68602. mul QWORD PTR [r9+8]
  68603. add r11, rax
  68604. adc r12, rdx
  68605. adc r10, 0
  68606. ; A[2] * B[5]
  68607. mov rax, QWORD PTR [r8+40]
  68608. mul QWORD PTR [r9+16]
  68609. add r11, rax
  68610. adc r12, rdx
  68611. adc r10, 0
  68612. ; A[3] * B[4]
  68613. mov rax, QWORD PTR [r8+32]
  68614. mul QWORD PTR [r9+24]
  68615. add r11, rax
  68616. adc r12, rdx
  68617. adc r10, 0
  68618. ; A[4] * B[3]
  68619. mov rax, QWORD PTR [r8+24]
  68620. mul QWORD PTR [r9+32]
  68621. add r11, rax
  68622. adc r12, rdx
  68623. adc r10, 0
  68624. ; A[5] * B[2]
  68625. mov rax, QWORD PTR [r8+16]
  68626. mul QWORD PTR [r9+40]
  68627. add r11, rax
  68628. adc r12, rdx
  68629. adc r10, 0
  68630. ; A[6] * B[1]
  68631. mov rax, QWORD PTR [r8+8]
  68632. mul QWORD PTR [r9+48]
  68633. add r11, rax
  68634. adc r12, rdx
  68635. adc r10, 0
  68636. ; A[7] * B[0]
  68637. mov rax, QWORD PTR [r8]
  68638. mul QWORD PTR [r9+56]
  68639. add r11, rax
  68640. adc r12, rdx
  68641. adc r10, 0
  68642. mov QWORD PTR [rsp+56], r11
  68643. ; A[0] * B[8]
  68644. mov rax, QWORD PTR [r8+64]
  68645. mul QWORD PTR [r9]
  68646. xor r11, r11
  68647. add r12, rax
  68648. adc r10, rdx
  68649. adc r11, 0
  68650. ; A[1] * B[7]
  68651. mov rax, QWORD PTR [r8+56]
  68652. mul QWORD PTR [r9+8]
  68653. add r12, rax
  68654. adc r10, rdx
  68655. adc r11, 0
  68656. ; A[2] * B[6]
  68657. mov rax, QWORD PTR [r8+48]
  68658. mul QWORD PTR [r9+16]
  68659. add r12, rax
  68660. adc r10, rdx
  68661. adc r11, 0
  68662. ; A[3] * B[5]
  68663. mov rax, QWORD PTR [r8+40]
  68664. mul QWORD PTR [r9+24]
  68665. add r12, rax
  68666. adc r10, rdx
  68667. adc r11, 0
  68668. ; A[4] * B[4]
  68669. mov rax, QWORD PTR [r8+32]
  68670. mul QWORD PTR [r9+32]
  68671. add r12, rax
  68672. adc r10, rdx
  68673. adc r11, 0
  68674. ; A[5] * B[3]
  68675. mov rax, QWORD PTR [r8+24]
  68676. mul QWORD PTR [r9+40]
  68677. add r12, rax
  68678. adc r10, rdx
  68679. adc r11, 0
  68680. ; A[6] * B[2]
  68681. mov rax, QWORD PTR [r8+16]
  68682. mul QWORD PTR [r9+48]
  68683. add r12, rax
  68684. adc r10, rdx
  68685. adc r11, 0
  68686. ; A[7] * B[1]
  68687. mov rax, QWORD PTR [r8+8]
  68688. mul QWORD PTR [r9+56]
  68689. add r12, rax
  68690. adc r10, rdx
  68691. adc r11, 0
  68692. ; A[8] * B[0]
  68693. mov rax, QWORD PTR [r8]
  68694. mul QWORD PTR [r9+64]
  68695. add r12, rax
  68696. adc r10, rdx
  68697. adc r11, 0
  68698. mov QWORD PTR [rsp+64], r12
  68699. ; A[0] * B[9]
  68700. mov rax, QWORD PTR [r8+72]
  68701. mul QWORD PTR [r9]
  68702. xor r12, r12
  68703. add r10, rax
  68704. adc r11, rdx
  68705. adc r12, 0
  68706. ; A[1] * B[8]
  68707. mov rax, QWORD PTR [r8+64]
  68708. mul QWORD PTR [r9+8]
  68709. add r10, rax
  68710. adc r11, rdx
  68711. adc r12, 0
  68712. ; A[2] * B[7]
  68713. mov rax, QWORD PTR [r8+56]
  68714. mul QWORD PTR [r9+16]
  68715. add r10, rax
  68716. adc r11, rdx
  68717. adc r12, 0
  68718. ; A[3] * B[6]
  68719. mov rax, QWORD PTR [r8+48]
  68720. mul QWORD PTR [r9+24]
  68721. add r10, rax
  68722. adc r11, rdx
  68723. adc r12, 0
  68724. ; A[4] * B[5]
  68725. mov rax, QWORD PTR [r8+40]
  68726. mul QWORD PTR [r9+32]
  68727. add r10, rax
  68728. adc r11, rdx
  68729. adc r12, 0
  68730. ; A[5] * B[4]
  68731. mov rax, QWORD PTR [r8+32]
  68732. mul QWORD PTR [r9+40]
  68733. add r10, rax
  68734. adc r11, rdx
  68735. adc r12, 0
  68736. ; A[6] * B[3]
  68737. mov rax, QWORD PTR [r8+24]
  68738. mul QWORD PTR [r9+48]
  68739. add r10, rax
  68740. adc r11, rdx
  68741. adc r12, 0
  68742. ; A[7] * B[2]
  68743. mov rax, QWORD PTR [r8+16]
  68744. mul QWORD PTR [r9+56]
  68745. add r10, rax
  68746. adc r11, rdx
  68747. adc r12, 0
  68748. ; A[8] * B[1]
  68749. mov rax, QWORD PTR [r8+8]
  68750. mul QWORD PTR [r9+64]
  68751. add r10, rax
  68752. adc r11, rdx
  68753. adc r12, 0
  68754. ; A[9] * B[0]
  68755. mov rax, QWORD PTR [r8]
  68756. mul QWORD PTR [r9+72]
  68757. add r10, rax
  68758. adc r11, rdx
  68759. adc r12, 0
  68760. mov QWORD PTR [rsp+72], r10
  68761. ; A[0] * B[10]
  68762. mov rax, QWORD PTR [r8+80]
  68763. mul QWORD PTR [r9]
  68764. xor r10, r10
  68765. add r11, rax
  68766. adc r12, rdx
  68767. adc r10, 0
  68768. ; A[1] * B[9]
  68769. mov rax, QWORD PTR [r8+72]
  68770. mul QWORD PTR [r9+8]
  68771. add r11, rax
  68772. adc r12, rdx
  68773. adc r10, 0
  68774. ; A[2] * B[8]
  68775. mov rax, QWORD PTR [r8+64]
  68776. mul QWORD PTR [r9+16]
  68777. add r11, rax
  68778. adc r12, rdx
  68779. adc r10, 0
  68780. ; A[3] * B[7]
  68781. mov rax, QWORD PTR [r8+56]
  68782. mul QWORD PTR [r9+24]
  68783. add r11, rax
  68784. adc r12, rdx
  68785. adc r10, 0
  68786. ; A[4] * B[6]
  68787. mov rax, QWORD PTR [r8+48]
  68788. mul QWORD PTR [r9+32]
  68789. add r11, rax
  68790. adc r12, rdx
  68791. adc r10, 0
  68792. ; A[5] * B[5]
  68793. mov rax, QWORD PTR [r8+40]
  68794. mul QWORD PTR [r9+40]
  68795. add r11, rax
  68796. adc r12, rdx
  68797. adc r10, 0
  68798. ; A[6] * B[4]
  68799. mov rax, QWORD PTR [r8+32]
  68800. mul QWORD PTR [r9+48]
  68801. add r11, rax
  68802. adc r12, rdx
  68803. adc r10, 0
  68804. ; A[7] * B[3]
  68805. mov rax, QWORD PTR [r8+24]
  68806. mul QWORD PTR [r9+56]
  68807. add r11, rax
  68808. adc r12, rdx
  68809. adc r10, 0
  68810. ; A[8] * B[2]
  68811. mov rax, QWORD PTR [r8+16]
  68812. mul QWORD PTR [r9+64]
  68813. add r11, rax
  68814. adc r12, rdx
  68815. adc r10, 0
  68816. ; A[9] * B[1]
  68817. mov rax, QWORD PTR [r8+8]
  68818. mul QWORD PTR [r9+72]
  68819. add r11, rax
  68820. adc r12, rdx
  68821. adc r10, 0
  68822. ; A[10] * B[0]
  68823. mov rax, QWORD PTR [r8]
  68824. mul QWORD PTR [r9+80]
  68825. add r11, rax
  68826. adc r12, rdx
  68827. adc r10, 0
  68828. mov QWORD PTR [rsp+80], r11
  68829. ; A[0] * B[11]
  68830. mov rax, QWORD PTR [r8+88]
  68831. mul QWORD PTR [r9]
  68832. xor r11, r11
  68833. add r12, rax
  68834. adc r10, rdx
  68835. adc r11, 0
  68836. ; A[1] * B[10]
  68837. mov rax, QWORD PTR [r8+80]
  68838. mul QWORD PTR [r9+8]
  68839. add r12, rax
  68840. adc r10, rdx
  68841. adc r11, 0
  68842. ; A[2] * B[9]
  68843. mov rax, QWORD PTR [r8+72]
  68844. mul QWORD PTR [r9+16]
  68845. add r12, rax
  68846. adc r10, rdx
  68847. adc r11, 0
  68848. ; A[3] * B[8]
  68849. mov rax, QWORD PTR [r8+64]
  68850. mul QWORD PTR [r9+24]
  68851. add r12, rax
  68852. adc r10, rdx
  68853. adc r11, 0
  68854. ; A[4] * B[7]
  68855. mov rax, QWORD PTR [r8+56]
  68856. mul QWORD PTR [r9+32]
  68857. add r12, rax
  68858. adc r10, rdx
  68859. adc r11, 0
  68860. ; A[5] * B[6]
  68861. mov rax, QWORD PTR [r8+48]
  68862. mul QWORD PTR [r9+40]
  68863. add r12, rax
  68864. adc r10, rdx
  68865. adc r11, 0
  68866. ; A[6] * B[5]
  68867. mov rax, QWORD PTR [r8+40]
  68868. mul QWORD PTR [r9+48]
  68869. add r12, rax
  68870. adc r10, rdx
  68871. adc r11, 0
  68872. ; A[7] * B[4]
  68873. mov rax, QWORD PTR [r8+32]
  68874. mul QWORD PTR [r9+56]
  68875. add r12, rax
  68876. adc r10, rdx
  68877. adc r11, 0
  68878. ; A[8] * B[3]
  68879. mov rax, QWORD PTR [r8+24]
  68880. mul QWORD PTR [r9+64]
  68881. add r12, rax
  68882. adc r10, rdx
  68883. adc r11, 0
  68884. ; A[9] * B[2]
  68885. mov rax, QWORD PTR [r8+16]
  68886. mul QWORD PTR [r9+72]
  68887. add r12, rax
  68888. adc r10, rdx
  68889. adc r11, 0
  68890. ; A[10] * B[1]
  68891. mov rax, QWORD PTR [r8+8]
  68892. mul QWORD PTR [r9+80]
  68893. add r12, rax
  68894. adc r10, rdx
  68895. adc r11, 0
  68896. ; A[11] * B[0]
  68897. mov rax, QWORD PTR [r8]
  68898. mul QWORD PTR [r9+88]
  68899. add r12, rax
  68900. adc r10, rdx
  68901. adc r11, 0
  68902. mov QWORD PTR [rsp+88], r12
  68903. ; A[0] * B[12]
  68904. mov rax, QWORD PTR [r8+96]
  68905. mul QWORD PTR [r9]
  68906. xor r12, r12
  68907. add r10, rax
  68908. adc r11, rdx
  68909. adc r12, 0
  68910. ; A[1] * B[11]
  68911. mov rax, QWORD PTR [r8+88]
  68912. mul QWORD PTR [r9+8]
  68913. add r10, rax
  68914. adc r11, rdx
  68915. adc r12, 0
  68916. ; A[2] * B[10]
  68917. mov rax, QWORD PTR [r8+80]
  68918. mul QWORD PTR [r9+16]
  68919. add r10, rax
  68920. adc r11, rdx
  68921. adc r12, 0
  68922. ; A[3] * B[9]
  68923. mov rax, QWORD PTR [r8+72]
  68924. mul QWORD PTR [r9+24]
  68925. add r10, rax
  68926. adc r11, rdx
  68927. adc r12, 0
  68928. ; A[4] * B[8]
  68929. mov rax, QWORD PTR [r8+64]
  68930. mul QWORD PTR [r9+32]
  68931. add r10, rax
  68932. adc r11, rdx
  68933. adc r12, 0
  68934. ; A[5] * B[7]
  68935. mov rax, QWORD PTR [r8+56]
  68936. mul QWORD PTR [r9+40]
  68937. add r10, rax
  68938. adc r11, rdx
  68939. adc r12, 0
  68940. ; A[6] * B[6]
  68941. mov rax, QWORD PTR [r8+48]
  68942. mul QWORD PTR [r9+48]
  68943. add r10, rax
  68944. adc r11, rdx
  68945. adc r12, 0
  68946. ; A[7] * B[5]
  68947. mov rax, QWORD PTR [r8+40]
  68948. mul QWORD PTR [r9+56]
  68949. add r10, rax
  68950. adc r11, rdx
  68951. adc r12, 0
  68952. ; A[8] * B[4]
  68953. mov rax, QWORD PTR [r8+32]
  68954. mul QWORD PTR [r9+64]
  68955. add r10, rax
  68956. adc r11, rdx
  68957. adc r12, 0
  68958. ; A[9] * B[3]
  68959. mov rax, QWORD PTR [r8+24]
  68960. mul QWORD PTR [r9+72]
  68961. add r10, rax
  68962. adc r11, rdx
  68963. adc r12, 0
  68964. ; A[10] * B[2]
  68965. mov rax, QWORD PTR [r8+16]
  68966. mul QWORD PTR [r9+80]
  68967. add r10, rax
  68968. adc r11, rdx
  68969. adc r12, 0
  68970. ; A[11] * B[1]
  68971. mov rax, QWORD PTR [r8+8]
  68972. mul QWORD PTR [r9+88]
  68973. add r10, rax
  68974. adc r11, rdx
  68975. adc r12, 0
  68976. ; A[12] * B[0]
  68977. mov rax, QWORD PTR [r8]
  68978. mul QWORD PTR [r9+96]
  68979. add r10, rax
  68980. adc r11, rdx
  68981. adc r12, 0
  68982. mov QWORD PTR [rsp+96], r10
  68983. ; A[0] * B[13]
  68984. mov rax, QWORD PTR [r8+104]
  68985. mul QWORD PTR [r9]
  68986. xor r10, r10
  68987. add r11, rax
  68988. adc r12, rdx
  68989. adc r10, 0
  68990. ; A[1] * B[12]
  68991. mov rax, QWORD PTR [r8+96]
  68992. mul QWORD PTR [r9+8]
  68993. add r11, rax
  68994. adc r12, rdx
  68995. adc r10, 0
  68996. ; A[2] * B[11]
  68997. mov rax, QWORD PTR [r8+88]
  68998. mul QWORD PTR [r9+16]
  68999. add r11, rax
  69000. adc r12, rdx
  69001. adc r10, 0
  69002. ; A[3] * B[10]
  69003. mov rax, QWORD PTR [r8+80]
  69004. mul QWORD PTR [r9+24]
  69005. add r11, rax
  69006. adc r12, rdx
  69007. adc r10, 0
  69008. ; A[4] * B[9]
  69009. mov rax, QWORD PTR [r8+72]
  69010. mul QWORD PTR [r9+32]
  69011. add r11, rax
  69012. adc r12, rdx
  69013. adc r10, 0
  69014. ; A[5] * B[8]
  69015. mov rax, QWORD PTR [r8+64]
  69016. mul QWORD PTR [r9+40]
  69017. add r11, rax
  69018. adc r12, rdx
  69019. adc r10, 0
  69020. ; A[6] * B[7]
  69021. mov rax, QWORD PTR [r8+56]
  69022. mul QWORD PTR [r9+48]
  69023. add r11, rax
  69024. adc r12, rdx
  69025. adc r10, 0
  69026. ; A[7] * B[6]
  69027. mov rax, QWORD PTR [r8+48]
  69028. mul QWORD PTR [r9+56]
  69029. add r11, rax
  69030. adc r12, rdx
  69031. adc r10, 0
  69032. ; A[8] * B[5]
  69033. mov rax, QWORD PTR [r8+40]
  69034. mul QWORD PTR [r9+64]
  69035. add r11, rax
  69036. adc r12, rdx
  69037. adc r10, 0
  69038. ; A[9] * B[4]
  69039. mov rax, QWORD PTR [r8+32]
  69040. mul QWORD PTR [r9+72]
  69041. add r11, rax
  69042. adc r12, rdx
  69043. adc r10, 0
  69044. ; A[10] * B[3]
  69045. mov rax, QWORD PTR [r8+24]
  69046. mul QWORD PTR [r9+80]
  69047. add r11, rax
  69048. adc r12, rdx
  69049. adc r10, 0
  69050. ; A[11] * B[2]
  69051. mov rax, QWORD PTR [r8+16]
  69052. mul QWORD PTR [r9+88]
  69053. add r11, rax
  69054. adc r12, rdx
  69055. adc r10, 0
  69056. ; A[12] * B[1]
  69057. mov rax, QWORD PTR [r8+8]
  69058. mul QWORD PTR [r9+96]
  69059. add r11, rax
  69060. adc r12, rdx
  69061. adc r10, 0
  69062. ; A[13] * B[0]
  69063. mov rax, QWORD PTR [r8]
  69064. mul QWORD PTR [r9+104]
  69065. add r11, rax
  69066. adc r12, rdx
  69067. adc r10, 0
  69068. mov QWORD PTR [rsp+104], r11
  69069. ; A[0] * B[14]
  69070. mov rax, QWORD PTR [r8+112]
  69071. mul QWORD PTR [r9]
  69072. xor r11, r11
  69073. add r12, rax
  69074. adc r10, rdx
  69075. adc r11, 0
  69076. ; A[1] * B[13]
  69077. mov rax, QWORD PTR [r8+104]
  69078. mul QWORD PTR [r9+8]
  69079. add r12, rax
  69080. adc r10, rdx
  69081. adc r11, 0
  69082. ; A[2] * B[12]
  69083. mov rax, QWORD PTR [r8+96]
  69084. mul QWORD PTR [r9+16]
  69085. add r12, rax
  69086. adc r10, rdx
  69087. adc r11, 0
  69088. ; A[3] * B[11]
  69089. mov rax, QWORD PTR [r8+88]
  69090. mul QWORD PTR [r9+24]
  69091. add r12, rax
  69092. adc r10, rdx
  69093. adc r11, 0
  69094. ; A[4] * B[10]
  69095. mov rax, QWORD PTR [r8+80]
  69096. mul QWORD PTR [r9+32]
  69097. add r12, rax
  69098. adc r10, rdx
  69099. adc r11, 0
  69100. ; A[5] * B[9]
  69101. mov rax, QWORD PTR [r8+72]
  69102. mul QWORD PTR [r9+40]
  69103. add r12, rax
  69104. adc r10, rdx
  69105. adc r11, 0
  69106. ; A[6] * B[8]
  69107. mov rax, QWORD PTR [r8+64]
  69108. mul QWORD PTR [r9+48]
  69109. add r12, rax
  69110. adc r10, rdx
  69111. adc r11, 0
  69112. ; A[7] * B[7]
  69113. mov rax, QWORD PTR [r8+56]
  69114. mul QWORD PTR [r9+56]
  69115. add r12, rax
  69116. adc r10, rdx
  69117. adc r11, 0
  69118. ; A[8] * B[6]
  69119. mov rax, QWORD PTR [r8+48]
  69120. mul QWORD PTR [r9+64]
  69121. add r12, rax
  69122. adc r10, rdx
  69123. adc r11, 0
  69124. ; A[9] * B[5]
  69125. mov rax, QWORD PTR [r8+40]
  69126. mul QWORD PTR [r9+72]
  69127. add r12, rax
  69128. adc r10, rdx
  69129. adc r11, 0
  69130. ; A[10] * B[4]
  69131. mov rax, QWORD PTR [r8+32]
  69132. mul QWORD PTR [r9+80]
  69133. add r12, rax
  69134. adc r10, rdx
  69135. adc r11, 0
  69136. ; A[11] * B[3]
  69137. mov rax, QWORD PTR [r8+24]
  69138. mul QWORD PTR [r9+88]
  69139. add r12, rax
  69140. adc r10, rdx
  69141. adc r11, 0
  69142. ; A[12] * B[2]
  69143. mov rax, QWORD PTR [r8+16]
  69144. mul QWORD PTR [r9+96]
  69145. add r12, rax
  69146. adc r10, rdx
  69147. adc r11, 0
  69148. ; A[13] * B[1]
  69149. mov rax, QWORD PTR [r8+8]
  69150. mul QWORD PTR [r9+104]
  69151. add r12, rax
  69152. adc r10, rdx
  69153. adc r11, 0
  69154. ; A[14] * B[0]
  69155. mov rax, QWORD PTR [r8]
  69156. mul QWORD PTR [r9+112]
  69157. add r12, rax
  69158. adc r10, rdx
  69159. adc r11, 0
  69160. mov QWORD PTR [rsp+112], r12
  69161. ; A[0] * B[15]
  69162. mov rax, QWORD PTR [r8+120]
  69163. mul QWORD PTR [r9]
  69164. xor r12, r12
  69165. add r10, rax
  69166. adc r11, rdx
  69167. adc r12, 0
  69168. ; A[1] * B[14]
  69169. mov rax, QWORD PTR [r8+112]
  69170. mul QWORD PTR [r9+8]
  69171. add r10, rax
  69172. adc r11, rdx
  69173. adc r12, 0
  69174. ; A[2] * B[13]
  69175. mov rax, QWORD PTR [r8+104]
  69176. mul QWORD PTR [r9+16]
  69177. add r10, rax
  69178. adc r11, rdx
  69179. adc r12, 0
  69180. ; A[3] * B[12]
  69181. mov rax, QWORD PTR [r8+96]
  69182. mul QWORD PTR [r9+24]
  69183. add r10, rax
  69184. adc r11, rdx
  69185. adc r12, 0
  69186. ; A[4] * B[11]
  69187. mov rax, QWORD PTR [r8+88]
  69188. mul QWORD PTR [r9+32]
  69189. add r10, rax
  69190. adc r11, rdx
  69191. adc r12, 0
  69192. ; A[5] * B[10]
  69193. mov rax, QWORD PTR [r8+80]
  69194. mul QWORD PTR [r9+40]
  69195. add r10, rax
  69196. adc r11, rdx
  69197. adc r12, 0
  69198. ; A[6] * B[9]
  69199. mov rax, QWORD PTR [r8+72]
  69200. mul QWORD PTR [r9+48]
  69201. add r10, rax
  69202. adc r11, rdx
  69203. adc r12, 0
  69204. ; A[7] * B[8]
  69205. mov rax, QWORD PTR [r8+64]
  69206. mul QWORD PTR [r9+56]
  69207. add r10, rax
  69208. adc r11, rdx
  69209. adc r12, 0
  69210. ; A[8] * B[7]
  69211. mov rax, QWORD PTR [r8+56]
  69212. mul QWORD PTR [r9+64]
  69213. add r10, rax
  69214. adc r11, rdx
  69215. adc r12, 0
  69216. ; A[9] * B[6]
  69217. mov rax, QWORD PTR [r8+48]
  69218. mul QWORD PTR [r9+72]
  69219. add r10, rax
  69220. adc r11, rdx
  69221. adc r12, 0
  69222. ; A[10] * B[5]
  69223. mov rax, QWORD PTR [r8+40]
  69224. mul QWORD PTR [r9+80]
  69225. add r10, rax
  69226. adc r11, rdx
  69227. adc r12, 0
  69228. ; A[11] * B[4]
  69229. mov rax, QWORD PTR [r8+32]
  69230. mul QWORD PTR [r9+88]
  69231. add r10, rax
  69232. adc r11, rdx
  69233. adc r12, 0
  69234. ; A[12] * B[3]
  69235. mov rax, QWORD PTR [r8+24]
  69236. mul QWORD PTR [r9+96]
  69237. add r10, rax
  69238. adc r11, rdx
  69239. adc r12, 0
  69240. ; A[13] * B[2]
  69241. mov rax, QWORD PTR [r8+16]
  69242. mul QWORD PTR [r9+104]
  69243. add r10, rax
  69244. adc r11, rdx
  69245. adc r12, 0
  69246. ; A[14] * B[1]
  69247. mov rax, QWORD PTR [r8+8]
  69248. mul QWORD PTR [r9+112]
  69249. add r10, rax
  69250. adc r11, rdx
  69251. adc r12, 0
  69252. ; A[15] * B[0]
  69253. mov rax, QWORD PTR [r8]
  69254. mul QWORD PTR [r9+120]
  69255. add r10, rax
  69256. adc r11, rdx
  69257. adc r12, 0
  69258. mov QWORD PTR [rsp+120], r10
  69259. ; A[1] * B[15]
  69260. mov rax, QWORD PTR [r8+120]
  69261. mul QWORD PTR [r9+8]
  69262. xor r10, r10
  69263. add r11, rax
  69264. adc r12, rdx
  69265. adc r10, 0
  69266. ; A[2] * B[14]
  69267. mov rax, QWORD PTR [r8+112]
  69268. mul QWORD PTR [r9+16]
  69269. add r11, rax
  69270. adc r12, rdx
  69271. adc r10, 0
  69272. ; A[3] * B[13]
  69273. mov rax, QWORD PTR [r8+104]
  69274. mul QWORD PTR [r9+24]
  69275. add r11, rax
  69276. adc r12, rdx
  69277. adc r10, 0
  69278. ; A[4] * B[12]
  69279. mov rax, QWORD PTR [r8+96]
  69280. mul QWORD PTR [r9+32]
  69281. add r11, rax
  69282. adc r12, rdx
  69283. adc r10, 0
  69284. ; A[5] * B[11]
  69285. mov rax, QWORD PTR [r8+88]
  69286. mul QWORD PTR [r9+40]
  69287. add r11, rax
  69288. adc r12, rdx
  69289. adc r10, 0
  69290. ; A[6] * B[10]
  69291. mov rax, QWORD PTR [r8+80]
  69292. mul QWORD PTR [r9+48]
  69293. add r11, rax
  69294. adc r12, rdx
  69295. adc r10, 0
  69296. ; A[7] * B[9]
  69297. mov rax, QWORD PTR [r8+72]
  69298. mul QWORD PTR [r9+56]
  69299. add r11, rax
  69300. adc r12, rdx
  69301. adc r10, 0
  69302. ; A[8] * B[8]
  69303. mov rax, QWORD PTR [r8+64]
  69304. mul QWORD PTR [r9+64]
  69305. add r11, rax
  69306. adc r12, rdx
  69307. adc r10, 0
  69308. ; A[9] * B[7]
  69309. mov rax, QWORD PTR [r8+56]
  69310. mul QWORD PTR [r9+72]
  69311. add r11, rax
  69312. adc r12, rdx
  69313. adc r10, 0
  69314. ; A[10] * B[6]
  69315. mov rax, QWORD PTR [r8+48]
  69316. mul QWORD PTR [r9+80]
  69317. add r11, rax
  69318. adc r12, rdx
  69319. adc r10, 0
  69320. ; A[11] * B[5]
  69321. mov rax, QWORD PTR [r8+40]
  69322. mul QWORD PTR [r9+88]
  69323. add r11, rax
  69324. adc r12, rdx
  69325. adc r10, 0
  69326. ; A[12] * B[4]
  69327. mov rax, QWORD PTR [r8+32]
  69328. mul QWORD PTR [r9+96]
  69329. add r11, rax
  69330. adc r12, rdx
  69331. adc r10, 0
  69332. ; A[13] * B[3]
  69333. mov rax, QWORD PTR [r8+24]
  69334. mul QWORD PTR [r9+104]
  69335. add r11, rax
  69336. adc r12, rdx
  69337. adc r10, 0
  69338. ; A[14] * B[2]
  69339. mov rax, QWORD PTR [r8+16]
  69340. mul QWORD PTR [r9+112]
  69341. add r11, rax
  69342. adc r12, rdx
  69343. adc r10, 0
  69344. ; A[15] * B[1]
  69345. mov rax, QWORD PTR [r8+8]
  69346. mul QWORD PTR [r9+120]
  69347. add r11, rax
  69348. adc r12, rdx
  69349. adc r10, 0
  69350. mov QWORD PTR [rcx+128], r11
  69351. ; A[2] * B[15]
  69352. mov rax, QWORD PTR [r8+120]
  69353. mul QWORD PTR [r9+16]
  69354. xor r11, r11
  69355. add r12, rax
  69356. adc r10, rdx
  69357. adc r11, 0
  69358. ; A[3] * B[14]
  69359. mov rax, QWORD PTR [r8+112]
  69360. mul QWORD PTR [r9+24]
  69361. add r12, rax
  69362. adc r10, rdx
  69363. adc r11, 0
  69364. ; A[4] * B[13]
  69365. mov rax, QWORD PTR [r8+104]
  69366. mul QWORD PTR [r9+32]
  69367. add r12, rax
  69368. adc r10, rdx
  69369. adc r11, 0
  69370. ; A[5] * B[12]
  69371. mov rax, QWORD PTR [r8+96]
  69372. mul QWORD PTR [r9+40]
  69373. add r12, rax
  69374. adc r10, rdx
  69375. adc r11, 0
  69376. ; A[6] * B[11]
  69377. mov rax, QWORD PTR [r8+88]
  69378. mul QWORD PTR [r9+48]
  69379. add r12, rax
  69380. adc r10, rdx
  69381. adc r11, 0
  69382. ; A[7] * B[10]
  69383. mov rax, QWORD PTR [r8+80]
  69384. mul QWORD PTR [r9+56]
  69385. add r12, rax
  69386. adc r10, rdx
  69387. adc r11, 0
  69388. ; A[8] * B[9]
  69389. mov rax, QWORD PTR [r8+72]
  69390. mul QWORD PTR [r9+64]
  69391. add r12, rax
  69392. adc r10, rdx
  69393. adc r11, 0
  69394. ; A[9] * B[8]
  69395. mov rax, QWORD PTR [r8+64]
  69396. mul QWORD PTR [r9+72]
  69397. add r12, rax
  69398. adc r10, rdx
  69399. adc r11, 0
  69400. ; A[10] * B[7]
  69401. mov rax, QWORD PTR [r8+56]
  69402. mul QWORD PTR [r9+80]
  69403. add r12, rax
  69404. adc r10, rdx
  69405. adc r11, 0
  69406. ; A[11] * B[6]
  69407. mov rax, QWORD PTR [r8+48]
  69408. mul QWORD PTR [r9+88]
  69409. add r12, rax
  69410. adc r10, rdx
  69411. adc r11, 0
  69412. ; A[12] * B[5]
  69413. mov rax, QWORD PTR [r8+40]
  69414. mul QWORD PTR [r9+96]
  69415. add r12, rax
  69416. adc r10, rdx
  69417. adc r11, 0
  69418. ; A[13] * B[4]
  69419. mov rax, QWORD PTR [r8+32]
  69420. mul QWORD PTR [r9+104]
  69421. add r12, rax
  69422. adc r10, rdx
  69423. adc r11, 0
  69424. ; A[14] * B[3]
  69425. mov rax, QWORD PTR [r8+24]
  69426. mul QWORD PTR [r9+112]
  69427. add r12, rax
  69428. adc r10, rdx
  69429. adc r11, 0
  69430. ; A[15] * B[2]
  69431. mov rax, QWORD PTR [r8+16]
  69432. mul QWORD PTR [r9+120]
  69433. add r12, rax
  69434. adc r10, rdx
  69435. adc r11, 0
  69436. mov QWORD PTR [rcx+136], r12
  69437. ; A[3] * B[15]
  69438. mov rax, QWORD PTR [r8+120]
  69439. mul QWORD PTR [r9+24]
  69440. xor r12, r12
  69441. add r10, rax
  69442. adc r11, rdx
  69443. adc r12, 0
  69444. ; A[4] * B[14]
  69445. mov rax, QWORD PTR [r8+112]
  69446. mul QWORD PTR [r9+32]
  69447. add r10, rax
  69448. adc r11, rdx
  69449. adc r12, 0
  69450. ; A[5] * B[13]
  69451. mov rax, QWORD PTR [r8+104]
  69452. mul QWORD PTR [r9+40]
  69453. add r10, rax
  69454. adc r11, rdx
  69455. adc r12, 0
  69456. ; A[6] * B[12]
  69457. mov rax, QWORD PTR [r8+96]
  69458. mul QWORD PTR [r9+48]
  69459. add r10, rax
  69460. adc r11, rdx
  69461. adc r12, 0
  69462. ; A[7] * B[11]
  69463. mov rax, QWORD PTR [r8+88]
  69464. mul QWORD PTR [r9+56]
  69465. add r10, rax
  69466. adc r11, rdx
  69467. adc r12, 0
  69468. ; A[8] * B[10]
  69469. mov rax, QWORD PTR [r8+80]
  69470. mul QWORD PTR [r9+64]
  69471. add r10, rax
  69472. adc r11, rdx
  69473. adc r12, 0
  69474. ; A[9] * B[9]
  69475. mov rax, QWORD PTR [r8+72]
  69476. mul QWORD PTR [r9+72]
  69477. add r10, rax
  69478. adc r11, rdx
  69479. adc r12, 0
  69480. ; A[10] * B[8]
  69481. mov rax, QWORD PTR [r8+64]
  69482. mul QWORD PTR [r9+80]
  69483. add r10, rax
  69484. adc r11, rdx
  69485. adc r12, 0
  69486. ; A[11] * B[7]
  69487. mov rax, QWORD PTR [r8+56]
  69488. mul QWORD PTR [r9+88]
  69489. add r10, rax
  69490. adc r11, rdx
  69491. adc r12, 0
  69492. ; A[12] * B[6]
  69493. mov rax, QWORD PTR [r8+48]
  69494. mul QWORD PTR [r9+96]
  69495. add r10, rax
  69496. adc r11, rdx
  69497. adc r12, 0
  69498. ; A[13] * B[5]
  69499. mov rax, QWORD PTR [r8+40]
  69500. mul QWORD PTR [r9+104]
  69501. add r10, rax
  69502. adc r11, rdx
  69503. adc r12, 0
  69504. ; A[14] * B[4]
  69505. mov rax, QWORD PTR [r8+32]
  69506. mul QWORD PTR [r9+112]
  69507. add r10, rax
  69508. adc r11, rdx
  69509. adc r12, 0
  69510. ; A[15] * B[3]
  69511. mov rax, QWORD PTR [r8+24]
  69512. mul QWORD PTR [r9+120]
  69513. add r10, rax
  69514. adc r11, rdx
  69515. adc r12, 0
  69516. mov QWORD PTR [rcx+144], r10
  69517. ; A[4] * B[15]
  69518. mov rax, QWORD PTR [r8+120]
  69519. mul QWORD PTR [r9+32]
  69520. xor r10, r10
  69521. add r11, rax
  69522. adc r12, rdx
  69523. adc r10, 0
  69524. ; A[5] * B[14]
  69525. mov rax, QWORD PTR [r8+112]
  69526. mul QWORD PTR [r9+40]
  69527. add r11, rax
  69528. adc r12, rdx
  69529. adc r10, 0
  69530. ; A[6] * B[13]
  69531. mov rax, QWORD PTR [r8+104]
  69532. mul QWORD PTR [r9+48]
  69533. add r11, rax
  69534. adc r12, rdx
  69535. adc r10, 0
  69536. ; A[7] * B[12]
  69537. mov rax, QWORD PTR [r8+96]
  69538. mul QWORD PTR [r9+56]
  69539. add r11, rax
  69540. adc r12, rdx
  69541. adc r10, 0
  69542. ; A[8] * B[11]
  69543. mov rax, QWORD PTR [r8+88]
  69544. mul QWORD PTR [r9+64]
  69545. add r11, rax
  69546. adc r12, rdx
  69547. adc r10, 0
  69548. ; A[9] * B[10]
  69549. mov rax, QWORD PTR [r8+80]
  69550. mul QWORD PTR [r9+72]
  69551. add r11, rax
  69552. adc r12, rdx
  69553. adc r10, 0
  69554. ; A[10] * B[9]
  69555. mov rax, QWORD PTR [r8+72]
  69556. mul QWORD PTR [r9+80]
  69557. add r11, rax
  69558. adc r12, rdx
  69559. adc r10, 0
  69560. ; A[11] * B[8]
  69561. mov rax, QWORD PTR [r8+64]
  69562. mul QWORD PTR [r9+88]
  69563. add r11, rax
  69564. adc r12, rdx
  69565. adc r10, 0
  69566. ; A[12] * B[7]
  69567. mov rax, QWORD PTR [r8+56]
  69568. mul QWORD PTR [r9+96]
  69569. add r11, rax
  69570. adc r12, rdx
  69571. adc r10, 0
  69572. ; A[13] * B[6]
  69573. mov rax, QWORD PTR [r8+48]
  69574. mul QWORD PTR [r9+104]
  69575. add r11, rax
  69576. adc r12, rdx
  69577. adc r10, 0
  69578. ; A[14] * B[5]
  69579. mov rax, QWORD PTR [r8+40]
  69580. mul QWORD PTR [r9+112]
  69581. add r11, rax
  69582. adc r12, rdx
  69583. adc r10, 0
  69584. ; A[15] * B[4]
  69585. mov rax, QWORD PTR [r8+32]
  69586. mul QWORD PTR [r9+120]
  69587. add r11, rax
  69588. adc r12, rdx
  69589. adc r10, 0
  69590. mov QWORD PTR [rcx+152], r11
  69591. ; A[5] * B[15]
  69592. mov rax, QWORD PTR [r8+120]
  69593. mul QWORD PTR [r9+40]
  69594. xor r11, r11
  69595. add r12, rax
  69596. adc r10, rdx
  69597. adc r11, 0
  69598. ; A[6] * B[14]
  69599. mov rax, QWORD PTR [r8+112]
  69600. mul QWORD PTR [r9+48]
  69601. add r12, rax
  69602. adc r10, rdx
  69603. adc r11, 0
  69604. ; A[7] * B[13]
  69605. mov rax, QWORD PTR [r8+104]
  69606. mul QWORD PTR [r9+56]
  69607. add r12, rax
  69608. adc r10, rdx
  69609. adc r11, 0
  69610. ; A[8] * B[12]
  69611. mov rax, QWORD PTR [r8+96]
  69612. mul QWORD PTR [r9+64]
  69613. add r12, rax
  69614. adc r10, rdx
  69615. adc r11, 0
  69616. ; A[9] * B[11]
  69617. mov rax, QWORD PTR [r8+88]
  69618. mul QWORD PTR [r9+72]
  69619. add r12, rax
  69620. adc r10, rdx
  69621. adc r11, 0
  69622. ; A[10] * B[10]
  69623. mov rax, QWORD PTR [r8+80]
  69624. mul QWORD PTR [r9+80]
  69625. add r12, rax
  69626. adc r10, rdx
  69627. adc r11, 0
  69628. ; A[11] * B[9]
  69629. mov rax, QWORD PTR [r8+72]
  69630. mul QWORD PTR [r9+88]
  69631. add r12, rax
  69632. adc r10, rdx
  69633. adc r11, 0
  69634. ; A[12] * B[8]
  69635. mov rax, QWORD PTR [r8+64]
  69636. mul QWORD PTR [r9+96]
  69637. add r12, rax
  69638. adc r10, rdx
  69639. adc r11, 0
  69640. ; A[13] * B[7]
  69641. mov rax, QWORD PTR [r8+56]
  69642. mul QWORD PTR [r9+104]
  69643. add r12, rax
  69644. adc r10, rdx
  69645. adc r11, 0
  69646. ; A[14] * B[6]
  69647. mov rax, QWORD PTR [r8+48]
  69648. mul QWORD PTR [r9+112]
  69649. add r12, rax
  69650. adc r10, rdx
  69651. adc r11, 0
  69652. ; A[15] * B[5]
  69653. mov rax, QWORD PTR [r8+40]
  69654. mul QWORD PTR [r9+120]
  69655. add r12, rax
  69656. adc r10, rdx
  69657. adc r11, 0
  69658. mov QWORD PTR [rcx+160], r12
  69659. ; A[6] * B[15]
  69660. mov rax, QWORD PTR [r8+120]
  69661. mul QWORD PTR [r9+48]
  69662. xor r12, r12
  69663. add r10, rax
  69664. adc r11, rdx
  69665. adc r12, 0
  69666. ; A[7] * B[14]
  69667. mov rax, QWORD PTR [r8+112]
  69668. mul QWORD PTR [r9+56]
  69669. add r10, rax
  69670. adc r11, rdx
  69671. adc r12, 0
  69672. ; A[8] * B[13]
  69673. mov rax, QWORD PTR [r8+104]
  69674. mul QWORD PTR [r9+64]
  69675. add r10, rax
  69676. adc r11, rdx
  69677. adc r12, 0
  69678. ; A[9] * B[12]
  69679. mov rax, QWORD PTR [r8+96]
  69680. mul QWORD PTR [r9+72]
  69681. add r10, rax
  69682. adc r11, rdx
  69683. adc r12, 0
  69684. ; A[10] * B[11]
  69685. mov rax, QWORD PTR [r8+88]
  69686. mul QWORD PTR [r9+80]
  69687. add r10, rax
  69688. adc r11, rdx
  69689. adc r12, 0
  69690. ; A[11] * B[10]
  69691. mov rax, QWORD PTR [r8+80]
  69692. mul QWORD PTR [r9+88]
  69693. add r10, rax
  69694. adc r11, rdx
  69695. adc r12, 0
  69696. ; A[12] * B[9]
  69697. mov rax, QWORD PTR [r8+72]
  69698. mul QWORD PTR [r9+96]
  69699. add r10, rax
  69700. adc r11, rdx
  69701. adc r12, 0
  69702. ; A[13] * B[8]
  69703. mov rax, QWORD PTR [r8+64]
  69704. mul QWORD PTR [r9+104]
  69705. add r10, rax
  69706. adc r11, rdx
  69707. adc r12, 0
  69708. ; A[14] * B[7]
  69709. mov rax, QWORD PTR [r8+56]
  69710. mul QWORD PTR [r9+112]
  69711. add r10, rax
  69712. adc r11, rdx
  69713. adc r12, 0
  69714. ; A[15] * B[6]
  69715. mov rax, QWORD PTR [r8+48]
  69716. mul QWORD PTR [r9+120]
  69717. add r10, rax
  69718. adc r11, rdx
  69719. adc r12, 0
  69720. mov QWORD PTR [rcx+168], r10
  69721. ; A[7] * B[15]
  69722. mov rax, QWORD PTR [r8+120]
  69723. mul QWORD PTR [r9+56]
  69724. xor r10, r10
  69725. add r11, rax
  69726. adc r12, rdx
  69727. adc r10, 0
  69728. ; A[8] * B[14]
  69729. mov rax, QWORD PTR [r8+112]
  69730. mul QWORD PTR [r9+64]
  69731. add r11, rax
  69732. adc r12, rdx
  69733. adc r10, 0
  69734. ; A[9] * B[13]
  69735. mov rax, QWORD PTR [r8+104]
  69736. mul QWORD PTR [r9+72]
  69737. add r11, rax
  69738. adc r12, rdx
  69739. adc r10, 0
  69740. ; A[10] * B[12]
  69741. mov rax, QWORD PTR [r8+96]
  69742. mul QWORD PTR [r9+80]
  69743. add r11, rax
  69744. adc r12, rdx
  69745. adc r10, 0
  69746. ; A[11] * B[11]
  69747. mov rax, QWORD PTR [r8+88]
  69748. mul QWORD PTR [r9+88]
  69749. add r11, rax
  69750. adc r12, rdx
  69751. adc r10, 0
  69752. ; A[12] * B[10]
  69753. mov rax, QWORD PTR [r8+80]
  69754. mul QWORD PTR [r9+96]
  69755. add r11, rax
  69756. adc r12, rdx
  69757. adc r10, 0
  69758. ; A[13] * B[9]
  69759. mov rax, QWORD PTR [r8+72]
  69760. mul QWORD PTR [r9+104]
  69761. add r11, rax
  69762. adc r12, rdx
  69763. adc r10, 0
  69764. ; A[14] * B[8]
  69765. mov rax, QWORD PTR [r8+64]
  69766. mul QWORD PTR [r9+112]
  69767. add r11, rax
  69768. adc r12, rdx
  69769. adc r10, 0
  69770. ; A[15] * B[7]
  69771. mov rax, QWORD PTR [r8+56]
  69772. mul QWORD PTR [r9+120]
  69773. add r11, rax
  69774. adc r12, rdx
  69775. adc r10, 0
  69776. mov QWORD PTR [rcx+176], r11
  69777. ; A[8] * B[15]
  69778. mov rax, QWORD PTR [r8+120]
  69779. mul QWORD PTR [r9+64]
  69780. xor r11, r11
  69781. add r12, rax
  69782. adc r10, rdx
  69783. adc r11, 0
  69784. ; A[9] * B[14]
  69785. mov rax, QWORD PTR [r8+112]
  69786. mul QWORD PTR [r9+72]
  69787. add r12, rax
  69788. adc r10, rdx
  69789. adc r11, 0
  69790. ; A[10] * B[13]
  69791. mov rax, QWORD PTR [r8+104]
  69792. mul QWORD PTR [r9+80]
  69793. add r12, rax
  69794. adc r10, rdx
  69795. adc r11, 0
  69796. ; A[11] * B[12]
  69797. mov rax, QWORD PTR [r8+96]
  69798. mul QWORD PTR [r9+88]
  69799. add r12, rax
  69800. adc r10, rdx
  69801. adc r11, 0
  69802. ; A[12] * B[11]
  69803. mov rax, QWORD PTR [r8+88]
  69804. mul QWORD PTR [r9+96]
  69805. add r12, rax
  69806. adc r10, rdx
  69807. adc r11, 0
  69808. ; A[13] * B[10]
  69809. mov rax, QWORD PTR [r8+80]
  69810. mul QWORD PTR [r9+104]
  69811. add r12, rax
  69812. adc r10, rdx
  69813. adc r11, 0
  69814. ; A[14] * B[9]
  69815. mov rax, QWORD PTR [r8+72]
  69816. mul QWORD PTR [r9+112]
  69817. add r12, rax
  69818. adc r10, rdx
  69819. adc r11, 0
  69820. ; A[15] * B[8]
  69821. mov rax, QWORD PTR [r8+64]
  69822. mul QWORD PTR [r9+120]
  69823. add r12, rax
  69824. adc r10, rdx
  69825. adc r11, 0
  69826. mov QWORD PTR [rcx+184], r12
  69827. ; A[9] * B[15]
  69828. mov rax, QWORD PTR [r8+120]
  69829. mul QWORD PTR [r9+72]
  69830. xor r12, r12
  69831. add r10, rax
  69832. adc r11, rdx
  69833. adc r12, 0
  69834. ; A[10] * B[14]
  69835. mov rax, QWORD PTR [r8+112]
  69836. mul QWORD PTR [r9+80]
  69837. add r10, rax
  69838. adc r11, rdx
  69839. adc r12, 0
  69840. ; A[11] * B[13]
  69841. mov rax, QWORD PTR [r8+104]
  69842. mul QWORD PTR [r9+88]
  69843. add r10, rax
  69844. adc r11, rdx
  69845. adc r12, 0
  69846. ; A[12] * B[12]
  69847. mov rax, QWORD PTR [r8+96]
  69848. mul QWORD PTR [r9+96]
  69849. add r10, rax
  69850. adc r11, rdx
  69851. adc r12, 0
  69852. ; A[13] * B[11]
  69853. mov rax, QWORD PTR [r8+88]
  69854. mul QWORD PTR [r9+104]
  69855. add r10, rax
  69856. adc r11, rdx
  69857. adc r12, 0
  69858. ; A[14] * B[10]
  69859. mov rax, QWORD PTR [r8+80]
  69860. mul QWORD PTR [r9+112]
  69861. add r10, rax
  69862. adc r11, rdx
  69863. adc r12, 0
  69864. ; A[15] * B[9]
  69865. mov rax, QWORD PTR [r8+72]
  69866. mul QWORD PTR [r9+120]
  69867. add r10, rax
  69868. adc r11, rdx
  69869. adc r12, 0
  69870. mov QWORD PTR [rcx+192], r10
  69871. ; A[10] * B[15]
  69872. mov rax, QWORD PTR [r8+120]
  69873. mul QWORD PTR [r9+80]
  69874. xor r10, r10
  69875. add r11, rax
  69876. adc r12, rdx
  69877. adc r10, 0
  69878. ; A[11] * B[14]
  69879. mov rax, QWORD PTR [r8+112]
  69880. mul QWORD PTR [r9+88]
  69881. add r11, rax
  69882. adc r12, rdx
  69883. adc r10, 0
  69884. ; A[12] * B[13]
  69885. mov rax, QWORD PTR [r8+104]
  69886. mul QWORD PTR [r9+96]
  69887. add r11, rax
  69888. adc r12, rdx
  69889. adc r10, 0
  69890. ; A[13] * B[12]
  69891. mov rax, QWORD PTR [r8+96]
  69892. mul QWORD PTR [r9+104]
  69893. add r11, rax
  69894. adc r12, rdx
  69895. adc r10, 0
  69896. ; A[14] * B[11]
  69897. mov rax, QWORD PTR [r8+88]
  69898. mul QWORD PTR [r9+112]
  69899. add r11, rax
  69900. adc r12, rdx
  69901. adc r10, 0
  69902. ; A[15] * B[10]
  69903. mov rax, QWORD PTR [r8+80]
  69904. mul QWORD PTR [r9+120]
  69905. add r11, rax
  69906. adc r12, rdx
  69907. adc r10, 0
  69908. mov QWORD PTR [rcx+200], r11
  69909. ; A[11] * B[15]
  69910. mov rax, QWORD PTR [r8+120]
  69911. mul QWORD PTR [r9+88]
  69912. xor r11, r11
  69913. add r12, rax
  69914. adc r10, rdx
  69915. adc r11, 0
  69916. ; A[12] * B[14]
  69917. mov rax, QWORD PTR [r8+112]
  69918. mul QWORD PTR [r9+96]
  69919. add r12, rax
  69920. adc r10, rdx
  69921. adc r11, 0
  69922. ; A[13] * B[13]
  69923. mov rax, QWORD PTR [r8+104]
  69924. mul QWORD PTR [r9+104]
  69925. add r12, rax
  69926. adc r10, rdx
  69927. adc r11, 0
  69928. ; A[14] * B[12]
  69929. mov rax, QWORD PTR [r8+96]
  69930. mul QWORD PTR [r9+112]
  69931. add r12, rax
  69932. adc r10, rdx
  69933. adc r11, 0
  69934. ; A[15] * B[11]
  69935. mov rax, QWORD PTR [r8+88]
  69936. mul QWORD PTR [r9+120]
  69937. add r12, rax
  69938. adc r10, rdx
  69939. adc r11, 0
  69940. mov QWORD PTR [rcx+208], r12
  69941. ; A[12] * B[15]
  69942. mov rax, QWORD PTR [r8+120]
  69943. mul QWORD PTR [r9+96]
  69944. xor r12, r12
  69945. add r10, rax
  69946. adc r11, rdx
  69947. adc r12, 0
  69948. ; A[13] * B[14]
  69949. mov rax, QWORD PTR [r8+112]
  69950. mul QWORD PTR [r9+104]
  69951. add r10, rax
  69952. adc r11, rdx
  69953. adc r12, 0
  69954. ; A[14] * B[13]
  69955. mov rax, QWORD PTR [r8+104]
  69956. mul QWORD PTR [r9+112]
  69957. add r10, rax
  69958. adc r11, rdx
  69959. adc r12, 0
  69960. ; A[15] * B[12]
  69961. mov rax, QWORD PTR [r8+96]
  69962. mul QWORD PTR [r9+120]
  69963. add r10, rax
  69964. adc r11, rdx
  69965. adc r12, 0
  69966. mov QWORD PTR [rcx+216], r10
  69967. ; A[13] * B[15]
  69968. mov rax, QWORD PTR [r8+120]
  69969. mul QWORD PTR [r9+104]
  69970. xor r10, r10
  69971. add r11, rax
  69972. adc r12, rdx
  69973. adc r10, 0
  69974. ; A[14] * B[14]
  69975. mov rax, QWORD PTR [r8+112]
  69976. mul QWORD PTR [r9+112]
  69977. add r11, rax
  69978. adc r12, rdx
  69979. adc r10, 0
  69980. ; A[15] * B[13]
  69981. mov rax, QWORD PTR [r8+104]
  69982. mul QWORD PTR [r9+120]
  69983. add r11, rax
  69984. adc r12, rdx
  69985. adc r10, 0
  69986. mov QWORD PTR [rcx+224], r11
  69987. ; A[14] * B[15]
  69988. mov rax, QWORD PTR [r8+120]
  69989. mul QWORD PTR [r9+112]
  69990. xor r11, r11
  69991. add r12, rax
  69992. adc r10, rdx
  69993. adc r11, 0
  69994. ; A[15] * B[14]
  69995. mov rax, QWORD PTR [r8+112]
  69996. mul QWORD PTR [r9+120]
  69997. add r12, rax
  69998. adc r10, rdx
  69999. adc r11, 0
  70000. mov QWORD PTR [rcx+232], r12
  70001. ; A[15] * B[15]
  70002. mov rax, QWORD PTR [r8+120]
  70003. mul QWORD PTR [r9+120]
  70004. add r10, rax
  70005. adc r11, rdx
  70006. mov QWORD PTR [rcx+240], r10
  70007. mov QWORD PTR [rcx+248], r11
  70008. mov rax, QWORD PTR [rsp]
  70009. mov rdx, QWORD PTR [rsp+8]
  70010. mov r10, QWORD PTR [rsp+16]
  70011. mov r11, QWORD PTR [rsp+24]
  70012. mov QWORD PTR [rcx], rax
  70013. mov QWORD PTR [rcx+8], rdx
  70014. mov QWORD PTR [rcx+16], r10
  70015. mov QWORD PTR [rcx+24], r11
  70016. mov rax, QWORD PTR [rsp+32]
  70017. mov rdx, QWORD PTR [rsp+40]
  70018. mov r10, QWORD PTR [rsp+48]
  70019. mov r11, QWORD PTR [rsp+56]
  70020. mov QWORD PTR [rcx+32], rax
  70021. mov QWORD PTR [rcx+40], rdx
  70022. mov QWORD PTR [rcx+48], r10
  70023. mov QWORD PTR [rcx+56], r11
  70024. mov rax, QWORD PTR [rsp+64]
  70025. mov rdx, QWORD PTR [rsp+72]
  70026. mov r10, QWORD PTR [rsp+80]
  70027. mov r11, QWORD PTR [rsp+88]
  70028. mov QWORD PTR [rcx+64], rax
  70029. mov QWORD PTR [rcx+72], rdx
  70030. mov QWORD PTR [rcx+80], r10
  70031. mov QWORD PTR [rcx+88], r11
  70032. mov rax, QWORD PTR [rsp+96]
  70033. mov rdx, QWORD PTR [rsp+104]
  70034. mov r10, QWORD PTR [rsp+112]
  70035. mov r11, QWORD PTR [rsp+120]
  70036. mov QWORD PTR [rcx+96], rax
  70037. mov QWORD PTR [rcx+104], rdx
  70038. mov QWORD PTR [rcx+112], r10
  70039. mov QWORD PTR [rcx+120], r11
  70040. add rsp, 128
  70041. pop r12
  70042. ret
  70043. sp_1024_mul_16 ENDP
  70044. _text ENDS
  70045. ; /* Square a and put result in r. (r = a * a)
  70046. ; *
  70047. ; * r A single precision integer.
  70048. ; * a A single precision integer.
  70049. ; */
  70050. _text SEGMENT READONLY PARA
  70051. sp_1024_sqr_16 PROC
  70052. push r12
  70053. push r13
  70054. push r14
  70055. mov r8, rdx
  70056. sub rsp, 128
  70057. ; A[0] * A[0]
  70058. mov rax, QWORD PTR [r8]
  70059. mul rax
  70060. xor r11, r11
  70061. mov QWORD PTR [rsp], rax
  70062. mov r10, rdx
  70063. ; A[0] * A[1]
  70064. mov rax, QWORD PTR [r8+8]
  70065. mul QWORD PTR [r8]
  70066. xor r9, r9
  70067. add r10, rax
  70068. adc r11, rdx
  70069. adc r9, 0
  70070. add r10, rax
  70071. adc r11, rdx
  70072. adc r9, 0
  70073. mov QWORD PTR [rsp+8], r10
  70074. ; A[0] * A[2]
  70075. mov rax, QWORD PTR [r8+16]
  70076. mul QWORD PTR [r8]
  70077. xor r10, r10
  70078. add r11, rax
  70079. adc r9, rdx
  70080. adc r10, 0
  70081. add r11, rax
  70082. adc r9, rdx
  70083. adc r10, 0
  70084. ; A[1] * A[1]
  70085. mov rax, QWORD PTR [r8+8]
  70086. mul rax
  70087. add r11, rax
  70088. adc r9, rdx
  70089. adc r10, 0
  70090. mov QWORD PTR [rsp+16], r11
  70091. ; A[0] * A[3]
  70092. mov rax, QWORD PTR [r8+24]
  70093. mul QWORD PTR [r8]
  70094. xor r11, r11
  70095. add r9, rax
  70096. adc r10, rdx
  70097. adc r11, 0
  70098. add r9, rax
  70099. adc r10, rdx
  70100. adc r11, 0
  70101. ; A[1] * A[2]
  70102. mov rax, QWORD PTR [r8+16]
  70103. mul QWORD PTR [r8+8]
  70104. add r9, rax
  70105. adc r10, rdx
  70106. adc r11, 0
  70107. add r9, rax
  70108. adc r10, rdx
  70109. adc r11, 0
  70110. mov QWORD PTR [rsp+24], r9
  70111. ; A[0] * A[4]
  70112. mov rax, QWORD PTR [r8+32]
  70113. mul QWORD PTR [r8]
  70114. xor r9, r9
  70115. add r10, rax
  70116. adc r11, rdx
  70117. adc r9, 0
  70118. add r10, rax
  70119. adc r11, rdx
  70120. adc r9, 0
  70121. ; A[1] * A[3]
  70122. mov rax, QWORD PTR [r8+24]
  70123. mul QWORD PTR [r8+8]
  70124. add r10, rax
  70125. adc r11, rdx
  70126. adc r9, 0
  70127. add r10, rax
  70128. adc r11, rdx
  70129. adc r9, 0
  70130. ; A[2] * A[2]
  70131. mov rax, QWORD PTR [r8+16]
  70132. mul rax
  70133. add r10, rax
  70134. adc r11, rdx
  70135. adc r9, 0
  70136. mov QWORD PTR [rsp+32], r10
  70137. ; A[0] * A[5]
  70138. mov rax, QWORD PTR [r8+40]
  70139. mul QWORD PTR [r8]
  70140. xor r10, r10
  70141. xor r14, r14
  70142. mov r12, rax
  70143. mov r13, rdx
  70144. ; A[1] * A[4]
  70145. mov rax, QWORD PTR [r8+32]
  70146. mul QWORD PTR [r8+8]
  70147. add r12, rax
  70148. adc r13, rdx
  70149. adc r14, 0
  70150. ; A[2] * A[3]
  70151. mov rax, QWORD PTR [r8+24]
  70152. mul QWORD PTR [r8+16]
  70153. add r12, rax
  70154. adc r13, rdx
  70155. adc r14, 0
  70156. add r12, r12
  70157. adc r13, r13
  70158. adc r14, r14
  70159. add r11, r12
  70160. adc r9, r13
  70161. adc r10, r14
  70162. mov QWORD PTR [rsp+40], r11
  70163. ; A[0] * A[6]
  70164. mov rax, QWORD PTR [r8+48]
  70165. mul QWORD PTR [r8]
  70166. xor r11, r11
  70167. xor r14, r14
  70168. mov r12, rax
  70169. mov r13, rdx
  70170. ; A[1] * A[5]
  70171. mov rax, QWORD PTR [r8+40]
  70172. mul QWORD PTR [r8+8]
  70173. add r12, rax
  70174. adc r13, rdx
  70175. adc r14, 0
  70176. ; A[2] * A[4]
  70177. mov rax, QWORD PTR [r8+32]
  70178. mul QWORD PTR [r8+16]
  70179. add r12, rax
  70180. adc r13, rdx
  70181. adc r14, 0
  70182. ; A[3] * A[3]
  70183. mov rax, QWORD PTR [r8+24]
  70184. mul rax
  70185. add r12, r12
  70186. adc r13, r13
  70187. adc r14, r14
  70188. add r12, rax
  70189. adc r13, rdx
  70190. adc r14, 0
  70191. add r9, r12
  70192. adc r10, r13
  70193. adc r11, r14
  70194. mov QWORD PTR [rsp+48], r9
  70195. ; A[0] * A[7]
  70196. mov rax, QWORD PTR [r8+56]
  70197. mul QWORD PTR [r8]
  70198. xor r9, r9
  70199. xor r14, r14
  70200. mov r12, rax
  70201. mov r13, rdx
  70202. ; A[1] * A[6]
  70203. mov rax, QWORD PTR [r8+48]
  70204. mul QWORD PTR [r8+8]
  70205. add r12, rax
  70206. adc r13, rdx
  70207. adc r14, 0
  70208. ; A[2] * A[5]
  70209. mov rax, QWORD PTR [r8+40]
  70210. mul QWORD PTR [r8+16]
  70211. add r12, rax
  70212. adc r13, rdx
  70213. adc r14, 0
  70214. ; A[3] * A[4]
  70215. mov rax, QWORD PTR [r8+32]
  70216. mul QWORD PTR [r8+24]
  70217. add r12, rax
  70218. adc r13, rdx
  70219. adc r14, 0
  70220. add r12, r12
  70221. adc r13, r13
  70222. adc r14, r14
  70223. add r10, r12
  70224. adc r11, r13
  70225. adc r9, r14
  70226. mov QWORD PTR [rsp+56], r10
  70227. ; A[0] * A[8]
  70228. mov rax, QWORD PTR [r8+64]
  70229. mul QWORD PTR [r8]
  70230. xor r10, r10
  70231. xor r14, r14
  70232. mov r12, rax
  70233. mov r13, rdx
  70234. ; A[1] * A[7]
  70235. mov rax, QWORD PTR [r8+56]
  70236. mul QWORD PTR [r8+8]
  70237. add r12, rax
  70238. adc r13, rdx
  70239. adc r14, 0
  70240. ; A[2] * A[6]
  70241. mov rax, QWORD PTR [r8+48]
  70242. mul QWORD PTR [r8+16]
  70243. add r12, rax
  70244. adc r13, rdx
  70245. adc r14, 0
  70246. ; A[3] * A[5]
  70247. mov rax, QWORD PTR [r8+40]
  70248. mul QWORD PTR [r8+24]
  70249. add r12, rax
  70250. adc r13, rdx
  70251. adc r14, 0
  70252. ; A[4] * A[4]
  70253. mov rax, QWORD PTR [r8+32]
  70254. mul rax
  70255. add r12, r12
  70256. adc r13, r13
  70257. adc r14, r14
  70258. add r12, rax
  70259. adc r13, rdx
  70260. adc r14, 0
  70261. add r11, r12
  70262. adc r9, r13
  70263. adc r10, r14
  70264. mov QWORD PTR [rsp+64], r11
  70265. ; A[0] * A[9]
  70266. mov rax, QWORD PTR [r8+72]
  70267. mul QWORD PTR [r8]
  70268. xor r11, r11
  70269. xor r14, r14
  70270. mov r12, rax
  70271. mov r13, rdx
  70272. ; A[1] * A[8]
  70273. mov rax, QWORD PTR [r8+64]
  70274. mul QWORD PTR [r8+8]
  70275. add r12, rax
  70276. adc r13, rdx
  70277. adc r14, 0
  70278. ; A[2] * A[7]
  70279. mov rax, QWORD PTR [r8+56]
  70280. mul QWORD PTR [r8+16]
  70281. add r12, rax
  70282. adc r13, rdx
  70283. adc r14, 0
  70284. ; A[3] * A[6]
  70285. mov rax, QWORD PTR [r8+48]
  70286. mul QWORD PTR [r8+24]
  70287. add r12, rax
  70288. adc r13, rdx
  70289. adc r14, 0
  70290. ; A[4] * A[5]
  70291. mov rax, QWORD PTR [r8+40]
  70292. mul QWORD PTR [r8+32]
  70293. add r12, rax
  70294. adc r13, rdx
  70295. adc r14, 0
  70296. add r12, r12
  70297. adc r13, r13
  70298. adc r14, r14
  70299. add r9, r12
  70300. adc r10, r13
  70301. adc r11, r14
  70302. mov QWORD PTR [rsp+72], r9
  70303. ; A[0] * A[10]
  70304. mov rax, QWORD PTR [r8+80]
  70305. mul QWORD PTR [r8]
  70306. xor r9, r9
  70307. xor r14, r14
  70308. mov r12, rax
  70309. mov r13, rdx
  70310. ; A[1] * A[9]
  70311. mov rax, QWORD PTR [r8+72]
  70312. mul QWORD PTR [r8+8]
  70313. add r12, rax
  70314. adc r13, rdx
  70315. adc r14, 0
  70316. ; A[2] * A[8]
  70317. mov rax, QWORD PTR [r8+64]
  70318. mul QWORD PTR [r8+16]
  70319. add r12, rax
  70320. adc r13, rdx
  70321. adc r14, 0
  70322. ; A[3] * A[7]
  70323. mov rax, QWORD PTR [r8+56]
  70324. mul QWORD PTR [r8+24]
  70325. add r12, rax
  70326. adc r13, rdx
  70327. adc r14, 0
  70328. ; A[4] * A[6]
  70329. mov rax, QWORD PTR [r8+48]
  70330. mul QWORD PTR [r8+32]
  70331. add r12, rax
  70332. adc r13, rdx
  70333. adc r14, 0
  70334. ; A[5] * A[5]
  70335. mov rax, QWORD PTR [r8+40]
  70336. mul rax
  70337. add r12, r12
  70338. adc r13, r13
  70339. adc r14, r14
  70340. add r12, rax
  70341. adc r13, rdx
  70342. adc r14, 0
  70343. add r10, r12
  70344. adc r11, r13
  70345. adc r9, r14
  70346. mov QWORD PTR [rsp+80], r10
  70347. ; A[0] * A[11]
  70348. mov rax, QWORD PTR [r8+88]
  70349. mul QWORD PTR [r8]
  70350. xor r10, r10
  70351. xor r14, r14
  70352. mov r12, rax
  70353. mov r13, rdx
  70354. ; A[1] * A[10]
  70355. mov rax, QWORD PTR [r8+80]
  70356. mul QWORD PTR [r8+8]
  70357. add r12, rax
  70358. adc r13, rdx
  70359. adc r14, 0
  70360. ; A[2] * A[9]
  70361. mov rax, QWORD PTR [r8+72]
  70362. mul QWORD PTR [r8+16]
  70363. add r12, rax
  70364. adc r13, rdx
  70365. adc r14, 0
  70366. ; A[3] * A[8]
  70367. mov rax, QWORD PTR [r8+64]
  70368. mul QWORD PTR [r8+24]
  70369. add r12, rax
  70370. adc r13, rdx
  70371. adc r14, 0
  70372. ; A[4] * A[7]
  70373. mov rax, QWORD PTR [r8+56]
  70374. mul QWORD PTR [r8+32]
  70375. add r12, rax
  70376. adc r13, rdx
  70377. adc r14, 0
  70378. ; A[5] * A[6]
  70379. mov rax, QWORD PTR [r8+48]
  70380. mul QWORD PTR [r8+40]
  70381. add r12, rax
  70382. adc r13, rdx
  70383. adc r14, 0
  70384. add r12, r12
  70385. adc r13, r13
  70386. adc r14, r14
  70387. add r11, r12
  70388. adc r9, r13
  70389. adc r10, r14
  70390. mov QWORD PTR [rsp+88], r11
  70391. ; A[0] * A[12]
  70392. mov rax, QWORD PTR [r8+96]
  70393. mul QWORD PTR [r8]
  70394. xor r11, r11
  70395. xor r14, r14
  70396. mov r12, rax
  70397. mov r13, rdx
  70398. ; A[1] * A[11]
  70399. mov rax, QWORD PTR [r8+88]
  70400. mul QWORD PTR [r8+8]
  70401. add r12, rax
  70402. adc r13, rdx
  70403. adc r14, 0
  70404. ; A[2] * A[10]
  70405. mov rax, QWORD PTR [r8+80]
  70406. mul QWORD PTR [r8+16]
  70407. add r12, rax
  70408. adc r13, rdx
  70409. adc r14, 0
  70410. ; A[3] * A[9]
  70411. mov rax, QWORD PTR [r8+72]
  70412. mul QWORD PTR [r8+24]
  70413. add r12, rax
  70414. adc r13, rdx
  70415. adc r14, 0
  70416. ; A[4] * A[8]
  70417. mov rax, QWORD PTR [r8+64]
  70418. mul QWORD PTR [r8+32]
  70419. add r12, rax
  70420. adc r13, rdx
  70421. adc r14, 0
  70422. ; A[5] * A[7]
  70423. mov rax, QWORD PTR [r8+56]
  70424. mul QWORD PTR [r8+40]
  70425. add r12, rax
  70426. adc r13, rdx
  70427. adc r14, 0
  70428. ; A[6] * A[6]
  70429. mov rax, QWORD PTR [r8+48]
  70430. mul rax
  70431. add r12, r12
  70432. adc r13, r13
  70433. adc r14, r14
  70434. add r12, rax
  70435. adc r13, rdx
  70436. adc r14, 0
  70437. add r9, r12
  70438. adc r10, r13
  70439. adc r11, r14
  70440. mov QWORD PTR [rsp+96], r9
  70441. ; A[0] * A[13]
  70442. mov rax, QWORD PTR [r8+104]
  70443. mul QWORD PTR [r8]
  70444. xor r9, r9
  70445. xor r14, r14
  70446. mov r12, rax
  70447. mov r13, rdx
  70448. ; A[1] * A[12]
  70449. mov rax, QWORD PTR [r8+96]
  70450. mul QWORD PTR [r8+8]
  70451. add r12, rax
  70452. adc r13, rdx
  70453. adc r14, 0
  70454. ; A[2] * A[11]
  70455. mov rax, QWORD PTR [r8+88]
  70456. mul QWORD PTR [r8+16]
  70457. add r12, rax
  70458. adc r13, rdx
  70459. adc r14, 0
  70460. ; A[3] * A[10]
  70461. mov rax, QWORD PTR [r8+80]
  70462. mul QWORD PTR [r8+24]
  70463. add r12, rax
  70464. adc r13, rdx
  70465. adc r14, 0
  70466. ; A[4] * A[9]
  70467. mov rax, QWORD PTR [r8+72]
  70468. mul QWORD PTR [r8+32]
  70469. add r12, rax
  70470. adc r13, rdx
  70471. adc r14, 0
  70472. ; A[5] * A[8]
  70473. mov rax, QWORD PTR [r8+64]
  70474. mul QWORD PTR [r8+40]
  70475. add r12, rax
  70476. adc r13, rdx
  70477. adc r14, 0
  70478. ; A[6] * A[7]
  70479. mov rax, QWORD PTR [r8+56]
  70480. mul QWORD PTR [r8+48]
  70481. add r12, rax
  70482. adc r13, rdx
  70483. adc r14, 0
  70484. add r12, r12
  70485. adc r13, r13
  70486. adc r14, r14
  70487. add r10, r12
  70488. adc r11, r13
  70489. adc r9, r14
  70490. mov QWORD PTR [rsp+104], r10
  70491. ; A[0] * A[14]
  70492. mov rax, QWORD PTR [r8+112]
  70493. mul QWORD PTR [r8]
  70494. xor r10, r10
  70495. xor r14, r14
  70496. mov r12, rax
  70497. mov r13, rdx
  70498. ; A[1] * A[13]
  70499. mov rax, QWORD PTR [r8+104]
  70500. mul QWORD PTR [r8+8]
  70501. add r12, rax
  70502. adc r13, rdx
  70503. adc r14, 0
  70504. ; A[2] * A[12]
  70505. mov rax, QWORD PTR [r8+96]
  70506. mul QWORD PTR [r8+16]
  70507. add r12, rax
  70508. adc r13, rdx
  70509. adc r14, 0
  70510. ; A[3] * A[11]
  70511. mov rax, QWORD PTR [r8+88]
  70512. mul QWORD PTR [r8+24]
  70513. add r12, rax
  70514. adc r13, rdx
  70515. adc r14, 0
  70516. ; A[4] * A[10]
  70517. mov rax, QWORD PTR [r8+80]
  70518. mul QWORD PTR [r8+32]
  70519. add r12, rax
  70520. adc r13, rdx
  70521. adc r14, 0
  70522. ; A[5] * A[9]
  70523. mov rax, QWORD PTR [r8+72]
  70524. mul QWORD PTR [r8+40]
  70525. add r12, rax
  70526. adc r13, rdx
  70527. adc r14, 0
  70528. ; A[6] * A[8]
  70529. mov rax, QWORD PTR [r8+64]
  70530. mul QWORD PTR [r8+48]
  70531. add r12, rax
  70532. adc r13, rdx
  70533. adc r14, 0
  70534. ; A[7] * A[7]
  70535. mov rax, QWORD PTR [r8+56]
  70536. mul rax
  70537. add r12, r12
  70538. adc r13, r13
  70539. adc r14, r14
  70540. add r12, rax
  70541. adc r13, rdx
  70542. adc r14, 0
  70543. add r11, r12
  70544. adc r9, r13
  70545. adc r10, r14
  70546. mov QWORD PTR [rsp+112], r11
  70547. ; A[0] * A[15]
  70548. mov rax, QWORD PTR [r8+120]
  70549. mul QWORD PTR [r8]
  70550. xor r11, r11
  70551. xor r14, r14
  70552. mov r12, rax
  70553. mov r13, rdx
  70554. ; A[1] * A[14]
  70555. mov rax, QWORD PTR [r8+112]
  70556. mul QWORD PTR [r8+8]
  70557. add r12, rax
  70558. adc r13, rdx
  70559. adc r14, 0
  70560. ; A[2] * A[13]
  70561. mov rax, QWORD PTR [r8+104]
  70562. mul QWORD PTR [r8+16]
  70563. add r12, rax
  70564. adc r13, rdx
  70565. adc r14, 0
  70566. ; A[3] * A[12]
  70567. mov rax, QWORD PTR [r8+96]
  70568. mul QWORD PTR [r8+24]
  70569. add r12, rax
  70570. adc r13, rdx
  70571. adc r14, 0
  70572. ; A[4] * A[11]
  70573. mov rax, QWORD PTR [r8+88]
  70574. mul QWORD PTR [r8+32]
  70575. add r12, rax
  70576. adc r13, rdx
  70577. adc r14, 0
  70578. ; A[5] * A[10]
  70579. mov rax, QWORD PTR [r8+80]
  70580. mul QWORD PTR [r8+40]
  70581. add r12, rax
  70582. adc r13, rdx
  70583. adc r14, 0
  70584. ; A[6] * A[9]
  70585. mov rax, QWORD PTR [r8+72]
  70586. mul QWORD PTR [r8+48]
  70587. add r12, rax
  70588. adc r13, rdx
  70589. adc r14, 0
  70590. ; A[7] * A[8]
  70591. mov rax, QWORD PTR [r8+64]
  70592. mul QWORD PTR [r8+56]
  70593. add r12, rax
  70594. adc r13, rdx
  70595. adc r14, 0
  70596. add r12, r12
  70597. adc r13, r13
  70598. adc r14, r14
  70599. add r9, r12
  70600. adc r10, r13
  70601. adc r11, r14
  70602. mov QWORD PTR [rsp+120], r9
  70603. ; A[1] * A[15]
  70604. mov rax, QWORD PTR [r8+120]
  70605. mul QWORD PTR [r8+8]
  70606. xor r9, r9
  70607. xor r14, r14
  70608. mov r12, rax
  70609. mov r13, rdx
  70610. ; A[2] * A[14]
  70611. mov rax, QWORD PTR [r8+112]
  70612. mul QWORD PTR [r8+16]
  70613. add r12, rax
  70614. adc r13, rdx
  70615. adc r14, 0
  70616. ; A[3] * A[13]
  70617. mov rax, QWORD PTR [r8+104]
  70618. mul QWORD PTR [r8+24]
  70619. add r12, rax
  70620. adc r13, rdx
  70621. adc r14, 0
  70622. ; A[4] * A[12]
  70623. mov rax, QWORD PTR [r8+96]
  70624. mul QWORD PTR [r8+32]
  70625. add r12, rax
  70626. adc r13, rdx
  70627. adc r14, 0
  70628. ; A[5] * A[11]
  70629. mov rax, QWORD PTR [r8+88]
  70630. mul QWORD PTR [r8+40]
  70631. add r12, rax
  70632. adc r13, rdx
  70633. adc r14, 0
  70634. ; A[6] * A[10]
  70635. mov rax, QWORD PTR [r8+80]
  70636. mul QWORD PTR [r8+48]
  70637. add r12, rax
  70638. adc r13, rdx
  70639. adc r14, 0
  70640. ; A[7] * A[9]
  70641. mov rax, QWORD PTR [r8+72]
  70642. mul QWORD PTR [r8+56]
  70643. add r12, rax
  70644. adc r13, rdx
  70645. adc r14, 0
  70646. ; A[8] * A[8]
  70647. mov rax, QWORD PTR [r8+64]
  70648. mul rax
  70649. add r12, r12
  70650. adc r13, r13
  70651. adc r14, r14
  70652. add r12, rax
  70653. adc r13, rdx
  70654. adc r14, 0
  70655. add r10, r12
  70656. adc r11, r13
  70657. adc r9, r14
  70658. mov QWORD PTR [rcx+128], r10
  70659. ; A[2] * A[15]
  70660. mov rax, QWORD PTR [r8+120]
  70661. mul QWORD PTR [r8+16]
  70662. xor r10, r10
  70663. xor r14, r14
  70664. mov r12, rax
  70665. mov r13, rdx
  70666. ; A[3] * A[14]
  70667. mov rax, QWORD PTR [r8+112]
  70668. mul QWORD PTR [r8+24]
  70669. add r12, rax
  70670. adc r13, rdx
  70671. adc r14, 0
  70672. ; A[4] * A[13]
  70673. mov rax, QWORD PTR [r8+104]
  70674. mul QWORD PTR [r8+32]
  70675. add r12, rax
  70676. adc r13, rdx
  70677. adc r14, 0
  70678. ; A[5] * A[12]
  70679. mov rax, QWORD PTR [r8+96]
  70680. mul QWORD PTR [r8+40]
  70681. add r12, rax
  70682. adc r13, rdx
  70683. adc r14, 0
  70684. ; A[6] * A[11]
  70685. mov rax, QWORD PTR [r8+88]
  70686. mul QWORD PTR [r8+48]
  70687. add r12, rax
  70688. adc r13, rdx
  70689. adc r14, 0
  70690. ; A[7] * A[10]
  70691. mov rax, QWORD PTR [r8+80]
  70692. mul QWORD PTR [r8+56]
  70693. add r12, rax
  70694. adc r13, rdx
  70695. adc r14, 0
  70696. ; A[8] * A[9]
  70697. mov rax, QWORD PTR [r8+72]
  70698. mul QWORD PTR [r8+64]
  70699. add r12, rax
  70700. adc r13, rdx
  70701. adc r14, 0
  70702. add r12, r12
  70703. adc r13, r13
  70704. adc r14, r14
  70705. add r11, r12
  70706. adc r9, r13
  70707. adc r10, r14
  70708. mov QWORD PTR [rcx+136], r11
  70709. ; A[3] * A[15]
  70710. mov rax, QWORD PTR [r8+120]
  70711. mul QWORD PTR [r8+24]
  70712. xor r11, r11
  70713. xor r14, r14
  70714. mov r12, rax
  70715. mov r13, rdx
  70716. ; A[4] * A[14]
  70717. mov rax, QWORD PTR [r8+112]
  70718. mul QWORD PTR [r8+32]
  70719. add r12, rax
  70720. adc r13, rdx
  70721. adc r14, 0
  70722. ; A[5] * A[13]
  70723. mov rax, QWORD PTR [r8+104]
  70724. mul QWORD PTR [r8+40]
  70725. add r12, rax
  70726. adc r13, rdx
  70727. adc r14, 0
  70728. ; A[6] * A[12]
  70729. mov rax, QWORD PTR [r8+96]
  70730. mul QWORD PTR [r8+48]
  70731. add r12, rax
  70732. adc r13, rdx
  70733. adc r14, 0
  70734. ; A[7] * A[11]
  70735. mov rax, QWORD PTR [r8+88]
  70736. mul QWORD PTR [r8+56]
  70737. add r12, rax
  70738. adc r13, rdx
  70739. adc r14, 0
  70740. ; A[8] * A[10]
  70741. mov rax, QWORD PTR [r8+80]
  70742. mul QWORD PTR [r8+64]
  70743. add r12, rax
  70744. adc r13, rdx
  70745. adc r14, 0
  70746. ; A[9] * A[9]
  70747. mov rax, QWORD PTR [r8+72]
  70748. mul rax
  70749. add r12, r12
  70750. adc r13, r13
  70751. adc r14, r14
  70752. add r12, rax
  70753. adc r13, rdx
  70754. adc r14, 0
  70755. add r9, r12
  70756. adc r10, r13
  70757. adc r11, r14
  70758. mov QWORD PTR [rcx+144], r9
  70759. ; A[4] * A[15]
  70760. mov rax, QWORD PTR [r8+120]
  70761. mul QWORD PTR [r8+32]
  70762. xor r9, r9
  70763. xor r14, r14
  70764. mov r12, rax
  70765. mov r13, rdx
  70766. ; A[5] * A[14]
  70767. mov rax, QWORD PTR [r8+112]
  70768. mul QWORD PTR [r8+40]
  70769. add r12, rax
  70770. adc r13, rdx
  70771. adc r14, 0
  70772. ; A[6] * A[13]
  70773. mov rax, QWORD PTR [r8+104]
  70774. mul QWORD PTR [r8+48]
  70775. add r12, rax
  70776. adc r13, rdx
  70777. adc r14, 0
  70778. ; A[7] * A[12]
  70779. mov rax, QWORD PTR [r8+96]
  70780. mul QWORD PTR [r8+56]
  70781. add r12, rax
  70782. adc r13, rdx
  70783. adc r14, 0
  70784. ; A[8] * A[11]
  70785. mov rax, QWORD PTR [r8+88]
  70786. mul QWORD PTR [r8+64]
  70787. add r12, rax
  70788. adc r13, rdx
  70789. adc r14, 0
  70790. ; A[9] * A[10]
  70791. mov rax, QWORD PTR [r8+80]
  70792. mul QWORD PTR [r8+72]
  70793. add r12, rax
  70794. adc r13, rdx
  70795. adc r14, 0
  70796. add r12, r12
  70797. adc r13, r13
  70798. adc r14, r14
  70799. add r10, r12
  70800. adc r11, r13
  70801. adc r9, r14
  70802. mov QWORD PTR [rcx+152], r10
  70803. ; A[5] * A[15]
  70804. mov rax, QWORD PTR [r8+120]
  70805. mul QWORD PTR [r8+40]
  70806. xor r10, r10
  70807. xor r14, r14
  70808. mov r12, rax
  70809. mov r13, rdx
  70810. ; A[6] * A[14]
  70811. mov rax, QWORD PTR [r8+112]
  70812. mul QWORD PTR [r8+48]
  70813. add r12, rax
  70814. adc r13, rdx
  70815. adc r14, 0
  70816. ; A[7] * A[13]
  70817. mov rax, QWORD PTR [r8+104]
  70818. mul QWORD PTR [r8+56]
  70819. add r12, rax
  70820. adc r13, rdx
  70821. adc r14, 0
  70822. ; A[8] * A[12]
  70823. mov rax, QWORD PTR [r8+96]
  70824. mul QWORD PTR [r8+64]
  70825. add r12, rax
  70826. adc r13, rdx
  70827. adc r14, 0
  70828. ; A[9] * A[11]
  70829. mov rax, QWORD PTR [r8+88]
  70830. mul QWORD PTR [r8+72]
  70831. add r12, rax
  70832. adc r13, rdx
  70833. adc r14, 0
  70834. ; A[10] * A[10]
  70835. mov rax, QWORD PTR [r8+80]
  70836. mul rax
  70837. add r12, r12
  70838. adc r13, r13
  70839. adc r14, r14
  70840. add r12, rax
  70841. adc r13, rdx
  70842. adc r14, 0
  70843. add r11, r12
  70844. adc r9, r13
  70845. adc r10, r14
  70846. mov QWORD PTR [rcx+160], r11
  70847. ; A[6] * A[15]
  70848. mov rax, QWORD PTR [r8+120]
  70849. mul QWORD PTR [r8+48]
  70850. xor r11, r11
  70851. xor r14, r14
  70852. mov r12, rax
  70853. mov r13, rdx
  70854. ; A[7] * A[14]
  70855. mov rax, QWORD PTR [r8+112]
  70856. mul QWORD PTR [r8+56]
  70857. add r12, rax
  70858. adc r13, rdx
  70859. adc r14, 0
  70860. ; A[8] * A[13]
  70861. mov rax, QWORD PTR [r8+104]
  70862. mul QWORD PTR [r8+64]
  70863. add r12, rax
  70864. adc r13, rdx
  70865. adc r14, 0
  70866. ; A[9] * A[12]
  70867. mov rax, QWORD PTR [r8+96]
  70868. mul QWORD PTR [r8+72]
  70869. add r12, rax
  70870. adc r13, rdx
  70871. adc r14, 0
  70872. ; A[10] * A[11]
  70873. mov rax, QWORD PTR [r8+88]
  70874. mul QWORD PTR [r8+80]
  70875. add r12, rax
  70876. adc r13, rdx
  70877. adc r14, 0
  70878. add r12, r12
  70879. adc r13, r13
  70880. adc r14, r14
  70881. add r9, r12
  70882. adc r10, r13
  70883. adc r11, r14
  70884. mov QWORD PTR [rcx+168], r9
  70885. ; A[7] * A[15]
  70886. mov rax, QWORD PTR [r8+120]
  70887. mul QWORD PTR [r8+56]
  70888. xor r9, r9
  70889. xor r14, r14
  70890. mov r12, rax
  70891. mov r13, rdx
  70892. ; A[8] * A[14]
  70893. mov rax, QWORD PTR [r8+112]
  70894. mul QWORD PTR [r8+64]
  70895. add r12, rax
  70896. adc r13, rdx
  70897. adc r14, 0
  70898. ; A[9] * A[13]
  70899. mov rax, QWORD PTR [r8+104]
  70900. mul QWORD PTR [r8+72]
  70901. add r12, rax
  70902. adc r13, rdx
  70903. adc r14, 0
  70904. ; A[10] * A[12]
  70905. mov rax, QWORD PTR [r8+96]
  70906. mul QWORD PTR [r8+80]
  70907. add r12, rax
  70908. adc r13, rdx
  70909. adc r14, 0
  70910. ; A[11] * A[11]
  70911. mov rax, QWORD PTR [r8+88]
  70912. mul rax
  70913. add r12, r12
  70914. adc r13, r13
  70915. adc r14, r14
  70916. add r12, rax
  70917. adc r13, rdx
  70918. adc r14, 0
  70919. add r10, r12
  70920. adc r11, r13
  70921. adc r9, r14
  70922. mov QWORD PTR [rcx+176], r10
  70923. ; A[8] * A[15]
  70924. mov rax, QWORD PTR [r8+120]
  70925. mul QWORD PTR [r8+64]
  70926. xor r10, r10
  70927. xor r14, r14
  70928. mov r12, rax
  70929. mov r13, rdx
  70930. ; A[9] * A[14]
  70931. mov rax, QWORD PTR [r8+112]
  70932. mul QWORD PTR [r8+72]
  70933. add r12, rax
  70934. adc r13, rdx
  70935. adc r14, 0
  70936. ; A[10] * A[13]
  70937. mov rax, QWORD PTR [r8+104]
  70938. mul QWORD PTR [r8+80]
  70939. add r12, rax
  70940. adc r13, rdx
  70941. adc r14, 0
  70942. ; A[11] * A[12]
  70943. mov rax, QWORD PTR [r8+96]
  70944. mul QWORD PTR [r8+88]
  70945. add r12, rax
  70946. adc r13, rdx
  70947. adc r14, 0
  70948. add r12, r12
  70949. adc r13, r13
  70950. adc r14, r14
  70951. add r11, r12
  70952. adc r9, r13
  70953. adc r10, r14
  70954. mov QWORD PTR [rcx+184], r11
  70955. ; A[9] * A[15]
  70956. mov rax, QWORD PTR [r8+120]
  70957. mul QWORD PTR [r8+72]
  70958. xor r11, r11
  70959. xor r14, r14
  70960. mov r12, rax
  70961. mov r13, rdx
  70962. ; A[10] * A[14]
  70963. mov rax, QWORD PTR [r8+112]
  70964. mul QWORD PTR [r8+80]
  70965. add r12, rax
  70966. adc r13, rdx
  70967. adc r14, 0
  70968. ; A[11] * A[13]
  70969. mov rax, QWORD PTR [r8+104]
  70970. mul QWORD PTR [r8+88]
  70971. add r12, rax
  70972. adc r13, rdx
  70973. adc r14, 0
  70974. ; A[12] * A[12]
  70975. mov rax, QWORD PTR [r8+96]
  70976. mul rax
  70977. add r12, r12
  70978. adc r13, r13
  70979. adc r14, r14
  70980. add r12, rax
  70981. adc r13, rdx
  70982. adc r14, 0
  70983. add r9, r12
  70984. adc r10, r13
  70985. adc r11, r14
  70986. mov QWORD PTR [rcx+192], r9
  70987. ; A[10] * A[15]
  70988. mov rax, QWORD PTR [r8+120]
  70989. mul QWORD PTR [r8+80]
  70990. xor r9, r9
  70991. xor r14, r14
  70992. mov r12, rax
  70993. mov r13, rdx
  70994. ; A[11] * A[14]
  70995. mov rax, QWORD PTR [r8+112]
  70996. mul QWORD PTR [r8+88]
  70997. add r12, rax
  70998. adc r13, rdx
  70999. adc r14, 0
  71000. ; A[12] * A[13]
  71001. mov rax, QWORD PTR [r8+104]
  71002. mul QWORD PTR [r8+96]
  71003. add r12, rax
  71004. adc r13, rdx
  71005. adc r14, 0
  71006. add r12, r12
  71007. adc r13, r13
  71008. adc r14, r14
  71009. add r10, r12
  71010. adc r11, r13
  71011. adc r9, r14
  71012. mov QWORD PTR [rcx+200], r10
  71013. ; A[11] * A[15]
  71014. mov rax, QWORD PTR [r8+120]
  71015. mul QWORD PTR [r8+88]
  71016. xor r10, r10
  71017. add r11, rax
  71018. adc r9, rdx
  71019. adc r10, 0
  71020. add r11, rax
  71021. adc r9, rdx
  71022. adc r10, 0
  71023. ; A[12] * A[14]
  71024. mov rax, QWORD PTR [r8+112]
  71025. mul QWORD PTR [r8+96]
  71026. add r11, rax
  71027. adc r9, rdx
  71028. adc r10, 0
  71029. add r11, rax
  71030. adc r9, rdx
  71031. adc r10, 0
  71032. ; A[13] * A[13]
  71033. mov rax, QWORD PTR [r8+104]
  71034. mul rax
  71035. add r11, rax
  71036. adc r9, rdx
  71037. adc r10, 0
  71038. mov QWORD PTR [rcx+208], r11
  71039. ; A[12] * A[15]
  71040. mov rax, QWORD PTR [r8+120]
  71041. mul QWORD PTR [r8+96]
  71042. xor r11, r11
  71043. add r9, rax
  71044. adc r10, rdx
  71045. adc r11, 0
  71046. add r9, rax
  71047. adc r10, rdx
  71048. adc r11, 0
  71049. ; A[13] * A[14]
  71050. mov rax, QWORD PTR [r8+112]
  71051. mul QWORD PTR [r8+104]
  71052. add r9, rax
  71053. adc r10, rdx
  71054. adc r11, 0
  71055. add r9, rax
  71056. adc r10, rdx
  71057. adc r11, 0
  71058. mov QWORD PTR [rcx+216], r9
  71059. ; A[13] * A[15]
  71060. mov rax, QWORD PTR [r8+120]
  71061. mul QWORD PTR [r8+104]
  71062. xor r9, r9
  71063. add r10, rax
  71064. adc r11, rdx
  71065. adc r9, 0
  71066. add r10, rax
  71067. adc r11, rdx
  71068. adc r9, 0
  71069. ; A[14] * A[14]
  71070. mov rax, QWORD PTR [r8+112]
  71071. mul rax
  71072. add r10, rax
  71073. adc r11, rdx
  71074. adc r9, 0
  71075. mov QWORD PTR [rcx+224], r10
  71076. ; A[14] * A[15]
  71077. mov rax, QWORD PTR [r8+120]
  71078. mul QWORD PTR [r8+112]
  71079. xor r10, r10
  71080. add r11, rax
  71081. adc r9, rdx
  71082. adc r10, 0
  71083. add r11, rax
  71084. adc r9, rdx
  71085. adc r10, 0
  71086. mov QWORD PTR [rcx+232], r11
  71087. ; A[15] * A[15]
  71088. mov rax, QWORD PTR [r8+120]
  71089. mul rax
  71090. add r9, rax
  71091. adc r10, rdx
  71092. mov QWORD PTR [rcx+240], r9
  71093. mov QWORD PTR [rcx+248], r10
  71094. mov rax, QWORD PTR [rsp]
  71095. mov rdx, QWORD PTR [rsp+8]
  71096. mov r12, QWORD PTR [rsp+16]
  71097. mov r13, QWORD PTR [rsp+24]
  71098. mov QWORD PTR [rcx], rax
  71099. mov QWORD PTR [rcx+8], rdx
  71100. mov QWORD PTR [rcx+16], r12
  71101. mov QWORD PTR [rcx+24], r13
  71102. mov rax, QWORD PTR [rsp+32]
  71103. mov rdx, QWORD PTR [rsp+40]
  71104. mov r12, QWORD PTR [rsp+48]
  71105. mov r13, QWORD PTR [rsp+56]
  71106. mov QWORD PTR [rcx+32], rax
  71107. mov QWORD PTR [rcx+40], rdx
  71108. mov QWORD PTR [rcx+48], r12
  71109. mov QWORD PTR [rcx+56], r13
  71110. mov rax, QWORD PTR [rsp+64]
  71111. mov rdx, QWORD PTR [rsp+72]
  71112. mov r12, QWORD PTR [rsp+80]
  71113. mov r13, QWORD PTR [rsp+88]
  71114. mov QWORD PTR [rcx+64], rax
  71115. mov QWORD PTR [rcx+72], rdx
  71116. mov QWORD PTR [rcx+80], r12
  71117. mov QWORD PTR [rcx+88], r13
  71118. mov rax, QWORD PTR [rsp+96]
  71119. mov rdx, QWORD PTR [rsp+104]
  71120. mov r12, QWORD PTR [rsp+112]
  71121. mov r13, QWORD PTR [rsp+120]
  71122. mov QWORD PTR [rcx+96], rax
  71123. mov QWORD PTR [rcx+104], rdx
  71124. mov QWORD PTR [rcx+112], r12
  71125. mov QWORD PTR [rcx+120], r13
  71126. add rsp, 128
  71127. pop r14
  71128. pop r13
  71129. pop r12
  71130. ret
  71131. sp_1024_sqr_16 ENDP
  71132. _text ENDS
  71133. IFDEF HAVE_INTEL_AVX2
  71134. ; /* Multiply a and b into r. (r = a * b)
  71135. ; *
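  71136. ; * r Result of multiplication (pointer passed in rcx). If r is the same pointer as a or b, the low 16 words are staged on the stack.
  71137. ; * a First number to multiply (pointer passed in rdx; read as 64-bit words A[i]).
  71138. ; * b Second number to multiply (pointer passed in r8; read as 64-bit words B[0]..B[15]).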
  71139. ; */
  71140. _text SEGMENT READONLY PARA
  71141. sp_1024_mul_avx2_16 PROC
  71142. push rbx
  71143. push rbp
  71144. push r12
  71145. push r13
  71146. push r14
  71147. push r15
  71148. push rdi
  71149. mov rbp, r8
  71150. mov r8, rcx
  71151. mov r9, rdx
  71152. sub rsp, 128
  71153. cmp r9, r8
  71154. mov rbx, rsp
  71155. cmovne rbx, r8
  71156. cmp rbp, r8
  71157. cmove rbx, rsp
  71158. add r8, 128
  71159. xor rdi, rdi
  71160. mov rdx, QWORD PTR [r9]
  71161. ; A[0] * B[0]
  71162. mulx r11, r10, QWORD PTR [rbp]
  71163. ; A[0] * B[1]
  71164. mulx r12, rax, QWORD PTR [rbp+8]
  71165. mov QWORD PTR [rbx], r10
  71166. adcx r11, rax
  71167. ; A[0] * B[2]
  71168. mulx r13, rax, QWORD PTR [rbp+16]
  71169. mov QWORD PTR [rbx+8], r11
  71170. adcx r12, rax
  71171. ; A[0] * B[3]
  71172. mulx r14, rax, QWORD PTR [rbp+24]
  71173. mov QWORD PTR [rbx+16], r12
  71174. adcx r13, rax
  71175. mov QWORD PTR [rbx+24], r13
  71176. ; A[0] * B[4]
  71177. mulx r10, rax, QWORD PTR [rbp+32]
  71178. adcx r14, rax
  71179. ; A[0] * B[5]
  71180. mulx r11, rax, QWORD PTR [rbp+40]
  71181. mov QWORD PTR [rbx+32], r14
  71182. adcx r10, rax
  71183. ; A[0] * B[6]
  71184. mulx r12, rax, QWORD PTR [rbp+48]
  71185. mov QWORD PTR [rbx+40], r10
  71186. adcx r11, rax
  71187. ; A[0] * B[7]
  71188. mulx r13, rax, QWORD PTR [rbp+56]
  71189. mov QWORD PTR [rbx+48], r11
  71190. adcx r12, rax
  71191. mov QWORD PTR [rbx+56], r12
  71192. ; A[0] * B[8]
  71193. mulx r14, rax, QWORD PTR [rbp+64]
  71194. adcx r13, rax
  71195. ; A[0] * B[9]
  71196. mulx r10, rax, QWORD PTR [rbp+72]
  71197. mov QWORD PTR [rbx+64], r13
  71198. adcx r14, rax
  71199. ; A[0] * B[10]
  71200. mulx r11, rax, QWORD PTR [rbp+80]
  71201. mov QWORD PTR [rbx+72], r14
  71202. adcx r10, rax
  71203. ; A[0] * B[11]
  71204. mulx r12, rax, QWORD PTR [rbp+88]
  71205. mov QWORD PTR [rbx+80], r10
  71206. adcx r11, rax
  71207. mov QWORD PTR [rbx+88], r11
  71208. ; A[0] * B[12]
  71209. mulx r13, rax, QWORD PTR [rbp+96]
  71210. adcx r12, rax
  71211. ; A[0] * B[13]
  71212. mulx r14, rax, QWORD PTR [rbp+104]
  71213. mov QWORD PTR [rbx+96], r12
  71214. adcx r13, rax
  71215. ; A[0] * B[14]
  71216. mulx r10, rax, QWORD PTR [rbp+112]
  71217. mov QWORD PTR [rbx+104], r13
  71218. adcx r14, rax
  71219. ; A[0] * B[15]
  71220. mulx r11, rax, QWORD PTR [rbp+120]
  71221. mov QWORD PTR [rbx+112], r14
  71222. adcx r10, rax
  71223. adcx r11, rdi
  71224. mov r15, rdi
  71225. adcx r15, rdi
  71226. mov QWORD PTR [rbx+120], r10
  71227. mov QWORD PTR [r8], r11
  71228. mov rdx, QWORD PTR [r9+8]
  71229. mov r11, QWORD PTR [rbx+8]
  71230. mov r12, QWORD PTR [rbx+16]
  71231. mov r13, QWORD PTR [rbx+24]
  71232. mov r14, QWORD PTR [rbx+32]
  71233. mov r10, QWORD PTR [rbx+40]
  71234. ; A[1] * B[0]
  71235. mulx rcx, rax, QWORD PTR [rbp]
  71236. adcx r11, rax
  71237. adox r12, rcx
  71238. ; A[1] * B[1]
  71239. mulx rcx, rax, QWORD PTR [rbp+8]
  71240. mov QWORD PTR [rbx+8], r11
  71241. adcx r12, rax
  71242. adox r13, rcx
  71243. ; A[1] * B[2]
  71244. mulx rcx, rax, QWORD PTR [rbp+16]
  71245. mov QWORD PTR [rbx+16], r12
  71246. adcx r13, rax
  71247. adox r14, rcx
  71248. ; A[1] * B[3]
  71249. mulx rcx, rax, QWORD PTR [rbp+24]
  71250. mov QWORD PTR [rbx+24], r13
  71251. adcx r14, rax
  71252. adox r10, rcx
  71253. mov QWORD PTR [rbx+32], r14
  71254. mov r11, QWORD PTR [rbx+48]
  71255. mov r12, QWORD PTR [rbx+56]
  71256. mov r13, QWORD PTR [rbx+64]
  71257. mov r14, QWORD PTR [rbx+72]
  71258. ; A[1] * B[4]
  71259. mulx rcx, rax, QWORD PTR [rbp+32]
  71260. adcx r10, rax
  71261. adox r11, rcx
  71262. ; A[1] * B[5]
  71263. mulx rcx, rax, QWORD PTR [rbp+40]
  71264. mov QWORD PTR [rbx+40], r10
  71265. adcx r11, rax
  71266. adox r12, rcx
  71267. ; A[1] * B[6]
  71268. mulx rcx, rax, QWORD PTR [rbp+48]
  71269. mov QWORD PTR [rbx+48], r11
  71270. adcx r12, rax
  71271. adox r13, rcx
  71272. ; A[1] * B[7]
  71273. mulx rcx, rax, QWORD PTR [rbp+56]
  71274. mov QWORD PTR [rbx+56], r12
  71275. adcx r13, rax
  71276. adox r14, rcx
  71277. mov QWORD PTR [rbx+64], r13
  71278. mov r10, QWORD PTR [rbx+80]
  71279. mov r11, QWORD PTR [rbx+88]
  71280. mov r12, QWORD PTR [rbx+96]
  71281. mov r13, QWORD PTR [rbx+104]
  71282. ; A[1] * B[8]
  71283. mulx rcx, rax, QWORD PTR [rbp+64]
  71284. adcx r14, rax
  71285. adox r10, rcx
  71286. ; A[1] * B[9]
  71287. mulx rcx, rax, QWORD PTR [rbp+72]
  71288. mov QWORD PTR [rbx+72], r14
  71289. adcx r10, rax
  71290. adox r11, rcx
  71291. ; A[1] * B[10]
  71292. mulx rcx, rax, QWORD PTR [rbp+80]
  71293. mov QWORD PTR [rbx+80], r10
  71294. adcx r11, rax
  71295. adox r12, rcx
  71296. ; A[1] * B[11]
  71297. mulx rcx, rax, QWORD PTR [rbp+88]
  71298. mov QWORD PTR [rbx+88], r11
  71299. adcx r12, rax
  71300. adox r13, rcx
  71301. mov QWORD PTR [rbx+96], r12
  71302. mov r14, QWORD PTR [rbx+112]
  71303. mov r10, QWORD PTR [rbx+120]
  71304. mov r11, QWORD PTR [r8]
  71305. ; A[1] * B[12]
  71306. mulx rcx, rax, QWORD PTR [rbp+96]
  71307. adcx r13, rax
  71308. adox r14, rcx
  71309. ; A[1] * B[13]
  71310. mulx rcx, rax, QWORD PTR [rbp+104]
  71311. mov QWORD PTR [rbx+104], r13
  71312. adcx r14, rax
  71313. adox r10, rcx
  71314. ; A[1] * B[14]
  71315. mulx rcx, rax, QWORD PTR [rbp+112]
  71316. mov QWORD PTR [rbx+112], r14
  71317. adcx r10, rax
  71318. adox r11, rcx
  71319. ; A[1] * B[15]
  71320. mulx rcx, rax, QWORD PTR [rbp+120]
  71321. mov QWORD PTR [rbx+120], r10
  71322. mov r12, rdi
  71323. adcx r11, rax
  71324. adox r12, rcx
  71325. adcx r12, r15
  71326. mov r15, rdi
  71327. adox r15, rdi
  71328. adcx r15, rdi
  71329. mov QWORD PTR [r8], r11
  71330. mov QWORD PTR [r8+8], r12
  71331. mov rdx, QWORD PTR [r9+16]
  71332. mov r12, QWORD PTR [rbx+16]
  71333. mov r13, QWORD PTR [rbx+24]
  71334. mov r14, QWORD PTR [rbx+32]
  71335. mov r10, QWORD PTR [rbx+40]
  71336. mov r11, QWORD PTR [rbx+48]
  71337. ; A[2] * B[0]
  71338. mulx rcx, rax, QWORD PTR [rbp]
  71339. adcx r12, rax
  71340. adox r13, rcx
  71341. ; A[2] * B[1]
  71342. mulx rcx, rax, QWORD PTR [rbp+8]
  71343. mov QWORD PTR [rbx+16], r12
  71344. adcx r13, rax
  71345. adox r14, rcx
  71346. ; A[2] * B[2]
  71347. mulx rcx, rax, QWORD PTR [rbp+16]
  71348. mov QWORD PTR [rbx+24], r13
  71349. adcx r14, rax
  71350. adox r10, rcx
  71351. ; A[2] * B[3]
  71352. mulx rcx, rax, QWORD PTR [rbp+24]
  71353. mov QWORD PTR [rbx+32], r14
  71354. adcx r10, rax
  71355. adox r11, rcx
  71356. mov QWORD PTR [rbx+40], r10
  71357. mov r12, QWORD PTR [rbx+56]
  71358. mov r13, QWORD PTR [rbx+64]
  71359. mov r14, QWORD PTR [rbx+72]
  71360. mov r10, QWORD PTR [rbx+80]
  71361. ; A[2] * B[4]
  71362. mulx rcx, rax, QWORD PTR [rbp+32]
  71363. adcx r11, rax
  71364. adox r12, rcx
  71365. ; A[2] * B[5]
  71366. mulx rcx, rax, QWORD PTR [rbp+40]
  71367. mov QWORD PTR [rbx+48], r11
  71368. adcx r12, rax
  71369. adox r13, rcx
  71370. ; A[2] * B[6]
  71371. mulx rcx, rax, QWORD PTR [rbp+48]
  71372. mov QWORD PTR [rbx+56], r12
  71373. adcx r13, rax
  71374. adox r14, rcx
  71375. ; A[2] * B[7]
  71376. mulx rcx, rax, QWORD PTR [rbp+56]
  71377. mov QWORD PTR [rbx+64], r13
  71378. adcx r14, rax
  71379. adox r10, rcx
  71380. mov QWORD PTR [rbx+72], r14
  71381. mov r11, QWORD PTR [rbx+88]
  71382. mov r12, QWORD PTR [rbx+96]
  71383. mov r13, QWORD PTR [rbx+104]
  71384. mov r14, QWORD PTR [rbx+112]
  71385. ; A[2] * B[8]
  71386. mulx rcx, rax, QWORD PTR [rbp+64]
  71387. adcx r10, rax
  71388. adox r11, rcx
  71389. ; A[2] * B[9]
  71390. mulx rcx, rax, QWORD PTR [rbp+72]
  71391. mov QWORD PTR [rbx+80], r10
  71392. adcx r11, rax
  71393. adox r12, rcx
  71394. ; A[2] * B[10]
  71395. mulx rcx, rax, QWORD PTR [rbp+80]
  71396. mov QWORD PTR [rbx+88], r11
  71397. adcx r12, rax
  71398. adox r13, rcx
  71399. ; A[2] * B[11]
  71400. mulx rcx, rax, QWORD PTR [rbp+88]
  71401. mov QWORD PTR [rbx+96], r12
  71402. adcx r13, rax
  71403. adox r14, rcx
  71404. mov QWORD PTR [rbx+104], r13
  71405. mov r10, QWORD PTR [rbx+120]
  71406. mov r11, QWORD PTR [r8]
  71407. mov r12, QWORD PTR [r8+8]
  71408. ; A[2] * B[12]
  71409. mulx rcx, rax, QWORD PTR [rbp+96]
  71410. adcx r14, rax
  71411. adox r10, rcx
  71412. ; A[2] * B[13]
  71413. mulx rcx, rax, QWORD PTR [rbp+104]
  71414. mov QWORD PTR [rbx+112], r14
  71415. adcx r10, rax
  71416. adox r11, rcx
  71417. ; A[2] * B[14]
  71418. mulx rcx, rax, QWORD PTR [rbp+112]
  71419. mov QWORD PTR [rbx+120], r10
  71420. adcx r11, rax
  71421. adox r12, rcx
  71422. ; A[2] * B[15]
  71423. mulx rcx, rax, QWORD PTR [rbp+120]
  71424. mov QWORD PTR [r8], r11
  71425. mov r13, rdi
  71426. adcx r12, rax
  71427. adox r13, rcx
  71428. adcx r13, r15
  71429. mov r15, rdi
  71430. adox r15, rdi
  71431. adcx r15, rdi
  71432. mov QWORD PTR [r8+8], r12
  71433. mov QWORD PTR [r8+16], r13
  71434. mov rdx, QWORD PTR [r9+24]
  71435. mov r13, QWORD PTR [rbx+24]
  71436. mov r14, QWORD PTR [rbx+32]
  71437. mov r10, QWORD PTR [rbx+40]
  71438. mov r11, QWORD PTR [rbx+48]
  71439. mov r12, QWORD PTR [rbx+56]
  71440. ; A[3] * B[0]
  71441. mulx rcx, rax, QWORD PTR [rbp]
  71442. adcx r13, rax
  71443. adox r14, rcx
  71444. ; A[3] * B[1]
  71445. mulx rcx, rax, QWORD PTR [rbp+8]
  71446. mov QWORD PTR [rbx+24], r13
  71447. adcx r14, rax
  71448. adox r10, rcx
  71449. ; A[3] * B[2]
  71450. mulx rcx, rax, QWORD PTR [rbp+16]
  71451. mov QWORD PTR [rbx+32], r14
  71452. adcx r10, rax
  71453. adox r11, rcx
  71454. ; A[3] * B[3]
  71455. mulx rcx, rax, QWORD PTR [rbp+24]
  71456. mov QWORD PTR [rbx+40], r10
  71457. adcx r11, rax
  71458. adox r12, rcx
  71459. mov QWORD PTR [rbx+48], r11
  71460. mov r13, QWORD PTR [rbx+64]
  71461. mov r14, QWORD PTR [rbx+72]
  71462. mov r10, QWORD PTR [rbx+80]
  71463. mov r11, QWORD PTR [rbx+88]
  71464. ; A[3] * B[4]
  71465. mulx rcx, rax, QWORD PTR [rbp+32]
  71466. adcx r12, rax
  71467. adox r13, rcx
  71468. ; A[3] * B[5]
  71469. mulx rcx, rax, QWORD PTR [rbp+40]
  71470. mov QWORD PTR [rbx+56], r12
  71471. adcx r13, rax
  71472. adox r14, rcx
  71473. ; A[3] * B[6]
  71474. mulx rcx, rax, QWORD PTR [rbp+48]
  71475. mov QWORD PTR [rbx+64], r13
  71476. adcx r14, rax
  71477. adox r10, rcx
  71478. ; A[3] * B[7]
  71479. mulx rcx, rax, QWORD PTR [rbp+56]
  71480. mov QWORD PTR [rbx+72], r14
  71481. adcx r10, rax
  71482. adox r11, rcx
  71483. mov QWORD PTR [rbx+80], r10
  71484. mov r12, QWORD PTR [rbx+96]
  71485. mov r13, QWORD PTR [rbx+104]
  71486. mov r14, QWORD PTR [rbx+112]
  71487. mov r10, QWORD PTR [rbx+120]
  71488. ; A[3] * B[8]
  71489. mulx rcx, rax, QWORD PTR [rbp+64]
  71490. adcx r11, rax
  71491. adox r12, rcx
  71492. ; A[3] * B[9]
  71493. mulx rcx, rax, QWORD PTR [rbp+72]
  71494. mov QWORD PTR [rbx+88], r11
  71495. adcx r12, rax
  71496. adox r13, rcx
  71497. ; A[3] * B[10]
  71498. mulx rcx, rax, QWORD PTR [rbp+80]
  71499. mov QWORD PTR [rbx+96], r12
  71500. adcx r13, rax
  71501. adox r14, rcx
  71502. ; A[3] * B[11]
  71503. mulx rcx, rax, QWORD PTR [rbp+88]
  71504. mov QWORD PTR [rbx+104], r13
  71505. adcx r14, rax
  71506. adox r10, rcx
  71507. mov QWORD PTR [rbx+112], r14
  71508. mov r11, QWORD PTR [r8]
  71509. mov r12, QWORD PTR [r8+8]
  71510. mov r13, QWORD PTR [r8+16]
  71511. ; A[3] * B[12]
  71512. mulx rcx, rax, QWORD PTR [rbp+96]
  71513. adcx r10, rax
  71514. adox r11, rcx
  71515. ; A[3] * B[13]
  71516. mulx rcx, rax, QWORD PTR [rbp+104]
  71517. mov QWORD PTR [rbx+120], r10
  71518. adcx r11, rax
  71519. adox r12, rcx
  71520. ; A[3] * B[14]
  71521. mulx rcx, rax, QWORD PTR [rbp+112]
  71522. mov QWORD PTR [r8], r11
  71523. adcx r12, rax
  71524. adox r13, rcx
  71525. ; A[3] * B[15]
  71526. mulx rcx, rax, QWORD PTR [rbp+120]
  71527. mov QWORD PTR [r8+8], r12
  71528. mov r14, rdi
  71529. adcx r13, rax
  71530. adox r14, rcx
  71531. adcx r14, r15
  71532. mov r15, rdi
  71533. adox r15, rdi
  71534. adcx r15, rdi
  71535. mov QWORD PTR [r8+16], r13
  71536. mov QWORD PTR [r8+24], r14
  71537. mov rdx, QWORD PTR [r9+32]
  71538. mov r14, QWORD PTR [rbx+32]
  71539. mov r10, QWORD PTR [rbx+40]
  71540. mov r11, QWORD PTR [rbx+48]
  71541. mov r12, QWORD PTR [rbx+56]
  71542. mov r13, QWORD PTR [rbx+64]
  71543. ; A[4] * B[0]
  71544. mulx rcx, rax, QWORD PTR [rbp]
  71545. adcx r14, rax
  71546. adox r10, rcx
  71547. ; A[4] * B[1]
  71548. mulx rcx, rax, QWORD PTR [rbp+8]
  71549. mov QWORD PTR [rbx+32], r14
  71550. adcx r10, rax
  71551. adox r11, rcx
  71552. ; A[4] * B[2]
  71553. mulx rcx, rax, QWORD PTR [rbp+16]
  71554. mov QWORD PTR [rbx+40], r10
  71555. adcx r11, rax
  71556. adox r12, rcx
  71557. ; A[4] * B[3]
  71558. mulx rcx, rax, QWORD PTR [rbp+24]
  71559. mov QWORD PTR [rbx+48], r11
  71560. adcx r12, rax
  71561. adox r13, rcx
  71562. mov QWORD PTR [rbx+56], r12
  71563. mov r14, QWORD PTR [rbx+72]
  71564. mov r10, QWORD PTR [rbx+80]
  71565. mov r11, QWORD PTR [rbx+88]
  71566. mov r12, QWORD PTR [rbx+96]
  71567. ; A[4] * B[4]
  71568. mulx rcx, rax, QWORD PTR [rbp+32]
  71569. adcx r13, rax
  71570. adox r14, rcx
  71571. ; A[4] * B[5]
  71572. mulx rcx, rax, QWORD PTR [rbp+40]
  71573. mov QWORD PTR [rbx+64], r13
  71574. adcx r14, rax
  71575. adox r10, rcx
  71576. ; A[4] * B[6]
  71577. mulx rcx, rax, QWORD PTR [rbp+48]
  71578. mov QWORD PTR [rbx+72], r14
  71579. adcx r10, rax
  71580. adox r11, rcx
  71581. ; A[4] * B[7]
  71582. mulx rcx, rax, QWORD PTR [rbp+56]
  71583. mov QWORD PTR [rbx+80], r10
  71584. adcx r11, rax
  71585. adox r12, rcx
  71586. mov QWORD PTR [rbx+88], r11
  71587. mov r13, QWORD PTR [rbx+104]
  71588. mov r14, QWORD PTR [rbx+112]
  71589. mov r10, QWORD PTR [rbx+120]
  71590. mov r11, QWORD PTR [r8]
  71591. ; A[4] * B[8]
  71592. mulx rcx, rax, QWORD PTR [rbp+64]
  71593. adcx r12, rax
  71594. adox r13, rcx
  71595. ; A[4] * B[9]
  71596. mulx rcx, rax, QWORD PTR [rbp+72]
  71597. mov QWORD PTR [rbx+96], r12
  71598. adcx r13, rax
  71599. adox r14, rcx
  71600. ; A[4] * B[10]
  71601. mulx rcx, rax, QWORD PTR [rbp+80]
  71602. mov QWORD PTR [rbx+104], r13
  71603. adcx r14, rax
  71604. adox r10, rcx
  71605. ; A[4] * B[11]
  71606. mulx rcx, rax, QWORD PTR [rbp+88]
  71607. mov QWORD PTR [rbx+112], r14
  71608. adcx r10, rax
  71609. adox r11, rcx
  71610. mov QWORD PTR [rbx+120], r10
  71611. mov r12, QWORD PTR [r8+8]
  71612. mov r13, QWORD PTR [r8+16]
  71613. mov r14, QWORD PTR [r8+24]
  71614. ; A[4] * B[12]
  71615. mulx rcx, rax, QWORD PTR [rbp+96]
  71616. adcx r11, rax
  71617. adox r12, rcx
  71618. ; A[4] * B[13]
  71619. mulx rcx, rax, QWORD PTR [rbp+104]
  71620. mov QWORD PTR [r8], r11
  71621. adcx r12, rax
  71622. adox r13, rcx
  71623. ; A[4] * B[14]
  71624. mulx rcx, rax, QWORD PTR [rbp+112]
  71625. mov QWORD PTR [r8+8], r12
  71626. adcx r13, rax
  71627. adox r14, rcx
  71628. ; A[4] * B[15]
  71629. mulx rcx, rax, QWORD PTR [rbp+120]
  71630. mov QWORD PTR [r8+16], r13
  71631. mov r10, rdi
  71632. adcx r14, rax
  71633. adox r10, rcx
  71634. adcx r10, r15
  71635. mov r15, rdi
  71636. adox r15, rdi
  71637. adcx r15, rdi
  71638. mov QWORD PTR [r8+24], r14
  71639. mov QWORD PTR [r8+32], r10
  71640. mov rdx, QWORD PTR [r9+40]
  71641. mov r10, QWORD PTR [rbx+40]
  71642. mov r11, QWORD PTR [rbx+48]
  71643. mov r12, QWORD PTR [rbx+56]
  71644. mov r13, QWORD PTR [rbx+64]
  71645. mov r14, QWORD PTR [rbx+72]
  71646. ; A[5] * B[0]
  71647. mulx rcx, rax, QWORD PTR [rbp]
  71648. adcx r10, rax
  71649. adox r11, rcx
  71650. ; A[5] * B[1]
  71651. mulx rcx, rax, QWORD PTR [rbp+8]
  71652. mov QWORD PTR [rbx+40], r10
  71653. adcx r11, rax
  71654. adox r12, rcx
  71655. ; A[5] * B[2]
  71656. mulx rcx, rax, QWORD PTR [rbp+16]
  71657. mov QWORD PTR [rbx+48], r11
  71658. adcx r12, rax
  71659. adox r13, rcx
  71660. ; A[5] * B[3]
  71661. mulx rcx, rax, QWORD PTR [rbp+24]
  71662. mov QWORD PTR [rbx+56], r12
  71663. adcx r13, rax
  71664. adox r14, rcx
  71665. mov QWORD PTR [rbx+64], r13
  71666. mov r10, QWORD PTR [rbx+80]
  71667. mov r11, QWORD PTR [rbx+88]
  71668. mov r12, QWORD PTR [rbx+96]
  71669. mov r13, QWORD PTR [rbx+104]
  71670. ; A[5] * B[4]
  71671. mulx rcx, rax, QWORD PTR [rbp+32]
  71672. adcx r14, rax
  71673. adox r10, rcx
  71674. ; A[5] * B[5]
  71675. mulx rcx, rax, QWORD PTR [rbp+40]
  71676. mov QWORD PTR [rbx+72], r14
  71677. adcx r10, rax
  71678. adox r11, rcx
  71679. ; A[5] * B[6]
  71680. mulx rcx, rax, QWORD PTR [rbp+48]
  71681. mov QWORD PTR [rbx+80], r10
  71682. adcx r11, rax
  71683. adox r12, rcx
  71684. ; A[5] * B[7]
  71685. mulx rcx, rax, QWORD PTR [rbp+56]
  71686. mov QWORD PTR [rbx+88], r11
  71687. adcx r12, rax
  71688. adox r13, rcx
  71689. mov QWORD PTR [rbx+96], r12
  71690. mov r14, QWORD PTR [rbx+112]
  71691. mov r10, QWORD PTR [rbx+120]
  71692. mov r11, QWORD PTR [r8]
  71693. mov r12, QWORD PTR [r8+8]
  71694. ; A[5] * B[8]
  71695. mulx rcx, rax, QWORD PTR [rbp+64]
  71696. adcx r13, rax
  71697. adox r14, rcx
  71698. ; A[5] * B[9]
  71699. mulx rcx, rax, QWORD PTR [rbp+72]
  71700. mov QWORD PTR [rbx+104], r13
  71701. adcx r14, rax
  71702. adox r10, rcx
  71703. ; A[5] * B[10]
  71704. mulx rcx, rax, QWORD PTR [rbp+80]
  71705. mov QWORD PTR [rbx+112], r14
  71706. adcx r10, rax
  71707. adox r11, rcx
  71708. ; A[5] * B[11]
  71709. mulx rcx, rax, QWORD PTR [rbp+88]
  71710. mov QWORD PTR [rbx+120], r10
  71711. adcx r11, rax
  71712. adox r12, rcx
  71713. mov QWORD PTR [r8], r11
  71714. mov r13, QWORD PTR [r8+16]
  71715. mov r14, QWORD PTR [r8+24]
  71716. mov r10, QWORD PTR [r8+32]
  71717. ; A[5] * B[12]
  71718. mulx rcx, rax, QWORD PTR [rbp+96]
  71719. adcx r12, rax
  71720. adox r13, rcx
  71721. ; A[5] * B[13]
  71722. mulx rcx, rax, QWORD PTR [rbp+104]
  71723. mov QWORD PTR [r8+8], r12
  71724. adcx r13, rax
  71725. adox r14, rcx
  71726. ; A[5] * B[14]
  71727. mulx rcx, rax, QWORD PTR [rbp+112]
  71728. mov QWORD PTR [r8+16], r13
  71729. adcx r14, rax
  71730. adox r10, rcx
  71731. ; A[5] * B[15]
  71732. mulx rcx, rax, QWORD PTR [rbp+120]
  71733. mov QWORD PTR [r8+24], r14
  71734. mov r11, rdi
  71735. adcx r10, rax
  71736. adox r11, rcx
  71737. adcx r11, r15
  71738. mov r15, rdi
  71739. adox r15, rdi
  71740. adcx r15, rdi
  71741. mov QWORD PTR [r8+32], r10
  71742. mov QWORD PTR [r8+40], r11
  71743. mov rdx, QWORD PTR [r9+48]
  71744. mov r11, QWORD PTR [rbx+48]
  71745. mov r12, QWORD PTR [rbx+56]
  71746. mov r13, QWORD PTR [rbx+64]
  71747. mov r14, QWORD PTR [rbx+72]
  71748. mov r10, QWORD PTR [rbx+80]
  71749. ; A[6] * B[0]
  71750. mulx rcx, rax, QWORD PTR [rbp]
  71751. adcx r11, rax
  71752. adox r12, rcx
  71753. ; A[6] * B[1]
  71754. mulx rcx, rax, QWORD PTR [rbp+8]
  71755. mov QWORD PTR [rbx+48], r11
  71756. adcx r12, rax
  71757. adox r13, rcx
  71758. ; A[6] * B[2]
  71759. mulx rcx, rax, QWORD PTR [rbp+16]
  71760. mov QWORD PTR [rbx+56], r12
  71761. adcx r13, rax
  71762. adox r14, rcx
  71763. ; A[6] * B[3]
  71764. mulx rcx, rax, QWORD PTR [rbp+24]
  71765. mov QWORD PTR [rbx+64], r13
  71766. adcx r14, rax
  71767. adox r10, rcx
  71768. mov QWORD PTR [rbx+72], r14
  71769. mov r11, QWORD PTR [rbx+88]
  71770. mov r12, QWORD PTR [rbx+96]
  71771. mov r13, QWORD PTR [rbx+104]
  71772. mov r14, QWORD PTR [rbx+112]
  71773. ; A[6] * B[4]
  71774. mulx rcx, rax, QWORD PTR [rbp+32]
  71775. adcx r10, rax
  71776. adox r11, rcx
  71777. ; A[6] * B[5]
  71778. mulx rcx, rax, QWORD PTR [rbp+40]
  71779. mov QWORD PTR [rbx+80], r10
  71780. adcx r11, rax
  71781. adox r12, rcx
  71782. ; A[6] * B[6]
  71783. mulx rcx, rax, QWORD PTR [rbp+48]
  71784. mov QWORD PTR [rbx+88], r11
  71785. adcx r12, rax
  71786. adox r13, rcx
  71787. ; A[6] * B[7]
  71788. mulx rcx, rax, QWORD PTR [rbp+56]
  71789. mov QWORD PTR [rbx+96], r12
  71790. adcx r13, rax
  71791. adox r14, rcx
  71792. mov QWORD PTR [rbx+104], r13
  71793. mov r10, QWORD PTR [rbx+120]
  71794. mov r11, QWORD PTR [r8]
  71795. mov r12, QWORD PTR [r8+8]
  71796. mov r13, QWORD PTR [r8+16]
  71797. ; A[6] * B[8]
  71798. mulx rcx, rax, QWORD PTR [rbp+64]
  71799. adcx r14, rax
  71800. adox r10, rcx
  71801. ; A[6] * B[9]
  71802. mulx rcx, rax, QWORD PTR [rbp+72]
  71803. mov QWORD PTR [rbx+112], r14
  71804. adcx r10, rax
  71805. adox r11, rcx
  71806. ; A[6] * B[10]
  71807. mulx rcx, rax, QWORD PTR [rbp+80]
  71808. mov QWORD PTR [rbx+120], r10
  71809. adcx r11, rax
  71810. adox r12, rcx
  71811. ; A[6] * B[11]
  71812. mulx rcx, rax, QWORD PTR [rbp+88]
  71813. mov QWORD PTR [r8], r11
  71814. adcx r12, rax
  71815. adox r13, rcx
  71816. mov QWORD PTR [r8+8], r12
  71817. mov r14, QWORD PTR [r8+24]
  71818. mov r10, QWORD PTR [r8+32]
  71819. mov r11, QWORD PTR [r8+40]
  71820. ; A[6] * B[12]
  71821. mulx rcx, rax, QWORD PTR [rbp+96]
  71822. adcx r13, rax
  71823. adox r14, rcx
  71824. ; A[6] * B[13]
  71825. mulx rcx, rax, QWORD PTR [rbp+104]
  71826. mov QWORD PTR [r8+16], r13
  71827. adcx r14, rax
  71828. adox r10, rcx
  71829. ; A[6] * B[14]
  71830. mulx rcx, rax, QWORD PTR [rbp+112]
  71831. mov QWORD PTR [r8+24], r14
  71832. adcx r10, rax
  71833. adox r11, rcx
  71834. ; A[6] * B[15]
  71835. mulx rcx, rax, QWORD PTR [rbp+120]
  71836. mov QWORD PTR [r8+32], r10
  71837. mov r12, rdi
  71838. adcx r11, rax
  71839. adox r12, rcx
  71840. adcx r12, r15
  71841. mov r15, rdi
  71842. adox r15, rdi
  71843. adcx r15, rdi
  71844. mov QWORD PTR [r8+40], r11
  71845. mov QWORD PTR [r8+48], r12
  71846. mov rdx, QWORD PTR [r9+56]
  71847. mov r12, QWORD PTR [rbx+56]
  71848. mov r13, QWORD PTR [rbx+64]
  71849. mov r14, QWORD PTR [rbx+72]
  71850. mov r10, QWORD PTR [rbx+80]
  71851. mov r11, QWORD PTR [rbx+88]
  71852. ; A[7] * B[0]
  71853. mulx rcx, rax, QWORD PTR [rbp]
  71854. adcx r12, rax
  71855. adox r13, rcx
  71856. ; A[7] * B[1]
  71857. mulx rcx, rax, QWORD PTR [rbp+8]
  71858. mov QWORD PTR [rbx+56], r12
  71859. adcx r13, rax
  71860. adox r14, rcx
  71861. ; A[7] * B[2]
  71862. mulx rcx, rax, QWORD PTR [rbp+16]
  71863. mov QWORD PTR [rbx+64], r13
  71864. adcx r14, rax
  71865. adox r10, rcx
  71866. ; A[7] * B[3]
  71867. mulx rcx, rax, QWORD PTR [rbp+24]
  71868. mov QWORD PTR [rbx+72], r14
  71869. adcx r10, rax
  71870. adox r11, rcx
  71871. mov QWORD PTR [rbx+80], r10
  71872. mov r12, QWORD PTR [rbx+96]
  71873. mov r13, QWORD PTR [rbx+104]
  71874. mov r14, QWORD PTR [rbx+112]
  71875. mov r10, QWORD PTR [rbx+120]
  71876. ; A[7] * B[4]
  71877. mulx rcx, rax, QWORD PTR [rbp+32]
  71878. adcx r11, rax
  71879. adox r12, rcx
  71880. ; A[7] * B[5]
  71881. mulx rcx, rax, QWORD PTR [rbp+40]
  71882. mov QWORD PTR [rbx+88], r11
  71883. adcx r12, rax
  71884. adox r13, rcx
  71885. ; A[7] * B[6]
  71886. mulx rcx, rax, QWORD PTR [rbp+48]
  71887. mov QWORD PTR [rbx+96], r12
  71888. adcx r13, rax
  71889. adox r14, rcx
  71890. ; A[7] * B[7]
  71891. mulx rcx, rax, QWORD PTR [rbp+56]
  71892. mov QWORD PTR [rbx+104], r13
  71893. adcx r14, rax
  71894. adox r10, rcx
  71895. mov QWORD PTR [rbx+112], r14
  71896. mov r11, QWORD PTR [r8]
  71897. mov r12, QWORD PTR [r8+8]
  71898. mov r13, QWORD PTR [r8+16]
  71899. mov r14, QWORD PTR [r8+24]
  71900. ; A[7] * B[8]
  71901. mulx rcx, rax, QWORD PTR [rbp+64]
  71902. adcx r10, rax
  71903. adox r11, rcx
  71904. ; A[7] * B[9]
  71905. mulx rcx, rax, QWORD PTR [rbp+72]
  71906. mov QWORD PTR [rbx+120], r10
  71907. adcx r11, rax
  71908. adox r12, rcx
  71909. ; A[7] * B[10]
  71910. mulx rcx, rax, QWORD PTR [rbp+80]
  71911. mov QWORD PTR [r8], r11
  71912. adcx r12, rax
  71913. adox r13, rcx
  71914. ; A[7] * B[11]
  71915. mulx rcx, rax, QWORD PTR [rbp+88]
  71916. mov QWORD PTR [r8+8], r12
  71917. adcx r13, rax
  71918. adox r14, rcx
  71919. mov QWORD PTR [r8+16], r13
  71920. mov r10, QWORD PTR [r8+32]
  71921. mov r11, QWORD PTR [r8+40]
  71922. mov r12, QWORD PTR [r8+48]
  71923. ; A[7] * B[12]
  71924. mulx rcx, rax, QWORD PTR [rbp+96]
  71925. adcx r14, rax
  71926. adox r10, rcx
  71927. ; A[7] * B[13]
  71928. mulx rcx, rax, QWORD PTR [rbp+104]
  71929. mov QWORD PTR [r8+24], r14
  71930. adcx r10, rax
  71931. adox r11, rcx
  71932. ; A[7] * B[14]
  71933. mulx rcx, rax, QWORD PTR [rbp+112]
  71934. mov QWORD PTR [r8+32], r10
  71935. adcx r11, rax
  71936. adox r12, rcx
  71937. ; A[7] * B[15]
  71938. mulx rcx, rax, QWORD PTR [rbp+120]
  71939. mov QWORD PTR [r8+40], r11
  71940. mov r13, rdi
  71941. adcx r12, rax
  71942. adox r13, rcx
  71943. adcx r13, r15
  71944. mov r15, rdi
  71945. adox r15, rdi
  71946. adcx r15, rdi
  71947. mov QWORD PTR [r8+48], r12
  71948. mov QWORD PTR [r8+56], r13
  71949. mov rdx, QWORD PTR [r9+64]
  71950. mov r13, QWORD PTR [rbx+64]
  71951. mov r14, QWORD PTR [rbx+72]
  71952. mov r10, QWORD PTR [rbx+80]
  71953. mov r11, QWORD PTR [rbx+88]
  71954. mov r12, QWORD PTR [rbx+96]
  71955. ; A[8] * B[0]
  71956. mulx rcx, rax, QWORD PTR [rbp]
  71957. adcx r13, rax
  71958. adox r14, rcx
  71959. ; A[8] * B[1]
  71960. mulx rcx, rax, QWORD PTR [rbp+8]
  71961. mov QWORD PTR [rbx+64], r13
  71962. adcx r14, rax
  71963. adox r10, rcx
  71964. ; A[8] * B[2]
  71965. mulx rcx, rax, QWORD PTR [rbp+16]
  71966. mov QWORD PTR [rbx+72], r14
  71967. adcx r10, rax
  71968. adox r11, rcx
  71969. ; A[8] * B[3]
  71970. mulx rcx, rax, QWORD PTR [rbp+24]
  71971. mov QWORD PTR [rbx+80], r10
  71972. adcx r11, rax
  71973. adox r12, rcx
  71974. mov QWORD PTR [rbx+88], r11
  71975. mov r13, QWORD PTR [rbx+104]
  71976. mov r14, QWORD PTR [rbx+112]
  71977. mov r10, QWORD PTR [rbx+120]
  71978. mov r11, QWORD PTR [r8]
  71979. ; A[8] * B[4]
  71980. mulx rcx, rax, QWORD PTR [rbp+32]
  71981. adcx r12, rax
  71982. adox r13, rcx
  71983. ; A[8] * B[5]
  71984. mulx rcx, rax, QWORD PTR [rbp+40]
  71985. mov QWORD PTR [rbx+96], r12
  71986. adcx r13, rax
  71987. adox r14, rcx
  71988. ; A[8] * B[6]
  71989. mulx rcx, rax, QWORD PTR [rbp+48]
  71990. mov QWORD PTR [rbx+104], r13
  71991. adcx r14, rax
  71992. adox r10, rcx
  71993. ; A[8] * B[7]
  71994. mulx rcx, rax, QWORD PTR [rbp+56]
  71995. mov QWORD PTR [rbx+112], r14
  71996. adcx r10, rax
  71997. adox r11, rcx
  71998. mov QWORD PTR [rbx+120], r10
  71999. mov r12, QWORD PTR [r8+8]
  72000. mov r13, QWORD PTR [r8+16]
  72001. mov r14, QWORD PTR [r8+24]
  72002. mov r10, QWORD PTR [r8+32]
  72003. ; A[8] * B[8]
  72004. mulx rcx, rax, QWORD PTR [rbp+64]
  72005. adcx r11, rax
  72006. adox r12, rcx
  72007. ; A[8] * B[9]
  72008. mulx rcx, rax, QWORD PTR [rbp+72]
  72009. mov QWORD PTR [r8], r11
  72010. adcx r12, rax
  72011. adox r13, rcx
  72012. ; A[8] * B[10]
  72013. mulx rcx, rax, QWORD PTR [rbp+80]
  72014. mov QWORD PTR [r8+8], r12
  72015. adcx r13, rax
  72016. adox r14, rcx
  72017. ; A[8] * B[11]
  72018. mulx rcx, rax, QWORD PTR [rbp+88]
  72019. mov QWORD PTR [r8+16], r13
  72020. adcx r14, rax
  72021. adox r10, rcx
  72022. mov QWORD PTR [r8+24], r14
  72023. mov r11, QWORD PTR [r8+40]
  72024. mov r12, QWORD PTR [r8+48]
  72025. mov r13, QWORD PTR [r8+56]
  72026. ; A[8] * B[12]
  72027. mulx rcx, rax, QWORD PTR [rbp+96]
  72028. adcx r10, rax
  72029. adox r11, rcx
  72030. ; A[8] * B[13]
  72031. mulx rcx, rax, QWORD PTR [rbp+104]
  72032. mov QWORD PTR [r8+32], r10
  72033. adcx r11, rax
  72034. adox r12, rcx
  72035. ; A[8] * B[14]
  72036. mulx rcx, rax, QWORD PTR [rbp+112]
  72037. mov QWORD PTR [r8+40], r11
  72038. adcx r12, rax
  72039. adox r13, rcx
  72040. ; A[8] * B[15]
  72041. mulx rcx, rax, QWORD PTR [rbp+120]
  72042. mov QWORD PTR [r8+48], r12
  72043. mov r14, rdi
  72044. adcx r13, rax
  72045. adox r14, rcx
  72046. adcx r14, r15
  72047. mov r15, rdi
  72048. adox r15, rdi
  72049. adcx r15, rdi
  72050. mov QWORD PTR [r8+56], r13
  72051. mov QWORD PTR [r8+64], r14
  72052. mov rdx, QWORD PTR [r9+72]
  72053. mov r14, QWORD PTR [rbx+72]
  72054. mov r10, QWORD PTR [rbx+80]
  72055. mov r11, QWORD PTR [rbx+88]
  72056. mov r12, QWORD PTR [rbx+96]
  72057. mov r13, QWORD PTR [rbx+104]
  72058. ; A[9] * B[0]
  72059. mulx rcx, rax, QWORD PTR [rbp]
  72060. adcx r14, rax
  72061. adox r10, rcx
  72062. ; A[9] * B[1]
  72063. mulx rcx, rax, QWORD PTR [rbp+8]
  72064. mov QWORD PTR [rbx+72], r14
  72065. adcx r10, rax
  72066. adox r11, rcx
  72067. ; A[9] * B[2]
  72068. mulx rcx, rax, QWORD PTR [rbp+16]
  72069. mov QWORD PTR [rbx+80], r10
  72070. adcx r11, rax
  72071. adox r12, rcx
  72072. ; A[9] * B[3]
  72073. mulx rcx, rax, QWORD PTR [rbp+24]
  72074. mov QWORD PTR [rbx+88], r11
  72075. adcx r12, rax
  72076. adox r13, rcx
  72077. mov QWORD PTR [rbx+96], r12
  72078. mov r14, QWORD PTR [rbx+112]
  72079. mov r10, QWORD PTR [rbx+120]
  72080. mov r11, QWORD PTR [r8]
  72081. mov r12, QWORD PTR [r8+8]
  72082. ; A[9] * B[4]
  72083. mulx rcx, rax, QWORD PTR [rbp+32]
  72084. adcx r13, rax
  72085. adox r14, rcx
  72086. ; A[9] * B[5]
  72087. mulx rcx, rax, QWORD PTR [rbp+40]
  72088. mov QWORD PTR [rbx+104], r13
  72089. adcx r14, rax
  72090. adox r10, rcx
  72091. ; A[9] * B[6]
  72092. mulx rcx, rax, QWORD PTR [rbp+48]
  72093. mov QWORD PTR [rbx+112], r14
  72094. adcx r10, rax
  72095. adox r11, rcx
  72096. ; A[9] * B[7]
  72097. mulx rcx, rax, QWORD PTR [rbp+56]
  72098. mov QWORD PTR [rbx+120], r10
  72099. adcx r11, rax
  72100. adox r12, rcx
  72101. mov QWORD PTR [r8], r11
  72102. mov r13, QWORD PTR [r8+16]
  72103. mov r14, QWORD PTR [r8+24]
  72104. mov r10, QWORD PTR [r8+32]
  72105. mov r11, QWORD PTR [r8+40]
  72106. ; A[9] * B[8]
  72107. mulx rcx, rax, QWORD PTR [rbp+64]
  72108. adcx r12, rax
  72109. adox r13, rcx
  72110. ; A[9] * B[9]
  72111. mulx rcx, rax, QWORD PTR [rbp+72]
  72112. mov QWORD PTR [r8+8], r12
  72113. adcx r13, rax
  72114. adox r14, rcx
  72115. ; A[9] * B[10]
  72116. mulx rcx, rax, QWORD PTR [rbp+80]
  72117. mov QWORD PTR [r8+16], r13
  72118. adcx r14, rax
  72119. adox r10, rcx
  72120. ; A[9] * B[11]
  72121. mulx rcx, rax, QWORD PTR [rbp+88]
  72122. mov QWORD PTR [r8+24], r14
  72123. adcx r10, rax
  72124. adox r11, rcx
  72125. mov QWORD PTR [r8+32], r10
  72126. mov r12, QWORD PTR [r8+48]
  72127. mov r13, QWORD PTR [r8+56]
  72128. mov r14, QWORD PTR [r8+64]
  72129. ; A[9] * B[12]
  72130. mulx rcx, rax, QWORD PTR [rbp+96]
  72131. adcx r11, rax
  72132. adox r12, rcx
  72133. ; A[9] * B[13]
  72134. mulx rcx, rax, QWORD PTR [rbp+104]
  72135. mov QWORD PTR [r8+40], r11
  72136. adcx r12, rax
  72137. adox r13, rcx
  72138. ; A[9] * B[14]
  72139. mulx rcx, rax, QWORD PTR [rbp+112]
  72140. mov QWORD PTR [r8+48], r12
  72141. adcx r13, rax
  72142. adox r14, rcx
  72143. ; A[9] * B[15]
  72144. mulx rcx, rax, QWORD PTR [rbp+120]
  72145. mov QWORD PTR [r8+56], r13
  72146. mov r10, rdi
  72147. adcx r14, rax
  72148. adox r10, rcx
  72149. adcx r10, r15
  72150. mov r15, rdi
  72151. adox r15, rdi
  72152. adcx r15, rdi
  72153. mov QWORD PTR [r8+64], r14
  72154. mov QWORD PTR [r8+72], r10
  72155. mov rdx, QWORD PTR [r9+80]
  72156. mov r10, QWORD PTR [rbx+80]
  72157. mov r11, QWORD PTR [rbx+88]
  72158. mov r12, QWORD PTR [rbx+96]
  72159. mov r13, QWORD PTR [rbx+104]
  72160. mov r14, QWORD PTR [rbx+112]
  72161. ; A[10] * B[0]
  72162. mulx rcx, rax, QWORD PTR [rbp]
  72163. adcx r10, rax
  72164. adox r11, rcx
  72165. ; A[10] * B[1]
  72166. mulx rcx, rax, QWORD PTR [rbp+8]
  72167. mov QWORD PTR [rbx+80], r10
  72168. adcx r11, rax
  72169. adox r12, rcx
  72170. ; A[10] * B[2]
  72171. mulx rcx, rax, QWORD PTR [rbp+16]
  72172. mov QWORD PTR [rbx+88], r11
  72173. adcx r12, rax
  72174. adox r13, rcx
  72175. ; A[10] * B[3]
  72176. mulx rcx, rax, QWORD PTR [rbp+24]
  72177. mov QWORD PTR [rbx+96], r12
  72178. adcx r13, rax
  72179. adox r14, rcx
  72180. mov QWORD PTR [rbx+104], r13
  72181. mov r10, QWORD PTR [rbx+120]
  72182. mov r11, QWORD PTR [r8]
  72183. mov r12, QWORD PTR [r8+8]
  72184. mov r13, QWORD PTR [r8+16]
  72185. ; A[10] * B[4]
  72186. mulx rcx, rax, QWORD PTR [rbp+32]
  72187. adcx r14, rax
  72188. adox r10, rcx
  72189. ; A[10] * B[5]
  72190. mulx rcx, rax, QWORD PTR [rbp+40]
  72191. mov QWORD PTR [rbx+112], r14
  72192. adcx r10, rax
  72193. adox r11, rcx
  72194. ; A[10] * B[6]
  72195. mulx rcx, rax, QWORD PTR [rbp+48]
  72196. mov QWORD PTR [rbx+120], r10
  72197. adcx r11, rax
  72198. adox r12, rcx
  72199. ; A[10] * B[7]
  72200. mulx rcx, rax, QWORD PTR [rbp+56]
  72201. mov QWORD PTR [r8], r11
  72202. adcx r12, rax
  72203. adox r13, rcx
  72204. mov QWORD PTR [r8+8], r12
  72205. mov r14, QWORD PTR [r8+24]
  72206. mov r10, QWORD PTR [r8+32]
  72207. mov r11, QWORD PTR [r8+40]
  72208. mov r12, QWORD PTR [r8+48]
  72209. ; A[10] * B[8]
  72210. mulx rcx, rax, QWORD PTR [rbp+64]
  72211. adcx r13, rax
  72212. adox r14, rcx
  72213. ; A[10] * B[9]
  72214. mulx rcx, rax, QWORD PTR [rbp+72]
  72215. mov QWORD PTR [r8+16], r13
  72216. adcx r14, rax
  72217. adox r10, rcx
  72218. ; A[10] * B[10]
  72219. mulx rcx, rax, QWORD PTR [rbp+80]
  72220. mov QWORD PTR [r8+24], r14
  72221. adcx r10, rax
  72222. adox r11, rcx
  72223. ; A[10] * B[11]
  72224. mulx rcx, rax, QWORD PTR [rbp+88]
  72225. mov QWORD PTR [r8+32], r10
  72226. adcx r11, rax
  72227. adox r12, rcx
  72228. mov QWORD PTR [r8+40], r11
  72229. mov r13, QWORD PTR [r8+56]
  72230. mov r14, QWORD PTR [r8+64]
  72231. mov r10, QWORD PTR [r8+72]
  72232. ; A[10] * B[12]
  72233. mulx rcx, rax, QWORD PTR [rbp+96]
  72234. adcx r12, rax
  72235. adox r13, rcx
  72236. ; A[10] * B[13]
  72237. mulx rcx, rax, QWORD PTR [rbp+104]
  72238. mov QWORD PTR [r8+48], r12
  72239. adcx r13, rax
  72240. adox r14, rcx
  72241. ; A[10] * B[14]
  72242. mulx rcx, rax, QWORD PTR [rbp+112]
  72243. mov QWORD PTR [r8+56], r13
  72244. adcx r14, rax
  72245. adox r10, rcx
  72246. ; A[10] * B[15]
  72247. mulx rcx, rax, QWORD PTR [rbp+120]
  72248. mov QWORD PTR [r8+64], r14
  72249. mov r11, rdi
  72250. adcx r10, rax
  72251. adox r11, rcx
  72252. adcx r11, r15
  72253. mov r15, rdi
  72254. adox r15, rdi
  72255. adcx r15, rdi
  72256. mov QWORD PTR [r8+72], r10
  72257. mov QWORD PTR [r8+80], r11
  72258. mov rdx, QWORD PTR [r9+88]
  72259. mov r11, QWORD PTR [rbx+88]
  72260. mov r12, QWORD PTR [rbx+96]
  72261. mov r13, QWORD PTR [rbx+104]
  72262. mov r14, QWORD PTR [rbx+112]
  72263. mov r10, QWORD PTR [rbx+120]
  72264. ; A[11] * B[0]
  72265. mulx rcx, rax, QWORD PTR [rbp]
  72266. adcx r11, rax
  72267. adox r12, rcx
  72268. ; A[11] * B[1]
  72269. mulx rcx, rax, QWORD PTR [rbp+8]
  72270. mov QWORD PTR [rbx+88], r11
  72271. adcx r12, rax
  72272. adox r13, rcx
  72273. ; A[11] * B[2]
  72274. mulx rcx, rax, QWORD PTR [rbp+16]
  72275. mov QWORD PTR [rbx+96], r12
  72276. adcx r13, rax
  72277. adox r14, rcx
  72278. ; A[11] * B[3]
  72279. mulx rcx, rax, QWORD PTR [rbp+24]
  72280. mov QWORD PTR [rbx+104], r13
  72281. adcx r14, rax
  72282. adox r10, rcx
  72283. mov QWORD PTR [rbx+112], r14
  72284. mov r11, QWORD PTR [r8]
  72285. mov r12, QWORD PTR [r8+8]
  72286. mov r13, QWORD PTR [r8+16]
  72287. mov r14, QWORD PTR [r8+24]
  72288. ; A[11] * B[4]
  72289. mulx rcx, rax, QWORD PTR [rbp+32]
  72290. adcx r10, rax
  72291. adox r11, rcx
  72292. ; A[11] * B[5]
  72293. mulx rcx, rax, QWORD PTR [rbp+40]
  72294. mov QWORD PTR [rbx+120], r10
  72295. adcx r11, rax
  72296. adox r12, rcx
  72297. ; A[11] * B[6]
  72298. mulx rcx, rax, QWORD PTR [rbp+48]
  72299. mov QWORD PTR [r8], r11
  72300. adcx r12, rax
  72301. adox r13, rcx
  72302. ; A[11] * B[7]
  72303. mulx rcx, rax, QWORD PTR [rbp+56]
  72304. mov QWORD PTR [r8+8], r12
  72305. adcx r13, rax
  72306. adox r14, rcx
  72307. mov QWORD PTR [r8+16], r13
  72308. mov r10, QWORD PTR [r8+32]
  72309. mov r11, QWORD PTR [r8+40]
  72310. mov r12, QWORD PTR [r8+48]
  72311. mov r13, QWORD PTR [r8+56]
  72312. ; A[11] * B[8]
  72313. mulx rcx, rax, QWORD PTR [rbp+64]
  72314. adcx r14, rax
  72315. adox r10, rcx
  72316. ; A[11] * B[9]
  72317. mulx rcx, rax, QWORD PTR [rbp+72]
  72318. mov QWORD PTR [r8+24], r14
  72319. adcx r10, rax
  72320. adox r11, rcx
  72321. ; A[11] * B[10]
  72322. mulx rcx, rax, QWORD PTR [rbp+80]
  72323. mov QWORD PTR [r8+32], r10
  72324. adcx r11, rax
  72325. adox r12, rcx
  72326. ; A[11] * B[11]
  72327. mulx rcx, rax, QWORD PTR [rbp+88]
  72328. mov QWORD PTR [r8+40], r11
  72329. adcx r12, rax
  72330. adox r13, rcx
  72331. mov QWORD PTR [r8+48], r12
  72332. mov r14, QWORD PTR [r8+64]
  72333. mov r10, QWORD PTR [r8+72]
  72334. mov r11, QWORD PTR [r8+80]
  72335. ; A[11] * B[12]
  72336. mulx rcx, rax, QWORD PTR [rbp+96]
  72337. adcx r13, rax
  72338. adox r14, rcx
  72339. ; A[11] * B[13]
  72340. mulx rcx, rax, QWORD PTR [rbp+104]
  72341. mov QWORD PTR [r8+56], r13
  72342. adcx r14, rax
  72343. adox r10, rcx
  72344. ; A[11] * B[14]
  72345. mulx rcx, rax, QWORD PTR [rbp+112]
  72346. mov QWORD PTR [r8+64], r14
  72347. adcx r10, rax
  72348. adox r11, rcx
  72349. ; A[11] * B[15]
  72350. mulx rcx, rax, QWORD PTR [rbp+120]
  72351. mov QWORD PTR [r8+72], r10
  72352. mov r12, rdi
  72353. adcx r11, rax
  72354. adox r12, rcx
  72355. adcx r12, r15
  72356. mov r15, rdi
  72357. adox r15, rdi
  72358. adcx r15, rdi
  72359. mov QWORD PTR [r8+80], r11
  72360. mov QWORD PTR [r8+88], r12
  72361. mov rdx, QWORD PTR [r9+96]
  72362. mov r12, QWORD PTR [rbx+96]
  72363. mov r13, QWORD PTR [rbx+104]
  72364. mov r14, QWORD PTR [rbx+112]
  72365. mov r10, QWORD PTR [rbx+120]
  72366. mov r11, QWORD PTR [r8]
  72367. ; A[12] * B[0]
  72368. mulx rcx, rax, QWORD PTR [rbp]
  72369. adcx r12, rax
  72370. adox r13, rcx
  72371. ; A[12] * B[1]
  72372. mulx rcx, rax, QWORD PTR [rbp+8]
  72373. mov QWORD PTR [rbx+96], r12
  72374. adcx r13, rax
  72375. adox r14, rcx
  72376. ; A[12] * B[2]
  72377. mulx rcx, rax, QWORD PTR [rbp+16]
  72378. mov QWORD PTR [rbx+104], r13
  72379. adcx r14, rax
  72380. adox r10, rcx
  72381. ; A[12] * B[3]
  72382. mulx rcx, rax, QWORD PTR [rbp+24]
  72383. mov QWORD PTR [rbx+112], r14
  72384. adcx r10, rax
  72385. adox r11, rcx
  72386. mov QWORD PTR [rbx+120], r10
  72387. mov r12, QWORD PTR [r8+8]
  72388. mov r13, QWORD PTR [r8+16]
  72389. mov r14, QWORD PTR [r8+24]
  72390. mov r10, QWORD PTR [r8+32]
  72391. ; A[12] * B[4]
  72392. mulx rcx, rax, QWORD PTR [rbp+32]
  72393. adcx r11, rax
  72394. adox r12, rcx
  72395. ; A[12] * B[5]
  72396. mulx rcx, rax, QWORD PTR [rbp+40]
  72397. mov QWORD PTR [r8], r11
  72398. adcx r12, rax
  72399. adox r13, rcx
  72400. ; A[12] * B[6]
  72401. mulx rcx, rax, QWORD PTR [rbp+48]
  72402. mov QWORD PTR [r8+8], r12
  72403. adcx r13, rax
  72404. adox r14, rcx
  72405. ; A[12] * B[7]
  72406. mulx rcx, rax, QWORD PTR [rbp+56]
  72407. mov QWORD PTR [r8+16], r13
  72408. adcx r14, rax
  72409. adox r10, rcx
  72410. mov QWORD PTR [r8+24], r14
  72411. mov r11, QWORD PTR [r8+40]
  72412. mov r12, QWORD PTR [r8+48]
  72413. mov r13, QWORD PTR [r8+56]
  72414. mov r14, QWORD PTR [r8+64]
  72415. ; A[12] * B[8]
  72416. mulx rcx, rax, QWORD PTR [rbp+64]
  72417. adcx r10, rax
  72418. adox r11, rcx
  72419. ; A[12] * B[9]
  72420. mulx rcx, rax, QWORD PTR [rbp+72]
  72421. mov QWORD PTR [r8+32], r10
  72422. adcx r11, rax
  72423. adox r12, rcx
  72424. ; A[12] * B[10]
  72425. mulx rcx, rax, QWORD PTR [rbp+80]
  72426. mov QWORD PTR [r8+40], r11
  72427. adcx r12, rax
  72428. adox r13, rcx
  72429. ; A[12] * B[11]
  72430. mulx rcx, rax, QWORD PTR [rbp+88]
  72431. mov QWORD PTR [r8+48], r12
  72432. adcx r13, rax
  72433. adox r14, rcx
  72434. mov QWORD PTR [r8+56], r13
  72435. mov r10, QWORD PTR [r8+72]
  72436. mov r11, QWORD PTR [r8+80]
  72437. mov r12, QWORD PTR [r8+88]
  72438. ; A[12] * B[12]
  72439. mulx rcx, rax, QWORD PTR [rbp+96]
  72440. adcx r14, rax
  72441. adox r10, rcx
  72442. ; A[12] * B[13]
  72443. mulx rcx, rax, QWORD PTR [rbp+104]
  72444. mov QWORD PTR [r8+64], r14
  72445. adcx r10, rax
  72446. adox r11, rcx
  72447. ; A[12] * B[14]
  72448. mulx rcx, rax, QWORD PTR [rbp+112]
  72449. mov QWORD PTR [r8+72], r10
  72450. adcx r11, rax
  72451. adox r12, rcx
  72452. ; A[12] * B[15]
  72453. mulx rcx, rax, QWORD PTR [rbp+120]
  72454. mov QWORD PTR [r8+80], r11
  72455. mov r13, rdi
  72456. adcx r12, rax
  72457. adox r13, rcx
  72458. adcx r13, r15
  72459. mov r15, rdi
  72460. adox r15, rdi
  72461. adcx r15, rdi
  72462. mov QWORD PTR [r8+88], r12
  72463. mov QWORD PTR [r8+96], r13
  72464. mov rdx, QWORD PTR [r9+104]
  72465. mov r13, QWORD PTR [rbx+104]
  72466. mov r14, QWORD PTR [rbx+112]
  72467. mov r10, QWORD PTR [rbx+120]
  72468. mov r11, QWORD PTR [r8]
  72469. mov r12, QWORD PTR [r8+8]
  72470. ; A[13] * B[0]
  72471. mulx rcx, rax, QWORD PTR [rbp]
  72472. adcx r13, rax
  72473. adox r14, rcx
  72474. ; A[13] * B[1]
  72475. mulx rcx, rax, QWORD PTR [rbp+8]
  72476. mov QWORD PTR [rbx+104], r13
  72477. adcx r14, rax
  72478. adox r10, rcx
  72479. ; A[13] * B[2]
  72480. mulx rcx, rax, QWORD PTR [rbp+16]
  72481. mov QWORD PTR [rbx+112], r14
  72482. adcx r10, rax
  72483. adox r11, rcx
  72484. ; A[13] * B[3]
  72485. mulx rcx, rax, QWORD PTR [rbp+24]
  72486. mov QWORD PTR [rbx+120], r10
  72487. adcx r11, rax
  72488. adox r12, rcx
  72489. mov QWORD PTR [r8], r11
  72490. mov r13, QWORD PTR [r8+16]
  72491. mov r14, QWORD PTR [r8+24]
  72492. mov r10, QWORD PTR [r8+32]
  72493. mov r11, QWORD PTR [r8+40]
  72494. ; A[13] * B[4]
  72495. mulx rcx, rax, QWORD PTR [rbp+32]
  72496. adcx r12, rax
  72497. adox r13, rcx
  72498. ; A[13] * B[5]
  72499. mulx rcx, rax, QWORD PTR [rbp+40]
  72500. mov QWORD PTR [r8+8], r12
  72501. adcx r13, rax
  72502. adox r14, rcx
  72503. ; A[13] * B[6]
  72504. mulx rcx, rax, QWORD PTR [rbp+48]
  72505. mov QWORD PTR [r8+16], r13
  72506. adcx r14, rax
  72507. adox r10, rcx
  72508. ; A[13] * B[7]
  72509. mulx rcx, rax, QWORD PTR [rbp+56]
  72510. mov QWORD PTR [r8+24], r14
  72511. adcx r10, rax
  72512. adox r11, rcx
  72513. mov QWORD PTR [r8+32], r10
  72514. mov r12, QWORD PTR [r8+48]
  72515. mov r13, QWORD PTR [r8+56]
  72516. mov r14, QWORD PTR [r8+64]
  72517. mov r10, QWORD PTR [r8+72]
  72518. ; A[13] * B[8]
  72519. mulx rcx, rax, QWORD PTR [rbp+64]
  72520. adcx r11, rax
  72521. adox r12, rcx
  72522. ; A[13] * B[9]
  72523. mulx rcx, rax, QWORD PTR [rbp+72]
  72524. mov QWORD PTR [r8+40], r11
  72525. adcx r12, rax
  72526. adox r13, rcx
  72527. ; A[13] * B[10]
  72528. mulx rcx, rax, QWORD PTR [rbp+80]
  72529. mov QWORD PTR [r8+48], r12
  72530. adcx r13, rax
  72531. adox r14, rcx
  72532. ; A[13] * B[11]
  72533. mulx rcx, rax, QWORD PTR [rbp+88]
  72534. mov QWORD PTR [r8+56], r13
  72535. adcx r14, rax
  72536. adox r10, rcx
  72537. mov QWORD PTR [r8+64], r14
  72538. mov r11, QWORD PTR [r8+80]
  72539. mov r12, QWORD PTR [r8+88]
  72540. mov r13, QWORD PTR [r8+96]
  72541. ; A[13] * B[12]
  72542. mulx rcx, rax, QWORD PTR [rbp+96]
  72543. adcx r10, rax
  72544. adox r11, rcx
  72545. ; A[13] * B[13]
  72546. mulx rcx, rax, QWORD PTR [rbp+104]
  72547. mov QWORD PTR [r8+72], r10
  72548. adcx r11, rax
  72549. adox r12, rcx
  72550. ; A[13] * B[14]
  72551. mulx rcx, rax, QWORD PTR [rbp+112]
  72552. mov QWORD PTR [r8+80], r11
  72553. adcx r12, rax
  72554. adox r13, rcx
  72555. ; A[13] * B[15]
  72556. mulx rcx, rax, QWORD PTR [rbp+120]
  72557. mov QWORD PTR [r8+88], r12
  72558. mov r14, rdi
  72559. adcx r13, rax
  72560. adox r14, rcx
  72561. adcx r14, r15
  72562. mov r15, rdi
  72563. adox r15, rdi
  72564. adcx r15, rdi
  72565. mov QWORD PTR [r8+96], r13
  72566. mov QWORD PTR [r8+104], r14
  72567. mov rdx, QWORD PTR [r9+112]
  72568. mov r14, QWORD PTR [rbx+112]
  72569. mov r10, QWORD PTR [rbx+120]
  72570. mov r11, QWORD PTR [r8]
  72571. mov r12, QWORD PTR [r8+8]
  72572. mov r13, QWORD PTR [r8+16]
  72573. ; A[14] * B[0]
  72574. mulx rcx, rax, QWORD PTR [rbp]
  72575. adcx r14, rax
  72576. adox r10, rcx
  72577. ; A[14] * B[1]
  72578. mulx rcx, rax, QWORD PTR [rbp+8]
  72579. mov QWORD PTR [rbx+112], r14
  72580. adcx r10, rax
  72581. adox r11, rcx
  72582. ; A[14] * B[2]
  72583. mulx rcx, rax, QWORD PTR [rbp+16]
  72584. mov QWORD PTR [rbx+120], r10
  72585. adcx r11, rax
  72586. adox r12, rcx
  72587. ; A[14] * B[3]
  72588. mulx rcx, rax, QWORD PTR [rbp+24]
  72589. mov QWORD PTR [r8], r11
  72590. adcx r12, rax
  72591. adox r13, rcx
  72592. mov QWORD PTR [r8+8], r12
  72593. mov r14, QWORD PTR [r8+24]
  72594. mov r10, QWORD PTR [r8+32]
  72595. mov r11, QWORD PTR [r8+40]
  72596. mov r12, QWORD PTR [r8+48]
  72597. ; A[14] * B[4]
  72598. mulx rcx, rax, QWORD PTR [rbp+32]
  72599. adcx r13, rax
  72600. adox r14, rcx
  72601. ; A[14] * B[5]
  72602. mulx rcx, rax, QWORD PTR [rbp+40]
  72603. mov QWORD PTR [r8+16], r13
  72604. adcx r14, rax
  72605. adox r10, rcx
  72606. ; A[14] * B[6]
  72607. mulx rcx, rax, QWORD PTR [rbp+48]
  72608. mov QWORD PTR [r8+24], r14
  72609. adcx r10, rax
  72610. adox r11, rcx
  72611. ; A[14] * B[7]
  72612. mulx rcx, rax, QWORD PTR [rbp+56]
  72613. mov QWORD PTR [r8+32], r10
  72614. adcx r11, rax
  72615. adox r12, rcx
  72616. mov QWORD PTR [r8+40], r11
  72617. mov r13, QWORD PTR [r8+56]
  72618. mov r14, QWORD PTR [r8+64]
  72619. mov r10, QWORD PTR [r8+72]
  72620. mov r11, QWORD PTR [r8+80]
  72621. ; A[14] * B[8]
  72622. mulx rcx, rax, QWORD PTR [rbp+64]
  72623. adcx r12, rax
  72624. adox r13, rcx
  72625. ; A[14] * B[9]
  72626. mulx rcx, rax, QWORD PTR [rbp+72]
  72627. mov QWORD PTR [r8+48], r12
  72628. adcx r13, rax
  72629. adox r14, rcx
  72630. ; A[14] * B[10]
  72631. mulx rcx, rax, QWORD PTR [rbp+80]
  72632. mov QWORD PTR [r8+56], r13
  72633. adcx r14, rax
  72634. adox r10, rcx
  72635. ; A[14] * B[11]
  72636. mulx rcx, rax, QWORD PTR [rbp+88]
  72637. mov QWORD PTR [r8+64], r14
  72638. adcx r10, rax
  72639. adox r11, rcx
  72640. mov QWORD PTR [r8+72], r10
  72641. mov r12, QWORD PTR [r8+88]
  72642. mov r13, QWORD PTR [r8+96]
  72643. mov r14, QWORD PTR [r8+104]
  72644. ; A[14] * B[12]
  72645. mulx rcx, rax, QWORD PTR [rbp+96]
  72646. adcx r11, rax
  72647. adox r12, rcx
  72648. ; A[14] * B[13]
  72649. mulx rcx, rax, QWORD PTR [rbp+104]
  72650. mov QWORD PTR [r8+80], r11
  72651. adcx r12, rax
  72652. adox r13, rcx
  72653. ; A[14] * B[14]
  72654. mulx rcx, rax, QWORD PTR [rbp+112]
  72655. mov QWORD PTR [r8+88], r12
  72656. adcx r13, rax
  72657. adox r14, rcx
  72658. ; A[14] * B[15]
  72659. mulx rcx, rax, QWORD PTR [rbp+120]
  72660. mov QWORD PTR [r8+96], r13
  72661. mov r10, rdi
  72662. adcx r14, rax
  72663. adox r10, rcx
  72664. adcx r10, r15
  72665. mov r15, rdi
  72666. adox r15, rdi
  72667. adcx r15, rdi
  72668. mov QWORD PTR [r8+104], r14
  72669. mov QWORD PTR [r8+112], r10
  72670. mov rdx, QWORD PTR [r9+120]
  72671. mov r10, QWORD PTR [rbx+120]
  72672. mov r11, QWORD PTR [r8]
  72673. mov r12, QWORD PTR [r8+8]
  72674. mov r13, QWORD PTR [r8+16]
  72675. mov r14, QWORD PTR [r8+24]
  72676. ; A[15] * B[0]
  72677. mulx rcx, rax, QWORD PTR [rbp]
  72678. adcx r10, rax
  72679. adox r11, rcx
  72680. ; A[15] * B[1]
  72681. mulx rcx, rax, QWORD PTR [rbp+8]
  72682. mov QWORD PTR [rbx+120], r10
  72683. adcx r11, rax
  72684. adox r12, rcx
  72685. ; A[15] * B[2]
  72686. mulx rcx, rax, QWORD PTR [rbp+16]
  72687. mov QWORD PTR [r8], r11
  72688. adcx r12, rax
  72689. adox r13, rcx
  72690. ; A[15] * B[3]
  72691. mulx rcx, rax, QWORD PTR [rbp+24]
  72692. mov QWORD PTR [r8+8], r12
  72693. adcx r13, rax
  72694. adox r14, rcx
  72695. mov QWORD PTR [r8+16], r13
  72696. mov r10, QWORD PTR [r8+32]
  72697. mov r11, QWORD PTR [r8+40]
  72698. mov r12, QWORD PTR [r8+48]
  72699. mov r13, QWORD PTR [r8+56]
  72700. ; A[15] * B[4]
  72701. mulx rcx, rax, QWORD PTR [rbp+32]
  72702. adcx r14, rax
  72703. adox r10, rcx
  72704. ; A[15] * B[5]
  72705. mulx rcx, rax, QWORD PTR [rbp+40]
  72706. mov QWORD PTR [r8+24], r14
  72707. adcx r10, rax
  72708. adox r11, rcx
  72709. ; A[15] * B[6]
  72710. mulx rcx, rax, QWORD PTR [rbp+48]
  72711. mov QWORD PTR [r8+32], r10
  72712. adcx r11, rax
  72713. adox r12, rcx
  72714. ; A[15] * B[7]
  72715. mulx rcx, rax, QWORD PTR [rbp+56]
  72716. mov QWORD PTR [r8+40], r11
  72717. adcx r12, rax
  72718. adox r13, rcx
  72719. mov QWORD PTR [r8+48], r12
  72720. mov r14, QWORD PTR [r8+64]
  72721. mov r10, QWORD PTR [r8+72]
  72722. mov r11, QWORD PTR [r8+80]
  72723. mov r12, QWORD PTR [r8+88]
  72724. ; A[15] * B[8]
  72725. mulx rcx, rax, QWORD PTR [rbp+64]
  72726. adcx r13, rax
  72727. adox r14, rcx
  72728. ; A[15] * B[9]
  72729. mulx rcx, rax, QWORD PTR [rbp+72]
  72730. mov QWORD PTR [r8+56], r13
  72731. adcx r14, rax
  72732. adox r10, rcx
  72733. ; A[15] * B[10]
  72734. mulx rcx, rax, QWORD PTR [rbp+80]
  72735. mov QWORD PTR [r8+64], r14
  72736. adcx r10, rax
  72737. adox r11, rcx
  72738. ; A[15] * B[11]
  72739. mulx rcx, rax, QWORD PTR [rbp+88]
  72740. mov QWORD PTR [r8+72], r10
  72741. adcx r11, rax
  72742. adox r12, rcx
  72743. mov QWORD PTR [r8+80], r11
  72744. mov r13, QWORD PTR [r8+96]
  72745. mov r14, QWORD PTR [r8+104]
  72746. mov r10, QWORD PTR [r8+112]
  72747. ; A[15] * B[12]
  72748. mulx rcx, rax, QWORD PTR [rbp+96]
  72749. adcx r12, rax
  72750. adox r13, rcx
  72751. ; A[15] * B[13]
  72752. mulx rcx, rax, QWORD PTR [rbp+104]
  72753. mov QWORD PTR [r8+88], r12
  72754. adcx r13, rax
  72755. adox r14, rcx
  72756. ; A[15] * B[14]
  72757. mulx rcx, rax, QWORD PTR [rbp+112]
  72758. mov QWORD PTR [r8+96], r13
  72759. adcx r14, rax
  72760. adox r10, rcx
  72761. ; A[15] * B[15]
  72762. mulx rcx, rax, QWORD PTR [rbp+120]
  72763. mov QWORD PTR [r8+104], r14
  72764. mov r11, rdi
  72765. adcx r10, rax
  72766. adox r11, rcx
  72767. adcx r11, r15
  72768. mov QWORD PTR [r8+112], r10
  72769. mov QWORD PTR [r8+120], r11
  72770. sub r8, 128
  72771. cmp r9, r8
  72772. je L_start_1024_mul_avx2_16
  72773. cmp rbp, r8
  72774. jne L_end_1024_mul_avx2_16
  72775. L_start_1024_mul_avx2_16:
  72776. vmovdqu xmm0, OWORD PTR [rbx]
  72777. vmovups OWORD PTR [r8], xmm0
  72778. vmovdqu xmm0, OWORD PTR [rbx+16]
  72779. vmovups OWORD PTR [r8+16], xmm0
  72780. vmovdqu xmm0, OWORD PTR [rbx+32]
  72781. vmovups OWORD PTR [r8+32], xmm0
  72782. vmovdqu xmm0, OWORD PTR [rbx+48]
  72783. vmovups OWORD PTR [r8+48], xmm0
  72784. vmovdqu xmm0, OWORD PTR [rbx+64]
  72785. vmovups OWORD PTR [r8+64], xmm0
  72786. vmovdqu xmm0, OWORD PTR [rbx+80]
  72787. vmovups OWORD PTR [r8+80], xmm0
  72788. vmovdqu xmm0, OWORD PTR [rbx+96]
  72789. vmovups OWORD PTR [r8+96], xmm0
  72790. vmovdqu xmm0, OWORD PTR [rbx+112]
  72791. vmovups OWORD PTR [r8+112], xmm0
  72792. L_end_1024_mul_avx2_16:
  72793. add rsp, 128
  72794. pop rdi
  72795. pop r15
  72796. pop r14
  72797. pop r13
  72798. pop r12
  72799. pop rbp
  72800. pop rbx
  72801. ret
  72802. sp_1024_mul_avx2_16 ENDP
  72803. _text ENDS
  72804. ENDIF
  72805. IFDEF HAVE_INTEL_AVX2
  72806. ; /* Square a and put result in r. (r = a * a)
  72807. ; *
  72808. ; * r A single precision integer that receives the result (passed in rcx).
  72809. ; * a A single precision integer to square (passed in rdx).
  72810. ; */
  72811. _text SEGMENT READONLY PARA
  72812. sp_1024_sqr_avx2_16 PROC
  72813. push rbp
  72814. push r12
  72815. push r13
  72816. push r14
  72817. push r15
  72818. push rdi
  72819. push rsi
  72820. push rbx
  72821. mov r8, rcx
  72822. mov r9, rdx
  72823. sub rsp, 128
  72824. cmp r9, r8
  72825. mov rbp, rsp
  72826. cmovne rbp, r8
  72827. add r8, 128
  72828. xor r13, r13
  72829. ; Diagonal 1
  72830. ; Zero into %r9
  72831. ; Zero into %r10
  72832. ; A[1] x A[0]
  72833. mov rdx, QWORD PTR [r9]
  72834. mulx r11, r10, QWORD PTR [r9+8]
  72835. ; A[2] x A[0]
  72836. mulx r12, rax, QWORD PTR [r9+16]
  72837. adcx r11, rax
  72838. adox r12, r13
  72839. mov QWORD PTR [rbp+8], r10
  72840. mov QWORD PTR [rbp+16], r11
  72841. ; Zero into %r8
  72842. ; Zero into %r9
  72843. ; A[3] x A[0]
  72844. mulx r10, rax, QWORD PTR [r9+24]
  72845. adcx r12, rax
  72846. adox r10, r13
  72847. ; A[4] x A[0]
  72848. mulx r11, rax, QWORD PTR [r9+32]
  72849. adcx r10, rax
  72850. adox r11, r13
  72851. mov QWORD PTR [rbp+24], r12
  72852. mov QWORD PTR [rbp+32], r10
  72853. ; Zero into %r10
  72854. ; Zero into %r8
  72855. ; A[5] x A[0]
  72856. mulx r12, rax, QWORD PTR [r9+40]
  72857. adcx r11, rax
  72858. adox r12, r13
  72859. ; A[6] x A[0]
  72860. mulx r10, rax, QWORD PTR [r9+48]
  72861. adcx r12, rax
  72862. adox r10, r13
  72863. mov QWORD PTR [rbp+40], r11
  72864. mov QWORD PTR [rbp+48], r12
  72865. ; Zero into %r9
  72866. ; Zero into %r10
  72867. ; A[7] x A[0]
  72868. mulx r11, rax, QWORD PTR [r9+56]
  72869. adcx r10, rax
  72870. adox r11, r13
  72871. ; A[8] x A[0]
  72872. mulx r12, rax, QWORD PTR [r9+64]
  72873. adcx r11, rax
  72874. adox r12, r13
  72875. mov QWORD PTR [rbp+56], r10
  72876. mov QWORD PTR [rbp+64], r11
  72877. ; Zero into %r8
  72878. ; Zero into %r9
  72879. ; A[9] x A[0]
  72880. mulx r10, rax, QWORD PTR [r9+72]
  72881. adcx r12, rax
  72882. adox r10, r13
  72883. ; A[10] x A[0]
  72884. mulx r11, rax, QWORD PTR [r9+80]
  72885. adcx r10, rax
  72886. adox r11, r13
  72887. mov QWORD PTR [rbp+72], r12
  72888. mov QWORD PTR [rbp+80], r10
  72889. ; No load %r13 - %r10
  72890. ; A[11] x A[0]
  72891. mulx r15, rax, QWORD PTR [r9+88]
  72892. adcx r11, rax
  72893. adox r15, r13
  72894. ; A[12] x A[0]
  72895. mulx rdi, rax, QWORD PTR [r9+96]
  72896. adcx r15, rax
  72897. adox rdi, r13
  72898. mov QWORD PTR [rbp+88], r11
  72899. ; No store %r13 - %r10
  72900. ; No load %r15 - %r9
  72901. ; A[13] x A[0]
  72902. mulx rsi, rax, QWORD PTR [r9+104]
  72903. adcx rdi, rax
  72904. adox rsi, r13
  72905. ; A[14] x A[0]
  72906. mulx rbx, rax, QWORD PTR [r9+112]
  72907. adcx rsi, rax
  72908. adox rbx, r13
  72909. ; No store %r14 - %r8
  72910. ; No store %r15 - %r9
  72911. ; Zero into %r8
  72912. ; Zero into %r9
  72913. ; A[15] x A[0]
  72914. mulx r10, rax, QWORD PTR [r9+120]
  72915. adcx rbx, rax
  72916. adox r10, r13
  72917. ; No store %rbx - %r10
  72918. ; Carry
  72919. adcx r10, r13
  72920. mov r14, r13
  72921. adcx r14, r13
  72922. adox r14, r13
  72923. mov QWORD PTR [r8], r10
  72924. ; Diagonal 2
  72925. mov r10, QWORD PTR [rbp+24]
  72926. mov r11, QWORD PTR [rbp+32]
  72927. mov r12, QWORD PTR [rbp+40]
  72928. ; A[2] x A[1]
  72929. mov rdx, QWORD PTR [r9+8]
  72930. mulx rcx, rax, QWORD PTR [r9+16]
  72931. adcx r10, rax
  72932. adox r11, rcx
  72933. ; A[3] x A[1]
  72934. mulx rcx, rax, QWORD PTR [r9+24]
  72935. adcx r11, rax
  72936. adox r12, rcx
  72937. mov QWORD PTR [rbp+24], r10
  72938. mov QWORD PTR [rbp+32], r11
  72939. mov r10, QWORD PTR [rbp+48]
  72940. mov r11, QWORD PTR [rbp+56]
  72941. ; A[4] x A[1]
  72942. mulx rcx, rax, QWORD PTR [r9+32]
  72943. adcx r12, rax
  72944. adox r10, rcx
  72945. ; A[5] x A[1]
  72946. mulx rcx, rax, QWORD PTR [r9+40]
  72947. adcx r10, rax
  72948. adox r11, rcx
  72949. mov QWORD PTR [rbp+40], r12
  72950. mov QWORD PTR [rbp+48], r10
  72951. mov r12, QWORD PTR [rbp+64]
  72952. mov r10, QWORD PTR [rbp+72]
  72953. ; A[6] x A[1]
  72954. mulx rcx, rax, QWORD PTR [r9+48]
  72955. adcx r11, rax
  72956. adox r12, rcx
  72957. ; A[7] x A[1]
  72958. mulx rcx, rax, QWORD PTR [r9+56]
  72959. adcx r12, rax
  72960. adox r10, rcx
  72961. mov QWORD PTR [rbp+56], r11
  72962. mov QWORD PTR [rbp+64], r12
  72963. mov r11, QWORD PTR [rbp+80]
  72964. mov r12, QWORD PTR [rbp+88]
  72965. ; A[8] x A[1]
  72966. mulx rcx, rax, QWORD PTR [r9+64]
  72967. adcx r10, rax
  72968. adox r11, rcx
  72969. ; A[9] x A[1]
  72970. mulx rcx, rax, QWORD PTR [r9+72]
  72971. adcx r11, rax
  72972. adox r12, rcx
  72973. mov QWORD PTR [rbp+72], r10
  72974. mov QWORD PTR [rbp+80], r11
  72975. ; No load %r13 - %r8
  72976. ; A[10] x A[1]
  72977. mulx rcx, rax, QWORD PTR [r9+80]
  72978. adcx r12, rax
  72979. adox r15, rcx
  72980. ; A[11] x A[1]
  72981. mulx rcx, rax, QWORD PTR [r9+88]
  72982. adcx r15, rax
  72983. adox rdi, rcx
  72984. mov QWORD PTR [rbp+88], r12
  72985. ; No store %r13 - %r8
  72986. ; No load %r15 - %r10
  72987. ; A[12] x A[1]
  72988. mulx rcx, rax, QWORD PTR [r9+96]
  72989. adcx rdi, rax
  72990. adox rsi, rcx
  72991. ; A[13] x A[1]
  72992. mulx rcx, rax, QWORD PTR [r9+104]
  72993. adcx rsi, rax
  72994. adox rbx, rcx
  72995. ; No store %r14 - %r9
  72996. ; No store %r15 - %r10
  72997. mov r11, QWORD PTR [r8]
  72998. ; Zero into %r10
  72999. ; A[14] x A[1]
  73000. mulx rcx, rax, QWORD PTR [r9+112]
  73001. adcx rbx, rax
  73002. adox r11, rcx
  73003. ; A[15] x A[1]
  73004. mulx r12, rax, QWORD PTR [r9+120]
  73005. adcx r11, rax
  73006. adox r12, r13
  73007. ; No store %rbx - %r8
  73008. mov QWORD PTR [r8], r11
  73009. ; Zero into %r8
  73010. ; Zero into %r9
  73011. ; A[15] x A[2]
  73012. mov rdx, QWORD PTR [r9+16]
  73013. mulx r10, rax, QWORD PTR [r9+120]
  73014. adcx r12, rax
  73015. adox r10, r13
  73016. mov QWORD PTR [r8+8], r12
  73017. ; Carry
  73018. adcx r10, r14
  73019. mov r14, r13
  73020. adcx r14, r13
  73021. adox r14, r13
  73022. mov QWORD PTR [r8+16], r10
  73023. ; Diagonal 3
  73024. mov r10, QWORD PTR [rbp+40]
  73025. mov r11, QWORD PTR [rbp+48]
  73026. mov r12, QWORD PTR [rbp+56]
  73027. ; A[3] x A[2]
  73028. mulx rcx, rax, QWORD PTR [r9+24]
  73029. adcx r10, rax
  73030. adox r11, rcx
  73031. ; A[4] x A[2]
  73032. mulx rcx, rax, QWORD PTR [r9+32]
  73033. adcx r11, rax
  73034. adox r12, rcx
  73035. mov QWORD PTR [rbp+40], r10
  73036. mov QWORD PTR [rbp+48], r11
  73037. mov r10, QWORD PTR [rbp+64]
  73038. mov r11, QWORD PTR [rbp+72]
  73039. ; A[5] x A[2]
  73040. mulx rcx, rax, QWORD PTR [r9+40]
  73041. adcx r12, rax
  73042. adox r10, rcx
  73043. ; A[6] x A[2]
  73044. mulx rcx, rax, QWORD PTR [r9+48]
  73045. adcx r10, rax
  73046. adox r11, rcx
  73047. mov QWORD PTR [rbp+56], r12
  73048. mov QWORD PTR [rbp+64], r10
  73049. mov r12, QWORD PTR [rbp+80]
  73050. mov r10, QWORD PTR [rbp+88]
  73051. ; A[7] x A[2]
  73052. mulx rcx, rax, QWORD PTR [r9+56]
  73053. adcx r11, rax
  73054. adox r12, rcx
  73055. ; A[8] x A[2]
  73056. mulx rcx, rax, QWORD PTR [r9+64]
  73057. adcx r12, rax
  73058. adox r10, rcx
  73059. mov QWORD PTR [rbp+72], r11
  73060. mov QWORD PTR [rbp+80], r12
  73061. ; No load %r13 - %r9
  73062. ; A[9] x A[2]
  73063. mulx rcx, rax, QWORD PTR [r9+72]
  73064. adcx r10, rax
  73065. adox r15, rcx
  73066. ; A[10] x A[2]
  73067. mulx rcx, rax, QWORD PTR [r9+80]
  73068. adcx r15, rax
  73069. adox rdi, rcx
  73070. mov QWORD PTR [rbp+88], r10
  73071. ; No store %r13 - %r9
  73072. ; No load %r15 - %r8
  73073. ; A[11] x A[2]
  73074. mulx rcx, rax, QWORD PTR [r9+88]
  73075. adcx rdi, rax
  73076. adox rsi, rcx
  73077. ; A[12] x A[2]
  73078. mulx rcx, rax, QWORD PTR [r9+96]
  73079. adcx rsi, rax
  73080. adox rbx, rcx
  73081. ; No store %r14 - %r10
  73082. ; No store %r15 - %r8
  73083. mov r12, QWORD PTR [r8]
  73084. mov r10, QWORD PTR [r8+8]
  73085. ; A[13] x A[2]
  73086. mulx rcx, rax, QWORD PTR [r9+104]
  73087. adcx rbx, rax
  73088. adox r12, rcx
  73089. ; A[14] x A[2]
  73090. mulx rcx, rax, QWORD PTR [r9+112]
  73091. adcx r12, rax
  73092. adox r10, rcx
  73093. ; No store %rbx - %r9
  73094. mov QWORD PTR [r8], r12
  73095. mov r11, QWORD PTR [r8+16]
  73096. ; Zero into %r10
  73097. ; A[14] x A[3]
  73098. mov rdx, QWORD PTR [r9+24]
  73099. mulx rcx, rax, QWORD PTR [r9+112]
  73100. adcx r10, rax
  73101. adox r11, rcx
  73102. ; A[14] x A[4]
  73103. mov rdx, QWORD PTR [r9+32]
  73104. mulx r12, rax, QWORD PTR [r9+112]
  73105. adcx r11, rax
  73106. adox r12, r13
  73107. mov QWORD PTR [r8+8], r10
  73108. mov QWORD PTR [r8+16], r11
  73109. ; Zero into %r8
  73110. ; Zero into %r9
  73111. ; A[14] x A[5]
  73112. mov rdx, QWORD PTR [r9+40]
  73113. mulx r10, rax, QWORD PTR [r9+112]
  73114. adcx r12, rax
  73115. adox r10, r13
  73116. mov QWORD PTR [r8+24], r12
  73117. ; Carry
  73118. adcx r10, r14
  73119. mov r14, r13
  73120. adcx r14, r13
  73121. adox r14, r13
  73122. mov QWORD PTR [r8+32], r10
  73123. ; Diagonal 4
  73124. mov r10, QWORD PTR [rbp+56]
  73125. mov r11, QWORD PTR [rbp+64]
  73126. mov r12, QWORD PTR [rbp+72]
  73127. ; A[4] x A[3]
  73128. mov rdx, QWORD PTR [r9+24]
  73129. mulx rcx, rax, QWORD PTR [r9+32]
  73130. adcx r10, rax
  73131. adox r11, rcx
  73132. ; A[5] x A[3]
  73133. mulx rcx, rax, QWORD PTR [r9+40]
  73134. adcx r11, rax
  73135. adox r12, rcx
  73136. mov QWORD PTR [rbp+56], r10
  73137. mov QWORD PTR [rbp+64], r11
  73138. mov r10, QWORD PTR [rbp+80]
  73139. mov r11, QWORD PTR [rbp+88]
  73140. ; A[6] x A[3]
  73141. mulx rcx, rax, QWORD PTR [r9+48]
  73142. adcx r12, rax
  73143. adox r10, rcx
  73144. ; A[7] x A[3]
  73145. mulx rcx, rax, QWORD PTR [r9+56]
  73146. adcx r10, rax
  73147. adox r11, rcx
  73148. mov QWORD PTR [rbp+72], r12
  73149. mov QWORD PTR [rbp+80], r10
  73150. ; No load %r13 - %r10
  73151. ; A[8] x A[3]
  73152. mulx rcx, rax, QWORD PTR [r9+64]
  73153. adcx r11, rax
  73154. adox r15, rcx
  73155. ; A[9] x A[3]
  73156. mulx rcx, rax, QWORD PTR [r9+72]
  73157. adcx r15, rax
  73158. adox rdi, rcx
  73159. mov QWORD PTR [rbp+88], r11
  73160. ; No store %r13 - %r10
  73161. ; No load %r15 - %r9
  73162. ; A[10] x A[3]
  73163. mulx rcx, rax, QWORD PTR [r9+80]
  73164. adcx rdi, rax
  73165. adox rsi, rcx
  73166. ; A[11] x A[3]
  73167. mulx rcx, rax, QWORD PTR [r9+88]
  73168. adcx rsi, rax
  73169. adox rbx, rcx
  73170. ; No store %r14 - %r8
  73171. ; No store %r15 - %r9
  73172. mov r10, QWORD PTR [r8]
  73173. mov r11, QWORD PTR [r8+8]
  73174. ; A[12] x A[3]
  73175. mulx rcx, rax, QWORD PTR [r9+96]
  73176. adcx rbx, rax
  73177. adox r10, rcx
  73178. ; A[13] x A[3]
  73179. mulx rcx, rax, QWORD PTR [r9+104]
  73180. adcx r10, rax
  73181. adox r11, rcx
  73182. ; No store %rbx - %r10
  73183. mov QWORD PTR [r8], r10
  73184. mov r12, QWORD PTR [r8+16]
  73185. mov r10, QWORD PTR [r8+24]
  73186. ; A[13] x A[4]
  73187. mov rdx, QWORD PTR [r9+32]
  73188. mulx rcx, rax, QWORD PTR [r9+104]
  73189. adcx r11, rax
  73190. adox r12, rcx
  73191. ; A[13] x A[5]
  73192. mov rdx, QWORD PTR [r9+40]
  73193. mulx rcx, rax, QWORD PTR [r9+104]
  73194. adcx r12, rax
  73195. adox r10, rcx
  73196. mov QWORD PTR [r8+8], r11
  73197. mov QWORD PTR [r8+16], r12
  73198. mov r11, QWORD PTR [r8+32]
  73199. ; Zero into %r10
  73200. ; A[13] x A[6]
  73201. mov rdx, QWORD PTR [r9+48]
  73202. mulx rcx, rax, QWORD PTR [r9+104]
  73203. adcx r10, rax
  73204. adox r11, rcx
  73205. ; A[13] x A[7]
  73206. mov rdx, QWORD PTR [r9+56]
  73207. mulx r12, rax, QWORD PTR [r9+104]
  73208. adcx r11, rax
  73209. adox r12, r13
  73210. mov QWORD PTR [r8+24], r10
  73211. mov QWORD PTR [r8+32], r11
  73212. ; Zero into %r8
  73213. ; Zero into %r9
  73214. ; A[13] x A[8]
  73215. mov rdx, QWORD PTR [r9+64]
  73216. mulx r10, rax, QWORD PTR [r9+104]
  73217. adcx r12, rax
  73218. adox r10, r13
  73219. mov QWORD PTR [r8+40], r12
  73220. ; Carry
  73221. adcx r10, r14
  73222. mov r14, r13
  73223. adcx r14, r13
  73224. adox r14, r13
  73225. mov QWORD PTR [r8+48], r10
  73226. ; Diagonal 5
  73227. mov r10, QWORD PTR [rbp+72]
  73228. mov r11, QWORD PTR [rbp+80]
  73229. mov r12, QWORD PTR [rbp+88]
  73230. ; A[5] x A[4]
  73231. mov rdx, QWORD PTR [r9+32]
  73232. mulx rcx, rax, QWORD PTR [r9+40]
  73233. adcx r10, rax
  73234. adox r11, rcx
  73235. ; A[6] x A[4]
  73236. mulx rcx, rax, QWORD PTR [r9+48]
  73237. adcx r11, rax
  73238. adox r12, rcx
  73239. mov QWORD PTR [rbp+72], r10
  73240. mov QWORD PTR [rbp+80], r11
  73241. ; No load %r13 - %r8
  73242. ; A[7] x A[4]
  73243. mulx rcx, rax, QWORD PTR [r9+56]
  73244. adcx r12, rax
  73245. adox r15, rcx
  73246. ; A[8] x A[4]
  73247. mulx rcx, rax, QWORD PTR [r9+64]
  73248. adcx r15, rax
  73249. adox rdi, rcx
  73250. mov QWORD PTR [rbp+88], r12
  73251. ; No store %r13 - %r8
  73252. ; No load %r15 - %r10
  73253. ; A[9] x A[4]
  73254. mulx rcx, rax, QWORD PTR [r9+72]
  73255. adcx rdi, rax
  73256. adox rsi, rcx
  73257. ; A[10] x A[4]
  73258. mulx rcx, rax, QWORD PTR [r9+80]
  73259. adcx rsi, rax
  73260. adox rbx, rcx
  73261. ; No store %r14 - %r9
  73262. ; No store %r15 - %r10
  73263. mov r11, QWORD PTR [r8]
  73264. mov r12, QWORD PTR [r8+8]
  73265. ; A[11] x A[4]
  73266. mulx rcx, rax, QWORD PTR [r9+88]
  73267. adcx rbx, rax
  73268. adox r11, rcx
  73269. ; A[12] x A[4]
  73270. mulx rcx, rax, QWORD PTR [r9+96]
  73271. adcx r11, rax
  73272. adox r12, rcx
  73273. ; No store %rbx - %r8
  73274. mov QWORD PTR [r8], r11
  73275. mov r10, QWORD PTR [r8+16]
  73276. mov r11, QWORD PTR [r8+24]
  73277. ; A[12] x A[5]
  73278. mov rdx, QWORD PTR [r9+40]
  73279. mulx rcx, rax, QWORD PTR [r9+96]
  73280. adcx r12, rax
  73281. adox r10, rcx
  73282. ; A[12] x A[6]
  73283. mov rdx, QWORD PTR [r9+48]
  73284. mulx rcx, rax, QWORD PTR [r9+96]
  73285. adcx r10, rax
  73286. adox r11, rcx
  73287. mov QWORD PTR [r8+8], r12
  73288. mov QWORD PTR [r8+16], r10
  73289. mov r12, QWORD PTR [r8+32]
  73290. mov r10, QWORD PTR [r8+40]
  73291. ; A[12] x A[7]
  73292. mov rdx, QWORD PTR [r9+56]
  73293. mulx rcx, rax, QWORD PTR [r9+96]
  73294. adcx r11, rax
  73295. adox r12, rcx
  73296. ; A[12] x A[8]
  73297. mov rdx, QWORD PTR [r9+64]
  73298. mulx rcx, rax, QWORD PTR [r9+96]
  73299. adcx r12, rax
  73300. adox r10, rcx
  73301. mov QWORD PTR [r8+24], r11
  73302. mov QWORD PTR [r8+32], r12
  73303. mov r11, QWORD PTR [r8+48]
  73304. ; Zero into %r10
  73305. ; A[12] x A[9]
  73306. mov rdx, QWORD PTR [r9+72]
  73307. mulx rcx, rax, QWORD PTR [r9+96]
  73308. adcx r10, rax
  73309. adox r11, rcx
  73310. ; A[12] x A[10]
  73311. mov rdx, QWORD PTR [r9+80]
  73312. mulx r12, rax, QWORD PTR [r9+96]
  73313. adcx r11, rax
  73314. adox r12, r13
  73315. mov QWORD PTR [r8+40], r10
  73316. mov QWORD PTR [r8+48], r11
  73317. ; Zero into %r8
  73318. ; Zero into %r9
  73319. ; A[12] x A[11]
  73320. mov rdx, QWORD PTR [r9+88]
  73321. mulx r10, rax, QWORD PTR [r9+96]
  73322. adcx r12, rax
  73323. adox r10, r13
  73324. mov QWORD PTR [r8+56], r12
  73325. ; Carry
  73326. adcx r10, r14
  73327. mov r14, r13
  73328. adcx r14, r13
  73329. adox r14, r13
  73330. mov QWORD PTR [r8+64], r10
  73331. ; Diagonal 6
  73332. mov r10, QWORD PTR [rbp+88]
  73333. ; No load %r13 - %r9
  73334. ; A[6] x A[5]
  73335. mov rdx, QWORD PTR [r9+40]
  73336. mulx rcx, rax, QWORD PTR [r9+48]
  73337. adcx r10, rax
  73338. adox r15, rcx
  73339. ; A[7] x A[5]
  73340. mulx rcx, rax, QWORD PTR [r9+56]
  73341. adcx r15, rax
  73342. adox rdi, rcx
  73343. mov QWORD PTR [rbp+88], r10
  73344. ; No store %r13 - %r9
  73345. ; No load %r15 - %r8
  73346. ; A[8] x A[5]
  73347. mulx rcx, rax, QWORD PTR [r9+64]
  73348. adcx rdi, rax
  73349. adox rsi, rcx
  73350. ; A[9] x A[5]
  73351. mulx rcx, rax, QWORD PTR [r9+72]
  73352. adcx rsi, rax
  73353. adox rbx, rcx
  73354. ; No store %r14 - %r10
  73355. ; No store %r15 - %r8
  73356. mov r12, QWORD PTR [r8]
  73357. mov r10, QWORD PTR [r8+8]
  73358. ; A[10] x A[5]
  73359. mulx rcx, rax, QWORD PTR [r9+80]
  73360. adcx rbx, rax
  73361. adox r12, rcx
  73362. ; A[11] x A[5]
  73363. mulx rcx, rax, QWORD PTR [r9+88]
  73364. adcx r12, rax
  73365. adox r10, rcx
  73366. ; No store %rbx - %r9
  73367. mov QWORD PTR [r8], r12
  73368. mov r11, QWORD PTR [r8+16]
  73369. mov r12, QWORD PTR [r8+24]
  73370. ; A[11] x A[6]
  73371. mov rdx, QWORD PTR [r9+48]
  73372. mulx rcx, rax, QWORD PTR [r9+88]
  73373. adcx r10, rax
  73374. adox r11, rcx
  73375. ; A[11] x A[7]
  73376. mov rdx, QWORD PTR [r9+56]
  73377. mulx rcx, rax, QWORD PTR [r9+88]
  73378. adcx r11, rax
  73379. adox r12, rcx
  73380. mov QWORD PTR [r8+8], r10
  73381. mov QWORD PTR [r8+16], r11
  73382. mov r10, QWORD PTR [r8+32]
  73383. mov r11, QWORD PTR [r8+40]
  73384. ; A[11] x A[8]
  73385. mov rdx, QWORD PTR [r9+64]
  73386. mulx rcx, rax, QWORD PTR [r9+88]
  73387. adcx r12, rax
  73388. adox r10, rcx
  73389. ; A[11] x A[9]
  73390. mov rdx, QWORD PTR [r9+72]
  73391. mulx rcx, rax, QWORD PTR [r9+88]
  73392. adcx r10, rax
  73393. adox r11, rcx
  73394. mov QWORD PTR [r8+24], r12
  73395. mov QWORD PTR [r8+32], r10
  73396. mov r12, QWORD PTR [r8+48]
  73397. mov r10, QWORD PTR [r8+56]
  73398. ; A[11] x A[10]
  73399. mov rdx, QWORD PTR [r9+80]
  73400. mulx rcx, rax, QWORD PTR [r9+88]
  73401. adcx r11, rax
  73402. adox r12, rcx
  73403. ; A[13] x A[9]
  73404. mov rdx, QWORD PTR [r9+72]
  73405. mulx rcx, rax, QWORD PTR [r9+104]
  73406. adcx r12, rax
  73407. adox r10, rcx
  73408. mov QWORD PTR [r8+40], r11
  73409. mov QWORD PTR [r8+48], r12
  73410. mov r11, QWORD PTR [r8+64]
  73411. ; Zero into %r10
  73412. ; A[13] x A[10]
  73413. mov rdx, QWORD PTR [r9+80]
  73414. mulx rcx, rax, QWORD PTR [r9+104]
  73415. adcx r10, rax
  73416. adox r11, rcx
  73417. ; A[13] x A[11]
  73418. mov rdx, QWORD PTR [r9+88]
  73419. mulx r12, rax, QWORD PTR [r9+104]
  73420. adcx r11, rax
  73421. adox r12, r13
  73422. mov QWORD PTR [r8+56], r10
  73423. mov QWORD PTR [r8+64], r11
  73424. ; Zero into %r8
  73425. ; Zero into %r9
  73426. ; A[13] x A[12]
  73427. mov rdx, QWORD PTR [r9+96]
  73428. mulx r10, rax, QWORD PTR [r9+104]
  73429. adcx r12, rax
  73430. adox r10, r13
  73431. mov QWORD PTR [r8+72], r12
  73432. ; Carry
  73433. adcx r10, r14
  73434. mov r14, r13
  73435. adcx r14, r13
  73436. adox r14, r13
  73437. mov QWORD PTR [r8+80], r10
  73438. ; Diagonal 7
  73439. ; No load %r15 - %r9
  73440. ; A[7] x A[6]
  73441. mov rdx, QWORD PTR [r9+48]
  73442. mulx rcx, rax, QWORD PTR [r9+56]
  73443. adcx rdi, rax
  73444. adox rsi, rcx
  73445. ; A[8] x A[6]
  73446. mulx rcx, rax, QWORD PTR [r9+64]
  73447. adcx rsi, rax
  73448. adox rbx, rcx
  73449. ; No store %r14 - %r8
  73450. ; No store %r15 - %r9
  73451. mov r10, QWORD PTR [r8]
  73452. mov r11, QWORD PTR [r8+8]
  73453. ; A[9] x A[6]
  73454. mulx rcx, rax, QWORD PTR [r9+72]
  73455. adcx rbx, rax
  73456. adox r10, rcx
  73457. ; A[10] x A[6]
  73458. mulx rcx, rax, QWORD PTR [r9+80]
  73459. adcx r10, rax
  73460. adox r11, rcx
  73461. ; No store %rbx - %r10
  73462. mov QWORD PTR [r8], r10
  73463. mov r12, QWORD PTR [r8+16]
  73464. mov r10, QWORD PTR [r8+24]
  73465. ; A[10] x A[7]
  73466. mov rdx, QWORD PTR [r9+56]
  73467. mulx rcx, rax, QWORD PTR [r9+80]
  73468. adcx r11, rax
  73469. adox r12, rcx
  73470. ; A[10] x A[8]
  73471. mov rdx, QWORD PTR [r9+64]
  73472. mulx rcx, rax, QWORD PTR [r9+80]
  73473. adcx r12, rax
  73474. adox r10, rcx
  73475. mov QWORD PTR [r8+8], r11
  73476. mov QWORD PTR [r8+16], r12
  73477. mov r11, QWORD PTR [r8+32]
  73478. mov r12, QWORD PTR [r8+40]
  73479. ; A[10] x A[9]
  73480. mov rdx, QWORD PTR [r9+72]
  73481. mulx rcx, rax, QWORD PTR [r9+80]
  73482. adcx r10, rax
  73483. adox r11, rcx
  73484. ; A[14] x A[6]
  73485. mov rdx, QWORD PTR [r9+48]
  73486. mulx rcx, rax, QWORD PTR [r9+112]
  73487. adcx r11, rax
  73488. adox r12, rcx
  73489. mov QWORD PTR [r8+24], r10
  73490. mov QWORD PTR [r8+32], r11
  73491. mov r10, QWORD PTR [r8+48]
  73492. mov r11, QWORD PTR [r8+56]
  73493. ; A[14] x A[7]
  73494. mov rdx, QWORD PTR [r9+56]
  73495. mulx rcx, rax, QWORD PTR [r9+112]
  73496. adcx r12, rax
  73497. adox r10, rcx
  73498. ; A[14] x A[8]
  73499. mov rdx, QWORD PTR [r9+64]
  73500. mulx rcx, rax, QWORD PTR [r9+112]
  73501. adcx r10, rax
  73502. adox r11, rcx
  73503. mov QWORD PTR [r8+40], r12
  73504. mov QWORD PTR [r8+48], r10
  73505. mov r12, QWORD PTR [r8+64]
  73506. mov r10, QWORD PTR [r8+72]
  73507. ; A[14] x A[9]
  73508. mov rdx, QWORD PTR [r9+72]
  73509. mulx rcx, rax, QWORD PTR [r9+112]
  73510. adcx r11, rax
  73511. adox r12, rcx
  73512. ; A[14] x A[10]
  73513. mov rdx, QWORD PTR [r9+80]
  73514. mulx rcx, rax, QWORD PTR [r9+112]
  73515. adcx r12, rax
  73516. adox r10, rcx
  73517. mov QWORD PTR [r8+56], r11
  73518. mov QWORD PTR [r8+64], r12
  73519. mov r11, QWORD PTR [r8+80]
  73520. ; Zero into %r10
  73521. ; A[14] x A[11]
  73522. mov rdx, QWORD PTR [r9+88]
  73523. mulx rcx, rax, QWORD PTR [r9+112]
  73524. adcx r10, rax
  73525. adox r11, rcx
  73526. ; A[14] x A[12]
  73527. mov rdx, QWORD PTR [r9+96]
  73528. mulx r12, rax, QWORD PTR [r9+112]
  73529. adcx r11, rax
  73530. adox r12, r13
  73531. mov QWORD PTR [r8+72], r10
  73532. mov QWORD PTR [r8+80], r11
  73533. ; Zero into %r8
  73534. ; Zero into %r9
  73535. ; A[14] x A[13]
  73536. mov rdx, QWORD PTR [r9+104]
  73537. mulx r10, rax, QWORD PTR [r9+112]
  73538. adcx r12, rax
  73539. adox r10, r13
  73540. mov QWORD PTR [r8+88], r12
  73541. ; Carry
  73542. adcx r10, r14
  73543. mov r14, r13
  73544. adcx r14, r13
  73545. adox r14, r13
  73546. mov QWORD PTR [r8+96], r10
  73547. ; Diagonal 8
  73548. mov r11, QWORD PTR [r8]
  73549. mov r12, QWORD PTR [r8+8]
  73550. ; A[8] x A[7]
  73551. mov rdx, QWORD PTR [r9+56]
  73552. mulx rcx, rax, QWORD PTR [r9+64]
  73553. adcx rbx, rax
  73554. adox r11, rcx
  73555. ; A[9] x A[7]
  73556. mulx rcx, rax, QWORD PTR [r9+72]
  73557. adcx r11, rax
  73558. adox r12, rcx
  73559. ; No store %rbx - %r8
  73560. mov QWORD PTR [r8], r11
  73561. mov r10, QWORD PTR [r8+16]
  73562. mov r11, QWORD PTR [r8+24]
  73563. ; A[9] x A[8]
  73564. mov rdx, QWORD PTR [r9+64]
  73565. mulx rcx, rax, QWORD PTR [r9+72]
  73566. adcx r12, rax
  73567. adox r10, rcx
  73568. ; A[15] x A[3]
  73569. mov rdx, QWORD PTR [r9+24]
  73570. mulx rcx, rax, QWORD PTR [r9+120]
  73571. adcx r10, rax
  73572. adox r11, rcx
  73573. mov QWORD PTR [r8+8], r12
  73574. mov QWORD PTR [r8+16], r10
  73575. mov r12, QWORD PTR [r8+32]
  73576. mov r10, QWORD PTR [r8+40]
  73577. ; A[15] x A[4]
  73578. mov rdx, QWORD PTR [r9+32]
  73579. mulx rcx, rax, QWORD PTR [r9+120]
  73580. adcx r11, rax
  73581. adox r12, rcx
  73582. ; A[15] x A[5]
  73583. mov rdx, QWORD PTR [r9+40]
  73584. mulx rcx, rax, QWORD PTR [r9+120]
  73585. adcx r12, rax
  73586. adox r10, rcx
  73587. mov QWORD PTR [r8+24], r11
  73588. mov QWORD PTR [r8+32], r12
  73589. mov r11, QWORD PTR [r8+48]
  73590. mov r12, QWORD PTR [r8+56]
  73591. ; A[15] x A[6]
  73592. mov rdx, QWORD PTR [r9+48]
  73593. mulx rcx, rax, QWORD PTR [r9+120]
  73594. adcx r10, rax
  73595. adox r11, rcx
  73596. ; A[15] x A[7]
  73597. mov rdx, QWORD PTR [r9+56]
  73598. mulx rcx, rax, QWORD PTR [r9+120]
  73599. adcx r11, rax
  73600. adox r12, rcx
  73601. mov QWORD PTR [r8+40], r10
  73602. mov QWORD PTR [r8+48], r11
  73603. mov r10, QWORD PTR [r8+64]
  73604. mov r11, QWORD PTR [r8+72]
  73605. ; A[15] x A[8]
  73606. mov rdx, QWORD PTR [r9+64]
  73607. mulx rcx, rax, QWORD PTR [r9+120]
  73608. adcx r12, rax
  73609. adox r10, rcx
  73610. ; A[15] x A[9]
  73611. mov rdx, QWORD PTR [r9+72]
  73612. mulx rcx, rax, QWORD PTR [r9+120]
  73613. adcx r10, rax
  73614. adox r11, rcx
  73615. mov QWORD PTR [r8+56], r12
  73616. mov QWORD PTR [r8+64], r10
  73617. mov r12, QWORD PTR [r8+80]
  73618. mov r10, QWORD PTR [r8+88]
  73619. ; A[15] x A[10]
  73620. mov rdx, QWORD PTR [r9+80]
  73621. mulx rcx, rax, QWORD PTR [r9+120]
  73622. adcx r11, rax
  73623. adox r12, rcx
  73624. ; A[15] x A[11]
  73625. mov rdx, QWORD PTR [r9+88]
  73626. mulx rcx, rax, QWORD PTR [r9+120]
  73627. adcx r12, rax
  73628. adox r10, rcx
  73629. mov QWORD PTR [r8+72], r11
  73630. mov QWORD PTR [r8+80], r12
  73631. mov r11, QWORD PTR [r8+96]
  73632. ; Zero into %r10
  73633. ; A[15] x A[12]
  73634. mov rdx, QWORD PTR [r9+96]
  73635. mulx rcx, rax, QWORD PTR [r9+120]
  73636. adcx r10, rax
  73637. adox r11, rcx
  73638. ; A[15] x A[13]
  73639. mov rdx, QWORD PTR [r9+104]
  73640. mulx r12, rax, QWORD PTR [r9+120]
  73641. adcx r11, rax
  73642. adox r12, r13
  73643. mov QWORD PTR [r8+88], r10
  73644. mov QWORD PTR [r8+96], r11
  73645. ; Zero into %r8
  73646. ; Zero into %r9
  73647. ; A[15] x A[14]
  73648. mov rdx, QWORD PTR [r9+112]
  73649. mulx r10, rax, QWORD PTR [r9+120]
  73650. adcx r12, rax
  73651. adox r10, r13
  73652. mov QWORD PTR [r8+104], r12
  73653. ; Carry
  73654. adcx r10, r14
  73655. mov r14, r13
  73656. adcx r14, r13
  73657. adox r14, r13
  73658. mov QWORD PTR [r8+112], r10
  73659. mov QWORD PTR [r8+120], r14
  73660. ; Double and Add in A[i] x A[i]
  73661. mov r11, QWORD PTR [rbp+8]
  73662. ; A[0] x A[0]
  73663. mov rdx, QWORD PTR [r9]
  73664. mulx rcx, rax, rdx
  73665. mov QWORD PTR [rbp], rax
  73666. adox r11, r11
  73667. adcx r11, rcx
  73668. mov QWORD PTR [rbp+8], r11
  73669. mov r10, QWORD PTR [rbp+16]
  73670. mov r11, QWORD PTR [rbp+24]
  73671. ; A[1] x A[1]
  73672. mov rdx, QWORD PTR [r9+8]
  73673. mulx rcx, rax, rdx
  73674. adox r10, r10
  73675. adox r11, r11
  73676. adcx r10, rax
  73677. adcx r11, rcx
  73678. mov QWORD PTR [rbp+16], r10
  73679. mov QWORD PTR [rbp+24], r11
  73680. mov r10, QWORD PTR [rbp+32]
  73681. mov r11, QWORD PTR [rbp+40]
  73682. ; A[2] x A[2]
  73683. mov rdx, QWORD PTR [r9+16]
  73684. mulx rcx, rax, rdx
  73685. adox r10, r10
  73686. adox r11, r11
  73687. adcx r10, rax
  73688. adcx r11, rcx
  73689. mov QWORD PTR [rbp+32], r10
  73690. mov QWORD PTR [rbp+40], r11
  73691. mov r10, QWORD PTR [rbp+48]
  73692. mov r11, QWORD PTR [rbp+56]
  73693. ; A[3] x A[3]
  73694. mov rdx, QWORD PTR [r9+24]
  73695. mulx rcx, rax, rdx
  73696. adox r10, r10
  73697. adox r11, r11
  73698. adcx r10, rax
  73699. adcx r11, rcx
  73700. mov QWORD PTR [rbp+48], r10
  73701. mov QWORD PTR [rbp+56], r11
  73702. mov r10, QWORD PTR [rbp+64]
  73703. mov r11, QWORD PTR [rbp+72]
  73704. ; A[4] x A[4]
  73705. mov rdx, QWORD PTR [r9+32]
  73706. mulx rcx, rax, rdx
  73707. adox r10, r10
  73708. adox r11, r11
  73709. adcx r10, rax
  73710. adcx r11, rcx
  73711. mov QWORD PTR [rbp+64], r10
  73712. mov QWORD PTR [rbp+72], r11
  73713. mov r10, QWORD PTR [rbp+80]
  73714. mov r11, QWORD PTR [rbp+88]
  73715. ; A[5] x A[5]
  73716. mov rdx, QWORD PTR [r9+40]
  73717. mulx rcx, rax, rdx
  73718. adox r10, r10
  73719. adox r11, r11
  73720. adcx r10, rax
  73721. adcx r11, rcx
  73722. mov QWORD PTR [rbp+80], r10
  73723. mov QWORD PTR [rbp+88], r11
  73724. ; A[6] x A[6]
  73725. mov rdx, QWORD PTR [r9+48]
  73726. mulx rcx, rax, rdx
  73727. adox r15, r15
  73728. adox rdi, rdi
  73729. adcx r15, rax
  73730. adcx rdi, rcx
  73731. ; A[7] x A[7]
  73732. mov rdx, QWORD PTR [r9+56]
  73733. mulx rcx, rax, rdx
  73734. adox rsi, rsi
  73735. adox rbx, rbx
  73736. adcx rsi, rax
  73737. adcx rbx, rcx
  73738. mov r10, QWORD PTR [r8]
  73739. mov r11, QWORD PTR [r8+8]
  73740. ; A[8] x A[8]
  73741. mov rdx, QWORD PTR [r9+64]
  73742. mulx rcx, rax, rdx
  73743. adox r10, r10
  73744. adox r11, r11
  73745. adcx r10, rax
  73746. adcx r11, rcx
  73747. mov QWORD PTR [r8], r10
  73748. mov QWORD PTR [r8+8], r11
  73749. mov r10, QWORD PTR [r8+16]
  73750. mov r11, QWORD PTR [r8+24]
  73751. ; A[9] x A[9]
  73752. mov rdx, QWORD PTR [r9+72]
  73753. mulx rcx, rax, rdx
  73754. adox r10, r10
  73755. adox r11, r11
  73756. adcx r10, rax
  73757. adcx r11, rcx
  73758. mov QWORD PTR [r8+16], r10
  73759. mov QWORD PTR [r8+24], r11
  73760. mov r10, QWORD PTR [r8+32]
  73761. mov r11, QWORD PTR [r8+40]
  73762. ; A[10] x A[10]
  73763. mov rdx, QWORD PTR [r9+80]
  73764. mulx rcx, rax, rdx
  73765. adox r10, r10
  73766. adox r11, r11
  73767. adcx r10, rax
  73768. adcx r11, rcx
  73769. mov QWORD PTR [r8+32], r10
  73770. mov QWORD PTR [r8+40], r11
  73771. mov r10, QWORD PTR [r8+48]
  73772. mov r11, QWORD PTR [r8+56]
  73773. ; A[11] x A[11]
  73774. mov rdx, QWORD PTR [r9+88]
  73775. mulx rcx, rax, rdx
  73776. adox r10, r10
  73777. adox r11, r11
  73778. adcx r10, rax
  73779. adcx r11, rcx
  73780. mov QWORD PTR [r8+48], r10
  73781. mov QWORD PTR [r8+56], r11
  73782. mov r10, QWORD PTR [r8+64]
  73783. mov r11, QWORD PTR [r8+72]
  73784. ; A[12] x A[12]
  73785. mov rdx, QWORD PTR [r9+96]
  73786. mulx rcx, rax, rdx
  73787. adox r10, r10
  73788. adox r11, r11
  73789. adcx r10, rax
  73790. adcx r11, rcx
  73791. mov QWORD PTR [r8+64], r10
  73792. mov QWORD PTR [r8+72], r11
  73793. mov r10, QWORD PTR [r8+80]
  73794. mov r11, QWORD PTR [r8+88]
  73795. ; A[13] x A[13]
  73796. mov rdx, QWORD PTR [r9+104]
  73797. mulx rcx, rax, rdx
  73798. adox r10, r10
  73799. adox r11, r11
  73800. adcx r10, rax
  73801. adcx r11, rcx
  73802. mov QWORD PTR [r8+80], r10
  73803. mov QWORD PTR [r8+88], r11
  73804. mov r10, QWORD PTR [r8+96]
  73805. mov r11, QWORD PTR [r8+104]
  73806. ; A[14] x A[14]
  73807. mov rdx, QWORD PTR [r9+112]
  73808. mulx rcx, rax, rdx
  73809. adox r10, r10
  73810. adox r11, r11
  73811. adcx r10, rax
  73812. adcx r11, rcx
  73813. mov QWORD PTR [r8+96], r10
  73814. mov QWORD PTR [r8+104], r11
  73815. mov r10, QWORD PTR [r8+112]
  73816. mov r11, QWORD PTR [r8+120]
  73817. ; A[15] x A[15]
  73818. mov rdx, QWORD PTR [r9+120]
  73819. mulx rcx, rax, rdx
  73820. adox r10, r10
  73821. adox r11, r11
  73822. adcx r10, rax
  73823. adcx r11, rcx
  73824. mov QWORD PTR [r8+112], r10
  73825. mov QWORD PTR [r8+120], r11
  73826. mov QWORD PTR [r8+-32], r15
  73827. mov QWORD PTR [r8+-24], rdi
  73828. mov QWORD PTR [r8+-16], rsi
  73829. mov QWORD PTR [r8+-8], rbx
  73830. sub r8, 128
  73831. cmp r9, r8
  73832. jne L_end_1024_sqr_avx2_16
  73833. vmovdqu xmm0, OWORD PTR [rbp]
  73834. vmovups OWORD PTR [r8], xmm0
  73835. vmovdqu xmm0, OWORD PTR [rbp+16]
  73836. vmovups OWORD PTR [r8+16], xmm0
  73837. vmovdqu xmm0, OWORD PTR [rbp+32]
  73838. vmovups OWORD PTR [r8+32], xmm0
  73839. vmovdqu xmm0, OWORD PTR [rbp+48]
  73840. vmovups OWORD PTR [r8+48], xmm0
  73841. vmovdqu xmm0, OWORD PTR [rbp+64]
  73842. vmovups OWORD PTR [r8+64], xmm0
  73843. vmovdqu xmm0, OWORD PTR [rbp+80]
  73844. vmovups OWORD PTR [r8+80], xmm0
  73845. L_end_1024_sqr_avx2_16:
  73846. add rsp, 128
  73847. pop rbx
  73848. pop rsi
  73849. pop rdi
  73850. pop r15
  73851. pop r14
  73852. pop r13
  73853. pop r12
  73854. pop rbp
  73855. ret
  73856. sp_1024_sqr_avx2_16 ENDP
  73857. _text ENDS
  73858. ENDIF
  73859. ; /* Add b to a into r. (r = a + b)
  73860. ; * Each operand is 16 64-bit words (offsets 0..120); rax returns the carry out of the top word (0 or 1).
  73861. ; * r A single precision integer. Receives the sum; pointer is read from rcx.
  73862. ; * a A single precision integer. First addend; pointer is read from rdx.
  73863. ; * b A single precision integer. Second addend; pointer is read from r8.
  73864. ; */
  73865. _text SEGMENT READONLY PARA
  73866. sp_1024_add_16 PROC
  73867. ; Add: r9 and r10 alternate as the working word; each step loads a[i+1], stores r[i], then adds b[i+1] with carry.
  73868. mov r9, QWORD PTR [rdx] ; r9 = a[0]
  73869. xor rax, rax ; rax = 0 for the carry result; done before the add because xor rewrites the flags
  73870. add r9, QWORD PTR [r8] ; a[0] + b[0]; starts the carry chain
  73871. mov r10, QWORD PTR [rdx+8] ; r10 = a[1] (mov leaves the carry flag untouched)
  73872. mov QWORD PTR [rcx], r9 ; r[0] = low word of the sum
  73873. adc r10, QWORD PTR [r8+8] ; a[1] + b[1] + carry
  73874. mov r9, QWORD PTR [rdx+16]
  73875. mov QWORD PTR [rcx+8], r10
  73876. adc r9, QWORD PTR [r8+16]
  73877. mov r10, QWORD PTR [rdx+24]
  73878. mov QWORD PTR [rcx+16], r9
  73879. adc r10, QWORD PTR [r8+24]
  73880. mov r9, QWORD PTR [rdx+32]
  73881. mov QWORD PTR [rcx+24], r10
  73882. adc r9, QWORD PTR [r8+32]
  73883. mov r10, QWORD PTR [rdx+40]
  73884. mov QWORD PTR [rcx+32], r9
  73885. adc r10, QWORD PTR [r8+40]
  73886. mov r9, QWORD PTR [rdx+48]
  73887. mov QWORD PTR [rcx+40], r10
  73888. adc r9, QWORD PTR [r8+48]
  73889. mov r10, QWORD PTR [rdx+56]
  73890. mov QWORD PTR [rcx+48], r9
  73891. adc r10, QWORD PTR [r8+56]
  73892. mov r9, QWORD PTR [rdx+64]
  73893. mov QWORD PTR [rcx+56], r10
  73894. adc r9, QWORD PTR [r8+64]
  73895. mov r10, QWORD PTR [rdx+72]
  73896. mov QWORD PTR [rcx+64], r9
  73897. adc r10, QWORD PTR [r8+72]
  73898. mov r9, QWORD PTR [rdx+80]
  73899. mov QWORD PTR [rcx+72], r10
  73900. adc r9, QWORD PTR [r8+80]
  73901. mov r10, QWORD PTR [rdx+88]
  73902. mov QWORD PTR [rcx+80], r9
  73903. adc r10, QWORD PTR [r8+88]
  73904. mov r9, QWORD PTR [rdx+96]
  73905. mov QWORD PTR [rcx+88], r10
  73906. adc r9, QWORD PTR [r8+96]
  73907. mov r10, QWORD PTR [rdx+104]
  73908. mov QWORD PTR [rcx+96], r9
  73909. adc r10, QWORD PTR [r8+104]
  73910. mov r9, QWORD PTR [rdx+112]
  73911. mov QWORD PTR [rcx+104], r10
  73912. adc r9, QWORD PTR [r8+112]
  73913. mov r10, QWORD PTR [rdx+120]
  73914. mov QWORD PTR [rcx+112], r9
  73915. adc r10, QWORD PTR [r8+120] ; top word: a[15] + b[15] + carry
  73916. mov QWORD PTR [rcx+120], r10
  73917. adc rax, 0 ; rax = carry out of the top word (rax was zeroed above)
  73918. ret
  73919. sp_1024_add_16 ENDP
  73920. _text ENDS
  73921. ; /* Sub b from a into a. (a -= b)
  73922. ; * Both operands are 16 64-bit words (offsets 0..120); rax returns 0 when there is no borrow out of the top word and -1 (all bits set) when there is.
  73923. ; * a A single precision integer and result. Pointer is read from rcx; overwritten word by word.
  73924. ; * b A single precision integer. Pointer is read from rdx.
  73925. ; */
  73926. _text SEGMENT READONLY PARA
  73927. sp_1024_sub_in_place_16 PROC
  73928. mov r8, QWORD PTR [rcx] ; r8 = a[0]
  73929. sub r8, QWORD PTR [rdx] ; a[0] - b[0]; starts the borrow chain
  73930. mov r9, QWORD PTR [rcx+8] ; r9 = a[1] (mov leaves the carry flag untouched)
  73931. mov QWORD PTR [rcx], r8 ; write a[0] back
  73932. sbb r9, QWORD PTR [rdx+8] ; a[1] - b[1] - borrow
  73933. mov r8, QWORD PTR [rcx+16] ; same load / store / sbb pattern for the remaining words, alternating r8 and r9
  73934. mov QWORD PTR [rcx+8], r9
  73935. sbb r8, QWORD PTR [rdx+16]
  73936. mov r9, QWORD PTR [rcx+24]
  73937. mov QWORD PTR [rcx+16], r8
  73938. sbb r9, QWORD PTR [rdx+24]
  73939. mov r8, QWORD PTR [rcx+32]
  73940. mov QWORD PTR [rcx+24], r9
  73941. sbb r8, QWORD PTR [rdx+32]
  73942. mov r9, QWORD PTR [rcx+40]
  73943. mov QWORD PTR [rcx+32], r8
  73944. sbb r9, QWORD PTR [rdx+40]
  73945. mov r8, QWORD PTR [rcx+48]
  73946. mov QWORD PTR [rcx+40], r9
  73947. sbb r8, QWORD PTR [rdx+48]
  73948. mov r9, QWORD PTR [rcx+56]
  73949. mov QWORD PTR [rcx+48], r8
  73950. sbb r9, QWORD PTR [rdx+56]
  73951. mov r8, QWORD PTR [rcx+64]
  73952. mov QWORD PTR [rcx+56], r9
  73953. sbb r8, QWORD PTR [rdx+64]
  73954. mov r9, QWORD PTR [rcx+72]
  73955. mov QWORD PTR [rcx+64], r8
  73956. sbb r9, QWORD PTR [rdx+72]
  73957. mov r8, QWORD PTR [rcx+80]
  73958. mov QWORD PTR [rcx+72], r9
  73959. sbb r8, QWORD PTR [rdx+80]
  73960. mov r9, QWORD PTR [rcx+88]
  73961. mov QWORD PTR [rcx+80], r8
  73962. sbb r9, QWORD PTR [rdx+88]
  73963. mov r8, QWORD PTR [rcx+96]
  73964. mov QWORD PTR [rcx+88], r9
  73965. sbb r8, QWORD PTR [rdx+96]
  73966. mov r9, QWORD PTR [rcx+104]
  73967. mov QWORD PTR [rcx+96], r8
  73968. sbb r9, QWORD PTR [rdx+104]
  73969. mov r8, QWORD PTR [rcx+112]
  73970. mov QWORD PTR [rcx+104], r9
  73971. sbb r8, QWORD PTR [rdx+112]
  73972. mov r9, QWORD PTR [rcx+120]
  73973. mov QWORD PTR [rcx+112], r8
  73974. sbb r9, QWORD PTR [rdx+120] ; top word: a[15] - b[15] - borrow
  73975. mov QWORD PTR [rcx+120], r9
  73976. sbb rax, rax ; rax = 0 - final borrow: 0 if none, -1 if the subtraction borrowed out
  73977. ret
  73978. sp_1024_sub_in_place_16 ENDP
  73979. _text ENDS
  73980. ; /* Conditionally subtract b from a using the mask m.
  73981. ; * m is -1 to subtract and 0 to not subtract (each word of b is ANDed with m before the subtraction).
  73982. ; *
  73983. ; * r A single precision number representing the conditional subtraction result.
  73984. ; * a A single precision number to subtract from.
  73985. ; * b A single precision number to subtract.
  73986. ; * m Mask value to apply.
  73987. ; */
  73988. _text SEGMENT READONLY PARA
  73989. sp_1024_cond_sub_16 PROC
  73990. sub rsp, 128
  73991. mov r10, QWORD PTR [r8]
  73992. mov r11, QWORD PTR [r8+8]
  73993. and r10, r9
  73994. and r11, r9
  73995. mov QWORD PTR [rsp], r10
  73996. mov QWORD PTR [rsp+8], r11
  73997. mov r10, QWORD PTR [r8+16]
  73998. mov r11, QWORD PTR [r8+24]
  73999. and r10, r9
  74000. and r11, r9
  74001. mov QWORD PTR [rsp+16], r10
  74002. mov QWORD PTR [rsp+24], r11
  74003. mov r10, QWORD PTR [r8+32]
  74004. mov r11, QWORD PTR [r8+40]
  74005. and r10, r9
  74006. and r11, r9
  74007. mov QWORD PTR [rsp+32], r10
  74008. mov QWORD PTR [rsp+40], r11
  74009. mov r10, QWORD PTR [r8+48]
  74010. mov r11, QWORD PTR [r8+56]
  74011. and r10, r9
  74012. and r11, r9
  74013. mov QWORD PTR [rsp+48], r10
  74014. mov QWORD PTR [rsp+56], r11
  74015. mov r10, QWORD PTR [r8+64]
  74016. mov r11, QWORD PTR [r8+72]
  74017. and r10, r9
  74018. and r11, r9
  74019. mov QWORD PTR [rsp+64], r10
  74020. mov QWORD PTR [rsp+72], r11
  74021. mov r10, QWORD PTR [r8+80]
  74022. mov r11, QWORD PTR [r8+88]
  74023. and r10, r9
  74024. and r11, r9
  74025. mov QWORD PTR [rsp+80], r10
  74026. mov QWORD PTR [rsp+88], r11
  74027. mov r10, QWORD PTR [r8+96]
  74028. mov r11, QWORD PTR [r8+104]
  74029. and r10, r9
  74030. and r11, r9
  74031. mov QWORD PTR [rsp+96], r10
  74032. mov QWORD PTR [rsp+104], r11
  74033. mov r10, QWORD PTR [r8+112]
  74034. mov r11, QWORD PTR [r8+120]
  74035. and r10, r9
  74036. and r11, r9
  74037. mov QWORD PTR [rsp+112], r10
  74038. mov QWORD PTR [rsp+120], r11
  74039. mov r10, QWORD PTR [rdx]
  74040. mov r8, QWORD PTR [rsp]
  74041. sub r10, r8
  74042. mov r11, QWORD PTR [rdx+8]
  74043. mov r8, QWORD PTR [rsp+8]
  74044. sbb r11, r8
  74045. mov QWORD PTR [rcx], r10
  74046. mov r10, QWORD PTR [rdx+16]
  74047. mov r8, QWORD PTR [rsp+16]
  74048. sbb r10, r8
  74049. mov QWORD PTR [rcx+8], r11
  74050. mov r11, QWORD PTR [rdx+24]
  74051. mov r8, QWORD PTR [rsp+24]
  74052. sbb r11, r8
  74053. mov QWORD PTR [rcx+16], r10
  74054. mov r10, QWORD PTR [rdx+32]
  74055. mov r8, QWORD PTR [rsp+32]
  74056. sbb r10, r8
  74057. mov QWORD PTR [rcx+24], r11
  74058. mov r11, QWORD PTR [rdx+40]
  74059. mov r8, QWORD PTR [rsp+40]
  74060. sbb r11, r8
  74061. mov QWORD PTR [rcx+32], r10
  74062. mov r10, QWORD PTR [rdx+48]
  74063. mov r8, QWORD PTR [rsp+48]
  74064. sbb r10, r8
  74065. mov QWORD PTR [rcx+40], r11
  74066. mov r11, QWORD PTR [rdx+56]
  74067. mov r8, QWORD PTR [rsp+56]
  74068. sbb r11, r8
  74069. mov QWORD PTR [rcx+48], r10
  74070. mov r10, QWORD PTR [rdx+64]
  74071. mov r8, QWORD PTR [rsp+64]
  74072. sbb r10, r8
  74073. mov QWORD PTR [rcx+56], r11
  74074. mov r11, QWORD PTR [rdx+72]
  74075. mov r8, QWORD PTR [rsp+72]
  74076. sbb r11, r8
  74077. mov QWORD PTR [rcx+64], r10
  74078. mov r10, QWORD PTR [rdx+80]
  74079. mov r8, QWORD PTR [rsp+80]
  74080. sbb r10, r8
  74081. mov QWORD PTR [rcx+72], r11
  74082. mov r11, QWORD PTR [rdx+88]
  74083. mov r8, QWORD PTR [rsp+88]
  74084. sbb r11, r8
  74085. mov QWORD PTR [rcx+80], r10
  74086. mov r10, QWORD PTR [rdx+96]
  74087. mov r8, QWORD PTR [rsp+96]
  74088. sbb r10, r8
  74089. mov QWORD PTR [rcx+88], r11
  74090. mov r11, QWORD PTR [rdx+104]
  74091. mov r8, QWORD PTR [rsp+104]
  74092. sbb r11, r8
  74093. mov QWORD PTR [rcx+96], r10
  74094. mov r10, QWORD PTR [rdx+112]
  74095. mov r8, QWORD PTR [rsp+112]
  74096. sbb r10, r8
  74097. mov QWORD PTR [rcx+104], r11
  74098. mov r11, QWORD PTR [rdx+120]
  74099. mov r8, QWORD PTR [rsp+120]
  74100. sbb r11, r8
  74101. mov QWORD PTR [rcx+112], r10
  74102. mov QWORD PTR [rcx+120], r11
  74103. sbb rax, rax
  74104. add rsp, 128
  74105. ret
  74106. sp_1024_cond_sub_16 ENDP
  74107. _text ENDS
  74108. IFDEF HAVE_INTEL_AVX2
  74109. ; /* Conditionally subtract b from a using the mask m.
  74110. ; * m is -1 to subtract and 0 when not subtracting (r is then a copy of a).
  74111. ; *
  74112. ; * r (rcx) A single precision number (16 words) receiving the conditional subtract result.
  74113. ; * a (rdx) A single precision number to subtract from.
  74114. ; * b (r8) A single precision number to subtract.
  74115. ; * m (r9) Mask value to apply; only -1 and 0 make pext behave like an AND mask.
  74116. ; */
  74117. _text SEGMENT READONLY PARA
  74118. sp_1024_cond_sub_avx2_16 PROC ; returns rax = 0 if no borrow left the top word, -1 if one did
  74119. push r12 ; r12 is callee-saved and used as a third rotating temporary
  74120. mov r12, QWORD PTR [r8]
  74121. mov r10, QWORD PTR [rdx]
  74122. pext r12, r12, r9 ; mask -1 keeps the word, mask 0 yields 0; pext does not modify CF
  74123. sub r10, r12 ; word 0 starts the borrow chain
  74124. mov r12, QWORD PTR [r8+8]
  74125. mov r11, QWORD PTR [rdx+8]
  74126. pext r12, r12, r9
  74127. mov QWORD PTR [rcx], r10
  74128. sbb r11, r12
  74129. mov r10, QWORD PTR [r8+16]
  74130. mov r12, QWORD PTR [rdx+16]
  74131. pext r10, r10, r9
  74132. mov QWORD PTR [rcx+8], r11
  74133. sbb r12, r10
  74134. mov r11, QWORD PTR [r8+24]
  74135. mov r10, QWORD PTR [rdx+24]
  74136. pext r11, r11, r9
  74137. mov QWORD PTR [rcx+16], r12
  74138. sbb r10, r11
  74139. mov r12, QWORD PTR [r8+32]
  74140. mov r11, QWORD PTR [rdx+32]
  74141. pext r12, r12, r9
  74142. mov QWORD PTR [rcx+24], r10
  74143. sbb r11, r12
  74144. mov r10, QWORD PTR [r8+40]
  74145. mov r12, QWORD PTR [rdx+40]
  74146. pext r10, r10, r9
  74147. mov QWORD PTR [rcx+32], r11
  74148. sbb r12, r10
  74149. mov r11, QWORD PTR [r8+48]
  74150. mov r10, QWORD PTR [rdx+48]
  74151. pext r11, r11, r9
  74152. mov QWORD PTR [rcx+40], r12
  74153. sbb r10, r11
  74154. mov r12, QWORD PTR [r8+56]
  74155. mov r11, QWORD PTR [rdx+56]
  74156. pext r12, r12, r9
  74157. mov QWORD PTR [rcx+48], r10
  74158. sbb r11, r12
  74159. mov r10, QWORD PTR [r8+64]
  74160. mov r12, QWORD PTR [rdx+64]
  74161. pext r10, r10, r9
  74162. mov QWORD PTR [rcx+56], r11
  74163. sbb r12, r10
  74164. mov r11, QWORD PTR [r8+72]
  74165. mov r10, QWORD PTR [rdx+72]
  74166. pext r11, r11, r9
  74167. mov QWORD PTR [rcx+64], r12
  74168. sbb r10, r11
  74169. mov r12, QWORD PTR [r8+80]
  74170. mov r11, QWORD PTR [rdx+80]
  74171. pext r12, r12, r9
  74172. mov QWORD PTR [rcx+72], r10
  74173. sbb r11, r12
  74174. mov r10, QWORD PTR [r8+88]
  74175. mov r12, QWORD PTR [rdx+88]
  74176. pext r10, r10, r9
  74177. mov QWORD PTR [rcx+80], r11
  74178. sbb r12, r10
  74179. mov r11, QWORD PTR [r8+96]
  74180. mov r10, QWORD PTR [rdx+96]
  74181. pext r11, r11, r9
  74182. mov QWORD PTR [rcx+88], r12
  74183. sbb r10, r11
  74184. mov r12, QWORD PTR [r8+104]
  74185. mov r11, QWORD PTR [rdx+104]
  74186. pext r12, r12, r9
  74187. mov QWORD PTR [rcx+96], r10
  74188. sbb r11, r12
  74189. mov r10, QWORD PTR [r8+112]
  74190. mov r12, QWORD PTR [rdx+112]
  74191. pext r10, r10, r9
  74192. mov QWORD PTR [rcx+104], r11
  74193. sbb r12, r10
  74194. mov r11, QWORD PTR [r8+120]
  74195. mov r10, QWORD PTR [rdx+120]
  74196. pext r11, r11, r9
  74197. mov QWORD PTR [rcx+112], r12
  74198. sbb r10, r11
  74199. mov QWORD PTR [rcx+120], r10
  74200. sbb rax, rax ; rax = 0 - CF: turns the final borrow into 0 / -1
  74201. pop r12
  74202. ret
  74203. sp_1024_cond_sub_avx2_16 ENDP
  74204. _text ENDS
  74205. ENDIF
  74206. ; /* Mul a by digit b into r. (r = a * b)
  74207. ; *
  74208. ; * r (rcx) A single precision integer; 17 words are written ([rcx] .. [rcx+128]).
  74209. ; * a (rdx) A single precision integer of 16 words.
  74210. ; * b (r8) A single precision digit.
  74211. ; */
  74212. _text SEGMENT READONLY PARA
  74213. sp_1024_mul_d_16 PROC
  74214. push r12 ; r12 is callee-saved and used as one of three rotating accumulators
  74215. mov r9, rdx ; keep the pointer to a in r9: mul overwrites rdx:rax
  74216. ; A[0] * B
  74217. mov rax, r8
  74218. xor r12, r12
  74219. mul QWORD PTR [r9] ; rdx:rax = a[0] * b
  74220. mov r10, rax
  74221. mov r11, rdx
  74222. mov QWORD PTR [rcx], r10
  74223. ; A[1] * B
  74224. mov rax, r8
  74225. xor r10, r10 ; clear the accumulator that becomes the spare carry word
  74226. mul QWORD PTR [r9+8]
  74227. add r11, rax ; low half completes the current result word
  74228. mov QWORD PTR [rcx+8], r11
  74229. adc r12, rdx ; high half plus carry seeds the next result word
  74230. adc r10, 0
  74231. ; A[2] * B
  74232. mov rax, r8
  74233. xor r11, r11
  74234. mul QWORD PTR [r9+16]
  74235. add r12, rax
  74236. mov QWORD PTR [rcx+16], r12
  74237. adc r10, rdx
  74238. adc r11, 0
  74239. ; A[3] * B
  74240. mov rax, r8
  74241. xor r12, r12
  74242. mul QWORD PTR [r9+24]
  74243. add r10, rax
  74244. mov QWORD PTR [rcx+24], r10
  74245. adc r11, rdx
  74246. adc r12, 0
  74247. ; A[4] * B
  74248. mov rax, r8
  74249. xor r10, r10
  74250. mul QWORD PTR [r9+32]
  74251. add r11, rax
  74252. mov QWORD PTR [rcx+32], r11
  74253. adc r12, rdx
  74254. adc r10, 0
  74255. ; A[5] * B
  74256. mov rax, r8
  74257. xor r11, r11
  74258. mul QWORD PTR [r9+40]
  74259. add r12, rax
  74260. mov QWORD PTR [rcx+40], r12
  74261. adc r10, rdx
  74262. adc r11, 0
  74263. ; A[6] * B
  74264. mov rax, r8
  74265. xor r12, r12
  74266. mul QWORD PTR [r9+48]
  74267. add r10, rax
  74268. mov QWORD PTR [rcx+48], r10
  74269. adc r11, rdx
  74270. adc r12, 0
  74271. ; A[7] * B
  74272. mov rax, r8
  74273. xor r10, r10
  74274. mul QWORD PTR [r9+56]
  74275. add r11, rax
  74276. mov QWORD PTR [rcx+56], r11
  74277. adc r12, rdx
  74278. adc r10, 0
  74279. ; A[8] * B
  74280. mov rax, r8
  74281. xor r11, r11
  74282. mul QWORD PTR [r9+64]
  74283. add r12, rax
  74284. mov QWORD PTR [rcx+64], r12
  74285. adc r10, rdx
  74286. adc r11, 0
  74287. ; A[9] * B
  74288. mov rax, r8
  74289. xor r12, r12
  74290. mul QWORD PTR [r9+72]
  74291. add r10, rax
  74292. mov QWORD PTR [rcx+72], r10
  74293. adc r11, rdx
  74294. adc r12, 0
  74295. ; A[10] * B
  74296. mov rax, r8
  74297. xor r10, r10
  74298. mul QWORD PTR [r9+80]
  74299. add r11, rax
  74300. mov QWORD PTR [rcx+80], r11
  74301. adc r12, rdx
  74302. adc r10, 0
  74303. ; A[11] * B
  74304. mov rax, r8
  74305. xor r11, r11
  74306. mul QWORD PTR [r9+88]
  74307. add r12, rax
  74308. mov QWORD PTR [rcx+88], r12
  74309. adc r10, rdx
  74310. adc r11, 0
  74311. ; A[12] * B
  74312. mov rax, r8
  74313. xor r12, r12
  74314. mul QWORD PTR [r9+96]
  74315. add r10, rax
  74316. mov QWORD PTR [rcx+96], r10
  74317. adc r11, rdx
  74318. adc r12, 0
  74319. ; A[13] * B
  74320. mov rax, r8
  74321. xor r10, r10
  74322. mul QWORD PTR [r9+104]
  74323. add r11, rax
  74324. mov QWORD PTR [rcx+104], r11
  74325. adc r12, rdx
  74326. adc r10, 0
  74327. ; A[14] * B
  74328. mov rax, r8
  74329. xor r11, r11
  74330. mul QWORD PTR [r9+112]
  74331. add r12, rax
  74332. mov QWORD PTR [rcx+112], r12
  74333. adc r10, rdx
  74334. adc r11, 0
  74335. ; A[15] * B
  74336. mov rax, r8
  74337. mul QWORD PTR [r9+120]
  74338. add r10, rax
  74339. adc r11, rdx ; last step: no spare accumulator is needed any more
  74340. mov QWORD PTR [rcx+120], r10
  74341. mov QWORD PTR [rcx+128], r11 ; 17th word holds the top of the product
  74342. pop r12
  74343. ret
  74344. sp_1024_mul_d_16 ENDP
  74345. _text ENDS
  74346. IFDEF HAVE_INTEL_AVX2
  74347. ; /* Mul a by digit b into r. (r = a * b)
  74348. ; *
  74349. ; * r (rcx) A single precision integer; 17 words are written ([rcx] .. [rcx+128]).
  74350. ; * a (rdx) A single precision integer of 16 words.
  74351. ; * b (r8) A single precision digit.
  74352. ; */
  74353. _text SEGMENT READONLY PARA
  74354. sp_1024_mul_d_avx2_16 PROC
  74355. push r12 ; r12 and r13 are callee-saved
  74356. push r13
  74357. mov rax, rdx ; pointer to a moves to rax: mulx takes its implicit multiplicand from rdx
  74358. ; A[0] * B
  74359. mov rdx, r8 ; rdx = b for every mulx below
  74360. xor r13, r13 ; r13 = 0 constant; also clears CF and OF for the adcx/adox chains
  74361. mulx r12, r11, QWORD PTR [rax] ; r12:r11 = a[0] * b (mulx leaves the flags untouched)
  74362. mov QWORD PTR [rcx], r11
  74363. ; A[1] * B
  74364. mulx r10, r9, QWORD PTR [rax+8] ; r10:r9 = a[1] * b
  74365. mov r11, r13 ; next accumulator starts at 0
  74366. adcx r12, r9 ; low half joins the current word through the CF chain
  74367. adox r11, r10 ; high half seeds the next word through the OF chain
  74368. mov QWORD PTR [rcx+8], r12
  74369. ; A[2] * B
  74370. mulx r10, r9, QWORD PTR [rax+16]
  74371. mov r12, r13
  74372. adcx r11, r9
  74373. adox r12, r10
  74374. mov QWORD PTR [rcx+16], r11
  74375. ; A[3] * B
  74376. mulx r10, r9, QWORD PTR [rax+24]
  74377. mov r11, r13
  74378. adcx r12, r9
  74379. adox r11, r10
  74380. mov QWORD PTR [rcx+24], r12
  74381. ; A[4] * B
  74382. mulx r10, r9, QWORD PTR [rax+32]
  74383. mov r12, r13
  74384. adcx r11, r9
  74385. adox r12, r10
  74386. mov QWORD PTR [rcx+32], r11
  74387. ; A[5] * B
  74388. mulx r10, r9, QWORD PTR [rax+40]
  74389. mov r11, r13
  74390. adcx r12, r9
  74391. adox r11, r10
  74392. mov QWORD PTR [rcx+40], r12
  74393. ; A[6] * B
  74394. mulx r10, r9, QWORD PTR [rax+48]
  74395. mov r12, r13
  74396. adcx r11, r9
  74397. adox r12, r10
  74398. mov QWORD PTR [rcx+48], r11
  74399. ; A[7] * B
  74400. mulx r10, r9, QWORD PTR [rax+56]
  74401. mov r11, r13
  74402. adcx r12, r9
  74403. adox r11, r10
  74404. mov QWORD PTR [rcx+56], r12
  74405. ; A[8] * B
  74406. mulx r10, r9, QWORD PTR [rax+64]
  74407. mov r12, r13
  74408. adcx r11, r9
  74409. adox r12, r10
  74410. mov QWORD PTR [rcx+64], r11
  74411. ; A[9] * B
  74412. mulx r10, r9, QWORD PTR [rax+72]
  74413. mov r11, r13
  74414. adcx r12, r9
  74415. adox r11, r10
  74416. mov QWORD PTR [rcx+72], r12
  74417. ; A[10] * B
  74418. mulx r10, r9, QWORD PTR [rax+80]
  74419. mov r12, r13
  74420. adcx r11, r9
  74421. adox r12, r10
  74422. mov QWORD PTR [rcx+80], r11
  74423. ; A[11] * B
  74424. mulx r10, r9, QWORD PTR [rax+88]
  74425. mov r11, r13
  74426. adcx r12, r9
  74427. adox r11, r10
  74428. mov QWORD PTR [rcx+88], r12
  74429. ; A[12] * B
  74430. mulx r10, r9, QWORD PTR [rax+96]
  74431. mov r12, r13
  74432. adcx r11, r9
  74433. adox r12, r10
  74434. mov QWORD PTR [rcx+96], r11
  74435. ; A[13] * B
  74436. mulx r10, r9, QWORD PTR [rax+104]
  74437. mov r11, r13
  74438. adcx r12, r9
  74439. adox r11, r10
  74440. mov QWORD PTR [rcx+104], r12
  74441. ; A[14] * B
  74442. mulx r10, r9, QWORD PTR [rax+112]
  74443. mov r12, r13
  74444. adcx r11, r9
  74445. adox r12, r10
  74446. mov QWORD PTR [rcx+112], r11
  74447. ; A[15] * B
  74448. mulx r10, r9, QWORD PTR [rax+120]
  74449. mov r11, r13
  74450. adcx r12, r9
  74451. adox r11, r10
  74452. adcx r11, r13 ; add 0 to fold the last pending CF into the top word
  74453. mov QWORD PTR [rcx+120], r12
  74454. mov QWORD PTR [rcx+128], r11 ; 17th word holds the top of the product
  74455. pop r13
  74456. pop r12
  74457. ret
  74458. sp_1024_mul_d_avx2_16 ENDP
  74459. _text ENDS
  74460. ENDIF
  74461. IFDEF _WIN64
  74462. ; /* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div)
  74463. ; *
  74464. ; * d1 (rcx) The high order half of the number to divide.
  74465. ; * d0 (rdx) The low order half of the number to divide.
  74466. ; * div (r8) The divisor.
  74467. ; * returns the result of the division (the quotient, in rax).
  74468. ; */
  74469. _text SEGMENT READONLY PARA
  74470. div_1024_word_asm_16 PROC
  74471. mov r9, rdx ; park d0: rdx is about to receive d1
  74472. mov rax, r9 ; rax = d0 (low half of the dividend)
  74473. mov rdx, rcx ; rdx = d1 (high half of the dividend)
  74474. div r8 ; rax = quotient, rdx = remainder; the CPU faults if div is 0 or the quotient exceeds 64 bits
  74475. ret
  74476. div_1024_word_asm_16 ENDP
  74477. _text ENDS
  74478. ENDIF
  74479. ; /* Compare a with b in constant time.
  74480. ; *
  74481. ; * a (rcx) A single precision integer of 16 words.
  74482. ; * b (rdx) A single precision integer of 16 words.
  74483. ; * return -ve, 0 or +ve if a is less than, equal to or greater than b
  74484. ; * respectively (the code produces exactly -1, 0 or 1 in rax).
  74485. ; */
  74486. _text SEGMENT READONLY PARA
  74487. sp_1024_cmp_16 PROC
  74488. push r12 ; r12 is callee-saved and used as a temporary
  74489. xor r9, r9 ; r9 = 0, the value the mask drops to after the first difference
  74490. mov r8, -1 ; r8 = "all words equal so far" mask
  74491. mov rax, -1 ; result; still -1 at the end means equal and is turned into 0 by the final xor
  74492. mov r10, 1 ; r10 = value returned for a > b
  74493. mov r11, QWORD PTR [rcx+120] ; words are visited from most to least significant
  74494. mov r12, QWORD PTR [rdx+120]
  74495. and r11, r8 ; once a difference has been seen both operands are forced to 0
  74496. and r12, r8
  74497. sub r11, r12
  74498. cmova rax, r10 ; a word above b word (unsigned): result = 1
  74499. cmovc rax, r8 ; a word below b word: result = -1 (r8 is still -1 whenever this fires)
  74500. cmovnz r8, r9 ; first difference clears the mask so later words cannot change rax
  74501. mov r11, QWORD PTR [rcx+112]
  74502. mov r12, QWORD PTR [rdx+112]
  74503. and r11, r8
  74504. and r12, r8
  74505. sub r11, r12
  74506. cmova rax, r10
  74507. cmovc rax, r8
  74508. cmovnz r8, r9
  74509. mov r11, QWORD PTR [rcx+104]
  74510. mov r12, QWORD PTR [rdx+104]
  74511. and r11, r8
  74512. and r12, r8
  74513. sub r11, r12
  74514. cmova rax, r10
  74515. cmovc rax, r8
  74516. cmovnz r8, r9
  74517. mov r11, QWORD PTR [rcx+96]
  74518. mov r12, QWORD PTR [rdx+96]
  74519. and r11, r8
  74520. and r12, r8
  74521. sub r11, r12
  74522. cmova rax, r10
  74523. cmovc rax, r8
  74524. cmovnz r8, r9
  74525. mov r11, QWORD PTR [rcx+88]
  74526. mov r12, QWORD PTR [rdx+88]
  74527. and r11, r8
  74528. and r12, r8
  74529. sub r11, r12
  74530. cmova rax, r10
  74531. cmovc rax, r8
  74532. cmovnz r8, r9
  74533. mov r11, QWORD PTR [rcx+80]
  74534. mov r12, QWORD PTR [rdx+80]
  74535. and r11, r8
  74536. and r12, r8
  74537. sub r11, r12
  74538. cmova rax, r10
  74539. cmovc rax, r8
  74540. cmovnz r8, r9
  74541. mov r11, QWORD PTR [rcx+72]
  74542. mov r12, QWORD PTR [rdx+72]
  74543. and r11, r8
  74544. and r12, r8
  74545. sub r11, r12
  74546. cmova rax, r10
  74547. cmovc rax, r8
  74548. cmovnz r8, r9
  74549. mov r11, QWORD PTR [rcx+64]
  74550. mov r12, QWORD PTR [rdx+64]
  74551. and r11, r8
  74552. and r12, r8
  74553. sub r11, r12
  74554. cmova rax, r10
  74555. cmovc rax, r8
  74556. cmovnz r8, r9
  74557. mov r11, QWORD PTR [rcx+56]
  74558. mov r12, QWORD PTR [rdx+56]
  74559. and r11, r8
  74560. and r12, r8
  74561. sub r11, r12
  74562. cmova rax, r10
  74563. cmovc rax, r8
  74564. cmovnz r8, r9
  74565. mov r11, QWORD PTR [rcx+48]
  74566. mov r12, QWORD PTR [rdx+48]
  74567. and r11, r8
  74568. and r12, r8
  74569. sub r11, r12
  74570. cmova rax, r10
  74571. cmovc rax, r8
  74572. cmovnz r8, r9
  74573. mov r11, QWORD PTR [rcx+40]
  74574. mov r12, QWORD PTR [rdx+40]
  74575. and r11, r8
  74576. and r12, r8
  74577. sub r11, r12
  74578. cmova rax, r10
  74579. cmovc rax, r8
  74580. cmovnz r8, r9
  74581. mov r11, QWORD PTR [rcx+32]
  74582. mov r12, QWORD PTR [rdx+32]
  74583. and r11, r8
  74584. and r12, r8
  74585. sub r11, r12
  74586. cmova rax, r10
  74587. cmovc rax, r8
  74588. cmovnz r8, r9
  74589. mov r11, QWORD PTR [rcx+24]
  74590. mov r12, QWORD PTR [rdx+24]
  74591. and r11, r8
  74592. and r12, r8
  74593. sub r11, r12
  74594. cmova rax, r10
  74595. cmovc rax, r8
  74596. cmovnz r8, r9
  74597. mov r11, QWORD PTR [rcx+16]
  74598. mov r12, QWORD PTR [rdx+16]
  74599. and r11, r8
  74600. and r12, r8
  74601. sub r11, r12
  74602. cmova rax, r10
  74603. cmovc rax, r8
  74604. cmovnz r8, r9
  74605. mov r11, QWORD PTR [rcx+8]
  74606. mov r12, QWORD PTR [rdx+8]
  74607. and r11, r8
  74608. and r12, r8
  74609. sub r11, r12
  74610. cmova rax, r10
  74611. cmovc rax, r8
  74612. cmovnz r8, r9
  74613. mov r11, QWORD PTR [rcx]
  74614. mov r12, QWORD PTR [rdx]
  74615. and r11, r8
  74616. and r12, r8
  74617. sub r11, r12
  74618. cmova rax, r10
  74619. cmovc rax, r8
  74620. cmovnz r8, r9
  74621. xor rax, r8 ; all equal: -1 xor -1 = 0; otherwise r8 is 0 and rax keeps 1 or -1
  74622. pop r12
  74623. ret
  74624. sp_1024_cmp_16 ENDP
  74625. _text ENDS
  74626. ; /* Conditionally copy a into r using the mask m: r ^= (r ^ a) & m.
  74627. ; * m is -1 to copy and 0 when not.
  74628. ; *
  74629. ; * r (rcx) A single precision number (16 words) to copy over.
  74630. ; * a (rdx) A single precision number (16 words) to copy.
  74631. ; * m (r8) Mask value to apply.
  74632. ; */
  74633. _text SEGMENT READONLY PARA
  74634. sp_1024_cond_copy_16 PROC ; no return value: rax is only a scratch register here
  74635. mov rax, QWORD PTR [rcx] ; four words per step, held in rax, r9, r10 and r11
  74636. mov r9, QWORD PTR [rcx+8]
  74637. mov r10, QWORD PTR [rcx+16]
  74638. mov r11, QWORD PTR [rcx+24]
  74639. xor rax, QWORD PTR [rdx] ; difference r[i] ^ a[i]
  74640. xor r9, QWORD PTR [rdx+8]
  74641. xor r10, QWORD PTR [rdx+16]
  74642. xor r11, QWORD PTR [rdx+24]
  74643. and rax, r8 ; keep the difference only when m is set
  74644. and r9, r8
  74645. and r10, r8
  74646. and r11, r8
  74647. xor QWORD PTR [rcx], rax ; r[i] becomes a[i] for m = -1 and is rewritten unchanged for m = 0
  74648. xor QWORD PTR [rcx+8], r9
  74649. xor QWORD PTR [rcx+16], r10
  74650. xor QWORD PTR [rcx+24], r11
  74651. mov rax, QWORD PTR [rcx+32]
  74652. mov r9, QWORD PTR [rcx+40]
  74653. mov r10, QWORD PTR [rcx+48]
  74654. mov r11, QWORD PTR [rcx+56]
  74655. xor rax, QWORD PTR [rdx+32]
  74656. xor r9, QWORD PTR [rdx+40]
  74657. xor r10, QWORD PTR [rdx+48]
  74658. xor r11, QWORD PTR [rdx+56]
  74659. and rax, r8
  74660. and r9, r8
  74661. and r10, r8
  74662. and r11, r8
  74663. xor QWORD PTR [rcx+32], rax
  74664. xor QWORD PTR [rcx+40], r9
  74665. xor QWORD PTR [rcx+48], r10
  74666. xor QWORD PTR [rcx+56], r11
  74667. mov rax, QWORD PTR [rcx+64]
  74668. mov r9, QWORD PTR [rcx+72]
  74669. mov r10, QWORD PTR [rcx+80]
  74670. mov r11, QWORD PTR [rcx+88]
  74671. xor rax, QWORD PTR [rdx+64]
  74672. xor r9, QWORD PTR [rdx+72]
  74673. xor r10, QWORD PTR [rdx+80]
  74674. xor r11, QWORD PTR [rdx+88]
  74675. and rax, r8
  74676. and r9, r8
  74677. and r10, r8
  74678. and r11, r8
  74679. xor QWORD PTR [rcx+64], rax
  74680. xor QWORD PTR [rcx+72], r9
  74681. xor QWORD PTR [rcx+80], r10
  74682. xor QWORD PTR [rcx+88], r11
  74683. mov rax, QWORD PTR [rcx+96]
  74684. mov r9, QWORD PTR [rcx+104]
  74685. mov r10, QWORD PTR [rcx+112]
  74686. mov r11, QWORD PTR [rcx+120]
  74687. xor rax, QWORD PTR [rdx+96]
  74688. xor r9, QWORD PTR [rdx+104]
  74689. xor r10, QWORD PTR [rdx+112]
  74690. xor r11, QWORD PTR [rdx+120]
  74691. and rax, r8
  74692. and r9, r8
  74693. and r10, r8
  74694. and r11, r8
  74695. xor QWORD PTR [rcx+96], rax
  74696. xor QWORD PTR [rcx+104], r9
  74697. xor QWORD PTR [rcx+112], r10
  74698. xor QWORD PTR [rcx+120], r11
  74699. ret
  74700. sp_1024_cond_copy_16 ENDP
  74701. _text ENDS
  74702. ; /* Reduce the number back to 1024 bits using Montgomery reduction.
  74703. ; *
  74704. ; * a (rcx) A single precision number (32 words are touched) to reduce in place.
  74705. ; * m (rdx) The single precision number (16 words) representing the modulus.
  74706. ; * mp (r8) The digit representing the negative inverse of m mod 2^n.
  74707. ; */
  74708. _text SEGMENT READONLY PARA
  74709. sp_1024_mont_reduce_16 PROC ; the 16-word result is written back over a[0..15]
  74710. push r12 ; r12-r15, rdi and rsi are callee-saved in the Microsoft x64 convention
  74711. push r13
  74712. push r14
  74713. push r15
  74714. push rdi
  74715. push rsi
  74716. mov r9, rdx ; keep the pointer to m in r9: mul overwrites rdx:rax
  74717. xor rsi, rsi ; rsi = carry propagated from one outer iteration to the next
  74718. ; i = 16
  74719. mov r10, 16
  74720. mov r15, QWORD PTR [rcx] ; r15 caches a[i]
  74721. mov rdi, QWORD PTR [rcx+8] ; rdi caches a[i+1]
  74722. L_1024_mont_reduce_16_loop:
  74723. ; mu = a[i] * mp
  74724. mov r13, r15
  74725. imul r13, r8
  74726. ; a[i+0] += m[0] * mu
  74727. mov rax, r13
  74728. xor r12, r12
  74729. mul QWORD PTR [r9]
  74730. add r15, rax ; the sum itself is not kept; only its carry matters
  74731. adc r12, rdx
  74732. ; a[i+1] += m[1] * mu
  74733. mov rax, r13
  74734. xor r11, r11
  74735. mul QWORD PTR [r9+8]
  74736. mov r15, rdi ; this word is a[i] of the next iteration, so it stays in r15
  74737. add r15, rax
  74738. adc r11, rdx
  74739. add r15, r12
  74740. adc r11, 0
  74741. ; a[i+2] += m[2] * mu
  74742. mov rax, r13
  74743. xor r12, r12
  74744. mul QWORD PTR [r9+16]
  74745. mov rdi, QWORD PTR [rcx+16] ; this word is a[i+1] of the next iteration, so it stays in rdi
  74746. add rdi, rax
  74747. adc r12, rdx
  74748. add rdi, r11
  74749. adc r12, 0
  74750. ; a[i+3] += m[3] * mu
  74751. mov rax, r13
  74752. xor r11, r11
  74753. mul QWORD PTR [r9+24]
  74754. mov r14, QWORD PTR [rcx+24]
  74755. add r14, rax
  74756. adc r11, rdx
  74757. add r14, r12
  74758. mov QWORD PTR [rcx+24], r14
  74759. adc r11, 0
  74760. ; a[i+4] += m[4] * mu
  74761. mov rax, r13
  74762. xor r12, r12
  74763. mul QWORD PTR [r9+32]
  74764. mov r14, QWORD PTR [rcx+32]
  74765. add r14, rax
  74766. adc r12, rdx
  74767. add r14, r11
  74768. mov QWORD PTR [rcx+32], r14
  74769. adc r12, 0
  74770. ; a[i+5] += m[5] * mu
  74771. mov rax, r13
  74772. xor r11, r11
  74773. mul QWORD PTR [r9+40]
  74774. mov r14, QWORD PTR [rcx+40]
  74775. add r14, rax
  74776. adc r11, rdx
  74777. add r14, r12
  74778. mov QWORD PTR [rcx+40], r14
  74779. adc r11, 0
  74780. ; a[i+6] += m[6] * mu
  74781. mov rax, r13
  74782. xor r12, r12
  74783. mul QWORD PTR [r9+48]
  74784. mov r14, QWORD PTR [rcx+48]
  74785. add r14, rax
  74786. adc r12, rdx
  74787. add r14, r11
  74788. mov QWORD PTR [rcx+48], r14
  74789. adc r12, 0
  74790. ; a[i+7] += m[7] * mu
  74791. mov rax, r13
  74792. xor r11, r11
  74793. mul QWORD PTR [r9+56]
  74794. mov r14, QWORD PTR [rcx+56]
  74795. add r14, rax
  74796. adc r11, rdx
  74797. add r14, r12
  74798. mov QWORD PTR [rcx+56], r14
  74799. adc r11, 0
  74800. ; a[i+8] += m[8] * mu
  74801. mov rax, r13
  74802. xor r12, r12
  74803. mul QWORD PTR [r9+64]
  74804. mov r14, QWORD PTR [rcx+64]
  74805. add r14, rax
  74806. adc r12, rdx
  74807. add r14, r11
  74808. mov QWORD PTR [rcx+64], r14
  74809. adc r12, 0
  74810. ; a[i+9] += m[9] * mu
  74811. mov rax, r13
  74812. xor r11, r11
  74813. mul QWORD PTR [r9+72]
  74814. mov r14, QWORD PTR [rcx+72]
  74815. add r14, rax
  74816. adc r11, rdx
  74817. add r14, r12
  74818. mov QWORD PTR [rcx+72], r14
  74819. adc r11, 0
  74820. ; a[i+10] += m[10] * mu
  74821. mov rax, r13
  74822. xor r12, r12
  74823. mul QWORD PTR [r9+80]
  74824. mov r14, QWORD PTR [rcx+80]
  74825. add r14, rax
  74826. adc r12, rdx
  74827. add r14, r11
  74828. mov QWORD PTR [rcx+80], r14
  74829. adc r12, 0
  74830. ; a[i+11] += m[11] * mu
  74831. mov rax, r13
  74832. xor r11, r11
  74833. mul QWORD PTR [r9+88]
  74834. mov r14, QWORD PTR [rcx+88]
  74835. add r14, rax
  74836. adc r11, rdx
  74837. add r14, r12
  74838. mov QWORD PTR [rcx+88], r14
  74839. adc r11, 0
  74840. ; a[i+12] += m[12] * mu
  74841. mov rax, r13
  74842. xor r12, r12
  74843. mul QWORD PTR [r9+96]
  74844. mov r14, QWORD PTR [rcx+96]
  74845. add r14, rax
  74846. adc r12, rdx
  74847. add r14, r11
  74848. mov QWORD PTR [rcx+96], r14
  74849. adc r12, 0
  74850. ; a[i+13] += m[13] * mu
  74851. mov rax, r13
  74852. xor r11, r11
  74853. mul QWORD PTR [r9+104]
  74854. mov r14, QWORD PTR [rcx+104]
  74855. add r14, rax
  74856. adc r11, rdx
  74857. add r14, r12
  74858. mov QWORD PTR [rcx+104], r14
  74859. adc r11, 0
  74860. ; a[i+14] += m[14] * mu
  74861. mov rax, r13
  74862. xor r12, r12
  74863. mul QWORD PTR [r9+112]
  74864. mov r14, QWORD PTR [rcx+112]
  74865. add r14, rax
  74866. adc r12, rdx
  74867. add r14, r11
  74868. mov QWORD PTR [rcx+112], r14
  74869. adc r12, 0
  74870. ; a[i+15] += m[15] * mu
  74871. mov rax, r13
  74872. mul QWORD PTR [r9+120]
  74873. mov r14, QWORD PTR [rcx+120]
  74874. add r12, rax
  74875. adc rdx, rsi ; bring in the carry left over from the previous iteration
  74876. mov rsi, 0 ; mov rather than xor: the flags from the adc above are still needed
  74877. adc rsi, 0
  74878. add r14, r12
  74879. mov QWORD PTR [rcx+120], r14
  74880. adc QWORD PTR [rcx+128], rdx
  74881. adc rsi, 0 ; rsi = carry handed to the next iteration (or out of the number)
  74882. ; i -= 1
  74883. add rcx, 8 ; slide the 16-word window up by one word
  74884. dec r10
  74885. jnz L_1024_mont_reduce_16_loop
  74886. mov r14, QWORD PTR [rcx+120] ; rcx now points at the upper 16 words, which hold the result
  74887. mov QWORD PTR [rcx], r15 ; write the two register-cached words back to memory
  74888. sub r14, QWORD PTR [r9+120] ; compare the top result word with the top word of m
  74889. mov QWORD PTR [rcx+8], rdi
  74890. sbb r14, r14 ; -1 if top result word is below top word of m, else 0
  74891. neg rsi ; carry 0 / 1 becomes mask 0 / -1
  74892. not r14
  74893. or rsi, r14 ; subtract m when there was a carry or the top word is not below m's
  74894. IFDEF _WIN64
  74895. mov r8, r9 ; third argument (b) = m
  74896. mov r9, rsi ; fourth argument (m) = subtract mask
  74897. ELSE
  74898. mov r8, r9 ; copy the pointer to m first: r9 is overwritten on the next line
  74899. mov r9, rsi
  74900. ENDIF
  74901. mov rdx, rcx ; second argument (a) = upper half holding the unreduced result
  74902. mov rcx, rcx ; no-op kept from the original instruction stream
  74903. sub rcx, 128 ; first argument (r) = start of the original a
  74904. ; NOTE(review): this call reserves no 32-byte shadow space and rsp is not
  74905. ; 16-byte aligned here (six pushes after entry). sp_1024_cond_sub_16 only
  74906. ; uses its own 128-byte area below rsp, so it appears safe - confirm
  74907. ; before changing the callee.
  74904. call sp_1024_cond_sub_16
  74905. pop rsi
  74906. pop rdi
  74907. pop r15
  74908. pop r14
  74909. pop r13
  74910. pop r12
  74911. ret
  74912. sp_1024_mont_reduce_16 ENDP
  74913. _text ENDS
  74914. ; /* Add two Montgomery form numbers (r = a + b % m).
  74915. ; * The sum is followed by one masked subtraction of m.
  74916. ; * r (rcx) Result of addition (16 words).
  74917. ; * a (rdx) First number to add in Montgomery form.
  74918. ; * b (r8) Second number to add in Montgomery form.
  74919. ; * m (r9) Modulus (prime).
  74920. ; */
  74921. _text SEGMENT READONLY PARA
  74922. sp_1024_mont_add_16 PROC ; no return value: rax is only a scratch register here
  74923. push r12 ; r12 and r13 are callee-saved
  74924. push r13
  74925. sub rsp, 128 ; 16 qword stack slots that hold the masked copy of m
  74926. mov rax, QWORD PTR [rdx]
  74927. mov r10, QWORD PTR [rdx+8]
  74928. mov r11, QWORD PTR [rdx+16]
  74929. mov r12, QWORD PTR [rdx+24]
  74930. add rax, QWORD PTR [r8] ; word 0 starts the carry chain
  74931. mov r13, 0 ; mov rather than xor so the carry from the add above survives
  74932. adc r10, QWORD PTR [r8+8]
  74933. adc r11, QWORD PTR [r8+16]
  74934. adc r12, QWORD PTR [r8+24]
  74935. mov QWORD PTR [rcx], rax
  74936. mov QWORD PTR [rcx+8], r10
  74937. mov QWORD PTR [rcx+16], r11
  74938. mov QWORD PTR [rcx+24], r12
  74939. mov rax, QWORD PTR [rdx+32]
  74940. mov r10, QWORD PTR [rdx+40]
  74941. mov r11, QWORD PTR [rdx+48]
  74942. mov r12, QWORD PTR [rdx+56]
  74943. adc rax, QWORD PTR [r8+32]
  74944. adc r10, QWORD PTR [r8+40]
  74945. adc r11, QWORD PTR [r8+48]
  74946. adc r12, QWORD PTR [r8+56]
  74947. mov QWORD PTR [rcx+32], rax
  74948. mov QWORD PTR [rcx+40], r10
  74949. mov QWORD PTR [rcx+48], r11
  74950. mov QWORD PTR [rcx+56], r12
  74951. mov rax, QWORD PTR [rdx+64]
  74952. mov r10, QWORD PTR [rdx+72]
  74953. mov r11, QWORD PTR [rdx+80]
  74954. mov r12, QWORD PTR [rdx+88]
  74955. adc rax, QWORD PTR [r8+64]
  74956. adc r10, QWORD PTR [r8+72]
  74957. adc r11, QWORD PTR [r8+80]
  74958. adc r12, QWORD PTR [r8+88]
  74959. mov QWORD PTR [rcx+64], rax
  74960. mov QWORD PTR [rcx+72], r10
  74961. mov QWORD PTR [rcx+80], r11
  74962. mov QWORD PTR [rcx+88], r12
  74963. mov rax, QWORD PTR [rdx+96]
  74964. mov r10, QWORD PTR [rdx+104]
  74965. mov r11, QWORD PTR [rdx+112]
  74966. mov r12, QWORD PTR [rdx+120]
  74967. adc rax, QWORD PTR [r8+96]
  74968. adc r10, QWORD PTR [r8+104]
  74969. adc r11, QWORD PTR [r8+112]
  74970. adc r12, QWORD PTR [r8+120]
  74971. mov QWORD PTR [rcx+96], rax
  74972. mov QWORD PTR [rcx+104], r10
  74973. mov QWORD PTR [rcx+112], r11
  74974. mov QWORD PTR [rcx+120], r12
  74975. sbb r13, 0 ; r13 = -1 if the addition carried out of the top word, else 0
  74976. sub r12, QWORD PTR [r9+120] ; compare the top word of the sum with the top word of m
  74977. sbb r12, r12 ; -1 if the sum's top word is below m's top word, else 0
  74978. not r12
  74979. or r13, r12 ; r13 = subtract mask. NOTE(review): only the top words are compared - presumably sufficient for the intended modulus; confirm
  74980. mov r11, QWORD PTR [r9]
  74981. mov r12, QWORD PTR [r9+8]
  74982. and r11, r13 ; m[i] & mask, two words per step
  74983. and r12, r13
  74984. mov QWORD PTR [rsp], r11
  74985. mov QWORD PTR [rsp+8], r12
  74986. mov r11, QWORD PTR [r9+16]
  74987. mov r12, QWORD PTR [r9+24]
  74988. and r11, r13
  74989. and r12, r13
  74990. mov QWORD PTR [rsp+16], r11
  74991. mov QWORD PTR [rsp+24], r12
  74992. mov r11, QWORD PTR [r9+32]
  74993. mov r12, QWORD PTR [r9+40]
  74994. and r11, r13
  74995. and r12, r13
  74996. mov QWORD PTR [rsp+32], r11
  74997. mov QWORD PTR [rsp+40], r12
  74998. mov r11, QWORD PTR [r9+48]
  74999. mov r12, QWORD PTR [r9+56]
  75000. and r11, r13
  75001. and r12, r13
  75002. mov QWORD PTR [rsp+48], r11
  75003. mov QWORD PTR [rsp+56], r12
  75004. mov r11, QWORD PTR [r9+64]
  75005. mov r12, QWORD PTR [r9+72]
  75006. and r11, r13
  75007. and r12, r13
  75008. mov QWORD PTR [rsp+64], r11
  75009. mov QWORD PTR [rsp+72], r12
  75010. mov r11, QWORD PTR [r9+80]
  75011. mov r12, QWORD PTR [r9+88]
  75012. and r11, r13
  75013. and r12, r13
  75014. mov QWORD PTR [rsp+80], r11
  75015. mov QWORD PTR [rsp+88], r12
  75016. mov r11, QWORD PTR [r9+96]
  75017. mov r12, QWORD PTR [r9+104]
  75018. and r11, r13
  75019. and r12, r13
  75020. mov QWORD PTR [rsp+96], r11
  75021. mov QWORD PTR [rsp+104], r12
  75022. mov r11, QWORD PTR [r9+112]
  75023. mov r12, QWORD PTR [r9+120]
  75024. and r11, r13
  75025. and r12, r13
  75026. mov QWORD PTR [rsp+112], r11
  75027. mov QWORD PTR [rsp+120], r12
  75028. mov rax, QWORD PTR [rcx] ; subtract phase: r -= (m & mask), in place
  75029. mov r10, QWORD PTR [rcx+8]
  75030. sub rax, QWORD PTR [rsp] ; word 0 starts the borrow chain
  75031. sbb r10, QWORD PTR [rsp+8]
  75032. mov QWORD PTR [rcx], rax
  75033. mov QWORD PTR [rcx+8], r10
  75034. mov rax, QWORD PTR [rcx+16]
  75035. mov r10, QWORD PTR [rcx+24]
  75036. sbb rax, QWORD PTR [rsp+16]
  75037. sbb r10, QWORD PTR [rsp+24]
  75038. mov QWORD PTR [rcx+16], rax
  75039. mov QWORD PTR [rcx+24], r10
  75040. mov rax, QWORD PTR [rcx+32]
  75041. mov r10, QWORD PTR [rcx+40]
  75042. sbb rax, QWORD PTR [rsp+32]
  75043. sbb r10, QWORD PTR [rsp+40]
  75044. mov QWORD PTR [rcx+32], rax
  75045. mov QWORD PTR [rcx+40], r10
  75046. mov rax, QWORD PTR [rcx+48]
  75047. mov r10, QWORD PTR [rcx+56]
  75048. sbb rax, QWORD PTR [rsp+48]
  75049. sbb r10, QWORD PTR [rsp+56]
  75050. mov QWORD PTR [rcx+48], rax
  75051. mov QWORD PTR [rcx+56], r10
  75052. mov rax, QWORD PTR [rcx+64]
  75053. mov r10, QWORD PTR [rcx+72]
  75054. sbb rax, QWORD PTR [rsp+64]
  75055. sbb r10, QWORD PTR [rsp+72]
  75056. mov QWORD PTR [rcx+64], rax
  75057. mov QWORD PTR [rcx+72], r10
  75058. mov rax, QWORD PTR [rcx+80]
  75059. mov r10, QWORD PTR [rcx+88]
  75060. sbb rax, QWORD PTR [rsp+80]
  75061. sbb r10, QWORD PTR [rsp+88]
  75062. mov QWORD PTR [rcx+80], rax
  75063. mov QWORD PTR [rcx+88], r10
  75064. mov rax, QWORD PTR [rcx+96]
  75065. mov r10, QWORD PTR [rcx+104]
  75066. sbb rax, QWORD PTR [rsp+96]
  75067. sbb r10, QWORD PTR [rsp+104]
  75068. mov QWORD PTR [rcx+96], rax
  75069. mov QWORD PTR [rcx+104], r10
  75070. mov rax, QWORD PTR [rcx+112]
  75071. mov r10, QWORD PTR [rcx+120]
  75072. sbb rax, QWORD PTR [rsp+112]
  75073. sbb r10, QWORD PTR [rsp+120]
  75074. mov QWORD PTR [rcx+112], rax
  75075. mov QWORD PTR [rcx+120], r10 ; the final borrow is not used
  75076. add rsp, 128 ; release the scratch area
  75077. pop r13
  75078. pop r12
  75079. ret
  75080. sp_1024_mont_add_16 ENDP
  75081. _text ENDS
  75082. ; /* Double a Montgomery form number (r = a + a % m).
  75083. ; *
  75084. ; * r Result of addition.
  75085. ; * a Number to double in Montgomery form.
  75086. ; * m Modulus (prime).
  75087. ; */
  75088. _text SEGMENT READONLY PARA
  75089. sp_1024_mont_dbl_16 PROC ; rcx = r, rdx = a, r8 = m; each is 16 x 64-bit words. Computes r = a + a, then subtracts m under a mask.
  75090. push r12 ; r12 is callee-saved; it holds the subtract mask below
  75091. sub rsp, 128 ; 128-byte stack scratch for the masked modulus
  75092. mov rax, QWORD PTR [rdx] ; --- phase 1: r = a + a, one 16-word carry chain ---
  75093. mov r9, QWORD PTR [rdx+8]
  75094. mov r10, QWORD PTR [rdx+16]
  75095. mov r11, QWORD PTR [rdx+24]
  75096. add rax, QWORD PTR [rdx] ; lowest word: plain add starts the carry chain
  75097. mov r12, 0 ; mov rather than xor so CF from the add survives
  75098. adc r9, QWORD PTR [rdx+8]
  75099. adc r10, QWORD PTR [rdx+16]
  75100. adc r11, QWORD PTR [rdx+24]
  75101. mov QWORD PTR [rcx], rax ; stores and loads do not touch CF, so the chain continues
  75102. mov QWORD PTR [rcx+8], r9
  75103. mov QWORD PTR [rcx+16], r10
  75104. mov QWORD PTR [rcx+24], r11
  75105. mov rax, QWORD PTR [rdx+32]
  75106. mov r9, QWORD PTR [rdx+40]
  75107. mov r10, QWORD PTR [rdx+48]
  75108. mov r11, QWORD PTR [rdx+56]
  75109. adc rax, QWORD PTR [rdx+32]
  75110. adc r9, QWORD PTR [rdx+40]
  75111. adc r10, QWORD PTR [rdx+48]
  75112. adc r11, QWORD PTR [rdx+56]
  75113. mov QWORD PTR [rcx+32], rax
  75114. mov QWORD PTR [rcx+40], r9
  75115. mov QWORD PTR [rcx+48], r10
  75116. mov QWORD PTR [rcx+56], r11
  75117. mov rax, QWORD PTR [rdx+64]
  75118. mov r9, QWORD PTR [rdx+72]
  75119. mov r10, QWORD PTR [rdx+80]
  75120. mov r11, QWORD PTR [rdx+88]
  75121. adc rax, QWORD PTR [rdx+64]
  75122. adc r9, QWORD PTR [rdx+72]
  75123. adc r10, QWORD PTR [rdx+80]
  75124. adc r11, QWORD PTR [rdx+88]
  75125. mov QWORD PTR [rcx+64], rax
  75126. mov QWORD PTR [rcx+72], r9
  75127. mov QWORD PTR [rcx+80], r10
  75128. mov QWORD PTR [rcx+88], r11
  75129. mov rax, QWORD PTR [rdx+96]
  75130. mov r9, QWORD PTR [rdx+104]
  75131. mov r10, QWORD PTR [rdx+112]
  75132. mov r11, QWORD PTR [rdx+120]
  75133. adc rax, QWORD PTR [rdx+96]
  75134. adc r9, QWORD PTR [rdx+104]
  75135. adc r10, QWORD PTR [rdx+112]
  75136. adc r11, QWORD PTR [rdx+120] ; r11 = top word of the sum; CF = carry out of 1024 bits
  75137. mov QWORD PTR [rcx+96], rax
  75138. mov QWORD PTR [rcx+104], r9
  75139. mov QWORD PTR [rcx+112], r10
  75140. mov QWORD PTR [rcx+120], r11
  75141. sbb r12, 0 ; r12 = 0 - CF: all ones if the sum carried out, else 0
  75142. sub r11, QWORD PTR [r8+120] ; compare top word of the sum with top word of m
  75143. sbb r11, r11 ; r11 = all ones if top word < m[15] (borrow), else 0
  75144. not r11 ; r11 = all ones if top word >= m[15]
  75145. or r12, r11 ; mask = carried out OR top word >= m[15] (only the top words are compared)
  75146. mov r10, QWORD PTR [r8] ; --- phase 2: stack[0..15] = m & mask (m or zero, no branch) ---
  75147. mov r11, QWORD PTR [r8+8]
  75148. and r10, r12
  75149. and r11, r12
  75150. mov QWORD PTR [rsp], r10
  75151. mov QWORD PTR [rsp+8], r11
  75152. mov r10, QWORD PTR [r8+16]
  75153. mov r11, QWORD PTR [r8+24]
  75154. and r10, r12
  75155. and r11, r12
  75156. mov QWORD PTR [rsp+16], r10
  75157. mov QWORD PTR [rsp+24], r11
  75158. mov r10, QWORD PTR [r8+32]
  75159. mov r11, QWORD PTR [r8+40]
  75160. and r10, r12
  75161. and r11, r12
  75162. mov QWORD PTR [rsp+32], r10
  75163. mov QWORD PTR [rsp+40], r11
  75164. mov r10, QWORD PTR [r8+48]
  75165. mov r11, QWORD PTR [r8+56]
  75166. and r10, r12
  75167. and r11, r12
  75168. mov QWORD PTR [rsp+48], r10
  75169. mov QWORD PTR [rsp+56], r11
  75170. mov r10, QWORD PTR [r8+64]
  75171. mov r11, QWORD PTR [r8+72]
  75172. and r10, r12
  75173. and r11, r12
  75174. mov QWORD PTR [rsp+64], r10
  75175. mov QWORD PTR [rsp+72], r11
  75176. mov r10, QWORD PTR [r8+80]
  75177. mov r11, QWORD PTR [r8+88]
  75178. and r10, r12
  75179. and r11, r12
  75180. mov QWORD PTR [rsp+80], r10
  75181. mov QWORD PTR [rsp+88], r11
  75182. mov r10, QWORD PTR [r8+96]
  75183. mov r11, QWORD PTR [r8+104]
  75184. and r10, r12
  75185. and r11, r12
  75186. mov QWORD PTR [rsp+96], r10
  75187. mov QWORD PTR [rsp+104], r11
  75188. mov r10, QWORD PTR [r8+112]
  75189. mov r11, QWORD PTR [r8+120]
  75190. and r10, r12
  75191. and r11, r12
  75192. mov QWORD PTR [rsp+112], r10
  75193. mov QWORD PTR [rsp+120], r11
  75194. mov rax, QWORD PTR [rcx] ; --- phase 3: r -= (m & mask), one 16-word borrow chain ---
  75195. mov r9, QWORD PTR [rcx+8]
  75196. sub rax, QWORD PTR [rsp] ; lowest word: plain sub starts the borrow chain
  75197. sbb r9, QWORD PTR [rsp+8]
  75198. mov QWORD PTR [rcx], rax
  75199. mov QWORD PTR [rcx+8], r9
  75200. mov rax, QWORD PTR [rcx+16]
  75201. mov r9, QWORD PTR [rcx+24]
  75202. sbb rax, QWORD PTR [rsp+16]
  75203. sbb r9, QWORD PTR [rsp+24]
  75204. mov QWORD PTR [rcx+16], rax
  75205. mov QWORD PTR [rcx+24], r9
  75206. mov rax, QWORD PTR [rcx+32]
  75207. mov r9, QWORD PTR [rcx+40]
  75208. sbb rax, QWORD PTR [rsp+32]
  75209. sbb r9, QWORD PTR [rsp+40]
  75210. mov QWORD PTR [rcx+32], rax
  75211. mov QWORD PTR [rcx+40], r9
  75212. mov rax, QWORD PTR [rcx+48]
  75213. mov r9, QWORD PTR [rcx+56]
  75214. sbb rax, QWORD PTR [rsp+48]
  75215. sbb r9, QWORD PTR [rsp+56]
  75216. mov QWORD PTR [rcx+48], rax
  75217. mov QWORD PTR [rcx+56], r9
  75218. mov rax, QWORD PTR [rcx+64]
  75219. mov r9, QWORD PTR [rcx+72]
  75220. sbb rax, QWORD PTR [rsp+64]
  75221. sbb r9, QWORD PTR [rsp+72]
  75222. mov QWORD PTR [rcx+64], rax
  75223. mov QWORD PTR [rcx+72], r9
  75224. mov rax, QWORD PTR [rcx+80]
  75225. mov r9, QWORD PTR [rcx+88]
  75226. sbb rax, QWORD PTR [rsp+80]
  75227. sbb r9, QWORD PTR [rsp+88]
  75228. mov QWORD PTR [rcx+80], rax
  75229. mov QWORD PTR [rcx+88], r9
  75230. mov rax, QWORD PTR [rcx+96]
  75231. mov r9, QWORD PTR [rcx+104]
  75232. sbb rax, QWORD PTR [rsp+96]
  75233. sbb r9, QWORD PTR [rsp+104]
  75234. mov QWORD PTR [rcx+96], rax
  75235. mov QWORD PTR [rcx+104], r9
  75236. mov rax, QWORD PTR [rcx+112]
  75237. mov r9, QWORD PTR [rcx+120]
  75238. sbb rax, QWORD PTR [rsp+112]
  75239. sbb r9, QWORD PTR [rsp+120] ; final borrow is discarded
  75240. mov QWORD PTR [rcx+112], rax
  75241. mov QWORD PTR [rcx+120], r9
  75242. add rsp, 128 ; release the scratch area
  75243. pop r12
  75244. ret ; rax holds the last scratch value; no return value is set deliberately
  75245. sp_1024_mont_dbl_16 ENDP
  75246. _text ENDS
  75247. ; /* Triple a Montgomery form number (r = (a + a + a) % m).
  75248. ; *
  75249. ; * r Result of tripling.
  75250. ; * a Number to triple in Montgomery form.
  75251. ; * m Modulus (prime).
  75252. ; */
  75253. _text SEGMENT READONLY PARA
  75254. sp_1024_mont_tpl_16 PROC ; rcx = r, rdx = a, r8 = m; each is 16 x 64-bit words. Two passes: r = a + a, masked subtract of m; r = r + a, masked subtract of m.
  75255. push r12 ; r12 is callee-saved; it holds the subtract mask in both passes
  75256. sub rsp, 128 ; 128-byte stack scratch for the masked modulus
  75257. mov rax, QWORD PTR [rdx] ; --- pass 1, step 1: r = a + a, one 16-word carry chain ---
  75258. mov r9, QWORD PTR [rdx+8]
  75259. mov r10, QWORD PTR [rdx+16]
  75260. mov r11, QWORD PTR [rdx+24]
  75261. add rax, QWORD PTR [rdx] ; lowest word: plain add starts the carry chain
  75262. mov r12, 0 ; mov rather than xor so CF from the add survives
  75263. adc r9, QWORD PTR [rdx+8]
  75264. adc r10, QWORD PTR [rdx+16]
  75265. adc r11, QWORD PTR [rdx+24]
  75266. mov QWORD PTR [rcx], rax ; stores and loads do not touch CF, so the chain continues
  75267. mov QWORD PTR [rcx+8], r9
  75268. mov QWORD PTR [rcx+16], r10
  75269. mov QWORD PTR [rcx+24], r11
  75270. mov rax, QWORD PTR [rdx+32]
  75271. mov r9, QWORD PTR [rdx+40]
  75272. mov r10, QWORD PTR [rdx+48]
  75273. mov r11, QWORD PTR [rdx+56]
  75274. adc rax, QWORD PTR [rdx+32]
  75275. adc r9, QWORD PTR [rdx+40]
  75276. adc r10, QWORD PTR [rdx+48]
  75277. adc r11, QWORD PTR [rdx+56]
  75278. mov QWORD PTR [rcx+32], rax
  75279. mov QWORD PTR [rcx+40], r9
  75280. mov QWORD PTR [rcx+48], r10
  75281. mov QWORD PTR [rcx+56], r11
  75282. mov rax, QWORD PTR [rdx+64]
  75283. mov r9, QWORD PTR [rdx+72]
  75284. mov r10, QWORD PTR [rdx+80]
  75285. mov r11, QWORD PTR [rdx+88]
  75286. adc rax, QWORD PTR [rdx+64]
  75287. adc r9, QWORD PTR [rdx+72]
  75288. adc r10, QWORD PTR [rdx+80]
  75289. adc r11, QWORD PTR [rdx+88]
  75290. mov QWORD PTR [rcx+64], rax
  75291. mov QWORD PTR [rcx+72], r9
  75292. mov QWORD PTR [rcx+80], r10
  75293. mov QWORD PTR [rcx+88], r11
  75294. mov rax, QWORD PTR [rdx+96]
  75295. mov r9, QWORD PTR [rdx+104]
  75296. mov r10, QWORD PTR [rdx+112]
  75297. mov r11, QWORD PTR [rdx+120]
  75298. adc rax, QWORD PTR [rdx+96]
  75299. adc r9, QWORD PTR [rdx+104]
  75300. adc r10, QWORD PTR [rdx+112]
  75301. adc r11, QWORD PTR [rdx+120] ; r11 = top word of the sum; CF = carry out of 1024 bits
  75302. mov QWORD PTR [rcx+96], rax
  75303. mov QWORD PTR [rcx+104], r9
  75304. mov QWORD PTR [rcx+112], r10
  75305. mov QWORD PTR [rcx+120], r11
  75306. sbb r12, 0 ; r12 = 0 - CF: all ones if the sum carried out, else 0
  75307. sub r11, QWORD PTR [r8+120] ; compare top word of the sum with top word of m
  75308. sbb r11, r11 ; r11 = all ones if top word < m[15] (borrow), else 0
  75309. not r11 ; r11 = all ones if top word >= m[15]
  75310. or r12, r11 ; mask = carried out OR top word >= m[15] (only the top words are compared)
  75311. mov r10, QWORD PTR [r8] ; --- pass 1, step 2: stack[0..15] = m & mask ---
  75312. mov r11, QWORD PTR [r8+8]
  75313. and r10, r12
  75314. and r11, r12
  75315. mov QWORD PTR [rsp], r10
  75316. mov QWORD PTR [rsp+8], r11
  75317. mov r10, QWORD PTR [r8+16]
  75318. mov r11, QWORD PTR [r8+24]
  75319. and r10, r12
  75320. and r11, r12
  75321. mov QWORD PTR [rsp+16], r10
  75322. mov QWORD PTR [rsp+24], r11
  75323. mov r10, QWORD PTR [r8+32]
  75324. mov r11, QWORD PTR [r8+40]
  75325. and r10, r12
  75326. and r11, r12
  75327. mov QWORD PTR [rsp+32], r10
  75328. mov QWORD PTR [rsp+40], r11
  75329. mov r10, QWORD PTR [r8+48]
  75330. mov r11, QWORD PTR [r8+56]
  75331. and r10, r12
  75332. and r11, r12
  75333. mov QWORD PTR [rsp+48], r10
  75334. mov QWORD PTR [rsp+56], r11
  75335. mov r10, QWORD PTR [r8+64]
  75336. mov r11, QWORD PTR [r8+72]
  75337. and r10, r12
  75338. and r11, r12
  75339. mov QWORD PTR [rsp+64], r10
  75340. mov QWORD PTR [rsp+72], r11
  75341. mov r10, QWORD PTR [r8+80]
  75342. mov r11, QWORD PTR [r8+88]
  75343. and r10, r12
  75344. and r11, r12
  75345. mov QWORD PTR [rsp+80], r10
  75346. mov QWORD PTR [rsp+88], r11
  75347. mov r10, QWORD PTR [r8+96]
  75348. mov r11, QWORD PTR [r8+104]
  75349. and r10, r12
  75350. and r11, r12
  75351. mov QWORD PTR [rsp+96], r10
  75352. mov QWORD PTR [rsp+104], r11
  75353. mov r10, QWORD PTR [r8+112]
  75354. mov r11, QWORD PTR [r8+120]
  75355. and r10, r12
  75356. and r11, r12
  75357. mov QWORD PTR [rsp+112], r10
  75358. mov QWORD PTR [rsp+120], r11
  75359. mov rax, QWORD PTR [rcx] ; --- pass 1, step 3: r -= (m & mask), one 16-word borrow chain ---
  75360. mov r9, QWORD PTR [rcx+8]
  75361. sub rax, QWORD PTR [rsp] ; lowest word: plain sub starts the borrow chain
  75362. sbb r9, QWORD PTR [rsp+8]
  75363. mov QWORD PTR [rcx], rax
  75364. mov QWORD PTR [rcx+8], r9
  75365. mov rax, QWORD PTR [rcx+16]
  75366. mov r9, QWORD PTR [rcx+24]
  75367. sbb rax, QWORD PTR [rsp+16]
  75368. sbb r9, QWORD PTR [rsp+24]
  75369. mov QWORD PTR [rcx+16], rax
  75370. mov QWORD PTR [rcx+24], r9
  75371. mov rax, QWORD PTR [rcx+32]
  75372. mov r9, QWORD PTR [rcx+40]
  75373. sbb rax, QWORD PTR [rsp+32]
  75374. sbb r9, QWORD PTR [rsp+40]
  75375. mov QWORD PTR [rcx+32], rax
  75376. mov QWORD PTR [rcx+40], r9
  75377. mov rax, QWORD PTR [rcx+48]
  75378. mov r9, QWORD PTR [rcx+56]
  75379. sbb rax, QWORD PTR [rsp+48]
  75380. sbb r9, QWORD PTR [rsp+56]
  75381. mov QWORD PTR [rcx+48], rax
  75382. mov QWORD PTR [rcx+56], r9
  75383. mov rax, QWORD PTR [rcx+64]
  75384. mov r9, QWORD PTR [rcx+72]
  75385. sbb rax, QWORD PTR [rsp+64]
  75386. sbb r9, QWORD PTR [rsp+72]
  75387. mov QWORD PTR [rcx+64], rax
  75388. mov QWORD PTR [rcx+72], r9
  75389. mov rax, QWORD PTR [rcx+80]
  75390. mov r9, QWORD PTR [rcx+88]
  75391. sbb rax, QWORD PTR [rsp+80]
  75392. sbb r9, QWORD PTR [rsp+88]
  75393. mov QWORD PTR [rcx+80], rax
  75394. mov QWORD PTR [rcx+88], r9
  75395. mov rax, QWORD PTR [rcx+96]
  75396. mov r9, QWORD PTR [rcx+104]
  75397. sbb rax, QWORD PTR [rsp+96]
  75398. sbb r9, QWORD PTR [rsp+104]
  75399. mov QWORD PTR [rcx+96], rax
  75400. mov QWORD PTR [rcx+104], r9
  75401. mov rax, QWORD PTR [rcx+112]
  75402. mov r9, QWORD PTR [rcx+120]
  75403. sbb rax, QWORD PTR [rsp+112]
  75404. sbb r9, QWORD PTR [rsp+120] ; final borrow is discarded
  75405. mov QWORD PTR [rcx+112], rax
  75406. mov QWORD PTR [rcx+120], r9
  75407. mov rax, QWORD PTR [rcx] ; --- pass 2, step 1: r = r + a, one 16-word carry chain ---
  75408. mov r9, QWORD PTR [rcx+8]
  75409. mov r10, QWORD PTR [rcx+16]
  75410. mov r11, QWORD PTR [rcx+24]
  75411. add rax, QWORD PTR [rdx] ; NOTE(review): a is re-read after r was written, so this presumably requires r != a - confirm against callers
  75412. mov r12, 0 ; reset the mask register; mov keeps CF from the add
  75413. adc r9, QWORD PTR [rdx+8]
  75414. adc r10, QWORD PTR [rdx+16]
  75415. adc r11, QWORD PTR [rdx+24]
  75416. mov QWORD PTR [rcx], rax
  75417. mov QWORD PTR [rcx+8], r9
  75418. mov QWORD PTR [rcx+16], r10
  75419. mov QWORD PTR [rcx+24], r11
  75420. mov rax, QWORD PTR [rcx+32]
  75421. mov r9, QWORD PTR [rcx+40]
  75422. mov r10, QWORD PTR [rcx+48]
  75423. mov r11, QWORD PTR [rcx+56]
  75424. adc rax, QWORD PTR [rdx+32]
  75425. adc r9, QWORD PTR [rdx+40]
  75426. adc r10, QWORD PTR [rdx+48]
  75427. adc r11, QWORD PTR [rdx+56]
  75428. mov QWORD PTR [rcx+32], rax
  75429. mov QWORD PTR [rcx+40], r9
  75430. mov QWORD PTR [rcx+48], r10
  75431. mov QWORD PTR [rcx+56], r11
  75432. mov rax, QWORD PTR [rcx+64]
  75433. mov r9, QWORD PTR [rcx+72]
  75434. mov r10, QWORD PTR [rcx+80]
  75435. mov r11, QWORD PTR [rcx+88]
  75436. adc rax, QWORD PTR [rdx+64]
  75437. adc r9, QWORD PTR [rdx+72]
  75438. adc r10, QWORD PTR [rdx+80]
  75439. adc r11, QWORD PTR [rdx+88]
  75440. mov QWORD PTR [rcx+64], rax
  75441. mov QWORD PTR [rcx+72], r9
  75442. mov QWORD PTR [rcx+80], r10
  75443. mov QWORD PTR [rcx+88], r11
  75444. mov rax, QWORD PTR [rcx+96]
  75445. mov r9, QWORD PTR [rcx+104]
  75446. mov r10, QWORD PTR [rcx+112]
  75447. mov r11, QWORD PTR [rcx+120]
  75448. adc rax, QWORD PTR [rdx+96]
  75449. adc r9, QWORD PTR [rdx+104]
  75450. adc r10, QWORD PTR [rdx+112]
  75451. adc r11, QWORD PTR [rdx+120] ; r11 = top word of the sum; CF = carry out of 1024 bits
  75452. mov QWORD PTR [rcx+96], rax
  75453. mov QWORD PTR [rcx+104], r9
  75454. mov QWORD PTR [rcx+112], r10
  75455. mov QWORD PTR [rcx+120], r11
  75456. sbb r12, 0 ; r12 = 0 - CF: all ones if the sum carried out, else 0
  75457. sub r11, QWORD PTR [r8+120] ; compare top word of the sum with top word of m
  75458. sbb r11, r11 ; r11 = all ones if top word < m[15] (borrow), else 0
  75459. not r11 ; r11 = all ones if top word >= m[15]
  75460. or r12, r11 ; mask = carried out OR top word >= m[15]
  75461. mov r10, QWORD PTR [r8] ; --- pass 2, step 2: stack[0..15] = m & mask ---
  75462. mov r11, QWORD PTR [r8+8]
  75463. and r10, r12
  75464. and r11, r12
  75465. mov QWORD PTR [rsp], r10
  75466. mov QWORD PTR [rsp+8], r11
  75467. mov r10, QWORD PTR [r8+16]
  75468. mov r11, QWORD PTR [r8+24]
  75469. and r10, r12
  75470. and r11, r12
  75471. mov QWORD PTR [rsp+16], r10
  75472. mov QWORD PTR [rsp+24], r11
  75473. mov r10, QWORD PTR [r8+32]
  75474. mov r11, QWORD PTR [r8+40]
  75475. and r10, r12
  75476. and r11, r12
  75477. mov QWORD PTR [rsp+32], r10
  75478. mov QWORD PTR [rsp+40], r11
  75479. mov r10, QWORD PTR [r8+48]
  75480. mov r11, QWORD PTR [r8+56]
  75481. and r10, r12
  75482. and r11, r12
  75483. mov QWORD PTR [rsp+48], r10
  75484. mov QWORD PTR [rsp+56], r11
  75485. mov r10, QWORD PTR [r8+64]
  75486. mov r11, QWORD PTR [r8+72]
  75487. and r10, r12
  75488. and r11, r12
  75489. mov QWORD PTR [rsp+64], r10
  75490. mov QWORD PTR [rsp+72], r11
  75491. mov r10, QWORD PTR [r8+80]
  75492. mov r11, QWORD PTR [r8+88]
  75493. and r10, r12
  75494. and r11, r12
  75495. mov QWORD PTR [rsp+80], r10
  75496. mov QWORD PTR [rsp+88], r11
  75497. mov r10, QWORD PTR [r8+96]
  75498. mov r11, QWORD PTR [r8+104]
  75499. and r10, r12
  75500. and r11, r12
  75501. mov QWORD PTR [rsp+96], r10
  75502. mov QWORD PTR [rsp+104], r11
  75503. mov r10, QWORD PTR [r8+112]
  75504. mov r11, QWORD PTR [r8+120]
  75505. and r10, r12
  75506. and r11, r12
  75507. mov QWORD PTR [rsp+112], r10
  75508. mov QWORD PTR [rsp+120], r11
  75509. mov rax, QWORD PTR [rcx] ; --- pass 2, step 3: r -= (m & mask), one 16-word borrow chain ---
  75510. mov r9, QWORD PTR [rcx+8]
  75511. sub rax, QWORD PTR [rsp] ; lowest word: plain sub starts the borrow chain
  75512. sbb r9, QWORD PTR [rsp+8]
  75513. mov QWORD PTR [rcx], rax
  75514. mov QWORD PTR [rcx+8], r9
  75515. mov rax, QWORD PTR [rcx+16]
  75516. mov r9, QWORD PTR [rcx+24]
  75517. sbb rax, QWORD PTR [rsp+16]
  75518. sbb r9, QWORD PTR [rsp+24]
  75519. mov QWORD PTR [rcx+16], rax
  75520. mov QWORD PTR [rcx+24], r9
  75521. mov rax, QWORD PTR [rcx+32]
  75522. mov r9, QWORD PTR [rcx+40]
  75523. sbb rax, QWORD PTR [rsp+32]
  75524. sbb r9, QWORD PTR [rsp+40]
  75525. mov QWORD PTR [rcx+32], rax
  75526. mov QWORD PTR [rcx+40], r9
  75527. mov rax, QWORD PTR [rcx+48]
  75528. mov r9, QWORD PTR [rcx+56]
  75529. sbb rax, QWORD PTR [rsp+48]
  75530. sbb r9, QWORD PTR [rsp+56]
  75531. mov QWORD PTR [rcx+48], rax
  75532. mov QWORD PTR [rcx+56], r9
  75533. mov rax, QWORD PTR [rcx+64]
  75534. mov r9, QWORD PTR [rcx+72]
  75535. sbb rax, QWORD PTR [rsp+64]
  75536. sbb r9, QWORD PTR [rsp+72]
  75537. mov QWORD PTR [rcx+64], rax
  75538. mov QWORD PTR [rcx+72], r9
  75539. mov rax, QWORD PTR [rcx+80]
  75540. mov r9, QWORD PTR [rcx+88]
  75541. sbb rax, QWORD PTR [rsp+80]
  75542. sbb r9, QWORD PTR [rsp+88]
  75543. mov QWORD PTR [rcx+80], rax
  75544. mov QWORD PTR [rcx+88], r9
  75545. mov rax, QWORD PTR [rcx+96]
  75546. mov r9, QWORD PTR [rcx+104]
  75547. sbb rax, QWORD PTR [rsp+96]
  75548. sbb r9, QWORD PTR [rsp+104]
  75549. mov QWORD PTR [rcx+96], rax
  75550. mov QWORD PTR [rcx+104], r9
  75551. mov rax, QWORD PTR [rcx+112]
  75552. mov r9, QWORD PTR [rcx+120]
  75553. sbb rax, QWORD PTR [rsp+112]
  75554. sbb r9, QWORD PTR [rsp+120] ; final borrow is discarded
  75555. mov QWORD PTR [rcx+112], rax
  75556. mov QWORD PTR [rcx+120], r9
  75557. add rsp, 128 ; release the scratch area
  75558. pop r12
  75559. ret
  75560. sp_1024_mont_tpl_16 ENDP
  75561. _text ENDS
  75562. ; /* Subtract two Montgomery form numbers (r = (a - b) % m).
  75563. ; *
  75564. ; * r Result of subtraction.
  75565. ; * a Number to subtract from, in Montgomery form.
  75566. ; * b Number to subtract, in Montgomery form.
  75567. ; * m Modulus (prime).
  75568. ; */
  75569. _text SEGMENT READONLY PARA
  75570. sp_1024_mont_sub_16 PROC ; rcx = r, rdx = a, r8 = b, r9 = m; each is 16 x 64-bit words. Computes r = a - b, then adds m back if the subtraction borrowed.
  75571. push r12 ; r12 and r13 are callee-saved and used as scratch / mask below
  75572. push r13
  75573. sub rsp, 128 ; 128-byte stack scratch for the masked modulus
  75574. mov rax, QWORD PTR [rdx] ; --- phase 1: r = a - b, one 16-word borrow chain ---
  75575. mov r10, QWORD PTR [rdx+8]
  75576. mov r11, QWORD PTR [rdx+16]
  75577. mov r12, QWORD PTR [rdx+24]
  75578. sub rax, QWORD PTR [r8] ; lowest word: plain sub starts the borrow chain
  75579. mov r13, 0 ; mov rather than xor so CF from the sub survives
  75580. sbb r10, QWORD PTR [r8+8]
  75581. sbb r11, QWORD PTR [r8+16]
  75582. sbb r12, QWORD PTR [r8+24]
  75583. mov QWORD PTR [rcx], rax ; stores and loads do not touch CF, so the chain continues
  75584. mov QWORD PTR [rcx+8], r10
  75585. mov QWORD PTR [rcx+16], r11
  75586. mov QWORD PTR [rcx+24], r12
  75587. mov rax, QWORD PTR [rdx+32]
  75588. mov r10, QWORD PTR [rdx+40]
  75589. mov r11, QWORD PTR [rdx+48]
  75590. mov r12, QWORD PTR [rdx+56]
  75591. sbb rax, QWORD PTR [r8+32]
  75592. sbb r10, QWORD PTR [r8+40]
  75593. sbb r11, QWORD PTR [r8+48]
  75594. sbb r12, QWORD PTR [r8+56]
  75595. mov QWORD PTR [rcx+32], rax
  75596. mov QWORD PTR [rcx+40], r10
  75597. mov QWORD PTR [rcx+48], r11
  75598. mov QWORD PTR [rcx+56], r12
  75599. mov rax, QWORD PTR [rdx+64]
  75600. mov r10, QWORD PTR [rdx+72]
  75601. mov r11, QWORD PTR [rdx+80]
  75602. mov r12, QWORD PTR [rdx+88]
  75603. sbb rax, QWORD PTR [r8+64]
  75604. sbb r10, QWORD PTR [r8+72]
  75605. sbb r11, QWORD PTR [r8+80]
  75606. sbb r12, QWORD PTR [r8+88]
  75607. mov QWORD PTR [rcx+64], rax
  75608. mov QWORD PTR [rcx+72], r10
  75609. mov QWORD PTR [rcx+80], r11
  75610. mov QWORD PTR [rcx+88], r12
  75611. mov rax, QWORD PTR [rdx+96]
  75612. mov r10, QWORD PTR [rdx+104]
  75613. mov r11, QWORD PTR [rdx+112]
  75614. mov r12, QWORD PTR [rdx+120]
  75615. sbb rax, QWORD PTR [r8+96]
  75616. sbb r10, QWORD PTR [r8+104]
  75617. sbb r11, QWORD PTR [r8+112]
  75618. sbb r12, QWORD PTR [r8+120] ; CF = borrow out of the full 1024-bit subtraction
  75619. mov QWORD PTR [rcx+96], rax
  75620. mov QWORD PTR [rcx+104], r10
  75621. mov QWORD PTR [rcx+112], r11
  75622. mov QWORD PTR [rcx+120], r12
  75623. sbb r13, 0 ; r13 = 0 - CF: all ones if a - b borrowed (a < b), else 0
  75624. mov r11, QWORD PTR [r9] ; --- phase 2: stack[0..15] = m & mask (m or zero, no branch) ---
  75625. mov r12, QWORD PTR [r9+8]
  75626. and r11, r13
  75627. and r12, r13
  75628. mov QWORD PTR [rsp], r11
  75629. mov QWORD PTR [rsp+8], r12
  75630. mov r11, QWORD PTR [r9+16]
  75631. mov r12, QWORD PTR [r9+24]
  75632. and r11, r13
  75633. and r12, r13
  75634. mov QWORD PTR [rsp+16], r11
  75635. mov QWORD PTR [rsp+24], r12
  75636. mov r11, QWORD PTR [r9+32]
  75637. mov r12, QWORD PTR [r9+40]
  75638. and r11, r13
  75639. and r12, r13
  75640. mov QWORD PTR [rsp+32], r11
  75641. mov QWORD PTR [rsp+40], r12
  75642. mov r11, QWORD PTR [r9+48]
  75643. mov r12, QWORD PTR [r9+56]
  75644. and r11, r13
  75645. and r12, r13
  75646. mov QWORD PTR [rsp+48], r11
  75647. mov QWORD PTR [rsp+56], r12
  75648. mov r11, QWORD PTR [r9+64]
  75649. mov r12, QWORD PTR [r9+72]
  75650. and r11, r13
  75651. and r12, r13
  75652. mov QWORD PTR [rsp+64], r11
  75653. mov QWORD PTR [rsp+72], r12
  75654. mov r11, QWORD PTR [r9+80]
  75655. mov r12, QWORD PTR [r9+88]
  75656. and r11, r13
  75657. and r12, r13
  75658. mov QWORD PTR [rsp+80], r11
  75659. mov QWORD PTR [rsp+88], r12
  75660. mov r11, QWORD PTR [r9+96]
  75661. mov r12, QWORD PTR [r9+104]
  75662. and r11, r13
  75663. and r12, r13
  75664. mov QWORD PTR [rsp+96], r11
  75665. mov QWORD PTR [rsp+104], r12
  75666. mov r11, QWORD PTR [r9+112]
  75667. mov r12, QWORD PTR [r9+120]
  75668. and r11, r13
  75669. and r12, r13
  75670. mov QWORD PTR [rsp+112], r11
  75671. mov QWORD PTR [rsp+120], r12
  75672. mov rax, QWORD PTR [rcx] ; --- phase 3: r += (m & mask), one 16-word carry chain ---
  75673. mov r10, QWORD PTR [rcx+8]
  75674. add rax, QWORD PTR [rsp] ; lowest word: plain add starts the carry chain
  75675. adc r10, QWORD PTR [rsp+8]
  75676. mov QWORD PTR [rcx], rax
  75677. mov QWORD PTR [rcx+8], r10
  75678. mov rax, QWORD PTR [rcx+16]
  75679. mov r10, QWORD PTR [rcx+24]
  75680. adc rax, QWORD PTR [rsp+16]
  75681. adc r10, QWORD PTR [rsp+24]
  75682. mov QWORD PTR [rcx+16], rax
  75683. mov QWORD PTR [rcx+24], r10
  75684. mov rax, QWORD PTR [rcx+32]
  75685. mov r10, QWORD PTR [rcx+40]
  75686. adc rax, QWORD PTR [rsp+32]
  75687. adc r10, QWORD PTR [rsp+40]
  75688. mov QWORD PTR [rcx+32], rax
  75689. mov QWORD PTR [rcx+40], r10
  75690. mov rax, QWORD PTR [rcx+48]
  75691. mov r10, QWORD PTR [rcx+56]
  75692. adc rax, QWORD PTR [rsp+48]
  75693. adc r10, QWORD PTR [rsp+56]
  75694. mov QWORD PTR [rcx+48], rax
  75695. mov QWORD PTR [rcx+56], r10
  75696. mov rax, QWORD PTR [rcx+64]
  75697. mov r10, QWORD PTR [rcx+72]
  75698. adc rax, QWORD PTR [rsp+64]
  75699. adc r10, QWORD PTR [rsp+72]
  75700. mov QWORD PTR [rcx+64], rax
  75701. mov QWORD PTR [rcx+72], r10
  75702. mov rax, QWORD PTR [rcx+80]
  75703. mov r10, QWORD PTR [rcx+88]
  75704. adc rax, QWORD PTR [rsp+80]
  75705. adc r10, QWORD PTR [rsp+88]
  75706. mov QWORD PTR [rcx+80], rax
  75707. mov QWORD PTR [rcx+88], r10
  75708. mov rax, QWORD PTR [rcx+96]
  75709. mov r10, QWORD PTR [rcx+104]
  75710. adc rax, QWORD PTR [rsp+96]
  75711. adc r10, QWORD PTR [rsp+104]
  75712. mov QWORD PTR [rcx+96], rax
  75713. mov QWORD PTR [rcx+104], r10
  75714. mov rax, QWORD PTR [rcx+112]
  75715. mov r10, QWORD PTR [rcx+120]
  75716. adc rax, QWORD PTR [rsp+112]
  75717. adc r10, QWORD PTR [rsp+120] ; final carry is discarded
  75718. mov QWORD PTR [rcx+112], rax
  75719. mov QWORD PTR [rcx+120], r10
  75720. add rsp, 128 ; release the scratch area
  75721. pop r13 ; restore in reverse push order
  75722. pop r12
  75723. ret
  75724. sp_1024_mont_sub_16 ENDP
  75725. _text ENDS
  75726. ; /* Divide the number by 2 mod the modulus (prime). (r = a / 2 % m)
  75727. ; *
  75728. ; * r Result of division by 2.
  75729. ; * a Number to divide.
  75730. ; * m Modulus (prime).
  75731. ; */
  75732. _text SEGMENT READONLY PARA
  75733. sp_1024_div2_16 PROC
  75734. push r12
  75735. push r13
  75736. sub rsp, 128
  75737. mov r13, QWORD PTR [rdx]
  75738. xor r12, r12
  75739. mov rax, r13
  75740. and r13, 1
  75741. neg r13
  75742. mov r10, QWORD PTR [r8]
  75743. and r10, r13
  75744. mov QWORD PTR [rsp], r10
  75745. mov r10, QWORD PTR [r8+8]
  75746. and r10, r13
  75747. mov QWORD PTR [rsp+8], r10
  75748. mov r10, QWORD PTR [r8+16]
  75749. and r10, r13
  75750. mov QWORD PTR [rsp+16], r10
  75751. mov r10, QWORD PTR [r8+24]
  75752. and r10, r13
  75753. mov QWORD PTR [rsp+24], r10
  75754. mov r10, QWORD PTR [r8+32]
  75755. and r10, r13
  75756. mov QWORD PTR [rsp+32], r10
  75757. mov r10, QWORD PTR [r8+40]
  75758. and r10, r13
  75759. mov QWORD PTR [rsp+40], r10
  75760. mov r10, QWORD PTR [r8+48]
  75761. and r10, r13
  75762. mov QWORD PTR [rsp+48], r10
  75763. mov r10, QWORD PTR [r8+56]
  75764. and r10, r13
  75765. mov QWORD PTR [rsp+56], r10
  75766. mov r10, QWORD PTR [r8+64]
  75767. and r10, r13
  75768. mov QWORD PTR [rsp+64], r10
  75769. mov r10, QWORD PTR [r8+72]
  75770. and r10, r13
  75771. mov QWORD PTR [rsp+72], r10
  75772. mov r10, QWORD PTR [r8+80]
  75773. and r10, r13
  75774. mov QWORD PTR [rsp+80], r10
  75775. mov r10, QWORD PTR [r8+88]
  75776. and r10, r13
  75777. mov QWORD PTR [rsp+88], r10
  75778. mov r10, QWORD PTR [r8+96]
  75779. and r10, r13
  75780. mov QWORD PTR [rsp+96], r10
  75781. mov r10, QWORD PTR [r8+104]
  75782. and r10, r13
  75783. mov QWORD PTR [rsp+104], r10
  75784. mov r10, QWORD PTR [r8+112]
  75785. and r10, r13
  75786. mov QWORD PTR [rsp+112], r10
  75787. mov r10, QWORD PTR [r8+120]
  75788. and r10, r13
  75789. mov QWORD PTR [rsp+120], r10
  75790. add QWORD PTR [rsp], rax
  75791. mov rax, QWORD PTR [rdx+8]
  75792. adc QWORD PTR [rsp+8], rax
  75793. mov rax, QWORD PTR [rdx+16]
  75794. adc QWORD PTR [rsp+16], rax
  75795. mov rax, QWORD PTR [rdx+24]
  75796. adc QWORD PTR [rsp+24], rax
  75797. mov rax, QWORD PTR [rdx+32]
  75798. adc QWORD PTR [rsp+32], rax
  75799. mov rax, QWORD PTR [rdx+40]
  75800. adc QWORD PTR [rsp+40], rax
  75801. mov rax, QWORD PTR [rdx+48]
  75802. adc QWORD PTR [rsp+48], rax
  75803. mov rax, QWORD PTR [rdx+56]
  75804. adc QWORD PTR [rsp+56], rax
  75805. mov rax, QWORD PTR [rdx+64]
  75806. adc QWORD PTR [rsp+64], rax
  75807. mov rax, QWORD PTR [rdx+72]
  75808. adc QWORD PTR [rsp+72], rax
  75809. mov rax, QWORD PTR [rdx+80]
  75810. adc QWORD PTR [rsp+80], rax
  75811. mov rax, QWORD PTR [rdx+88]
  75812. adc QWORD PTR [rsp+88], rax
  75813. mov rax, QWORD PTR [rdx+96]
  75814. adc QWORD PTR [rsp+96], rax
  75815. mov rax, QWORD PTR [rdx+104]
  75816. adc QWORD PTR [rsp+104], rax
  75817. mov rax, QWORD PTR [rdx+112]
  75818. adc QWORD PTR [rsp+112], rax
  75819. mov rax, QWORD PTR [rdx+120]
  75820. adc QWORD PTR [rsp+120], rax
  75821. adc r12, 0
  75822. mov rax, QWORD PTR [rsp]
  75823. mov r9, QWORD PTR [rsp+8]
  75824. shrd rax, r9, 1
  75825. mov QWORD PTR [rcx], rax
  75826. mov rax, QWORD PTR [rsp+16]
  75827. shrd r9, rax, 1
  75828. mov QWORD PTR [rcx+8], r9
  75829. mov r9, QWORD PTR [rsp+24]
  75830. shrd rax, r9, 1
  75831. mov QWORD PTR [rcx+16], rax
  75832. mov rax, QWORD PTR [rsp+32]
  75833. shrd r9, rax, 1
  75834. mov QWORD PTR [rcx+24], r9
  75835. mov r9, QWORD PTR [rsp+40]
  75836. shrd rax, r9, 1
  75837. mov QWORD PTR [rcx+32], rax
  75838. mov rax, QWORD PTR [rsp+48]
  75839. shrd r9, rax, 1
  75840. mov QWORD PTR [rcx+40], r9
  75841. mov r9, QWORD PTR [rsp+56]
  75842. shrd rax, r9, 1
  75843. mov QWORD PTR [rcx+48], rax
  75844. mov rax, QWORD PTR [rsp+64]
  75845. shrd r9, rax, 1
  75846. mov QWORD PTR [rcx+56], r9
  75847. mov r9, QWORD PTR [rsp+72]
  75848. shrd rax, r9, 1
  75849. mov QWORD PTR [rcx+64], rax
  75850. mov rax, QWORD PTR [rsp+80]
  75851. shrd r9, rax, 1
  75852. mov QWORD PTR [rcx+72], r9
  75853. mov r9, QWORD PTR [rsp+88]
  75854. shrd rax, r9, 1
  75855. mov QWORD PTR [rcx+80], rax
  75856. mov rax, QWORD PTR [rsp+96]
  75857. shrd r9, rax, 1
  75858. mov QWORD PTR [rcx+88], r9
  75859. mov r9, QWORD PTR [rsp+104]
  75860. shrd rax, r9, 1
  75861. mov QWORD PTR [rcx+96], rax
  75862. mov rax, QWORD PTR [rsp+112]
  75863. shrd r9, rax, 1
  75864. mov QWORD PTR [rcx+104], r9
  75865. mov r9, QWORD PTR [rsp+120]
  75866. shrd rax, r9, 1
  75867. mov QWORD PTR [rcx+112], rax
  75868. shrd r9, r12, 1
  75869. mov QWORD PTR [rcx+120], r9
  75870. add rsp, 128
  75871. pop r13
  75872. pop r12
  75873. ret
  75874. sp_1024_div2_16 ENDP
  75875. _text ENDS
  75876. ; /* Sub b from a into r. (r = a - b)
  75877. ; *
  75878. ; * r A single precision integer.
  75879. ; * a A single precision integer.
  75880. ; * b A single precision integer.
  75881. ; */
  75882. _text SEGMENT READONLY PARA
  75883. sp_1024_sub_16 PROC ; rcx = r, rdx = a, r8 = b; each is 16 x 64-bit words. Computes r = a - b; rax returns 0, or all ones (-1) if the subtraction borrowed.
  75884. mov r9, QWORD PTR [rdx] ; r9 and r10 alternate so the next load overlaps the previous store
  75885. sub r9, QWORD PTR [r8] ; lowest word: plain sub starts the 16-word borrow chain
  75886. mov r10, QWORD PTR [rdx+8] ; loads and stores do not touch CF, so the chain continues
  75887. mov QWORD PTR [rcx], r9
  75888. sbb r10, QWORD PTR [r8+8]
  75889. mov r9, QWORD PTR [rdx+16]
  75890. mov QWORD PTR [rcx+8], r10
  75891. sbb r9, QWORD PTR [r8+16]
  75892. mov r10, QWORD PTR [rdx+24]
  75893. mov QWORD PTR [rcx+16], r9
  75894. sbb r10, QWORD PTR [r8+24]
  75895. mov r9, QWORD PTR [rdx+32]
  75896. mov QWORD PTR [rcx+24], r10
  75897. sbb r9, QWORD PTR [r8+32]
  75898. mov r10, QWORD PTR [rdx+40]
  75899. mov QWORD PTR [rcx+32], r9
  75900. sbb r10, QWORD PTR [r8+40]
  75901. mov r9, QWORD PTR [rdx+48]
  75902. mov QWORD PTR [rcx+40], r10
  75903. sbb r9, QWORD PTR [r8+48]
  75904. mov r10, QWORD PTR [rdx+56]
  75905. mov QWORD PTR [rcx+48], r9
  75906. sbb r10, QWORD PTR [r8+56]
  75907. mov r9, QWORD PTR [rdx+64]
  75908. mov QWORD PTR [rcx+56], r10
  75909. sbb r9, QWORD PTR [r8+64]
  75910. mov r10, QWORD PTR [rdx+72]
  75911. mov QWORD PTR [rcx+64], r9
  75912. sbb r10, QWORD PTR [r8+72]
  75913. mov r9, QWORD PTR [rdx+80]
  75914. mov QWORD PTR [rcx+72], r10
  75915. sbb r9, QWORD PTR [r8+80]
  75916. mov r10, QWORD PTR [rdx+88]
  75917. mov QWORD PTR [rcx+80], r9
  75918. sbb r10, QWORD PTR [r8+88]
  75919. mov r9, QWORD PTR [rdx+96]
  75920. mov QWORD PTR [rcx+88], r10
  75921. sbb r9, QWORD PTR [r8+96]
  75922. mov r10, QWORD PTR [rdx+104]
  75923. mov QWORD PTR [rcx+96], r9
  75924. sbb r10, QWORD PTR [r8+104]
  75925. mov r9, QWORD PTR [rdx+112]
  75926. mov QWORD PTR [rcx+104], r10
  75927. sbb r9, QWORD PTR [r8+112]
  75928. mov r10, QWORD PTR [rdx+120]
  75929. mov QWORD PTR [rcx+112], r9
  75930. sbb r10, QWORD PTR [r8+120] ; CF = borrow out of the full 1024-bit subtraction
  75931. mov QWORD PTR [rcx+120], r10
  75932. sbb rax, rax ; rax = rax - rax - CF = 0 - CF: 0 if no borrow, -1 if a < b
  75933. ret
  75934. sp_1024_sub_16 ENDP
  75935. _text ENDS
  75936. IFDEF HAVE_INTEL_AVX2
  75937. ; /* Reduce the number back to 1024 bits using Montgomery reduction.
  75938. ; *
  75939. ; * a A single precision number to reduce in place.
  75940. ; * m The single precision number representing the modulus.
  75941. ; * mp The digit representing the negative inverse of m mod 2^n.
  75942. ; */
  75943. _text SEGMENT READONLY PARA
  75944. sp_1024_mont_reduce_avx2_16 PROC
  75945. push r12
  75946. push r13
  75947. push r14
  75948. push r15
  75949. push rdi
  75950. push rsi
  75951. push rbx
  75952. push rbp
  75953. mov r9, rcx
  75954. mov r10, rdx
  75955. xor rbp, rbp
  75956. ; i = 16
  75957. mov r11, 16
  75958. mov r14, QWORD PTR [r9]
  75959. mov r15, QWORD PTR [r9+8]
  75960. mov rdi, QWORD PTR [r9+16]
  75961. mov rsi, QWORD PTR [r9+24]
  75962. add r9, 64
  75963. xor rbp, rbp
  75964. L_1024_mont_reduce_avx2_16_loop:
  75965. ; mu = a[i] * mp
  75966. mov rdx, r14
  75967. mov r12, r14
  75968. imul rdx, r8
  75969. xor rbx, rbx
  75970. ; a[i+0] += m[0] * mu
  75971. mulx rcx, rax, QWORD PTR [r10]
  75972. mov r14, r15
  75973. adcx r12, rax
  75974. adox r14, rcx
  75975. ; a[i+1] += m[1] * mu
  75976. mulx rcx, rax, QWORD PTR [r10+8]
  75977. mov r15, rdi
  75978. adcx r14, rax
  75979. adox r15, rcx
  75980. ; a[i+2] += m[2] * mu
  75981. mulx rcx, rax, QWORD PTR [r10+16]
  75982. mov rdi, rsi
  75983. adcx r15, rax
  75984. adox rdi, rcx
  75985. ; a[i+3] += m[3] * mu
  75986. mulx rcx, rax, QWORD PTR [r10+24]
  75987. mov rsi, QWORD PTR [r9+-32]
  75988. adcx rdi, rax
  75989. adox rsi, rcx
  75990. ; a[i+4] += m[4] * mu
  75991. mulx rcx, rax, QWORD PTR [r10+32]
  75992. mov r13, QWORD PTR [r9+-24]
  75993. adcx rsi, rax
  75994. adox r13, rcx
  75995. ; a[i+5] += m[5] * mu
  75996. mulx rcx, rax, QWORD PTR [r10+40]
  75997. mov r12, QWORD PTR [r9+-16]
  75998. adcx r13, rax
  75999. adox r12, rcx
  76000. mov QWORD PTR [r9+-24], r13
  76001. ; a[i+6] += m[6] * mu
  76002. mulx rcx, rax, QWORD PTR [r10+48]
  76003. mov r13, QWORD PTR [r9+-8]
  76004. adcx r12, rax
  76005. adox r13, rcx
  76006. mov QWORD PTR [r9+-16], r12
  76007. ; a[i+7] += m[7] * mu
  76008. mulx rcx, rax, QWORD PTR [r10+56]
  76009. mov r12, QWORD PTR [r9]
  76010. adcx r13, rax
  76011. adox r12, rcx
  76012. mov QWORD PTR [r9+-8], r13
  76013. ; a[i+8] += m[8] * mu
  76014. mulx rcx, rax, QWORD PTR [r10+64]
  76015. mov r13, QWORD PTR [r9+8]
  76016. adcx r12, rax
  76017. adox r13, rcx
  76018. mov QWORD PTR [r9], r12
  76019. ; a[i+9] += m[9] * mu
  76020. mulx rcx, rax, QWORD PTR [r10+72]
  76021. mov r12, QWORD PTR [r9+16]
  76022. adcx r13, rax
  76023. adox r12, rcx
  76024. mov QWORD PTR [r9+8], r13
  76025. ; a[i+10] += m[10] * mu
  76026. mulx rcx, rax, QWORD PTR [r10+80]
  76027. mov r13, QWORD PTR [r9+24]
  76028. adcx r12, rax
  76029. adox r13, rcx
  76030. mov QWORD PTR [r9+16], r12
  76031. ; a[i+11] += m[11] * mu
  76032. mulx rcx, rax, QWORD PTR [r10+88]
  76033. mov r12, QWORD PTR [r9+32]
  76034. adcx r13, rax
  76035. adox r12, rcx
  76036. mov QWORD PTR [r9+24], r13
  76037. ; a[i+12] += m[12] * mu
  76038. mulx rcx, rax, QWORD PTR [r10+96]
  76039. mov r13, QWORD PTR [r9+40]
  76040. adcx r12, rax
  76041. adox r13, rcx
  76042. mov QWORD PTR [r9+32], r12
  76043. ; a[i+13] += m[13] * mu
  76044. mulx rcx, rax, QWORD PTR [r10+104]
  76045. mov r12, QWORD PTR [r9+48]
  76046. adcx r13, rax
  76047. adox r12, rcx
  76048. mov QWORD PTR [r9+40], r13
  76049. ; a[i+14] += m[14] * mu
  76050. mulx rcx, rax, QWORD PTR [r10+112]
  76051. mov r13, QWORD PTR [r9+56]
  76052. adcx r12, rax
  76053. adox r13, rcx
  76054. mov QWORD PTR [r9+48], r12
  76055. ; a[i+15] += m[15] * mu
  76056. mulx rcx, rax, QWORD PTR [r10+120]
  76057. mov r12, QWORD PTR [r9+64]
  76058. adcx r13, rax
  76059. adox r12, rcx
  76060. mov QWORD PTR [r9+56], r13
  76061. adcx r12, rbp
  76062. mov rbp, rbx
  76063. mov QWORD PTR [r9+64], r12
  76064. adox rbp, rbx
  76065. adcx rbp, rbx
  76066. ; mu = a[i] * mp
  76067. mov rdx, r14
  76068. mov r12, r14
  76069. imul rdx, r8
  76070. xor rbx, rbx
  76071. ; a[i+0] += m[0] * mu
  76072. mulx rcx, rax, QWORD PTR [r10]
  76073. mov r14, r15
  76074. adcx r12, rax
  76075. adox r14, rcx
  76076. ; a[i+1] += m[1] * mu
  76077. mulx rcx, rax, QWORD PTR [r10+8]
  76078. mov r15, rdi
  76079. adcx r14, rax
  76080. adox r15, rcx
  76081. ; a[i+2] += m[2] * mu
  76082. mulx rcx, rax, QWORD PTR [r10+16]
  76083. mov rdi, rsi
  76084. adcx r15, rax
  76085. adox rdi, rcx
  76086. ; a[i+3] += m[3] * mu
  76087. mulx rcx, rax, QWORD PTR [r10+24]
  76088. mov rsi, QWORD PTR [r9+-24]
  76089. adcx rdi, rax
  76090. adox rsi, rcx
  76091. ; a[i+4] += m[4] * mu
  76092. mulx rcx, rax, QWORD PTR [r10+32]
  76093. mov r13, QWORD PTR [r9+-16]
  76094. adcx rsi, rax
  76095. adox r13, rcx
  76096. ; a[i+5] += m[5] * mu
  76097. mulx rcx, rax, QWORD PTR [r10+40]
  76098. mov r12, QWORD PTR [r9+-8]
  76099. adcx r13, rax
  76100. adox r12, rcx
  76101. mov QWORD PTR [r9+-16], r13
  76102. ; a[i+6] += m[6] * mu
  76103. mulx rcx, rax, QWORD PTR [r10+48]
  76104. mov r13, QWORD PTR [r9]
  76105. adcx r12, rax
  76106. adox r13, rcx
  76107. mov QWORD PTR [r9+-8], r12
  76108. ; a[i+7] += m[7] * mu
  76109. mulx rcx, rax, QWORD PTR [r10+56]
  76110. mov r12, QWORD PTR [r9+8]
  76111. adcx r13, rax
  76112. adox r12, rcx
  76113. mov QWORD PTR [r9], r13
  76114. ; a[i+8] += m[8] * mu
  76115. mulx rcx, rax, QWORD PTR [r10+64]
  76116. mov r13, QWORD PTR [r9+16]
  76117. adcx r12, rax
  76118. adox r13, rcx
  76119. mov QWORD PTR [r9+8], r12
  76120. ; a[i+9] += m[9] * mu
  76121. mulx rcx, rax, QWORD PTR [r10+72]
  76122. mov r12, QWORD PTR [r9+24]
  76123. adcx r13, rax
  76124. adox r12, rcx
  76125. mov QWORD PTR [r9+16], r13
  76126. ; a[i+10] += m[10] * mu
  76127. mulx rcx, rax, QWORD PTR [r10+80]
  76128. mov r13, QWORD PTR [r9+32]
  76129. adcx r12, rax
  76130. adox r13, rcx
  76131. mov QWORD PTR [r9+24], r12
  76132. ; a[i+11] += m[11] * mu
  76133. mulx rcx, rax, QWORD PTR [r10+88]
  76134. mov r12, QWORD PTR [r9+40]
  76135. adcx r13, rax
  76136. adox r12, rcx
  76137. mov QWORD PTR [r9+32], r13
  76138. ; a[i+12] += m[12] * mu
  76139. mulx rcx, rax, QWORD PTR [r10+96]
  76140. mov r13, QWORD PTR [r9+48]
  76141. adcx r12, rax
  76142. adox r13, rcx
  76143. mov QWORD PTR [r9+40], r12
  76144. ; a[i+13] += m[13] * mu
  76145. mulx rcx, rax, QWORD PTR [r10+104]
  76146. mov r12, QWORD PTR [r9+56]
  76147. adcx r13, rax
  76148. adox r12, rcx
  76149. mov QWORD PTR [r9+48], r13
  76150. ; a[i+14] += m[14] * mu
  76151. mulx rcx, rax, QWORD PTR [r10+112]
  76152. mov r13, QWORD PTR [r9+64]
  76153. adcx r12, rax
  76154. adox r13, rcx
  76155. mov QWORD PTR [r9+56], r12
  76156. ; a[i+15] += m[15] * mu
  76157. mulx rcx, rax, QWORD PTR [r10+120]
  76158. mov r12, QWORD PTR [r9+72]
  76159. adcx r13, rax
  76160. adox r12, rcx
  76161. mov QWORD PTR [r9+64], r13
  76162. adcx r12, rbp
  76163. mov rbp, rbx
  76164. mov QWORD PTR [r9+72], r12
  76165. adox rbp, rbx
  76166. adcx rbp, rbx
  76167. ; a += 2
  76168. add r9, 16
  76169. ; i -= 2
  76170. sub r11, 2
  76171. jnz L_1024_mont_reduce_avx2_16_loop
  76172. sub r9, 64
  76173. sub r12, QWORD PTR [r10+120]
  76174. mov r8, r9
  76175. sbb r12, r12
  76176. neg rbp
  76177. not r12
  76178. or rbp, r12
  76179. sub r9, 128
  76180. mov rcx, QWORD PTR [r10]
  76181. mov rdx, r14
  76182. pext rcx, rcx, rbp
  76183. sub rdx, rcx
  76184. mov rcx, QWORD PTR [r10+8]
  76185. mov rax, r15
  76186. pext rcx, rcx, rbp
  76187. mov QWORD PTR [r9], rdx
  76188. sbb rax, rcx
  76189. mov rdx, QWORD PTR [r10+16]
  76190. mov rcx, rdi
  76191. pext rdx, rdx, rbp
  76192. mov QWORD PTR [r9+8], rax
  76193. sbb rcx, rdx
  76194. mov rax, QWORD PTR [r10+24]
  76195. mov rdx, rsi
  76196. pext rax, rax, rbp
  76197. mov QWORD PTR [r9+16], rcx
  76198. sbb rdx, rax
  76199. mov rcx, QWORD PTR [r10+32]
  76200. mov rax, QWORD PTR [r8+32]
  76201. pext rcx, rcx, rbp
  76202. mov QWORD PTR [r9+24], rdx
  76203. sbb rax, rcx
  76204. mov rdx, QWORD PTR [r10+40]
  76205. mov rcx, QWORD PTR [r8+40]
  76206. pext rdx, rdx, rbp
  76207. mov QWORD PTR [r9+32], rax
  76208. sbb rcx, rdx
  76209. mov rax, QWORD PTR [r10+48]
  76210. mov rdx, QWORD PTR [r8+48]
  76211. pext rax, rax, rbp
  76212. mov QWORD PTR [r9+40], rcx
  76213. sbb rdx, rax
  76214. mov rcx, QWORD PTR [r10+56]
  76215. mov rax, QWORD PTR [r8+56]
  76216. pext rcx, rcx, rbp
  76217. mov QWORD PTR [r9+48], rdx
  76218. sbb rax, rcx
  76219. mov rdx, QWORD PTR [r10+64]
  76220. mov rcx, QWORD PTR [r8+64]
  76221. pext rdx, rdx, rbp
  76222. mov QWORD PTR [r9+56], rax
  76223. sbb rcx, rdx
  76224. mov rax, QWORD PTR [r10+72]
  76225. mov rdx, QWORD PTR [r8+72]
  76226. pext rax, rax, rbp
  76227. mov QWORD PTR [r9+64], rcx
  76228. sbb rdx, rax
  76229. mov rcx, QWORD PTR [r10+80]
  76230. mov rax, QWORD PTR [r8+80]
  76231. pext rcx, rcx, rbp
  76232. mov QWORD PTR [r9+72], rdx
  76233. sbb rax, rcx
  76234. mov rdx, QWORD PTR [r10+88]
  76235. mov rcx, QWORD PTR [r8+88]
  76236. pext rdx, rdx, rbp
  76237. mov QWORD PTR [r9+80], rax
  76238. sbb rcx, rdx
  76239. mov rax, QWORD PTR [r10+96]
  76240. mov rdx, QWORD PTR [r8+96]
  76241. pext rax, rax, rbp
  76242. mov QWORD PTR [r9+88], rcx
  76243. sbb rdx, rax
  76244. mov rcx, QWORD PTR [r10+104]
  76245. mov rax, QWORD PTR [r8+104]
  76246. pext rcx, rcx, rbp
  76247. mov QWORD PTR [r9+96], rdx
  76248. sbb rax, rcx
  76249. mov rdx, QWORD PTR [r10+112]
  76250. mov rcx, QWORD PTR [r8+112]
  76251. pext rdx, rdx, rbp
  76252. mov QWORD PTR [r9+104], rax
  76253. sbb rcx, rdx
  76254. mov rax, QWORD PTR [r10+120]
  76255. mov rdx, QWORD PTR [r8+120]
  76256. pext rax, rax, rbp
  76257. mov QWORD PTR [r9+112], rcx
  76258. sbb rdx, rax
  76259. mov QWORD PTR [r9+120], rdx
  76260. pop rbp
  76261. pop rbx
  76262. pop rsi
  76263. pop rdi
  76264. pop r15
  76265. pop r14
  76266. pop r13
  76267. pop r12
  76268. ret
  76269. sp_1024_mont_reduce_avx2_16 ENDP
  76270. _text ENDS
  76271. ENDIF
  76272. IFDEF HAVE_INTEL_AVX2
  76273. ; /* Add two Montgomery form numbers (r = a + b % m).
  76274. ; *
  76275. ; * r Result of addition; 16 qwords written through rcx.
  76276. ; * a First number to add in Montgomery form; 16 qwords read through rdx.
  76277. ; * b Second number to add in Montgomery form; 16 qwords read through r8.
  76278. ; * m Modulus (prime); 16 qwords read through r9.
  76279. ; */
  76280. _text SEGMENT READONLY PARA
  76281. sp_1024_mont_add_avx2_16 PROC
  76282. push r12 ; r12 and r13 are used as scratch and restored before ret
  76283. push r13
  76284. mov rax, QWORD PTR [rdx] ; pass 1: r = a + b, four words at a time
  76285. mov r10, QWORD PTR [rdx+8]
  76286. mov r11, QWORD PTR [rdx+16]
  76287. mov r12, QWORD PTR [rdx+24]
  76288. add rax, QWORD PTR [r8] ; lowest word starts the carry chain; every later word uses adc
  76289. mov r13, 0 ; mov leaves flags alone, so the carry from the add above is kept
  76290. adc r10, QWORD PTR [r8+8]
  76291. adc r11, QWORD PTR [r8+16]
  76292. adc r12, QWORD PTR [r8+24]
  76293. mov QWORD PTR [rcx], rax
  76294. mov QWORD PTR [rcx+8], r10
  76295. mov QWORD PTR [rcx+16], r11
  76296. mov QWORD PTR [rcx+24], r12
  76297. mov rax, QWORD PTR [rdx+32] ; words 4..7 (loads and stores do not disturb CF)
  76298. mov r10, QWORD PTR [rdx+40]
  76299. mov r11, QWORD PTR [rdx+48]
  76300. mov r12, QWORD PTR [rdx+56]
  76301. adc rax, QWORD PTR [r8+32]
  76302. adc r10, QWORD PTR [r8+40]
  76303. adc r11, QWORD PTR [r8+48]
  76304. adc r12, QWORD PTR [r8+56]
  76305. mov QWORD PTR [rcx+32], rax
  76306. mov QWORD PTR [rcx+40], r10
  76307. mov QWORD PTR [rcx+48], r11
  76308. mov QWORD PTR [rcx+56], r12
  76309. mov rax, QWORD PTR [rdx+64] ; words 8..11
  76310. mov r10, QWORD PTR [rdx+72]
  76311. mov r11, QWORD PTR [rdx+80]
  76312. mov r12, QWORD PTR [rdx+88]
  76313. adc rax, QWORD PTR [r8+64]
  76314. adc r10, QWORD PTR [r8+72]
  76315. adc r11, QWORD PTR [r8+80]
  76316. adc r12, QWORD PTR [r8+88]
  76317. mov QWORD PTR [rcx+64], rax
  76318. mov QWORD PTR [rcx+72], r10
  76319. mov QWORD PTR [rcx+80], r11
  76320. mov QWORD PTR [rcx+88], r12
  76321. mov rax, QWORD PTR [rdx+96] ; words 12..15
  76322. mov r10, QWORD PTR [rdx+104]
  76323. mov r11, QWORD PTR [rdx+112]
  76324. mov r12, QWORD PTR [rdx+120]
  76325. adc rax, QWORD PTR [r8+96]
  76326. adc r10, QWORD PTR [r8+104]
  76327. adc r11, QWORD PTR [r8+112]
  76328. adc r12, QWORD PTR [r8+120] ; r12 = top word of the sum, reused for the compare below
  76329. mov QWORD PTR [rcx+96], rax
  76330. mov QWORD PTR [rcx+104], r10
  76331. mov QWORD PTR [rcx+112], r11
  76332. mov QWORD PTR [rcx+120], r12
  76333. sbb r13, 0 ; r13 = 0 - CF: all ones if the 1024-bit add carried out, else 0
  76334. sub r12, QWORD PTR [r9+120] ; compare top word of the sum with top word of m (only the borrow is used)
  76335. sbb r12, r12 ; r12 = all ones if top(sum) < top(m), else 0
  76336. not r12 ; r12 = all ones if top(sum) >= top(m), else 0
  76337. or r13, r12 ; r13 = subtract mask (0 or all ones); note only the top words are compared
  76338. mov r11, QWORD PTR [r9] ; pass 2: r -= m under the mask, two words at a time
  76339. mov r12, QWORD PTR [r9+8]
  76340. mov rax, QWORD PTR [rcx]
  76341. mov r10, QWORD PTR [rcx+8]
  76342. pext r11, r11, r13 ; mask is 0 or all ones, so pext yields 0 or the word of m unchanged
  76343. pext r12, r12, r13 ; pext does not modify flags, which keeps the borrow chain intact in later groups
  76344. sub rax, r11 ; lowest word starts the borrow chain; every later word uses sbb
  76345. sbb r10, r12
  76346. mov QWORD PTR [rcx], rax
  76347. mov QWORD PTR [rcx+8], r10
  76348. mov r11, QWORD PTR [r9+16] ; words 2..3
  76349. mov r12, QWORD PTR [r9+24]
  76350. mov rax, QWORD PTR [rcx+16]
  76351. mov r10, QWORD PTR [rcx+24]
  76352. pext r11, r11, r13
  76353. pext r12, r12, r13
  76354. sbb rax, r11
  76355. sbb r10, r12
  76356. mov QWORD PTR [rcx+16], rax
  76357. mov QWORD PTR [rcx+24], r10
  76358. mov r11, QWORD PTR [r9+32] ; words 4..5
  76359. mov r12, QWORD PTR [r9+40]
  76360. mov rax, QWORD PTR [rcx+32]
  76361. mov r10, QWORD PTR [rcx+40]
  76362. pext r11, r11, r13
  76363. pext r12, r12, r13
  76364. sbb rax, r11
  76365. sbb r10, r12
  76366. mov QWORD PTR [rcx+32], rax
  76367. mov QWORD PTR [rcx+40], r10
  76368. mov r11, QWORD PTR [r9+48] ; words 6..7
  76369. mov r12, QWORD PTR [r9+56]
  76370. mov rax, QWORD PTR [rcx+48]
  76371. mov r10, QWORD PTR [rcx+56]
  76372. pext r11, r11, r13
  76373. pext r12, r12, r13
  76374. sbb rax, r11
  76375. sbb r10, r12
  76376. mov QWORD PTR [rcx+48], rax
  76377. mov QWORD PTR [rcx+56], r10
  76378. mov r11, QWORD PTR [r9+64] ; words 8..9
  76379. mov r12, QWORD PTR [r9+72]
  76380. mov rax, QWORD PTR [rcx+64]
  76381. mov r10, QWORD PTR [rcx+72]
  76382. pext r11, r11, r13
  76383. pext r12, r12, r13
  76384. sbb rax, r11
  76385. sbb r10, r12
  76386. mov QWORD PTR [rcx+64], rax
  76387. mov QWORD PTR [rcx+72], r10
  76388. mov r11, QWORD PTR [r9+80] ; words 10..11
  76389. mov r12, QWORD PTR [r9+88]
  76390. mov rax, QWORD PTR [rcx+80]
  76391. mov r10, QWORD PTR [rcx+88]
  76392. pext r11, r11, r13
  76393. pext r12, r12, r13
  76394. sbb rax, r11
  76395. sbb r10, r12
  76396. mov QWORD PTR [rcx+80], rax
  76397. mov QWORD PTR [rcx+88], r10
  76398. mov r11, QWORD PTR [r9+96] ; words 12..13
  76399. mov r12, QWORD PTR [r9+104]
  76400. mov rax, QWORD PTR [rcx+96]
  76401. mov r10, QWORD PTR [rcx+104]
  76402. pext r11, r11, r13
  76403. pext r12, r12, r13
  76404. sbb rax, r11
  76405. sbb r10, r12
  76406. mov QWORD PTR [rcx+96], rax
  76407. mov QWORD PTR [rcx+104], r10
  76408. mov r11, QWORD PTR [r9+112] ; words 14..15
  76409. mov r12, QWORD PTR [r9+120]
  76410. mov rax, QWORD PTR [rcx+112]
  76411. mov r10, QWORD PTR [rcx+120]
  76412. pext r11, r11, r13
  76413. pext r12, r12, r13
  76414. sbb rax, r11
  76415. sbb r10, r12 ; the final borrow is not used
  76416. mov QWORD PTR [rcx+112], rax
  76417. mov QWORD PTR [rcx+120], r10
  76418. pop r13
  76419. pop r12
  76420. ret
  76421. sp_1024_mont_add_avx2_16 ENDP
  76422. _text ENDS
  76423. ENDIF
  76424. IFDEF HAVE_INTEL_AVX2
  76425. ; /* Double a Montgomery form number (r = a + a % m).
  76426. ; *
  76427. ; * r Result of doubling; 16 qwords written through rcx.
  76428. ; * a Number to double in Montgomery form; 16 qwords read through rdx.
  76429. ; * m Modulus (prime); 16 qwords read through r8.
  76430. ; */
  76431. _text SEGMENT READONLY PARA
  76432. sp_1024_mont_dbl_avx2_16 PROC
  76433. push r12 ; r12 holds the subtract mask and is restored before ret
  76434. mov rax, QWORD PTR [rdx] ; pass 1: r = a + a, four words at a time
  76435. mov r9, QWORD PTR [rdx+8]
  76436. mov r10, QWORD PTR [rdx+16]
  76437. mov r11, QWORD PTR [rdx+24]
  76438. add rax, QWORD PTR [rdx] ; lowest word starts the carry chain; every later word uses adc
  76439. mov r12, 0 ; mov leaves flags alone, so the carry from the add above is kept
  76440. adc r9, QWORD PTR [rdx+8]
  76441. adc r10, QWORD PTR [rdx+16]
  76442. adc r11, QWORD PTR [rdx+24]
  76443. mov QWORD PTR [rcx], rax
  76444. mov QWORD PTR [rcx+8], r9
  76445. mov QWORD PTR [rcx+16], r10
  76446. mov QWORD PTR [rcx+24], r11
  76447. mov rax, QWORD PTR [rdx+32] ; words 4..7 (loads and stores do not disturb CF)
  76448. mov r9, QWORD PTR [rdx+40]
  76449. mov r10, QWORD PTR [rdx+48]
  76450. mov r11, QWORD PTR [rdx+56]
  76451. adc rax, QWORD PTR [rdx+32]
  76452. adc r9, QWORD PTR [rdx+40]
  76453. adc r10, QWORD PTR [rdx+48]
  76454. adc r11, QWORD PTR [rdx+56]
  76455. mov QWORD PTR [rcx+32], rax
  76456. mov QWORD PTR [rcx+40], r9
  76457. mov QWORD PTR [rcx+48], r10
  76458. mov QWORD PTR [rcx+56], r11
  76459. mov rax, QWORD PTR [rdx+64] ; words 8..11
  76460. mov r9, QWORD PTR [rdx+72]
  76461. mov r10, QWORD PTR [rdx+80]
  76462. mov r11, QWORD PTR [rdx+88]
  76463. adc rax, QWORD PTR [rdx+64]
  76464. adc r9, QWORD PTR [rdx+72]
  76465. adc r10, QWORD PTR [rdx+80]
  76466. adc r11, QWORD PTR [rdx+88]
  76467. mov QWORD PTR [rcx+64], rax
  76468. mov QWORD PTR [rcx+72], r9
  76469. mov QWORD PTR [rcx+80], r10
  76470. mov QWORD PTR [rcx+88], r11
  76471. mov rax, QWORD PTR [rdx+96] ; words 12..15
  76472. mov r9, QWORD PTR [rdx+104]
  76473. mov r10, QWORD PTR [rdx+112]
  76474. mov r11, QWORD PTR [rdx+120]
  76475. adc rax, QWORD PTR [rdx+96]
  76476. adc r9, QWORD PTR [rdx+104]
  76477. adc r10, QWORD PTR [rdx+112]
  76478. adc r11, QWORD PTR [rdx+120] ; r11 = top word of the sum, reused for the compare below
  76479. mov QWORD PTR [rcx+96], rax
  76480. mov QWORD PTR [rcx+104], r9
  76481. mov QWORD PTR [rcx+112], r10
  76482. mov QWORD PTR [rcx+120], r11
  76483. sbb r12, 0 ; r12 = 0 - CF: all ones if the 1024-bit add carried out, else 0
  76484. sub r11, QWORD PTR [r8+120] ; compare top word of the sum with top word of m (only the borrow is used)
  76485. sbb r11, r11 ; r11 = all ones if top(sum) < top(m), else 0
  76486. not r11 ; r11 = all ones if top(sum) >= top(m), else 0
  76487. or r12, r11 ; r12 = subtract mask (0 or all ones); note only the top words are compared
  76488. mov r10, QWORD PTR [r8] ; pass 2: r -= m under the mask, two words at a time
  76489. mov r11, QWORD PTR [r8+8]
  76490. mov rax, QWORD PTR [rcx]
  76491. mov r9, QWORD PTR [rcx+8]
  76492. pext r10, r10, r12 ; mask is 0 or all ones, so pext yields 0 or the word of m unchanged
  76493. pext r11, r11, r12 ; pext does not modify flags, which keeps the borrow chain intact in later groups
  76494. sub rax, r10 ; lowest word starts the borrow chain; every later word uses sbb
  76495. sbb r9, r11
  76496. mov QWORD PTR [rcx], rax
  76497. mov QWORD PTR [rcx+8], r9
  76498. mov r10, QWORD PTR [r8+16] ; words 2..3
  76499. mov r11, QWORD PTR [r8+24]
  76500. mov rax, QWORD PTR [rcx+16]
  76501. mov r9, QWORD PTR [rcx+24]
  76502. pext r10, r10, r12
  76503. pext r11, r11, r12
  76504. sbb rax, r10
  76505. sbb r9, r11
  76506. mov QWORD PTR [rcx+16], rax
  76507. mov QWORD PTR [rcx+24], r9
  76508. mov r10, QWORD PTR [r8+32] ; words 4..5
  76509. mov r11, QWORD PTR [r8+40]
  76510. mov rax, QWORD PTR [rcx+32]
  76511. mov r9, QWORD PTR [rcx+40]
  76512. pext r10, r10, r12
  76513. pext r11, r11, r12
  76514. sbb rax, r10
  76515. sbb r9, r11
  76516. mov QWORD PTR [rcx+32], rax
  76517. mov QWORD PTR [rcx+40], r9
  76518. mov r10, QWORD PTR [r8+48] ; words 6..7
  76519. mov r11, QWORD PTR [r8+56]
  76520. mov rax, QWORD PTR [rcx+48]
  76521. mov r9, QWORD PTR [rcx+56]
  76522. pext r10, r10, r12
  76523. pext r11, r11, r12
  76524. sbb rax, r10
  76525. sbb r9, r11
  76526. mov QWORD PTR [rcx+48], rax
  76527. mov QWORD PTR [rcx+56], r9
  76528. mov r10, QWORD PTR [r8+64] ; words 8..9
  76529. mov r11, QWORD PTR [r8+72]
  76530. mov rax, QWORD PTR [rcx+64]
  76531. mov r9, QWORD PTR [rcx+72]
  76532. pext r10, r10, r12
  76533. pext r11, r11, r12
  76534. sbb rax, r10
  76535. sbb r9, r11
  76536. mov QWORD PTR [rcx+64], rax
  76537. mov QWORD PTR [rcx+72], r9
  76538. mov r10, QWORD PTR [r8+80] ; words 10..11
  76539. mov r11, QWORD PTR [r8+88]
  76540. mov rax, QWORD PTR [rcx+80]
  76541. mov r9, QWORD PTR [rcx+88]
  76542. pext r10, r10, r12
  76543. pext r11, r11, r12
  76544. sbb rax, r10
  76545. sbb r9, r11
  76546. mov QWORD PTR [rcx+80], rax
  76547. mov QWORD PTR [rcx+88], r9
  76548. mov r10, QWORD PTR [r8+96] ; words 12..13
  76549. mov r11, QWORD PTR [r8+104]
  76550. mov rax, QWORD PTR [rcx+96]
  76551. mov r9, QWORD PTR [rcx+104]
  76552. pext r10, r10, r12
  76553. pext r11, r11, r12
  76554. sbb rax, r10
  76555. sbb r9, r11
  76556. mov QWORD PTR [rcx+96], rax
  76557. mov QWORD PTR [rcx+104], r9
  76558. mov r10, QWORD PTR [r8+112] ; words 14..15
  76559. mov r11, QWORD PTR [r8+120]
  76560. mov rax, QWORD PTR [rcx+112]
  76561. mov r9, QWORD PTR [rcx+120]
  76562. pext r10, r10, r12
  76563. pext r11, r11, r12
  76564. sbb rax, r10
  76565. sbb r9, r11 ; the final borrow is not used
  76566. mov QWORD PTR [rcx+112], rax
  76567. mov QWORD PTR [rcx+120], r9
  76568. pop r12
  76569. ret
  76570. sp_1024_mont_dbl_avx2_16 ENDP
  76571. _text ENDS
  76572. ENDIF
  76573. IFDEF HAVE_INTEL_AVX2
  76574. ; /* Triple a Montgomery form number (r = a + a + a % m).
  76575. ; *
  76576. ; * r Result of tripling; 16 qwords written and re-read through rcx.
  76577. ; * a Number to triple in Montgomery form; read through rdx again after r is stored, so r == a would not give 3a.
  76578. ; * m Modulus (prime); 16 qwords read through r8.
  76579. ; */
  76580. _text SEGMENT READONLY PARA
  76581. sp_1024_mont_tpl_avx2_16 PROC
  76582. push r12 ; r12 holds the subtract mask and is restored before ret
  76583. mov rax, QWORD PTR [rdx] ; step 1: r = a + a, four words at a time
  76584. mov r9, QWORD PTR [rdx+8]
  76585. mov r10, QWORD PTR [rdx+16]
  76586. mov r11, QWORD PTR [rdx+24]
  76587. add rax, QWORD PTR [rdx] ; lowest word starts the carry chain; every later word uses adc
  76588. mov r12, 0 ; mov leaves flags alone, so the carry from the add above is kept
  76589. adc r9, QWORD PTR [rdx+8]
  76590. adc r10, QWORD PTR [rdx+16]
  76591. adc r11, QWORD PTR [rdx+24]
  76592. mov QWORD PTR [rcx], rax
  76593. mov QWORD PTR [rcx+8], r9
  76594. mov QWORD PTR [rcx+16], r10
  76595. mov QWORD PTR [rcx+24], r11
  76596. mov rax, QWORD PTR [rdx+32] ; words 4..7 (loads and stores do not disturb CF)
  76597. mov r9, QWORD PTR [rdx+40]
  76598. mov r10, QWORD PTR [rdx+48]
  76599. mov r11, QWORD PTR [rdx+56]
  76600. adc rax, QWORD PTR [rdx+32]
  76601. adc r9, QWORD PTR [rdx+40]
  76602. adc r10, QWORD PTR [rdx+48]
  76603. adc r11, QWORD PTR [rdx+56]
  76604. mov QWORD PTR [rcx+32], rax
  76605. mov QWORD PTR [rcx+40], r9
  76606. mov QWORD PTR [rcx+48], r10
  76607. mov QWORD PTR [rcx+56], r11
  76608. mov rax, QWORD PTR [rdx+64] ; words 8..11
  76609. mov r9, QWORD PTR [rdx+72]
  76610. mov r10, QWORD PTR [rdx+80]
  76611. mov r11, QWORD PTR [rdx+88]
  76612. adc rax, QWORD PTR [rdx+64]
  76613. adc r9, QWORD PTR [rdx+72]
  76614. adc r10, QWORD PTR [rdx+80]
  76615. adc r11, QWORD PTR [rdx+88]
  76616. mov QWORD PTR [rcx+64], rax
  76617. mov QWORD PTR [rcx+72], r9
  76618. mov QWORD PTR [rcx+80], r10
  76619. mov QWORD PTR [rcx+88], r11
  76620. mov rax, QWORD PTR [rdx+96] ; words 12..15
  76621. mov r9, QWORD PTR [rdx+104]
  76622. mov r10, QWORD PTR [rdx+112]
  76623. mov r11, QWORD PTR [rdx+120]
  76624. adc rax, QWORD PTR [rdx+96]
  76625. adc r9, QWORD PTR [rdx+104]
  76626. adc r10, QWORD PTR [rdx+112]
  76627. adc r11, QWORD PTR [rdx+120] ; r11 = top word of the sum, reused for the compare below
  76628. mov QWORD PTR [rcx+96], rax
  76629. mov QWORD PTR [rcx+104], r9
  76630. mov QWORD PTR [rcx+112], r10
  76631. mov QWORD PTR [rcx+120], r11
  76632. sbb r12, 0 ; r12 = 0 - CF: all ones if the 1024-bit add carried out, else 0
  76633. sub r11, QWORD PTR [r8+120] ; compare top word of the sum with top word of m (only the borrow is used)
  76634. sbb r11, r11 ; r11 = all ones if top(sum) < top(m), else 0
  76635. not r11 ; r11 = all ones if top(sum) >= top(m), else 0
  76636. or r12, r11 ; r12 = subtract mask (0 or all ones); note only the top words are compared
  76637. mov r10, QWORD PTR [r8] ; step 2: r -= m under the mask, two words at a time
  76638. mov r11, QWORD PTR [r8+8]
  76639. mov rax, QWORD PTR [rcx]
  76640. mov r9, QWORD PTR [rcx+8]
  76641. pext r10, r10, r12 ; mask is 0 or all ones, so pext yields 0 or the word of m unchanged
  76642. pext r11, r11, r12 ; pext does not modify flags, which keeps the borrow chain intact in later groups
  76643. sub rax, r10 ; lowest word starts the borrow chain; every later word uses sbb
  76644. sbb r9, r11
  76645. mov QWORD PTR [rcx], rax
  76646. mov QWORD PTR [rcx+8], r9
  76647. mov r10, QWORD PTR [r8+16] ; words 2..3
  76648. mov r11, QWORD PTR [r8+24]
  76649. mov rax, QWORD PTR [rcx+16]
  76650. mov r9, QWORD PTR [rcx+24]
  76651. pext r10, r10, r12
  76652. pext r11, r11, r12
  76653. sbb rax, r10
  76654. sbb r9, r11
  76655. mov QWORD PTR [rcx+16], rax
  76656. mov QWORD PTR [rcx+24], r9
  76657. mov r10, QWORD PTR [r8+32] ; words 4..5
  76658. mov r11, QWORD PTR [r8+40]
  76659. mov rax, QWORD PTR [rcx+32]
  76660. mov r9, QWORD PTR [rcx+40]
  76661. pext r10, r10, r12
  76662. pext r11, r11, r12
  76663. sbb rax, r10
  76664. sbb r9, r11
  76665. mov QWORD PTR [rcx+32], rax
  76666. mov QWORD PTR [rcx+40], r9
  76667. mov r10, QWORD PTR [r8+48] ; words 6..7
  76668. mov r11, QWORD PTR [r8+56]
  76669. mov rax, QWORD PTR [rcx+48]
  76670. mov r9, QWORD PTR [rcx+56]
  76671. pext r10, r10, r12
  76672. pext r11, r11, r12
  76673. sbb rax, r10
  76674. sbb r9, r11
  76675. mov QWORD PTR [rcx+48], rax
  76676. mov QWORD PTR [rcx+56], r9
  76677. mov r10, QWORD PTR [r8+64] ; words 8..9
  76678. mov r11, QWORD PTR [r8+72]
  76679. mov rax, QWORD PTR [rcx+64]
  76680. mov r9, QWORD PTR [rcx+72]
  76681. pext r10, r10, r12
  76682. pext r11, r11, r12
  76683. sbb rax, r10
  76684. sbb r9, r11
  76685. mov QWORD PTR [rcx+64], rax
  76686. mov QWORD PTR [rcx+72], r9
  76687. mov r10, QWORD PTR [r8+80] ; words 10..11
  76688. mov r11, QWORD PTR [r8+88]
  76689. mov rax, QWORD PTR [rcx+80]
  76690. mov r9, QWORD PTR [rcx+88]
  76691. pext r10, r10, r12
  76692. pext r11, r11, r12
  76693. sbb rax, r10
  76694. sbb r9, r11
  76695. mov QWORD PTR [rcx+80], rax
  76696. mov QWORD PTR [rcx+88], r9
  76697. mov r10, QWORD PTR [r8+96] ; words 12..13
  76698. mov r11, QWORD PTR [r8+104]
  76699. mov rax, QWORD PTR [rcx+96]
  76700. mov r9, QWORD PTR [rcx+104]
  76701. pext r10, r10, r12
  76702. pext r11, r11, r12
  76703. sbb rax, r10
  76704. sbb r9, r11
  76705. mov QWORD PTR [rcx+96], rax
  76706. mov QWORD PTR [rcx+104], r9
  76707. mov r10, QWORD PTR [r8+112] ; words 14..15
  76708. mov r11, QWORD PTR [r8+120]
  76709. mov rax, QWORD PTR [rcx+112]
  76710. mov r9, QWORD PTR [rcx+120]
  76711. pext r10, r10, r12
  76712. pext r11, r11, r12
  76713. sbb rax, r10
  76714. sbb r9, r11 ; the final borrow is not used
  76715. mov QWORD PTR [rcx+112], rax
  76716. mov QWORD PTR [rcx+120], r9
  76717. mov rax, QWORD PTR [rcx] ; step 3: r = r + a, reading the step-2 result back from r
  76718. mov r9, QWORD PTR [rcx+8]
  76719. mov r10, QWORD PTR [rcx+16]
  76720. mov r11, QWORD PTR [rcx+24]
  76721. add rax, QWORD PTR [rdx] ; a is read from memory again here; new carry chain starts
  76722. mov r12, 0 ; reset the mask register without touching CF
  76723. adc r9, QWORD PTR [rdx+8]
  76724. adc r10, QWORD PTR [rdx+16]
  76725. adc r11, QWORD PTR [rdx+24]
  76726. mov QWORD PTR [rcx], rax
  76727. mov QWORD PTR [rcx+8], r9
  76728. mov QWORD PTR [rcx+16], r10
  76729. mov QWORD PTR [rcx+24], r11
  76730. mov rax, QWORD PTR [rcx+32] ; words 4..7
  76731. mov r9, QWORD PTR [rcx+40]
  76732. mov r10, QWORD PTR [rcx+48]
  76733. mov r11, QWORD PTR [rcx+56]
  76734. adc rax, QWORD PTR [rdx+32]
  76735. adc r9, QWORD PTR [rdx+40]
  76736. adc r10, QWORD PTR [rdx+48]
  76737. adc r11, QWORD PTR [rdx+56]
  76738. mov QWORD PTR [rcx+32], rax
  76739. mov QWORD PTR [rcx+40], r9
  76740. mov QWORD PTR [rcx+48], r10
  76741. mov QWORD PTR [rcx+56], r11
  76742. mov rax, QWORD PTR [rcx+64] ; words 8..11
  76743. mov r9, QWORD PTR [rcx+72]
  76744. mov r10, QWORD PTR [rcx+80]
  76745. mov r11, QWORD PTR [rcx+88]
  76746. adc rax, QWORD PTR [rdx+64]
  76747. adc r9, QWORD PTR [rdx+72]
  76748. adc r10, QWORD PTR [rdx+80]
  76749. adc r11, QWORD PTR [rdx+88]
  76750. mov QWORD PTR [rcx+64], rax
  76751. mov QWORD PTR [rcx+72], r9
  76752. mov QWORD PTR [rcx+80], r10
  76753. mov QWORD PTR [rcx+88], r11
  76754. mov rax, QWORD PTR [rcx+96] ; words 12..15
  76755. mov r9, QWORD PTR [rcx+104]
  76756. mov r10, QWORD PTR [rcx+112]
  76757. mov r11, QWORD PTR [rcx+120]
  76758. adc rax, QWORD PTR [rdx+96]
  76759. adc r9, QWORD PTR [rdx+104]
  76760. adc r10, QWORD PTR [rdx+112]
  76761. adc r11, QWORD PTR [rdx+120] ; r11 = top word of the sum, reused for the compare below
  76762. mov QWORD PTR [rcx+96], rax
  76763. mov QWORD PTR [rcx+104], r9
  76764. mov QWORD PTR [rcx+112], r10
  76765. mov QWORD PTR [rcx+120], r11
  76766. sbb r12, 0 ; r12 = 0 - CF: all ones if this add carried out, else 0
  76767. sub r11, QWORD PTR [r8+120] ; compare top word of the sum with top word of m (only the borrow is used)
  76768. sbb r11, r11 ; r11 = all ones if top(sum) < top(m), else 0
  76769. not r11 ; r11 = all ones if top(sum) >= top(m), else 0
  76770. or r12, r11 ; r12 = subtract mask (0 or all ones); note only the top words are compared
  76771. mov r10, QWORD PTR [r8] ; step 4: r -= m under the mask, two words at a time
  76772. mov r11, QWORD PTR [r8+8]
  76773. mov rax, QWORD PTR [rcx]
  76774. mov r9, QWORD PTR [rcx+8]
  76775. pext r10, r10, r12
  76776. pext r11, r11, r12
  76777. sub rax, r10 ; lowest word starts the borrow chain; every later word uses sbb
  76778. sbb r9, r11
  76779. mov QWORD PTR [rcx], rax
  76780. mov QWORD PTR [rcx+8], r9
  76781. mov r10, QWORD PTR [r8+16] ; words 2..3
  76782. mov r11, QWORD PTR [r8+24]
  76783. mov rax, QWORD PTR [rcx+16]
  76784. mov r9, QWORD PTR [rcx+24]
  76785. pext r10, r10, r12
  76786. pext r11, r11, r12
  76787. sbb rax, r10
  76788. sbb r9, r11
  76789. mov QWORD PTR [rcx+16], rax
  76790. mov QWORD PTR [rcx+24], r9
  76791. mov r10, QWORD PTR [r8+32] ; words 4..5
  76792. mov r11, QWORD PTR [r8+40]
  76793. mov rax, QWORD PTR [rcx+32]
  76794. mov r9, QWORD PTR [rcx+40]
  76795. pext r10, r10, r12
  76796. pext r11, r11, r12
  76797. sbb rax, r10
  76798. sbb r9, r11
  76799. mov QWORD PTR [rcx+32], rax
  76800. mov QWORD PTR [rcx+40], r9
  76801. mov r10, QWORD PTR [r8+48] ; words 6..7
  76802. mov r11, QWORD PTR [r8+56]
  76803. mov rax, QWORD PTR [rcx+48]
  76804. mov r9, QWORD PTR [rcx+56]
  76805. pext r10, r10, r12
  76806. pext r11, r11, r12
  76807. sbb rax, r10
  76808. sbb r9, r11
  76809. mov QWORD PTR [rcx+48], rax
  76810. mov QWORD PTR [rcx+56], r9
  76811. mov r10, QWORD PTR [r8+64] ; words 8..9
  76812. mov r11, QWORD PTR [r8+72]
  76813. mov rax, QWORD PTR [rcx+64]
  76814. mov r9, QWORD PTR [rcx+72]
  76815. pext r10, r10, r12
  76816. pext r11, r11, r12
  76817. sbb rax, r10
  76818. sbb r9, r11
  76819. mov QWORD PTR [rcx+64], rax
  76820. mov QWORD PTR [rcx+72], r9
  76821. mov r10, QWORD PTR [r8+80] ; words 10..11
  76822. mov r11, QWORD PTR [r8+88]
  76823. mov rax, QWORD PTR [rcx+80]
  76824. mov r9, QWORD PTR [rcx+88]
  76825. pext r10, r10, r12
  76826. pext r11, r11, r12
  76827. sbb rax, r10
  76828. sbb r9, r11
  76829. mov QWORD PTR [rcx+80], rax
  76830. mov QWORD PTR [rcx+88], r9
  76831. mov r10, QWORD PTR [r8+96] ; words 12..13
  76832. mov r11, QWORD PTR [r8+104]
  76833. mov rax, QWORD PTR [rcx+96]
  76834. mov r9, QWORD PTR [rcx+104]
  76835. pext r10, r10, r12
  76836. pext r11, r11, r12
  76837. sbb rax, r10
  76838. sbb r9, r11
  76839. mov QWORD PTR [rcx+96], rax
  76840. mov QWORD PTR [rcx+104], r9
  76841. mov r10, QWORD PTR [r8+112] ; words 14..15
  76842. mov r11, QWORD PTR [r8+120]
  76843. mov rax, QWORD PTR [rcx+112]
  76844. mov r9, QWORD PTR [rcx+120]
  76845. pext r10, r10, r12
  76846. pext r11, r11, r12
  76847. sbb rax, r10
  76848. sbb r9, r11 ; the final borrow is not used
  76849. mov QWORD PTR [rcx+112], rax
  76850. mov QWORD PTR [rcx+120], r9
  76851. pop r12
  76852. ret
  76853. sp_1024_mont_tpl_avx2_16 ENDP
  76854. _text ENDS
  76855. ENDIF
  76856. IFDEF HAVE_INTEL_AVX2
  76857. ; /* Subtract two Montgomery form numbers (r = a - b % m).
  76858. ; *
  76859. ; * r Result of subtraction; 16 qwords written through rcx.
  76860. ; * a Number to subtract from, in Montgomery form; 16 qwords read through rdx.
  76861. ; * b Number to subtract, in Montgomery form; 16 qwords read through r8.
  76862. ; * m Modulus (prime); 16 qwords read through r9.
  76863. ; */
  76864. _text SEGMENT READONLY PARA
  76865. sp_1024_mont_sub_avx2_16 PROC
  76866. push r12 ; r12 and r13 are used as scratch and restored before ret
  76867. push r13
  76868. mov rax, QWORD PTR [rdx] ; pass 1: r = a - b, four words at a time
  76869. mov r10, QWORD PTR [rdx+8]
  76870. mov r11, QWORD PTR [rdx+16]
  76871. mov r12, QWORD PTR [rdx+24]
  76872. sub rax, QWORD PTR [r8] ; lowest word starts the borrow chain; every later word uses sbb
  76873. mov r13, 0 ; mov leaves flags alone, so the borrow from the sub above is kept
  76874. sbb r10, QWORD PTR [r8+8]
  76875. sbb r11, QWORD PTR [r8+16]
  76876. sbb r12, QWORD PTR [r8+24]
  76877. mov QWORD PTR [rcx], rax
  76878. mov QWORD PTR [rcx+8], r10
  76879. mov QWORD PTR [rcx+16], r11
  76880. mov QWORD PTR [rcx+24], r12
  76881. mov rax, QWORD PTR [rdx+32] ; words 4..7 (loads and stores do not disturb CF)
  76882. mov r10, QWORD PTR [rdx+40]
  76883. mov r11, QWORD PTR [rdx+48]
  76884. mov r12, QWORD PTR [rdx+56]
  76885. sbb rax, QWORD PTR [r8+32]
  76886. sbb r10, QWORD PTR [r8+40]
  76887. sbb r11, QWORD PTR [r8+48]
  76888. sbb r12, QWORD PTR [r8+56]
  76889. mov QWORD PTR [rcx+32], rax
  76890. mov QWORD PTR [rcx+40], r10
  76891. mov QWORD PTR [rcx+48], r11
  76892. mov QWORD PTR [rcx+56], r12
  76893. mov rax, QWORD PTR [rdx+64] ; words 8..11
  76894. mov r10, QWORD PTR [rdx+72]
  76895. mov r11, QWORD PTR [rdx+80]
  76896. mov r12, QWORD PTR [rdx+88]
  76897. sbb rax, QWORD PTR [r8+64]
  76898. sbb r10, QWORD PTR [r8+72]
  76899. sbb r11, QWORD PTR [r8+80]
  76900. sbb r12, QWORD PTR [r8+88]
  76901. mov QWORD PTR [rcx+64], rax
  76902. mov QWORD PTR [rcx+72], r10
  76903. mov QWORD PTR [rcx+80], r11
  76904. mov QWORD PTR [rcx+88], r12
  76905. mov rax, QWORD PTR [rdx+96] ; words 12..15
  76906. mov r10, QWORD PTR [rdx+104]
  76907. mov r11, QWORD PTR [rdx+112]
  76908. mov r12, QWORD PTR [rdx+120]
  76909. sbb rax, QWORD PTR [r8+96]
  76910. sbb r10, QWORD PTR [r8+104]
  76911. sbb r11, QWORD PTR [r8+112]
  76912. sbb r12, QWORD PTR [r8+120]
  76913. mov QWORD PTR [rcx+96], rax
  76914. mov QWORD PTR [rcx+104], r10
  76915. mov QWORD PTR [rcx+112], r11
  76916. mov QWORD PTR [rcx+120], r12
  76917. sbb r13, 0 ; r13 = 0 - CF: all ones if a - b borrowed out of the top word, else 0
  76918. mov r11, QWORD PTR [r9] ; pass 2: r += m under the mask r13, two words at a time
  76919. mov r12, QWORD PTR [r9+8]
  76920. mov rax, QWORD PTR [rcx]
  76921. mov r10, QWORD PTR [rcx+8]
  76922. pext r11, r11, r13 ; mask is 0 or all ones, so pext yields 0 or the word of m unchanged
  76923. pext r12, r12, r13 ; pext does not modify flags, which keeps the carry chain intact in later groups
  76924. add rax, r11 ; lowest word starts the carry chain; every later word uses adc
  76925. adc r10, r12
  76926. mov QWORD PTR [rcx], rax
  76927. mov QWORD PTR [rcx+8], r10
  76928. mov r11, QWORD PTR [r9+16] ; words 2..3
  76929. mov r12, QWORD PTR [r9+24]
  76930. mov rax, QWORD PTR [rcx+16]
  76931. mov r10, QWORD PTR [rcx+24]
  76932. pext r11, r11, r13
  76933. pext r12, r12, r13
  76934. adc rax, r11
  76935. adc r10, r12
  76936. mov QWORD PTR [rcx+16], rax
  76937. mov QWORD PTR [rcx+24], r10
  76938. mov r11, QWORD PTR [r9+32] ; words 4..5
  76939. mov r12, QWORD PTR [r9+40]
  76940. mov rax, QWORD PTR [rcx+32]
  76941. mov r10, QWORD PTR [rcx+40]
  76942. pext r11, r11, r13
  76943. pext r12, r12, r13
  76944. adc rax, r11
  76945. adc r10, r12
  76946. mov QWORD PTR [rcx+32], rax
  76947. mov QWORD PTR [rcx+40], r10
  76948. mov r11, QWORD PTR [r9+48] ; words 6..7
  76949. mov r12, QWORD PTR [r9+56]
  76950. mov rax, QWORD PTR [rcx+48]
  76951. mov r10, QWORD PTR [rcx+56]
  76952. pext r11, r11, r13
  76953. pext r12, r12, r13
  76954. adc rax, r11
  76955. adc r10, r12
  76956. mov QWORD PTR [rcx+48], rax
  76957. mov QWORD PTR [rcx+56], r10
  76958. mov r11, QWORD PTR [r9+64] ; words 8..9
  76959. mov r12, QWORD PTR [r9+72]
  76960. mov rax, QWORD PTR [rcx+64]
  76961. mov r10, QWORD PTR [rcx+72]
  76962. pext r11, r11, r13
  76963. pext r12, r12, r13
  76964. adc rax, r11
  76965. adc r10, r12
  76966. mov QWORD PTR [rcx+64], rax
  76967. mov QWORD PTR [rcx+72], r10
  76968. mov r11, QWORD PTR [r9+80] ; words 10..11
  76969. mov r12, QWORD PTR [r9+88]
  76970. mov rax, QWORD PTR [rcx+80]
  76971. mov r10, QWORD PTR [rcx+88]
  76972. pext r11, r11, r13
  76973. pext r12, r12, r13
  76974. adc rax, r11
  76975. adc r10, r12
  76976. mov QWORD PTR [rcx+80], rax
  76977. mov QWORD PTR [rcx+88], r10
  76978. mov r11, QWORD PTR [r9+96] ; words 12..13
  76979. mov r12, QWORD PTR [r9+104]
  76980. mov rax, QWORD PTR [rcx+96]
  76981. mov r10, QWORD PTR [rcx+104]
  76982. pext r11, r11, r13
  76983. pext r12, r12, r13
  76984. adc rax, r11
  76985. adc r10, r12
  76986. mov QWORD PTR [rcx+96], rax
  76987. mov QWORD PTR [rcx+104], r10
  76988. mov r11, QWORD PTR [r9+112] ; words 14..15
  76989. mov r12, QWORD PTR [r9+120]
  76990. mov rax, QWORD PTR [rcx+112]
  76991. mov r10, QWORD PTR [rcx+120]
  76992. pext r11, r11, r13
  76993. pext r12, r12, r13
  76994. adc rax, r11
  76995. adc r10, r12 ; the final carry is not used
  76996. mov QWORD PTR [rcx+112], rax
  76997. mov QWORD PTR [rcx+120], r10
  76998. pop r13
  76999. pop r12
  77000. ret
  77001. sp_1024_mont_sub_avx2_16 ENDP
  77002. _text ENDS
  77003. ENDIF
  77004. IFDEF HAVE_INTEL_AVX2
  77005. ; /* Divide the number by 2 mod the modulus (prime). (r = a / 2 % m)
  77006. ; *
  77007. ; * r Result of division by 2.
  77008. ; * a Number to divide.
  77009. ; * m Modulus (prime).
  77010. ; */
  77011. _text SEGMENT READONLY PARA
  77012. sp_1024_div2_avx2_16 PROC
  77013. push r12
  77014. push r13
  77015. mov r13, QWORD PTR [rdx]
  77016. xor r12, r12
  77017. mov r10, r13
  77018. and r13, 1
  77019. neg r13
  77020. mov rax, QWORD PTR [r8]
  77021. mov r9, QWORD PTR [r8+8]
  77022. mov r10, QWORD PTR [rdx]
  77023. mov r11, QWORD PTR [rdx+8]
  77024. pext rax, rax, r13
  77025. pext r9, r9, r13
  77026. add r10, rax
  77027. adc r11, r9
  77028. mov QWORD PTR [rcx], r10
  77029. mov QWORD PTR [rcx+8], r11
  77030. mov rax, QWORD PTR [r8+16]
  77031. mov r9, QWORD PTR [r8+24]
  77032. mov r10, QWORD PTR [rdx+16]
  77033. mov r11, QWORD PTR [rdx+24]
  77034. pext rax, rax, r13
  77035. pext r9, r9, r13
  77036. adc r10, rax
  77037. adc r11, r9
  77038. mov QWORD PTR [rcx+16], r10
  77039. mov QWORD PTR [rcx+24], r11
  77040. mov rax, QWORD PTR [r8+32]
  77041. mov r9, QWORD PTR [r8+40]
  77042. mov r10, QWORD PTR [rdx+32]
  77043. mov r11, QWORD PTR [rdx+40]
  77044. pext rax, rax, r13
  77045. pext r9, r9, r13
  77046. adc r10, rax
  77047. adc r11, r9
  77048. mov QWORD PTR [rcx+32], r10
  77049. mov QWORD PTR [rcx+40], r11
  77050. mov rax, QWORD PTR [r8+48]
  77051. mov r9, QWORD PTR [r8+56]
  77052. mov r10, QWORD PTR [rdx+48]
  77053. mov r11, QWORD PTR [rdx+56]
  77054. pext rax, rax, r13
  77055. pext r9, r9, r13
  77056. adc r10, rax
  77057. adc r11, r9
  77058. mov QWORD PTR [rcx+48], r10
  77059. mov QWORD PTR [rcx+56], r11
  77060. mov rax, QWORD PTR [r8+64]
  77061. mov r9, QWORD PTR [r8+72]
  77062. mov r10, QWORD PTR [rdx+64]
  77063. mov r11, QWORD PTR [rdx+72]
  77064. pext rax, rax, r13
  77065. pext r9, r9, r13
  77066. adc r10, rax
  77067. adc r11, r9
  77068. mov QWORD PTR [rcx+64], r10
  77069. mov QWORD PTR [rcx+72], r11
  77070. mov rax, QWORD PTR [r8+80]
  77071. mov r9, QWORD PTR [r8+88]
  77072. mov r10, QWORD PTR [rdx+80]
  77073. mov r11, QWORD PTR [rdx+88]
  77074. pext rax, rax, r13
  77075. pext r9, r9, r13
  77076. adc r10, rax
  77077. adc r11, r9
  77078. mov QWORD PTR [rcx+80], r10
  77079. mov QWORD PTR [rcx+88], r11
  77080. mov rax, QWORD PTR [r8+96]
  77081. mov r9, QWORD PTR [r8+104]
  77082. mov r10, QWORD PTR [rdx+96]
  77083. mov r11, QWORD PTR [rdx+104]
  77084. pext rax, rax, r13
  77085. pext r9, r9, r13
  77086. adc r10, rax
  77087. adc r11, r9
  77088. mov QWORD PTR [rcx+96], r10
  77089. mov QWORD PTR [rcx+104], r11
  77090. mov rax, QWORD PTR [r8+112]
  77091. mov r9, QWORD PTR [r8+120]
  77092. mov r10, QWORD PTR [rdx+112]
  77093. mov r11, QWORD PTR [rdx+120]
  77094. pext rax, rax, r13
  77095. pext r9, r9, r13
  77096. adc r10, rax
  77097. adc r11, r9
  77098. mov QWORD PTR [rcx+112], r10
  77099. mov QWORD PTR [rcx+120], r11
  77100. adc r12, 0
  77101. mov r10, QWORD PTR [rcx]
  77102. mov r11, QWORD PTR [rcx+8]
  77103. shrd r10, r11, 1
  77104. mov QWORD PTR [rcx], r10
  77105. mov r10, QWORD PTR [rcx+16]
  77106. shrd r11, r10, 1
  77107. mov QWORD PTR [rcx+8], r11
  77108. mov r11, QWORD PTR [rcx+24]
  77109. shrd r10, r11, 1
  77110. mov QWORD PTR [rcx+16], r10
  77111. mov r10, QWORD PTR [rcx+32]
  77112. shrd r11, r10, 1
  77113. mov QWORD PTR [rcx+24], r11
  77114. mov r11, QWORD PTR [rcx+40]
  77115. shrd r10, r11, 1
  77116. mov QWORD PTR [rcx+32], r10
  77117. mov r10, QWORD PTR [rcx+48]
  77118. shrd r11, r10, 1
  77119. mov QWORD PTR [rcx+40], r11
  77120. mov r11, QWORD PTR [rcx+56]
  77121. shrd r10, r11, 1
  77122. mov QWORD PTR [rcx+48], r10
  77123. mov r10, QWORD PTR [rcx+64]
  77124. shrd r11, r10, 1
  77125. mov QWORD PTR [rcx+56], r11
  77126. mov r11, QWORD PTR [rcx+72]
  77127. shrd r10, r11, 1
  77128. mov QWORD PTR [rcx+64], r10
  77129. mov r10, QWORD PTR [rcx+80]
  77130. shrd r11, r10, 1
  77131. mov QWORD PTR [rcx+72], r11
  77132. mov r11, QWORD PTR [rcx+88]
  77133. shrd r10, r11, 1
  77134. mov QWORD PTR [rcx+80], r10
  77135. mov r10, QWORD PTR [rcx+96]
  77136. shrd r11, r10, 1
  77137. mov QWORD PTR [rcx+88], r11
  77138. mov r11, QWORD PTR [rcx+104]
  77139. shrd r10, r11, 1
  77140. mov QWORD PTR [rcx+96], r10
  77141. mov r10, QWORD PTR [rcx+112]
  77142. shrd r11, r10, 1
  77143. mov QWORD PTR [rcx+104], r11
  77144. mov r11, QWORD PTR [rcx+120]
  77145. shrd r10, r11, 1
  77146. mov QWORD PTR [rcx+112], r10
  77147. shrd r11, r12, 1
  77148. mov QWORD PTR [rcx+120], r11
  77149. pop r13
  77150. pop r12
  77151. ret
  77152. sp_1024_div2_avx2_16 ENDP
  77153. _text ENDS
  77154. ENDIF
  ; /* Read big endian unsigned byte array into r.
  ;  * Uses the bswap instruction.
  ;  *
  ;  * r     A single precision integer: 16 64-bit digits (128 bytes), stored
  ;  *       least significant digit first.
  ;  * size  Maximum number of bytes to convert. Not read by this routine; the
  ;  *       output length is fixed at 128 bytes.
  ;  * a     Byte array, most significant byte first.
  ;  * n     Number of bytes in array to read, compared as a signed 64-bit
  ;  *       value. n <= 0 yields r = 0. There is no upper bound check here:
  ;  *       the caller must keep n <= 128.
  ;  *
  ;  * Microsoft x64 register use:
  ;  *   rcx  in: r; afterwards the write cursor into r
  ;  *   rdx  in: size (ignored); overwritten with r + 128 (end of r)
  ;  *   r8   in: a; read cursor for the leading (most significant) bytes
  ;  *   r9   in: n; bytes still to convert
  ;  *   r11  one past the last input byte not yet converted
  ;  *   rax, r10  scratch
  ;  * Only volatile registers are written and rsp is never changed, so this
  ;  * is a leaf function that needs no prolog, epilog or unwind data.
  ;  */
  _text SEGMENT READONLY PARA
  sp_1024_from_bin_bswap PROC
          lea     rdx, [rcx+128]              ; rdx = end of r
          lea     r11, [r8+r9]                ; r11 = a + n, end of the input
          jmp     L_1024_from_bin_bswap_64_end

          ; Convert 64 input bytes per pass, walking a[] backwards from its
          ; end so that the least significant bytes land in the low digits.
  L_1024_from_bin_bswap_64_start:
          sub     r11, 64
          mov     rax, QWORD PTR [r11+56]
          mov     r10, QWORD PTR [r11+48]
          bswap   rax
          bswap   r10
          mov     QWORD PTR [rcx], rax
          mov     QWORD PTR [rcx+8], r10
          mov     rax, QWORD PTR [r11+40]
          mov     r10, QWORD PTR [r11+32]
          bswap   rax
          bswap   r10
          mov     QWORD PTR [rcx+16], rax
          mov     QWORD PTR [rcx+24], r10
          mov     rax, QWORD PTR [r11+24]
          mov     r10, QWORD PTR [r11+16]
          bswap   rax
          bswap   r10
          mov     QWORD PTR [rcx+32], rax
          mov     QWORD PTR [rcx+40], r10
          mov     rax, QWORD PTR [r11+8]
          mov     r10, QWORD PTR [r11]
          bswap   rax
          bswap   r10
          mov     QWORD PTR [rcx+48], rax
          mov     QWORD PTR [rcx+56], r10
          add     rcx, 64
          sub     r9, 64
  L_1024_from_bin_bswap_64_end:
          cmp     r9, 63
          jg      L_1024_from_bin_bswap_64_start
          jmp     L_1024_from_bin_bswap_8_end

          ; Convert one whole digit (8 input bytes) per pass.
  L_1024_from_bin_bswap_8_start:
          sub     r11, 8
          mov     rax, QWORD PTR [r11]
          bswap   rax
          mov     QWORD PTR [rcx], rax
          add     rcx, 8
          sub     r9, 8
  L_1024_from_bin_bswap_8_end:
          cmp     r9, 7
          jg      L_1024_from_bin_bswap_8_start

          ; 1..7 bytes may remain; they are the first bytes of a[] and form
          ; the top, partially filled digit. Skip when nothing is left; the
          ; signed test also keeps a negative n from reading a[0].
          test    r9, r9
          jle     L_1024_from_bin_bswap_hi_end
          xor     r10d, r10d                  ; r10 = digit being assembled
  L_1024_from_bin_bswap_hi_start:
          movzx   eax, BYTE PTR [r8]          ; rax = next byte, zero-extended
          shl     r10, 8
          inc     r8
          add     r10, rax
          dec     r9
          jg      L_1024_from_bin_bswap_hi_start
          mov     QWORD PTR [rcx], r10
          add     rcx, 8
  L_1024_from_bin_bswap_hi_end:

          ; Zero every digit of r that received no input (addresses are
          ; compared unsigned).
          cmp     rcx, rdx
          jae     L_1024_from_bin_bswap_zero_end
  L_1024_from_bin_bswap_zero_start:
          mov     QWORD PTR [rcx], 0
          add     rcx, 8
          cmp     rcx, rdx
          jb      L_1024_from_bin_bswap_zero_start
  L_1024_from_bin_bswap_zero_end:
          ret
  sp_1024_from_bin_bswap ENDP
  _text ENDS
  IFNDEF NO_MOVBE_SUPPORT
  ; /* Read big endian unsigned byte array into r.
  ;  * Uses the movbe instruction which is an optional instruction.
  ;  *
  ;  * r     A single precision integer: 16 64-bit digits (128 bytes), stored
  ;  *       least significant digit first.
  ;  * size  Maximum number of bytes to convert. Not read by this routine; the
  ;  *       output length is fixed at 128 bytes.
  ;  * a     Byte array, most significant byte first.
  ;  * n     Number of bytes in array to read, compared as a signed 64-bit
  ;  *       value. n <= 0 yields r = 0. There is no upper bound check here:
  ;  *       the caller must keep n <= 128.
  ;  *
  ;  * Microsoft x64 register use:
  ;  *   rcx  in: r; afterwards the write cursor into r
  ;  *   rdx  in: size (ignored); overwritten with r + 128 (end of r)
  ;  *   r8   in: a; read cursor for the leading (most significant) bytes
  ;  *   r9   in: n; bytes still to convert
  ;  *   r11  one past the last input byte not yet converted
  ;  *   rax, r10  scratch
  ;  * Only volatile registers are written and rsp is never changed, so this
  ;  * is a leaf function that needs no prolog, epilog or unwind data.
  ;  */
  _text SEGMENT READONLY PARA
  sp_1024_from_bin_movbe PROC
          lea     rdx, [rcx+128]              ; rdx = end of r
          lea     r11, [r8+r9]                ; r11 = a + n, end of the input
          jmp     L_1024_from_bin_movbe_64_end

          ; Convert 64 input bytes per pass, walking a[] backwards from its
          ; end so that the least significant bytes land in the low digits.
          ; movbe loads and byte-reverses in one instruction.
  L_1024_from_bin_movbe_64_start:
          sub     r11, 64
          movbe   rax, QWORD PTR [r11+56]
          movbe   r10, QWORD PTR [r11+48]
          mov     QWORD PTR [rcx], rax
          mov     QWORD PTR [rcx+8], r10
          movbe   rax, QWORD PTR [r11+40]
          movbe   r10, QWORD PTR [r11+32]
          mov     QWORD PTR [rcx+16], rax
          mov     QWORD PTR [rcx+24], r10
          movbe   rax, QWORD PTR [r11+24]
          movbe   r10, QWORD PTR [r11+16]
          mov     QWORD PTR [rcx+32], rax
          mov     QWORD PTR [rcx+40], r10
          movbe   rax, QWORD PTR [r11+8]
          movbe   r10, QWORD PTR [r11]
          mov     QWORD PTR [rcx+48], rax
          mov     QWORD PTR [rcx+56], r10
          add     rcx, 64
          sub     r9, 64
  L_1024_from_bin_movbe_64_end:
          cmp     r9, 63
          jg      L_1024_from_bin_movbe_64_start
          jmp     L_1024_from_bin_movbe_8_end

          ; Convert one whole digit (8 input bytes) per pass.
  L_1024_from_bin_movbe_8_start:
          sub     r11, 8
          movbe   rax, QWORD PTR [r11]
          mov     QWORD PTR [rcx], rax
          add     rcx, 8
          sub     r9, 8
  L_1024_from_bin_movbe_8_end:
          cmp     r9, 7
          jg      L_1024_from_bin_movbe_8_start

          ; 1..7 bytes may remain; they are the first bytes of a[] and form
          ; the top, partially filled digit. Skip when nothing is left; the
          ; signed test also keeps a negative n from reading a[0].
          test    r9, r9
          jle     L_1024_from_bin_movbe_hi_end
          xor     r10d, r10d                  ; r10 = digit being assembled
  L_1024_from_bin_movbe_hi_start:
          movzx   eax, BYTE PTR [r8]          ; rax = next byte, zero-extended
          shl     r10, 8
          inc     r8
          add     r10, rax
          dec     r9
          jg      L_1024_from_bin_movbe_hi_start
          mov     QWORD PTR [rcx], r10
          add     rcx, 8
  L_1024_from_bin_movbe_hi_end:

          ; Zero every digit of r that received no input (addresses are
          ; compared unsigned).
          cmp     rcx, rdx
          jae     L_1024_from_bin_movbe_zero_end
  L_1024_from_bin_movbe_zero_start:
          mov     QWORD PTR [rcx], 0
          add     rcx, 8
          cmp     rcx, rdx
          jb      L_1024_from_bin_movbe_zero_start
  L_1024_from_bin_movbe_zero_end:
          ret
  sp_1024_from_bin_movbe ENDP
  _text ENDS
  ENDIF
  77319. ENDIF
  77320. END