/* sha256_asm
 *
 * Copyright (C) 2006-2022 wolfSSL Inc.
 *
 * This file is part of wolfSSL.
 *
 * wolfSSL is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * wolfSSL is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
 */
#ifndef HAVE_INTEL_AVX1
#define HAVE_INTEL_AVX1
#endif /* HAVE_INTEL_AVX1 */
#ifndef NO_AVX2_SUPPORT
#define HAVE_INTEL_AVX2
#endif /* NO_AVX2_SUPPORT */
#ifdef HAVE_INTEL_AVX1
#ifndef __APPLE__
.data
#else
.section __DATA,__data
#endif /* __APPLE__ */
L_avx1_sha256_k:
.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
.long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
.long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
.long 0xe49b69c1,0xefbe4786,0xfc19dc6,0x240ca1cc
.long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
.long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
.long 0xc6e00bf3,0xd5a79147,0x6ca6351,0x14292967
.long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
.long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
.long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
.long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
.long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
.long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
#ifndef __APPLE__
.data
#else
.section __DATA,__data
#endif /* __APPLE__ */
#ifndef __APPLE__
.align 16
#else
.p2align 4
#endif /* __APPLE__ */
L_avx1_sha256_shuf_00BA:
.quad 0xb0a090803020100, 0xffffffffffffffff
#ifndef __APPLE__
.data
#else
.section __DATA,__data
#endif /* __APPLE__ */
#ifndef __APPLE__
.align 16
#else
.p2align 4
#endif /* __APPLE__ */
L_avx1_sha256_shuf_DC00:
.quad 0xffffffffffffffff, 0xb0a090803020100
#ifndef __APPLE__
.data
#else
.section __DATA,__data
#endif /* __APPLE__ */
#ifndef __APPLE__
.align 16
#else
.p2align 4
#endif /* __APPLE__ */
L_avx1_sha256_flip_mask:
.quad 0x405060700010203, 0xc0d0e0f08090a0b
#ifndef __APPLE__
.text
.globl Transform_Sha256_AVX1
.type Transform_Sha256_AVX1,@function
.align 16
Transform_Sha256_AVX1:
#else
.section __TEXT,__text
.globl _Transform_Sha256_AVX1
.p2align 4
_Transform_Sha256_AVX1:
#endif /* __APPLE__ */
        pushq %rbx
        pushq %r12
        pushq %r13
        pushq %r14
        pushq %r15
        subq $0x40, %rsp
        leaq 32(%rdi), %rax
        vmovdqa L_avx1_sha256_flip_mask(%rip), %xmm13
        vmovdqa L_avx1_sha256_shuf_00BA(%rip), %xmm11
        vmovdqa L_avx1_sha256_shuf_DC00(%rip), %xmm12
        movl (%rdi), %r8d
        movl 4(%rdi), %r9d
        movl 8(%rdi), %r10d
        movl 12(%rdi), %r11d
        movl 16(%rdi), %r12d
        movl 20(%rdi), %r13d
        movl 24(%rdi), %r14d
        movl 28(%rdi), %r15d
        # X0, X1, X2, X3 = W[0..15]
        vmovdqu (%rax), %xmm0
        vmovdqu 16(%rax), %xmm1
        vpshufb %xmm13, %xmm0, %xmm0
        vpshufb %xmm13, %xmm1, %xmm1
        vmovdqu 32(%rax), %xmm2
        vmovdqu 48(%rax), %xmm3
        vpshufb %xmm13, %xmm2, %xmm2
        vpshufb %xmm13, %xmm3, %xmm3
        movl %r9d, %ebx
        movl %r12d, %edx
        xorl %r10d, %ebx
        # set_w_k_xfer_4: 0
        vpaddd 0+L_avx1_sha256_k(%rip), %xmm0, %xmm4
        vpaddd 16+L_avx1_sha256_k(%rip), %xmm1, %xmm5
        vmovdqu %xmm4, (%rsp)
        vmovdqu
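        # Register and stack usage for the rounds below:
        #   r8d..r15d   - the eight working variables (a..h), loaded from
        #                 (%rdi)..28(%rdi) and added back to the state at the end
        #   xmm0..xmm3  - message words W[0..15], byte-swapped via
        #                 L_avx1_sha256_flip_mask (xmm13)
        #   xmm11/xmm12 - L_avx1_sha256_shuf_00BA / L_avx1_sha256_shuf_DC00,
        #                 the shuffle masks used by the message schedule
        #   (%rsp)..48(%rsp) - W[i] + K[i] for the 16 rounds currently being processed
        # Each msg_sched block interleaves four rounds with the AVX computation of
        # the next four schedule words.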
%xmm5, 16(%rsp) vpaddd 32+L_avx1_sha256_k(%rip), %xmm2, %xmm6 vpaddd 48+L_avx1_sha256_k(%rip), %xmm3, %xmm7 vmovdqu %xmm6, 32(%rsp) vmovdqu %xmm7, 48(%rsp) # msg_sched: 0-3 # rnd_0: 0 - 0 rorl $14, %edx vpalignr $4, %xmm0, %xmm1, %xmm5 vpalignr $4, %xmm2, %xmm3, %xmm4 # rnd_0: 1 - 2 movl %r9d, %eax movl %r13d, %ecx addl (%rsp), %r15d xorl %r14d, %ecx xorl %r12d, %edx andl %r12d, %ecx vpsrld $7, %xmm5, %xmm6 vpslld $25, %xmm5, %xmm7 # rnd_0: 3 - 4 rorl $5, %edx xorl %r14d, %ecx xorl %r12d, %edx addl %ecx, %r15d rorl $6, %edx xorl %r8d, %eax addl %edx, %r15d movl %r8d, %ecx vpsrld $18, %xmm5, %xmm8 vpslld $14, %xmm5, %xmm9 # rnd_0: 5 - 6 andl %eax, %ebx rorl $9, %ecx xorl %r8d, %ecx xorl %r9d, %ebx rorl $11, %ecx addl %r15d, %r11d xorl %r8d, %ecx addl %ebx, %r15d vpor %xmm6, %xmm7, %xmm6 vpor %xmm8, %xmm9, %xmm8 # rnd_0: 7 - 7 rorl $2, %ecx movl %r11d, %edx addl %ecx, %r15d # rnd_1: 0 - 1 rorl $14, %edx movl %r8d, %ebx movl %r12d, %ecx addl 4(%rsp), %r14d xorl %r13d, %ecx vpsrld $3, %xmm5, %xmm9 vpxor %xmm6, %xmm8, %xmm6 # rnd_1: 2 - 3 xorl %r11d, %edx andl %r11d, %ecx rorl $5, %edx xorl %r13d, %ecx xorl %r11d, %edx addl %ecx, %r14d vpxor %xmm6, %xmm9, %xmm5 vpshufd $0xfa, %xmm3, %xmm6 # rnd_1: 4 - 5 rorl $6, %edx xorl %r15d, %ebx addl %edx, %r14d movl %r15d, %ecx andl %ebx, %eax rorl $9, %ecx xorl %r15d, %ecx xorl %r8d, %eax vpsrld $10, %xmm6, %xmm8 vpsrlq $19, %xmm6, %xmm7 # rnd_1: 6 - 7 rorl $11, %ecx addl %r14d, %r10d xorl %r15d, %ecx addl %eax, %r14d rorl $2, %ecx movl %r10d, %edx addl %ecx, %r14d # rnd_0: 0 - 0 rorl $14, %edx vpsrlq $0x11, %xmm6, %xmm6 vpaddd %xmm0, %xmm4, %xmm4 # rnd_0: 1 - 3 movl %r15d, %eax movl %r11d, %ecx addl 8(%rsp), %r13d xorl %r12d, %ecx xorl %r10d, %edx andl %r10d, %ecx rorl $5, %edx xorl %r12d, %ecx xorl %r10d, %edx addl %ecx, %r13d vpxor %xmm6, %xmm7, %xmm6 vpaddd %xmm5, %xmm4, %xmm4 # rnd_0: 4 - 4 rorl $6, %edx xorl %r14d, %eax addl %edx, %r13d movl %r14d, %ecx vpxor %xmm6, %xmm8, %xmm8 # rnd_0: 5 - 5 andl %eax, %ebx rorl $9, %ecx xorl %r14d, %ecx xorl %r15d, %ebx vpshufb %xmm11, %xmm8, %xmm8 # rnd_0: 6 - 6 rorl $11, %ecx addl %r13d, %r9d xorl %r14d, %ecx addl %ebx, %r13d vpaddd %xmm8, %xmm4, %xmm4 # rnd_0: 7 - 7 rorl $2, %ecx movl %r9d, %edx addl %ecx, %r13d # rnd_1: 0 - 0 rorl $14, %edx vpshufd $0x50, %xmm4, %xmm6 # rnd_1: 1 - 1 movl %r14d, %ebx movl %r10d, %ecx addl 12(%rsp), %r12d xorl %r11d, %ecx vpsrlq $0x11, %xmm6, %xmm8 vpsrlq $19, %xmm6, %xmm7 # rnd_1: 2 - 3 xorl %r9d, %edx andl %r9d, %ecx rorl $5, %edx xorl %r11d, %ecx xorl %r9d, %edx addl %ecx, %r12d vpsrld $10, %xmm6, %xmm9 vpxor %xmm8, %xmm7, %xmm8 # rnd_1: 4 - 5 rorl $6, %edx xorl %r13d, %ebx addl %edx, %r12d movl %r13d, %ecx andl %ebx, %eax rorl $9, %ecx xorl %r13d, %ecx xorl %r14d, %eax vpxor %xmm9, %xmm8, %xmm9 # rnd_1: 6 - 6 rorl $11, %ecx addl %r12d, %r8d xorl %r13d, %ecx addl %eax, %r12d vpshufb %xmm12, %xmm9, %xmm9 # rnd_1: 7 - 7 rorl $2, %ecx movl %r8d, %edx addl %ecx, %r12d vpaddd %xmm4, %xmm9, %xmm0 # msg_sched done: 0-3 # msg_sched: 4-7 # rnd_0: 0 - 0 rorl $14, %edx vpalignr $4, %xmm1, %xmm2, %xmm5 vpalignr $4, %xmm3, %xmm0, %xmm4 # rnd_0: 1 - 2 movl %r13d, %eax movl %r9d, %ecx addl 16(%rsp), %r11d xorl %r10d, %ecx xorl %r8d, %edx andl %r8d, %ecx vpsrld $7, %xmm5, %xmm6 vpslld $25, %xmm5, %xmm7 # rnd_0: 3 - 4 rorl $5, %edx xorl %r10d, %ecx xorl %r8d, %edx addl %ecx, %r11d rorl $6, %edx xorl %r12d, %eax addl %edx, %r11d movl %r12d, %ecx vpsrld $18, %xmm5, %xmm8 vpslld $14, %xmm5, %xmm9 # rnd_0: 5 - 6 andl %eax, %ebx rorl $9, %ecx xorl %r12d, %ecx xorl %r13d, %ebx rorl $11, %ecx addl 
%r11d, %r15d xorl %r12d, %ecx addl %ebx, %r11d vpor %xmm6, %xmm7, %xmm6 vpor %xmm8, %xmm9, %xmm8 # rnd_0: 7 - 7 rorl $2, %ecx movl %r15d, %edx addl %ecx, %r11d # rnd_1: 0 - 1 rorl $14, %edx movl %r12d, %ebx movl %r8d, %ecx addl 20(%rsp), %r10d xorl %r9d, %ecx vpsrld $3, %xmm5, %xmm9 vpxor %xmm6, %xmm8, %xmm6 # rnd_1: 2 - 3 xorl %r15d, %edx andl %r15d, %ecx rorl $5, %edx xorl %r9d, %ecx xorl %r15d, %edx addl %ecx, %r10d vpxor %xmm6, %xmm9, %xmm5 vpshufd $0xfa, %xmm0, %xmm6 # rnd_1: 4 - 5 rorl $6, %edx xorl %r11d, %ebx addl %edx, %r10d movl %r11d, %ecx andl %ebx, %eax rorl $9, %ecx xorl %r11d, %ecx xorl %r12d, %eax vpsrld $10, %xmm6, %xmm8 vpsrlq $19, %xmm6, %xmm7 # rnd_1: 6 - 7 rorl $11, %ecx addl %r10d, %r14d xorl %r11d, %ecx addl %eax, %r10d rorl $2, %ecx movl %r14d, %edx addl %ecx, %r10d # rnd_0: 0 - 0 rorl $14, %edx vpsrlq $0x11, %xmm6, %xmm6 vpaddd %xmm1, %xmm4, %xmm4 # rnd_0: 1 - 3 movl %r11d, %eax movl %r15d, %ecx addl 24(%rsp), %r9d xorl %r8d, %ecx xorl %r14d, %edx andl %r14d, %ecx rorl $5, %edx xorl %r8d, %ecx xorl %r14d, %edx addl %ecx, %r9d vpxor %xmm6, %xmm7, %xmm6 vpaddd %xmm5, %xmm4, %xmm4 # rnd_0: 4 - 4 rorl $6, %edx xorl %r10d, %eax addl %edx, %r9d movl %r10d, %ecx vpxor %xmm6, %xmm8, %xmm8 # rnd_0: 5 - 5 andl %eax, %ebx rorl $9, %ecx xorl %r10d, %ecx xorl %r11d, %ebx vpshufb %xmm11, %xmm8, %xmm8 # rnd_0: 6 - 6 rorl $11, %ecx addl %r9d, %r13d xorl %r10d, %ecx addl %ebx, %r9d vpaddd %xmm8, %xmm4, %xmm4 # rnd_0: 7 - 7 rorl $2, %ecx movl %r13d, %edx addl %ecx, %r9d # rnd_1: 0 - 0 rorl $14, %edx vpshufd $0x50, %xmm4, %xmm6 # rnd_1: 1 - 1 movl %r10d, %ebx movl %r14d, %ecx addl 28(%rsp), %r8d xorl %r15d, %ecx vpsrlq $0x11, %xmm6, %xmm8 vpsrlq $19, %xmm6, %xmm7 # rnd_1: 2 - 3 xorl %r13d, %edx andl %r13d, %ecx rorl $5, %edx xorl %r15d, %ecx xorl %r13d, %edx addl %ecx, %r8d vpsrld $10, %xmm6, %xmm9 vpxor %xmm8, %xmm7, %xmm8 # rnd_1: 4 - 5 rorl $6, %edx xorl %r9d, %ebx addl %edx, %r8d movl %r9d, %ecx andl %ebx, %eax rorl $9, %ecx xorl %r9d, %ecx xorl %r10d, %eax vpxor %xmm9, %xmm8, %xmm9 # rnd_1: 6 - 6 rorl $11, %ecx addl %r8d, %r12d xorl %r9d, %ecx addl %eax, %r8d vpshufb %xmm12, %xmm9, %xmm9 # rnd_1: 7 - 7 rorl $2, %ecx movl %r12d, %edx addl %ecx, %r8d vpaddd %xmm4, %xmm9, %xmm1 # msg_sched done: 4-7 # msg_sched: 8-11 # rnd_0: 0 - 0 rorl $14, %edx vpalignr $4, %xmm2, %xmm3, %xmm5 vpalignr $4, %xmm0, %xmm1, %xmm4 # rnd_0: 1 - 2 movl %r9d, %eax movl %r13d, %ecx addl 32(%rsp), %r15d xorl %r14d, %ecx xorl %r12d, %edx andl %r12d, %ecx vpsrld $7, %xmm5, %xmm6 vpslld $25, %xmm5, %xmm7 # rnd_0: 3 - 4 rorl $5, %edx xorl %r14d, %ecx xorl %r12d, %edx addl %ecx, %r15d rorl $6, %edx xorl %r8d, %eax addl %edx, %r15d movl %r8d, %ecx vpsrld $18, %xmm5, %xmm8 vpslld $14, %xmm5, %xmm9 # rnd_0: 5 - 6 andl %eax, %ebx rorl $9, %ecx xorl %r8d, %ecx xorl %r9d, %ebx rorl $11, %ecx addl %r15d, %r11d xorl %r8d, %ecx addl %ebx, %r15d vpor %xmm6, %xmm7, %xmm6 vpor %xmm8, %xmm9, %xmm8 # rnd_0: 7 - 7 rorl $2, %ecx movl %r11d, %edx addl %ecx, %r15d # rnd_1: 0 - 1 rorl $14, %edx movl %r8d, %ebx movl %r12d, %ecx addl 36(%rsp), %r14d xorl %r13d, %ecx vpsrld $3, %xmm5, %xmm9 vpxor %xmm6, %xmm8, %xmm6 # rnd_1: 2 - 3 xorl %r11d, %edx andl %r11d, %ecx rorl $5, %edx xorl %r13d, %ecx xorl %r11d, %edx addl %ecx, %r14d vpxor %xmm6, %xmm9, %xmm5 vpshufd $0xfa, %xmm1, %xmm6 # rnd_1: 4 - 5 rorl $6, %edx xorl %r15d, %ebx addl %edx, %r14d movl %r15d, %ecx andl %ebx, %eax rorl $9, %ecx xorl %r15d, %ecx xorl %r8d, %eax vpsrld $10, %xmm6, %xmm8 vpsrlq $19, %xmm6, %xmm7 # rnd_1: 6 - 7 rorl $11, %ecx addl %r14d, %r10d xorl %r15d, 
%ecx addl %eax, %r14d rorl $2, %ecx movl %r10d, %edx addl %ecx, %r14d # rnd_0: 0 - 0 rorl $14, %edx vpsrlq $0x11, %xmm6, %xmm6 vpaddd %xmm2, %xmm4, %xmm4 # rnd_0: 1 - 3 movl %r15d, %eax movl %r11d, %ecx addl 40(%rsp), %r13d xorl %r12d, %ecx xorl %r10d, %edx andl %r10d, %ecx rorl $5, %edx xorl %r12d, %ecx xorl %r10d, %edx addl %ecx, %r13d vpxor %xmm6, %xmm7, %xmm6 vpaddd %xmm5, %xmm4, %xmm4 # rnd_0: 4 - 4 rorl $6, %edx xorl %r14d, %eax addl %edx, %r13d movl %r14d, %ecx vpxor %xmm6, %xmm8, %xmm8 # rnd_0: 5 - 5 andl %eax, %ebx rorl $9, %ecx xorl %r14d, %ecx xorl %r15d, %ebx vpshufb %xmm11, %xmm8, %xmm8 # rnd_0: 6 - 6 rorl $11, %ecx addl %r13d, %r9d xorl %r14d, %ecx addl %ebx, %r13d vpaddd %xmm8, %xmm4, %xmm4 # rnd_0: 7 - 7 rorl $2, %ecx movl %r9d, %edx addl %ecx, %r13d # rnd_1: 0 - 0 rorl $14, %edx vpshufd $0x50, %xmm4, %xmm6 # rnd_1: 1 - 1 movl %r14d, %ebx movl %r10d, %ecx addl 44(%rsp), %r12d xorl %r11d, %ecx vpsrlq $0x11, %xmm6, %xmm8 vpsrlq $19, %xmm6, %xmm7 # rnd_1: 2 - 3 xorl %r9d, %edx andl %r9d, %ecx rorl $5, %edx xorl %r11d, %ecx xorl %r9d, %edx addl %ecx, %r12d vpsrld $10, %xmm6, %xmm9 vpxor %xmm8, %xmm7, %xmm8 # rnd_1: 4 - 5 rorl $6, %edx xorl %r13d, %ebx addl %edx, %r12d movl %r13d, %ecx andl %ebx, %eax rorl $9, %ecx xorl %r13d, %ecx xorl %r14d, %eax vpxor %xmm9, %xmm8, %xmm9 # rnd_1: 6 - 6 rorl $11, %ecx addl %r12d, %r8d xorl %r13d, %ecx addl %eax, %r12d vpshufb %xmm12, %xmm9, %xmm9 # rnd_1: 7 - 7 rorl $2, %ecx movl %r8d, %edx addl %ecx, %r12d vpaddd %xmm4, %xmm9, %xmm2 # msg_sched done: 8-11 # msg_sched: 12-15 # rnd_0: 0 - 0 rorl $14, %edx vpalignr $4, %xmm3, %xmm0, %xmm5 vpalignr $4, %xmm1, %xmm2, %xmm4 # rnd_0: 1 - 2 movl %r13d, %eax movl %r9d, %ecx addl 48(%rsp), %r11d xorl %r10d, %ecx xorl %r8d, %edx andl %r8d, %ecx vpsrld $7, %xmm5, %xmm6 vpslld $25, %xmm5, %xmm7 # rnd_0: 3 - 4 rorl $5, %edx xorl %r10d, %ecx xorl %r8d, %edx addl %ecx, %r11d rorl $6, %edx xorl %r12d, %eax addl %edx, %r11d movl %r12d, %ecx vpsrld $18, %xmm5, %xmm8 vpslld $14, %xmm5, %xmm9 # rnd_0: 5 - 6 andl %eax, %ebx rorl $9, %ecx xorl %r12d, %ecx xorl %r13d, %ebx rorl $11, %ecx addl %r11d, %r15d xorl %r12d, %ecx addl %ebx, %r11d vpor %xmm6, %xmm7, %xmm6 vpor %xmm8, %xmm9, %xmm8 # rnd_0: 7 - 7 rorl $2, %ecx movl %r15d, %edx addl %ecx, %r11d # rnd_1: 0 - 1 rorl $14, %edx movl %r12d, %ebx movl %r8d, %ecx addl 52(%rsp), %r10d xorl %r9d, %ecx vpsrld $3, %xmm5, %xmm9 vpxor %xmm6, %xmm8, %xmm6 # rnd_1: 2 - 3 xorl %r15d, %edx andl %r15d, %ecx rorl $5, %edx xorl %r9d, %ecx xorl %r15d, %edx addl %ecx, %r10d vpxor %xmm6, %xmm9, %xmm5 vpshufd $0xfa, %xmm2, %xmm6 # rnd_1: 4 - 5 rorl $6, %edx xorl %r11d, %ebx addl %edx, %r10d movl %r11d, %ecx andl %ebx, %eax rorl $9, %ecx xorl %r11d, %ecx xorl %r12d, %eax vpsrld $10, %xmm6, %xmm8 vpsrlq $19, %xmm6, %xmm7 # rnd_1: 6 - 7 rorl $11, %ecx addl %r10d, %r14d xorl %r11d, %ecx addl %eax, %r10d rorl $2, %ecx movl %r14d, %edx addl %ecx, %r10d # rnd_0: 0 - 0 rorl $14, %edx vpsrlq $0x11, %xmm6, %xmm6 vpaddd %xmm3, %xmm4, %xmm4 # rnd_0: 1 - 3 movl %r11d, %eax movl %r15d, %ecx addl 56(%rsp), %r9d xorl %r8d, %ecx xorl %r14d, %edx andl %r14d, %ecx rorl $5, %edx xorl %r8d, %ecx xorl %r14d, %edx addl %ecx, %r9d vpxor %xmm6, %xmm7, %xmm6 vpaddd %xmm5, %xmm4, %xmm4 # rnd_0: 4 - 4 rorl $6, %edx xorl %r10d, %eax addl %edx, %r9d movl %r10d, %ecx vpxor %xmm6, %xmm8, %xmm8 # rnd_0: 5 - 5 andl %eax, %ebx rorl $9, %ecx xorl %r10d, %ecx xorl %r11d, %ebx vpshufb %xmm11, %xmm8, %xmm8 # rnd_0: 6 - 6 rorl $11, %ecx addl %r9d, %r13d xorl %r10d, %ecx addl %ebx, %r9d vpaddd %xmm8, %xmm4, %xmm4 # rnd_0: 7 
- 7 rorl $2, %ecx movl %r13d, %edx addl %ecx, %r9d # rnd_1: 0 - 0 rorl $14, %edx vpshufd $0x50, %xmm4, %xmm6 # rnd_1: 1 - 1 movl %r10d, %ebx movl %r14d, %ecx addl 60(%rsp), %r8d xorl %r15d, %ecx vpsrlq $0x11, %xmm6, %xmm8 vpsrlq $19, %xmm6, %xmm7 # rnd_1: 2 - 3 xorl %r13d, %edx andl %r13d, %ecx rorl $5, %edx xorl %r15d, %ecx xorl %r13d, %edx addl %ecx, %r8d vpsrld $10, %xmm6, %xmm9 vpxor %xmm8, %xmm7, %xmm8 # rnd_1: 4 - 5 rorl $6, %edx xorl %r9d, %ebx addl %edx, %r8d movl %r9d, %ecx andl %ebx, %eax rorl $9, %ecx xorl %r9d, %ecx xorl %r10d, %eax vpxor %xmm9, %xmm8, %xmm9 # rnd_1: 6 - 6 rorl $11, %ecx addl %r8d, %r12d xorl %r9d, %ecx addl %eax, %r8d vpshufb %xmm12, %xmm9, %xmm9 # rnd_1: 7 - 7 rorl $2, %ecx movl %r12d, %edx addl %ecx, %r8d vpaddd %xmm4, %xmm9, %xmm3 # msg_sched done: 12-15 # set_w_k_xfer_4: 4 vpaddd 64+L_avx1_sha256_k(%rip), %xmm0, %xmm4 vpaddd 80+L_avx1_sha256_k(%rip), %xmm1, %xmm5 vmovdqu %xmm4, (%rsp) vmovdqu %xmm5, 16(%rsp) vpaddd 96+L_avx1_sha256_k(%rip), %xmm2, %xmm6 vpaddd 112+L_avx1_sha256_k(%rip), %xmm3, %xmm7 vmovdqu %xmm6, 32(%rsp) vmovdqu %xmm7, 48(%rsp) # msg_sched: 0-3 # rnd_0: 0 - 0 rorl $14, %edx vpalignr $4, %xmm0, %xmm1, %xmm5 vpalignr $4, %xmm2, %xmm3, %xmm4 # rnd_0: 1 - 2 movl %r9d, %eax movl %r13d, %ecx addl (%rsp), %r15d xorl %r14d, %ecx xorl %r12d, %edx andl %r12d, %ecx vpsrld $7, %xmm5, %xmm6 vpslld $25, %xmm5, %xmm7 # rnd_0: 3 - 4 rorl $5, %edx xorl %r14d, %ecx xorl %r12d, %edx addl %ecx, %r15d rorl $6, %edx xorl %r8d, %eax addl %edx, %r15d movl %r8d, %ecx vpsrld $18, %xmm5, %xmm8 vpslld $14, %xmm5, %xmm9 # rnd_0: 5 - 6 andl %eax, %ebx rorl $9, %ecx xorl %r8d, %ecx xorl %r9d, %ebx rorl $11, %ecx addl %r15d, %r11d xorl %r8d, %ecx addl %ebx, %r15d vpor %xmm6, %xmm7, %xmm6 vpor %xmm8, %xmm9, %xmm8 # rnd_0: 7 - 7 rorl $2, %ecx movl %r11d, %edx addl %ecx, %r15d # rnd_1: 0 - 1 rorl $14, %edx movl %r8d, %ebx movl %r12d, %ecx addl 4(%rsp), %r14d xorl %r13d, %ecx vpsrld $3, %xmm5, %xmm9 vpxor %xmm6, %xmm8, %xmm6 # rnd_1: 2 - 3 xorl %r11d, %edx andl %r11d, %ecx rorl $5, %edx xorl %r13d, %ecx xorl %r11d, %edx addl %ecx, %r14d vpxor %xmm6, %xmm9, %xmm5 vpshufd $0xfa, %xmm3, %xmm6 # rnd_1: 4 - 5 rorl $6, %edx xorl %r15d, %ebx addl %edx, %r14d movl %r15d, %ecx andl %ebx, %eax rorl $9, %ecx xorl %r15d, %ecx xorl %r8d, %eax vpsrld $10, %xmm6, %xmm8 vpsrlq $19, %xmm6, %xmm7 # rnd_1: 6 - 7 rorl $11, %ecx addl %r14d, %r10d xorl %r15d, %ecx addl %eax, %r14d rorl $2, %ecx movl %r10d, %edx addl %ecx, %r14d # rnd_0: 0 - 0 rorl $14, %edx vpsrlq $0x11, %xmm6, %xmm6 vpaddd %xmm0, %xmm4, %xmm4 # rnd_0: 1 - 3 movl %r15d, %eax movl %r11d, %ecx addl 8(%rsp), %r13d xorl %r12d, %ecx xorl %r10d, %edx andl %r10d, %ecx rorl $5, %edx xorl %r12d, %ecx xorl %r10d, %edx addl %ecx, %r13d vpxor %xmm6, %xmm7, %xmm6 vpaddd %xmm5, %xmm4, %xmm4 # rnd_0: 4 - 4 rorl $6, %edx xorl %r14d, %eax addl %edx, %r13d movl %r14d, %ecx vpxor %xmm6, %xmm8, %xmm8 # rnd_0: 5 - 5 andl %eax, %ebx rorl $9, %ecx xorl %r14d, %ecx xorl %r15d, %ebx vpshufb %xmm11, %xmm8, %xmm8 # rnd_0: 6 - 6 rorl $11, %ecx addl %r13d, %r9d xorl %r14d, %ecx addl %ebx, %r13d vpaddd %xmm8, %xmm4, %xmm4 # rnd_0: 7 - 7 rorl $2, %ecx movl %r9d, %edx addl %ecx, %r13d # rnd_1: 0 - 0 rorl $14, %edx vpshufd $0x50, %xmm4, %xmm6 # rnd_1: 1 - 1 movl %r14d, %ebx movl %r10d, %ecx addl 12(%rsp), %r12d xorl %r11d, %ecx vpsrlq $0x11, %xmm6, %xmm8 vpsrlq $19, %xmm6, %xmm7 # rnd_1: 2 - 3 xorl %r9d, %edx andl %r9d, %ecx rorl $5, %edx xorl %r11d, %ecx xorl %r9d, %edx addl %ecx, %r12d vpsrld $10, %xmm6, %xmm9 vpxor %xmm8, %xmm7, %xmm8 # rnd_1: 4 - 5 rorl 
$6, %edx xorl %r13d, %ebx addl %edx, %r12d movl %r13d, %ecx andl %ebx, %eax rorl $9, %ecx xorl %r13d, %ecx xorl %r14d, %eax vpxor %xmm9, %xmm8, %xmm9 # rnd_1: 6 - 6 rorl $11, %ecx addl %r12d, %r8d xorl %r13d, %ecx addl %eax, %r12d vpshufb %xmm12, %xmm9, %xmm9 # rnd_1: 7 - 7 rorl $2, %ecx movl %r8d, %edx addl %ecx, %r12d vpaddd %xmm4, %xmm9, %xmm0 # msg_sched done: 0-3 # msg_sched: 4-7 # rnd_0: 0 - 0 rorl $14, %edx vpalignr $4, %xmm1, %xmm2, %xmm5 vpalignr $4, %xmm3, %xmm0, %xmm4 # rnd_0: 1 - 2 movl %r13d, %eax movl %r9d, %ecx addl 16(%rsp), %r11d xorl %r10d, %ecx xorl %r8d, %edx andl %r8d, %ecx vpsrld $7, %xmm5, %xmm6 vpslld $25, %xmm5, %xmm7 # rnd_0: 3 - 4 rorl $5, %edx xorl %r10d, %ecx xorl %r8d, %edx addl %ecx, %r11d rorl $6, %edx xorl %r12d, %eax addl %edx, %r11d movl %r12d, %ecx vpsrld $18, %xmm5, %xmm8 vpslld $14, %xmm5, %xmm9 # rnd_0: 5 - 6 andl %eax, %ebx rorl $9, %ecx xorl %r12d, %ecx xorl %r13d, %ebx rorl $11, %ecx addl %r11d, %r15d xorl %r12d, %ecx addl %ebx, %r11d vpor %xmm6, %xmm7, %xmm6 vpor %xmm8, %xmm9, %xmm8 # rnd_0: 7 - 7 rorl $2, %ecx movl %r15d, %edx addl %ecx, %r11d # rnd_1: 0 - 1 rorl $14, %edx movl %r12d, %ebx movl %r8d, %ecx addl 20(%rsp), %r10d xorl %r9d, %ecx vpsrld $3, %xmm5, %xmm9 vpxor %xmm6, %xmm8, %xmm6 # rnd_1: 2 - 3 xorl %r15d, %edx andl %r15d, %ecx rorl $5, %edx xorl %r9d, %ecx xorl %r15d, %edx addl %ecx, %r10d vpxor %xmm6, %xmm9, %xmm5 vpshufd $0xfa, %xmm0, %xmm6 # rnd_1: 4 - 5 rorl $6, %edx xorl %r11d, %ebx addl %edx, %r10d movl %r11d, %ecx andl %ebx, %eax rorl $9, %ecx xorl %r11d, %ecx xorl %r12d, %eax vpsrld $10, %xmm6, %xmm8 vpsrlq $19, %xmm6, %xmm7 # rnd_1: 6 - 7 rorl $11, %ecx addl %r10d, %r14d xorl %r11d, %ecx addl %eax, %r10d rorl $2, %ecx movl %r14d, %edx addl %ecx, %r10d # rnd_0: 0 - 0 rorl $14, %edx vpsrlq $0x11, %xmm6, %xmm6 vpaddd %xmm1, %xmm4, %xmm4 # rnd_0: 1 - 3 movl %r11d, %eax movl %r15d, %ecx addl 24(%rsp), %r9d xorl %r8d, %ecx xorl %r14d, %edx andl %r14d, %ecx rorl $5, %edx xorl %r8d, %ecx xorl %r14d, %edx addl %ecx, %r9d vpxor %xmm6, %xmm7, %xmm6 vpaddd %xmm5, %xmm4, %xmm4 # rnd_0: 4 - 4 rorl $6, %edx xorl %r10d, %eax addl %edx, %r9d movl %r10d, %ecx vpxor %xmm6, %xmm8, %xmm8 # rnd_0: 5 - 5 andl %eax, %ebx rorl $9, %ecx xorl %r10d, %ecx xorl %r11d, %ebx vpshufb %xmm11, %xmm8, %xmm8 # rnd_0: 6 - 6 rorl $11, %ecx addl %r9d, %r13d xorl %r10d, %ecx addl %ebx, %r9d vpaddd %xmm8, %xmm4, %xmm4 # rnd_0: 7 - 7 rorl $2, %ecx movl %r13d, %edx addl %ecx, %r9d # rnd_1: 0 - 0 rorl $14, %edx vpshufd $0x50, %xmm4, %xmm6 # rnd_1: 1 - 1 movl %r10d, %ebx movl %r14d, %ecx addl 28(%rsp), %r8d xorl %r15d, %ecx vpsrlq $0x11, %xmm6, %xmm8 vpsrlq $19, %xmm6, %xmm7 # rnd_1: 2 - 3 xorl %r13d, %edx andl %r13d, %ecx rorl $5, %edx xorl %r15d, %ecx xorl %r13d, %edx addl %ecx, %r8d vpsrld $10, %xmm6, %xmm9 vpxor %xmm8, %xmm7, %xmm8 # rnd_1: 4 - 5 rorl $6, %edx xorl %r9d, %ebx addl %edx, %r8d movl %r9d, %ecx andl %ebx, %eax rorl $9, %ecx xorl %r9d, %ecx xorl %r10d, %eax vpxor %xmm9, %xmm8, %xmm9 # rnd_1: 6 - 6 rorl $11, %ecx addl %r8d, %r12d xorl %r9d, %ecx addl %eax, %r8d vpshufb %xmm12, %xmm9, %xmm9 # rnd_1: 7 - 7 rorl $2, %ecx movl %r12d, %edx addl %ecx, %r8d vpaddd %xmm4, %xmm9, %xmm1 # msg_sched done: 4-7 # msg_sched: 8-11 # rnd_0: 0 - 0 rorl $14, %edx vpalignr $4, %xmm2, %xmm3, %xmm5 vpalignr $4, %xmm0, %xmm1, %xmm4 # rnd_0: 1 - 2 movl %r9d, %eax movl %r13d, %ecx addl 32(%rsp), %r15d xorl %r14d, %ecx xorl %r12d, %edx andl %r12d, %ecx vpsrld $7, %xmm5, %xmm6 vpslld $25, %xmm5, %xmm7 # rnd_0: 3 - 4 rorl $5, %edx xorl %r14d, %ecx xorl %r12d, %edx addl %ecx, %r15d 
rorl $6, %edx xorl %r8d, %eax addl %edx, %r15d movl %r8d, %ecx vpsrld $18, %xmm5, %xmm8 vpslld $14, %xmm5, %xmm9 # rnd_0: 5 - 6 andl %eax, %ebx rorl $9, %ecx xorl %r8d, %ecx xorl %r9d, %ebx rorl $11, %ecx addl %r15d, %r11d xorl %r8d, %ecx addl %ebx, %r15d vpor %xmm6, %xmm7, %xmm6 vpor %xmm8, %xmm9, %xmm8 # rnd_0: 7 - 7 rorl $2, %ecx movl %r11d, %edx addl %ecx, %r15d # rnd_1: 0 - 1 rorl $14, %edx movl %r8d, %ebx movl %r12d, %ecx addl 36(%rsp), %r14d xorl %r13d, %ecx vpsrld $3, %xmm5, %xmm9 vpxor %xmm6, %xmm8, %xmm6 # rnd_1: 2 - 3 xorl %r11d, %edx andl %r11d, %ecx rorl $5, %edx xorl %r13d, %ecx xorl %r11d, %edx addl %ecx, %r14d vpxor %xmm6, %xmm9, %xmm5 vpshufd $0xfa, %xmm1, %xmm6 # rnd_1: 4 - 5 rorl $6, %edx xorl %r15d, %ebx addl %edx, %r14d movl %r15d, %ecx andl %ebx, %eax rorl $9, %ecx xorl %r15d, %ecx xorl %r8d, %eax vpsrld $10, %xmm6, %xmm8 vpsrlq $19, %xmm6, %xmm7 # rnd_1: 6 - 7 rorl $11, %ecx addl %r14d, %r10d xorl %r15d, %ecx addl %eax, %r14d rorl $2, %ecx movl %r10d, %edx addl %ecx, %r14d # rnd_0: 0 - 0 rorl $14, %edx vpsrlq $0x11, %xmm6, %xmm6 vpaddd %xmm2, %xmm4, %xmm4 # rnd_0: 1 - 3 movl %r15d, %eax movl %r11d, %ecx addl 40(%rsp), %r13d xorl %r12d, %ecx xorl %r10d, %edx andl %r10d, %ecx rorl $5, %edx xorl %r12d, %ecx xorl %r10d, %edx addl %ecx, %r13d vpxor %xmm6, %xmm7, %xmm6 vpaddd %xmm5, %xmm4, %xmm4 # rnd_0: 4 - 4 rorl $6, %edx xorl %r14d, %eax addl %edx, %r13d movl %r14d, %ecx vpxor %xmm6, %xmm8, %xmm8 # rnd_0: 5 - 5 andl %eax, %ebx rorl $9, %ecx xorl %r14d, %ecx xorl %r15d, %ebx vpshufb %xmm11, %xmm8, %xmm8 # rnd_0: 6 - 6 rorl $11, %ecx addl %r13d, %r9d xorl %r14d, %ecx addl %ebx, %r13d vpaddd %xmm8, %xmm4, %xmm4 # rnd_0: 7 - 7 rorl $2, %ecx movl %r9d, %edx addl %ecx, %r13d # rnd_1: 0 - 0 rorl $14, %edx vpshufd $0x50, %xmm4, %xmm6 # rnd_1: 1 - 1 movl %r14d, %ebx movl %r10d, %ecx addl 44(%rsp), %r12d xorl %r11d, %ecx vpsrlq $0x11, %xmm6, %xmm8 vpsrlq $19, %xmm6, %xmm7 # rnd_1: 2 - 3 xorl %r9d, %edx andl %r9d, %ecx rorl $5, %edx xorl %r11d, %ecx xorl %r9d, %edx addl %ecx, %r12d vpsrld $10, %xmm6, %xmm9 vpxor %xmm8, %xmm7, %xmm8 # rnd_1: 4 - 5 rorl $6, %edx xorl %r13d, %ebx addl %edx, %r12d movl %r13d, %ecx andl %ebx, %eax rorl $9, %ecx xorl %r13d, %ecx xorl %r14d, %eax vpxor %xmm9, %xmm8, %xmm9 # rnd_1: 6 - 6 rorl $11, %ecx addl %r12d, %r8d xorl %r13d, %ecx addl %eax, %r12d vpshufb %xmm12, %xmm9, %xmm9 # rnd_1: 7 - 7 rorl $2, %ecx movl %r8d, %edx addl %ecx, %r12d vpaddd %xmm4, %xmm9, %xmm2 # msg_sched done: 8-11 # msg_sched: 12-15 # rnd_0: 0 - 0 rorl $14, %edx vpalignr $4, %xmm3, %xmm0, %xmm5 vpalignr $4, %xmm1, %xmm2, %xmm4 # rnd_0: 1 - 2 movl %r13d, %eax movl %r9d, %ecx addl 48(%rsp), %r11d xorl %r10d, %ecx xorl %r8d, %edx andl %r8d, %ecx vpsrld $7, %xmm5, %xmm6 vpslld $25, %xmm5, %xmm7 # rnd_0: 3 - 4 rorl $5, %edx xorl %r10d, %ecx xorl %r8d, %edx addl %ecx, %r11d rorl $6, %edx xorl %r12d, %eax addl %edx, %r11d movl %r12d, %ecx vpsrld $18, %xmm5, %xmm8 vpslld $14, %xmm5, %xmm9 # rnd_0: 5 - 6 andl %eax, %ebx rorl $9, %ecx xorl %r12d, %ecx xorl %r13d, %ebx rorl $11, %ecx addl %r11d, %r15d xorl %r12d, %ecx addl %ebx, %r11d vpor %xmm6, %xmm7, %xmm6 vpor %xmm8, %xmm9, %xmm8 # rnd_0: 7 - 7 rorl $2, %ecx movl %r15d, %edx addl %ecx, %r11d # rnd_1: 0 - 1 rorl $14, %edx movl %r12d, %ebx movl %r8d, %ecx addl 52(%rsp), %r10d xorl %r9d, %ecx vpsrld $3, %xmm5, %xmm9 vpxor %xmm6, %xmm8, %xmm6 # rnd_1: 2 - 3 xorl %r15d, %edx andl %r15d, %ecx rorl $5, %edx xorl %r9d, %ecx xorl %r15d, %edx addl %ecx, %r10d vpxor %xmm6, %xmm9, %xmm5 vpshufd $0xfa, %xmm2, %xmm6 # rnd_1: 4 - 5 rorl $6, %edx xorl 
%r11d, %ebx addl %edx, %r10d movl %r11d, %ecx andl %ebx, %eax rorl $9, %ecx xorl %r11d, %ecx xorl %r12d, %eax vpsrld $10, %xmm6, %xmm8 vpsrlq $19, %xmm6, %xmm7 # rnd_1: 6 - 7 rorl $11, %ecx addl %r10d, %r14d xorl %r11d, %ecx addl %eax, %r10d rorl $2, %ecx movl %r14d, %edx addl %ecx, %r10d # rnd_0: 0 - 0 rorl $14, %edx vpsrlq $0x11, %xmm6, %xmm6 vpaddd %xmm3, %xmm4, %xmm4 # rnd_0: 1 - 3 movl %r11d, %eax movl %r15d, %ecx addl 56(%rsp), %r9d xorl %r8d, %ecx xorl %r14d, %edx andl %r14d, %ecx rorl $5, %edx xorl %r8d, %ecx xorl %r14d, %edx addl %ecx, %r9d vpxor %xmm6, %xmm7, %xmm6 vpaddd %xmm5, %xmm4, %xmm4 # rnd_0: 4 - 4 rorl $6, %edx xorl %r10d, %eax addl %edx, %r9d movl %r10d, %ecx vpxor %xmm6, %xmm8, %xmm8 # rnd_0: 5 - 5 andl %eax, %ebx rorl $9, %ecx xorl %r10d, %ecx xorl %r11d, %ebx vpshufb %xmm11, %xmm8, %xmm8 # rnd_0: 6 - 6 rorl $11, %ecx addl %r9d, %r13d xorl %r10d, %ecx addl %ebx, %r9d vpaddd %xmm8, %xmm4, %xmm4 # rnd_0: 7 - 7 rorl $2, %ecx movl %r13d, %edx addl %ecx, %r9d # rnd_1: 0 - 0 rorl $14, %edx vpshufd $0x50, %xmm4, %xmm6 # rnd_1: 1 - 1 movl %r10d, %ebx movl %r14d, %ecx addl 60(%rsp), %r8d xorl %r15d, %ecx vpsrlq $0x11, %xmm6, %xmm8 vpsrlq $19, %xmm6, %xmm7 # rnd_1: 2 - 3 xorl %r13d, %edx andl %r13d, %ecx rorl $5, %edx xorl %r15d, %ecx xorl %r13d, %edx addl %ecx, %r8d vpsrld $10, %xmm6, %xmm9 vpxor %xmm8, %xmm7, %xmm8 # rnd_1: 4 - 5 rorl $6, %edx xorl %r9d, %ebx addl %edx, %r8d movl %r9d, %ecx andl %ebx, %eax rorl $9, %ecx xorl %r9d, %ecx xorl %r10d, %eax vpxor %xmm9, %xmm8, %xmm9 # rnd_1: 6 - 6 rorl $11, %ecx addl %r8d, %r12d xorl %r9d, %ecx addl %eax, %r8d vpshufb %xmm12, %xmm9, %xmm9 # rnd_1: 7 - 7 rorl $2, %ecx movl %r12d, %edx addl %ecx, %r8d vpaddd %xmm4, %xmm9, %xmm3 # msg_sched done: 12-15 # set_w_k_xfer_4: 8 vpaddd 128+L_avx1_sha256_k(%rip), %xmm0, %xmm4 vpaddd 144+L_avx1_sha256_k(%rip), %xmm1, %xmm5 vmovdqu %xmm4, (%rsp) vmovdqu %xmm5, 16(%rsp) vpaddd 160+L_avx1_sha256_k(%rip), %xmm2, %xmm6 vpaddd 176+L_avx1_sha256_k(%rip), %xmm3, %xmm7 vmovdqu %xmm6, 32(%rsp) vmovdqu %xmm7, 48(%rsp) # msg_sched: 0-3 # rnd_0: 0 - 0 rorl $14, %edx vpalignr $4, %xmm0, %xmm1, %xmm5 vpalignr $4, %xmm2, %xmm3, %xmm4 # rnd_0: 1 - 2 movl %r9d, %eax movl %r13d, %ecx addl (%rsp), %r15d xorl %r14d, %ecx xorl %r12d, %edx andl %r12d, %ecx vpsrld $7, %xmm5, %xmm6 vpslld $25, %xmm5, %xmm7 # rnd_0: 3 - 4 rorl $5, %edx xorl %r14d, %ecx xorl %r12d, %edx addl %ecx, %r15d rorl $6, %edx xorl %r8d, %eax addl %edx, %r15d movl %r8d, %ecx vpsrld $18, %xmm5, %xmm8 vpslld $14, %xmm5, %xmm9 # rnd_0: 5 - 6 andl %eax, %ebx rorl $9, %ecx xorl %r8d, %ecx xorl %r9d, %ebx rorl $11, %ecx addl %r15d, %r11d xorl %r8d, %ecx addl %ebx, %r15d vpor %xmm6, %xmm7, %xmm6 vpor %xmm8, %xmm9, %xmm8 # rnd_0: 7 - 7 rorl $2, %ecx movl %r11d, %edx addl %ecx, %r15d # rnd_1: 0 - 1 rorl $14, %edx movl %r8d, %ebx movl %r12d, %ecx addl 4(%rsp), %r14d xorl %r13d, %ecx vpsrld $3, %xmm5, %xmm9 vpxor %xmm6, %xmm8, %xmm6 # rnd_1: 2 - 3 xorl %r11d, %edx andl %r11d, %ecx rorl $5, %edx xorl %r13d, %ecx xorl %r11d, %edx addl %ecx, %r14d vpxor %xmm6, %xmm9, %xmm5 vpshufd $0xfa, %xmm3, %xmm6 # rnd_1: 4 - 5 rorl $6, %edx xorl %r15d, %ebx addl %edx, %r14d movl %r15d, %ecx andl %ebx, %eax rorl $9, %ecx xorl %r15d, %ecx xorl %r8d, %eax vpsrld $10, %xmm6, %xmm8 vpsrlq $19, %xmm6, %xmm7 # rnd_1: 6 - 7 rorl $11, %ecx addl %r14d, %r10d xorl %r15d, %ecx addl %eax, %r14d rorl $2, %ecx movl %r10d, %edx addl %ecx, %r14d # rnd_0: 0 - 0 rorl $14, %edx vpsrlq $0x11, %xmm6, %xmm6 vpaddd %xmm0, %xmm4, %xmm4 # rnd_0: 1 - 3 movl %r15d, %eax movl %r11d, %ecx addl 
8(%rsp), %r13d xorl %r12d, %ecx xorl %r10d, %edx andl %r10d, %ecx rorl $5, %edx xorl %r12d, %ecx xorl %r10d, %edx addl %ecx, %r13d vpxor %xmm6, %xmm7, %xmm6 vpaddd %xmm5, %xmm4, %xmm4 # rnd_0: 4 - 4 rorl $6, %edx xorl %r14d, %eax addl %edx, %r13d movl %r14d, %ecx vpxor %xmm6, %xmm8, %xmm8 # rnd_0: 5 - 5 andl %eax, %ebx rorl $9, %ecx xorl %r14d, %ecx xorl %r15d, %ebx vpshufb %xmm11, %xmm8, %xmm8 # rnd_0: 6 - 6 rorl $11, %ecx addl %r13d, %r9d xorl %r14d, %ecx addl %ebx, %r13d vpaddd %xmm8, %xmm4, %xmm4 # rnd_0: 7 - 7 rorl $2, %ecx movl %r9d, %edx addl %ecx, %r13d # rnd_1: 0 - 0 rorl $14, %edx vpshufd $0x50, %xmm4, %xmm6 # rnd_1: 1 - 1 movl %r14d, %ebx movl %r10d, %ecx addl 12(%rsp), %r12d xorl %r11d, %ecx vpsrlq $0x11, %xmm6, %xmm8 vpsrlq $19, %xmm6, %xmm7 # rnd_1: 2 - 3 xorl %r9d, %edx andl %r9d, %ecx rorl $5, %edx xorl %r11d, %ecx xorl %r9d, %edx addl %ecx, %r12d vpsrld $10, %xmm6, %xmm9 vpxor %xmm8, %xmm7, %xmm8 # rnd_1: 4 - 5 rorl $6, %edx xorl %r13d, %ebx addl %edx, %r12d movl %r13d, %ecx andl %ebx, %eax rorl $9, %ecx xorl %r13d, %ecx xorl %r14d, %eax vpxor %xmm9, %xmm8, %xmm9 # rnd_1: 6 - 6 rorl $11, %ecx addl %r12d, %r8d xorl %r13d, %ecx addl %eax, %r12d vpshufb %xmm12, %xmm9, %xmm9 # rnd_1: 7 - 7 rorl $2, %ecx movl %r8d, %edx addl %ecx, %r12d vpaddd %xmm4, %xmm9, %xmm0 # msg_sched done: 0-3 # msg_sched: 4-7 # rnd_0: 0 - 0 rorl $14, %edx vpalignr $4, %xmm1, %xmm2, %xmm5 vpalignr $4, %xmm3, %xmm0, %xmm4 # rnd_0: 1 - 2 movl %r13d, %eax movl %r9d, %ecx addl 16(%rsp), %r11d xorl %r10d, %ecx xorl %r8d, %edx andl %r8d, %ecx vpsrld $7, %xmm5, %xmm6 vpslld $25, %xmm5, %xmm7 # rnd_0: 3 - 4 rorl $5, %edx xorl %r10d, %ecx xorl %r8d, %edx addl %ecx, %r11d rorl $6, %edx xorl %r12d, %eax addl %edx, %r11d movl %r12d, %ecx vpsrld $18, %xmm5, %xmm8 vpslld $14, %xmm5, %xmm9 # rnd_0: 5 - 6 andl %eax, %ebx rorl $9, %ecx xorl %r12d, %ecx xorl %r13d, %ebx rorl $11, %ecx addl %r11d, %r15d xorl %r12d, %ecx addl %ebx, %r11d vpor %xmm6, %xmm7, %xmm6 vpor %xmm8, %xmm9, %xmm8 # rnd_0: 7 - 7 rorl $2, %ecx movl %r15d, %edx addl %ecx, %r11d # rnd_1: 0 - 1 rorl $14, %edx movl %r12d, %ebx movl %r8d, %ecx addl 20(%rsp), %r10d xorl %r9d, %ecx vpsrld $3, %xmm5, %xmm9 vpxor %xmm6, %xmm8, %xmm6 # rnd_1: 2 - 3 xorl %r15d, %edx andl %r15d, %ecx rorl $5, %edx xorl %r9d, %ecx xorl %r15d, %edx addl %ecx, %r10d vpxor %xmm6, %xmm9, %xmm5 vpshufd $0xfa, %xmm0, %xmm6 # rnd_1: 4 - 5 rorl $6, %edx xorl %r11d, %ebx addl %edx, %r10d movl %r11d, %ecx andl %ebx, %eax rorl $9, %ecx xorl %r11d, %ecx xorl %r12d, %eax vpsrld $10, %xmm6, %xmm8 vpsrlq $19, %xmm6, %xmm7 # rnd_1: 6 - 7 rorl $11, %ecx addl %r10d, %r14d xorl %r11d, %ecx addl %eax, %r10d rorl $2, %ecx movl %r14d, %edx addl %ecx, %r10d # rnd_0: 0 - 0 rorl $14, %edx vpsrlq $0x11, %xmm6, %xmm6 vpaddd %xmm1, %xmm4, %xmm4 # rnd_0: 1 - 3 movl %r11d, %eax movl %r15d, %ecx addl 24(%rsp), %r9d xorl %r8d, %ecx xorl %r14d, %edx andl %r14d, %ecx rorl $5, %edx xorl %r8d, %ecx xorl %r14d, %edx addl %ecx, %r9d vpxor %xmm6, %xmm7, %xmm6 vpaddd %xmm5, %xmm4, %xmm4 # rnd_0: 4 - 4 rorl $6, %edx xorl %r10d, %eax addl %edx, %r9d movl %r10d, %ecx vpxor %xmm6, %xmm8, %xmm8 # rnd_0: 5 - 5 andl %eax, %ebx rorl $9, %ecx xorl %r10d, %ecx xorl %r11d, %ebx vpshufb %xmm11, %xmm8, %xmm8 # rnd_0: 6 - 6 rorl $11, %ecx addl %r9d, %r13d xorl %r10d, %ecx addl %ebx, %r9d vpaddd %xmm8, %xmm4, %xmm4 # rnd_0: 7 - 7 rorl $2, %ecx movl %r13d, %edx addl %ecx, %r9d # rnd_1: 0 - 0 rorl $14, %edx vpshufd $0x50, %xmm4, %xmm6 # rnd_1: 1 - 1 movl %r10d, %ebx movl %r14d, %ecx addl 28(%rsp), %r8d xorl %r15d, %ecx vpsrlq $0x11, 
%xmm6, %xmm8 vpsrlq $19, %xmm6, %xmm7 # rnd_1: 2 - 3 xorl %r13d, %edx andl %r13d, %ecx rorl $5, %edx xorl %r15d, %ecx xorl %r13d, %edx addl %ecx, %r8d vpsrld $10, %xmm6, %xmm9 vpxor %xmm8, %xmm7, %xmm8 # rnd_1: 4 - 5 rorl $6, %edx xorl %r9d, %ebx addl %edx, %r8d movl %r9d, %ecx andl %ebx, %eax rorl $9, %ecx xorl %r9d, %ecx xorl %r10d, %eax vpxor %xmm9, %xmm8, %xmm9 # rnd_1: 6 - 6 rorl $11, %ecx addl %r8d, %r12d xorl %r9d, %ecx addl %eax, %r8d vpshufb %xmm12, %xmm9, %xmm9 # rnd_1: 7 - 7 rorl $2, %ecx movl %r12d, %edx addl %ecx, %r8d vpaddd %xmm4, %xmm9, %xmm1 # msg_sched done: 4-7 # msg_sched: 8-11 # rnd_0: 0 - 0 rorl $14, %edx vpalignr $4, %xmm2, %xmm3, %xmm5 vpalignr $4, %xmm0, %xmm1, %xmm4 # rnd_0: 1 - 2 movl %r9d, %eax movl %r13d, %ecx addl 32(%rsp), %r15d xorl %r14d, %ecx xorl %r12d, %edx andl %r12d, %ecx vpsrld $7, %xmm5, %xmm6 vpslld $25, %xmm5, %xmm7 # rnd_0: 3 - 4 rorl $5, %edx xorl %r14d, %ecx xorl %r12d, %edx addl %ecx, %r15d rorl $6, %edx xorl %r8d, %eax addl %edx, %r15d movl %r8d, %ecx vpsrld $18, %xmm5, %xmm8 vpslld $14, %xmm5, %xmm9 # rnd_0: 5 - 6 andl %eax, %ebx rorl $9, %ecx xorl %r8d, %ecx xorl %r9d, %ebx rorl $11, %ecx addl %r15d, %r11d xorl %r8d, %ecx addl %ebx, %r15d vpor %xmm6, %xmm7, %xmm6 vpor %xmm8, %xmm9, %xmm8 # rnd_0: 7 - 7 rorl $2, %ecx movl %r11d, %edx addl %ecx, %r15d # rnd_1: 0 - 1 rorl $14, %edx movl %r8d, %ebx movl %r12d, %ecx addl 36(%rsp), %r14d xorl %r13d, %ecx vpsrld $3, %xmm5, %xmm9 vpxor %xmm6, %xmm8, %xmm6 # rnd_1: 2 - 3 xorl %r11d, %edx andl %r11d, %ecx rorl $5, %edx xorl %r13d, %ecx xorl %r11d, %edx addl %ecx, %r14d vpxor %xmm6, %xmm9, %xmm5 vpshufd $0xfa, %xmm1, %xmm6 # rnd_1: 4 - 5 rorl $6, %edx xorl %r15d, %ebx addl %edx, %r14d movl %r15d, %ecx andl %ebx, %eax rorl $9, %ecx xorl %r15d, %ecx xorl %r8d, %eax vpsrld $10, %xmm6, %xmm8 vpsrlq $19, %xmm6, %xmm7 # rnd_1: 6 - 7 rorl $11, %ecx addl %r14d, %r10d xorl %r15d, %ecx addl %eax, %r14d rorl $2, %ecx movl %r10d, %edx addl %ecx, %r14d # rnd_0: 0 - 0 rorl $14, %edx vpsrlq $0x11, %xmm6, %xmm6 vpaddd %xmm2, %xmm4, %xmm4 # rnd_0: 1 - 3 movl %r15d, %eax movl %r11d, %ecx addl 40(%rsp), %r13d xorl %r12d, %ecx xorl %r10d, %edx andl %r10d, %ecx rorl $5, %edx xorl %r12d, %ecx xorl %r10d, %edx addl %ecx, %r13d vpxor %xmm6, %xmm7, %xmm6 vpaddd %xmm5, %xmm4, %xmm4 # rnd_0: 4 - 4 rorl $6, %edx xorl %r14d, %eax addl %edx, %r13d movl %r14d, %ecx vpxor %xmm6, %xmm8, %xmm8 # rnd_0: 5 - 5 andl %eax, %ebx rorl $9, %ecx xorl %r14d, %ecx xorl %r15d, %ebx vpshufb %xmm11, %xmm8, %xmm8 # rnd_0: 6 - 6 rorl $11, %ecx addl %r13d, %r9d xorl %r14d, %ecx addl %ebx, %r13d vpaddd %xmm8, %xmm4, %xmm4 # rnd_0: 7 - 7 rorl $2, %ecx movl %r9d, %edx addl %ecx, %r13d # rnd_1: 0 - 0 rorl $14, %edx vpshufd $0x50, %xmm4, %xmm6 # rnd_1: 1 - 1 movl %r14d, %ebx movl %r10d, %ecx addl 44(%rsp), %r12d xorl %r11d, %ecx vpsrlq $0x11, %xmm6, %xmm8 vpsrlq $19, %xmm6, %xmm7 # rnd_1: 2 - 3 xorl %r9d, %edx andl %r9d, %ecx rorl $5, %edx xorl %r11d, %ecx xorl %r9d, %edx addl %ecx, %r12d vpsrld $10, %xmm6, %xmm9 vpxor %xmm8, %xmm7, %xmm8 # rnd_1: 4 - 5 rorl $6, %edx xorl %r13d, %ebx addl %edx, %r12d movl %r13d, %ecx andl %ebx, %eax rorl $9, %ecx xorl %r13d, %ecx xorl %r14d, %eax vpxor %xmm9, %xmm8, %xmm9 # rnd_1: 6 - 6 rorl $11, %ecx addl %r12d, %r8d xorl %r13d, %ecx addl %eax, %r12d vpshufb %xmm12, %xmm9, %xmm9 # rnd_1: 7 - 7 rorl $2, %ecx movl %r8d, %edx addl %ecx, %r12d vpaddd %xmm4, %xmm9, %xmm2 # msg_sched done: 8-11 # msg_sched: 12-15 # rnd_0: 0 - 0 rorl $14, %edx vpalignr $4, %xmm3, %xmm0, %xmm5 vpalignr $4, %xmm1, %xmm2, %xmm4 # rnd_0: 1 - 2 movl 
%r13d, %eax movl %r9d, %ecx addl 48(%rsp), %r11d xorl %r10d, %ecx xorl %r8d, %edx andl %r8d, %ecx vpsrld $7, %xmm5, %xmm6 vpslld $25, %xmm5, %xmm7 # rnd_0: 3 - 4 rorl $5, %edx xorl %r10d, %ecx xorl %r8d, %edx addl %ecx, %r11d rorl $6, %edx xorl %r12d, %eax addl %edx, %r11d movl %r12d, %ecx vpsrld $18, %xmm5, %xmm8 vpslld $14, %xmm5, %xmm9 # rnd_0: 5 - 6 andl %eax, %ebx rorl $9, %ecx xorl %r12d, %ecx xorl %r13d, %ebx rorl $11, %ecx addl %r11d, %r15d xorl %r12d, %ecx addl %ebx, %r11d vpor %xmm6, %xmm7, %xmm6 vpor %xmm8, %xmm9, %xmm8 # rnd_0: 7 - 7 rorl $2, %ecx movl %r15d, %edx addl %ecx, %r11d # rnd_1: 0 - 1 rorl $14, %edx movl %r12d, %ebx movl %r8d, %ecx addl 52(%rsp), %r10d xorl %r9d, %ecx vpsrld $3, %xmm5, %xmm9 vpxor %xmm6, %xmm8, %xmm6 # rnd_1: 2 - 3 xorl %r15d, %edx andl %r15d, %ecx rorl $5, %edx xorl %r9d, %ecx xorl %r15d, %edx addl %ecx, %r10d vpxor %xmm6, %xmm9, %xmm5 vpshufd $0xfa, %xmm2, %xmm6 # rnd_1: 4 - 5 rorl $6, %edx xorl %r11d, %ebx addl %edx, %r10d movl %r11d, %ecx andl %ebx, %eax rorl $9, %ecx xorl %r11d, %ecx xorl %r12d, %eax vpsrld $10, %xmm6, %xmm8 vpsrlq $19, %xmm6, %xmm7 # rnd_1: 6 - 7 rorl $11, %ecx addl %r10d, %r14d xorl %r11d, %ecx addl %eax, %r10d rorl $2, %ecx movl %r14d, %edx addl %ecx, %r10d # rnd_0: 0 - 0 rorl $14, %edx vpsrlq $0x11, %xmm6, %xmm6 vpaddd %xmm3, %xmm4, %xmm4 # rnd_0: 1 - 3 movl %r11d, %eax movl %r15d, %ecx addl 56(%rsp), %r9d xorl %r8d, %ecx xorl %r14d, %edx andl %r14d, %ecx rorl $5, %edx xorl %r8d, %ecx xorl %r14d, %edx addl %ecx, %r9d vpxor %xmm6, %xmm7, %xmm6 vpaddd %xmm5, %xmm4, %xmm4 # rnd_0: 4 - 4 rorl $6, %edx xorl %r10d, %eax addl %edx, %r9d movl %r10d, %ecx vpxor %xmm6, %xmm8, %xmm8 # rnd_0: 5 - 5 andl %eax, %ebx rorl $9, %ecx xorl %r10d, %ecx xorl %r11d, %ebx vpshufb %xmm11, %xmm8, %xmm8 # rnd_0: 6 - 6 rorl $11, %ecx addl %r9d, %r13d xorl %r10d, %ecx addl %ebx, %r9d vpaddd %xmm8, %xmm4, %xmm4 # rnd_0: 7 - 7 rorl $2, %ecx movl %r13d, %edx addl %ecx, %r9d # rnd_1: 0 - 0 rorl $14, %edx vpshufd $0x50, %xmm4, %xmm6 # rnd_1: 1 - 1 movl %r10d, %ebx movl %r14d, %ecx addl 60(%rsp), %r8d xorl %r15d, %ecx vpsrlq $0x11, %xmm6, %xmm8 vpsrlq $19, %xmm6, %xmm7 # rnd_1: 2 - 3 xorl %r13d, %edx andl %r13d, %ecx rorl $5, %edx xorl %r15d, %ecx xorl %r13d, %edx addl %ecx, %r8d vpsrld $10, %xmm6, %xmm9 vpxor %xmm8, %xmm7, %xmm8 # rnd_1: 4 - 5 rorl $6, %edx xorl %r9d, %ebx addl %edx, %r8d movl %r9d, %ecx andl %ebx, %eax rorl $9, %ecx xorl %r9d, %ecx xorl %r10d, %eax vpxor %xmm9, %xmm8, %xmm9 # rnd_1: 6 - 6 rorl $11, %ecx addl %r8d, %r12d xorl %r9d, %ecx addl %eax, %r8d vpshufb %xmm12, %xmm9, %xmm9 # rnd_1: 7 - 7 rorl $2, %ecx movl %r12d, %edx addl %ecx, %r8d vpaddd %xmm4, %xmm9, %xmm3 # msg_sched done: 12-15 # set_w_k_xfer_4: 12 vpaddd 192+L_avx1_sha256_k(%rip), %xmm0, %xmm4 vpaddd 208+L_avx1_sha256_k(%rip), %xmm1, %xmm5 vmovdqu %xmm4, (%rsp) vmovdqu %xmm5, 16(%rsp) vpaddd 224+L_avx1_sha256_k(%rip), %xmm2, %xmm6 vpaddd 240+L_avx1_sha256_k(%rip), %xmm3, %xmm7 vmovdqu %xmm6, 32(%rsp) vmovdqu %xmm7, 48(%rsp) # rnd_all_4: 0-3 addl (%rsp), %r15d movl %r13d, %ecx movl %r9d, %eax xorl %r14d, %ecx rorl $14, %edx andl %r12d, %ecx xorl %r12d, %edx xorl %r14d, %ecx rorl $5, %edx addl %ecx, %r15d xorl %r12d, %edx xorl %r8d, %eax rorl $6, %edx movl %r8d, %ecx addl %edx, %r15d rorl $9, %ecx andl %eax, %ebx xorl %r8d, %ecx xorl %r9d, %ebx rorl $11, %ecx addl %r15d, %r11d xorl %r8d, %ecx addl %ebx, %r15d rorl $2, %ecx movl %r11d, %edx addl %ecx, %r15d addl 4(%rsp), %r14d movl %r12d, %ecx movl %r8d, %ebx xorl %r13d, %ecx rorl $14, %edx andl %r11d, %ecx xorl %r11d, %edx xorl 
%r13d, %ecx rorl $5, %edx addl %ecx, %r14d xorl %r11d, %edx xorl %r15d, %ebx rorl $6, %edx movl %r15d, %ecx addl %edx, %r14d rorl $9, %ecx andl %ebx, %eax xorl %r15d, %ecx xorl %r8d, %eax rorl $11, %ecx addl %r14d, %r10d xorl %r15d, %ecx addl %eax, %r14d rorl $2, %ecx movl %r10d, %edx addl %ecx, %r14d addl 8(%rsp), %r13d movl %r11d, %ecx movl %r15d, %eax xorl %r12d, %ecx rorl $14, %edx andl %r10d, %ecx xorl %r10d, %edx xorl %r12d, %ecx rorl $5, %edx addl %ecx, %r13d xorl %r10d, %edx xorl %r14d, %eax rorl $6, %edx movl %r14d, %ecx addl %edx, %r13d rorl $9, %ecx andl %eax, %ebx xorl %r14d, %ecx xorl %r15d, %ebx rorl $11, %ecx addl %r13d, %r9d xorl %r14d, %ecx addl %ebx, %r13d rorl $2, %ecx movl %r9d, %edx addl %ecx, %r13d addl 12(%rsp), %r12d movl %r10d, %ecx movl %r14d, %ebx xorl %r11d, %ecx rorl $14, %edx andl %r9d, %ecx xorl %r9d, %edx xorl %r11d, %ecx rorl $5, %edx addl %ecx, %r12d xorl %r9d, %edx xorl %r13d, %ebx rorl $6, %edx movl %r13d, %ecx addl %edx, %r12d rorl $9, %ecx andl %ebx, %eax xorl %r13d, %ecx xorl %r14d, %eax rorl $11, %ecx addl %r12d, %r8d xorl %r13d, %ecx addl %eax, %r12d rorl $2, %ecx movl %r8d, %edx addl %ecx, %r12d # rnd_all_4: 1-4 addl 16(%rsp), %r11d movl %r9d, %ecx movl %r13d, %eax xorl %r10d, %ecx rorl $14, %edx andl %r8d, %ecx xorl %r8d, %edx xorl %r10d, %ecx rorl $5, %edx addl %ecx, %r11d xorl %r8d, %edx xorl %r12d, %eax rorl $6, %edx movl %r12d, %ecx addl %edx, %r11d rorl $9, %ecx andl %eax, %ebx xorl %r12d, %ecx xorl %r13d, %ebx rorl $11, %ecx addl %r11d, %r15d xorl %r12d, %ecx addl %ebx, %r11d rorl $2, %ecx movl %r15d, %edx addl %ecx, %r11d addl 20(%rsp), %r10d movl %r8d, %ecx movl %r12d, %ebx xorl %r9d, %ecx rorl $14, %edx andl %r15d, %ecx xorl %r15d, %edx xorl %r9d, %ecx rorl $5, %edx addl %ecx, %r10d xorl %r15d, %edx xorl %r11d, %ebx rorl $6, %edx movl %r11d, %ecx addl %edx, %r10d rorl $9, %ecx andl %ebx, %eax xorl %r11d, %ecx xorl %r12d, %eax rorl $11, %ecx addl %r10d, %r14d xorl %r11d, %ecx addl %eax, %r10d rorl $2, %ecx movl %r14d, %edx addl %ecx, %r10d addl 24(%rsp), %r9d movl %r15d, %ecx movl %r11d, %eax xorl %r8d, %ecx rorl $14, %edx andl %r14d, %ecx xorl %r14d, %edx xorl %r8d, %ecx rorl $5, %edx addl %ecx, %r9d xorl %r14d, %edx xorl %r10d, %eax rorl $6, %edx movl %r10d, %ecx addl %edx, %r9d rorl $9, %ecx andl %eax, %ebx xorl %r10d, %ecx xorl %r11d, %ebx rorl $11, %ecx addl %r9d, %r13d xorl %r10d, %ecx addl %ebx, %r9d rorl $2, %ecx movl %r13d, %edx addl %ecx, %r9d addl 28(%rsp), %r8d movl %r14d, %ecx movl %r10d, %ebx xorl %r15d, %ecx rorl $14, %edx andl %r13d, %ecx xorl %r13d, %edx xorl %r15d, %ecx rorl $5, %edx addl %ecx, %r8d xorl %r13d, %edx xorl %r9d, %ebx rorl $6, %edx movl %r9d, %ecx addl %edx, %r8d rorl $9, %ecx andl %ebx, %eax xorl %r9d, %ecx xorl %r10d, %eax rorl $11, %ecx addl %r8d, %r12d xorl %r9d, %ecx addl %eax, %r8d rorl $2, %ecx movl %r12d, %edx addl %ecx, %r8d # rnd_all_4: 2-5 addl 32(%rsp), %r15d movl %r13d, %ecx movl %r9d, %eax xorl %r14d, %ecx rorl $14, %edx andl %r12d, %ecx xorl %r12d, %edx xorl %r14d, %ecx rorl $5, %edx addl %ecx, %r15d xorl %r12d, %edx xorl %r8d, %eax rorl $6, %edx movl %r8d, %ecx addl %edx, %r15d rorl $9, %ecx andl %eax, %ebx xorl %r8d, %ecx xorl %r9d, %ebx rorl $11, %ecx addl %r15d, %r11d xorl %r8d, %ecx addl %ebx, %r15d rorl $2, %ecx movl %r11d, %edx addl %ecx, %r15d addl 36(%rsp), %r14d movl %r12d, %ecx movl %r8d, %ebx xorl %r13d, %ecx rorl $14, %edx andl %r11d, %ecx xorl %r11d, %edx xorl %r13d, %ecx rorl $5, %edx addl %ecx, %r14d xorl %r11d, %edx xorl %r15d, %ebx rorl $6, %edx movl %r15d, %ecx addl %edx, 
%r14d rorl $9, %ecx andl %ebx, %eax xorl %r15d, %ecx xorl %r8d, %eax rorl $11, %ecx addl %r14d, %r10d xorl %r15d, %ecx addl %eax, %r14d rorl $2, %ecx movl %r10d, %edx addl %ecx, %r14d addl 40(%rsp), %r13d movl %r11d, %ecx movl %r15d, %eax xorl %r12d, %ecx rorl $14, %edx andl %r10d, %ecx xorl %r10d, %edx xorl %r12d, %ecx rorl $5, %edx addl %ecx, %r13d xorl %r10d, %edx xorl %r14d, %eax rorl $6, %edx movl %r14d, %ecx addl %edx, %r13d rorl $9, %ecx andl %eax, %ebx xorl %r14d, %ecx xorl %r15d, %ebx rorl $11, %ecx addl %r13d, %r9d xorl %r14d, %ecx addl %ebx, %r13d rorl $2, %ecx movl %r9d, %edx addl %ecx, %r13d addl 44(%rsp), %r12d movl %r10d, %ecx movl %r14d, %ebx xorl %r11d, %ecx rorl $14, %edx andl %r9d, %ecx xorl %r9d, %edx xorl %r11d, %ecx rorl $5, %edx addl %ecx, %r12d xorl %r9d, %edx xorl %r13d, %ebx rorl $6, %edx movl %r13d, %ecx addl %edx, %r12d rorl $9, %ecx andl %ebx, %eax xorl %r13d, %ecx xorl %r14d, %eax rorl $11, %ecx addl %r12d, %r8d xorl %r13d, %ecx addl %eax, %r12d rorl $2, %ecx movl %r8d, %edx addl %ecx, %r12d # rnd_all_4: 3-6 addl 48(%rsp), %r11d movl %r9d, %ecx movl %r13d, %eax xorl %r10d, %ecx rorl $14, %edx andl %r8d, %ecx xorl %r8d, %edx xorl %r10d, %ecx rorl $5, %edx addl %ecx, %r11d xorl %r8d, %edx xorl %r12d, %eax rorl $6, %edx movl %r12d, %ecx addl %edx, %r11d rorl $9, %ecx andl %eax, %ebx xorl %r12d, %ecx xorl %r13d, %ebx rorl $11, %ecx addl %r11d, %r15d xorl %r12d, %ecx addl %ebx, %r11d rorl $2, %ecx movl %r15d, %edx addl %ecx, %r11d addl 52(%rsp), %r10d movl %r8d, %ecx movl %r12d, %ebx xorl %r9d, %ecx rorl $14, %edx andl %r15d, %ecx xorl %r15d, %edx xorl %r9d, %ecx rorl $5, %edx addl %ecx, %r10d xorl %r15d, %edx xorl %r11d, %ebx rorl $6, %edx movl %r11d, %ecx addl %edx, %r10d rorl $9, %ecx andl %ebx, %eax xorl %r11d, %ecx xorl %r12d, %eax rorl $11, %ecx addl %r10d, %r14d xorl %r11d, %ecx addl %eax, %r10d rorl $2, %ecx movl %r14d, %edx addl %ecx, %r10d addl 56(%rsp), %r9d movl %r15d, %ecx movl %r11d, %eax xorl %r8d, %ecx rorl $14, %edx andl %r14d, %ecx xorl %r14d, %edx xorl %r8d, %ecx rorl $5, %edx addl %ecx, %r9d xorl %r14d, %edx xorl %r10d, %eax rorl $6, %edx movl %r10d, %ecx addl %edx, %r9d rorl $9, %ecx andl %eax, %ebx xorl %r10d, %ecx xorl %r11d, %ebx rorl $11, %ecx addl %r9d, %r13d xorl %r10d, %ecx addl %ebx, %r9d rorl $2, %ecx movl %r13d, %edx addl %ecx, %r9d addl 60(%rsp), %r8d movl %r14d, %ecx movl %r10d, %ebx xorl %r15d, %ecx rorl $14, %edx andl %r13d, %ecx xorl %r13d, %edx xorl %r15d, %ecx rorl $5, %edx addl %ecx, %r8d xorl %r13d, %edx xorl %r9d, %ebx rorl $6, %edx movl %r9d, %ecx addl %edx, %r8d rorl $9, %ecx andl %ebx, %eax xorl %r9d, %ecx xorl %r10d, %eax rorl $11, %ecx addl %r8d, %r12d xorl %r9d, %ecx addl %eax, %r8d rorl $2, %ecx movl %r12d, %edx addl %ecx, %r8d addl %r8d, (%rdi) addl %r9d, 4(%rdi) addl %r10d, 8(%rdi) addl %r11d, 12(%rdi) addl %r12d, 16(%rdi) addl %r13d, 20(%rdi) addl %r14d, 24(%rdi) addl %r15d, 28(%rdi) xorq %rax, %rax vzeroupper addq $0x40, %rsp popq %r15 popq %r14 popq %r13 popq %r12 popq %rbx repz retq #ifndef __APPLE__ .size Transform_Sha256_AVX1,.-Transform_Sha256_AVX1 #endif /* __APPLE__ */ #ifndef __APPLE__ .text .globl Transform_Sha256_AVX1_Len .type Transform_Sha256_AVX1_Len,@function .align 16 Transform_Sha256_AVX1_Len: #else .section __TEXT,__text .globl _Transform_Sha256_AVX1_Len .p2align 4 _Transform_Sha256_AVX1_Len: #endif /* __APPLE__ */ pushq %rbx pushq %r12 pushq %r13 pushq %r14 pushq %r15 pushq %rbp movq %rsi, %rbp movq %rdx, %rsi subq $0x40, %rsp vmovdqa L_avx1_sha256_flip_mask(%rip), %xmm13 vmovdqa 
L_avx1_sha256_shuf_00BA(%rip), %xmm11 vmovdqa L_avx1_sha256_shuf_DC00(%rip), %xmm12 movl (%rdi), %r8d movl 4(%rdi), %r9d movl 8(%rdi), %r10d movl 12(%rdi), %r11d movl 16(%rdi), %r12d movl 20(%rdi), %r13d movl 24(%rdi), %r14d movl 28(%rdi), %r15d # Start of loop processing a block L_sha256_len_avx1_start: # X0, X1, X2, X3 = W[0..15] vmovdqu (%rbp), %xmm0 vmovdqu 16(%rbp), %xmm1 vpshufb %xmm13, %xmm0, %xmm0 vpshufb %xmm13, %xmm1, %xmm1 vmovdqu 32(%rbp), %xmm2 vmovdqu 48(%rbp), %xmm3 vpshufb %xmm13, %xmm2, %xmm2 vpshufb %xmm13, %xmm3, %xmm3 movl %r9d, %ebx movl %r12d, %edx xorl %r10d, %ebx # set_w_k_xfer_4: 0 vpaddd 0+L_avx1_sha256_k(%rip), %xmm0, %xmm4 vpaddd 16+L_avx1_sha256_k(%rip), %xmm1, %xmm5 vmovdqu %xmm4, (%rsp) vmovdqu %xmm5, 16(%rsp) vpaddd 32+L_avx1_sha256_k(%rip), %xmm2, %xmm6 vpaddd 48+L_avx1_sha256_k(%rip), %xmm3, %xmm7 vmovdqu %xmm6, 32(%rsp) vmovdqu %xmm7, 48(%rsp) # msg_sched: 0-3 # rnd_0: 0 - 0 rorl $14, %edx vpalignr $4, %xmm0, %xmm1, %xmm5 vpalignr $4, %xmm2, %xmm3, %xmm4 # rnd_0: 1 - 2 movl %r9d, %eax movl %r13d, %ecx addl (%rsp), %r15d xorl %r14d, %ecx xorl %r12d, %edx andl %r12d, %ecx vpsrld $7, %xmm5, %xmm6 vpslld $25, %xmm5, %xmm7 # rnd_0: 3 - 4 rorl $5, %edx xorl %r14d, %ecx xorl %r12d, %edx addl %ecx, %r15d rorl $6, %edx xorl %r8d, %eax addl %edx, %r15d movl %r8d, %ecx vpsrld $18, %xmm5, %xmm8 vpslld $14, %xmm5, %xmm9 # rnd_0: 5 - 6 andl %eax, %ebx rorl $9, %ecx xorl %r8d, %ecx xorl %r9d, %ebx rorl $11, %ecx addl %r15d, %r11d xorl %r8d, %ecx addl %ebx, %r15d vpor %xmm6, %xmm7, %xmm6 vpor %xmm8, %xmm9, %xmm8 # rnd_0: 7 - 7 rorl $2, %ecx movl %r11d, %edx addl %ecx, %r15d # rnd_1: 0 - 1 rorl $14, %edx movl %r8d, %ebx movl %r12d, %ecx addl 4(%rsp), %r14d xorl %r13d, %ecx vpsrld $3, %xmm5, %xmm9 vpxor %xmm6, %xmm8, %xmm6 # rnd_1: 2 - 3 xorl %r11d, %edx andl %r11d, %ecx rorl $5, %edx xorl %r13d, %ecx xorl %r11d, %edx addl %ecx, %r14d vpxor %xmm6, %xmm9, %xmm5 vpshufd $0xfa, %xmm3, %xmm6 # rnd_1: 4 - 5 rorl $6, %edx xorl %r15d, %ebx addl %edx, %r14d movl %r15d, %ecx andl %ebx, %eax rorl $9, %ecx xorl %r15d, %ecx xorl %r8d, %eax vpsrld $10, %xmm6, %xmm8 vpsrlq $19, %xmm6, %xmm7 # rnd_1: 6 - 7 rorl $11, %ecx addl %r14d, %r10d xorl %r15d, %ecx addl %eax, %r14d rorl $2, %ecx movl %r10d, %edx addl %ecx, %r14d # rnd_0: 0 - 0 rorl $14, %edx vpsrlq $0x11, %xmm6, %xmm6 vpaddd %xmm0, %xmm4, %xmm4 # rnd_0: 1 - 3 movl %r15d, %eax movl %r11d, %ecx addl 8(%rsp), %r13d xorl %r12d, %ecx xorl %r10d, %edx andl %r10d, %ecx rorl $5, %edx xorl %r12d, %ecx xorl %r10d, %edx addl %ecx, %r13d vpxor %xmm6, %xmm7, %xmm6 vpaddd %xmm5, %xmm4, %xmm4 # rnd_0: 4 - 4 rorl $6, %edx xorl %r14d, %eax addl %edx, %r13d movl %r14d, %ecx vpxor %xmm6, %xmm8, %xmm8 # rnd_0: 5 - 5 andl %eax, %ebx rorl $9, %ecx xorl %r14d, %ecx xorl %r15d, %ebx vpshufb %xmm11, %xmm8, %xmm8 # rnd_0: 6 - 6 rorl $11, %ecx addl %r13d, %r9d xorl %r14d, %ecx addl %ebx, %r13d vpaddd %xmm8, %xmm4, %xmm4 # rnd_0: 7 - 7 rorl $2, %ecx movl %r9d, %edx addl %ecx, %r13d # rnd_1: 0 - 0 rorl $14, %edx vpshufd $0x50, %xmm4, %xmm6 # rnd_1: 1 - 1 movl %r14d, %ebx movl %r10d, %ecx addl 12(%rsp), %r12d xorl %r11d, %ecx vpsrlq $0x11, %xmm6, %xmm8 vpsrlq $19, %xmm6, %xmm7 # rnd_1: 2 - 3 xorl %r9d, %edx andl %r9d, %ecx rorl $5, %edx xorl %r11d, %ecx xorl %r9d, %edx addl %ecx, %r12d vpsrld $10, %xmm6, %xmm9 vpxor %xmm8, %xmm7, %xmm8 # rnd_1: 4 - 5 rorl $6, %edx xorl %r13d, %ebx addl %edx, %r12d movl %r13d, %ecx andl %ebx, %eax rorl $9, %ecx xorl %r13d, %ecx xorl %r14d, %eax vpxor %xmm9, %xmm8, %xmm9 # rnd_1: 6 - 6 rorl $11, %ecx addl %r12d, %r8d xorl %r13d, 
%ecx addl %eax, %r12d vpshufb %xmm12, %xmm9, %xmm9 # rnd_1: 7 - 7 rorl $2, %ecx movl %r8d, %edx addl %ecx, %r12d vpaddd %xmm4, %xmm9, %xmm0 # msg_sched done: 0-3 # msg_sched: 4-7 # rnd_0: 0 - 0 rorl $14, %edx vpalignr $4, %xmm1, %xmm2, %xmm5 vpalignr $4, %xmm3, %xmm0, %xmm4 # rnd_0: 1 - 2 movl %r13d, %eax movl %r9d, %ecx addl 16(%rsp), %r11d xorl %r10d, %ecx xorl %r8d, %edx andl %r8d, %ecx vpsrld $7, %xmm5, %xmm6 vpslld $25, %xmm5, %xmm7 # rnd_0: 3 - 4 rorl $5, %edx xorl %r10d, %ecx xorl %r8d, %edx addl %ecx, %r11d rorl $6, %edx xorl %r12d, %eax addl %edx, %r11d movl %r12d, %ecx vpsrld $18, %xmm5, %xmm8 vpslld $14, %xmm5, %xmm9 # rnd_0: 5 - 6 andl %eax, %ebx rorl $9, %ecx xorl %r12d, %ecx xorl %r13d, %ebx rorl $11, %ecx addl %r11d, %r15d xorl %r12d, %ecx addl %ebx, %r11d vpor %xmm6, %xmm7, %xmm6 vpor %xmm8, %xmm9, %xmm8 # rnd_0: 7 - 7 rorl $2, %ecx movl %r15d, %edx addl %ecx, %r11d # rnd_1: 0 - 1 rorl $14, %edx movl %r12d, %ebx movl %r8d, %ecx addl 20(%rsp), %r10d xorl %r9d, %ecx vpsrld $3, %xmm5, %xmm9 vpxor %xmm6, %xmm8, %xmm6 # rnd_1: 2 - 3 xorl %r15d, %edx andl %r15d, %ecx rorl $5, %edx xorl %r9d, %ecx xorl %r15d, %edx addl %ecx, %r10d vpxor %xmm6, %xmm9, %xmm5 vpshufd $0xfa, %xmm0, %xmm6 # rnd_1: 4 - 5 rorl $6, %edx xorl %r11d, %ebx addl %edx, %r10d movl %r11d, %ecx andl %ebx, %eax rorl $9, %ecx xorl %r11d, %ecx xorl %r12d, %eax vpsrld $10, %xmm6, %xmm8 vpsrlq $19, %xmm6, %xmm7 # rnd_1: 6 - 7 rorl $11, %ecx addl %r10d, %r14d xorl %r11d, %ecx addl %eax, %r10d rorl $2, %ecx movl %r14d, %edx addl %ecx, %r10d # rnd_0: 0 - 0 rorl $14, %edx vpsrlq $0x11, %xmm6, %xmm6 vpaddd %xmm1, %xmm4, %xmm4 # rnd_0: 1 - 3 movl %r11d, %eax movl %r15d, %ecx addl 24(%rsp), %r9d xorl %r8d, %ecx xorl %r14d, %edx andl %r14d, %ecx rorl $5, %edx xorl %r8d, %ecx xorl %r14d, %edx addl %ecx, %r9d vpxor %xmm6, %xmm7, %xmm6 vpaddd %xmm5, %xmm4, %xmm4 # rnd_0: 4 - 4 rorl $6, %edx xorl %r10d, %eax addl %edx, %r9d movl %r10d, %ecx vpxor %xmm6, %xmm8, %xmm8 # rnd_0: 5 - 5 andl %eax, %ebx rorl $9, %ecx xorl %r10d, %ecx xorl %r11d, %ebx vpshufb %xmm11, %xmm8, %xmm8 # rnd_0: 6 - 6 rorl $11, %ecx addl %r9d, %r13d xorl %r10d, %ecx addl %ebx, %r9d vpaddd %xmm8, %xmm4, %xmm4 # rnd_0: 7 - 7 rorl $2, %ecx movl %r13d, %edx addl %ecx, %r9d # rnd_1: 0 - 0 rorl $14, %edx vpshufd $0x50, %xmm4, %xmm6 # rnd_1: 1 - 1 movl %r10d, %ebx movl %r14d, %ecx addl 28(%rsp), %r8d xorl %r15d, %ecx vpsrlq $0x11, %xmm6, %xmm8 vpsrlq $19, %xmm6, %xmm7 # rnd_1: 2 - 3 xorl %r13d, %edx andl %r13d, %ecx rorl $5, %edx xorl %r15d, %ecx xorl %r13d, %edx addl %ecx, %r8d vpsrld $10, %xmm6, %xmm9 vpxor %xmm8, %xmm7, %xmm8 # rnd_1: 4 - 5 rorl $6, %edx xorl %r9d, %ebx addl %edx, %r8d movl %r9d, %ecx andl %ebx, %eax rorl $9, %ecx xorl %r9d, %ecx xorl %r10d, %eax vpxor %xmm9, %xmm8, %xmm9 # rnd_1: 6 - 6 rorl $11, %ecx addl %r8d, %r12d xorl %r9d, %ecx addl %eax, %r8d vpshufb %xmm12, %xmm9, %xmm9 # rnd_1: 7 - 7 rorl $2, %ecx movl %r12d, %edx addl %ecx, %r8d vpaddd %xmm4, %xmm9, %xmm1 # msg_sched done: 4-7 # msg_sched: 8-11 # rnd_0: 0 - 0 rorl $14, %edx vpalignr $4, %xmm2, %xmm3, %xmm5 vpalignr $4, %xmm0, %xmm1, %xmm4 # rnd_0: 1 - 2 movl %r9d, %eax movl %r13d, %ecx addl 32(%rsp), %r15d xorl %r14d, %ecx xorl %r12d, %edx andl %r12d, %ecx vpsrld $7, %xmm5, %xmm6 vpslld $25, %xmm5, %xmm7 # rnd_0: 3 - 4 rorl $5, %edx xorl %r14d, %ecx xorl %r12d, %edx addl %ecx, %r15d rorl $6, %edx xorl %r8d, %eax addl %edx, %r15d movl %r8d, %ecx vpsrld $18, %xmm5, %xmm8 vpslld $14, %xmm5, %xmm9 # rnd_0: 5 - 6 andl %eax, %ebx rorl $9, %ecx xorl %r8d, %ecx xorl %r9d, %ebx rorl $11, %ecx addl 
%r15d, %r11d xorl %r8d, %ecx addl %ebx, %r15d vpor %xmm6, %xmm7, %xmm6 vpor %xmm8, %xmm9, %xmm8 # rnd_0: 7 - 7 rorl $2, %ecx movl %r11d, %edx addl %ecx, %r15d # rnd_1: 0 - 1 rorl $14, %edx movl %r8d, %ebx movl %r12d, %ecx addl 36(%rsp), %r14d xorl %r13d, %ecx vpsrld $3, %xmm5, %xmm9 vpxor %xmm6, %xmm8, %xmm6 # rnd_1: 2 - 3 xorl %r11d, %edx andl %r11d, %ecx rorl $5, %edx xorl %r13d, %ecx xorl %r11d, %edx addl %ecx, %r14d vpxor %xmm6, %xmm9, %xmm5 vpshufd $0xfa, %xmm1, %xmm6 # rnd_1: 4 - 5 rorl $6, %edx xorl %r15d, %ebx addl %edx, %r14d movl %r15d, %ecx andl %ebx, %eax rorl $9, %ecx xorl %r15d, %ecx xorl %r8d, %eax vpsrld $10, %xmm6, %xmm8 vpsrlq $19, %xmm6, %xmm7 # rnd_1: 6 - 7 rorl $11, %ecx addl %r14d, %r10d xorl %r15d, %ecx addl %eax, %r14d rorl $2, %ecx movl %r10d, %edx addl %ecx, %r14d # rnd_0: 0 - 0 rorl $14, %edx vpsrlq $0x11, %xmm6, %xmm6 vpaddd %xmm2, %xmm4, %xmm4 # rnd_0: 1 - 3 movl %r15d, %eax movl %r11d, %ecx addl 40(%rsp), %r13d xorl %r12d, %ecx xorl %r10d, %edx andl %r10d, %ecx rorl $5, %edx xorl %r12d, %ecx xorl %r10d, %edx addl %ecx, %r13d vpxor %xmm6, %xmm7, %xmm6 vpaddd %xmm5, %xmm4, %xmm4 # rnd_0: 4 - 4 rorl $6, %edx xorl %r14d, %eax addl %edx, %r13d movl %r14d, %ecx vpxor %xmm6, %xmm8, %xmm8 # rnd_0: 5 - 5 andl %eax, %ebx rorl $9, %ecx xorl %r14d, %ecx xorl %r15d, %ebx vpshufb %xmm11, %xmm8, %xmm8 # rnd_0: 6 - 6 rorl $11, %ecx addl %r13d, %r9d xorl %r14d, %ecx addl %ebx, %r13d vpaddd %xmm8, %xmm4, %xmm4 # rnd_0: 7 - 7 rorl $2, %ecx movl %r9d, %edx addl %ecx, %r13d # rnd_1: 0 - 0 rorl $14, %edx vpshufd $0x50, %xmm4, %xmm6 # rnd_1: 1 - 1 movl %r14d, %ebx movl %r10d, %ecx addl 44(%rsp), %r12d xorl %r11d, %ecx vpsrlq $0x11, %xmm6, %xmm8 vpsrlq $19, %xmm6, %xmm7 # rnd_1: 2 - 3 xorl %r9d, %edx andl %r9d, %ecx rorl $5, %edx xorl %r11d, %ecx xorl %r9d, %edx addl %ecx, %r12d vpsrld $10, %xmm6, %xmm9 vpxor %xmm8, %xmm7, %xmm8 # rnd_1: 4 - 5 rorl $6, %edx xorl %r13d, %ebx addl %edx, %r12d movl %r13d, %ecx andl %ebx, %eax rorl $9, %ecx xorl %r13d, %ecx xorl %r14d, %eax vpxor %xmm9, %xmm8, %xmm9 # rnd_1: 6 - 6 rorl $11, %ecx addl %r12d, %r8d xorl %r13d, %ecx addl %eax, %r12d vpshufb %xmm12, %xmm9, %xmm9 # rnd_1: 7 - 7 rorl $2, %ecx movl %r8d, %edx addl %ecx, %r12d vpaddd %xmm4, %xmm9, %xmm2 # msg_sched done: 8-11 # msg_sched: 12-15 # rnd_0: 0 - 0 rorl $14, %edx vpalignr $4, %xmm3, %xmm0, %xmm5 vpalignr $4, %xmm1, %xmm2, %xmm4 # rnd_0: 1 - 2 movl %r13d, %eax movl %r9d, %ecx addl 48(%rsp), %r11d xorl %r10d, %ecx xorl %r8d, %edx andl %r8d, %ecx vpsrld $7, %xmm5, %xmm6 vpslld $25, %xmm5, %xmm7 # rnd_0: 3 - 4 rorl $5, %edx xorl %r10d, %ecx xorl %r8d, %edx addl %ecx, %r11d rorl $6, %edx xorl %r12d, %eax addl %edx, %r11d movl %r12d, %ecx vpsrld $18, %xmm5, %xmm8 vpslld $14, %xmm5, %xmm9 # rnd_0: 5 - 6 andl %eax, %ebx rorl $9, %ecx xorl %r12d, %ecx xorl %r13d, %ebx rorl $11, %ecx addl %r11d, %r15d xorl %r12d, %ecx addl %ebx, %r11d vpor %xmm6, %xmm7, %xmm6 vpor %xmm8, %xmm9, %xmm8 # rnd_0: 7 - 7 rorl $2, %ecx movl %r15d, %edx addl %ecx, %r11d # rnd_1: 0 - 1 rorl $14, %edx movl %r12d, %ebx movl %r8d, %ecx addl 52(%rsp), %r10d xorl %r9d, %ecx vpsrld $3, %xmm5, %xmm9 vpxor %xmm6, %xmm8, %xmm6 # rnd_1: 2 - 3 xorl %r15d, %edx andl %r15d, %ecx rorl $5, %edx xorl %r9d, %ecx xorl %r15d, %edx addl %ecx, %r10d vpxor %xmm6, %xmm9, %xmm5 vpshufd $0xfa, %xmm2, %xmm6 # rnd_1: 4 - 5 rorl $6, %edx xorl %r11d, %ebx addl %edx, %r10d movl %r11d, %ecx andl %ebx, %eax rorl $9, %ecx xorl %r11d, %ecx xorl %r12d, %eax vpsrld $10, %xmm6, %xmm8 vpsrlq $19, %xmm6, %xmm7 # rnd_1: 6 - 7 rorl $11, %ecx addl %r10d, %r14d 
xorl %r11d, %ecx addl %eax, %r10d rorl $2, %ecx movl %r14d, %edx addl %ecx, %r10d # rnd_0: 0 - 0 rorl $14, %edx vpsrlq $0x11, %xmm6, %xmm6 vpaddd %xmm3, %xmm4, %xmm4 # rnd_0: 1 - 3 movl %r11d, %eax movl %r15d, %ecx addl 56(%rsp), %r9d xorl %r8d, %ecx xorl %r14d, %edx andl %r14d, %ecx rorl $5, %edx xorl %r8d, %ecx xorl %r14d, %edx addl %ecx, %r9d vpxor %xmm6, %xmm7, %xmm6 vpaddd %xmm5, %xmm4, %xmm4 # rnd_0: 4 - 4 rorl $6, %edx xorl %r10d, %eax addl %edx, %r9d movl %r10d, %ecx vpxor %xmm6, %xmm8, %xmm8 # rnd_0: 5 - 5 andl %eax, %ebx rorl $9, %ecx xorl %r10d, %ecx xorl %r11d, %ebx vpshufb %xmm11, %xmm8, %xmm8 # rnd_0: 6 - 6 rorl $11, %ecx addl %r9d, %r13d xorl %r10d, %ecx addl %ebx, %r9d vpaddd %xmm8, %xmm4, %xmm4 # rnd_0: 7 - 7 rorl $2, %ecx movl %r13d, %edx addl %ecx, %r9d # rnd_1: 0 - 0 rorl $14, %edx vpshufd $0x50, %xmm4, %xmm6 # rnd_1: 1 - 1 movl %r10d, %ebx movl %r14d, %ecx addl 60(%rsp), %r8d xorl %r15d, %ecx vpsrlq $0x11, %xmm6, %xmm8 vpsrlq $19, %xmm6, %xmm7 # rnd_1: 2 - 3 xorl %r13d, %edx andl %r13d, %ecx rorl $5, %edx xorl %r15d, %ecx xorl %r13d, %edx addl %ecx, %r8d vpsrld $10, %xmm6, %xmm9 vpxor %xmm8, %xmm7, %xmm8 # rnd_1: 4 - 5 rorl $6, %edx xorl %r9d, %ebx addl %edx, %r8d movl %r9d, %ecx andl %ebx, %eax rorl $9, %ecx xorl %r9d, %ecx xorl %r10d, %eax vpxor %xmm9, %xmm8, %xmm9 # rnd_1: 6 - 6 rorl $11, %ecx addl %r8d, %r12d xorl %r9d, %ecx addl %eax, %r8d vpshufb %xmm12, %xmm9, %xmm9 # rnd_1: 7 - 7 rorl $2, %ecx movl %r12d, %edx addl %ecx, %r8d vpaddd %xmm4, %xmm9, %xmm3 # msg_sched done: 12-15 # set_w_k_xfer_4: 4 vpaddd 64+L_avx1_sha256_k(%rip), %xmm0, %xmm4 vpaddd 80+L_avx1_sha256_k(%rip), %xmm1, %xmm5 vmovdqu %xmm4, (%rsp) vmovdqu %xmm5, 16(%rsp) vpaddd 96+L_avx1_sha256_k(%rip), %xmm2, %xmm6 vpaddd 112+L_avx1_sha256_k(%rip), %xmm3, %xmm7 vmovdqu %xmm6, 32(%rsp) vmovdqu %xmm7, 48(%rsp) # msg_sched: 0-3 # rnd_0: 0 - 0 rorl $14, %edx vpalignr $4, %xmm0, %xmm1, %xmm5 vpalignr $4, %xmm2, %xmm3, %xmm4 # rnd_0: 1 - 2 movl %r9d, %eax movl %r13d, %ecx addl (%rsp), %r15d xorl %r14d, %ecx xorl %r12d, %edx andl %r12d, %ecx vpsrld $7, %xmm5, %xmm6 vpslld $25, %xmm5, %xmm7 # rnd_0: 3 - 4 rorl $5, %edx xorl %r14d, %ecx xorl %r12d, %edx addl %ecx, %r15d rorl $6, %edx xorl %r8d, %eax addl %edx, %r15d movl %r8d, %ecx vpsrld $18, %xmm5, %xmm8 vpslld $14, %xmm5, %xmm9 # rnd_0: 5 - 6 andl %eax, %ebx rorl $9, %ecx xorl %r8d, %ecx xorl %r9d, %ebx rorl $11, %ecx addl %r15d, %r11d xorl %r8d, %ecx addl %ebx, %r15d vpor %xmm6, %xmm7, %xmm6 vpor %xmm8, %xmm9, %xmm8 # rnd_0: 7 - 7 rorl $2, %ecx movl %r11d, %edx addl %ecx, %r15d # rnd_1: 0 - 1 rorl $14, %edx movl %r8d, %ebx movl %r12d, %ecx addl 4(%rsp), %r14d xorl %r13d, %ecx vpsrld $3, %xmm5, %xmm9 vpxor %xmm6, %xmm8, %xmm6 # rnd_1: 2 - 3 xorl %r11d, %edx andl %r11d, %ecx rorl $5, %edx xorl %r13d, %ecx xorl %r11d, %edx addl %ecx, %r14d vpxor %xmm6, %xmm9, %xmm5 vpshufd $0xfa, %xmm3, %xmm6 # rnd_1: 4 - 5 rorl $6, %edx xorl %r15d, %ebx addl %edx, %r14d movl %r15d, %ecx andl %ebx, %eax rorl $9, %ecx xorl %r15d, %ecx xorl %r8d, %eax vpsrld $10, %xmm6, %xmm8 vpsrlq $19, %xmm6, %xmm7 # rnd_1: 6 - 7 rorl $11, %ecx addl %r14d, %r10d xorl %r15d, %ecx addl %eax, %r14d rorl $2, %ecx movl %r10d, %edx addl %ecx, %r14d # rnd_0: 0 - 0 rorl $14, %edx vpsrlq $0x11, %xmm6, %xmm6 vpaddd %xmm0, %xmm4, %xmm4 # rnd_0: 1 - 3 movl %r15d, %eax movl %r11d, %ecx addl 8(%rsp), %r13d xorl %r12d, %ecx xorl %r10d, %edx andl %r10d, %ecx rorl $5, %edx xorl %r12d, %ecx xorl %r10d, %edx addl %ecx, %r13d vpxor %xmm6, %xmm7, %xmm6 vpaddd %xmm5, %xmm4, %xmm4 # rnd_0: 4 - 4 rorl $6, %edx xorl 
%r14d, %eax addl %edx, %r13d movl %r14d, %ecx vpxor %xmm6, %xmm8, %xmm8 # rnd_0: 5 - 5 andl %eax, %ebx rorl $9, %ecx xorl %r14d, %ecx xorl %r15d, %ebx vpshufb %xmm11, %xmm8, %xmm8 # rnd_0: 6 - 6 rorl $11, %ecx addl %r13d, %r9d xorl %r14d, %ecx addl %ebx, %r13d vpaddd %xmm8, %xmm4, %xmm4 # rnd_0: 7 - 7 rorl $2, %ecx movl %r9d, %edx addl %ecx, %r13d # rnd_1: 0 - 0 rorl $14, %edx vpshufd $0x50, %xmm4, %xmm6 # rnd_1: 1 - 1 movl %r14d, %ebx movl %r10d, %ecx addl 12(%rsp), %r12d xorl %r11d, %ecx vpsrlq $0x11, %xmm6, %xmm8 vpsrlq $19, %xmm6, %xmm7 # rnd_1: 2 - 3 xorl %r9d, %edx andl %r9d, %ecx rorl $5, %edx xorl %r11d, %ecx xorl %r9d, %edx addl %ecx, %r12d vpsrld $10, %xmm6, %xmm9 vpxor %xmm8, %xmm7, %xmm8 # rnd_1: 4 - 5 rorl $6, %edx xorl %r13d, %ebx addl %edx, %r12d movl %r13d, %ecx andl %ebx, %eax rorl $9, %ecx xorl %r13d, %ecx xorl %r14d, %eax vpxor %xmm9, %xmm8, %xmm9 # rnd_1: 6 - 6 rorl $11, %ecx addl %r12d, %r8d xorl %r13d, %ecx addl %eax, %r12d vpshufb %xmm12, %xmm9, %xmm9 # rnd_1: 7 - 7 rorl $2, %ecx movl %r8d, %edx addl %ecx, %r12d vpaddd %xmm4, %xmm9, %xmm0 # msg_sched done: 0-3 # msg_sched: 4-7 # rnd_0: 0 - 0 rorl $14, %edx vpalignr $4, %xmm1, %xmm2, %xmm5 vpalignr $4, %xmm3, %xmm0, %xmm4 # rnd_0: 1 - 2 movl %r13d, %eax movl %r9d, %ecx addl 16(%rsp), %r11d xorl %r10d, %ecx xorl %r8d, %edx andl %r8d, %ecx vpsrld $7, %xmm5, %xmm6 vpslld $25, %xmm5, %xmm7 # rnd_0: 3 - 4 rorl $5, %edx xorl %r10d, %ecx xorl %r8d, %edx addl %ecx, %r11d rorl $6, %edx xorl %r12d, %eax addl %edx, %r11d movl %r12d, %ecx vpsrld $18, %xmm5, %xmm8 vpslld $14, %xmm5, %xmm9 # rnd_0: 5 - 6 andl %eax, %ebx rorl $9, %ecx xorl %r12d, %ecx xorl %r13d, %ebx rorl $11, %ecx addl %r11d, %r15d xorl %r12d, %ecx addl %ebx, %r11d vpor %xmm6, %xmm7, %xmm6 vpor %xmm8, %xmm9, %xmm8 # rnd_0: 7 - 7 rorl $2, %ecx movl %r15d, %edx addl %ecx, %r11d # rnd_1: 0 - 1 rorl $14, %edx movl %r12d, %ebx movl %r8d, %ecx addl 20(%rsp), %r10d xorl %r9d, %ecx vpsrld $3, %xmm5, %xmm9 vpxor %xmm6, %xmm8, %xmm6 # rnd_1: 2 - 3 xorl %r15d, %edx andl %r15d, %ecx rorl $5, %edx xorl %r9d, %ecx xorl %r15d, %edx addl %ecx, %r10d vpxor %xmm6, %xmm9, %xmm5 vpshufd $0xfa, %xmm0, %xmm6 # rnd_1: 4 - 5 rorl $6, %edx xorl %r11d, %ebx addl %edx, %r10d movl %r11d, %ecx andl %ebx, %eax rorl $9, %ecx xorl %r11d, %ecx xorl %r12d, %eax vpsrld $10, %xmm6, %xmm8 vpsrlq $19, %xmm6, %xmm7 # rnd_1: 6 - 7 rorl $11, %ecx addl %r10d, %r14d xorl %r11d, %ecx addl %eax, %r10d rorl $2, %ecx movl %r14d, %edx addl %ecx, %r10d # rnd_0: 0 - 0 rorl $14, %edx vpsrlq $0x11, %xmm6, %xmm6 vpaddd %xmm1, %xmm4, %xmm4 # rnd_0: 1 - 3 movl %r11d, %eax movl %r15d, %ecx addl 24(%rsp), %r9d xorl %r8d, %ecx xorl %r14d, %edx andl %r14d, %ecx rorl $5, %edx xorl %r8d, %ecx xorl %r14d, %edx addl %ecx, %r9d vpxor %xmm6, %xmm7, %xmm6 vpaddd %xmm5, %xmm4, %xmm4 # rnd_0: 4 - 4 rorl $6, %edx xorl %r10d, %eax addl %edx, %r9d movl %r10d, %ecx vpxor %xmm6, %xmm8, %xmm8 # rnd_0: 5 - 5 andl %eax, %ebx rorl $9, %ecx xorl %r10d, %ecx xorl %r11d, %ebx vpshufb %xmm11, %xmm8, %xmm8 # rnd_0: 6 - 6 rorl $11, %ecx addl %r9d, %r13d xorl %r10d, %ecx addl %ebx, %r9d vpaddd %xmm8, %xmm4, %xmm4 # rnd_0: 7 - 7 rorl $2, %ecx movl %r13d, %edx addl %ecx, %r9d # rnd_1: 0 - 0 rorl $14, %edx vpshufd $0x50, %xmm4, %xmm6 # rnd_1: 1 - 1 movl %r10d, %ebx movl %r14d, %ecx addl 28(%rsp), %r8d xorl %r15d, %ecx vpsrlq $0x11, %xmm6, %xmm8 vpsrlq $19, %xmm6, %xmm7 # rnd_1: 2 - 3 xorl %r13d, %edx andl %r13d, %ecx rorl $5, %edx xorl %r15d, %ecx xorl %r13d, %edx addl %ecx, %r8d vpsrld $10, %xmm6, %xmm9 vpxor %xmm8, %xmm7, %xmm8 # rnd_1: 4 - 5 rorl 
$6, %edx xorl %r9d, %ebx addl %edx, %r8d movl %r9d, %ecx andl %ebx, %eax rorl $9, %ecx xorl %r9d, %ecx xorl %r10d, %eax vpxor %xmm9, %xmm8, %xmm9 # rnd_1: 6 - 6 rorl $11, %ecx addl %r8d, %r12d xorl %r9d, %ecx addl %eax, %r8d vpshufb %xmm12, %xmm9, %xmm9 # rnd_1: 7 - 7 rorl $2, %ecx movl %r12d, %edx addl %ecx, %r8d vpaddd %xmm4, %xmm9, %xmm1 # msg_sched done: 4-7 # msg_sched: 8-11 # rnd_0: 0 - 0 rorl $14, %edx vpalignr $4, %xmm2, %xmm3, %xmm5 vpalignr $4, %xmm0, %xmm1, %xmm4 # rnd_0: 1 - 2 movl %r9d, %eax movl %r13d, %ecx addl 32(%rsp), %r15d xorl %r14d, %ecx xorl %r12d, %edx andl %r12d, %ecx vpsrld $7, %xmm5, %xmm6 vpslld $25, %xmm5, %xmm7 # rnd_0: 3 - 4 rorl $5, %edx xorl %r14d, %ecx xorl %r12d, %edx addl %ecx, %r15d rorl $6, %edx xorl %r8d, %eax addl %edx, %r15d movl %r8d, %ecx vpsrld $18, %xmm5, %xmm8 vpslld $14, %xmm5, %xmm9 # rnd_0: 5 - 6 andl %eax, %ebx rorl $9, %ecx xorl %r8d, %ecx xorl %r9d, %ebx rorl $11, %ecx addl %r15d, %r11d xorl %r8d, %ecx addl %ebx, %r15d vpor %xmm6, %xmm7, %xmm6 vpor %xmm8, %xmm9, %xmm8 # rnd_0: 7 - 7 rorl $2, %ecx movl %r11d, %edx addl %ecx, %r15d # rnd_1: 0 - 1 rorl $14, %edx movl %r8d, %ebx movl %r12d, %ecx addl 36(%rsp), %r14d xorl %r13d, %ecx vpsrld $3, %xmm5, %xmm9 vpxor %xmm6, %xmm8, %xmm6 # rnd_1: 2 - 3 xorl %r11d, %edx andl %r11d, %ecx rorl $5, %edx xorl %r13d, %ecx xorl %r11d, %edx addl %ecx, %r14d vpxor %xmm6, %xmm9, %xmm5 vpshufd $0xfa, %xmm1, %xmm6 # rnd_1: 4 - 5 rorl $6, %edx xorl %r15d, %ebx addl %edx, %r14d movl %r15d, %ecx andl %ebx, %eax rorl $9, %ecx xorl %r15d, %ecx xorl %r8d, %eax vpsrld $10, %xmm6, %xmm8 vpsrlq $19, %xmm6, %xmm7 # rnd_1: 6 - 7 rorl $11, %ecx addl %r14d, %r10d xorl %r15d, %ecx addl %eax, %r14d rorl $2, %ecx movl %r10d, %edx addl %ecx, %r14d # rnd_0: 0 - 0 rorl $14, %edx vpsrlq $0x11, %xmm6, %xmm6 vpaddd %xmm2, %xmm4, %xmm4 # rnd_0: 1 - 3 movl %r15d, %eax movl %r11d, %ecx addl 40(%rsp), %r13d xorl %r12d, %ecx xorl %r10d, %edx andl %r10d, %ecx rorl $5, %edx xorl %r12d, %ecx xorl %r10d, %edx addl %ecx, %r13d vpxor %xmm6, %xmm7, %xmm6 vpaddd %xmm5, %xmm4, %xmm4 # rnd_0: 4 - 4 rorl $6, %edx xorl %r14d, %eax addl %edx, %r13d movl %r14d, %ecx vpxor %xmm6, %xmm8, %xmm8 # rnd_0: 5 - 5 andl %eax, %ebx rorl $9, %ecx xorl %r14d, %ecx xorl %r15d, %ebx vpshufb %xmm11, %xmm8, %xmm8 # rnd_0: 6 - 6 rorl $11, %ecx addl %r13d, %r9d xorl %r14d, %ecx addl %ebx, %r13d vpaddd %xmm8, %xmm4, %xmm4 # rnd_0: 7 - 7 rorl $2, %ecx movl %r9d, %edx addl %ecx, %r13d # rnd_1: 0 - 0 rorl $14, %edx vpshufd $0x50, %xmm4, %xmm6 # rnd_1: 1 - 1 movl %r14d, %ebx movl %r10d, %ecx addl 44(%rsp), %r12d xorl %r11d, %ecx vpsrlq $0x11, %xmm6, %xmm8 vpsrlq $19, %xmm6, %xmm7 # rnd_1: 2 - 3 xorl %r9d, %edx andl %r9d, %ecx rorl $5, %edx xorl %r11d, %ecx xorl %r9d, %edx addl %ecx, %r12d vpsrld $10, %xmm6, %xmm9 vpxor %xmm8, %xmm7, %xmm8 # rnd_1: 4 - 5 rorl $6, %edx xorl %r13d, %ebx addl %edx, %r12d movl %r13d, %ecx andl %ebx, %eax rorl $9, %ecx xorl %r13d, %ecx xorl %r14d, %eax vpxor %xmm9, %xmm8, %xmm9 # rnd_1: 6 - 6 rorl $11, %ecx addl %r12d, %r8d xorl %r13d, %ecx addl %eax, %r12d vpshufb %xmm12, %xmm9, %xmm9 # rnd_1: 7 - 7 rorl $2, %ecx movl %r8d, %edx addl %ecx, %r12d vpaddd %xmm4, %xmm9, %xmm2 # msg_sched done: 8-11 # msg_sched: 12-15 # rnd_0: 0 - 0 rorl $14, %edx vpalignr $4, %xmm3, %xmm0, %xmm5 vpalignr $4, %xmm1, %xmm2, %xmm4 # rnd_0: 1 - 2 movl %r13d, %eax movl %r9d, %ecx addl 48(%rsp), %r11d xorl %r10d, %ecx xorl %r8d, %edx andl %r8d, %ecx vpsrld $7, %xmm5, %xmm6 vpslld $25, %xmm5, %xmm7 # rnd_0: 3 - 4 rorl $5, %edx xorl %r10d, %ecx xorl %r8d, %edx addl %ecx, 
%r11d rorl $6, %edx xorl %r12d, %eax addl %edx, %r11d movl %r12d, %ecx vpsrld $18, %xmm5, %xmm8 vpslld $14, %xmm5, %xmm9 # rnd_0: 5 - 6 andl %eax, %ebx rorl $9, %ecx xorl %r12d, %ecx xorl %r13d, %ebx rorl $11, %ecx addl %r11d, %r15d xorl %r12d, %ecx addl %ebx, %r11d vpor %xmm6, %xmm7, %xmm6 vpor %xmm8, %xmm9, %xmm8 # rnd_0: 7 - 7 rorl $2, %ecx movl %r15d, %edx addl %ecx, %r11d # rnd_1: 0 - 1 rorl $14, %edx movl %r12d, %ebx movl %r8d, %ecx addl 52(%rsp), %r10d xorl %r9d, %ecx vpsrld $3, %xmm5, %xmm9 vpxor %xmm6, %xmm8, %xmm6 # rnd_1: 2 - 3 xorl %r15d, %edx andl %r15d, %ecx rorl $5, %edx xorl %r9d, %ecx xorl %r15d, %edx addl %ecx, %r10d vpxor %xmm6, %xmm9, %xmm5 vpshufd $0xfa, %xmm2, %xmm6 # rnd_1: 4 - 5 rorl $6, %edx xorl %r11d, %ebx addl %edx, %r10d movl %r11d, %ecx andl %ebx, %eax rorl $9, %ecx xorl %r11d, %ecx xorl %r12d, %eax vpsrld $10, %xmm6, %xmm8 vpsrlq $19, %xmm6, %xmm7 # rnd_1: 6 - 7 rorl $11, %ecx addl %r10d, %r14d xorl %r11d, %ecx addl %eax, %r10d rorl $2, %ecx movl %r14d, %edx addl %ecx, %r10d # rnd_0: 0 - 0 rorl $14, %edx vpsrlq $0x11, %xmm6, %xmm6 vpaddd %xmm3, %xmm4, %xmm4 # rnd_0: 1 - 3 movl %r11d, %eax movl %r15d, %ecx addl 56(%rsp), %r9d xorl %r8d, %ecx xorl %r14d, %edx andl %r14d, %ecx rorl $5, %edx xorl %r8d, %ecx xorl %r14d, %edx addl %ecx, %r9d vpxor %xmm6, %xmm7, %xmm6 vpaddd %xmm5, %xmm4, %xmm4 # rnd_0: 4 - 4 rorl $6, %edx xorl %r10d, %eax addl %edx, %r9d movl %r10d, %ecx vpxor %xmm6, %xmm8, %xmm8 # rnd_0: 5 - 5 andl %eax, %ebx rorl $9, %ecx xorl %r10d, %ecx xorl %r11d, %ebx vpshufb %xmm11, %xmm8, %xmm8 # rnd_0: 6 - 6 rorl $11, %ecx addl %r9d, %r13d xorl %r10d, %ecx addl %ebx, %r9d vpaddd %xmm8, %xmm4, %xmm4 # rnd_0: 7 - 7 rorl $2, %ecx movl %r13d, %edx addl %ecx, %r9d # rnd_1: 0 - 0 rorl $14, %edx vpshufd $0x50, %xmm4, %xmm6 # rnd_1: 1 - 1 movl %r10d, %ebx movl %r14d, %ecx addl 60(%rsp), %r8d xorl %r15d, %ecx vpsrlq $0x11, %xmm6, %xmm8 vpsrlq $19, %xmm6, %xmm7 # rnd_1: 2 - 3 xorl %r13d, %edx andl %r13d, %ecx rorl $5, %edx xorl %r15d, %ecx xorl %r13d, %edx addl %ecx, %r8d vpsrld $10, %xmm6, %xmm9 vpxor %xmm8, %xmm7, %xmm8 # rnd_1: 4 - 5 rorl $6, %edx xorl %r9d, %ebx addl %edx, %r8d movl %r9d, %ecx andl %ebx, %eax rorl $9, %ecx xorl %r9d, %ecx xorl %r10d, %eax vpxor %xmm9, %xmm8, %xmm9 # rnd_1: 6 - 6 rorl $11, %ecx addl %r8d, %r12d xorl %r9d, %ecx addl %eax, %r8d vpshufb %xmm12, %xmm9, %xmm9 # rnd_1: 7 - 7 rorl $2, %ecx movl %r12d, %edx addl %ecx, %r8d vpaddd %xmm4, %xmm9, %xmm3 # msg_sched done: 12-15 # set_w_k_xfer_4: 8 vpaddd 128+L_avx1_sha256_k(%rip), %xmm0, %xmm4 vpaddd 144+L_avx1_sha256_k(%rip), %xmm1, %xmm5 vmovdqu %xmm4, (%rsp) vmovdqu %xmm5, 16(%rsp) vpaddd 160+L_avx1_sha256_k(%rip), %xmm2, %xmm6 vpaddd 176+L_avx1_sha256_k(%rip), %xmm3, %xmm7 vmovdqu %xmm6, 32(%rsp) vmovdqu %xmm7, 48(%rsp) # msg_sched: 0-3 # rnd_0: 0 - 0 rorl $14, %edx vpalignr $4, %xmm0, %xmm1, %xmm5 vpalignr $4, %xmm2, %xmm3, %xmm4 # rnd_0: 1 - 2 movl %r9d, %eax movl %r13d, %ecx addl (%rsp), %r15d xorl %r14d, %ecx xorl %r12d, %edx andl %r12d, %ecx vpsrld $7, %xmm5, %xmm6 vpslld $25, %xmm5, %xmm7 # rnd_0: 3 - 4 rorl $5, %edx xorl %r14d, %ecx xorl %r12d, %edx addl %ecx, %r15d rorl $6, %edx xorl %r8d, %eax addl %edx, %r15d movl %r8d, %ecx vpsrld $18, %xmm5, %xmm8 vpslld $14, %xmm5, %xmm9 # rnd_0: 5 - 6 andl %eax, %ebx rorl $9, %ecx xorl %r8d, %ecx xorl %r9d, %ebx rorl $11, %ecx addl %r15d, %r11d xorl %r8d, %ecx addl %ebx, %r15d vpor %xmm6, %xmm7, %xmm6 vpor %xmm8, %xmm9, %xmm8 # rnd_0: 7 - 7 rorl $2, %ecx movl %r11d, %edx addl %ecx, %r15d # rnd_1: 0 - 1 rorl $14, %edx movl %r8d, %ebx movl 
%r12d, %ecx addl 4(%rsp), %r14d xorl %r13d, %ecx vpsrld $3, %xmm5, %xmm9 vpxor %xmm6, %xmm8, %xmm6 # rnd_1: 2 - 3 xorl %r11d, %edx andl %r11d, %ecx rorl $5, %edx xorl %r13d, %ecx xorl %r11d, %edx addl %ecx, %r14d vpxor %xmm6, %xmm9, %xmm5 vpshufd $0xfa, %xmm3, %xmm6 # rnd_1: 4 - 5 rorl $6, %edx xorl %r15d, %ebx addl %edx, %r14d movl %r15d, %ecx andl %ebx, %eax rorl $9, %ecx xorl %r15d, %ecx xorl %r8d, %eax vpsrld $10, %xmm6, %xmm8 vpsrlq $19, %xmm6, %xmm7 # rnd_1: 6 - 7 rorl $11, %ecx addl %r14d, %r10d xorl %r15d, %ecx addl %eax, %r14d rorl $2, %ecx movl %r10d, %edx addl %ecx, %r14d # rnd_0: 0 - 0 rorl $14, %edx vpsrlq $0x11, %xmm6, %xmm6 vpaddd %xmm0, %xmm4, %xmm4 # rnd_0: 1 - 3 movl %r15d, %eax movl %r11d, %ecx addl 8(%rsp), %r13d xorl %r12d, %ecx xorl %r10d, %edx andl %r10d, %ecx rorl $5, %edx xorl %r12d, %ecx xorl %r10d, %edx addl %ecx, %r13d vpxor %xmm6, %xmm7, %xmm6 vpaddd %xmm5, %xmm4, %xmm4 # rnd_0: 4 - 4 rorl $6, %edx xorl %r14d, %eax addl %edx, %r13d movl %r14d, %ecx vpxor %xmm6, %xmm8, %xmm8 # rnd_0: 5 - 5 andl %eax, %ebx rorl $9, %ecx xorl %r14d, %ecx xorl %r15d, %ebx vpshufb %xmm11, %xmm8, %xmm8 # rnd_0: 6 - 6 rorl $11, %ecx addl %r13d, %r9d xorl %r14d, %ecx addl %ebx, %r13d vpaddd %xmm8, %xmm4, %xmm4 # rnd_0: 7 - 7 rorl $2, %ecx movl %r9d, %edx addl %ecx, %r13d # rnd_1: 0 - 0 rorl $14, %edx vpshufd $0x50, %xmm4, %xmm6 # rnd_1: 1 - 1 movl %r14d, %ebx movl %r10d, %ecx addl 12(%rsp), %r12d xorl %r11d, %ecx vpsrlq $0x11, %xmm6, %xmm8 vpsrlq $19, %xmm6, %xmm7 # rnd_1: 2 - 3 xorl %r9d, %edx andl %r9d, %ecx rorl $5, %edx xorl %r11d, %ecx xorl %r9d, %edx addl %ecx, %r12d vpsrld $10, %xmm6, %xmm9 vpxor %xmm8, %xmm7, %xmm8 # rnd_1: 4 - 5 rorl $6, %edx xorl %r13d, %ebx addl %edx, %r12d movl %r13d, %ecx andl %ebx, %eax rorl $9, %ecx xorl %r13d, %ecx xorl %r14d, %eax vpxor %xmm9, %xmm8, %xmm9 # rnd_1: 6 - 6 rorl $11, %ecx addl %r12d, %r8d xorl %r13d, %ecx addl %eax, %r12d vpshufb %xmm12, %xmm9, %xmm9 # rnd_1: 7 - 7 rorl $2, %ecx movl %r8d, %edx addl %ecx, %r12d vpaddd %xmm4, %xmm9, %xmm0 # msg_sched done: 0-3 # msg_sched: 4-7 # rnd_0: 0 - 0 rorl $14, %edx vpalignr $4, %xmm1, %xmm2, %xmm5 vpalignr $4, %xmm3, %xmm0, %xmm4 # rnd_0: 1 - 2 movl %r13d, %eax movl %r9d, %ecx addl 16(%rsp), %r11d xorl %r10d, %ecx xorl %r8d, %edx andl %r8d, %ecx vpsrld $7, %xmm5, %xmm6 vpslld $25, %xmm5, %xmm7 # rnd_0: 3 - 4 rorl $5, %edx xorl %r10d, %ecx xorl %r8d, %edx addl %ecx, %r11d rorl $6, %edx xorl %r12d, %eax addl %edx, %r11d movl %r12d, %ecx vpsrld $18, %xmm5, %xmm8 vpslld $14, %xmm5, %xmm9 # rnd_0: 5 - 6 andl %eax, %ebx rorl $9, %ecx xorl %r12d, %ecx xorl %r13d, %ebx rorl $11, %ecx addl %r11d, %r15d xorl %r12d, %ecx addl %ebx, %r11d vpor %xmm6, %xmm7, %xmm6 vpor %xmm8, %xmm9, %xmm8 # rnd_0: 7 - 7 rorl $2, %ecx movl %r15d, %edx addl %ecx, %r11d # rnd_1: 0 - 1 rorl $14, %edx movl %r12d, %ebx movl %r8d, %ecx addl 20(%rsp), %r10d xorl %r9d, %ecx vpsrld $3, %xmm5, %xmm9 vpxor %xmm6, %xmm8, %xmm6 # rnd_1: 2 - 3 xorl %r15d, %edx andl %r15d, %ecx rorl $5, %edx xorl %r9d, %ecx xorl %r15d, %edx addl %ecx, %r10d vpxor %xmm6, %xmm9, %xmm5 vpshufd $0xfa, %xmm0, %xmm6 # rnd_1: 4 - 5 rorl $6, %edx xorl %r11d, %ebx addl %edx, %r10d movl %r11d, %ecx andl %ebx, %eax rorl $9, %ecx xorl %r11d, %ecx xorl %r12d, %eax vpsrld $10, %xmm6, %xmm8 vpsrlq $19, %xmm6, %xmm7 # rnd_1: 6 - 7 rorl $11, %ecx addl %r10d, %r14d xorl %r11d, %ecx addl %eax, %r10d rorl $2, %ecx movl %r14d, %edx addl %ecx, %r10d # rnd_0: 0 - 0 rorl $14, %edx vpsrlq $0x11, %xmm6, %xmm6 vpaddd %xmm1, %xmm4, %xmm4 # rnd_0: 1 - 3 movl %r11d, %eax movl %r15d, %ecx 
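# Note: the scalar rounds interleaved with the vector code follow FIPS 180-4:
#   T1 = h + Sigma1(e) + Ch(e,f,g) + K[t] + W[t],  T2 = Sigma0(a) + Maj(a,b,c)
# Sigma1(e) is built from the ror $14/$5/$6 chain (rotr6 ^ rotr11 ^ rotr25) and
# Sigma0(a) from the ror $9/$11/$2 chain (rotr2 ^ rotr13 ^ rotr22). Ch is
# computed as ((f ^ g) & e) ^ g and Maj as ((a ^ b) & (b ^ c)) ^ b, with the
# a ^ b value carried between rounds in %eax/%ebx. The W[t] + K[t] operand is
# the 4-byte stack slot written by the preceding set_w_k_xfer_4 block.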
addl 24(%rsp), %r9d xorl %r8d, %ecx xorl %r14d, %edx andl %r14d, %ecx rorl $5, %edx xorl %r8d, %ecx xorl %r14d, %edx addl %ecx, %r9d vpxor %xmm6, %xmm7, %xmm6 vpaddd %xmm5, %xmm4, %xmm4 # rnd_0: 4 - 4 rorl $6, %edx xorl %r10d, %eax addl %edx, %r9d movl %r10d, %ecx vpxor %xmm6, %xmm8, %xmm8 # rnd_0: 5 - 5 andl %eax, %ebx rorl $9, %ecx xorl %r10d, %ecx xorl %r11d, %ebx vpshufb %xmm11, %xmm8, %xmm8 # rnd_0: 6 - 6 rorl $11, %ecx addl %r9d, %r13d xorl %r10d, %ecx addl %ebx, %r9d vpaddd %xmm8, %xmm4, %xmm4 # rnd_0: 7 - 7 rorl $2, %ecx movl %r13d, %edx addl %ecx, %r9d # rnd_1: 0 - 0 rorl $14, %edx vpshufd $0x50, %xmm4, %xmm6 # rnd_1: 1 - 1 movl %r10d, %ebx movl %r14d, %ecx addl 28(%rsp), %r8d xorl %r15d, %ecx vpsrlq $0x11, %xmm6, %xmm8 vpsrlq $19, %xmm6, %xmm7 # rnd_1: 2 - 3 xorl %r13d, %edx andl %r13d, %ecx rorl $5, %edx xorl %r15d, %ecx xorl %r13d, %edx addl %ecx, %r8d vpsrld $10, %xmm6, %xmm9 vpxor %xmm8, %xmm7, %xmm8 # rnd_1: 4 - 5 rorl $6, %edx xorl %r9d, %ebx addl %edx, %r8d movl %r9d, %ecx andl %ebx, %eax rorl $9, %ecx xorl %r9d, %ecx xorl %r10d, %eax vpxor %xmm9, %xmm8, %xmm9 # rnd_1: 6 - 6 rorl $11, %ecx addl %r8d, %r12d xorl %r9d, %ecx addl %eax, %r8d vpshufb %xmm12, %xmm9, %xmm9 # rnd_1: 7 - 7 rorl $2, %ecx movl %r12d, %edx addl %ecx, %r8d vpaddd %xmm4, %xmm9, %xmm1 # msg_sched done: 4-7 # msg_sched: 8-11 # rnd_0: 0 - 0 rorl $14, %edx vpalignr $4, %xmm2, %xmm3, %xmm5 vpalignr $4, %xmm0, %xmm1, %xmm4 # rnd_0: 1 - 2 movl %r9d, %eax movl %r13d, %ecx addl 32(%rsp), %r15d xorl %r14d, %ecx xorl %r12d, %edx andl %r12d, %ecx vpsrld $7, %xmm5, %xmm6 vpslld $25, %xmm5, %xmm7 # rnd_0: 3 - 4 rorl $5, %edx xorl %r14d, %ecx xorl %r12d, %edx addl %ecx, %r15d rorl $6, %edx xorl %r8d, %eax addl %edx, %r15d movl %r8d, %ecx vpsrld $18, %xmm5, %xmm8 vpslld $14, %xmm5, %xmm9 # rnd_0: 5 - 6 andl %eax, %ebx rorl $9, %ecx xorl %r8d, %ecx xorl %r9d, %ebx rorl $11, %ecx addl %r15d, %r11d xorl %r8d, %ecx addl %ebx, %r15d vpor %xmm6, %xmm7, %xmm6 vpor %xmm8, %xmm9, %xmm8 # rnd_0: 7 - 7 rorl $2, %ecx movl %r11d, %edx addl %ecx, %r15d # rnd_1: 0 - 1 rorl $14, %edx movl %r8d, %ebx movl %r12d, %ecx addl 36(%rsp), %r14d xorl %r13d, %ecx vpsrld $3, %xmm5, %xmm9 vpxor %xmm6, %xmm8, %xmm6 # rnd_1: 2 - 3 xorl %r11d, %edx andl %r11d, %ecx rorl $5, %edx xorl %r13d, %ecx xorl %r11d, %edx addl %ecx, %r14d vpxor %xmm6, %xmm9, %xmm5 vpshufd $0xfa, %xmm1, %xmm6 # rnd_1: 4 - 5 rorl $6, %edx xorl %r15d, %ebx addl %edx, %r14d movl %r15d, %ecx andl %ebx, %eax rorl $9, %ecx xorl %r15d, %ecx xorl %r8d, %eax vpsrld $10, %xmm6, %xmm8 vpsrlq $19, %xmm6, %xmm7 # rnd_1: 6 - 7 rorl $11, %ecx addl %r14d, %r10d xorl %r15d, %ecx addl %eax, %r14d rorl $2, %ecx movl %r10d, %edx addl %ecx, %r14d # rnd_0: 0 - 0 rorl $14, %edx vpsrlq $0x11, %xmm6, %xmm6 vpaddd %xmm2, %xmm4, %xmm4 # rnd_0: 1 - 3 movl %r15d, %eax movl %r11d, %ecx addl 40(%rsp), %r13d xorl %r12d, %ecx xorl %r10d, %edx andl %r10d, %ecx rorl $5, %edx xorl %r12d, %ecx xorl %r10d, %edx addl %ecx, %r13d vpxor %xmm6, %xmm7, %xmm6 vpaddd %xmm5, %xmm4, %xmm4 # rnd_0: 4 - 4 rorl $6, %edx xorl %r14d, %eax addl %edx, %r13d movl %r14d, %ecx vpxor %xmm6, %xmm8, %xmm8 # rnd_0: 5 - 5 andl %eax, %ebx rorl $9, %ecx xorl %r14d, %ecx xorl %r15d, %ebx vpshufb %xmm11, %xmm8, %xmm8 # rnd_0: 6 - 6 rorl $11, %ecx addl %r13d, %r9d xorl %r14d, %ecx addl %ebx, %r13d vpaddd %xmm8, %xmm4, %xmm4 # rnd_0: 7 - 7 rorl $2, %ecx movl %r9d, %edx addl %ecx, %r13d # rnd_1: 0 - 0 rorl $14, %edx vpshufd $0x50, %xmm4, %xmm6 # rnd_1: 1 - 1 movl %r14d, %ebx movl %r10d, %ecx addl 44(%rsp), %r12d xorl %r11d, %ecx vpsrlq $0x11, 
%xmm6, %xmm8 vpsrlq $19, %xmm6, %xmm7 # rnd_1: 2 - 3 xorl %r9d, %edx andl %r9d, %ecx rorl $5, %edx xorl %r11d, %ecx xorl %r9d, %edx addl %ecx, %r12d vpsrld $10, %xmm6, %xmm9 vpxor %xmm8, %xmm7, %xmm8 # rnd_1: 4 - 5 rorl $6, %edx xorl %r13d, %ebx addl %edx, %r12d movl %r13d, %ecx andl %ebx, %eax rorl $9, %ecx xorl %r13d, %ecx xorl %r14d, %eax vpxor %xmm9, %xmm8, %xmm9 # rnd_1: 6 - 6 rorl $11, %ecx addl %r12d, %r8d xorl %r13d, %ecx addl %eax, %r12d vpshufb %xmm12, %xmm9, %xmm9 # rnd_1: 7 - 7 rorl $2, %ecx movl %r8d, %edx addl %ecx, %r12d vpaddd %xmm4, %xmm9, %xmm2 # msg_sched done: 8-11 # msg_sched: 12-15 # rnd_0: 0 - 0 rorl $14, %edx vpalignr $4, %xmm3, %xmm0, %xmm5 vpalignr $4, %xmm1, %xmm2, %xmm4 # rnd_0: 1 - 2 movl %r13d, %eax movl %r9d, %ecx addl 48(%rsp), %r11d xorl %r10d, %ecx xorl %r8d, %edx andl %r8d, %ecx vpsrld $7, %xmm5, %xmm6 vpslld $25, %xmm5, %xmm7 # rnd_0: 3 - 4 rorl $5, %edx xorl %r10d, %ecx xorl %r8d, %edx addl %ecx, %r11d rorl $6, %edx xorl %r12d, %eax addl %edx, %r11d movl %r12d, %ecx vpsrld $18, %xmm5, %xmm8 vpslld $14, %xmm5, %xmm9 # rnd_0: 5 - 6 andl %eax, %ebx rorl $9, %ecx xorl %r12d, %ecx xorl %r13d, %ebx rorl $11, %ecx addl %r11d, %r15d xorl %r12d, %ecx addl %ebx, %r11d vpor %xmm6, %xmm7, %xmm6 vpor %xmm8, %xmm9, %xmm8 # rnd_0: 7 - 7 rorl $2, %ecx movl %r15d, %edx addl %ecx, %r11d # rnd_1: 0 - 1 rorl $14, %edx movl %r12d, %ebx movl %r8d, %ecx addl 52(%rsp), %r10d xorl %r9d, %ecx vpsrld $3, %xmm5, %xmm9 vpxor %xmm6, %xmm8, %xmm6 # rnd_1: 2 - 3 xorl %r15d, %edx andl %r15d, %ecx rorl $5, %edx xorl %r9d, %ecx xorl %r15d, %edx addl %ecx, %r10d vpxor %xmm6, %xmm9, %xmm5 vpshufd $0xfa, %xmm2, %xmm6 # rnd_1: 4 - 5 rorl $6, %edx xorl %r11d, %ebx addl %edx, %r10d movl %r11d, %ecx andl %ebx, %eax rorl $9, %ecx xorl %r11d, %ecx xorl %r12d, %eax vpsrld $10, %xmm6, %xmm8 vpsrlq $19, %xmm6, %xmm7 # rnd_1: 6 - 7 rorl $11, %ecx addl %r10d, %r14d xorl %r11d, %ecx addl %eax, %r10d rorl $2, %ecx movl %r14d, %edx addl %ecx, %r10d # rnd_0: 0 - 0 rorl $14, %edx vpsrlq $0x11, %xmm6, %xmm6 vpaddd %xmm3, %xmm4, %xmm4 # rnd_0: 1 - 3 movl %r11d, %eax movl %r15d, %ecx addl 56(%rsp), %r9d xorl %r8d, %ecx xorl %r14d, %edx andl %r14d, %ecx rorl $5, %edx xorl %r8d, %ecx xorl %r14d, %edx addl %ecx, %r9d vpxor %xmm6, %xmm7, %xmm6 vpaddd %xmm5, %xmm4, %xmm4 # rnd_0: 4 - 4 rorl $6, %edx xorl %r10d, %eax addl %edx, %r9d movl %r10d, %ecx vpxor %xmm6, %xmm8, %xmm8 # rnd_0: 5 - 5 andl %eax, %ebx rorl $9, %ecx xorl %r10d, %ecx xorl %r11d, %ebx vpshufb %xmm11, %xmm8, %xmm8 # rnd_0: 6 - 6 rorl $11, %ecx addl %r9d, %r13d xorl %r10d, %ecx addl %ebx, %r9d vpaddd %xmm8, %xmm4, %xmm4 # rnd_0: 7 - 7 rorl $2, %ecx movl %r13d, %edx addl %ecx, %r9d # rnd_1: 0 - 0 rorl $14, %edx vpshufd $0x50, %xmm4, %xmm6 # rnd_1: 1 - 1 movl %r10d, %ebx movl %r14d, %ecx addl 60(%rsp), %r8d xorl %r15d, %ecx vpsrlq $0x11, %xmm6, %xmm8 vpsrlq $19, %xmm6, %xmm7 # rnd_1: 2 - 3 xorl %r13d, %edx andl %r13d, %ecx rorl $5, %edx xorl %r15d, %ecx xorl %r13d, %edx addl %ecx, %r8d vpsrld $10, %xmm6, %xmm9 vpxor %xmm8, %xmm7, %xmm8 # rnd_1: 4 - 5 rorl $6, %edx xorl %r9d, %ebx addl %edx, %r8d movl %r9d, %ecx andl %ebx, %eax rorl $9, %ecx xorl %r9d, %ecx xorl %r10d, %eax vpxor %xmm9, %xmm8, %xmm9 # rnd_1: 6 - 6 rorl $11, %ecx addl %r8d, %r12d xorl %r9d, %ecx addl %eax, %r8d vpshufb %xmm12, %xmm9, %xmm9 # rnd_1: 7 - 7 rorl $2, %ecx movl %r12d, %edx addl %ecx, %r8d vpaddd %xmm4, %xmm9, %xmm3 # msg_sched done: 12-15 # set_w_k_xfer_4: 12 vpaddd 192+L_avx1_sha256_k(%rip), %xmm0, %xmm4 vpaddd 208+L_avx1_sha256_k(%rip), %xmm1, %xmm5 vmovdqu %xmm4, (%rsp) 
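# Note: each set_w_k_xfer_4 block adds the next 4x4 round constants from
# L_avx1_sha256_k to the schedule vectors X0..X3 and spills the sums to the
# 64 bytes reserved at (%rsp), so the scalar rounds can fetch W[t] + K[t]
# with a single 32-bit add from the stack.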
vmovdqu %xmm5, 16(%rsp) vpaddd 224+L_avx1_sha256_k(%rip), %xmm2, %xmm6 vpaddd 240+L_avx1_sha256_k(%rip), %xmm3, %xmm7 vmovdqu %xmm6, 32(%rsp) vmovdqu %xmm7, 48(%rsp) # rnd_all_4: 0-3 addl (%rsp), %r15d movl %r13d, %ecx movl %r9d, %eax xorl %r14d, %ecx rorl $14, %edx andl %r12d, %ecx xorl %r12d, %edx xorl %r14d, %ecx rorl $5, %edx addl %ecx, %r15d xorl %r12d, %edx xorl %r8d, %eax rorl $6, %edx movl %r8d, %ecx addl %edx, %r15d rorl $9, %ecx andl %eax, %ebx xorl %r8d, %ecx xorl %r9d, %ebx rorl $11, %ecx addl %r15d, %r11d xorl %r8d, %ecx addl %ebx, %r15d rorl $2, %ecx movl %r11d, %edx addl %ecx, %r15d addl 4(%rsp), %r14d movl %r12d, %ecx movl %r8d, %ebx xorl %r13d, %ecx rorl $14, %edx andl %r11d, %ecx xorl %r11d, %edx xorl %r13d, %ecx rorl $5, %edx addl %ecx, %r14d xorl %r11d, %edx xorl %r15d, %ebx rorl $6, %edx movl %r15d, %ecx addl %edx, %r14d rorl $9, %ecx andl %ebx, %eax xorl %r15d, %ecx xorl %r8d, %eax rorl $11, %ecx addl %r14d, %r10d xorl %r15d, %ecx addl %eax, %r14d rorl $2, %ecx movl %r10d, %edx addl %ecx, %r14d addl 8(%rsp), %r13d movl %r11d, %ecx movl %r15d, %eax xorl %r12d, %ecx rorl $14, %edx andl %r10d, %ecx xorl %r10d, %edx xorl %r12d, %ecx rorl $5, %edx addl %ecx, %r13d xorl %r10d, %edx xorl %r14d, %eax rorl $6, %edx movl %r14d, %ecx addl %edx, %r13d rorl $9, %ecx andl %eax, %ebx xorl %r14d, %ecx xorl %r15d, %ebx rorl $11, %ecx addl %r13d, %r9d xorl %r14d, %ecx addl %ebx, %r13d rorl $2, %ecx movl %r9d, %edx addl %ecx, %r13d addl 12(%rsp), %r12d movl %r10d, %ecx movl %r14d, %ebx xorl %r11d, %ecx rorl $14, %edx andl %r9d, %ecx xorl %r9d, %edx xorl %r11d, %ecx rorl $5, %edx addl %ecx, %r12d xorl %r9d, %edx xorl %r13d, %ebx rorl $6, %edx movl %r13d, %ecx addl %edx, %r12d rorl $9, %ecx andl %ebx, %eax xorl %r13d, %ecx xorl %r14d, %eax rorl $11, %ecx addl %r12d, %r8d xorl %r13d, %ecx addl %eax, %r12d rorl $2, %ecx movl %r8d, %edx addl %ecx, %r12d # rnd_all_4: 1-4 addl 16(%rsp), %r11d movl %r9d, %ecx movl %r13d, %eax xorl %r10d, %ecx rorl $14, %edx andl %r8d, %ecx xorl %r8d, %edx xorl %r10d, %ecx rorl $5, %edx addl %ecx, %r11d xorl %r8d, %edx xorl %r12d, %eax rorl $6, %edx movl %r12d, %ecx addl %edx, %r11d rorl $9, %ecx andl %eax, %ebx xorl %r12d, %ecx xorl %r13d, %ebx rorl $11, %ecx addl %r11d, %r15d xorl %r12d, %ecx addl %ebx, %r11d rorl $2, %ecx movl %r15d, %edx addl %ecx, %r11d addl 20(%rsp), %r10d movl %r8d, %ecx movl %r12d, %ebx xorl %r9d, %ecx rorl $14, %edx andl %r15d, %ecx xorl %r15d, %edx xorl %r9d, %ecx rorl $5, %edx addl %ecx, %r10d xorl %r15d, %edx xorl %r11d, %ebx rorl $6, %edx movl %r11d, %ecx addl %edx, %r10d rorl $9, %ecx andl %ebx, %eax xorl %r11d, %ecx xorl %r12d, %eax rorl $11, %ecx addl %r10d, %r14d xorl %r11d, %ecx addl %eax, %r10d rorl $2, %ecx movl %r14d, %edx addl %ecx, %r10d addl 24(%rsp), %r9d movl %r15d, %ecx movl %r11d, %eax xorl %r8d, %ecx rorl $14, %edx andl %r14d, %ecx xorl %r14d, %edx xorl %r8d, %ecx rorl $5, %edx addl %ecx, %r9d xorl %r14d, %edx xorl %r10d, %eax rorl $6, %edx movl %r10d, %ecx addl %edx, %r9d rorl $9, %ecx andl %eax, %ebx xorl %r10d, %ecx xorl %r11d, %ebx rorl $11, %ecx addl %r9d, %r13d xorl %r10d, %ecx addl %ebx, %r9d rorl $2, %ecx movl %r13d, %edx addl %ecx, %r9d addl 28(%rsp), %r8d movl %r14d, %ecx movl %r10d, %ebx xorl %r15d, %ecx rorl $14, %edx andl %r13d, %ecx xorl %r13d, %edx xorl %r15d, %ecx rorl $5, %edx addl %ecx, %r8d xorl %r13d, %edx xorl %r9d, %ebx rorl $6, %edx movl %r9d, %ecx addl %edx, %r8d rorl $9, %ecx andl %ebx, %eax xorl %r9d, %ecx xorl %r10d, %eax rorl $11, %ecx addl %r8d, %r12d xorl %r9d, %ecx addl %eax, %r8d rorl 
$2, %ecx movl %r12d, %edx addl %ecx, %r8d # rnd_all_4: 2-5 addl 32(%rsp), %r15d movl %r13d, %ecx movl %r9d, %eax xorl %r14d, %ecx rorl $14, %edx andl %r12d, %ecx xorl %r12d, %edx xorl %r14d, %ecx rorl $5, %edx addl %ecx, %r15d xorl %r12d, %edx xorl %r8d, %eax rorl $6, %edx movl %r8d, %ecx addl %edx, %r15d rorl $9, %ecx andl %eax, %ebx xorl %r8d, %ecx xorl %r9d, %ebx rorl $11, %ecx addl %r15d, %r11d xorl %r8d, %ecx addl %ebx, %r15d rorl $2, %ecx movl %r11d, %edx addl %ecx, %r15d addl 36(%rsp), %r14d movl %r12d, %ecx movl %r8d, %ebx xorl %r13d, %ecx rorl $14, %edx andl %r11d, %ecx xorl %r11d, %edx xorl %r13d, %ecx rorl $5, %edx addl %ecx, %r14d xorl %r11d, %edx xorl %r15d, %ebx rorl $6, %edx movl %r15d, %ecx addl %edx, %r14d rorl $9, %ecx andl %ebx, %eax xorl %r15d, %ecx xorl %r8d, %eax rorl $11, %ecx addl %r14d, %r10d xorl %r15d, %ecx addl %eax, %r14d rorl $2, %ecx movl %r10d, %edx addl %ecx, %r14d addl 40(%rsp), %r13d movl %r11d, %ecx movl %r15d, %eax xorl %r12d, %ecx rorl $14, %edx andl %r10d, %ecx xorl %r10d, %edx xorl %r12d, %ecx rorl $5, %edx addl %ecx, %r13d xorl %r10d, %edx xorl %r14d, %eax rorl $6, %edx movl %r14d, %ecx addl %edx, %r13d rorl $9, %ecx andl %eax, %ebx xorl %r14d, %ecx xorl %r15d, %ebx rorl $11, %ecx addl %r13d, %r9d xorl %r14d, %ecx addl %ebx, %r13d rorl $2, %ecx movl %r9d, %edx addl %ecx, %r13d addl 44(%rsp), %r12d movl %r10d, %ecx movl %r14d, %ebx xorl %r11d, %ecx rorl $14, %edx andl %r9d, %ecx xorl %r9d, %edx xorl %r11d, %ecx rorl $5, %edx addl %ecx, %r12d xorl %r9d, %edx xorl %r13d, %ebx rorl $6, %edx movl %r13d, %ecx addl %edx, %r12d rorl $9, %ecx andl %ebx, %eax xorl %r13d, %ecx xorl %r14d, %eax rorl $11, %ecx addl %r12d, %r8d xorl %r13d, %ecx addl %eax, %r12d rorl $2, %ecx movl %r8d, %edx addl %ecx, %r12d # rnd_all_4: 3-6 addl 48(%rsp), %r11d movl %r9d, %ecx movl %r13d, %eax xorl %r10d, %ecx rorl $14, %edx andl %r8d, %ecx xorl %r8d, %edx xorl %r10d, %ecx rorl $5, %edx addl %ecx, %r11d xorl %r8d, %edx xorl %r12d, %eax rorl $6, %edx movl %r12d, %ecx addl %edx, %r11d rorl $9, %ecx andl %eax, %ebx xorl %r12d, %ecx xorl %r13d, %ebx rorl $11, %ecx addl %r11d, %r15d xorl %r12d, %ecx addl %ebx, %r11d rorl $2, %ecx movl %r15d, %edx addl %ecx, %r11d addl 52(%rsp), %r10d movl %r8d, %ecx movl %r12d, %ebx xorl %r9d, %ecx rorl $14, %edx andl %r15d, %ecx xorl %r15d, %edx xorl %r9d, %ecx rorl $5, %edx addl %ecx, %r10d xorl %r15d, %edx xorl %r11d, %ebx rorl $6, %edx movl %r11d, %ecx addl %edx, %r10d rorl $9, %ecx andl %ebx, %eax xorl %r11d, %ecx xorl %r12d, %eax rorl $11, %ecx addl %r10d, %r14d xorl %r11d, %ecx addl %eax, %r10d rorl $2, %ecx movl %r14d, %edx addl %ecx, %r10d addl 56(%rsp), %r9d movl %r15d, %ecx movl %r11d, %eax xorl %r8d, %ecx rorl $14, %edx andl %r14d, %ecx xorl %r14d, %edx xorl %r8d, %ecx rorl $5, %edx addl %ecx, %r9d xorl %r14d, %edx xorl %r10d, %eax rorl $6, %edx movl %r10d, %ecx addl %edx, %r9d rorl $9, %ecx andl %eax, %ebx xorl %r10d, %ecx xorl %r11d, %ebx rorl $11, %ecx addl %r9d, %r13d xorl %r10d, %ecx addl %ebx, %r9d rorl $2, %ecx movl %r13d, %edx addl %ecx, %r9d addl 60(%rsp), %r8d movl %r14d, %ecx movl %r10d, %ebx xorl %r15d, %ecx rorl $14, %edx andl %r13d, %ecx xorl %r13d, %edx xorl %r15d, %ecx rorl $5, %edx addl %ecx, %r8d xorl %r13d, %edx xorl %r9d, %ebx rorl $6, %edx movl %r9d, %ecx addl %edx, %r8d rorl $9, %ecx andl %ebx, %eax xorl %r9d, %ecx xorl %r10d, %eax rorl $11, %ecx addl %r8d, %r12d xorl %r9d, %ecx addl %eax, %r8d rorl $2, %ecx movl %r12d, %edx addl %ecx, %r8d addl (%rdi), %r8d addl 4(%rdi), %r9d addl 8(%rdi), %r10d addl 12(%rdi), %r11d 
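# Note: with all 64 rounds done, the working registers are added back into
# the eight 32-bit state words at (%rdi) (the Davies-Meyer feed-forward).
# The length-based loop then advances the data pointer in %rbp by 0x40,
# subtracts 0x40 from the remaining byte count in %esi, and branches back to
# L_sha256_len_avx1_start while input remains.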
        addl 16(%rdi), %r12d
        addl 20(%rdi), %r13d
        addl 24(%rdi), %r14d
        addl 28(%rdi), %r15d
        addq $0x40, %rbp
        subl $0x40, %esi
        movl %r8d, (%rdi)
        movl %r9d, 4(%rdi)
        movl %r10d, 8(%rdi)
        movl %r11d, 12(%rdi)
        movl %r12d, 16(%rdi)
        movl %r13d, 20(%rdi)
        movl %r14d, 24(%rdi)
        movl %r15d, 28(%rdi)
        jnz L_sha256_len_avx1_start
        xorq %rax, %rax
        vzeroupper
        addq $0x40, %rsp
        popq %rbp
        popq %r15
        popq %r14
        popq %r13
        popq %r12
        popq %rbx
        repz retq
#ifndef __APPLE__
.size Transform_Sha256_AVX1_Len,.-Transform_Sha256_AVX1_Len
#endif /* __APPLE__ */
#ifndef __APPLE__
.data
#else
.section __DATA,__data
#endif /* __APPLE__ */
L_avx1_rorx_sha256_k:
.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
.long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
.long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
.long 0xe49b69c1,0xefbe4786,0xfc19dc6,0x240ca1cc
.long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
.long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
.long 0xc6e00bf3,0xd5a79147,0x6ca6351,0x14292967
.long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
.long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
.long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
.long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
.long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
.long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
#ifndef __APPLE__
.data
#else
.section __DATA,__data
#endif /* __APPLE__ */
#ifndef __APPLE__
.align 16
#else
.p2align 4
#endif /* __APPLE__ */
L_avx1_rorx_sha256_shuf_00BA:
.quad 0xb0a090803020100, 0xffffffffffffffff
#ifndef __APPLE__
.data
#else
.section __DATA,__data
#endif /* __APPLE__ */
#ifndef __APPLE__
.align 16
#else
.p2align 4
#endif /* __APPLE__ */
L_avx1_rorx_sha256_shuf_DC00:
.quad 0xffffffffffffffff, 0xb0a090803020100
#ifndef __APPLE__
.data
#else
.section __DATA,__data
#endif /* __APPLE__ */
#ifndef __APPLE__
.align 16
#else
.p2align 4
#endif /* __APPLE__ */
L_avx1_rorx_sha256_flip_mask:
.quad 0x405060700010203, 0xc0d0e0f08090a0b
#ifndef __APPLE__
.text
.globl Transform_Sha256_AVX1_RORX
.type Transform_Sha256_AVX1_RORX,@function
.align 16
Transform_Sha256_AVX1_RORX:
#else
.section __TEXT,__text
.globl _Transform_Sha256_AVX1_RORX
.p2align 4
_Transform_Sha256_AVX1_RORX:
#endif /* __APPLE__ */
        pushq %rbx
        pushq %r12
        pushq %r13
        pushq %r14
        pushq %r15
        subq $0x40, %rsp
        vmovdqa L_avx1_rorx_sha256_flip_mask(%rip), %xmm13
        vmovdqa L_avx1_rorx_sha256_shuf_00BA(%rip), %xmm11
        vmovdqa L_avx1_rorx_sha256_shuf_DC00(%rip), %xmm12
        leaq 32(%rdi), %rax
        # X0, X1, X2, X3 = W[0..15]
        vmovdqu (%rax), %xmm0
        vmovdqu 16(%rax), %xmm1
        vpshufb %xmm13, %xmm0, %xmm0
        vpshufb %xmm13, %xmm1, %xmm1
        vmovdqu 32(%rax), %xmm2
        vmovdqu 48(%rax), %xmm3
        vpshufb %xmm13, %xmm2, %xmm2
        vpshufb %xmm13, %xmm3, %xmm3
        movl (%rdi), %r8d
        movl 4(%rdi), %r9d
        movl 8(%rdi), %r10d
        movl 12(%rdi), %r11d
        movl 16(%rdi), %r12d
        movl 20(%rdi), %r13d
        movl 24(%rdi), %r14d
        movl 28(%rdi), %r15d
        # set_w_k_xfer_4: 0
        vpaddd 0+L_avx1_rorx_sha256_k(%rip), %xmm0, %xmm4
        vpaddd 16+L_avx1_rorx_sha256_k(%rip), %xmm1, %xmm5
        vmovdqu %xmm4, (%rsp)
        vmovdqu %xmm5, 16(%rsp)
        vpaddd 32+L_avx1_rorx_sha256_k(%rip), %xmm2, %xmm6
        vpaddd 48+L_avx1_rorx_sha256_k(%rip), %xmm3, %xmm7
        vmovdqu %xmm6, 32(%rsp)
        vmovdqu %xmm7, 48(%rsp)
        movl %r9d, %ebx
        rorxl $6, %r12d, %edx
        xorl %r10d, %ebx
        # msg_sched: 0-3
        # rnd_0: 0 - 0
        movl %r13d, %eax
        rorxl $11, %r12d, %ecx
        addl (%rsp), %r15d
        vpalignr $4, %xmm2, %xmm3, %xmm4
        vpalignr $4, %xmm0, %xmm1, %xmm5
        # rnd_0: 1 - 2
        xorl %edx, %ecx
        xorl %r14d,
%eax rorxl $25, %r12d, %edx andl %r12d, %eax xorl %ecx, %edx rorxl $13, %r8d, %ecx vpsrld $7, %xmm5, %xmm6 vpslld $25, %xmm5, %xmm7 # rnd_0: 3 - 4 addl %edx, %r15d rorxl $2, %r8d, %edx xorl %r14d, %eax xorl %edx, %ecx rorxl $22, %r8d, %edx addl %eax, %r15d vpsrld $3, %xmm5, %xmm8 vpor %xmm6, %xmm7, %xmm7 # rnd_0: 5 - 7 xorl %ecx, %edx movl %r9d, %eax addl %r15d, %r11d xorl %r8d, %eax addl %edx, %r15d andl %eax, %ebx xorl %r9d, %ebx rorxl $6, %r11d, %edx addl %ebx, %r15d # rnd_1: 0 - 0 movl %r12d, %ebx rorxl $11, %r11d, %ecx addl 4(%rsp), %r14d vpsrld $18, %xmm5, %xmm6 # rnd_1: 1 - 1 xorl %edx, %ecx xorl %r13d, %ebx rorxl $25, %r11d, %edx vpslld $14, %xmm5, %xmm5 # rnd_1: 2 - 2 andl %r11d, %ebx xorl %ecx, %edx rorxl $13, %r15d, %ecx vpxor %xmm5, %xmm7, %xmm7 # rnd_1: 3 - 3 addl %edx, %r14d rorxl $2, %r15d, %edx xorl %r13d, %ebx vpxor %xmm6, %xmm7, %xmm7 # rnd_1: 4 - 4 xorl %edx, %ecx rorxl $22, %r15d, %edx addl %ebx, %r14d vpshufd $0xfa, %xmm3, %xmm6 # rnd_1: 5 - 5 xorl %ecx, %edx movl %r8d, %ebx addl %r14d, %r10d vpxor %xmm8, %xmm7, %xmm5 # rnd_1: 6 - 6 xorl %r15d, %ebx addl %edx, %r14d andl %ebx, %eax vpsrld $10, %xmm6, %xmm8 # rnd_1: 7 - 7 xorl %r8d, %eax rorxl $6, %r10d, %edx addl %eax, %r14d # rnd_0: 0 - 0 movl %r11d, %eax rorxl $11, %r10d, %ecx addl 8(%rsp), %r13d vpsrlq $19, %xmm6, %xmm7 # rnd_0: 1 - 1 xorl %edx, %ecx xorl %r12d, %eax rorxl $25, %r10d, %edx vpsrlq $0x11, %xmm6, %xmm6 vpaddd %xmm0, %xmm4, %xmm4 # rnd_0: 2 - 2 andl %r10d, %eax xorl %ecx, %edx rorxl $13, %r14d, %ecx vpaddd %xmm5, %xmm4, %xmm4 # rnd_0: 3 - 3 addl %edx, %r13d rorxl $2, %r14d, %edx xorl %r12d, %eax vpxor %xmm7, %xmm6, %xmm6 # rnd_0: 4 - 4 xorl %edx, %ecx rorxl $22, %r14d, %edx addl %eax, %r13d vpxor %xmm6, %xmm8, %xmm8 # rnd_0: 5 - 5 xorl %ecx, %edx movl %r15d, %eax addl %r13d, %r9d vpshufb %xmm11, %xmm8, %xmm8 # rnd_0: 6 - 6 xorl %r14d, %eax addl %edx, %r13d andl %eax, %ebx vpaddd %xmm8, %xmm4, %xmm4 # rnd_0: 7 - 7 xorl %r15d, %ebx rorxl $6, %r9d, %edx addl %ebx, %r13d # rnd_1: 0 - 0 movl %r10d, %ebx rorxl $11, %r9d, %ecx addl 12(%rsp), %r12d vpshufd $0x50, %xmm4, %xmm6 # rnd_1: 1 - 1 xorl %edx, %ecx xorl %r11d, %ebx rorxl $25, %r9d, %edx vpsrld $10, %xmm6, %xmm9 # rnd_1: 2 - 2 andl %r9d, %ebx xorl %ecx, %edx rorxl $13, %r13d, %ecx vpsrlq $19, %xmm6, %xmm7 # rnd_1: 3 - 3 addl %edx, %r12d rorxl $2, %r13d, %edx xorl %r11d, %ebx vpsrlq $0x11, %xmm6, %xmm6 # rnd_1: 4 - 4 xorl %edx, %ecx rorxl $22, %r13d, %edx addl %ebx, %r12d vpxor %xmm7, %xmm6, %xmm6 # rnd_1: 5 - 5 xorl %ecx, %edx movl %r14d, %ebx addl %r12d, %r8d vpxor %xmm6, %xmm9, %xmm9 # rnd_1: 6 - 6 xorl %r13d, %ebx addl %edx, %r12d andl %ebx, %eax vpshufb %xmm12, %xmm9, %xmm9 # rnd_1: 7 - 7 xorl %r14d, %eax rorxl $6, %r8d, %edx addl %eax, %r12d vpaddd %xmm4, %xmm9, %xmm0 # msg_sched done: 0-3 # msg_sched: 4-7 # rnd_0: 0 - 0 movl %r9d, %eax rorxl $11, %r8d, %ecx addl 16(%rsp), %r11d vpalignr $4, %xmm3, %xmm0, %xmm4 vpalignr $4, %xmm1, %xmm2, %xmm5 # rnd_0: 1 - 2 xorl %edx, %ecx xorl %r10d, %eax rorxl $25, %r8d, %edx andl %r8d, %eax xorl %ecx, %edx rorxl $13, %r12d, %ecx vpsrld $7, %xmm5, %xmm6 vpslld $25, %xmm5, %xmm7 # rnd_0: 3 - 4 addl %edx, %r11d rorxl $2, %r12d, %edx xorl %r10d, %eax xorl %edx, %ecx rorxl $22, %r12d, %edx addl %eax, %r11d vpsrld $3, %xmm5, %xmm8 vpor %xmm6, %xmm7, %xmm7 # rnd_0: 5 - 7 xorl %ecx, %edx movl %r13d, %eax addl %r11d, %r15d xorl %r12d, %eax addl %edx, %r11d andl %eax, %ebx xorl %r13d, %ebx rorxl $6, %r15d, %edx addl %ebx, %r11d # rnd_1: 0 - 0 movl %r8d, %ebx rorxl $11, %r15d, %ecx addl 20(%rsp), %r10d vpsrld $18, %xmm5, 
%xmm6 # rnd_1: 1 - 1 xorl %edx, %ecx xorl %r9d, %ebx rorxl $25, %r15d, %edx vpslld $14, %xmm5, %xmm5 # rnd_1: 2 - 2 andl %r15d, %ebx xorl %ecx, %edx rorxl $13, %r11d, %ecx vpxor %xmm5, %xmm7, %xmm7 # rnd_1: 3 - 3 addl %edx, %r10d rorxl $2, %r11d, %edx xorl %r9d, %ebx vpxor %xmm6, %xmm7, %xmm7 # rnd_1: 4 - 4 xorl %edx, %ecx rorxl $22, %r11d, %edx addl %ebx, %r10d vpshufd $0xfa, %xmm0, %xmm6 # rnd_1: 5 - 5 xorl %ecx, %edx movl %r12d, %ebx addl %r10d, %r14d vpxor %xmm8, %xmm7, %xmm5 # rnd_1: 6 - 6 xorl %r11d, %ebx addl %edx, %r10d andl %ebx, %eax vpsrld $10, %xmm6, %xmm8 # rnd_1: 7 - 7 xorl %r12d, %eax rorxl $6, %r14d, %edx addl %eax, %r10d # rnd_0: 0 - 0 movl %r15d, %eax rorxl $11, %r14d, %ecx addl 24(%rsp), %r9d vpsrlq $19, %xmm6, %xmm7 # rnd_0: 1 - 1 xorl %edx, %ecx xorl %r8d, %eax rorxl $25, %r14d, %edx vpsrlq $0x11, %xmm6, %xmm6 vpaddd %xmm1, %xmm4, %xmm4 # rnd_0: 2 - 2 andl %r14d, %eax xorl %ecx, %edx rorxl $13, %r10d, %ecx vpaddd %xmm5, %xmm4, %xmm4 # rnd_0: 3 - 3 addl %edx, %r9d rorxl $2, %r10d, %edx xorl %r8d, %eax vpxor %xmm7, %xmm6, %xmm6 # rnd_0: 4 - 4 xorl %edx, %ecx rorxl $22, %r10d, %edx addl %eax, %r9d vpxor %xmm6, %xmm8, %xmm8 # rnd_0: 5 - 5 xorl %ecx, %edx movl %r11d, %eax addl %r9d, %r13d vpshufb %xmm11, %xmm8, %xmm8 # rnd_0: 6 - 6 xorl %r10d, %eax addl %edx, %r9d andl %eax, %ebx vpaddd %xmm8, %xmm4, %xmm4 # rnd_0: 7 - 7 xorl %r11d, %ebx rorxl $6, %r13d, %edx addl %ebx, %r9d # rnd_1: 0 - 0 movl %r14d, %ebx rorxl $11, %r13d, %ecx addl 28(%rsp), %r8d vpshufd $0x50, %xmm4, %xmm6 # rnd_1: 1 - 1 xorl %edx, %ecx xorl %r15d, %ebx rorxl $25, %r13d, %edx vpsrld $10, %xmm6, %xmm9 # rnd_1: 2 - 2 andl %r13d, %ebx xorl %ecx, %edx rorxl $13, %r9d, %ecx vpsrlq $19, %xmm6, %xmm7 # rnd_1: 3 - 3 addl %edx, %r8d rorxl $2, %r9d, %edx xorl %r15d, %ebx vpsrlq $0x11, %xmm6, %xmm6 # rnd_1: 4 - 4 xorl %edx, %ecx rorxl $22, %r9d, %edx addl %ebx, %r8d vpxor %xmm7, %xmm6, %xmm6 # rnd_1: 5 - 5 xorl %ecx, %edx movl %r10d, %ebx addl %r8d, %r12d vpxor %xmm6, %xmm9, %xmm9 # rnd_1: 6 - 6 xorl %r9d, %ebx addl %edx, %r8d andl %ebx, %eax vpshufb %xmm12, %xmm9, %xmm9 # rnd_1: 7 - 7 xorl %r10d, %eax rorxl $6, %r12d, %edx addl %eax, %r8d vpaddd %xmm4, %xmm9, %xmm1 # msg_sched done: 4-7 # msg_sched: 8-11 # rnd_0: 0 - 0 movl %r13d, %eax rorxl $11, %r12d, %ecx addl 32(%rsp), %r15d vpalignr $4, %xmm0, %xmm1, %xmm4 vpalignr $4, %xmm2, %xmm3, %xmm5 # rnd_0: 1 - 2 xorl %edx, %ecx xorl %r14d, %eax rorxl $25, %r12d, %edx andl %r12d, %eax xorl %ecx, %edx rorxl $13, %r8d, %ecx vpsrld $7, %xmm5, %xmm6 vpslld $25, %xmm5, %xmm7 # rnd_0: 3 - 4 addl %edx, %r15d rorxl $2, %r8d, %edx xorl %r14d, %eax xorl %edx, %ecx rorxl $22, %r8d, %edx addl %eax, %r15d vpsrld $3, %xmm5, %xmm8 vpor %xmm6, %xmm7, %xmm7 # rnd_0: 5 - 7 xorl %ecx, %edx movl %r9d, %eax addl %r15d, %r11d xorl %r8d, %eax addl %edx, %r15d andl %eax, %ebx xorl %r9d, %ebx rorxl $6, %r11d, %edx addl %ebx, %r15d # rnd_1: 0 - 0 movl %r12d, %ebx rorxl $11, %r11d, %ecx addl 36(%rsp), %r14d vpsrld $18, %xmm5, %xmm6 # rnd_1: 1 - 1 xorl %edx, %ecx xorl %r13d, %ebx rorxl $25, %r11d, %edx vpslld $14, %xmm5, %xmm5 # rnd_1: 2 - 2 andl %r11d, %ebx xorl %ecx, %edx rorxl $13, %r15d, %ecx vpxor %xmm5, %xmm7, %xmm7 # rnd_1: 3 - 3 addl %edx, %r14d rorxl $2, %r15d, %edx xorl %r13d, %ebx vpxor %xmm6, %xmm7, %xmm7 # rnd_1: 4 - 4 xorl %edx, %ecx rorxl $22, %r15d, %edx addl %ebx, %r14d vpshufd $0xfa, %xmm1, %xmm6 # rnd_1: 5 - 5 xorl %ecx, %edx movl %r8d, %ebx addl %r14d, %r10d vpxor %xmm8, %xmm7, %xmm5 # rnd_1: 6 - 6 xorl %r15d, %ebx addl %edx, %r14d andl %ebx, %eax vpsrld $10, %xmm6, %xmm8 # 
rnd_1: 7 - 7 xorl %r8d, %eax rorxl $6, %r10d, %edx addl %eax, %r14d # rnd_0: 0 - 0 movl %r11d, %eax rorxl $11, %r10d, %ecx addl 40(%rsp), %r13d vpsrlq $19, %xmm6, %xmm7 # rnd_0: 1 - 1 xorl %edx, %ecx xorl %r12d, %eax rorxl $25, %r10d, %edx vpsrlq $0x11, %xmm6, %xmm6 vpaddd %xmm2, %xmm4, %xmm4 # rnd_0: 2 - 2 andl %r10d, %eax xorl %ecx, %edx rorxl $13, %r14d, %ecx vpaddd %xmm5, %xmm4, %xmm4 # rnd_0: 3 - 3 addl %edx, %r13d rorxl $2, %r14d, %edx xorl %r12d, %eax vpxor %xmm7, %xmm6, %xmm6 # rnd_0: 4 - 4 xorl %edx, %ecx rorxl $22, %r14d, %edx addl %eax, %r13d vpxor %xmm6, %xmm8, %xmm8 # rnd_0: 5 - 5 xorl %ecx, %edx movl %r15d, %eax addl %r13d, %r9d vpshufb %xmm11, %xmm8, %xmm8 # rnd_0: 6 - 6 xorl %r14d, %eax addl %edx, %r13d andl %eax, %ebx vpaddd %xmm8, %xmm4, %xmm4 # rnd_0: 7 - 7 xorl %r15d, %ebx rorxl $6, %r9d, %edx addl %ebx, %r13d # rnd_1: 0 - 0 movl %r10d, %ebx rorxl $11, %r9d, %ecx addl 44(%rsp), %r12d vpshufd $0x50, %xmm4, %xmm6 # rnd_1: 1 - 1 xorl %edx, %ecx xorl %r11d, %ebx rorxl $25, %r9d, %edx vpsrld $10, %xmm6, %xmm9 # rnd_1: 2 - 2 andl %r9d, %ebx xorl %ecx, %edx rorxl $13, %r13d, %ecx vpsrlq $19, %xmm6, %xmm7 # rnd_1: 3 - 3 addl %edx, %r12d rorxl $2, %r13d, %edx xorl %r11d, %ebx vpsrlq $0x11, %xmm6, %xmm6 # rnd_1: 4 - 4 xorl %edx, %ecx rorxl $22, %r13d, %edx addl %ebx, %r12d vpxor %xmm7, %xmm6, %xmm6 # rnd_1: 5 - 5 xorl %ecx, %edx movl %r14d, %ebx addl %r12d, %r8d vpxor %xmm6, %xmm9, %xmm9 # rnd_1: 6 - 6 xorl %r13d, %ebx addl %edx, %r12d andl %ebx, %eax vpshufb %xmm12, %xmm9, %xmm9 # rnd_1: 7 - 7 xorl %r14d, %eax rorxl $6, %r8d, %edx addl %eax, %r12d vpaddd %xmm4, %xmm9, %xmm2 # msg_sched done: 8-11 # msg_sched: 12-15 # rnd_0: 0 - 0 movl %r9d, %eax rorxl $11, %r8d, %ecx addl 48(%rsp), %r11d vpalignr $4, %xmm1, %xmm2, %xmm4 vpalignr $4, %xmm3, %xmm0, %xmm5 # rnd_0: 1 - 2 xorl %edx, %ecx xorl %r10d, %eax rorxl $25, %r8d, %edx andl %r8d, %eax xorl %ecx, %edx rorxl $13, %r12d, %ecx vpsrld $7, %xmm5, %xmm6 vpslld $25, %xmm5, %xmm7 # rnd_0: 3 - 4 addl %edx, %r11d rorxl $2, %r12d, %edx xorl %r10d, %eax xorl %edx, %ecx rorxl $22, %r12d, %edx addl %eax, %r11d vpsrld $3, %xmm5, %xmm8 vpor %xmm6, %xmm7, %xmm7 # rnd_0: 5 - 7 xorl %ecx, %edx movl %r13d, %eax addl %r11d, %r15d xorl %r12d, %eax addl %edx, %r11d andl %eax, %ebx xorl %r13d, %ebx rorxl $6, %r15d, %edx addl %ebx, %r11d # rnd_1: 0 - 0 movl %r8d, %ebx rorxl $11, %r15d, %ecx addl 52(%rsp), %r10d vpsrld $18, %xmm5, %xmm6 # rnd_1: 1 - 1 xorl %edx, %ecx xorl %r9d, %ebx rorxl $25, %r15d, %edx vpslld $14, %xmm5, %xmm5 # rnd_1: 2 - 2 andl %r15d, %ebx xorl %ecx, %edx rorxl $13, %r11d, %ecx vpxor %xmm5, %xmm7, %xmm7 # rnd_1: 3 - 3 addl %edx, %r10d rorxl $2, %r11d, %edx xorl %r9d, %ebx vpxor %xmm6, %xmm7, %xmm7 # rnd_1: 4 - 4 xorl %edx, %ecx rorxl $22, %r11d, %edx addl %ebx, %r10d vpshufd $0xfa, %xmm2, %xmm6 # rnd_1: 5 - 5 xorl %ecx, %edx movl %r12d, %ebx addl %r10d, %r14d vpxor %xmm8, %xmm7, %xmm5 # rnd_1: 6 - 6 xorl %r11d, %ebx addl %edx, %r10d andl %ebx, %eax vpsrld $10, %xmm6, %xmm8 # rnd_1: 7 - 7 xorl %r12d, %eax rorxl $6, %r14d, %edx addl %eax, %r10d # rnd_0: 0 - 0 movl %r15d, %eax rorxl $11, %r14d, %ecx addl 56(%rsp), %r9d vpsrlq $19, %xmm6, %xmm7 # rnd_0: 1 - 1 xorl %edx, %ecx xorl %r8d, %eax rorxl $25, %r14d, %edx vpsrlq $0x11, %xmm6, %xmm6 vpaddd %xmm3, %xmm4, %xmm4 # rnd_0: 2 - 2 andl %r14d, %eax xorl %ecx, %edx rorxl $13, %r10d, %ecx vpaddd %xmm5, %xmm4, %xmm4 # rnd_0: 3 - 3 addl %edx, %r9d rorxl $2, %r10d, %edx xorl %r8d, %eax vpxor %xmm7, %xmm6, %xmm6 # rnd_0: 4 - 4 xorl %edx, %ecx rorxl $22, %r10d, %edx addl %eax, %r9d vpxor 
%xmm6, %xmm8, %xmm8 # rnd_0: 5 - 5 xorl %ecx, %edx movl %r11d, %eax addl %r9d, %r13d vpshufb %xmm11, %xmm8, %xmm8 # rnd_0: 6 - 6 xorl %r10d, %eax addl %edx, %r9d andl %eax, %ebx vpaddd %xmm8, %xmm4, %xmm4 # rnd_0: 7 - 7 xorl %r11d, %ebx rorxl $6, %r13d, %edx addl %ebx, %r9d # rnd_1: 0 - 0 movl %r14d, %ebx rorxl $11, %r13d, %ecx addl 60(%rsp), %r8d vpshufd $0x50, %xmm4, %xmm6 # rnd_1: 1 - 1 xorl %edx, %ecx xorl %r15d, %ebx rorxl $25, %r13d, %edx vpsrld $10, %xmm6, %xmm9 # rnd_1: 2 - 2 andl %r13d, %ebx xorl %ecx, %edx rorxl $13, %r9d, %ecx vpsrlq $19, %xmm6, %xmm7 # rnd_1: 3 - 3 addl %edx, %r8d rorxl $2, %r9d, %edx xorl %r15d, %ebx vpsrlq $0x11, %xmm6, %xmm6 # rnd_1: 4 - 4 xorl %edx, %ecx rorxl $22, %r9d, %edx addl %ebx, %r8d vpxor %xmm7, %xmm6, %xmm6 # rnd_1: 5 - 5 xorl %ecx, %edx movl %r10d, %ebx addl %r8d, %r12d vpxor %xmm6, %xmm9, %xmm9 # rnd_1: 6 - 6 xorl %r9d, %ebx addl %edx, %r8d andl %ebx, %eax vpshufb %xmm12, %xmm9, %xmm9 # rnd_1: 7 - 7 xorl %r10d, %eax rorxl $6, %r12d, %edx addl %eax, %r8d vpaddd %xmm4, %xmm9, %xmm3 # msg_sched done: 12-15 # set_w_k_xfer_4: 4 vpaddd 64+L_avx1_rorx_sha256_k(%rip), %xmm0, %xmm4 vpaddd 80+L_avx1_rorx_sha256_k(%rip), %xmm1, %xmm5 vmovdqu %xmm4, (%rsp) vmovdqu %xmm5, 16(%rsp) vpaddd 96+L_avx1_rorx_sha256_k(%rip), %xmm2, %xmm6 vpaddd 112+L_avx1_rorx_sha256_k(%rip), %xmm3, %xmm7 vmovdqu %xmm6, 32(%rsp) vmovdqu %xmm7, 48(%rsp) # msg_sched: 0-3 # rnd_0: 0 - 0 movl %r13d, %eax rorxl $11, %r12d, %ecx addl (%rsp), %r15d vpalignr $4, %xmm2, %xmm3, %xmm4 vpalignr $4, %xmm0, %xmm1, %xmm5 # rnd_0: 1 - 2 xorl %edx, %ecx xorl %r14d, %eax rorxl $25, %r12d, %edx andl %r12d, %eax xorl %ecx, %edx rorxl $13, %r8d, %ecx vpsrld $7, %xmm5, %xmm6 vpslld $25, %xmm5, %xmm7 # rnd_0: 3 - 4 addl %edx, %r15d rorxl $2, %r8d, %edx xorl %r14d, %eax xorl %edx, %ecx rorxl $22, %r8d, %edx addl %eax, %r15d vpsrld $3, %xmm5, %xmm8 vpor %xmm6, %xmm7, %xmm7 # rnd_0: 5 - 7 xorl %ecx, %edx movl %r9d, %eax addl %r15d, %r11d xorl %r8d, %eax addl %edx, %r15d andl %eax, %ebx xorl %r9d, %ebx rorxl $6, %r11d, %edx addl %ebx, %r15d # rnd_1: 0 - 0 movl %r12d, %ebx rorxl $11, %r11d, %ecx addl 4(%rsp), %r14d vpsrld $18, %xmm5, %xmm6 # rnd_1: 1 - 1 xorl %edx, %ecx xorl %r13d, %ebx rorxl $25, %r11d, %edx vpslld $14, %xmm5, %xmm5 # rnd_1: 2 - 2 andl %r11d, %ebx xorl %ecx, %edx rorxl $13, %r15d, %ecx vpxor %xmm5, %xmm7, %xmm7 # rnd_1: 3 - 3 addl %edx, %r14d rorxl $2, %r15d, %edx xorl %r13d, %ebx vpxor %xmm6, %xmm7, %xmm7 # rnd_1: 4 - 4 xorl %edx, %ecx rorxl $22, %r15d, %edx addl %ebx, %r14d vpshufd $0xfa, %xmm3, %xmm6 # rnd_1: 5 - 5 xorl %ecx, %edx movl %r8d, %ebx addl %r14d, %r10d vpxor %xmm8, %xmm7, %xmm5 # rnd_1: 6 - 6 xorl %r15d, %ebx addl %edx, %r14d andl %ebx, %eax vpsrld $10, %xmm6, %xmm8 # rnd_1: 7 - 7 xorl %r8d, %eax rorxl $6, %r10d, %edx addl %eax, %r14d # rnd_0: 0 - 0 movl %r11d, %eax rorxl $11, %r10d, %ecx addl 8(%rsp), %r13d vpsrlq $19, %xmm6, %xmm7 # rnd_0: 1 - 1 xorl %edx, %ecx xorl %r12d, %eax rorxl $25, %r10d, %edx vpsrlq $0x11, %xmm6, %xmm6 vpaddd %xmm0, %xmm4, %xmm4 # rnd_0: 2 - 2 andl %r10d, %eax xorl %ecx, %edx rorxl $13, %r14d, %ecx vpaddd %xmm5, %xmm4, %xmm4 # rnd_0: 3 - 3 addl %edx, %r13d rorxl $2, %r14d, %edx xorl %r12d, %eax vpxor %xmm7, %xmm6, %xmm6 # rnd_0: 4 - 4 xorl %edx, %ecx rorxl $22, %r14d, %edx addl %eax, %r13d vpxor %xmm6, %xmm8, %xmm8 # rnd_0: 5 - 5 xorl %ecx, %edx movl %r15d, %eax addl %r13d, %r9d vpshufb %xmm11, %xmm8, %xmm8 # rnd_0: 6 - 6 xorl %r14d, %eax addl %edx, %r13d andl %eax, %ebx vpaddd %xmm8, %xmm4, %xmm4 # rnd_0: 7 - 7 xorl %r15d, %ebx rorxl $6, %r9d, %edx 
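# Note: this _RORX variant computes Sigma0/Sigma1 with BMI2 rorxl, which
# rotates into a separate destination register without modifying the source
# and without touching the flags, so the rotate amounts 2/13/22 (Sigma0 of a)
# and 6/11/25 (Sigma1 of e) are issued directly instead of the in-place
# ror/xor chains used by the plain Transform_Sha256_AVX1 path.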
addl %ebx, %r13d # rnd_1: 0 - 0 movl %r10d, %ebx rorxl $11, %r9d, %ecx addl 12(%rsp), %r12d vpshufd $0x50, %xmm4, %xmm6 # rnd_1: 1 - 1 xorl %edx, %ecx xorl %r11d, %ebx rorxl $25, %r9d, %edx vpsrld $10, %xmm6, %xmm9 # rnd_1: 2 - 2 andl %r9d, %ebx xorl %ecx, %edx rorxl $13, %r13d, %ecx vpsrlq $19, %xmm6, %xmm7 # rnd_1: 3 - 3 addl %edx, %r12d rorxl $2, %r13d, %edx xorl %r11d, %ebx vpsrlq $0x11, %xmm6, %xmm6 # rnd_1: 4 - 4 xorl %edx, %ecx rorxl $22, %r13d, %edx addl %ebx, %r12d vpxor %xmm7, %xmm6, %xmm6 # rnd_1: 5 - 5 xorl %ecx, %edx movl %r14d, %ebx addl %r12d, %r8d vpxor %xmm6, %xmm9, %xmm9 # rnd_1: 6 - 6 xorl %r13d, %ebx addl %edx, %r12d andl %ebx, %eax vpshufb %xmm12, %xmm9, %xmm9 # rnd_1: 7 - 7 xorl %r14d, %eax rorxl $6, %r8d, %edx addl %eax, %r12d vpaddd %xmm4, %xmm9, %xmm0 # msg_sched done: 0-3 # msg_sched: 4-7 # rnd_0: 0 - 0 movl %r9d, %eax rorxl $11, %r8d, %ecx addl 16(%rsp), %r11d vpalignr $4, %xmm3, %xmm0, %xmm4 vpalignr $4, %xmm1, %xmm2, %xmm5 # rnd_0: 1 - 2 xorl %edx, %ecx xorl %r10d, %eax rorxl $25, %r8d, %edx andl %r8d, %eax xorl %ecx, %edx rorxl $13, %r12d, %ecx vpsrld $7, %xmm5, %xmm6 vpslld $25, %xmm5, %xmm7 # rnd_0: 3 - 4 addl %edx, %r11d rorxl $2, %r12d, %edx xorl %r10d, %eax xorl %edx, %ecx rorxl $22, %r12d, %edx addl %eax, %r11d vpsrld $3, %xmm5, %xmm8 vpor %xmm6, %xmm7, %xmm7 # rnd_0: 5 - 7 xorl %ecx, %edx movl %r13d, %eax addl %r11d, %r15d xorl %r12d, %eax addl %edx, %r11d andl %eax, %ebx xorl %r13d, %ebx rorxl $6, %r15d, %edx addl %ebx, %r11d # rnd_1: 0 - 0 movl %r8d, %ebx rorxl $11, %r15d, %ecx addl 20(%rsp), %r10d vpsrld $18, %xmm5, %xmm6 # rnd_1: 1 - 1 xorl %edx, %ecx xorl %r9d, %ebx rorxl $25, %r15d, %edx vpslld $14, %xmm5, %xmm5 # rnd_1: 2 - 2 andl %r15d, %ebx xorl %ecx, %edx rorxl $13, %r11d, %ecx vpxor %xmm5, %xmm7, %xmm7 # rnd_1: 3 - 3 addl %edx, %r10d rorxl $2, %r11d, %edx xorl %r9d, %ebx vpxor %xmm6, %xmm7, %xmm7 # rnd_1: 4 - 4 xorl %edx, %ecx rorxl $22, %r11d, %edx addl %ebx, %r10d vpshufd $0xfa, %xmm0, %xmm6 # rnd_1: 5 - 5 xorl %ecx, %edx movl %r12d, %ebx addl %r10d, %r14d vpxor %xmm8, %xmm7, %xmm5 # rnd_1: 6 - 6 xorl %r11d, %ebx addl %edx, %r10d andl %ebx, %eax vpsrld $10, %xmm6, %xmm8 # rnd_1: 7 - 7 xorl %r12d, %eax rorxl $6, %r14d, %edx addl %eax, %r10d # rnd_0: 0 - 0 movl %r15d, %eax rorxl $11, %r14d, %ecx addl 24(%rsp), %r9d vpsrlq $19, %xmm6, %xmm7 # rnd_0: 1 - 1 xorl %edx, %ecx xorl %r8d, %eax rorxl $25, %r14d, %edx vpsrlq $0x11, %xmm6, %xmm6 vpaddd %xmm1, %xmm4, %xmm4 # rnd_0: 2 - 2 andl %r14d, %eax xorl %ecx, %edx rorxl $13, %r10d, %ecx vpaddd %xmm5, %xmm4, %xmm4 # rnd_0: 3 - 3 addl %edx, %r9d rorxl $2, %r10d, %edx xorl %r8d, %eax vpxor %xmm7, %xmm6, %xmm6 # rnd_0: 4 - 4 xorl %edx, %ecx rorxl $22, %r10d, %edx addl %eax, %r9d vpxor %xmm6, %xmm8, %xmm8 # rnd_0: 5 - 5 xorl %ecx, %edx movl %r11d, %eax addl %r9d, %r13d vpshufb %xmm11, %xmm8, %xmm8 # rnd_0: 6 - 6 xorl %r10d, %eax addl %edx, %r9d andl %eax, %ebx vpaddd %xmm8, %xmm4, %xmm4 # rnd_0: 7 - 7 xorl %r11d, %ebx rorxl $6, %r13d, %edx addl %ebx, %r9d # rnd_1: 0 - 0 movl %r14d, %ebx rorxl $11, %r13d, %ecx addl 28(%rsp), %r8d vpshufd $0x50, %xmm4, %xmm6 # rnd_1: 1 - 1 xorl %edx, %ecx xorl %r15d, %ebx rorxl $25, %r13d, %edx vpsrld $10, %xmm6, %xmm9 # rnd_1: 2 - 2 andl %r13d, %ebx xorl %ecx, %edx rorxl $13, %r9d, %ecx vpsrlq $19, %xmm6, %xmm7 # rnd_1: 3 - 3 addl %edx, %r8d rorxl $2, %r9d, %edx xorl %r15d, %ebx vpsrlq $0x11, %xmm6, %xmm6 # rnd_1: 4 - 4 xorl %edx, %ecx rorxl $22, %r9d, %edx addl %ebx, %r8d vpxor %xmm7, %xmm6, %xmm6 # rnd_1: 5 - 5 xorl %ecx, %edx movl %r10d, %ebx addl %r8d, %r12d vpxor 
%xmm6, %xmm9, %xmm9 # rnd_1: 6 - 6 xorl %r9d, %ebx addl %edx, %r8d andl %ebx, %eax vpshufb %xmm12, %xmm9, %xmm9 # rnd_1: 7 - 7 xorl %r10d, %eax rorxl $6, %r12d, %edx addl %eax, %r8d vpaddd %xmm4, %xmm9, %xmm1 # msg_sched done: 4-7 # msg_sched: 8-11 # rnd_0: 0 - 0 movl %r13d, %eax rorxl $11, %r12d, %ecx addl 32(%rsp), %r15d vpalignr $4, %xmm0, %xmm1, %xmm4 vpalignr $4, %xmm2, %xmm3, %xmm5 # rnd_0: 1 - 2 xorl %edx, %ecx xorl %r14d, %eax rorxl $25, %r12d, %edx andl %r12d, %eax xorl %ecx, %edx rorxl $13, %r8d, %ecx vpsrld $7, %xmm5, %xmm6 vpslld $25, %xmm5, %xmm7 # rnd_0: 3 - 4 addl %edx, %r15d rorxl $2, %r8d, %edx xorl %r14d, %eax xorl %edx, %ecx rorxl $22, %r8d, %edx addl %eax, %r15d vpsrld $3, %xmm5, %xmm8 vpor %xmm6, %xmm7, %xmm7 # rnd_0: 5 - 7 xorl %ecx, %edx movl %r9d, %eax addl %r15d, %r11d xorl %r8d, %eax addl %edx, %r15d andl %eax, %ebx xorl %r9d, %ebx rorxl $6, %r11d, %edx addl %ebx, %r15d # rnd_1: 0 - 0 movl %r12d, %ebx rorxl $11, %r11d, %ecx addl 36(%rsp), %r14d vpsrld $18, %xmm5, %xmm6 # rnd_1: 1 - 1 xorl %edx, %ecx xorl %r13d, %ebx rorxl $25, %r11d, %edx vpslld $14, %xmm5, %xmm5 # rnd_1: 2 - 2 andl %r11d, %ebx xorl %ecx, %edx rorxl $13, %r15d, %ecx vpxor %xmm5, %xmm7, %xmm7 # rnd_1: 3 - 3 addl %edx, %r14d rorxl $2, %r15d, %edx xorl %r13d, %ebx vpxor %xmm6, %xmm7, %xmm7 # rnd_1: 4 - 4 xorl %edx, %ecx rorxl $22, %r15d, %edx addl %ebx, %r14d vpshufd $0xfa, %xmm1, %xmm6 # rnd_1: 5 - 5 xorl %ecx, %edx movl %r8d, %ebx addl %r14d, %r10d vpxor %xmm8, %xmm7, %xmm5 # rnd_1: 6 - 6 xorl %r15d, %ebx addl %edx, %r14d andl %ebx, %eax vpsrld $10, %xmm6, %xmm8 # rnd_1: 7 - 7 xorl %r8d, %eax rorxl $6, %r10d, %edx addl %eax, %r14d # rnd_0: 0 - 0 movl %r11d, %eax rorxl $11, %r10d, %ecx addl 40(%rsp), %r13d vpsrlq $19, %xmm6, %xmm7 # rnd_0: 1 - 1 xorl %edx, %ecx xorl %r12d, %eax rorxl $25, %r10d, %edx vpsrlq $0x11, %xmm6, %xmm6 vpaddd %xmm2, %xmm4, %xmm4 # rnd_0: 2 - 2 andl %r10d, %eax xorl %ecx, %edx rorxl $13, %r14d, %ecx vpaddd %xmm5, %xmm4, %xmm4 # rnd_0: 3 - 3 addl %edx, %r13d rorxl $2, %r14d, %edx xorl %r12d, %eax vpxor %xmm7, %xmm6, %xmm6 # rnd_0: 4 - 4 xorl %edx, %ecx rorxl $22, %r14d, %edx addl %eax, %r13d vpxor %xmm6, %xmm8, %xmm8 # rnd_0: 5 - 5 xorl %ecx, %edx movl %r15d, %eax addl %r13d, %r9d vpshufb %xmm11, %xmm8, %xmm8 # rnd_0: 6 - 6 xorl %r14d, %eax addl %edx, %r13d andl %eax, %ebx vpaddd %xmm8, %xmm4, %xmm4 # rnd_0: 7 - 7 xorl %r15d, %ebx rorxl $6, %r9d, %edx addl %ebx, %r13d # rnd_1: 0 - 0 movl %r10d, %ebx rorxl $11, %r9d, %ecx addl 44(%rsp), %r12d vpshufd $0x50, %xmm4, %xmm6 # rnd_1: 1 - 1 xorl %edx, %ecx xorl %r11d, %ebx rorxl $25, %r9d, %edx vpsrld $10, %xmm6, %xmm9 # rnd_1: 2 - 2 andl %r9d, %ebx xorl %ecx, %edx rorxl $13, %r13d, %ecx vpsrlq $19, %xmm6, %xmm7 # rnd_1: 3 - 3 addl %edx, %r12d rorxl $2, %r13d, %edx xorl %r11d, %ebx vpsrlq $0x11, %xmm6, %xmm6 # rnd_1: 4 - 4 xorl %edx, %ecx rorxl $22, %r13d, %edx addl %ebx, %r12d vpxor %xmm7, %xmm6, %xmm6 # rnd_1: 5 - 5 xorl %ecx, %edx movl %r14d, %ebx addl %r12d, %r8d vpxor %xmm6, %xmm9, %xmm9 # rnd_1: 6 - 6 xorl %r13d, %ebx addl %edx, %r12d andl %ebx, %eax vpshufb %xmm12, %xmm9, %xmm9 # rnd_1: 7 - 7 xorl %r14d, %eax rorxl $6, %r8d, %edx addl %eax, %r12d vpaddd %xmm4, %xmm9, %xmm2 # msg_sched done: 8-11 # msg_sched: 12-15 # rnd_0: 0 - 0 movl %r9d, %eax rorxl $11, %r8d, %ecx addl 48(%rsp), %r11d vpalignr $4, %xmm1, %xmm2, %xmm4 vpalignr $4, %xmm3, %xmm0, %xmm5 # rnd_0: 1 - 2 xorl %edx, %ecx xorl %r10d, %eax rorxl $25, %r8d, %edx andl %r8d, %eax xorl %ecx, %edx rorxl $13, %r12d, %ecx vpsrld $7, %xmm5, %xmm6 vpslld $25, %xmm5, %xmm7 # 
rnd_0: 3 - 4 addl %edx, %r11d rorxl $2, %r12d, %edx xorl %r10d, %eax xorl %edx, %ecx rorxl $22, %r12d, %edx addl %eax, %r11d vpsrld $3, %xmm5, %xmm8 vpor %xmm6, %xmm7, %xmm7 # rnd_0: 5 - 7 xorl %ecx, %edx movl %r13d, %eax addl %r11d, %r15d xorl %r12d, %eax addl %edx, %r11d andl %eax, %ebx xorl %r13d, %ebx rorxl $6, %r15d, %edx addl %ebx, %r11d # rnd_1: 0 - 0 movl %r8d, %ebx rorxl $11, %r15d, %ecx addl 52(%rsp), %r10d vpsrld $18, %xmm5, %xmm6 # rnd_1: 1 - 1 xorl %edx, %ecx xorl %r9d, %ebx rorxl $25, %r15d, %edx vpslld $14, %xmm5, %xmm5 # rnd_1: 2 - 2 andl %r15d, %ebx xorl %ecx, %edx rorxl $13, %r11d, %ecx vpxor %xmm5, %xmm7, %xmm7 # rnd_1: 3 - 3 addl %edx, %r10d rorxl $2, %r11d, %edx xorl %r9d, %ebx vpxor %xmm6, %xmm7, %xmm7 # rnd_1: 4 - 4 xorl %edx, %ecx rorxl $22, %r11d, %edx addl %ebx, %r10d vpshufd $0xfa, %xmm2, %xmm6 # rnd_1: 5 - 5 xorl %ecx, %edx movl %r12d, %ebx addl %r10d, %r14d vpxor %xmm8, %xmm7, %xmm5 # rnd_1: 6 - 6 xorl %r11d, %ebx addl %edx, %r10d andl %ebx, %eax vpsrld $10, %xmm6, %xmm8 # rnd_1: 7 - 7 xorl %r12d, %eax rorxl $6, %r14d, %edx addl %eax, %r10d # rnd_0: 0 - 0 movl %r15d, %eax rorxl $11, %r14d, %ecx addl 56(%rsp), %r9d vpsrlq $19, %xmm6, %xmm7 # rnd_0: 1 - 1 xorl %edx, %ecx xorl %r8d, %eax rorxl $25, %r14d, %edx vpsrlq $0x11, %xmm6, %xmm6 vpaddd %xmm3, %xmm4, %xmm4 # rnd_0: 2 - 2 andl %r14d, %eax xorl %ecx, %edx rorxl $13, %r10d, %ecx vpaddd %xmm5, %xmm4, %xmm4 # rnd_0: 3 - 3 addl %edx, %r9d rorxl $2, %r10d, %edx xorl %r8d, %eax vpxor %xmm7, %xmm6, %xmm6 # rnd_0: 4 - 4 xorl %edx, %ecx rorxl $22, %r10d, %edx addl %eax, %r9d vpxor %xmm6, %xmm8, %xmm8 # rnd_0: 5 - 5 xorl %ecx, %edx movl %r11d, %eax addl %r9d, %r13d vpshufb %xmm11, %xmm8, %xmm8 # rnd_0: 6 - 6 xorl %r10d, %eax addl %edx, %r9d andl %eax, %ebx vpaddd %xmm8, %xmm4, %xmm4 # rnd_0: 7 - 7 xorl %r11d, %ebx rorxl $6, %r13d, %edx addl %ebx, %r9d # rnd_1: 0 - 0 movl %r14d, %ebx rorxl $11, %r13d, %ecx addl 60(%rsp), %r8d vpshufd $0x50, %xmm4, %xmm6 # rnd_1: 1 - 1 xorl %edx, %ecx xorl %r15d, %ebx rorxl $25, %r13d, %edx vpsrld $10, %xmm6, %xmm9 # rnd_1: 2 - 2 andl %r13d, %ebx xorl %ecx, %edx rorxl $13, %r9d, %ecx vpsrlq $19, %xmm6, %xmm7 # rnd_1: 3 - 3 addl %edx, %r8d rorxl $2, %r9d, %edx xorl %r15d, %ebx vpsrlq $0x11, %xmm6, %xmm6 # rnd_1: 4 - 4 xorl %edx, %ecx rorxl $22, %r9d, %edx addl %ebx, %r8d vpxor %xmm7, %xmm6, %xmm6 # rnd_1: 5 - 5 xorl %ecx, %edx movl %r10d, %ebx addl %r8d, %r12d vpxor %xmm6, %xmm9, %xmm9 # rnd_1: 6 - 6 xorl %r9d, %ebx addl %edx, %r8d andl %ebx, %eax vpshufb %xmm12, %xmm9, %xmm9 # rnd_1: 7 - 7 xorl %r10d, %eax rorxl $6, %r12d, %edx addl %eax, %r8d vpaddd %xmm4, %xmm9, %xmm3 # msg_sched done: 12-15 # set_w_k_xfer_4: 8 vpaddd 128+L_avx1_rorx_sha256_k(%rip), %xmm0, %xmm4 vpaddd 144+L_avx1_rorx_sha256_k(%rip), %xmm1, %xmm5 vmovdqu %xmm4, (%rsp) vmovdqu %xmm5, 16(%rsp) vpaddd 160+L_avx1_rorx_sha256_k(%rip), %xmm2, %xmm6 vpaddd 176+L_avx1_rorx_sha256_k(%rip), %xmm3, %xmm7 vmovdqu %xmm6, 32(%rsp) vmovdqu %xmm7, 48(%rsp) # msg_sched: 0-3 # rnd_0: 0 - 0 movl %r13d, %eax rorxl $11, %r12d, %ecx addl (%rsp), %r15d vpalignr $4, %xmm2, %xmm3, %xmm4 vpalignr $4, %xmm0, %xmm1, %xmm5 # rnd_0: 1 - 2 xorl %edx, %ecx xorl %r14d, %eax rorxl $25, %r12d, %edx andl %r12d, %eax xorl %ecx, %edx rorxl $13, %r8d, %ecx vpsrld $7, %xmm5, %xmm6 vpslld $25, %xmm5, %xmm7 # rnd_0: 3 - 4 addl %edx, %r15d rorxl $2, %r8d, %edx xorl %r14d, %eax xorl %edx, %ecx rorxl $22, %r8d, %edx addl %eax, %r15d vpsrld $3, %xmm5, %xmm8 vpor %xmm6, %xmm7, %xmm7 # rnd_0: 5 - 7 xorl %ecx, %edx movl %r9d, %eax addl %r15d, %r11d xorl %r8d, %eax 
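# Note: the interleaved vector code implements the SHA-256 message schedule
#   W[t] = sigma1(W[t-2]) + W[t-7] + sigma0(W[t-15]) + W[t-16]
# where sigma0(x) = rotr7(x) ^ rotr18(x) ^ shr3(x) and
#       sigma1(x) = rotr17(x) ^ rotr19(x) ^ shr10(x).
# AVX1 has no vector rotate, so rotr7/rotr18 are built from the vpsrld/vpslld
# pairs ($7/$25 and $18/$14), and sigma1 uses 64-bit vpsrlq by 0x11 (17) and
# 19 on the words selected by vpshufd, with the shuf_00BA / shuf_DC00 masks
# merging the two halves back into a full 128-bit schedule vector.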
addl %edx, %r15d andl %eax, %ebx xorl %r9d, %ebx rorxl $6, %r11d, %edx addl %ebx, %r15d # rnd_1: 0 - 0 movl %r12d, %ebx rorxl $11, %r11d, %ecx addl 4(%rsp), %r14d vpsrld $18, %xmm5, %xmm6 # rnd_1: 1 - 1 xorl %edx, %ecx xorl %r13d, %ebx rorxl $25, %r11d, %edx vpslld $14, %xmm5, %xmm5 # rnd_1: 2 - 2 andl %r11d, %ebx xorl %ecx, %edx rorxl $13, %r15d, %ecx vpxor %xmm5, %xmm7, %xmm7 # rnd_1: 3 - 3 addl %edx, %r14d rorxl $2, %r15d, %edx xorl %r13d, %ebx vpxor %xmm6, %xmm7, %xmm7 # rnd_1: 4 - 4 xorl %edx, %ecx rorxl $22, %r15d, %edx addl %ebx, %r14d vpshufd $0xfa, %xmm3, %xmm6 # rnd_1: 5 - 5 xorl %ecx, %edx movl %r8d, %ebx addl %r14d, %r10d vpxor %xmm8, %xmm7, %xmm5 # rnd_1: 6 - 6 xorl %r15d, %ebx addl %edx, %r14d andl %ebx, %eax vpsrld $10, %xmm6, %xmm8 # rnd_1: 7 - 7 xorl %r8d, %eax rorxl $6, %r10d, %edx addl %eax, %r14d # rnd_0: 0 - 0 movl %r11d, %eax rorxl $11, %r10d, %ecx addl 8(%rsp), %r13d vpsrlq $19, %xmm6, %xmm7 # rnd_0: 1 - 1 xorl %edx, %ecx xorl %r12d, %eax rorxl $25, %r10d, %edx vpsrlq $0x11, %xmm6, %xmm6 vpaddd %xmm0, %xmm4, %xmm4 # rnd_0: 2 - 2 andl %r10d, %eax xorl %ecx, %edx rorxl $13, %r14d, %ecx vpaddd %xmm5, %xmm4, %xmm4 # rnd_0: 3 - 3 addl %edx, %r13d rorxl $2, %r14d, %edx xorl %r12d, %eax vpxor %xmm7, %xmm6, %xmm6 # rnd_0: 4 - 4 xorl %edx, %ecx rorxl $22, %r14d, %edx addl %eax, %r13d vpxor %xmm6, %xmm8, %xmm8 # rnd_0: 5 - 5 xorl %ecx, %edx movl %r15d, %eax addl %r13d, %r9d vpshufb %xmm11, %xmm8, %xmm8 # rnd_0: 6 - 6 xorl %r14d, %eax addl %edx, %r13d andl %eax, %ebx vpaddd %xmm8, %xmm4, %xmm4 # rnd_0: 7 - 7 xorl %r15d, %ebx rorxl $6, %r9d, %edx addl %ebx, %r13d # rnd_1: 0 - 0 movl %r10d, %ebx rorxl $11, %r9d, %ecx addl 12(%rsp), %r12d vpshufd $0x50, %xmm4, %xmm6 # rnd_1: 1 - 1 xorl %edx, %ecx xorl %r11d, %ebx rorxl $25, %r9d, %edx vpsrld $10, %xmm6, %xmm9 # rnd_1: 2 - 2 andl %r9d, %ebx xorl %ecx, %edx rorxl $13, %r13d, %ecx vpsrlq $19, %xmm6, %xmm7 # rnd_1: 3 - 3 addl %edx, %r12d rorxl $2, %r13d, %edx xorl %r11d, %ebx vpsrlq $0x11, %xmm6, %xmm6 # rnd_1: 4 - 4 xorl %edx, %ecx rorxl $22, %r13d, %edx addl %ebx, %r12d vpxor %xmm7, %xmm6, %xmm6 # rnd_1: 5 - 5 xorl %ecx, %edx movl %r14d, %ebx addl %r12d, %r8d vpxor %xmm6, %xmm9, %xmm9 # rnd_1: 6 - 6 xorl %r13d, %ebx addl %edx, %r12d andl %ebx, %eax vpshufb %xmm12, %xmm9, %xmm9 # rnd_1: 7 - 7 xorl %r14d, %eax rorxl $6, %r8d, %edx addl %eax, %r12d vpaddd %xmm4, %xmm9, %xmm0 # msg_sched done: 0-3 # msg_sched: 4-7 # rnd_0: 0 - 0 movl %r9d, %eax rorxl $11, %r8d, %ecx addl 16(%rsp), %r11d vpalignr $4, %xmm3, %xmm0, %xmm4 vpalignr $4, %xmm1, %xmm2, %xmm5 # rnd_0: 1 - 2 xorl %edx, %ecx xorl %r10d, %eax rorxl $25, %r8d, %edx andl %r8d, %eax xorl %ecx, %edx rorxl $13, %r12d, %ecx vpsrld $7, %xmm5, %xmm6 vpslld $25, %xmm5, %xmm7 # rnd_0: 3 - 4 addl %edx, %r11d rorxl $2, %r12d, %edx xorl %r10d, %eax xorl %edx, %ecx rorxl $22, %r12d, %edx addl %eax, %r11d vpsrld $3, %xmm5, %xmm8 vpor %xmm6, %xmm7, %xmm7 # rnd_0: 5 - 7 xorl %ecx, %edx movl %r13d, %eax addl %r11d, %r15d xorl %r12d, %eax addl %edx, %r11d andl %eax, %ebx xorl %r13d, %ebx rorxl $6, %r15d, %edx addl %ebx, %r11d # rnd_1: 0 - 0 movl %r8d, %ebx rorxl $11, %r15d, %ecx addl 20(%rsp), %r10d vpsrld $18, %xmm5, %xmm6 # rnd_1: 1 - 1 xorl %edx, %ecx xorl %r9d, %ebx rorxl $25, %r15d, %edx vpslld $14, %xmm5, %xmm5 # rnd_1: 2 - 2 andl %r15d, %ebx xorl %ecx, %edx rorxl $13, %r11d, %ecx vpxor %xmm5, %xmm7, %xmm7 # rnd_1: 3 - 3 addl %edx, %r10d rorxl $2, %r11d, %edx xorl %r9d, %ebx vpxor %xmm6, %xmm7, %xmm7 # rnd_1: 4 - 4 xorl %edx, %ecx rorxl $22, %r11d, %edx addl %ebx, %r10d vpshufd $0xfa, %xmm0, 
%xmm6 # rnd_1: 5 - 5 xorl %ecx, %edx movl %r12d, %ebx addl %r10d, %r14d vpxor %xmm8, %xmm7, %xmm5 # rnd_1: 6 - 6 xorl %r11d, %ebx addl %edx, %r10d andl %ebx, %eax vpsrld $10, %xmm6, %xmm8 # rnd_1: 7 - 7 xorl %r12d, %eax rorxl $6, %r14d, %edx addl %eax, %r10d # rnd_0: 0 - 0 movl %r15d, %eax rorxl $11, %r14d, %ecx addl 24(%rsp), %r9d vpsrlq $19, %xmm6, %xmm7 # rnd_0: 1 - 1 xorl %edx, %ecx xorl %r8d, %eax rorxl $25, %r14d, %edx vpsrlq $0x11, %xmm6, %xmm6 vpaddd %xmm1, %xmm4, %xmm4 # rnd_0: 2 - 2 andl %r14d, %eax xorl %ecx, %edx rorxl $13, %r10d, %ecx vpaddd %xmm5, %xmm4, %xmm4 # rnd_0: 3 - 3 addl %edx, %r9d rorxl $2, %r10d, %edx xorl %r8d, %eax vpxor %xmm7, %xmm6, %xmm6 # rnd_0: 4 - 4 xorl %edx, %ecx rorxl $22, %r10d, %edx addl %eax, %r9d vpxor %xmm6, %xmm8, %xmm8 # rnd_0: 5 - 5 xorl %ecx, %edx movl %r11d, %eax addl %r9d, %r13d vpshufb %xmm11, %xmm8, %xmm8 # rnd_0: 6 - 6 xorl %r10d, %eax addl %edx, %r9d andl %eax, %ebx vpaddd %xmm8, %xmm4, %xmm4 # rnd_0: 7 - 7 xorl %r11d, %ebx rorxl $6, %r13d, %edx addl %ebx, %r9d # rnd_1: 0 - 0 movl %r14d, %ebx rorxl $11, %r13d, %ecx addl 28(%rsp), %r8d vpshufd $0x50, %xmm4, %xmm6 # rnd_1: 1 - 1 xorl %edx, %ecx xorl %r15d, %ebx rorxl $25, %r13d, %edx vpsrld $10, %xmm6, %xmm9 # rnd_1: 2 - 2 andl %r13d, %ebx xorl %ecx, %edx rorxl $13, %r9d, %ecx vpsrlq $19, %xmm6, %xmm7 # rnd_1: 3 - 3 addl %edx, %r8d rorxl $2, %r9d, %edx xorl %r15d, %ebx vpsrlq $0x11, %xmm6, %xmm6 # rnd_1: 4 - 4 xorl %edx, %ecx rorxl $22, %r9d, %edx addl %ebx, %r8d vpxor %xmm7, %xmm6, %xmm6 # rnd_1: 5 - 5 xorl %ecx, %edx movl %r10d, %ebx addl %r8d, %r12d vpxor %xmm6, %xmm9, %xmm9 # rnd_1: 6 - 6 xorl %r9d, %ebx addl %edx, %r8d andl %ebx, %eax vpshufb %xmm12, %xmm9, %xmm9 # rnd_1: 7 - 7 xorl %r10d, %eax rorxl $6, %r12d, %edx addl %eax, %r8d vpaddd %xmm4, %xmm9, %xmm1 # msg_sched done: 4-7 # msg_sched: 8-11 # rnd_0: 0 - 0 movl %r13d, %eax rorxl $11, %r12d, %ecx addl 32(%rsp), %r15d vpalignr $4, %xmm0, %xmm1, %xmm4 vpalignr $4, %xmm2, %xmm3, %xmm5 # rnd_0: 1 - 2 xorl %edx, %ecx xorl %r14d, %eax rorxl $25, %r12d, %edx andl %r12d, %eax xorl %ecx, %edx rorxl $13, %r8d, %ecx vpsrld $7, %xmm5, %xmm6 vpslld $25, %xmm5, %xmm7 # rnd_0: 3 - 4 addl %edx, %r15d rorxl $2, %r8d, %edx xorl %r14d, %eax xorl %edx, %ecx rorxl $22, %r8d, %edx addl %eax, %r15d vpsrld $3, %xmm5, %xmm8 vpor %xmm6, %xmm7, %xmm7 # rnd_0: 5 - 7 xorl %ecx, %edx movl %r9d, %eax addl %r15d, %r11d xorl %r8d, %eax addl %edx, %r15d andl %eax, %ebx xorl %r9d, %ebx rorxl $6, %r11d, %edx addl %ebx, %r15d # rnd_1: 0 - 0 movl %r12d, %ebx rorxl $11, %r11d, %ecx addl 36(%rsp), %r14d vpsrld $18, %xmm5, %xmm6 # rnd_1: 1 - 1 xorl %edx, %ecx xorl %r13d, %ebx rorxl $25, %r11d, %edx vpslld $14, %xmm5, %xmm5 # rnd_1: 2 - 2 andl %r11d, %ebx xorl %ecx, %edx rorxl $13, %r15d, %ecx vpxor %xmm5, %xmm7, %xmm7 # rnd_1: 3 - 3 addl %edx, %r14d rorxl $2, %r15d, %edx xorl %r13d, %ebx vpxor %xmm6, %xmm7, %xmm7 # rnd_1: 4 - 4 xorl %edx, %ecx rorxl $22, %r15d, %edx addl %ebx, %r14d vpshufd $0xfa, %xmm1, %xmm6 # rnd_1: 5 - 5 xorl %ecx, %edx movl %r8d, %ebx addl %r14d, %r10d vpxor %xmm8, %xmm7, %xmm5 # rnd_1: 6 - 6 xorl %r15d, %ebx addl %edx, %r14d andl %ebx, %eax vpsrld $10, %xmm6, %xmm8 # rnd_1: 7 - 7 xorl %r8d, %eax rorxl $6, %r10d, %edx addl %eax, %r14d # rnd_0: 0 - 0 movl %r11d, %eax rorxl $11, %r10d, %ecx addl 40(%rsp), %r13d vpsrlq $19, %xmm6, %xmm7 # rnd_0: 1 - 1 xorl %edx, %ecx xorl %r12d, %eax rorxl $25, %r10d, %edx vpsrlq $0x11, %xmm6, %xmm6 vpaddd %xmm2, %xmm4, %xmm4 # rnd_0: 2 - 2 andl %r10d, %eax xorl %ecx, %edx rorxl $13, %r14d, %ecx vpaddd %xmm5, %xmm4, 
%xmm4 # rnd_0: 3 - 3 addl %edx, %r13d rorxl $2, %r14d, %edx xorl %r12d, %eax vpxor %xmm7, %xmm6, %xmm6 # rnd_0: 4 - 4 xorl %edx, %ecx rorxl $22, %r14d, %edx addl %eax, %r13d vpxor %xmm6, %xmm8, %xmm8 # rnd_0: 5 - 5 xorl %ecx, %edx movl %r15d, %eax addl %r13d, %r9d vpshufb %xmm11, %xmm8, %xmm8 # rnd_0: 6 - 6 xorl %r14d, %eax addl %edx, %r13d andl %eax, %ebx vpaddd %xmm8, %xmm4, %xmm4 # rnd_0: 7 - 7 xorl %r15d, %ebx rorxl $6, %r9d, %edx addl %ebx, %r13d # rnd_1: 0 - 0 movl %r10d, %ebx rorxl $11, %r9d, %ecx addl 44(%rsp), %r12d vpshufd $0x50, %xmm4, %xmm6 # rnd_1: 1 - 1 xorl %edx, %ecx xorl %r11d, %ebx rorxl $25, %r9d, %edx vpsrld $10, %xmm6, %xmm9 # rnd_1: 2 - 2 andl %r9d, %ebx xorl %ecx, %edx rorxl $13, %r13d, %ecx vpsrlq $19, %xmm6, %xmm7 # rnd_1: 3 - 3 addl %edx, %r12d rorxl $2, %r13d, %edx xorl %r11d, %ebx vpsrlq $0x11, %xmm6, %xmm6 # rnd_1: 4 - 4 xorl %edx, %ecx rorxl $22, %r13d, %edx addl %ebx, %r12d vpxor %xmm7, %xmm6, %xmm6 # rnd_1: 5 - 5 xorl %ecx, %edx movl %r14d, %ebx addl %r12d, %r8d vpxor %xmm6, %xmm9, %xmm9 # rnd_1: 6 - 6 xorl %r13d, %ebx addl %edx, %r12d andl %ebx, %eax vpshufb %xmm12, %xmm9, %xmm9 # rnd_1: 7 - 7 xorl %r14d, %eax rorxl $6, %r8d, %edx addl %eax, %r12d vpaddd %xmm4, %xmm9, %xmm2 # msg_sched done: 8-11 # msg_sched: 12-15 # rnd_0: 0 - 0 movl %r9d, %eax rorxl $11, %r8d, %ecx addl 48(%rsp), %r11d vpalignr $4, %xmm1, %xmm2, %xmm4 vpalignr $4, %xmm3, %xmm0, %xmm5 # rnd_0: 1 - 2 xorl %edx, %ecx xorl %r10d, %eax rorxl $25, %r8d, %edx andl %r8d, %eax xorl %ecx, %edx rorxl $13, %r12d, %ecx vpsrld $7, %xmm5, %xmm6 vpslld $25, %xmm5, %xmm7 # rnd_0: 3 - 4 addl %edx, %r11d rorxl $2, %r12d, %edx xorl %r10d, %eax xorl %edx, %ecx rorxl $22, %r12d, %edx addl %eax, %r11d vpsrld $3, %xmm5, %xmm8 vpor %xmm6, %xmm7, %xmm7 # rnd_0: 5 - 7 xorl %ecx, %edx movl %r13d, %eax addl %r11d, %r15d xorl %r12d, %eax addl %edx, %r11d andl %eax, %ebx xorl %r13d, %ebx rorxl $6, %r15d, %edx addl %ebx, %r11d # rnd_1: 0 - 0 movl %r8d, %ebx rorxl $11, %r15d, %ecx addl 52(%rsp), %r10d vpsrld $18, %xmm5, %xmm6 # rnd_1: 1 - 1 xorl %edx, %ecx xorl %r9d, %ebx rorxl $25, %r15d, %edx vpslld $14, %xmm5, %xmm5 # rnd_1: 2 - 2 andl %r15d, %ebx xorl %ecx, %edx rorxl $13, %r11d, %ecx vpxor %xmm5, %xmm7, %xmm7 # rnd_1: 3 - 3 addl %edx, %r10d rorxl $2, %r11d, %edx xorl %r9d, %ebx vpxor %xmm6, %xmm7, %xmm7 # rnd_1: 4 - 4 xorl %edx, %ecx rorxl $22, %r11d, %edx addl %ebx, %r10d vpshufd $0xfa, %xmm2, %xmm6 # rnd_1: 5 - 5 xorl %ecx, %edx movl %r12d, %ebx addl %r10d, %r14d vpxor %xmm8, %xmm7, %xmm5 # rnd_1: 6 - 6 xorl %r11d, %ebx addl %edx, %r10d andl %ebx, %eax vpsrld $10, %xmm6, %xmm8 # rnd_1: 7 - 7 xorl %r12d, %eax rorxl $6, %r14d, %edx addl %eax, %r10d # rnd_0: 0 - 0 movl %r15d, %eax rorxl $11, %r14d, %ecx addl 56(%rsp), %r9d vpsrlq $19, %xmm6, %xmm7 # rnd_0: 1 - 1 xorl %edx, %ecx xorl %r8d, %eax rorxl $25, %r14d, %edx vpsrlq $0x11, %xmm6, %xmm6 vpaddd %xmm3, %xmm4, %xmm4 # rnd_0: 2 - 2 andl %r14d, %eax xorl %ecx, %edx rorxl $13, %r10d, %ecx vpaddd %xmm5, %xmm4, %xmm4 # rnd_0: 3 - 3 addl %edx, %r9d rorxl $2, %r10d, %edx xorl %r8d, %eax vpxor %xmm7, %xmm6, %xmm6 # rnd_0: 4 - 4 xorl %edx, %ecx rorxl $22, %r10d, %edx addl %eax, %r9d vpxor %xmm6, %xmm8, %xmm8 # rnd_0: 5 - 5 xorl %ecx, %edx movl %r11d, %eax addl %r9d, %r13d vpshufb %xmm11, %xmm8, %xmm8 # rnd_0: 6 - 6 xorl %r10d, %eax addl %edx, %r9d andl %eax, %ebx vpaddd %xmm8, %xmm4, %xmm4 # rnd_0: 7 - 7 xorl %r11d, %ebx rorxl $6, %r13d, %edx addl %ebx, %r9d # rnd_1: 0 - 0 movl %r14d, %ebx rorxl $11, %r13d, %ecx addl 60(%rsp), %r8d vpshufd $0x50, %xmm4, %xmm6 # rnd_1: 1 - 
1 xorl %edx, %ecx xorl %r15d, %ebx rorxl $25, %r13d, %edx vpsrld $10, %xmm6, %xmm9 # rnd_1: 2 - 2 andl %r13d, %ebx xorl %ecx, %edx rorxl $13, %r9d, %ecx vpsrlq $19, %xmm6, %xmm7 # rnd_1: 3 - 3 addl %edx, %r8d rorxl $2, %r9d, %edx xorl %r15d, %ebx vpsrlq $0x11, %xmm6, %xmm6 # rnd_1: 4 - 4 xorl %edx, %ecx rorxl $22, %r9d, %edx addl %ebx, %r8d vpxor %xmm7, %xmm6, %xmm6 # rnd_1: 5 - 5 xorl %ecx, %edx movl %r10d, %ebx addl %r8d, %r12d vpxor %xmm6, %xmm9, %xmm9 # rnd_1: 6 - 6 xorl %r9d, %ebx addl %edx, %r8d andl %ebx, %eax vpshufb %xmm12, %xmm9, %xmm9 # rnd_1: 7 - 7 xorl %r10d, %eax rorxl $6, %r12d, %edx addl %eax, %r8d vpaddd %xmm4, %xmm9, %xmm3 # msg_sched done: 12-15 # set_w_k_xfer_4: 12 vpaddd 192+L_avx1_rorx_sha256_k(%rip), %xmm0, %xmm4 vpaddd 208+L_avx1_rorx_sha256_k(%rip), %xmm1, %xmm5 vmovdqu %xmm4, (%rsp) vmovdqu %xmm5, 16(%rsp) vpaddd 224+L_avx1_rorx_sha256_k(%rip), %xmm2, %xmm6 vpaddd 240+L_avx1_rorx_sha256_k(%rip), %xmm3, %xmm7 vmovdqu %xmm6, 32(%rsp) vmovdqu %xmm7, 48(%rsp) xorl %eax, %eax # rnd_all_4: 0-3 rorxl $6, %r12d, %edx rorxl $11, %r12d, %ecx addl %eax, %r8d addl (%rsp), %r15d movl %r13d, %eax xorl %edx, %ecx xorl %r14d, %eax rorxl $25, %r12d, %edx xorl %ecx, %edx andl %r12d, %eax addl %edx, %r15d rorxl $2, %r8d, %edx rorxl $13, %r8d, %ecx xorl %r14d, %eax xorl %edx, %ecx rorxl $22, %r8d, %edx addl %eax, %r15d xorl %ecx, %edx movl %r9d, %eax addl %r15d, %r11d xorl %r8d, %eax andl %eax, %ebx addl %edx, %r15d xorl %r9d, %ebx rorxl $6, %r11d, %edx rorxl $11, %r11d, %ecx addl %ebx, %r15d addl 4(%rsp), %r14d movl %r12d, %ebx xorl %edx, %ecx xorl %r13d, %ebx rorxl $25, %r11d, %edx xorl %ecx, %edx andl %r11d, %ebx addl %edx, %r14d rorxl $2, %r15d, %edx rorxl $13, %r15d, %ecx xorl %r13d, %ebx xorl %edx, %ecx rorxl $22, %r15d, %edx addl %ebx, %r14d xorl %ecx, %edx movl %r8d, %ebx addl %r14d, %r10d xorl %r15d, %ebx andl %ebx, %eax addl %edx, %r14d xorl %r8d, %eax rorxl $6, %r10d, %edx rorxl $11, %r10d, %ecx addl %eax, %r14d addl 8(%rsp), %r13d movl %r11d, %eax xorl %edx, %ecx xorl %r12d, %eax rorxl $25, %r10d, %edx xorl %ecx, %edx andl %r10d, %eax addl %edx, %r13d rorxl $2, %r14d, %edx rorxl $13, %r14d, %ecx xorl %r12d, %eax xorl %edx, %ecx rorxl $22, %r14d, %edx addl %eax, %r13d xorl %ecx, %edx movl %r15d, %eax addl %r13d, %r9d xorl %r14d, %eax andl %eax, %ebx addl %edx, %r13d xorl %r15d, %ebx rorxl $6, %r9d, %edx rorxl $11, %r9d, %ecx addl %ebx, %r13d addl 12(%rsp), %r12d movl %r10d, %ebx xorl %edx, %ecx xorl %r11d, %ebx rorxl $25, %r9d, %edx xorl %ecx, %edx andl %r9d, %ebx addl %edx, %r12d rorxl $2, %r13d, %edx rorxl $13, %r13d, %ecx xorl %r11d, %ebx xorl %edx, %ecx rorxl $22, %r13d, %edx addl %ebx, %r12d xorl %ecx, %edx movl %r14d, %ebx addl %r12d, %r8d xorl %r13d, %ebx andl %ebx, %eax addl %edx, %r12d xorl %r14d, %eax # rnd_all_4: 1-4 rorxl $6, %r8d, %edx rorxl $11, %r8d, %ecx addl %eax, %r12d addl 16(%rsp), %r11d movl %r9d, %eax xorl %edx, %ecx xorl %r10d, %eax rorxl $25, %r8d, %edx xorl %ecx, %edx andl %r8d, %eax addl %edx, %r11d rorxl $2, %r12d, %edx rorxl $13, %r12d, %ecx xorl %r10d, %eax xorl %edx, %ecx rorxl $22, %r12d, %edx addl %eax, %r11d xorl %ecx, %edx movl %r13d, %eax addl %r11d, %r15d xorl %r12d, %eax andl %eax, %ebx addl %edx, %r11d xorl %r13d, %ebx rorxl $6, %r15d, %edx rorxl $11, %r15d, %ecx addl %ebx, %r11d addl 20(%rsp), %r10d movl %r8d, %ebx xorl %edx, %ecx xorl %r9d, %ebx rorxl $25, %r15d, %edx xorl %ecx, %edx andl %r15d, %ebx addl %edx, %r10d rorxl $2, %r11d, %edx rorxl $13, %r11d, %ecx xorl %r9d, %ebx xorl %edx, %ecx rorxl $22, %r11d, %edx addl %ebx, %r10d 
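# From set_w_k_xfer_4: 12 onward the schedule is complete: the rnd_all_4 blocks
# consume the precomputed W[48..63]+K words stored at (%rsp)..60(%rsp), so the
# final 16 rounds are scalar-only with no further vector work.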
xorl %ecx, %edx movl %r12d, %ebx addl %r10d, %r14d xorl %r11d, %ebx andl %ebx, %eax addl %edx, %r10d xorl %r12d, %eax rorxl $6, %r14d, %edx rorxl $11, %r14d, %ecx addl %eax, %r10d addl 24(%rsp), %r9d movl %r15d, %eax xorl %edx, %ecx xorl %r8d, %eax rorxl $25, %r14d, %edx xorl %ecx, %edx andl %r14d, %eax addl %edx, %r9d rorxl $2, %r10d, %edx rorxl $13, %r10d, %ecx xorl %r8d, %eax xorl %edx, %ecx rorxl $22, %r10d, %edx addl %eax, %r9d xorl %ecx, %edx movl %r11d, %eax addl %r9d, %r13d xorl %r10d, %eax andl %eax, %ebx addl %edx, %r9d xorl %r11d, %ebx rorxl $6, %r13d, %edx rorxl $11, %r13d, %ecx addl %ebx, %r9d addl 28(%rsp), %r8d movl %r14d, %ebx xorl %edx, %ecx xorl %r15d, %ebx rorxl $25, %r13d, %edx xorl %ecx, %edx andl %r13d, %ebx addl %edx, %r8d rorxl $2, %r9d, %edx rorxl $13, %r9d, %ecx xorl %r15d, %ebx xorl %edx, %ecx rorxl $22, %r9d, %edx addl %ebx, %r8d xorl %ecx, %edx movl %r10d, %ebx addl %r8d, %r12d xorl %r9d, %ebx andl %ebx, %eax addl %edx, %r8d xorl %r10d, %eax # rnd_all_4: 2-5 rorxl $6, %r12d, %edx rorxl $11, %r12d, %ecx addl %eax, %r8d addl 32(%rsp), %r15d movl %r13d, %eax xorl %edx, %ecx xorl %r14d, %eax rorxl $25, %r12d, %edx xorl %ecx, %edx andl %r12d, %eax addl %edx, %r15d rorxl $2, %r8d, %edx rorxl $13, %r8d, %ecx xorl %r14d, %eax xorl %edx, %ecx rorxl $22, %r8d, %edx addl %eax, %r15d xorl %ecx, %edx movl %r9d, %eax addl %r15d, %r11d xorl %r8d, %eax andl %eax, %ebx addl %edx, %r15d xorl %r9d, %ebx rorxl $6, %r11d, %edx rorxl $11, %r11d, %ecx addl %ebx, %r15d addl 36(%rsp), %r14d movl %r12d, %ebx xorl %edx, %ecx xorl %r13d, %ebx rorxl $25, %r11d, %edx xorl %ecx, %edx andl %r11d, %ebx addl %edx, %r14d rorxl $2, %r15d, %edx rorxl $13, %r15d, %ecx xorl %r13d, %ebx xorl %edx, %ecx rorxl $22, %r15d, %edx addl %ebx, %r14d xorl %ecx, %edx movl %r8d, %ebx addl %r14d, %r10d xorl %r15d, %ebx andl %ebx, %eax addl %edx, %r14d xorl %r8d, %eax rorxl $6, %r10d, %edx rorxl $11, %r10d, %ecx addl %eax, %r14d addl 40(%rsp), %r13d movl %r11d, %eax xorl %edx, %ecx xorl %r12d, %eax rorxl $25, %r10d, %edx xorl %ecx, %edx andl %r10d, %eax addl %edx, %r13d rorxl $2, %r14d, %edx rorxl $13, %r14d, %ecx xorl %r12d, %eax xorl %edx, %ecx rorxl $22, %r14d, %edx addl %eax, %r13d xorl %ecx, %edx movl %r15d, %eax addl %r13d, %r9d xorl %r14d, %eax andl %eax, %ebx addl %edx, %r13d xorl %r15d, %ebx rorxl $6, %r9d, %edx rorxl $11, %r9d, %ecx addl %ebx, %r13d addl 44(%rsp), %r12d movl %r10d, %ebx xorl %edx, %ecx xorl %r11d, %ebx rorxl $25, %r9d, %edx xorl %ecx, %edx andl %r9d, %ebx addl %edx, %r12d rorxl $2, %r13d, %edx rorxl $13, %r13d, %ecx xorl %r11d, %ebx xorl %edx, %ecx rorxl $22, %r13d, %edx addl %ebx, %r12d xorl %ecx, %edx movl %r14d, %ebx addl %r12d, %r8d xorl %r13d, %ebx andl %ebx, %eax addl %edx, %r12d xorl %r14d, %eax # rnd_all_4: 3-6 rorxl $6, %r8d, %edx rorxl $11, %r8d, %ecx addl %eax, %r12d addl 48(%rsp), %r11d movl %r9d, %eax xorl %edx, %ecx xorl %r10d, %eax rorxl $25, %r8d, %edx xorl %ecx, %edx andl %r8d, %eax addl %edx, %r11d rorxl $2, %r12d, %edx rorxl $13, %r12d, %ecx xorl %r10d, %eax xorl %edx, %ecx rorxl $22, %r12d, %edx addl %eax, %r11d xorl %ecx, %edx movl %r13d, %eax addl %r11d, %r15d xorl %r12d, %eax andl %eax, %ebx addl %edx, %r11d xorl %r13d, %ebx rorxl $6, %r15d, %edx rorxl $11, %r15d, %ecx addl %ebx, %r11d addl 52(%rsp), %r10d movl %r8d, %ebx xorl %edx, %ecx xorl %r9d, %ebx rorxl $25, %r15d, %edx xorl %ecx, %edx andl %r15d, %ebx addl %edx, %r10d rorxl $2, %r11d, %edx rorxl $13, %r11d, %ecx xorl %r9d, %ebx xorl %edx, %ecx rorxl $22, %r11d, %edx addl %ebx, %r10d xorl %ecx, %edx movl 
%r12d, %ebx addl %r10d, %r14d xorl %r11d, %ebx andl %ebx, %eax addl %edx, %r10d xorl %r12d, %eax rorxl $6, %r14d, %edx rorxl $11, %r14d, %ecx addl %eax, %r10d addl 56(%rsp), %r9d movl %r15d, %eax xorl %edx, %ecx xorl %r8d, %eax rorxl $25, %r14d, %edx xorl %ecx, %edx andl %r14d, %eax addl %edx, %r9d rorxl $2, %r10d, %edx rorxl $13, %r10d, %ecx xorl %r8d, %eax xorl %edx, %ecx rorxl $22, %r10d, %edx addl %eax, %r9d xorl %ecx, %edx movl %r11d, %eax addl %r9d, %r13d xorl %r10d, %eax andl %eax, %ebx addl %edx, %r9d xorl %r11d, %ebx rorxl $6, %r13d, %edx rorxl $11, %r13d, %ecx addl %ebx, %r9d addl 60(%rsp), %r8d movl %r14d, %ebx xorl %edx, %ecx xorl %r15d, %ebx rorxl $25, %r13d, %edx xorl %ecx, %edx andl %r13d, %ebx addl %edx, %r8d rorxl $2, %r9d, %edx rorxl $13, %r9d, %ecx xorl %r15d, %ebx xorl %edx, %ecx rorxl $22, %r9d, %edx addl %ebx, %r8d xorl %ecx, %edx movl %r10d, %ebx addl %r8d, %r12d xorl %r9d, %ebx andl %ebx, %eax addl %edx, %r8d xorl %r10d, %eax addl %eax, %r8d addl %r8d, (%rdi) addl %r9d, 4(%rdi) addl %r10d, 8(%rdi) addl %r11d, 12(%rdi) addl %r12d, 16(%rdi) addl %r13d, 20(%rdi) addl %r14d, 24(%rdi) addl %r15d, 28(%rdi) xorq %rax, %rax vzeroupper addq $0x40, %rsp popq %r15 popq %r14 popq %r13 popq %r12 popq %rbx repz retq #ifndef __APPLE__ .size Transform_Sha256_AVX1_RORX,.-Transform_Sha256_AVX1_RORX #endif /* __APPLE__ */ #ifndef __APPLE__ .text .globl Transform_Sha256_AVX1_RORX_Len .type Transform_Sha256_AVX1_RORX_Len,@function .align 16 Transform_Sha256_AVX1_RORX_Len: #else .section __TEXT,__text .globl _Transform_Sha256_AVX1_RORX_Len .p2align 4 _Transform_Sha256_AVX1_RORX_Len: #endif /* __APPLE__ */ pushq %rbx pushq %r12 pushq %r13 pushq %r14 pushq %r15 pushq %rbp movq %rsi, %rbp movq %rdx, %rsi subq $0x40, %rsp vmovdqa L_avx1_rorx_sha256_flip_mask(%rip), %xmm13 vmovdqa L_avx1_rorx_sha256_shuf_00BA(%rip), %xmm11 vmovdqa L_avx1_rorx_sha256_shuf_DC00(%rip), %xmm12 movl (%rdi), %r8d movl 4(%rdi), %r9d movl 8(%rdi), %r10d movl 12(%rdi), %r11d movl 16(%rdi), %r12d movl 20(%rdi), %r13d movl 24(%rdi), %r14d movl 28(%rdi), %r15d # Start of loop processing a block L_sha256_len_avx1_len_rorx_start: # X0, X1, X2, X3 = W[0..15] vmovdqu (%rbp), %xmm0 vmovdqu 16(%rbp), %xmm1 vpshufb %xmm13, %xmm0, %xmm0 vpshufb %xmm13, %xmm1, %xmm1 vmovdqu 32(%rbp), %xmm2 vmovdqu 48(%rbp), %xmm3 vpshufb %xmm13, %xmm2, %xmm2 vpshufb %xmm13, %xmm3, %xmm3 # set_w_k_xfer_4: 0 vpaddd 0+L_avx1_rorx_sha256_k(%rip), %xmm0, %xmm4 vpaddd 16+L_avx1_rorx_sha256_k(%rip), %xmm1, %xmm5 vmovdqu %xmm4, (%rsp) vmovdqu %xmm5, 16(%rsp) vpaddd 32+L_avx1_rorx_sha256_k(%rip), %xmm2, %xmm6 vpaddd 48+L_avx1_rorx_sha256_k(%rip), %xmm3, %xmm7 vmovdqu %xmm6, 32(%rsp) vmovdqu %xmm7, 48(%rsp) movl %r9d, %ebx rorxl $6, %r12d, %edx xorl %r10d, %ebx # msg_sched: 0-3 # rnd_0: 0 - 0 movl %r13d, %eax rorxl $11, %r12d, %ecx addl (%rsp), %r15d vpalignr $4, %xmm2, %xmm3, %xmm4 vpalignr $4, %xmm0, %xmm1, %xmm5 # rnd_0: 1 - 2 xorl %edx, %ecx xorl %r14d, %eax rorxl $25, %r12d, %edx andl %r12d, %eax xorl %ecx, %edx rorxl $13, %r8d, %ecx vpsrld $7, %xmm5, %xmm6 vpslld $25, %xmm5, %xmm7 # rnd_0: 3 - 4 addl %edx, %r15d rorxl $2, %r8d, %edx xorl %r14d, %eax xorl %edx, %ecx rorxl $22, %r8d, %edx addl %eax, %r15d vpsrld $3, %xmm5, %xmm8 vpor %xmm6, %xmm7, %xmm7 # rnd_0: 5 - 7 xorl %ecx, %edx movl %r9d, %eax addl %r15d, %r11d xorl %r8d, %eax addl %edx, %r15d andl %eax, %ebx xorl %r9d, %ebx rorxl $6, %r11d, %edx addl %ebx, %r15d # rnd_1: 0 - 0 movl %r12d, %ebx rorxl $11, %r11d, %ecx addl 4(%rsp), %r14d vpsrld $18, %xmm5, %xmm6 # rnd_1: 1 - 1 xorl %edx, %ecx xorl 
%r13d, %ebx rorxl $25, %r11d, %edx vpslld $14, %xmm5, %xmm5 # rnd_1: 2 - 2 andl %r11d, %ebx xorl %ecx, %edx rorxl $13, %r15d, %ecx vpxor %xmm5, %xmm7, %xmm7 # rnd_1: 3 - 3 addl %edx, %r14d rorxl $2, %r15d, %edx xorl %r13d, %ebx vpxor %xmm6, %xmm7, %xmm7 # rnd_1: 4 - 4 xorl %edx, %ecx rorxl $22, %r15d, %edx addl %ebx, %r14d vpshufd $0xfa, %xmm3, %xmm6 # rnd_1: 5 - 5 xorl %ecx, %edx movl %r8d, %ebx addl %r14d, %r10d vpxor %xmm8, %xmm7, %xmm5 # rnd_1: 6 - 6 xorl %r15d, %ebx addl %edx, %r14d andl %ebx, %eax vpsrld $10, %xmm6, %xmm8 # rnd_1: 7 - 7 xorl %r8d, %eax rorxl $6, %r10d, %edx addl %eax, %r14d # rnd_0: 0 - 0 movl %r11d, %eax rorxl $11, %r10d, %ecx addl 8(%rsp), %r13d vpsrlq $19, %xmm6, %xmm7 # rnd_0: 1 - 1 xorl %edx, %ecx xorl %r12d, %eax rorxl $25, %r10d, %edx vpsrlq $0x11, %xmm6, %xmm6 vpaddd %xmm0, %xmm4, %xmm4 # rnd_0: 2 - 2 andl %r10d, %eax xorl %ecx, %edx rorxl $13, %r14d, %ecx vpaddd %xmm5, %xmm4, %xmm4 # rnd_0: 3 - 3 addl %edx, %r13d rorxl $2, %r14d, %edx xorl %r12d, %eax vpxor %xmm7, %xmm6, %xmm6 # rnd_0: 4 - 4 xorl %edx, %ecx rorxl $22, %r14d, %edx addl %eax, %r13d vpxor %xmm6, %xmm8, %xmm8 # rnd_0: 5 - 5 xorl %ecx, %edx movl %r15d, %eax addl %r13d, %r9d vpshufb %xmm11, %xmm8, %xmm8 # rnd_0: 6 - 6 xorl %r14d, %eax addl %edx, %r13d andl %eax, %ebx vpaddd %xmm8, %xmm4, %xmm4 # rnd_0: 7 - 7 xorl %r15d, %ebx rorxl $6, %r9d, %edx addl %ebx, %r13d # rnd_1: 0 - 0 movl %r10d, %ebx rorxl $11, %r9d, %ecx addl 12(%rsp), %r12d vpshufd $0x50, %xmm4, %xmm6 # rnd_1: 1 - 1 xorl %edx, %ecx xorl %r11d, %ebx rorxl $25, %r9d, %edx vpsrld $10, %xmm6, %xmm9 # rnd_1: 2 - 2 andl %r9d, %ebx xorl %ecx, %edx rorxl $13, %r13d, %ecx vpsrlq $19, %xmm6, %xmm7 # rnd_1: 3 - 3 addl %edx, %r12d rorxl $2, %r13d, %edx xorl %r11d, %ebx vpsrlq $0x11, %xmm6, %xmm6 # rnd_1: 4 - 4 xorl %edx, %ecx rorxl $22, %r13d, %edx addl %ebx, %r12d vpxor %xmm7, %xmm6, %xmm6 # rnd_1: 5 - 5 xorl %ecx, %edx movl %r14d, %ebx addl %r12d, %r8d vpxor %xmm6, %xmm9, %xmm9 # rnd_1: 6 - 6 xorl %r13d, %ebx addl %edx, %r12d andl %ebx, %eax vpshufb %xmm12, %xmm9, %xmm9 # rnd_1: 7 - 7 xorl %r14d, %eax rorxl $6, %r8d, %edx addl %eax, %r12d vpaddd %xmm4, %xmm9, %xmm0 # msg_sched done: 0-3 # msg_sched: 4-7 # rnd_0: 0 - 0 movl %r9d, %eax rorxl $11, %r8d, %ecx addl 16(%rsp), %r11d vpalignr $4, %xmm3, %xmm0, %xmm4 vpalignr $4, %xmm1, %xmm2, %xmm5 # rnd_0: 1 - 2 xorl %edx, %ecx xorl %r10d, %eax rorxl $25, %r8d, %edx andl %r8d, %eax xorl %ecx, %edx rorxl $13, %r12d, %ecx vpsrld $7, %xmm5, %xmm6 vpslld $25, %xmm5, %xmm7 # rnd_0: 3 - 4 addl %edx, %r11d rorxl $2, %r12d, %edx xorl %r10d, %eax xorl %edx, %ecx rorxl $22, %r12d, %edx addl %eax, %r11d vpsrld $3, %xmm5, %xmm8 vpor %xmm6, %xmm7, %xmm7 # rnd_0: 5 - 7 xorl %ecx, %edx movl %r13d, %eax addl %r11d, %r15d xorl %r12d, %eax addl %edx, %r11d andl %eax, %ebx xorl %r13d, %ebx rorxl $6, %r15d, %edx addl %ebx, %r11d # rnd_1: 0 - 0 movl %r8d, %ebx rorxl $11, %r15d, %ecx addl 20(%rsp), %r10d vpsrld $18, %xmm5, %xmm6 # rnd_1: 1 - 1 xorl %edx, %ecx xorl %r9d, %ebx rorxl $25, %r15d, %edx vpslld $14, %xmm5, %xmm5 # rnd_1: 2 - 2 andl %r15d, %ebx xorl %ecx, %edx rorxl $13, %r11d, %ecx vpxor %xmm5, %xmm7, %xmm7 # rnd_1: 3 - 3 addl %edx, %r10d rorxl $2, %r11d, %edx xorl %r9d, %ebx vpxor %xmm6, %xmm7, %xmm7 # rnd_1: 4 - 4 xorl %edx, %ecx rorxl $22, %r11d, %edx addl %ebx, %r10d vpshufd $0xfa, %xmm0, %xmm6 # rnd_1: 5 - 5 xorl %ecx, %edx movl %r12d, %ebx addl %r10d, %r14d vpxor %xmm8, %xmm7, %xmm5 # rnd_1: 6 - 6 xorl %r11d, %ebx addl %edx, %r10d andl %ebx, %eax vpsrld $10, %xmm6, %xmm8 # rnd_1: 7 - 7 xorl %r12d, %eax rorxl 
$6, %r14d, %edx addl %eax, %r10d # rnd_0: 0 - 0 movl %r15d, %eax rorxl $11, %r14d, %ecx addl 24(%rsp), %r9d vpsrlq $19, %xmm6, %xmm7 # rnd_0: 1 - 1 xorl %edx, %ecx xorl %r8d, %eax rorxl $25, %r14d, %edx vpsrlq $0x11, %xmm6, %xmm6 vpaddd %xmm1, %xmm4, %xmm4 # rnd_0: 2 - 2 andl %r14d, %eax xorl %ecx, %edx rorxl $13, %r10d, %ecx vpaddd %xmm5, %xmm4, %xmm4 # rnd_0: 3 - 3 addl %edx, %r9d rorxl $2, %r10d, %edx xorl %r8d, %eax vpxor %xmm7, %xmm6, %xmm6 # rnd_0: 4 - 4 xorl %edx, %ecx rorxl $22, %r10d, %edx addl %eax, %r9d vpxor %xmm6, %xmm8, %xmm8 # rnd_0: 5 - 5 xorl %ecx, %edx movl %r11d, %eax addl %r9d, %r13d vpshufb %xmm11, %xmm8, %xmm8 # rnd_0: 6 - 6 xorl %r10d, %eax addl %edx, %r9d andl %eax, %ebx vpaddd %xmm8, %xmm4, %xmm4 # rnd_0: 7 - 7 xorl %r11d, %ebx rorxl $6, %r13d, %edx addl %ebx, %r9d # rnd_1: 0 - 0 movl %r14d, %ebx rorxl $11, %r13d, %ecx addl 28(%rsp), %r8d vpshufd $0x50, %xmm4, %xmm6 # rnd_1: 1 - 1 xorl %edx, %ecx xorl %r15d, %ebx rorxl $25, %r13d, %edx vpsrld $10, %xmm6, %xmm9 # rnd_1: 2 - 2 andl %r13d, %ebx xorl %ecx, %edx rorxl $13, %r9d, %ecx vpsrlq $19, %xmm6, %xmm7 # rnd_1: 3 - 3 addl %edx, %r8d rorxl $2, %r9d, %edx xorl %r15d, %ebx vpsrlq $0x11, %xmm6, %xmm6 # rnd_1: 4 - 4 xorl %edx, %ecx rorxl $22, %r9d, %edx addl %ebx, %r8d vpxor %xmm7, %xmm6, %xmm6 # rnd_1: 5 - 5 xorl %ecx, %edx movl %r10d, %ebx addl %r8d, %r12d vpxor %xmm6, %xmm9, %xmm9 # rnd_1: 6 - 6 xorl %r9d, %ebx addl %edx, %r8d andl %ebx, %eax vpshufb %xmm12, %xmm9, %xmm9 # rnd_1: 7 - 7 xorl %r10d, %eax rorxl $6, %r12d, %edx addl %eax, %r8d vpaddd %xmm4, %xmm9, %xmm1 # msg_sched done: 4-7 # msg_sched: 8-11 # rnd_0: 0 - 0 movl %r13d, %eax rorxl $11, %r12d, %ecx addl 32(%rsp), %r15d vpalignr $4, %xmm0, %xmm1, %xmm4 vpalignr $4, %xmm2, %xmm3, %xmm5 # rnd_0: 1 - 2 xorl %edx, %ecx xorl %r14d, %eax rorxl $25, %r12d, %edx andl %r12d, %eax xorl %ecx, %edx rorxl $13, %r8d, %ecx vpsrld $7, %xmm5, %xmm6 vpslld $25, %xmm5, %xmm7 # rnd_0: 3 - 4 addl %edx, %r15d rorxl $2, %r8d, %edx xorl %r14d, %eax xorl %edx, %ecx rorxl $22, %r8d, %edx addl %eax, %r15d vpsrld $3, %xmm5, %xmm8 vpor %xmm6, %xmm7, %xmm7 # rnd_0: 5 - 7 xorl %ecx, %edx movl %r9d, %eax addl %r15d, %r11d xorl %r8d, %eax addl %edx, %r15d andl %eax, %ebx xorl %r9d, %ebx rorxl $6, %r11d, %edx addl %ebx, %r15d # rnd_1: 0 - 0 movl %r12d, %ebx rorxl $11, %r11d, %ecx addl 36(%rsp), %r14d vpsrld $18, %xmm5, %xmm6 # rnd_1: 1 - 1 xorl %edx, %ecx xorl %r13d, %ebx rorxl $25, %r11d, %edx vpslld $14, %xmm5, %xmm5 # rnd_1: 2 - 2 andl %r11d, %ebx xorl %ecx, %edx rorxl $13, %r15d, %ecx vpxor %xmm5, %xmm7, %xmm7 # rnd_1: 3 - 3 addl %edx, %r14d rorxl $2, %r15d, %edx xorl %r13d, %ebx vpxor %xmm6, %xmm7, %xmm7 # rnd_1: 4 - 4 xorl %edx, %ecx rorxl $22, %r15d, %edx addl %ebx, %r14d vpshufd $0xfa, %xmm1, %xmm6 # rnd_1: 5 - 5 xorl %ecx, %edx movl %r8d, %ebx addl %r14d, %r10d vpxor %xmm8, %xmm7, %xmm5 # rnd_1: 6 - 6 xorl %r15d, %ebx addl %edx, %r14d andl %ebx, %eax vpsrld $10, %xmm6, %xmm8 # rnd_1: 7 - 7 xorl %r8d, %eax rorxl $6, %r10d, %edx addl %eax, %r14d # rnd_0: 0 - 0 movl %r11d, %eax rorxl $11, %r10d, %ecx addl 40(%rsp), %r13d vpsrlq $19, %xmm6, %xmm7 # rnd_0: 1 - 1 xorl %edx, %ecx xorl %r12d, %eax rorxl $25, %r10d, %edx vpsrlq $0x11, %xmm6, %xmm6 vpaddd %xmm2, %xmm4, %xmm4 # rnd_0: 2 - 2 andl %r10d, %eax xorl %ecx, %edx rorxl $13, %r14d, %ecx vpaddd %xmm5, %xmm4, %xmm4 # rnd_0: 3 - 3 addl %edx, %r13d rorxl $2, %r14d, %edx xorl %r12d, %eax vpxor %xmm7, %xmm6, %xmm6 # rnd_0: 4 - 4 xorl %edx, %ecx rorxl $22, %r14d, %edx addl %eax, %r13d vpxor %xmm6, %xmm8, %xmm8 # rnd_0: 5 - 5 xorl %ecx, 
%edx movl %r15d, %eax addl %r13d, %r9d vpshufb %xmm11, %xmm8, %xmm8 # rnd_0: 6 - 6 xorl %r14d, %eax addl %edx, %r13d andl %eax, %ebx vpaddd %xmm8, %xmm4, %xmm4 # rnd_0: 7 - 7 xorl %r15d, %ebx rorxl $6, %r9d, %edx addl %ebx, %r13d # rnd_1: 0 - 0 movl %r10d, %ebx rorxl $11, %r9d, %ecx addl 44(%rsp), %r12d vpshufd $0x50, %xmm4, %xmm6 # rnd_1: 1 - 1 xorl %edx, %ecx xorl %r11d, %ebx rorxl $25, %r9d, %edx vpsrld $10, %xmm6, %xmm9 # rnd_1: 2 - 2 andl %r9d, %ebx xorl %ecx, %edx rorxl $13, %r13d, %ecx vpsrlq $19, %xmm6, %xmm7 # rnd_1: 3 - 3 addl %edx, %r12d rorxl $2, %r13d, %edx xorl %r11d, %ebx vpsrlq $0x11, %xmm6, %xmm6 # rnd_1: 4 - 4 xorl %edx, %ecx rorxl $22, %r13d, %edx addl %ebx, %r12d vpxor %xmm7, %xmm6, %xmm6 # rnd_1: 5 - 5 xorl %ecx, %edx movl %r14d, %ebx addl %r12d, %r8d vpxor %xmm6, %xmm9, %xmm9 # rnd_1: 6 - 6 xorl %r13d, %ebx addl %edx, %r12d andl %ebx, %eax vpshufb %xmm12, %xmm9, %xmm9 # rnd_1: 7 - 7 xorl %r14d, %eax rorxl $6, %r8d, %edx addl %eax, %r12d vpaddd %xmm4, %xmm9, %xmm2 # msg_sched done: 8-11 # msg_sched: 12-15 # rnd_0: 0 - 0 movl %r9d, %eax rorxl $11, %r8d, %ecx addl 48(%rsp), %r11d vpalignr $4, %xmm1, %xmm2, %xmm4 vpalignr $4, %xmm3, %xmm0, %xmm5 # rnd_0: 1 - 2 xorl %edx, %ecx xorl %r10d, %eax rorxl $25, %r8d, %edx andl %r8d, %eax xorl %ecx, %edx rorxl $13, %r12d, %ecx vpsrld $7, %xmm5, %xmm6 vpslld $25, %xmm5, %xmm7 # rnd_0: 3 - 4 addl %edx, %r11d rorxl $2, %r12d, %edx xorl %r10d, %eax xorl %edx, %ecx rorxl $22, %r12d, %edx addl %eax, %r11d vpsrld $3, %xmm5, %xmm8 vpor %xmm6, %xmm7, %xmm7 # rnd_0: 5 - 7 xorl %ecx, %edx movl %r13d, %eax addl %r11d, %r15d xorl %r12d, %eax addl %edx, %r11d andl %eax, %ebx xorl %r13d, %ebx rorxl $6, %r15d, %edx addl %ebx, %r11d # rnd_1: 0 - 0 movl %r8d, %ebx rorxl $11, %r15d, %ecx addl 52(%rsp), %r10d vpsrld $18, %xmm5, %xmm6 # rnd_1: 1 - 1 xorl %edx, %ecx xorl %r9d, %ebx rorxl $25, %r15d, %edx vpslld $14, %xmm5, %xmm5 # rnd_1: 2 - 2 andl %r15d, %ebx xorl %ecx, %edx rorxl $13, %r11d, %ecx vpxor %xmm5, %xmm7, %xmm7 # rnd_1: 3 - 3 addl %edx, %r10d rorxl $2, %r11d, %edx xorl %r9d, %ebx vpxor %xmm6, %xmm7, %xmm7 # rnd_1: 4 - 4 xorl %edx, %ecx rorxl $22, %r11d, %edx addl %ebx, %r10d vpshufd $0xfa, %xmm2, %xmm6 # rnd_1: 5 - 5 xorl %ecx, %edx movl %r12d, %ebx addl %r10d, %r14d vpxor %xmm8, %xmm7, %xmm5 # rnd_1: 6 - 6 xorl %r11d, %ebx addl %edx, %r10d andl %ebx, %eax vpsrld $10, %xmm6, %xmm8 # rnd_1: 7 - 7 xorl %r12d, %eax rorxl $6, %r14d, %edx addl %eax, %r10d # rnd_0: 0 - 0 movl %r15d, %eax rorxl $11, %r14d, %ecx addl 56(%rsp), %r9d vpsrlq $19, %xmm6, %xmm7 # rnd_0: 1 - 1 xorl %edx, %ecx xorl %r8d, %eax rorxl $25, %r14d, %edx vpsrlq $0x11, %xmm6, %xmm6 vpaddd %xmm3, %xmm4, %xmm4 # rnd_0: 2 - 2 andl %r14d, %eax xorl %ecx, %edx rorxl $13, %r10d, %ecx vpaddd %xmm5, %xmm4, %xmm4 # rnd_0: 3 - 3 addl %edx, %r9d rorxl $2, %r10d, %edx xorl %r8d, %eax vpxor %xmm7, %xmm6, %xmm6 # rnd_0: 4 - 4 xorl %edx, %ecx rorxl $22, %r10d, %edx addl %eax, %r9d vpxor %xmm6, %xmm8, %xmm8 # rnd_0: 5 - 5 xorl %ecx, %edx movl %r11d, %eax addl %r9d, %r13d vpshufb %xmm11, %xmm8, %xmm8 # rnd_0: 6 - 6 xorl %r10d, %eax addl %edx, %r9d andl %eax, %ebx vpaddd %xmm8, %xmm4, %xmm4 # rnd_0: 7 - 7 xorl %r11d, %ebx rorxl $6, %r13d, %edx addl %ebx, %r9d # rnd_1: 0 - 0 movl %r14d, %ebx rorxl $11, %r13d, %ecx addl 60(%rsp), %r8d vpshufd $0x50, %xmm4, %xmm6 # rnd_1: 1 - 1 xorl %edx, %ecx xorl %r15d, %ebx rorxl $25, %r13d, %edx vpsrld $10, %xmm6, %xmm9 # rnd_1: 2 - 2 andl %r13d, %ebx xorl %ecx, %edx rorxl $13, %r9d, %ecx vpsrlq $19, %xmm6, %xmm7 # rnd_1: 3 - 3 addl %edx, %r8d rorxl $2, %r9d, 
%edx xorl %r15d, %ebx vpsrlq $0x11, %xmm6, %xmm6 # rnd_1: 4 - 4 xorl %edx, %ecx rorxl $22, %r9d, %edx addl %ebx, %r8d vpxor %xmm7, %xmm6, %xmm6 # rnd_1: 5 - 5 xorl %ecx, %edx movl %r10d, %ebx addl %r8d, %r12d vpxor %xmm6, %xmm9, %xmm9 # rnd_1: 6 - 6 xorl %r9d, %ebx addl %edx, %r8d andl %ebx, %eax vpshufb %xmm12, %xmm9, %xmm9 # rnd_1: 7 - 7 xorl %r10d, %eax rorxl $6, %r12d, %edx addl %eax, %r8d vpaddd %xmm4, %xmm9, %xmm3 # msg_sched done: 12-15 # set_w_k_xfer_4: 4 vpaddd 64+L_avx1_rorx_sha256_k(%rip), %xmm0, %xmm4 vpaddd 80+L_avx1_rorx_sha256_k(%rip), %xmm1, %xmm5 vmovdqu %xmm4, (%rsp) vmovdqu %xmm5, 16(%rsp) vpaddd 96+L_avx1_rorx_sha256_k(%rip), %xmm2, %xmm6 vpaddd 112+L_avx1_rorx_sha256_k(%rip), %xmm3, %xmm7 vmovdqu %xmm6, 32(%rsp) vmovdqu %xmm7, 48(%rsp) # msg_sched: 0-3 # rnd_0: 0 - 0 movl %r13d, %eax rorxl $11, %r12d, %ecx addl (%rsp), %r15d vpalignr $4, %xmm2, %xmm3, %xmm4 vpalignr $4, %xmm0, %xmm1, %xmm5 # rnd_0: 1 - 2 xorl %edx, %ecx xorl %r14d, %eax rorxl $25, %r12d, %edx andl %r12d, %eax xorl %ecx, %edx rorxl $13, %r8d, %ecx vpsrld $7, %xmm5, %xmm6 vpslld $25, %xmm5, %xmm7 # rnd_0: 3 - 4 addl %edx, %r15d rorxl $2, %r8d, %edx xorl %r14d, %eax xorl %edx, %ecx rorxl $22, %r8d, %edx addl %eax, %r15d vpsrld $3, %xmm5, %xmm8 vpor %xmm6, %xmm7, %xmm7 # rnd_0: 5 - 7 xorl %ecx, %edx movl %r9d, %eax addl %r15d, %r11d xorl %r8d, %eax addl %edx, %r15d andl %eax, %ebx xorl %r9d, %ebx rorxl $6, %r11d, %edx addl %ebx, %r15d # rnd_1: 0 - 0 movl %r12d, %ebx rorxl $11, %r11d, %ecx addl 4(%rsp), %r14d vpsrld $18, %xmm5, %xmm6 # rnd_1: 1 - 1 xorl %edx, %ecx xorl %r13d, %ebx rorxl $25, %r11d, %edx vpslld $14, %xmm5, %xmm5 # rnd_1: 2 - 2 andl %r11d, %ebx xorl %ecx, %edx rorxl $13, %r15d, %ecx vpxor %xmm5, %xmm7, %xmm7 # rnd_1: 3 - 3 addl %edx, %r14d rorxl $2, %r15d, %edx xorl %r13d, %ebx vpxor %xmm6, %xmm7, %xmm7 # rnd_1: 4 - 4 xorl %edx, %ecx rorxl $22, %r15d, %edx addl %ebx, %r14d vpshufd $0xfa, %xmm3, %xmm6 # rnd_1: 5 - 5 xorl %ecx, %edx movl %r8d, %ebx addl %r14d, %r10d vpxor %xmm8, %xmm7, %xmm5 # rnd_1: 6 - 6 xorl %r15d, %ebx addl %edx, %r14d andl %ebx, %eax vpsrld $10, %xmm6, %xmm8 # rnd_1: 7 - 7 xorl %r8d, %eax rorxl $6, %r10d, %edx addl %eax, %r14d # rnd_0: 0 - 0 movl %r11d, %eax rorxl $11, %r10d, %ecx addl 8(%rsp), %r13d vpsrlq $19, %xmm6, %xmm7 # rnd_0: 1 - 1 xorl %edx, %ecx xorl %r12d, %eax rorxl $25, %r10d, %edx vpsrlq $0x11, %xmm6, %xmm6 vpaddd %xmm0, %xmm4, %xmm4 # rnd_0: 2 - 2 andl %r10d, %eax xorl %ecx, %edx rorxl $13, %r14d, %ecx vpaddd %xmm5, %xmm4, %xmm4 # rnd_0: 3 - 3 addl %edx, %r13d rorxl $2, %r14d, %edx xorl %r12d, %eax vpxor %xmm7, %xmm6, %xmm6 # rnd_0: 4 - 4 xorl %edx, %ecx rorxl $22, %r14d, %edx addl %eax, %r13d vpxor %xmm6, %xmm8, %xmm8 # rnd_0: 5 - 5 xorl %ecx, %edx movl %r15d, %eax addl %r13d, %r9d vpshufb %xmm11, %xmm8, %xmm8 # rnd_0: 6 - 6 xorl %r14d, %eax addl %edx, %r13d andl %eax, %ebx vpaddd %xmm8, %xmm4, %xmm4 # rnd_0: 7 - 7 xorl %r15d, %ebx rorxl $6, %r9d, %edx addl %ebx, %r13d # rnd_1: 0 - 0 movl %r10d, %ebx rorxl $11, %r9d, %ecx addl 12(%rsp), %r12d vpshufd $0x50, %xmm4, %xmm6 # rnd_1: 1 - 1 xorl %edx, %ecx xorl %r11d, %ebx rorxl $25, %r9d, %edx vpsrld $10, %xmm6, %xmm9 # rnd_1: 2 - 2 andl %r9d, %ebx xorl %ecx, %edx rorxl $13, %r13d, %ecx vpsrlq $19, %xmm6, %xmm7 # rnd_1: 3 - 3 addl %edx, %r12d rorxl $2, %r13d, %edx xorl %r11d, %ebx vpsrlq $0x11, %xmm6, %xmm6 # rnd_1: 4 - 4 xorl %edx, %ecx rorxl $22, %r13d, %edx addl %ebx, %r12d vpxor %xmm7, %xmm6, %xmm6 # rnd_1: 5 - 5 xorl %ecx, %edx movl %r14d, %ebx addl %r12d, %r8d vpxor %xmm6, %xmm9, %xmm9 # rnd_1: 6 - 6 xorl 
%r13d, %ebx addl %edx, %r12d andl %ebx, %eax vpshufb %xmm12, %xmm9, %xmm9 # rnd_1: 7 - 7 xorl %r14d, %eax rorxl $6, %r8d, %edx addl %eax, %r12d vpaddd %xmm4, %xmm9, %xmm0 # msg_sched done: 0-3 # msg_sched: 4-7 # rnd_0: 0 - 0 movl %r9d, %eax rorxl $11, %r8d, %ecx addl 16(%rsp), %r11d vpalignr $4, %xmm3, %xmm0, %xmm4 vpalignr $4, %xmm1, %xmm2, %xmm5 # rnd_0: 1 - 2 xorl %edx, %ecx xorl %r10d, %eax rorxl $25, %r8d, %edx andl %r8d, %eax xorl %ecx, %edx rorxl $13, %r12d, %ecx vpsrld $7, %xmm5, %xmm6 vpslld $25, %xmm5, %xmm7 # rnd_0: 3 - 4 addl %edx, %r11d rorxl $2, %r12d, %edx xorl %r10d, %eax xorl %edx, %ecx rorxl $22, %r12d, %edx addl %eax, %r11d vpsrld $3, %xmm5, %xmm8 vpor %xmm6, %xmm7, %xmm7 # rnd_0: 5 - 7 xorl %ecx, %edx movl %r13d, %eax addl %r11d, %r15d xorl %r12d, %eax addl %edx, %r11d andl %eax, %ebx xorl %r13d, %ebx rorxl $6, %r15d, %edx addl %ebx, %r11d # rnd_1: 0 - 0 movl %r8d, %ebx rorxl $11, %r15d, %ecx addl 20(%rsp), %r10d vpsrld $18, %xmm5, %xmm6 # rnd_1: 1 - 1 xorl %edx, %ecx xorl %r9d, %ebx rorxl $25, %r15d, %edx vpslld $14, %xmm5, %xmm5 # rnd_1: 2 - 2 andl %r15d, %ebx xorl %ecx, %edx rorxl $13, %r11d, %ecx vpxor %xmm5, %xmm7, %xmm7 # rnd_1: 3 - 3 addl %edx, %r10d rorxl $2, %r11d, %edx xorl %r9d, %ebx vpxor %xmm6, %xmm7, %xmm7 # rnd_1: 4 - 4 xorl %edx, %ecx rorxl $22, %r11d, %edx addl %ebx, %r10d vpshufd $0xfa, %xmm0, %xmm6 # rnd_1: 5 - 5 xorl %ecx, %edx movl %r12d, %ebx addl %r10d, %r14d vpxor %xmm8, %xmm7, %xmm5 # rnd_1: 6 - 6 xorl %r11d, %ebx addl %edx, %r10d andl %ebx, %eax vpsrld $10, %xmm6, %xmm8 # rnd_1: 7 - 7 xorl %r12d, %eax rorxl $6, %r14d, %edx addl %eax, %r10d # rnd_0: 0 - 0 movl %r15d, %eax rorxl $11, %r14d, %ecx addl 24(%rsp), %r9d vpsrlq $19, %xmm6, %xmm7 # rnd_0: 1 - 1 xorl %edx, %ecx xorl %r8d, %eax rorxl $25, %r14d, %edx vpsrlq $0x11, %xmm6, %xmm6 vpaddd %xmm1, %xmm4, %xmm4 # rnd_0: 2 - 2 andl %r14d, %eax xorl %ecx, %edx rorxl $13, %r10d, %ecx vpaddd %xmm5, %xmm4, %xmm4 # rnd_0: 3 - 3 addl %edx, %r9d rorxl $2, %r10d, %edx xorl %r8d, %eax vpxor %xmm7, %xmm6, %xmm6 # rnd_0: 4 - 4 xorl %edx, %ecx rorxl $22, %r10d, %edx addl %eax, %r9d vpxor %xmm6, %xmm8, %xmm8 # rnd_0: 5 - 5 xorl %ecx, %edx movl %r11d, %eax addl %r9d, %r13d vpshufb %xmm11, %xmm8, %xmm8 # rnd_0: 6 - 6 xorl %r10d, %eax addl %edx, %r9d andl %eax, %ebx vpaddd %xmm8, %xmm4, %xmm4 # rnd_0: 7 - 7 xorl %r11d, %ebx rorxl $6, %r13d, %edx addl %ebx, %r9d # rnd_1: 0 - 0 movl %r14d, %ebx rorxl $11, %r13d, %ecx addl 28(%rsp), %r8d vpshufd $0x50, %xmm4, %xmm6 # rnd_1: 1 - 1 xorl %edx, %ecx xorl %r15d, %ebx rorxl $25, %r13d, %edx vpsrld $10, %xmm6, %xmm9 # rnd_1: 2 - 2 andl %r13d, %ebx xorl %ecx, %edx rorxl $13, %r9d, %ecx vpsrlq $19, %xmm6, %xmm7 # rnd_1: 3 - 3 addl %edx, %r8d rorxl $2, %r9d, %edx xorl %r15d, %ebx vpsrlq $0x11, %xmm6, %xmm6 # rnd_1: 4 - 4 xorl %edx, %ecx rorxl $22, %r9d, %edx addl %ebx, %r8d vpxor %xmm7, %xmm6, %xmm6 # rnd_1: 5 - 5 xorl %ecx, %edx movl %r10d, %ebx addl %r8d, %r12d vpxor %xmm6, %xmm9, %xmm9 # rnd_1: 6 - 6 xorl %r9d, %ebx addl %edx, %r8d andl %ebx, %eax vpshufb %xmm12, %xmm9, %xmm9 # rnd_1: 7 - 7 xorl %r10d, %eax rorxl $6, %r12d, %edx addl %eax, %r8d vpaddd %xmm4, %xmm9, %xmm1 # msg_sched done: 4-7 # msg_sched: 8-11 # rnd_0: 0 - 0 movl %r13d, %eax rorxl $11, %r12d, %ecx addl 32(%rsp), %r15d vpalignr $4, %xmm0, %xmm1, %xmm4 vpalignr $4, %xmm2, %xmm3, %xmm5 # rnd_0: 1 - 2 xorl %edx, %ecx xorl %r14d, %eax rorxl $25, %r12d, %edx andl %r12d, %eax xorl %ecx, %edx rorxl $13, %r8d, %ecx vpsrld $7, %xmm5, %xmm6 vpslld $25, %xmm5, %xmm7 # rnd_0: 3 - 4 addl %edx, %r15d rorxl $2, %r8d, %edx 
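# The vector half of each msg_sched block computes four new schedule words,
# W[t] = sigma1(W[t-2]) + W[t-7] + sigma0(W[t-15]) + W[t-16]: vpalignr forms the
# W[t-15]/W[t-7] windows, vpsrld/vpslld/vpor give sigma0 (ror 7, ror 18, shr 3),
# vpsrld $10 with vpsrlq $0x11/$19 give sigma1 (shr 10, ror 17, ror 19), and
# vpshufb with the shuf_00BA/shuf_DC00 masks merges the two 2-word sigma1 passes
# into one 128-bit block of W.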
xorl %r14d, %eax xorl %edx, %ecx rorxl $22, %r8d, %edx addl %eax, %r15d vpsrld $3, %xmm5, %xmm8 vpor %xmm6, %xmm7, %xmm7 # rnd_0: 5 - 7 xorl %ecx, %edx movl %r9d, %eax addl %r15d, %r11d xorl %r8d, %eax addl %edx, %r15d andl %eax, %ebx xorl %r9d, %ebx rorxl $6, %r11d, %edx addl %ebx, %r15d # rnd_1: 0 - 0 movl %r12d, %ebx rorxl $11, %r11d, %ecx addl 36(%rsp), %r14d vpsrld $18, %xmm5, %xmm6 # rnd_1: 1 - 1 xorl %edx, %ecx xorl %r13d, %ebx rorxl $25, %r11d, %edx vpslld $14, %xmm5, %xmm5 # rnd_1: 2 - 2 andl %r11d, %ebx xorl %ecx, %edx rorxl $13, %r15d, %ecx vpxor %xmm5, %xmm7, %xmm7 # rnd_1: 3 - 3 addl %edx, %r14d rorxl $2, %r15d, %edx xorl %r13d, %ebx vpxor %xmm6, %xmm7, %xmm7 # rnd_1: 4 - 4 xorl %edx, %ecx rorxl $22, %r15d, %edx addl %ebx, %r14d vpshufd $0xfa, %xmm1, %xmm6 # rnd_1: 5 - 5 xorl %ecx, %edx movl %r8d, %ebx addl %r14d, %r10d vpxor %xmm8, %xmm7, %xmm5 # rnd_1: 6 - 6 xorl %r15d, %ebx addl %edx, %r14d andl %ebx, %eax vpsrld $10, %xmm6, %xmm8 # rnd_1: 7 - 7 xorl %r8d, %eax rorxl $6, %r10d, %edx addl %eax, %r14d # rnd_0: 0 - 0 movl %r11d, %eax rorxl $11, %r10d, %ecx addl 40(%rsp), %r13d vpsrlq $19, %xmm6, %xmm7 # rnd_0: 1 - 1 xorl %edx, %ecx xorl %r12d, %eax rorxl $25, %r10d, %edx vpsrlq $0x11, %xmm6, %xmm6 vpaddd %xmm2, %xmm4, %xmm4 # rnd_0: 2 - 2 andl %r10d, %eax xorl %ecx, %edx rorxl $13, %r14d, %ecx vpaddd %xmm5, %xmm4, %xmm4 # rnd_0: 3 - 3 addl %edx, %r13d rorxl $2, %r14d, %edx xorl %r12d, %eax vpxor %xmm7, %xmm6, %xmm6 # rnd_0: 4 - 4 xorl %edx, %ecx rorxl $22, %r14d, %edx addl %eax, %r13d vpxor %xmm6, %xmm8, %xmm8 # rnd_0: 5 - 5 xorl %ecx, %edx movl %r15d, %eax addl %r13d, %r9d vpshufb %xmm11, %xmm8, %xmm8 # rnd_0: 6 - 6 xorl %r14d, %eax addl %edx, %r13d andl %eax, %ebx vpaddd %xmm8, %xmm4, %xmm4 # rnd_0: 7 - 7 xorl %r15d, %ebx rorxl $6, %r9d, %edx addl %ebx, %r13d # rnd_1: 0 - 0 movl %r10d, %ebx rorxl $11, %r9d, %ecx addl 44(%rsp), %r12d vpshufd $0x50, %xmm4, %xmm6 # rnd_1: 1 - 1 xorl %edx, %ecx xorl %r11d, %ebx rorxl $25, %r9d, %edx vpsrld $10, %xmm6, %xmm9 # rnd_1: 2 - 2 andl %r9d, %ebx xorl %ecx, %edx rorxl $13, %r13d, %ecx vpsrlq $19, %xmm6, %xmm7 # rnd_1: 3 - 3 addl %edx, %r12d rorxl $2, %r13d, %edx xorl %r11d, %ebx vpsrlq $0x11, %xmm6, %xmm6 # rnd_1: 4 - 4 xorl %edx, %ecx rorxl $22, %r13d, %edx addl %ebx, %r12d vpxor %xmm7, %xmm6, %xmm6 # rnd_1: 5 - 5 xorl %ecx, %edx movl %r14d, %ebx addl %r12d, %r8d vpxor %xmm6, %xmm9, %xmm9 # rnd_1: 6 - 6 xorl %r13d, %ebx addl %edx, %r12d andl %ebx, %eax vpshufb %xmm12, %xmm9, %xmm9 # rnd_1: 7 - 7 xorl %r14d, %eax rorxl $6, %r8d, %edx addl %eax, %r12d vpaddd %xmm4, %xmm9, %xmm2 # msg_sched done: 8-11 # msg_sched: 12-15 # rnd_0: 0 - 0 movl %r9d, %eax rorxl $11, %r8d, %ecx addl 48(%rsp), %r11d vpalignr $4, %xmm1, %xmm2, %xmm4 vpalignr $4, %xmm3, %xmm0, %xmm5 # rnd_0: 1 - 2 xorl %edx, %ecx xorl %r10d, %eax rorxl $25, %r8d, %edx andl %r8d, %eax xorl %ecx, %edx rorxl $13, %r12d, %ecx vpsrld $7, %xmm5, %xmm6 vpslld $25, %xmm5, %xmm7 # rnd_0: 3 - 4 addl %edx, %r11d rorxl $2, %r12d, %edx xorl %r10d, %eax xorl %edx, %ecx rorxl $22, %r12d, %edx addl %eax, %r11d vpsrld $3, %xmm5, %xmm8 vpor %xmm6, %xmm7, %xmm7 # rnd_0: 5 - 7 xorl %ecx, %edx movl %r13d, %eax addl %r11d, %r15d xorl %r12d, %eax addl %edx, %r11d andl %eax, %ebx xorl %r13d, %ebx rorxl $6, %r15d, %edx addl %ebx, %r11d # rnd_1: 0 - 0 movl %r8d, %ebx rorxl $11, %r15d, %ecx addl 52(%rsp), %r10d vpsrld $18, %xmm5, %xmm6 # rnd_1: 1 - 1 xorl %edx, %ecx xorl %r9d, %ebx rorxl $25, %r15d, %edx vpslld $14, %xmm5, %xmm5 # rnd_1: 2 - 2 andl %r15d, %ebx xorl %ecx, %edx rorxl $13, %r11d, %ecx vpxor 
%xmm5, %xmm7, %xmm7 # rnd_1: 3 - 3 addl %edx, %r10d rorxl $2, %r11d, %edx xorl %r9d, %ebx vpxor %xmm6, %xmm7, %xmm7 # rnd_1: 4 - 4 xorl %edx, %ecx rorxl $22, %r11d, %edx addl %ebx, %r10d vpshufd $0xfa, %xmm2, %xmm6 # rnd_1: 5 - 5 xorl %ecx, %edx movl %r12d, %ebx addl %r10d, %r14d vpxor %xmm8, %xmm7, %xmm5 # rnd_1: 6 - 6 xorl %r11d, %ebx addl %edx, %r10d andl %ebx, %eax vpsrld $10, %xmm6, %xmm8 # rnd_1: 7 - 7 xorl %r12d, %eax rorxl $6, %r14d, %edx addl %eax, %r10d # rnd_0: 0 - 0 movl %r15d, %eax rorxl $11, %r14d, %ecx addl 56(%rsp), %r9d vpsrlq $19, %xmm6, %xmm7 # rnd_0: 1 - 1 xorl %edx, %ecx xorl %r8d, %eax rorxl $25, %r14d, %edx vpsrlq $0x11, %xmm6, %xmm6 vpaddd %xmm3, %xmm4, %xmm4 # rnd_0: 2 - 2 andl %r14d, %eax xorl %ecx, %edx rorxl $13, %r10d, %ecx vpaddd %xmm5, %xmm4, %xmm4 # rnd_0: 3 - 3 addl %edx, %r9d rorxl $2, %r10d, %edx xorl %r8d, %eax vpxor %xmm7, %xmm6, %xmm6 # rnd_0: 4 - 4 xorl %edx, %ecx rorxl $22, %r10d, %edx addl %eax, %r9d vpxor %xmm6, %xmm8, %xmm8 # rnd_0: 5 - 5 xorl %ecx, %edx movl %r11d, %eax addl %r9d, %r13d vpshufb %xmm11, %xmm8, %xmm8 # rnd_0: 6 - 6 xorl %r10d, %eax addl %edx, %r9d andl %eax, %ebx vpaddd %xmm8, %xmm4, %xmm4 # rnd_0: 7 - 7 xorl %r11d, %ebx rorxl $6, %r13d, %edx addl %ebx, %r9d # rnd_1: 0 - 0 movl %r14d, %ebx rorxl $11, %r13d, %ecx addl 60(%rsp), %r8d vpshufd $0x50, %xmm4, %xmm6 # rnd_1: 1 - 1 xorl %edx, %ecx xorl %r15d, %ebx rorxl $25, %r13d, %edx vpsrld $10, %xmm6, %xmm9 # rnd_1: 2 - 2 andl %r13d, %ebx xorl %ecx, %edx rorxl $13, %r9d, %ecx vpsrlq $19, %xmm6, %xmm7 # rnd_1: 3 - 3 addl %edx, %r8d rorxl $2, %r9d, %edx xorl %r15d, %ebx vpsrlq $0x11, %xmm6, %xmm6 # rnd_1: 4 - 4 xorl %edx, %ecx rorxl $22, %r9d, %edx addl %ebx, %r8d vpxor %xmm7, %xmm6, %xmm6 # rnd_1: 5 - 5 xorl %ecx, %edx movl %r10d, %ebx addl %r8d, %r12d vpxor %xmm6, %xmm9, %xmm9 # rnd_1: 6 - 6 xorl %r9d, %ebx addl %edx, %r8d andl %ebx, %eax vpshufb %xmm12, %xmm9, %xmm9 # rnd_1: 7 - 7 xorl %r10d, %eax rorxl $6, %r12d, %edx addl %eax, %r8d vpaddd %xmm4, %xmm9, %xmm3 # msg_sched done: 12-15 # set_w_k_xfer_4: 8 vpaddd 128+L_avx1_rorx_sha256_k(%rip), %xmm0, %xmm4 vpaddd 144+L_avx1_rorx_sha256_k(%rip), %xmm1, %xmm5 vmovdqu %xmm4, (%rsp) vmovdqu %xmm5, 16(%rsp) vpaddd 160+L_avx1_rorx_sha256_k(%rip), %xmm2, %xmm6 vpaddd 176+L_avx1_rorx_sha256_k(%rip), %xmm3, %xmm7 vmovdqu %xmm6, 32(%rsp) vmovdqu %xmm7, 48(%rsp) # msg_sched: 0-3 # rnd_0: 0 - 0 movl %r13d, %eax rorxl $11, %r12d, %ecx addl (%rsp), %r15d vpalignr $4, %xmm2, %xmm3, %xmm4 vpalignr $4, %xmm0, %xmm1, %xmm5 # rnd_0: 1 - 2 xorl %edx, %ecx xorl %r14d, %eax rorxl $25, %r12d, %edx andl %r12d, %eax xorl %ecx, %edx rorxl $13, %r8d, %ecx vpsrld $7, %xmm5, %xmm6 vpslld $25, %xmm5, %xmm7 # rnd_0: 3 - 4 addl %edx, %r15d rorxl $2, %r8d, %edx xorl %r14d, %eax xorl %edx, %ecx rorxl $22, %r8d, %edx addl %eax, %r15d vpsrld $3, %xmm5, %xmm8 vpor %xmm6, %xmm7, %xmm7 # rnd_0: 5 - 7 xorl %ecx, %edx movl %r9d, %eax addl %r15d, %r11d xorl %r8d, %eax addl %edx, %r15d andl %eax, %ebx xorl %r9d, %ebx rorxl $6, %r11d, %edx addl %ebx, %r15d # rnd_1: 0 - 0 movl %r12d, %ebx rorxl $11, %r11d, %ecx addl 4(%rsp), %r14d vpsrld $18, %xmm5, %xmm6 # rnd_1: 1 - 1 xorl %edx, %ecx xorl %r13d, %ebx rorxl $25, %r11d, %edx vpslld $14, %xmm5, %xmm5 # rnd_1: 2 - 2 andl %r11d, %ebx xorl %ecx, %edx rorxl $13, %r15d, %ecx vpxor %xmm5, %xmm7, %xmm7 # rnd_1: 3 - 3 addl %edx, %r14d rorxl $2, %r15d, %edx xorl %r13d, %ebx vpxor %xmm6, %xmm7, %xmm7 # rnd_1: 4 - 4 xorl %edx, %ecx rorxl $22, %r15d, %edx addl %ebx, %r14d vpshufd $0xfa, %xmm3, %xmm6 # rnd_1: 5 - 5 xorl %ecx, %edx movl %r8d, 
%ebx addl %r14d, %r10d vpxor %xmm8, %xmm7, %xmm5 # rnd_1: 6 - 6 xorl %r15d, %ebx addl %edx, %r14d andl %ebx, %eax vpsrld $10, %xmm6, %xmm8 # rnd_1: 7 - 7 xorl %r8d, %eax rorxl $6, %r10d, %edx addl %eax, %r14d # rnd_0: 0 - 0 movl %r11d, %eax rorxl $11, %r10d, %ecx addl 8(%rsp), %r13d vpsrlq $19, %xmm6, %xmm7 # rnd_0: 1 - 1 xorl %edx, %ecx xorl %r12d, %eax rorxl $25, %r10d, %edx vpsrlq $0x11, %xmm6, %xmm6 vpaddd %xmm0, %xmm4, %xmm4 # rnd_0: 2 - 2 andl %r10d, %eax xorl %ecx, %edx rorxl $13, %r14d, %ecx vpaddd %xmm5, %xmm4, %xmm4 # rnd_0: 3 - 3 addl %edx, %r13d rorxl $2, %r14d, %edx xorl %r12d, %eax vpxor %xmm7, %xmm6, %xmm6 # rnd_0: 4 - 4 xorl %edx, %ecx rorxl $22, %r14d, %edx addl %eax, %r13d vpxor %xmm6, %xmm8, %xmm8 # rnd_0: 5 - 5 xorl %ecx, %edx movl %r15d, %eax addl %r13d, %r9d vpshufb %xmm11, %xmm8, %xmm8 # rnd_0: 6 - 6 xorl %r14d, %eax addl %edx, %r13d andl %eax, %ebx vpaddd %xmm8, %xmm4, %xmm4 # rnd_0: 7 - 7 xorl %r15d, %ebx rorxl $6, %r9d, %edx addl %ebx, %r13d # rnd_1: 0 - 0 movl %r10d, %ebx rorxl $11, %r9d, %ecx addl 12(%rsp), %r12d vpshufd $0x50, %xmm4, %xmm6 # rnd_1: 1 - 1 xorl %edx, %ecx xorl %r11d, %ebx rorxl $25, %r9d, %edx vpsrld $10, %xmm6, %xmm9 # rnd_1: 2 - 2 andl %r9d, %ebx xorl %ecx, %edx rorxl $13, %r13d, %ecx vpsrlq $19, %xmm6, %xmm7 # rnd_1: 3 - 3 addl %edx, %r12d rorxl $2, %r13d, %edx xorl %r11d, %ebx vpsrlq $0x11, %xmm6, %xmm6 # rnd_1: 4 - 4 xorl %edx, %ecx rorxl $22, %r13d, %edx addl %ebx, %r12d vpxor %xmm7, %xmm6, %xmm6 # rnd_1: 5 - 5 xorl %ecx, %edx movl %r14d, %ebx addl %r12d, %r8d vpxor %xmm6, %xmm9, %xmm9 # rnd_1: 6 - 6 xorl %r13d, %ebx addl %edx, %r12d andl %ebx, %eax vpshufb %xmm12, %xmm9, %xmm9 # rnd_1: 7 - 7 xorl %r14d, %eax rorxl $6, %r8d, %edx addl %eax, %r12d vpaddd %xmm4, %xmm9, %xmm0 # msg_sched done: 0-3 # msg_sched: 4-7 # rnd_0: 0 - 0 movl %r9d, %eax rorxl $11, %r8d, %ecx addl 16(%rsp), %r11d vpalignr $4, %xmm3, %xmm0, %xmm4 vpalignr $4, %xmm1, %xmm2, %xmm5 # rnd_0: 1 - 2 xorl %edx, %ecx xorl %r10d, %eax rorxl $25, %r8d, %edx andl %r8d, %eax xorl %ecx, %edx rorxl $13, %r12d, %ecx vpsrld $7, %xmm5, %xmm6 vpslld $25, %xmm5, %xmm7 # rnd_0: 3 - 4 addl %edx, %r11d rorxl $2, %r12d, %edx xorl %r10d, %eax xorl %edx, %ecx rorxl $22, %r12d, %edx addl %eax, %r11d vpsrld $3, %xmm5, %xmm8 vpor %xmm6, %xmm7, %xmm7 # rnd_0: 5 - 7 xorl %ecx, %edx movl %r13d, %eax addl %r11d, %r15d xorl %r12d, %eax addl %edx, %r11d andl %eax, %ebx xorl %r13d, %ebx rorxl $6, %r15d, %edx addl %ebx, %r11d # rnd_1: 0 - 0 movl %r8d, %ebx rorxl $11, %r15d, %ecx addl 20(%rsp), %r10d vpsrld $18, %xmm5, %xmm6 # rnd_1: 1 - 1 xorl %edx, %ecx xorl %r9d, %ebx rorxl $25, %r15d, %edx vpslld $14, %xmm5, %xmm5 # rnd_1: 2 - 2 andl %r15d, %ebx xorl %ecx, %edx rorxl $13, %r11d, %ecx vpxor %xmm5, %xmm7, %xmm7 # rnd_1: 3 - 3 addl %edx, %r10d rorxl $2, %r11d, %edx xorl %r9d, %ebx vpxor %xmm6, %xmm7, %xmm7 # rnd_1: 4 - 4 xorl %edx, %ecx rorxl $22, %r11d, %edx addl %ebx, %r10d vpshufd $0xfa, %xmm0, %xmm6 # rnd_1: 5 - 5 xorl %ecx, %edx movl %r12d, %ebx addl %r10d, %r14d vpxor %xmm8, %xmm7, %xmm5 # rnd_1: 6 - 6 xorl %r11d, %ebx addl %edx, %r10d andl %ebx, %eax vpsrld $10, %xmm6, %xmm8 # rnd_1: 7 - 7 xorl %r12d, %eax rorxl $6, %r14d, %edx addl %eax, %r10d # rnd_0: 0 - 0 movl %r15d, %eax rorxl $11, %r14d, %ecx addl 24(%rsp), %r9d vpsrlq $19, %xmm6, %xmm7 # rnd_0: 1 - 1 xorl %edx, %ecx xorl %r8d, %eax rorxl $25, %r14d, %edx vpsrlq $0x11, %xmm6, %xmm6 vpaddd %xmm1, %xmm4, %xmm4 # rnd_0: 2 - 2 andl %r14d, %eax xorl %ecx, %edx rorxl $13, %r10d, %ecx vpaddd %xmm5, %xmm4, %xmm4 # rnd_0: 3 - 3 addl %edx, %r9d rorxl 
$2, %r10d, %edx xorl %r8d, %eax vpxor %xmm7, %xmm6, %xmm6 # rnd_0: 4 - 4 xorl %edx, %ecx rorxl $22, %r10d, %edx addl %eax, %r9d vpxor %xmm6, %xmm8, %xmm8 # rnd_0: 5 - 5 xorl %ecx, %edx movl %r11d, %eax addl %r9d, %r13d vpshufb %xmm11, %xmm8, %xmm8 # rnd_0: 6 - 6 xorl %r10d, %eax addl %edx, %r9d andl %eax, %ebx vpaddd %xmm8, %xmm4, %xmm4 # rnd_0: 7 - 7 xorl %r11d, %ebx rorxl $6, %r13d, %edx addl %ebx, %r9d # rnd_1: 0 - 0 movl %r14d, %ebx rorxl $11, %r13d, %ecx addl 28(%rsp), %r8d vpshufd $0x50, %xmm4, %xmm6 # rnd_1: 1 - 1 xorl %edx, %ecx xorl %r15d, %ebx rorxl $25, %r13d, %edx vpsrld $10, %xmm6, %xmm9 # rnd_1: 2 - 2 andl %r13d, %ebx xorl %ecx, %edx rorxl $13, %r9d, %ecx vpsrlq $19, %xmm6, %xmm7 # rnd_1: 3 - 3 addl %edx, %r8d rorxl $2, %r9d, %edx xorl %r15d, %ebx vpsrlq $0x11, %xmm6, %xmm6 # rnd_1: 4 - 4 xorl %edx, %ecx rorxl $22, %r9d, %edx addl %ebx, %r8d vpxor %xmm7, %xmm6, %xmm6 # rnd_1: 5 - 5 xorl %ecx, %edx movl %r10d, %ebx addl %r8d, %r12d vpxor %xmm6, %xmm9, %xmm9 # rnd_1: 6 - 6 xorl %r9d, %ebx addl %edx, %r8d andl %ebx, %eax vpshufb %xmm12, %xmm9, %xmm9 # rnd_1: 7 - 7 xorl %r10d, %eax rorxl $6, %r12d, %edx addl %eax, %r8d vpaddd %xmm4, %xmm9, %xmm1 # msg_sched done: 4-7 # msg_sched: 8-11 # rnd_0: 0 - 0 movl %r13d, %eax rorxl $11, %r12d, %ecx addl 32(%rsp), %r15d vpalignr $4, %xmm0, %xmm1, %xmm4 vpalignr $4, %xmm2, %xmm3, %xmm5 # rnd_0: 1 - 2 xorl %edx, %ecx xorl %r14d, %eax rorxl $25, %r12d, %edx andl %r12d, %eax xorl %ecx, %edx rorxl $13, %r8d, %ecx vpsrld $7, %xmm5, %xmm6 vpslld $25, %xmm5, %xmm7 # rnd_0: 3 - 4 addl %edx, %r15d rorxl $2, %r8d, %edx xorl %r14d, %eax xorl %edx, %ecx rorxl $22, %r8d, %edx addl %eax, %r15d vpsrld $3, %xmm5, %xmm8 vpor %xmm6, %xmm7, %xmm7 # rnd_0: 5 - 7 xorl %ecx, %edx movl %r9d, %eax addl %r15d, %r11d xorl %r8d, %eax addl %edx, %r15d andl %eax, %ebx xorl %r9d, %ebx rorxl $6, %r11d, %edx addl %ebx, %r15d # rnd_1: 0 - 0 movl %r12d, %ebx rorxl $11, %r11d, %ecx addl 36(%rsp), %r14d vpsrld $18, %xmm5, %xmm6 # rnd_1: 1 - 1 xorl %edx, %ecx xorl %r13d, %ebx rorxl $25, %r11d, %edx vpslld $14, %xmm5, %xmm5 # rnd_1: 2 - 2 andl %r11d, %ebx xorl %ecx, %edx rorxl $13, %r15d, %ecx vpxor %xmm5, %xmm7, %xmm7 # rnd_1: 3 - 3 addl %edx, %r14d rorxl $2, %r15d, %edx xorl %r13d, %ebx vpxor %xmm6, %xmm7, %xmm7 # rnd_1: 4 - 4 xorl %edx, %ecx rorxl $22, %r15d, %edx addl %ebx, %r14d vpshufd $0xfa, %xmm1, %xmm6 # rnd_1: 5 - 5 xorl %ecx, %edx movl %r8d, %ebx addl %r14d, %r10d vpxor %xmm8, %xmm7, %xmm5 # rnd_1: 6 - 6 xorl %r15d, %ebx addl %edx, %r14d andl %ebx, %eax vpsrld $10, %xmm6, %xmm8 # rnd_1: 7 - 7 xorl %r8d, %eax rorxl $6, %r10d, %edx addl %eax, %r14d # rnd_0: 0 - 0 movl %r11d, %eax rorxl $11, %r10d, %ecx addl 40(%rsp), %r13d vpsrlq $19, %xmm6, %xmm7 # rnd_0: 1 - 1 xorl %edx, %ecx xorl %r12d, %eax rorxl $25, %r10d, %edx vpsrlq $0x11, %xmm6, %xmm6 vpaddd %xmm2, %xmm4, %xmm4 # rnd_0: 2 - 2 andl %r10d, %eax xorl %ecx, %edx rorxl $13, %r14d, %ecx vpaddd %xmm5, %xmm4, %xmm4 # rnd_0: 3 - 3 addl %edx, %r13d rorxl $2, %r14d, %edx xorl %r12d, %eax vpxor %xmm7, %xmm6, %xmm6 # rnd_0: 4 - 4 xorl %edx, %ecx rorxl $22, %r14d, %edx addl %eax, %r13d vpxor %xmm6, %xmm8, %xmm8 # rnd_0: 5 - 5 xorl %ecx, %edx movl %r15d, %eax addl %r13d, %r9d vpshufb %xmm11, %xmm8, %xmm8 # rnd_0: 6 - 6 xorl %r14d, %eax addl %edx, %r13d andl %eax, %ebx vpaddd %xmm8, %xmm4, %xmm4 # rnd_0: 7 - 7 xorl %r15d, %ebx rorxl $6, %r9d, %edx addl %ebx, %r13d # rnd_1: 0 - 0 movl %r10d, %ebx rorxl $11, %r9d, %ecx addl 44(%rsp), %r12d vpshufd $0x50, %xmm4, %xmm6 # rnd_1: 1 - 1 xorl %edx, %ecx xorl %r11d, %ebx rorxl $25, 
%r9d, %edx vpsrld $10, %xmm6, %xmm9 # rnd_1: 2 - 2 andl %r9d, %ebx xorl %ecx, %edx rorxl $13, %r13d, %ecx vpsrlq $19, %xmm6, %xmm7 # rnd_1: 3 - 3 addl %edx, %r12d rorxl $2, %r13d, %edx xorl %r11d, %ebx vpsrlq $0x11, %xmm6, %xmm6 # rnd_1: 4 - 4 xorl %edx, %ecx rorxl $22, %r13d, %edx addl %ebx, %r12d vpxor %xmm7, %xmm6, %xmm6 # rnd_1: 5 - 5 xorl %ecx, %edx movl %r14d, %ebx addl %r12d, %r8d vpxor %xmm6, %xmm9, %xmm9 # rnd_1: 6 - 6 xorl %r13d, %ebx addl %edx, %r12d andl %ebx, %eax vpshufb %xmm12, %xmm9, %xmm9 # rnd_1: 7 - 7 xorl %r14d, %eax rorxl $6, %r8d, %edx addl %eax, %r12d vpaddd %xmm4, %xmm9, %xmm2 # msg_sched done: 8-11 # msg_sched: 12-15 # rnd_0: 0 - 0 movl %r9d, %eax rorxl $11, %r8d, %ecx addl 48(%rsp), %r11d vpalignr $4, %xmm1, %xmm2, %xmm4 vpalignr $4, %xmm3, %xmm0, %xmm5 # rnd_0: 1 - 2 xorl %edx, %ecx xorl %r10d, %eax rorxl $25, %r8d, %edx andl %r8d, %eax xorl %ecx, %edx rorxl $13, %r12d, %ecx vpsrld $7, %xmm5, %xmm6 vpslld $25, %xmm5, %xmm7 # rnd_0: 3 - 4 addl %edx, %r11d rorxl $2, %r12d, %edx xorl %r10d, %eax xorl %edx, %ecx rorxl $22, %r12d, %edx addl %eax, %r11d vpsrld $3, %xmm5, %xmm8 vpor %xmm6, %xmm7, %xmm7 # rnd_0: 5 - 7 xorl %ecx, %edx movl %r13d, %eax addl %r11d, %r15d xorl %r12d, %eax addl %edx, %r11d andl %eax, %ebx xorl %r13d, %ebx rorxl $6, %r15d, %edx addl %ebx, %r11d # rnd_1: 0 - 0 movl %r8d, %ebx rorxl $11, %r15d, %ecx addl 52(%rsp), %r10d vpsrld $18, %xmm5, %xmm6 # rnd_1: 1 - 1 xorl %edx, %ecx xorl %r9d, %ebx rorxl $25, %r15d, %edx vpslld $14, %xmm5, %xmm5 # rnd_1: 2 - 2 andl %r15d, %ebx xorl %ecx, %edx rorxl $13, %r11d, %ecx vpxor %xmm5, %xmm7, %xmm7 # rnd_1: 3 - 3 addl %edx, %r10d rorxl $2, %r11d, %edx xorl %r9d, %ebx vpxor %xmm6, %xmm7, %xmm7 # rnd_1: 4 - 4 xorl %edx, %ecx rorxl $22, %r11d, %edx addl %ebx, %r10d vpshufd $0xfa, %xmm2, %xmm6 # rnd_1: 5 - 5 xorl %ecx, %edx movl %r12d, %ebx addl %r10d, %r14d vpxor %xmm8, %xmm7, %xmm5 # rnd_1: 6 - 6 xorl %r11d, %ebx addl %edx, %r10d andl %ebx, %eax vpsrld $10, %xmm6, %xmm8 # rnd_1: 7 - 7 xorl %r12d, %eax rorxl $6, %r14d, %edx addl %eax, %r10d # rnd_0: 0 - 0 movl %r15d, %eax rorxl $11, %r14d, %ecx addl 56(%rsp), %r9d vpsrlq $19, %xmm6, %xmm7 # rnd_0: 1 - 1 xorl %edx, %ecx xorl %r8d, %eax rorxl $25, %r14d, %edx vpsrlq $0x11, %xmm6, %xmm6 vpaddd %xmm3, %xmm4, %xmm4 # rnd_0: 2 - 2 andl %r14d, %eax xorl %ecx, %edx rorxl $13, %r10d, %ecx vpaddd %xmm5, %xmm4, %xmm4 # rnd_0: 3 - 3 addl %edx, %r9d rorxl $2, %r10d, %edx xorl %r8d, %eax vpxor %xmm7, %xmm6, %xmm6 # rnd_0: 4 - 4 xorl %edx, %ecx rorxl $22, %r10d, %edx addl %eax, %r9d vpxor %xmm6, %xmm8, %xmm8 # rnd_0: 5 - 5 xorl %ecx, %edx movl %r11d, %eax addl %r9d, %r13d vpshufb %xmm11, %xmm8, %xmm8 # rnd_0: 6 - 6 xorl %r10d, %eax addl %edx, %r9d andl %eax, %ebx vpaddd %xmm8, %xmm4, %xmm4 # rnd_0: 7 - 7 xorl %r11d, %ebx rorxl $6, %r13d, %edx addl %ebx, %r9d # rnd_1: 0 - 0 movl %r14d, %ebx rorxl $11, %r13d, %ecx addl 60(%rsp), %r8d vpshufd $0x50, %xmm4, %xmm6 # rnd_1: 1 - 1 xorl %edx, %ecx xorl %r15d, %ebx rorxl $25, %r13d, %edx vpsrld $10, %xmm6, %xmm9 # rnd_1: 2 - 2 andl %r13d, %ebx xorl %ecx, %edx rorxl $13, %r9d, %ecx vpsrlq $19, %xmm6, %xmm7 # rnd_1: 3 - 3 addl %edx, %r8d rorxl $2, %r9d, %edx xorl %r15d, %ebx vpsrlq $0x11, %xmm6, %xmm6 # rnd_1: 4 - 4 xorl %edx, %ecx rorxl $22, %r9d, %edx addl %ebx, %r8d vpxor %xmm7, %xmm6, %xmm6 # rnd_1: 5 - 5 xorl %ecx, %edx movl %r10d, %ebx addl %r8d, %r12d vpxor %xmm6, %xmm9, %xmm9 # rnd_1: 6 - 6 xorl %r9d, %ebx addl %edx, %r8d andl %ebx, %eax vpshufb %xmm12, %xmm9, %xmm9 # rnd_1: 7 - 7 xorl %r10d, %eax rorxl $6, %r12d, %edx addl %eax, 
%r8d vpaddd %xmm4, %xmm9, %xmm3 # msg_sched done: 12-15 # set_w_k_xfer_4: 12 vpaddd 192+L_avx1_rorx_sha256_k(%rip), %xmm0, %xmm4 vpaddd 208+L_avx1_rorx_sha256_k(%rip), %xmm1, %xmm5 vmovdqu %xmm4, (%rsp) vmovdqu %xmm5, 16(%rsp) vpaddd 224+L_avx1_rorx_sha256_k(%rip), %xmm2, %xmm6 vpaddd 240+L_avx1_rorx_sha256_k(%rip), %xmm3, %xmm7 vmovdqu %xmm6, 32(%rsp) vmovdqu %xmm7, 48(%rsp) xorl %eax, %eax xorl %ecx, %ecx # rnd_all_4: 0-3 rorxl $6, %r12d, %edx rorxl $11, %r12d, %ecx addl %eax, %r8d addl (%rsp), %r15d movl %r13d, %eax xorl %edx, %ecx xorl %r14d, %eax rorxl $25, %r12d, %edx xorl %ecx, %edx andl %r12d, %eax addl %edx, %r15d rorxl $2, %r8d, %edx rorxl $13, %r8d, %ecx xorl %r14d, %eax xorl %edx, %ecx rorxl $22, %r8d, %edx addl %eax, %r15d xorl %ecx, %edx movl %r9d, %eax addl %r15d, %r11d xorl %r8d, %eax andl %eax, %ebx addl %edx, %r15d xorl %r9d, %ebx rorxl $6, %r11d, %edx rorxl $11, %r11d, %ecx addl %ebx, %r15d addl 4(%rsp), %r14d movl %r12d, %ebx xorl %edx, %ecx xorl %r13d, %ebx rorxl $25, %r11d, %edx xorl %ecx, %edx andl %r11d, %ebx addl %edx, %r14d rorxl $2, %r15d, %edx rorxl $13, %r15d, %ecx xorl %r13d, %ebx xorl %edx, %ecx rorxl $22, %r15d, %edx addl %ebx, %r14d xorl %ecx, %edx movl %r8d, %ebx addl %r14d, %r10d xorl %r15d, %ebx andl %ebx, %eax addl %edx, %r14d xorl %r8d, %eax rorxl $6, %r10d, %edx rorxl $11, %r10d, %ecx addl %eax, %r14d addl 8(%rsp), %r13d movl %r11d, %eax xorl %edx, %ecx xorl %r12d, %eax rorxl $25, %r10d, %edx xorl %ecx, %edx andl %r10d, %eax addl %edx, %r13d rorxl $2, %r14d, %edx rorxl $13, %r14d, %ecx xorl %r12d, %eax xorl %edx, %ecx rorxl $22, %r14d, %edx addl %eax, %r13d xorl %ecx, %edx movl %r15d, %eax addl %r13d, %r9d xorl %r14d, %eax andl %eax, %ebx addl %edx, %r13d xorl %r15d, %ebx rorxl $6, %r9d, %edx rorxl $11, %r9d, %ecx addl %ebx, %r13d addl 12(%rsp), %r12d movl %r10d, %ebx xorl %edx, %ecx xorl %r11d, %ebx rorxl $25, %r9d, %edx xorl %ecx, %edx andl %r9d, %ebx addl %edx, %r12d rorxl $2, %r13d, %edx rorxl $13, %r13d, %ecx xorl %r11d, %ebx xorl %edx, %ecx rorxl $22, %r13d, %edx addl %ebx, %r12d xorl %ecx, %edx movl %r14d, %ebx addl %r12d, %r8d xorl %r13d, %ebx andl %ebx, %eax addl %edx, %r12d xorl %r14d, %eax # rnd_all_4: 1-4 rorxl $6, %r8d, %edx rorxl $11, %r8d, %ecx addl %eax, %r12d addl 16(%rsp), %r11d movl %r9d, %eax xorl %edx, %ecx xorl %r10d, %eax rorxl $25, %r8d, %edx xorl %ecx, %edx andl %r8d, %eax addl %edx, %r11d rorxl $2, %r12d, %edx rorxl $13, %r12d, %ecx xorl %r10d, %eax xorl %edx, %ecx rorxl $22, %r12d, %edx addl %eax, %r11d xorl %ecx, %edx movl %r13d, %eax addl %r11d, %r15d xorl %r12d, %eax andl %eax, %ebx addl %edx, %r11d xorl %r13d, %ebx rorxl $6, %r15d, %edx rorxl $11, %r15d, %ecx addl %ebx, %r11d addl 20(%rsp), %r10d movl %r8d, %ebx xorl %edx, %ecx xorl %r9d, %ebx rorxl $25, %r15d, %edx xorl %ecx, %edx andl %r15d, %ebx addl %edx, %r10d rorxl $2, %r11d, %edx rorxl $13, %r11d, %ecx xorl %r9d, %ebx xorl %edx, %ecx rorxl $22, %r11d, %edx addl %ebx, %r10d xorl %ecx, %edx movl %r12d, %ebx addl %r10d, %r14d xorl %r11d, %ebx andl %ebx, %eax addl %edx, %r10d xorl %r12d, %eax rorxl $6, %r14d, %edx rorxl $11, %r14d, %ecx addl %eax, %r10d addl 24(%rsp), %r9d movl %r15d, %eax xorl %edx, %ecx xorl %r8d, %eax rorxl $25, %r14d, %edx xorl %ecx, %edx andl %r14d, %eax addl %edx, %r9d rorxl $2, %r10d, %edx rorxl $13, %r10d, %ecx xorl %r8d, %eax xorl %edx, %ecx rorxl $22, %r10d, %edx addl %eax, %r9d xorl %ecx, %edx movl %r11d, %eax addl %r9d, %r13d xorl %r10d, %eax andl %eax, %ebx addl %edx, %r9d xorl %r11d, %ebx rorxl $6, %r13d, %edx rorxl $11, %r13d, %ecx addl 
%ebx, %r9d addl 28(%rsp), %r8d movl %r14d, %ebx xorl %edx, %ecx xorl %r15d, %ebx rorxl $25, %r13d, %edx xorl %ecx, %edx andl %r13d, %ebx addl %edx, %r8d rorxl $2, %r9d, %edx rorxl $13, %r9d, %ecx xorl %r15d, %ebx xorl %edx, %ecx rorxl $22, %r9d, %edx addl %ebx, %r8d xorl %ecx, %edx movl %r10d, %ebx addl %r8d, %r12d xorl %r9d, %ebx andl %ebx, %eax addl %edx, %r8d xorl %r10d, %eax # rnd_all_4: 2-5 rorxl $6, %r12d, %edx rorxl $11, %r12d, %ecx addl %eax, %r8d addl 32(%rsp), %r15d movl %r13d, %eax xorl %edx, %ecx xorl %r14d, %eax rorxl $25, %r12d, %edx xorl %ecx, %edx andl %r12d, %eax addl %edx, %r15d rorxl $2, %r8d, %edx rorxl $13, %r8d, %ecx xorl %r14d, %eax xorl %edx, %ecx rorxl $22, %r8d, %edx addl %eax, %r15d xorl %ecx, %edx movl %r9d, %eax addl %r15d, %r11d xorl %r8d, %eax andl %eax, %ebx addl %edx, %r15d xorl %r9d, %ebx rorxl $6, %r11d, %edx rorxl $11, %r11d, %ecx addl %ebx, %r15d addl 36(%rsp), %r14d movl %r12d, %ebx xorl %edx, %ecx xorl %r13d, %ebx rorxl $25, %r11d, %edx xorl %ecx, %edx andl %r11d, %ebx addl %edx, %r14d rorxl $2, %r15d, %edx rorxl $13, %r15d, %ecx xorl %r13d, %ebx xorl %edx, %ecx rorxl $22, %r15d, %edx addl %ebx, %r14d xorl %ecx, %edx movl %r8d, %ebx addl %r14d, %r10d xorl %r15d, %ebx andl %ebx, %eax addl %edx, %r14d xorl %r8d, %eax rorxl $6, %r10d, %edx rorxl $11, %r10d, %ecx addl %eax, %r14d addl 40(%rsp), %r13d movl %r11d, %eax xorl %edx, %ecx xorl %r12d, %eax rorxl $25, %r10d, %edx xorl %ecx, %edx andl %r10d, %eax addl %edx, %r13d rorxl $2, %r14d, %edx rorxl $13, %r14d, %ecx xorl %r12d, %eax xorl %edx, %ecx rorxl $22, %r14d, %edx addl %eax, %r13d xorl %ecx, %edx movl %r15d, %eax addl %r13d, %r9d xorl %r14d, %eax andl %eax, %ebx addl %edx, %r13d xorl %r15d, %ebx rorxl $6, %r9d, %edx rorxl $11, %r9d, %ecx addl %ebx, %r13d addl 44(%rsp), %r12d movl %r10d, %ebx xorl %edx, %ecx xorl %r11d, %ebx rorxl $25, %r9d, %edx xorl %ecx, %edx andl %r9d, %ebx addl %edx, %r12d rorxl $2, %r13d, %edx rorxl $13, %r13d, %ecx xorl %r11d, %ebx xorl %edx, %ecx rorxl $22, %r13d, %edx addl %ebx, %r12d xorl %ecx, %edx movl %r14d, %ebx addl %r12d, %r8d xorl %r13d, %ebx andl %ebx, %eax addl %edx, %r12d xorl %r14d, %eax # rnd_all_4: 3-6 rorxl $6, %r8d, %edx rorxl $11, %r8d, %ecx addl %eax, %r12d addl 48(%rsp), %r11d movl %r9d, %eax xorl %edx, %ecx xorl %r10d, %eax rorxl $25, %r8d, %edx xorl %ecx, %edx andl %r8d, %eax addl %edx, %r11d rorxl $2, %r12d, %edx rorxl $13, %r12d, %ecx xorl %r10d, %eax xorl %edx, %ecx rorxl $22, %r12d, %edx addl %eax, %r11d xorl %ecx, %edx movl %r13d, %eax addl %r11d, %r15d xorl %r12d, %eax andl %eax, %ebx addl %edx, %r11d xorl %r13d, %ebx rorxl $6, %r15d, %edx rorxl $11, %r15d, %ecx addl %ebx, %r11d addl 52(%rsp), %r10d movl %r8d, %ebx xorl %edx, %ecx xorl %r9d, %ebx rorxl $25, %r15d, %edx xorl %ecx, %edx andl %r15d, %ebx addl %edx, %r10d rorxl $2, %r11d, %edx rorxl $13, %r11d, %ecx xorl %r9d, %ebx xorl %edx, %ecx rorxl $22, %r11d, %edx addl %ebx, %r10d xorl %ecx, %edx movl %r12d, %ebx addl %r10d, %r14d xorl %r11d, %ebx andl %ebx, %eax addl %edx, %r10d xorl %r12d, %eax rorxl $6, %r14d, %edx rorxl $11, %r14d, %ecx addl %eax, %r10d addl 56(%rsp), %r9d movl %r15d, %eax xorl %edx, %ecx xorl %r8d, %eax rorxl $25, %r14d, %edx xorl %ecx, %edx andl %r14d, %eax addl %edx, %r9d rorxl $2, %r10d, %edx rorxl $13, %r10d, %ecx xorl %r8d, %eax xorl %edx, %ecx rorxl $22, %r10d, %edx addl %eax, %r9d xorl %ecx, %edx movl %r11d, %eax addl %r9d, %r13d xorl %r10d, %eax andl %eax, %ebx addl %edx, %r9d xorl %r11d, %ebx rorxl $6, %r13d, %edx rorxl $11, %r13d, %ecx addl %ebx, %r9d addl 
60(%rsp), %r8d movl %r14d, %ebx xorl %edx, %ecx xorl %r15d, %ebx rorxl $25, %r13d, %edx xorl %ecx, %edx andl %r13d, %ebx addl %edx, %r8d rorxl $2, %r9d, %edx rorxl $13, %r9d, %ecx xorl %r15d, %ebx xorl %edx, %ecx rorxl $22, %r9d, %edx addl %ebx, %r8d xorl %ecx, %edx movl %r10d, %ebx addl %r8d, %r12d xorl %r9d, %ebx andl %ebx, %eax addl %edx, %r8d xorl %r10d, %eax addl %eax, %r8d addl (%rdi), %r8d addl 4(%rdi), %r9d addl 8(%rdi), %r10d addl 12(%rdi), %r11d addl 16(%rdi), %r12d addl 20(%rdi), %r13d addl 24(%rdi), %r14d addl 28(%rdi), %r15d addq $0x40, %rbp subl $0x40, %esi movl %r8d, (%rdi) movl %r9d, 4(%rdi) movl %r10d, 8(%rdi) movl %r11d, 12(%rdi) movl %r12d, 16(%rdi) movl %r13d, 20(%rdi) movl %r14d, 24(%rdi) movl %r15d, 28(%rdi) jnz L_sha256_len_avx1_len_rorx_start xorq %rax, %rax vzeroupper addq $0x40, %rsp popq %rbp popq %r15 popq %r14 popq %r13 popq %r12 popq %rbx repz retq #ifndef __APPLE__ .size Transform_Sha256_AVX1_RORX_Len,.-Transform_Sha256_AVX1_RORX_Len #endif /* __APPLE__ */ #endif /* HAVE_INTEL_AVX1 */ #ifdef HAVE_INTEL_AVX2 #ifndef __APPLE__ .data #else .section __DATA,__data #endif /* __APPLE__ */ L_avx2_sha256_k: .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 .long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 .long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 .long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 .long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 .long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 .long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 .long 0xe49b69c1,0xefbe4786,0xfc19dc6,0x240ca1cc .long 0xe49b69c1,0xefbe4786,0xfc19dc6,0x240ca1cc .long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da .long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da .long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 .long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 .long 0xc6e00bf3,0xd5a79147,0x6ca6351,0x14292967 .long 0xc6e00bf3,0xd5a79147,0x6ca6351,0x14292967 .long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 .long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 .long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 .long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 .long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 .long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 .long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 .long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 .long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 .long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 .long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 .long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 .long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 .long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 #ifndef __APPLE__ .data #else .section __DATA,__data #endif /* __APPLE__ */ #ifndef __APPLE__ .align 32 #else .p2align 5 #endif /* __APPLE__ */ L_avx2_sha256_shuf_00BA: .quad 0xb0a090803020100, 0xffffffffffffffff .quad 0xb0a090803020100, 0xffffffffffffffff #ifndef __APPLE__ .data #else .section __DATA,__data #endif /* __APPLE__ */ #ifndef __APPLE__ .align 32 #else .p2align 5 #endif /* __APPLE__ */ L_avx2_sha256_shuf_DC00: .quad 0xffffffffffffffff, 0xb0a090803020100 .quad 0xffffffffffffffff, 0xb0a090803020100 #ifndef __APPLE__ .data #else .section __DATA,__data #endif /* __APPLE__ */ #ifndef __APPLE__ .align 32 #else .p2align 5 #endif /* __APPLE__ */ L_avx2_sha256_flip_mask: .quad 0x405060700010203, 0xc0d0e0f08090a0b .quad 0x405060700010203, 0xc0d0e0f08090a0b #ifndef __APPLE__ .text .globl 
Transform_Sha256_AVX2 .type Transform_Sha256_AVX2,@function .align 16 Transform_Sha256_AVX2: #else .section __TEXT,__text .globl _Transform_Sha256_AVX2 .p2align 4 _Transform_Sha256_AVX2: #endif /* __APPLE__ */ pushq %rbx pushq %r12 pushq %r13 pushq %r14 pushq %r15 subq $0x200, %rsp leaq 32(%rdi), %rax vmovdqa L_avx2_sha256_flip_mask(%rip), %xmm13 vmovdqa L_avx2_sha256_shuf_00BA(%rip), %ymm11 vmovdqa L_avx2_sha256_shuf_DC00(%rip), %ymm12 movl (%rdi), %r8d movl 4(%rdi), %r9d movl 8(%rdi), %r10d movl 12(%rdi), %r11d movl 16(%rdi), %r12d movl 20(%rdi), %r13d movl 24(%rdi), %r14d movl 28(%rdi), %r15d # X0, X1, X2, X3 = W[0..15] vmovdqu (%rax), %xmm0 vmovdqu 16(%rax), %xmm1 vpshufb %xmm13, %xmm0, %xmm0 vpshufb %xmm13, %xmm1, %xmm1 vmovdqu 32(%rax), %xmm2 vmovdqu 48(%rax), %xmm3 vpshufb %xmm13, %xmm2, %xmm2 vpshufb %xmm13, %xmm3, %xmm3 movl %r9d, %ebx movl %r12d, %edx xorl %r10d, %ebx # set_w_k_xfer_4: 0 vpaddd 0+L_avx2_sha256_k(%rip), %ymm0, %ymm4 vpaddd 32+L_avx2_sha256_k(%rip), %ymm1, %ymm5 vmovdqu %ymm4, (%rsp) vmovdqu %ymm5, 32(%rsp) vpaddd 64+L_avx2_sha256_k(%rip), %ymm2, %ymm4 vpaddd 96+L_avx2_sha256_k(%rip), %ymm3, %ymm5 vmovdqu %ymm4, 64(%rsp) vmovdqu %ymm5, 96(%rsp) # msg_sched: 0-3 # rnd_0: 0 - 0 rorl $14, %edx vpalignr $4, %ymm0, %ymm1, %ymm5 vpalignr $4, %ymm2, %ymm3, %ymm4 # rnd_0: 1 - 2 movl %r9d, %eax movl %r13d, %ecx addl (%rsp), %r15d xorl %r14d, %ecx xorl %r12d, %edx andl %r12d, %ecx vpsrld $7, %ymm5, %ymm6 vpslld $25, %ymm5, %ymm7 # rnd_0: 3 - 4 rorl $5, %edx xorl %r14d, %ecx xorl %r12d, %edx addl %ecx, %r15d rorl $6, %edx xorl %r8d, %eax addl %edx, %r15d movl %r8d, %ecx vpsrld $18, %ymm5, %ymm8 vpslld $14, %ymm5, %ymm9 # rnd_0: 5 - 6 andl %eax, %ebx rorl $9, %ecx xorl %r8d, %ecx xorl %r9d, %ebx rorl $11, %ecx addl %r15d, %r11d xorl %r8d, %ecx addl %ebx, %r15d vpor %ymm6, %ymm7, %ymm6 vpor %ymm8, %ymm9, %ymm8 # rnd_0: 7 - 7 rorl $2, %ecx movl %r11d, %edx addl %ecx, %r15d # rnd_1: 0 - 1 rorl $14, %edx movl %r8d, %ebx movl %r12d, %ecx addl 4(%rsp), %r14d xorl %r13d, %ecx vpsrld $3, %ymm5, %ymm9 vpxor %ymm6, %ymm8, %ymm6 # rnd_1: 2 - 3 xorl %r11d, %edx andl %r11d, %ecx rorl $5, %edx xorl %r13d, %ecx xorl %r11d, %edx addl %ecx, %r14d vpxor %ymm6, %ymm9, %ymm5 vpshufd $0xfa, %ymm3, %ymm6 # rnd_1: 4 - 5 rorl $6, %edx xorl %r15d, %ebx addl %edx, %r14d movl %r15d, %ecx andl %ebx, %eax rorl $9, %ecx xorl %r15d, %ecx xorl %r8d, %eax vpsrld $10, %ymm6, %ymm8 vpsrlq $19, %ymm6, %ymm7 # rnd_1: 6 - 7 rorl $11, %ecx addl %r14d, %r10d xorl %r15d, %ecx addl %eax, %r14d rorl $2, %ecx movl %r10d, %edx addl %ecx, %r14d # rnd_0: 0 - 0 rorl $14, %edx vpsrlq $0x11, %ymm6, %ymm6 vpaddd %ymm0, %ymm4, %ymm4 # rnd_0: 1 - 3 movl %r15d, %eax movl %r11d, %ecx addl 8(%rsp), %r13d xorl %r12d, %ecx xorl %r10d, %edx andl %r10d, %ecx rorl $5, %edx xorl %r12d, %ecx xorl %r10d, %edx addl %ecx, %r13d vpxor %ymm6, %ymm7, %ymm6 vpaddd %ymm5, %ymm4, %ymm4 # rnd_0: 4 - 4 rorl $6, %edx xorl %r14d, %eax addl %edx, %r13d movl %r14d, %ecx vpxor %ymm6, %ymm8, %ymm8 # rnd_0: 5 - 5 andl %eax, %ebx rorl $9, %ecx xorl %r14d, %ecx xorl %r15d, %ebx vpshufb %ymm11, %ymm8, %ymm8 # rnd_0: 6 - 6 rorl $11, %ecx addl %r13d, %r9d xorl %r14d, %ecx addl %ebx, %r13d vpaddd %ymm8, %ymm4, %ymm4 # rnd_0: 7 - 7 rorl $2, %ecx movl %r9d, %edx addl %ecx, %r13d # rnd_1: 0 - 0 rorl $14, %edx vpshufd $0x50, %ymm4, %ymm6 # rnd_1: 1 - 1 movl %r14d, %ebx movl %r10d, %ecx addl 12(%rsp), %r12d xorl %r11d, %ecx vpsrlq $0x11, %ymm6, %ymm8 vpsrlq $19, %ymm6, %ymm7 # rnd_1: 2 - 3 xorl %r9d, %edx andl %r9d, %ecx rorl $5, %edx xorl %r11d, %ecx xorl %r9d, %edx 
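# Note: each rnd_0/rnd_1 block is one scalar SHA-256 round interleaved with the
# AVX2 message schedule: T1 = h + Sigma1(e) + Ch(e,f,g) + (W[t]+K[t] read from the
# stack), T2 = Sigma0(a) + Maj(a,b,c).  The ror 14 / ror 5 / ror 6 chain on %edx,
# with an xor of e between each step, folds Sigma1(e) = (e ror 6) ^ (e ror 11) ^ (e ror 25).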
addl %ecx, %r12d vpsrld $10, %ymm6, %ymm9 vpxor %ymm8, %ymm7, %ymm8 # rnd_1: 4 - 5 rorl $6, %edx xorl %r13d, %ebx addl %edx, %r12d movl %r13d, %ecx andl %ebx, %eax rorl $9, %ecx xorl %r13d, %ecx xorl %r14d, %eax vpxor %ymm9, %ymm8, %ymm9 # rnd_1: 6 - 6 rorl $11, %ecx addl %r12d, %r8d xorl %r13d, %ecx addl %eax, %r12d vpshufb %ymm12, %ymm9, %ymm9 # rnd_1: 7 - 7 rorl $2, %ecx movl %r8d, %edx addl %ecx, %r12d vpaddd %ymm4, %ymm9, %ymm0 # msg_sched done: 0-3 # msg_sched: 8-11 # rnd_0: 0 - 0 rorl $14, %edx vpalignr $4, %ymm1, %ymm2, %ymm5 vpalignr $4, %ymm3, %ymm0, %ymm4 # rnd_0: 1 - 2 movl %r13d, %eax movl %r9d, %ecx addl 32(%rsp), %r11d xorl %r10d, %ecx xorl %r8d, %edx andl %r8d, %ecx vpsrld $7, %ymm5, %ymm6 vpslld $25, %ymm5, %ymm7 # rnd_0: 3 - 4 rorl $5, %edx xorl %r10d, %ecx xorl %r8d, %edx addl %ecx, %r11d rorl $6, %edx xorl %r12d, %eax addl %edx, %r11d movl %r12d, %ecx vpsrld $18, %ymm5, %ymm8 vpslld $14, %ymm5, %ymm9 # rnd_0: 5 - 6 andl %eax, %ebx rorl $9, %ecx xorl %r12d, %ecx xorl %r13d, %ebx rorl $11, %ecx addl %r11d, %r15d xorl %r12d, %ecx addl %ebx, %r11d vpor %ymm6, %ymm7, %ymm6 vpor %ymm8, %ymm9, %ymm8 # rnd_0: 7 - 7 rorl $2, %ecx movl %r15d, %edx addl %ecx, %r11d # rnd_1: 0 - 1 rorl $14, %edx movl %r12d, %ebx movl %r8d, %ecx addl 36(%rsp), %r10d xorl %r9d, %ecx vpsrld $3, %ymm5, %ymm9 vpxor %ymm6, %ymm8, %ymm6 # rnd_1: 2 - 3 xorl %r15d, %edx andl %r15d, %ecx rorl $5, %edx xorl %r9d, %ecx xorl %r15d, %edx addl %ecx, %r10d vpxor %ymm6, %ymm9, %ymm5 vpshufd $0xfa, %ymm0, %ymm6 # rnd_1: 4 - 5 rorl $6, %edx xorl %r11d, %ebx addl %edx, %r10d movl %r11d, %ecx andl %ebx, %eax rorl $9, %ecx xorl %r11d, %ecx xorl %r12d, %eax vpsrld $10, %ymm6, %ymm8 vpsrlq $19, %ymm6, %ymm7 # rnd_1: 6 - 7 rorl $11, %ecx addl %r10d, %r14d xorl %r11d, %ecx addl %eax, %r10d rorl $2, %ecx movl %r14d, %edx addl %ecx, %r10d # rnd_0: 0 - 0 rorl $14, %edx vpsrlq $0x11, %ymm6, %ymm6 vpaddd %ymm1, %ymm4, %ymm4 # rnd_0: 1 - 3 movl %r11d, %eax movl %r15d, %ecx addl 40(%rsp), %r9d xorl %r8d, %ecx xorl %r14d, %edx andl %r14d, %ecx rorl $5, %edx xorl %r8d, %ecx xorl %r14d, %edx addl %ecx, %r9d vpxor %ymm6, %ymm7, %ymm6 vpaddd %ymm5, %ymm4, %ymm4 # rnd_0: 4 - 4 rorl $6, %edx xorl %r10d, %eax addl %edx, %r9d movl %r10d, %ecx vpxor %ymm6, %ymm8, %ymm8 # rnd_0: 5 - 5 andl %eax, %ebx rorl $9, %ecx xorl %r10d, %ecx xorl %r11d, %ebx vpshufb %ymm11, %ymm8, %ymm8 # rnd_0: 6 - 6 rorl $11, %ecx addl %r9d, %r13d xorl %r10d, %ecx addl %ebx, %r9d vpaddd %ymm8, %ymm4, %ymm4 # rnd_0: 7 - 7 rorl $2, %ecx movl %r13d, %edx addl %ecx, %r9d # rnd_1: 0 - 0 rorl $14, %edx vpshufd $0x50, %ymm4, %ymm6 # rnd_1: 1 - 1 movl %r10d, %ebx movl %r14d, %ecx addl 44(%rsp), %r8d xorl %r15d, %ecx vpsrlq $0x11, %ymm6, %ymm8 vpsrlq $19, %ymm6, %ymm7 # rnd_1: 2 - 3 xorl %r13d, %edx andl %r13d, %ecx rorl $5, %edx xorl %r15d, %ecx xorl %r13d, %edx addl %ecx, %r8d vpsrld $10, %ymm6, %ymm9 vpxor %ymm8, %ymm7, %ymm8 # rnd_1: 4 - 5 rorl $6, %edx xorl %r9d, %ebx addl %edx, %r8d movl %r9d, %ecx andl %ebx, %eax rorl $9, %ecx xorl %r9d, %ecx xorl %r10d, %eax vpxor %ymm9, %ymm8, %ymm9 # rnd_1: 6 - 6 rorl $11, %ecx addl %r8d, %r12d xorl %r9d, %ecx addl %eax, %r8d vpshufb %ymm12, %ymm9, %ymm9 # rnd_1: 7 - 7 rorl $2, %ecx movl %r12d, %edx addl %ecx, %r8d vpaddd %ymm4, %ymm9, %ymm1 # msg_sched done: 8-11 # msg_sched: 16-19 # rnd_0: 0 - 0 rorl $14, %edx vpalignr $4, %ymm2, %ymm3, %ymm5 vpalignr $4, %ymm0, %ymm1, %ymm4 # rnd_0: 1 - 2 movl %r9d, %eax movl %r13d, %ecx addl 64(%rsp), %r15d xorl %r14d, %ecx xorl %r12d, %edx andl %r12d, %ecx vpsrld $7, %ymm5, %ymm6 vpslld $25, 
%ymm5, %ymm7 # rnd_0: 3 - 4 rorl $5, %edx xorl %r14d, %ecx xorl %r12d, %edx addl %ecx, %r15d rorl $6, %edx xorl %r8d, %eax addl %edx, %r15d movl %r8d, %ecx vpsrld $18, %ymm5, %ymm8 vpslld $14, %ymm5, %ymm9 # rnd_0: 5 - 6 andl %eax, %ebx rorl $9, %ecx xorl %r8d, %ecx xorl %r9d, %ebx rorl $11, %ecx addl %r15d, %r11d xorl %r8d, %ecx addl %ebx, %r15d vpor %ymm6, %ymm7, %ymm6 vpor %ymm8, %ymm9, %ymm8 # rnd_0: 7 - 7 rorl $2, %ecx movl %r11d, %edx addl %ecx, %r15d # rnd_1: 0 - 1 rorl $14, %edx movl %r8d, %ebx movl %r12d, %ecx addl 68(%rsp), %r14d xorl %r13d, %ecx vpsrld $3, %ymm5, %ymm9 vpxor %ymm6, %ymm8, %ymm6 # rnd_1: 2 - 3 xorl %r11d, %edx andl %r11d, %ecx rorl $5, %edx xorl %r13d, %ecx xorl %r11d, %edx addl %ecx, %r14d vpxor %ymm6, %ymm9, %ymm5 vpshufd $0xfa, %ymm1, %ymm6 # rnd_1: 4 - 5 rorl $6, %edx xorl %r15d, %ebx addl %edx, %r14d movl %r15d, %ecx andl %ebx, %eax rorl $9, %ecx xorl %r15d, %ecx xorl %r8d, %eax vpsrld $10, %ymm6, %ymm8 vpsrlq $19, %ymm6, %ymm7 # rnd_1: 6 - 7 rorl $11, %ecx addl %r14d, %r10d xorl %r15d, %ecx addl %eax, %r14d rorl $2, %ecx movl %r10d, %edx addl %ecx, %r14d # rnd_0: 0 - 0 rorl $14, %edx vpsrlq $0x11, %ymm6, %ymm6 vpaddd %ymm2, %ymm4, %ymm4 # rnd_0: 1 - 3 movl %r15d, %eax movl %r11d, %ecx addl 72(%rsp), %r13d xorl %r12d, %ecx xorl %r10d, %edx andl %r10d, %ecx rorl $5, %edx xorl %r12d, %ecx xorl %r10d, %edx addl %ecx, %r13d vpxor %ymm6, %ymm7, %ymm6 vpaddd %ymm5, %ymm4, %ymm4 # rnd_0: 4 - 4 rorl $6, %edx xorl %r14d, %eax addl %edx, %r13d movl %r14d, %ecx vpxor %ymm6, %ymm8, %ymm8 # rnd_0: 5 - 5 andl %eax, %ebx rorl $9, %ecx xorl %r14d, %ecx xorl %r15d, %ebx vpshufb %ymm11, %ymm8, %ymm8 # rnd_0: 6 - 6 rorl $11, %ecx addl %r13d, %r9d xorl %r14d, %ecx addl %ebx, %r13d vpaddd %ymm8, %ymm4, %ymm4 # rnd_0: 7 - 7 rorl $2, %ecx movl %r9d, %edx addl %ecx, %r13d # rnd_1: 0 - 0 rorl $14, %edx vpshufd $0x50, %ymm4, %ymm6 # rnd_1: 1 - 1 movl %r14d, %ebx movl %r10d, %ecx addl 76(%rsp), %r12d xorl %r11d, %ecx vpsrlq $0x11, %ymm6, %ymm8 vpsrlq $19, %ymm6, %ymm7 # rnd_1: 2 - 3 xorl %r9d, %edx andl %r9d, %ecx rorl $5, %edx xorl %r11d, %ecx xorl %r9d, %edx addl %ecx, %r12d vpsrld $10, %ymm6, %ymm9 vpxor %ymm8, %ymm7, %ymm8 # rnd_1: 4 - 5 rorl $6, %edx xorl %r13d, %ebx addl %edx, %r12d movl %r13d, %ecx andl %ebx, %eax rorl $9, %ecx xorl %r13d, %ecx xorl %r14d, %eax vpxor %ymm9, %ymm8, %ymm9 # rnd_1: 6 - 6 rorl $11, %ecx addl %r12d, %r8d xorl %r13d, %ecx addl %eax, %r12d vpshufb %ymm12, %ymm9, %ymm9 # rnd_1: 7 - 7 rorl $2, %ecx movl %r8d, %edx addl %ecx, %r12d vpaddd %ymm4, %ymm9, %ymm2 # msg_sched done: 16-19 # msg_sched: 24-27 # rnd_0: 0 - 0 rorl $14, %edx vpalignr $4, %ymm3, %ymm0, %ymm5 vpalignr $4, %ymm1, %ymm2, %ymm4 # rnd_0: 1 - 2 movl %r13d, %eax movl %r9d, %ecx addl 96(%rsp), %r11d xorl %r10d, %ecx xorl %r8d, %edx andl %r8d, %ecx vpsrld $7, %ymm5, %ymm6 vpslld $25, %ymm5, %ymm7 # rnd_0: 3 - 4 rorl $5, %edx xorl %r10d, %ecx xorl %r8d, %edx addl %ecx, %r11d rorl $6, %edx xorl %r12d, %eax addl %edx, %r11d movl %r12d, %ecx vpsrld $18, %ymm5, %ymm8 vpslld $14, %ymm5, %ymm9 # rnd_0: 5 - 6 andl %eax, %ebx rorl $9, %ecx xorl %r12d, %ecx xorl %r13d, %ebx rorl $11, %ecx addl %r11d, %r15d xorl %r12d, %ecx addl %ebx, %r11d vpor %ymm6, %ymm7, %ymm6 vpor %ymm8, %ymm9, %ymm8 # rnd_0: 7 - 7 rorl $2, %ecx movl %r15d, %edx addl %ecx, %r11d # rnd_1: 0 - 1 rorl $14, %edx movl %r12d, %ebx movl %r8d, %ecx addl 100(%rsp), %r10d xorl %r9d, %ecx vpsrld $3, %ymm5, %ymm9 vpxor %ymm6, %ymm8, %ymm6 # rnd_1: 2 - 3 xorl %r15d, %edx andl %r15d, %ecx rorl $5, %edx xorl %r9d, %ecx xorl %r15d, %edx addl 
%ecx, %r10d vpxor %ymm6, %ymm9, %ymm5 vpshufd $0xfa, %ymm2, %ymm6 # rnd_1: 4 - 5 rorl $6, %edx xorl %r11d, %ebx addl %edx, %r10d movl %r11d, %ecx andl %ebx, %eax rorl $9, %ecx xorl %r11d, %ecx xorl %r12d, %eax vpsrld $10, %ymm6, %ymm8 vpsrlq $19, %ymm6, %ymm7 # rnd_1: 6 - 7 rorl $11, %ecx addl %r10d, %r14d xorl %r11d, %ecx addl %eax, %r10d rorl $2, %ecx movl %r14d, %edx addl %ecx, %r10d # rnd_0: 0 - 0 rorl $14, %edx vpsrlq $0x11, %ymm6, %ymm6 vpaddd %ymm3, %ymm4, %ymm4 # rnd_0: 1 - 3 movl %r11d, %eax movl %r15d, %ecx addl 104(%rsp), %r9d xorl %r8d, %ecx xorl %r14d, %edx andl %r14d, %ecx rorl $5, %edx xorl %r8d, %ecx xorl %r14d, %edx addl %ecx, %r9d vpxor %ymm6, %ymm7, %ymm6 vpaddd %ymm5, %ymm4, %ymm4 # rnd_0: 4 - 4 rorl $6, %edx xorl %r10d, %eax addl %edx, %r9d movl %r10d, %ecx vpxor %ymm6, %ymm8, %ymm8 # rnd_0: 5 - 5 andl %eax, %ebx rorl $9, %ecx xorl %r10d, %ecx xorl %r11d, %ebx vpshufb %ymm11, %ymm8, %ymm8 # rnd_0: 6 - 6 rorl $11, %ecx addl %r9d, %r13d xorl %r10d, %ecx addl %ebx, %r9d vpaddd %ymm8, %ymm4, %ymm4 # rnd_0: 7 - 7 rorl $2, %ecx movl %r13d, %edx addl %ecx, %r9d # rnd_1: 0 - 0 rorl $14, %edx vpshufd $0x50, %ymm4, %ymm6 # rnd_1: 1 - 1 movl %r10d, %ebx movl %r14d, %ecx addl 108(%rsp), %r8d xorl %r15d, %ecx vpsrlq $0x11, %ymm6, %ymm8 vpsrlq $19, %ymm6, %ymm7 # rnd_1: 2 - 3 xorl %r13d, %edx andl %r13d, %ecx rorl $5, %edx xorl %r15d, %ecx xorl %r13d, %edx addl %ecx, %r8d vpsrld $10, %ymm6, %ymm9 vpxor %ymm8, %ymm7, %ymm8 # rnd_1: 4 - 5 rorl $6, %edx xorl %r9d, %ebx addl %edx, %r8d movl %r9d, %ecx andl %ebx, %eax rorl $9, %ecx xorl %r9d, %ecx xorl %r10d, %eax vpxor %ymm9, %ymm8, %ymm9 # rnd_1: 6 - 6 rorl $11, %ecx addl %r8d, %r12d xorl %r9d, %ecx addl %eax, %r8d vpshufb %ymm12, %ymm9, %ymm9 # rnd_1: 7 - 7 rorl $2, %ecx movl %r12d, %edx addl %ecx, %r8d vpaddd %ymm4, %ymm9, %ymm3 # msg_sched done: 24-27 # set_w_k_xfer_4: 4 vpaddd 128+L_avx2_sha256_k(%rip), %ymm0, %ymm4 vpaddd 160+L_avx2_sha256_k(%rip), %ymm1, %ymm5 vmovdqu %ymm4, 128(%rsp) vmovdqu %ymm5, 160(%rsp) vpaddd 192+L_avx2_sha256_k(%rip), %ymm2, %ymm4 vpaddd 224+L_avx2_sha256_k(%rip), %ymm3, %ymm5 vmovdqu %ymm4, 192(%rsp) vmovdqu %ymm5, 224(%rsp) # msg_sched: 32-35 # rnd_0: 0 - 0 rorl $14, %edx vpalignr $4, %ymm0, %ymm1, %ymm5 vpalignr $4, %ymm2, %ymm3, %ymm4 # rnd_0: 1 - 2 movl %r9d, %eax movl %r13d, %ecx addl 128(%rsp), %r15d xorl %r14d, %ecx xorl %r12d, %edx andl %r12d, %ecx vpsrld $7, %ymm5, %ymm6 vpslld $25, %ymm5, %ymm7 # rnd_0: 3 - 4 rorl $5, %edx xorl %r14d, %ecx xorl %r12d, %edx addl %ecx, %r15d rorl $6, %edx xorl %r8d, %eax addl %edx, %r15d movl %r8d, %ecx vpsrld $18, %ymm5, %ymm8 vpslld $14, %ymm5, %ymm9 # rnd_0: 5 - 6 andl %eax, %ebx rorl $9, %ecx xorl %r8d, %ecx xorl %r9d, %ebx rorl $11, %ecx addl %r15d, %r11d xorl %r8d, %ecx addl %ebx, %r15d vpor %ymm6, %ymm7, %ymm6 vpor %ymm8, %ymm9, %ymm8 # rnd_0: 7 - 7 rorl $2, %ecx movl %r11d, %edx addl %ecx, %r15d # rnd_1: 0 - 1 rorl $14, %edx movl %r8d, %ebx movl %r12d, %ecx addl 132(%rsp), %r14d xorl %r13d, %ecx vpsrld $3, %ymm5, %ymm9 vpxor %ymm6, %ymm8, %ymm6 # rnd_1: 2 - 3 xorl %r11d, %edx andl %r11d, %ecx rorl $5, %edx xorl %r13d, %ecx xorl %r11d, %edx addl %ecx, %r14d vpxor %ymm6, %ymm9, %ymm5 vpshufd $0xfa, %ymm3, %ymm6 # rnd_1: 4 - 5 rorl $6, %edx xorl %r15d, %ebx addl %edx, %r14d movl %r15d, %ecx andl %ebx, %eax rorl $9, %ecx xorl %r15d, %ecx xorl %r8d, %eax vpsrld $10, %ymm6, %ymm8 vpsrlq $19, %ymm6, %ymm7 # rnd_1: 6 - 7 rorl $11, %ecx addl %r14d, %r10d xorl %r15d, %ecx addl %eax, %r14d rorl $2, %ecx movl %r10d, %edx addl %ecx, %r14d # rnd_0: 0 - 0 rorl $14, %edx 
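# Note: msg_sched computes W[t] = sigma1(W[t-2]) + W[t-7] + sigma0(W[t-15]) + W[t-16].
# sigma0(x) = (x ror 7) ^ (x ror 18) ^ (x >> 3) comes from the vpsrld/vpslld/vpor steps
# above; sigma1(x) = (x ror 17) ^ (x ror 19) ^ (x >> 10) comes from the vpsrlq $0x11,
# vpsrlq $19 and vpsrld $10 steps (64-bit shifts on dwords duplicated by vpshufd
# emulate the 32-bit rotates).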
vpsrlq $0x11, %ymm6, %ymm6 vpaddd %ymm0, %ymm4, %ymm4 # rnd_0: 1 - 3 movl %r15d, %eax movl %r11d, %ecx addl 136(%rsp), %r13d xorl %r12d, %ecx xorl %r10d, %edx andl %r10d, %ecx rorl $5, %edx xorl %r12d, %ecx xorl %r10d, %edx addl %ecx, %r13d vpxor %ymm6, %ymm7, %ymm6 vpaddd %ymm5, %ymm4, %ymm4 # rnd_0: 4 - 4 rorl $6, %edx xorl %r14d, %eax addl %edx, %r13d movl %r14d, %ecx vpxor %ymm6, %ymm8, %ymm8 # rnd_0: 5 - 5 andl %eax, %ebx rorl $9, %ecx xorl %r14d, %ecx xorl %r15d, %ebx vpshufb %ymm11, %ymm8, %ymm8 # rnd_0: 6 - 6 rorl $11, %ecx addl %r13d, %r9d xorl %r14d, %ecx addl %ebx, %r13d vpaddd %ymm8, %ymm4, %ymm4 # rnd_0: 7 - 7 rorl $2, %ecx movl %r9d, %edx addl %ecx, %r13d # rnd_1: 0 - 0 rorl $14, %edx vpshufd $0x50, %ymm4, %ymm6 # rnd_1: 1 - 1 movl %r14d, %ebx movl %r10d, %ecx addl 140(%rsp), %r12d xorl %r11d, %ecx vpsrlq $0x11, %ymm6, %ymm8 vpsrlq $19, %ymm6, %ymm7 # rnd_1: 2 - 3 xorl %r9d, %edx andl %r9d, %ecx rorl $5, %edx xorl %r11d, %ecx xorl %r9d, %edx addl %ecx, %r12d vpsrld $10, %ymm6, %ymm9 vpxor %ymm8, %ymm7, %ymm8 # rnd_1: 4 - 5 rorl $6, %edx xorl %r13d, %ebx addl %edx, %r12d movl %r13d, %ecx andl %ebx, %eax rorl $9, %ecx xorl %r13d, %ecx xorl %r14d, %eax vpxor %ymm9, %ymm8, %ymm9 # rnd_1: 6 - 6 rorl $11, %ecx addl %r12d, %r8d xorl %r13d, %ecx addl %eax, %r12d vpshufb %ymm12, %ymm9, %ymm9 # rnd_1: 7 - 7 rorl $2, %ecx movl %r8d, %edx addl %ecx, %r12d vpaddd %ymm4, %ymm9, %ymm0 # msg_sched done: 32-35 # msg_sched: 40-43 # rnd_0: 0 - 0 rorl $14, %edx vpalignr $4, %ymm1, %ymm2, %ymm5 vpalignr $4, %ymm3, %ymm0, %ymm4 # rnd_0: 1 - 2 movl %r13d, %eax movl %r9d, %ecx addl 160(%rsp), %r11d xorl %r10d, %ecx xorl %r8d, %edx andl %r8d, %ecx vpsrld $7, %ymm5, %ymm6 vpslld $25, %ymm5, %ymm7 # rnd_0: 3 - 4 rorl $5, %edx xorl %r10d, %ecx xorl %r8d, %edx addl %ecx, %r11d rorl $6, %edx xorl %r12d, %eax addl %edx, %r11d movl %r12d, %ecx vpsrld $18, %ymm5, %ymm8 vpslld $14, %ymm5, %ymm9 # rnd_0: 5 - 6 andl %eax, %ebx rorl $9, %ecx xorl %r12d, %ecx xorl %r13d, %ebx rorl $11, %ecx addl %r11d, %r15d xorl %r12d, %ecx addl %ebx, %r11d vpor %ymm6, %ymm7, %ymm6 vpor %ymm8, %ymm9, %ymm8 # rnd_0: 7 - 7 rorl $2, %ecx movl %r15d, %edx addl %ecx, %r11d # rnd_1: 0 - 1 rorl $14, %edx movl %r12d, %ebx movl %r8d, %ecx addl 164(%rsp), %r10d xorl %r9d, %ecx vpsrld $3, %ymm5, %ymm9 vpxor %ymm6, %ymm8, %ymm6 # rnd_1: 2 - 3 xorl %r15d, %edx andl %r15d, %ecx rorl $5, %edx xorl %r9d, %ecx xorl %r15d, %edx addl %ecx, %r10d vpxor %ymm6, %ymm9, %ymm5 vpshufd $0xfa, %ymm0, %ymm6 # rnd_1: 4 - 5 rorl $6, %edx xorl %r11d, %ebx addl %edx, %r10d movl %r11d, %ecx andl %ebx, %eax rorl $9, %ecx xorl %r11d, %ecx xorl %r12d, %eax vpsrld $10, %ymm6, %ymm8 vpsrlq $19, %ymm6, %ymm7 # rnd_1: 6 - 7 rorl $11, %ecx addl %r10d, %r14d xorl %r11d, %ecx addl %eax, %r10d rorl $2, %ecx movl %r14d, %edx addl %ecx, %r10d # rnd_0: 0 - 0 rorl $14, %edx vpsrlq $0x11, %ymm6, %ymm6 vpaddd %ymm1, %ymm4, %ymm4 # rnd_0: 1 - 3 movl %r11d, %eax movl %r15d, %ecx addl 168(%rsp), %r9d xorl %r8d, %ecx xorl %r14d, %edx andl %r14d, %ecx rorl $5, %edx xorl %r8d, %ecx xorl %r14d, %edx addl %ecx, %r9d vpxor %ymm6, %ymm7, %ymm6 vpaddd %ymm5, %ymm4, %ymm4 # rnd_0: 4 - 4 rorl $6, %edx xorl %r10d, %eax addl %edx, %r9d movl %r10d, %ecx vpxor %ymm6, %ymm8, %ymm8 # rnd_0: 5 - 5 andl %eax, %ebx rorl $9, %ecx xorl %r10d, %ecx xorl %r11d, %ebx vpshufb %ymm11, %ymm8, %ymm8 # rnd_0: 6 - 6 rorl $11, %ecx addl %r9d, %r13d xorl %r10d, %ecx addl %ebx, %r9d vpaddd %ymm8, %ymm4, %ymm4 # rnd_0: 7 - 7 rorl $2, %ecx movl %r13d, %edx addl %ecx, %r9d # rnd_1: 0 - 0 rorl $14, %edx vpshufd 
$0x50, %ymm4, %ymm6 # rnd_1: 1 - 1 movl %r10d, %ebx movl %r14d, %ecx addl 172(%rsp), %r8d xorl %r15d, %ecx vpsrlq $0x11, %ymm6, %ymm8 vpsrlq $19, %ymm6, %ymm7 # rnd_1: 2 - 3 xorl %r13d, %edx andl %r13d, %ecx rorl $5, %edx xorl %r15d, %ecx xorl %r13d, %edx addl %ecx, %r8d vpsrld $10, %ymm6, %ymm9 vpxor %ymm8, %ymm7, %ymm8 # rnd_1: 4 - 5 rorl $6, %edx xorl %r9d, %ebx addl %edx, %r8d movl %r9d, %ecx andl %ebx, %eax rorl $9, %ecx xorl %r9d, %ecx xorl %r10d, %eax vpxor %ymm9, %ymm8, %ymm9 # rnd_1: 6 - 6 rorl $11, %ecx addl %r8d, %r12d xorl %r9d, %ecx addl %eax, %r8d vpshufb %ymm12, %ymm9, %ymm9 # rnd_1: 7 - 7 rorl $2, %ecx movl %r12d, %edx addl %ecx, %r8d vpaddd %ymm4, %ymm9, %ymm1 # msg_sched done: 40-43 # msg_sched: 48-51 # rnd_0: 0 - 0 rorl $14, %edx vpalignr $4, %ymm2, %ymm3, %ymm5 vpalignr $4, %ymm0, %ymm1, %ymm4 # rnd_0: 1 - 2 movl %r9d, %eax movl %r13d, %ecx addl 192(%rsp), %r15d xorl %r14d, %ecx xorl %r12d, %edx andl %r12d, %ecx vpsrld $7, %ymm5, %ymm6 vpslld $25, %ymm5, %ymm7 # rnd_0: 3 - 4 rorl $5, %edx xorl %r14d, %ecx xorl %r12d, %edx addl %ecx, %r15d rorl $6, %edx xorl %r8d, %eax addl %edx, %r15d movl %r8d, %ecx vpsrld $18, %ymm5, %ymm8 vpslld $14, %ymm5, %ymm9 # rnd_0: 5 - 6 andl %eax, %ebx rorl $9, %ecx xorl %r8d, %ecx xorl %r9d, %ebx rorl $11, %ecx addl %r15d, %r11d xorl %r8d, %ecx addl %ebx, %r15d vpor %ymm6, %ymm7, %ymm6 vpor %ymm8, %ymm9, %ymm8 # rnd_0: 7 - 7 rorl $2, %ecx movl %r11d, %edx addl %ecx, %r15d # rnd_1: 0 - 1 rorl $14, %edx movl %r8d, %ebx movl %r12d, %ecx addl 196(%rsp), %r14d xorl %r13d, %ecx vpsrld $3, %ymm5, %ymm9 vpxor %ymm6, %ymm8, %ymm6 # rnd_1: 2 - 3 xorl %r11d, %edx andl %r11d, %ecx rorl $5, %edx xorl %r13d, %ecx xorl %r11d, %edx addl %ecx, %r14d vpxor %ymm6, %ymm9, %ymm5 vpshufd $0xfa, %ymm1, %ymm6 # rnd_1: 4 - 5 rorl $6, %edx xorl %r15d, %ebx addl %edx, %r14d movl %r15d, %ecx andl %ebx, %eax rorl $9, %ecx xorl %r15d, %ecx xorl %r8d, %eax vpsrld $10, %ymm6, %ymm8 vpsrlq $19, %ymm6, %ymm7 # rnd_1: 6 - 7 rorl $11, %ecx addl %r14d, %r10d xorl %r15d, %ecx addl %eax, %r14d rorl $2, %ecx movl %r10d, %edx addl %ecx, %r14d # rnd_0: 0 - 0 rorl $14, %edx vpsrlq $0x11, %ymm6, %ymm6 vpaddd %ymm2, %ymm4, %ymm4 # rnd_0: 1 - 3 movl %r15d, %eax movl %r11d, %ecx addl 200(%rsp), %r13d xorl %r12d, %ecx xorl %r10d, %edx andl %r10d, %ecx rorl $5, %edx xorl %r12d, %ecx xorl %r10d, %edx addl %ecx, %r13d vpxor %ymm6, %ymm7, %ymm6 vpaddd %ymm5, %ymm4, %ymm4 # rnd_0: 4 - 4 rorl $6, %edx xorl %r14d, %eax addl %edx, %r13d movl %r14d, %ecx vpxor %ymm6, %ymm8, %ymm8 # rnd_0: 5 - 5 andl %eax, %ebx rorl $9, %ecx xorl %r14d, %ecx xorl %r15d, %ebx vpshufb %ymm11, %ymm8, %ymm8 # rnd_0: 6 - 6 rorl $11, %ecx addl %r13d, %r9d xorl %r14d, %ecx addl %ebx, %r13d vpaddd %ymm8, %ymm4, %ymm4 # rnd_0: 7 - 7 rorl $2, %ecx movl %r9d, %edx addl %ecx, %r13d # rnd_1: 0 - 0 rorl $14, %edx vpshufd $0x50, %ymm4, %ymm6 # rnd_1: 1 - 1 movl %r14d, %ebx movl %r10d, %ecx addl 204(%rsp), %r12d xorl %r11d, %ecx vpsrlq $0x11, %ymm6, %ymm8 vpsrlq $19, %ymm6, %ymm7 # rnd_1: 2 - 3 xorl %r9d, %edx andl %r9d, %ecx rorl $5, %edx xorl %r11d, %ecx xorl %r9d, %edx addl %ecx, %r12d vpsrld $10, %ymm6, %ymm9 vpxor %ymm8, %ymm7, %ymm8 # rnd_1: 4 - 5 rorl $6, %edx xorl %r13d, %ebx addl %edx, %r12d movl %r13d, %ecx andl %ebx, %eax rorl $9, %ecx xorl %r13d, %ecx xorl %r14d, %eax vpxor %ymm9, %ymm8, %ymm9 # rnd_1: 6 - 6 rorl $11, %ecx addl %r12d, %r8d xorl %r13d, %ecx addl %eax, %r12d vpshufb %ymm12, %ymm9, %ymm9 # rnd_1: 7 - 7 rorl $2, %ecx movl %r8d, %edx addl %ecx, %r12d vpaddd %ymm4, %ymm9, %ymm2 # msg_sched done: 48-51 # 
msg_sched: 56-59 # rnd_0: 0 - 0 rorl $14, %edx vpalignr $4, %ymm3, %ymm0, %ymm5 vpalignr $4, %ymm1, %ymm2, %ymm4 # rnd_0: 1 - 2 movl %r13d, %eax movl %r9d, %ecx addl 224(%rsp), %r11d xorl %r10d, %ecx xorl %r8d, %edx andl %r8d, %ecx vpsrld $7, %ymm5, %ymm6 vpslld $25, %ymm5, %ymm7 # rnd_0: 3 - 4 rorl $5, %edx xorl %r10d, %ecx xorl %r8d, %edx addl %ecx, %r11d rorl $6, %edx xorl %r12d, %eax addl %edx, %r11d movl %r12d, %ecx vpsrld $18, %ymm5, %ymm8 vpslld $14, %ymm5, %ymm9 # rnd_0: 5 - 6 andl %eax, %ebx rorl $9, %ecx xorl %r12d, %ecx xorl %r13d, %ebx rorl $11, %ecx addl %r11d, %r15d xorl %r12d, %ecx addl %ebx, %r11d vpor %ymm6, %ymm7, %ymm6 vpor %ymm8, %ymm9, %ymm8 # rnd_0: 7 - 7 rorl $2, %ecx movl %r15d, %edx addl %ecx, %r11d # rnd_1: 0 - 1 rorl $14, %edx movl %r12d, %ebx movl %r8d, %ecx addl 228(%rsp), %r10d xorl %r9d, %ecx vpsrld $3, %ymm5, %ymm9 vpxor %ymm6, %ymm8, %ymm6 # rnd_1: 2 - 3 xorl %r15d, %edx andl %r15d, %ecx rorl $5, %edx xorl %r9d, %ecx xorl %r15d, %edx addl %ecx, %r10d vpxor %ymm6, %ymm9, %ymm5 vpshufd $0xfa, %ymm2, %ymm6 # rnd_1: 4 - 5 rorl $6, %edx xorl %r11d, %ebx addl %edx, %r10d movl %r11d, %ecx andl %ebx, %eax rorl $9, %ecx xorl %r11d, %ecx xorl %r12d, %eax vpsrld $10, %ymm6, %ymm8 vpsrlq $19, %ymm6, %ymm7 # rnd_1: 6 - 7 rorl $11, %ecx addl %r10d, %r14d xorl %r11d, %ecx addl %eax, %r10d rorl $2, %ecx movl %r14d, %edx addl %ecx, %r10d # rnd_0: 0 - 0 rorl $14, %edx vpsrlq $0x11, %ymm6, %ymm6 vpaddd %ymm3, %ymm4, %ymm4 # rnd_0: 1 - 3 movl %r11d, %eax movl %r15d, %ecx addl 232(%rsp), %r9d xorl %r8d, %ecx xorl %r14d, %edx andl %r14d, %ecx rorl $5, %edx xorl %r8d, %ecx xorl %r14d, %edx addl %ecx, %r9d vpxor %ymm6, %ymm7, %ymm6 vpaddd %ymm5, %ymm4, %ymm4 # rnd_0: 4 - 4 rorl $6, %edx xorl %r10d, %eax addl %edx, %r9d movl %r10d, %ecx vpxor %ymm6, %ymm8, %ymm8 # rnd_0: 5 - 5 andl %eax, %ebx rorl $9, %ecx xorl %r10d, %ecx xorl %r11d, %ebx vpshufb %ymm11, %ymm8, %ymm8 # rnd_0: 6 - 6 rorl $11, %ecx addl %r9d, %r13d xorl %r10d, %ecx addl %ebx, %r9d vpaddd %ymm8, %ymm4, %ymm4 # rnd_0: 7 - 7 rorl $2, %ecx movl %r13d, %edx addl %ecx, %r9d # rnd_1: 0 - 0 rorl $14, %edx vpshufd $0x50, %ymm4, %ymm6 # rnd_1: 1 - 1 movl %r10d, %ebx movl %r14d, %ecx addl 236(%rsp), %r8d xorl %r15d, %ecx vpsrlq $0x11, %ymm6, %ymm8 vpsrlq $19, %ymm6, %ymm7 # rnd_1: 2 - 3 xorl %r13d, %edx andl %r13d, %ecx rorl $5, %edx xorl %r15d, %ecx xorl %r13d, %edx addl %ecx, %r8d vpsrld $10, %ymm6, %ymm9 vpxor %ymm8, %ymm7, %ymm8 # rnd_1: 4 - 5 rorl $6, %edx xorl %r9d, %ebx addl %edx, %r8d movl %r9d, %ecx andl %ebx, %eax rorl $9, %ecx xorl %r9d, %ecx xorl %r10d, %eax vpxor %ymm9, %ymm8, %ymm9 # rnd_1: 6 - 6 rorl $11, %ecx addl %r8d, %r12d xorl %r9d, %ecx addl %eax, %r8d vpshufb %ymm12, %ymm9, %ymm9 # rnd_1: 7 - 7 rorl $2, %ecx movl %r12d, %edx addl %ecx, %r8d vpaddd %ymm4, %ymm9, %ymm3 # msg_sched done: 56-59 # set_w_k_xfer_4: 8 vpaddd 256+L_avx2_sha256_k(%rip), %ymm0, %ymm4 vpaddd 288+L_avx2_sha256_k(%rip), %ymm1, %ymm5 vmovdqu %ymm4, 256(%rsp) vmovdqu %ymm5, 288(%rsp) vpaddd 320+L_avx2_sha256_k(%rip), %ymm2, %ymm4 vpaddd 352+L_avx2_sha256_k(%rip), %ymm3, %ymm5 vmovdqu %ymm4, 320(%rsp) vmovdqu %ymm5, 352(%rsp) # msg_sched: 64-67 # rnd_0: 0 - 0 rorl $14, %edx vpalignr $4, %ymm0, %ymm1, %ymm5 vpalignr $4, %ymm2, %ymm3, %ymm4 # rnd_0: 1 - 2 movl %r9d, %eax movl %r13d, %ecx addl 256(%rsp), %r15d xorl %r14d, %ecx xorl %r12d, %edx andl %r12d, %ecx vpsrld $7, %ymm5, %ymm6 vpslld $25, %ymm5, %ymm7 # rnd_0: 3 - 4 rorl $5, %edx xorl %r14d, %ecx xorl %r12d, %edx addl %ecx, %r15d rorl $6, %edx xorl %r8d, %eax addl %edx, %r15d movl 
%r8d, %ecx vpsrld $18, %ymm5, %ymm8 vpslld $14, %ymm5, %ymm9 # rnd_0: 5 - 6 andl %eax, %ebx rorl $9, %ecx xorl %r8d, %ecx xorl %r9d, %ebx rorl $11, %ecx addl %r15d, %r11d xorl %r8d, %ecx addl %ebx, %r15d vpor %ymm6, %ymm7, %ymm6 vpor %ymm8, %ymm9, %ymm8 # rnd_0: 7 - 7 rorl $2, %ecx movl %r11d, %edx addl %ecx, %r15d # rnd_1: 0 - 1 rorl $14, %edx movl %r8d, %ebx movl %r12d, %ecx addl 260(%rsp), %r14d xorl %r13d, %ecx vpsrld $3, %ymm5, %ymm9 vpxor %ymm6, %ymm8, %ymm6 # rnd_1: 2 - 3 xorl %r11d, %edx andl %r11d, %ecx rorl $5, %edx xorl %r13d, %ecx xorl %r11d, %edx addl %ecx, %r14d vpxor %ymm6, %ymm9, %ymm5 vpshufd $0xfa, %ymm3, %ymm6 # rnd_1: 4 - 5 rorl $6, %edx xorl %r15d, %ebx addl %edx, %r14d movl %r15d, %ecx andl %ebx, %eax rorl $9, %ecx xorl %r15d, %ecx xorl %r8d, %eax vpsrld $10, %ymm6, %ymm8 vpsrlq $19, %ymm6, %ymm7 # rnd_1: 6 - 7 rorl $11, %ecx addl %r14d, %r10d xorl %r15d, %ecx addl %eax, %r14d rorl $2, %ecx movl %r10d, %edx addl %ecx, %r14d # rnd_0: 0 - 0 rorl $14, %edx vpsrlq $0x11, %ymm6, %ymm6 vpaddd %ymm0, %ymm4, %ymm4 # rnd_0: 1 - 3 movl %r15d, %eax movl %r11d, %ecx addl 264(%rsp), %r13d xorl %r12d, %ecx xorl %r10d, %edx andl %r10d, %ecx rorl $5, %edx xorl %r12d, %ecx xorl %r10d, %edx addl %ecx, %r13d vpxor %ymm6, %ymm7, %ymm6 vpaddd %ymm5, %ymm4, %ymm4 # rnd_0: 4 - 4 rorl $6, %edx xorl %r14d, %eax addl %edx, %r13d movl %r14d, %ecx vpxor %ymm6, %ymm8, %ymm8 # rnd_0: 5 - 5 andl %eax, %ebx rorl $9, %ecx xorl %r14d, %ecx xorl %r15d, %ebx vpshufb %ymm11, %ymm8, %ymm8 # rnd_0: 6 - 6 rorl $11, %ecx addl %r13d, %r9d xorl %r14d, %ecx addl %ebx, %r13d vpaddd %ymm8, %ymm4, %ymm4 # rnd_0: 7 - 7 rorl $2, %ecx movl %r9d, %edx addl %ecx, %r13d # rnd_1: 0 - 0 rorl $14, %edx vpshufd $0x50, %ymm4, %ymm6 # rnd_1: 1 - 1 movl %r14d, %ebx movl %r10d, %ecx addl 268(%rsp), %r12d xorl %r11d, %ecx vpsrlq $0x11, %ymm6, %ymm8 vpsrlq $19, %ymm6, %ymm7 # rnd_1: 2 - 3 xorl %r9d, %edx andl %r9d, %ecx rorl $5, %edx xorl %r11d, %ecx xorl %r9d, %edx addl %ecx, %r12d vpsrld $10, %ymm6, %ymm9 vpxor %ymm8, %ymm7, %ymm8 # rnd_1: 4 - 5 rorl $6, %edx xorl %r13d, %ebx addl %edx, %r12d movl %r13d, %ecx andl %ebx, %eax rorl $9, %ecx xorl %r13d, %ecx xorl %r14d, %eax vpxor %ymm9, %ymm8, %ymm9 # rnd_1: 6 - 6 rorl $11, %ecx addl %r12d, %r8d xorl %r13d, %ecx addl %eax, %r12d vpshufb %ymm12, %ymm9, %ymm9 # rnd_1: 7 - 7 rorl $2, %ecx movl %r8d, %edx addl %ecx, %r12d vpaddd %ymm4, %ymm9, %ymm0 # msg_sched done: 64-67 # msg_sched: 72-75 # rnd_0: 0 - 0 rorl $14, %edx vpalignr $4, %ymm1, %ymm2, %ymm5 vpalignr $4, %ymm3, %ymm0, %ymm4 # rnd_0: 1 - 2 movl %r13d, %eax movl %r9d, %ecx addl 288(%rsp), %r11d xorl %r10d, %ecx xorl %r8d, %edx andl %r8d, %ecx vpsrld $7, %ymm5, %ymm6 vpslld $25, %ymm5, %ymm7 # rnd_0: 3 - 4 rorl $5, %edx xorl %r10d, %ecx xorl %r8d, %edx addl %ecx, %r11d rorl $6, %edx xorl %r12d, %eax addl %edx, %r11d movl %r12d, %ecx vpsrld $18, %ymm5, %ymm8 vpslld $14, %ymm5, %ymm9 # rnd_0: 5 - 6 andl %eax, %ebx rorl $9, %ecx xorl %r12d, %ecx xorl %r13d, %ebx rorl $11, %ecx addl %r11d, %r15d xorl %r12d, %ecx addl %ebx, %r11d vpor %ymm6, %ymm7, %ymm6 vpor %ymm8, %ymm9, %ymm8 # rnd_0: 7 - 7 rorl $2, %ecx movl %r15d, %edx addl %ecx, %r11d # rnd_1: 0 - 1 rorl $14, %edx movl %r12d, %ebx movl %r8d, %ecx addl 292(%rsp), %r10d xorl %r9d, %ecx vpsrld $3, %ymm5, %ymm9 vpxor %ymm6, %ymm8, %ymm6 # rnd_1: 2 - 3 xorl %r15d, %edx andl %r15d, %ecx rorl $5, %edx xorl %r9d, %ecx xorl %r15d, %edx addl %ecx, %r10d vpxor %ymm6, %ymm9, %ymm5 vpshufd $0xfa, %ymm0, %ymm6 # rnd_1: 4 - 5 rorl $6, %edx xorl %r11d, %ebx addl %edx, %r10d movl %r11d, %ecx 
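# Note: Maj(a,b,c) is formed as ((a ^ b) & (b ^ c)) ^ b; the (b ^ c) value is carried
# between rounds in %eax/%ebx so it is not recomputed each round.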
andl %ebx, %eax rorl $9, %ecx xorl %r11d, %ecx xorl %r12d, %eax vpsrld $10, %ymm6, %ymm8 vpsrlq $19, %ymm6, %ymm7 # rnd_1: 6 - 7 rorl $11, %ecx addl %r10d, %r14d xorl %r11d, %ecx addl %eax, %r10d rorl $2, %ecx movl %r14d, %edx addl %ecx, %r10d # rnd_0: 0 - 0 rorl $14, %edx vpsrlq $0x11, %ymm6, %ymm6 vpaddd %ymm1, %ymm4, %ymm4 # rnd_0: 1 - 3 movl %r11d, %eax movl %r15d, %ecx addl 296(%rsp), %r9d xorl %r8d, %ecx xorl %r14d, %edx andl %r14d, %ecx rorl $5, %edx xorl %r8d, %ecx xorl %r14d, %edx addl %ecx, %r9d vpxor %ymm6, %ymm7, %ymm6 vpaddd %ymm5, %ymm4, %ymm4 # rnd_0: 4 - 4 rorl $6, %edx xorl %r10d, %eax addl %edx, %r9d movl %r10d, %ecx vpxor %ymm6, %ymm8, %ymm8 # rnd_0: 5 - 5 andl %eax, %ebx rorl $9, %ecx xorl %r10d, %ecx xorl %r11d, %ebx vpshufb %ymm11, %ymm8, %ymm8 # rnd_0: 6 - 6 rorl $11, %ecx addl %r9d, %r13d xorl %r10d, %ecx addl %ebx, %r9d vpaddd %ymm8, %ymm4, %ymm4 # rnd_0: 7 - 7 rorl $2, %ecx movl %r13d, %edx addl %ecx, %r9d # rnd_1: 0 - 0 rorl $14, %edx vpshufd $0x50, %ymm4, %ymm6 # rnd_1: 1 - 1 movl %r10d, %ebx movl %r14d, %ecx addl 300(%rsp), %r8d xorl %r15d, %ecx vpsrlq $0x11, %ymm6, %ymm8 vpsrlq $19, %ymm6, %ymm7 # rnd_1: 2 - 3 xorl %r13d, %edx andl %r13d, %ecx rorl $5, %edx xorl %r15d, %ecx xorl %r13d, %edx addl %ecx, %r8d vpsrld $10, %ymm6, %ymm9 vpxor %ymm8, %ymm7, %ymm8 # rnd_1: 4 - 5 rorl $6, %edx xorl %r9d, %ebx addl %edx, %r8d movl %r9d, %ecx andl %ebx, %eax rorl $9, %ecx xorl %r9d, %ecx xorl %r10d, %eax vpxor %ymm9, %ymm8, %ymm9 # rnd_1: 6 - 6 rorl $11, %ecx addl %r8d, %r12d xorl %r9d, %ecx addl %eax, %r8d vpshufb %ymm12, %ymm9, %ymm9 # rnd_1: 7 - 7 rorl $2, %ecx movl %r12d, %edx addl %ecx, %r8d vpaddd %ymm4, %ymm9, %ymm1 # msg_sched done: 72-75 # msg_sched: 80-83 # rnd_0: 0 - 0 rorl $14, %edx vpalignr $4, %ymm2, %ymm3, %ymm5 vpalignr $4, %ymm0, %ymm1, %ymm4 # rnd_0: 1 - 2 movl %r9d, %eax movl %r13d, %ecx addl 320(%rsp), %r15d xorl %r14d, %ecx xorl %r12d, %edx andl %r12d, %ecx vpsrld $7, %ymm5, %ymm6 vpslld $25, %ymm5, %ymm7 # rnd_0: 3 - 4 rorl $5, %edx xorl %r14d, %ecx xorl %r12d, %edx addl %ecx, %r15d rorl $6, %edx xorl %r8d, %eax addl %edx, %r15d movl %r8d, %ecx vpsrld $18, %ymm5, %ymm8 vpslld $14, %ymm5, %ymm9 # rnd_0: 5 - 6 andl %eax, %ebx rorl $9, %ecx xorl %r8d, %ecx xorl %r9d, %ebx rorl $11, %ecx addl %r15d, %r11d xorl %r8d, %ecx addl %ebx, %r15d vpor %ymm6, %ymm7, %ymm6 vpor %ymm8, %ymm9, %ymm8 # rnd_0: 7 - 7 rorl $2, %ecx movl %r11d, %edx addl %ecx, %r15d # rnd_1: 0 - 1 rorl $14, %edx movl %r8d, %ebx movl %r12d, %ecx addl 324(%rsp), %r14d xorl %r13d, %ecx vpsrld $3, %ymm5, %ymm9 vpxor %ymm6, %ymm8, %ymm6 # rnd_1: 2 - 3 xorl %r11d, %edx andl %r11d, %ecx rorl $5, %edx xorl %r13d, %ecx xorl %r11d, %edx addl %ecx, %r14d vpxor %ymm6, %ymm9, %ymm5 vpshufd $0xfa, %ymm1, %ymm6 # rnd_1: 4 - 5 rorl $6, %edx xorl %r15d, %ebx addl %edx, %r14d movl %r15d, %ecx andl %ebx, %eax rorl $9, %ecx xorl %r15d, %ecx xorl %r8d, %eax vpsrld $10, %ymm6, %ymm8 vpsrlq $19, %ymm6, %ymm7 # rnd_1: 6 - 7 rorl $11, %ecx addl %r14d, %r10d xorl %r15d, %ecx addl %eax, %r14d rorl $2, %ecx movl %r10d, %edx addl %ecx, %r14d # rnd_0: 0 - 0 rorl $14, %edx vpsrlq $0x11, %ymm6, %ymm6 vpaddd %ymm2, %ymm4, %ymm4 # rnd_0: 1 - 3 movl %r15d, %eax movl %r11d, %ecx addl 328(%rsp), %r13d xorl %r12d, %ecx xorl %r10d, %edx andl %r10d, %ecx rorl $5, %edx xorl %r12d, %ecx xorl %r10d, %edx addl %ecx, %r13d vpxor %ymm6, %ymm7, %ymm6 vpaddd %ymm5, %ymm4, %ymm4 # rnd_0: 4 - 4 rorl $6, %edx xorl %r14d, %eax addl %edx, %r13d movl %r14d, %ecx vpxor %ymm6, %ymm8, %ymm8 # rnd_0: 5 - 5 andl %eax, %ebx rorl $9, %ecx xorl 
%r14d, %ecx xorl %r15d, %ebx vpshufb %ymm11, %ymm8, %ymm8 # rnd_0: 6 - 6 rorl $11, %ecx addl %r13d, %r9d xorl %r14d, %ecx addl %ebx, %r13d vpaddd %ymm8, %ymm4, %ymm4 # rnd_0: 7 - 7 rorl $2, %ecx movl %r9d, %edx addl %ecx, %r13d # rnd_1: 0 - 0 rorl $14, %edx vpshufd $0x50, %ymm4, %ymm6 # rnd_1: 1 - 1 movl %r14d, %ebx movl %r10d, %ecx addl 332(%rsp), %r12d xorl %r11d, %ecx vpsrlq $0x11, %ymm6, %ymm8 vpsrlq $19, %ymm6, %ymm7 # rnd_1: 2 - 3 xorl %r9d, %edx andl %r9d, %ecx rorl $5, %edx xorl %r11d, %ecx xorl %r9d, %edx addl %ecx, %r12d vpsrld $10, %ymm6, %ymm9 vpxor %ymm8, %ymm7, %ymm8 # rnd_1: 4 - 5 rorl $6, %edx xorl %r13d, %ebx addl %edx, %r12d movl %r13d, %ecx andl %ebx, %eax rorl $9, %ecx xorl %r13d, %ecx xorl %r14d, %eax vpxor %ymm9, %ymm8, %ymm9 # rnd_1: 6 - 6 rorl $11, %ecx addl %r12d, %r8d xorl %r13d, %ecx addl %eax, %r12d vpshufb %ymm12, %ymm9, %ymm9 # rnd_1: 7 - 7 rorl $2, %ecx movl %r8d, %edx addl %ecx, %r12d vpaddd %ymm4, %ymm9, %ymm2 # msg_sched done: 80-83 # msg_sched: 88-91 # rnd_0: 0 - 0 rorl $14, %edx vpalignr $4, %ymm3, %ymm0, %ymm5 vpalignr $4, %ymm1, %ymm2, %ymm4 # rnd_0: 1 - 2 movl %r13d, %eax movl %r9d, %ecx addl 352(%rsp), %r11d xorl %r10d, %ecx xorl %r8d, %edx andl %r8d, %ecx vpsrld $7, %ymm5, %ymm6 vpslld $25, %ymm5, %ymm7 # rnd_0: 3 - 4 rorl $5, %edx xorl %r10d, %ecx xorl %r8d, %edx addl %ecx, %r11d rorl $6, %edx xorl %r12d, %eax addl %edx, %r11d movl %r12d, %ecx vpsrld $18, %ymm5, %ymm8 vpslld $14, %ymm5, %ymm9 # rnd_0: 5 - 6 andl %eax, %ebx rorl $9, %ecx xorl %r12d, %ecx xorl %r13d, %ebx rorl $11, %ecx addl %r11d, %r15d xorl %r12d, %ecx addl %ebx, %r11d vpor %ymm6, %ymm7, %ymm6 vpor %ymm8, %ymm9, %ymm8 # rnd_0: 7 - 7 rorl $2, %ecx movl %r15d, %edx addl %ecx, %r11d # rnd_1: 0 - 1 rorl $14, %edx movl %r12d, %ebx movl %r8d, %ecx addl 356(%rsp), %r10d xorl %r9d, %ecx vpsrld $3, %ymm5, %ymm9 vpxor %ymm6, %ymm8, %ymm6 # rnd_1: 2 - 3 xorl %r15d, %edx andl %r15d, %ecx rorl $5, %edx xorl %r9d, %ecx xorl %r15d, %edx addl %ecx, %r10d vpxor %ymm6, %ymm9, %ymm5 vpshufd $0xfa, %ymm2, %ymm6 # rnd_1: 4 - 5 rorl $6, %edx xorl %r11d, %ebx addl %edx, %r10d movl %r11d, %ecx andl %ebx, %eax rorl $9, %ecx xorl %r11d, %ecx xorl %r12d, %eax vpsrld $10, %ymm6, %ymm8 vpsrlq $19, %ymm6, %ymm7 # rnd_1: 6 - 7 rorl $11, %ecx addl %r10d, %r14d xorl %r11d, %ecx addl %eax, %r10d rorl $2, %ecx movl %r14d, %edx addl %ecx, %r10d # rnd_0: 0 - 0 rorl $14, %edx vpsrlq $0x11, %ymm6, %ymm6 vpaddd %ymm3, %ymm4, %ymm4 # rnd_0: 1 - 3 movl %r11d, %eax movl %r15d, %ecx addl 360(%rsp), %r9d xorl %r8d, %ecx xorl %r14d, %edx andl %r14d, %ecx rorl $5, %edx xorl %r8d, %ecx xorl %r14d, %edx addl %ecx, %r9d vpxor %ymm6, %ymm7, %ymm6 vpaddd %ymm5, %ymm4, %ymm4 # rnd_0: 4 - 4 rorl $6, %edx xorl %r10d, %eax addl %edx, %r9d movl %r10d, %ecx vpxor %ymm6, %ymm8, %ymm8 # rnd_0: 5 - 5 andl %eax, %ebx rorl $9, %ecx xorl %r10d, %ecx xorl %r11d, %ebx vpshufb %ymm11, %ymm8, %ymm8 # rnd_0: 6 - 6 rorl $11, %ecx addl %r9d, %r13d xorl %r10d, %ecx addl %ebx, %r9d vpaddd %ymm8, %ymm4, %ymm4 # rnd_0: 7 - 7 rorl $2, %ecx movl %r13d, %edx addl %ecx, %r9d # rnd_1: 0 - 0 rorl $14, %edx vpshufd $0x50, %ymm4, %ymm6 # rnd_1: 1 - 1 movl %r10d, %ebx movl %r14d, %ecx addl 364(%rsp), %r8d xorl %r15d, %ecx vpsrlq $0x11, %ymm6, %ymm8 vpsrlq $19, %ymm6, %ymm7 # rnd_1: 2 - 3 xorl %r13d, %edx andl %r13d, %ecx rorl $5, %edx xorl %r15d, %ecx xorl %r13d, %edx addl %ecx, %r8d vpsrld $10, %ymm6, %ymm9 vpxor %ymm8, %ymm7, %ymm8 # rnd_1: 4 - 5 rorl $6, %edx xorl %r9d, %ebx addl %edx, %r8d movl %r9d, %ecx andl %ebx, %eax rorl $9, %ecx xorl %r9d, %ecx xorl 
%r10d, %eax vpxor %ymm9, %ymm8, %ymm9 # rnd_1: 6 - 6 rorl $11, %ecx addl %r8d, %r12d xorl %r9d, %ecx addl %eax, %r8d vpshufb %ymm12, %ymm9, %ymm9 # rnd_1: 7 - 7 rorl $2, %ecx movl %r12d, %edx addl %ecx, %r8d vpaddd %ymm4, %ymm9, %ymm3 # msg_sched done: 88-91 # set_w_k_xfer_4: 12 vpaddd 384+L_avx2_sha256_k(%rip), %ymm0, %ymm4 vpaddd 416+L_avx2_sha256_k(%rip), %ymm1, %ymm5 vmovdqu %ymm4, 384(%rsp) vmovdqu %ymm5, 416(%rsp) vpaddd 448+L_avx2_sha256_k(%rip), %ymm2, %ymm4 vpaddd 480+L_avx2_sha256_k(%rip), %ymm3, %ymm5 vmovdqu %ymm4, 448(%rsp) vmovdqu %ymm5, 480(%rsp) # rnd_all_4: 24-27 addl 384(%rsp), %r15d movl %r13d, %ecx movl %r9d, %eax xorl %r14d, %ecx rorl $14, %edx andl %r12d, %ecx xorl %r12d, %edx xorl %r14d, %ecx rorl $5, %edx addl %ecx, %r15d xorl %r12d, %edx xorl %r8d, %eax rorl $6, %edx movl %r8d, %ecx addl %edx, %r15d rorl $9, %ecx andl %eax, %ebx xorl %r8d, %ecx xorl %r9d, %ebx rorl $11, %ecx addl %r15d, %r11d xorl %r8d, %ecx addl %ebx, %r15d rorl $2, %ecx movl %r11d, %edx addl %ecx, %r15d addl 388(%rsp), %r14d movl %r12d, %ecx movl %r8d, %ebx xorl %r13d, %ecx rorl $14, %edx andl %r11d, %ecx xorl %r11d, %edx xorl %r13d, %ecx rorl $5, %edx addl %ecx, %r14d xorl %r11d, %edx xorl %r15d, %ebx rorl $6, %edx movl %r15d, %ecx addl %edx, %r14d rorl $9, %ecx andl %ebx, %eax xorl %r15d, %ecx xorl %r8d, %eax rorl $11, %ecx addl %r14d, %r10d xorl %r15d, %ecx addl %eax, %r14d rorl $2, %ecx movl %r10d, %edx addl %ecx, %r14d addl 392(%rsp), %r13d movl %r11d, %ecx movl %r15d, %eax xorl %r12d, %ecx rorl $14, %edx andl %r10d, %ecx xorl %r10d, %edx xorl %r12d, %ecx rorl $5, %edx addl %ecx, %r13d xorl %r10d, %edx xorl %r14d, %eax rorl $6, %edx movl %r14d, %ecx addl %edx, %r13d rorl $9, %ecx andl %eax, %ebx xorl %r14d, %ecx xorl %r15d, %ebx rorl $11, %ecx addl %r13d, %r9d xorl %r14d, %ecx addl %ebx, %r13d rorl $2, %ecx movl %r9d, %edx addl %ecx, %r13d addl 396(%rsp), %r12d movl %r10d, %ecx movl %r14d, %ebx xorl %r11d, %ecx rorl $14, %edx andl %r9d, %ecx xorl %r9d, %edx xorl %r11d, %ecx rorl $5, %edx addl %ecx, %r12d xorl %r9d, %edx xorl %r13d, %ebx rorl $6, %edx movl %r13d, %ecx addl %edx, %r12d rorl $9, %ecx andl %ebx, %eax xorl %r13d, %ecx xorl %r14d, %eax rorl $11, %ecx addl %r12d, %r8d xorl %r13d, %ecx addl %eax, %r12d rorl $2, %ecx movl %r8d, %edx addl %ecx, %r12d # rnd_all_4: 26-29 addl 416(%rsp), %r11d movl %r9d, %ecx movl %r13d, %eax xorl %r10d, %ecx rorl $14, %edx andl %r8d, %ecx xorl %r8d, %edx xorl %r10d, %ecx rorl $5, %edx addl %ecx, %r11d xorl %r8d, %edx xorl %r12d, %eax rorl $6, %edx movl %r12d, %ecx addl %edx, %r11d rorl $9, %ecx andl %eax, %ebx xorl %r12d, %ecx xorl %r13d, %ebx rorl $11, %ecx addl %r11d, %r15d xorl %r12d, %ecx addl %ebx, %r11d rorl $2, %ecx movl %r15d, %edx addl %ecx, %r11d addl 420(%rsp), %r10d movl %r8d, %ecx movl %r12d, %ebx xorl %r9d, %ecx rorl $14, %edx andl %r15d, %ecx xorl %r15d, %edx xorl %r9d, %ecx rorl $5, %edx addl %ecx, %r10d xorl %r15d, %edx xorl %r11d, %ebx rorl $6, %edx movl %r11d, %ecx addl %edx, %r10d rorl $9, %ecx andl %ebx, %eax xorl %r11d, %ecx xorl %r12d, %eax rorl $11, %ecx addl %r10d, %r14d xorl %r11d, %ecx addl %eax, %r10d rorl $2, %ecx movl %r14d, %edx addl %ecx, %r10d addl 424(%rsp), %r9d movl %r15d, %ecx movl %r11d, %eax xorl %r8d, %ecx rorl $14, %edx andl %r14d, %ecx xorl %r14d, %edx xorl %r8d, %ecx rorl $5, %edx addl %ecx, %r9d xorl %r14d, %edx xorl %r10d, %eax rorl $6, %edx movl %r10d, %ecx addl %edx, %r9d rorl $9, %ecx andl %eax, %ebx xorl %r10d, %ecx xorl %r11d, %ebx rorl $11, %ecx addl %r9d, %r13d xorl %r10d, %ecx addl %ebx, %r9d rorl $2, 
%ecx movl %r13d, %edx addl %ecx, %r9d addl 428(%rsp), %r8d movl %r14d, %ecx movl %r10d, %ebx xorl %r15d, %ecx rorl $14, %edx andl %r13d, %ecx xorl %r13d, %edx xorl %r15d, %ecx rorl $5, %edx addl %ecx, %r8d xorl %r13d, %edx xorl %r9d, %ebx rorl $6, %edx movl %r9d, %ecx addl %edx, %r8d rorl $9, %ecx andl %ebx, %eax xorl %r9d, %ecx xorl %r10d, %eax rorl $11, %ecx addl %r8d, %r12d xorl %r9d, %ecx addl %eax, %r8d rorl $2, %ecx movl %r12d, %edx addl %ecx, %r8d # rnd_all_4: 28-31 addl 448(%rsp), %r15d movl %r13d, %ecx movl %r9d, %eax xorl %r14d, %ecx rorl $14, %edx andl %r12d, %ecx xorl %r12d, %edx xorl %r14d, %ecx rorl $5, %edx addl %ecx, %r15d xorl %r12d, %edx xorl %r8d, %eax rorl $6, %edx movl %r8d, %ecx addl %edx, %r15d rorl $9, %ecx andl %eax, %ebx xorl %r8d, %ecx xorl %r9d, %ebx rorl $11, %ecx addl %r15d, %r11d xorl %r8d, %ecx addl %ebx, %r15d rorl $2, %ecx movl %r11d, %edx addl %ecx, %r15d addl 452(%rsp), %r14d movl %r12d, %ecx movl %r8d, %ebx xorl %r13d, %ecx rorl $14, %edx andl %r11d, %ecx xorl %r11d, %edx xorl %r13d, %ecx rorl $5, %edx addl %ecx, %r14d xorl %r11d, %edx xorl %r15d, %ebx rorl $6, %edx movl %r15d, %ecx addl %edx, %r14d rorl $9, %ecx andl %ebx, %eax xorl %r15d, %ecx xorl %r8d, %eax rorl $11, %ecx addl %r14d, %r10d xorl %r15d, %ecx addl %eax, %r14d rorl $2, %ecx movl %r10d, %edx addl %ecx, %r14d addl 456(%rsp), %r13d movl %r11d, %ecx movl %r15d, %eax xorl %r12d, %ecx rorl $14, %edx andl %r10d, %ecx xorl %r10d, %edx xorl %r12d, %ecx rorl $5, %edx addl %ecx, %r13d xorl %r10d, %edx xorl %r14d, %eax rorl $6, %edx movl %r14d, %ecx addl %edx, %r13d rorl $9, %ecx andl %eax, %ebx xorl %r14d, %ecx xorl %r15d, %ebx rorl $11, %ecx addl %r13d, %r9d xorl %r14d, %ecx addl %ebx, %r13d rorl $2, %ecx movl %r9d, %edx addl %ecx, %r13d addl 460(%rsp), %r12d movl %r10d, %ecx movl %r14d, %ebx xorl %r11d, %ecx rorl $14, %edx andl %r9d, %ecx xorl %r9d, %edx xorl %r11d, %ecx rorl $5, %edx addl %ecx, %r12d xorl %r9d, %edx xorl %r13d, %ebx rorl $6, %edx movl %r13d, %ecx addl %edx, %r12d rorl $9, %ecx andl %ebx, %eax xorl %r13d, %ecx xorl %r14d, %eax rorl $11, %ecx addl %r12d, %r8d xorl %r13d, %ecx addl %eax, %r12d rorl $2, %ecx movl %r8d, %edx addl %ecx, %r12d # rnd_all_4: 30-33 addl 480(%rsp), %r11d movl %r9d, %ecx movl %r13d, %eax xorl %r10d, %ecx rorl $14, %edx andl %r8d, %ecx xorl %r8d, %edx xorl %r10d, %ecx rorl $5, %edx addl %ecx, %r11d xorl %r8d, %edx xorl %r12d, %eax rorl $6, %edx movl %r12d, %ecx addl %edx, %r11d rorl $9, %ecx andl %eax, %ebx xorl %r12d, %ecx xorl %r13d, %ebx rorl $11, %ecx addl %r11d, %r15d xorl %r12d, %ecx addl %ebx, %r11d rorl $2, %ecx movl %r15d, %edx addl %ecx, %r11d addl 484(%rsp), %r10d movl %r8d, %ecx movl %r12d, %ebx xorl %r9d, %ecx rorl $14, %edx andl %r15d, %ecx xorl %r15d, %edx xorl %r9d, %ecx rorl $5, %edx addl %ecx, %r10d xorl %r15d, %edx xorl %r11d, %ebx rorl $6, %edx movl %r11d, %ecx addl %edx, %r10d rorl $9, %ecx andl %ebx, %eax xorl %r11d, %ecx xorl %r12d, %eax rorl $11, %ecx addl %r10d, %r14d xorl %r11d, %ecx addl %eax, %r10d rorl $2, %ecx movl %r14d, %edx addl %ecx, %r10d addl 488(%rsp), %r9d movl %r15d, %ecx movl %r11d, %eax xorl %r8d, %ecx rorl $14, %edx andl %r14d, %ecx xorl %r14d, %edx xorl %r8d, %ecx rorl $5, %edx addl %ecx, %r9d xorl %r14d, %edx xorl %r10d, %eax rorl $6, %edx movl %r10d, %ecx addl %edx, %r9d rorl $9, %ecx andl %eax, %ebx xorl %r10d, %ecx xorl %r11d, %ebx rorl $11, %ecx addl %r9d, %r13d xorl %r10d, %ecx addl %ebx, %r9d rorl $2, %ecx movl %r13d, %edx addl %ecx, %r9d addl 492(%rsp), %r8d movl %r14d, %ecx movl %r10d, %ebx xorl %r15d, 
%ecx rorl $14, %edx andl %r13d, %ecx xorl %r13d, %edx xorl %r15d, %ecx rorl $5, %edx addl %ecx, %r8d xorl %r13d, %edx xorl %r9d, %ebx rorl $6, %edx movl %r9d, %ecx addl %edx, %r8d rorl $9, %ecx andl %ebx, %eax xorl %r9d, %ecx xorl %r10d, %eax rorl $11, %ecx addl %r8d, %r12d xorl %r9d, %ecx addl %eax, %r8d rorl $2, %ecx movl %r12d, %edx addl %ecx, %r8d addl %r8d, (%rdi) addl %r9d, 4(%rdi) addl %r10d, 8(%rdi) addl %r11d, 12(%rdi) addl %r12d, 16(%rdi) addl %r13d, 20(%rdi) addl %r14d, 24(%rdi) addl %r15d, 28(%rdi) xorq %rax, %rax vzeroupper addq $0x200, %rsp popq %r15 popq %r14 popq %r13 popq %r12 popq %rbx repz retq #ifndef __APPLE__ .size Transform_Sha256_AVX2,.-Transform_Sha256_AVX2 #endif /* __APPLE__ */ #ifndef __APPLE__ .text .globl Transform_Sha256_AVX2_Len .type Transform_Sha256_AVX2_Len,@function .align 16 Transform_Sha256_AVX2_Len: #else .section __TEXT,__text .globl _Transform_Sha256_AVX2_Len .p2align 4 _Transform_Sha256_AVX2_Len: #endif /* __APPLE__ */ pushq %rbx pushq %r12 pushq %r13 pushq %r14 pushq %r15 pushq %rbp movq %rsi, %rbp movq %rdx, %rsi subq $0x200, %rsp testb $0x40, %sil je L_sha256_len_avx2_block vmovdqu (%rbp), %ymm0 vmovdqu 32(%rbp), %ymm1 vmovups %ymm0, 32(%rdi) vmovups %ymm1, 64(%rdi) #ifndef __APPLE__ call Transform_Sha256_AVX2@plt #else call _Transform_Sha256_AVX2 #endif /* __APPLE__ */ addq $0x40, %rbp subl $0x40, %esi jz L_sha256_len_avx2_done L_sha256_len_avx2_block: vmovdqa L_avx2_sha256_flip_mask(%rip), %ymm13 vmovdqa L_avx2_sha256_shuf_00BA(%rip), %ymm11 vmovdqa L_avx2_sha256_shuf_DC00(%rip), %ymm12 movl (%rdi), %r8d movl 4(%rdi), %r9d movl 8(%rdi), %r10d movl 12(%rdi), %r11d movl 16(%rdi), %r12d movl 20(%rdi), %r13d movl 24(%rdi), %r14d movl 28(%rdi), %r15d # Start of loop processing two blocks L_sha256_len_avx2_start: # X0, X1, X2, X3 = W[0..15] vmovdqu (%rbp), %xmm0 vmovdqu 16(%rbp), %xmm1 vmovdqu 64(%rbp), %xmm4 vmovdqu 80(%rbp), %xmm5 vinserti128 $0x01, %xmm4, %ymm0, %ymm0 vinserti128 $0x01, %xmm5, %ymm1, %ymm1 vpshufb %ymm13, %ymm0, %ymm0 vpshufb %ymm13, %ymm1, %ymm1 vmovdqu 32(%rbp), %xmm2 vmovdqu 48(%rbp), %xmm3 vmovdqu 96(%rbp), %xmm6 vmovdqu 112(%rbp), %xmm7 vinserti128 $0x01, %xmm6, %ymm2, %ymm2 vinserti128 $0x01, %xmm7, %ymm3, %ymm3 vpshufb %ymm13, %ymm2, %ymm2 vpshufb %ymm13, %ymm3, %ymm3 movl %r9d, %ebx movl %r12d, %edx xorl %r10d, %ebx # set_w_k_xfer_4: 0 vpaddd 0+L_avx2_sha256_k(%rip), %ymm0, %ymm4 vpaddd 32+L_avx2_sha256_k(%rip), %ymm1, %ymm5 vmovdqu %ymm4, (%rsp) vmovdqu %ymm5, 32(%rsp) vpaddd 64+L_avx2_sha256_k(%rip), %ymm2, %ymm4 vpaddd 96+L_avx2_sha256_k(%rip), %ymm3, %ymm5 vmovdqu %ymm4, 64(%rsp) vmovdqu %ymm5, 96(%rsp) # msg_sched: 0-3 # rnd_0: 0 - 0 rorl $14, %edx vpalignr $4, %ymm0, %ymm1, %ymm5 vpalignr $4, %ymm2, %ymm3, %ymm4 # rnd_0: 1 - 2 movl %r9d, %eax movl %r13d, %ecx addl (%rsp), %r15d xorl %r14d, %ecx xorl %r12d, %edx andl %r12d, %ecx vpsrld $7, %ymm5, %ymm6 vpslld $25, %ymm5, %ymm7 # rnd_0: 3 - 4 rorl $5, %edx xorl %r14d, %ecx xorl %r12d, %edx addl %ecx, %r15d rorl $6, %edx xorl %r8d, %eax addl %edx, %r15d movl %r8d, %ecx vpsrld $18, %ymm5, %ymm8 vpslld $14, %ymm5, %ymm9 # rnd_0: 5 - 6 andl %eax, %ebx rorl $9, %ecx xorl %r8d, %ecx xorl %r9d, %ebx rorl $11, %ecx addl %r15d, %r11d xorl %r8d, %ecx addl %ebx, %r15d vpor %ymm6, %ymm7, %ymm6 vpor %ymm8, %ymm9, %ymm8 # rnd_0: 7 - 7 rorl $2, %ecx movl %r11d, %edx addl %ecx, %r15d # rnd_1: 0 - 1 rorl $14, %edx movl %r8d, %ebx movl %r12d, %ecx addl 4(%rsp), %r14d xorl %r13d, %ecx vpsrld $3, %ymm5, %ymm9 vpxor %ymm6, %ymm8, %ymm6 # rnd_1: 2 - 3 xorl %r11d, %edx andl %r11d, %ecx rorl 
$5, %edx xorl %r13d, %ecx xorl %r11d, %edx addl %ecx, %r14d vpxor %ymm6, %ymm9, %ymm5 vpshufd $0xfa, %ymm3, %ymm6 # rnd_1: 4 - 5 rorl $6, %edx xorl %r15d, %ebx addl %edx, %r14d movl %r15d, %ecx andl %ebx, %eax rorl $9, %ecx xorl %r15d, %ecx xorl %r8d, %eax vpsrld $10, %ymm6, %ymm8 vpsrlq $19, %ymm6, %ymm7 # rnd_1: 6 - 7 rorl $11, %ecx addl %r14d, %r10d xorl %r15d, %ecx addl %eax, %r14d rorl $2, %ecx movl %r10d, %edx addl %ecx, %r14d # rnd_0: 0 - 0 rorl $14, %edx vpsrlq $0x11, %ymm6, %ymm6 vpaddd %ymm0, %ymm4, %ymm4 # rnd_0: 1 - 3 movl %r15d, %eax movl %r11d, %ecx addl 8(%rsp), %r13d xorl %r12d, %ecx xorl %r10d, %edx andl %r10d, %ecx rorl $5, %edx xorl %r12d, %ecx xorl %r10d, %edx addl %ecx, %r13d vpxor %ymm6, %ymm7, %ymm6 vpaddd %ymm5, %ymm4, %ymm4 # rnd_0: 4 - 4 rorl $6, %edx xorl %r14d, %eax addl %edx, %r13d movl %r14d, %ecx vpxor %ymm6, %ymm8, %ymm8 # rnd_0: 5 - 5 andl %eax, %ebx rorl $9, %ecx xorl %r14d, %ecx xorl %r15d, %ebx vpshufb %ymm11, %ymm8, %ymm8 # rnd_0: 6 - 6 rorl $11, %ecx addl %r13d, %r9d xorl %r14d, %ecx addl %ebx, %r13d vpaddd %ymm8, %ymm4, %ymm4 # rnd_0: 7 - 7 rorl $2, %ecx movl %r9d, %edx addl %ecx, %r13d # rnd_1: 0 - 0 rorl $14, %edx vpshufd $0x50, %ymm4, %ymm6 # rnd_1: 1 - 1 movl %r14d, %ebx movl %r10d, %ecx addl 12(%rsp), %r12d xorl %r11d, %ecx vpsrlq $0x11, %ymm6, %ymm8 vpsrlq $19, %ymm6, %ymm7 # rnd_1: 2 - 3 xorl %r9d, %edx andl %r9d, %ecx rorl $5, %edx xorl %r11d, %ecx xorl %r9d, %edx addl %ecx, %r12d vpsrld $10, %ymm6, %ymm9 vpxor %ymm8, %ymm7, %ymm8 # rnd_1: 4 - 5 rorl $6, %edx xorl %r13d, %ebx addl %edx, %r12d movl %r13d, %ecx andl %ebx, %eax rorl $9, %ecx xorl %r13d, %ecx xorl %r14d, %eax vpxor %ymm9, %ymm8, %ymm9 # rnd_1: 6 - 6 rorl $11, %ecx addl %r12d, %r8d xorl %r13d, %ecx addl %eax, %r12d vpshufb %ymm12, %ymm9, %ymm9 # rnd_1: 7 - 7 rorl $2, %ecx movl %r8d, %edx addl %ecx, %r12d vpaddd %ymm4, %ymm9, %ymm0 # msg_sched done: 0-3 # msg_sched: 8-11 # rnd_0: 0 - 0 rorl $14, %edx vpalignr $4, %ymm1, %ymm2, %ymm5 vpalignr $4, %ymm3, %ymm0, %ymm4 # rnd_0: 1 - 2 movl %r13d, %eax movl %r9d, %ecx addl 32(%rsp), %r11d xorl %r10d, %ecx xorl %r8d, %edx andl %r8d, %ecx vpsrld $7, %ymm5, %ymm6 vpslld $25, %ymm5, %ymm7 # rnd_0: 3 - 4 rorl $5, %edx xorl %r10d, %ecx xorl %r8d, %edx addl %ecx, %r11d rorl $6, %edx xorl %r12d, %eax addl %edx, %r11d movl %r12d, %ecx vpsrld $18, %ymm5, %ymm8 vpslld $14, %ymm5, %ymm9 # rnd_0: 5 - 6 andl %eax, %ebx rorl $9, %ecx xorl %r12d, %ecx xorl %r13d, %ebx rorl $11, %ecx addl %r11d, %r15d xorl %r12d, %ecx addl %ebx, %r11d vpor %ymm6, %ymm7, %ymm6 vpor %ymm8, %ymm9, %ymm8 # rnd_0: 7 - 7 rorl $2, %ecx movl %r15d, %edx addl %ecx, %r11d # rnd_1: 0 - 1 rorl $14, %edx movl %r12d, %ebx movl %r8d, %ecx addl 36(%rsp), %r10d xorl %r9d, %ecx vpsrld $3, %ymm5, %ymm9 vpxor %ymm6, %ymm8, %ymm6 # rnd_1: 2 - 3 xorl %r15d, %edx andl %r15d, %ecx rorl $5, %edx xorl %r9d, %ecx xorl %r15d, %edx addl %ecx, %r10d vpxor %ymm6, %ymm9, %ymm5 vpshufd $0xfa, %ymm0, %ymm6 # rnd_1: 4 - 5 rorl $6, %edx xorl %r11d, %ebx addl %edx, %r10d movl %r11d, %ecx andl %ebx, %eax rorl $9, %ecx xorl %r11d, %ecx xorl %r12d, %eax vpsrld $10, %ymm6, %ymm8 vpsrlq $19, %ymm6, %ymm7 # rnd_1: 6 - 7 rorl $11, %ecx addl %r10d, %r14d xorl %r11d, %ecx addl %eax, %r10d rorl $2, %ecx movl %r14d, %edx addl %ecx, %r10d # rnd_0: 0 - 0 rorl $14, %edx vpsrlq $0x11, %ymm6, %ymm6 vpaddd %ymm1, %ymm4, %ymm4 # rnd_0: 1 - 3 movl %r11d, %eax movl %r15d, %ecx addl 40(%rsp), %r9d xorl %r8d, %ecx xorl %r14d, %edx andl %r14d, %ecx rorl $5, %edx xorl %r8d, %ecx xorl %r14d, %edx addl %ecx, %r9d vpxor %ymm6, %ymm7, 
%ymm6 vpaddd %ymm5, %ymm4, %ymm4 # rnd_0: 4 - 4 rorl $6, %edx xorl %r10d, %eax addl %edx, %r9d movl %r10d, %ecx vpxor %ymm6, %ymm8, %ymm8 # rnd_0: 5 - 5 andl %eax, %ebx rorl $9, %ecx xorl %r10d, %ecx xorl %r11d, %ebx vpshufb %ymm11, %ymm8, %ymm8 # rnd_0: 6 - 6 rorl $11, %ecx addl %r9d, %r13d xorl %r10d, %ecx addl %ebx, %r9d vpaddd %ymm8, %ymm4, %ymm4 # rnd_0: 7 - 7 rorl $2, %ecx movl %r13d, %edx addl %ecx, %r9d # rnd_1: 0 - 0 rorl $14, %edx vpshufd $0x50, %ymm4, %ymm6 # rnd_1: 1 - 1 movl %r10d, %ebx movl %r14d, %ecx addl 44(%rsp), %r8d xorl %r15d, %ecx vpsrlq $0x11, %ymm6, %ymm8 vpsrlq $19, %ymm6, %ymm7 # rnd_1: 2 - 3 xorl %r13d, %edx andl %r13d, %ecx rorl $5, %edx xorl %r15d, %ecx xorl %r13d, %edx addl %ecx, %r8d vpsrld $10, %ymm6, %ymm9 vpxor %ymm8, %ymm7, %ymm8 # rnd_1: 4 - 5 rorl $6, %edx xorl %r9d, %ebx addl %edx, %r8d movl %r9d, %ecx andl %ebx, %eax rorl $9, %ecx xorl %r9d, %ecx xorl %r10d, %eax vpxor %ymm9, %ymm8, %ymm9 # rnd_1: 6 - 6 rorl $11, %ecx addl %r8d, %r12d xorl %r9d, %ecx addl %eax, %r8d vpshufb %ymm12, %ymm9, %ymm9 # rnd_1: 7 - 7 rorl $2, %ecx movl %r12d, %edx addl %ecx, %r8d vpaddd %ymm4, %ymm9, %ymm1 # msg_sched done: 8-11 # msg_sched: 16-19 # rnd_0: 0 - 0 rorl $14, %edx vpalignr $4, %ymm2, %ymm3, %ymm5 vpalignr $4, %ymm0, %ymm1, %ymm4 # rnd_0: 1 - 2 movl %r9d, %eax movl %r13d, %ecx addl 64(%rsp), %r15d xorl %r14d, %ecx xorl %r12d, %edx andl %r12d, %ecx vpsrld $7, %ymm5, %ymm6 vpslld $25, %ymm5, %ymm7 # rnd_0: 3 - 4 rorl $5, %edx xorl %r14d, %ecx xorl %r12d, %edx addl %ecx, %r15d rorl $6, %edx xorl %r8d, %eax addl %edx, %r15d movl %r8d, %ecx vpsrld $18, %ymm5, %ymm8 vpslld $14, %ymm5, %ymm9 # rnd_0: 5 - 6 andl %eax, %ebx rorl $9, %ecx xorl %r8d, %ecx xorl %r9d, %ebx rorl $11, %ecx addl %r15d, %r11d xorl %r8d, %ecx addl %ebx, %r15d vpor %ymm6, %ymm7, %ymm6 vpor %ymm8, %ymm9, %ymm8 # rnd_0: 7 - 7 rorl $2, %ecx movl %r11d, %edx addl %ecx, %r15d # rnd_1: 0 - 1 rorl $14, %edx movl %r8d, %ebx movl %r12d, %ecx addl 68(%rsp), %r14d xorl %r13d, %ecx vpsrld $3, %ymm5, %ymm9 vpxor %ymm6, %ymm8, %ymm6 # rnd_1: 2 - 3 xorl %r11d, %edx andl %r11d, %ecx rorl $5, %edx xorl %r13d, %ecx xorl %r11d, %edx addl %ecx, %r14d vpxor %ymm6, %ymm9, %ymm5 vpshufd $0xfa, %ymm1, %ymm6 # rnd_1: 4 - 5 rorl $6, %edx xorl %r15d, %ebx addl %edx, %r14d movl %r15d, %ecx andl %ebx, %eax rorl $9, %ecx xorl %r15d, %ecx xorl %r8d, %eax vpsrld $10, %ymm6, %ymm8 vpsrlq $19, %ymm6, %ymm7 # rnd_1: 6 - 7 rorl $11, %ecx addl %r14d, %r10d xorl %r15d, %ecx addl %eax, %r14d rorl $2, %ecx movl %r10d, %edx addl %ecx, %r14d # rnd_0: 0 - 0 rorl $14, %edx vpsrlq $0x11, %ymm6, %ymm6 vpaddd %ymm2, %ymm4, %ymm4 # rnd_0: 1 - 3 movl %r15d, %eax movl %r11d, %ecx addl 72(%rsp), %r13d xorl %r12d, %ecx xorl %r10d, %edx andl %r10d, %ecx rorl $5, %edx xorl %r12d, %ecx xorl %r10d, %edx addl %ecx, %r13d vpxor %ymm6, %ymm7, %ymm6 vpaddd %ymm5, %ymm4, %ymm4 # rnd_0: 4 - 4 rorl $6, %edx xorl %r14d, %eax addl %edx, %r13d movl %r14d, %ecx vpxor %ymm6, %ymm8, %ymm8 # rnd_0: 5 - 5 andl %eax, %ebx rorl $9, %ecx xorl %r14d, %ecx xorl %r15d, %ebx vpshufb %ymm11, %ymm8, %ymm8 # rnd_0: 6 - 6 rorl $11, %ecx addl %r13d, %r9d xorl %r14d, %ecx addl %ebx, %r13d vpaddd %ymm8, %ymm4, %ymm4 # rnd_0: 7 - 7 rorl $2, %ecx movl %r9d, %edx addl %ecx, %r13d # rnd_1: 0 - 0 rorl $14, %edx vpshufd $0x50, %ymm4, %ymm6 # rnd_1: 1 - 1 movl %r14d, %ebx movl %r10d, %ecx addl 76(%rsp), %r12d xorl %r11d, %ecx vpsrlq $0x11, %ymm6, %ymm8 vpsrlq $19, %ymm6, %ymm7 # rnd_1: 2 - 3 xorl %r9d, %edx andl %r9d, %ecx rorl $5, %edx xorl %r11d, %ecx xorl %r9d, %edx addl %ecx, %r12d 
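# Note: Ch(e,f,g) is formed as ((f ^ g) & e) ^ g in %ecx, which avoids computing ~e
# and needs only one and plus two xors per round.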
vpsrld $10, %ymm6, %ymm9 vpxor %ymm8, %ymm7, %ymm8 # rnd_1: 4 - 5 rorl $6, %edx xorl %r13d, %ebx addl %edx, %r12d movl %r13d, %ecx andl %ebx, %eax rorl $9, %ecx xorl %r13d, %ecx xorl %r14d, %eax vpxor %ymm9, %ymm8, %ymm9 # rnd_1: 6 - 6 rorl $11, %ecx addl %r12d, %r8d xorl %r13d, %ecx addl %eax, %r12d vpshufb %ymm12, %ymm9, %ymm9 # rnd_1: 7 - 7 rorl $2, %ecx movl %r8d, %edx addl %ecx, %r12d vpaddd %ymm4, %ymm9, %ymm2 # msg_sched done: 16-19 # msg_sched: 24-27 # rnd_0: 0 - 0 rorl $14, %edx vpalignr $4, %ymm3, %ymm0, %ymm5 vpalignr $4, %ymm1, %ymm2, %ymm4 # rnd_0: 1 - 2 movl %r13d, %eax movl %r9d, %ecx addl 96(%rsp), %r11d xorl %r10d, %ecx xorl %r8d, %edx andl %r8d, %ecx vpsrld $7, %ymm5, %ymm6 vpslld $25, %ymm5, %ymm7 # rnd_0: 3 - 4 rorl $5, %edx xorl %r10d, %ecx xorl %r8d, %edx addl %ecx, %r11d rorl $6, %edx xorl %r12d, %eax addl %edx, %r11d movl %r12d, %ecx vpsrld $18, %ymm5, %ymm8 vpslld $14, %ymm5, %ymm9 # rnd_0: 5 - 6 andl %eax, %ebx rorl $9, %ecx xorl %r12d, %ecx xorl %r13d, %ebx rorl $11, %ecx addl %r11d, %r15d xorl %r12d, %ecx addl %ebx, %r11d vpor %ymm6, %ymm7, %ymm6 vpor %ymm8, %ymm9, %ymm8 # rnd_0: 7 - 7 rorl $2, %ecx movl %r15d, %edx addl %ecx, %r11d # rnd_1: 0 - 1 rorl $14, %edx movl %r12d, %ebx movl %r8d, %ecx addl 100(%rsp), %r10d xorl %r9d, %ecx vpsrld $3, %ymm5, %ymm9 vpxor %ymm6, %ymm8, %ymm6 # rnd_1: 2 - 3 xorl %r15d, %edx andl %r15d, %ecx rorl $5, %edx xorl %r9d, %ecx xorl %r15d, %edx addl %ecx, %r10d vpxor %ymm6, %ymm9, %ymm5 vpshufd $0xfa, %ymm2, %ymm6 # rnd_1: 4 - 5 rorl $6, %edx xorl %r11d, %ebx addl %edx, %r10d movl %r11d, %ecx andl %ebx, %eax rorl $9, %ecx xorl %r11d, %ecx xorl %r12d, %eax vpsrld $10, %ymm6, %ymm8 vpsrlq $19, %ymm6, %ymm7 # rnd_1: 6 - 7 rorl $11, %ecx addl %r10d, %r14d xorl %r11d, %ecx addl %eax, %r10d rorl $2, %ecx movl %r14d, %edx addl %ecx, %r10d # rnd_0: 0 - 0 rorl $14, %edx vpsrlq $0x11, %ymm6, %ymm6 vpaddd %ymm3, %ymm4, %ymm4 # rnd_0: 1 - 3 movl %r11d, %eax movl %r15d, %ecx addl 104(%rsp), %r9d xorl %r8d, %ecx xorl %r14d, %edx andl %r14d, %ecx rorl $5, %edx xorl %r8d, %ecx xorl %r14d, %edx addl %ecx, %r9d vpxor %ymm6, %ymm7, %ymm6 vpaddd %ymm5, %ymm4, %ymm4 # rnd_0: 4 - 4 rorl $6, %edx xorl %r10d, %eax addl %edx, %r9d movl %r10d, %ecx vpxor %ymm6, %ymm8, %ymm8 # rnd_0: 5 - 5 andl %eax, %ebx rorl $9, %ecx xorl %r10d, %ecx xorl %r11d, %ebx vpshufb %ymm11, %ymm8, %ymm8 # rnd_0: 6 - 6 rorl $11, %ecx addl %r9d, %r13d xorl %r10d, %ecx addl %ebx, %r9d vpaddd %ymm8, %ymm4, %ymm4 # rnd_0: 7 - 7 rorl $2, %ecx movl %r13d, %edx addl %ecx, %r9d # rnd_1: 0 - 0 rorl $14, %edx vpshufd $0x50, %ymm4, %ymm6 # rnd_1: 1 - 1 movl %r10d, %ebx movl %r14d, %ecx addl 108(%rsp), %r8d xorl %r15d, %ecx vpsrlq $0x11, %ymm6, %ymm8 vpsrlq $19, %ymm6, %ymm7 # rnd_1: 2 - 3 xorl %r13d, %edx andl %r13d, %ecx rorl $5, %edx xorl %r15d, %ecx xorl %r13d, %edx addl %ecx, %r8d vpsrld $10, %ymm6, %ymm9 vpxor %ymm8, %ymm7, %ymm8 # rnd_1: 4 - 5 rorl $6, %edx xorl %r9d, %ebx addl %edx, %r8d movl %r9d, %ecx andl %ebx, %eax rorl $9, %ecx xorl %r9d, %ecx xorl %r10d, %eax vpxor %ymm9, %ymm8, %ymm9 # rnd_1: 6 - 6 rorl $11, %ecx addl %r8d, %r12d xorl %r9d, %ecx addl %eax, %r8d vpshufb %ymm12, %ymm9, %ymm9 # rnd_1: 7 - 7 rorl $2, %ecx movl %r12d, %edx addl %ecx, %r8d vpaddd %ymm4, %ymm9, %ymm3 # msg_sched done: 24-27 # set_w_k_xfer_4: 4 vpaddd 128+L_avx2_sha256_k(%rip), %ymm0, %ymm4 vpaddd 160+L_avx2_sha256_k(%rip), %ymm1, %ymm5 vmovdqu %ymm4, 128(%rsp) vmovdqu %ymm5, 160(%rsp) vpaddd 192+L_avx2_sha256_k(%rip), %ymm2, %ymm4 vpaddd 224+L_avx2_sha256_k(%rip), %ymm3, %ymm5 vmovdqu %ymm4, 192(%rsp) 
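# Note: in this two-block path each set_w_k_xfer_4 stores W[t]+K[t] for a group of 16
# rounds of both blocks: the low 128-bit lane holds the first block's words and the
# high lane the second block's, matching the duplicated rows of L_avx2_sha256_k.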
vmovdqu %ymm5, 224(%rsp) # msg_sched: 32-35 # rnd_0: 0 - 0 rorl $14, %edx vpalignr $4, %ymm0, %ymm1, %ymm5 vpalignr $4, %ymm2, %ymm3, %ymm4 # rnd_0: 1 - 2 movl %r9d, %eax movl %r13d, %ecx addl 128(%rsp), %r15d xorl %r14d, %ecx xorl %r12d, %edx andl %r12d, %ecx vpsrld $7, %ymm5, %ymm6 vpslld $25, %ymm5, %ymm7 # rnd_0: 3 - 4 rorl $5, %edx xorl %r14d, %ecx xorl %r12d, %edx addl %ecx, %r15d rorl $6, %edx xorl %r8d, %eax addl %edx, %r15d movl %r8d, %ecx vpsrld $18, %ymm5, %ymm8 vpslld $14, %ymm5, %ymm9 # rnd_0: 5 - 6 andl %eax, %ebx rorl $9, %ecx xorl %r8d, %ecx xorl %r9d, %ebx rorl $11, %ecx addl %r15d, %r11d xorl %r8d, %ecx addl %ebx, %r15d vpor %ymm6, %ymm7, %ymm6 vpor %ymm8, %ymm9, %ymm8 # rnd_0: 7 - 7 rorl $2, %ecx movl %r11d, %edx addl %ecx, %r15d # rnd_1: 0 - 1 rorl $14, %edx movl %r8d, %ebx movl %r12d, %ecx addl 132(%rsp), %r14d xorl %r13d, %ecx vpsrld $3, %ymm5, %ymm9 vpxor %ymm6, %ymm8, %ymm6 # rnd_1: 2 - 3 xorl %r11d, %edx andl %r11d, %ecx rorl $5, %edx xorl %r13d, %ecx xorl %r11d, %edx addl %ecx, %r14d vpxor %ymm6, %ymm9, %ymm5 vpshufd $0xfa, %ymm3, %ymm6 # rnd_1: 4 - 5 rorl $6, %edx xorl %r15d, %ebx addl %edx, %r14d movl %r15d, %ecx andl %ebx, %eax rorl $9, %ecx xorl %r15d, %ecx xorl %r8d, %eax vpsrld $10, %ymm6, %ymm8 vpsrlq $19, %ymm6, %ymm7 # rnd_1: 6 - 7 rorl $11, %ecx addl %r14d, %r10d xorl %r15d, %ecx addl %eax, %r14d rorl $2, %ecx movl %r10d, %edx addl %ecx, %r14d # rnd_0: 0 - 0 rorl $14, %edx vpsrlq $0x11, %ymm6, %ymm6 vpaddd %ymm0, %ymm4, %ymm4 # rnd_0: 1 - 3 movl %r15d, %eax movl %r11d, %ecx addl 136(%rsp), %r13d xorl %r12d, %ecx xorl %r10d, %edx andl %r10d, %ecx rorl $5, %edx xorl %r12d, %ecx xorl %r10d, %edx addl %ecx, %r13d vpxor %ymm6, %ymm7, %ymm6 vpaddd %ymm5, %ymm4, %ymm4 # rnd_0: 4 - 4 rorl $6, %edx xorl %r14d, %eax addl %edx, %r13d movl %r14d, %ecx vpxor %ymm6, %ymm8, %ymm8 # rnd_0: 5 - 5 andl %eax, %ebx rorl $9, %ecx xorl %r14d, %ecx xorl %r15d, %ebx vpshufb %ymm11, %ymm8, %ymm8 # rnd_0: 6 - 6 rorl $11, %ecx addl %r13d, %r9d xorl %r14d, %ecx addl %ebx, %r13d vpaddd %ymm8, %ymm4, %ymm4 # rnd_0: 7 - 7 rorl $2, %ecx movl %r9d, %edx addl %ecx, %r13d # rnd_1: 0 - 0 rorl $14, %edx vpshufd $0x50, %ymm4, %ymm6 # rnd_1: 1 - 1 movl %r14d, %ebx movl %r10d, %ecx addl 140(%rsp), %r12d xorl %r11d, %ecx vpsrlq $0x11, %ymm6, %ymm8 vpsrlq $19, %ymm6, %ymm7 # rnd_1: 2 - 3 xorl %r9d, %edx andl %r9d, %ecx rorl $5, %edx xorl %r11d, %ecx xorl %r9d, %edx addl %ecx, %r12d vpsrld $10, %ymm6, %ymm9 vpxor %ymm8, %ymm7, %ymm8 # rnd_1: 4 - 5 rorl $6, %edx xorl %r13d, %ebx addl %edx, %r12d movl %r13d, %ecx andl %ebx, %eax rorl $9, %ecx xorl %r13d, %ecx xorl %r14d, %eax vpxor %ymm9, %ymm8, %ymm9 # rnd_1: 6 - 6 rorl $11, %ecx addl %r12d, %r8d xorl %r13d, %ecx addl %eax, %r12d vpshufb %ymm12, %ymm9, %ymm9 # rnd_1: 7 - 7 rorl $2, %ecx movl %r8d, %edx addl %ecx, %r12d vpaddd %ymm4, %ymm9, %ymm0 # msg_sched done: 32-35 # msg_sched: 40-43 # rnd_0: 0 - 0 rorl $14, %edx vpalignr $4, %ymm1, %ymm2, %ymm5 vpalignr $4, %ymm3, %ymm0, %ymm4 # rnd_0: 1 - 2 movl %r13d, %eax movl %r9d, %ecx addl 160(%rsp), %r11d xorl %r10d, %ecx xorl %r8d, %edx andl %r8d, %ecx vpsrld $7, %ymm5, %ymm6 vpslld $25, %ymm5, %ymm7 # rnd_0: 3 - 4 rorl $5, %edx xorl %r10d, %ecx xorl %r8d, %edx addl %ecx, %r11d rorl $6, %edx xorl %r12d, %eax addl %edx, %r11d movl %r12d, %ecx vpsrld $18, %ymm5, %ymm8 vpslld $14, %ymm5, %ymm9 # rnd_0: 5 - 6 andl %eax, %ebx rorl $9, %ecx xorl %r12d, %ecx xorl %r13d, %ebx rorl $11, %ecx addl %r11d, %r15d xorl %r12d, %ecx addl %ebx, %r11d vpor %ymm6, %ymm7, %ymm6 vpor %ymm8, %ymm9, %ymm8 # rnd_0: 7 - 7 
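        # Each msg_sched block expands four schedule words per lane:
        #   W[t] = sigma1(W[t-2]) + W[t-7] + sigma0(W[t-15]) + W[t-16]
        # sigma0 (ROTR7 ^ ROTR18 ^ SHR3) comes from the vpsrld/vpslld/vpor/vpxor
        # sequence above; sigma1 (ROTR17 ^ ROTR19 ^ SHR10) from the vpsrlq/vpsrld/
        # vpxor/vpshufb steps that follow. The vector work is interleaved with the
        # scalar rnd_0/rnd_1 code to hide its latency.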
rorl $2, %ecx movl %r15d, %edx addl %ecx, %r11d # rnd_1: 0 - 1 rorl $14, %edx movl %r12d, %ebx movl %r8d, %ecx addl 164(%rsp), %r10d xorl %r9d, %ecx vpsrld $3, %ymm5, %ymm9 vpxor %ymm6, %ymm8, %ymm6 # rnd_1: 2 - 3 xorl %r15d, %edx andl %r15d, %ecx rorl $5, %edx xorl %r9d, %ecx xorl %r15d, %edx addl %ecx, %r10d vpxor %ymm6, %ymm9, %ymm5 vpshufd $0xfa, %ymm0, %ymm6 # rnd_1: 4 - 5 rorl $6, %edx xorl %r11d, %ebx addl %edx, %r10d movl %r11d, %ecx andl %ebx, %eax rorl $9, %ecx xorl %r11d, %ecx xorl %r12d, %eax vpsrld $10, %ymm6, %ymm8 vpsrlq $19, %ymm6, %ymm7 # rnd_1: 6 - 7 rorl $11, %ecx addl %r10d, %r14d xorl %r11d, %ecx addl %eax, %r10d rorl $2, %ecx movl %r14d, %edx addl %ecx, %r10d # rnd_0: 0 - 0 rorl $14, %edx vpsrlq $0x11, %ymm6, %ymm6 vpaddd %ymm1, %ymm4, %ymm4 # rnd_0: 1 - 3 movl %r11d, %eax movl %r15d, %ecx addl 168(%rsp), %r9d xorl %r8d, %ecx xorl %r14d, %edx andl %r14d, %ecx rorl $5, %edx xorl %r8d, %ecx xorl %r14d, %edx addl %ecx, %r9d vpxor %ymm6, %ymm7, %ymm6 vpaddd %ymm5, %ymm4, %ymm4 # rnd_0: 4 - 4 rorl $6, %edx xorl %r10d, %eax addl %edx, %r9d movl %r10d, %ecx vpxor %ymm6, %ymm8, %ymm8 # rnd_0: 5 - 5 andl %eax, %ebx rorl $9, %ecx xorl %r10d, %ecx xorl %r11d, %ebx vpshufb %ymm11, %ymm8, %ymm8 # rnd_0: 6 - 6 rorl $11, %ecx addl %r9d, %r13d xorl %r10d, %ecx addl %ebx, %r9d vpaddd %ymm8, %ymm4, %ymm4 # rnd_0: 7 - 7 rorl $2, %ecx movl %r13d, %edx addl %ecx, %r9d # rnd_1: 0 - 0 rorl $14, %edx vpshufd $0x50, %ymm4, %ymm6 # rnd_1: 1 - 1 movl %r10d, %ebx movl %r14d, %ecx addl 172(%rsp), %r8d xorl %r15d, %ecx vpsrlq $0x11, %ymm6, %ymm8 vpsrlq $19, %ymm6, %ymm7 # rnd_1: 2 - 3 xorl %r13d, %edx andl %r13d, %ecx rorl $5, %edx xorl %r15d, %ecx xorl %r13d, %edx addl %ecx, %r8d vpsrld $10, %ymm6, %ymm9 vpxor %ymm8, %ymm7, %ymm8 # rnd_1: 4 - 5 rorl $6, %edx xorl %r9d, %ebx addl %edx, %r8d movl %r9d, %ecx andl %ebx, %eax rorl $9, %ecx xorl %r9d, %ecx xorl %r10d, %eax vpxor %ymm9, %ymm8, %ymm9 # rnd_1: 6 - 6 rorl $11, %ecx addl %r8d, %r12d xorl %r9d, %ecx addl %eax, %r8d vpshufb %ymm12, %ymm9, %ymm9 # rnd_1: 7 - 7 rorl $2, %ecx movl %r12d, %edx addl %ecx, %r8d vpaddd %ymm4, %ymm9, %ymm1 # msg_sched done: 40-43 # msg_sched: 48-51 # rnd_0: 0 - 0 rorl $14, %edx vpalignr $4, %ymm2, %ymm3, %ymm5 vpalignr $4, %ymm0, %ymm1, %ymm4 # rnd_0: 1 - 2 movl %r9d, %eax movl %r13d, %ecx addl 192(%rsp), %r15d xorl %r14d, %ecx xorl %r12d, %edx andl %r12d, %ecx vpsrld $7, %ymm5, %ymm6 vpslld $25, %ymm5, %ymm7 # rnd_0: 3 - 4 rorl $5, %edx xorl %r14d, %ecx xorl %r12d, %edx addl %ecx, %r15d rorl $6, %edx xorl %r8d, %eax addl %edx, %r15d movl %r8d, %ecx vpsrld $18, %ymm5, %ymm8 vpslld $14, %ymm5, %ymm9 # rnd_0: 5 - 6 andl %eax, %ebx rorl $9, %ecx xorl %r8d, %ecx xorl %r9d, %ebx rorl $11, %ecx addl %r15d, %r11d xorl %r8d, %ecx addl %ebx, %r15d vpor %ymm6, %ymm7, %ymm6 vpor %ymm8, %ymm9, %ymm8 # rnd_0: 7 - 7 rorl $2, %ecx movl %r11d, %edx addl %ecx, %r15d # rnd_1: 0 - 1 rorl $14, %edx movl %r8d, %ebx movl %r12d, %ecx addl 196(%rsp), %r14d xorl %r13d, %ecx vpsrld $3, %ymm5, %ymm9 vpxor %ymm6, %ymm8, %ymm6 # rnd_1: 2 - 3 xorl %r11d, %edx andl %r11d, %ecx rorl $5, %edx xorl %r13d, %ecx xorl %r11d, %edx addl %ecx, %r14d vpxor %ymm6, %ymm9, %ymm5 vpshufd $0xfa, %ymm1, %ymm6 # rnd_1: 4 - 5 rorl $6, %edx xorl %r15d, %ebx addl %edx, %r14d movl %r15d, %ecx andl %ebx, %eax rorl $9, %ecx xorl %r15d, %ecx xorl %r8d, %eax vpsrld $10, %ymm6, %ymm8 vpsrlq $19, %ymm6, %ymm7 # rnd_1: 6 - 7 rorl $11, %ecx addl %r14d, %r10d xorl %r15d, %ecx addl %eax, %r14d rorl $2, %ecx movl %r10d, %edx addl %ecx, %r14d # rnd_0: 0 - 0 rorl $14, %edx vpsrlq 
$0x11, %ymm6, %ymm6 vpaddd %ymm2, %ymm4, %ymm4 # rnd_0: 1 - 3 movl %r15d, %eax movl %r11d, %ecx addl 200(%rsp), %r13d xorl %r12d, %ecx xorl %r10d, %edx andl %r10d, %ecx rorl $5, %edx xorl %r12d, %ecx xorl %r10d, %edx addl %ecx, %r13d vpxor %ymm6, %ymm7, %ymm6 vpaddd %ymm5, %ymm4, %ymm4 # rnd_0: 4 - 4 rorl $6, %edx xorl %r14d, %eax addl %edx, %r13d movl %r14d, %ecx vpxor %ymm6, %ymm8, %ymm8 # rnd_0: 5 - 5 andl %eax, %ebx rorl $9, %ecx xorl %r14d, %ecx xorl %r15d, %ebx vpshufb %ymm11, %ymm8, %ymm8 # rnd_0: 6 - 6 rorl $11, %ecx addl %r13d, %r9d xorl %r14d, %ecx addl %ebx, %r13d vpaddd %ymm8, %ymm4, %ymm4 # rnd_0: 7 - 7 rorl $2, %ecx movl %r9d, %edx addl %ecx, %r13d # rnd_1: 0 - 0 rorl $14, %edx vpshufd $0x50, %ymm4, %ymm6 # rnd_1: 1 - 1 movl %r14d, %ebx movl %r10d, %ecx addl 204(%rsp), %r12d xorl %r11d, %ecx vpsrlq $0x11, %ymm6, %ymm8 vpsrlq $19, %ymm6, %ymm7 # rnd_1: 2 - 3 xorl %r9d, %edx andl %r9d, %ecx rorl $5, %edx xorl %r11d, %ecx xorl %r9d, %edx addl %ecx, %r12d vpsrld $10, %ymm6, %ymm9 vpxor %ymm8, %ymm7, %ymm8 # rnd_1: 4 - 5 rorl $6, %edx xorl %r13d, %ebx addl %edx, %r12d movl %r13d, %ecx andl %ebx, %eax rorl $9, %ecx xorl %r13d, %ecx xorl %r14d, %eax vpxor %ymm9, %ymm8, %ymm9 # rnd_1: 6 - 6 rorl $11, %ecx addl %r12d, %r8d xorl %r13d, %ecx addl %eax, %r12d vpshufb %ymm12, %ymm9, %ymm9 # rnd_1: 7 - 7 rorl $2, %ecx movl %r8d, %edx addl %ecx, %r12d vpaddd %ymm4, %ymm9, %ymm2 # msg_sched done: 48-51 # msg_sched: 56-59 # rnd_0: 0 - 0 rorl $14, %edx vpalignr $4, %ymm3, %ymm0, %ymm5 vpalignr $4, %ymm1, %ymm2, %ymm4 # rnd_0: 1 - 2 movl %r13d, %eax movl %r9d, %ecx addl 224(%rsp), %r11d xorl %r10d, %ecx xorl %r8d, %edx andl %r8d, %ecx vpsrld $7, %ymm5, %ymm6 vpslld $25, %ymm5, %ymm7 # rnd_0: 3 - 4 rorl $5, %edx xorl %r10d, %ecx xorl %r8d, %edx addl %ecx, %r11d rorl $6, %edx xorl %r12d, %eax addl %edx, %r11d movl %r12d, %ecx vpsrld $18, %ymm5, %ymm8 vpslld $14, %ymm5, %ymm9 # rnd_0: 5 - 6 andl %eax, %ebx rorl $9, %ecx xorl %r12d, %ecx xorl %r13d, %ebx rorl $11, %ecx addl %r11d, %r15d xorl %r12d, %ecx addl %ebx, %r11d vpor %ymm6, %ymm7, %ymm6 vpor %ymm8, %ymm9, %ymm8 # rnd_0: 7 - 7 rorl $2, %ecx movl %r15d, %edx addl %ecx, %r11d # rnd_1: 0 - 1 rorl $14, %edx movl %r12d, %ebx movl %r8d, %ecx addl 228(%rsp), %r10d xorl %r9d, %ecx vpsrld $3, %ymm5, %ymm9 vpxor %ymm6, %ymm8, %ymm6 # rnd_1: 2 - 3 xorl %r15d, %edx andl %r15d, %ecx rorl $5, %edx xorl %r9d, %ecx xorl %r15d, %edx addl %ecx, %r10d vpxor %ymm6, %ymm9, %ymm5 vpshufd $0xfa, %ymm2, %ymm6 # rnd_1: 4 - 5 rorl $6, %edx xorl %r11d, %ebx addl %edx, %r10d movl %r11d, %ecx andl %ebx, %eax rorl $9, %ecx xorl %r11d, %ecx xorl %r12d, %eax vpsrld $10, %ymm6, %ymm8 vpsrlq $19, %ymm6, %ymm7 # rnd_1: 6 - 7 rorl $11, %ecx addl %r10d, %r14d xorl %r11d, %ecx addl %eax, %r10d rorl $2, %ecx movl %r14d, %edx addl %ecx, %r10d # rnd_0: 0 - 0 rorl $14, %edx vpsrlq $0x11, %ymm6, %ymm6 vpaddd %ymm3, %ymm4, %ymm4 # rnd_0: 1 - 3 movl %r11d, %eax movl %r15d, %ecx addl 232(%rsp), %r9d xorl %r8d, %ecx xorl %r14d, %edx andl %r14d, %ecx rorl $5, %edx xorl %r8d, %ecx xorl %r14d, %edx addl %ecx, %r9d vpxor %ymm6, %ymm7, %ymm6 vpaddd %ymm5, %ymm4, %ymm4 # rnd_0: 4 - 4 rorl $6, %edx xorl %r10d, %eax addl %edx, %r9d movl %r10d, %ecx vpxor %ymm6, %ymm8, %ymm8 # rnd_0: 5 - 5 andl %eax, %ebx rorl $9, %ecx xorl %r10d, %ecx xorl %r11d, %ebx vpshufb %ymm11, %ymm8, %ymm8 # rnd_0: 6 - 6 rorl $11, %ecx addl %r9d, %r13d xorl %r10d, %ecx addl %ebx, %r9d vpaddd %ymm8, %ymm4, %ymm4 # rnd_0: 7 - 7 rorl $2, %ecx movl %r13d, %edx addl %ecx, %r9d # rnd_1: 0 - 0 rorl $14, %edx vpshufd $0x50, 
%ymm4, %ymm6 # rnd_1: 1 - 1 movl %r10d, %ebx movl %r14d, %ecx addl 236(%rsp), %r8d xorl %r15d, %ecx vpsrlq $0x11, %ymm6, %ymm8 vpsrlq $19, %ymm6, %ymm7 # rnd_1: 2 - 3 xorl %r13d, %edx andl %r13d, %ecx rorl $5, %edx xorl %r15d, %ecx xorl %r13d, %edx addl %ecx, %r8d vpsrld $10, %ymm6, %ymm9 vpxor %ymm8, %ymm7, %ymm8 # rnd_1: 4 - 5 rorl $6, %edx xorl %r9d, %ebx addl %edx, %r8d movl %r9d, %ecx andl %ebx, %eax rorl $9, %ecx xorl %r9d, %ecx xorl %r10d, %eax vpxor %ymm9, %ymm8, %ymm9 # rnd_1: 6 - 6 rorl $11, %ecx addl %r8d, %r12d xorl %r9d, %ecx addl %eax, %r8d vpshufb %ymm12, %ymm9, %ymm9 # rnd_1: 7 - 7 rorl $2, %ecx movl %r12d, %edx addl %ecx, %r8d vpaddd %ymm4, %ymm9, %ymm3 # msg_sched done: 56-59 # set_w_k_xfer_4: 8 vpaddd 256+L_avx2_sha256_k(%rip), %ymm0, %ymm4 vpaddd 288+L_avx2_sha256_k(%rip), %ymm1, %ymm5 vmovdqu %ymm4, 256(%rsp) vmovdqu %ymm5, 288(%rsp) vpaddd 320+L_avx2_sha256_k(%rip), %ymm2, %ymm4 vpaddd 352+L_avx2_sha256_k(%rip), %ymm3, %ymm5 vmovdqu %ymm4, 320(%rsp) vmovdqu %ymm5, 352(%rsp) # msg_sched: 64-67 # rnd_0: 0 - 0 rorl $14, %edx vpalignr $4, %ymm0, %ymm1, %ymm5 vpalignr $4, %ymm2, %ymm3, %ymm4 # rnd_0: 1 - 2 movl %r9d, %eax movl %r13d, %ecx addl 256(%rsp), %r15d xorl %r14d, %ecx xorl %r12d, %edx andl %r12d, %ecx vpsrld $7, %ymm5, %ymm6 vpslld $25, %ymm5, %ymm7 # rnd_0: 3 - 4 rorl $5, %edx xorl %r14d, %ecx xorl %r12d, %edx addl %ecx, %r15d rorl $6, %edx xorl %r8d, %eax addl %edx, %r15d movl %r8d, %ecx vpsrld $18, %ymm5, %ymm8 vpslld $14, %ymm5, %ymm9 # rnd_0: 5 - 6 andl %eax, %ebx rorl $9, %ecx xorl %r8d, %ecx xorl %r9d, %ebx rorl $11, %ecx addl %r15d, %r11d xorl %r8d, %ecx addl %ebx, %r15d vpor %ymm6, %ymm7, %ymm6 vpor %ymm8, %ymm9, %ymm8 # rnd_0: 7 - 7 rorl $2, %ecx movl %r11d, %edx addl %ecx, %r15d # rnd_1: 0 - 1 rorl $14, %edx movl %r8d, %ebx movl %r12d, %ecx addl 260(%rsp), %r14d xorl %r13d, %ecx vpsrld $3, %ymm5, %ymm9 vpxor %ymm6, %ymm8, %ymm6 # rnd_1: 2 - 3 xorl %r11d, %edx andl %r11d, %ecx rorl $5, %edx xorl %r13d, %ecx xorl %r11d, %edx addl %ecx, %r14d vpxor %ymm6, %ymm9, %ymm5 vpshufd $0xfa, %ymm3, %ymm6 # rnd_1: 4 - 5 rorl $6, %edx xorl %r15d, %ebx addl %edx, %r14d movl %r15d, %ecx andl %ebx, %eax rorl $9, %ecx xorl %r15d, %ecx xorl %r8d, %eax vpsrld $10, %ymm6, %ymm8 vpsrlq $19, %ymm6, %ymm7 # rnd_1: 6 - 7 rorl $11, %ecx addl %r14d, %r10d xorl %r15d, %ecx addl %eax, %r14d rorl $2, %ecx movl %r10d, %edx addl %ecx, %r14d # rnd_0: 0 - 0 rorl $14, %edx vpsrlq $0x11, %ymm6, %ymm6 vpaddd %ymm0, %ymm4, %ymm4 # rnd_0: 1 - 3 movl %r15d, %eax movl %r11d, %ecx addl 264(%rsp), %r13d xorl %r12d, %ecx xorl %r10d, %edx andl %r10d, %ecx rorl $5, %edx xorl %r12d, %ecx xorl %r10d, %edx addl %ecx, %r13d vpxor %ymm6, %ymm7, %ymm6 vpaddd %ymm5, %ymm4, %ymm4 # rnd_0: 4 - 4 rorl $6, %edx xorl %r14d, %eax addl %edx, %r13d movl %r14d, %ecx vpxor %ymm6, %ymm8, %ymm8 # rnd_0: 5 - 5 andl %eax, %ebx rorl $9, %ecx xorl %r14d, %ecx xorl %r15d, %ebx vpshufb %ymm11, %ymm8, %ymm8 # rnd_0: 6 - 6 rorl $11, %ecx addl %r13d, %r9d xorl %r14d, %ecx addl %ebx, %r13d vpaddd %ymm8, %ymm4, %ymm4 # rnd_0: 7 - 7 rorl $2, %ecx movl %r9d, %edx addl %ecx, %r13d # rnd_1: 0 - 0 rorl $14, %edx vpshufd $0x50, %ymm4, %ymm6 # rnd_1: 1 - 1 movl %r14d, %ebx movl %r10d, %ecx addl 268(%rsp), %r12d xorl %r11d, %ecx vpsrlq $0x11, %ymm6, %ymm8 vpsrlq $19, %ymm6, %ymm7 # rnd_1: 2 - 3 xorl %r9d, %edx andl %r9d, %ecx rorl $5, %edx xorl %r11d, %ecx xorl %r9d, %edx addl %ecx, %r12d vpsrld $10, %ymm6, %ymm9 vpxor %ymm8, %ymm7, %ymm8 # rnd_1: 4 - 5 rorl $6, %edx xorl %r13d, %ebx addl %edx, %r12d movl %r13d, %ecx andl %ebx, %eax 
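        # Scalar round recap: edx builds Sigma1(e) with the rorl 14/5/6 chain and
        # ecx first forms Ch(e,f,g) = ((f ^ g) & e) ^ g, then is reused for
        # Sigma0(a) via the rorl 9/11/2 chain. eax/ebx alternately carry b ^ c
        # from the previous round, so Maj(a,b,c) = ((b ^ c) & (a ^ b)) ^ b costs
        # just one andl and one xorl per round.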
rorl $9, %ecx xorl %r13d, %ecx xorl %r14d, %eax vpxor %ymm9, %ymm8, %ymm9 # rnd_1: 6 - 6 rorl $11, %ecx addl %r12d, %r8d xorl %r13d, %ecx addl %eax, %r12d vpshufb %ymm12, %ymm9, %ymm9 # rnd_1: 7 - 7 rorl $2, %ecx movl %r8d, %edx addl %ecx, %r12d vpaddd %ymm4, %ymm9, %ymm0 # msg_sched done: 64-67 # msg_sched: 72-75 # rnd_0: 0 - 0 rorl $14, %edx vpalignr $4, %ymm1, %ymm2, %ymm5 vpalignr $4, %ymm3, %ymm0, %ymm4 # rnd_0: 1 - 2 movl %r13d, %eax movl %r9d, %ecx addl 288(%rsp), %r11d xorl %r10d, %ecx xorl %r8d, %edx andl %r8d, %ecx vpsrld $7, %ymm5, %ymm6 vpslld $25, %ymm5, %ymm7 # rnd_0: 3 - 4 rorl $5, %edx xorl %r10d, %ecx xorl %r8d, %edx addl %ecx, %r11d rorl $6, %edx xorl %r12d, %eax addl %edx, %r11d movl %r12d, %ecx vpsrld $18, %ymm5, %ymm8 vpslld $14, %ymm5, %ymm9 # rnd_0: 5 - 6 andl %eax, %ebx rorl $9, %ecx xorl %r12d, %ecx xorl %r13d, %ebx rorl $11, %ecx addl %r11d, %r15d xorl %r12d, %ecx addl %ebx, %r11d vpor %ymm6, %ymm7, %ymm6 vpor %ymm8, %ymm9, %ymm8 # rnd_0: 7 - 7 rorl $2, %ecx movl %r15d, %edx addl %ecx, %r11d # rnd_1: 0 - 1 rorl $14, %edx movl %r12d, %ebx movl %r8d, %ecx addl 292(%rsp), %r10d xorl %r9d, %ecx vpsrld $3, %ymm5, %ymm9 vpxor %ymm6, %ymm8, %ymm6 # rnd_1: 2 - 3 xorl %r15d, %edx andl %r15d, %ecx rorl $5, %edx xorl %r9d, %ecx xorl %r15d, %edx addl %ecx, %r10d vpxor %ymm6, %ymm9, %ymm5 vpshufd $0xfa, %ymm0, %ymm6 # rnd_1: 4 - 5 rorl $6, %edx xorl %r11d, %ebx addl %edx, %r10d movl %r11d, %ecx andl %ebx, %eax rorl $9, %ecx xorl %r11d, %ecx xorl %r12d, %eax vpsrld $10, %ymm6, %ymm8 vpsrlq $19, %ymm6, %ymm7 # rnd_1: 6 - 7 rorl $11, %ecx addl %r10d, %r14d xorl %r11d, %ecx addl %eax, %r10d rorl $2, %ecx movl %r14d, %edx addl %ecx, %r10d # rnd_0: 0 - 0 rorl $14, %edx vpsrlq $0x11, %ymm6, %ymm6 vpaddd %ymm1, %ymm4, %ymm4 # rnd_0: 1 - 3 movl %r11d, %eax movl %r15d, %ecx addl 296(%rsp), %r9d xorl %r8d, %ecx xorl %r14d, %edx andl %r14d, %ecx rorl $5, %edx xorl %r8d, %ecx xorl %r14d, %edx addl %ecx, %r9d vpxor %ymm6, %ymm7, %ymm6 vpaddd %ymm5, %ymm4, %ymm4 # rnd_0: 4 - 4 rorl $6, %edx xorl %r10d, %eax addl %edx, %r9d movl %r10d, %ecx vpxor %ymm6, %ymm8, %ymm8 # rnd_0: 5 - 5 andl %eax, %ebx rorl $9, %ecx xorl %r10d, %ecx xorl %r11d, %ebx vpshufb %ymm11, %ymm8, %ymm8 # rnd_0: 6 - 6 rorl $11, %ecx addl %r9d, %r13d xorl %r10d, %ecx addl %ebx, %r9d vpaddd %ymm8, %ymm4, %ymm4 # rnd_0: 7 - 7 rorl $2, %ecx movl %r13d, %edx addl %ecx, %r9d # rnd_1: 0 - 0 rorl $14, %edx vpshufd $0x50, %ymm4, %ymm6 # rnd_1: 1 - 1 movl %r10d, %ebx movl %r14d, %ecx addl 300(%rsp), %r8d xorl %r15d, %ecx vpsrlq $0x11, %ymm6, %ymm8 vpsrlq $19, %ymm6, %ymm7 # rnd_1: 2 - 3 xorl %r13d, %edx andl %r13d, %ecx rorl $5, %edx xorl %r15d, %ecx xorl %r13d, %edx addl %ecx, %r8d vpsrld $10, %ymm6, %ymm9 vpxor %ymm8, %ymm7, %ymm8 # rnd_1: 4 - 5 rorl $6, %edx xorl %r9d, %ebx addl %edx, %r8d movl %r9d, %ecx andl %ebx, %eax rorl $9, %ecx xorl %r9d, %ecx xorl %r10d, %eax vpxor %ymm9, %ymm8, %ymm9 # rnd_1: 6 - 6 rorl $11, %ecx addl %r8d, %r12d xorl %r9d, %ecx addl %eax, %r8d vpshufb %ymm12, %ymm9, %ymm9 # rnd_1: 7 - 7 rorl $2, %ecx movl %r12d, %edx addl %ecx, %r8d vpaddd %ymm4, %ymm9, %ymm1 # msg_sched done: 72-75 # msg_sched: 80-83 # rnd_0: 0 - 0 rorl $14, %edx vpalignr $4, %ymm2, %ymm3, %ymm5 vpalignr $4, %ymm0, %ymm1, %ymm4 # rnd_0: 1 - 2 movl %r9d, %eax movl %r13d, %ecx addl 320(%rsp), %r15d xorl %r14d, %ecx xorl %r12d, %edx andl %r12d, %ecx vpsrld $7, %ymm5, %ymm6 vpslld $25, %ymm5, %ymm7 # rnd_0: 3 - 4 rorl $5, %edx xorl %r14d, %ecx xorl %r12d, %edx addl %ecx, %r15d rorl $6, %edx xorl %r8d, %eax addl %edx, %r15d movl %r8d, %ecx 
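        # sigma1 is evaluated two words at a time: vpshufd $0xfa replicates
        # W[t-2]/W[t-1], the vpsrlq 17/19 and vpsrld 10 steps form the rotate and
        # shift terms in 64-bit lanes, and the shuf_00BA/shuf_DC00 masks pack the
        # results back into the low and high halves of the schedule vector.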
vpsrld $18, %ymm5, %ymm8 vpslld $14, %ymm5, %ymm9 # rnd_0: 5 - 6 andl %eax, %ebx rorl $9, %ecx xorl %r8d, %ecx xorl %r9d, %ebx rorl $11, %ecx addl %r15d, %r11d xorl %r8d, %ecx addl %ebx, %r15d vpor %ymm6, %ymm7, %ymm6 vpor %ymm8, %ymm9, %ymm8 # rnd_0: 7 - 7 rorl $2, %ecx movl %r11d, %edx addl %ecx, %r15d # rnd_1: 0 - 1 rorl $14, %edx movl %r8d, %ebx movl %r12d, %ecx addl 324(%rsp), %r14d xorl %r13d, %ecx vpsrld $3, %ymm5, %ymm9 vpxor %ymm6, %ymm8, %ymm6 # rnd_1: 2 - 3 xorl %r11d, %edx andl %r11d, %ecx rorl $5, %edx xorl %r13d, %ecx xorl %r11d, %edx addl %ecx, %r14d vpxor %ymm6, %ymm9, %ymm5 vpshufd $0xfa, %ymm1, %ymm6 # rnd_1: 4 - 5 rorl $6, %edx xorl %r15d, %ebx addl %edx, %r14d movl %r15d, %ecx andl %ebx, %eax rorl $9, %ecx xorl %r15d, %ecx xorl %r8d, %eax vpsrld $10, %ymm6, %ymm8 vpsrlq $19, %ymm6, %ymm7 # rnd_1: 6 - 7 rorl $11, %ecx addl %r14d, %r10d xorl %r15d, %ecx addl %eax, %r14d rorl $2, %ecx movl %r10d, %edx addl %ecx, %r14d # rnd_0: 0 - 0 rorl $14, %edx vpsrlq $0x11, %ymm6, %ymm6 vpaddd %ymm2, %ymm4, %ymm4 # rnd_0: 1 - 3 movl %r15d, %eax movl %r11d, %ecx addl 328(%rsp), %r13d xorl %r12d, %ecx xorl %r10d, %edx andl %r10d, %ecx rorl $5, %edx xorl %r12d, %ecx xorl %r10d, %edx addl %ecx, %r13d vpxor %ymm6, %ymm7, %ymm6 vpaddd %ymm5, %ymm4, %ymm4 # rnd_0: 4 - 4 rorl $6, %edx xorl %r14d, %eax addl %edx, %r13d movl %r14d, %ecx vpxor %ymm6, %ymm8, %ymm8 # rnd_0: 5 - 5 andl %eax, %ebx rorl $9, %ecx xorl %r14d, %ecx xorl %r15d, %ebx vpshufb %ymm11, %ymm8, %ymm8 # rnd_0: 6 - 6 rorl $11, %ecx addl %r13d, %r9d xorl %r14d, %ecx addl %ebx, %r13d vpaddd %ymm8, %ymm4, %ymm4 # rnd_0: 7 - 7 rorl $2, %ecx movl %r9d, %edx addl %ecx, %r13d # rnd_1: 0 - 0 rorl $14, %edx vpshufd $0x50, %ymm4, %ymm6 # rnd_1: 1 - 1 movl %r14d, %ebx movl %r10d, %ecx addl 332(%rsp), %r12d xorl %r11d, %ecx vpsrlq $0x11, %ymm6, %ymm8 vpsrlq $19, %ymm6, %ymm7 # rnd_1: 2 - 3 xorl %r9d, %edx andl %r9d, %ecx rorl $5, %edx xorl %r11d, %ecx xorl %r9d, %edx addl %ecx, %r12d vpsrld $10, %ymm6, %ymm9 vpxor %ymm8, %ymm7, %ymm8 # rnd_1: 4 - 5 rorl $6, %edx xorl %r13d, %ebx addl %edx, %r12d movl %r13d, %ecx andl %ebx, %eax rorl $9, %ecx xorl %r13d, %ecx xorl %r14d, %eax vpxor %ymm9, %ymm8, %ymm9 # rnd_1: 6 - 6 rorl $11, %ecx addl %r12d, %r8d xorl %r13d, %ecx addl %eax, %r12d vpshufb %ymm12, %ymm9, %ymm9 # rnd_1: 7 - 7 rorl $2, %ecx movl %r8d, %edx addl %ecx, %r12d vpaddd %ymm4, %ymm9, %ymm2 # msg_sched done: 80-83 # msg_sched: 88-91 # rnd_0: 0 - 0 rorl $14, %edx vpalignr $4, %ymm3, %ymm0, %ymm5 vpalignr $4, %ymm1, %ymm2, %ymm4 # rnd_0: 1 - 2 movl %r13d, %eax movl %r9d, %ecx addl 352(%rsp), %r11d xorl %r10d, %ecx xorl %r8d, %edx andl %r8d, %ecx vpsrld $7, %ymm5, %ymm6 vpslld $25, %ymm5, %ymm7 # rnd_0: 3 - 4 rorl $5, %edx xorl %r10d, %ecx xorl %r8d, %edx addl %ecx, %r11d rorl $6, %edx xorl %r12d, %eax addl %edx, %r11d movl %r12d, %ecx vpsrld $18, %ymm5, %ymm8 vpslld $14, %ymm5, %ymm9 # rnd_0: 5 - 6 andl %eax, %ebx rorl $9, %ecx xorl %r12d, %ecx xorl %r13d, %ebx rorl $11, %ecx addl %r11d, %r15d xorl %r12d, %ecx addl %ebx, %r11d vpor %ymm6, %ymm7, %ymm6 vpor %ymm8, %ymm9, %ymm8 # rnd_0: 7 - 7 rorl $2, %ecx movl %r15d, %edx addl %ecx, %r11d # rnd_1: 0 - 1 rorl $14, %edx movl %r12d, %ebx movl %r8d, %ecx addl 356(%rsp), %r10d xorl %r9d, %ecx vpsrld $3, %ymm5, %ymm9 vpxor %ymm6, %ymm8, %ymm6 # rnd_1: 2 - 3 xorl %r15d, %edx andl %r15d, %ecx rorl $5, %edx xorl %r9d, %ecx xorl %r15d, %edx addl %ecx, %r10d vpxor %ymm6, %ymm9, %ymm5 vpshufd $0xfa, %ymm2, %ymm6 # rnd_1: 4 - 5 rorl $6, %edx xorl %r11d, %ebx addl %edx, %r10d movl %r11d, %ecx andl %ebx, 
%eax rorl $9, %ecx xorl %r11d, %ecx xorl %r12d, %eax vpsrld $10, %ymm6, %ymm8 vpsrlq $19, %ymm6, %ymm7 # rnd_1: 6 - 7 rorl $11, %ecx addl %r10d, %r14d xorl %r11d, %ecx addl %eax, %r10d rorl $2, %ecx movl %r14d, %edx addl %ecx, %r10d # rnd_0: 0 - 0 rorl $14, %edx vpsrlq $0x11, %ymm6, %ymm6 vpaddd %ymm3, %ymm4, %ymm4 # rnd_0: 1 - 3 movl %r11d, %eax movl %r15d, %ecx addl 360(%rsp), %r9d xorl %r8d, %ecx xorl %r14d, %edx andl %r14d, %ecx rorl $5, %edx xorl %r8d, %ecx xorl %r14d, %edx addl %ecx, %r9d vpxor %ymm6, %ymm7, %ymm6 vpaddd %ymm5, %ymm4, %ymm4 # rnd_0: 4 - 4 rorl $6, %edx xorl %r10d, %eax addl %edx, %r9d movl %r10d, %ecx vpxor %ymm6, %ymm8, %ymm8 # rnd_0: 5 - 5 andl %eax, %ebx rorl $9, %ecx xorl %r10d, %ecx xorl %r11d, %ebx vpshufb %ymm11, %ymm8, %ymm8 # rnd_0: 6 - 6 rorl $11, %ecx addl %r9d, %r13d xorl %r10d, %ecx addl %ebx, %r9d vpaddd %ymm8, %ymm4, %ymm4 # rnd_0: 7 - 7 rorl $2, %ecx movl %r13d, %edx addl %ecx, %r9d # rnd_1: 0 - 0 rorl $14, %edx vpshufd $0x50, %ymm4, %ymm6 # rnd_1: 1 - 1 movl %r10d, %ebx movl %r14d, %ecx addl 364(%rsp), %r8d xorl %r15d, %ecx vpsrlq $0x11, %ymm6, %ymm8 vpsrlq $19, %ymm6, %ymm7 # rnd_1: 2 - 3 xorl %r13d, %edx andl %r13d, %ecx rorl $5, %edx xorl %r15d, %ecx xorl %r13d, %edx addl %ecx, %r8d vpsrld $10, %ymm6, %ymm9 vpxor %ymm8, %ymm7, %ymm8 # rnd_1: 4 - 5 rorl $6, %edx xorl %r9d, %ebx addl %edx, %r8d movl %r9d, %ecx andl %ebx, %eax rorl $9, %ecx xorl %r9d, %ecx xorl %r10d, %eax vpxor %ymm9, %ymm8, %ymm9 # rnd_1: 6 - 6 rorl $11, %ecx addl %r8d, %r12d xorl %r9d, %ecx addl %eax, %r8d vpshufb %ymm12, %ymm9, %ymm9 # rnd_1: 7 - 7 rorl $2, %ecx movl %r12d, %edx addl %ecx, %r8d vpaddd %ymm4, %ymm9, %ymm3 # msg_sched done: 88-91 # set_w_k_xfer_4: 12 vpaddd 384+L_avx2_sha256_k(%rip), %ymm0, %ymm4 vpaddd 416+L_avx2_sha256_k(%rip), %ymm1, %ymm5 vmovdqu %ymm4, 384(%rsp) vmovdqu %ymm5, 416(%rsp) vpaddd 448+L_avx2_sha256_k(%rip), %ymm2, %ymm4 vpaddd 480+L_avx2_sha256_k(%rip), %ymm3, %ymm5 vmovdqu %ymm4, 448(%rsp) vmovdqu %ymm5, 480(%rsp) # rnd_all_4: 24-27 addl 384(%rsp), %r15d movl %r13d, %ecx movl %r9d, %eax xorl %r14d, %ecx rorl $14, %edx andl %r12d, %ecx xorl %r12d, %edx xorl %r14d, %ecx rorl $5, %edx addl %ecx, %r15d xorl %r12d, %edx xorl %r8d, %eax rorl $6, %edx movl %r8d, %ecx addl %edx, %r15d rorl $9, %ecx andl %eax, %ebx xorl %r8d, %ecx xorl %r9d, %ebx rorl $11, %ecx addl %r15d, %r11d xorl %r8d, %ecx addl %ebx, %r15d rorl $2, %ecx movl %r11d, %edx addl %ecx, %r15d addl 388(%rsp), %r14d movl %r12d, %ecx movl %r8d, %ebx xorl %r13d, %ecx rorl $14, %edx andl %r11d, %ecx xorl %r11d, %edx xorl %r13d, %ecx rorl $5, %edx addl %ecx, %r14d xorl %r11d, %edx xorl %r15d, %ebx rorl $6, %edx movl %r15d, %ecx addl %edx, %r14d rorl $9, %ecx andl %ebx, %eax xorl %r15d, %ecx xorl %r8d, %eax rorl $11, %ecx addl %r14d, %r10d xorl %r15d, %ecx addl %eax, %r14d rorl $2, %ecx movl %r10d, %edx addl %ecx, %r14d addl 392(%rsp), %r13d movl %r11d, %ecx movl %r15d, %eax xorl %r12d, %ecx rorl $14, %edx andl %r10d, %ecx xorl %r10d, %edx xorl %r12d, %ecx rorl $5, %edx addl %ecx, %r13d xorl %r10d, %edx xorl %r14d, %eax rorl $6, %edx movl %r14d, %ecx addl %edx, %r13d rorl $9, %ecx andl %eax, %ebx xorl %r14d, %ecx xorl %r15d, %ebx rorl $11, %ecx addl %r13d, %r9d xorl %r14d, %ecx addl %ebx, %r13d rorl $2, %ecx movl %r9d, %edx addl %ecx, %r13d addl 396(%rsp), %r12d movl %r10d, %ecx movl %r14d, %ebx xorl %r11d, %ecx rorl $14, %edx andl %r9d, %ecx xorl %r9d, %edx xorl %r11d, %ecx rorl $5, %edx addl %ecx, %r12d xorl %r9d, %edx xorl %r13d, %ebx rorl $6, %edx movl %r13d, %ecx addl %edx, %r12d rorl $9, 
%ecx andl %ebx, %eax xorl %r13d, %ecx xorl %r14d, %eax rorl $11, %ecx addl %r12d, %r8d xorl %r13d, %ecx addl %eax, %r12d rorl $2, %ecx movl %r8d, %edx addl %ecx, %r12d # rnd_all_4: 26-29 addl 416(%rsp), %r11d movl %r9d, %ecx movl %r13d, %eax xorl %r10d, %ecx rorl $14, %edx andl %r8d, %ecx xorl %r8d, %edx xorl %r10d, %ecx rorl $5, %edx addl %ecx, %r11d xorl %r8d, %edx xorl %r12d, %eax rorl $6, %edx movl %r12d, %ecx addl %edx, %r11d rorl $9, %ecx andl %eax, %ebx xorl %r12d, %ecx xorl %r13d, %ebx rorl $11, %ecx addl %r11d, %r15d xorl %r12d, %ecx addl %ebx, %r11d rorl $2, %ecx movl %r15d, %edx addl %ecx, %r11d addl 420(%rsp), %r10d movl %r8d, %ecx movl %r12d, %ebx xorl %r9d, %ecx rorl $14, %edx andl %r15d, %ecx xorl %r15d, %edx xorl %r9d, %ecx rorl $5, %edx addl %ecx, %r10d xorl %r15d, %edx xorl %r11d, %ebx rorl $6, %edx movl %r11d, %ecx addl %edx, %r10d rorl $9, %ecx andl %ebx, %eax xorl %r11d, %ecx xorl %r12d, %eax rorl $11, %ecx addl %r10d, %r14d xorl %r11d, %ecx addl %eax, %r10d rorl $2, %ecx movl %r14d, %edx addl %ecx, %r10d addl 424(%rsp), %r9d movl %r15d, %ecx movl %r11d, %eax xorl %r8d, %ecx rorl $14, %edx andl %r14d, %ecx xorl %r14d, %edx xorl %r8d, %ecx rorl $5, %edx addl %ecx, %r9d xorl %r14d, %edx xorl %r10d, %eax rorl $6, %edx movl %r10d, %ecx addl %edx, %r9d rorl $9, %ecx andl %eax, %ebx xorl %r10d, %ecx xorl %r11d, %ebx rorl $11, %ecx addl %r9d, %r13d xorl %r10d, %ecx addl %ebx, %r9d rorl $2, %ecx movl %r13d, %edx addl %ecx, %r9d addl 428(%rsp), %r8d movl %r14d, %ecx movl %r10d, %ebx xorl %r15d, %ecx rorl $14, %edx andl %r13d, %ecx xorl %r13d, %edx xorl %r15d, %ecx rorl $5, %edx addl %ecx, %r8d xorl %r13d, %edx xorl %r9d, %ebx rorl $6, %edx movl %r9d, %ecx addl %edx, %r8d rorl $9, %ecx andl %ebx, %eax xorl %r9d, %ecx xorl %r10d, %eax rorl $11, %ecx addl %r8d, %r12d xorl %r9d, %ecx addl %eax, %r8d rorl $2, %ecx movl %r12d, %edx addl %ecx, %r8d # rnd_all_4: 28-31 addl 448(%rsp), %r15d movl %r13d, %ecx movl %r9d, %eax xorl %r14d, %ecx rorl $14, %edx andl %r12d, %ecx xorl %r12d, %edx xorl %r14d, %ecx rorl $5, %edx addl %ecx, %r15d xorl %r12d, %edx xorl %r8d, %eax rorl $6, %edx movl %r8d, %ecx addl %edx, %r15d rorl $9, %ecx andl %eax, %ebx xorl %r8d, %ecx xorl %r9d, %ebx rorl $11, %ecx addl %r15d, %r11d xorl %r8d, %ecx addl %ebx, %r15d rorl $2, %ecx movl %r11d, %edx addl %ecx, %r15d addl 452(%rsp), %r14d movl %r12d, %ecx movl %r8d, %ebx xorl %r13d, %ecx rorl $14, %edx andl %r11d, %ecx xorl %r11d, %edx xorl %r13d, %ecx rorl $5, %edx addl %ecx, %r14d xorl %r11d, %edx xorl %r15d, %ebx rorl $6, %edx movl %r15d, %ecx addl %edx, %r14d rorl $9, %ecx andl %ebx, %eax xorl %r15d, %ecx xorl %r8d, %eax rorl $11, %ecx addl %r14d, %r10d xorl %r15d, %ecx addl %eax, %r14d rorl $2, %ecx movl %r10d, %edx addl %ecx, %r14d addl 456(%rsp), %r13d movl %r11d, %ecx movl %r15d, %eax xorl %r12d, %ecx rorl $14, %edx andl %r10d, %ecx xorl %r10d, %edx xorl %r12d, %ecx rorl $5, %edx addl %ecx, %r13d xorl %r10d, %edx xorl %r14d, %eax rorl $6, %edx movl %r14d, %ecx addl %edx, %r13d rorl $9, %ecx andl %eax, %ebx xorl %r14d, %ecx xorl %r15d, %ebx rorl $11, %ecx addl %r13d, %r9d xorl %r14d, %ecx addl %ebx, %r13d rorl $2, %ecx movl %r9d, %edx addl %ecx, %r13d addl 460(%rsp), %r12d movl %r10d, %ecx movl %r14d, %ebx xorl %r11d, %ecx rorl $14, %edx andl %r9d, %ecx xorl %r9d, %edx xorl %r11d, %ecx rorl $5, %edx addl %ecx, %r12d xorl %r9d, %edx xorl %r13d, %ebx rorl $6, %edx movl %r13d, %ecx addl %edx, %r12d rorl $9, %ecx andl %ebx, %eax xorl %r13d, %ecx xorl %r14d, %eax rorl $11, %ecx addl %r12d, %r8d xorl %r13d, %ecx addl 
%eax, %r12d rorl $2, %ecx movl %r8d, %edx addl %ecx, %r12d # rnd_all_4: 30-33 addl 480(%rsp), %r11d movl %r9d, %ecx movl %r13d, %eax xorl %r10d, %ecx rorl $14, %edx andl %r8d, %ecx xorl %r8d, %edx xorl %r10d, %ecx rorl $5, %edx addl %ecx, %r11d xorl %r8d, %edx xorl %r12d, %eax rorl $6, %edx movl %r12d, %ecx addl %edx, %r11d rorl $9, %ecx andl %eax, %ebx xorl %r12d, %ecx xorl %r13d, %ebx rorl $11, %ecx addl %r11d, %r15d xorl %r12d, %ecx addl %ebx, %r11d rorl $2, %ecx movl %r15d, %edx addl %ecx, %r11d addl 484(%rsp), %r10d movl %r8d, %ecx movl %r12d, %ebx xorl %r9d, %ecx rorl $14, %edx andl %r15d, %ecx xorl %r15d, %edx xorl %r9d, %ecx rorl $5, %edx addl %ecx, %r10d xorl %r15d, %edx xorl %r11d, %ebx rorl $6, %edx movl %r11d, %ecx addl %edx, %r10d rorl $9, %ecx andl %ebx, %eax xorl %r11d, %ecx xorl %r12d, %eax rorl $11, %ecx addl %r10d, %r14d xorl %r11d, %ecx addl %eax, %r10d rorl $2, %ecx movl %r14d, %edx addl %ecx, %r10d addl 488(%rsp), %r9d movl %r15d, %ecx movl %r11d, %eax xorl %r8d, %ecx rorl $14, %edx andl %r14d, %ecx xorl %r14d, %edx xorl %r8d, %ecx rorl $5, %edx addl %ecx, %r9d xorl %r14d, %edx xorl %r10d, %eax rorl $6, %edx movl %r10d, %ecx addl %edx, %r9d rorl $9, %ecx andl %eax, %ebx xorl %r10d, %ecx xorl %r11d, %ebx rorl $11, %ecx addl %r9d, %r13d xorl %r10d, %ecx addl %ebx, %r9d rorl $2, %ecx movl %r13d, %edx addl %ecx, %r9d addl 492(%rsp), %r8d movl %r14d, %ecx movl %r10d, %ebx xorl %r15d, %ecx rorl $14, %edx andl %r13d, %ecx xorl %r13d, %edx xorl %r15d, %ecx rorl $5, %edx addl %ecx, %r8d xorl %r13d, %edx xorl %r9d, %ebx rorl $6, %edx movl %r9d, %ecx addl %edx, %r8d rorl $9, %ecx andl %ebx, %eax xorl %r9d, %ecx xorl %r10d, %eax rorl $11, %ecx addl %r8d, %r12d xorl %r9d, %ecx addl %eax, %r8d rorl $2, %ecx movl %r12d, %edx addl %ecx, %r8d addl (%rdi), %r8d addl 4(%rdi), %r9d addl 8(%rdi), %r10d addl 12(%rdi), %r11d addl 16(%rdi), %r12d addl 20(%rdi), %r13d addl 24(%rdi), %r14d addl 28(%rdi), %r15d movl %r8d, (%rdi) movl %r9d, 4(%rdi) movl %r10d, 8(%rdi) movl %r11d, 12(%rdi) movl %r12d, 16(%rdi) movl %r13d, 20(%rdi) movl %r14d, 24(%rdi) movl %r15d, 28(%rdi) movl %r9d, %ebx movl %r12d, %edx xorl %r10d, %ebx # rnd_all_4: 1-4 addl 16(%rsp), %r15d movl %r13d, %ecx movl %r9d, %eax xorl %r14d, %ecx rorl $14, %edx andl %r12d, %ecx xorl %r12d, %edx xorl %r14d, %ecx rorl $5, %edx addl %ecx, %r15d xorl %r12d, %edx xorl %r8d, %eax rorl $6, %edx movl %r8d, %ecx addl %edx, %r15d rorl $9, %ecx andl %eax, %ebx xorl %r8d, %ecx xorl %r9d, %ebx rorl $11, %ecx addl %r15d, %r11d xorl %r8d, %ecx addl %ebx, %r15d rorl $2, %ecx movl %r11d, %edx addl %ecx, %r15d addl 20(%rsp), %r14d movl %r12d, %ecx movl %r8d, %ebx xorl %r13d, %ecx rorl $14, %edx andl %r11d, %ecx xorl %r11d, %edx xorl %r13d, %ecx rorl $5, %edx addl %ecx, %r14d xorl %r11d, %edx xorl %r15d, %ebx rorl $6, %edx movl %r15d, %ecx addl %edx, %r14d rorl $9, %ecx andl %ebx, %eax xorl %r15d, %ecx xorl %r8d, %eax rorl $11, %ecx addl %r14d, %r10d xorl %r15d, %ecx addl %eax, %r14d rorl $2, %ecx movl %r10d, %edx addl %ecx, %r14d addl 24(%rsp), %r13d movl %r11d, %ecx movl %r15d, %eax xorl %r12d, %ecx rorl $14, %edx andl %r10d, %ecx xorl %r10d, %edx xorl %r12d, %ecx rorl $5, %edx addl %ecx, %r13d xorl %r10d, %edx xorl %r14d, %eax rorl $6, %edx movl %r14d, %ecx addl %edx, %r13d rorl $9, %ecx andl %eax, %ebx xorl %r14d, %ecx xorl %r15d, %ebx rorl $11, %ecx addl %r13d, %r9d xorl %r14d, %ecx addl %ebx, %r13d rorl $2, %ecx movl %r9d, %edx addl %ecx, %r13d addl 28(%rsp), %r12d movl %r10d, %ecx movl %r14d, %ebx xorl %r11d, %ecx rorl $14, %edx andl %r9d, %ecx 
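        # Second lane: these rnd_all_4 passes (stack offsets 16+32*t) process the
        # second 64-byte block. The first block was folded into (%rdi) above; the
        # second block's W+K values are the upper xmm half of each ymm entry
        # already spilled to the stack.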
xorl %r9d, %edx xorl %r11d, %ecx rorl $5, %edx addl %ecx, %r12d xorl %r9d, %edx xorl %r13d, %ebx rorl $6, %edx movl %r13d, %ecx addl %edx, %r12d rorl $9, %ecx andl %ebx, %eax xorl %r13d, %ecx xorl %r14d, %eax rorl $11, %ecx addl %r12d, %r8d xorl %r13d, %ecx addl %eax, %r12d rorl $2, %ecx movl %r8d, %edx addl %ecx, %r12d # rnd_all_4: 3-6 addl 48(%rsp), %r11d movl %r9d, %ecx movl %r13d, %eax xorl %r10d, %ecx rorl $14, %edx andl %r8d, %ecx xorl %r8d, %edx xorl %r10d, %ecx rorl $5, %edx addl %ecx, %r11d xorl %r8d, %edx xorl %r12d, %eax rorl $6, %edx movl %r12d, %ecx addl %edx, %r11d rorl $9, %ecx andl %eax, %ebx xorl %r12d, %ecx xorl %r13d, %ebx rorl $11, %ecx addl %r11d, %r15d xorl %r12d, %ecx addl %ebx, %r11d rorl $2, %ecx movl %r15d, %edx addl %ecx, %r11d addl 52(%rsp), %r10d movl %r8d, %ecx movl %r12d, %ebx xorl %r9d, %ecx rorl $14, %edx andl %r15d, %ecx xorl %r15d, %edx xorl %r9d, %ecx rorl $5, %edx addl %ecx, %r10d xorl %r15d, %edx xorl %r11d, %ebx rorl $6, %edx movl %r11d, %ecx addl %edx, %r10d rorl $9, %ecx andl %ebx, %eax xorl %r11d, %ecx xorl %r12d, %eax rorl $11, %ecx addl %r10d, %r14d xorl %r11d, %ecx addl %eax, %r10d rorl $2, %ecx movl %r14d, %edx addl %ecx, %r10d addl 56(%rsp), %r9d movl %r15d, %ecx movl %r11d, %eax xorl %r8d, %ecx rorl $14, %edx andl %r14d, %ecx xorl %r14d, %edx xorl %r8d, %ecx rorl $5, %edx addl %ecx, %r9d xorl %r14d, %edx xorl %r10d, %eax rorl $6, %edx movl %r10d, %ecx addl %edx, %r9d rorl $9, %ecx andl %eax, %ebx xorl %r10d, %ecx xorl %r11d, %ebx rorl $11, %ecx addl %r9d, %r13d xorl %r10d, %ecx addl %ebx, %r9d rorl $2, %ecx movl %r13d, %edx addl %ecx, %r9d addl 60(%rsp), %r8d movl %r14d, %ecx movl %r10d, %ebx xorl %r15d, %ecx rorl $14, %edx andl %r13d, %ecx xorl %r13d, %edx xorl %r15d, %ecx rorl $5, %edx addl %ecx, %r8d xorl %r13d, %edx xorl %r9d, %ebx rorl $6, %edx movl %r9d, %ecx addl %edx, %r8d rorl $9, %ecx andl %ebx, %eax xorl %r9d, %ecx xorl %r10d, %eax rorl $11, %ecx addl %r8d, %r12d xorl %r9d, %ecx addl %eax, %r8d rorl $2, %ecx movl %r12d, %edx addl %ecx, %r8d # rnd_all_4: 5-8 addl 80(%rsp), %r15d movl %r13d, %ecx movl %r9d, %eax xorl %r14d, %ecx rorl $14, %edx andl %r12d, %ecx xorl %r12d, %edx xorl %r14d, %ecx rorl $5, %edx addl %ecx, %r15d xorl %r12d, %edx xorl %r8d, %eax rorl $6, %edx movl %r8d, %ecx addl %edx, %r15d rorl $9, %ecx andl %eax, %ebx xorl %r8d, %ecx xorl %r9d, %ebx rorl $11, %ecx addl %r15d, %r11d xorl %r8d, %ecx addl %ebx, %r15d rorl $2, %ecx movl %r11d, %edx addl %ecx, %r15d addl 84(%rsp), %r14d movl %r12d, %ecx movl %r8d, %ebx xorl %r13d, %ecx rorl $14, %edx andl %r11d, %ecx xorl %r11d, %edx xorl %r13d, %ecx rorl $5, %edx addl %ecx, %r14d xorl %r11d, %edx xorl %r15d, %ebx rorl $6, %edx movl %r15d, %ecx addl %edx, %r14d rorl $9, %ecx andl %ebx, %eax xorl %r15d, %ecx xorl %r8d, %eax rorl $11, %ecx addl %r14d, %r10d xorl %r15d, %ecx addl %eax, %r14d rorl $2, %ecx movl %r10d, %edx addl %ecx, %r14d addl 88(%rsp), %r13d movl %r11d, %ecx movl %r15d, %eax xorl %r12d, %ecx rorl $14, %edx andl %r10d, %ecx xorl %r10d, %edx xorl %r12d, %ecx rorl $5, %edx addl %ecx, %r13d xorl %r10d, %edx xorl %r14d, %eax rorl $6, %edx movl %r14d, %ecx addl %edx, %r13d rorl $9, %ecx andl %eax, %ebx xorl %r14d, %ecx xorl %r15d, %ebx rorl $11, %ecx addl %r13d, %r9d xorl %r14d, %ecx addl %ebx, %r13d rorl $2, %ecx movl %r9d, %edx addl %ecx, %r13d addl 92(%rsp), %r12d movl %r10d, %ecx movl %r14d, %ebx xorl %r11d, %ecx rorl $14, %edx andl %r9d, %ecx xorl %r9d, %edx xorl %r11d, %ecx rorl $5, %edx addl %ecx, %r12d xorl %r9d, %edx xorl %r13d, %ebx rorl $6, %edx movl 
%r13d, %ecx addl %edx, %r12d rorl $9, %ecx andl %ebx, %eax xorl %r13d, %ecx xorl %r14d, %eax rorl $11, %ecx addl %r12d, %r8d xorl %r13d, %ecx addl %eax, %r12d rorl $2, %ecx movl %r8d, %edx addl %ecx, %r12d # rnd_all_4: 7-10 addl 112(%rsp), %r11d movl %r9d, %ecx movl %r13d, %eax xorl %r10d, %ecx rorl $14, %edx andl %r8d, %ecx xorl %r8d, %edx xorl %r10d, %ecx rorl $5, %edx addl %ecx, %r11d xorl %r8d, %edx xorl %r12d, %eax rorl $6, %edx movl %r12d, %ecx addl %edx, %r11d rorl $9, %ecx andl %eax, %ebx xorl %r12d, %ecx xorl %r13d, %ebx rorl $11, %ecx addl %r11d, %r15d xorl %r12d, %ecx addl %ebx, %r11d rorl $2, %ecx movl %r15d, %edx addl %ecx, %r11d addl 116(%rsp), %r10d movl %r8d, %ecx movl %r12d, %ebx xorl %r9d, %ecx rorl $14, %edx andl %r15d, %ecx xorl %r15d, %edx xorl %r9d, %ecx rorl $5, %edx addl %ecx, %r10d xorl %r15d, %edx xorl %r11d, %ebx rorl $6, %edx movl %r11d, %ecx addl %edx, %r10d rorl $9, %ecx andl %ebx, %eax xorl %r11d, %ecx xorl %r12d, %eax rorl $11, %ecx addl %r10d, %r14d xorl %r11d, %ecx addl %eax, %r10d rorl $2, %ecx movl %r14d, %edx addl %ecx, %r10d addl 120(%rsp), %r9d movl %r15d, %ecx movl %r11d, %eax xorl %r8d, %ecx rorl $14, %edx andl %r14d, %ecx xorl %r14d, %edx xorl %r8d, %ecx rorl $5, %edx addl %ecx, %r9d xorl %r14d, %edx xorl %r10d, %eax rorl $6, %edx movl %r10d, %ecx addl %edx, %r9d rorl $9, %ecx andl %eax, %ebx xorl %r10d, %ecx xorl %r11d, %ebx rorl $11, %ecx addl %r9d, %r13d xorl %r10d, %ecx addl %ebx, %r9d rorl $2, %ecx movl %r13d, %edx addl %ecx, %r9d addl 124(%rsp), %r8d movl %r14d, %ecx movl %r10d, %ebx xorl %r15d, %ecx rorl $14, %edx andl %r13d, %ecx xorl %r13d, %edx xorl %r15d, %ecx rorl $5, %edx addl %ecx, %r8d xorl %r13d, %edx xorl %r9d, %ebx rorl $6, %edx movl %r9d, %ecx addl %edx, %r8d rorl $9, %ecx andl %ebx, %eax xorl %r9d, %ecx xorl %r10d, %eax rorl $11, %ecx addl %r8d, %r12d xorl %r9d, %ecx addl %eax, %r8d rorl $2, %ecx movl %r12d, %edx addl %ecx, %r8d # rnd_all_4: 9-12 addl 144(%rsp), %r15d movl %r13d, %ecx movl %r9d, %eax xorl %r14d, %ecx rorl $14, %edx andl %r12d, %ecx xorl %r12d, %edx xorl %r14d, %ecx rorl $5, %edx addl %ecx, %r15d xorl %r12d, %edx xorl %r8d, %eax rorl $6, %edx movl %r8d, %ecx addl %edx, %r15d rorl $9, %ecx andl %eax, %ebx xorl %r8d, %ecx xorl %r9d, %ebx rorl $11, %ecx addl %r15d, %r11d xorl %r8d, %ecx addl %ebx, %r15d rorl $2, %ecx movl %r11d, %edx addl %ecx, %r15d addl 148(%rsp), %r14d movl %r12d, %ecx movl %r8d, %ebx xorl %r13d, %ecx rorl $14, %edx andl %r11d, %ecx xorl %r11d, %edx xorl %r13d, %ecx rorl $5, %edx addl %ecx, %r14d xorl %r11d, %edx xorl %r15d, %ebx rorl $6, %edx movl %r15d, %ecx addl %edx, %r14d rorl $9, %ecx andl %ebx, %eax xorl %r15d, %ecx xorl %r8d, %eax rorl $11, %ecx addl %r14d, %r10d xorl %r15d, %ecx addl %eax, %r14d rorl $2, %ecx movl %r10d, %edx addl %ecx, %r14d addl 152(%rsp), %r13d movl %r11d, %ecx movl %r15d, %eax xorl %r12d, %ecx rorl $14, %edx andl %r10d, %ecx xorl %r10d, %edx xorl %r12d, %ecx rorl $5, %edx addl %ecx, %r13d xorl %r10d, %edx xorl %r14d, %eax rorl $6, %edx movl %r14d, %ecx addl %edx, %r13d rorl $9, %ecx andl %eax, %ebx xorl %r14d, %ecx xorl %r15d, %ebx rorl $11, %ecx addl %r13d, %r9d xorl %r14d, %ecx addl %ebx, %r13d rorl $2, %ecx movl %r9d, %edx addl %ecx, %r13d addl 156(%rsp), %r12d movl %r10d, %ecx movl %r14d, %ebx xorl %r11d, %ecx rorl $14, %edx andl %r9d, %ecx xorl %r9d, %edx xorl %r11d, %ecx rorl $5, %edx addl %ecx, %r12d xorl %r9d, %edx xorl %r13d, %ebx rorl $6, %edx movl %r13d, %ecx addl %edx, %r12d rorl $9, %ecx andl %ebx, %eax xorl %r13d, %ecx xorl %r14d, %eax rorl $11, %ecx 
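        # No vector work is interleaved in these rounds: the second lane's message
        # schedule was produced together with the first lane's, so this is plain
        # scalar SHA-256 compression reading precomputed W+K from the stack.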
addl %r12d, %r8d xorl %r13d, %ecx addl %eax, %r12d rorl $2, %ecx movl %r8d, %edx addl %ecx, %r12d # rnd_all_4: 11-14 addl 176(%rsp), %r11d movl %r9d, %ecx movl %r13d, %eax xorl %r10d, %ecx rorl $14, %edx andl %r8d, %ecx xorl %r8d, %edx xorl %r10d, %ecx rorl $5, %edx addl %ecx, %r11d xorl %r8d, %edx xorl %r12d, %eax rorl $6, %edx movl %r12d, %ecx addl %edx, %r11d rorl $9, %ecx andl %eax, %ebx xorl %r12d, %ecx xorl %r13d, %ebx rorl $11, %ecx addl %r11d, %r15d xorl %r12d, %ecx addl %ebx, %r11d rorl $2, %ecx movl %r15d, %edx addl %ecx, %r11d addl 180(%rsp), %r10d movl %r8d, %ecx movl %r12d, %ebx xorl %r9d, %ecx rorl $14, %edx andl %r15d, %ecx xorl %r15d, %edx xorl %r9d, %ecx rorl $5, %edx addl %ecx, %r10d xorl %r15d, %edx xorl %r11d, %ebx rorl $6, %edx movl %r11d, %ecx addl %edx, %r10d rorl $9, %ecx andl %ebx, %eax xorl %r11d, %ecx xorl %r12d, %eax rorl $11, %ecx addl %r10d, %r14d xorl %r11d, %ecx addl %eax, %r10d rorl $2, %ecx movl %r14d, %edx addl %ecx, %r10d addl 184(%rsp), %r9d movl %r15d, %ecx movl %r11d, %eax xorl %r8d, %ecx rorl $14, %edx andl %r14d, %ecx xorl %r14d, %edx xorl %r8d, %ecx rorl $5, %edx addl %ecx, %r9d xorl %r14d, %edx xorl %r10d, %eax rorl $6, %edx movl %r10d, %ecx addl %edx, %r9d rorl $9, %ecx andl %eax, %ebx xorl %r10d, %ecx xorl %r11d, %ebx rorl $11, %ecx addl %r9d, %r13d xorl %r10d, %ecx addl %ebx, %r9d rorl $2, %ecx movl %r13d, %edx addl %ecx, %r9d addl 188(%rsp), %r8d movl %r14d, %ecx movl %r10d, %ebx xorl %r15d, %ecx rorl $14, %edx andl %r13d, %ecx xorl %r13d, %edx xorl %r15d, %ecx rorl $5, %edx addl %ecx, %r8d xorl %r13d, %edx xorl %r9d, %ebx rorl $6, %edx movl %r9d, %ecx addl %edx, %r8d rorl $9, %ecx andl %ebx, %eax xorl %r9d, %ecx xorl %r10d, %eax rorl $11, %ecx addl %r8d, %r12d xorl %r9d, %ecx addl %eax, %r8d rorl $2, %ecx movl %r12d, %edx addl %ecx, %r8d # rnd_all_4: 13-16 addl 208(%rsp), %r15d movl %r13d, %ecx movl %r9d, %eax xorl %r14d, %ecx rorl $14, %edx andl %r12d, %ecx xorl %r12d, %edx xorl %r14d, %ecx rorl $5, %edx addl %ecx, %r15d xorl %r12d, %edx xorl %r8d, %eax rorl $6, %edx movl %r8d, %ecx addl %edx, %r15d rorl $9, %ecx andl %eax, %ebx xorl %r8d, %ecx xorl %r9d, %ebx rorl $11, %ecx addl %r15d, %r11d xorl %r8d, %ecx addl %ebx, %r15d rorl $2, %ecx movl %r11d, %edx addl %ecx, %r15d addl 212(%rsp), %r14d movl %r12d, %ecx movl %r8d, %ebx xorl %r13d, %ecx rorl $14, %edx andl %r11d, %ecx xorl %r11d, %edx xorl %r13d, %ecx rorl $5, %edx addl %ecx, %r14d xorl %r11d, %edx xorl %r15d, %ebx rorl $6, %edx movl %r15d, %ecx addl %edx, %r14d rorl $9, %ecx andl %ebx, %eax xorl %r15d, %ecx xorl %r8d, %eax rorl $11, %ecx addl %r14d, %r10d xorl %r15d, %ecx addl %eax, %r14d rorl $2, %ecx movl %r10d, %edx addl %ecx, %r14d addl 216(%rsp), %r13d movl %r11d, %ecx movl %r15d, %eax xorl %r12d, %ecx rorl $14, %edx andl %r10d, %ecx xorl %r10d, %edx xorl %r12d, %ecx rorl $5, %edx addl %ecx, %r13d xorl %r10d, %edx xorl %r14d, %eax rorl $6, %edx movl %r14d, %ecx addl %edx, %r13d rorl $9, %ecx andl %eax, %ebx xorl %r14d, %ecx xorl %r15d, %ebx rorl $11, %ecx addl %r13d, %r9d xorl %r14d, %ecx addl %ebx, %r13d rorl $2, %ecx movl %r9d, %edx addl %ecx, %r13d addl 220(%rsp), %r12d movl %r10d, %ecx movl %r14d, %ebx xorl %r11d, %ecx rorl $14, %edx andl %r9d, %ecx xorl %r9d, %edx xorl %r11d, %ecx rorl $5, %edx addl %ecx, %r12d xorl %r9d, %edx xorl %r13d, %ebx rorl $6, %edx movl %r13d, %ecx addl %edx, %r12d rorl $9, %ecx andl %ebx, %eax xorl %r13d, %ecx xorl %r14d, %eax rorl $11, %ecx addl %r12d, %r8d xorl %r13d, %ecx addl %eax, %r12d rorl $2, %ecx movl %r8d, %edx addl %ecx, %r12d # 
rnd_all_4: 15-18 addl 240(%rsp), %r11d movl %r9d, %ecx movl %r13d, %eax xorl %r10d, %ecx rorl $14, %edx andl %r8d, %ecx xorl %r8d, %edx xorl %r10d, %ecx rorl $5, %edx addl %ecx, %r11d xorl %r8d, %edx xorl %r12d, %eax rorl $6, %edx movl %r12d, %ecx addl %edx, %r11d rorl $9, %ecx andl %eax, %ebx xorl %r12d, %ecx xorl %r13d, %ebx rorl $11, %ecx addl %r11d, %r15d xorl %r12d, %ecx addl %ebx, %r11d rorl $2, %ecx movl %r15d, %edx addl %ecx, %r11d addl 244(%rsp), %r10d movl %r8d, %ecx movl %r12d, %ebx xorl %r9d, %ecx rorl $14, %edx andl %r15d, %ecx xorl %r15d, %edx xorl %r9d, %ecx rorl $5, %edx addl %ecx, %r10d xorl %r15d, %edx xorl %r11d, %ebx rorl $6, %edx movl %r11d, %ecx addl %edx, %r10d rorl $9, %ecx andl %ebx, %eax xorl %r11d, %ecx xorl %r12d, %eax rorl $11, %ecx addl %r10d, %r14d xorl %r11d, %ecx addl %eax, %r10d rorl $2, %ecx movl %r14d, %edx addl %ecx, %r10d addl 248(%rsp), %r9d movl %r15d, %ecx movl %r11d, %eax xorl %r8d, %ecx rorl $14, %edx andl %r14d, %ecx xorl %r14d, %edx xorl %r8d, %ecx rorl $5, %edx addl %ecx, %r9d xorl %r14d, %edx xorl %r10d, %eax rorl $6, %edx movl %r10d, %ecx addl %edx, %r9d rorl $9, %ecx andl %eax, %ebx xorl %r10d, %ecx xorl %r11d, %ebx rorl $11, %ecx addl %r9d, %r13d xorl %r10d, %ecx addl %ebx, %r9d rorl $2, %ecx movl %r13d, %edx addl %ecx, %r9d addl 252(%rsp), %r8d movl %r14d, %ecx movl %r10d, %ebx xorl %r15d, %ecx rorl $14, %edx andl %r13d, %ecx xorl %r13d, %edx xorl %r15d, %ecx rorl $5, %edx addl %ecx, %r8d xorl %r13d, %edx xorl %r9d, %ebx rorl $6, %edx movl %r9d, %ecx addl %edx, %r8d rorl $9, %ecx andl %ebx, %eax xorl %r9d, %ecx xorl %r10d, %eax rorl $11, %ecx addl %r8d, %r12d xorl %r9d, %ecx addl %eax, %r8d rorl $2, %ecx movl %r12d, %edx addl %ecx, %r8d # rnd_all_4: 17-20 addl 272(%rsp), %r15d movl %r13d, %ecx movl %r9d, %eax xorl %r14d, %ecx rorl $14, %edx andl %r12d, %ecx xorl %r12d, %edx xorl %r14d, %ecx rorl $5, %edx addl %ecx, %r15d xorl %r12d, %edx xorl %r8d, %eax rorl $6, %edx movl %r8d, %ecx addl %edx, %r15d rorl $9, %ecx andl %eax, %ebx xorl %r8d, %ecx xorl %r9d, %ebx rorl $11, %ecx addl %r15d, %r11d xorl %r8d, %ecx addl %ebx, %r15d rorl $2, %ecx movl %r11d, %edx addl %ecx, %r15d addl 276(%rsp), %r14d movl %r12d, %ecx movl %r8d, %ebx xorl %r13d, %ecx rorl $14, %edx andl %r11d, %ecx xorl %r11d, %edx xorl %r13d, %ecx rorl $5, %edx addl %ecx, %r14d xorl %r11d, %edx xorl %r15d, %ebx rorl $6, %edx movl %r15d, %ecx addl %edx, %r14d rorl $9, %ecx andl %ebx, %eax xorl %r15d, %ecx xorl %r8d, %eax rorl $11, %ecx addl %r14d, %r10d xorl %r15d, %ecx addl %eax, %r14d rorl $2, %ecx movl %r10d, %edx addl %ecx, %r14d addl 280(%rsp), %r13d movl %r11d, %ecx movl %r15d, %eax xorl %r12d, %ecx rorl $14, %edx andl %r10d, %ecx xorl %r10d, %edx xorl %r12d, %ecx rorl $5, %edx addl %ecx, %r13d xorl %r10d, %edx xorl %r14d, %eax rorl $6, %edx movl %r14d, %ecx addl %edx, %r13d rorl $9, %ecx andl %eax, %ebx xorl %r14d, %ecx xorl %r15d, %ebx rorl $11, %ecx addl %r13d, %r9d xorl %r14d, %ecx addl %ebx, %r13d rorl $2, %ecx movl %r9d, %edx addl %ecx, %r13d addl 284(%rsp), %r12d movl %r10d, %ecx movl %r14d, %ebx xorl %r11d, %ecx rorl $14, %edx andl %r9d, %ecx xorl %r9d, %edx xorl %r11d, %ecx rorl $5, %edx addl %ecx, %r12d xorl %r9d, %edx xorl %r13d, %ebx rorl $6, %edx movl %r13d, %ecx addl %edx, %r12d rorl $9, %ecx andl %ebx, %eax xorl %r13d, %ecx xorl %r14d, %eax rorl $11, %ecx addl %r12d, %r8d xorl %r13d, %ecx addl %eax, %r12d rorl $2, %ecx movl %r8d, %edx addl %ecx, %r12d # rnd_all_4: 19-22 addl 304(%rsp), %r11d movl %r9d, %ecx movl %r13d, %eax xorl %r10d, %ecx rorl $14, %edx andl 
%r8d, %ecx xorl %r8d, %edx xorl %r10d, %ecx rorl $5, %edx addl %ecx, %r11d xorl %r8d, %edx xorl %r12d, %eax rorl $6, %edx movl %r12d, %ecx addl %edx, %r11d rorl $9, %ecx andl %eax, %ebx xorl %r12d, %ecx xorl %r13d, %ebx rorl $11, %ecx addl %r11d, %r15d xorl %r12d, %ecx addl %ebx, %r11d rorl $2, %ecx movl %r15d, %edx addl %ecx, %r11d addl 308(%rsp), %r10d movl %r8d, %ecx movl %r12d, %ebx xorl %r9d, %ecx rorl $14, %edx andl %r15d, %ecx xorl %r15d, %edx xorl %r9d, %ecx rorl $5, %edx addl %ecx, %r10d xorl %r15d, %edx xorl %r11d, %ebx rorl $6, %edx movl %r11d, %ecx addl %edx, %r10d rorl $9, %ecx andl %ebx, %eax xorl %r11d, %ecx xorl %r12d, %eax rorl $11, %ecx addl %r10d, %r14d xorl %r11d, %ecx addl %eax, %r10d rorl $2, %ecx movl %r14d, %edx addl %ecx, %r10d addl 312(%rsp), %r9d movl %r15d, %ecx movl %r11d, %eax xorl %r8d, %ecx rorl $14, %edx andl %r14d, %ecx xorl %r14d, %edx xorl %r8d, %ecx rorl $5, %edx addl %ecx, %r9d xorl %r14d, %edx xorl %r10d, %eax rorl $6, %edx movl %r10d, %ecx addl %edx, %r9d rorl $9, %ecx andl %eax, %ebx xorl %r10d, %ecx xorl %r11d, %ebx rorl $11, %ecx addl %r9d, %r13d xorl %r10d, %ecx addl %ebx, %r9d rorl $2, %ecx movl %r13d, %edx addl %ecx, %r9d addl 316(%rsp), %r8d movl %r14d, %ecx movl %r10d, %ebx xorl %r15d, %ecx rorl $14, %edx andl %r13d, %ecx xorl %r13d, %edx xorl %r15d, %ecx rorl $5, %edx addl %ecx, %r8d xorl %r13d, %edx xorl %r9d, %ebx rorl $6, %edx movl %r9d, %ecx addl %edx, %r8d rorl $9, %ecx andl %ebx, %eax xorl %r9d, %ecx xorl %r10d, %eax rorl $11, %ecx addl %r8d, %r12d xorl %r9d, %ecx addl %eax, %r8d rorl $2, %ecx movl %r12d, %edx addl %ecx, %r8d # rnd_all_4: 21-24 addl 336(%rsp), %r15d movl %r13d, %ecx movl %r9d, %eax xorl %r14d, %ecx rorl $14, %edx andl %r12d, %ecx xorl %r12d, %edx xorl %r14d, %ecx rorl $5, %edx addl %ecx, %r15d xorl %r12d, %edx xorl %r8d, %eax rorl $6, %edx movl %r8d, %ecx addl %edx, %r15d rorl $9, %ecx andl %eax, %ebx xorl %r8d, %ecx xorl %r9d, %ebx rorl $11, %ecx addl %r15d, %r11d xorl %r8d, %ecx addl %ebx, %r15d rorl $2, %ecx movl %r11d, %edx addl %ecx, %r15d addl 340(%rsp), %r14d movl %r12d, %ecx movl %r8d, %ebx xorl %r13d, %ecx rorl $14, %edx andl %r11d, %ecx xorl %r11d, %edx xorl %r13d, %ecx rorl $5, %edx addl %ecx, %r14d xorl %r11d, %edx xorl %r15d, %ebx rorl $6, %edx movl %r15d, %ecx addl %edx, %r14d rorl $9, %ecx andl %ebx, %eax xorl %r15d, %ecx xorl %r8d, %eax rorl $11, %ecx addl %r14d, %r10d xorl %r15d, %ecx addl %eax, %r14d rorl $2, %ecx movl %r10d, %edx addl %ecx, %r14d addl 344(%rsp), %r13d movl %r11d, %ecx movl %r15d, %eax xorl %r12d, %ecx rorl $14, %edx andl %r10d, %ecx xorl %r10d, %edx xorl %r12d, %ecx rorl $5, %edx addl %ecx, %r13d xorl %r10d, %edx xorl %r14d, %eax rorl $6, %edx movl %r14d, %ecx addl %edx, %r13d rorl $9, %ecx andl %eax, %ebx xorl %r14d, %ecx xorl %r15d, %ebx rorl $11, %ecx addl %r13d, %r9d xorl %r14d, %ecx addl %ebx, %r13d rorl $2, %ecx movl %r9d, %edx addl %ecx, %r13d addl 348(%rsp), %r12d movl %r10d, %ecx movl %r14d, %ebx xorl %r11d, %ecx rorl $14, %edx andl %r9d, %ecx xorl %r9d, %edx xorl %r11d, %ecx rorl $5, %edx addl %ecx, %r12d xorl %r9d, %edx xorl %r13d, %ebx rorl $6, %edx movl %r13d, %ecx addl %edx, %r12d rorl $9, %ecx andl %ebx, %eax xorl %r13d, %ecx xorl %r14d, %eax rorl $11, %ecx addl %r12d, %r8d xorl %r13d, %ecx addl %eax, %r12d rorl $2, %ecx movl %r8d, %edx addl %ecx, %r12d # rnd_all_4: 23-26 addl 368(%rsp), %r11d movl %r9d, %ecx movl %r13d, %eax xorl %r10d, %ecx rorl $14, %edx andl %r8d, %ecx xorl %r8d, %edx xorl %r10d, %ecx rorl $5, %edx addl %ecx, %r11d xorl %r8d, %edx xorl %r12d, %eax 
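        # After the last rnd_all_4 pass the second lane's result is folded into
        # (%rdi) as well; the data pointer and remaining length are then adjusted
        # by 0x80 (two 64-byte blocks) and the loop repeats while length is
        # non-zero (jnz L_sha256_len_avx2_start).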
rorl $6, %edx movl %r12d, %ecx addl %edx, %r11d rorl $9, %ecx andl %eax, %ebx xorl %r12d, %ecx xorl %r13d, %ebx rorl $11, %ecx addl %r11d, %r15d xorl %r12d, %ecx addl %ebx, %r11d rorl $2, %ecx movl %r15d, %edx addl %ecx, %r11d addl 372(%rsp), %r10d movl %r8d, %ecx movl %r12d, %ebx xorl %r9d, %ecx rorl $14, %edx andl %r15d, %ecx xorl %r15d, %edx xorl %r9d, %ecx rorl $5, %edx addl %ecx, %r10d xorl %r15d, %edx xorl %r11d, %ebx rorl $6, %edx movl %r11d, %ecx addl %edx, %r10d rorl $9, %ecx andl %ebx, %eax xorl %r11d, %ecx xorl %r12d, %eax rorl $11, %ecx addl %r10d, %r14d xorl %r11d, %ecx addl %eax, %r10d rorl $2, %ecx movl %r14d, %edx addl %ecx, %r10d addl 376(%rsp), %r9d movl %r15d, %ecx movl %r11d, %eax xorl %r8d, %ecx rorl $14, %edx andl %r14d, %ecx xorl %r14d, %edx xorl %r8d, %ecx rorl $5, %edx addl %ecx, %r9d xorl %r14d, %edx xorl %r10d, %eax rorl $6, %edx movl %r10d, %ecx addl %edx, %r9d rorl $9, %ecx andl %eax, %ebx xorl %r10d, %ecx xorl %r11d, %ebx rorl $11, %ecx addl %r9d, %r13d xorl %r10d, %ecx addl %ebx, %r9d rorl $2, %ecx movl %r13d, %edx addl %ecx, %r9d addl 380(%rsp), %r8d movl %r14d, %ecx movl %r10d, %ebx xorl %r15d, %ecx rorl $14, %edx andl %r13d, %ecx xorl %r13d, %edx xorl %r15d, %ecx rorl $5, %edx addl %ecx, %r8d xorl %r13d, %edx xorl %r9d, %ebx rorl $6, %edx movl %r9d, %ecx addl %edx, %r8d rorl $9, %ecx andl %ebx, %eax xorl %r9d, %ecx xorl %r10d, %eax rorl $11, %ecx addl %r8d, %r12d xorl %r9d, %ecx addl %eax, %r8d rorl $2, %ecx movl %r12d, %edx addl %ecx, %r8d # rnd_all_4: 25-28 addl 400(%rsp), %r15d movl %r13d, %ecx movl %r9d, %eax xorl %r14d, %ecx rorl $14, %edx andl %r12d, %ecx xorl %r12d, %edx xorl %r14d, %ecx rorl $5, %edx addl %ecx, %r15d xorl %r12d, %edx xorl %r8d, %eax rorl $6, %edx movl %r8d, %ecx addl %edx, %r15d rorl $9, %ecx andl %eax, %ebx xorl %r8d, %ecx xorl %r9d, %ebx rorl $11, %ecx addl %r15d, %r11d xorl %r8d, %ecx addl %ebx, %r15d rorl $2, %ecx movl %r11d, %edx addl %ecx, %r15d addl 404(%rsp), %r14d movl %r12d, %ecx movl %r8d, %ebx xorl %r13d, %ecx rorl $14, %edx andl %r11d, %ecx xorl %r11d, %edx xorl %r13d, %ecx rorl $5, %edx addl %ecx, %r14d xorl %r11d, %edx xorl %r15d, %ebx rorl $6, %edx movl %r15d, %ecx addl %edx, %r14d rorl $9, %ecx andl %ebx, %eax xorl %r15d, %ecx xorl %r8d, %eax rorl $11, %ecx addl %r14d, %r10d xorl %r15d, %ecx addl %eax, %r14d rorl $2, %ecx movl %r10d, %edx addl %ecx, %r14d addl 408(%rsp), %r13d movl %r11d, %ecx movl %r15d, %eax xorl %r12d, %ecx rorl $14, %edx andl %r10d, %ecx xorl %r10d, %edx xorl %r12d, %ecx rorl $5, %edx addl %ecx, %r13d xorl %r10d, %edx xorl %r14d, %eax rorl $6, %edx movl %r14d, %ecx addl %edx, %r13d rorl $9, %ecx andl %eax, %ebx xorl %r14d, %ecx xorl %r15d, %ebx rorl $11, %ecx addl %r13d, %r9d xorl %r14d, %ecx addl %ebx, %r13d rorl $2, %ecx movl %r9d, %edx addl %ecx, %r13d addl 412(%rsp), %r12d movl %r10d, %ecx movl %r14d, %ebx xorl %r11d, %ecx rorl $14, %edx andl %r9d, %ecx xorl %r9d, %edx xorl %r11d, %ecx rorl $5, %edx addl %ecx, %r12d xorl %r9d, %edx xorl %r13d, %ebx rorl $6, %edx movl %r13d, %ecx addl %edx, %r12d rorl $9, %ecx andl %ebx, %eax xorl %r13d, %ecx xorl %r14d, %eax rorl $11, %ecx addl %r12d, %r8d xorl %r13d, %ecx addl %eax, %r12d rorl $2, %ecx movl %r8d, %edx addl %ecx, %r12d # rnd_all_4: 27-30 addl 432(%rsp), %r11d movl %r9d, %ecx movl %r13d, %eax xorl %r10d, %ecx rorl $14, %edx andl %r8d, %ecx xorl %r8d, %edx xorl %r10d, %ecx rorl $5, %edx addl %ecx, %r11d xorl %r8d, %edx xorl %r12d, %eax rorl $6, %edx movl %r12d, %ecx addl %edx, %r11d rorl $9, %ecx andl %eax, %ebx xorl %r12d, %ecx xorl %r13d, 
%ebx rorl $11, %ecx addl %r11d, %r15d xorl %r12d, %ecx addl %ebx, %r11d rorl $2, %ecx movl %r15d, %edx addl %ecx, %r11d addl 436(%rsp), %r10d movl %r8d, %ecx movl %r12d, %ebx xorl %r9d, %ecx rorl $14, %edx andl %r15d, %ecx xorl %r15d, %edx xorl %r9d, %ecx rorl $5, %edx addl %ecx, %r10d xorl %r15d, %edx xorl %r11d, %ebx rorl $6, %edx movl %r11d, %ecx addl %edx, %r10d rorl $9, %ecx andl %ebx, %eax xorl %r11d, %ecx xorl %r12d, %eax rorl $11, %ecx addl %r10d, %r14d xorl %r11d, %ecx addl %eax, %r10d rorl $2, %ecx movl %r14d, %edx addl %ecx, %r10d addl 440(%rsp), %r9d movl %r15d, %ecx movl %r11d, %eax xorl %r8d, %ecx rorl $14, %edx andl %r14d, %ecx xorl %r14d, %edx xorl %r8d, %ecx rorl $5, %edx addl %ecx, %r9d xorl %r14d, %edx xorl %r10d, %eax rorl $6, %edx movl %r10d, %ecx addl %edx, %r9d rorl $9, %ecx andl %eax, %ebx xorl %r10d, %ecx xorl %r11d, %ebx rorl $11, %ecx addl %r9d, %r13d xorl %r10d, %ecx addl %ebx, %r9d rorl $2, %ecx movl %r13d, %edx addl %ecx, %r9d addl 444(%rsp), %r8d movl %r14d, %ecx movl %r10d, %ebx xorl %r15d, %ecx rorl $14, %edx andl %r13d, %ecx xorl %r13d, %edx xorl %r15d, %ecx rorl $5, %edx addl %ecx, %r8d xorl %r13d, %edx xorl %r9d, %ebx rorl $6, %edx movl %r9d, %ecx addl %edx, %r8d rorl $9, %ecx andl %ebx, %eax xorl %r9d, %ecx xorl %r10d, %eax rorl $11, %ecx addl %r8d, %r12d xorl %r9d, %ecx addl %eax, %r8d rorl $2, %ecx movl %r12d, %edx addl %ecx, %r8d # rnd_all_4: 29-32 addl 464(%rsp), %r15d movl %r13d, %ecx movl %r9d, %eax xorl %r14d, %ecx rorl $14, %edx andl %r12d, %ecx xorl %r12d, %edx xorl %r14d, %ecx rorl $5, %edx addl %ecx, %r15d xorl %r12d, %edx xorl %r8d, %eax rorl $6, %edx movl %r8d, %ecx addl %edx, %r15d rorl $9, %ecx andl %eax, %ebx xorl %r8d, %ecx xorl %r9d, %ebx rorl $11, %ecx addl %r15d, %r11d xorl %r8d, %ecx addl %ebx, %r15d rorl $2, %ecx movl %r11d, %edx addl %ecx, %r15d addl 468(%rsp), %r14d movl %r12d, %ecx movl %r8d, %ebx xorl %r13d, %ecx rorl $14, %edx andl %r11d, %ecx xorl %r11d, %edx xorl %r13d, %ecx rorl $5, %edx addl %ecx, %r14d xorl %r11d, %edx xorl %r15d, %ebx rorl $6, %edx movl %r15d, %ecx addl %edx, %r14d rorl $9, %ecx andl %ebx, %eax xorl %r15d, %ecx xorl %r8d, %eax rorl $11, %ecx addl %r14d, %r10d xorl %r15d, %ecx addl %eax, %r14d rorl $2, %ecx movl %r10d, %edx addl %ecx, %r14d addl 472(%rsp), %r13d movl %r11d, %ecx movl %r15d, %eax xorl %r12d, %ecx rorl $14, %edx andl %r10d, %ecx xorl %r10d, %edx xorl %r12d, %ecx rorl $5, %edx addl %ecx, %r13d xorl %r10d, %edx xorl %r14d, %eax rorl $6, %edx movl %r14d, %ecx addl %edx, %r13d rorl $9, %ecx andl %eax, %ebx xorl %r14d, %ecx xorl %r15d, %ebx rorl $11, %ecx addl %r13d, %r9d xorl %r14d, %ecx addl %ebx, %r13d rorl $2, %ecx movl %r9d, %edx addl %ecx, %r13d addl 476(%rsp), %r12d movl %r10d, %ecx movl %r14d, %ebx xorl %r11d, %ecx rorl $14, %edx andl %r9d, %ecx xorl %r9d, %edx xorl %r11d, %ecx rorl $5, %edx addl %ecx, %r12d xorl %r9d, %edx xorl %r13d, %ebx rorl $6, %edx movl %r13d, %ecx addl %edx, %r12d rorl $9, %ecx andl %ebx, %eax xorl %r13d, %ecx xorl %r14d, %eax rorl $11, %ecx addl %r12d, %r8d xorl %r13d, %ecx addl %eax, %r12d rorl $2, %ecx movl %r8d, %edx addl %ecx, %r12d # rnd_all_4: 31-34 addl 496(%rsp), %r11d movl %r9d, %ecx movl %r13d, %eax xorl %r10d, %ecx rorl $14, %edx andl %r8d, %ecx xorl %r8d, %edx xorl %r10d, %ecx rorl $5, %edx addl %ecx, %r11d xorl %r8d, %edx xorl %r12d, %eax rorl $6, %edx movl %r12d, %ecx addl %edx, %r11d rorl $9, %ecx andl %eax, %ebx xorl %r12d, %ecx xorl %r13d, %ebx rorl $11, %ecx addl %r11d, %r15d xorl %r12d, %ecx addl %ebx, %r11d rorl $2, %ecx movl %r15d, %edx addl 
%ecx, %r11d addl 500(%rsp), %r10d movl %r8d, %ecx movl %r12d, %ebx xorl %r9d, %ecx rorl $14, %edx andl %r15d, %ecx xorl %r15d, %edx xorl %r9d, %ecx rorl $5, %edx addl %ecx, %r10d xorl %r15d, %edx xorl %r11d, %ebx rorl $6, %edx movl %r11d, %ecx addl %edx, %r10d rorl $9, %ecx andl %ebx, %eax xorl %r11d, %ecx xorl %r12d, %eax rorl $11, %ecx addl %r10d, %r14d xorl %r11d, %ecx addl %eax, %r10d rorl $2, %ecx movl %r14d, %edx addl %ecx, %r10d addl 504(%rsp), %r9d movl %r15d, %ecx movl %r11d, %eax xorl %r8d, %ecx rorl $14, %edx andl %r14d, %ecx xorl %r14d, %edx xorl %r8d, %ecx rorl $5, %edx addl %ecx, %r9d xorl %r14d, %edx xorl %r10d, %eax rorl $6, %edx movl %r10d, %ecx addl %edx, %r9d rorl $9, %ecx andl %eax, %ebx xorl %r10d, %ecx xorl %r11d, %ebx rorl $11, %ecx addl %r9d, %r13d xorl %r10d, %ecx addl %ebx, %r9d rorl $2, %ecx movl %r13d, %edx addl %ecx, %r9d addl 508(%rsp), %r8d movl %r14d, %ecx movl %r10d, %ebx xorl %r15d, %ecx rorl $14, %edx andl %r13d, %ecx xorl %r13d, %edx xorl %r15d, %ecx rorl $5, %edx addl %ecx, %r8d xorl %r13d, %edx xorl %r9d, %ebx rorl $6, %edx movl %r9d, %ecx addl %edx, %r8d rorl $9, %ecx andl %ebx, %eax xorl %r9d, %ecx xorl %r10d, %eax rorl $11, %ecx addl %r8d, %r12d xorl %r9d, %ecx addl %eax, %r8d rorl $2, %ecx movl %r12d, %edx addl %ecx, %r8d addl (%rdi), %r8d addl 4(%rdi), %r9d addl 8(%rdi), %r10d addl 12(%rdi), %r11d addl 16(%rdi), %r12d addl 20(%rdi), %r13d addl 24(%rdi), %r14d addl 28(%rdi), %r15d addq $0x80, %rbp subl $0x80, %esi movl %r8d, (%rdi) movl %r9d, 4(%rdi) movl %r10d, 8(%rdi) movl %r11d, 12(%rdi) movl %r12d, 16(%rdi) movl %r13d, 20(%rdi) movl %r14d, 24(%rdi) movl %r15d, 28(%rdi) jnz L_sha256_len_avx2_start L_sha256_len_avx2_done: xorq %rax, %rax vzeroupper addq $0x200, %rsp popq %rbp popq %r15 popq %r14 popq %r13 popq %r12 popq %rbx repz retq #ifndef __APPLE__ .size Transform_Sha256_AVX2_Len,.-Transform_Sha256_AVX2_Len #endif /* __APPLE__ */ #ifndef __APPLE__ .data #else .section __DATA,__data #endif /* __APPLE__ */ L_avx2_rorx_sha256_k: .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 .long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 .long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 .long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 .long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 .long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 .long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 .long 0xe49b69c1,0xefbe4786,0xfc19dc6,0x240ca1cc .long 0xe49b69c1,0xefbe4786,0xfc19dc6,0x240ca1cc .long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da .long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da .long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 .long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 .long 0xc6e00bf3,0xd5a79147,0x6ca6351,0x14292967 .long 0xc6e00bf3,0xd5a79147,0x6ca6351,0x14292967 .long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 .long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 .long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 .long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 .long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 .long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 .long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 .long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 .long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 .long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 .long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 .long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 .long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 .long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 
.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 #ifndef __APPLE__ .data #else .section __DATA,__data #endif /* __APPLE__ */ #ifndef __APPLE__ .align 32 #else .p2align 5 #endif /* __APPLE__ */ L_avx2_rorx_sha256_flip_mask: .quad 0x405060700010203, 0xc0d0e0f08090a0b .quad 0x405060700010203, 0xc0d0e0f08090a0b #ifndef __APPLE__ .data #else .section __DATA,__data #endif /* __APPLE__ */ #ifndef __APPLE__ .align 32 #else .p2align 5 #endif /* __APPLE__ */ L_avx2_rorx_sha256_shuf_00BA: .quad 0xb0a090803020100, 0xffffffffffffffff .quad 0xb0a090803020100, 0xffffffffffffffff #ifndef __APPLE__ .data #else .section __DATA,__data #endif /* __APPLE__ */ #ifndef __APPLE__ .align 32 #else .p2align 5 #endif /* __APPLE__ */ L_avx2_rorx_sha256_shuf_DC00: .quad 0xffffffffffffffff, 0xb0a090803020100 .quad 0xffffffffffffffff, 0xb0a090803020100 #ifndef __APPLE__ .text .globl Transform_Sha256_AVX2_RORX .type Transform_Sha256_AVX2_RORX,@function .align 16 Transform_Sha256_AVX2_RORX: #else .section __TEXT,__text .globl _Transform_Sha256_AVX2_RORX .p2align 4 _Transform_Sha256_AVX2_RORX: #endif /* __APPLE__ */ pushq %rbx pushq %r12 pushq %r13 pushq %r14 pushq %r15 subq $0x200, %rsp leaq 32(%rdi), %rax vmovdqa L_avx2_rorx_sha256_flip_mask(%rip), %xmm13 vmovdqa L_avx2_rorx_sha256_shuf_00BA(%rip), %ymm11 vmovdqa L_avx2_rorx_sha256_shuf_DC00(%rip), %ymm12 # X0, X1, X2, X3 = W[0..15] vmovdqu (%rax), %xmm0 vmovdqu 16(%rax), %xmm1 vpshufb %xmm13, %xmm0, %xmm0 vpshufb %xmm13, %xmm1, %xmm1 vpaddd 0+L_avx2_rorx_sha256_k(%rip), %ymm0, %ymm4 vpaddd 32+L_avx2_rorx_sha256_k(%rip), %ymm1, %ymm5 vmovdqu %ymm4, (%rsp) vmovdqu %ymm5, 32(%rsp) vmovdqu 32(%rax), %xmm2 vmovdqu 48(%rax), %xmm3 vpshufb %xmm13, %xmm2, %xmm2 vpshufb %xmm13, %xmm3, %xmm3 vpaddd 64+L_avx2_rorx_sha256_k(%rip), %ymm2, %ymm4 vpaddd 96+L_avx2_rorx_sha256_k(%rip), %ymm3, %ymm5 vmovdqu %ymm4, 64(%rsp) vmovdqu %ymm5, 96(%rsp) movl (%rdi), %r8d movl 4(%rdi), %r9d movl 8(%rdi), %r10d movl 12(%rdi), %r11d movl 16(%rdi), %r12d movl 20(%rdi), %r13d movl 24(%rdi), %r14d movl 28(%rdi), %r15d movl %r9d, %ebx rorxl $6, %r12d, %edx xorl %r10d, %ebx # rnd_0: 0 - 0 movl %r13d, %eax rorxl $11, %r12d, %ecx addl (%rsp), %r15d vpalignr $4, %ymm0, %ymm1, %ymm5 # rnd_0: 1 - 1 xorl %edx, %ecx xorl %r14d, %eax rorxl $25, %r12d, %edx vpalignr $4, %ymm2, %ymm3, %ymm4 # rnd_0: 2 - 2 andl %r12d, %eax xorl %ecx, %edx rorxl $13, %r8d, %ecx vpsrld $7, %ymm5, %ymm6 # rnd_0: 3 - 3 addl %edx, %r15d rorxl $2, %r8d, %edx xorl %r14d, %eax vpslld $25, %ymm5, %ymm7 # rnd_0: 4 - 4 xorl %edx, %ecx rorxl $22, %r8d, %edx addl %eax, %r15d vpsrld $18, %ymm5, %ymm8 # rnd_0: 5 - 5 xorl %ecx, %edx movl %r9d, %eax addl %r15d, %r11d vpslld $14, %ymm5, %ymm9 # rnd_0: 6 - 6 xorl %r8d, %eax addl %edx, %r15d andl %eax, %ebx vpor %ymm7, %ymm6, %ymm6 # rnd_0: 7 - 7 xorl %r9d, %ebx rorxl $6, %r11d, %edx addl %ebx, %r15d vpor %ymm9, %ymm8, %ymm8 # rnd_1: 0 - 0 movl %r12d, %ebx rorxl $11, %r11d, %ecx addl 4(%rsp), %r14d vpsrld $3, %ymm5, %ymm9 # rnd_1: 1 - 1 xorl %edx, %ecx xorl %r13d, %ebx rorxl $25, %r11d, %edx vpxor %ymm8, %ymm6, %ymm6 # rnd_1: 2 - 2 andl %r11d, %ebx xorl %ecx, %edx rorxl $13, %r15d, %ecx vpshufd $0xfa, %ymm3, %ymm7 # rnd_1: 3 - 3 addl %edx, %r14d rorxl $2, %r15d, %edx xorl %r13d, %ebx vpxor %ymm6, %ymm9, %ymm5 # rnd_1: 4 - 4 xorl %edx, %ecx rorxl $22, %r15d, %edx addl %ebx, %r14d vpsrld $10, %ymm7, %ymm8 # rnd_1: 5 - 5 xorl %ecx, %edx addl %r14d, %r10d movl %r8d, %ebx vpsrlq $19, %ymm7, %ymm6 # rnd_1: 6 - 6 xorl %r15d, %ebx addl %edx, %r14d andl %ebx, %eax vpsrlq $0x11, %ymm7, %ymm7 # rnd_1: 7 - 7 
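# NOTE: annotation added for readability; not emitted by the original generator.
# The rnd_0/rnd_1 scalar sequences are the SHA-256 compression rounds:
#   rorxl $6/$11/$25 of e build Sigma1(e), rorxl $2/$13/$22 of a build Sigma0(a),
#   and the and/xor register chains compute Ch(e,f,g) and Maj(a,b,c).
# The interleaved AVX2 instructions expand the message schedule W[t]:
#   vpsrld $7 with vpslld $25, and vpsrld $18 with vpslld $14, plus vpsrld $3,
#   form sigma0(w) = ROTR7(w) ^ ROTR18(w) ^ SHR3(w); vpsrlq $0x11/$19 together
#   with vpsrld $10 form sigma1(w) = ROTR17(w) ^ ROTR19(w) ^ SHR10(w).
# W[t]+K[t] values are stored to the stack in 32-byte slots (hence the doubled
# rows in L_avx2_rorx_sha256_k); this one-block transform consumes only the low
# 16 bytes of each slot (offsets 0, 32, 64, ...), while the _Len variant fills
# and uses both 128-bit lanes to process two blocks per pass.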
xorl %r8d, %eax rorxl $6, %r10d, %edx addl %eax, %r14d vpaddd %ymm0, %ymm4, %ymm4 # rnd_0: 0 - 0 movl %r11d, %eax rorxl $11, %r10d, %ecx addl 8(%rsp), %r13d vpxor %ymm7, %ymm6, %ymm6 # rnd_0: 1 - 1 xorl %edx, %ecx xorl %r12d, %eax rorxl $25, %r10d, %edx vpxor %ymm6, %ymm8, %ymm8 # rnd_0: 2 - 2 andl %r10d, %eax xorl %ecx, %edx rorxl $13, %r14d, %ecx vpaddd %ymm5, %ymm4, %ymm4 # rnd_0: 3 - 3 addl %edx, %r13d rorxl $2, %r14d, %edx xorl %r12d, %eax vpshufb %ymm11, %ymm8, %ymm8 # rnd_0: 4 - 4 xorl %edx, %ecx rorxl $22, %r14d, %edx addl %eax, %r13d vpaddd %ymm8, %ymm4, %ymm4 # rnd_0: 5 - 5 xorl %ecx, %edx movl %r15d, %eax addl %r13d, %r9d vpshufd $0x50, %ymm4, %ymm6 # rnd_0: 6 - 6 xorl %r14d, %eax addl %edx, %r13d andl %eax, %ebx vpsrlq $0x11, %ymm6, %ymm8 # rnd_0: 7 - 7 xorl %r15d, %ebx rorxl $6, %r9d, %edx addl %ebx, %r13d vpsrlq $19, %ymm6, %ymm7 # rnd_1: 0 - 0 movl %r10d, %ebx rorxl $11, %r9d, %ecx addl 12(%rsp), %r12d vpsrld $10, %ymm6, %ymm9 # rnd_1: 1 - 1 xorl %edx, %ecx xorl %r11d, %ebx rorxl $25, %r9d, %edx vpxor %ymm7, %ymm8, %ymm8 # rnd_1: 2 - 2 andl %r9d, %ebx xorl %ecx, %edx rorxl $13, %r13d, %ecx vpxor %ymm8, %ymm9, %ymm9 # rnd_1: 3 - 3 addl %edx, %r12d rorxl $2, %r13d, %edx xorl %r11d, %ebx vpshufb %ymm12, %ymm9, %ymm9 # rnd_1: 4 - 4 xorl %edx, %ecx rorxl $22, %r13d, %edx addl %ebx, %r12d vpaddd %ymm4, %ymm9, %ymm0 # rnd_1: 5 - 5 xorl %ecx, %edx addl %r12d, %r8d movl %r14d, %ebx vpaddd 128+L_avx2_rorx_sha256_k(%rip), %ymm0, %ymm4 # rnd_1: 6 - 6 xorl %r13d, %ebx addl %edx, %r12d andl %ebx, %eax # rnd_1: 7 - 7 xorl %r14d, %eax rorxl $6, %r8d, %edx addl %eax, %r12d vmovdqu %ymm4, 128(%rsp) # rnd_0: 0 - 0 movl %r9d, %eax rorxl $11, %r8d, %ecx addl 32(%rsp), %r11d vpalignr $4, %ymm1, %ymm2, %ymm5 # rnd_0: 1 - 1 xorl %edx, %ecx xorl %r10d, %eax rorxl $25, %r8d, %edx vpalignr $4, %ymm3, %ymm0, %ymm4 # rnd_0: 2 - 2 andl %r8d, %eax xorl %ecx, %edx rorxl $13, %r12d, %ecx vpsrld $7, %ymm5, %ymm6 # rnd_0: 3 - 3 addl %edx, %r11d rorxl $2, %r12d, %edx xorl %r10d, %eax vpslld $25, %ymm5, %ymm7 # rnd_0: 4 - 4 xorl %edx, %ecx rorxl $22, %r12d, %edx addl %eax, %r11d vpsrld $18, %ymm5, %ymm8 # rnd_0: 5 - 5 xorl %ecx, %edx movl %r13d, %eax addl %r11d, %r15d vpslld $14, %ymm5, %ymm9 # rnd_0: 6 - 6 xorl %r12d, %eax addl %edx, %r11d andl %eax, %ebx vpor %ymm7, %ymm6, %ymm6 # rnd_0: 7 - 7 xorl %r13d, %ebx rorxl $6, %r15d, %edx addl %ebx, %r11d vpor %ymm9, %ymm8, %ymm8 # rnd_1: 0 - 0 movl %r8d, %ebx rorxl $11, %r15d, %ecx addl 36(%rsp), %r10d vpsrld $3, %ymm5, %ymm9 # rnd_1: 1 - 1 xorl %edx, %ecx xorl %r9d, %ebx rorxl $25, %r15d, %edx vpxor %ymm8, %ymm6, %ymm6 # rnd_1: 2 - 2 andl %r15d, %ebx xorl %ecx, %edx rorxl $13, %r11d, %ecx vpshufd $0xfa, %ymm0, %ymm7 # rnd_1: 3 - 3 addl %edx, %r10d rorxl $2, %r11d, %edx xorl %r9d, %ebx vpxor %ymm6, %ymm9, %ymm5 # rnd_1: 4 - 4 xorl %edx, %ecx rorxl $22, %r11d, %edx addl %ebx, %r10d vpsrld $10, %ymm7, %ymm8 # rnd_1: 5 - 5 xorl %ecx, %edx addl %r10d, %r14d movl %r12d, %ebx vpsrlq $19, %ymm7, %ymm6 # rnd_1: 6 - 6 xorl %r11d, %ebx addl %edx, %r10d andl %ebx, %eax vpsrlq $0x11, %ymm7, %ymm7 # rnd_1: 7 - 7 xorl %r12d, %eax rorxl $6, %r14d, %edx addl %eax, %r10d vpaddd %ymm1, %ymm4, %ymm4 # rnd_0: 0 - 0 movl %r15d, %eax rorxl $11, %r14d, %ecx addl 40(%rsp), %r9d vpxor %ymm7, %ymm6, %ymm6 # rnd_0: 1 - 1 xorl %edx, %ecx xorl %r8d, %eax rorxl $25, %r14d, %edx vpxor %ymm6, %ymm8, %ymm8 # rnd_0: 2 - 2 andl %r14d, %eax xorl %ecx, %edx rorxl $13, %r10d, %ecx vpaddd %ymm5, %ymm4, %ymm4 # rnd_0: 3 - 3 addl %edx, %r9d rorxl $2, %r10d, %edx xorl %r8d, %eax vpshufb %ymm11, %ymm8, 
%ymm8 # rnd_0: 4 - 4 xorl %edx, %ecx rorxl $22, %r10d, %edx addl %eax, %r9d vpaddd %ymm8, %ymm4, %ymm4 # rnd_0: 5 - 5 xorl %ecx, %edx movl %r11d, %eax addl %r9d, %r13d vpshufd $0x50, %ymm4, %ymm6 # rnd_0: 6 - 6 xorl %r10d, %eax addl %edx, %r9d andl %eax, %ebx vpsrlq $0x11, %ymm6, %ymm8 # rnd_0: 7 - 7 xorl %r11d, %ebx rorxl $6, %r13d, %edx addl %ebx, %r9d vpsrlq $19, %ymm6, %ymm7 # rnd_1: 0 - 0 movl %r14d, %ebx rorxl $11, %r13d, %ecx addl 44(%rsp), %r8d vpsrld $10, %ymm6, %ymm9 # rnd_1: 1 - 1 xorl %edx, %ecx xorl %r15d, %ebx rorxl $25, %r13d, %edx vpxor %ymm7, %ymm8, %ymm8 # rnd_1: 2 - 2 andl %r13d, %ebx xorl %ecx, %edx rorxl $13, %r9d, %ecx vpxor %ymm8, %ymm9, %ymm9 # rnd_1: 3 - 3 addl %edx, %r8d rorxl $2, %r9d, %edx xorl %r15d, %ebx vpshufb %ymm12, %ymm9, %ymm9 # rnd_1: 4 - 4 xorl %edx, %ecx rorxl $22, %r9d, %edx addl %ebx, %r8d vpaddd %ymm4, %ymm9, %ymm1 # rnd_1: 5 - 5 xorl %ecx, %edx addl %r8d, %r12d movl %r10d, %ebx vpaddd 160+L_avx2_rorx_sha256_k(%rip), %ymm1, %ymm4 # rnd_1: 6 - 6 xorl %r9d, %ebx addl %edx, %r8d andl %ebx, %eax # rnd_1: 7 - 7 xorl %r10d, %eax rorxl $6, %r12d, %edx addl %eax, %r8d vmovdqu %ymm4, 160(%rsp) # rnd_0: 0 - 0 movl %r13d, %eax rorxl $11, %r12d, %ecx addl 64(%rsp), %r15d vpalignr $4, %ymm2, %ymm3, %ymm5 # rnd_0: 1 - 1 xorl %edx, %ecx xorl %r14d, %eax rorxl $25, %r12d, %edx vpalignr $4, %ymm0, %ymm1, %ymm4 # rnd_0: 2 - 2 andl %r12d, %eax xorl %ecx, %edx rorxl $13, %r8d, %ecx vpsrld $7, %ymm5, %ymm6 # rnd_0: 3 - 3 addl %edx, %r15d rorxl $2, %r8d, %edx xorl %r14d, %eax vpslld $25, %ymm5, %ymm7 # rnd_0: 4 - 4 xorl %edx, %ecx rorxl $22, %r8d, %edx addl %eax, %r15d vpsrld $18, %ymm5, %ymm8 # rnd_0: 5 - 5 xorl %ecx, %edx movl %r9d, %eax addl %r15d, %r11d vpslld $14, %ymm5, %ymm9 # rnd_0: 6 - 6 xorl %r8d, %eax addl %edx, %r15d andl %eax, %ebx vpor %ymm7, %ymm6, %ymm6 # rnd_0: 7 - 7 xorl %r9d, %ebx rorxl $6, %r11d, %edx addl %ebx, %r15d vpor %ymm9, %ymm8, %ymm8 # rnd_1: 0 - 0 movl %r12d, %ebx rorxl $11, %r11d, %ecx addl 68(%rsp), %r14d vpsrld $3, %ymm5, %ymm9 # rnd_1: 1 - 1 xorl %edx, %ecx xorl %r13d, %ebx rorxl $25, %r11d, %edx vpxor %ymm8, %ymm6, %ymm6 # rnd_1: 2 - 2 andl %r11d, %ebx xorl %ecx, %edx rorxl $13, %r15d, %ecx vpshufd $0xfa, %ymm1, %ymm7 # rnd_1: 3 - 3 addl %edx, %r14d rorxl $2, %r15d, %edx xorl %r13d, %ebx vpxor %ymm6, %ymm9, %ymm5 # rnd_1: 4 - 4 xorl %edx, %ecx rorxl $22, %r15d, %edx addl %ebx, %r14d vpsrld $10, %ymm7, %ymm8 # rnd_1: 5 - 5 xorl %ecx, %edx addl %r14d, %r10d movl %r8d, %ebx vpsrlq $19, %ymm7, %ymm6 # rnd_1: 6 - 6 xorl %r15d, %ebx addl %edx, %r14d andl %ebx, %eax vpsrlq $0x11, %ymm7, %ymm7 # rnd_1: 7 - 7 xorl %r8d, %eax rorxl $6, %r10d, %edx addl %eax, %r14d vpaddd %ymm2, %ymm4, %ymm4 # rnd_0: 0 - 0 movl %r11d, %eax rorxl $11, %r10d, %ecx addl 72(%rsp), %r13d vpxor %ymm7, %ymm6, %ymm6 # rnd_0: 1 - 1 xorl %edx, %ecx xorl %r12d, %eax rorxl $25, %r10d, %edx vpxor %ymm6, %ymm8, %ymm8 # rnd_0: 2 - 2 andl %r10d, %eax xorl %ecx, %edx rorxl $13, %r14d, %ecx vpaddd %ymm5, %ymm4, %ymm4 # rnd_0: 3 - 3 addl %edx, %r13d rorxl $2, %r14d, %edx xorl %r12d, %eax vpshufb %ymm11, %ymm8, %ymm8 # rnd_0: 4 - 4 xorl %edx, %ecx rorxl $22, %r14d, %edx addl %eax, %r13d vpaddd %ymm8, %ymm4, %ymm4 # rnd_0: 5 - 5 xorl %ecx, %edx movl %r15d, %eax addl %r13d, %r9d vpshufd $0x50, %ymm4, %ymm6 # rnd_0: 6 - 6 xorl %r14d, %eax addl %edx, %r13d andl %eax, %ebx vpsrlq $0x11, %ymm6, %ymm8 # rnd_0: 7 - 7 xorl %r15d, %ebx rorxl $6, %r9d, %edx addl %ebx, %r13d vpsrlq $19, %ymm6, %ymm7 # rnd_1: 0 - 0 movl %r10d, %ebx rorxl $11, %r9d, %ecx addl 76(%rsp), %r12d vpsrld $10, %ymm6, 
%ymm9 # rnd_1: 1 - 1 xorl %edx, %ecx xorl %r11d, %ebx rorxl $25, %r9d, %edx vpxor %ymm7, %ymm8, %ymm8 # rnd_1: 2 - 2 andl %r9d, %ebx xorl %ecx, %edx rorxl $13, %r13d, %ecx vpxor %ymm8, %ymm9, %ymm9 # rnd_1: 3 - 3 addl %edx, %r12d rorxl $2, %r13d, %edx xorl %r11d, %ebx vpshufb %ymm12, %ymm9, %ymm9 # rnd_1: 4 - 4 xorl %edx, %ecx rorxl $22, %r13d, %edx addl %ebx, %r12d vpaddd %ymm4, %ymm9, %ymm2 # rnd_1: 5 - 5 xorl %ecx, %edx addl %r12d, %r8d movl %r14d, %ebx vpaddd 192+L_avx2_rorx_sha256_k(%rip), %ymm2, %ymm4 # rnd_1: 6 - 6 xorl %r13d, %ebx addl %edx, %r12d andl %ebx, %eax # rnd_1: 7 - 7 xorl %r14d, %eax rorxl $6, %r8d, %edx addl %eax, %r12d vmovdqu %ymm4, 192(%rsp) # rnd_0: 0 - 0 movl %r9d, %eax rorxl $11, %r8d, %ecx addl 96(%rsp), %r11d vpalignr $4, %ymm3, %ymm0, %ymm5 # rnd_0: 1 - 1 xorl %edx, %ecx xorl %r10d, %eax rorxl $25, %r8d, %edx vpalignr $4, %ymm1, %ymm2, %ymm4 # rnd_0: 2 - 2 andl %r8d, %eax xorl %ecx, %edx rorxl $13, %r12d, %ecx vpsrld $7, %ymm5, %ymm6 # rnd_0: 3 - 3 addl %edx, %r11d rorxl $2, %r12d, %edx xorl %r10d, %eax vpslld $25, %ymm5, %ymm7 # rnd_0: 4 - 4 xorl %edx, %ecx rorxl $22, %r12d, %edx addl %eax, %r11d vpsrld $18, %ymm5, %ymm8 # rnd_0: 5 - 5 xorl %ecx, %edx movl %r13d, %eax addl %r11d, %r15d vpslld $14, %ymm5, %ymm9 # rnd_0: 6 - 6 xorl %r12d, %eax addl %edx, %r11d andl %eax, %ebx vpor %ymm7, %ymm6, %ymm6 # rnd_0: 7 - 7 xorl %r13d, %ebx rorxl $6, %r15d, %edx addl %ebx, %r11d vpor %ymm9, %ymm8, %ymm8 # rnd_1: 0 - 0 movl %r8d, %ebx rorxl $11, %r15d, %ecx addl 100(%rsp), %r10d vpsrld $3, %ymm5, %ymm9 # rnd_1: 1 - 1 xorl %edx, %ecx xorl %r9d, %ebx rorxl $25, %r15d, %edx vpxor %ymm8, %ymm6, %ymm6 # rnd_1: 2 - 2 andl %r15d, %ebx xorl %ecx, %edx rorxl $13, %r11d, %ecx vpshufd $0xfa, %ymm2, %ymm7 # rnd_1: 3 - 3 addl %edx, %r10d rorxl $2, %r11d, %edx xorl %r9d, %ebx vpxor %ymm6, %ymm9, %ymm5 # rnd_1: 4 - 4 xorl %edx, %ecx rorxl $22, %r11d, %edx addl %ebx, %r10d vpsrld $10, %ymm7, %ymm8 # rnd_1: 5 - 5 xorl %ecx, %edx addl %r10d, %r14d movl %r12d, %ebx vpsrlq $19, %ymm7, %ymm6 # rnd_1: 6 - 6 xorl %r11d, %ebx addl %edx, %r10d andl %ebx, %eax vpsrlq $0x11, %ymm7, %ymm7 # rnd_1: 7 - 7 xorl %r12d, %eax rorxl $6, %r14d, %edx addl %eax, %r10d vpaddd %ymm3, %ymm4, %ymm4 # rnd_0: 0 - 0 movl %r15d, %eax rorxl $11, %r14d, %ecx addl 104(%rsp), %r9d vpxor %ymm7, %ymm6, %ymm6 # rnd_0: 1 - 1 xorl %edx, %ecx xorl %r8d, %eax rorxl $25, %r14d, %edx vpxor %ymm6, %ymm8, %ymm8 # rnd_0: 2 - 2 andl %r14d, %eax xorl %ecx, %edx rorxl $13, %r10d, %ecx vpaddd %ymm5, %ymm4, %ymm4 # rnd_0: 3 - 3 addl %edx, %r9d rorxl $2, %r10d, %edx xorl %r8d, %eax vpshufb %ymm11, %ymm8, %ymm8 # rnd_0: 4 - 4 xorl %edx, %ecx rorxl $22, %r10d, %edx addl %eax, %r9d vpaddd %ymm8, %ymm4, %ymm4 # rnd_0: 5 - 5 xorl %ecx, %edx movl %r11d, %eax addl %r9d, %r13d vpshufd $0x50, %ymm4, %ymm6 # rnd_0: 6 - 6 xorl %r10d, %eax addl %edx, %r9d andl %eax, %ebx vpsrlq $0x11, %ymm6, %ymm8 # rnd_0: 7 - 7 xorl %r11d, %ebx rorxl $6, %r13d, %edx addl %ebx, %r9d vpsrlq $19, %ymm6, %ymm7 # rnd_1: 0 - 0 movl %r14d, %ebx rorxl $11, %r13d, %ecx addl 108(%rsp), %r8d vpsrld $10, %ymm6, %ymm9 # rnd_1: 1 - 1 xorl %edx, %ecx xorl %r15d, %ebx rorxl $25, %r13d, %edx vpxor %ymm7, %ymm8, %ymm8 # rnd_1: 2 - 2 andl %r13d, %ebx xorl %ecx, %edx rorxl $13, %r9d, %ecx vpxor %ymm8, %ymm9, %ymm9 # rnd_1: 3 - 3 addl %edx, %r8d rorxl $2, %r9d, %edx xorl %r15d, %ebx vpshufb %ymm12, %ymm9, %ymm9 # rnd_1: 4 - 4 xorl %edx, %ecx rorxl $22, %r9d, %edx addl %ebx, %r8d vpaddd %ymm4, %ymm9, %ymm3 # rnd_1: 5 - 5 xorl %ecx, %edx addl %r8d, %r12d movl %r10d, %ebx vpaddd 
224+L_avx2_rorx_sha256_k(%rip), %ymm3, %ymm4 # rnd_1: 6 - 6 xorl %r9d, %ebx addl %edx, %r8d andl %ebx, %eax # rnd_1: 7 - 7 xorl %r10d, %eax rorxl $6, %r12d, %edx addl %eax, %r8d vmovdqu %ymm4, 224(%rsp) # rnd_0: 0 - 0 movl %r13d, %eax rorxl $11, %r12d, %ecx addl 128(%rsp), %r15d vpalignr $4, %ymm0, %ymm1, %ymm5 # rnd_0: 1 - 1 xorl %edx, %ecx xorl %r14d, %eax rorxl $25, %r12d, %edx vpalignr $4, %ymm2, %ymm3, %ymm4 # rnd_0: 2 - 2 andl %r12d, %eax xorl %ecx, %edx rorxl $13, %r8d, %ecx vpsrld $7, %ymm5, %ymm6 # rnd_0: 3 - 3 addl %edx, %r15d rorxl $2, %r8d, %edx xorl %r14d, %eax vpslld $25, %ymm5, %ymm7 # rnd_0: 4 - 4 xorl %edx, %ecx rorxl $22, %r8d, %edx addl %eax, %r15d vpsrld $18, %ymm5, %ymm8 # rnd_0: 5 - 5 xorl %ecx, %edx movl %r9d, %eax addl %r15d, %r11d vpslld $14, %ymm5, %ymm9 # rnd_0: 6 - 6 xorl %r8d, %eax addl %edx, %r15d andl %eax, %ebx vpor %ymm7, %ymm6, %ymm6 # rnd_0: 7 - 7 xorl %r9d, %ebx rorxl $6, %r11d, %edx addl %ebx, %r15d vpor %ymm9, %ymm8, %ymm8 # rnd_1: 0 - 0 movl %r12d, %ebx rorxl $11, %r11d, %ecx addl 132(%rsp), %r14d vpsrld $3, %ymm5, %ymm9 # rnd_1: 1 - 1 xorl %edx, %ecx xorl %r13d, %ebx rorxl $25, %r11d, %edx vpxor %ymm8, %ymm6, %ymm6 # rnd_1: 2 - 2 andl %r11d, %ebx xorl %ecx, %edx rorxl $13, %r15d, %ecx vpshufd $0xfa, %ymm3, %ymm7 # rnd_1: 3 - 3 addl %edx, %r14d rorxl $2, %r15d, %edx xorl %r13d, %ebx vpxor %ymm6, %ymm9, %ymm5 # rnd_1: 4 - 4 xorl %edx, %ecx rorxl $22, %r15d, %edx addl %ebx, %r14d vpsrld $10, %ymm7, %ymm8 # rnd_1: 5 - 5 xorl %ecx, %edx addl %r14d, %r10d movl %r8d, %ebx vpsrlq $19, %ymm7, %ymm6 # rnd_1: 6 - 6 xorl %r15d, %ebx addl %edx, %r14d andl %ebx, %eax vpsrlq $0x11, %ymm7, %ymm7 # rnd_1: 7 - 7 xorl %r8d, %eax rorxl $6, %r10d, %edx addl %eax, %r14d vpaddd %ymm0, %ymm4, %ymm4 # rnd_0: 0 - 0 movl %r11d, %eax rorxl $11, %r10d, %ecx addl 136(%rsp), %r13d vpxor %ymm7, %ymm6, %ymm6 # rnd_0: 1 - 1 xorl %edx, %ecx xorl %r12d, %eax rorxl $25, %r10d, %edx vpxor %ymm6, %ymm8, %ymm8 # rnd_0: 2 - 2 andl %r10d, %eax xorl %ecx, %edx rorxl $13, %r14d, %ecx vpaddd %ymm5, %ymm4, %ymm4 # rnd_0: 3 - 3 addl %edx, %r13d rorxl $2, %r14d, %edx xorl %r12d, %eax vpshufb %ymm11, %ymm8, %ymm8 # rnd_0: 4 - 4 xorl %edx, %ecx rorxl $22, %r14d, %edx addl %eax, %r13d vpaddd %ymm8, %ymm4, %ymm4 # rnd_0: 5 - 5 xorl %ecx, %edx movl %r15d, %eax addl %r13d, %r9d vpshufd $0x50, %ymm4, %ymm6 # rnd_0: 6 - 6 xorl %r14d, %eax addl %edx, %r13d andl %eax, %ebx vpsrlq $0x11, %ymm6, %ymm8 # rnd_0: 7 - 7 xorl %r15d, %ebx rorxl $6, %r9d, %edx addl %ebx, %r13d vpsrlq $19, %ymm6, %ymm7 # rnd_1: 0 - 0 movl %r10d, %ebx rorxl $11, %r9d, %ecx addl 140(%rsp), %r12d vpsrld $10, %ymm6, %ymm9 # rnd_1: 1 - 1 xorl %edx, %ecx xorl %r11d, %ebx rorxl $25, %r9d, %edx vpxor %ymm7, %ymm8, %ymm8 # rnd_1: 2 - 2 andl %r9d, %ebx xorl %ecx, %edx rorxl $13, %r13d, %ecx vpxor %ymm8, %ymm9, %ymm9 # rnd_1: 3 - 3 addl %edx, %r12d rorxl $2, %r13d, %edx xorl %r11d, %ebx vpshufb %ymm12, %ymm9, %ymm9 # rnd_1: 4 - 4 xorl %edx, %ecx rorxl $22, %r13d, %edx addl %ebx, %r12d vpaddd %ymm4, %ymm9, %ymm0 # rnd_1: 5 - 5 xorl %ecx, %edx addl %r12d, %r8d movl %r14d, %ebx vpaddd 256+L_avx2_rorx_sha256_k(%rip), %ymm0, %ymm4 # rnd_1: 6 - 6 xorl %r13d, %ebx addl %edx, %r12d andl %ebx, %eax # rnd_1: 7 - 7 xorl %r14d, %eax rorxl $6, %r8d, %edx addl %eax, %r12d vmovdqu %ymm4, 256(%rsp) # rnd_0: 0 - 0 movl %r9d, %eax rorxl $11, %r8d, %ecx addl 160(%rsp), %r11d vpalignr $4, %ymm1, %ymm2, %ymm5 # rnd_0: 1 - 1 xorl %edx, %ecx xorl %r10d, %eax rorxl $25, %r8d, %edx vpalignr $4, %ymm3, %ymm0, %ymm4 # rnd_0: 2 - 2 andl %r8d, %eax xorl %ecx, %edx rorxl 
$13, %r12d, %ecx vpsrld $7, %ymm5, %ymm6 # rnd_0: 3 - 3 addl %edx, %r11d rorxl $2, %r12d, %edx xorl %r10d, %eax vpslld $25, %ymm5, %ymm7 # rnd_0: 4 - 4 xorl %edx, %ecx rorxl $22, %r12d, %edx addl %eax, %r11d vpsrld $18, %ymm5, %ymm8 # rnd_0: 5 - 5 xorl %ecx, %edx movl %r13d, %eax addl %r11d, %r15d vpslld $14, %ymm5, %ymm9 # rnd_0: 6 - 6 xorl %r12d, %eax addl %edx, %r11d andl %eax, %ebx vpor %ymm7, %ymm6, %ymm6 # rnd_0: 7 - 7 xorl %r13d, %ebx rorxl $6, %r15d, %edx addl %ebx, %r11d vpor %ymm9, %ymm8, %ymm8 # rnd_1: 0 - 0 movl %r8d, %ebx rorxl $11, %r15d, %ecx addl 164(%rsp), %r10d vpsrld $3, %ymm5, %ymm9 # rnd_1: 1 - 1 xorl %edx, %ecx xorl %r9d, %ebx rorxl $25, %r15d, %edx vpxor %ymm8, %ymm6, %ymm6 # rnd_1: 2 - 2 andl %r15d, %ebx xorl %ecx, %edx rorxl $13, %r11d, %ecx vpshufd $0xfa, %ymm0, %ymm7 # rnd_1: 3 - 3 addl %edx, %r10d rorxl $2, %r11d, %edx xorl %r9d, %ebx vpxor %ymm6, %ymm9, %ymm5 # rnd_1: 4 - 4 xorl %edx, %ecx rorxl $22, %r11d, %edx addl %ebx, %r10d vpsrld $10, %ymm7, %ymm8 # rnd_1: 5 - 5 xorl %ecx, %edx addl %r10d, %r14d movl %r12d, %ebx vpsrlq $19, %ymm7, %ymm6 # rnd_1: 6 - 6 xorl %r11d, %ebx addl %edx, %r10d andl %ebx, %eax vpsrlq $0x11, %ymm7, %ymm7 # rnd_1: 7 - 7 xorl %r12d, %eax rorxl $6, %r14d, %edx addl %eax, %r10d vpaddd %ymm1, %ymm4, %ymm4 # rnd_0: 0 - 0 movl %r15d, %eax rorxl $11, %r14d, %ecx addl 168(%rsp), %r9d vpxor %ymm7, %ymm6, %ymm6 # rnd_0: 1 - 1 xorl %edx, %ecx xorl %r8d, %eax rorxl $25, %r14d, %edx vpxor %ymm6, %ymm8, %ymm8 # rnd_0: 2 - 2 andl %r14d, %eax xorl %ecx, %edx rorxl $13, %r10d, %ecx vpaddd %ymm5, %ymm4, %ymm4 # rnd_0: 3 - 3 addl %edx, %r9d rorxl $2, %r10d, %edx xorl %r8d, %eax vpshufb %ymm11, %ymm8, %ymm8 # rnd_0: 4 - 4 xorl %edx, %ecx rorxl $22, %r10d, %edx addl %eax, %r9d vpaddd %ymm8, %ymm4, %ymm4 # rnd_0: 5 - 5 xorl %ecx, %edx movl %r11d, %eax addl %r9d, %r13d vpshufd $0x50, %ymm4, %ymm6 # rnd_0: 6 - 6 xorl %r10d, %eax addl %edx, %r9d andl %eax, %ebx vpsrlq $0x11, %ymm6, %ymm8 # rnd_0: 7 - 7 xorl %r11d, %ebx rorxl $6, %r13d, %edx addl %ebx, %r9d vpsrlq $19, %ymm6, %ymm7 # rnd_1: 0 - 0 movl %r14d, %ebx rorxl $11, %r13d, %ecx addl 172(%rsp), %r8d vpsrld $10, %ymm6, %ymm9 # rnd_1: 1 - 1 xorl %edx, %ecx xorl %r15d, %ebx rorxl $25, %r13d, %edx vpxor %ymm7, %ymm8, %ymm8 # rnd_1: 2 - 2 andl %r13d, %ebx xorl %ecx, %edx rorxl $13, %r9d, %ecx vpxor %ymm8, %ymm9, %ymm9 # rnd_1: 3 - 3 addl %edx, %r8d rorxl $2, %r9d, %edx xorl %r15d, %ebx vpshufb %ymm12, %ymm9, %ymm9 # rnd_1: 4 - 4 xorl %edx, %ecx rorxl $22, %r9d, %edx addl %ebx, %r8d vpaddd %ymm4, %ymm9, %ymm1 # rnd_1: 5 - 5 xorl %ecx, %edx addl %r8d, %r12d movl %r10d, %ebx vpaddd 288+L_avx2_rorx_sha256_k(%rip), %ymm1, %ymm4 # rnd_1: 6 - 6 xorl %r9d, %ebx addl %edx, %r8d andl %ebx, %eax # rnd_1: 7 - 7 xorl %r10d, %eax rorxl $6, %r12d, %edx addl %eax, %r8d vmovdqu %ymm4, 288(%rsp) # rnd_0: 0 - 0 movl %r13d, %eax rorxl $11, %r12d, %ecx addl 192(%rsp), %r15d vpalignr $4, %ymm2, %ymm3, %ymm5 # rnd_0: 1 - 1 xorl %edx, %ecx xorl %r14d, %eax rorxl $25, %r12d, %edx vpalignr $4, %ymm0, %ymm1, %ymm4 # rnd_0: 2 - 2 andl %r12d, %eax xorl %ecx, %edx rorxl $13, %r8d, %ecx vpsrld $7, %ymm5, %ymm6 # rnd_0: 3 - 3 addl %edx, %r15d rorxl $2, %r8d, %edx xorl %r14d, %eax vpslld $25, %ymm5, %ymm7 # rnd_0: 4 - 4 xorl %edx, %ecx rorxl $22, %r8d, %edx addl %eax, %r15d vpsrld $18, %ymm5, %ymm8 # rnd_0: 5 - 5 xorl %ecx, %edx movl %r9d, %eax addl %r15d, %r11d vpslld $14, %ymm5, %ymm9 # rnd_0: 6 - 6 xorl %r8d, %eax addl %edx, %r15d andl %eax, %ebx vpor %ymm7, %ymm6, %ymm6 # rnd_0: 7 - 7 xorl %r9d, %ebx rorxl $6, %r11d, %edx addl %ebx, 
%r15d vpor %ymm9, %ymm8, %ymm8 # rnd_1: 0 - 0 movl %r12d, %ebx rorxl $11, %r11d, %ecx addl 196(%rsp), %r14d vpsrld $3, %ymm5, %ymm9 # rnd_1: 1 - 1 xorl %edx, %ecx xorl %r13d, %ebx rorxl $25, %r11d, %edx vpxor %ymm8, %ymm6, %ymm6 # rnd_1: 2 - 2 andl %r11d, %ebx xorl %ecx, %edx rorxl $13, %r15d, %ecx vpshufd $0xfa, %ymm1, %ymm7 # rnd_1: 3 - 3 addl %edx, %r14d rorxl $2, %r15d, %edx xorl %r13d, %ebx vpxor %ymm6, %ymm9, %ymm5 # rnd_1: 4 - 4 xorl %edx, %ecx rorxl $22, %r15d, %edx addl %ebx, %r14d vpsrld $10, %ymm7, %ymm8 # rnd_1: 5 - 5 xorl %ecx, %edx addl %r14d, %r10d movl %r8d, %ebx vpsrlq $19, %ymm7, %ymm6 # rnd_1: 6 - 6 xorl %r15d, %ebx addl %edx, %r14d andl %ebx, %eax vpsrlq $0x11, %ymm7, %ymm7 # rnd_1: 7 - 7 xorl %r8d, %eax rorxl $6, %r10d, %edx addl %eax, %r14d vpaddd %ymm2, %ymm4, %ymm4 # rnd_0: 0 - 0 movl %r11d, %eax rorxl $11, %r10d, %ecx addl 200(%rsp), %r13d vpxor %ymm7, %ymm6, %ymm6 # rnd_0: 1 - 1 xorl %edx, %ecx xorl %r12d, %eax rorxl $25, %r10d, %edx vpxor %ymm6, %ymm8, %ymm8 # rnd_0: 2 - 2 andl %r10d, %eax xorl %ecx, %edx rorxl $13, %r14d, %ecx vpaddd %ymm5, %ymm4, %ymm4 # rnd_0: 3 - 3 addl %edx, %r13d rorxl $2, %r14d, %edx xorl %r12d, %eax vpshufb %ymm11, %ymm8, %ymm8 # rnd_0: 4 - 4 xorl %edx, %ecx rorxl $22, %r14d, %edx addl %eax, %r13d vpaddd %ymm8, %ymm4, %ymm4 # rnd_0: 5 - 5 xorl %ecx, %edx movl %r15d, %eax addl %r13d, %r9d vpshufd $0x50, %ymm4, %ymm6 # rnd_0: 6 - 6 xorl %r14d, %eax addl %edx, %r13d andl %eax, %ebx vpsrlq $0x11, %ymm6, %ymm8 # rnd_0: 7 - 7 xorl %r15d, %ebx rorxl $6, %r9d, %edx addl %ebx, %r13d vpsrlq $19, %ymm6, %ymm7 # rnd_1: 0 - 0 movl %r10d, %ebx rorxl $11, %r9d, %ecx addl 204(%rsp), %r12d vpsrld $10, %ymm6, %ymm9 # rnd_1: 1 - 1 xorl %edx, %ecx xorl %r11d, %ebx rorxl $25, %r9d, %edx vpxor %ymm7, %ymm8, %ymm8 # rnd_1: 2 - 2 andl %r9d, %ebx xorl %ecx, %edx rorxl $13, %r13d, %ecx vpxor %ymm8, %ymm9, %ymm9 # rnd_1: 3 - 3 addl %edx, %r12d rorxl $2, %r13d, %edx xorl %r11d, %ebx vpshufb %ymm12, %ymm9, %ymm9 # rnd_1: 4 - 4 xorl %edx, %ecx rorxl $22, %r13d, %edx addl %ebx, %r12d vpaddd %ymm4, %ymm9, %ymm2 # rnd_1: 5 - 5 xorl %ecx, %edx addl %r12d, %r8d movl %r14d, %ebx vpaddd 320+L_avx2_rorx_sha256_k(%rip), %ymm2, %ymm4 # rnd_1: 6 - 6 xorl %r13d, %ebx addl %edx, %r12d andl %ebx, %eax # rnd_1: 7 - 7 xorl %r14d, %eax rorxl $6, %r8d, %edx addl %eax, %r12d vmovdqu %ymm4, 320(%rsp) # rnd_0: 0 - 0 movl %r9d, %eax rorxl $11, %r8d, %ecx addl 224(%rsp), %r11d vpalignr $4, %ymm3, %ymm0, %ymm5 # rnd_0: 1 - 1 xorl %edx, %ecx xorl %r10d, %eax rorxl $25, %r8d, %edx vpalignr $4, %ymm1, %ymm2, %ymm4 # rnd_0: 2 - 2 andl %r8d, %eax xorl %ecx, %edx rorxl $13, %r12d, %ecx vpsrld $7, %ymm5, %ymm6 # rnd_0: 3 - 3 addl %edx, %r11d rorxl $2, %r12d, %edx xorl %r10d, %eax vpslld $25, %ymm5, %ymm7 # rnd_0: 4 - 4 xorl %edx, %ecx rorxl $22, %r12d, %edx addl %eax, %r11d vpsrld $18, %ymm5, %ymm8 # rnd_0: 5 - 5 xorl %ecx, %edx movl %r13d, %eax addl %r11d, %r15d vpslld $14, %ymm5, %ymm9 # rnd_0: 6 - 6 xorl %r12d, %eax addl %edx, %r11d andl %eax, %ebx vpor %ymm7, %ymm6, %ymm6 # rnd_0: 7 - 7 xorl %r13d, %ebx rorxl $6, %r15d, %edx addl %ebx, %r11d vpor %ymm9, %ymm8, %ymm8 # rnd_1: 0 - 0 movl %r8d, %ebx rorxl $11, %r15d, %ecx addl 228(%rsp), %r10d vpsrld $3, %ymm5, %ymm9 # rnd_1: 1 - 1 xorl %edx, %ecx xorl %r9d, %ebx rorxl $25, %r15d, %edx vpxor %ymm8, %ymm6, %ymm6 # rnd_1: 2 - 2 andl %r15d, %ebx xorl %ecx, %edx rorxl $13, %r11d, %ecx vpshufd $0xfa, %ymm2, %ymm7 # rnd_1: 3 - 3 addl %edx, %r10d rorxl $2, %r11d, %edx xorl %r9d, %ebx vpxor %ymm6, %ymm9, %ymm5 # rnd_1: 4 - 4 xorl %edx, %ecx rorxl $22, 
%r11d, %edx addl %ebx, %r10d vpsrld $10, %ymm7, %ymm8 # rnd_1: 5 - 5 xorl %ecx, %edx addl %r10d, %r14d movl %r12d, %ebx vpsrlq $19, %ymm7, %ymm6 # rnd_1: 6 - 6 xorl %r11d, %ebx addl %edx, %r10d andl %ebx, %eax vpsrlq $0x11, %ymm7, %ymm7 # rnd_1: 7 - 7 xorl %r12d, %eax rorxl $6, %r14d, %edx addl %eax, %r10d vpaddd %ymm3, %ymm4, %ymm4 # rnd_0: 0 - 0 movl %r15d, %eax rorxl $11, %r14d, %ecx addl 232(%rsp), %r9d vpxor %ymm7, %ymm6, %ymm6 # rnd_0: 1 - 1 xorl %edx, %ecx xorl %r8d, %eax rorxl $25, %r14d, %edx vpxor %ymm6, %ymm8, %ymm8 # rnd_0: 2 - 2 andl %r14d, %eax xorl %ecx, %edx rorxl $13, %r10d, %ecx vpaddd %ymm5, %ymm4, %ymm4 # rnd_0: 3 - 3 addl %edx, %r9d rorxl $2, %r10d, %edx xorl %r8d, %eax vpshufb %ymm11, %ymm8, %ymm8 # rnd_0: 4 - 4 xorl %edx, %ecx rorxl $22, %r10d, %edx addl %eax, %r9d vpaddd %ymm8, %ymm4, %ymm4 # rnd_0: 5 - 5 xorl %ecx, %edx movl %r11d, %eax addl %r9d, %r13d vpshufd $0x50, %ymm4, %ymm6 # rnd_0: 6 - 6 xorl %r10d, %eax addl %edx, %r9d andl %eax, %ebx vpsrlq $0x11, %ymm6, %ymm8 # rnd_0: 7 - 7 xorl %r11d, %ebx rorxl $6, %r13d, %edx addl %ebx, %r9d vpsrlq $19, %ymm6, %ymm7 # rnd_1: 0 - 0 movl %r14d, %ebx rorxl $11, %r13d, %ecx addl 236(%rsp), %r8d vpsrld $10, %ymm6, %ymm9 # rnd_1: 1 - 1 xorl %edx, %ecx xorl %r15d, %ebx rorxl $25, %r13d, %edx vpxor %ymm7, %ymm8, %ymm8 # rnd_1: 2 - 2 andl %r13d, %ebx xorl %ecx, %edx rorxl $13, %r9d, %ecx vpxor %ymm8, %ymm9, %ymm9 # rnd_1: 3 - 3 addl %edx, %r8d rorxl $2, %r9d, %edx xorl %r15d, %ebx vpshufb %ymm12, %ymm9, %ymm9 # rnd_1: 4 - 4 xorl %edx, %ecx rorxl $22, %r9d, %edx addl %ebx, %r8d vpaddd %ymm4, %ymm9, %ymm3 # rnd_1: 5 - 5 xorl %ecx, %edx addl %r8d, %r12d movl %r10d, %ebx vpaddd 352+L_avx2_rorx_sha256_k(%rip), %ymm3, %ymm4 # rnd_1: 6 - 6 xorl %r9d, %ebx addl %edx, %r8d andl %ebx, %eax # rnd_1: 7 - 7 xorl %r10d, %eax rorxl $6, %r12d, %edx addl %eax, %r8d vmovdqu %ymm4, 352(%rsp) # rnd_0: 0 - 0 movl %r13d, %eax rorxl $11, %r12d, %ecx addl 256(%rsp), %r15d vpalignr $4, %ymm0, %ymm1, %ymm5 # rnd_0: 1 - 1 xorl %edx, %ecx xorl %r14d, %eax rorxl $25, %r12d, %edx vpalignr $4, %ymm2, %ymm3, %ymm4 # rnd_0: 2 - 2 andl %r12d, %eax xorl %ecx, %edx rorxl $13, %r8d, %ecx vpsrld $7, %ymm5, %ymm6 # rnd_0: 3 - 3 addl %edx, %r15d rorxl $2, %r8d, %edx xorl %r14d, %eax vpslld $25, %ymm5, %ymm7 # rnd_0: 4 - 4 xorl %edx, %ecx rorxl $22, %r8d, %edx addl %eax, %r15d vpsrld $18, %ymm5, %ymm8 # rnd_0: 5 - 5 xorl %ecx, %edx movl %r9d, %eax addl %r15d, %r11d vpslld $14, %ymm5, %ymm9 # rnd_0: 6 - 6 xorl %r8d, %eax addl %edx, %r15d andl %eax, %ebx vpor %ymm7, %ymm6, %ymm6 # rnd_0: 7 - 7 xorl %r9d, %ebx rorxl $6, %r11d, %edx addl %ebx, %r15d vpor %ymm9, %ymm8, %ymm8 # rnd_1: 0 - 0 movl %r12d, %ebx rorxl $11, %r11d, %ecx addl 260(%rsp), %r14d vpsrld $3, %ymm5, %ymm9 # rnd_1: 1 - 1 xorl %edx, %ecx xorl %r13d, %ebx rorxl $25, %r11d, %edx vpxor %ymm8, %ymm6, %ymm6 # rnd_1: 2 - 2 andl %r11d, %ebx xorl %ecx, %edx rorxl $13, %r15d, %ecx vpshufd $0xfa, %ymm3, %ymm7 # rnd_1: 3 - 3 addl %edx, %r14d rorxl $2, %r15d, %edx xorl %r13d, %ebx vpxor %ymm6, %ymm9, %ymm5 # rnd_1: 4 - 4 xorl %edx, %ecx rorxl $22, %r15d, %edx addl %ebx, %r14d vpsrld $10, %ymm7, %ymm8 # rnd_1: 5 - 5 xorl %ecx, %edx addl %r14d, %r10d movl %r8d, %ebx vpsrlq $19, %ymm7, %ymm6 # rnd_1: 6 - 6 xorl %r15d, %ebx addl %edx, %r14d andl %ebx, %eax vpsrlq $0x11, %ymm7, %ymm7 # rnd_1: 7 - 7 xorl %r8d, %eax rorxl $6, %r10d, %edx addl %eax, %r14d vpaddd %ymm0, %ymm4, %ymm4 # rnd_0: 0 - 0 movl %r11d, %eax rorxl $11, %r10d, %ecx addl 264(%rsp), %r13d vpxor %ymm7, %ymm6, %ymm6 # rnd_0: 1 - 1 xorl %edx, %ecx xorl %r12d, 
%eax rorxl $25, %r10d, %edx vpxor %ymm6, %ymm8, %ymm8 # rnd_0: 2 - 2 andl %r10d, %eax xorl %ecx, %edx rorxl $13, %r14d, %ecx vpaddd %ymm5, %ymm4, %ymm4 # rnd_0: 3 - 3 addl %edx, %r13d rorxl $2, %r14d, %edx xorl %r12d, %eax vpshufb %ymm11, %ymm8, %ymm8 # rnd_0: 4 - 4 xorl %edx, %ecx rorxl $22, %r14d, %edx addl %eax, %r13d vpaddd %ymm8, %ymm4, %ymm4 # rnd_0: 5 - 5 xorl %ecx, %edx movl %r15d, %eax addl %r13d, %r9d vpshufd $0x50, %ymm4, %ymm6 # rnd_0: 6 - 6 xorl %r14d, %eax addl %edx, %r13d andl %eax, %ebx vpsrlq $0x11, %ymm6, %ymm8 # rnd_0: 7 - 7 xorl %r15d, %ebx rorxl $6, %r9d, %edx addl %ebx, %r13d vpsrlq $19, %ymm6, %ymm7 # rnd_1: 0 - 0 movl %r10d, %ebx rorxl $11, %r9d, %ecx addl 268(%rsp), %r12d vpsrld $10, %ymm6, %ymm9 # rnd_1: 1 - 1 xorl %edx, %ecx xorl %r11d, %ebx rorxl $25, %r9d, %edx vpxor %ymm7, %ymm8, %ymm8 # rnd_1: 2 - 2 andl %r9d, %ebx xorl %ecx, %edx rorxl $13, %r13d, %ecx vpxor %ymm8, %ymm9, %ymm9 # rnd_1: 3 - 3 addl %edx, %r12d rorxl $2, %r13d, %edx xorl %r11d, %ebx vpshufb %ymm12, %ymm9, %ymm9 # rnd_1: 4 - 4 xorl %edx, %ecx rorxl $22, %r13d, %edx addl %ebx, %r12d vpaddd %ymm4, %ymm9, %ymm0 # rnd_1: 5 - 5 xorl %ecx, %edx addl %r12d, %r8d movl %r14d, %ebx vpaddd 384+L_avx2_rorx_sha256_k(%rip), %ymm0, %ymm4 # rnd_1: 6 - 6 xorl %r13d, %ebx addl %edx, %r12d andl %ebx, %eax # rnd_1: 7 - 7 xorl %r14d, %eax rorxl $6, %r8d, %edx addl %eax, %r12d vmovdqu %ymm4, 384(%rsp) # rnd_0: 0 - 0 movl %r9d, %eax rorxl $11, %r8d, %ecx addl 288(%rsp), %r11d vpalignr $4, %ymm1, %ymm2, %ymm5 # rnd_0: 1 - 1 xorl %edx, %ecx xorl %r10d, %eax rorxl $25, %r8d, %edx vpalignr $4, %ymm3, %ymm0, %ymm4 # rnd_0: 2 - 2 andl %r8d, %eax xorl %ecx, %edx rorxl $13, %r12d, %ecx vpsrld $7, %ymm5, %ymm6 # rnd_0: 3 - 3 addl %edx, %r11d rorxl $2, %r12d, %edx xorl %r10d, %eax vpslld $25, %ymm5, %ymm7 # rnd_0: 4 - 4 xorl %edx, %ecx rorxl $22, %r12d, %edx addl %eax, %r11d vpsrld $18, %ymm5, %ymm8 # rnd_0: 5 - 5 xorl %ecx, %edx movl %r13d, %eax addl %r11d, %r15d vpslld $14, %ymm5, %ymm9 # rnd_0: 6 - 6 xorl %r12d, %eax addl %edx, %r11d andl %eax, %ebx vpor %ymm7, %ymm6, %ymm6 # rnd_0: 7 - 7 xorl %r13d, %ebx rorxl $6, %r15d, %edx addl %ebx, %r11d vpor %ymm9, %ymm8, %ymm8 # rnd_1: 0 - 0 movl %r8d, %ebx rorxl $11, %r15d, %ecx addl 292(%rsp), %r10d vpsrld $3, %ymm5, %ymm9 # rnd_1: 1 - 1 xorl %edx, %ecx xorl %r9d, %ebx rorxl $25, %r15d, %edx vpxor %ymm8, %ymm6, %ymm6 # rnd_1: 2 - 2 andl %r15d, %ebx xorl %ecx, %edx rorxl $13, %r11d, %ecx vpshufd $0xfa, %ymm0, %ymm7 # rnd_1: 3 - 3 addl %edx, %r10d rorxl $2, %r11d, %edx xorl %r9d, %ebx vpxor %ymm6, %ymm9, %ymm5 # rnd_1: 4 - 4 xorl %edx, %ecx rorxl $22, %r11d, %edx addl %ebx, %r10d vpsrld $10, %ymm7, %ymm8 # rnd_1: 5 - 5 xorl %ecx, %edx addl %r10d, %r14d movl %r12d, %ebx vpsrlq $19, %ymm7, %ymm6 # rnd_1: 6 - 6 xorl %r11d, %ebx addl %edx, %r10d andl %ebx, %eax vpsrlq $0x11, %ymm7, %ymm7 # rnd_1: 7 - 7 xorl %r12d, %eax rorxl $6, %r14d, %edx addl %eax, %r10d vpaddd %ymm1, %ymm4, %ymm4 # rnd_0: 0 - 0 movl %r15d, %eax rorxl $11, %r14d, %ecx addl 296(%rsp), %r9d vpxor %ymm7, %ymm6, %ymm6 # rnd_0: 1 - 1 xorl %edx, %ecx xorl %r8d, %eax rorxl $25, %r14d, %edx vpxor %ymm6, %ymm8, %ymm8 # rnd_0: 2 - 2 andl %r14d, %eax xorl %ecx, %edx rorxl $13, %r10d, %ecx vpaddd %ymm5, %ymm4, %ymm4 # rnd_0: 3 - 3 addl %edx, %r9d rorxl $2, %r10d, %edx xorl %r8d, %eax vpshufb %ymm11, %ymm8, %ymm8 # rnd_0: 4 - 4 xorl %edx, %ecx rorxl $22, %r10d, %edx addl %eax, %r9d vpaddd %ymm8, %ymm4, %ymm4 # rnd_0: 5 - 5 xorl %ecx, %edx movl %r11d, %eax addl %r9d, %r13d vpshufd $0x50, %ymm4, %ymm6 # rnd_0: 6 - 6 xorl %r10d, 
%eax addl %edx, %r9d andl %eax, %ebx vpsrlq $0x11, %ymm6, %ymm8 # rnd_0: 7 - 7 xorl %r11d, %ebx rorxl $6, %r13d, %edx addl %ebx, %r9d vpsrlq $19, %ymm6, %ymm7 # rnd_1: 0 - 0 movl %r14d, %ebx rorxl $11, %r13d, %ecx addl 300(%rsp), %r8d vpsrld $10, %ymm6, %ymm9 # rnd_1: 1 - 1 xorl %edx, %ecx xorl %r15d, %ebx rorxl $25, %r13d, %edx vpxor %ymm7, %ymm8, %ymm8 # rnd_1: 2 - 2 andl %r13d, %ebx xorl %ecx, %edx rorxl $13, %r9d, %ecx vpxor %ymm8, %ymm9, %ymm9 # rnd_1: 3 - 3 addl %edx, %r8d rorxl $2, %r9d, %edx xorl %r15d, %ebx vpshufb %ymm12, %ymm9, %ymm9 # rnd_1: 4 - 4 xorl %edx, %ecx rorxl $22, %r9d, %edx addl %ebx, %r8d vpaddd %ymm4, %ymm9, %ymm1 # rnd_1: 5 - 5 xorl %ecx, %edx addl %r8d, %r12d movl %r10d, %ebx vpaddd 416+L_avx2_rorx_sha256_k(%rip), %ymm1, %ymm4 # rnd_1: 6 - 6 xorl %r9d, %ebx addl %edx, %r8d andl %ebx, %eax # rnd_1: 7 - 7 xorl %r10d, %eax rorxl $6, %r12d, %edx addl %eax, %r8d vmovdqu %ymm4, 416(%rsp) # rnd_0: 0 - 0 movl %r13d, %eax rorxl $11, %r12d, %ecx addl 320(%rsp), %r15d vpalignr $4, %ymm2, %ymm3, %ymm5 # rnd_0: 1 - 1 xorl %edx, %ecx xorl %r14d, %eax rorxl $25, %r12d, %edx vpalignr $4, %ymm0, %ymm1, %ymm4 # rnd_0: 2 - 2 andl %r12d, %eax xorl %ecx, %edx rorxl $13, %r8d, %ecx vpsrld $7, %ymm5, %ymm6 # rnd_0: 3 - 3 addl %edx, %r15d rorxl $2, %r8d, %edx xorl %r14d, %eax vpslld $25, %ymm5, %ymm7 # rnd_0: 4 - 4 xorl %edx, %ecx rorxl $22, %r8d, %edx addl %eax, %r15d vpsrld $18, %ymm5, %ymm8 # rnd_0: 5 - 5 xorl %ecx, %edx movl %r9d, %eax addl %r15d, %r11d vpslld $14, %ymm5, %ymm9 # rnd_0: 6 - 6 xorl %r8d, %eax addl %edx, %r15d andl %eax, %ebx vpor %ymm7, %ymm6, %ymm6 # rnd_0: 7 - 7 xorl %r9d, %ebx rorxl $6, %r11d, %edx addl %ebx, %r15d vpor %ymm9, %ymm8, %ymm8 # rnd_1: 0 - 0 movl %r12d, %ebx rorxl $11, %r11d, %ecx addl 324(%rsp), %r14d vpsrld $3, %ymm5, %ymm9 # rnd_1: 1 - 1 xorl %edx, %ecx xorl %r13d, %ebx rorxl $25, %r11d, %edx vpxor %ymm8, %ymm6, %ymm6 # rnd_1: 2 - 2 andl %r11d, %ebx xorl %ecx, %edx rorxl $13, %r15d, %ecx vpshufd $0xfa, %ymm1, %ymm7 # rnd_1: 3 - 3 addl %edx, %r14d rorxl $2, %r15d, %edx xorl %r13d, %ebx vpxor %ymm6, %ymm9, %ymm5 # rnd_1: 4 - 4 xorl %edx, %ecx rorxl $22, %r15d, %edx addl %ebx, %r14d vpsrld $10, %ymm7, %ymm8 # rnd_1: 5 - 5 xorl %ecx, %edx addl %r14d, %r10d movl %r8d, %ebx vpsrlq $19, %ymm7, %ymm6 # rnd_1: 6 - 6 xorl %r15d, %ebx addl %edx, %r14d andl %ebx, %eax vpsrlq $0x11, %ymm7, %ymm7 # rnd_1: 7 - 7 xorl %r8d, %eax rorxl $6, %r10d, %edx addl %eax, %r14d vpaddd %ymm2, %ymm4, %ymm4 # rnd_0: 0 - 0 movl %r11d, %eax rorxl $11, %r10d, %ecx addl 328(%rsp), %r13d vpxor %ymm7, %ymm6, %ymm6 # rnd_0: 1 - 1 xorl %edx, %ecx xorl %r12d, %eax rorxl $25, %r10d, %edx vpxor %ymm6, %ymm8, %ymm8 # rnd_0: 2 - 2 andl %r10d, %eax xorl %ecx, %edx rorxl $13, %r14d, %ecx vpaddd %ymm5, %ymm4, %ymm4 # rnd_0: 3 - 3 addl %edx, %r13d rorxl $2, %r14d, %edx xorl %r12d, %eax vpshufb %ymm11, %ymm8, %ymm8 # rnd_0: 4 - 4 xorl %edx, %ecx rorxl $22, %r14d, %edx addl %eax, %r13d vpaddd %ymm8, %ymm4, %ymm4 # rnd_0: 5 - 5 xorl %ecx, %edx movl %r15d, %eax addl %r13d, %r9d vpshufd $0x50, %ymm4, %ymm6 # rnd_0: 6 - 6 xorl %r14d, %eax addl %edx, %r13d andl %eax, %ebx vpsrlq $0x11, %ymm6, %ymm8 # rnd_0: 7 - 7 xorl %r15d, %ebx rorxl $6, %r9d, %edx addl %ebx, %r13d vpsrlq $19, %ymm6, %ymm7 # rnd_1: 0 - 0 movl %r10d, %ebx rorxl $11, %r9d, %ecx addl 332(%rsp), %r12d vpsrld $10, %ymm6, %ymm9 # rnd_1: 1 - 1 xorl %edx, %ecx xorl %r11d, %ebx rorxl $25, %r9d, %edx vpxor %ymm7, %ymm8, %ymm8 # rnd_1: 2 - 2 andl %r9d, %ebx xorl %ecx, %edx rorxl $13, %r13d, %ecx vpxor %ymm8, %ymm9, %ymm9 # rnd_1: 3 - 3 addl 
%edx, %r12d rorxl $2, %r13d, %edx xorl %r11d, %ebx vpshufb %ymm12, %ymm9, %ymm9 # rnd_1: 4 - 4 xorl %edx, %ecx rorxl $22, %r13d, %edx addl %ebx, %r12d vpaddd %ymm4, %ymm9, %ymm2 # rnd_1: 5 - 5 xorl %ecx, %edx addl %r12d, %r8d movl %r14d, %ebx vpaddd 448+L_avx2_rorx_sha256_k(%rip), %ymm2, %ymm4 # rnd_1: 6 - 6 xorl %r13d, %ebx addl %edx, %r12d andl %ebx, %eax # rnd_1: 7 - 7 xorl %r14d, %eax rorxl $6, %r8d, %edx addl %eax, %r12d vmovdqu %ymm4, 448(%rsp) # rnd_0: 0 - 0 movl %r9d, %eax rorxl $11, %r8d, %ecx addl 352(%rsp), %r11d vpalignr $4, %ymm3, %ymm0, %ymm5 # rnd_0: 1 - 1 xorl %edx, %ecx xorl %r10d, %eax rorxl $25, %r8d, %edx vpalignr $4, %ymm1, %ymm2, %ymm4 # rnd_0: 2 - 2 andl %r8d, %eax xorl %ecx, %edx rorxl $13, %r12d, %ecx vpsrld $7, %ymm5, %ymm6 # rnd_0: 3 - 3 addl %edx, %r11d rorxl $2, %r12d, %edx xorl %r10d, %eax vpslld $25, %ymm5, %ymm7 # rnd_0: 4 - 4 xorl %edx, %ecx rorxl $22, %r12d, %edx addl %eax, %r11d vpsrld $18, %ymm5, %ymm8 # rnd_0: 5 - 5 xorl %ecx, %edx movl %r13d, %eax addl %r11d, %r15d vpslld $14, %ymm5, %ymm9 # rnd_0: 6 - 6 xorl %r12d, %eax addl %edx, %r11d andl %eax, %ebx vpor %ymm7, %ymm6, %ymm6 # rnd_0: 7 - 7 xorl %r13d, %ebx rorxl $6, %r15d, %edx addl %ebx, %r11d vpor %ymm9, %ymm8, %ymm8 # rnd_1: 0 - 0 movl %r8d, %ebx rorxl $11, %r15d, %ecx addl 356(%rsp), %r10d vpsrld $3, %ymm5, %ymm9 # rnd_1: 1 - 1 xorl %edx, %ecx xorl %r9d, %ebx rorxl $25, %r15d, %edx vpxor %ymm8, %ymm6, %ymm6 # rnd_1: 2 - 2 andl %r15d, %ebx xorl %ecx, %edx rorxl $13, %r11d, %ecx vpshufd $0xfa, %ymm2, %ymm7 # rnd_1: 3 - 3 addl %edx, %r10d rorxl $2, %r11d, %edx xorl %r9d, %ebx vpxor %ymm6, %ymm9, %ymm5 # rnd_1: 4 - 4 xorl %edx, %ecx rorxl $22, %r11d, %edx addl %ebx, %r10d vpsrld $10, %ymm7, %ymm8 # rnd_1: 5 - 5 xorl %ecx, %edx addl %r10d, %r14d movl %r12d, %ebx vpsrlq $19, %ymm7, %ymm6 # rnd_1: 6 - 6 xorl %r11d, %ebx addl %edx, %r10d andl %ebx, %eax vpsrlq $0x11, %ymm7, %ymm7 # rnd_1: 7 - 7 xorl %r12d, %eax rorxl $6, %r14d, %edx addl %eax, %r10d vpaddd %ymm3, %ymm4, %ymm4 # rnd_0: 0 - 0 movl %r15d, %eax rorxl $11, %r14d, %ecx addl 360(%rsp), %r9d vpxor %ymm7, %ymm6, %ymm6 # rnd_0: 1 - 1 xorl %edx, %ecx xorl %r8d, %eax rorxl $25, %r14d, %edx vpxor %ymm6, %ymm8, %ymm8 # rnd_0: 2 - 2 andl %r14d, %eax xorl %ecx, %edx rorxl $13, %r10d, %ecx vpaddd %ymm5, %ymm4, %ymm4 # rnd_0: 3 - 3 addl %edx, %r9d rorxl $2, %r10d, %edx xorl %r8d, %eax vpshufb %ymm11, %ymm8, %ymm8 # rnd_0: 4 - 4 xorl %edx, %ecx rorxl $22, %r10d, %edx addl %eax, %r9d vpaddd %ymm8, %ymm4, %ymm4 # rnd_0: 5 - 5 xorl %ecx, %edx movl %r11d, %eax addl %r9d, %r13d vpshufd $0x50, %ymm4, %ymm6 # rnd_0: 6 - 6 xorl %r10d, %eax addl %edx, %r9d andl %eax, %ebx vpsrlq $0x11, %ymm6, %ymm8 # rnd_0: 7 - 7 xorl %r11d, %ebx rorxl $6, %r13d, %edx addl %ebx, %r9d vpsrlq $19, %ymm6, %ymm7 # rnd_1: 0 - 0 movl %r14d, %ebx rorxl $11, %r13d, %ecx addl 364(%rsp), %r8d vpsrld $10, %ymm6, %ymm9 # rnd_1: 1 - 1 xorl %edx, %ecx xorl %r15d, %ebx rorxl $25, %r13d, %edx vpxor %ymm7, %ymm8, %ymm8 # rnd_1: 2 - 2 andl %r13d, %ebx xorl %ecx, %edx rorxl $13, %r9d, %ecx vpxor %ymm8, %ymm9, %ymm9 # rnd_1: 3 - 3 addl %edx, %r8d rorxl $2, %r9d, %edx xorl %r15d, %ebx vpshufb %ymm12, %ymm9, %ymm9 # rnd_1: 4 - 4 xorl %edx, %ecx rorxl $22, %r9d, %edx addl %ebx, %r8d vpaddd %ymm4, %ymm9, %ymm3 # rnd_1: 5 - 5 xorl %ecx, %edx addl %r8d, %r12d movl %r10d, %ebx vpaddd 480+L_avx2_rorx_sha256_k(%rip), %ymm3, %ymm4 # rnd_1: 6 - 6 xorl %r9d, %ebx addl %edx, %r8d andl %ebx, %eax # rnd_1: 7 - 7 xorl %r10d, %eax rorxl $6, %r12d, %edx addl %eax, %r8d vmovdqu %ymm4, 480(%rsp) xorl %eax, %eax xorl 
%ecx, %ecx rorxl $6, %r12d, %edx rorxl $11, %r12d, %ecx leal (%r8,%rax,1), %r8d addl 384(%rsp), %r15d movl %r13d, %eax xorl %edx, %ecx xorl %r14d, %eax rorxl $25, %r12d, %edx xorl %ecx, %edx andl %r12d, %eax addl %edx, %r15d rorxl $2, %r8d, %edx rorxl $13, %r8d, %ecx xorl %r14d, %eax xorl %edx, %ecx rorxl $22, %r8d, %edx addl %eax, %r15d xorl %ecx, %edx movl %r9d, %eax addl %r15d, %r11d xorl %r8d, %eax andl %eax, %ebx addl %edx, %r15d xorl %r9d, %ebx rorxl $6, %r11d, %edx rorxl $11, %r11d, %ecx addl %ebx, %r15d addl 388(%rsp), %r14d movl %r12d, %ebx xorl %edx, %ecx xorl %r13d, %ebx rorxl $25, %r11d, %edx xorl %ecx, %edx andl %r11d, %ebx addl %edx, %r14d rorxl $2, %r15d, %edx rorxl $13, %r15d, %ecx xorl %r13d, %ebx xorl %edx, %ecx rorxl $22, %r15d, %edx addl %ebx, %r14d xorl %ecx, %edx movl %r8d, %ebx leal (%r10,%r14,1), %r10d xorl %r15d, %ebx andl %ebx, %eax addl %edx, %r14d xorl %r8d, %eax rorxl $6, %r10d, %edx rorxl $11, %r10d, %ecx leal (%r14,%rax,1), %r14d addl 392(%rsp), %r13d movl %r11d, %eax xorl %edx, %ecx xorl %r12d, %eax rorxl $25, %r10d, %edx xorl %ecx, %edx andl %r10d, %eax addl %edx, %r13d rorxl $2, %r14d, %edx rorxl $13, %r14d, %ecx xorl %r12d, %eax xorl %edx, %ecx rorxl $22, %r14d, %edx addl %eax, %r13d xorl %ecx, %edx movl %r15d, %eax addl %r13d, %r9d xorl %r14d, %eax andl %eax, %ebx addl %edx, %r13d xorl %r15d, %ebx rorxl $6, %r9d, %edx rorxl $11, %r9d, %ecx addl %ebx, %r13d addl 396(%rsp), %r12d movl %r10d, %ebx xorl %edx, %ecx xorl %r11d, %ebx rorxl $25, %r9d, %edx xorl %ecx, %edx andl %r9d, %ebx addl %edx, %r12d rorxl $2, %r13d, %edx rorxl $13, %r13d, %ecx xorl %r11d, %ebx xorl %edx, %ecx rorxl $22, %r13d, %edx addl %ebx, %r12d xorl %ecx, %edx movl %r14d, %ebx leal (%r8,%r12,1), %r8d xorl %r13d, %ebx andl %ebx, %eax addl %edx, %r12d xorl %r14d, %eax rorxl $6, %r8d, %edx rorxl $11, %r8d, %ecx leal (%r12,%rax,1), %r12d addl 416(%rsp), %r11d movl %r9d, %eax xorl %edx, %ecx xorl %r10d, %eax rorxl $25, %r8d, %edx xorl %ecx, %edx andl %r8d, %eax addl %edx, %r11d rorxl $2, %r12d, %edx rorxl $13, %r12d, %ecx xorl %r10d, %eax xorl %edx, %ecx rorxl $22, %r12d, %edx addl %eax, %r11d xorl %ecx, %edx movl %r13d, %eax addl %r11d, %r15d xorl %r12d, %eax andl %eax, %ebx addl %edx, %r11d xorl %r13d, %ebx rorxl $6, %r15d, %edx rorxl $11, %r15d, %ecx addl %ebx, %r11d addl 420(%rsp), %r10d movl %r8d, %ebx xorl %edx, %ecx xorl %r9d, %ebx rorxl $25, %r15d, %edx xorl %ecx, %edx andl %r15d, %ebx addl %edx, %r10d rorxl $2, %r11d, %edx rorxl $13, %r11d, %ecx xorl %r9d, %ebx xorl %edx, %ecx rorxl $22, %r11d, %edx addl %ebx, %r10d xorl %ecx, %edx movl %r12d, %ebx leal (%r14,%r10,1), %r14d xorl %r11d, %ebx andl %ebx, %eax addl %edx, %r10d xorl %r12d, %eax rorxl $6, %r14d, %edx rorxl $11, %r14d, %ecx leal (%r10,%rax,1), %r10d addl 424(%rsp), %r9d movl %r15d, %eax xorl %edx, %ecx xorl %r8d, %eax rorxl $25, %r14d, %edx xorl %ecx, %edx andl %r14d, %eax addl %edx, %r9d rorxl $2, %r10d, %edx rorxl $13, %r10d, %ecx xorl %r8d, %eax xorl %edx, %ecx rorxl $22, %r10d, %edx addl %eax, %r9d xorl %ecx, %edx movl %r11d, %eax addl %r9d, %r13d xorl %r10d, %eax andl %eax, %ebx addl %edx, %r9d xorl %r11d, %ebx rorxl $6, %r13d, %edx rorxl $11, %r13d, %ecx addl %ebx, %r9d addl 428(%rsp), %r8d movl %r14d, %ebx xorl %edx, %ecx xorl %r15d, %ebx rorxl $25, %r13d, %edx xorl %ecx, %edx andl %r13d, %ebx addl %edx, %r8d rorxl $2, %r9d, %edx rorxl $13, %r9d, %ecx xorl %r15d, %ebx xorl %edx, %ecx rorxl $22, %r9d, %edx addl %ebx, %r8d xorl %ecx, %edx movl %r10d, %ebx leal (%r12,%r8,1), %r12d xorl %r9d, %ebx andl %ebx, %eax addl 
%edx, %r8d xorl %r10d, %eax rorxl $6, %r12d, %edx rorxl $11, %r12d, %ecx leal (%r8,%rax,1), %r8d addl 448(%rsp), %r15d movl %r13d, %eax xorl %edx, %ecx xorl %r14d, %eax rorxl $25, %r12d, %edx xorl %ecx, %edx andl %r12d, %eax addl %edx, %r15d rorxl $2, %r8d, %edx rorxl $13, %r8d, %ecx xorl %r14d, %eax xorl %edx, %ecx rorxl $22, %r8d, %edx addl %eax, %r15d xorl %ecx, %edx movl %r9d, %eax addl %r15d, %r11d xorl %r8d, %eax andl %eax, %ebx addl %edx, %r15d xorl %r9d, %ebx rorxl $6, %r11d, %edx rorxl $11, %r11d, %ecx addl %ebx, %r15d addl 452(%rsp), %r14d movl %r12d, %ebx xorl %edx, %ecx xorl %r13d, %ebx rorxl $25, %r11d, %edx xorl %ecx, %edx andl %r11d, %ebx addl %edx, %r14d rorxl $2, %r15d, %edx rorxl $13, %r15d, %ecx xorl %r13d, %ebx xorl %edx, %ecx rorxl $22, %r15d, %edx addl %ebx, %r14d xorl %ecx, %edx movl %r8d, %ebx leal (%r10,%r14,1), %r10d xorl %r15d, %ebx andl %ebx, %eax addl %edx, %r14d xorl %r8d, %eax rorxl $6, %r10d, %edx rorxl $11, %r10d, %ecx leal (%r14,%rax,1), %r14d addl 456(%rsp), %r13d movl %r11d, %eax xorl %edx, %ecx xorl %r12d, %eax rorxl $25, %r10d, %edx xorl %ecx, %edx andl %r10d, %eax addl %edx, %r13d rorxl $2, %r14d, %edx rorxl $13, %r14d, %ecx xorl %r12d, %eax xorl %edx, %ecx rorxl $22, %r14d, %edx addl %eax, %r13d xorl %ecx, %edx movl %r15d, %eax addl %r13d, %r9d xorl %r14d, %eax andl %eax, %ebx addl %edx, %r13d xorl %r15d, %ebx rorxl $6, %r9d, %edx rorxl $11, %r9d, %ecx addl %ebx, %r13d addl 460(%rsp), %r12d movl %r10d, %ebx xorl %edx, %ecx xorl %r11d, %ebx rorxl $25, %r9d, %edx xorl %ecx, %edx andl %r9d, %ebx addl %edx, %r12d rorxl $2, %r13d, %edx rorxl $13, %r13d, %ecx xorl %r11d, %ebx xorl %edx, %ecx rorxl $22, %r13d, %edx addl %ebx, %r12d xorl %ecx, %edx movl %r14d, %ebx leal (%r8,%r12,1), %r8d xorl %r13d, %ebx andl %ebx, %eax addl %edx, %r12d xorl %r14d, %eax rorxl $6, %r8d, %edx rorxl $11, %r8d, %ecx leal (%r12,%rax,1), %r12d addl 480(%rsp), %r11d movl %r9d, %eax xorl %edx, %ecx xorl %r10d, %eax rorxl $25, %r8d, %edx xorl %ecx, %edx andl %r8d, %eax addl %edx, %r11d rorxl $2, %r12d, %edx rorxl $13, %r12d, %ecx xorl %r10d, %eax xorl %edx, %ecx rorxl $22, %r12d, %edx addl %eax, %r11d xorl %ecx, %edx movl %r13d, %eax addl %r11d, %r15d xorl %r12d, %eax andl %eax, %ebx addl %edx, %r11d xorl %r13d, %ebx rorxl $6, %r15d, %edx rorxl $11, %r15d, %ecx addl %ebx, %r11d addl 484(%rsp), %r10d movl %r8d, %ebx xorl %edx, %ecx xorl %r9d, %ebx rorxl $25, %r15d, %edx xorl %ecx, %edx andl %r15d, %ebx addl %edx, %r10d rorxl $2, %r11d, %edx rorxl $13, %r11d, %ecx xorl %r9d, %ebx xorl %edx, %ecx rorxl $22, %r11d, %edx addl %ebx, %r10d xorl %ecx, %edx movl %r12d, %ebx leal (%r14,%r10,1), %r14d xorl %r11d, %ebx andl %ebx, %eax addl %edx, %r10d xorl %r12d, %eax rorxl $6, %r14d, %edx rorxl $11, %r14d, %ecx leal (%r10,%rax,1), %r10d addl 488(%rsp), %r9d movl %r15d, %eax xorl %edx, %ecx xorl %r8d, %eax rorxl $25, %r14d, %edx xorl %ecx, %edx andl %r14d, %eax addl %edx, %r9d rorxl $2, %r10d, %edx rorxl $13, %r10d, %ecx xorl %r8d, %eax xorl %edx, %ecx rorxl $22, %r10d, %edx addl %eax, %r9d xorl %ecx, %edx movl %r11d, %eax addl %r9d, %r13d xorl %r10d, %eax andl %eax, %ebx addl %edx, %r9d xorl %r11d, %ebx rorxl $6, %r13d, %edx rorxl $11, %r13d, %ecx addl %ebx, %r9d addl 492(%rsp), %r8d movl %r14d, %ebx xorl %edx, %ecx xorl %r15d, %ebx rorxl $25, %r13d, %edx xorl %ecx, %edx andl %r13d, %ebx addl %edx, %r8d rorxl $2, %r9d, %edx rorxl $13, %r9d, %ecx xorl %r15d, %ebx xorl %edx, %ecx rorxl $22, %r9d, %edx addl %ebx, %r8d xorl %ecx, %edx movl %r10d, %ebx leal (%r12,%r8,1), %r12d xorl %r9d, %ebx andl 
%ebx, %eax addl %edx, %r8d xorl %r10d, %eax addl %eax, %r8d addl %r8d, (%rdi) addl %r9d, 4(%rdi) addl %r10d, 8(%rdi) addl %r11d, 12(%rdi) addl %r12d, 16(%rdi) addl %r13d, 20(%rdi) addl %r14d, 24(%rdi) addl %r15d, 28(%rdi) xorq %rax, %rax vzeroupper addq $0x200, %rsp popq %r15 popq %r14 popq %r13 popq %r12 popq %rbx repz retq #ifndef __APPLE__ .size Transform_Sha256_AVX2_RORX,.-Transform_Sha256_AVX2_RORX #endif /* __APPLE__ */ #ifndef __APPLE__ .text .globl Transform_Sha256_AVX2_RORX_Len .type Transform_Sha256_AVX2_RORX_Len,@function .align 16 Transform_Sha256_AVX2_RORX_Len: #else .section __TEXT,__text .globl _Transform_Sha256_AVX2_RORX_Len .p2align 4 _Transform_Sha256_AVX2_RORX_Len: #endif /* __APPLE__ */ pushq %rbx pushq %r12 pushq %r13 pushq %r14 pushq %r15 pushq %rbp movq %rsi, %rbp movq %rdx, %rsi subq $0x200, %rsp testb $0x40, %sil je L_sha256_len_avx2_rorx_block vmovdqu (%rbp), %ymm0 vmovdqu 32(%rbp), %ymm1 vmovups %ymm0, 32(%rdi) vmovups %ymm1, 64(%rdi) #ifndef __APPLE__ call Transform_Sha256_AVX2_RORX@plt #else call _Transform_Sha256_AVX2_RORX #endif /* __APPLE__ */ addq $0x40, %rbp subl $0x40, %esi jz L_sha256_len_avx2_rorx_done L_sha256_len_avx2_rorx_block: vmovdqa L_avx2_rorx_sha256_flip_mask(%rip), %ymm13 vmovdqa L_avx2_rorx_sha256_shuf_00BA(%rip), %ymm11 vmovdqa L_avx2_rorx_sha256_shuf_DC00(%rip), %ymm12 movl (%rdi), %r8d movl 4(%rdi), %r9d movl 8(%rdi), %r10d movl 12(%rdi), %r11d movl 16(%rdi), %r12d movl 20(%rdi), %r13d movl 24(%rdi), %r14d movl 28(%rdi), %r15d # Start of loop processing two blocks L_sha256_len_avx2_rorx_start: # X0, X1, X2, X3 = W[0..15] vmovdqu (%rbp), %xmm0 vmovdqu 16(%rbp), %xmm1 vinserti128 $0x01, 64(%rbp), %ymm0, %ymm0 vinserti128 $0x01, 80(%rbp), %ymm1, %ymm1 vpshufb %ymm13, %ymm0, %ymm0 vpshufb %ymm13, %ymm1, %ymm1 vpaddd 0+L_avx2_rorx_sha256_k(%rip), %ymm0, %ymm4 vpaddd 32+L_avx2_rorx_sha256_k(%rip), %ymm1, %ymm5 vmovdqu %ymm4, (%rsp) vmovdqu %ymm5, 32(%rsp) vmovdqu 32(%rbp), %xmm2 vmovdqu 48(%rbp), %xmm3 vinserti128 $0x01, 96(%rbp), %ymm2, %ymm2 vinserti128 $0x01, 112(%rbp), %ymm3, %ymm3 vpshufb %ymm13, %ymm2, %ymm2 vpshufb %ymm13, %ymm3, %ymm3 vpaddd 64+L_avx2_rorx_sha256_k(%rip), %ymm2, %ymm4 vpaddd 96+L_avx2_rorx_sha256_k(%rip), %ymm3, %ymm5 vmovdqu %ymm4, 64(%rsp) vmovdqu %ymm5, 96(%rsp) movl %r9d, %ebx rorxl $6, %r12d, %edx xorl %r10d, %ebx # rnd_0: 0 - 0 movl %r13d, %eax rorxl $11, %r12d, %ecx addl (%rsp), %r15d vpalignr $4, %ymm0, %ymm1, %ymm5 # rnd_0: 1 - 1 xorl %edx, %ecx xorl %r14d, %eax rorxl $25, %r12d, %edx vpalignr $4, %ymm2, %ymm3, %ymm4 # rnd_0: 2 - 2 andl %r12d, %eax xorl %ecx, %edx rorxl $13, %r8d, %ecx vpsrld $7, %ymm5, %ymm6 # rnd_0: 3 - 3 addl %edx, %r15d rorxl $2, %r8d, %edx xorl %r14d, %eax vpslld $25, %ymm5, %ymm7 # rnd_0: 4 - 4 xorl %edx, %ecx rorxl $22, %r8d, %edx addl %eax, %r15d vpsrld $18, %ymm5, %ymm8 # rnd_0: 5 - 5 xorl %ecx, %edx movl %r9d, %eax addl %r15d, %r11d vpslld $14, %ymm5, %ymm9 # rnd_0: 6 - 6 xorl %r8d, %eax addl %edx, %r15d andl %eax, %ebx vpor %ymm7, %ymm6, %ymm6 # rnd_0: 7 - 7 xorl %r9d, %ebx rorxl $6, %r11d, %edx addl %ebx, %r15d vpor %ymm9, %ymm8, %ymm8 # rnd_1: 0 - 0 movl %r12d, %ebx rorxl $11, %r11d, %ecx addl 4(%rsp), %r14d vpsrld $3, %ymm5, %ymm9 # rnd_1: 1 - 1 xorl %edx, %ecx xorl %r13d, %ebx rorxl $25, %r11d, %edx vpxor %ymm8, %ymm6, %ymm6 # rnd_1: 2 - 2 andl %r11d, %ebx xorl %ecx, %edx rorxl $13, %r15d, %ecx vpshufd $0xfa, %ymm3, %ymm7 # rnd_1: 3 - 3 addl %edx, %r14d rorxl $2, %r15d, %edx xorl %r13d, %ebx vpxor %ymm6, %ymm9, %ymm5 # rnd_1: 4 - 4 xorl %edx, %ecx rorxl $22, %r15d, %edx addl %ebx, 
%r14d vpsrld $10, %ymm7, %ymm8 # rnd_1: 5 - 5 xorl %ecx, %edx addl %r14d, %r10d movl %r8d, %ebx vpsrlq $19, %ymm7, %ymm6 # rnd_1: 6 - 6 xorl %r15d, %ebx addl %edx, %r14d andl %ebx, %eax vpsrlq $0x11, %ymm7, %ymm7 # rnd_1: 7 - 7 xorl %r8d, %eax rorxl $6, %r10d, %edx addl %eax, %r14d vpaddd %ymm0, %ymm4, %ymm4 # rnd_0: 0 - 0 movl %r11d, %eax rorxl $11, %r10d, %ecx addl 8(%rsp), %r13d vpxor %ymm7, %ymm6, %ymm6 # rnd_0: 1 - 1 xorl %edx, %ecx xorl %r12d, %eax rorxl $25, %r10d, %edx vpxor %ymm6, %ymm8, %ymm8 # rnd_0: 2 - 2 andl %r10d, %eax xorl %ecx, %edx rorxl $13, %r14d, %ecx vpaddd %ymm5, %ymm4, %ymm4 # rnd_0: 3 - 3 addl %edx, %r13d rorxl $2, %r14d, %edx xorl %r12d, %eax vpshufb %ymm11, %ymm8, %ymm8 # rnd_0: 4 - 4 xorl %edx, %ecx rorxl $22, %r14d, %edx addl %eax, %r13d vpaddd %ymm8, %ymm4, %ymm4 # rnd_0: 5 - 5 xorl %ecx, %edx movl %r15d, %eax addl %r13d, %r9d vpshufd $0x50, %ymm4, %ymm6 # rnd_0: 6 - 6 xorl %r14d, %eax addl %edx, %r13d andl %eax, %ebx vpsrlq $0x11, %ymm6, %ymm8 # rnd_0: 7 - 7 xorl %r15d, %ebx rorxl $6, %r9d, %edx addl %ebx, %r13d vpsrlq $19, %ymm6, %ymm7 # rnd_1: 0 - 0 movl %r10d, %ebx rorxl $11, %r9d, %ecx addl 12(%rsp), %r12d vpsrld $10, %ymm6, %ymm9 # rnd_1: 1 - 1 xorl %edx, %ecx xorl %r11d, %ebx rorxl $25, %r9d, %edx vpxor %ymm7, %ymm8, %ymm8 # rnd_1: 2 - 2 andl %r9d, %ebx xorl %ecx, %edx rorxl $13, %r13d, %ecx vpxor %ymm8, %ymm9, %ymm9 # rnd_1: 3 - 3 addl %edx, %r12d rorxl $2, %r13d, %edx xorl %r11d, %ebx vpshufb %ymm12, %ymm9, %ymm9 # rnd_1: 4 - 4 xorl %edx, %ecx rorxl $22, %r13d, %edx addl %ebx, %r12d vpaddd %ymm4, %ymm9, %ymm0 # rnd_1: 5 - 5 xorl %ecx, %edx addl %r12d, %r8d movl %r14d, %ebx vpaddd 128+L_avx2_rorx_sha256_k(%rip), %ymm0, %ymm4 # rnd_1: 6 - 6 xorl %r13d, %ebx addl %edx, %r12d andl %ebx, %eax # rnd_1: 7 - 7 xorl %r14d, %eax rorxl $6, %r8d, %edx addl %eax, %r12d vmovdqu %ymm4, 128(%rsp) # rnd_0: 0 - 0 movl %r9d, %eax rorxl $11, %r8d, %ecx addl 32(%rsp), %r11d vpalignr $4, %ymm1, %ymm2, %ymm5 # rnd_0: 1 - 1 xorl %edx, %ecx xorl %r10d, %eax rorxl $25, %r8d, %edx vpalignr $4, %ymm3, %ymm0, %ymm4 # rnd_0: 2 - 2 andl %r8d, %eax xorl %ecx, %edx rorxl $13, %r12d, %ecx vpsrld $7, %ymm5, %ymm6 # rnd_0: 3 - 3 addl %edx, %r11d rorxl $2, %r12d, %edx xorl %r10d, %eax vpslld $25, %ymm5, %ymm7 # rnd_0: 4 - 4 xorl %edx, %ecx rorxl $22, %r12d, %edx addl %eax, %r11d vpsrld $18, %ymm5, %ymm8 # rnd_0: 5 - 5 xorl %ecx, %edx movl %r13d, %eax addl %r11d, %r15d vpslld $14, %ymm5, %ymm9 # rnd_0: 6 - 6 xorl %r12d, %eax addl %edx, %r11d andl %eax, %ebx vpor %ymm7, %ymm6, %ymm6 # rnd_0: 7 - 7 xorl %r13d, %ebx rorxl $6, %r15d, %edx addl %ebx, %r11d vpor %ymm9, %ymm8, %ymm8 # rnd_1: 0 - 0 movl %r8d, %ebx rorxl $11, %r15d, %ecx addl 36(%rsp), %r10d vpsrld $3, %ymm5, %ymm9 # rnd_1: 1 - 1 xorl %edx, %ecx xorl %r9d, %ebx rorxl $25, %r15d, %edx vpxor %ymm8, %ymm6, %ymm6 # rnd_1: 2 - 2 andl %r15d, %ebx xorl %ecx, %edx rorxl $13, %r11d, %ecx vpshufd $0xfa, %ymm0, %ymm7 # rnd_1: 3 - 3 addl %edx, %r10d rorxl $2, %r11d, %edx xorl %r9d, %ebx vpxor %ymm6, %ymm9, %ymm5 # rnd_1: 4 - 4 xorl %edx, %ecx rorxl $22, %r11d, %edx addl %ebx, %r10d vpsrld $10, %ymm7, %ymm8 # rnd_1: 5 - 5 xorl %ecx, %edx addl %r10d, %r14d movl %r12d, %ebx vpsrlq $19, %ymm7, %ymm6 # rnd_1: 6 - 6 xorl %r11d, %ebx addl %edx, %r10d andl %ebx, %eax vpsrlq $0x11, %ymm7, %ymm7 # rnd_1: 7 - 7 xorl %r12d, %eax rorxl $6, %r14d, %edx addl %eax, %r10d vpaddd %ymm1, %ymm4, %ymm4 # rnd_0: 0 - 0 movl %r15d, %eax rorxl $11, %r14d, %ecx addl 40(%rsp), %r9d vpxor %ymm7, %ymm6, %ymm6 # rnd_0: 1 - 1 xorl %edx, %ecx xorl %r8d, %eax rorxl $25, 
%r14d, %edx vpxor %ymm6, %ymm8, %ymm8 # rnd_0: 2 - 2 andl %r14d, %eax xorl %ecx, %edx rorxl $13, %r10d, %ecx vpaddd %ymm5, %ymm4, %ymm4 # rnd_0: 3 - 3 addl %edx, %r9d rorxl $2, %r10d, %edx xorl %r8d, %eax vpshufb %ymm11, %ymm8, %ymm8 # rnd_0: 4 - 4 xorl %edx, %ecx rorxl $22, %r10d, %edx addl %eax, %r9d vpaddd %ymm8, %ymm4, %ymm4 # rnd_0: 5 - 5 xorl %ecx, %edx movl %r11d, %eax addl %r9d, %r13d vpshufd $0x50, %ymm4, %ymm6 # rnd_0: 6 - 6 xorl %r10d, %eax addl %edx, %r9d andl %eax, %ebx vpsrlq $0x11, %ymm6, %ymm8 # rnd_0: 7 - 7 xorl %r11d, %ebx rorxl $6, %r13d, %edx addl %ebx, %r9d vpsrlq $19, %ymm6, %ymm7 # rnd_1: 0 - 0 movl %r14d, %ebx rorxl $11, %r13d, %ecx addl 44(%rsp), %r8d vpsrld $10, %ymm6, %ymm9 # rnd_1: 1 - 1 xorl %edx, %ecx xorl %r15d, %ebx rorxl $25, %r13d, %edx vpxor %ymm7, %ymm8, %ymm8 # rnd_1: 2 - 2 andl %r13d, %ebx xorl %ecx, %edx rorxl $13, %r9d, %ecx vpxor %ymm8, %ymm9, %ymm9 # rnd_1: 3 - 3 addl %edx, %r8d rorxl $2, %r9d, %edx xorl %r15d, %ebx vpshufb %ymm12, %ymm9, %ymm9 # rnd_1: 4 - 4 xorl %edx, %ecx rorxl $22, %r9d, %edx addl %ebx, %r8d vpaddd %ymm4, %ymm9, %ymm1 # rnd_1: 5 - 5 xorl %ecx, %edx addl %r8d, %r12d movl %r10d, %ebx vpaddd 160+L_avx2_rorx_sha256_k(%rip), %ymm1, %ymm4 # rnd_1: 6 - 6 xorl %r9d, %ebx addl %edx, %r8d andl %ebx, %eax # rnd_1: 7 - 7 xorl %r10d, %eax rorxl $6, %r12d, %edx addl %eax, %r8d vmovdqu %ymm4, 160(%rsp) # rnd_0: 0 - 0 movl %r13d, %eax rorxl $11, %r12d, %ecx addl 64(%rsp), %r15d vpalignr $4, %ymm2, %ymm3, %ymm5 # rnd_0: 1 - 1 xorl %edx, %ecx xorl %r14d, %eax rorxl $25, %r12d, %edx vpalignr $4, %ymm0, %ymm1, %ymm4 # rnd_0: 2 - 2 andl %r12d, %eax xorl %ecx, %edx rorxl $13, %r8d, %ecx vpsrld $7, %ymm5, %ymm6 # rnd_0: 3 - 3 addl %edx, %r15d rorxl $2, %r8d, %edx xorl %r14d, %eax vpslld $25, %ymm5, %ymm7 # rnd_0: 4 - 4 xorl %edx, %ecx rorxl $22, %r8d, %edx addl %eax, %r15d vpsrld $18, %ymm5, %ymm8 # rnd_0: 5 - 5 xorl %ecx, %edx movl %r9d, %eax addl %r15d, %r11d vpslld $14, %ymm5, %ymm9 # rnd_0: 6 - 6 xorl %r8d, %eax addl %edx, %r15d andl %eax, %ebx vpor %ymm7, %ymm6, %ymm6 # rnd_0: 7 - 7 xorl %r9d, %ebx rorxl $6, %r11d, %edx addl %ebx, %r15d vpor %ymm9, %ymm8, %ymm8 # rnd_1: 0 - 0 movl %r12d, %ebx rorxl $11, %r11d, %ecx addl 68(%rsp), %r14d vpsrld $3, %ymm5, %ymm9 # rnd_1: 1 - 1 xorl %edx, %ecx xorl %r13d, %ebx rorxl $25, %r11d, %edx vpxor %ymm8, %ymm6, %ymm6 # rnd_1: 2 - 2 andl %r11d, %ebx xorl %ecx, %edx rorxl $13, %r15d, %ecx vpshufd $0xfa, %ymm1, %ymm7 # rnd_1: 3 - 3 addl %edx, %r14d rorxl $2, %r15d, %edx xorl %r13d, %ebx vpxor %ymm6, %ymm9, %ymm5 # rnd_1: 4 - 4 xorl %edx, %ecx rorxl $22, %r15d, %edx addl %ebx, %r14d vpsrld $10, %ymm7, %ymm8 # rnd_1: 5 - 5 xorl %ecx, %edx addl %r14d, %r10d movl %r8d, %ebx vpsrlq $19, %ymm7, %ymm6 # rnd_1: 6 - 6 xorl %r15d, %ebx addl %edx, %r14d andl %ebx, %eax vpsrlq $0x11, %ymm7, %ymm7 # rnd_1: 7 - 7 xorl %r8d, %eax rorxl $6, %r10d, %edx addl %eax, %r14d vpaddd %ymm2, %ymm4, %ymm4 # rnd_0: 0 - 0 movl %r11d, %eax rorxl $11, %r10d, %ecx addl 72(%rsp), %r13d vpxor %ymm7, %ymm6, %ymm6 # rnd_0: 1 - 1 xorl %edx, %ecx xorl %r12d, %eax rorxl $25, %r10d, %edx vpxor %ymm6, %ymm8, %ymm8 # rnd_0: 2 - 2 andl %r10d, %eax xorl %ecx, %edx rorxl $13, %r14d, %ecx vpaddd %ymm5, %ymm4, %ymm4 # rnd_0: 3 - 3 addl %edx, %r13d rorxl $2, %r14d, %edx xorl %r12d, %eax vpshufb %ymm11, %ymm8, %ymm8 # rnd_0: 4 - 4 xorl %edx, %ecx rorxl $22, %r14d, %edx addl %eax, %r13d vpaddd %ymm8, %ymm4, %ymm4 # rnd_0: 5 - 5 xorl %ecx, %edx movl %r15d, %eax addl %r13d, %r9d vpshufd $0x50, %ymm4, %ymm6 # rnd_0: 6 - 6 xorl %r14d, %eax addl %edx, %r13d andl 
%eax, %ebx vpsrlq $0x11, %ymm6, %ymm8 # rnd_0: 7 - 7 xorl %r15d, %ebx rorxl $6, %r9d, %edx addl %ebx, %r13d vpsrlq $19, %ymm6, %ymm7 # rnd_1: 0 - 0 movl %r10d, %ebx rorxl $11, %r9d, %ecx addl 76(%rsp), %r12d vpsrld $10, %ymm6, %ymm9 # rnd_1: 1 - 1 xorl %edx, %ecx xorl %r11d, %ebx rorxl $25, %r9d, %edx vpxor %ymm7, %ymm8, %ymm8 # rnd_1: 2 - 2 andl %r9d, %ebx xorl %ecx, %edx rorxl $13, %r13d, %ecx vpxor %ymm8, %ymm9, %ymm9 # rnd_1: 3 - 3 addl %edx, %r12d rorxl $2, %r13d, %edx xorl %r11d, %ebx vpshufb %ymm12, %ymm9, %ymm9 # rnd_1: 4 - 4 xorl %edx, %ecx rorxl $22, %r13d, %edx addl %ebx, %r12d vpaddd %ymm4, %ymm9, %ymm2 # rnd_1: 5 - 5 xorl %ecx, %edx addl %r12d, %r8d movl %r14d, %ebx vpaddd 192+L_avx2_rorx_sha256_k(%rip), %ymm2, %ymm4 # rnd_1: 6 - 6 xorl %r13d, %ebx addl %edx, %r12d andl %ebx, %eax # rnd_1: 7 - 7 xorl %r14d, %eax rorxl $6, %r8d, %edx addl %eax, %r12d vmovdqu %ymm4, 192(%rsp) # rnd_0: 0 - 0 movl %r9d, %eax rorxl $11, %r8d, %ecx addl 96(%rsp), %r11d vpalignr $4, %ymm3, %ymm0, %ymm5 # rnd_0: 1 - 1 xorl %edx, %ecx xorl %r10d, %eax rorxl $25, %r8d, %edx vpalignr $4, %ymm1, %ymm2, %ymm4 # rnd_0: 2 - 2 andl %r8d, %eax xorl %ecx, %edx rorxl $13, %r12d, %ecx vpsrld $7, %ymm5, %ymm6 # rnd_0: 3 - 3 addl %edx, %r11d rorxl $2, %r12d, %edx xorl %r10d, %eax vpslld $25, %ymm5, %ymm7 # rnd_0: 4 - 4 xorl %edx, %ecx rorxl $22, %r12d, %edx addl %eax, %r11d vpsrld $18, %ymm5, %ymm8 # rnd_0: 5 - 5 xorl %ecx, %edx movl %r13d, %eax addl %r11d, %r15d vpslld $14, %ymm5, %ymm9 # rnd_0: 6 - 6 xorl %r12d, %eax addl %edx, %r11d andl %eax, %ebx vpor %ymm7, %ymm6, %ymm6 # rnd_0: 7 - 7 xorl %r13d, %ebx rorxl $6, %r15d, %edx addl %ebx, %r11d vpor %ymm9, %ymm8, %ymm8 # rnd_1: 0 - 0 movl %r8d, %ebx rorxl $11, %r15d, %ecx addl 100(%rsp), %r10d vpsrld $3, %ymm5, %ymm9 # rnd_1: 1 - 1 xorl %edx, %ecx xorl %r9d, %ebx rorxl $25, %r15d, %edx vpxor %ymm8, %ymm6, %ymm6 # rnd_1: 2 - 2 andl %r15d, %ebx xorl %ecx, %edx rorxl $13, %r11d, %ecx vpshufd $0xfa, %ymm2, %ymm7 # rnd_1: 3 - 3 addl %edx, %r10d rorxl $2, %r11d, %edx xorl %r9d, %ebx vpxor %ymm6, %ymm9, %ymm5 # rnd_1: 4 - 4 xorl %edx, %ecx rorxl $22, %r11d, %edx addl %ebx, %r10d vpsrld $10, %ymm7, %ymm8 # rnd_1: 5 - 5 xorl %ecx, %edx addl %r10d, %r14d movl %r12d, %ebx vpsrlq $19, %ymm7, %ymm6 # rnd_1: 6 - 6 xorl %r11d, %ebx addl %edx, %r10d andl %ebx, %eax vpsrlq $0x11, %ymm7, %ymm7 # rnd_1: 7 - 7 xorl %r12d, %eax rorxl $6, %r14d, %edx addl %eax, %r10d vpaddd %ymm3, %ymm4, %ymm4 # rnd_0: 0 - 0 movl %r15d, %eax rorxl $11, %r14d, %ecx addl 104(%rsp), %r9d vpxor %ymm7, %ymm6, %ymm6 # rnd_0: 1 - 1 xorl %edx, %ecx xorl %r8d, %eax rorxl $25, %r14d, %edx vpxor %ymm6, %ymm8, %ymm8 # rnd_0: 2 - 2 andl %r14d, %eax xorl %ecx, %edx rorxl $13, %r10d, %ecx vpaddd %ymm5, %ymm4, %ymm4 # rnd_0: 3 - 3 addl %edx, %r9d rorxl $2, %r10d, %edx xorl %r8d, %eax vpshufb %ymm11, %ymm8, %ymm8 # rnd_0: 4 - 4 xorl %edx, %ecx rorxl $22, %r10d, %edx addl %eax, %r9d vpaddd %ymm8, %ymm4, %ymm4 # rnd_0: 5 - 5 xorl %ecx, %edx movl %r11d, %eax addl %r9d, %r13d vpshufd $0x50, %ymm4, %ymm6 # rnd_0: 6 - 6 xorl %r10d, %eax addl %edx, %r9d andl %eax, %ebx vpsrlq $0x11, %ymm6, %ymm8 # rnd_0: 7 - 7 xorl %r11d, %ebx rorxl $6, %r13d, %edx addl %ebx, %r9d vpsrlq $19, %ymm6, %ymm7 # rnd_1: 0 - 0 movl %r14d, %ebx rorxl $11, %r13d, %ecx addl 108(%rsp), %r8d vpsrld $10, %ymm6, %ymm9 # rnd_1: 1 - 1 xorl %edx, %ecx xorl %r15d, %ebx rorxl $25, %r13d, %edx vpxor %ymm7, %ymm8, %ymm8 # rnd_1: 2 - 2 andl %r13d, %ebx xorl %ecx, %edx rorxl $13, %r9d, %ecx vpxor %ymm8, %ymm9, %ymm9 # rnd_1: 3 - 3 addl %edx, %r8d rorxl $2, %r9d, 
%edx xorl %r15d, %ebx vpshufb %ymm12, %ymm9, %ymm9 # rnd_1: 4 - 4 xorl %edx, %ecx rorxl $22, %r9d, %edx addl %ebx, %r8d vpaddd %ymm4, %ymm9, %ymm3 # rnd_1: 5 - 5 xorl %ecx, %edx addl %r8d, %r12d movl %r10d, %ebx vpaddd 224+L_avx2_rorx_sha256_k(%rip), %ymm3, %ymm4 # rnd_1: 6 - 6 xorl %r9d, %ebx addl %edx, %r8d andl %ebx, %eax # rnd_1: 7 - 7 xorl %r10d, %eax rorxl $6, %r12d, %edx addl %eax, %r8d vmovdqu %ymm4, 224(%rsp) # rnd_0: 0 - 0 movl %r13d, %eax rorxl $11, %r12d, %ecx addl 128(%rsp), %r15d vpalignr $4, %ymm0, %ymm1, %ymm5 # rnd_0: 1 - 1 xorl %edx, %ecx xorl %r14d, %eax rorxl $25, %r12d, %edx vpalignr $4, %ymm2, %ymm3, %ymm4 # rnd_0: 2 - 2 andl %r12d, %eax xorl %ecx, %edx rorxl $13, %r8d, %ecx vpsrld $7, %ymm5, %ymm6 # rnd_0: 3 - 3 addl %edx, %r15d rorxl $2, %r8d, %edx xorl %r14d, %eax vpslld $25, %ymm5, %ymm7 # rnd_0: 4 - 4 xorl %edx, %ecx rorxl $22, %r8d, %edx addl %eax, %r15d vpsrld $18, %ymm5, %ymm8 # rnd_0: 5 - 5 xorl %ecx, %edx movl %r9d, %eax addl %r15d, %r11d vpslld $14, %ymm5, %ymm9 # rnd_0: 6 - 6 xorl %r8d, %eax addl %edx, %r15d andl %eax, %ebx vpor %ymm7, %ymm6, %ymm6 # rnd_0: 7 - 7 xorl %r9d, %ebx rorxl $6, %r11d, %edx addl %ebx, %r15d vpor %ymm9, %ymm8, %ymm8 # rnd_1: 0 - 0 movl %r12d, %ebx rorxl $11, %r11d, %ecx addl 132(%rsp), %r14d vpsrld $3, %ymm5, %ymm9 # rnd_1: 1 - 1 xorl %edx, %ecx xorl %r13d, %ebx rorxl $25, %r11d, %edx vpxor %ymm8, %ymm6, %ymm6 # rnd_1: 2 - 2 andl %r11d, %ebx xorl %ecx, %edx rorxl $13, %r15d, %ecx vpshufd $0xfa, %ymm3, %ymm7 # rnd_1: 3 - 3 addl %edx, %r14d rorxl $2, %r15d, %edx xorl %r13d, %ebx vpxor %ymm6, %ymm9, %ymm5 # rnd_1: 4 - 4 xorl %edx, %ecx rorxl $22, %r15d, %edx addl %ebx, %r14d vpsrld $10, %ymm7, %ymm8 # rnd_1: 5 - 5 xorl %ecx, %edx addl %r14d, %r10d movl %r8d, %ebx vpsrlq $19, %ymm7, %ymm6 # rnd_1: 6 - 6 xorl %r15d, %ebx addl %edx, %r14d andl %ebx, %eax vpsrlq $0x11, %ymm7, %ymm7 # rnd_1: 7 - 7 xorl %r8d, %eax rorxl $6, %r10d, %edx addl %eax, %r14d vpaddd %ymm0, %ymm4, %ymm4 # rnd_0: 0 - 0 movl %r11d, %eax rorxl $11, %r10d, %ecx addl 136(%rsp), %r13d vpxor %ymm7, %ymm6, %ymm6 # rnd_0: 1 - 1 xorl %edx, %ecx xorl %r12d, %eax rorxl $25, %r10d, %edx vpxor %ymm6, %ymm8, %ymm8 # rnd_0: 2 - 2 andl %r10d, %eax xorl %ecx, %edx rorxl $13, %r14d, %ecx vpaddd %ymm5, %ymm4, %ymm4 # rnd_0: 3 - 3 addl %edx, %r13d rorxl $2, %r14d, %edx xorl %r12d, %eax vpshufb %ymm11, %ymm8, %ymm8 # rnd_0: 4 - 4 xorl %edx, %ecx rorxl $22, %r14d, %edx addl %eax, %r13d vpaddd %ymm8, %ymm4, %ymm4 # rnd_0: 5 - 5 xorl %ecx, %edx movl %r15d, %eax addl %r13d, %r9d vpshufd $0x50, %ymm4, %ymm6 # rnd_0: 6 - 6 xorl %r14d, %eax addl %edx, %r13d andl %eax, %ebx vpsrlq $0x11, %ymm6, %ymm8 # rnd_0: 7 - 7 xorl %r15d, %ebx rorxl $6, %r9d, %edx addl %ebx, %r13d vpsrlq $19, %ymm6, %ymm7 # rnd_1: 0 - 0 movl %r10d, %ebx rorxl $11, %r9d, %ecx addl 140(%rsp), %r12d vpsrld $10, %ymm6, %ymm9 # rnd_1: 1 - 1 xorl %edx, %ecx xorl %r11d, %ebx rorxl $25, %r9d, %edx vpxor %ymm7, %ymm8, %ymm8 # rnd_1: 2 - 2 andl %r9d, %ebx xorl %ecx, %edx rorxl $13, %r13d, %ecx vpxor %ymm8, %ymm9, %ymm9 # rnd_1: 3 - 3 addl %edx, %r12d rorxl $2, %r13d, %edx xorl %r11d, %ebx vpshufb %ymm12, %ymm9, %ymm9 # rnd_1: 4 - 4 xorl %edx, %ecx rorxl $22, %r13d, %edx addl %ebx, %r12d vpaddd %ymm4, %ymm9, %ymm0 # rnd_1: 5 - 5 xorl %ecx, %edx addl %r12d, %r8d movl %r14d, %ebx vpaddd 256+L_avx2_rorx_sha256_k(%rip), %ymm0, %ymm4 # rnd_1: 6 - 6 xorl %r13d, %ebx addl %edx, %r12d andl %ebx, %eax # rnd_1: 7 - 7 xorl %r14d, %eax rorxl $6, %r8d, %edx addl %eax, %r12d vmovdqu %ymm4, 256(%rsp) # rnd_0: 0 - 0 movl %r9d, %eax rorxl $11, %r8d, 
%ecx addl 160(%rsp), %r11d vpalignr $4, %ymm1, %ymm2, %ymm5 # rnd_0: 1 - 1 xorl %edx, %ecx xorl %r10d, %eax rorxl $25, %r8d, %edx vpalignr $4, %ymm3, %ymm0, %ymm4 # rnd_0: 2 - 2 andl %r8d, %eax xorl %ecx, %edx rorxl $13, %r12d, %ecx vpsrld $7, %ymm5, %ymm6 # rnd_0: 3 - 3 addl %edx, %r11d rorxl $2, %r12d, %edx xorl %r10d, %eax vpslld $25, %ymm5, %ymm7 # rnd_0: 4 - 4 xorl %edx, %ecx rorxl $22, %r12d, %edx addl %eax, %r11d vpsrld $18, %ymm5, %ymm8 # rnd_0: 5 - 5 xorl %ecx, %edx movl %r13d, %eax addl %r11d, %r15d vpslld $14, %ymm5, %ymm9 # rnd_0: 6 - 6 xorl %r12d, %eax addl %edx, %r11d andl %eax, %ebx vpor %ymm7, %ymm6, %ymm6 # rnd_0: 7 - 7 xorl %r13d, %ebx rorxl $6, %r15d, %edx addl %ebx, %r11d vpor %ymm9, %ymm8, %ymm8 # rnd_1: 0 - 0 movl %r8d, %ebx rorxl $11, %r15d, %ecx addl 164(%rsp), %r10d vpsrld $3, %ymm5, %ymm9 # rnd_1: 1 - 1 xorl %edx, %ecx xorl %r9d, %ebx rorxl $25, %r15d, %edx vpxor %ymm8, %ymm6, %ymm6 # rnd_1: 2 - 2 andl %r15d, %ebx xorl %ecx, %edx rorxl $13, %r11d, %ecx vpshufd $0xfa, %ymm0, %ymm7 # rnd_1: 3 - 3 addl %edx, %r10d rorxl $2, %r11d, %edx xorl %r9d, %ebx vpxor %ymm6, %ymm9, %ymm5 # rnd_1: 4 - 4 xorl %edx, %ecx rorxl $22, %r11d, %edx addl %ebx, %r10d vpsrld $10, %ymm7, %ymm8 # rnd_1: 5 - 5 xorl %ecx, %edx addl %r10d, %r14d movl %r12d, %ebx vpsrlq $19, %ymm7, %ymm6 # rnd_1: 6 - 6 xorl %r11d, %ebx addl %edx, %r10d andl %ebx, %eax vpsrlq $0x11, %ymm7, %ymm7 # rnd_1: 7 - 7 xorl %r12d, %eax rorxl $6, %r14d, %edx addl %eax, %r10d vpaddd %ymm1, %ymm4, %ymm4 # rnd_0: 0 - 0 movl %r15d, %eax rorxl $11, %r14d, %ecx addl 168(%rsp), %r9d vpxor %ymm7, %ymm6, %ymm6 # rnd_0: 1 - 1 xorl %edx, %ecx xorl %r8d, %eax rorxl $25, %r14d, %edx vpxor %ymm6, %ymm8, %ymm8 # rnd_0: 2 - 2 andl %r14d, %eax xorl %ecx, %edx rorxl $13, %r10d, %ecx vpaddd %ymm5, %ymm4, %ymm4 # rnd_0: 3 - 3 addl %edx, %r9d rorxl $2, %r10d, %edx xorl %r8d, %eax vpshufb %ymm11, %ymm8, %ymm8 # rnd_0: 4 - 4 xorl %edx, %ecx rorxl $22, %r10d, %edx addl %eax, %r9d vpaddd %ymm8, %ymm4, %ymm4 # rnd_0: 5 - 5 xorl %ecx, %edx movl %r11d, %eax addl %r9d, %r13d vpshufd $0x50, %ymm4, %ymm6 # rnd_0: 6 - 6 xorl %r10d, %eax addl %edx, %r9d andl %eax, %ebx vpsrlq $0x11, %ymm6, %ymm8 # rnd_0: 7 - 7 xorl %r11d, %ebx rorxl $6, %r13d, %edx addl %ebx, %r9d vpsrlq $19, %ymm6, %ymm7 # rnd_1: 0 - 0 movl %r14d, %ebx rorxl $11, %r13d, %ecx addl 172(%rsp), %r8d vpsrld $10, %ymm6, %ymm9 # rnd_1: 1 - 1 xorl %edx, %ecx xorl %r15d, %ebx rorxl $25, %r13d, %edx vpxor %ymm7, %ymm8, %ymm8 # rnd_1: 2 - 2 andl %r13d, %ebx xorl %ecx, %edx rorxl $13, %r9d, %ecx vpxor %ymm8, %ymm9, %ymm9 # rnd_1: 3 - 3 addl %edx, %r8d rorxl $2, %r9d, %edx xorl %r15d, %ebx vpshufb %ymm12, %ymm9, %ymm9 # rnd_1: 4 - 4 xorl %edx, %ecx rorxl $22, %r9d, %edx addl %ebx, %r8d vpaddd %ymm4, %ymm9, %ymm1 # rnd_1: 5 - 5 xorl %ecx, %edx addl %r8d, %r12d movl %r10d, %ebx vpaddd 288+L_avx2_rorx_sha256_k(%rip), %ymm1, %ymm4 # rnd_1: 6 - 6 xorl %r9d, %ebx addl %edx, %r8d andl %ebx, %eax # rnd_1: 7 - 7 xorl %r10d, %eax rorxl $6, %r12d, %edx addl %eax, %r8d vmovdqu %ymm4, 288(%rsp) # rnd_0: 0 - 0 movl %r13d, %eax rorxl $11, %r12d, %ecx addl 192(%rsp), %r15d vpalignr $4, %ymm2, %ymm3, %ymm5 # rnd_0: 1 - 1 xorl %edx, %ecx xorl %r14d, %eax rorxl $25, %r12d, %edx vpalignr $4, %ymm0, %ymm1, %ymm4 # rnd_0: 2 - 2 andl %r12d, %eax xorl %ecx, %edx rorxl $13, %r8d, %ecx vpsrld $7, %ymm5, %ymm6 # rnd_0: 3 - 3 addl %edx, %r15d rorxl $2, %r8d, %edx xorl %r14d, %eax vpslld $25, %ymm5, %ymm7 # rnd_0: 4 - 4 xorl %edx, %ecx rorxl $22, %r8d, %edx addl %eax, %r15d vpsrld $18, %ymm5, %ymm8 # rnd_0: 5 - 5 xorl %ecx, 
%edx movl %r9d, %eax addl %r15d, %r11d vpslld $14, %ymm5, %ymm9 # rnd_0: 6 - 6 xorl %r8d, %eax addl %edx, %r15d andl %eax, %ebx vpor %ymm7, %ymm6, %ymm6 # rnd_0: 7 - 7 xorl %r9d, %ebx rorxl $6, %r11d, %edx addl %ebx, %r15d vpor %ymm9, %ymm8, %ymm8 # rnd_1: 0 - 0 movl %r12d, %ebx rorxl $11, %r11d, %ecx addl 196(%rsp), %r14d vpsrld $3, %ymm5, %ymm9 # rnd_1: 1 - 1 xorl %edx, %ecx xorl %r13d, %ebx rorxl $25, %r11d, %edx vpxor %ymm8, %ymm6, %ymm6 # rnd_1: 2 - 2 andl %r11d, %ebx xorl %ecx, %edx rorxl $13, %r15d, %ecx vpshufd $0xfa, %ymm1, %ymm7 # rnd_1: 3 - 3 addl %edx, %r14d rorxl $2, %r15d, %edx xorl %r13d, %ebx vpxor %ymm6, %ymm9, %ymm5 # rnd_1: 4 - 4 xorl %edx, %ecx rorxl $22, %r15d, %edx addl %ebx, %r14d vpsrld $10, %ymm7, %ymm8 # rnd_1: 5 - 5 xorl %ecx, %edx addl %r14d, %r10d movl %r8d, %ebx vpsrlq $19, %ymm7, %ymm6 # rnd_1: 6 - 6 xorl %r15d, %ebx addl %edx, %r14d andl %ebx, %eax vpsrlq $0x11, %ymm7, %ymm7 # rnd_1: 7 - 7 xorl %r8d, %eax rorxl $6, %r10d, %edx addl %eax, %r14d vpaddd %ymm2, %ymm4, %ymm4 # rnd_0: 0 - 0 movl %r11d, %eax rorxl $11, %r10d, %ecx addl 200(%rsp), %r13d vpxor %ymm7, %ymm6, %ymm6 # rnd_0: 1 - 1 xorl %edx, %ecx xorl %r12d, %eax rorxl $25, %r10d, %edx vpxor %ymm6, %ymm8, %ymm8 # rnd_0: 2 - 2 andl %r10d, %eax xorl %ecx, %edx rorxl $13, %r14d, %ecx vpaddd %ymm5, %ymm4, %ymm4 # rnd_0: 3 - 3 addl %edx, %r13d rorxl $2, %r14d, %edx xorl %r12d, %eax vpshufb %ymm11, %ymm8, %ymm8 # rnd_0: 4 - 4 xorl %edx, %ecx rorxl $22, %r14d, %edx addl %eax, %r13d vpaddd %ymm8, %ymm4, %ymm4 # rnd_0: 5 - 5 xorl %ecx, %edx movl %r15d, %eax addl %r13d, %r9d vpshufd $0x50, %ymm4, %ymm6 # rnd_0: 6 - 6 xorl %r14d, %eax addl %edx, %r13d andl %eax, %ebx vpsrlq $0x11, %ymm6, %ymm8 # rnd_0: 7 - 7 xorl %r15d, %ebx rorxl $6, %r9d, %edx addl %ebx, %r13d vpsrlq $19, %ymm6, %ymm7 # rnd_1: 0 - 0 movl %r10d, %ebx rorxl $11, %r9d, %ecx addl 204(%rsp), %r12d vpsrld $10, %ymm6, %ymm9 # rnd_1: 1 - 1 xorl %edx, %ecx xorl %r11d, %ebx rorxl $25, %r9d, %edx vpxor %ymm7, %ymm8, %ymm8 # rnd_1: 2 - 2 andl %r9d, %ebx xorl %ecx, %edx rorxl $13, %r13d, %ecx vpxor %ymm8, %ymm9, %ymm9 # rnd_1: 3 - 3 addl %edx, %r12d rorxl $2, %r13d, %edx xorl %r11d, %ebx vpshufb %ymm12, %ymm9, %ymm9 # rnd_1: 4 - 4 xorl %edx, %ecx rorxl $22, %r13d, %edx addl %ebx, %r12d vpaddd %ymm4, %ymm9, %ymm2 # rnd_1: 5 - 5 xorl %ecx, %edx addl %r12d, %r8d movl %r14d, %ebx vpaddd 320+L_avx2_rorx_sha256_k(%rip), %ymm2, %ymm4 # rnd_1: 6 - 6 xorl %r13d, %ebx addl %edx, %r12d andl %ebx, %eax # rnd_1: 7 - 7 xorl %r14d, %eax rorxl $6, %r8d, %edx addl %eax, %r12d vmovdqu %ymm4, 320(%rsp) # rnd_0: 0 - 0 movl %r9d, %eax rorxl $11, %r8d, %ecx addl 224(%rsp), %r11d vpalignr $4, %ymm3, %ymm0, %ymm5 # rnd_0: 1 - 1 xorl %edx, %ecx xorl %r10d, %eax rorxl $25, %r8d, %edx vpalignr $4, %ymm1, %ymm2, %ymm4 # rnd_0: 2 - 2 andl %r8d, %eax xorl %ecx, %edx rorxl $13, %r12d, %ecx vpsrld $7, %ymm5, %ymm6 # rnd_0: 3 - 3 addl %edx, %r11d rorxl $2, %r12d, %edx xorl %r10d, %eax vpslld $25, %ymm5, %ymm7 # rnd_0: 4 - 4 xorl %edx, %ecx rorxl $22, %r12d, %edx addl %eax, %r11d vpsrld $18, %ymm5, %ymm8 # rnd_0: 5 - 5 xorl %ecx, %edx movl %r13d, %eax addl %r11d, %r15d vpslld $14, %ymm5, %ymm9 # rnd_0: 6 - 6 xorl %r12d, %eax addl %edx, %r11d andl %eax, %ebx vpor %ymm7, %ymm6, %ymm6 # rnd_0: 7 - 7 xorl %r13d, %ebx rorxl $6, %r15d, %edx addl %ebx, %r11d vpor %ymm9, %ymm8, %ymm8 # rnd_1: 0 - 0 movl %r8d, %ebx rorxl $11, %r15d, %ecx addl 228(%rsp), %r10d vpsrld $3, %ymm5, %ymm9 # rnd_1: 1 - 1 xorl %edx, %ecx xorl %r9d, %ebx rorxl $25, %r15d, %edx vpxor %ymm8, %ymm6, %ymm6 # rnd_1: 2 - 2 andl 
%r15d, %ebx xorl %ecx, %edx rorxl $13, %r11d, %ecx vpshufd $0xfa, %ymm2, %ymm7 # rnd_1: 3 - 3 addl %edx, %r10d rorxl $2, %r11d, %edx xorl %r9d, %ebx vpxor %ymm6, %ymm9, %ymm5 # rnd_1: 4 - 4 xorl %edx, %ecx rorxl $22, %r11d, %edx addl %ebx, %r10d vpsrld $10, %ymm7, %ymm8 # rnd_1: 5 - 5 xorl %ecx, %edx addl %r10d, %r14d movl %r12d, %ebx vpsrlq $19, %ymm7, %ymm6 # rnd_1: 6 - 6 xorl %r11d, %ebx addl %edx, %r10d andl %ebx, %eax vpsrlq $0x11, %ymm7, %ymm7 # rnd_1: 7 - 7 xorl %r12d, %eax rorxl $6, %r14d, %edx addl %eax, %r10d vpaddd %ymm3, %ymm4, %ymm4 # rnd_0: 0 - 0 movl %r15d, %eax rorxl $11, %r14d, %ecx addl 232(%rsp), %r9d vpxor %ymm7, %ymm6, %ymm6 # rnd_0: 1 - 1 xorl %edx, %ecx xorl %r8d, %eax rorxl $25, %r14d, %edx vpxor %ymm6, %ymm8, %ymm8 # rnd_0: 2 - 2 andl %r14d, %eax xorl %ecx, %edx rorxl $13, %r10d, %ecx vpaddd %ymm5, %ymm4, %ymm4 # rnd_0: 3 - 3 addl %edx, %r9d rorxl $2, %r10d, %edx xorl %r8d, %eax vpshufb %ymm11, %ymm8, %ymm8 # rnd_0: 4 - 4 xorl %edx, %ecx rorxl $22, %r10d, %edx addl %eax, %r9d vpaddd %ymm8, %ymm4, %ymm4 # rnd_0: 5 - 5 xorl %ecx, %edx movl %r11d, %eax addl %r9d, %r13d vpshufd $0x50, %ymm4, %ymm6 # rnd_0: 6 - 6 xorl %r10d, %eax addl %edx, %r9d andl %eax, %ebx vpsrlq $0x11, %ymm6, %ymm8 # rnd_0: 7 - 7 xorl %r11d, %ebx rorxl $6, %r13d, %edx addl %ebx, %r9d vpsrlq $19, %ymm6, %ymm7 # rnd_1: 0 - 0 movl %r14d, %ebx rorxl $11, %r13d, %ecx addl 236(%rsp), %r8d vpsrld $10, %ymm6, %ymm9 # rnd_1: 1 - 1 xorl %edx, %ecx xorl %r15d, %ebx rorxl $25, %r13d, %edx vpxor %ymm7, %ymm8, %ymm8 # rnd_1: 2 - 2 andl %r13d, %ebx xorl %ecx, %edx rorxl $13, %r9d, %ecx vpxor %ymm8, %ymm9, %ymm9 # rnd_1: 3 - 3 addl %edx, %r8d rorxl $2, %r9d, %edx xorl %r15d, %ebx vpshufb %ymm12, %ymm9, %ymm9 # rnd_1: 4 - 4 xorl %edx, %ecx rorxl $22, %r9d, %edx addl %ebx, %r8d vpaddd %ymm4, %ymm9, %ymm3 # rnd_1: 5 - 5 xorl %ecx, %edx addl %r8d, %r12d movl %r10d, %ebx vpaddd 352+L_avx2_rorx_sha256_k(%rip), %ymm3, %ymm4 # rnd_1: 6 - 6 xorl %r9d, %ebx addl %edx, %r8d andl %ebx, %eax # rnd_1: 7 - 7 xorl %r10d, %eax rorxl $6, %r12d, %edx addl %eax, %r8d vmovdqu %ymm4, 352(%rsp) # rnd_0: 0 - 0 movl %r13d, %eax rorxl $11, %r12d, %ecx addl 256(%rsp), %r15d vpalignr $4, %ymm0, %ymm1, %ymm5 # rnd_0: 1 - 1 xorl %edx, %ecx xorl %r14d, %eax rorxl $25, %r12d, %edx vpalignr $4, %ymm2, %ymm3, %ymm4 # rnd_0: 2 - 2 andl %r12d, %eax xorl %ecx, %edx rorxl $13, %r8d, %ecx vpsrld $7, %ymm5, %ymm6 # rnd_0: 3 - 3 addl %edx, %r15d rorxl $2, %r8d, %edx xorl %r14d, %eax vpslld $25, %ymm5, %ymm7 # rnd_0: 4 - 4 xorl %edx, %ecx rorxl $22, %r8d, %edx addl %eax, %r15d vpsrld $18, %ymm5, %ymm8 # rnd_0: 5 - 5 xorl %ecx, %edx movl %r9d, %eax addl %r15d, %r11d vpslld $14, %ymm5, %ymm9 # rnd_0: 6 - 6 xorl %r8d, %eax addl %edx, %r15d andl %eax, %ebx vpor %ymm7, %ymm6, %ymm6 # rnd_0: 7 - 7 xorl %r9d, %ebx rorxl $6, %r11d, %edx addl %ebx, %r15d vpor %ymm9, %ymm8, %ymm8 # rnd_1: 0 - 0 movl %r12d, %ebx rorxl $11, %r11d, %ecx addl 260(%rsp), %r14d vpsrld $3, %ymm5, %ymm9 # rnd_1: 1 - 1 xorl %edx, %ecx xorl %r13d, %ebx rorxl $25, %r11d, %edx vpxor %ymm8, %ymm6, %ymm6 # rnd_1: 2 - 2 andl %r11d, %ebx xorl %ecx, %edx rorxl $13, %r15d, %ecx vpshufd $0xfa, %ymm3, %ymm7 # rnd_1: 3 - 3 addl %edx, %r14d rorxl $2, %r15d, %edx xorl %r13d, %ebx vpxor %ymm6, %ymm9, %ymm5 # rnd_1: 4 - 4 xorl %edx, %ecx rorxl $22, %r15d, %edx addl %ebx, %r14d vpsrld $10, %ymm7, %ymm8 # rnd_1: 5 - 5 xorl %ecx, %edx addl %r14d, %r10d movl %r8d, %ebx vpsrlq $19, %ymm7, %ymm6 # rnd_1: 6 - 6 xorl %r15d, %ebx addl %edx, %r14d andl %ebx, %eax vpsrlq $0x11, %ymm7, %ymm7 # rnd_1: 7 - 7 xorl %r8d, 
%eax rorxl $6, %r10d, %edx addl %eax, %r14d vpaddd %ymm0, %ymm4, %ymm4 # rnd_0: 0 - 0 movl %r11d, %eax rorxl $11, %r10d, %ecx addl 264(%rsp), %r13d vpxor %ymm7, %ymm6, %ymm6 # rnd_0: 1 - 1 xorl %edx, %ecx xorl %r12d, %eax rorxl $25, %r10d, %edx vpxor %ymm6, %ymm8, %ymm8 # rnd_0: 2 - 2 andl %r10d, %eax xorl %ecx, %edx rorxl $13, %r14d, %ecx vpaddd %ymm5, %ymm4, %ymm4 # rnd_0: 3 - 3 addl %edx, %r13d rorxl $2, %r14d, %edx xorl %r12d, %eax vpshufb %ymm11, %ymm8, %ymm8 # rnd_0: 4 - 4 xorl %edx, %ecx rorxl $22, %r14d, %edx addl %eax, %r13d vpaddd %ymm8, %ymm4, %ymm4 # rnd_0: 5 - 5 xorl %ecx, %edx movl %r15d, %eax addl %r13d, %r9d vpshufd $0x50, %ymm4, %ymm6 # rnd_0: 6 - 6 xorl %r14d, %eax addl %edx, %r13d andl %eax, %ebx vpsrlq $0x11, %ymm6, %ymm8 # rnd_0: 7 - 7 xorl %r15d, %ebx rorxl $6, %r9d, %edx addl %ebx, %r13d vpsrlq $19, %ymm6, %ymm7 # rnd_1: 0 - 0 movl %r10d, %ebx rorxl $11, %r9d, %ecx addl 268(%rsp), %r12d vpsrld $10, %ymm6, %ymm9 # rnd_1: 1 - 1 xorl %edx, %ecx xorl %r11d, %ebx rorxl $25, %r9d, %edx vpxor %ymm7, %ymm8, %ymm8 # rnd_1: 2 - 2 andl %r9d, %ebx xorl %ecx, %edx rorxl $13, %r13d, %ecx vpxor %ymm8, %ymm9, %ymm9 # rnd_1: 3 - 3 addl %edx, %r12d rorxl $2, %r13d, %edx xorl %r11d, %ebx vpshufb %ymm12, %ymm9, %ymm9 # rnd_1: 4 - 4 xorl %edx, %ecx rorxl $22, %r13d, %edx addl %ebx, %r12d vpaddd %ymm4, %ymm9, %ymm0 # rnd_1: 5 - 5 xorl %ecx, %edx addl %r12d, %r8d movl %r14d, %ebx vpaddd 384+L_avx2_rorx_sha256_k(%rip), %ymm0, %ymm4 # rnd_1: 6 - 6 xorl %r13d, %ebx addl %edx, %r12d andl %ebx, %eax # rnd_1: 7 - 7 xorl %r14d, %eax rorxl $6, %r8d, %edx addl %eax, %r12d vmovdqu %ymm4, 384(%rsp) # rnd_0: 0 - 0 movl %r9d, %eax rorxl $11, %r8d, %ecx addl 288(%rsp), %r11d vpalignr $4, %ymm1, %ymm2, %ymm5 # rnd_0: 1 - 1 xorl %edx, %ecx xorl %r10d, %eax rorxl $25, %r8d, %edx vpalignr $4, %ymm3, %ymm0, %ymm4 # rnd_0: 2 - 2 andl %r8d, %eax xorl %ecx, %edx rorxl $13, %r12d, %ecx vpsrld $7, %ymm5, %ymm6 # rnd_0: 3 - 3 addl %edx, %r11d rorxl $2, %r12d, %edx xorl %r10d, %eax vpslld $25, %ymm5, %ymm7 # rnd_0: 4 - 4 xorl %edx, %ecx rorxl $22, %r12d, %edx addl %eax, %r11d vpsrld $18, %ymm5, %ymm8 # rnd_0: 5 - 5 xorl %ecx, %edx movl %r13d, %eax addl %r11d, %r15d vpslld $14, %ymm5, %ymm9 # rnd_0: 6 - 6 xorl %r12d, %eax addl %edx, %r11d andl %eax, %ebx vpor %ymm7, %ymm6, %ymm6 # rnd_0: 7 - 7 xorl %r13d, %ebx rorxl $6, %r15d, %edx addl %ebx, %r11d vpor %ymm9, %ymm8, %ymm8 # rnd_1: 0 - 0 movl %r8d, %ebx rorxl $11, %r15d, %ecx addl 292(%rsp), %r10d vpsrld $3, %ymm5, %ymm9 # rnd_1: 1 - 1 xorl %edx, %ecx xorl %r9d, %ebx rorxl $25, %r15d, %edx vpxor %ymm8, %ymm6, %ymm6 # rnd_1: 2 - 2 andl %r15d, %ebx xorl %ecx, %edx rorxl $13, %r11d, %ecx vpshufd $0xfa, %ymm0, %ymm7 # rnd_1: 3 - 3 addl %edx, %r10d rorxl $2, %r11d, %edx xorl %r9d, %ebx vpxor %ymm6, %ymm9, %ymm5 # rnd_1: 4 - 4 xorl %edx, %ecx rorxl $22, %r11d, %edx addl %ebx, %r10d vpsrld $10, %ymm7, %ymm8 # rnd_1: 5 - 5 xorl %ecx, %edx addl %r10d, %r14d movl %r12d, %ebx vpsrlq $19, %ymm7, %ymm6 # rnd_1: 6 - 6 xorl %r11d, %ebx addl %edx, %r10d andl %ebx, %eax vpsrlq $0x11, %ymm7, %ymm7 # rnd_1: 7 - 7 xorl %r12d, %eax rorxl $6, %r14d, %edx addl %eax, %r10d vpaddd %ymm1, %ymm4, %ymm4 # rnd_0: 0 - 0 movl %r15d, %eax rorxl $11, %r14d, %ecx addl 296(%rsp), %r9d vpxor %ymm7, %ymm6, %ymm6 # rnd_0: 1 - 1 xorl %edx, %ecx xorl %r8d, %eax rorxl $25, %r14d, %edx vpxor %ymm6, %ymm8, %ymm8 # rnd_0: 2 - 2 andl %r14d, %eax xorl %ecx, %edx rorxl $13, %r10d, %ecx vpaddd %ymm5, %ymm4, %ymm4 # rnd_0: 3 - 3 addl %edx, %r9d rorxl $2, %r10d, %edx xorl %r8d, %eax vpshufb %ymm11, %ymm8, %ymm8 # 
rnd_0: 4 - 4 xorl %edx, %ecx rorxl $22, %r10d, %edx addl %eax, %r9d vpaddd %ymm8, %ymm4, %ymm4 # rnd_0: 5 - 5 xorl %ecx, %edx movl %r11d, %eax addl %r9d, %r13d vpshufd $0x50, %ymm4, %ymm6 # rnd_0: 6 - 6 xorl %r10d, %eax addl %edx, %r9d andl %eax, %ebx vpsrlq $0x11, %ymm6, %ymm8 # rnd_0: 7 - 7 xorl %r11d, %ebx rorxl $6, %r13d, %edx addl %ebx, %r9d vpsrlq $19, %ymm6, %ymm7 # rnd_1: 0 - 0 movl %r14d, %ebx rorxl $11, %r13d, %ecx addl 300(%rsp), %r8d vpsrld $10, %ymm6, %ymm9 # rnd_1: 1 - 1 xorl %edx, %ecx xorl %r15d, %ebx rorxl $25, %r13d, %edx vpxor %ymm7, %ymm8, %ymm8 # rnd_1: 2 - 2 andl %r13d, %ebx xorl %ecx, %edx rorxl $13, %r9d, %ecx vpxor %ymm8, %ymm9, %ymm9 # rnd_1: 3 - 3 addl %edx, %r8d rorxl $2, %r9d, %edx xorl %r15d, %ebx vpshufb %ymm12, %ymm9, %ymm9 # rnd_1: 4 - 4 xorl %edx, %ecx rorxl $22, %r9d, %edx addl %ebx, %r8d vpaddd %ymm4, %ymm9, %ymm1 # rnd_1: 5 - 5 xorl %ecx, %edx addl %r8d, %r12d movl %r10d, %ebx vpaddd 416+L_avx2_rorx_sha256_k(%rip), %ymm1, %ymm4 # rnd_1: 6 - 6 xorl %r9d, %ebx addl %edx, %r8d andl %ebx, %eax # rnd_1: 7 - 7 xorl %r10d, %eax rorxl $6, %r12d, %edx addl %eax, %r8d vmovdqu %ymm4, 416(%rsp) # rnd_0: 0 - 0 movl %r13d, %eax rorxl $11, %r12d, %ecx addl 320(%rsp), %r15d vpalignr $4, %ymm2, %ymm3, %ymm5 # rnd_0: 1 - 1 xorl %edx, %ecx xorl %r14d, %eax rorxl $25, %r12d, %edx vpalignr $4, %ymm0, %ymm1, %ymm4 # rnd_0: 2 - 2 andl %r12d, %eax xorl %ecx, %edx rorxl $13, %r8d, %ecx vpsrld $7, %ymm5, %ymm6 # rnd_0: 3 - 3 addl %edx, %r15d rorxl $2, %r8d, %edx xorl %r14d, %eax vpslld $25, %ymm5, %ymm7 # rnd_0: 4 - 4 xorl %edx, %ecx rorxl $22, %r8d, %edx addl %eax, %r15d vpsrld $18, %ymm5, %ymm8 # rnd_0: 5 - 5 xorl %ecx, %edx movl %r9d, %eax addl %r15d, %r11d vpslld $14, %ymm5, %ymm9 # rnd_0: 6 - 6 xorl %r8d, %eax addl %edx, %r15d andl %eax, %ebx vpor %ymm7, %ymm6, %ymm6 # rnd_0: 7 - 7 xorl %r9d, %ebx rorxl $6, %r11d, %edx addl %ebx, %r15d vpor %ymm9, %ymm8, %ymm8 # rnd_1: 0 - 0 movl %r12d, %ebx rorxl $11, %r11d, %ecx addl 324(%rsp), %r14d vpsrld $3, %ymm5, %ymm9 # rnd_1: 1 - 1 xorl %edx, %ecx xorl %r13d, %ebx rorxl $25, %r11d, %edx vpxor %ymm8, %ymm6, %ymm6 # rnd_1: 2 - 2 andl %r11d, %ebx xorl %ecx, %edx rorxl $13, %r15d, %ecx vpshufd $0xfa, %ymm1, %ymm7 # rnd_1: 3 - 3 addl %edx, %r14d rorxl $2, %r15d, %edx xorl %r13d, %ebx vpxor %ymm6, %ymm9, %ymm5 # rnd_1: 4 - 4 xorl %edx, %ecx rorxl $22, %r15d, %edx addl %ebx, %r14d vpsrld $10, %ymm7, %ymm8 # rnd_1: 5 - 5 xorl %ecx, %edx addl %r14d, %r10d movl %r8d, %ebx vpsrlq $19, %ymm7, %ymm6 # rnd_1: 6 - 6 xorl %r15d, %ebx addl %edx, %r14d andl %ebx, %eax vpsrlq $0x11, %ymm7, %ymm7 # rnd_1: 7 - 7 xorl %r8d, %eax rorxl $6, %r10d, %edx addl %eax, %r14d vpaddd %ymm2, %ymm4, %ymm4 # rnd_0: 0 - 0 movl %r11d, %eax rorxl $11, %r10d, %ecx addl 328(%rsp), %r13d vpxor %ymm7, %ymm6, %ymm6 # rnd_0: 1 - 1 xorl %edx, %ecx xorl %r12d, %eax rorxl $25, %r10d, %edx vpxor %ymm6, %ymm8, %ymm8 # rnd_0: 2 - 2 andl %r10d, %eax xorl %ecx, %edx rorxl $13, %r14d, %ecx vpaddd %ymm5, %ymm4, %ymm4 # rnd_0: 3 - 3 addl %edx, %r13d rorxl $2, %r14d, %edx xorl %r12d, %eax vpshufb %ymm11, %ymm8, %ymm8 # rnd_0: 4 - 4 xorl %edx, %ecx rorxl $22, %r14d, %edx addl %eax, %r13d vpaddd %ymm8, %ymm4, %ymm4 # rnd_0: 5 - 5 xorl %ecx, %edx movl %r15d, %eax addl %r13d, %r9d vpshufd $0x50, %ymm4, %ymm6 # rnd_0: 6 - 6 xorl %r14d, %eax addl %edx, %r13d andl %eax, %ebx vpsrlq $0x11, %ymm6, %ymm8 # rnd_0: 7 - 7 xorl %r15d, %ebx rorxl $6, %r9d, %edx addl %ebx, %r13d vpsrlq $19, %ymm6, %ymm7 # rnd_1: 0 - 0 movl %r10d, %ebx rorxl $11, %r9d, %ecx addl 332(%rsp), %r12d vpsrld $10, %ymm6, %ymm9 
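# Message-schedule reference (FIPS 180-4; the instruction mapping is illustrative):
#   sigma0(x) = ror(x,7) ^ ror(x,18) ^ (x >> 3)    -> vpsrld $7/vpslld $25, vpsrld $18/vpslld $14, vpsrld $3
#   sigma1(x) = ror(x,17) ^ ror(x,19) ^ (x >> 10)  -> vpsrlq $0x11/$19, vpsrld $10
#   W[t] = sigma1(W[t-2]) + W[t-7] + sigma0(W[t-15]) + W[t-16]
# The vpaddd against L_avx2_rorx_sha256_k adds K[t], and each vmovdqu to
# N(%rsp) spills W[t]+K[t] for a group of four rounds in each 128-bit lane.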
# rnd_1: 1 - 1 xorl %edx, %ecx xorl %r11d, %ebx rorxl $25, %r9d, %edx vpxor %ymm7, %ymm8, %ymm8 # rnd_1: 2 - 2 andl %r9d, %ebx xorl %ecx, %edx rorxl $13, %r13d, %ecx vpxor %ymm8, %ymm9, %ymm9 # rnd_1: 3 - 3 addl %edx, %r12d rorxl $2, %r13d, %edx xorl %r11d, %ebx vpshufb %ymm12, %ymm9, %ymm9 # rnd_1: 4 - 4 xorl %edx, %ecx rorxl $22, %r13d, %edx addl %ebx, %r12d vpaddd %ymm4, %ymm9, %ymm2 # rnd_1: 5 - 5 xorl %ecx, %edx addl %r12d, %r8d movl %r14d, %ebx vpaddd 448+L_avx2_rorx_sha256_k(%rip), %ymm2, %ymm4 # rnd_1: 6 - 6 xorl %r13d, %ebx addl %edx, %r12d andl %ebx, %eax # rnd_1: 7 - 7 xorl %r14d, %eax rorxl $6, %r8d, %edx addl %eax, %r12d vmovdqu %ymm4, 448(%rsp) # rnd_0: 0 - 0 movl %r9d, %eax rorxl $11, %r8d, %ecx addl 352(%rsp), %r11d vpalignr $4, %ymm3, %ymm0, %ymm5 # rnd_0: 1 - 1 xorl %edx, %ecx xorl %r10d, %eax rorxl $25, %r8d, %edx vpalignr $4, %ymm1, %ymm2, %ymm4 # rnd_0: 2 - 2 andl %r8d, %eax xorl %ecx, %edx rorxl $13, %r12d, %ecx vpsrld $7, %ymm5, %ymm6 # rnd_0: 3 - 3 addl %edx, %r11d rorxl $2, %r12d, %edx xorl %r10d, %eax vpslld $25, %ymm5, %ymm7 # rnd_0: 4 - 4 xorl %edx, %ecx rorxl $22, %r12d, %edx addl %eax, %r11d vpsrld $18, %ymm5, %ymm8 # rnd_0: 5 - 5 xorl %ecx, %edx movl %r13d, %eax addl %r11d, %r15d vpslld $14, %ymm5, %ymm9 # rnd_0: 6 - 6 xorl %r12d, %eax addl %edx, %r11d andl %eax, %ebx vpor %ymm7, %ymm6, %ymm6 # rnd_0: 7 - 7 xorl %r13d, %ebx rorxl $6, %r15d, %edx addl %ebx, %r11d vpor %ymm9, %ymm8, %ymm8 # rnd_1: 0 - 0 movl %r8d, %ebx rorxl $11, %r15d, %ecx addl 356(%rsp), %r10d vpsrld $3, %ymm5, %ymm9 # rnd_1: 1 - 1 xorl %edx, %ecx xorl %r9d, %ebx rorxl $25, %r15d, %edx vpxor %ymm8, %ymm6, %ymm6 # rnd_1: 2 - 2 andl %r15d, %ebx xorl %ecx, %edx rorxl $13, %r11d, %ecx vpshufd $0xfa, %ymm2, %ymm7 # rnd_1: 3 - 3 addl %edx, %r10d rorxl $2, %r11d, %edx xorl %r9d, %ebx vpxor %ymm6, %ymm9, %ymm5 # rnd_1: 4 - 4 xorl %edx, %ecx rorxl $22, %r11d, %edx addl %ebx, %r10d vpsrld $10, %ymm7, %ymm8 # rnd_1: 5 - 5 xorl %ecx, %edx addl %r10d, %r14d movl %r12d, %ebx vpsrlq $19, %ymm7, %ymm6 # rnd_1: 6 - 6 xorl %r11d, %ebx addl %edx, %r10d andl %ebx, %eax vpsrlq $0x11, %ymm7, %ymm7 # rnd_1: 7 - 7 xorl %r12d, %eax rorxl $6, %r14d, %edx addl %eax, %r10d vpaddd %ymm3, %ymm4, %ymm4 # rnd_0: 0 - 0 movl %r15d, %eax rorxl $11, %r14d, %ecx addl 360(%rsp), %r9d vpxor %ymm7, %ymm6, %ymm6 # rnd_0: 1 - 1 xorl %edx, %ecx xorl %r8d, %eax rorxl $25, %r14d, %edx vpxor %ymm6, %ymm8, %ymm8 # rnd_0: 2 - 2 andl %r14d, %eax xorl %ecx, %edx rorxl $13, %r10d, %ecx vpaddd %ymm5, %ymm4, %ymm4 # rnd_0: 3 - 3 addl %edx, %r9d rorxl $2, %r10d, %edx xorl %r8d, %eax vpshufb %ymm11, %ymm8, %ymm8 # rnd_0: 4 - 4 xorl %edx, %ecx rorxl $22, %r10d, %edx addl %eax, %r9d vpaddd %ymm8, %ymm4, %ymm4 # rnd_0: 5 - 5 xorl %ecx, %edx movl %r11d, %eax addl %r9d, %r13d vpshufd $0x50, %ymm4, %ymm6 # rnd_0: 6 - 6 xorl %r10d, %eax addl %edx, %r9d andl %eax, %ebx vpsrlq $0x11, %ymm6, %ymm8 # rnd_0: 7 - 7 xorl %r11d, %ebx rorxl $6, %r13d, %edx addl %ebx, %r9d vpsrlq $19, %ymm6, %ymm7 # rnd_1: 0 - 0 movl %r14d, %ebx rorxl $11, %r13d, %ecx addl 364(%rsp), %r8d vpsrld $10, %ymm6, %ymm9 # rnd_1: 1 - 1 xorl %edx, %ecx xorl %r15d, %ebx rorxl $25, %r13d, %edx vpxor %ymm7, %ymm8, %ymm8 # rnd_1: 2 - 2 andl %r13d, %ebx xorl %ecx, %edx rorxl $13, %r9d, %ecx vpxor %ymm8, %ymm9, %ymm9 # rnd_1: 3 - 3 addl %edx, %r8d rorxl $2, %r9d, %edx xorl %r15d, %ebx vpshufb %ymm12, %ymm9, %ymm9 # rnd_1: 4 - 4 xorl %edx, %ecx rorxl $22, %r9d, %edx addl %ebx, %r8d vpaddd %ymm4, %ymm9, %ymm3 # rnd_1: 5 - 5 xorl %ecx, %edx addl %r8d, %r12d movl %r10d, %ebx vpaddd 
480+L_avx2_rorx_sha256_k(%rip), %ymm3, %ymm4 # rnd_1: 6 - 6 xorl %r9d, %ebx addl %edx, %r8d andl %ebx, %eax # rnd_1: 7 - 7 xorl %r10d, %eax rorxl $6, %r12d, %edx addl %eax, %r8d vmovdqu %ymm4, 480(%rsp) xorl %eax, %eax xorl %ecx, %ecx rorxl $6, %r12d, %edx rorxl $11, %r12d, %ecx leal (%r8,%rax,1), %r8d addl 384(%rsp), %r15d movl %r13d, %eax xorl %edx, %ecx xorl %r14d, %eax rorxl $25, %r12d, %edx xorl %ecx, %edx andl %r12d, %eax addl %edx, %r15d rorxl $2, %r8d, %edx rorxl $13, %r8d, %ecx xorl %r14d, %eax xorl %edx, %ecx rorxl $22, %r8d, %edx addl %eax, %r15d xorl %ecx, %edx movl %r9d, %eax addl %r15d, %r11d xorl %r8d, %eax andl %eax, %ebx addl %edx, %r15d xorl %r9d, %ebx rorxl $6, %r11d, %edx rorxl $11, %r11d, %ecx addl %ebx, %r15d addl 388(%rsp), %r14d movl %r12d, %ebx xorl %edx, %ecx xorl %r13d, %ebx rorxl $25, %r11d, %edx xorl %ecx, %edx andl %r11d, %ebx addl %edx, %r14d rorxl $2, %r15d, %edx rorxl $13, %r15d, %ecx xorl %r13d, %ebx xorl %edx, %ecx rorxl $22, %r15d, %edx addl %ebx, %r14d xorl %ecx, %edx movl %r8d, %ebx leal (%r10,%r14,1), %r10d xorl %r15d, %ebx andl %ebx, %eax addl %edx, %r14d xorl %r8d, %eax rorxl $6, %r10d, %edx rorxl $11, %r10d, %ecx leal (%r14,%rax,1), %r14d addl 392(%rsp), %r13d movl %r11d, %eax xorl %edx, %ecx xorl %r12d, %eax rorxl $25, %r10d, %edx xorl %ecx, %edx andl %r10d, %eax addl %edx, %r13d rorxl $2, %r14d, %edx rorxl $13, %r14d, %ecx xorl %r12d, %eax xorl %edx, %ecx rorxl $22, %r14d, %edx addl %eax, %r13d xorl %ecx, %edx movl %r15d, %eax addl %r13d, %r9d xorl %r14d, %eax andl %eax, %ebx addl %edx, %r13d xorl %r15d, %ebx rorxl $6, %r9d, %edx rorxl $11, %r9d, %ecx addl %ebx, %r13d addl 396(%rsp), %r12d movl %r10d, %ebx xorl %edx, %ecx xorl %r11d, %ebx rorxl $25, %r9d, %edx xorl %ecx, %edx andl %r9d, %ebx addl %edx, %r12d rorxl $2, %r13d, %edx rorxl $13, %r13d, %ecx xorl %r11d, %ebx xorl %edx, %ecx rorxl $22, %r13d, %edx addl %ebx, %r12d xorl %ecx, %edx movl %r14d, %ebx leal (%r8,%r12,1), %r8d xorl %r13d, %ebx andl %ebx, %eax addl %edx, %r12d xorl %r14d, %eax rorxl $6, %r8d, %edx rorxl $11, %r8d, %ecx leal (%r12,%rax,1), %r12d addl 416(%rsp), %r11d movl %r9d, %eax xorl %edx, %ecx xorl %r10d, %eax rorxl $25, %r8d, %edx xorl %ecx, %edx andl %r8d, %eax addl %edx, %r11d rorxl $2, %r12d, %edx rorxl $13, %r12d, %ecx xorl %r10d, %eax xorl %edx, %ecx rorxl $22, %r12d, %edx addl %eax, %r11d xorl %ecx, %edx movl %r13d, %eax addl %r11d, %r15d xorl %r12d, %eax andl %eax, %ebx addl %edx, %r11d xorl %r13d, %ebx rorxl $6, %r15d, %edx rorxl $11, %r15d, %ecx addl %ebx, %r11d addl 420(%rsp), %r10d movl %r8d, %ebx xorl %edx, %ecx xorl %r9d, %ebx rorxl $25, %r15d, %edx xorl %ecx, %edx andl %r15d, %ebx addl %edx, %r10d rorxl $2, %r11d, %edx rorxl $13, %r11d, %ecx xorl %r9d, %ebx xorl %edx, %ecx rorxl $22, %r11d, %edx addl %ebx, %r10d xorl %ecx, %edx movl %r12d, %ebx leal (%r14,%r10,1), %r14d xorl %r11d, %ebx andl %ebx, %eax addl %edx, %r10d xorl %r12d, %eax rorxl $6, %r14d, %edx rorxl $11, %r14d, %ecx leal (%r10,%rax,1), %r10d addl 424(%rsp), %r9d movl %r15d, %eax xorl %edx, %ecx xorl %r8d, %eax rorxl $25, %r14d, %edx xorl %ecx, %edx andl %r14d, %eax addl %edx, %r9d rorxl $2, %r10d, %edx rorxl $13, %r10d, %ecx xorl %r8d, %eax xorl %edx, %ecx rorxl $22, %r10d, %edx addl %eax, %r9d xorl %ecx, %edx movl %r11d, %eax addl %r9d, %r13d xorl %r10d, %eax andl %eax, %ebx addl %edx, %r9d xorl %r11d, %ebx rorxl $6, %r13d, %edx rorxl $11, %r13d, %ecx addl %ebx, %r9d addl 428(%rsp), %r8d movl %r14d, %ebx xorl %edx, %ecx xorl %r15d, %ebx rorxl $25, %r13d, %edx xorl %ecx, %edx andl %r13d, %ebx 
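# Scalar round reference (FIPS 180-4):
#   Sigma1(e) = ror(e,6) ^ ror(e,11) ^ ror(e,25)   -> the rorxl $6/$11/$25 triples
#   Sigma0(a) = ror(a,2) ^ ror(a,13) ^ ror(a,22)   -> the rorxl $2/$13/$22 triples
#   Ch(e,f,g) = ((f ^ g) & e) ^ g
#   Maj(a,b,c) = b ^ ((a ^ b) & (b ^ c)), with one xor term carried over from
#   the previous round, so each round needs only the xor/and/add chain above.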
addl %edx, %r8d rorxl $2, %r9d, %edx rorxl $13, %r9d, %ecx xorl %r15d, %ebx xorl %edx, %ecx rorxl $22, %r9d, %edx addl %ebx, %r8d xorl %ecx, %edx movl %r10d, %ebx leal (%r12,%r8,1), %r12d xorl %r9d, %ebx andl %ebx, %eax addl %edx, %r8d xorl %r10d, %eax rorxl $6, %r12d, %edx rorxl $11, %r12d, %ecx leal (%r8,%rax,1), %r8d addl 448(%rsp), %r15d movl %r13d, %eax xorl %edx, %ecx xorl %r14d, %eax rorxl $25, %r12d, %edx xorl %ecx, %edx andl %r12d, %eax addl %edx, %r15d rorxl $2, %r8d, %edx rorxl $13, %r8d, %ecx xorl %r14d, %eax xorl %edx, %ecx rorxl $22, %r8d, %edx addl %eax, %r15d xorl %ecx, %edx movl %r9d, %eax addl %r15d, %r11d xorl %r8d, %eax andl %eax, %ebx addl %edx, %r15d xorl %r9d, %ebx rorxl $6, %r11d, %edx rorxl $11, %r11d, %ecx addl %ebx, %r15d addl 452(%rsp), %r14d movl %r12d, %ebx xorl %edx, %ecx xorl %r13d, %ebx rorxl $25, %r11d, %edx xorl %ecx, %edx andl %r11d, %ebx addl %edx, %r14d rorxl $2, %r15d, %edx rorxl $13, %r15d, %ecx xorl %r13d, %ebx xorl %edx, %ecx rorxl $22, %r15d, %edx addl %ebx, %r14d xorl %ecx, %edx movl %r8d, %ebx leal (%r10,%r14,1), %r10d xorl %r15d, %ebx andl %ebx, %eax addl %edx, %r14d xorl %r8d, %eax rorxl $6, %r10d, %edx rorxl $11, %r10d, %ecx leal (%r14,%rax,1), %r14d addl 456(%rsp), %r13d movl %r11d, %eax xorl %edx, %ecx xorl %r12d, %eax rorxl $25, %r10d, %edx xorl %ecx, %edx andl %r10d, %eax addl %edx, %r13d rorxl $2, %r14d, %edx rorxl $13, %r14d, %ecx xorl %r12d, %eax xorl %edx, %ecx rorxl $22, %r14d, %edx addl %eax, %r13d xorl %ecx, %edx movl %r15d, %eax addl %r13d, %r9d xorl %r14d, %eax andl %eax, %ebx addl %edx, %r13d xorl %r15d, %ebx rorxl $6, %r9d, %edx rorxl $11, %r9d, %ecx addl %ebx, %r13d addl 460(%rsp), %r12d movl %r10d, %ebx xorl %edx, %ecx xorl %r11d, %ebx rorxl $25, %r9d, %edx xorl %ecx, %edx andl %r9d, %ebx addl %edx, %r12d rorxl $2, %r13d, %edx rorxl $13, %r13d, %ecx xorl %r11d, %ebx xorl %edx, %ecx rorxl $22, %r13d, %edx addl %ebx, %r12d xorl %ecx, %edx movl %r14d, %ebx leal (%r8,%r12,1), %r8d xorl %r13d, %ebx andl %ebx, %eax addl %edx, %r12d xorl %r14d, %eax rorxl $6, %r8d, %edx rorxl $11, %r8d, %ecx leal (%r12,%rax,1), %r12d addl 480(%rsp), %r11d movl %r9d, %eax xorl %edx, %ecx xorl %r10d, %eax rorxl $25, %r8d, %edx xorl %ecx, %edx andl %r8d, %eax addl %edx, %r11d rorxl $2, %r12d, %edx rorxl $13, %r12d, %ecx xorl %r10d, %eax xorl %edx, %ecx rorxl $22, %r12d, %edx addl %eax, %r11d xorl %ecx, %edx movl %r13d, %eax addl %r11d, %r15d xorl %r12d, %eax andl %eax, %ebx addl %edx, %r11d xorl %r13d, %ebx rorxl $6, %r15d, %edx rorxl $11, %r15d, %ecx addl %ebx, %r11d addl 484(%rsp), %r10d movl %r8d, %ebx xorl %edx, %ecx xorl %r9d, %ebx rorxl $25, %r15d, %edx xorl %ecx, %edx andl %r15d, %ebx addl %edx, %r10d rorxl $2, %r11d, %edx rorxl $13, %r11d, %ecx xorl %r9d, %ebx xorl %edx, %ecx rorxl $22, %r11d, %edx addl %ebx, %r10d xorl %ecx, %edx movl %r12d, %ebx leal (%r14,%r10,1), %r14d xorl %r11d, %ebx andl %ebx, %eax addl %edx, %r10d xorl %r12d, %eax rorxl $6, %r14d, %edx rorxl $11, %r14d, %ecx leal (%r10,%rax,1), %r10d addl 488(%rsp), %r9d movl %r15d, %eax xorl %edx, %ecx xorl %r8d, %eax rorxl $25, %r14d, %edx xorl %ecx, %edx andl %r14d, %eax addl %edx, %r9d rorxl $2, %r10d, %edx rorxl $13, %r10d, %ecx xorl %r8d, %eax xorl %edx, %ecx rorxl $22, %r10d, %edx addl %eax, %r9d xorl %ecx, %edx movl %r11d, %eax addl %r9d, %r13d xorl %r10d, %eax andl %eax, %ebx addl %edx, %r9d xorl %r11d, %ebx rorxl $6, %r13d, %edx rorxl $11, %r13d, %ecx addl %ebx, %r9d addl 492(%rsp), %r8d movl %r14d, %ebx xorl %edx, %ecx xorl %r15d, %ebx rorxl $25, %r13d, %edx xorl %ecx, %edx 
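# The 64 rounds for one of the two interleaved 64-byte blocks finish just
# below; the working registers are folded back into the digest words at
# (%rdi), then a second 64-round pass runs for the companion block using the
# W[t]+K[t] values kept in the high 128-bit lane of each 32-byte stack slot
# (offsets 16, 20, 24, 28, then 48, ... from %rsp).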
andl %r13d, %ebx addl %edx, %r8d rorxl $2, %r9d, %edx rorxl $13, %r9d, %ecx xorl %r15d, %ebx xorl %edx, %ecx rorxl $22, %r9d, %edx addl %ebx, %r8d xorl %ecx, %edx movl %r10d, %ebx leal (%r12,%r8,1), %r12d xorl %r9d, %ebx andl %ebx, %eax addl %edx, %r8d xorl %r10d, %eax addl %eax, %r8d xorl %ecx, %ecx addl (%rdi), %r8d addl 4(%rdi), %r9d addl 8(%rdi), %r10d addl 12(%rdi), %r11d addl 16(%rdi), %r12d addl 20(%rdi), %r13d addl 24(%rdi), %r14d addl 28(%rdi), %r15d movl %r8d, (%rdi) movl %r9d, 4(%rdi) movl %r10d, 8(%rdi) movl %r11d, 12(%rdi) movl %r12d, 16(%rdi) movl %r13d, 20(%rdi) movl %r14d, 24(%rdi) movl %r15d, 28(%rdi) movl %r9d, %ebx xorl %eax, %eax xorl %r10d, %ebx rorxl $6, %r12d, %edx rorxl $11, %r12d, %ecx leal (%r8,%rax,1), %r8d addl 16(%rsp), %r15d movl %r13d, %eax xorl %edx, %ecx xorl %r14d, %eax rorxl $25, %r12d, %edx xorl %ecx, %edx andl %r12d, %eax addl %edx, %r15d rorxl $2, %r8d, %edx rorxl $13, %r8d, %ecx xorl %r14d, %eax xorl %edx, %ecx rorxl $22, %r8d, %edx addl %eax, %r15d xorl %ecx, %edx movl %r9d, %eax addl %r15d, %r11d xorl %r8d, %eax andl %eax, %ebx addl %edx, %r15d xorl %r9d, %ebx rorxl $6, %r11d, %edx rorxl $11, %r11d, %ecx addl %ebx, %r15d addl 20(%rsp), %r14d movl %r12d, %ebx xorl %edx, %ecx xorl %r13d, %ebx rorxl $25, %r11d, %edx xorl %ecx, %edx andl %r11d, %ebx addl %edx, %r14d rorxl $2, %r15d, %edx rorxl $13, %r15d, %ecx xorl %r13d, %ebx xorl %edx, %ecx rorxl $22, %r15d, %edx addl %ebx, %r14d xorl %ecx, %edx movl %r8d, %ebx leal (%r10,%r14,1), %r10d xorl %r15d, %ebx andl %ebx, %eax addl %edx, %r14d xorl %r8d, %eax rorxl $6, %r10d, %edx rorxl $11, %r10d, %ecx leal (%r14,%rax,1), %r14d addl 24(%rsp), %r13d movl %r11d, %eax xorl %edx, %ecx xorl %r12d, %eax rorxl $25, %r10d, %edx xorl %ecx, %edx andl %r10d, %eax addl %edx, %r13d rorxl $2, %r14d, %edx rorxl $13, %r14d, %ecx xorl %r12d, %eax xorl %edx, %ecx rorxl $22, %r14d, %edx addl %eax, %r13d xorl %ecx, %edx movl %r15d, %eax addl %r13d, %r9d xorl %r14d, %eax andl %eax, %ebx addl %edx, %r13d xorl %r15d, %ebx rorxl $6, %r9d, %edx rorxl $11, %r9d, %ecx addl %ebx, %r13d addl 28(%rsp), %r12d movl %r10d, %ebx xorl %edx, %ecx xorl %r11d, %ebx rorxl $25, %r9d, %edx xorl %ecx, %edx andl %r9d, %ebx addl %edx, %r12d rorxl $2, %r13d, %edx rorxl $13, %r13d, %ecx xorl %r11d, %ebx xorl %edx, %ecx rorxl $22, %r13d, %edx addl %ebx, %r12d xorl %ecx, %edx movl %r14d, %ebx leal (%r8,%r12,1), %r8d xorl %r13d, %ebx andl %ebx, %eax addl %edx, %r12d xorl %r14d, %eax rorxl $6, %r8d, %edx rorxl $11, %r8d, %ecx leal (%r12,%rax,1), %r12d addl 48(%rsp), %r11d movl %r9d, %eax xorl %edx, %ecx xorl %r10d, %eax rorxl $25, %r8d, %edx xorl %ecx, %edx andl %r8d, %eax addl %edx, %r11d rorxl $2, %r12d, %edx rorxl $13, %r12d, %ecx xorl %r10d, %eax xorl %edx, %ecx rorxl $22, %r12d, %edx addl %eax, %r11d xorl %ecx, %edx movl %r13d, %eax addl %r11d, %r15d xorl %r12d, %eax andl %eax, %ebx addl %edx, %r11d xorl %r13d, %ebx rorxl $6, %r15d, %edx rorxl $11, %r15d, %ecx addl %ebx, %r11d addl 52(%rsp), %r10d movl %r8d, %ebx xorl %edx, %ecx xorl %r9d, %ebx rorxl $25, %r15d, %edx xorl %ecx, %edx andl %r15d, %ebx addl %edx, %r10d rorxl $2, %r11d, %edx rorxl $13, %r11d, %ecx xorl %r9d, %ebx xorl %edx, %ecx rorxl $22, %r11d, %edx addl %ebx, %r10d xorl %ecx, %edx movl %r12d, %ebx leal (%r14,%r10,1), %r14d xorl %r11d, %ebx andl %ebx, %eax addl %edx, %r10d xorl %r12d, %eax rorxl $6, %r14d, %edx rorxl $11, %r14d, %ecx leal (%r10,%rax,1), %r10d addl 56(%rsp), %r9d movl %r15d, %eax xorl %edx, %ecx xorl %r8d, %eax rorxl $25, %r14d, %edx xorl %ecx, %edx andl %r14d, %eax addl 
%edx, %r9d rorxl $2, %r10d, %edx rorxl $13, %r10d, %ecx xorl %r8d, %eax xorl %edx, %ecx rorxl $22, %r10d, %edx addl %eax, %r9d xorl %ecx, %edx movl %r11d, %eax addl %r9d, %r13d xorl %r10d, %eax andl %eax, %ebx addl %edx, %r9d xorl %r11d, %ebx rorxl $6, %r13d, %edx rorxl $11, %r13d, %ecx addl %ebx, %r9d addl 60(%rsp), %r8d movl %r14d, %ebx xorl %edx, %ecx xorl %r15d, %ebx rorxl $25, %r13d, %edx xorl %ecx, %edx andl %r13d, %ebx addl %edx, %r8d rorxl $2, %r9d, %edx rorxl $13, %r9d, %ecx xorl %r15d, %ebx xorl %edx, %ecx rorxl $22, %r9d, %edx addl %ebx, %r8d xorl %ecx, %edx movl %r10d, %ebx leal (%r12,%r8,1), %r12d xorl %r9d, %ebx andl %ebx, %eax addl %edx, %r8d xorl %r10d, %eax rorxl $6, %r12d, %edx rorxl $11, %r12d, %ecx leal (%r8,%rax,1), %r8d addl 80(%rsp), %r15d movl %r13d, %eax xorl %edx, %ecx xorl %r14d, %eax rorxl $25, %r12d, %edx xorl %ecx, %edx andl %r12d, %eax addl %edx, %r15d rorxl $2, %r8d, %edx rorxl $13, %r8d, %ecx xorl %r14d, %eax xorl %edx, %ecx rorxl $22, %r8d, %edx addl %eax, %r15d xorl %ecx, %edx movl %r9d, %eax addl %r15d, %r11d xorl %r8d, %eax andl %eax, %ebx addl %edx, %r15d xorl %r9d, %ebx rorxl $6, %r11d, %edx rorxl $11, %r11d, %ecx addl %ebx, %r15d addl 84(%rsp), %r14d movl %r12d, %ebx xorl %edx, %ecx xorl %r13d, %ebx rorxl $25, %r11d, %edx xorl %ecx, %edx andl %r11d, %ebx addl %edx, %r14d rorxl $2, %r15d, %edx rorxl $13, %r15d, %ecx xorl %r13d, %ebx xorl %edx, %ecx rorxl $22, %r15d, %edx addl %ebx, %r14d xorl %ecx, %edx movl %r8d, %ebx leal (%r10,%r14,1), %r10d xorl %r15d, %ebx andl %ebx, %eax addl %edx, %r14d xorl %r8d, %eax rorxl $6, %r10d, %edx rorxl $11, %r10d, %ecx leal (%r14,%rax,1), %r14d addl 88(%rsp), %r13d movl %r11d, %eax xorl %edx, %ecx xorl %r12d, %eax rorxl $25, %r10d, %edx xorl %ecx, %edx andl %r10d, %eax addl %edx, %r13d rorxl $2, %r14d, %edx rorxl $13, %r14d, %ecx xorl %r12d, %eax xorl %edx, %ecx rorxl $22, %r14d, %edx addl %eax, %r13d xorl %ecx, %edx movl %r15d, %eax addl %r13d, %r9d xorl %r14d, %eax andl %eax, %ebx addl %edx, %r13d xorl %r15d, %ebx rorxl $6, %r9d, %edx rorxl $11, %r9d, %ecx addl %ebx, %r13d addl 92(%rsp), %r12d movl %r10d, %ebx xorl %edx, %ecx xorl %r11d, %ebx rorxl $25, %r9d, %edx xorl %ecx, %edx andl %r9d, %ebx addl %edx, %r12d rorxl $2, %r13d, %edx rorxl $13, %r13d, %ecx xorl %r11d, %ebx xorl %edx, %ecx rorxl $22, %r13d, %edx addl %ebx, %r12d xorl %ecx, %edx movl %r14d, %ebx leal (%r8,%r12,1), %r8d xorl %r13d, %ebx andl %ebx, %eax addl %edx, %r12d xorl %r14d, %eax rorxl $6, %r8d, %edx rorxl $11, %r8d, %ecx leal (%r12,%rax,1), %r12d addl 112(%rsp), %r11d movl %r9d, %eax xorl %edx, %ecx xorl %r10d, %eax rorxl $25, %r8d, %edx xorl %ecx, %edx andl %r8d, %eax addl %edx, %r11d rorxl $2, %r12d, %edx rorxl $13, %r12d, %ecx xorl %r10d, %eax xorl %edx, %ecx rorxl $22, %r12d, %edx addl %eax, %r11d xorl %ecx, %edx movl %r13d, %eax addl %r11d, %r15d xorl %r12d, %eax andl %eax, %ebx addl %edx, %r11d xorl %r13d, %ebx rorxl $6, %r15d, %edx rorxl $11, %r15d, %ecx addl %ebx, %r11d addl 116(%rsp), %r10d movl %r8d, %ebx xorl %edx, %ecx xorl %r9d, %ebx rorxl $25, %r15d, %edx xorl %ecx, %edx andl %r15d, %ebx addl %edx, %r10d rorxl $2, %r11d, %edx rorxl $13, %r11d, %ecx xorl %r9d, %ebx xorl %edx, %ecx rorxl $22, %r11d, %edx addl %ebx, %r10d xorl %ecx, %edx movl %r12d, %ebx leal (%r14,%r10,1), %r14d xorl %r11d, %ebx andl %ebx, %eax addl %edx, %r10d xorl %r12d, %eax rorxl $6, %r14d, %edx rorxl $11, %r14d, %ecx leal (%r10,%rax,1), %r10d addl 120(%rsp), %r9d movl %r15d, %eax xorl %edx, %ecx xorl %r8d, %eax rorxl $25, %r14d, %edx xorl %ecx, %edx andl %r14d, 
%eax addl %edx, %r9d rorxl $2, %r10d, %edx rorxl $13, %r10d, %ecx xorl %r8d, %eax xorl %edx, %ecx rorxl $22, %r10d, %edx addl %eax, %r9d xorl %ecx, %edx movl %r11d, %eax addl %r9d, %r13d xorl %r10d, %eax andl %eax, %ebx addl %edx, %r9d xorl %r11d, %ebx rorxl $6, %r13d, %edx rorxl $11, %r13d, %ecx addl %ebx, %r9d addl 124(%rsp), %r8d movl %r14d, %ebx xorl %edx, %ecx xorl %r15d, %ebx rorxl $25, %r13d, %edx xorl %ecx, %edx andl %r13d, %ebx addl %edx, %r8d rorxl $2, %r9d, %edx rorxl $13, %r9d, %ecx xorl %r15d, %ebx xorl %edx, %ecx rorxl $22, %r9d, %edx addl %ebx, %r8d xorl %ecx, %edx movl %r10d, %ebx leal (%r12,%r8,1), %r12d xorl %r9d, %ebx andl %ebx, %eax addl %edx, %r8d xorl %r10d, %eax rorxl $6, %r12d, %edx rorxl $11, %r12d, %ecx leal (%r8,%rax,1), %r8d addl 144(%rsp), %r15d movl %r13d, %eax xorl %edx, %ecx xorl %r14d, %eax rorxl $25, %r12d, %edx xorl %ecx, %edx andl %r12d, %eax addl %edx, %r15d rorxl $2, %r8d, %edx rorxl $13, %r8d, %ecx xorl %r14d, %eax xorl %edx, %ecx rorxl $22, %r8d, %edx addl %eax, %r15d xorl %ecx, %edx movl %r9d, %eax addl %r15d, %r11d xorl %r8d, %eax andl %eax, %ebx addl %edx, %r15d xorl %r9d, %ebx rorxl $6, %r11d, %edx rorxl $11, %r11d, %ecx addl %ebx, %r15d addl 148(%rsp), %r14d movl %r12d, %ebx xorl %edx, %ecx xorl %r13d, %ebx rorxl $25, %r11d, %edx xorl %ecx, %edx andl %r11d, %ebx addl %edx, %r14d rorxl $2, %r15d, %edx rorxl $13, %r15d, %ecx xorl %r13d, %ebx xorl %edx, %ecx rorxl $22, %r15d, %edx addl %ebx, %r14d xorl %ecx, %edx movl %r8d, %ebx leal (%r10,%r14,1), %r10d xorl %r15d, %ebx andl %ebx, %eax addl %edx, %r14d xorl %r8d, %eax rorxl $6, %r10d, %edx rorxl $11, %r10d, %ecx leal (%r14,%rax,1), %r14d addl 152(%rsp), %r13d movl %r11d, %eax xorl %edx, %ecx xorl %r12d, %eax rorxl $25, %r10d, %edx xorl %ecx, %edx andl %r10d, %eax addl %edx, %r13d rorxl $2, %r14d, %edx rorxl $13, %r14d, %ecx xorl %r12d, %eax xorl %edx, %ecx rorxl $22, %r14d, %edx addl %eax, %r13d xorl %ecx, %edx movl %r15d, %eax addl %r13d, %r9d xorl %r14d, %eax andl %eax, %ebx addl %edx, %r13d xorl %r15d, %ebx rorxl $6, %r9d, %edx rorxl $11, %r9d, %ecx addl %ebx, %r13d addl 156(%rsp), %r12d movl %r10d, %ebx xorl %edx, %ecx xorl %r11d, %ebx rorxl $25, %r9d, %edx xorl %ecx, %edx andl %r9d, %ebx addl %edx, %r12d rorxl $2, %r13d, %edx rorxl $13, %r13d, %ecx xorl %r11d, %ebx xorl %edx, %ecx rorxl $22, %r13d, %edx addl %ebx, %r12d xorl %ecx, %edx movl %r14d, %ebx leal (%r8,%r12,1), %r8d xorl %r13d, %ebx andl %ebx, %eax addl %edx, %r12d xorl %r14d, %eax rorxl $6, %r8d, %edx rorxl $11, %r8d, %ecx leal (%r12,%rax,1), %r12d addl 176(%rsp), %r11d movl %r9d, %eax xorl %edx, %ecx xorl %r10d, %eax rorxl $25, %r8d, %edx xorl %ecx, %edx andl %r8d, %eax addl %edx, %r11d rorxl $2, %r12d, %edx rorxl $13, %r12d, %ecx xorl %r10d, %eax xorl %edx, %ecx rorxl $22, %r12d, %edx addl %eax, %r11d xorl %ecx, %edx movl %r13d, %eax addl %r11d, %r15d xorl %r12d, %eax andl %eax, %ebx addl %edx, %r11d xorl %r13d, %ebx rorxl $6, %r15d, %edx rorxl $11, %r15d, %ecx addl %ebx, %r11d addl 180(%rsp), %r10d movl %r8d, %ebx xorl %edx, %ecx xorl %r9d, %ebx rorxl $25, %r15d, %edx xorl %ecx, %edx andl %r15d, %ebx addl %edx, %r10d rorxl $2, %r11d, %edx rorxl $13, %r11d, %ecx xorl %r9d, %ebx xorl %edx, %ecx rorxl $22, %r11d, %edx addl %ebx, %r10d xorl %ecx, %edx movl %r12d, %ebx leal (%r14,%r10,1), %r14d xorl %r11d, %ebx andl %ebx, %eax addl %edx, %r10d xorl %r12d, %eax rorxl $6, %r14d, %edx rorxl $11, %r14d, %ecx leal (%r10,%rax,1), %r10d addl 184(%rsp), %r9d movl %r15d, %eax xorl %edx, %ecx xorl %r8d, %eax rorxl $25, %r14d, %edx xorl %ecx, 
%edx andl %r14d, %eax addl %edx, %r9d rorxl $2, %r10d, %edx rorxl $13, %r10d, %ecx xorl %r8d, %eax xorl %edx, %ecx rorxl $22, %r10d, %edx addl %eax, %r9d xorl %ecx, %edx movl %r11d, %eax addl %r9d, %r13d xorl %r10d, %eax andl %eax, %ebx addl %edx, %r9d xorl %r11d, %ebx rorxl $6, %r13d, %edx rorxl $11, %r13d, %ecx addl %ebx, %r9d addl 188(%rsp), %r8d movl %r14d, %ebx xorl %edx, %ecx xorl %r15d, %ebx rorxl $25, %r13d, %edx xorl %ecx, %edx andl %r13d, %ebx addl %edx, %r8d rorxl $2, %r9d, %edx rorxl $13, %r9d, %ecx xorl %r15d, %ebx xorl %edx, %ecx rorxl $22, %r9d, %edx addl %ebx, %r8d xorl %ecx, %edx movl %r10d, %ebx leal (%r12,%r8,1), %r12d xorl %r9d, %ebx andl %ebx, %eax addl %edx, %r8d xorl %r10d, %eax rorxl $6, %r12d, %edx rorxl $11, %r12d, %ecx leal (%r8,%rax,1), %r8d addl 208(%rsp), %r15d movl %r13d, %eax xorl %edx, %ecx xorl %r14d, %eax rorxl $25, %r12d, %edx xorl %ecx, %edx andl %r12d, %eax addl %edx, %r15d rorxl $2, %r8d, %edx rorxl $13, %r8d, %ecx xorl %r14d, %eax xorl %edx, %ecx rorxl $22, %r8d, %edx addl %eax, %r15d xorl %ecx, %edx movl %r9d, %eax addl %r15d, %r11d xorl %r8d, %eax andl %eax, %ebx addl %edx, %r15d xorl %r9d, %ebx rorxl $6, %r11d, %edx rorxl $11, %r11d, %ecx addl %ebx, %r15d addl 212(%rsp), %r14d movl %r12d, %ebx xorl %edx, %ecx xorl %r13d, %ebx rorxl $25, %r11d, %edx xorl %ecx, %edx andl %r11d, %ebx addl %edx, %r14d rorxl $2, %r15d, %edx rorxl $13, %r15d, %ecx xorl %r13d, %ebx xorl %edx, %ecx rorxl $22, %r15d, %edx addl %ebx, %r14d xorl %ecx, %edx movl %r8d, %ebx leal (%r10,%r14,1), %r10d xorl %r15d, %ebx andl %ebx, %eax addl %edx, %r14d xorl %r8d, %eax rorxl $6, %r10d, %edx rorxl $11, %r10d, %ecx leal (%r14,%rax,1), %r14d addl 216(%rsp), %r13d movl %r11d, %eax xorl %edx, %ecx xorl %r12d, %eax rorxl $25, %r10d, %edx xorl %ecx, %edx andl %r10d, %eax addl %edx, %r13d rorxl $2, %r14d, %edx rorxl $13, %r14d, %ecx xorl %r12d, %eax xorl %edx, %ecx rorxl $22, %r14d, %edx addl %eax, %r13d xorl %ecx, %edx movl %r15d, %eax addl %r13d, %r9d xorl %r14d, %eax andl %eax, %ebx addl %edx, %r13d xorl %r15d, %ebx rorxl $6, %r9d, %edx rorxl $11, %r9d, %ecx addl %ebx, %r13d addl 220(%rsp), %r12d movl %r10d, %ebx xorl %edx, %ecx xorl %r11d, %ebx rorxl $25, %r9d, %edx xorl %ecx, %edx andl %r9d, %ebx addl %edx, %r12d rorxl $2, %r13d, %edx rorxl $13, %r13d, %ecx xorl %r11d, %ebx xorl %edx, %ecx rorxl $22, %r13d, %edx addl %ebx, %r12d xorl %ecx, %edx movl %r14d, %ebx leal (%r8,%r12,1), %r8d xorl %r13d, %ebx andl %ebx, %eax addl %edx, %r12d xorl %r14d, %eax rorxl $6, %r8d, %edx rorxl $11, %r8d, %ecx leal (%r12,%rax,1), %r12d addl 240(%rsp), %r11d movl %r9d, %eax xorl %edx, %ecx xorl %r10d, %eax rorxl $25, %r8d, %edx xorl %ecx, %edx andl %r8d, %eax addl %edx, %r11d rorxl $2, %r12d, %edx rorxl $13, %r12d, %ecx xorl %r10d, %eax xorl %edx, %ecx rorxl $22, %r12d, %edx addl %eax, %r11d xorl %ecx, %edx movl %r13d, %eax addl %r11d, %r15d xorl %r12d, %eax andl %eax, %ebx addl %edx, %r11d xorl %r13d, %ebx rorxl $6, %r15d, %edx rorxl $11, %r15d, %ecx addl %ebx, %r11d addl 244(%rsp), %r10d movl %r8d, %ebx xorl %edx, %ecx xorl %r9d, %ebx rorxl $25, %r15d, %edx xorl %ecx, %edx andl %r15d, %ebx addl %edx, %r10d rorxl $2, %r11d, %edx rorxl $13, %r11d, %ecx xorl %r9d, %ebx xorl %edx, %ecx rorxl $22, %r11d, %edx addl %ebx, %r10d xorl %ecx, %edx movl %r12d, %ebx leal (%r14,%r10,1), %r14d xorl %r11d, %ebx andl %ebx, %eax addl %edx, %r10d xorl %r12d, %eax rorxl $6, %r14d, %edx rorxl $11, %r14d, %ecx leal (%r10,%rax,1), %r10d addl 248(%rsp), %r9d movl %r15d, %eax xorl %edx, %ecx xorl %r8d, %eax rorxl $25, %r14d, 
%edx xorl %ecx, %edx andl %r14d, %eax addl %edx, %r9d rorxl $2, %r10d, %edx rorxl $13, %r10d, %ecx xorl %r8d, %eax xorl %edx, %ecx rorxl $22, %r10d, %edx addl %eax, %r9d xorl %ecx, %edx movl %r11d, %eax addl %r9d, %r13d xorl %r10d, %eax andl %eax, %ebx addl %edx, %r9d xorl %r11d, %ebx rorxl $6, %r13d, %edx rorxl $11, %r13d, %ecx addl %ebx, %r9d addl 252(%rsp), %r8d movl %r14d, %ebx xorl %edx, %ecx xorl %r15d, %ebx rorxl $25, %r13d, %edx xorl %ecx, %edx andl %r13d, %ebx addl %edx, %r8d rorxl $2, %r9d, %edx rorxl $13, %r9d, %ecx xorl %r15d, %ebx xorl %edx, %ecx rorxl $22, %r9d, %edx addl %ebx, %r8d xorl %ecx, %edx movl %r10d, %ebx leal (%r12,%r8,1), %r12d xorl %r9d, %ebx andl %ebx, %eax addl %edx, %r8d xorl %r10d, %eax rorxl $6, %r12d, %edx rorxl $11, %r12d, %ecx leal (%r8,%rax,1), %r8d addl 272(%rsp), %r15d movl %r13d, %eax xorl %edx, %ecx xorl %r14d, %eax rorxl $25, %r12d, %edx xorl %ecx, %edx andl %r12d, %eax addl %edx, %r15d rorxl $2, %r8d, %edx rorxl $13, %r8d, %ecx xorl %r14d, %eax xorl %edx, %ecx rorxl $22, %r8d, %edx addl %eax, %r15d xorl %ecx, %edx movl %r9d, %eax addl %r15d, %r11d xorl %r8d, %eax andl %eax, %ebx addl %edx, %r15d xorl %r9d, %ebx rorxl $6, %r11d, %edx rorxl $11, %r11d, %ecx addl %ebx, %r15d addl 276(%rsp), %r14d movl %r12d, %ebx xorl %edx, %ecx xorl %r13d, %ebx rorxl $25, %r11d, %edx xorl %ecx, %edx andl %r11d, %ebx addl %edx, %r14d rorxl $2, %r15d, %edx rorxl $13, %r15d, %ecx xorl %r13d, %ebx xorl %edx, %ecx rorxl $22, %r15d, %edx addl %ebx, %r14d xorl %ecx, %edx movl %r8d, %ebx leal (%r10,%r14,1), %r10d xorl %r15d, %ebx andl %ebx, %eax addl %edx, %r14d xorl %r8d, %eax rorxl $6, %r10d, %edx rorxl $11, %r10d, %ecx leal (%r14,%rax,1), %r14d addl 280(%rsp), %r13d movl %r11d, %eax xorl %edx, %ecx xorl %r12d, %eax rorxl $25, %r10d, %edx xorl %ecx, %edx andl %r10d, %eax addl %edx, %r13d rorxl $2, %r14d, %edx rorxl $13, %r14d, %ecx xorl %r12d, %eax xorl %edx, %ecx rorxl $22, %r14d, %edx addl %eax, %r13d xorl %ecx, %edx movl %r15d, %eax addl %r13d, %r9d xorl %r14d, %eax andl %eax, %ebx addl %edx, %r13d xorl %r15d, %ebx rorxl $6, %r9d, %edx rorxl $11, %r9d, %ecx addl %ebx, %r13d addl 284(%rsp), %r12d movl %r10d, %ebx xorl %edx, %ecx xorl %r11d, %ebx rorxl $25, %r9d, %edx xorl %ecx, %edx andl %r9d, %ebx addl %edx, %r12d rorxl $2, %r13d, %edx rorxl $13, %r13d, %ecx xorl %r11d, %ebx xorl %edx, %ecx rorxl $22, %r13d, %edx addl %ebx, %r12d xorl %ecx, %edx movl %r14d, %ebx leal (%r8,%r12,1), %r8d xorl %r13d, %ebx andl %ebx, %eax addl %edx, %r12d xorl %r14d, %eax rorxl $6, %r8d, %edx rorxl $11, %r8d, %ecx leal (%r12,%rax,1), %r12d addl 304(%rsp), %r11d movl %r9d, %eax xorl %edx, %ecx xorl %r10d, %eax rorxl $25, %r8d, %edx xorl %ecx, %edx andl %r8d, %eax addl %edx, %r11d rorxl $2, %r12d, %edx rorxl $13, %r12d, %ecx xorl %r10d, %eax xorl %edx, %ecx rorxl $22, %r12d, %edx addl %eax, %r11d xorl %ecx, %edx movl %r13d, %eax addl %r11d, %r15d xorl %r12d, %eax andl %eax, %ebx addl %edx, %r11d xorl %r13d, %ebx rorxl $6, %r15d, %edx rorxl $11, %r15d, %ecx addl %ebx, %r11d addl 308(%rsp), %r10d movl %r8d, %ebx xorl %edx, %ecx xorl %r9d, %ebx rorxl $25, %r15d, %edx xorl %ecx, %edx andl %r15d, %ebx addl %edx, %r10d rorxl $2, %r11d, %edx rorxl $13, %r11d, %ecx xorl %r9d, %ebx xorl %edx, %ecx rorxl $22, %r11d, %edx addl %ebx, %r10d xorl %ecx, %edx movl %r12d, %ebx leal (%r14,%r10,1), %r14d xorl %r11d, %ebx andl %ebx, %eax addl %edx, %r10d xorl %r12d, %eax rorxl $6, %r14d, %edx rorxl $11, %r14d, %ecx leal (%r10,%rax,1), %r10d addl 312(%rsp), %r9d movl %r15d, %eax xorl %edx, %ecx xorl %r8d, %eax 
rorxl $25, %r14d, %edx xorl %ecx, %edx andl %r14d, %eax addl %edx, %r9d rorxl $2, %r10d, %edx rorxl $13, %r10d, %ecx xorl %r8d, %eax xorl %edx, %ecx rorxl $22, %r10d, %edx addl %eax, %r9d xorl %ecx, %edx movl %r11d, %eax addl %r9d, %r13d xorl %r10d, %eax andl %eax, %ebx addl %edx, %r9d xorl %r11d, %ebx rorxl $6, %r13d, %edx rorxl $11, %r13d, %ecx addl %ebx, %r9d addl 316(%rsp), %r8d movl %r14d, %ebx xorl %edx, %ecx xorl %r15d, %ebx rorxl $25, %r13d, %edx xorl %ecx, %edx andl %r13d, %ebx addl %edx, %r8d rorxl $2, %r9d, %edx rorxl $13, %r9d, %ecx xorl %r15d, %ebx xorl %edx, %ecx rorxl $22, %r9d, %edx addl %ebx, %r8d xorl %ecx, %edx movl %r10d, %ebx leal (%r12,%r8,1), %r12d xorl %r9d, %ebx andl %ebx, %eax addl %edx, %r8d xorl %r10d, %eax rorxl $6, %r12d, %edx rorxl $11, %r12d, %ecx leal (%r8,%rax,1), %r8d addl 336(%rsp), %r15d movl %r13d, %eax xorl %edx, %ecx xorl %r14d, %eax rorxl $25, %r12d, %edx xorl %ecx, %edx andl %r12d, %eax addl %edx, %r15d rorxl $2, %r8d, %edx rorxl $13, %r8d, %ecx xorl %r14d, %eax xorl %edx, %ecx rorxl $22, %r8d, %edx addl %eax, %r15d xorl %ecx, %edx movl %r9d, %eax addl %r15d, %r11d xorl %r8d, %eax andl %eax, %ebx addl %edx, %r15d xorl %r9d, %ebx rorxl $6, %r11d, %edx rorxl $11, %r11d, %ecx addl %ebx, %r15d addl 340(%rsp), %r14d movl %r12d, %ebx xorl %edx, %ecx xorl %r13d, %ebx rorxl $25, %r11d, %edx xorl %ecx, %edx andl %r11d, %ebx addl %edx, %r14d rorxl $2, %r15d, %edx rorxl $13, %r15d, %ecx xorl %r13d, %ebx xorl %edx, %ecx rorxl $22, %r15d, %edx addl %ebx, %r14d xorl %ecx, %edx movl %r8d, %ebx leal (%r10,%r14,1), %r10d xorl %r15d, %ebx andl %ebx, %eax addl %edx, %r14d xorl %r8d, %eax rorxl $6, %r10d, %edx rorxl $11, %r10d, %ecx leal (%r14,%rax,1), %r14d addl 344(%rsp), %r13d movl %r11d, %eax xorl %edx, %ecx xorl %r12d, %eax rorxl $25, %r10d, %edx xorl %ecx, %edx andl %r10d, %eax addl %edx, %r13d rorxl $2, %r14d, %edx rorxl $13, %r14d, %ecx xorl %r12d, %eax xorl %edx, %ecx rorxl $22, %r14d, %edx addl %eax, %r13d xorl %ecx, %edx movl %r15d, %eax addl %r13d, %r9d xorl %r14d, %eax andl %eax, %ebx addl %edx, %r13d xorl %r15d, %ebx rorxl $6, %r9d, %edx rorxl $11, %r9d, %ecx addl %ebx, %r13d addl 348(%rsp), %r12d movl %r10d, %ebx xorl %edx, %ecx xorl %r11d, %ebx rorxl $25, %r9d, %edx xorl %ecx, %edx andl %r9d, %ebx addl %edx, %r12d rorxl $2, %r13d, %edx rorxl $13, %r13d, %ecx xorl %r11d, %ebx xorl %edx, %ecx rorxl $22, %r13d, %edx addl %ebx, %r12d xorl %ecx, %edx movl %r14d, %ebx leal (%r8,%r12,1), %r8d xorl %r13d, %ebx andl %ebx, %eax addl %edx, %r12d xorl %r14d, %eax rorxl $6, %r8d, %edx rorxl $11, %r8d, %ecx leal (%r12,%rax,1), %r12d addl 368(%rsp), %r11d movl %r9d, %eax xorl %edx, %ecx xorl %r10d, %eax rorxl $25, %r8d, %edx xorl %ecx, %edx andl %r8d, %eax addl %edx, %r11d rorxl $2, %r12d, %edx rorxl $13, %r12d, %ecx xorl %r10d, %eax xorl %edx, %ecx rorxl $22, %r12d, %edx addl %eax, %r11d xorl %ecx, %edx movl %r13d, %eax addl %r11d, %r15d xorl %r12d, %eax andl %eax, %ebx addl %edx, %r11d xorl %r13d, %ebx rorxl $6, %r15d, %edx rorxl $11, %r15d, %ecx addl %ebx, %r11d addl 372(%rsp), %r10d movl %r8d, %ebx xorl %edx, %ecx xorl %r9d, %ebx rorxl $25, %r15d, %edx xorl %ecx, %edx andl %r15d, %ebx addl %edx, %r10d rorxl $2, %r11d, %edx rorxl $13, %r11d, %ecx xorl %r9d, %ebx xorl %edx, %ecx rorxl $22, %r11d, %edx addl %ebx, %r10d xorl %ecx, %edx movl %r12d, %ebx leal (%r14,%r10,1), %r14d xorl %r11d, %ebx andl %ebx, %eax addl %edx, %r10d xorl %r12d, %eax rorxl $6, %r14d, %edx rorxl $11, %r14d, %ecx leal (%r10,%rax,1), %r10d addl 376(%rsp), %r9d movl %r15d, %eax xorl %edx, %ecx 
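# Register note: the eight working variables a..h stay in %r8d-%r15d; rather
# than shuffling values between registers each round, the round code is
# repeated with the register roles rotated, which is why successive rounds
# accumulate into a different destination register.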
xorl %r8d, %eax rorxl $25, %r14d, %edx xorl %ecx, %edx andl %r14d, %eax addl %edx, %r9d rorxl $2, %r10d, %edx rorxl $13, %r10d, %ecx xorl %r8d, %eax xorl %edx, %ecx rorxl $22, %r10d, %edx addl %eax, %r9d xorl %ecx, %edx movl %r11d, %eax addl %r9d, %r13d xorl %r10d, %eax andl %eax, %ebx addl %edx, %r9d xorl %r11d, %ebx rorxl $6, %r13d, %edx rorxl $11, %r13d, %ecx addl %ebx, %r9d addl 380(%rsp), %r8d movl %r14d, %ebx xorl %edx, %ecx xorl %r15d, %ebx rorxl $25, %r13d, %edx xorl %ecx, %edx andl %r13d, %ebx addl %edx, %r8d rorxl $2, %r9d, %edx rorxl $13, %r9d, %ecx xorl %r15d, %ebx xorl %edx, %ecx rorxl $22, %r9d, %edx addl %ebx, %r8d xorl %ecx, %edx movl %r10d, %ebx leal (%r12,%r8,1), %r12d xorl %r9d, %ebx andl %ebx, %eax addl %edx, %r8d xorl %r10d, %eax rorxl $6, %r12d, %edx rorxl $11, %r12d, %ecx leal (%r8,%rax,1), %r8d addl 400(%rsp), %r15d movl %r13d, %eax xorl %edx, %ecx xorl %r14d, %eax rorxl $25, %r12d, %edx xorl %ecx, %edx andl %r12d, %eax addl %edx, %r15d rorxl $2, %r8d, %edx rorxl $13, %r8d, %ecx xorl %r14d, %eax xorl %edx, %ecx rorxl $22, %r8d, %edx addl %eax, %r15d xorl %ecx, %edx movl %r9d, %eax addl %r15d, %r11d xorl %r8d, %eax andl %eax, %ebx addl %edx, %r15d xorl %r9d, %ebx rorxl $6, %r11d, %edx rorxl $11, %r11d, %ecx addl %ebx, %r15d addl 404(%rsp), %r14d movl %r12d, %ebx xorl %edx, %ecx xorl %r13d, %ebx rorxl $25, %r11d, %edx xorl %ecx, %edx andl %r11d, %ebx addl %edx, %r14d rorxl $2, %r15d, %edx rorxl $13, %r15d, %ecx xorl %r13d, %ebx xorl %edx, %ecx rorxl $22, %r15d, %edx addl %ebx, %r14d xorl %ecx, %edx movl %r8d, %ebx leal (%r10,%r14,1), %r10d xorl %r15d, %ebx andl %ebx, %eax addl %edx, %r14d xorl %r8d, %eax rorxl $6, %r10d, %edx rorxl $11, %r10d, %ecx leal (%r14,%rax,1), %r14d addl 408(%rsp), %r13d movl %r11d, %eax xorl %edx, %ecx xorl %r12d, %eax rorxl $25, %r10d, %edx xorl %ecx, %edx andl %r10d, %eax addl %edx, %r13d rorxl $2, %r14d, %edx rorxl $13, %r14d, %ecx xorl %r12d, %eax xorl %edx, %ecx rorxl $22, %r14d, %edx addl %eax, %r13d xorl %ecx, %edx movl %r15d, %eax addl %r13d, %r9d xorl %r14d, %eax andl %eax, %ebx addl %edx, %r13d xorl %r15d, %ebx rorxl $6, %r9d, %edx rorxl $11, %r9d, %ecx addl %ebx, %r13d addl 412(%rsp), %r12d movl %r10d, %ebx xorl %edx, %ecx xorl %r11d, %ebx rorxl $25, %r9d, %edx xorl %ecx, %edx andl %r9d, %ebx addl %edx, %r12d rorxl $2, %r13d, %edx rorxl $13, %r13d, %ecx xorl %r11d, %ebx xorl %edx, %ecx rorxl $22, %r13d, %edx addl %ebx, %r12d xorl %ecx, %edx movl %r14d, %ebx leal (%r8,%r12,1), %r8d xorl %r13d, %ebx andl %ebx, %eax addl %edx, %r12d xorl %r14d, %eax rorxl $6, %r8d, %edx rorxl $11, %r8d, %ecx leal (%r12,%rax,1), %r12d addl 432(%rsp), %r11d movl %r9d, %eax xorl %edx, %ecx xorl %r10d, %eax rorxl $25, %r8d, %edx xorl %ecx, %edx andl %r8d, %eax addl %edx, %r11d rorxl $2, %r12d, %edx rorxl $13, %r12d, %ecx xorl %r10d, %eax xorl %edx, %ecx rorxl $22, %r12d, %edx addl %eax, %r11d xorl %ecx, %edx movl %r13d, %eax addl %r11d, %r15d xorl %r12d, %eax andl %eax, %ebx addl %edx, %r11d xorl %r13d, %ebx rorxl $6, %r15d, %edx rorxl $11, %r15d, %ecx addl %ebx, %r11d addl 436(%rsp), %r10d movl %r8d, %ebx xorl %edx, %ecx xorl %r9d, %ebx rorxl $25, %r15d, %edx xorl %ecx, %edx andl %r15d, %ebx addl %edx, %r10d rorxl $2, %r11d, %edx rorxl $13, %r11d, %ecx xorl %r9d, %ebx xorl %edx, %ecx rorxl $22, %r11d, %edx addl %ebx, %r10d xorl %ecx, %edx movl %r12d, %ebx leal (%r14,%r10,1), %r14d xorl %r11d, %ebx andl %ebx, %eax addl %edx, %r10d xorl %r12d, %eax rorxl $6, %r14d, %edx rorxl $11, %r14d, %ecx leal (%r10,%rax,1), %r10d addl 440(%rsp), %r9d movl %r15d, %eax 
xorl %edx, %ecx xorl %r8d, %eax rorxl $25, %r14d, %edx xorl %ecx, %edx andl %r14d, %eax addl %edx, %r9d rorxl $2, %r10d, %edx rorxl $13, %r10d, %ecx xorl %r8d, %eax xorl %edx, %ecx rorxl $22, %r10d, %edx addl %eax, %r9d xorl %ecx, %edx movl %r11d, %eax addl %r9d, %r13d xorl %r10d, %eax andl %eax, %ebx addl %edx, %r9d xorl %r11d, %ebx rorxl $6, %r13d, %edx rorxl $11, %r13d, %ecx addl %ebx, %r9d addl 444(%rsp), %r8d movl %r14d, %ebx xorl %edx, %ecx xorl %r15d, %ebx rorxl $25, %r13d, %edx xorl %ecx, %edx andl %r13d, %ebx addl %edx, %r8d rorxl $2, %r9d, %edx rorxl $13, %r9d, %ecx xorl %r15d, %ebx xorl %edx, %ecx rorxl $22, %r9d, %edx addl %ebx, %r8d xorl %ecx, %edx movl %r10d, %ebx leal (%r12,%r8,1), %r12d xorl %r9d, %ebx andl %ebx, %eax addl %edx, %r8d xorl %r10d, %eax rorxl $6, %r12d, %edx rorxl $11, %r12d, %ecx leal (%r8,%rax,1), %r8d addl 464(%rsp), %r15d movl %r13d, %eax xorl %edx, %ecx xorl %r14d, %eax rorxl $25, %r12d, %edx xorl %ecx, %edx andl %r12d, %eax addl %edx, %r15d rorxl $2, %r8d, %edx rorxl $13, %r8d, %ecx xorl %r14d, %eax xorl %edx, %ecx rorxl $22, %r8d, %edx addl %eax, %r15d xorl %ecx, %edx movl %r9d, %eax addl %r15d, %r11d xorl %r8d, %eax andl %eax, %ebx addl %edx, %r15d xorl %r9d, %ebx rorxl $6, %r11d, %edx rorxl $11, %r11d, %ecx addl %ebx, %r15d addl 468(%rsp), %r14d movl %r12d, %ebx xorl %edx, %ecx xorl %r13d, %ebx rorxl $25, %r11d, %edx xorl %ecx, %edx andl %r11d, %ebx addl %edx, %r14d rorxl $2, %r15d, %edx rorxl $13, %r15d, %ecx xorl %r13d, %ebx xorl %edx, %ecx rorxl $22, %r15d, %edx addl %ebx, %r14d xorl %ecx, %edx movl %r8d, %ebx leal (%r10,%r14,1), %r10d xorl %r15d, %ebx andl %ebx, %eax addl %edx, %r14d xorl %r8d, %eax rorxl $6, %r10d, %edx rorxl $11, %r10d, %ecx leal (%r14,%rax,1), %r14d addl 472(%rsp), %r13d movl %r11d, %eax xorl %edx, %ecx xorl %r12d, %eax rorxl $25, %r10d, %edx xorl %ecx, %edx andl %r10d, %eax addl %edx, %r13d rorxl $2, %r14d, %edx rorxl $13, %r14d, %ecx xorl %r12d, %eax xorl %edx, %ecx rorxl $22, %r14d, %edx addl %eax, %r13d xorl %ecx, %edx movl %r15d, %eax addl %r13d, %r9d xorl %r14d, %eax andl %eax, %ebx addl %edx, %r13d xorl %r15d, %ebx rorxl $6, %r9d, %edx rorxl $11, %r9d, %ecx addl %ebx, %r13d addl 476(%rsp), %r12d movl %r10d, %ebx xorl %edx, %ecx xorl %r11d, %ebx rorxl $25, %r9d, %edx xorl %ecx, %edx andl %r9d, %ebx addl %edx, %r12d rorxl $2, %r13d, %edx rorxl $13, %r13d, %ecx xorl %r11d, %ebx xorl %edx, %ecx rorxl $22, %r13d, %edx addl %ebx, %r12d xorl %ecx, %edx movl %r14d, %ebx leal (%r8,%r12,1), %r8d xorl %r13d, %ebx andl %ebx, %eax addl %edx, %r12d xorl %r14d, %eax rorxl $6, %r8d, %edx rorxl $11, %r8d, %ecx leal (%r12,%rax,1), %r12d addl 496(%rsp), %r11d movl %r9d, %eax xorl %edx, %ecx xorl %r10d, %eax rorxl $25, %r8d, %edx xorl %ecx, %edx andl %r8d, %eax addl %edx, %r11d rorxl $2, %r12d, %edx rorxl $13, %r12d, %ecx xorl %r10d, %eax xorl %edx, %ecx rorxl $22, %r12d, %edx addl %eax, %r11d xorl %ecx, %edx movl %r13d, %eax addl %r11d, %r15d xorl %r12d, %eax andl %eax, %ebx addl %edx, %r11d xorl %r13d, %ebx rorxl $6, %r15d, %edx rorxl $11, %r15d, %ecx addl %ebx, %r11d addl 500(%rsp), %r10d movl %r8d, %ebx xorl %edx, %ecx xorl %r9d, %ebx rorxl $25, %r15d, %edx xorl %ecx, %edx andl %r15d, %ebx addl %edx, %r10d rorxl $2, %r11d, %edx rorxl $13, %r11d, %ecx xorl %r9d, %ebx xorl %edx, %ecx rorxl $22, %r11d, %edx addl %ebx, %r10d xorl %ecx, %edx movl %r12d, %ebx leal (%r14,%r10,1), %r14d xorl %r11d, %ebx andl %ebx, %eax addl %edx, %r10d xorl %r12d, %eax rorxl $6, %r14d, %edx rorxl $11, %r14d, %ecx leal (%r10,%rax,1), %r10d addl 504(%rsp), %r9d 
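# After the final rounds below, both processed blocks are folded into the
# digest at (%rdi); %rbp, which appears to track the input position, advances
# by 0x80 (two 64-byte blocks), the remaining length in %esi drops by 0x80,
# and the loop repeats while it is non-zero. The exit path zeroes %rax for the
# return value, issues vzeroupper to avoid AVX/SSE transition penalties,
# releases the 0x200-byte W[t]+K[t] area, and restores the callee-saved
# registers before returning.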
movl %r15d, %eax xorl %edx, %ecx xorl %r8d, %eax rorxl $25, %r14d, %edx xorl %ecx, %edx andl %r14d, %eax addl %edx, %r9d rorxl $2, %r10d, %edx rorxl $13, %r10d, %ecx xorl %r8d, %eax xorl %edx, %ecx rorxl $22, %r10d, %edx addl %eax, %r9d xorl %ecx, %edx movl %r11d, %eax addl %r9d, %r13d xorl %r10d, %eax andl %eax, %ebx addl %edx, %r9d xorl %r11d, %ebx rorxl $6, %r13d, %edx rorxl $11, %r13d, %ecx addl %ebx, %r9d addl 508(%rsp), %r8d movl %r14d, %ebx xorl %edx, %ecx xorl %r15d, %ebx rorxl $25, %r13d, %edx xorl %ecx, %edx andl %r13d, %ebx addl %edx, %r8d rorxl $2, %r9d, %edx rorxl $13, %r9d, %ecx xorl %r15d, %ebx xorl %edx, %ecx rorxl $22, %r9d, %edx addl %ebx, %r8d xorl %ecx, %edx movl %r10d, %ebx leal (%r12,%r8,1), %r12d xorl %r9d, %ebx andl %ebx, %eax addl %edx, %r8d xorl %r10d, %eax addl %eax, %r8d addq $0x80, %rbp addl (%rdi), %r8d addl 4(%rdi), %r9d addl 8(%rdi), %r10d addl 12(%rdi), %r11d addl 16(%rdi), %r12d addl 20(%rdi), %r13d addl 24(%rdi), %r14d addl 28(%rdi), %r15d subl $0x80, %esi movl %r8d, (%rdi) movl %r9d, 4(%rdi) movl %r10d, 8(%rdi) movl %r11d, 12(%rdi) movl %r12d, 16(%rdi) movl %r13d, 20(%rdi) movl %r14d, 24(%rdi) movl %r15d, 28(%rdi) jnz L_sha256_len_avx2_rorx_start L_sha256_len_avx2_rorx_done: xorq %rax, %rax vzeroupper addq $0x200, %rsp popq %rbp popq %r15 popq %r14 popq %r13 popq %r12 popq %rbx repz retq #ifndef __APPLE__ .size Transform_Sha256_AVX2_RORX_Len,.-Transform_Sha256_AVX2_RORX_Len #endif /* __APPLE__ */ #endif /* HAVE_INTEL_AVX2 */ #if defined(__linux__) && defined(__ELF__) .section .note.GNU-stack,"",%progbits #endif