chibipub

experimental activitypub node in C
git clone git://jb55.com/chibipub
Log | Files | Refs | README | LICENSE

blake3_sse41_x86-64_unix.S (61143B)


      /*
       * On ELF/Linux builds only, emit a .note.GNU-stack section with an empty
       * flag string (so it is not marked executable). GNU toolchains treat the
       * presence of this section as "this object does not need an executable
       * stack".
       */
      1 #if defined(__ELF__) && defined(__linux__)
      2 .section .note.GNU-stack,"",%progbits
      3 #endif
      4 
      /*
       * Include <cet.h> only for ELF builds with __CET__ defined, and only when
       * the header can actually be found. The __has_include(...) test is nested
       * inside a defined(__has_include) check so that it is evaluated only by
       * preprocessors that provide it.
       * NOTE(review): <cet.h> is presumably what supplies _CET_ENDBR - confirm.
       */
      5 #if defined(__ELF__) && defined(__CET__) && defined(__has_include)
      6 #if __has_include(<cet.h>)
      7 #include <cet.h>
      8 #endif
      9 #endif
     10 
     /*
      * Fallback: when _CET_ENDBR has not been defined by this point, define it
      * as empty so that its use at the function entry point expands to nothing.
      */
     11 #if !defined(_CET_ENDBR)
     12 #define _CET_ENDBR
     13 #endif
     14 
     /* Use Intel operand order (destination first) with registers written without a '%' prefix. */
     15 .intel_syntax noprefix
     /*
      * Mark the three SSE4.1 entry points as global symbols, each under a plain
      * name and an underscore-prefixed name.
      * NOTE(review): the underscore-prefixed names presumably serve platforms
      * whose C symbols carry a leading underscore - confirm against callers.
      */
     16 .global blake3_hash_many_sse41
     17 .global _blake3_hash_many_sse41
     18 .global blake3_compress_in_place_sse41
     19 .global _blake3_compress_in_place_sse41
     20 .global blake3_compress_xof_sse41
     21 .global _blake3_compress_xof_sse41
     /* Switch to the code section: bare .text when __APPLE__ is defined, the .section form otherwise. */
     22 #ifdef __APPLE__
     23 .text
     24 #else
     25 .section .text
     26 #endif
     /* Align the label that follows to a 2^6 = 64-byte boundary. */
     27         .p2align  6
     28 _blake3_hash_many_sse41:
     29 blake3_hash_many_sse41:
     30         _CET_ENDBR
     31         push    r15
     32         push    r14
     33         push    r13
     34         push    r12
     35         push    rbx
     36         push    rbp
     37         mov     rbp, rsp
     38         sub     rsp, 360
     39         and     rsp, 0xFFFFFFFFFFFFFFC0
     40         neg     r9d
     41         movd    xmm0, r9d
     42         pshufd  xmm0, xmm0, 0x00
     43         movdqa  xmmword ptr [rsp+0x130], xmm0
     44         movdqa  xmm1, xmm0
     45         pand    xmm1, xmmword ptr [ADD0+rip]
     46         pand    xmm0, xmmword ptr [ADD1+rip]
     47         movdqa  xmmword ptr [rsp+0x150], xmm0
     48         movd    xmm0, r8d
     49         pshufd  xmm0, xmm0, 0x00
     50         paddd   xmm0, xmm1
     51         movdqa  xmmword ptr [rsp+0x110], xmm0
     52         pxor    xmm0, xmmword ptr [CMP_MSB_MASK+rip]
     53         pxor    xmm1, xmmword ptr [CMP_MSB_MASK+rip]
     54         pcmpgtd xmm1, xmm0
     55         shr     r8, 32
     56         movd    xmm2, r8d
     57         pshufd  xmm2, xmm2, 0x00
     58         psubd   xmm2, xmm1
     59         movdqa  xmmword ptr [rsp+0x120], xmm2
     60         mov     rbx, qword ptr [rbp+0x50]
     61         mov     r15, rdx
     62         shl     r15, 6
     63         movzx   r13d, byte ptr [rbp+0x38]
     64         movzx   r12d, byte ptr [rbp+0x48]
     65         cmp     rsi, 4
     66         jc      3f
     67 2:
     68         movdqu  xmm3, xmmword ptr [rcx]
     69         pshufd  xmm0, xmm3, 0x00
     70         pshufd  xmm1, xmm3, 0x55
     71         pshufd  xmm2, xmm3, 0xAA
     72         pshufd  xmm3, xmm3, 0xFF
     73         movdqu  xmm7, xmmword ptr [rcx+0x10]
     74         pshufd  xmm4, xmm7, 0x00
     75         pshufd  xmm5, xmm7, 0x55
     76         pshufd  xmm6, xmm7, 0xAA
     77         pshufd  xmm7, xmm7, 0xFF
     78         mov     r8, qword ptr [rdi]
     79         mov     r9, qword ptr [rdi+0x8]
     80         mov     r10, qword ptr [rdi+0x10]
     81         mov     r11, qword ptr [rdi+0x18]
     82         movzx   eax, byte ptr [rbp+0x40]
     83         or      eax, r13d
     84         xor     edx, edx
     85 9:
     86         mov     r14d, eax
     87         or      eax, r12d
     88         add     rdx, 64
     89         cmp     rdx, r15
     90         cmovne  eax, r14d
     91         movdqu  xmm8, xmmword ptr [r8+rdx-0x40]
     92         movdqu  xmm9, xmmword ptr [r9+rdx-0x40]
     93         movdqu  xmm10, xmmword ptr [r10+rdx-0x40]
     94         movdqu  xmm11, xmmword ptr [r11+rdx-0x40]
     95         movdqa  xmm12, xmm8
     96         punpckldq xmm8, xmm9
     97         punpckhdq xmm12, xmm9
     98         movdqa  xmm14, xmm10
     99         punpckldq xmm10, xmm11
    100         punpckhdq xmm14, xmm11
    101         movdqa  xmm9, xmm8
    102         punpcklqdq xmm8, xmm10
    103         punpckhqdq xmm9, xmm10
    104         movdqa  xmm13, xmm12
    105         punpcklqdq xmm12, xmm14
    106         punpckhqdq xmm13, xmm14
    107         movdqa  xmmword ptr [rsp], xmm8
    108         movdqa  xmmword ptr [rsp+0x10], xmm9
    109         movdqa  xmmword ptr [rsp+0x20], xmm12
    110         movdqa  xmmword ptr [rsp+0x30], xmm13
    111         movdqu  xmm8, xmmword ptr [r8+rdx-0x30]
    112         movdqu  xmm9, xmmword ptr [r9+rdx-0x30]
    113         movdqu  xmm10, xmmword ptr [r10+rdx-0x30]
    114         movdqu  xmm11, xmmword ptr [r11+rdx-0x30]
    115         movdqa  xmm12, xmm8
    116         punpckldq xmm8, xmm9
    117         punpckhdq xmm12, xmm9
    118         movdqa  xmm14, xmm10
    119         punpckldq xmm10, xmm11
    120         punpckhdq xmm14, xmm11
    121         movdqa  xmm9, xmm8
    122         punpcklqdq xmm8, xmm10
    123         punpckhqdq xmm9, xmm10
    124         movdqa  xmm13, xmm12
    125         punpcklqdq xmm12, xmm14
    126         punpckhqdq xmm13, xmm14
    127         movdqa  xmmword ptr [rsp+0x40], xmm8
    128         movdqa  xmmword ptr [rsp+0x50], xmm9
    129         movdqa  xmmword ptr [rsp+0x60], xmm12
    130         movdqa  xmmword ptr [rsp+0x70], xmm13
    131         movdqu  xmm8, xmmword ptr [r8+rdx-0x20]
    132         movdqu  xmm9, xmmword ptr [r9+rdx-0x20]
    133         movdqu  xmm10, xmmword ptr [r10+rdx-0x20]
    134         movdqu  xmm11, xmmword ptr [r11+rdx-0x20]
    135         movdqa  xmm12, xmm8
    136         punpckldq xmm8, xmm9
    137         punpckhdq xmm12, xmm9
    138         movdqa  xmm14, xmm10
    139         punpckldq xmm10, xmm11
    140         punpckhdq xmm14, xmm11
    141         movdqa  xmm9, xmm8
    142         punpcklqdq xmm8, xmm10
    143         punpckhqdq xmm9, xmm10
    144         movdqa  xmm13, xmm12
    145         punpcklqdq xmm12, xmm14
    146         punpckhqdq xmm13, xmm14
    147         movdqa  xmmword ptr [rsp+0x80], xmm8
    148         movdqa  xmmword ptr [rsp+0x90], xmm9
    149         movdqa  xmmword ptr [rsp+0xA0], xmm12
    150         movdqa  xmmword ptr [rsp+0xB0], xmm13
    151         movdqu  xmm8, xmmword ptr [r8+rdx-0x10]
    152         movdqu  xmm9, xmmword ptr [r9+rdx-0x10]
    153         movdqu  xmm10, xmmword ptr [r10+rdx-0x10]
    154         movdqu  xmm11, xmmword ptr [r11+rdx-0x10]
    155         movdqa  xmm12, xmm8
    156         punpckldq xmm8, xmm9
    157         punpckhdq xmm12, xmm9
    158         movdqa  xmm14, xmm10
    159         punpckldq xmm10, xmm11
    160         punpckhdq xmm14, xmm11
    161         movdqa  xmm9, xmm8
    162         punpcklqdq xmm8, xmm10
    163         punpckhqdq xmm9, xmm10
    164         movdqa  xmm13, xmm12
    165         punpcklqdq xmm12, xmm14
    166         punpckhqdq xmm13, xmm14
    167         movdqa  xmmword ptr [rsp+0xC0], xmm8
    168         movdqa  xmmword ptr [rsp+0xD0], xmm9
    169         movdqa  xmmword ptr [rsp+0xE0], xmm12
    170         movdqa  xmmword ptr [rsp+0xF0], xmm13
    171         movdqa  xmm9, xmmword ptr [BLAKE3_IV_1+rip]
    172         movdqa  xmm10, xmmword ptr [BLAKE3_IV_2+rip]
    173         movdqa  xmm11, xmmword ptr [BLAKE3_IV_3+rip]
    174         movdqa  xmm12, xmmword ptr [rsp+0x110]
    175         movdqa  xmm13, xmmword ptr [rsp+0x120]
    176         movdqa  xmm14, xmmword ptr [BLAKE3_BLOCK_LEN+rip]
    177         movd    xmm15, eax
    178         pshufd  xmm15, xmm15, 0x00
    179         prefetcht0 [r8+rdx+0x80]
    180         prefetcht0 [r9+rdx+0x80]
    181         prefetcht0 [r10+rdx+0x80]
    182         prefetcht0 [r11+rdx+0x80]
    183         paddd   xmm0, xmmword ptr [rsp]
    184         paddd   xmm1, xmmword ptr [rsp+0x20]
    185         paddd   xmm2, xmmword ptr [rsp+0x40]
    186         paddd   xmm3, xmmword ptr [rsp+0x60]
    187         paddd   xmm0, xmm4
    188         paddd   xmm1, xmm5
    189         paddd   xmm2, xmm6
    190         paddd   xmm3, xmm7
    191         pxor    xmm12, xmm0
    192         pxor    xmm13, xmm1
    193         pxor    xmm14, xmm2
    194         pxor    xmm15, xmm3
    195         movdqa  xmm8, xmmword ptr [ROT16+rip]
    196         pshufb  xmm12, xmm8
    197         pshufb  xmm13, xmm8
    198         pshufb  xmm14, xmm8
    199         pshufb  xmm15, xmm8
    200         movdqa  xmm8, xmmword ptr [BLAKE3_IV_0+rip]
    201         paddd   xmm8, xmm12
    202         paddd   xmm9, xmm13
    203         paddd   xmm10, xmm14
    204         paddd   xmm11, xmm15
    205         pxor    xmm4, xmm8
    206         pxor    xmm5, xmm9
    207         pxor    xmm6, xmm10
    208         pxor    xmm7, xmm11
    209         movdqa  xmmword ptr [rsp+0x100], xmm8
    210         movdqa  xmm8, xmm4
    211         psrld   xmm8, 12
    212         pslld   xmm4, 20
    213         por     xmm4, xmm8
    214         movdqa  xmm8, xmm5
    215         psrld   xmm8, 12
    216         pslld   xmm5, 20
    217         por     xmm5, xmm8
    218         movdqa  xmm8, xmm6
    219         psrld   xmm8, 12
    220         pslld   xmm6, 20
    221         por     xmm6, xmm8
    222         movdqa  xmm8, xmm7
    223         psrld   xmm8, 12
    224         pslld   xmm7, 20
    225         por     xmm7, xmm8
    226         paddd   xmm0, xmmword ptr [rsp+0x10]
    227         paddd   xmm1, xmmword ptr [rsp+0x30]
    228         paddd   xmm2, xmmword ptr [rsp+0x50]
    229         paddd   xmm3, xmmword ptr [rsp+0x70]
    230         paddd   xmm0, xmm4
    231         paddd   xmm1, xmm5
    232         paddd   xmm2, xmm6
    233         paddd   xmm3, xmm7
    234         pxor    xmm12, xmm0
    235         pxor    xmm13, xmm1
    236         pxor    xmm14, xmm2
    237         pxor    xmm15, xmm3
    238         movdqa  xmm8, xmmword ptr [ROT8+rip]
    239         pshufb  xmm12, xmm8
    240         pshufb  xmm13, xmm8
    241         pshufb  xmm14, xmm8
    242         pshufb  xmm15, xmm8
    243         movdqa  xmm8, xmmword ptr [rsp+0x100]
    244         paddd   xmm8, xmm12
    245         paddd   xmm9, xmm13
    246         paddd   xmm10, xmm14
    247         paddd   xmm11, xmm15
    248         pxor    xmm4, xmm8
    249         pxor    xmm5, xmm9
    250         pxor    xmm6, xmm10
    251         pxor    xmm7, xmm11
    252         movdqa  xmmword ptr [rsp+0x100], xmm8
    253         movdqa  xmm8, xmm4
    254         psrld   xmm8, 7
    255         pslld   xmm4, 25
    256         por     xmm4, xmm8
    257         movdqa  xmm8, xmm5
    258         psrld   xmm8, 7
    259         pslld   xmm5, 25
    260         por     xmm5, xmm8
    261         movdqa  xmm8, xmm6
    262         psrld   xmm8, 7
    263         pslld   xmm6, 25
    264         por     xmm6, xmm8
    265         movdqa  xmm8, xmm7
    266         psrld   xmm8, 7
    267         pslld   xmm7, 25
    268         por     xmm7, xmm8
    269         paddd   xmm0, xmmword ptr [rsp+0x80]
    270         paddd   xmm1, xmmword ptr [rsp+0xA0]
    271         paddd   xmm2, xmmword ptr [rsp+0xC0]
    272         paddd   xmm3, xmmword ptr [rsp+0xE0]
    273         paddd   xmm0, xmm5
    274         paddd   xmm1, xmm6
    275         paddd   xmm2, xmm7
    276         paddd   xmm3, xmm4
    277         pxor    xmm15, xmm0
    278         pxor    xmm12, xmm1
    279         pxor    xmm13, xmm2
    280         pxor    xmm14, xmm3
    281         movdqa  xmm8, xmmword ptr [ROT16+rip]
    282         pshufb  xmm15, xmm8
    283         pshufb  xmm12, xmm8
    284         pshufb  xmm13, xmm8
    285         pshufb  xmm14, xmm8
    286         paddd   xmm10, xmm15
    287         paddd   xmm11, xmm12
    288         movdqa  xmm8, xmmword ptr [rsp+0x100]
    289         paddd   xmm8, xmm13
    290         paddd   xmm9, xmm14
    291         pxor    xmm5, xmm10
    292         pxor    xmm6, xmm11
    293         pxor    xmm7, xmm8
    294         pxor    xmm4, xmm9
    295         movdqa  xmmword ptr [rsp+0x100], xmm8
    296         movdqa  xmm8, xmm5
    297         psrld   xmm8, 12
    298         pslld   xmm5, 20
    299         por     xmm5, xmm8
    300         movdqa  xmm8, xmm6
    301         psrld   xmm8, 12
    302         pslld   xmm6, 20
    303         por     xmm6, xmm8
    304         movdqa  xmm8, xmm7
    305         psrld   xmm8, 12
    306         pslld   xmm7, 20
    307         por     xmm7, xmm8
    308         movdqa  xmm8, xmm4
    309         psrld   xmm8, 12
    310         pslld   xmm4, 20
    311         por     xmm4, xmm8
    312         paddd   xmm0, xmmword ptr [rsp+0x90]
    313         paddd   xmm1, xmmword ptr [rsp+0xB0]
    314         paddd   xmm2, xmmword ptr [rsp+0xD0]
    315         paddd   xmm3, xmmword ptr [rsp+0xF0]
    316         paddd   xmm0, xmm5
    317         paddd   xmm1, xmm6
    318         paddd   xmm2, xmm7
    319         paddd   xmm3, xmm4
    320         pxor    xmm15, xmm0
    321         pxor    xmm12, xmm1
    322         pxor    xmm13, xmm2
    323         pxor    xmm14, xmm3
    324         movdqa  xmm8, xmmword ptr [ROT8+rip]
    325         pshufb  xmm15, xmm8
    326         pshufb  xmm12, xmm8
    327         pshufb  xmm13, xmm8
    328         pshufb  xmm14, xmm8
    329         paddd   xmm10, xmm15
    330         paddd   xmm11, xmm12
    331         movdqa  xmm8, xmmword ptr [rsp+0x100]
    332         paddd   xmm8, xmm13
    333         paddd   xmm9, xmm14
    334         pxor    xmm5, xmm10
    335         pxor    xmm6, xmm11
    336         pxor    xmm7, xmm8
    337         pxor    xmm4, xmm9
    338         movdqa  xmmword ptr [rsp+0x100], xmm8
    339         movdqa  xmm8, xmm5
    340         psrld   xmm8, 7
    341         pslld   xmm5, 25
    342         por     xmm5, xmm8
    343         movdqa  xmm8, xmm6
    344         psrld   xmm8, 7
    345         pslld   xmm6, 25
    346         por     xmm6, xmm8
    347         movdqa  xmm8, xmm7
    348         psrld   xmm8, 7
    349         pslld   xmm7, 25
    350         por     xmm7, xmm8
    351         movdqa  xmm8, xmm4
    352         psrld   xmm8, 7
    353         pslld   xmm4, 25
    354         por     xmm4, xmm8
    355         paddd   xmm0, xmmword ptr [rsp+0x20]
    356         paddd   xmm1, xmmword ptr [rsp+0x30]
    357         paddd   xmm2, xmmword ptr [rsp+0x70]
    358         paddd   xmm3, xmmword ptr [rsp+0x40]
    359         paddd   xmm0, xmm4
    360         paddd   xmm1, xmm5
    361         paddd   xmm2, xmm6
    362         paddd   xmm3, xmm7
    363         pxor    xmm12, xmm0
    364         pxor    xmm13, xmm1
    365         pxor    xmm14, xmm2
    366         pxor    xmm15, xmm3
    367         movdqa  xmm8, xmmword ptr [ROT16+rip]
    368         pshufb  xmm12, xmm8
    369         pshufb  xmm13, xmm8
    370         pshufb  xmm14, xmm8
    371         pshufb  xmm15, xmm8
    372         movdqa  xmm8, xmmword ptr [rsp+0x100]
    373         paddd   xmm8, xmm12
    374         paddd   xmm9, xmm13
    375         paddd   xmm10, xmm14
    376         paddd   xmm11, xmm15
    377         pxor    xmm4, xmm8
    378         pxor    xmm5, xmm9
    379         pxor    xmm6, xmm10
    380         pxor    xmm7, xmm11
    381         movdqa  xmmword ptr [rsp+0x100], xmm8
    382         movdqa  xmm8, xmm4
    383         psrld   xmm8, 12
    384         pslld   xmm4, 20
    385         por     xmm4, xmm8
    386         movdqa  xmm8, xmm5
    387         psrld   xmm8, 12
    388         pslld   xmm5, 20
    389         por     xmm5, xmm8
    390         movdqa  xmm8, xmm6
    391         psrld   xmm8, 12
    392         pslld   xmm6, 20
    393         por     xmm6, xmm8
    394         movdqa  xmm8, xmm7
    395         psrld   xmm8, 12
    396         pslld   xmm7, 20
    397         por     xmm7, xmm8
    398         paddd   xmm0, xmmword ptr [rsp+0x60]
    399         paddd   xmm1, xmmword ptr [rsp+0xA0]
    400         paddd   xmm2, xmmword ptr [rsp]
    401         paddd   xmm3, xmmword ptr [rsp+0xD0]
    402         paddd   xmm0, xmm4
    403         paddd   xmm1, xmm5
    404         paddd   xmm2, xmm6
    405         paddd   xmm3, xmm7
    406         pxor    xmm12, xmm0
    407         pxor    xmm13, xmm1
    408         pxor    xmm14, xmm2
    409         pxor    xmm15, xmm3
    410         movdqa  xmm8, xmmword ptr [ROT8+rip]
    411         pshufb  xmm12, xmm8
    412         pshufb  xmm13, xmm8
    413         pshufb  xmm14, xmm8
    414         pshufb  xmm15, xmm8
    415         movdqa  xmm8, xmmword ptr [rsp+0x100]
    416         paddd   xmm8, xmm12
    417         paddd   xmm9, xmm13
    418         paddd   xmm10, xmm14
    419         paddd   xmm11, xmm15
    420         pxor    xmm4, xmm8
    421         pxor    xmm5, xmm9
    422         pxor    xmm6, xmm10
    423         pxor    xmm7, xmm11
    424         movdqa  xmmword ptr [rsp+0x100], xmm8
    425         movdqa  xmm8, xmm4
    426         psrld   xmm8, 7
    427         pslld   xmm4, 25
    428         por     xmm4, xmm8
    429         movdqa  xmm8, xmm5
    430         psrld   xmm8, 7
    431         pslld   xmm5, 25
    432         por     xmm5, xmm8
    433         movdqa  xmm8, xmm6
    434         psrld   xmm8, 7
    435         pslld   xmm6, 25
    436         por     xmm6, xmm8
    437         movdqa  xmm8, xmm7
    438         psrld   xmm8, 7
    439         pslld   xmm7, 25
    440         por     xmm7, xmm8
    441         paddd   xmm0, xmmword ptr [rsp+0x10]
    442         paddd   xmm1, xmmword ptr [rsp+0xC0]
    443         paddd   xmm2, xmmword ptr [rsp+0x90]
    444         paddd   xmm3, xmmword ptr [rsp+0xF0]
    445         paddd   xmm0, xmm5
    446         paddd   xmm1, xmm6
    447         paddd   xmm2, xmm7
    448         paddd   xmm3, xmm4
    449         pxor    xmm15, xmm0
    450         pxor    xmm12, xmm1
    451         pxor    xmm13, xmm2
    452         pxor    xmm14, xmm3
    453         movdqa  xmm8, xmmword ptr [ROT16+rip]
    454         pshufb  xmm15, xmm8
    455         pshufb  xmm12, xmm8
    456         pshufb  xmm13, xmm8
    457         pshufb  xmm14, xmm8
    458         paddd   xmm10, xmm15
    459         paddd   xmm11, xmm12
    460         movdqa  xmm8, xmmword ptr [rsp+0x100]
    461         paddd   xmm8, xmm13
    462         paddd   xmm9, xmm14
    463         pxor    xmm5, xmm10
    464         pxor    xmm6, xmm11
    465         pxor    xmm7, xmm8
    466         pxor    xmm4, xmm9
    467         movdqa  xmmword ptr [rsp+0x100], xmm8
    468         movdqa  xmm8, xmm5
    469         psrld   xmm8, 12
    470         pslld   xmm5, 20
    471         por     xmm5, xmm8
    472         movdqa  xmm8, xmm6
    473         psrld   xmm8, 12
    474         pslld   xmm6, 20
    475         por     xmm6, xmm8
    476         movdqa  xmm8, xmm7
    477         psrld   xmm8, 12
    478         pslld   xmm7, 20
    479         por     xmm7, xmm8
    480         movdqa  xmm8, xmm4
    481         psrld   xmm8, 12
    482         pslld   xmm4, 20
    483         por     xmm4, xmm8
    484         paddd   xmm0, xmmword ptr [rsp+0xB0]
    485         paddd   xmm1, xmmword ptr [rsp+0x50]
    486         paddd   xmm2, xmmword ptr [rsp+0xE0]
    487         paddd   xmm3, xmmword ptr [rsp+0x80]
    488         paddd   xmm0, xmm5
    489         paddd   xmm1, xmm6
    490         paddd   xmm2, xmm7
    491         paddd   xmm3, xmm4
    492         pxor    xmm15, xmm0
    493         pxor    xmm12, xmm1
    494         pxor    xmm13, xmm2
    495         pxor    xmm14, xmm3
    496         movdqa  xmm8, xmmword ptr [ROT8+rip]
    497         pshufb  xmm15, xmm8
    498         pshufb  xmm12, xmm8
    499         pshufb  xmm13, xmm8
    500         pshufb  xmm14, xmm8
    501         paddd   xmm10, xmm15
    502         paddd   xmm11, xmm12
    503         movdqa  xmm8, xmmword ptr [rsp+0x100]
    504         paddd   xmm8, xmm13
    505         paddd   xmm9, xmm14
    506         pxor    xmm5, xmm10
    507         pxor    xmm6, xmm11
    508         pxor    xmm7, xmm8
    509         pxor    xmm4, xmm9
    510         movdqa  xmmword ptr [rsp+0x100], xmm8
    511         movdqa  xmm8, xmm5
    512         psrld   xmm8, 7
    513         pslld   xmm5, 25
    514         por     xmm5, xmm8
    515         movdqa  xmm8, xmm6
    516         psrld   xmm8, 7
    517         pslld   xmm6, 25
    518         por     xmm6, xmm8
    519         movdqa  xmm8, xmm7
    520         psrld   xmm8, 7
    521         pslld   xmm7, 25
    522         por     xmm7, xmm8
    523         movdqa  xmm8, xmm4
    524         psrld   xmm8, 7
    525         pslld   xmm4, 25
    526         por     xmm4, xmm8
    527         paddd   xmm0, xmmword ptr [rsp+0x30]
    528         paddd   xmm1, xmmword ptr [rsp+0xA0]
    529         paddd   xmm2, xmmword ptr [rsp+0xD0]
    530         paddd   xmm3, xmmword ptr [rsp+0x70]
    531         paddd   xmm0, xmm4
    532         paddd   xmm1, xmm5
    533         paddd   xmm2, xmm6
    534         paddd   xmm3, xmm7
    535         pxor    xmm12, xmm0
    536         pxor    xmm13, xmm1
    537         pxor    xmm14, xmm2
    538         pxor    xmm15, xmm3
    539         movdqa  xmm8, xmmword ptr [ROT16+rip]
    540         pshufb  xmm12, xmm8
    541         pshufb  xmm13, xmm8
    542         pshufb  xmm14, xmm8
    543         pshufb  xmm15, xmm8
    544         movdqa  xmm8, xmmword ptr [rsp+0x100]
    545         paddd   xmm8, xmm12
    546         paddd   xmm9, xmm13
    547         paddd   xmm10, xmm14
    548         paddd   xmm11, xmm15
    549         pxor    xmm4, xmm8
    550         pxor    xmm5, xmm9
    551         pxor    xmm6, xmm10
    552         pxor    xmm7, xmm11
    553         movdqa  xmmword ptr [rsp+0x100], xmm8
    554         movdqa  xmm8, xmm4
    555         psrld   xmm8, 12
    556         pslld   xmm4, 20
    557         por     xmm4, xmm8
    558         movdqa  xmm8, xmm5
    559         psrld   xmm8, 12
    560         pslld   xmm5, 20
    561         por     xmm5, xmm8
    562         movdqa  xmm8, xmm6
    563         psrld   xmm8, 12
    564         pslld   xmm6, 20
    565         por     xmm6, xmm8
    566         movdqa  xmm8, xmm7
    567         psrld   xmm8, 12
    568         pslld   xmm7, 20
    569         por     xmm7, xmm8
    570         paddd   xmm0, xmmword ptr [rsp+0x40]
    571         paddd   xmm1, xmmword ptr [rsp+0xC0]
    572         paddd   xmm2, xmmword ptr [rsp+0x20]
    573         paddd   xmm3, xmmword ptr [rsp+0xE0]
    574         paddd   xmm0, xmm4
    575         paddd   xmm1, xmm5
    576         paddd   xmm2, xmm6
    577         paddd   xmm3, xmm7
    578         pxor    xmm12, xmm0
    579         pxor    xmm13, xmm1
    580         pxor    xmm14, xmm2
    581         pxor    xmm15, xmm3
    582         movdqa  xmm8, xmmword ptr [ROT8+rip]
    583         pshufb  xmm12, xmm8
    584         pshufb  xmm13, xmm8
    585         pshufb  xmm14, xmm8
    586         pshufb  xmm15, xmm8
    587         movdqa  xmm8, xmmword ptr [rsp+0x100]
    588         paddd   xmm8, xmm12
    589         paddd   xmm9, xmm13
    590         paddd   xmm10, xmm14
    591         paddd   xmm11, xmm15
    592         pxor    xmm4, xmm8
    593         pxor    xmm5, xmm9
    594         pxor    xmm6, xmm10
    595         pxor    xmm7, xmm11
    596         movdqa  xmmword ptr [rsp+0x100], xmm8
    597         movdqa  xmm8, xmm4
    598         psrld   xmm8, 7
    599         pslld   xmm4, 25
    600         por     xmm4, xmm8
    601         movdqa  xmm8, xmm5
    602         psrld   xmm8, 7
    603         pslld   xmm5, 25
    604         por     xmm5, xmm8
    605         movdqa  xmm8, xmm6
    606         psrld   xmm8, 7
    607         pslld   xmm6, 25
    608         por     xmm6, xmm8
    609         movdqa  xmm8, xmm7
    610         psrld   xmm8, 7
    611         pslld   xmm7, 25
    612         por     xmm7, xmm8
    613         paddd   xmm0, xmmword ptr [rsp+0x60]
    614         paddd   xmm1, xmmword ptr [rsp+0x90]
    615         paddd   xmm2, xmmword ptr [rsp+0xB0]
    616         paddd   xmm3, xmmword ptr [rsp+0x80]
    617         paddd   xmm0, xmm5
    618         paddd   xmm1, xmm6
    619         paddd   xmm2, xmm7
    620         paddd   xmm3, xmm4
    621         pxor    xmm15, xmm0
    622         pxor    xmm12, xmm1
    623         pxor    xmm13, xmm2
    624         pxor    xmm14, xmm3
    625         movdqa  xmm8, xmmword ptr [ROT16+rip]
    626         pshufb  xmm15, xmm8
    627         pshufb  xmm12, xmm8
    628         pshufb  xmm13, xmm8
    629         pshufb  xmm14, xmm8
    630         paddd   xmm10, xmm15
    631         paddd   xmm11, xmm12
    632         movdqa  xmm8, xmmword ptr [rsp+0x100]
    633         paddd   xmm8, xmm13
    634         paddd   xmm9, xmm14
    635         pxor    xmm5, xmm10
    636         pxor    xmm6, xmm11
    637         pxor    xmm7, xmm8
    638         pxor    xmm4, xmm9
    639         movdqa  xmmword ptr [rsp+0x100], xmm8
    640         movdqa  xmm8, xmm5
    641         psrld   xmm8, 12
    642         pslld   xmm5, 20
    643         por     xmm5, xmm8
    644         movdqa  xmm8, xmm6
    645         psrld   xmm8, 12
    646         pslld   xmm6, 20
    647         por     xmm6, xmm8
    648         movdqa  xmm8, xmm7
    649         psrld   xmm8, 12
    650         pslld   xmm7, 20
    651         por     xmm7, xmm8
    652         movdqa  xmm8, xmm4
    653         psrld   xmm8, 12
    654         pslld   xmm4, 20
    655         por     xmm4, xmm8
    656         paddd   xmm0, xmmword ptr [rsp+0x50]
    657         paddd   xmm1, xmmword ptr [rsp]
    658         paddd   xmm2, xmmword ptr [rsp+0xF0]
    659         paddd   xmm3, xmmword ptr [rsp+0x10]
    660         paddd   xmm0, xmm5
    661         paddd   xmm1, xmm6
    662         paddd   xmm2, xmm7
    663         paddd   xmm3, xmm4
    664         pxor    xmm15, xmm0
    665         pxor    xmm12, xmm1
    666         pxor    xmm13, xmm2
    667         pxor    xmm14, xmm3
    668         movdqa  xmm8, xmmword ptr [ROT8+rip]
    669         pshufb  xmm15, xmm8
    670         pshufb  xmm12, xmm8
    671         pshufb  xmm13, xmm8
    672         pshufb  xmm14, xmm8
    673         paddd   xmm10, xmm15
    674         paddd   xmm11, xmm12
    675         movdqa  xmm8, xmmword ptr [rsp+0x100]
    676         paddd   xmm8, xmm13
    677         paddd   xmm9, xmm14
    678         pxor    xmm5, xmm10
    679         pxor    xmm6, xmm11
    680         pxor    xmm7, xmm8
    681         pxor    xmm4, xmm9
    682         movdqa  xmmword ptr [rsp+0x100], xmm8
    683         movdqa  xmm8, xmm5
    684         psrld   xmm8, 7
    685         pslld   xmm5, 25
    686         por     xmm5, xmm8
    687         movdqa  xmm8, xmm6
    688         psrld   xmm8, 7
    689         pslld   xmm6, 25
    690         por     xmm6, xmm8
    691         movdqa  xmm8, xmm7
    692         psrld   xmm8, 7
    693         pslld   xmm7, 25
    694         por     xmm7, xmm8
    695         movdqa  xmm8, xmm4
    696         psrld   xmm8, 7
    697         pslld   xmm4, 25
    698         por     xmm4, xmm8
    699         paddd   xmm0, xmmword ptr [rsp+0xA0]
    700         paddd   xmm1, xmmword ptr [rsp+0xC0]
    701         paddd   xmm2, xmmword ptr [rsp+0xE0]
    702         paddd   xmm3, xmmword ptr [rsp+0xD0]
    703         paddd   xmm0, xmm4
    704         paddd   xmm1, xmm5
    705         paddd   xmm2, xmm6
    706         paddd   xmm3, xmm7
    707         pxor    xmm12, xmm0
    708         pxor    xmm13, xmm1
    709         pxor    xmm14, xmm2
    710         pxor    xmm15, xmm3
    711         movdqa  xmm8, xmmword ptr [ROT16+rip]
    712         pshufb  xmm12, xmm8
    713         pshufb  xmm13, xmm8
    714         pshufb  xmm14, xmm8
    715         pshufb  xmm15, xmm8
    716         movdqa  xmm8, xmmword ptr [rsp+0x100]
    717         paddd   xmm8, xmm12
    718         paddd   xmm9, xmm13
    719         paddd   xmm10, xmm14
    720         paddd   xmm11, xmm15
    721         pxor    xmm4, xmm8
    722         pxor    xmm5, xmm9
    723         pxor    xmm6, xmm10
    724         pxor    xmm7, xmm11
    725         movdqa  xmmword ptr [rsp+0x100], xmm8
    726         movdqa  xmm8, xmm4
    727         psrld   xmm8, 12
    728         pslld   xmm4, 20
    729         por     xmm4, xmm8
    730         movdqa  xmm8, xmm5
    731         psrld   xmm8, 12
    732         pslld   xmm5, 20
    733         por     xmm5, xmm8
    734         movdqa  xmm8, xmm6
    735         psrld   xmm8, 12
    736         pslld   xmm6, 20
    737         por     xmm6, xmm8
    738         movdqa  xmm8, xmm7
    739         psrld   xmm8, 12
    740         pslld   xmm7, 20
    741         por     xmm7, xmm8
    742         paddd   xmm0, xmmword ptr [rsp+0x70]
    743         paddd   xmm1, xmmword ptr [rsp+0x90]
    744         paddd   xmm2, xmmword ptr [rsp+0x30]
    745         paddd   xmm3, xmmword ptr [rsp+0xF0]
    746         paddd   xmm0, xmm4
    747         paddd   xmm1, xmm5
    748         paddd   xmm2, xmm6
    749         paddd   xmm3, xmm7
    750         pxor    xmm12, xmm0
    751         pxor    xmm13, xmm1
    752         pxor    xmm14, xmm2
    753         pxor    xmm15, xmm3
    754         movdqa  xmm8, xmmword ptr [ROT8+rip]
    755         pshufb  xmm12, xmm8
    756         pshufb  xmm13, xmm8
    757         pshufb  xmm14, xmm8
    758         pshufb  xmm15, xmm8
    759         movdqa  xmm8, xmmword ptr [rsp+0x100]
    760         paddd   xmm8, xmm12
    761         paddd   xmm9, xmm13
    762         paddd   xmm10, xmm14
    763         paddd   xmm11, xmm15
    764         pxor    xmm4, xmm8
    765         pxor    xmm5, xmm9
    766         pxor    xmm6, xmm10
    767         pxor    xmm7, xmm11
    768         movdqa  xmmword ptr [rsp+0x100], xmm8
    769         movdqa  xmm8, xmm4
    770         psrld   xmm8, 7
    771         pslld   xmm4, 25
    772         por     xmm4, xmm8
    773         movdqa  xmm8, xmm5
    774         psrld   xmm8, 7
    775         pslld   xmm5, 25
    776         por     xmm5, xmm8
    777         movdqa  xmm8, xmm6
    778         psrld   xmm8, 7
    779         pslld   xmm6, 25
    780         por     xmm6, xmm8
    781         movdqa  xmm8, xmm7
    782         psrld   xmm8, 7
    783         pslld   xmm7, 25
    784         por     xmm7, xmm8
    785         paddd   xmm0, xmmword ptr [rsp+0x40]
    786         paddd   xmm1, xmmword ptr [rsp+0xB0]
    787         paddd   xmm2, xmmword ptr [rsp+0x50]
    788         paddd   xmm3, xmmword ptr [rsp+0x10]
    789         paddd   xmm0, xmm5
    790         paddd   xmm1, xmm6
    791         paddd   xmm2, xmm7
    792         paddd   xmm3, xmm4
    793         pxor    xmm15, xmm0
    794         pxor    xmm12, xmm1
    795         pxor    xmm13, xmm2
    796         pxor    xmm14, xmm3
    797         movdqa  xmm8, xmmword ptr [ROT16+rip]
    798         pshufb  xmm15, xmm8
    799         pshufb  xmm12, xmm8
    800         pshufb  xmm13, xmm8
    801         pshufb  xmm14, xmm8
    802         paddd   xmm10, xmm15
    803         paddd   xmm11, xmm12
    804         movdqa  xmm8, xmmword ptr [rsp+0x100]
    805         paddd   xmm8, xmm13
    806         paddd   xmm9, xmm14
    807         pxor    xmm5, xmm10
    808         pxor    xmm6, xmm11
    809         pxor    xmm7, xmm8
    810         pxor    xmm4, xmm9
    811         movdqa  xmmword ptr [rsp+0x100], xmm8
    812         movdqa  xmm8, xmm5
    813         psrld   xmm8, 12
    814         pslld   xmm5, 20
    815         por     xmm5, xmm8
    816         movdqa  xmm8, xmm6
    817         psrld   xmm8, 12
    818         pslld   xmm6, 20
    819         por     xmm6, xmm8
    820         movdqa  xmm8, xmm7
    821         psrld   xmm8, 12
    822         pslld   xmm7, 20
    823         por     xmm7, xmm8
    824         movdqa  xmm8, xmm4
    825         psrld   xmm8, 12
    826         pslld   xmm4, 20
    827         por     xmm4, xmm8
    828         paddd   xmm0, xmmword ptr [rsp]
    829         paddd   xmm1, xmmword ptr [rsp+0x20]
    830         paddd   xmm2, xmmword ptr [rsp+0x80]
    831         paddd   xmm3, xmmword ptr [rsp+0x60]
    832         paddd   xmm0, xmm5
    833         paddd   xmm1, xmm6
    834         paddd   xmm2, xmm7
    835         paddd   xmm3, xmm4
    836         pxor    xmm15, xmm0
    837         pxor    xmm12, xmm1
    838         pxor    xmm13, xmm2
    839         pxor    xmm14, xmm3
    840         movdqa  xmm8, xmmword ptr [ROT8+rip]
    841         pshufb  xmm15, xmm8
    842         pshufb  xmm12, xmm8
    843         pshufb  xmm13, xmm8
    844         pshufb  xmm14, xmm8
    845         paddd   xmm10, xmm15
    846         paddd   xmm11, xmm12
    847         movdqa  xmm8, xmmword ptr [rsp+0x100]
    848         paddd   xmm8, xmm13
    849         paddd   xmm9, xmm14
    850         pxor    xmm5, xmm10
    851         pxor    xmm6, xmm11
    852         pxor    xmm7, xmm8
    853         pxor    xmm4, xmm9
    854         movdqa  xmmword ptr [rsp+0x100], xmm8
    855         movdqa  xmm8, xmm5
    856         psrld   xmm8, 7
    857         pslld   xmm5, 25
    858         por     xmm5, xmm8
    859         movdqa  xmm8, xmm6
    860         psrld   xmm8, 7
    861         pslld   xmm6, 25
    862         por     xmm6, xmm8
    863         movdqa  xmm8, xmm7
    864         psrld   xmm8, 7
    865         pslld   xmm7, 25
    866         por     xmm7, xmm8
    867         movdqa  xmm8, xmm4
    868         psrld   xmm8, 7
    869         pslld   xmm4, 25
    870         por     xmm4, xmm8
    871         paddd   xmm0, xmmword ptr [rsp+0xC0]
    872         paddd   xmm1, xmmword ptr [rsp+0x90]
    873         paddd   xmm2, xmmword ptr [rsp+0xF0]
    874         paddd   xmm3, xmmword ptr [rsp+0xE0]
    875         paddd   xmm0, xmm4
    876         paddd   xmm1, xmm5
    877         paddd   xmm2, xmm6
    878         paddd   xmm3, xmm7
    879         pxor    xmm12, xmm0
    880         pxor    xmm13, xmm1
    881         pxor    xmm14, xmm2
    882         pxor    xmm15, xmm3
    883         movdqa  xmm8, xmmword ptr [ROT16+rip]
    884         pshufb  xmm12, xmm8
    885         pshufb  xmm13, xmm8
    886         pshufb  xmm14, xmm8
    887         pshufb  xmm15, xmm8
    888         movdqa  xmm8, xmmword ptr [rsp+0x100]
    889         paddd   xmm8, xmm12
    890         paddd   xmm9, xmm13
    891         paddd   xmm10, xmm14
    892         paddd   xmm11, xmm15
    893         pxor    xmm4, xmm8
    894         pxor    xmm5, xmm9
    895         pxor    xmm6, xmm10
    896         pxor    xmm7, xmm11
    897         movdqa  xmmword ptr [rsp+0x100], xmm8
    898         movdqa  xmm8, xmm4
    899         psrld   xmm8, 12
    900         pslld   xmm4, 20
    901         por     xmm4, xmm8
    902         movdqa  xmm8, xmm5
    903         psrld   xmm8, 12
    904         pslld   xmm5, 20
    905         por     xmm5, xmm8
    906         movdqa  xmm8, xmm6
    907         psrld   xmm8, 12
    908         pslld   xmm6, 20
    909         por     xmm6, xmm8
    910         movdqa  xmm8, xmm7
    911         psrld   xmm8, 12
    912         pslld   xmm7, 20
    913         por     xmm7, xmm8
    914         paddd   xmm0, xmmword ptr [rsp+0xD0]
    915         paddd   xmm1, xmmword ptr [rsp+0xB0]
    916         paddd   xmm2, xmmword ptr [rsp+0xA0]
    917         paddd   xmm3, xmmword ptr [rsp+0x80]
    918         paddd   xmm0, xmm4
    919         paddd   xmm1, xmm5
    920         paddd   xmm2, xmm6
    921         paddd   xmm3, xmm7
    922         pxor    xmm12, xmm0
    923         pxor    xmm13, xmm1
    924         pxor    xmm14, xmm2
    925         pxor    xmm15, xmm3
    926         movdqa  xmm8, xmmword ptr [ROT8+rip]
    927         pshufb  xmm12, xmm8
    928         pshufb  xmm13, xmm8
    929         pshufb  xmm14, xmm8
    930         pshufb  xmm15, xmm8
    931         movdqa  xmm8, xmmword ptr [rsp+0x100]
    932         paddd   xmm8, xmm12
    933         paddd   xmm9, xmm13
    934         paddd   xmm10, xmm14
    935         paddd   xmm11, xmm15
    936         pxor    xmm4, xmm8
    937         pxor    xmm5, xmm9
    938         pxor    xmm6, xmm10
    939         pxor    xmm7, xmm11
    940         movdqa  xmmword ptr [rsp+0x100], xmm8
    941         movdqa  xmm8, xmm4
    942         psrld   xmm8, 7
    943         pslld   xmm4, 25
    944         por     xmm4, xmm8
    945         movdqa  xmm8, xmm5
    946         psrld   xmm8, 7
    947         pslld   xmm5, 25
    948         por     xmm5, xmm8
    949         movdqa  xmm8, xmm6
    950         psrld   xmm8, 7
    951         pslld   xmm6, 25
    952         por     xmm6, xmm8
    953         movdqa  xmm8, xmm7
    954         psrld   xmm8, 7
    955         pslld   xmm7, 25
    956         por     xmm7, xmm8
    957         paddd   xmm0, xmmword ptr [rsp+0x70]
    958         paddd   xmm1, xmmword ptr [rsp+0x50]
    959         paddd   xmm2, xmmword ptr [rsp]
    960         paddd   xmm3, xmmword ptr [rsp+0x60]
    961         paddd   xmm0, xmm5
    962         paddd   xmm1, xmm6
    963         paddd   xmm2, xmm7
    964         paddd   xmm3, xmm4
    965         pxor    xmm15, xmm0
    966         pxor    xmm12, xmm1
    967         pxor    xmm13, xmm2
    968         pxor    xmm14, xmm3
    969         movdqa  xmm8, xmmword ptr [ROT16+rip]
    970         pshufb  xmm15, xmm8
    971         pshufb  xmm12, xmm8
    972         pshufb  xmm13, xmm8
    973         pshufb  xmm14, xmm8
    974         paddd   xmm10, xmm15
    975         paddd   xmm11, xmm12
    976         movdqa  xmm8, xmmword ptr [rsp+0x100]
    977         paddd   xmm8, xmm13
    978         paddd   xmm9, xmm14
    979         pxor    xmm5, xmm10
    980         pxor    xmm6, xmm11
    981         pxor    xmm7, xmm8
    982         pxor    xmm4, xmm9
    983         movdqa  xmmword ptr [rsp+0x100], xmm8
    984         movdqa  xmm8, xmm5
    985         psrld   xmm8, 12
    986         pslld   xmm5, 20
    987         por     xmm5, xmm8
    988         movdqa  xmm8, xmm6
    989         psrld   xmm8, 12
    990         pslld   xmm6, 20
    991         por     xmm6, xmm8
    992         movdqa  xmm8, xmm7
    993         psrld   xmm8, 12
    994         pslld   xmm7, 20
    995         por     xmm7, xmm8
    996         movdqa  xmm8, xmm4
    997         psrld   xmm8, 12
    998         pslld   xmm4, 20
    999         por     xmm4, xmm8
   1000         paddd   xmm0, xmmword ptr [rsp+0x20]
   1001         paddd   xmm1, xmmword ptr [rsp+0x30]
   1002         paddd   xmm2, xmmword ptr [rsp+0x10]
   1003         paddd   xmm3, xmmword ptr [rsp+0x40]
   1004         paddd   xmm0, xmm5
   1005         paddd   xmm1, xmm6
   1006         paddd   xmm2, xmm7
   1007         paddd   xmm3, xmm4
   1008         pxor    xmm15, xmm0
   1009         pxor    xmm12, xmm1
   1010         pxor    xmm13, xmm2
   1011         pxor    xmm14, xmm3
   1012         movdqa  xmm8, xmmword ptr [ROT8+rip]
   1013         pshufb  xmm15, xmm8
   1014         pshufb  xmm12, xmm8
   1015         pshufb  xmm13, xmm8
   1016         pshufb  xmm14, xmm8
   1017         paddd   xmm10, xmm15
   1018         paddd   xmm11, xmm12
   1019         movdqa  xmm8, xmmword ptr [rsp+0x100]
   1020         paddd   xmm8, xmm13
   1021         paddd   xmm9, xmm14
   1022         pxor    xmm5, xmm10
   1023         pxor    xmm6, xmm11
   1024         pxor    xmm7, xmm8
   1025         pxor    xmm4, xmm9
   1026         movdqa  xmmword ptr [rsp+0x100], xmm8
   1027         movdqa  xmm8, xmm5
   1028         psrld   xmm8, 7
   1029         pslld   xmm5, 25
   1030         por     xmm5, xmm8
   1031         movdqa  xmm8, xmm6
   1032         psrld   xmm8, 7
   1033         pslld   xmm6, 25
   1034         por     xmm6, xmm8
   1035         movdqa  xmm8, xmm7
   1036         psrld   xmm8, 7
   1037         pslld   xmm7, 25
   1038         por     xmm7, xmm8
   1039         movdqa  xmm8, xmm4
   1040         psrld   xmm8, 7
   1041         pslld   xmm4, 25
   1042         por     xmm4, xmm8
   1043         paddd   xmm0, xmmword ptr [rsp+0x90]
   1044         paddd   xmm1, xmmword ptr [rsp+0xB0]
   1045         paddd   xmm2, xmmword ptr [rsp+0x80]
   1046         paddd   xmm3, xmmword ptr [rsp+0xF0]
   1047         paddd   xmm0, xmm4
   1048         paddd   xmm1, xmm5
   1049         paddd   xmm2, xmm6
   1050         paddd   xmm3, xmm7
   1051         pxor    xmm12, xmm0
   1052         pxor    xmm13, xmm1
   1053         pxor    xmm14, xmm2
   1054         pxor    xmm15, xmm3
   1055         movdqa  xmm8, xmmword ptr [ROT16+rip]
   1056         pshufb  xmm12, xmm8
   1057         pshufb  xmm13, xmm8
   1058         pshufb  xmm14, xmm8
   1059         pshufb  xmm15, xmm8
   1060         movdqa  xmm8, xmmword ptr [rsp+0x100]
   1061         paddd   xmm8, xmm12
   1062         paddd   xmm9, xmm13
   1063         paddd   xmm10, xmm14
   1064         paddd   xmm11, xmm15
   1065         pxor    xmm4, xmm8
   1066         pxor    xmm5, xmm9
   1067         pxor    xmm6, xmm10
   1068         pxor    xmm7, xmm11
   1069         movdqa  xmmword ptr [rsp+0x100], xmm8
   1070         movdqa  xmm8, xmm4
   1071         psrld   xmm8, 12
   1072         pslld   xmm4, 20
   1073         por     xmm4, xmm8
   1074         movdqa  xmm8, xmm5
   1075         psrld   xmm8, 12
   1076         pslld   xmm5, 20
   1077         por     xmm5, xmm8
   1078         movdqa  xmm8, xmm6
   1079         psrld   xmm8, 12
   1080         pslld   xmm6, 20
   1081         por     xmm6, xmm8
   1082         movdqa  xmm8, xmm7
   1083         psrld   xmm8, 12
   1084         pslld   xmm7, 20
   1085         por     xmm7, xmm8
   1086         paddd   xmm0, xmmword ptr [rsp+0xE0]
   1087         paddd   xmm1, xmmword ptr [rsp+0x50]
   1088         paddd   xmm2, xmmword ptr [rsp+0xC0]
   1089         paddd   xmm3, xmmword ptr [rsp+0x10]
   1090         paddd   xmm0, xmm4
   1091         paddd   xmm1, xmm5
   1092         paddd   xmm2, xmm6
   1093         paddd   xmm3, xmm7
   1094         pxor    xmm12, xmm0
   1095         pxor    xmm13, xmm1
   1096         pxor    xmm14, xmm2
   1097         pxor    xmm15, xmm3
   1098         movdqa  xmm8, xmmword ptr [ROT8+rip]
   1099         pshufb  xmm12, xmm8
   1100         pshufb  xmm13, xmm8
   1101         pshufb  xmm14, xmm8
   1102         pshufb  xmm15, xmm8
   1103         movdqa  xmm8, xmmword ptr [rsp+0x100]
   1104         paddd   xmm8, xmm12
   1105         paddd   xmm9, xmm13
   1106         paddd   xmm10, xmm14
   1107         paddd   xmm11, xmm15
   1108         pxor    xmm4, xmm8
   1109         pxor    xmm5, xmm9
   1110         pxor    xmm6, xmm10
   1111         pxor    xmm7, xmm11
   1112         movdqa  xmmword ptr [rsp+0x100], xmm8
   1113         movdqa  xmm8, xmm4
   1114         psrld   xmm8, 7
   1115         pslld   xmm4, 25
   1116         por     xmm4, xmm8
   1117         movdqa  xmm8, xmm5
   1118         psrld   xmm8, 7
   1119         pslld   xmm5, 25
   1120         por     xmm5, xmm8
   1121         movdqa  xmm8, xmm6
   1122         psrld   xmm8, 7
   1123         pslld   xmm6, 25
   1124         por     xmm6, xmm8
   1125         movdqa  xmm8, xmm7
   1126         psrld   xmm8, 7
   1127         pslld   xmm7, 25
   1128         por     xmm7, xmm8
   1129         paddd   xmm0, xmmword ptr [rsp+0xD0]
   1130         paddd   xmm1, xmmword ptr [rsp]
   1131         paddd   xmm2, xmmword ptr [rsp+0x20]
   1132         paddd   xmm3, xmmword ptr [rsp+0x40]
   1133         paddd   xmm0, xmm5
   1134         paddd   xmm1, xmm6
   1135         paddd   xmm2, xmm7
   1136         paddd   xmm3, xmm4
   1137         pxor    xmm15, xmm0
   1138         pxor    xmm12, xmm1
   1139         pxor    xmm13, xmm2
   1140         pxor    xmm14, xmm3
   1141         movdqa  xmm8, xmmword ptr [ROT16+rip]
   1142         pshufb  xmm15, xmm8
   1143         pshufb  xmm12, xmm8
   1144         pshufb  xmm13, xmm8
   1145         pshufb  xmm14, xmm8
   1146         paddd   xmm10, xmm15
   1147         paddd   xmm11, xmm12
   1148         movdqa  xmm8, xmmword ptr [rsp+0x100]
   1149         paddd   xmm8, xmm13
   1150         paddd   xmm9, xmm14
   1151         pxor    xmm5, xmm10
   1152         pxor    xmm6, xmm11
   1153         pxor    xmm7, xmm8
   1154         pxor    xmm4, xmm9
   1155         movdqa  xmmword ptr [rsp+0x100], xmm8
   1156         movdqa  xmm8, xmm5
   1157         psrld   xmm8, 12
   1158         pslld   xmm5, 20
   1159         por     xmm5, xmm8
   1160         movdqa  xmm8, xmm6
   1161         psrld   xmm8, 12
   1162         pslld   xmm6, 20
   1163         por     xmm6, xmm8
   1164         movdqa  xmm8, xmm7
   1165         psrld   xmm8, 12
   1166         pslld   xmm7, 20
   1167         por     xmm7, xmm8
   1168         movdqa  xmm8, xmm4
   1169         psrld   xmm8, 12
   1170         pslld   xmm4, 20
   1171         por     xmm4, xmm8
   1172         paddd   xmm0, xmmword ptr [rsp+0x30]
   1173         paddd   xmm1, xmmword ptr [rsp+0xA0]
   1174         paddd   xmm2, xmmword ptr [rsp+0x60]
   1175         paddd   xmm3, xmmword ptr [rsp+0x70]
   1176         paddd   xmm0, xmm5
   1177         paddd   xmm1, xmm6
   1178         paddd   xmm2, xmm7
   1179         paddd   xmm3, xmm4
   1180         pxor    xmm15, xmm0
   1181         pxor    xmm12, xmm1
   1182         pxor    xmm13, xmm2
   1183         pxor    xmm14, xmm3
   1184         movdqa  xmm8, xmmword ptr [ROT8+rip]
   1185         pshufb  xmm15, xmm8
   1186         pshufb  xmm12, xmm8
   1187         pshufb  xmm13, xmm8
   1188         pshufb  xmm14, xmm8
   1189         paddd   xmm10, xmm15
   1190         paddd   xmm11, xmm12
   1191         movdqa  xmm8, xmmword ptr [rsp+0x100]
   1192         paddd   xmm8, xmm13
   1193         paddd   xmm9, xmm14
   1194         pxor    xmm5, xmm10
   1195         pxor    xmm6, xmm11
   1196         pxor    xmm7, xmm8
   1197         pxor    xmm4, xmm9
   1198         movdqa  xmmword ptr [rsp+0x100], xmm8
   1199         movdqa  xmm8, xmm5
   1200         psrld   xmm8, 7
   1201         pslld   xmm5, 25
   1202         por     xmm5, xmm8
   1203         movdqa  xmm8, xmm6
   1204         psrld   xmm8, 7
   1205         pslld   xmm6, 25
   1206         por     xmm6, xmm8
   1207         movdqa  xmm8, xmm7
   1208         psrld   xmm8, 7
   1209         pslld   xmm7, 25
   1210         por     xmm7, xmm8
   1211         movdqa  xmm8, xmm4
   1212         psrld   xmm8, 7
   1213         pslld   xmm4, 25
   1214         por     xmm4, xmm8
   1215         paddd   xmm0, xmmword ptr [rsp+0xB0]
   1216         paddd   xmm1, xmmword ptr [rsp+0x50]
   1217         paddd   xmm2, xmmword ptr [rsp+0x10]
   1218         paddd   xmm3, xmmword ptr [rsp+0x80]
   1219         paddd   xmm0, xmm4
   1220         paddd   xmm1, xmm5
   1221         paddd   xmm2, xmm6
   1222         paddd   xmm3, xmm7
   1223         pxor    xmm12, xmm0
   1224         pxor    xmm13, xmm1
   1225         pxor    xmm14, xmm2
   1226         pxor    xmm15, xmm3
   1227         movdqa  xmm8, xmmword ptr [ROT16+rip]
   1228         pshufb  xmm12, xmm8
   1229         pshufb  xmm13, xmm8
   1230         pshufb  xmm14, xmm8
   1231         pshufb  xmm15, xmm8
   1232         movdqa  xmm8, xmmword ptr [rsp+0x100]
   1233         paddd   xmm8, xmm12
   1234         paddd   xmm9, xmm13
   1235         paddd   xmm10, xmm14
   1236         paddd   xmm11, xmm15
   1237         pxor    xmm4, xmm8
   1238         pxor    xmm5, xmm9
   1239         pxor    xmm6, xmm10
   1240         pxor    xmm7, xmm11
   1241         movdqa  xmmword ptr [rsp+0x100], xmm8
   1242         movdqa  xmm8, xmm4
   1243         psrld   xmm8, 12
   1244         pslld   xmm4, 20
   1245         por     xmm4, xmm8
   1246         movdqa  xmm8, xmm5
   1247         psrld   xmm8, 12
   1248         pslld   xmm5, 20
   1249         por     xmm5, xmm8
   1250         movdqa  xmm8, xmm6
   1251         psrld   xmm8, 12
   1252         pslld   xmm6, 20
   1253         por     xmm6, xmm8
   1254         movdqa  xmm8, xmm7
   1255         psrld   xmm8, 12
   1256         pslld   xmm7, 20
   1257         por     xmm7, xmm8
   1258         paddd   xmm0, xmmword ptr [rsp+0xF0]
   1259         paddd   xmm1, xmmword ptr [rsp]
   1260         paddd   xmm2, xmmword ptr [rsp+0x90]
   1261         paddd   xmm3, xmmword ptr [rsp+0x60]
   1262         paddd   xmm0, xmm4
   1263         paddd   xmm1, xmm5
   1264         paddd   xmm2, xmm6
   1265         paddd   xmm3, xmm7
   1266         pxor    xmm12, xmm0
   1267         pxor    xmm13, xmm1
   1268         pxor    xmm14, xmm2
   1269         pxor    xmm15, xmm3
   1270         movdqa  xmm8, xmmword ptr [ROT8+rip]
   1271         pshufb  xmm12, xmm8
   1272         pshufb  xmm13, xmm8
   1273         pshufb  xmm14, xmm8
   1274         pshufb  xmm15, xmm8
   1275         movdqa  xmm8, xmmword ptr [rsp+0x100]
   1276         paddd   xmm8, xmm12
   1277         paddd   xmm9, xmm13
   1278         paddd   xmm10, xmm14
   1279         paddd   xmm11, xmm15
   1280         pxor    xmm4, xmm8
   1281         pxor    xmm5, xmm9
   1282         pxor    xmm6, xmm10
   1283         pxor    xmm7, xmm11
   1284         movdqa  xmmword ptr [rsp+0x100], xmm8
   1285         movdqa  xmm8, xmm4
   1286         psrld   xmm8, 7
   1287         pslld   xmm4, 25
   1288         por     xmm4, xmm8
   1289         movdqa  xmm8, xmm5
   1290         psrld   xmm8, 7
   1291         pslld   xmm5, 25
   1292         por     xmm5, xmm8
   1293         movdqa  xmm8, xmm6
   1294         psrld   xmm8, 7
   1295         pslld   xmm6, 25
   1296         por     xmm6, xmm8
   1297         movdqa  xmm8, xmm7
   1298         psrld   xmm8, 7
   1299         pslld   xmm7, 25
   1300         por     xmm7, xmm8
   1301         paddd   xmm0, xmmword ptr [rsp+0xE0]
   1302         paddd   xmm1, xmmword ptr [rsp+0x20]
   1303         paddd   xmm2, xmmword ptr [rsp+0x30]
   1304         paddd   xmm3, xmmword ptr [rsp+0x70]
   1305         paddd   xmm0, xmm5
   1306         paddd   xmm1, xmm6
   1307         paddd   xmm2, xmm7
   1308         paddd   xmm3, xmm4
   1309         pxor    xmm15, xmm0
   1310         pxor    xmm12, xmm1
   1311         pxor    xmm13, xmm2
   1312         pxor    xmm14, xmm3
   1313         movdqa  xmm8, xmmword ptr [ROT16+rip]
   1314         pshufb  xmm15, xmm8
   1315         pshufb  xmm12, xmm8
   1316         pshufb  xmm13, xmm8
   1317         pshufb  xmm14, xmm8
   1318         paddd   xmm10, xmm15
   1319         paddd   xmm11, xmm12
   1320         movdqa  xmm8, xmmword ptr [rsp+0x100]
   1321         paddd   xmm8, xmm13
   1322         paddd   xmm9, xmm14
   1323         pxor    xmm5, xmm10
   1324         pxor    xmm6, xmm11
   1325         pxor    xmm7, xmm8
   1326         pxor    xmm4, xmm9
   1327         movdqa  xmmword ptr [rsp+0x100], xmm8
   1328         movdqa  xmm8, xmm5
   1329         psrld   xmm8, 12
   1330         pslld   xmm5, 20
   1331         por     xmm5, xmm8
   1332         movdqa  xmm8, xmm6
   1333         psrld   xmm8, 12
   1334         pslld   xmm6, 20
   1335         por     xmm6, xmm8
   1336         movdqa  xmm8, xmm7
   1337         psrld   xmm8, 12
   1338         pslld   xmm7, 20
   1339         por     xmm7, xmm8
   1340         movdqa  xmm8, xmm4
   1341         psrld   xmm8, 12
   1342         pslld   xmm4, 20
   1343         por     xmm4, xmm8
   1344         paddd   xmm0, xmmword ptr [rsp+0xA0]
   1345         paddd   xmm1, xmmword ptr [rsp+0xC0]
   1346         paddd   xmm2, xmmword ptr [rsp+0x40]
   1347         paddd   xmm3, xmmword ptr [rsp+0xD0]
   1348         paddd   xmm0, xmm5
   1349         paddd   xmm1, xmm6
   1350         paddd   xmm2, xmm7
   1351         paddd   xmm3, xmm4
   1352         pxor    xmm15, xmm0
   1353         pxor    xmm12, xmm1
   1354         pxor    xmm13, xmm2
   1355         pxor    xmm14, xmm3
   1356         movdqa  xmm8, xmmword ptr [ROT8+rip]
   1357         pshufb  xmm15, xmm8
   1358         pshufb  xmm12, xmm8
   1359         pshufb  xmm13, xmm8
   1360         pshufb  xmm14, xmm8
   1361         paddd   xmm10, xmm15
   1362         paddd   xmm11, xmm12
   1363         movdqa  xmm8, xmmword ptr [rsp+0x100]
   1364         paddd   xmm8, xmm13
   1365         paddd   xmm9, xmm14
   1366         pxor    xmm5, xmm10
   1367         pxor    xmm6, xmm11
   1368         pxor    xmm7, xmm8
   1369         pxor    xmm4, xmm9
   1370         pxor    xmm0, xmm8
   1371         pxor    xmm1, xmm9
   1372         pxor    xmm2, xmm10
   1373         pxor    xmm3, xmm11
   1374         movdqa  xmm8, xmm5
   1375         psrld   xmm8, 7
   1376         pslld   xmm5, 25
   1377         por     xmm5, xmm8
   1378         movdqa  xmm8, xmm6
   1379         psrld   xmm8, 7
   1380         pslld   xmm6, 25
   1381         por     xmm6, xmm8
   1382         movdqa  xmm8, xmm7
   1383         psrld   xmm8, 7
   1384         pslld   xmm7, 25
   1385         por     xmm7, xmm8
   1386         movdqa  xmm8, xmm4
   1387         psrld   xmm8, 7
   1388         pslld   xmm4, 25
   1389         por     xmm4, xmm8
   1390         pxor    xmm4, xmm12
   1391         pxor    xmm5, xmm13
   1392         pxor    xmm6, xmm14
   1393         pxor    xmm7, xmm15
   1394         mov     eax, r13d
   1395         jne     9b
   1396         movdqa  xmm9, xmm0
   1397         punpckldq xmm0, xmm1
   1398         punpckhdq xmm9, xmm1
   1399         movdqa  xmm11, xmm2
   1400         punpckldq xmm2, xmm3
   1401         punpckhdq xmm11, xmm3
   1402         movdqa  xmm1, xmm0
   1403         punpcklqdq xmm0, xmm2
   1404         punpckhqdq xmm1, xmm2
   1405         movdqa  xmm3, xmm9
   1406         punpcklqdq xmm9, xmm11
   1407         punpckhqdq xmm3, xmm11
   1408         movdqu  xmmword ptr [rbx], xmm0
   1409         movdqu  xmmword ptr [rbx+0x20], xmm1
   1410         movdqu  xmmword ptr [rbx+0x40], xmm9
   1411         movdqu  xmmword ptr [rbx+0x60], xmm3
   1412         movdqa  xmm9, xmm4
   1413         punpckldq xmm4, xmm5
   1414         punpckhdq xmm9, xmm5
   1415         movdqa  xmm11, xmm6
   1416         punpckldq xmm6, xmm7
   1417         punpckhdq xmm11, xmm7
   1418         movdqa  xmm5, xmm4
   1419         punpcklqdq xmm4, xmm6
   1420         punpckhqdq xmm5, xmm6
   1421         movdqa  xmm7, xmm9
   1422         punpcklqdq xmm9, xmm11
   1423         punpckhqdq xmm7, xmm11
   1424         movdqu  xmmword ptr [rbx+0x10], xmm4
   1425         movdqu  xmmword ptr [rbx+0x30], xmm5
   1426         movdqu  xmmword ptr [rbx+0x50], xmm9
   1427         movdqu  xmmword ptr [rbx+0x70], xmm7
   1428         movdqa  xmm1, xmmword ptr [rsp+0x110]
   1429         movdqa  xmm0, xmm1
   1430         paddd   xmm1, xmmword ptr [rsp+0x150]
   1431         movdqa  xmmword ptr [rsp+0x110], xmm1
   1432         pxor    xmm0, xmmword ptr [CMP_MSB_MASK+rip]
   1433         pxor    xmm1, xmmword ptr [CMP_MSB_MASK+rip]
   1434         pcmpgtd xmm0, xmm1
   1435         movdqa  xmm1, xmmword ptr [rsp+0x120]
   1436         psubd   xmm1, xmm0
   1437         movdqa  xmmword ptr [rsp+0x120], xmm1
   1438         add     rbx, 128
   1439         add     rdi, 32
   1440         sub     rsi, 4
   1441         cmp     rsi, 4
   1442         jnc     2b
   1443         test    rsi, rsi
   1444         jnz     3f
   1445 4:
   1446         mov     rsp, rbp
   1447         pop     rbp
   1448         pop     rbx
   1449         pop     r12
   1450         pop     r13
   1451         pop     r14
   1452         pop     r15
   1453         ret
   1454 .p2align 5
   1455 3:
   1456         test    esi, 0x2
   1457         je      3f
   1458         movups  xmm0, xmmword ptr [rcx]
   1459         movups  xmm1, xmmword ptr [rcx+0x10]
   1460         movaps  xmm8, xmm0
   1461         movaps  xmm9, xmm1
   1462         movd    xmm13, dword ptr [rsp+0x110]
   1463         pinsrd  xmm13, dword ptr [rsp+0x120], 1
   1464         pinsrd  xmm13, dword ptr [BLAKE3_BLOCK_LEN+rip], 2
   1465         movaps  xmmword ptr [rsp], xmm13
   1466         movd    xmm14, dword ptr [rsp+0x114]
   1467         pinsrd  xmm14, dword ptr [rsp+0x124], 1
   1468         pinsrd  xmm14, dword ptr [BLAKE3_BLOCK_LEN+rip], 2
   1469         movaps  xmmword ptr [rsp+0x10], xmm14
   1470         mov     r8, qword ptr [rdi]
   1471         mov     r9, qword ptr [rdi+0x8]
   1472         movzx   eax, byte ptr [rbp+0x40]
   1473         or      eax, r13d
   1474         xor     edx, edx
   1475 2:
   1476         mov     r14d, eax
   1477         or      eax, r12d
   1478         add     rdx, 64
   1479         cmp     rdx, r15
   1480         cmovne  eax, r14d
   1481         movaps  xmm2, xmmword ptr [BLAKE3_IV+rip]
   1482         movaps  xmm10, xmm2
   1483         movups  xmm4, xmmword ptr [r8+rdx-0x40]
   1484         movups  xmm5, xmmword ptr [r8+rdx-0x30]
   1485         movaps  xmm3, xmm4
   1486         shufps  xmm4, xmm5, 136
   1487         shufps  xmm3, xmm5, 221
   1488         movaps  xmm5, xmm3
   1489         movups  xmm6, xmmword ptr [r8+rdx-0x20]
   1490         movups  xmm7, xmmword ptr [r8+rdx-0x10]
   1491         movaps  xmm3, xmm6
   1492         shufps  xmm6, xmm7, 136
   1493         pshufd  xmm6, xmm6, 0x93
   1494         shufps  xmm3, xmm7, 221
   1495         pshufd  xmm7, xmm3, 0x93
   1496         movups  xmm12, xmmword ptr [r9+rdx-0x40]
   1497         movups  xmm13, xmmword ptr [r9+rdx-0x30]
   1498         movaps  xmm11, xmm12
   1499         shufps  xmm12, xmm13, 136
   1500         shufps  xmm11, xmm13, 221
   1501         movaps  xmm13, xmm11
   1502         movups  xmm14, xmmword ptr [r9+rdx-0x20]
   1503         movups  xmm15, xmmword ptr [r9+rdx-0x10]
   1504         movaps  xmm11, xmm14
   1505         shufps  xmm14, xmm15, 136
   1506         pshufd  xmm14, xmm14, 0x93
   1507         shufps  xmm11, xmm15, 221
   1508         pshufd  xmm15, xmm11, 0x93
   1509         movaps  xmm3, xmmword ptr [rsp]
   1510         movaps  xmm11, xmmword ptr [rsp+0x10]
   1511         pinsrd  xmm3, eax, 3
   1512         pinsrd  xmm11, eax, 3
   1513         mov     al, 7
   1514 9:
   1515         paddd   xmm0, xmm4
   1516         paddd   xmm8, xmm12
   1517         movaps  xmmword ptr [rsp+0x20], xmm4
   1518         movaps  xmmword ptr [rsp+0x30], xmm12
   1519         paddd   xmm0, xmm1
   1520         paddd   xmm8, xmm9
   1521         pxor    xmm3, xmm0
   1522         pxor    xmm11, xmm8
   1523         movaps  xmm12, xmmword ptr [ROT16+rip]
   1524         pshufb  xmm3, xmm12
   1525         pshufb  xmm11, xmm12
   1526         paddd   xmm2, xmm3
   1527         paddd   xmm10, xmm11
   1528         pxor    xmm1, xmm2
   1529         pxor    xmm9, xmm10
   1530         movdqa  xmm4, xmm1
   1531         pslld   xmm1, 20
   1532         psrld   xmm4, 12
   1533         por     xmm1, xmm4
   1534         movdqa  xmm4, xmm9
   1535         pslld   xmm9, 20
   1536         psrld   xmm4, 12
   1537         por     xmm9, xmm4
   1538         paddd   xmm0, xmm5
   1539         paddd   xmm8, xmm13
   1540         movaps  xmmword ptr [rsp+0x40], xmm5
   1541         movaps  xmmword ptr [rsp+0x50], xmm13
   1542         paddd   xmm0, xmm1
   1543         paddd   xmm8, xmm9
   1544         pxor    xmm3, xmm0
   1545         pxor    xmm11, xmm8
   1546         movaps  xmm13, xmmword ptr [ROT8+rip]
   1547         pshufb  xmm3, xmm13
   1548         pshufb  xmm11, xmm13
   1549         paddd   xmm2, xmm3
   1550         paddd   xmm10, xmm11
   1551         pxor    xmm1, xmm2
   1552         pxor    xmm9, xmm10
   1553         movdqa  xmm4, xmm1
   1554         pslld   xmm1, 25
   1555         psrld   xmm4, 7
   1556         por     xmm1, xmm4
   1557         movdqa  xmm4, xmm9
   1558         pslld   xmm9, 25
   1559         psrld   xmm4, 7
   1560         por     xmm9, xmm4
   1561         pshufd  xmm0, xmm0, 0x93
   1562         pshufd  xmm8, xmm8, 0x93
   1563         pshufd  xmm3, xmm3, 0x4E
   1564         pshufd  xmm11, xmm11, 0x4E
   1565         pshufd  xmm2, xmm2, 0x39
   1566         pshufd  xmm10, xmm10, 0x39
   1567         paddd   xmm0, xmm6
   1568         paddd   xmm8, xmm14
   1569         paddd   xmm0, xmm1
   1570         paddd   xmm8, xmm9
   1571         pxor    xmm3, xmm0
   1572         pxor    xmm11, xmm8
   1573         pshufb  xmm3, xmm12
   1574         pshufb  xmm11, xmm12
   1575         paddd   xmm2, xmm3
   1576         paddd   xmm10, xmm11
   1577         pxor    xmm1, xmm2
   1578         pxor    xmm9, xmm10
   1579         movdqa  xmm4, xmm1
   1580         pslld   xmm1, 20
   1581         psrld   xmm4, 12
   1582         por     xmm1, xmm4
   1583         movdqa  xmm4, xmm9
   1584         pslld   xmm9, 20
   1585         psrld   xmm4, 12
   1586         por     xmm9, xmm4
   1587         paddd   xmm0, xmm7
   1588         paddd   xmm8, xmm15
   1589         paddd   xmm0, xmm1
   1590         paddd   xmm8, xmm9
   1591         pxor    xmm3, xmm0
   1592         pxor    xmm11, xmm8
   1593         pshufb  xmm3, xmm13
   1594         pshufb  xmm11, xmm13
   1595         paddd   xmm2, xmm3
   1596         paddd   xmm10, xmm11
   1597         pxor    xmm1, xmm2
   1598         pxor    xmm9, xmm10
   1599         movdqa  xmm4, xmm1
   1600         pslld   xmm1, 25
   1601         psrld   xmm4, 7
   1602         por     xmm1, xmm4
   1603         movdqa  xmm4, xmm9
   1604         pslld   xmm9, 25
   1605         psrld   xmm4, 7
   1606         por     xmm9, xmm4
   1607         pshufd  xmm0, xmm0, 0x39
   1608         pshufd  xmm8, xmm8, 0x39
   1609         pshufd  xmm3, xmm3, 0x4E
   1610         pshufd  xmm11, xmm11, 0x4E
   1611         pshufd  xmm2, xmm2, 0x93
   1612         pshufd  xmm10, xmm10, 0x93
   1613         dec     al
   1614         je      9f
   1615         movdqa  xmm12, xmmword ptr [rsp+0x20]
   1616         movdqa  xmm5, xmmword ptr [rsp+0x40]
   1617         pshufd  xmm13, xmm12, 0x0F
   1618         shufps  xmm12, xmm5, 214
   1619         pshufd  xmm4, xmm12, 0x39
   1620         movdqa  xmm12, xmm6
   1621         shufps  xmm12, xmm7, 250
   1622         pblendw xmm13, xmm12, 0xCC
   1623         movdqa  xmm12, xmm7
   1624         punpcklqdq xmm12, xmm5
   1625         pblendw xmm12, xmm6, 0xC0
   1626         pshufd  xmm12, xmm12, 0x78
   1627         punpckhdq xmm5, xmm7
   1628         punpckldq xmm6, xmm5
   1629         pshufd  xmm7, xmm6, 0x1E
   1630         movdqa  xmmword ptr [rsp+0x20], xmm13
   1631         movdqa  xmmword ptr [rsp+0x40], xmm12
   1632         movdqa  xmm5, xmmword ptr [rsp+0x30]
   1633         movdqa  xmm13, xmmword ptr [rsp+0x50]
   1634         pshufd  xmm6, xmm5, 0x0F
   1635         shufps  xmm5, xmm13, 214
   1636         pshufd  xmm12, xmm5, 0x39
   1637         movdqa  xmm5, xmm14
   1638         shufps  xmm5, xmm15, 250
   1639         pblendw xmm6, xmm5, 0xCC
   1640         movdqa  xmm5, xmm15
   1641         punpcklqdq xmm5, xmm13
   1642         pblendw xmm5, xmm14, 0xC0
   1643         pshufd  xmm5, xmm5, 0x78
   1644         punpckhdq xmm13, xmm15
   1645         punpckldq xmm14, xmm13
   1646         pshufd  xmm15, xmm14, 0x1E
   1647         movdqa  xmm13, xmm6
   1648         movdqa  xmm14, xmm5
   1649         movdqa  xmm5, xmmword ptr [rsp+0x20]
   1650         movdqa  xmm6, xmmword ptr [rsp+0x40]
   1651         jmp     9b
   1652 9:
   1653         pxor    xmm0, xmm2
   1654         pxor    xmm1, xmm3
   1655         pxor    xmm8, xmm10
   1656         pxor    xmm9, xmm11
   1657         mov     eax, r13d
   1658         cmp     rdx, r15
   1659         jne     2b
   1660         movups  xmmword ptr [rbx], xmm0
   1661         movups  xmmword ptr [rbx+0x10], xmm1
   1662         movups  xmmword ptr [rbx+0x20], xmm8
   1663         movups  xmmword ptr [rbx+0x30], xmm9
   1664         movdqa  xmm0, xmmword ptr [rsp+0x130]
   1665         movdqa  xmm1, xmmword ptr [rsp+0x110]
   1666         movdqa  xmm2, xmmword ptr [rsp+0x120]
   1667         movdqu  xmm3, xmmword ptr [rsp+0x118]
   1668         movdqu  xmm4, xmmword ptr [rsp+0x128]
   1669         blendvps xmm1, xmm3, xmm0
   1670         blendvps xmm2, xmm4, xmm0
   1671         movdqa  xmmword ptr [rsp+0x110], xmm1
   1672         movdqa  xmmword ptr [rsp+0x120], xmm2
   1673         add     rdi, 16
   1674         add     rbx, 64
   1675         sub     rsi, 2
   1676 3:
   1677         test    esi, 0x1
   1678         je      4b
   1679         movups  xmm0, xmmword ptr [rcx]
   1680         movups  xmm1, xmmword ptr [rcx+0x10]
   1681         movd    xmm13, dword ptr [rsp+0x110]
   1682         pinsrd  xmm13, dword ptr [rsp+0x120], 1
   1683         pinsrd  xmm13, dword ptr [BLAKE3_BLOCK_LEN+rip], 2
   1684         movaps  xmm14, xmmword ptr [ROT8+rip]
   1685         movaps  xmm15, xmmword ptr [ROT16+rip]
   1686         mov     r8, qword ptr [rdi]
   1687         movzx   eax, byte ptr [rbp+0x40]
   1688         or      eax, r13d
   1689         xor     edx, edx
   1690 2:
   1691         mov     r14d, eax
   1692         or      eax, r12d
   1693         add     rdx, 64
   1694         cmp     rdx, r15
   1695         cmovne  eax, r14d
   1696         movaps  xmm2, xmmword ptr [BLAKE3_IV+rip]
   1697         movaps  xmm3, xmm13
   1698         pinsrd  xmm3, eax, 3
   1699         movups  xmm4, xmmword ptr [r8+rdx-0x40]
   1700         movups  xmm5, xmmword ptr [r8+rdx-0x30]
   1701         movaps  xmm8, xmm4
   1702         shufps  xmm4, xmm5, 136
   1703         shufps  xmm8, xmm5, 221
   1704         movaps  xmm5, xmm8
   1705         movups  xmm6, xmmword ptr [r8+rdx-0x20]
   1706         movups  xmm7, xmmword ptr [r8+rdx-0x10]
   1707         movaps  xmm8, xmm6
   1708         shufps  xmm6, xmm7, 136
   1709         pshufd  xmm6, xmm6, 0x93
   1710         shufps  xmm8, xmm7, 221
   1711         pshufd  xmm7, xmm8, 0x93
   1712         mov     al, 7
   1713 9:
   1714         paddd   xmm0, xmm4
   1715         paddd   xmm0, xmm1
   1716         pxor    xmm3, xmm0
   1717         pshufb  xmm3, xmm15
   1718         paddd   xmm2, xmm3
   1719         pxor    xmm1, xmm2
   1720         movdqa  xmm11, xmm1
   1721         pslld   xmm1, 20
   1722         psrld   xmm11, 12
   1723         por     xmm1, xmm11
   1724         paddd   xmm0, xmm5
   1725         paddd   xmm0, xmm1
   1726         pxor    xmm3, xmm0
   1727         pshufb  xmm3, xmm14
   1728         paddd   xmm2, xmm3
   1729         pxor    xmm1, xmm2
   1730         movdqa  xmm11, xmm1
   1731         pslld   xmm1, 25
   1732         psrld   xmm11, 7
   1733         por     xmm1, xmm11
   1734         pshufd  xmm0, xmm0, 0x93
   1735         pshufd  xmm3, xmm3, 0x4E
   1736         pshufd  xmm2, xmm2, 0x39
   1737         paddd   xmm0, xmm6
   1738         paddd   xmm0, xmm1
   1739         pxor    xmm3, xmm0
   1740         pshufb  xmm3, xmm15
   1741         paddd   xmm2, xmm3
   1742         pxor    xmm1, xmm2
   1743         movdqa  xmm11, xmm1
   1744         pslld   xmm1, 20
   1745         psrld   xmm11, 12
   1746         por     xmm1, xmm11
   1747         paddd   xmm0, xmm7
   1748         paddd   xmm0, xmm1
   1749         pxor    xmm3, xmm0
   1750         pshufb  xmm3, xmm14
   1751         paddd   xmm2, xmm3
   1752         pxor    xmm1, xmm2
   1753         movdqa  xmm11, xmm1
   1754         pslld   xmm1, 25
   1755         psrld   xmm11, 7
   1756         por     xmm1, xmm11
   1757         pshufd  xmm0, xmm0, 0x39
   1758         pshufd  xmm3, xmm3, 0x4E
   1759         pshufd  xmm2, xmm2, 0x93
   1760         dec     al
   1761         jz      9f
   1762         movdqa  xmm8, xmm4
   1763         shufps  xmm8, xmm5, 214
   1764         pshufd  xmm9, xmm4, 0x0F
   1765         pshufd  xmm4, xmm8, 0x39
   1766         movdqa  xmm8, xmm6
   1767         shufps  xmm8, xmm7, 250
   1768         pblendw xmm9, xmm8, 0xCC
   1769         movdqa  xmm8, xmm7
   1770         punpcklqdq xmm8, xmm5
   1771         pblendw xmm8, xmm6, 0xC0
   1772         pshufd  xmm8, xmm8, 0x78
   1773         punpckhdq xmm5, xmm7
   1774         punpckldq xmm6, xmm5
   1775         pshufd  xmm7, xmm6, 0x1E
   1776         movdqa  xmm5, xmm9
   1777         movdqa  xmm6, xmm8
   1778         jmp     9b
   1779 9:
   1780         pxor    xmm0, xmm2
   1781         pxor    xmm1, xmm3
   1782         mov     eax, r13d
   1783         cmp     rdx, r15
   1784         jne     2b
   1785         movups  xmmword ptr [rbx], xmm0
   1786         movups  xmmword ptr [rbx+0x10], xmm1
   1787         jmp     4b
   1788 
   1789 .p2align 6
        /*
         * blake3_compress_in_place_sse41
         * (_blake3_compress_in_place_sse41 is the same entry point for
         * platforms that prepend '_' to C symbol names.)
         *
         * Mixes one 64-byte block into a 32-byte state with seven passes of
         * the add/xor/rotate loop below, then writes the new 32-byte state
         * back over the old one.  GAS Intel syntax; arguments arrive in the
         * System V AMD64 integer registers.
         *
         * In:    rdi = 32-byte state, loaded at entry, overwritten at exit
         *        rsi = 64-byte block, read with unaligned loads
         *        dl  = low byte becomes dword 2 of xmm3
         *        rcx = 64-bit value, becomes dwords 0-1 of xmm3
         *        r8b = low byte becomes dword 3 of xmm3
         *        NOTE(review): dl / rcx / r8b are presumably the block_len,
         *        counter and flags arguments of the C prototype -- confirm
         *        against the C header.
         * Out:   [rdi], [rdi+0x10]
         * Clobb: rax, rdx, xmm0-xmm9, xmm11, xmm14, xmm15, flags
         * Stack: unused
         */
   1790 blake3_compress_in_place_sse41:
   1791 _blake3_compress_in_place_sse41:
   1792         _CET_ENDBR
   1793         movups  xmm0, xmmword ptr [rdi]
   1794         movups  xmm1, xmmword ptr [rdi+0x10]
   1795         movaps  xmm2, xmmword ptr [BLAKE3_IV+rip]
        /*
         * Only the low bytes of rdx and r8 are meaningful.  Zero-extend them,
         * exactly as blake3_compress_xof_sse41 does, so that stale upper
         * register bits left by the caller cannot leak into xmm3.
         */
                movzx   eax, r8b
                movzx   edx, dl
   1796         shl     rax, 32
   1797         add     rdx, rax                /* rdx = dl | (r8b << 32) */
   1798         movq    xmm3, rcx
   1799         movq    xmm4, rdx
   1800         punpcklqdq xmm3, xmm4           /* xmm3 = { rcx.lo, rcx.hi, dl, r8b } */
        /*
         * Load the 16 block dwords m0..m15 and split them by parity:
         *   xmm4 = { m0, m2, m4, m6 }      xmm5 = { m1, m3, m5, m7 }
         *   xmm6 = { m14, m8, m10, m12 }   xmm7 = { m15, m9, m11, m13 }
         */
   1801         movups  xmm4, xmmword ptr [rsi]
   1802         movups  xmm5, xmmword ptr [rsi+0x10]
   1803         movaps  xmm8, xmm4
   1804         shufps  xmm4, xmm5, 136
   1805         shufps  xmm8, xmm5, 221
   1806         movaps  xmm5, xmm8
   1807         movups  xmm6, xmmword ptr [rsi+0x20]
   1808         movups  xmm7, xmmword ptr [rsi+0x30]
   1809         movaps  xmm8, xmm6
   1810         shufps  xmm6, xmm7, 136
   1811         pshufd  xmm6, xmm6, 0x93
   1812         shufps  xmm8, xmm7, 221
   1813         pshufd  xmm7, xmm8, 0x93
   1814         movaps  xmm14, xmmword ptr [ROT8+rip]   /* pshufb mask: rotate each dword right by 8 */
   1815         movaps  xmm15, xmmword ptr [ROT16+rip]  /* pshufb mask: rotate each dword by 16 */
   1816         mov     al, 7                   /* pass counter: the loop body runs seven times */
   1817 9:
        /* First half, step 1: add xmm4; rotate right by 16, then by 12. */
   1818         paddd   xmm0, xmm4
   1819         paddd   xmm0, xmm1
   1820         pxor    xmm3, xmm0
   1821         pshufb  xmm3, xmm15
   1822         paddd   xmm2, xmm3
   1823         pxor    xmm1, xmm2
   1824         movdqa  xmm11, xmm1
   1825         pslld   xmm1, 20
   1826         psrld   xmm11, 12
   1827         por     xmm1, xmm11
        /* First half, step 2: add xmm5; rotate right by 8, then by 7. */
   1828         paddd   xmm0, xmm5
   1829         paddd   xmm0, xmm1
   1830         pxor    xmm3, xmm0
   1831         pshufb  xmm3, xmm14
   1832         paddd   xmm2, xmm3
   1833         pxor    xmm1, xmm2
   1834         movdqa  xmm11, xmm1
   1835         pslld   xmm1, 25
   1836         psrld   xmm11, 7
   1837         por     xmm1, xmm11
        /*
         * Rotate the lanes of xmm0, xmm3 and xmm2 by one, two and three
         * positions so the second half combines different lanes with xmm1.
         */
   1838         pshufd  xmm0, xmm0, 0x93
   1839         pshufd  xmm3, xmm3, 0x4E
   1840         pshufd  xmm2, xmm2, 0x39
        /* Second half, step 1: add xmm6; rotate right by 16, then by 12. */
   1841         paddd   xmm0, xmm6
   1842         paddd   xmm0, xmm1
   1843         pxor    xmm3, xmm0
   1844         pshufb  xmm3, xmm15
   1845         paddd   xmm2, xmm3
   1846         pxor    xmm1, xmm2
   1847         movdqa  xmm11, xmm1
   1848         pslld   xmm1, 20
   1849         psrld   xmm11, 12
   1850         por     xmm1, xmm11
        /* Second half, step 2: add xmm7; rotate right by 8, then by 7. */
   1851         paddd   xmm0, xmm7
   1852         paddd   xmm0, xmm1
   1853         pxor    xmm3, xmm0
   1854         pshufb  xmm3, xmm14
   1855         paddd   xmm2, xmm3
   1856         pxor    xmm1, xmm2
   1857         movdqa  xmm11, xmm1
   1858         pslld   xmm1, 25
   1859         psrld   xmm11, 7
   1860         por     xmm1, xmm11
        /* Undo the lane rotation applied before the second half. */
   1861         pshufd  xmm0, xmm0, 0x39
   1862         pshufd  xmm3, xmm3, 0x4E
   1863         pshufd  xmm2, xmm2, 0x93
   1864         dec     al
   1865         jz      9f                      /* seventh pass done: skip the reshuffle, finish below */
        /* Rearrange the message dwords held in xmm4-xmm7 for the next pass. */
   1866         movdqa  xmm8, xmm4
   1867         shufps  xmm8, xmm5, 214
   1868         pshufd  xmm9, xmm4, 0x0F
   1869         pshufd  xmm4, xmm8, 0x39
   1870         movdqa  xmm8, xmm6
   1871         shufps  xmm8, xmm7, 250
   1872         pblendw xmm9, xmm8, 0xCC
   1873         movdqa  xmm8, xmm7
   1874         punpcklqdq xmm8, xmm5
   1875         pblendw xmm8, xmm6, 0xC0
   1876         pshufd  xmm8, xmm8, 0x78
   1877         punpckhdq xmm5, xmm7
   1878         punpckldq xmm6, xmm5
   1879         pshufd  xmm7, xmm6, 0x1E
   1880         movdqa  xmm5, xmm9
   1881         movdqa  xmm6, xmm8
   1882         jmp     9b                      /* back to the loop top (the earlier 9:) */
   1883 9:
        /* New state = (xmm0 ^ xmm2, xmm1 ^ xmm3), stored over the input state. */
   1884         pxor    xmm0, xmm2
   1885         pxor    xmm1, xmm3
   1886         movups  xmmword ptr [rdi], xmm0
   1887         movups  xmmword ptr [rdi+0x10], xmm1
   1888         ret
   1889 
   1890 .p2align 6
        /*
         * blake3_compress_xof_sse41
         * (_blake3_compress_xof_sse41 is the same entry point for platforms
         * that prepend '_' to C symbol names.)
         *
         * Runs the same seven-pass add/xor/rotate loop as
         * blake3_compress_in_place_sse41, but leaves the input state alone
         * and writes a 64-byte result to a separate buffer.  GAS Intel
         * syntax; arguments arrive in the System V AMD64 integer registers.
         *
         * In:    rdi = 32-byte state (read at entry and again at the end;
         *              never written here)
         *        rsi = 64-byte block, read with unaligned loads
         *        dl  = low byte becomes dword 2 of xmm3 (zero-extended)
         *        rcx = 64-bit value, becomes dwords 0-1 of xmm3
         *        r8b = low byte becomes dword 3 of xmm3 (zero-extended)
         *        r9  = 64-byte output buffer, written with unaligned stores
         *        NOTE(review): dl / rcx / r8b are presumably the block_len,
         *        counter and flags arguments of the C prototype -- confirm
         *        against the C header.
         * Out:   [r9 .. r9+0x3F]
         * Clobb: rax, rdx, xmm0-xmm9, xmm11, xmm14, xmm15, flags
         * Stack: unused
         */
   1891 blake3_compress_xof_sse41:
   1892 _blake3_compress_xof_sse41:
   1893         _CET_ENDBR
   1894         movups  xmm0, xmmword ptr [rdi]
   1895         movups  xmm1, xmmword ptr [rdi+0x10]
   1896         movaps  xmm2, xmmword ptr [BLAKE3_IV+rip]
        /* Keep only the low bytes of r8 and rdx, then pack them into one qword. */
   1897         movzx   eax, r8b
   1898         movzx   edx, dl
   1899         shl     rax, 32
   1900         add     rdx, rax                /* rdx = dl | (r8b << 32) */
   1901         movq    xmm3, rcx
   1902         movq    xmm4, rdx
   1903         punpcklqdq xmm3, xmm4           /* xmm3 = { rcx.lo, rcx.hi, dl, r8b } */
        /*
         * Load the 16 block dwords m0..m15 and split them by parity:
         *   xmm4 = { m0, m2, m4, m6 }      xmm5 = { m1, m3, m5, m7 }
         *   xmm6 = { m14, m8, m10, m12 }   xmm7 = { m15, m9, m11, m13 }
         */
   1904         movups  xmm4, xmmword ptr [rsi]
   1905         movups  xmm5, xmmword ptr [rsi+0x10]
   1906         movaps  xmm8, xmm4
   1907         shufps  xmm4, xmm5, 136
   1908         shufps  xmm8, xmm5, 221
   1909         movaps  xmm5, xmm8
   1910         movups  xmm6, xmmword ptr [rsi+0x20]
   1911         movups  xmm7, xmmword ptr [rsi+0x30]
   1912         movaps  xmm8, xmm6
   1913         shufps  xmm6, xmm7, 136
   1914         pshufd  xmm6, xmm6, 0x93
   1915         shufps  xmm8, xmm7, 221
   1916         pshufd  xmm7, xmm8, 0x93
   1917         movaps  xmm14, xmmword ptr [ROT8+rip]   /* pshufb mask: rotate each dword right by 8 */
   1918         movaps  xmm15, xmmword ptr [ROT16+rip]  /* pshufb mask: rotate each dword by 16 */
   1919         mov     al, 7                   /* pass counter: the loop body runs seven times */
   1920 9:
        /* First half, step 1: add xmm4; rotate right by 16, then by 12. */
   1921         paddd   xmm0, xmm4
   1922         paddd   xmm0, xmm1
   1923         pxor    xmm3, xmm0
   1924         pshufb  xmm3, xmm15
   1925         paddd   xmm2, xmm3
   1926         pxor    xmm1, xmm2
   1927         movdqa  xmm11, xmm1
   1928         pslld   xmm1, 20
   1929         psrld   xmm11, 12
   1930         por     xmm1, xmm11
        /* First half, step 2: add xmm5; rotate right by 8, then by 7. */
   1931         paddd   xmm0, xmm5
   1932         paddd   xmm0, xmm1
   1933         pxor    xmm3, xmm0
   1934         pshufb  xmm3, xmm14
   1935         paddd   xmm2, xmm3
   1936         pxor    xmm1, xmm2
   1937         movdqa  xmm11, xmm1
   1938         pslld   xmm1, 25
   1939         psrld   xmm11, 7
   1940         por     xmm1, xmm11
        /*
         * Rotate the lanes of xmm0, xmm3 and xmm2 by one, two and three
         * positions so the second half combines different lanes with xmm1.
         */
   1941         pshufd  xmm0, xmm0, 0x93
   1942         pshufd  xmm3, xmm3, 0x4E
   1943         pshufd  xmm2, xmm2, 0x39
        /* Second half, step 1: add xmm6; rotate right by 16, then by 12. */
   1944         paddd   xmm0, xmm6
   1945         paddd   xmm0, xmm1
   1946         pxor    xmm3, xmm0
   1947         pshufb  xmm3, xmm15
   1948         paddd   xmm2, xmm3
   1949         pxor    xmm1, xmm2
   1950         movdqa  xmm11, xmm1
   1951         pslld   xmm1, 20
   1952         psrld   xmm11, 12
   1953         por     xmm1, xmm11
        /* Second half, step 2: add xmm7; rotate right by 8, then by 7. */
   1954         paddd   xmm0, xmm7
   1955         paddd   xmm0, xmm1
   1956         pxor    xmm3, xmm0
   1957         pshufb  xmm3, xmm14
   1958         paddd   xmm2, xmm3
   1959         pxor    xmm1, xmm2
   1960         movdqa  xmm11, xmm1
   1961         pslld   xmm1, 25
   1962         psrld   xmm11, 7
   1963         por     xmm1, xmm11
        /* Undo the lane rotation applied before the second half. */
   1964         pshufd  xmm0, xmm0, 0x39
   1965         pshufd  xmm3, xmm3, 0x4E
   1966         pshufd  xmm2, xmm2, 0x93
   1967         dec     al
   1968         jz      9f                      /* seventh pass done: skip the reshuffle, finish below */
        /* Rearrange the message dwords held in xmm4-xmm7 for the next pass. */
   1969         movdqa  xmm8, xmm4
   1970         shufps  xmm8, xmm5, 214
   1971         pshufd  xmm9, xmm4, 0x0F
   1972         pshufd  xmm4, xmm8, 0x39
   1973         movdqa  xmm8, xmm6
   1974         shufps  xmm8, xmm7, 250
   1975         pblendw xmm9, xmm8, 0xCC
   1976         movdqa  xmm8, xmm7
   1977         punpcklqdq xmm8, xmm5
   1978         pblendw xmm8, xmm6, 0xC0
   1979         pshufd  xmm8, xmm8, 0x78
   1980         punpckhdq xmm5, xmm7
   1981         punpckldq xmm6, xmm5
   1982         pshufd  xmm7, xmm6, 0x1E
   1983         movdqa  xmm5, xmm9
   1984         movdqa  xmm6, xmm8
   1985         jmp     9b                      /* back to the loop top (the earlier 9:) */
   1986 9:
        /*
         * The message registers are no longer needed, so xmm4/xmm5 are
         * reused to re-read the input state from [rdi].
         *   out[0x00..0x1F] = (xmm0 ^ xmm2, xmm1 ^ xmm3)
         *   out[0x20..0x3F] = (xmm2 ^ state[0..15], xmm3 ^ state[16..31])
         * xmm2/xmm3 are consumed by the first two pxor before being modified.
         */
   1987         movdqu  xmm4, xmmword ptr [rdi]
   1988         movdqu  xmm5, xmmword ptr [rdi+0x10]
   1989         pxor    xmm0, xmm2
   1990         pxor    xmm1, xmm3
   1991         pxor    xmm2, xmm4
   1992         pxor    xmm3, xmm5
   1993         movups  xmmword ptr [r9], xmm0
   1994         movups  xmmword ptr [r9+0x10], xmm1
   1995         movups  xmmword ptr [r9+0x20], xmm2
   1996         movups  xmmword ptr [r9+0x30], xmm3
   1997         ret
   1998 
   1999 
   2000 #ifdef __APPLE__
   2001 .static_data
   2002 #else
   2003 .section .rodata
   2004 #endif
        /*
         * Constant tables.  The block starts on a 64-byte boundary and every
         * table below is exactly 16 bytes, so each label stays 16-byte
         * aligned.  The code relies on that: the tables are used as memory
         * operands of movaps / movdqa / pand / pxor, which need 16-byte
         * alignment.  Keep every entry a multiple of 16 bytes when editing.
         */
   2005 .p2align  6
        /* Four dwords loaded into xmm2 at the start of each single-block compression. */
   2006 BLAKE3_IV:
   2007         .long  0x6A09E667, 0xBB67AE85
   2008         .long  0x3C6EF372, 0xA54FF53A
        /* pshufb mask: swaps the 16-bit halves of each dword (rotate by 16). */
   2009 ROT16:
   2010         .byte  2, 3, 0, 1, 6, 7, 4, 5, 10, 11, 8, 9, 14, 15, 12, 13
        /* pshufb mask: rotates each dword right by 8 bits. */
   2011 ROT8:
   2012         .byte  1, 2, 3, 0, 5, 6, 7, 4, 9, 10, 11, 8, 13, 14, 15, 12
        /*
         * Per-lane offsets 0..3.  The blake3_hash_many_sse41 prologue ANDs
         * them with a broadcast mask and adds the result to the broadcast
         * low counter dword.
         */
   2013 ADD0:	
   2014         .long  0, 1, 2, 3
        /*
         * 4 in every lane, ANDed with the same broadcast mask in the
         * prologue.  NOTE(review): presumably the counter step per four-lane
         * batch -- the consumer is outside this view.
         */
   2015 ADD1:
   2016 	.long  4, 4, 4, 4
        /*
         * Each BLAKE3_IV dword replicated across four lanes.  The code that
         * reads these is outside this view.
         */
   2017 BLAKE3_IV_0:
   2018 	.long  0x6A09E667, 0x6A09E667, 0x6A09E667, 0x6A09E667
   2019 BLAKE3_IV_1:
   2020 	.long  0xBB67AE85, 0xBB67AE85, 0xBB67AE85, 0xBB67AE85
   2021 BLAKE3_IV_2:
   2022 	.long  0x3C6EF372, 0x3C6EF372, 0x3C6EF372, 0x3C6EF372
   2023 BLAKE3_IV_3:
   2024 	.long  0xA54FF53A, 0xA54FF53A, 0xA54FF53A, 0xA54FF53A
        /*
         * The value 64 in every lane.  The one-input tail of
         * blake3_hash_many_sse41 reads just the first dword with pinsrd.
         */
   2025 BLAKE3_BLOCK_LEN:
   2026 	.long  64, 64, 64, 64
        /*
         * Sign bit of every dword.  The prologue XORs it into both operands
         * before pcmpgtd, which turns that signed compare into an unsigned
         * one.
         */
   2027 CMP_MSB_MASK:
   2028 	.long  0x80000000, 0x80000000, 0x80000000, 0x80000000