chibipub

Experimental ActivityPub node in C
git clone git://jb55.com/chibipub
Log | Files | Refs | README | LICENSE

blake3_sse41_x86-64_windows_gnu.S (63460B)


      1 .intel_syntax noprefix                          # GAS Intel mode: "op dst, src" operand order, registers written without a % prefix
      2 .global blake3_hash_many_sse41                  # export the hash_many entry point under its plain name ...
      3 .global _blake3_hash_many_sse41                 # ... and under an underscore-prefixed alias (presumably for toolchains that prepend "_" to C symbols; confirm)
      4 .global blake3_compress_in_place_sse41          # export the compress_in_place entry point, plain name
      5 .global _blake3_compress_in_place_sse41         # same entry point, underscore-prefixed alias
      6 .global blake3_compress_xof_sse41               # export the compress_xof entry point, plain name
      7 .global _blake3_compress_xof_sse41              # same entry point, underscore-prefixed alias
      8 .section .text                                  # everything that follows is emitted into the code section
      9         .p2align  6                             # pad to a 2^6 = 64-byte boundary so the label that follows starts 64-byte aligned
     10 _blake3_hash_many_sse41:
     11 blake3_hash_many_sse41:
     12         push    r15
     13         push    r14
     14         push    r13
     15         push    r12
     16         push    rsi
     17         push    rdi
     18         push    rbx
     19         push    rbp
     20         mov     rbp, rsp
     21         sub     rsp, 528
     22         and     rsp, 0xFFFFFFFFFFFFFFC0
     23         movdqa  xmmword ptr [rsp+0x170], xmm6
     24         movdqa  xmmword ptr [rsp+0x180], xmm7
     25         movdqa  xmmword ptr [rsp+0x190], xmm8
     26         movdqa  xmmword ptr [rsp+0x1A0], xmm9
     27         movdqa  xmmword ptr [rsp+0x1B0], xmm10
     28         movdqa  xmmword ptr [rsp+0x1C0], xmm11
     29         movdqa  xmmword ptr [rsp+0x1D0], xmm12
     30         movdqa  xmmword ptr [rsp+0x1E0], xmm13
     31         movdqa  xmmword ptr [rsp+0x1F0], xmm14
     32         movdqa  xmmword ptr [rsp+0x200], xmm15
     33         mov     rdi, rcx
     34         mov     rsi, rdx
     35         mov     rdx, r8
     36         mov     rcx, r9
     37         mov     r8, qword ptr [rbp+0x68]
     38         movzx   r9, byte ptr [rbp+0x70]
     39         neg     r9d
     40         movd    xmm0, r9d
     41         pshufd  xmm0, xmm0, 0x00
     42         movdqa  xmmword ptr [rsp+0x130], xmm0
     43         movdqa  xmm1, xmm0
     44         pand    xmm1, xmmword ptr [ADD0+rip]
     45         pand    xmm0, xmmword ptr [ADD1+rip]
     46         movdqa  xmmword ptr [rsp+0x150], xmm0
     47         movd    xmm0, r8d
     48         pshufd  xmm0, xmm0, 0x00
     49         paddd   xmm0, xmm1
     50         movdqa  xmmword ptr [rsp+0x110], xmm0
     51         pxor    xmm0, xmmword ptr [CMP_MSB_MASK+rip]
     52         pxor    xmm1, xmmword ptr [CMP_MSB_MASK+rip]
     53         pcmpgtd xmm1, xmm0
     54         shr     r8, 32
     55         movd    xmm2, r8d
     56         pshufd  xmm2, xmm2, 0x00
     57         psubd   xmm2, xmm1
     58         movdqa  xmmword ptr [rsp+0x120], xmm2
     59         mov     rbx, qword ptr [rbp+0x90]
     60         mov     r15, rdx
     61         shl     r15, 6
     62         movzx   r13d, byte ptr [rbp+0x78]
     63         movzx   r12d, byte ptr [rbp+0x88]
     64         cmp     rsi, 4
     65         jc      3f
     66 2:
     67         movdqu  xmm3, xmmword ptr [rcx]
     68         pshufd  xmm0, xmm3, 0x00
     69         pshufd  xmm1, xmm3, 0x55
     70         pshufd  xmm2, xmm3, 0xAA
     71         pshufd  xmm3, xmm3, 0xFF
     72         movdqu  xmm7, xmmword ptr [rcx+0x10]
     73         pshufd  xmm4, xmm7, 0x00
     74         pshufd  xmm5, xmm7, 0x55
     75         pshufd  xmm6, xmm7, 0xAA
     76         pshufd  xmm7, xmm7, 0xFF
     77         mov     r8, qword ptr [rdi]
     78         mov     r9, qword ptr [rdi+0x8]
     79         mov     r10, qword ptr [rdi+0x10]
     80         mov     r11, qword ptr [rdi+0x18]
     81         movzx   eax, byte ptr [rbp+0x80]
     82         or      eax, r13d
     83         xor     edx, edx
     84 9:
     85         mov     r14d, eax
     86         or      eax, r12d
     87         add     rdx, 64
     88         cmp     rdx, r15
     89         cmovne  eax, r14d
     90         movdqu  xmm8, xmmword ptr [r8+rdx-0x40]
     91         movdqu  xmm9, xmmword ptr [r9+rdx-0x40]
     92         movdqu  xmm10, xmmword ptr [r10+rdx-0x40]
     93         movdqu  xmm11, xmmword ptr [r11+rdx-0x40]
     94         movdqa  xmm12, xmm8
     95         punpckldq xmm8, xmm9
     96         punpckhdq xmm12, xmm9
     97         movdqa  xmm14, xmm10
     98         punpckldq xmm10, xmm11
     99         punpckhdq xmm14, xmm11
    100         movdqa  xmm9, xmm8
    101         punpcklqdq xmm8, xmm10
    102         punpckhqdq xmm9, xmm10
    103         movdqa  xmm13, xmm12
    104         punpcklqdq xmm12, xmm14
    105         punpckhqdq xmm13, xmm14
    106         movdqa  xmmword ptr [rsp], xmm8
    107         movdqa  xmmword ptr [rsp+0x10], xmm9
    108         movdqa  xmmword ptr [rsp+0x20], xmm12
    109         movdqa  xmmword ptr [rsp+0x30], xmm13
    110         movdqu  xmm8, xmmword ptr [r8+rdx-0x30]
    111         movdqu  xmm9, xmmword ptr [r9+rdx-0x30]
    112         movdqu  xmm10, xmmword ptr [r10+rdx-0x30]
    113         movdqu  xmm11, xmmword ptr [r11+rdx-0x30]
    114         movdqa  xmm12, xmm8
    115         punpckldq xmm8, xmm9
    116         punpckhdq xmm12, xmm9
    117         movdqa  xmm14, xmm10
    118         punpckldq xmm10, xmm11
    119         punpckhdq xmm14, xmm11
    120         movdqa  xmm9, xmm8
    121         punpcklqdq xmm8, xmm10
    122         punpckhqdq xmm9, xmm10
    123         movdqa  xmm13, xmm12
    124         punpcklqdq xmm12, xmm14
    125         punpckhqdq xmm13, xmm14
    126         movdqa  xmmword ptr [rsp+0x40], xmm8
    127         movdqa  xmmword ptr [rsp+0x50], xmm9
    128         movdqa  xmmword ptr [rsp+0x60], xmm12
    129         movdqa  xmmword ptr [rsp+0x70], xmm13
    130         movdqu  xmm8, xmmword ptr [r8+rdx-0x20]
    131         movdqu  xmm9, xmmword ptr [r9+rdx-0x20]
    132         movdqu  xmm10, xmmword ptr [r10+rdx-0x20]
    133         movdqu  xmm11, xmmword ptr [r11+rdx-0x20]
    134         movdqa  xmm12, xmm8
    135         punpckldq xmm8, xmm9
    136         punpckhdq xmm12, xmm9
    137         movdqa  xmm14, xmm10
    138         punpckldq xmm10, xmm11
    139         punpckhdq xmm14, xmm11
    140         movdqa  xmm9, xmm8
    141         punpcklqdq xmm8, xmm10
    142         punpckhqdq xmm9, xmm10
    143         movdqa  xmm13, xmm12
    144         punpcklqdq xmm12, xmm14
    145         punpckhqdq xmm13, xmm14
    146         movdqa  xmmword ptr [rsp+0x80], xmm8
    147         movdqa  xmmword ptr [rsp+0x90], xmm9
    148         movdqa  xmmword ptr [rsp+0xA0], xmm12
    149         movdqa  xmmword ptr [rsp+0xB0], xmm13
    150         movdqu  xmm8, xmmword ptr [r8+rdx-0x10]
    151         movdqu  xmm9, xmmword ptr [r9+rdx-0x10]
    152         movdqu  xmm10, xmmword ptr [r10+rdx-0x10]
    153         movdqu  xmm11, xmmword ptr [r11+rdx-0x10]
    154         movdqa  xmm12, xmm8
    155         punpckldq xmm8, xmm9
    156         punpckhdq xmm12, xmm9
    157         movdqa  xmm14, xmm10
    158         punpckldq xmm10, xmm11
    159         punpckhdq xmm14, xmm11
    160         movdqa  xmm9, xmm8
    161         punpcklqdq xmm8, xmm10
    162         punpckhqdq xmm9, xmm10
    163         movdqa  xmm13, xmm12
    164         punpcklqdq xmm12, xmm14
    165         punpckhqdq xmm13, xmm14
    166         movdqa  xmmword ptr [rsp+0xC0], xmm8
    167         movdqa  xmmword ptr [rsp+0xD0], xmm9
    168         movdqa  xmmword ptr [rsp+0xE0], xmm12
    169         movdqa  xmmword ptr [rsp+0xF0], xmm13
    170         movdqa  xmm9, xmmword ptr [BLAKE3_IV_1+rip]
    171         movdqa  xmm10, xmmword ptr [BLAKE3_IV_2+rip]
    172         movdqa  xmm11, xmmword ptr [BLAKE3_IV_3+rip]
    173         movdqa  xmm12, xmmword ptr [rsp+0x110]
    174         movdqa  xmm13, xmmword ptr [rsp+0x120]
    175         movdqa  xmm14, xmmword ptr [BLAKE3_BLOCK_LEN+rip]
    176         movd    xmm15, eax
    177         pshufd  xmm15, xmm15, 0x00
    178         prefetcht0 [r8+rdx+0x80]
    179         prefetcht0 [r9+rdx+0x80]
    180         prefetcht0 [r10+rdx+0x80]
    181         prefetcht0 [r11+rdx+0x80]
    182         paddd   xmm0, xmmword ptr [rsp]
    183         paddd   xmm1, xmmword ptr [rsp+0x20]
    184         paddd   xmm2, xmmword ptr [rsp+0x40]
    185         paddd   xmm3, xmmword ptr [rsp+0x60]
    186         paddd   xmm0, xmm4
    187         paddd   xmm1, xmm5
    188         paddd   xmm2, xmm6
    189         paddd   xmm3, xmm7
    190         pxor    xmm12, xmm0
    191         pxor    xmm13, xmm1
    192         pxor    xmm14, xmm2
    193         pxor    xmm15, xmm3
    194         movdqa  xmm8, xmmword ptr [ROT16+rip]
    195         pshufb  xmm12, xmm8
    196         pshufb  xmm13, xmm8
    197         pshufb  xmm14, xmm8
    198         pshufb  xmm15, xmm8
    199         movdqa  xmm8, xmmword ptr [BLAKE3_IV_0+rip]
    200         paddd   xmm8, xmm12
    201         paddd   xmm9, xmm13
    202         paddd   xmm10, xmm14
    203         paddd   xmm11, xmm15
    204         pxor    xmm4, xmm8
    205         pxor    xmm5, xmm9
    206         pxor    xmm6, xmm10
    207         pxor    xmm7, xmm11
    208         movdqa  xmmword ptr [rsp+0x100], xmm8
    209         movdqa  xmm8, xmm4
    210         psrld   xmm8, 12
    211         pslld   xmm4, 20
    212         por     xmm4, xmm8
    213         movdqa  xmm8, xmm5
    214         psrld   xmm8, 12
    215         pslld   xmm5, 20
    216         por     xmm5, xmm8
    217         movdqa  xmm8, xmm6
    218         psrld   xmm8, 12
    219         pslld   xmm6, 20
    220         por     xmm6, xmm8
    221         movdqa  xmm8, xmm7
    222         psrld   xmm8, 12
    223         pslld   xmm7, 20
    224         por     xmm7, xmm8
    225         paddd   xmm0, xmmword ptr [rsp+0x10]
    226         paddd   xmm1, xmmword ptr [rsp+0x30]
    227         paddd   xmm2, xmmword ptr [rsp+0x50]
    228         paddd   xmm3, xmmword ptr [rsp+0x70]
    229         paddd   xmm0, xmm4
    230         paddd   xmm1, xmm5
    231         paddd   xmm2, xmm6
    232         paddd   xmm3, xmm7
    233         pxor    xmm12, xmm0
    234         pxor    xmm13, xmm1
    235         pxor    xmm14, xmm2
    236         pxor    xmm15, xmm3
    237         movdqa  xmm8, xmmword ptr [ROT8+rip]
    238         pshufb  xmm12, xmm8
    239         pshufb  xmm13, xmm8
    240         pshufb  xmm14, xmm8
    241         pshufb  xmm15, xmm8
    242         movdqa  xmm8, xmmword ptr [rsp+0x100]
    243         paddd   xmm8, xmm12
    244         paddd   xmm9, xmm13
    245         paddd   xmm10, xmm14
    246         paddd   xmm11, xmm15
    247         pxor    xmm4, xmm8
    248         pxor    xmm5, xmm9
    249         pxor    xmm6, xmm10
    250         pxor    xmm7, xmm11
    251         movdqa  xmmword ptr [rsp+0x100], xmm8
    252         movdqa  xmm8, xmm4
    253         psrld   xmm8, 7
    254         pslld   xmm4, 25
    255         por     xmm4, xmm8
    256         movdqa  xmm8, xmm5
    257         psrld   xmm8, 7
    258         pslld   xmm5, 25
    259         por     xmm5, xmm8
    260         movdqa  xmm8, xmm6
    261         psrld   xmm8, 7
    262         pslld   xmm6, 25
    263         por     xmm6, xmm8
    264         movdqa  xmm8, xmm7
    265         psrld   xmm8, 7
    266         pslld   xmm7, 25
    267         por     xmm7, xmm8
    268         paddd   xmm0, xmmword ptr [rsp+0x80]
    269         paddd   xmm1, xmmword ptr [rsp+0xA0]
    270         paddd   xmm2, xmmword ptr [rsp+0xC0]
    271         paddd   xmm3, xmmword ptr [rsp+0xE0]
    272         paddd   xmm0, xmm5
    273         paddd   xmm1, xmm6
    274         paddd   xmm2, xmm7
    275         paddd   xmm3, xmm4
    276         pxor    xmm15, xmm0
    277         pxor    xmm12, xmm1
    278         pxor    xmm13, xmm2
    279         pxor    xmm14, xmm3
    280         movdqa  xmm8, xmmword ptr [ROT16+rip]
    281         pshufb  xmm15, xmm8
    282         pshufb  xmm12, xmm8
    283         pshufb  xmm13, xmm8
    284         pshufb  xmm14, xmm8
    285         paddd   xmm10, xmm15
    286         paddd   xmm11, xmm12
    287         movdqa  xmm8, xmmword ptr [rsp+0x100]
    288         paddd   xmm8, xmm13
    289         paddd   xmm9, xmm14
    290         pxor    xmm5, xmm10
    291         pxor    xmm6, xmm11
    292         pxor    xmm7, xmm8
    293         pxor    xmm4, xmm9
    294         movdqa  xmmword ptr [rsp+0x100], xmm8
    295         movdqa  xmm8, xmm5
    296         psrld   xmm8, 12
    297         pslld   xmm5, 20
    298         por     xmm5, xmm8
    299         movdqa  xmm8, xmm6
    300         psrld   xmm8, 12
    301         pslld   xmm6, 20
    302         por     xmm6, xmm8
    303         movdqa  xmm8, xmm7
    304         psrld   xmm8, 12
    305         pslld   xmm7, 20
    306         por     xmm7, xmm8
    307         movdqa  xmm8, xmm4
    308         psrld   xmm8, 12
    309         pslld   xmm4, 20
    310         por     xmm4, xmm8
    311         paddd   xmm0, xmmword ptr [rsp+0x90]
    312         paddd   xmm1, xmmword ptr [rsp+0xB0]
    313         paddd   xmm2, xmmword ptr [rsp+0xD0]
    314         paddd   xmm3, xmmword ptr [rsp+0xF0]
    315         paddd   xmm0, xmm5
    316         paddd   xmm1, xmm6
    317         paddd   xmm2, xmm7
    318         paddd   xmm3, xmm4
    319         pxor    xmm15, xmm0
    320         pxor    xmm12, xmm1
    321         pxor    xmm13, xmm2
    322         pxor    xmm14, xmm3
    323         movdqa  xmm8, xmmword ptr [ROT8+rip]
    324         pshufb  xmm15, xmm8
    325         pshufb  xmm12, xmm8
    326         pshufb  xmm13, xmm8
    327         pshufb  xmm14, xmm8
    328         paddd   xmm10, xmm15
    329         paddd   xmm11, xmm12
    330         movdqa  xmm8, xmmword ptr [rsp+0x100]
    331         paddd   xmm8, xmm13
    332         paddd   xmm9, xmm14
    333         pxor    xmm5, xmm10
    334         pxor    xmm6, xmm11
    335         pxor    xmm7, xmm8
    336         pxor    xmm4, xmm9
    337         movdqa  xmmword ptr [rsp+0x100], xmm8
    338         movdqa  xmm8, xmm5
    339         psrld   xmm8, 7
    340         pslld   xmm5, 25
    341         por     xmm5, xmm8
    342         movdqa  xmm8, xmm6
    343         psrld   xmm8, 7
    344         pslld   xmm6, 25
    345         por     xmm6, xmm8
    346         movdqa  xmm8, xmm7
    347         psrld   xmm8, 7
    348         pslld   xmm7, 25
    349         por     xmm7, xmm8
    350         movdqa  xmm8, xmm4
    351         psrld   xmm8, 7
    352         pslld   xmm4, 25
    353         por     xmm4, xmm8
    354         paddd   xmm0, xmmword ptr [rsp+0x20]
    355         paddd   xmm1, xmmword ptr [rsp+0x30]
    356         paddd   xmm2, xmmword ptr [rsp+0x70]
    357         paddd   xmm3, xmmword ptr [rsp+0x40]
    358         paddd   xmm0, xmm4
    359         paddd   xmm1, xmm5
    360         paddd   xmm2, xmm6
    361         paddd   xmm3, xmm7
    362         pxor    xmm12, xmm0
    363         pxor    xmm13, xmm1
    364         pxor    xmm14, xmm2
    365         pxor    xmm15, xmm3
    366         movdqa  xmm8, xmmword ptr [ROT16+rip]
    367         pshufb  xmm12, xmm8
    368         pshufb  xmm13, xmm8
    369         pshufb  xmm14, xmm8
    370         pshufb  xmm15, xmm8
    371         movdqa  xmm8, xmmword ptr [rsp+0x100]
    372         paddd   xmm8, xmm12
    373         paddd   xmm9, xmm13
    374         paddd   xmm10, xmm14
    375         paddd   xmm11, xmm15
    376         pxor    xmm4, xmm8
    377         pxor    xmm5, xmm9
    378         pxor    xmm6, xmm10
    379         pxor    xmm7, xmm11
    380         movdqa  xmmword ptr [rsp+0x100], xmm8
    381         movdqa  xmm8, xmm4
    382         psrld   xmm8, 12
    383         pslld   xmm4, 20
    384         por     xmm4, xmm8
    385         movdqa  xmm8, xmm5
    386         psrld   xmm8, 12
    387         pslld   xmm5, 20
    388         por     xmm5, xmm8
    389         movdqa  xmm8, xmm6
    390         psrld   xmm8, 12
    391         pslld   xmm6, 20
    392         por     xmm6, xmm8
    393         movdqa  xmm8, xmm7
    394         psrld   xmm8, 12
    395         pslld   xmm7, 20
    396         por     xmm7, xmm8
    397         paddd   xmm0, xmmword ptr [rsp+0x60]
    398         paddd   xmm1, xmmword ptr [rsp+0xA0]
    399         paddd   xmm2, xmmword ptr [rsp]
    400         paddd   xmm3, xmmword ptr [rsp+0xD0]
    401         paddd   xmm0, xmm4
    402         paddd   xmm1, xmm5
    403         paddd   xmm2, xmm6
    404         paddd   xmm3, xmm7
    405         pxor    xmm12, xmm0
    406         pxor    xmm13, xmm1
    407         pxor    xmm14, xmm2
    408         pxor    xmm15, xmm3
    409         movdqa  xmm8, xmmword ptr [ROT8+rip]
    410         pshufb  xmm12, xmm8
    411         pshufb  xmm13, xmm8
    412         pshufb  xmm14, xmm8
    413         pshufb  xmm15, xmm8
    414         movdqa  xmm8, xmmword ptr [rsp+0x100]
    415         paddd   xmm8, xmm12
    416         paddd   xmm9, xmm13
    417         paddd   xmm10, xmm14
    418         paddd   xmm11, xmm15
    419         pxor    xmm4, xmm8
    420         pxor    xmm5, xmm9
    421         pxor    xmm6, xmm10
    422         pxor    xmm7, xmm11
    423         movdqa  xmmword ptr [rsp+0x100], xmm8
    424         movdqa  xmm8, xmm4
    425         psrld   xmm8, 7
    426         pslld   xmm4, 25
    427         por     xmm4, xmm8
    428         movdqa  xmm8, xmm5
    429         psrld   xmm8, 7
    430         pslld   xmm5, 25
    431         por     xmm5, xmm8
    432         movdqa  xmm8, xmm6
    433         psrld   xmm8, 7
    434         pslld   xmm6, 25
    435         por     xmm6, xmm8
    436         movdqa  xmm8, xmm7
    437         psrld   xmm8, 7
    438         pslld   xmm7, 25
    439         por     xmm7, xmm8
    440         paddd   xmm0, xmmword ptr [rsp+0x10]
    441         paddd   xmm1, xmmword ptr [rsp+0xC0]
    442         paddd   xmm2, xmmword ptr [rsp+0x90]
    443         paddd   xmm3, xmmword ptr [rsp+0xF0]
    444         paddd   xmm0, xmm5
    445         paddd   xmm1, xmm6
    446         paddd   xmm2, xmm7
    447         paddd   xmm3, xmm4
    448         pxor    xmm15, xmm0
    449         pxor    xmm12, xmm1
    450         pxor    xmm13, xmm2
    451         pxor    xmm14, xmm3
    452         movdqa  xmm8, xmmword ptr [ROT16+rip]
    453         pshufb  xmm15, xmm8
    454         pshufb  xmm12, xmm8
    455         pshufb  xmm13, xmm8
    456         pshufb  xmm14, xmm8
    457         paddd   xmm10, xmm15
    458         paddd   xmm11, xmm12
    459         movdqa  xmm8, xmmword ptr [rsp+0x100]
    460         paddd   xmm8, xmm13
    461         paddd   xmm9, xmm14
    462         pxor    xmm5, xmm10
    463         pxor    xmm6, xmm11
    464         pxor    xmm7, xmm8
    465         pxor    xmm4, xmm9
    466         movdqa  xmmword ptr [rsp+0x100], xmm8
    467         movdqa  xmm8, xmm5
    468         psrld   xmm8, 12
    469         pslld   xmm5, 20
    470         por     xmm5, xmm8
    471         movdqa  xmm8, xmm6
    472         psrld   xmm8, 12
    473         pslld   xmm6, 20
    474         por     xmm6, xmm8
    475         movdqa  xmm8, xmm7
    476         psrld   xmm8, 12
    477         pslld   xmm7, 20
    478         por     xmm7, xmm8
    479         movdqa  xmm8, xmm4
    480         psrld   xmm8, 12
    481         pslld   xmm4, 20
    482         por     xmm4, xmm8
    483         paddd   xmm0, xmmword ptr [rsp+0xB0]
    484         paddd   xmm1, xmmword ptr [rsp+0x50]
    485         paddd   xmm2, xmmword ptr [rsp+0xE0]
    486         paddd   xmm3, xmmword ptr [rsp+0x80]
    487         paddd   xmm0, xmm5
    488         paddd   xmm1, xmm6
    489         paddd   xmm2, xmm7
    490         paddd   xmm3, xmm4
    491         pxor    xmm15, xmm0
    492         pxor    xmm12, xmm1
    493         pxor    xmm13, xmm2
    494         pxor    xmm14, xmm3
    495         movdqa  xmm8, xmmword ptr [ROT8+rip]
    496         pshufb  xmm15, xmm8
    497         pshufb  xmm12, xmm8
    498         pshufb  xmm13, xmm8
    499         pshufb  xmm14, xmm8
    500         paddd   xmm10, xmm15
    501         paddd   xmm11, xmm12
    502         movdqa  xmm8, xmmword ptr [rsp+0x100]
    503         paddd   xmm8, xmm13
    504         paddd   xmm9, xmm14
    505         pxor    xmm5, xmm10
    506         pxor    xmm6, xmm11
    507         pxor    xmm7, xmm8
    508         pxor    xmm4, xmm9
    509         movdqa  xmmword ptr [rsp+0x100], xmm8
    510         movdqa  xmm8, xmm5
    511         psrld   xmm8, 7
    512         pslld   xmm5, 25
    513         por     xmm5, xmm8
    514         movdqa  xmm8, xmm6
    515         psrld   xmm8, 7
    516         pslld   xmm6, 25
    517         por     xmm6, xmm8
    518         movdqa  xmm8, xmm7
    519         psrld   xmm8, 7
    520         pslld   xmm7, 25
    521         por     xmm7, xmm8
    522         movdqa  xmm8, xmm4
    523         psrld   xmm8, 7
    524         pslld   xmm4, 25
    525         por     xmm4, xmm8
    526         paddd   xmm0, xmmword ptr [rsp+0x30]
    527         paddd   xmm1, xmmword ptr [rsp+0xA0]
    528         paddd   xmm2, xmmword ptr [rsp+0xD0]
    529         paddd   xmm3, xmmword ptr [rsp+0x70]
    530         paddd   xmm0, xmm4
    531         paddd   xmm1, xmm5
    532         paddd   xmm2, xmm6
    533         paddd   xmm3, xmm7
    534         pxor    xmm12, xmm0
    535         pxor    xmm13, xmm1
    536         pxor    xmm14, xmm2
    537         pxor    xmm15, xmm3
    538         movdqa  xmm8, xmmword ptr [ROT16+rip]
    539         pshufb  xmm12, xmm8
    540         pshufb  xmm13, xmm8
    541         pshufb  xmm14, xmm8
    542         pshufb  xmm15, xmm8
    543         movdqa  xmm8, xmmword ptr [rsp+0x100]
    544         paddd   xmm8, xmm12
    545         paddd   xmm9, xmm13
    546         paddd   xmm10, xmm14
    547         paddd   xmm11, xmm15
    548         pxor    xmm4, xmm8
    549         pxor    xmm5, xmm9
    550         pxor    xmm6, xmm10
    551         pxor    xmm7, xmm11
    552         movdqa  xmmword ptr [rsp+0x100], xmm8
    553         movdqa  xmm8, xmm4
    554         psrld   xmm8, 12
    555         pslld   xmm4, 20
    556         por     xmm4, xmm8
    557         movdqa  xmm8, xmm5
    558         psrld   xmm8, 12
    559         pslld   xmm5, 20
    560         por     xmm5, xmm8
    561         movdqa  xmm8, xmm6
    562         psrld   xmm8, 12
    563         pslld   xmm6, 20
    564         por     xmm6, xmm8
    565         movdqa  xmm8, xmm7
    566         psrld   xmm8, 12
    567         pslld   xmm7, 20
    568         por     xmm7, xmm8
    569         paddd   xmm0, xmmword ptr [rsp+0x40]
    570         paddd   xmm1, xmmword ptr [rsp+0xC0]
    571         paddd   xmm2, xmmword ptr [rsp+0x20]
    572         paddd   xmm3, xmmword ptr [rsp+0xE0]
    573         paddd   xmm0, xmm4
    574         paddd   xmm1, xmm5
    575         paddd   xmm2, xmm6
    576         paddd   xmm3, xmm7
    577         pxor    xmm12, xmm0
    578         pxor    xmm13, xmm1
    579         pxor    xmm14, xmm2
    580         pxor    xmm15, xmm3
    581         movdqa  xmm8, xmmword ptr [ROT8+rip]
    582         pshufb  xmm12, xmm8
    583         pshufb  xmm13, xmm8
    584         pshufb  xmm14, xmm8
    585         pshufb  xmm15, xmm8
    586         movdqa  xmm8, xmmword ptr [rsp+0x100]
    587         paddd   xmm8, xmm12
    588         paddd   xmm9, xmm13
    589         paddd   xmm10, xmm14
    590         paddd   xmm11, xmm15
    591         pxor    xmm4, xmm8
    592         pxor    xmm5, xmm9
    593         pxor    xmm6, xmm10
    594         pxor    xmm7, xmm11
    595         movdqa  xmmword ptr [rsp+0x100], xmm8
    596         movdqa  xmm8, xmm4
    597         psrld   xmm8, 7
    598         pslld   xmm4, 25
    599         por     xmm4, xmm8
    600         movdqa  xmm8, xmm5
    601         psrld   xmm8, 7
    602         pslld   xmm5, 25
    603         por     xmm5, xmm8
    604         movdqa  xmm8, xmm6
    605         psrld   xmm8, 7
    606         pslld   xmm6, 25
    607         por     xmm6, xmm8
    608         movdqa  xmm8, xmm7
    609         psrld   xmm8, 7
    610         pslld   xmm7, 25
    611         por     xmm7, xmm8
    612         paddd   xmm0, xmmword ptr [rsp+0x60]
    613         paddd   xmm1, xmmword ptr [rsp+0x90]
    614         paddd   xmm2, xmmword ptr [rsp+0xB0]
    615         paddd   xmm3, xmmword ptr [rsp+0x80]
    616         paddd   xmm0, xmm5
    617         paddd   xmm1, xmm6
    618         paddd   xmm2, xmm7
    619         paddd   xmm3, xmm4
    620         pxor    xmm15, xmm0
    621         pxor    xmm12, xmm1
    622         pxor    xmm13, xmm2
    623         pxor    xmm14, xmm3
    624         movdqa  xmm8, xmmword ptr [ROT16+rip]
    625         pshufb  xmm15, xmm8
    626         pshufb  xmm12, xmm8
    627         pshufb  xmm13, xmm8
    628         pshufb  xmm14, xmm8
    629         paddd   xmm10, xmm15
    630         paddd   xmm11, xmm12
    631         movdqa  xmm8, xmmword ptr [rsp+0x100]
    632         paddd   xmm8, xmm13
    633         paddd   xmm9, xmm14
    634         pxor    xmm5, xmm10
    635         pxor    xmm6, xmm11
    636         pxor    xmm7, xmm8
    637         pxor    xmm4, xmm9
    638         movdqa  xmmword ptr [rsp+0x100], xmm8
    639         movdqa  xmm8, xmm5
    640         psrld   xmm8, 12
    641         pslld   xmm5, 20
    642         por     xmm5, xmm8
    643         movdqa  xmm8, xmm6
    644         psrld   xmm8, 12
    645         pslld   xmm6, 20
    646         por     xmm6, xmm8
    647         movdqa  xmm8, xmm7
    648         psrld   xmm8, 12
    649         pslld   xmm7, 20
    650         por     xmm7, xmm8
    651         movdqa  xmm8, xmm4
    652         psrld   xmm8, 12
    653         pslld   xmm4, 20
    654         por     xmm4, xmm8
    655         paddd   xmm0, xmmword ptr [rsp+0x50]
    656         paddd   xmm1, xmmword ptr [rsp]
    657         paddd   xmm2, xmmword ptr [rsp+0xF0]
    658         paddd   xmm3, xmmword ptr [rsp+0x10]
    659         paddd   xmm0, xmm5
    660         paddd   xmm1, xmm6
    661         paddd   xmm2, xmm7
    662         paddd   xmm3, xmm4
    663         pxor    xmm15, xmm0
    664         pxor    xmm12, xmm1
    665         pxor    xmm13, xmm2
    666         pxor    xmm14, xmm3
    667         movdqa  xmm8, xmmword ptr [ROT8+rip]
    668         pshufb  xmm15, xmm8
    669         pshufb  xmm12, xmm8
    670         pshufb  xmm13, xmm8
    671         pshufb  xmm14, xmm8
    672         paddd   xmm10, xmm15
    673         paddd   xmm11, xmm12
    674         movdqa  xmm8, xmmword ptr [rsp+0x100]
    675         paddd   xmm8, xmm13
    676         paddd   xmm9, xmm14
    677         pxor    xmm5, xmm10
    678         pxor    xmm6, xmm11
    679         pxor    xmm7, xmm8
    680         pxor    xmm4, xmm9
    681         movdqa  xmmword ptr [rsp+0x100], xmm8
    682         movdqa  xmm8, xmm5
    683         psrld   xmm8, 7
    684         pslld   xmm5, 25
    685         por     xmm5, xmm8
    686         movdqa  xmm8, xmm6
    687         psrld   xmm8, 7
    688         pslld   xmm6, 25
    689         por     xmm6, xmm8
    690         movdqa  xmm8, xmm7
    691         psrld   xmm8, 7
    692         pslld   xmm7, 25
    693         por     xmm7, xmm8
    694         movdqa  xmm8, xmm4
    695         psrld   xmm8, 7
    696         pslld   xmm4, 25
    697         por     xmm4, xmm8
    698         paddd   xmm0, xmmword ptr [rsp+0xA0]
    699         paddd   xmm1, xmmword ptr [rsp+0xC0]
    700         paddd   xmm2, xmmword ptr [rsp+0xE0]
    701         paddd   xmm3, xmmword ptr [rsp+0xD0]
    702         paddd   xmm0, xmm4
    703         paddd   xmm1, xmm5
    704         paddd   xmm2, xmm6
    705         paddd   xmm3, xmm7
    706         pxor    xmm12, xmm0
    707         pxor    xmm13, xmm1
    708         pxor    xmm14, xmm2
    709         pxor    xmm15, xmm3
    710         movdqa  xmm8, xmmword ptr [ROT16+rip]
    711         pshufb  xmm12, xmm8
    712         pshufb  xmm13, xmm8
    713         pshufb  xmm14, xmm8
    714         pshufb  xmm15, xmm8
    715         movdqa  xmm8, xmmword ptr [rsp+0x100]
    716         paddd   xmm8, xmm12
    717         paddd   xmm9, xmm13
    718         paddd   xmm10, xmm14
    719         paddd   xmm11, xmm15
    720         pxor    xmm4, xmm8
    721         pxor    xmm5, xmm9
    722         pxor    xmm6, xmm10
    723         pxor    xmm7, xmm11
    724         movdqa  xmmword ptr [rsp+0x100], xmm8
    725         movdqa  xmm8, xmm4
    726         psrld   xmm8, 12
    727         pslld   xmm4, 20
    728         por     xmm4, xmm8
    729         movdqa  xmm8, xmm5
    730         psrld   xmm8, 12
    731         pslld   xmm5, 20
    732         por     xmm5, xmm8
    733         movdqa  xmm8, xmm6
    734         psrld   xmm8, 12
    735         pslld   xmm6, 20
    736         por     xmm6, xmm8
    737         movdqa  xmm8, xmm7
    738         psrld   xmm8, 12
    739         pslld   xmm7, 20
    740         por     xmm7, xmm8
    741         paddd   xmm0, xmmword ptr [rsp+0x70]
    742         paddd   xmm1, xmmword ptr [rsp+0x90]
    743         paddd   xmm2, xmmword ptr [rsp+0x30]
    744         paddd   xmm3, xmmword ptr [rsp+0xF0]
    745         paddd   xmm0, xmm4
    746         paddd   xmm1, xmm5
    747         paddd   xmm2, xmm6
    748         paddd   xmm3, xmm7
    749         pxor    xmm12, xmm0
    750         pxor    xmm13, xmm1
    751         pxor    xmm14, xmm2
    752         pxor    xmm15, xmm3
    753         movdqa  xmm8, xmmword ptr [ROT8+rip]
    754         pshufb  xmm12, xmm8
    755         pshufb  xmm13, xmm8
    756         pshufb  xmm14, xmm8
    757         pshufb  xmm15, xmm8
    758         movdqa  xmm8, xmmword ptr [rsp+0x100]
    759         paddd   xmm8, xmm12
    760         paddd   xmm9, xmm13
    761         paddd   xmm10, xmm14
    762         paddd   xmm11, xmm15
    763         pxor    xmm4, xmm8
    764         pxor    xmm5, xmm9
    765         pxor    xmm6, xmm10
    766         pxor    xmm7, xmm11
    767         movdqa  xmmword ptr [rsp+0x100], xmm8
    768         movdqa  xmm8, xmm4
    769         psrld   xmm8, 7
    770         pslld   xmm4, 25
    771         por     xmm4, xmm8
    772         movdqa  xmm8, xmm5
    773         psrld   xmm8, 7
    774         pslld   xmm5, 25
    775         por     xmm5, xmm8
    776         movdqa  xmm8, xmm6
    777         psrld   xmm8, 7
    778         pslld   xmm6, 25
    779         por     xmm6, xmm8
    780         movdqa  xmm8, xmm7
    781         psrld   xmm8, 7
    782         pslld   xmm7, 25
    783         por     xmm7, xmm8
    784         paddd   xmm0, xmmword ptr [rsp+0x40]
    785         paddd   xmm1, xmmword ptr [rsp+0xB0]
    786         paddd   xmm2, xmmword ptr [rsp+0x50]
    787         paddd   xmm3, xmmword ptr [rsp+0x10]
    788         paddd   xmm0, xmm5
    789         paddd   xmm1, xmm6
    790         paddd   xmm2, xmm7
    791         paddd   xmm3, xmm4
    792         pxor    xmm15, xmm0
    793         pxor    xmm12, xmm1
    794         pxor    xmm13, xmm2
    795         pxor    xmm14, xmm3
    796         movdqa  xmm8, xmmword ptr [ROT16+rip]
    797         pshufb  xmm15, xmm8
    798         pshufb  xmm12, xmm8
    799         pshufb  xmm13, xmm8
    800         pshufb  xmm14, xmm8
    801         paddd   xmm10, xmm15
    802         paddd   xmm11, xmm12
    803         movdqa  xmm8, xmmword ptr [rsp+0x100]
    804         paddd   xmm8, xmm13
    805         paddd   xmm9, xmm14
    806         pxor    xmm5, xmm10
    807         pxor    xmm6, xmm11
    808         pxor    xmm7, xmm8
    809         pxor    xmm4, xmm9
    810         movdqa  xmmword ptr [rsp+0x100], xmm8
    811         movdqa  xmm8, xmm5
    812         psrld   xmm8, 12
    813         pslld   xmm5, 20
    814         por     xmm5, xmm8
    815         movdqa  xmm8, xmm6
    816         psrld   xmm8, 12
    817         pslld   xmm6, 20
    818         por     xmm6, xmm8
    819         movdqa  xmm8, xmm7
    820         psrld   xmm8, 12
    821         pslld   xmm7, 20
    822         por     xmm7, xmm8
    823         movdqa  xmm8, xmm4
    824         psrld   xmm8, 12
    825         pslld   xmm4, 20
    826         por     xmm4, xmm8
    827         paddd   xmm0, xmmword ptr [rsp]
    828         paddd   xmm1, xmmword ptr [rsp+0x20]
    829         paddd   xmm2, xmmword ptr [rsp+0x80]
    830         paddd   xmm3, xmmword ptr [rsp+0x60]
    831         paddd   xmm0, xmm5
    832         paddd   xmm1, xmm6
    833         paddd   xmm2, xmm7
    834         paddd   xmm3, xmm4
    835         pxor    xmm15, xmm0
    836         pxor    xmm12, xmm1
    837         pxor    xmm13, xmm2
    838         pxor    xmm14, xmm3
    839         movdqa  xmm8, xmmword ptr [ROT8+rip]
    840         pshufb  xmm15, xmm8
    841         pshufb  xmm12, xmm8
    842         pshufb  xmm13, xmm8
    843         pshufb  xmm14, xmm8
    844         paddd   xmm10, xmm15
    845         paddd   xmm11, xmm12
    846         movdqa  xmm8, xmmword ptr [rsp+0x100]
    847         paddd   xmm8, xmm13
    848         paddd   xmm9, xmm14
    849         pxor    xmm5, xmm10
    850         pxor    xmm6, xmm11
    851         pxor    xmm7, xmm8
    852         pxor    xmm4, xmm9
    853         movdqa  xmmword ptr [rsp+0x100], xmm8
    854         movdqa  xmm8, xmm5
    855         psrld   xmm8, 7
    856         pslld   xmm5, 25
    857         por     xmm5, xmm8
    858         movdqa  xmm8, xmm6
    859         psrld   xmm8, 7
    860         pslld   xmm6, 25
    861         por     xmm6, xmm8
    862         movdqa  xmm8, xmm7
    863         psrld   xmm8, 7
    864         pslld   xmm7, 25
    865         por     xmm7, xmm8
    866         movdqa  xmm8, xmm4
    867         psrld   xmm8, 7
    868         pslld   xmm4, 25
    869         por     xmm4, xmm8
    870         paddd   xmm0, xmmword ptr [rsp+0xC0]
    871         paddd   xmm1, xmmword ptr [rsp+0x90]
    872         paddd   xmm2, xmmword ptr [rsp+0xF0]
    873         paddd   xmm3, xmmword ptr [rsp+0xE0]
    874         paddd   xmm0, xmm4
    875         paddd   xmm1, xmm5
    876         paddd   xmm2, xmm6
    877         paddd   xmm3, xmm7
    878         pxor    xmm12, xmm0
    879         pxor    xmm13, xmm1
    880         pxor    xmm14, xmm2
    881         pxor    xmm15, xmm3
    882         movdqa  xmm8, xmmword ptr [ROT16+rip]
    883         pshufb  xmm12, xmm8
    884         pshufb  xmm13, xmm8
    885         pshufb  xmm14, xmm8
    886         pshufb  xmm15, xmm8
    887         movdqa  xmm8, xmmword ptr [rsp+0x100]
    888         paddd   xmm8, xmm12
    889         paddd   xmm9, xmm13
    890         paddd   xmm10, xmm14
    891         paddd   xmm11, xmm15
    892         pxor    xmm4, xmm8
    893         pxor    xmm5, xmm9
    894         pxor    xmm6, xmm10
    895         pxor    xmm7, xmm11
    896         movdqa  xmmword ptr [rsp+0x100], xmm8
    897         movdqa  xmm8, xmm4
    898         psrld   xmm8, 12
    899         pslld   xmm4, 20
    900         por     xmm4, xmm8
    901         movdqa  xmm8, xmm5
    902         psrld   xmm8, 12
    903         pslld   xmm5, 20
    904         por     xmm5, xmm8
    905         movdqa  xmm8, xmm6
    906         psrld   xmm8, 12
    907         pslld   xmm6, 20
    908         por     xmm6, xmm8
    909         movdqa  xmm8, xmm7
    910         psrld   xmm8, 12
    911         pslld   xmm7, 20
    912         por     xmm7, xmm8
    913         paddd   xmm0, xmmword ptr [rsp+0xD0]
    914         paddd   xmm1, xmmword ptr [rsp+0xB0]
    915         paddd   xmm2, xmmword ptr [rsp+0xA0]
    916         paddd   xmm3, xmmword ptr [rsp+0x80]
    917         paddd   xmm0, xmm4
    918         paddd   xmm1, xmm5
    919         paddd   xmm2, xmm6
    920         paddd   xmm3, xmm7
    921         pxor    xmm12, xmm0
    922         pxor    xmm13, xmm1
    923         pxor    xmm14, xmm2
    924         pxor    xmm15, xmm3
    925         movdqa  xmm8, xmmword ptr [ROT8+rip]
    926         pshufb  xmm12, xmm8
    927         pshufb  xmm13, xmm8
    928         pshufb  xmm14, xmm8
    929         pshufb  xmm15, xmm8
    930         movdqa  xmm8, xmmword ptr [rsp+0x100]
    931         paddd   xmm8, xmm12
    932         paddd   xmm9, xmm13
    933         paddd   xmm10, xmm14
    934         paddd   xmm11, xmm15
    935         pxor    xmm4, xmm8
    936         pxor    xmm5, xmm9
    937         pxor    xmm6, xmm10
    938         pxor    xmm7, xmm11
    939         movdqa  xmmword ptr [rsp+0x100], xmm8
    940         movdqa  xmm8, xmm4
    941         psrld   xmm8, 7
    942         pslld   xmm4, 25
    943         por     xmm4, xmm8
    944         movdqa  xmm8, xmm5
    945         psrld   xmm8, 7
    946         pslld   xmm5, 25
    947         por     xmm5, xmm8
    948         movdqa  xmm8, xmm6
    949         psrld   xmm8, 7
    950         pslld   xmm6, 25
    951         por     xmm6, xmm8
    952         movdqa  xmm8, xmm7
    953         psrld   xmm8, 7
    954         pslld   xmm7, 25
    955         por     xmm7, xmm8
    956         paddd   xmm0, xmmword ptr [rsp+0x70]
    957         paddd   xmm1, xmmword ptr [rsp+0x50]
    958         paddd   xmm2, xmmword ptr [rsp]
    959         paddd   xmm3, xmmword ptr [rsp+0x60]
    960         paddd   xmm0, xmm5
    961         paddd   xmm1, xmm6
    962         paddd   xmm2, xmm7
    963         paddd   xmm3, xmm4
    964         pxor    xmm15, xmm0
    965         pxor    xmm12, xmm1
    966         pxor    xmm13, xmm2
    967         pxor    xmm14, xmm3
    968         movdqa  xmm8, xmmword ptr [ROT16+rip]
    969         pshufb  xmm15, xmm8
    970         pshufb  xmm12, xmm8
    971         pshufb  xmm13, xmm8
    972         pshufb  xmm14, xmm8
    973         paddd   xmm10, xmm15
    974         paddd   xmm11, xmm12
    975         movdqa  xmm8, xmmword ptr [rsp+0x100]
    976         paddd   xmm8, xmm13
    977         paddd   xmm9, xmm14
    978         pxor    xmm5, xmm10
    979         pxor    xmm6, xmm11
    980         pxor    xmm7, xmm8
    981         pxor    xmm4, xmm9
    982         movdqa  xmmword ptr [rsp+0x100], xmm8
    983         movdqa  xmm8, xmm5
    984         psrld   xmm8, 12
    985         pslld   xmm5, 20
    986         por     xmm5, xmm8
    987         movdqa  xmm8, xmm6
    988         psrld   xmm8, 12
    989         pslld   xmm6, 20
    990         por     xmm6, xmm8
    991         movdqa  xmm8, xmm7
    992         psrld   xmm8, 12
    993         pslld   xmm7, 20
    994         por     xmm7, xmm8
    995         movdqa  xmm8, xmm4
    996         psrld   xmm8, 12
    997         pslld   xmm4, 20
    998         por     xmm4, xmm8
    999         paddd   xmm0, xmmword ptr [rsp+0x20]
   1000         paddd   xmm1, xmmword ptr [rsp+0x30]
   1001         paddd   xmm2, xmmword ptr [rsp+0x10]
   1002         paddd   xmm3, xmmword ptr [rsp+0x40]
   1003         paddd   xmm0, xmm5
   1004         paddd   xmm1, xmm6
   1005         paddd   xmm2, xmm7
   1006         paddd   xmm3, xmm4
   1007         pxor    xmm15, xmm0
   1008         pxor    xmm12, xmm1
   1009         pxor    xmm13, xmm2
   1010         pxor    xmm14, xmm3
   1011         movdqa  xmm8, xmmword ptr [ROT8+rip]
   1012         pshufb  xmm15, xmm8
   1013         pshufb  xmm12, xmm8
   1014         pshufb  xmm13, xmm8
   1015         pshufb  xmm14, xmm8
   1016         paddd   xmm10, xmm15
   1017         paddd   xmm11, xmm12
   1018         movdqa  xmm8, xmmword ptr [rsp+0x100]
   1019         paddd   xmm8, xmm13
   1020         paddd   xmm9, xmm14
   1021         pxor    xmm5, xmm10
   1022         pxor    xmm6, xmm11
   1023         pxor    xmm7, xmm8
   1024         pxor    xmm4, xmm9
   1025         movdqa  xmmword ptr [rsp+0x100], xmm8
   1026         movdqa  xmm8, xmm5
   1027         psrld   xmm8, 7
   1028         pslld   xmm5, 25
   1029         por     xmm5, xmm8
   1030         movdqa  xmm8, xmm6
   1031         psrld   xmm8, 7
   1032         pslld   xmm6, 25
   1033         por     xmm6, xmm8
   1034         movdqa  xmm8, xmm7
   1035         psrld   xmm8, 7
   1036         pslld   xmm7, 25
   1037         por     xmm7, xmm8
   1038         movdqa  xmm8, xmm4
   1039         psrld   xmm8, 7
   1040         pslld   xmm4, 25
   1041         por     xmm4, xmm8
   1042         paddd   xmm0, xmmword ptr [rsp+0x90]
   1043         paddd   xmm1, xmmword ptr [rsp+0xB0]
   1044         paddd   xmm2, xmmword ptr [rsp+0x80]
   1045         paddd   xmm3, xmmword ptr [rsp+0xF0]
   1046         paddd   xmm0, xmm4
   1047         paddd   xmm1, xmm5
   1048         paddd   xmm2, xmm6
   1049         paddd   xmm3, xmm7
   1050         pxor    xmm12, xmm0
   1051         pxor    xmm13, xmm1
   1052         pxor    xmm14, xmm2
   1053         pxor    xmm15, xmm3
   1054         movdqa  xmm8, xmmword ptr [ROT16+rip]
   1055         pshufb  xmm12, xmm8
   1056         pshufb  xmm13, xmm8
   1057         pshufb  xmm14, xmm8
   1058         pshufb  xmm15, xmm8
   1059         movdqa  xmm8, xmmword ptr [rsp+0x100]
   1060         paddd   xmm8, xmm12
   1061         paddd   xmm9, xmm13
   1062         paddd   xmm10, xmm14
   1063         paddd   xmm11, xmm15
   1064         pxor    xmm4, xmm8
   1065         pxor    xmm5, xmm9
   1066         pxor    xmm6, xmm10
   1067         pxor    xmm7, xmm11
   1068         movdqa  xmmword ptr [rsp+0x100], xmm8
   1069         movdqa  xmm8, xmm4
   1070         psrld   xmm8, 12
   1071         pslld   xmm4, 20
   1072         por     xmm4, xmm8
   1073         movdqa  xmm8, xmm5
   1074         psrld   xmm8, 12
   1075         pslld   xmm5, 20
   1076         por     xmm5, xmm8
   1077         movdqa  xmm8, xmm6
   1078         psrld   xmm8, 12
   1079         pslld   xmm6, 20
   1080         por     xmm6, xmm8
   1081         movdqa  xmm8, xmm7
   1082         psrld   xmm8, 12
   1083         pslld   xmm7, 20
   1084         por     xmm7, xmm8
   1085         paddd   xmm0, xmmword ptr [rsp+0xE0]
   1086         paddd   xmm1, xmmword ptr [rsp+0x50]
   1087         paddd   xmm2, xmmword ptr [rsp+0xC0]
   1088         paddd   xmm3, xmmword ptr [rsp+0x10]
   1089         paddd   xmm0, xmm4
   1090         paddd   xmm1, xmm5
   1091         paddd   xmm2, xmm6
   1092         paddd   xmm3, xmm7
   1093         pxor    xmm12, xmm0
   1094         pxor    xmm13, xmm1
   1095         pxor    xmm14, xmm2
   1096         pxor    xmm15, xmm3
   1097         movdqa  xmm8, xmmword ptr [ROT8+rip]
   1098         pshufb  xmm12, xmm8
   1099         pshufb  xmm13, xmm8
   1100         pshufb  xmm14, xmm8
   1101         pshufb  xmm15, xmm8
   1102         movdqa  xmm8, xmmword ptr [rsp+0x100]
   1103         paddd   xmm8, xmm12
   1104         paddd   xmm9, xmm13
   1105         paddd   xmm10, xmm14
   1106         paddd   xmm11, xmm15
   1107         pxor    xmm4, xmm8
   1108         pxor    xmm5, xmm9
   1109         pxor    xmm6, xmm10
   1110         pxor    xmm7, xmm11
   1111         movdqa  xmmword ptr [rsp+0x100], xmm8
   1112         movdqa  xmm8, xmm4
   1113         psrld   xmm8, 7
   1114         pslld   xmm4, 25
   1115         por     xmm4, xmm8
   1116         movdqa  xmm8, xmm5
   1117         psrld   xmm8, 7
   1118         pslld   xmm5, 25
   1119         por     xmm5, xmm8
   1120         movdqa  xmm8, xmm6
   1121         psrld   xmm8, 7
   1122         pslld   xmm6, 25
   1123         por     xmm6, xmm8
   1124         movdqa  xmm8, xmm7
   1125         psrld   xmm8, 7
   1126         pslld   xmm7, 25
   1127         por     xmm7, xmm8
   1128         paddd   xmm0, xmmword ptr [rsp+0xD0]
   1129         paddd   xmm1, xmmword ptr [rsp]
   1130         paddd   xmm2, xmmword ptr [rsp+0x20]
   1131         paddd   xmm3, xmmword ptr [rsp+0x40]
   1132         paddd   xmm0, xmm5
   1133         paddd   xmm1, xmm6
   1134         paddd   xmm2, xmm7
   1135         paddd   xmm3, xmm4
   1136         pxor    xmm15, xmm0
   1137         pxor    xmm12, xmm1
   1138         pxor    xmm13, xmm2
   1139         pxor    xmm14, xmm3
   1140         movdqa  xmm8, xmmword ptr [ROT16+rip]
   1141         pshufb  xmm15, xmm8
   1142         pshufb  xmm12, xmm8
   1143         pshufb  xmm13, xmm8
   1144         pshufb  xmm14, xmm8
   1145         paddd   xmm10, xmm15
   1146         paddd   xmm11, xmm12
   1147         movdqa  xmm8, xmmword ptr [rsp+0x100]
   1148         paddd   xmm8, xmm13
   1149         paddd   xmm9, xmm14
   1150         pxor    xmm5, xmm10
   1151         pxor    xmm6, xmm11
   1152         pxor    xmm7, xmm8
   1153         pxor    xmm4, xmm9
   1154         movdqa  xmmword ptr [rsp+0x100], xmm8
   1155         movdqa  xmm8, xmm5
   1156         psrld   xmm8, 12
   1157         pslld   xmm5, 20
   1158         por     xmm5, xmm8
   1159         movdqa  xmm8, xmm6
   1160         psrld   xmm8, 12
   1161         pslld   xmm6, 20
   1162         por     xmm6, xmm8
   1163         movdqa  xmm8, xmm7
   1164         psrld   xmm8, 12
   1165         pslld   xmm7, 20
   1166         por     xmm7, xmm8
   1167         movdqa  xmm8, xmm4
   1168         psrld   xmm8, 12
   1169         pslld   xmm4, 20
   1170         por     xmm4, xmm8
   1171         paddd   xmm0, xmmword ptr [rsp+0x30]
   1172         paddd   xmm1, xmmword ptr [rsp+0xA0]
   1173         paddd   xmm2, xmmword ptr [rsp+0x60]
   1174         paddd   xmm3, xmmword ptr [rsp+0x70]
   1175         paddd   xmm0, xmm5
   1176         paddd   xmm1, xmm6
   1177         paddd   xmm2, xmm7
   1178         paddd   xmm3, xmm4
   1179         pxor    xmm15, xmm0
   1180         pxor    xmm12, xmm1
   1181         pxor    xmm13, xmm2
   1182         pxor    xmm14, xmm3
   1183         movdqa  xmm8, xmmword ptr [ROT8+rip]
   1184         pshufb  xmm15, xmm8
   1185         pshufb  xmm12, xmm8
   1186         pshufb  xmm13, xmm8
   1187         pshufb  xmm14, xmm8
   1188         paddd   xmm10, xmm15
   1189         paddd   xmm11, xmm12
   1190         movdqa  xmm8, xmmword ptr [rsp+0x100]
   1191         paddd   xmm8, xmm13
   1192         paddd   xmm9, xmm14
   1193         pxor    xmm5, xmm10
   1194         pxor    xmm6, xmm11
   1195         pxor    xmm7, xmm8
   1196         pxor    xmm4, xmm9
   1197         movdqa  xmmword ptr [rsp+0x100], xmm8
   1198         movdqa  xmm8, xmm5
   1199         psrld   xmm8, 7
   1200         pslld   xmm5, 25
   1201         por     xmm5, xmm8
   1202         movdqa  xmm8, xmm6
   1203         psrld   xmm8, 7
   1204         pslld   xmm6, 25
   1205         por     xmm6, xmm8
   1206         movdqa  xmm8, xmm7
   1207         psrld   xmm8, 7
   1208         pslld   xmm7, 25
   1209         por     xmm7, xmm8
   1210         movdqa  xmm8, xmm4
   1211         psrld   xmm8, 7
   1212         pslld   xmm4, 25
   1213         por     xmm4, xmm8
   1214         paddd   xmm0, xmmword ptr [rsp+0xB0]
   1215         paddd   xmm1, xmmword ptr [rsp+0x50]
   1216         paddd   xmm2, xmmword ptr [rsp+0x10]
   1217         paddd   xmm3, xmmword ptr [rsp+0x80]
   1218         paddd   xmm0, xmm4
   1219         paddd   xmm1, xmm5
   1220         paddd   xmm2, xmm6
   1221         paddd   xmm3, xmm7
   1222         pxor    xmm12, xmm0
   1223         pxor    xmm13, xmm1
   1224         pxor    xmm14, xmm2
   1225         pxor    xmm15, xmm3
   1226         movdqa  xmm8, xmmword ptr [ROT16+rip]
   1227         pshufb  xmm12, xmm8
   1228         pshufb  xmm13, xmm8
   1229         pshufb  xmm14, xmm8
   1230         pshufb  xmm15, xmm8
   1231         movdqa  xmm8, xmmword ptr [rsp+0x100]
   1232         paddd   xmm8, xmm12
   1233         paddd   xmm9, xmm13
   1234         paddd   xmm10, xmm14
   1235         paddd   xmm11, xmm15
   1236         pxor    xmm4, xmm8
   1237         pxor    xmm5, xmm9
   1238         pxor    xmm6, xmm10
   1239         pxor    xmm7, xmm11
   1240         movdqa  xmmword ptr [rsp+0x100], xmm8
   1241         movdqa  xmm8, xmm4
   1242         psrld   xmm8, 12
   1243         pslld   xmm4, 20
   1244         por     xmm4, xmm8
   1245         movdqa  xmm8, xmm5
   1246         psrld   xmm8, 12
   1247         pslld   xmm5, 20
   1248         por     xmm5, xmm8
   1249         movdqa  xmm8, xmm6
   1250         psrld   xmm8, 12
   1251         pslld   xmm6, 20
   1252         por     xmm6, xmm8
   1253         movdqa  xmm8, xmm7
   1254         psrld   xmm8, 12
   1255         pslld   xmm7, 20
   1256         por     xmm7, xmm8
   1257         paddd   xmm0, xmmword ptr [rsp+0xF0]
   1258         paddd   xmm1, xmmword ptr [rsp]
   1259         paddd   xmm2, xmmword ptr [rsp+0x90]
   1260         paddd   xmm3, xmmword ptr [rsp+0x60]
   1261         paddd   xmm0, xmm4
   1262         paddd   xmm1, xmm5
   1263         paddd   xmm2, xmm6
   1264         paddd   xmm3, xmm7
   1265         pxor    xmm12, xmm0
   1266         pxor    xmm13, xmm1
   1267         pxor    xmm14, xmm2
   1268         pxor    xmm15, xmm3
   1269         movdqa  xmm8, xmmword ptr [ROT8+rip]
   1270         pshufb  xmm12, xmm8
   1271         pshufb  xmm13, xmm8
   1272         pshufb  xmm14, xmm8
   1273         pshufb  xmm15, xmm8
   1274         movdqa  xmm8, xmmword ptr [rsp+0x100]
   1275         paddd   xmm8, xmm12
   1276         paddd   xmm9, xmm13
   1277         paddd   xmm10, xmm14
   1278         paddd   xmm11, xmm15
   1279         pxor    xmm4, xmm8
   1280         pxor    xmm5, xmm9
   1281         pxor    xmm6, xmm10
   1282         pxor    xmm7, xmm11
   1283         movdqa  xmmword ptr [rsp+0x100], xmm8
   1284         movdqa  xmm8, xmm4
   1285         psrld   xmm8, 7
   1286         pslld   xmm4, 25
   1287         por     xmm4, xmm8
   1288         movdqa  xmm8, xmm5
   1289         psrld   xmm8, 7
   1290         pslld   xmm5, 25
   1291         por     xmm5, xmm8
   1292         movdqa  xmm8, xmm6
   1293         psrld   xmm8, 7
   1294         pslld   xmm6, 25
   1295         por     xmm6, xmm8
   1296         movdqa  xmm8, xmm7
   1297         psrld   xmm8, 7
   1298         pslld   xmm7, 25
   1299         por     xmm7, xmm8
   1300         paddd   xmm0, xmmword ptr [rsp+0xE0]
   1301         paddd   xmm1, xmmword ptr [rsp+0x20]
   1302         paddd   xmm2, xmmword ptr [rsp+0x30]
   1303         paddd   xmm3, xmmword ptr [rsp+0x70]
   1304         paddd   xmm0, xmm5
   1305         paddd   xmm1, xmm6
   1306         paddd   xmm2, xmm7
   1307         paddd   xmm3, xmm4
   1308         pxor    xmm15, xmm0
   1309         pxor    xmm12, xmm1
   1310         pxor    xmm13, xmm2
   1311         pxor    xmm14, xmm3
   1312         movdqa  xmm8, xmmword ptr [ROT16+rip]
   1313         pshufb  xmm15, xmm8
   1314         pshufb  xmm12, xmm8
   1315         pshufb  xmm13, xmm8
   1316         pshufb  xmm14, xmm8
   1317         paddd   xmm10, xmm15
   1318         paddd   xmm11, xmm12
   1319         movdqa  xmm8, xmmword ptr [rsp+0x100]
   1320         paddd   xmm8, xmm13
   1321         paddd   xmm9, xmm14
   1322         pxor    xmm5, xmm10
   1323         pxor    xmm6, xmm11
   1324         pxor    xmm7, xmm8
   1325         pxor    xmm4, xmm9
   1326         movdqa  xmmword ptr [rsp+0x100], xmm8
   1327         movdqa  xmm8, xmm5
   1328         psrld   xmm8, 12
   1329         pslld   xmm5, 20
   1330         por     xmm5, xmm8
   1331         movdqa  xmm8, xmm6
   1332         psrld   xmm8, 12
   1333         pslld   xmm6, 20
   1334         por     xmm6, xmm8
   1335         movdqa  xmm8, xmm7
   1336         psrld   xmm8, 12
   1337         pslld   xmm7, 20
   1338         por     xmm7, xmm8
   1339         movdqa  xmm8, xmm4
   1340         psrld   xmm8, 12
   1341         pslld   xmm4, 20
   1342         por     xmm4, xmm8
   1343         paddd   xmm0, xmmword ptr [rsp+0xA0]
   1344         paddd   xmm1, xmmword ptr [rsp+0xC0]
   1345         paddd   xmm2, xmmword ptr [rsp+0x40]
   1346         paddd   xmm3, xmmword ptr [rsp+0xD0]
   1347         paddd   xmm0, xmm5
   1348         paddd   xmm1, xmm6
   1349         paddd   xmm2, xmm7
   1350         paddd   xmm3, xmm4
   1351         pxor    xmm15, xmm0
   1352         pxor    xmm12, xmm1
   1353         pxor    xmm13, xmm2
   1354         pxor    xmm14, xmm3
   1355         movdqa  xmm8, xmmword ptr [ROT8+rip]
   1356         pshufb  xmm15, xmm8
   1357         pshufb  xmm12, xmm8
   1358         pshufb  xmm13, xmm8
   1359         pshufb  xmm14, xmm8
   1360         paddd   xmm10, xmm15
   1361         paddd   xmm11, xmm12
   1362         movdqa  xmm8, xmmword ptr [rsp+0x100]
   1363         paddd   xmm8, xmm13
   1364         paddd   xmm9, xmm14
   1365         pxor    xmm5, xmm10
   1366         pxor    xmm6, xmm11
   1367         pxor    xmm7, xmm8
   1368         pxor    xmm4, xmm9
   1369         pxor    xmm0, xmm8
   1370         pxor    xmm1, xmm9
   1371         pxor    xmm2, xmm10
   1372         pxor    xmm3, xmm11
   1373         movdqa  xmm8, xmm5
   1374         psrld   xmm8, 7
   1375         pslld   xmm5, 25
   1376         por     xmm5, xmm8
   1377         movdqa  xmm8, xmm6
   1378         psrld   xmm8, 7
   1379         pslld   xmm6, 25
   1380         por     xmm6, xmm8
   1381         movdqa  xmm8, xmm7
   1382         psrld   xmm8, 7
   1383         pslld   xmm7, 25
   1384         por     xmm7, xmm8
   1385         movdqa  xmm8, xmm4
   1386         psrld   xmm8, 7
   1387         pslld   xmm4, 25
   1388         por     xmm4, xmm8
   1389         pxor    xmm4, xmm12
   1390         pxor    xmm5, xmm13
   1391         pxor    xmm6, xmm14
   1392         pxor    xmm7, xmm15
   1393         mov     eax, r13d
   1394         jne     9b
   1395         movdqa  xmm9, xmm0
   1396         punpckldq xmm0, xmm1
   1397         punpckhdq xmm9, xmm1
   1398         movdqa  xmm11, xmm2
   1399         punpckldq xmm2, xmm3
   1400         punpckhdq xmm11, xmm3
   1401         movdqa  xmm1, xmm0
   1402         punpcklqdq xmm0, xmm2
   1403         punpckhqdq xmm1, xmm2
   1404         movdqa  xmm3, xmm9
   1405         punpcklqdq xmm9, xmm11
   1406         punpckhqdq xmm3, xmm11
   1407         movdqu  xmmword ptr [rbx], xmm0
   1408         movdqu  xmmword ptr [rbx+0x20], xmm1
   1409         movdqu  xmmword ptr [rbx+0x40], xmm9
   1410         movdqu  xmmword ptr [rbx+0x60], xmm3
   1411         movdqa  xmm9, xmm4
   1412         punpckldq xmm4, xmm5
   1413         punpckhdq xmm9, xmm5
   1414         movdqa  xmm11, xmm6
   1415         punpckldq xmm6, xmm7
   1416         punpckhdq xmm11, xmm7
   1417         movdqa  xmm5, xmm4
   1418         punpcklqdq xmm4, xmm6
   1419         punpckhqdq xmm5, xmm6
   1420         movdqa  xmm7, xmm9
   1421         punpcklqdq xmm9, xmm11
   1422         punpckhqdq xmm7, xmm11
   1423         movdqu  xmmword ptr [rbx+0x10], xmm4
   1424         movdqu  xmmword ptr [rbx+0x30], xmm5
   1425         movdqu  xmmword ptr [rbx+0x50], xmm9
   1426         movdqu  xmmword ptr [rbx+0x70], xmm7
   1427         movdqa  xmm1, xmmword ptr [rsp+0x110]
   1428         movdqa  xmm0, xmm1
   1429         paddd   xmm1, xmmword ptr [rsp+0x150]
   1430         movdqa  xmmword ptr [rsp+0x110], xmm1
   1431         pxor    xmm0, xmmword ptr [CMP_MSB_MASK+rip]
   1432         pxor    xmm1, xmmword ptr [CMP_MSB_MASK+rip]
   1433         pcmpgtd xmm0, xmm1
   1434         movdqa  xmm1, xmmword ptr [rsp+0x120]
   1435         psubd   xmm1, xmm0
   1436         movdqa  xmmword ptr [rsp+0x120], xmm1
   1437         add     rbx, 128
   1438         add     rdi, 32
   1439         sub     rsi, 4
   1440         cmp     rsi, 4
   1441         jnc     2b
   1442         test    rsi, rsi
   1443         jne     3f
   1444 4:
   1445         movdqa  xmm6, xmmword ptr [rsp+0x170]
   1446         movdqa  xmm7, xmmword ptr [rsp+0x180]
   1447         movdqa  xmm8, xmmword ptr [rsp+0x190]
   1448         movdqa  xmm9, xmmword ptr [rsp+0x1A0]
   1449         movdqa  xmm10, xmmword ptr [rsp+0x1B0]
   1450         movdqa  xmm11, xmmword ptr [rsp+0x1C0]
   1451         movdqa  xmm12, xmmword ptr [rsp+0x1D0]
   1452         movdqa  xmm13, xmmword ptr [rsp+0x1E0]
   1453         movdqa  xmm14, xmmword ptr [rsp+0x1F0]
   1454         movdqa  xmm15, xmmword ptr [rsp+0x200]
   1455         mov     rsp, rbp
   1456         pop     rbp
   1457         pop     rbx
   1458         pop     rdi
   1459         pop     rsi
   1460         pop     r12
   1461         pop     r13
   1462         pop     r14
   1463         pop     r15
   1464         ret
   1465 .p2align 5
   1466 3:
   1467         test    esi, 0x2
   1468         je      3f
   1469         movups  xmm0, xmmword ptr [rcx]
   1470         movups  xmm1, xmmword ptr [rcx+0x10]
   1471         movaps  xmm8, xmm0
   1472         movaps  xmm9, xmm1
   1473         movd    xmm13, dword ptr [rsp+0x110]
   1474         pinsrd  xmm13, dword ptr [rsp+0x120], 1
   1475         pinsrd  xmm13, dword ptr [BLAKE3_BLOCK_LEN+rip], 2
   1476         movaps  xmmword ptr [rsp], xmm13
   1477         movd    xmm14, dword ptr [rsp+0x114]
   1478         pinsrd  xmm14, dword ptr [rsp+0x124], 1
   1479         pinsrd  xmm14, dword ptr [BLAKE3_BLOCK_LEN+rip], 2
   1480         movaps  xmmword ptr [rsp+0x10], xmm14
   1481         mov     r8, qword ptr [rdi]
   1482         mov     r9, qword ptr [rdi+0x8]
   1483         movzx   eax, byte ptr [rbp+0x80]
   1484         or      eax, r13d
   1485         xor     edx, edx
   1486 2:
   1487         mov     r14d, eax
   1488         or      eax, r12d
   1489         add     rdx, 64
   1490         cmp     rdx, r15
   1491         cmovne  eax, r14d
   1492         movaps  xmm2, xmmword ptr [BLAKE3_IV+rip]
   1493         movaps  xmm10, xmm2
   1494         movups  xmm4, xmmword ptr [r8+rdx-0x40]
   1495         movups  xmm5, xmmword ptr [r8+rdx-0x30]
   1496         movaps  xmm3, xmm4
   1497         shufps  xmm4, xmm5, 136
   1498         shufps  xmm3, xmm5, 221
   1499         movaps  xmm5, xmm3
   1500         movups  xmm6, xmmword ptr [r8+rdx-0x20]
   1501         movups  xmm7, xmmword ptr [r8+rdx-0x10]
   1502         movaps  xmm3, xmm6
   1503         shufps  xmm6, xmm7, 136
   1504         pshufd  xmm6, xmm6, 0x93
   1505         shufps  xmm3, xmm7, 221
   1506         pshufd  xmm7, xmm3, 0x93
   1507         movups  xmm12, xmmword ptr [r9+rdx-0x40]
   1508         movups  xmm13, xmmword ptr [r9+rdx-0x30]
   1509         movaps  xmm11, xmm12
   1510         shufps  xmm12, xmm13, 136
   1511         shufps  xmm11, xmm13, 221
   1512         movaps  xmm13, xmm11
   1513         movups  xmm14, xmmword ptr [r9+rdx-0x20]
   1514         movups  xmm15, xmmword ptr [r9+rdx-0x10]
   1515         movaps  xmm11, xmm14
   1516         shufps  xmm14, xmm15, 136
   1517         pshufd  xmm14, xmm14, 0x93
   1518         shufps  xmm11, xmm15, 221
   1519         pshufd  xmm15, xmm11, 0x93
   1520         movaps  xmm3, xmmword ptr [rsp]
   1521         movaps  xmm11, xmmword ptr [rsp+0x10]
   1522         pinsrd  xmm3, eax, 3
   1523         pinsrd  xmm11, eax, 3
   1524         mov     al, 7
   1525 9:
   1526         paddd   xmm0, xmm4
   1527         paddd   xmm8, xmm12
   1528         movaps  xmmword ptr [rsp+0x20], xmm4
   1529         movaps  xmmword ptr [rsp+0x30], xmm12
   1530         paddd   xmm0, xmm1
   1531         paddd   xmm8, xmm9
   1532         pxor    xmm3, xmm0
   1533         pxor    xmm11, xmm8
   1534         movaps  xmm12, xmmword ptr [ROT16+rip]
   1535         pshufb  xmm3, xmm12
   1536         pshufb  xmm11, xmm12
   1537         paddd   xmm2, xmm3
   1538         paddd   xmm10, xmm11
   1539         pxor    xmm1, xmm2
   1540         pxor    xmm9, xmm10
   1541         movdqa  xmm4, xmm1
   1542         pslld   xmm1, 20
   1543         psrld   xmm4, 12
   1544         por     xmm1, xmm4
   1545         movdqa  xmm4, xmm9
   1546         pslld   xmm9, 20
   1547         psrld   xmm4, 12
   1548         por     xmm9, xmm4
   1549         paddd   xmm0, xmm5
   1550         paddd   xmm8, xmm13
   1551         movaps  xmmword ptr [rsp+0x40], xmm5
   1552         movaps  xmmword ptr [rsp+0x50], xmm13
   1553         paddd   xmm0, xmm1
   1554         paddd   xmm8, xmm9
   1555         pxor    xmm3, xmm0
   1556         pxor    xmm11, xmm8
   1557         movaps  xmm13, xmmword ptr [ROT8+rip]
   1558         pshufb  xmm3, xmm13
   1559         pshufb  xmm11, xmm13
   1560         paddd   xmm2, xmm3
   1561         paddd   xmm10, xmm11
   1562         pxor    xmm1, xmm2
   1563         pxor    xmm9, xmm10
   1564         movdqa  xmm4, xmm1
   1565         pslld   xmm1, 25
   1566         psrld   xmm4, 7
   1567         por     xmm1, xmm4
   1568         movdqa  xmm4, xmm9
   1569         pslld   xmm9, 25
   1570         psrld   xmm4, 7
   1571         por     xmm9, xmm4
   1572         pshufd  xmm0, xmm0, 0x93
   1573         pshufd  xmm8, xmm8, 0x93
   1574         pshufd  xmm3, xmm3, 0x4E
   1575         pshufd  xmm11, xmm11, 0x4E
   1576         pshufd  xmm2, xmm2, 0x39
   1577         pshufd  xmm10, xmm10, 0x39
   1578         paddd   xmm0, xmm6
   1579         paddd   xmm8, xmm14
   1580         paddd   xmm0, xmm1
   1581         paddd   xmm8, xmm9
   1582         pxor    xmm3, xmm0
   1583         pxor    xmm11, xmm8
   1584         pshufb  xmm3, xmm12
   1585         pshufb  xmm11, xmm12
   1586         paddd   xmm2, xmm3
   1587         paddd   xmm10, xmm11
   1588         pxor    xmm1, xmm2
   1589         pxor    xmm9, xmm10
   1590         movdqa  xmm4, xmm1
   1591         pslld   xmm1, 20
   1592         psrld   xmm4, 12
   1593         por     xmm1, xmm4
   1594         movdqa  xmm4, xmm9
   1595         pslld   xmm9, 20
   1596         psrld   xmm4, 12
   1597         por     xmm9, xmm4
   1598         paddd   xmm0, xmm7
   1599         paddd   xmm8, xmm15
   1600         paddd   xmm0, xmm1
   1601         paddd   xmm8, xmm9
   1602         pxor    xmm3, xmm0
   1603         pxor    xmm11, xmm8
   1604         pshufb  xmm3, xmm13
   1605         pshufb  xmm11, xmm13
   1606         paddd   xmm2, xmm3
   1607         paddd   xmm10, xmm11
   1608         pxor    xmm1, xmm2
   1609         pxor    xmm9, xmm10
   1610         movdqa  xmm4, xmm1
   1611         pslld   xmm1, 25
   1612         psrld   xmm4, 7
   1613         por     xmm1, xmm4
   1614         movdqa  xmm4, xmm9
   1615         pslld   xmm9, 25
   1616         psrld   xmm4, 7
   1617         por     xmm9, xmm4
   1618         pshufd  xmm0, xmm0, 0x39
   1619         pshufd  xmm8, xmm8, 0x39
   1620         pshufd  xmm3, xmm3, 0x4E
   1621         pshufd  xmm11, xmm11, 0x4E
   1622         pshufd  xmm2, xmm2, 0x93
   1623         pshufd  xmm10, xmm10, 0x93
   1624         dec     al
   1625         je      9f
   1626         movdqa  xmm12, xmmword ptr [rsp+0x20]
   1627         movdqa  xmm5, xmmword ptr [rsp+0x40]
   1628         pshufd  xmm13, xmm12, 0x0F
   1629         shufps  xmm12, xmm5, 214
   1630         pshufd  xmm4, xmm12, 0x39
   1631         movdqa  xmm12, xmm6
   1632         shufps  xmm12, xmm7, 250
   1633         pblendw xmm13, xmm12, 0xCC
   1634         movdqa  xmm12, xmm7
   1635         punpcklqdq xmm12, xmm5
   1636         pblendw xmm12, xmm6, 0xC0
   1637         pshufd  xmm12, xmm12, 0x78
   1638         punpckhdq xmm5, xmm7
   1639         punpckldq xmm6, xmm5
   1640         pshufd  xmm7, xmm6, 0x1E
   1641         movdqa  xmmword ptr [rsp+0x20], xmm13
   1642         movdqa  xmmword ptr [rsp+0x40], xmm12
   1643         movdqa  xmm5, xmmword ptr [rsp+0x30]
   1644         movdqa  xmm13, xmmword ptr [rsp+0x50]
   1645         pshufd  xmm6, xmm5, 0x0F
   1646         shufps  xmm5, xmm13, 214
   1647         pshufd  xmm12, xmm5, 0x39
   1648         movdqa  xmm5, xmm14
   1649         shufps  xmm5, xmm15, 250
   1650         pblendw xmm6, xmm5, 0xCC
   1651         movdqa  xmm5, xmm15
   1652         punpcklqdq xmm5, xmm13
   1653         pblendw xmm5, xmm14, 0xC0
   1654         pshufd  xmm5, xmm5, 0x78
   1655         punpckhdq xmm13, xmm15
   1656         punpckldq xmm14, xmm13
   1657         pshufd  xmm15, xmm14, 0x1E
   1658         movdqa  xmm13, xmm6
   1659         movdqa  xmm14, xmm5
   1660         movdqa  xmm5, xmmword ptr [rsp+0x20]
   1661         movdqa  xmm6, xmmword ptr [rsp+0x40]
   1662         jmp     9b
   1663 9:
   1664         pxor    xmm0, xmm2
   1665         pxor    xmm1, xmm3
   1666         pxor    xmm8, xmm10
   1667         pxor    xmm9, xmm11
   1668         mov     eax, r13d
   1669         cmp     rdx, r15
   1670         jne     2b
   1671         movups  xmmword ptr [rbx], xmm0
   1672         movups  xmmword ptr [rbx+0x10], xmm1
   1673         movups  xmmword ptr [rbx+0x20], xmm8
   1674         movups  xmmword ptr [rbx+0x30], xmm9
   1675         movdqa  xmm0, xmmword ptr [rsp+0x130]
   1676         movdqa  xmm1, xmmword ptr [rsp+0x110]
   1677         movdqa  xmm2, xmmword ptr [rsp+0x120]
   1678         movdqu  xmm3, xmmword ptr [rsp+0x118]
   1679         movdqu  xmm4, xmmword ptr [rsp+0x128]
   1680         blendvps xmm1, xmm3, xmm0
   1681         blendvps xmm2, xmm4, xmm0
   1682         movdqa  xmmword ptr [rsp+0x110], xmm1
   1683         movdqa  xmmword ptr [rsp+0x120], xmm2
   1684         add     rdi, 16
   1685         add     rbx, 64
   1686         sub     rsi, 2
   1687 3:
   1688         test    esi, 0x1
   1689         je      4b
   1690         movups  xmm0, xmmword ptr [rcx]
   1691         movups  xmm1, xmmword ptr [rcx+0x10]
   1692         movd    xmm13, dword ptr [rsp+0x110]
   1693         pinsrd  xmm13, dword ptr [rsp+0x120], 1
   1694         pinsrd  xmm13, dword ptr [BLAKE3_BLOCK_LEN+rip], 2
   1695         movaps  xmm14, xmmword ptr [ROT8+rip]
   1696         movaps  xmm15, xmmword ptr [ROT16+rip]
   1697         mov     r8, qword ptr [rdi]
   1698         movzx   eax, byte ptr [rbp+0x80]
   1699         or      eax, r13d
   1700         xor     edx, edx
   1701 2:
   1702         mov     r14d, eax
   1703         or      eax, r12d
   1704         add     rdx, 64
   1705         cmp     rdx, r15
   1706         cmovne  eax, r14d
   1707         movaps  xmm2, xmmword ptr [BLAKE3_IV+rip]
   1708         movaps  xmm3, xmm13
   1709         pinsrd  xmm3, eax, 3
   1710         movups  xmm4, xmmword ptr [r8+rdx-0x40]
   1711         movups  xmm5, xmmword ptr [r8+rdx-0x30]
   1712         movaps  xmm8, xmm4
   1713         shufps  xmm4, xmm5, 136
   1714         shufps  xmm8, xmm5, 221
   1715         movaps  xmm5, xmm8
   1716         movups  xmm6, xmmword ptr [r8+rdx-0x20]
   1717         movups  xmm7, xmmword ptr [r8+rdx-0x10]
   1718         movaps  xmm8, xmm6
   1719         shufps  xmm6, xmm7, 136
   1720         pshufd  xmm6, xmm6, 0x93
   1721         shufps  xmm8, xmm7, 221
   1722         pshufd  xmm7, xmm8, 0x93
   1723         mov     al, 7
   1724 9:
   1725         paddd   xmm0, xmm4
   1726         paddd   xmm0, xmm1
   1727         pxor    xmm3, xmm0
   1728         pshufb  xmm3, xmm15
   1729         paddd   xmm2, xmm3
   1730         pxor    xmm1, xmm2
   1731         movdqa  xmm11, xmm1
   1732         pslld   xmm1, 20
   1733         psrld   xmm11, 12
   1734         por     xmm1, xmm11
   1735         paddd   xmm0, xmm5
   1736         paddd   xmm0, xmm1
   1737         pxor    xmm3, xmm0
   1738         pshufb  xmm3, xmm14
   1739         paddd   xmm2, xmm3
   1740         pxor    xmm1, xmm2
   1741         movdqa  xmm11, xmm1
   1742         pslld   xmm1, 25
   1743         psrld   xmm11, 7
   1744         por     xmm1, xmm11
   1745         pshufd  xmm0, xmm0, 0x93
   1746         pshufd  xmm3, xmm3, 0x4E
   1747         pshufd  xmm2, xmm2, 0x39
   1748         paddd   xmm0, xmm6
   1749         paddd   xmm0, xmm1
   1750         pxor    xmm3, xmm0
   1751         pshufb  xmm3, xmm15
   1752         paddd   xmm2, xmm3
   1753         pxor    xmm1, xmm2
   1754         movdqa  xmm11, xmm1
   1755         pslld   xmm1, 20
   1756         psrld   xmm11, 12
   1757         por     xmm1, xmm11
   1758         paddd   xmm0, xmm7
   1759         paddd   xmm0, xmm1
   1760         pxor    xmm3, xmm0
   1761         pshufb  xmm3, xmm14
   1762         paddd   xmm2, xmm3
   1763         pxor    xmm1, xmm2
   1764         movdqa  xmm11, xmm1
   1765         pslld   xmm1, 25
   1766         psrld   xmm11, 7
   1767         por     xmm1, xmm11
   1768         pshufd  xmm0, xmm0, 0x39
   1769         pshufd  xmm3, xmm3, 0x4E
   1770         pshufd  xmm2, xmm2, 0x93
   1771         dec     al
   1772         jz      9f
   1773         movdqa  xmm8, xmm4
   1774         shufps  xmm8, xmm5, 214
   1775         pshufd  xmm9, xmm4, 0x0F
   1776         pshufd  xmm4, xmm8, 0x39
   1777         movdqa  xmm8, xmm6
   1778         shufps  xmm8, xmm7, 250
   1779         pblendw xmm9, xmm8, 0xCC
   1780         movdqa  xmm8, xmm7
   1781         punpcklqdq xmm8, xmm5
   1782         pblendw xmm8, xmm6, 0xC0
   1783         pshufd  xmm8, xmm8, 0x78
   1784         punpckhdq xmm5, xmm7
   1785         punpckldq xmm6, xmm5
   1786         pshufd  xmm7, xmm6, 0x1E
   1787         movdqa  xmm5, xmm9
   1788         movdqa  xmm6, xmm8
   1789         jmp     9b
   1790 9:
   1791         pxor    xmm0, xmm2
   1792         pxor    xmm1, xmm3
   1793         mov     eax, r13d
   1794         cmp     rdx, r15
   1795         jne     2b
   1796         movups  xmmword ptr [rbx], xmm0
   1797         movups  xmmword ptr [rbx+0x10], xmm1
   1798         jmp     4b
   1799 
   1800 .p2align 6
        # ----------------------------------------------------------------------
        # blake3_compress_in_place_sse41 / _blake3_compress_in_place_sse41
        #
        # Runs the 7-round compression loop below on a 4x4 state of 32-bit words
        # held in xmm0-xmm3 (one row per register) and writes the 32-byte result
        # back over the input state. Arguments follow the Microsoft x64 calling
        # convention:
        #   rcx         -> 32 bytes, loaded as rows 0-1 and overwritten on exit
        #   rdx         -> 64 message bytes, read with unaligned loads
        #   r8b         byte placed in lane 2 of row 3
        #               (presumably block_len -- confirm against the C prototype)
        #   r9          64-bit value placed in lanes 0-1 of row 3
        #               (presumably the block counter -- confirm)
        #   [rsp+0x28]  at entry: byte placed in lane 3 of row 3
        #               (presumably the flags byte -- confirm)
        # Saves and restores xmm6-xmm9, xmm11, xmm14 and xmm15.
        # Overwrites rax, r8, xmm0-xmm5 and the flags. Makes no calls.
        # ----------------------------------------------------------------------
   1801 blake3_compress_in_place_sse41:
   1802 _blake3_compress_in_place_sse41:
   1803         sub     rsp, 120                        # 7 x 16-byte save slots (+8); with rsp = 8 mod 16 at entry this leaves rsp 16-byte aligned for movdqa
   1804         movdqa  xmmword ptr [rsp], xmm6         # spill every callee-saved xmm register used below
   1805         movdqa  xmmword ptr [rsp+0x10], xmm7
   1806         movdqa  xmmword ptr [rsp+0x20], xmm8
   1807         movdqa  xmmword ptr [rsp+0x30], xmm9
   1808         movdqa  xmmword ptr [rsp+0x40], xmm11
   1809         movdqa  xmmword ptr [rsp+0x50], xmm14
   1810         movdqa  xmmword ptr [rsp+0x60], xmm15
   1811         movups  xmm0, xmmword ptr [rcx]         # row 0 = input words 0-3
   1812         movups  xmm1, xmmword ptr [rcx+0x10]    # row 1 = input words 4-7
   1813         movaps  xmm2, xmmword ptr [BLAKE3_IV+rip]       # row 2 = the four constants at BLAKE3_IV
   1814         movzx   eax, byte ptr [rsp+0xA0]        # fifth argument: 120 (locals) + 8 (return address) + 32 (shadow space) = 0xA0
   1815         movzx   r8d, r8b                        # keep only the low byte of the third argument
   1816         shl     rax, 32
   1817         add     r8, rax                         # r8 = r8b | (stack byte << 32)
   1818         movq    xmm3, r9
   1819         movq    xmm4, r8
   1820         punpcklqdq xmm3, xmm4                   # row 3 = { r9 low, r9 high, r8b, stack byte }
   1821         movups  xmm4, xmmword ptr [rdx]         # message words 0-3
   1822         movups  xmm5, xmmword ptr [rdx+0x10]    # message words 4-7
   1823         movaps  xmm8, xmm4
   1824         shufps  xmm4, xmm5, 136                 # xmm4 = words 0, 2, 4, 6
   1825         shufps  xmm8, xmm5, 221                 # xmm8 = words 1, 3, 5, 7
   1826         movaps  xmm5, xmm8
   1827         movups  xmm6, xmmword ptr [rdx+0x20]    # message words 8-11
   1828         movups  xmm7, xmmword ptr [rdx+0x30]    # message words 12-15
   1829         movaps  xmm8, xmm6
   1830         shufps  xmm6, xmm7, 136                 # words 8, 10, 12, 14 ...
   1831         pshufd  xmm6, xmm6, 0x93                # ... reordered to 14, 8, 10, 12
   1832         shufps  xmm8, xmm7, 221                 # words 9, 11, 13, 15 ...
   1833         pshufd  xmm7, xmm8, 0x93                # ... reordered to 15, 9, 11, 13
   1834         movaps  xmm14, xmmword ptr [ROT8+rip]   # pshufb mask: rotate each dword right by 8
   1835         movaps  xmm15, xmmword ptr [ROT16+rip]  # pshufb mask: rotate each dword by 16
   1836         mov     al, 7                           # round counter: the loop body runs 7 times
   1837 9:
   1838         paddd   xmm0, xmm4                      # first half-round: row0 += message vector xmm4
   1839         paddd   xmm0, xmm1                      # row0 += row1
   1840         pxor    xmm3, xmm0                      # row3 ^= row0
   1841         pshufb  xmm3, xmm15                     # row3 rotated by 16 bits per dword
   1842         paddd   xmm2, xmm3                      # row2 += row3
   1843         pxor    xmm1, xmm2                      # row1 ^= row2
   1844         movdqa  xmm11, xmm1                     # xmm11 is the scratch register for shift-based rotates
   1845         pslld   xmm1, 20
   1846         psrld   xmm11, 12
   1847         por     xmm1, xmm11                     # row1 rotated right by 12
   1848         paddd   xmm0, xmm5                      # row0 += message vector xmm5
   1849         paddd   xmm0, xmm1
   1850         pxor    xmm3, xmm0
   1851         pshufb  xmm3, xmm14                     # row3 rotated right by 8
   1852         paddd   xmm2, xmm3
   1853         pxor    xmm1, xmm2
   1854         movdqa  xmm11, xmm1
   1855         pslld   xmm1, 25
   1856         psrld   xmm11, 7
   1857         por     xmm1, xmm11                     # row1 rotated right by 7
   1858         pshufd  xmm0, xmm0, 0x93                # rotate the lanes of rows 0, 3 and 2 before the second half-round
   1859         pshufd  xmm3, xmm3, 0x4E
   1860         pshufd  xmm2, xmm2, 0x39
   1861         paddd   xmm0, xmm6                      # second half-round: same sequence with message vectors xmm6 / xmm7
   1862         paddd   xmm0, xmm1
   1863         pxor    xmm3, xmm0
   1864         pshufb  xmm3, xmm15
   1865         paddd   xmm2, xmm3
   1866         pxor    xmm1, xmm2
   1867         movdqa  xmm11, xmm1
   1868         pslld   xmm1, 20
   1869         psrld   xmm11, 12
   1870         por     xmm1, xmm11
   1871         paddd   xmm0, xmm7
   1872         paddd   xmm0, xmm1
   1873         pxor    xmm3, xmm0
   1874         pshufb  xmm3, xmm14
   1875         paddd   xmm2, xmm3
   1876         pxor    xmm1, xmm2
   1877         movdqa  xmm11, xmm1
   1878         pslld   xmm1, 25
   1879         psrld   xmm11, 7
   1880         por     xmm1, xmm11
   1881         pshufd  xmm0, xmm0, 0x39                # undo the lane rotation applied before the second half-round
   1882         pshufd  xmm3, xmm3, 0x4E
   1883         pshufd  xmm2, xmm2, 0x93
   1884         dec     al
   1885         jz      9f                              # after the 7th round skip the message permutation and finish
   1886         movdqa  xmm8, xmm4                      # permute the message vectors xmm4-xmm7 for the next round (xmm8 / xmm9 are scratch)
   1887         shufps  xmm8, xmm5, 214
   1888         pshufd  xmm9, xmm4, 0x0F
   1889         pshufd  xmm4, xmm8, 0x39                # next-round xmm4
   1890         movdqa  xmm8, xmm6
   1891         shufps  xmm8, xmm7, 250
   1892         pblendw xmm9, xmm8, 0xCC                # xmm9 = next-round xmm5
   1893         movdqa  xmm8, xmm7
   1894         punpcklqdq xmm8, xmm5
   1895         pblendw xmm8, xmm6, 0xC0
   1896         pshufd  xmm8, xmm8, 0x78                # xmm8 = next-round xmm6
   1897         punpckhdq xmm5, xmm7
   1898         punpckldq xmm6, xmm5
   1899         pshufd  xmm7, xmm6, 0x1E                # next-round xmm7
   1900         movdqa  xmm5, xmm9
   1901         movdqa  xmm6, xmm8
   1902         jmp     9b                              # next round
   1903 9:
   1904         pxor    xmm0, xmm2                      # output words 0-3 = row0 ^ row2
   1905         pxor    xmm1, xmm3                      # output words 4-7 = row1 ^ row3
   1906         movups  xmmword ptr [rcx], xmm0         # overwrite the caller's 32-byte state in place
   1907         movups  xmmword ptr [rcx+0x10], xmm1
   1908         movdqa  xmm6, xmmword ptr [rsp]         # reload the spilled callee-saved xmm registers
   1909         movdqa  xmm7, xmmword ptr [rsp+0x10]
   1910         movdqa  xmm8, xmmword ptr [rsp+0x20]
   1911         movdqa  xmm9, xmmword ptr [rsp+0x30]
   1912         movdqa  xmm11, xmmword ptr [rsp+0x40]
   1913         movdqa  xmm14, xmmword ptr [rsp+0x50]
   1914         movdqa  xmm15, xmmword ptr [rsp+0x60]
   1915         add     rsp, 120                        # release the save area
   1916         ret
   1917 
   1918 
   1919 .p2align 6
        # ----------------------------------------------------------------------
        # _blake3_compress_xof_sse41 / blake3_compress_xof_sse41
        #
        # Runs the same 7-round compression loop as the in-place routine, but
        # leaves the input state untouched and writes a 64-byte result to a
        # separate output buffer. Arguments follow the Microsoft x64 calling
        # convention:
        #   rcx         -> 32 bytes, loaded as rows 0-1 (read only; read again at the end)
        #   rdx         -> 64 message bytes, read with unaligned loads
        #   r8b         byte placed in lane 2 of row 3
        #               (presumably block_len -- confirm against the C prototype)
        #   r9          64-bit value placed in lanes 0-1 of row 3
        #               (presumably the block counter -- confirm)
        #   [rsp+0x28]  at entry: byte placed in lane 3 of row 3
        #               (presumably the flags byte -- confirm)
        #   [rsp+0x30]  at entry: pointer to the 64-byte output buffer
        # Saves and restores xmm6-xmm9, xmm11, xmm14 and xmm15.
        # Overwrites rax, r8, r10, xmm0-xmm5 and the flags. Makes no calls.
        # ----------------------------------------------------------------------
   1920 _blake3_compress_xof_sse41:
   1921 blake3_compress_xof_sse41:
   1922         sub     rsp, 120                        # 7 x 16-byte save slots (+8); with rsp = 8 mod 16 at entry this leaves rsp 16-byte aligned for movdqa
   1923         movdqa  xmmword ptr [rsp], xmm6         # spill every callee-saved xmm register used below
   1924         movdqa  xmmword ptr [rsp+0x10], xmm7
   1925         movdqa  xmmword ptr [rsp+0x20], xmm8
   1926         movdqa  xmmword ptr [rsp+0x30], xmm9
   1927         movdqa  xmmword ptr [rsp+0x40], xmm11
   1928         movdqa  xmmword ptr [rsp+0x50], xmm14
   1929         movdqa  xmmword ptr [rsp+0x60], xmm15
   1930         movups  xmm0, xmmword ptr [rcx]         # row 0 = input words 0-3
   1931         movups  xmm1, xmmword ptr [rcx+0x10]    # row 1 = input words 4-7
   1932         movaps  xmm2, xmmword ptr [BLAKE3_IV+rip]       # row 2 = the four constants at BLAKE3_IV
   1933         movzx   eax, byte ptr [rsp+0xA0]        # fifth argument: 120 (locals) + 8 (return address) + 32 (shadow space) = 0xA0
   1934         movzx   r8d, r8b                        # keep only the low byte of the third argument
   1935         mov     r10, qword ptr [rsp+0xA8]       # sixth argument: output pointer, kept in r10 until the final stores
   1936         shl     rax, 32
   1937         add     r8, rax                         # r8 = r8b | (stack byte << 32)
   1938         movq    xmm3, r9
   1939         movq    xmm4, r8
   1940         punpcklqdq xmm3, xmm4                   # row 3 = { r9 low, r9 high, r8b, stack byte }
   1941         movups  xmm4, xmmword ptr [rdx]         # message words 0-3
   1942         movups  xmm5, xmmword ptr [rdx+0x10]    # message words 4-7
   1943         movaps  xmm8, xmm4
   1944         shufps  xmm4, xmm5, 136                 # xmm4 = words 0, 2, 4, 6
   1945         shufps  xmm8, xmm5, 221                 # xmm8 = words 1, 3, 5, 7
   1946         movaps  xmm5, xmm8
   1947         movups  xmm6, xmmword ptr [rdx+0x20]    # message words 8-11
   1948         movups  xmm7, xmmword ptr [rdx+0x30]    # message words 12-15
   1949         movaps  xmm8, xmm6
   1950         shufps  xmm6, xmm7, 136                 # words 8, 10, 12, 14 ...
   1951         pshufd  xmm6, xmm6, 0x93                # ... reordered to 14, 8, 10, 12
   1952         shufps  xmm8, xmm7, 221                 # words 9, 11, 13, 15 ...
   1953         pshufd  xmm7, xmm8, 0x93                # ... reordered to 15, 9, 11, 13
   1954         movaps  xmm14, xmmword ptr [ROT8+rip]   # pshufb mask: rotate each dword right by 8
   1955         movaps  xmm15, xmmword ptr [ROT16+rip]  # pshufb mask: rotate each dword by 16
   1956         mov     al, 7                           # round counter: the loop body runs 7 times
   1957 9:
   1958         paddd   xmm0, xmm4                      # first half-round: row0 += message vector xmm4
   1959         paddd   xmm0, xmm1                      # row0 += row1
   1960         pxor    xmm3, xmm0                      # row3 ^= row0
   1961         pshufb  xmm3, xmm15                     # row3 rotated by 16 bits per dword
   1962         paddd   xmm2, xmm3                      # row2 += row3
   1963         pxor    xmm1, xmm2                      # row1 ^= row2
   1964         movdqa  xmm11, xmm1                     # xmm11 is the scratch register for shift-based rotates
   1965         pslld   xmm1, 20
   1966         psrld   xmm11, 12
   1967         por     xmm1, xmm11                     # row1 rotated right by 12
   1968         paddd   xmm0, xmm5                      # row0 += message vector xmm5
   1969         paddd   xmm0, xmm1
   1970         pxor    xmm3, xmm0
   1971         pshufb  xmm3, xmm14                     # row3 rotated right by 8
   1972         paddd   xmm2, xmm3
   1973         pxor    xmm1, xmm2
   1974         movdqa  xmm11, xmm1
   1975         pslld   xmm1, 25
   1976         psrld   xmm11, 7
   1977         por     xmm1, xmm11                     # row1 rotated right by 7
   1978         pshufd  xmm0, xmm0, 0x93                # rotate the lanes of rows 0, 3 and 2 before the second half-round
   1979         pshufd  xmm3, xmm3, 0x4E
   1980         pshufd  xmm2, xmm2, 0x39
   1981         paddd   xmm0, xmm6                      # second half-round: same sequence with message vectors xmm6 / xmm7
   1982         paddd   xmm0, xmm1
   1983         pxor    xmm3, xmm0
   1984         pshufb  xmm3, xmm15
   1985         paddd   xmm2, xmm3
   1986         pxor    xmm1, xmm2
   1987         movdqa  xmm11, xmm1
   1988         pslld   xmm1, 20
   1989         psrld   xmm11, 12
   1990         por     xmm1, xmm11
   1991         paddd   xmm0, xmm7
   1992         paddd   xmm0, xmm1
   1993         pxor    xmm3, xmm0
   1994         pshufb  xmm3, xmm14
   1995         paddd   xmm2, xmm3
   1996         pxor    xmm1, xmm2
   1997         movdqa  xmm11, xmm1
   1998         pslld   xmm1, 25
   1999         psrld   xmm11, 7
   2000         por     xmm1, xmm11
   2001         pshufd  xmm0, xmm0, 0x39                # undo the lane rotation applied before the second half-round
   2002         pshufd  xmm3, xmm3, 0x4E
   2003         pshufd  xmm2, xmm2, 0x93
   2004         dec     al
   2005         jz      9f                              # after the 7th round skip the message permutation and finish
   2006         movdqa  xmm8, xmm4                      # permute the message vectors xmm4-xmm7 for the next round (xmm8 / xmm9 are scratch)
   2007         shufps  xmm8, xmm5, 214
   2008         pshufd  xmm9, xmm4, 0x0F
   2009         pshufd  xmm4, xmm8, 0x39                # next-round xmm4
   2010         movdqa  xmm8, xmm6
   2011         shufps  xmm8, xmm7, 250
   2012         pblendw xmm9, xmm8, 0xCC                # xmm9 = next-round xmm5
   2013         movdqa  xmm8, xmm7
   2014         punpcklqdq xmm8, xmm5
   2015         pblendw xmm8, xmm6, 0xC0
   2016         pshufd  xmm8, xmm8, 0x78                # xmm8 = next-round xmm6
   2017         punpckhdq xmm5, xmm7
   2018         punpckldq xmm6, xmm5
   2019         pshufd  xmm7, xmm6, 0x1E                # next-round xmm7
   2020         movdqa  xmm5, xmm9
   2021         movdqa  xmm6, xmm8
   2022         jmp     9b                              # next round
   2023 9:
   2024         movdqu  xmm4, xmmword ptr [rcx]         # reload the untouched input words 0-3 (the message registers are no longer needed)
   2025         movdqu  xmm5, xmmword ptr [rcx+0x10]    # reload input words 4-7
   2026         pxor    xmm0, xmm2                      # output words 0-3   = row0 ^ row2
   2027         pxor    xmm1, xmm3                      # output words 4-7   = row1 ^ row3
   2028         pxor    xmm2, xmm4                      # output words 8-11  = row2 ^ input words 0-3
   2029         pxor    xmm3, xmm5                      # output words 12-15 = row3 ^ input words 4-7
   2030         movups  xmmword ptr [r10], xmm0         # store all 64 output bytes with unaligned stores
   2031         movups  xmmword ptr [r10+0x10], xmm1
   2032         movups  xmmword ptr [r10+0x20], xmm2
   2033         movups  xmmword ptr [r10+0x30], xmm3
   2034         movdqa  xmm6, xmmword ptr [rsp]         # reload the spilled callee-saved xmm registers
   2035         movdqa  xmm7, xmmword ptr [rsp+0x10]
   2036         movdqa  xmm8, xmmword ptr [rsp+0x20]
   2037         movdqa  xmm9, xmmword ptr [rsp+0x30]
   2038         movdqa  xmm11, xmmword ptr [rsp+0x40]
   2039         movdqa  xmm14, xmmword ptr [rsp+0x50]
   2040         movdqa  xmm15, xmmword ptr [rsp+0x60]
   2041         add     rsp, 120                        # release the save area
   2042         ret
   2043 
   2044 
   2045 .section .rodata
        # Constant tables read through RIP-relative operands by the routines above.
        # NOTE(review): PE/COFF toolchains conventionally name the read-only data
        # section .rdata -- confirm how the linker in use places a section called
        # .rodata (flags and merging) for this Windows GNU build.
   2046 .p2align  6                                     # 64-byte alignment; every table below is 16 bytes, so each label stays 16-byte aligned for movaps / movdqa / pand memory operands
   2047 BLAKE3_IV:                                      # four 32-bit constants loaded as state row 2 by the compress routines
   2048         .long  0x6A09E667, 0xBB67AE85
   2049         .long  0x3C6EF372, 0xA54FF53A
   2050 ROT16:                                          # pshufb mask: swaps the 16-bit halves of every dword (rotate by 16)
   2051         .byte  2, 3, 0, 1, 6, 7, 4, 5, 10, 11, 8, 9, 14, 15, 12, 13
   2052 ROT8:                                           # pshufb mask: rotates every dword right by 8 bits
   2053         .byte  1, 2, 3, 0, 5, 6, 7, 4, 9, 10, 11, 8, 13, 14, 15, 12
   2054 ADD0:                                           # lane values 0..3; ANDed with a broadcast mask near the top of blake3_hash_many_sse41
   2055         .long  0, 1, 2, 3
   2056 ADD1:                                           # 4 in every lane; ANDed with the same broadcast mask in blake3_hash_many_sse41
   2057         .long  4, 4, 4, 4
   2058 BLAKE3_IV_0:                                    # first BLAKE3_IV word repeated in all four lanes
   2059         .long  0x6A09E667, 0x6A09E667, 0x6A09E667, 0x6A09E667
   2060 BLAKE3_IV_1:                                    # second BLAKE3_IV word repeated in all four lanes
   2061         .long  0xBB67AE85, 0xBB67AE85, 0xBB67AE85, 0xBB67AE85
   2062 BLAKE3_IV_2:                                    # third BLAKE3_IV word repeated in all four lanes
   2063         .long  0x3C6EF372, 0x3C6EF372, 0x3C6EF372, 0x3C6EF372
   2064 BLAKE3_IV_3:                                    # fourth BLAKE3_IV word repeated in all four lanes
   2065         .long  0xA54FF53A, 0xA54FF53A, 0xA54FF53A, 0xA54FF53A
   2066 BLAKE3_BLOCK_LEN:                               # 64 in every lane; the single-input tail of blake3_hash_many_sse41 inserts the first dword into lane 2 of its row-3 template
   2067         .long  64, 64, 64, 64
   2068 CMP_MSB_MASK:                                   # sign bit set in every lane; no use is visible in this part of the file -- presumably for unsigned compares, TODO confirm
   2069         .long  0x80000000, 0x80000000, 0x80000000, 0x80000000