blake3_sse41_x86-64_unix.S (61143B)
1 #if defined(__ELF__) && defined(__linux__) 2 .section .note.GNU-stack,"",%progbits 3 #endif 4 5 #if defined(__ELF__) && defined(__CET__) && defined(__has_include) 6 #if __has_include(<cet.h>) 7 #include <cet.h> 8 #endif 9 #endif 10 11 #if !defined(_CET_ENDBR) 12 #define _CET_ENDBR 13 #endif 14 15 .intel_syntax noprefix 16 .global blake3_hash_many_sse41 17 .global _blake3_hash_many_sse41 18 .global blake3_compress_in_place_sse41 19 .global _blake3_compress_in_place_sse41 20 .global blake3_compress_xof_sse41 21 .global _blake3_compress_xof_sse41 22 #ifdef __APPLE__ 23 .text 24 #else 25 .section .text 26 #endif 27 .p2align 6 28 _blake3_hash_many_sse41: 29 blake3_hash_many_sse41: 30 _CET_ENDBR 31 push r15 32 push r14 33 push r13 34 push r12 35 push rbx 36 push rbp 37 mov rbp, rsp 38 sub rsp, 360 39 and rsp, 0xFFFFFFFFFFFFFFC0 40 neg r9d 41 movd xmm0, r9d 42 pshufd xmm0, xmm0, 0x00 43 movdqa xmmword ptr [rsp+0x130], xmm0 44 movdqa xmm1, xmm0 45 pand xmm1, xmmword ptr [ADD0+rip] 46 pand xmm0, xmmword ptr [ADD1+rip] 47 movdqa xmmword ptr [rsp+0x150], xmm0 48 movd xmm0, r8d 49 pshufd xmm0, xmm0, 0x00 50 paddd xmm0, xmm1 51 movdqa xmmword ptr [rsp+0x110], xmm0 52 pxor xmm0, xmmword ptr [CMP_MSB_MASK+rip] 53 pxor xmm1, xmmword ptr [CMP_MSB_MASK+rip] 54 pcmpgtd xmm1, xmm0 55 shr r8, 32 56 movd xmm2, r8d 57 pshufd xmm2, xmm2, 0x00 58 psubd xmm2, xmm1 59 movdqa xmmword ptr [rsp+0x120], xmm2 60 mov rbx, qword ptr [rbp+0x50] 61 mov r15, rdx 62 shl r15, 6 63 movzx r13d, byte ptr [rbp+0x38] 64 movzx r12d, byte ptr [rbp+0x48] 65 cmp rsi, 4 66 jc 3f 67 2: 68 movdqu xmm3, xmmword ptr [rcx] 69 pshufd xmm0, xmm3, 0x00 70 pshufd xmm1, xmm3, 0x55 71 pshufd xmm2, xmm3, 0xAA 72 pshufd xmm3, xmm3, 0xFF 73 movdqu xmm7, xmmword ptr [rcx+0x10] 74 pshufd xmm4, xmm7, 0x00 75 pshufd xmm5, xmm7, 0x55 76 pshufd xmm6, xmm7, 0xAA 77 pshufd xmm7, xmm7, 0xFF 78 mov r8, qword ptr [rdi] 79 mov r9, qword ptr [rdi+0x8] 80 mov r10, qword ptr [rdi+0x10] 81 mov r11, qword ptr [rdi+0x18] 82 movzx eax, byte 
ptr [rbp+0x40] 83 or eax, r13d 84 xor edx, edx 85 9: 86 mov r14d, eax 87 or eax, r12d 88 add rdx, 64 89 cmp rdx, r15 90 cmovne eax, r14d 91 movdqu xmm8, xmmword ptr [r8+rdx-0x40] 92 movdqu xmm9, xmmword ptr [r9+rdx-0x40] 93 movdqu xmm10, xmmword ptr [r10+rdx-0x40] 94 movdqu xmm11, xmmword ptr [r11+rdx-0x40] 95 movdqa xmm12, xmm8 96 punpckldq xmm8, xmm9 97 punpckhdq xmm12, xmm9 98 movdqa xmm14, xmm10 99 punpckldq xmm10, xmm11 100 punpckhdq xmm14, xmm11 101 movdqa xmm9, xmm8 102 punpcklqdq xmm8, xmm10 103 punpckhqdq xmm9, xmm10 104 movdqa xmm13, xmm12 105 punpcklqdq xmm12, xmm14 106 punpckhqdq xmm13, xmm14 107 movdqa xmmword ptr [rsp], xmm8 108 movdqa xmmword ptr [rsp+0x10], xmm9 109 movdqa xmmword ptr [rsp+0x20], xmm12 110 movdqa xmmword ptr [rsp+0x30], xmm13 111 movdqu xmm8, xmmword ptr [r8+rdx-0x30] 112 movdqu xmm9, xmmword ptr [r9+rdx-0x30] 113 movdqu xmm10, xmmword ptr [r10+rdx-0x30] 114 movdqu xmm11, xmmword ptr [r11+rdx-0x30] 115 movdqa xmm12, xmm8 116 punpckldq xmm8, xmm9 117 punpckhdq xmm12, xmm9 118 movdqa xmm14, xmm10 119 punpckldq xmm10, xmm11 120 punpckhdq xmm14, xmm11 121 movdqa xmm9, xmm8 122 punpcklqdq xmm8, xmm10 123 punpckhqdq xmm9, xmm10 124 movdqa xmm13, xmm12 125 punpcklqdq xmm12, xmm14 126 punpckhqdq xmm13, xmm14 127 movdqa xmmword ptr [rsp+0x40], xmm8 128 movdqa xmmword ptr [rsp+0x50], xmm9 129 movdqa xmmword ptr [rsp+0x60], xmm12 130 movdqa xmmword ptr [rsp+0x70], xmm13 131 movdqu xmm8, xmmword ptr [r8+rdx-0x20] 132 movdqu xmm9, xmmword ptr [r9+rdx-0x20] 133 movdqu xmm10, xmmword ptr [r10+rdx-0x20] 134 movdqu xmm11, xmmword ptr [r11+rdx-0x20] 135 movdqa xmm12, xmm8 136 punpckldq xmm8, xmm9 137 punpckhdq xmm12, xmm9 138 movdqa xmm14, xmm10 139 punpckldq xmm10, xmm11 140 punpckhdq xmm14, xmm11 141 movdqa xmm9, xmm8 142 punpcklqdq xmm8, xmm10 143 punpckhqdq xmm9, xmm10 144 movdqa xmm13, xmm12 145 punpcklqdq xmm12, xmm14 146 punpckhqdq xmm13, xmm14 147 movdqa xmmword ptr [rsp+0x80], xmm8 148 movdqa xmmword ptr [rsp+0x90], xmm9 149 movdqa xmmword 
ptr [rsp+0xA0], xmm12 150 movdqa xmmword ptr [rsp+0xB0], xmm13 151 movdqu xmm8, xmmword ptr [r8+rdx-0x10] 152 movdqu xmm9, xmmword ptr [r9+rdx-0x10] 153 movdqu xmm10, xmmword ptr [r10+rdx-0x10] 154 movdqu xmm11, xmmword ptr [r11+rdx-0x10] 155 movdqa xmm12, xmm8 156 punpckldq xmm8, xmm9 157 punpckhdq xmm12, xmm9 158 movdqa xmm14, xmm10 159 punpckldq xmm10, xmm11 160 punpckhdq xmm14, xmm11 161 movdqa xmm9, xmm8 162 punpcklqdq xmm8, xmm10 163 punpckhqdq xmm9, xmm10 164 movdqa xmm13, xmm12 165 punpcklqdq xmm12, xmm14 166 punpckhqdq xmm13, xmm14 167 movdqa xmmword ptr [rsp+0xC0], xmm8 168 movdqa xmmword ptr [rsp+0xD0], xmm9 169 movdqa xmmword ptr [rsp+0xE0], xmm12 170 movdqa xmmword ptr [rsp+0xF0], xmm13 171 movdqa xmm9, xmmword ptr [BLAKE3_IV_1+rip] 172 movdqa xmm10, xmmword ptr [BLAKE3_IV_2+rip] 173 movdqa xmm11, xmmword ptr [BLAKE3_IV_3+rip] 174 movdqa xmm12, xmmword ptr [rsp+0x110] 175 movdqa xmm13, xmmword ptr [rsp+0x120] 176 movdqa xmm14, xmmword ptr [BLAKE3_BLOCK_LEN+rip] 177 movd xmm15, eax 178 pshufd xmm15, xmm15, 0x00 179 prefetcht0 [r8+rdx+0x80] 180 prefetcht0 [r9+rdx+0x80] 181 prefetcht0 [r10+rdx+0x80] 182 prefetcht0 [r11+rdx+0x80] 183 paddd xmm0, xmmword ptr [rsp] 184 paddd xmm1, xmmword ptr [rsp+0x20] 185 paddd xmm2, xmmword ptr [rsp+0x40] 186 paddd xmm3, xmmword ptr [rsp+0x60] 187 paddd xmm0, xmm4 188 paddd xmm1, xmm5 189 paddd xmm2, xmm6 190 paddd xmm3, xmm7 191 pxor xmm12, xmm0 192 pxor xmm13, xmm1 193 pxor xmm14, xmm2 194 pxor xmm15, xmm3 195 movdqa xmm8, xmmword ptr [ROT16+rip] 196 pshufb xmm12, xmm8 197 pshufb xmm13, xmm8 198 pshufb xmm14, xmm8 199 pshufb xmm15, xmm8 200 movdqa xmm8, xmmword ptr [BLAKE3_IV_0+rip] 201 paddd xmm8, xmm12 202 paddd xmm9, xmm13 203 paddd xmm10, xmm14 204 paddd xmm11, xmm15 205 pxor xmm4, xmm8 206 pxor xmm5, xmm9 207 pxor xmm6, xmm10 208 pxor xmm7, xmm11 209 movdqa xmmword ptr [rsp+0x100], xmm8 210 movdqa xmm8, xmm4 211 psrld xmm8, 12 212 pslld xmm4, 20 213 por xmm4, xmm8 214 movdqa xmm8, xmm5 215 psrld xmm8, 12 216 pslld 
xmm5, 20 217 por xmm5, xmm8 218 movdqa xmm8, xmm6 219 psrld xmm8, 12 220 pslld xmm6, 20 221 por xmm6, xmm8 222 movdqa xmm8, xmm7 223 psrld xmm8, 12 224 pslld xmm7, 20 225 por xmm7, xmm8 226 paddd xmm0, xmmword ptr [rsp+0x10] 227 paddd xmm1, xmmword ptr [rsp+0x30] 228 paddd xmm2, xmmword ptr [rsp+0x50] 229 paddd xmm3, xmmword ptr [rsp+0x70] 230 paddd xmm0, xmm4 231 paddd xmm1, xmm5 232 paddd xmm2, xmm6 233 paddd xmm3, xmm7 234 pxor xmm12, xmm0 235 pxor xmm13, xmm1 236 pxor xmm14, xmm2 237 pxor xmm15, xmm3 238 movdqa xmm8, xmmword ptr [ROT8+rip] 239 pshufb xmm12, xmm8 240 pshufb xmm13, xmm8 241 pshufb xmm14, xmm8 242 pshufb xmm15, xmm8 243 movdqa xmm8, xmmword ptr [rsp+0x100] 244 paddd xmm8, xmm12 245 paddd xmm9, xmm13 246 paddd xmm10, xmm14 247 paddd xmm11, xmm15 248 pxor xmm4, xmm8 249 pxor xmm5, xmm9 250 pxor xmm6, xmm10 251 pxor xmm7, xmm11 252 movdqa xmmword ptr [rsp+0x100], xmm8 253 movdqa xmm8, xmm4 254 psrld xmm8, 7 255 pslld xmm4, 25 256 por xmm4, xmm8 257 movdqa xmm8, xmm5 258 psrld xmm8, 7 259 pslld xmm5, 25 260 por xmm5, xmm8 261 movdqa xmm8, xmm6 262 psrld xmm8, 7 263 pslld xmm6, 25 264 por xmm6, xmm8 265 movdqa xmm8, xmm7 266 psrld xmm8, 7 267 pslld xmm7, 25 268 por xmm7, xmm8 269 paddd xmm0, xmmword ptr [rsp+0x80] 270 paddd xmm1, xmmword ptr [rsp+0xA0] 271 paddd xmm2, xmmword ptr [rsp+0xC0] 272 paddd xmm3, xmmword ptr [rsp+0xE0] 273 paddd xmm0, xmm5 274 paddd xmm1, xmm6 275 paddd xmm2, xmm7 276 paddd xmm3, xmm4 277 pxor xmm15, xmm0 278 pxor xmm12, xmm1 279 pxor xmm13, xmm2 280 pxor xmm14, xmm3 281 movdqa xmm8, xmmword ptr [ROT16+rip] 282 pshufb xmm15, xmm8 283 pshufb xmm12, xmm8 284 pshufb xmm13, xmm8 285 pshufb xmm14, xmm8 286 paddd xmm10, xmm15 287 paddd xmm11, xmm12 288 movdqa xmm8, xmmword ptr [rsp+0x100] 289 paddd xmm8, xmm13 290 paddd xmm9, xmm14 291 pxor xmm5, xmm10 292 pxor xmm6, xmm11 293 pxor xmm7, xmm8 294 pxor xmm4, xmm9 295 movdqa xmmword ptr [rsp+0x100], xmm8 296 movdqa xmm8, xmm5 297 psrld xmm8, 12 298 pslld xmm5, 20 299 por xmm5, xmm8 
300 movdqa xmm8, xmm6 301 psrld xmm8, 12 302 pslld xmm6, 20 303 por xmm6, xmm8 304 movdqa xmm8, xmm7 305 psrld xmm8, 12 306 pslld xmm7, 20 307 por xmm7, xmm8 308 movdqa xmm8, xmm4 309 psrld xmm8, 12 310 pslld xmm4, 20 311 por xmm4, xmm8 312 paddd xmm0, xmmword ptr [rsp+0x90] 313 paddd xmm1, xmmword ptr [rsp+0xB0] 314 paddd xmm2, xmmword ptr [rsp+0xD0] 315 paddd xmm3, xmmword ptr [rsp+0xF0] 316 paddd xmm0, xmm5 317 paddd xmm1, xmm6 318 paddd xmm2, xmm7 319 paddd xmm3, xmm4 320 pxor xmm15, xmm0 321 pxor xmm12, xmm1 322 pxor xmm13, xmm2 323 pxor xmm14, xmm3 324 movdqa xmm8, xmmword ptr [ROT8+rip] 325 pshufb xmm15, xmm8 326 pshufb xmm12, xmm8 327 pshufb xmm13, xmm8 328 pshufb xmm14, xmm8 329 paddd xmm10, xmm15 330 paddd xmm11, xmm12 331 movdqa xmm8, xmmword ptr [rsp+0x100] 332 paddd xmm8, xmm13 333 paddd xmm9, xmm14 334 pxor xmm5, xmm10 335 pxor xmm6, xmm11 336 pxor xmm7, xmm8 337 pxor xmm4, xmm9 338 movdqa xmmword ptr [rsp+0x100], xmm8 339 movdqa xmm8, xmm5 340 psrld xmm8, 7 341 pslld xmm5, 25 342 por xmm5, xmm8 343 movdqa xmm8, xmm6 344 psrld xmm8, 7 345 pslld xmm6, 25 346 por xmm6, xmm8 347 movdqa xmm8, xmm7 348 psrld xmm8, 7 349 pslld xmm7, 25 350 por xmm7, xmm8 351 movdqa xmm8, xmm4 352 psrld xmm8, 7 353 pslld xmm4, 25 354 por xmm4, xmm8 355 paddd xmm0, xmmword ptr [rsp+0x20] 356 paddd xmm1, xmmword ptr [rsp+0x30] 357 paddd xmm2, xmmword ptr [rsp+0x70] 358 paddd xmm3, xmmword ptr [rsp+0x40] 359 paddd xmm0, xmm4 360 paddd xmm1, xmm5 361 paddd xmm2, xmm6 362 paddd xmm3, xmm7 363 pxor xmm12, xmm0 364 pxor xmm13, xmm1 365 pxor xmm14, xmm2 366 pxor xmm15, xmm3 367 movdqa xmm8, xmmword ptr [ROT16+rip] 368 pshufb xmm12, xmm8 369 pshufb xmm13, xmm8 370 pshufb xmm14, xmm8 371 pshufb xmm15, xmm8 372 movdqa xmm8, xmmword ptr [rsp+0x100] 373 paddd xmm8, xmm12 374 paddd xmm9, xmm13 375 paddd xmm10, xmm14 376 paddd xmm11, xmm15 377 pxor xmm4, xmm8 378 pxor xmm5, xmm9 379 pxor xmm6, xmm10 380 pxor xmm7, xmm11 381 movdqa xmmword ptr [rsp+0x100], xmm8 382 movdqa xmm8, xmm4 383 
psrld xmm8, 12 384 pslld xmm4, 20 385 por xmm4, xmm8 386 movdqa xmm8, xmm5 387 psrld xmm8, 12 388 pslld xmm5, 20 389 por xmm5, xmm8 390 movdqa xmm8, xmm6 391 psrld xmm8, 12 392 pslld xmm6, 20 393 por xmm6, xmm8 394 movdqa xmm8, xmm7 395 psrld xmm8, 12 396 pslld xmm7, 20 397 por xmm7, xmm8 398 paddd xmm0, xmmword ptr [rsp+0x60] 399 paddd xmm1, xmmword ptr [rsp+0xA0] 400 paddd xmm2, xmmword ptr [rsp] 401 paddd xmm3, xmmword ptr [rsp+0xD0] 402 paddd xmm0, xmm4 403 paddd xmm1, xmm5 404 paddd xmm2, xmm6 405 paddd xmm3, xmm7 406 pxor xmm12, xmm0 407 pxor xmm13, xmm1 408 pxor xmm14, xmm2 409 pxor xmm15, xmm3 410 movdqa xmm8, xmmword ptr [ROT8+rip] 411 pshufb xmm12, xmm8 412 pshufb xmm13, xmm8 413 pshufb xmm14, xmm8 414 pshufb xmm15, xmm8 415 movdqa xmm8, xmmword ptr [rsp+0x100] 416 paddd xmm8, xmm12 417 paddd xmm9, xmm13 418 paddd xmm10, xmm14 419 paddd xmm11, xmm15 420 pxor xmm4, xmm8 421 pxor xmm5, xmm9 422 pxor xmm6, xmm10 423 pxor xmm7, xmm11 424 movdqa xmmword ptr [rsp+0x100], xmm8 425 movdqa xmm8, xmm4 426 psrld xmm8, 7 427 pslld xmm4, 25 428 por xmm4, xmm8 429 movdqa xmm8, xmm5 430 psrld xmm8, 7 431 pslld xmm5, 25 432 por xmm5, xmm8 433 movdqa xmm8, xmm6 434 psrld xmm8, 7 435 pslld xmm6, 25 436 por xmm6, xmm8 437 movdqa xmm8, xmm7 438 psrld xmm8, 7 439 pslld xmm7, 25 440 por xmm7, xmm8 441 paddd xmm0, xmmword ptr [rsp+0x10] 442 paddd xmm1, xmmword ptr [rsp+0xC0] 443 paddd xmm2, xmmword ptr [rsp+0x90] 444 paddd xmm3, xmmword ptr [rsp+0xF0] 445 paddd xmm0, xmm5 446 paddd xmm1, xmm6 447 paddd xmm2, xmm7 448 paddd xmm3, xmm4 449 pxor xmm15, xmm0 450 pxor xmm12, xmm1 451 pxor xmm13, xmm2 452 pxor xmm14, xmm3 453 movdqa xmm8, xmmword ptr [ROT16+rip] 454 pshufb xmm15, xmm8 455 pshufb xmm12, xmm8 456 pshufb xmm13, xmm8 457 pshufb xmm14, xmm8 458 paddd xmm10, xmm15 459 paddd xmm11, xmm12 460 movdqa xmm8, xmmword ptr [rsp+0x100] 461 paddd xmm8, xmm13 462 paddd xmm9, xmm14 463 pxor xmm5, xmm10 464 pxor xmm6, xmm11 465 pxor xmm7, xmm8 466 pxor xmm4, xmm9 467 movdqa xmmword ptr 
[rsp+0x100], xmm8 468 movdqa xmm8, xmm5 469 psrld xmm8, 12 470 pslld xmm5, 20 471 por xmm5, xmm8 472 movdqa xmm8, xmm6 473 psrld xmm8, 12 474 pslld xmm6, 20 475 por xmm6, xmm8 476 movdqa xmm8, xmm7 477 psrld xmm8, 12 478 pslld xmm7, 20 479 por xmm7, xmm8 480 movdqa xmm8, xmm4 481 psrld xmm8, 12 482 pslld xmm4, 20 483 por xmm4, xmm8 484 paddd xmm0, xmmword ptr [rsp+0xB0] 485 paddd xmm1, xmmword ptr [rsp+0x50] 486 paddd xmm2, xmmword ptr [rsp+0xE0] 487 paddd xmm3, xmmword ptr [rsp+0x80] 488 paddd xmm0, xmm5 489 paddd xmm1, xmm6 490 paddd xmm2, xmm7 491 paddd xmm3, xmm4 492 pxor xmm15, xmm0 493 pxor xmm12, xmm1 494 pxor xmm13, xmm2 495 pxor xmm14, xmm3 496 movdqa xmm8, xmmword ptr [ROT8+rip] 497 pshufb xmm15, xmm8 498 pshufb xmm12, xmm8 499 pshufb xmm13, xmm8 500 pshufb xmm14, xmm8 501 paddd xmm10, xmm15 502 paddd xmm11, xmm12 503 movdqa xmm8, xmmword ptr [rsp+0x100] 504 paddd xmm8, xmm13 505 paddd xmm9, xmm14 506 pxor xmm5, xmm10 507 pxor xmm6, xmm11 508 pxor xmm7, xmm8 509 pxor xmm4, xmm9 510 movdqa xmmword ptr [rsp+0x100], xmm8 511 movdqa xmm8, xmm5 512 psrld xmm8, 7 513 pslld xmm5, 25 514 por xmm5, xmm8 515 movdqa xmm8, xmm6 516 psrld xmm8, 7 517 pslld xmm6, 25 518 por xmm6, xmm8 519 movdqa xmm8, xmm7 520 psrld xmm8, 7 521 pslld xmm7, 25 522 por xmm7, xmm8 523 movdqa xmm8, xmm4 524 psrld xmm8, 7 525 pslld xmm4, 25 526 por xmm4, xmm8 527 paddd xmm0, xmmword ptr [rsp+0x30] 528 paddd xmm1, xmmword ptr [rsp+0xA0] 529 paddd xmm2, xmmword ptr [rsp+0xD0] 530 paddd xmm3, xmmword ptr [rsp+0x70] 531 paddd xmm0, xmm4 532 paddd xmm1, xmm5 533 paddd xmm2, xmm6 534 paddd xmm3, xmm7 535 pxor xmm12, xmm0 536 pxor xmm13, xmm1 537 pxor xmm14, xmm2 538 pxor xmm15, xmm3 539 movdqa xmm8, xmmword ptr [ROT16+rip] 540 pshufb xmm12, xmm8 541 pshufb xmm13, xmm8 542 pshufb xmm14, xmm8 543 pshufb xmm15, xmm8 544 movdqa xmm8, xmmword ptr [rsp+0x100] 545 paddd xmm8, xmm12 546 paddd xmm9, xmm13 547 paddd xmm10, xmm14 548 paddd xmm11, xmm15 549 pxor xmm4, xmm8 550 pxor xmm5, xmm9 551 pxor xmm6, 
xmm10 552 pxor xmm7, xmm11 553 movdqa xmmword ptr [rsp+0x100], xmm8 554 movdqa xmm8, xmm4 555 psrld xmm8, 12 556 pslld xmm4, 20 557 por xmm4, xmm8 558 movdqa xmm8, xmm5 559 psrld xmm8, 12 560 pslld xmm5, 20 561 por xmm5, xmm8 562 movdqa xmm8, xmm6 563 psrld xmm8, 12 564 pslld xmm6, 20 565 por xmm6, xmm8 566 movdqa xmm8, xmm7 567 psrld xmm8, 12 568 pslld xmm7, 20 569 por xmm7, xmm8 570 paddd xmm0, xmmword ptr [rsp+0x40] 571 paddd xmm1, xmmword ptr [rsp+0xC0] 572 paddd xmm2, xmmword ptr [rsp+0x20] 573 paddd xmm3, xmmword ptr [rsp+0xE0] 574 paddd xmm0, xmm4 575 paddd xmm1, xmm5 576 paddd xmm2, xmm6 577 paddd xmm3, xmm7 578 pxor xmm12, xmm0 579 pxor xmm13, xmm1 580 pxor xmm14, xmm2 581 pxor xmm15, xmm3 582 movdqa xmm8, xmmword ptr [ROT8+rip] 583 pshufb xmm12, xmm8 584 pshufb xmm13, xmm8 585 pshufb xmm14, xmm8 586 pshufb xmm15, xmm8 587 movdqa xmm8, xmmword ptr [rsp+0x100] 588 paddd xmm8, xmm12 589 paddd xmm9, xmm13 590 paddd xmm10, xmm14 591 paddd xmm11, xmm15 592 pxor xmm4, xmm8 593 pxor xmm5, xmm9 594 pxor xmm6, xmm10 595 pxor xmm7, xmm11 596 movdqa xmmword ptr [rsp+0x100], xmm8 597 movdqa xmm8, xmm4 598 psrld xmm8, 7 599 pslld xmm4, 25 600 por xmm4, xmm8 601 movdqa xmm8, xmm5 602 psrld xmm8, 7 603 pslld xmm5, 25 604 por xmm5, xmm8 605 movdqa xmm8, xmm6 606 psrld xmm8, 7 607 pslld xmm6, 25 608 por xmm6, xmm8 609 movdqa xmm8, xmm7 610 psrld xmm8, 7 611 pslld xmm7, 25 612 por xmm7, xmm8 613 paddd xmm0, xmmword ptr [rsp+0x60] 614 paddd xmm1, xmmword ptr [rsp+0x90] 615 paddd xmm2, xmmword ptr [rsp+0xB0] 616 paddd xmm3, xmmword ptr [rsp+0x80] 617 paddd xmm0, xmm5 618 paddd xmm1, xmm6 619 paddd xmm2, xmm7 620 paddd xmm3, xmm4 621 pxor xmm15, xmm0 622 pxor xmm12, xmm1 623 pxor xmm13, xmm2 624 pxor xmm14, xmm3 625 movdqa xmm8, xmmword ptr [ROT16+rip] 626 pshufb xmm15, xmm8 627 pshufb xmm12, xmm8 628 pshufb xmm13, xmm8 629 pshufb xmm14, xmm8 630 paddd xmm10, xmm15 631 paddd xmm11, xmm12 632 movdqa xmm8, xmmword ptr [rsp+0x100] 633 paddd xmm8, xmm13 634 paddd xmm9, xmm14 635 
pxor xmm5, xmm10 636 pxor xmm6, xmm11 637 pxor xmm7, xmm8 638 pxor xmm4, xmm9 639 movdqa xmmword ptr [rsp+0x100], xmm8 640 movdqa xmm8, xmm5 641 psrld xmm8, 12 642 pslld xmm5, 20 643 por xmm5, xmm8 644 movdqa xmm8, xmm6 645 psrld xmm8, 12 646 pslld xmm6, 20 647 por xmm6, xmm8 648 movdqa xmm8, xmm7 649 psrld xmm8, 12 650 pslld xmm7, 20 651 por xmm7, xmm8 652 movdqa xmm8, xmm4 653 psrld xmm8, 12 654 pslld xmm4, 20 655 por xmm4, xmm8 656 paddd xmm0, xmmword ptr [rsp+0x50] 657 paddd xmm1, xmmword ptr [rsp] 658 paddd xmm2, xmmword ptr [rsp+0xF0] 659 paddd xmm3, xmmword ptr [rsp+0x10] 660 paddd xmm0, xmm5 661 paddd xmm1, xmm6 662 paddd xmm2, xmm7 663 paddd xmm3, xmm4 664 pxor xmm15, xmm0 665 pxor xmm12, xmm1 666 pxor xmm13, xmm2 667 pxor xmm14, xmm3 668 movdqa xmm8, xmmword ptr [ROT8+rip] 669 pshufb xmm15, xmm8 670 pshufb xmm12, xmm8 671 pshufb xmm13, xmm8 672 pshufb xmm14, xmm8 673 paddd xmm10, xmm15 674 paddd xmm11, xmm12 675 movdqa xmm8, xmmword ptr [rsp+0x100] 676 paddd xmm8, xmm13 677 paddd xmm9, xmm14 678 pxor xmm5, xmm10 679 pxor xmm6, xmm11 680 pxor xmm7, xmm8 681 pxor xmm4, xmm9 682 movdqa xmmword ptr [rsp+0x100], xmm8 683 movdqa xmm8, xmm5 684 psrld xmm8, 7 685 pslld xmm5, 25 686 por xmm5, xmm8 687 movdqa xmm8, xmm6 688 psrld xmm8, 7 689 pslld xmm6, 25 690 por xmm6, xmm8 691 movdqa xmm8, xmm7 692 psrld xmm8, 7 693 pslld xmm7, 25 694 por xmm7, xmm8 695 movdqa xmm8, xmm4 696 psrld xmm8, 7 697 pslld xmm4, 25 698 por xmm4, xmm8 699 paddd xmm0, xmmword ptr [rsp+0xA0] 700 paddd xmm1, xmmword ptr [rsp+0xC0] 701 paddd xmm2, xmmword ptr [rsp+0xE0] 702 paddd xmm3, xmmword ptr [rsp+0xD0] 703 paddd xmm0, xmm4 704 paddd xmm1, xmm5 705 paddd xmm2, xmm6 706 paddd xmm3, xmm7 707 pxor xmm12, xmm0 708 pxor xmm13, xmm1 709 pxor xmm14, xmm2 710 pxor xmm15, xmm3 711 movdqa xmm8, xmmword ptr [ROT16+rip] 712 pshufb xmm12, xmm8 713 pshufb xmm13, xmm8 714 pshufb xmm14, xmm8 715 pshufb xmm15, xmm8 716 movdqa xmm8, xmmword ptr [rsp+0x100] 717 paddd xmm8, xmm12 718 paddd xmm9, xmm13 719 
paddd xmm10, xmm14 720 paddd xmm11, xmm15 721 pxor xmm4, xmm8 722 pxor xmm5, xmm9 723 pxor xmm6, xmm10 724 pxor xmm7, xmm11 725 movdqa xmmword ptr [rsp+0x100], xmm8 726 movdqa xmm8, xmm4 727 psrld xmm8, 12 728 pslld xmm4, 20 729 por xmm4, xmm8 730 movdqa xmm8, xmm5 731 psrld xmm8, 12 732 pslld xmm5, 20 733 por xmm5, xmm8 734 movdqa xmm8, xmm6 735 psrld xmm8, 12 736 pslld xmm6, 20 737 por xmm6, xmm8 738 movdqa xmm8, xmm7 739 psrld xmm8, 12 740 pslld xmm7, 20 741 por xmm7, xmm8 742 paddd xmm0, xmmword ptr [rsp+0x70] 743 paddd xmm1, xmmword ptr [rsp+0x90] 744 paddd xmm2, xmmword ptr [rsp+0x30] 745 paddd xmm3, xmmword ptr [rsp+0xF0] 746 paddd xmm0, xmm4 747 paddd xmm1, xmm5 748 paddd xmm2, xmm6 749 paddd xmm3, xmm7 750 pxor xmm12, xmm0 751 pxor xmm13, xmm1 752 pxor xmm14, xmm2 753 pxor xmm15, xmm3 754 movdqa xmm8, xmmword ptr [ROT8+rip] 755 pshufb xmm12, xmm8 756 pshufb xmm13, xmm8 757 pshufb xmm14, xmm8 758 pshufb xmm15, xmm8 759 movdqa xmm8, xmmword ptr [rsp+0x100] 760 paddd xmm8, xmm12 761 paddd xmm9, xmm13 762 paddd xmm10, xmm14 763 paddd xmm11, xmm15 764 pxor xmm4, xmm8 765 pxor xmm5, xmm9 766 pxor xmm6, xmm10 767 pxor xmm7, xmm11 768 movdqa xmmword ptr [rsp+0x100], xmm8 769 movdqa xmm8, xmm4 770 psrld xmm8, 7 771 pslld xmm4, 25 772 por xmm4, xmm8 773 movdqa xmm8, xmm5 774 psrld xmm8, 7 775 pslld xmm5, 25 776 por xmm5, xmm8 777 movdqa xmm8, xmm6 778 psrld xmm8, 7 779 pslld xmm6, 25 780 por xmm6, xmm8 781 movdqa xmm8, xmm7 782 psrld xmm8, 7 783 pslld xmm7, 25 784 por xmm7, xmm8 785 paddd xmm0, xmmword ptr [rsp+0x40] 786 paddd xmm1, xmmword ptr [rsp+0xB0] 787 paddd xmm2, xmmword ptr [rsp+0x50] 788 paddd xmm3, xmmword ptr [rsp+0x10] 789 paddd xmm0, xmm5 790 paddd xmm1, xmm6 791 paddd xmm2, xmm7 792 paddd xmm3, xmm4 793 pxor xmm15, xmm0 794 pxor xmm12, xmm1 795 pxor xmm13, xmm2 796 pxor xmm14, xmm3 797 movdqa xmm8, xmmword ptr [ROT16+rip] 798 pshufb xmm15, xmm8 799 pshufb xmm12, xmm8 800 pshufb xmm13, xmm8 801 pshufb xmm14, xmm8 802 paddd xmm10, xmm15 803 paddd xmm11, 
xmm12 804 movdqa xmm8, xmmword ptr [rsp+0x100] 805 paddd xmm8, xmm13 806 paddd xmm9, xmm14 807 pxor xmm5, xmm10 808 pxor xmm6, xmm11 809 pxor xmm7, xmm8 810 pxor xmm4, xmm9 811 movdqa xmmword ptr [rsp+0x100], xmm8 812 movdqa xmm8, xmm5 813 psrld xmm8, 12 814 pslld xmm5, 20 815 por xmm5, xmm8 816 movdqa xmm8, xmm6 817 psrld xmm8, 12 818 pslld xmm6, 20 819 por xmm6, xmm8 820 movdqa xmm8, xmm7 821 psrld xmm8, 12 822 pslld xmm7, 20 823 por xmm7, xmm8 824 movdqa xmm8, xmm4 825 psrld xmm8, 12 826 pslld xmm4, 20 827 por xmm4, xmm8 828 paddd xmm0, xmmword ptr [rsp] 829 paddd xmm1, xmmword ptr [rsp+0x20] 830 paddd xmm2, xmmword ptr [rsp+0x80] 831 paddd xmm3, xmmword ptr [rsp+0x60] 832 paddd xmm0, xmm5 833 paddd xmm1, xmm6 834 paddd xmm2, xmm7 835 paddd xmm3, xmm4 836 pxor xmm15, xmm0 837 pxor xmm12, xmm1 838 pxor xmm13, xmm2 839 pxor xmm14, xmm3 840 movdqa xmm8, xmmword ptr [ROT8+rip] 841 pshufb xmm15, xmm8 842 pshufb xmm12, xmm8 843 pshufb xmm13, xmm8 844 pshufb xmm14, xmm8 845 paddd xmm10, xmm15 846 paddd xmm11, xmm12 847 movdqa xmm8, xmmword ptr [rsp+0x100] 848 paddd xmm8, xmm13 849 paddd xmm9, xmm14 850 pxor xmm5, xmm10 851 pxor xmm6, xmm11 852 pxor xmm7, xmm8 853 pxor xmm4, xmm9 854 movdqa xmmword ptr [rsp+0x100], xmm8 855 movdqa xmm8, xmm5 856 psrld xmm8, 7 857 pslld xmm5, 25 858 por xmm5, xmm8 859 movdqa xmm8, xmm6 860 psrld xmm8, 7 861 pslld xmm6, 25 862 por xmm6, xmm8 863 movdqa xmm8, xmm7 864 psrld xmm8, 7 865 pslld xmm7, 25 866 por xmm7, xmm8 867 movdqa xmm8, xmm4 868 psrld xmm8, 7 869 pslld xmm4, 25 870 por xmm4, xmm8 871 paddd xmm0, xmmword ptr [rsp+0xC0] 872 paddd xmm1, xmmword ptr [rsp+0x90] 873 paddd xmm2, xmmword ptr [rsp+0xF0] 874 paddd xmm3, xmmword ptr [rsp+0xE0] 875 paddd xmm0, xmm4 876 paddd xmm1, xmm5 877 paddd xmm2, xmm6 878 paddd xmm3, xmm7 879 pxor xmm12, xmm0 880 pxor xmm13, xmm1 881 pxor xmm14, xmm2 882 pxor xmm15, xmm3 883 movdqa xmm8, xmmword ptr [ROT16+rip] 884 pshufb xmm12, xmm8 885 pshufb xmm13, xmm8 886 pshufb xmm14, xmm8 887 pshufb xmm15, 
xmm8 888 movdqa xmm8, xmmword ptr [rsp+0x100] 889 paddd xmm8, xmm12 890 paddd xmm9, xmm13 891 paddd xmm10, xmm14 892 paddd xmm11, xmm15 893 pxor xmm4, xmm8 894 pxor xmm5, xmm9 895 pxor xmm6, xmm10 896 pxor xmm7, xmm11 897 movdqa xmmword ptr [rsp+0x100], xmm8 898 movdqa xmm8, xmm4 899 psrld xmm8, 12 900 pslld xmm4, 20 901 por xmm4, xmm8 902 movdqa xmm8, xmm5 903 psrld xmm8, 12 904 pslld xmm5, 20 905 por xmm5, xmm8 906 movdqa xmm8, xmm6 907 psrld xmm8, 12 908 pslld xmm6, 20 909 por xmm6, xmm8 910 movdqa xmm8, xmm7 911 psrld xmm8, 12 912 pslld xmm7, 20 913 por xmm7, xmm8 914 paddd xmm0, xmmword ptr [rsp+0xD0] 915 paddd xmm1, xmmword ptr [rsp+0xB0] 916 paddd xmm2, xmmword ptr [rsp+0xA0] 917 paddd xmm3, xmmword ptr [rsp+0x80] 918 paddd xmm0, xmm4 919 paddd xmm1, xmm5 920 paddd xmm2, xmm6 921 paddd xmm3, xmm7 922 pxor xmm12, xmm0 923 pxor xmm13, xmm1 924 pxor xmm14, xmm2 925 pxor xmm15, xmm3 926 movdqa xmm8, xmmword ptr [ROT8+rip] 927 pshufb xmm12, xmm8 928 pshufb xmm13, xmm8 929 pshufb xmm14, xmm8 930 pshufb xmm15, xmm8 931 movdqa xmm8, xmmword ptr [rsp+0x100] 932 paddd xmm8, xmm12 933 paddd xmm9, xmm13 934 paddd xmm10, xmm14 935 paddd xmm11, xmm15 936 pxor xmm4, xmm8 937 pxor xmm5, xmm9 938 pxor xmm6, xmm10 939 pxor xmm7, xmm11 940 movdqa xmmword ptr [rsp+0x100], xmm8 941 movdqa xmm8, xmm4 942 psrld xmm8, 7 943 pslld xmm4, 25 944 por xmm4, xmm8 945 movdqa xmm8, xmm5 946 psrld xmm8, 7 947 pslld xmm5, 25 948 por xmm5, xmm8 949 movdqa xmm8, xmm6 950 psrld xmm8, 7 951 pslld xmm6, 25 952 por xmm6, xmm8 953 movdqa xmm8, xmm7 954 psrld xmm8, 7 955 pslld xmm7, 25 956 por xmm7, xmm8 957 paddd xmm0, xmmword ptr [rsp+0x70] 958 paddd xmm1, xmmword ptr [rsp+0x50] 959 paddd xmm2, xmmword ptr [rsp] 960 paddd xmm3, xmmword ptr [rsp+0x60] 961 paddd xmm0, xmm5 962 paddd xmm1, xmm6 963 paddd xmm2, xmm7 964 paddd xmm3, xmm4 965 pxor xmm15, xmm0 966 pxor xmm12, xmm1 967 pxor xmm13, xmm2 968 pxor xmm14, xmm3 969 movdqa xmm8, xmmword ptr [ROT16+rip] 970 pshufb xmm15, xmm8 971 pshufb xmm12, 
xmm8 972 pshufb xmm13, xmm8 973 pshufb xmm14, xmm8 974 paddd xmm10, xmm15 975 paddd xmm11, xmm12 976 movdqa xmm8, xmmword ptr [rsp+0x100] 977 paddd xmm8, xmm13 978 paddd xmm9, xmm14 979 pxor xmm5, xmm10 980 pxor xmm6, xmm11 981 pxor xmm7, xmm8 982 pxor xmm4, xmm9 983 movdqa xmmword ptr [rsp+0x100], xmm8 984 movdqa xmm8, xmm5 985 psrld xmm8, 12 986 pslld xmm5, 20 987 por xmm5, xmm8 988 movdqa xmm8, xmm6 989 psrld xmm8, 12 990 pslld xmm6, 20 991 por xmm6, xmm8 992 movdqa xmm8, xmm7 993 psrld xmm8, 12 994 pslld xmm7, 20 995 por xmm7, xmm8 996 movdqa xmm8, xmm4 997 psrld xmm8, 12 998 pslld xmm4, 20 999 por xmm4, xmm8 1000 paddd xmm0, xmmword ptr [rsp+0x20] 1001 paddd xmm1, xmmword ptr [rsp+0x30] 1002 paddd xmm2, xmmword ptr [rsp+0x10] 1003 paddd xmm3, xmmword ptr [rsp+0x40] 1004 paddd xmm0, xmm5 1005 paddd xmm1, xmm6 1006 paddd xmm2, xmm7 1007 paddd xmm3, xmm4 1008 pxor xmm15, xmm0 1009 pxor xmm12, xmm1 1010 pxor xmm13, xmm2 1011 pxor xmm14, xmm3 1012 movdqa xmm8, xmmword ptr [ROT8+rip] 1013 pshufb xmm15, xmm8 1014 pshufb xmm12, xmm8 1015 pshufb xmm13, xmm8 1016 pshufb xmm14, xmm8 1017 paddd xmm10, xmm15 1018 paddd xmm11, xmm12 1019 movdqa xmm8, xmmword ptr [rsp+0x100] 1020 paddd xmm8, xmm13 1021 paddd xmm9, xmm14 1022 pxor xmm5, xmm10 1023 pxor xmm6, xmm11 1024 pxor xmm7, xmm8 1025 pxor xmm4, xmm9 1026 movdqa xmmword ptr [rsp+0x100], xmm8 1027 movdqa xmm8, xmm5 1028 psrld xmm8, 7 1029 pslld xmm5, 25 1030 por xmm5, xmm8 1031 movdqa xmm8, xmm6 1032 psrld xmm8, 7 1033 pslld xmm6, 25 1034 por xmm6, xmm8 1035 movdqa xmm8, xmm7 1036 psrld xmm8, 7 1037 pslld xmm7, 25 1038 por xmm7, xmm8 1039 movdqa xmm8, xmm4 1040 psrld xmm8, 7 1041 pslld xmm4, 25 1042 por xmm4, xmm8 1043 paddd xmm0, xmmword ptr [rsp+0x90] 1044 paddd xmm1, xmmword ptr [rsp+0xB0] 1045 paddd xmm2, xmmword ptr [rsp+0x80] 1046 paddd xmm3, xmmword ptr [rsp+0xF0] 1047 paddd xmm0, xmm4 1048 paddd xmm1, xmm5 1049 paddd xmm2, xmm6 1050 paddd xmm3, xmm7 1051 pxor xmm12, xmm0 1052 pxor xmm13, xmm1 1053 pxor xmm14, xmm2 
1054 pxor xmm15, xmm3 1055 movdqa xmm8, xmmword ptr [ROT16+rip] 1056 pshufb xmm12, xmm8 1057 pshufb xmm13, xmm8 1058 pshufb xmm14, xmm8 1059 pshufb xmm15, xmm8 1060 movdqa xmm8, xmmword ptr [rsp+0x100] 1061 paddd xmm8, xmm12 1062 paddd xmm9, xmm13 1063 paddd xmm10, xmm14 1064 paddd xmm11, xmm15 1065 pxor xmm4, xmm8 1066 pxor xmm5, xmm9 1067 pxor xmm6, xmm10 1068 pxor xmm7, xmm11 1069 movdqa xmmword ptr [rsp+0x100], xmm8 1070 movdqa xmm8, xmm4 1071 psrld xmm8, 12 1072 pslld xmm4, 20 1073 por xmm4, xmm8 1074 movdqa xmm8, xmm5 1075 psrld xmm8, 12 1076 pslld xmm5, 20 1077 por xmm5, xmm8 1078 movdqa xmm8, xmm6 1079 psrld xmm8, 12 1080 pslld xmm6, 20 1081 por xmm6, xmm8 1082 movdqa xmm8, xmm7 1083 psrld xmm8, 12 1084 pslld xmm7, 20 1085 por xmm7, xmm8 1086 paddd xmm0, xmmword ptr [rsp+0xE0] 1087 paddd xmm1, xmmword ptr [rsp+0x50] 1088 paddd xmm2, xmmword ptr [rsp+0xC0] 1089 paddd xmm3, xmmword ptr [rsp+0x10] 1090 paddd xmm0, xmm4 1091 paddd xmm1, xmm5 1092 paddd xmm2, xmm6 1093 paddd xmm3, xmm7 1094 pxor xmm12, xmm0 1095 pxor xmm13, xmm1 1096 pxor xmm14, xmm2 1097 pxor xmm15, xmm3 1098 movdqa xmm8, xmmword ptr [ROT8+rip] 1099 pshufb xmm12, xmm8 1100 pshufb xmm13, xmm8 1101 pshufb xmm14, xmm8 1102 pshufb xmm15, xmm8 1103 movdqa xmm8, xmmword ptr [rsp+0x100] 1104 paddd xmm8, xmm12 1105 paddd xmm9, xmm13 1106 paddd xmm10, xmm14 1107 paddd xmm11, xmm15 1108 pxor xmm4, xmm8 1109 pxor xmm5, xmm9 1110 pxor xmm6, xmm10 1111 pxor xmm7, xmm11 1112 movdqa xmmword ptr [rsp+0x100], xmm8 1113 movdqa xmm8, xmm4 1114 psrld xmm8, 7 1115 pslld xmm4, 25 1116 por xmm4, xmm8 1117 movdqa xmm8, xmm5 1118 psrld xmm8, 7 1119 pslld xmm5, 25 1120 por xmm5, xmm8 1121 movdqa xmm8, xmm6 1122 psrld xmm8, 7 1123 pslld xmm6, 25 1124 por xmm6, xmm8 1125 movdqa xmm8, xmm7 1126 psrld xmm8, 7 1127 pslld xmm7, 25 1128 por xmm7, xmm8 1129 paddd xmm0, xmmword ptr [rsp+0xD0] 1130 paddd xmm1, xmmword ptr [rsp] 1131 paddd xmm2, xmmword ptr [rsp+0x20] 1132 paddd xmm3, xmmword ptr [rsp+0x40] 1133 paddd xmm0, xmm5 
1134 paddd xmm1, xmm6 1135 paddd xmm2, xmm7 1136 paddd xmm3, xmm4 1137 pxor xmm15, xmm0 1138 pxor xmm12, xmm1 1139 pxor xmm13, xmm2 1140 pxor xmm14, xmm3 1141 movdqa xmm8, xmmword ptr [ROT16+rip] 1142 pshufb xmm15, xmm8 1143 pshufb xmm12, xmm8 1144 pshufb xmm13, xmm8 1145 pshufb xmm14, xmm8 1146 paddd xmm10, xmm15 1147 paddd xmm11, xmm12 1148 movdqa xmm8, xmmword ptr [rsp+0x100] 1149 paddd xmm8, xmm13 1150 paddd xmm9, xmm14 1151 pxor xmm5, xmm10 1152 pxor xmm6, xmm11 1153 pxor xmm7, xmm8 1154 pxor xmm4, xmm9 1155 movdqa xmmword ptr [rsp+0x100], xmm8 1156 movdqa xmm8, xmm5 1157 psrld xmm8, 12 1158 pslld xmm5, 20 1159 por xmm5, xmm8 1160 movdqa xmm8, xmm6 1161 psrld xmm8, 12 1162 pslld xmm6, 20 1163 por xmm6, xmm8 1164 movdqa xmm8, xmm7 1165 psrld xmm8, 12 1166 pslld xmm7, 20 1167 por xmm7, xmm8 1168 movdqa xmm8, xmm4 1169 psrld xmm8, 12 1170 pslld xmm4, 20 1171 por xmm4, xmm8 1172 paddd xmm0, xmmword ptr [rsp+0x30] 1173 paddd xmm1, xmmword ptr [rsp+0xA0] 1174 paddd xmm2, xmmword ptr [rsp+0x60] 1175 paddd xmm3, xmmword ptr [rsp+0x70] 1176 paddd xmm0, xmm5 1177 paddd xmm1, xmm6 1178 paddd xmm2, xmm7 1179 paddd xmm3, xmm4 1180 pxor xmm15, xmm0 1181 pxor xmm12, xmm1 1182 pxor xmm13, xmm2 1183 pxor xmm14, xmm3 1184 movdqa xmm8, xmmword ptr [ROT8+rip] 1185 pshufb xmm15, xmm8 1186 pshufb xmm12, xmm8 1187 pshufb xmm13, xmm8 1188 pshufb xmm14, xmm8 1189 paddd xmm10, xmm15 1190 paddd xmm11, xmm12 1191 movdqa xmm8, xmmword ptr [rsp+0x100] 1192 paddd xmm8, xmm13 1193 paddd xmm9, xmm14 1194 pxor xmm5, xmm10 1195 pxor xmm6, xmm11 1196 pxor xmm7, xmm8 1197 pxor xmm4, xmm9 1198 movdqa xmmword ptr [rsp+0x100], xmm8 1199 movdqa xmm8, xmm5 1200 psrld xmm8, 7 1201 pslld xmm5, 25 1202 por xmm5, xmm8 1203 movdqa xmm8, xmm6 1204 psrld xmm8, 7 1205 pslld xmm6, 25 1206 por xmm6, xmm8 1207 movdqa xmm8, xmm7 1208 psrld xmm8, 7 1209 pslld xmm7, 25 1210 por xmm7, xmm8 1211 movdqa xmm8, xmm4 1212 psrld xmm8, 7 1213 pslld xmm4, 25 1214 por xmm4, xmm8 1215 paddd xmm0, xmmword ptr [rsp+0xB0] 1216 
paddd xmm1, xmmword ptr [rsp+0x50] 1217 paddd xmm2, xmmword ptr [rsp+0x10] 1218 paddd xmm3, xmmword ptr [rsp+0x80] 1219 paddd xmm0, xmm4 1220 paddd xmm1, xmm5 1221 paddd xmm2, xmm6 1222 paddd xmm3, xmm7 1223 pxor xmm12, xmm0 1224 pxor xmm13, xmm1 1225 pxor xmm14, xmm2 1226 pxor xmm15, xmm3 1227 movdqa xmm8, xmmword ptr [ROT16+rip] 1228 pshufb xmm12, xmm8 1229 pshufb xmm13, xmm8 1230 pshufb xmm14, xmm8 1231 pshufb xmm15, xmm8 1232 movdqa xmm8, xmmword ptr [rsp+0x100] 1233 paddd xmm8, xmm12 1234 paddd xmm9, xmm13 1235 paddd xmm10, xmm14 1236 paddd xmm11, xmm15 1237 pxor xmm4, xmm8 1238 pxor xmm5, xmm9 1239 pxor xmm6, xmm10 1240 pxor xmm7, xmm11 1241 movdqa xmmword ptr [rsp+0x100], xmm8 1242 movdqa xmm8, xmm4 1243 psrld xmm8, 12 1244 pslld xmm4, 20 1245 por xmm4, xmm8 1246 movdqa xmm8, xmm5 1247 psrld xmm8, 12 1248 pslld xmm5, 20 1249 por xmm5, xmm8 1250 movdqa xmm8, xmm6 1251 psrld xmm8, 12 1252 pslld xmm6, 20 1253 por xmm6, xmm8 1254 movdqa xmm8, xmm7 1255 psrld xmm8, 12 1256 pslld xmm7, 20 1257 por xmm7, xmm8 1258 paddd xmm0, xmmword ptr [rsp+0xF0] 1259 paddd xmm1, xmmword ptr [rsp] 1260 paddd xmm2, xmmword ptr [rsp+0x90] 1261 paddd xmm3, xmmword ptr [rsp+0x60] 1262 paddd xmm0, xmm4 1263 paddd xmm1, xmm5 1264 paddd xmm2, xmm6 1265 paddd xmm3, xmm7 1266 pxor xmm12, xmm0 1267 pxor xmm13, xmm1 1268 pxor xmm14, xmm2 1269 pxor xmm15, xmm3 1270 movdqa xmm8, xmmword ptr [ROT8+rip] 1271 pshufb xmm12, xmm8 1272 pshufb xmm13, xmm8 1273 pshufb xmm14, xmm8 1274 pshufb xmm15, xmm8 1275 movdqa xmm8, xmmword ptr [rsp+0x100] 1276 paddd xmm8, xmm12 1277 paddd xmm9, xmm13 1278 paddd xmm10, xmm14 1279 paddd xmm11, xmm15 1280 pxor xmm4, xmm8 1281 pxor xmm5, xmm9 1282 pxor xmm6, xmm10 1283 pxor xmm7, xmm11 1284 movdqa xmmword ptr [rsp+0x100], xmm8 1285 movdqa xmm8, xmm4 1286 psrld xmm8, 7 1287 pslld xmm4, 25 1288 por xmm4, xmm8 1289 movdqa xmm8, xmm5 1290 psrld xmm8, 7 1291 pslld xmm5, 25 1292 por xmm5, xmm8 1293 movdqa xmm8, xmm6 1294 psrld xmm8, 7 1295 pslld xmm6, 25 1296 por xmm6, 
xmm8 1297 movdqa xmm8, xmm7 1298 psrld xmm8, 7 1299 pslld xmm7, 25 1300 por xmm7, xmm8 1301 paddd xmm0, xmmword ptr [rsp+0xE0] 1302 paddd xmm1, xmmword ptr [rsp+0x20] 1303 paddd xmm2, xmmword ptr [rsp+0x30] 1304 paddd xmm3, xmmword ptr [rsp+0x70] 1305 paddd xmm0, xmm5 1306 paddd xmm1, xmm6 1307 paddd xmm2, xmm7 1308 paddd xmm3, xmm4 1309 pxor xmm15, xmm0 1310 pxor xmm12, xmm1 1311 pxor xmm13, xmm2 1312 pxor xmm14, xmm3 1313 movdqa xmm8, xmmword ptr [ROT16+rip] 1314 pshufb xmm15, xmm8 1315 pshufb xmm12, xmm8 1316 pshufb xmm13, xmm8 1317 pshufb xmm14, xmm8 1318 paddd xmm10, xmm15 1319 paddd xmm11, xmm12 1320 movdqa xmm8, xmmword ptr [rsp+0x100] 1321 paddd xmm8, xmm13 1322 paddd xmm9, xmm14 1323 pxor xmm5, xmm10 1324 pxor xmm6, xmm11 1325 pxor xmm7, xmm8 1326 pxor xmm4, xmm9 1327 movdqa xmmword ptr [rsp+0x100], xmm8 1328 movdqa xmm8, xmm5 1329 psrld xmm8, 12 1330 pslld xmm5, 20 1331 por xmm5, xmm8 1332 movdqa xmm8, xmm6 1333 psrld xmm8, 12 1334 pslld xmm6, 20 1335 por xmm6, xmm8 1336 movdqa xmm8, xmm7 1337 psrld xmm8, 12 1338 pslld xmm7, 20 1339 por xmm7, xmm8 1340 movdqa xmm8, xmm4 1341 psrld xmm8, 12 1342 pslld xmm4, 20 1343 por xmm4, xmm8 1344 paddd xmm0, xmmword ptr [rsp+0xA0] 1345 paddd xmm1, xmmword ptr [rsp+0xC0] 1346 paddd xmm2, xmmword ptr [rsp+0x40] 1347 paddd xmm3, xmmword ptr [rsp+0xD0] 1348 paddd xmm0, xmm5 1349 paddd xmm1, xmm6 1350 paddd xmm2, xmm7 1351 paddd xmm3, xmm4 1352 pxor xmm15, xmm0 1353 pxor xmm12, xmm1 1354 pxor xmm13, xmm2 1355 pxor xmm14, xmm3 1356 movdqa xmm8, xmmword ptr [ROT8+rip] 1357 pshufb xmm15, xmm8 1358 pshufb xmm12, xmm8 1359 pshufb xmm13, xmm8 1360 pshufb xmm14, xmm8 1361 paddd xmm10, xmm15 1362 paddd xmm11, xmm12 1363 movdqa xmm8, xmmword ptr [rsp+0x100] 1364 paddd xmm8, xmm13 1365 paddd xmm9, xmm14 1366 pxor xmm5, xmm10 1367 pxor xmm6, xmm11 1368 pxor xmm7, xmm8 1369 pxor xmm4, xmm9 1370 pxor xmm0, xmm8 1371 pxor xmm1, xmm9 1372 pxor xmm2, xmm10 1373 pxor xmm3, xmm11 1374 movdqa xmm8, xmm5 1375 psrld xmm8, 7 1376 pslld xmm5, 25 
1377 por xmm5, xmm8 1378 movdqa xmm8, xmm6 1379 psrld xmm8, 7 1380 pslld xmm6, 25 1381 por xmm6, xmm8 1382 movdqa xmm8, xmm7 1383 psrld xmm8, 7 1384 pslld xmm7, 25 1385 por xmm7, xmm8 1386 movdqa xmm8, xmm4 1387 psrld xmm8, 7 1388 pslld xmm4, 25 1389 por xmm4, xmm8 1390 pxor xmm4, xmm12 1391 pxor xmm5, xmm13 1392 pxor xmm6, xmm14 1393 pxor xmm7, xmm15 1394 mov eax, r13d 1395 jne 9b 1396 movdqa xmm9, xmm0 1397 punpckldq xmm0, xmm1 1398 punpckhdq xmm9, xmm1 1399 movdqa xmm11, xmm2 1400 punpckldq xmm2, xmm3 1401 punpckhdq xmm11, xmm3 1402 movdqa xmm1, xmm0 1403 punpcklqdq xmm0, xmm2 1404 punpckhqdq xmm1, xmm2 1405 movdqa xmm3, xmm9 1406 punpcklqdq xmm9, xmm11 1407 punpckhqdq xmm3, xmm11 1408 movdqu xmmword ptr [rbx], xmm0 1409 movdqu xmmword ptr [rbx+0x20], xmm1 1410 movdqu xmmword ptr [rbx+0x40], xmm9 1411 movdqu xmmword ptr [rbx+0x60], xmm3 1412 movdqa xmm9, xmm4 1413 punpckldq xmm4, xmm5 1414 punpckhdq xmm9, xmm5 1415 movdqa xmm11, xmm6 1416 punpckldq xmm6, xmm7 1417 punpckhdq xmm11, xmm7 1418 movdqa xmm5, xmm4 1419 punpcklqdq xmm4, xmm6 1420 punpckhqdq xmm5, xmm6 1421 movdqa xmm7, xmm9 1422 punpcklqdq xmm9, xmm11 1423 punpckhqdq xmm7, xmm11 1424 movdqu xmmword ptr [rbx+0x10], xmm4 1425 movdqu xmmword ptr [rbx+0x30], xmm5 1426 movdqu xmmword ptr [rbx+0x50], xmm9 1427 movdqu xmmword ptr [rbx+0x70], xmm7 1428 movdqa xmm1, xmmword ptr [rsp+0x110] 1429 movdqa xmm0, xmm1 1430 paddd xmm1, xmmword ptr [rsp+0x150] 1431 movdqa xmmword ptr [rsp+0x110], xmm1 1432 pxor xmm0, xmmword ptr [CMP_MSB_MASK+rip] 1433 pxor xmm1, xmmword ptr [CMP_MSB_MASK+rip] 1434 pcmpgtd xmm0, xmm1 1435 movdqa xmm1, xmmword ptr [rsp+0x120] 1436 psubd xmm1, xmm0 1437 movdqa xmmword ptr [rsp+0x120], xmm1 1438 add rbx, 128 1439 add rdi, 32 1440 sub rsi, 4 1441 cmp rsi, 4 1442 jnc 2b 1443 test rsi, rsi 1444 jnz 3f 1445 4: 1446 mov rsp, rbp 1447 pop rbp 1448 pop rbx 1449 pop r12 1450 pop r13 1451 pop r14 1452 pop r15 1453 ret 1454 .p2align 5 1455 3: 1456 test esi, 0x2 1457 je 3f 1458 movups xmm0, xmmword 
ptr [rcx] 1459 movups xmm1, xmmword ptr [rcx+0x10] 1460 movaps xmm8, xmm0 1461 movaps xmm9, xmm1 1462 movd xmm13, dword ptr [rsp+0x110] 1463 pinsrd xmm13, dword ptr [rsp+0x120], 1 1464 pinsrd xmm13, dword ptr [BLAKE3_BLOCK_LEN+rip], 2 1465 movaps xmmword ptr [rsp], xmm13 1466 movd xmm14, dword ptr [rsp+0x114] 1467 pinsrd xmm14, dword ptr [rsp+0x124], 1 1468 pinsrd xmm14, dword ptr [BLAKE3_BLOCK_LEN+rip], 2 1469 movaps xmmword ptr [rsp+0x10], xmm14 1470 mov r8, qword ptr [rdi] 1471 mov r9, qword ptr [rdi+0x8] 1472 movzx eax, byte ptr [rbp+0x40] 1473 or eax, r13d 1474 xor edx, edx 1475 2: 1476 mov r14d, eax 1477 or eax, r12d 1478 add rdx, 64 1479 cmp rdx, r15 1480 cmovne eax, r14d 1481 movaps xmm2, xmmword ptr [BLAKE3_IV+rip] 1482 movaps xmm10, xmm2 1483 movups xmm4, xmmword ptr [r8+rdx-0x40] 1484 movups xmm5, xmmword ptr [r8+rdx-0x30] 1485 movaps xmm3, xmm4 1486 shufps xmm4, xmm5, 136 1487 shufps xmm3, xmm5, 221 1488 movaps xmm5, xmm3 1489 movups xmm6, xmmword ptr [r8+rdx-0x20] 1490 movups xmm7, xmmword ptr [r8+rdx-0x10] 1491 movaps xmm3, xmm6 1492 shufps xmm6, xmm7, 136 1493 pshufd xmm6, xmm6, 0x93 1494 shufps xmm3, xmm7, 221 1495 pshufd xmm7, xmm3, 0x93 1496 movups xmm12, xmmword ptr [r9+rdx-0x40] 1497 movups xmm13, xmmword ptr [r9+rdx-0x30] 1498 movaps xmm11, xmm12 1499 shufps xmm12, xmm13, 136 1500 shufps xmm11, xmm13, 221 1501 movaps xmm13, xmm11 1502 movups xmm14, xmmword ptr [r9+rdx-0x20] 1503 movups xmm15, xmmword ptr [r9+rdx-0x10] 1504 movaps xmm11, xmm14 1505 shufps xmm14, xmm15, 136 1506 pshufd xmm14, xmm14, 0x93 1507 shufps xmm11, xmm15, 221 1508 pshufd xmm15, xmm11, 0x93 1509 movaps xmm3, xmmword ptr [rsp] 1510 movaps xmm11, xmmword ptr [rsp+0x10] 1511 pinsrd xmm3, eax, 3 1512 pinsrd xmm11, eax, 3 1513 mov al, 7 1514 9: 1515 paddd xmm0, xmm4 1516 paddd xmm8, xmm12 1517 movaps xmmword ptr [rsp+0x20], xmm4 1518 movaps xmmword ptr [rsp+0x30], xmm12 1519 paddd xmm0, xmm1 1520 paddd xmm8, xmm9 1521 pxor xmm3, xmm0 1522 pxor xmm11, xmm8 1523 movaps xmm12, 
xmmword ptr [ROT16+rip] 1524 pshufb xmm3, xmm12 1525 pshufb xmm11, xmm12 1526 paddd xmm2, xmm3 1527 paddd xmm10, xmm11 1528 pxor xmm1, xmm2 1529 pxor xmm9, xmm10 1530 movdqa xmm4, xmm1 1531 pslld xmm1, 20 1532 psrld xmm4, 12 1533 por xmm1, xmm4 1534 movdqa xmm4, xmm9 1535 pslld xmm9, 20 1536 psrld xmm4, 12 1537 por xmm9, xmm4 1538 paddd xmm0, xmm5 1539 paddd xmm8, xmm13 1540 movaps xmmword ptr [rsp+0x40], xmm5 1541 movaps xmmword ptr [rsp+0x50], xmm13 1542 paddd xmm0, xmm1 1543 paddd xmm8, xmm9 1544 pxor xmm3, xmm0 1545 pxor xmm11, xmm8 1546 movaps xmm13, xmmword ptr [ROT8+rip] 1547 pshufb xmm3, xmm13 1548 pshufb xmm11, xmm13 1549 paddd xmm2, xmm3 1550 paddd xmm10, xmm11 1551 pxor xmm1, xmm2 1552 pxor xmm9, xmm10 1553 movdqa xmm4, xmm1 1554 pslld xmm1, 25 1555 psrld xmm4, 7 1556 por xmm1, xmm4 1557 movdqa xmm4, xmm9 1558 pslld xmm9, 25 1559 psrld xmm4, 7 1560 por xmm9, xmm4 1561 pshufd xmm0, xmm0, 0x93 1562 pshufd xmm8, xmm8, 0x93 1563 pshufd xmm3, xmm3, 0x4E 1564 pshufd xmm11, xmm11, 0x4E 1565 pshufd xmm2, xmm2, 0x39 1566 pshufd xmm10, xmm10, 0x39 1567 paddd xmm0, xmm6 1568 paddd xmm8, xmm14 1569 paddd xmm0, xmm1 1570 paddd xmm8, xmm9 1571 pxor xmm3, xmm0 1572 pxor xmm11, xmm8 1573 pshufb xmm3, xmm12 1574 pshufb xmm11, xmm12 1575 paddd xmm2, xmm3 1576 paddd xmm10, xmm11 1577 pxor xmm1, xmm2 1578 pxor xmm9, xmm10 1579 movdqa xmm4, xmm1 1580 pslld xmm1, 20 1581 psrld xmm4, 12 1582 por xmm1, xmm4 1583 movdqa xmm4, xmm9 1584 pslld xmm9, 20 1585 psrld xmm4, 12 1586 por xmm9, xmm4 1587 paddd xmm0, xmm7 1588 paddd xmm8, xmm15 1589 paddd xmm0, xmm1 1590 paddd xmm8, xmm9 1591 pxor xmm3, xmm0 1592 pxor xmm11, xmm8 1593 pshufb xmm3, xmm13 1594 pshufb xmm11, xmm13 1595 paddd xmm2, xmm3 1596 paddd xmm10, xmm11 1597 pxor xmm1, xmm2 1598 pxor xmm9, xmm10 1599 movdqa xmm4, xmm1 1600 pslld xmm1, 25 1601 psrld xmm4, 7 1602 por xmm1, xmm4 1603 movdqa xmm4, xmm9 1604 pslld xmm9, 25 1605 psrld xmm4, 7 1606 por xmm9, xmm4 1607 pshufd xmm0, xmm0, 0x39 1608 pshufd xmm8, xmm8, 0x39 1609 
pshufd xmm3, xmm3, 0x4E 1610 pshufd xmm11, xmm11, 0x4E 1611 pshufd xmm2, xmm2, 0x93 1612 pshufd xmm10, xmm10, 0x93 1613 dec al 1614 je 9f 1615 movdqa xmm12, xmmword ptr [rsp+0x20] 1616 movdqa xmm5, xmmword ptr [rsp+0x40] 1617 pshufd xmm13, xmm12, 0x0F 1618 shufps xmm12, xmm5, 214 1619 pshufd xmm4, xmm12, 0x39 1620 movdqa xmm12, xmm6 1621 shufps xmm12, xmm7, 250 1622 pblendw xmm13, xmm12, 0xCC 1623 movdqa xmm12, xmm7 1624 punpcklqdq xmm12, xmm5 1625 pblendw xmm12, xmm6, 0xC0 1626 pshufd xmm12, xmm12, 0x78 1627 punpckhdq xmm5, xmm7 1628 punpckldq xmm6, xmm5 1629 pshufd xmm7, xmm6, 0x1E 1630 movdqa xmmword ptr [rsp+0x20], xmm13 1631 movdqa xmmword ptr [rsp+0x40], xmm12 1632 movdqa xmm5, xmmword ptr [rsp+0x30] 1633 movdqa xmm13, xmmword ptr [rsp+0x50] 1634 pshufd xmm6, xmm5, 0x0F 1635 shufps xmm5, xmm13, 214 1636 pshufd xmm12, xmm5, 0x39 1637 movdqa xmm5, xmm14 1638 shufps xmm5, xmm15, 250 1639 pblendw xmm6, xmm5, 0xCC 1640 movdqa xmm5, xmm15 1641 punpcklqdq xmm5, xmm13 1642 pblendw xmm5, xmm14, 0xC0 1643 pshufd xmm5, xmm5, 0x78 1644 punpckhdq xmm13, xmm15 1645 punpckldq xmm14, xmm13 1646 pshufd xmm15, xmm14, 0x1E 1647 movdqa xmm13, xmm6 1648 movdqa xmm14, xmm5 1649 movdqa xmm5, xmmword ptr [rsp+0x20] 1650 movdqa xmm6, xmmword ptr [rsp+0x40] 1651 jmp 9b 1652 9: 1653 pxor xmm0, xmm2 1654 pxor xmm1, xmm3 1655 pxor xmm8, xmm10 1656 pxor xmm9, xmm11 1657 mov eax, r13d 1658 cmp rdx, r15 1659 jne 2b 1660 movups xmmword ptr [rbx], xmm0 1661 movups xmmword ptr [rbx+0x10], xmm1 1662 movups xmmword ptr [rbx+0x20], xmm8 1663 movups xmmword ptr [rbx+0x30], xmm9 1664 movdqa xmm0, xmmword ptr [rsp+0x130] 1665 movdqa xmm1, xmmword ptr [rsp+0x110] 1666 movdqa xmm2, xmmword ptr [rsp+0x120] 1667 movdqu xmm3, xmmword ptr [rsp+0x118] 1668 movdqu xmm4, xmmword ptr [rsp+0x128] 1669 blendvps xmm1, xmm3, xmm0 1670 blendvps xmm2, xmm4, xmm0 1671 movdqa xmmword ptr [rsp+0x110], xmm1 1672 movdqa xmmword ptr [rsp+0x120], xmm2 1673 add rdi, 16 1674 add rbx, 64 1675 sub rsi, 2 1676 3: 1677 test 
esi, 0x1 1678 je 4b 1679 movups xmm0, xmmword ptr [rcx] 1680 movups xmm1, xmmword ptr [rcx+0x10] 1681 movd xmm13, dword ptr [rsp+0x110] 1682 pinsrd xmm13, dword ptr [rsp+0x120], 1 1683 pinsrd xmm13, dword ptr [BLAKE3_BLOCK_LEN+rip], 2 1684 movaps xmm14, xmmword ptr [ROT8+rip] 1685 movaps xmm15, xmmword ptr [ROT16+rip] 1686 mov r8, qword ptr [rdi] 1687 movzx eax, byte ptr [rbp+0x40] 1688 or eax, r13d 1689 xor edx, edx 1690 2: 1691 mov r14d, eax 1692 or eax, r12d 1693 add rdx, 64 1694 cmp rdx, r15 1695 cmovne eax, r14d 1696 movaps xmm2, xmmword ptr [BLAKE3_IV+rip] 1697 movaps xmm3, xmm13 1698 pinsrd xmm3, eax, 3 1699 movups xmm4, xmmword ptr [r8+rdx-0x40] 1700 movups xmm5, xmmword ptr [r8+rdx-0x30] 1701 movaps xmm8, xmm4 1702 shufps xmm4, xmm5, 136 1703 shufps xmm8, xmm5, 221 1704 movaps xmm5, xmm8 1705 movups xmm6, xmmword ptr [r8+rdx-0x20] 1706 movups xmm7, xmmword ptr [r8+rdx-0x10] 1707 movaps xmm8, xmm6 1708 shufps xmm6, xmm7, 136 1709 pshufd xmm6, xmm6, 0x93 1710 shufps xmm8, xmm7, 221 1711 pshufd xmm7, xmm8, 0x93 1712 mov al, 7 1713 9: 1714 paddd xmm0, xmm4 1715 paddd xmm0, xmm1 1716 pxor xmm3, xmm0 1717 pshufb xmm3, xmm15 1718 paddd xmm2, xmm3 1719 pxor xmm1, xmm2 1720 movdqa xmm11, xmm1 1721 pslld xmm1, 20 1722 psrld xmm11, 12 1723 por xmm1, xmm11 1724 paddd xmm0, xmm5 1725 paddd xmm0, xmm1 1726 pxor xmm3, xmm0 1727 pshufb xmm3, xmm14 1728 paddd xmm2, xmm3 1729 pxor xmm1, xmm2 1730 movdqa xmm11, xmm1 1731 pslld xmm1, 25 1732 psrld xmm11, 7 1733 por xmm1, xmm11 1734 pshufd xmm0, xmm0, 0x93 1735 pshufd xmm3, xmm3, 0x4E 1736 pshufd xmm2, xmm2, 0x39 1737 paddd xmm0, xmm6 1738 paddd xmm0, xmm1 1739 pxor xmm3, xmm0 1740 pshufb xmm3, xmm15 1741 paddd xmm2, xmm3 1742 pxor xmm1, xmm2 1743 movdqa xmm11, xmm1 1744 pslld xmm1, 20 1745 psrld xmm11, 12 1746 por xmm1, xmm11 1747 paddd xmm0, xmm7 1748 paddd xmm0, xmm1 1749 pxor xmm3, xmm0 1750 pshufb xmm3, xmm14 1751 paddd xmm2, xmm3 1752 pxor xmm1, xmm2 1753 movdqa xmm11, xmm1 1754 pslld xmm1, 25 1755 psrld xmm11, 7 1756 
/*
 * Tail of blake3_hash_many_sse41 (single-input path): end of the second
 * half-round, the between-round message shuffle, and the per-block /
 * per-input loop exits.  al is the round counter (loaded with 7 at the top
 * of each block).
 */
        por     xmm1, xmm11
        pshufd  xmm0, xmm0, 0x39
        pshufd  xmm3, xmm3, 0x4E
        pshufd  xmm2, xmm2, 0x93
        dec     al
        jz      9f
        /* Re-arrange the message words in xmm4..xmm7 for the next round. */
        movdqa  xmm8, xmm4
        shufps  xmm8, xmm5, 214
        pshufd  xmm9, xmm4, 0x0F
        pshufd  xmm4, xmm8, 0x39
        movdqa  xmm8, xmm6
        shufps  xmm8, xmm7, 250
        pblendw xmm9, xmm8, 0xCC
        movdqa  xmm8, xmm7
        punpcklqdq xmm8, xmm5
        pblendw xmm8, xmm6, 0xC0
        pshufd  xmm8, xmm8, 0x78
        punpckhdq xmm5, xmm7
        punpckldq xmm6, xmm5
        pshufd  xmm7, xmm6, 0x1E
        movdqa  xmm5, xmm9
        movdqa  xmm6, xmm8
        jmp     9b
9:
        /* Fold rows 2/3 into rows 0/1; loop while rdx != r15 (more blocks). */
        pxor    xmm0, xmm2
        pxor    xmm1, xmm3
        mov     eax, r13d
        cmp     rdx, r15
        jne     2b
        /* Last block done: write the 32-byte result and go to the epilogue. */
        movups  xmmword ptr [rbx], xmm0
        movups  xmmword ptr [rbx+0x10], xmm1
        jmp     4b

/*
 * blake3_compress_in_place_sse41
 *
 * ABI: System V AMD64, leaf function, no stack usage.
 *
 * In:   rdi = pointer to 32 bytes of state, read and then overwritten
 *             (unaligned access is used)
 *       rsi = pointer to 64 bytes of message block (unaligned access)
 *       dl  = becomes state word 14; blake3_hash_many_sse41 puts
 *             BLAKE3_BLOCK_LEN in that slot, so presumably block_len
 *       rcx = 64-bit value that becomes state words 12/13 (the slots
 *             blake3_hash_many_sse41 fills with its counter words)
 *       r8b = becomes state word 15 (the slot blake3_hash_many_sse41 fills
 *             with its flag bits)
 * Out:  [rdi .. rdi+31] = (row0 ^ row2), (row1 ^ row3) after 7 rounds
 * Clobbers: rax, rdx, xmm0-xmm9, xmm11, xmm14, xmm15, flags
 *
 * Register roles inside the round loop:
 *       xmm0..xmm3  = state rows 0..3 (row 2 starts as BLAKE3_IV)
 *       xmm4..xmm7  = the four message vectors consumed by one round
 *       xmm14/xmm15 = ROT8 / ROT16 pshufb masks
 *       al          = rounds remaining
 *
 * The two narrow arguments are zero-extended from their low bytes, exactly
 * as blake3_compress_xof_sse41 does, so stale upper bits in rdx/r8 can
 * never leak into row 3 of the state.
 * NOTE(review): this assumes both are 8-bit arguments, as compress_xof
 * already assumes -- confirm against the C prototype.
 */
.p2align 6
blake3_compress_in_place_sse41:
_blake3_compress_in_place_sse41:
        _CET_ENDBR
        movups  xmm0, xmmword ptr [rdi]
        movups  xmm1, xmmword ptr [rdi+0x10]
        movaps  xmm2, xmmword ptr [BLAKE3_IV+rip]
        /* rdx = dl | (r8b << 32): high qword of row 3. */
        movzx   eax, r8b
        movzx   edx, dl
        shl     rax, 32
        add     rdx, rax
        movq    xmm3, rcx
        movq    xmm4, rdx
        punpcklqdq xmm3, xmm4
        /*
         * Load the 16 message words and split them by parity:
         *   xmm4 = words 0,2,4,6      xmm5 = words 1,3,5,7
         *   xmm6 = words 14,8,10,12   xmm7 = words 15,9,11,13
         * (the second pair is pre-rotated by one lane with pshufd 0x93).
         */
        movups  xmm4, xmmword ptr [rsi]
        movups  xmm5, xmmword ptr [rsi+0x10]
        movaps  xmm8, xmm4
        shufps  xmm4, xmm5, 136
        shufps  xmm8, xmm5, 221
        movaps  xmm5, xmm8
        movups  xmm6, xmmword ptr [rsi+0x20]
        movups  xmm7, xmmword ptr [rsi+0x30]
        movaps  xmm8, xmm6
        shufps  xmm6, xmm7, 136
        pshufd  xmm6, xmm6, 0x93
        shufps  xmm8, xmm7, 221
        pshufd  xmm7, xmm8, 0x93
        movaps  xmm14, xmmword ptr [ROT8+rip]
        movaps  xmm15, xmmword ptr [ROT16+rip]
        mov     al, 7
9:
        /* First half-round: add xmm4, rotate row 3 by 16 and row 1 by 12. */
        paddd   xmm0, xmm4
        paddd   xmm0, xmm1
        pxor    xmm3, xmm0
        pshufb  xmm3, xmm15
        paddd   xmm2, xmm3
        pxor    xmm1, xmm2
        movdqa  xmm11, xmm1
        pslld   xmm1, 20
        psrld   xmm11, 12
        por     xmm1, xmm11
        /* Add xmm5, rotate row 3 by 8 and row 1 by 7. */
        paddd   xmm0, xmm5
        paddd   xmm0, xmm1
        pxor    xmm3, xmm0
        pshufb  xmm3, xmm14
        paddd   xmm2, xmm3
        pxor    xmm1, xmm2
        movdqa  xmm11, xmm1
        pslld   xmm1, 25
        psrld   xmm11, 7
        por     xmm1, xmm11
        /* Rotate the lanes of rows 0, 3 and 2 relative to row 1. */
        pshufd  xmm0, xmm0, 0x93
        pshufd  xmm3, xmm3, 0x4E
        pshufd  xmm2, xmm2, 0x39
        /* Second half-round: same mixing with xmm6 and xmm7. */
        paddd   xmm0, xmm6
        paddd   xmm0, xmm1
        pxor    xmm3, xmm0
        pshufb  xmm3, xmm15
        paddd   xmm2, xmm3
        pxor    xmm1, xmm2
        movdqa  xmm11, xmm1
        pslld   xmm1, 20
        psrld   xmm11, 12
        por     xmm1, xmm11
        paddd   xmm0, xmm7
        paddd   xmm0, xmm1
        pxor    xmm3, xmm0
        pshufb  xmm3, xmm14
        paddd   xmm2, xmm3
        pxor    xmm1, xmm2
        movdqa  xmm11, xmm1
        pslld   xmm1, 25
        psrld   xmm11, 7
        por     xmm1, xmm11
        /* Undo the lane rotation applied before the second half-round. */
        pshufd  xmm0, xmm0, 0x39
        pshufd  xmm3, xmm3, 0x4E
        pshufd  xmm2, xmm2, 0x93
        dec     al
        jz      9f
        /* Re-arrange the message words in xmm4..xmm7 for the next round. */
        movdqa  xmm8, xmm4
        shufps  xmm8, xmm5, 214
        pshufd  xmm9, xmm4, 0x0F
        pshufd  xmm4, xmm8, 0x39
        movdqa  xmm8, xmm6
        shufps  xmm8, xmm7, 250
        pblendw xmm9, xmm8, 0xCC
        movdqa  xmm8, xmm7
        punpcklqdq xmm8, xmm5
        pblendw xmm8, xmm6, 0xC0
        pshufd  xmm8, xmm8, 0x78
        punpckhdq xmm5, xmm7
        punpckldq xmm6, xmm5
        pshufd  xmm7, xmm6, 0x1E
        movdqa  xmm5, xmm9
        movdqa  xmm6, xmm8
        jmp     9b
9:
        /* Fold rows 2/3 into rows 0/1 and write the result back in place. */
        pxor    xmm0, xmm2
        pxor    xmm1, xmm3
        movups  xmmword ptr [rdi], xmm0
        movups  xmmword ptr [rdi+0x10], xmm1
        ret

/*
 * blake3_compress_xof_sse41
 *
 * ABI: System V AMD64, leaf function, no stack usage.
 *
 * In:   rdi = pointer to 32 bytes of input state (read only here)
 *       rsi = pointer to 64 bytes of message block (unaligned access)
 *       dl  = becomes state word 14 (zero-extended from 8 bits)
 *       rcx = 64-bit value that becomes state words 12/13
 *       r8b = becomes state word 15 (zero-extended from 8 bits)
 *       r9  = pointer to 64 bytes of output (unaligned access)
 * Out:  [r9 .. r9+31]    = (row0 ^ row2), (row1 ^ row3)
 *       [r9+32 .. r9+63] = (row2 ^ in[0..15]), (row3 ^ in[16..31]),
 *                          where in[] is the state re-read from [rdi]
 * Clobbers: rax, rdx, xmm0-xmm9, xmm11, xmm14, xmm15, flags
 *
 * The setup and the 7-round loop use the same instruction sequence as
 * blake3_compress_in_place_sse41; only the final fold/store differs.
 */
.p2align 6
blake3_compress_xof_sse41:
_blake3_compress_xof_sse41:
        _CET_ENDBR
        movups  xmm0, xmmword ptr [rdi]
        movups  xmm1, xmmword ptr [rdi+0x10]
        movaps  xmm2, xmmword ptr [BLAKE3_IV+rip]
        /* rdx = dl | (r8b << 32): high qword of row 3. */
        movzx   eax, r8b
        movzx   edx, dl
        shl     rax, 32
        add     rdx, rax
        movq    xmm3, rcx
        movq    xmm4, rdx
        punpcklqdq xmm3, xmm4
        /*
         * Load the 16 message words and split them by parity:
         *   xmm4 = words 0,2,4,6      xmm5 = words 1,3,5,7
         *   xmm6 = words 14,8,10,12   xmm7 = words 15,9,11,13
         */
        movups  xmm4, xmmword ptr [rsi]
        movups  xmm5, xmmword ptr [rsi+0x10]
        movaps  xmm8, xmm4
        shufps  xmm4, xmm5, 136
        shufps  xmm8, xmm5, 221
        movaps  xmm5, xmm8
        movups  xmm6, xmmword ptr [rsi+0x20]
        movups  xmm7, xmmword ptr [rsi+0x30]
        movaps  xmm8, xmm6
        shufps  xmm6, xmm7, 136
        pshufd  xmm6, xmm6, 0x93
        shufps  xmm8, xmm7, 221
        pshufd  xmm7, xmm8, 0x93
        movaps  xmm14, xmmword ptr [ROT8+rip]
        movaps  xmm15, xmmword ptr [ROT16+rip]
        mov     al, 7
9:
        /* First half-round: add xmm4, rotate row 3 by 16 and row 1 by 12. */
        paddd   xmm0, xmm4
        paddd   xmm0, xmm1
        pxor    xmm3, xmm0
        pshufb  xmm3, xmm15
        paddd   xmm2, xmm3
        pxor    xmm1, xmm2
        movdqa  xmm11, xmm1
        pslld   xmm1, 20
        psrld   xmm11, 12
        por     xmm1, xmm11
        /* Add xmm5, rotate row 3 by 8 and row 1 by 7. */
        paddd   xmm0, xmm5
        paddd   xmm0, xmm1
        pxor    xmm3, xmm0
        pshufb  xmm3, xmm14
        paddd   xmm2, xmm3
        pxor    xmm1, xmm2
        movdqa  xmm11, xmm1
        pslld   xmm1, 25
        psrld   xmm11, 7
        por     xmm1, xmm11
        /* Rotate the lanes of rows 0, 3 and 2 relative to row 1. */
        pshufd  xmm0, xmm0, 0x93
        pshufd  xmm3, xmm3, 0x4E
        pshufd  xmm2, xmm2, 0x39
        /* Second half-round: same mixing with xmm6 and xmm7. */
        paddd   xmm0, xmm6
        paddd   xmm0, xmm1
        pxor    xmm3, xmm0
        pshufb  xmm3, xmm15
        paddd   xmm2, xmm3
        pxor    xmm1, xmm2
        movdqa  xmm11, xmm1
        pslld   xmm1, 20
        psrld   xmm11, 12
        por     xmm1, xmm11
        paddd   xmm0, xmm7
        paddd   xmm0, xmm1
        pxor    xmm3, xmm0
        pshufb  xmm3, xmm14
        paddd   xmm2, xmm3
        pxor    xmm1, xmm2
        movdqa  xmm11, xmm1
        pslld   xmm1, 25
        psrld   xmm11, 7
        por     xmm1, xmm11
        /* Undo the lane rotation applied before the second half-round. */
        pshufd  xmm0, xmm0, 0x39
        pshufd  xmm3, xmm3, 0x4E
        pshufd  xmm2, xmm2, 0x93
        dec     al
        jz      9f
        /* Re-arrange the message words in xmm4..xmm7 for the next round. */
        movdqa  xmm8, xmm4
        shufps  xmm8, xmm5, 214
        pshufd  xmm9, xmm4, 0x0F
        pshufd  xmm4, xmm8, 0x39
        movdqa  xmm8, xmm6
        shufps  xmm8, xmm7, 250
        pblendw xmm9, xmm8, 0xCC
        movdqa  xmm8, xmm7
        punpcklqdq xmm8, xmm5
        pblendw xmm8, xmm6, 0xC0
        pshufd  xmm8, xmm8, 0x78
        punpckhdq xmm5, xmm7
        punpckldq xmm6, xmm5
        pshufd  xmm7, xmm6, 0x1E
        movdqa  xmm5, xmm9
        movdqa  xmm6, xmm8
        jmp     9b
9:
        /* Re-read the input state; it feeds the upper 32 output bytes. */
        movdqu  xmm4, xmmword ptr [rdi]
        movdqu  xmm5, xmmword ptr [rdi+0x10]
        pxor    xmm0, xmm2
        pxor    xmm1, xmm3
        pxor    xmm2, xmm4
        pxor    xmm3, xmm5
        movups  xmmword ptr [r9], xmm0
        movups  xmmword ptr [r9+0x10], xmm1
        movups  xmmword ptr [r9+0x20], xmm2
        movups  xmmword ptr [r9+0x30], xmm3
        ret


/*
 * Read-only constants.  The section is aligned to 64 bytes and every table
 * below is exactly 16 bytes long, so each label stays 16-byte aligned, as
 * the movaps/movdqa loads and the SSE memory operands (pand, pxor) that
 * reference these tables require.
 */
#ifdef __APPLE__
.static_data
#else
.section .rodata
#endif
.p2align 6
/* Initial value loaded into state row 2 by the single-block kernels. */
BLAKE3_IV:
        .long  0x6A09E667, 0xBB67AE85
        .long  0x3C6EF372, 0xA54FF53A
/* pshufb mask: rotate every 32-bit lane by 16 bits. */
ROT16:
        .byte  2, 3, 0, 1, 6, 7, 4, 5, 10, 11, 8, 9, 14, 15, 12, 13
/* pshufb mask: rotate every 32-bit lane right by 8 bits. */
ROT8:
        .byte  1, 2, 3, 0, 5, 6, 7, 4, 9, 10, 11, 8, 13, 14, 15, 12
/* Per-lane offsets 0..3, masked into the 4-way counter vector at entry. */
ADD0:
        .long  0, 1, 2, 3
/* Per-iteration counter step for the 4-way loop. */
ADD1:
        .long  4, 4, 4, 4
/* The four BLAKE3_IV words, each broadcast to all four lanes. */
BLAKE3_IV_0:
        .long  0x6A09E667, 0x6A09E667, 0x6A09E667, 0x6A09E667
BLAKE3_IV_1:
        .long  0xBB67AE85, 0xBB67AE85, 0xBB67AE85, 0xBB67AE85
BLAKE3_IV_2:
        .long  0x3C6EF372, 0x3C6EF372, 0x3C6EF372, 0x3C6EF372
BLAKE3_IV_3:
        .long  0xA54FF53A, 0xA54FF53A, 0xA54FF53A, 0xA54FF53A
/* Block length (64) broadcast to all four lanes. */
BLAKE3_BLOCK_LEN:
        .long  64, 64, 64, 64
/* Sign-bit flip applied to both operands before pcmpgtd, which turns the
   signed compare into an unsigned one (used for counter carry detection). */
CMP_MSB_MASK:
        .long  0x80000000, 0x80000000, 0x80000000, 0x80000000