blake3_sse2_x86-64_windows_gnu.S (71203B)
1 .intel_syntax noprefix 2 .global blake3_hash_many_sse2 3 .global _blake3_hash_many_sse2 4 .global blake3_compress_in_place_sse2 5 .global _blake3_compress_in_place_sse2 6 .global blake3_compress_xof_sse2 7 .global _blake3_compress_xof_sse2 8 .section .text 9 .p2align 6 10 _blake3_hash_many_sse2: 11 blake3_hash_many_sse2: 12 push r15 13 push r14 14 push r13 15 push r12 16 push rsi 17 push rdi 18 push rbx 19 push rbp 20 mov rbp, rsp 21 sub rsp, 528 22 and rsp, 0xFFFFFFFFFFFFFFC0 23 movdqa xmmword ptr [rsp+0x170], xmm6 24 movdqa xmmword ptr [rsp+0x180], xmm7 25 movdqa xmmword ptr [rsp+0x190], xmm8 26 movdqa xmmword ptr [rsp+0x1A0], xmm9 27 movdqa xmmword ptr [rsp+0x1B0], xmm10 28 movdqa xmmword ptr [rsp+0x1C0], xmm11 29 movdqa xmmword ptr [rsp+0x1D0], xmm12 30 movdqa xmmword ptr [rsp+0x1E0], xmm13 31 movdqa xmmword ptr [rsp+0x1F0], xmm14 32 movdqa xmmword ptr [rsp+0x200], xmm15 33 mov rdi, rcx 34 mov rsi, rdx 35 mov rdx, r8 36 mov rcx, r9 37 mov r8, qword ptr [rbp+0x68] 38 movzx r9, byte ptr [rbp+0x70] 39 neg r9d 40 movd xmm0, r9d 41 pshufd xmm0, xmm0, 0x00 42 movdqa xmmword ptr [rsp+0x130], xmm0 43 movdqa xmm1, xmm0 44 pand xmm1, xmmword ptr [ADD0+rip] 45 pand xmm0, xmmword ptr [ADD1+rip] 46 movdqa xmmword ptr [rsp+0x150], xmm0 47 movd xmm0, r8d 48 pshufd xmm0, xmm0, 0x00 49 paddd xmm0, xmm1 50 movdqa xmmword ptr [rsp+0x110], xmm0 51 pxor xmm0, xmmword ptr [CMP_MSB_MASK+rip] 52 pxor xmm1, xmmword ptr [CMP_MSB_MASK+rip] 53 pcmpgtd xmm1, xmm0 54 shr r8, 32 55 movd xmm2, r8d 56 pshufd xmm2, xmm2, 0x00 57 psubd xmm2, xmm1 58 movdqa xmmword ptr [rsp+0x120], xmm2 59 mov rbx, qword ptr [rbp+0x90] 60 mov r15, rdx 61 shl r15, 6 62 movzx r13d, byte ptr [rbp+0x78] 63 movzx r12d, byte ptr [rbp+0x88] 64 cmp rsi, 4 65 jc 3f 66 2: 67 movdqu xmm3, xmmword ptr [rcx] 68 pshufd xmm0, xmm3, 0x00 69 pshufd xmm1, xmm3, 0x55 70 pshufd xmm2, xmm3, 0xAA 71 pshufd xmm3, xmm3, 0xFF 72 movdqu xmm7, xmmword ptr [rcx+0x10] 73 pshufd xmm4, xmm7, 0x00 74 pshufd xmm5, xmm7, 0x55 75 pshufd xmm6, 
xmm7, 0xAA 76 pshufd xmm7, xmm7, 0xFF 77 mov r8, qword ptr [rdi] 78 mov r9, qword ptr [rdi+0x8] 79 mov r10, qword ptr [rdi+0x10] 80 mov r11, qword ptr [rdi+0x18] 81 movzx eax, byte ptr [rbp+0x80] 82 or eax, r13d 83 xor edx, edx 84 9: 85 mov r14d, eax 86 or eax, r12d 87 add rdx, 64 88 cmp rdx, r15 89 cmovne eax, r14d 90 movdqu xmm8, xmmword ptr [r8+rdx-0x40] 91 movdqu xmm9, xmmword ptr [r9+rdx-0x40] 92 movdqu xmm10, xmmword ptr [r10+rdx-0x40] 93 movdqu xmm11, xmmword ptr [r11+rdx-0x40] 94 movdqa xmm12, xmm8 95 punpckldq xmm8, xmm9 96 punpckhdq xmm12, xmm9 97 movdqa xmm14, xmm10 98 punpckldq xmm10, xmm11 99 punpckhdq xmm14, xmm11 100 movdqa xmm9, xmm8 101 punpcklqdq xmm8, xmm10 102 punpckhqdq xmm9, xmm10 103 movdqa xmm13, xmm12 104 punpcklqdq xmm12, xmm14 105 punpckhqdq xmm13, xmm14 106 movdqa xmmword ptr [rsp], xmm8 107 movdqa xmmword ptr [rsp+0x10], xmm9 108 movdqa xmmword ptr [rsp+0x20], xmm12 109 movdqa xmmword ptr [rsp+0x30], xmm13 110 movdqu xmm8, xmmword ptr [r8+rdx-0x30] 111 movdqu xmm9, xmmword ptr [r9+rdx-0x30] 112 movdqu xmm10, xmmword ptr [r10+rdx-0x30] 113 movdqu xmm11, xmmword ptr [r11+rdx-0x30] 114 movdqa xmm12, xmm8 115 punpckldq xmm8, xmm9 116 punpckhdq xmm12, xmm9 117 movdqa xmm14, xmm10 118 punpckldq xmm10, xmm11 119 punpckhdq xmm14, xmm11 120 movdqa xmm9, xmm8 121 punpcklqdq xmm8, xmm10 122 punpckhqdq xmm9, xmm10 123 movdqa xmm13, xmm12 124 punpcklqdq xmm12, xmm14 125 punpckhqdq xmm13, xmm14 126 movdqa xmmword ptr [rsp+0x40], xmm8 127 movdqa xmmword ptr [rsp+0x50], xmm9 128 movdqa xmmword ptr [rsp+0x60], xmm12 129 movdqa xmmword ptr [rsp+0x70], xmm13 130 movdqu xmm8, xmmword ptr [r8+rdx-0x20] 131 movdqu xmm9, xmmword ptr [r9+rdx-0x20] 132 movdqu xmm10, xmmword ptr [r10+rdx-0x20] 133 movdqu xmm11, xmmword ptr [r11+rdx-0x20] 134 movdqa xmm12, xmm8 135 punpckldq xmm8, xmm9 136 punpckhdq xmm12, xmm9 137 movdqa xmm14, xmm10 138 punpckldq xmm10, xmm11 139 punpckhdq xmm14, xmm11 140 movdqa xmm9, xmm8 141 punpcklqdq xmm8, xmm10 142 punpckhqdq xmm9, xmm10 
143 movdqa xmm13, xmm12 144 punpcklqdq xmm12, xmm14 145 punpckhqdq xmm13, xmm14 146 movdqa xmmword ptr [rsp+0x80], xmm8 147 movdqa xmmword ptr [rsp+0x90], xmm9 148 movdqa xmmword ptr [rsp+0xA0], xmm12 149 movdqa xmmword ptr [rsp+0xB0], xmm13 150 movdqu xmm8, xmmword ptr [r8+rdx-0x10] 151 movdqu xmm9, xmmword ptr [r9+rdx-0x10] 152 movdqu xmm10, xmmword ptr [r10+rdx-0x10] 153 movdqu xmm11, xmmword ptr [r11+rdx-0x10] 154 movdqa xmm12, xmm8 155 punpckldq xmm8, xmm9 156 punpckhdq xmm12, xmm9 157 movdqa xmm14, xmm10 158 punpckldq xmm10, xmm11 159 punpckhdq xmm14, xmm11 160 movdqa xmm9, xmm8 161 punpcklqdq xmm8, xmm10 162 punpckhqdq xmm9, xmm10 163 movdqa xmm13, xmm12 164 punpcklqdq xmm12, xmm14 165 punpckhqdq xmm13, xmm14 166 movdqa xmmword ptr [rsp+0xC0], xmm8 167 movdqa xmmword ptr [rsp+0xD0], xmm9 168 movdqa xmmword ptr [rsp+0xE0], xmm12 169 movdqa xmmword ptr [rsp+0xF0], xmm13 170 movdqa xmm9, xmmword ptr [BLAKE3_IV_1+rip] 171 movdqa xmm10, xmmword ptr [BLAKE3_IV_2+rip] 172 movdqa xmm11, xmmword ptr [BLAKE3_IV_3+rip] 173 movdqa xmm12, xmmword ptr [rsp+0x110] 174 movdqa xmm13, xmmword ptr [rsp+0x120] 175 movdqa xmm14, xmmword ptr [BLAKE3_BLOCK_LEN+rip] 176 movd xmm15, eax 177 pshufd xmm15, xmm15, 0x00 178 prefetcht0 [r8+rdx+0x80] 179 prefetcht0 [r9+rdx+0x80] 180 prefetcht0 [r10+rdx+0x80] 181 prefetcht0 [r11+rdx+0x80] 182 paddd xmm0, xmmword ptr [rsp] 183 paddd xmm1, xmmword ptr [rsp+0x20] 184 paddd xmm2, xmmword ptr [rsp+0x40] 185 paddd xmm3, xmmword ptr [rsp+0x60] 186 paddd xmm0, xmm4 187 paddd xmm1, xmm5 188 paddd xmm2, xmm6 189 paddd xmm3, xmm7 190 pxor xmm12, xmm0 191 pxor xmm13, xmm1 192 pxor xmm14, xmm2 193 pxor xmm15, xmm3 194 pshuflw xmm12, xmm12, 0xB1 195 pshufhw xmm12, xmm12, 0xB1 196 pshuflw xmm13, xmm13, 0xB1 197 pshufhw xmm13, xmm13, 0xB1 198 pshuflw xmm14, xmm14, 0xB1 199 pshufhw xmm14, xmm14, 0xB1 200 pshuflw xmm15, xmm15, 0xB1 201 pshufhw xmm15, xmm15, 0xB1 202 movdqa xmm8, xmmword ptr [BLAKE3_IV_0+rip] 203 paddd xmm8, xmm12 204 paddd xmm9, xmm13 205 
paddd xmm10, xmm14 206 paddd xmm11, xmm15 207 pxor xmm4, xmm8 208 pxor xmm5, xmm9 209 pxor xmm6, xmm10 210 pxor xmm7, xmm11 211 movdqa xmmword ptr [rsp+0x100], xmm8 212 movdqa xmm8, xmm4 213 psrld xmm8, 12 214 pslld xmm4, 20 215 por xmm4, xmm8 216 movdqa xmm8, xmm5 217 psrld xmm8, 12 218 pslld xmm5, 20 219 por xmm5, xmm8 220 movdqa xmm8, xmm6 221 psrld xmm8, 12 222 pslld xmm6, 20 223 por xmm6, xmm8 224 movdqa xmm8, xmm7 225 psrld xmm8, 12 226 pslld xmm7, 20 227 por xmm7, xmm8 228 paddd xmm0, xmmword ptr [rsp+0x10] 229 paddd xmm1, xmmword ptr [rsp+0x30] 230 paddd xmm2, xmmword ptr [rsp+0x50] 231 paddd xmm3, xmmword ptr [rsp+0x70] 232 paddd xmm0, xmm4 233 paddd xmm1, xmm5 234 paddd xmm2, xmm6 235 paddd xmm3, xmm7 236 pxor xmm12, xmm0 237 pxor xmm13, xmm1 238 pxor xmm14, xmm2 239 pxor xmm15, xmm3 240 movdqa xmm8, xmm12 241 psrld xmm12, 8 242 pslld xmm8, 24 243 pxor xmm12, xmm8 244 movdqa xmm8, xmm13 245 psrld xmm13, 8 246 pslld xmm8, 24 247 pxor xmm13, xmm8 248 movdqa xmm8, xmm14 249 psrld xmm14, 8 250 pslld xmm8, 24 251 pxor xmm14, xmm8 252 movdqa xmm8, xmm15 253 psrld xmm15, 8 254 pslld xmm8, 24 255 pxor xmm15, xmm8 256 movdqa xmm8, xmmword ptr [rsp+0x100] 257 paddd xmm8, xmm12 258 paddd xmm9, xmm13 259 paddd xmm10, xmm14 260 paddd xmm11, xmm15 261 pxor xmm4, xmm8 262 pxor xmm5, xmm9 263 pxor xmm6, xmm10 264 pxor xmm7, xmm11 265 movdqa xmmword ptr [rsp+0x100], xmm8 266 movdqa xmm8, xmm4 267 psrld xmm8, 7 268 pslld xmm4, 25 269 por xmm4, xmm8 270 movdqa xmm8, xmm5 271 psrld xmm8, 7 272 pslld xmm5, 25 273 por xmm5, xmm8 274 movdqa xmm8, xmm6 275 psrld xmm8, 7 276 pslld xmm6, 25 277 por xmm6, xmm8 278 movdqa xmm8, xmm7 279 psrld xmm8, 7 280 pslld xmm7, 25 281 por xmm7, xmm8 282 paddd xmm0, xmmword ptr [rsp+0x80] 283 paddd xmm1, xmmword ptr [rsp+0xA0] 284 paddd xmm2, xmmword ptr [rsp+0xC0] 285 paddd xmm3, xmmword ptr [rsp+0xE0] 286 paddd xmm0, xmm5 287 paddd xmm1, xmm6 288 paddd xmm2, xmm7 289 paddd xmm3, xmm4 290 pxor xmm15, xmm0 291 pxor xmm12, xmm1 292 pxor xmm13, 
xmm2 293 pxor xmm14, xmm3 294 pshuflw xmm15, xmm15, 0xB1 295 pshufhw xmm15, xmm15, 0xB1 296 pshuflw xmm12, xmm12, 0xB1 297 pshufhw xmm12, xmm12, 0xB1 298 pshuflw xmm13, xmm13, 0xB1 299 pshufhw xmm13, xmm13, 0xB1 300 pshuflw xmm14, xmm14, 0xB1 301 pshufhw xmm14, xmm14, 0xB1 302 paddd xmm10, xmm15 303 paddd xmm11, xmm12 304 movdqa xmm8, xmmword ptr [rsp+0x100] 305 paddd xmm8, xmm13 306 paddd xmm9, xmm14 307 pxor xmm5, xmm10 308 pxor xmm6, xmm11 309 pxor xmm7, xmm8 310 pxor xmm4, xmm9 311 movdqa xmmword ptr [rsp+0x100], xmm8 312 movdqa xmm8, xmm5 313 psrld xmm8, 12 314 pslld xmm5, 20 315 por xmm5, xmm8 316 movdqa xmm8, xmm6 317 psrld xmm8, 12 318 pslld xmm6, 20 319 por xmm6, xmm8 320 movdqa xmm8, xmm7 321 psrld xmm8, 12 322 pslld xmm7, 20 323 por xmm7, xmm8 324 movdqa xmm8, xmm4 325 psrld xmm8, 12 326 pslld xmm4, 20 327 por xmm4, xmm8 328 paddd xmm0, xmmword ptr [rsp+0x90] 329 paddd xmm1, xmmword ptr [rsp+0xB0] 330 paddd xmm2, xmmword ptr [rsp+0xD0] 331 paddd xmm3, xmmword ptr [rsp+0xF0] 332 paddd xmm0, xmm5 333 paddd xmm1, xmm6 334 paddd xmm2, xmm7 335 paddd xmm3, xmm4 336 pxor xmm15, xmm0 337 pxor xmm12, xmm1 338 pxor xmm13, xmm2 339 pxor xmm14, xmm3 340 movdqa xmm8, xmm15 341 psrld xmm15, 8 342 pslld xmm8, 24 343 pxor xmm15, xmm8 344 movdqa xmm8, xmm12 345 psrld xmm12, 8 346 pslld xmm8, 24 347 pxor xmm12, xmm8 348 movdqa xmm8, xmm13 349 psrld xmm13, 8 350 pslld xmm8, 24 351 pxor xmm13, xmm8 352 movdqa xmm8, xmm14 353 psrld xmm14, 8 354 pslld xmm8, 24 355 pxor xmm14, xmm8 356 paddd xmm10, xmm15 357 paddd xmm11, xmm12 358 movdqa xmm8, xmmword ptr [rsp+0x100] 359 paddd xmm8, xmm13 360 paddd xmm9, xmm14 361 pxor xmm5, xmm10 362 pxor xmm6, xmm11 363 pxor xmm7, xmm8 364 pxor xmm4, xmm9 365 movdqa xmmword ptr [rsp+0x100], xmm8 366 movdqa xmm8, xmm5 367 psrld xmm8, 7 368 pslld xmm5, 25 369 por xmm5, xmm8 370 movdqa xmm8, xmm6 371 psrld xmm8, 7 372 pslld xmm6, 25 373 por xmm6, xmm8 374 movdqa xmm8, xmm7 375 psrld xmm8, 7 376 pslld xmm7, 25 377 por xmm7, xmm8 378 movdqa 
xmm8, xmm4 379 psrld xmm8, 7 380 pslld xmm4, 25 381 por xmm4, xmm8 382 paddd xmm0, xmmword ptr [rsp+0x20] 383 paddd xmm1, xmmword ptr [rsp+0x30] 384 paddd xmm2, xmmword ptr [rsp+0x70] 385 paddd xmm3, xmmword ptr [rsp+0x40] 386 paddd xmm0, xmm4 387 paddd xmm1, xmm5 388 paddd xmm2, xmm6 389 paddd xmm3, xmm7 390 pxor xmm12, xmm0 391 pxor xmm13, xmm1 392 pxor xmm14, xmm2 393 pxor xmm15, xmm3 394 pshuflw xmm12, xmm12, 0xB1 395 pshufhw xmm12, xmm12, 0xB1 396 pshuflw xmm13, xmm13, 0xB1 397 pshufhw xmm13, xmm13, 0xB1 398 pshuflw xmm14, xmm14, 0xB1 399 pshufhw xmm14, xmm14, 0xB1 400 pshuflw xmm15, xmm15, 0xB1 401 pshufhw xmm15, xmm15, 0xB1 402 movdqa xmm8, xmmword ptr [rsp+0x100] 403 paddd xmm8, xmm12 404 paddd xmm9, xmm13 405 paddd xmm10, xmm14 406 paddd xmm11, xmm15 407 pxor xmm4, xmm8 408 pxor xmm5, xmm9 409 pxor xmm6, xmm10 410 pxor xmm7, xmm11 411 movdqa xmmword ptr [rsp+0x100], xmm8 412 movdqa xmm8, xmm4 413 psrld xmm8, 12 414 pslld xmm4, 20 415 por xmm4, xmm8 416 movdqa xmm8, xmm5 417 psrld xmm8, 12 418 pslld xmm5, 20 419 por xmm5, xmm8 420 movdqa xmm8, xmm6 421 psrld xmm8, 12 422 pslld xmm6, 20 423 por xmm6, xmm8 424 movdqa xmm8, xmm7 425 psrld xmm8, 12 426 pslld xmm7, 20 427 por xmm7, xmm8 428 paddd xmm0, xmmword ptr [rsp+0x60] 429 paddd xmm1, xmmword ptr [rsp+0xA0] 430 paddd xmm2, xmmword ptr [rsp] 431 paddd xmm3, xmmword ptr [rsp+0xD0] 432 paddd xmm0, xmm4 433 paddd xmm1, xmm5 434 paddd xmm2, xmm6 435 paddd xmm3, xmm7 436 pxor xmm12, xmm0 437 pxor xmm13, xmm1 438 pxor xmm14, xmm2 439 pxor xmm15, xmm3 440 movdqa xmm8, xmm12 441 psrld xmm12, 8 442 pslld xmm8, 24 443 pxor xmm12, xmm8 444 movdqa xmm8, xmm13 445 psrld xmm13, 8 446 pslld xmm8, 24 447 pxor xmm13, xmm8 448 movdqa xmm8, xmm14 449 psrld xmm14, 8 450 pslld xmm8, 24 451 pxor xmm14, xmm8 452 movdqa xmm8, xmm15 453 psrld xmm15, 8 454 pslld xmm8, 24 455 pxor xmm15, xmm8 456 movdqa xmm8, xmmword ptr [rsp+0x100] 457 paddd xmm8, xmm12 458 paddd xmm9, xmm13 459 paddd xmm10, xmm14 460 paddd xmm11, xmm15 461 pxor 
xmm4, xmm8 462 pxor xmm5, xmm9 463 pxor xmm6, xmm10 464 pxor xmm7, xmm11 465 movdqa xmmword ptr [rsp+0x100], xmm8 466 movdqa xmm8, xmm4 467 psrld xmm8, 7 468 pslld xmm4, 25 469 por xmm4, xmm8 470 movdqa xmm8, xmm5 471 psrld xmm8, 7 472 pslld xmm5, 25 473 por xmm5, xmm8 474 movdqa xmm8, xmm6 475 psrld xmm8, 7 476 pslld xmm6, 25 477 por xmm6, xmm8 478 movdqa xmm8, xmm7 479 psrld xmm8, 7 480 pslld xmm7, 25 481 por xmm7, xmm8 482 paddd xmm0, xmmword ptr [rsp+0x10] 483 paddd xmm1, xmmword ptr [rsp+0xC0] 484 paddd xmm2, xmmword ptr [rsp+0x90] 485 paddd xmm3, xmmword ptr [rsp+0xF0] 486 paddd xmm0, xmm5 487 paddd xmm1, xmm6 488 paddd xmm2, xmm7 489 paddd xmm3, xmm4 490 pxor xmm15, xmm0 491 pxor xmm12, xmm1 492 pxor xmm13, xmm2 493 pxor xmm14, xmm3 494 pshuflw xmm15, xmm15, 0xB1 495 pshufhw xmm15, xmm15, 0xB1 496 pshuflw xmm12, xmm12, 0xB1 497 pshufhw xmm12, xmm12, 0xB1 498 pshuflw xmm13, xmm13, 0xB1 499 pshufhw xmm13, xmm13, 0xB1 500 pshuflw xmm14, xmm14, 0xB1 501 pshufhw xmm14, xmm14, 0xB1 502 paddd xmm10, xmm15 503 paddd xmm11, xmm12 504 movdqa xmm8, xmmword ptr [rsp+0x100] 505 paddd xmm8, xmm13 506 paddd xmm9, xmm14 507 pxor xmm5, xmm10 508 pxor xmm6, xmm11 509 pxor xmm7, xmm8 510 pxor xmm4, xmm9 511 movdqa xmmword ptr [rsp+0x100], xmm8 512 movdqa xmm8, xmm5 513 psrld xmm8, 12 514 pslld xmm5, 20 515 por xmm5, xmm8 516 movdqa xmm8, xmm6 517 psrld xmm8, 12 518 pslld xmm6, 20 519 por xmm6, xmm8 520 movdqa xmm8, xmm7 521 psrld xmm8, 12 522 pslld xmm7, 20 523 por xmm7, xmm8 524 movdqa xmm8, xmm4 525 psrld xmm8, 12 526 pslld xmm4, 20 527 por xmm4, xmm8 528 paddd xmm0, xmmword ptr [rsp+0xB0] 529 paddd xmm1, xmmword ptr [rsp+0x50] 530 paddd xmm2, xmmword ptr [rsp+0xE0] 531 paddd xmm3, xmmword ptr [rsp+0x80] 532 paddd xmm0, xmm5 533 paddd xmm1, xmm6 534 paddd xmm2, xmm7 535 paddd xmm3, xmm4 536 pxor xmm15, xmm0 537 pxor xmm12, xmm1 538 pxor xmm13, xmm2 539 pxor xmm14, xmm3 540 movdqa xmm8, xmm15 541 psrld xmm15, 8 542 pslld xmm8, 24 543 pxor xmm15, xmm8 544 movdqa xmm8, xmm12 
545 psrld xmm12, 8 546 pslld xmm8, 24 547 pxor xmm12, xmm8 548 movdqa xmm8, xmm13 549 psrld xmm13, 8 550 pslld xmm8, 24 551 pxor xmm13, xmm8 552 movdqa xmm8, xmm14 553 psrld xmm14, 8 554 pslld xmm8, 24 555 pxor xmm14, xmm8 556 paddd xmm10, xmm15 557 paddd xmm11, xmm12 558 movdqa xmm8, xmmword ptr [rsp+0x100] 559 paddd xmm8, xmm13 560 paddd xmm9, xmm14 561 pxor xmm5, xmm10 562 pxor xmm6, xmm11 563 pxor xmm7, xmm8 564 pxor xmm4, xmm9 565 movdqa xmmword ptr [rsp+0x100], xmm8 566 movdqa xmm8, xmm5 567 psrld xmm8, 7 568 pslld xmm5, 25 569 por xmm5, xmm8 570 movdqa xmm8, xmm6 571 psrld xmm8, 7 572 pslld xmm6, 25 573 por xmm6, xmm8 574 movdqa xmm8, xmm7 575 psrld xmm8, 7 576 pslld xmm7, 25 577 por xmm7, xmm8 578 movdqa xmm8, xmm4 579 psrld xmm8, 7 580 pslld xmm4, 25 581 por xmm4, xmm8 582 paddd xmm0, xmmword ptr [rsp+0x30] 583 paddd xmm1, xmmword ptr [rsp+0xA0] 584 paddd xmm2, xmmword ptr [rsp+0xD0] 585 paddd xmm3, xmmword ptr [rsp+0x70] 586 paddd xmm0, xmm4 587 paddd xmm1, xmm5 588 paddd xmm2, xmm6 589 paddd xmm3, xmm7 590 pxor xmm12, xmm0 591 pxor xmm13, xmm1 592 pxor xmm14, xmm2 593 pxor xmm15, xmm3 594 pshuflw xmm12, xmm12, 0xB1 595 pshufhw xmm12, xmm12, 0xB1 596 pshuflw xmm13, xmm13, 0xB1 597 pshufhw xmm13, xmm13, 0xB1 598 pshuflw xmm14, xmm14, 0xB1 599 pshufhw xmm14, xmm14, 0xB1 600 pshuflw xmm15, xmm15, 0xB1 601 pshufhw xmm15, xmm15, 0xB1 602 movdqa xmm8, xmmword ptr [rsp+0x100] 603 paddd xmm8, xmm12 604 paddd xmm9, xmm13 605 paddd xmm10, xmm14 606 paddd xmm11, xmm15 607 pxor xmm4, xmm8 608 pxor xmm5, xmm9 609 pxor xmm6, xmm10 610 pxor xmm7, xmm11 611 movdqa xmmword ptr [rsp+0x100], xmm8 612 movdqa xmm8, xmm4 613 psrld xmm8, 12 614 pslld xmm4, 20 615 por xmm4, xmm8 616 movdqa xmm8, xmm5 617 psrld xmm8, 12 618 pslld xmm5, 20 619 por xmm5, xmm8 620 movdqa xmm8, xmm6 621 psrld xmm8, 12 622 pslld xmm6, 20 623 por xmm6, xmm8 624 movdqa xmm8, xmm7 625 psrld xmm8, 12 626 pslld xmm7, 20 627 por xmm7, xmm8 628 paddd xmm0, xmmword ptr [rsp+0x40] 629 paddd xmm1, xmmword ptr 
[rsp+0xC0] 630 paddd xmm2, xmmword ptr [rsp+0x20] 631 paddd xmm3, xmmword ptr [rsp+0xE0] 632 paddd xmm0, xmm4 633 paddd xmm1, xmm5 634 paddd xmm2, xmm6 635 paddd xmm3, xmm7 636 pxor xmm12, xmm0 637 pxor xmm13, xmm1 638 pxor xmm14, xmm2 639 pxor xmm15, xmm3 640 movdqa xmm8, xmm12 641 psrld xmm12, 8 642 pslld xmm8, 24 643 pxor xmm12, xmm8 644 movdqa xmm8, xmm13 645 psrld xmm13, 8 646 pslld xmm8, 24 647 pxor xmm13, xmm8 648 movdqa xmm8, xmm14 649 psrld xmm14, 8 650 pslld xmm8, 24 651 pxor xmm14, xmm8 652 movdqa xmm8, xmm15 653 psrld xmm15, 8 654 pslld xmm8, 24 655 pxor xmm15, xmm8 656 movdqa xmm8, xmmword ptr [rsp+0x100] 657 paddd xmm8, xmm12 658 paddd xmm9, xmm13 659 paddd xmm10, xmm14 660 paddd xmm11, xmm15 661 pxor xmm4, xmm8 662 pxor xmm5, xmm9 663 pxor xmm6, xmm10 664 pxor xmm7, xmm11 665 movdqa xmmword ptr [rsp+0x100], xmm8 666 movdqa xmm8, xmm4 667 psrld xmm8, 7 668 pslld xmm4, 25 669 por xmm4, xmm8 670 movdqa xmm8, xmm5 671 psrld xmm8, 7 672 pslld xmm5, 25 673 por xmm5, xmm8 674 movdqa xmm8, xmm6 675 psrld xmm8, 7 676 pslld xmm6, 25 677 por xmm6, xmm8 678 movdqa xmm8, xmm7 679 psrld xmm8, 7 680 pslld xmm7, 25 681 por xmm7, xmm8 682 paddd xmm0, xmmword ptr [rsp+0x60] 683 paddd xmm1, xmmword ptr [rsp+0x90] 684 paddd xmm2, xmmword ptr [rsp+0xB0] 685 paddd xmm3, xmmword ptr [rsp+0x80] 686 paddd xmm0, xmm5 687 paddd xmm1, xmm6 688 paddd xmm2, xmm7 689 paddd xmm3, xmm4 690 pxor xmm15, xmm0 691 pxor xmm12, xmm1 692 pxor xmm13, xmm2 693 pxor xmm14, xmm3 694 pshuflw xmm15, xmm15, 0xB1 695 pshufhw xmm15, xmm15, 0xB1 696 pshuflw xmm12, xmm12, 0xB1 697 pshufhw xmm12, xmm12, 0xB1 698 pshuflw xmm13, xmm13, 0xB1 699 pshufhw xmm13, xmm13, 0xB1 700 pshuflw xmm14, xmm14, 0xB1 701 pshufhw xmm14, xmm14, 0xB1 702 paddd xmm10, xmm15 703 paddd xmm11, xmm12 704 movdqa xmm8, xmmword ptr [rsp+0x100] 705 paddd xmm8, xmm13 706 paddd xmm9, xmm14 707 pxor xmm5, xmm10 708 pxor xmm6, xmm11 709 pxor xmm7, xmm8 710 pxor xmm4, xmm9 711 movdqa xmmword ptr [rsp+0x100], xmm8 712 movdqa xmm8, xmm5 
713 psrld xmm8, 12 714 pslld xmm5, 20 715 por xmm5, xmm8 716 movdqa xmm8, xmm6 717 psrld xmm8, 12 718 pslld xmm6, 20 719 por xmm6, xmm8 720 movdqa xmm8, xmm7 721 psrld xmm8, 12 722 pslld xmm7, 20 723 por xmm7, xmm8 724 movdqa xmm8, xmm4 725 psrld xmm8, 12 726 pslld xmm4, 20 727 por xmm4, xmm8 728 paddd xmm0, xmmword ptr [rsp+0x50] 729 paddd xmm1, xmmword ptr [rsp] 730 paddd xmm2, xmmword ptr [rsp+0xF0] 731 paddd xmm3, xmmword ptr [rsp+0x10] 732 paddd xmm0, xmm5 733 paddd xmm1, xmm6 734 paddd xmm2, xmm7 735 paddd xmm3, xmm4 736 pxor xmm15, xmm0 737 pxor xmm12, xmm1 738 pxor xmm13, xmm2 739 pxor xmm14, xmm3 740 movdqa xmm8, xmm15 741 psrld xmm15, 8 742 pslld xmm8, 24 743 pxor xmm15, xmm8 744 movdqa xmm8, xmm12 745 psrld xmm12, 8 746 pslld xmm8, 24 747 pxor xmm12, xmm8 748 movdqa xmm8, xmm13 749 psrld xmm13, 8 750 pslld xmm8, 24 751 pxor xmm13, xmm8 752 movdqa xmm8, xmm14 753 psrld xmm14, 8 754 pslld xmm8, 24 755 pxor xmm14, xmm8 756 paddd xmm10, xmm15 757 paddd xmm11, xmm12 758 movdqa xmm8, xmmword ptr [rsp+0x100] 759 paddd xmm8, xmm13 760 paddd xmm9, xmm14 761 pxor xmm5, xmm10 762 pxor xmm6, xmm11 763 pxor xmm7, xmm8 764 pxor xmm4, xmm9 765 movdqa xmmword ptr [rsp+0x100], xmm8 766 movdqa xmm8, xmm5 767 psrld xmm8, 7 768 pslld xmm5, 25 769 por xmm5, xmm8 770 movdqa xmm8, xmm6 771 psrld xmm8, 7 772 pslld xmm6, 25 773 por xmm6, xmm8 774 movdqa xmm8, xmm7 775 psrld xmm8, 7 776 pslld xmm7, 25 777 por xmm7, xmm8 778 movdqa xmm8, xmm4 779 psrld xmm8, 7 780 pslld xmm4, 25 781 por xmm4, xmm8 782 paddd xmm0, xmmword ptr [rsp+0xA0] 783 paddd xmm1, xmmword ptr [rsp+0xC0] 784 paddd xmm2, xmmword ptr [rsp+0xE0] 785 paddd xmm3, xmmword ptr [rsp+0xD0] 786 paddd xmm0, xmm4 787 paddd xmm1, xmm5 788 paddd xmm2, xmm6 789 paddd xmm3, xmm7 790 pxor xmm12, xmm0 791 pxor xmm13, xmm1 792 pxor xmm14, xmm2 793 pxor xmm15, xmm3 794 pshuflw xmm12, xmm12, 0xB1 795 pshufhw xmm12, xmm12, 0xB1 796 pshuflw xmm13, xmm13, 0xB1 797 pshufhw xmm13, xmm13, 0xB1 798 pshuflw xmm14, xmm14, 0xB1 799 pshufhw 
xmm14, xmm14, 0xB1 800 pshuflw xmm15, xmm15, 0xB1 801 pshufhw xmm15, xmm15, 0xB1 802 movdqa xmm8, xmmword ptr [rsp+0x100] 803 paddd xmm8, xmm12 804 paddd xmm9, xmm13 805 paddd xmm10, xmm14 806 paddd xmm11, xmm15 807 pxor xmm4, xmm8 808 pxor xmm5, xmm9 809 pxor xmm6, xmm10 810 pxor xmm7, xmm11 811 movdqa xmmword ptr [rsp+0x100], xmm8 812 movdqa xmm8, xmm4 813 psrld xmm8, 12 814 pslld xmm4, 20 815 por xmm4, xmm8 816 movdqa xmm8, xmm5 817 psrld xmm8, 12 818 pslld xmm5, 20 819 por xmm5, xmm8 820 movdqa xmm8, xmm6 821 psrld xmm8, 12 822 pslld xmm6, 20 823 por xmm6, xmm8 824 movdqa xmm8, xmm7 825 psrld xmm8, 12 826 pslld xmm7, 20 827 por xmm7, xmm8 828 paddd xmm0, xmmword ptr [rsp+0x70] 829 paddd xmm1, xmmword ptr [rsp+0x90] 830 paddd xmm2, xmmword ptr [rsp+0x30] 831 paddd xmm3, xmmword ptr [rsp+0xF0] 832 paddd xmm0, xmm4 833 paddd xmm1, xmm5 834 paddd xmm2, xmm6 835 paddd xmm3, xmm7 836 pxor xmm12, xmm0 837 pxor xmm13, xmm1 838 pxor xmm14, xmm2 839 pxor xmm15, xmm3 840 movdqa xmm8, xmm12 841 psrld xmm12, 8 842 pslld xmm8, 24 843 pxor xmm12, xmm8 844 movdqa xmm8, xmm13 845 psrld xmm13, 8 846 pslld xmm8, 24 847 pxor xmm13, xmm8 848 movdqa xmm8, xmm14 849 psrld xmm14, 8 850 pslld xmm8, 24 851 pxor xmm14, xmm8 852 movdqa xmm8, xmm15 853 psrld xmm15, 8 854 pslld xmm8, 24 855 pxor xmm15, xmm8 856 movdqa xmm8, xmmword ptr [rsp+0x100] 857 paddd xmm8, xmm12 858 paddd xmm9, xmm13 859 paddd xmm10, xmm14 860 paddd xmm11, xmm15 861 pxor xmm4, xmm8 862 pxor xmm5, xmm9 863 pxor xmm6, xmm10 864 pxor xmm7, xmm11 865 movdqa xmmword ptr [rsp+0x100], xmm8 866 movdqa xmm8, xmm4 867 psrld xmm8, 7 868 pslld xmm4, 25 869 por xmm4, xmm8 870 movdqa xmm8, xmm5 871 psrld xmm8, 7 872 pslld xmm5, 25 873 por xmm5, xmm8 874 movdqa xmm8, xmm6 875 psrld xmm8, 7 876 pslld xmm6, 25 877 por xmm6, xmm8 878 movdqa xmm8, xmm7 879 psrld xmm8, 7 880 pslld xmm7, 25 881 por xmm7, xmm8 882 paddd xmm0, xmmword ptr [rsp+0x40] 883 paddd xmm1, xmmword ptr [rsp+0xB0] 884 paddd xmm2, xmmword ptr [rsp+0x50] 885 paddd 
xmm3, xmmword ptr [rsp+0x10] 886 paddd xmm0, xmm5 887 paddd xmm1, xmm6 888 paddd xmm2, xmm7 889 paddd xmm3, xmm4 890 pxor xmm15, xmm0 891 pxor xmm12, xmm1 892 pxor xmm13, xmm2 893 pxor xmm14, xmm3 894 pshuflw xmm15, xmm15, 0xB1 895 pshufhw xmm15, xmm15, 0xB1 896 pshuflw xmm12, xmm12, 0xB1 897 pshufhw xmm12, xmm12, 0xB1 898 pshuflw xmm13, xmm13, 0xB1 899 pshufhw xmm13, xmm13, 0xB1 900 pshuflw xmm14, xmm14, 0xB1 901 pshufhw xmm14, xmm14, 0xB1 902 paddd xmm10, xmm15 903 paddd xmm11, xmm12 904 movdqa xmm8, xmmword ptr [rsp+0x100] 905 paddd xmm8, xmm13 906 paddd xmm9, xmm14 907 pxor xmm5, xmm10 908 pxor xmm6, xmm11 909 pxor xmm7, xmm8 910 pxor xmm4, xmm9 911 movdqa xmmword ptr [rsp+0x100], xmm8 912 movdqa xmm8, xmm5 913 psrld xmm8, 12 914 pslld xmm5, 20 915 por xmm5, xmm8 916 movdqa xmm8, xmm6 917 psrld xmm8, 12 918 pslld xmm6, 20 919 por xmm6, xmm8 920 movdqa xmm8, xmm7 921 psrld xmm8, 12 922 pslld xmm7, 20 923 por xmm7, xmm8 924 movdqa xmm8, xmm4 925 psrld xmm8, 12 926 pslld xmm4, 20 927 por xmm4, xmm8 928 paddd xmm0, xmmword ptr [rsp] 929 paddd xmm1, xmmword ptr [rsp+0x20] 930 paddd xmm2, xmmword ptr [rsp+0x80] 931 paddd xmm3, xmmword ptr [rsp+0x60] 932 paddd xmm0, xmm5 933 paddd xmm1, xmm6 934 paddd xmm2, xmm7 935 paddd xmm3, xmm4 936 pxor xmm15, xmm0 937 pxor xmm12, xmm1 938 pxor xmm13, xmm2 939 pxor xmm14, xmm3 940 movdqa xmm8, xmm15 941 psrld xmm15, 8 942 pslld xmm8, 24 943 pxor xmm15, xmm8 944 movdqa xmm8, xmm12 945 psrld xmm12, 8 946 pslld xmm8, 24 947 pxor xmm12, xmm8 948 movdqa xmm8, xmm13 949 psrld xmm13, 8 950 pslld xmm8, 24 951 pxor xmm13, xmm8 952 movdqa xmm8, xmm14 953 psrld xmm14, 8 954 pslld xmm8, 24 955 pxor xmm14, xmm8 956 paddd xmm10, xmm15 957 paddd xmm11, xmm12 958 movdqa xmm8, xmmword ptr [rsp+0x100] 959 paddd xmm8, xmm13 960 paddd xmm9, xmm14 961 pxor xmm5, xmm10 962 pxor xmm6, xmm11 963 pxor xmm7, xmm8 964 pxor xmm4, xmm9 965 movdqa xmmword ptr [rsp+0x100], xmm8 966 movdqa xmm8, xmm5 967 psrld xmm8, 7 968 pslld xmm5, 25 969 por xmm5, xmm8 970 
movdqa xmm8, xmm6 971 psrld xmm8, 7 972 pslld xmm6, 25 973 por xmm6, xmm8 974 movdqa xmm8, xmm7 975 psrld xmm8, 7 976 pslld xmm7, 25 977 por xmm7, xmm8 978 movdqa xmm8, xmm4 979 psrld xmm8, 7 980 pslld xmm4, 25 981 por xmm4, xmm8 982 paddd xmm0, xmmword ptr [rsp+0xC0] 983 paddd xmm1, xmmword ptr [rsp+0x90] 984 paddd xmm2, xmmword ptr [rsp+0xF0] 985 paddd xmm3, xmmword ptr [rsp+0xE0] 986 paddd xmm0, xmm4 987 paddd xmm1, xmm5 988 paddd xmm2, xmm6 989 paddd xmm3, xmm7 990 pxor xmm12, xmm0 991 pxor xmm13, xmm1 992 pxor xmm14, xmm2 993 pxor xmm15, xmm3 994 pshuflw xmm12, xmm12, 0xB1 995 pshufhw xmm12, xmm12, 0xB1 996 pshuflw xmm13, xmm13, 0xB1 997 pshufhw xmm13, xmm13, 0xB1 998 pshuflw xmm14, xmm14, 0xB1 999 pshufhw xmm14, xmm14, 0xB1 1000 pshuflw xmm15, xmm15, 0xB1 1001 pshufhw xmm15, xmm15, 0xB1 1002 movdqa xmm8, xmmword ptr [rsp+0x100] 1003 paddd xmm8, xmm12 1004 paddd xmm9, xmm13 1005 paddd xmm10, xmm14 1006 paddd xmm11, xmm15 1007 pxor xmm4, xmm8 1008 pxor xmm5, xmm9 1009 pxor xmm6, xmm10 1010 pxor xmm7, xmm11 1011 movdqa xmmword ptr [rsp+0x100], xmm8 1012 movdqa xmm8, xmm4 1013 psrld xmm8, 12 1014 pslld xmm4, 20 1015 por xmm4, xmm8 1016 movdqa xmm8, xmm5 1017 psrld xmm8, 12 1018 pslld xmm5, 20 1019 por xmm5, xmm8 1020 movdqa xmm8, xmm6 1021 psrld xmm8, 12 1022 pslld xmm6, 20 1023 por xmm6, xmm8 1024 movdqa xmm8, xmm7 1025 psrld xmm8, 12 1026 pslld xmm7, 20 1027 por xmm7, xmm8 1028 paddd xmm0, xmmword ptr [rsp+0xD0] 1029 paddd xmm1, xmmword ptr [rsp+0xB0] 1030 paddd xmm2, xmmword ptr [rsp+0xA0] 1031 paddd xmm3, xmmword ptr [rsp+0x80] 1032 paddd xmm0, xmm4 1033 paddd xmm1, xmm5 1034 paddd xmm2, xmm6 1035 paddd xmm3, xmm7 1036 pxor xmm12, xmm0 1037 pxor xmm13, xmm1 1038 pxor xmm14, xmm2 1039 pxor xmm15, xmm3 1040 movdqa xmm8, xmm12 1041 psrld xmm12, 8 1042 pslld xmm8, 24 1043 pxor xmm12, xmm8 1044 movdqa xmm8, xmm13 1045 psrld xmm13, 8 1046 pslld xmm8, 24 1047 pxor xmm13, xmm8 1048 movdqa xmm8, xmm14 1049 psrld xmm14, 8 1050 pslld xmm8, 24 1051 pxor xmm14, xmm8 1052 
movdqa xmm8, xmm15 1053 psrld xmm15, 8 1054 pslld xmm8, 24 1055 pxor xmm15, xmm8 1056 movdqa xmm8, xmmword ptr [rsp+0x100] 1057 paddd xmm8, xmm12 1058 paddd xmm9, xmm13 1059 paddd xmm10, xmm14 1060 paddd xmm11, xmm15 1061 pxor xmm4, xmm8 1062 pxor xmm5, xmm9 1063 pxor xmm6, xmm10 1064 pxor xmm7, xmm11 1065 movdqa xmmword ptr [rsp+0x100], xmm8 1066 movdqa xmm8, xmm4 1067 psrld xmm8, 7 1068 pslld xmm4, 25 1069 por xmm4, xmm8 1070 movdqa xmm8, xmm5 1071 psrld xmm8, 7 1072 pslld xmm5, 25 1073 por xmm5, xmm8 1074 movdqa xmm8, xmm6 1075 psrld xmm8, 7 1076 pslld xmm6, 25 1077 por xmm6, xmm8 1078 movdqa xmm8, xmm7 1079 psrld xmm8, 7 1080 pslld xmm7, 25 1081 por xmm7, xmm8 1082 paddd xmm0, xmmword ptr [rsp+0x70] 1083 paddd xmm1, xmmword ptr [rsp+0x50] 1084 paddd xmm2, xmmword ptr [rsp] 1085 paddd xmm3, xmmword ptr [rsp+0x60] 1086 paddd xmm0, xmm5 1087 paddd xmm1, xmm6 1088 paddd xmm2, xmm7 1089 paddd xmm3, xmm4 1090 pxor xmm15, xmm0 1091 pxor xmm12, xmm1 1092 pxor xmm13, xmm2 1093 pxor xmm14, xmm3 1094 pshuflw xmm15, xmm15, 0xB1 1095 pshufhw xmm15, xmm15, 0xB1 1096 pshuflw xmm12, xmm12, 0xB1 1097 pshufhw xmm12, xmm12, 0xB1 1098 pshuflw xmm13, xmm13, 0xB1 1099 pshufhw xmm13, xmm13, 0xB1 1100 pshuflw xmm14, xmm14, 0xB1 1101 pshufhw xmm14, xmm14, 0xB1 1102 paddd xmm10, xmm15 1103 paddd xmm11, xmm12 1104 movdqa xmm8, xmmword ptr [rsp+0x100] 1105 paddd xmm8, xmm13 1106 paddd xmm9, xmm14 1107 pxor xmm5, xmm10 1108 pxor xmm6, xmm11 1109 pxor xmm7, xmm8 1110 pxor xmm4, xmm9 1111 movdqa xmmword ptr [rsp+0x100], xmm8 1112 movdqa xmm8, xmm5 1113 psrld xmm8, 12 1114 pslld xmm5, 20 1115 por xmm5, xmm8 1116 movdqa xmm8, xmm6 1117 psrld xmm8, 12 1118 pslld xmm6, 20 1119 por xmm6, xmm8 1120 movdqa xmm8, xmm7 1121 psrld xmm8, 12 1122 pslld xmm7, 20 1123 por xmm7, xmm8 1124 movdqa xmm8, xmm4 1125 psrld xmm8, 12 1126 pslld xmm4, 20 1127 por xmm4, xmm8 1128 paddd xmm0, xmmword ptr [rsp+0x20] 1129 paddd xmm1, xmmword ptr [rsp+0x30] 1130 paddd xmm2, xmmword ptr [rsp+0x10] 1131 paddd xmm3, 
xmmword ptr [rsp+0x40] 1132 paddd xmm0, xmm5 1133 paddd xmm1, xmm6 1134 paddd xmm2, xmm7 1135 paddd xmm3, xmm4 1136 pxor xmm15, xmm0 1137 pxor xmm12, xmm1 1138 pxor xmm13, xmm2 1139 pxor xmm14, xmm3 1140 movdqa xmm8, xmm15 1141 psrld xmm15, 8 1142 pslld xmm8, 24 1143 pxor xmm15, xmm8 1144 movdqa xmm8, xmm12 1145 psrld xmm12, 8 1146 pslld xmm8, 24 1147 pxor xmm12, xmm8 1148 movdqa xmm8, xmm13 1149 psrld xmm13, 8 1150 pslld xmm8, 24 1151 pxor xmm13, xmm8 1152 movdqa xmm8, xmm14 1153 psrld xmm14, 8 1154 pslld xmm8, 24 1155 pxor xmm14, xmm8 1156 paddd xmm10, xmm15 1157 paddd xmm11, xmm12 1158 movdqa xmm8, xmmword ptr [rsp+0x100] 1159 paddd xmm8, xmm13 1160 paddd xmm9, xmm14 1161 pxor xmm5, xmm10 1162 pxor xmm6, xmm11 1163 pxor xmm7, xmm8 1164 pxor xmm4, xmm9 1165 movdqa xmmword ptr [rsp+0x100], xmm8 1166 movdqa xmm8, xmm5 1167 psrld xmm8, 7 1168 pslld xmm5, 25 1169 por xmm5, xmm8 1170 movdqa xmm8, xmm6 1171 psrld xmm8, 7 1172 pslld xmm6, 25 1173 por xmm6, xmm8 1174 movdqa xmm8, xmm7 1175 psrld xmm8, 7 1176 pslld xmm7, 25 1177 por xmm7, xmm8 1178 movdqa xmm8, xmm4 1179 psrld xmm8, 7 1180 pslld xmm4, 25 1181 por xmm4, xmm8 1182 paddd xmm0, xmmword ptr [rsp+0x90] 1183 paddd xmm1, xmmword ptr [rsp+0xB0] 1184 paddd xmm2, xmmword ptr [rsp+0x80] 1185 paddd xmm3, xmmword ptr [rsp+0xF0] 1186 paddd xmm0, xmm4 1187 paddd xmm1, xmm5 1188 paddd xmm2, xmm6 1189 paddd xmm3, xmm7 1190 pxor xmm12, xmm0 1191 pxor xmm13, xmm1 1192 pxor xmm14, xmm2 1193 pxor xmm15, xmm3 1194 pshuflw xmm12, xmm12, 0xB1 1195 pshufhw xmm12, xmm12, 0xB1 1196 pshuflw xmm13, xmm13, 0xB1 1197 pshufhw xmm13, xmm13, 0xB1 1198 pshuflw xmm14, xmm14, 0xB1 1199 pshufhw xmm14, xmm14, 0xB1 1200 pshuflw xmm15, xmm15, 0xB1 1201 pshufhw xmm15, xmm15, 0xB1 1202 movdqa xmm8, xmmword ptr [rsp+0x100] 1203 paddd xmm8, xmm12 1204 paddd xmm9, xmm13 1205 paddd xmm10, xmm14 1206 paddd xmm11, xmm15 1207 pxor xmm4, xmm8 1208 pxor xmm5, xmm9 1209 pxor xmm6, xmm10 1210 pxor xmm7, xmm11 1211 movdqa xmmword ptr [rsp+0x100], xmm8 1212 
movdqa xmm8, xmm4 1213 psrld xmm8, 12 1214 pslld xmm4, 20 1215 por xmm4, xmm8 1216 movdqa xmm8, xmm5 1217 psrld xmm8, 12 1218 pslld xmm5, 20 1219 por xmm5, xmm8 1220 movdqa xmm8, xmm6 1221 psrld xmm8, 12 1222 pslld xmm6, 20 1223 por xmm6, xmm8 1224 movdqa xmm8, xmm7 1225 psrld xmm8, 12 1226 pslld xmm7, 20 1227 por xmm7, xmm8 1228 paddd xmm0, xmmword ptr [rsp+0xE0] 1229 paddd xmm1, xmmword ptr [rsp+0x50] 1230 paddd xmm2, xmmword ptr [rsp+0xC0] 1231 paddd xmm3, xmmword ptr [rsp+0x10] 1232 paddd xmm0, xmm4 1233 paddd xmm1, xmm5 1234 paddd xmm2, xmm6 1235 paddd xmm3, xmm7 1236 pxor xmm12, xmm0 1237 pxor xmm13, xmm1 1238 pxor xmm14, xmm2 1239 pxor xmm15, xmm3 1240 movdqa xmm8, xmm12 1241 psrld xmm12, 8 1242 pslld xmm8, 24 1243 pxor xmm12, xmm8 1244 movdqa xmm8, xmm13 1245 psrld xmm13, 8 1246 pslld xmm8, 24 1247 pxor xmm13, xmm8 1248 movdqa xmm8, xmm14 1249 psrld xmm14, 8 1250 pslld xmm8, 24 1251 pxor xmm14, xmm8 1252 movdqa xmm8, xmm15 1253 psrld xmm15, 8 1254 pslld xmm8, 24 1255 pxor xmm15, xmm8 1256 movdqa xmm8, xmmword ptr [rsp+0x100] 1257 paddd xmm8, xmm12 1258 paddd xmm9, xmm13 1259 paddd xmm10, xmm14 1260 paddd xmm11, xmm15 1261 pxor xmm4, xmm8 1262 pxor xmm5, xmm9 1263 pxor xmm6, xmm10 1264 pxor xmm7, xmm11 1265 movdqa xmmword ptr [rsp+0x100], xmm8 1266 movdqa xmm8, xmm4 1267 psrld xmm8, 7 1268 pslld xmm4, 25 1269 por xmm4, xmm8 1270 movdqa xmm8, xmm5 1271 psrld xmm8, 7 1272 pslld xmm5, 25 1273 por xmm5, xmm8 1274 movdqa xmm8, xmm6 1275 psrld xmm8, 7 1276 pslld xmm6, 25 1277 por xmm6, xmm8 1278 movdqa xmm8, xmm7 1279 psrld xmm8, 7 1280 pslld xmm7, 25 1281 por xmm7, xmm8 1282 paddd xmm0, xmmword ptr [rsp+0xD0] 1283 paddd xmm1, xmmword ptr [rsp] 1284 paddd xmm2, xmmword ptr [rsp+0x20] 1285 paddd xmm3, xmmword ptr [rsp+0x40] 1286 paddd xmm0, xmm5 1287 paddd xmm1, xmm6 1288 paddd xmm2, xmm7 1289 paddd xmm3, xmm4 1290 pxor xmm15, xmm0 1291 pxor xmm12, xmm1 1292 pxor xmm13, xmm2 1293 pxor xmm14, xmm3 1294 pshuflw xmm15, xmm15, 0xB1 1295 pshufhw xmm15, xmm15, 0xB1 1296 
pshuflw xmm12, xmm12, 0xB1 1297 pshufhw xmm12, xmm12, 0xB1 1298 pshuflw xmm13, xmm13, 0xB1 1299 pshufhw xmm13, xmm13, 0xB1 1300 pshuflw xmm14, xmm14, 0xB1 1301 pshufhw xmm14, xmm14, 0xB1 1302 paddd xmm10, xmm15 1303 paddd xmm11, xmm12 1304 movdqa xmm8, xmmword ptr [rsp+0x100] 1305 paddd xmm8, xmm13 1306 paddd xmm9, xmm14 1307 pxor xmm5, xmm10 1308 pxor xmm6, xmm11 1309 pxor xmm7, xmm8 1310 pxor xmm4, xmm9 1311 movdqa xmmword ptr [rsp+0x100], xmm8 1312 movdqa xmm8, xmm5 1313 psrld xmm8, 12 1314 pslld xmm5, 20 1315 por xmm5, xmm8 1316 movdqa xmm8, xmm6 1317 psrld xmm8, 12 1318 pslld xmm6, 20 1319 por xmm6, xmm8 1320 movdqa xmm8, xmm7 1321 psrld xmm8, 12 1322 pslld xmm7, 20 1323 por xmm7, xmm8 1324 movdqa xmm8, xmm4 1325 psrld xmm8, 12 1326 pslld xmm4, 20 1327 por xmm4, xmm8 1328 paddd xmm0, xmmword ptr [rsp+0x30] 1329 paddd xmm1, xmmword ptr [rsp+0xA0] 1330 paddd xmm2, xmmword ptr [rsp+0x60] 1331 paddd xmm3, xmmword ptr [rsp+0x70] 1332 paddd xmm0, xmm5 1333 paddd xmm1, xmm6 1334 paddd xmm2, xmm7 1335 paddd xmm3, xmm4 1336 pxor xmm15, xmm0 1337 pxor xmm12, xmm1 1338 pxor xmm13, xmm2 1339 pxor xmm14, xmm3 1340 movdqa xmm8, xmm15 1341 psrld xmm15, 8 1342 pslld xmm8, 24 1343 pxor xmm15, xmm8 1344 movdqa xmm8, xmm12 1345 psrld xmm12, 8 1346 pslld xmm8, 24 1347 pxor xmm12, xmm8 1348 movdqa xmm8, xmm13 1349 psrld xmm13, 8 1350 pslld xmm8, 24 1351 pxor xmm13, xmm8 1352 movdqa xmm8, xmm14 1353 psrld xmm14, 8 1354 pslld xmm8, 24 1355 pxor xmm14, xmm8 1356 paddd xmm10, xmm15 1357 paddd xmm11, xmm12 1358 movdqa xmm8, xmmword ptr [rsp+0x100] 1359 paddd xmm8, xmm13 1360 paddd xmm9, xmm14 1361 pxor xmm5, xmm10 1362 pxor xmm6, xmm11 1363 pxor xmm7, xmm8 1364 pxor xmm4, xmm9 1365 movdqa xmmword ptr [rsp+0x100], xmm8 1366 movdqa xmm8, xmm5 1367 psrld xmm8, 7 1368 pslld xmm5, 25 1369 por xmm5, xmm8 1370 movdqa xmm8, xmm6 1371 psrld xmm8, 7 1372 pslld xmm6, 25 1373 por xmm6, xmm8 1374 movdqa xmm8, xmm7 1375 psrld xmm8, 7 1376 pslld xmm7, 25 1377 por xmm7, xmm8 1378 movdqa xmm8, xmm4 
1379 psrld xmm8, 7 1380 pslld xmm4, 25 1381 por xmm4, xmm8 1382 paddd xmm0, xmmword ptr [rsp+0xB0] 1383 paddd xmm1, xmmword ptr [rsp+0x50] 1384 paddd xmm2, xmmword ptr [rsp+0x10] 1385 paddd xmm3, xmmword ptr [rsp+0x80] 1386 paddd xmm0, xmm4 1387 paddd xmm1, xmm5 1388 paddd xmm2, xmm6 1389 paddd xmm3, xmm7 1390 pxor xmm12, xmm0 1391 pxor xmm13, xmm1 1392 pxor xmm14, xmm2 1393 pxor xmm15, xmm3 1394 pshuflw xmm12, xmm12, 0xB1 1395 pshufhw xmm12, xmm12, 0xB1 1396 pshuflw xmm13, xmm13, 0xB1 1397 pshufhw xmm13, xmm13, 0xB1 1398 pshuflw xmm14, xmm14, 0xB1 1399 pshufhw xmm14, xmm14, 0xB1 1400 pshuflw xmm15, xmm15, 0xB1 1401 pshufhw xmm15, xmm15, 0xB1 1402 movdqa xmm8, xmmword ptr [rsp+0x100] 1403 paddd xmm8, xmm12 1404 paddd xmm9, xmm13 1405 paddd xmm10, xmm14 1406 paddd xmm11, xmm15 1407 pxor xmm4, xmm8 1408 pxor xmm5, xmm9 1409 pxor xmm6, xmm10 1410 pxor xmm7, xmm11 1411 movdqa xmmword ptr [rsp+0x100], xmm8 1412 movdqa xmm8, xmm4 1413 psrld xmm8, 12 1414 pslld xmm4, 20 1415 por xmm4, xmm8 1416 movdqa xmm8, xmm5 1417 psrld xmm8, 12 1418 pslld xmm5, 20 1419 por xmm5, xmm8 1420 movdqa xmm8, xmm6 1421 psrld xmm8, 12 1422 pslld xmm6, 20 1423 por xmm6, xmm8 1424 movdqa xmm8, xmm7 1425 psrld xmm8, 12 1426 pslld xmm7, 20 1427 por xmm7, xmm8 1428 paddd xmm0, xmmword ptr [rsp+0xF0] 1429 paddd xmm1, xmmword ptr [rsp] 1430 paddd xmm2, xmmword ptr [rsp+0x90] 1431 paddd xmm3, xmmword ptr [rsp+0x60] 1432 paddd xmm0, xmm4 1433 paddd xmm1, xmm5 1434 paddd xmm2, xmm6 1435 paddd xmm3, xmm7 1436 pxor xmm12, xmm0 1437 pxor xmm13, xmm1 1438 pxor xmm14, xmm2 1439 pxor xmm15, xmm3 1440 movdqa xmm8, xmm12 1441 psrld xmm12, 8 1442 pslld xmm8, 24 1443 pxor xmm12, xmm8 1444 movdqa xmm8, xmm13 1445 psrld xmm13, 8 1446 pslld xmm8, 24 1447 pxor xmm13, xmm8 1448 movdqa xmm8, xmm14 1449 psrld xmm14, 8 1450 pslld xmm8, 24 1451 pxor xmm14, xmm8 1452 movdqa xmm8, xmm15 1453 psrld xmm15, 8 1454 pslld xmm8, 24 1455 pxor xmm15, xmm8 1456 movdqa xmm8, xmmword ptr [rsp+0x100] 1457 paddd xmm8, xmm12 1458 paddd 
xmm9, xmm13 1459 paddd xmm10, xmm14 1460 paddd xmm11, xmm15 1461 pxor xmm4, xmm8 1462 pxor xmm5, xmm9 1463 pxor xmm6, xmm10 1464 pxor xmm7, xmm11 1465 movdqa xmmword ptr [rsp+0x100], xmm8 1466 movdqa xmm8, xmm4 1467 psrld xmm8, 7 1468 pslld xmm4, 25 1469 por xmm4, xmm8 1470 movdqa xmm8, xmm5 1471 psrld xmm8, 7 1472 pslld xmm5, 25 1473 por xmm5, xmm8 1474 movdqa xmm8, xmm6 1475 psrld xmm8, 7 1476 pslld xmm6, 25 1477 por xmm6, xmm8 1478 movdqa xmm8, xmm7 1479 psrld xmm8, 7 1480 pslld xmm7, 25 1481 por xmm7, xmm8 1482 paddd xmm0, xmmword ptr [rsp+0xE0] 1483 paddd xmm1, xmmword ptr [rsp+0x20] 1484 paddd xmm2, xmmword ptr [rsp+0x30] 1485 paddd xmm3, xmmword ptr [rsp+0x70] 1486 paddd xmm0, xmm5 1487 paddd xmm1, xmm6 1488 paddd xmm2, xmm7 1489 paddd xmm3, xmm4 1490 pxor xmm15, xmm0 1491 pxor xmm12, xmm1 1492 pxor xmm13, xmm2 1493 pxor xmm14, xmm3 1494 pshuflw xmm15, xmm15, 0xB1 1495 pshufhw xmm15, xmm15, 0xB1 1496 pshuflw xmm12, xmm12, 0xB1 1497 pshufhw xmm12, xmm12, 0xB1 1498 pshuflw xmm13, xmm13, 0xB1 1499 pshufhw xmm13, xmm13, 0xB1 1500 pshuflw xmm14, xmm14, 0xB1 1501 pshufhw xmm14, xmm14, 0xB1 1502 paddd xmm10, xmm15 1503 paddd xmm11, xmm12 1504 movdqa xmm8, xmmword ptr [rsp+0x100] 1505 paddd xmm8, xmm13 1506 paddd xmm9, xmm14 1507 pxor xmm5, xmm10 1508 pxor xmm6, xmm11 1509 pxor xmm7, xmm8 1510 pxor xmm4, xmm9 1511 movdqa xmmword ptr [rsp+0x100], xmm8 1512 movdqa xmm8, xmm5 1513 psrld xmm8, 12 1514 pslld xmm5, 20 1515 por xmm5, xmm8 1516 movdqa xmm8, xmm6 1517 psrld xmm8, 12 1518 pslld xmm6, 20 1519 por xmm6, xmm8 1520 movdqa xmm8, xmm7 1521 psrld xmm8, 12 1522 pslld xmm7, 20 1523 por xmm7, xmm8 1524 movdqa xmm8, xmm4 1525 psrld xmm8, 12 1526 pslld xmm4, 20 1527 por xmm4, xmm8 1528 paddd xmm0, xmmword ptr [rsp+0xA0] 1529 paddd xmm1, xmmword ptr [rsp+0xC0] 1530 paddd xmm2, xmmword ptr [rsp+0x40] 1531 paddd xmm3, xmmword ptr [rsp+0xD0] 1532 paddd xmm0, xmm5 1533 paddd xmm1, xmm6 1534 paddd xmm2, xmm7 1535 paddd xmm3, xmm4 1536 pxor xmm15, xmm0 1537 pxor xmm12, xmm1 
1538 pxor xmm13, xmm2 1539 pxor xmm14, xmm3 1540 movdqa xmm8, xmm15 1541 psrld xmm15, 8 1542 pslld xmm8, 24 1543 pxor xmm15, xmm8 1544 movdqa xmm8, xmm12 1545 psrld xmm12, 8 1546 pslld xmm8, 24 1547 pxor xmm12, xmm8 1548 movdqa xmm8, xmm13 1549 psrld xmm13, 8 1550 pslld xmm8, 24 1551 pxor xmm13, xmm8 1552 movdqa xmm8, xmm14 1553 psrld xmm14, 8 1554 pslld xmm8, 24 1555 pxor xmm14, xmm8 1556 paddd xmm10, xmm15 1557 paddd xmm11, xmm12 1558 movdqa xmm8, xmmword ptr [rsp+0x100] 1559 paddd xmm8, xmm13 1560 paddd xmm9, xmm14 1561 pxor xmm5, xmm10 1562 pxor xmm6, xmm11 1563 pxor xmm7, xmm8 1564 pxor xmm4, xmm9 1565 pxor xmm0, xmm8 1566 pxor xmm1, xmm9 1567 pxor xmm2, xmm10 1568 pxor xmm3, xmm11 1569 movdqa xmm8, xmm5 1570 psrld xmm8, 7 1571 pslld xmm5, 25 1572 por xmm5, xmm8 1573 movdqa xmm8, xmm6 1574 psrld xmm8, 7 1575 pslld xmm6, 25 1576 por xmm6, xmm8 1577 movdqa xmm8, xmm7 1578 psrld xmm8, 7 1579 pslld xmm7, 25 1580 por xmm7, xmm8 1581 movdqa xmm8, xmm4 1582 psrld xmm8, 7 1583 pslld xmm4, 25 1584 por xmm4, xmm8 1585 pxor xmm4, xmm12 1586 pxor xmm5, xmm13 1587 pxor xmm6, xmm14 1588 pxor xmm7, xmm15 1589 mov eax, r13d 1590 jne 9b 1591 movdqa xmm9, xmm0 1592 punpckldq xmm0, xmm1 1593 punpckhdq xmm9, xmm1 1594 movdqa xmm11, xmm2 1595 punpckldq xmm2, xmm3 1596 punpckhdq xmm11, xmm3 1597 movdqa xmm1, xmm0 1598 punpcklqdq xmm0, xmm2 1599 punpckhqdq xmm1, xmm2 1600 movdqa xmm3, xmm9 1601 punpcklqdq xmm9, xmm11 1602 punpckhqdq xmm3, xmm11 1603 movdqu xmmword ptr [rbx], xmm0 1604 movdqu xmmword ptr [rbx+0x20], xmm1 1605 movdqu xmmword ptr [rbx+0x40], xmm9 1606 movdqu xmmword ptr [rbx+0x60], xmm3 1607 movdqa xmm9, xmm4 1608 punpckldq xmm4, xmm5 1609 punpckhdq xmm9, xmm5 1610 movdqa xmm11, xmm6 1611 punpckldq xmm6, xmm7 1612 punpckhdq xmm11, xmm7 1613 movdqa xmm5, xmm4 1614 punpcklqdq xmm4, xmm6 1615 punpckhqdq xmm5, xmm6 1616 movdqa xmm7, xmm9 1617 punpcklqdq xmm9, xmm11 1618 punpckhqdq xmm7, xmm11 1619 movdqu xmmword ptr [rbx+0x10], xmm4 1620 movdqu xmmword ptr [rbx+0x30], xmm5 
1621 movdqu xmmword ptr [rbx+0x50], xmm9 1622 movdqu xmmword ptr [rbx+0x70], xmm7 1623 movdqa xmm1, xmmword ptr [rsp+0x110] 1624 movdqa xmm0, xmm1 1625 paddd xmm1, xmmword ptr [rsp+0x150] 1626 movdqa xmmword ptr [rsp+0x110], xmm1 1627 pxor xmm0, xmmword ptr [CMP_MSB_MASK+rip] 1628 pxor xmm1, xmmword ptr [CMP_MSB_MASK+rip] 1629 pcmpgtd xmm0, xmm1 1630 movdqa xmm1, xmmword ptr [rsp+0x120] 1631 psubd xmm1, xmm0 1632 movdqa xmmword ptr [rsp+0x120], xmm1 1633 add rbx, 128 1634 add rdi, 32 1635 sub rsi, 4 1636 cmp rsi, 4 1637 jnc 2b 1638 test rsi, rsi 1639 jne 3f 1640 4: 1641 movdqa xmm6, xmmword ptr [rsp+0x170] 1642 movdqa xmm7, xmmword ptr [rsp+0x180] 1643 movdqa xmm8, xmmword ptr [rsp+0x190] 1644 movdqa xmm9, xmmword ptr [rsp+0x1A0] 1645 movdqa xmm10, xmmword ptr [rsp+0x1B0] 1646 movdqa xmm11, xmmword ptr [rsp+0x1C0] 1647 movdqa xmm12, xmmword ptr [rsp+0x1D0] 1648 movdqa xmm13, xmmword ptr [rsp+0x1E0] 1649 movdqa xmm14, xmmword ptr [rsp+0x1F0] 1650 movdqa xmm15, xmmword ptr [rsp+0x200] 1651 mov rsp, rbp 1652 pop rbp 1653 pop rbx 1654 pop rdi 1655 pop rsi 1656 pop r12 1657 pop r13 1658 pop r14 1659 pop r15 1660 ret 1661 .p2align 5 1662 3: 1663 test esi, 0x2 1664 je 3f 1665 movups xmm0, xmmword ptr [rcx] 1666 movups xmm1, xmmword ptr [rcx+0x10] 1667 movaps xmm8, xmm0 1668 movaps xmm9, xmm1 1669 movd xmm13, dword ptr [rsp+0x110] 1670 movd xmm14, dword ptr [rsp+0x120] 1671 punpckldq xmm13, xmm14 1672 movaps xmmword ptr [rsp], xmm13 1673 movd xmm14, dword ptr [rsp+0x114] 1674 movd xmm13, dword ptr [rsp+0x124] 1675 punpckldq xmm14, xmm13 1676 movaps xmmword ptr [rsp+0x10], xmm14 1677 mov r8, qword ptr [rdi] 1678 mov r9, qword ptr [rdi+0x8] 1679 movzx eax, byte ptr [rbp+0x80] 1680 or eax, r13d 1681 xor edx, edx 1682 2: 1683 mov r14d, eax 1684 or eax, r12d 1685 add rdx, 64 1686 cmp rdx, r15 1687 cmovne eax, r14d 1688 movaps xmm2, xmmword ptr [BLAKE3_IV+rip] 1689 movaps xmm10, xmm2 1690 movups xmm4, xmmword ptr [r8+rdx-0x40] 1691 movups xmm5, xmmword ptr [r8+rdx-0x30] 1692 
movaps xmm3, xmm4 1693 shufps xmm4, xmm5, 136 1694 shufps xmm3, xmm5, 221 1695 movaps xmm5, xmm3 1696 movups xmm6, xmmword ptr [r8+rdx-0x20] 1697 movups xmm7, xmmword ptr [r8+rdx-0x10] 1698 movaps xmm3, xmm6 1699 shufps xmm6, xmm7, 136 1700 pshufd xmm6, xmm6, 0x93 1701 shufps xmm3, xmm7, 221 1702 pshufd xmm7, xmm3, 0x93 1703 movups xmm12, xmmword ptr [r9+rdx-0x40] 1704 movups xmm13, xmmword ptr [r9+rdx-0x30] 1705 movaps xmm11, xmm12 1706 shufps xmm12, xmm13, 136 1707 shufps xmm11, xmm13, 221 1708 movaps xmm13, xmm11 1709 movups xmm14, xmmword ptr [r9+rdx-0x20] 1710 movups xmm15, xmmword ptr [r9+rdx-0x10] 1711 movaps xmm11, xmm14 1712 shufps xmm14, xmm15, 136 1713 pshufd xmm14, xmm14, 0x93 1714 shufps xmm11, xmm15, 221 1715 pshufd xmm15, xmm11, 0x93 1716 shl rax, 0x20 1717 or rax, 0x40 1718 movd xmm3, rax 1719 movdqa xmmword ptr [rsp+0x20], xmm3 1720 movaps xmm3, xmmword ptr [rsp] 1721 movaps xmm11, xmmword ptr [rsp+0x10] 1722 punpcklqdq xmm3, xmmword ptr [rsp+0x20] 1723 punpcklqdq xmm11, xmmword ptr [rsp+0x20] 1724 mov al, 7 1725 9: 1726 paddd xmm0, xmm4 1727 paddd xmm8, xmm12 1728 movaps xmmword ptr [rsp+0x20], xmm4 1729 movaps xmmword ptr [rsp+0x30], xmm12 1730 paddd xmm0, xmm1 1731 paddd xmm8, xmm9 1732 pxor xmm3, xmm0 1733 pxor xmm11, xmm8 1734 pshuflw xmm3, xmm3, 0xB1 1735 pshufhw xmm3, xmm3, 0xB1 1736 pshuflw xmm11, xmm11, 0xB1 1737 pshufhw xmm11, xmm11, 0xB1 1738 paddd xmm2, xmm3 1739 paddd xmm10, xmm11 1740 pxor xmm1, xmm2 1741 pxor xmm9, xmm10 1742 movdqa xmm4, xmm1 1743 pslld xmm1, 20 1744 psrld xmm4, 12 1745 por xmm1, xmm4 1746 movdqa xmm4, xmm9 1747 pslld xmm9, 20 1748 psrld xmm4, 12 1749 por xmm9, xmm4 1750 paddd xmm0, xmm5 1751 paddd xmm8, xmm13 1752 movaps xmmword ptr [rsp+0x40], xmm5 1753 movaps xmmword ptr [rsp+0x50], xmm13 1754 paddd xmm0, xmm1 1755 paddd xmm8, xmm9 1756 pxor xmm3, xmm0 1757 pxor xmm11, xmm8 1758 movdqa xmm13, xmm3 1759 psrld xmm3, 8 1760 pslld xmm13, 24 1761 pxor xmm3, xmm13 1762 movdqa xmm13, xmm11 1763 psrld xmm11, 8 1764 pslld 
xmm13, 24 1765 pxor xmm11, xmm13 1766 paddd xmm2, xmm3 1767 paddd xmm10, xmm11 1768 pxor xmm1, xmm2 1769 pxor xmm9, xmm10 1770 movdqa xmm4, xmm1 1771 pslld xmm1, 25 1772 psrld xmm4, 7 1773 por xmm1, xmm4 1774 movdqa xmm4, xmm9 1775 pslld xmm9, 25 1776 psrld xmm4, 7 1777 por xmm9, xmm4 1778 pshufd xmm0, xmm0, 0x93 1779 pshufd xmm8, xmm8, 0x93 1780 pshufd xmm3, xmm3, 0x4E 1781 pshufd xmm11, xmm11, 0x4E 1782 pshufd xmm2, xmm2, 0x39 1783 pshufd xmm10, xmm10, 0x39 1784 paddd xmm0, xmm6 1785 paddd xmm8, xmm14 1786 paddd xmm0, xmm1 1787 paddd xmm8, xmm9 1788 pxor xmm3, xmm0 1789 pxor xmm11, xmm8 1790 pshuflw xmm3, xmm3, 0xB1 1791 pshufhw xmm3, xmm3, 0xB1 1792 pshuflw xmm11, xmm11, 0xB1 1793 pshufhw xmm11, xmm11, 0xB1 1794 paddd xmm2, xmm3 1795 paddd xmm10, xmm11 1796 pxor xmm1, xmm2 1797 pxor xmm9, xmm10 1798 movdqa xmm4, xmm1 1799 pslld xmm1, 20 1800 psrld xmm4, 12 1801 por xmm1, xmm4 1802 movdqa xmm4, xmm9 1803 pslld xmm9, 20 1804 psrld xmm4, 12 1805 por xmm9, xmm4 1806 paddd xmm0, xmm7 1807 paddd xmm8, xmm15 1808 paddd xmm0, xmm1 1809 paddd xmm8, xmm9 1810 pxor xmm3, xmm0 1811 pxor xmm11, xmm8 1812 movdqa xmm13, xmm3 1813 psrld xmm3, 8 1814 pslld xmm13, 24 1815 pxor xmm3, xmm13 1816 movdqa xmm13, xmm11 1817 psrld xmm11, 8 1818 pslld xmm13, 24 1819 pxor xmm11, xmm13 1820 paddd xmm2, xmm3 1821 paddd xmm10, xmm11 1822 pxor xmm1, xmm2 1823 pxor xmm9, xmm10 1824 movdqa xmm4, xmm1 1825 pslld xmm1, 25 1826 psrld xmm4, 7 1827 por xmm1, xmm4 1828 movdqa xmm4, xmm9 1829 pslld xmm9, 25 1830 psrld xmm4, 7 1831 por xmm9, xmm4 1832 pshufd xmm0, xmm0, 0x39 1833 pshufd xmm8, xmm8, 0x39 1834 pshufd xmm3, xmm3, 0x4E 1835 pshufd xmm11, xmm11, 0x4E 1836 pshufd xmm2, xmm2, 0x93 1837 pshufd xmm10, xmm10, 0x93 1838 dec al 1839 je 9f 1840 movdqa xmm12, xmmword ptr [rsp+0x20] 1841 movdqa xmm5, xmmword ptr [rsp+0x40] 1842 pshufd xmm13, xmm12, 0x0F 1843 shufps xmm12, xmm5, 214 1844 pshufd xmm4, xmm12, 0x39 1845 movdqa xmm12, xmm6 1846 shufps xmm12, xmm7, 250 1847 pand xmm13, xmmword ptr 
[PBLENDW_0x33_MASK+rip] 1848 pand xmm12, xmmword ptr [PBLENDW_0xCC_MASK+rip] 1849 por xmm13, xmm12 1850 movdqa xmmword ptr [rsp+0x20], xmm13 1851 movdqa xmm12, xmm7 1852 punpcklqdq xmm12, xmm5 1853 movdqa xmm13, xmm6 1854 pand xmm12, xmmword ptr [PBLENDW_0x3F_MASK+rip] 1855 pand xmm13, xmmword ptr [PBLENDW_0xC0_MASK+rip] 1856 por xmm12, xmm13 1857 pshufd xmm12, xmm12, 0x78 1858 punpckhdq xmm5, xmm7 1859 punpckldq xmm6, xmm5 1860 pshufd xmm7, xmm6, 0x1E 1861 movdqa xmmword ptr [rsp+0x40], xmm12 1862 movdqa xmm5, xmmword ptr [rsp+0x30] 1863 movdqa xmm13, xmmword ptr [rsp+0x50] 1864 pshufd xmm6, xmm5, 0x0F 1865 shufps xmm5, xmm13, 214 1866 pshufd xmm12, xmm5, 0x39 1867 movdqa xmm5, xmm14 1868 shufps xmm5, xmm15, 250 1869 pand xmm6, xmmword ptr [PBLENDW_0x33_MASK+rip] 1870 pand xmm5, xmmword ptr [PBLENDW_0xCC_MASK+rip] 1871 por xmm6, xmm5 1872 movdqa xmm5, xmm15 1873 punpcklqdq xmm5, xmm13 1874 movdqa xmmword ptr [rsp+0x30], xmm2 1875 movdqa xmm2, xmm14 1876 pand xmm5, xmmword ptr [PBLENDW_0x3F_MASK+rip] 1877 pand xmm2, xmmword ptr [PBLENDW_0xC0_MASK+rip] 1878 por xmm5, xmm2 1879 movdqa xmm2, xmmword ptr [rsp+0x30] 1880 pshufd xmm5, xmm5, 0x78 1881 punpckhdq xmm13, xmm15 1882 punpckldq xmm14, xmm13 1883 pshufd xmm15, xmm14, 0x1E 1884 movdqa xmm13, xmm6 1885 movdqa xmm14, xmm5 1886 movdqa xmm5, xmmword ptr [rsp+0x20] 1887 movdqa xmm6, xmmword ptr [rsp+0x40] 1888 jmp 9b 1889 9: 1890 pxor xmm0, xmm2 1891 pxor xmm1, xmm3 1892 pxor xmm8, xmm10 1893 pxor xmm9, xmm11 1894 mov eax, r13d 1895 cmp rdx, r15 1896 jne 2b 1897 movups xmmword ptr [rbx], xmm0 1898 movups xmmword ptr [rbx+0x10], xmm1 1899 movups xmmword ptr [rbx+0x20], xmm8 1900 movups xmmword ptr [rbx+0x30], xmm9 1901 mov eax, dword ptr [rsp+0x130] 1902 neg eax 1903 mov r10d, dword ptr [rsp+0x110+8*rax] 1904 mov r11d, dword ptr [rsp+0x120+8*rax] 1905 mov dword ptr [rsp+0x110], r10d 1906 mov dword ptr [rsp+0x120], r11d 1907 add rdi, 16 1908 add rbx, 64 1909 sub rsi, 2 1910 3: 1911 test esi, 0x1 1912 je 4b 1913 movups 
xmm0, xmmword ptr [rcx] 1914 movups xmm1, xmmword ptr [rcx+0x10] 1915 movd xmm13, dword ptr [rsp+0x110] 1916 movd xmm14, dword ptr [rsp+0x120] 1917 punpckldq xmm13, xmm14 1918 mov r8, qword ptr [rdi] 1919 movzx eax, byte ptr [rbp+0x80] 1920 or eax, r13d 1921 xor edx, edx 1922 2: 1923 mov r14d, eax 1924 or eax, r12d 1925 add rdx, 64 1926 cmp rdx, r15 1927 cmovne eax, r14d 1928 movaps xmm2, xmmword ptr [BLAKE3_IV+rip] 1929 shl rax, 32 1930 or rax, 64 1931 movd xmm12, rax 1932 movdqa xmm3, xmm13 1933 punpcklqdq xmm3, xmm12 1934 movups xmm4, xmmword ptr [r8+rdx-0x40] 1935 movups xmm5, xmmword ptr [r8+rdx-0x30] 1936 movaps xmm8, xmm4 1937 shufps xmm4, xmm5, 136 1938 shufps xmm8, xmm5, 221 1939 movaps xmm5, xmm8 1940 movups xmm6, xmmword ptr [r8+rdx-0x20] 1941 movups xmm7, xmmword ptr [r8+rdx-0x10] 1942 movaps xmm8, xmm6 1943 shufps xmm6, xmm7, 136 1944 pshufd xmm6, xmm6, 0x93 1945 shufps xmm8, xmm7, 221 1946 pshufd xmm7, xmm8, 0x93 1947 mov al, 7 1948 9: 1949 paddd xmm0, xmm4 1950 paddd xmm0, xmm1 1951 pxor xmm3, xmm0 1952 pshuflw xmm3, xmm3, 0xB1 1953 pshufhw xmm3, xmm3, 0xB1 1954 paddd xmm2, xmm3 1955 pxor xmm1, xmm2 1956 movdqa xmm11, xmm1 1957 pslld xmm1, 20 1958 psrld xmm11, 12 1959 por xmm1, xmm11 1960 paddd xmm0, xmm5 1961 paddd xmm0, xmm1 1962 pxor xmm3, xmm0 1963 movdqa xmm14, xmm3 1964 psrld xmm3, 8 1965 pslld xmm14, 24 1966 pxor xmm3, xmm14 1967 paddd xmm2, xmm3 1968 pxor xmm1, xmm2 1969 movdqa xmm11, xmm1 1970 pslld xmm1, 25 1971 psrld xmm11, 7 1972 por xmm1, xmm11 1973 pshufd xmm0, xmm0, 0x93 1974 pshufd xmm3, xmm3, 0x4E 1975 pshufd xmm2, xmm2, 0x39 1976 paddd xmm0, xmm6 1977 paddd xmm0, xmm1 1978 pxor xmm3, xmm0 1979 pshuflw xmm3, xmm3, 0xB1 1980 pshufhw xmm3, xmm3, 0xB1 1981 paddd xmm2, xmm3 1982 pxor xmm1, xmm2 1983 movdqa xmm11, xmm1 1984 pslld xmm1, 20 1985 psrld xmm11, 12 1986 por xmm1, xmm11 1987 paddd xmm0, xmm7 1988 paddd xmm0, xmm1 1989 pxor xmm3, xmm0 1990 movdqa xmm14, xmm3 1991 psrld xmm3, 8 1992 pslld xmm14, 24 1993 pxor xmm3, xmm14 1994 paddd 
xmm2, xmm3
/*
 * The operand pair above completes the `paddd` that ends the preceding
 * physical line.  What follows, down to `jmp 4b`, is the unchanged end of
 * the single-input path of blake3_hash_many_sse2; labels 2b/4b/9b refer to
 * definitions earlier in this file.
 */
        pxor    xmm1, xmm2
        movdqa  xmm11, xmm1
        pslld   xmm1, 25
        psrld   xmm11, 7
        por     xmm1, xmm11
        pshufd  xmm0, xmm0, 0x39
        pshufd  xmm3, xmm3, 0x4E
        pshufd  xmm2, xmm2, 0x93
        dec     al
        jz      9f
        movdqa  xmm8, xmm4
        shufps  xmm8, xmm5, 214
        pshufd  xmm9, xmm4, 0x0F
        pshufd  xmm4, xmm8, 0x39
        movdqa  xmm8, xmm6
        shufps  xmm8, xmm7, 250
        pand    xmm9, xmmword ptr [PBLENDW_0x33_MASK+rip]
        pand    xmm8, xmmword ptr [PBLENDW_0xCC_MASK+rip]
        por     xmm9, xmm8
        movdqa  xmm8, xmm7
        punpcklqdq xmm8, xmm5
        movdqa  xmm10, xmm6
        pand    xmm8, xmmword ptr [PBLENDW_0x3F_MASK+rip]
        pand    xmm10, xmmword ptr [PBLENDW_0xC0_MASK+rip]
        por     xmm8, xmm10
        pshufd  xmm8, xmm8, 0x78
        punpckhdq xmm5, xmm7
        punpckldq xmm6, xmm5
        pshufd  xmm7, xmm6, 0x1E
        movdqa  xmm5, xmm9
        movdqa  xmm6, xmm8
        jmp     9b
9:
        pxor    xmm0, xmm2
        pxor    xmm1, xmm3
        mov     eax, r13d
        cmp     rdx, r15
        jne     2b
        movups  xmmword ptr [rbx], xmm0
        movups  xmmword ptr [rbx+0x10], xmm1
        jmp     4b

/*
 * blake3_compress_in_place_sse2  (Microsoft x64 calling convention)
 *
 * Runs the 7-round compression loop on one 64-byte block and writes the
 * 32-byte result back over the input state.
 *
 * In:
 *   rcx          -> 32-byte state; read into xmm0:xmm1, overwritten on exit
 *   rdx          -> 64-byte message block (unaligned loads are used)
 *   r8b          =  byte stored in dword 2 of state row 3
 *   r9           =  64-bit value stored in dwords 0-1 of state row 3
 *   5th argument =  byte stored in dword 3 of state row 3; it sits at
 *                   [rsp+0x28] on entry, i.e. [rsp+0xA0] after the
 *                   120-byte frame below is allocated
 *   (assumed to be block_len / counter / flags of the C prototype, which
 *    is not visible from this file - confirm against the C header)
 *
 * Out:      none in registers; 32 bytes stored at [rcx].
 * Clobbers: rax, r8, xmm0-xmm5, flags.
 * Preserves xmm6-xmm11 and xmm14, the only callee-saved vector registers
 * the body writes.  xmm10 is one of them (scratch in the message
 * permutation), so it must be spilled; xmm15 is never touched and needs
 * no slot.
 *
 * Stack: rsp is 8 mod 16 on entry; subtracting 120 makes it 16-byte
 * aligned, which the movdqa spills below rely on.
 */
.p2align 6
blake3_compress_in_place_sse2:
_blake3_compress_in_place_sse2:
        sub     rsp, 120
        movdqa  xmmword ptr [rsp], xmm6
        movdqa  xmmword ptr [rsp+0x10], xmm7
        movdqa  xmmword ptr [rsp+0x20], xmm8
        movdqa  xmmword ptr [rsp+0x30], xmm9
        movdqa  xmmword ptr [rsp+0x40], xmm11
        movdqa  xmmword ptr [rsp+0x50], xmm14
        movdqa  xmmword ptr [rsp+0x60], xmm10   /* written by the permutation step */

        /* State rows: xmm0,xmm1 = caller state, xmm2 = IV, xmm3 = {r9, r8b, 5th arg}. */
        movups  xmm0, xmmword ptr [rcx]
        movups  xmm1, xmmword ptr [rcx+0x10]
        movaps  xmm2, xmmword ptr [BLAKE3_IV+rip]
        movzx   eax, byte ptr [rsp+0xA0]
        movzx   r8d, r8b
        shl     rax, 32
        add     r8, rax
        movq    xmm3, r9
        movq    xmm4, r8
        punpcklqdq xmm3, xmm4

        /* Load the block and split it into even/odd dwords (shufps 136 / 221). */
        movups  xmm4, xmmword ptr [rdx]
        movups  xmm5, xmmword ptr [rdx+0x10]
        movaps  xmm8, xmm4
        shufps  xmm4, xmm5, 136
        shufps  xmm8, xmm5, 221
        movaps  xmm5, xmm8
        movups  xmm6, xmmword ptr [rdx+0x20]
        movups  xmm7, xmmword ptr [rdx+0x30]
        movaps  xmm8, xmm6
        shufps  xmm6, xmm7, 136
        pshufd  xmm6, xmm6, 0x93
        shufps  xmm8, xmm7, 221
        pshufd  xmm7, xmm8, 0x93
        mov     al, 7                           /* round counter */
9:
        /* First half-round: message vectors xmm4 then xmm5. */
        paddd   xmm0, xmm4
        paddd   xmm0, xmm1
        pxor    xmm3, xmm0
        pshuflw xmm3, xmm3, 0xB1                /* swap 16-bit halves of each dword: rotate by 16 */
        pshufhw xmm3, xmm3, 0xB1
        paddd   xmm2, xmm3
        pxor    xmm1, xmm2
        movdqa  xmm11, xmm1                     /* rotate right by 12 */
        pslld   xmm1, 20
        psrld   xmm11, 12
        por     xmm1, xmm11
        paddd   xmm0, xmm5
        paddd   xmm0, xmm1
        pxor    xmm3, xmm0
        movdqa  xmm14, xmm3                     /* rotate right by 8 */
        psrld   xmm3, 8
        pslld   xmm14, 24
        pxor    xmm3, xmm14
        paddd   xmm2, xmm3
        pxor    xmm1, xmm2
        movdqa  xmm11, xmm1                     /* rotate right by 7 */
        pslld   xmm1, 25
        psrld   xmm11, 7
        por     xmm1, xmm11
        /* Rotate the lanes of rows 0, 2 and 3 before the second half-round. */
        pshufd  xmm0, xmm0, 0x93
        pshufd  xmm3, xmm3, 0x4E
        pshufd  xmm2, xmm2, 0x39
        /* Second half-round: message vectors xmm6 then xmm7. */
        paddd   xmm0, xmm6
        paddd   xmm0, xmm1
        pxor    xmm3, xmm0
        pshuflw xmm3, xmm3, 0xB1
        pshufhw xmm3, xmm3, 0xB1
        paddd   xmm2, xmm3
        pxor    xmm1, xmm2
        movdqa  xmm11, xmm1
        pslld   xmm1, 20
        psrld   xmm11, 12
        por     xmm1, xmm11
        paddd   xmm0, xmm7
        paddd   xmm0, xmm1
        pxor    xmm3, xmm0
        movdqa  xmm14, xmm3
        psrld   xmm3, 8
        pslld   xmm14, 24
        pxor    xmm3, xmm14
        paddd   xmm2, xmm3
        pxor    xmm1, xmm2
        movdqa  xmm11, xmm1
        pslld   xmm1, 25
        psrld   xmm11, 7
        por     xmm1, xmm11
        /* Undo the lane rotation. */
        pshufd  xmm0, xmm0, 0x39
        pshufd  xmm3, xmm3, 0x4E
        pshufd  xmm2, xmm2, 0x93
        dec     al
        jz      9f
        /*
         * Not the last round: permute the message vectors xmm4-xmm7 for the
         * next one.  Each pand/pand/por triple merges dwords from two
         * registers using the PBLENDW_* masks (presumably standing in for
         * pblendw, which SSE2 lacks).  xmm8-xmm10 are scratch here.
         */
        movdqa  xmm8, xmm4
        shufps  xmm8, xmm5, 214
        pshufd  xmm9, xmm4, 0x0F
        pshufd  xmm4, xmm8, 0x39
        movdqa  xmm8, xmm6
        shufps  xmm8, xmm7, 250
        pand    xmm9, xmmword ptr [PBLENDW_0x33_MASK+rip]
        pand    xmm8, xmmword ptr [PBLENDW_0xCC_MASK+rip]
        por     xmm9, xmm8
        movdqa  xmm8, xmm7
        punpcklqdq xmm8, xmm5
        movdqa  xmm10, xmm6
        pand    xmm8, xmmword ptr [PBLENDW_0x3F_MASK+rip]
        pand    xmm10, xmmword ptr [PBLENDW_0xC0_MASK+rip]
        por     xmm8, xmm10
        pshufd  xmm8, xmm8, 0x78
        punpckhdq xmm5, xmm7
        punpckldq xmm6, xmm5
        pshufd  xmm7, xmm6, 0x1E
        movdqa  xmm5, xmm9
        movdqa  xmm6, xmm8
        jmp     9b
9:
        /* Fold rows 2,3 into rows 0,1 and store the result over the input state. */
        pxor    xmm0, xmm2
        pxor    xmm1, xmm3
        movups  xmmword ptr [rcx], xmm0
        movups  xmmword ptr [rcx+0x10], xmm1
        movdqa  xmm6, xmmword ptr [rsp]
        movdqa  xmm7, xmmword ptr [rsp+0x10]
        movdqa  xmm8, xmmword ptr [rsp+0x20]
        movdqa  xmm9, xmmword ptr [rsp+0x30]
        movdqa  xmm11, xmmword ptr [rsp+0x40]
        movdqa  xmm14, xmmword ptr [rsp+0x50]
        movdqa  xmm10, xmmword ptr [rsp+0x60]
        add     rsp, 120
        ret


/*
 * blake3_compress_xof_sse2  (Microsoft x64 calling convention)
 *
 * Same 7-round compression as blake3_compress_in_place_sse2, but the input
 * state at [rcx] is left untouched and a 64-byte result is written to the
 * buffer given by the 6th argument.
 *
 * In:
 *   rcx          -> 32-byte state (read twice, never written)
 *   rdx          -> 64-byte message block (unaligned loads are used)
 *   r8b          =  byte stored in dword 2 of state row 3
 *   r9           =  64-bit value stored in dwords 0-1 of state row 3
 *   5th argument =  byte stored in dword 3 of state row 3 ([rsp+0xA0]
 *                   after the frame is allocated)
 *   6th argument -> 64-byte output buffer ([rsp+0xA8] after the frame is
 *                   allocated); kept in r10
 *   (assumed to be block_len / counter / flags / out of the C prototype,
 *    which is not visible from this file - confirm against the C header)
 *
 * Out:      64 bytes stored at [r10]:
 *             +0x00 row0 ^ row2      +0x10 row1 ^ row3
 *             +0x20 row2 ^ state[0..15]   +0x30 row3 ^ state[16..31]
 * Clobbers: rax, r8, r10, xmm0-xmm5, flags.
 * Preserves xmm6-xmm11 and xmm14 (xmm10 is scratch in the message
 * permutation and therefore spilled; xmm15 is never touched).
 */
.p2align 6
_blake3_compress_xof_sse2:
blake3_compress_xof_sse2:
        sub     rsp, 120
        movdqa  xmmword ptr [rsp], xmm6
        movdqa  xmmword ptr [rsp+0x10], xmm7
        movdqa  xmmword ptr [rsp+0x20], xmm8
        movdqa  xmmword ptr [rsp+0x30], xmm9
        movdqa  xmmword ptr [rsp+0x40], xmm11
        movdqa  xmmword ptr [rsp+0x50], xmm14
        movdqa  xmmword ptr [rsp+0x60], xmm10   /* written by the permutation step */

        /* State rows: xmm0,xmm1 = caller state, xmm2 = IV, xmm3 = {r9, r8b, 5th arg}. */
        movups  xmm0, xmmword ptr [rcx]
        movups  xmm1, xmmword ptr [rcx+0x10]
        movaps  xmm2, xmmword ptr [BLAKE3_IV+rip]
        movzx   eax, byte ptr [rsp+0xA0]
        movzx   r8d, r8b
        mov     r10, qword ptr [rsp+0xA8]       /* output pointer */
        shl     rax, 32
        add     r8, rax
        movq    xmm3, r9
        movq    xmm4, r8
        punpcklqdq xmm3, xmm4

        /* Load the block and split it into even/odd dwords (shufps 136 / 221). */
        movups  xmm4, xmmword ptr [rdx]
        movups  xmm5, xmmword ptr [rdx+0x10]
        movaps  xmm8, xmm4
        shufps  xmm4, xmm5, 136
        shufps  xmm8, xmm5, 221
        movaps  xmm5, xmm8
        movups  xmm6, xmmword ptr [rdx+0x20]
        movups  xmm7, xmmword ptr [rdx+0x30]
        movaps  xmm8, xmm6
        shufps  xmm6, xmm7, 136
        pshufd  xmm6, xmm6, 0x93
        shufps  xmm8, xmm7, 221
        pshufd  xmm7, xmm8, 0x93
        mov     al, 7                           /* round counter */
9:
        /* First half-round: message vectors xmm4 then xmm5. */
        paddd   xmm0, xmm4
        paddd   xmm0, xmm1
        pxor    xmm3, xmm0
        pshuflw xmm3, xmm3, 0xB1                /* swap 16-bit halves of each dword: rotate by 16 */
        pshufhw xmm3, xmm3, 0xB1
        paddd   xmm2, xmm3
        pxor    xmm1, xmm2
        movdqa  xmm11, xmm1                     /* rotate right by 12 */
        pslld   xmm1, 20
        psrld   xmm11, 12
        por     xmm1, xmm11
        paddd   xmm0, xmm5
        paddd   xmm0, xmm1
        pxor    xmm3, xmm0
        movdqa  xmm14, xmm3                     /* rotate right by 8 */
        psrld   xmm3, 8
        pslld   xmm14, 24
        pxor    xmm3, xmm14
        paddd   xmm2, xmm3
        pxor    xmm1, xmm2
        movdqa  xmm11, xmm1                     /* rotate right by 7 */
        pslld   xmm1, 25
        psrld   xmm11, 7
        por     xmm1, xmm11
        /* Rotate the lanes of rows 0, 2 and 3 before the second half-round. */
        pshufd  xmm0, xmm0, 0x93
        pshufd  xmm3, xmm3, 0x4E
        pshufd  xmm2, xmm2, 0x39
        /* Second half-round: message vectors xmm6 then xmm7. */
        paddd   xmm0, xmm6
        paddd   xmm0, xmm1
        pxor    xmm3, xmm0
        pshuflw xmm3, xmm3, 0xB1
        pshufhw xmm3, xmm3, 0xB1
        paddd   xmm2, xmm3
        pxor    xmm1, xmm2
        movdqa  xmm11, xmm1
        pslld   xmm1, 20
        psrld   xmm11, 12
        por     xmm1, xmm11
        paddd   xmm0, xmm7
        paddd   xmm0, xmm1
        pxor    xmm3, xmm0
        movdqa  xmm14, xmm3
        psrld   xmm3, 8
        pslld   xmm14, 24
        pxor    xmm3, xmm14
        paddd   xmm2, xmm3
        pxor    xmm1, xmm2
        movdqa  xmm11, xmm1
        pslld   xmm1, 25
        psrld   xmm11, 7
        por     xmm1, xmm11
        /* Undo the lane rotation. */
        pshufd  xmm0, xmm0, 0x39
        pshufd  xmm3, xmm3, 0x4E
        pshufd  xmm2, xmm2, 0x93
        dec     al
        jz      9f
        /*
         * Not the last round: permute the message vectors xmm4-xmm7 for the
         * next one.  Each pand/pand/por triple merges dwords from two
         * registers using the PBLENDW_* masks (presumably standing in for
         * pblendw, which SSE2 lacks).  xmm8-xmm10 are scratch here.
         */
        movdqa  xmm8, xmm4
        shufps  xmm8, xmm5, 214
        pshufd  xmm9, xmm4, 0x0F
        pshufd  xmm4, xmm8, 0x39
        movdqa  xmm8, xmm6
        shufps  xmm8, xmm7, 250
        pand    xmm9, xmmword ptr [PBLENDW_0x33_MASK+rip]
        pand    xmm8, xmmword ptr [PBLENDW_0xCC_MASK+rip]
        por     xmm9, xmm8
        movdqa  xmm8, xmm7
        punpcklqdq xmm8, xmm5
        movdqa  xmm10, xmm6
        pand    xmm8, xmmword ptr [PBLENDW_0x3F_MASK+rip]
        pand    xmm10, xmmword ptr [PBLENDW_0xC0_MASK+rip]
        por     xmm8, xmm10
        pshufd  xmm8, xmm8, 0x78
        punpckhdq xmm5, xmm7
        punpckldq xmm6, xmm5
        pshufd  xmm7, xmm6, 0x1E
        movdqa  xmm5, xmm9
        movdqa  xmm6, xmm8
        jmp     9b
9:
        /* Re-read the untouched input state to build the upper 32 output bytes. */
        movdqu  xmm4, xmmword ptr [rcx]
        movdqu  xmm5, xmmword ptr [rcx+0x10]
        pxor    xmm0, xmm2
        pxor    xmm1, xmm3
        pxor    xmm2, xmm4
        pxor    xmm3, xmm5
        movups  xmmword ptr [r10], xmm0
        movups  xmmword ptr [r10+0x10], xmm1
        movups  xmmword ptr [r10+0x20], xmm2
        movups  xmmword ptr [r10+0x30], xmm3
        movdqa  xmm6, xmmword ptr [rsp]
        movdqa  xmm7, xmmword ptr [rsp+0x10]
        movdqa  xmm8, xmmword ptr [rsp+0x20]
        movdqa  xmm9, xmmword ptr [rsp+0x30]
        movdqa  xmm11, xmmword ptr [rsp+0x40]
        movdqa  xmm14, xmmword ptr [rsp+0x50]
        movdqa  xmm10, xmmword ptr [rsp+0x60]
        add     rsp, 120
        ret


/*
 * Read-only constants.  The section starts 64-byte aligned and every table
 * is exactly 16 bytes, so each label is 16-byte aligned - required because
 * the code uses them as aligned memory operands (movaps / pand / pxor).
 */
.section .rodata
.p2align 6
BLAKE3_IV:                                      /* loaded as state row 2 by the compress paths */
        .long  0x6A09E667, 0xBB67AE85
        .long  0x3C6EF372, 0xA54FF53A
ADD0:                                           /* per-lane offsets 0..3, masked in hash_many setup */
        .long  0, 1, 2, 3
ADD1:                                           /* per-lane step of 4, masked in hash_many setup */
        .long  4, 4, 4, 4
/* BLAKE3_IV_0..3 and BLAKE3_BLOCK_LEN are not referenced by the code visible
   in this part of the file; presumably used by the 4-way path - confirm. */
BLAKE3_IV_0:
        .long  0x6A09E667, 0x6A09E667, 0x6A09E667, 0x6A09E667
BLAKE3_IV_1:
        .long  0xBB67AE85, 0xBB67AE85, 0xBB67AE85, 0xBB67AE85
BLAKE3_IV_2:
        .long  0x3C6EF372, 0x3C6EF372, 0x3C6EF372, 0x3C6EF372
BLAKE3_IV_3:
        .long  0xA54FF53A, 0xA54FF53A, 0xA54FF53A, 0xA54FF53A
BLAKE3_BLOCK_LEN:
        .long  64, 64, 64, 64
CMP_MSB_MASK:                                   /* xor'd into both operands before pcmpgtd (flips the sign bits) */
        .long  0x80000000, 0x80000000, 0x80000000, 0x80000000
/* Dword-select masks for the pand/pand/por merges in the message permutation. */
PBLENDW_0x33_MASK:
        .long  0xFFFFFFFF, 0x00000000, 0xFFFFFFFF, 0x00000000
PBLENDW_0xCC_MASK:
        .long  0x00000000, 0xFFFFFFFF, 0x00000000, 0xFFFFFFFF
PBLENDW_0x3F_MASK:
        .long  0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000
PBLENDW_0xC0_MASK:
        .long  0x00000000, 0x00000000, 0x00000000, 0xFFFFFFFF