blake3_sse41_x86-64_windows_gnu.S (63460B)
1 .intel_syntax noprefix 2 .global blake3_hash_many_sse41 3 .global _blake3_hash_many_sse41 4 .global blake3_compress_in_place_sse41 5 .global _blake3_compress_in_place_sse41 6 .global blake3_compress_xof_sse41 7 .global _blake3_compress_xof_sse41 8 .section .text 9 .p2align 6 10 _blake3_hash_many_sse41: 11 blake3_hash_many_sse41: 12 push r15 13 push r14 14 push r13 15 push r12 16 push rsi 17 push rdi 18 push rbx 19 push rbp 20 mov rbp, rsp 21 sub rsp, 528 22 and rsp, 0xFFFFFFFFFFFFFFC0 23 movdqa xmmword ptr [rsp+0x170], xmm6 24 movdqa xmmword ptr [rsp+0x180], xmm7 25 movdqa xmmword ptr [rsp+0x190], xmm8 26 movdqa xmmword ptr [rsp+0x1A0], xmm9 27 movdqa xmmword ptr [rsp+0x1B0], xmm10 28 movdqa xmmword ptr [rsp+0x1C0], xmm11 29 movdqa xmmword ptr [rsp+0x1D0], xmm12 30 movdqa xmmword ptr [rsp+0x1E0], xmm13 31 movdqa xmmword ptr [rsp+0x1F0], xmm14 32 movdqa xmmword ptr [rsp+0x200], xmm15 33 mov rdi, rcx 34 mov rsi, rdx 35 mov rdx, r8 36 mov rcx, r9 37 mov r8, qword ptr [rbp+0x68] 38 movzx r9, byte ptr [rbp+0x70] 39 neg r9d 40 movd xmm0, r9d 41 pshufd xmm0, xmm0, 0x00 42 movdqa xmmword ptr [rsp+0x130], xmm0 43 movdqa xmm1, xmm0 44 pand xmm1, xmmword ptr [ADD0+rip] 45 pand xmm0, xmmword ptr [ADD1+rip] 46 movdqa xmmword ptr [rsp+0x150], xmm0 47 movd xmm0, r8d 48 pshufd xmm0, xmm0, 0x00 49 paddd xmm0, xmm1 50 movdqa xmmword ptr [rsp+0x110], xmm0 51 pxor xmm0, xmmword ptr [CMP_MSB_MASK+rip] 52 pxor xmm1, xmmword ptr [CMP_MSB_MASK+rip] 53 pcmpgtd xmm1, xmm0 54 shr r8, 32 55 movd xmm2, r8d 56 pshufd xmm2, xmm2, 0x00 57 psubd xmm2, xmm1 58 movdqa xmmword ptr [rsp+0x120], xmm2 59 mov rbx, qword ptr [rbp+0x90] 60 mov r15, rdx 61 shl r15, 6 62 movzx r13d, byte ptr [rbp+0x78] 63 movzx r12d, byte ptr [rbp+0x88] 64 cmp rsi, 4 65 jc 3f 66 2: 67 movdqu xmm3, xmmword ptr [rcx] 68 pshufd xmm0, xmm3, 0x00 69 pshufd xmm1, xmm3, 0x55 70 pshufd xmm2, xmm3, 0xAA 71 pshufd xmm3, xmm3, 0xFF 72 movdqu xmm7, xmmword ptr [rcx+0x10] 73 pshufd xmm4, xmm7, 0x00 74 pshufd xmm5, xmm7, 0x55 75 pshufd 
xmm6, xmm7, 0xAA 76 pshufd xmm7, xmm7, 0xFF 77 mov r8, qword ptr [rdi] 78 mov r9, qword ptr [rdi+0x8] 79 mov r10, qword ptr [rdi+0x10] 80 mov r11, qword ptr [rdi+0x18] 81 movzx eax, byte ptr [rbp+0x80] 82 or eax, r13d 83 xor edx, edx 84 9: 85 mov r14d, eax 86 or eax, r12d 87 add rdx, 64 88 cmp rdx, r15 89 cmovne eax, r14d 90 movdqu xmm8, xmmword ptr [r8+rdx-0x40] 91 movdqu xmm9, xmmword ptr [r9+rdx-0x40] 92 movdqu xmm10, xmmword ptr [r10+rdx-0x40] 93 movdqu xmm11, xmmword ptr [r11+rdx-0x40] 94 movdqa xmm12, xmm8 95 punpckldq xmm8, xmm9 96 punpckhdq xmm12, xmm9 97 movdqa xmm14, xmm10 98 punpckldq xmm10, xmm11 99 punpckhdq xmm14, xmm11 100 movdqa xmm9, xmm8 101 punpcklqdq xmm8, xmm10 102 punpckhqdq xmm9, xmm10 103 movdqa xmm13, xmm12 104 punpcklqdq xmm12, xmm14 105 punpckhqdq xmm13, xmm14 106 movdqa xmmword ptr [rsp], xmm8 107 movdqa xmmword ptr [rsp+0x10], xmm9 108 movdqa xmmword ptr [rsp+0x20], xmm12 109 movdqa xmmword ptr [rsp+0x30], xmm13 110 movdqu xmm8, xmmword ptr [r8+rdx-0x30] 111 movdqu xmm9, xmmword ptr [r9+rdx-0x30] 112 movdqu xmm10, xmmword ptr [r10+rdx-0x30] 113 movdqu xmm11, xmmword ptr [r11+rdx-0x30] 114 movdqa xmm12, xmm8 115 punpckldq xmm8, xmm9 116 punpckhdq xmm12, xmm9 117 movdqa xmm14, xmm10 118 punpckldq xmm10, xmm11 119 punpckhdq xmm14, xmm11 120 movdqa xmm9, xmm8 121 punpcklqdq xmm8, xmm10 122 punpckhqdq xmm9, xmm10 123 movdqa xmm13, xmm12 124 punpcklqdq xmm12, xmm14 125 punpckhqdq xmm13, xmm14 126 movdqa xmmword ptr [rsp+0x40], xmm8 127 movdqa xmmword ptr [rsp+0x50], xmm9 128 movdqa xmmword ptr [rsp+0x60], xmm12 129 movdqa xmmword ptr [rsp+0x70], xmm13 130 movdqu xmm8, xmmword ptr [r8+rdx-0x20] 131 movdqu xmm9, xmmword ptr [r9+rdx-0x20] 132 movdqu xmm10, xmmword ptr [r10+rdx-0x20] 133 movdqu xmm11, xmmword ptr [r11+rdx-0x20] 134 movdqa xmm12, xmm8 135 punpckldq xmm8, xmm9 136 punpckhdq xmm12, xmm9 137 movdqa xmm14, xmm10 138 punpckldq xmm10, xmm11 139 punpckhdq xmm14, xmm11 140 movdqa xmm9, xmm8 141 punpcklqdq xmm8, xmm10 142 punpckhqdq xmm9, 
xmm10 143 movdqa xmm13, xmm12 144 punpcklqdq xmm12, xmm14 145 punpckhqdq xmm13, xmm14 146 movdqa xmmword ptr [rsp+0x80], xmm8 147 movdqa xmmword ptr [rsp+0x90], xmm9 148 movdqa xmmword ptr [rsp+0xA0], xmm12 149 movdqa xmmword ptr [rsp+0xB0], xmm13 150 movdqu xmm8, xmmword ptr [r8+rdx-0x10] 151 movdqu xmm9, xmmword ptr [r9+rdx-0x10] 152 movdqu xmm10, xmmword ptr [r10+rdx-0x10] 153 movdqu xmm11, xmmword ptr [r11+rdx-0x10] 154 movdqa xmm12, xmm8 155 punpckldq xmm8, xmm9 156 punpckhdq xmm12, xmm9 157 movdqa xmm14, xmm10 158 punpckldq xmm10, xmm11 159 punpckhdq xmm14, xmm11 160 movdqa xmm9, xmm8 161 punpcklqdq xmm8, xmm10 162 punpckhqdq xmm9, xmm10 163 movdqa xmm13, xmm12 164 punpcklqdq xmm12, xmm14 165 punpckhqdq xmm13, xmm14 166 movdqa xmmword ptr [rsp+0xC0], xmm8 167 movdqa xmmword ptr [rsp+0xD0], xmm9 168 movdqa xmmword ptr [rsp+0xE0], xmm12 169 movdqa xmmword ptr [rsp+0xF0], xmm13 170 movdqa xmm9, xmmword ptr [BLAKE3_IV_1+rip] 171 movdqa xmm10, xmmword ptr [BLAKE3_IV_2+rip] 172 movdqa xmm11, xmmword ptr [BLAKE3_IV_3+rip] 173 movdqa xmm12, xmmword ptr [rsp+0x110] 174 movdqa xmm13, xmmword ptr [rsp+0x120] 175 movdqa xmm14, xmmword ptr [BLAKE3_BLOCK_LEN+rip] 176 movd xmm15, eax 177 pshufd xmm15, xmm15, 0x00 178 prefetcht0 [r8+rdx+0x80] 179 prefetcht0 [r9+rdx+0x80] 180 prefetcht0 [r10+rdx+0x80] 181 prefetcht0 [r11+rdx+0x80] 182 paddd xmm0, xmmword ptr [rsp] 183 paddd xmm1, xmmword ptr [rsp+0x20] 184 paddd xmm2, xmmword ptr [rsp+0x40] 185 paddd xmm3, xmmword ptr [rsp+0x60] 186 paddd xmm0, xmm4 187 paddd xmm1, xmm5 188 paddd xmm2, xmm6 189 paddd xmm3, xmm7 190 pxor xmm12, xmm0 191 pxor xmm13, xmm1 192 pxor xmm14, xmm2 193 pxor xmm15, xmm3 194 movdqa xmm8, xmmword ptr [ROT16+rip] 195 pshufb xmm12, xmm8 196 pshufb xmm13, xmm8 197 pshufb xmm14, xmm8 198 pshufb xmm15, xmm8 199 movdqa xmm8, xmmword ptr [BLAKE3_IV_0+rip] 200 paddd xmm8, xmm12 201 paddd xmm9, xmm13 202 paddd xmm10, xmm14 203 paddd xmm11, xmm15 204 pxor xmm4, xmm8 205 pxor xmm5, xmm9 206 pxor xmm6, xmm10 207 
pxor xmm7, xmm11 208 movdqa xmmword ptr [rsp+0x100], xmm8 209 movdqa xmm8, xmm4 210 psrld xmm8, 12 211 pslld xmm4, 20 212 por xmm4, xmm8 213 movdqa xmm8, xmm5 214 psrld xmm8, 12 215 pslld xmm5, 20 216 por xmm5, xmm8 217 movdqa xmm8, xmm6 218 psrld xmm8, 12 219 pslld xmm6, 20 220 por xmm6, xmm8 221 movdqa xmm8, xmm7 222 psrld xmm8, 12 223 pslld xmm7, 20 224 por xmm7, xmm8 225 paddd xmm0, xmmword ptr [rsp+0x10] 226 paddd xmm1, xmmword ptr [rsp+0x30] 227 paddd xmm2, xmmword ptr [rsp+0x50] 228 paddd xmm3, xmmword ptr [rsp+0x70] 229 paddd xmm0, xmm4 230 paddd xmm1, xmm5 231 paddd xmm2, xmm6 232 paddd xmm3, xmm7 233 pxor xmm12, xmm0 234 pxor xmm13, xmm1 235 pxor xmm14, xmm2 236 pxor xmm15, xmm3 237 movdqa xmm8, xmmword ptr [ROT8+rip] 238 pshufb xmm12, xmm8 239 pshufb xmm13, xmm8 240 pshufb xmm14, xmm8 241 pshufb xmm15, xmm8 242 movdqa xmm8, xmmword ptr [rsp+0x100] 243 paddd xmm8, xmm12 244 paddd xmm9, xmm13 245 paddd xmm10, xmm14 246 paddd xmm11, xmm15 247 pxor xmm4, xmm8 248 pxor xmm5, xmm9 249 pxor xmm6, xmm10 250 pxor xmm7, xmm11 251 movdqa xmmword ptr [rsp+0x100], xmm8 252 movdqa xmm8, xmm4 253 psrld xmm8, 7 254 pslld xmm4, 25 255 por xmm4, xmm8 256 movdqa xmm8, xmm5 257 psrld xmm8, 7 258 pslld xmm5, 25 259 por xmm5, xmm8 260 movdqa xmm8, xmm6 261 psrld xmm8, 7 262 pslld xmm6, 25 263 por xmm6, xmm8 264 movdqa xmm8, xmm7 265 psrld xmm8, 7 266 pslld xmm7, 25 267 por xmm7, xmm8 268 paddd xmm0, xmmword ptr [rsp+0x80] 269 paddd xmm1, xmmword ptr [rsp+0xA0] 270 paddd xmm2, xmmword ptr [rsp+0xC0] 271 paddd xmm3, xmmword ptr [rsp+0xE0] 272 paddd xmm0, xmm5 273 paddd xmm1, xmm6 274 paddd xmm2, xmm7 275 paddd xmm3, xmm4 276 pxor xmm15, xmm0 277 pxor xmm12, xmm1 278 pxor xmm13, xmm2 279 pxor xmm14, xmm3 280 movdqa xmm8, xmmword ptr [ROT16+rip] 281 pshufb xmm15, xmm8 282 pshufb xmm12, xmm8 283 pshufb xmm13, xmm8 284 pshufb xmm14, xmm8 285 paddd xmm10, xmm15 286 paddd xmm11, xmm12 287 movdqa xmm8, xmmword ptr [rsp+0x100] 288 paddd xmm8, xmm13 289 paddd xmm9, xmm14 290 pxor xmm5, 
xmm10 291 pxor xmm6, xmm11 292 pxor xmm7, xmm8 293 pxor xmm4, xmm9 294 movdqa xmmword ptr [rsp+0x100], xmm8 295 movdqa xmm8, xmm5 296 psrld xmm8, 12 297 pslld xmm5, 20 298 por xmm5, xmm8 299 movdqa xmm8, xmm6 300 psrld xmm8, 12 301 pslld xmm6, 20 302 por xmm6, xmm8 303 movdqa xmm8, xmm7 304 psrld xmm8, 12 305 pslld xmm7, 20 306 por xmm7, xmm8 307 movdqa xmm8, xmm4 308 psrld xmm8, 12 309 pslld xmm4, 20 310 por xmm4, xmm8 311 paddd xmm0, xmmword ptr [rsp+0x90] 312 paddd xmm1, xmmword ptr [rsp+0xB0] 313 paddd xmm2, xmmword ptr [rsp+0xD0] 314 paddd xmm3, xmmword ptr [rsp+0xF0] 315 paddd xmm0, xmm5 316 paddd xmm1, xmm6 317 paddd xmm2, xmm7 318 paddd xmm3, xmm4 319 pxor xmm15, xmm0 320 pxor xmm12, xmm1 321 pxor xmm13, xmm2 322 pxor xmm14, xmm3 323 movdqa xmm8, xmmword ptr [ROT8+rip] 324 pshufb xmm15, xmm8 325 pshufb xmm12, xmm8 326 pshufb xmm13, xmm8 327 pshufb xmm14, xmm8 328 paddd xmm10, xmm15 329 paddd xmm11, xmm12 330 movdqa xmm8, xmmword ptr [rsp+0x100] 331 paddd xmm8, xmm13 332 paddd xmm9, xmm14 333 pxor xmm5, xmm10 334 pxor xmm6, xmm11 335 pxor xmm7, xmm8 336 pxor xmm4, xmm9 337 movdqa xmmword ptr [rsp+0x100], xmm8 338 movdqa xmm8, xmm5 339 psrld xmm8, 7 340 pslld xmm5, 25 341 por xmm5, xmm8 342 movdqa xmm8, xmm6 343 psrld xmm8, 7 344 pslld xmm6, 25 345 por xmm6, xmm8 346 movdqa xmm8, xmm7 347 psrld xmm8, 7 348 pslld xmm7, 25 349 por xmm7, xmm8 350 movdqa xmm8, xmm4 351 psrld xmm8, 7 352 pslld xmm4, 25 353 por xmm4, xmm8 354 paddd xmm0, xmmword ptr [rsp+0x20] 355 paddd xmm1, xmmword ptr [rsp+0x30] 356 paddd xmm2, xmmword ptr [rsp+0x70] 357 paddd xmm3, xmmword ptr [rsp+0x40] 358 paddd xmm0, xmm4 359 paddd xmm1, xmm5 360 paddd xmm2, xmm6 361 paddd xmm3, xmm7 362 pxor xmm12, xmm0 363 pxor xmm13, xmm1 364 pxor xmm14, xmm2 365 pxor xmm15, xmm3 366 movdqa xmm8, xmmword ptr [ROT16+rip] 367 pshufb xmm12, xmm8 368 pshufb xmm13, xmm8 369 pshufb xmm14, xmm8 370 pshufb xmm15, xmm8 371 movdqa xmm8, xmmword ptr [rsp+0x100] 372 paddd xmm8, xmm12 373 paddd xmm9, xmm13 374 paddd 
xmm10, xmm14 375 paddd xmm11, xmm15 376 pxor xmm4, xmm8 377 pxor xmm5, xmm9 378 pxor xmm6, xmm10 379 pxor xmm7, xmm11 380 movdqa xmmword ptr [rsp+0x100], xmm8 381 movdqa xmm8, xmm4 382 psrld xmm8, 12 383 pslld xmm4, 20 384 por xmm4, xmm8 385 movdqa xmm8, xmm5 386 psrld xmm8, 12 387 pslld xmm5, 20 388 por xmm5, xmm8 389 movdqa xmm8, xmm6 390 psrld xmm8, 12 391 pslld xmm6, 20 392 por xmm6, xmm8 393 movdqa xmm8, xmm7 394 psrld xmm8, 12 395 pslld xmm7, 20 396 por xmm7, xmm8 397 paddd xmm0, xmmword ptr [rsp+0x60] 398 paddd xmm1, xmmword ptr [rsp+0xA0] 399 paddd xmm2, xmmword ptr [rsp] 400 paddd xmm3, xmmword ptr [rsp+0xD0] 401 paddd xmm0, xmm4 402 paddd xmm1, xmm5 403 paddd xmm2, xmm6 404 paddd xmm3, xmm7 405 pxor xmm12, xmm0 406 pxor xmm13, xmm1 407 pxor xmm14, xmm2 408 pxor xmm15, xmm3 409 movdqa xmm8, xmmword ptr [ROT8+rip] 410 pshufb xmm12, xmm8 411 pshufb xmm13, xmm8 412 pshufb xmm14, xmm8 413 pshufb xmm15, xmm8 414 movdqa xmm8, xmmword ptr [rsp+0x100] 415 paddd xmm8, xmm12 416 paddd xmm9, xmm13 417 paddd xmm10, xmm14 418 paddd xmm11, xmm15 419 pxor xmm4, xmm8 420 pxor xmm5, xmm9 421 pxor xmm6, xmm10 422 pxor xmm7, xmm11 423 movdqa xmmword ptr [rsp+0x100], xmm8 424 movdqa xmm8, xmm4 425 psrld xmm8, 7 426 pslld xmm4, 25 427 por xmm4, xmm8 428 movdqa xmm8, xmm5 429 psrld xmm8, 7 430 pslld xmm5, 25 431 por xmm5, xmm8 432 movdqa xmm8, xmm6 433 psrld xmm8, 7 434 pslld xmm6, 25 435 por xmm6, xmm8 436 movdqa xmm8, xmm7 437 psrld xmm8, 7 438 pslld xmm7, 25 439 por xmm7, xmm8 440 paddd xmm0, xmmword ptr [rsp+0x10] 441 paddd xmm1, xmmword ptr [rsp+0xC0] 442 paddd xmm2, xmmword ptr [rsp+0x90] 443 paddd xmm3, xmmword ptr [rsp+0xF0] 444 paddd xmm0, xmm5 445 paddd xmm1, xmm6 446 paddd xmm2, xmm7 447 paddd xmm3, xmm4 448 pxor xmm15, xmm0 449 pxor xmm12, xmm1 450 pxor xmm13, xmm2 451 pxor xmm14, xmm3 452 movdqa xmm8, xmmword ptr [ROT16+rip] 453 pshufb xmm15, xmm8 454 pshufb xmm12, xmm8 455 pshufb xmm13, xmm8 456 pshufb xmm14, xmm8 457 paddd xmm10, xmm15 458 paddd xmm11, xmm12 459 
movdqa xmm8, xmmword ptr [rsp+0x100] 460 paddd xmm8, xmm13 461 paddd xmm9, xmm14 462 pxor xmm5, xmm10 463 pxor xmm6, xmm11 464 pxor xmm7, xmm8 465 pxor xmm4, xmm9 466 movdqa xmmword ptr [rsp+0x100], xmm8 467 movdqa xmm8, xmm5 468 psrld xmm8, 12 469 pslld xmm5, 20 470 por xmm5, xmm8 471 movdqa xmm8, xmm6 472 psrld xmm8, 12 473 pslld xmm6, 20 474 por xmm6, xmm8 475 movdqa xmm8, xmm7 476 psrld xmm8, 12 477 pslld xmm7, 20 478 por xmm7, xmm8 479 movdqa xmm8, xmm4 480 psrld xmm8, 12 481 pslld xmm4, 20 482 por xmm4, xmm8 483 paddd xmm0, xmmword ptr [rsp+0xB0] 484 paddd xmm1, xmmword ptr [rsp+0x50] 485 paddd xmm2, xmmword ptr [rsp+0xE0] 486 paddd xmm3, xmmword ptr [rsp+0x80] 487 paddd xmm0, xmm5 488 paddd xmm1, xmm6 489 paddd xmm2, xmm7 490 paddd xmm3, xmm4 491 pxor xmm15, xmm0 492 pxor xmm12, xmm1 493 pxor xmm13, xmm2 494 pxor xmm14, xmm3 495 movdqa xmm8, xmmword ptr [ROT8+rip] 496 pshufb xmm15, xmm8 497 pshufb xmm12, xmm8 498 pshufb xmm13, xmm8 499 pshufb xmm14, xmm8 500 paddd xmm10, xmm15 501 paddd xmm11, xmm12 502 movdqa xmm8, xmmword ptr [rsp+0x100] 503 paddd xmm8, xmm13 504 paddd xmm9, xmm14 505 pxor xmm5, xmm10 506 pxor xmm6, xmm11 507 pxor xmm7, xmm8 508 pxor xmm4, xmm9 509 movdqa xmmword ptr [rsp+0x100], xmm8 510 movdqa xmm8, xmm5 511 psrld xmm8, 7 512 pslld xmm5, 25 513 por xmm5, xmm8 514 movdqa xmm8, xmm6 515 psrld xmm8, 7 516 pslld xmm6, 25 517 por xmm6, xmm8 518 movdqa xmm8, xmm7 519 psrld xmm8, 7 520 pslld xmm7, 25 521 por xmm7, xmm8 522 movdqa xmm8, xmm4 523 psrld xmm8, 7 524 pslld xmm4, 25 525 por xmm4, xmm8 526 paddd xmm0, xmmword ptr [rsp+0x30] 527 paddd xmm1, xmmword ptr [rsp+0xA0] 528 paddd xmm2, xmmword ptr [rsp+0xD0] 529 paddd xmm3, xmmword ptr [rsp+0x70] 530 paddd xmm0, xmm4 531 paddd xmm1, xmm5 532 paddd xmm2, xmm6 533 paddd xmm3, xmm7 534 pxor xmm12, xmm0 535 pxor xmm13, xmm1 536 pxor xmm14, xmm2 537 pxor xmm15, xmm3 538 movdqa xmm8, xmmword ptr [ROT16+rip] 539 pshufb xmm12, xmm8 540 pshufb xmm13, xmm8 541 pshufb xmm14, xmm8 542 pshufb xmm15, xmm8 
543 movdqa xmm8, xmmword ptr [rsp+0x100] 544 paddd xmm8, xmm12 545 paddd xmm9, xmm13 546 paddd xmm10, xmm14 547 paddd xmm11, xmm15 548 pxor xmm4, xmm8 549 pxor xmm5, xmm9 550 pxor xmm6, xmm10 551 pxor xmm7, xmm11 552 movdqa xmmword ptr [rsp+0x100], xmm8 553 movdqa xmm8, xmm4 554 psrld xmm8, 12 555 pslld xmm4, 20 556 por xmm4, xmm8 557 movdqa xmm8, xmm5 558 psrld xmm8, 12 559 pslld xmm5, 20 560 por xmm5, xmm8 561 movdqa xmm8, xmm6 562 psrld xmm8, 12 563 pslld xmm6, 20 564 por xmm6, xmm8 565 movdqa xmm8, xmm7 566 psrld xmm8, 12 567 pslld xmm7, 20 568 por xmm7, xmm8 569 paddd xmm0, xmmword ptr [rsp+0x40] 570 paddd xmm1, xmmword ptr [rsp+0xC0] 571 paddd xmm2, xmmword ptr [rsp+0x20] 572 paddd xmm3, xmmword ptr [rsp+0xE0] 573 paddd xmm0, xmm4 574 paddd xmm1, xmm5 575 paddd xmm2, xmm6 576 paddd xmm3, xmm7 577 pxor xmm12, xmm0 578 pxor xmm13, xmm1 579 pxor xmm14, xmm2 580 pxor xmm15, xmm3 581 movdqa xmm8, xmmword ptr [ROT8+rip] 582 pshufb xmm12, xmm8 583 pshufb xmm13, xmm8 584 pshufb xmm14, xmm8 585 pshufb xmm15, xmm8 586 movdqa xmm8, xmmword ptr [rsp+0x100] 587 paddd xmm8, xmm12 588 paddd xmm9, xmm13 589 paddd xmm10, xmm14 590 paddd xmm11, xmm15 591 pxor xmm4, xmm8 592 pxor xmm5, xmm9 593 pxor xmm6, xmm10 594 pxor xmm7, xmm11 595 movdqa xmmword ptr [rsp+0x100], xmm8 596 movdqa xmm8, xmm4 597 psrld xmm8, 7 598 pslld xmm4, 25 599 por xmm4, xmm8 600 movdqa xmm8, xmm5 601 psrld xmm8, 7 602 pslld xmm5, 25 603 por xmm5, xmm8 604 movdqa xmm8, xmm6 605 psrld xmm8, 7 606 pslld xmm6, 25 607 por xmm6, xmm8 608 movdqa xmm8, xmm7 609 psrld xmm8, 7 610 pslld xmm7, 25 611 por xmm7, xmm8 612 paddd xmm0, xmmword ptr [rsp+0x60] 613 paddd xmm1, xmmword ptr [rsp+0x90] 614 paddd xmm2, xmmword ptr [rsp+0xB0] 615 paddd xmm3, xmmword ptr [rsp+0x80] 616 paddd xmm0, xmm5 617 paddd xmm1, xmm6 618 paddd xmm2, xmm7 619 paddd xmm3, xmm4 620 pxor xmm15, xmm0 621 pxor xmm12, xmm1 622 pxor xmm13, xmm2 623 pxor xmm14, xmm3 624 movdqa xmm8, xmmword ptr [ROT16+rip] 625 pshufb xmm15, xmm8 626 pshufb xmm12, 
xmm8 627 pshufb xmm13, xmm8 628 pshufb xmm14, xmm8 629 paddd xmm10, xmm15 630 paddd xmm11, xmm12 631 movdqa xmm8, xmmword ptr [rsp+0x100] 632 paddd xmm8, xmm13 633 paddd xmm9, xmm14 634 pxor xmm5, xmm10 635 pxor xmm6, xmm11 636 pxor xmm7, xmm8 637 pxor xmm4, xmm9 638 movdqa xmmword ptr [rsp+0x100], xmm8 639 movdqa xmm8, xmm5 640 psrld xmm8, 12 641 pslld xmm5, 20 642 por xmm5, xmm8 643 movdqa xmm8, xmm6 644 psrld xmm8, 12 645 pslld xmm6, 20 646 por xmm6, xmm8 647 movdqa xmm8, xmm7 648 psrld xmm8, 12 649 pslld xmm7, 20 650 por xmm7, xmm8 651 movdqa xmm8, xmm4 652 psrld xmm8, 12 653 pslld xmm4, 20 654 por xmm4, xmm8 655 paddd xmm0, xmmword ptr [rsp+0x50] 656 paddd xmm1, xmmword ptr [rsp] 657 paddd xmm2, xmmword ptr [rsp+0xF0] 658 paddd xmm3, xmmword ptr [rsp+0x10] 659 paddd xmm0, xmm5 660 paddd xmm1, xmm6 661 paddd xmm2, xmm7 662 paddd xmm3, xmm4 663 pxor xmm15, xmm0 664 pxor xmm12, xmm1 665 pxor xmm13, xmm2 666 pxor xmm14, xmm3 667 movdqa xmm8, xmmword ptr [ROT8+rip] 668 pshufb xmm15, xmm8 669 pshufb xmm12, xmm8 670 pshufb xmm13, xmm8 671 pshufb xmm14, xmm8 672 paddd xmm10, xmm15 673 paddd xmm11, xmm12 674 movdqa xmm8, xmmword ptr [rsp+0x100] 675 paddd xmm8, xmm13 676 paddd xmm9, xmm14 677 pxor xmm5, xmm10 678 pxor xmm6, xmm11 679 pxor xmm7, xmm8 680 pxor xmm4, xmm9 681 movdqa xmmword ptr [rsp+0x100], xmm8 682 movdqa xmm8, xmm5 683 psrld xmm8, 7 684 pslld xmm5, 25 685 por xmm5, xmm8 686 movdqa xmm8, xmm6 687 psrld xmm8, 7 688 pslld xmm6, 25 689 por xmm6, xmm8 690 movdqa xmm8, xmm7 691 psrld xmm8, 7 692 pslld xmm7, 25 693 por xmm7, xmm8 694 movdqa xmm8, xmm4 695 psrld xmm8, 7 696 pslld xmm4, 25 697 por xmm4, xmm8 698 paddd xmm0, xmmword ptr [rsp+0xA0] 699 paddd xmm1, xmmword ptr [rsp+0xC0] 700 paddd xmm2, xmmword ptr [rsp+0xE0] 701 paddd xmm3, xmmword ptr [rsp+0xD0] 702 paddd xmm0, xmm4 703 paddd xmm1, xmm5 704 paddd xmm2, xmm6 705 paddd xmm3, xmm7 706 pxor xmm12, xmm0 707 pxor xmm13, xmm1 708 pxor xmm14, xmm2 709 pxor xmm15, xmm3 710 movdqa xmm8, xmmword ptr 
[ROT16+rip] 711 pshufb xmm12, xmm8 712 pshufb xmm13, xmm8 713 pshufb xmm14, xmm8 714 pshufb xmm15, xmm8 715 movdqa xmm8, xmmword ptr [rsp+0x100] 716 paddd xmm8, xmm12 717 paddd xmm9, xmm13 718 paddd xmm10, xmm14 719 paddd xmm11, xmm15 720 pxor xmm4, xmm8 721 pxor xmm5, xmm9 722 pxor xmm6, xmm10 723 pxor xmm7, xmm11 724 movdqa xmmword ptr [rsp+0x100], xmm8 725 movdqa xmm8, xmm4 726 psrld xmm8, 12 727 pslld xmm4, 20 728 por xmm4, xmm8 729 movdqa xmm8, xmm5 730 psrld xmm8, 12 731 pslld xmm5, 20 732 por xmm5, xmm8 733 movdqa xmm8, xmm6 734 psrld xmm8, 12 735 pslld xmm6, 20 736 por xmm6, xmm8 737 movdqa xmm8, xmm7 738 psrld xmm8, 12 739 pslld xmm7, 20 740 por xmm7, xmm8 741 paddd xmm0, xmmword ptr [rsp+0x70] 742 paddd xmm1, xmmword ptr [rsp+0x90] 743 paddd xmm2, xmmword ptr [rsp+0x30] 744 paddd xmm3, xmmword ptr [rsp+0xF0] 745 paddd xmm0, xmm4 746 paddd xmm1, xmm5 747 paddd xmm2, xmm6 748 paddd xmm3, xmm7 749 pxor xmm12, xmm0 750 pxor xmm13, xmm1 751 pxor xmm14, xmm2 752 pxor xmm15, xmm3 753 movdqa xmm8, xmmword ptr [ROT8+rip] 754 pshufb xmm12, xmm8 755 pshufb xmm13, xmm8 756 pshufb xmm14, xmm8 757 pshufb xmm15, xmm8 758 movdqa xmm8, xmmword ptr [rsp+0x100] 759 paddd xmm8, xmm12 760 paddd xmm9, xmm13 761 paddd xmm10, xmm14 762 paddd xmm11, xmm15 763 pxor xmm4, xmm8 764 pxor xmm5, xmm9 765 pxor xmm6, xmm10 766 pxor xmm7, xmm11 767 movdqa xmmword ptr [rsp+0x100], xmm8 768 movdqa xmm8, xmm4 769 psrld xmm8, 7 770 pslld xmm4, 25 771 por xmm4, xmm8 772 movdqa xmm8, xmm5 773 psrld xmm8, 7 774 pslld xmm5, 25 775 por xmm5, xmm8 776 movdqa xmm8, xmm6 777 psrld xmm8, 7 778 pslld xmm6, 25 779 por xmm6, xmm8 780 movdqa xmm8, xmm7 781 psrld xmm8, 7 782 pslld xmm7, 25 783 por xmm7, xmm8 784 paddd xmm0, xmmword ptr [rsp+0x40] 785 paddd xmm1, xmmword ptr [rsp+0xB0] 786 paddd xmm2, xmmword ptr [rsp+0x50] 787 paddd xmm3, xmmword ptr [rsp+0x10] 788 paddd xmm0, xmm5 789 paddd xmm1, xmm6 790 paddd xmm2, xmm7 791 paddd xmm3, xmm4 792 pxor xmm15, xmm0 793 pxor xmm12, xmm1 794 pxor xmm13, xmm2 
795 pxor xmm14, xmm3 796 movdqa xmm8, xmmword ptr [ROT16+rip] 797 pshufb xmm15, xmm8 798 pshufb xmm12, xmm8 799 pshufb xmm13, xmm8 800 pshufb xmm14, xmm8 801 paddd xmm10, xmm15 802 paddd xmm11, xmm12 803 movdqa xmm8, xmmword ptr [rsp+0x100] 804 paddd xmm8, xmm13 805 paddd xmm9, xmm14 806 pxor xmm5, xmm10 807 pxor xmm6, xmm11 808 pxor xmm7, xmm8 809 pxor xmm4, xmm9 810 movdqa xmmword ptr [rsp+0x100], xmm8 811 movdqa xmm8, xmm5 812 psrld xmm8, 12 813 pslld xmm5, 20 814 por xmm5, xmm8 815 movdqa xmm8, xmm6 816 psrld xmm8, 12 817 pslld xmm6, 20 818 por xmm6, xmm8 819 movdqa xmm8, xmm7 820 psrld xmm8, 12 821 pslld xmm7, 20 822 por xmm7, xmm8 823 movdqa xmm8, xmm4 824 psrld xmm8, 12 825 pslld xmm4, 20 826 por xmm4, xmm8 827 paddd xmm0, xmmword ptr [rsp] 828 paddd xmm1, xmmword ptr [rsp+0x20] 829 paddd xmm2, xmmword ptr [rsp+0x80] 830 paddd xmm3, xmmword ptr [rsp+0x60] 831 paddd xmm0, xmm5 832 paddd xmm1, xmm6 833 paddd xmm2, xmm7 834 paddd xmm3, xmm4 835 pxor xmm15, xmm0 836 pxor xmm12, xmm1 837 pxor xmm13, xmm2 838 pxor xmm14, xmm3 839 movdqa xmm8, xmmword ptr [ROT8+rip] 840 pshufb xmm15, xmm8 841 pshufb xmm12, xmm8 842 pshufb xmm13, xmm8 843 pshufb xmm14, xmm8 844 paddd xmm10, xmm15 845 paddd xmm11, xmm12 846 movdqa xmm8, xmmword ptr [rsp+0x100] 847 paddd xmm8, xmm13 848 paddd xmm9, xmm14 849 pxor xmm5, xmm10 850 pxor xmm6, xmm11 851 pxor xmm7, xmm8 852 pxor xmm4, xmm9 853 movdqa xmmword ptr [rsp+0x100], xmm8 854 movdqa xmm8, xmm5 855 psrld xmm8, 7 856 pslld xmm5, 25 857 por xmm5, xmm8 858 movdqa xmm8, xmm6 859 psrld xmm8, 7 860 pslld xmm6, 25 861 por xmm6, xmm8 862 movdqa xmm8, xmm7 863 psrld xmm8, 7 864 pslld xmm7, 25 865 por xmm7, xmm8 866 movdqa xmm8, xmm4 867 psrld xmm8, 7 868 pslld xmm4, 25 869 por xmm4, xmm8 870 paddd xmm0, xmmword ptr [rsp+0xC0] 871 paddd xmm1, xmmword ptr [rsp+0x90] 872 paddd xmm2, xmmword ptr [rsp+0xF0] 873 paddd xmm3, xmmword ptr [rsp+0xE0] 874 paddd xmm0, xmm4 875 paddd xmm1, xmm5 876 paddd xmm2, xmm6 877 paddd xmm3, xmm7 878 pxor xmm12, 
xmm0 879 pxor xmm13, xmm1 880 pxor xmm14, xmm2 881 pxor xmm15, xmm3 882 movdqa xmm8, xmmword ptr [ROT16+rip] 883 pshufb xmm12, xmm8 884 pshufb xmm13, xmm8 885 pshufb xmm14, xmm8 886 pshufb xmm15, xmm8 887 movdqa xmm8, xmmword ptr [rsp+0x100] 888 paddd xmm8, xmm12 889 paddd xmm9, xmm13 890 paddd xmm10, xmm14 891 paddd xmm11, xmm15 892 pxor xmm4, xmm8 893 pxor xmm5, xmm9 894 pxor xmm6, xmm10 895 pxor xmm7, xmm11 896 movdqa xmmword ptr [rsp+0x100], xmm8 897 movdqa xmm8, xmm4 898 psrld xmm8, 12 899 pslld xmm4, 20 900 por xmm4, xmm8 901 movdqa xmm8, xmm5 902 psrld xmm8, 12 903 pslld xmm5, 20 904 por xmm5, xmm8 905 movdqa xmm8, xmm6 906 psrld xmm8, 12 907 pslld xmm6, 20 908 por xmm6, xmm8 909 movdqa xmm8, xmm7 910 psrld xmm8, 12 911 pslld xmm7, 20 912 por xmm7, xmm8 913 paddd xmm0, xmmword ptr [rsp+0xD0] 914 paddd xmm1, xmmword ptr [rsp+0xB0] 915 paddd xmm2, xmmword ptr [rsp+0xA0] 916 paddd xmm3, xmmword ptr [rsp+0x80] 917 paddd xmm0, xmm4 918 paddd xmm1, xmm5 919 paddd xmm2, xmm6 920 paddd xmm3, xmm7 921 pxor xmm12, xmm0 922 pxor xmm13, xmm1 923 pxor xmm14, xmm2 924 pxor xmm15, xmm3 925 movdqa xmm8, xmmword ptr [ROT8+rip] 926 pshufb xmm12, xmm8 927 pshufb xmm13, xmm8 928 pshufb xmm14, xmm8 929 pshufb xmm15, xmm8 930 movdqa xmm8, xmmword ptr [rsp+0x100] 931 paddd xmm8, xmm12 932 paddd xmm9, xmm13 933 paddd xmm10, xmm14 934 paddd xmm11, xmm15 935 pxor xmm4, xmm8 936 pxor xmm5, xmm9 937 pxor xmm6, xmm10 938 pxor xmm7, xmm11 939 movdqa xmmword ptr [rsp+0x100], xmm8 940 movdqa xmm8, xmm4 941 psrld xmm8, 7 942 pslld xmm4, 25 943 por xmm4, xmm8 944 movdqa xmm8, xmm5 945 psrld xmm8, 7 946 pslld xmm5, 25 947 por xmm5, xmm8 948 movdqa xmm8, xmm6 949 psrld xmm8, 7 950 pslld xmm6, 25 951 por xmm6, xmm8 952 movdqa xmm8, xmm7 953 psrld xmm8, 7 954 pslld xmm7, 25 955 por xmm7, xmm8 956 paddd xmm0, xmmword ptr [rsp+0x70] 957 paddd xmm1, xmmword ptr [rsp+0x50] 958 paddd xmm2, xmmword ptr [rsp] 959 paddd xmm3, xmmword ptr [rsp+0x60] 960 paddd xmm0, xmm5 961 paddd xmm1, xmm6 962 paddd 
xmm2, xmm7 963 paddd xmm3, xmm4 964 pxor xmm15, xmm0 965 pxor xmm12, xmm1 966 pxor xmm13, xmm2 967 pxor xmm14, xmm3 968 movdqa xmm8, xmmword ptr [ROT16+rip] 969 pshufb xmm15, xmm8 970 pshufb xmm12, xmm8 971 pshufb xmm13, xmm8 972 pshufb xmm14, xmm8 973 paddd xmm10, xmm15 974 paddd xmm11, xmm12 975 movdqa xmm8, xmmword ptr [rsp+0x100] 976 paddd xmm8, xmm13 977 paddd xmm9, xmm14 978 pxor xmm5, xmm10 979 pxor xmm6, xmm11 980 pxor xmm7, xmm8 981 pxor xmm4, xmm9 982 movdqa xmmword ptr [rsp+0x100], xmm8 983 movdqa xmm8, xmm5 984 psrld xmm8, 12 985 pslld xmm5, 20 986 por xmm5, xmm8 987 movdqa xmm8, xmm6 988 psrld xmm8, 12 989 pslld xmm6, 20 990 por xmm6, xmm8 991 movdqa xmm8, xmm7 992 psrld xmm8, 12 993 pslld xmm7, 20 994 por xmm7, xmm8 995 movdqa xmm8, xmm4 996 psrld xmm8, 12 997 pslld xmm4, 20 998 por xmm4, xmm8 999 paddd xmm0, xmmword ptr [rsp+0x20] 1000 paddd xmm1, xmmword ptr [rsp+0x30] 1001 paddd xmm2, xmmword ptr [rsp+0x10] 1002 paddd xmm3, xmmword ptr [rsp+0x40] 1003 paddd xmm0, xmm5 1004 paddd xmm1, xmm6 1005 paddd xmm2, xmm7 1006 paddd xmm3, xmm4 1007 pxor xmm15, xmm0 1008 pxor xmm12, xmm1 1009 pxor xmm13, xmm2 1010 pxor xmm14, xmm3 1011 movdqa xmm8, xmmword ptr [ROT8+rip] 1012 pshufb xmm15, xmm8 1013 pshufb xmm12, xmm8 1014 pshufb xmm13, xmm8 1015 pshufb xmm14, xmm8 1016 paddd xmm10, xmm15 1017 paddd xmm11, xmm12 1018 movdqa xmm8, xmmword ptr [rsp+0x100] 1019 paddd xmm8, xmm13 1020 paddd xmm9, xmm14 1021 pxor xmm5, xmm10 1022 pxor xmm6, xmm11 1023 pxor xmm7, xmm8 1024 pxor xmm4, xmm9 1025 movdqa xmmword ptr [rsp+0x100], xmm8 1026 movdqa xmm8, xmm5 1027 psrld xmm8, 7 1028 pslld xmm5, 25 1029 por xmm5, xmm8 1030 movdqa xmm8, xmm6 1031 psrld xmm8, 7 1032 pslld xmm6, 25 1033 por xmm6, xmm8 1034 movdqa xmm8, xmm7 1035 psrld xmm8, 7 1036 pslld xmm7, 25 1037 por xmm7, xmm8 1038 movdqa xmm8, xmm4 1039 psrld xmm8, 7 1040 pslld xmm4, 25 1041 por xmm4, xmm8 1042 paddd xmm0, xmmword ptr [rsp+0x90] 1043 paddd xmm1, xmmword ptr [rsp+0xB0] 1044 paddd xmm2, xmmword ptr 
[rsp+0x80] 1045 paddd xmm3, xmmword ptr [rsp+0xF0] 1046 paddd xmm0, xmm4 1047 paddd xmm1, xmm5 1048 paddd xmm2, xmm6 1049 paddd xmm3, xmm7 1050 pxor xmm12, xmm0 1051 pxor xmm13, xmm1 1052 pxor xmm14, xmm2 1053 pxor xmm15, xmm3 1054 movdqa xmm8, xmmword ptr [ROT16+rip] 1055 pshufb xmm12, xmm8 1056 pshufb xmm13, xmm8 1057 pshufb xmm14, xmm8 1058 pshufb xmm15, xmm8 1059 movdqa xmm8, xmmword ptr [rsp+0x100] 1060 paddd xmm8, xmm12 1061 paddd xmm9, xmm13 1062 paddd xmm10, xmm14 1063 paddd xmm11, xmm15 1064 pxor xmm4, xmm8 1065 pxor xmm5, xmm9 1066 pxor xmm6, xmm10 1067 pxor xmm7, xmm11 1068 movdqa xmmword ptr [rsp+0x100], xmm8 1069 movdqa xmm8, xmm4 1070 psrld xmm8, 12 1071 pslld xmm4, 20 1072 por xmm4, xmm8 1073 movdqa xmm8, xmm5 1074 psrld xmm8, 12 1075 pslld xmm5, 20 1076 por xmm5, xmm8 1077 movdqa xmm8, xmm6 1078 psrld xmm8, 12 1079 pslld xmm6, 20 1080 por xmm6, xmm8 1081 movdqa xmm8, xmm7 1082 psrld xmm8, 12 1083 pslld xmm7, 20 1084 por xmm7, xmm8 1085 paddd xmm0, xmmword ptr [rsp+0xE0] 1086 paddd xmm1, xmmword ptr [rsp+0x50] 1087 paddd xmm2, xmmword ptr [rsp+0xC0] 1088 paddd xmm3, xmmword ptr [rsp+0x10] 1089 paddd xmm0, xmm4 1090 paddd xmm1, xmm5 1091 paddd xmm2, xmm6 1092 paddd xmm3, xmm7 1093 pxor xmm12, xmm0 1094 pxor xmm13, xmm1 1095 pxor xmm14, xmm2 1096 pxor xmm15, xmm3 1097 movdqa xmm8, xmmword ptr [ROT8+rip] 1098 pshufb xmm12, xmm8 1099 pshufb xmm13, xmm8 1100 pshufb xmm14, xmm8 1101 pshufb xmm15, xmm8 1102 movdqa xmm8, xmmword ptr [rsp+0x100] 1103 paddd xmm8, xmm12 1104 paddd xmm9, xmm13 1105 paddd xmm10, xmm14 1106 paddd xmm11, xmm15 1107 pxor xmm4, xmm8 1108 pxor xmm5, xmm9 1109 pxor xmm6, xmm10 1110 pxor xmm7, xmm11 1111 movdqa xmmword ptr [rsp+0x100], xmm8 1112 movdqa xmm8, xmm4 1113 psrld xmm8, 7 1114 pslld xmm4, 25 1115 por xmm4, xmm8 1116 movdqa xmm8, xmm5 1117 psrld xmm8, 7 1118 pslld xmm5, 25 1119 por xmm5, xmm8 1120 movdqa xmm8, xmm6 1121 psrld xmm8, 7 1122 pslld xmm6, 25 1123 por xmm6, xmm8 1124 movdqa xmm8, xmm7 1125 psrld xmm8, 7 1126 pslld 
xmm7, 25 1127 por xmm7, xmm8 1128 paddd xmm0, xmmword ptr [rsp+0xD0] 1129 paddd xmm1, xmmword ptr [rsp] 1130 paddd xmm2, xmmword ptr [rsp+0x20] 1131 paddd xmm3, xmmword ptr [rsp+0x40] 1132 paddd xmm0, xmm5 1133 paddd xmm1, xmm6 1134 paddd xmm2, xmm7 1135 paddd xmm3, xmm4 1136 pxor xmm15, xmm0 1137 pxor xmm12, xmm1 1138 pxor xmm13, xmm2 1139 pxor xmm14, xmm3 1140 movdqa xmm8, xmmword ptr [ROT16+rip] 1141 pshufb xmm15, xmm8 1142 pshufb xmm12, xmm8 1143 pshufb xmm13, xmm8 1144 pshufb xmm14, xmm8 1145 paddd xmm10, xmm15 1146 paddd xmm11, xmm12 1147 movdqa xmm8, xmmword ptr [rsp+0x100] 1148 paddd xmm8, xmm13 1149 paddd xmm9, xmm14 1150 pxor xmm5, xmm10 1151 pxor xmm6, xmm11 1152 pxor xmm7, xmm8 1153 pxor xmm4, xmm9 1154 movdqa xmmword ptr [rsp+0x100], xmm8 1155 movdqa xmm8, xmm5 1156 psrld xmm8, 12 1157 pslld xmm5, 20 1158 por xmm5, xmm8 1159 movdqa xmm8, xmm6 1160 psrld xmm8, 12 1161 pslld xmm6, 20 1162 por xmm6, xmm8 1163 movdqa xmm8, xmm7 1164 psrld xmm8, 12 1165 pslld xmm7, 20 1166 por xmm7, xmm8 1167 movdqa xmm8, xmm4 1168 psrld xmm8, 12 1169 pslld xmm4, 20 1170 por xmm4, xmm8 1171 paddd xmm0, xmmword ptr [rsp+0x30] 1172 paddd xmm1, xmmword ptr [rsp+0xA0] 1173 paddd xmm2, xmmword ptr [rsp+0x60] 1174 paddd xmm3, xmmword ptr [rsp+0x70] 1175 paddd xmm0, xmm5 1176 paddd xmm1, xmm6 1177 paddd xmm2, xmm7 1178 paddd xmm3, xmm4 1179 pxor xmm15, xmm0 1180 pxor xmm12, xmm1 1181 pxor xmm13, xmm2 1182 pxor xmm14, xmm3 1183 movdqa xmm8, xmmword ptr [ROT8+rip] 1184 pshufb xmm15, xmm8 1185 pshufb xmm12, xmm8 1186 pshufb xmm13, xmm8 1187 pshufb xmm14, xmm8 1188 paddd xmm10, xmm15 1189 paddd xmm11, xmm12 1190 movdqa xmm8, xmmword ptr [rsp+0x100] 1191 paddd xmm8, xmm13 1192 paddd xmm9, xmm14 1193 pxor xmm5, xmm10 1194 pxor xmm6, xmm11 1195 pxor xmm7, xmm8 1196 pxor xmm4, xmm9 1197 movdqa xmmword ptr [rsp+0x100], xmm8 1198 movdqa xmm8, xmm5 1199 psrld xmm8, 7 1200 pslld xmm5, 25 1201 por xmm5, xmm8 1202 movdqa xmm8, xmm6 1203 psrld xmm8, 7 1204 pslld xmm6, 25 1205 por xmm6, xmm8 1206 
movdqa xmm8, xmm7 1207 psrld xmm8, 7 1208 pslld xmm7, 25 1209 por xmm7, xmm8 1210 movdqa xmm8, xmm4 1211 psrld xmm8, 7 1212 pslld xmm4, 25 1213 por xmm4, xmm8 1214 paddd xmm0, xmmword ptr [rsp+0xB0] 1215 paddd xmm1, xmmword ptr [rsp+0x50] 1216 paddd xmm2, xmmword ptr [rsp+0x10] 1217 paddd xmm3, xmmword ptr [rsp+0x80] 1218 paddd xmm0, xmm4 1219 paddd xmm1, xmm5 1220 paddd xmm2, xmm6 1221 paddd xmm3, xmm7 1222 pxor xmm12, xmm0 1223 pxor xmm13, xmm1 1224 pxor xmm14, xmm2 1225 pxor xmm15, xmm3 1226 movdqa xmm8, xmmword ptr [ROT16+rip] 1227 pshufb xmm12, xmm8 1228 pshufb xmm13, xmm8 1229 pshufb xmm14, xmm8 1230 pshufb xmm15, xmm8 1231 movdqa xmm8, xmmword ptr [rsp+0x100] 1232 paddd xmm8, xmm12 1233 paddd xmm9, xmm13 1234 paddd xmm10, xmm14 1235 paddd xmm11, xmm15 1236 pxor xmm4, xmm8 1237 pxor xmm5, xmm9 1238 pxor xmm6, xmm10 1239 pxor xmm7, xmm11 1240 movdqa xmmword ptr [rsp+0x100], xmm8 1241 movdqa xmm8, xmm4 1242 psrld xmm8, 12 1243 pslld xmm4, 20 1244 por xmm4, xmm8 1245 movdqa xmm8, xmm5 1246 psrld xmm8, 12 1247 pslld xmm5, 20 1248 por xmm5, xmm8 1249 movdqa xmm8, xmm6 1250 psrld xmm8, 12 1251 pslld xmm6, 20 1252 por xmm6, xmm8 1253 movdqa xmm8, xmm7 1254 psrld xmm8, 12 1255 pslld xmm7, 20 1256 por xmm7, xmm8 1257 paddd xmm0, xmmword ptr [rsp+0xF0] 1258 paddd xmm1, xmmword ptr [rsp] 1259 paddd xmm2, xmmword ptr [rsp+0x90] 1260 paddd xmm3, xmmword ptr [rsp+0x60] 1261 paddd xmm0, xmm4 1262 paddd xmm1, xmm5 1263 paddd xmm2, xmm6 1264 paddd xmm3, xmm7 1265 pxor xmm12, xmm0 1266 pxor xmm13, xmm1 1267 pxor xmm14, xmm2 1268 pxor xmm15, xmm3 1269 movdqa xmm8, xmmword ptr [ROT8+rip] 1270 pshufb xmm12, xmm8 1271 pshufb xmm13, xmm8 1272 pshufb xmm14, xmm8 1273 pshufb xmm15, xmm8 1274 movdqa xmm8, xmmword ptr [rsp+0x100] 1275 paddd xmm8, xmm12 1276 paddd xmm9, xmm13 1277 paddd xmm10, xmm14 1278 paddd xmm11, xmm15 1279 pxor xmm4, xmm8 1280 pxor xmm5, xmm9 1281 pxor xmm6, xmm10 1282 pxor xmm7, xmm11 1283 movdqa xmmword ptr [rsp+0x100], xmm8 1284 movdqa xmm8, xmm4 1285 psrld 
xmm8, 7 1286 pslld xmm4, 25 1287 por xmm4, xmm8 1288 movdqa xmm8, xmm5 1289 psrld xmm8, 7 1290 pslld xmm5, 25 1291 por xmm5, xmm8 1292 movdqa xmm8, xmm6 1293 psrld xmm8, 7 1294 pslld xmm6, 25 1295 por xmm6, xmm8 1296 movdqa xmm8, xmm7 1297 psrld xmm8, 7 1298 pslld xmm7, 25 1299 por xmm7, xmm8 1300 paddd xmm0, xmmword ptr [rsp+0xE0] 1301 paddd xmm1, xmmword ptr [rsp+0x20] 1302 paddd xmm2, xmmword ptr [rsp+0x30] 1303 paddd xmm3, xmmword ptr [rsp+0x70] 1304 paddd xmm0, xmm5 1305 paddd xmm1, xmm6 1306 paddd xmm2, xmm7 1307 paddd xmm3, xmm4 1308 pxor xmm15, xmm0 1309 pxor xmm12, xmm1 1310 pxor xmm13, xmm2 1311 pxor xmm14, xmm3 1312 movdqa xmm8, xmmword ptr [ROT16+rip] 1313 pshufb xmm15, xmm8 1314 pshufb xmm12, xmm8 1315 pshufb xmm13, xmm8 1316 pshufb xmm14, xmm8 1317 paddd xmm10, xmm15 1318 paddd xmm11, xmm12 1319 movdqa xmm8, xmmword ptr [rsp+0x100] 1320 paddd xmm8, xmm13 1321 paddd xmm9, xmm14 1322 pxor xmm5, xmm10 1323 pxor xmm6, xmm11 1324 pxor xmm7, xmm8 1325 pxor xmm4, xmm9 1326 movdqa xmmword ptr [rsp+0x100], xmm8 1327 movdqa xmm8, xmm5 1328 psrld xmm8, 12 1329 pslld xmm5, 20 1330 por xmm5, xmm8 1331 movdqa xmm8, xmm6 1332 psrld xmm8, 12 1333 pslld xmm6, 20 1334 por xmm6, xmm8 1335 movdqa xmm8, xmm7 1336 psrld xmm8, 12 1337 pslld xmm7, 20 1338 por xmm7, xmm8 1339 movdqa xmm8, xmm4 1340 psrld xmm8, 12 1341 pslld xmm4, 20 1342 por xmm4, xmm8 1343 paddd xmm0, xmmword ptr [rsp+0xA0] 1344 paddd xmm1, xmmword ptr [rsp+0xC0] 1345 paddd xmm2, xmmword ptr [rsp+0x40] 1346 paddd xmm3, xmmword ptr [rsp+0xD0] 1347 paddd xmm0, xmm5 1348 paddd xmm1, xmm6 1349 paddd xmm2, xmm7 1350 paddd xmm3, xmm4 1351 pxor xmm15, xmm0 1352 pxor xmm12, xmm1 1353 pxor xmm13, xmm2 1354 pxor xmm14, xmm3 1355 movdqa xmm8, xmmword ptr [ROT8+rip] 1356 pshufb xmm15, xmm8 1357 pshufb xmm12, xmm8 1358 pshufb xmm13, xmm8 1359 pshufb xmm14, xmm8 1360 paddd xmm10, xmm15 1361 paddd xmm11, xmm12 1362 movdqa xmm8, xmmword ptr [rsp+0x100] 1363 paddd xmm8, xmm13 1364 paddd xmm9, xmm14 1365 pxor xmm5, xmm10 1366 
pxor xmm6, xmm11 1367 pxor xmm7, xmm8 1368 pxor xmm4, xmm9 1369 pxor xmm0, xmm8 1370 pxor xmm1, xmm9 1371 pxor xmm2, xmm10 1372 pxor xmm3, xmm11 1373 movdqa xmm8, xmm5 1374 psrld xmm8, 7 1375 pslld xmm5, 25 1376 por xmm5, xmm8 1377 movdqa xmm8, xmm6 1378 psrld xmm8, 7 1379 pslld xmm6, 25 1380 por xmm6, xmm8 1381 movdqa xmm8, xmm7 1382 psrld xmm8, 7 1383 pslld xmm7, 25 1384 por xmm7, xmm8 1385 movdqa xmm8, xmm4 1386 psrld xmm8, 7 1387 pslld xmm4, 25 1388 por xmm4, xmm8 1389 pxor xmm4, xmm12 1390 pxor xmm5, xmm13 1391 pxor xmm6, xmm14 1392 pxor xmm7, xmm15 1393 mov eax, r13d 1394 jne 9b 1395 movdqa xmm9, xmm0 1396 punpckldq xmm0, xmm1 1397 punpckhdq xmm9, xmm1 1398 movdqa xmm11, xmm2 1399 punpckldq xmm2, xmm3 1400 punpckhdq xmm11, xmm3 1401 movdqa xmm1, xmm0 1402 punpcklqdq xmm0, xmm2 1403 punpckhqdq xmm1, xmm2 1404 movdqa xmm3, xmm9 1405 punpcklqdq xmm9, xmm11 1406 punpckhqdq xmm3, xmm11 1407 movdqu xmmword ptr [rbx], xmm0 1408 movdqu xmmword ptr [rbx+0x20], xmm1 1409 movdqu xmmword ptr [rbx+0x40], xmm9 1410 movdqu xmmword ptr [rbx+0x60], xmm3 1411 movdqa xmm9, xmm4 1412 punpckldq xmm4, xmm5 1413 punpckhdq xmm9, xmm5 1414 movdqa xmm11, xmm6 1415 punpckldq xmm6, xmm7 1416 punpckhdq xmm11, xmm7 1417 movdqa xmm5, xmm4 1418 punpcklqdq xmm4, xmm6 1419 punpckhqdq xmm5, xmm6 1420 movdqa xmm7, xmm9 1421 punpcklqdq xmm9, xmm11 1422 punpckhqdq xmm7, xmm11 1423 movdqu xmmword ptr [rbx+0x10], xmm4 1424 movdqu xmmword ptr [rbx+0x30], xmm5 1425 movdqu xmmword ptr [rbx+0x50], xmm9 1426 movdqu xmmword ptr [rbx+0x70], xmm7 1427 movdqa xmm1, xmmword ptr [rsp+0x110] 1428 movdqa xmm0, xmm1 1429 paddd xmm1, xmmword ptr [rsp+0x150] 1430 movdqa xmmword ptr [rsp+0x110], xmm1 1431 pxor xmm0, xmmword ptr [CMP_MSB_MASK+rip] 1432 pxor xmm1, xmmword ptr [CMP_MSB_MASK+rip] 1433 pcmpgtd xmm0, xmm1 1434 movdqa xmm1, xmmword ptr [rsp+0x120] 1435 psubd xmm1, xmm0 1436 movdqa xmmword ptr [rsp+0x120], xmm1 1437 add rbx, 128 1438 add rdi, 32 1439 sub rsi, 4 1440 cmp rsi, 4 1441 jnc 2b 1442 test rsi, 
rsi 1443 jne 3f 1444 4: 1445 movdqa xmm6, xmmword ptr [rsp+0x170] 1446 movdqa xmm7, xmmword ptr [rsp+0x180] 1447 movdqa xmm8, xmmword ptr [rsp+0x190] 1448 movdqa xmm9, xmmword ptr [rsp+0x1A0] 1449 movdqa xmm10, xmmword ptr [rsp+0x1B0] 1450 movdqa xmm11, xmmword ptr [rsp+0x1C0] 1451 movdqa xmm12, xmmword ptr [rsp+0x1D0] 1452 movdqa xmm13, xmmword ptr [rsp+0x1E0] 1453 movdqa xmm14, xmmword ptr [rsp+0x1F0] 1454 movdqa xmm15, xmmword ptr [rsp+0x200] 1455 mov rsp, rbp 1456 pop rbp 1457 pop rbx 1458 pop rdi 1459 pop rsi 1460 pop r12 1461 pop r13 1462 pop r14 1463 pop r15 1464 ret 1465 .p2align 5 1466 3: 1467 test esi, 0x2 1468 je 3f 1469 movups xmm0, xmmword ptr [rcx] 1470 movups xmm1, xmmword ptr [rcx+0x10] 1471 movaps xmm8, xmm0 1472 movaps xmm9, xmm1 1473 movd xmm13, dword ptr [rsp+0x110] 1474 pinsrd xmm13, dword ptr [rsp+0x120], 1 1475 pinsrd xmm13, dword ptr [BLAKE3_BLOCK_LEN+rip], 2 1476 movaps xmmword ptr [rsp], xmm13 1477 movd xmm14, dword ptr [rsp+0x114] 1478 pinsrd xmm14, dword ptr [rsp+0x124], 1 1479 pinsrd xmm14, dword ptr [BLAKE3_BLOCK_LEN+rip], 2 1480 movaps xmmword ptr [rsp+0x10], xmm14 1481 mov r8, qword ptr [rdi] 1482 mov r9, qword ptr [rdi+0x8] 1483 movzx eax, byte ptr [rbp+0x80] 1484 or eax, r13d 1485 xor edx, edx 1486 2: 1487 mov r14d, eax 1488 or eax, r12d 1489 add rdx, 64 1490 cmp rdx, r15 1491 cmovne eax, r14d 1492 movaps xmm2, xmmword ptr [BLAKE3_IV+rip] 1493 movaps xmm10, xmm2 1494 movups xmm4, xmmword ptr [r8+rdx-0x40] 1495 movups xmm5, xmmword ptr [r8+rdx-0x30] 1496 movaps xmm3, xmm4 1497 shufps xmm4, xmm5, 136 1498 shufps xmm3, xmm5, 221 1499 movaps xmm5, xmm3 1500 movups xmm6, xmmword ptr [r8+rdx-0x20] 1501 movups xmm7, xmmword ptr [r8+rdx-0x10] 1502 movaps xmm3, xmm6 1503 shufps xmm6, xmm7, 136 1504 pshufd xmm6, xmm6, 0x93 1505 shufps xmm3, xmm7, 221 1506 pshufd xmm7, xmm3, 0x93 1507 movups xmm12, xmmword ptr [r9+rdx-0x40] 1508 movups xmm13, xmmword ptr [r9+rdx-0x30] 1509 movaps xmm11, xmm12 1510 shufps xmm12, xmm13, 136 1511 shufps xmm11, 
xmm13, 221 1512 movaps xmm13, xmm11 1513 movups xmm14, xmmword ptr [r9+rdx-0x20] 1514 movups xmm15, xmmword ptr [r9+rdx-0x10] 1515 movaps xmm11, xmm14 1516 shufps xmm14, xmm15, 136 1517 pshufd xmm14, xmm14, 0x93 1518 shufps xmm11, xmm15, 221 1519 pshufd xmm15, xmm11, 0x93 1520 movaps xmm3, xmmword ptr [rsp] 1521 movaps xmm11, xmmword ptr [rsp+0x10] 1522 pinsrd xmm3, eax, 3 1523 pinsrd xmm11, eax, 3 1524 mov al, 7 1525 9: 1526 paddd xmm0, xmm4 1527 paddd xmm8, xmm12 1528 movaps xmmword ptr [rsp+0x20], xmm4 1529 movaps xmmword ptr [rsp+0x30], xmm12 1530 paddd xmm0, xmm1 1531 paddd xmm8, xmm9 1532 pxor xmm3, xmm0 1533 pxor xmm11, xmm8 1534 movaps xmm12, xmmword ptr [ROT16+rip] 1535 pshufb xmm3, xmm12 1536 pshufb xmm11, xmm12 1537 paddd xmm2, xmm3 1538 paddd xmm10, xmm11 1539 pxor xmm1, xmm2 1540 pxor xmm9, xmm10 1541 movdqa xmm4, xmm1 1542 pslld xmm1, 20 1543 psrld xmm4, 12 1544 por xmm1, xmm4 1545 movdqa xmm4, xmm9 1546 pslld xmm9, 20 1547 psrld xmm4, 12 1548 por xmm9, xmm4 1549 paddd xmm0, xmm5 1550 paddd xmm8, xmm13 1551 movaps xmmword ptr [rsp+0x40], xmm5 1552 movaps xmmword ptr [rsp+0x50], xmm13 1553 paddd xmm0, xmm1 1554 paddd xmm8, xmm9 1555 pxor xmm3, xmm0 1556 pxor xmm11, xmm8 1557 movaps xmm13, xmmword ptr [ROT8+rip] 1558 pshufb xmm3, xmm13 1559 pshufb xmm11, xmm13 1560 paddd xmm2, xmm3 1561 paddd xmm10, xmm11 1562 pxor xmm1, xmm2 1563 pxor xmm9, xmm10 1564 movdqa xmm4, xmm1 1565 pslld xmm1, 25 1566 psrld xmm4, 7 1567 por xmm1, xmm4 1568 movdqa xmm4, xmm9 1569 pslld xmm9, 25 1570 psrld xmm4, 7 1571 por xmm9, xmm4 1572 pshufd xmm0, xmm0, 0x93 1573 pshufd xmm8, xmm8, 0x93 1574 pshufd xmm3, xmm3, 0x4E 1575 pshufd xmm11, xmm11, 0x4E 1576 pshufd xmm2, xmm2, 0x39 1577 pshufd xmm10, xmm10, 0x39 1578 paddd xmm0, xmm6 1579 paddd xmm8, xmm14 1580 paddd xmm0, xmm1 1581 paddd xmm8, xmm9 1582 pxor xmm3, xmm0 1583 pxor xmm11, xmm8 1584 pshufb xmm3, xmm12 1585 pshufb xmm11, xmm12 1586 paddd xmm2, xmm3 1587 paddd xmm10, xmm11 1588 pxor xmm1, xmm2 1589 pxor xmm9, xmm10 1590 
movdqa xmm4, xmm1 1591 pslld xmm1, 20 1592 psrld xmm4, 12 1593 por xmm1, xmm4 1594 movdqa xmm4, xmm9 1595 pslld xmm9, 20 1596 psrld xmm4, 12 1597 por xmm9, xmm4 1598 paddd xmm0, xmm7 1599 paddd xmm8, xmm15 1600 paddd xmm0, xmm1 1601 paddd xmm8, xmm9 1602 pxor xmm3, xmm0 1603 pxor xmm11, xmm8 1604 pshufb xmm3, xmm13 1605 pshufb xmm11, xmm13 1606 paddd xmm2, xmm3 1607 paddd xmm10, xmm11 1608 pxor xmm1, xmm2 1609 pxor xmm9, xmm10 1610 movdqa xmm4, xmm1 1611 pslld xmm1, 25 1612 psrld xmm4, 7 1613 por xmm1, xmm4 1614 movdqa xmm4, xmm9 1615 pslld xmm9, 25 1616 psrld xmm4, 7 1617 por xmm9, xmm4 1618 pshufd xmm0, xmm0, 0x39 1619 pshufd xmm8, xmm8, 0x39 1620 pshufd xmm3, xmm3, 0x4E 1621 pshufd xmm11, xmm11, 0x4E 1622 pshufd xmm2, xmm2, 0x93 1623 pshufd xmm10, xmm10, 0x93 1624 dec al 1625 je 9f 1626 movdqa xmm12, xmmword ptr [rsp+0x20] 1627 movdqa xmm5, xmmword ptr [rsp+0x40] 1628 pshufd xmm13, xmm12, 0x0F 1629 shufps xmm12, xmm5, 214 1630 pshufd xmm4, xmm12, 0x39 1631 movdqa xmm12, xmm6 1632 shufps xmm12, xmm7, 250 1633 pblendw xmm13, xmm12, 0xCC 1634 movdqa xmm12, xmm7 1635 punpcklqdq xmm12, xmm5 1636 pblendw xmm12, xmm6, 0xC0 1637 pshufd xmm12, xmm12, 0x78 1638 punpckhdq xmm5, xmm7 1639 punpckldq xmm6, xmm5 1640 pshufd xmm7, xmm6, 0x1E 1641 movdqa xmmword ptr [rsp+0x20], xmm13 1642 movdqa xmmword ptr [rsp+0x40], xmm12 1643 movdqa xmm5, xmmword ptr [rsp+0x30] 1644 movdqa xmm13, xmmword ptr [rsp+0x50] 1645 pshufd xmm6, xmm5, 0x0F 1646 shufps xmm5, xmm13, 214 1647 pshufd xmm12, xmm5, 0x39 1648 movdqa xmm5, xmm14 1649 shufps xmm5, xmm15, 250 1650 pblendw xmm6, xmm5, 0xCC 1651 movdqa xmm5, xmm15 1652 punpcklqdq xmm5, xmm13 1653 pblendw xmm5, xmm14, 0xC0 1654 pshufd xmm5, xmm5, 0x78 1655 punpckhdq xmm13, xmm15 1656 punpckldq xmm14, xmm13 1657 pshufd xmm15, xmm14, 0x1E 1658 movdqa xmm13, xmm6 1659 movdqa xmm14, xmm5 1660 movdqa xmm5, xmmword ptr [rsp+0x20] 1661 movdqa xmm6, xmmword ptr [rsp+0x40] 1662 jmp 9b 1663 9: 1664 pxor xmm0, xmm2 1665 pxor xmm1, xmm3 1666 pxor xmm8, xmm10 
1667 pxor xmm9, xmm11 1668 mov eax, r13d 1669 cmp rdx, r15 1670 jne 2b 1671 movups xmmword ptr [rbx], xmm0 1672 movups xmmword ptr [rbx+0x10], xmm1 1673 movups xmmword ptr [rbx+0x20], xmm8 1674 movups xmmword ptr [rbx+0x30], xmm9 1675 movdqa xmm0, xmmword ptr [rsp+0x130] 1676 movdqa xmm1, xmmword ptr [rsp+0x110] 1677 movdqa xmm2, xmmword ptr [rsp+0x120] 1678 movdqu xmm3, xmmword ptr [rsp+0x118] 1679 movdqu xmm4, xmmword ptr [rsp+0x128] 1680 blendvps xmm1, xmm3, xmm0 1681 blendvps xmm2, xmm4, xmm0 1682 movdqa xmmword ptr [rsp+0x110], xmm1 1683 movdqa xmmword ptr [rsp+0x120], xmm2 1684 add rdi, 16 1685 add rbx, 64 1686 sub rsi, 2 1687 3: 1688 test esi, 0x1 1689 je 4b 1690 movups xmm0, xmmword ptr [rcx] 1691 movups xmm1, xmmword ptr [rcx+0x10] 1692 movd xmm13, dword ptr [rsp+0x110] 1693 pinsrd xmm13, dword ptr [rsp+0x120], 1 1694 pinsrd xmm13, dword ptr [BLAKE3_BLOCK_LEN+rip], 2 1695 movaps xmm14, xmmword ptr [ROT8+rip] 1696 movaps xmm15, xmmword ptr [ROT16+rip] 1697 mov r8, qword ptr [rdi] 1698 movzx eax, byte ptr [rbp+0x80] 1699 or eax, r13d 1700 xor edx, edx 1701 2: 1702 mov r14d, eax 1703 or eax, r12d 1704 add rdx, 64 1705 cmp rdx, r15 1706 cmovne eax, r14d 1707 movaps xmm2, xmmword ptr [BLAKE3_IV+rip] 1708 movaps xmm3, xmm13 1709 pinsrd xmm3, eax, 3 1710 movups xmm4, xmmword ptr [r8+rdx-0x40] 1711 movups xmm5, xmmword ptr [r8+rdx-0x30] 1712 movaps xmm8, xmm4 1713 shufps xmm4, xmm5, 136 1714 shufps xmm8, xmm5, 221 1715 movaps xmm5, xmm8 1716 movups xmm6, xmmword ptr [r8+rdx-0x20] 1717 movups xmm7, xmmword ptr [r8+rdx-0x10] 1718 movaps xmm8, xmm6 1719 shufps xmm6, xmm7, 136 1720 pshufd xmm6, xmm6, 0x93 1721 shufps xmm8, xmm7, 221 1722 pshufd xmm7, xmm8, 0x93 1723 mov al, 7 1724 9: 1725 paddd xmm0, xmm4 1726 paddd xmm0, xmm1 1727 pxor xmm3, xmm0 1728 pshufb xmm3, xmm15 1729 paddd xmm2, xmm3 1730 pxor xmm1, xmm2 1731 movdqa xmm11, xmm1 1732 pslld xmm1, 20 1733 psrld xmm11, 12 1734 por xmm1, xmm11 1735 paddd xmm0, xmm5 1736 paddd xmm0, xmm1 1737 pxor xmm3, xmm0 1738 
/*
 * blake3_hash_many_sse41, continued.  The function (and the loop this code
 * sits in) begins earlier in the file.  The instructions below are unchanged;
 * the numeric labels are kept because "2b", "4b" and the first "9b" resolve
 * to definitions that live above this point.
 */
        pshufb  xmm3, xmm14
        paddd   xmm2, xmm3
        pxor    xmm1, xmm2
        movdqa  xmm11, xmm1
        pslld   xmm1, 25
        psrld   xmm11, 7
        por     xmm1, xmm11
        pshufd  xmm0, xmm0, 0x93
        pshufd  xmm3, xmm3, 0x4E
        pshufd  xmm2, xmm2, 0x39
        paddd   xmm0, xmm6
        paddd   xmm0, xmm1
        pxor    xmm3, xmm0
        pshufb  xmm3, xmm15
        paddd   xmm2, xmm3
        pxor    xmm1, xmm2
        movdqa  xmm11, xmm1
        pslld   xmm1, 20
        psrld   xmm11, 12
        por     xmm1, xmm11
        paddd   xmm0, xmm7
        paddd   xmm0, xmm1
        pxor    xmm3, xmm0
        pshufb  xmm3, xmm14
        paddd   xmm2, xmm3
        pxor    xmm1, xmm2
        movdqa  xmm11, xmm1
        pslld   xmm1, 25
        psrld   xmm11, 7
        por     xmm1, xmm11
        pshufd  xmm0, xmm0, 0x39
        pshufd  xmm3, xmm3, 0x4E
        pshufd  xmm2, xmm2, 0x93
        dec     al
        jz      9f
        movdqa  xmm8, xmm4
        shufps  xmm8, xmm5, 214
        pshufd  xmm9, xmm4, 0x0F
        pshufd  xmm4, xmm8, 0x39
        movdqa  xmm8, xmm6
        shufps  xmm8, xmm7, 250
        pblendw xmm9, xmm8, 0xCC
        movdqa  xmm8, xmm7
        punpcklqdq xmm8, xmm5
        pblendw xmm8, xmm6, 0xC0
        pshufd  xmm8, xmm8, 0x78
        punpckhdq xmm5, xmm7
        punpckldq xmm6, xmm5
        pshufd  xmm7, xmm6, 0x1E
        movdqa  xmm5, xmm9
        movdqa  xmm6, xmm8
        jmp     9b
9:
        pxor    xmm0, xmm2
        pxor    xmm1, xmm3
        mov     eax, r13d
        cmp     rdx, r15
        jne     2b
        movups  xmmword ptr [rbx], xmm0
        movups  xmmword ptr [rbx+0x10], xmm1
        jmp     4b

/*
 * blake3_compress_in_place_sse41
 *
 * ABI: Microsoft x64 (arguments in rcx, rdx, r8, r9, then the stack).
 *
 * In:   rcx            pointer to 32 bytes that are read as the initial
 *                      state rows 0-1 and overwritten with the result
 *       rdx            pointer to a 64-byte input block (unaligned loads)
 *       r8b            8-bit value placed in lane 2 of state row 3
 *       r9             64-bit value placed in lanes 0-1 of state row 3
 *       [entry rsp+0x28]  fifth argument; only its low byte is read and
 *                      placed in lane 3 of state row 3
 *       NOTE(review): these presumably correspond to (cv, block, block_len,
 *       counter, flags) of the C prototype; confirm against the header.
 * Out:  nothing in registers; 32 bytes stored at [rcx].
 * Clobbers: rax, r8, xmm0-xmm5, flags.  xmm6-xmm9, xmm11, xmm14 and xmm15
 *       are callee-saved under this ABI and are spilled/reloaded here.
 * Stack: 0x78 bytes.  Entry rsp is 8 mod 16, so rsp is 16-byte aligned
 *       after the adjustment, which the movdqa spills rely on.
 */
.p2align 6
blake3_compress_in_place_sse41:
_blake3_compress_in_place_sse41:
        sub     rsp, 0x78
        movdqa  xmmword ptr [rsp], xmm6
        movdqa  xmmword ptr [rsp+0x10], xmm7
        movdqa  xmmword ptr [rsp+0x20], xmm8
        movdqa  xmmword ptr [rsp+0x30], xmm9
        movdqa  xmmword ptr [rsp+0x40], xmm11
        movdqa  xmmword ptr [rsp+0x50], xmm14
        movdqa  xmmword ptr [rsp+0x60], xmm15

        /* State rows: xmm0, xmm1 = the 32 bytes at rcx; xmm2 = BLAKE3_IV. */
        movups  xmm0, xmmword ptr [rcx]
        movups  xmm1, xmmword ptr [rcx+0x10]
        movaps  xmm2, xmmword ptr [BLAKE3_IV+rip]

        /* Row 3 (xmm3) = { r9 low, r9 high, r8b, stack byte }.          */
        movzx   eax, byte ptr [rsp+0xA0]        /* 0x78 frame + 0x28: fifth argument */
        movzx   r8d, r8b
        shl     rax, 32
        add     r8, rax                         /* r8 = r8b | (stack byte << 32) */
        movq    xmm3, r9
        movq    xmm4, r8
        punpcklqdq xmm3, xmm4

        /*
         * Split the 64-byte block into even/odd dwords:
         *   xmm4 = dwords 0,2,4,6     xmm5 = dwords 1,3,5,7
         *   xmm6 = dwords 8,10,12,14  xmm7 = dwords 9,11,13,15
         * with xmm6/xmm7 additionally lane-rotated by pshufd 0x93.
         */
        movups  xmm4, xmmword ptr [rdx]
        movups  xmm5, xmmword ptr [rdx+0x10]
        movaps  xmm8, xmm4
        shufps  xmm4, xmm5, 0x88
        shufps  xmm8, xmm5, 0xDD
        movaps  xmm5, xmm8
        movups  xmm6, xmmword ptr [rdx+0x20]
        movups  xmm7, xmmword ptr [rdx+0x30]
        movaps  xmm8, xmm6
        shufps  xmm6, xmm7, 0x88
        pshufd  xmm6, xmm6, 0x93
        shufps  xmm8, xmm7, 0xDD
        pshufd  xmm7, xmm8, 0x93

        /* Byte-shuffle masks used as 32-bit rotations by 8 and by 16. */
        movaps  xmm14, xmmword ptr [ROT8+rip]
        movaps  xmm15, xmmword ptr [ROT16+rip]
        mov     al, 7                           /* round counter: 7 iterations */
.Lcompress_in_place_round:
        /* First half-round, message vectors xmm4 then xmm5. */
        paddd   xmm0, xmm4
        paddd   xmm0, xmm1
        pxor    xmm3, xmm0
        pshufb  xmm3, xmm15                     /* rotate each lane by 16 */
        paddd   xmm2, xmm3
        pxor    xmm1, xmm2
        movdqa  xmm11, xmm1
        pslld   xmm1, 20
        psrld   xmm11, 12
        por     xmm1, xmm11                     /* rotate right by 12 */
        paddd   xmm0, xmm5
        paddd   xmm0, xmm1
        pxor    xmm3, xmm0
        pshufb  xmm3, xmm14                     /* rotate right by 8 */
        paddd   xmm2, xmm3
        pxor    xmm1, xmm2
        movdqa  xmm11, xmm1
        pslld   xmm1, 25
        psrld   xmm11, 7
        por     xmm1, xmm11                     /* rotate right by 7 */

        /* Rotate the lanes of rows 0, 3 and 2 before the second half-round. */
        pshufd  xmm0, xmm0, 0x93
        pshufd  xmm3, xmm3, 0x4E
        pshufd  xmm2, xmm2, 0x39

        /* Second half-round, message vectors xmm6 then xmm7. */
        paddd   xmm0, xmm6
        paddd   xmm0, xmm1
        pxor    xmm3, xmm0
        pshufb  xmm3, xmm15
        paddd   xmm2, xmm3
        pxor    xmm1, xmm2
        movdqa  xmm11, xmm1
        pslld   xmm1, 20
        psrld   xmm11, 12
        por     xmm1, xmm11
        paddd   xmm0, xmm7
        paddd   xmm0, xmm1
        pxor    xmm3, xmm0
        pshufb  xmm3, xmm14
        paddd   xmm2, xmm3
        pxor    xmm1, xmm2
        movdqa  xmm11, xmm1
        pslld   xmm1, 25
        psrld   xmm11, 7
        por     xmm1, xmm11

        /* Undo the lane rotation (0x39 inverts 0x93; 0x4E is its own inverse). */
        pshufd  xmm0, xmm0, 0x39
        pshufd  xmm3, xmm3, 0x4E
        pshufd  xmm2, xmm2, 0x93
        dec     al
        jz      .Lcompress_in_place_done

        /* Re-shuffle the message vectors xmm4-xmm7 for the next round;
           xmm8 and xmm9 are scratch. */
        movdqa  xmm8, xmm4
        shufps  xmm8, xmm5, 0xD6
        pshufd  xmm9, xmm4, 0x0F
        pshufd  xmm4, xmm8, 0x39
        movdqa  xmm8, xmm6
        shufps  xmm8, xmm7, 0xFA
        pblendw xmm9, xmm8, 0xCC
        movdqa  xmm8, xmm7
        punpcklqdq xmm8, xmm5
        pblendw xmm8, xmm6, 0xC0
        pshufd  xmm8, xmm8, 0x78
        punpckhdq xmm5, xmm7
        punpckldq xmm6, xmm5
        pshufd  xmm7, xmm6, 0x1E
        movdqa  xmm5, xmm9
        movdqa  xmm6, xmm8
        jmp     .Lcompress_in_place_round
.Lcompress_in_place_done:
        /* Result = rows 0-1 xor rows 2-3, written back over the input at rcx. */
        pxor    xmm0, xmm2
        pxor    xmm1, xmm3
        movups  xmmword ptr [rcx], xmm0
        movups  xmmword ptr [rcx+0x10], xmm1

        movdqa  xmm6, xmmword ptr [rsp]
        movdqa  xmm7, xmmword ptr [rsp+0x10]
        movdqa  xmm8, xmmword ptr [rsp+0x20]
        movdqa  xmm9, xmmword ptr [rsp+0x30]
        movdqa  xmm11, xmmword ptr [rsp+0x40]
        movdqa  xmm14, xmmword ptr [rsp+0x50]
        movdqa  xmm15, xmmword ptr [rsp+0x60]
        add     rsp, 0x78
        ret


/*
 * blake3_compress_xof_sse41
 *
 * ABI: Microsoft x64.  Same inputs and round function as
 * blake3_compress_in_place_sse41, plus a sixth argument:
 *
 * In:   rcx            pointer to 32 bytes read as state rows 0-1
 *                      (read again at the end; never written)
 *       rdx            pointer to a 64-byte input block
 *       r8b            8-bit value placed in lane 2 of state row 3
 *       r9             64-bit value placed in lanes 0-1 of state row 3
 *       [entry rsp+0x28]  fifth argument, low byte only (lane 3 of row 3)
 *       [entry rsp+0x30]  sixth argument: pointer that receives 64 bytes
 *       NOTE(review): presumably (cv, block, block_len, counter, flags, out)
 *       of the C prototype; confirm against the header.
 * Out:  64 bytes stored at the sixth-argument pointer:
 *         bytes  0..31 = rows 0-1 xor rows 2-3
 *         bytes 32..63 = rows 2-3 xor the 32 bytes at rcx
 * Clobbers: rax, r8, r10, xmm0-xmm5, flags.  xmm6-xmm9, xmm11, xmm14 and
 *       xmm15 are spilled/reloaded.
 * Stack: 0x78 bytes, 16-byte aligned after the adjustment.
 */
.p2align 6
_blake3_compress_xof_sse41:
blake3_compress_xof_sse41:
        sub     rsp, 0x78
        movdqa  xmmword ptr [rsp], xmm6
        movdqa  xmmword ptr [rsp+0x10], xmm7
        movdqa  xmmword ptr [rsp+0x20], xmm8
        movdqa  xmmword ptr [rsp+0x30], xmm9
        movdqa  xmmword ptr [rsp+0x40], xmm11
        movdqa  xmmword ptr [rsp+0x50], xmm14
        movdqa  xmmword ptr [rsp+0x60], xmm15

        /* State rows: xmm0, xmm1 = the 32 bytes at rcx; xmm2 = BLAKE3_IV. */
        movups  xmm0, xmmword ptr [rcx]
        movups  xmm1, xmmword ptr [rcx+0x10]
        movaps  xmm2, xmmword ptr [BLAKE3_IV+rip]

        /* Row 3 (xmm3) = { r9 low, r9 high, r8b, stack byte }; r10 = output pointer. */
        movzx   eax, byte ptr [rsp+0xA0]        /* 0x78 frame + 0x28: fifth argument */
        movzx   r8d, r8b
        mov     r10, qword ptr [rsp+0xA8]       /* 0x78 frame + 0x30: sixth argument */
        shl     rax, 32
        add     r8, rax                         /* r8 = r8b | (stack byte << 32) */
        movq    xmm3, r9
        movq    xmm4, r8
        punpcklqdq xmm3, xmm4

        /*
         * Split the 64-byte block into even/odd dwords:
         *   xmm4 = dwords 0,2,4,6     xmm5 = dwords 1,3,5,7
         *   xmm6 = dwords 8,10,12,14  xmm7 = dwords 9,11,13,15
         * with xmm6/xmm7 additionally lane-rotated by pshufd 0x93.
         */
        movups  xmm4, xmmword ptr [rdx]
        movups  xmm5, xmmword ptr [rdx+0x10]
        movaps  xmm8, xmm4
        shufps  xmm4, xmm5, 0x88
        shufps  xmm8, xmm5, 0xDD
        movaps  xmm5, xmm8
        movups  xmm6, xmmword ptr [rdx+0x20]
        movups  xmm7, xmmword ptr [rdx+0x30]
        movaps  xmm8, xmm6
        shufps  xmm6, xmm7, 0x88
        pshufd  xmm6, xmm6, 0x93
        shufps  xmm8, xmm7, 0xDD
        pshufd  xmm7, xmm8, 0x93

        /* Byte-shuffle masks used as 32-bit rotations by 8 and by 16. */
        movaps  xmm14, xmmword ptr [ROT8+rip]
        movaps  xmm15, xmmword ptr [ROT16+rip]
        mov     al, 7                           /* round counter: 7 iterations */
.Lcompress_xof_round:
        /* First half-round, message vectors xmm4 then xmm5. */
        paddd   xmm0, xmm4
        paddd   xmm0, xmm1
        pxor    xmm3, xmm0
        pshufb  xmm3, xmm15                     /* rotate each lane by 16 */
        paddd   xmm2, xmm3
        pxor    xmm1, xmm2
        movdqa  xmm11, xmm1
        pslld   xmm1, 20
        psrld   xmm11, 12
        por     xmm1, xmm11                     /* rotate right by 12 */
        paddd   xmm0, xmm5
        paddd   xmm0, xmm1
        pxor    xmm3, xmm0
        pshufb  xmm3, xmm14                     /* rotate right by 8 */
        paddd   xmm2, xmm3
        pxor    xmm1, xmm2
        movdqa  xmm11, xmm1
        pslld   xmm1, 25
        psrld   xmm11, 7
        por     xmm1, xmm11                     /* rotate right by 7 */

        /* Rotate the lanes of rows 0, 3 and 2 before the second half-round. */
        pshufd  xmm0, xmm0, 0x93
        pshufd  xmm3, xmm3, 0x4E
        pshufd  xmm2, xmm2, 0x39

        /* Second half-round, message vectors xmm6 then xmm7. */
        paddd   xmm0, xmm6
        paddd   xmm0, xmm1
        pxor    xmm3, xmm0
        pshufb  xmm3, xmm15
        paddd   xmm2, xmm3
        pxor    xmm1, xmm2
        movdqa  xmm11, xmm1
        pslld   xmm1, 20
        psrld   xmm11, 12
        por     xmm1, xmm11
        paddd   xmm0, xmm7
        paddd   xmm0, xmm1
        pxor    xmm3, xmm0
        pshufb  xmm3, xmm14
        paddd   xmm2, xmm3
        pxor    xmm1, xmm2
        movdqa  xmm11, xmm1
        pslld   xmm1, 25
        psrld   xmm11, 7
        por     xmm1, xmm11

        /* Undo the lane rotation (0x39 inverts 0x93; 0x4E is its own inverse). */
        pshufd  xmm0, xmm0, 0x39
        pshufd  xmm3, xmm3, 0x4E
        pshufd  xmm2, xmm2, 0x93
        dec     al
        jz      .Lcompress_xof_done

        /* Re-shuffle the message vectors xmm4-xmm7 for the next round;
           xmm8 and xmm9 are scratch. */
        movdqa  xmm8, xmm4
        shufps  xmm8, xmm5, 0xD6
        pshufd  xmm9, xmm4, 0x0F
        pshufd  xmm4, xmm8, 0x39
        movdqa  xmm8, xmm6
        shufps  xmm8, xmm7, 0xFA
        pblendw xmm9, xmm8, 0xCC
        movdqa  xmm8, xmm7
        punpcklqdq xmm8, xmm5
        pblendw xmm8, xmm6, 0xC0
        pshufd  xmm8, xmm8, 0x78
        punpckhdq xmm5, xmm7
        punpckldq xmm6, xmm5
        pshufd  xmm7, xmm6, 0x1E
        movdqa  xmm5, xmm9
        movdqa  xmm6, xmm8
        jmp     .Lcompress_xof_round
.Lcompress_xof_done:
        /* Reload the untouched 32 input bytes; the message registers
           xmm4/xmm5 are free to reuse at this point. */
        movdqu  xmm4, xmmword ptr [rcx]
        movdqu  xmm5, xmmword ptr [rcx+0x10]
        pxor    xmm0, xmm2                      /* out[ 0..15] = row0 ^ row2 */
        pxor    xmm1, xmm3                      /* out[16..31] = row1 ^ row3 */
        pxor    xmm2, xmm4                      /* out[32..47] = row2 ^ input[ 0..15] */
        pxor    xmm3, xmm5                      /* out[48..63] = row3 ^ input[16..31] */
        movups  xmmword ptr [r10], xmm0
        movups  xmmword ptr [r10+0x10], xmm1
        movups  xmmword ptr [r10+0x20], xmm2
        movups  xmmword ptr [r10+0x30], xmm3

        movdqa  xmm6, xmmword ptr [rsp]
        movdqa  xmm7, xmmword ptr [rsp+0x10]
        movdqa  xmm8, xmmword ptr [rsp+0x20]
        movdqa  xmm9, xmmword ptr [rsp+0x30]
        movdqa  xmm11, xmmword ptr [rsp+0x40]
        movdqa  xmm14, xmmword ptr [rsp+0x50]
        movdqa  xmm15, xmmword ptr [rsp+0x60]
        add     rsp, 0x78
        ret


/*
 * Constant tables.
 *
 * The section starts 64-byte aligned and every table is exactly 16 bytes
 * long, so each label is 16-byte aligned.  The code reads these tables with
 * aligned loads and as SSE memory operands (movaps, movdqa, pand, pxor),
 * which require that alignment: do not reorder or resize entries without
 * re-checking it.
 */
.section .rodata
.p2align 6
BLAKE3_IV:
        /* Loaded whole as state row 2 by the compress functions. */
        .long   0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A
ROT16:
        /* pshufb control: rotates every 32-bit lane by 16 bits. */
        .byte   2, 3, 0, 1, 6, 7, 4, 5, 10, 11, 8, 9, 14, 15, 12, 13
ROT8:
        /* pshufb control: rotates every 32-bit lane right by 8 bits. */
        .byte   1, 2, 3, 0, 5, 6, 7, 4, 9, 10, 11, 8, 13, 14, 15, 12
ADD0:
        /* Per-lane offsets 0..3; masked and added to the broadcast counter
           in the blake3_hash_many_sse41 prologue. */
        .long   0, 1, 2, 3
ADD1:
        /* Per-iteration counter step of the four-input loop (masked). */
        .long   4, 4, 4, 4
        /* The four IV words, each broadcast to all lanes.  Their uses are
           outside the part of the file reviewed here. */
BLAKE3_IV_0:
        .long   0x6A09E667, 0x6A09E667, 0x6A09E667, 0x6A09E667
BLAKE3_IV_1:
        .long   0xBB67AE85, 0xBB67AE85, 0xBB67AE85, 0xBB67AE85
BLAKE3_IV_2:
        .long   0x3C6EF372, 0x3C6EF372, 0x3C6EF372, 0x3C6EF372
BLAKE3_IV_3:
        .long   0xA54FF53A, 0xA54FF53A, 0xA54FF53A, 0xA54FF53A
BLAKE3_BLOCK_LEN:
        /* Inserted into lane 2 of state row 3 by the hash_many tail paths. */
        .long   64, 64, 64, 64
CMP_MSB_MASK:
        /* XORed into both operands so the signed pcmpgtd behaves as an
           unsigned compare when detecting counter carry. */
        .long   0x80000000, 0x80000000, 0x80000000, 0x80000000