blake3_sse2_x86-64_windows_msvc.asm (70982B)
1 public _blake3_hash_many_sse2 2 public blake3_hash_many_sse2 3 public blake3_compress_in_place_sse2 4 public _blake3_compress_in_place_sse2 5 public blake3_compress_xof_sse2 6 public _blake3_compress_xof_sse2 7 8 _TEXT SEGMENT ALIGN(16) 'CODE' 9 10 ALIGN 16 11 blake3_hash_many_sse2 PROC 12 _blake3_hash_many_sse2 PROC 13 push r15 14 push r14 15 push r13 16 push r12 17 push rsi 18 push rdi 19 push rbx 20 push rbp 21 mov rbp, rsp 22 sub rsp, 528 23 and rsp, 0FFFFFFFFFFFFFFC0H 24 movdqa xmmword ptr [rsp+170H], xmm6 25 movdqa xmmword ptr [rsp+180H], xmm7 26 movdqa xmmword ptr [rsp+190H], xmm8 27 movdqa xmmword ptr [rsp+1A0H], xmm9 28 movdqa xmmword ptr [rsp+1B0H], xmm10 29 movdqa xmmword ptr [rsp+1C0H], xmm11 30 movdqa xmmword ptr [rsp+1D0H], xmm12 31 movdqa xmmword ptr [rsp+1E0H], xmm13 32 movdqa xmmword ptr [rsp+1F0H], xmm14 33 movdqa xmmword ptr [rsp+200H], xmm15 34 mov rdi, rcx 35 mov rsi, rdx 36 mov rdx, r8 37 mov rcx, r9 38 mov r8, qword ptr [rbp+68H] 39 movzx r9, byte ptr [rbp+70H] 40 neg r9d 41 movd xmm0, r9d 42 pshufd xmm0, xmm0, 00H 43 movdqa xmmword ptr [rsp+130H], xmm0 44 movdqa xmm1, xmm0 45 pand xmm1, xmmword ptr [ADD0] 46 pand xmm0, xmmword ptr [ADD1] 47 movdqa xmmword ptr [rsp+150H], xmm0 48 movd xmm0, r8d 49 pshufd xmm0, xmm0, 00H 50 paddd xmm0, xmm1 51 movdqa xmmword ptr [rsp+110H], xmm0 52 pxor xmm0, xmmword ptr [CMP_MSB_MASK] 53 pxor xmm1, xmmword ptr [CMP_MSB_MASK] 54 pcmpgtd xmm1, xmm0 55 shr r8, 32 56 movd xmm2, r8d 57 pshufd xmm2, xmm2, 00H 58 psubd xmm2, xmm1 59 movdqa xmmword ptr [rsp+120H], xmm2 60 mov rbx, qword ptr [rbp+90H] 61 mov r15, rdx 62 shl r15, 6 63 movzx r13d, byte ptr [rbp+78H] 64 movzx r12d, byte ptr [rbp+88H] 65 cmp rsi, 4 66 jc final3blocks 67 outerloop4: 68 movdqu xmm3, xmmword ptr [rcx] 69 pshufd xmm0, xmm3, 00H 70 pshufd xmm1, xmm3, 55H 71 pshufd xmm2, xmm3, 0AAH 72 pshufd xmm3, xmm3, 0FFH 73 movdqu xmm7, xmmword ptr [rcx+10H] 74 pshufd xmm4, xmm7, 00H 75 pshufd xmm5, xmm7, 55H 76 pshufd xmm6, xmm7, 0AAH 77 pshufd xmm7, 
xmm7, 0FFH 78 mov r8, qword ptr [rdi] 79 mov r9, qword ptr [rdi+8H] 80 mov r10, qword ptr [rdi+10H] 81 mov r11, qword ptr [rdi+18H] 82 movzx eax, byte ptr [rbp+80H] 83 or eax, r13d 84 xor edx, edx 85 innerloop4: 86 mov r14d, eax 87 or eax, r12d 88 add rdx, 64 89 cmp rdx, r15 90 cmovne eax, r14d 91 movdqu xmm8, xmmword ptr [r8+rdx-40H] 92 movdqu xmm9, xmmword ptr [r9+rdx-40H] 93 movdqu xmm10, xmmword ptr [r10+rdx-40H] 94 movdqu xmm11, xmmword ptr [r11+rdx-40H] 95 movdqa xmm12, xmm8 96 punpckldq xmm8, xmm9 97 punpckhdq xmm12, xmm9 98 movdqa xmm14, xmm10 99 punpckldq xmm10, xmm11 100 punpckhdq xmm14, xmm11 101 movdqa xmm9, xmm8 102 punpcklqdq xmm8, xmm10 103 punpckhqdq xmm9, xmm10 104 movdqa xmm13, xmm12 105 punpcklqdq xmm12, xmm14 106 punpckhqdq xmm13, xmm14 107 movdqa xmmword ptr [rsp], xmm8 108 movdqa xmmword ptr [rsp+10H], xmm9 109 movdqa xmmword ptr [rsp+20H], xmm12 110 movdqa xmmword ptr [rsp+30H], xmm13 111 movdqu xmm8, xmmword ptr [r8+rdx-30H] 112 movdqu xmm9, xmmword ptr [r9+rdx-30H] 113 movdqu xmm10, xmmword ptr [r10+rdx-30H] 114 movdqu xmm11, xmmword ptr [r11+rdx-30H] 115 movdqa xmm12, xmm8 116 punpckldq xmm8, xmm9 117 punpckhdq xmm12, xmm9 118 movdqa xmm14, xmm10 119 punpckldq xmm10, xmm11 120 punpckhdq xmm14, xmm11 121 movdqa xmm9, xmm8 122 punpcklqdq xmm8, xmm10 123 punpckhqdq xmm9, xmm10 124 movdqa xmm13, xmm12 125 punpcklqdq xmm12, xmm14 126 punpckhqdq xmm13, xmm14 127 movdqa xmmword ptr [rsp+40H], xmm8 128 movdqa xmmword ptr [rsp+50H], xmm9 129 movdqa xmmword ptr [rsp+60H], xmm12 130 movdqa xmmword ptr [rsp+70H], xmm13 131 movdqu xmm8, xmmword ptr [r8+rdx-20H] 132 movdqu xmm9, xmmword ptr [r9+rdx-20H] 133 movdqu xmm10, xmmword ptr [r10+rdx-20H] 134 movdqu xmm11, xmmword ptr [r11+rdx-20H] 135 movdqa xmm12, xmm8 136 punpckldq xmm8, xmm9 137 punpckhdq xmm12, xmm9 138 movdqa xmm14, xmm10 139 punpckldq xmm10, xmm11 140 punpckhdq xmm14, xmm11 141 movdqa xmm9, xmm8 142 punpcklqdq xmm8, xmm10 143 punpckhqdq xmm9, xmm10 144 movdqa xmm13, xmm12 145 punpcklqdq 
xmm12, xmm14 146 punpckhqdq xmm13, xmm14 147 movdqa xmmword ptr [rsp+80H], xmm8 148 movdqa xmmword ptr [rsp+90H], xmm9 149 movdqa xmmword ptr [rsp+0A0H], xmm12 150 movdqa xmmword ptr [rsp+0B0H], xmm13 151 movdqu xmm8, xmmword ptr [r8+rdx-10H] 152 movdqu xmm9, xmmword ptr [r9+rdx-10H] 153 movdqu xmm10, xmmword ptr [r10+rdx-10H] 154 movdqu xmm11, xmmword ptr [r11+rdx-10H] 155 movdqa xmm12, xmm8 156 punpckldq xmm8, xmm9 157 punpckhdq xmm12, xmm9 158 movdqa xmm14, xmm10 159 punpckldq xmm10, xmm11 160 punpckhdq xmm14, xmm11 161 movdqa xmm9, xmm8 162 punpcklqdq xmm8, xmm10 163 punpckhqdq xmm9, xmm10 164 movdqa xmm13, xmm12 165 punpcklqdq xmm12, xmm14 166 punpckhqdq xmm13, xmm14 167 movdqa xmmword ptr [rsp+0C0H], xmm8 168 movdqa xmmword ptr [rsp+0D0H], xmm9 169 movdqa xmmword ptr [rsp+0E0H], xmm12 170 movdqa xmmword ptr [rsp+0F0H], xmm13 171 movdqa xmm9, xmmword ptr [BLAKE3_IV_1] 172 movdqa xmm10, xmmword ptr [BLAKE3_IV_2] 173 movdqa xmm11, xmmword ptr [BLAKE3_IV_3] 174 movdqa xmm12, xmmword ptr [rsp+110H] 175 movdqa xmm13, xmmword ptr [rsp+120H] 176 movdqa xmm14, xmmword ptr [BLAKE3_BLOCK_LEN] 177 movd xmm15, eax 178 pshufd xmm15, xmm15, 00H 179 prefetcht0 byte ptr [r8+rdx+80H] 180 prefetcht0 byte ptr [r9+rdx+80H] 181 prefetcht0 byte ptr [r10+rdx+80H] 182 prefetcht0 byte ptr [r11+rdx+80H] 183 paddd xmm0, xmmword ptr [rsp] 184 paddd xmm1, xmmword ptr [rsp+20H] 185 paddd xmm2, xmmword ptr [rsp+40H] 186 paddd xmm3, xmmword ptr [rsp+60H] 187 paddd xmm0, xmm4 188 paddd xmm1, xmm5 189 paddd xmm2, xmm6 190 paddd xmm3, xmm7 191 pxor xmm12, xmm0 192 pxor xmm13, xmm1 193 pxor xmm14, xmm2 194 pxor xmm15, xmm3 195 pshuflw xmm12, xmm12, 0B1H 196 pshufhw xmm12, xmm12, 0B1H 197 pshuflw xmm13, xmm13, 0B1H 198 pshufhw xmm13, xmm13, 0B1H 199 pshuflw xmm14, xmm14, 0B1H 200 pshufhw xmm14, xmm14, 0B1H 201 pshuflw xmm15, xmm15, 0B1H 202 pshufhw xmm15, xmm15, 0B1H 203 movdqa xmm8, xmmword ptr [BLAKE3_IV_0] 204 paddd xmm8, xmm12 205 paddd xmm9, xmm13 206 paddd xmm10, xmm14 207 paddd xmm11, 
xmm15 208 pxor xmm4, xmm8 209 pxor xmm5, xmm9 210 pxor xmm6, xmm10 211 pxor xmm7, xmm11 212 movdqa xmmword ptr [rsp+100H], xmm8 213 movdqa xmm8, xmm4 214 psrld xmm8, 12 215 pslld xmm4, 20 216 por xmm4, xmm8 217 movdqa xmm8, xmm5 218 psrld xmm8, 12 219 pslld xmm5, 20 220 por xmm5, xmm8 221 movdqa xmm8, xmm6 222 psrld xmm8, 12 223 pslld xmm6, 20 224 por xmm6, xmm8 225 movdqa xmm8, xmm7 226 psrld xmm8, 12 227 pslld xmm7, 20 228 por xmm7, xmm8 229 paddd xmm0, xmmword ptr [rsp+10H] 230 paddd xmm1, xmmword ptr [rsp+30H] 231 paddd xmm2, xmmword ptr [rsp+50H] 232 paddd xmm3, xmmword ptr [rsp+70H] 233 paddd xmm0, xmm4 234 paddd xmm1, xmm5 235 paddd xmm2, xmm6 236 paddd xmm3, xmm7 237 pxor xmm12, xmm0 238 pxor xmm13, xmm1 239 pxor xmm14, xmm2 240 pxor xmm15, xmm3 241 movdqa xmm8, xmm12 242 psrld xmm12, 8 243 pslld xmm8, 24 244 pxor xmm12, xmm8 245 movdqa xmm8, xmm13 246 psrld xmm13, 8 247 pslld xmm8, 24 248 pxor xmm13, xmm8 249 movdqa xmm8, xmm14 250 psrld xmm14, 8 251 pslld xmm8, 24 252 pxor xmm14, xmm8 253 movdqa xmm8, xmm15 254 psrld xmm15, 8 255 pslld xmm8, 24 256 pxor xmm15, xmm8 257 movdqa xmm8, xmmword ptr [rsp+100H] 258 paddd xmm8, xmm12 259 paddd xmm9, xmm13 260 paddd xmm10, xmm14 261 paddd xmm11, xmm15 262 pxor xmm4, xmm8 263 pxor xmm5, xmm9 264 pxor xmm6, xmm10 265 pxor xmm7, xmm11 266 movdqa xmmword ptr [rsp+100H], xmm8 267 movdqa xmm8, xmm4 268 psrld xmm8, 7 269 pslld xmm4, 25 270 por xmm4, xmm8 271 movdqa xmm8, xmm5 272 psrld xmm8, 7 273 pslld xmm5, 25 274 por xmm5, xmm8 275 movdqa xmm8, xmm6 276 psrld xmm8, 7 277 pslld xmm6, 25 278 por xmm6, xmm8 279 movdqa xmm8, xmm7 280 psrld xmm8, 7 281 pslld xmm7, 25 282 por xmm7, xmm8 283 paddd xmm0, xmmword ptr [rsp+80H] 284 paddd xmm1, xmmword ptr [rsp+0A0H] 285 paddd xmm2, xmmword ptr [rsp+0C0H] 286 paddd xmm3, xmmword ptr [rsp+0E0H] 287 paddd xmm0, xmm5 288 paddd xmm1, xmm6 289 paddd xmm2, xmm7 290 paddd xmm3, xmm4 291 pxor xmm15, xmm0 292 pxor xmm12, xmm1 293 pxor xmm13, xmm2 294 pxor xmm14, xmm3 295 pshuflw xmm15, 
xmm15, 0B1H 296 pshufhw xmm15, xmm15, 0B1H 297 pshuflw xmm12, xmm12, 0B1H 298 pshufhw xmm12, xmm12, 0B1H 299 pshuflw xmm13, xmm13, 0B1H 300 pshufhw xmm13, xmm13, 0B1H 301 pshuflw xmm14, xmm14, 0B1H 302 pshufhw xmm14, xmm14, 0B1H 303 paddd xmm10, xmm15 304 paddd xmm11, xmm12 305 movdqa xmm8, xmmword ptr [rsp+100H] 306 paddd xmm8, xmm13 307 paddd xmm9, xmm14 308 pxor xmm5, xmm10 309 pxor xmm6, xmm11 310 pxor xmm7, xmm8 311 pxor xmm4, xmm9 312 movdqa xmmword ptr [rsp+100H], xmm8 313 movdqa xmm8, xmm5 314 psrld xmm8, 12 315 pslld xmm5, 20 316 por xmm5, xmm8 317 movdqa xmm8, xmm6 318 psrld xmm8, 12 319 pslld xmm6, 20 320 por xmm6, xmm8 321 movdqa xmm8, xmm7 322 psrld xmm8, 12 323 pslld xmm7, 20 324 por xmm7, xmm8 325 movdqa xmm8, xmm4 326 psrld xmm8, 12 327 pslld xmm4, 20 328 por xmm4, xmm8 329 paddd xmm0, xmmword ptr [rsp+90H] 330 paddd xmm1, xmmword ptr [rsp+0B0H] 331 paddd xmm2, xmmword ptr [rsp+0D0H] 332 paddd xmm3, xmmword ptr [rsp+0F0H] 333 paddd xmm0, xmm5 334 paddd xmm1, xmm6 335 paddd xmm2, xmm7 336 paddd xmm3, xmm4 337 pxor xmm15, xmm0 338 pxor xmm12, xmm1 339 pxor xmm13, xmm2 340 pxor xmm14, xmm3 341 movdqa xmm8, xmm15 342 psrld xmm15, 8 343 pslld xmm8, 24 344 pxor xmm15, xmm8 345 movdqa xmm8, xmm12 346 psrld xmm12, 8 347 pslld xmm8, 24 348 pxor xmm12, xmm8 349 movdqa xmm8, xmm13 350 psrld xmm13, 8 351 pslld xmm8, 24 352 pxor xmm13, xmm8 353 movdqa xmm8, xmm14 354 psrld xmm14, 8 355 pslld xmm8, 24 356 pxor xmm14, xmm8 357 paddd xmm10, xmm15 358 paddd xmm11, xmm12 359 movdqa xmm8, xmmword ptr [rsp+100H] 360 paddd xmm8, xmm13 361 paddd xmm9, xmm14 362 pxor xmm5, xmm10 363 pxor xmm6, xmm11 364 pxor xmm7, xmm8 365 pxor xmm4, xmm9 366 movdqa xmmword ptr [rsp+100H], xmm8 367 movdqa xmm8, xmm5 368 psrld xmm8, 7 369 pslld xmm5, 25 370 por xmm5, xmm8 371 movdqa xmm8, xmm6 372 psrld xmm8, 7 373 pslld xmm6, 25 374 por xmm6, xmm8 375 movdqa xmm8, xmm7 376 psrld xmm8, 7 377 pslld xmm7, 25 378 por xmm7, xmm8 379 movdqa xmm8, xmm4 380 psrld xmm8, 7 381 pslld xmm4, 25 382 
por xmm4, xmm8 383 paddd xmm0, xmmword ptr [rsp+20H] 384 paddd xmm1, xmmword ptr [rsp+30H] 385 paddd xmm2, xmmword ptr [rsp+70H] 386 paddd xmm3, xmmword ptr [rsp+40H] 387 paddd xmm0, xmm4 388 paddd xmm1, xmm5 389 paddd xmm2, xmm6 390 paddd xmm3, xmm7 391 pxor xmm12, xmm0 392 pxor xmm13, xmm1 393 pxor xmm14, xmm2 394 pxor xmm15, xmm3 395 pshuflw xmm12, xmm12, 0B1H 396 pshufhw xmm12, xmm12, 0B1H 397 pshuflw xmm13, xmm13, 0B1H 398 pshufhw xmm13, xmm13, 0B1H 399 pshuflw xmm14, xmm14, 0B1H 400 pshufhw xmm14, xmm14, 0B1H 401 pshuflw xmm15, xmm15, 0B1H 402 pshufhw xmm15, xmm15, 0B1H 403 movdqa xmm8, xmmword ptr [rsp+100H] 404 paddd xmm8, xmm12 405 paddd xmm9, xmm13 406 paddd xmm10, xmm14 407 paddd xmm11, xmm15 408 pxor xmm4, xmm8 409 pxor xmm5, xmm9 410 pxor xmm6, xmm10 411 pxor xmm7, xmm11 412 movdqa xmmword ptr [rsp+100H], xmm8 413 movdqa xmm8, xmm4 414 psrld xmm8, 12 415 pslld xmm4, 20 416 por xmm4, xmm8 417 movdqa xmm8, xmm5 418 psrld xmm8, 12 419 pslld xmm5, 20 420 por xmm5, xmm8 421 movdqa xmm8, xmm6 422 psrld xmm8, 12 423 pslld xmm6, 20 424 por xmm6, xmm8 425 movdqa xmm8, xmm7 426 psrld xmm8, 12 427 pslld xmm7, 20 428 por xmm7, xmm8 429 paddd xmm0, xmmword ptr [rsp+60H] 430 paddd xmm1, xmmword ptr [rsp+0A0H] 431 paddd xmm2, xmmword ptr [rsp] 432 paddd xmm3, xmmword ptr [rsp+0D0H] 433 paddd xmm0, xmm4 434 paddd xmm1, xmm5 435 paddd xmm2, xmm6 436 paddd xmm3, xmm7 437 pxor xmm12, xmm0 438 pxor xmm13, xmm1 439 pxor xmm14, xmm2 440 pxor xmm15, xmm3 441 movdqa xmm8, xmm12 442 psrld xmm12, 8 443 pslld xmm8, 24 444 pxor xmm12, xmm8 445 movdqa xmm8, xmm13 446 psrld xmm13, 8 447 pslld xmm8, 24 448 pxor xmm13, xmm8 449 movdqa xmm8, xmm14 450 psrld xmm14, 8 451 pslld xmm8, 24 452 pxor xmm14, xmm8 453 movdqa xmm8, xmm15 454 psrld xmm15, 8 455 pslld xmm8, 24 456 pxor xmm15, xmm8 457 movdqa xmm8, xmmword ptr [rsp+100H] 458 paddd xmm8, xmm12 459 paddd xmm9, xmm13 460 paddd xmm10, xmm14 461 paddd xmm11, xmm15 462 pxor xmm4, xmm8 463 pxor xmm5, xmm9 464 pxor xmm6, xmm10 465 pxor 
xmm7, xmm11 466 movdqa xmmword ptr [rsp+100H], xmm8 467 movdqa xmm8, xmm4 468 psrld xmm8, 7 469 pslld xmm4, 25 470 por xmm4, xmm8 471 movdqa xmm8, xmm5 472 psrld xmm8, 7 473 pslld xmm5, 25 474 por xmm5, xmm8 475 movdqa xmm8, xmm6 476 psrld xmm8, 7 477 pslld xmm6, 25 478 por xmm6, xmm8 479 movdqa xmm8, xmm7 480 psrld xmm8, 7 481 pslld xmm7, 25 482 por xmm7, xmm8 483 paddd xmm0, xmmword ptr [rsp+10H] 484 paddd xmm1, xmmword ptr [rsp+0C0H] 485 paddd xmm2, xmmword ptr [rsp+90H] 486 paddd xmm3, xmmword ptr [rsp+0F0H] 487 paddd xmm0, xmm5 488 paddd xmm1, xmm6 489 paddd xmm2, xmm7 490 paddd xmm3, xmm4 491 pxor xmm15, xmm0 492 pxor xmm12, xmm1 493 pxor xmm13, xmm2 494 pxor xmm14, xmm3 495 pshuflw xmm15, xmm15, 0B1H 496 pshufhw xmm15, xmm15, 0B1H 497 pshuflw xmm12, xmm12, 0B1H 498 pshufhw xmm12, xmm12, 0B1H 499 pshuflw xmm13, xmm13, 0B1H 500 pshufhw xmm13, xmm13, 0B1H 501 pshuflw xmm14, xmm14, 0B1H 502 pshufhw xmm14, xmm14, 0B1H 503 paddd xmm10, xmm15 504 paddd xmm11, xmm12 505 movdqa xmm8, xmmword ptr [rsp+100H] 506 paddd xmm8, xmm13 507 paddd xmm9, xmm14 508 pxor xmm5, xmm10 509 pxor xmm6, xmm11 510 pxor xmm7, xmm8 511 pxor xmm4, xmm9 512 movdqa xmmword ptr [rsp+100H], xmm8 513 movdqa xmm8, xmm5 514 psrld xmm8, 12 515 pslld xmm5, 20 516 por xmm5, xmm8 517 movdqa xmm8, xmm6 518 psrld xmm8, 12 519 pslld xmm6, 20 520 por xmm6, xmm8 521 movdqa xmm8, xmm7 522 psrld xmm8, 12 523 pslld xmm7, 20 524 por xmm7, xmm8 525 movdqa xmm8, xmm4 526 psrld xmm8, 12 527 pslld xmm4, 20 528 por xmm4, xmm8 529 paddd xmm0, xmmword ptr [rsp+0B0H] 530 paddd xmm1, xmmword ptr [rsp+50H] 531 paddd xmm2, xmmword ptr [rsp+0E0H] 532 paddd xmm3, xmmword ptr [rsp+80H] 533 paddd xmm0, xmm5 534 paddd xmm1, xmm6 535 paddd xmm2, xmm7 536 paddd xmm3, xmm4 537 pxor xmm15, xmm0 538 pxor xmm12, xmm1 539 pxor xmm13, xmm2 540 pxor xmm14, xmm3 541 movdqa xmm8, xmm15 542 psrld xmm15, 8 543 pslld xmm8, 24 544 pxor xmm15, xmm8 545 movdqa xmm8, xmm12 546 psrld xmm12, 8 547 pslld xmm8, 24 548 pxor xmm12, xmm8 549 movdqa 
xmm8, xmm13 550 psrld xmm13, 8 551 pslld xmm8, 24 552 pxor xmm13, xmm8 553 movdqa xmm8, xmm14 554 psrld xmm14, 8 555 pslld xmm8, 24 556 pxor xmm14, xmm8 557 paddd xmm10, xmm15 558 paddd xmm11, xmm12 559 movdqa xmm8, xmmword ptr [rsp+100H] 560 paddd xmm8, xmm13 561 paddd xmm9, xmm14 562 pxor xmm5, xmm10 563 pxor xmm6, xmm11 564 pxor xmm7, xmm8 565 pxor xmm4, xmm9 566 movdqa xmmword ptr [rsp+100H], xmm8 567 movdqa xmm8, xmm5 568 psrld xmm8, 7 569 pslld xmm5, 25 570 por xmm5, xmm8 571 movdqa xmm8, xmm6 572 psrld xmm8, 7 573 pslld xmm6, 25 574 por xmm6, xmm8 575 movdqa xmm8, xmm7 576 psrld xmm8, 7 577 pslld xmm7, 25 578 por xmm7, xmm8 579 movdqa xmm8, xmm4 580 psrld xmm8, 7 581 pslld xmm4, 25 582 por xmm4, xmm8 583 paddd xmm0, xmmword ptr [rsp+30H] 584 paddd xmm1, xmmword ptr [rsp+0A0H] 585 paddd xmm2, xmmword ptr [rsp+0D0H] 586 paddd xmm3, xmmword ptr [rsp+70H] 587 paddd xmm0, xmm4 588 paddd xmm1, xmm5 589 paddd xmm2, xmm6 590 paddd xmm3, xmm7 591 pxor xmm12, xmm0 592 pxor xmm13, xmm1 593 pxor xmm14, xmm2 594 pxor xmm15, xmm3 595 pshuflw xmm12, xmm12, 0B1H 596 pshufhw xmm12, xmm12, 0B1H 597 pshuflw xmm13, xmm13, 0B1H 598 pshufhw xmm13, xmm13, 0B1H 599 pshuflw xmm14, xmm14, 0B1H 600 pshufhw xmm14, xmm14, 0B1H 601 pshuflw xmm15, xmm15, 0B1H 602 pshufhw xmm15, xmm15, 0B1H 603 movdqa xmm8, xmmword ptr [rsp+100H] 604 paddd xmm8, xmm12 605 paddd xmm9, xmm13 606 paddd xmm10, xmm14 607 paddd xmm11, xmm15 608 pxor xmm4, xmm8 609 pxor xmm5, xmm9 610 pxor xmm6, xmm10 611 pxor xmm7, xmm11 612 movdqa xmmword ptr [rsp+100H], xmm8 613 movdqa xmm8, xmm4 614 psrld xmm8, 12 615 pslld xmm4, 20 616 por xmm4, xmm8 617 movdqa xmm8, xmm5 618 psrld xmm8, 12 619 pslld xmm5, 20 620 por xmm5, xmm8 621 movdqa xmm8, xmm6 622 psrld xmm8, 12 623 pslld xmm6, 20 624 por xmm6, xmm8 625 movdqa xmm8, xmm7 626 psrld xmm8, 12 627 pslld xmm7, 20 628 por xmm7, xmm8 629 paddd xmm0, xmmword ptr [rsp+40H] 630 paddd xmm1, xmmword ptr [rsp+0C0H] 631 paddd xmm2, xmmword ptr [rsp+20H] 632 paddd xmm3, xmmword ptr 
[rsp+0E0H] 633 paddd xmm0, xmm4 634 paddd xmm1, xmm5 635 paddd xmm2, xmm6 636 paddd xmm3, xmm7 637 pxor xmm12, xmm0 638 pxor xmm13, xmm1 639 pxor xmm14, xmm2 640 pxor xmm15, xmm3 641 movdqa xmm8, xmm12 642 psrld xmm12, 8 643 pslld xmm8, 24 644 pxor xmm12, xmm8 645 movdqa xmm8, xmm13 646 psrld xmm13, 8 647 pslld xmm8, 24 648 pxor xmm13, xmm8 649 movdqa xmm8, xmm14 650 psrld xmm14, 8 651 pslld xmm8, 24 652 pxor xmm14, xmm8 653 movdqa xmm8, xmm15 654 psrld xmm15, 8 655 pslld xmm8, 24 656 pxor xmm15, xmm8 657 movdqa xmm8, xmmword ptr [rsp+100H] 658 paddd xmm8, xmm12 659 paddd xmm9, xmm13 660 paddd xmm10, xmm14 661 paddd xmm11, xmm15 662 pxor xmm4, xmm8 663 pxor xmm5, xmm9 664 pxor xmm6, xmm10 665 pxor xmm7, xmm11 666 movdqa xmmword ptr [rsp+100H], xmm8 667 movdqa xmm8, xmm4 668 psrld xmm8, 7 669 pslld xmm4, 25 670 por xmm4, xmm8 671 movdqa xmm8, xmm5 672 psrld xmm8, 7 673 pslld xmm5, 25 674 por xmm5, xmm8 675 movdqa xmm8, xmm6 676 psrld xmm8, 7 677 pslld xmm6, 25 678 por xmm6, xmm8 679 movdqa xmm8, xmm7 680 psrld xmm8, 7 681 pslld xmm7, 25 682 por xmm7, xmm8 683 paddd xmm0, xmmword ptr [rsp+60H] 684 paddd xmm1, xmmword ptr [rsp+90H] 685 paddd xmm2, xmmword ptr [rsp+0B0H] 686 paddd xmm3, xmmword ptr [rsp+80H] 687 paddd xmm0, xmm5 688 paddd xmm1, xmm6 689 paddd xmm2, xmm7 690 paddd xmm3, xmm4 691 pxor xmm15, xmm0 692 pxor xmm12, xmm1 693 pxor xmm13, xmm2 694 pxor xmm14, xmm3 695 pshuflw xmm15, xmm15, 0B1H 696 pshufhw xmm15, xmm15, 0B1H 697 pshuflw xmm12, xmm12, 0B1H 698 pshufhw xmm12, xmm12, 0B1H 699 pshuflw xmm13, xmm13, 0B1H 700 pshufhw xmm13, xmm13, 0B1H 701 pshuflw xmm14, xmm14, 0B1H 702 pshufhw xmm14, xmm14, 0B1H 703 paddd xmm10, xmm15 704 paddd xmm11, xmm12 705 movdqa xmm8, xmmword ptr [rsp+100H] 706 paddd xmm8, xmm13 707 paddd xmm9, xmm14 708 pxor xmm5, xmm10 709 pxor xmm6, xmm11 710 pxor xmm7, xmm8 711 pxor xmm4, xmm9 712 movdqa xmmword ptr [rsp+100H], xmm8 713 movdqa xmm8, xmm5 714 psrld xmm8, 12 715 pslld xmm5, 20 716 por xmm5, xmm8 717 movdqa xmm8, xmm6 718 
psrld xmm8, 12 719 pslld xmm6, 20 720 por xmm6, xmm8 721 movdqa xmm8, xmm7 722 psrld xmm8, 12 723 pslld xmm7, 20 724 por xmm7, xmm8 725 movdqa xmm8, xmm4 726 psrld xmm8, 12 727 pslld xmm4, 20 728 por xmm4, xmm8 729 paddd xmm0, xmmword ptr [rsp+50H] 730 paddd xmm1, xmmword ptr [rsp] 731 paddd xmm2, xmmword ptr [rsp+0F0H] 732 paddd xmm3, xmmword ptr [rsp+10H] 733 paddd xmm0, xmm5 734 paddd xmm1, xmm6 735 paddd xmm2, xmm7 736 paddd xmm3, xmm4 737 pxor xmm15, xmm0 738 pxor xmm12, xmm1 739 pxor xmm13, xmm2 740 pxor xmm14, xmm3 741 movdqa xmm8, xmm15 742 psrld xmm15, 8 743 pslld xmm8, 24 744 pxor xmm15, xmm8 745 movdqa xmm8, xmm12 746 psrld xmm12, 8 747 pslld xmm8, 24 748 pxor xmm12, xmm8 749 movdqa xmm8, xmm13 750 psrld xmm13, 8 751 pslld xmm8, 24 752 pxor xmm13, xmm8 753 movdqa xmm8, xmm14 754 psrld xmm14, 8 755 pslld xmm8, 24 756 pxor xmm14, xmm8 757 paddd xmm10, xmm15 758 paddd xmm11, xmm12 759 movdqa xmm8, xmmword ptr [rsp+100H] 760 paddd xmm8, xmm13 761 paddd xmm9, xmm14 762 pxor xmm5, xmm10 763 pxor xmm6, xmm11 764 pxor xmm7, xmm8 765 pxor xmm4, xmm9 766 movdqa xmmword ptr [rsp+100H], xmm8 767 movdqa xmm8, xmm5 768 psrld xmm8, 7 769 pslld xmm5, 25 770 por xmm5, xmm8 771 movdqa xmm8, xmm6 772 psrld xmm8, 7 773 pslld xmm6, 25 774 por xmm6, xmm8 775 movdqa xmm8, xmm7 776 psrld xmm8, 7 777 pslld xmm7, 25 778 por xmm7, xmm8 779 movdqa xmm8, xmm4 780 psrld xmm8, 7 781 pslld xmm4, 25 782 por xmm4, xmm8 783 paddd xmm0, xmmword ptr [rsp+0A0H] 784 paddd xmm1, xmmword ptr [rsp+0C0H] 785 paddd xmm2, xmmword ptr [rsp+0E0H] 786 paddd xmm3, xmmword ptr [rsp+0D0H] 787 paddd xmm0, xmm4 788 paddd xmm1, xmm5 789 paddd xmm2, xmm6 790 paddd xmm3, xmm7 791 pxor xmm12, xmm0 792 pxor xmm13, xmm1 793 pxor xmm14, xmm2 794 pxor xmm15, xmm3 795 pshuflw xmm12, xmm12, 0B1H 796 pshufhw xmm12, xmm12, 0B1H 797 pshuflw xmm13, xmm13, 0B1H 798 pshufhw xmm13, xmm13, 0B1H 799 pshuflw xmm14, xmm14, 0B1H 800 pshufhw xmm14, xmm14, 0B1H 801 pshuflw xmm15, xmm15, 0B1H 802 pshufhw xmm15, xmm15, 0B1H 803 
movdqa xmm8, xmmword ptr [rsp+100H] 804 paddd xmm8, xmm12 805 paddd xmm9, xmm13 806 paddd xmm10, xmm14 807 paddd xmm11, xmm15 808 pxor xmm4, xmm8 809 pxor xmm5, xmm9 810 pxor xmm6, xmm10 811 pxor xmm7, xmm11 812 movdqa xmmword ptr [rsp+100H], xmm8 813 movdqa xmm8, xmm4 814 psrld xmm8, 12 815 pslld xmm4, 20 816 por xmm4, xmm8 817 movdqa xmm8, xmm5 818 psrld xmm8, 12 819 pslld xmm5, 20 820 por xmm5, xmm8 821 movdqa xmm8, xmm6 822 psrld xmm8, 12 823 pslld xmm6, 20 824 por xmm6, xmm8 825 movdqa xmm8, xmm7 826 psrld xmm8, 12 827 pslld xmm7, 20 828 por xmm7, xmm8 829 paddd xmm0, xmmword ptr [rsp+70H] 830 paddd xmm1, xmmword ptr [rsp+90H] 831 paddd xmm2, xmmword ptr [rsp+30H] 832 paddd xmm3, xmmword ptr [rsp+0F0H] 833 paddd xmm0, xmm4 834 paddd xmm1, xmm5 835 paddd xmm2, xmm6 836 paddd xmm3, xmm7 837 pxor xmm12, xmm0 838 pxor xmm13, xmm1 839 pxor xmm14, xmm2 840 pxor xmm15, xmm3 841 movdqa xmm8, xmm12 842 psrld xmm12, 8 843 pslld xmm8, 24 844 pxor xmm12, xmm8 845 movdqa xmm8, xmm13 846 psrld xmm13, 8 847 pslld xmm8, 24 848 pxor xmm13, xmm8 849 movdqa xmm8, xmm14 850 psrld xmm14, 8 851 pslld xmm8, 24 852 pxor xmm14, xmm8 853 movdqa xmm8, xmm15 854 psrld xmm15, 8 855 pslld xmm8, 24 856 pxor xmm15, xmm8 857 movdqa xmm8, xmmword ptr [rsp+100H] 858 paddd xmm8, xmm12 859 paddd xmm9, xmm13 860 paddd xmm10, xmm14 861 paddd xmm11, xmm15 862 pxor xmm4, xmm8 863 pxor xmm5, xmm9 864 pxor xmm6, xmm10 865 pxor xmm7, xmm11 866 movdqa xmmword ptr [rsp+100H], xmm8 867 movdqa xmm8, xmm4 868 psrld xmm8, 7 869 pslld xmm4, 25 870 por xmm4, xmm8 871 movdqa xmm8, xmm5 872 psrld xmm8, 7 873 pslld xmm5, 25 874 por xmm5, xmm8 875 movdqa xmm8, xmm6 876 psrld xmm8, 7 877 pslld xmm6, 25 878 por xmm6, xmm8 879 movdqa xmm8, xmm7 880 psrld xmm8, 7 881 pslld xmm7, 25 882 por xmm7, xmm8 883 paddd xmm0, xmmword ptr [rsp+40H] 884 paddd xmm1, xmmword ptr [rsp+0B0H] 885 paddd xmm2, xmmword ptr [rsp+50H] 886 paddd xmm3, xmmword ptr [rsp+10H] 887 paddd xmm0, xmm5 888 paddd xmm1, xmm6 889 paddd xmm2, xmm7 890 
paddd xmm3, xmm4 891 pxor xmm15, xmm0 892 pxor xmm12, xmm1 893 pxor xmm13, xmm2 894 pxor xmm14, xmm3 895 pshuflw xmm15, xmm15, 0B1H 896 pshufhw xmm15, xmm15, 0B1H 897 pshuflw xmm12, xmm12, 0B1H 898 pshufhw xmm12, xmm12, 0B1H 899 pshuflw xmm13, xmm13, 0B1H 900 pshufhw xmm13, xmm13, 0B1H 901 pshuflw xmm14, xmm14, 0B1H 902 pshufhw xmm14, xmm14, 0B1H 903 paddd xmm10, xmm15 904 paddd xmm11, xmm12 905 movdqa xmm8, xmmword ptr [rsp+100H] 906 paddd xmm8, xmm13 907 paddd xmm9, xmm14 908 pxor xmm5, xmm10 909 pxor xmm6, xmm11 910 pxor xmm7, xmm8 911 pxor xmm4, xmm9 912 movdqa xmmword ptr [rsp+100H], xmm8 913 movdqa xmm8, xmm5 914 psrld xmm8, 12 915 pslld xmm5, 20 916 por xmm5, xmm8 917 movdqa xmm8, xmm6 918 psrld xmm8, 12 919 pslld xmm6, 20 920 por xmm6, xmm8 921 movdqa xmm8, xmm7 922 psrld xmm8, 12 923 pslld xmm7, 20 924 por xmm7, xmm8 925 movdqa xmm8, xmm4 926 psrld xmm8, 12 927 pslld xmm4, 20 928 por xmm4, xmm8 929 paddd xmm0, xmmword ptr [rsp] 930 paddd xmm1, xmmword ptr [rsp+20H] 931 paddd xmm2, xmmword ptr [rsp+80H] 932 paddd xmm3, xmmword ptr [rsp+60H] 933 paddd xmm0, xmm5 934 paddd xmm1, xmm6 935 paddd xmm2, xmm7 936 paddd xmm3, xmm4 937 pxor xmm15, xmm0 938 pxor xmm12, xmm1 939 pxor xmm13, xmm2 940 pxor xmm14, xmm3 941 movdqa xmm8, xmm15 942 psrld xmm15, 8 943 pslld xmm8, 24 944 pxor xmm15, xmm8 945 movdqa xmm8, xmm12 946 psrld xmm12, 8 947 pslld xmm8, 24 948 pxor xmm12, xmm8 949 movdqa xmm8, xmm13 950 psrld xmm13, 8 951 pslld xmm8, 24 952 pxor xmm13, xmm8 953 movdqa xmm8, xmm14 954 psrld xmm14, 8 955 pslld xmm8, 24 956 pxor xmm14, xmm8 957 paddd xmm10, xmm15 958 paddd xmm11, xmm12 959 movdqa xmm8, xmmword ptr [rsp+100H] 960 paddd xmm8, xmm13 961 paddd xmm9, xmm14 962 pxor xmm5, xmm10 963 pxor xmm6, xmm11 964 pxor xmm7, xmm8 965 pxor xmm4, xmm9 966 movdqa xmmword ptr [rsp+100H], xmm8 967 movdqa xmm8, xmm5 968 psrld xmm8, 7 969 pslld xmm5, 25 970 por xmm5, xmm8 971 movdqa xmm8, xmm6 972 psrld xmm8, 7 973 pslld xmm6, 25 974 por xmm6, xmm8 975 movdqa xmm8, xmm7 976 
psrld xmm8, 7 977 pslld xmm7, 25 978 por xmm7, xmm8 979 movdqa xmm8, xmm4 980 psrld xmm8, 7 981 pslld xmm4, 25 982 por xmm4, xmm8 983 paddd xmm0, xmmword ptr [rsp+0C0H] 984 paddd xmm1, xmmword ptr [rsp+90H] 985 paddd xmm2, xmmword ptr [rsp+0F0H] 986 paddd xmm3, xmmword ptr [rsp+0E0H] 987 paddd xmm0, xmm4 988 paddd xmm1, xmm5 989 paddd xmm2, xmm6 990 paddd xmm3, xmm7 991 pxor xmm12, xmm0 992 pxor xmm13, xmm1 993 pxor xmm14, xmm2 994 pxor xmm15, xmm3 995 pshuflw xmm12, xmm12, 0B1H 996 pshufhw xmm12, xmm12, 0B1H 997 pshuflw xmm13, xmm13, 0B1H 998 pshufhw xmm13, xmm13, 0B1H 999 pshuflw xmm14, xmm14, 0B1H 1000 pshufhw xmm14, xmm14, 0B1H 1001 pshuflw xmm15, xmm15, 0B1H 1002 pshufhw xmm15, xmm15, 0B1H 1003 movdqa xmm8, xmmword ptr [rsp+100H] 1004 paddd xmm8, xmm12 1005 paddd xmm9, xmm13 1006 paddd xmm10, xmm14 1007 paddd xmm11, xmm15 1008 pxor xmm4, xmm8 1009 pxor xmm5, xmm9 1010 pxor xmm6, xmm10 1011 pxor xmm7, xmm11 1012 movdqa xmmword ptr [rsp+100H], xmm8 1013 movdqa xmm8, xmm4 1014 psrld xmm8, 12 1015 pslld xmm4, 20 1016 por xmm4, xmm8 1017 movdqa xmm8, xmm5 1018 psrld xmm8, 12 1019 pslld xmm5, 20 1020 por xmm5, xmm8 1021 movdqa xmm8, xmm6 1022 psrld xmm8, 12 1023 pslld xmm6, 20 1024 por xmm6, xmm8 1025 movdqa xmm8, xmm7 1026 psrld xmm8, 12 1027 pslld xmm7, 20 1028 por xmm7, xmm8 1029 paddd xmm0, xmmword ptr [rsp+0D0H] 1030 paddd xmm1, xmmword ptr [rsp+0B0H] 1031 paddd xmm2, xmmword ptr [rsp+0A0H] 1032 paddd xmm3, xmmword ptr [rsp+80H] 1033 paddd xmm0, xmm4 1034 paddd xmm1, xmm5 1035 paddd xmm2, xmm6 1036 paddd xmm3, xmm7 1037 pxor xmm12, xmm0 1038 pxor xmm13, xmm1 1039 pxor xmm14, xmm2 1040 pxor xmm15, xmm3 1041 movdqa xmm8, xmm12 1042 psrld xmm12, 8 1043 pslld xmm8, 24 1044 pxor xmm12, xmm8 1045 movdqa xmm8, xmm13 1046 psrld xmm13, 8 1047 pslld xmm8, 24 1048 pxor xmm13, xmm8 1049 movdqa xmm8, xmm14 1050 psrld xmm14, 8 1051 pslld xmm8, 24 1052 pxor xmm14, xmm8 1053 movdqa xmm8, xmm15 1054 psrld xmm15, 8 1055 pslld xmm8, 24 1056 pxor xmm15, xmm8 1057 movdqa xmm8, 
xmmword ptr [rsp+100H] 1058 paddd xmm8, xmm12 1059 paddd xmm9, xmm13 1060 paddd xmm10, xmm14 1061 paddd xmm11, xmm15 1062 pxor xmm4, xmm8 1063 pxor xmm5, xmm9 1064 pxor xmm6, xmm10 1065 pxor xmm7, xmm11 1066 movdqa xmmword ptr [rsp+100H], xmm8 1067 movdqa xmm8, xmm4 1068 psrld xmm8, 7 1069 pslld xmm4, 25 1070 por xmm4, xmm8 1071 movdqa xmm8, xmm5 1072 psrld xmm8, 7 1073 pslld xmm5, 25 1074 por xmm5, xmm8 1075 movdqa xmm8, xmm6 1076 psrld xmm8, 7 1077 pslld xmm6, 25 1078 por xmm6, xmm8 1079 movdqa xmm8, xmm7 1080 psrld xmm8, 7 1081 pslld xmm7, 25 1082 por xmm7, xmm8 1083 paddd xmm0, xmmword ptr [rsp+70H] 1084 paddd xmm1, xmmword ptr [rsp+50H] 1085 paddd xmm2, xmmword ptr [rsp] 1086 paddd xmm3, xmmword ptr [rsp+60H] 1087 paddd xmm0, xmm5 1088 paddd xmm1, xmm6 1089 paddd xmm2, xmm7 1090 paddd xmm3, xmm4 1091 pxor xmm15, xmm0 1092 pxor xmm12, xmm1 1093 pxor xmm13, xmm2 1094 pxor xmm14, xmm3 1095 pshuflw xmm15, xmm15, 0B1H 1096 pshufhw xmm15, xmm15, 0B1H 1097 pshuflw xmm12, xmm12, 0B1H 1098 pshufhw xmm12, xmm12, 0B1H 1099 pshuflw xmm13, xmm13, 0B1H 1100 pshufhw xmm13, xmm13, 0B1H 1101 pshuflw xmm14, xmm14, 0B1H 1102 pshufhw xmm14, xmm14, 0B1H 1103 paddd xmm10, xmm15 1104 paddd xmm11, xmm12 1105 movdqa xmm8, xmmword ptr [rsp+100H] 1106 paddd xmm8, xmm13 1107 paddd xmm9, xmm14 1108 pxor xmm5, xmm10 1109 pxor xmm6, xmm11 1110 pxor xmm7, xmm8 1111 pxor xmm4, xmm9 1112 movdqa xmmword ptr [rsp+100H], xmm8 1113 movdqa xmm8, xmm5 1114 psrld xmm8, 12 1115 pslld xmm5, 20 1116 por xmm5, xmm8 1117 movdqa xmm8, xmm6 1118 psrld xmm8, 12 1119 pslld xmm6, 20 1120 por xmm6, xmm8 1121 movdqa xmm8, xmm7 1122 psrld xmm8, 12 1123 pslld xmm7, 20 1124 por xmm7, xmm8 1125 movdqa xmm8, xmm4 1126 psrld xmm8, 12 1127 pslld xmm4, 20 1128 por xmm4, xmm8 1129 paddd xmm0, xmmword ptr [rsp+20H] 1130 paddd xmm1, xmmword ptr [rsp+30H] 1131 paddd xmm2, xmmword ptr [rsp+10H] 1132 paddd xmm3, xmmword ptr [rsp+40H] 1133 paddd xmm0, xmm5 1134 paddd xmm1, xmm6 1135 paddd xmm2, xmm7 1136 paddd xmm3, xmm4 1137 
pxor xmm15, xmm0 1138 pxor xmm12, xmm1 1139 pxor xmm13, xmm2 1140 pxor xmm14, xmm3 1141 movdqa xmm8, xmm15 1142 psrld xmm15, 8 1143 pslld xmm8, 24 1144 pxor xmm15, xmm8 1145 movdqa xmm8, xmm12 1146 psrld xmm12, 8 1147 pslld xmm8, 24 1148 pxor xmm12, xmm8 1149 movdqa xmm8, xmm13 1150 psrld xmm13, 8 1151 pslld xmm8, 24 1152 pxor xmm13, xmm8 1153 movdqa xmm8, xmm14 1154 psrld xmm14, 8 1155 pslld xmm8, 24 1156 pxor xmm14, xmm8 1157 paddd xmm10, xmm15 1158 paddd xmm11, xmm12 1159 movdqa xmm8, xmmword ptr [rsp+100H] 1160 paddd xmm8, xmm13 1161 paddd xmm9, xmm14 1162 pxor xmm5, xmm10 1163 pxor xmm6, xmm11 1164 pxor xmm7, xmm8 1165 pxor xmm4, xmm9 1166 movdqa xmmword ptr [rsp+100H], xmm8 1167 movdqa xmm8, xmm5 1168 psrld xmm8, 7 1169 pslld xmm5, 25 1170 por xmm5, xmm8 1171 movdqa xmm8, xmm6 1172 psrld xmm8, 7 1173 pslld xmm6, 25 1174 por xmm6, xmm8 1175 movdqa xmm8, xmm7 1176 psrld xmm8, 7 1177 pslld xmm7, 25 1178 por xmm7, xmm8 1179 movdqa xmm8, xmm4 1180 psrld xmm8, 7 1181 pslld xmm4, 25 1182 por xmm4, xmm8 1183 paddd xmm0, xmmword ptr [rsp+90H] 1184 paddd xmm1, xmmword ptr [rsp+0B0H] 1185 paddd xmm2, xmmword ptr [rsp+80H] 1186 paddd xmm3, xmmword ptr [rsp+0F0H] 1187 paddd xmm0, xmm4 1188 paddd xmm1, xmm5 1189 paddd xmm2, xmm6 1190 paddd xmm3, xmm7 1191 pxor xmm12, xmm0 1192 pxor xmm13, xmm1 1193 pxor xmm14, xmm2 1194 pxor xmm15, xmm3 1195 pshuflw xmm12, xmm12, 0B1H 1196 pshufhw xmm12, xmm12, 0B1H 1197 pshuflw xmm13, xmm13, 0B1H 1198 pshufhw xmm13, xmm13, 0B1H 1199 pshuflw xmm14, xmm14, 0B1H 1200 pshufhw xmm14, xmm14, 0B1H 1201 pshuflw xmm15, xmm15, 0B1H 1202 pshufhw xmm15, xmm15, 0B1H 1203 movdqa xmm8, xmmword ptr [rsp+100H] 1204 paddd xmm8, xmm12 1205 paddd xmm9, xmm13 1206 paddd xmm10, xmm14 1207 paddd xmm11, xmm15 1208 pxor xmm4, xmm8 1209 pxor xmm5, xmm9 1210 pxor xmm6, xmm10 1211 pxor xmm7, xmm11 1212 movdqa xmmword ptr [rsp+100H], xmm8 1213 movdqa xmm8, xmm4 1214 psrld xmm8, 12 1215 pslld xmm4, 20 1216 por xmm4, xmm8 1217 movdqa xmm8, xmm5 1218 psrld xmm8, 12 1219 
pslld xmm5, 20 1220 por xmm5, xmm8 1221 movdqa xmm8, xmm6 1222 psrld xmm8, 12 1223 pslld xmm6, 20 1224 por xmm6, xmm8 1225 movdqa xmm8, xmm7 1226 psrld xmm8, 12 1227 pslld xmm7, 20 1228 por xmm7, xmm8 1229 paddd xmm0, xmmword ptr [rsp+0E0H] 1230 paddd xmm1, xmmword ptr [rsp+50H] 1231 paddd xmm2, xmmword ptr [rsp+0C0H] 1232 paddd xmm3, xmmword ptr [rsp+10H] 1233 paddd xmm0, xmm4 1234 paddd xmm1, xmm5 1235 paddd xmm2, xmm6 1236 paddd xmm3, xmm7 1237 pxor xmm12, xmm0 1238 pxor xmm13, xmm1 1239 pxor xmm14, xmm2 1240 pxor xmm15, xmm3 1241 movdqa xmm8, xmm12 1242 psrld xmm12, 8 1243 pslld xmm8, 24 1244 pxor xmm12, xmm8 1245 movdqa xmm8, xmm13 1246 psrld xmm13, 8 1247 pslld xmm8, 24 1248 pxor xmm13, xmm8 1249 movdqa xmm8, xmm14 1250 psrld xmm14, 8 1251 pslld xmm8, 24 1252 pxor xmm14, xmm8 1253 movdqa xmm8, xmm15 1254 psrld xmm15, 8 1255 pslld xmm8, 24 1256 pxor xmm15, xmm8 1257 movdqa xmm8, xmmword ptr [rsp+100H] 1258 paddd xmm8, xmm12 1259 paddd xmm9, xmm13 1260 paddd xmm10, xmm14 1261 paddd xmm11, xmm15 1262 pxor xmm4, xmm8 1263 pxor xmm5, xmm9 1264 pxor xmm6, xmm10 1265 pxor xmm7, xmm11 1266 movdqa xmmword ptr [rsp+100H], xmm8 1267 movdqa xmm8, xmm4 1268 psrld xmm8, 7 1269 pslld xmm4, 25 1270 por xmm4, xmm8 1271 movdqa xmm8, xmm5 1272 psrld xmm8, 7 1273 pslld xmm5, 25 1274 por xmm5, xmm8 1275 movdqa xmm8, xmm6 1276 psrld xmm8, 7 1277 pslld xmm6, 25 1278 por xmm6, xmm8 1279 movdqa xmm8, xmm7 1280 psrld xmm8, 7 1281 pslld xmm7, 25 1282 por xmm7, xmm8 1283 paddd xmm0, xmmword ptr [rsp+0D0H] 1284 paddd xmm1, xmmword ptr [rsp] 1285 paddd xmm2, xmmword ptr [rsp+20H] 1286 paddd xmm3, xmmword ptr [rsp+40H] 1287 paddd xmm0, xmm5 1288 paddd xmm1, xmm6 1289 paddd xmm2, xmm7 1290 paddd xmm3, xmm4 1291 pxor xmm15, xmm0 1292 pxor xmm12, xmm1 1293 pxor xmm13, xmm2 1294 pxor xmm14, xmm3 1295 pshuflw xmm15, xmm15, 0B1H 1296 pshufhw xmm15, xmm15, 0B1H 1297 pshuflw xmm12, xmm12, 0B1H 1298 pshufhw xmm12, xmm12, 0B1H 1299 pshuflw xmm13, xmm13, 0B1H 1300 pshufhw xmm13, xmm13, 0B1H 1301 
pshuflw xmm14, xmm14, 0B1H 1302 pshufhw xmm14, xmm14, 0B1H 1303 paddd xmm10, xmm15 1304 paddd xmm11, xmm12 1305 movdqa xmm8, xmmword ptr [rsp+100H] 1306 paddd xmm8, xmm13 1307 paddd xmm9, xmm14 1308 pxor xmm5, xmm10 1309 pxor xmm6, xmm11 1310 pxor xmm7, xmm8 1311 pxor xmm4, xmm9 1312 movdqa xmmword ptr [rsp+100H], xmm8 1313 movdqa xmm8, xmm5 1314 psrld xmm8, 12 1315 pslld xmm5, 20 1316 por xmm5, xmm8 1317 movdqa xmm8, xmm6 1318 psrld xmm8, 12 1319 pslld xmm6, 20 1320 por xmm6, xmm8 1321 movdqa xmm8, xmm7 1322 psrld xmm8, 12 1323 pslld xmm7, 20 1324 por xmm7, xmm8 1325 movdqa xmm8, xmm4 1326 psrld xmm8, 12 1327 pslld xmm4, 20 1328 por xmm4, xmm8 1329 paddd xmm0, xmmword ptr [rsp+30H] 1330 paddd xmm1, xmmword ptr [rsp+0A0H] 1331 paddd xmm2, xmmword ptr [rsp+60H] 1332 paddd xmm3, xmmword ptr [rsp+70H] 1333 paddd xmm0, xmm5 1334 paddd xmm1, xmm6 1335 paddd xmm2, xmm7 1336 paddd xmm3, xmm4 1337 pxor xmm15, xmm0 1338 pxor xmm12, xmm1 1339 pxor xmm13, xmm2 1340 pxor xmm14, xmm3 1341 movdqa xmm8, xmm15 1342 psrld xmm15, 8 1343 pslld xmm8, 24 1344 pxor xmm15, xmm8 1345 movdqa xmm8, xmm12 1346 psrld xmm12, 8 1347 pslld xmm8, 24 1348 pxor xmm12, xmm8 1349 movdqa xmm8, xmm13 1350 psrld xmm13, 8 1351 pslld xmm8, 24 1352 pxor xmm13, xmm8 1353 movdqa xmm8, xmm14 1354 psrld xmm14, 8 1355 pslld xmm8, 24 1356 pxor xmm14, xmm8 1357 paddd xmm10, xmm15 1358 paddd xmm11, xmm12 1359 movdqa xmm8, xmmword ptr [rsp+100H] 1360 paddd xmm8, xmm13 1361 paddd xmm9, xmm14 1362 pxor xmm5, xmm10 1363 pxor xmm6, xmm11 1364 pxor xmm7, xmm8 1365 pxor xmm4, xmm9 1366 movdqa xmmword ptr [rsp+100H], xmm8 1367 movdqa xmm8, xmm5 1368 psrld xmm8, 7 1369 pslld xmm5, 25 1370 por xmm5, xmm8 1371 movdqa xmm8, xmm6 1372 psrld xmm8, 7 1373 pslld xmm6, 25 1374 por xmm6, xmm8 1375 movdqa xmm8, xmm7 1376 psrld xmm8, 7 1377 pslld xmm7, 25 1378 por xmm7, xmm8 1379 movdqa xmm8, xmm4 1380 psrld xmm8, 7 1381 pslld xmm4, 25 1382 por xmm4, xmm8 1383 paddd xmm0, xmmword ptr [rsp+0B0H] 1384 paddd xmm1, xmmword ptr [rsp+50H] 
1385 paddd xmm2, xmmword ptr [rsp+10H] 1386 paddd xmm3, xmmword ptr [rsp+80H] 1387 paddd xmm0, xmm4 1388 paddd xmm1, xmm5 1389 paddd xmm2, xmm6 1390 paddd xmm3, xmm7 1391 pxor xmm12, xmm0 1392 pxor xmm13, xmm1 1393 pxor xmm14, xmm2 1394 pxor xmm15, xmm3 1395 pshuflw xmm12, xmm12, 0B1H 1396 pshufhw xmm12, xmm12, 0B1H 1397 pshuflw xmm13, xmm13, 0B1H 1398 pshufhw xmm13, xmm13, 0B1H 1399 pshuflw xmm14, xmm14, 0B1H 1400 pshufhw xmm14, xmm14, 0B1H 1401 pshuflw xmm15, xmm15, 0B1H 1402 pshufhw xmm15, xmm15, 0B1H 1403 movdqa xmm8, xmmword ptr [rsp+100H] 1404 paddd xmm8, xmm12 1405 paddd xmm9, xmm13 1406 paddd xmm10, xmm14 1407 paddd xmm11, xmm15 1408 pxor xmm4, xmm8 1409 pxor xmm5, xmm9 1410 pxor xmm6, xmm10 1411 pxor xmm7, xmm11 1412 movdqa xmmword ptr [rsp+100H], xmm8 1413 movdqa xmm8, xmm4 1414 psrld xmm8, 12 1415 pslld xmm4, 20 1416 por xmm4, xmm8 1417 movdqa xmm8, xmm5 1418 psrld xmm8, 12 1419 pslld xmm5, 20 1420 por xmm5, xmm8 1421 movdqa xmm8, xmm6 1422 psrld xmm8, 12 1423 pslld xmm6, 20 1424 por xmm6, xmm8 1425 movdqa xmm8, xmm7 1426 psrld xmm8, 12 1427 pslld xmm7, 20 1428 por xmm7, xmm8 1429 paddd xmm0, xmmword ptr [rsp+0F0H] 1430 paddd xmm1, xmmword ptr [rsp] 1431 paddd xmm2, xmmword ptr [rsp+90H] 1432 paddd xmm3, xmmword ptr [rsp+60H] 1433 paddd xmm0, xmm4 1434 paddd xmm1, xmm5 1435 paddd xmm2, xmm6 1436 paddd xmm3, xmm7 1437 pxor xmm12, xmm0 1438 pxor xmm13, xmm1 1439 pxor xmm14, xmm2 1440 pxor xmm15, xmm3 1441 movdqa xmm8, xmm12 1442 psrld xmm12, 8 1443 pslld xmm8, 24 1444 pxor xmm12, xmm8 1445 movdqa xmm8, xmm13 1446 psrld xmm13, 8 1447 pslld xmm8, 24 1448 pxor xmm13, xmm8 1449 movdqa xmm8, xmm14 1450 psrld xmm14, 8 1451 pslld xmm8, 24 1452 pxor xmm14, xmm8 1453 movdqa xmm8, xmm15 1454 psrld xmm15, 8 1455 pslld xmm8, 24 1456 pxor xmm15, xmm8 1457 movdqa xmm8, xmmword ptr [rsp+100H] 1458 paddd xmm8, xmm12 1459 paddd xmm9, xmm13 1460 paddd xmm10, xmm14 1461 paddd xmm11, xmm15 1462 pxor xmm4, xmm8 1463 pxor xmm5, xmm9 1464 pxor xmm6, xmm10 1465 pxor xmm7, xmm11 
1466 movdqa xmmword ptr [rsp+100H], xmm8 1467 movdqa xmm8, xmm4 1468 psrld xmm8, 7 1469 pslld xmm4, 25 1470 por xmm4, xmm8 1471 movdqa xmm8, xmm5 1472 psrld xmm8, 7 1473 pslld xmm5, 25 1474 por xmm5, xmm8 1475 movdqa xmm8, xmm6 1476 psrld xmm8, 7 1477 pslld xmm6, 25 1478 por xmm6, xmm8 1479 movdqa xmm8, xmm7 1480 psrld xmm8, 7 1481 pslld xmm7, 25 1482 por xmm7, xmm8 1483 paddd xmm0, xmmword ptr [rsp+0E0H] 1484 paddd xmm1, xmmword ptr [rsp+20H] 1485 paddd xmm2, xmmword ptr [rsp+30H] 1486 paddd xmm3, xmmword ptr [rsp+70H] 1487 paddd xmm0, xmm5 1488 paddd xmm1, xmm6 1489 paddd xmm2, xmm7 1490 paddd xmm3, xmm4 1491 pxor xmm15, xmm0 1492 pxor xmm12, xmm1 1493 pxor xmm13, xmm2 1494 pxor xmm14, xmm3 1495 pshuflw xmm15, xmm15, 0B1H 1496 pshufhw xmm15, xmm15, 0B1H 1497 pshuflw xmm12, xmm12, 0B1H 1498 pshufhw xmm12, xmm12, 0B1H 1499 pshuflw xmm13, xmm13, 0B1H 1500 pshufhw xmm13, xmm13, 0B1H 1501 pshuflw xmm14, xmm14, 0B1H 1502 pshufhw xmm14, xmm14, 0B1H 1503 paddd xmm10, xmm15 1504 paddd xmm11, xmm12 1505 movdqa xmm8, xmmword ptr [rsp+100H] 1506 paddd xmm8, xmm13 1507 paddd xmm9, xmm14 1508 pxor xmm5, xmm10 1509 pxor xmm6, xmm11 1510 pxor xmm7, xmm8 1511 pxor xmm4, xmm9 1512 movdqa xmmword ptr [rsp+100H], xmm8 1513 movdqa xmm8, xmm5 1514 psrld xmm8, 12 1515 pslld xmm5, 20 1516 por xmm5, xmm8 1517 movdqa xmm8, xmm6 1518 psrld xmm8, 12 1519 pslld xmm6, 20 1520 por xmm6, xmm8 1521 movdqa xmm8, xmm7 1522 psrld xmm8, 12 1523 pslld xmm7, 20 1524 por xmm7, xmm8 1525 movdqa xmm8, xmm4 1526 psrld xmm8, 12 1527 pslld xmm4, 20 1528 por xmm4, xmm8 1529 paddd xmm0, xmmword ptr [rsp+0A0H] 1530 paddd xmm1, xmmword ptr [rsp+0C0H] 1531 paddd xmm2, xmmword ptr [rsp+40H] 1532 paddd xmm3, xmmword ptr [rsp+0D0H] 1533 paddd xmm0, xmm5 1534 paddd xmm1, xmm6 1535 paddd xmm2, xmm7 1536 paddd xmm3, xmm4 1537 pxor xmm15, xmm0 1538 pxor xmm12, xmm1 1539 pxor xmm13, xmm2 1540 pxor xmm14, xmm3 1541 movdqa xmm8, xmm15 1542 psrld xmm15, 8 1543 pslld xmm8, 24 1544 pxor xmm15, xmm8 1545 movdqa xmm8, xmm12 
1546 psrld xmm12, 8 1547 pslld xmm8, 24 1548 pxor xmm12, xmm8 1549 movdqa xmm8, xmm13 1550 psrld xmm13, 8 1551 pslld xmm8, 24 1552 pxor xmm13, xmm8 1553 movdqa xmm8, xmm14 1554 psrld xmm14, 8 1555 pslld xmm8, 24 1556 pxor xmm14, xmm8 1557 paddd xmm10, xmm15 1558 paddd xmm11, xmm12 1559 movdqa xmm8, xmmword ptr [rsp+100H] 1560 paddd xmm8, xmm13 1561 paddd xmm9, xmm14 1562 pxor xmm5, xmm10 1563 pxor xmm6, xmm11 1564 pxor xmm7, xmm8 1565 pxor xmm4, xmm9 1566 pxor xmm0, xmm8 1567 pxor xmm1, xmm9 1568 pxor xmm2, xmm10 1569 pxor xmm3, xmm11 1570 movdqa xmm8, xmm5 1571 psrld xmm8, 7 1572 pslld xmm5, 25 1573 por xmm5, xmm8 1574 movdqa xmm8, xmm6 1575 psrld xmm8, 7 1576 pslld xmm6, 25 1577 por xmm6, xmm8 1578 movdqa xmm8, xmm7 1579 psrld xmm8, 7 1580 pslld xmm7, 25 1581 por xmm7, xmm8 1582 movdqa xmm8, xmm4 1583 psrld xmm8, 7 1584 pslld xmm4, 25 1585 por xmm4, xmm8 1586 pxor xmm4, xmm12 1587 pxor xmm5, xmm13 1588 pxor xmm6, xmm14 1589 pxor xmm7, xmm15 1590 mov eax, r13d 1591 jne innerloop4 1592 movdqa xmm9, xmm0 1593 punpckldq xmm0, xmm1 1594 punpckhdq xmm9, xmm1 1595 movdqa xmm11, xmm2 1596 punpckldq xmm2, xmm3 1597 punpckhdq xmm11, xmm3 1598 movdqa xmm1, xmm0 1599 punpcklqdq xmm0, xmm2 1600 punpckhqdq xmm1, xmm2 1601 movdqa xmm3, xmm9 1602 punpcklqdq xmm9, xmm11 1603 punpckhqdq xmm3, xmm11 1604 movdqu xmmword ptr [rbx], xmm0 1605 movdqu xmmword ptr [rbx+20H], xmm1 1606 movdqu xmmword ptr [rbx+40H], xmm9 1607 movdqu xmmword ptr [rbx+60H], xmm3 1608 movdqa xmm9, xmm4 1609 punpckldq xmm4, xmm5 1610 punpckhdq xmm9, xmm5 1611 movdqa xmm11, xmm6 1612 punpckldq xmm6, xmm7 1613 punpckhdq xmm11, xmm7 1614 movdqa xmm5, xmm4 1615 punpcklqdq xmm4, xmm6 1616 punpckhqdq xmm5, xmm6 1617 movdqa xmm7, xmm9 1618 punpcklqdq xmm9, xmm11 1619 punpckhqdq xmm7, xmm11 1620 movdqu xmmword ptr [rbx+10H], xmm4 1621 movdqu xmmword ptr [rbx+30H], xmm5 1622 movdqu xmmword ptr [rbx+50H], xmm9 1623 movdqu xmmword ptr [rbx+70H], xmm7 1624 movdqa xmm1, xmmword ptr [rsp+110H] 1625 movdqa xmm0, xmm1 1626 
paddd xmm1, xmmword ptr [rsp+150H] 1627 movdqa xmmword ptr [rsp+110H], xmm1 1628 pxor xmm0, xmmword ptr [CMP_MSB_MASK] 1629 pxor xmm1, xmmword ptr [CMP_MSB_MASK] 1630 pcmpgtd xmm0, xmm1 1631 movdqa xmm1, xmmword ptr [rsp+120H] 1632 psubd xmm1, xmm0 1633 movdqa xmmword ptr [rsp+120H], xmm1 1634 add rbx, 128 1635 add rdi, 32 1636 sub rsi, 4 1637 cmp rsi, 4 1638 jnc outerloop4 1639 test rsi, rsi 1640 jne final3blocks 1641 unwind: 1642 movdqa xmm6, xmmword ptr [rsp+170H] 1643 movdqa xmm7, xmmword ptr [rsp+180H] 1644 movdqa xmm8, xmmword ptr [rsp+190H] 1645 movdqa xmm9, xmmword ptr [rsp+1A0H] 1646 movdqa xmm10, xmmword ptr [rsp+1B0H] 1647 movdqa xmm11, xmmword ptr [rsp+1C0H] 1648 movdqa xmm12, xmmword ptr [rsp+1D0H] 1649 movdqa xmm13, xmmword ptr [rsp+1E0H] 1650 movdqa xmm14, xmmword ptr [rsp+1F0H] 1651 movdqa xmm15, xmmword ptr [rsp+200H] 1652 mov rsp, rbp 1653 pop rbp 1654 pop rbx 1655 pop rdi 1656 pop rsi 1657 pop r12 1658 pop r13 1659 pop r14 1660 pop r15 1661 ret 1662 ALIGN 16 1663 final3blocks: 1664 test esi, 2H 1665 je final1block 1666 movups xmm0, xmmword ptr [rcx] 1667 movups xmm1, xmmword ptr [rcx+10H] 1668 movaps xmm8, xmm0 1669 movaps xmm9, xmm1 1670 movd xmm13, dword ptr [rsp+110H] 1671 movd xmm14, dword ptr [rsp+120H] 1672 punpckldq xmm13, xmm14 1673 movaps xmmword ptr [rsp], xmm13 1674 movd xmm14, dword ptr [rsp+114H] 1675 movd xmm13, dword ptr [rsp+124H] 1676 punpckldq xmm14, xmm13 1677 movaps xmmword ptr [rsp+10H], xmm14 1678 mov r8, qword ptr [rdi] 1679 mov r9, qword ptr [rdi+8H] 1680 movzx eax, byte ptr [rbp+80H] 1681 or eax, r13d 1682 xor edx, edx 1683 innerloop2: 1684 mov r14d, eax 1685 or eax, r12d 1686 add rdx, 64 1687 cmp rdx, r15 1688 cmovne eax, r14d 1689 movaps xmm2, xmmword ptr [BLAKE3_IV] 1690 movaps xmm10, xmm2 1691 movups xmm4, xmmword ptr [r8+rdx-40H] 1692 movups xmm5, xmmword ptr [r8+rdx-30H] 1693 movaps xmm3, xmm4 1694 shufps xmm4, xmm5, 136 1695 shufps xmm3, xmm5, 221 1696 movaps xmm5, xmm3 1697 movups xmm6, xmmword ptr [r8+rdx-20H] 
1698 movups xmm7, xmmword ptr [r8+rdx-10H] 1699 movaps xmm3, xmm6 1700 shufps xmm6, xmm7, 136 1701 pshufd xmm6, xmm6, 93H 1702 shufps xmm3, xmm7, 221 1703 pshufd xmm7, xmm3, 93H 1704 movups xmm12, xmmword ptr [r9+rdx-40H] 1705 movups xmm13, xmmword ptr [r9+rdx-30H] 1706 movaps xmm11, xmm12 1707 shufps xmm12, xmm13, 136 1708 shufps xmm11, xmm13, 221 1709 movaps xmm13, xmm11 1710 movups xmm14, xmmword ptr [r9+rdx-20H] 1711 movups xmm15, xmmword ptr [r9+rdx-10H] 1712 movaps xmm11, xmm14 1713 shufps xmm14, xmm15, 136 1714 pshufd xmm14, xmm14, 93H 1715 shufps xmm11, xmm15, 221 1716 pshufd xmm15, xmm11, 93H 1717 shl rax, 20H 1718 or rax, 40H 1719 movd xmm3, rax 1720 movdqa xmmword ptr [rsp+20H], xmm3 1721 movaps xmm3, xmmword ptr [rsp] 1722 movaps xmm11, xmmword ptr [rsp+10H] 1723 punpcklqdq xmm3, xmmword ptr [rsp+20H] 1724 punpcklqdq xmm11, xmmword ptr [rsp+20H] 1725 mov al, 7 1726 roundloop2: 1727 paddd xmm0, xmm4 1728 paddd xmm8, xmm12 1729 movaps xmmword ptr [rsp+20H], xmm4 1730 movaps xmmword ptr [rsp+30H], xmm12 1731 paddd xmm0, xmm1 1732 paddd xmm8, xmm9 1733 pxor xmm3, xmm0 1734 pxor xmm11, xmm8 1735 pshuflw xmm3, xmm3, 0B1H 1736 pshufhw xmm3, xmm3, 0B1H 1737 pshuflw xmm11, xmm11, 0B1H 1738 pshufhw xmm11, xmm11, 0B1H 1739 paddd xmm2, xmm3 1740 paddd xmm10, xmm11 1741 pxor xmm1, xmm2 1742 pxor xmm9, xmm10 1743 movdqa xmm4, xmm1 1744 pslld xmm1, 20 1745 psrld xmm4, 12 1746 por xmm1, xmm4 1747 movdqa xmm4, xmm9 1748 pslld xmm9, 20 1749 psrld xmm4, 12 1750 por xmm9, xmm4 1751 paddd xmm0, xmm5 1752 paddd xmm8, xmm13 1753 movaps xmmword ptr [rsp+40H], xmm5 1754 movaps xmmword ptr [rsp+50H], xmm13 1755 paddd xmm0, xmm1 1756 paddd xmm8, xmm9 1757 pxor xmm3, xmm0 1758 pxor xmm11, xmm8 1759 movdqa xmm13, xmm3 1760 psrld xmm3, 8 1761 pslld xmm13, 24 1762 pxor xmm3, xmm13 1763 movdqa xmm13, xmm11 1764 psrld xmm11, 8 1765 pslld xmm13, 24 1766 pxor xmm11, xmm13 1767 paddd xmm2, xmm3 1768 paddd xmm10, xmm11 1769 pxor xmm1, xmm2 1770 pxor xmm9, xmm10 1771 movdqa xmm4, xmm1 1772 
pslld xmm1, 25 1773 psrld xmm4, 7 1774 por xmm1, xmm4 1775 movdqa xmm4, xmm9 1776 pslld xmm9, 25 1777 psrld xmm4, 7 1778 por xmm9, xmm4 1779 pshufd xmm0, xmm0, 93H 1780 pshufd xmm8, xmm8, 93H 1781 pshufd xmm3, xmm3, 4EH 1782 pshufd xmm11, xmm11, 4EH 1783 pshufd xmm2, xmm2, 39H 1784 pshufd xmm10, xmm10, 39H 1785 paddd xmm0, xmm6 1786 paddd xmm8, xmm14 1787 paddd xmm0, xmm1 1788 paddd xmm8, xmm9 1789 pxor xmm3, xmm0 1790 pxor xmm11, xmm8 1791 pshuflw xmm3, xmm3, 0B1H 1792 pshufhw xmm3, xmm3, 0B1H 1793 pshuflw xmm11, xmm11, 0B1H 1794 pshufhw xmm11, xmm11, 0B1H 1795 paddd xmm2, xmm3 1796 paddd xmm10, xmm11 1797 pxor xmm1, xmm2 1798 pxor xmm9, xmm10 1799 movdqa xmm4, xmm1 1800 pslld xmm1, 20 1801 psrld xmm4, 12 1802 por xmm1, xmm4 1803 movdqa xmm4, xmm9 1804 pslld xmm9, 20 1805 psrld xmm4, 12 1806 por xmm9, xmm4 1807 paddd xmm0, xmm7 1808 paddd xmm8, xmm15 1809 paddd xmm0, xmm1 1810 paddd xmm8, xmm9 1811 pxor xmm3, xmm0 1812 pxor xmm11, xmm8 1813 movdqa xmm13, xmm3 1814 psrld xmm3, 8 1815 pslld xmm13, 24 1816 pxor xmm3, xmm13 1817 movdqa xmm13, xmm11 1818 psrld xmm11, 8 1819 pslld xmm13, 24 1820 pxor xmm11, xmm13 1821 paddd xmm2, xmm3 1822 paddd xmm10, xmm11 1823 pxor xmm1, xmm2 1824 pxor xmm9, xmm10 1825 movdqa xmm4, xmm1 1826 pslld xmm1, 25 1827 psrld xmm4, 7 1828 por xmm1, xmm4 1829 movdqa xmm4, xmm9 1830 pslld xmm9, 25 1831 psrld xmm4, 7 1832 por xmm9, xmm4 1833 pshufd xmm0, xmm0, 39H 1834 pshufd xmm8, xmm8, 39H 1835 pshufd xmm3, xmm3, 4EH 1836 pshufd xmm11, xmm11, 4EH 1837 pshufd xmm2, xmm2, 93H 1838 pshufd xmm10, xmm10, 93H 1839 dec al 1840 je endroundloop2 1841 movdqa xmm12, xmmword ptr [rsp+20H] 1842 movdqa xmm5, xmmword ptr [rsp+40H] 1843 pshufd xmm13, xmm12, 0FH 1844 shufps xmm12, xmm5, 214 1845 pshufd xmm4, xmm12, 39H 1846 movdqa xmm12, xmm6 1847 shufps xmm12, xmm7, 250 1848 pand xmm13, xmmword ptr [PBLENDW_0x33_MASK] 1849 pand xmm12, xmmword ptr [PBLENDW_0xCC_MASK] 1850 por xmm13, xmm12 1851 movdqa xmmword ptr [rsp+20H], xmm13 1852 movdqa xmm12, xmm7 1853 
punpcklqdq xmm12, xmm5 1854 movdqa xmm13, xmm6 1855 pand xmm12, xmmword ptr [PBLENDW_0x3F_MASK] 1856 pand xmm13, xmmword ptr [PBLENDW_0xC0_MASK] 1857 por xmm12, xmm13 1858 pshufd xmm12, xmm12, 78H 1859 punpckhdq xmm5, xmm7 1860 punpckldq xmm6, xmm5 1861 pshufd xmm7, xmm6, 1EH 1862 movdqa xmmword ptr [rsp+40H], xmm12 1863 movdqa xmm5, xmmword ptr [rsp+30H] 1864 movdqa xmm13, xmmword ptr [rsp+50H] 1865 pshufd xmm6, xmm5, 0FH 1866 shufps xmm5, xmm13, 214 1867 pshufd xmm12, xmm5, 39H 1868 movdqa xmm5, xmm14 1869 shufps xmm5, xmm15, 250 1870 pand xmm6, xmmword ptr [PBLENDW_0x33_MASK] 1871 pand xmm5, xmmword ptr [PBLENDW_0xCC_MASK] 1872 por xmm6, xmm5 1873 movdqa xmm5, xmm15 1874 punpcklqdq xmm5, xmm13 1875 movdqa xmmword ptr [rsp+30H], xmm2 1876 movdqa xmm2, xmm14 1877 pand xmm5, xmmword ptr [PBLENDW_0x3F_MASK] 1878 pand xmm2, xmmword ptr [PBLENDW_0xC0_MASK] 1879 por xmm5, xmm2 1880 movdqa xmm2, xmmword ptr [rsp+30H] 1881 pshufd xmm5, xmm5, 78H 1882 punpckhdq xmm13, xmm15 1883 punpckldq xmm14, xmm13 1884 pshufd xmm15, xmm14, 1EH 1885 movdqa xmm13, xmm6 1886 movdqa xmm14, xmm5 1887 movdqa xmm5, xmmword ptr [rsp+20H] 1888 movdqa xmm6, xmmword ptr [rsp+40H] 1889 jmp roundloop2 1890 endroundloop2: 1891 pxor xmm0, xmm2 1892 pxor xmm1, xmm3 1893 pxor xmm8, xmm10 1894 pxor xmm9, xmm11 1895 mov eax, r13d 1896 cmp rdx, r15 1897 jne innerloop2 1898 movups xmmword ptr [rbx], xmm0 1899 movups xmmword ptr [rbx+10H], xmm1 1900 movups xmmword ptr [rbx+20H], xmm8 1901 movups xmmword ptr [rbx+30H], xmm9 1902 mov eax, dword ptr [rsp+130H] 1903 neg eax 1904 mov r10d, dword ptr [rsp+110H+8*rax] 1905 mov r11d, dword ptr [rsp+120H+8*rax] 1906 mov dword ptr [rsp+110H], r10d 1907 mov dword ptr [rsp+120H], r11d 1908 add rdi, 16 1909 add rbx, 64 1910 sub rsi, 2 1911 final1block: 1912 test esi, 1H 1913 je unwind 1914 movups xmm0, xmmword ptr [rcx] 1915 movups xmm1, xmmword ptr [rcx+10H] 1916 movd xmm13, dword ptr [rsp+110H] 1917 movd xmm14, dword ptr [rsp+120H] 1918 punpckldq xmm13, xmm14 1919 
mov r8, qword ptr [rdi] 1920 movzx eax, byte ptr [rbp+80H] 1921 or eax, r13d 1922 xor edx, edx 1923 innerloop1: 1924 mov r14d, eax 1925 or eax, r12d 1926 add rdx, 64 1927 cmp rdx, r15 1928 cmovne eax, r14d 1929 movaps xmm2, xmmword ptr [BLAKE3_IV] 1930 shl rax, 32 1931 or rax, 64 1932 movd xmm12, rax 1933 movdqa xmm3, xmm13 1934 punpcklqdq xmm3, xmm12 1935 movups xmm4, xmmword ptr [r8+rdx-40H] 1936 movups xmm5, xmmword ptr [r8+rdx-30H] 1937 movaps xmm8, xmm4 1938 shufps xmm4, xmm5, 136 1939 shufps xmm8, xmm5, 221 1940 movaps xmm5, xmm8 1941 movups xmm6, xmmword ptr [r8+rdx-20H] 1942 movups xmm7, xmmword ptr [r8+rdx-10H] 1943 movaps xmm8, xmm6 1944 shufps xmm6, xmm7, 136 1945 pshufd xmm6, xmm6, 93H 1946 shufps xmm8, xmm7, 221 1947 pshufd xmm7, xmm8, 93H 1948 mov al, 7 1949 roundloop1: 1950 paddd xmm0, xmm4 1951 paddd xmm0, xmm1 1952 pxor xmm3, xmm0 1953 pshuflw xmm3, xmm3, 0B1H 1954 pshufhw xmm3, xmm3, 0B1H 1955 paddd xmm2, xmm3 1956 pxor xmm1, xmm2 1957 movdqa xmm11, xmm1 1958 pslld xmm1, 20 1959 psrld xmm11, 12 1960 por xmm1, xmm11 1961 paddd xmm0, xmm5 1962 paddd xmm0, xmm1 1963 pxor xmm3, xmm0 1964 movdqa xmm14, xmm3 1965 psrld xmm3, 8 1966 pslld xmm14, 24 1967 pxor xmm3, xmm14 1968 paddd xmm2, xmm3 1969 pxor xmm1, xmm2 1970 movdqa xmm11, xmm1 1971 pslld xmm1, 25 1972 psrld xmm11, 7 1973 por xmm1, xmm11 1974 pshufd xmm0, xmm0, 93H 1975 pshufd xmm3, xmm3, 4EH 1976 pshufd xmm2, xmm2, 39H 1977 paddd xmm0, xmm6 1978 paddd xmm0, xmm1 1979 pxor xmm3, xmm0 1980 pshuflw xmm3, xmm3, 0B1H 1981 pshufhw xmm3, xmm3, 0B1H 1982 paddd xmm2, xmm3 1983 pxor xmm1, xmm2 1984 movdqa xmm11, xmm1 1985 pslld xmm1, 20 1986 psrld xmm11, 12 1987 por xmm1, xmm11 1988 paddd xmm0, xmm7 1989 paddd xmm0, xmm1 1990 pxor xmm3, xmm0 1991 movdqa xmm14, xmm3 1992 psrld xmm3, 8 1993 pslld xmm14, 24 1994 pxor xmm3, xmm14 1995 paddd xmm2, xmm3 1996 pxor xmm1, xmm2 1997 movdqa xmm11, xmm1 1998 pslld xmm1, 25 1999 psrld xmm11, 7 2000 por xmm1, xmm11 2001 pshufd xmm0, xmm0, 39H 2002 pshufd xmm3, xmm3, 
4EH                                     ; immediate operand of the pshufd begun at the end of the previous line
        pshufd  xmm2, xmm2, 93H
        dec     al                      ; al counts the remaining rounds
        jz      endroundloop1

        ; Another round follows: rearrange the message words held in
        ; xmm4-xmm7.  The pand/por pairs with the PBLENDW_* masks select
        ; dwords from two registers.
        movdqa  xmm8, xmm4
        shufps  xmm8, xmm5, 214
        pshufd  xmm9, xmm4, 0FH
        pshufd  xmm4, xmm8, 39H
        movdqa  xmm8, xmm6
        shufps  xmm8, xmm7, 250
        pand    xmm9, xmmword ptr [PBLENDW_0x33_MASK]
        pand    xmm8, xmmword ptr [PBLENDW_0xCC_MASK]
        por     xmm9, xmm8
        movdqa  xmm8, xmm7
        punpcklqdq xmm8, xmm5
        movdqa  xmm10, xmm6
        pand    xmm8, xmmword ptr [PBLENDW_0x3F_MASK]
        pand    xmm10, xmmword ptr [PBLENDW_0xC0_MASK]
        por     xmm8, xmm10
        pshufd  xmm8, xmm8, 78H
        punpckhdq xmm5, xmm7
        punpckldq xmm6, xmm5
        pshufd  xmm7, xmm6, 1EH
        movdqa  xmm5, xmm9
        movdqa  xmm6, xmm8
        jmp     roundloop1

endroundloop1:
        ; Fold rows 2/3 into rows 0/1, then continue with the next 64-byte
        ; block until the byte offset in rdx reaches r15.
        pxor    xmm0, xmm2
        pxor    xmm1, xmm3
        mov     eax, r13d
        cmp     rdx, r15
        jne     innerloop1
        movups  xmmword ptr [rbx], xmm0         ; write the 32-byte result
        movups  xmmword ptr [rbx+10H], xmm1
        jmp     unwind
_blake3_hash_many_sse2 ENDP
blake3_hash_many_sse2 ENDP

;-----------------------------------------------------------------------
; blake3_compress_in_place_sse2 / _blake3_compress_in_place_sse2
;
; Runs 7 rounds over one 64-byte message block and writes the 32-byte
; result back over the input state.
;
; ABI:   Microsoft x64
; In:    rcx          -> 32 bytes, loaded as state rows 0 and 1 and
;                        overwritten with the result
;        rdx          -> 64 bytes of message (unaligned loads are used)
;        r8b          =  byte placed in dword 2 of state row 3
;        r9           =  64-bit value placed in dwords 0-1 of state row 3
;        [rsp+28H] at entry (fifth argument, low byte)
;                     =  byte placed in dword 3 of state row 3
;        NOTE(review): presumably (cv, block, block_len, counter, flags)
;        as in the C prototype - confirm against the BLAKE3 C header.
; Out:   32 bytes at [rcx]
; Clobb: rax, r8, xmm0-xmm5, flags
; Stack: 120-byte frame; the seven non-volatile XMM registers written by
;        this routine (xmm6-xmm11 and xmm14) are saved and restored.
;
; xmm10 is scratch in the message permutation and is non-volatile in the
; Microsoft x64 convention, so it has to be preserved.  It takes the slot
; that used to hold xmm15, which this routine never writes, so the frame
; size and the stack-argument offset are unchanged.
;-----------------------------------------------------------------------
ALIGN   16
blake3_compress_in_place_sse2 PROC
_blake3_compress_in_place_sse2 PROC
        sub     rsp, 120                ; entry rsp = 16n+8, so rsp is now 16-byte aligned
        movdqa  xmmword ptr [rsp], xmm6
        movdqa  xmmword ptr [rsp+10H], xmm7
        movdqa  xmmword ptr [rsp+20H], xmm8
        movdqa  xmmword ptr [rsp+30H], xmm9
        movdqa  xmmword ptr [rsp+40H], xmm10
        movdqa  xmmword ptr [rsp+50H], xmm11
        movdqa  xmmword ptr [rsp+60H], xmm14

        ; Build the 4x4 dword state in xmm0-xmm3.
        movups  xmm0, xmmword ptr [rcx]
        movups  xmm1, xmmword ptr [rcx+10H]
        movaps  xmm2, xmmword ptr [BLAKE3_IV]
        movzx   eax, byte ptr [rsp+0A0H]        ; fifth argument: 120 frame + 8 return address + 32 shadow space
        movzx   r8d, r8b
        shl     rax, 32
        add     r8, rax                 ; r8 = (fifth-argument byte << 32) | r8b
        movq    xmm3, r9
        movq    xmm4, r8
        punpcklqdq xmm3, xmm4           ; row 3 = { r9 low, r9 high, r8b, fifth-argument byte }

        ; Load the message and split it into even/odd dwords (xmm4, xmm5
        ; from the first 32 bytes; xmm6, xmm7 from the last 32 bytes, lane
        ; rotated).
        movups  xmm4, xmmword ptr [rdx]
        movups  xmm5, xmmword ptr [rdx+10H]
        movaps  xmm8, xmm4
        shufps  xmm4, xmm5, 136
        shufps  xmm8, xmm5, 221
        movaps  xmm5, xmm8
        movups  xmm6, xmmword ptr [rdx+20H]
        movups  xmm7, xmmword ptr [rdx+30H]
        movaps  xmm8, xmm6
        shufps  xmm6, xmm7, 136
        pshufd  xmm6, xmm6, 93H
        shufps  xmm8, xmm7, 221
        pshufd  xmm7, xmm8, 93H
        mov     al, 7                   ; round counter
@@:
        ; First half of the round.
        paddd   xmm0, xmm4
        paddd   xmm0, xmm1
        pxor    xmm3, xmm0
        pshuflw xmm3, xmm3, 0B1H        ; swap the 16-bit halves of each dword (rotate by 16)
        pshufhw xmm3, xmm3, 0B1H
        paddd   xmm2, xmm3
        pxor    xmm1, xmm2
        movdqa  xmm11, xmm1             ; rotate right by 12
        pslld   xmm1, 20
        psrld   xmm11, 12
        por     xmm1, xmm11
        paddd   xmm0, xmm5
        paddd   xmm0, xmm1
        pxor    xmm3, xmm0
        movdqa  xmm14, xmm3             ; rotate right by 8
        psrld   xmm3, 8
        pslld   xmm14, 24
        pxor    xmm3, xmm14
        paddd   xmm2, xmm3
        pxor    xmm1, xmm2
        movdqa  xmm11, xmm1             ; rotate right by 7
        pslld   xmm1, 25
        psrld   xmm11, 7
        por     xmm1, xmm11
        ; Rotate the lanes of rows 0, 3 and 2 for the second half.
        pshufd  xmm0, xmm0, 93H
        pshufd  xmm3, xmm3, 4EH
        pshufd  xmm2, xmm2, 39H
        ; Second half of the round.
        paddd   xmm0, xmm6
        paddd   xmm0, xmm1
        pxor    xmm3, xmm0
        pshuflw xmm3, xmm3, 0B1H
        pshufhw xmm3, xmm3, 0B1H
        paddd   xmm2, xmm3
        pxor    xmm1, xmm2
        movdqa  xmm11, xmm1
        pslld   xmm1, 20
        psrld   xmm11, 12
        por     xmm1, xmm11
        paddd   xmm0, xmm7
        paddd   xmm0, xmm1
        pxor    xmm3, xmm0
        movdqa  xmm14, xmm3
        psrld   xmm3, 8
        pslld   xmm14, 24
        pxor    xmm3, xmm14
        paddd   xmm2, xmm3
        pxor    xmm1, xmm2
        movdqa  xmm11, xmm1
        pslld   xmm1, 25
        psrld   xmm11, 7
        por     xmm1, xmm11
        ; Rotate the lanes back.
        pshufd  xmm0, xmm0, 39H
        pshufd  xmm3, xmm3, 4EH
        pshufd  xmm2, xmm2, 93H
        dec     al
        jz      @F
        ; Rearrange the message words in xmm4-xmm7 for the next round
        ; (xmm8, xmm9 and xmm10 are scratch).
        movdqa  xmm8, xmm4
        shufps  xmm8, xmm5, 214
        pshufd  xmm9, xmm4, 0FH
        pshufd  xmm4, xmm8, 39H
        movdqa  xmm8, xmm6
        shufps  xmm8, xmm7, 250
        pand    xmm9, xmmword ptr [PBLENDW_0x33_MASK]
        pand    xmm8, xmmword ptr [PBLENDW_0xCC_MASK]
        por     xmm9, xmm8
        movdqa  xmm8, xmm7
        punpcklqdq xmm8, xmm5
        movdqa  xmm10, xmm6
        pand    xmm8, xmmword ptr [PBLENDW_0x3F_MASK]
        pand    xmm10, xmmword ptr [PBLENDW_0xC0_MASK]
        por     xmm8, xmm10
        pshufd  xmm8, xmm8, 78H
        punpckhdq xmm5, xmm7
        punpckldq xmm6, xmm5
        pshufd  xmm7, xmm6, 1EH
        movdqa  xmm5, xmm9
        movdqa  xmm6, xmm8
        jmp     @B
@@:
        ; Result = rows 0/1 xor rows 2/3, stored over the input state.
        pxor    xmm0, xmm2
        pxor    xmm1, xmm3
        movups  xmmword ptr [rcx], xmm0
        movups  xmmword ptr [rcx+10H], xmm1
        movdqa  xmm6, xmmword ptr [rsp]
        movdqa  xmm7, xmmword ptr [rsp+10H]
        movdqa  xmm8, xmmword ptr [rsp+20H]
        movdqa  xmm9, xmmword ptr [rsp+30H]
        movdqa  xmm10, xmmword ptr [rsp+40H]
        movdqa  xmm11, xmmword ptr [rsp+50H]
        movdqa  xmm14, xmmword ptr [rsp+60H]
        add     rsp, 120
        ret
_blake3_compress_in_place_sse2 ENDP
blake3_compress_in_place_sse2 ENDP

;-----------------------------------------------------------------------
; blake3_compress_xof_sse2 / _blake3_compress_xof_sse2
;
; Same 7-round computation as blake3_compress_in_place_sse2, but the input
; state is left untouched and 64 bytes are written to a separate buffer.
;
; ABI:   Microsoft x64
; In:    rcx          -> 32 bytes, loaded as state rows 0 and 1 (read only)
;        rdx          -> 64 bytes of message (unaligned loads are used)
;        r8b          =  byte placed in dword 2 of state row 3
;        r9           =  64-bit value placed in dwords 0-1 of state row 3
;        [rsp+28H] at entry (fifth argument, low byte)
;                     =  byte placed in dword 3 of state row 3
;        [rsp+30H] at entry (sixth argument)
;                     -> 64-byte output buffer
;        NOTE(review): presumably (cv, block, block_len, counter, flags,
;        out) as in the C prototype - confirm against the BLAKE3 C header.
; Out:   64 bytes at the output pointer:
;          bytes  0-31 = rows 0/1 xor rows 2/3
;          bytes 32-63 = rows 2/3 xor the 32 input bytes at [rcx]
; Clobb: rax, r8, r10, xmm0-xmm5, flags
; Stack: 120-byte frame; the seven non-volatile XMM registers written by
;        this routine (xmm6-xmm11 and xmm14) are saved and restored.
;
; xmm10 is scratch in the message permutation and is non-volatile in the
; Microsoft x64 convention, so it has to be preserved.  It takes the slot
; that used to hold xmm15, which this routine never writes, so the frame
; size and the stack-argument offsets are unchanged.
;-----------------------------------------------------------------------
ALIGN   16
blake3_compress_xof_sse2 PROC
_blake3_compress_xof_sse2 PROC
        sub     rsp, 120                ; entry rsp = 16n+8, so rsp is now 16-byte aligned
        movdqa  xmmword ptr [rsp], xmm6
        movdqa  xmmword ptr [rsp+10H], xmm7
        movdqa  xmmword ptr [rsp+20H], xmm8
        movdqa  xmmword ptr [rsp+30H], xmm9
        movdqa  xmmword ptr [rsp+40H], xmm10
        movdqa  xmmword ptr [rsp+50H], xmm11
        movdqa  xmmword ptr [rsp+60H], xmm14

        ; Build the 4x4 dword state in xmm0-xmm3.
        movups  xmm0, xmmword ptr [rcx]
        movups  xmm1, xmmword ptr [rcx+10H]
        movaps  xmm2, xmmword ptr [BLAKE3_IV]
        movzx   eax, byte ptr [rsp+0A0H]        ; fifth argument: 120 frame + 8 return address + 32 shadow space
        movzx   r8d, r8b
        mov     r10, qword ptr [rsp+0A8H]       ; sixth argument: output pointer
        shl     rax, 32
        add     r8, rax                 ; r8 = (fifth-argument byte << 32) | r8b
        movq    xmm3, r9
        movq    xmm4, r8
        punpcklqdq xmm3, xmm4           ; row 3 = { r9 low, r9 high, r8b, fifth-argument byte }

        ; Load the message and split it into even/odd dwords (xmm4, xmm5
        ; from the first 32 bytes; xmm6, xmm7 from the last 32 bytes, lane
        ; rotated).
        movups  xmm4, xmmword ptr [rdx]
        movups  xmm5, xmmword ptr [rdx+10H]
        movaps  xmm8, xmm4
        shufps  xmm4, xmm5, 136
        shufps  xmm8, xmm5, 221
        movaps  xmm5, xmm8
        movups  xmm6, xmmword ptr [rdx+20H]
        movups  xmm7, xmmword ptr [rdx+30H]
        movaps  xmm8, xmm6
        shufps  xmm6, xmm7, 136
        pshufd  xmm6, xmm6, 93H
        shufps  xmm8, xmm7, 221
        pshufd  xmm7, xmm8, 93H
        mov     al, 7                   ; round counter
@@:
        ; First half of the round.
        paddd   xmm0, xmm4
        paddd   xmm0, xmm1
        pxor    xmm3, xmm0
        pshuflw xmm3, xmm3, 0B1H        ; swap the 16-bit halves of each dword (rotate by 16)
        pshufhw xmm3, xmm3, 0B1H
        paddd   xmm2, xmm3
        pxor    xmm1, xmm2
        movdqa  xmm11, xmm1             ; rotate right by 12
        pslld   xmm1, 20
        psrld   xmm11, 12
        por     xmm1, xmm11
        paddd   xmm0, xmm5
        paddd   xmm0, xmm1
        pxor    xmm3, xmm0
        movdqa  xmm14, xmm3             ; rotate right by 8
        psrld   xmm3, 8
        pslld   xmm14, 24
        pxor    xmm3, xmm14
        paddd   xmm2, xmm3
        pxor    xmm1, xmm2
        movdqa  xmm11, xmm1             ; rotate right by 7
        pslld   xmm1, 25
        psrld   xmm11, 7
        por     xmm1, xmm11
        ; Rotate the lanes of rows 0, 3 and 2 for the second half.
        pshufd  xmm0, xmm0, 93H
        pshufd  xmm3, xmm3, 4EH
        pshufd  xmm2, xmm2, 39H
        ; Second half of the round.
        paddd   xmm0, xmm6
        paddd   xmm0, xmm1
        pxor    xmm3, xmm0
        pshuflw xmm3, xmm3, 0B1H
        pshufhw xmm3, xmm3, 0B1H
        paddd   xmm2, xmm3
        pxor    xmm1, xmm2
        movdqa  xmm11, xmm1
        pslld   xmm1, 20
        psrld   xmm11, 12
        por     xmm1, xmm11
        paddd   xmm0, xmm7
        paddd   xmm0, xmm1
        pxor    xmm3, xmm0
        movdqa  xmm14, xmm3
        psrld   xmm3, 8
        pslld   xmm14, 24
        pxor    xmm3, xmm14
        paddd   xmm2, xmm3
        pxor    xmm1, xmm2
        movdqa  xmm11, xmm1
        pslld   xmm1, 25
        psrld   xmm11, 7
        por     xmm1, xmm11
        ; Rotate the lanes back.
        pshufd  xmm0, xmm0, 39H
        pshufd  xmm3, xmm3, 4EH
        pshufd  xmm2, xmm2, 93H
        dec     al
        jz      @F
        ; Rearrange the message words in xmm4-xmm7 for the next round
        ; (xmm8, xmm9 and xmm10 are scratch).
        movdqa  xmm8, xmm4
        shufps  xmm8, xmm5, 214
        pshufd  xmm9, xmm4, 0FH
        pshufd  xmm4, xmm8, 39H
        movdqa  xmm8, xmm6
        shufps  xmm8, xmm7, 250
        pand    xmm9, xmmword ptr [PBLENDW_0x33_MASK]
        pand    xmm8, xmmword ptr [PBLENDW_0xCC_MASK]
        por     xmm9, xmm8
        movdqa  xmm8, xmm7
        punpcklqdq xmm8, xmm5
        movdqa  xmm10, xmm6
        pand    xmm8, xmmword ptr [PBLENDW_0x3F_MASK]
        pand    xmm10, xmmword ptr [PBLENDW_0xC0_MASK]
        por     xmm8, xmm10
        pshufd  xmm8, xmm8, 78H
        punpckhdq xmm5, xmm7
        punpckldq xmm6, xmm5
        pshufd  xmm7, xmm6, 1EH
        movdqa  xmm5, xmm9
        movdqa  xmm6, xmm8
        jmp     @B
@@:
        ; Reload the untouched input state; it is xored into the second
        ; half of the output.
        movdqu  xmm4, xmmword ptr [rcx]
        movdqu  xmm5, xmmword ptr [rcx+10H]
        pxor    xmm0, xmm2
        pxor    xmm1, xmm3
        pxor    xmm2, xmm4
        pxor    xmm3, xmm5
        movups  xmmword ptr [r10], xmm0
        movups  xmmword ptr [r10+10H], xmm1
        movups  xmmword ptr [r10+20H], xmm2
        movups  xmmword ptr [r10+30H], xmm3
        movdqa  xmm6, xmmword ptr [rsp]
        movdqa  xmm7, xmmword ptr [rsp+10H]
        movdqa  xmm8, xmmword ptr [rsp+20H]
        movdqa  xmm9, xmmword ptr [rsp+30H]
        movdqa  xmm10, xmmword ptr [rsp+40H]
        movdqa  xmm11, xmmword ptr [rsp+50H]
        movdqa  xmm14, xmmword ptr [rsp+60H]
        add     rsp, 120
        ret
_blake3_compress_xof_sse2 ENDP
blake3_compress_xof_sse2 ENDP

_TEXT ENDS


; Read-only constants.  The segment is aligned to 64 bytes and every table
; below is a multiple of 16 bytes long, so each label is 16-byte aligned
; as required by the movaps/movdqa/pand/pxor memory operands that use them.
_RDATA SEGMENT READONLY PAGE ALIAS(".rdata") 'CONST'
ALIGN   64
; Four dwords loaded as state row 2 by the compress routines.
BLAKE3_IV:
        dd 6A09E667H, 0BB67AE85H, 3C6EF372H, 0A54FF53AH

; Per-lane offsets 0..3.
ADD0:
        dd 0, 1, 2, 3

; The value 4 in every lane.
ADD1:
        dd 4 dup (4)

; Each BLAKE3_IV dword broadcast to all four lanes.
BLAKE3_IV_0:
        dd 4 dup (6A09E667H)

BLAKE3_IV_1:
        dd 4 dup (0BB67AE85H)

BLAKE3_IV_2:
        dd 4 dup (3C6EF372H)

BLAKE3_IV_3:
        dd 4 dup (0A54FF53AH)

; The value 64 in every lane.
BLAKE3_BLOCK_LEN:
        dd 4 dup (64)

; Sign-bit mask xored into both operands before pcmpgtd, so that the signed
; compare orders them as unsigned values.
CMP_MSB_MASK:
        dd 8 dup(80000000H)

; Dword-select masks used with pand/por to blend two registers.  The hex
; value in each name is the per-word selector: 33H = dwords 0 and 2,
; 0CCH = dwords 1 and 3, 3FH = dwords 0-2, 0C0H = dword 3.
PBLENDW_0x33_MASK:
        dd 0FFFFFFFFH, 000000000H, 0FFFFFFFFH, 000000000H
PBLENDW_0xCC_MASK:
        dd 000000000H, 0FFFFFFFFH, 000000000H, 0FFFFFFFFH
PBLENDW_0x3F_MASK:
        dd 0FFFFFFFFH, 0FFFFFFFFH, 0FFFFFFFFH, 000000000H
PBLENDW_0xC0_MASK:
        dd 000000000H, 000000000H, 000000000H, 0FFFFFFFFH

_RDATA ENDS
END