aboutsummaryrefslogtreecommitdiffhomepage
path: root/test/avx10/old.txt
diff options
context:
space:
mode:
Diffstat (limited to 'test/avx10/old.txt')
-rw-r--r--test/avx10/old.txt657
1 files changed, 657 insertions, 0 deletions
diff --git a/test/avx10/old.txt b/test/avx10/old.txt
new file mode 100644
index 0000000..9e4f097
--- /dev/null
+++ b/test/avx10/old.txt
@@ -0,0 +1,657 @@
+v4fmaddps(zmm1, zmm8, ptr [rdx + 64]);
+v4fmaddss(xmm15, xmm8, ptr [rax + 64]);
+v4fnmaddps(zmm5 | k5, zmm2, ptr [rcx + 0x80]);
+v4fnmaddss(xmm31, xmm2, ptr [rsp + 0x80]);
+vp4dpwssd(zmm23 | k7 | T_z, zmm1, ptr [rax + 64]);
+vp4dpwssds(zmm10 | k4, zmm3, ptr [rsp + rax * 4 + 64]);
+vaesdec(xmm20, xmm30, ptr [rcx + 64]);
+vaesdec(ymm1, ymm2, ptr [rcx + 64]);
+vaesdec(zmm1, zmm2, ptr [rcx + 64]);
+vaesdeclast(xmm20, xmm30, ptr [rax + 64]);
+vaesdeclast(ymm20, ymm30, ptr [rax + 64]);
+vaesdeclast(zmm20, zmm30, ptr [rax + 64]);
+vaesenc(xmm20, xmm30, ptr [rcx + 64]);
+vaesenc(ymm1, ymm2, ptr [rcx + 64]);
+vaesenc(zmm1, zmm2, ptr [rcx + 64]);
+vaesenclast(xmm20, xmm30, ptr [rax + 64]);
+vaesenclast(ymm20, ymm30, ptr [rax + 64]);
+vaesenclast(zmm20, zmm30, ptr [rax + 64]);
+vpclmulqdq(xmm2, xmm3, ptr [rax + 64], 3);
+vpclmulqdq(ymm2, ymm3, ptr [rax + 64], 3);
+vpclmulqdq(zmm2, zmm3, ptr [rax + 64], 3);
+vpclmulqdq(xmm20, xmm3, ptr [rax + 64], 3);
+vpclmulqdq(ymm20, ymm3, ptr [rax + 64], 3);
+vpclmulqdq(zmm20, zmm3, ptr [rax + 64], 3);
+vpcompressb(ptr[rax + 64], xmm1);
+vpcompressb(xmm30 | k5, xmm1);
+vpcompressb(ptr[rax + 64], ymm1);
+vpcompressb(ymm30 | k3 |T_z, ymm1);
+vpcompressb(ptr[rax + 64], zmm1);
+vpcompressb(zmm30 | k2 |T_z, zmm1);
+vpcompressw(ptr[rax + 64], xmm1);
+vpcompressw(xmm30 | k5, xmm1);
+vpcompressw(ptr[rax + 64], ymm1);
+vpcompressw(ymm30 | k3 |T_z, ymm1);
+vpcompressw(ptr[rax + 64], zmm1);
+vpcompressw(zmm30 | k2 |T_z, zmm1);
+vpshldw(xmm5|k3|T_z, xmm2, ptr [rax + 0x40], 5);
+vpshldw(ymm5|k3|T_z, ymm2, ptr [rax + 0x40], 5);
+vpshldw(zmm5|k3|T_z, zmm2, ptr [rax + 0x40], 5);
+vpshldd(xmm5|k3|T_z, xmm2, ptr [rax + 0x40], 5);
+vpshldd(ymm5|k3|T_z, ymm2, ptr [rax + 0x40], 5);
+vpshldd(zmm5|k3|T_z, zmm2, ptr [rax + 0x40], 5);
+vpshldq(xmm5|k3|T_z, xmm2, ptr [rax + 0x40], 5);
+vpshldq(ymm5|k3|T_z, ymm2, ptr [rax + 0x40], 5);
+vpshldq(zmm5|k3|T_z, zmm2, ptr [rax + 0x40], 5);
+vpshldvw(xmm5|k3|T_z, xmm2, ptr [rax + 0x40]);
+vpshldvw(ymm5|k3|T_z, ymm2, ptr [rax + 0x40]);
+vpshldvw(zmm5|k3|T_z, zmm2, ptr [rax + 0x40]);
+vpshldvd(xmm5|k3|T_z, xmm2, ptr [rax + 0x40]);
+vpshldvd(ymm5|k3|T_z, ymm2, ptr [rax + 0x40]);
+vpshldvd(zmm5|k3|T_z, zmm2, ptr [rax + 0x40]);
+vpshldvq(xmm5|k3|T_z, xmm2, ptr [rax + 0x40]);
+vpshldvq(ymm5|k3|T_z, ymm2, ptr [rax + 0x40]);
+vpshldvq(zmm5|k3|T_z, zmm2, ptr [rax + 0x40]);
+vpshrdw(xmm5|k3|T_z, xmm2, ptr [rax + 0x40], 5);
+vpshrdw(ymm5|k3|T_z, ymm2, ptr [rax + 0x40], 5);
+vpshrdw(zmm5|k3|T_z, zmm2, ptr [rax + 0x40], 5);
+vpshrdd(xmm5|k3|T_z, xmm2, ptr [rax + 0x40], 5);
+vpshrdd(ymm5|k3|T_z, ymm2, ptr [rax + 0x40], 5);
+vpshrdd(zmm5|k3|T_z, zmm2, ptr [rax + 0x40], 5);
+vpshrdq(xmm5|k3|T_z, xmm2, ptr [rax + 0x40], 5);
+vpshrdq(ymm5|k3|T_z, ymm2, ptr [rax + 0x40], 5);
+vpshrdq(zmm5|k3|T_z, zmm2, ptr [rax + 0x40], 5);
+vpshrdvw(xmm5|k3|T_z, xmm2, ptr [rax + 0x40]);
+vpshrdvw(ymm5|k3|T_z, ymm2, ptr [rax + 0x40]);
+vpshrdvw(zmm5|k3|T_z, zmm2, ptr [rax + 0x40]);
+vpshrdvd(xmm5|k3|T_z, xmm2, ptr [rax + 0x40]);
+vpshrdvd(ymm5|k3|T_z, ymm2, ptr [rax + 0x40]);
+vpshrdvd(zmm5|k3|T_z, zmm2, ptr [rax + 0x40]);
+vpshrdvq(xmm5|k3|T_z, xmm2, ptr [rax + 0x40]);
+vpshrdvq(ymm5|k3|T_z, ymm2, ptr [rax + 0x40]);
+vpshrdvq(zmm5|k3|T_z, zmm2, ptr [rax + 0x40]);
+vpshrdd(xmm5|k3|T_z, xmm2, ptr_b [rax + 0x40], 5);
+vpshrdd(ymm5|k3|T_z, ymm2, ptr_b [rax + 0x40], 5);
+vpshrdd(zmm5|k3|T_z, zmm2, ptr_b [rax + 0x40], 5);
+vpshrdq(xmm5|k3|T_z, xmm2, ptr_b [rax + 0x40], 5);
+vpshrdq(ymm5|k3|T_z, ymm2, ptr_b [rax + 0x40], 5);
+vpshrdq(zmm5|k3|T_z, zmm2, ptr_b [rax + 0x40], 5);
+vpshrdvd(xmm5|k3|T_z, xmm2, ptr_b [rax + 0x40]);
+vpshrdvd(ymm5|k3|T_z, ymm2, ptr_b [rax + 0x40]);
+vpshrdvd(zmm5|k3|T_z, zmm2, ptr_b [rax + 0x40]);
+vpshrdvq(xmm5|k3|T_z, xmm2, ptr_b [rax + 0x40]);
+vpshrdvq(ymm5|k3|T_z, ymm2, ptr_b [rax + 0x40]);
+vpshrdvq(zmm5|k3|T_z, zmm2, ptr_b [rax + 0x40]);
+vpopcntb(xmm5|k3|T_z, ptr [rax + 0x40]);
+vpopcntb(ymm5|k3|T_z, ptr [rax + 0x40]);
+vpopcntb(zmm5|k3|T_z, ptr [rax + 0x40]);
+vpopcntw(xmm5|k3|T_z, ptr [rax + 0x40]);
+vpopcntw(ymm5|k3|T_z, ptr [rax + 0x40]);
+vpopcntw(zmm5|k3|T_z, ptr [rax + 0x40]);
+vpopcntd(xmm5|k3|T_z, ptr [rax + 0x40]);
+vpopcntd(ymm5|k3|T_z, ptr [rax + 0x40]);
+vpopcntd(zmm5|k3|T_z, ptr [rax + 0x40]);
+vpopcntd(xmm5|k3|T_z, ptr_b [rax + 0x40]);
+vpopcntd(ymm5|k3|T_z, ptr_b [rax + 0x40]);
+vpopcntd(zmm5|k3|T_z, ptr_b [rax + 0x40]);
+vpopcntq(xmm5|k3|T_z, ptr [rax + 0x40]);
+vpopcntq(ymm5|k3|T_z, ptr [rax + 0x40]);
+vpopcntq(zmm5|k3|T_z, ptr [rax + 0x40]);
+vpopcntq(xmm5|k3|T_z, ptr_b [rax + 0x40]);
+vpopcntq(ymm5|k3|T_z, ptr_b [rax + 0x40]);
+vpopcntq(zmm5|k3|T_z, ptr_b [rax + 0x40]);
+vpdpbusd(xmm5|k3|T_z, xmm20, ptr [rax + 0x40]);
+vpdpbusd(ymm5|k3|T_z, ymm20, ptr [rax + 0x40]);
+vpdpbusd(zmm5|k3|T_z, zmm20, ptr [rax + 0x40]);
+vpdpbusd(xmm5|k3|T_z, xmm20, ptr_b [rax + 0x40]);
+vpdpbusd(ymm5|k3|T_z, ymm20, ptr_b [rax + 0x40]);
+vpdpbusd(zmm5|k3|T_z, zmm20, ptr_b [rax + 0x40]);
+vpdpbusds(xmm5|k3|T_z, xmm20, ptr [rax + 0x40]);
+vpdpbusds(ymm5|k3|T_z, ymm20, ptr [rax + 0x40]);
+vpdpbusds(zmm5|k3|T_z, zmm20, ptr [rax + 0x40]);
+vpdpbusds(xmm5|k3|T_z, xmm20, ptr_b [rax + 0x40]);
+vpdpbusds(ymm5|k3|T_z, ymm20, ptr_b [rax + 0x40]);
+vpdpbusds(zmm5|k3|T_z, zmm20, ptr_b [rax + 0x40]);
+vpdpwssd(xmm5|k3|T_z, xmm20, ptr [rax + 0x40]);
+vpdpwssd(ymm5|k3|T_z, ymm20, ptr [rax + 0x40]);
+vpdpwssd(zmm5|k3|T_z, zmm20, ptr [rax + 0x40]);
+vpdpwssd(xmm5|k3|T_z, xmm20, ptr_b [rax + 0x40]);
+vpdpwssd(ymm5|k3|T_z, ymm20, ptr_b [rax + 0x40]);
+vpdpwssd(zmm5|k3|T_z, zmm20, ptr_b [rax + 0x40]);
+vpdpwssds(xmm5|k3|T_z, xmm20, ptr [rax + 0x40]);
+vpdpwssds(ymm5|k3|T_z, ymm20, ptr [rax + 0x40]);
+vpdpwssds(zmm5|k3|T_z, zmm20, ptr [rax + 0x40]);
+vpdpwssds(xmm5|k3|T_z, xmm20, ptr_b [rax + 0x40]);
+vpdpwssds(ymm5|k3|T_z, ymm20, ptr_b [rax + 0x40]);
+vpdpwssds(zmm5|k3|T_z, zmm20, ptr_b [rax + 0x40]);
+vpexpandb(xmm5|k3|T_z, xmm30);
+vpexpandb(ymm5|k3|T_z, ymm30);
+vpexpandb(zmm5|k3|T_z, zmm30);
+vpexpandb(xmm5|k3|T_z, ptr [rax + 0x40]);
+vpexpandb(ymm5|k3|T_z, ptr [rax + 0x40]);
+vpexpandb(zmm5|k3|T_z, ptr [rax + 0x40]);
+vpexpandw(xmm5|k3|T_z, xmm30);
+vpexpandw(ymm5|k3|T_z, ymm30);
+vpexpandw(zmm5|k3|T_z, zmm30);
+vpexpandw(xmm5|k3|T_z, ptr [rax + 0x40]);
+vpexpandw(ymm5|k3|T_z, ptr [rax + 0x40]);
+vpexpandw(zmm5|k3|T_z, ptr [rax + 0x40]);
+vpshufbitqmb(k1|k2, xmm2, ptr [rax + 0x40]);
+vpshufbitqmb(k1|k2, ymm2, ptr [rax + 0x40]);
+vpshufbitqmb(k1|k2, zmm2, ptr [rax + 0x40]);
+gf2p8affineinvqb(xmm1, xmm2, 3);
+gf2p8affineinvqb(xmm1, ptr [rax + 0x40], 3);
+vgf2p8affineinvqb(xmm1, xmm5, xmm2, 3);
+vgf2p8affineinvqb(ymm1, ymm5, ymm2, 3);
+vgf2p8affineinvqb(xmm1, xmm5, ptr [rax + 0x40], 3);
+vgf2p8affineinvqb(ymm1, ymm5, ptr [rax + 0x40], 3);
+vgf2p8affineinvqb(xmm30, xmm31, xmm4, 5);
+vgf2p8affineinvqb(ymm30, ymm31, ymm4, 5);
+vgf2p8affineinvqb(zmm30, zmm31, zmm4, 5);
+vgf2p8affineinvqb(xmm30|k1|T_z, xmm5, ptr [rax + 0x40], 5);
+vgf2p8affineinvqb(ymm30|k1|T_z, ymm5, ptr [rax + 0x40], 5);
+vgf2p8affineinvqb(zmm30|k1|T_z, zmm5, ptr [rax + 0x40], 5);
+vgf2p8affineinvqb(xmm30|k1|T_z, xmm5, ptr_b [rax + 0x40], 5);
+vgf2p8affineinvqb(ymm30|k1|T_z, ymm5, ptr_b [rax + 0x40], 5);
+vgf2p8affineinvqb(zmm30|k1|T_z, zmm5, ptr_b [rax + 0x40], 5);
+gf2p8affineqb(xmm1, xmm2, 3);
+gf2p8affineqb(xmm1, ptr [rax + 0x40], 3);
+vgf2p8affineqb(xmm1, xmm5, xmm2, 3);
+vgf2p8affineqb(ymm1, ymm5, ymm2, 3);
+vgf2p8affineqb(xmm1, xmm5, ptr [rax + 0x40], 3);
+vgf2p8affineqb(ymm1, ymm5, ptr [rax + 0x40], 3);
+vgf2p8affineqb(xmm30, xmm31, xmm4, 5);
+vgf2p8affineqb(ymm30, ymm31, ymm4, 5);
+vgf2p8affineqb(zmm30, zmm31, zmm4, 5);
+vgf2p8affineqb(xmm30|k1|T_z, xmm5, ptr [rax + 0x40], 5);
+vgf2p8affineqb(ymm30|k1|T_z, ymm5, ptr [rax + 0x40], 5);
+vgf2p8affineqb(zmm30|k1|T_z, zmm5, ptr [rax + 0x40], 5);
+vgf2p8affineqb(xmm30|k1|T_z, xmm5, ptr_b [rax + 0x40], 5);
+vgf2p8affineqb(ymm30|k1|T_z, ymm5, ptr_b [rax + 0x40], 5);
+vgf2p8affineqb(zmm30|k1|T_z, zmm5, ptr_b [rax + 0x40], 5);
+gf2p8mulb(xmm1, xmm2);
+gf2p8mulb(xmm1, ptr [rax + 0x40]);
+vgf2p8mulb(xmm1, xmm5, xmm2);
+vgf2p8mulb(ymm1, ymm5, ymm2);
+vgf2p8mulb(xmm1, xmm5, ptr [rax + 0x40]);
+vgf2p8mulb(ymm1, ymm5, ptr [rax + 0x40]);
+vgf2p8mulb(xmm30, xmm31, xmm4);
+vgf2p8mulb(ymm30, ymm31, ymm4);
+vgf2p8mulb(zmm30, zmm31, zmm4);
+vgf2p8mulb(xmm30|k1|T_z, xmm5, ptr [rax + 0x40]);
+vgf2p8mulb(ymm30|k1|T_z, ymm5, ptr [rax + 0x40]);
+vgf2p8mulb(zmm30|k1|T_z, zmm5, ptr [rax + 0x40]);
+vcvtne2ps2bf16(xmm0 | k1, xmm1, ptr [rax + 64]);
+vcvtne2ps2bf16(ymm0 | k1 | T_z, ymm0, ptr [rax + 64]);
+vcvtne2ps2bf16(zmm0 | k1, zmm1, ptr [rax + 64]);
+vcvtneps2bf16(xmm0, xword [rax + 64]);
+vcvtneps2bf16(xmm0 | k1, yword [rax + 64]);
+vcvtneps2bf16(ymm0 | k1, zword [rax + 64]);
+vcvtneps2bf16(ymm0 | k1, ptr [rax + 64]);
+vdpbf16ps(xmm0 | k1, xmm1, ptr [rax + 64]);
+vdpbf16ps(ymm0 | k1, ymm1, ptr [rax + 64]);
+vdpbf16ps(zmm0 | k1, zmm1, ptr [rax + 64]);
+ldtilecfg(ptr[rax + rcx * 4 + 64]);
+sttilecfg(ptr[rsp + rax * 8 + 128]);
+tileloadd(tmm3, ptr[rdi + rdx * 2 + 8]);
+tileloaddt1(tmm4, ptr[r8 + r9 + 32]);
+tilerelease();
+tilestored(ptr[r10 + r11 * 2 + 32], tmm2);
+tilezero(tmm7);
+tdpbssd(tmm1, tmm2, tmm3);
+tdpbsud(tmm2, tmm3, tmm4);
+tdpbusd(tmm3, tmm4, tmm5);
+tdpbuud(tmm4, tmm5, tmm6);
+tdpbf16ps(tmm5, tmm6, tmm7);
+tileloadd(tmm1, ptr[r8+r8]);
+tileloadd(tmm1, ptr[rax+rcx*4]);
+tileloadd(tmm1, ptr[r8+r9*1+0x40]);
+vaddph(zmm0, zmm1, ptr[rax+64]);
+vaddph(ymm0, ymm1, ptr[rax+64]);
+vaddph(xmm0, xmm1, ptr[rax+64]);
+vaddph(zmm0, zmm1, ptr_b[rax+64]);
+vaddph(ymm0, ymm1, ptr_b[rax+64]);
+vaddph(xmm0, xmm1, ptr_b[rax+64]);
+vaddsh(xmm0, xmm15, ptr[rax+64]);
+vaddsh(xmm0|k5|T_z|T_rd_sae, xmm15, xmm3);
+vcmpph(k1, xm15, ptr[rax+64], 1);
+vcmpph(k2, ym15, ptr[rax+64], 2);
+vcmpph(k3, zm15, ptr[rax+64], 3);
+vcmpph(k1, xm15, ptr_b[rax+64], 1);
+vcmpph(k2, ym15, ptr_b[rax+64], 2);
+vcmpph(k3, zm15, ptr_b[rax+64], 3);
+vcmpsh(k1, xm15, ptr[rax+64], 1);
+vcmpsh(k3|k5, xmm1, xmm25|T_sae, 4);
+vcomish(xmm1, ptr[rax+64]);
+vcomish(xmm1|T_sae, xmm15);
+vucomish(xmm1, ptr [rax+0x40]);
+vucomish(xmm1|T_sae, xmm15);
+vfmaddsub213ph(xmm1, xmm2, ptr [rax+0x40]);
+vfmaddsub213ph(xmm1, xmm2, ptr_b [rax+0x40]);
+vfmaddsub213ph(xmm1|k3, xmm2, xmm5);
+vfmaddsub213ph(ymm1, ymm2, ptr [rax+0x40]);
+vfmaddsub213ph(ymm1, ymm2, ptr_b[rax+0x40]);
+vfmaddsub213ph(ymm1|k3, ymm2, ymm5);
+vfmaddsub213ph(zmm1, zmm2, ptr [rax+0x40]);
+vfmaddsub213ph(zmm1, zmm2, ptr_b [rax+0x40]);
+vfmaddsub213ph(zmm1|T_ru_sae, zmm2, zmm5);
+vfmsubadd132ph(xmm1, xmm2, ptr [rax+0x40]);
+vfmsubadd132ph(xmm1, xmm2, ptr_b [rax+0x40]);
+vfmsubadd132ph(ymm1, ymm2, ptr [rax+0x40]);
+vfmsubadd132ph(ymm1, ymm2, ptr_b [rax+0x40]);
+vfmsubadd132ph(zmm1, zmm2, ptr [rax+0x40]);
+vfmsubadd132ph(zmm1, zmm2, ptr_b [rax+0x40]);
+vfmsubadd132ph(zmm1|T_ru_sae, zmm2, zmm5);
+vfmadd132ph(xmm1, xmm2, ptr [rax+0x40]);
+vfmadd132ph(xmm1, xmm2, ptr_b [rax+0x40]);
+vfmadd132ph(ymm1, ymm2, ptr [rax+0x40]);
+vfmadd132ph(ymm1, ymm2, ptr_b [rax+0x40]);
+vfmadd132ph(zmm1, zmm2, ptr [rax+0x40]);
+vfmadd132ph(zmm1, zmm2, ptr_b [rax+0x40]);
+vfmadd132ph(zmm1|T_rd_sae, zmm2, zmm5);
+vfmsub231ph(xmm1, xmm2, ptr [rax+0x40]);
+vfmsub231ph(xmm1, xmm2, ptr_b [rax+0x40]);
+vfmsub231ph(ymm1, ymm2, ptr [rax+0x40]);
+vfmsub231ph(ymm1, ymm2, ptr_b [rax+0x40]);
+vfmsub231ph(zmm1, zmm2, ptr [rax+0x40]);
+vfmsub231ph(zmm1, zmm2, ptr_b [rax+0x40]);
+vfmsub231ph(zmm1|T_rd_sae, zmm2, zmm5);
+vfnmsub231ph(xmm1, xmm2, ptr [rax+0x40]);
+vfnmsub231ph(ymm1, ymm2, ptr_b [rax+0x40]);
+vfnmsub231ph(zmm1, zmm2, ptr_b [rax+0x40]);
+vfnmsub231ph(zmm1|T_rd_sae, zmm2, zmm5);
+vfmadd132sh(xmm1|k1|T_z|T_rd_sae, xmm2, xmm3);
+vfmadd132sh(xmm1, xmm2, ptr [rax+0x40]);
+vfnmadd132sh(xmm1|k1|T_z|T_rd_sae, xmm2, xmm3);
+vfnmadd132sh(xmm1, xmm2, ptr [rax+0x40]);
+vfmsub132sh(xmm1|k1|T_z|T_rd_sae, xmm2, xmm3);
+vfmsub132sh(xmm1, xmm2, ptr [rax+0x40]);
+vfnmsub132sh(xmm1|k1|T_z|T_rd_sae, xmm2, xmm3);
+vfnmsub132sh(xmm1, xmm2, ptr [rax+0x40]);
+vfcmaddcph(xmm1|k1|T_z, xmm2, ptr [rax+0x40]);
+vfcmaddcph(ymm1|k1|T_z, ymm2, ptr [rax+0x40]);
+vfcmaddcph(zmm1|k1, zmm2, ptr [rax+0x40]);
+vfcmaddcph(zmm1|k1|T_rd_sae, zmm2, zmm5);
+vfcmaddcph(xmm1|k1|T_z, xmm2, ptr_b [rax+0x40]);
+vfcmaddcph(ymm1|k1|T_z, ymm2, ptr_b [rax+0x40]);
+vfcmaddcph(zmm1|k1|T_z, zmm2, ptr_b [rax+0x40]);
+vfmaddcph(xm1, xm2, ptr[rax+0x40]);
+vfmaddcph(ym1|k1|T_z, ym2, ptr_b[rax+0x40]);
+vfmaddcph(zm1, zm2, ptr_b[rax+0x40]);
+vfcmulcph(xmm1, xmm2, ptr [rax+0x40]);
+vfcmulcph(ymm1|k1|T_z, ymm2, ptr_b [rax+0x40]);
+vfcmulcph(zmm1, zmm2, ptr_b [rax+0x40]);
+vfmulcph(xmm1, xmm2, ptr [rax+0x40]);
+vfmulcph(ymm1|k1|T_z, ymm2, ptr_b [rax+0x40]);
+vfmulcph(zmm1, zmm2, ptr_b [rax+0x40]);
+vrcpph(xmm1, ptr [rax+0x40]);
+vrcpph(xmm1, ptr_b [rax+0x40]);
+vrcpph(ymm1, ptr [rax+0x40]);
+vrcpph(ymm1, ptr_b [rax+0x40]);
+vrcpph(zmm1, ptr [rax+0x40]);
+vrcpph(zmm1, ptr_b [rax+0x40]);
+vrcpsh(xmm1, xmm3, ptr [rax+0x40]);
+vrsqrtph(xmm1, ptr [rax+0x40]);
+vrsqrtph(xmm1, ptr_b [rax+0x40]);
+vrsqrtph(ymm2, ptr [rax+0x40]);
+vrsqrtph(ymm2, ptr_b [rax+0x40]);
+vrsqrtph(zmm2, ptr [rax+0x40]);
+vrsqrtph(zmm2, ptr_b [rax+0x40]);
+vrsqrtsh(xmm1|k5|T_z, xmm7, ptr [rax+0x40]);
+vsqrtph(xmm1|k4|T_z, ptr [rax+0x40]);
+vsqrtph(xmm1|k4|T_z, ptr_b [rax+0x40]);
+vsqrtph(ymm1|k4|T_z, ptr_b [rax+0x40]);
+vsqrtph(zmm1|k4|T_z, ptr [rax+0x40]);
+vsqrtph(zmm1|k4|T_z, ptr_b [rax+0x40]);
+vsqrtsh(xmm1|k4|T_z, xmm5, ptr [rax+0x40]);
+vsqrtsh(xmm1|k4|T_z|T_rd_sae, xmm5, xmm7);
+vscalefph(xmm1, xmm5, ptr [rax+0x40]);
+vscalefph(xmm1, xmm5, ptr_b [rax+0x40]);
+vscalefph(ymm1, ymm5, ptr [rax+0x40]);
+vscalefph(ymm1, ymm5, ptr_b [rax+0x40]);
+vscalefph(zmm1, zmm5, ptr [rax+0x40]);
+vscalefph(zmm1, zmm5, ptr_b [rax+0x40]);
+vscalefph(zmm1|k1|T_z|T_rd_sae, zmm5, zmm7);
+vscalefsh(xmm1, xmm5, ptr [rax+0x40]);
+vscalefsh(xmm1|k1|T_z|T_rd_sae, xmm5, xmm7);
+vreduceph(xmm1, ptr [rax+0x40], 0x1);
+vreduceph(xmm1, ptr_b [rax+0x40], 0x2);
+vreduceph(ymm1, ptr [rax+0x40], 0x3);
+vreduceph(ymm1, ptr_b [rax+0x40], 0x4);
+vreduceph(zmm1, ptr [rax+0x40], 0x5);
+vreduceph(zmm1, ptr_b [rax+0x40], 0x6);
+vreduceph(zmm1|k1|T_z|T_sae, zmm5, 0x7);
+vreducesh(xmm1, xmm3, ptr [rax+0x40], 0x1);
+vreducesh(xmm1|k1|T_z|T_sae, xmm5, xmm4, 0x2);
+vrndscaleph(xmm1, ptr [rax+0x40], 0x1);
+vrndscaleph(xmm1, ptr_b [rax+0x40], 0x2);
+vrndscaleph(ymm1, ptr [rax+0x40], 0x3);
+vrndscaleph(ymm1, ptr_b [rax+0x40], 0x4);
+vrndscaleph(zmm1, ptr [rax+0x40], 0x5);
+vrndscaleph(zmm1, ptr_b [rax+0x40], 0x6);
+vrndscaleph(zmm1|k1|T_z|T_sae, zmm5, 0x7);
+vrndscalesh(xmm1, xmm3, ptr [rax+0x40], 0x1);
+vrndscalesh(xmm1|k1|T_z|T_sae, xmm5, xmm4, 0x2);
+vfpclassph(k1, xword [rax+0x40], 0x1);
+vfpclassph(k1, xword_b[rax+0x40], 0x2);
+vfpclassph(k1, yword [rax+0x40], 0x3);
+vfpclassph(k1, yword_b[rax+0x40], 0x4);
+vfpclassph(k1, zword [rax+0x40], 0x5);
+vfpclassph(k1, zword_b[rax+0x40], 0x6);
+vfpclasssh(k1|k2, xmm3, 0x5);
+vfpclasssh(k1|k2, ptr [rax+0x40], 0x5);
+vgetexpph(xmm1, ptr [rax+0x40]);
+vgetexpph(ymm1, ptr_b [rax+0x40]);
+vgetexpph(zmm1, ptr [rax+0x40]);
+vgetexpph(zmm1|k1|T_z|T_sae, zmm5);
+vgetexpsh(xmm1, xmm5, ptr [rax+0x40]);
+vgetexpsh(xmm1|k1|T_z|T_sae, xmm3, xmm5);
+vgetmantph(xmm1, ptr [rax+0x40], 0x1);
+vgetmantph(ymm1, ptr_b [rax+0x40], 0x2);
+vgetmantph(zmm1, ptr [rax+0x40], 0x3);
+vgetmantph(zmm1|k1|T_z|T_sae, zmm5, 0x4);
+vgetmantsh(xmm1, xmm5, ptr [rax+0x40], 0x5);
+vgetmantsh(xmm1|k1|T_z|T_sae, xmm3, xmm5, 0x6);
+vmovsh(xmm1|k1|T_z, ptr [rax+0x40]);
+vmovsh(ptr [rax+0x40]|k1, xmm1);
+vmovsh(xmm1|k2|T_z, xmm3, xmm5);
+vmovw(xmm1, r13d);
+vmovw(xmm3, ptr [rax+0x40]);
+vmovw(r9d, xmm1);
+vmovw(ptr [rax+0x40], xmm7);
+vcvtsd2sh(xmm1|k1|T_z|T_rd_sae, xmm2, xmm3);
+vcvtsd2sh(xmm1, xmm2, ptr [rax+0x40]);
+vcvtsh2sd(xmm1|k1|T_z|T_sae, xmm2, xmm3);
+vcvtsh2sd(xmm1, xmm2, ptr [rax+0x40]);
+vcvtsh2ss(xmm1|k1|T_z|T_sae, xmm2, xmm3);
+vcvtsh2ss(xmm1, xmm2, ptr [rax+0x40]);
+vcvtss2sh(xmm1|k1|T_z|T_rd_sae, xmm2, xmm3);
+vcvtss2sh(xmm1, xmm2, ptr [rax+0x40]);
+vcvtsh2si(edx|T_rd_sae, xmm1);
+vcvtsh2si(edx, ptr [rax+0x40]);
+vcvtsh2si(rdx|T_rd_sae, xmm1);
+vcvtsh2si(r8, ptr [rax+0x40]);
+vcvtph2dq(xmm1, xmm5);
+vcvtph2dq(xmm1, ptr [rax+0x40]);
+vcvtph2dq(xmm1, ptr_b [rax+0x40]);
+vcvtph2dq(ymm1|k2|T_z, xmm5);
+vcvtph2dq(ymm1, ptr [rax+0x40]);
+vcvtph2dq(ymm1, ptr_b [rax+0x40]);
+vcvtph2dq(zmm1|k5|T_z|T_rd_sae, ymm3);
+vcvtph2dq(zmm1|k5|T_z, ptr [rax+0x40]);
+vcvtph2dq(zmm1|k5|T_z, ptr_b [rax+0x40]);
+vcvtph2psx(xmm1, xmm5);
+vcvtph2psx(xmm1, ptr [rax+0x40]);
+vcvtph2psx(xmm1, ptr_b [rax+0x40]);
+vcvtph2psx(ymm1|k2|T_z, xmm5);
+vcvtph2psx(ymm1, ptr [rax+0x40]);
+vcvtph2psx(ymm1, ptr_b [rax+0x40]);
+vcvtph2psx(zmm1|k5|T_z|T_sae, ymm3);
+vcvtph2psx(zmm1|k5|T_z, ptr [rax+0x40]);
+vcvtph2psx(zmm1|k5|T_z, ptr_b [rax+0x40]);
+vcvtph2udq(xmm1, xmm5);
+vcvtph2udq(xmm1, ptr [rax+0x40]);
+vcvtph2udq(xmm1, ptr_b [rax+0x40]);
+vcvtph2udq(ymm1|k2|T_z, xmm5);
+vcvtph2udq(ymm1, ptr [rax+0x40]);
+vcvtph2udq(ymm1, ptr_b [rax+0x40]);
+vcvtph2udq(zmm1|k5|T_z|T_rd_sae, ymm3);
+vcvtph2udq(zmm1|k5|T_z, ptr [rax+0x40]);
+vcvtph2udq(zmm1|k5|T_z, ptr_b [rax+0x40]);
+vcvttph2dq(xmm1, xmm5);
+vcvttph2dq(xmm1, ptr [rax+0x40]);
+vcvttph2dq(xmm1, ptr_b [rax+0x40]);
+vcvttph2dq(ymm1|k2|T_z, xmm5);
+vcvttph2dq(ymm1, ptr [rax+0x40]);
+vcvttph2dq(ymm1, ptr_b [rax+0x40]);
+vcvttph2dq(zmm1|k5|T_z|T_sae, ymm3);
+vcvttph2dq(zmm1|k5|T_z, ptr [rax+0x40]);
+vcvttph2dq(zmm1|k5|T_z, ptr_b [rax+0x40]);
+vcvttph2udq(xmm1, xmm5);
+vcvttph2udq(xmm1, ptr [rax+0x40]);
+vcvttph2udq(xmm1, ptr_b [rax+0x40]);
+vcvttph2udq(ymm1|k2|T_z, xmm5);
+vcvttph2udq(ymm1, ptr [rax+0x40]);
+vcvttph2udq(ymm1, ptr_b [rax+0x40]);
+vcvttph2udq(zmm1|k5|T_z|T_sae, ymm3);
+vcvttph2udq(zmm1|k5|T_z, ptr [rax+0x40]);
+vcvttph2udq(zmm1|k5|T_z, ptr_b [rax+0x40]);
+vcvtph2pd(xmm1, xmm5);
+vcvtph2pd(xmm1, ptr [rax+0x40]);
+vcvtph2pd(xmm1, ptr_b [rax+0x40]);
+vcvtph2pd(ymm1|k2|T_z, xmm5);
+vcvtph2pd(ymm1, ptr [rax+0x40]);
+vcvtph2pd(ymm1, ptr_b [rax+0x40]);
+vcvtph2pd(zmm1|k5|T_z|T_sae, xmm3);
+vcvtph2pd(zmm1|k5|T_z, ptr [rax+0x40]);
+vcvtph2pd(zmm1|k5|T_z, ptr_b [rax+0x40]);
+vcvtph2qq(xmm1, xmm5);
+vcvtph2qq(xmm1, ptr [rax+0x40]);
+vcvtph2qq(xmm1, ptr_b [rax+0x40]);
+vcvtph2qq(ymm1|k2|T_z, xmm5);
+vcvtph2qq(ymm1, ptr [rax+0x40]);
+vcvtph2qq(ymm1, ptr_b [rax+0x40]);
+vcvtph2qq(zmm1|k5|T_z|T_rd_sae, xmm3);
+vcvtph2qq(zmm1|k5|T_z, ptr [rax+0x40]);
+vcvtph2qq(zmm1|k5|T_z, ptr_b [rax+0x40]);
+vcvtph2uqq(xmm1, xmm5);
+vcvtph2uqq(xmm1, ptr [rax+0x40]);
+vcvtph2uqq(xmm1, ptr_b [rax+0x40]);
+vcvtph2uqq(ymm1|k2|T_z, xmm5);
+vcvtph2uqq(ymm1, ptr [rax+0x40]);
+vcvtph2uqq(ymm1, ptr_b [rax+0x40]);
+vcvtph2uqq(zmm1|k5|T_z|T_rd_sae, xmm3);
+vcvtph2uqq(zmm1|k5|T_z, ptr [rax+0x40]);
+vcvtph2uqq(zmm1|k5|T_z, ptr_b [rax+0x40]);
+vcvttph2uqq(xmm1, xmm5);
+vcvttph2uqq(xmm1, ptr [rax+0x40]);
+vcvttph2uqq(xmm1, ptr_b [rax+0x40]);
+vcvttph2uqq(ymm1|k2|T_z, xmm5);
+vcvttph2uqq(ymm1, ptr [rax+0x40]);
+vcvttph2uqq(ymm1, ptr_b [rax+0x40]);
+vcvttph2uqq(zmm1|k5|T_z|T_sae, xmm3);
+vcvttph2uqq(zmm1|k5|T_z, ptr [rax+0x40]);
+vcvttph2uqq(zmm1|k5|T_z, ptr_b [rax+0x40]);
+vcvtdq2ph(xmm1, xmm5);
+vcvtdq2ph(xmm1, xword [rax+0x40]);
+vcvtdq2ph(xmm1, xword_b [rax+0x40]);
+vcvtdq2ph(xmm1, yword [rax+0x40]);
+vcvtdq2ph(xmm1, yword_b [rax+0x40]);
+vcvtdq2ph(ymm1|k2|T_z|T_rd_sae, zmm5);
+vcvtdq2ph(ymm1, ptr [rax+0x40]);
+vcvtdq2ph(ymm1, ptr_b [rax+0x40]);
+vcvtps2phx(xmm1, xmm5);
+vcvtps2phx(xmm1, xword [rax+0x40]);
+vcvtps2phx(xmm1, xword_b [rax+0x40]);
+vcvtps2phx(xmm1, yword [rax+0x40]);
+vcvtps2phx(xmm1, yword_b [rax+0x40]);
+vcvtps2phx(ymm1|k2|T_z|T_rd_sae, zmm5);
+vcvtps2phx(ymm1, ptr [rax+0x40]);
+vcvtps2phx(ymm1, ptr_b [rax+0x40]);
+vcvtudq2ph(xmm1, xmm5);
+vcvtudq2ph(xmm1, xword [rax+0x40]);
+vcvtudq2ph(xmm1, xword_b [rax+0x40]);
+vcvtudq2ph(xmm1, yword [rax+0x40]);
+vcvtudq2ph(xmm1, yword_b [rax+0x40]);
+vcvtudq2ph(ymm1|k2|T_z|T_rd_sae, zmm5);
+vcvtudq2ph(ymm1, ptr [rax+0x40]);
+vcvtudq2ph(ymm1, ptr_b [rax+0x40]);
+vcvtpd2ph(xmm1, xmm5);
+vcvtpd2ph(xmm1, ymm5);
+vcvtpd2ph(xmm1|k2|T_z|T_rd_sae, zmm5);
+vcvtpd2ph(xmm1, xword [rax+0x40]);
+vcvtpd2ph(xmm1, xword_b [rax+0x40]);
+vcvtpd2ph(xmm1, yword [rax+0x40]);
+vcvtpd2ph(xmm1, yword_b [rax+0x40]);
+vcvtpd2ph(xmm1, zword [rax+0x40]);
+vcvtpd2ph(xmm1, zword_b [rax+0x40]);
+vcvtqq2ph(xmm1, xmm5);
+vcvtqq2ph(xmm1, ymm5);
+vcvtqq2ph(xmm1|k2|T_z|T_rd_sae, zmm5);
+vcvtqq2ph(xmm1, xword [rax+0x40]);
+vcvtqq2ph(xmm1, xword_b [rax+0x40]);
+vcvtqq2ph(xmm1, yword [rax+0x40]);
+vcvtqq2ph(xmm1, yword_b [rax+0x40]);
+vcvtqq2ph(xmm1, zword [rax+0x40]);
+vcvtqq2ph(xmm1, zword_b [rax+0x40]);
+vcvtuqq2ph(xmm1, xmm5);
+vcvtuqq2ph(xmm1, ymm5);
+vcvtuqq2ph(xmm1|k2|T_z|T_rd_sae, zmm5);
+vcvtuqq2ph(xmm1, xword [rax+0x40]);
+vcvtuqq2ph(xmm1, xword_b [rax+0x40]);
+vcvtuqq2ph(xmm1, yword [rax+0x40]);
+vcvtuqq2ph(xmm1, yword_b [rax+0x40]);
+vcvtuqq2ph(xmm1, zword [rax+0x40]);
+vcvtuqq2ph(xmm1, zword_b [rax+0x40]);
+vcvtph2uw(xmm1, xmm5);
+vcvtph2uw(xmm1, ptr [rax+0x40]);
+vcvtph2uw(xmm1, ptr_b [rax+0x40]);
+vcvtph2uw(ymm1, ptr [rax+0x40]);
+vcvtph2uw(ymm1, ptr_b [rax+0x40]);
+vcvtph2uw(zmm1|k2|T_z|T_rd_sae, zmm5);
+vcvtph2uw(zmm1, ptr [rax+0x40]);
+vcvtph2uw(zmm1, ptr_b [rax+0x40]);
+vcvtph2w(xmm1, xmm5);
+vcvtph2w(xmm1, ptr [rax+0x40]);
+vcvtph2w(xmm1, ptr_b [rax+0x40]);
+vcvtph2w(ymm1, ptr [rax+0x40]);
+vcvtph2w(ymm1, ptr_b [rax+0x40]);
+vcvtph2w(zmm1|k2|T_z|T_rd_sae, zmm5);
+vcvtph2w(zmm1, ptr [rax+0x40]);
+vcvtph2w(zmm1, ptr_b [rax+0x40]);
+vcvttph2uw(xmm1, xmm5);
+vcvttph2uw(xmm1, ptr [rax+0x40]);
+vcvttph2uw(xmm1, ptr_b [rax+0x40]);
+vcvttph2uw(ymm1, ptr [rax+0x40]);
+vcvttph2uw(ymm1, ptr_b [rax+0x40]);
+vcvttph2uw(zmm1|k2|T_z|T_sae, zmm5);
+vcvttph2uw(zmm1, ptr [rax+0x40]);
+vcvttph2uw(zmm1, ptr_b [rax+0x40]);
+vcvttph2w(xmm1, xmm5);
+vcvttph2w(xmm1, ptr [rax+0x40]);
+vcvttph2w(xmm1, ptr_b [rax+0x40]);
+vcvttph2w(ymm1, ptr [rax+0x40]);
+vcvttph2w(ymm1, ptr_b [rax+0x40]);
+vcvttph2w(zmm1|k2|T_z|T_sae, zmm5);
+vcvttph2w(zmm1, ptr [rax+0x40]);
+vcvttph2w(zmm1, ptr_b [rax+0x40]);
+vcvtuw2ph(xmm1, xmm5);
+vcvtuw2ph(xmm1, ptr [rax+0x40]);
+vcvtuw2ph(xmm1, ptr_b [rax+0x40]);
+vcvtuw2ph(ymm1, ptr [rax+0x40]);
+vcvtuw2ph(ymm1, ptr_b [rax+0x40]);
+vcvtuw2ph(zmm1|k2|T_z|T_rd_sae, zmm5);
+vcvtuw2ph(zmm1, ptr [rax+0x40]);
+vcvtuw2ph(zmm1, ptr_b [rax+0x40]);
+vcvtw2ph(xmm1, xmm5);
+vcvtw2ph(xmm1, ptr [rax+0x40]);
+vcvtw2ph(xmm1, ptr_b [rax+0x40]);
+vcvtw2ph(ymm1, ptr [rax+0x40]);
+vcvtw2ph(ymm1, ptr_b [rax+0x40]);
+vcvtw2ph(zmm1|k2|T_z|T_rd_sae, zmm5);
+vcvtw2ph(zmm1, ptr [rax+0x40]);
+vcvtw2ph(zmm1, ptr_b [rax+0x40]);
+vcvtps2ph(xmm1, xmm2, 0x1);
+vcvtps2ph(ptr [rax+0x40], xmm2, 0x2);
+vcvtps2ph(xmm1, ymm2, 0x3);
+vcvtps2ph(ptr [rax+0x40], ymm2, 0x4);
+vcvtps2ph(xmm1|k1|T_z, xmm2, 0x5);
+vcvtps2ph(ptr [rax+0x40]|k1, xmm3, 0x6);
+vcvtps2ph(xmm1|k2, ymm4, 0x7);
+vcvtps2ph(ptr [rax+0x40]|k2, ymm5, 0x8);
+vcvtps2ph(ymm1|k2|T_sae, zmm5, 0x9);
+vcvtps2ph(ptr [rax+0x40]|k5, zmm4, 0xa);
+vcvtsh2usi(ecx|T_rd_sae, xmm1);
+vcvtsh2usi(eax, ptr [rax+0x40]);
+vcvtsh2usi(r9|T_rd_sae, xmm1);
+vcvtsh2usi(r13, ptr [rax+0x40]);
+vcvttsh2si(ecx|T_sae, xmm1);
+vcvttsh2si(eax, ptr [rax+0x40]);
+vcvttsh2si(r9|T_sae, xmm1);
+vcvttsh2si(r13, ptr [rax+0x40]);
+vcvttsh2usi(ecx|T_sae, xmm1);
+vcvttsh2usi(eax, ptr [rax+0x40]);
+vcvttsh2usi(r9|T_sae, xmm1);
+vcvttsh2usi(r13, ptr [rax+0x40]);
+vcvttph2qq(xmm1, xmm5);
+vcvttph2qq(xmm1, ptr [rax+0x40]);
+vcvttph2qq(xmm1, ptr_b [rax+0x40]);
+vcvttph2qq(ymm1|k2|T_z, xmm5);
+vcvttph2qq(ymm1, ptr [rax+0x40]);
+vcvttph2qq(ymm1, ptr_b [rax+0x40]);
+vcvttph2qq(zmm1|k5|T_z|T_sae, xmm3);
+vcvttph2qq(zmm1|k5|T_z, ptr [rax+0x40]);
+vcvttph2qq(zmm1|k5|T_z, ptr_b [rax+0x40]);
+vcvtsi2sh(xmm1|T_rd_sae, xmm2, eax);
+vcvtsi2sh(xmm1, xmm2, dword [rax+0x40]);
+vcvtsi2sh(xmm1|T_rd_sae, xmm2, r9);
+vcvtsi2sh(xmm1, xmm2, qword [rax+0x40]);
+vcvtusi2sh(xmm1|T_rd_sae, xmm2, eax);
+vcvtusi2sh(xmm1, xmm2, dword [rax+0x40]);
+vcvtusi2sh(xmm1|T_rd_sae, xmm2, r9);
+vcvtusi2sh(xmm1, xmm2, qword [rax+0x40]);
+aadd(ptr[rax], ecx);
+aadd(ptr[eax], ecx);
+aadd(ptr[rax], r10);
+aand(ptr[rax], ecx);
+aand(ptr[eax], ecx);
+aand(ptr[rax], r10);
+aor(ptr[rax], ecx);
+aor(ptr[eax], ecx);
+aor(ptr[rax], r10);
+axor(ptr[rax], ecx);
+axor(ptr[eax], ecx);
+axor(ptr[rax], r10);
+cmpbexadd(ptr[rax+r10*4], rcx, rdx);
+cmpbxadd(ptr[rax+r10*4], rcx, rdx);
+cmplexadd(ptr[rax+r10*4], rcx, rdx);
+cmplxadd(ptr[rax+r10*4], rcx, rdx);
+cmpnbexadd(ptr[rax+r10*4], rcx, rdx);
+cmpnbxadd(ptr[rax+r10*4], rcx, rdx);
+cmpnlexadd(ptr[rax+r10*4], rcx, rdx);
+cmpnlxadd(ptr[rax+r10*4], rcx, rdx);
+cmpnoxadd(ptr[rax+r10*4], rcx, rdx);
+cmpnpxadd(ptr[rax+r10*4], rcx, rdx);
+cmpnsxadd(ptr[rax+r10*4], rcx, rdx);
+cmpnzxadd(ptr[rax+r10*4], rcx, rdx);
+cmpoxadd(ptr[rax+r10*4], rcx, rdx);
+cmppxadd(ptr[rax+r10*4], rcx, rdx);
+cmpsxadd(ptr[rax+r10*4], rcx, rdx);
+cmpzxadd(ptr[rax+r10*4], rcx, rdx);
+vsha512msg1(ymm3, xmm5);
+vsha512msg2(ymm9, ymm10);
+vsha512rnds2(ymm1, ymm3, xmm2);
+vsm3msg1(xmm1, xmm2, xmm3);
+vsm3msg1(xmm1, xmm2, ptr [rax]);
+vsm3msg2(xmm5, xmm7, xmm3);
+vsm3msg2(xmm5, xmm6, ptr [rax]);
+vsm3rnds2(xmm5, xmm7, xmm3, 0x12);
+vsm3rnds2(xmm5, xmm7, ptr [rcx], 0x34);
+vsm4key4(xmm1, xmm2, xmm3);
+vsm4key4(xmm1, xmm2, ptr [rdx]);
+vsm4rnds4(xmm1, xmm2, xmm3);
+vsm4rnds4(xmm5, xmm6, ptr [rcx+rax*4]);
+vpdpbssd(xmm1, xmm2, xmm3);
+vpdpbssd(ymm1, ymm2, ptr [rax]);
+vpdpbssds(xmm1, xmm2, xmm3);
+vpdpbssds(ymm1, ymm2, ptr [rax]);
+vpdpbsud(xmm1, xmm2, xmm3);
+vpdpbsud(ymm1, ymm2, ptr [rax]);
+vpdpbsuds(xmm1, xmm2, xmm3);
+vpdpbsuds(ymm1, ymm2, ptr [rax]);
+vpdpbuud(xmm1, xmm2, xmm3);
+vpdpbuud(ymm1, ymm2, ptr [rax]);
+vpdpbuuds(xmm1, xmm2, xmm3);
+vpdpbuuds(ymm1, ymm2, ptr [rax]);
+vpdpwsud(xmm1, xmm2, xmm3);
+vpdpwsud(ymm1, ymm2, ptr [rax]);
+vpdpwsuds(xmm1, xmm2, xmm3);
+vpdpwsuds(ymm1, ymm2, ptr [rax]);
+vpdpwusd(xmm1, xmm2, xmm3);
+vpdpwusd(ymm1, ymm2, ptr [rax]);
+vpdpwusds(xmm1, xmm2, xmm3);
+vpdpwusds(ymm1, ymm2, ptr [rax]);
+vpdpwuud(xmm1, xmm2, xmm3);
+vpdpwuud(ymm1, ymm2, ptr [rax]);
+vpdpwuuds(xmm1, xmm2, xmm3);
+vpdpwuuds(ymm1, ymm2, ptr [rax]);