diff options
-rw-r--r-- | doc/usage.md | 7 | ||||
-rw-r--r-- | gen/gen_avx512.cpp | 7 | ||||
-rw-r--r-- | test/Makefile | 2 | ||||
-rw-r--r-- | test/avx10/misc.txt | 7 | ||||
-rw-r--r-- | test/avx10/old.txt | 4 | ||||
-rw-r--r-- | test/test_by_xed.cpp | 2 | ||||
-rw-r--r-- | xbyak/xbyak.h | 47 | ||||
-rw-r--r-- | xbyak/xbyak_mnemonic.h | 3 |
8 files changed, 44 insertions, 35 deletions
diff --git a/doc/usage.md b/doc/usage.md index 9636613..ef38d63 100644 --- a/doc/usage.md +++ b/doc/usage.md @@ -124,7 +124,7 @@ vpdpbusd(xm0, xm1, xm2); // VEX vmpsadbw(xm1, xm3, xm15, 3); // default encoding: VEX (AVX-VNNI) vmpsadbw(xm1, xm3, xm15, 3, VexEncoding); // same as the above vmpsadbw(xm1, xm3, xm15, 3, EvexEncoding); // EVEX (AVX10.2) -setDefaultEncoding(VexEncoding, AVX10p2Encoding); // use 2nd argument. +setDefaultEncoding(VexEncoding, AVX10v2Encoding); // use 2nd argument. vmpsadbw(xm1, xm3, xm15, 3); // EVEX (AVX10.2) ``` @@ -133,9 +133,10 @@ Control the default encoding of mnemonics with `Xbyak::PreferredEncoding` param. param|vnniEnc|avx10Enc -|-|- -VexEncoding|AVX-VNNI|AVX-VNNI-INT8 +VexEncoding|AVX-VNNI|- EvexEncoding|AVX512-VNNI|- -AVX10p2Encoding|-|AVX10.2 +PreAVX10v2Encoding|-|AVX-VNNI-INT8, AVX512-FP16 +AVX10v2Encoding|-|AVX10.2 default|EvexEncoding|VexEncoding mnemonic|vpdpbusd, vpdpbusds, vpdpwssd, vpdpwssds|vmpsadbw, vpdpbssd, vpdpbssds, vpdpbsud, vpdpbsuds, vpdpbuud, vpdpbuuds, vpdpwsud vpdpwsuds vpdpwusd vpdpwusds vpdpwuud, vpdpwuuds, vmovd, vmovw diff --git a/gen/gen_avx512.cpp b/gen/gen_avx512.cpp index 07e68b4..e4d319e 100644 --- a/gen/gen_avx512.cpp +++ b/gen/gen_avx512.cpp @@ -264,7 +264,6 @@ void putM_X() { 0x7F, "vmovdqu32", T_F3 | T_0F | T_MUST_EVEX | T_YMM | T_EW0 | T_ER_X | T_ER_Y | T_ER_Z | T_M_K }, { 0x7F, "vmovdqu64", T_F3 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_ER_X | T_ER_Y | T_ER_Z | T_M_K }, { 0x11, "vmovsh", T_F3 | T_MAP5 | T_MUST_EVEX | T_EW0 | T_N2 | T_M_K }, - { 0x7E, "vmovw", T_66 | T_MAP5 | T_MUST_EVEX | T_N2 }, }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl *p = &tbl[i]; @@ -1079,12 +1078,6 @@ void putFP16_2() printf("void vmovsh(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, xm0, addr, %s, 0x10); }\n", s.c_str()); printf("void vmovsh(const Xmm& x1, const Xmm& x2, const Xmm& x3) { opAVX_X_X_XM(x1, x2, x3, %s, 0x10); }\n", s.c_str()); } - { - uint64_t type = T_66 | T_MAP5 | T_MUST_EVEX | T_N2; - std::string s = type2String(type); - printf("void vmovw(const Xmm& x, const Operand& op) { if (!op.isREG(32|64) && !op.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x, xm0, op, %s, 0x6E); }\n", s.c_str()); - printf("void vmovw(const Reg32e& r, const Xmm& x) { opAVX_X_X_XM(x, xm0, r, %s, 0x7E); }\n", s.c_str()); - } } void putFP16() diff --git a/test/Makefile b/test/Makefile index 8313a6c..cf5c716 100644 --- a/test/Makefile +++ b/test/Makefile @@ -60,7 +60,7 @@ apx: apx.cpp $(XBYAK_INC) avx10_test: avx10_test.cpp $(XBYAK_INC) $(CXX) $(CFLAGS) avx10_test.cpp -o $@ -DXBYAK64 -TEST_FILES=old.txt new-ymm.txt bf16.txt comp.txt convert.txt minmax.txt saturation.txt +TEST_FILES=old.txt new-ymm.txt bf16.txt comp.txt misc.txt convert.txt minmax.txt saturation.txt xed_test: @set -e; \ for target in $(addprefix avx10/, $(TEST_FILES)); do \ diff --git a/test/avx10/misc.txt b/test/avx10/misc.txt index 7c969bf..6f5c156 100644 --- a/test/avx10/misc.txt +++ b/test/avx10/misc.txt @@ -1,3 +1,4 @@ +// AVX10 integer and FP16 VNNI, media and zero-extending vdpphps(xm1, xm2, xm3); vdpphps(xm1, xm2, ptr[rax+128]); vdpphps(xm1, xm2, ptr_b[rax+128]); @@ -168,5 +169,11 @@ vpdpwuuds(zm1, zm2, ptr_b[rax+128]); // vmovd(xm10, xm20); +vmovd(xm1, xm2); vmovd(xm10, ptr[rax+128]); vmovd(ptr[rax+128], xm30); +// +vmovw(xm1, xm20); +vmovw(xm1, xm2); +vmovw(xm3, ptr [rax+0x40]); +vmovw(ptr [rax+0x40], xm7); diff --git a/test/avx10/old.txt b/test/avx10/old.txt index 9e4f097..f5a143c 100644 --- a/test/avx10/old.txt +++ b/test/avx10/old.txt @@ -355,10 +355,6 @@ vgetmantsh(xmm1|k1|T_z|T_sae, xmm3, xmm5, 0x6); vmovsh(xmm1|k1|T_z, ptr [rax+0x40]); vmovsh(ptr [rax+0x40]|k1, xmm1); vmovsh(xmm1|k2|T_z, xmm3, xmm5); -vmovw(xmm1, r13d); -vmovw(xmm3, ptr [rax+0x40]); -vmovw(r9d, xmm1); -vmovw(ptr [rax+0x40], xmm7); vcvtsd2sh(xmm1|k1|T_z|T_rd_sae, xmm2, xmm3); vcvtsd2sh(xmm1, xmm2, ptr [rax+0x40]); vcvtsh2sd(xmm1|k1|T_z|T_sae, xmm2, xmm3); diff --git a/test/test_by_xed.cpp b/test/test_by_xed.cpp index 71b5137..af39296 100644 --- a/test/test_by_xed.cpp +++ b/test/test_by_xed.cpp @@ -7,7 +7,7 @@ struct Code : Xbyak::CodeGenerator { Code() : Xbyak::CodeGenerator(4096*8) { - setDefaultEncoding(EvexEncoding, AVX10p2Encoding); + setDefaultEncoding(EvexEncoding, AVX10v2Encoding); #include "tmp.cpp" } }; diff --git a/xbyak/xbyak.h b/xbyak/xbyak.h index a3d1fca..5367d83 100644 --- a/xbyak/xbyak.h +++ b/xbyak/xbyak.h @@ -1674,8 +1674,8 @@ typedef enum { DefaultEncoding, VexEncoding, EvexEncoding, - AVX512Encoding = EvexEncoding, - AVX10p2Encoding + PreAVX10v2Encoding = EvexEncoding, + AVX10v2Encoding } PreferredEncoding; class CodeGenerator : public CodeArray { @@ -3177,9 +3177,9 @@ public: #endif // set default encoding - // vnniEnc : control AVX512_VNNI (evex:default) or AVX-VNNI (vex) - // avx10Enc : control mpsadbw, AVX-VNNI-INT8 (vex:default) or AVX10.2 (AVX10p2Encoding) - void setDefaultEncoding(PreferredEncoding vnniEnc = EvexEncoding, PreferredEncoding avx10Enc = VexEncoding) + // vnniEnc : AVX512_VNNI (default:EvexEncoding) or AVX-VNNI (VexEncoding) + // avx10Enc : mpsadbw etc., AVX-VNNI-INT8/AVX512-FP16 (default:PreAVX10v2Encoding) or AVX10.2 (AVX10v2Encoding) + void setDefaultEncoding(PreferredEncoding vnniEnc = EvexEncoding, PreferredEncoding avx10Enc = PreAVX10v2Encoding) { defaultEncoding_[0] = vnniEnc; defaultEncoding_[1] = avx10Enc; } void bswap(const Reg32e& r) @@ -3194,7 +3194,8 @@ public: } db(0xC8 + (idx & 7)); } - void vmovd(const Operand& op1, const Operand& op2, PreferredEncoding encoding = DefaultEncoding) + // AVX10 zero-extending for vmovd, vmovw + void opAVX10ZeroExt(const Operand& op1, const Operand& op2, const uint64_t typeTbl[4], const int codeTbl[4], PreferredEncoding encoding, int bit) { const Operand *p1 = &op1; const Operand *p2 = &op2; @@ -3208,18 +3209,32 @@ public: std::swap(p1, p2); rev = !rev; } - if (getEncoding(encoding, 1) == AVX10p2Encoding) { - if ((p1->isXMM() || p1->isMEM()) && p2->isXMM()) { - opAVX_X_X_XM(*static_cast<const Xmm*>(p2), xm0, *p1, T_EVEX|(rev ? T_F3 : T_66)|T_MUST_EVEX|T_0F|T_EW0|T_N4, rev ? 0x7E : 0xD6); - return; - } + int sel = -1; + if (getEncoding(encoding, 1) == AVX10v2Encoding) { + if ((p1->isXMM() || p1->isMEM()) && p2->isXMM()) sel = 2 + int(rev); } else { - if ((p1->isREG(32) || p1->isMEM()) && p2->isXMM()) { - opAVX_X_X_XM(*static_cast<const Xmm*>(p2), xm0, *p1, T_EVEX|T_66|T_0F|T_W0|T_N4, rev ? 0x6E : 0x7E); - return; - } + if ((p1->isREG(bit) || p1->isMEM()) && p2->isXMM()) sel = int(rev); } - XBYAK_THROW(ERR_BAD_COMBINATION) + if (sel == -1) XBYAK_THROW(ERR_BAD_COMBINATION) + opAVX_X_X_XM(*static_cast<const Xmm*>(p2), xm0, *p1, typeTbl[sel], codeTbl[sel]); + } + void vmovd(const Operand& op1, const Operand& op2, PreferredEncoding encoding = DefaultEncoding) + { + const uint64_t typeTbl[] = { + T_EVEX|T_66|T_0F|T_W0|T_N4, T_EVEX|T_66|T_0F|T_W0|T_N4, // legacy, avx, avx512 + T_MUST_EVEX|T_66|T_0F|T_EW0|T_N4, T_MUST_EVEX|T_F3|T_0F|T_EW0|T_N4, // avx10.2 + }; + const int codeTbl[] = { 0x7E, 0x6E, 0xD6, 0x7E }; + opAVX10ZeroExt(op1, op2, typeTbl, codeTbl, encoding, 32); + } + void vmovw(const Operand& op1, const Operand& op2, PreferredEncoding encoding = DefaultEncoding) + { + const uint64_t typeTbl[] = { + T_MUST_EVEX|T_66|T_MAP5|T_N2, T_MUST_EVEX|T_66|T_MAP5|T_N2, // avx512-fp16 + T_MUST_EVEX|T_F3|T_MAP5|T_EW0|T_N2, T_MUST_EVEX|T_F3|T_MAP5|T_EW0|T_N2, // avx10.2 + }; + const int codeTbl[] = { 0x7E, 0x6E, 0x7E, 0x6E }; + opAVX10ZeroExt(op1, op2, typeTbl, codeTbl, encoding, 16|32|64); } /* use single byte nop if useMultiByteNop = false diff --git a/xbyak/xbyak_mnemonic.h b/xbyak/xbyak_mnemonic.h index cea4e61..314bb13 100644 --- a/xbyak/xbyak_mnemonic.h +++ b/xbyak/xbyak_mnemonic.h @@ -2422,9 +2422,6 @@ void vmovdqu8(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_F2|T_0F void vmovsh(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, T_N2|T_F3|T_MAP5|T_EW0|T_MUST_EVEX|T_M_K, 0x11); } void vmovsh(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, xm0, addr, T_N2|T_F3|T_MAP5|T_EW0|T_MUST_EVEX, 0x10); } void vmovsh(const Xmm& x1, const Xmm& x2, const Xmm& x3) { opAVX_X_X_XM(x1, x2, x3, T_N2|T_F3|T_MAP5|T_EW0|T_MUST_EVEX, 0x10); } -void vmovw(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, T_N2|T_66|T_MAP5|T_MUST_EVEX, 0x7E); } -void vmovw(const Reg32e& r, const Xmm& x) { opAVX_X_X_XM(x, xm0, r, T_N2|T_66|T_MAP5|T_MUST_EVEX, 0x7E); } -void vmovw(const Xmm& x, const Operand& op) { if (!op.isREG(32|64) && !op.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x, xm0, op, T_N2|T_66|T_MAP5|T_MUST_EVEX, 0x6E); } void vmpsadbw(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm, PreferredEncoding encoding = DefaultEncoding) { opEncoding(x1, x2, op, T_0F3A|T_YMM, 0x42, encoding, imm, T_66|T_W0|T_YMM, T_F3|T_0F3A|T_EW0|T_B32, 1); } void vmulnepbf16(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_MAP5|T_EW0|T_YMM|T_MUST_EVEX|T_B16, 0x59); } void vmulph(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_MAP5 | T_EW0 | T_YMM | T_MUST_EVEX | T_ER_Z | T_B16, 0x59); } |