diff options
author | MITSUNARI Shigeo <[email protected]> | 2024-10-13 14:55:20 +0900 |
---|---|---|
committer | MITSUNARI Shigeo <[email protected]> | 2024-10-13 14:55:20 +0900 |
commit | 14ae9bf4859739ca9b23f421d23693a15e75769d (patch) | |
tree | b24ebb716d4e158ca6c2d016e602096f6a82d3d3 | |
parent | 2818beeffd198dae543019347360252d0ea7b78f (diff) | |
download | xbyak-14ae9bf4859739ca9b23f421d23693a15e75769d.tar.gz xbyak-14ae9bf4859739ca9b23f421d23693a15e75769d.zip |
add vpdpbssd for avx10.2
-rw-r--r-- | gen/gen_avx512.cpp | 33 | ||||
-rw-r--r-- | gen/gen_code.cpp | 2 | ||||
-rw-r--r-- | test/avx10/misc.txt | 12 | ||||
-rw-r--r-- | xbyak/xbyak_mnemonic.h | 4 |
4 files changed, 47 insertions, 4 deletions
diff --git a/gen/gen_avx512.cpp b/gen/gen_avx512.cpp index 9840844..9159a64 100644 --- a/gen/gen_avx512.cpp +++ b/gen/gen_avx512.cpp @@ -454,7 +454,37 @@ void putX_X_XM_IMM() printf("void %s(const Xmm& x1, const Xmm& x2, const Operand& op%s) { opAVX_X_X_XM(x1, x2, op, %s, 0x%02X%s); }\n" , p->name, p->hasIMM ? ", uint8_t imm" : "", s.c_str(), p->code, p->hasIMM ? ", imm" : ""); } - puts("void vmpsadbw(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm, PreferredEncoding encoding = DefaultEncoding) { opEncoding(x1, x2, op, T_0F3A | T_YMM, 0x42, encoding, imm, T_66 | T_W0 | T_YMM, T_F3 | T_0F3A | T_EW0 | T_B32, 1); }"); +} + +void putX_X_XM_IMM_AVX10() +{ + const struct Tbl { + uint8_t code; + const char *name; + uint64_t type; + uint64_t typeVex; + uint64_t typeEvex; + int sel; + bool hasIMM; + } tbl[] = { + { 0x50, "vpdpbssd", T_F2|T_0F38|T_YMM, T_W0, T_EW0|T_B32, 1, false }, +#if 0 + { 0x51, "vpdpbssds", T_MUST_EVEX | T_YMM | T_F2 | T_0F38 | T_EW0 | T_B32, false }, + { 0x50, "vpdpbsud", T_MUST_EVEX | T_YMM | T_F3 | T_0F38 | T_EW0 | T_B32, false }, + { 0x51, "vpdpbsuds", T_MUST_EVEX | T_YMM | T_F3 | T_0F38 | T_EW0 | T_B32, false }, + { 0x50, "vpdpbuud", T_MUST_EVEX | T_YMM | T_0F38 | T_EW0 | T_B32, false }, + { 0x51, "vpdpbuuds", T_MUST_EVEX | T_YMM | T_0F38 | T_EW0 | T_B32, false }, +#endif + { 0x42, "vmpsadbw", T_0F3A|T_YMM, T_66|T_W0|T_YMM, T_F3|T_0F3A|T_EW0|T_B32, 1, true }, + }; + for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { + const Tbl *p = &tbl[i]; + std::string s = type2String(p->type); + std::string sVex = type2String(p->typeVex); + std::string sEvex = type2String(p->typeEvex); + printf("void %s(const Xmm& x1, const Xmm& x2, const Operand& op%s, PreferredEncoding encoding = DefaultEncoding) { opEncoding(x1, x2, op, %s, 0x%02X, encoding, %s, %s, %s, %d); }\n" + , p->name, p->hasIMM ? ", uint8_t imm" : "", s.c_str(), p->code, p->hasIMM ? "imm" : "NONE", sVex.c_str(), sEvex.c_str(), p->sel); + } } void putShift() @@ -1059,6 +1089,7 @@ int main(int argc, char *[]) putM_X(); putXM_X(); putX_X_XM_IMM(); + putX_X_XM_IMM_AVX10(); putShift(); putExtractInsert(); putCvt(); diff --git a/gen/gen_code.cpp b/gen/gen_code.cpp index 58c176a..caa9e79 100644 --- a/gen/gen_code.cpp +++ b/gen/gen_code.cpp @@ -1907,7 +1907,7 @@ void put() const char *name; uint64_t type; } tbl[] = { - { 0x50, "vpdpbssd", T_F2 | T_0F38 | T_W0 | T_YMM }, +// { 0x50, "vpdpbssd", T_F2 | T_0F38 | T_W0 | T_YMM }, { 0x51, "vpdpbssds", T_F2 | T_0F38 | T_W0 | T_YMM }, { 0x50, "vpdpbsud", T_F3 | T_0F38 | T_W0 | T_YMM }, { 0x51, "vpdpbsuds", T_F3 | T_0F38 | T_W0 | T_YMM }, diff --git a/test/avx10/misc.txt b/test/avx10/misc.txt index 5c39e81..8993107 100644 --- a/test/avx10/misc.txt +++ b/test/avx10/misc.txt @@ -18,3 +18,15 @@ vmpsadbw(ym1, ym4, ptr[rax+128], 5); vmpsadbw(zm1|k4, zm3, zm15, 3); vmpsadbw(zm1, zm4, ptr[rax+128], 5); + +vpdpbssd(xm1, xm2, xm3); +vpdpbssd(xm1, xm2, ptr[rax+128]); +vpdpbssd(xm1, xm2, ptr_b[rax+128]); + +vpdpbssd(ym1, ym2, ym3); +vpdpbssd(ym1, ym2, ptr[rax+128]); +vpdpbssd(ym1, ym2, ptr_b[rax+128]); + +vpdpbssd(zm1, zm2, zm3); +vpdpbssd(zm1, zm2, ptr[rax+128]); +vpdpbssd(zm1, zm2, ptr_b[rax+128]); diff --git a/xbyak/xbyak_mnemonic.h b/xbyak/xbyak_mnemonic.h index 8515e41..dbe52e9 100644 --- a/xbyak/xbyak_mnemonic.h +++ b/xbyak/xbyak_mnemonic.h @@ -1419,7 +1419,6 @@ void vpcmpgtq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1 void vpcmpgtw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F|T_YMM, 0x65); } void vpcmpistri(const Xmm& xm, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(xm, op, T_66|T_0F3A, 0x63, imm); } void vpcmpistrm(const Xmm& xm, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(xm, op, T_66|T_0F3A, 0x62, imm); } -void vpdpbssd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F2|T_0F38|T_W0|T_YMM, 0x50); } void vpdpbssds(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F2|T_0F38|T_W0|T_YMM, 0x51); } void vpdpbsud(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F3|T_0F38|T_W0|T_YMM, 0x50); } void vpdpbsuds(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F3|T_0F38|T_W0|T_YMM, 0x51); } @@ -2407,7 +2406,7 @@ void vmovsh(const Xmm& x1, const Xmm& x2, const Xmm& x3) { opAVX_X_X_XM(x1, x2, void vmovw(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, T_N2|T_66|T_MAP5|T_MUST_EVEX, 0x7E); } void vmovw(const Reg32e& r, const Xmm& x) { opAVX_X_X_XM(x, xm0, r, T_N2|T_66|T_MAP5|T_MUST_EVEX, 0x7E); } void vmovw(const Xmm& x, const Operand& op) { if (!op.isREG(32|64) && !op.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x, xm0, op, T_N2|T_66|T_MAP5|T_MUST_EVEX, 0x6E); } -void vmpsadbw(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm, PreferredEncoding encoding = DefaultEncoding) { opEncoding(x1, x2, op, T_0F3A | T_YMM, 0x42, encoding, imm, T_66 | T_W0 | T_YMM, T_F3 | T_0F3A | T_EW0 | T_B32, 1); } +void vmpsadbw(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm, PreferredEncoding encoding = DefaultEncoding) { opEncoding(x1, x2, op, T_0F3A|T_YMM, 0x42, encoding, imm, T_66|T_W0|T_YMM, T_F3|T_0F3A|T_EW0|T_B32, 1); } void vmulnepbf16(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_MAP5|T_EW0|T_YMM|T_MUST_EVEX|T_B16, 0x59); } void vmulph(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_MAP5 | T_EW0 | T_YMM | T_MUST_EVEX | T_ER_Z | T_B16, 0x59); } void vmulsh(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_MAP5 | T_F3 | T_EW0 | T_MUST_EVEX | T_ER_X | T_N2, 0x59); } @@ -2451,6 +2450,7 @@ void vpcompressq(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, T_N8|T void vpcompressw(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, T_N2|T_66|T_0F38|T_EW1|T_YMM|T_MUST_EVEX, 0x63); } void vpconflictd(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66|T_0F38|T_EW0|T_YMM|T_MUST_EVEX|T_B32, 0xC4); } void vpconflictq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66|T_0F38|T_EW1|T_YMM|T_MUST_EVEX|T_B64, 0xC4); } +void vpdpbssd(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opEncoding(x1, x2, op, T_F2|T_0F38|T_YMM, 0x50, encoding, NONE, T_W0, T_EW0|T_B32, 1); } void vpermb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_EW0|T_YMM|T_MUST_EVEX, 0x8D); } void vpermi2b(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_EW0|T_YMM|T_MUST_EVEX, 0x75); } void vpermi2d(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_EW0|T_YMM|T_MUST_EVEX|T_B32, 0x76); } |