diff options
-rw-r--r-- | gen/gen_avx512.cpp | 5 | ||||
-rw-r--r-- | test/misc.cpp | 19 | ||||
-rw-r--r-- | xbyak/xbyak.h | 13 | ||||
-rw-r--r-- | xbyak/xbyak_mnemonic.h | 1 |
4 files changed, 36 insertions, 2 deletions
diff --git a/gen/gen_avx512.cpp b/gen/gen_avx512.cpp index 86b0b2d..b0af54e 100644 --- a/gen/gen_avx512.cpp +++ b/gen/gen_avx512.cpp @@ -561,6 +561,8 @@ void putCvt() { 0x7B, "vcvtph2qq", T_66 | T_MAP5 | T_MUST_EVEX | T_YMM | T_EW0 | T_B16 | T_N4 | T_N_VL | T_ER_X, 3 }, { 0x79, "vcvtph2uqq", T_66 | T_MAP5 | T_MUST_EVEX | T_YMM | T_EW0 | T_B16 | T_N4 | T_N_VL | T_ER_X, 3 }, { 0x78, "vcvttph2uqq", T_66 | T_MAP5 | T_MUST_EVEX | T_YMM | T_EW0 | T_B16 | T_N4 | T_N_VL | T_SAE_X, 3 }, + + { 0x5B, "vcvtdq2ph", T_MAP5 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 | T_ER_Z | T_N16 | T_N_VL, 4 }, }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl& p = tbl[i]; @@ -578,6 +580,9 @@ void putCvt() case 3: printf("void %s(const Xmm& x, const Operand& op) { if (!op.isXMM() && !op.isMEM()) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(x, 0, op, %s, 0x%02X); }\n", p.name, type.c_str(), p.code); break; + case 4: + printf("void %s(const Xmm& x, const Operand& op) { checkCvt4(x, op); opCvt(x, op, %s, 0x%02X); }\n", p.name, type.c_str(), p.code); + break; } } puts("void vcvtusi2sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opCvt3(x1, x2, op, T_F2 | T_0F | T_MUST_EVEX, T_W1 | T_EW1 | T_ER_X | T_N8, T_W0 | T_EW0 | T_N4, 0x7B); }"); diff --git a/test/misc.cpp b/test/misc.cpp index b70f49c..7b23dbd 100644 --- a/test/misc.cpp +++ b/test/misc.cpp @@ -1170,6 +1170,15 @@ CYBOZU_TEST_AUTO(vaddph) vcvttph2uqq(zmm1|k5|T_z, ptr [rax+0x40]); vcvttph2uqq(zmm1|k5|T_z, ptr_b [rax+0x40]); + vcvtdq2ph(xmm1, xmm5); + vcvtdq2ph(xmm1, xword [rax+0x40]); + vcvtdq2ph(xmm1, xword_b [rax+0x40]); + vcvtdq2ph(xmm1, yword [rax+0x40]); + vcvtdq2ph(xmm1, yword_b [rax+0x40]); + vcvtdq2ph(ymm1|k2|T_z|T_rd_sae, zmm5); + vcvtdq2ph(ymm1, ptr [rax+0x40]); + vcvtdq2ph(ymm1, ptr_b [rax+0x40]); + } } c; const uint8_t tbl[] = { @@ -1525,6 +1534,16 @@ CYBOZU_TEST_AUTO(vaddph) 0x62, 0xf5, 0x7d, 0x9d, 0x78, 0xcb, 0x62, 0xf5, 0x7d, 0xcd, 0x78, 0x48, 0x04, 0x62, 0xf5, 0x7d, 0xdd, 0x78, 0x48, 0x20, + + // vcvtdq2ph + 0x62, 0xf5, 0x7c, 0x08, 0x5b, 0xcd, + 0x62, 0xf5, 0x7c, 0x08, 0x5b, 0x48, 0x04, + 0x62, 0xf5, 0x7c, 0x18, 0x5b, 0x48, 0x10, + 0x62, 0xf5, 0x7c, 0x28, 0x5b, 0x48, 0x02, + 0x62, 0xf5, 0x7c, 0x38, 0x5b, 0x48, 0x10, + 0x62, 0xf5, 0x7c, 0xba, 0x5b, 0xcd, + 0x62, 0xf5, 0x7c, 0x48, 0x5b, 0x48, 0x01, + 0x62, 0xf5, 0x7c, 0x58, 0x5b, 0x48, 0x10, }; const size_t n = sizeof(tbl) / sizeof(tbl[0]); CYBOZU_TEST_EQUAL(c.getSize(), n); diff --git a/xbyak/xbyak.h b/xbyak/xbyak.h index 70f7bac..63194e9 100644 --- a/xbyak/xbyak.h +++ b/xbyak/xbyak.h @@ -2233,12 +2233,16 @@ private: { if (!(x.isXMM() && op.is(Operand::XMM | Operand::YMM | Operand::MEM)) && !(x.isYMM() && op.is(Operand::ZMM | Operand::MEM))) XBYAK_THROW(ERR_BAD_COMBINATION) } - void opCvt2(const Xmm& x, const Operand& op, int type, int code) + void opCvt(const Xmm& x, const Operand& op, int type, int code) { - checkCvt2(x, op); Operand::Kind kind = x.isXMM() ? (op.isBit(256) ? Operand::YMM : Operand::XMM) : Operand::ZMM; opVex(x.copyAndSetKind(kind), &xm0, op, type, code); } + void opCvt2(const Xmm& x, const Operand& op, int type, int code) + { + checkCvt2(x, op); + opCvt(x, op, type, code); + } void opCvt3(const Xmm& x1, const Xmm& x2, const Operand& op, int type, int type64, int type32, uint8_t code) { if (!(x1.isXMM() && x2.isXMM() && (op.isREG(i32e) || op.isMEM()))) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER) @@ -2246,6 +2250,11 @@ private: const Operand *p = op.isREG() ? &x : &op; opVex(x1, &x2, *p, type | (op.isBit(64) ? type64 : type32), code); } + // (x, x/y/xword/yword), (y, z/m) + void checkCvt4(const Xmm& x, const Operand& op) const + { + if (!(x.isXMM() && op.is(Operand::XMM | Operand::YMM | Operand::MEM) && op.isBit(128|256)) && !(x.isYMM() && op.is(Operand::ZMM | Operand::MEM))) XBYAK_THROW(ERR_BAD_COMBINATION) + } const Xmm& cvtIdx0(const Operand& x) const { return x.isZMM() ? zm0 : x.isYMM() ? ym0 : xm0; diff --git a/xbyak/xbyak_mnemonic.h b/xbyak/xbyak_mnemonic.h index 3389f5a..31924ee 100644 --- a/xbyak/xbyak_mnemonic.h +++ b/xbyak/xbyak_mnemonic.h @@ -1896,6 +1896,7 @@ void vcompressb(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, T_N1 | void vcompresspd(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x8A); } void vcompressps(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x8A); } void vcompressw(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, T_N2 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x63); } +void vcvtdq2ph(const Xmm& x, const Operand& op) { checkCvt4(x, op); opCvt(x, op, T_N16 | T_N_VL | T_MAP5 | T_EW0 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B32, 0x5B); } void vcvtne2ps2bf16(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F2 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x72); } void vcvtneps2bf16(const Xmm& x, const Operand& op) { opCvt2(x, op, T_F3 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x72); } void vcvtpd2qq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F | T_EW1 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B64, 0x7B); } |