diff options
author | MITSUNARI Shigeo <[email protected]> | 2021-09-14 09:31:53 +0900 |
---|---|---|
committer | MITSUNARI Shigeo <[email protected]> | 2021-09-14 09:31:53 +0900 |
commit | 34abda5c5f330358764eb86465941c26d6ed1030 (patch) | |
tree | a34736bfb38ddd6101497394529fcff4bfc999fa | |
parent | facd622b2877b8169c5f0367a70745362a858958 (diff) | |
download | xbyak-34abda5c5f330358764eb86465941c26d6ed1030.tar.gz xbyak-34abda5c5f330358764eb86465941c26d6ed1030.zip |
extend vcvtps2ph
-rw-r--r-- | gen/gen_code.cpp | 2 | ||||
-rw-r--r-- | test/misc.cpp | 24 | ||||
-rw-r--r-- | xbyak/xbyak_mnemonic.h | 2 |
3 files changed, 26 insertions, 2 deletions
diff --git a/gen/gen_code.cpp b/gen/gen_code.cpp index db5295c..19022e6 100644 --- a/gen/gen_code.cpp +++ b/gen/gen_code.cpp @@ -1652,7 +1652,7 @@ void put() puts("void vcvttpd2dq(const Xmm& x, const Operand& op) { opCvt2(x, op, T_66 | T_0F | T_YMM | T_EVEX |T_EW1 | T_B64 | T_ER_Z, 0xE6); }"); puts("void vcvtph2ps(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_0F38 | T_66 | T_W0 | T_EVEX | T_EW0 | T_N8 | T_N_VL | T_SAE_Y, 0x13); }"); - puts("void vcvtps2ph(const Operand& op, const Xmm& x, uint8_t imm) { checkCvt1(x, op); opVex(x, 0, op, T_0F3A | T_66 | T_W0 | T_EVEX | T_EW0 | T_N8 | T_N_VL | T_SAE_Y, 0x1D, imm); }"); + puts("void vcvtps2ph(const Operand& op, const Xmm& x, uint8_t imm) { checkCvt1(x, op); opVex(x, 0, op, T_0F3A | T_66 | T_W0 | T_EVEX | T_EW0 | T_N8 | T_N_VL | T_SAE_Y | T_M_K, 0x1D, imm); }"); } // haswell gpr(reg, reg, r/m) diff --git a/test/misc.cpp b/test/misc.cpp index 3d8cd43..da50d4f 100644 --- a/test/misc.cpp +++ b/test/misc.cpp @@ -1280,6 +1280,18 @@ CYBOZU_TEST_AUTO(vaddph) vcvtw2ph(zmm1|k2|T_z|T_rd_sae, zmm5); vcvtw2ph(zmm1, ptr [rax+0x40]); vcvtw2ph(zmm1, ptr_b [rax+0x40]); + + vcvtps2ph(xmm1, xmm2, 0x1); + vcvtps2ph(ptr [rax+0x40], xmm2, 0x2); + vcvtps2ph(xmm1, ymm2, 0x3); + vcvtps2ph(ptr [rax+0x40], ymm2, 0x4); + vcvtps2ph(xmm1|k1|T_z, xmm2, 0x5); + vcvtps2ph(ptr [rax+0x40]|k1, xmm3, 0x6); + vcvtps2ph(xmm1|k2, ymm4, 0x7); + vcvtps2ph(ptr [rax+0x40]|k2, ymm5, 0x8); + vcvtps2ph(ymm1|k2|T_sae, zmm5, 0x9); + vcvtps2ph(ptr [rax+0x40]|k5, zmm4, 0xa); + } } c; const uint8_t tbl[] = { @@ -1758,6 +1770,18 @@ CYBOZU_TEST_AUTO(vaddph) 0x62, 0xf5, 0x7e, 0xba, 0x7d, 0xcd, 0x62, 0xf5, 0x7e, 0x48, 0x7d, 0x48, 0x01, 0x62, 0xf5, 0x7e, 0x58, 0x7d, 0x48, 0x20, + + // vcvtps2ph + 0xc4, 0xe3, 0x79, 0x1d, 0xd1, 0x01, + 0xc4, 0xe3, 0x79, 0x1d, 0x50, 0x40, 0x02, + 0xc4, 0xe3, 0x7d, 0x1d, 0xd1, 0x03, + 0xc4, 0xe3, 0x7d, 0x1d, 0x50, 0x40, 0x04, + 0x62, 0xf3, 0x7d, 0x89, 0x1d, 0xd1, 0x05, + 0x62, 0xf3, 0x7d, 0x09, 0x1d, 0x58, 0x08, 0x06, + 0x62, 0xf3, 0x7d, 0x2a, 0x1d, 0xe1, 0x07, + 0x62, 0xf3, 0x7d, 0x2a, 0x1d, 0x68, 0x04, 0x08, + 0x62, 0xf3, 0x7d, 0x1a, 0x1d, 0xe9, 0x09, + 0x62, 0xf3, 0x7d, 0x4d, 0x1d, 0x60, 0x02, 0x0a, }; const size_t n = sizeof(tbl) / sizeof(tbl[0]); CYBOZU_TEST_EQUAL(c.getSize(), n); diff --git a/xbyak/xbyak_mnemonic.h b/xbyak/xbyak_mnemonic.h index b9e1175..4e96396 100644 --- a/xbyak/xbyak_mnemonic.h +++ b/xbyak/xbyak_mnemonic.h @@ -984,7 +984,7 @@ void vcvtpd2ps(const Xmm& x, const Operand& op) { opCvt2(x, op, T_0F | T_66 | T_ void vcvtph2ps(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_0F38 | T_66 | T_W0 | T_EVEX | T_EW0 | T_N8 | T_N_VL | T_SAE_Y, 0x13); } void vcvtps2dq(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32, 0x5B); } void vcvtps2pd(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_0F | T_YMM | T_EVEX | T_EW0 | T_B32 | T_N8 | T_N_VL | T_SAE_Y, 0x5A); } -void vcvtps2ph(const Operand& op, const Xmm& x, uint8_t imm) { checkCvt1(x, op); opVex(x, 0, op, T_0F3A | T_66 | T_W0 | T_EVEX | T_EW0 | T_N8 | T_N_VL | T_SAE_Y, 0x1D, imm); } +void vcvtps2ph(const Operand& op, const Xmm& x, uint8_t imm) { checkCvt1(x, op); opVex(x, 0, op, T_0F3A | T_66 | T_W0 | T_EVEX | T_EW0 | T_N8 | T_N_VL | T_SAE_Y | T_M_K, 0x1D, imm); } void vcvtsd2si(const Reg32& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, T_0F | T_F2 | T_W0 | T_EVEX | T_EW0 | T_N4 | T_ER_X, 0x2D); } void vcvtsd2ss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_F2 | T_0F | T_EW1 | T_EVEX | T_ER_X, 0x5A); } void vcvtsi2sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opCvt3(x1, x2, op, T_0F | T_F2 | T_EVEX, T_W1 | T_EW1 | T_ER_X | T_N8, T_W0 | T_EW0 | T_N4, 0x2A); } |