diff options
-rw-r--r-- | gen/gen_avx512.cpp | 1 | ||||
-rw-r--r-- | test/misc.cpp | 19 | ||||
-rw-r--r-- | xbyak/xbyak_mnemonic.h | 1 |
3 files changed, 21 insertions, 0 deletions
diff --git a/gen/gen_avx512.cpp b/gen/gen_avx512.cpp index 1cd9546..4d26599 100644 --- a/gen/gen_avx512.cpp +++ b/gen/gen_avx512.cpp @@ -205,6 +205,7 @@ void putX_XM() { 0x7D, "vcvtph2w", T_66 | T_MAP5 | T_MUST_EVEX | T_YMM | T_EW0 | T_B16 | T_ER_Z }, { 0x7C, "vcvttph2uw", T_MAP5 | T_MUST_EVEX | T_YMM | T_EW0 | T_B16 | T_SAE_Z }, { 0x7C, "vcvttph2w", T_66 | T_MAP5 | T_MUST_EVEX | T_YMM | T_EW0 | T_B16 | T_SAE_Z }, + { 0x7D, "vcvtuw2ph", T_F2 | T_MAP5 | T_MUST_EVEX | T_YMM | T_EW0 | T_B16 | T_ER_Z }, }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl *p = &tbl[i]; diff --git a/test/misc.cpp b/test/misc.cpp index 1497e58..fde0e6b 100644 --- a/test/misc.cpp +++ b/test/misc.cpp @@ -1262,6 +1262,15 @@ CYBOZU_TEST_AUTO(vaddph) vcvttph2w(zmm1|k2|T_z|T_sae, zmm5); vcvttph2w(zmm1, ptr [rax+0x40]); vcvttph2w(zmm1, ptr_b [rax+0x40]); + + vcvtuw2ph(xmm1, xmm5); + vcvtuw2ph(xmm1, ptr [rax+0x40]); + vcvtuw2ph(xmm1, ptr_b [rax+0x40]); + vcvtuw2ph(ymm1, ptr [rax+0x40]); + vcvtuw2ph(ymm1, ptr_b [rax+0x40]); + vcvtuw2ph(zmm1|k2|T_z|T_rd_sae, zmm5); + vcvtuw2ph(zmm1, ptr [rax+0x40]); + vcvtuw2ph(zmm1, ptr_b [rax+0x40]); } } c; const uint8_t tbl[] = { @@ -1720,6 +1729,16 @@ CYBOZU_TEST_AUTO(vaddph) 0x62, 0xf5, 0x7d, 0x9a, 0x7c, 0xcd, 0x62, 0xf5, 0x7d, 0x48, 0x7c, 0x48, 0x01, 0x62, 0xf5, 0x7d, 0x58, 0x7c, 0x48, 0x20, + + // vcvtuw2ph + 0x62, 0xf5, 0x7f, 0x08, 0x7d, 0xcd, + 0x62, 0xf5, 0x7f, 0x08, 0x7d, 0x48, 0x04, + 0x62, 0xf5, 0x7f, 0x18, 0x7d, 0x48, 0x20, + 0x62, 0xf5, 0x7f, 0x28, 0x7d, 0x48, 0x02, + 0x62, 0xf5, 0x7f, 0x38, 0x7d, 0x48, 0x20, + 0x62, 0xf5, 0x7f, 0xba, 0x7d, 0xcd, + 0x62, 0xf5, 0x7f, 0x48, 0x7d, 0x48, 0x01, + 0x62, 0xf5, 0x7f, 0x58, 0x7d, 0x48, 0x20, }; const size_t n = sizeof(tbl) / sizeof(tbl[0]); CYBOZU_TEST_EQUAL(c.getSize(), n); diff --git a/xbyak/xbyak_mnemonic.h b/xbyak/xbyak_mnemonic.h index bdcd2fd..fa76e30 100644 --- a/xbyak/xbyak_mnemonic.h +++ b/xbyak/xbyak_mnemonic.h @@ -1946,6 +1946,7 @@ void vcvtuqq2ph(const Xmm& x, const Operand& op) { opCvt5(x, op, T_N16 | T_N_VL void vcvtuqq2ps(const Xmm& x, const Operand& op) { opCvt2(x, op, T_F2 | T_0F | T_EW1 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B64, 0x7A); } void vcvtusi2sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opCvt3(x1, x2, op, T_F2 | T_0F | T_MUST_EVEX, T_W1 | T_EW1 | T_ER_X | T_N8, T_W0 | T_EW0 | T_N4, 0x7B); } void vcvtusi2ss(const Xmm& x1, const Xmm& x2, const Operand& op) { opCvt3(x1, x2, op, T_F3 | T_0F | T_MUST_EVEX | T_ER_X, T_W1 | T_EW1 | T_N8, T_W0 | T_EW0 | T_N4, 0x7B); } +void vcvtuw2ph(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_F2 | T_MAP5 | T_EW0 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B16, 0x7D); } void vdbpsadbw(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x42, imm); } void vdivph(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_MAP5 | T_EW0 | T_YMM | T_MUST_EVEX | T_ER_Z | T_B16, 0x5E); } void vdivsh(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_MAP5 | T_F3 | T_EW0 | T_MUST_EVEX | T_ER_X | T_N2, 0x5E); } |