about summary refs log tree commit diff homepage
diff options
context:
space:
mode:
author MITSUNARI Shigeo <[email protected]> 2021-09-14 09:31:53 +0900
committer MITSUNARI Shigeo <[email protected]> 2021-09-14 09:31:53 +0900
commit 34abda5c5f330358764eb86465941c26d6ed1030 (patch)
tree a34736bfb38ddd6101497394529fcff4bfc999fa
parent facd622b2877b8169c5f0367a70745362a858958 (diff)
download xbyak-34abda5c5f330358764eb86465941c26d6ed1030.tar.gz
download xbyak-34abda5c5f330358764eb86465941c26d6ed1030.zip
extend vcvtps2ph
-rw-r--r-- gen/gen_code.cpp 2
-rw-r--r-- test/misc.cpp 24
-rw-r--r-- xbyak/xbyak_mnemonic.h 2
3 files changed, 26 insertions, 2 deletions
diff --git a/gen/gen_code.cpp b/gen/gen_code.cpp
index db5295c..19022e6 100644
--- a/gen/gen_code.cpp
+++ b/gen/gen_code.cpp
@@ -1652,7 +1652,7 @@ void put()
puts("void vcvttpd2dq(const Xmm& x, const Operand& op) { opCvt2(x, op, T_66 | T_0F | T_YMM | T_EVEX |T_EW1 | T_B64 | T_ER_Z, 0xE6); }");
puts("void vcvtph2ps(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_0F38 | T_66 | T_W0 | T_EVEX | T_EW0 | T_N8 | T_N_VL | T_SAE_Y, 0x13); }");
- puts("void vcvtps2ph(const Operand& op, const Xmm& x, uint8_t imm) { checkCvt1(x, op); opVex(x, 0, op, T_0F3A | T_66 | T_W0 | T_EVEX | T_EW0 | T_N8 | T_N_VL | T_SAE_Y, 0x1D, imm); }");
+ puts("void vcvtps2ph(const Operand& op, const Xmm& x, uint8_t imm) { checkCvt1(x, op); opVex(x, 0, op, T_0F3A | T_66 | T_W0 | T_EVEX | T_EW0 | T_N8 | T_N_VL | T_SAE_Y | T_M_K, 0x1D, imm); }");
}
// haswell gpr(reg, reg, r/m)
diff --git a/test/misc.cpp b/test/misc.cpp
index 3d8cd43..da50d4f 100644
--- a/test/misc.cpp
+++ b/test/misc.cpp
@@ -1280,6 +1280,18 @@ CYBOZU_TEST_AUTO(vaddph)
vcvtw2ph(zmm1|k2|T_z|T_rd_sae, zmm5);
vcvtw2ph(zmm1, ptr [rax+0x40]);
vcvtw2ph(zmm1, ptr_b [rax+0x40]);
+
+ vcvtps2ph(xmm1, xmm2, 0x1);
+ vcvtps2ph(ptr [rax+0x40], xmm2, 0x2);
+ vcvtps2ph(xmm1, ymm2, 0x3);
+ vcvtps2ph(ptr [rax+0x40], ymm2, 0x4);
+ vcvtps2ph(xmm1|k1|T_z, xmm2, 0x5);
+ vcvtps2ph(ptr [rax+0x40]|k1, xmm3, 0x6);
+ vcvtps2ph(xmm1|k2, ymm4, 0x7);
+ vcvtps2ph(ptr [rax+0x40]|k2, ymm5, 0x8);
+ vcvtps2ph(ymm1|k2|T_sae, zmm5, 0x9);
+ vcvtps2ph(ptr [rax+0x40]|k5, zmm4, 0xa);
+
}
} c;
const uint8_t tbl[] = {
@@ -1758,6 +1770,18 @@ CYBOZU_TEST_AUTO(vaddph)
0x62, 0xf5, 0x7e, 0xba, 0x7d, 0xcd,
0x62, 0xf5, 0x7e, 0x48, 0x7d, 0x48, 0x01,
0x62, 0xf5, 0x7e, 0x58, 0x7d, 0x48, 0x20,
+
+ // vcvtps2ph
+ 0xc4, 0xe3, 0x79, 0x1d, 0xd1, 0x01,
+ 0xc4, 0xe3, 0x79, 0x1d, 0x50, 0x40, 0x02,
+ 0xc4, 0xe3, 0x7d, 0x1d, 0xd1, 0x03,
+ 0xc4, 0xe3, 0x7d, 0x1d, 0x50, 0x40, 0x04,
+ 0x62, 0xf3, 0x7d, 0x89, 0x1d, 0xd1, 0x05,
+ 0x62, 0xf3, 0x7d, 0x09, 0x1d, 0x58, 0x08, 0x06,
+ 0x62, 0xf3, 0x7d, 0x2a, 0x1d, 0xe1, 0x07,
+ 0x62, 0xf3, 0x7d, 0x2a, 0x1d, 0x68, 0x04, 0x08,
+ 0x62, 0xf3, 0x7d, 0x1a, 0x1d, 0xe9, 0x09,
+ 0x62, 0xf3, 0x7d, 0x4d, 0x1d, 0x60, 0x02, 0x0a,
};
const size_t n = sizeof(tbl) / sizeof(tbl[0]);
CYBOZU_TEST_EQUAL(c.getSize(), n);
diff --git a/xbyak/xbyak_mnemonic.h b/xbyak/xbyak_mnemonic.h
index b9e1175..4e96396 100644
--- a/xbyak/xbyak_mnemonic.h
+++ b/xbyak/xbyak_mnemonic.h
@@ -984,7 +984,7 @@ void vcvtpd2ps(const Xmm& x, const Operand& op) { opCvt2(x, op, T_0F | T_66 | T_
void vcvtph2ps(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_0F38 | T_66 | T_W0 | T_EVEX | T_EW0 | T_N8 | T_N_VL | T_SAE_Y, 0x13); }
void vcvtps2dq(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32, 0x5B); }
void vcvtps2pd(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_0F | T_YMM | T_EVEX | T_EW0 | T_B32 | T_N8 | T_N_VL | T_SAE_Y, 0x5A); }
-void vcvtps2ph(const Operand& op, const Xmm& x, uint8_t imm) { checkCvt1(x, op); opVex(x, 0, op, T_0F3A | T_66 | T_W0 | T_EVEX | T_EW0 | T_N8 | T_N_VL | T_SAE_Y, 0x1D, imm); }
+void vcvtps2ph(const Operand& op, const Xmm& x, uint8_t imm) { checkCvt1(x, op); opVex(x, 0, op, T_0F3A | T_66 | T_W0 | T_EVEX | T_EW0 | T_N8 | T_N_VL | T_SAE_Y | T_M_K, 0x1D, imm); }
void vcvtsd2si(const Reg32& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, T_0F | T_F2 | T_W0 | T_EVEX | T_EW0 | T_N4 | T_ER_X, 0x2D); }
void vcvtsd2ss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_F2 | T_0F | T_EW1 | T_EVEX | T_ER_X, 0x5A); }
void vcvtsi2sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opCvt3(x1, x2, op, T_0F | T_F2 | T_EVEX, T_W1 | T_EW1 | T_ER_X | T_N8, T_W0 | T_EW0 | T_N4, 0x2A); }