about summary refs log tree commit diff homepage
diff options
context:
space:
mode:
author MITSUNARI Shigeo <[email protected]> 2021-09-13 17:07:14 +0900
committer MITSUNARI Shigeo <[email protected]> 2021-09-13 17:07:14 +0900
commit 88e426aa4457eff22df820014de5562a3ac2cb86 (patch)
tree 34773a31cb095614e1428d33862ded00ae72e56d
parent bf28a94a5a98a911f8422fa4940888f0c0842a04 (diff)
download xbyak-88e426aa4457eff22df820014de5562a3ac2cb86.tar.gz
xbyak-88e426aa4457eff22df820014de5562a3ac2cb86.zip
add vcvtdq2ph
-rw-r--r-- gen/gen_avx512.cpp 5
-rw-r--r-- test/misc.cpp 19
-rw-r--r-- xbyak/xbyak.h 13
-rw-r--r-- xbyak/xbyak_mnemonic.h 1
4 files changed, 36 insertions, 2 deletions
diff --git a/gen/gen_avx512.cpp b/gen/gen_avx512.cpp
index 86b0b2d..b0af54e 100644
--- a/gen/gen_avx512.cpp
+++ b/gen/gen_avx512.cpp
@@ -561,6 +561,8 @@ void putCvt()
{ 0x7B, "vcvtph2qq", T_66 | T_MAP5 | T_MUST_EVEX | T_YMM | T_EW0 | T_B16 | T_N4 | T_N_VL | T_ER_X, 3 },
{ 0x79, "vcvtph2uqq", T_66 | T_MAP5 | T_MUST_EVEX | T_YMM | T_EW0 | T_B16 | T_N4 | T_N_VL | T_ER_X, 3 },
{ 0x78, "vcvttph2uqq", T_66 | T_MAP5 | T_MUST_EVEX | T_YMM | T_EW0 | T_B16 | T_N4 | T_N_VL | T_SAE_X, 3 },
+
+ { 0x5B, "vcvtdq2ph", T_MAP5 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 | T_ER_Z | T_N16 | T_N_VL, 4 },
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl& p = tbl[i];
@@ -578,6 +580,9 @@ void putCvt()
case 3:
printf("void %s(const Xmm& x, const Operand& op) { if (!op.isXMM() && !op.isMEM()) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(x, 0, op, %s, 0x%02X); }\n", p.name, type.c_str(), p.code);
break;
+ case 4:
+ printf("void %s(const Xmm& x, const Operand& op) { checkCvt4(x, op); opCvt(x, op, %s, 0x%02X); }\n", p.name, type.c_str(), p.code);
+ break;
}
}
puts("void vcvtusi2sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opCvt3(x1, x2, op, T_F2 | T_0F | T_MUST_EVEX, T_W1 | T_EW1 | T_ER_X | T_N8, T_W0 | T_EW0 | T_N4, 0x7B); }");
diff --git a/test/misc.cpp b/test/misc.cpp
index b70f49c..7b23dbd 100644
--- a/test/misc.cpp
+++ b/test/misc.cpp
@@ -1170,6 +1170,15 @@ CYBOZU_TEST_AUTO(vaddph)
vcvttph2uqq(zmm1|k5|T_z, ptr [rax+0x40]);
vcvttph2uqq(zmm1|k5|T_z, ptr_b [rax+0x40]);
+ vcvtdq2ph(xmm1, xmm5);
+ vcvtdq2ph(xmm1, xword [rax+0x40]);
+ vcvtdq2ph(xmm1, xword_b [rax+0x40]);
+ vcvtdq2ph(xmm1, yword [rax+0x40]);
+ vcvtdq2ph(xmm1, yword_b [rax+0x40]);
+ vcvtdq2ph(ymm1|k2|T_z|T_rd_sae, zmm5);
+ vcvtdq2ph(ymm1, ptr [rax+0x40]);
+ vcvtdq2ph(ymm1, ptr_b [rax+0x40]);
+
}
} c;
const uint8_t tbl[] = {
@@ -1525,6 +1534,16 @@ CYBOZU_TEST_AUTO(vaddph)
0x62, 0xf5, 0x7d, 0x9d, 0x78, 0xcb,
0x62, 0xf5, 0x7d, 0xcd, 0x78, 0x48, 0x04,
0x62, 0xf5, 0x7d, 0xdd, 0x78, 0x48, 0x20,
+
+ // vcvtdq2ph
+ 0x62, 0xf5, 0x7c, 0x08, 0x5b, 0xcd,
+ 0x62, 0xf5, 0x7c, 0x08, 0x5b, 0x48, 0x04,
+ 0x62, 0xf5, 0x7c, 0x18, 0x5b, 0x48, 0x10,
+ 0x62, 0xf5, 0x7c, 0x28, 0x5b, 0x48, 0x02,
+ 0x62, 0xf5, 0x7c, 0x38, 0x5b, 0x48, 0x10,
+ 0x62, 0xf5, 0x7c, 0xba, 0x5b, 0xcd,
+ 0x62, 0xf5, 0x7c, 0x48, 0x5b, 0x48, 0x01,
+ 0x62, 0xf5, 0x7c, 0x58, 0x5b, 0x48, 0x10,
};
const size_t n = sizeof(tbl) / sizeof(tbl[0]);
CYBOZU_TEST_EQUAL(c.getSize(), n);
diff --git a/xbyak/xbyak.h b/xbyak/xbyak.h
index 70f7bac..63194e9 100644
--- a/xbyak/xbyak.h
+++ b/xbyak/xbyak.h
@@ -2233,12 +2233,16 @@ private:
{
if (!(x.isXMM() && op.is(Operand::XMM | Operand::YMM | Operand::MEM)) && !(x.isYMM() && op.is(Operand::ZMM | Operand::MEM))) XBYAK_THROW(ERR_BAD_COMBINATION)
}
- void opCvt2(const Xmm& x, const Operand& op, int type, int code)
+ void opCvt(const Xmm& x, const Operand& op, int type, int code)
{
- checkCvt2(x, op);
Operand::Kind kind = x.isXMM() ? (op.isBit(256) ? Operand::YMM : Operand::XMM) : Operand::ZMM;
opVex(x.copyAndSetKind(kind), &xm0, op, type, code);
}
+ void opCvt2(const Xmm& x, const Operand& op, int type, int code)
+ {
+ checkCvt2(x, op);
+ opCvt(x, op, type, code);
+ }
void opCvt3(const Xmm& x1, const Xmm& x2, const Operand& op, int type, int type64, int type32, uint8_t code)
{
if (!(x1.isXMM() && x2.isXMM() && (op.isREG(i32e) || op.isMEM()))) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER)
@@ -2246,6 +2250,11 @@ private:
const Operand *p = op.isREG() ? &x : &op;
opVex(x1, &x2, *p, type | (op.isBit(64) ? type64 : type32), code);
}
+ // (x, x/y/xword/yword), (y, z/m)
+ void checkCvt4(const Xmm& x, const Operand& op) const
+ {
+ if (!(x.isXMM() && op.is(Operand::XMM | Operand::YMM | Operand::MEM) && op.isBit(128|256)) && !(x.isYMM() && op.is(Operand::ZMM | Operand::MEM))) XBYAK_THROW(ERR_BAD_COMBINATION)
+ }
const Xmm& cvtIdx0(const Operand& x) const
{
return x.isZMM() ? zm0 : x.isYMM() ? ym0 : xm0;
diff --git a/xbyak/xbyak_mnemonic.h b/xbyak/xbyak_mnemonic.h
index 3389f5a..31924ee 100644
--- a/xbyak/xbyak_mnemonic.h
+++ b/xbyak/xbyak_mnemonic.h
@@ -1896,6 +1896,7 @@ void vcompressb(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, T_N1 |
void vcompresspd(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x8A); }
void vcompressps(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x8A); }
void vcompressw(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, T_N2 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x63); }
+void vcvtdq2ph(const Xmm& x, const Operand& op) { checkCvt4(x, op); opCvt(x, op, T_N16 | T_N_VL | T_MAP5 | T_EW0 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B32, 0x5B); }
void vcvtne2ps2bf16(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F2 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x72); }
void vcvtneps2bf16(const Xmm& x, const Operand& op) { opCvt2(x, op, T_F3 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x72); }
void vcvtpd2qq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F | T_EW1 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B64, 0x7B); }