aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorSrinivas Putta <[email protected]>2022-10-03 14:09:09 -0700
committerSrinivas Putta <[email protected]>2022-10-03 14:12:30 -0700
commit1c5cb7efab14223f54e542e01eafc17e2cc0f550 (patch)
treee94b5aa04328fe20849da6756f807627708b8b05
parent564fe9acd37cfe3fa68f01c8cf6517a28b532e28 (diff)
downloadxbyak-1c5cb7efab14223f54e542e01eafc17e2cc0f550.tar.gz
xbyak-1c5cb7efab14223f54e542e01eafc17e2cc0f550.zip
add AVX-NE-CONVERT instructions
-rw-r--r--xbyak/xbyak.h12
-rw-r--r--xbyak/xbyak_mnemonic.h8
-rw-r--r--xbyak/xbyak_util.h2
3 files changed, 21 insertions, 1 deletions
diff --git a/xbyak/xbyak.h b/xbyak/xbyak.h
index d7955bf..32fa4ab 100644
--- a/xbyak/xbyak.h
+++ b/xbyak/xbyak.h
@@ -2432,6 +2432,18 @@ private:
}
opAVX_X_X_XM(x1, x2, op, type, code0);
}
+ int opShouldPreferEvex(PreferredEncoding encoding) {
+ if (encoding == DefaultEncoding) {
+ encoding = EvexEncoding;
+ }
+ if (encoding == EvexEncoding) {
+#ifdef XBYAK_DISABLE_AVX512
+ XBYAK_THROW(ERR_EVEX_IS_INVALID)
+#endif
+ return T_MUST_EVEX;
+ }
+ return 0;
+ }
void opInOut(const Reg& a, const Reg& d, uint8_t code)
{
if (a.getIdx() == Operand::AL && d.getIdx() == Operand::DX && d.getBit() == 16) {
diff --git a/xbyak/xbyak_mnemonic.h b/xbyak/xbyak_mnemonic.h
index ed25e8c..a588ba1 100644
--- a/xbyak/xbyak_mnemonic.h
+++ b/xbyak/xbyak_mnemonic.h
@@ -845,6 +845,8 @@ void vandnpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand())
void vandnps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32, 0x55); }
void vandpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER_Z | T_B64, 0x54); }
void vandps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32, 0x54); }
+void vbcstnebf162ps(const Xmm& x, const Address& addr) { opVex(x, 0, addr, T_F3 | T_0F38 | T_W0 | T_B16 | T_YMM, 0xB1); }
+void vbcstnesh2ps(const Xmm& x, const Address& addr) { opVex(x, 0, addr, T_66 | T_0F38 | T_W0 | T_B16 | T_YMM, 0xB1); }
void vblendpd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W0 | T_YMM, 0x0D, imm); }
void vblendps(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W0 | T_YMM, 0x0C, imm); }
void vblendvpd(const Xmm& x1, const Xmm& x2, const Operand& op, const Xmm& x4) { opAVX_X_X_XM(x1, x2, op, T_0F3A | T_66 | T_YMM, 0x4B, x4.getIdx() << 4); }
@@ -1915,7 +1917,11 @@ void vcompressps(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, T_N4 |
void vcompressw(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, T_N2 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x63); }
void vcvtdq2ph(const Xmm& x, const Operand& op) { checkCvt4(x, op); opCvt(x, op, T_N16 | T_N_VL | T_MAP5 | T_EW0 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B32, 0x5B); }
void vcvtne2ps2bf16(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F2 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x72); }
-void vcvtneps2bf16(const Xmm& x, const Operand& op) { opCvt2(x, op, T_F3 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x72); }
+void vcvtneebf162ps(const Xmm& x, const Address& addr) { opVex(x, 0, addr, T_F3 | T_0F38 | T_W0 | T_YMM, 0xB0); }
+void vcvtneeph2ps(const Xmm& x, const Address& addr) { opVex(x, 0, addr, T_66 | T_0F38 | T_W0 | T_YMM, 0xB0); }
+void vcvtneobf162ps(const Xmm& x, const Address& addr) { opVex(x, 0, addr, T_F2 | T_0F38 | T_W0 | T_YMM, 0xB0); }
+void vcvtneoph2ps(const Xmm& x, const Address& addr) { opVex(x, 0, addr, T_0F38 | T_W0 | T_YMM, 0xB0); }
+void vcvtneps2bf16(const Xmm& x, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opCvt2(x, op, T_F3 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_B32 | opShouldPreferEvex(encoding), 0x72); }
void vcvtpd2ph(const Xmm& x, const Operand& op) { opCvt5(x, op, T_N16 | T_N_VL | T_66 | T_MAP5 | T_EW1 | T_ER_Z | T_MUST_EVEX | T_B64, 0x5A); }
void vcvtpd2qq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F | T_EW1 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B64, 0x7B); }
void vcvtpd2udq(const Xmm& x, const Operand& op) { opCvt2(x, op, T_0F | T_EW1 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B64, 0x79); }
diff --git a/xbyak/xbyak_util.h b/xbyak/xbyak_util.h
index bde9c5f..e50b850 100644
--- a/xbyak/xbyak_util.h
+++ b/xbyak/xbyak_util.h
@@ -412,6 +412,7 @@ public:
XBYAK_DEFINE_TYPE(67, tCLZERO); // AMD Zen
XBYAK_DEFINE_TYPE(68, tAMX_FP16);
XBYAK_DEFINE_TYPE(69, tAVX_VNNI_INT8);
+ XBYAK_DEFINE_TYPE(70, tAVX_NE_CONVERT);
#undef XBYAK_SPLIT_ID
#undef XBYAK_DEFINE_TYPE
@@ -557,6 +558,7 @@ public:
}
if (EAX & (1U << 21)) type_ |= tAMX_FP16;
if (EDX & (1U << 4)) type_ |= tAVX_VNNI_INT8;
+ if (EDX & (1U << 5)) type_ |= tAVX_NE_CONVERT;
}
}
setFamily();