aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
author: MITSUNARI Shigeo <[email protected]> 2024-10-11 13:02:17 +0900
committer: MITSUNARI Shigeo <[email protected]> 2024-10-13 13:51:06 +0900
commit: 6dc564185b85d16e240f9f15aad5be08036176d9 (patch)
tree: 5767c9b2bedff5d530444fe800d3d9081bcf0f70
parent: a84866bcbc8411416e51f53b210c7d9f06e3e763 (diff)
download: xbyak-6dc564185b85d16e240f9f15aad5be08036176d9.tar.gz
xbyak-6dc564185b85d16e240f9f15aad5be08036176d9.zip
add vcmppbf16, vfpclasspbf16
-rw-r--r-- gen/gen_avx512.cpp 5
-rw-r--r-- test/avx10/bf16.txt 17
-rw-r--r-- xbyak/xbyak_mnemonic.h 2
3 files changed, 22 insertions, 2 deletions
diff --git a/gen/gen_avx512.cpp b/gen/gen_avx512.cpp
index b1bf0b1..3812cdd 100644
--- a/gen/gen_avx512.cpp
+++ b/gen/gen_avx512.cpp
@@ -946,7 +946,7 @@ void putFP16_2()
void putAVX10_BF16()
{
- // x, x, op
+ // x, x, op : 8
const struct xxopTbl {
const char *name;
uint64_t type;
@@ -981,7 +981,8 @@ void putAVX10_BF16()
std::string s = type2String(p.type | T_MUST_EVEX);
printf("void %s(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, %s, 0x%2X); }\n" , p.name, s.c_str(), p.code);
}
-// { "vrcppbf16", T_66 | T_MAP6 | T_EW0 | T_YMM | T_B16, 0x4C },
+ puts("void vcmppbf16(const Opmask& k, const Xmm& x, const Operand& op, uint8_t imm) { opVex(k, &x, op, T_MUST_EVEX|T_F2|T_0F3A|T_EW0|T_YMM|T_B16, 0xC2, imm); }");
+ puts("void vfpclasspbf16(const Opmask& k, const Operand& op, uint8_t imm) { opVex(k.changeBit(op.getBit()), 0, op, T_MUST_EVEX|T_F2|T_0F3A|T_EW0|T_YMM|T_B16, 0x66, imm); }");
}
void putFP16()
diff --git a/test/avx10/bf16.txt b/test/avx10/bf16.txt
index 7dcdb25..f3e44a6 100644
--- a/test/avx10/bf16.txt
+++ b/test/avx10/bf16.txt
@@ -92,3 +92,20 @@ vfnmsub231nepbf16(xm1, xm2, xm3);
vfnmsub231nepbf16(ym1|k1, ym2, ptr[rax+128]);
vfnmsub231nepbf16(ym1|k1, ym2, ptr_b[rax+128]);
vfnmsub231nepbf16(zm1|k2|T_z, zm2, ptr_b[rax+128]);
+
+vcmppbf16(k1, xm5, xm4, 5);
+vcmppbf16(k2, ym5, ym4, 6);
+vcmppbf16(k3, ym15, ptr_b[rax+128], 7);
+vcmppbf16(k4, zm30, zm20, 8);
+vcmppbf16(k5, zm1, ptr[rax+128], 9);
+vcmppbf16(k6, zm10, ptr_b[rax+128], 10);
+
+vfpclasspbf16(k1, xm4, 5);
+vfpclasspbf16(k2|k5, ym4, 6);
+vfpclasspbf16(k3|k5, zm20, 7);
+vfpclasspbf16(k3|k5, xword[rax+128], 8);
+vfpclasspbf16(k3, xword_b[rax+128], 9);
+vfpclasspbf16(k5|k5, yword[rax+128], 10);
+vfpclasspbf16(k6|k5, yword_b[rax+128], 11);
+vfpclasspbf16(k7|k5, zword[rax+128], 12);
+vfpclasspbf16(k7|k5, zword_b[rax+128], 13);
diff --git a/xbyak/xbyak_mnemonic.h b/xbyak/xbyak_mnemonic.h
index 7ce61e0..cfcd6e2 100644
--- a/xbyak/xbyak_mnemonic.h
+++ b/xbyak/xbyak_mnemonic.h
@@ -2176,6 +2176,7 @@ void vcmpordpd(const Opmask& k, const Xmm& x, const Operand& op) { vcmppd(k, x,
void vcmpordps(const Opmask& k, const Xmm& x, const Operand& op) { vcmpps(k, x, op, 7); }
void vcmpordsd(const Opmask& k, const Xmm& x, const Operand& op) { vcmpsd(k, x, op, 7); }
void vcmpordss(const Opmask& k, const Xmm& x, const Operand& op) { vcmpss(k, x, op, 7); }
+void vcmppbf16(const Opmask& k, const Xmm& x, const Operand& op, uint8_t imm) { opVex(k, &x, op, T_MUST_EVEX|T_F2|T_0F3A|T_EW0|T_YMM|T_B16, 0xC2, imm); } // EVEX-only compare into mask k, opcode 0xC2 with predicate imm. NOTE(review): vcmppd/vcmpph/vcmpps go through opAVX_K_X_XM while this calls opVex directly -- confirm that is intended
void vcmppd(const Opmask& k, const Xmm& x, const Operand& op, uint8_t imm) { opAVX_K_X_XM(k, x, op, T_66|T_0F|T_EW1|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B64, 0xC2, imm); }
void vcmpph(const Opmask& k, const Xmm& x, const Operand& op, uint8_t imm) { opAVX_K_X_XM(k, x, op, T_0F3A|T_EW0|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B16, 0xC2, imm); }
void vcmpps(const Opmask& k, const Xmm& x, const Operand& op, uint8_t imm) { opAVX_K_X_XM(k, x, op, T_0F|T_EW0|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B32, 0xC2, imm); }
@@ -2325,6 +2326,7 @@ void vfnmsub213sh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_X
void vfnmsub231nepbf16(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_MAP6|T_EW0|T_YMM|T_MUST_EVEX|T_B16, 0xBE); }
void vfnmsub231ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_MAP6|T_EW0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B16, 0xBE); }
void vfnmsub231sh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2|T_66|T_MAP6|T_EW0|T_ER_X|T_MUST_EVEX, 0xBF); }
+void vfpclasspbf16(const Opmask& k, const Operand& op, uint8_t imm) { if (!op.isBit(128|256|512)) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(k.changeBit(op.getBit()), 0, op, T_MUST_EVEX|T_F2|T_0F3A|T_EW0|T_YMM|T_B16, 0x66, imm); } // op must be 128/256/512-bit (same guard as vfpclasspd/ph/ps) because the mask operand takes its bit width from op
void vfpclasspd(const Opmask& k, const Operand& op, uint8_t imm) { if (!op.isBit(128|256|512)) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(k.changeBit(op.getBit()), 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, 0x66, imm); }
void vfpclassph(const Opmask& k, const Operand& op, uint8_t imm) { if (!op.isBit(128|256|512)) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(k.changeBit(op.getBit()), 0, op, T_0F3A | T_MUST_EVEX | T_YMM | T_EW0 | T_B16, 0x66, imm); }
void vfpclassps(const Opmask& k, const Operand& op, uint8_t imm) { if (!op.isBit(128|256|512)) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(k.changeBit(op.getBit()), 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW0 | T_B32, 0x66, imm); }