about | summary | refs | log | tree | commit | diff | homepage
diff options
context:
space:
mode:
author: MITSUNARI Shigeo <[email protected]> 2024-10-14 04:48:25 +0900
committer: MITSUNARI Shigeo <[email protected]> 2024-10-14 04:52:03 +0900
commit: 86e532fe1a1027f8b13e4833d1558697c6512f76 (patch)
tree: 45e2c9bda876a1fb50629ac652405c549a1997e5
parent: aabf2abeb0c65fecffdb71eccad62a5eac8587c5 (diff)
download: xbyak-86e532fe1a1027f8b13e4833d1558697c6512f76.tar.gz
xbyak-86e532fe1a1027f8b13e4833d1558697c6512f76.zip
add avx10 minmax
-rw-r--r-- gen/gen_avx512.cpp 7
-rw-r--r-- test/avx10/minmax.txt 66
-rw-r--r-- xbyak/xbyak_mnemonic.h 7
3 files changed, 80 insertions, 0 deletions
diff --git a/gen/gen_avx512.cpp b/gen/gen_avx512.cpp
index 2b8a328..cfe0ac6 100644
--- a/gen/gen_avx512.cpp
+++ b/gen/gen_avx512.cpp
@@ -447,6 +447,13 @@ void putX_X_XM_IMM()
{ 0x1B, "vcvtne2ph2hf8s", T_MUST_EVEX | T_F2 | T_MAP5 | T_EW0 | T_YMM | T_B16 | T_N1, false },
{ 0x52, "vdpphps", T_MUST_EVEX | T_0F38 | T_EW0 | T_YMM | T_B32, false },
+ { 0x52, "vminmaxnepbf16", T_MUST_EVEX | T_F2 | T_0F3A | T_EW0 | T_YMM | T_B16, true },
+ { 0x52, "vminmaxpd", T_MUST_EVEX | T_66 | T_0F3A | T_EW1 | T_YMM | T_B64 | T_SAE_Y | T_SAE_Z, true },
+ { 0x52, "vminmaxph", T_MUST_EVEX | T_0F3A | T_EW0 | T_YMM | T_B16 | T_SAE_Y | T_SAE_Z, true },
+ { 0x52, "vminmaxps", T_MUST_EVEX | T_66 | T_0F3A | T_EW0 | T_YMM | T_B32 | T_SAE_Y | T_SAE_Z, true },
+ { 0x53, "vminmaxsd", T_MUST_EVEX | T_66 | T_0F3A | T_EW1 | T_SAE_X | T_N8, true },
+ { 0x53, "vminmaxsh", T_MUST_EVEX | T_0F3A | T_EW0 | T_SAE_X | T_N2, true },
+ { 0x53, "vminmaxss", T_MUST_EVEX | T_66 | T_0F3A | T_EW0 | T_SAE_X | T_N4, true },
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl *p = &tbl[i];
diff --git a/test/avx10/minmax.txt b/test/avx10/minmax.txt
new file mode 100644
index 0000000..8b2c662
--- /dev/null
+++ b/test/avx10/minmax.txt
@@ -0,0 +1,66 @@
+vminmaxnepbf16(xm1|k3|T_z, xm2, xm3, 5);
+vminmaxnepbf16(xm1|k3|T_z, xm2, ptr[rax+128], 5);
+vminmaxnepbf16(xm1|k3|T_z, xm2, ptr_b[rax+128], 5);
+
+vminmaxnepbf16(ym1|k3|T_z, ym2, ym3, 5);
+vminmaxnepbf16(ym1|k3|T_z, ym2, ptr[rax+128], 5);
+vminmaxnepbf16(ym1|k3|T_z, ym2, ptr_b[rax+128], 5);
+
+vminmaxnepbf16(zm1|k3|T_z, zm2, zm3, 5);
+vminmaxnepbf16(zm1|k3|T_z, zm2, ptr[rax+128], 5);
+vminmaxnepbf16(zm1|k3|T_z, zm2, ptr_b[rax+128], 5);
+// vminmaxpd: xmm/ymm/zmm destinations with register, ptr and ptr_b sources; T_sae appears only on the ymm/zmm register forms
+vminmaxpd(xm1|k3|T_z, xm2, xm3, 5);
+vminmaxpd(xm1|k3|T_z, xm2, ptr[rax+128], 5);
+vminmaxpd(xm1|k3|T_z, xm2, ptr_b[rax+128], 5);
+
+vminmaxpd(ym1|k3|T_z, ym2, ym3, 5);
+vminmaxpd(ym1|k3|T_z, ym2, ym3|T_sae, 5);
+vminmaxpd(ym1|k3|T_z, ym2, ptr[rax+128], 5);
+vminmaxpd(ym1|k3|T_z, ym2, ptr_b[rax+128], 5);
+
+vminmaxpd(zm1|k3|T_z, zm2, zm3, 5);
+vminmaxpd(zm1|k3|T_z, zm2, zm3|T_sae, 5);
+vminmaxpd(zm1|k3|T_z, zm2, ptr[rax+128], 5);
+vminmaxpd(zm1|k3|T_z, zm2, ptr_b[rax+128], 5);
+// vminmaxph: same operand matrix as vminmaxpd. NOTE(review): the xmm ptr[rax+128] case is listed twice below; confirm whether the repeat is intended
+vminmaxph(xm1|k3|T_z, xm2, xm3, 5);
+vminmaxph(xm1|k3|T_z, xm2, ptr[rax+128], 5);
+vminmaxph(xm1|k3|T_z, xm2, ptr[rax+128], 5);
+vminmaxph(xm1|k3|T_z, xm2, ptr_b[rax+128], 5);
+
+vminmaxph(ym1|k3|T_z, ym2, ym3, 5);
+vminmaxph(ym1|k3|T_z, ym2, ym3|T_sae, 5);
+vminmaxph(ym1|k3|T_z, ym2, ptr[rax+128], 5);
+vminmaxph(ym1|k3|T_z, ym2, ptr_b[rax+128], 5);
+
+vminmaxph(zm1|k3|T_z, zm2, zm3, 5);
+vminmaxph(zm1|k3|T_z, zm2, zm3|T_sae, 5);
+vminmaxph(zm1|k3|T_z, zm2, ptr[rax+128], 5);
+vminmaxph(zm1|k3|T_z, zm2, ptr_b[rax+128], 5);
+// vminmaxps: same operand matrix as vminmaxpd (register, ptr, ptr_b; T_sae on ymm/zmm register forms)
+vminmaxps(xm1|k3|T_z, xm2, xm3, 5);
+vminmaxps(xm1|k3|T_z, xm2, ptr[rax+128], 5);
+vminmaxps(xm1|k3|T_z, xm2, ptr_b[rax+128], 5);
+
+vminmaxps(ym1|k3|T_z, ym2, ym3, 5);
+vminmaxps(ym1|k3|T_z, ym2, ym3|T_sae, 5);
+vminmaxps(ym1|k3|T_z, ym2, ptr[rax+128], 5);
+vminmaxps(ym1|k3|T_z, ym2, ptr_b[rax+128], 5);
+
+vminmaxps(zm1|k3|T_z, zm2, zm3, 5);
+vminmaxps(zm1|k3|T_z, zm2, zm3|T_sae, 5);
+vminmaxps(zm1|k3|T_z, zm2, ptr[rax+128], 5);
+vminmaxps(zm1|k3|T_z, zm2, ptr_b[rax+128], 5);
+// vminmaxsd: xmm operands only; register, register with T_sae, and ptr source (no ptr_b case)
+vminmaxsd(xm1|k3|T_z, xm2, xm3, 5);
+vminmaxsd(xm1|k3|T_z, xm2, xm3|T_sae, 5);
+vminmaxsd(xm1|k3|T_z, xm2, ptr[rax+128], 5);
+// vminmaxsh: xmm operands only; register, register with T_sae, and ptr source (no ptr_b case)
+vminmaxsh(xm1|k3|T_z, xm2, xm3, 5);
+vminmaxsh(xm1|k3|T_z, xm2, xm3|T_sae, 5);
+vminmaxsh(xm1|k3|T_z, xm2, ptr[rax+128], 5);
+// vminmaxss: xmm operands only; register, register with T_sae, and ptr source (no ptr_b case)
+vminmaxss(xm1|k3|T_z, xm2, xm3, 5);
+vminmaxss(xm1|k3|T_z, xm2, xm3|T_sae, 5);
+vminmaxss(xm1|k3|T_z, xm2, ptr[rax+128], 5);
diff --git a/xbyak/xbyak_mnemonic.h b/xbyak/xbyak_mnemonic.h
index 0397ffd..b4cb11c 100644
--- a/xbyak/xbyak_mnemonic.h
+++ b/xbyak/xbyak_mnemonic.h
@@ -2374,6 +2374,13 @@ void vinserti64x4(const Zmm& r1, const Zmm& r2, const Operand& op, uint8_t imm)
void vmaxpbf16(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_MAP5|T_EW0|T_YMM|T_MUST_EVEX|T_B16, 0x5F); }
void vmaxph(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_MAP5 | T_EW0 | T_YMM | T_MUST_EVEX | T_SAE_Z | T_B16, 0x5F); }
void vmaxsh(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_MAP5 | T_F3 | T_EW0 | T_MUST_EVEX | T_SAE_X | T_N2, 0x5F); }
+void vminmaxnepbf16(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_F2|T_0F3A|T_EW0|T_YMM|T_MUST_EVEX|T_B16, 0x52, imm); } // opcode 0x52 with T_F2|T_B16; unlike vminmaxpd/ph/ps no T_SAE_* flag is passed; imm is forwarded to opAVX_X_X_XM
+void vminmaxpd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F3A|T_EW1|T_YMM|T_SAE_Y|T_SAE_Z|T_MUST_EVEX|T_B64, 0x52, imm); } // opcode 0x52 with T_66|T_EW1|T_B64; T_SAE_Y|T_SAE_Z are passed; imm is forwarded to opAVX_X_X_XM
+void vminmaxph(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_0F3A|T_EW0|T_YMM|T_SAE_Y|T_SAE_Z|T_MUST_EVEX|T_B16, 0x52, imm); } // opcode 0x52 with T_EW0|T_B16 and no T_66/T_F2 flag; T_SAE_Y|T_SAE_Z are passed; imm is forwarded to opAVX_X_X_XM
+void vminmaxps(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F3A|T_EW0|T_YMM|T_SAE_Y|T_SAE_Z|T_MUST_EVEX|T_B32, 0x52, imm); } // opcode 0x52 with T_66|T_EW0|T_B32; T_SAE_Y|T_SAE_Z are passed; imm is forwarded to opAVX_X_X_XM
+void vminmaxsd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_N8|T_66|T_0F3A|T_EW1|T_SAE_X|T_MUST_EVEX, 0x53, imm); } // opcode 0x53 (vminmaxpd uses 0x52); passes T_N8|T_SAE_X and no T_YMM or T_B* flag; imm is forwarded to opAVX_X_X_XM
+void vminmaxsh(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_N2|T_0F3A|T_EW0|T_SAE_X|T_MUST_EVEX, 0x53, imm); } // opcode 0x53 (vminmaxph uses 0x52); passes T_N2|T_SAE_X and no T_YMM or T_B* flag; imm is forwarded to opAVX_X_X_XM
+void vminmaxss(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_N4|T_66|T_0F3A|T_EW0|T_SAE_X|T_MUST_EVEX, 0x53, imm); } // opcode 0x53 (vminmaxps uses 0x52); passes T_N4|T_SAE_X and no T_YMM or T_B* flag; imm is forwarded to opAVX_X_X_XM
void vminpbf16(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_MAP5|T_EW0|T_YMM|T_MUST_EVEX|T_B16, 0x5D); }
void vminph(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_MAP5 | T_EW0 | T_YMM | T_MUST_EVEX | T_SAE_Z | T_B16, 0x5D); }
void vminsh(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_MAP5 | T_F3 | T_EW0 | T_MUST_EVEX | T_SAE_X | T_N2, 0x5D); }