aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorMITSUNARI Shigeo <[email protected]>2024-10-13 14:55:20 +0900
committerMITSUNARI Shigeo <[email protected]>2024-10-13 14:55:20 +0900
commit14ae9bf4859739ca9b23f421d23693a15e75769d (patch)
treeb24ebb716d4e158ca6c2d016e602096f6a82d3d3
parent2818beeffd198dae543019347360252d0ea7b78f (diff)
downloadxbyak-14ae9bf4859739ca9b23f421d23693a15e75769d.tar.gz
xbyak-14ae9bf4859739ca9b23f421d23693a15e75769d.zip
add vpdpbssd for avx10.2
-rw-r--r--gen/gen_avx512.cpp33
-rw-r--r--gen/gen_code.cpp2
-rw-r--r--test/avx10/misc.txt12
-rw-r--r--xbyak/xbyak_mnemonic.h4
4 files changed, 47 insertions, 4 deletions
diff --git a/gen/gen_avx512.cpp b/gen/gen_avx512.cpp
index 9840844..9159a64 100644
--- a/gen/gen_avx512.cpp
+++ b/gen/gen_avx512.cpp
@@ -454,7 +454,37 @@ void putX_X_XM_IMM()
printf("void %s(const Xmm& x1, const Xmm& x2, const Operand& op%s) { opAVX_X_X_XM(x1, x2, op, %s, 0x%02X%s); }\n"
, p->name, p->hasIMM ? ", uint8_t imm" : "", s.c_str(), p->code, p->hasIMM ? ", imm" : "");
}
- puts("void vmpsadbw(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm, PreferredEncoding encoding = DefaultEncoding) { opEncoding(x1, x2, op, T_0F3A | T_YMM, 0x42, encoding, imm, T_66 | T_W0 | T_YMM, T_F3 | T_0F3A | T_EW0 | T_B32, 1); }");
+}
+
+void putX_X_XM_IMM_AVX10()
+{
+ const struct Tbl {
+ uint8_t code;
+ const char *name;
+ uint64_t type;
+ uint64_t typeVex;
+ uint64_t typeEvex;
+ int sel;
+ bool hasIMM;
+ } tbl[] = {
+ { 0x50, "vpdpbssd", T_F2|T_0F38|T_YMM, T_W0, T_EW0|T_B32, 1, false },
+#if 0
+ { 0x51, "vpdpbssds", T_MUST_EVEX | T_YMM | T_F2 | T_0F38 | T_EW0 | T_B32, false },
+ { 0x50, "vpdpbsud", T_MUST_EVEX | T_YMM | T_F3 | T_0F38 | T_EW0 | T_B32, false },
+ { 0x51, "vpdpbsuds", T_MUST_EVEX | T_YMM | T_F3 | T_0F38 | T_EW0 | T_B32, false },
+ { 0x50, "vpdpbuud", T_MUST_EVEX | T_YMM | T_0F38 | T_EW0 | T_B32, false },
+ { 0x51, "vpdpbuuds", T_MUST_EVEX | T_YMM | T_0F38 | T_EW0 | T_B32, false },
+#endif
+ { 0x42, "vmpsadbw", T_0F3A|T_YMM, T_66|T_W0|T_YMM, T_F3|T_0F3A|T_EW0|T_B32, 1, true },
+ };
+ for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
+ const Tbl *p = &tbl[i];
+ std::string s = type2String(p->type);
+ std::string sVex = type2String(p->typeVex);
+ std::string sEvex = type2String(p->typeEvex);
+ printf("void %s(const Xmm& x1, const Xmm& x2, const Operand& op%s, PreferredEncoding encoding = DefaultEncoding) { opEncoding(x1, x2, op, %s, 0x%02X, encoding, %s, %s, %s, %d); }\n"
+ , p->name, p->hasIMM ? ", uint8_t imm" : "", s.c_str(), p->code, p->hasIMM ? "imm" : "NONE", sVex.c_str(), sEvex.c_str(), p->sel);
+ }
}
void putShift()
@@ -1059,6 +1089,7 @@ int main(int argc, char *[])
putM_X();
putXM_X();
putX_X_XM_IMM();
+ putX_X_XM_IMM_AVX10();
putShift();
putExtractInsert();
putCvt();
diff --git a/gen/gen_code.cpp b/gen/gen_code.cpp
index 58c176a..caa9e79 100644
--- a/gen/gen_code.cpp
+++ b/gen/gen_code.cpp
@@ -1907,7 +1907,7 @@ void put()
const char *name;
uint64_t type;
} tbl[] = {
- { 0x50, "vpdpbssd", T_F2 | T_0F38 | T_W0 | T_YMM },
+// { 0x50, "vpdpbssd", T_F2 | T_0F38 | T_W0 | T_YMM },
{ 0x51, "vpdpbssds", T_F2 | T_0F38 | T_W0 | T_YMM },
{ 0x50, "vpdpbsud", T_F3 | T_0F38 | T_W0 | T_YMM },
{ 0x51, "vpdpbsuds", T_F3 | T_0F38 | T_W0 | T_YMM },
diff --git a/test/avx10/misc.txt b/test/avx10/misc.txt
index 5c39e81..8993107 100644
--- a/test/avx10/misc.txt
+++ b/test/avx10/misc.txt
@@ -18,3 +18,15 @@ vmpsadbw(ym1, ym4, ptr[rax+128], 5);
vmpsadbw(zm1|k4, zm3, zm15, 3);
vmpsadbw(zm1, zm4, ptr[rax+128], 5);
+
+vpdpbssd(xm1, xm2, xm3);
+vpdpbssd(xm1, xm2, ptr[rax+128]);
+vpdpbssd(xm1, xm2, ptr_b[rax+128]);
+
+vpdpbssd(ym1, ym2, ym3);
+vpdpbssd(ym1, ym2, ptr[rax+128]);
+vpdpbssd(ym1, ym2, ptr_b[rax+128]);
+
+vpdpbssd(zm1, zm2, zm3);
+vpdpbssd(zm1, zm2, ptr[rax+128]);
+vpdpbssd(zm1, zm2, ptr_b[rax+128]);
diff --git a/xbyak/xbyak_mnemonic.h b/xbyak/xbyak_mnemonic.h
index 8515e41..dbe52e9 100644
--- a/xbyak/xbyak_mnemonic.h
+++ b/xbyak/xbyak_mnemonic.h
@@ -1419,7 +1419,6 @@ void vpcmpgtq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1
void vpcmpgtw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F|T_YMM, 0x65); }
void vpcmpistri(const Xmm& xm, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(xm, op, T_66|T_0F3A, 0x63, imm); }
void vpcmpistrm(const Xmm& xm, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(xm, op, T_66|T_0F3A, 0x62, imm); }
-void vpdpbssd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F2|T_0F38|T_W0|T_YMM, 0x50); }
void vpdpbssds(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F2|T_0F38|T_W0|T_YMM, 0x51); }
void vpdpbsud(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F3|T_0F38|T_W0|T_YMM, 0x50); }
void vpdpbsuds(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F3|T_0F38|T_W0|T_YMM, 0x51); }
@@ -2407,7 +2406,7 @@ void vmovsh(const Xmm& x1, const Xmm& x2, const Xmm& x3) { opAVX_X_X_XM(x1, x2,
void vmovw(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, T_N2|T_66|T_MAP5|T_MUST_EVEX, 0x7E); }
void vmovw(const Reg32e& r, const Xmm& x) { opAVX_X_X_XM(x, xm0, r, T_N2|T_66|T_MAP5|T_MUST_EVEX, 0x7E); }
void vmovw(const Xmm& x, const Operand& op) { if (!op.isREG(32|64) && !op.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x, xm0, op, T_N2|T_66|T_MAP5|T_MUST_EVEX, 0x6E); }
-void vmpsadbw(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm, PreferredEncoding encoding = DefaultEncoding) { opEncoding(x1, x2, op, T_0F3A | T_YMM, 0x42, encoding, imm, T_66 | T_W0 | T_YMM, T_F3 | T_0F3A | T_EW0 | T_B32, 1); }
+void vmpsadbw(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm, PreferredEncoding encoding = DefaultEncoding) { opEncoding(x1, x2, op, T_0F3A|T_YMM, 0x42, encoding, imm, T_66|T_W0|T_YMM, T_F3|T_0F3A|T_EW0|T_B32, 1); }
void vmulnepbf16(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_MAP5|T_EW0|T_YMM|T_MUST_EVEX|T_B16, 0x59); }
void vmulph(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_MAP5 | T_EW0 | T_YMM | T_MUST_EVEX | T_ER_Z | T_B16, 0x59); }
void vmulsh(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_MAP5 | T_F3 | T_EW0 | T_MUST_EVEX | T_ER_X | T_N2, 0x59); }
@@ -2451,6 +2450,7 @@ void vpcompressq(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, T_N8|T
void vpcompressw(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, T_N2|T_66|T_0F38|T_EW1|T_YMM|T_MUST_EVEX, 0x63); }
void vpconflictd(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66|T_0F38|T_EW0|T_YMM|T_MUST_EVEX|T_B32, 0xC4); }
void vpconflictq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66|T_0F38|T_EW1|T_YMM|T_MUST_EVEX|T_B64, 0xC4); }
+void vpdpbssd(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opEncoding(x1, x2, op, T_F2|T_0F38|T_YMM, 0x50, encoding, NONE, T_W0, T_EW0|T_B32, 1); }
void vpermb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_EW0|T_YMM|T_MUST_EVEX, 0x8D); }
void vpermi2b(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_EW0|T_YMM|T_MUST_EVEX, 0x75); }
void vpermi2d(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_EW0|T_YMM|T_MUST_EVEX|T_B32, 0x76); }