about | summary | refs | log | tree | commit | diff | homepage
diff options
context:
space:
mode:
author: MITSUNARI Shigeo <[email protected]> 2016-07-14 15:00:03 +0900
committer: MITSUNARI Shigeo <[email protected]> 2016-07-14 15:00:03 +0900
commit: c04cfa4fda3e71ca910306f332dedfbcd8e835a2 (patch)
tree: ae687a98f706aff38591953f98d43ff8f0166484
parent: 50cceb1786bd621b2d6f55973f59b91b0c2f811d (diff)
download: xbyak-c04cfa4fda3e71ca910306f332dedfbcd8e835a2.tar.gz
download: xbyak-c04cfa4fda3e71ca910306f332dedfbcd8e835a2.zip
fix vpbroadcast{b,w} ; add vbroadcasti{32x2,32x4,64x2,32x8,64x4}
-rw-r--r-- gen/gen_avx512.cpp | 25
-rw-r--r-- test/make_nm.cpp | 22
-rw-r--r-- xbyak/xbyak_avx512.h | 11
3 files changed, 58 insertions, 0 deletions
diff --git a/gen/gen_avx512.cpp b/gen/gen_avx512.cpp
index d36a1ae..c81515e 100644
--- a/gen/gen_avx512.cpp
+++ b/gen/gen_avx512.cpp
@@ -284,6 +284,31 @@ void putBroadcast()
puts("void vbroadcastf32x4(const Ymm& y, const Address& addr) { opAVX_X_XM_IMM(y, addr, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0, 0x1A); }");
puts("void vbroadcastf64x2(const Ymm& y, const Address& addr) { opAVX_X_XM_IMM(y, addr, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1, 0x1A); }");
puts("void vbroadcastf64x4(const Zmm& y, const Address& addr) { opAVX_X_XM_IMM(y, addr, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1, 0x1B); }");
+ {
+ const struct Tbl {
+ uint8 code;
+ const char *name;
+ int type;
+ int reg;
+ } tbl[] = {
+ { 0x7A, "vpbroadcastb", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0, 8 },
+ { 0x7B, "vpbroadcastw", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0, 16 },
+ { 0x7C, "vpbroadcastd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0, 32 },
+ { 0x7C, "vpbroadcastq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1, 64},
+ };
+ for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
+ const Tbl& p = tbl[i];
+ std::string type = type2String(p.type);
+ if (p.reg == 64) puts("#ifdef XBYAK64");
+ printf("void %s(const Xmm& x, const Reg%d& r) { opVex(x, &cvtIdx0(x), r, %s, 0x%02X); }\n", p.name, p.reg, type.c_str(), p.code);
+ if (p.reg == 64) puts("#endif");
+ }
+ }
+ puts("void vbroadcasti32x2(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0, 0x59); }");
+ puts("void vbroadcasti32x4(const Ymm& y, const Operand& op) { opAVX_X_XM_IMM(y, op, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0, 0x5A); }");
+ puts("void vbroadcasti64x2(const Ymm& y, const Operand& op) { opAVX_X_XM_IMM(y, op, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1, 0x5A); }");
+ puts("void vbroadcasti32x8(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0, 0x5B); }");
+ puts("void vbroadcasti64x4(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1, 0x5B); }");
}
int main()
diff --git a/test/make_nm.cpp b/test/make_nm.cpp
index 1a7dd2d..df990e0 100644
--- a/test/make_nm.cpp
+++ b/test/make_nm.cpp
@@ -2612,6 +2612,28 @@ public:
for (int i = 0; i < 9; i++) {
putBroadcastSub(i);
}
+ put("vpbroadcastb", XMM_KZ | ZMM_KZ, REG8);
+ put("vpbroadcastw", XMM_KZ | ZMM_KZ, REG16);
+ put("vpbroadcastd", XMM_KZ | ZMM_KZ, REG32);
+#ifdef XBYAK64
+ put("vpbroadcastq", XMM_KZ | ZMM_KZ, REG64);
+#endif
+ {
+ const char *tbl[] = {
+ "vpbroadcastb",
+ "vpbroadcastw",
+ "vpbroadcastd",
+ "vpbroadcastq",
+ };
+ for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
+ put(tbl[i], XMM_KZ | ZMM_KZ, _XMM | _MEM);
+ }
+ }
+ put("vbroadcasti32x2", XMM_KZ | YMM_KZ | ZMM_KZ, _XMM | _MEM);
+ put("vbroadcasti32x4", YMM_KZ | ZMM_KZ, _MEM);
+ put("vbroadcasti64x2", YMM_KZ | ZMM_KZ, _MEM);
+ put("vbroadcasti32x8", ZMM_KZ, _MEM);
+ put("vbroadcasti64x4", ZMM_KZ, _MEM);
}
void putAVX512_M_X()
{
diff --git a/xbyak/xbyak_avx512.h b/xbyak/xbyak_avx512.h
index 4df3a41..f822a71 100644
--- a/xbyak/xbyak_avx512.h
+++ b/xbyak/xbyak_avx512.h
@@ -132,4 +132,15 @@ void vbroadcastf32x2(const Ymm& y, const Operand& op) { opAVX_X_XM_IMM(y, op, T_
void vbroadcastf32x4(const Ymm& y, const Address& addr) { opAVX_X_XM_IMM(y, addr, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0, 0x1A); }
void vbroadcastf64x2(const Ymm& y, const Address& addr) { opAVX_X_XM_IMM(y, addr, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1, 0x1A); }
void vbroadcastf64x4(const Zmm& y, const Address& addr) { opAVX_X_XM_IMM(y, addr, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1, 0x1B); }
+void vpbroadcastb(const Xmm& x, const Reg8& r) { opVex(x, &cvtIdx0(x), r, T_0F38 | T_66 | T_EW0 | T_YMM | T_MUST_EVEX, 0x7A); }
+void vpbroadcastw(const Xmm& x, const Reg16& r) { opVex(x, &cvtIdx0(x), r, T_0F38 | T_66 | T_EW0 | T_YMM | T_MUST_EVEX, 0x7B); }
+void vpbroadcastd(const Xmm& x, const Reg32& r) { opVex(x, &cvtIdx0(x), r, T_0F38 | T_66 | T_EW0 | T_YMM | T_MUST_EVEX, 0x7C); }
+#ifdef XBYAK64
+void vpbroadcastq(const Xmm& x, const Reg64& r) { opVex(x, &cvtIdx0(x), r, T_0F38 | T_66 | T_EW1 | T_YMM | T_MUST_EVEX, 0x7C); }
+#endif
+void vbroadcasti32x2(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0, 0x59); }
+void vbroadcasti32x4(const Ymm& y, const Operand& op) { opAVX_X_XM_IMM(y, op, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0, 0x5A); }
+void vbroadcasti64x2(const Ymm& y, const Operand& op) { opAVX_X_XM_IMM(y, op, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1, 0x5A); }
+void vbroadcasti32x8(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0, 0x5B); }
+void vbroadcasti64x4(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1, 0x5B); }
#endif