diff options
author | MITSUNARI Shigeo <[email protected]> | 2016-07-14 15:00:03 +0900 |
---|---|---|
committer | MITSUNARI Shigeo <[email protected]> | 2016-07-14 15:00:03 +0900 |
commit | c04cfa4fda3e71ca910306f332dedfbcd8e835a2 (patch) | |
tree | ae687a98f706aff38591953f98d43ff8f0166484 | |
parent | 50cceb1786bd621b2d6f55973f59b91b0c2f811d (diff) | |
download | xbyak-c04cfa4fda3e71ca910306f332dedfbcd8e835a2.tar.gz xbyak-c04cfa4fda3e71ca910306f332dedfbcd8e835a2.zip |
fix vpbroadcast{b,w} ; add vbroadcasti{32x2,32x4,64x2,32x8,64x4}
-rw-r--r-- | gen/gen_avx512.cpp | 25 | ||||
-rw-r--r-- | test/make_nm.cpp | 22 | ||||
-rw-r--r-- | xbyak/xbyak_avx512.h | 11 |
3 files changed, 58 insertions, 0 deletions
diff --git a/gen/gen_avx512.cpp b/gen/gen_avx512.cpp
index d36a1ae..c81515e 100644
--- a/gen/gen_avx512.cpp
+++ b/gen/gen_avx512.cpp
@@ -284,6 +284,31 @@ void putBroadcast()
 	puts("void vbroadcastf32x4(const Ymm& y, const Address& addr) { opAVX_X_XM_IMM(y, addr, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0, 0x1A); }");
 	puts("void vbroadcastf64x2(const Ymm& y, const Address& addr) { opAVX_X_XM_IMM(y, addr, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1, 0x1A); }");
 	puts("void vbroadcastf64x4(const Zmm& y, const Address& addr) { opAVX_X_XM_IMM(y, addr, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1, 0x1B); }");
+	{
+		const struct Tbl {
+			uint8 code;
+			const char *name;
+			int type;
+			int reg;
+		} tbl[] = {
+			{ 0x7A, "vpbroadcastb", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0, 8 },
+			{ 0x7B, "vpbroadcastw", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0, 16 },
+			{ 0x7C, "vpbroadcastd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0, 32 },
+			{ 0x7C, "vpbroadcastq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1, 64},
+		};
+		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
+			const Tbl& p = tbl[i];
+			std::string type = type2String(p.type);
+			if (p.reg == 64) puts("#ifdef XBYAK64");
+			printf("void %s(const Xmm& x, const Reg%d& r) { opVex(x, &cvtIdx0(x), r, %s, 0x%02X); }\n", p.name, p.reg, type.c_str(), p.code);
+			if (p.reg == 64) puts("#endif");
+		}
+	}
+	puts("void vbroadcasti32x2(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0, 0x59); }");
+	puts("void vbroadcasti32x4(const Ymm& y, const Operand& op) { opAVX_X_XM_IMM(y, op, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0, 0x5A); }");
+	puts("void vbroadcasti64x2(const Ymm& y, const Operand& op) { opAVX_X_XM_IMM(y, op, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1, 0x5A); }");
+	puts("void vbroadcasti32x8(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0, 0x5B); }");
+	puts("void vbroadcasti64x4(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1, 0x5B); }");
 }
 
 int main()
diff --git a/test/make_nm.cpp b/test/make_nm.cpp
index 1a7dd2d..df990e0 100644
--- a/test/make_nm.cpp
+++ b/test/make_nm.cpp
@@ -2612,6 +2612,28 @@ public:
 		for (int i = 0; i < 9; i++) {
 			putBroadcastSub(i);
 		}
+		put("vpbroadcastb", XMM_KZ | ZMM_KZ, REG8);
+		put("vpbroadcastw", XMM_KZ | ZMM_KZ, REG16);
+		put("vpbroadcastd", XMM_KZ | ZMM_KZ, REG32);
+#ifdef XBYAK64
+		put("vpbroadcastq", XMM_KZ | ZMM_KZ, REG64);
+#endif
+		{
+			const char *tbl[] = {
+				"vpbroadcastb",
+				"vpbroadcastw",
+				"vpbroadcastd",
+				"vpbroadcastq",
+			};
+			for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
+				put(tbl[i], XMM_KZ | ZMM_KZ, _XMM | _MEM);
+			}
+		}
+		put("vbroadcasti32x2", XMM_KZ | YMM_KZ | ZMM_KZ, _XMM | _MEM);
+		put("vbroadcasti32x4", YMM_KZ | ZMM_KZ, _MEM);
+		put("vbroadcasti64x2", YMM_KZ | ZMM_KZ, _MEM);
+		put("vbroadcasti32x8", ZMM_KZ, _MEM);
+		put("vbroadcasti64x4", ZMM_KZ, _MEM);
 	}
 	void putAVX512_M_X()
 	{
diff --git a/xbyak/xbyak_avx512.h b/xbyak/xbyak_avx512.h
index 4df3a41..f822a71 100644
--- a/xbyak/xbyak_avx512.h
+++ b/xbyak/xbyak_avx512.h
@@ -132,4 +132,15 @@ void vbroadcastf32x2(const Ymm& y, const Operand& op) { opAVX_X_XM_IMM(y, op, T_
 void vbroadcastf32x4(const Ymm& y, const Address& addr) { opAVX_X_XM_IMM(y, addr, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0, 0x1A); }
 void vbroadcastf64x2(const Ymm& y, const Address& addr) { opAVX_X_XM_IMM(y, addr, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1, 0x1A); }
 void vbroadcastf64x4(const Zmm& y, const Address& addr) { opAVX_X_XM_IMM(y, addr, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1, 0x1B); }
+void vpbroadcastb(const Xmm& x, const Reg8& r) { opVex(x, &cvtIdx0(x), r, T_0F38 | T_66 | T_EW0 | T_YMM | T_MUST_EVEX, 0x7A); }
+void vpbroadcastw(const Xmm& x, const Reg16& r) { opVex(x, &cvtIdx0(x), r, T_0F38 | T_66 | T_EW0 | T_YMM | T_MUST_EVEX, 0x7B); }
+void vpbroadcastd(const Xmm& x, const Reg32& r) { opVex(x, &cvtIdx0(x), r, T_0F38 | T_66 | T_EW0 | T_YMM | T_MUST_EVEX, 0x7C); }
+#ifdef XBYAK64
+void vpbroadcastq(const Xmm& x, const Reg64& r) { opVex(x, &cvtIdx0(x), r, T_0F38 | T_66 | T_EW1 | T_YMM | T_MUST_EVEX, 0x7C); }
+#endif
+void vbroadcasti32x2(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0, 0x59); }
+void vbroadcasti32x4(const Ymm& y, const Operand& op) { opAVX_X_XM_IMM(y, op, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0, 0x5A); }
+void vbroadcasti64x2(const Ymm& y, const Operand& op) { opAVX_X_XM_IMM(y, op, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1, 0x5A); }
+void vbroadcasti32x8(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0, 0x5B); }
+void vbroadcasti64x4(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1, 0x5B); }
 #endif