diff options
author | MITSUNARI Shigeo <[email protected]> | 2016-07-21 20:29:25 +0900 |
---|---|---|
committer | MITSUNARI Shigeo <[email protected]> | 2016-07-21 20:29:25 +0900 |
commit | 5e77cfae66dfb67a33007ca3d3a4193f7c696a00 (patch) | |
tree | ff619ef02162e5622216475774b8da1c6e3b8971 | |
parent | 6c62620430223fb6a0104bd675cf1ef7734aee8d (diff) | |
download | xbyak-5e77cfae66dfb67a33007ca3d3a4193f7c696a00.tar.gz xbyak-5e77cfae66dfb67a33007ca3d3a4193f7c696a00.zip |
add vshuf*, vpternlog{d,q}
-rw-r--r-- | gen/gen_avx512.cpp | 18 | ||||
-rw-r--r-- | test/make_512.cpp | 36 | ||||
-rw-r--r-- | xbyak/xbyak_avx512.h | 11 |
3 files changed, 63 insertions, 2 deletions
```diff
diff --git a/gen/gen_avx512.cpp b/gen/gen_avx512.cpp
index a912d33..d724edb 100644
--- a/gen/gen_avx512.cpp
+++ b/gen/gen_avx512.cpp
@@ -271,6 +271,15 @@ void putX_X_XM_IMM()
 		{ 0x7E, "vpermt2q", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false },
 		{ 0x7F, "vpermt2ps", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, false },
 		{ 0x7F, "vpermt2pd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false },
+
+		{ 0x75, "vpermi2w", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1, false },
+		{ 0x76, "vpermi2d", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, false },
+		{ 0x76, "vpermi2q", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false },
+		{ 0x77, "vpermi2ps", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, false },
+		{ 0x77, "vpermi2pd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false },
+
+		{ 0x25, "vpternlogd", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, true },
+		{ 0x25, "vpternlogq", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, true },
 	};
 	for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
 		const Tbl *p = &tbl[i];
@@ -444,6 +453,14 @@ void putGather()
 	}
 }
 
+void putShuff()
+{
+	puts("void vshuff32x4(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, 0x23, imm); }");
+	puts("void vshuff64x2(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, 0x23, imm); }");
+	puts("void vshufi32x4(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, 0x43, imm); }");
+	puts("void vshufi64x2(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, 0x43, imm); }");
+}
+
 int main()
 {
 	puts("#ifndef XBYAK_DISABLE_AVX512");
@@ -460,5 +477,6 @@ int main()
 #endif
 	putCvt();
 	putGather();
+	putShuff();
 	puts("#endif");
 }
diff --git a/test/make_512.cpp b/test/make_512.cpp
index 3f65659..9a40304 100644
--- a/test/make_512.cpp
+++ b/test/make_512.cpp
@@ -30,7 +30,7 @@ const uint64 MEM32 = 1ULL << 17;
 const uint64 VM32Z = 1ULL << 19;
 const uint64 K_K = 1ULL << 20;
 const uint64 MEM_ONLY_DISP = 1ULL << 21;
-const uint64 NEG32 = 1ULL << 23;
+//const uint64 QQQ = 1ULL << 23;
 const uint64 _YMM = 1ULL << 24;
 const uint64 VM32X_32 = 1ULL << 39;
 const uint64 VM32X_64 = 1ULL << 40;
@@ -1714,6 +1714,11 @@ public:
 			{ "vpermt2q", M_1to2 },
 			{ "vpermt2ps", M_1to4 },
 			{ "vpermt2pd", M_1to2 },
+
+			{ "vpermi2w", 0 },
+			{ "vpermi2d", M_1to4 },
+			{ "vpermi2q", M_1to2 },
+			{ "vpermi2ps", M_1to4 },
 		};
 		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
 			const Tbl& p = tbl[i];
@@ -1723,10 +1728,33 @@ public:
 			put(p.name, ZMM_KZ, _ZMM, _ZMM | _MEM | bTbl[2]);
 		}
 	}
+	void putShuff()
+	{
+		put("vshuff32x4", YMM_KZ, _YMM, _YMM | _MEM | M_1to8, IMM8);
+		put("vshuff32x4", ZMM_KZ, _ZMM, _ZMM | _MEM | M_1to16, IMM8);
+
+		put("vshuff64x2", YMM_KZ, _YMM, _YMM | _MEM | M_1to4, IMM8);
+		put("vshuff64x2", ZMM_KZ, _ZMM, _ZMM | _MEM | M_1to8, IMM8);
+
+		put("vshufi32x4", YMM_KZ, _YMM, _YMM | _MEM | M_1to8, IMM8);
+		put("vshufi32x4", ZMM_KZ, _ZMM, _ZMM | _MEM | M_1to16, IMM8);
+
+		put("vshufi64x2", YMM_KZ, _YMM, _YMM | _MEM | M_1to4, IMM8);
+		put("vshufi64x2", ZMM_KZ, _ZMM, _ZMM | _MEM | M_1to8, IMM8);
+	}
+	void putMisc2()
+	{
+		put("vpternlogd", XMM_KZ, _XMM, _XMM | _MEM | M_1to4, IMM8);
+		put("vpternlogd", YMM_KZ, _YMM, _YMM | _MEM | M_1to8, IMM8);
+		put("vpternlogd", ZMM_KZ, _ZMM, _ZMM | _MEM | M_1to16, IMM8);
+
+		put("vpternlogq", XMM_KZ, _XMM, _XMM | _MEM | M_1to2, IMM8);
+		put("vpternlogq", YMM_KZ, _YMM, _YMM | _MEM | M_1to4, IMM8);
+		put("vpternlogq", ZMM_KZ, _ZMM, _ZMM | _MEM | M_1to8, IMM8);
+	}
 	void putMin()
 	{
 #ifdef XBYAK64
-		putPerm();
 #endif
 	}
 	void putAVX512()
@@ -1775,6 +1803,10 @@ public:
 		putCompExp();
 		separateFunc();
 		putPerm();
+		separateFunc();
+		putShuff();
+		separateFunc();
+		putMisc2();
 #endif
 	}
 };
diff --git a/xbyak/xbyak_avx512.h b/xbyak/xbyak_avx512.h
index 48c5ec9..e22bd05 100644
--- a/xbyak/xbyak_avx512.h
+++ b/xbyak/xbyak_avx512.h
@@ -156,6 +156,13 @@ void vpermt2d(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1
 void vpermt2q(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x7E); }
 void vpermt2ps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x7F); }
 void vpermt2pd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x7F); }
+void vpermi2w(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x75); }
+void vpermi2d(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x76); }
+void vpermi2q(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x76); }
+void vpermi2ps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x77); }
+void vpermi2pd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x77); }
+void vpternlogd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x25, imm); }
+void vpternlogq(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x25, imm); }
 void vpsraq(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 4), x, op, T_66 | T_0F | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x72, imm); }
 void vextractf32x4(const Operand& op, const Ymm& r, uint8 imm) { opAVX_X_X_XMcvt(r, true, cvtIdx0(r), op, op.isXMM(), Operand::YMM, T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX | T_N16, 0x19, imm); }
 void vextractf64x2(const Operand& op, const Ymm& r, uint8 imm) { opAVX_X_X_XMcvt(r, true, cvtIdx0(r), op, op.isXMM(), Operand::YMM, T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX | T_N16, 0x19, imm); }
@@ -217,4 +224,8 @@ void vgatherdps(const Xmm& x, const Address& addr) { opGather2(x, addr, T_66 | T
 void vgatherdpd(const Xmm& x, const Address& addr) { opGather2(x, addr, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_N8, 0x92, 1); }
 void vgatherqps(const Xmm& x, const Address& addr) { opGather2(x, addr, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_N4, 0x93, 2); }
 void vgatherqpd(const Xmm& x, const Address& addr) { opGather2(x, addr, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_N8, 0x93, 0); }
+void vshuff32x4(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, 0x23, imm); }
+void vshuff64x2(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, 0x23, imm); }
+void vshufi32x4(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, 0x43, imm); }
+void vshufi64x2(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, 0x43, imm); }
 #endif
```