diff options
author | MITSUNARI Shigeo <[email protected]> | 2016-07-14 10:41:54 +0900 |
---|---|---|
committer | MITSUNARI Shigeo <[email protected]> | 2016-07-14 10:41:54 +0900 |
commit | 2540a7c6520c83eec214d481bfc3dc74cb0125a1 (patch) | |
tree | 9cbbc2fafd3b27459dc3b22d8124c08a856e6092 | |
parent | cd650aba4d35a915275df13d0f3e06708f28b026 (diff) | |
download | xbyak-2540a7c6520c83eec214d481bfc3dc74cb0125a1.tar.gz xbyak-2540a7c6520c83eec214d481bfc3dc74cb0125a1.zip |
add vextractf{32x4,64x2,32x8,64x4}
-rw-r--r-- | gen/gen_avx512.cpp | 19 | ||||
-rw-r--r-- | test/make_nm.cpp | 5 | ||||
-rw-r--r-- | xbyak/xbyak.h | 7 | ||||
-rw-r--r-- | xbyak/xbyak_avx512.h | 4 |
4 files changed, 32 insertions, 3 deletions
diff --git a/gen/gen_avx512.cpp b/gen/gen_avx512.cpp index 1b0d489..a51a07a 100644 --- a/gen/gen_avx512.cpp +++ b/gen/gen_avx512.cpp @@ -224,6 +224,24 @@ void putShift() } } +void putEtc() +{ +/* +EVEX.256.66.0F3A.W0 19 VEXTRACTF32X4 xmm1/m128 {k1}{z}, ymm2, imm8 +EVEX.512.66.0F3A.W0 19 VEXTRACTF32x4 xmm1/m128 {k1}{z}, zmm2, imm8 + +EVEX.256.66.0F3A.W1 19 VEXTRACTF64X2 xmm1/m128 {k1}{z}, ymm2, imm8 +EVEX.512.66.0F3A.W1 19 VEXTRACTF64X2 xmm1/m128 {k1}{z}, zmm2, imm8 + +EVEX.512.66.0F3A.W0 1B VEXTRACTF32X8 ymm1/m256 {k1}{z}, zmm2, imm8 +EVEX.512.66.0F3A.W1 1B VEXTRACTF64x4 ymm1/m256 {k1}{z}, zmm2, imm8 +*/ + puts("void vextractf32x4(const Operand& op, const Ymm& y, uint8 imm) { opAVX_X_X_XMcvt(y, cvtIdx0(y), op, op.isXMM(), Operand::YMM, T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_YMM, 0x19, imm); }"); + puts("void vextractf64x2(const Operand& op, const Ymm& y, uint8 imm) { opAVX_X_X_XMcvt(y, cvtIdx0(y), op, op.isXMM(), Operand::YMM, T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_YMM, 0x19, imm); }"); + puts("void vextractf32x8(const Operand& op, const Zmm& z, uint8 imm) { opAVX_X_X_XMcvt(z, cvtIdx0(z), op, op.isXMM(), Operand::YMM, T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_YMM, 0x1B, imm); }"); + puts("void vextractf64x4(const Operand& op, const Zmm& z, uint8 imm) { opAVX_X_X_XMcvt(z, cvtIdx0(z), op, op.isXMM(), Operand::YMM, T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_YMM, 0x1B, imm); }"); +} + int main() { puts("#ifndef XBYAK_DISABLE_AVX512"); @@ -233,5 +251,6 @@ int main() putM_X(); putX_X_XM_IMM(); putShift(); + putEtc(); puts("#endif"); } diff --git a/test/make_nm.cpp b/test/make_nm.cpp index dc2cd0f..bae711d 100644 --- a/test/make_nm.cpp +++ b/test/make_nm.cpp @@ -2970,6 +2970,11 @@ public: { "vpunpcklqdq", _XMM3, _XMM, M_1to2 }, { "vpunpcklqdq", _ZMM, _ZMM, M_1to8 }, + + { "vextractf32x4", _XMM | XMM_KZ | _MEM, _YMM | _ZMM, IMM8 }, + { "vextractf64x2", _XMM | XMM_KZ | _MEM, _YMM | _ZMM, IMM8 }, + { "vextractf32x8", _YMM | YMM_KZ | _MEM, _ZMM, IMM8 }, + { "vextractf64x4", _YMM | YMM_KZ | _MEM, _ZMM, IMM8 }, }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl& p = tbl[i]; diff --git a/xbyak/xbyak.h b/xbyak/xbyak.h index 011d4d7..303243e 100644 --- a/xbyak/xbyak.h +++ b/xbyak/xbyak.h @@ -401,9 +401,9 @@ public: bool hasZero() const { return zero_; } int getOpmaskIdx() const { return mask_; } int getRounding() const { return rounding_; } - void setOpmaskIdx(int idx) + void setOpmaskIdx(int idx, bool ignore_idx0 = false) { - if (idx == 0) throw Error(ERR_K0_IS_INVALID); + if (!ignore_idx0 && idx == 0) throw Error(ERR_K0_IS_INVALID); if (mask_) throw Error(ERR_OPMASK_IS_ALREADY_SET); mask_ = idx; } @@ -1824,8 +1824,9 @@ private: // if cvt then return pointer to Xmm(idx) (or Ymm(idx)), otherwise return op void opAVX_X_X_XMcvt(const Xmm& x1, const Operand& op1, const Operand& op2, bool cvt, Operand::Kind kind, int type, int code0, int imm8 = NONE) { + Xmm x = x1; x.setOpmaskIdx(op2.getOpmaskIdx(), true); if (op2.hasZero()) x.setZero(); // use static_cast to avoid calling unintentional copy constructor on gcc - opAVX_X_X_XM(x1, op1, cvt ? kind == Operand::XMM ? static_cast<const Operand&>(Xmm(op2.getIdx())) : static_cast<const Operand&>(Ymm(op2.getIdx())) : op2, type, code0, imm8); + opAVX_X_X_XM(x, op1, cvt ? kind == Operand::XMM ? static_cast<const Operand&>(Xmm(op2.getIdx())) : static_cast<const Operand&>(Ymm(op2.getIdx())) : op2, type, code0, imm8); } const Xmm& cvtIdx0(const Operand& x) const { diff --git a/xbyak/xbyak_avx512.h b/xbyak/xbyak_avx512.h index d65cbb0..5f3b3d1 100644 --- a/xbyak/xbyak_avx512.h +++ b/xbyak/xbyak_avx512.h @@ -110,4 +110,8 @@ void vpxord(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, void vpxorq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F | T_66 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0xEF); } void vpmullq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F38 | T_66 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x40); } void vpsraq(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(x.copyAndSetIdx(4), x, op, T_0F | T_66 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x72, imm); } +void vextractf32x4(const Operand& op, const Ymm& y, uint8 imm) { opAVX_X_X_XMcvt(y, cvtIdx0(y), op, op.isXMM(), Operand::YMM, T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_YMM, 0x19, imm); } +void vextractf64x2(const Operand& op, const Ymm& y, uint8 imm) { opAVX_X_X_XMcvt(y, cvtIdx0(y), op, op.isXMM(), Operand::YMM, T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_YMM, 0x19, imm); } +void vextractf32x8(const Operand& op, const Zmm& z, uint8 imm) { opAVX_X_X_XMcvt(z, cvtIdx0(z), op, op.isXMM(), Operand::YMM, T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_YMM, 0x1B, imm); } +void vextractf64x4(const Operand& op, const Zmm& z, uint8 imm) { opAVX_X_X_XMcvt(z, cvtIdx0(z), op, op.isXMM(), Operand::YMM, T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_YMM, 0x1B, imm); } #endif |