aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorMITSUNARI Shigeo <[email protected]>2016-07-14 10:41:54 +0900
committerMITSUNARI Shigeo <[email protected]>2016-07-14 10:41:54 +0900
commit2540a7c6520c83eec214d481bfc3dc74cb0125a1 (patch)
tree9cbbc2fafd3b27459dc3b22d8124c08a856e6092
parentcd650aba4d35a915275df13d0f3e06708f28b026 (diff)
downloadxbyak-2540a7c6520c83eec214d481bfc3dc74cb0125a1.tar.gz
xbyak-2540a7c6520c83eec214d481bfc3dc74cb0125a1.zip
add vextractf{32x4,64x2,32x8,64x4}
-rw-r--r--gen/gen_avx512.cpp19
-rw-r--r--test/make_nm.cpp5
-rw-r--r--xbyak/xbyak.h7
-rw-r--r--xbyak/xbyak_avx512.h4
4 files changed, 32 insertions, 3 deletions
diff --git a/gen/gen_avx512.cpp b/gen/gen_avx512.cpp
index 1b0d489..a51a07a 100644
--- a/gen/gen_avx512.cpp
+++ b/gen/gen_avx512.cpp
@@ -224,6 +224,24 @@ void putShift()
}
}
+void putEtc()
+{
+/*
+Print to stdout the C++ member-function definitions for the four
+vextractf* mnemonics: vextractf32x4, vextractf64x2, vextractf32x8, vextractf64x4.
+Each printed function forwards to opAVX_X_X_XMcvt, passing
+  - opcode 0x19 for vextractf32x4 / vextractf64x2 and 0x1B for vextractf32x8 / vextractf64x4,
+  - T_EW0 for the 32xN forms and T_EW1 for the 64xN forms,
+matching the encodings listed below. main() prints these lines between
+"#ifndef XBYAK_DISABLE_AVX512" and "#endif".
+
+Encodings covered:
+EVEX.256.66.0F3A.W0 19 VEXTRACTF32X4 xmm1/m128 {k1}{z}, ymm2, imm8
+EVEX.512.66.0F3A.W0 19 VEXTRACTF32x4 xmm1/m128 {k1}{z}, zmm2, imm8
+
+EVEX.256.66.0F3A.W1 19 VEXTRACTF64X2 xmm1/m128 {k1}{z}, ymm2, imm8
+EVEX.512.66.0F3A.W1 19 VEXTRACTF64X2 xmm1/m128 {k1}{z}, zmm2, imm8
+
+EVEX.512.66.0F3A.W0 1B VEXTRACTF32X8 ymm1/m256 {k1}{z}, zmm2, imm8
+EVEX.512.66.0F3A.W1 1B VEXTRACTF64x4 ymm1/m256 {k1}{z}, zmm2, imm8
+*/
+ puts("void vextractf32x4(const Operand& op, const Ymm& y, uint8 imm) { opAVX_X_X_XMcvt(y, cvtIdx0(y), op, op.isXMM(), Operand::YMM, T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_YMM, 0x19, imm); }");
+ puts("void vextractf64x2(const Operand& op, const Ymm& y, uint8 imm) { opAVX_X_X_XMcvt(y, cvtIdx0(y), op, op.isXMM(), Operand::YMM, T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_YMM, 0x19, imm); }");
+ puts("void vextractf32x8(const Operand& op, const Zmm& z, uint8 imm) { opAVX_X_X_XMcvt(z, cvtIdx0(z), op, op.isXMM(), Operand::YMM, T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_YMM, 0x1B, imm); }");
+ puts("void vextractf64x4(const Operand& op, const Zmm& z, uint8 imm) { opAVX_X_X_XMcvt(z, cvtIdx0(z), op, op.isXMM(), Operand::YMM, T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_YMM, 0x1B, imm); }");
+}
+
int main()
{
puts("#ifndef XBYAK_DISABLE_AVX512");
@@ -233,5 +251,6 @@ int main()
putM_X();
putX_X_XM_IMM();
putShift();
+ putEtc();
puts("#endif");
}
diff --git a/test/make_nm.cpp b/test/make_nm.cpp
index dc2cd0f..bae711d 100644
--- a/test/make_nm.cpp
+++ b/test/make_nm.cpp
@@ -2970,6 +2970,11 @@ public:
{ "vpunpcklqdq", _XMM3, _XMM, M_1to2 },
{ "vpunpcklqdq", _ZMM, _ZMM, M_1to8 },
+
+ { "vextractf32x4", _XMM | XMM_KZ | _MEM, _YMM | _ZMM, IMM8 },
+ { "vextractf64x2", _XMM | XMM_KZ | _MEM, _YMM | _ZMM, IMM8 },
+ { "vextractf32x8", _YMM | YMM_KZ | _MEM, _ZMM, IMM8 },
+ { "vextractf64x4", _YMM | YMM_KZ | _MEM, _ZMM, IMM8 },
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl& p = tbl[i];
diff --git a/xbyak/xbyak.h b/xbyak/xbyak.h
index 011d4d7..303243e 100644
--- a/xbyak/xbyak.h
+++ b/xbyak/xbyak.h
@@ -401,9 +401,9 @@ public:
bool hasZero() const { return zero_; }
int getOpmaskIdx() const { return mask_; }
int getRounding() const { return rounding_; }
- void setOpmaskIdx(int idx)
+ void setOpmaskIdx(int idx, bool ignore_idx0 = false) // ignore_idx0: when true, idx == 0 is accepted instead of throwing ERR_K0_IS_INVALID
{
- if (idx == 0) throw Error(ERR_K0_IS_INVALID);
+ if (!ignore_idx0 && idx == 0) throw Error(ERR_K0_IS_INVALID); // index 0 is rejected unless the caller opted out
if (mask_) throw Error(ERR_OPMASK_IS_ALREADY_SET); // a non-zero mask_ means an index was stored earlier; it may not be overwritten
mask_ = idx; // storing 0 (only possible with ignore_idx0) leaves mask_ in the state the check above treats as "not set"
}
@@ -1824,8 +1824,9 @@ private:
// if cvt, op2 is replaced by Xmm(op2.getIdx()) when kind == Operand::XMM, else by Ymm(op2.getIdx()), before forwarding to opAVX_X_X_XM; otherwise op2 is forwarded unchanged
void opAVX_X_X_XMcvt(const Xmm& x1, const Operand& op1, const Operand& op2, bool cvt, Operand::Kind kind, int type, int code0, int imm8 = NONE)
{
+ Xmm x = x1; x.setOpmaskIdx(op2.getOpmaskIdx(), true); if (op2.hasZero()) x.setZero(); // copy op2's opmask index and zeroing flag onto a local copy of x1; 'true' lets an op2 with mask index 0 through. NOTE(review): setOpmaskIdx throws ERR_OPMASK_IS_ALREADY_SET when its object already has a non-zero mask, so if x1 arrives with a mask this presumably throws even when op2 has none - confirm against existing callers
// use static_cast to avoid calling unintentional copy constructor on gcc
- opAVX_X_X_XM(x1, op1, cvt ? kind == Operand::XMM ? static_cast<const Operand&>(Xmm(op2.getIdx())) : static_cast<const Operand&>(Ymm(op2.getIdx())) : op2, type, code0, imm8);
+ opAVX_X_X_XM(x, op1, cvt ? kind == Operand::XMM ? static_cast<const Operand&>(Xmm(op2.getIdx())) : static_cast<const Operand&>(Ymm(op2.getIdx())) : op2, type, code0, imm8);
}
const Xmm& cvtIdx0(const Operand& x) const
{
diff --git a/xbyak/xbyak_avx512.h b/xbyak/xbyak_avx512.h
index d65cbb0..5f3b3d1 100644
--- a/xbyak/xbyak_avx512.h
+++ b/xbyak/xbyak_avx512.h
@@ -110,4 +110,8 @@ void vpxord(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1,
void vpxorq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F | T_66 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0xEF); }
void vpmullq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F38 | T_66 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x40); }
void vpsraq(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(x.copyAndSetIdx(4), x, op, T_0F | T_66 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x72, imm); }
+void vextractf32x4(const Operand& op, const Ymm& y, uint8 imm) { opAVX_X_X_XMcvt(y, cvtIdx0(y), op, op.isXMM(), Operand::YMM, T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_YMM, 0x19, imm); }
+void vextractf64x2(const Operand& op, const Ymm& y, uint8 imm) { opAVX_X_X_XMcvt(y, cvtIdx0(y), op, op.isXMM(), Operand::YMM, T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_YMM, 0x19, imm); }
+void vextractf32x8(const Operand& op, const Zmm& z, uint8 imm) { opAVX_X_X_XMcvt(z, cvtIdx0(z), op, op.isXMM(), Operand::YMM, T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_YMM, 0x1B, imm); }
+void vextractf64x4(const Operand& op, const Zmm& z, uint8 imm) { opAVX_X_X_XMcvt(z, cvtIdx0(z), op, op.isXMM(), Operand::YMM, T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_YMM, 0x1B, imm); }
#endif