diff options
author | MITSUNARI Shigeo <[email protected]> | 2018-01-04 14:38:45 +0900 |
---|---|---|
committer | MITSUNARI Shigeo <[email protected]> | 2018-01-04 14:38:45 +0900 |
commit | f181c2595272aa80686d626bbd3a3eaff0439e62 (patch) | |
tree | fa37f1d4ac85410dacc1986a1d25ae6ecf24cd5f | |
parent | 5a402477f8e2f842a3b755b4e115e2af08c25e9d (diff) | |
download | xbyak-f181c2595272aa80686d626bbd3a3eaff0439e62.tar.gz xbyak-f181c2595272aa80686d626bbd3a3eaff0439e62.zip |
add vcompressb, vcompressw
-rw-r--r-- | gen/gen_avx512.cpp | 3 | ||||
-rw-r--r-- | test/misc.cpp | 41 | ||||
-rw-r--r-- | xbyak/xbyak_mnemonic.h | 2 |
3 files changed, 45 insertions, 1 deletion
diff --git a/gen/gen_avx512.cpp b/gen/gen_avx512.cpp index 1c8cf9b..8a11b7d 100644 --- a/gen/gen_avx512.cpp +++ b/gen/gen_avx512.cpp @@ -228,6 +228,9 @@ void putXM_X() { 0x8B, "vpcompressd", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 }, { 0x8B, "vpcompressq", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_N8 }, + + { 0x63, "vcompressb", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N1 }, + { 0x63, "vcompressw", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_N2 }, }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl *p = &tbl[i]; diff --git a/test/misc.cpp b/test/misc.cpp index 5fa5ea7..e7db693 100644 --- a/test/misc.cpp +++ b/test/misc.cpp @@ -199,5 +199,44 @@ CYBOZU_TEST_AUTO(vpclmulqdq) CYBOZU_TEST_EQUAL(c.getSize(), n); CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n); } - +CYBOZU_TEST_AUTO(vcompressb_w) +{ + struct Code : Xbyak::CodeGenerator { + Code() + { + vcompressb(ptr[rax + 64], xmm1); + vcompressb(xmm30 | k5, xmm1); + vcompressb(ptr[rax + 64], ymm1); + vcompressb(ymm30 | k3 |T_z, ymm1); + vcompressb(ptr[rax + 64], zmm1); + vcompressb(zmm30 | k2 |T_z, zmm1); + + vcompressw(ptr[rax + 64], xmm1); + vcompressw(xmm30 | k5, xmm1); + vcompressw(ptr[rax + 64], ymm1); + vcompressw(ymm30 | k3 |T_z, ymm1); + vcompressw(ptr[rax + 64], zmm1); + vcompressw(zmm30 | k2 |T_z, zmm1); + } + } c; + const uint8_t tbl[] = { + 0x62, 0xf2, 0x7d, 0x08, 0x63, 0x48, 0x40, + 0x62, 0x92, 0x7d, 0x0d, 0x63, 0xce, + 0x62, 0xf2, 0x7d, 0x28, 0x63, 0x48, 0x40, + 0x62, 0x92, 0x7d, 0xab, 0x63, 0xce, + 0x62, 0xf2, 0x7d, 0x48, 0x63, 0x48, 0x40, + 0x62, 0x92, 0x7d, 0xca, 0x63, 0xce, + + + 0x62, 0xf2, 0xfd, 0x08, 0x63, 0x48, 0x20, + 0x62, 0x92, 0xfd, 0x0d, 0x63, 0xce, + 0x62, 0xf2, 0xfd, 0x28, 0x63, 0x48, 0x20, + 0x62, 0x92, 0xfd, 0xab, 0x63, 0xce, + 0x62, 0xf2, 0xfd, 0x48, 0x63, 0x48, 0x20, + 0x62, 0x92, 0xfd, 0xca, 0x63, 0xce, + }; + const size_t n = sizeof(tbl) / sizeof(tbl[0]); + CYBOZU_TEST_EQUAL(c.getSize(), n); + CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n); +} #endif 
diff --git a/xbyak/xbyak_mnemonic.h b/xbyak/xbyak_mnemonic.h index 5591bb3..79aa8a2 100644 --- a/xbyak/xbyak_mnemonic.h +++ b/xbyak/xbyak_mnemonic.h @@ -1662,8 +1662,10 @@ void vcmppd(const Opmask& k, const Xmm& x, const Operand& op, uint8 imm) { opAVX void vcmpps(const Opmask& k, const Xmm& x, const Operand& op, uint8 imm) { opAVX_K_X_XM(k, x, op, T_0F | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX, 0xC2, imm); } void vcmpsd(const Opmask& k, const Xmm& x, const Operand& op, uint8 imm) { opAVX_K_X_XM(k, x, op, T_N8 | T_F2 | T_0F | T_EW1 | T_SAE_Z | T_MUST_EVEX, 0xC2, imm); } void vcmpss(const Opmask& k, const Xmm& x, const Operand& op, uint8 imm) { opAVX_K_X_XM(k, x, op, T_N4 | T_F3 | T_0F | T_EW0 | T_SAE_Z | T_MUST_EVEX, 0xC2, imm); } +void vcompressb(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, T_N1 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x63); } void vcompresspd(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x8A); } void vcompressps(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x8A); } +void vcompressw(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, T_N2 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x63); } void vcvtpd2qq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F | T_EW1 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B64, 0x7B); } void vcvtpd2udq(const Xmm& x, const Operand& op) { opCvt2(x, op, T_0F | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 | T_ER_Z, 0x79); } void vcvtpd2uqq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F | T_EW1 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B64, 0x79); } |