diff options
author | MITSUNARI Shigeo <[email protected]> | 2021-06-17 15:56:09 +0900 |
---|---|---|
committer | MITSUNARI Shigeo <[email protected]> | 2021-06-17 15:56:09 +0900 |
commit | 413a66b44de9b623510395510b09a2ba19ef6f39 (patch) | |
tree | b48fd4c601d9bb6974d98b66622a5dc36afbea35 | |
parent | 6f4f768901b3c521bb694c646616f6c774866e6e (diff) | |
download | xbyak-413a66b44de9b623510395510b09a2ba19ef6f39.tar.gz xbyak-413a66b44de9b623510395510b09a2ba19ef6f39.zip |
add alias of vcmpXX{ps,pd,ss,sd} for mask register
-rw-r--r-- | gen/gen_avx512.cpp | 18 | ||||
-rw-r--r-- | readme.md | 3 | ||||
-rw-r--r-- | readme.txt | 3 | ||||
-rw-r--r-- | xbyak/xbyak_mnemonic.h | 128 |
4 files changed, 150 insertions, 2 deletions
diff --git a/gen/gen_avx512.cpp b/gen/gen_avx512.cpp index b2b88c3..f678e39 100644 --- a/gen/gen_avx512.cpp +++ b/gen/gen_avx512.cpp @@ -146,6 +146,23 @@ void putVcmp() } } +void putVcmpAlias() +{ + const char pred[32][16] = { + "eq", "lt", "le", "unord", "neq", "nlt", "nle", "ord", + "eq_uq", "nge", "ngt", "false", "neq_oq", "ge", "gt", + "true", "eq_os", "lt_oq", "le_oq", "unord_s", "neq_us", "nlt_uq", "nle_uq", "ord_s", + "eq_us", "nge_uq", "ngt_uq", "false_os", "neq_os", "ge_oq", "gt_oq", "true_us" + }; + const char suf[][4] = { "pd", "ps", "sd", "ss" }; + for (int i = 0; i < 4; i++) { + const char *s = suf[i]; + for (int j = 0; j < 32; j++) { + printf("void vcmp%s%s(const Opmask& k, const Xmm& x, const Operand& op) { vcmp%s(k, x, op, %d); }\n", pred[j], s, s, j); + } + } +} + // XM_X void putX_XM() { @@ -733,6 +750,7 @@ int main(int argc, char *[]) return 0; } putVcmp(); + putVcmpAlias(); putX_XM(); putM_X(); putXM_X(); @@ -1,6 +1,6 @@ [![Build Status](https://github.com/herumi/xbyak/actions/workflows/main.yml/badge.svg)](https://github.com/herumi/xbyak/actions/workflows/main.yml) -# Xbyak 5.993 ; JIT assembler for x86(IA32), x64(AMD64, x86-64) by C++ +# Xbyak 5.994 ; JIT assembler for x86(IA32), x64(AMD64, x86-64) by C++ ## Abstract @@ -446,6 +446,7 @@ modified new BSD License http://opensource.org/licenses/BSD-3-Clause ## History +* 2021/Jun/17 ver 5.994 add alias of vcmpXX{ps,pd,ss,sd} with mask register * 2021/Jun/06 ver 5.993 strict check of gather/scatter register combination * 2021/May/09 ver 5.992 support endbr32 and endbr64 * 2020/Nov/16 ver 5.991 disable constexpr for gcc-5 with -std=c++-14 @@ -1,5 +1,5 @@ - C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak 5.993
+ C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak 5.994
-----------------------------------------------------------------------------
◎概要
@@ -382,6 +382,7 @@ sample/{echo,hello}.bfは http://www.kmonos.net/alang/etc/brainfuck.php から -----------------------------------------------------------------------------
◎履歴
+2021/06/17 ver 5.994 マスクレジスタ用のvcmpXX{ps,pd,ss,sd}のalias追加
2021/06/06 ver 5.993 gather/scatterのレジスタの組み合わせの厳密なチェック
2021/05/09 ver 5.992 endbr32とendbr64のサポート
2020/11/16 ver 5.991 g++-5のC++14でconstexpr機能の抑制
diff --git a/xbyak/xbyak_mnemonic.h b/xbyak/xbyak_mnemonic.h index 9392afd..3483d5d 100644 --- a/xbyak/xbyak_mnemonic.h +++ b/xbyak/xbyak_mnemonic.h @@ -1755,10 +1755,138 @@ void vbroadcasti32x4(const Ymm& y, const Operand& op) { opAVX_X_XM_IMM(y, op, T_ void vbroadcasti32x8(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N32, 0x5B); } void vbroadcasti64x2(const Ymm& y, const Operand& op) { opAVX_X_XM_IMM(y, op, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N16, 0x5A); } void vbroadcasti64x4(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N32, 0x5B); } +void vcmpeq_ospd(const Opmask& k, const Xmm& x, const Operand& op) { vcmppd(k, x, op, 16); } +void vcmpeq_osps(const Opmask& k, const Xmm& x, const Operand& op) { vcmpps(k, x, op, 16); } +void vcmpeq_ossd(const Opmask& k, const Xmm& x, const Operand& op) { vcmpsd(k, x, op, 16); } +void vcmpeq_osss(const Opmask& k, const Xmm& x, const Operand& op) { vcmpss(k, x, op, 16); } +void vcmpeq_uqpd(const Opmask& k, const Xmm& x, const Operand& op) { vcmppd(k, x, op, 8); } +void vcmpeq_uqps(const Opmask& k, const Xmm& x, const Operand& op) { vcmpps(k, x, op, 8); } +void vcmpeq_uqsd(const Opmask& k, const Xmm& x, const Operand& op) { vcmpsd(k, x, op, 8); } +void vcmpeq_uqss(const Opmask& k, const Xmm& x, const Operand& op) { vcmpss(k, x, op, 8); } +void vcmpeq_uspd(const Opmask& k, const Xmm& x, const Operand& op) { vcmppd(k, x, op, 24); } +void vcmpeq_usps(const Opmask& k, const Xmm& x, const Operand& op) { vcmpps(k, x, op, 24); } +void vcmpeq_ussd(const Opmask& k, const Xmm& x, const Operand& op) { vcmpsd(k, x, op, 24); } +void vcmpeq_usss(const Opmask& k, const Xmm& x, const Operand& op) { vcmpss(k, x, op, 24); } +void vcmpeqpd(const Opmask& k, const Xmm& x, const Operand& op) { vcmppd(k, x, op, 0); } +void vcmpeqps(const Opmask& k, const Xmm& x, const Operand& op) { vcmpps(k, x, op, 0); } +void vcmpeqsd(const Opmask& k, const Xmm& x, const Operand& op) { vcmpsd(k, x, op, 0); } +void vcmpeqss(const Opmask& k, const Xmm& x, const Operand& op) { vcmpss(k, x, op, 0); } +void vcmpfalse_ospd(const Opmask& k, const Xmm& x, const Operand& op) { vcmppd(k, x, op, 27); } +void vcmpfalse_osps(const Opmask& k, const Xmm& x, const Operand& op) { vcmpps(k, x, op, 27); } +void vcmpfalse_ossd(const Opmask& k, const Xmm& x, const Operand& op) { vcmpsd(k, x, op, 27); } +void vcmpfalse_osss(const Opmask& k, const Xmm& x, const Operand& op) { vcmpss(k, x, op, 27); } +void vcmpfalsepd(const Opmask& k, const Xmm& x, const Operand& op) { vcmppd(k, x, op, 11); } +void vcmpfalseps(const Opmask& k, const Xmm& x, const Operand& op) { vcmpps(k, x, op, 11); } +void vcmpfalsesd(const Opmask& k, const Xmm& x, const Operand& op) { vcmpsd(k, x, op, 11); } +void vcmpfalsess(const Opmask& k, const Xmm& x, const Operand& op) { vcmpss(k, x, op, 11); } +void vcmpge_oqpd(const Opmask& k, const Xmm& x, const Operand& op) { vcmppd(k, x, op, 29); } +void vcmpge_oqps(const Opmask& k, const Xmm& x, const Operand& op) { vcmpps(k, x, op, 29); } +void vcmpge_oqsd(const Opmask& k, const Xmm& x, const Operand& op) { vcmpsd(k, x, op, 29); } +void vcmpge_oqss(const Opmask& k, const Xmm& x, const Operand& op) { vcmpss(k, x, op, 29); } +void vcmpgepd(const Opmask& k, const Xmm& x, const Operand& op) { vcmppd(k, x, op, 13); } +void vcmpgeps(const Opmask& k, const Xmm& x, const Operand& op) { vcmpps(k, x, op, 13); } +void vcmpgesd(const Opmask& k, const Xmm& x, const Operand& op) { vcmpsd(k, x, op, 13); } +void vcmpgess(const Opmask& k, const Xmm& x, const Operand& op) { vcmpss(k, x, op, 13); } +void vcmpgt_oqpd(const Opmask& k, const Xmm& x, const Operand& op) { vcmppd(k, x, op, 30); } +void vcmpgt_oqps(const Opmask& k, const Xmm& x, const Operand& op) { vcmpps(k, x, op, 30); } +void vcmpgt_oqsd(const Opmask& k, const Xmm& x, const Operand& op) { vcmpsd(k, x, op, 30); } +void vcmpgt_oqss(const Opmask& k, const Xmm& x, const Operand& op) { vcmpss(k, x, op, 30); } +void vcmpgtpd(const Opmask& k, const Xmm& x, const Operand& op) { vcmppd(k, x, op, 14); } +void vcmpgtps(const Opmask& k, const Xmm& x, const Operand& op) { vcmpps(k, x, op, 14); } +void vcmpgtsd(const Opmask& k, const Xmm& x, const Operand& op) { vcmpsd(k, x, op, 14); } +void vcmpgtss(const Opmask& k, const Xmm& x, const Operand& op) { vcmpss(k, x, op, 14); } +void vcmple_oqpd(const Opmask& k, const Xmm& x, const Operand& op) { vcmppd(k, x, op, 18); } +void vcmple_oqps(const Opmask& k, const Xmm& x, const Operand& op) { vcmpps(k, x, op, 18); } +void vcmple_oqsd(const Opmask& k, const Xmm& x, const Operand& op) { vcmpsd(k, x, op, 18); } +void vcmple_oqss(const Opmask& k, const Xmm& x, const Operand& op) { vcmpss(k, x, op, 18); } +void vcmplepd(const Opmask& k, const Xmm& x, const Operand& op) { vcmppd(k, x, op, 2); } +void vcmpleps(const Opmask& k, const Xmm& x, const Operand& op) { vcmpps(k, x, op, 2); } +void vcmplesd(const Opmask& k, const Xmm& x, const Operand& op) { vcmpsd(k, x, op, 2); } +void vcmpless(const Opmask& k, const Xmm& x, const Operand& op) { vcmpss(k, x, op, 2); } +void vcmplt_oqpd(const Opmask& k, const Xmm& x, const Operand& op) { vcmppd(k, x, op, 17); } +void vcmplt_oqps(const Opmask& k, const Xmm& x, const Operand& op) { vcmpps(k, x, op, 17); } +void vcmplt_oqsd(const Opmask& k, const Xmm& x, const Operand& op) { vcmpsd(k, x, op, 17); } +void vcmplt_oqss(const Opmask& k, const Xmm& x, const Operand& op) { vcmpss(k, x, op, 17); } +void vcmpltpd(const Opmask& k, const Xmm& x, const Operand& op) { vcmppd(k, x, op, 1); } +void vcmpltps(const Opmask& k, const Xmm& x, const Operand& op) { vcmpps(k, x, op, 1); } +void vcmpltsd(const Opmask& k, const Xmm& x, const Operand& op) { vcmpsd(k, x, op, 1); } +void vcmpltss(const Opmask& k, const Xmm& x, const Operand& op) { vcmpss(k, x, op, 1); } +void vcmpneq_oqpd(const Opmask& k, const Xmm& x, const Operand& op) { vcmppd(k, x, op, 12); } +void vcmpneq_oqps(const Opmask& k, const Xmm& x, const Operand& op) { vcmpps(k, x, op, 12); } +void vcmpneq_oqsd(const Opmask& k, const Xmm& x, const Operand& op) { vcmpsd(k, x, op, 12); } +void vcmpneq_oqss(const Opmask& k, const Xmm& x, const Operand& op) { vcmpss(k, x, op, 12); } +void vcmpneq_ospd(const Opmask& k, const Xmm& x, const Operand& op) { vcmppd(k, x, op, 28); } +void vcmpneq_osps(const Opmask& k, const Xmm& x, const Operand& op) { vcmpps(k, x, op, 28); } +void vcmpneq_ossd(const Opmask& k, const Xmm& x, const Operand& op) { vcmpsd(k, x, op, 28); } +void vcmpneq_osss(const Opmask& k, const Xmm& x, const Operand& op) { vcmpss(k, x, op, 28); } +void vcmpneq_uspd(const Opmask& k, const Xmm& x, const Operand& op) { vcmppd(k, x, op, 20); } +void vcmpneq_usps(const Opmask& k, const Xmm& x, const Operand& op) { vcmpps(k, x, op, 20); } +void vcmpneq_ussd(const Opmask& k, const Xmm& x, const Operand& op) { vcmpsd(k, x, op, 20); } +void vcmpneq_usss(const Opmask& k, const Xmm& x, const Operand& op) { vcmpss(k, x, op, 20); } +void vcmpneqpd(const Opmask& k, const Xmm& x, const Operand& op) { vcmppd(k, x, op, 4); } +void vcmpneqps(const Opmask& k, const Xmm& x, const Operand& op) { vcmpps(k, x, op, 4); } +void vcmpneqsd(const Opmask& k, const Xmm& x, const Operand& op) { vcmpsd(k, x, op, 4); } +void vcmpneqss(const Opmask& k, const Xmm& x, const Operand& op) { vcmpss(k, x, op, 4); } +void vcmpnge_uqpd(const Opmask& k, const Xmm& x, const Operand& op) { vcmppd(k, x, op, 25); } +void vcmpnge_uqps(const Opmask& k, const Xmm& x, const Operand& op) { vcmpps(k, x, op, 25); } +void vcmpnge_uqsd(const Opmask& k, const Xmm& x, const Operand& op) { vcmpsd(k, x, op, 25); } +void vcmpnge_uqss(const Opmask& k, const Xmm& x, const Operand& op) { vcmpss(k, x, op, 25); } +void vcmpngepd(const Opmask& k, const Xmm& x, const Operand& op) { vcmppd(k, x, op, 9); } +void vcmpngeps(const Opmask& k, const Xmm& x, const Operand& op) { vcmpps(k, x, op, 9); } +void vcmpngesd(const Opmask& k, const Xmm& x, const Operand& op) { vcmpsd(k, x, op, 9); } +void vcmpngess(const Opmask& k, const Xmm& x, const Operand& op) { vcmpss(k, x, op, 9); } +void vcmpngt_uqpd(const Opmask& k, const Xmm& x, const Operand& op) { vcmppd(k, x, op, 26); } +void vcmpngt_uqps(const Opmask& k, const Xmm& x, const Operand& op) { vcmpps(k, x, op, 26); } +void vcmpngt_uqsd(const Opmask& k, const Xmm& x, const Operand& op) { vcmpsd(k, x, op, 26); } +void vcmpngt_uqss(const Opmask& k, const Xmm& x, const Operand& op) { vcmpss(k, x, op, 26); } +void vcmpngtpd(const Opmask& k, const Xmm& x, const Operand& op) { vcmppd(k, x, op, 10); } +void vcmpngtps(const Opmask& k, const Xmm& x, const Operand& op) { vcmpps(k, x, op, 10); } +void vcmpngtsd(const Opmask& k, const Xmm& x, const Operand& op) { vcmpsd(k, x, op, 10); } +void vcmpngtss(const Opmask& k, const Xmm& x, const Operand& op) { vcmpss(k, x, op, 10); } +void vcmpnle_uqpd(const Opmask& k, const Xmm& x, const Operand& op) { vcmppd(k, x, op, 22); } +void vcmpnle_uqps(const Opmask& k, const Xmm& x, const Operand& op) { vcmpps(k, x, op, 22); } +void vcmpnle_uqsd(const Opmask& k, const Xmm& x, const Operand& op) { vcmpsd(k, x, op, 22); } +void vcmpnle_uqss(const Opmask& k, const Xmm& x, const Operand& op) { vcmpss(k, x, op, 22); } +void vcmpnlepd(const Opmask& k, const Xmm& x, const Operand& op) { vcmppd(k, x, op, 6); } +void vcmpnleps(const Opmask& k, const Xmm& x, const Operand& op) { vcmpps(k, x, op, 6); } +void vcmpnlesd(const Opmask& k, const Xmm& x, const Operand& op) { vcmpsd(k, x, op, 6); } +void vcmpnless(const Opmask& k, const Xmm& x, const Operand& op) { vcmpss(k, x, op, 6); } +void vcmpnlt_uqpd(const Opmask& k, const Xmm& x, const Operand& op) { vcmppd(k, x, op, 21); } +void vcmpnlt_uqps(const Opmask& k, const Xmm& x, const Operand& op) { vcmpps(k, x, op, 21); } +void vcmpnlt_uqsd(const Opmask& k, const Xmm& x, const Operand& op) { vcmpsd(k, x, op, 21); } +void vcmpnlt_uqss(const Opmask& k, const Xmm& x, const Operand& op) { vcmpss(k, x, op, 21); } +void vcmpnltpd(const Opmask& k, const Xmm& x, const Operand& op) { vcmppd(k, x, op, 5); } +void vcmpnltps(const Opmask& k, const Xmm& x, const Operand& op) { vcmpps(k, x, op, 5); } +void vcmpnltsd(const Opmask& k, const Xmm& x, const Operand& op) { vcmpsd(k, x, op, 5); } +void vcmpnltss(const Opmask& k, const Xmm& x, const Operand& op) { vcmpss(k, x, op, 5); } +void vcmpord_spd(const Opmask& k, const Xmm& x, const Operand& op) { vcmppd(k, x, op, 23); } +void vcmpord_sps(const Opmask& k, const Xmm& x, const Operand& op) { vcmpps(k, x, op, 23); } +void vcmpord_ssd(const Opmask& k, const Xmm& x, const Operand& op) { vcmpsd(k, x, op, 23); } +void vcmpord_sss(const Opmask& k, const Xmm& x, const Operand& op) { vcmpss(k, x, op, 23); } +void vcmpordpd(const Opmask& k, const Xmm& x, const Operand& op) { vcmppd(k, x, op, 7); } +void vcmpordps(const Opmask& k, const Xmm& x, const Operand& op) { vcmpps(k, x, op, 7); } +void vcmpordsd(const Opmask& k, const Xmm& x, const Operand& op) { vcmpsd(k, x, op, 7); } +void vcmpordss(const Opmask& k, const Xmm& x, const Operand& op) { vcmpss(k, x, op, 7); } void vcmppd(const Opmask& k, const Xmm& x, const Operand& op, uint8_t imm) { opAVX_K_X_XM(k, x, op, T_66 | T_0F | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B64, 0xC2, imm); } void vcmpps(const Opmask& k, const Xmm& x, const Operand& op, uint8_t imm) { opAVX_K_X_XM(k, x, op, T_0F | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0xC2, imm); } void vcmpsd(const Opmask& k, const Xmm& x, const Operand& op, uint8_t imm) { opAVX_K_X_XM(k, x, op, T_N8 | T_F2 | T_0F | T_EW1 | T_SAE_Z | T_MUST_EVEX, 0xC2, imm); } void vcmpss(const Opmask& k, const Xmm& x, const Operand& op, uint8_t imm) { opAVX_K_X_XM(k, x, op, T_N4 | T_F3 | T_0F | T_EW0 | T_SAE_Z | T_MUST_EVEX, 0xC2, imm); } +void vcmptrue_uspd(const Opmask& k, const Xmm& x, const Operand& op) { vcmppd(k, x, op, 31); } +void vcmptrue_usps(const Opmask& k, const Xmm& x, const Operand& op) { vcmpps(k, x, op, 31); } +void vcmptrue_ussd(const Opmask& k, const Xmm& x, const Operand& op) { vcmpsd(k, x, op, 31); } +void vcmptrue_usss(const Opmask& k, const Xmm& x, const Operand& op) { vcmpss(k, x, op, 31); } +void vcmptruepd(const Opmask& k, const Xmm& x, const Operand& op) { vcmppd(k, x, op, 15); } +void vcmptrueps(const Opmask& k, const Xmm& x, const Operand& op) { vcmpps(k, x, op, 15); } +void vcmptruesd(const Opmask& k, const Xmm& x, const Operand& op) { vcmpsd(k, x, op, 15); } +void vcmptruess(const Opmask& k, const Xmm& x, const Operand& op) { vcmpss(k, x, op, 15); } +void vcmpunord_spd(const Opmask& k, const Xmm& x, const Operand& op) { vcmppd(k, x, op, 19); } +void vcmpunord_sps(const Opmask& k, const Xmm& x, const Operand& op) { vcmpps(k, x, op, 19); } +void vcmpunord_ssd(const Opmask& k, const Xmm& x, const Operand& op) { vcmpsd(k, x, op, 19); } +void vcmpunord_sss(const Opmask& k, const Xmm& x, const Operand& op) { vcmpss(k, x, op, 19); } +void vcmpunordpd(const Opmask& k, const Xmm& x, const Operand& op) { vcmppd(k, x, op, 3); } +void vcmpunordps(const Opmask& k, const Xmm& x, const Operand& op) { vcmpps(k, x, op, 3); } +void vcmpunordsd(const Opmask& k, const Xmm& x, const Operand& op) { vcmpsd(k, x, op, 3); } +void vcmpunordss(const Opmask& k, const Xmm& x, const Operand& op) { vcmpss(k, x, op, 3); } void vcompressb(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, T_N1 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x63); } void vcompresspd(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x8A); } void vcompressps(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x8A); } |