diff options
author | MITSUNARI Shigeo <[email protected]> | 2023-11-30 21:15:25 +0900 |
---|---|---|
committer | MITSUNARI Shigeo <[email protected]> | 2023-11-30 21:15:25 +0900 |
commit | bd85d108ca86d85e586f97d2c8050d95a519fb30 (patch) | |
tree | 1473c8901e14e8fee69c858298fce1c22e35de61 | |
parent | 93bd6a0b795f1cf6153686b093ed2ca07d533ea4 (diff) | |
download | xbyak-bd85d108ca86d85e586f97d2c8050d95a519fb30.tar.gz xbyak-bd85d108ca86d85e586f97d2c8050d95a519fb30.zip |
kmov* supports apx
-rw-r--r-- | gen/avx_type.hpp | 1 | ||||
-rw-r--r-- | gen/avx_type_def.h | 1 | ||||
-rw-r--r-- | gen/gen_avx512.cpp | 27 | ||||
-rw-r--r-- | xbyak/xbyak.h | 26 | ||||
-rw-r--r-- | xbyak/xbyak_mnemonic.h | 28 |
5 files changed, 49 insertions, 34 deletions
diff --git a/gen/avx_type.hpp b/gen/avx_type.hpp
index a24a0a0..5ec0229 100644
--- a/gen/avx_type.hpp
+++ b/gen/avx_type.hpp
@@ -67,6 +67,7 @@ std::string type2String(uint64_t type)
 	if (type & T_MAP3) str += "|T_MAP3";
 	if (type & T_ND1) str += "|T_ND1";
 	if (type & T_ZU) str += "|T_ZU";
+	if (type & T_MAP1) str += "|T_MAP1";
 
 	if (str[0] == '|') str = str.substr(1);
 	return str;
diff --git a/gen/avx_type_def.h b/gen/avx_type_def.h
index ba82d5b..81052c4 100644
--- a/gen/avx_type_def.h
+++ b/gen/avx_type_def.h
@@ -47,6 +47,7 @@
 	static const uint64_t T_ND1 = 1ull << 35; // ND=1
 	static const uint64_t T_ZU = 1ull << 36; // ND=ZU
 	static const uint64_t T_F2 = 1ull << 37; // pp = 3
+	static const uint64_t T_MAP1 = 1ull << 38; // kmov
 	// T_66 = 1, T_F3 = 2, T_F2 = 3
 	static inline uint32_t getPP(uint64_t type) { return (type & T_66) ? 1 : (type & T_F3) ? 2 : (type & T_F2) ? 3 : 0; }
 	static inline uint32_t getMMM(uint64_t type) { return (type & T_0F) ? 1 : (type & T_0F38) ? 2 : (type & T_0F3A) ? 3 : 0; }
diff --git a/gen/gen_avx512.cpp b/gen/gen_avx512.cpp
index 8463543..499db28 100644
--- a/gen/gen_avx512.cpp
+++ b/gen/gen_avx512.cpp
@@ -15,8 +15,7 @@ using namespace Xbyak;
 void putOpmask(bool only64bit)
 {
 	if (only64bit) {
-		puts("void kmovq(const Opmask& k, const Reg64& r) { opVex(k, 0, r, T_L0 | T_0F | T_F2 | T_W1, 0x92); }");
-		puts("void kmovq(const Reg64& r, const Opmask& k) { opVex(r, 0, k, T_L0 | T_0F | T_F2 | T_W1, 0x93); }");
+		puts("void kmovq(const Reg64& r, const Opmask& k) { opKmov(k, r, true, 64); }");
 		return;
 	}
 
@@ -76,22 +75,14 @@ void putOpmask(bool only64bit)
 			printf("void %sd(const Opmask& r1, const Opmask& r2, uint8_t imm) { opVex(r1, 0, r2, T_66 | T_0F3A | T_W0, 0x%02X, imm); }\n", p.name, p.code + 1);
 		}
 	}
-	puts("void kmovw(const Opmask& k, const Operand& op) { if (!op.isMEM() && !op.isOPMASK()) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(k, 0, op, T_L0 | T_0F | T_W0, 0x90); }");
-	puts("void kmovq(const Opmask& k, const Operand& op) { if (!op.isMEM() && !op.isOPMASK()) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(k, 0, op, T_L0 | T_0F | T_W1, 0x90); }");
-	puts("void kmovb(const Opmask& k, const Operand& op) { if (!op.isMEM() && !op.isOPMASK()) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(k, 0, op, T_L0 | T_0F | T_66 | T_W0, 0x90); }");
-	puts("void kmovd(const Opmask& k, const Operand& op) { if (!op.isMEM() && !op.isOPMASK()) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(k, 0, op, T_L0 | T_0F | T_66 | T_W1, 0x90); }");
-
-	puts("void kmovw(const Address& addr, const Opmask& k) { opVex(k, 0, addr, T_L0 | T_0F | T_W0, 0x91); }");
-	puts("void kmovq(const Address& addr, const Opmask& k) { opVex(k, 0, addr, T_L0 | T_0F | T_W1, 0x91); }");
-	puts("void kmovb(const Address& addr, const Opmask& k) { opVex(k, 0, addr, T_L0 | T_0F | T_66 | T_W0, 0x91); }");
-	puts("void kmovd(const Address& addr, const Opmask& k) { opVex(k, 0, addr, T_L0 | T_0F | T_66 | T_W1, 0x91); }");
-
-	puts("void kmovw(const Opmask& k, const Reg32& r) { opVex(k, 0, r, T_L0 | T_0F | T_W0, 0x92); }");
-	puts("void kmovw(const Reg32& r, const Opmask& k) { opVex(r, 0, k, T_L0 | T_0F | T_W0, 0x93); }");
-	puts("void kmovb(const Opmask& k, const Reg32& r) { opVex(k, 0, r, T_L0 | T_0F | T_66 | T_W0, 0x92); }");
-	puts("void kmovb(const Reg32& r, const Opmask& k) { opVex(r, 0, k, T_L0 | T_0F | T_66 | T_W0, 0x93); }");
-	puts("void kmovd(const Opmask& k, const Reg32& r) { opVex(k, 0, r, T_L0 | T_0F | T_F2 | T_W0, 0x92); }");
-	puts("void kmovd(const Reg32& r, const Opmask& k) { opVex(r, 0, k, T_L0 | T_0F | T_F2 | T_W0, 0x93); }");
+	for (int i = 0; i < 4; i++) {
+		const char tbl[] = "bwdq";
+		const int bitTbl[] = { 8, 16, 32, 64 };
+		int bit = bitTbl[i];
+		printf("void kmov%c(const Opmask& k, const Operand& op) { opKmov(k, op, false, %d); }\n", tbl[i], bit);
+		printf("void kmov%c(const Address& addr, const Opmask& k) { opKmov(k, addr, true, %d); }\n", tbl[i], bit);
+		if (i < 3) printf("void kmov%c(const Reg32& r, const Opmask& k) { opKmov(k, r, true, %d); }\n", tbl[i], bit);
+	}
 }
 
 // vcmppd(k, x, op)
diff --git a/xbyak/xbyak.h b/xbyak/xbyak.h
index a94e272..5d03da8 100644
--- a/xbyak/xbyak.h
+++ b/xbyak/xbyak.h
@@ -1828,6 +1828,7 @@ private:
 	static const uint64_t T_ND1 = 1ull << 35; // ND=1
 	static const uint64_t T_ZU = 1ull << 36; // ND=ZU
 	static const uint64_t T_F2 = 1ull << 37; // pp = 3
+	static const uint64_t T_MAP1 = 1ull << 38; // kmov
 	// T_66 = 1, T_F3 = 2, T_F2 = 3
 	static inline uint32_t getPP(uint64_t type) { return (type & T_66) ? 1 : (type & T_F3) ? 2 : (type & T_F2) ? 3 : 0; }
 	static inline uint32_t getMMM(uint64_t type) { return (type & T_0F) ? 1 : (type & T_0F38) ? 2 : (type & T_0F3A) ? 3 : 0; }
@@ -1927,6 +1928,7 @@ private:
 	}
 	static inline int getMap(uint64_t type)
 	{
+		if (type & T_MAP1) return 1;
 		if (type & T_MAP3) return 3;
 		if (type & (T_0F38|T_0F3A)) return 2;
 		return 4; // legacy
@@ -3093,6 +3095,30 @@ public:
 	// set default encoding to select Vex or Evex
 	void setDefaultEncoding(PreferredEncoding encoding) { defaultEncoding_ = encoding; }
+	// (reg32e/mem, k) if rev else (k, k/mem/reg32e)
+	// size = 8, 16, 32, 64
+	void opKmov(const Opmask& k, const Operand& op, bool rev, int size)
+	{
+		int code = 0;
+		bool isReg = op.isREG(size < 64 ? 32 : 64);
+		if (rev) {
+			code = isReg ? 0x93 : op.isMEM() ? 0x91 : 0;
+		} else {
+			code = op.isOPMASK() || op.isMEM() ? 0x90 : isReg ? 0x92 : 0;
+		}
+		if (code == 0) XBYAK_THROW(ERR_BAD_COMBINATION)
+		uint64_t type = 0;
+		switch (size) {
+		case 8: type = T_W0|T_66; break;
+		case 16: type = T_W0; break;
+		case 32: type = isReg ? T_W0|T_F2 : T_W1|T_66; break;
+		case 64: type = isReg ? T_W1|T_F2 : T_W1; break;
+		}
+		const Operand *p1 = &k, *p2 = &op;
+		if (code == 0x93) { std::swap(p1, p2); }
+		if (opROO(Reg(), *p2, *p1, T_MAP1|type, code)) return;
+		opVex(static_cast<const Reg&>(*p1), 0, *p2, T_L0|T_0F|type, code);
+	}
 
 	/*
 		use single byte nop if useMultiByteNop = false
 	*/
diff --git a/xbyak/xbyak_mnemonic.h b/xbyak/xbyak_mnemonic.h
index 07bd24c..5b072f3 100644
--- a/xbyak/xbyak_mnemonic.h
+++ b/xbyak/xbyak_mnemonic.h
@@ -1982,20 +1982,17 @@ void kandnq(const Opmask& r1, const Opmask& r2, const Opmask& r3) { opVex(r1, &r
 void kandnw(const Opmask& r1, const Opmask& r2, const Opmask& r3) { opVex(r1, &r2, r3, T_L1 | T_0F | T_W0, 0x42); }
 void kandq(const Opmask& r1, const Opmask& r2, const Opmask& r3) { opVex(r1, &r2, r3, T_L1 | T_0F | T_W1, 0x41); }
 void kandw(const Opmask& r1, const Opmask& r2, const Opmask& r3) { opVex(r1, &r2, r3, T_L1 | T_0F | T_W0, 0x41); }
-void kmovb(const Address& addr, const Opmask& k) { opVex(k, 0, addr, T_L0 | T_0F | T_66 | T_W0, 0x91); }
-void kmovb(const Opmask& k, const Operand& op) { if (!op.isMEM() && !op.isOPMASK()) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(k, 0, op, T_L0 | T_0F | T_66 | T_W0, 0x90); }
-void kmovb(const Opmask& k, const Reg32& r) { opVex(k, 0, r, T_L0 | T_0F | T_66 | T_W0, 0x92); }
-void kmovb(const Reg32& r, const Opmask& k) { opVex(r, 0, k, T_L0 | T_0F | T_66 | T_W0, 0x93); }
-void kmovd(const Address& addr, const Opmask& k) { opVex(k, 0, addr, T_L0 | T_0F | T_66 | T_W1, 0x91); }
-void kmovd(const Opmask& k, const Operand& op) { if (!op.isMEM() && !op.isOPMASK()) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(k, 0, op, T_L0 | T_0F | T_66 | T_W1, 0x90); }
-void kmovd(const Opmask& k, const Reg32& r) { opVex(k, 0, r, T_L0 | T_0F | T_F2 | T_W0, 0x92); }
-void kmovd(const Reg32& r, const Opmask& k) { opVex(r, 0, k, T_L0 | T_0F | T_F2 | T_W0, 0x93); }
-void kmovq(const Address& addr, const Opmask& k) { opVex(k, 0, addr, T_L0 | T_0F | T_W1, 0x91); }
-void kmovq(const Opmask& k, const Operand& op) { if (!op.isMEM() && !op.isOPMASK()) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(k, 0, op, T_L0 | T_0F | T_W1, 0x90); }
-void kmovw(const Address& addr, const Opmask& k) { opVex(k, 0, addr, T_L0 | T_0F | T_W0, 0x91); }
-void kmovw(const Opmask& k, const Operand& op) { if (!op.isMEM() && !op.isOPMASK()) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(k, 0, op, T_L0 | T_0F | T_W0, 0x90); }
-void kmovw(const Opmask& k, const Reg32& r) { opVex(k, 0, r, T_L0 | T_0F | T_W0, 0x92); }
-void kmovw(const Reg32& r, const Opmask& k) { opVex(r, 0, k, T_L0 | T_0F | T_W0, 0x93); }
+void kmovb(const Address& addr, const Opmask& k) { opKmov(k, addr, true, 8); }
+void kmovb(const Opmask& k, const Operand& op) { opKmov(k, op, false, 8); }
+void kmovb(const Reg32& r, const Opmask& k) { opKmov(k, r, true, 8); }
+void kmovd(const Address& addr, const Opmask& k) { opKmov(k, addr, true, 32); }
+void kmovd(const Opmask& k, const Operand& op) { opKmov(k, op, false, 32); }
+void kmovd(const Reg32& r, const Opmask& k) { opKmov(k, r, true, 32); }
+void kmovq(const Address& addr, const Opmask& k) { opKmov(k, addr, true, 64); }
+void kmovq(const Opmask& k, const Operand& op) { opKmov(k, op, false, 64); }
+void kmovw(const Address& addr, const Opmask& k) { opKmov(k, addr, true, 16); }
+void kmovw(const Opmask& k, const Operand& op) { opKmov(k, op, false, 16); }
+void kmovw(const Reg32& r, const Opmask& k) { opKmov(k, r, true, 16); }
 void knotb(const Opmask& r1, const Opmask& r2) { opVex(r1, 0, r2, T_0F | T_66 | T_W0, 0x44); }
 void knotd(const Opmask& r1, const Opmask& r2) { opVex(r1, 0, r2, T_0F | T_66 | T_W1, 0x44); }
 void knotq(const Opmask& r1, const Opmask& r2) { opVex(r1, 0, r2, T_0F | T_W1, 0x44); }
@@ -2570,8 +2567,7 @@ void vsubph(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand())
 void vsubsh(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_MAP5 | T_F3 | T_EW0 | T_MUST_EVEX | T_ER_X | T_N2, 0x5C); }
 void vucomish(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_MAP5 | T_MUST_EVEX | T_EW0 | T_SAE_X | T_N2, 0x2E); }
 #ifdef XBYAK64
-void kmovq(const Opmask& k, const Reg64& r) { opVex(k, 0, r, T_L0 | T_0F | T_F2 | T_W1, 0x92); }
-void kmovq(const Reg64& r, const Opmask& k) { opVex(r, 0, k, T_L0 | T_0F | T_F2 | T_W1, 0x93); }
+void kmovq(const Reg64& r, const Opmask& k) { opKmov(k, r, true, 64); }
 void vpbroadcastq(const Xmm& x, const Reg64& r) { opVex(x, 0, r, T_66|T_0F38|T_EW1|T_YMM|T_MUST_EVEX, 0x7C); }
 #endif
 #endif