about summary refs log tree commit diff homepage
diff options
context:
space:
mode:
author: MITSUNARI Shigeo <[email protected]> 2023-11-30 21:15:25 +0900
committer: MITSUNARI Shigeo <[email protected]> 2023-11-30 21:15:25 +0900
commit: bd85d108ca86d85e586f97d2c8050d95a519fb30 (patch)
tree: 1473c8901e14e8fee69c858298fce1c22e35de61
parent: 93bd6a0b795f1cf6153686b093ed2ca07d533ea4 (diff)
download: xbyak-bd85d108ca86d85e586f97d2c8050d95a519fb30.tar.gz
xbyak-bd85d108ca86d85e586f97d2c8050d95a519fb30.zip
kmov* supports apx
-rw-r--r-- gen/avx_type.hpp 1
-rw-r--r-- gen/avx_type_def.h 1
-rw-r--r-- gen/gen_avx512.cpp 27
-rw-r--r-- xbyak/xbyak.h 26
-rw-r--r-- xbyak/xbyak_mnemonic.h 28
5 files changed, 49 insertions, 34 deletions
diff --git a/gen/avx_type.hpp b/gen/avx_type.hpp
index a24a0a0..5ec0229 100644
--- a/gen/avx_type.hpp
+++ b/gen/avx_type.hpp
@@ -67,6 +67,7 @@ std::string type2String(uint64_t type)
if (type & T_MAP3) str += "|T_MAP3";
if (type & T_ND1) str += "|T_ND1";
if (type & T_ZU) str += "|T_ZU";
+ if (type & T_MAP1) str += "|T_MAP1";
if (str[0] == '|') str = str.substr(1);
return str;
diff --git a/gen/avx_type_def.h b/gen/avx_type_def.h
index ba82d5b..81052c4 100644
--- a/gen/avx_type_def.h
+++ b/gen/avx_type_def.h
@@ -47,6 +47,7 @@
static const uint64_t T_ND1 = 1ull << 35; // ND=1
static const uint64_t T_ZU = 1ull << 36; // ND=ZU
static const uint64_t T_F2 = 1ull << 37; // pp = 3
+ static const uint64_t T_MAP1 = 1ull << 38; // kmov
// T_66 = 1, T_F3 = 2, T_F2 = 3
static inline uint32_t getPP(uint64_t type) { return (type & T_66) ? 1 : (type & T_F3) ? 2 : (type & T_F2) ? 3 : 0; }
static inline uint32_t getMMM(uint64_t type) { return (type & T_0F) ? 1 : (type & T_0F38) ? 2 : (type & T_0F3A) ? 3 : 0; }
diff --git a/gen/gen_avx512.cpp b/gen/gen_avx512.cpp
index 8463543..499db28 100644
--- a/gen/gen_avx512.cpp
+++ b/gen/gen_avx512.cpp
@@ -15,8 +15,7 @@ using namespace Xbyak;
void putOpmask(bool only64bit)
{
if (only64bit) {
- puts("void kmovq(const Opmask& k, const Reg64& r) { opVex(k, 0, r, T_L0 | T_0F | T_F2 | T_W1, 0x92); }");
- puts("void kmovq(const Reg64& r, const Opmask& k) { opVex(r, 0, k, T_L0 | T_0F | T_F2 | T_W1, 0x93); }");
+ puts("void kmovq(const Reg64& r, const Opmask& k) { opKmov(k, r, true, 64); }");
return;
}
@@ -76,22 +75,14 @@ void putOpmask(bool only64bit)
printf("void %sd(const Opmask& r1, const Opmask& r2, uint8_t imm) { opVex(r1, 0, r2, T_66 | T_0F3A | T_W0, 0x%02X, imm); }\n", p.name, p.code + 1);
}
}
- puts("void kmovw(const Opmask& k, const Operand& op) { if (!op.isMEM() && !op.isOPMASK()) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(k, 0, op, T_L0 | T_0F | T_W0, 0x90); }");
- puts("void kmovq(const Opmask& k, const Operand& op) { if (!op.isMEM() && !op.isOPMASK()) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(k, 0, op, T_L0 | T_0F | T_W1, 0x90); }");
- puts("void kmovb(const Opmask& k, const Operand& op) { if (!op.isMEM() && !op.isOPMASK()) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(k, 0, op, T_L0 | T_0F | T_66 | T_W0, 0x90); }");
- puts("void kmovd(const Opmask& k, const Operand& op) { if (!op.isMEM() && !op.isOPMASK()) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(k, 0, op, T_L0 | T_0F | T_66 | T_W1, 0x90); }");
-
- puts("void kmovw(const Address& addr, const Opmask& k) { opVex(k, 0, addr, T_L0 | T_0F | T_W0, 0x91); }");
- puts("void kmovq(const Address& addr, const Opmask& k) { opVex(k, 0, addr, T_L0 | T_0F | T_W1, 0x91); }");
- puts("void kmovb(const Address& addr, const Opmask& k) { opVex(k, 0, addr, T_L0 | T_0F | T_66 | T_W0, 0x91); }");
- puts("void kmovd(const Address& addr, const Opmask& k) { opVex(k, 0, addr, T_L0 | T_0F | T_66 | T_W1, 0x91); }");
-
- puts("void kmovw(const Opmask& k, const Reg32& r) { opVex(k, 0, r, T_L0 | T_0F | T_W0, 0x92); }");
- puts("void kmovw(const Reg32& r, const Opmask& k) { opVex(r, 0, k, T_L0 | T_0F | T_W0, 0x93); }");
- puts("void kmovb(const Opmask& k, const Reg32& r) { opVex(k, 0, r, T_L0 | T_0F | T_66 | T_W0, 0x92); }");
- puts("void kmovb(const Reg32& r, const Opmask& k) { opVex(r, 0, k, T_L0 | T_0F | T_66 | T_W0, 0x93); }");
- puts("void kmovd(const Opmask& k, const Reg32& r) { opVex(k, 0, r, T_L0 | T_0F | T_F2 | T_W0, 0x92); }");
- puts("void kmovd(const Reg32& r, const Opmask& k) { opVex(r, 0, k, T_L0 | T_0F | T_F2 | T_W0, 0x93); }");
+ for (int i = 0; i < 4; i++) { // emit one group of kmov overloads per operand size
+ const char tbl[] = "bwdq"; // mnemonic suffix for each size
+ const int bitTbl[] = { 8, 16, 32, 64 }; // operand size in bits, parallel to tbl
+ int bit = bitTbl[i];
+ printf("void kmov%c(const Opmask& k, const Operand& op) { opKmov(k, op, false, %d); }\n", tbl[i], bit); // (k, operand) form, rev=false
+ printf("void kmov%c(const Address& addr, const Opmask& k) { opKmov(k, addr, true, %d); }\n", tbl[i], bit); // (mem, k) form; opKmov still takes the opmask first, rev=true marks the direction
+ if (i < 3) printf("void kmov%c(const Reg32& r, const Opmask& k) { opKmov(k, r, true, %d); }\n", tbl[i], bit); // Reg32 overload for b/w/d only; the Reg64 kmovq overload is emitted by the only64bit branch
+ }
}
// vcmppd(k, x, op)
diff --git a/xbyak/xbyak.h b/xbyak/xbyak.h
index a94e272..5d03da8 100644
--- a/xbyak/xbyak.h
+++ b/xbyak/xbyak.h
@@ -1828,6 +1828,7 @@ private:
static const uint64_t T_ND1 = 1ull << 35; // ND=1
static const uint64_t T_ZU = 1ull << 36; // ND=ZU
static const uint64_t T_F2 = 1ull << 37; // pp = 3
+ static const uint64_t T_MAP1 = 1ull << 38; // kmov
// T_66 = 1, T_F3 = 2, T_F2 = 3
static inline uint32_t getPP(uint64_t type) { return (type & T_66) ? 1 : (type & T_F3) ? 2 : (type & T_F2) ? 3 : 0; }
static inline uint32_t getMMM(uint64_t type) { return (type & T_0F) ? 1 : (type & T_0F38) ? 2 : (type & T_0F3A) ? 3 : 0; }
@@ -1927,6 +1928,7 @@ private:
}
static inline int getMap(uint64_t type)
{
+ if (type & T_MAP1) return 1;
if (type & T_MAP3) return 3;
if (type & (T_0F38|T_0F3A)) return 2;
return 4; // legacy
@@ -3093,6 +3095,30 @@ public:
// set default encoding to select Vex or Evex
void setDefaultEncoding(PreferredEncoding encoding) { defaultEncoding_ = encoding; }
+ // Shared encoder for kmov{b,w,d,q}: emits the (reg32e/mem, k) form if rev, else the (k, k/mem/reg32e) form; k is always passed first.
+ // size is the operand width in bits: 8, 16, 32 or 64. Throws ERR_BAD_COMBINATION when op is not an operand kind the chosen direction accepts.
+ void opKmov(const Opmask& k, const Operand& op, bool rev, int size)
+ {
+ int code = 0;
+ bool isReg = op.isREG(size < 64 ? 32 : 64); // register forms take a 32-bit register, except size 64 which takes a 64-bit one
+ if (rev) {
+ code = isReg ? 0x93 : op.isMEM() ? 0x91 : 0; // 0x93 for (reg, k), 0x91 for (mem, k); any other operand is rejected
+ } else {
+ code = op.isOPMASK() || op.isMEM() ? 0x90 : isReg ? 0x92 : 0; // 0x90 for (k, k/mem), 0x92 for (k, reg)
+ }
+ if (code == 0) XBYAK_THROW(ERR_BAD_COMBINATION) // no opcode matches this operand kind
+ uint64_t type = 0; // W bit and 66/F2 prefix flags; NOTE(review): stays 0 if size is not one of the four cases below (no default)
+ switch (size) {
+ case 8: type = T_W0|T_66; break;
+ case 16: type = T_W0; break;
+ case 32: type = isReg ? T_W0|T_F2 : T_W1|T_66; break; // register forms use F2 with W0, k/mem forms use 66 with W1
+ case 64: type = isReg ? T_W1|T_F2 : T_W1; break; // register forms add F2, k/mem forms use no prefix flag
+ }
+ const Operand *p1 = &k, *p2 = &op; // default order: opmask first, other operand second
+ if (code == 0x93) { std::swap(p1, p2); } // only 0x93 passes the general register first and the opmask second
+ if (opROO(Reg(), *p2, *p1, T_MAP1|type, code)) return; // presumably the APX path, returning true once it has emitted the instruction - opROO is not visible here, confirm; note the (p2, p1) order
+ opVex(static_cast<const Reg&>(*p1), 0, *p2, T_L0|T_0F|type, code); // otherwise encode through opVex with L0 and the 0F map
+ }
/*
use single byte nop if useMultiByteNop = false
*/
diff --git a/xbyak/xbyak_mnemonic.h b/xbyak/xbyak_mnemonic.h
index 07bd24c..5b072f3 100644
--- a/xbyak/xbyak_mnemonic.h
+++ b/xbyak/xbyak_mnemonic.h
@@ -1982,20 +1982,17 @@ void kandnq(const Opmask& r1, const Opmask& r2, const Opmask& r3) { opVex(r1, &r
void kandnw(const Opmask& r1, const Opmask& r2, const Opmask& r3) { opVex(r1, &r2, r3, T_L1 | T_0F | T_W0, 0x42); }
void kandq(const Opmask& r1, const Opmask& r2, const Opmask& r3) { opVex(r1, &r2, r3, T_L1 | T_0F | T_W1, 0x41); }
void kandw(const Opmask& r1, const Opmask& r2, const Opmask& r3) { opVex(r1, &r2, r3, T_L1 | T_0F | T_W0, 0x41); }
-void kmovb(const Address& addr, const Opmask& k) { opVex(k, 0, addr, T_L0 | T_0F | T_66 | T_W0, 0x91); }
-void kmovb(const Opmask& k, const Operand& op) { if (!op.isMEM() && !op.isOPMASK()) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(k, 0, op, T_L0 | T_0F | T_66 | T_W0, 0x90); }
-void kmovb(const Opmask& k, const Reg32& r) { opVex(k, 0, r, T_L0 | T_0F | T_66 | T_W0, 0x92); }
-void kmovb(const Reg32& r, const Opmask& k) { opVex(r, 0, k, T_L0 | T_0F | T_66 | T_W0, 0x93); }
-void kmovd(const Address& addr, const Opmask& k) { opVex(k, 0, addr, T_L0 | T_0F | T_66 | T_W1, 0x91); }
-void kmovd(const Opmask& k, const Operand& op) { if (!op.isMEM() && !op.isOPMASK()) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(k, 0, op, T_L0 | T_0F | T_66 | T_W1, 0x90); }
-void kmovd(const Opmask& k, const Reg32& r) { opVex(k, 0, r, T_L0 | T_0F | T_F2 | T_W0, 0x92); }
-void kmovd(const Reg32& r, const Opmask& k) { opVex(r, 0, k, T_L0 | T_0F | T_F2 | T_W0, 0x93); }
-void kmovq(const Address& addr, const Opmask& k) { opVex(k, 0, addr, T_L0 | T_0F | T_W1, 0x91); }
-void kmovq(const Opmask& k, const Operand& op) { if (!op.isMEM() && !op.isOPMASK()) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(k, 0, op, T_L0 | T_0F | T_W1, 0x90); }
-void kmovw(const Address& addr, const Opmask& k) { opVex(k, 0, addr, T_L0 | T_0F | T_W0, 0x91); }
-void kmovw(const Opmask& k, const Operand& op) { if (!op.isMEM() && !op.isOPMASK()) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(k, 0, op, T_L0 | T_0F | T_W0, 0x90); }
-void kmovw(const Opmask& k, const Reg32& r) { opVex(k, 0, r, T_L0 | T_0F | T_W0, 0x92); }
-void kmovw(const Reg32& r, const Opmask& k) { opVex(r, 0, k, T_L0 | T_0F | T_W0, 0x93); }
+void kmovb(const Address& addr, const Opmask& k) { opKmov(k, addr, true, 8); }
+void kmovb(const Opmask& k, const Operand& op) { opKmov(k, op, false, 8); }
+void kmovb(const Reg32& r, const Opmask& k) { opKmov(k, r, true, 8); }
+void kmovd(const Address& addr, const Opmask& k) { opKmov(k, addr, true, 32); }
+void kmovd(const Opmask& k, const Operand& op) { opKmov(k, op, false, 32); }
+void kmovd(const Reg32& r, const Opmask& k) { opKmov(k, r, true, 32); }
+void kmovq(const Address& addr, const Opmask& k) { opKmov(k, addr, true, 64); }
+void kmovq(const Opmask& k, const Operand& op) { opKmov(k, op, false, 64); }
+void kmovw(const Address& addr, const Opmask& k) { opKmov(k, addr, true, 16); }
+void kmovw(const Opmask& k, const Operand& op) { opKmov(k, op, false, 16); }
+void kmovw(const Reg32& r, const Opmask& k) { opKmov(k, r, true, 16); }
void knotb(const Opmask& r1, const Opmask& r2) { opVex(r1, 0, r2, T_0F | T_66 | T_W0, 0x44); }
void knotd(const Opmask& r1, const Opmask& r2) { opVex(r1, 0, r2, T_0F | T_66 | T_W1, 0x44); }
void knotq(const Opmask& r1, const Opmask& r2) { opVex(r1, 0, r2, T_0F | T_W1, 0x44); }
@@ -2570,8 +2567,7 @@ void vsubph(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand())
void vsubsh(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_MAP5 | T_F3 | T_EW0 | T_MUST_EVEX | T_ER_X | T_N2, 0x5C); }
void vucomish(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_MAP5 | T_MUST_EVEX | T_EW0 | T_SAE_X | T_N2, 0x2E); }
#ifdef XBYAK64
-void kmovq(const Opmask& k, const Reg64& r) { opVex(k, 0, r, T_L0 | T_0F | T_F2 | T_W1, 0x92); }
-void kmovq(const Reg64& r, const Opmask& k) { opVex(r, 0, k, T_L0 | T_0F | T_F2 | T_W1, 0x93); }
+void kmovq(const Reg64& r, const Opmask& k) { opKmov(k, r, true, 64); }
void vpbroadcastq(const Xmm& x, const Reg64& r) { opVex(x, 0, r, T_66|T_0F38|T_EW1|T_YMM|T_MUST_EVEX, 0x7C); }
#endif
#endif